{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9660032712383503, "eval_steps": 1000, "global_step": 22000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.39092396017432e-05, "grad_norm": 350.0, "learning_rate": 7.246376811594203e-07, "loss": 6.5493, "step": 1 }, { "epoch": 8.78184792034864e-05, "grad_norm": 318.0, "learning_rate": 1.4492753623188406e-06, "loss": 6.505, "step": 2 }, { "epoch": 0.0001756369584069728, "grad_norm": 366.0, "learning_rate": 2.898550724637681e-06, "loss": 6.4657, "step": 4 }, { "epoch": 0.00026345543761045916, "grad_norm": 390.0, "learning_rate": 4.347826086956522e-06, "loss": 6.1079, "step": 6 }, { "epoch": 0.0003512739168139456, "grad_norm": 300.0, "learning_rate": 5.797101449275362e-06, "loss": 5.4335, "step": 8 }, { "epoch": 0.000439092396017432, "grad_norm": 151.0, "learning_rate": 7.246376811594203e-06, "loss": 4.4914, "step": 10 }, { "epoch": 0.0005269108752209183, "grad_norm": 85.0, "learning_rate": 8.695652173913044e-06, "loss": 3.9792, "step": 12 }, { "epoch": 0.0006147293544244047, "grad_norm": 15.0, "learning_rate": 1.0144927536231885e-05, "loss": 3.7765, "step": 14 }, { "epoch": 0.0007025478336278912, "grad_norm": 9.5625, "learning_rate": 1.1594202898550725e-05, "loss": 3.7259, "step": 16 }, { "epoch": 0.0007903663128313776, "grad_norm": 7.25, "learning_rate": 1.3043478260869566e-05, "loss": 3.657, "step": 18 }, { "epoch": 0.000878184792034864, "grad_norm": 16.25, "learning_rate": 1.4492753623188407e-05, "loss": 3.6366, "step": 20 }, { "epoch": 0.0009660032712383504, "grad_norm": 8.0, "learning_rate": 1.5942028985507246e-05, "loss": 3.5582, "step": 22 }, { "epoch": 0.0010538217504418366, "grad_norm": 10.8125, "learning_rate": 1.739130434782609e-05, "loss": 3.4616, "step": 24 }, { "epoch": 0.0011416402296453231, "grad_norm": 6.65625, "learning_rate": 1.8840579710144928e-05, "loss": 3.3355, "step": 26 }, { "epoch": 0.0012294587088488094, "grad_norm": 6.21875, "learning_rate": 2.028985507246377e-05, "loss": 3.2562, "step": 28 }, { "epoch": 0.001317277188052296, "grad_norm": 5.75, "learning_rate": 2.173913043478261e-05, "loss": 3.1432, "step": 30 }, { "epoch": 0.0014050956672557824, "grad_norm": 7.15625, "learning_rate": 2.318840579710145e-05, "loss": 3.0361, "step": 32 }, { "epoch": 0.0014929141464592687, "grad_norm": 8.25, "learning_rate": 2.4637681159420292e-05, "loss": 2.9816, "step": 34 }, { "epoch": 0.0015807326256627552, "grad_norm": 7.4375, "learning_rate": 2.608695652173913e-05, "loss": 2.8625, "step": 36 }, { "epoch": 0.0016685511048662414, "grad_norm": 6.03125, "learning_rate": 2.753623188405797e-05, "loss": 2.712, "step": 38 }, { "epoch": 0.001756369584069728, "grad_norm": 9.375, "learning_rate": 2.8985507246376814e-05, "loss": 2.5506, "step": 40 }, { "epoch": 0.0018441880632732142, "grad_norm": 8.8125, "learning_rate": 3.0434782608695656e-05, "loss": 2.4374, "step": 42 }, { "epoch": 0.0019320065424767007, "grad_norm": 16.625, "learning_rate": 3.188405797101449e-05, "loss": 2.3639, "step": 44 }, { "epoch": 0.002019825021680187, "grad_norm": 15.1875, "learning_rate": 3.3333333333333335e-05, "loss": 2.2934, "step": 46 }, { "epoch": 0.0021076435008836733, "grad_norm": 9.375, "learning_rate": 3.478260869565218e-05, "loss": 2.2077, "step": 48 }, { "epoch": 0.00219546198008716, "grad_norm": 9.875, "learning_rate": 3.6231884057971014e-05, "loss": 2.1561, "step": 50 }, { "epoch": 0.0022832804592906463, "grad_norm": 13.0, "learning_rate": 3.7681159420289856e-05, "loss": 2.1002, "step": 52 }, { "epoch": 0.0023710989384941325, "grad_norm": 9.3125, "learning_rate": 3.91304347826087e-05, "loss": 2.0411, "step": 54 }, { "epoch": 0.002458917417697619, "grad_norm": 9.0625, "learning_rate": 4.057971014492754e-05, "loss": 2.0039, "step": 56 }, { "epoch": 0.0025467358969011055, "grad_norm": 8.0, "learning_rate": 4.202898550724638e-05, "loss": 2.0052, "step": 58 }, { "epoch": 0.002634554376104592, "grad_norm": 8.625, "learning_rate": 4.347826086956522e-05, "loss": 1.9758, "step": 60 }, { "epoch": 0.002722372855308078, "grad_norm": 13.125, "learning_rate": 4.492753623188406e-05, "loss": 1.9008, "step": 62 }, { "epoch": 0.002810191334511565, "grad_norm": 6.6875, "learning_rate": 4.63768115942029e-05, "loss": 1.8888, "step": 64 }, { "epoch": 0.002898009813715051, "grad_norm": 11.1875, "learning_rate": 4.782608695652174e-05, "loss": 1.8357, "step": 66 }, { "epoch": 0.0029858282929185373, "grad_norm": 29.625, "learning_rate": 4.9275362318840584e-05, "loss": 1.9061, "step": 68 }, { "epoch": 0.0030736467721220236, "grad_norm": 31.125, "learning_rate": 4.9999999760686754e-05, "loss": 1.8563, "step": 70 }, { "epoch": 0.0031614652513255103, "grad_norm": 13.0625, "learning_rate": 4.999999784618078e-05, "loss": 1.8569, "step": 72 }, { "epoch": 0.0032492837305289966, "grad_norm": 8.5625, "learning_rate": 4.999999401716897e-05, "loss": 1.8224, "step": 74 }, { "epoch": 0.003337102209732483, "grad_norm": 8.6875, "learning_rate": 4.9999988273651635e-05, "loss": 1.8069, "step": 76 }, { "epoch": 0.003424920688935969, "grad_norm": 10.0625, "learning_rate": 4.9999980615629205e-05, "loss": 1.7401, "step": 78 }, { "epoch": 0.003512739168139456, "grad_norm": 6.75, "learning_rate": 4.9999971043102264e-05, "loss": 1.7421, "step": 80 }, { "epoch": 0.003600557647342942, "grad_norm": 6.65625, "learning_rate": 4.999995955607155e-05, "loss": 1.7189, "step": 82 }, { "epoch": 0.0036883761265464284, "grad_norm": 7.125, "learning_rate": 4.999994615453794e-05, "loss": 1.714, "step": 84 }, { "epoch": 0.0037761946057499147, "grad_norm": 11.25, "learning_rate": 4.9999930838502456e-05, "loss": 1.6762, "step": 86 }, { "epoch": 0.0038640130849534014, "grad_norm": 12.25, "learning_rate": 4.999991360796628e-05, "loss": 1.6994, "step": 88 }, { "epoch": 0.003951831564156888, "grad_norm": 7.78125, "learning_rate": 4.9999894462930725e-05, "loss": 1.6493, "step": 90 }, { "epoch": 0.004039650043360374, "grad_norm": 6.9375, "learning_rate": 4.9999873403397254e-05, "loss": 1.6475, "step": 92 }, { "epoch": 0.004127468522563861, "grad_norm": 12.8125, "learning_rate": 4.99998504293675e-05, "loss": 1.6591, "step": 94 }, { "epoch": 0.0042152870017673465, "grad_norm": 11.25, "learning_rate": 4.999982554084319e-05, "loss": 1.6258, "step": 96 }, { "epoch": 0.004303105480970833, "grad_norm": 12.125, "learning_rate": 4.999979873782625e-05, "loss": 1.6365, "step": 98 }, { "epoch": 0.00439092396017432, "grad_norm": 10.9375, "learning_rate": 4.999977002031874e-05, "loss": 1.6345, "step": 100 }, { "epoch": 0.004478742439377806, "grad_norm": 8.9375, "learning_rate": 4.999973938832284e-05, "loss": 1.6242, "step": 102 }, { "epoch": 0.0045665609185812925, "grad_norm": 9.9375, "learning_rate": 4.999970684184091e-05, "loss": 1.6097, "step": 104 }, { "epoch": 0.004654379397784779, "grad_norm": 13.6875, "learning_rate": 4.999967238087544e-05, "loss": 1.5788, "step": 106 }, { "epoch": 0.004742197876988265, "grad_norm": 9.125, "learning_rate": 4.999963600542906e-05, "loss": 1.5573, "step": 108 }, { "epoch": 0.004830016356191752, "grad_norm": 7.46875, "learning_rate": 4.999959771550457e-05, "loss": 1.574, "step": 110 }, { "epoch": 0.004917834835395238, "grad_norm": 8.9375, "learning_rate": 4.999955751110488e-05, "loss": 1.5836, "step": 112 }, { "epoch": 0.005005653314598724, "grad_norm": 14.625, "learning_rate": 4.999951539223309e-05, "loss": 1.5808, "step": 114 }, { "epoch": 0.005093471793802211, "grad_norm": 10.625, "learning_rate": 4.999947135889242e-05, "loss": 1.569, "step": 116 }, { "epoch": 0.005181290273005697, "grad_norm": 8.1875, "learning_rate": 4.999942541108624e-05, "loss": 1.5499, "step": 118 }, { "epoch": 0.005269108752209184, "grad_norm": 8.375, "learning_rate": 4.999937754881807e-05, "loss": 1.5262, "step": 120 }, { "epoch": 0.00535692723141267, "grad_norm": 8.625, "learning_rate": 4.999932777209157e-05, "loss": 1.5405, "step": 122 }, { "epoch": 0.005444745710616156, "grad_norm": 9.5625, "learning_rate": 4.999927608091056e-05, "loss": 1.5341, "step": 124 }, { "epoch": 0.005532564189819643, "grad_norm": 9.9375, "learning_rate": 4.9999222475279003e-05, "loss": 1.5037, "step": 126 }, { "epoch": 0.00562038266902313, "grad_norm": 7.75, "learning_rate": 4.999916695520098e-05, "loss": 1.5074, "step": 128 }, { "epoch": 0.005708201148226615, "grad_norm": 9.25, "learning_rate": 4.999910952068077e-05, "loss": 1.5116, "step": 130 }, { "epoch": 0.005796019627430102, "grad_norm": 6.71875, "learning_rate": 4.999905017172276e-05, "loss": 1.5003, "step": 132 }, { "epoch": 0.005883838106633588, "grad_norm": 10.5, "learning_rate": 4.999898890833149e-05, "loss": 1.5161, "step": 134 }, { "epoch": 0.005971656585837075, "grad_norm": 10.125, "learning_rate": 4.999892573051166e-05, "loss": 1.477, "step": 136 }, { "epoch": 0.006059475065040561, "grad_norm": 10.5, "learning_rate": 4.999886063826811e-05, "loss": 1.4996, "step": 138 }, { "epoch": 0.006147293544244047, "grad_norm": 10.625, "learning_rate": 4.999879363160581e-05, "loss": 1.4417, "step": 140 }, { "epoch": 0.006235112023447534, "grad_norm": 13.375, "learning_rate": 4.999872471052991e-05, "loss": 1.457, "step": 142 }, { "epoch": 0.006322930502651021, "grad_norm": 10.3125, "learning_rate": 4.9998653875045666e-05, "loss": 1.4707, "step": 144 }, { "epoch": 0.0064107489818545065, "grad_norm": 10.0625, "learning_rate": 4.999858112515853e-05, "loss": 1.4844, "step": 146 }, { "epoch": 0.006498567461057993, "grad_norm": 11.8125, "learning_rate": 4.999850646087405e-05, "loss": 1.4749, "step": 148 }, { "epoch": 0.00658638594026148, "grad_norm": 9.125, "learning_rate": 4.999842988219795e-05, "loss": 1.4431, "step": 150 }, { "epoch": 0.006674204419464966, "grad_norm": 8.1875, "learning_rate": 4.99983513891361e-05, "loss": 1.4359, "step": 152 }, { "epoch": 0.0067620228986684525, "grad_norm": 6.53125, "learning_rate": 4.999827098169452e-05, "loss": 1.4179, "step": 154 }, { "epoch": 0.006849841377871938, "grad_norm": 7.9375, "learning_rate": 4.999818865987934e-05, "loss": 1.4185, "step": 156 }, { "epoch": 0.006937659857075425, "grad_norm": 9.1875, "learning_rate": 4.999810442369689e-05, "loss": 1.449, "step": 158 }, { "epoch": 0.007025478336278912, "grad_norm": 11.5625, "learning_rate": 4.9998018273153605e-05, "loss": 1.416, "step": 160 }, { "epoch": 0.007113296815482398, "grad_norm": 9.375, "learning_rate": 4.999793020825609e-05, "loss": 1.3884, "step": 162 }, { "epoch": 0.007201115294685884, "grad_norm": 5.8125, "learning_rate": 4.9997840229011085e-05, "loss": 1.3853, "step": 164 }, { "epoch": 0.007288933773889371, "grad_norm": 7.1875, "learning_rate": 4.999774833542549e-05, "loss": 1.4316, "step": 166 }, { "epoch": 0.007376752253092857, "grad_norm": 9.5, "learning_rate": 4.999765452750633e-05, "loss": 1.3818, "step": 168 }, { "epoch": 0.007464570732296344, "grad_norm": 7.28125, "learning_rate": 4.999755880526079e-05, "loss": 1.3969, "step": 170 }, { "epoch": 0.007552389211499829, "grad_norm": 7.9375, "learning_rate": 4.999746116869621e-05, "loss": 1.3647, "step": 172 }, { "epoch": 0.007640207690703316, "grad_norm": 6.9375, "learning_rate": 4.999736161782006e-05, "loss": 1.3707, "step": 174 }, { "epoch": 0.007728026169906803, "grad_norm": 7.53125, "learning_rate": 4.9997260152639966e-05, "loss": 1.3951, "step": 176 }, { "epoch": 0.007815844649110289, "grad_norm": 5.875, "learning_rate": 4.9997156773163694e-05, "loss": 1.3626, "step": 178 }, { "epoch": 0.007903663128313776, "grad_norm": 7.3125, "learning_rate": 4.999705147939917e-05, "loss": 1.3481, "step": 180 }, { "epoch": 0.007991481607517262, "grad_norm": 7.03125, "learning_rate": 4.999694427135445e-05, "loss": 1.3754, "step": 182 }, { "epoch": 0.008079300086720748, "grad_norm": 6.46875, "learning_rate": 4.9996835149037735e-05, "loss": 1.3436, "step": 184 }, { "epoch": 0.008167118565924234, "grad_norm": 8.625, "learning_rate": 4.999672411245741e-05, "loss": 1.3398, "step": 186 }, { "epoch": 0.008254937045127721, "grad_norm": 8.125, "learning_rate": 4.999661116162194e-05, "loss": 1.3604, "step": 188 }, { "epoch": 0.008342755524331207, "grad_norm": 11.875, "learning_rate": 4.9996496296540005e-05, "loss": 1.3354, "step": 190 }, { "epoch": 0.008430574003534693, "grad_norm": 7.53125, "learning_rate": 4.99963795172204e-05, "loss": 1.3341, "step": 192 }, { "epoch": 0.00851839248273818, "grad_norm": 6.84375, "learning_rate": 4.999626082367205e-05, "loss": 1.3466, "step": 194 }, { "epoch": 0.008606210961941667, "grad_norm": 7.8125, "learning_rate": 4.999614021590405e-05, "loss": 1.3208, "step": 196 }, { "epoch": 0.008694029441145152, "grad_norm": 6.21875, "learning_rate": 4.999601769392565e-05, "loss": 1.3211, "step": 198 }, { "epoch": 0.00878184792034864, "grad_norm": 6.3125, "learning_rate": 4.999589325774622e-05, "loss": 1.3199, "step": 200 }, { "epoch": 0.008869666399552126, "grad_norm": 8.875, "learning_rate": 4.99957669073753e-05, "loss": 1.2907, "step": 202 }, { "epoch": 0.008957484878755612, "grad_norm": 6.03125, "learning_rate": 4.9995638642822536e-05, "loss": 1.3227, "step": 204 }, { "epoch": 0.0090453033579591, "grad_norm": 10.25, "learning_rate": 4.9995508464097796e-05, "loss": 1.3157, "step": 206 }, { "epoch": 0.009133121837162585, "grad_norm": 7.625, "learning_rate": 4.999537637121101e-05, "loss": 1.2974, "step": 208 }, { "epoch": 0.009220940316366071, "grad_norm": 6.4375, "learning_rate": 4.999524236417232e-05, "loss": 1.3103, "step": 210 }, { "epoch": 0.009308758795569558, "grad_norm": 10.1875, "learning_rate": 4.999510644299198e-05, "loss": 1.3, "step": 212 }, { "epoch": 0.009396577274773044, "grad_norm": 7.53125, "learning_rate": 4.999496860768039e-05, "loss": 1.2689, "step": 214 }, { "epoch": 0.00948439575397653, "grad_norm": 6.0625, "learning_rate": 4.999482885824811e-05, "loss": 1.2809, "step": 216 }, { "epoch": 0.009572214233180018, "grad_norm": 8.125, "learning_rate": 4.9994687194705846e-05, "loss": 1.2937, "step": 218 }, { "epoch": 0.009660032712383504, "grad_norm": 13.4375, "learning_rate": 4.9994543617064445e-05, "loss": 1.3081, "step": 220 }, { "epoch": 0.00974785119158699, "grad_norm": 10.5, "learning_rate": 4.999439812533491e-05, "loss": 1.3092, "step": 222 }, { "epoch": 0.009835669670790475, "grad_norm": 8.375, "learning_rate": 4.9994250719528366e-05, "loss": 1.304, "step": 224 }, { "epoch": 0.009923488149993963, "grad_norm": 6.53125, "learning_rate": 4.999410139965612e-05, "loss": 1.2551, "step": 226 }, { "epoch": 0.010011306629197449, "grad_norm": 8.4375, "learning_rate": 4.9993950165729585e-05, "loss": 1.2683, "step": 228 }, { "epoch": 0.010099125108400935, "grad_norm": 10.5, "learning_rate": 4.9993797017760364e-05, "loss": 1.3054, "step": 230 }, { "epoch": 0.010186943587604422, "grad_norm": 10.8125, "learning_rate": 4.999364195576017e-05, "loss": 1.2796, "step": 232 }, { "epoch": 0.010274762066807908, "grad_norm": 13.1875, "learning_rate": 4.999348497974089e-05, "loss": 1.2596, "step": 234 }, { "epoch": 0.010362580546011394, "grad_norm": 8.9375, "learning_rate": 4.999332608971453e-05, "loss": 1.2413, "step": 236 }, { "epoch": 0.010450399025214881, "grad_norm": 6.78125, "learning_rate": 4.9993165285693275e-05, "loss": 1.2754, "step": 238 }, { "epoch": 0.010538217504418367, "grad_norm": 6.75, "learning_rate": 4.999300256768943e-05, "loss": 1.2392, "step": 240 }, { "epoch": 0.010626035983621853, "grad_norm": 7.4375, "learning_rate": 4.9992837935715455e-05, "loss": 1.2867, "step": 242 }, { "epoch": 0.01071385446282534, "grad_norm": 8.5625, "learning_rate": 4.999267138978396e-05, "loss": 1.2473, "step": 244 }, { "epoch": 0.010801672942028826, "grad_norm": 7.75, "learning_rate": 4.9992502929907706e-05, "loss": 1.2448, "step": 246 }, { "epoch": 0.010889491421232312, "grad_norm": 7.3125, "learning_rate": 4.999233255609957e-05, "loss": 1.2255, "step": 248 }, { "epoch": 0.0109773099004358, "grad_norm": 8.4375, "learning_rate": 4.999216026837263e-05, "loss": 1.2549, "step": 250 }, { "epoch": 0.011065128379639286, "grad_norm": 7.71875, "learning_rate": 4.999198606674006e-05, "loss": 1.2665, "step": 252 }, { "epoch": 0.011152946858842772, "grad_norm": 7.71875, "learning_rate": 4.9991809951215204e-05, "loss": 1.2329, "step": 254 }, { "epoch": 0.01124076533804626, "grad_norm": 11.0625, "learning_rate": 4.999163192181155e-05, "loss": 1.2516, "step": 256 }, { "epoch": 0.011328583817249745, "grad_norm": 8.375, "learning_rate": 4.9991451978542744e-05, "loss": 1.2441, "step": 258 }, { "epoch": 0.01141640229645323, "grad_norm": 7.9375, "learning_rate": 4.999127012142255e-05, "loss": 1.2242, "step": 260 }, { "epoch": 0.011504220775656717, "grad_norm": 6.34375, "learning_rate": 4.999108635046489e-05, "loss": 1.2447, "step": 262 }, { "epoch": 0.011592039254860204, "grad_norm": 7.875, "learning_rate": 4.999090066568385e-05, "loss": 1.2263, "step": 264 }, { "epoch": 0.01167985773406369, "grad_norm": 7.8125, "learning_rate": 4.9990713067093654e-05, "loss": 1.2216, "step": 266 }, { "epoch": 0.011767676213267176, "grad_norm": 8.5625, "learning_rate": 4.9990523554708655e-05, "loss": 1.2352, "step": 268 }, { "epoch": 0.011855494692470664, "grad_norm": 6.53125, "learning_rate": 4.999033212854337e-05, "loss": 1.2291, "step": 270 }, { "epoch": 0.01194331317167415, "grad_norm": 7.8125, "learning_rate": 4.999013878861246e-05, "loss": 1.223, "step": 272 }, { "epoch": 0.012031131650877635, "grad_norm": 9.5625, "learning_rate": 4.998994353493074e-05, "loss": 1.21, "step": 274 }, { "epoch": 0.012118950130081123, "grad_norm": 11.5, "learning_rate": 4.998974636751315e-05, "loss": 1.227, "step": 276 }, { "epoch": 0.012206768609284609, "grad_norm": 9.75, "learning_rate": 4.998954728637478e-05, "loss": 1.2142, "step": 278 }, { "epoch": 0.012294587088488095, "grad_norm": 7.46875, "learning_rate": 4.9989346291530904e-05, "loss": 1.2299, "step": 280 }, { "epoch": 0.012382405567691582, "grad_norm": 6.34375, "learning_rate": 4.998914338299689e-05, "loss": 1.2197, "step": 282 }, { "epoch": 0.012470224046895068, "grad_norm": 7.96875, "learning_rate": 4.998893856078829e-05, "loss": 1.227, "step": 284 }, { "epoch": 0.012558042526098554, "grad_norm": 11.1875, "learning_rate": 4.998873182492078e-05, "loss": 1.2082, "step": 286 }, { "epoch": 0.012645861005302041, "grad_norm": 9.9375, "learning_rate": 4.9988523175410204e-05, "loss": 1.2347, "step": 288 }, { "epoch": 0.012733679484505527, "grad_norm": 8.0625, "learning_rate": 4.9988312612272524e-05, "loss": 1.2427, "step": 290 }, { "epoch": 0.012821497963709013, "grad_norm": 8.5, "learning_rate": 4.998810013552388e-05, "loss": 1.1799, "step": 292 }, { "epoch": 0.0129093164429125, "grad_norm": 8.875, "learning_rate": 4.998788574518054e-05, "loss": 1.1828, "step": 294 }, { "epoch": 0.012997134922115986, "grad_norm": 9.375, "learning_rate": 4.998766944125891e-05, "loss": 1.1921, "step": 296 }, { "epoch": 0.013084953401319472, "grad_norm": 6.3125, "learning_rate": 4.998745122377557e-05, "loss": 1.1933, "step": 298 }, { "epoch": 0.01317277188052296, "grad_norm": 6.90625, "learning_rate": 4.998723109274722e-05, "loss": 1.1597, "step": 300 }, { "epoch": 0.013260590359726446, "grad_norm": 6.71875, "learning_rate": 4.998700904819074e-05, "loss": 1.1891, "step": 302 }, { "epoch": 0.013348408838929932, "grad_norm": 5.46875, "learning_rate": 4.99867850901231e-05, "loss": 1.1977, "step": 304 }, { "epoch": 0.013436227318133417, "grad_norm": 7.875, "learning_rate": 4.998655921856147e-05, "loss": 1.1838, "step": 306 }, { "epoch": 0.013524045797336905, "grad_norm": 5.46875, "learning_rate": 4.9986331433523156e-05, "loss": 1.1977, "step": 308 }, { "epoch": 0.01361186427654039, "grad_norm": 7.3125, "learning_rate": 4.998610173502558e-05, "loss": 1.1807, "step": 310 }, { "epoch": 0.013699682755743877, "grad_norm": 5.40625, "learning_rate": 4.9985870123086345e-05, "loss": 1.161, "step": 312 }, { "epoch": 0.013787501234947364, "grad_norm": 7.8125, "learning_rate": 4.9985636597723195e-05, "loss": 1.1476, "step": 314 }, { "epoch": 0.01387531971415085, "grad_norm": 5.625, "learning_rate": 4.9985401158954e-05, "loss": 1.1881, "step": 316 }, { "epoch": 0.013963138193354336, "grad_norm": 7.125, "learning_rate": 4.998516380679679e-05, "loss": 1.1831, "step": 318 }, { "epoch": 0.014050956672557824, "grad_norm": 9.3125, "learning_rate": 4.998492454126975e-05, "loss": 1.1747, "step": 320 }, { "epoch": 0.01413877515176131, "grad_norm": 5.5625, "learning_rate": 4.9984683362391204e-05, "loss": 1.1628, "step": 322 }, { "epoch": 0.014226593630964795, "grad_norm": 7.21875, "learning_rate": 4.998444027017961e-05, "loss": 1.1477, "step": 324 }, { "epoch": 0.014314412110168283, "grad_norm": 8.625, "learning_rate": 4.9984195264653596e-05, "loss": 1.1537, "step": 326 }, { "epoch": 0.014402230589371769, "grad_norm": 6.75, "learning_rate": 4.998394834583191e-05, "loss": 1.1832, "step": 328 }, { "epoch": 0.014490049068575254, "grad_norm": 6.34375, "learning_rate": 4.998369951373348e-05, "loss": 1.1519, "step": 330 }, { "epoch": 0.014577867547778742, "grad_norm": 6.5, "learning_rate": 4.998344876837735e-05, "loss": 1.1786, "step": 332 }, { "epoch": 0.014665686026982228, "grad_norm": 7.09375, "learning_rate": 4.998319610978273e-05, "loss": 1.1379, "step": 334 }, { "epoch": 0.014753504506185714, "grad_norm": 6.5, "learning_rate": 4.998294153796895e-05, "loss": 1.1754, "step": 336 }, { "epoch": 0.014841322985389201, "grad_norm": 6.5, "learning_rate": 4.998268505295552e-05, "loss": 1.1211, "step": 338 }, { "epoch": 0.014929141464592687, "grad_norm": 8.5625, "learning_rate": 4.9982426654762095e-05, "loss": 1.1669, "step": 340 }, { "epoch": 0.015016959943796173, "grad_norm": 7.4375, "learning_rate": 4.998216634340844e-05, "loss": 1.1609, "step": 342 }, { "epoch": 0.015104778422999659, "grad_norm": 5.625, "learning_rate": 4.998190411891449e-05, "loss": 1.1381, "step": 344 }, { "epoch": 0.015192596902203146, "grad_norm": 7.09375, "learning_rate": 4.9981639981300344e-05, "loss": 1.1383, "step": 346 }, { "epoch": 0.015280415381406632, "grad_norm": 8.5625, "learning_rate": 4.998137393058622e-05, "loss": 1.1462, "step": 348 }, { "epoch": 0.015368233860610118, "grad_norm": 7.0, "learning_rate": 4.9981105966792485e-05, "loss": 1.1386, "step": 350 }, { "epoch": 0.015456052339813606, "grad_norm": 8.0, "learning_rate": 4.9980836089939665e-05, "loss": 1.1212, "step": 352 }, { "epoch": 0.015543870819017092, "grad_norm": 6.3125, "learning_rate": 4.998056430004844e-05, "loss": 1.1352, "step": 354 }, { "epoch": 0.015631689298220577, "grad_norm": 8.8125, "learning_rate": 4.99802905971396e-05, "loss": 1.1435, "step": 356 }, { "epoch": 0.015719507777424063, "grad_norm": 7.3125, "learning_rate": 4.998001498123413e-05, "loss": 1.1223, "step": 358 }, { "epoch": 0.015807326256627553, "grad_norm": 5.84375, "learning_rate": 4.9979737452353114e-05, "loss": 1.1079, "step": 360 }, { "epoch": 0.01589514473583104, "grad_norm": 16.5, "learning_rate": 4.997945801051782e-05, "loss": 1.1403, "step": 362 }, { "epoch": 0.015982963215034524, "grad_norm": 9.6875, "learning_rate": 4.997917665574964e-05, "loss": 1.1463, "step": 364 }, { "epoch": 0.01607078169423801, "grad_norm": 7.78125, "learning_rate": 4.9978893388070135e-05, "loss": 1.1436, "step": 366 }, { "epoch": 0.016158600173441496, "grad_norm": 6.125, "learning_rate": 4.997860820750098e-05, "loss": 1.1402, "step": 368 }, { "epoch": 0.016246418652644982, "grad_norm": 7.8125, "learning_rate": 4.997832111406402e-05, "loss": 1.098, "step": 370 }, { "epoch": 0.016334237131848468, "grad_norm": 6.28125, "learning_rate": 4.997803210778124e-05, "loss": 1.1279, "step": 372 }, { "epoch": 0.016422055611051957, "grad_norm": 5.5625, "learning_rate": 4.997774118867477e-05, "loss": 1.1178, "step": 374 }, { "epoch": 0.016509874090255443, "grad_norm": 8.8125, "learning_rate": 4.99774483567669e-05, "loss": 1.1477, "step": 376 }, { "epoch": 0.01659769256945893, "grad_norm": 6.9375, "learning_rate": 4.997715361208004e-05, "loss": 1.1008, "step": 378 }, { "epoch": 0.016685511048662414, "grad_norm": 5.15625, "learning_rate": 4.997685695463677e-05, "loss": 1.1023, "step": 380 }, { "epoch": 0.0167733295278659, "grad_norm": 9.375, "learning_rate": 4.9976558384459815e-05, "loss": 1.1327, "step": 382 }, { "epoch": 0.016861148007069386, "grad_norm": 11.0625, "learning_rate": 4.997625790157203e-05, "loss": 1.1455, "step": 384 }, { "epoch": 0.016948966486272875, "grad_norm": 8.0625, "learning_rate": 4.997595550599642e-05, "loss": 1.1056, "step": 386 }, { "epoch": 0.01703678496547636, "grad_norm": 8.6875, "learning_rate": 4.9975651197756155e-05, "loss": 1.1077, "step": 388 }, { "epoch": 0.017124603444679847, "grad_norm": 7.03125, "learning_rate": 4.9975344976874536e-05, "loss": 1.1418, "step": 390 }, { "epoch": 0.017212421923883333, "grad_norm": 9.875, "learning_rate": 4.997503684337501e-05, "loss": 1.0905, "step": 392 }, { "epoch": 0.01730024040308682, "grad_norm": 8.375, "learning_rate": 4.997472679728118e-05, "loss": 1.1365, "step": 394 }, { "epoch": 0.017388058882290305, "grad_norm": 6.875, "learning_rate": 4.997441483861678e-05, "loss": 1.1076, "step": 396 }, { "epoch": 0.017475877361493794, "grad_norm": 6.21875, "learning_rate": 4.99741009674057e-05, "loss": 1.0944, "step": 398 }, { "epoch": 0.01756369584069728, "grad_norm": 9.3125, "learning_rate": 4.997378518367199e-05, "loss": 1.1178, "step": 400 }, { "epoch": 0.017651514319900766, "grad_norm": 11.0625, "learning_rate": 4.9973467487439816e-05, "loss": 1.1157, "step": 402 }, { "epoch": 0.01773933279910425, "grad_norm": 10.6875, "learning_rate": 4.997314787873352e-05, "loss": 1.1005, "step": 404 }, { "epoch": 0.017827151278307737, "grad_norm": 6.9375, "learning_rate": 4.9972826357577576e-05, "loss": 1.0849, "step": 406 }, { "epoch": 0.017914969757511223, "grad_norm": 5.46875, "learning_rate": 4.99725029239966e-05, "loss": 1.1028, "step": 408 }, { "epoch": 0.01800278823671471, "grad_norm": 7.34375, "learning_rate": 4.9972177578015364e-05, "loss": 1.1027, "step": 410 }, { "epoch": 0.0180906067159182, "grad_norm": 6.90625, "learning_rate": 4.997185031965878e-05, "loss": 1.0742, "step": 412 }, { "epoch": 0.018178425195121684, "grad_norm": 6.9375, "learning_rate": 4.9971521148951914e-05, "loss": 1.0998, "step": 414 }, { "epoch": 0.01826624367432517, "grad_norm": 6.3125, "learning_rate": 4.997119006591997e-05, "loss": 1.0511, "step": 416 }, { "epoch": 0.018354062153528656, "grad_norm": 8.875, "learning_rate": 4.997085707058832e-05, "loss": 1.0712, "step": 418 }, { "epoch": 0.018441880632732142, "grad_norm": 7.625, "learning_rate": 4.997052216298243e-05, "loss": 1.1015, "step": 420 }, { "epoch": 0.018529699111935628, "grad_norm": 7.5625, "learning_rate": 4.9970185343127975e-05, "loss": 1.071, "step": 422 }, { "epoch": 0.018617517591139117, "grad_norm": 5.78125, "learning_rate": 4.9969846611050744e-05, "loss": 1.0926, "step": 424 }, { "epoch": 0.018705336070342603, "grad_norm": 6.34375, "learning_rate": 4.9969505966776664e-05, "loss": 1.0808, "step": 426 }, { "epoch": 0.01879315454954609, "grad_norm": 6.53125, "learning_rate": 4.9969163410331845e-05, "loss": 1.0853, "step": 428 }, { "epoch": 0.018880973028749574, "grad_norm": 5.65625, "learning_rate": 4.996881894174249e-05, "loss": 1.0674, "step": 430 }, { "epoch": 0.01896879150795306, "grad_norm": 9.8125, "learning_rate": 4.996847256103501e-05, "loss": 1.0528, "step": 432 }, { "epoch": 0.019056609987156546, "grad_norm": 9.1875, "learning_rate": 4.9968124268235906e-05, "loss": 1.1181, "step": 434 }, { "epoch": 0.019144428466360035, "grad_norm": 7.0625, "learning_rate": 4.996777406337186e-05, "loss": 1.0888, "step": 436 }, { "epoch": 0.01923224694556352, "grad_norm": 7.0, "learning_rate": 4.99674219464697e-05, "loss": 1.043, "step": 438 }, { "epoch": 0.019320065424767007, "grad_norm": 5.96875, "learning_rate": 4.9967067917556376e-05, "loss": 1.0879, "step": 440 }, { "epoch": 0.019407883903970493, "grad_norm": 5.65625, "learning_rate": 4.996671197665901e-05, "loss": 1.072, "step": 442 }, { "epoch": 0.01949570238317398, "grad_norm": 5.9375, "learning_rate": 4.996635412380485e-05, "loss": 1.0603, "step": 444 }, { "epoch": 0.019583520862377465, "grad_norm": 10.125, "learning_rate": 4.996599435902131e-05, "loss": 1.0686, "step": 446 }, { "epoch": 0.01967133934158095, "grad_norm": 13.3125, "learning_rate": 4.996563268233594e-05, "loss": 1.075, "step": 448 }, { "epoch": 0.01975915782078444, "grad_norm": 12.25, "learning_rate": 4.996526909377643e-05, "loss": 1.0792, "step": 450 }, { "epoch": 0.019846976299987926, "grad_norm": 7.8125, "learning_rate": 4.996490359337062e-05, "loss": 1.0456, "step": 452 }, { "epoch": 0.01993479477919141, "grad_norm": 6.59375, "learning_rate": 4.9964536181146525e-05, "loss": 1.0603, "step": 454 }, { "epoch": 0.020022613258394897, "grad_norm": 9.125, "learning_rate": 4.9964166857132255e-05, "loss": 1.0536, "step": 456 }, { "epoch": 0.020110431737598383, "grad_norm": 11.0, "learning_rate": 4.996379562135611e-05, "loss": 1.0881, "step": 458 }, { "epoch": 0.02019825021680187, "grad_norm": 5.4375, "learning_rate": 4.9963422473846504e-05, "loss": 1.0952, "step": 460 }, { "epoch": 0.02028606869600536, "grad_norm": 5.9375, "learning_rate": 4.9963047414632024e-05, "loss": 1.0648, "step": 462 }, { "epoch": 0.020373887175208844, "grad_norm": 6.53125, "learning_rate": 4.996267044374139e-05, "loss": 1.0648, "step": 464 }, { "epoch": 0.02046170565441233, "grad_norm": 8.0625, "learning_rate": 4.9962291561203464e-05, "loss": 1.0814, "step": 466 }, { "epoch": 0.020549524133615816, "grad_norm": 9.8125, "learning_rate": 4.9961910767047275e-05, "loss": 1.0556, "step": 468 }, { "epoch": 0.020637342612819302, "grad_norm": 8.375, "learning_rate": 4.996152806130198e-05, "loss": 1.0354, "step": 470 }, { "epoch": 0.020725161092022788, "grad_norm": 8.375, "learning_rate": 4.996114344399687e-05, "loss": 1.0932, "step": 472 }, { "epoch": 0.020812979571226277, "grad_norm": 7.90625, "learning_rate": 4.996075691516141e-05, "loss": 1.0409, "step": 474 }, { "epoch": 0.020900798050429763, "grad_norm": 7.4375, "learning_rate": 4.996036847482521e-05, "loss": 1.0539, "step": 476 }, { "epoch": 0.02098861652963325, "grad_norm": 7.6875, "learning_rate": 4.9959978123018006e-05, "loss": 1.0252, "step": 478 }, { "epoch": 0.021076435008836734, "grad_norm": 6.875, "learning_rate": 4.9959585859769694e-05, "loss": 1.0492, "step": 480 }, { "epoch": 0.02116425348804022, "grad_norm": 6.625, "learning_rate": 4.995919168511032e-05, "loss": 1.0662, "step": 482 }, { "epoch": 0.021252071967243706, "grad_norm": 6.5, "learning_rate": 4.995879559907005e-05, "loss": 1.035, "step": 484 }, { "epoch": 0.021339890446447192, "grad_norm": 6.59375, "learning_rate": 4.995839760167924e-05, "loss": 1.0408, "step": 486 }, { "epoch": 0.02142770892565068, "grad_norm": 6.875, "learning_rate": 4.995799769296836e-05, "loss": 1.0301, "step": 488 }, { "epoch": 0.021515527404854167, "grad_norm": 5.90625, "learning_rate": 4.995759587296803e-05, "loss": 1.0198, "step": 490 }, { "epoch": 0.021603345884057653, "grad_norm": 5.53125, "learning_rate": 4.995719214170902e-05, "loss": 1.0354, "step": 492 }, { "epoch": 0.02169116436326114, "grad_norm": 7.5625, "learning_rate": 4.9956786499222263e-05, "loss": 1.044, "step": 494 }, { "epoch": 0.021778982842464625, "grad_norm": 6.375, "learning_rate": 4.995637894553881e-05, "loss": 1.0413, "step": 496 }, { "epoch": 0.02186680132166811, "grad_norm": 6.40625, "learning_rate": 4.9955969480689865e-05, "loss": 1.0098, "step": 498 }, { "epoch": 0.0219546198008716, "grad_norm": 6.1875, "learning_rate": 4.995555810470681e-05, "loss": 1.0262, "step": 500 }, { "epoch": 0.022042438280075086, "grad_norm": 4.96875, "learning_rate": 4.995514481762112e-05, "loss": 1.0375, "step": 502 }, { "epoch": 0.02213025675927857, "grad_norm": 5.8125, "learning_rate": 4.995472961946447e-05, "loss": 1.035, "step": 504 }, { "epoch": 0.022218075238482057, "grad_norm": 5.5, "learning_rate": 4.9954312510268636e-05, "loss": 1.0307, "step": 506 }, { "epoch": 0.022305893717685543, "grad_norm": 5.0625, "learning_rate": 4.995389349006557e-05, "loss": 0.9841, "step": 508 }, { "epoch": 0.02239371219688903, "grad_norm": 6.53125, "learning_rate": 4.995347255888736e-05, "loss": 1.0169, "step": 510 }, { "epoch": 0.02248153067609252, "grad_norm": 6.125, "learning_rate": 4.9953049716766234e-05, "loss": 1.0283, "step": 512 }, { "epoch": 0.022569349155296004, "grad_norm": 6.4375, "learning_rate": 4.9952624963734584e-05, "loss": 1.0303, "step": 514 }, { "epoch": 0.02265716763449949, "grad_norm": 5.84375, "learning_rate": 4.995219829982494e-05, "loss": 1.0268, "step": 516 }, { "epoch": 0.022744986113702976, "grad_norm": 6.59375, "learning_rate": 4.9951769725069954e-05, "loss": 1.0065, "step": 518 }, { "epoch": 0.02283280459290646, "grad_norm": 5.5625, "learning_rate": 4.995133923950247e-05, "loss": 1.0092, "step": 520 }, { "epoch": 0.022920623072109948, "grad_norm": 11.5625, "learning_rate": 4.995090684315544e-05, "loss": 1.0142, "step": 522 }, { "epoch": 0.023008441551313433, "grad_norm": 9.5625, "learning_rate": 4.9950472536061984e-05, "loss": 0.9864, "step": 524 }, { "epoch": 0.023096260030516923, "grad_norm": 7.09375, "learning_rate": 4.9950036318255364e-05, "loss": 1.0048, "step": 526 }, { "epoch": 0.02318407850972041, "grad_norm": 6.8125, "learning_rate": 4.9949598189768985e-05, "loss": 1.0157, "step": 528 }, { "epoch": 0.023271896988923894, "grad_norm": 5.5, "learning_rate": 4.994915815063639e-05, "loss": 1.0379, "step": 530 }, { "epoch": 0.02335971546812738, "grad_norm": 8.25, "learning_rate": 4.994871620089129e-05, "loss": 1.0017, "step": 532 }, { "epoch": 0.023447533947330866, "grad_norm": 7.8125, "learning_rate": 4.9948272340567514e-05, "loss": 1.0021, "step": 534 }, { "epoch": 0.023535352426534352, "grad_norm": 6.0625, "learning_rate": 4.994782656969906e-05, "loss": 0.9881, "step": 536 }, { "epoch": 0.02362317090573784, "grad_norm": 6.84375, "learning_rate": 4.994737888832007e-05, "loss": 0.9894, "step": 538 }, { "epoch": 0.023710989384941327, "grad_norm": 7.46875, "learning_rate": 4.9946929296464825e-05, "loss": 1.0131, "step": 540 }, { "epoch": 0.023798807864144813, "grad_norm": 8.6875, "learning_rate": 4.994647779416776e-05, "loss": 1.017, "step": 542 }, { "epoch": 0.0238866263433483, "grad_norm": 10.0625, "learning_rate": 4.994602438146344e-05, "loss": 0.982, "step": 544 }, { "epoch": 0.023974444822551785, "grad_norm": 8.625, "learning_rate": 4.99455690583866e-05, "loss": 1.0208, "step": 546 }, { "epoch": 0.02406226330175527, "grad_norm": 11.0625, "learning_rate": 4.994511182497209e-05, "loss": 1.0111, "step": 548 }, { "epoch": 0.02415008178095876, "grad_norm": 11.875, "learning_rate": 4.9944652681254944e-05, "loss": 0.9969, "step": 550 }, { "epoch": 0.024237900260162246, "grad_norm": 8.375, "learning_rate": 4.9944191627270314e-05, "loss": 1.0336, "step": 552 }, { "epoch": 0.02432571873936573, "grad_norm": 7.1875, "learning_rate": 4.994372866305351e-05, "loss": 1.0003, "step": 554 }, { "epoch": 0.024413537218569217, "grad_norm": 8.6875, "learning_rate": 4.9943263788639985e-05, "loss": 1.0197, "step": 556 }, { "epoch": 0.024501355697772703, "grad_norm": 6.3125, "learning_rate": 4.994279700406534e-05, "loss": 0.9861, "step": 558 }, { "epoch": 0.02458917417697619, "grad_norm": 7.125, "learning_rate": 4.994232830936532e-05, "loss": 1.0242, "step": 560 }, { "epoch": 0.02467699265617968, "grad_norm": 6.09375, "learning_rate": 4.994185770457582e-05, "loss": 0.999, "step": 562 }, { "epoch": 0.024764811135383164, "grad_norm": 7.5, "learning_rate": 4.994138518973288e-05, "loss": 0.9646, "step": 564 }, { "epoch": 0.02485262961458665, "grad_norm": 5.59375, "learning_rate": 4.9940910764872685e-05, "loss": 0.9966, "step": 566 }, { "epoch": 0.024940448093790136, "grad_norm": 5.28125, "learning_rate": 4.994043443003156e-05, "loss": 0.9982, "step": 568 }, { "epoch": 0.02502826657299362, "grad_norm": 6.15625, "learning_rate": 4.993995618524598e-05, "loss": 1.006, "step": 570 }, { "epoch": 0.025116085052197108, "grad_norm": 6.40625, "learning_rate": 4.993947603055259e-05, "loss": 0.9813, "step": 572 }, { "epoch": 0.025203903531400593, "grad_norm": 7.625, "learning_rate": 4.9938993965988145e-05, "loss": 0.9576, "step": 574 }, { "epoch": 0.025291722010604083, "grad_norm": 5.65625, "learning_rate": 4.993850999158956e-05, "loss": 1.0088, "step": 576 }, { "epoch": 0.02537954048980757, "grad_norm": 5.6875, "learning_rate": 4.993802410739391e-05, "loss": 0.9918, "step": 578 }, { "epoch": 0.025467358969011054, "grad_norm": 6.125, "learning_rate": 4.993753631343838e-05, "loss": 0.9659, "step": 580 }, { "epoch": 0.02555517744821454, "grad_norm": 8.5, "learning_rate": 4.9937046609760356e-05, "loss": 0.9646, "step": 582 }, { "epoch": 0.025642995927418026, "grad_norm": 7.0625, "learning_rate": 4.993655499639732e-05, "loss": 0.9554, "step": 584 }, { "epoch": 0.025730814406621512, "grad_norm": 5.0, "learning_rate": 4.9936061473386925e-05, "loss": 0.994, "step": 586 }, { "epoch": 0.025818632885825, "grad_norm": 5.28125, "learning_rate": 4.993556604076696e-05, "loss": 0.9731, "step": 588 }, { "epoch": 0.025906451365028487, "grad_norm": 7.0625, "learning_rate": 4.9935068698575375e-05, "loss": 0.9583, "step": 590 }, { "epoch": 0.025994269844231973, "grad_norm": 5.5, "learning_rate": 4.9934569446850256e-05, "loss": 0.9676, "step": 592 }, { "epoch": 0.02608208832343546, "grad_norm": 5.3125, "learning_rate": 4.993406828562982e-05, "loss": 0.9957, "step": 594 }, { "epoch": 0.026169906802638945, "grad_norm": 6.5, "learning_rate": 4.993356521495246e-05, "loss": 0.9717, "step": 596 }, { "epoch": 0.02625772528184243, "grad_norm": 10.5, "learning_rate": 4.993306023485671e-05, "loss": 0.9933, "step": 598 }, { "epoch": 0.02634554376104592, "grad_norm": 10.75, "learning_rate": 4.993255334538122e-05, "loss": 0.9773, "step": 600 }, { "epoch": 0.026433362240249406, "grad_norm": 11.1875, "learning_rate": 4.9932044546564824e-05, "loss": 0.9854, "step": 602 }, { "epoch": 0.02652118071945289, "grad_norm": 7.625, "learning_rate": 4.993153383844648e-05, "loss": 0.995, "step": 604 }, { "epoch": 0.026608999198656377, "grad_norm": 5.25, "learning_rate": 4.993102122106529e-05, "loss": 0.9621, "step": 606 }, { "epoch": 0.026696817677859863, "grad_norm": 6.71875, "learning_rate": 4.993050669446053e-05, "loss": 0.9785, "step": 608 }, { "epoch": 0.02678463615706335, "grad_norm": 6.1875, "learning_rate": 4.9929990258671574e-05, "loss": 0.9875, "step": 610 }, { "epoch": 0.026872454636266835, "grad_norm": 6.46875, "learning_rate": 4.9929471913738e-05, "loss": 0.9624, "step": 612 }, { "epoch": 0.026960273115470324, "grad_norm": 5.46875, "learning_rate": 4.99289516596995e-05, "loss": 0.9641, "step": 614 }, { "epoch": 0.02704809159467381, "grad_norm": 5.5625, "learning_rate": 4.992842949659589e-05, "loss": 0.9695, "step": 616 }, { "epoch": 0.027135910073877296, "grad_norm": 5.1875, "learning_rate": 4.9927905424467184e-05, "loss": 0.9731, "step": 618 }, { "epoch": 0.02722372855308078, "grad_norm": 6.78125, "learning_rate": 4.992737944335349e-05, "loss": 0.9354, "step": 620 }, { "epoch": 0.027311547032284268, "grad_norm": 8.25, "learning_rate": 4.992685155329512e-05, "loss": 0.944, "step": 622 }, { "epoch": 0.027399365511487753, "grad_norm": 8.4375, "learning_rate": 4.992632175433247e-05, "loss": 0.961, "step": 624 }, { "epoch": 0.027487183990691243, "grad_norm": 8.4375, "learning_rate": 4.9925790046506136e-05, "loss": 0.9396, "step": 626 }, { "epoch": 0.02757500246989473, "grad_norm": 7.65625, "learning_rate": 4.9925256429856814e-05, "loss": 0.9557, "step": 628 }, { "epoch": 0.027662820949098214, "grad_norm": 7.1875, "learning_rate": 4.992472090442539e-05, "loss": 0.9546, "step": 630 }, { "epoch": 0.0277506394283017, "grad_norm": 8.0625, "learning_rate": 4.992418347025286e-05, "loss": 0.9652, "step": 632 }, { "epoch": 0.027838457907505186, "grad_norm": 8.0625, "learning_rate": 4.9923644127380384e-05, "loss": 0.9419, "step": 634 }, { "epoch": 0.027926276386708672, "grad_norm": 5.75, "learning_rate": 4.992310287584926e-05, "loss": 0.9576, "step": 636 }, { "epoch": 0.02801409486591216, "grad_norm": 5.5, "learning_rate": 4.992255971570095e-05, "loss": 0.9492, "step": 638 }, { "epoch": 0.028101913345115647, "grad_norm": 6.0625, "learning_rate": 4.9922014646977046e-05, "loss": 0.9552, "step": 640 }, { "epoch": 0.028189731824319133, "grad_norm": 6.84375, "learning_rate": 4.992146766971928e-05, "loss": 0.9551, "step": 642 }, { "epoch": 0.02827755030352262, "grad_norm": 6.03125, "learning_rate": 4.992091878396955e-05, "loss": 0.9738, "step": 644 }, { "epoch": 0.028365368782726105, "grad_norm": 7.46875, "learning_rate": 4.9920367989769885e-05, "loss": 0.9445, "step": 646 }, { "epoch": 0.02845318726192959, "grad_norm": 8.4375, "learning_rate": 4.991981528716246e-05, "loss": 0.9412, "step": 648 }, { "epoch": 0.028541005741133076, "grad_norm": 6.875, "learning_rate": 4.991926067618962e-05, "loss": 0.9425, "step": 650 }, { "epoch": 0.028628824220336566, "grad_norm": 5.59375, "learning_rate": 4.991870415689381e-05, "loss": 0.9306, "step": 652 }, { "epoch": 0.02871664269954005, "grad_norm": 5.53125, "learning_rate": 4.9918145729317664e-05, "loss": 0.9298, "step": 654 }, { "epoch": 0.028804461178743537, "grad_norm": 4.84375, "learning_rate": 4.991758539350395e-05, "loss": 0.9475, "step": 656 }, { "epoch": 0.028892279657947023, "grad_norm": 6.65625, "learning_rate": 4.991702314949557e-05, "loss": 0.9702, "step": 658 }, { "epoch": 0.02898009813715051, "grad_norm": 7.5625, "learning_rate": 4.9916458997335583e-05, "loss": 0.9722, "step": 660 }, { "epoch": 0.029067916616353995, "grad_norm": 7.8125, "learning_rate": 4.99158929370672e-05, "loss": 0.9374, "step": 662 }, { "epoch": 0.029155735095557484, "grad_norm": 7.96875, "learning_rate": 4.991532496873376e-05, "loss": 0.9239, "step": 664 }, { "epoch": 0.02924355357476097, "grad_norm": 6.75, "learning_rate": 4.991475509237876e-05, "loss": 0.9186, "step": 666 }, { "epoch": 0.029331372053964456, "grad_norm": 6.03125, "learning_rate": 4.9914183308045836e-05, "loss": 0.9669, "step": 668 }, { "epoch": 0.02941919053316794, "grad_norm": 6.28125, "learning_rate": 4.991360961577879e-05, "loss": 0.9288, "step": 670 }, { "epoch": 0.029507009012371428, "grad_norm": 6.625, "learning_rate": 4.9913034015621545e-05, "loss": 0.9278, "step": 672 }, { "epoch": 0.029594827491574913, "grad_norm": 6.6875, "learning_rate": 4.9912456507618185e-05, "loss": 0.907, "step": 674 }, { "epoch": 0.029682645970778403, "grad_norm": 5.78125, "learning_rate": 4.991187709181293e-05, "loss": 0.908, "step": 676 }, { "epoch": 0.02977046444998189, "grad_norm": 6.5625, "learning_rate": 4.991129576825016e-05, "loss": 0.9143, "step": 678 }, { "epoch": 0.029858282929185374, "grad_norm": 5.875, "learning_rate": 4.991071253697439e-05, "loss": 0.9022, "step": 680 }, { "epoch": 0.02994610140838886, "grad_norm": 5.46875, "learning_rate": 4.991012739803028e-05, "loss": 0.9249, "step": 682 }, { "epoch": 0.030033919887592346, "grad_norm": 4.8125, "learning_rate": 4.990954035146264e-05, "loss": 0.9483, "step": 684 }, { "epoch": 0.030121738366795832, "grad_norm": 4.75, "learning_rate": 4.990895139731643e-05, "loss": 0.9301, "step": 686 }, { "epoch": 0.030209556845999318, "grad_norm": 5.0, "learning_rate": 4.990836053563674e-05, "loss": 0.9256, "step": 688 }, { "epoch": 0.030297375325202807, "grad_norm": 4.84375, "learning_rate": 4.990776776646885e-05, "loss": 0.9601, "step": 690 }, { "epoch": 0.030385193804406293, "grad_norm": 4.65625, "learning_rate": 4.990717308985812e-05, "loss": 0.9244, "step": 692 }, { "epoch": 0.03047301228360978, "grad_norm": 5.0, "learning_rate": 4.990657650585011e-05, "loss": 0.9357, "step": 694 }, { "epoch": 0.030560830762813265, "grad_norm": 6.28125, "learning_rate": 4.99059780144905e-05, "loss": 0.9379, "step": 696 }, { "epoch": 0.03064864924201675, "grad_norm": 5.90625, "learning_rate": 4.990537761582512e-05, "loss": 0.9476, "step": 698 }, { "epoch": 0.030736467721220236, "grad_norm": 5.34375, "learning_rate": 4.9904775309899955e-05, "loss": 0.9505, "step": 700 }, { "epoch": 0.030824286200423726, "grad_norm": 5.65625, "learning_rate": 4.9904171096761124e-05, "loss": 0.9479, "step": 702 }, { "epoch": 0.03091210467962721, "grad_norm": 4.78125, "learning_rate": 4.99035649764549e-05, "loss": 0.9047, "step": 704 }, { "epoch": 0.030999923158830697, "grad_norm": 5.46875, "learning_rate": 4.99029569490277e-05, "loss": 0.9234, "step": 706 }, { "epoch": 0.031087741638034183, "grad_norm": 7.96875, "learning_rate": 4.9902347014526093e-05, "loss": 0.9298, "step": 708 }, { "epoch": 0.03117556011723767, "grad_norm": 8.8125, "learning_rate": 4.9901735172996775e-05, "loss": 0.9372, "step": 710 }, { "epoch": 0.031263378596441155, "grad_norm": 7.3125, "learning_rate": 4.990112142448662e-05, "loss": 0.9224, "step": 712 }, { "epoch": 0.03135119707564464, "grad_norm": 9.75, "learning_rate": 4.990050576904261e-05, "loss": 0.9216, "step": 714 }, { "epoch": 0.031439015554848126, "grad_norm": 9.625, "learning_rate": 4.989988820671189e-05, "loss": 0.9117, "step": 716 }, { "epoch": 0.03152683403405161, "grad_norm": 8.25, "learning_rate": 4.989926873754177e-05, "loss": 0.9419, "step": 718 }, { "epoch": 0.031614652513255105, "grad_norm": 6.375, "learning_rate": 4.989864736157969e-05, "loss": 0.8973, "step": 720 }, { "epoch": 0.03170247099245859, "grad_norm": 8.1875, "learning_rate": 4.989802407887321e-05, "loss": 0.9172, "step": 722 }, { "epoch": 0.03179028947166208, "grad_norm": 9.9375, "learning_rate": 4.9897398889470086e-05, "loss": 0.9366, "step": 724 }, { "epoch": 0.03187810795086556, "grad_norm": 5.46875, "learning_rate": 4.989677179341818e-05, "loss": 0.9221, "step": 726 }, { "epoch": 0.03196592643006905, "grad_norm": 5.46875, "learning_rate": 4.989614279076553e-05, "loss": 0.8898, "step": 728 }, { "epoch": 0.032053744909272534, "grad_norm": 7.15625, "learning_rate": 4.9895511881560294e-05, "loss": 0.9261, "step": 730 }, { "epoch": 0.03214156338847602, "grad_norm": 7.78125, "learning_rate": 4.98948790658508e-05, "loss": 0.8904, "step": 732 }, { "epoch": 0.032229381867679506, "grad_norm": 7.71875, "learning_rate": 4.989424434368549e-05, "loss": 0.9143, "step": 734 }, { "epoch": 0.03231720034688299, "grad_norm": 7.875, "learning_rate": 4.989360771511298e-05, "loss": 0.9082, "step": 736 }, { "epoch": 0.03240501882608648, "grad_norm": 6.28125, "learning_rate": 4.989296918018203e-05, "loss": 0.8653, "step": 738 }, { "epoch": 0.032492837305289964, "grad_norm": 5.46875, "learning_rate": 4.989232873894152e-05, "loss": 0.8899, "step": 740 }, { "epoch": 0.03258065578449345, "grad_norm": 5.625, "learning_rate": 4.989168639144052e-05, "loss": 0.8836, "step": 742 }, { "epoch": 0.032668474263696935, "grad_norm": 5.4375, "learning_rate": 4.98910421377282e-05, "loss": 0.8981, "step": 744 }, { "epoch": 0.03275629274290043, "grad_norm": 6.03125, "learning_rate": 4.989039597785392e-05, "loss": 0.903, "step": 746 }, { "epoch": 0.032844111222103914, "grad_norm": 7.59375, "learning_rate": 4.988974791186713e-05, "loss": 0.885, "step": 748 }, { "epoch": 0.0329319297013074, "grad_norm": 8.3125, "learning_rate": 4.98890979398175e-05, "loss": 0.9071, "step": 750 }, { "epoch": 0.033019748180510886, "grad_norm": 6.75, "learning_rate": 4.988844606175477e-05, "loss": 0.9012, "step": 752 }, { "epoch": 0.03310756665971437, "grad_norm": 6.0625, "learning_rate": 4.988779227772888e-05, "loss": 0.8822, "step": 754 }, { "epoch": 0.03319538513891786, "grad_norm": 7.09375, "learning_rate": 4.9887136587789885e-05, "loss": 0.8973, "step": 756 }, { "epoch": 0.03328320361812134, "grad_norm": 9.125, "learning_rate": 4.9886478991988004e-05, "loss": 0.911, "step": 758 }, { "epoch": 0.03337102209732483, "grad_norm": 8.25, "learning_rate": 4.9885819490373605e-05, "loss": 0.8973, "step": 760 }, { "epoch": 0.033458840576528315, "grad_norm": 6.5, "learning_rate": 4.988515808299718e-05, "loss": 0.9305, "step": 762 }, { "epoch": 0.0335466590557318, "grad_norm": 6.46875, "learning_rate": 4.988449476990938e-05, "loss": 0.8778, "step": 764 }, { "epoch": 0.033634477534935286, "grad_norm": 6.21875, "learning_rate": 4.988382955116101e-05, "loss": 0.8987, "step": 766 }, { "epoch": 0.03372229601413877, "grad_norm": 5.84375, "learning_rate": 4.988316242680301e-05, "loss": 0.8735, "step": 768 }, { "epoch": 0.03381011449334226, "grad_norm": 5.28125, "learning_rate": 4.988249339688646e-05, "loss": 0.901, "step": 770 }, { "epoch": 0.03389793297254575, "grad_norm": 5.21875, "learning_rate": 4.98818224614626e-05, "loss": 0.8986, "step": 772 }, { "epoch": 0.03398575145174924, "grad_norm": 4.90625, "learning_rate": 4.9881149620582815e-05, "loss": 0.895, "step": 774 }, { "epoch": 0.03407356993095272, "grad_norm": 5.09375, "learning_rate": 4.9880474874298626e-05, "loss": 0.8893, "step": 776 }, { "epoch": 0.03416138841015621, "grad_norm": 5.1875, "learning_rate": 4.987979822266171e-05, "loss": 0.8624, "step": 778 }, { "epoch": 0.034249206889359694, "grad_norm": 6.0625, "learning_rate": 4.9879119665723885e-05, "loss": 0.8925, "step": 780 }, { "epoch": 0.03433702536856318, "grad_norm": 5.53125, "learning_rate": 4.9878439203537104e-05, "loss": 0.8488, "step": 782 }, { "epoch": 0.034424843847766666, "grad_norm": 6.46875, "learning_rate": 4.987775683615349e-05, "loss": 0.8915, "step": 784 }, { "epoch": 0.03451266232697015, "grad_norm": 5.09375, "learning_rate": 4.9877072563625285e-05, "loss": 0.8532, "step": 786 }, { "epoch": 0.03460048080617364, "grad_norm": 4.4375, "learning_rate": 4.9876386386004905e-05, "loss": 0.879, "step": 788 }, { "epoch": 0.034688299285377124, "grad_norm": 4.75, "learning_rate": 4.9875698303344896e-05, "loss": 0.8712, "step": 790 }, { "epoch": 0.03477611776458061, "grad_norm": 4.78125, "learning_rate": 4.987500831569795e-05, "loss": 0.8756, "step": 792 }, { "epoch": 0.034863936243784095, "grad_norm": 5.78125, "learning_rate": 4.987431642311689e-05, "loss": 0.8567, "step": 794 }, { "epoch": 0.03495175472298759, "grad_norm": 5.9375, "learning_rate": 4.987362262565474e-05, "loss": 0.887, "step": 796 }, { "epoch": 0.035039573202191074, "grad_norm": 4.9375, "learning_rate": 4.987292692336458e-05, "loss": 0.859, "step": 798 }, { "epoch": 0.03512739168139456, "grad_norm": 6.09375, "learning_rate": 4.9872229316299734e-05, "loss": 0.8759, "step": 800 }, { "epoch": 0.035215210160598046, "grad_norm": 4.90625, "learning_rate": 4.987152980451359e-05, "loss": 0.8624, "step": 802 }, { "epoch": 0.03530302863980153, "grad_norm": 5.90625, "learning_rate": 4.987082838805974e-05, "loss": 0.8837, "step": 804 }, { "epoch": 0.03539084711900502, "grad_norm": 5.0625, "learning_rate": 4.9870125066991894e-05, "loss": 0.8992, "step": 806 }, { "epoch": 0.0354786655982085, "grad_norm": 5.1875, "learning_rate": 4.98694198413639e-05, "loss": 0.849, "step": 808 }, { "epoch": 0.03556648407741199, "grad_norm": 4.875, "learning_rate": 4.986871271122977e-05, "loss": 0.8616, "step": 810 }, { "epoch": 0.035654302556615475, "grad_norm": 6.1875, "learning_rate": 4.986800367664367e-05, "loss": 0.9031, "step": 812 }, { "epoch": 0.03574212103581896, "grad_norm": 4.8125, "learning_rate": 4.986729273765988e-05, "loss": 0.8518, "step": 814 }, { "epoch": 0.035829939515022446, "grad_norm": 4.59375, "learning_rate": 4.9866579894332857e-05, "loss": 0.8672, "step": 816 }, { "epoch": 0.03591775799422593, "grad_norm": 5.25, "learning_rate": 4.9865865146717176e-05, "loss": 0.8661, "step": 818 }, { "epoch": 0.03600557647342942, "grad_norm": 6.84375, "learning_rate": 4.9865148494867584e-05, "loss": 0.8709, "step": 820 }, { "epoch": 0.03609339495263291, "grad_norm": 4.875, "learning_rate": 4.986442993883896e-05, "loss": 0.8915, "step": 822 }, { "epoch": 0.0361812134318364, "grad_norm": 4.53125, "learning_rate": 4.986370947868634e-05, "loss": 0.8497, "step": 824 }, { "epoch": 0.03626903191103988, "grad_norm": 5.46875, "learning_rate": 4.986298711446488e-05, "loss": 0.8722, "step": 826 }, { "epoch": 0.03635685039024337, "grad_norm": 8.0625, "learning_rate": 4.986226284622991e-05, "loss": 0.8664, "step": 828 }, { "epoch": 0.036444668869446854, "grad_norm": 9.0, "learning_rate": 4.9861536674036885e-05, "loss": 0.8541, "step": 830 }, { "epoch": 0.03653248734865034, "grad_norm": 8.3125, "learning_rate": 4.986080859794142e-05, "loss": 0.8752, "step": 832 }, { "epoch": 0.036620305827853826, "grad_norm": 8.8125, "learning_rate": 4.9860078617999284e-05, "loss": 0.8758, "step": 834 }, { "epoch": 0.03670812430705731, "grad_norm": 8.25, "learning_rate": 4.9859346734266365e-05, "loss": 0.8715, "step": 836 }, { "epoch": 0.0367959427862608, "grad_norm": 5.84375, "learning_rate": 4.9858612946798714e-05, "loss": 0.8564, "step": 838 }, { "epoch": 0.036883761265464284, "grad_norm": 5.71875, "learning_rate": 4.985787725565252e-05, "loss": 0.8502, "step": 840 }, { "epoch": 0.03697157974466777, "grad_norm": 5.71875, "learning_rate": 4.985713966088412e-05, "loss": 0.8582, "step": 842 }, { "epoch": 0.037059398223871255, "grad_norm": 6.59375, "learning_rate": 4.985640016255002e-05, "loss": 0.8623, "step": 844 }, { "epoch": 0.03714721670307474, "grad_norm": 5.375, "learning_rate": 4.985565876070683e-05, "loss": 0.8719, "step": 846 }, { "epoch": 0.037235035182278234, "grad_norm": 5.0, "learning_rate": 4.9854915455411334e-05, "loss": 0.9025, "step": 848 }, { "epoch": 0.03732285366148172, "grad_norm": 5.75, "learning_rate": 4.9854170246720456e-05, "loss": 0.8393, "step": 850 }, { "epoch": 0.037410672140685206, "grad_norm": 4.6875, "learning_rate": 4.9853423134691265e-05, "loss": 0.8611, "step": 852 }, { "epoch": 0.03749849061988869, "grad_norm": 6.375, "learning_rate": 4.985267411938097e-05, "loss": 0.8633, "step": 854 }, { "epoch": 0.03758630909909218, "grad_norm": 5.78125, "learning_rate": 4.9851923200846934e-05, "loss": 0.8516, "step": 856 }, { "epoch": 0.03767412757829566, "grad_norm": 6.15625, "learning_rate": 4.985117037914666e-05, "loss": 0.872, "step": 858 }, { "epoch": 0.03776194605749915, "grad_norm": 5.5625, "learning_rate": 4.9850415654337804e-05, "loss": 0.8479, "step": 860 }, { "epoch": 0.037849764536702635, "grad_norm": 6.125, "learning_rate": 4.9849659026478154e-05, "loss": 0.8318, "step": 862 }, { "epoch": 0.03793758301590612, "grad_norm": 5.125, "learning_rate": 4.9848900495625665e-05, "loss": 0.8792, "step": 864 }, { "epoch": 0.038025401495109606, "grad_norm": 5.09375, "learning_rate": 4.9848140061838424e-05, "loss": 0.855, "step": 866 }, { "epoch": 0.03811321997431309, "grad_norm": 4.53125, "learning_rate": 4.984737772517465e-05, "loss": 0.8471, "step": 868 }, { "epoch": 0.03820103845351658, "grad_norm": 6.375, "learning_rate": 4.984661348569274e-05, "loss": 0.8514, "step": 870 }, { "epoch": 0.03828885693272007, "grad_norm": 5.96875, "learning_rate": 4.984584734345121e-05, "loss": 0.842, "step": 872 }, { "epoch": 0.03837667541192356, "grad_norm": 8.4375, "learning_rate": 4.984507929850873e-05, "loss": 0.872, "step": 874 }, { "epoch": 0.03846449389112704, "grad_norm": 8.375, "learning_rate": 4.9844309350924135e-05, "loss": 0.8892, "step": 876 }, { "epoch": 0.03855231237033053, "grad_norm": 6.6875, "learning_rate": 4.9843537500756364e-05, "loss": 0.8866, "step": 878 }, { "epoch": 0.038640130849534014, "grad_norm": 5.03125, "learning_rate": 4.9842763748064536e-05, "loss": 0.8492, "step": 880 }, { "epoch": 0.0387279493287375, "grad_norm": 5.21875, "learning_rate": 4.984198809290791e-05, "loss": 0.8236, "step": 882 }, { "epoch": 0.038815767807940986, "grad_norm": 5.875, "learning_rate": 4.984121053534588e-05, "loss": 0.8429, "step": 884 }, { "epoch": 0.03890358628714447, "grad_norm": 6.03125, "learning_rate": 4.984043107543799e-05, "loss": 0.864, "step": 886 }, { "epoch": 0.03899140476634796, "grad_norm": 5.65625, "learning_rate": 4.983964971324393e-05, "loss": 0.8531, "step": 888 }, { "epoch": 0.039079223245551443, "grad_norm": 6.34375, "learning_rate": 4.983886644882354e-05, "loss": 0.8143, "step": 890 }, { "epoch": 0.03916704172475493, "grad_norm": 6.53125, "learning_rate": 4.9838081282236814e-05, "loss": 0.8478, "step": 892 }, { "epoch": 0.039254860203958415, "grad_norm": 6.96875, "learning_rate": 4.983729421354386e-05, "loss": 0.8864, "step": 894 }, { "epoch": 0.0393426786831619, "grad_norm": 8.5, "learning_rate": 4.9836505242804966e-05, "loss": 0.8228, "step": 896 }, { "epoch": 0.039430497162365394, "grad_norm": 7.21875, "learning_rate": 4.9835714370080546e-05, "loss": 0.8414, "step": 898 }, { "epoch": 0.03951831564156888, "grad_norm": 4.96875, "learning_rate": 4.983492159543116e-05, "loss": 0.8727, "step": 900 }, { "epoch": 0.039606134120772366, "grad_norm": 5.65625, "learning_rate": 4.983412691891753e-05, "loss": 0.8376, "step": 902 }, { "epoch": 0.03969395259997585, "grad_norm": 5.25, "learning_rate": 4.983333034060051e-05, "loss": 0.8527, "step": 904 }, { "epoch": 0.03978177107917934, "grad_norm": 6.1875, "learning_rate": 4.9832531860541096e-05, "loss": 0.8227, "step": 906 }, { "epoch": 0.03986958955838282, "grad_norm": 4.96875, "learning_rate": 4.9831731478800434e-05, "loss": 0.862, "step": 908 }, { "epoch": 0.03995740803758631, "grad_norm": 5.125, "learning_rate": 4.983092919543983e-05, "loss": 0.8069, "step": 910 }, { "epoch": 0.040045226516789795, "grad_norm": 4.03125, "learning_rate": 4.983012501052072e-05, "loss": 0.8215, "step": 912 }, { "epoch": 0.04013304499599328, "grad_norm": 4.0625, "learning_rate": 4.982931892410468e-05, "loss": 0.8391, "step": 914 }, { "epoch": 0.040220863475196766, "grad_norm": 5.5625, "learning_rate": 4.982851093625344e-05, "loss": 0.8268, "step": 916 }, { "epoch": 0.04030868195440025, "grad_norm": 5.25, "learning_rate": 4.982770104702888e-05, "loss": 0.8418, "step": 918 }, { "epoch": 0.04039650043360374, "grad_norm": 4.71875, "learning_rate": 4.9826889256493034e-05, "loss": 0.839, "step": 920 }, { "epoch": 0.04048431891280723, "grad_norm": 4.5625, "learning_rate": 4.9826075564708056e-05, "loss": 0.8414, "step": 922 }, { "epoch": 0.04057213739201072, "grad_norm": 5.4375, "learning_rate": 4.982525997173625e-05, "loss": 0.8294, "step": 924 }, { "epoch": 0.0406599558712142, "grad_norm": 5.75, "learning_rate": 4.982444247764009e-05, "loss": 0.8287, "step": 926 }, { "epoch": 0.04074777435041769, "grad_norm": 5.21875, "learning_rate": 4.982362308248217e-05, "loss": 0.8332, "step": 928 }, { "epoch": 0.040835592829621174, "grad_norm": 3.59375, "learning_rate": 4.9822801786325245e-05, "loss": 0.8231, "step": 930 }, { "epoch": 0.04092341130882466, "grad_norm": 4.34375, "learning_rate": 4.982197858923221e-05, "loss": 0.8413, "step": 932 }, { "epoch": 0.041011229788028146, "grad_norm": 5.4375, "learning_rate": 4.98211534912661e-05, "loss": 0.8083, "step": 934 }, { "epoch": 0.04109904826723163, "grad_norm": 6.21875, "learning_rate": 4.982032649249011e-05, "loss": 0.8205, "step": 936 }, { "epoch": 0.04118686674643512, "grad_norm": 6.15625, "learning_rate": 4.981949759296757e-05, "loss": 0.8354, "step": 938 }, { "epoch": 0.041274685225638603, "grad_norm": 5.90625, "learning_rate": 4.981866679276195e-05, "loss": 0.8274, "step": 940 }, { "epoch": 0.04136250370484209, "grad_norm": 4.9375, "learning_rate": 4.981783409193689e-05, "loss": 0.8363, "step": 942 }, { "epoch": 0.041450322184045575, "grad_norm": 4.65625, "learning_rate": 4.981699949055613e-05, "loss": 0.8658, "step": 944 }, { "epoch": 0.04153814066324906, "grad_norm": 4.4375, "learning_rate": 4.9816162988683604e-05, "loss": 0.8392, "step": 946 }, { "epoch": 0.041625959142452554, "grad_norm": 4.59375, "learning_rate": 4.981532458638337e-05, "loss": 0.8418, "step": 948 }, { "epoch": 0.04171377762165604, "grad_norm": 4.75, "learning_rate": 4.9814484283719634e-05, "loss": 0.8229, "step": 950 }, { "epoch": 0.041801596100859525, "grad_norm": 5.5, "learning_rate": 4.981364208075673e-05, "loss": 0.8107, "step": 952 }, { "epoch": 0.04188941458006301, "grad_norm": 4.90625, "learning_rate": 4.9812797977559176e-05, "loss": 0.8279, "step": 954 }, { "epoch": 0.0419772330592665, "grad_norm": 5.0, "learning_rate": 4.981195197419161e-05, "loss": 0.8215, "step": 956 }, { "epoch": 0.04206505153846998, "grad_norm": 4.46875, "learning_rate": 4.981110407071881e-05, "loss": 0.8302, "step": 958 }, { "epoch": 0.04215287001767347, "grad_norm": 3.78125, "learning_rate": 4.981025426720571e-05, "loss": 0.8162, "step": 960 }, { "epoch": 0.042240688496876955, "grad_norm": 4.15625, "learning_rate": 4.980940256371739e-05, "loss": 0.8318, "step": 962 }, { "epoch": 0.04232850697608044, "grad_norm": 3.78125, "learning_rate": 4.980854896031908e-05, "loss": 0.8177, "step": 964 }, { "epoch": 0.042416325455283926, "grad_norm": 4.125, "learning_rate": 4.9807693457076144e-05, "loss": 0.844, "step": 966 }, { "epoch": 0.04250414393448741, "grad_norm": 4.65625, "learning_rate": 4.980683605405408e-05, "loss": 0.8244, "step": 968 }, { "epoch": 0.0425919624136909, "grad_norm": 4.46875, "learning_rate": 4.980597675131858e-05, "loss": 0.7891, "step": 970 }, { "epoch": 0.042679780892894384, "grad_norm": 3.90625, "learning_rate": 4.980511554893543e-05, "loss": 0.8173, "step": 972 }, { "epoch": 0.04276759937209788, "grad_norm": 4.75, "learning_rate": 4.980425244697059e-05, "loss": 0.8342, "step": 974 }, { "epoch": 0.04285541785130136, "grad_norm": 4.375, "learning_rate": 4.9803387445490144e-05, "loss": 0.8508, "step": 976 }, { "epoch": 0.04294323633050485, "grad_norm": 4.8125, "learning_rate": 4.980252054456035e-05, "loss": 0.8379, "step": 978 }, { "epoch": 0.043031054809708334, "grad_norm": 5.65625, "learning_rate": 4.980165174424759e-05, "loss": 0.8501, "step": 980 }, { "epoch": 0.04311887328891182, "grad_norm": 5.3125, "learning_rate": 4.980078104461838e-05, "loss": 0.8109, "step": 982 }, { "epoch": 0.043206691768115306, "grad_norm": 4.375, "learning_rate": 4.979990844573942e-05, "loss": 0.8135, "step": 984 }, { "epoch": 0.04329451024731879, "grad_norm": 4.4375, "learning_rate": 4.979903394767752e-05, "loss": 0.7713, "step": 986 }, { "epoch": 0.04338232872652228, "grad_norm": 6.03125, "learning_rate": 4.979815755049967e-05, "loss": 0.8369, "step": 988 }, { "epoch": 0.04347014720572576, "grad_norm": 5.09375, "learning_rate": 4.9797279254272956e-05, "loss": 0.8063, "step": 990 }, { "epoch": 0.04355796568492925, "grad_norm": 4.875, "learning_rate": 4.979639905906466e-05, "loss": 0.8432, "step": 992 }, { "epoch": 0.043645784164132735, "grad_norm": 4.0625, "learning_rate": 4.9795516964942175e-05, "loss": 0.8128, "step": 994 }, { "epoch": 0.04373360264333622, "grad_norm": 5.6875, "learning_rate": 4.979463297197306e-05, "loss": 0.7988, "step": 996 }, { "epoch": 0.043821421122539714, "grad_norm": 5.21875, "learning_rate": 4.9793747080225004e-05, "loss": 0.7873, "step": 998 }, { "epoch": 0.0439092396017432, "grad_norm": 4.0, "learning_rate": 4.979285928976586e-05, "loss": 0.8221, "step": 1000 }, { "epoch": 0.043997058080946685, "grad_norm": 4.5, "learning_rate": 4.9791969600663605e-05, "loss": 0.8012, "step": 1002 }, { "epoch": 0.04408487656015017, "grad_norm": 4.5625, "learning_rate": 4.9791078012986375e-05, "loss": 0.8453, "step": 1004 }, { "epoch": 0.04417269503935366, "grad_norm": 4.84375, "learning_rate": 4.9790184526802444e-05, "loss": 0.8158, "step": 1006 }, { "epoch": 0.04426051351855714, "grad_norm": 3.984375, "learning_rate": 4.978928914218025e-05, "loss": 0.8095, "step": 1008 }, { "epoch": 0.04434833199776063, "grad_norm": 4.40625, "learning_rate": 4.9788391859188346e-05, "loss": 0.8196, "step": 1010 }, { "epoch": 0.044436150476964115, "grad_norm": 3.90625, "learning_rate": 4.9787492677895445e-05, "loss": 0.8302, "step": 1012 }, { "epoch": 0.0445239689561676, "grad_norm": 4.40625, "learning_rate": 4.978659159837041e-05, "loss": 0.7852, "step": 1014 }, { "epoch": 0.044611787435371086, "grad_norm": 4.0625, "learning_rate": 4.9785688620682265e-05, "loss": 0.8035, "step": 1016 }, { "epoch": 0.04469960591457457, "grad_norm": 5.0, "learning_rate": 4.978478374490013e-05, "loss": 0.7945, "step": 1018 }, { "epoch": 0.04478742439377806, "grad_norm": 4.21875, "learning_rate": 4.978387697109333e-05, "loss": 0.802, "step": 1020 }, { "epoch": 0.044875242872981544, "grad_norm": 4.875, "learning_rate": 4.978296829933127e-05, "loss": 0.8112, "step": 1022 }, { "epoch": 0.04496306135218504, "grad_norm": 4.84375, "learning_rate": 4.978205772968357e-05, "loss": 0.8123, "step": 1024 }, { "epoch": 0.04505087983138852, "grad_norm": 4.84375, "learning_rate": 4.978114526221994e-05, "loss": 0.8189, "step": 1026 }, { "epoch": 0.04513869831059201, "grad_norm": 4.59375, "learning_rate": 4.978023089701027e-05, "loss": 0.7864, "step": 1028 }, { "epoch": 0.045226516789795494, "grad_norm": 5.0, "learning_rate": 4.977931463412459e-05, "loss": 0.784, "step": 1030 }, { "epoch": 0.04531433526899898, "grad_norm": 4.875, "learning_rate": 4.9778396473633035e-05, "loss": 0.8088, "step": 1032 }, { "epoch": 0.045402153748202466, "grad_norm": 4.40625, "learning_rate": 4.977747641560595e-05, "loss": 0.7992, "step": 1034 }, { "epoch": 0.04548997222740595, "grad_norm": 4.21875, "learning_rate": 4.977655446011378e-05, "loss": 0.7899, "step": 1036 }, { "epoch": 0.04557779070660944, "grad_norm": 4.0625, "learning_rate": 4.9775630607227126e-05, "loss": 0.7699, "step": 1038 }, { "epoch": 0.04566560918581292, "grad_norm": 4.59375, "learning_rate": 4.977470485701674e-05, "loss": 0.8143, "step": 1040 }, { "epoch": 0.04575342766501641, "grad_norm": 4.0625, "learning_rate": 4.9773777209553517e-05, "loss": 0.8162, "step": 1042 }, { "epoch": 0.045841246144219895, "grad_norm": 3.90625, "learning_rate": 4.9772847664908505e-05, "loss": 0.7954, "step": 1044 }, { "epoch": 0.04592906462342338, "grad_norm": 4.125, "learning_rate": 4.977191622315288e-05, "loss": 0.7932, "step": 1046 }, { "epoch": 0.04601688310262687, "grad_norm": 5.375, "learning_rate": 4.977098288435796e-05, "loss": 0.791, "step": 1048 }, { "epoch": 0.04610470158183036, "grad_norm": 4.28125, "learning_rate": 4.977004764859524e-05, "loss": 0.7752, "step": 1050 }, { "epoch": 0.046192520061033845, "grad_norm": 5.34375, "learning_rate": 4.976911051593633e-05, "loss": 0.7977, "step": 1052 }, { "epoch": 0.04628033854023733, "grad_norm": 4.09375, "learning_rate": 4.976817148645301e-05, "loss": 0.7915, "step": 1054 }, { "epoch": 0.04636815701944082, "grad_norm": 5.0625, "learning_rate": 4.976723056021717e-05, "loss": 0.8112, "step": 1056 }, { "epoch": 0.0464559754986443, "grad_norm": 6.65625, "learning_rate": 4.976628773730088e-05, "loss": 0.818, "step": 1058 }, { "epoch": 0.04654379397784779, "grad_norm": 5.25, "learning_rate": 4.976534301777634e-05, "loss": 0.8005, "step": 1060 }, { "epoch": 0.046631612457051275, "grad_norm": 4.28125, "learning_rate": 4.9764396401715895e-05, "loss": 0.8215, "step": 1062 }, { "epoch": 0.04671943093625476, "grad_norm": 4.25, "learning_rate": 4.9763447889192034e-05, "loss": 0.785, "step": 1064 }, { "epoch": 0.046807249415458246, "grad_norm": 4.4375, "learning_rate": 4.97624974802774e-05, "loss": 0.8075, "step": 1066 }, { "epoch": 0.04689506789466173, "grad_norm": 4.84375, "learning_rate": 4.9761545175044764e-05, "loss": 0.8031, "step": 1068 }, { "epoch": 0.04698288637386522, "grad_norm": 3.765625, "learning_rate": 4.976059097356708e-05, "loss": 0.7689, "step": 1070 }, { "epoch": 0.047070704853068704, "grad_norm": 5.125, "learning_rate": 4.975963487591739e-05, "loss": 0.7636, "step": 1072 }, { "epoch": 0.0471585233322722, "grad_norm": 3.921875, "learning_rate": 4.9758676882168934e-05, "loss": 0.7856, "step": 1074 }, { "epoch": 0.04724634181147568, "grad_norm": 5.25, "learning_rate": 4.975771699239505e-05, "loss": 0.7742, "step": 1076 }, { "epoch": 0.04733416029067917, "grad_norm": 5.84375, "learning_rate": 4.975675520666928e-05, "loss": 0.8068, "step": 1078 }, { "epoch": 0.047421978769882654, "grad_norm": 6.96875, "learning_rate": 4.9755791525065266e-05, "loss": 0.8261, "step": 1080 }, { "epoch": 0.04750979724908614, "grad_norm": 7.625, "learning_rate": 4.975482594765679e-05, "loss": 0.8396, "step": 1082 }, { "epoch": 0.047597615728289626, "grad_norm": 5.46875, "learning_rate": 4.9753858474517815e-05, "loss": 0.7966, "step": 1084 }, { "epoch": 0.04768543420749311, "grad_norm": 4.28125, "learning_rate": 4.975288910572242e-05, "loss": 0.7745, "step": 1086 }, { "epoch": 0.0477732526866966, "grad_norm": 4.59375, "learning_rate": 4.975191784134485e-05, "loss": 0.7827, "step": 1088 }, { "epoch": 0.04786107116590008, "grad_norm": 4.4375, "learning_rate": 4.975094468145948e-05, "loss": 0.7728, "step": 1090 }, { "epoch": 0.04794888964510357, "grad_norm": 5.0, "learning_rate": 4.974996962614083e-05, "loss": 0.7792, "step": 1092 }, { "epoch": 0.048036708124307055, "grad_norm": 4.125, "learning_rate": 4.974899267546357e-05, "loss": 0.7991, "step": 1094 }, { "epoch": 0.04812452660351054, "grad_norm": 7.0625, "learning_rate": 4.974801382950252e-05, "loss": 0.7764, "step": 1096 }, { "epoch": 0.04821234508271403, "grad_norm": 8.0, "learning_rate": 4.9747033088332635e-05, "loss": 0.8007, "step": 1098 }, { "epoch": 0.04830016356191752, "grad_norm": 5.84375, "learning_rate": 4.9746050452029023e-05, "loss": 0.8096, "step": 1100 }, { "epoch": 0.048387982041121005, "grad_norm": 6.5625, "learning_rate": 4.974506592066695e-05, "loss": 0.8101, "step": 1102 }, { "epoch": 0.04847580052032449, "grad_norm": 8.8125, "learning_rate": 4.974407949432178e-05, "loss": 0.7943, "step": 1104 }, { "epoch": 0.04856361899952798, "grad_norm": 5.25, "learning_rate": 4.9743091173069075e-05, "loss": 0.8159, "step": 1106 }, { "epoch": 0.04865143747873146, "grad_norm": 4.03125, "learning_rate": 4.974210095698452e-05, "loss": 0.8159, "step": 1108 }, { "epoch": 0.04873925595793495, "grad_norm": 4.9375, "learning_rate": 4.9741108846143934e-05, "loss": 0.7488, "step": 1110 }, { "epoch": 0.048827074437138435, "grad_norm": 6.25, "learning_rate": 4.974011484062331e-05, "loss": 0.7729, "step": 1112 }, { "epoch": 0.04891489291634192, "grad_norm": 5.0, "learning_rate": 4.9739118940498766e-05, "loss": 0.7887, "step": 1114 }, { "epoch": 0.049002711395545406, "grad_norm": 4.40625, "learning_rate": 4.973812114584655e-05, "loss": 0.8019, "step": 1116 }, { "epoch": 0.04909052987474889, "grad_norm": 4.4375, "learning_rate": 4.9737121456743095e-05, "loss": 0.7422, "step": 1118 }, { "epoch": 0.04917834835395238, "grad_norm": 4.59375, "learning_rate": 4.9736119873264946e-05, "loss": 0.7503, "step": 1120 }, { "epoch": 0.049266166833155864, "grad_norm": 4.625, "learning_rate": 4.973511639548881e-05, "loss": 0.7732, "step": 1122 }, { "epoch": 0.04935398531235936, "grad_norm": 46.25, "learning_rate": 4.973411102349153e-05, "loss": 0.8023, "step": 1124 }, { "epoch": 0.04944180379156284, "grad_norm": 4.375, "learning_rate": 4.9733103757350096e-05, "loss": 0.7675, "step": 1126 }, { "epoch": 0.04952962227076633, "grad_norm": 4.65625, "learning_rate": 4.9732094597141654e-05, "loss": 0.8107, "step": 1128 }, { "epoch": 0.049617440749969814, "grad_norm": 4.46875, "learning_rate": 4.973108354294347e-05, "loss": 0.8025, "step": 1130 }, { "epoch": 0.0497052592291733, "grad_norm": 5.625, "learning_rate": 4.973007059483299e-05, "loss": 0.7767, "step": 1132 }, { "epoch": 0.049793077708376786, "grad_norm": 6.5, "learning_rate": 4.9729055752887764e-05, "loss": 0.8024, "step": 1134 }, { "epoch": 0.04988089618758027, "grad_norm": 6.28125, "learning_rate": 4.9728039017185535e-05, "loss": 0.7989, "step": 1136 }, { "epoch": 0.04996871466678376, "grad_norm": 5.71875, "learning_rate": 4.9727020387804136e-05, "loss": 0.7659, "step": 1138 }, { "epoch": 0.05005653314598724, "grad_norm": 5.46875, "learning_rate": 4.972599986482159e-05, "loss": 0.7758, "step": 1140 }, { "epoch": 0.05014435162519073, "grad_norm": 4.65625, "learning_rate": 4.972497744831606e-05, "loss": 0.7691, "step": 1142 }, { "epoch": 0.050232170104394215, "grad_norm": 4.84375, "learning_rate": 4.972395313836582e-05, "loss": 0.7584, "step": 1144 }, { "epoch": 0.0503199885835977, "grad_norm": 3.90625, "learning_rate": 4.9722926935049316e-05, "loss": 0.7792, "step": 1146 }, { "epoch": 0.05040780706280119, "grad_norm": 4.9375, "learning_rate": 4.9721898838445155e-05, "loss": 0.7649, "step": 1148 }, { "epoch": 0.05049562554200468, "grad_norm": 5.1875, "learning_rate": 4.972086884863204e-05, "loss": 0.7919, "step": 1150 }, { "epoch": 0.050583444021208165, "grad_norm": 5.34375, "learning_rate": 4.971983696568888e-05, "loss": 0.7349, "step": 1152 }, { "epoch": 0.05067126250041165, "grad_norm": 4.375, "learning_rate": 4.9718803189694666e-05, "loss": 0.7765, "step": 1154 }, { "epoch": 0.05075908097961514, "grad_norm": 3.703125, "learning_rate": 4.9717767520728585e-05, "loss": 0.7909, "step": 1156 }, { "epoch": 0.05084689945881862, "grad_norm": 5.1875, "learning_rate": 4.971672995886994e-05, "loss": 0.7727, "step": 1158 }, { "epoch": 0.05093471793802211, "grad_norm": 4.53125, "learning_rate": 4.9715690504198186e-05, "loss": 0.7504, "step": 1160 }, { "epoch": 0.051022536417225595, "grad_norm": 5.125, "learning_rate": 4.971464915679293e-05, "loss": 0.7518, "step": 1162 }, { "epoch": 0.05111035489642908, "grad_norm": 4.65625, "learning_rate": 4.971360591673392e-05, "loss": 0.764, "step": 1164 }, { "epoch": 0.051198173375632566, "grad_norm": 5.4375, "learning_rate": 4.971256078410104e-05, "loss": 0.7693, "step": 1166 }, { "epoch": 0.05128599185483605, "grad_norm": 4.53125, "learning_rate": 4.971151375897434e-05, "loss": 0.7753, "step": 1168 }, { "epoch": 0.05137381033403954, "grad_norm": 3.640625, "learning_rate": 4.9710464841433984e-05, "loss": 0.7606, "step": 1170 }, { "epoch": 0.051461628813243024, "grad_norm": 4.0, "learning_rate": 4.9709414031560306e-05, "loss": 0.7781, "step": 1172 }, { "epoch": 0.05154944729244651, "grad_norm": 4.25, "learning_rate": 4.9708361329433787e-05, "loss": 0.7369, "step": 1174 }, { "epoch": 0.05163726577165, "grad_norm": 3.90625, "learning_rate": 4.970730673513503e-05, "loss": 0.7699, "step": 1176 }, { "epoch": 0.05172508425085349, "grad_norm": 5.25, "learning_rate": 4.97062502487448e-05, "loss": 0.778, "step": 1178 }, { "epoch": 0.051812902730056974, "grad_norm": 6.21875, "learning_rate": 4.9705191870344e-05, "loss": 0.7993, "step": 1180 }, { "epoch": 0.05190072120926046, "grad_norm": 6.625, "learning_rate": 4.9704131600013686e-05, "loss": 0.7635, "step": 1182 }, { "epoch": 0.051988539688463946, "grad_norm": 6.40625, "learning_rate": 4.970306943783506e-05, "loss": 0.7772, "step": 1184 }, { "epoch": 0.05207635816766743, "grad_norm": 6.4375, "learning_rate": 4.9702005383889446e-05, "loss": 0.765, "step": 1186 }, { "epoch": 0.05216417664687092, "grad_norm": 7.3125, "learning_rate": 4.9700939438258334e-05, "loss": 0.7375, "step": 1188 }, { "epoch": 0.0522519951260744, "grad_norm": 6.71875, "learning_rate": 4.969987160102337e-05, "loss": 0.7828, "step": 1190 }, { "epoch": 0.05233981360527789, "grad_norm": 6.28125, "learning_rate": 4.969880187226631e-05, "loss": 0.7674, "step": 1192 }, { "epoch": 0.052427632084481375, "grad_norm": 7.34375, "learning_rate": 4.969773025206908e-05, "loss": 0.7823, "step": 1194 }, { "epoch": 0.05251545056368486, "grad_norm": 6.375, "learning_rate": 4.969665674051376e-05, "loss": 0.7578, "step": 1196 }, { "epoch": 0.05260326904288835, "grad_norm": 4.625, "learning_rate": 4.969558133768254e-05, "loss": 0.738, "step": 1198 }, { "epoch": 0.05269108752209184, "grad_norm": 5.09375, "learning_rate": 4.969450404365777e-05, "loss": 0.7709, "step": 1200 }, { "epoch": 0.052778906001295325, "grad_norm": 6.9375, "learning_rate": 4.969342485852197e-05, "loss": 0.7349, "step": 1202 }, { "epoch": 0.05286672448049881, "grad_norm": 3.828125, "learning_rate": 4.969234378235778e-05, "loss": 0.7782, "step": 1204 }, { "epoch": 0.0529545429597023, "grad_norm": 4.0, "learning_rate": 4.969126081524798e-05, "loss": 0.7804, "step": 1206 }, { "epoch": 0.05304236143890578, "grad_norm": 4.375, "learning_rate": 4.969017595727551e-05, "loss": 0.7332, "step": 1208 }, { "epoch": 0.05313017991810927, "grad_norm": 4.65625, "learning_rate": 4.968908920852344e-05, "loss": 0.7742, "step": 1210 }, { "epoch": 0.053217998397312755, "grad_norm": 4.40625, "learning_rate": 4.9688000569075e-05, "loss": 0.7555, "step": 1212 }, { "epoch": 0.05330581687651624, "grad_norm": 3.609375, "learning_rate": 4.9686910039013566e-05, "loss": 0.7543, "step": 1214 }, { "epoch": 0.053393635355719726, "grad_norm": 4.15625, "learning_rate": 4.9685817618422635e-05, "loss": 0.7569, "step": 1216 }, { "epoch": 0.05348145383492321, "grad_norm": 4.25, "learning_rate": 4.968472330738588e-05, "loss": 0.7303, "step": 1218 }, { "epoch": 0.0535692723141267, "grad_norm": 4.0625, "learning_rate": 4.96836271059871e-05, "loss": 0.7597, "step": 1220 }, { "epoch": 0.053657090793330184, "grad_norm": 4.6875, "learning_rate": 4.968252901431023e-05, "loss": 0.7583, "step": 1222 }, { "epoch": 0.05374490927253367, "grad_norm": 4.8125, "learning_rate": 4.968142903243938e-05, "loss": 0.754, "step": 1224 }, { "epoch": 0.05383272775173716, "grad_norm": 4.21875, "learning_rate": 4.968032716045877e-05, "loss": 0.7428, "step": 1226 }, { "epoch": 0.05392054623094065, "grad_norm": 3.46875, "learning_rate": 4.967922339845279e-05, "loss": 0.7675, "step": 1228 }, { "epoch": 0.054008364710144134, "grad_norm": 4.125, "learning_rate": 4.967811774650597e-05, "loss": 0.7386, "step": 1230 }, { "epoch": 0.05409618318934762, "grad_norm": 3.84375, "learning_rate": 4.967701020470298e-05, "loss": 0.7233, "step": 1232 }, { "epoch": 0.054184001668551106, "grad_norm": 3.90625, "learning_rate": 4.967590077312863e-05, "loss": 0.7297, "step": 1234 }, { "epoch": 0.05427182014775459, "grad_norm": 3.84375, "learning_rate": 4.967478945186788e-05, "loss": 0.7155, "step": 1236 }, { "epoch": 0.05435963862695808, "grad_norm": 4.09375, "learning_rate": 4.967367624100584e-05, "loss": 0.7515, "step": 1238 }, { "epoch": 0.05444745710616156, "grad_norm": 3.578125, "learning_rate": 4.967256114062776e-05, "loss": 0.7572, "step": 1240 }, { "epoch": 0.05453527558536505, "grad_norm": 4.53125, "learning_rate": 4.967144415081903e-05, "loss": 0.7531, "step": 1242 }, { "epoch": 0.054623094064568535, "grad_norm": 4.3125, "learning_rate": 4.96703252716652e-05, "loss": 0.7622, "step": 1244 }, { "epoch": 0.05471091254377202, "grad_norm": 3.53125, "learning_rate": 4.966920450325194e-05, "loss": 0.7755, "step": 1246 }, { "epoch": 0.05479873102297551, "grad_norm": 3.9375, "learning_rate": 4.9668081845665085e-05, "loss": 0.726, "step": 1248 }, { "epoch": 0.05488654950217899, "grad_norm": 4.09375, "learning_rate": 4.9666957298990616e-05, "loss": 0.7504, "step": 1250 }, { "epoch": 0.054974367981382485, "grad_norm": 4.375, "learning_rate": 4.9665830863314645e-05, "loss": 0.7783, "step": 1252 }, { "epoch": 0.05506218646058597, "grad_norm": 4.15625, "learning_rate": 4.966470253872343e-05, "loss": 0.7782, "step": 1254 }, { "epoch": 0.05515000493978946, "grad_norm": 3.65625, "learning_rate": 4.9663572325303376e-05, "loss": 0.7509, "step": 1256 }, { "epoch": 0.05523782341899294, "grad_norm": 3.984375, "learning_rate": 4.966244022314105e-05, "loss": 0.7475, "step": 1258 }, { "epoch": 0.05532564189819643, "grad_norm": 4.375, "learning_rate": 4.9661306232323134e-05, "loss": 0.7448, "step": 1260 }, { "epoch": 0.055413460377399915, "grad_norm": 4.375, "learning_rate": 4.966017035293648e-05, "loss": 0.7464, "step": 1262 }, { "epoch": 0.0555012788566034, "grad_norm": 4.125, "learning_rate": 4.965903258506806e-05, "loss": 0.771, "step": 1264 }, { "epoch": 0.055589097335806886, "grad_norm": 4.25, "learning_rate": 4.965789292880502e-05, "loss": 0.7344, "step": 1266 }, { "epoch": 0.05567691581501037, "grad_norm": 3.828125, "learning_rate": 4.965675138423463e-05, "loss": 0.7208, "step": 1268 }, { "epoch": 0.05576473429421386, "grad_norm": 3.625, "learning_rate": 4.9655607951444305e-05, "loss": 0.7481, "step": 1270 }, { "epoch": 0.055852552773417344, "grad_norm": 3.59375, "learning_rate": 4.9654462630521615e-05, "loss": 0.7729, "step": 1272 }, { "epoch": 0.05594037125262083, "grad_norm": 3.609375, "learning_rate": 4.9653315421554266e-05, "loss": 0.7597, "step": 1274 }, { "epoch": 0.05602818973182432, "grad_norm": 4.625, "learning_rate": 4.965216632463011e-05, "loss": 0.7347, "step": 1276 }, { "epoch": 0.05611600821102781, "grad_norm": 4.25, "learning_rate": 4.965101533983715e-05, "loss": 0.7614, "step": 1278 }, { "epoch": 0.056203826690231294, "grad_norm": 4.53125, "learning_rate": 4.9649862467263526e-05, "loss": 0.7808, "step": 1280 }, { "epoch": 0.05629164516943478, "grad_norm": 3.65625, "learning_rate": 4.964870770699752e-05, "loss": 0.7464, "step": 1282 }, { "epoch": 0.056379463648638266, "grad_norm": 4.5625, "learning_rate": 4.964755105912758e-05, "loss": 0.7517, "step": 1284 }, { "epoch": 0.05646728212784175, "grad_norm": 3.96875, "learning_rate": 4.964639252374226e-05, "loss": 0.7742, "step": 1286 }, { "epoch": 0.05655510060704524, "grad_norm": 4.75, "learning_rate": 4.96452321009303e-05, "loss": 0.7392, "step": 1288 }, { "epoch": 0.05664291908624872, "grad_norm": 3.390625, "learning_rate": 4.964406979078056e-05, "loss": 0.7295, "step": 1290 }, { "epoch": 0.05673073756545221, "grad_norm": 3.71875, "learning_rate": 4.964290559338204e-05, "loss": 0.722, "step": 1292 }, { "epoch": 0.056818556044655695, "grad_norm": 4.03125, "learning_rate": 4.96417395088239e-05, "loss": 0.7417, "step": 1294 }, { "epoch": 0.05690637452385918, "grad_norm": 3.28125, "learning_rate": 4.964057153719545e-05, "loss": 0.7169, "step": 1296 }, { "epoch": 0.05699419300306267, "grad_norm": 3.46875, "learning_rate": 4.963940167858613e-05, "loss": 0.7288, "step": 1298 }, { "epoch": 0.05708201148226615, "grad_norm": 4.03125, "learning_rate": 4.963822993308551e-05, "loss": 0.7436, "step": 1300 }, { "epoch": 0.057169829961469645, "grad_norm": 4.5, "learning_rate": 4.9637056300783343e-05, "loss": 0.7619, "step": 1302 }, { "epoch": 0.05725764844067313, "grad_norm": 4.1875, "learning_rate": 4.9635880781769495e-05, "loss": 0.7431, "step": 1304 }, { "epoch": 0.05734546691987662, "grad_norm": 3.875, "learning_rate": 4.963470337613399e-05, "loss": 0.7413, "step": 1306 }, { "epoch": 0.0574332853990801, "grad_norm": 4.3125, "learning_rate": 4.9633524083967e-05, "loss": 0.7325, "step": 1308 }, { "epoch": 0.05752110387828359, "grad_norm": 4.0625, "learning_rate": 4.963234290535883e-05, "loss": 0.7389, "step": 1310 }, { "epoch": 0.057608922357487075, "grad_norm": 4.21875, "learning_rate": 4.9631159840399935e-05, "loss": 0.7306, "step": 1312 }, { "epoch": 0.05769674083669056, "grad_norm": 3.953125, "learning_rate": 4.962997488918091e-05, "loss": 0.7374, "step": 1314 }, { "epoch": 0.057784559315894046, "grad_norm": 4.375, "learning_rate": 4.962878805179251e-05, "loss": 0.7699, "step": 1316 }, { "epoch": 0.05787237779509753, "grad_norm": 5.5625, "learning_rate": 4.9627599328325606e-05, "loss": 0.7359, "step": 1318 }, { "epoch": 0.05796019627430102, "grad_norm": 4.375, "learning_rate": 4.962640871887126e-05, "loss": 0.7165, "step": 1320 }, { "epoch": 0.058048014753504504, "grad_norm": 3.90625, "learning_rate": 4.962521622352061e-05, "loss": 0.7183, "step": 1322 }, { "epoch": 0.05813583323270799, "grad_norm": 4.09375, "learning_rate": 4.962402184236501e-05, "loss": 0.7297, "step": 1324 }, { "epoch": 0.05822365171191148, "grad_norm": 3.328125, "learning_rate": 4.962282557549591e-05, "loss": 0.7111, "step": 1326 }, { "epoch": 0.05831147019111497, "grad_norm": 3.796875, "learning_rate": 4.9621627423004933e-05, "loss": 0.7451, "step": 1328 }, { "epoch": 0.058399288670318454, "grad_norm": 4.03125, "learning_rate": 4.9620427384983824e-05, "loss": 0.7387, "step": 1330 }, { "epoch": 0.05848710714952194, "grad_norm": 4.40625, "learning_rate": 4.9619225461524484e-05, "loss": 0.7362, "step": 1332 }, { "epoch": 0.058574925628725426, "grad_norm": 3.8125, "learning_rate": 4.961802165271895e-05, "loss": 0.7149, "step": 1334 }, { "epoch": 0.05866274410792891, "grad_norm": 4.09375, "learning_rate": 4.9616815958659425e-05, "loss": 0.715, "step": 1336 }, { "epoch": 0.0587505625871324, "grad_norm": 3.75, "learning_rate": 4.961560837943823e-05, "loss": 0.7248, "step": 1338 }, { "epoch": 0.05883838106633588, "grad_norm": 4.09375, "learning_rate": 4.961439891514784e-05, "loss": 0.7568, "step": 1340 }, { "epoch": 0.05892619954553937, "grad_norm": 5.875, "learning_rate": 4.961318756588088e-05, "loss": 0.7527, "step": 1342 }, { "epoch": 0.059014018024742855, "grad_norm": 8.0625, "learning_rate": 4.961197433173012e-05, "loss": 0.7473, "step": 1344 }, { "epoch": 0.05910183650394634, "grad_norm": 9.0, "learning_rate": 4.961075921278846e-05, "loss": 0.7226, "step": 1346 }, { "epoch": 0.05918965498314983, "grad_norm": 5.8125, "learning_rate": 4.960954220914897e-05, "loss": 0.7375, "step": 1348 }, { "epoch": 0.05927747346235331, "grad_norm": 5.1875, "learning_rate": 4.9608323320904836e-05, "loss": 0.7501, "step": 1350 }, { "epoch": 0.059365291941556805, "grad_norm": 5.8125, "learning_rate": 4.9607102548149396e-05, "loss": 0.7003, "step": 1352 }, { "epoch": 0.05945311042076029, "grad_norm": 6.96875, "learning_rate": 4.960587989097615e-05, "loss": 0.7404, "step": 1354 }, { "epoch": 0.05954092889996378, "grad_norm": 5.1875, "learning_rate": 4.9604655349478726e-05, "loss": 0.7491, "step": 1356 }, { "epoch": 0.05962874737916726, "grad_norm": 5.25, "learning_rate": 4.960342892375089e-05, "loss": 0.7291, "step": 1358 }, { "epoch": 0.05971656585837075, "grad_norm": 5.53125, "learning_rate": 4.960220061388657e-05, "loss": 0.7123, "step": 1360 }, { "epoch": 0.059804384337574235, "grad_norm": 5.9375, "learning_rate": 4.960097041997984e-05, "loss": 0.7116, "step": 1362 }, { "epoch": 0.05989220281677772, "grad_norm": 7.65625, "learning_rate": 4.9599738342124884e-05, "loss": 0.7631, "step": 1364 }, { "epoch": 0.059980021295981206, "grad_norm": 3.625, "learning_rate": 4.959850438041608e-05, "loss": 0.7352, "step": 1366 }, { "epoch": 0.06006783977518469, "grad_norm": 3.84375, "learning_rate": 4.9597268534947906e-05, "loss": 0.7008, "step": 1368 }, { "epoch": 0.06015565825438818, "grad_norm": 3.546875, "learning_rate": 4.9596030805815016e-05, "loss": 0.718, "step": 1370 }, { "epoch": 0.060243476733591664, "grad_norm": 4.09375, "learning_rate": 4.9594791193112186e-05, "loss": 0.7109, "step": 1372 }, { "epoch": 0.06033129521279515, "grad_norm": 4.125, "learning_rate": 4.959354969693436e-05, "loss": 0.6931, "step": 1374 }, { "epoch": 0.060419113691998635, "grad_norm": 4.375, "learning_rate": 4.959230631737659e-05, "loss": 0.7214, "step": 1376 }, { "epoch": 0.06050693217120213, "grad_norm": 4.90625, "learning_rate": 4.9591061054534116e-05, "loss": 0.7364, "step": 1378 }, { "epoch": 0.060594750650405614, "grad_norm": 5.96875, "learning_rate": 4.9589813908502284e-05, "loss": 0.7526, "step": 1380 }, { "epoch": 0.0606825691296091, "grad_norm": 6.78125, "learning_rate": 4.958856487937661e-05, "loss": 0.7243, "step": 1382 }, { "epoch": 0.060770387608812586, "grad_norm": 6.59375, "learning_rate": 4.9587313967252755e-05, "loss": 0.7135, "step": 1384 }, { "epoch": 0.06085820608801607, "grad_norm": 8.5, "learning_rate": 4.958606117222649e-05, "loss": 0.7682, "step": 1386 }, { "epoch": 0.06094602456721956, "grad_norm": 4.59375, "learning_rate": 4.958480649439377e-05, "loss": 0.6967, "step": 1388 }, { "epoch": 0.06103384304642304, "grad_norm": 6.46875, "learning_rate": 4.958354993385068e-05, "loss": 0.737, "step": 1390 }, { "epoch": 0.06112166152562653, "grad_norm": 6.875, "learning_rate": 4.9582291490693434e-05, "loss": 0.751, "step": 1392 }, { "epoch": 0.061209480004830015, "grad_norm": 8.0, "learning_rate": 4.958103116501842e-05, "loss": 0.715, "step": 1394 }, { "epoch": 0.0612972984840335, "grad_norm": 4.8125, "learning_rate": 4.9579768956922145e-05, "loss": 0.7409, "step": 1396 }, { "epoch": 0.06138511696323699, "grad_norm": 4.3125, "learning_rate": 4.957850486650127e-05, "loss": 0.7097, "step": 1398 }, { "epoch": 0.06147293544244047, "grad_norm": 3.609375, "learning_rate": 4.957723889385259e-05, "loss": 0.733, "step": 1400 }, { "epoch": 0.061560753921643965, "grad_norm": 4.5, "learning_rate": 4.957597103907309e-05, "loss": 0.7219, "step": 1402 }, { "epoch": 0.06164857240084745, "grad_norm": 4.96875, "learning_rate": 4.957470130225982e-05, "loss": 0.7023, "step": 1404 }, { "epoch": 0.06173639088005094, "grad_norm": 6.0, "learning_rate": 4.957342968351003e-05, "loss": 0.6959, "step": 1406 }, { "epoch": 0.06182420935925442, "grad_norm": 5.875, "learning_rate": 4.957215618292111e-05, "loss": 0.7014, "step": 1408 }, { "epoch": 0.06191202783845791, "grad_norm": 7.25, "learning_rate": 4.957088080059058e-05, "loss": 0.7056, "step": 1410 }, { "epoch": 0.061999846317661395, "grad_norm": 5.8125, "learning_rate": 4.95696035366161e-05, "loss": 0.7327, "step": 1412 }, { "epoch": 0.06208766479686488, "grad_norm": 3.734375, "learning_rate": 4.95683243910955e-05, "loss": 0.6793, "step": 1414 }, { "epoch": 0.062175483276068366, "grad_norm": 3.3125, "learning_rate": 4.956704336412673e-05, "loss": 0.6629, "step": 1416 }, { "epoch": 0.06226330175527185, "grad_norm": 4.15625, "learning_rate": 4.9565760455807887e-05, "loss": 0.7449, "step": 1418 }, { "epoch": 0.06235112023447534, "grad_norm": 4.28125, "learning_rate": 4.956447566623722e-05, "loss": 0.723, "step": 1420 }, { "epoch": 0.062438938713678824, "grad_norm": 4.03125, "learning_rate": 4.956318899551311e-05, "loss": 0.695, "step": 1422 }, { "epoch": 0.06252675719288231, "grad_norm": 4.96875, "learning_rate": 4.956190044373411e-05, "loss": 0.7091, "step": 1424 }, { "epoch": 0.0626145756720858, "grad_norm": 5.46875, "learning_rate": 4.956061001099888e-05, "loss": 0.716, "step": 1426 }, { "epoch": 0.06270239415128928, "grad_norm": 4.6875, "learning_rate": 4.955931769740625e-05, "loss": 0.7345, "step": 1428 }, { "epoch": 0.06279021263049277, "grad_norm": 4.4375, "learning_rate": 4.955802350305518e-05, "loss": 0.7461, "step": 1430 }, { "epoch": 0.06287803110969625, "grad_norm": 5.15625, "learning_rate": 4.955672742804479e-05, "loss": 0.7486, "step": 1432 }, { "epoch": 0.06296584958889974, "grad_norm": 4.53125, "learning_rate": 4.955542947247432e-05, "loss": 0.7167, "step": 1434 }, { "epoch": 0.06305366806810322, "grad_norm": 4.4375, "learning_rate": 4.955412963644318e-05, "loss": 0.6938, "step": 1436 }, { "epoch": 0.06314148654730671, "grad_norm": 4.5, "learning_rate": 4.9552827920050906e-05, "loss": 0.7398, "step": 1438 }, { "epoch": 0.06322930502651021, "grad_norm": 4.3125, "learning_rate": 4.955152432339718e-05, "loss": 0.6898, "step": 1440 }, { "epoch": 0.0633171235057137, "grad_norm": 4.375, "learning_rate": 4.955021884658184e-05, "loss": 0.7295, "step": 1442 }, { "epoch": 0.06340494198491718, "grad_norm": 3.765625, "learning_rate": 4.9548911489704854e-05, "loss": 0.7075, "step": 1444 }, { "epoch": 0.06349276046412067, "grad_norm": 4.75, "learning_rate": 4.9547602252866343e-05, "loss": 0.7071, "step": 1446 }, { "epoch": 0.06358057894332415, "grad_norm": 4.53125, "learning_rate": 4.954629113616656e-05, "loss": 0.6837, "step": 1448 }, { "epoch": 0.06366839742252764, "grad_norm": 4.5, "learning_rate": 4.954497813970592e-05, "loss": 0.7418, "step": 1450 }, { "epoch": 0.06375621590173113, "grad_norm": 3.671875, "learning_rate": 4.9543663263584974e-05, "loss": 0.7308, "step": 1452 }, { "epoch": 0.06384403438093461, "grad_norm": 4.375, "learning_rate": 4.9542346507904415e-05, "loss": 0.7102, "step": 1454 }, { "epoch": 0.0639318528601381, "grad_norm": 3.640625, "learning_rate": 4.954102787276507e-05, "loss": 0.7463, "step": 1456 }, { "epoch": 0.06401967133934158, "grad_norm": 4.6875, "learning_rate": 4.9539707358267935e-05, "loss": 0.7124, "step": 1458 }, { "epoch": 0.06410748981854507, "grad_norm": 4.3125, "learning_rate": 4.9538384964514116e-05, "loss": 0.7378, "step": 1460 }, { "epoch": 0.06419530829774855, "grad_norm": 4.625, "learning_rate": 4.953706069160491e-05, "loss": 0.7251, "step": 1462 }, { "epoch": 0.06428312677695204, "grad_norm": 4.6875, "learning_rate": 4.95357345396417e-05, "loss": 0.7246, "step": 1464 }, { "epoch": 0.06437094525615553, "grad_norm": 4.71875, "learning_rate": 4.9534406508726065e-05, "loss": 0.722, "step": 1466 }, { "epoch": 0.06445876373535901, "grad_norm": 4.65625, "learning_rate": 4.95330765989597e-05, "loss": 0.7198, "step": 1468 }, { "epoch": 0.0645465822145625, "grad_norm": 3.6875, "learning_rate": 4.9531744810444443e-05, "loss": 0.691, "step": 1470 }, { "epoch": 0.06463440069376598, "grad_norm": 4.125, "learning_rate": 4.9530411143282283e-05, "loss": 0.7077, "step": 1472 }, { "epoch": 0.06472221917296947, "grad_norm": 3.796875, "learning_rate": 4.952907559757537e-05, "loss": 0.7558, "step": 1474 }, { "epoch": 0.06481003765217296, "grad_norm": 5.65625, "learning_rate": 4.9527738173425965e-05, "loss": 0.7045, "step": 1476 }, { "epoch": 0.06489785613137644, "grad_norm": 5.96875, "learning_rate": 4.952639887093648e-05, "loss": 0.7126, "step": 1478 }, { "epoch": 0.06498567461057993, "grad_norm": 5.03125, "learning_rate": 4.95250576902095e-05, "loss": 0.6971, "step": 1480 }, { "epoch": 0.06507349308978341, "grad_norm": 4.5, "learning_rate": 4.9523714631347716e-05, "loss": 0.7151, "step": 1482 }, { "epoch": 0.0651613115689869, "grad_norm": 4.0625, "learning_rate": 4.9522369694453996e-05, "loss": 0.7222, "step": 1484 }, { "epoch": 0.06524913004819038, "grad_norm": 4.96875, "learning_rate": 4.9521022879631325e-05, "loss": 0.6965, "step": 1486 }, { "epoch": 0.06533694852739387, "grad_norm": 3.71875, "learning_rate": 4.951967418698284e-05, "loss": 0.6897, "step": 1488 }, { "epoch": 0.06542476700659736, "grad_norm": 3.765625, "learning_rate": 4.951832361661183e-05, "loss": 0.7026, "step": 1490 }, { "epoch": 0.06551258548580086, "grad_norm": 4.90625, "learning_rate": 4.9516971168621716e-05, "loss": 0.702, "step": 1492 }, { "epoch": 0.06560040396500434, "grad_norm": 4.96875, "learning_rate": 4.951561684311608e-05, "loss": 0.698, "step": 1494 }, { "epoch": 0.06568822244420783, "grad_norm": 6.3125, "learning_rate": 4.951426064019862e-05, "loss": 0.7013, "step": 1496 }, { "epoch": 0.06577604092341131, "grad_norm": 4.53125, "learning_rate": 4.951290255997321e-05, "loss": 0.6693, "step": 1498 }, { "epoch": 0.0658638594026148, "grad_norm": 4.375, "learning_rate": 4.9511542602543836e-05, "loss": 0.6918, "step": 1500 }, { "epoch": 0.06595167788181829, "grad_norm": 4.34375, "learning_rate": 4.951018076801467e-05, "loss": 0.6929, "step": 1502 }, { "epoch": 0.06603949636102177, "grad_norm": 3.890625, "learning_rate": 4.950881705648998e-05, "loss": 0.7204, "step": 1504 }, { "epoch": 0.06612731484022526, "grad_norm": 4.15625, "learning_rate": 4.9507451468074194e-05, "loss": 0.7144, "step": 1506 }, { "epoch": 0.06621513331942874, "grad_norm": 3.578125, "learning_rate": 4.950608400287191e-05, "loss": 0.7081, "step": 1508 }, { "epoch": 0.06630295179863223, "grad_norm": 4.03125, "learning_rate": 4.950471466098784e-05, "loss": 0.7034, "step": 1510 }, { "epoch": 0.06639077027783571, "grad_norm": 3.421875, "learning_rate": 4.950334344252684e-05, "loss": 0.6886, "step": 1512 }, { "epoch": 0.0664785887570392, "grad_norm": 3.265625, "learning_rate": 4.950197034759393e-05, "loss": 0.7148, "step": 1514 }, { "epoch": 0.06656640723624269, "grad_norm": 3.421875, "learning_rate": 4.950059537629425e-05, "loss": 0.7137, "step": 1516 }, { "epoch": 0.06665422571544617, "grad_norm": 4.625, "learning_rate": 4.949921852873311e-05, "loss": 0.7077, "step": 1518 }, { "epoch": 0.06674204419464966, "grad_norm": 3.375, "learning_rate": 4.9497839805015945e-05, "loss": 0.7082, "step": 1520 }, { "epoch": 0.06682986267385314, "grad_norm": 3.671875, "learning_rate": 4.9496459205248325e-05, "loss": 0.6821, "step": 1522 }, { "epoch": 0.06691768115305663, "grad_norm": 3.53125, "learning_rate": 4.9495076729535994e-05, "loss": 0.697, "step": 1524 }, { "epoch": 0.06700549963226012, "grad_norm": 4.1875, "learning_rate": 4.9493692377984815e-05, "loss": 0.6875, "step": 1526 }, { "epoch": 0.0670933181114636, "grad_norm": 4.21875, "learning_rate": 4.94923061507008e-05, "loss": 0.6914, "step": 1528 }, { "epoch": 0.06718113659066709, "grad_norm": 4.21875, "learning_rate": 4.9490918047790114e-05, "loss": 0.7216, "step": 1530 }, { "epoch": 0.06726895506987057, "grad_norm": 5.0625, "learning_rate": 4.9489528069359047e-05, "loss": 0.7153, "step": 1532 }, { "epoch": 0.06735677354907406, "grad_norm": 4.875, "learning_rate": 4.9488136215514045e-05, "loss": 0.7029, "step": 1534 }, { "epoch": 0.06744459202827754, "grad_norm": 3.984375, "learning_rate": 4.9486742486361714e-05, "loss": 0.6867, "step": 1536 }, { "epoch": 0.06753241050748103, "grad_norm": 3.5, "learning_rate": 4.9485346882008765e-05, "loss": 0.6959, "step": 1538 }, { "epoch": 0.06762022898668452, "grad_norm": 6.3125, "learning_rate": 4.948394940256209e-05, "loss": 0.7248, "step": 1540 }, { "epoch": 0.06770804746588802, "grad_norm": 5.96875, "learning_rate": 4.948255004812869e-05, "loss": 0.7015, "step": 1542 }, { "epoch": 0.0677958659450915, "grad_norm": 5.5625, "learning_rate": 4.9481148818815746e-05, "loss": 0.7161, "step": 1544 }, { "epoch": 0.06788368442429499, "grad_norm": 5.0, "learning_rate": 4.947974571473055e-05, "loss": 0.7092, "step": 1546 }, { "epoch": 0.06797150290349847, "grad_norm": 3.65625, "learning_rate": 4.9478340735980565e-05, "loss": 0.7008, "step": 1548 }, { "epoch": 0.06805932138270196, "grad_norm": 4.5, "learning_rate": 4.947693388267338e-05, "loss": 0.7142, "step": 1550 }, { "epoch": 0.06814713986190545, "grad_norm": 4.5, "learning_rate": 4.947552515491673e-05, "loss": 0.6736, "step": 1552 }, { "epoch": 0.06823495834110893, "grad_norm": 5.3125, "learning_rate": 4.94741145528185e-05, "loss": 0.664, "step": 1554 }, { "epoch": 0.06832277682031242, "grad_norm": 4.71875, "learning_rate": 4.94727020764867e-05, "loss": 0.7062, "step": 1556 }, { "epoch": 0.0684105952995159, "grad_norm": 3.78125, "learning_rate": 4.947128772602951e-05, "loss": 0.6652, "step": 1558 }, { "epoch": 0.06849841377871939, "grad_norm": 4.15625, "learning_rate": 4.946987150155525e-05, "loss": 0.7125, "step": 1560 }, { "epoch": 0.06858623225792287, "grad_norm": 5.3125, "learning_rate": 4.9468453403172356e-05, "loss": 0.6811, "step": 1562 }, { "epoch": 0.06867405073712636, "grad_norm": 4.28125, "learning_rate": 4.946703343098944e-05, "loss": 0.7141, "step": 1564 }, { "epoch": 0.06876186921632985, "grad_norm": 3.578125, "learning_rate": 4.9465611585115235e-05, "loss": 0.7154, "step": 1566 }, { "epoch": 0.06884968769553333, "grad_norm": 4.0, "learning_rate": 4.946418786565863e-05, "loss": 0.7087, "step": 1568 }, { "epoch": 0.06893750617473682, "grad_norm": 3.640625, "learning_rate": 4.946276227272865e-05, "loss": 0.6681, "step": 1570 }, { "epoch": 0.0690253246539403, "grad_norm": 4.71875, "learning_rate": 4.9461334806434475e-05, "loss": 0.6754, "step": 1572 }, { "epoch": 0.06911314313314379, "grad_norm": 5.8125, "learning_rate": 4.945990546688542e-05, "loss": 0.6888, "step": 1574 }, { "epoch": 0.06920096161234728, "grad_norm": 5.21875, "learning_rate": 4.945847425419094e-05, "loss": 0.6945, "step": 1576 }, { "epoch": 0.06928878009155076, "grad_norm": 4.5625, "learning_rate": 4.945704116846064e-05, "loss": 0.6836, "step": 1578 }, { "epoch": 0.06937659857075425, "grad_norm": 3.828125, "learning_rate": 4.945560620980426e-05, "loss": 0.6874, "step": 1580 }, { "epoch": 0.06946441704995773, "grad_norm": 3.65625, "learning_rate": 4.945416937833169e-05, "loss": 0.6591, "step": 1582 }, { "epoch": 0.06955223552916122, "grad_norm": 3.125, "learning_rate": 4.945273067415298e-05, "loss": 0.68, "step": 1584 }, { "epoch": 0.0696400540083647, "grad_norm": 3.078125, "learning_rate": 4.945129009737828e-05, "loss": 0.6588, "step": 1586 }, { "epoch": 0.06972787248756819, "grad_norm": 3.515625, "learning_rate": 4.944984764811793e-05, "loss": 0.667, "step": 1588 }, { "epoch": 0.06981569096677168, "grad_norm": 4.625, "learning_rate": 4.9448403326482386e-05, "loss": 0.6798, "step": 1590 }, { "epoch": 0.06990350944597518, "grad_norm": 5.28125, "learning_rate": 4.944695713258225e-05, "loss": 0.6867, "step": 1592 }, { "epoch": 0.06999132792517866, "grad_norm": 4.84375, "learning_rate": 4.944550906652828e-05, "loss": 0.6764, "step": 1594 }, { "epoch": 0.07007914640438215, "grad_norm": 6.84375, "learning_rate": 4.944405912843136e-05, "loss": 0.6841, "step": 1596 }, { "epoch": 0.07016696488358563, "grad_norm": 8.1875, "learning_rate": 4.9442607318402543e-05, "loss": 0.7033, "step": 1598 }, { "epoch": 0.07025478336278912, "grad_norm": 4.46875, "learning_rate": 4.944115363655299e-05, "loss": 0.6732, "step": 1600 }, { "epoch": 0.0703426018419926, "grad_norm": 3.78125, "learning_rate": 4.943969808299404e-05, "loss": 0.6968, "step": 1602 }, { "epoch": 0.07043042032119609, "grad_norm": 3.71875, "learning_rate": 4.943824065783714e-05, "loss": 0.6775, "step": 1604 }, { "epoch": 0.07051823880039958, "grad_norm": 4.125, "learning_rate": 4.9436781361193926e-05, "loss": 0.646, "step": 1606 }, { "epoch": 0.07060605727960306, "grad_norm": 6.15625, "learning_rate": 4.943532019317613e-05, "loss": 0.7055, "step": 1608 }, { "epoch": 0.07069387575880655, "grad_norm": 4.8125, "learning_rate": 4.943385715389566e-05, "loss": 0.6727, "step": 1610 }, { "epoch": 0.07078169423801003, "grad_norm": 4.65625, "learning_rate": 4.9432392243464546e-05, "loss": 0.6792, "step": 1612 }, { "epoch": 0.07086951271721352, "grad_norm": 3.921875, "learning_rate": 4.943092546199498e-05, "loss": 0.651, "step": 1614 }, { "epoch": 0.070957331196417, "grad_norm": 4.0625, "learning_rate": 4.9429456809599286e-05, "loss": 0.6823, "step": 1616 }, { "epoch": 0.07104514967562049, "grad_norm": 3.828125, "learning_rate": 4.942798628638994e-05, "loss": 0.6846, "step": 1618 }, { "epoch": 0.07113296815482398, "grad_norm": 3.90625, "learning_rate": 4.942651389247954e-05, "loss": 0.6784, "step": 1620 }, { "epoch": 0.07122078663402746, "grad_norm": 3.28125, "learning_rate": 4.942503962798085e-05, "loss": 0.6826, "step": 1622 }, { "epoch": 0.07130860511323095, "grad_norm": 3.453125, "learning_rate": 4.9423563493006776e-05, "loss": 0.7006, "step": 1624 }, { "epoch": 0.07139642359243444, "grad_norm": 4.1875, "learning_rate": 4.9422085487670344e-05, "loss": 0.6968, "step": 1626 }, { "epoch": 0.07148424207163792, "grad_norm": 4.34375, "learning_rate": 4.942060561208476e-05, "loss": 0.7066, "step": 1628 }, { "epoch": 0.07157206055084141, "grad_norm": 4.8125, "learning_rate": 4.941912386636335e-05, "loss": 0.6673, "step": 1630 }, { "epoch": 0.07165987903004489, "grad_norm": 4.40625, "learning_rate": 4.941764025061957e-05, "loss": 0.6775, "step": 1632 }, { "epoch": 0.07174769750924838, "grad_norm": 4.65625, "learning_rate": 4.9416154764967046e-05, "loss": 0.6848, "step": 1634 }, { "epoch": 0.07183551598845186, "grad_norm": 3.90625, "learning_rate": 4.941466740951954e-05, "loss": 0.6707, "step": 1636 }, { "epoch": 0.07192333446765535, "grad_norm": 3.5, "learning_rate": 4.941317818439095e-05, "loss": 0.6565, "step": 1638 }, { "epoch": 0.07201115294685884, "grad_norm": 3.734375, "learning_rate": 4.941168708969533e-05, "loss": 0.6783, "step": 1640 }, { "epoch": 0.07209897142606234, "grad_norm": 3.71875, "learning_rate": 4.941019412554686e-05, "loss": 0.6587, "step": 1642 }, { "epoch": 0.07218678990526582, "grad_norm": 3.59375, "learning_rate": 4.9408699292059865e-05, "loss": 0.6873, "step": 1644 }, { "epoch": 0.07227460838446931, "grad_norm": 4.25, "learning_rate": 4.940720258934883e-05, "loss": 0.6964, "step": 1646 }, { "epoch": 0.0723624268636728, "grad_norm": 4.4375, "learning_rate": 4.940570401752836e-05, "loss": 0.6669, "step": 1648 }, { "epoch": 0.07245024534287628, "grad_norm": 4.25, "learning_rate": 4.940420357671324e-05, "loss": 0.7105, "step": 1650 }, { "epoch": 0.07253806382207977, "grad_norm": 3.4375, "learning_rate": 4.940270126701836e-05, "loss": 0.6843, "step": 1652 }, { "epoch": 0.07262588230128325, "grad_norm": 3.6875, "learning_rate": 4.940119708855876e-05, "loss": 0.6931, "step": 1654 }, { "epoch": 0.07271370078048674, "grad_norm": 3.96875, "learning_rate": 4.939969104144964e-05, "loss": 0.6722, "step": 1656 }, { "epoch": 0.07280151925969022, "grad_norm": 5.46875, "learning_rate": 4.939818312580633e-05, "loss": 0.708, "step": 1658 }, { "epoch": 0.07288933773889371, "grad_norm": 5.3125, "learning_rate": 4.939667334174431e-05, "loss": 0.7094, "step": 1660 }, { "epoch": 0.0729771562180972, "grad_norm": 4.28125, "learning_rate": 4.93951616893792e-05, "loss": 0.7017, "step": 1662 }, { "epoch": 0.07306497469730068, "grad_norm": 4.5, "learning_rate": 4.939364816882676e-05, "loss": 0.6851, "step": 1664 }, { "epoch": 0.07315279317650417, "grad_norm": 3.640625, "learning_rate": 4.939213278020288e-05, "loss": 0.6595, "step": 1666 }, { "epoch": 0.07324061165570765, "grad_norm": 3.359375, "learning_rate": 4.939061552362364e-05, "loss": 0.6878, "step": 1668 }, { "epoch": 0.07332843013491114, "grad_norm": 3.515625, "learning_rate": 4.938909639920521e-05, "loss": 0.6921, "step": 1670 }, { "epoch": 0.07341624861411462, "grad_norm": 3.59375, "learning_rate": 4.9387575407063936e-05, "loss": 0.6692, "step": 1672 }, { "epoch": 0.07350406709331811, "grad_norm": 3.796875, "learning_rate": 4.938605254731629e-05, "loss": 0.6822, "step": 1674 }, { "epoch": 0.0735918855725216, "grad_norm": 3.984375, "learning_rate": 4.93845278200789e-05, "loss": 0.6804, "step": 1676 }, { "epoch": 0.07367970405172508, "grad_norm": 4.25, "learning_rate": 4.938300122546851e-05, "loss": 0.6923, "step": 1678 }, { "epoch": 0.07376752253092857, "grad_norm": 4.8125, "learning_rate": 4.938147276360205e-05, "loss": 0.6968, "step": 1680 }, { "epoch": 0.07385534101013205, "grad_norm": 4.71875, "learning_rate": 4.937994243459656e-05, "loss": 0.6804, "step": 1682 }, { "epoch": 0.07394315948933554, "grad_norm": 4.21875, "learning_rate": 4.937841023856923e-05, "loss": 0.6759, "step": 1684 }, { "epoch": 0.07403097796853902, "grad_norm": 4.90625, "learning_rate": 4.937687617563741e-05, "loss": 0.6791, "step": 1686 }, { "epoch": 0.07411879644774251, "grad_norm": 4.53125, "learning_rate": 4.937534024591856e-05, "loss": 0.6773, "step": 1688 }, { "epoch": 0.074206614926946, "grad_norm": 5.78125, "learning_rate": 4.9373802449530316e-05, "loss": 0.7157, "step": 1690 }, { "epoch": 0.07429443340614948, "grad_norm": 5.34375, "learning_rate": 4.9372262786590436e-05, "loss": 0.6767, "step": 1692 }, { "epoch": 0.07438225188535298, "grad_norm": 4.15625, "learning_rate": 4.9370721257216824e-05, "loss": 0.6637, "step": 1694 }, { "epoch": 0.07447007036455647, "grad_norm": 4.21875, "learning_rate": 4.936917786152754e-05, "loss": 0.6797, "step": 1696 }, { "epoch": 0.07455788884375995, "grad_norm": 4.21875, "learning_rate": 4.936763259964078e-05, "loss": 0.7028, "step": 1698 }, { "epoch": 0.07464570732296344, "grad_norm": 4.15625, "learning_rate": 4.9366085471674864e-05, "loss": 0.6994, "step": 1700 }, { "epoch": 0.07473352580216693, "grad_norm": 4.1875, "learning_rate": 4.936453647774829e-05, "loss": 0.693, "step": 1702 }, { "epoch": 0.07482134428137041, "grad_norm": 3.9375, "learning_rate": 4.936298561797966e-05, "loss": 0.6734, "step": 1704 }, { "epoch": 0.0749091627605739, "grad_norm": 3.609375, "learning_rate": 4.936143289248776e-05, "loss": 0.681, "step": 1706 }, { "epoch": 0.07499698123977738, "grad_norm": 4.15625, "learning_rate": 4.9359878301391495e-05, "loss": 0.6764, "step": 1708 }, { "epoch": 0.07508479971898087, "grad_norm": 4.625, "learning_rate": 4.93583218448099e-05, "loss": 0.6641, "step": 1710 }, { "epoch": 0.07517261819818435, "grad_norm": 4.0, "learning_rate": 4.935676352286218e-05, "loss": 0.6791, "step": 1712 }, { "epoch": 0.07526043667738784, "grad_norm": 5.03125, "learning_rate": 4.935520333566768e-05, "loss": 0.6714, "step": 1714 }, { "epoch": 0.07534825515659133, "grad_norm": 4.90625, "learning_rate": 4.9353641283345863e-05, "loss": 0.7237, "step": 1716 }, { "epoch": 0.07543607363579481, "grad_norm": 4.46875, "learning_rate": 4.9352077366016355e-05, "loss": 0.6888, "step": 1718 }, { "epoch": 0.0755238921149983, "grad_norm": 4.0, "learning_rate": 4.935051158379893e-05, "loss": 0.6826, "step": 1720 }, { "epoch": 0.07561171059420178, "grad_norm": 3.25, "learning_rate": 4.934894393681349e-05, "loss": 0.666, "step": 1722 }, { "epoch": 0.07569952907340527, "grad_norm": 4.0625, "learning_rate": 4.934737442518009e-05, "loss": 0.6854, "step": 1724 }, { "epoch": 0.07578734755260876, "grad_norm": 3.21875, "learning_rate": 4.9345803049018914e-05, "loss": 0.692, "step": 1726 }, { "epoch": 0.07587516603181224, "grad_norm": 3.59375, "learning_rate": 4.9344229808450305e-05, "loss": 0.6561, "step": 1728 }, { "epoch": 0.07596298451101573, "grad_norm": 4.125, "learning_rate": 4.934265470359474e-05, "loss": 0.6502, "step": 1730 }, { "epoch": 0.07605080299021921, "grad_norm": 3.859375, "learning_rate": 4.934107773457285e-05, "loss": 0.6824, "step": 1732 }, { "epoch": 0.0761386214694227, "grad_norm": 3.625, "learning_rate": 4.933949890150539e-05, "loss": 0.6821, "step": 1734 }, { "epoch": 0.07622643994862618, "grad_norm": 3.90625, "learning_rate": 4.933791820451327e-05, "loss": 0.6656, "step": 1736 }, { "epoch": 0.07631425842782967, "grad_norm": 3.65625, "learning_rate": 4.933633564371753e-05, "loss": 0.6663, "step": 1738 }, { "epoch": 0.07640207690703316, "grad_norm": 3.625, "learning_rate": 4.933475121923938e-05, "loss": 0.6612, "step": 1740 }, { "epoch": 0.07648989538623664, "grad_norm": 3.46875, "learning_rate": 4.933316493120015e-05, "loss": 0.662, "step": 1742 }, { "epoch": 0.07657771386544014, "grad_norm": 4.21875, "learning_rate": 4.9331576779721314e-05, "loss": 0.6569, "step": 1744 }, { "epoch": 0.07666553234464363, "grad_norm": 3.921875, "learning_rate": 4.93299867649245e-05, "loss": 0.6772, "step": 1746 }, { "epoch": 0.07675335082384711, "grad_norm": 3.078125, "learning_rate": 4.9328394886931456e-05, "loss": 0.6619, "step": 1748 }, { "epoch": 0.0768411693030506, "grad_norm": 3.609375, "learning_rate": 4.932680114586411e-05, "loss": 0.6713, "step": 1750 }, { "epoch": 0.07692898778225409, "grad_norm": 3.25, "learning_rate": 4.9325205541844497e-05, "loss": 0.6795, "step": 1752 }, { "epoch": 0.07701680626145757, "grad_norm": 3.359375, "learning_rate": 4.932360807499481e-05, "loss": 0.6586, "step": 1754 }, { "epoch": 0.07710462474066106, "grad_norm": 3.515625, "learning_rate": 4.9322008745437385e-05, "loss": 0.675, "step": 1756 }, { "epoch": 0.07719244321986454, "grad_norm": 3.40625, "learning_rate": 4.932040755329471e-05, "loss": 0.6854, "step": 1758 }, { "epoch": 0.07728026169906803, "grad_norm": 3.28125, "learning_rate": 4.9318804498689384e-05, "loss": 0.694, "step": 1760 }, { "epoch": 0.07736808017827151, "grad_norm": 3.296875, "learning_rate": 4.9317199581744187e-05, "loss": 0.6677, "step": 1762 }, { "epoch": 0.077455898657475, "grad_norm": 3.4375, "learning_rate": 4.931559280258201e-05, "loss": 0.6727, "step": 1764 }, { "epoch": 0.07754371713667849, "grad_norm": 3.953125, "learning_rate": 4.931398416132591e-05, "loss": 0.6514, "step": 1766 }, { "epoch": 0.07763153561588197, "grad_norm": 3.640625, "learning_rate": 4.9312373658099076e-05, "loss": 0.6559, "step": 1768 }, { "epoch": 0.07771935409508546, "grad_norm": 4.21875, "learning_rate": 4.931076129302484e-05, "loss": 0.633, "step": 1770 }, { "epoch": 0.07780717257428894, "grad_norm": 3.625, "learning_rate": 4.930914706622668e-05, "loss": 0.6615, "step": 1772 }, { "epoch": 0.07789499105349243, "grad_norm": 3.75, "learning_rate": 4.93075309778282e-05, "loss": 0.6607, "step": 1774 }, { "epoch": 0.07798280953269592, "grad_norm": 3.40625, "learning_rate": 4.930591302795318e-05, "loss": 0.652, "step": 1776 }, { "epoch": 0.0780706280118994, "grad_norm": 3.40625, "learning_rate": 4.9304293216725505e-05, "loss": 0.7057, "step": 1778 }, { "epoch": 0.07815844649110289, "grad_norm": 3.453125, "learning_rate": 4.930267154426924e-05, "loss": 0.6711, "step": 1780 }, { "epoch": 0.07824626497030637, "grad_norm": 3.796875, "learning_rate": 4.9301048010708556e-05, "loss": 0.6659, "step": 1782 }, { "epoch": 0.07833408344950986, "grad_norm": 3.859375, "learning_rate": 4.929942261616779e-05, "loss": 0.6763, "step": 1784 }, { "epoch": 0.07842190192871334, "grad_norm": 3.328125, "learning_rate": 4.929779536077142e-05, "loss": 0.6799, "step": 1786 }, { "epoch": 0.07850972040791683, "grad_norm": 3.328125, "learning_rate": 4.929616624464405e-05, "loss": 0.6641, "step": 1788 }, { "epoch": 0.07859753888712032, "grad_norm": 3.65625, "learning_rate": 4.9294535267910446e-05, "loss": 0.6557, "step": 1790 }, { "epoch": 0.0786853573663238, "grad_norm": 5.125, "learning_rate": 4.929290243069551e-05, "loss": 0.6742, "step": 1792 }, { "epoch": 0.0787731758455273, "grad_norm": 3.484375, "learning_rate": 4.929126773312428e-05, "loss": 0.6501, "step": 1794 }, { "epoch": 0.07886099432473079, "grad_norm": 3.90625, "learning_rate": 4.928963117532195e-05, "loss": 0.671, "step": 1796 }, { "epoch": 0.07894881280393427, "grad_norm": 3.25, "learning_rate": 4.928799275741384e-05, "loss": 0.6796, "step": 1798 }, { "epoch": 0.07903663128313776, "grad_norm": 3.40625, "learning_rate": 4.928635247952541e-05, "loss": 0.6689, "step": 1800 }, { "epoch": 0.07912444976234125, "grad_norm": 4.71875, "learning_rate": 4.92847103417823e-05, "loss": 0.6507, "step": 1802 }, { "epoch": 0.07921226824154473, "grad_norm": 4.71875, "learning_rate": 4.928306634431025e-05, "loss": 0.6544, "step": 1804 }, { "epoch": 0.07930008672074822, "grad_norm": 4.34375, "learning_rate": 4.9281420487235144e-05, "loss": 0.6738, "step": 1806 }, { "epoch": 0.0793879051999517, "grad_norm": 3.984375, "learning_rate": 4.927977277068305e-05, "loss": 0.6526, "step": 1808 }, { "epoch": 0.07947572367915519, "grad_norm": 3.890625, "learning_rate": 4.9278123194780134e-05, "loss": 0.7137, "step": 1810 }, { "epoch": 0.07956354215835867, "grad_norm": 3.234375, "learning_rate": 4.927647175965272e-05, "loss": 0.6777, "step": 1812 }, { "epoch": 0.07965136063756216, "grad_norm": 3.765625, "learning_rate": 4.9274818465427285e-05, "loss": 0.6403, "step": 1814 }, { "epoch": 0.07973917911676565, "grad_norm": 3.828125, "learning_rate": 4.927316331223043e-05, "loss": 0.6577, "step": 1816 }, { "epoch": 0.07982699759596913, "grad_norm": 4.25, "learning_rate": 4.927150630018891e-05, "loss": 0.646, "step": 1818 }, { "epoch": 0.07991481607517262, "grad_norm": 3.375, "learning_rate": 4.926984742942961e-05, "loss": 0.6679, "step": 1820 }, { "epoch": 0.0800026345543761, "grad_norm": 3.5625, "learning_rate": 4.9268186700079594e-05, "loss": 0.6644, "step": 1822 }, { "epoch": 0.08009045303357959, "grad_norm": 3.53125, "learning_rate": 4.926652411226601e-05, "loss": 0.6556, "step": 1824 }, { "epoch": 0.08017827151278308, "grad_norm": 3.578125, "learning_rate": 4.92648596661162e-05, "loss": 0.6853, "step": 1826 }, { "epoch": 0.08026608999198656, "grad_norm": 4.40625, "learning_rate": 4.926319336175762e-05, "loss": 0.6583, "step": 1828 }, { "epoch": 0.08035390847119005, "grad_norm": 4.21875, "learning_rate": 4.926152519931787e-05, "loss": 0.6808, "step": 1830 }, { "epoch": 0.08044172695039353, "grad_norm": 4.28125, "learning_rate": 4.925985517892471e-05, "loss": 0.7052, "step": 1832 }, { "epoch": 0.08052954542959702, "grad_norm": 3.328125, "learning_rate": 4.9258183300706016e-05, "loss": 0.6652, "step": 1834 }, { "epoch": 0.0806173639088005, "grad_norm": 3.3125, "learning_rate": 4.9256509564789836e-05, "loss": 0.6473, "step": 1836 }, { "epoch": 0.08070518238800399, "grad_norm": 3.265625, "learning_rate": 4.9254833971304334e-05, "loss": 0.6697, "step": 1838 }, { "epoch": 0.08079300086720748, "grad_norm": 4.375, "learning_rate": 4.925315652037784e-05, "loss": 0.6887, "step": 1840 }, { "epoch": 0.08088081934641096, "grad_norm": 3.890625, "learning_rate": 4.925147721213881e-05, "loss": 0.652, "step": 1842 }, { "epoch": 0.08096863782561446, "grad_norm": 4.125, "learning_rate": 4.924979604671583e-05, "loss": 0.6581, "step": 1844 }, { "epoch": 0.08105645630481795, "grad_norm": 3.5, "learning_rate": 4.924811302423766e-05, "loss": 0.6593, "step": 1846 }, { "epoch": 0.08114427478402143, "grad_norm": 5.65625, "learning_rate": 4.924642814483318e-05, "loss": 0.6676, "step": 1848 }, { "epoch": 0.08123209326322492, "grad_norm": 4.8125, "learning_rate": 4.924474140863142e-05, "loss": 0.6476, "step": 1850 }, { "epoch": 0.0813199117424284, "grad_norm": 3.953125, "learning_rate": 4.924305281576156e-05, "loss": 0.6434, "step": 1852 }, { "epoch": 0.08140773022163189, "grad_norm": 4.09375, "learning_rate": 4.924136236635289e-05, "loss": 0.647, "step": 1854 }, { "epoch": 0.08149554870083538, "grad_norm": 4.75, "learning_rate": 4.923967006053489e-05, "loss": 0.6646, "step": 1856 }, { "epoch": 0.08158336718003886, "grad_norm": 4.34375, "learning_rate": 4.9237975898437144e-05, "loss": 0.6349, "step": 1858 }, { "epoch": 0.08167118565924235, "grad_norm": 4.09375, "learning_rate": 4.923627988018939e-05, "loss": 0.6667, "step": 1860 }, { "epoch": 0.08175900413844583, "grad_norm": 4.0, "learning_rate": 4.9234582005921514e-05, "loss": 0.6273, "step": 1862 }, { "epoch": 0.08184682261764932, "grad_norm": 3.359375, "learning_rate": 4.923288227576354e-05, "loss": 0.6516, "step": 1864 }, { "epoch": 0.0819346410968528, "grad_norm": 3.265625, "learning_rate": 4.923118068984564e-05, "loss": 0.6475, "step": 1866 }, { "epoch": 0.08202245957605629, "grad_norm": 3.171875, "learning_rate": 4.92294772482981e-05, "loss": 0.6575, "step": 1868 }, { "epoch": 0.08211027805525978, "grad_norm": 3.375, "learning_rate": 4.922777195125139e-05, "loss": 0.6461, "step": 1870 }, { "epoch": 0.08219809653446326, "grad_norm": 3.46875, "learning_rate": 4.922606479883609e-05, "loss": 0.664, "step": 1872 }, { "epoch": 0.08228591501366675, "grad_norm": 4.125, "learning_rate": 4.9224355791182955e-05, "loss": 0.6572, "step": 1874 }, { "epoch": 0.08237373349287024, "grad_norm": 4.28125, "learning_rate": 4.922264492842283e-05, "loss": 0.6558, "step": 1876 }, { "epoch": 0.08246155197207372, "grad_norm": 3.65625, "learning_rate": 4.922093221068676e-05, "loss": 0.6248, "step": 1878 }, { "epoch": 0.08254937045127721, "grad_norm": 3.34375, "learning_rate": 4.92192176381059e-05, "loss": 0.6539, "step": 1880 }, { "epoch": 0.08263718893048069, "grad_norm": 3.03125, "learning_rate": 4.9217501210811536e-05, "loss": 0.662, "step": 1882 }, { "epoch": 0.08272500740968418, "grad_norm": 3.734375, "learning_rate": 4.9215782928935126e-05, "loss": 0.6361, "step": 1884 }, { "epoch": 0.08281282588888766, "grad_norm": 3.875, "learning_rate": 4.921406279260826e-05, "loss": 0.6498, "step": 1886 }, { "epoch": 0.08290064436809115, "grad_norm": 3.078125, "learning_rate": 4.9212340801962655e-05, "loss": 0.6539, "step": 1888 }, { "epoch": 0.08298846284729464, "grad_norm": 3.75, "learning_rate": 4.9210616957130185e-05, "loss": 0.6717, "step": 1890 }, { "epoch": 0.08307628132649812, "grad_norm": 3.3125, "learning_rate": 4.9208891258242874e-05, "loss": 0.6808, "step": 1892 }, { "epoch": 0.08316409980570161, "grad_norm": 3.59375, "learning_rate": 4.9207163705432855e-05, "loss": 0.6205, "step": 1894 }, { "epoch": 0.08325191828490511, "grad_norm": 3.28125, "learning_rate": 4.920543429883245e-05, "loss": 0.6618, "step": 1896 }, { "epoch": 0.0833397367641086, "grad_norm": 3.34375, "learning_rate": 4.9203703038574076e-05, "loss": 0.6543, "step": 1898 }, { "epoch": 0.08342755524331208, "grad_norm": 3.40625, "learning_rate": 4.9201969924790324e-05, "loss": 0.6634, "step": 1900 }, { "epoch": 0.08351537372251557, "grad_norm": 4.28125, "learning_rate": 4.9200234957613915e-05, "loss": 0.6681, "step": 1902 }, { "epoch": 0.08360319220171905, "grad_norm": 5.09375, "learning_rate": 4.9198498137177705e-05, "loss": 0.6415, "step": 1904 }, { "epoch": 0.08369101068092254, "grad_norm": 5.5625, "learning_rate": 4.919675946361472e-05, "loss": 0.6455, "step": 1906 }, { "epoch": 0.08377882916012602, "grad_norm": 7.09375, "learning_rate": 4.919501893705808e-05, "loss": 0.6539, "step": 1908 }, { "epoch": 0.08386664763932951, "grad_norm": 6.65625, "learning_rate": 4.91932765576411e-05, "loss": 0.669, "step": 1910 }, { "epoch": 0.083954466118533, "grad_norm": 5.625, "learning_rate": 4.91915323254972e-05, "loss": 0.6528, "step": 1912 }, { "epoch": 0.08404228459773648, "grad_norm": 3.5625, "learning_rate": 4.918978624075995e-05, "loss": 0.6532, "step": 1914 }, { "epoch": 0.08413010307693997, "grad_norm": 3.0625, "learning_rate": 4.918803830356308e-05, "loss": 0.6647, "step": 1916 }, { "epoch": 0.08421792155614345, "grad_norm": 3.8125, "learning_rate": 4.918628851404043e-05, "loss": 0.7015, "step": 1918 }, { "epoch": 0.08430574003534694, "grad_norm": 3.9375, "learning_rate": 4.918453687232601e-05, "loss": 0.6318, "step": 1920 }, { "epoch": 0.08439355851455042, "grad_norm": 3.9375, "learning_rate": 4.918278337855396e-05, "loss": 0.6575, "step": 1922 }, { "epoch": 0.08448137699375391, "grad_norm": 5.21875, "learning_rate": 4.918102803285856e-05, "loss": 0.6639, "step": 1924 }, { "epoch": 0.0845691954729574, "grad_norm": 3.875, "learning_rate": 4.917927083537423e-05, "loss": 0.6426, "step": 1926 }, { "epoch": 0.08465701395216088, "grad_norm": 3.265625, "learning_rate": 4.9177511786235556e-05, "loss": 0.6318, "step": 1928 }, { "epoch": 0.08474483243136437, "grad_norm": 3.09375, "learning_rate": 4.917575088557723e-05, "loss": 0.6358, "step": 1930 }, { "epoch": 0.08483265091056785, "grad_norm": 3.5, "learning_rate": 4.917398813353411e-05, "loss": 0.6354, "step": 1932 }, { "epoch": 0.08492046938977134, "grad_norm": 4.5625, "learning_rate": 4.917222353024118e-05, "loss": 0.6469, "step": 1934 }, { "epoch": 0.08500828786897482, "grad_norm": 4.28125, "learning_rate": 4.9170457075833574e-05, "loss": 0.6359, "step": 1936 }, { "epoch": 0.08509610634817831, "grad_norm": 4.15625, "learning_rate": 4.916868877044657e-05, "loss": 0.6663, "step": 1938 }, { "epoch": 0.0851839248273818, "grad_norm": 5.78125, "learning_rate": 4.916691861421559e-05, "loss": 0.6841, "step": 1940 }, { "epoch": 0.08527174330658528, "grad_norm": 4.625, "learning_rate": 4.916514660727619e-05, "loss": 0.6488, "step": 1942 }, { "epoch": 0.08535956178578877, "grad_norm": 4.3125, "learning_rate": 4.916337274976407e-05, "loss": 0.6324, "step": 1944 }, { "epoch": 0.08544738026499227, "grad_norm": 3.84375, "learning_rate": 4.9161597041815075e-05, "loss": 0.6443, "step": 1946 }, { "epoch": 0.08553519874419575, "grad_norm": 4.03125, "learning_rate": 4.9159819483565175e-05, "loss": 0.6394, "step": 1948 }, { "epoch": 0.08562301722339924, "grad_norm": 3.96875, "learning_rate": 4.915804007515052e-05, "loss": 0.6319, "step": 1950 }, { "epoch": 0.08571083570260273, "grad_norm": 3.984375, "learning_rate": 4.915625881670736e-05, "loss": 0.6318, "step": 1952 }, { "epoch": 0.08579865418180621, "grad_norm": 3.4375, "learning_rate": 4.915447570837211e-05, "loss": 0.6436, "step": 1954 }, { "epoch": 0.0858864726610097, "grad_norm": 3.53125, "learning_rate": 4.9152690750281314e-05, "loss": 0.6554, "step": 1956 }, { "epoch": 0.08597429114021318, "grad_norm": 3.4375, "learning_rate": 4.915090394257168e-05, "loss": 0.6466, "step": 1958 }, { "epoch": 0.08606210961941667, "grad_norm": 3.046875, "learning_rate": 4.914911528538003e-05, "loss": 0.6518, "step": 1960 }, { "epoch": 0.08614992809862015, "grad_norm": 3.46875, "learning_rate": 4.914732477884334e-05, "loss": 0.6549, "step": 1962 }, { "epoch": 0.08623774657782364, "grad_norm": 4.65625, "learning_rate": 4.914553242309873e-05, "loss": 0.6223, "step": 1964 }, { "epoch": 0.08632556505702713, "grad_norm": 3.953125, "learning_rate": 4.9143738218283466e-05, "loss": 0.6593, "step": 1966 }, { "epoch": 0.08641338353623061, "grad_norm": 3.25, "learning_rate": 4.9141942164534936e-05, "loss": 0.6879, "step": 1968 }, { "epoch": 0.0865012020154341, "grad_norm": 4.0625, "learning_rate": 4.9140144261990687e-05, "loss": 0.6215, "step": 1970 }, { "epoch": 0.08658902049463758, "grad_norm": 3.5625, "learning_rate": 4.91383445107884e-05, "loss": 0.6731, "step": 1972 }, { "epoch": 0.08667683897384107, "grad_norm": 3.125, "learning_rate": 4.913654291106591e-05, "loss": 0.6251, "step": 1974 }, { "epoch": 0.08676465745304456, "grad_norm": 3.25, "learning_rate": 4.9134739462961174e-05, "loss": 0.6461, "step": 1976 }, { "epoch": 0.08685247593224804, "grad_norm": 3.5, "learning_rate": 4.913293416661231e-05, "loss": 0.6691, "step": 1978 }, { "epoch": 0.08694029441145153, "grad_norm": 3.125, "learning_rate": 4.913112702215756e-05, "loss": 0.6742, "step": 1980 }, { "epoch": 0.08702811289065501, "grad_norm": 4.4375, "learning_rate": 4.9129318029735315e-05, "loss": 0.6441, "step": 1982 }, { "epoch": 0.0871159313698585, "grad_norm": 4.0625, "learning_rate": 4.912750718948411e-05, "loss": 0.6627, "step": 1984 }, { "epoch": 0.08720374984906198, "grad_norm": 4.96875, "learning_rate": 4.912569450154263e-05, "loss": 0.667, "step": 1986 }, { "epoch": 0.08729156832826547, "grad_norm": 4.75, "learning_rate": 4.912387996604968e-05, "loss": 0.6616, "step": 1988 }, { "epoch": 0.08737938680746896, "grad_norm": 4.90625, "learning_rate": 4.9122063583144204e-05, "loss": 0.6596, "step": 1990 }, { "epoch": 0.08746720528667244, "grad_norm": 4.9375, "learning_rate": 4.912024535296533e-05, "loss": 0.6549, "step": 1992 }, { "epoch": 0.08755502376587593, "grad_norm": 3.3125, "learning_rate": 4.9118425275652286e-05, "loss": 0.6287, "step": 1994 }, { "epoch": 0.08764284224507943, "grad_norm": 3.78125, "learning_rate": 4.911660335134445e-05, "loss": 0.6587, "step": 1996 }, { "epoch": 0.08773066072428291, "grad_norm": 3.09375, "learning_rate": 4.9114779580181345e-05, "loss": 0.6227, "step": 1998 }, { "epoch": 0.0878184792034864, "grad_norm": 3.921875, "learning_rate": 4.9112953962302646e-05, "loss": 0.6558, "step": 2000 }, { "epoch": 0.08790629768268989, "grad_norm": 4.65625, "learning_rate": 4.9111126497848144e-05, "loss": 0.6681, "step": 2002 }, { "epoch": 0.08799411616189337, "grad_norm": 5.34375, "learning_rate": 4.9109297186957796e-05, "loss": 0.6741, "step": 2004 }, { "epoch": 0.08808193464109686, "grad_norm": 3.90625, "learning_rate": 4.91074660297717e-05, "loss": 0.6307, "step": 2006 }, { "epoch": 0.08816975312030034, "grad_norm": 4.3125, "learning_rate": 4.910563302643007e-05, "loss": 0.6463, "step": 2008 }, { "epoch": 0.08825757159950383, "grad_norm": 3.984375, "learning_rate": 4.910379817707328e-05, "loss": 0.6463, "step": 2010 }, { "epoch": 0.08834539007870731, "grad_norm": 3.59375, "learning_rate": 4.910196148184185e-05, "loss": 0.6553, "step": 2012 }, { "epoch": 0.0884332085579108, "grad_norm": 3.0625, "learning_rate": 4.9100122940876433e-05, "loss": 0.6324, "step": 2014 }, { "epoch": 0.08852102703711429, "grad_norm": 3.265625, "learning_rate": 4.9098282554317823e-05, "loss": 0.6297, "step": 2016 }, { "epoch": 0.08860884551631777, "grad_norm": 3.765625, "learning_rate": 4.9096440322306956e-05, "loss": 0.6239, "step": 2018 }, { "epoch": 0.08869666399552126, "grad_norm": 3.140625, "learning_rate": 4.909459624498491e-05, "loss": 0.6589, "step": 2020 }, { "epoch": 0.08878448247472474, "grad_norm": 3.734375, "learning_rate": 4.909275032249292e-05, "loss": 0.6233, "step": 2022 }, { "epoch": 0.08887230095392823, "grad_norm": 3.484375, "learning_rate": 4.909090255497233e-05, "loss": 0.6599, "step": 2024 }, { "epoch": 0.08896011943313172, "grad_norm": 3.28125, "learning_rate": 4.908905294256464e-05, "loss": 0.652, "step": 2026 }, { "epoch": 0.0890479379123352, "grad_norm": 3.515625, "learning_rate": 4.908720148541152e-05, "loss": 0.632, "step": 2028 }, { "epoch": 0.08913575639153869, "grad_norm": 3.484375, "learning_rate": 4.9085348183654714e-05, "loss": 0.6419, "step": 2030 }, { "epoch": 0.08922357487074217, "grad_norm": 3.15625, "learning_rate": 4.908349303743618e-05, "loss": 0.6383, "step": 2032 }, { "epoch": 0.08931139334994566, "grad_norm": 3.203125, "learning_rate": 4.908163604689798e-05, "loss": 0.6406, "step": 2034 }, { "epoch": 0.08939921182914914, "grad_norm": 3.09375, "learning_rate": 4.907977721218231e-05, "loss": 0.6367, "step": 2036 }, { "epoch": 0.08948703030835263, "grad_norm": 3.765625, "learning_rate": 4.907791653343153e-05, "loss": 0.6365, "step": 2038 }, { "epoch": 0.08957484878755612, "grad_norm": 3.0, "learning_rate": 4.907605401078814e-05, "loss": 0.6033, "step": 2040 }, { "epoch": 0.0896626672667596, "grad_norm": 3.28125, "learning_rate": 4.907418964439475e-05, "loss": 0.6359, "step": 2042 }, { "epoch": 0.08975048574596309, "grad_norm": 3.296875, "learning_rate": 4.907232343439415e-05, "loss": 0.6408, "step": 2044 }, { "epoch": 0.08983830422516659, "grad_norm": 2.9375, "learning_rate": 4.907045538092926e-05, "loss": 0.6232, "step": 2046 }, { "epoch": 0.08992612270437007, "grad_norm": 3.171875, "learning_rate": 4.906858548414311e-05, "loss": 0.6624, "step": 2048 }, { "epoch": 0.09001394118357356, "grad_norm": 3.5625, "learning_rate": 4.906671374417893e-05, "loss": 0.6527, "step": 2050 }, { "epoch": 0.09010175966277705, "grad_norm": 4.875, "learning_rate": 4.906484016118004e-05, "loss": 0.6559, "step": 2052 }, { "epoch": 0.09018957814198053, "grad_norm": 3.453125, "learning_rate": 4.906296473528991e-05, "loss": 0.6431, "step": 2054 }, { "epoch": 0.09027739662118402, "grad_norm": 3.609375, "learning_rate": 4.9061087466652183e-05, "loss": 0.6458, "step": 2056 }, { "epoch": 0.0903652151003875, "grad_norm": 3.015625, "learning_rate": 4.905920835541061e-05, "loss": 0.6394, "step": 2058 }, { "epoch": 0.09045303357959099, "grad_norm": 3.90625, "learning_rate": 4.9057327401709084e-05, "loss": 0.6526, "step": 2060 }, { "epoch": 0.09054085205879447, "grad_norm": 4.71875, "learning_rate": 4.905544460569167e-05, "loss": 0.668, "step": 2062 }, { "epoch": 0.09062867053799796, "grad_norm": 3.546875, "learning_rate": 4.9053559967502535e-05, "loss": 0.6496, "step": 2064 }, { "epoch": 0.09071648901720145, "grad_norm": 2.921875, "learning_rate": 4.905167348728601e-05, "loss": 0.6582, "step": 2066 }, { "epoch": 0.09080430749640493, "grad_norm": 3.3125, "learning_rate": 4.904978516518657e-05, "loss": 0.6153, "step": 2068 }, { "epoch": 0.09089212597560842, "grad_norm": 4.65625, "learning_rate": 4.904789500134881e-05, "loss": 0.6161, "step": 2070 }, { "epoch": 0.0909799444548119, "grad_norm": 6.375, "learning_rate": 4.90460029959175e-05, "loss": 0.6207, "step": 2072 }, { "epoch": 0.09106776293401539, "grad_norm": 5.5, "learning_rate": 4.90441091490375e-05, "loss": 0.6453, "step": 2074 }, { "epoch": 0.09115558141321888, "grad_norm": 5.8125, "learning_rate": 4.904221346085387e-05, "loss": 0.6337, "step": 2076 }, { "epoch": 0.09124339989242236, "grad_norm": 4.8125, "learning_rate": 4.904031593151176e-05, "loss": 0.6836, "step": 2078 }, { "epoch": 0.09133121837162585, "grad_norm": 3.25, "learning_rate": 4.90384165611565e-05, "loss": 0.6126, "step": 2080 }, { "epoch": 0.09141903685082933, "grad_norm": 3.546875, "learning_rate": 4.9036515349933534e-05, "loss": 0.6287, "step": 2082 }, { "epoch": 0.09150685533003282, "grad_norm": 3.015625, "learning_rate": 4.903461229798846e-05, "loss": 0.6347, "step": 2084 }, { "epoch": 0.0915946738092363, "grad_norm": 3.640625, "learning_rate": 4.903270740546701e-05, "loss": 0.6548, "step": 2086 }, { "epoch": 0.09168249228843979, "grad_norm": 3.796875, "learning_rate": 4.9030800672515075e-05, "loss": 0.639, "step": 2088 }, { "epoch": 0.09177031076764328, "grad_norm": 2.890625, "learning_rate": 4.902889209927866e-05, "loss": 0.6299, "step": 2090 }, { "epoch": 0.09185812924684676, "grad_norm": 3.34375, "learning_rate": 4.902698168590393e-05, "loss": 0.6132, "step": 2092 }, { "epoch": 0.09194594772605025, "grad_norm": 3.234375, "learning_rate": 4.902506943253717e-05, "loss": 0.6099, "step": 2094 }, { "epoch": 0.09203376620525373, "grad_norm": 3.4375, "learning_rate": 4.902315533932485e-05, "loss": 0.6253, "step": 2096 }, { "epoch": 0.09212158468445723, "grad_norm": 3.0625, "learning_rate": 4.9021239406413534e-05, "loss": 0.6292, "step": 2098 }, { "epoch": 0.09220940316366072, "grad_norm": 4.09375, "learning_rate": 4.901932163394994e-05, "loss": 0.6073, "step": 2100 }, { "epoch": 0.0922972216428642, "grad_norm": 3.84375, "learning_rate": 4.901740202208094e-05, "loss": 0.6577, "step": 2102 }, { "epoch": 0.09238504012206769, "grad_norm": 3.3125, "learning_rate": 4.901548057095353e-05, "loss": 0.6495, "step": 2104 }, { "epoch": 0.09247285860127118, "grad_norm": 3.40625, "learning_rate": 4.9013557280714874e-05, "loss": 0.6435, "step": 2106 }, { "epoch": 0.09256067708047466, "grad_norm": 3.484375, "learning_rate": 4.901163215151223e-05, "loss": 0.6131, "step": 2108 }, { "epoch": 0.09264849555967815, "grad_norm": 3.34375, "learning_rate": 4.900970518349305e-05, "loss": 0.6703, "step": 2110 }, { "epoch": 0.09273631403888163, "grad_norm": 2.828125, "learning_rate": 4.900777637680489e-05, "loss": 0.6473, "step": 2112 }, { "epoch": 0.09282413251808512, "grad_norm": 3.015625, "learning_rate": 4.9005845731595456e-05, "loss": 0.6411, "step": 2114 }, { "epoch": 0.0929119509972886, "grad_norm": 3.34375, "learning_rate": 4.9003913248012605e-05, "loss": 0.6409, "step": 2116 }, { "epoch": 0.09299976947649209, "grad_norm": 3.03125, "learning_rate": 4.900197892620432e-05, "loss": 0.6326, "step": 2118 }, { "epoch": 0.09308758795569558, "grad_norm": 3.0625, "learning_rate": 4.9000042766318744e-05, "loss": 0.617, "step": 2120 }, { "epoch": 0.09317540643489906, "grad_norm": 4.28125, "learning_rate": 4.899810476850413e-05, "loss": 0.627, "step": 2122 }, { "epoch": 0.09326322491410255, "grad_norm": 3.171875, "learning_rate": 4.899616493290891e-05, "loss": 0.6311, "step": 2124 }, { "epoch": 0.09335104339330604, "grad_norm": 3.0625, "learning_rate": 4.8994223259681615e-05, "loss": 0.6436, "step": 2126 }, { "epoch": 0.09343886187250952, "grad_norm": 3.4375, "learning_rate": 4.899227974897095e-05, "loss": 0.6159, "step": 2128 }, { "epoch": 0.093526680351713, "grad_norm": 3.515625, "learning_rate": 4.899033440092576e-05, "loss": 0.6209, "step": 2130 }, { "epoch": 0.09361449883091649, "grad_norm": 3.84375, "learning_rate": 4.8988387215695007e-05, "loss": 0.643, "step": 2132 }, { "epoch": 0.09370231731011998, "grad_norm": 3.859375, "learning_rate": 4.898643819342781e-05, "loss": 0.6131, "step": 2134 }, { "epoch": 0.09379013578932346, "grad_norm": 3.53125, "learning_rate": 4.898448733427343e-05, "loss": 0.6001, "step": 2136 }, { "epoch": 0.09387795426852695, "grad_norm": 3.328125, "learning_rate": 4.898253463838126e-05, "loss": 0.6278, "step": 2138 }, { "epoch": 0.09396577274773044, "grad_norm": 3.75, "learning_rate": 4.898058010590083e-05, "loss": 0.6501, "step": 2140 }, { "epoch": 0.09405359122693392, "grad_norm": 3.234375, "learning_rate": 4.897862373698184e-05, "loss": 0.6412, "step": 2142 }, { "epoch": 0.09414140970613741, "grad_norm": 3.796875, "learning_rate": 4.8976665531774094e-05, "loss": 0.6422, "step": 2144 }, { "epoch": 0.0942292281853409, "grad_norm": 6.4375, "learning_rate": 4.897470549042754e-05, "loss": 0.6464, "step": 2146 }, { "epoch": 0.0943170466645444, "grad_norm": 4.4375, "learning_rate": 4.8972743613092304e-05, "loss": 0.615, "step": 2148 }, { "epoch": 0.09440486514374788, "grad_norm": 4.375, "learning_rate": 4.897077989991862e-05, "loss": 0.6224, "step": 2150 }, { "epoch": 0.09449268362295137, "grad_norm": 5.0, "learning_rate": 4.896881435105685e-05, "loss": 0.6222, "step": 2152 }, { "epoch": 0.09458050210215485, "grad_norm": 4.625, "learning_rate": 4.896684696665754e-05, "loss": 0.632, "step": 2154 }, { "epoch": 0.09466832058135834, "grad_norm": 4.21875, "learning_rate": 4.896487774687135e-05, "loss": 0.6198, "step": 2156 }, { "epoch": 0.09475613906056182, "grad_norm": 4.28125, "learning_rate": 4.8962906691849066e-05, "loss": 0.6298, "step": 2158 }, { "epoch": 0.09484395753976531, "grad_norm": 3.4375, "learning_rate": 4.8960933801741646e-05, "loss": 0.6407, "step": 2160 }, { "epoch": 0.0949317760189688, "grad_norm": 3.328125, "learning_rate": 4.895895907670017e-05, "loss": 0.6368, "step": 2162 }, { "epoch": 0.09501959449817228, "grad_norm": 3.640625, "learning_rate": 4.895698251687587e-05, "loss": 0.6023, "step": 2164 }, { "epoch": 0.09510741297737577, "grad_norm": 3.359375, "learning_rate": 4.895500412242011e-05, "loss": 0.611, "step": 2166 }, { "epoch": 0.09519523145657925, "grad_norm": 3.859375, "learning_rate": 4.895302389348438e-05, "loss": 0.6478, "step": 2168 }, { "epoch": 0.09528304993578274, "grad_norm": 4.34375, "learning_rate": 4.8951041830220344e-05, "loss": 0.6434, "step": 2170 }, { "epoch": 0.09537086841498622, "grad_norm": 3.765625, "learning_rate": 4.8949057932779784e-05, "loss": 0.6129, "step": 2172 }, { "epoch": 0.09545868689418971, "grad_norm": 5.375, "learning_rate": 4.894707220131463e-05, "loss": 0.6264, "step": 2174 }, { "epoch": 0.0955465053733932, "grad_norm": 4.78125, "learning_rate": 4.8945084635976944e-05, "loss": 0.64, "step": 2176 }, { "epoch": 0.09563432385259668, "grad_norm": 3.09375, "learning_rate": 4.894309523691893e-05, "loss": 0.6259, "step": 2178 }, { "epoch": 0.09572214233180017, "grad_norm": 4.03125, "learning_rate": 4.8941104004292955e-05, "loss": 0.6351, "step": 2180 }, { "epoch": 0.09580996081100365, "grad_norm": 4.09375, "learning_rate": 4.8939110938251485e-05, "loss": 0.6288, "step": 2182 }, { "epoch": 0.09589777929020714, "grad_norm": 4.34375, "learning_rate": 4.8937116038947164e-05, "loss": 0.612, "step": 2184 }, { "epoch": 0.09598559776941062, "grad_norm": 3.46875, "learning_rate": 4.8935119306532764e-05, "loss": 0.6268, "step": 2186 }, { "epoch": 0.09607341624861411, "grad_norm": 3.703125, "learning_rate": 4.893312074116119e-05, "loss": 0.609, "step": 2188 }, { "epoch": 0.0961612347278176, "grad_norm": 3.625, "learning_rate": 4.893112034298548e-05, "loss": 0.6281, "step": 2190 }, { "epoch": 0.09624905320702108, "grad_norm": 3.265625, "learning_rate": 4.892911811215885e-05, "loss": 0.6052, "step": 2192 }, { "epoch": 0.09633687168622457, "grad_norm": 3.484375, "learning_rate": 4.8927114048834613e-05, "loss": 0.6247, "step": 2194 }, { "epoch": 0.09642469016542805, "grad_norm": 2.796875, "learning_rate": 4.892510815316625e-05, "loss": 0.6106, "step": 2196 }, { "epoch": 0.09651250864463155, "grad_norm": 3.328125, "learning_rate": 4.8923100425307365e-05, "loss": 0.6224, "step": 2198 }, { "epoch": 0.09660032712383504, "grad_norm": 3.078125, "learning_rate": 4.892109086541172e-05, "loss": 0.6136, "step": 2200 }, { "epoch": 0.09668814560303853, "grad_norm": 3.0625, "learning_rate": 4.89190794736332e-05, "loss": 0.6062, "step": 2202 }, { "epoch": 0.09677596408224201, "grad_norm": 3.265625, "learning_rate": 4.8917066250125834e-05, "loss": 0.65, "step": 2204 }, { "epoch": 0.0968637825614455, "grad_norm": 3.40625, "learning_rate": 4.891505119504381e-05, "loss": 0.5928, "step": 2206 }, { "epoch": 0.09695160104064898, "grad_norm": 3.84375, "learning_rate": 4.8913034308541425e-05, "loss": 0.6516, "step": 2208 }, { "epoch": 0.09703941951985247, "grad_norm": 2.828125, "learning_rate": 4.8911015590773145e-05, "loss": 0.6375, "step": 2210 }, { "epoch": 0.09712723799905595, "grad_norm": 3.28125, "learning_rate": 4.890899504189356e-05, "loss": 0.6432, "step": 2212 }, { "epoch": 0.09721505647825944, "grad_norm": 4.1875, "learning_rate": 4.8906972662057406e-05, "loss": 0.6211, "step": 2214 }, { "epoch": 0.09730287495746293, "grad_norm": 3.46875, "learning_rate": 4.890494845141955e-05, "loss": 0.6126, "step": 2216 }, { "epoch": 0.09739069343666641, "grad_norm": 3.265625, "learning_rate": 4.890292241013501e-05, "loss": 0.6094, "step": 2218 }, { "epoch": 0.0974785119158699, "grad_norm": 3.328125, "learning_rate": 4.8900894538358944e-05, "loss": 0.6204, "step": 2220 }, { "epoch": 0.09756633039507338, "grad_norm": 2.953125, "learning_rate": 4.889886483624664e-05, "loss": 0.6227, "step": 2222 }, { "epoch": 0.09765414887427687, "grad_norm": 4.09375, "learning_rate": 4.889683330395355e-05, "loss": 0.6102, "step": 2224 }, { "epoch": 0.09774196735348036, "grad_norm": 3.625, "learning_rate": 4.889479994163523e-05, "loss": 0.6213, "step": 2226 }, { "epoch": 0.09782978583268384, "grad_norm": 3.125, "learning_rate": 4.8892764749447395e-05, "loss": 0.61, "step": 2228 }, { "epoch": 0.09791760431188733, "grad_norm": 3.0, "learning_rate": 4.8890727727545916e-05, "loss": 0.6275, "step": 2230 }, { "epoch": 0.09800542279109081, "grad_norm": 3.0, "learning_rate": 4.8888688876086786e-05, "loss": 0.6525, "step": 2232 }, { "epoch": 0.0980932412702943, "grad_norm": 3.671875, "learning_rate": 4.8886648195226124e-05, "loss": 0.6094, "step": 2234 }, { "epoch": 0.09818105974949778, "grad_norm": 4.5625, "learning_rate": 4.8884605685120224e-05, "loss": 0.6006, "step": 2236 }, { "epoch": 0.09826887822870127, "grad_norm": 5.5, "learning_rate": 4.888256134592549e-05, "loss": 0.6413, "step": 2238 }, { "epoch": 0.09835669670790476, "grad_norm": 6.125, "learning_rate": 4.888051517779849e-05, "loss": 0.6073, "step": 2240 }, { "epoch": 0.09844451518710824, "grad_norm": 4.875, "learning_rate": 4.8878467180895906e-05, "loss": 0.5941, "step": 2242 }, { "epoch": 0.09853233366631173, "grad_norm": 4.53125, "learning_rate": 4.887641735537459e-05, "loss": 0.6117, "step": 2244 }, { "epoch": 0.09862015214551521, "grad_norm": 5.46875, "learning_rate": 4.88743657013915e-05, "loss": 0.6266, "step": 2246 }, { "epoch": 0.09870797062471871, "grad_norm": 4.84375, "learning_rate": 4.887231221910376e-05, "loss": 0.6203, "step": 2248 }, { "epoch": 0.0987957891039222, "grad_norm": 3.109375, "learning_rate": 4.8870256908668646e-05, "loss": 0.618, "step": 2250 }, { "epoch": 0.09888360758312568, "grad_norm": 2.890625, "learning_rate": 4.886819977024352e-05, "loss": 0.6034, "step": 2252 }, { "epoch": 0.09897142606232917, "grad_norm": 3.25, "learning_rate": 4.886614080398594e-05, "loss": 0.6134, "step": 2254 }, { "epoch": 0.09905924454153266, "grad_norm": 3.09375, "learning_rate": 4.886408001005357e-05, "loss": 0.6086, "step": 2256 }, { "epoch": 0.09914706302073614, "grad_norm": 3.953125, "learning_rate": 4.886201738860423e-05, "loss": 0.6463, "step": 2258 }, { "epoch": 0.09923488149993963, "grad_norm": 4.3125, "learning_rate": 4.885995293979589e-05, "loss": 0.6026, "step": 2260 }, { "epoch": 0.09932269997914311, "grad_norm": 4.78125, "learning_rate": 4.8857886663786626e-05, "loss": 0.6457, "step": 2262 }, { "epoch": 0.0994105184583466, "grad_norm": 4.375, "learning_rate": 4.885581856073468e-05, "loss": 0.5953, "step": 2264 }, { "epoch": 0.09949833693755009, "grad_norm": 5.03125, "learning_rate": 4.8853748630798434e-05, "loss": 0.606, "step": 2266 }, { "epoch": 0.09958615541675357, "grad_norm": 6.4375, "learning_rate": 4.88516768741364e-05, "loss": 0.6011, "step": 2268 }, { "epoch": 0.09967397389595706, "grad_norm": 5.1875, "learning_rate": 4.8849603290907234e-05, "loss": 0.6245, "step": 2270 }, { "epoch": 0.09976179237516054, "grad_norm": 3.234375, "learning_rate": 4.884752788126973e-05, "loss": 0.6454, "step": 2272 }, { "epoch": 0.09984961085436403, "grad_norm": 3.15625, "learning_rate": 4.884545064538283e-05, "loss": 0.6246, "step": 2274 }, { "epoch": 0.09993742933356752, "grad_norm": 2.96875, "learning_rate": 4.884337158340559e-05, "loss": 0.6095, "step": 2276 }, { "epoch": 0.100025247812771, "grad_norm": 3.6875, "learning_rate": 4.884129069549726e-05, "loss": 0.6126, "step": 2278 }, { "epoch": 0.10011306629197449, "grad_norm": 3.796875, "learning_rate": 4.883920798181715e-05, "loss": 0.621, "step": 2280 }, { "epoch": 0.10020088477117797, "grad_norm": 3.9375, "learning_rate": 4.883712344252479e-05, "loss": 0.604, "step": 2282 }, { "epoch": 0.10028870325038146, "grad_norm": 3.6875, "learning_rate": 4.88350370777798e-05, "loss": 0.607, "step": 2284 }, { "epoch": 0.10037652172958494, "grad_norm": 2.984375, "learning_rate": 4.8832948887741956e-05, "loss": 0.6061, "step": 2286 }, { "epoch": 0.10046434020878843, "grad_norm": 3.296875, "learning_rate": 4.883085887257117e-05, "loss": 0.6153, "step": 2288 }, { "epoch": 0.10055215868799192, "grad_norm": 3.890625, "learning_rate": 4.882876703242751e-05, "loss": 0.6025, "step": 2290 }, { "epoch": 0.1006399771671954, "grad_norm": 4.03125, "learning_rate": 4.882667336747115e-05, "loss": 0.5971, "step": 2292 }, { "epoch": 0.10072779564639889, "grad_norm": 3.015625, "learning_rate": 4.882457787786243e-05, "loss": 0.5883, "step": 2294 }, { "epoch": 0.10081561412560237, "grad_norm": 3.09375, "learning_rate": 4.882248056376183e-05, "loss": 0.6166, "step": 2296 }, { "epoch": 0.10090343260480586, "grad_norm": 3.5625, "learning_rate": 4.882038142532995e-05, "loss": 0.6313, "step": 2298 }, { "epoch": 0.10099125108400936, "grad_norm": 2.859375, "learning_rate": 4.881828046272756e-05, "loss": 0.5937, "step": 2300 }, { "epoch": 0.10107906956321284, "grad_norm": 2.84375, "learning_rate": 4.881617767611554e-05, "loss": 0.6125, "step": 2302 }, { "epoch": 0.10116688804241633, "grad_norm": 3.390625, "learning_rate": 4.881407306565492e-05, "loss": 0.596, "step": 2304 }, { "epoch": 0.10125470652161982, "grad_norm": 3.296875, "learning_rate": 4.881196663150689e-05, "loss": 0.6083, "step": 2306 }, { "epoch": 0.1013425250008233, "grad_norm": 3.484375, "learning_rate": 4.8809858373832726e-05, "loss": 0.597, "step": 2308 }, { "epoch": 0.10143034348002679, "grad_norm": 3.25, "learning_rate": 4.880774829279392e-05, "loss": 0.6188, "step": 2310 }, { "epoch": 0.10151816195923027, "grad_norm": 2.9375, "learning_rate": 4.8805636388552035e-05, "loss": 0.6203, "step": 2312 }, { "epoch": 0.10160598043843376, "grad_norm": 3.078125, "learning_rate": 4.8803522661268805e-05, "loss": 0.6083, "step": 2314 }, { "epoch": 0.10169379891763725, "grad_norm": 3.140625, "learning_rate": 4.88014071111061e-05, "loss": 0.6002, "step": 2316 }, { "epoch": 0.10178161739684073, "grad_norm": 3.65625, "learning_rate": 4.8799289738225936e-05, "loss": 0.6068, "step": 2318 }, { "epoch": 0.10186943587604422, "grad_norm": 3.28125, "learning_rate": 4.879717054279047e-05, "loss": 0.6208, "step": 2320 }, { "epoch": 0.1019572543552477, "grad_norm": 2.953125, "learning_rate": 4.879504952496197e-05, "loss": 0.6341, "step": 2322 }, { "epoch": 0.10204507283445119, "grad_norm": 3.4375, "learning_rate": 4.8792926684902875e-05, "loss": 0.6172, "step": 2324 }, { "epoch": 0.10213289131365468, "grad_norm": 3.34375, "learning_rate": 4.879080202277575e-05, "loss": 0.5982, "step": 2326 }, { "epoch": 0.10222070979285816, "grad_norm": 4.09375, "learning_rate": 4.87886755387433e-05, "loss": 0.6081, "step": 2328 }, { "epoch": 0.10230852827206165, "grad_norm": 3.265625, "learning_rate": 4.878654723296838e-05, "loss": 0.6505, "step": 2330 }, { "epoch": 0.10239634675126513, "grad_norm": 3.453125, "learning_rate": 4.878441710561397e-05, "loss": 0.6074, "step": 2332 }, { "epoch": 0.10248416523046862, "grad_norm": 3.125, "learning_rate": 4.878228515684319e-05, "loss": 0.6145, "step": 2334 }, { "epoch": 0.1025719837096721, "grad_norm": 3.734375, "learning_rate": 4.878015138681932e-05, "loss": 0.6188, "step": 2336 }, { "epoch": 0.10265980218887559, "grad_norm": 4.1875, "learning_rate": 4.877801579570575e-05, "loss": 0.6209, "step": 2338 }, { "epoch": 0.10274762066807908, "grad_norm": 2.765625, "learning_rate": 4.8775878383666035e-05, "loss": 0.6161, "step": 2340 }, { "epoch": 0.10283543914728256, "grad_norm": 2.890625, "learning_rate": 4.877373915086385e-05, "loss": 0.6258, "step": 2342 }, { "epoch": 0.10292325762648605, "grad_norm": 3.59375, "learning_rate": 4.8771598097463026e-05, "loss": 0.6049, "step": 2344 }, { "epoch": 0.10301107610568953, "grad_norm": 3.546875, "learning_rate": 4.876945522362752e-05, "loss": 0.6479, "step": 2346 }, { "epoch": 0.10309889458489302, "grad_norm": 3.6875, "learning_rate": 4.876731052952144e-05, "loss": 0.6041, "step": 2348 }, { "epoch": 0.10318671306409652, "grad_norm": 2.9375, "learning_rate": 4.876516401530901e-05, "loss": 0.6092, "step": 2350 }, { "epoch": 0.1032745315433, "grad_norm": 3.515625, "learning_rate": 4.876301568115463e-05, "loss": 0.5955, "step": 2352 }, { "epoch": 0.10336235002250349, "grad_norm": 3.546875, "learning_rate": 4.876086552722281e-05, "loss": 0.6191, "step": 2354 }, { "epoch": 0.10345016850170698, "grad_norm": 2.890625, "learning_rate": 4.875871355367822e-05, "loss": 0.6051, "step": 2356 }, { "epoch": 0.10353798698091046, "grad_norm": 2.90625, "learning_rate": 4.8756559760685644e-05, "loss": 0.601, "step": 2358 }, { "epoch": 0.10362580546011395, "grad_norm": 3.125, "learning_rate": 4.8754404148410025e-05, "loss": 0.6016, "step": 2360 }, { "epoch": 0.10371362393931743, "grad_norm": 5.03125, "learning_rate": 4.875224671701645e-05, "loss": 0.633, "step": 2362 }, { "epoch": 0.10380144241852092, "grad_norm": 4.875, "learning_rate": 4.8750087466670116e-05, "loss": 0.6384, "step": 2364 }, { "epoch": 0.1038892608977244, "grad_norm": 4.3125, "learning_rate": 4.87479263975364e-05, "loss": 0.6113, "step": 2366 }, { "epoch": 0.10397707937692789, "grad_norm": 3.78125, "learning_rate": 4.8745763509780785e-05, "loss": 0.5765, "step": 2368 }, { "epoch": 0.10406489785613138, "grad_norm": 3.65625, "learning_rate": 4.874359880356891e-05, "loss": 0.6533, "step": 2370 }, { "epoch": 0.10415271633533486, "grad_norm": 3.671875, "learning_rate": 4.874143227906654e-05, "loss": 0.6002, "step": 2372 }, { "epoch": 0.10424053481453835, "grad_norm": 4.4375, "learning_rate": 4.87392639364396e-05, "loss": 0.5987, "step": 2374 }, { "epoch": 0.10432835329374184, "grad_norm": 4.0625, "learning_rate": 4.873709377585414e-05, "loss": 0.6137, "step": 2376 }, { "epoch": 0.10441617177294532, "grad_norm": 4.5625, "learning_rate": 4.873492179747634e-05, "loss": 0.6091, "step": 2378 }, { "epoch": 0.1045039902521488, "grad_norm": 4.75, "learning_rate": 4.873274800147255e-05, "loss": 0.6199, "step": 2380 }, { "epoch": 0.10459180873135229, "grad_norm": 4.5625, "learning_rate": 4.873057238800922e-05, "loss": 0.6132, "step": 2382 }, { "epoch": 0.10467962721055578, "grad_norm": 6.375, "learning_rate": 4.872839495725297e-05, "loss": 0.6083, "step": 2384 }, { "epoch": 0.10476744568975926, "grad_norm": 4.46875, "learning_rate": 4.8726215709370546e-05, "loss": 0.6209, "step": 2386 }, { "epoch": 0.10485526416896275, "grad_norm": 2.6875, "learning_rate": 4.872403464452884e-05, "loss": 0.6004, "step": 2388 }, { "epoch": 0.10494308264816624, "grad_norm": 2.96875, "learning_rate": 4.8721851762894865e-05, "loss": 0.5997, "step": 2390 }, { "epoch": 0.10503090112736972, "grad_norm": 4.0625, "learning_rate": 4.871966706463581e-05, "loss": 0.6023, "step": 2392 }, { "epoch": 0.10511871960657321, "grad_norm": 3.25, "learning_rate": 4.871748054991895e-05, "loss": 0.5691, "step": 2394 }, { "epoch": 0.1052065380857767, "grad_norm": 3.59375, "learning_rate": 4.871529221891175e-05, "loss": 0.6085, "step": 2396 }, { "epoch": 0.10529435656498018, "grad_norm": 4.46875, "learning_rate": 4.871310207178179e-05, "loss": 0.6212, "step": 2398 }, { "epoch": 0.10538217504418368, "grad_norm": 4.03125, "learning_rate": 4.8710910108696786e-05, "loss": 0.6219, "step": 2400 }, { "epoch": 0.10546999352338716, "grad_norm": 3.953125, "learning_rate": 4.87087163298246e-05, "loss": 0.6105, "step": 2402 }, { "epoch": 0.10555781200259065, "grad_norm": 3.96875, "learning_rate": 4.870652073533324e-05, "loss": 0.621, "step": 2404 }, { "epoch": 0.10564563048179414, "grad_norm": 3.828125, "learning_rate": 4.8704323325390834e-05, "loss": 0.6141, "step": 2406 }, { "epoch": 0.10573344896099762, "grad_norm": 3.8125, "learning_rate": 4.8702124100165666e-05, "loss": 0.6142, "step": 2408 }, { "epoch": 0.10582126744020111, "grad_norm": 3.46875, "learning_rate": 4.869992305982615e-05, "loss": 0.5919, "step": 2410 }, { "epoch": 0.1059090859194046, "grad_norm": 2.859375, "learning_rate": 4.8697720204540846e-05, "loss": 0.5876, "step": 2412 }, { "epoch": 0.10599690439860808, "grad_norm": 2.90625, "learning_rate": 4.8695515534478456e-05, "loss": 0.6464, "step": 2414 }, { "epoch": 0.10608472287781157, "grad_norm": 3.0625, "learning_rate": 4.8693309049807795e-05, "loss": 0.5842, "step": 2416 }, { "epoch": 0.10617254135701505, "grad_norm": 3.40625, "learning_rate": 4.8691100750697856e-05, "loss": 0.5864, "step": 2418 }, { "epoch": 0.10626035983621854, "grad_norm": 3.296875, "learning_rate": 4.8688890637317734e-05, "loss": 0.6038, "step": 2420 }, { "epoch": 0.10634817831542202, "grad_norm": 3.375, "learning_rate": 4.86866787098367e-05, "loss": 0.6135, "step": 2422 }, { "epoch": 0.10643599679462551, "grad_norm": 3.5, "learning_rate": 4.868446496842412e-05, "loss": 0.6135, "step": 2424 }, { "epoch": 0.106523815273829, "grad_norm": 2.921875, "learning_rate": 4.868224941324954e-05, "loss": 0.6238, "step": 2426 }, { "epoch": 0.10661163375303248, "grad_norm": 2.859375, "learning_rate": 4.868003204448263e-05, "loss": 0.5728, "step": 2428 }, { "epoch": 0.10669945223223597, "grad_norm": 3.0, "learning_rate": 4.8677812862293184e-05, "loss": 0.5991, "step": 2430 }, { "epoch": 0.10678727071143945, "grad_norm": 2.578125, "learning_rate": 4.867559186685115e-05, "loss": 0.6117, "step": 2432 }, { "epoch": 0.10687508919064294, "grad_norm": 3.3125, "learning_rate": 4.867336905832661e-05, "loss": 0.6006, "step": 2434 }, { "epoch": 0.10696290766984642, "grad_norm": 3.375, "learning_rate": 4.8671144436889805e-05, "loss": 0.5793, "step": 2436 }, { "epoch": 0.10705072614904991, "grad_norm": 3.140625, "learning_rate": 4.866891800271108e-05, "loss": 0.5901, "step": 2438 }, { "epoch": 0.1071385446282534, "grad_norm": 3.0, "learning_rate": 4.8666689755960936e-05, "loss": 0.6089, "step": 2440 }, { "epoch": 0.10722636310745688, "grad_norm": 3.046875, "learning_rate": 4.866445969681003e-05, "loss": 0.6008, "step": 2442 }, { "epoch": 0.10731418158666037, "grad_norm": 3.328125, "learning_rate": 4.866222782542912e-05, "loss": 0.6111, "step": 2444 }, { "epoch": 0.10740200006586385, "grad_norm": 3.078125, "learning_rate": 4.865999414198913e-05, "loss": 0.6166, "step": 2446 }, { "epoch": 0.10748981854506734, "grad_norm": 3.03125, "learning_rate": 4.865775864666111e-05, "loss": 0.5883, "step": 2448 }, { "epoch": 0.10757763702427084, "grad_norm": 3.046875, "learning_rate": 4.8655521339616274e-05, "loss": 0.5968, "step": 2450 }, { "epoch": 0.10766545550347432, "grad_norm": 3.734375, "learning_rate": 4.865328222102594e-05, "loss": 0.61, "step": 2452 }, { "epoch": 0.10775327398267781, "grad_norm": 3.734375, "learning_rate": 4.865104129106158e-05, "loss": 0.5919, "step": 2454 }, { "epoch": 0.1078410924618813, "grad_norm": 2.921875, "learning_rate": 4.86487985498948e-05, "loss": 0.6421, "step": 2456 }, { "epoch": 0.10792891094108478, "grad_norm": 3.3125, "learning_rate": 4.8646553997697375e-05, "loss": 0.6171, "step": 2458 }, { "epoch": 0.10801672942028827, "grad_norm": 2.875, "learning_rate": 4.864430763464117e-05, "loss": 0.6143, "step": 2460 }, { "epoch": 0.10810454789949175, "grad_norm": 3.390625, "learning_rate": 4.8642059460898214e-05, "loss": 0.616, "step": 2462 }, { "epoch": 0.10819236637869524, "grad_norm": 2.875, "learning_rate": 4.8639809476640685e-05, "loss": 0.586, "step": 2464 }, { "epoch": 0.10828018485789873, "grad_norm": 3.8125, "learning_rate": 4.8637557682040876e-05, "loss": 0.581, "step": 2466 }, { "epoch": 0.10836800333710221, "grad_norm": 3.53125, "learning_rate": 4.863530407727123e-05, "loss": 0.6136, "step": 2468 }, { "epoch": 0.1084558218163057, "grad_norm": 3.75, "learning_rate": 4.863304866250433e-05, "loss": 0.6392, "step": 2470 }, { "epoch": 0.10854364029550918, "grad_norm": 2.9375, "learning_rate": 4.86307914379129e-05, "loss": 0.5975, "step": 2472 }, { "epoch": 0.10863145877471267, "grad_norm": 3.5, "learning_rate": 4.8628532403669805e-05, "loss": 0.6307, "step": 2474 }, { "epoch": 0.10871927725391616, "grad_norm": 3.0625, "learning_rate": 4.8626271559948036e-05, "loss": 0.6062, "step": 2476 }, { "epoch": 0.10880709573311964, "grad_norm": 3.34375, "learning_rate": 4.8624008906920714e-05, "loss": 0.6003, "step": 2478 }, { "epoch": 0.10889491421232313, "grad_norm": 3.0625, "learning_rate": 4.862174444476113e-05, "loss": 0.6069, "step": 2480 }, { "epoch": 0.10898273269152661, "grad_norm": 2.84375, "learning_rate": 4.86194781736427e-05, "loss": 0.5926, "step": 2482 }, { "epoch": 0.1090705511707301, "grad_norm": 3.5625, "learning_rate": 4.861721009373897e-05, "loss": 0.6071, "step": 2484 }, { "epoch": 0.10915836964993358, "grad_norm": 3.046875, "learning_rate": 4.8614940205223625e-05, "loss": 0.585, "step": 2486 }, { "epoch": 0.10924618812913707, "grad_norm": 3.6875, "learning_rate": 4.86126685082705e-05, "loss": 0.5705, "step": 2488 }, { "epoch": 0.10933400660834056, "grad_norm": 2.953125, "learning_rate": 4.861039500305356e-05, "loss": 0.6318, "step": 2490 }, { "epoch": 0.10942182508754404, "grad_norm": 3.203125, "learning_rate": 4.860811968974691e-05, "loss": 0.5763, "step": 2492 }, { "epoch": 0.10950964356674753, "grad_norm": 3.46875, "learning_rate": 4.86058425685248e-05, "loss": 0.619, "step": 2494 }, { "epoch": 0.10959746204595101, "grad_norm": 3.640625, "learning_rate": 4.86035636395616e-05, "loss": 0.6244, "step": 2496 }, { "epoch": 0.1096852805251545, "grad_norm": 3.265625, "learning_rate": 4.860128290303184e-05, "loss": 0.5996, "step": 2498 }, { "epoch": 0.10977309900435799, "grad_norm": 3.203125, "learning_rate": 4.859900035911018e-05, "loss": 0.5985, "step": 2500 }, { "epoch": 0.10986091748356148, "grad_norm": 3.671875, "learning_rate": 4.859671600797141e-05, "loss": 0.6311, "step": 2502 }, { "epoch": 0.10994873596276497, "grad_norm": 4.46875, "learning_rate": 4.8594429849790476e-05, "loss": 0.6137, "step": 2504 }, { "epoch": 0.11003655444196846, "grad_norm": 4.53125, "learning_rate": 4.8592141884742445e-05, "loss": 0.5877, "step": 2506 }, { "epoch": 0.11012437292117194, "grad_norm": 3.265625, "learning_rate": 4.8589852113002546e-05, "loss": 0.5774, "step": 2508 }, { "epoch": 0.11021219140037543, "grad_norm": 2.984375, "learning_rate": 4.85875605347461e-05, "loss": 0.6018, "step": 2510 }, { "epoch": 0.11030000987957891, "grad_norm": 3.25, "learning_rate": 4.8585267150148625e-05, "loss": 0.5877, "step": 2512 }, { "epoch": 0.1103878283587824, "grad_norm": 4.59375, "learning_rate": 4.8582971959385735e-05, "loss": 0.5886, "step": 2514 }, { "epoch": 0.11047564683798589, "grad_norm": 3.6875, "learning_rate": 4.85806749626332e-05, "loss": 0.6101, "step": 2516 }, { "epoch": 0.11056346531718937, "grad_norm": 3.421875, "learning_rate": 4.8578376160066916e-05, "loss": 0.5614, "step": 2518 }, { "epoch": 0.11065128379639286, "grad_norm": 4.53125, "learning_rate": 4.857607555186294e-05, "loss": 0.596, "step": 2520 }, { "epoch": 0.11073910227559634, "grad_norm": 3.65625, "learning_rate": 4.857377313819745e-05, "loss": 0.6134, "step": 2522 }, { "epoch": 0.11082692075479983, "grad_norm": 2.96875, "learning_rate": 4.8571468919246755e-05, "loss": 0.5675, "step": 2524 }, { "epoch": 0.11091473923400332, "grad_norm": 3.484375, "learning_rate": 4.8569162895187324e-05, "loss": 0.5876, "step": 2526 }, { "epoch": 0.1110025577132068, "grad_norm": 3.953125, "learning_rate": 4.856685506619575e-05, "loss": 0.6046, "step": 2528 }, { "epoch": 0.11109037619241029, "grad_norm": 4.125, "learning_rate": 4.8564545432448763e-05, "loss": 0.5904, "step": 2530 }, { "epoch": 0.11117819467161377, "grad_norm": 3.515625, "learning_rate": 4.856223399412324e-05, "loss": 0.5889, "step": 2532 }, { "epoch": 0.11126601315081726, "grad_norm": 3.203125, "learning_rate": 4.855992075139618e-05, "loss": 0.5859, "step": 2534 }, { "epoch": 0.11135383163002074, "grad_norm": 3.578125, "learning_rate": 4.8557605704444754e-05, "loss": 0.6318, "step": 2536 }, { "epoch": 0.11144165010922423, "grad_norm": 3.234375, "learning_rate": 4.8555288853446226e-05, "loss": 0.6019, "step": 2538 }, { "epoch": 0.11152946858842772, "grad_norm": 3.46875, "learning_rate": 4.8552970198578044e-05, "loss": 0.5788, "step": 2540 }, { "epoch": 0.1116172870676312, "grad_norm": 3.015625, "learning_rate": 4.8550649740017744e-05, "loss": 0.6114, "step": 2542 }, { "epoch": 0.11170510554683469, "grad_norm": 2.9375, "learning_rate": 4.854832747794305e-05, "loss": 0.5551, "step": 2544 }, { "epoch": 0.11179292402603817, "grad_norm": 2.84375, "learning_rate": 4.8546003412531785e-05, "loss": 0.5562, "step": 2546 }, { "epoch": 0.11188074250524166, "grad_norm": 3.078125, "learning_rate": 4.854367754396194e-05, "loss": 0.6005, "step": 2548 }, { "epoch": 0.11196856098444515, "grad_norm": 3.65625, "learning_rate": 4.854134987241162e-05, "loss": 0.5951, "step": 2550 }, { "epoch": 0.11205637946364864, "grad_norm": 4.4375, "learning_rate": 4.85390203980591e-05, "loss": 0.5995, "step": 2552 }, { "epoch": 0.11214419794285213, "grad_norm": 4.9375, "learning_rate": 4.853668912108273e-05, "loss": 0.628, "step": 2554 }, { "epoch": 0.11223201642205562, "grad_norm": 3.75, "learning_rate": 4.8534356041661085e-05, "loss": 0.6037, "step": 2556 }, { "epoch": 0.1123198349012591, "grad_norm": 3.46875, "learning_rate": 4.8532021159972804e-05, "loss": 0.5707, "step": 2558 }, { "epoch": 0.11240765338046259, "grad_norm": 3.4375, "learning_rate": 4.8529684476196705e-05, "loss": 0.6165, "step": 2560 }, { "epoch": 0.11249547185966607, "grad_norm": 3.234375, "learning_rate": 4.852734599051173e-05, "loss": 0.6, "step": 2562 }, { "epoch": 0.11258329033886956, "grad_norm": 2.90625, "learning_rate": 4.852500570309695e-05, "loss": 0.5954, "step": 2564 }, { "epoch": 0.11267110881807305, "grad_norm": 2.765625, "learning_rate": 4.8522663614131603e-05, "loss": 0.6065, "step": 2566 }, { "epoch": 0.11275892729727653, "grad_norm": 2.9375, "learning_rate": 4.8520319723795036e-05, "loss": 0.6159, "step": 2568 }, { "epoch": 0.11284674577648002, "grad_norm": 3.40625, "learning_rate": 4.8517974032266745e-05, "loss": 0.5932, "step": 2570 }, { "epoch": 0.1129345642556835, "grad_norm": 2.921875, "learning_rate": 4.851562653972637e-05, "loss": 0.5993, "step": 2572 }, { "epoch": 0.11302238273488699, "grad_norm": 3.375, "learning_rate": 4.851327724635366e-05, "loss": 0.5716, "step": 2574 }, { "epoch": 0.11311020121409048, "grad_norm": 3.328125, "learning_rate": 4.851092615232856e-05, "loss": 0.5877, "step": 2576 }, { "epoch": 0.11319801969329396, "grad_norm": 3.40625, "learning_rate": 4.8508573257831094e-05, "loss": 0.5821, "step": 2578 }, { "epoch": 0.11328583817249745, "grad_norm": 3.03125, "learning_rate": 4.850621856304145e-05, "loss": 0.5923, "step": 2580 }, { "epoch": 0.11337365665170093, "grad_norm": 3.359375, "learning_rate": 4.850386206813996e-05, "loss": 0.6087, "step": 2582 }, { "epoch": 0.11346147513090442, "grad_norm": 3.40625, "learning_rate": 4.8501503773307075e-05, "loss": 0.6313, "step": 2584 }, { "epoch": 0.1135492936101079, "grad_norm": 3.40625, "learning_rate": 4.849914367872339e-05, "loss": 0.5768, "step": 2586 }, { "epoch": 0.11363711208931139, "grad_norm": 3.40625, "learning_rate": 4.849678178456966e-05, "loss": 0.5633, "step": 2588 }, { "epoch": 0.11372493056851488, "grad_norm": 3.015625, "learning_rate": 4.8494418091026745e-05, "loss": 0.5882, "step": 2590 }, { "epoch": 0.11381274904771836, "grad_norm": 3.015625, "learning_rate": 4.849205259827566e-05, "loss": 0.5957, "step": 2592 }, { "epoch": 0.11390056752692185, "grad_norm": 2.90625, "learning_rate": 4.8489685306497554e-05, "loss": 0.5899, "step": 2594 }, { "epoch": 0.11398838600612533, "grad_norm": 2.765625, "learning_rate": 4.8487316215873715e-05, "loss": 0.5853, "step": 2596 }, { "epoch": 0.11407620448532882, "grad_norm": 3.078125, "learning_rate": 4.848494532658557e-05, "loss": 0.5899, "step": 2598 }, { "epoch": 0.1141640229645323, "grad_norm": 3.21875, "learning_rate": 4.848257263881469e-05, "loss": 0.5872, "step": 2600 }, { "epoch": 0.1142518414437358, "grad_norm": 3.046875, "learning_rate": 4.848019815274276e-05, "loss": 0.5806, "step": 2602 }, { "epoch": 0.11433965992293929, "grad_norm": 3.046875, "learning_rate": 4.847782186855163e-05, "loss": 0.5754, "step": 2604 }, { "epoch": 0.11442747840214278, "grad_norm": 3.0625, "learning_rate": 4.847544378642327e-05, "loss": 0.6083, "step": 2606 }, { "epoch": 0.11451529688134626, "grad_norm": 3.046875, "learning_rate": 4.8473063906539804e-05, "loss": 0.5894, "step": 2608 }, { "epoch": 0.11460311536054975, "grad_norm": 3.015625, "learning_rate": 4.8470682229083477e-05, "loss": 0.6113, "step": 2610 }, { "epoch": 0.11469093383975323, "grad_norm": 2.734375, "learning_rate": 4.846829875423667e-05, "loss": 0.5863, "step": 2612 }, { "epoch": 0.11477875231895672, "grad_norm": 3.390625, "learning_rate": 4.846591348218192e-05, "loss": 0.5958, "step": 2614 }, { "epoch": 0.1148665707981602, "grad_norm": 3.421875, "learning_rate": 4.84635264131019e-05, "loss": 0.5882, "step": 2616 }, { "epoch": 0.11495438927736369, "grad_norm": 2.8125, "learning_rate": 4.84611375471794e-05, "loss": 0.6003, "step": 2618 }, { "epoch": 0.11504220775656718, "grad_norm": 3.5625, "learning_rate": 4.845874688459736e-05, "loss": 0.6034, "step": 2620 }, { "epoch": 0.11513002623577066, "grad_norm": 2.84375, "learning_rate": 4.845635442553885e-05, "loss": 0.5967, "step": 2622 }, { "epoch": 0.11521784471497415, "grad_norm": 3.046875, "learning_rate": 4.8453960170187104e-05, "loss": 0.593, "step": 2624 }, { "epoch": 0.11530566319417763, "grad_norm": 3.234375, "learning_rate": 4.8451564118725474e-05, "loss": 0.5784, "step": 2626 }, { "epoch": 0.11539348167338112, "grad_norm": 3.0625, "learning_rate": 4.8449166271337434e-05, "loss": 0.5907, "step": 2628 }, { "epoch": 0.1154813001525846, "grad_norm": 3.265625, "learning_rate": 4.844676662820662e-05, "loss": 0.5818, "step": 2630 }, { "epoch": 0.11556911863178809, "grad_norm": 2.875, "learning_rate": 4.8444365189516796e-05, "loss": 0.6006, "step": 2632 }, { "epoch": 0.11565693711099158, "grad_norm": 3.296875, "learning_rate": 4.8441961955451865e-05, "loss": 0.5846, "step": 2634 }, { "epoch": 0.11574475559019506, "grad_norm": 2.953125, "learning_rate": 4.843955692619587e-05, "loss": 0.5827, "step": 2636 }, { "epoch": 0.11583257406939855, "grad_norm": 2.375, "learning_rate": 4.8437150101932996e-05, "loss": 0.6059, "step": 2638 }, { "epoch": 0.11592039254860204, "grad_norm": 3.078125, "learning_rate": 4.843474148284753e-05, "loss": 0.5945, "step": 2640 }, { "epoch": 0.11600821102780552, "grad_norm": 3.421875, "learning_rate": 4.843233106912396e-05, "loss": 0.5843, "step": 2642 }, { "epoch": 0.11609602950700901, "grad_norm": 2.671875, "learning_rate": 4.842991886094686e-05, "loss": 0.5994, "step": 2644 }, { "epoch": 0.1161838479862125, "grad_norm": 3.625, "learning_rate": 4.842750485850094e-05, "loss": 0.5767, "step": 2646 }, { "epoch": 0.11627166646541598, "grad_norm": 3.625, "learning_rate": 4.8425089061971094e-05, "loss": 0.5543, "step": 2648 }, { "epoch": 0.11635948494461947, "grad_norm": 3.140625, "learning_rate": 4.8422671471542314e-05, "loss": 0.6315, "step": 2650 }, { "epoch": 0.11644730342382296, "grad_norm": 2.921875, "learning_rate": 4.842025208739973e-05, "loss": 0.5896, "step": 2652 }, { "epoch": 0.11653512190302645, "grad_norm": 3.0, "learning_rate": 4.8417830909728637e-05, "loss": 0.5782, "step": 2654 }, { "epoch": 0.11662294038222994, "grad_norm": 3.046875, "learning_rate": 4.841540793871443e-05, "loss": 0.58, "step": 2656 }, { "epoch": 0.11671075886143342, "grad_norm": 3.125, "learning_rate": 4.841298317454267e-05, "loss": 0.5825, "step": 2658 }, { "epoch": 0.11679857734063691, "grad_norm": 3.28125, "learning_rate": 4.841055661739905e-05, "loss": 0.6037, "step": 2660 }, { "epoch": 0.1168863958198404, "grad_norm": 3.28125, "learning_rate": 4.8408128267469394e-05, "loss": 0.6085, "step": 2662 }, { "epoch": 0.11697421429904388, "grad_norm": 2.75, "learning_rate": 4.840569812493966e-05, "loss": 0.6025, "step": 2664 }, { "epoch": 0.11706203277824737, "grad_norm": 3.0625, "learning_rate": 4.840326618999595e-05, "loss": 0.5656, "step": 2666 }, { "epoch": 0.11714985125745085, "grad_norm": 3.125, "learning_rate": 4.840083246282452e-05, "loss": 0.5908, "step": 2668 }, { "epoch": 0.11723766973665434, "grad_norm": 2.6875, "learning_rate": 4.8398396943611715e-05, "loss": 0.5884, "step": 2670 }, { "epoch": 0.11732548821585782, "grad_norm": 3.328125, "learning_rate": 4.839595963254407e-05, "loss": 0.6311, "step": 2672 }, { "epoch": 0.11741330669506131, "grad_norm": 3.21875, "learning_rate": 4.8393520529808224e-05, "loss": 0.603, "step": 2674 }, { "epoch": 0.1175011251742648, "grad_norm": 2.71875, "learning_rate": 4.839107963559097e-05, "loss": 0.5678, "step": 2676 }, { "epoch": 0.11758894365346828, "grad_norm": 3.25, "learning_rate": 4.838863695007923e-05, "loss": 0.6127, "step": 2678 }, { "epoch": 0.11767676213267177, "grad_norm": 3.234375, "learning_rate": 4.838619247346007e-05, "loss": 0.6211, "step": 2680 }, { "epoch": 0.11776458061187525, "grad_norm": 3.046875, "learning_rate": 4.838374620592068e-05, "loss": 0.6083, "step": 2682 }, { "epoch": 0.11785239909107874, "grad_norm": 3.234375, "learning_rate": 4.83812981476484e-05, "loss": 0.5768, "step": 2684 }, { "epoch": 0.11794021757028222, "grad_norm": 3.265625, "learning_rate": 4.8378848298830706e-05, "loss": 0.5962, "step": 2686 }, { "epoch": 0.11802803604948571, "grad_norm": 5.21875, "learning_rate": 4.83763966596552e-05, "loss": 0.5941, "step": 2688 }, { "epoch": 0.1181158545286892, "grad_norm": 5.71875, "learning_rate": 4.837394323030964e-05, "loss": 0.6159, "step": 2690 }, { "epoch": 0.11820367300789268, "grad_norm": 4.65625, "learning_rate": 4.8371488010981894e-05, "loss": 0.5818, "step": 2692 }, { "epoch": 0.11829149148709617, "grad_norm": 4.09375, "learning_rate": 4.8369031001860005e-05, "loss": 0.5715, "step": 2694 }, { "epoch": 0.11837930996629965, "grad_norm": 4.375, "learning_rate": 4.836657220313211e-05, "loss": 0.5782, "step": 2696 }, { "epoch": 0.11846712844550314, "grad_norm": 2.890625, "learning_rate": 4.8364111614986527e-05, "loss": 0.5572, "step": 2698 }, { "epoch": 0.11855494692470663, "grad_norm": 2.8125, "learning_rate": 4.836164923761166e-05, "loss": 0.603, "step": 2700 }, { "epoch": 0.11864276540391011, "grad_norm": 2.609375, "learning_rate": 4.835918507119611e-05, "loss": 0.5851, "step": 2702 }, { "epoch": 0.11873058388311361, "grad_norm": 3.203125, "learning_rate": 4.8356719115928564e-05, "loss": 0.602, "step": 2704 }, { "epoch": 0.1188184023623171, "grad_norm": 2.984375, "learning_rate": 4.835425137199786e-05, "loss": 0.5614, "step": 2706 }, { "epoch": 0.11890622084152058, "grad_norm": 2.859375, "learning_rate": 4.835178183959299e-05, "loss": 0.6055, "step": 2708 }, { "epoch": 0.11899403932072407, "grad_norm": 2.6875, "learning_rate": 4.834931051890308e-05, "loss": 0.5567, "step": 2710 }, { "epoch": 0.11908185779992755, "grad_norm": 3.375, "learning_rate": 4.8346837410117365e-05, "loss": 0.5798, "step": 2712 }, { "epoch": 0.11916967627913104, "grad_norm": 4.0625, "learning_rate": 4.834436251342524e-05, "loss": 0.5719, "step": 2714 }, { "epoch": 0.11925749475833453, "grad_norm": 4.125, "learning_rate": 4.834188582901624e-05, "loss": 0.6048, "step": 2716 }, { "epoch": 0.11934531323753801, "grad_norm": 3.296875, "learning_rate": 4.833940735708003e-05, "loss": 0.5629, "step": 2718 }, { "epoch": 0.1194331317167415, "grad_norm": 2.703125, "learning_rate": 4.8336927097806415e-05, "loss": 0.5777, "step": 2720 }, { "epoch": 0.11952095019594498, "grad_norm": 3.171875, "learning_rate": 4.833444505138533e-05, "loss": 0.5959, "step": 2722 }, { "epoch": 0.11960876867514847, "grad_norm": 2.796875, "learning_rate": 4.833196121800684e-05, "loss": 0.5682, "step": 2724 }, { "epoch": 0.11969658715435195, "grad_norm": 3.03125, "learning_rate": 4.832947559786116e-05, "loss": 0.5562, "step": 2726 }, { "epoch": 0.11978440563355544, "grad_norm": 4.28125, "learning_rate": 4.8326988191138664e-05, "loss": 0.5755, "step": 2728 }, { "epoch": 0.11987222411275893, "grad_norm": 4.375, "learning_rate": 4.832449899802981e-05, "loss": 0.5914, "step": 2730 }, { "epoch": 0.11996004259196241, "grad_norm": 5.53125, "learning_rate": 4.832200801872523e-05, "loss": 0.5999, "step": 2732 }, { "epoch": 0.1200478610711659, "grad_norm": 6.25, "learning_rate": 4.8319515253415695e-05, "loss": 0.5996, "step": 2734 }, { "epoch": 0.12013567955036938, "grad_norm": 2.796875, "learning_rate": 4.831702070229208e-05, "loss": 0.6215, "step": 2736 }, { "epoch": 0.12022349802957287, "grad_norm": 2.75, "learning_rate": 4.831452436554544e-05, "loss": 0.6128, "step": 2738 }, { "epoch": 0.12031131650877636, "grad_norm": 2.96875, "learning_rate": 4.831202624336693e-05, "loss": 0.5602, "step": 2740 }, { "epoch": 0.12039913498797984, "grad_norm": 3.109375, "learning_rate": 4.830952633594786e-05, "loss": 0.5663, "step": 2742 }, { "epoch": 0.12048695346718333, "grad_norm": 3.359375, "learning_rate": 4.8307024643479684e-05, "loss": 0.5638, "step": 2744 }, { "epoch": 0.12057477194638681, "grad_norm": 4.5625, "learning_rate": 4.830452116615397e-05, "loss": 0.5799, "step": 2746 }, { "epoch": 0.1206625904255903, "grad_norm": 3.953125, "learning_rate": 4.830201590416243e-05, "loss": 0.5996, "step": 2748 }, { "epoch": 0.12075040890479379, "grad_norm": 3.25, "learning_rate": 4.829950885769694e-05, "loss": 0.5995, "step": 2750 }, { "epoch": 0.12083822738399727, "grad_norm": 2.71875, "learning_rate": 4.8297000026949466e-05, "loss": 0.5927, "step": 2752 }, { "epoch": 0.12092604586320077, "grad_norm": 2.734375, "learning_rate": 4.829448941211215e-05, "loss": 0.5874, "step": 2754 }, { "epoch": 0.12101386434240426, "grad_norm": 3.21875, "learning_rate": 4.8291977013377254e-05, "loss": 0.5724, "step": 2756 }, { "epoch": 0.12110168282160774, "grad_norm": 2.890625, "learning_rate": 4.828946283093717e-05, "loss": 0.6041, "step": 2758 }, { "epoch": 0.12118950130081123, "grad_norm": 3.09375, "learning_rate": 4.828694686498444e-05, "loss": 0.5648, "step": 2760 }, { "epoch": 0.12127731978001471, "grad_norm": 3.265625, "learning_rate": 4.8284429115711735e-05, "loss": 0.5902, "step": 2762 }, { "epoch": 0.1213651382592182, "grad_norm": 2.75, "learning_rate": 4.828190958331187e-05, "loss": 0.5663, "step": 2764 }, { "epoch": 0.12145295673842169, "grad_norm": 2.953125, "learning_rate": 4.827938826797779e-05, "loss": 0.5614, "step": 2766 }, { "epoch": 0.12154077521762517, "grad_norm": 3.78125, "learning_rate": 4.827686516990257e-05, "loss": 0.581, "step": 2768 }, { "epoch": 0.12162859369682866, "grad_norm": 4.375, "learning_rate": 4.827434028927944e-05, "loss": 0.5845, "step": 2770 }, { "epoch": 0.12171641217603214, "grad_norm": 3.65625, "learning_rate": 4.827181362630175e-05, "loss": 0.5912, "step": 2772 }, { "epoch": 0.12180423065523563, "grad_norm": 3.015625, "learning_rate": 4.8269285181162995e-05, "loss": 0.5885, "step": 2774 }, { "epoch": 0.12189204913443911, "grad_norm": 2.828125, "learning_rate": 4.82667549540568e-05, "loss": 0.5797, "step": 2776 }, { "epoch": 0.1219798676136426, "grad_norm": 3.25, "learning_rate": 4.826422294517693e-05, "loss": 0.5669, "step": 2778 }, { "epoch": 0.12206768609284609, "grad_norm": 3.125, "learning_rate": 4.826168915471729e-05, "loss": 0.6038, "step": 2780 }, { "epoch": 0.12215550457204957, "grad_norm": 3.609375, "learning_rate": 4.825915358287193e-05, "loss": 0.578, "step": 2782 }, { "epoch": 0.12224332305125306, "grad_norm": 3.140625, "learning_rate": 4.8256616229835e-05, "loss": 0.5762, "step": 2784 }, { "epoch": 0.12233114153045654, "grad_norm": 3.96875, "learning_rate": 4.825407709580083e-05, "loss": 0.612, "step": 2786 }, { "epoch": 0.12241896000966003, "grad_norm": 3.28125, "learning_rate": 4.825153618096386e-05, "loss": 0.6211, "step": 2788 }, { "epoch": 0.12250677848886352, "grad_norm": 3.6875, "learning_rate": 4.824899348551868e-05, "loss": 0.5634, "step": 2790 }, { "epoch": 0.122594596968067, "grad_norm": 2.71875, "learning_rate": 4.8246449009660004e-05, "loss": 0.5689, "step": 2792 }, { "epoch": 0.12268241544727049, "grad_norm": 3.25, "learning_rate": 4.8243902753582695e-05, "loss": 0.6071, "step": 2794 }, { "epoch": 0.12277023392647397, "grad_norm": 3.15625, "learning_rate": 4.8241354717481734e-05, "loss": 0.5847, "step": 2796 }, { "epoch": 0.12285805240567746, "grad_norm": 2.671875, "learning_rate": 4.823880490155226e-05, "loss": 0.5787, "step": 2798 }, { "epoch": 0.12294587088488095, "grad_norm": 2.90625, "learning_rate": 4.823625330598953e-05, "loss": 0.5927, "step": 2800 }, { "epoch": 0.12303368936408443, "grad_norm": 3.4375, "learning_rate": 4.823369993098896e-05, "loss": 0.593, "step": 2802 }, { "epoch": 0.12312150784328793, "grad_norm": 3.203125, "learning_rate": 4.823114477674607e-05, "loss": 0.5603, "step": 2804 }, { "epoch": 0.12320932632249142, "grad_norm": 2.96875, "learning_rate": 4.822858784345655e-05, "loss": 0.5736, "step": 2806 }, { "epoch": 0.1232971448016949, "grad_norm": 3.015625, "learning_rate": 4.822602913131621e-05, "loss": 0.594, "step": 2808 }, { "epoch": 0.12338496328089839, "grad_norm": 2.84375, "learning_rate": 4.8223468640520984e-05, "loss": 0.5968, "step": 2810 }, { "epoch": 0.12347278176010187, "grad_norm": 3.0625, "learning_rate": 4.822090637126697e-05, "loss": 0.5796, "step": 2812 }, { "epoch": 0.12356060023930536, "grad_norm": 2.890625, "learning_rate": 4.821834232375037e-05, "loss": 0.5728, "step": 2814 }, { "epoch": 0.12364841871850885, "grad_norm": 2.9375, "learning_rate": 4.8215776498167555e-05, "loss": 0.5684, "step": 2816 }, { "epoch": 0.12373623719771233, "grad_norm": 2.859375, "learning_rate": 4.8213208894715e-05, "loss": 0.5754, "step": 2818 }, { "epoch": 0.12382405567691582, "grad_norm": 2.78125, "learning_rate": 4.821063951358936e-05, "loss": 0.579, "step": 2820 }, { "epoch": 0.1239118741561193, "grad_norm": 2.90625, "learning_rate": 4.820806835498737e-05, "loss": 0.6119, "step": 2822 }, { "epoch": 0.12399969263532279, "grad_norm": 2.890625, "learning_rate": 4.820549541910595e-05, "loss": 0.5824, "step": 2824 }, { "epoch": 0.12408751111452627, "grad_norm": 3.28125, "learning_rate": 4.820292070614212e-05, "loss": 0.5643, "step": 2826 }, { "epoch": 0.12417532959372976, "grad_norm": 2.671875, "learning_rate": 4.820034421629307e-05, "loss": 0.5791, "step": 2828 }, { "epoch": 0.12426314807293325, "grad_norm": 3.71875, "learning_rate": 4.819776594975609e-05, "loss": 0.5905, "step": 2830 }, { "epoch": 0.12435096655213673, "grad_norm": 3.21875, "learning_rate": 4.819518590672863e-05, "loss": 0.6018, "step": 2832 }, { "epoch": 0.12443878503134022, "grad_norm": 3.515625, "learning_rate": 4.8192604087408285e-05, "loss": 0.5928, "step": 2834 }, { "epoch": 0.1245266035105437, "grad_norm": 4.15625, "learning_rate": 4.819002049199276e-05, "loss": 0.5817, "step": 2836 }, { "epoch": 0.12461442198974719, "grad_norm": 2.765625, "learning_rate": 4.818743512067989e-05, "loss": 0.6143, "step": 2838 }, { "epoch": 0.12470224046895068, "grad_norm": 3.21875, "learning_rate": 4.8184847973667695e-05, "loss": 0.5789, "step": 2840 }, { "epoch": 0.12479005894815416, "grad_norm": 2.9375, "learning_rate": 4.818225905115428e-05, "loss": 0.5805, "step": 2842 }, { "epoch": 0.12487787742735765, "grad_norm": 3.265625, "learning_rate": 4.817966835333791e-05, "loss": 0.5753, "step": 2844 }, { "epoch": 0.12496569590656113, "grad_norm": 2.90625, "learning_rate": 4.817707588041698e-05, "loss": 0.5686, "step": 2846 }, { "epoch": 0.12505351438576462, "grad_norm": 2.671875, "learning_rate": 4.817448163259002e-05, "loss": 0.5567, "step": 2848 }, { "epoch": 0.12514133286496812, "grad_norm": 2.703125, "learning_rate": 4.8171885610055705e-05, "loss": 0.5747, "step": 2850 }, { "epoch": 0.1252291513441716, "grad_norm": 3.0, "learning_rate": 4.816928781301283e-05, "loss": 0.5622, "step": 2852 }, { "epoch": 0.1253169698233751, "grad_norm": 3.03125, "learning_rate": 4.816668824166035e-05, "loss": 0.5603, "step": 2854 }, { "epoch": 0.12540478830257856, "grad_norm": 2.59375, "learning_rate": 4.8164086896197325e-05, "loss": 0.5682, "step": 2856 }, { "epoch": 0.12549260678178206, "grad_norm": 3.09375, "learning_rate": 4.816148377682297e-05, "loss": 0.6089, "step": 2858 }, { "epoch": 0.12558042526098553, "grad_norm": 2.859375, "learning_rate": 4.8158878883736644e-05, "loss": 0.5962, "step": 2860 }, { "epoch": 0.12566824374018903, "grad_norm": 3.375, "learning_rate": 4.815627221713781e-05, "loss": 0.5576, "step": 2862 }, { "epoch": 0.1257560622193925, "grad_norm": 2.84375, "learning_rate": 4.815366377722611e-05, "loss": 0.6009, "step": 2864 }, { "epoch": 0.125843880698596, "grad_norm": 3.109375, "learning_rate": 4.815105356420128e-05, "loss": 0.5888, "step": 2866 }, { "epoch": 0.12593169917779948, "grad_norm": 3.03125, "learning_rate": 4.814844157826323e-05, "loss": 0.5765, "step": 2868 }, { "epoch": 0.12601951765700298, "grad_norm": 3.25, "learning_rate": 4.814582781961195e-05, "loss": 0.5808, "step": 2870 }, { "epoch": 0.12610733613620645, "grad_norm": 3.515625, "learning_rate": 4.814321228844765e-05, "loss": 0.5683, "step": 2872 }, { "epoch": 0.12619515461540995, "grad_norm": 3.25, "learning_rate": 4.81405949849706e-05, "loss": 0.5604, "step": 2874 }, { "epoch": 0.12628297309461342, "grad_norm": 3.15625, "learning_rate": 4.813797590938124e-05, "loss": 0.5842, "step": 2876 }, { "epoch": 0.12637079157381692, "grad_norm": 2.875, "learning_rate": 4.8135355061880124e-05, "loss": 0.577, "step": 2878 }, { "epoch": 0.12645861005302042, "grad_norm": 4.0625, "learning_rate": 4.813273244266799e-05, "loss": 0.5972, "step": 2880 }, { "epoch": 0.1265464285322239, "grad_norm": 4.6875, "learning_rate": 4.8130108051945655e-05, "loss": 0.5887, "step": 2882 }, { "epoch": 0.1266342470114274, "grad_norm": 4.5625, "learning_rate": 4.8127481889914096e-05, "loss": 0.6029, "step": 2884 }, { "epoch": 0.12672206549063086, "grad_norm": 3.609375, "learning_rate": 4.812485395677443e-05, "loss": 0.589, "step": 2886 }, { "epoch": 0.12680988396983436, "grad_norm": 3.34375, "learning_rate": 4.812222425272791e-05, "loss": 0.5527, "step": 2888 }, { "epoch": 0.12689770244903784, "grad_norm": 3.015625, "learning_rate": 4.811959277797591e-05, "loss": 0.6044, "step": 2890 }, { "epoch": 0.12698552092824134, "grad_norm": 3.3125, "learning_rate": 4.8116959532719954e-05, "loss": 0.5792, "step": 2892 }, { "epoch": 0.1270733394074448, "grad_norm": 3.734375, "learning_rate": 4.81143245171617e-05, "loss": 0.5781, "step": 2894 }, { "epoch": 0.1271611578866483, "grad_norm": 2.8125, "learning_rate": 4.8111687731502925e-05, "loss": 0.5827, "step": 2896 }, { "epoch": 0.12724897636585178, "grad_norm": 2.984375, "learning_rate": 4.8109049175945566e-05, "loss": 0.5314, "step": 2898 }, { "epoch": 0.12733679484505528, "grad_norm": 3.140625, "learning_rate": 4.810640885069169e-05, "loss": 0.5687, "step": 2900 }, { "epoch": 0.12742461332425875, "grad_norm": 3.0, "learning_rate": 4.810376675594347e-05, "loss": 0.5552, "step": 2902 }, { "epoch": 0.12751243180346225, "grad_norm": 3.375, "learning_rate": 4.8101122891903263e-05, "loss": 0.5637, "step": 2904 }, { "epoch": 0.12760025028266572, "grad_norm": 3.265625, "learning_rate": 4.809847725877352e-05, "loss": 0.5685, "step": 2906 }, { "epoch": 0.12768806876186922, "grad_norm": 3.109375, "learning_rate": 4.8095829856756866e-05, "loss": 0.581, "step": 2908 }, { "epoch": 0.1277758872410727, "grad_norm": 3.421875, "learning_rate": 4.809318068605602e-05, "loss": 0.5857, "step": 2910 }, { "epoch": 0.1278637057202762, "grad_norm": 2.890625, "learning_rate": 4.8090529746873845e-05, "loss": 0.5687, "step": 2912 }, { "epoch": 0.12795152419947967, "grad_norm": 2.796875, "learning_rate": 4.8087877039413386e-05, "loss": 0.5868, "step": 2914 }, { "epoch": 0.12803934267868317, "grad_norm": 3.03125, "learning_rate": 4.8085222563877766e-05, "loss": 0.5768, "step": 2916 }, { "epoch": 0.12812716115788664, "grad_norm": 2.9375, "learning_rate": 4.808256632047026e-05, "loss": 0.5833, "step": 2918 }, { "epoch": 0.12821497963709014, "grad_norm": 2.640625, "learning_rate": 4.80799083093943e-05, "loss": 0.5633, "step": 2920 }, { "epoch": 0.1283027981162936, "grad_norm": 3.296875, "learning_rate": 4.8077248530853416e-05, "loss": 0.5658, "step": 2922 }, { "epoch": 0.1283906165954971, "grad_norm": 2.8125, "learning_rate": 4.807458698505132e-05, "loss": 0.6031, "step": 2924 }, { "epoch": 0.12847843507470058, "grad_norm": 2.9375, "learning_rate": 4.807192367219182e-05, "loss": 0.5631, "step": 2926 }, { "epoch": 0.12856625355390408, "grad_norm": 2.65625, "learning_rate": 4.8069258592478875e-05, "loss": 0.5809, "step": 2928 }, { "epoch": 0.12865407203310758, "grad_norm": 2.59375, "learning_rate": 4.8066591746116575e-05, "loss": 0.5332, "step": 2930 }, { "epoch": 0.12874189051231105, "grad_norm": 2.796875, "learning_rate": 4.8063923133309144e-05, "loss": 0.5767, "step": 2932 }, { "epoch": 0.12882970899151455, "grad_norm": 3.5625, "learning_rate": 4.8061252754260954e-05, "loss": 0.5972, "step": 2934 }, { "epoch": 0.12891752747071802, "grad_norm": 3.734375, "learning_rate": 4.805858060917651e-05, "loss": 0.5573, "step": 2936 }, { "epoch": 0.12900534594992152, "grad_norm": 4.4375, "learning_rate": 4.8055906698260424e-05, "loss": 0.582, "step": 2938 }, { "epoch": 0.129093164429125, "grad_norm": 4.09375, "learning_rate": 4.805323102171748e-05, "loss": 0.5729, "step": 2940 }, { "epoch": 0.1291809829083285, "grad_norm": 4.40625, "learning_rate": 4.805055357975257e-05, "loss": 0.5866, "step": 2942 }, { "epoch": 0.12926880138753197, "grad_norm": 4.59375, "learning_rate": 4.804787437257075e-05, "loss": 0.6013, "step": 2944 }, { "epoch": 0.12935661986673547, "grad_norm": 4.59375, "learning_rate": 4.8045193400377186e-05, "loss": 0.5556, "step": 2946 }, { "epoch": 0.12944443834593894, "grad_norm": 5.9375, "learning_rate": 4.8042510663377184e-05, "loss": 0.5914, "step": 2948 }, { "epoch": 0.12953225682514244, "grad_norm": 4.375, "learning_rate": 4.803982616177619e-05, "loss": 0.5841, "step": 2950 }, { "epoch": 0.1296200753043459, "grad_norm": 2.34375, "learning_rate": 4.803713989577979e-05, "loss": 0.5732, "step": 2952 }, { "epoch": 0.1297078937835494, "grad_norm": 3.015625, "learning_rate": 4.803445186559369e-05, "loss": 0.5683, "step": 2954 }, { "epoch": 0.12979571226275288, "grad_norm": 3.59375, "learning_rate": 4.803176207142375e-05, "loss": 0.5492, "step": 2956 }, { "epoch": 0.12988353074195638, "grad_norm": 4.0625, "learning_rate": 4.802907051347595e-05, "loss": 0.5642, "step": 2958 }, { "epoch": 0.12997134922115985, "grad_norm": 3.421875, "learning_rate": 4.8026377191956404e-05, "loss": 0.5762, "step": 2960 }, { "epoch": 0.13005916770036335, "grad_norm": 3.625, "learning_rate": 4.802368210707138e-05, "loss": 0.5616, "step": 2962 }, { "epoch": 0.13014698617956683, "grad_norm": 3.203125, "learning_rate": 4.802098525902725e-05, "loss": 0.557, "step": 2964 }, { "epoch": 0.13023480465877033, "grad_norm": 2.875, "learning_rate": 4.801828664803056e-05, "loss": 0.5589, "step": 2966 }, { "epoch": 0.1303226231379738, "grad_norm": 2.5, "learning_rate": 4.8015586274287954e-05, "loss": 0.5587, "step": 2968 }, { "epoch": 0.1304104416171773, "grad_norm": 2.578125, "learning_rate": 4.8012884138006236e-05, "loss": 0.5536, "step": 2970 }, { "epoch": 0.13049826009638077, "grad_norm": 2.53125, "learning_rate": 4.8010180239392336e-05, "loss": 0.5832, "step": 2972 }, { "epoch": 0.13058607857558427, "grad_norm": 2.75, "learning_rate": 4.8007474578653315e-05, "loss": 0.5525, "step": 2974 }, { "epoch": 0.13067389705478774, "grad_norm": 2.78125, "learning_rate": 4.800476715599638e-05, "loss": 0.554, "step": 2976 }, { "epoch": 0.13076171553399124, "grad_norm": 2.640625, "learning_rate": 4.800205797162885e-05, "loss": 0.5811, "step": 2978 }, { "epoch": 0.1308495340131947, "grad_norm": 2.890625, "learning_rate": 4.7999347025758226e-05, "loss": 0.5601, "step": 2980 }, { "epoch": 0.1309373524923982, "grad_norm": 2.78125, "learning_rate": 4.7996634318592084e-05, "loss": 0.5816, "step": 2982 }, { "epoch": 0.1310251709716017, "grad_norm": 2.9375, "learning_rate": 4.7993919850338165e-05, "loss": 0.5796, "step": 2984 }, { "epoch": 0.13111298945080518, "grad_norm": 3.15625, "learning_rate": 4.799120362120436e-05, "loss": 0.5404, "step": 2986 }, { "epoch": 0.13120080793000868, "grad_norm": 3.359375, "learning_rate": 4.798848563139867e-05, "loss": 0.5436, "step": 2988 }, { "epoch": 0.13128862640921216, "grad_norm": 3.0, "learning_rate": 4.7985765881129244e-05, "loss": 0.5516, "step": 2990 }, { "epoch": 0.13137644488841566, "grad_norm": 2.546875, "learning_rate": 4.798304437060435e-05, "loss": 0.5522, "step": 2992 }, { "epoch": 0.13146426336761913, "grad_norm": 2.796875, "learning_rate": 4.798032110003241e-05, "loss": 0.5484, "step": 2994 }, { "epoch": 0.13155208184682263, "grad_norm": 3.078125, "learning_rate": 4.797759606962196e-05, "loss": 0.5625, "step": 2996 }, { "epoch": 0.1316399003260261, "grad_norm": 3.984375, "learning_rate": 4.797486927958171e-05, "loss": 0.5799, "step": 2998 }, { "epoch": 0.1317277188052296, "grad_norm": 2.828125, "learning_rate": 4.797214073012046e-05, "loss": 0.5912, "step": 3000 }, { "epoch": 0.13181553728443307, "grad_norm": 2.484375, "learning_rate": 4.796941042144717e-05, "loss": 0.5771, "step": 3002 }, { "epoch": 0.13190335576363657, "grad_norm": 2.65625, "learning_rate": 4.796667835377092e-05, "loss": 0.5842, "step": 3004 }, { "epoch": 0.13199117424284004, "grad_norm": 3.328125, "learning_rate": 4.796394452730094e-05, "loss": 0.5651, "step": 3006 }, { "epoch": 0.13207899272204354, "grad_norm": 2.734375, "learning_rate": 4.796120894224657e-05, "loss": 0.5555, "step": 3008 }, { "epoch": 0.13216681120124701, "grad_norm": 2.8125, "learning_rate": 4.795847159881733e-05, "loss": 0.5811, "step": 3010 }, { "epoch": 0.13225462968045051, "grad_norm": 2.78125, "learning_rate": 4.795573249722282e-05, "loss": 0.5715, "step": 3012 }, { "epoch": 0.13234244815965399, "grad_norm": 2.5, "learning_rate": 4.795299163767282e-05, "loss": 0.5762, "step": 3014 }, { "epoch": 0.13243026663885749, "grad_norm": 2.984375, "learning_rate": 4.7950249020377215e-05, "loss": 0.5867, "step": 3016 }, { "epoch": 0.13251808511806096, "grad_norm": 3.34375, "learning_rate": 4.7947504645546034e-05, "loss": 0.5801, "step": 3018 }, { "epoch": 0.13260590359726446, "grad_norm": 3.203125, "learning_rate": 4.794475851338946e-05, "loss": 0.5964, "step": 3020 }, { "epoch": 0.13269372207646793, "grad_norm": 2.921875, "learning_rate": 4.794201062411777e-05, "loss": 0.54, "step": 3022 }, { "epoch": 0.13278154055567143, "grad_norm": 2.65625, "learning_rate": 4.79392609779414e-05, "loss": 0.5766, "step": 3024 }, { "epoch": 0.1328693590348749, "grad_norm": 3.0, "learning_rate": 4.7936509575070945e-05, "loss": 0.6018, "step": 3026 }, { "epoch": 0.1329571775140784, "grad_norm": 3.0, "learning_rate": 4.793375641571707e-05, "loss": 0.5358, "step": 3028 }, { "epoch": 0.13304499599328187, "grad_norm": 3.109375, "learning_rate": 4.793100150009064e-05, "loss": 0.5788, "step": 3030 }, { "epoch": 0.13313281447248537, "grad_norm": 3.015625, "learning_rate": 4.7928244828402613e-05, "loss": 0.5617, "step": 3032 }, { "epoch": 0.13322063295168887, "grad_norm": 2.734375, "learning_rate": 4.7925486400864104e-05, "loss": 0.5712, "step": 3034 }, { "epoch": 0.13330845143089234, "grad_norm": 2.765625, "learning_rate": 4.7922726217686355e-05, "loss": 0.5818, "step": 3036 }, { "epoch": 0.13339626991009584, "grad_norm": 2.71875, "learning_rate": 4.7919964279080724e-05, "loss": 0.58, "step": 3038 }, { "epoch": 0.13348408838929932, "grad_norm": 3.015625, "learning_rate": 4.7917200585258746e-05, "loss": 0.563, "step": 3040 }, { "epoch": 0.13357190686850282, "grad_norm": 2.921875, "learning_rate": 4.791443513643205e-05, "loss": 0.5491, "step": 3042 }, { "epoch": 0.1336597253477063, "grad_norm": 2.71875, "learning_rate": 4.791166793281242e-05, "loss": 0.5647, "step": 3044 }, { "epoch": 0.1337475438269098, "grad_norm": 2.640625, "learning_rate": 4.790889897461176e-05, "loss": 0.5556, "step": 3046 }, { "epoch": 0.13383536230611326, "grad_norm": 3.1875, "learning_rate": 4.790612826204214e-05, "loss": 0.5618, "step": 3048 }, { "epoch": 0.13392318078531676, "grad_norm": 3.25, "learning_rate": 4.7903355795315714e-05, "loss": 0.5705, "step": 3050 }, { "epoch": 0.13401099926452023, "grad_norm": 3.125, "learning_rate": 4.790058157464481e-05, "loss": 0.5471, "step": 3052 }, { "epoch": 0.13409881774372373, "grad_norm": 2.921875, "learning_rate": 4.789780560024188e-05, "loss": 0.6094, "step": 3054 }, { "epoch": 0.1341866362229272, "grad_norm": 2.6875, "learning_rate": 4.789502787231952e-05, "loss": 0.5525, "step": 3056 }, { "epoch": 0.1342744547021307, "grad_norm": 2.609375, "learning_rate": 4.7892248391090426e-05, "loss": 0.5455, "step": 3058 }, { "epoch": 0.13436227318133417, "grad_norm": 2.765625, "learning_rate": 4.788946715676747e-05, "loss": 0.5991, "step": 3060 }, { "epoch": 0.13445009166053767, "grad_norm": 2.671875, "learning_rate": 4.788668416956362e-05, "loss": 0.5645, "step": 3062 }, { "epoch": 0.13453791013974115, "grad_norm": 2.890625, "learning_rate": 4.788389942969202e-05, "loss": 0.5732, "step": 3064 }, { "epoch": 0.13462572861894465, "grad_norm": 3.3125, "learning_rate": 4.788111293736591e-05, "loss": 0.5562, "step": 3066 }, { "epoch": 0.13471354709814812, "grad_norm": 2.703125, "learning_rate": 4.7878324692798694e-05, "loss": 0.5768, "step": 3068 }, { "epoch": 0.13480136557735162, "grad_norm": 2.5, "learning_rate": 4.787553469620388e-05, "loss": 0.5782, "step": 3070 }, { "epoch": 0.1348891840565551, "grad_norm": 2.703125, "learning_rate": 4.787274294779515e-05, "loss": 0.5552, "step": 3072 }, { "epoch": 0.1349770025357586, "grad_norm": 2.828125, "learning_rate": 4.7869949447786266e-05, "loss": 0.5638, "step": 3074 }, { "epoch": 0.13506482101496206, "grad_norm": 2.828125, "learning_rate": 4.7867154196391184e-05, "loss": 0.5941, "step": 3076 }, { "epoch": 0.13515263949416556, "grad_norm": 2.65625, "learning_rate": 4.786435719382394e-05, "loss": 0.5407, "step": 3078 }, { "epoch": 0.13524045797336903, "grad_norm": 3.109375, "learning_rate": 4.7861558440298745e-05, "loss": 0.5807, "step": 3080 }, { "epoch": 0.13532827645257253, "grad_norm": 3.296875, "learning_rate": 4.785875793602993e-05, "loss": 0.5439, "step": 3082 }, { "epoch": 0.13541609493177603, "grad_norm": 2.765625, "learning_rate": 4.785595568123195e-05, "loss": 0.5342, "step": 3084 }, { "epoch": 0.1355039134109795, "grad_norm": 2.90625, "learning_rate": 4.78531516761194e-05, "loss": 0.5854, "step": 3086 }, { "epoch": 0.135591731890183, "grad_norm": 2.96875, "learning_rate": 4.785034592090702e-05, "loss": 0.5676, "step": 3088 }, { "epoch": 0.13567955036938648, "grad_norm": 3.203125, "learning_rate": 4.784753841580967e-05, "loss": 0.5573, "step": 3090 }, { "epoch": 0.13576736884858998, "grad_norm": 3.375, "learning_rate": 4.7844729161042355e-05, "loss": 0.567, "step": 3092 }, { "epoch": 0.13585518732779345, "grad_norm": 3.046875, "learning_rate": 4.78419181568202e-05, "loss": 0.5619, "step": 3094 }, { "epoch": 0.13594300580699695, "grad_norm": 2.84375, "learning_rate": 4.783910540335848e-05, "loss": 0.5454, "step": 3096 }, { "epoch": 0.13603082428620042, "grad_norm": 3.4375, "learning_rate": 4.783629090087259e-05, "loss": 0.5401, "step": 3098 }, { "epoch": 0.13611864276540392, "grad_norm": 4.375, "learning_rate": 4.783347464957807e-05, "loss": 0.5515, "step": 3100 }, { "epoch": 0.1362064612446074, "grad_norm": 3.9375, "learning_rate": 4.783065664969059e-05, "loss": 0.5766, "step": 3102 }, { "epoch": 0.1362942797238109, "grad_norm": 4.3125, "learning_rate": 4.782783690142595e-05, "loss": 0.5804, "step": 3104 }, { "epoch": 0.13638209820301436, "grad_norm": 3.96875, "learning_rate": 4.782501540500009e-05, "loss": 0.5908, "step": 3106 }, { "epoch": 0.13646991668221786, "grad_norm": 3.15625, "learning_rate": 4.7822192160629074e-05, "loss": 0.5553, "step": 3108 }, { "epoch": 0.13655773516142133, "grad_norm": 3.796875, "learning_rate": 4.781936716852912e-05, "loss": 0.5416, "step": 3110 }, { "epoch": 0.13664555364062483, "grad_norm": 2.8125, "learning_rate": 4.781654042891655e-05, "loss": 0.5252, "step": 3112 }, { "epoch": 0.1367333721198283, "grad_norm": 2.578125, "learning_rate": 4.781371194200784e-05, "loss": 0.5582, "step": 3114 }, { "epoch": 0.1368211905990318, "grad_norm": 2.59375, "learning_rate": 4.781088170801961e-05, "loss": 0.5422, "step": 3116 }, { "epoch": 0.13690900907823528, "grad_norm": 3.0625, "learning_rate": 4.780804972716859e-05, "loss": 0.5477, "step": 3118 }, { "epoch": 0.13699682755743878, "grad_norm": 2.484375, "learning_rate": 4.780521599967165e-05, "loss": 0.567, "step": 3120 }, { "epoch": 0.13708464603664225, "grad_norm": 3.0625, "learning_rate": 4.78023805257458e-05, "loss": 0.5809, "step": 3122 }, { "epoch": 0.13717246451584575, "grad_norm": 2.75, "learning_rate": 4.77995433056082e-05, "loss": 0.5676, "step": 3124 }, { "epoch": 0.13726028299504922, "grad_norm": 2.71875, "learning_rate": 4.779670433947608e-05, "loss": 0.5617, "step": 3126 }, { "epoch": 0.13734810147425272, "grad_norm": 2.828125, "learning_rate": 4.77938636275669e-05, "loss": 0.5592, "step": 3128 }, { "epoch": 0.1374359199534562, "grad_norm": 2.984375, "learning_rate": 4.779102117009817e-05, "loss": 0.5645, "step": 3130 }, { "epoch": 0.1375237384326597, "grad_norm": 2.609375, "learning_rate": 4.778817696728758e-05, "loss": 0.5756, "step": 3132 }, { "epoch": 0.1376115569118632, "grad_norm": 3.09375, "learning_rate": 4.778533101935293e-05, "loss": 0.5531, "step": 3134 }, { "epoch": 0.13769937539106666, "grad_norm": 2.953125, "learning_rate": 4.778248332651217e-05, "loss": 0.588, "step": 3136 }, { "epoch": 0.13778719387027016, "grad_norm": 2.96875, "learning_rate": 4.7779633888983375e-05, "loss": 0.5559, "step": 3138 }, { "epoch": 0.13787501234947364, "grad_norm": 3.015625, "learning_rate": 4.7776782706984754e-05, "loss": 0.5385, "step": 3140 }, { "epoch": 0.13796283082867714, "grad_norm": 2.65625, "learning_rate": 4.777392978073466e-05, "loss": 0.5593, "step": 3142 }, { "epoch": 0.1380506493078806, "grad_norm": 2.90625, "learning_rate": 4.777107511045157e-05, "loss": 0.5674, "step": 3144 }, { "epoch": 0.1381384677870841, "grad_norm": 3.34375, "learning_rate": 4.776821869635407e-05, "loss": 0.5298, "step": 3146 }, { "epoch": 0.13822628626628758, "grad_norm": 3.34375, "learning_rate": 4.776536053866094e-05, "loss": 0.5588, "step": 3148 }, { "epoch": 0.13831410474549108, "grad_norm": 2.9375, "learning_rate": 4.7762500637591036e-05, "loss": 0.5656, "step": 3150 }, { "epoch": 0.13840192322469455, "grad_norm": 2.65625, "learning_rate": 4.775963899336338e-05, "loss": 0.5465, "step": 3152 }, { "epoch": 0.13848974170389805, "grad_norm": 2.890625, "learning_rate": 4.7756775606197114e-05, "loss": 0.556, "step": 3154 }, { "epoch": 0.13857756018310152, "grad_norm": 2.859375, "learning_rate": 4.775391047631151e-05, "loss": 0.5616, "step": 3156 }, { "epoch": 0.13866537866230502, "grad_norm": 3.09375, "learning_rate": 4.7751043603925996e-05, "loss": 0.5721, "step": 3158 }, { "epoch": 0.1387531971415085, "grad_norm": 3.1875, "learning_rate": 4.77481749892601e-05, "loss": 0.5668, "step": 3160 }, { "epoch": 0.138841015620712, "grad_norm": 3.0625, "learning_rate": 4.774530463253352e-05, "loss": 0.5333, "step": 3162 }, { "epoch": 0.13892883409991547, "grad_norm": 2.984375, "learning_rate": 4.774243253396605e-05, "loss": 0.5342, "step": 3164 }, { "epoch": 0.13901665257911897, "grad_norm": 3.171875, "learning_rate": 4.7739558693777654e-05, "loss": 0.5521, "step": 3166 }, { "epoch": 0.13910447105832244, "grad_norm": 3.796875, "learning_rate": 4.7736683112188396e-05, "loss": 0.5404, "step": 3168 }, { "epoch": 0.13919228953752594, "grad_norm": 3.921875, "learning_rate": 4.77338057894185e-05, "loss": 0.5795, "step": 3170 }, { "epoch": 0.1392801080167294, "grad_norm": 4.75, "learning_rate": 4.773092672568829e-05, "loss": 0.5364, "step": 3172 }, { "epoch": 0.1393679264959329, "grad_norm": 5.125, "learning_rate": 4.7728045921218286e-05, "loss": 0.5539, "step": 3174 }, { "epoch": 0.13945574497513638, "grad_norm": 4.59375, "learning_rate": 4.7725163376229064e-05, "loss": 0.5781, "step": 3176 }, { "epoch": 0.13954356345433988, "grad_norm": 3.5, "learning_rate": 4.772227909094139e-05, "loss": 0.572, "step": 3178 }, { "epoch": 0.13963138193354335, "grad_norm": 3.59375, "learning_rate": 4.771939306557613e-05, "loss": 0.5433, "step": 3180 }, { "epoch": 0.13971920041274685, "grad_norm": 2.625, "learning_rate": 4.77165053003543e-05, "loss": 0.5494, "step": 3182 }, { "epoch": 0.13980701889195035, "grad_norm": 2.765625, "learning_rate": 4.7713615795497055e-05, "loss": 0.5664, "step": 3184 }, { "epoch": 0.13989483737115382, "grad_norm": 2.984375, "learning_rate": 4.771072455122567e-05, "loss": 0.5408, "step": 3186 }, { "epoch": 0.13998265585035732, "grad_norm": 2.765625, "learning_rate": 4.770783156776155e-05, "loss": 0.5764, "step": 3188 }, { "epoch": 0.1400704743295608, "grad_norm": 3.109375, "learning_rate": 4.770493684532624e-05, "loss": 0.5377, "step": 3190 }, { "epoch": 0.1401582928087643, "grad_norm": 3.078125, "learning_rate": 4.770204038414143e-05, "loss": 0.5654, "step": 3192 }, { "epoch": 0.14024611128796777, "grad_norm": 2.65625, "learning_rate": 4.769914218442892e-05, "loss": 0.5729, "step": 3194 }, { "epoch": 0.14033392976717127, "grad_norm": 2.53125, "learning_rate": 4.7696242246410674e-05, "loss": 0.5623, "step": 3196 }, { "epoch": 0.14042174824637474, "grad_norm": 3.03125, "learning_rate": 4.769334057030874e-05, "loss": 0.5485, "step": 3198 }, { "epoch": 0.14050956672557824, "grad_norm": 3.140625, "learning_rate": 4.7690437156345356e-05, "loss": 0.5654, "step": 3200 }, { "epoch": 0.1405973852047817, "grad_norm": 2.859375, "learning_rate": 4.768753200474285e-05, "loss": 0.5322, "step": 3202 }, { "epoch": 0.1406852036839852, "grad_norm": 2.46875, "learning_rate": 4.768462511572371e-05, "loss": 0.5448, "step": 3204 }, { "epoch": 0.14077302216318868, "grad_norm": 2.8125, "learning_rate": 4.768171648951054e-05, "loss": 0.5783, "step": 3206 }, { "epoch": 0.14086084064239218, "grad_norm": 3.25, "learning_rate": 4.767880612632608e-05, "loss": 0.5502, "step": 3208 }, { "epoch": 0.14094865912159565, "grad_norm": 3.28125, "learning_rate": 4.767589402639321e-05, "loss": 0.5776, "step": 3210 }, { "epoch": 0.14103647760079915, "grad_norm": 3.09375, "learning_rate": 4.7672980189934935e-05, "loss": 0.5423, "step": 3212 }, { "epoch": 0.14112429608000263, "grad_norm": 2.828125, "learning_rate": 4.7670064617174414e-05, "loss": 0.5671, "step": 3214 }, { "epoch": 0.14121211455920613, "grad_norm": 2.859375, "learning_rate": 4.7667147308334906e-05, "loss": 0.5757, "step": 3216 }, { "epoch": 0.1412999330384096, "grad_norm": 2.859375, "learning_rate": 4.766422826363982e-05, "loss": 0.5452, "step": 3218 }, { "epoch": 0.1413877515176131, "grad_norm": 2.515625, "learning_rate": 4.76613074833127e-05, "loss": 0.5573, "step": 3220 }, { "epoch": 0.14147556999681657, "grad_norm": 2.765625, "learning_rate": 4.765838496757722e-05, "loss": 0.5512, "step": 3222 }, { "epoch": 0.14156338847602007, "grad_norm": 3.390625, "learning_rate": 4.765546071665719e-05, "loss": 0.5689, "step": 3224 }, { "epoch": 0.14165120695522354, "grad_norm": 3.34375, "learning_rate": 4.765253473077655e-05, "loss": 0.5606, "step": 3226 }, { "epoch": 0.14173902543442704, "grad_norm": 2.765625, "learning_rate": 4.764960701015937e-05, "loss": 0.5573, "step": 3228 }, { "epoch": 0.1418268439136305, "grad_norm": 2.921875, "learning_rate": 4.764667755502985e-05, "loss": 0.5643, "step": 3230 }, { "epoch": 0.141914662392834, "grad_norm": 3.34375, "learning_rate": 4.764374636561234e-05, "loss": 0.5225, "step": 3232 }, { "epoch": 0.1420024808720375, "grad_norm": 2.953125, "learning_rate": 4.76408134421313e-05, "loss": 0.5291, "step": 3234 }, { "epoch": 0.14209029935124098, "grad_norm": 2.984375, "learning_rate": 4.7637878784811343e-05, "loss": 0.5567, "step": 3236 }, { "epoch": 0.14217811783044448, "grad_norm": 2.453125, "learning_rate": 4.76349423938772e-05, "loss": 0.5848, "step": 3238 }, { "epoch": 0.14226593630964796, "grad_norm": 2.8125, "learning_rate": 4.7632004269553746e-05, "loss": 0.5553, "step": 3240 }, { "epoch": 0.14235375478885146, "grad_norm": 3.65625, "learning_rate": 4.762906441206597e-05, "loss": 0.5725, "step": 3242 }, { "epoch": 0.14244157326805493, "grad_norm": 2.765625, "learning_rate": 4.762612282163903e-05, "loss": 0.5571, "step": 3244 }, { "epoch": 0.14252939174725843, "grad_norm": 2.9375, "learning_rate": 4.762317949849817e-05, "loss": 0.5418, "step": 3246 }, { "epoch": 0.1426172102264619, "grad_norm": 2.734375, "learning_rate": 4.7620234442868806e-05, "loss": 0.5296, "step": 3248 }, { "epoch": 0.1427050287056654, "grad_norm": 2.90625, "learning_rate": 4.7617287654976466e-05, "loss": 0.5702, "step": 3250 }, { "epoch": 0.14279284718486887, "grad_norm": 2.96875, "learning_rate": 4.7614339135046816e-05, "loss": 0.5605, "step": 3252 }, { "epoch": 0.14288066566407237, "grad_norm": 2.96875, "learning_rate": 4.761138888330565e-05, "loss": 0.546, "step": 3254 }, { "epoch": 0.14296848414327584, "grad_norm": 2.953125, "learning_rate": 4.760843689997891e-05, "loss": 0.5669, "step": 3256 }, { "epoch": 0.14305630262247934, "grad_norm": 2.875, "learning_rate": 4.760548318529265e-05, "loss": 0.5651, "step": 3258 }, { "epoch": 0.14314412110168281, "grad_norm": 2.984375, "learning_rate": 4.760252773947307e-05, "loss": 0.5565, "step": 3260 }, { "epoch": 0.1432319395808863, "grad_norm": 2.546875, "learning_rate": 4.7599570562746486e-05, "loss": 0.5608, "step": 3262 }, { "epoch": 0.14331975806008979, "grad_norm": 2.921875, "learning_rate": 4.7596611655339384e-05, "loss": 0.5184, "step": 3264 }, { "epoch": 0.14340757653929329, "grad_norm": 3.328125, "learning_rate": 4.759365101747833e-05, "loss": 0.5226, "step": 3266 }, { "epoch": 0.14349539501849676, "grad_norm": 3.046875, "learning_rate": 4.759068864939008e-05, "loss": 0.5637, "step": 3268 }, { "epoch": 0.14358321349770026, "grad_norm": 3.734375, "learning_rate": 4.7587724551301474e-05, "loss": 0.5445, "step": 3270 }, { "epoch": 0.14367103197690373, "grad_norm": 3.203125, "learning_rate": 4.758475872343951e-05, "loss": 0.5533, "step": 3272 }, { "epoch": 0.14375885045610723, "grad_norm": 2.796875, "learning_rate": 4.7581791166031307e-05, "loss": 0.5308, "step": 3274 }, { "epoch": 0.1438466689353107, "grad_norm": 2.484375, "learning_rate": 4.757882187930412e-05, "loss": 0.5278, "step": 3276 }, { "epoch": 0.1439344874145142, "grad_norm": 3.15625, "learning_rate": 4.7575850863485345e-05, "loss": 0.5135, "step": 3278 }, { "epoch": 0.14402230589371767, "grad_norm": 2.953125, "learning_rate": 4.7572878118802496e-05, "loss": 0.5093, "step": 3280 }, { "epoch": 0.14411012437292117, "grad_norm": 3.484375, "learning_rate": 4.756990364548323e-05, "loss": 0.5549, "step": 3282 }, { "epoch": 0.14419794285212467, "grad_norm": 3.390625, "learning_rate": 4.7566927443755324e-05, "loss": 0.5789, "step": 3284 }, { "epoch": 0.14428576133132814, "grad_norm": 2.328125, "learning_rate": 4.756394951384672e-05, "loss": 0.533, "step": 3286 }, { "epoch": 0.14437357981053164, "grad_norm": 2.9375, "learning_rate": 4.756096985598545e-05, "loss": 0.5552, "step": 3288 }, { "epoch": 0.14446139828973512, "grad_norm": 2.59375, "learning_rate": 4.7557988470399695e-05, "loss": 0.5436, "step": 3290 }, { "epoch": 0.14454921676893862, "grad_norm": 2.71875, "learning_rate": 4.7555005357317774e-05, "loss": 0.5551, "step": 3292 }, { "epoch": 0.1446370352481421, "grad_norm": 3.0, "learning_rate": 4.7552020516968144e-05, "loss": 0.5224, "step": 3294 }, { "epoch": 0.1447248537273456, "grad_norm": 2.75, "learning_rate": 4.754903394957937e-05, "loss": 0.5368, "step": 3296 }, { "epoch": 0.14481267220654906, "grad_norm": 2.78125, "learning_rate": 4.7546045655380174e-05, "loss": 0.5495, "step": 3298 }, { "epoch": 0.14490049068575256, "grad_norm": 2.375, "learning_rate": 4.7543055634599394e-05, "loss": 0.5518, "step": 3300 }, { "epoch": 0.14498830916495603, "grad_norm": 2.484375, "learning_rate": 4.754006388746601e-05, "loss": 0.5558, "step": 3302 }, { "epoch": 0.14507612764415953, "grad_norm": 2.40625, "learning_rate": 4.7537070414209134e-05, "loss": 0.5431, "step": 3304 }, { "epoch": 0.145163946123363, "grad_norm": 2.90625, "learning_rate": 4.7534075215058e-05, "loss": 0.5392, "step": 3306 }, { "epoch": 0.1452517646025665, "grad_norm": 2.578125, "learning_rate": 4.753107829024198e-05, "loss": 0.5357, "step": 3308 }, { "epoch": 0.14533958308176997, "grad_norm": 2.46875, "learning_rate": 4.7528079639990596e-05, "loss": 0.5398, "step": 3310 }, { "epoch": 0.14542740156097347, "grad_norm": 2.640625, "learning_rate": 4.7525079264533464e-05, "loss": 0.5326, "step": 3312 }, { "epoch": 0.14551522004017695, "grad_norm": 3.125, "learning_rate": 4.752207716410036e-05, "loss": 0.5444, "step": 3314 }, { "epoch": 0.14560303851938045, "grad_norm": 2.640625, "learning_rate": 4.7519073338921196e-05, "loss": 0.5697, "step": 3316 }, { "epoch": 0.14569085699858392, "grad_norm": 2.59375, "learning_rate": 4.751606778922599e-05, "loss": 0.551, "step": 3318 }, { "epoch": 0.14577867547778742, "grad_norm": 2.703125, "learning_rate": 4.751306051524492e-05, "loss": 0.5416, "step": 3320 }, { "epoch": 0.1458664939569909, "grad_norm": 3.359375, "learning_rate": 4.7510051517208276e-05, "loss": 0.5223, "step": 3322 }, { "epoch": 0.1459543124361944, "grad_norm": 4.34375, "learning_rate": 4.750704079534649e-05, "loss": 0.5606, "step": 3324 }, { "epoch": 0.14604213091539786, "grad_norm": 3.921875, "learning_rate": 4.750402834989013e-05, "loss": 0.5694, "step": 3326 }, { "epoch": 0.14612994939460136, "grad_norm": 3.578125, "learning_rate": 4.7501014181069884e-05, "loss": 0.5552, "step": 3328 }, { "epoch": 0.14621776787380483, "grad_norm": 3.640625, "learning_rate": 4.749799828911657e-05, "loss": 0.5606, "step": 3330 }, { "epoch": 0.14630558635300833, "grad_norm": 3.0625, "learning_rate": 4.749498067426116e-05, "loss": 0.5545, "step": 3332 }, { "epoch": 0.14639340483221183, "grad_norm": 2.6875, "learning_rate": 4.7491961336734735e-05, "loss": 0.5326, "step": 3334 }, { "epoch": 0.1464812233114153, "grad_norm": 2.546875, "learning_rate": 4.7488940276768525e-05, "loss": 0.5556, "step": 3336 }, { "epoch": 0.1465690417906188, "grad_norm": 3.328125, "learning_rate": 4.7485917494593866e-05, "loss": 0.5552, "step": 3338 }, { "epoch": 0.14665686026982228, "grad_norm": 2.59375, "learning_rate": 4.748289299044226e-05, "loss": 0.5554, "step": 3340 }, { "epoch": 0.14674467874902578, "grad_norm": 3.09375, "learning_rate": 4.747986676454533e-05, "loss": 0.5636, "step": 3342 }, { "epoch": 0.14683249722822925, "grad_norm": 3.109375, "learning_rate": 4.747683881713481e-05, "loss": 0.5333, "step": 3344 }, { "epoch": 0.14692031570743275, "grad_norm": 2.859375, "learning_rate": 4.747380914844257e-05, "loss": 0.5331, "step": 3346 }, { "epoch": 0.14700813418663622, "grad_norm": 2.5625, "learning_rate": 4.7470777758700655e-05, "loss": 0.5181, "step": 3348 }, { "epoch": 0.14709595266583972, "grad_norm": 3.078125, "learning_rate": 4.746774464814119e-05, "loss": 0.5348, "step": 3350 }, { "epoch": 0.1471837711450432, "grad_norm": 3.03125, "learning_rate": 4.7464709816996445e-05, "loss": 0.5384, "step": 3352 }, { "epoch": 0.1472715896242467, "grad_norm": 2.78125, "learning_rate": 4.746167326549884e-05, "loss": 0.5614, "step": 3354 }, { "epoch": 0.14735940810345016, "grad_norm": 3.078125, "learning_rate": 4.745863499388092e-05, "loss": 0.5327, "step": 3356 }, { "epoch": 0.14744722658265366, "grad_norm": 2.890625, "learning_rate": 4.7455595002375344e-05, "loss": 0.5381, "step": 3358 }, { "epoch": 0.14753504506185713, "grad_norm": 3.6875, "learning_rate": 4.745255329121492e-05, "loss": 0.5582, "step": 3360 }, { "epoch": 0.14762286354106063, "grad_norm": 4.28125, "learning_rate": 4.744950986063258e-05, "loss": 0.5844, "step": 3362 }, { "epoch": 0.1477106820202641, "grad_norm": 3.390625, "learning_rate": 4.744646471086139e-05, "loss": 0.525, "step": 3364 }, { "epoch": 0.1477985004994676, "grad_norm": 3.1875, "learning_rate": 4.744341784213456e-05, "loss": 0.5444, "step": 3366 }, { "epoch": 0.14788631897867108, "grad_norm": 2.6875, "learning_rate": 4.744036925468541e-05, "loss": 0.5375, "step": 3368 }, { "epoch": 0.14797413745787458, "grad_norm": 2.796875, "learning_rate": 4.74373189487474e-05, "loss": 0.5399, "step": 3370 }, { "epoch": 0.14806195593707805, "grad_norm": 3.4375, "learning_rate": 4.743426692455413e-05, "loss": 0.5581, "step": 3372 }, { "epoch": 0.14814977441628155, "grad_norm": 3.71875, "learning_rate": 4.7431213182339315e-05, "loss": 0.5768, "step": 3374 }, { "epoch": 0.14823759289548502, "grad_norm": 3.375, "learning_rate": 4.742815772233682e-05, "loss": 0.5311, "step": 3376 }, { "epoch": 0.14832541137468852, "grad_norm": 3.25, "learning_rate": 4.742510054478063e-05, "loss": 0.539, "step": 3378 }, { "epoch": 0.148413229853892, "grad_norm": 3.375, "learning_rate": 4.7422041649904867e-05, "loss": 0.5256, "step": 3380 }, { "epoch": 0.1485010483330955, "grad_norm": 2.859375, "learning_rate": 4.7418981037943785e-05, "loss": 0.5492, "step": 3382 }, { "epoch": 0.14858886681229896, "grad_norm": 3.34375, "learning_rate": 4.741591870913175e-05, "loss": 0.5769, "step": 3384 }, { "epoch": 0.14867668529150246, "grad_norm": 2.921875, "learning_rate": 4.741285466370329e-05, "loss": 0.5478, "step": 3386 }, { "epoch": 0.14876450377070596, "grad_norm": 3.046875, "learning_rate": 4.740978890189305e-05, "loss": 0.5265, "step": 3388 }, { "epoch": 0.14885232224990944, "grad_norm": 2.8125, "learning_rate": 4.740672142393581e-05, "loss": 0.5097, "step": 3390 }, { "epoch": 0.14894014072911294, "grad_norm": 2.46875, "learning_rate": 4.740365223006646e-05, "loss": 0.5272, "step": 3392 }, { "epoch": 0.1490279592083164, "grad_norm": 3.0, "learning_rate": 4.7400581320520055e-05, "loss": 0.5462, "step": 3394 }, { "epoch": 0.1491157776875199, "grad_norm": 2.421875, "learning_rate": 4.7397508695531764e-05, "loss": 0.5324, "step": 3396 }, { "epoch": 0.14920359616672338, "grad_norm": 2.859375, "learning_rate": 4.739443435533689e-05, "loss": 0.5603, "step": 3398 }, { "epoch": 0.14929141464592688, "grad_norm": 2.515625, "learning_rate": 4.7391358300170865e-05, "loss": 0.5247, "step": 3400 }, { "epoch": 0.14937923312513035, "grad_norm": 3.046875, "learning_rate": 4.738828053026925e-05, "loss": 0.5094, "step": 3402 }, { "epoch": 0.14946705160433385, "grad_norm": 2.828125, "learning_rate": 4.7385201045867747e-05, "loss": 0.5529, "step": 3404 }, { "epoch": 0.14955487008353732, "grad_norm": 2.828125, "learning_rate": 4.738211984720218e-05, "loss": 0.5386, "step": 3406 }, { "epoch": 0.14964268856274082, "grad_norm": 2.640625, "learning_rate": 4.7379036934508506e-05, "loss": 0.5707, "step": 3408 }, { "epoch": 0.1497305070419443, "grad_norm": 2.75, "learning_rate": 4.7375952308022824e-05, "loss": 0.5598, "step": 3410 }, { "epoch": 0.1498183255211478, "grad_norm": 3.65625, "learning_rate": 4.737286596798135e-05, "loss": 0.5739, "step": 3412 }, { "epoch": 0.14990614400035127, "grad_norm": 3.734375, "learning_rate": 4.7369777914620436e-05, "loss": 0.5426, "step": 3414 }, { "epoch": 0.14999396247955477, "grad_norm": 2.625, "learning_rate": 4.736668814817657e-05, "loss": 0.5247, "step": 3416 }, { "epoch": 0.15008178095875824, "grad_norm": 2.828125, "learning_rate": 4.7363596668886364e-05, "loss": 0.5513, "step": 3418 }, { "epoch": 0.15016959943796174, "grad_norm": 2.96875, "learning_rate": 4.736050347698656e-05, "loss": 0.5728, "step": 3420 }, { "epoch": 0.1502574179171652, "grad_norm": 3.0625, "learning_rate": 4.7357408572714046e-05, "loss": 0.554, "step": 3422 }, { "epoch": 0.1503452363963687, "grad_norm": 2.96875, "learning_rate": 4.735431195630582e-05, "loss": 0.5098, "step": 3424 }, { "epoch": 0.15043305487557218, "grad_norm": 2.390625, "learning_rate": 4.7351213627999027e-05, "loss": 0.5609, "step": 3426 }, { "epoch": 0.15052087335477568, "grad_norm": 3.0, "learning_rate": 4.734811358803093e-05, "loss": 0.5503, "step": 3428 }, { "epoch": 0.15060869183397915, "grad_norm": 3.046875, "learning_rate": 4.734501183663894e-05, "loss": 0.5682, "step": 3430 }, { "epoch": 0.15069651031318265, "grad_norm": 2.828125, "learning_rate": 4.7341908374060595e-05, "loss": 0.5374, "step": 3432 }, { "epoch": 0.15078432879238612, "grad_norm": 2.90625, "learning_rate": 4.733880320053354e-05, "loss": 0.5598, "step": 3434 }, { "epoch": 0.15087214727158962, "grad_norm": 2.828125, "learning_rate": 4.733569631629559e-05, "loss": 0.5408, "step": 3436 }, { "epoch": 0.15095996575079312, "grad_norm": 2.875, "learning_rate": 4.7332587721584656e-05, "loss": 0.5214, "step": 3438 }, { "epoch": 0.1510477842299966, "grad_norm": 2.625, "learning_rate": 4.732947741663881e-05, "loss": 0.5384, "step": 3440 }, { "epoch": 0.1511356027092001, "grad_norm": 2.515625, "learning_rate": 4.732636540169621e-05, "loss": 0.5455, "step": 3442 }, { "epoch": 0.15122342118840357, "grad_norm": 2.578125, "learning_rate": 4.732325167699522e-05, "loss": 0.5517, "step": 3444 }, { "epoch": 0.15131123966760707, "grad_norm": 2.59375, "learning_rate": 4.732013624277425e-05, "loss": 0.5336, "step": 3446 }, { "epoch": 0.15139905814681054, "grad_norm": 2.640625, "learning_rate": 4.73170190992719e-05, "loss": 0.5347, "step": 3448 }, { "epoch": 0.15148687662601404, "grad_norm": 2.5, "learning_rate": 4.731390024672688e-05, "loss": 0.5212, "step": 3450 }, { "epoch": 0.1515746951052175, "grad_norm": 2.609375, "learning_rate": 4.731077968537803e-05, "loss": 0.5469, "step": 3452 }, { "epoch": 0.151662513584421, "grad_norm": 3.234375, "learning_rate": 4.7307657415464324e-05, "loss": 0.559, "step": 3454 }, { "epoch": 0.15175033206362448, "grad_norm": 2.859375, "learning_rate": 4.7304533437224866e-05, "loss": 0.5247, "step": 3456 }, { "epoch": 0.15183815054282798, "grad_norm": 3.765625, "learning_rate": 4.730140775089888e-05, "loss": 0.5356, "step": 3458 }, { "epoch": 0.15192596902203145, "grad_norm": 3.59375, "learning_rate": 4.729828035672576e-05, "loss": 0.5345, "step": 3460 }, { "epoch": 0.15201378750123495, "grad_norm": 3.578125, "learning_rate": 4.729515125494497e-05, "loss": 0.5386, "step": 3462 }, { "epoch": 0.15210160598043843, "grad_norm": 5.0625, "learning_rate": 4.729202044579616e-05, "loss": 0.531, "step": 3464 }, { "epoch": 0.15218942445964193, "grad_norm": 7.71875, "learning_rate": 4.7288887929519074e-05, "loss": 0.5581, "step": 3466 }, { "epoch": 0.1522772429388454, "grad_norm": 3.484375, "learning_rate": 4.7285753706353614e-05, "loss": 0.5721, "step": 3468 }, { "epoch": 0.1523650614180489, "grad_norm": 2.703125, "learning_rate": 4.728261777653979e-05, "loss": 0.5701, "step": 3470 }, { "epoch": 0.15245287989725237, "grad_norm": 4.1875, "learning_rate": 4.7279480140317756e-05, "loss": 0.5465, "step": 3472 }, { "epoch": 0.15254069837645587, "grad_norm": 3.90625, "learning_rate": 4.727634079792779e-05, "loss": 0.5398, "step": 3474 }, { "epoch": 0.15262851685565934, "grad_norm": 3.3125, "learning_rate": 4.727319974961031e-05, "loss": 0.5536, "step": 3476 }, { "epoch": 0.15271633533486284, "grad_norm": 3.71875, "learning_rate": 4.7270056995605846e-05, "loss": 0.5496, "step": 3478 }, { "epoch": 0.1528041538140663, "grad_norm": 3.65625, "learning_rate": 4.726691253615509e-05, "loss": 0.5222, "step": 3480 }, { "epoch": 0.1528919722932698, "grad_norm": 3.25, "learning_rate": 4.726376637149883e-05, "loss": 0.5456, "step": 3482 }, { "epoch": 0.15297979077247328, "grad_norm": 3.078125, "learning_rate": 4.7260618501877994e-05, "loss": 0.5076, "step": 3484 }, { "epoch": 0.15306760925167678, "grad_norm": 3.3125, "learning_rate": 4.725746892753367e-05, "loss": 0.5468, "step": 3486 }, { "epoch": 0.15315542773088028, "grad_norm": 3.171875, "learning_rate": 4.725431764870704e-05, "loss": 0.5179, "step": 3488 }, { "epoch": 0.15324324621008376, "grad_norm": 3.1875, "learning_rate": 4.7251164665639426e-05, "loss": 0.5522, "step": 3490 }, { "epoch": 0.15333106468928726, "grad_norm": 3.953125, "learning_rate": 4.724800997857228e-05, "loss": 0.5371, "step": 3492 }, { "epoch": 0.15341888316849073, "grad_norm": 3.796875, "learning_rate": 4.724485358774721e-05, "loss": 0.5303, "step": 3494 }, { "epoch": 0.15350670164769423, "grad_norm": 3.59375, "learning_rate": 4.724169549340591e-05, "loss": 0.5051, "step": 3496 }, { "epoch": 0.1535945201268977, "grad_norm": 3.59375, "learning_rate": 4.723853569579024e-05, "loss": 0.5499, "step": 3498 }, { "epoch": 0.1536823386061012, "grad_norm": 3.265625, "learning_rate": 4.723537419514218e-05, "loss": 0.5448, "step": 3500 }, { "epoch": 0.15377015708530467, "grad_norm": 3.953125, "learning_rate": 4.723221099170383e-05, "loss": 0.5605, "step": 3502 }, { "epoch": 0.15385797556450817, "grad_norm": 2.765625, "learning_rate": 4.7229046085717434e-05, "loss": 0.5353, "step": 3504 }, { "epoch": 0.15394579404371164, "grad_norm": 2.5625, "learning_rate": 4.7225879477425364e-05, "loss": 0.5276, "step": 3506 }, { "epoch": 0.15403361252291514, "grad_norm": 2.546875, "learning_rate": 4.722271116707011e-05, "loss": 0.5488, "step": 3508 }, { "epoch": 0.15412143100211861, "grad_norm": 2.640625, "learning_rate": 4.72195411548943e-05, "loss": 0.5281, "step": 3510 }, { "epoch": 0.1542092494813221, "grad_norm": 3.203125, "learning_rate": 4.7216369441140715e-05, "loss": 0.5644, "step": 3512 }, { "epoch": 0.15429706796052559, "grad_norm": 2.84375, "learning_rate": 4.721319602605223e-05, "loss": 0.503, "step": 3514 }, { "epoch": 0.15438488643972909, "grad_norm": 2.859375, "learning_rate": 4.721002090987187e-05, "loss": 0.5472, "step": 3516 }, { "epoch": 0.15447270491893256, "grad_norm": 2.9375, "learning_rate": 4.720684409284277e-05, "loss": 0.5533, "step": 3518 }, { "epoch": 0.15456052339813606, "grad_norm": 3.125, "learning_rate": 4.7203665575208244e-05, "loss": 0.5354, "step": 3520 }, { "epoch": 0.15464834187733953, "grad_norm": 2.5625, "learning_rate": 4.720048535721168e-05, "loss": 0.5225, "step": 3522 }, { "epoch": 0.15473616035654303, "grad_norm": 2.78125, "learning_rate": 4.7197303439096626e-05, "loss": 0.5436, "step": 3524 }, { "epoch": 0.1548239788357465, "grad_norm": 3.21875, "learning_rate": 4.7194119821106754e-05, "loss": 0.5509, "step": 3526 }, { "epoch": 0.15491179731495, "grad_norm": 3.28125, "learning_rate": 4.719093450348586e-05, "loss": 0.5602, "step": 3528 }, { "epoch": 0.15499961579415347, "grad_norm": 3.09375, "learning_rate": 4.718774748647789e-05, "loss": 0.5344, "step": 3530 }, { "epoch": 0.15508743427335697, "grad_norm": 3.1875, "learning_rate": 4.718455877032689e-05, "loss": 0.5435, "step": 3532 }, { "epoch": 0.15517525275256044, "grad_norm": 3.0625, "learning_rate": 4.718136835527707e-05, "loss": 0.5291, "step": 3534 }, { "epoch": 0.15526307123176394, "grad_norm": 2.953125, "learning_rate": 4.7178176241572735e-05, "loss": 0.5308, "step": 3536 }, { "epoch": 0.15535088971096744, "grad_norm": 3.0, "learning_rate": 4.717498242945836e-05, "loss": 0.5524, "step": 3538 }, { "epoch": 0.15543870819017092, "grad_norm": 2.578125, "learning_rate": 4.717178691917851e-05, "loss": 0.5144, "step": 3540 }, { "epoch": 0.15552652666937442, "grad_norm": 2.84375, "learning_rate": 4.71685897109779e-05, "loss": 0.566, "step": 3542 }, { "epoch": 0.1556143451485779, "grad_norm": 3.578125, "learning_rate": 4.716539080510137e-05, "loss": 0.5421, "step": 3544 }, { "epoch": 0.1557021636277814, "grad_norm": 2.90625, "learning_rate": 4.7162190201793904e-05, "loss": 0.5453, "step": 3546 }, { "epoch": 0.15578998210698486, "grad_norm": 3.09375, "learning_rate": 4.71589879013006e-05, "loss": 0.5369, "step": 3548 }, { "epoch": 0.15587780058618836, "grad_norm": 3.0, "learning_rate": 4.715578390386669e-05, "loss": 0.5164, "step": 3550 }, { "epoch": 0.15596561906539183, "grad_norm": 2.46875, "learning_rate": 4.715257820973754e-05, "loss": 0.5128, "step": 3552 }, { "epoch": 0.15605343754459533, "grad_norm": 2.65625, "learning_rate": 4.7149370819158635e-05, "loss": 0.5106, "step": 3554 }, { "epoch": 0.1561412560237988, "grad_norm": 2.578125, "learning_rate": 4.714616173237561e-05, "loss": 0.5405, "step": 3556 }, { "epoch": 0.1562290745030023, "grad_norm": 2.515625, "learning_rate": 4.7142950949634206e-05, "loss": 0.5533, "step": 3558 }, { "epoch": 0.15631689298220577, "grad_norm": 2.765625, "learning_rate": 4.7139738471180314e-05, "loss": 0.5214, "step": 3560 }, { "epoch": 0.15640471146140927, "grad_norm": 2.875, "learning_rate": 4.713652429725994e-05, "loss": 0.5371, "step": 3562 }, { "epoch": 0.15649252994061275, "grad_norm": 2.578125, "learning_rate": 4.713330842811923e-05, "loss": 0.5322, "step": 3564 }, { "epoch": 0.15658034841981625, "grad_norm": 2.59375, "learning_rate": 4.713009086400445e-05, "loss": 0.5006, "step": 3566 }, { "epoch": 0.15666816689901972, "grad_norm": 2.5625, "learning_rate": 4.712687160516202e-05, "loss": 0.5391, "step": 3568 }, { "epoch": 0.15675598537822322, "grad_norm": 2.515625, "learning_rate": 4.712365065183844e-05, "loss": 0.5302, "step": 3570 }, { "epoch": 0.1568438038574267, "grad_norm": 2.703125, "learning_rate": 4.71204280042804e-05, "loss": 0.5188, "step": 3572 }, { "epoch": 0.1569316223366302, "grad_norm": 2.671875, "learning_rate": 4.711720366273468e-05, "loss": 0.5314, "step": 3574 }, { "epoch": 0.15701944081583366, "grad_norm": 2.46875, "learning_rate": 4.71139776274482e-05, "loss": 0.5141, "step": 3576 }, { "epoch": 0.15710725929503716, "grad_norm": 2.5625, "learning_rate": 4.711074989866802e-05, "loss": 0.5583, "step": 3578 }, { "epoch": 0.15719507777424063, "grad_norm": 2.578125, "learning_rate": 4.71075204766413e-05, "loss": 0.5613, "step": 3580 }, { "epoch": 0.15728289625344413, "grad_norm": 2.53125, "learning_rate": 4.710428936161537e-05, "loss": 0.5293, "step": 3582 }, { "epoch": 0.1573707147326476, "grad_norm": 2.765625, "learning_rate": 4.7101056553837665e-05, "loss": 0.5258, "step": 3584 }, { "epoch": 0.1574585332118511, "grad_norm": 2.78125, "learning_rate": 4.709782205355574e-05, "loss": 0.5555, "step": 3586 }, { "epoch": 0.1575463516910546, "grad_norm": 4.46875, "learning_rate": 4.709458586101731e-05, "loss": 0.5505, "step": 3588 }, { "epoch": 0.15763417017025808, "grad_norm": 4.09375, "learning_rate": 4.70913479764702e-05, "loss": 0.5502, "step": 3590 }, { "epoch": 0.15772198864946158, "grad_norm": 3.28125, "learning_rate": 4.708810840016237e-05, "loss": 0.5308, "step": 3592 }, { "epoch": 0.15780980712866505, "grad_norm": 3.25, "learning_rate": 4.7084867132341895e-05, "loss": 0.5257, "step": 3594 }, { "epoch": 0.15789762560786855, "grad_norm": 2.90625, "learning_rate": 4.708162417325701e-05, "loss": 0.5202, "step": 3596 }, { "epoch": 0.15798544408707202, "grad_norm": 2.578125, "learning_rate": 4.7078379523156045e-05, "loss": 0.5356, "step": 3598 }, { "epoch": 0.15807326256627552, "grad_norm": 2.90625, "learning_rate": 4.707513318228749e-05, "loss": 0.5395, "step": 3600 }, { "epoch": 0.158161081045479, "grad_norm": 2.828125, "learning_rate": 4.707188515089994e-05, "loss": 0.5471, "step": 3602 }, { "epoch": 0.1582488995246825, "grad_norm": 2.640625, "learning_rate": 4.706863542924213e-05, "loss": 0.544, "step": 3604 }, { "epoch": 0.15833671800388596, "grad_norm": 2.796875, "learning_rate": 4.706538401756294e-05, "loss": 0.5097, "step": 3606 }, { "epoch": 0.15842453648308946, "grad_norm": 2.8125, "learning_rate": 4.7062130916111344e-05, "loss": 0.5489, "step": 3608 }, { "epoch": 0.15851235496229293, "grad_norm": 2.65625, "learning_rate": 4.705887612513647e-05, "loss": 0.5344, "step": 3610 }, { "epoch": 0.15860017344149643, "grad_norm": 3.359375, "learning_rate": 4.705561964488758e-05, "loss": 0.5395, "step": 3612 }, { "epoch": 0.1586879919206999, "grad_norm": 2.75, "learning_rate": 4.705236147561405e-05, "loss": 0.5617, "step": 3614 }, { "epoch": 0.1587758103999034, "grad_norm": 2.40625, "learning_rate": 4.70491016175654e-05, "loss": 0.5352, "step": 3616 }, { "epoch": 0.15886362887910688, "grad_norm": 2.421875, "learning_rate": 4.704584007099125e-05, "loss": 0.5261, "step": 3618 }, { "epoch": 0.15895144735831038, "grad_norm": 2.6875, "learning_rate": 4.7042576836141395e-05, "loss": 0.5221, "step": 3620 }, { "epoch": 0.15903926583751385, "grad_norm": 2.4375, "learning_rate": 4.703931191326572e-05, "loss": 0.5078, "step": 3622 }, { "epoch": 0.15912708431671735, "grad_norm": 2.671875, "learning_rate": 4.703604530261424e-05, "loss": 0.5281, "step": 3624 }, { "epoch": 0.15921490279592082, "grad_norm": 2.890625, "learning_rate": 4.7032777004437136e-05, "loss": 0.532, "step": 3626 }, { "epoch": 0.15930272127512432, "grad_norm": 2.84375, "learning_rate": 4.70295070189847e-05, "loss": 0.5323, "step": 3628 }, { "epoch": 0.1593905397543278, "grad_norm": 2.890625, "learning_rate": 4.702623534650732e-05, "loss": 0.5521, "step": 3630 }, { "epoch": 0.1594783582335313, "grad_norm": 2.609375, "learning_rate": 4.7022961987255566e-05, "loss": 0.5377, "step": 3632 }, { "epoch": 0.15956617671273476, "grad_norm": 3.515625, "learning_rate": 4.70196869414801e-05, "loss": 0.5471, "step": 3634 }, { "epoch": 0.15965399519193826, "grad_norm": 3.609375, "learning_rate": 4.701641020943173e-05, "loss": 0.5355, "step": 3636 }, { "epoch": 0.15974181367114176, "grad_norm": 3.546875, "learning_rate": 4.7013131791361385e-05, "loss": 0.5412, "step": 3638 }, { "epoch": 0.15982963215034524, "grad_norm": 2.640625, "learning_rate": 4.7009851687520134e-05, "loss": 0.5286, "step": 3640 }, { "epoch": 0.15991745062954874, "grad_norm": 3.234375, "learning_rate": 4.7006569898159165e-05, "loss": 0.5662, "step": 3642 }, { "epoch": 0.1600052691087522, "grad_norm": 3.0, "learning_rate": 4.7003286423529795e-05, "loss": 0.5374, "step": 3644 }, { "epoch": 0.1600930875879557, "grad_norm": 2.703125, "learning_rate": 4.700000126388348e-05, "loss": 0.535, "step": 3646 }, { "epoch": 0.16018090606715918, "grad_norm": 2.515625, "learning_rate": 4.69967144194718e-05, "loss": 0.5094, "step": 3648 }, { "epoch": 0.16026872454636268, "grad_norm": 2.453125, "learning_rate": 4.6993425890546444e-05, "loss": 0.5443, "step": 3650 }, { "epoch": 0.16035654302556615, "grad_norm": 2.21875, "learning_rate": 4.699013567735927e-05, "loss": 0.5244, "step": 3652 }, { "epoch": 0.16044436150476965, "grad_norm": 2.578125, "learning_rate": 4.698684378016222e-05, "loss": 0.5368, "step": 3654 }, { "epoch": 0.16053217998397312, "grad_norm": 2.78125, "learning_rate": 4.698355019920743e-05, "loss": 0.5424, "step": 3656 }, { "epoch": 0.16061999846317662, "grad_norm": 2.921875, "learning_rate": 4.698025493474707e-05, "loss": 0.5187, "step": 3658 }, { "epoch": 0.1607078169423801, "grad_norm": 3.609375, "learning_rate": 4.697695798703353e-05, "loss": 0.5546, "step": 3660 }, { "epoch": 0.1607956354215836, "grad_norm": 3.09375, "learning_rate": 4.697365935631928e-05, "loss": 0.5323, "step": 3662 }, { "epoch": 0.16088345390078707, "grad_norm": 2.3125, "learning_rate": 4.697035904285693e-05, "loss": 0.5484, "step": 3664 }, { "epoch": 0.16097127237999057, "grad_norm": 2.84375, "learning_rate": 4.696705704689921e-05, "loss": 0.524, "step": 3666 }, { "epoch": 0.16105909085919404, "grad_norm": 3.34375, "learning_rate": 4.6963753368699e-05, "loss": 0.5624, "step": 3668 }, { "epoch": 0.16114690933839754, "grad_norm": 2.53125, "learning_rate": 4.6960448008509296e-05, "loss": 0.5487, "step": 3670 }, { "epoch": 0.161234727817601, "grad_norm": 2.640625, "learning_rate": 4.695714096658321e-05, "loss": 0.5272, "step": 3672 }, { "epoch": 0.1613225462968045, "grad_norm": 2.71875, "learning_rate": 4.695383224317401e-05, "loss": 0.5231, "step": 3674 }, { "epoch": 0.16141036477600798, "grad_norm": 2.859375, "learning_rate": 4.695052183853508e-05, "loss": 0.5264, "step": 3676 }, { "epoch": 0.16149818325521148, "grad_norm": 2.640625, "learning_rate": 4.694720975291991e-05, "loss": 0.5296, "step": 3678 }, { "epoch": 0.16158600173441495, "grad_norm": 3.140625, "learning_rate": 4.694389598658217e-05, "loss": 0.533, "step": 3680 }, { "epoch": 0.16167382021361845, "grad_norm": 2.671875, "learning_rate": 4.6940580539775616e-05, "loss": 0.4755, "step": 3682 }, { "epoch": 0.16176163869282192, "grad_norm": 2.9375, "learning_rate": 4.6937263412754135e-05, "loss": 0.5346, "step": 3684 }, { "epoch": 0.16184945717202542, "grad_norm": 3.09375, "learning_rate": 4.693394460577177e-05, "loss": 0.5129, "step": 3686 }, { "epoch": 0.16193727565122892, "grad_norm": 2.6875, "learning_rate": 4.693062411908267e-05, "loss": 0.5131, "step": 3688 }, { "epoch": 0.1620250941304324, "grad_norm": 2.5, "learning_rate": 4.6927301952941105e-05, "loss": 0.5093, "step": 3690 }, { "epoch": 0.1621129126096359, "grad_norm": 2.828125, "learning_rate": 4.6923978107601516e-05, "loss": 0.5425, "step": 3692 }, { "epoch": 0.16220073108883937, "grad_norm": 2.5, "learning_rate": 4.692065258331842e-05, "loss": 0.5489, "step": 3694 }, { "epoch": 0.16228854956804287, "grad_norm": 2.8125, "learning_rate": 4.69173253803465e-05, "loss": 0.5584, "step": 3696 }, { "epoch": 0.16237636804724634, "grad_norm": 3.34375, "learning_rate": 4.691399649894054e-05, "loss": 0.5217, "step": 3698 }, { "epoch": 0.16246418652644984, "grad_norm": 2.828125, "learning_rate": 4.691066593935548e-05, "loss": 0.5334, "step": 3700 }, { "epoch": 0.1625520050056533, "grad_norm": 3.140625, "learning_rate": 4.6907333701846365e-05, "loss": 0.5613, "step": 3702 }, { "epoch": 0.1626398234848568, "grad_norm": 4.25, "learning_rate": 4.690399978666839e-05, "loss": 0.5262, "step": 3704 }, { "epoch": 0.16272764196406028, "grad_norm": 2.828125, "learning_rate": 4.690066419407686e-05, "loss": 0.5465, "step": 3706 }, { "epoch": 0.16281546044326378, "grad_norm": 3.25, "learning_rate": 4.689732692432722e-05, "loss": 0.511, "step": 3708 }, { "epoch": 0.16290327892246725, "grad_norm": 3.109375, "learning_rate": 4.6893987977675026e-05, "loss": 0.5364, "step": 3710 }, { "epoch": 0.16299109740167075, "grad_norm": 2.859375, "learning_rate": 4.689064735437599e-05, "loss": 0.5153, "step": 3712 }, { "epoch": 0.16307891588087423, "grad_norm": 2.515625, "learning_rate": 4.688730505468593e-05, "loss": 0.5287, "step": 3714 }, { "epoch": 0.16316673436007773, "grad_norm": 2.625, "learning_rate": 4.688396107886081e-05, "loss": 0.5298, "step": 3716 }, { "epoch": 0.1632545528392812, "grad_norm": 2.75, "learning_rate": 4.688061542715669e-05, "loss": 0.5106, "step": 3718 }, { "epoch": 0.1633423713184847, "grad_norm": 2.375, "learning_rate": 4.6877268099829804e-05, "loss": 0.555, "step": 3720 }, { "epoch": 0.16343018979768817, "grad_norm": 2.78125, "learning_rate": 4.687391909713648e-05, "loss": 0.5382, "step": 3722 }, { "epoch": 0.16351800827689167, "grad_norm": 2.578125, "learning_rate": 4.6870568419333185e-05, "loss": 0.5163, "step": 3724 }, { "epoch": 0.16360582675609514, "grad_norm": 2.4375, "learning_rate": 4.6867216066676524e-05, "loss": 0.5185, "step": 3726 }, { "epoch": 0.16369364523529864, "grad_norm": 2.515625, "learning_rate": 4.686386203942321e-05, "loss": 0.5344, "step": 3728 }, { "epoch": 0.1637814637145021, "grad_norm": 2.953125, "learning_rate": 4.6860506337830105e-05, "loss": 0.5251, "step": 3730 }, { "epoch": 0.1638692821937056, "grad_norm": 2.640625, "learning_rate": 4.6857148962154185e-05, "loss": 0.4998, "step": 3732 }, { "epoch": 0.16395710067290908, "grad_norm": 2.859375, "learning_rate": 4.6853789912652554e-05, "loss": 0.5155, "step": 3734 }, { "epoch": 0.16404491915211258, "grad_norm": 2.859375, "learning_rate": 4.6850429189582454e-05, "loss": 0.5498, "step": 3736 }, { "epoch": 0.16413273763131608, "grad_norm": 2.859375, "learning_rate": 4.684706679320125e-05, "loss": 0.4993, "step": 3738 }, { "epoch": 0.16422055611051956, "grad_norm": 2.46875, "learning_rate": 4.684370272376643e-05, "loss": 0.543, "step": 3740 }, { "epoch": 0.16430837458972306, "grad_norm": 2.40625, "learning_rate": 4.684033698153562e-05, "loss": 0.5161, "step": 3742 }, { "epoch": 0.16439619306892653, "grad_norm": 3.140625, "learning_rate": 4.683696956676657e-05, "loss": 0.5639, "step": 3744 }, { "epoch": 0.16448401154813003, "grad_norm": 2.53125, "learning_rate": 4.6833600479717155e-05, "loss": 0.5518, "step": 3746 }, { "epoch": 0.1645718300273335, "grad_norm": 2.625, "learning_rate": 4.683022972064538e-05, "loss": 0.5062, "step": 3748 }, { "epoch": 0.164659648506537, "grad_norm": 2.203125, "learning_rate": 4.682685728980939e-05, "loss": 0.537, "step": 3750 }, { "epoch": 0.16474746698574047, "grad_norm": 2.78125, "learning_rate": 4.682348318746742e-05, "loss": 0.5347, "step": 3752 }, { "epoch": 0.16483528546494397, "grad_norm": 2.75, "learning_rate": 4.682010741387789e-05, "loss": 0.514, "step": 3754 }, { "epoch": 0.16492310394414744, "grad_norm": 3.3125, "learning_rate": 4.6816729969299295e-05, "loss": 0.5405, "step": 3756 }, { "epoch": 0.16501092242335094, "grad_norm": 2.578125, "learning_rate": 4.681335085399029e-05, "loss": 0.5304, "step": 3758 }, { "epoch": 0.16509874090255441, "grad_norm": 2.59375, "learning_rate": 4.680997006820965e-05, "loss": 0.5137, "step": 3760 }, { "epoch": 0.1651865593817579, "grad_norm": 2.796875, "learning_rate": 4.680658761221628e-05, "loss": 0.53, "step": 3762 }, { "epoch": 0.16527437786096139, "grad_norm": 2.34375, "learning_rate": 4.6803203486269195e-05, "loss": 0.552, "step": 3764 }, { "epoch": 0.16536219634016489, "grad_norm": 2.421875, "learning_rate": 4.679981769062756e-05, "loss": 0.527, "step": 3766 }, { "epoch": 0.16545001481936836, "grad_norm": 2.46875, "learning_rate": 4.6796430225550664e-05, "loss": 0.5067, "step": 3768 }, { "epoch": 0.16553783329857186, "grad_norm": 2.625, "learning_rate": 4.679304109129792e-05, "loss": 0.5127, "step": 3770 }, { "epoch": 0.16562565177777533, "grad_norm": 2.578125, "learning_rate": 4.6789650288128855e-05, "loss": 0.5087, "step": 3772 }, { "epoch": 0.16571347025697883, "grad_norm": 2.8125, "learning_rate": 4.678625781630315e-05, "loss": 0.5192, "step": 3774 }, { "epoch": 0.1658012887361823, "grad_norm": 3.46875, "learning_rate": 4.6782863676080605e-05, "loss": 0.5258, "step": 3776 }, { "epoch": 0.1658891072153858, "grad_norm": 3.296875, "learning_rate": 4.6779467867721135e-05, "loss": 0.5352, "step": 3778 }, { "epoch": 0.16597692569458927, "grad_norm": 3.28125, "learning_rate": 4.67760703914848e-05, "loss": 0.5331, "step": 3780 }, { "epoch": 0.16606474417379277, "grad_norm": 2.75, "learning_rate": 4.6772671247631764e-05, "loss": 0.5256, "step": 3782 }, { "epoch": 0.16615256265299624, "grad_norm": 2.671875, "learning_rate": 4.676927043642235e-05, "loss": 0.5047, "step": 3784 }, { "epoch": 0.16624038113219974, "grad_norm": 2.5625, "learning_rate": 4.676586795811699e-05, "loss": 0.5068, "step": 3786 }, { "epoch": 0.16632819961140322, "grad_norm": 2.578125, "learning_rate": 4.676246381297624e-05, "loss": 0.5406, "step": 3788 }, { "epoch": 0.16641601809060672, "grad_norm": 2.671875, "learning_rate": 4.6759058001260794e-05, "loss": 0.4963, "step": 3790 }, { "epoch": 0.16650383656981022, "grad_norm": 2.75, "learning_rate": 4.6755650523231476e-05, "loss": 0.5411, "step": 3792 }, { "epoch": 0.1665916550490137, "grad_norm": 2.6875, "learning_rate": 4.675224137914922e-05, "loss": 0.5449, "step": 3794 }, { "epoch": 0.1666794735282172, "grad_norm": 3.0625, "learning_rate": 4.674883056927511e-05, "loss": 0.5374, "step": 3796 }, { "epoch": 0.16676729200742066, "grad_norm": 3.40625, "learning_rate": 4.674541809387033e-05, "loss": 0.531, "step": 3798 }, { "epoch": 0.16685511048662416, "grad_norm": 2.828125, "learning_rate": 4.674200395319623e-05, "loss": 0.5496, "step": 3800 }, { "epoch": 0.16694292896582763, "grad_norm": 2.578125, "learning_rate": 4.673858814751425e-05, "loss": 0.5026, "step": 3802 }, { "epoch": 0.16703074744503113, "grad_norm": 2.65625, "learning_rate": 4.673517067708598e-05, "loss": 0.5376, "step": 3804 }, { "epoch": 0.1671185659242346, "grad_norm": 2.5625, "learning_rate": 4.6731751542173136e-05, "loss": 0.4992, "step": 3806 }, { "epoch": 0.1672063844034381, "grad_norm": 2.515625, "learning_rate": 4.672833074303754e-05, "loss": 0.5246, "step": 3808 }, { "epoch": 0.16729420288264157, "grad_norm": 2.9375, "learning_rate": 4.672490827994117e-05, "loss": 0.5128, "step": 3810 }, { "epoch": 0.16738202136184507, "grad_norm": 3.078125, "learning_rate": 4.6721484153146124e-05, "loss": 0.5459, "step": 3812 }, { "epoch": 0.16746983984104855, "grad_norm": 3.265625, "learning_rate": 4.671805836291461e-05, "loss": 0.5388, "step": 3814 }, { "epoch": 0.16755765832025205, "grad_norm": 2.75, "learning_rate": 4.671463090950897e-05, "loss": 0.5167, "step": 3816 }, { "epoch": 0.16764547679945552, "grad_norm": 3.125, "learning_rate": 4.67112017931917e-05, "loss": 0.5059, "step": 3818 }, { "epoch": 0.16773329527865902, "grad_norm": 2.8125, "learning_rate": 4.670777101422539e-05, "loss": 0.5303, "step": 3820 }, { "epoch": 0.1678211137578625, "grad_norm": 2.703125, "learning_rate": 4.6704338572872773e-05, "loss": 0.5233, "step": 3822 }, { "epoch": 0.167908932237066, "grad_norm": 3.21875, "learning_rate": 4.67009044693967e-05, "loss": 0.5091, "step": 3824 }, { "epoch": 0.16799675071626946, "grad_norm": 2.703125, "learning_rate": 4.6697468704060166e-05, "loss": 0.524, "step": 3826 }, { "epoch": 0.16808456919547296, "grad_norm": 2.90625, "learning_rate": 4.6694031277126285e-05, "loss": 0.5386, "step": 3828 }, { "epoch": 0.16817238767467643, "grad_norm": 3.1875, "learning_rate": 4.6690592188858275e-05, "loss": 0.5196, "step": 3830 }, { "epoch": 0.16826020615387993, "grad_norm": 3.4375, "learning_rate": 4.6687151439519516e-05, "loss": 0.5395, "step": 3832 }, { "epoch": 0.1683480246330834, "grad_norm": 3.34375, "learning_rate": 4.668370902937351e-05, "loss": 0.5279, "step": 3834 }, { "epoch": 0.1684358431122869, "grad_norm": 3.46875, "learning_rate": 4.668026495868386e-05, "loss": 0.5156, "step": 3836 }, { "epoch": 0.16852366159149038, "grad_norm": 2.625, "learning_rate": 4.667681922771433e-05, "loss": 0.4916, "step": 3838 }, { "epoch": 0.16861148007069388, "grad_norm": 2.546875, "learning_rate": 4.6673371836728785e-05, "loss": 0.5422, "step": 3840 }, { "epoch": 0.16869929854989738, "grad_norm": 2.71875, "learning_rate": 4.6669922785991225e-05, "loss": 0.5224, "step": 3842 }, { "epoch": 0.16878711702910085, "grad_norm": 2.65625, "learning_rate": 4.666647207576579e-05, "loss": 0.5403, "step": 3844 }, { "epoch": 0.16887493550830435, "grad_norm": 2.515625, "learning_rate": 4.666301970631672e-05, "loss": 0.5061, "step": 3846 }, { "epoch": 0.16896275398750782, "grad_norm": 2.859375, "learning_rate": 4.6659565677908414e-05, "loss": 0.504, "step": 3848 }, { "epoch": 0.16905057246671132, "grad_norm": 3.953125, "learning_rate": 4.665610999080537e-05, "loss": 0.5388, "step": 3850 }, { "epoch": 0.1691383909459148, "grad_norm": 3.0625, "learning_rate": 4.6652652645272244e-05, "loss": 0.5559, "step": 3852 }, { "epoch": 0.1692262094251183, "grad_norm": 3.046875, "learning_rate": 4.6649193641573784e-05, "loss": 0.5189, "step": 3854 }, { "epoch": 0.16931402790432176, "grad_norm": 3.453125, "learning_rate": 4.6645732979974884e-05, "loss": 0.5244, "step": 3856 }, { "epoch": 0.16940184638352526, "grad_norm": 3.0, "learning_rate": 4.664227066074056e-05, "loss": 0.5008, "step": 3858 }, { "epoch": 0.16948966486272873, "grad_norm": 2.953125, "learning_rate": 4.663880668413596e-05, "loss": 0.5339, "step": 3860 }, { "epoch": 0.16957748334193223, "grad_norm": 2.828125, "learning_rate": 4.663534105042636e-05, "loss": 0.5334, "step": 3862 }, { "epoch": 0.1696653018211357, "grad_norm": 3.296875, "learning_rate": 4.6631873759877156e-05, "loss": 0.5465, "step": 3864 }, { "epoch": 0.1697531203003392, "grad_norm": 3.9375, "learning_rate": 4.6628404812753876e-05, "loss": 0.523, "step": 3866 }, { "epoch": 0.16984093877954268, "grad_norm": 3.453125, "learning_rate": 4.662493420932217e-05, "loss": 0.5374, "step": 3868 }, { "epoch": 0.16992875725874618, "grad_norm": 3.234375, "learning_rate": 4.662146194984782e-05, "loss": 0.5576, "step": 3870 }, { "epoch": 0.17001657573794965, "grad_norm": 2.890625, "learning_rate": 4.661798803459673e-05, "loss": 0.521, "step": 3872 }, { "epoch": 0.17010439421715315, "grad_norm": 3.328125, "learning_rate": 4.661451246383492e-05, "loss": 0.5187, "step": 3874 }, { "epoch": 0.17019221269635662, "grad_norm": 3.46875, "learning_rate": 4.661103523782858e-05, "loss": 0.4808, "step": 3876 }, { "epoch": 0.17028003117556012, "grad_norm": 3.703125, "learning_rate": 4.660755635684398e-05, "loss": 0.5104, "step": 3878 }, { "epoch": 0.1703678496547636, "grad_norm": 3.734375, "learning_rate": 4.660407582114753e-05, "loss": 0.5527, "step": 3880 }, { "epoch": 0.1704556681339671, "grad_norm": 3.328125, "learning_rate": 4.6600593631005776e-05, "loss": 0.5165, "step": 3882 }, { "epoch": 0.17054348661317056, "grad_norm": 2.59375, "learning_rate": 4.6597109786685386e-05, "loss": 0.5142, "step": 3884 }, { "epoch": 0.17063130509237406, "grad_norm": 2.65625, "learning_rate": 4.6593624288453146e-05, "loss": 0.5223, "step": 3886 }, { "epoch": 0.17071912357157754, "grad_norm": 2.4375, "learning_rate": 4.6590137136575986e-05, "loss": 0.54, "step": 3888 }, { "epoch": 0.17080694205078104, "grad_norm": 2.6875, "learning_rate": 4.6586648331320946e-05, "loss": 0.4989, "step": 3890 }, { "epoch": 0.17089476052998454, "grad_norm": 2.53125, "learning_rate": 4.6583157872955205e-05, "loss": 0.5059, "step": 3892 }, { "epoch": 0.170982579009188, "grad_norm": 2.84375, "learning_rate": 4.657966576174606e-05, "loss": 0.5045, "step": 3894 }, { "epoch": 0.1710703974883915, "grad_norm": 2.75, "learning_rate": 4.657617199796094e-05, "loss": 0.5024, "step": 3896 }, { "epoch": 0.17115821596759498, "grad_norm": 2.8125, "learning_rate": 4.657267658186739e-05, "loss": 0.5587, "step": 3898 }, { "epoch": 0.17124603444679848, "grad_norm": 2.59375, "learning_rate": 4.65691795137331e-05, "loss": 0.4818, "step": 3900 }, { "epoch": 0.17133385292600195, "grad_norm": 2.78125, "learning_rate": 4.6565680793825874e-05, "loss": 0.5296, "step": 3902 }, { "epoch": 0.17142167140520545, "grad_norm": 2.640625, "learning_rate": 4.6562180422413635e-05, "loss": 0.5506, "step": 3904 }, { "epoch": 0.17150948988440892, "grad_norm": 2.78125, "learning_rate": 4.655867839976447e-05, "loss": 0.5211, "step": 3906 }, { "epoch": 0.17159730836361242, "grad_norm": 2.484375, "learning_rate": 4.655517472614652e-05, "loss": 0.5108, "step": 3908 }, { "epoch": 0.1716851268428159, "grad_norm": 2.390625, "learning_rate": 4.655166940182813e-05, "loss": 0.5492, "step": 3910 }, { "epoch": 0.1717729453220194, "grad_norm": 2.75, "learning_rate": 4.654816242707774e-05, "loss": 0.5077, "step": 3912 }, { "epoch": 0.17186076380122287, "grad_norm": 2.53125, "learning_rate": 4.6544653802163904e-05, "loss": 0.5369, "step": 3914 }, { "epoch": 0.17194858228042637, "grad_norm": 2.328125, "learning_rate": 4.654114352735531e-05, "loss": 0.512, "step": 3916 }, { "epoch": 0.17203640075962984, "grad_norm": 2.28125, "learning_rate": 4.653763160292078e-05, "loss": 0.5409, "step": 3918 }, { "epoch": 0.17212421923883334, "grad_norm": 2.578125, "learning_rate": 4.653411802912926e-05, "loss": 0.5141, "step": 3920 }, { "epoch": 0.1722120377180368, "grad_norm": 2.65625, "learning_rate": 4.6530602806249825e-05, "loss": 0.5354, "step": 3922 }, { "epoch": 0.1722998561972403, "grad_norm": 2.390625, "learning_rate": 4.652708593455166e-05, "loss": 0.5425, "step": 3924 }, { "epoch": 0.17238767467644378, "grad_norm": 2.71875, "learning_rate": 4.6523567414304095e-05, "loss": 0.5343, "step": 3926 }, { "epoch": 0.17247549315564728, "grad_norm": 2.953125, "learning_rate": 4.6520047245776575e-05, "loss": 0.5373, "step": 3928 }, { "epoch": 0.17256331163485075, "grad_norm": 2.640625, "learning_rate": 4.651652542923869e-05, "loss": 0.5034, "step": 3930 }, { "epoch": 0.17265113011405425, "grad_norm": 2.71875, "learning_rate": 4.6513001964960116e-05, "loss": 0.514, "step": 3932 }, { "epoch": 0.17273894859325772, "grad_norm": 3.515625, "learning_rate": 4.65094768532107e-05, "loss": 0.5189, "step": 3934 }, { "epoch": 0.17282676707246122, "grad_norm": 3.25, "learning_rate": 4.650595009426039e-05, "loss": 0.5327, "step": 3936 }, { "epoch": 0.1729145855516647, "grad_norm": 2.78125, "learning_rate": 4.650242168837927e-05, "loss": 0.5272, "step": 3938 }, { "epoch": 0.1730024040308682, "grad_norm": 2.90625, "learning_rate": 4.649889163583754e-05, "loss": 0.5732, "step": 3940 }, { "epoch": 0.1730902225100717, "grad_norm": 2.671875, "learning_rate": 4.649535993690554e-05, "loss": 0.5236, "step": 3942 }, { "epoch": 0.17317804098927517, "grad_norm": 2.5625, "learning_rate": 4.649182659185371e-05, "loss": 0.4678, "step": 3944 }, { "epoch": 0.17326585946847867, "grad_norm": 2.40625, "learning_rate": 4.648829160095265e-05, "loss": 0.5066, "step": 3946 }, { "epoch": 0.17335367794768214, "grad_norm": 2.421875, "learning_rate": 4.6484754964473076e-05, "loss": 0.4917, "step": 3948 }, { "epoch": 0.17344149642688564, "grad_norm": 2.578125, "learning_rate": 4.6481216682685816e-05, "loss": 0.5121, "step": 3950 }, { "epoch": 0.1735293149060891, "grad_norm": 2.359375, "learning_rate": 4.647767675586183e-05, "loss": 0.5145, "step": 3952 }, { "epoch": 0.1736171333852926, "grad_norm": 2.875, "learning_rate": 4.647413518427221e-05, "loss": 0.5074, "step": 3954 }, { "epoch": 0.17370495186449608, "grad_norm": 2.546875, "learning_rate": 4.647059196818817e-05, "loss": 0.5168, "step": 3956 }, { "epoch": 0.17379277034369958, "grad_norm": 2.421875, "learning_rate": 4.646704710788105e-05, "loss": 0.5364, "step": 3958 }, { "epoch": 0.17388058882290305, "grad_norm": 2.71875, "learning_rate": 4.6463500603622316e-05, "loss": 0.5028, "step": 3960 }, { "epoch": 0.17396840730210655, "grad_norm": 3.140625, "learning_rate": 4.645995245568357e-05, "loss": 0.5176, "step": 3962 }, { "epoch": 0.17405622578131003, "grad_norm": 3.203125, "learning_rate": 4.645640266433651e-05, "loss": 0.5466, "step": 3964 }, { "epoch": 0.17414404426051353, "grad_norm": 2.9375, "learning_rate": 4.645285122985299e-05, "loss": 0.5072, "step": 3966 }, { "epoch": 0.174231862739717, "grad_norm": 3.40625, "learning_rate": 4.6449298152504986e-05, "loss": 0.5595, "step": 3968 }, { "epoch": 0.1743196812189205, "grad_norm": 3.546875, "learning_rate": 4.644574343256458e-05, "loss": 0.5338, "step": 3970 }, { "epoch": 0.17440749969812397, "grad_norm": 3.46875, "learning_rate": 4.6442187070304004e-05, "loss": 0.5155, "step": 3972 }, { "epoch": 0.17449531817732747, "grad_norm": 3.421875, "learning_rate": 4.643862906599561e-05, "loss": 0.513, "step": 3974 }, { "epoch": 0.17458313665653094, "grad_norm": 3.5625, "learning_rate": 4.6435069419911854e-05, "loss": 0.5436, "step": 3976 }, { "epoch": 0.17467095513573444, "grad_norm": 3.09375, "learning_rate": 4.643150813232534e-05, "loss": 0.5065, "step": 3978 }, { "epoch": 0.1747587736149379, "grad_norm": 3.90625, "learning_rate": 4.6427945203508806e-05, "loss": 0.5314, "step": 3980 }, { "epoch": 0.1748465920941414, "grad_norm": 4.03125, "learning_rate": 4.642438063373509e-05, "loss": 0.5111, "step": 3982 }, { "epoch": 0.17493441057334488, "grad_norm": 3.125, "learning_rate": 4.642081442327716e-05, "loss": 0.5112, "step": 3984 }, { "epoch": 0.17502222905254838, "grad_norm": 3.015625, "learning_rate": 4.6417246572408134e-05, "loss": 0.5393, "step": 3986 }, { "epoch": 0.17511004753175186, "grad_norm": 2.546875, "learning_rate": 4.641367708140124e-05, "loss": 0.5214, "step": 3988 }, { "epoch": 0.17519786601095536, "grad_norm": 2.703125, "learning_rate": 4.641010595052981e-05, "loss": 0.5073, "step": 3990 }, { "epoch": 0.17528568449015886, "grad_norm": 2.578125, "learning_rate": 4.640653318006733e-05, "loss": 0.5078, "step": 3992 }, { "epoch": 0.17537350296936233, "grad_norm": 2.625, "learning_rate": 4.640295877028742e-05, "loss": 0.5006, "step": 3994 }, { "epoch": 0.17546132144856583, "grad_norm": 2.390625, "learning_rate": 4.639938272146379e-05, "loss": 0.5217, "step": 3996 }, { "epoch": 0.1755491399277693, "grad_norm": 2.8125, "learning_rate": 4.639580503387031e-05, "loss": 0.5335, "step": 3998 }, { "epoch": 0.1756369584069728, "grad_norm": 2.875, "learning_rate": 4.6392225707780935e-05, "loss": 0.5138, "step": 4000 }, { "epoch": 0.17572477688617627, "grad_norm": 2.890625, "learning_rate": 4.63886447434698e-05, "loss": 0.5127, "step": 4002 }, { "epoch": 0.17581259536537977, "grad_norm": 2.65625, "learning_rate": 4.638506214121112e-05, "loss": 0.5268, "step": 4004 }, { "epoch": 0.17590041384458324, "grad_norm": 2.9375, "learning_rate": 4.6381477901279255e-05, "loss": 0.5176, "step": 4006 }, { "epoch": 0.17598823232378674, "grad_norm": 2.53125, "learning_rate": 4.6377892023948684e-05, "loss": 0.5373, "step": 4008 }, { "epoch": 0.1760760508029902, "grad_norm": 2.390625, "learning_rate": 4.6374304509494016e-05, "loss": 0.5187, "step": 4010 }, { "epoch": 0.1761638692821937, "grad_norm": 3.1875, "learning_rate": 4.637071535818999e-05, "loss": 0.5387, "step": 4012 }, { "epoch": 0.17625168776139719, "grad_norm": 2.53125, "learning_rate": 4.636712457031146e-05, "loss": 0.5008, "step": 4014 }, { "epoch": 0.17633950624060069, "grad_norm": 2.5625, "learning_rate": 4.63635321461334e-05, "loss": 0.5233, "step": 4016 }, { "epoch": 0.17642732471980416, "grad_norm": 2.5625, "learning_rate": 4.635993808593093e-05, "loss": 0.5024, "step": 4018 }, { "epoch": 0.17651514319900766, "grad_norm": 2.765625, "learning_rate": 4.635634238997929e-05, "loss": 0.5411, "step": 4020 }, { "epoch": 0.17660296167821113, "grad_norm": 2.28125, "learning_rate": 4.6352745058553826e-05, "loss": 0.5314, "step": 4022 }, { "epoch": 0.17669078015741463, "grad_norm": 2.484375, "learning_rate": 4.634914609193002e-05, "loss": 0.5405, "step": 4024 }, { "epoch": 0.1767785986366181, "grad_norm": 2.703125, "learning_rate": 4.63455454903835e-05, "loss": 0.5601, "step": 4026 }, { "epoch": 0.1768664171158216, "grad_norm": 2.8125, "learning_rate": 4.634194325418998e-05, "loss": 0.5059, "step": 4028 }, { "epoch": 0.17695423559502507, "grad_norm": 2.5, "learning_rate": 4.633833938362533e-05, "loss": 0.5236, "step": 4030 }, { "epoch": 0.17704205407422857, "grad_norm": 2.828125, "learning_rate": 4.633473387896554e-05, "loss": 0.5242, "step": 4032 }, { "epoch": 0.17712987255343204, "grad_norm": 2.625, "learning_rate": 4.6331126740486704e-05, "loss": 0.5302, "step": 4034 }, { "epoch": 0.17721769103263554, "grad_norm": 2.21875, "learning_rate": 4.632751796846508e-05, "loss": 0.4761, "step": 4036 }, { "epoch": 0.17730550951183902, "grad_norm": 2.5, "learning_rate": 4.632390756317702e-05, "loss": 0.5416, "step": 4038 }, { "epoch": 0.17739332799104252, "grad_norm": 2.34375, "learning_rate": 4.6320295524898996e-05, "loss": 0.5317, "step": 4040 }, { "epoch": 0.17748114647024602, "grad_norm": 2.765625, "learning_rate": 4.631668185390764e-05, "loss": 0.5271, "step": 4042 }, { "epoch": 0.1775689649494495, "grad_norm": 2.3125, "learning_rate": 4.631306655047968e-05, "loss": 0.5316, "step": 4044 }, { "epoch": 0.177656783428653, "grad_norm": 2.6875, "learning_rate": 4.630944961489196e-05, "loss": 0.548, "step": 4046 }, { "epoch": 0.17774460190785646, "grad_norm": 2.484375, "learning_rate": 4.630583104742149e-05, "loss": 0.5027, "step": 4048 }, { "epoch": 0.17783242038705996, "grad_norm": 2.421875, "learning_rate": 4.6302210848345367e-05, "loss": 0.5404, "step": 4050 }, { "epoch": 0.17792023886626343, "grad_norm": 2.9375, "learning_rate": 4.6298589017940836e-05, "loss": 0.5267, "step": 4052 }, { "epoch": 0.17800805734546693, "grad_norm": 2.28125, "learning_rate": 4.629496555648525e-05, "loss": 0.5045, "step": 4054 }, { "epoch": 0.1780958758246704, "grad_norm": 2.25, "learning_rate": 4.629134046425609e-05, "loss": 0.4785, "step": 4056 }, { "epoch": 0.1781836943038739, "grad_norm": 2.34375, "learning_rate": 4.628771374153098e-05, "loss": 0.5289, "step": 4058 }, { "epoch": 0.17827151278307737, "grad_norm": 2.4375, "learning_rate": 4.628408538858765e-05, "loss": 0.5295, "step": 4060 }, { "epoch": 0.17835933126228087, "grad_norm": 2.421875, "learning_rate": 4.628045540570396e-05, "loss": 0.4988, "step": 4062 }, { "epoch": 0.17844714974148435, "grad_norm": 2.6875, "learning_rate": 4.6276823793157884e-05, "loss": 0.5448, "step": 4064 }, { "epoch": 0.17853496822068785, "grad_norm": 2.5625, "learning_rate": 4.627319055122755e-05, "loss": 0.5266, "step": 4066 }, { "epoch": 0.17862278669989132, "grad_norm": 2.640625, "learning_rate": 4.626955568019119e-05, "loss": 0.5096, "step": 4068 }, { "epoch": 0.17871060517909482, "grad_norm": 3.21875, "learning_rate": 4.626591918032715e-05, "loss": 0.5105, "step": 4070 }, { "epoch": 0.1787984236582983, "grad_norm": 3.265625, "learning_rate": 4.626228105191392e-05, "loss": 0.4999, "step": 4072 }, { "epoch": 0.1788862421375018, "grad_norm": 3.0, "learning_rate": 4.625864129523011e-05, "loss": 0.5009, "step": 4074 }, { "epoch": 0.17897406061670526, "grad_norm": 3.265625, "learning_rate": 4.6254999910554456e-05, "loss": 0.5187, "step": 4076 }, { "epoch": 0.17906187909590876, "grad_norm": 2.90625, "learning_rate": 4.625135689816581e-05, "loss": 0.5063, "step": 4078 }, { "epoch": 0.17914969757511223, "grad_norm": 3.015625, "learning_rate": 4.624771225834316e-05, "loss": 0.5143, "step": 4080 }, { "epoch": 0.17923751605431573, "grad_norm": 2.734375, "learning_rate": 4.624406599136562e-05, "loss": 0.519, "step": 4082 }, { "epoch": 0.1793253345335192, "grad_norm": 2.640625, "learning_rate": 4.62404180975124e-05, "loss": 0.5254, "step": 4084 }, { "epoch": 0.1794131530127227, "grad_norm": 2.78125, "learning_rate": 4.6236768577062876e-05, "loss": 0.5358, "step": 4086 }, { "epoch": 0.17950097149192618, "grad_norm": 2.578125, "learning_rate": 4.623311743029652e-05, "loss": 0.4824, "step": 4088 }, { "epoch": 0.17958878997112968, "grad_norm": 2.9375, "learning_rate": 4.622946465749295e-05, "loss": 0.5151, "step": 4090 }, { "epoch": 0.17967660845033318, "grad_norm": 3.1875, "learning_rate": 4.622581025893189e-05, "loss": 0.5062, "step": 4092 }, { "epoch": 0.17976442692953665, "grad_norm": 3.234375, "learning_rate": 4.622215423489318e-05, "loss": 0.5197, "step": 4094 }, { "epoch": 0.17985224540874015, "grad_norm": 3.09375, "learning_rate": 4.6218496585656815e-05, "loss": 0.5057, "step": 4096 }, { "epoch": 0.17994006388794362, "grad_norm": 2.78125, "learning_rate": 4.62148373115029e-05, "loss": 0.516, "step": 4098 }, { "epoch": 0.18002788236714712, "grad_norm": 2.921875, "learning_rate": 4.621117641271166e-05, "loss": 0.5342, "step": 4100 }, { "epoch": 0.1801157008463506, "grad_norm": 2.796875, "learning_rate": 4.620751388956344e-05, "loss": 0.52, "step": 4102 }, { "epoch": 0.1802035193255541, "grad_norm": 2.703125, "learning_rate": 4.620384974233872e-05, "loss": 0.5188, "step": 4104 }, { "epoch": 0.18029133780475756, "grad_norm": 2.765625, "learning_rate": 4.6200183971318104e-05, "loss": 0.5444, "step": 4106 }, { "epoch": 0.18037915628396106, "grad_norm": 3.171875, "learning_rate": 4.619651657678232e-05, "loss": 0.526, "step": 4108 }, { "epoch": 0.18046697476316453, "grad_norm": 2.34375, "learning_rate": 4.619284755901222e-05, "loss": 0.476, "step": 4110 }, { "epoch": 0.18055479324236803, "grad_norm": 3.078125, "learning_rate": 4.618917691828877e-05, "loss": 0.5217, "step": 4112 }, { "epoch": 0.1806426117215715, "grad_norm": 2.84375, "learning_rate": 4.618550465489307e-05, "loss": 0.5009, "step": 4114 }, { "epoch": 0.180730430200775, "grad_norm": 2.59375, "learning_rate": 4.618183076910635e-05, "loss": 0.5349, "step": 4116 }, { "epoch": 0.18081824867997848, "grad_norm": 3.109375, "learning_rate": 4.6178155261209946e-05, "loss": 0.5149, "step": 4118 }, { "epoch": 0.18090606715918198, "grad_norm": 2.90625, "learning_rate": 4.6174478131485334e-05, "loss": 0.513, "step": 4120 }, { "epoch": 0.18099388563838545, "grad_norm": 3.359375, "learning_rate": 4.617079938021412e-05, "loss": 0.5095, "step": 4122 }, { "epoch": 0.18108170411758895, "grad_norm": 3.5625, "learning_rate": 4.616711900767801e-05, "loss": 0.4875, "step": 4124 }, { "epoch": 0.18116952259679242, "grad_norm": 2.953125, "learning_rate": 4.616343701415885e-05, "loss": 0.5149, "step": 4126 }, { "epoch": 0.18125734107599592, "grad_norm": 3.125, "learning_rate": 4.615975339993861e-05, "loss": 0.5326, "step": 4128 }, { "epoch": 0.1813451595551994, "grad_norm": 2.859375, "learning_rate": 4.615606816529939e-05, "loss": 0.4981, "step": 4130 }, { "epoch": 0.1814329780344029, "grad_norm": 2.6875, "learning_rate": 4.6152381310523387e-05, "loss": 0.5039, "step": 4132 }, { "epoch": 0.18152079651360636, "grad_norm": 2.359375, "learning_rate": 4.614869283589296e-05, "loss": 0.5351, "step": 4134 }, { "epoch": 0.18160861499280986, "grad_norm": 2.5625, "learning_rate": 4.614500274169057e-05, "loss": 0.5039, "step": 4136 }, { "epoch": 0.18169643347201334, "grad_norm": 2.6875, "learning_rate": 4.6141311028198786e-05, "loss": 0.5379, "step": 4138 }, { "epoch": 0.18178425195121684, "grad_norm": 2.4375, "learning_rate": 4.613761769570035e-05, "loss": 0.5038, "step": 4140 }, { "epoch": 0.18187207043042034, "grad_norm": 3.3125, "learning_rate": 4.6133922744478076e-05, "loss": 0.509, "step": 4142 }, { "epoch": 0.1819598889096238, "grad_norm": 3.109375, "learning_rate": 4.613022617481494e-05, "loss": 0.4996, "step": 4144 }, { "epoch": 0.1820477073888273, "grad_norm": 2.546875, "learning_rate": 4.612652798699401e-05, "loss": 0.5164, "step": 4146 }, { "epoch": 0.18213552586803078, "grad_norm": 2.84375, "learning_rate": 4.612282818129851e-05, "loss": 0.4928, "step": 4148 }, { "epoch": 0.18222334434723428, "grad_norm": 2.5, "learning_rate": 4.611912675801176e-05, "loss": 0.499, "step": 4150 }, { "epoch": 0.18231116282643775, "grad_norm": 2.359375, "learning_rate": 4.611542371741722e-05, "loss": 0.4836, "step": 4152 }, { "epoch": 0.18239898130564125, "grad_norm": 2.453125, "learning_rate": 4.6111719059798466e-05, "loss": 0.4947, "step": 4154 }, { "epoch": 0.18248679978484472, "grad_norm": 2.5, "learning_rate": 4.610801278543921e-05, "loss": 0.5091, "step": 4156 }, { "epoch": 0.18257461826404822, "grad_norm": 2.640625, "learning_rate": 4.610430489462327e-05, "loss": 0.5287, "step": 4158 }, { "epoch": 0.1826624367432517, "grad_norm": 3.015625, "learning_rate": 4.6100595387634616e-05, "loss": 0.4934, "step": 4160 }, { "epoch": 0.1827502552224552, "grad_norm": 2.875, "learning_rate": 4.60968842647573e-05, "loss": 0.5256, "step": 4162 }, { "epoch": 0.18283807370165867, "grad_norm": 2.578125, "learning_rate": 4.6093171526275524e-05, "loss": 0.5091, "step": 4164 }, { "epoch": 0.18292589218086217, "grad_norm": 2.421875, "learning_rate": 4.608945717247363e-05, "loss": 0.5385, "step": 4166 }, { "epoch": 0.18301371066006564, "grad_norm": 2.546875, "learning_rate": 4.6085741203636035e-05, "loss": 0.5128, "step": 4168 }, { "epoch": 0.18310152913926914, "grad_norm": 2.53125, "learning_rate": 4.6082023620047335e-05, "loss": 0.5135, "step": 4170 }, { "epoch": 0.1831893476184726, "grad_norm": 2.453125, "learning_rate": 4.60783044219922e-05, "loss": 0.5336, "step": 4172 }, { "epoch": 0.1832771660976761, "grad_norm": 2.46875, "learning_rate": 4.607458360975547e-05, "loss": 0.5043, "step": 4174 }, { "epoch": 0.18336498457687958, "grad_norm": 2.609375, "learning_rate": 4.607086118362208e-05, "loss": 0.4748, "step": 4176 }, { "epoch": 0.18345280305608308, "grad_norm": 3.125, "learning_rate": 4.6067137143877084e-05, "loss": 0.5004, "step": 4178 }, { "epoch": 0.18354062153528655, "grad_norm": 3.6875, "learning_rate": 4.606341149080567e-05, "loss": 0.4805, "step": 4180 }, { "epoch": 0.18362844001449005, "grad_norm": 3.71875, "learning_rate": 4.605968422469316e-05, "loss": 0.4993, "step": 4182 }, { "epoch": 0.18371625849369352, "grad_norm": 4.28125, "learning_rate": 4.605595534582498e-05, "loss": 0.539, "step": 4184 }, { "epoch": 0.18380407697289702, "grad_norm": 3.953125, "learning_rate": 4.60522248544867e-05, "loss": 0.5105, "step": 4186 }, { "epoch": 0.1838918954521005, "grad_norm": 3.15625, "learning_rate": 4.604849275096399e-05, "loss": 0.5194, "step": 4188 }, { "epoch": 0.183979713931304, "grad_norm": 2.6875, "learning_rate": 4.604475903554266e-05, "loss": 0.5028, "step": 4190 }, { "epoch": 0.18406753241050747, "grad_norm": 2.53125, "learning_rate": 4.6041023708508635e-05, "loss": 0.49, "step": 4192 }, { "epoch": 0.18415535088971097, "grad_norm": 2.296875, "learning_rate": 4.603728677014797e-05, "loss": 0.4791, "step": 4194 }, { "epoch": 0.18424316936891447, "grad_norm": 2.9375, "learning_rate": 4.6033548220746846e-05, "loss": 0.5261, "step": 4196 }, { "epoch": 0.18433098784811794, "grad_norm": 2.359375, "learning_rate": 4.602980806059155e-05, "loss": 0.5155, "step": 4198 }, { "epoch": 0.18441880632732144, "grad_norm": 2.671875, "learning_rate": 4.602606628996852e-05, "loss": 0.4972, "step": 4200 }, { "epoch": 0.1845066248065249, "grad_norm": 2.65625, "learning_rate": 4.6022322909164286e-05, "loss": 0.5362, "step": 4202 }, { "epoch": 0.1845944432857284, "grad_norm": 3.5625, "learning_rate": 4.601857791846553e-05, "loss": 0.4848, "step": 4204 }, { "epoch": 0.18468226176493188, "grad_norm": 3.59375, "learning_rate": 4.6014831318159036e-05, "loss": 0.5352, "step": 4206 }, { "epoch": 0.18477008024413538, "grad_norm": 3.4375, "learning_rate": 4.6011083108531725e-05, "loss": 0.5178, "step": 4208 }, { "epoch": 0.18485789872333885, "grad_norm": 2.921875, "learning_rate": 4.6007333289870625e-05, "loss": 0.4915, "step": 4210 }, { "epoch": 0.18494571720254235, "grad_norm": 2.59375, "learning_rate": 4.600358186246291e-05, "loss": 0.4989, "step": 4212 }, { "epoch": 0.18503353568174583, "grad_norm": 2.578125, "learning_rate": 4.599982882659586e-05, "loss": 0.5105, "step": 4214 }, { "epoch": 0.18512135416094933, "grad_norm": 2.4375, "learning_rate": 4.599607418255689e-05, "loss": 0.5102, "step": 4216 }, { "epoch": 0.1852091726401528, "grad_norm": 2.421875, "learning_rate": 4.5992317930633524e-05, "loss": 0.5115, "step": 4218 }, { "epoch": 0.1852969911193563, "grad_norm": 2.46875, "learning_rate": 4.5988560071113416e-05, "loss": 0.5128, "step": 4220 }, { "epoch": 0.18538480959855977, "grad_norm": 2.921875, "learning_rate": 4.598480060428435e-05, "loss": 0.5167, "step": 4222 }, { "epoch": 0.18547262807776327, "grad_norm": 3.25, "learning_rate": 4.598103953043422e-05, "loss": 0.525, "step": 4224 }, { "epoch": 0.18556044655696674, "grad_norm": 3.640625, "learning_rate": 4.597727684985105e-05, "loss": 0.5016, "step": 4226 }, { "epoch": 0.18564826503617024, "grad_norm": 4.9375, "learning_rate": 4.5973512562823e-05, "loss": 0.5174, "step": 4228 }, { "epoch": 0.1857360835153737, "grad_norm": 3.78125, "learning_rate": 4.596974666963832e-05, "loss": 0.5166, "step": 4230 }, { "epoch": 0.1858239019945772, "grad_norm": 2.78125, "learning_rate": 4.596597917058542e-05, "loss": 0.5226, "step": 4232 }, { "epoch": 0.18591172047378068, "grad_norm": 2.203125, "learning_rate": 4.59622100659528e-05, "loss": 0.5479, "step": 4234 }, { "epoch": 0.18599953895298418, "grad_norm": 2.71875, "learning_rate": 4.595843935602913e-05, "loss": 0.5321, "step": 4236 }, { "epoch": 0.18608735743218766, "grad_norm": 3.25, "learning_rate": 4.5954667041103126e-05, "loss": 0.5096, "step": 4238 }, { "epoch": 0.18617517591139116, "grad_norm": 5.15625, "learning_rate": 4.595089312146371e-05, "loss": 0.4967, "step": 4240 }, { "epoch": 0.18626299439059463, "grad_norm": 4.5625, "learning_rate": 4.594711759739987e-05, "loss": 0.5105, "step": 4242 }, { "epoch": 0.18635081286979813, "grad_norm": 3.453125, "learning_rate": 4.594334046920075e-05, "loss": 0.5182, "step": 4244 }, { "epoch": 0.18643863134900163, "grad_norm": 2.671875, "learning_rate": 4.593956173715559e-05, "loss": 0.5176, "step": 4246 }, { "epoch": 0.1865264498282051, "grad_norm": 3.25, "learning_rate": 4.593578140155377e-05, "loss": 0.4972, "step": 4248 }, { "epoch": 0.1866142683074086, "grad_norm": 3.3125, "learning_rate": 4.593199946268479e-05, "loss": 0.4917, "step": 4250 }, { "epoch": 0.18670208678661207, "grad_norm": 2.984375, "learning_rate": 4.592821592083829e-05, "loss": 0.5161, "step": 4252 }, { "epoch": 0.18678990526581557, "grad_norm": 3.0625, "learning_rate": 4.592443077630398e-05, "loss": 0.4934, "step": 4254 }, { "epoch": 0.18687772374501904, "grad_norm": 2.8125, "learning_rate": 4.5920644029371764e-05, "loss": 0.4934, "step": 4256 }, { "epoch": 0.18696554222422254, "grad_norm": 2.625, "learning_rate": 4.5916855680331596e-05, "loss": 0.493, "step": 4258 }, { "epoch": 0.187053360703426, "grad_norm": 2.5, "learning_rate": 4.5913065729473614e-05, "loss": 0.5358, "step": 4260 }, { "epoch": 0.1871411791826295, "grad_norm": 2.703125, "learning_rate": 4.590927417708804e-05, "loss": 0.4723, "step": 4262 }, { "epoch": 0.18722899766183299, "grad_norm": 2.59375, "learning_rate": 4.590548102346525e-05, "loss": 0.5063, "step": 4264 }, { "epoch": 0.18731681614103649, "grad_norm": 2.921875, "learning_rate": 4.5901686268895694e-05, "loss": 0.5412, "step": 4266 }, { "epoch": 0.18740463462023996, "grad_norm": 2.953125, "learning_rate": 4.589788991367e-05, "loss": 0.496, "step": 4268 }, { "epoch": 0.18749245309944346, "grad_norm": 2.828125, "learning_rate": 4.5894091958078886e-05, "loss": 0.4843, "step": 4270 }, { "epoch": 0.18758027157864693, "grad_norm": 2.34375, "learning_rate": 4.5890292402413205e-05, "loss": 0.4969, "step": 4272 }, { "epoch": 0.18766809005785043, "grad_norm": 2.359375, "learning_rate": 4.5886491246963925e-05, "loss": 0.4921, "step": 4274 }, { "epoch": 0.1877559085370539, "grad_norm": 2.578125, "learning_rate": 4.5882688492022134e-05, "loss": 0.5024, "step": 4276 }, { "epoch": 0.1878437270162574, "grad_norm": 2.609375, "learning_rate": 4.587888413787905e-05, "loss": 0.4909, "step": 4278 }, { "epoch": 0.18793154549546087, "grad_norm": 2.5, "learning_rate": 4.587507818482603e-05, "loss": 0.5371, "step": 4280 }, { "epoch": 0.18801936397466437, "grad_norm": 2.578125, "learning_rate": 4.58712706331545e-05, "loss": 0.5265, "step": 4282 }, { "epoch": 0.18810718245386784, "grad_norm": 2.265625, "learning_rate": 4.586746148315607e-05, "loss": 0.5251, "step": 4284 }, { "epoch": 0.18819500093307134, "grad_norm": 2.546875, "learning_rate": 4.5863650735122435e-05, "loss": 0.4889, "step": 4286 }, { "epoch": 0.18828281941227482, "grad_norm": 2.484375, "learning_rate": 4.5859838389345425e-05, "loss": 0.4883, "step": 4288 }, { "epoch": 0.18837063789147832, "grad_norm": 2.359375, "learning_rate": 4.585602444611699e-05, "loss": 0.4715, "step": 4290 }, { "epoch": 0.1884584563706818, "grad_norm": 2.578125, "learning_rate": 4.58522089057292e-05, "loss": 0.4844, "step": 4292 }, { "epoch": 0.1885462748498853, "grad_norm": 2.6875, "learning_rate": 4.584839176847426e-05, "loss": 0.516, "step": 4294 }, { "epoch": 0.1886340933290888, "grad_norm": 2.25, "learning_rate": 4.5844573034644485e-05, "loss": 0.5009, "step": 4296 }, { "epoch": 0.18872191180829226, "grad_norm": 2.5625, "learning_rate": 4.5840752704532304e-05, "loss": 0.5116, "step": 4298 }, { "epoch": 0.18880973028749576, "grad_norm": 3.015625, "learning_rate": 4.583693077843028e-05, "loss": 0.5064, "step": 4300 }, { "epoch": 0.18889754876669923, "grad_norm": 3.078125, "learning_rate": 4.5833107256631114e-05, "loss": 0.513, "step": 4302 }, { "epoch": 0.18898536724590273, "grad_norm": 2.484375, "learning_rate": 4.582928213942759e-05, "loss": 0.5207, "step": 4304 }, { "epoch": 0.1890731857251062, "grad_norm": 2.3125, "learning_rate": 4.582545542711265e-05, "loss": 0.5153, "step": 4306 }, { "epoch": 0.1891610042043097, "grad_norm": 2.40625, "learning_rate": 4.582162711997934e-05, "loss": 0.4788, "step": 4308 }, { "epoch": 0.18924882268351317, "grad_norm": 2.5625, "learning_rate": 4.581779721832084e-05, "loss": 0.4913, "step": 4310 }, { "epoch": 0.18933664116271667, "grad_norm": 2.296875, "learning_rate": 4.581396572243043e-05, "loss": 0.507, "step": 4312 }, { "epoch": 0.18942445964192015, "grad_norm": 2.375, "learning_rate": 4.581013263260154e-05, "loss": 0.4794, "step": 4314 }, { "epoch": 0.18951227812112365, "grad_norm": 2.578125, "learning_rate": 4.58062979491277e-05, "loss": 0.4966, "step": 4316 }, { "epoch": 0.18960009660032712, "grad_norm": 2.625, "learning_rate": 4.580246167230258e-05, "loss": 0.5002, "step": 4318 }, { "epoch": 0.18968791507953062, "grad_norm": 2.640625, "learning_rate": 4.579862380241996e-05, "loss": 0.4809, "step": 4320 }, { "epoch": 0.1897757335587341, "grad_norm": 3.046875, "learning_rate": 4.579478433977374e-05, "loss": 0.4919, "step": 4322 }, { "epoch": 0.1898635520379376, "grad_norm": 2.796875, "learning_rate": 4.579094328465795e-05, "loss": 0.5036, "step": 4324 }, { "epoch": 0.18995137051714106, "grad_norm": 2.65625, "learning_rate": 4.578710063736674e-05, "loss": 0.5281, "step": 4326 }, { "epoch": 0.19003918899634456, "grad_norm": 2.71875, "learning_rate": 4.578325639819438e-05, "loss": 0.5063, "step": 4328 }, { "epoch": 0.19012700747554803, "grad_norm": 3.046875, "learning_rate": 4.5779410567435264e-05, "loss": 0.5193, "step": 4330 }, { "epoch": 0.19021482595475153, "grad_norm": 2.765625, "learning_rate": 4.577556314538391e-05, "loss": 0.5139, "step": 4332 }, { "epoch": 0.190302644433955, "grad_norm": 2.28125, "learning_rate": 4.5771714132334945e-05, "loss": 0.5132, "step": 4334 }, { "epoch": 0.1903904629131585, "grad_norm": 2.46875, "learning_rate": 4.576786352858313e-05, "loss": 0.5337, "step": 4336 }, { "epoch": 0.19047828139236198, "grad_norm": 2.234375, "learning_rate": 4.5764011334423354e-05, "loss": 0.5202, "step": 4338 }, { "epoch": 0.19056609987156548, "grad_norm": 2.484375, "learning_rate": 4.576015755015061e-05, "loss": 0.5288, "step": 4340 }, { "epoch": 0.19065391835076895, "grad_norm": 2.625, "learning_rate": 4.575630217606002e-05, "loss": 0.5126, "step": 4342 }, { "epoch": 0.19074173682997245, "grad_norm": 2.359375, "learning_rate": 4.5752445212446836e-05, "loss": 0.5032, "step": 4344 }, { "epoch": 0.19082955530917595, "grad_norm": 2.390625, "learning_rate": 4.574858665960643e-05, "loss": 0.5159, "step": 4346 }, { "epoch": 0.19091737378837942, "grad_norm": 2.59375, "learning_rate": 4.574472651783428e-05, "loss": 0.5548, "step": 4348 }, { "epoch": 0.19100519226758292, "grad_norm": 2.96875, "learning_rate": 4.5740864787425995e-05, "loss": 0.4725, "step": 4350 }, { "epoch": 0.1910930107467864, "grad_norm": 2.734375, "learning_rate": 4.573700146867732e-05, "loss": 0.4993, "step": 4352 }, { "epoch": 0.1911808292259899, "grad_norm": 2.84375, "learning_rate": 4.573313656188409e-05, "loss": 0.5261, "step": 4354 }, { "epoch": 0.19126864770519336, "grad_norm": 2.59375, "learning_rate": 4.57292700673423e-05, "loss": 0.5127, "step": 4356 }, { "epoch": 0.19135646618439686, "grad_norm": 2.828125, "learning_rate": 4.572540198534804e-05, "loss": 0.5066, "step": 4358 }, { "epoch": 0.19144428466360033, "grad_norm": 2.40625, "learning_rate": 4.572153231619752e-05, "loss": 0.5019, "step": 4360 }, { "epoch": 0.19153210314280383, "grad_norm": 2.65625, "learning_rate": 4.57176610601871e-05, "loss": 0.4771, "step": 4362 }, { "epoch": 0.1916199216220073, "grad_norm": 2.1875, "learning_rate": 4.571378821761322e-05, "loss": 0.525, "step": 4364 }, { "epoch": 0.1917077401012108, "grad_norm": 2.296875, "learning_rate": 4.5709913788772486e-05, "loss": 0.5299, "step": 4366 }, { "epoch": 0.19179555858041428, "grad_norm": 2.953125, "learning_rate": 4.570603777396158e-05, "loss": 0.5113, "step": 4368 }, { "epoch": 0.19188337705961778, "grad_norm": 4.09375, "learning_rate": 4.5702160173477336e-05, "loss": 0.4981, "step": 4370 }, { "epoch": 0.19197119553882125, "grad_norm": 4.59375, "learning_rate": 4.56982809876167e-05, "loss": 0.4895, "step": 4372 }, { "epoch": 0.19205901401802475, "grad_norm": 4.03125, "learning_rate": 4.5694400216676755e-05, "loss": 0.4936, "step": 4374 }, { "epoch": 0.19214683249722822, "grad_norm": 3.234375, "learning_rate": 4.569051786095467e-05, "loss": 0.5052, "step": 4376 }, { "epoch": 0.19223465097643172, "grad_norm": 2.78125, "learning_rate": 4.568663392074778e-05, "loss": 0.4914, "step": 4378 }, { "epoch": 0.1923224694556352, "grad_norm": 2.4375, "learning_rate": 4.568274839635349e-05, "loss": 0.4874, "step": 4380 }, { "epoch": 0.1924102879348387, "grad_norm": 2.734375, "learning_rate": 4.567886128806939e-05, "loss": 0.5129, "step": 4382 }, { "epoch": 0.19249810641404216, "grad_norm": 2.84375, "learning_rate": 4.5674972596193114e-05, "loss": 0.5016, "step": 4384 }, { "epoch": 0.19258592489324566, "grad_norm": 3.0625, "learning_rate": 4.567108232102249e-05, "loss": 0.5084, "step": 4386 }, { "epoch": 0.19267374337244914, "grad_norm": 2.96875, "learning_rate": 4.566719046285542e-05, "loss": 0.5344, "step": 4388 }, { "epoch": 0.19276156185165264, "grad_norm": 2.625, "learning_rate": 4.5663297021989963e-05, "loss": 0.4685, "step": 4390 }, { "epoch": 0.1928493803308561, "grad_norm": 2.84375, "learning_rate": 4.565940199872426e-05, "loss": 0.5278, "step": 4392 }, { "epoch": 0.1929371988100596, "grad_norm": 2.796875, "learning_rate": 4.56555053933566e-05, "loss": 0.4925, "step": 4394 }, { "epoch": 0.1930250172892631, "grad_norm": 3.078125, "learning_rate": 4.5651607206185384e-05, "loss": 0.5183, "step": 4396 }, { "epoch": 0.19311283576846658, "grad_norm": 2.984375, "learning_rate": 4.564770743750914e-05, "loss": 0.4626, "step": 4398 }, { "epoch": 0.19320065424767008, "grad_norm": 3.453125, "learning_rate": 4.564380608762651e-05, "loss": 0.4903, "step": 4400 }, { "epoch": 0.19328847272687355, "grad_norm": 3.984375, "learning_rate": 4.563990315683626e-05, "loss": 0.4885, "step": 4402 }, { "epoch": 0.19337629120607705, "grad_norm": 3.25, "learning_rate": 4.5635998645437286e-05, "loss": 0.5027, "step": 4404 }, { "epoch": 0.19346410968528052, "grad_norm": 3.140625, "learning_rate": 4.5632092553728576e-05, "loss": 0.4832, "step": 4406 }, { "epoch": 0.19355192816448402, "grad_norm": 3.34375, "learning_rate": 4.5628184882009276e-05, "loss": 0.514, "step": 4408 }, { "epoch": 0.1936397466436875, "grad_norm": 2.890625, "learning_rate": 4.562427563057864e-05, "loss": 0.5072, "step": 4410 }, { "epoch": 0.193727565122891, "grad_norm": 2.515625, "learning_rate": 4.5620364799736026e-05, "loss": 0.4741, "step": 4412 }, { "epoch": 0.19381538360209447, "grad_norm": 2.65625, "learning_rate": 4.561645238978094e-05, "loss": 0.4841, "step": 4414 }, { "epoch": 0.19390320208129797, "grad_norm": 2.703125, "learning_rate": 4.561253840101298e-05, "loss": 0.5125, "step": 4416 }, { "epoch": 0.19399102056050144, "grad_norm": 2.796875, "learning_rate": 4.560862283373188e-05, "loss": 0.4877, "step": 4418 }, { "epoch": 0.19407883903970494, "grad_norm": 3.21875, "learning_rate": 4.560470568823753e-05, "loss": 0.5164, "step": 4420 }, { "epoch": 0.1941666575189084, "grad_norm": 3.328125, "learning_rate": 4.5600786964829854e-05, "loss": 0.481, "step": 4422 }, { "epoch": 0.1942544759981119, "grad_norm": 3.15625, "learning_rate": 4.559686666380898e-05, "loss": 0.4857, "step": 4424 }, { "epoch": 0.19434229447731538, "grad_norm": 3.546875, "learning_rate": 4.559294478547512e-05, "loss": 0.4905, "step": 4426 }, { "epoch": 0.19443011295651888, "grad_norm": 3.96875, "learning_rate": 4.5589021330128615e-05, "loss": 0.512, "step": 4428 }, { "epoch": 0.19451793143572235, "grad_norm": 3.140625, "learning_rate": 4.558509629806992e-05, "loss": 0.5058, "step": 4430 }, { "epoch": 0.19460574991492585, "grad_norm": 2.90625, "learning_rate": 4.558116968959962e-05, "loss": 0.4877, "step": 4432 }, { "epoch": 0.19469356839412932, "grad_norm": 2.5, "learning_rate": 4.5577241505018404e-05, "loss": 0.4911, "step": 4434 }, { "epoch": 0.19478138687333282, "grad_norm": 2.546875, "learning_rate": 4.557331174462711e-05, "loss": 0.5044, "step": 4436 }, { "epoch": 0.1948692053525363, "grad_norm": 2.8125, "learning_rate": 4.556938040872666e-05, "loss": 0.5064, "step": 4438 }, { "epoch": 0.1949570238317398, "grad_norm": 2.4375, "learning_rate": 4.556544749761813e-05, "loss": 0.4767, "step": 4440 }, { "epoch": 0.19504484231094327, "grad_norm": 2.265625, "learning_rate": 4.556151301160271e-05, "loss": 0.5161, "step": 4442 }, { "epoch": 0.19513266079014677, "grad_norm": 2.28125, "learning_rate": 4.5557576950981684e-05, "loss": 0.5014, "step": 4444 }, { "epoch": 0.19522047926935027, "grad_norm": 2.46875, "learning_rate": 4.555363931605649e-05, "loss": 0.4857, "step": 4446 }, { "epoch": 0.19530829774855374, "grad_norm": 2.5625, "learning_rate": 4.554970010712867e-05, "loss": 0.5094, "step": 4448 }, { "epoch": 0.19539611622775724, "grad_norm": 2.265625, "learning_rate": 4.5545759324499896e-05, "loss": 0.5082, "step": 4450 }, { "epoch": 0.1954839347069607, "grad_norm": 2.640625, "learning_rate": 4.554181696847194e-05, "loss": 0.515, "step": 4452 }, { "epoch": 0.1955717531861642, "grad_norm": 2.3125, "learning_rate": 4.5537873039346716e-05, "loss": 0.5094, "step": 4454 }, { "epoch": 0.19565957166536768, "grad_norm": 2.390625, "learning_rate": 4.553392753742626e-05, "loss": 0.5027, "step": 4456 }, { "epoch": 0.19574739014457118, "grad_norm": 2.5625, "learning_rate": 4.55299804630127e-05, "loss": 0.5025, "step": 4458 }, { "epoch": 0.19583520862377465, "grad_norm": 2.546875, "learning_rate": 4.5526031816408324e-05, "loss": 0.4915, "step": 4460 }, { "epoch": 0.19592302710297815, "grad_norm": 2.75, "learning_rate": 4.55220815979155e-05, "loss": 0.5053, "step": 4462 }, { "epoch": 0.19601084558218163, "grad_norm": 2.5625, "learning_rate": 4.5518129807836755e-05, "loss": 0.5056, "step": 4464 }, { "epoch": 0.19609866406138513, "grad_norm": 2.59375, "learning_rate": 4.551417644647471e-05, "loss": 0.476, "step": 4466 }, { "epoch": 0.1961864825405886, "grad_norm": 2.3125, "learning_rate": 4.551022151413212e-05, "loss": 0.5206, "step": 4468 }, { "epoch": 0.1962743010197921, "grad_norm": 2.640625, "learning_rate": 4.5506265011111845e-05, "loss": 0.4979, "step": 4470 }, { "epoch": 0.19636211949899557, "grad_norm": 2.9375, "learning_rate": 4.5502306937716873e-05, "loss": 0.5007, "step": 4472 }, { "epoch": 0.19644993797819907, "grad_norm": 2.96875, "learning_rate": 4.5498347294250335e-05, "loss": 0.4995, "step": 4474 }, { "epoch": 0.19653775645740254, "grad_norm": 2.890625, "learning_rate": 4.549438608101544e-05, "loss": 0.5034, "step": 4476 }, { "epoch": 0.19662557493660604, "grad_norm": 2.921875, "learning_rate": 4.549042329831554e-05, "loss": 0.5002, "step": 4478 }, { "epoch": 0.1967133934158095, "grad_norm": 2.84375, "learning_rate": 4.548645894645413e-05, "loss": 0.5025, "step": 4480 }, { "epoch": 0.196801211895013, "grad_norm": 2.96875, "learning_rate": 4.548249302573477e-05, "loss": 0.5033, "step": 4482 }, { "epoch": 0.19688903037421648, "grad_norm": 2.75, "learning_rate": 4.547852553646118e-05, "loss": 0.4976, "step": 4484 }, { "epoch": 0.19697684885341998, "grad_norm": 2.375, "learning_rate": 4.54745564789372e-05, "loss": 0.4862, "step": 4486 }, { "epoch": 0.19706466733262346, "grad_norm": 2.375, "learning_rate": 4.547058585346678e-05, "loss": 0.4548, "step": 4488 }, { "epoch": 0.19715248581182696, "grad_norm": 2.796875, "learning_rate": 4.5466613660353985e-05, "loss": 0.5254, "step": 4490 }, { "epoch": 0.19724030429103043, "grad_norm": 2.234375, "learning_rate": 4.546263989990302e-05, "loss": 0.4946, "step": 4492 }, { "epoch": 0.19732812277023393, "grad_norm": 2.609375, "learning_rate": 4.545866457241817e-05, "loss": 0.4971, "step": 4494 }, { "epoch": 0.19741594124943743, "grad_norm": 2.625, "learning_rate": 4.545468767820389e-05, "loss": 0.4876, "step": 4496 }, { "epoch": 0.1975037597286409, "grad_norm": 3.015625, "learning_rate": 4.545070921756472e-05, "loss": 0.5235, "step": 4498 }, { "epoch": 0.1975915782078444, "grad_norm": 3.0625, "learning_rate": 4.5446729190805345e-05, "loss": 0.501, "step": 4500 }, { "epoch": 0.19767939668704787, "grad_norm": 3.015625, "learning_rate": 4.544274759823054e-05, "loss": 0.5097, "step": 4502 }, { "epoch": 0.19776721516625137, "grad_norm": 2.375, "learning_rate": 4.5438764440145225e-05, "loss": 0.4899, "step": 4504 }, { "epoch": 0.19785503364545484, "grad_norm": 2.296875, "learning_rate": 4.543477971685443e-05, "loss": 0.4941, "step": 4506 }, { "epoch": 0.19794285212465834, "grad_norm": 2.625, "learning_rate": 4.543079342866331e-05, "loss": 0.5089, "step": 4508 }, { "epoch": 0.1980306706038618, "grad_norm": 2.578125, "learning_rate": 4.542680557587712e-05, "loss": 0.4812, "step": 4510 }, { "epoch": 0.1981184890830653, "grad_norm": 2.390625, "learning_rate": 4.5422816158801275e-05, "loss": 0.4979, "step": 4512 }, { "epoch": 0.19820630756226879, "grad_norm": 2.4375, "learning_rate": 4.5418825177741266e-05, "loss": 0.4836, "step": 4514 }, { "epoch": 0.19829412604147229, "grad_norm": 2.53125, "learning_rate": 4.541483263300273e-05, "loss": 0.5024, "step": 4516 }, { "epoch": 0.19838194452067576, "grad_norm": 2.28125, "learning_rate": 4.5410838524891416e-05, "loss": 0.4997, "step": 4518 }, { "epoch": 0.19846976299987926, "grad_norm": 2.4375, "learning_rate": 4.540684285371321e-05, "loss": 0.5085, "step": 4520 }, { "epoch": 0.19855758147908273, "grad_norm": 2.609375, "learning_rate": 4.540284561977406e-05, "loss": 0.4939, "step": 4522 }, { "epoch": 0.19864539995828623, "grad_norm": 2.796875, "learning_rate": 4.5398846823380124e-05, "loss": 0.5064, "step": 4524 }, { "epoch": 0.1987332184374897, "grad_norm": 2.765625, "learning_rate": 4.53948464648376e-05, "loss": 0.462, "step": 4526 }, { "epoch": 0.1988210369166932, "grad_norm": 2.375, "learning_rate": 4.539084454445284e-05, "loss": 0.492, "step": 4528 }, { "epoch": 0.19890885539589667, "grad_norm": 2.359375, "learning_rate": 4.5386841062532325e-05, "loss": 0.4829, "step": 4530 }, { "epoch": 0.19899667387510017, "grad_norm": 2.421875, "learning_rate": 4.538283601938263e-05, "loss": 0.4771, "step": 4532 }, { "epoch": 0.19908449235430364, "grad_norm": 2.609375, "learning_rate": 4.5378829415310465e-05, "loss": 0.5275, "step": 4534 }, { "epoch": 0.19917231083350714, "grad_norm": 2.5, "learning_rate": 4.5374821250622665e-05, "loss": 0.5158, "step": 4536 }, { "epoch": 0.19926012931271062, "grad_norm": 2.65625, "learning_rate": 4.5370811525626166e-05, "loss": 0.5223, "step": 4538 }, { "epoch": 0.19934794779191412, "grad_norm": 2.734375, "learning_rate": 4.5366800240628034e-05, "loss": 0.5189, "step": 4540 }, { "epoch": 0.1994357662711176, "grad_norm": 2.40625, "learning_rate": 4.536278739593546e-05, "loss": 0.5121, "step": 4542 }, { "epoch": 0.1995235847503211, "grad_norm": 2.609375, "learning_rate": 4.535877299185575e-05, "loss": 0.5178, "step": 4544 }, { "epoch": 0.19961140322952456, "grad_norm": 2.59375, "learning_rate": 4.5354757028696315e-05, "loss": 0.5079, "step": 4546 }, { "epoch": 0.19969922170872806, "grad_norm": 2.3125, "learning_rate": 4.5350739506764714e-05, "loss": 0.4885, "step": 4548 }, { "epoch": 0.19978704018793156, "grad_norm": 2.703125, "learning_rate": 4.53467204263686e-05, "loss": 0.5099, "step": 4550 }, { "epoch": 0.19987485866713503, "grad_norm": 2.46875, "learning_rate": 4.5342699787815755e-05, "loss": 0.4823, "step": 4552 }, { "epoch": 0.19996267714633853, "grad_norm": 2.890625, "learning_rate": 4.5338677591414095e-05, "loss": 0.4902, "step": 4554 }, { "epoch": 0.200050495625542, "grad_norm": 2.3125, "learning_rate": 4.533465383747162e-05, "loss": 0.5072, "step": 4556 }, { "epoch": 0.2001383141047455, "grad_norm": 2.40625, "learning_rate": 4.533062852629648e-05, "loss": 0.5244, "step": 4558 }, { "epoch": 0.20022613258394897, "grad_norm": 2.265625, "learning_rate": 4.5326601658196937e-05, "loss": 0.5189, "step": 4560 }, { "epoch": 0.20031395106315247, "grad_norm": 2.53125, "learning_rate": 4.532257323348137e-05, "loss": 0.5431, "step": 4562 }, { "epoch": 0.20040176954235595, "grad_norm": 2.5625, "learning_rate": 4.5318543252458266e-05, "loss": 0.4636, "step": 4564 }, { "epoch": 0.20048958802155945, "grad_norm": 2.484375, "learning_rate": 4.5314511715436246e-05, "loss": 0.4813, "step": 4566 }, { "epoch": 0.20057740650076292, "grad_norm": 3.140625, "learning_rate": 4.531047862272406e-05, "loss": 0.5049, "step": 4568 }, { "epoch": 0.20066522497996642, "grad_norm": 2.296875, "learning_rate": 4.530644397463055e-05, "loss": 0.5004, "step": 4570 }, { "epoch": 0.2007530434591699, "grad_norm": 2.859375, "learning_rate": 4.530240777146468e-05, "loss": 0.4946, "step": 4572 }, { "epoch": 0.2008408619383734, "grad_norm": 2.640625, "learning_rate": 4.529837001353557e-05, "loss": 0.4981, "step": 4574 }, { "epoch": 0.20092868041757686, "grad_norm": 2.296875, "learning_rate": 4.529433070115241e-05, "loss": 0.5206, "step": 4576 }, { "epoch": 0.20101649889678036, "grad_norm": 2.703125, "learning_rate": 4.529028983462455e-05, "loss": 0.5008, "step": 4578 }, { "epoch": 0.20110431737598383, "grad_norm": 2.40625, "learning_rate": 4.528624741426143e-05, "loss": 0.4744, "step": 4580 }, { "epoch": 0.20119213585518733, "grad_norm": 2.515625, "learning_rate": 4.528220344037261e-05, "loss": 0.5024, "step": 4582 }, { "epoch": 0.2012799543343908, "grad_norm": 3.0625, "learning_rate": 4.527815791326779e-05, "loss": 0.5053, "step": 4584 }, { "epoch": 0.2013677728135943, "grad_norm": 2.625, "learning_rate": 4.5274110833256785e-05, "loss": 0.5126, "step": 4586 }, { "epoch": 0.20145559129279778, "grad_norm": 2.5, "learning_rate": 4.52700622006495e-05, "loss": 0.4648, "step": 4588 }, { "epoch": 0.20154340977200128, "grad_norm": 2.53125, "learning_rate": 4.5266012015756e-05, "loss": 0.5069, "step": 4590 }, { "epoch": 0.20163122825120475, "grad_norm": 2.53125, "learning_rate": 4.5261960278886443e-05, "loss": 0.5303, "step": 4592 }, { "epoch": 0.20171904673040825, "grad_norm": 3.015625, "learning_rate": 4.5257906990351114e-05, "loss": 0.5173, "step": 4594 }, { "epoch": 0.20180686520961172, "grad_norm": 2.890625, "learning_rate": 4.5253852150460406e-05, "loss": 0.4768, "step": 4596 }, { "epoch": 0.20189468368881522, "grad_norm": 2.234375, "learning_rate": 4.524979575952484e-05, "loss": 0.4905, "step": 4598 }, { "epoch": 0.20198250216801872, "grad_norm": 2.375, "learning_rate": 4.524573781785507e-05, "loss": 0.4856, "step": 4600 }, { "epoch": 0.2020703206472222, "grad_norm": 2.6875, "learning_rate": 4.5241678325761835e-05, "loss": 0.5001, "step": 4602 }, { "epoch": 0.2021581391264257, "grad_norm": 2.5625, "learning_rate": 4.523761728355603e-05, "loss": 0.5012, "step": 4604 }, { "epoch": 0.20224595760562916, "grad_norm": 2.5, "learning_rate": 4.523355469154864e-05, "loss": 0.497, "step": 4606 }, { "epoch": 0.20233377608483266, "grad_norm": 2.484375, "learning_rate": 4.522949055005078e-05, "loss": 0.5138, "step": 4608 }, { "epoch": 0.20242159456403613, "grad_norm": 2.515625, "learning_rate": 4.522542485937369e-05, "loss": 0.506, "step": 4610 }, { "epoch": 0.20250941304323963, "grad_norm": 2.3125, "learning_rate": 4.522135761982871e-05, "loss": 0.5185, "step": 4612 }, { "epoch": 0.2025972315224431, "grad_norm": 2.515625, "learning_rate": 4.521728883172732e-05, "loss": 0.4962, "step": 4614 }, { "epoch": 0.2026850500016466, "grad_norm": 2.390625, "learning_rate": 4.52132184953811e-05, "loss": 0.4924, "step": 4616 }, { "epoch": 0.20277286848085008, "grad_norm": 2.453125, "learning_rate": 4.520914661110176e-05, "loss": 0.4647, "step": 4618 }, { "epoch": 0.20286068696005358, "grad_norm": 2.5625, "learning_rate": 4.5205073179201134e-05, "loss": 0.4827, "step": 4620 }, { "epoch": 0.20294850543925705, "grad_norm": 2.59375, "learning_rate": 4.5200998199991165e-05, "loss": 0.4757, "step": 4622 }, { "epoch": 0.20303632391846055, "grad_norm": 2.5, "learning_rate": 4.5196921673783896e-05, "loss": 0.5093, "step": 4624 }, { "epoch": 0.20312414239766402, "grad_norm": 2.3125, "learning_rate": 4.519284360089153e-05, "loss": 0.5032, "step": 4626 }, { "epoch": 0.20321196087686752, "grad_norm": 2.484375, "learning_rate": 4.518876398162637e-05, "loss": 0.5029, "step": 4628 }, { "epoch": 0.203299779356071, "grad_norm": 2.734375, "learning_rate": 4.5184682816300815e-05, "loss": 0.4898, "step": 4630 }, { "epoch": 0.2033875978352745, "grad_norm": 2.5625, "learning_rate": 4.518060010522742e-05, "loss": 0.4841, "step": 4632 }, { "epoch": 0.20347541631447796, "grad_norm": 2.484375, "learning_rate": 4.5176515848718825e-05, "loss": 0.4545, "step": 4634 }, { "epoch": 0.20356323479368146, "grad_norm": 2.21875, "learning_rate": 4.517243004708781e-05, "loss": 0.4862, "step": 4636 }, { "epoch": 0.20365105327288494, "grad_norm": 2.484375, "learning_rate": 4.5168342700647264e-05, "loss": 0.4902, "step": 4638 }, { "epoch": 0.20373887175208844, "grad_norm": 2.515625, "learning_rate": 4.51642538097102e-05, "loss": 0.478, "step": 4640 }, { "epoch": 0.2038266902312919, "grad_norm": 2.234375, "learning_rate": 4.516016337458975e-05, "loss": 0.5129, "step": 4642 }, { "epoch": 0.2039145087104954, "grad_norm": 2.5, "learning_rate": 4.5156071395599156e-05, "loss": 0.4711, "step": 4644 }, { "epoch": 0.20400232718969888, "grad_norm": 2.453125, "learning_rate": 4.5151977873051774e-05, "loss": 0.4948, "step": 4646 }, { "epoch": 0.20409014566890238, "grad_norm": 2.390625, "learning_rate": 4.5147882807261104e-05, "loss": 0.4951, "step": 4648 }, { "epoch": 0.20417796414810588, "grad_norm": 2.8125, "learning_rate": 4.5143786198540736e-05, "loss": 0.4779, "step": 4650 }, { "epoch": 0.20426578262730935, "grad_norm": 2.703125, "learning_rate": 4.51396880472044e-05, "loss": 0.518, "step": 4652 }, { "epoch": 0.20435360110651285, "grad_norm": 2.96875, "learning_rate": 4.5135588353565914e-05, "loss": 0.491, "step": 4654 }, { "epoch": 0.20444141958571632, "grad_norm": 3.453125, "learning_rate": 4.5131487117939246e-05, "loss": 0.5192, "step": 4656 }, { "epoch": 0.20452923806491982, "grad_norm": 2.75, "learning_rate": 4.5127384340638474e-05, "loss": 0.4967, "step": 4658 }, { "epoch": 0.2046170565441233, "grad_norm": 2.359375, "learning_rate": 4.512328002197779e-05, "loss": 0.5179, "step": 4660 }, { "epoch": 0.2047048750233268, "grad_norm": 2.34375, "learning_rate": 4.5119174162271484e-05, "loss": 0.4966, "step": 4662 }, { "epoch": 0.20479269350253027, "grad_norm": 2.421875, "learning_rate": 4.5115066761834e-05, "loss": 0.5001, "step": 4664 }, { "epoch": 0.20488051198173376, "grad_norm": 2.640625, "learning_rate": 4.511095782097989e-05, "loss": 0.5273, "step": 4666 }, { "epoch": 0.20496833046093724, "grad_norm": 2.671875, "learning_rate": 4.510684734002381e-05, "loss": 0.4918, "step": 4668 }, { "epoch": 0.20505614894014074, "grad_norm": 2.578125, "learning_rate": 4.510273531928053e-05, "loss": 0.4928, "step": 4670 }, { "epoch": 0.2051439674193442, "grad_norm": 3.265625, "learning_rate": 4.5098621759064964e-05, "loss": 0.4911, "step": 4672 }, { "epoch": 0.2052317858985477, "grad_norm": 3.109375, "learning_rate": 4.5094506659692126e-05, "loss": 0.5041, "step": 4674 }, { "epoch": 0.20531960437775118, "grad_norm": 2.71875, "learning_rate": 4.509039002147716e-05, "loss": 0.4829, "step": 4676 }, { "epoch": 0.20540742285695468, "grad_norm": 2.671875, "learning_rate": 4.50862718447353e-05, "loss": 0.4726, "step": 4678 }, { "epoch": 0.20549524133615815, "grad_norm": 2.734375, "learning_rate": 4.508215212978193e-05, "loss": 0.4512, "step": 4680 }, { "epoch": 0.20558305981536165, "grad_norm": 2.6875, "learning_rate": 4.507803087693253e-05, "loss": 0.497, "step": 4682 }, { "epoch": 0.20567087829456512, "grad_norm": 2.859375, "learning_rate": 4.507390808650272e-05, "loss": 0.4954, "step": 4684 }, { "epoch": 0.20575869677376862, "grad_norm": 2.65625, "learning_rate": 4.506978375880822e-05, "loss": 0.4796, "step": 4686 }, { "epoch": 0.2058465152529721, "grad_norm": 2.5, "learning_rate": 4.5065657894164856e-05, "loss": 0.478, "step": 4688 }, { "epoch": 0.2059343337321756, "grad_norm": 2.28125, "learning_rate": 4.506153049288861e-05, "loss": 0.5176, "step": 4690 }, { "epoch": 0.20602215221137907, "grad_norm": 2.625, "learning_rate": 4.505740155529555e-05, "loss": 0.5044, "step": 4692 }, { "epoch": 0.20610997069058257, "grad_norm": 2.765625, "learning_rate": 4.505327108170187e-05, "loss": 0.5094, "step": 4694 }, { "epoch": 0.20619778916978604, "grad_norm": 2.359375, "learning_rate": 4.504913907242389e-05, "loss": 0.4767, "step": 4696 }, { "epoch": 0.20628560764898954, "grad_norm": 2.609375, "learning_rate": 4.504500552777802e-05, "loss": 0.4734, "step": 4698 }, { "epoch": 0.20637342612819304, "grad_norm": 2.234375, "learning_rate": 4.5040870448080824e-05, "loss": 0.4629, "step": 4700 }, { "epoch": 0.2064612446073965, "grad_norm": 2.78125, "learning_rate": 4.503673383364897e-05, "loss": 0.499, "step": 4702 }, { "epoch": 0.2065490630866, "grad_norm": 2.71875, "learning_rate": 4.5032595684799236e-05, "loss": 0.5027, "step": 4704 }, { "epoch": 0.20663688156580348, "grad_norm": 2.359375, "learning_rate": 4.502845600184852e-05, "loss": 0.5164, "step": 4706 }, { "epoch": 0.20672470004500698, "grad_norm": 2.84375, "learning_rate": 4.502431478511384e-05, "loss": 0.4971, "step": 4708 }, { "epoch": 0.20681251852421045, "grad_norm": 2.734375, "learning_rate": 4.5020172034912336e-05, "loss": 0.448, "step": 4710 }, { "epoch": 0.20690033700341395, "grad_norm": 2.734375, "learning_rate": 4.501602775156126e-05, "loss": 0.4835, "step": 4712 }, { "epoch": 0.20698815548261743, "grad_norm": 2.859375, "learning_rate": 4.501188193537798e-05, "loss": 0.4712, "step": 4714 }, { "epoch": 0.20707597396182092, "grad_norm": 3.03125, "learning_rate": 4.500773458667998e-05, "loss": 0.492, "step": 4716 }, { "epoch": 0.2071637924410244, "grad_norm": 2.578125, "learning_rate": 4.500358570578488e-05, "loss": 0.4948, "step": 4718 }, { "epoch": 0.2072516109202279, "grad_norm": 2.359375, "learning_rate": 4.4999435293010384e-05, "loss": 0.458, "step": 4720 }, { "epoch": 0.20733942939943137, "grad_norm": 2.65625, "learning_rate": 4.4995283348674336e-05, "loss": 0.4802, "step": 4722 }, { "epoch": 0.20742724787863487, "grad_norm": 2.5625, "learning_rate": 4.49911298730947e-05, "loss": 0.4884, "step": 4724 }, { "epoch": 0.20751506635783834, "grad_norm": 2.671875, "learning_rate": 4.4986974866589546e-05, "loss": 0.476, "step": 4726 }, { "epoch": 0.20760288483704184, "grad_norm": 2.46875, "learning_rate": 4.498281832947706e-05, "loss": 0.5139, "step": 4728 }, { "epoch": 0.2076907033162453, "grad_norm": 2.703125, "learning_rate": 4.4978660262075566e-05, "loss": 0.4709, "step": 4730 }, { "epoch": 0.2077785217954488, "grad_norm": 3.0625, "learning_rate": 4.4974500664703475e-05, "loss": 0.5018, "step": 4732 }, { "epoch": 0.20786634027465228, "grad_norm": 2.78125, "learning_rate": 4.4970339537679336e-05, "loss": 0.4881, "step": 4734 }, { "epoch": 0.20795415875385578, "grad_norm": 2.640625, "learning_rate": 4.49661768813218e-05, "loss": 0.5178, "step": 4736 }, { "epoch": 0.20804197723305926, "grad_norm": 2.375, "learning_rate": 4.496201269594966e-05, "loss": 0.5246, "step": 4738 }, { "epoch": 0.20812979571226276, "grad_norm": 2.390625, "learning_rate": 4.49578469818818e-05, "loss": 0.4945, "step": 4740 }, { "epoch": 0.20821761419146623, "grad_norm": 2.453125, "learning_rate": 4.495367973943724e-05, "loss": 0.5048, "step": 4742 }, { "epoch": 0.20830543267066973, "grad_norm": 2.59375, "learning_rate": 4.494951096893509e-05, "loss": 0.4675, "step": 4744 }, { "epoch": 0.2083932511498732, "grad_norm": 3.234375, "learning_rate": 4.494534067069463e-05, "loss": 0.5013, "step": 4746 }, { "epoch": 0.2084810696290767, "grad_norm": 3.171875, "learning_rate": 4.494116884503518e-05, "loss": 0.4897, "step": 4748 }, { "epoch": 0.2085688881082802, "grad_norm": 2.296875, "learning_rate": 4.493699549227624e-05, "loss": 0.5059, "step": 4750 }, { "epoch": 0.20865670658748367, "grad_norm": 2.515625, "learning_rate": 4.493282061273741e-05, "loss": 0.5089, "step": 4752 }, { "epoch": 0.20874452506668717, "grad_norm": 2.125, "learning_rate": 4.4928644206738404e-05, "loss": 0.4795, "step": 4754 }, { "epoch": 0.20883234354589064, "grad_norm": 2.3125, "learning_rate": 4.492446627459904e-05, "loss": 0.4664, "step": 4756 }, { "epoch": 0.20892016202509414, "grad_norm": 2.515625, "learning_rate": 4.4920286816639277e-05, "loss": 0.5075, "step": 4758 }, { "epoch": 0.2090079805042976, "grad_norm": 2.109375, "learning_rate": 4.491610583317918e-05, "loss": 0.4821, "step": 4760 }, { "epoch": 0.2090957989835011, "grad_norm": 2.09375, "learning_rate": 4.491192332453892e-05, "loss": 0.5116, "step": 4762 }, { "epoch": 0.20918361746270459, "grad_norm": 2.71875, "learning_rate": 4.490773929103879e-05, "loss": 0.4873, "step": 4764 }, { "epoch": 0.20927143594190808, "grad_norm": 2.734375, "learning_rate": 4.4903553732999225e-05, "loss": 0.5058, "step": 4766 }, { "epoch": 0.20935925442111156, "grad_norm": 2.484375, "learning_rate": 4.489936665074074e-05, "loss": 0.4896, "step": 4768 }, { "epoch": 0.20944707290031506, "grad_norm": 2.90625, "learning_rate": 4.489517804458398e-05, "loss": 0.4906, "step": 4770 }, { "epoch": 0.20953489137951853, "grad_norm": 3.3125, "learning_rate": 4.4890987914849724e-05, "loss": 0.4861, "step": 4772 }, { "epoch": 0.20962270985872203, "grad_norm": 3.125, "learning_rate": 4.488679626185884e-05, "loss": 0.4878, "step": 4774 }, { "epoch": 0.2097105283379255, "grad_norm": 3.15625, "learning_rate": 4.488260308593233e-05, "loss": 0.4785, "step": 4776 }, { "epoch": 0.209798346817129, "grad_norm": 3.84375, "learning_rate": 4.4878408387391314e-05, "loss": 0.4722, "step": 4778 }, { "epoch": 0.20988616529633247, "grad_norm": 3.046875, "learning_rate": 4.487421216655702e-05, "loss": 0.4687, "step": 4780 }, { "epoch": 0.20997398377553597, "grad_norm": 2.84375, "learning_rate": 4.487001442375079e-05, "loss": 0.4903, "step": 4782 }, { "epoch": 0.21006180225473944, "grad_norm": 2.53125, "learning_rate": 4.4865815159294094e-05, "loss": 0.4738, "step": 4784 }, { "epoch": 0.21014962073394294, "grad_norm": 2.6875, "learning_rate": 4.486161437350851e-05, "loss": 0.4617, "step": 4786 }, { "epoch": 0.21023743921314642, "grad_norm": 2.515625, "learning_rate": 4.485741206671574e-05, "loss": 0.4792, "step": 4788 }, { "epoch": 0.21032525769234992, "grad_norm": 2.5, "learning_rate": 4.4853208239237586e-05, "loss": 0.4983, "step": 4790 }, { "epoch": 0.2104130761715534, "grad_norm": 3.078125, "learning_rate": 4.4849002891395995e-05, "loss": 0.5093, "step": 4792 }, { "epoch": 0.2105008946507569, "grad_norm": 2.546875, "learning_rate": 4.4844796023512995e-05, "loss": 0.4946, "step": 4794 }, { "epoch": 0.21058871312996036, "grad_norm": 2.390625, "learning_rate": 4.484058763591077e-05, "loss": 0.5118, "step": 4796 }, { "epoch": 0.21067653160916386, "grad_norm": 2.515625, "learning_rate": 4.4836377728911574e-05, "loss": 0.5033, "step": 4798 }, { "epoch": 0.21076435008836736, "grad_norm": 2.65625, "learning_rate": 4.483216630283783e-05, "loss": 0.4579, "step": 4800 }, { "epoch": 0.21085216856757083, "grad_norm": 2.21875, "learning_rate": 4.4827953358012024e-05, "loss": 0.5025, "step": 4802 }, { "epoch": 0.21093998704677433, "grad_norm": 2.515625, "learning_rate": 4.482373889475681e-05, "loss": 0.474, "step": 4804 }, { "epoch": 0.2110278055259778, "grad_norm": 2.453125, "learning_rate": 4.481952291339491e-05, "loss": 0.5077, "step": 4806 }, { "epoch": 0.2111156240051813, "grad_norm": 2.4375, "learning_rate": 4.48153054142492e-05, "loss": 0.4601, "step": 4808 }, { "epoch": 0.21120344248438477, "grad_norm": 2.671875, "learning_rate": 4.481108639764264e-05, "loss": 0.4722, "step": 4810 }, { "epoch": 0.21129126096358827, "grad_norm": 2.421875, "learning_rate": 4.4806865863898346e-05, "loss": 0.4735, "step": 4812 }, { "epoch": 0.21137907944279175, "grad_norm": 2.484375, "learning_rate": 4.480264381333951e-05, "loss": 0.459, "step": 4814 }, { "epoch": 0.21146689792199524, "grad_norm": 2.625, "learning_rate": 4.479842024628946e-05, "loss": 0.4863, "step": 4816 }, { "epoch": 0.21155471640119872, "grad_norm": 2.234375, "learning_rate": 4.4794195163071656e-05, "loss": 0.4931, "step": 4818 }, { "epoch": 0.21164253488040222, "grad_norm": 2.15625, "learning_rate": 4.478996856400963e-05, "loss": 0.4681, "step": 4820 }, { "epoch": 0.2117303533596057, "grad_norm": 2.40625, "learning_rate": 4.478574044942707e-05, "loss": 0.5064, "step": 4822 }, { "epoch": 0.2118181718388092, "grad_norm": 2.40625, "learning_rate": 4.478151081964777e-05, "loss": 0.4992, "step": 4824 }, { "epoch": 0.21190599031801266, "grad_norm": 2.75, "learning_rate": 4.477727967499562e-05, "loss": 0.4752, "step": 4826 }, { "epoch": 0.21199380879721616, "grad_norm": 2.4375, "learning_rate": 4.477304701579466e-05, "loss": 0.4972, "step": 4828 }, { "epoch": 0.21208162727641963, "grad_norm": 2.203125, "learning_rate": 4.476881284236901e-05, "loss": 0.4839, "step": 4830 }, { "epoch": 0.21216944575562313, "grad_norm": 2.78125, "learning_rate": 4.4764577155042935e-05, "loss": 0.4915, "step": 4832 }, { "epoch": 0.2122572642348266, "grad_norm": 2.421875, "learning_rate": 4.4760339954140805e-05, "loss": 0.516, "step": 4834 }, { "epoch": 0.2123450827140301, "grad_norm": 2.265625, "learning_rate": 4.475610123998711e-05, "loss": 0.4683, "step": 4836 }, { "epoch": 0.21243290119323358, "grad_norm": 2.5625, "learning_rate": 4.4751861012906445e-05, "loss": 0.4993, "step": 4838 }, { "epoch": 0.21252071967243708, "grad_norm": 2.21875, "learning_rate": 4.4747619273223525e-05, "loss": 0.483, "step": 4840 }, { "epoch": 0.21260853815164055, "grad_norm": 2.203125, "learning_rate": 4.474337602126319e-05, "loss": 0.4766, "step": 4842 }, { "epoch": 0.21269635663084405, "grad_norm": 2.53125, "learning_rate": 4.473913125735038e-05, "loss": 0.4787, "step": 4844 }, { "epoch": 0.21278417511004752, "grad_norm": 2.640625, "learning_rate": 4.4734884981810174e-05, "loss": 0.4699, "step": 4846 }, { "epoch": 0.21287199358925102, "grad_norm": 2.703125, "learning_rate": 4.473063719496774e-05, "loss": 0.4876, "step": 4848 }, { "epoch": 0.21295981206845452, "grad_norm": 2.453125, "learning_rate": 4.472638789714838e-05, "loss": 0.483, "step": 4850 }, { "epoch": 0.213047630547658, "grad_norm": 2.34375, "learning_rate": 4.472213708867751e-05, "loss": 0.5009, "step": 4852 }, { "epoch": 0.2131354490268615, "grad_norm": 2.828125, "learning_rate": 4.471788476988066e-05, "loss": 0.4802, "step": 4854 }, { "epoch": 0.21322326750606496, "grad_norm": 2.453125, "learning_rate": 4.4713630941083454e-05, "loss": 0.4822, "step": 4856 }, { "epoch": 0.21331108598526846, "grad_norm": 2.25, "learning_rate": 4.470937560261167e-05, "loss": 0.4849, "step": 4858 }, { "epoch": 0.21339890446447193, "grad_norm": 2.46875, "learning_rate": 4.4705118754791184e-05, "loss": 0.4776, "step": 4860 }, { "epoch": 0.21348672294367543, "grad_norm": 2.265625, "learning_rate": 4.470086039794797e-05, "loss": 0.4665, "step": 4862 }, { "epoch": 0.2135745414228789, "grad_norm": 2.953125, "learning_rate": 4.469660053240815e-05, "loss": 0.4877, "step": 4864 }, { "epoch": 0.2136623599020824, "grad_norm": 2.8125, "learning_rate": 4.469233915849794e-05, "loss": 0.5137, "step": 4866 }, { "epoch": 0.21375017838128588, "grad_norm": 2.46875, "learning_rate": 4.468807627654368e-05, "loss": 0.498, "step": 4868 }, { "epoch": 0.21383799686048938, "grad_norm": 2.984375, "learning_rate": 4.4683811886871804e-05, "loss": 0.495, "step": 4870 }, { "epoch": 0.21392581533969285, "grad_norm": 2.453125, "learning_rate": 4.467954598980891e-05, "loss": 0.4879, "step": 4872 }, { "epoch": 0.21401363381889635, "grad_norm": 2.578125, "learning_rate": 4.4675278585681665e-05, "loss": 0.4437, "step": 4874 }, { "epoch": 0.21410145229809982, "grad_norm": 2.453125, "learning_rate": 4.467100967481687e-05, "loss": 0.4846, "step": 4876 }, { "epoch": 0.21418927077730332, "grad_norm": 2.484375, "learning_rate": 4.466673925754143e-05, "loss": 0.4913, "step": 4878 }, { "epoch": 0.2142770892565068, "grad_norm": 2.109375, "learning_rate": 4.4662467334182387e-05, "loss": 0.4757, "step": 4880 }, { "epoch": 0.2143649077357103, "grad_norm": 2.40625, "learning_rate": 4.465819390506689e-05, "loss": 0.4906, "step": 4882 }, { "epoch": 0.21445272621491376, "grad_norm": 2.3125, "learning_rate": 4.465391897052218e-05, "loss": 0.4691, "step": 4884 }, { "epoch": 0.21454054469411726, "grad_norm": 2.328125, "learning_rate": 4.4649642530875645e-05, "loss": 0.497, "step": 4886 }, { "epoch": 0.21462836317332074, "grad_norm": 2.515625, "learning_rate": 4.464536458645479e-05, "loss": 0.4595, "step": 4888 }, { "epoch": 0.21471618165252424, "grad_norm": 2.1875, "learning_rate": 4.464108513758719e-05, "loss": 0.4769, "step": 4890 }, { "epoch": 0.2148040001317277, "grad_norm": 2.046875, "learning_rate": 4.4636804184600575e-05, "loss": 0.4988, "step": 4892 }, { "epoch": 0.2148918186109312, "grad_norm": 2.171875, "learning_rate": 4.4632521727822805e-05, "loss": 0.4872, "step": 4894 }, { "epoch": 0.21497963709013468, "grad_norm": 2.71875, "learning_rate": 4.4628237767581814e-05, "loss": 0.4902, "step": 4896 }, { "epoch": 0.21506745556933818, "grad_norm": 2.46875, "learning_rate": 4.462395230420566e-05, "loss": 0.5361, "step": 4898 }, { "epoch": 0.21515527404854168, "grad_norm": 2.609375, "learning_rate": 4.4619665338022545e-05, "loss": 0.5288, "step": 4900 }, { "epoch": 0.21524309252774515, "grad_norm": 2.921875, "learning_rate": 4.461537686936075e-05, "loss": 0.481, "step": 4902 }, { "epoch": 0.21533091100694865, "grad_norm": 2.9375, "learning_rate": 4.461108689854869e-05, "loss": 0.4923, "step": 4904 }, { "epoch": 0.21541872948615212, "grad_norm": 2.4375, "learning_rate": 4.460679542591489e-05, "loss": 0.4724, "step": 4906 }, { "epoch": 0.21550654796535562, "grad_norm": 2.421875, "learning_rate": 4.4602502451788005e-05, "loss": 0.4788, "step": 4908 }, { "epoch": 0.2155943664445591, "grad_norm": 2.4375, "learning_rate": 4.459820797649678e-05, "loss": 0.4707, "step": 4910 }, { "epoch": 0.2156821849237626, "grad_norm": 2.28125, "learning_rate": 4.4593912000370085e-05, "loss": 0.4789, "step": 4912 }, { "epoch": 0.21577000340296607, "grad_norm": 2.59375, "learning_rate": 4.458961452373692e-05, "loss": 0.4999, "step": 4914 }, { "epoch": 0.21585782188216956, "grad_norm": 2.484375, "learning_rate": 4.458531554692638e-05, "loss": 0.4717, "step": 4916 }, { "epoch": 0.21594564036137304, "grad_norm": 2.4375, "learning_rate": 4.458101507026767e-05, "loss": 0.4894, "step": 4918 }, { "epoch": 0.21603345884057654, "grad_norm": 2.09375, "learning_rate": 4.4576713094090146e-05, "loss": 0.4699, "step": 4920 }, { "epoch": 0.21612127731978, "grad_norm": 2.28125, "learning_rate": 4.457240961872323e-05, "loss": 0.4841, "step": 4922 }, { "epoch": 0.2162090957989835, "grad_norm": 2.296875, "learning_rate": 4.45681046444965e-05, "loss": 0.4954, "step": 4924 }, { "epoch": 0.21629691427818698, "grad_norm": 2.984375, "learning_rate": 4.4563798171739626e-05, "loss": 0.4853, "step": 4926 }, { "epoch": 0.21638473275739048, "grad_norm": 2.3125, "learning_rate": 4.455949020078239e-05, "loss": 0.5074, "step": 4928 }, { "epoch": 0.21647255123659395, "grad_norm": 2.328125, "learning_rate": 4.455518073195471e-05, "loss": 0.5, "step": 4930 }, { "epoch": 0.21656036971579745, "grad_norm": 2.546875, "learning_rate": 4.45508697655866e-05, "loss": 0.473, "step": 4932 }, { "epoch": 0.21664818819500092, "grad_norm": 2.421875, "learning_rate": 4.4546557302008195e-05, "loss": 0.4746, "step": 4934 }, { "epoch": 0.21673600667420442, "grad_norm": 2.234375, "learning_rate": 4.454224334154975e-05, "loss": 0.4836, "step": 4936 }, { "epoch": 0.2168238251534079, "grad_norm": 2.34375, "learning_rate": 4.453792788454163e-05, "loss": 0.4828, "step": 4938 }, { "epoch": 0.2169116436326114, "grad_norm": 2.3125, "learning_rate": 4.45336109313143e-05, "loss": 0.5062, "step": 4940 }, { "epoch": 0.21699946211181487, "grad_norm": 2.546875, "learning_rate": 4.452929248219837e-05, "loss": 0.4902, "step": 4942 }, { "epoch": 0.21708728059101837, "grad_norm": 2.515625, "learning_rate": 4.4524972537524535e-05, "loss": 0.4679, "step": 4944 }, { "epoch": 0.21717509907022184, "grad_norm": 2.40625, "learning_rate": 4.4520651097623625e-05, "loss": 0.4979, "step": 4946 }, { "epoch": 0.21726291754942534, "grad_norm": 2.21875, "learning_rate": 4.451632816282657e-05, "loss": 0.4708, "step": 4948 }, { "epoch": 0.2173507360286288, "grad_norm": 2.40625, "learning_rate": 4.4512003733464435e-05, "loss": 0.4937, "step": 4950 }, { "epoch": 0.2174385545078323, "grad_norm": 2.359375, "learning_rate": 4.450767780986837e-05, "loss": 0.47, "step": 4952 }, { "epoch": 0.2175263729870358, "grad_norm": 2.46875, "learning_rate": 4.4503350392369664e-05, "loss": 0.4776, "step": 4954 }, { "epoch": 0.21761419146623928, "grad_norm": 2.40625, "learning_rate": 4.4499021481299705e-05, "loss": 0.4668, "step": 4956 }, { "epoch": 0.21770200994544278, "grad_norm": 2.15625, "learning_rate": 4.449469107699001e-05, "loss": 0.4934, "step": 4958 }, { "epoch": 0.21778982842464625, "grad_norm": 2.421875, "learning_rate": 4.4490359179772204e-05, "loss": 0.4434, "step": 4960 }, { "epoch": 0.21787764690384975, "grad_norm": 2.484375, "learning_rate": 4.4486025789978016e-05, "loss": 0.4831, "step": 4962 }, { "epoch": 0.21796546538305323, "grad_norm": 2.578125, "learning_rate": 4.44816909079393e-05, "loss": 0.4856, "step": 4964 }, { "epoch": 0.21805328386225672, "grad_norm": 2.8125, "learning_rate": 4.4477354533988025e-05, "loss": 0.4619, "step": 4966 }, { "epoch": 0.2181411023414602, "grad_norm": 2.53125, "learning_rate": 4.447301666845628e-05, "loss": 0.5101, "step": 4968 }, { "epoch": 0.2182289208206637, "grad_norm": 2.953125, "learning_rate": 4.4468677311676236e-05, "loss": 0.4775, "step": 4970 }, { "epoch": 0.21831673929986717, "grad_norm": 2.640625, "learning_rate": 4.4464336463980226e-05, "loss": 0.4981, "step": 4972 }, { "epoch": 0.21840455777907067, "grad_norm": 2.140625, "learning_rate": 4.445999412570065e-05, "loss": 0.496, "step": 4974 }, { "epoch": 0.21849237625827414, "grad_norm": 2.125, "learning_rate": 4.445565029717008e-05, "loss": 0.499, "step": 4976 }, { "epoch": 0.21858019473747764, "grad_norm": 2.1875, "learning_rate": 4.445130497872113e-05, "loss": 0.4771, "step": 4978 }, { "epoch": 0.2186680132166811, "grad_norm": 2.375, "learning_rate": 4.4446958170686593e-05, "loss": 0.5265, "step": 4980 }, { "epoch": 0.2187558316958846, "grad_norm": 2.40625, "learning_rate": 4.444260987339933e-05, "loss": 0.4932, "step": 4982 }, { "epoch": 0.21884365017508808, "grad_norm": 2.5, "learning_rate": 4.443826008719235e-05, "loss": 0.4904, "step": 4984 }, { "epoch": 0.21893146865429158, "grad_norm": 2.34375, "learning_rate": 4.4433908812398736e-05, "loss": 0.4713, "step": 4986 }, { "epoch": 0.21901928713349506, "grad_norm": 2.484375, "learning_rate": 4.442955604935174e-05, "loss": 0.5122, "step": 4988 }, { "epoch": 0.21910710561269856, "grad_norm": 2.34375, "learning_rate": 4.4425201798384686e-05, "loss": 0.4549, "step": 4990 }, { "epoch": 0.21919492409190203, "grad_norm": 2.25, "learning_rate": 4.442084605983102e-05, "loss": 0.4765, "step": 4992 }, { "epoch": 0.21928274257110553, "grad_norm": 2.78125, "learning_rate": 4.441648883402431e-05, "loss": 0.5142, "step": 4994 }, { "epoch": 0.219370561050309, "grad_norm": 2.765625, "learning_rate": 4.441213012129822e-05, "loss": 0.4851, "step": 4996 }, { "epoch": 0.2194583795295125, "grad_norm": 2.265625, "learning_rate": 4.4407769921986554e-05, "loss": 0.4862, "step": 4998 }, { "epoch": 0.21954619800871597, "grad_norm": 2.34375, "learning_rate": 4.4403408236423224e-05, "loss": 0.4953, "step": 5000 }, { "epoch": 0.21963401648791947, "grad_norm": 2.453125, "learning_rate": 4.4399045064942236e-05, "loss": 0.478, "step": 5002 }, { "epoch": 0.21972183496712297, "grad_norm": 2.171875, "learning_rate": 4.439468040787772e-05, "loss": 0.5044, "step": 5004 }, { "epoch": 0.21980965344632644, "grad_norm": 2.375, "learning_rate": 4.439031426556394e-05, "loss": 0.5156, "step": 5006 }, { "epoch": 0.21989747192552994, "grad_norm": 2.296875, "learning_rate": 4.438594663833523e-05, "loss": 0.4612, "step": 5008 }, { "epoch": 0.2199852904047334, "grad_norm": 2.203125, "learning_rate": 4.4381577526526094e-05, "loss": 0.4944, "step": 5010 }, { "epoch": 0.2200731088839369, "grad_norm": 2.34375, "learning_rate": 4.43772069304711e-05, "loss": 0.511, "step": 5012 }, { "epoch": 0.22016092736314039, "grad_norm": 2.375, "learning_rate": 4.437283485050495e-05, "loss": 0.4886, "step": 5014 }, { "epoch": 0.22024874584234388, "grad_norm": 2.5625, "learning_rate": 4.436846128696247e-05, "loss": 0.5139, "step": 5016 }, { "epoch": 0.22033656432154736, "grad_norm": 2.28125, "learning_rate": 4.4364086240178584e-05, "loss": 0.4997, "step": 5018 }, { "epoch": 0.22042438280075086, "grad_norm": 2.40625, "learning_rate": 4.435970971048832e-05, "loss": 0.4923, "step": 5020 }, { "epoch": 0.22051220127995433, "grad_norm": 2.703125, "learning_rate": 4.435533169822685e-05, "loss": 0.5019, "step": 5022 }, { "epoch": 0.22060001975915783, "grad_norm": 2.25, "learning_rate": 4.435095220372945e-05, "loss": 0.4697, "step": 5024 }, { "epoch": 0.2206878382383613, "grad_norm": 2.109375, "learning_rate": 4.434657122733148e-05, "loss": 0.4798, "step": 5026 }, { "epoch": 0.2207756567175648, "grad_norm": 2.4375, "learning_rate": 4.4342188769368446e-05, "loss": 0.4827, "step": 5028 }, { "epoch": 0.22086347519676827, "grad_norm": 2.3125, "learning_rate": 4.433780483017597e-05, "loss": 0.4766, "step": 5030 }, { "epoch": 0.22095129367597177, "grad_norm": 2.25, "learning_rate": 4.433341941008975e-05, "loss": 0.502, "step": 5032 }, { "epoch": 0.22103911215517524, "grad_norm": 2.40625, "learning_rate": 4.432903250944565e-05, "loss": 0.4846, "step": 5034 }, { "epoch": 0.22112693063437874, "grad_norm": 2.65625, "learning_rate": 4.43246441285796e-05, "loss": 0.4871, "step": 5036 }, { "epoch": 0.22121474911358222, "grad_norm": 2.0625, "learning_rate": 4.4320254267827675e-05, "loss": 0.4614, "step": 5038 }, { "epoch": 0.22130256759278571, "grad_norm": 2.296875, "learning_rate": 4.4315862927526044e-05, "loss": 0.525, "step": 5040 }, { "epoch": 0.2213903860719892, "grad_norm": 2.296875, "learning_rate": 4.4311470108011e-05, "loss": 0.4742, "step": 5042 }, { "epoch": 0.2214782045511927, "grad_norm": 2.078125, "learning_rate": 4.4307075809618946e-05, "loss": 0.4935, "step": 5044 }, { "epoch": 0.22156602303039616, "grad_norm": 2.484375, "learning_rate": 4.4302680032686395e-05, "loss": 0.4644, "step": 5046 }, { "epoch": 0.22165384150959966, "grad_norm": 2.234375, "learning_rate": 4.429828277754998e-05, "loss": 0.4973, "step": 5048 }, { "epoch": 0.22174165998880313, "grad_norm": 2.140625, "learning_rate": 4.4293884044546455e-05, "loss": 0.4719, "step": 5050 }, { "epoch": 0.22182947846800663, "grad_norm": 2.484375, "learning_rate": 4.428948383401265e-05, "loss": 0.4938, "step": 5052 }, { "epoch": 0.22191729694721013, "grad_norm": 2.375, "learning_rate": 4.4285082146285556e-05, "loss": 0.4923, "step": 5054 }, { "epoch": 0.2220051154264136, "grad_norm": 2.28125, "learning_rate": 4.428067898170225e-05, "loss": 0.4724, "step": 5056 }, { "epoch": 0.2220929339056171, "grad_norm": 2.171875, "learning_rate": 4.427627434059992e-05, "loss": 0.5209, "step": 5058 }, { "epoch": 0.22218075238482057, "grad_norm": 2.421875, "learning_rate": 4.4271868223315884e-05, "loss": 0.4846, "step": 5060 }, { "epoch": 0.22226857086402407, "grad_norm": 2.125, "learning_rate": 4.4267460630187566e-05, "loss": 0.4762, "step": 5062 }, { "epoch": 0.22235638934322755, "grad_norm": 2.328125, "learning_rate": 4.4263051561552485e-05, "loss": 0.4835, "step": 5064 }, { "epoch": 0.22244420782243104, "grad_norm": 2.609375, "learning_rate": 4.42586410177483e-05, "loss": 0.4987, "step": 5066 }, { "epoch": 0.22253202630163452, "grad_norm": 2.3125, "learning_rate": 4.425422899911277e-05, "loss": 0.4749, "step": 5068 }, { "epoch": 0.22261984478083802, "grad_norm": 2.421875, "learning_rate": 4.424981550598376e-05, "loss": 0.5063, "step": 5070 }, { "epoch": 0.2227076632600415, "grad_norm": 2.484375, "learning_rate": 4.4245400538699275e-05, "loss": 0.4963, "step": 5072 }, { "epoch": 0.222795481739245, "grad_norm": 2.359375, "learning_rate": 4.42409840975974e-05, "loss": 0.5134, "step": 5074 }, { "epoch": 0.22288330021844846, "grad_norm": 2.5625, "learning_rate": 4.4236566183016345e-05, "loss": 0.4914, "step": 5076 }, { "epoch": 0.22297111869765196, "grad_norm": 2.328125, "learning_rate": 4.423214679529445e-05, "loss": 0.5302, "step": 5078 }, { "epoch": 0.22305893717685543, "grad_norm": 2.671875, "learning_rate": 4.422772593477014e-05, "loss": 0.4669, "step": 5080 }, { "epoch": 0.22314675565605893, "grad_norm": 2.625, "learning_rate": 4.4223303601781966e-05, "loss": 0.4804, "step": 5082 }, { "epoch": 0.2232345741352624, "grad_norm": 2.3125, "learning_rate": 4.421887979666859e-05, "loss": 0.4832, "step": 5084 }, { "epoch": 0.2233223926144659, "grad_norm": 2.53125, "learning_rate": 4.42144545197688e-05, "loss": 0.4871, "step": 5086 }, { "epoch": 0.22341021109366938, "grad_norm": 2.3125, "learning_rate": 4.421002777142148e-05, "loss": 0.4789, "step": 5088 }, { "epoch": 0.22349802957287287, "grad_norm": 2.3125, "learning_rate": 4.420559955196562e-05, "loss": 0.4875, "step": 5090 }, { "epoch": 0.22358584805207635, "grad_norm": 2.4375, "learning_rate": 4.420116986174034e-05, "loss": 0.48, "step": 5092 }, { "epoch": 0.22367366653127985, "grad_norm": 2.59375, "learning_rate": 4.419673870108488e-05, "loss": 0.4575, "step": 5094 }, { "epoch": 0.22376148501048332, "grad_norm": 3.109375, "learning_rate": 4.419230607033856e-05, "loss": 0.4913, "step": 5096 }, { "epoch": 0.22384930348968682, "grad_norm": 2.328125, "learning_rate": 4.4187871969840844e-05, "loss": 0.4545, "step": 5098 }, { "epoch": 0.2239371219688903, "grad_norm": 2.5625, "learning_rate": 4.418343639993129e-05, "loss": 0.4741, "step": 5100 }, { "epoch": 0.2240249404480938, "grad_norm": 2.296875, "learning_rate": 4.417899936094958e-05, "loss": 0.4738, "step": 5102 }, { "epoch": 0.2241127589272973, "grad_norm": 2.640625, "learning_rate": 4.4174560853235505e-05, "loss": 0.5039, "step": 5104 }, { "epoch": 0.22420057740650076, "grad_norm": 2.359375, "learning_rate": 4.4170120877128964e-05, "loss": 0.4679, "step": 5106 }, { "epoch": 0.22428839588570426, "grad_norm": 2.25, "learning_rate": 4.4165679432969956e-05, "loss": 0.4549, "step": 5108 }, { "epoch": 0.22437621436490773, "grad_norm": 2.578125, "learning_rate": 4.416123652109864e-05, "loss": 0.5007, "step": 5110 }, { "epoch": 0.22446403284411123, "grad_norm": 2.59375, "learning_rate": 4.415679214185523e-05, "loss": 0.4696, "step": 5112 }, { "epoch": 0.2245518513233147, "grad_norm": 2.234375, "learning_rate": 4.415234629558008e-05, "loss": 0.4881, "step": 5114 }, { "epoch": 0.2246396698025182, "grad_norm": 2.359375, "learning_rate": 4.4147898982613675e-05, "loss": 0.4651, "step": 5116 }, { "epoch": 0.22472748828172168, "grad_norm": 2.5, "learning_rate": 4.4143450203296566e-05, "loss": 0.4887, "step": 5118 }, { "epoch": 0.22481530676092518, "grad_norm": 2.78125, "learning_rate": 4.413899995796945e-05, "loss": 0.4802, "step": 5120 }, { "epoch": 0.22490312524012865, "grad_norm": 2.3125, "learning_rate": 4.413454824697313e-05, "loss": 0.4907, "step": 5122 }, { "epoch": 0.22499094371933215, "grad_norm": 2.71875, "learning_rate": 4.4130095070648524e-05, "loss": 0.4741, "step": 5124 }, { "epoch": 0.22507876219853562, "grad_norm": 2.6875, "learning_rate": 4.4125640429336646e-05, "loss": 0.4801, "step": 5126 }, { "epoch": 0.22516658067773912, "grad_norm": 2.3125, "learning_rate": 4.4121184323378636e-05, "loss": 0.4828, "step": 5128 }, { "epoch": 0.2252543991569426, "grad_norm": 2.265625, "learning_rate": 4.411672675311576e-05, "loss": 0.468, "step": 5130 }, { "epoch": 0.2253422176361461, "grad_norm": 2.453125, "learning_rate": 4.4112267718889355e-05, "loss": 0.4707, "step": 5132 }, { "epoch": 0.22543003611534956, "grad_norm": 3.015625, "learning_rate": 4.410780722104091e-05, "loss": 0.5055, "step": 5134 }, { "epoch": 0.22551785459455306, "grad_norm": 2.546875, "learning_rate": 4.410334525991201e-05, "loss": 0.4714, "step": 5136 }, { "epoch": 0.22560567307375654, "grad_norm": 2.953125, "learning_rate": 4.409888183584435e-05, "loss": 0.4737, "step": 5138 }, { "epoch": 0.22569349155296003, "grad_norm": 3.421875, "learning_rate": 4.409441694917973e-05, "loss": 0.4804, "step": 5140 }, { "epoch": 0.2257813100321635, "grad_norm": 2.5, "learning_rate": 4.40899506002601e-05, "loss": 0.4848, "step": 5142 }, { "epoch": 0.225869128511367, "grad_norm": 2.09375, "learning_rate": 4.408548278942747e-05, "loss": 0.4637, "step": 5144 }, { "epoch": 0.22595694699057048, "grad_norm": 2.03125, "learning_rate": 4.408101351702398e-05, "loss": 0.4872, "step": 5146 }, { "epoch": 0.22604476546977398, "grad_norm": 2.296875, "learning_rate": 4.4076542783391925e-05, "loss": 0.4784, "step": 5148 }, { "epoch": 0.22613258394897745, "grad_norm": 2.328125, "learning_rate": 4.4072070588873635e-05, "loss": 0.5011, "step": 5150 }, { "epoch": 0.22622040242818095, "grad_norm": 2.21875, "learning_rate": 4.406759693381161e-05, "loss": 0.4579, "step": 5152 }, { "epoch": 0.22630822090738445, "grad_norm": 2.96875, "learning_rate": 4.4063121818548435e-05, "loss": 0.4964, "step": 5154 }, { "epoch": 0.22639603938658792, "grad_norm": 2.71875, "learning_rate": 4.4058645243426835e-05, "loss": 0.4828, "step": 5156 }, { "epoch": 0.22648385786579142, "grad_norm": 2.5625, "learning_rate": 4.40541672087896e-05, "loss": 0.4821, "step": 5158 }, { "epoch": 0.2265716763449949, "grad_norm": 2.8125, "learning_rate": 4.404968771497968e-05, "loss": 0.4809, "step": 5160 }, { "epoch": 0.2266594948241984, "grad_norm": 2.515625, "learning_rate": 4.404520676234011e-05, "loss": 0.4946, "step": 5162 }, { "epoch": 0.22674731330340187, "grad_norm": 1.9921875, "learning_rate": 4.404072435121404e-05, "loss": 0.46, "step": 5164 }, { "epoch": 0.22683513178260536, "grad_norm": 2.25, "learning_rate": 4.403624048194474e-05, "loss": 0.4687, "step": 5166 }, { "epoch": 0.22692295026180884, "grad_norm": 2.390625, "learning_rate": 4.403175515487557e-05, "loss": 0.494, "step": 5168 }, { "epoch": 0.22701076874101234, "grad_norm": 2.59375, "learning_rate": 4.402726837035002e-05, "loss": 0.4823, "step": 5170 }, { "epoch": 0.2270985872202158, "grad_norm": 2.65625, "learning_rate": 4.402278012871172e-05, "loss": 0.4808, "step": 5172 }, { "epoch": 0.2271864056994193, "grad_norm": 2.546875, "learning_rate": 4.401829043030434e-05, "loss": 0.4776, "step": 5174 }, { "epoch": 0.22727422417862278, "grad_norm": 2.765625, "learning_rate": 4.401379927547172e-05, "loss": 0.4646, "step": 5176 }, { "epoch": 0.22736204265782628, "grad_norm": 2.3125, "learning_rate": 4.40093066645578e-05, "loss": 0.4778, "step": 5178 }, { "epoch": 0.22744986113702975, "grad_norm": 2.265625, "learning_rate": 4.400481259790662e-05, "loss": 0.4713, "step": 5180 }, { "epoch": 0.22753767961623325, "grad_norm": 2.28125, "learning_rate": 4.400031707586234e-05, "loss": 0.4591, "step": 5182 }, { "epoch": 0.22762549809543672, "grad_norm": 2.75, "learning_rate": 4.3995820098769217e-05, "loss": 0.4826, "step": 5184 }, { "epoch": 0.22771331657464022, "grad_norm": 2.53125, "learning_rate": 4.3991321666971636e-05, "loss": 0.4527, "step": 5186 }, { "epoch": 0.2278011350538437, "grad_norm": 2.359375, "learning_rate": 4.3986821780814095e-05, "loss": 0.4742, "step": 5188 }, { "epoch": 0.2278889535330472, "grad_norm": 2.390625, "learning_rate": 4.398232044064118e-05, "loss": 0.464, "step": 5190 }, { "epoch": 0.22797677201225067, "grad_norm": 2.359375, "learning_rate": 4.397781764679762e-05, "loss": 0.4821, "step": 5192 }, { "epoch": 0.22806459049145417, "grad_norm": 2.125, "learning_rate": 4.397331339962824e-05, "loss": 0.5096, "step": 5194 }, { "epoch": 0.22815240897065764, "grad_norm": 2.34375, "learning_rate": 4.396880769947796e-05, "loss": 0.5087, "step": 5196 }, { "epoch": 0.22824022744986114, "grad_norm": 2.125, "learning_rate": 4.396430054669186e-05, "loss": 0.4886, "step": 5198 }, { "epoch": 0.2283280459290646, "grad_norm": 2.5, "learning_rate": 4.395979194161506e-05, "loss": 0.4747, "step": 5200 }, { "epoch": 0.2284158644082681, "grad_norm": 2.71875, "learning_rate": 4.395528188459286e-05, "loss": 0.4934, "step": 5202 }, { "epoch": 0.2285036828874716, "grad_norm": 2.234375, "learning_rate": 4.395077037597062e-05, "loss": 0.4631, "step": 5204 }, { "epoch": 0.22859150136667508, "grad_norm": 2.453125, "learning_rate": 4.394625741609384e-05, "loss": 0.4963, "step": 5206 }, { "epoch": 0.22867931984587858, "grad_norm": 2.15625, "learning_rate": 4.3941743005308136e-05, "loss": 0.4941, "step": 5208 }, { "epoch": 0.22876713832508205, "grad_norm": 2.40625, "learning_rate": 4.39372271439592e-05, "loss": 0.4873, "step": 5210 }, { "epoch": 0.22885495680428555, "grad_norm": 2.765625, "learning_rate": 4.393270983239288e-05, "loss": 0.4807, "step": 5212 }, { "epoch": 0.22894277528348903, "grad_norm": 2.453125, "learning_rate": 4.39281910709551e-05, "loss": 0.5022, "step": 5214 }, { "epoch": 0.22903059376269252, "grad_norm": 2.421875, "learning_rate": 4.3923670859991906e-05, "loss": 0.4848, "step": 5216 }, { "epoch": 0.229118412241896, "grad_norm": 2.28125, "learning_rate": 4.391914919984947e-05, "loss": 0.4556, "step": 5218 }, { "epoch": 0.2292062307210995, "grad_norm": 2.703125, "learning_rate": 4.3914626090874044e-05, "loss": 0.4881, "step": 5220 }, { "epoch": 0.22929404920030297, "grad_norm": 2.46875, "learning_rate": 4.391010153341203e-05, "loss": 0.478, "step": 5222 }, { "epoch": 0.22938186767950647, "grad_norm": 2.328125, "learning_rate": 4.390557552780989e-05, "loss": 0.485, "step": 5224 }, { "epoch": 0.22946968615870994, "grad_norm": 2.3125, "learning_rate": 4.390104807441425e-05, "loss": 0.4779, "step": 5226 }, { "epoch": 0.22955750463791344, "grad_norm": 2.0, "learning_rate": 4.3896519173571824e-05, "loss": 0.4636, "step": 5228 }, { "epoch": 0.2296453231171169, "grad_norm": 2.265625, "learning_rate": 4.389198882562943e-05, "loss": 0.4515, "step": 5230 }, { "epoch": 0.2297331415963204, "grad_norm": 2.546875, "learning_rate": 4.3887457030934e-05, "loss": 0.4524, "step": 5232 }, { "epoch": 0.22982096007552388, "grad_norm": 2.578125, "learning_rate": 4.388292378983258e-05, "loss": 0.4781, "step": 5234 }, { "epoch": 0.22990877855472738, "grad_norm": 2.53125, "learning_rate": 4.387838910267233e-05, "loss": 0.48, "step": 5236 }, { "epoch": 0.22999659703393086, "grad_norm": 2.375, "learning_rate": 4.387385296980052e-05, "loss": 0.4608, "step": 5238 }, { "epoch": 0.23008441551313435, "grad_norm": 2.515625, "learning_rate": 4.3869315391564525e-05, "loss": 0.4516, "step": 5240 }, { "epoch": 0.23017223399233783, "grad_norm": 2.53125, "learning_rate": 4.3864776368311835e-05, "loss": 0.4758, "step": 5242 }, { "epoch": 0.23026005247154133, "grad_norm": 2.28125, "learning_rate": 4.3860235900390046e-05, "loss": 0.474, "step": 5244 }, { "epoch": 0.2303478709507448, "grad_norm": 2.8125, "learning_rate": 4.3855693988146876e-05, "loss": 0.4854, "step": 5246 }, { "epoch": 0.2304356894299483, "grad_norm": 2.390625, "learning_rate": 4.3851150631930124e-05, "loss": 0.4763, "step": 5248 }, { "epoch": 0.23052350790915177, "grad_norm": 2.25, "learning_rate": 4.384660583208776e-05, "loss": 0.4844, "step": 5250 }, { "epoch": 0.23061132638835527, "grad_norm": 2.4375, "learning_rate": 4.3842059588967785e-05, "loss": 0.485, "step": 5252 }, { "epoch": 0.23069914486755877, "grad_norm": 2.546875, "learning_rate": 4.3837511902918384e-05, "loss": 0.4942, "step": 5254 }, { "epoch": 0.23078696334676224, "grad_norm": 2.625, "learning_rate": 4.38329627742878e-05, "loss": 0.4815, "step": 5256 }, { "epoch": 0.23087478182596574, "grad_norm": 2.15625, "learning_rate": 4.382841220342441e-05, "loss": 0.4634, "step": 5258 }, { "epoch": 0.2309626003051692, "grad_norm": 2.390625, "learning_rate": 4.38238601906767e-05, "loss": 0.4528, "step": 5260 }, { "epoch": 0.2310504187843727, "grad_norm": 2.4375, "learning_rate": 4.3819306736393265e-05, "loss": 0.4836, "step": 5262 }, { "epoch": 0.23113823726357619, "grad_norm": 2.390625, "learning_rate": 4.3814751840922816e-05, "loss": 0.4351, "step": 5264 }, { "epoch": 0.23122605574277968, "grad_norm": 2.296875, "learning_rate": 4.381019550461415e-05, "loss": 0.4912, "step": 5266 }, { "epoch": 0.23131387422198316, "grad_norm": 2.21875, "learning_rate": 4.3805637727816205e-05, "loss": 0.4542, "step": 5268 }, { "epoch": 0.23140169270118666, "grad_norm": 2.5625, "learning_rate": 4.3801078510878025e-05, "loss": 0.4752, "step": 5270 }, { "epoch": 0.23148951118039013, "grad_norm": 2.453125, "learning_rate": 4.3796517854148735e-05, "loss": 0.4868, "step": 5272 }, { "epoch": 0.23157732965959363, "grad_norm": 2.734375, "learning_rate": 4.3791955757977604e-05, "loss": 0.4772, "step": 5274 }, { "epoch": 0.2316651481387971, "grad_norm": 2.828125, "learning_rate": 4.3787392222713996e-05, "loss": 0.5021, "step": 5276 }, { "epoch": 0.2317529666180006, "grad_norm": 2.96875, "learning_rate": 4.378282724870739e-05, "loss": 0.4864, "step": 5278 }, { "epoch": 0.23184078509720407, "grad_norm": 3.765625, "learning_rate": 4.3778260836307373e-05, "loss": 0.4819, "step": 5280 }, { "epoch": 0.23192860357640757, "grad_norm": 3.9375, "learning_rate": 4.3773692985863635e-05, "loss": 0.4849, "step": 5282 }, { "epoch": 0.23201642205561104, "grad_norm": 3.921875, "learning_rate": 4.3769123697725986e-05, "loss": 0.4509, "step": 5284 }, { "epoch": 0.23210424053481454, "grad_norm": 2.484375, "learning_rate": 4.376455297224435e-05, "loss": 0.4961, "step": 5286 }, { "epoch": 0.23219205901401802, "grad_norm": 2.359375, "learning_rate": 4.3759980809768756e-05, "loss": 0.4676, "step": 5288 }, { "epoch": 0.23227987749322151, "grad_norm": 3.875, "learning_rate": 4.3755407210649325e-05, "loss": 0.5097, "step": 5290 }, { "epoch": 0.232367695972425, "grad_norm": 3.609375, "learning_rate": 4.375083217523631e-05, "loss": 0.4754, "step": 5292 }, { "epoch": 0.2324555144516285, "grad_norm": 3.453125, "learning_rate": 4.374625570388008e-05, "loss": 0.466, "step": 5294 }, { "epoch": 0.23254333293083196, "grad_norm": 3.28125, "learning_rate": 4.374167779693109e-05, "loss": 0.4691, "step": 5296 }, { "epoch": 0.23263115141003546, "grad_norm": 2.5625, "learning_rate": 4.3737098454739924e-05, "loss": 0.4651, "step": 5298 }, { "epoch": 0.23271896988923893, "grad_norm": 2.78125, "learning_rate": 4.373251767765727e-05, "loss": 0.4742, "step": 5300 }, { "epoch": 0.23280678836844243, "grad_norm": 3.625, "learning_rate": 4.372793546603392e-05, "loss": 0.4641, "step": 5302 }, { "epoch": 0.23289460684764593, "grad_norm": 2.234375, "learning_rate": 4.372335182022078e-05, "loss": 0.4854, "step": 5304 }, { "epoch": 0.2329824253268494, "grad_norm": 2.359375, "learning_rate": 4.371876674056886e-05, "loss": 0.4809, "step": 5306 }, { "epoch": 0.2330702438060529, "grad_norm": 2.078125, "learning_rate": 4.3714180227429316e-05, "loss": 0.4776, "step": 5308 }, { "epoch": 0.23315806228525637, "grad_norm": 2.59375, "learning_rate": 4.370959228115335e-05, "loss": 0.4895, "step": 5310 }, { "epoch": 0.23324588076445987, "grad_norm": 2.515625, "learning_rate": 4.3705002902092326e-05, "loss": 0.4962, "step": 5312 }, { "epoch": 0.23333369924366335, "grad_norm": 2.328125, "learning_rate": 4.37004120905977e-05, "loss": 0.4616, "step": 5314 }, { "epoch": 0.23342151772286684, "grad_norm": 2.296875, "learning_rate": 4.369581984702102e-05, "loss": 0.4799, "step": 5316 }, { "epoch": 0.23350933620207032, "grad_norm": 2.328125, "learning_rate": 4.3691226171713986e-05, "loss": 0.4715, "step": 5318 }, { "epoch": 0.23359715468127382, "grad_norm": 2.5, "learning_rate": 4.3686631065028374e-05, "loss": 0.4639, "step": 5320 }, { "epoch": 0.2336849731604773, "grad_norm": 2.6875, "learning_rate": 4.3682034527316064e-05, "loss": 0.4588, "step": 5322 }, { "epoch": 0.2337727916396808, "grad_norm": 3.328125, "learning_rate": 4.367743655892908e-05, "loss": 0.4905, "step": 5324 }, { "epoch": 0.23386061011888426, "grad_norm": 2.859375, "learning_rate": 4.367283716021953e-05, "loss": 0.4552, "step": 5326 }, { "epoch": 0.23394842859808776, "grad_norm": 2.765625, "learning_rate": 4.366823633153963e-05, "loss": 0.5003, "step": 5328 }, { "epoch": 0.23403624707729123, "grad_norm": 3.078125, "learning_rate": 4.366363407324171e-05, "loss": 0.4557, "step": 5330 }, { "epoch": 0.23412406555649473, "grad_norm": 2.84375, "learning_rate": 4.365903038567822e-05, "loss": 0.4511, "step": 5332 }, { "epoch": 0.2342118840356982, "grad_norm": 2.875, "learning_rate": 4.3654425269201716e-05, "loss": 0.4675, "step": 5334 }, { "epoch": 0.2342997025149017, "grad_norm": 3.203125, "learning_rate": 4.364981872416485e-05, "loss": 0.4511, "step": 5336 }, { "epoch": 0.23438752099410518, "grad_norm": 3.21875, "learning_rate": 4.36452107509204e-05, "loss": 0.4933, "step": 5338 }, { "epoch": 0.23447533947330867, "grad_norm": 2.90625, "learning_rate": 4.364060134982124e-05, "loss": 0.4911, "step": 5340 }, { "epoch": 0.23456315795251215, "grad_norm": 2.796875, "learning_rate": 4.3635990521220355e-05, "loss": 0.5039, "step": 5342 }, { "epoch": 0.23465097643171565, "grad_norm": 3.390625, "learning_rate": 4.363137826547085e-05, "loss": 0.4649, "step": 5344 }, { "epoch": 0.23473879491091912, "grad_norm": 2.203125, "learning_rate": 4.362676458292594e-05, "loss": 0.4797, "step": 5346 }, { "epoch": 0.23482661339012262, "grad_norm": 2.125, "learning_rate": 4.362214947393892e-05, "loss": 0.4704, "step": 5348 }, { "epoch": 0.2349144318693261, "grad_norm": 2.671875, "learning_rate": 4.361753293886324e-05, "loss": 0.4758, "step": 5350 }, { "epoch": 0.2350022503485296, "grad_norm": 2.921875, "learning_rate": 4.361291497805242e-05, "loss": 0.4899, "step": 5352 }, { "epoch": 0.23509006882773306, "grad_norm": 3.03125, "learning_rate": 4.3608295591860105e-05, "loss": 0.4964, "step": 5354 }, { "epoch": 0.23517788730693656, "grad_norm": 2.890625, "learning_rate": 4.360367478064006e-05, "loss": 0.4743, "step": 5356 }, { "epoch": 0.23526570578614006, "grad_norm": 2.71875, "learning_rate": 4.3599052544746136e-05, "loss": 0.4564, "step": 5358 }, { "epoch": 0.23535352426534353, "grad_norm": 2.21875, "learning_rate": 4.359442888453231e-05, "loss": 0.4697, "step": 5360 }, { "epoch": 0.23544134274454703, "grad_norm": 2.84375, "learning_rate": 4.3589803800352666e-05, "loss": 0.4618, "step": 5362 }, { "epoch": 0.2355291612237505, "grad_norm": 2.34375, "learning_rate": 4.3585177292561386e-05, "loss": 0.4454, "step": 5364 }, { "epoch": 0.235616979702954, "grad_norm": 2.4375, "learning_rate": 4.358054936151278e-05, "loss": 0.4874, "step": 5366 }, { "epoch": 0.23570479818215748, "grad_norm": 2.328125, "learning_rate": 4.3575920007561245e-05, "loss": 0.4826, "step": 5368 }, { "epoch": 0.23579261666136098, "grad_norm": 2.421875, "learning_rate": 4.35712892310613e-05, "loss": 0.4856, "step": 5370 }, { "epoch": 0.23588043514056445, "grad_norm": 2.140625, "learning_rate": 4.356665703236758e-05, "loss": 0.4558, "step": 5372 }, { "epoch": 0.23596825361976795, "grad_norm": 2.5625, "learning_rate": 4.356202341183481e-05, "loss": 0.4791, "step": 5374 }, { "epoch": 0.23605607209897142, "grad_norm": 2.9375, "learning_rate": 4.355738836981784e-05, "loss": 0.4504, "step": 5376 }, { "epoch": 0.23614389057817492, "grad_norm": 3.296875, "learning_rate": 4.3552751906671616e-05, "loss": 0.4624, "step": 5378 }, { "epoch": 0.2362317090573784, "grad_norm": 2.65625, "learning_rate": 4.3548114022751206e-05, "loss": 0.4574, "step": 5380 }, { "epoch": 0.2363195275365819, "grad_norm": 2.203125, "learning_rate": 4.354347471841178e-05, "loss": 0.476, "step": 5382 }, { "epoch": 0.23640734601578536, "grad_norm": 2.3125, "learning_rate": 4.3538833994008614e-05, "loss": 0.4668, "step": 5384 }, { "epoch": 0.23649516449498886, "grad_norm": 2.265625, "learning_rate": 4.35341918498971e-05, "loss": 0.4589, "step": 5386 }, { "epoch": 0.23658298297419234, "grad_norm": 2.640625, "learning_rate": 4.3529548286432724e-05, "loss": 0.456, "step": 5388 }, { "epoch": 0.23667080145339583, "grad_norm": 2.140625, "learning_rate": 4.3524903303971104e-05, "loss": 0.4643, "step": 5390 }, { "epoch": 0.2367586199325993, "grad_norm": 2.140625, "learning_rate": 4.352025690286795e-05, "loss": 0.4801, "step": 5392 }, { "epoch": 0.2368464384118028, "grad_norm": 2.1875, "learning_rate": 4.3515609083479066e-05, "loss": 0.4824, "step": 5394 }, { "epoch": 0.23693425689100628, "grad_norm": 2.359375, "learning_rate": 4.351095984616042e-05, "loss": 0.4977, "step": 5396 }, { "epoch": 0.23702207537020978, "grad_norm": 2.4375, "learning_rate": 4.350630919126803e-05, "loss": 0.4508, "step": 5398 }, { "epoch": 0.23710989384941325, "grad_norm": 2.140625, "learning_rate": 4.350165711915803e-05, "loss": 0.453, "step": 5400 }, { "epoch": 0.23719771232861675, "grad_norm": 2.25, "learning_rate": 4.349700363018671e-05, "loss": 0.4878, "step": 5402 }, { "epoch": 0.23728553080782022, "grad_norm": 2.25, "learning_rate": 4.349234872471041e-05, "loss": 0.454, "step": 5404 }, { "epoch": 0.23737334928702372, "grad_norm": 2.546875, "learning_rate": 4.348769240308561e-05, "loss": 0.4635, "step": 5406 }, { "epoch": 0.23746116776622722, "grad_norm": 2.921875, "learning_rate": 4.34830346656689e-05, "loss": 0.4546, "step": 5408 }, { "epoch": 0.2375489862454307, "grad_norm": 3.5, "learning_rate": 4.347837551281696e-05, "loss": 0.4531, "step": 5410 }, { "epoch": 0.2376368047246342, "grad_norm": 3.0, "learning_rate": 4.347371494488659e-05, "loss": 0.4688, "step": 5412 }, { "epoch": 0.23772462320383766, "grad_norm": 2.375, "learning_rate": 4.346905296223471e-05, "loss": 0.4568, "step": 5414 }, { "epoch": 0.23781244168304116, "grad_norm": 3.078125, "learning_rate": 4.346438956521832e-05, "loss": 0.4738, "step": 5416 }, { "epoch": 0.23790026016224464, "grad_norm": 2.90625, "learning_rate": 4.345972475419455e-05, "loss": 0.4641, "step": 5418 }, { "epoch": 0.23798807864144814, "grad_norm": 2.859375, "learning_rate": 4.345505852952064e-05, "loss": 0.4585, "step": 5420 }, { "epoch": 0.2380758971206516, "grad_norm": 2.375, "learning_rate": 4.345039089155392e-05, "loss": 0.4776, "step": 5422 }, { "epoch": 0.2381637155998551, "grad_norm": 2.21875, "learning_rate": 4.344572184065184e-05, "loss": 0.4306, "step": 5424 }, { "epoch": 0.23825153407905858, "grad_norm": 2.15625, "learning_rate": 4.344105137717197e-05, "loss": 0.455, "step": 5426 }, { "epoch": 0.23833935255826208, "grad_norm": 2.28125, "learning_rate": 4.343637950147196e-05, "loss": 0.4614, "step": 5428 }, { "epoch": 0.23842717103746555, "grad_norm": 2.4375, "learning_rate": 4.343170621390958e-05, "loss": 0.453, "step": 5430 }, { "epoch": 0.23851498951666905, "grad_norm": 2.5, "learning_rate": 4.3427031514842733e-05, "loss": 0.4712, "step": 5432 }, { "epoch": 0.23860280799587252, "grad_norm": 2.203125, "learning_rate": 4.3422355404629384e-05, "loss": 0.452, "step": 5434 }, { "epoch": 0.23869062647507602, "grad_norm": 2.25, "learning_rate": 4.3417677883627644e-05, "loss": 0.455, "step": 5436 }, { "epoch": 0.2387784449542795, "grad_norm": 2.3125, "learning_rate": 4.341299895219572e-05, "loss": 0.4595, "step": 5438 }, { "epoch": 0.238866263433483, "grad_norm": 2.0625, "learning_rate": 4.340831861069192e-05, "loss": 0.4738, "step": 5440 }, { "epoch": 0.23895408191268647, "grad_norm": 2.453125, "learning_rate": 4.340363685947467e-05, "loss": 0.4465, "step": 5442 }, { "epoch": 0.23904190039188997, "grad_norm": 2.421875, "learning_rate": 4.33989536989025e-05, "loss": 0.4713, "step": 5444 }, { "epoch": 0.23912971887109344, "grad_norm": 2.15625, "learning_rate": 4.3394269129334044e-05, "loss": 0.4876, "step": 5446 }, { "epoch": 0.23921753735029694, "grad_norm": 2.78125, "learning_rate": 4.338958315112804e-05, "loss": 0.484, "step": 5448 }, { "epoch": 0.2393053558295004, "grad_norm": 2.59375, "learning_rate": 4.338489576464336e-05, "loss": 0.4481, "step": 5450 }, { "epoch": 0.2393931743087039, "grad_norm": 2.46875, "learning_rate": 4.338020697023895e-05, "loss": 0.465, "step": 5452 }, { "epoch": 0.23948099278790738, "grad_norm": 2.234375, "learning_rate": 4.337551676827389e-05, "loss": 0.4872, "step": 5454 }, { "epoch": 0.23956881126711088, "grad_norm": 2.171875, "learning_rate": 4.337082515910734e-05, "loss": 0.488, "step": 5456 }, { "epoch": 0.23965662974631438, "grad_norm": 2.546875, "learning_rate": 4.3366132143098606e-05, "loss": 0.4662, "step": 5458 }, { "epoch": 0.23974444822551785, "grad_norm": 2.65625, "learning_rate": 4.336143772060707e-05, "loss": 0.4747, "step": 5460 }, { "epoch": 0.23983226670472135, "grad_norm": 3.0, "learning_rate": 4.3356741891992226e-05, "loss": 0.4739, "step": 5462 }, { "epoch": 0.23992008518392482, "grad_norm": 2.3125, "learning_rate": 4.335204465761369e-05, "loss": 0.427, "step": 5464 }, { "epoch": 0.24000790366312832, "grad_norm": 2.1875, "learning_rate": 4.334734601783117e-05, "loss": 0.4881, "step": 5466 }, { "epoch": 0.2400957221423318, "grad_norm": 2.046875, "learning_rate": 4.3342645973004504e-05, "loss": 0.4672, "step": 5468 }, { "epoch": 0.2401835406215353, "grad_norm": 2.75, "learning_rate": 4.33379445234936e-05, "loss": 0.468, "step": 5470 }, { "epoch": 0.24027135910073877, "grad_norm": 2.53125, "learning_rate": 4.333324166965852e-05, "loss": 0.491, "step": 5472 }, { "epoch": 0.24035917757994227, "grad_norm": 2.484375, "learning_rate": 4.3328537411859394e-05, "loss": 0.4633, "step": 5474 }, { "epoch": 0.24044699605914574, "grad_norm": 2.828125, "learning_rate": 4.332383175045648e-05, "loss": 0.4597, "step": 5476 }, { "epoch": 0.24053481453834924, "grad_norm": 2.390625, "learning_rate": 4.331912468581013e-05, "loss": 0.453, "step": 5478 }, { "epoch": 0.2406226330175527, "grad_norm": 2.171875, "learning_rate": 4.331441621828083e-05, "loss": 0.4825, "step": 5480 }, { "epoch": 0.2407104514967562, "grad_norm": 2.265625, "learning_rate": 4.330970634822914e-05, "loss": 0.5258, "step": 5482 }, { "epoch": 0.24079826997595968, "grad_norm": 2.46875, "learning_rate": 4.330499507601575e-05, "loss": 0.4577, "step": 5484 }, { "epoch": 0.24088608845516318, "grad_norm": 2.171875, "learning_rate": 4.330028240200146e-05, "loss": 0.4376, "step": 5486 }, { "epoch": 0.24097390693436666, "grad_norm": 2.453125, "learning_rate": 4.3295568326547144e-05, "loss": 0.4581, "step": 5488 }, { "epoch": 0.24106172541357015, "grad_norm": 2.25, "learning_rate": 4.329085285001382e-05, "loss": 0.4796, "step": 5490 }, { "epoch": 0.24114954389277363, "grad_norm": 2.1875, "learning_rate": 4.3286135972762596e-05, "loss": 0.4707, "step": 5492 }, { "epoch": 0.24123736237197713, "grad_norm": 2.296875, "learning_rate": 4.328141769515471e-05, "loss": 0.4675, "step": 5494 }, { "epoch": 0.2413251808511806, "grad_norm": 2.109375, "learning_rate": 4.3276698017551464e-05, "loss": 0.4809, "step": 5496 }, { "epoch": 0.2414129993303841, "grad_norm": 3.109375, "learning_rate": 4.327197694031431e-05, "loss": 0.4557, "step": 5498 }, { "epoch": 0.24150081780958757, "grad_norm": 2.65625, "learning_rate": 4.3267254463804775e-05, "loss": 0.4709, "step": 5500 }, { "epoch": 0.24158863628879107, "grad_norm": 2.375, "learning_rate": 4.326253058838452e-05, "loss": 0.4782, "step": 5502 }, { "epoch": 0.24167645476799454, "grad_norm": 2.171875, "learning_rate": 4.325780531441529e-05, "loss": 0.4655, "step": 5504 }, { "epoch": 0.24176427324719804, "grad_norm": 2.28125, "learning_rate": 4.325307864225895e-05, "loss": 0.4288, "step": 5506 }, { "epoch": 0.24185209172640154, "grad_norm": 2.265625, "learning_rate": 4.324835057227748e-05, "loss": 0.4439, "step": 5508 }, { "epoch": 0.241939910205605, "grad_norm": 2.203125, "learning_rate": 4.324362110483294e-05, "loss": 0.4501, "step": 5510 }, { "epoch": 0.2420277286848085, "grad_norm": 2.390625, "learning_rate": 4.3238890240287536e-05, "loss": 0.4746, "step": 5512 }, { "epoch": 0.24211554716401198, "grad_norm": 2.390625, "learning_rate": 4.323415797900353e-05, "loss": 0.4878, "step": 5514 }, { "epoch": 0.24220336564321548, "grad_norm": 2.609375, "learning_rate": 4.322942432134335e-05, "loss": 0.4404, "step": 5516 }, { "epoch": 0.24229118412241896, "grad_norm": 2.609375, "learning_rate": 4.322468926766947e-05, "loss": 0.4871, "step": 5518 }, { "epoch": 0.24237900260162246, "grad_norm": 2.78125, "learning_rate": 4.321995281834452e-05, "loss": 0.4665, "step": 5520 }, { "epoch": 0.24246682108082593, "grad_norm": 2.5, "learning_rate": 4.3215214973731225e-05, "loss": 0.4825, "step": 5522 }, { "epoch": 0.24255463956002943, "grad_norm": 2.3125, "learning_rate": 4.32104757341924e-05, "loss": 0.4422, "step": 5524 }, { "epoch": 0.2426424580392329, "grad_norm": 2.25, "learning_rate": 4.320573510009097e-05, "loss": 0.4534, "step": 5526 }, { "epoch": 0.2427302765184364, "grad_norm": 2.15625, "learning_rate": 4.320099307178999e-05, "loss": 0.4488, "step": 5528 }, { "epoch": 0.24281809499763987, "grad_norm": 2.203125, "learning_rate": 4.3196249649652585e-05, "loss": 0.4662, "step": 5530 }, { "epoch": 0.24290591347684337, "grad_norm": 2.3125, "learning_rate": 4.319150483404203e-05, "loss": 0.4582, "step": 5532 }, { "epoch": 0.24299373195604684, "grad_norm": 2.1875, "learning_rate": 4.318675862532167e-05, "loss": 0.4754, "step": 5534 }, { "epoch": 0.24308155043525034, "grad_norm": 2.375, "learning_rate": 4.318201102385497e-05, "loss": 0.4612, "step": 5536 }, { "epoch": 0.24316936891445382, "grad_norm": 2.359375, "learning_rate": 4.317726203000552e-05, "loss": 0.4556, "step": 5538 }, { "epoch": 0.24325718739365731, "grad_norm": 2.328125, "learning_rate": 4.317251164413698e-05, "loss": 0.5085, "step": 5540 }, { "epoch": 0.2433450058728608, "grad_norm": 2.171875, "learning_rate": 4.316775986661314e-05, "loss": 0.4796, "step": 5542 }, { "epoch": 0.2434328243520643, "grad_norm": 2.421875, "learning_rate": 4.31630066977979e-05, "loss": 0.4599, "step": 5544 }, { "epoch": 0.24352064283126776, "grad_norm": 2.046875, "learning_rate": 4.315825213805525e-05, "loss": 0.4443, "step": 5546 }, { "epoch": 0.24360846131047126, "grad_norm": 2.1875, "learning_rate": 4.31534961877493e-05, "loss": 0.4585, "step": 5548 }, { "epoch": 0.24369627978967473, "grad_norm": 2.359375, "learning_rate": 4.314873884724425e-05, "loss": 0.4578, "step": 5550 }, { "epoch": 0.24378409826887823, "grad_norm": 2.40625, "learning_rate": 4.3143980116904436e-05, "loss": 0.4922, "step": 5552 }, { "epoch": 0.2438719167480817, "grad_norm": 2.25, "learning_rate": 4.313921999709428e-05, "loss": 0.4907, "step": 5554 }, { "epoch": 0.2439597352272852, "grad_norm": 2.640625, "learning_rate": 4.313445848817831e-05, "loss": 0.4842, "step": 5556 }, { "epoch": 0.2440475537064887, "grad_norm": 2.3125, "learning_rate": 4.312969559052115e-05, "loss": 0.4853, "step": 5558 }, { "epoch": 0.24413537218569217, "grad_norm": 2.4375, "learning_rate": 4.312493130448756e-05, "loss": 0.4377, "step": 5560 }, { "epoch": 0.24422319066489567, "grad_norm": 2.375, "learning_rate": 4.312016563044239e-05, "loss": 0.4781, "step": 5562 }, { "epoch": 0.24431100914409914, "grad_norm": 2.5, "learning_rate": 4.311539856875059e-05, "loss": 0.4625, "step": 5564 }, { "epoch": 0.24439882762330264, "grad_norm": 2.34375, "learning_rate": 4.311063011977723e-05, "loss": 0.4681, "step": 5566 }, { "epoch": 0.24448664610250612, "grad_norm": 2.421875, "learning_rate": 4.3105860283887464e-05, "loss": 0.4618, "step": 5568 }, { "epoch": 0.24457446458170962, "grad_norm": 2.75, "learning_rate": 4.3101089061446585e-05, "loss": 0.4472, "step": 5570 }, { "epoch": 0.2446622830609131, "grad_norm": 2.296875, "learning_rate": 4.3096316452819964e-05, "loss": 0.4859, "step": 5572 }, { "epoch": 0.2447501015401166, "grad_norm": 2.265625, "learning_rate": 4.309154245837309e-05, "loss": 0.4876, "step": 5574 }, { "epoch": 0.24483792001932006, "grad_norm": 2.515625, "learning_rate": 4.308676707847156e-05, "loss": 0.4829, "step": 5576 }, { "epoch": 0.24492573849852356, "grad_norm": 2.203125, "learning_rate": 4.308199031348107e-05, "loss": 0.4657, "step": 5578 }, { "epoch": 0.24501355697772703, "grad_norm": 2.390625, "learning_rate": 4.3077212163767425e-05, "loss": 0.4184, "step": 5580 }, { "epoch": 0.24510137545693053, "grad_norm": 2.515625, "learning_rate": 4.307243262969654e-05, "loss": 0.471, "step": 5582 }, { "epoch": 0.245189193936134, "grad_norm": 2.5625, "learning_rate": 4.306765171163443e-05, "loss": 0.4686, "step": 5584 }, { "epoch": 0.2452770124153375, "grad_norm": 1.9765625, "learning_rate": 4.306286940994723e-05, "loss": 0.48, "step": 5586 }, { "epoch": 0.24536483089454098, "grad_norm": 2.421875, "learning_rate": 4.3058085725001154e-05, "loss": 0.4761, "step": 5588 }, { "epoch": 0.24545264937374447, "grad_norm": 2.03125, "learning_rate": 4.305330065716254e-05, "loss": 0.492, "step": 5590 }, { "epoch": 0.24554046785294795, "grad_norm": 2.28125, "learning_rate": 4.304851420679784e-05, "loss": 0.466, "step": 5592 }, { "epoch": 0.24562828633215145, "grad_norm": 2.15625, "learning_rate": 4.30437263742736e-05, "loss": 0.4475, "step": 5594 }, { "epoch": 0.24571610481135492, "grad_norm": 2.328125, "learning_rate": 4.303893715995646e-05, "loss": 0.459, "step": 5596 }, { "epoch": 0.24580392329055842, "grad_norm": 2.296875, "learning_rate": 4.303414656421319e-05, "loss": 0.4527, "step": 5598 }, { "epoch": 0.2458917417697619, "grad_norm": 2.265625, "learning_rate": 4.302935458741066e-05, "loss": 0.4525, "step": 5600 }, { "epoch": 0.2459795602489654, "grad_norm": 2.53125, "learning_rate": 4.3024561229915826e-05, "loss": 0.4693, "step": 5602 }, { "epoch": 0.24606737872816886, "grad_norm": 2.375, "learning_rate": 4.301976649209577e-05, "loss": 0.4656, "step": 5604 }, { "epoch": 0.24615519720737236, "grad_norm": 2.1875, "learning_rate": 4.301497037431769e-05, "loss": 0.4751, "step": 5606 }, { "epoch": 0.24624301568657586, "grad_norm": 2.5, "learning_rate": 4.3010172876948844e-05, "loss": 0.463, "step": 5608 }, { "epoch": 0.24633083416577933, "grad_norm": 2.203125, "learning_rate": 4.300537400035665e-05, "loss": 0.4701, "step": 5610 }, { "epoch": 0.24641865264498283, "grad_norm": 2.1875, "learning_rate": 4.30005737449086e-05, "loss": 0.4647, "step": 5612 }, { "epoch": 0.2465064711241863, "grad_norm": 2.203125, "learning_rate": 4.2995772110972296e-05, "loss": 0.4869, "step": 5614 }, { "epoch": 0.2465942896033898, "grad_norm": 2.328125, "learning_rate": 4.299096909891545e-05, "loss": 0.445, "step": 5616 }, { "epoch": 0.24668210808259328, "grad_norm": 2.296875, "learning_rate": 4.2986164709105877e-05, "loss": 0.475, "step": 5618 }, { "epoch": 0.24676992656179678, "grad_norm": 2.1875, "learning_rate": 4.29813589419115e-05, "loss": 0.4494, "step": 5620 }, { "epoch": 0.24685774504100025, "grad_norm": 2.265625, "learning_rate": 4.2976551797700336e-05, "loss": 0.4499, "step": 5622 }, { "epoch": 0.24694556352020375, "grad_norm": 2.5, "learning_rate": 4.297174327684054e-05, "loss": 0.4804, "step": 5624 }, { "epoch": 0.24703338199940722, "grad_norm": 2.421875, "learning_rate": 4.296693337970033e-05, "loss": 0.4644, "step": 5626 }, { "epoch": 0.24712120047861072, "grad_norm": 2.625, "learning_rate": 4.296212210664805e-05, "loss": 0.4614, "step": 5628 }, { "epoch": 0.2472090189578142, "grad_norm": 2.828125, "learning_rate": 4.2957309458052156e-05, "loss": 0.4825, "step": 5630 }, { "epoch": 0.2472968374370177, "grad_norm": 2.390625, "learning_rate": 4.2952495434281204e-05, "loss": 0.4424, "step": 5632 }, { "epoch": 0.24738465591622116, "grad_norm": 2.671875, "learning_rate": 4.294768003570384e-05, "loss": 0.486, "step": 5634 }, { "epoch": 0.24747247439542466, "grad_norm": 2.078125, "learning_rate": 4.294286326268885e-05, "loss": 0.4528, "step": 5636 }, { "epoch": 0.24756029287462814, "grad_norm": 2.46875, "learning_rate": 4.2938045115605074e-05, "loss": 0.4554, "step": 5638 }, { "epoch": 0.24764811135383163, "grad_norm": 2.234375, "learning_rate": 4.2933225594821514e-05, "loss": 0.4572, "step": 5640 }, { "epoch": 0.2477359298330351, "grad_norm": 2.15625, "learning_rate": 4.292840470070724e-05, "loss": 0.4692, "step": 5642 }, { "epoch": 0.2478237483122386, "grad_norm": 2.21875, "learning_rate": 4.2923582433631424e-05, "loss": 0.4767, "step": 5644 }, { "epoch": 0.24791156679144208, "grad_norm": 2.484375, "learning_rate": 4.291875879396338e-05, "loss": 0.4665, "step": 5646 }, { "epoch": 0.24799938527064558, "grad_norm": 2.53125, "learning_rate": 4.291393378207249e-05, "loss": 0.4811, "step": 5648 }, { "epoch": 0.24808720374984905, "grad_norm": 2.09375, "learning_rate": 4.290910739832825e-05, "loss": 0.4653, "step": 5650 }, { "epoch": 0.24817502222905255, "grad_norm": 2.234375, "learning_rate": 4.2904279643100276e-05, "loss": 0.4598, "step": 5652 }, { "epoch": 0.24826284070825602, "grad_norm": 2.390625, "learning_rate": 4.2899450516758275e-05, "loss": 0.4546, "step": 5654 }, { "epoch": 0.24835065918745952, "grad_norm": 2.796875, "learning_rate": 4.289462001967207e-05, "loss": 0.4407, "step": 5656 }, { "epoch": 0.24843847766666302, "grad_norm": 3.125, "learning_rate": 4.288978815221157e-05, "loss": 0.466, "step": 5658 }, { "epoch": 0.2485262961458665, "grad_norm": 3.328125, "learning_rate": 4.28849549147468e-05, "loss": 0.4913, "step": 5660 }, { "epoch": 0.24861411462507, "grad_norm": 3.484375, "learning_rate": 4.28801203076479e-05, "loss": 0.4755, "step": 5662 }, { "epoch": 0.24870193310427346, "grad_norm": 3.09375, "learning_rate": 4.2875284331285105e-05, "loss": 0.4563, "step": 5664 }, { "epoch": 0.24878975158347696, "grad_norm": 2.671875, "learning_rate": 4.287044698602874e-05, "loss": 0.4635, "step": 5666 }, { "epoch": 0.24887757006268044, "grad_norm": 2.203125, "learning_rate": 4.286560827224927e-05, "loss": 0.4683, "step": 5668 }, { "epoch": 0.24896538854188394, "grad_norm": 2.15625, "learning_rate": 4.286076819031723e-05, "loss": 0.4425, "step": 5670 }, { "epoch": 0.2490532070210874, "grad_norm": 2.390625, "learning_rate": 4.285592674060328e-05, "loss": 0.4542, "step": 5672 }, { "epoch": 0.2491410255002909, "grad_norm": 2.53125, "learning_rate": 4.2851083923478186e-05, "loss": 0.4687, "step": 5674 }, { "epoch": 0.24922884397949438, "grad_norm": 2.140625, "learning_rate": 4.2846239739312796e-05, "loss": 0.4361, "step": 5676 }, { "epoch": 0.24931666245869788, "grad_norm": 2.21875, "learning_rate": 4.28413941884781e-05, "loss": 0.4518, "step": 5678 }, { "epoch": 0.24940448093790135, "grad_norm": 2.53125, "learning_rate": 4.283654727134515e-05, "loss": 0.47, "step": 5680 }, { "epoch": 0.24949229941710485, "grad_norm": 3.09375, "learning_rate": 4.2831698988285144e-05, "loss": 0.4626, "step": 5682 }, { "epoch": 0.24958011789630832, "grad_norm": 2.828125, "learning_rate": 4.282684933966935e-05, "loss": 0.4662, "step": 5684 }, { "epoch": 0.24966793637551182, "grad_norm": 2.296875, "learning_rate": 4.282199832586916e-05, "loss": 0.4362, "step": 5686 }, { "epoch": 0.2497557548547153, "grad_norm": 2.328125, "learning_rate": 4.2817145947256064e-05, "loss": 0.4661, "step": 5688 }, { "epoch": 0.2498435733339188, "grad_norm": 2.40625, "learning_rate": 4.281229220420167e-05, "loss": 0.4632, "step": 5690 }, { "epoch": 0.24993139181312227, "grad_norm": 2.40625, "learning_rate": 4.2807437097077654e-05, "loss": 0.4642, "step": 5692 }, { "epoch": 0.25001921029232577, "grad_norm": 2.28125, "learning_rate": 4.280258062625585e-05, "loss": 0.4546, "step": 5694 }, { "epoch": 0.25010702877152924, "grad_norm": 2.265625, "learning_rate": 4.279772279210814e-05, "loss": 0.441, "step": 5696 }, { "epoch": 0.2501948472507327, "grad_norm": 2.1875, "learning_rate": 4.279286359500657e-05, "loss": 0.4551, "step": 5698 }, { "epoch": 0.25028266572993624, "grad_norm": 2.359375, "learning_rate": 4.2788003035323225e-05, "loss": 0.4417, "step": 5700 }, { "epoch": 0.2503704842091397, "grad_norm": 2.078125, "learning_rate": 4.278314111343035e-05, "loss": 0.4275, "step": 5702 }, { "epoch": 0.2504583026883432, "grad_norm": 2.15625, "learning_rate": 4.277827782970026e-05, "loss": 0.4649, "step": 5704 }, { "epoch": 0.25054612116754665, "grad_norm": 2.453125, "learning_rate": 4.277341318450541e-05, "loss": 0.4618, "step": 5706 }, { "epoch": 0.2506339396467502, "grad_norm": 2.4375, "learning_rate": 4.27685471782183e-05, "loss": 0.4681, "step": 5708 }, { "epoch": 0.25072175812595365, "grad_norm": 2.21875, "learning_rate": 4.276367981121159e-05, "loss": 0.4521, "step": 5710 }, { "epoch": 0.2508095766051571, "grad_norm": 2.4375, "learning_rate": 4.275881108385802e-05, "loss": 0.455, "step": 5712 }, { "epoch": 0.25089739508436065, "grad_norm": 2.109375, "learning_rate": 4.275394099653045e-05, "loss": 0.4738, "step": 5714 }, { "epoch": 0.2509852135635641, "grad_norm": 2.109375, "learning_rate": 4.2749069549601816e-05, "loss": 0.4609, "step": 5716 }, { "epoch": 0.2510730320427676, "grad_norm": 2.171875, "learning_rate": 4.274419674344519e-05, "loss": 0.4693, "step": 5718 }, { "epoch": 0.25116085052197107, "grad_norm": 2.40625, "learning_rate": 4.273932257843371e-05, "loss": 0.4611, "step": 5720 }, { "epoch": 0.2512486690011746, "grad_norm": 2.1875, "learning_rate": 4.273444705494066e-05, "loss": 0.4809, "step": 5722 }, { "epoch": 0.25133648748037807, "grad_norm": 2.390625, "learning_rate": 4.272957017333941e-05, "loss": 0.446, "step": 5724 }, { "epoch": 0.25142430595958154, "grad_norm": 2.265625, "learning_rate": 4.2724691934003414e-05, "loss": 0.463, "step": 5726 }, { "epoch": 0.251512124438785, "grad_norm": 2.203125, "learning_rate": 4.271981233730626e-05, "loss": 0.4614, "step": 5728 }, { "epoch": 0.25159994291798854, "grad_norm": 2.15625, "learning_rate": 4.271493138362165e-05, "loss": 0.4615, "step": 5730 }, { "epoch": 0.251687761397192, "grad_norm": 2.40625, "learning_rate": 4.271004907332333e-05, "loss": 0.4866, "step": 5732 }, { "epoch": 0.2517755798763955, "grad_norm": 2.453125, "learning_rate": 4.270516540678521e-05, "loss": 0.473, "step": 5734 }, { "epoch": 0.25186339835559896, "grad_norm": 2.5625, "learning_rate": 4.2700280384381275e-05, "loss": 0.4526, "step": 5736 }, { "epoch": 0.2519512168348025, "grad_norm": 2.5, "learning_rate": 4.269539400648563e-05, "loss": 0.4582, "step": 5738 }, { "epoch": 0.25203903531400595, "grad_norm": 2.328125, "learning_rate": 4.269050627347247e-05, "loss": 0.4638, "step": 5740 }, { "epoch": 0.2521268537932094, "grad_norm": 1.9609375, "learning_rate": 4.2685617185716104e-05, "loss": 0.4635, "step": 5742 }, { "epoch": 0.2522146722724129, "grad_norm": 2.15625, "learning_rate": 4.268072674359093e-05, "loss": 0.4475, "step": 5744 }, { "epoch": 0.2523024907516164, "grad_norm": 2.375, "learning_rate": 4.267583494747146e-05, "loss": 0.4297, "step": 5746 }, { "epoch": 0.2523903092308199, "grad_norm": 2.421875, "learning_rate": 4.267094179773232e-05, "loss": 0.4615, "step": 5748 }, { "epoch": 0.25247812771002337, "grad_norm": 2.515625, "learning_rate": 4.266604729474821e-05, "loss": 0.4567, "step": 5750 }, { "epoch": 0.25256594618922684, "grad_norm": 2.390625, "learning_rate": 4.2661151438893974e-05, "loss": 0.4569, "step": 5752 }, { "epoch": 0.25265376466843037, "grad_norm": 2.390625, "learning_rate": 4.265625423054452e-05, "loss": 0.4475, "step": 5754 }, { "epoch": 0.25274158314763384, "grad_norm": 2.4375, "learning_rate": 4.265135567007489e-05, "loss": 0.4421, "step": 5756 }, { "epoch": 0.2528294016268373, "grad_norm": 2.609375, "learning_rate": 4.264645575786021e-05, "loss": 0.4542, "step": 5758 }, { "epoch": 0.25291722010604084, "grad_norm": 2.109375, "learning_rate": 4.264155449427572e-05, "loss": 0.4345, "step": 5760 }, { "epoch": 0.2530050385852443, "grad_norm": 2.21875, "learning_rate": 4.263665187969675e-05, "loss": 0.4468, "step": 5762 }, { "epoch": 0.2530928570644478, "grad_norm": 2.25, "learning_rate": 4.263174791449875e-05, "loss": 0.4696, "step": 5764 }, { "epoch": 0.25318067554365126, "grad_norm": 2.21875, "learning_rate": 4.262684259905728e-05, "loss": 0.4805, "step": 5766 }, { "epoch": 0.2532684940228548, "grad_norm": 2.359375, "learning_rate": 4.2621935933747974e-05, "loss": 0.4899, "step": 5768 }, { "epoch": 0.25335631250205826, "grad_norm": 2.203125, "learning_rate": 4.261702791894659e-05, "loss": 0.4855, "step": 5770 }, { "epoch": 0.25344413098126173, "grad_norm": 2.203125, "learning_rate": 4.261211855502898e-05, "loss": 0.4947, "step": 5772 }, { "epoch": 0.2535319494604652, "grad_norm": 2.140625, "learning_rate": 4.260720784237111e-05, "loss": 0.457, "step": 5774 }, { "epoch": 0.2536197679396687, "grad_norm": 2.71875, "learning_rate": 4.260229578134904e-05, "loss": 0.4763, "step": 5776 }, { "epoch": 0.2537075864188722, "grad_norm": 2.28125, "learning_rate": 4.259738237233896e-05, "loss": 0.4496, "step": 5778 }, { "epoch": 0.25379540489807567, "grad_norm": 2.140625, "learning_rate": 4.2592467615717105e-05, "loss": 0.4484, "step": 5780 }, { "epoch": 0.25388322337727914, "grad_norm": 2.109375, "learning_rate": 4.258755151185986e-05, "loss": 0.4883, "step": 5782 }, { "epoch": 0.25397104185648267, "grad_norm": 2.53125, "learning_rate": 4.25826340611437e-05, "loss": 0.4727, "step": 5784 }, { "epoch": 0.25405886033568614, "grad_norm": 2.46875, "learning_rate": 4.2577715263945216e-05, "loss": 0.4786, "step": 5786 }, { "epoch": 0.2541466788148896, "grad_norm": 2.53125, "learning_rate": 4.2572795120641086e-05, "loss": 0.4564, "step": 5788 }, { "epoch": 0.2542344972940931, "grad_norm": 2.34375, "learning_rate": 4.256787363160809e-05, "loss": 0.477, "step": 5790 }, { "epoch": 0.2543223157732966, "grad_norm": 2.234375, "learning_rate": 4.256295079722311e-05, "loss": 0.4591, "step": 5792 }, { "epoch": 0.2544101342525001, "grad_norm": 2.796875, "learning_rate": 4.255802661786316e-05, "loss": 0.4847, "step": 5794 }, { "epoch": 0.25449795273170356, "grad_norm": 2.96875, "learning_rate": 4.2553101093905325e-05, "loss": 0.4582, "step": 5796 }, { "epoch": 0.25458577121090703, "grad_norm": 2.640625, "learning_rate": 4.2548174225726795e-05, "loss": 0.4743, "step": 5798 }, { "epoch": 0.25467358969011056, "grad_norm": 2.3125, "learning_rate": 4.254324601370487e-05, "loss": 0.4626, "step": 5800 }, { "epoch": 0.25476140816931403, "grad_norm": 2.703125, "learning_rate": 4.253831645821697e-05, "loss": 0.453, "step": 5802 }, { "epoch": 0.2548492266485175, "grad_norm": 2.3125, "learning_rate": 4.253338555964059e-05, "loss": 0.455, "step": 5804 }, { "epoch": 0.254937045127721, "grad_norm": 2.265625, "learning_rate": 4.252845331835333e-05, "loss": 0.447, "step": 5806 }, { "epoch": 0.2550248636069245, "grad_norm": 2.265625, "learning_rate": 4.252351973473293e-05, "loss": 0.4533, "step": 5808 }, { "epoch": 0.255112682086128, "grad_norm": 2.234375, "learning_rate": 4.251858480915718e-05, "loss": 0.4477, "step": 5810 }, { "epoch": 0.25520050056533145, "grad_norm": 2.359375, "learning_rate": 4.2513648542004006e-05, "loss": 0.4466, "step": 5812 }, { "epoch": 0.255288319044535, "grad_norm": 2.296875, "learning_rate": 4.250871093365143e-05, "loss": 0.4656, "step": 5814 }, { "epoch": 0.25537613752373844, "grad_norm": 2.390625, "learning_rate": 4.250377198447757e-05, "loss": 0.4657, "step": 5816 }, { "epoch": 0.2554639560029419, "grad_norm": 2.1875, "learning_rate": 4.249883169486066e-05, "loss": 0.4741, "step": 5818 }, { "epoch": 0.2555517744821454, "grad_norm": 2.5625, "learning_rate": 4.249389006517902e-05, "loss": 0.4608, "step": 5820 }, { "epoch": 0.2556395929613489, "grad_norm": 2.125, "learning_rate": 4.248894709581108e-05, "loss": 0.4406, "step": 5822 }, { "epoch": 0.2557274114405524, "grad_norm": 2.421875, "learning_rate": 4.248400278713539e-05, "loss": 0.4559, "step": 5824 }, { "epoch": 0.25581522991975586, "grad_norm": 2.984375, "learning_rate": 4.247905713953057e-05, "loss": 0.441, "step": 5826 }, { "epoch": 0.25590304839895933, "grad_norm": 2.859375, "learning_rate": 4.247411015337537e-05, "loss": 0.464, "step": 5828 }, { "epoch": 0.25599086687816286, "grad_norm": 2.578125, "learning_rate": 4.246916182904862e-05, "loss": 0.4813, "step": 5830 }, { "epoch": 0.25607868535736633, "grad_norm": 2.515625, "learning_rate": 4.246421216692926e-05, "loss": 0.4666, "step": 5832 }, { "epoch": 0.2561665038365698, "grad_norm": 2.34375, "learning_rate": 4.245926116739636e-05, "loss": 0.4726, "step": 5834 }, { "epoch": 0.2562543223157733, "grad_norm": 2.28125, "learning_rate": 4.245430883082905e-05, "loss": 0.4512, "step": 5836 }, { "epoch": 0.2563421407949768, "grad_norm": 3.1875, "learning_rate": 4.2449355157606584e-05, "loss": 0.4321, "step": 5838 }, { "epoch": 0.2564299592741803, "grad_norm": 2.875, "learning_rate": 4.244440014810832e-05, "loss": 0.4447, "step": 5840 }, { "epoch": 0.25651777775338375, "grad_norm": 3.125, "learning_rate": 4.243944380271372e-05, "loss": 0.4621, "step": 5842 }, { "epoch": 0.2566055962325872, "grad_norm": 2.625, "learning_rate": 4.243448612180232e-05, "loss": 0.445, "step": 5844 }, { "epoch": 0.25669341471179075, "grad_norm": 2.875, "learning_rate": 4.242952710575379e-05, "loss": 0.4493, "step": 5846 }, { "epoch": 0.2567812331909942, "grad_norm": 3.1875, "learning_rate": 4.24245667549479e-05, "loss": 0.472, "step": 5848 }, { "epoch": 0.2568690516701977, "grad_norm": 3.671875, "learning_rate": 4.241960506976452e-05, "loss": 0.4749, "step": 5850 }, { "epoch": 0.25695687014940116, "grad_norm": 3.140625, "learning_rate": 4.2414642050583595e-05, "loss": 0.4881, "step": 5852 }, { "epoch": 0.2570446886286047, "grad_norm": 3.03125, "learning_rate": 4.240967769778522e-05, "loss": 0.4714, "step": 5854 }, { "epoch": 0.25713250710780816, "grad_norm": 2.234375, "learning_rate": 4.240471201174955e-05, "loss": 0.424, "step": 5856 }, { "epoch": 0.25722032558701163, "grad_norm": 2.28125, "learning_rate": 4.239974499285686e-05, "loss": 0.4458, "step": 5858 }, { "epoch": 0.25730814406621516, "grad_norm": 2.90625, "learning_rate": 4.2394776641487525e-05, "loss": 0.4459, "step": 5860 }, { "epoch": 0.25739596254541863, "grad_norm": 2.515625, "learning_rate": 4.238980695802203e-05, "loss": 0.4279, "step": 5862 }, { "epoch": 0.2574837810246221, "grad_norm": 2.84375, "learning_rate": 4.238483594284094e-05, "loss": 0.471, "step": 5864 }, { "epoch": 0.2575715995038256, "grad_norm": 3.03125, "learning_rate": 4.2379863596324953e-05, "loss": 0.4962, "step": 5866 }, { "epoch": 0.2576594179830291, "grad_norm": 2.859375, "learning_rate": 4.2374889918854846e-05, "loss": 0.4755, "step": 5868 }, { "epoch": 0.2577472364622326, "grad_norm": 3.265625, "learning_rate": 4.236991491081151e-05, "loss": 0.4539, "step": 5870 }, { "epoch": 0.25783505494143605, "grad_norm": 2.546875, "learning_rate": 4.236493857257591e-05, "loss": 0.4678, "step": 5872 }, { "epoch": 0.2579228734206395, "grad_norm": 2.65625, "learning_rate": 4.235996090452916e-05, "loss": 0.4569, "step": 5874 }, { "epoch": 0.25801069189984305, "grad_norm": 2.9375, "learning_rate": 4.2354981907052446e-05, "loss": 0.4773, "step": 5876 }, { "epoch": 0.2580985103790465, "grad_norm": 2.46875, "learning_rate": 4.2350001580527057e-05, "loss": 0.4491, "step": 5878 }, { "epoch": 0.25818632885825, "grad_norm": 1.9765625, "learning_rate": 4.234501992533438e-05, "loss": 0.4411, "step": 5880 }, { "epoch": 0.25827414733745346, "grad_norm": 2.0, "learning_rate": 4.2340036941855924e-05, "loss": 0.4516, "step": 5882 }, { "epoch": 0.258361965816657, "grad_norm": 2.15625, "learning_rate": 4.233505263047328e-05, "loss": 0.4443, "step": 5884 }, { "epoch": 0.25844978429586046, "grad_norm": 2.15625, "learning_rate": 4.233006699156816e-05, "loss": 0.4445, "step": 5886 }, { "epoch": 0.25853760277506393, "grad_norm": 2.65625, "learning_rate": 4.232508002552235e-05, "loss": 0.458, "step": 5888 }, { "epoch": 0.2586254212542674, "grad_norm": 2.234375, "learning_rate": 4.232009173271776e-05, "loss": 0.4336, "step": 5890 }, { "epoch": 0.25871323973347093, "grad_norm": 2.015625, "learning_rate": 4.231510211353639e-05, "loss": 0.4569, "step": 5892 }, { "epoch": 0.2588010582126744, "grad_norm": 2.046875, "learning_rate": 4.2310111168360345e-05, "loss": 0.4377, "step": 5894 }, { "epoch": 0.2588888766918779, "grad_norm": 2.078125, "learning_rate": 4.230511889757184e-05, "loss": 0.4822, "step": 5896 }, { "epoch": 0.25897669517108135, "grad_norm": 2.0, "learning_rate": 4.230012530155318e-05, "loss": 0.4582, "step": 5898 }, { "epoch": 0.2590645136502849, "grad_norm": 2.078125, "learning_rate": 4.229513038068678e-05, "loss": 0.4536, "step": 5900 }, { "epoch": 0.25915233212948835, "grad_norm": 2.203125, "learning_rate": 4.229013413535515e-05, "loss": 0.4522, "step": 5902 }, { "epoch": 0.2592401506086918, "grad_norm": 2.328125, "learning_rate": 4.228513656594091e-05, "loss": 0.4709, "step": 5904 }, { "epoch": 0.2593279690878953, "grad_norm": 2.171875, "learning_rate": 4.228013767282676e-05, "loss": 0.4666, "step": 5906 }, { "epoch": 0.2594157875670988, "grad_norm": 2.484375, "learning_rate": 4.227513745639553e-05, "loss": 0.4448, "step": 5908 }, { "epoch": 0.2595036060463023, "grad_norm": 2.359375, "learning_rate": 4.227013591703012e-05, "loss": 0.4491, "step": 5910 }, { "epoch": 0.25959142452550577, "grad_norm": 2.328125, "learning_rate": 4.226513305511357e-05, "loss": 0.4451, "step": 5912 }, { "epoch": 0.2596792430047093, "grad_norm": 2.09375, "learning_rate": 4.226012887102899e-05, "loss": 0.4603, "step": 5914 }, { "epoch": 0.25976706148391276, "grad_norm": 2.328125, "learning_rate": 4.225512336515961e-05, "loss": 0.4343, "step": 5916 }, { "epoch": 0.25985487996311624, "grad_norm": 2.109375, "learning_rate": 4.225011653788874e-05, "loss": 0.465, "step": 5918 }, { "epoch": 0.2599426984423197, "grad_norm": 2.3125, "learning_rate": 4.224510838959981e-05, "loss": 0.4542, "step": 5920 }, { "epoch": 0.26003051692152324, "grad_norm": 2.25, "learning_rate": 4.2240098920676353e-05, "loss": 0.4268, "step": 5922 }, { "epoch": 0.2601183354007267, "grad_norm": 2.125, "learning_rate": 4.223508813150198e-05, "loss": 0.476, "step": 5924 }, { "epoch": 0.2602061538799302, "grad_norm": 2.265625, "learning_rate": 4.223007602246043e-05, "loss": 0.4375, "step": 5926 }, { "epoch": 0.26029397235913365, "grad_norm": 2.25, "learning_rate": 4.2225062593935524e-05, "loss": 0.4468, "step": 5928 }, { "epoch": 0.2603817908383372, "grad_norm": 2.765625, "learning_rate": 4.22200478463112e-05, "loss": 0.4386, "step": 5930 }, { "epoch": 0.26046960931754065, "grad_norm": 2.359375, "learning_rate": 4.221503177997148e-05, "loss": 0.4495, "step": 5932 }, { "epoch": 0.2605574277967441, "grad_norm": 2.34375, "learning_rate": 4.221001439530051e-05, "loss": 0.4712, "step": 5934 }, { "epoch": 0.2606452462759476, "grad_norm": 2.15625, "learning_rate": 4.2204995692682504e-05, "loss": 0.4401, "step": 5936 }, { "epoch": 0.2607330647551511, "grad_norm": 2.203125, "learning_rate": 4.21999756725018e-05, "loss": 0.4546, "step": 5938 }, { "epoch": 0.2608208832343546, "grad_norm": 2.359375, "learning_rate": 4.219495433514284e-05, "loss": 0.4559, "step": 5940 }, { "epoch": 0.26090870171355807, "grad_norm": 3.0, "learning_rate": 4.2189931680990155e-05, "loss": 0.444, "step": 5942 }, { "epoch": 0.26099652019276154, "grad_norm": 3.0, "learning_rate": 4.2184907710428375e-05, "loss": 0.4885, "step": 5944 }, { "epoch": 0.26108433867196507, "grad_norm": 2.421875, "learning_rate": 4.217988242384225e-05, "loss": 0.468, "step": 5946 }, { "epoch": 0.26117215715116854, "grad_norm": 2.703125, "learning_rate": 4.217485582161661e-05, "loss": 0.4635, "step": 5948 }, { "epoch": 0.261259975630372, "grad_norm": 2.625, "learning_rate": 4.2169827904136396e-05, "loss": 0.4728, "step": 5950 }, { "epoch": 0.2613477941095755, "grad_norm": 2.640625, "learning_rate": 4.216479867178664e-05, "loss": 0.4572, "step": 5952 }, { "epoch": 0.261435612588779, "grad_norm": 2.5, "learning_rate": 4.215976812495249e-05, "loss": 0.4628, "step": 5954 }, { "epoch": 0.2615234310679825, "grad_norm": 2.5625, "learning_rate": 4.2154736264019184e-05, "loss": 0.444, "step": 5956 }, { "epoch": 0.26161124954718595, "grad_norm": 2.53125, "learning_rate": 4.214970308937206e-05, "loss": 0.4545, "step": 5958 }, { "epoch": 0.2616990680263894, "grad_norm": 2.25, "learning_rate": 4.2144668601396566e-05, "loss": 0.4274, "step": 5960 }, { "epoch": 0.26178688650559295, "grad_norm": 2.59375, "learning_rate": 4.2139632800478234e-05, "loss": 0.4839, "step": 5962 }, { "epoch": 0.2618747049847964, "grad_norm": 2.6875, "learning_rate": 4.213459568700273e-05, "loss": 0.4527, "step": 5964 }, { "epoch": 0.2619625234639999, "grad_norm": 2.9375, "learning_rate": 4.212955726135577e-05, "loss": 0.453, "step": 5966 }, { "epoch": 0.2620503419432034, "grad_norm": 2.09375, "learning_rate": 4.21245175239232e-05, "loss": 0.4747, "step": 5968 }, { "epoch": 0.2621381604224069, "grad_norm": 2.25, "learning_rate": 4.211947647509098e-05, "loss": 0.4459, "step": 5970 }, { "epoch": 0.26222597890161037, "grad_norm": 2.171875, "learning_rate": 4.211443411524515e-05, "loss": 0.4452, "step": 5972 }, { "epoch": 0.26231379738081384, "grad_norm": 2.296875, "learning_rate": 4.210939044477185e-05, "loss": 0.4412, "step": 5974 }, { "epoch": 0.26240161586001737, "grad_norm": 2.421875, "learning_rate": 4.210434546405733e-05, "loss": 0.4675, "step": 5976 }, { "epoch": 0.26248943433922084, "grad_norm": 2.171875, "learning_rate": 4.2099299173487936e-05, "loss": 0.4373, "step": 5978 }, { "epoch": 0.2625772528184243, "grad_norm": 2.1875, "learning_rate": 4.209425157345011e-05, "loss": 0.4514, "step": 5980 }, { "epoch": 0.2626650712976278, "grad_norm": 2.421875, "learning_rate": 4.20892026643304e-05, "loss": 0.4374, "step": 5982 }, { "epoch": 0.2627528897768313, "grad_norm": 2.5, "learning_rate": 4.208415244651546e-05, "loss": 0.4501, "step": 5984 }, { "epoch": 0.2628407082560348, "grad_norm": 2.28125, "learning_rate": 4.207910092039202e-05, "loss": 0.4409, "step": 5986 }, { "epoch": 0.26292852673523825, "grad_norm": 2.921875, "learning_rate": 4.207404808634694e-05, "loss": 0.4469, "step": 5988 }, { "epoch": 0.2630163452144417, "grad_norm": 2.109375, "learning_rate": 4.206899394476717e-05, "loss": 0.4775, "step": 5990 }, { "epoch": 0.26310416369364525, "grad_norm": 2.234375, "learning_rate": 4.2063938496039746e-05, "loss": 0.4512, "step": 5992 }, { "epoch": 0.2631919821728487, "grad_norm": 2.40625, "learning_rate": 4.2058881740551825e-05, "loss": 0.4476, "step": 5994 }, { "epoch": 0.2632798006520522, "grad_norm": 2.421875, "learning_rate": 4.2053823678690655e-05, "loss": 0.4599, "step": 5996 }, { "epoch": 0.26336761913125567, "grad_norm": 2.09375, "learning_rate": 4.2048764310843566e-05, "loss": 0.4945, "step": 5998 }, { "epoch": 0.2634554376104592, "grad_norm": 2.15625, "learning_rate": 4.204370363739803e-05, "loss": 0.4648, "step": 6000 }, { "epoch": 0.26354325608966267, "grad_norm": 2.46875, "learning_rate": 4.203864165874158e-05, "loss": 0.4385, "step": 6002 }, { "epoch": 0.26363107456886614, "grad_norm": 2.125, "learning_rate": 4.203357837526187e-05, "loss": 0.4218, "step": 6004 }, { "epoch": 0.2637188930480696, "grad_norm": 2.09375, "learning_rate": 4.202851378734664e-05, "loss": 0.4294, "step": 6006 }, { "epoch": 0.26380671152727314, "grad_norm": 2.078125, "learning_rate": 4.2023447895383746e-05, "loss": 0.4554, "step": 6008 }, { "epoch": 0.2638945300064766, "grad_norm": 2.375, "learning_rate": 4.201838069976114e-05, "loss": 0.4748, "step": 6010 }, { "epoch": 0.2639823484856801, "grad_norm": 2.046875, "learning_rate": 4.201331220086685e-05, "loss": 0.4362, "step": 6012 }, { "epoch": 0.2640701669648836, "grad_norm": 2.265625, "learning_rate": 4.2008242399089036e-05, "loss": 0.4517, "step": 6014 }, { "epoch": 0.2641579854440871, "grad_norm": 2.375, "learning_rate": 4.200317129481594e-05, "loss": 0.4676, "step": 6016 }, { "epoch": 0.26424580392329056, "grad_norm": 2.125, "learning_rate": 4.199809888843591e-05, "loss": 0.4595, "step": 6018 }, { "epoch": 0.26433362240249403, "grad_norm": 2.421875, "learning_rate": 4.19930251803374e-05, "loss": 0.434, "step": 6020 }, { "epoch": 0.26442144088169756, "grad_norm": 2.28125, "learning_rate": 4.198795017090894e-05, "loss": 0.4706, "step": 6022 }, { "epoch": 0.26450925936090103, "grad_norm": 2.21875, "learning_rate": 4.1982873860539186e-05, "loss": 0.4541, "step": 6024 }, { "epoch": 0.2645970778401045, "grad_norm": 2.234375, "learning_rate": 4.197779624961688e-05, "loss": 0.4787, "step": 6026 }, { "epoch": 0.26468489631930797, "grad_norm": 2.203125, "learning_rate": 4.1972717338530865e-05, "loss": 0.4714, "step": 6028 }, { "epoch": 0.2647727147985115, "grad_norm": 2.03125, "learning_rate": 4.196763712767009e-05, "loss": 0.4667, "step": 6030 }, { "epoch": 0.26486053327771497, "grad_norm": 2.40625, "learning_rate": 4.19625556174236e-05, "loss": 0.4655, "step": 6032 }, { "epoch": 0.26494835175691844, "grad_norm": 2.234375, "learning_rate": 4.195747280818053e-05, "loss": 0.4729, "step": 6034 }, { "epoch": 0.2650361702361219, "grad_norm": 2.078125, "learning_rate": 4.195238870033012e-05, "loss": 0.4532, "step": 6036 }, { "epoch": 0.26512398871532544, "grad_norm": 2.09375, "learning_rate": 4.194730329426173e-05, "loss": 0.4614, "step": 6038 }, { "epoch": 0.2652118071945289, "grad_norm": 2.046875, "learning_rate": 4.194221659036479e-05, "loss": 0.4355, "step": 6040 }, { "epoch": 0.2652996256737324, "grad_norm": 2.109375, "learning_rate": 4.1937128589028845e-05, "loss": 0.4523, "step": 6042 }, { "epoch": 0.26538744415293586, "grad_norm": 2.265625, "learning_rate": 4.193203929064353e-05, "loss": 0.4551, "step": 6044 }, { "epoch": 0.2654752626321394, "grad_norm": 2.140625, "learning_rate": 4.192694869559859e-05, "loss": 0.4766, "step": 6046 }, { "epoch": 0.26556308111134286, "grad_norm": 2.328125, "learning_rate": 4.1921856804283854e-05, "loss": 0.4742, "step": 6048 }, { "epoch": 0.26565089959054633, "grad_norm": 2.5, "learning_rate": 4.191676361708927e-05, "loss": 0.4476, "step": 6050 }, { "epoch": 0.2657387180697498, "grad_norm": 2.5, "learning_rate": 4.191166913440487e-05, "loss": 0.4644, "step": 6052 }, { "epoch": 0.26582653654895333, "grad_norm": 2.3125, "learning_rate": 4.1906573356620795e-05, "loss": 0.4646, "step": 6054 }, { "epoch": 0.2659143550281568, "grad_norm": 2.46875, "learning_rate": 4.190147628412729e-05, "loss": 0.4577, "step": 6056 }, { "epoch": 0.2660021735073603, "grad_norm": 2.28125, "learning_rate": 4.189637791731467e-05, "loss": 0.4707, "step": 6058 }, { "epoch": 0.26608999198656375, "grad_norm": 2.109375, "learning_rate": 4.1891278256573384e-05, "loss": 0.4336, "step": 6060 }, { "epoch": 0.2661778104657673, "grad_norm": 2.75, "learning_rate": 4.188617730229395e-05, "loss": 0.4459, "step": 6062 }, { "epoch": 0.26626562894497074, "grad_norm": 2.171875, "learning_rate": 4.188107505486702e-05, "loss": 0.4513, "step": 6064 }, { "epoch": 0.2663534474241742, "grad_norm": 2.15625, "learning_rate": 4.187597151468331e-05, "loss": 0.4408, "step": 6066 }, { "epoch": 0.26644126590337774, "grad_norm": 2.421875, "learning_rate": 4.187086668213366e-05, "loss": 0.454, "step": 6068 }, { "epoch": 0.2665290843825812, "grad_norm": 2.328125, "learning_rate": 4.186576055760899e-05, "loss": 0.4469, "step": 6070 }, { "epoch": 0.2666169028617847, "grad_norm": 2.0625, "learning_rate": 4.186065314150034e-05, "loss": 0.4321, "step": 6072 }, { "epoch": 0.26670472134098816, "grad_norm": 2.0625, "learning_rate": 4.1855544434198826e-05, "loss": 0.4323, "step": 6074 }, { "epoch": 0.2667925398201917, "grad_norm": 2.3125, "learning_rate": 4.185043443609569e-05, "loss": 0.4487, "step": 6076 }, { "epoch": 0.26688035829939516, "grad_norm": 2.171875, "learning_rate": 4.184532314758223e-05, "loss": 0.4653, "step": 6078 }, { "epoch": 0.26696817677859863, "grad_norm": 2.1875, "learning_rate": 4.184021056904989e-05, "loss": 0.489, "step": 6080 }, { "epoch": 0.2670559952578021, "grad_norm": 2.78125, "learning_rate": 4.183509670089018e-05, "loss": 0.4378, "step": 6082 }, { "epoch": 0.26714381373700563, "grad_norm": 3.28125, "learning_rate": 4.1829981543494746e-05, "loss": 0.4667, "step": 6084 }, { "epoch": 0.2672316322162091, "grad_norm": 3.875, "learning_rate": 4.1824865097255284e-05, "loss": 0.4541, "step": 6086 }, { "epoch": 0.2673194506954126, "grad_norm": 2.15625, "learning_rate": 4.181974736256362e-05, "loss": 0.4202, "step": 6088 }, { "epoch": 0.26740726917461605, "grad_norm": 2.515625, "learning_rate": 4.181462833981167e-05, "loss": 0.474, "step": 6090 }, { "epoch": 0.2674950876538196, "grad_norm": 2.359375, "learning_rate": 4.180950802939145e-05, "loss": 0.4343, "step": 6092 }, { "epoch": 0.26758290613302305, "grad_norm": 1.9921875, "learning_rate": 4.1804386431695076e-05, "loss": 0.4172, "step": 6094 }, { "epoch": 0.2676707246122265, "grad_norm": 2.296875, "learning_rate": 4.179926354711476e-05, "loss": 0.442, "step": 6096 }, { "epoch": 0.26775854309143, "grad_norm": 2.015625, "learning_rate": 4.179413937604282e-05, "loss": 0.4464, "step": 6098 }, { "epoch": 0.2678463615706335, "grad_norm": 2.21875, "learning_rate": 4.178901391887165e-05, "loss": 0.4462, "step": 6100 }, { "epoch": 0.267934180049837, "grad_norm": 2.296875, "learning_rate": 4.178388717599378e-05, "loss": 0.4389, "step": 6102 }, { "epoch": 0.26802199852904046, "grad_norm": 2.859375, "learning_rate": 4.17787591478018e-05, "loss": 0.4595, "step": 6104 }, { "epoch": 0.26810981700824393, "grad_norm": 2.640625, "learning_rate": 4.177362983468843e-05, "loss": 0.4419, "step": 6106 }, { "epoch": 0.26819763548744746, "grad_norm": 2.671875, "learning_rate": 4.1768499237046455e-05, "loss": 0.4286, "step": 6108 }, { "epoch": 0.26828545396665093, "grad_norm": 2.203125, "learning_rate": 4.17633673552688e-05, "loss": 0.4302, "step": 6110 }, { "epoch": 0.2683732724458544, "grad_norm": 2.1875, "learning_rate": 4.175823418974845e-05, "loss": 0.4421, "step": 6112 }, { "epoch": 0.26846109092505793, "grad_norm": 2.765625, "learning_rate": 4.17530997408785e-05, "loss": 0.471, "step": 6114 }, { "epoch": 0.2685489094042614, "grad_norm": 2.890625, "learning_rate": 4.174796400905216e-05, "loss": 0.4329, "step": 6116 }, { "epoch": 0.2686367278834649, "grad_norm": 2.421875, "learning_rate": 4.1742826994662734e-05, "loss": 0.4473, "step": 6118 }, { "epoch": 0.26872454636266835, "grad_norm": 2.296875, "learning_rate": 4.1737688698103595e-05, "loss": 0.5077, "step": 6120 }, { "epoch": 0.2688123648418719, "grad_norm": 2.296875, "learning_rate": 4.173254911976824e-05, "loss": 0.4531, "step": 6122 }, { "epoch": 0.26890018332107535, "grad_norm": 2.453125, "learning_rate": 4.172740826005027e-05, "loss": 0.4318, "step": 6124 }, { "epoch": 0.2689880018002788, "grad_norm": 2.40625, "learning_rate": 4.1722266119343357e-05, "loss": 0.4643, "step": 6126 }, { "epoch": 0.2690758202794823, "grad_norm": 2.34375, "learning_rate": 4.1717122698041296e-05, "loss": 0.4479, "step": 6128 }, { "epoch": 0.2691636387586858, "grad_norm": 2.015625, "learning_rate": 4.1711977996537976e-05, "loss": 0.4423, "step": 6130 }, { "epoch": 0.2692514572378893, "grad_norm": 2.0, "learning_rate": 4.170683201522737e-05, "loss": 0.4564, "step": 6132 }, { "epoch": 0.26933927571709276, "grad_norm": 2.0625, "learning_rate": 4.170168475450357e-05, "loss": 0.4654, "step": 6134 }, { "epoch": 0.26942709419629624, "grad_norm": 2.90625, "learning_rate": 4.1696536214760746e-05, "loss": 0.4642, "step": 6136 }, { "epoch": 0.26951491267549976, "grad_norm": 2.390625, "learning_rate": 4.169138639639317e-05, "loss": 0.452, "step": 6138 }, { "epoch": 0.26960273115470323, "grad_norm": 2.28125, "learning_rate": 4.1686235299795226e-05, "loss": 0.4645, "step": 6140 }, { "epoch": 0.2696905496339067, "grad_norm": 2.15625, "learning_rate": 4.168108292536139e-05, "loss": 0.4592, "step": 6142 }, { "epoch": 0.2697783681131102, "grad_norm": 2.34375, "learning_rate": 4.167592927348622e-05, "loss": 0.4458, "step": 6144 }, { "epoch": 0.2698661865923137, "grad_norm": 2.203125, "learning_rate": 4.167077434456439e-05, "loss": 0.4366, "step": 6146 }, { "epoch": 0.2699540050715172, "grad_norm": 2.1875, "learning_rate": 4.166561813899066e-05, "loss": 0.4384, "step": 6148 }, { "epoch": 0.27004182355072065, "grad_norm": 2.265625, "learning_rate": 4.16604606571599e-05, "loss": 0.4309, "step": 6150 }, { "epoch": 0.2701296420299241, "grad_norm": 2.234375, "learning_rate": 4.165530189946707e-05, "loss": 0.4639, "step": 6152 }, { "epoch": 0.27021746050912765, "grad_norm": 2.203125, "learning_rate": 4.1650141866307224e-05, "loss": 0.4496, "step": 6154 }, { "epoch": 0.2703052789883311, "grad_norm": 2.46875, "learning_rate": 4.164498055807553e-05, "loss": 0.4343, "step": 6156 }, { "epoch": 0.2703930974675346, "grad_norm": 2.734375, "learning_rate": 4.163981797516723e-05, "loss": 0.4438, "step": 6158 }, { "epoch": 0.27048091594673807, "grad_norm": 3.28125, "learning_rate": 4.163465411797768e-05, "loss": 0.4517, "step": 6160 }, { "epoch": 0.2705687344259416, "grad_norm": 3.484375, "learning_rate": 4.162948898690233e-05, "loss": 0.4539, "step": 6162 }, { "epoch": 0.27065655290514506, "grad_norm": 2.5, "learning_rate": 4.162432258233673e-05, "loss": 0.4514, "step": 6164 }, { "epoch": 0.27074437138434854, "grad_norm": 2.828125, "learning_rate": 4.1619154904676525e-05, "loss": 0.4465, "step": 6166 }, { "epoch": 0.27083218986355206, "grad_norm": 2.71875, "learning_rate": 4.1613985954317446e-05, "loss": 0.44, "step": 6168 }, { "epoch": 0.27092000834275554, "grad_norm": 2.0625, "learning_rate": 4.1608815731655345e-05, "loss": 0.4482, "step": 6170 }, { "epoch": 0.271007826821959, "grad_norm": 2.078125, "learning_rate": 4.160364423708615e-05, "loss": 0.4504, "step": 6172 }, { "epoch": 0.2710956453011625, "grad_norm": 2.421875, "learning_rate": 4.15984714710059e-05, "loss": 0.4325, "step": 6174 }, { "epoch": 0.271183463780366, "grad_norm": 2.34375, "learning_rate": 4.159329743381072e-05, "loss": 0.4344, "step": 6176 }, { "epoch": 0.2712712822595695, "grad_norm": 2.1875, "learning_rate": 4.1588122125896854e-05, "loss": 0.4401, "step": 6178 }, { "epoch": 0.27135910073877295, "grad_norm": 2.171875, "learning_rate": 4.1582945547660625e-05, "loss": 0.4931, "step": 6180 }, { "epoch": 0.2714469192179764, "grad_norm": 2.078125, "learning_rate": 4.157776769949844e-05, "loss": 0.4262, "step": 6182 }, { "epoch": 0.27153473769717995, "grad_norm": 2.140625, "learning_rate": 4.157258858180683e-05, "loss": 0.4675, "step": 6184 }, { "epoch": 0.2716225561763834, "grad_norm": 2.3125, "learning_rate": 4.156740819498242e-05, "loss": 0.4665, "step": 6186 }, { "epoch": 0.2717103746555869, "grad_norm": 2.0625, "learning_rate": 4.156222653942191e-05, "loss": 0.4537, "step": 6188 }, { "epoch": 0.27179819313479037, "grad_norm": 2.234375, "learning_rate": 4.1557043615522125e-05, "loss": 0.4607, "step": 6190 }, { "epoch": 0.2718860116139939, "grad_norm": 2.21875, "learning_rate": 4.155185942367997e-05, "loss": 0.4709, "step": 6192 }, { "epoch": 0.27197383009319737, "grad_norm": 2.25, "learning_rate": 4.154667396429246e-05, "loss": 0.4343, "step": 6194 }, { "epoch": 0.27206164857240084, "grad_norm": 2.296875, "learning_rate": 4.1541487237756686e-05, "loss": 0.4345, "step": 6196 }, { "epoch": 0.2721494670516043, "grad_norm": 2.171875, "learning_rate": 4.153629924446986e-05, "loss": 0.457, "step": 6198 }, { "epoch": 0.27223728553080784, "grad_norm": 2.109375, "learning_rate": 4.153110998482926e-05, "loss": 0.4675, "step": 6200 }, { "epoch": 0.2723251040100113, "grad_norm": 2.578125, "learning_rate": 4.152591945923231e-05, "loss": 0.4952, "step": 6202 }, { "epoch": 0.2724129224892148, "grad_norm": 2.125, "learning_rate": 4.152072766807648e-05, "loss": 0.46, "step": 6204 }, { "epoch": 0.27250074096841825, "grad_norm": 2.71875, "learning_rate": 4.151553461175936e-05, "loss": 0.4656, "step": 6206 }, { "epoch": 0.2725885594476218, "grad_norm": 2.421875, "learning_rate": 4.151034029067864e-05, "loss": 0.4346, "step": 6208 }, { "epoch": 0.27267637792682525, "grad_norm": 2.359375, "learning_rate": 4.1505144705232114e-05, "loss": 0.4237, "step": 6210 }, { "epoch": 0.2727641964060287, "grad_norm": 1.9296875, "learning_rate": 4.149994785581764e-05, "loss": 0.4196, "step": 6212 }, { "epoch": 0.27285201488523225, "grad_norm": 2.03125, "learning_rate": 4.149474974283321e-05, "loss": 0.4359, "step": 6214 }, { "epoch": 0.2729398333644357, "grad_norm": 2.265625, "learning_rate": 4.148955036667689e-05, "loss": 0.4525, "step": 6216 }, { "epoch": 0.2730276518436392, "grad_norm": 2.15625, "learning_rate": 4.148434972774685e-05, "loss": 0.4554, "step": 6218 }, { "epoch": 0.27311547032284267, "grad_norm": 2.09375, "learning_rate": 4.147914782644134e-05, "loss": 0.4339, "step": 6220 }, { "epoch": 0.2732032888020462, "grad_norm": 2.265625, "learning_rate": 4.147394466315876e-05, "loss": 0.4376, "step": 6222 }, { "epoch": 0.27329110728124967, "grad_norm": 2.484375, "learning_rate": 4.146874023829754e-05, "loss": 0.4694, "step": 6224 }, { "epoch": 0.27337892576045314, "grad_norm": 2.125, "learning_rate": 4.146353455225625e-05, "loss": 0.4433, "step": 6226 }, { "epoch": 0.2734667442396566, "grad_norm": 2.125, "learning_rate": 4.145832760543353e-05, "loss": 0.4423, "step": 6228 }, { "epoch": 0.27355456271886014, "grad_norm": 2.15625, "learning_rate": 4.1453119398228146e-05, "loss": 0.4558, "step": 6230 }, { "epoch": 0.2736423811980636, "grad_norm": 2.140625, "learning_rate": 4.144790993103893e-05, "loss": 0.4371, "step": 6232 }, { "epoch": 0.2737301996772671, "grad_norm": 2.359375, "learning_rate": 4.144269920426482e-05, "loss": 0.4518, "step": 6234 }, { "epoch": 0.27381801815647056, "grad_norm": 2.40625, "learning_rate": 4.1437487218304875e-05, "loss": 0.4369, "step": 6236 }, { "epoch": 0.2739058366356741, "grad_norm": 2.671875, "learning_rate": 4.1432273973558215e-05, "loss": 0.4403, "step": 6238 }, { "epoch": 0.27399365511487755, "grad_norm": 2.28125, "learning_rate": 4.142705947042408e-05, "loss": 0.4483, "step": 6240 }, { "epoch": 0.274081473594081, "grad_norm": 2.1875, "learning_rate": 4.142184370930178e-05, "loss": 0.4426, "step": 6242 }, { "epoch": 0.2741692920732845, "grad_norm": 1.890625, "learning_rate": 4.141662669059076e-05, "loss": 0.4282, "step": 6244 }, { "epoch": 0.274257110552488, "grad_norm": 2.09375, "learning_rate": 4.1411408414690536e-05, "loss": 0.4406, "step": 6246 }, { "epoch": 0.2743449290316915, "grad_norm": 2.328125, "learning_rate": 4.140618888200072e-05, "loss": 0.4505, "step": 6248 }, { "epoch": 0.27443274751089497, "grad_norm": 2.25, "learning_rate": 4.140096809292102e-05, "loss": 0.4422, "step": 6250 }, { "epoch": 0.27452056599009844, "grad_norm": 2.15625, "learning_rate": 4.1395746047851256e-05, "loss": 0.4614, "step": 6252 }, { "epoch": 0.27460838446930197, "grad_norm": 2.3125, "learning_rate": 4.139052274719133e-05, "loss": 0.4416, "step": 6254 }, { "epoch": 0.27469620294850544, "grad_norm": 2.21875, "learning_rate": 4.1385298191341246e-05, "loss": 0.468, "step": 6256 }, { "epoch": 0.2747840214277089, "grad_norm": 3.890625, "learning_rate": 4.1380072380701097e-05, "loss": 0.453, "step": 6258 }, { "epoch": 0.2748718399069124, "grad_norm": 2.03125, "learning_rate": 4.137484531567107e-05, "loss": 0.4224, "step": 6260 }, { "epoch": 0.2749596583861159, "grad_norm": 2.234375, "learning_rate": 4.136961699665147e-05, "loss": 0.4824, "step": 6262 }, { "epoch": 0.2750474768653194, "grad_norm": 2.203125, "learning_rate": 4.136438742404268e-05, "loss": 0.4251, "step": 6264 }, { "epoch": 0.27513529534452286, "grad_norm": 2.265625, "learning_rate": 4.1359156598245176e-05, "loss": 0.4623, "step": 6266 }, { "epoch": 0.2752231138237264, "grad_norm": 2.3125, "learning_rate": 4.1353924519659534e-05, "loss": 0.4438, "step": 6268 }, { "epoch": 0.27531093230292986, "grad_norm": 2.359375, "learning_rate": 4.1348691188686436e-05, "loss": 0.466, "step": 6270 }, { "epoch": 0.27539875078213333, "grad_norm": 2.1875, "learning_rate": 4.134345660572665e-05, "loss": 0.4888, "step": 6272 }, { "epoch": 0.2754865692613368, "grad_norm": 2.28125, "learning_rate": 4.1338220771181036e-05, "loss": 0.4234, "step": 6274 }, { "epoch": 0.2755743877405403, "grad_norm": 2.5, "learning_rate": 4.1332983685450556e-05, "loss": 0.4546, "step": 6276 }, { "epoch": 0.2756622062197438, "grad_norm": 2.171875, "learning_rate": 4.132774534893628e-05, "loss": 0.4614, "step": 6278 }, { "epoch": 0.27575002469894727, "grad_norm": 2.15625, "learning_rate": 4.132250576203934e-05, "loss": 0.4509, "step": 6280 }, { "epoch": 0.27583784317815074, "grad_norm": 2.21875, "learning_rate": 4.131726492516099e-05, "loss": 0.4766, "step": 6282 }, { "epoch": 0.27592566165735427, "grad_norm": 2.203125, "learning_rate": 4.1312022838702595e-05, "loss": 0.4546, "step": 6284 }, { "epoch": 0.27601348013655774, "grad_norm": 2.5625, "learning_rate": 4.1306779503065585e-05, "loss": 0.4589, "step": 6286 }, { "epoch": 0.2761012986157612, "grad_norm": 2.296875, "learning_rate": 4.130153491865148e-05, "loss": 0.4231, "step": 6288 }, { "epoch": 0.2761891170949647, "grad_norm": 2.0625, "learning_rate": 4.1296289085861924e-05, "loss": 0.4614, "step": 6290 }, { "epoch": 0.2762769355741682, "grad_norm": 2.34375, "learning_rate": 4.129104200509865e-05, "loss": 0.4485, "step": 6292 }, { "epoch": 0.2763647540533717, "grad_norm": 2.390625, "learning_rate": 4.128579367676346e-05, "loss": 0.4608, "step": 6294 }, { "epoch": 0.27645257253257516, "grad_norm": 2.625, "learning_rate": 4.12805441012583e-05, "loss": 0.4491, "step": 6296 }, { "epoch": 0.27654039101177863, "grad_norm": 2.203125, "learning_rate": 4.1275293278985163e-05, "loss": 0.4294, "step": 6298 }, { "epoch": 0.27662820949098216, "grad_norm": 2.640625, "learning_rate": 4.127004121034617e-05, "loss": 0.4214, "step": 6300 }, { "epoch": 0.27671602797018563, "grad_norm": 2.453125, "learning_rate": 4.126478789574352e-05, "loss": 0.4354, "step": 6302 }, { "epoch": 0.2768038464493891, "grad_norm": 2.25, "learning_rate": 4.1259533335579516e-05, "loss": 0.4204, "step": 6304 }, { "epoch": 0.2768916649285926, "grad_norm": 2.140625, "learning_rate": 4.125427753025655e-05, "loss": 0.439, "step": 6306 }, { "epoch": 0.2769794834077961, "grad_norm": 2.421875, "learning_rate": 4.12490204801771e-05, "loss": 0.4297, "step": 6308 }, { "epoch": 0.2770673018869996, "grad_norm": 2.234375, "learning_rate": 4.1243762185743784e-05, "loss": 0.4689, "step": 6310 }, { "epoch": 0.27715512036620304, "grad_norm": 2.21875, "learning_rate": 4.123850264735926e-05, "loss": 0.4831, "step": 6312 }, { "epoch": 0.2772429388454066, "grad_norm": 2.234375, "learning_rate": 4.123324186542631e-05, "loss": 0.4377, "step": 6314 }, { "epoch": 0.27733075732461004, "grad_norm": 2.4375, "learning_rate": 4.1227979840347806e-05, "loss": 0.4299, "step": 6316 }, { "epoch": 0.2774185758038135, "grad_norm": 2.203125, "learning_rate": 4.1222716572526725e-05, "loss": 0.4539, "step": 6318 }, { "epoch": 0.277506394283017, "grad_norm": 2.296875, "learning_rate": 4.121745206236611e-05, "loss": 0.4649, "step": 6320 }, { "epoch": 0.2775942127622205, "grad_norm": 2.0, "learning_rate": 4.121218631026913e-05, "loss": 0.4221, "step": 6322 }, { "epoch": 0.277682031241424, "grad_norm": 2.390625, "learning_rate": 4.120691931663904e-05, "loss": 0.4379, "step": 6324 }, { "epoch": 0.27776984972062746, "grad_norm": 2.21875, "learning_rate": 4.120165108187918e-05, "loss": 0.4545, "step": 6326 }, { "epoch": 0.27785766819983093, "grad_norm": 2.203125, "learning_rate": 4.1196381606393e-05, "loss": 0.4437, "step": 6328 }, { "epoch": 0.27794548667903446, "grad_norm": 2.3125, "learning_rate": 4.119111089058403e-05, "loss": 0.4657, "step": 6330 }, { "epoch": 0.27803330515823793, "grad_norm": 2.265625, "learning_rate": 4.118583893485592e-05, "loss": 0.4183, "step": 6332 }, { "epoch": 0.2781211236374414, "grad_norm": 2.34375, "learning_rate": 4.1180565739612365e-05, "loss": 0.4495, "step": 6334 }, { "epoch": 0.2782089421166449, "grad_norm": 2.1875, "learning_rate": 4.117529130525721e-05, "loss": 0.4459, "step": 6336 }, { "epoch": 0.2782967605958484, "grad_norm": 2.46875, "learning_rate": 4.117001563219438e-05, "loss": 0.4392, "step": 6338 }, { "epoch": 0.2783845790750519, "grad_norm": 2.078125, "learning_rate": 4.1164738720827864e-05, "loss": 0.4609, "step": 6340 }, { "epoch": 0.27847239755425535, "grad_norm": 2.59375, "learning_rate": 4.1159460571561795e-05, "loss": 0.451, "step": 6342 }, { "epoch": 0.2785602160334588, "grad_norm": 2.71875, "learning_rate": 4.1154181184800344e-05, "loss": 0.4578, "step": 6344 }, { "epoch": 0.27864803451266235, "grad_norm": 2.65625, "learning_rate": 4.114890056094784e-05, "loss": 0.4411, "step": 6346 }, { "epoch": 0.2787358529918658, "grad_norm": 2.65625, "learning_rate": 4.114361870040866e-05, "loss": 0.4315, "step": 6348 }, { "epoch": 0.2788236714710693, "grad_norm": 2.53125, "learning_rate": 4.1138335603587284e-05, "loss": 0.4445, "step": 6350 }, { "epoch": 0.27891148995027276, "grad_norm": 2.6875, "learning_rate": 4.11330512708883e-05, "loss": 0.4583, "step": 6352 }, { "epoch": 0.2789993084294763, "grad_norm": 2.375, "learning_rate": 4.112776570271639e-05, "loss": 0.4679, "step": 6354 }, { "epoch": 0.27908712690867976, "grad_norm": 2.9375, "learning_rate": 4.11224788994763e-05, "loss": 0.4387, "step": 6356 }, { "epoch": 0.27917494538788323, "grad_norm": 2.734375, "learning_rate": 4.111719086157293e-05, "loss": 0.4503, "step": 6358 }, { "epoch": 0.2792627638670867, "grad_norm": 2.78125, "learning_rate": 4.111190158941121e-05, "loss": 0.4793, "step": 6360 }, { "epoch": 0.27935058234629023, "grad_norm": 3.171875, "learning_rate": 4.11066110833962e-05, "loss": 0.4142, "step": 6362 }, { "epoch": 0.2794384008254937, "grad_norm": 3.265625, "learning_rate": 4.1101319343933064e-05, "loss": 0.4495, "step": 6364 }, { "epoch": 0.2795262193046972, "grad_norm": 3.15625, "learning_rate": 4.109602637142703e-05, "loss": 0.4436, "step": 6366 }, { "epoch": 0.2796140377839007, "grad_norm": 2.703125, "learning_rate": 4.109073216628343e-05, "loss": 0.4644, "step": 6368 }, { "epoch": 0.2797018562631042, "grad_norm": 2.34375, "learning_rate": 4.108543672890771e-05, "loss": 0.461, "step": 6370 }, { "epoch": 0.27978967474230765, "grad_norm": 2.390625, "learning_rate": 4.108014005970538e-05, "loss": 0.4372, "step": 6372 }, { "epoch": 0.2798774932215111, "grad_norm": 2.28125, "learning_rate": 4.107484215908208e-05, "loss": 0.4156, "step": 6374 }, { "epoch": 0.27996531170071465, "grad_norm": 2.515625, "learning_rate": 4.106954302744351e-05, "loss": 0.4507, "step": 6376 }, { "epoch": 0.2800531301799181, "grad_norm": 2.265625, "learning_rate": 4.1064242665195486e-05, "loss": 0.4574, "step": 6378 }, { "epoch": 0.2801409486591216, "grad_norm": 3.015625, "learning_rate": 4.105894107274391e-05, "loss": 0.4322, "step": 6380 }, { "epoch": 0.28022876713832506, "grad_norm": 2.203125, "learning_rate": 4.105363825049476e-05, "loss": 0.4504, "step": 6382 }, { "epoch": 0.2803165856175286, "grad_norm": 2.109375, "learning_rate": 4.104833419885417e-05, "loss": 0.4319, "step": 6384 }, { "epoch": 0.28040440409673206, "grad_norm": 2.265625, "learning_rate": 4.104302891822828e-05, "loss": 0.4637, "step": 6386 }, { "epoch": 0.28049222257593553, "grad_norm": 2.125, "learning_rate": 4.1037722409023396e-05, "loss": 0.4497, "step": 6388 }, { "epoch": 0.280580041055139, "grad_norm": 2.296875, "learning_rate": 4.1032414671645894e-05, "loss": 0.4482, "step": 6390 }, { "epoch": 0.28066785953434253, "grad_norm": 2.75, "learning_rate": 4.102710570650222e-05, "loss": 0.457, "step": 6392 }, { "epoch": 0.280755678013546, "grad_norm": 2.96875, "learning_rate": 4.102179551399895e-05, "loss": 0.4158, "step": 6394 }, { "epoch": 0.2808434964927495, "grad_norm": 2.734375, "learning_rate": 4.1016484094542754e-05, "loss": 0.4455, "step": 6396 }, { "epoch": 0.28093131497195295, "grad_norm": 2.6875, "learning_rate": 4.101117144854035e-05, "loss": 0.433, "step": 6398 }, { "epoch": 0.2810191334511565, "grad_norm": 2.515625, "learning_rate": 4.10058575763986e-05, "loss": 0.4576, "step": 6400 }, { "epoch": 0.28110695193035995, "grad_norm": 2.375, "learning_rate": 4.100054247852445e-05, "loss": 0.4346, "step": 6402 }, { "epoch": 0.2811947704095634, "grad_norm": 2.390625, "learning_rate": 4.099522615532491e-05, "loss": 0.4536, "step": 6404 }, { "epoch": 0.2812825888887669, "grad_norm": 2.453125, "learning_rate": 4.098990860720712e-05, "loss": 0.4584, "step": 6406 }, { "epoch": 0.2813704073679704, "grad_norm": 2.25, "learning_rate": 4.09845898345783e-05, "loss": 0.4555, "step": 6408 }, { "epoch": 0.2814582258471739, "grad_norm": 2.34375, "learning_rate": 4.0979269837845754e-05, "loss": 0.4722, "step": 6410 }, { "epoch": 0.28154604432637736, "grad_norm": 2.328125, "learning_rate": 4.09739486174169e-05, "loss": 0.4457, "step": 6412 }, { "epoch": 0.28163386280558084, "grad_norm": 2.125, "learning_rate": 4.0968626173699234e-05, "loss": 0.4123, "step": 6414 }, { "epoch": 0.28172168128478436, "grad_norm": 2.0625, "learning_rate": 4.0963302507100336e-05, "loss": 0.4087, "step": 6416 }, { "epoch": 0.28180949976398784, "grad_norm": 2.34375, "learning_rate": 4.095797761802791e-05, "loss": 0.4396, "step": 6418 }, { "epoch": 0.2818973182431913, "grad_norm": 2.03125, "learning_rate": 4.0952651506889735e-05, "loss": 0.4509, "step": 6420 }, { "epoch": 0.28198513672239484, "grad_norm": 2.140625, "learning_rate": 4.094732417409368e-05, "loss": 0.4242, "step": 6422 }, { "epoch": 0.2820729552015983, "grad_norm": 2.0, "learning_rate": 4.094199562004772e-05, "loss": 0.428, "step": 6424 }, { "epoch": 0.2821607736808018, "grad_norm": 2.328125, "learning_rate": 4.0936665845159915e-05, "loss": 0.4399, "step": 6426 }, { "epoch": 0.28224859216000525, "grad_norm": 2.15625, "learning_rate": 4.0931334849838414e-05, "loss": 0.4446, "step": 6428 }, { "epoch": 0.2823364106392088, "grad_norm": 2.296875, "learning_rate": 4.0926002634491476e-05, "loss": 0.4622, "step": 6430 }, { "epoch": 0.28242422911841225, "grad_norm": 2.140625, "learning_rate": 4.092066919952743e-05, "loss": 0.4531, "step": 6432 }, { "epoch": 0.2825120475976157, "grad_norm": 2.140625, "learning_rate": 4.0915334545354734e-05, "loss": 0.4387, "step": 6434 }, { "epoch": 0.2825998660768192, "grad_norm": 2.09375, "learning_rate": 4.0909998672381897e-05, "loss": 0.4392, "step": 6436 }, { "epoch": 0.2826876845560227, "grad_norm": 2.359375, "learning_rate": 4.090466158101754e-05, "loss": 0.4385, "step": 6438 }, { "epoch": 0.2827755030352262, "grad_norm": 2.109375, "learning_rate": 4.0899323271670395e-05, "loss": 0.4336, "step": 6440 }, { "epoch": 0.28286332151442967, "grad_norm": 2.390625, "learning_rate": 4.0893983744749265e-05, "loss": 0.4414, "step": 6442 }, { "epoch": 0.28295113999363314, "grad_norm": 2.15625, "learning_rate": 4.088864300066304e-05, "loss": 0.4467, "step": 6444 }, { "epoch": 0.28303895847283667, "grad_norm": 2.09375, "learning_rate": 4.088330103982074e-05, "loss": 0.4311, "step": 6446 }, { "epoch": 0.28312677695204014, "grad_norm": 2.546875, "learning_rate": 4.0877957862631425e-05, "loss": 0.4265, "step": 6448 }, { "epoch": 0.2832145954312436, "grad_norm": 2.84375, "learning_rate": 4.087261346950429e-05, "loss": 0.4405, "step": 6450 }, { "epoch": 0.2833024139104471, "grad_norm": 2.1875, "learning_rate": 4.086726786084862e-05, "loss": 0.42, "step": 6452 }, { "epoch": 0.2833902323896506, "grad_norm": 2.703125, "learning_rate": 4.086192103707377e-05, "loss": 0.4197, "step": 6454 }, { "epoch": 0.2834780508688541, "grad_norm": 2.546875, "learning_rate": 4.0856572998589206e-05, "loss": 0.4347, "step": 6456 }, { "epoch": 0.28356586934805755, "grad_norm": 2.578125, "learning_rate": 4.0851223745804476e-05, "loss": 0.4318, "step": 6458 }, { "epoch": 0.283653687827261, "grad_norm": 2.25, "learning_rate": 4.0845873279129246e-05, "loss": 0.4353, "step": 6460 }, { "epoch": 0.28374150630646455, "grad_norm": 2.109375, "learning_rate": 4.0840521598973223e-05, "loss": 0.4395, "step": 6462 }, { "epoch": 0.283829324785668, "grad_norm": 2.203125, "learning_rate": 4.083516870574626e-05, "loss": 0.4345, "step": 6464 }, { "epoch": 0.2839171432648715, "grad_norm": 2.328125, "learning_rate": 4.0829814599858296e-05, "loss": 0.4437, "step": 6466 }, { "epoch": 0.284004961744075, "grad_norm": 2.140625, "learning_rate": 4.0824459281719326e-05, "loss": 0.4519, "step": 6468 }, { "epoch": 0.2840927802232785, "grad_norm": 2.015625, "learning_rate": 4.0819102751739466e-05, "loss": 0.4521, "step": 6470 }, { "epoch": 0.28418059870248197, "grad_norm": 2.171875, "learning_rate": 4.081374501032894e-05, "loss": 0.462, "step": 6472 }, { "epoch": 0.28426841718168544, "grad_norm": 2.171875, "learning_rate": 4.080838605789802e-05, "loss": 0.4368, "step": 6474 }, { "epoch": 0.28435623566088897, "grad_norm": 2.0625, "learning_rate": 4.08030258948571e-05, "loss": 0.4292, "step": 6476 }, { "epoch": 0.28444405414009244, "grad_norm": 2.078125, "learning_rate": 4.0797664521616684e-05, "loss": 0.4388, "step": 6478 }, { "epoch": 0.2845318726192959, "grad_norm": 2.171875, "learning_rate": 4.079230193858732e-05, "loss": 0.4426, "step": 6480 }, { "epoch": 0.2846196910984994, "grad_norm": 2.28125, "learning_rate": 4.078693814617969e-05, "loss": 0.4409, "step": 6482 }, { "epoch": 0.2847075095777029, "grad_norm": 2.015625, "learning_rate": 4.078157314480456e-05, "loss": 0.4673, "step": 6484 }, { "epoch": 0.2847953280569064, "grad_norm": 2.15625, "learning_rate": 4.077620693487277e-05, "loss": 0.4272, "step": 6486 }, { "epoch": 0.28488314653610985, "grad_norm": 2.21875, "learning_rate": 4.0770839516795265e-05, "loss": 0.4592, "step": 6488 }, { "epoch": 0.2849709650153133, "grad_norm": 2.3125, "learning_rate": 4.07654708909831e-05, "loss": 0.4513, "step": 6490 }, { "epoch": 0.28505878349451685, "grad_norm": 2.28125, "learning_rate": 4.076010105784739e-05, "loss": 0.4398, "step": 6492 }, { "epoch": 0.2851466019737203, "grad_norm": 2.109375, "learning_rate": 4.075473001779936e-05, "loss": 0.4455, "step": 6494 }, { "epoch": 0.2852344204529238, "grad_norm": 2.484375, "learning_rate": 4.0749357771250335e-05, "loss": 0.4208, "step": 6496 }, { "epoch": 0.28532223893212727, "grad_norm": 2.28125, "learning_rate": 4.074398431861171e-05, "loss": 0.4249, "step": 6498 }, { "epoch": 0.2854100574113308, "grad_norm": 2.453125, "learning_rate": 4.0738609660295e-05, "loss": 0.4631, "step": 6500 }, { "epoch": 0.28549787589053427, "grad_norm": 2.265625, "learning_rate": 4.073323379671179e-05, "loss": 0.4618, "step": 6502 }, { "epoch": 0.28558569436973774, "grad_norm": 2.203125, "learning_rate": 4.072785672827375e-05, "loss": 0.4415, "step": 6504 }, { "epoch": 0.2856735128489412, "grad_norm": 2.578125, "learning_rate": 4.072247845539268e-05, "loss": 0.4629, "step": 6506 }, { "epoch": 0.28576133132814474, "grad_norm": 2.28125, "learning_rate": 4.0717098978480444e-05, "loss": 0.45, "step": 6508 }, { "epoch": 0.2858491498073482, "grad_norm": 2.03125, "learning_rate": 4.0711718297949e-05, "loss": 0.4641, "step": 6510 }, { "epoch": 0.2859369682865517, "grad_norm": 2.203125, "learning_rate": 4.07063364142104e-05, "loss": 0.429, "step": 6512 }, { "epoch": 0.28602478676575516, "grad_norm": 2.34375, "learning_rate": 4.0700953327676797e-05, "loss": 0.4409, "step": 6514 }, { "epoch": 0.2861126052449587, "grad_norm": 2.40625, "learning_rate": 4.0695569038760416e-05, "loss": 0.4768, "step": 6516 }, { "epoch": 0.28620042372416216, "grad_norm": 2.234375, "learning_rate": 4.0690183547873594e-05, "loss": 0.453, "step": 6518 }, { "epoch": 0.28628824220336563, "grad_norm": 2.125, "learning_rate": 4.068479685542876e-05, "loss": 0.4708, "step": 6520 }, { "epoch": 0.28637606068256916, "grad_norm": 2.28125, "learning_rate": 4.067940896183843e-05, "loss": 0.4502, "step": 6522 }, { "epoch": 0.2864638791617726, "grad_norm": 2.140625, "learning_rate": 4.067401986751519e-05, "loss": 0.4225, "step": 6524 }, { "epoch": 0.2865516976409761, "grad_norm": 2.21875, "learning_rate": 4.0668629572871765e-05, "loss": 0.4193, "step": 6526 }, { "epoch": 0.28663951612017957, "grad_norm": 2.484375, "learning_rate": 4.066323807832092e-05, "loss": 0.4282, "step": 6528 }, { "epoch": 0.2867273345993831, "grad_norm": 2.296875, "learning_rate": 4.065784538427555e-05, "loss": 0.4534, "step": 6530 }, { "epoch": 0.28681515307858657, "grad_norm": 2.328125, "learning_rate": 4.0652451491148636e-05, "loss": 0.4383, "step": 6532 }, { "epoch": 0.28690297155779004, "grad_norm": 2.21875, "learning_rate": 4.0647056399353225e-05, "loss": 0.4463, "step": 6534 }, { "epoch": 0.2869907900369935, "grad_norm": 2.328125, "learning_rate": 4.0641660109302485e-05, "loss": 0.4432, "step": 6536 }, { "epoch": 0.28707860851619704, "grad_norm": 2.015625, "learning_rate": 4.063626262140967e-05, "loss": 0.4369, "step": 6538 }, { "epoch": 0.2871664269954005, "grad_norm": 2.28125, "learning_rate": 4.0630863936088104e-05, "loss": 0.462, "step": 6540 }, { "epoch": 0.287254245474604, "grad_norm": 2.015625, "learning_rate": 4.062546405375124e-05, "loss": 0.451, "step": 6542 }, { "epoch": 0.28734206395380746, "grad_norm": 2.203125, "learning_rate": 4.062006297481259e-05, "loss": 0.4816, "step": 6544 }, { "epoch": 0.287429882433011, "grad_norm": 2.21875, "learning_rate": 4.061466069968577e-05, "loss": 0.4531, "step": 6546 }, { "epoch": 0.28751770091221446, "grad_norm": 2.078125, "learning_rate": 4.06092572287845e-05, "loss": 0.4612, "step": 6548 }, { "epoch": 0.28760551939141793, "grad_norm": 2.3125, "learning_rate": 4.0603852562522564e-05, "loss": 0.4173, "step": 6550 }, { "epoch": 0.2876933378706214, "grad_norm": 2.234375, "learning_rate": 4.0598446701313865e-05, "loss": 0.457, "step": 6552 }, { "epoch": 0.28778115634982493, "grad_norm": 2.421875, "learning_rate": 4.059303964557237e-05, "loss": 0.4227, "step": 6554 }, { "epoch": 0.2878689748290284, "grad_norm": 2.84375, "learning_rate": 4.058763139571216e-05, "loss": 0.4605, "step": 6556 }, { "epoch": 0.2879567933082319, "grad_norm": 2.84375, "learning_rate": 4.05822219521474e-05, "loss": 0.438, "step": 6558 }, { "epoch": 0.28804461178743535, "grad_norm": 2.453125, "learning_rate": 4.057681131529235e-05, "loss": 0.4462, "step": 6560 }, { "epoch": 0.2881324302666389, "grad_norm": 2.328125, "learning_rate": 4.0571399485561366e-05, "loss": 0.4389, "step": 6562 }, { "epoch": 0.28822024874584234, "grad_norm": 2.34375, "learning_rate": 4.0565986463368865e-05, "loss": 0.4411, "step": 6564 }, { "epoch": 0.2883080672250458, "grad_norm": 2.296875, "learning_rate": 4.0560572249129394e-05, "loss": 0.4564, "step": 6566 }, { "epoch": 0.28839588570424934, "grad_norm": 2.0625, "learning_rate": 4.0555156843257566e-05, "loss": 0.4626, "step": 6568 }, { "epoch": 0.2884837041834528, "grad_norm": 2.109375, "learning_rate": 4.05497402461681e-05, "loss": 0.4507, "step": 6570 }, { "epoch": 0.2885715226626563, "grad_norm": 1.9453125, "learning_rate": 4.0544322458275796e-05, "loss": 0.4225, "step": 6572 }, { "epoch": 0.28865934114185976, "grad_norm": 2.34375, "learning_rate": 4.0538903479995554e-05, "loss": 0.4069, "step": 6574 }, { "epoch": 0.2887471596210633, "grad_norm": 2.421875, "learning_rate": 4.053348331174236e-05, "loss": 0.4189, "step": 6576 }, { "epoch": 0.28883497810026676, "grad_norm": 2.5625, "learning_rate": 4.052806195393129e-05, "loss": 0.4453, "step": 6578 }, { "epoch": 0.28892279657947023, "grad_norm": 2.984375, "learning_rate": 4.0522639406977516e-05, "loss": 0.4598, "step": 6580 }, { "epoch": 0.2890106150586737, "grad_norm": 3.484375, "learning_rate": 4.051721567129629e-05, "loss": 0.4437, "step": 6582 }, { "epoch": 0.28909843353787723, "grad_norm": 2.625, "learning_rate": 4.051179074730297e-05, "loss": 0.446, "step": 6584 }, { "epoch": 0.2891862520170807, "grad_norm": 2.5, "learning_rate": 4.0506364635413e-05, "loss": 0.4417, "step": 6586 }, { "epoch": 0.2892740704962842, "grad_norm": 2.046875, "learning_rate": 4.05009373360419e-05, "loss": 0.4179, "step": 6588 }, { "epoch": 0.28936188897548765, "grad_norm": 2.375, "learning_rate": 4.049550884960531e-05, "loss": 0.4153, "step": 6590 }, { "epoch": 0.2894497074546912, "grad_norm": 2.40625, "learning_rate": 4.049007917651894e-05, "loss": 0.457, "step": 6592 }, { "epoch": 0.28953752593389465, "grad_norm": 2.6875, "learning_rate": 4.0484648317198585e-05, "loss": 0.4651, "step": 6594 }, { "epoch": 0.2896253444130981, "grad_norm": 2.703125, "learning_rate": 4.047921627206015e-05, "loss": 0.4265, "step": 6596 }, { "epoch": 0.2897131628923016, "grad_norm": 2.265625, "learning_rate": 4.047378304151963e-05, "loss": 0.437, "step": 6598 }, { "epoch": 0.2898009813715051, "grad_norm": 2.296875, "learning_rate": 4.046834862599309e-05, "loss": 0.4362, "step": 6600 }, { "epoch": 0.2898887998507086, "grad_norm": 2.015625, "learning_rate": 4.046291302589671e-05, "loss": 0.4426, "step": 6602 }, { "epoch": 0.28997661832991206, "grad_norm": 2.328125, "learning_rate": 4.045747624164674e-05, "loss": 0.4191, "step": 6604 }, { "epoch": 0.29006443680911553, "grad_norm": 2.046875, "learning_rate": 4.045203827365953e-05, "loss": 0.3972, "step": 6606 }, { "epoch": 0.29015225528831906, "grad_norm": 2.09375, "learning_rate": 4.0446599122351535e-05, "loss": 0.4173, "step": 6608 }, { "epoch": 0.29024007376752253, "grad_norm": 1.9765625, "learning_rate": 4.044115878813927e-05, "loss": 0.4359, "step": 6610 }, { "epoch": 0.290327892246726, "grad_norm": 2.15625, "learning_rate": 4.043571727143936e-05, "loss": 0.4373, "step": 6612 }, { "epoch": 0.2904157107259295, "grad_norm": 2.46875, "learning_rate": 4.043027457266853e-05, "loss": 0.4602, "step": 6614 }, { "epoch": 0.290503529205133, "grad_norm": 2.359375, "learning_rate": 4.0424830692243566e-05, "loss": 0.4517, "step": 6616 }, { "epoch": 0.2905913476843365, "grad_norm": 2.40625, "learning_rate": 4.0419385630581376e-05, "loss": 0.4215, "step": 6618 }, { "epoch": 0.29067916616353995, "grad_norm": 2.328125, "learning_rate": 4.041393938809893e-05, "loss": 0.4377, "step": 6620 }, { "epoch": 0.2907669846427435, "grad_norm": 1.9765625, "learning_rate": 4.0408491965213315e-05, "loss": 0.4292, "step": 6622 }, { "epoch": 0.29085480312194695, "grad_norm": 2.109375, "learning_rate": 4.04030433623417e-05, "loss": 0.4647, "step": 6624 }, { "epoch": 0.2909426216011504, "grad_norm": 2.234375, "learning_rate": 4.039759357990133e-05, "loss": 0.416, "step": 6626 }, { "epoch": 0.2910304400803539, "grad_norm": 2.234375, "learning_rate": 4.039214261830954e-05, "loss": 0.4596, "step": 6628 }, { "epoch": 0.2911182585595574, "grad_norm": 2.28125, "learning_rate": 4.0386690477983786e-05, "loss": 0.4676, "step": 6630 }, { "epoch": 0.2912060770387609, "grad_norm": 2.1875, "learning_rate": 4.038123715934158e-05, "loss": 0.4404, "step": 6632 }, { "epoch": 0.29129389551796436, "grad_norm": 2.21875, "learning_rate": 4.0375782662800555e-05, "loss": 0.4359, "step": 6634 }, { "epoch": 0.29138171399716783, "grad_norm": 2.28125, "learning_rate": 4.037032698877841e-05, "loss": 0.4296, "step": 6636 }, { "epoch": 0.29146953247637136, "grad_norm": 2.578125, "learning_rate": 4.0364870137692925e-05, "loss": 0.4485, "step": 6638 }, { "epoch": 0.29155735095557483, "grad_norm": 1.984375, "learning_rate": 4.035941210996202e-05, "loss": 0.4222, "step": 6640 }, { "epoch": 0.2916451694347783, "grad_norm": 2.203125, "learning_rate": 4.035395290600365e-05, "loss": 0.4314, "step": 6642 }, { "epoch": 0.2917329879139818, "grad_norm": 2.359375, "learning_rate": 4.034849252623587e-05, "loss": 0.455, "step": 6644 }, { "epoch": 0.2918208063931853, "grad_norm": 1.875, "learning_rate": 4.034303097107687e-05, "loss": 0.4175, "step": 6646 }, { "epoch": 0.2919086248723888, "grad_norm": 2.09375, "learning_rate": 4.033756824094487e-05, "loss": 0.4194, "step": 6648 }, { "epoch": 0.29199644335159225, "grad_norm": 2.140625, "learning_rate": 4.033210433625822e-05, "loss": 0.4332, "step": 6650 }, { "epoch": 0.2920842618307957, "grad_norm": 2.25, "learning_rate": 4.0326639257435343e-05, "loss": 0.4202, "step": 6652 }, { "epoch": 0.29217208030999925, "grad_norm": 2.3125, "learning_rate": 4.032117300489476e-05, "loss": 0.4369, "step": 6654 }, { "epoch": 0.2922598987892027, "grad_norm": 2.359375, "learning_rate": 4.031570557905508e-05, "loss": 0.4293, "step": 6656 }, { "epoch": 0.2923477172684062, "grad_norm": 2.234375, "learning_rate": 4.031023698033499e-05, "loss": 0.4306, "step": 6658 }, { "epoch": 0.29243553574760967, "grad_norm": 2.921875, "learning_rate": 4.030476720915328e-05, "loss": 0.4439, "step": 6660 }, { "epoch": 0.2925233542268132, "grad_norm": 2.265625, "learning_rate": 4.029929626592884e-05, "loss": 0.4316, "step": 6662 }, { "epoch": 0.29261117270601666, "grad_norm": 2.28125, "learning_rate": 4.0293824151080614e-05, "loss": 0.4389, "step": 6664 }, { "epoch": 0.29269899118522014, "grad_norm": 2.203125, "learning_rate": 4.028835086502767e-05, "loss": 0.423, "step": 6666 }, { "epoch": 0.29278680966442366, "grad_norm": 2.0625, "learning_rate": 4.028287640818915e-05, "loss": 0.4596, "step": 6668 }, { "epoch": 0.29287462814362714, "grad_norm": 2.265625, "learning_rate": 4.02774007809843e-05, "loss": 0.4145, "step": 6670 }, { "epoch": 0.2929624466228306, "grad_norm": 2.21875, "learning_rate": 4.027192398383243e-05, "loss": 0.4104, "step": 6672 }, { "epoch": 0.2930502651020341, "grad_norm": 2.234375, "learning_rate": 4.0266446017152956e-05, "loss": 0.4175, "step": 6674 }, { "epoch": 0.2931380835812376, "grad_norm": 2.671875, "learning_rate": 4.0260966881365395e-05, "loss": 0.4496, "step": 6676 }, { "epoch": 0.2932259020604411, "grad_norm": 2.25, "learning_rate": 4.0255486576889315e-05, "loss": 0.4373, "step": 6678 }, { "epoch": 0.29331372053964455, "grad_norm": 2.359375, "learning_rate": 4.0250005104144425e-05, "loss": 0.4386, "step": 6680 }, { "epoch": 0.293401539018848, "grad_norm": 2.140625, "learning_rate": 4.0244522463550494e-05, "loss": 0.4338, "step": 6682 }, { "epoch": 0.29348935749805155, "grad_norm": 2.046875, "learning_rate": 4.023903865552738e-05, "loss": 0.4828, "step": 6684 }, { "epoch": 0.293577175977255, "grad_norm": 1.9296875, "learning_rate": 4.0233553680495027e-05, "loss": 0.4387, "step": 6686 }, { "epoch": 0.2936649944564585, "grad_norm": 2.296875, "learning_rate": 4.022806753887349e-05, "loss": 0.4357, "step": 6688 }, { "epoch": 0.29375281293566197, "grad_norm": 2.203125, "learning_rate": 4.022258023108288e-05, "loss": 0.4067, "step": 6690 }, { "epoch": 0.2938406314148655, "grad_norm": 2.65625, "learning_rate": 4.021709175754342e-05, "loss": 0.4142, "step": 6692 }, { "epoch": 0.29392844989406897, "grad_norm": 2.640625, "learning_rate": 4.021160211867544e-05, "loss": 0.4647, "step": 6694 }, { "epoch": 0.29401626837327244, "grad_norm": 2.84375, "learning_rate": 4.020611131489932e-05, "loss": 0.4448, "step": 6696 }, { "epoch": 0.2941040868524759, "grad_norm": 2.25, "learning_rate": 4.020061934663555e-05, "loss": 0.4183, "step": 6698 }, { "epoch": 0.29419190533167944, "grad_norm": 2.375, "learning_rate": 4.0195126214304704e-05, "loss": 0.4467, "step": 6700 }, { "epoch": 0.2942797238108829, "grad_norm": 2.140625, "learning_rate": 4.018963191832746e-05, "loss": 0.4432, "step": 6702 }, { "epoch": 0.2943675422900864, "grad_norm": 2.203125, "learning_rate": 4.018413645912455e-05, "loss": 0.4159, "step": 6704 }, { "epoch": 0.29445536076928985, "grad_norm": 2.203125, "learning_rate": 4.0178639837116836e-05, "loss": 0.412, "step": 6706 }, { "epoch": 0.2945431792484934, "grad_norm": 2.328125, "learning_rate": 4.0173142052725244e-05, "loss": 0.4499, "step": 6708 }, { "epoch": 0.29463099772769685, "grad_norm": 2.328125, "learning_rate": 4.0167643106370786e-05, "loss": 0.4282, "step": 6710 }, { "epoch": 0.2947188162069003, "grad_norm": 2.71875, "learning_rate": 4.016214299847459e-05, "loss": 0.4674, "step": 6712 }, { "epoch": 0.2948066346861038, "grad_norm": 2.796875, "learning_rate": 4.0156641729457855e-05, "loss": 0.4387, "step": 6714 }, { "epoch": 0.2948944531653073, "grad_norm": 2.640625, "learning_rate": 4.015113929974187e-05, "loss": 0.4541, "step": 6716 }, { "epoch": 0.2949822716445108, "grad_norm": 2.546875, "learning_rate": 4.014563570974799e-05, "loss": 0.4202, "step": 6718 }, { "epoch": 0.29507009012371427, "grad_norm": 3.296875, "learning_rate": 4.01401309598977e-05, "loss": 0.4555, "step": 6720 }, { "epoch": 0.2951579086029178, "grad_norm": 2.484375, "learning_rate": 4.013462505061255e-05, "loss": 0.4327, "step": 6722 }, { "epoch": 0.29524572708212127, "grad_norm": 2.3125, "learning_rate": 4.0129117982314194e-05, "loss": 0.4166, "step": 6724 }, { "epoch": 0.29533354556132474, "grad_norm": 2.390625, "learning_rate": 4.012360975542434e-05, "loss": 0.3892, "step": 6726 }, { "epoch": 0.2954213640405282, "grad_norm": 2.34375, "learning_rate": 4.011810037036484e-05, "loss": 0.4572, "step": 6728 }, { "epoch": 0.29550918251973174, "grad_norm": 2.390625, "learning_rate": 4.011258982755759e-05, "loss": 0.4488, "step": 6730 }, { "epoch": 0.2955970009989352, "grad_norm": 2.171875, "learning_rate": 4.010707812742459e-05, "loss": 0.4369, "step": 6732 }, { "epoch": 0.2956848194781387, "grad_norm": 2.296875, "learning_rate": 4.010156527038791e-05, "loss": 0.433, "step": 6734 }, { "epoch": 0.29577263795734215, "grad_norm": 2.6875, "learning_rate": 4.009605125686975e-05, "loss": 0.4481, "step": 6736 }, { "epoch": 0.2958604564365457, "grad_norm": 2.421875, "learning_rate": 4.009053608729237e-05, "loss": 0.4412, "step": 6738 }, { "epoch": 0.29594827491574915, "grad_norm": 2.359375, "learning_rate": 4.0085019762078116e-05, "loss": 0.4396, "step": 6740 }, { "epoch": 0.2960360933949526, "grad_norm": 2.78125, "learning_rate": 4.007950228164943e-05, "loss": 0.4157, "step": 6742 }, { "epoch": 0.2961239118741561, "grad_norm": 2.328125, "learning_rate": 4.007398364642885e-05, "loss": 0.4439, "step": 6744 }, { "epoch": 0.2962117303533596, "grad_norm": 2.46875, "learning_rate": 4.006846385683899e-05, "loss": 0.4452, "step": 6746 }, { "epoch": 0.2962995488325631, "grad_norm": 2.875, "learning_rate": 4.006294291330255e-05, "loss": 0.4207, "step": 6748 }, { "epoch": 0.29638736731176657, "grad_norm": 2.578125, "learning_rate": 4.005742081624233e-05, "loss": 0.4459, "step": 6750 }, { "epoch": 0.29647518579097004, "grad_norm": 2.53125, "learning_rate": 4.005189756608122e-05, "loss": 0.433, "step": 6752 }, { "epoch": 0.29656300427017357, "grad_norm": 2.359375, "learning_rate": 4.004637316324218e-05, "loss": 0.4451, "step": 6754 }, { "epoch": 0.29665082274937704, "grad_norm": 2.265625, "learning_rate": 4.004084760814828e-05, "loss": 0.4489, "step": 6756 }, { "epoch": 0.2967386412285805, "grad_norm": 2.390625, "learning_rate": 4.003532090122266e-05, "loss": 0.4177, "step": 6758 }, { "epoch": 0.296826459707784, "grad_norm": 2.25, "learning_rate": 4.002979304288857e-05, "loss": 0.4251, "step": 6760 }, { "epoch": 0.2969142781869875, "grad_norm": 2.375, "learning_rate": 4.002426403356932e-05, "loss": 0.4323, "step": 6762 }, { "epoch": 0.297002096666191, "grad_norm": 2.125, "learning_rate": 4.001873387368833e-05, "loss": 0.4477, "step": 6764 }, { "epoch": 0.29708991514539446, "grad_norm": 2.296875, "learning_rate": 4.0013202563669104e-05, "loss": 0.4452, "step": 6766 }, { "epoch": 0.29717773362459793, "grad_norm": 2.671875, "learning_rate": 4.000767010393522e-05, "loss": 0.4423, "step": 6768 }, { "epoch": 0.29726555210380146, "grad_norm": 2.265625, "learning_rate": 4.0002136494910366e-05, "loss": 0.4301, "step": 6770 }, { "epoch": 0.29735337058300493, "grad_norm": 2.5, "learning_rate": 3.9996601737018294e-05, "loss": 0.4507, "step": 6772 }, { "epoch": 0.2974411890622084, "grad_norm": 2.234375, "learning_rate": 3.9991065830682875e-05, "loss": 0.4243, "step": 6774 }, { "epoch": 0.2975290075414119, "grad_norm": 2.203125, "learning_rate": 3.998552877632804e-05, "loss": 0.4369, "step": 6776 }, { "epoch": 0.2976168260206154, "grad_norm": 2.0, "learning_rate": 3.9979990574377825e-05, "loss": 0.4455, "step": 6778 }, { "epoch": 0.29770464449981887, "grad_norm": 2.265625, "learning_rate": 3.997445122525633e-05, "loss": 0.413, "step": 6780 }, { "epoch": 0.29779246297902234, "grad_norm": 2.046875, "learning_rate": 3.996891072938778e-05, "loss": 0.4478, "step": 6782 }, { "epoch": 0.29788028145822587, "grad_norm": 2.359375, "learning_rate": 3.9963369087196444e-05, "loss": 0.4367, "step": 6784 }, { "epoch": 0.29796809993742934, "grad_norm": 2.328125, "learning_rate": 3.9957826299106724e-05, "loss": 0.4002, "step": 6786 }, { "epoch": 0.2980559184166328, "grad_norm": 2.28125, "learning_rate": 3.9952282365543083e-05, "loss": 0.4108, "step": 6788 }, { "epoch": 0.2981437368958363, "grad_norm": 2.15625, "learning_rate": 3.9946737286930076e-05, "loss": 0.4468, "step": 6790 }, { "epoch": 0.2982315553750398, "grad_norm": 2.125, "learning_rate": 3.994119106369234e-05, "loss": 0.4116, "step": 6792 }, { "epoch": 0.2983193738542433, "grad_norm": 2.203125, "learning_rate": 3.993564369625461e-05, "loss": 0.4294, "step": 6794 }, { "epoch": 0.29840719233344676, "grad_norm": 2.296875, "learning_rate": 3.993009518504171e-05, "loss": 0.4148, "step": 6796 }, { "epoch": 0.29849501081265023, "grad_norm": 2.546875, "learning_rate": 3.9924545530478544e-05, "loss": 0.4224, "step": 6798 }, { "epoch": 0.29858282929185376, "grad_norm": 2.0, "learning_rate": 3.99189947329901e-05, "loss": 0.3931, "step": 6800 }, { "epoch": 0.29867064777105723, "grad_norm": 2.1875, "learning_rate": 3.991344279300145e-05, "loss": 0.445, "step": 6802 }, { "epoch": 0.2987584662502607, "grad_norm": 2.390625, "learning_rate": 3.990788971093779e-05, "loss": 0.4219, "step": 6804 }, { "epoch": 0.2988462847294642, "grad_norm": 2.28125, "learning_rate": 3.9902335487224364e-05, "loss": 0.4343, "step": 6806 }, { "epoch": 0.2989341032086677, "grad_norm": 2.15625, "learning_rate": 3.989678012228651e-05, "loss": 0.4439, "step": 6808 }, { "epoch": 0.2990219216878712, "grad_norm": 2.421875, "learning_rate": 3.989122361654967e-05, "loss": 0.452, "step": 6810 }, { "epoch": 0.29910974016707464, "grad_norm": 2.03125, "learning_rate": 3.988566597043935e-05, "loss": 0.4407, "step": 6812 }, { "epoch": 0.2991975586462781, "grad_norm": 2.484375, "learning_rate": 3.988010718438115e-05, "loss": 0.4095, "step": 6814 }, { "epoch": 0.29928537712548164, "grad_norm": 2.6875, "learning_rate": 3.987454725880079e-05, "loss": 0.4345, "step": 6816 }, { "epoch": 0.2993731956046851, "grad_norm": 2.0, "learning_rate": 3.986898619412402e-05, "loss": 0.4371, "step": 6818 }, { "epoch": 0.2994610140838886, "grad_norm": 2.203125, "learning_rate": 3.986342399077674e-05, "loss": 0.4375, "step": 6820 }, { "epoch": 0.2995488325630921, "grad_norm": 1.9765625, "learning_rate": 3.985786064918489e-05, "loss": 0.4311, "step": 6822 }, { "epoch": 0.2996366510422956, "grad_norm": 2.359375, "learning_rate": 3.9852296169774493e-05, "loss": 0.4073, "step": 6824 }, { "epoch": 0.29972446952149906, "grad_norm": 2.171875, "learning_rate": 3.9846730552971705e-05, "loss": 0.4334, "step": 6826 }, { "epoch": 0.29981228800070253, "grad_norm": 2.328125, "learning_rate": 3.984116379920273e-05, "loss": 0.4197, "step": 6828 }, { "epoch": 0.29990010647990606, "grad_norm": 2.375, "learning_rate": 3.983559590889387e-05, "loss": 0.453, "step": 6830 }, { "epoch": 0.29998792495910953, "grad_norm": 2.6875, "learning_rate": 3.9830026882471526e-05, "loss": 0.4483, "step": 6832 }, { "epoch": 0.300075743438313, "grad_norm": 2.828125, "learning_rate": 3.982445672036216e-05, "loss": 0.4343, "step": 6834 }, { "epoch": 0.3001635619175165, "grad_norm": 2.5625, "learning_rate": 3.9818885422992355e-05, "loss": 0.4299, "step": 6836 }, { "epoch": 0.30025138039672, "grad_norm": 2.34375, "learning_rate": 3.9813312990788757e-05, "loss": 0.4304, "step": 6838 }, { "epoch": 0.3003391988759235, "grad_norm": 2.5, "learning_rate": 3.980773942417808e-05, "loss": 0.4219, "step": 6840 }, { "epoch": 0.30042701735512695, "grad_norm": 2.265625, "learning_rate": 3.980216472358718e-05, "loss": 0.4112, "step": 6842 }, { "epoch": 0.3005148358343304, "grad_norm": 1.921875, "learning_rate": 3.979658888944296e-05, "loss": 0.4221, "step": 6844 }, { "epoch": 0.30060265431353395, "grad_norm": 2.0, "learning_rate": 3.97910119221724e-05, "loss": 0.4513, "step": 6846 }, { "epoch": 0.3006904727927374, "grad_norm": 2.34375, "learning_rate": 3.9785433822202614e-05, "loss": 0.4373, "step": 6848 }, { "epoch": 0.3007782912719409, "grad_norm": 2.25, "learning_rate": 3.977985458996076e-05, "loss": 0.43, "step": 6850 }, { "epoch": 0.30086610975114436, "grad_norm": 1.9765625, "learning_rate": 3.97742742258741e-05, "loss": 0.3945, "step": 6852 }, { "epoch": 0.3009539282303479, "grad_norm": 2.203125, "learning_rate": 3.976869273036997e-05, "loss": 0.4025, "step": 6854 }, { "epoch": 0.30104174670955136, "grad_norm": 2.1875, "learning_rate": 3.9763110103875824e-05, "loss": 0.4207, "step": 6856 }, { "epoch": 0.30112956518875483, "grad_norm": 2.21875, "learning_rate": 3.975752634681915e-05, "loss": 0.4163, "step": 6858 }, { "epoch": 0.3012173836679583, "grad_norm": 2.1875, "learning_rate": 3.975194145962758e-05, "loss": 0.4299, "step": 6860 }, { "epoch": 0.30130520214716183, "grad_norm": 1.8984375, "learning_rate": 3.97463554427288e-05, "loss": 0.434, "step": 6862 }, { "epoch": 0.3013930206263653, "grad_norm": 2.40625, "learning_rate": 3.974076829655058e-05, "loss": 0.4472, "step": 6864 }, { "epoch": 0.3014808391055688, "grad_norm": 2.28125, "learning_rate": 3.97351800215208e-05, "loss": 0.417, "step": 6866 }, { "epoch": 0.30156865758477225, "grad_norm": 2.21875, "learning_rate": 3.972959061806739e-05, "loss": 0.425, "step": 6868 }, { "epoch": 0.3016564760639758, "grad_norm": 2.125, "learning_rate": 3.972400008661841e-05, "loss": 0.4394, "step": 6870 }, { "epoch": 0.30174429454317925, "grad_norm": 2.1875, "learning_rate": 3.971840842760196e-05, "loss": 0.4763, "step": 6872 }, { "epoch": 0.3018321130223827, "grad_norm": 2.3125, "learning_rate": 3.971281564144628e-05, "loss": 0.4281, "step": 6874 }, { "epoch": 0.30191993150158625, "grad_norm": 1.953125, "learning_rate": 3.9707221728579634e-05, "loss": 0.459, "step": 6876 }, { "epoch": 0.3020077499807897, "grad_norm": 2.5625, "learning_rate": 3.970162668943044e-05, "loss": 0.4476, "step": 6878 }, { "epoch": 0.3020955684599932, "grad_norm": 2.375, "learning_rate": 3.9696030524427144e-05, "loss": 0.4341, "step": 6880 }, { "epoch": 0.30218338693919666, "grad_norm": 2.25, "learning_rate": 3.96904332339983e-05, "loss": 0.4166, "step": 6882 }, { "epoch": 0.3022712054184002, "grad_norm": 2.171875, "learning_rate": 3.968483481857256e-05, "loss": 0.4051, "step": 6884 }, { "epoch": 0.30235902389760366, "grad_norm": 2.1875, "learning_rate": 3.9679235278578654e-05, "loss": 0.4281, "step": 6886 }, { "epoch": 0.30244684237680713, "grad_norm": 2.078125, "learning_rate": 3.967363461444539e-05, "loss": 0.431, "step": 6888 }, { "epoch": 0.3025346608560106, "grad_norm": 2.03125, "learning_rate": 3.966803282660167e-05, "loss": 0.4171, "step": 6890 }, { "epoch": 0.30262247933521413, "grad_norm": 2.171875, "learning_rate": 3.9662429915476476e-05, "loss": 0.4436, "step": 6892 }, { "epoch": 0.3027102978144176, "grad_norm": 2.34375, "learning_rate": 3.9656825881498885e-05, "loss": 0.4615, "step": 6894 }, { "epoch": 0.3027981162936211, "grad_norm": 1.984375, "learning_rate": 3.965122072509806e-05, "loss": 0.446, "step": 6896 }, { "epoch": 0.30288593477282455, "grad_norm": 2.21875, "learning_rate": 3.964561444670324e-05, "loss": 0.4475, "step": 6898 }, { "epoch": 0.3029737532520281, "grad_norm": 2.34375, "learning_rate": 3.9640007046743756e-05, "loss": 0.3973, "step": 6900 }, { "epoch": 0.30306157173123155, "grad_norm": 2.203125, "learning_rate": 3.963439852564901e-05, "loss": 0.427, "step": 6902 }, { "epoch": 0.303149390210435, "grad_norm": 2.171875, "learning_rate": 3.962878888384853e-05, "loss": 0.427, "step": 6904 }, { "epoch": 0.3032372086896385, "grad_norm": 2.1875, "learning_rate": 3.9623178121771886e-05, "loss": 0.4341, "step": 6906 }, { "epoch": 0.303325027168842, "grad_norm": 2.40625, "learning_rate": 3.9617566239848755e-05, "loss": 0.4268, "step": 6908 }, { "epoch": 0.3034128456480455, "grad_norm": 2.4375, "learning_rate": 3.961195323850889e-05, "loss": 0.4002, "step": 6910 }, { "epoch": 0.30350066412724896, "grad_norm": 3.109375, "learning_rate": 3.960633911818216e-05, "loss": 0.4214, "step": 6912 }, { "epoch": 0.30358848260645244, "grad_norm": 2.65625, "learning_rate": 3.960072387929847e-05, "loss": 0.4517, "step": 6914 }, { "epoch": 0.30367630108565596, "grad_norm": 2.34375, "learning_rate": 3.959510752228784e-05, "loss": 0.4415, "step": 6916 }, { "epoch": 0.30376411956485944, "grad_norm": 2.375, "learning_rate": 3.958949004758039e-05, "loss": 0.4185, "step": 6918 }, { "epoch": 0.3038519380440629, "grad_norm": 2.90625, "learning_rate": 3.958387145560628e-05, "loss": 0.431, "step": 6920 }, { "epoch": 0.30393975652326644, "grad_norm": 2.296875, "learning_rate": 3.957825174679581e-05, "loss": 0.4448, "step": 6922 }, { "epoch": 0.3040275750024699, "grad_norm": 2.296875, "learning_rate": 3.9572630921579324e-05, "loss": 0.4327, "step": 6924 }, { "epoch": 0.3041153934816734, "grad_norm": 2.34375, "learning_rate": 3.9567008980387264e-05, "loss": 0.4188, "step": 6926 }, { "epoch": 0.30420321196087685, "grad_norm": 2.875, "learning_rate": 3.956138592365017e-05, "loss": 0.4284, "step": 6928 }, { "epoch": 0.3042910304400804, "grad_norm": 3.46875, "learning_rate": 3.9555761751798646e-05, "loss": 0.4069, "step": 6930 }, { "epoch": 0.30437884891928385, "grad_norm": 2.453125, "learning_rate": 3.95501364652634e-05, "loss": 0.4025, "step": 6932 }, { "epoch": 0.3044666673984873, "grad_norm": 2.21875, "learning_rate": 3.9544510064475214e-05, "loss": 0.4497, "step": 6934 }, { "epoch": 0.3045544858776908, "grad_norm": 1.921875, "learning_rate": 3.953888254986496e-05, "loss": 0.4263, "step": 6936 }, { "epoch": 0.3046423043568943, "grad_norm": 2.375, "learning_rate": 3.95332539218636e-05, "loss": 0.4268, "step": 6938 }, { "epoch": 0.3047301228360978, "grad_norm": 2.65625, "learning_rate": 3.952762418090217e-05, "loss": 0.4395, "step": 6940 }, { "epoch": 0.30481794131530127, "grad_norm": 2.3125, "learning_rate": 3.9521993327411797e-05, "loss": 0.4417, "step": 6942 }, { "epoch": 0.30490575979450474, "grad_norm": 2.03125, "learning_rate": 3.9516361361823696e-05, "loss": 0.4297, "step": 6944 }, { "epoch": 0.30499357827370827, "grad_norm": 2.28125, "learning_rate": 3.951072828456916e-05, "loss": 0.4091, "step": 6946 }, { "epoch": 0.30508139675291174, "grad_norm": 2.40625, "learning_rate": 3.9505094096079577e-05, "loss": 0.4255, "step": 6948 }, { "epoch": 0.3051692152321152, "grad_norm": 2.21875, "learning_rate": 3.9499458796786406e-05, "loss": 0.433, "step": 6950 }, { "epoch": 0.3052570337113187, "grad_norm": 2.359375, "learning_rate": 3.94938223871212e-05, "loss": 0.4533, "step": 6952 }, { "epoch": 0.3053448521905222, "grad_norm": 2.4375, "learning_rate": 3.948818486751561e-05, "loss": 0.4138, "step": 6954 }, { "epoch": 0.3054326706697257, "grad_norm": 2.203125, "learning_rate": 3.948254623840134e-05, "loss": 0.4251, "step": 6956 }, { "epoch": 0.30552048914892915, "grad_norm": 2.390625, "learning_rate": 3.947690650021022e-05, "loss": 0.4489, "step": 6958 }, { "epoch": 0.3056083076281326, "grad_norm": 2.28125, "learning_rate": 3.947126565337412e-05, "loss": 0.4479, "step": 6960 }, { "epoch": 0.30569612610733615, "grad_norm": 1.890625, "learning_rate": 3.946562369832503e-05, "loss": 0.4553, "step": 6962 }, { "epoch": 0.3057839445865396, "grad_norm": 1.96875, "learning_rate": 3.945998063549501e-05, "loss": 0.4209, "step": 6964 }, { "epoch": 0.3058717630657431, "grad_norm": 1.9453125, "learning_rate": 3.945433646531621e-05, "loss": 0.3993, "step": 6966 }, { "epoch": 0.30595958154494657, "grad_norm": 2.125, "learning_rate": 3.9448691188220854e-05, "loss": 0.4497, "step": 6968 }, { "epoch": 0.3060474000241501, "grad_norm": 2.171875, "learning_rate": 3.944304480464126e-05, "loss": 0.4177, "step": 6970 }, { "epoch": 0.30613521850335357, "grad_norm": 2.140625, "learning_rate": 3.943739731500984e-05, "loss": 0.4476, "step": 6972 }, { "epoch": 0.30622303698255704, "grad_norm": 1.96875, "learning_rate": 3.943174871975907e-05, "loss": 0.4581, "step": 6974 }, { "epoch": 0.30631085546176057, "grad_norm": 1.96875, "learning_rate": 3.942609901932153e-05, "loss": 0.448, "step": 6976 }, { "epoch": 0.30639867394096404, "grad_norm": 2.109375, "learning_rate": 3.942044821412986e-05, "loss": 0.4786, "step": 6978 }, { "epoch": 0.3064864924201675, "grad_norm": 2.3125, "learning_rate": 3.941479630461681e-05, "loss": 0.4589, "step": 6980 }, { "epoch": 0.306574310899371, "grad_norm": 2.4375, "learning_rate": 3.94091432912152e-05, "loss": 0.4237, "step": 6982 }, { "epoch": 0.3066621293785745, "grad_norm": 2.578125, "learning_rate": 3.940348917435796e-05, "loss": 0.4291, "step": 6984 }, { "epoch": 0.306749947857778, "grad_norm": 2.296875, "learning_rate": 3.939783395447805e-05, "loss": 0.4183, "step": 6986 }, { "epoch": 0.30683776633698145, "grad_norm": 2.25, "learning_rate": 3.939217763200857e-05, "loss": 0.4078, "step": 6988 }, { "epoch": 0.3069255848161849, "grad_norm": 2.328125, "learning_rate": 3.938652020738267e-05, "loss": 0.4128, "step": 6990 }, { "epoch": 0.30701340329538845, "grad_norm": 2.0, "learning_rate": 3.9380861681033606e-05, "loss": 0.4092, "step": 6992 }, { "epoch": 0.3071012217745919, "grad_norm": 2.25, "learning_rate": 3.937520205339471e-05, "loss": 0.4189, "step": 6994 }, { "epoch": 0.3071890402537954, "grad_norm": 2.265625, "learning_rate": 3.93695413248994e-05, "loss": 0.4114, "step": 6996 }, { "epoch": 0.30727685873299887, "grad_norm": 2.15625, "learning_rate": 3.9363879495981166e-05, "loss": 0.4252, "step": 6998 }, { "epoch": 0.3073646772122024, "grad_norm": 2.296875, "learning_rate": 3.935821656707359e-05, "loss": 0.4006, "step": 7000 }, { "epoch": 0.30745249569140587, "grad_norm": 2.203125, "learning_rate": 3.935255253861036e-05, "loss": 0.434, "step": 7002 }, { "epoch": 0.30754031417060934, "grad_norm": 1.9375, "learning_rate": 3.934688741102521e-05, "loss": 0.4251, "step": 7004 }, { "epoch": 0.3076281326498128, "grad_norm": 2.421875, "learning_rate": 3.934122118475197e-05, "loss": 0.4504, "step": 7006 }, { "epoch": 0.30771595112901634, "grad_norm": 2.640625, "learning_rate": 3.93355538602246e-05, "loss": 0.426, "step": 7008 }, { "epoch": 0.3078037696082198, "grad_norm": 2.75, "learning_rate": 3.932988543787707e-05, "loss": 0.398, "step": 7010 }, { "epoch": 0.3078915880874233, "grad_norm": 2.21875, "learning_rate": 3.932421591814347e-05, "loss": 0.4376, "step": 7012 }, { "epoch": 0.30797940656662676, "grad_norm": 2.625, "learning_rate": 3.9318545301457985e-05, "loss": 0.4604, "step": 7014 }, { "epoch": 0.3080672250458303, "grad_norm": 1.9765625, "learning_rate": 3.931287358825486e-05, "loss": 0.4587, "step": 7016 }, { "epoch": 0.30815504352503376, "grad_norm": 2.453125, "learning_rate": 3.930720077896846e-05, "loss": 0.4166, "step": 7018 }, { "epoch": 0.30824286200423723, "grad_norm": 2.140625, "learning_rate": 3.930152687403319e-05, "loss": 0.4667, "step": 7020 }, { "epoch": 0.30833068048344076, "grad_norm": 2.125, "learning_rate": 3.929585187388356e-05, "loss": 0.4017, "step": 7022 }, { "epoch": 0.3084184989626442, "grad_norm": 2.09375, "learning_rate": 3.929017577895416e-05, "loss": 0.426, "step": 7024 }, { "epoch": 0.3085063174418477, "grad_norm": 2.15625, "learning_rate": 3.928449858967969e-05, "loss": 0.4435, "step": 7026 }, { "epoch": 0.30859413592105117, "grad_norm": 2.1875, "learning_rate": 3.9278820306494876e-05, "loss": 0.4275, "step": 7028 }, { "epoch": 0.3086819544002547, "grad_norm": 2.109375, "learning_rate": 3.92731409298346e-05, "loss": 0.4406, "step": 7030 }, { "epoch": 0.30876977287945817, "grad_norm": 2.1875, "learning_rate": 3.9267460460133756e-05, "loss": 0.4393, "step": 7032 }, { "epoch": 0.30885759135866164, "grad_norm": 2.328125, "learning_rate": 3.9261778897827376e-05, "loss": 0.443, "step": 7034 }, { "epoch": 0.3089454098378651, "grad_norm": 2.0, "learning_rate": 3.925609624335054e-05, "loss": 0.4156, "step": 7036 }, { "epoch": 0.30903322831706864, "grad_norm": 2.203125, "learning_rate": 3.925041249713844e-05, "loss": 0.4142, "step": 7038 }, { "epoch": 0.3091210467962721, "grad_norm": 2.046875, "learning_rate": 3.9244727659626346e-05, "loss": 0.421, "step": 7040 }, { "epoch": 0.3092088652754756, "grad_norm": 2.265625, "learning_rate": 3.923904173124958e-05, "loss": 0.4122, "step": 7042 }, { "epoch": 0.30929668375467906, "grad_norm": 2.390625, "learning_rate": 3.923335471244359e-05, "loss": 0.4319, "step": 7044 }, { "epoch": 0.3093845022338826, "grad_norm": 1.9609375, "learning_rate": 3.922766660364388e-05, "loss": 0.4363, "step": 7046 }, { "epoch": 0.30947232071308606, "grad_norm": 2.375, "learning_rate": 3.9221977405286057e-05, "loss": 0.4407, "step": 7048 }, { "epoch": 0.30956013919228953, "grad_norm": 2.078125, "learning_rate": 3.9216287117805787e-05, "loss": 0.4583, "step": 7050 }, { "epoch": 0.309647957671493, "grad_norm": 2.296875, "learning_rate": 3.9210595741638853e-05, "loss": 0.4364, "step": 7052 }, { "epoch": 0.30973577615069653, "grad_norm": 1.9140625, "learning_rate": 3.9204903277221086e-05, "loss": 0.4248, "step": 7054 }, { "epoch": 0.3098235946299, "grad_norm": 1.953125, "learning_rate": 3.9199209724988415e-05, "loss": 0.4312, "step": 7056 }, { "epoch": 0.3099114131091035, "grad_norm": 2.21875, "learning_rate": 3.9193515085376867e-05, "loss": 0.43, "step": 7058 }, { "epoch": 0.30999923158830694, "grad_norm": 2.21875, "learning_rate": 3.918781935882253e-05, "loss": 0.4222, "step": 7060 }, { "epoch": 0.3100870500675105, "grad_norm": 2.515625, "learning_rate": 3.918212254576158e-05, "loss": 0.4161, "step": 7062 }, { "epoch": 0.31017486854671394, "grad_norm": 2.03125, "learning_rate": 3.91764246466303e-05, "loss": 0.4363, "step": 7064 }, { "epoch": 0.3102626870259174, "grad_norm": 2.421875, "learning_rate": 3.9170725661865e-05, "loss": 0.3983, "step": 7066 }, { "epoch": 0.3103505055051209, "grad_norm": 2.171875, "learning_rate": 3.916502559190215e-05, "loss": 0.4453, "step": 7068 }, { "epoch": 0.3104383239843244, "grad_norm": 2.15625, "learning_rate": 3.9159324437178236e-05, "loss": 0.4246, "step": 7070 }, { "epoch": 0.3105261424635279, "grad_norm": 2.0625, "learning_rate": 3.915362219812986e-05, "loss": 0.4217, "step": 7072 }, { "epoch": 0.31061396094273136, "grad_norm": 2.296875, "learning_rate": 3.914791887519371e-05, "loss": 0.4183, "step": 7074 }, { "epoch": 0.3107017794219349, "grad_norm": 2.25, "learning_rate": 3.914221446880654e-05, "loss": 0.4117, "step": 7076 }, { "epoch": 0.31078959790113836, "grad_norm": 2.328125, "learning_rate": 3.91365089794052e-05, "loss": 0.4156, "step": 7078 }, { "epoch": 0.31087741638034183, "grad_norm": 2.375, "learning_rate": 3.91308024074266e-05, "loss": 0.4281, "step": 7080 }, { "epoch": 0.3109652348595453, "grad_norm": 2.1875, "learning_rate": 3.9125094753307775e-05, "loss": 0.428, "step": 7082 }, { "epoch": 0.31105305333874883, "grad_norm": 2.296875, "learning_rate": 3.91193860174858e-05, "loss": 0.4356, "step": 7084 }, { "epoch": 0.3111408718179523, "grad_norm": 2.375, "learning_rate": 3.911367620039787e-05, "loss": 0.4351, "step": 7086 }, { "epoch": 0.3112286902971558, "grad_norm": 2.140625, "learning_rate": 3.910796530248123e-05, "loss": 0.455, "step": 7088 }, { "epoch": 0.31131650877635925, "grad_norm": 2.703125, "learning_rate": 3.910225332417322e-05, "loss": 0.3949, "step": 7090 }, { "epoch": 0.3114043272555628, "grad_norm": 2.546875, "learning_rate": 3.909654026591127e-05, "loss": 0.4086, "step": 7092 }, { "epoch": 0.31149214573476625, "grad_norm": 2.265625, "learning_rate": 3.9090826128132896e-05, "loss": 0.4271, "step": 7094 }, { "epoch": 0.3115799642139697, "grad_norm": 2.421875, "learning_rate": 3.908511091127567e-05, "loss": 0.4195, "step": 7096 }, { "epoch": 0.3116677826931732, "grad_norm": 2.328125, "learning_rate": 3.907939461577727e-05, "loss": 0.4229, "step": 7098 }, { "epoch": 0.3117556011723767, "grad_norm": 2.0625, "learning_rate": 3.9073677242075466e-05, "loss": 0.4651, "step": 7100 }, { "epoch": 0.3118434196515802, "grad_norm": 2.296875, "learning_rate": 3.906795879060809e-05, "loss": 0.4029, "step": 7102 }, { "epoch": 0.31193123813078366, "grad_norm": 2.140625, "learning_rate": 3.906223926181305e-05, "loss": 0.4254, "step": 7104 }, { "epoch": 0.31201905660998713, "grad_norm": 2.1875, "learning_rate": 3.905651865612835e-05, "loss": 0.4422, "step": 7106 }, { "epoch": 0.31210687508919066, "grad_norm": 2.03125, "learning_rate": 3.9050796973992084e-05, "loss": 0.432, "step": 7108 }, { "epoch": 0.31219469356839413, "grad_norm": 2.234375, "learning_rate": 3.9045074215842425e-05, "loss": 0.4265, "step": 7110 }, { "epoch": 0.3122825120475976, "grad_norm": 2.328125, "learning_rate": 3.903935038211761e-05, "loss": 0.4098, "step": 7112 }, { "epoch": 0.3123703305268011, "grad_norm": 1.8828125, "learning_rate": 3.903362547325598e-05, "loss": 0.407, "step": 7114 }, { "epoch": 0.3124581490060046, "grad_norm": 1.8984375, "learning_rate": 3.9027899489695954e-05, "loss": 0.4245, "step": 7116 }, { "epoch": 0.3125459674852081, "grad_norm": 2.328125, "learning_rate": 3.902217243187601e-05, "loss": 0.4124, "step": 7118 }, { "epoch": 0.31263378596441155, "grad_norm": 2.4375, "learning_rate": 3.9016444300234747e-05, "loss": 0.4477, "step": 7120 }, { "epoch": 0.312721604443615, "grad_norm": 2.015625, "learning_rate": 3.9010715095210816e-05, "loss": 0.403, "step": 7122 }, { "epoch": 0.31280942292281855, "grad_norm": 2.09375, "learning_rate": 3.900498481724296e-05, "loss": 0.4407, "step": 7124 }, { "epoch": 0.312897241402022, "grad_norm": 2.171875, "learning_rate": 3.899925346677002e-05, "loss": 0.4491, "step": 7126 }, { "epoch": 0.3129850598812255, "grad_norm": 2.015625, "learning_rate": 3.8993521044230884e-05, "loss": 0.4311, "step": 7128 }, { "epoch": 0.313072878360429, "grad_norm": 2.1875, "learning_rate": 3.8987787550064555e-05, "loss": 0.4356, "step": 7130 }, { "epoch": 0.3131606968396325, "grad_norm": 2.546875, "learning_rate": 3.8982052984710105e-05, "loss": 0.4152, "step": 7132 }, { "epoch": 0.31324851531883596, "grad_norm": 2.1875, "learning_rate": 3.8976317348606684e-05, "loss": 0.411, "step": 7134 }, { "epoch": 0.31333633379803943, "grad_norm": 2.40625, "learning_rate": 3.8970580642193534e-05, "loss": 0.4311, "step": 7136 }, { "epoch": 0.31342415227724296, "grad_norm": 2.34375, "learning_rate": 3.896484286590997e-05, "loss": 0.4259, "step": 7138 }, { "epoch": 0.31351197075644643, "grad_norm": 2.078125, "learning_rate": 3.895910402019538e-05, "loss": 0.4198, "step": 7140 }, { "epoch": 0.3135997892356499, "grad_norm": 2.0625, "learning_rate": 3.895336410548927e-05, "loss": 0.4099, "step": 7142 }, { "epoch": 0.3136876077148534, "grad_norm": 2.140625, "learning_rate": 3.894762312223118e-05, "loss": 0.4274, "step": 7144 }, { "epoch": 0.3137754261940569, "grad_norm": 1.9921875, "learning_rate": 3.894188107086078e-05, "loss": 0.436, "step": 7146 }, { "epoch": 0.3138632446732604, "grad_norm": 2.046875, "learning_rate": 3.893613795181778e-05, "loss": 0.437, "step": 7148 }, { "epoch": 0.31395106315246385, "grad_norm": 2.328125, "learning_rate": 3.8930393765542e-05, "loss": 0.4435, "step": 7150 }, { "epoch": 0.3140388816316673, "grad_norm": 2.65625, "learning_rate": 3.892464851247332e-05, "loss": 0.412, "step": 7152 }, { "epoch": 0.31412670011087085, "grad_norm": 2.453125, "learning_rate": 3.891890219305172e-05, "loss": 0.4144, "step": 7154 }, { "epoch": 0.3142145185900743, "grad_norm": 2.046875, "learning_rate": 3.8913154807717255e-05, "loss": 0.4099, "step": 7156 }, { "epoch": 0.3143023370692778, "grad_norm": 1.9609375, "learning_rate": 3.8907406356910054e-05, "loss": 0.4196, "step": 7158 }, { "epoch": 0.31439015554848126, "grad_norm": 2.078125, "learning_rate": 3.890165684107035e-05, "loss": 0.4157, "step": 7160 }, { "epoch": 0.3144779740276848, "grad_norm": 2.15625, "learning_rate": 3.8895906260638426e-05, "loss": 0.4065, "step": 7162 }, { "epoch": 0.31456579250688826, "grad_norm": 2.328125, "learning_rate": 3.8890154616054676e-05, "loss": 0.4396, "step": 7164 }, { "epoch": 0.31465361098609174, "grad_norm": 1.9921875, "learning_rate": 3.888440190775955e-05, "loss": 0.4087, "step": 7166 }, { "epoch": 0.3147414294652952, "grad_norm": 1.859375, "learning_rate": 3.8878648136193596e-05, "loss": 0.4512, "step": 7168 }, { "epoch": 0.31482924794449874, "grad_norm": 1.921875, "learning_rate": 3.887289330179744e-05, "loss": 0.4095, "step": 7170 }, { "epoch": 0.3149170664237022, "grad_norm": 1.96875, "learning_rate": 3.8867137405011786e-05, "loss": 0.4463, "step": 7172 }, { "epoch": 0.3150048849029057, "grad_norm": 2.15625, "learning_rate": 3.886138044627744e-05, "loss": 0.4024, "step": 7174 }, { "epoch": 0.3150927033821092, "grad_norm": 2.1875, "learning_rate": 3.885562242603525e-05, "loss": 0.4472, "step": 7176 }, { "epoch": 0.3151805218613127, "grad_norm": 1.9765625, "learning_rate": 3.8849863344726175e-05, "loss": 0.4177, "step": 7178 }, { "epoch": 0.31526834034051615, "grad_norm": 2.0625, "learning_rate": 3.884410320279124e-05, "loss": 0.4592, "step": 7180 }, { "epoch": 0.3153561588197196, "grad_norm": 2.140625, "learning_rate": 3.883834200067157e-05, "loss": 0.4344, "step": 7182 }, { "epoch": 0.31544397729892315, "grad_norm": 2.0625, "learning_rate": 3.883257973880834e-05, "loss": 0.4343, "step": 7184 }, { "epoch": 0.3155317957781266, "grad_norm": 1.9453125, "learning_rate": 3.8826816417642845e-05, "loss": 0.4252, "step": 7186 }, { "epoch": 0.3156196142573301, "grad_norm": 2.03125, "learning_rate": 3.882105203761644e-05, "loss": 0.4246, "step": 7188 }, { "epoch": 0.31570743273653357, "grad_norm": 2.09375, "learning_rate": 3.8815286599170544e-05, "loss": 0.4142, "step": 7190 }, { "epoch": 0.3157952512157371, "grad_norm": 2.28125, "learning_rate": 3.88095201027467e-05, "loss": 0.4259, "step": 7192 }, { "epoch": 0.31588306969494057, "grad_norm": 2.046875, "learning_rate": 3.880375254878649e-05, "loss": 0.4249, "step": 7194 }, { "epoch": 0.31597088817414404, "grad_norm": 2.5, "learning_rate": 3.87979839377316e-05, "loss": 0.4373, "step": 7196 }, { "epoch": 0.3160587066533475, "grad_norm": 2.03125, "learning_rate": 3.87922142700238e-05, "loss": 0.4233, "step": 7198 }, { "epoch": 0.31614652513255104, "grad_norm": 2.359375, "learning_rate": 3.878644354610492e-05, "loss": 0.4137, "step": 7200 }, { "epoch": 0.3162343436117545, "grad_norm": 2.140625, "learning_rate": 3.8780671766416885e-05, "loss": 0.4349, "step": 7202 }, { "epoch": 0.316322162090958, "grad_norm": 2.765625, "learning_rate": 3.8774898931401706e-05, "loss": 0.425, "step": 7204 }, { "epoch": 0.31640998057016145, "grad_norm": 2.328125, "learning_rate": 3.8769125041501466e-05, "loss": 0.466, "step": 7206 }, { "epoch": 0.316497799049365, "grad_norm": 1.9453125, "learning_rate": 3.876335009715833e-05, "loss": 0.4166, "step": 7208 }, { "epoch": 0.31658561752856845, "grad_norm": 2.140625, "learning_rate": 3.8757574098814544e-05, "loss": 0.4177, "step": 7210 }, { "epoch": 0.3166734360077719, "grad_norm": 2.65625, "learning_rate": 3.875179704691243e-05, "loss": 0.4321, "step": 7212 }, { "epoch": 0.3167612544869754, "grad_norm": 2.46875, "learning_rate": 3.874601894189441e-05, "loss": 0.4159, "step": 7214 }, { "epoch": 0.3168490729661789, "grad_norm": 2.625, "learning_rate": 3.8740239784202956e-05, "loss": 0.4265, "step": 7216 }, { "epoch": 0.3169368914453824, "grad_norm": 2.75, "learning_rate": 3.873445957428065e-05, "loss": 0.4415, "step": 7218 }, { "epoch": 0.31702470992458587, "grad_norm": 2.140625, "learning_rate": 3.872867831257014e-05, "loss": 0.4174, "step": 7220 }, { "epoch": 0.31711252840378934, "grad_norm": 2.375, "learning_rate": 3.872289599951415e-05, "loss": 0.4364, "step": 7222 }, { "epoch": 0.31720034688299287, "grad_norm": 2.328125, "learning_rate": 3.8717112635555494e-05, "loss": 0.4321, "step": 7224 }, { "epoch": 0.31728816536219634, "grad_norm": 2.109375, "learning_rate": 3.8711328221137066e-05, "loss": 0.4507, "step": 7226 }, { "epoch": 0.3173759838413998, "grad_norm": 2.078125, "learning_rate": 3.870554275670184e-05, "loss": 0.4347, "step": 7228 }, { "epoch": 0.31746380232060334, "grad_norm": 1.9765625, "learning_rate": 3.8699756242692854e-05, "loss": 0.3905, "step": 7230 }, { "epoch": 0.3175516207998068, "grad_norm": 2.046875, "learning_rate": 3.869396867955326e-05, "loss": 0.4089, "step": 7232 }, { "epoch": 0.3176394392790103, "grad_norm": 2.109375, "learning_rate": 3.868818006772626e-05, "loss": 0.4114, "step": 7234 }, { "epoch": 0.31772725775821375, "grad_norm": 2.203125, "learning_rate": 3.8682390407655145e-05, "loss": 0.39, "step": 7236 }, { "epoch": 0.3178150762374173, "grad_norm": 2.078125, "learning_rate": 3.86765996997833e-05, "loss": 0.4383, "step": 7238 }, { "epoch": 0.31790289471662075, "grad_norm": 2.046875, "learning_rate": 3.867080794455416e-05, "loss": 0.415, "step": 7240 }, { "epoch": 0.3179907131958242, "grad_norm": 2.109375, "learning_rate": 3.866501514241129e-05, "loss": 0.4171, "step": 7242 }, { "epoch": 0.3180785316750277, "grad_norm": 2.15625, "learning_rate": 3.8659221293798265e-05, "loss": 0.4294, "step": 7244 }, { "epoch": 0.3181663501542312, "grad_norm": 1.984375, "learning_rate": 3.865342639915881e-05, "loss": 0.4058, "step": 7246 }, { "epoch": 0.3182541686334347, "grad_norm": 2.078125, "learning_rate": 3.864763045893668e-05, "loss": 0.4064, "step": 7248 }, { "epoch": 0.31834198711263817, "grad_norm": 2.15625, "learning_rate": 3.8641833473575745e-05, "loss": 0.4323, "step": 7250 }, { "epoch": 0.31842980559184164, "grad_norm": 2.359375, "learning_rate": 3.863603544351993e-05, "loss": 0.4051, "step": 7252 }, { "epoch": 0.31851762407104517, "grad_norm": 2.484375, "learning_rate": 3.863023636921326e-05, "loss": 0.4387, "step": 7254 }, { "epoch": 0.31860544255024864, "grad_norm": 2.421875, "learning_rate": 3.862443625109981e-05, "loss": 0.4106, "step": 7256 }, { "epoch": 0.3186932610294521, "grad_norm": 2.578125, "learning_rate": 3.861863508962377e-05, "loss": 0.4498, "step": 7258 }, { "epoch": 0.3187810795086556, "grad_norm": 2.8125, "learning_rate": 3.861283288522939e-05, "loss": 0.4025, "step": 7260 }, { "epoch": 0.3188688979878591, "grad_norm": 2.390625, "learning_rate": 3.8607029638361005e-05, "loss": 0.4204, "step": 7262 }, { "epoch": 0.3189567164670626, "grad_norm": 2.34375, "learning_rate": 3.860122534946302e-05, "loss": 0.4304, "step": 7264 }, { "epoch": 0.31904453494626606, "grad_norm": 1.96875, "learning_rate": 3.859542001897994e-05, "loss": 0.4281, "step": 7266 }, { "epoch": 0.31913235342546953, "grad_norm": 1.953125, "learning_rate": 3.858961364735635e-05, "loss": 0.4151, "step": 7268 }, { "epoch": 0.31922017190467306, "grad_norm": 2.0625, "learning_rate": 3.858380623503688e-05, "loss": 0.421, "step": 7270 }, { "epoch": 0.3193079903838765, "grad_norm": 1.890625, "learning_rate": 3.857799778246627e-05, "loss": 0.429, "step": 7272 }, { "epoch": 0.31939580886308, "grad_norm": 2.015625, "learning_rate": 3.857218829008934e-05, "loss": 0.442, "step": 7274 }, { "epoch": 0.3194836273422835, "grad_norm": 2.0625, "learning_rate": 3.856637775835097e-05, "loss": 0.4132, "step": 7276 }, { "epoch": 0.319571445821487, "grad_norm": 2.203125, "learning_rate": 3.856056618769614e-05, "loss": 0.4476, "step": 7278 }, { "epoch": 0.31965926430069047, "grad_norm": 1.9765625, "learning_rate": 3.855475357856991e-05, "loss": 0.4238, "step": 7280 }, { "epoch": 0.31974708277989394, "grad_norm": 2.046875, "learning_rate": 3.854893993141739e-05, "loss": 0.3769, "step": 7282 }, { "epoch": 0.31983490125909747, "grad_norm": 2.1875, "learning_rate": 3.854312524668381e-05, "loss": 0.4558, "step": 7284 }, { "epoch": 0.31992271973830094, "grad_norm": 2.359375, "learning_rate": 3.853730952481446e-05, "loss": 0.4316, "step": 7286 }, { "epoch": 0.3200105382175044, "grad_norm": 2.125, "learning_rate": 3.853149276625468e-05, "loss": 0.4224, "step": 7288 }, { "epoch": 0.3200983566967079, "grad_norm": 2.296875, "learning_rate": 3.8525674971449956e-05, "loss": 0.4253, "step": 7290 }, { "epoch": 0.3201861751759114, "grad_norm": 2.21875, "learning_rate": 3.85198561408458e-05, "loss": 0.435, "step": 7292 }, { "epoch": 0.3202739936551149, "grad_norm": 2.296875, "learning_rate": 3.851403627488781e-05, "loss": 0.4294, "step": 7294 }, { "epoch": 0.32036181213431836, "grad_norm": 2.640625, "learning_rate": 3.8508215374021695e-05, "loss": 0.4705, "step": 7296 }, { "epoch": 0.32044963061352183, "grad_norm": 2.140625, "learning_rate": 3.8502393438693203e-05, "loss": 0.4311, "step": 7298 }, { "epoch": 0.32053744909272536, "grad_norm": 2.109375, "learning_rate": 3.8496570469348184e-05, "loss": 0.436, "step": 7300 }, { "epoch": 0.32062526757192883, "grad_norm": 2.0625, "learning_rate": 3.8490746466432556e-05, "loss": 0.4277, "step": 7302 }, { "epoch": 0.3207130860511323, "grad_norm": 2.25, "learning_rate": 3.848492143039234e-05, "loss": 0.4082, "step": 7304 }, { "epoch": 0.3208009045303358, "grad_norm": 2.0625, "learning_rate": 3.8479095361673604e-05, "loss": 0.4182, "step": 7306 }, { "epoch": 0.3208887230095393, "grad_norm": 1.9609375, "learning_rate": 3.847326826072252e-05, "loss": 0.4079, "step": 7308 }, { "epoch": 0.3209765414887428, "grad_norm": 2.046875, "learning_rate": 3.846744012798531e-05, "loss": 0.4158, "step": 7310 }, { "epoch": 0.32106435996794624, "grad_norm": 2.15625, "learning_rate": 3.8461610963908314e-05, "loss": 0.4115, "step": 7312 }, { "epoch": 0.3211521784471497, "grad_norm": 1.875, "learning_rate": 3.845578076893793e-05, "loss": 0.4487, "step": 7314 }, { "epoch": 0.32123999692635324, "grad_norm": 2.171875, "learning_rate": 3.8449949543520625e-05, "loss": 0.4127, "step": 7316 }, { "epoch": 0.3213278154055567, "grad_norm": 2.125, "learning_rate": 3.8444117288102956e-05, "loss": 0.434, "step": 7318 }, { "epoch": 0.3214156338847602, "grad_norm": 1.9765625, "learning_rate": 3.8438284003131566e-05, "loss": 0.4443, "step": 7320 }, { "epoch": 0.32150345236396366, "grad_norm": 2.125, "learning_rate": 3.843244968905316e-05, "loss": 0.4392, "step": 7322 }, { "epoch": 0.3215912708431672, "grad_norm": 2.078125, "learning_rate": 3.842661434631454e-05, "loss": 0.434, "step": 7324 }, { "epoch": 0.32167908932237066, "grad_norm": 2.109375, "learning_rate": 3.842077797536258e-05, "loss": 0.4356, "step": 7326 }, { "epoch": 0.32176690780157413, "grad_norm": 2.5, "learning_rate": 3.8414940576644215e-05, "loss": 0.4315, "step": 7328 }, { "epoch": 0.32185472628077766, "grad_norm": 2.3125, "learning_rate": 3.840910215060649e-05, "loss": 0.4577, "step": 7330 }, { "epoch": 0.32194254475998113, "grad_norm": 3.03125, "learning_rate": 3.8403262697696514e-05, "loss": 0.4243, "step": 7332 }, { "epoch": 0.3220303632391846, "grad_norm": 2.71875, "learning_rate": 3.8397422218361454e-05, "loss": 0.4241, "step": 7334 }, { "epoch": 0.3221181817183881, "grad_norm": 3.078125, "learning_rate": 3.8391580713048604e-05, "loss": 0.3999, "step": 7336 }, { "epoch": 0.3222060001975916, "grad_norm": 3.734375, "learning_rate": 3.8385738182205276e-05, "loss": 0.4357, "step": 7338 }, { "epoch": 0.3222938186767951, "grad_norm": 2.34375, "learning_rate": 3.837989462627893e-05, "loss": 0.4123, "step": 7340 }, { "epoch": 0.32238163715599855, "grad_norm": 2.09375, "learning_rate": 3.837405004571703e-05, "loss": 0.4154, "step": 7342 }, { "epoch": 0.322469455635202, "grad_norm": 2.234375, "learning_rate": 3.836820444096718e-05, "loss": 0.3876, "step": 7344 }, { "epoch": 0.32255727411440555, "grad_norm": 2.671875, "learning_rate": 3.8362357812477025e-05, "loss": 0.4418, "step": 7346 }, { "epoch": 0.322645092593609, "grad_norm": 2.671875, "learning_rate": 3.8356510160694305e-05, "loss": 0.4353, "step": 7348 }, { "epoch": 0.3227329110728125, "grad_norm": 2.65625, "learning_rate": 3.835066148606683e-05, "loss": 0.4217, "step": 7350 }, { "epoch": 0.32282072955201596, "grad_norm": 2.640625, "learning_rate": 3.834481178904251e-05, "loss": 0.4379, "step": 7352 }, { "epoch": 0.3229085480312195, "grad_norm": 2.1875, "learning_rate": 3.83389610700693e-05, "loss": 0.4549, "step": 7354 }, { "epoch": 0.32299636651042296, "grad_norm": 2.046875, "learning_rate": 3.833310932959525e-05, "loss": 0.4456, "step": 7356 }, { "epoch": 0.32308418498962643, "grad_norm": 2.46875, "learning_rate": 3.8327256568068495e-05, "loss": 0.4234, "step": 7358 }, { "epoch": 0.3231720034688299, "grad_norm": 2.453125, "learning_rate": 3.832140278593724e-05, "loss": 0.42, "step": 7360 }, { "epoch": 0.32325982194803343, "grad_norm": 2.796875, "learning_rate": 3.8315547983649764e-05, "loss": 0.4267, "step": 7362 }, { "epoch": 0.3233476404272369, "grad_norm": 2.359375, "learning_rate": 3.830969216165443e-05, "loss": 0.4332, "step": 7364 }, { "epoch": 0.3234354589064404, "grad_norm": 2.15625, "learning_rate": 3.8303835320399685e-05, "loss": 0.4232, "step": 7366 }, { "epoch": 0.32352327738564385, "grad_norm": 2.28125, "learning_rate": 3.829797746033404e-05, "loss": 0.4553, "step": 7368 }, { "epoch": 0.3236110958648474, "grad_norm": 2.4375, "learning_rate": 3.829211858190608e-05, "loss": 0.4008, "step": 7370 }, { "epoch": 0.32369891434405085, "grad_norm": 2.21875, "learning_rate": 3.8286258685564505e-05, "loss": 0.4607, "step": 7372 }, { "epoch": 0.3237867328232543, "grad_norm": 2.515625, "learning_rate": 3.828039777175805e-05, "loss": 0.4038, "step": 7374 }, { "epoch": 0.32387455130245785, "grad_norm": 2.671875, "learning_rate": 3.8274535840935553e-05, "loss": 0.4249, "step": 7376 }, { "epoch": 0.3239623697816613, "grad_norm": 2.140625, "learning_rate": 3.8268672893545924e-05, "loss": 0.4222, "step": 7378 }, { "epoch": 0.3240501882608648, "grad_norm": 2.140625, "learning_rate": 3.826280893003814e-05, "loss": 0.4074, "step": 7380 }, { "epoch": 0.32413800674006826, "grad_norm": 2.03125, "learning_rate": 3.8256943950861264e-05, "loss": 0.4298, "step": 7382 }, { "epoch": 0.3242258252192718, "grad_norm": 2.203125, "learning_rate": 3.825107795646444e-05, "loss": 0.4323, "step": 7384 }, { "epoch": 0.32431364369847526, "grad_norm": 2.265625, "learning_rate": 3.82452109472969e-05, "loss": 0.4272, "step": 7386 }, { "epoch": 0.32440146217767873, "grad_norm": 2.125, "learning_rate": 3.823934292380793e-05, "loss": 0.4031, "step": 7388 }, { "epoch": 0.3244892806568822, "grad_norm": 2.203125, "learning_rate": 3.82334738864469e-05, "loss": 0.4268, "step": 7390 }, { "epoch": 0.32457709913608573, "grad_norm": 2.203125, "learning_rate": 3.822760383566327e-05, "loss": 0.413, "step": 7392 }, { "epoch": 0.3246649176152892, "grad_norm": 2.0625, "learning_rate": 3.822173277190657e-05, "loss": 0.4249, "step": 7394 }, { "epoch": 0.3247527360944927, "grad_norm": 2.109375, "learning_rate": 3.8215860695626396e-05, "loss": 0.4091, "step": 7396 }, { "epoch": 0.32484055457369615, "grad_norm": 1.9375, "learning_rate": 3.8209987607272444e-05, "loss": 0.4415, "step": 7398 }, { "epoch": 0.3249283730528997, "grad_norm": 2.34375, "learning_rate": 3.820411350729448e-05, "loss": 0.4045, "step": 7400 }, { "epoch": 0.32501619153210315, "grad_norm": 2.046875, "learning_rate": 3.819823839614234e-05, "loss": 0.4163, "step": 7402 }, { "epoch": 0.3251040100113066, "grad_norm": 2.375, "learning_rate": 3.8192362274265934e-05, "loss": 0.3869, "step": 7404 }, { "epoch": 0.3251918284905101, "grad_norm": 2.1875, "learning_rate": 3.8186485142115266e-05, "loss": 0.4192, "step": 7406 }, { "epoch": 0.3252796469697136, "grad_norm": 1.9609375, "learning_rate": 3.81806070001404e-05, "loss": 0.4053, "step": 7408 }, { "epoch": 0.3253674654489171, "grad_norm": 1.96875, "learning_rate": 3.817472784879149e-05, "loss": 0.4142, "step": 7410 }, { "epoch": 0.32545528392812056, "grad_norm": 1.984375, "learning_rate": 3.816884768851877e-05, "loss": 0.4375, "step": 7412 }, { "epoch": 0.32554310240732404, "grad_norm": 1.921875, "learning_rate": 3.816296651977254e-05, "loss": 0.4372, "step": 7414 }, { "epoch": 0.32563092088652756, "grad_norm": 2.171875, "learning_rate": 3.815708434300317e-05, "loss": 0.4311, "step": 7416 }, { "epoch": 0.32571873936573104, "grad_norm": 2.375, "learning_rate": 3.815120115866113e-05, "loss": 0.4264, "step": 7418 }, { "epoch": 0.3258065578449345, "grad_norm": 2.171875, "learning_rate": 3.814531696719695e-05, "loss": 0.4248, "step": 7420 }, { "epoch": 0.325894376324138, "grad_norm": 2.359375, "learning_rate": 3.813943176906125e-05, "loss": 0.4473, "step": 7422 }, { "epoch": 0.3259821948033415, "grad_norm": 2.3125, "learning_rate": 3.813354556470471e-05, "loss": 0.4678, "step": 7424 }, { "epoch": 0.326070013282545, "grad_norm": 2.46875, "learning_rate": 3.812765835457811e-05, "loss": 0.432, "step": 7426 }, { "epoch": 0.32615783176174845, "grad_norm": 2.0625, "learning_rate": 3.812177013913228e-05, "loss": 0.4186, "step": 7428 }, { "epoch": 0.326245650240952, "grad_norm": 2.03125, "learning_rate": 3.8115880918818147e-05, "loss": 0.4128, "step": 7430 }, { "epoch": 0.32633346872015545, "grad_norm": 2.109375, "learning_rate": 3.810999069408671e-05, "loss": 0.4101, "step": 7432 }, { "epoch": 0.3264212871993589, "grad_norm": 2.03125, "learning_rate": 3.810409946538904e-05, "loss": 0.3973, "step": 7434 }, { "epoch": 0.3265091056785624, "grad_norm": 1.921875, "learning_rate": 3.80982072331763e-05, "loss": 0.417, "step": 7436 }, { "epoch": 0.3265969241577659, "grad_norm": 2.015625, "learning_rate": 3.809231399789971e-05, "loss": 0.4263, "step": 7438 }, { "epoch": 0.3266847426369694, "grad_norm": 2.203125, "learning_rate": 3.808641976001057e-05, "loss": 0.4417, "step": 7440 }, { "epoch": 0.32677256111617287, "grad_norm": 2.234375, "learning_rate": 3.808052451996027e-05, "loss": 0.4295, "step": 7442 }, { "epoch": 0.32686037959537634, "grad_norm": 2.28125, "learning_rate": 3.8074628278200266e-05, "loss": 0.405, "step": 7444 }, { "epoch": 0.32694819807457987, "grad_norm": 2.265625, "learning_rate": 3.806873103518209e-05, "loss": 0.4161, "step": 7446 }, { "epoch": 0.32703601655378334, "grad_norm": 2.125, "learning_rate": 3.806283279135736e-05, "loss": 0.4151, "step": 7448 }, { "epoch": 0.3271238350329868, "grad_norm": 2.125, "learning_rate": 3.805693354717777e-05, "loss": 0.4141, "step": 7450 }, { "epoch": 0.3272116535121903, "grad_norm": 2.140625, "learning_rate": 3.805103330309508e-05, "loss": 0.4028, "step": 7452 }, { "epoch": 0.3272994719913938, "grad_norm": 2.203125, "learning_rate": 3.804513205956113e-05, "loss": 0.4393, "step": 7454 }, { "epoch": 0.3273872904705973, "grad_norm": 2.171875, "learning_rate": 3.8039229817027834e-05, "loss": 0.4222, "step": 7456 }, { "epoch": 0.32747510894980075, "grad_norm": 2.15625, "learning_rate": 3.803332657594719e-05, "loss": 0.4171, "step": 7458 }, { "epoch": 0.3275629274290042, "grad_norm": 2.03125, "learning_rate": 3.8027422336771275e-05, "loss": 0.3707, "step": 7460 }, { "epoch": 0.32765074590820775, "grad_norm": 2.03125, "learning_rate": 3.802151709995224e-05, "loss": 0.39, "step": 7462 }, { "epoch": 0.3277385643874112, "grad_norm": 2.5, "learning_rate": 3.80156108659423e-05, "loss": 0.44, "step": 7464 }, { "epoch": 0.3278263828666147, "grad_norm": 2.15625, "learning_rate": 3.800970363519376e-05, "loss": 0.3907, "step": 7466 }, { "epoch": 0.32791420134581817, "grad_norm": 2.421875, "learning_rate": 3.8003795408159004e-05, "loss": 0.412, "step": 7468 }, { "epoch": 0.3280020198250217, "grad_norm": 2.109375, "learning_rate": 3.799788618529046e-05, "loss": 0.4174, "step": 7470 }, { "epoch": 0.32808983830422517, "grad_norm": 2.25, "learning_rate": 3.7991975967040694e-05, "loss": 0.4239, "step": 7472 }, { "epoch": 0.32817765678342864, "grad_norm": 2.453125, "learning_rate": 3.798606475386229e-05, "loss": 0.4289, "step": 7474 }, { "epoch": 0.32826547526263217, "grad_norm": 2.234375, "learning_rate": 3.798015254620794e-05, "loss": 0.4332, "step": 7476 }, { "epoch": 0.32835329374183564, "grad_norm": 1.8984375, "learning_rate": 3.797423934453038e-05, "loss": 0.4123, "step": 7478 }, { "epoch": 0.3284411122210391, "grad_norm": 2.0625, "learning_rate": 3.796832514928247e-05, "loss": 0.4131, "step": 7480 }, { "epoch": 0.3285289307002426, "grad_norm": 2.0625, "learning_rate": 3.796240996091711e-05, "loss": 0.4259, "step": 7482 }, { "epoch": 0.3286167491794461, "grad_norm": 2.34375, "learning_rate": 3.795649377988729e-05, "loss": 0.3994, "step": 7484 }, { "epoch": 0.3287045676586496, "grad_norm": 1.8046875, "learning_rate": 3.795057660664607e-05, "loss": 0.4358, "step": 7486 }, { "epoch": 0.32879238613785305, "grad_norm": 2.03125, "learning_rate": 3.794465844164659e-05, "loss": 0.4326, "step": 7488 }, { "epoch": 0.3288802046170565, "grad_norm": 2.234375, "learning_rate": 3.793873928534206e-05, "loss": 0.4255, "step": 7490 }, { "epoch": 0.32896802309626005, "grad_norm": 1.9765625, "learning_rate": 3.793281913818578e-05, "loss": 0.4384, "step": 7492 }, { "epoch": 0.3290558415754635, "grad_norm": 1.90625, "learning_rate": 3.7926898000631106e-05, "loss": 0.4206, "step": 7494 }, { "epoch": 0.329143660054667, "grad_norm": 2.21875, "learning_rate": 3.792097587313148e-05, "loss": 0.4219, "step": 7496 }, { "epoch": 0.32923147853387047, "grad_norm": 2.140625, "learning_rate": 3.791505275614043e-05, "loss": 0.4579, "step": 7498 }, { "epoch": 0.329319297013074, "grad_norm": 2.0, "learning_rate": 3.790912865011154e-05, "loss": 0.4063, "step": 7500 }, { "epoch": 0.32940711549227747, "grad_norm": 2.015625, "learning_rate": 3.790320355549849e-05, "loss": 0.4278, "step": 7502 }, { "epoch": 0.32949493397148094, "grad_norm": 2.03125, "learning_rate": 3.789727747275502e-05, "loss": 0.4088, "step": 7504 }, { "epoch": 0.3295827524506844, "grad_norm": 2.28125, "learning_rate": 3.7891350402334935e-05, "loss": 0.3918, "step": 7506 }, { "epoch": 0.32967057092988794, "grad_norm": 2.0, "learning_rate": 3.788542234469216e-05, "loss": 0.4272, "step": 7508 }, { "epoch": 0.3297583894090914, "grad_norm": 2.53125, "learning_rate": 3.7879493300280643e-05, "loss": 0.4163, "step": 7510 }, { "epoch": 0.3298462078882949, "grad_norm": 2.3125, "learning_rate": 3.7873563269554454e-05, "loss": 0.436, "step": 7512 }, { "epoch": 0.32993402636749836, "grad_norm": 2.53125, "learning_rate": 3.78676322529677e-05, "loss": 0.4093, "step": 7514 }, { "epoch": 0.3300218448467019, "grad_norm": 2.625, "learning_rate": 3.786170025097457e-05, "loss": 0.4155, "step": 7516 }, { "epoch": 0.33010966332590536, "grad_norm": 2.046875, "learning_rate": 3.7855767264029366e-05, "loss": 0.401, "step": 7518 }, { "epoch": 0.33019748180510883, "grad_norm": 2.296875, "learning_rate": 3.784983329258642e-05, "loss": 0.4521, "step": 7520 }, { "epoch": 0.3302853002843123, "grad_norm": 2.15625, "learning_rate": 3.784389833710016e-05, "loss": 0.3999, "step": 7522 }, { "epoch": 0.3303731187635158, "grad_norm": 1.9375, "learning_rate": 3.783796239802509e-05, "loss": 0.44, "step": 7524 }, { "epoch": 0.3304609372427193, "grad_norm": 1.9609375, "learning_rate": 3.783202547581577e-05, "loss": 0.4255, "step": 7526 }, { "epoch": 0.33054875572192277, "grad_norm": 2.296875, "learning_rate": 3.782608757092687e-05, "loss": 0.4362, "step": 7528 }, { "epoch": 0.3306365742011263, "grad_norm": 2.03125, "learning_rate": 3.782014868381312e-05, "loss": 0.4049, "step": 7530 }, { "epoch": 0.33072439268032977, "grad_norm": 2.171875, "learning_rate": 3.781420881492929e-05, "loss": 0.4214, "step": 7532 }, { "epoch": 0.33081221115953324, "grad_norm": 2.109375, "learning_rate": 3.780826796473029e-05, "loss": 0.4206, "step": 7534 }, { "epoch": 0.3309000296387367, "grad_norm": 2.140625, "learning_rate": 3.780232613367105e-05, "loss": 0.4223, "step": 7536 }, { "epoch": 0.33098784811794024, "grad_norm": 1.8359375, "learning_rate": 3.779638332220662e-05, "loss": 0.3965, "step": 7538 }, { "epoch": 0.3310756665971437, "grad_norm": 2.0, "learning_rate": 3.7790439530792075e-05, "loss": 0.386, "step": 7540 }, { "epoch": 0.3311634850763472, "grad_norm": 2.109375, "learning_rate": 3.778449475988261e-05, "loss": 0.4401, "step": 7542 }, { "epoch": 0.33125130355555066, "grad_norm": 1.921875, "learning_rate": 3.777854900993347e-05, "loss": 0.4216, "step": 7544 }, { "epoch": 0.3313391220347542, "grad_norm": 2.015625, "learning_rate": 3.777260228139999e-05, "loss": 0.3927, "step": 7546 }, { "epoch": 0.33142694051395766, "grad_norm": 2.234375, "learning_rate": 3.776665457473756e-05, "loss": 0.4544, "step": 7548 }, { "epoch": 0.33151475899316113, "grad_norm": 1.9375, "learning_rate": 3.776070589040166e-05, "loss": 0.401, "step": 7550 }, { "epoch": 0.3316025774723646, "grad_norm": 2.09375, "learning_rate": 3.775475622884785e-05, "loss": 0.4037, "step": 7552 }, { "epoch": 0.33169039595156813, "grad_norm": 2.078125, "learning_rate": 3.774880559053175e-05, "loss": 0.4327, "step": 7554 }, { "epoch": 0.3317782144307716, "grad_norm": 2.0, "learning_rate": 3.7742853975909056e-05, "loss": 0.4202, "step": 7556 }, { "epoch": 0.3318660329099751, "grad_norm": 2.03125, "learning_rate": 3.773690138543555e-05, "loss": 0.3931, "step": 7558 }, { "epoch": 0.33195385138917854, "grad_norm": 2.09375, "learning_rate": 3.773094781956709e-05, "loss": 0.4, "step": 7560 }, { "epoch": 0.33204166986838207, "grad_norm": 2.125, "learning_rate": 3.772499327875959e-05, "loss": 0.4021, "step": 7562 }, { "epoch": 0.33212948834758554, "grad_norm": 2.4375, "learning_rate": 3.771903776346905e-05, "loss": 0.4401, "step": 7564 }, { "epoch": 0.332217306826789, "grad_norm": 1.9140625, "learning_rate": 3.771308127415155e-05, "loss": 0.4201, "step": 7566 }, { "epoch": 0.3323051253059925, "grad_norm": 2.15625, "learning_rate": 3.770712381126325e-05, "loss": 0.4519, "step": 7568 }, { "epoch": 0.332392943785196, "grad_norm": 2.1875, "learning_rate": 3.7701165375260344e-05, "loss": 0.4011, "step": 7570 }, { "epoch": 0.3324807622643995, "grad_norm": 2.171875, "learning_rate": 3.7695205966599154e-05, "loss": 0.3996, "step": 7572 }, { "epoch": 0.33256858074360296, "grad_norm": 2.171875, "learning_rate": 3.768924558573606e-05, "loss": 0.406, "step": 7574 }, { "epoch": 0.33265639922280643, "grad_norm": 2.09375, "learning_rate": 3.768328423312749e-05, "loss": 0.4221, "step": 7576 }, { "epoch": 0.33274421770200996, "grad_norm": 2.03125, "learning_rate": 3.767732190922997e-05, "loss": 0.4178, "step": 7578 }, { "epoch": 0.33283203618121343, "grad_norm": 2.09375, "learning_rate": 3.767135861450011e-05, "loss": 0.4026, "step": 7580 }, { "epoch": 0.3329198546604169, "grad_norm": 1.921875, "learning_rate": 3.7665394349394556e-05, "loss": 0.4005, "step": 7582 }, { "epoch": 0.33300767313962043, "grad_norm": 2.171875, "learning_rate": 3.765942911437007e-05, "loss": 0.4165, "step": 7584 }, { "epoch": 0.3330954916188239, "grad_norm": 1.9140625, "learning_rate": 3.7653462909883474e-05, "loss": 0.3877, "step": 7586 }, { "epoch": 0.3331833100980274, "grad_norm": 1.953125, "learning_rate": 3.764749573639165e-05, "loss": 0.4211, "step": 7588 }, { "epoch": 0.33327112857723085, "grad_norm": 2.453125, "learning_rate": 3.7641527594351577e-05, "loss": 0.4552, "step": 7590 }, { "epoch": 0.3333589470564344, "grad_norm": 2.203125, "learning_rate": 3.763555848422028e-05, "loss": 0.4462, "step": 7592 }, { "epoch": 0.33344676553563785, "grad_norm": 2.0625, "learning_rate": 3.762958840645489e-05, "loss": 0.4193, "step": 7594 }, { "epoch": 0.3335345840148413, "grad_norm": 2.0625, "learning_rate": 3.7623617361512595e-05, "loss": 0.4079, "step": 7596 }, { "epoch": 0.3336224024940448, "grad_norm": 2.3125, "learning_rate": 3.7617645349850655e-05, "loss": 0.4191, "step": 7598 }, { "epoch": 0.3337102209732483, "grad_norm": 2.109375, "learning_rate": 3.761167237192641e-05, "loss": 0.4295, "step": 7600 }, { "epoch": 0.3337980394524518, "grad_norm": 2.203125, "learning_rate": 3.7605698428197265e-05, "loss": 0.4517, "step": 7602 }, { "epoch": 0.33388585793165526, "grad_norm": 2.078125, "learning_rate": 3.7599723519120725e-05, "loss": 0.4201, "step": 7604 }, { "epoch": 0.33397367641085873, "grad_norm": 1.90625, "learning_rate": 3.759374764515433e-05, "loss": 0.4051, "step": 7606 }, { "epoch": 0.33406149489006226, "grad_norm": 2.21875, "learning_rate": 3.7587770806755715e-05, "loss": 0.4344, "step": 7608 }, { "epoch": 0.33414931336926573, "grad_norm": 2.0625, "learning_rate": 3.7581793004382603e-05, "loss": 0.4551, "step": 7610 }, { "epoch": 0.3342371318484692, "grad_norm": 2.296875, "learning_rate": 3.757581423849277e-05, "loss": 0.4337, "step": 7612 }, { "epoch": 0.3343249503276727, "grad_norm": 2.140625, "learning_rate": 3.7569834509544054e-05, "loss": 0.4249, "step": 7614 }, { "epoch": 0.3344127688068762, "grad_norm": 2.28125, "learning_rate": 3.756385381799441e-05, "loss": 0.4328, "step": 7616 }, { "epoch": 0.3345005872860797, "grad_norm": 1.984375, "learning_rate": 3.755787216430182e-05, "loss": 0.3575, "step": 7618 }, { "epoch": 0.33458840576528315, "grad_norm": 2.46875, "learning_rate": 3.755188954892438e-05, "loss": 0.4209, "step": 7620 }, { "epoch": 0.3346762242444866, "grad_norm": 2.40625, "learning_rate": 3.754590597232023e-05, "loss": 0.4171, "step": 7622 }, { "epoch": 0.33476404272369015, "grad_norm": 2.21875, "learning_rate": 3.753992143494759e-05, "loss": 0.4345, "step": 7624 }, { "epoch": 0.3348518612028936, "grad_norm": 2.109375, "learning_rate": 3.753393593726475e-05, "loss": 0.4031, "step": 7626 }, { "epoch": 0.3349396796820971, "grad_norm": 2.203125, "learning_rate": 3.7527949479730104e-05, "loss": 0.4153, "step": 7628 }, { "epoch": 0.3350274981613006, "grad_norm": 2.078125, "learning_rate": 3.752196206280207e-05, "loss": 0.4425, "step": 7630 }, { "epoch": 0.3351153166405041, "grad_norm": 1.9609375, "learning_rate": 3.7515973686939184e-05, "loss": 0.3951, "step": 7632 }, { "epoch": 0.33520313511970756, "grad_norm": 1.984375, "learning_rate": 3.7509984352600044e-05, "loss": 0.4232, "step": 7634 }, { "epoch": 0.33529095359891103, "grad_norm": 2.609375, "learning_rate": 3.7503994060243296e-05, "loss": 0.3971, "step": 7636 }, { "epoch": 0.33537877207811456, "grad_norm": 2.234375, "learning_rate": 3.7498002810327694e-05, "loss": 0.451, "step": 7638 }, { "epoch": 0.33546659055731803, "grad_norm": 2.234375, "learning_rate": 3.749201060331203e-05, "loss": 0.3823, "step": 7640 }, { "epoch": 0.3355544090365215, "grad_norm": 2.328125, "learning_rate": 3.74860174396552e-05, "loss": 0.4138, "step": 7642 }, { "epoch": 0.335642227515725, "grad_norm": 1.7890625, "learning_rate": 3.7480023319816164e-05, "loss": 0.4207, "step": 7644 }, { "epoch": 0.3357300459949285, "grad_norm": 1.8828125, "learning_rate": 3.747402824425395e-05, "loss": 0.3933, "step": 7646 }, { "epoch": 0.335817864474132, "grad_norm": 2.359375, "learning_rate": 3.7468032213427666e-05, "loss": 0.4109, "step": 7648 }, { "epoch": 0.33590568295333545, "grad_norm": 1.8046875, "learning_rate": 3.7462035227796484e-05, "loss": 0.447, "step": 7650 }, { "epoch": 0.3359935014325389, "grad_norm": 2.171875, "learning_rate": 3.745603728781966e-05, "loss": 0.3935, "step": 7652 }, { "epoch": 0.33608131991174245, "grad_norm": 2.03125, "learning_rate": 3.745003839395651e-05, "loss": 0.4265, "step": 7654 }, { "epoch": 0.3361691383909459, "grad_norm": 1.9453125, "learning_rate": 3.744403854666643e-05, "loss": 0.4304, "step": 7656 }, { "epoch": 0.3362569568701494, "grad_norm": 1.953125, "learning_rate": 3.743803774640891e-05, "loss": 0.4293, "step": 7658 }, { "epoch": 0.33634477534935286, "grad_norm": 2.125, "learning_rate": 3.743203599364347e-05, "loss": 0.3955, "step": 7660 }, { "epoch": 0.3364325938285564, "grad_norm": 2.0625, "learning_rate": 3.7426033288829725e-05, "loss": 0.4054, "step": 7662 }, { "epoch": 0.33652041230775986, "grad_norm": 2.21875, "learning_rate": 3.7420029632427375e-05, "loss": 0.4184, "step": 7664 }, { "epoch": 0.33660823078696334, "grad_norm": 2.28125, "learning_rate": 3.7414025024896195e-05, "loss": 0.4242, "step": 7666 }, { "epoch": 0.3366960492661668, "grad_norm": 2.34375, "learning_rate": 3.740801946669599e-05, "loss": 0.4264, "step": 7668 }, { "epoch": 0.33678386774537034, "grad_norm": 2.34375, "learning_rate": 3.740201295828668e-05, "loss": 0.4062, "step": 7670 }, { "epoch": 0.3368716862245738, "grad_norm": 2.453125, "learning_rate": 3.739600550012824e-05, "loss": 0.4151, "step": 7672 }, { "epoch": 0.3369595047037773, "grad_norm": 2.046875, "learning_rate": 3.738999709268074e-05, "loss": 0.3959, "step": 7674 }, { "epoch": 0.33704732318298075, "grad_norm": 2.15625, "learning_rate": 3.738398773640428e-05, "loss": 0.4408, "step": 7676 }, { "epoch": 0.3371351416621843, "grad_norm": 1.921875, "learning_rate": 3.737797743175907e-05, "loss": 0.4428, "step": 7678 }, { "epoch": 0.33722296014138775, "grad_norm": 2.078125, "learning_rate": 3.7371966179205386e-05, "loss": 0.4136, "step": 7680 }, { "epoch": 0.3373107786205912, "grad_norm": 1.8359375, "learning_rate": 3.7365953979203574e-05, "loss": 0.3965, "step": 7682 }, { "epoch": 0.33739859709979475, "grad_norm": 1.890625, "learning_rate": 3.735994083221403e-05, "loss": 0.4064, "step": 7684 }, { "epoch": 0.3374864155789982, "grad_norm": 1.984375, "learning_rate": 3.735392673869726e-05, "loss": 0.4215, "step": 7686 }, { "epoch": 0.3375742340582017, "grad_norm": 2.25, "learning_rate": 3.734791169911382e-05, "loss": 0.4152, "step": 7688 }, { "epoch": 0.33766205253740517, "grad_norm": 2.328125, "learning_rate": 3.734189571392434e-05, "loss": 0.402, "step": 7690 }, { "epoch": 0.3377498710166087, "grad_norm": 2.0, "learning_rate": 3.7335878783589525e-05, "loss": 0.4335, "step": 7692 }, { "epoch": 0.33783768949581217, "grad_norm": 1.984375, "learning_rate": 3.7329860908570156e-05, "loss": 0.4104, "step": 7694 }, { "epoch": 0.33792550797501564, "grad_norm": 1.921875, "learning_rate": 3.732384208932709e-05, "loss": 0.4066, "step": 7696 }, { "epoch": 0.3380133264542191, "grad_norm": 1.9609375, "learning_rate": 3.7317822326321236e-05, "loss": 0.4157, "step": 7698 }, { "epoch": 0.33810114493342264, "grad_norm": 2.296875, "learning_rate": 3.7311801620013596e-05, "loss": 0.4167, "step": 7700 }, { "epoch": 0.3381889634126261, "grad_norm": 2.0, "learning_rate": 3.730577997086524e-05, "loss": 0.4178, "step": 7702 }, { "epoch": 0.3382767818918296, "grad_norm": 2.1875, "learning_rate": 3.72997573793373e-05, "loss": 0.4049, "step": 7704 }, { "epoch": 0.33836460037103305, "grad_norm": 2.21875, "learning_rate": 3.729373384589099e-05, "loss": 0.428, "step": 7706 }, { "epoch": 0.3384524188502366, "grad_norm": 2.171875, "learning_rate": 3.72877093709876e-05, "loss": 0.4219, "step": 7708 }, { "epoch": 0.33854023732944005, "grad_norm": 2.328125, "learning_rate": 3.728168395508848e-05, "loss": 0.415, "step": 7710 }, { "epoch": 0.3386280558086435, "grad_norm": 2.109375, "learning_rate": 3.7275657598655066e-05, "loss": 0.4394, "step": 7712 }, { "epoch": 0.338715874287847, "grad_norm": 2.171875, "learning_rate": 3.726963030214884e-05, "loss": 0.425, "step": 7714 }, { "epoch": 0.3388036927670505, "grad_norm": 2.125, "learning_rate": 3.726360206603138e-05, "loss": 0.4231, "step": 7716 }, { "epoch": 0.338891511246254, "grad_norm": 2.03125, "learning_rate": 3.725757289076434e-05, "loss": 0.4206, "step": 7718 }, { "epoch": 0.33897932972545747, "grad_norm": 2.203125, "learning_rate": 3.725154277680943e-05, "loss": 0.4093, "step": 7720 }, { "epoch": 0.33906714820466094, "grad_norm": 1.90625, "learning_rate": 3.7245511724628444e-05, "loss": 0.4074, "step": 7722 }, { "epoch": 0.33915496668386447, "grad_norm": 2.15625, "learning_rate": 3.7239479734683226e-05, "loss": 0.4005, "step": 7724 }, { "epoch": 0.33924278516306794, "grad_norm": 2.484375, "learning_rate": 3.723344680743571e-05, "loss": 0.436, "step": 7726 }, { "epoch": 0.3393306036422714, "grad_norm": 1.96875, "learning_rate": 3.722741294334792e-05, "loss": 0.4087, "step": 7728 }, { "epoch": 0.33941842212147494, "grad_norm": 2.265625, "learning_rate": 3.722137814288191e-05, "loss": 0.4382, "step": 7730 }, { "epoch": 0.3395062406006784, "grad_norm": 2.140625, "learning_rate": 3.721534240649983e-05, "loss": 0.3787, "step": 7732 }, { "epoch": 0.3395940590798819, "grad_norm": 2.28125, "learning_rate": 3.720930573466389e-05, "loss": 0.401, "step": 7734 }, { "epoch": 0.33968187755908535, "grad_norm": 2.125, "learning_rate": 3.720326812783641e-05, "loss": 0.4417, "step": 7736 }, { "epoch": 0.3397696960382889, "grad_norm": 2.3125, "learning_rate": 3.719722958647972e-05, "loss": 0.4282, "step": 7738 }, { "epoch": 0.33985751451749235, "grad_norm": 2.125, "learning_rate": 3.719119011105627e-05, "loss": 0.4352, "step": 7740 }, { "epoch": 0.3399453329966958, "grad_norm": 2.09375, "learning_rate": 3.7185149702028555e-05, "loss": 0.4154, "step": 7742 }, { "epoch": 0.3400331514758993, "grad_norm": 2.234375, "learning_rate": 3.717910835985916e-05, "loss": 0.4041, "step": 7744 }, { "epoch": 0.3401209699551028, "grad_norm": 2.09375, "learning_rate": 3.717306608501072e-05, "loss": 0.3961, "step": 7746 }, { "epoch": 0.3402087884343063, "grad_norm": 2.140625, "learning_rate": 3.716702287794597e-05, "loss": 0.4197, "step": 7748 }, { "epoch": 0.34029660691350977, "grad_norm": 2.046875, "learning_rate": 3.7160978739127684e-05, "loss": 0.4037, "step": 7750 }, { "epoch": 0.34038442539271324, "grad_norm": 2.234375, "learning_rate": 3.7154933669018724e-05, "loss": 0.4529, "step": 7752 }, { "epoch": 0.34047224387191677, "grad_norm": 2.03125, "learning_rate": 3.714888766808204e-05, "loss": 0.414, "step": 7754 }, { "epoch": 0.34056006235112024, "grad_norm": 2.265625, "learning_rate": 3.714284073678063e-05, "loss": 0.4048, "step": 7756 }, { "epoch": 0.3406478808303237, "grad_norm": 2.0625, "learning_rate": 3.7136792875577556e-05, "loss": 0.4016, "step": 7758 }, { "epoch": 0.3407356993095272, "grad_norm": 2.46875, "learning_rate": 3.713074408493598e-05, "loss": 0.4241, "step": 7760 }, { "epoch": 0.3408235177887307, "grad_norm": 2.1875, "learning_rate": 3.7124694365319114e-05, "loss": 0.4218, "step": 7762 }, { "epoch": 0.3409113362679342, "grad_norm": 1.90625, "learning_rate": 3.711864371719024e-05, "loss": 0.4143, "step": 7764 }, { "epoch": 0.34099915474713766, "grad_norm": 2.0, "learning_rate": 3.711259214101273e-05, "loss": 0.4018, "step": 7766 }, { "epoch": 0.34108697322634113, "grad_norm": 2.171875, "learning_rate": 3.710653963725001e-05, "loss": 0.4196, "step": 7768 }, { "epoch": 0.34117479170554466, "grad_norm": 2.109375, "learning_rate": 3.710048620636558e-05, "loss": 0.404, "step": 7770 }, { "epoch": 0.3412626101847481, "grad_norm": 1.984375, "learning_rate": 3.7094431848823026e-05, "loss": 0.3881, "step": 7772 }, { "epoch": 0.3413504286639516, "grad_norm": 2.34375, "learning_rate": 3.708837656508597e-05, "loss": 0.3954, "step": 7774 }, { "epoch": 0.34143824714315507, "grad_norm": 2.1875, "learning_rate": 3.708232035561815e-05, "loss": 0.3994, "step": 7776 }, { "epoch": 0.3415260656223586, "grad_norm": 2.21875, "learning_rate": 3.707626322088333e-05, "loss": 0.4353, "step": 7778 }, { "epoch": 0.34161388410156207, "grad_norm": 2.15625, "learning_rate": 3.707020516134539e-05, "loss": 0.4419, "step": 7780 }, { "epoch": 0.34170170258076554, "grad_norm": 2.203125, "learning_rate": 3.706414617746823e-05, "loss": 0.4037, "step": 7782 }, { "epoch": 0.34178952105996907, "grad_norm": 1.9453125, "learning_rate": 3.705808626971587e-05, "loss": 0.4231, "step": 7784 }, { "epoch": 0.34187733953917254, "grad_norm": 2.09375, "learning_rate": 3.705202543855237e-05, "loss": 0.4201, "step": 7786 }, { "epoch": 0.341965158018376, "grad_norm": 2.125, "learning_rate": 3.704596368444187e-05, "loss": 0.4198, "step": 7788 }, { "epoch": 0.3420529764975795, "grad_norm": 1.8671875, "learning_rate": 3.70399010078486e-05, "loss": 0.4011, "step": 7790 }, { "epoch": 0.342140794976783, "grad_norm": 2.015625, "learning_rate": 3.703383740923682e-05, "loss": 0.4002, "step": 7792 }, { "epoch": 0.3422286134559865, "grad_norm": 2.171875, "learning_rate": 3.7027772889070875e-05, "loss": 0.4169, "step": 7794 }, { "epoch": 0.34231643193518996, "grad_norm": 1.859375, "learning_rate": 3.702170744781521e-05, "loss": 0.3867, "step": 7796 }, { "epoch": 0.34240425041439343, "grad_norm": 2.03125, "learning_rate": 3.70156410859343e-05, "loss": 0.4391, "step": 7798 }, { "epoch": 0.34249206889359696, "grad_norm": 2.296875, "learning_rate": 3.700957380389272e-05, "loss": 0.4103, "step": 7800 }, { "epoch": 0.34257988737280043, "grad_norm": 2.28125, "learning_rate": 3.7003505602155085e-05, "loss": 0.4221, "step": 7802 }, { "epoch": 0.3426677058520039, "grad_norm": 2.0, "learning_rate": 3.6997436481186134e-05, "loss": 0.4157, "step": 7804 }, { "epoch": 0.3427555243312074, "grad_norm": 2.21875, "learning_rate": 3.699136644145061e-05, "loss": 0.4063, "step": 7806 }, { "epoch": 0.3428433428104109, "grad_norm": 2.125, "learning_rate": 3.698529548341337e-05, "loss": 0.4183, "step": 7808 }, { "epoch": 0.3429311612896144, "grad_norm": 1.875, "learning_rate": 3.697922360753933e-05, "loss": 0.4057, "step": 7810 }, { "epoch": 0.34301897976881784, "grad_norm": 1.828125, "learning_rate": 3.697315081429348e-05, "loss": 0.3984, "step": 7812 }, { "epoch": 0.3431067982480213, "grad_norm": 1.9609375, "learning_rate": 3.696707710414086e-05, "loss": 0.4262, "step": 7814 }, { "epoch": 0.34319461672722484, "grad_norm": 2.0625, "learning_rate": 3.696100247754661e-05, "loss": 0.4487, "step": 7816 }, { "epoch": 0.3432824352064283, "grad_norm": 2.25, "learning_rate": 3.6954926934975925e-05, "loss": 0.4345, "step": 7818 }, { "epoch": 0.3433702536856318, "grad_norm": 2.3125, "learning_rate": 3.694885047689407e-05, "loss": 0.3907, "step": 7820 }, { "epoch": 0.34345807216483526, "grad_norm": 2.1875, "learning_rate": 3.694277310376639e-05, "loss": 0.4209, "step": 7822 }, { "epoch": 0.3435458906440388, "grad_norm": 2.390625, "learning_rate": 3.693669481605827e-05, "loss": 0.4242, "step": 7824 }, { "epoch": 0.34363370912324226, "grad_norm": 2.640625, "learning_rate": 3.69306156142352e-05, "loss": 0.3965, "step": 7826 }, { "epoch": 0.34372152760244573, "grad_norm": 2.671875, "learning_rate": 3.692453549876273e-05, "loss": 0.4348, "step": 7828 }, { "epoch": 0.34380934608164926, "grad_norm": 2.375, "learning_rate": 3.691845447010647e-05, "loss": 0.3989, "step": 7830 }, { "epoch": 0.34389716456085273, "grad_norm": 2.40625, "learning_rate": 3.691237252873211e-05, "loss": 0.3943, "step": 7832 }, { "epoch": 0.3439849830400562, "grad_norm": 2.234375, "learning_rate": 3.690628967510541e-05, "loss": 0.4105, "step": 7834 }, { "epoch": 0.3440728015192597, "grad_norm": 2.46875, "learning_rate": 3.69002059096922e-05, "loss": 0.4227, "step": 7836 }, { "epoch": 0.3441606199984632, "grad_norm": 2.265625, "learning_rate": 3.6894121232958354e-05, "loss": 0.4361, "step": 7838 }, { "epoch": 0.3442484384776667, "grad_norm": 2.15625, "learning_rate": 3.688803564536986e-05, "loss": 0.4565, "step": 7840 }, { "epoch": 0.34433625695687015, "grad_norm": 2.09375, "learning_rate": 3.688194914739274e-05, "loss": 0.4395, "step": 7842 }, { "epoch": 0.3444240754360736, "grad_norm": 2.484375, "learning_rate": 3.687586173949311e-05, "loss": 0.4276, "step": 7844 }, { "epoch": 0.34451189391527715, "grad_norm": 2.3125, "learning_rate": 3.686977342213714e-05, "loss": 0.4263, "step": 7846 }, { "epoch": 0.3445997123944806, "grad_norm": 2.765625, "learning_rate": 3.686368419579108e-05, "loss": 0.3967, "step": 7848 }, { "epoch": 0.3446875308736841, "grad_norm": 2.328125, "learning_rate": 3.685759406092124e-05, "loss": 0.4122, "step": 7850 }, { "epoch": 0.34477534935288756, "grad_norm": 1.8984375, "learning_rate": 3.6851503017994e-05, "loss": 0.4182, "step": 7852 }, { "epoch": 0.3448631678320911, "grad_norm": 2.0, "learning_rate": 3.6845411067475825e-05, "loss": 0.4255, "step": 7854 }, { "epoch": 0.34495098631129456, "grad_norm": 2.140625, "learning_rate": 3.683931820983322e-05, "loss": 0.3841, "step": 7856 }, { "epoch": 0.34503880479049803, "grad_norm": 2.09375, "learning_rate": 3.68332244455328e-05, "loss": 0.4518, "step": 7858 }, { "epoch": 0.3451266232697015, "grad_norm": 2.625, "learning_rate": 3.6827129775041216e-05, "loss": 0.3772, "step": 7860 }, { "epoch": 0.34521444174890503, "grad_norm": 2.0625, "learning_rate": 3.682103419882519e-05, "loss": 0.4353, "step": 7862 }, { "epoch": 0.3453022602281085, "grad_norm": 2.078125, "learning_rate": 3.6814937717351525e-05, "loss": 0.3897, "step": 7864 }, { "epoch": 0.345390078707312, "grad_norm": 2.296875, "learning_rate": 3.6808840331087115e-05, "loss": 0.4197, "step": 7866 }, { "epoch": 0.34547789718651545, "grad_norm": 1.953125, "learning_rate": 3.6802742040498875e-05, "loss": 0.4177, "step": 7868 }, { "epoch": 0.345565715665719, "grad_norm": 2.203125, "learning_rate": 3.679664284605381e-05, "loss": 0.4177, "step": 7870 }, { "epoch": 0.34565353414492245, "grad_norm": 2.390625, "learning_rate": 3.679054274821903e-05, "loss": 0.4031, "step": 7872 }, { "epoch": 0.3457413526241259, "grad_norm": 2.6875, "learning_rate": 3.678444174746164e-05, "loss": 0.4048, "step": 7874 }, { "epoch": 0.3458291711033294, "grad_norm": 2.265625, "learning_rate": 3.677833984424888e-05, "loss": 0.4164, "step": 7876 }, { "epoch": 0.3459169895825329, "grad_norm": 2.140625, "learning_rate": 3.677223703904803e-05, "loss": 0.4107, "step": 7878 }, { "epoch": 0.3460048080617364, "grad_norm": 2.65625, "learning_rate": 3.6766133332326455e-05, "loss": 0.4107, "step": 7880 }, { "epoch": 0.34609262654093986, "grad_norm": 2.375, "learning_rate": 3.676002872455157e-05, "loss": 0.4097, "step": 7882 }, { "epoch": 0.3461804450201434, "grad_norm": 2.421875, "learning_rate": 3.675392321619086e-05, "loss": 0.4145, "step": 7884 }, { "epoch": 0.34626826349934686, "grad_norm": 1.921875, "learning_rate": 3.674781680771189e-05, "loss": 0.4026, "step": 7886 }, { "epoch": 0.34635608197855033, "grad_norm": 1.921875, "learning_rate": 3.674170949958229e-05, "loss": 0.4055, "step": 7888 }, { "epoch": 0.3464439004577538, "grad_norm": 1.8125, "learning_rate": 3.673560129226976e-05, "loss": 0.4262, "step": 7890 }, { "epoch": 0.34653171893695733, "grad_norm": 2.359375, "learning_rate": 3.6729492186242073e-05, "loss": 0.4097, "step": 7892 }, { "epoch": 0.3466195374161608, "grad_norm": 2.140625, "learning_rate": 3.672338218196708e-05, "loss": 0.4043, "step": 7894 }, { "epoch": 0.3467073558953643, "grad_norm": 2.1875, "learning_rate": 3.6717271279912645e-05, "loss": 0.4346, "step": 7896 }, { "epoch": 0.34679517437456775, "grad_norm": 2.296875, "learning_rate": 3.6711159480546785e-05, "loss": 0.4076, "step": 7898 }, { "epoch": 0.3468829928537713, "grad_norm": 2.515625, "learning_rate": 3.6705046784337514e-05, "loss": 0.4282, "step": 7900 }, { "epoch": 0.34697081133297475, "grad_norm": 2.734375, "learning_rate": 3.669893319175296e-05, "loss": 0.4104, "step": 7902 }, { "epoch": 0.3470586298121782, "grad_norm": 2.5, "learning_rate": 3.6692818703261286e-05, "loss": 0.4188, "step": 7904 }, { "epoch": 0.3471464482913817, "grad_norm": 1.8671875, "learning_rate": 3.668670331933076e-05, "loss": 0.4156, "step": 7906 }, { "epoch": 0.3472342667705852, "grad_norm": 2.078125, "learning_rate": 3.6680587040429696e-05, "loss": 0.4276, "step": 7908 }, { "epoch": 0.3473220852497887, "grad_norm": 1.796875, "learning_rate": 3.667446986702647e-05, "loss": 0.4195, "step": 7910 }, { "epoch": 0.34740990372899216, "grad_norm": 2.0, "learning_rate": 3.6668351799589557e-05, "loss": 0.4033, "step": 7912 }, { "epoch": 0.34749772220819564, "grad_norm": 2.1875, "learning_rate": 3.666223283858745e-05, "loss": 0.4044, "step": 7914 }, { "epoch": 0.34758554068739916, "grad_norm": 1.9921875, "learning_rate": 3.6656112984488765e-05, "loss": 0.4108, "step": 7916 }, { "epoch": 0.34767335916660264, "grad_norm": 2.28125, "learning_rate": 3.664999223776215e-05, "loss": 0.4093, "step": 7918 }, { "epoch": 0.3477611776458061, "grad_norm": 2.109375, "learning_rate": 3.664387059887634e-05, "loss": 0.3886, "step": 7920 }, { "epoch": 0.3478489961250096, "grad_norm": 2.0625, "learning_rate": 3.6637748068300123e-05, "loss": 0.4283, "step": 7922 }, { "epoch": 0.3479368146042131, "grad_norm": 2.015625, "learning_rate": 3.663162464650237e-05, "loss": 0.398, "step": 7924 }, { "epoch": 0.3480246330834166, "grad_norm": 1.7578125, "learning_rate": 3.662550033395202e-05, "loss": 0.4176, "step": 7926 }, { "epoch": 0.34811245156262005, "grad_norm": 2.0, "learning_rate": 3.661937513111806e-05, "loss": 0.3965, "step": 7928 }, { "epoch": 0.3482002700418235, "grad_norm": 2.34375, "learning_rate": 3.661324903846957e-05, "loss": 0.4322, "step": 7930 }, { "epoch": 0.34828808852102705, "grad_norm": 2.1875, "learning_rate": 3.660712205647568e-05, "loss": 0.3999, "step": 7932 }, { "epoch": 0.3483759070002305, "grad_norm": 2.234375, "learning_rate": 3.6600994185605614e-05, "loss": 0.4251, "step": 7934 }, { "epoch": 0.348463725479434, "grad_norm": 2.4375, "learning_rate": 3.6594865426328625e-05, "loss": 0.4191, "step": 7936 }, { "epoch": 0.3485515439586375, "grad_norm": 2.484375, "learning_rate": 3.658873577911406e-05, "loss": 0.3965, "step": 7938 }, { "epoch": 0.348639362437841, "grad_norm": 1.9140625, "learning_rate": 3.658260524443133e-05, "loss": 0.4126, "step": 7940 }, { "epoch": 0.34872718091704447, "grad_norm": 2.046875, "learning_rate": 3.657647382274992e-05, "loss": 0.4249, "step": 7942 }, { "epoch": 0.34881499939624794, "grad_norm": 2.203125, "learning_rate": 3.657034151453936e-05, "loss": 0.39, "step": 7944 }, { "epoch": 0.34890281787545147, "grad_norm": 2.1875, "learning_rate": 3.656420832026928e-05, "loss": 0.4209, "step": 7946 }, { "epoch": 0.34899063635465494, "grad_norm": 1.890625, "learning_rate": 3.655807424040936e-05, "loss": 0.4177, "step": 7948 }, { "epoch": 0.3490784548338584, "grad_norm": 1.921875, "learning_rate": 3.655193927542933e-05, "loss": 0.4071, "step": 7950 }, { "epoch": 0.3491662733130619, "grad_norm": 1.921875, "learning_rate": 3.654580342579903e-05, "loss": 0.4208, "step": 7952 }, { "epoch": 0.3492540917922654, "grad_norm": 2.015625, "learning_rate": 3.6539666691988336e-05, "loss": 0.3849, "step": 7954 }, { "epoch": 0.3493419102714689, "grad_norm": 1.8828125, "learning_rate": 3.65335290744672e-05, "loss": 0.4318, "step": 7956 }, { "epoch": 0.34942972875067235, "grad_norm": 1.890625, "learning_rate": 3.6527390573705645e-05, "loss": 0.3816, "step": 7958 }, { "epoch": 0.3495175472298758, "grad_norm": 2.015625, "learning_rate": 3.652125119017375e-05, "loss": 0.4051, "step": 7960 }, { "epoch": 0.34960536570907935, "grad_norm": 2.109375, "learning_rate": 3.651511092434168e-05, "loss": 0.386, "step": 7962 }, { "epoch": 0.3496931841882828, "grad_norm": 1.84375, "learning_rate": 3.650896977667965e-05, "loss": 0.438, "step": 7964 }, { "epoch": 0.3497810026674863, "grad_norm": 2.1875, "learning_rate": 3.6502827747657964e-05, "loss": 0.3965, "step": 7966 }, { "epoch": 0.34986882114668977, "grad_norm": 1.9375, "learning_rate": 3.649668483774696e-05, "loss": 0.419, "step": 7968 }, { "epoch": 0.3499566396258933, "grad_norm": 1.9140625, "learning_rate": 3.649054104741709e-05, "loss": 0.3912, "step": 7970 }, { "epoch": 0.35004445810509677, "grad_norm": 2.109375, "learning_rate": 3.648439637713883e-05, "loss": 0.4051, "step": 7972 }, { "epoch": 0.35013227658430024, "grad_norm": 1.9453125, "learning_rate": 3.6478250827382734e-05, "loss": 0.4089, "step": 7974 }, { "epoch": 0.3502200950635037, "grad_norm": 1.96875, "learning_rate": 3.647210439861944e-05, "loss": 0.4123, "step": 7976 }, { "epoch": 0.35030791354270724, "grad_norm": 2.125, "learning_rate": 3.646595709131965e-05, "loss": 0.4088, "step": 7978 }, { "epoch": 0.3503957320219107, "grad_norm": 1.8671875, "learning_rate": 3.6459808905954105e-05, "loss": 0.396, "step": 7980 }, { "epoch": 0.3504835505011142, "grad_norm": 1.8203125, "learning_rate": 3.645365984299366e-05, "loss": 0.4095, "step": 7982 }, { "epoch": 0.3505713689803177, "grad_norm": 2.015625, "learning_rate": 3.644750990290919e-05, "loss": 0.4179, "step": 7984 }, { "epoch": 0.3506591874595212, "grad_norm": 2.171875, "learning_rate": 3.6441359086171665e-05, "loss": 0.4492, "step": 7986 }, { "epoch": 0.35074700593872465, "grad_norm": 2.015625, "learning_rate": 3.643520739325213e-05, "loss": 0.4249, "step": 7988 }, { "epoch": 0.3508348244179281, "grad_norm": 2.3125, "learning_rate": 3.6429054824621656e-05, "loss": 0.4198, "step": 7990 }, { "epoch": 0.35092264289713165, "grad_norm": 1.9296875, "learning_rate": 3.6422901380751436e-05, "loss": 0.4131, "step": 7992 }, { "epoch": 0.3510104613763351, "grad_norm": 2.21875, "learning_rate": 3.641674706211269e-05, "loss": 0.4094, "step": 7994 }, { "epoch": 0.3510982798555386, "grad_norm": 2.1875, "learning_rate": 3.641059186917671e-05, "loss": 0.418, "step": 7996 }, { "epoch": 0.35118609833474207, "grad_norm": 2.34375, "learning_rate": 3.6404435802414866e-05, "loss": 0.4279, "step": 7998 }, { "epoch": 0.3512739168139456, "grad_norm": 1.8984375, "learning_rate": 3.63982788622986e-05, "loss": 0.4254, "step": 8000 }, { "epoch": 0.35136173529314907, "grad_norm": 2.125, "learning_rate": 3.63921210492994e-05, "loss": 0.3997, "step": 8002 }, { "epoch": 0.35144955377235254, "grad_norm": 2.1875, "learning_rate": 3.638596236388886e-05, "loss": 0.4476, "step": 8004 }, { "epoch": 0.351537372251556, "grad_norm": 2.09375, "learning_rate": 3.637980280653858e-05, "loss": 0.4054, "step": 8006 }, { "epoch": 0.35162519073075954, "grad_norm": 1.984375, "learning_rate": 3.637364237772027e-05, "loss": 0.4005, "step": 8008 }, { "epoch": 0.351713009209963, "grad_norm": 2.140625, "learning_rate": 3.63674810779057e-05, "loss": 0.4214, "step": 8010 }, { "epoch": 0.3518008276891665, "grad_norm": 2.015625, "learning_rate": 3.63613189075667e-05, "loss": 0.4369, "step": 8012 }, { "epoch": 0.35188864616836996, "grad_norm": 2.21875, "learning_rate": 3.6355155867175185e-05, "loss": 0.4079, "step": 8014 }, { "epoch": 0.3519764646475735, "grad_norm": 2.328125, "learning_rate": 3.63489919572031e-05, "loss": 0.3932, "step": 8016 }, { "epoch": 0.35206428312677696, "grad_norm": 2.6875, "learning_rate": 3.6342827178122505e-05, "loss": 0.4338, "step": 8018 }, { "epoch": 0.3521521016059804, "grad_norm": 2.234375, "learning_rate": 3.6336661530405486e-05, "loss": 0.4306, "step": 8020 }, { "epoch": 0.3522399200851839, "grad_norm": 2.125, "learning_rate": 3.63304950145242e-05, "loss": 0.3814, "step": 8022 }, { "epoch": 0.3523277385643874, "grad_norm": 2.375, "learning_rate": 3.6324327630950886e-05, "loss": 0.4214, "step": 8024 }, { "epoch": 0.3524155570435909, "grad_norm": 1.9921875, "learning_rate": 3.6318159380157855e-05, "loss": 0.4044, "step": 8026 }, { "epoch": 0.35250337552279437, "grad_norm": 1.953125, "learning_rate": 3.631199026261746e-05, "loss": 0.402, "step": 8028 }, { "epoch": 0.35259119400199784, "grad_norm": 1.78125, "learning_rate": 3.6305820278802143e-05, "loss": 0.4106, "step": 8030 }, { "epoch": 0.35267901248120137, "grad_norm": 1.90625, "learning_rate": 3.62996494291844e-05, "loss": 0.3897, "step": 8032 }, { "epoch": 0.35276683096040484, "grad_norm": 1.9765625, "learning_rate": 3.62934777142368e-05, "loss": 0.4002, "step": 8034 }, { "epoch": 0.3528546494396083, "grad_norm": 1.9609375, "learning_rate": 3.6287305134431956e-05, "loss": 0.4293, "step": 8036 }, { "epoch": 0.35294246791881184, "grad_norm": 2.078125, "learning_rate": 3.6281131690242594e-05, "loss": 0.4017, "step": 8038 }, { "epoch": 0.3530302863980153, "grad_norm": 1.9609375, "learning_rate": 3.627495738214145e-05, "loss": 0.3893, "step": 8040 }, { "epoch": 0.3531181048772188, "grad_norm": 1.9453125, "learning_rate": 3.626878221060137e-05, "loss": 0.4135, "step": 8042 }, { "epoch": 0.35320592335642226, "grad_norm": 2.296875, "learning_rate": 3.626260617609525e-05, "loss": 0.4264, "step": 8044 }, { "epoch": 0.3532937418356258, "grad_norm": 1.96875, "learning_rate": 3.625642927909605e-05, "loss": 0.4158, "step": 8046 }, { "epoch": 0.35338156031482926, "grad_norm": 2.15625, "learning_rate": 3.625025152007679e-05, "loss": 0.4414, "step": 8048 }, { "epoch": 0.35346937879403273, "grad_norm": 2.09375, "learning_rate": 3.6244072899510586e-05, "loss": 0.4108, "step": 8050 }, { "epoch": 0.3535571972732362, "grad_norm": 1.8671875, "learning_rate": 3.6237893417870574e-05, "loss": 0.4202, "step": 8052 }, { "epoch": 0.35364501575243973, "grad_norm": 2.171875, "learning_rate": 3.623171307562999e-05, "loss": 0.4186, "step": 8054 }, { "epoch": 0.3537328342316432, "grad_norm": 2.046875, "learning_rate": 3.622553187326213e-05, "loss": 0.3793, "step": 8056 }, { "epoch": 0.3538206527108467, "grad_norm": 2.078125, "learning_rate": 3.6219349811240345e-05, "loss": 0.4024, "step": 8058 }, { "epoch": 0.35390847119005014, "grad_norm": 1.96875, "learning_rate": 3.621316689003806e-05, "loss": 0.4224, "step": 8060 }, { "epoch": 0.35399628966925367, "grad_norm": 2.109375, "learning_rate": 3.6206983110128765e-05, "loss": 0.4361, "step": 8062 }, { "epoch": 0.35408410814845714, "grad_norm": 2.1875, "learning_rate": 3.620079847198602e-05, "loss": 0.4311, "step": 8064 }, { "epoch": 0.3541719266276606, "grad_norm": 2.09375, "learning_rate": 3.619461297608345e-05, "loss": 0.3927, "step": 8066 }, { "epoch": 0.3542597451068641, "grad_norm": 2.03125, "learning_rate": 3.6188426622894726e-05, "loss": 0.429, "step": 8068 }, { "epoch": 0.3543475635860676, "grad_norm": 2.140625, "learning_rate": 3.618223941289362e-05, "loss": 0.3921, "step": 8070 }, { "epoch": 0.3544353820652711, "grad_norm": 2.046875, "learning_rate": 3.617605134655393e-05, "loss": 0.4144, "step": 8072 }, { "epoch": 0.35452320054447456, "grad_norm": 2.5625, "learning_rate": 3.6169862424349544e-05, "loss": 0.3993, "step": 8074 }, { "epoch": 0.35461101902367803, "grad_norm": 2.34375, "learning_rate": 3.6163672646754423e-05, "loss": 0.3845, "step": 8076 }, { "epoch": 0.35469883750288156, "grad_norm": 2.15625, "learning_rate": 3.615748201424257e-05, "loss": 0.4187, "step": 8078 }, { "epoch": 0.35478665598208503, "grad_norm": 2.421875, "learning_rate": 3.615129052728808e-05, "loss": 0.3998, "step": 8080 }, { "epoch": 0.3548744744612885, "grad_norm": 2.1875, "learning_rate": 3.6145098186365085e-05, "loss": 0.437, "step": 8082 }, { "epoch": 0.35496229294049203, "grad_norm": 2.53125, "learning_rate": 3.6138904991947794e-05, "loss": 0.4331, "step": 8084 }, { "epoch": 0.3550501114196955, "grad_norm": 1.9765625, "learning_rate": 3.613271094451049e-05, "loss": 0.392, "step": 8086 }, { "epoch": 0.355137929898899, "grad_norm": 2.046875, "learning_rate": 3.612651604452752e-05, "loss": 0.407, "step": 8088 }, { "epoch": 0.35522574837810245, "grad_norm": 2.140625, "learning_rate": 3.612032029247326e-05, "loss": 0.4184, "step": 8090 }, { "epoch": 0.355313566857306, "grad_norm": 2.078125, "learning_rate": 3.611412368882223e-05, "loss": 0.4275, "step": 8092 }, { "epoch": 0.35540138533650945, "grad_norm": 1.9921875, "learning_rate": 3.610792623404894e-05, "loss": 0.3976, "step": 8094 }, { "epoch": 0.3554892038157129, "grad_norm": 2.25, "learning_rate": 3.6101727928628e-05, "loss": 0.3745, "step": 8096 }, { "epoch": 0.3555770222949164, "grad_norm": 2.15625, "learning_rate": 3.6095528773034065e-05, "loss": 0.4196, "step": 8098 }, { "epoch": 0.3556648407741199, "grad_norm": 2.25, "learning_rate": 3.608932876774188e-05, "loss": 0.3838, "step": 8100 }, { "epoch": 0.3557526592533234, "grad_norm": 2.359375, "learning_rate": 3.6083127913226235e-05, "loss": 0.4328, "step": 8102 }, { "epoch": 0.35584047773252686, "grad_norm": 2.6875, "learning_rate": 3.6076926209962e-05, "loss": 0.41, "step": 8104 }, { "epoch": 0.35592829621173033, "grad_norm": 2.640625, "learning_rate": 3.607072365842411e-05, "loss": 0.3981, "step": 8106 }, { "epoch": 0.35601611469093386, "grad_norm": 2.171875, "learning_rate": 3.606452025908754e-05, "loss": 0.4053, "step": 8108 }, { "epoch": 0.35610393317013733, "grad_norm": 2.109375, "learning_rate": 3.605831601242735e-05, "loss": 0.4144, "step": 8110 }, { "epoch": 0.3561917516493408, "grad_norm": 1.8515625, "learning_rate": 3.605211091891868e-05, "loss": 0.3831, "step": 8112 }, { "epoch": 0.3562795701285443, "grad_norm": 2.109375, "learning_rate": 3.604590497903671e-05, "loss": 0.404, "step": 8114 }, { "epoch": 0.3563673886077478, "grad_norm": 2.6875, "learning_rate": 3.603969819325668e-05, "loss": 0.4024, "step": 8116 }, { "epoch": 0.3564552070869513, "grad_norm": 2.375, "learning_rate": 3.6033490562053915e-05, "loss": 0.4221, "step": 8118 }, { "epoch": 0.35654302556615475, "grad_norm": 2.09375, "learning_rate": 3.6027282085903795e-05, "loss": 0.3815, "step": 8120 }, { "epoch": 0.3566308440453582, "grad_norm": 2.265625, "learning_rate": 3.6021072765281776e-05, "loss": 0.3893, "step": 8122 }, { "epoch": 0.35671866252456175, "grad_norm": 2.421875, "learning_rate": 3.6014862600663354e-05, "loss": 0.4161, "step": 8124 }, { "epoch": 0.3568064810037652, "grad_norm": 2.3125, "learning_rate": 3.600865159252413e-05, "loss": 0.4167, "step": 8126 }, { "epoch": 0.3568942994829687, "grad_norm": 2.09375, "learning_rate": 3.6002439741339715e-05, "loss": 0.4056, "step": 8128 }, { "epoch": 0.35698211796217216, "grad_norm": 1.90625, "learning_rate": 3.5996227047585837e-05, "loss": 0.416, "step": 8130 }, { "epoch": 0.3570699364413757, "grad_norm": 2.515625, "learning_rate": 3.599001351173825e-05, "loss": 0.386, "step": 8132 }, { "epoch": 0.35715775492057916, "grad_norm": 2.25, "learning_rate": 3.598379913427279e-05, "loss": 0.4333, "step": 8134 }, { "epoch": 0.35724557339978263, "grad_norm": 1.9921875, "learning_rate": 3.597758391566536e-05, "loss": 0.396, "step": 8136 }, { "epoch": 0.35733339187898616, "grad_norm": 2.140625, "learning_rate": 3.5971367856391925e-05, "loss": 0.3873, "step": 8138 }, { "epoch": 0.35742121035818963, "grad_norm": 2.25, "learning_rate": 3.596515095692851e-05, "loss": 0.4016, "step": 8140 }, { "epoch": 0.3575090288373931, "grad_norm": 2.28125, "learning_rate": 3.5958933217751214e-05, "loss": 0.3966, "step": 8142 }, { "epoch": 0.3575968473165966, "grad_norm": 2.46875, "learning_rate": 3.595271463933617e-05, "loss": 0.3895, "step": 8144 }, { "epoch": 0.3576846657958001, "grad_norm": 2.28125, "learning_rate": 3.5946495222159624e-05, "loss": 0.4021, "step": 8146 }, { "epoch": 0.3577724842750036, "grad_norm": 2.09375, "learning_rate": 3.5940274966697846e-05, "loss": 0.4019, "step": 8148 }, { "epoch": 0.35786030275420705, "grad_norm": 2.21875, "learning_rate": 3.593405387342719e-05, "loss": 0.4004, "step": 8150 }, { "epoch": 0.3579481212334105, "grad_norm": 1.84375, "learning_rate": 3.5927831942824056e-05, "loss": 0.3691, "step": 8152 }, { "epoch": 0.35803593971261405, "grad_norm": 2.0, "learning_rate": 3.592160917536495e-05, "loss": 0.386, "step": 8154 }, { "epoch": 0.3581237581918175, "grad_norm": 1.8828125, "learning_rate": 3.5915385571526385e-05, "loss": 0.394, "step": 8156 }, { "epoch": 0.358211576671021, "grad_norm": 2.03125, "learning_rate": 3.590916113178498e-05, "loss": 0.398, "step": 8158 }, { "epoch": 0.35829939515022446, "grad_norm": 1.9609375, "learning_rate": 3.5902935856617403e-05, "loss": 0.411, "step": 8160 }, { "epoch": 0.358387213629428, "grad_norm": 2.203125, "learning_rate": 3.589670974650038e-05, "loss": 0.4071, "step": 8162 }, { "epoch": 0.35847503210863146, "grad_norm": 2.390625, "learning_rate": 3.5890482801910705e-05, "loss": 0.4023, "step": 8164 }, { "epoch": 0.35856285058783494, "grad_norm": 2.15625, "learning_rate": 3.5884255023325256e-05, "loss": 0.3921, "step": 8166 }, { "epoch": 0.3586506690670384, "grad_norm": 2.203125, "learning_rate": 3.587802641122095e-05, "loss": 0.449, "step": 8168 }, { "epoch": 0.35873848754624194, "grad_norm": 2.390625, "learning_rate": 3.5871796966074775e-05, "loss": 0.398, "step": 8170 }, { "epoch": 0.3588263060254454, "grad_norm": 2.25, "learning_rate": 3.586556668836378e-05, "loss": 0.4156, "step": 8172 }, { "epoch": 0.3589141245046489, "grad_norm": 2.0625, "learning_rate": 3.585933557856508e-05, "loss": 0.4224, "step": 8174 }, { "epoch": 0.35900194298385235, "grad_norm": 2.296875, "learning_rate": 3.5853103637155854e-05, "loss": 0.4249, "step": 8176 }, { "epoch": 0.3590897614630559, "grad_norm": 1.8046875, "learning_rate": 3.5846870864613355e-05, "loss": 0.4294, "step": 8178 }, { "epoch": 0.35917757994225935, "grad_norm": 2.203125, "learning_rate": 3.584063726141489e-05, "loss": 0.4183, "step": 8180 }, { "epoch": 0.3592653984214628, "grad_norm": 1.9140625, "learning_rate": 3.5834402828037816e-05, "loss": 0.4175, "step": 8182 }, { "epoch": 0.35935321690066635, "grad_norm": 1.8984375, "learning_rate": 3.582816756495958e-05, "loss": 0.4311, "step": 8184 }, { "epoch": 0.3594410353798698, "grad_norm": 2.0625, "learning_rate": 3.5821931472657674e-05, "loss": 0.38, "step": 8186 }, { "epoch": 0.3595288538590733, "grad_norm": 2.375, "learning_rate": 3.581569455160967e-05, "loss": 0.4245, "step": 8188 }, { "epoch": 0.35961667233827677, "grad_norm": 1.984375, "learning_rate": 3.580945680229317e-05, "loss": 0.3865, "step": 8190 }, { "epoch": 0.3597044908174803, "grad_norm": 1.8125, "learning_rate": 3.580321822518588e-05, "loss": 0.4175, "step": 8192 }, { "epoch": 0.35979230929668377, "grad_norm": 1.8671875, "learning_rate": 3.579697882076557e-05, "loss": 0.3859, "step": 8194 }, { "epoch": 0.35988012777588724, "grad_norm": 2.25, "learning_rate": 3.5790738589510015e-05, "loss": 0.3937, "step": 8196 }, { "epoch": 0.3599679462550907, "grad_norm": 2.21875, "learning_rate": 3.578449753189711e-05, "loss": 0.397, "step": 8198 }, { "epoch": 0.36005576473429424, "grad_norm": 2.125, "learning_rate": 3.5778255648404805e-05, "loss": 0.409, "step": 8200 }, { "epoch": 0.3601435832134977, "grad_norm": 2.140625, "learning_rate": 3.57720129395111e-05, "loss": 0.4007, "step": 8202 }, { "epoch": 0.3602314016927012, "grad_norm": 2.03125, "learning_rate": 3.576576940569406e-05, "loss": 0.4047, "step": 8204 }, { "epoch": 0.36031922017190465, "grad_norm": 1.8515625, "learning_rate": 3.5759525047431816e-05, "loss": 0.4141, "step": 8206 }, { "epoch": 0.3604070386511082, "grad_norm": 1.9375, "learning_rate": 3.575327986520257e-05, "loss": 0.3848, "step": 8208 }, { "epoch": 0.36049485713031165, "grad_norm": 2.0625, "learning_rate": 3.574703385948457e-05, "loss": 0.4296, "step": 8210 }, { "epoch": 0.3605826756095151, "grad_norm": 2.03125, "learning_rate": 3.574078703075613e-05, "loss": 0.4021, "step": 8212 }, { "epoch": 0.3606704940887186, "grad_norm": 2.046875, "learning_rate": 3.573453937949566e-05, "loss": 0.4013, "step": 8214 }, { "epoch": 0.3607583125679221, "grad_norm": 2.015625, "learning_rate": 3.572829090618159e-05, "loss": 0.376, "step": 8216 }, { "epoch": 0.3608461310471256, "grad_norm": 2.15625, "learning_rate": 3.572204161129243e-05, "loss": 0.4091, "step": 8218 }, { "epoch": 0.36093394952632907, "grad_norm": 1.953125, "learning_rate": 3.571579149530675e-05, "loss": 0.4112, "step": 8220 }, { "epoch": 0.36102176800553254, "grad_norm": 2.046875, "learning_rate": 3.5709540558703186e-05, "loss": 0.4014, "step": 8222 }, { "epoch": 0.36110958648473607, "grad_norm": 1.921875, "learning_rate": 3.570328880196044e-05, "loss": 0.3932, "step": 8224 }, { "epoch": 0.36119740496393954, "grad_norm": 2.125, "learning_rate": 3.569703622555727e-05, "loss": 0.4222, "step": 8226 }, { "epoch": 0.361285223443143, "grad_norm": 2.359375, "learning_rate": 3.569078282997251e-05, "loss": 0.4291, "step": 8228 }, { "epoch": 0.3613730419223465, "grad_norm": 1.9609375, "learning_rate": 3.568452861568503e-05, "loss": 0.3827, "step": 8230 }, { "epoch": 0.36146086040155, "grad_norm": 1.9140625, "learning_rate": 3.5678273583173795e-05, "loss": 0.4059, "step": 8232 }, { "epoch": 0.3615486788807535, "grad_norm": 2.125, "learning_rate": 3.567201773291781e-05, "loss": 0.4157, "step": 8234 }, { "epoch": 0.36163649735995695, "grad_norm": 2.375, "learning_rate": 3.5665761065396134e-05, "loss": 0.3946, "step": 8236 }, { "epoch": 0.3617243158391605, "grad_norm": 2.171875, "learning_rate": 3.565950358108793e-05, "loss": 0.3954, "step": 8238 }, { "epoch": 0.36181213431836395, "grad_norm": 2.40625, "learning_rate": 3.565324528047238e-05, "loss": 0.4223, "step": 8240 }, { "epoch": 0.3618999527975674, "grad_norm": 2.09375, "learning_rate": 3.5646986164028765e-05, "loss": 0.4139, "step": 8242 }, { "epoch": 0.3619877712767709, "grad_norm": 2.515625, "learning_rate": 3.564072623223639e-05, "loss": 0.4005, "step": 8244 }, { "epoch": 0.3620755897559744, "grad_norm": 2.328125, "learning_rate": 3.5634465485574644e-05, "loss": 0.3918, "step": 8246 }, { "epoch": 0.3621634082351779, "grad_norm": 2.84375, "learning_rate": 3.5628203924522984e-05, "loss": 0.3848, "step": 8248 }, { "epoch": 0.36225122671438137, "grad_norm": 2.78125, "learning_rate": 3.562194154956093e-05, "loss": 0.391, "step": 8250 }, { "epoch": 0.36233904519358484, "grad_norm": 2.171875, "learning_rate": 3.561567836116804e-05, "loss": 0.4067, "step": 8252 }, { "epoch": 0.36242686367278837, "grad_norm": 1.9921875, "learning_rate": 3.5609414359823956e-05, "loss": 0.4135, "step": 8254 }, { "epoch": 0.36251468215199184, "grad_norm": 2.0, "learning_rate": 3.5603149546008373e-05, "loss": 0.4034, "step": 8256 }, { "epoch": 0.3626025006311953, "grad_norm": 1.875, "learning_rate": 3.5596883920201054e-05, "loss": 0.4041, "step": 8258 }, { "epoch": 0.3626903191103988, "grad_norm": 1.984375, "learning_rate": 3.559061748288183e-05, "loss": 0.4162, "step": 8260 }, { "epoch": 0.3627781375896023, "grad_norm": 2.078125, "learning_rate": 3.558435023453058e-05, "loss": 0.3935, "step": 8262 }, { "epoch": 0.3628659560688058, "grad_norm": 2.21875, "learning_rate": 3.557808217562726e-05, "loss": 0.4009, "step": 8264 }, { "epoch": 0.36295377454800926, "grad_norm": 1.8828125, "learning_rate": 3.557181330665186e-05, "loss": 0.369, "step": 8266 }, { "epoch": 0.36304159302721273, "grad_norm": 2.046875, "learning_rate": 3.5565543628084466e-05, "loss": 0.3915, "step": 8268 }, { "epoch": 0.36312941150641626, "grad_norm": 1.9453125, "learning_rate": 3.55592731404052e-05, "loss": 0.3905, "step": 8270 }, { "epoch": 0.3632172299856197, "grad_norm": 2.078125, "learning_rate": 3.555300184409428e-05, "loss": 0.4033, "step": 8272 }, { "epoch": 0.3633050484648232, "grad_norm": 2.0625, "learning_rate": 3.554672973963194e-05, "loss": 0.3804, "step": 8274 }, { "epoch": 0.36339286694402667, "grad_norm": 2.03125, "learning_rate": 3.554045682749851e-05, "loss": 0.388, "step": 8276 }, { "epoch": 0.3634806854232302, "grad_norm": 2.40625, "learning_rate": 3.553418310817437e-05, "loss": 0.4108, "step": 8278 }, { "epoch": 0.36356850390243367, "grad_norm": 1.90625, "learning_rate": 3.5527908582139965e-05, "loss": 0.3755, "step": 8280 }, { "epoch": 0.36365632238163714, "grad_norm": 1.8984375, "learning_rate": 3.5521633249875796e-05, "loss": 0.3783, "step": 8282 }, { "epoch": 0.36374414086084067, "grad_norm": 2.09375, "learning_rate": 3.551535711186243e-05, "loss": 0.405, "step": 8284 }, { "epoch": 0.36383195934004414, "grad_norm": 2.171875, "learning_rate": 3.550908016858049e-05, "loss": 0.3899, "step": 8286 }, { "epoch": 0.3639197778192476, "grad_norm": 2.234375, "learning_rate": 3.550280242051067e-05, "loss": 0.4381, "step": 8288 }, { "epoch": 0.3640075962984511, "grad_norm": 2.140625, "learning_rate": 3.5496523868133735e-05, "loss": 0.3999, "step": 8290 }, { "epoch": 0.3640954147776546, "grad_norm": 2.015625, "learning_rate": 3.549024451193048e-05, "loss": 0.4372, "step": 8292 }, { "epoch": 0.3641832332568581, "grad_norm": 2.0625, "learning_rate": 3.548396435238179e-05, "loss": 0.3942, "step": 8294 }, { "epoch": 0.36427105173606156, "grad_norm": 2.203125, "learning_rate": 3.547768338996859e-05, "loss": 0.3907, "step": 8296 }, { "epoch": 0.36435887021526503, "grad_norm": 2.71875, "learning_rate": 3.547140162517189e-05, "loss": 0.3835, "step": 8298 }, { "epoch": 0.36444668869446856, "grad_norm": 1.84375, "learning_rate": 3.5465119058472736e-05, "loss": 0.4294, "step": 8300 }, { "epoch": 0.36453450717367203, "grad_norm": 2.015625, "learning_rate": 3.545883569035226e-05, "loss": 0.4001, "step": 8302 }, { "epoch": 0.3646223256528755, "grad_norm": 1.890625, "learning_rate": 3.545255152129164e-05, "loss": 0.3965, "step": 8304 }, { "epoch": 0.364710144132079, "grad_norm": 1.859375, "learning_rate": 3.544626655177212e-05, "loss": 0.411, "step": 8306 }, { "epoch": 0.3647979626112825, "grad_norm": 1.9140625, "learning_rate": 3.5439980782275e-05, "loss": 0.4039, "step": 8308 }, { "epoch": 0.36488578109048597, "grad_norm": 2.140625, "learning_rate": 3.543369421328165e-05, "loss": 0.4358, "step": 8310 }, { "epoch": 0.36497359956968944, "grad_norm": 2.171875, "learning_rate": 3.5427406845273506e-05, "loss": 0.4205, "step": 8312 }, { "epoch": 0.3650614180488929, "grad_norm": 1.9609375, "learning_rate": 3.542111867873203e-05, "loss": 0.391, "step": 8314 }, { "epoch": 0.36514923652809644, "grad_norm": 1.8984375, "learning_rate": 3.5414829714138795e-05, "loss": 0.4033, "step": 8316 }, { "epoch": 0.3652370550072999, "grad_norm": 2.046875, "learning_rate": 3.540853995197541e-05, "loss": 0.3773, "step": 8318 }, { "epoch": 0.3653248734865034, "grad_norm": 2.0625, "learning_rate": 3.540224939272353e-05, "loss": 0.396, "step": 8320 }, { "epoch": 0.36541269196570686, "grad_norm": 2.0625, "learning_rate": 3.5395958036864896e-05, "loss": 0.3724, "step": 8322 }, { "epoch": 0.3655005104449104, "grad_norm": 2.046875, "learning_rate": 3.538966588488131e-05, "loss": 0.398, "step": 8324 }, { "epoch": 0.36558832892411386, "grad_norm": 1.84375, "learning_rate": 3.538337293725462e-05, "loss": 0.3854, "step": 8326 }, { "epoch": 0.36567614740331733, "grad_norm": 1.9609375, "learning_rate": 3.5377079194466737e-05, "loss": 0.4364, "step": 8328 }, { "epoch": 0.3657639658825208, "grad_norm": 2.15625, "learning_rate": 3.5370784656999655e-05, "loss": 0.414, "step": 8330 }, { "epoch": 0.36585178436172433, "grad_norm": 2.34375, "learning_rate": 3.536448932533538e-05, "loss": 0.3896, "step": 8332 }, { "epoch": 0.3659396028409278, "grad_norm": 1.921875, "learning_rate": 3.5358193199956036e-05, "loss": 0.4054, "step": 8334 }, { "epoch": 0.3660274213201313, "grad_norm": 2.34375, "learning_rate": 3.5351896281343774e-05, "loss": 0.3983, "step": 8336 }, { "epoch": 0.3661152397993348, "grad_norm": 1.9453125, "learning_rate": 3.5345598569980814e-05, "loss": 0.4626, "step": 8338 }, { "epoch": 0.3662030582785383, "grad_norm": 2.375, "learning_rate": 3.5339300066349435e-05, "loss": 0.4138, "step": 8340 }, { "epoch": 0.36629087675774175, "grad_norm": 1.7890625, "learning_rate": 3.5333000770931986e-05, "loss": 0.4116, "step": 8342 }, { "epoch": 0.3663786952369452, "grad_norm": 1.9765625, "learning_rate": 3.532670068421085e-05, "loss": 0.4138, "step": 8344 }, { "epoch": 0.36646651371614875, "grad_norm": 2.15625, "learning_rate": 3.5320399806668506e-05, "loss": 0.4108, "step": 8346 }, { "epoch": 0.3665543321953522, "grad_norm": 2.328125, "learning_rate": 3.531409813878746e-05, "loss": 0.3859, "step": 8348 }, { "epoch": 0.3666421506745557, "grad_norm": 2.34375, "learning_rate": 3.5307795681050316e-05, "loss": 0.4094, "step": 8350 }, { "epoch": 0.36672996915375916, "grad_norm": 2.09375, "learning_rate": 3.530149243393971e-05, "loss": 0.3775, "step": 8352 }, { "epoch": 0.3668177876329627, "grad_norm": 2.0, "learning_rate": 3.5295188397938336e-05, "loss": 0.4058, "step": 8354 }, { "epoch": 0.36690560611216616, "grad_norm": 2.078125, "learning_rate": 3.528888357352898e-05, "loss": 0.374, "step": 8356 }, { "epoch": 0.36699342459136963, "grad_norm": 2.046875, "learning_rate": 3.528257796119443e-05, "loss": 0.3964, "step": 8358 }, { "epoch": 0.3670812430705731, "grad_norm": 2.140625, "learning_rate": 3.527627156141761e-05, "loss": 0.4265, "step": 8360 }, { "epoch": 0.36716906154977663, "grad_norm": 1.8671875, "learning_rate": 3.5269964374681454e-05, "loss": 0.3935, "step": 8362 }, { "epoch": 0.3672568800289801, "grad_norm": 2.0, "learning_rate": 3.526365640146896e-05, "loss": 0.4064, "step": 8364 }, { "epoch": 0.3673446985081836, "grad_norm": 2.03125, "learning_rate": 3.5257347642263194e-05, "loss": 0.4018, "step": 8366 }, { "epoch": 0.36743251698738705, "grad_norm": 2.046875, "learning_rate": 3.52510380975473e-05, "loss": 0.405, "step": 8368 }, { "epoch": 0.3675203354665906, "grad_norm": 2.171875, "learning_rate": 3.524472776780443e-05, "loss": 0.4101, "step": 8370 }, { "epoch": 0.36760815394579405, "grad_norm": 2.0625, "learning_rate": 3.523841665351787e-05, "loss": 0.4013, "step": 8372 }, { "epoch": 0.3676959724249975, "grad_norm": 1.90625, "learning_rate": 3.5232104755170905e-05, "loss": 0.4238, "step": 8374 }, { "epoch": 0.367783790904201, "grad_norm": 2.765625, "learning_rate": 3.522579207324689e-05, "loss": 0.3948, "step": 8376 }, { "epoch": 0.3678716093834045, "grad_norm": 1.9921875, "learning_rate": 3.5219478608229284e-05, "loss": 0.4009, "step": 8378 }, { "epoch": 0.367959427862608, "grad_norm": 2.0625, "learning_rate": 3.5213164360601555e-05, "loss": 0.4123, "step": 8380 }, { "epoch": 0.36804724634181146, "grad_norm": 2.015625, "learning_rate": 3.5206849330847244e-05, "loss": 0.3838, "step": 8382 }, { "epoch": 0.36813506482101493, "grad_norm": 2.21875, "learning_rate": 3.520053351944996e-05, "loss": 0.4118, "step": 8384 }, { "epoch": 0.36822288330021846, "grad_norm": 2.34375, "learning_rate": 3.5194216926893395e-05, "loss": 0.4044, "step": 8386 }, { "epoch": 0.36831070177942193, "grad_norm": 2.03125, "learning_rate": 3.5187899553661236e-05, "loss": 0.4166, "step": 8388 }, { "epoch": 0.3683985202586254, "grad_norm": 2.03125, "learning_rate": 3.518158140023729e-05, "loss": 0.4072, "step": 8390 }, { "epoch": 0.36848633873782893, "grad_norm": 2.015625, "learning_rate": 3.5175262467105404e-05, "loss": 0.4168, "step": 8392 }, { "epoch": 0.3685741572170324, "grad_norm": 2.28125, "learning_rate": 3.5168942754749476e-05, "loss": 0.3971, "step": 8394 }, { "epoch": 0.3686619756962359, "grad_norm": 2.203125, "learning_rate": 3.516262226365347e-05, "loss": 0.3804, "step": 8396 }, { "epoch": 0.36874979417543935, "grad_norm": 2.03125, "learning_rate": 3.515630099430142e-05, "loss": 0.4292, "step": 8398 }, { "epoch": 0.3688376126546429, "grad_norm": 2.1875, "learning_rate": 3.5149978947177396e-05, "loss": 0.4162, "step": 8400 }, { "epoch": 0.36892543113384635, "grad_norm": 2.15625, "learning_rate": 3.514365612276557e-05, "loss": 0.4324, "step": 8402 }, { "epoch": 0.3690132496130498, "grad_norm": 2.03125, "learning_rate": 3.5137332521550116e-05, "loss": 0.4021, "step": 8404 }, { "epoch": 0.3691010680922533, "grad_norm": 1.8671875, "learning_rate": 3.513100814401531e-05, "loss": 0.4144, "step": 8406 }, { "epoch": 0.3691888865714568, "grad_norm": 1.9765625, "learning_rate": 3.512468299064546e-05, "loss": 0.3933, "step": 8408 }, { "epoch": 0.3692767050506603, "grad_norm": 2.0, "learning_rate": 3.5118357061924974e-05, "loss": 0.3806, "step": 8410 }, { "epoch": 0.36936452352986376, "grad_norm": 2.4375, "learning_rate": 3.511203035833827e-05, "loss": 0.3926, "step": 8412 }, { "epoch": 0.36945234200906724, "grad_norm": 2.3125, "learning_rate": 3.5105702880369864e-05, "loss": 0.4062, "step": 8414 }, { "epoch": 0.36954016048827076, "grad_norm": 2.578125, "learning_rate": 3.509937462850431e-05, "loss": 0.4053, "step": 8416 }, { "epoch": 0.36962797896747424, "grad_norm": 2.390625, "learning_rate": 3.509304560322622e-05, "loss": 0.4269, "step": 8418 }, { "epoch": 0.3697157974466777, "grad_norm": 2.25, "learning_rate": 3.5086715805020274e-05, "loss": 0.38, "step": 8420 }, { "epoch": 0.3698036159258812, "grad_norm": 2.09375, "learning_rate": 3.508038523437122e-05, "loss": 0.3872, "step": 8422 }, { "epoch": 0.3698914344050847, "grad_norm": 2.171875, "learning_rate": 3.5074053891763844e-05, "loss": 0.3803, "step": 8424 }, { "epoch": 0.3699792528842882, "grad_norm": 2.203125, "learning_rate": 3.506772177768301e-05, "loss": 0.4023, "step": 8426 }, { "epoch": 0.37006707136349165, "grad_norm": 2.046875, "learning_rate": 3.506138889261364e-05, "loss": 0.4042, "step": 8428 }, { "epoch": 0.3701548898426951, "grad_norm": 1.8984375, "learning_rate": 3.505505523704068e-05, "loss": 0.3977, "step": 8430 }, { "epoch": 0.37024270832189865, "grad_norm": 2.046875, "learning_rate": 3.5048720811449185e-05, "loss": 0.3787, "step": 8432 }, { "epoch": 0.3703305268011021, "grad_norm": 2.03125, "learning_rate": 3.504238561632424e-05, "loss": 0.3843, "step": 8434 }, { "epoch": 0.3704183452803056, "grad_norm": 2.0, "learning_rate": 3.5036049652151e-05, "loss": 0.3983, "step": 8436 }, { "epoch": 0.3705061637595091, "grad_norm": 1.984375, "learning_rate": 3.5029712919414664e-05, "loss": 0.4091, "step": 8438 }, { "epoch": 0.3705939822387126, "grad_norm": 1.9296875, "learning_rate": 3.5023375418600524e-05, "loss": 0.3897, "step": 8440 }, { "epoch": 0.37068180071791607, "grad_norm": 1.9921875, "learning_rate": 3.501703715019388e-05, "loss": 0.4017, "step": 8442 }, { "epoch": 0.37076961919711954, "grad_norm": 1.953125, "learning_rate": 3.501069811468013e-05, "loss": 0.4036, "step": 8444 }, { "epoch": 0.37085743767632307, "grad_norm": 2.0625, "learning_rate": 3.5004358312544714e-05, "loss": 0.3832, "step": 8446 }, { "epoch": 0.37094525615552654, "grad_norm": 1.8203125, "learning_rate": 3.499801774427315e-05, "loss": 0.4105, "step": 8448 }, { "epoch": 0.37103307463473, "grad_norm": 2.3125, "learning_rate": 3.499167641035099e-05, "loss": 0.4005, "step": 8450 }, { "epoch": 0.3711208931139335, "grad_norm": 2.09375, "learning_rate": 3.498533431126386e-05, "loss": 0.3815, "step": 8452 }, { "epoch": 0.371208711593137, "grad_norm": 2.203125, "learning_rate": 3.497899144749742e-05, "loss": 0.4332, "step": 8454 }, { "epoch": 0.3712965300723405, "grad_norm": 2.390625, "learning_rate": 3.497264781953743e-05, "loss": 0.4117, "step": 8456 }, { "epoch": 0.37138434855154395, "grad_norm": 2.3125, "learning_rate": 3.496630342786968e-05, "loss": 0.3979, "step": 8458 }, { "epoch": 0.3714721670307474, "grad_norm": 2.3125, "learning_rate": 3.495995827298002e-05, "loss": 0.4247, "step": 8460 }, { "epoch": 0.37155998550995095, "grad_norm": 1.9453125, "learning_rate": 3.4953612355354373e-05, "loss": 0.4288, "step": 8462 }, { "epoch": 0.3716478039891544, "grad_norm": 1.890625, "learning_rate": 3.494726567547871e-05, "loss": 0.4028, "step": 8464 }, { "epoch": 0.3717356224683579, "grad_norm": 2.265625, "learning_rate": 3.4940918233839056e-05, "loss": 0.379, "step": 8466 }, { "epoch": 0.37182344094756137, "grad_norm": 2.21875, "learning_rate": 3.4934570030921494e-05, "loss": 0.4114, "step": 8468 }, { "epoch": 0.3719112594267649, "grad_norm": 2.078125, "learning_rate": 3.492822106721217e-05, "loss": 0.3756, "step": 8470 }, { "epoch": 0.37199907790596837, "grad_norm": 2.296875, "learning_rate": 3.492187134319731e-05, "loss": 0.4375, "step": 8472 }, { "epoch": 0.37208689638517184, "grad_norm": 2.0625, "learning_rate": 3.491552085936316e-05, "loss": 0.4181, "step": 8474 }, { "epoch": 0.3721747148643753, "grad_norm": 2.53125, "learning_rate": 3.4909169616196055e-05, "loss": 0.3895, "step": 8476 }, { "epoch": 0.37226253334357884, "grad_norm": 2.109375, "learning_rate": 3.490281761418236e-05, "loss": 0.3932, "step": 8478 }, { "epoch": 0.3723503518227823, "grad_norm": 2.046875, "learning_rate": 3.489646485380851e-05, "loss": 0.4005, "step": 8480 }, { "epoch": 0.3724381703019858, "grad_norm": 1.78125, "learning_rate": 3.4890111335561016e-05, "loss": 0.3688, "step": 8482 }, { "epoch": 0.37252598878118925, "grad_norm": 2.0625, "learning_rate": 3.488375705992642e-05, "loss": 0.3811, "step": 8484 }, { "epoch": 0.3726138072603928, "grad_norm": 2.109375, "learning_rate": 3.487740202739134e-05, "loss": 0.4228, "step": 8486 }, { "epoch": 0.37270162573959625, "grad_norm": 2.015625, "learning_rate": 3.487104623844245e-05, "loss": 0.3903, "step": 8488 }, { "epoch": 0.3727894442187997, "grad_norm": 2.0625, "learning_rate": 3.486468969356647e-05, "loss": 0.3835, "step": 8490 }, { "epoch": 0.37287726269800325, "grad_norm": 1.984375, "learning_rate": 3.485833239325019e-05, "loss": 0.4037, "step": 8492 }, { "epoch": 0.3729650811772067, "grad_norm": 2.375, "learning_rate": 3.485197433798045e-05, "loss": 0.372, "step": 8494 }, { "epoch": 0.3730528996564102, "grad_norm": 2.25, "learning_rate": 3.484561552824416e-05, "loss": 0.3698, "step": 8496 }, { "epoch": 0.37314071813561367, "grad_norm": 1.984375, "learning_rate": 3.483925596452826e-05, "loss": 0.4107, "step": 8498 }, { "epoch": 0.3732285366148172, "grad_norm": 2.03125, "learning_rate": 3.4832895647319786e-05, "loss": 0.4162, "step": 8500 }, { "epoch": 0.37331635509402067, "grad_norm": 2.171875, "learning_rate": 3.482653457710581e-05, "loss": 0.3997, "step": 8502 }, { "epoch": 0.37340417357322414, "grad_norm": 2.140625, "learning_rate": 3.482017275437346e-05, "loss": 0.4082, "step": 8504 }, { "epoch": 0.3734919920524276, "grad_norm": 2.203125, "learning_rate": 3.481381017960992e-05, "loss": 0.3791, "step": 8506 }, { "epoch": 0.37357981053163114, "grad_norm": 2.09375, "learning_rate": 3.480744685330244e-05, "loss": 0.3819, "step": 8508 }, { "epoch": 0.3736676290108346, "grad_norm": 2.296875, "learning_rate": 3.480108277593834e-05, "loss": 0.4309, "step": 8510 }, { "epoch": 0.3737554474900381, "grad_norm": 1.953125, "learning_rate": 3.4794717948004974e-05, "loss": 0.3988, "step": 8512 }, { "epoch": 0.37384326596924156, "grad_norm": 1.828125, "learning_rate": 3.478835236998976e-05, "loss": 0.4027, "step": 8514 }, { "epoch": 0.3739310844484451, "grad_norm": 1.8046875, "learning_rate": 3.4781986042380164e-05, "loss": 0.3995, "step": 8516 }, { "epoch": 0.37401890292764856, "grad_norm": 1.9609375, "learning_rate": 3.4775618965663736e-05, "loss": 0.4285, "step": 8518 }, { "epoch": 0.374106721406852, "grad_norm": 2.015625, "learning_rate": 3.476925114032806e-05, "loss": 0.3945, "step": 8520 }, { "epoch": 0.3741945398860555, "grad_norm": 1.921875, "learning_rate": 3.47628825668608e-05, "loss": 0.3842, "step": 8522 }, { "epoch": 0.374282358365259, "grad_norm": 1.9765625, "learning_rate": 3.475651324574965e-05, "loss": 0.4019, "step": 8524 }, { "epoch": 0.3743701768444625, "grad_norm": 2.0, "learning_rate": 3.4750143177482366e-05, "loss": 0.4039, "step": 8526 }, { "epoch": 0.37445799532366597, "grad_norm": 2.25, "learning_rate": 3.47437723625468e-05, "loss": 0.3881, "step": 8528 }, { "epoch": 0.37454581380286944, "grad_norm": 2.296875, "learning_rate": 3.47374008014308e-05, "loss": 0.4122, "step": 8530 }, { "epoch": 0.37463363228207297, "grad_norm": 1.8671875, "learning_rate": 3.473102849462231e-05, "loss": 0.4023, "step": 8532 }, { "epoch": 0.37472145076127644, "grad_norm": 2.390625, "learning_rate": 3.472465544260932e-05, "loss": 0.406, "step": 8534 }, { "epoch": 0.3748092692404799, "grad_norm": 2.328125, "learning_rate": 3.471828164587989e-05, "loss": 0.3963, "step": 8536 }, { "epoch": 0.37489708771968344, "grad_norm": 1.9296875, "learning_rate": 3.471190710492213e-05, "loss": 0.3803, "step": 8538 }, { "epoch": 0.3749849061988869, "grad_norm": 1.953125, "learning_rate": 3.470553182022419e-05, "loss": 0.4057, "step": 8540 }, { "epoch": 0.3750727246780904, "grad_norm": 2.125, "learning_rate": 3.46991557922743e-05, "loss": 0.384, "step": 8542 }, { "epoch": 0.37516054315729386, "grad_norm": 2.15625, "learning_rate": 3.4692779021560726e-05, "loss": 0.3787, "step": 8544 }, { "epoch": 0.3752483616364974, "grad_norm": 2.1875, "learning_rate": 3.468640150857181e-05, "loss": 0.3673, "step": 8546 }, { "epoch": 0.37533618011570086, "grad_norm": 1.8359375, "learning_rate": 3.4680023253795956e-05, "loss": 0.3796, "step": 8548 }, { "epoch": 0.37542399859490433, "grad_norm": 1.8125, "learning_rate": 3.46736442577216e-05, "loss": 0.3703, "step": 8550 }, { "epoch": 0.3755118170741078, "grad_norm": 1.8125, "learning_rate": 3.466726452083724e-05, "loss": 0.4013, "step": 8552 }, { "epoch": 0.37559963555331133, "grad_norm": 2.28125, "learning_rate": 3.466088404363145e-05, "loss": 0.3854, "step": 8554 }, { "epoch": 0.3756874540325148, "grad_norm": 2.09375, "learning_rate": 3.465450282659285e-05, "loss": 0.4204, "step": 8556 }, { "epoch": 0.3757752725117183, "grad_norm": 2.078125, "learning_rate": 3.464812087021009e-05, "loss": 0.412, "step": 8558 }, { "epoch": 0.37586309099092174, "grad_norm": 1.984375, "learning_rate": 3.4641738174971936e-05, "loss": 0.4043, "step": 8560 }, { "epoch": 0.37595090947012527, "grad_norm": 2.046875, "learning_rate": 3.463535474136716e-05, "loss": 0.3919, "step": 8562 }, { "epoch": 0.37603872794932874, "grad_norm": 2.25, "learning_rate": 3.462897056988461e-05, "loss": 0.389, "step": 8564 }, { "epoch": 0.3761265464285322, "grad_norm": 1.859375, "learning_rate": 3.462258566101318e-05, "loss": 0.3905, "step": 8566 }, { "epoch": 0.3762143649077357, "grad_norm": 2.6875, "learning_rate": 3.461620001524183e-05, "loss": 0.4025, "step": 8568 }, { "epoch": 0.3763021833869392, "grad_norm": 2.046875, "learning_rate": 3.460981363305959e-05, "loss": 0.4231, "step": 8570 }, { "epoch": 0.3763900018661427, "grad_norm": 2.28125, "learning_rate": 3.460342651495551e-05, "loss": 0.423, "step": 8572 }, { "epoch": 0.37647782034534616, "grad_norm": 1.984375, "learning_rate": 3.459703866141872e-05, "loss": 0.396, "step": 8574 }, { "epoch": 0.37656563882454963, "grad_norm": 2.046875, "learning_rate": 3.459065007293842e-05, "loss": 0.4062, "step": 8576 }, { "epoch": 0.37665345730375316, "grad_norm": 1.9296875, "learning_rate": 3.458426075000383e-05, "loss": 0.409, "step": 8578 }, { "epoch": 0.37674127578295663, "grad_norm": 1.875, "learning_rate": 3.4577870693104256e-05, "loss": 0.4096, "step": 8580 }, { "epoch": 0.3768290942621601, "grad_norm": 1.96875, "learning_rate": 3.4571479902729045e-05, "loss": 0.4013, "step": 8582 }, { "epoch": 0.3769169127413636, "grad_norm": 2.078125, "learning_rate": 3.456508837936762e-05, "loss": 0.4027, "step": 8584 }, { "epoch": 0.3770047312205671, "grad_norm": 1.9140625, "learning_rate": 3.4558696123509426e-05, "loss": 0.4047, "step": 8586 }, { "epoch": 0.3770925496997706, "grad_norm": 1.8984375, "learning_rate": 3.455230313564399e-05, "loss": 0.3918, "step": 8588 }, { "epoch": 0.37718036817897405, "grad_norm": 2.078125, "learning_rate": 3.45459094162609e-05, "loss": 0.3935, "step": 8590 }, { "epoch": 0.3772681866581776, "grad_norm": 1.9765625, "learning_rate": 3.453951496584977e-05, "loss": 0.4381, "step": 8592 }, { "epoch": 0.37735600513738105, "grad_norm": 1.9296875, "learning_rate": 3.45331197849003e-05, "loss": 0.3799, "step": 8594 }, { "epoch": 0.3774438236165845, "grad_norm": 2.125, "learning_rate": 3.452672387390223e-05, "loss": 0.3954, "step": 8596 }, { "epoch": 0.377531642095788, "grad_norm": 2.203125, "learning_rate": 3.452032723334536e-05, "loss": 0.4193, "step": 8598 }, { "epoch": 0.3776194605749915, "grad_norm": 2.234375, "learning_rate": 3.451392986371955e-05, "loss": 0.3764, "step": 8600 }, { "epoch": 0.377707279054195, "grad_norm": 1.90625, "learning_rate": 3.450753176551472e-05, "loss": 0.4466, "step": 8602 }, { "epoch": 0.37779509753339846, "grad_norm": 2.0625, "learning_rate": 3.4501132939220816e-05, "loss": 0.3867, "step": 8604 }, { "epoch": 0.37788291601260193, "grad_norm": 1.7890625, "learning_rate": 3.4494733385327875e-05, "loss": 0.4123, "step": 8606 }, { "epoch": 0.37797073449180546, "grad_norm": 1.9765625, "learning_rate": 3.4488333104325975e-05, "loss": 0.3987, "step": 8608 }, { "epoch": 0.37805855297100893, "grad_norm": 2.03125, "learning_rate": 3.448193209670526e-05, "loss": 0.3798, "step": 8610 }, { "epoch": 0.3781463714502124, "grad_norm": 2.03125, "learning_rate": 3.44755303629559e-05, "loss": 0.3834, "step": 8612 }, { "epoch": 0.3782341899294159, "grad_norm": 1.9453125, "learning_rate": 3.446912790356817e-05, "loss": 0.3721, "step": 8614 }, { "epoch": 0.3783220084086194, "grad_norm": 2.109375, "learning_rate": 3.446272471903235e-05, "loss": 0.3949, "step": 8616 }, { "epoch": 0.3784098268878229, "grad_norm": 2.296875, "learning_rate": 3.445632080983879e-05, "loss": 0.389, "step": 8618 }, { "epoch": 0.37849764536702635, "grad_norm": 2.5625, "learning_rate": 3.444991617647792e-05, "loss": 0.3948, "step": 8620 }, { "epoch": 0.3785854638462298, "grad_norm": 2.21875, "learning_rate": 3.44435108194402e-05, "loss": 0.4126, "step": 8622 }, { "epoch": 0.37867328232543335, "grad_norm": 2.15625, "learning_rate": 3.443710473921617e-05, "loss": 0.4307, "step": 8624 }, { "epoch": 0.3787611008046368, "grad_norm": 2.265625, "learning_rate": 3.44306979362964e-05, "loss": 0.3778, "step": 8626 }, { "epoch": 0.3788489192838403, "grad_norm": 2.34375, "learning_rate": 3.4424290411171505e-05, "loss": 0.4118, "step": 8628 }, { "epoch": 0.37893673776304376, "grad_norm": 2.359375, "learning_rate": 3.44178821643322e-05, "loss": 0.3992, "step": 8630 }, { "epoch": 0.3790245562422473, "grad_norm": 2.53125, "learning_rate": 3.441147319626922e-05, "loss": 0.4063, "step": 8632 }, { "epoch": 0.37911237472145076, "grad_norm": 1.890625, "learning_rate": 3.440506350747337e-05, "loss": 0.3811, "step": 8634 }, { "epoch": 0.37920019320065423, "grad_norm": 2.078125, "learning_rate": 3.43986530984355e-05, "loss": 0.3884, "step": 8636 }, { "epoch": 0.37928801167985776, "grad_norm": 2.078125, "learning_rate": 3.439224196964652e-05, "loss": 0.3733, "step": 8638 }, { "epoch": 0.37937583015906123, "grad_norm": 2.21875, "learning_rate": 3.43858301215974e-05, "loss": 0.3773, "step": 8640 }, { "epoch": 0.3794636486382647, "grad_norm": 2.046875, "learning_rate": 3.437941755477916e-05, "loss": 0.4134, "step": 8642 }, { "epoch": 0.3795514671174682, "grad_norm": 1.890625, "learning_rate": 3.437300426968287e-05, "loss": 0.3893, "step": 8644 }, { "epoch": 0.3796392855966717, "grad_norm": 1.9921875, "learning_rate": 3.436659026679967e-05, "loss": 0.4045, "step": 8646 }, { "epoch": 0.3797271040758752, "grad_norm": 1.9921875, "learning_rate": 3.436017554662074e-05, "loss": 0.4129, "step": 8648 }, { "epoch": 0.37981492255507865, "grad_norm": 2.140625, "learning_rate": 3.4353760109637336e-05, "loss": 0.4034, "step": 8650 }, { "epoch": 0.3799027410342821, "grad_norm": 2.125, "learning_rate": 3.4347343956340726e-05, "loss": 0.3959, "step": 8652 }, { "epoch": 0.37999055951348565, "grad_norm": 2.15625, "learning_rate": 3.434092708722228e-05, "loss": 0.4124, "step": 8654 }, { "epoch": 0.3800783779926891, "grad_norm": 1.9375, "learning_rate": 3.43345095027734e-05, "loss": 0.4073, "step": 8656 }, { "epoch": 0.3801661964718926, "grad_norm": 2.140625, "learning_rate": 3.432809120348553e-05, "loss": 0.3992, "step": 8658 }, { "epoch": 0.38025401495109606, "grad_norm": 1.9296875, "learning_rate": 3.432167218985022e-05, "loss": 0.4143, "step": 8660 }, { "epoch": 0.3803418334302996, "grad_norm": 1.890625, "learning_rate": 3.4315252462359015e-05, "loss": 0.4135, "step": 8662 }, { "epoch": 0.38042965190950306, "grad_norm": 2.109375, "learning_rate": 3.4308832021503544e-05, "loss": 0.3911, "step": 8664 }, { "epoch": 0.38051747038870654, "grad_norm": 2.078125, "learning_rate": 3.430241086777548e-05, "loss": 0.4136, "step": 8666 }, { "epoch": 0.38060528886791, "grad_norm": 2.25, "learning_rate": 3.429598900166656e-05, "loss": 0.3959, "step": 8668 }, { "epoch": 0.38069310734711354, "grad_norm": 1.7890625, "learning_rate": 3.428956642366857e-05, "loss": 0.3828, "step": 8670 }, { "epoch": 0.380780925826317, "grad_norm": 1.9921875, "learning_rate": 3.4283143134273365e-05, "loss": 0.414, "step": 8672 }, { "epoch": 0.3808687443055205, "grad_norm": 1.921875, "learning_rate": 3.427671913397283e-05, "loss": 0.4011, "step": 8674 }, { "epoch": 0.38095656278472395, "grad_norm": 2.0625, "learning_rate": 3.427029442325893e-05, "loss": 0.37, "step": 8676 }, { "epoch": 0.3810443812639275, "grad_norm": 2.078125, "learning_rate": 3.426386900262365e-05, "loss": 0.4073, "step": 8678 }, { "epoch": 0.38113219974313095, "grad_norm": 2.5625, "learning_rate": 3.425744287255907e-05, "loss": 0.4084, "step": 8680 }, { "epoch": 0.3812200182223344, "grad_norm": 1.9609375, "learning_rate": 3.425101603355728e-05, "loss": 0.4015, "step": 8682 }, { "epoch": 0.3813078367015379, "grad_norm": 1.8515625, "learning_rate": 3.4244588486110475e-05, "loss": 0.3831, "step": 8684 }, { "epoch": 0.3813956551807414, "grad_norm": 2.03125, "learning_rate": 3.423816023071087e-05, "loss": 0.4229, "step": 8686 }, { "epoch": 0.3814834736599449, "grad_norm": 2.21875, "learning_rate": 3.423173126785073e-05, "loss": 0.4307, "step": 8688 }, { "epoch": 0.38157129213914837, "grad_norm": 2.15625, "learning_rate": 3.42253015980224e-05, "loss": 0.4027, "step": 8690 }, { "epoch": 0.3816591106183519, "grad_norm": 1.9765625, "learning_rate": 3.4218871221718266e-05, "loss": 0.4117, "step": 8692 }, { "epoch": 0.38174692909755537, "grad_norm": 2.03125, "learning_rate": 3.4212440139430765e-05, "loss": 0.362, "step": 8694 }, { "epoch": 0.38183474757675884, "grad_norm": 1.9296875, "learning_rate": 3.420600835165239e-05, "loss": 0.396, "step": 8696 }, { "epoch": 0.3819225660559623, "grad_norm": 2.0625, "learning_rate": 3.419957585887568e-05, "loss": 0.3867, "step": 8698 }, { "epoch": 0.38201038453516584, "grad_norm": 1.9140625, "learning_rate": 3.4193142661593255e-05, "loss": 0.4201, "step": 8700 }, { "epoch": 0.3820982030143693, "grad_norm": 2.34375, "learning_rate": 3.418670876029776e-05, "loss": 0.3989, "step": 8702 }, { "epoch": 0.3821860214935728, "grad_norm": 1.765625, "learning_rate": 3.41802741554819e-05, "loss": 0.3829, "step": 8704 }, { "epoch": 0.38227383997277625, "grad_norm": 1.8671875, "learning_rate": 3.417383884763845e-05, "loss": 0.4079, "step": 8706 }, { "epoch": 0.3823616584519798, "grad_norm": 2.453125, "learning_rate": 3.416740283726022e-05, "loss": 0.396, "step": 8708 }, { "epoch": 0.38244947693118325, "grad_norm": 1.9609375, "learning_rate": 3.416096612484008e-05, "loss": 0.3813, "step": 8710 }, { "epoch": 0.3825372954103867, "grad_norm": 2.140625, "learning_rate": 3.415452871087097e-05, "loss": 0.4056, "step": 8712 }, { "epoch": 0.3826251138895902, "grad_norm": 2.03125, "learning_rate": 3.414809059584585e-05, "loss": 0.3915, "step": 8714 }, { "epoch": 0.3827129323687937, "grad_norm": 1.96875, "learning_rate": 3.414165178025775e-05, "loss": 0.3685, "step": 8716 }, { "epoch": 0.3828007508479972, "grad_norm": 2.65625, "learning_rate": 3.413521226459977e-05, "loss": 0.4022, "step": 8718 }, { "epoch": 0.38288856932720067, "grad_norm": 2.640625, "learning_rate": 3.412877204936505e-05, "loss": 0.4032, "step": 8720 }, { "epoch": 0.38297638780640414, "grad_norm": 2.328125, "learning_rate": 3.412233113504677e-05, "loss": 0.3842, "step": 8722 }, { "epoch": 0.38306420628560767, "grad_norm": 2.125, "learning_rate": 3.41158895221382e-05, "loss": 0.3474, "step": 8724 }, { "epoch": 0.38315202476481114, "grad_norm": 2.015625, "learning_rate": 3.4109447211132616e-05, "loss": 0.38, "step": 8726 }, { "epoch": 0.3832398432440146, "grad_norm": 1.9765625, "learning_rate": 3.410300420252338e-05, "loss": 0.3728, "step": 8728 }, { "epoch": 0.3833276617232181, "grad_norm": 2.015625, "learning_rate": 3.40965604968039e-05, "loss": 0.3944, "step": 8730 }, { "epoch": 0.3834154802024216, "grad_norm": 2.515625, "learning_rate": 3.409011609446763e-05, "loss": 0.4302, "step": 8732 }, { "epoch": 0.3835032986816251, "grad_norm": 2.140625, "learning_rate": 3.40836709960081e-05, "loss": 0.4424, "step": 8734 }, { "epoch": 0.38359111716082855, "grad_norm": 2.140625, "learning_rate": 3.407722520191887e-05, "loss": 0.4114, "step": 8736 }, { "epoch": 0.383678935640032, "grad_norm": 2.0625, "learning_rate": 3.4070778712693555e-05, "loss": 0.388, "step": 8738 }, { "epoch": 0.38376675411923555, "grad_norm": 2.109375, "learning_rate": 3.4064331528825834e-05, "loss": 0.3817, "step": 8740 }, { "epoch": 0.383854572598439, "grad_norm": 2.046875, "learning_rate": 3.405788365080942e-05, "loss": 0.4326, "step": 8742 }, { "epoch": 0.3839423910776425, "grad_norm": 1.7890625, "learning_rate": 3.405143507913812e-05, "loss": 0.4267, "step": 8744 }, { "epoch": 0.384030209556846, "grad_norm": 1.984375, "learning_rate": 3.404498581430574e-05, "loss": 0.4047, "step": 8746 }, { "epoch": 0.3841180280360495, "grad_norm": 1.9140625, "learning_rate": 3.403853585680619e-05, "loss": 0.3583, "step": 8748 }, { "epoch": 0.38420584651525297, "grad_norm": 1.9921875, "learning_rate": 3.403208520713338e-05, "loss": 0.3668, "step": 8750 }, { "epoch": 0.38429366499445644, "grad_norm": 2.125, "learning_rate": 3.402563386578133e-05, "loss": 0.4008, "step": 8752 }, { "epoch": 0.38438148347365997, "grad_norm": 1.9140625, "learning_rate": 3.401918183324408e-05, "loss": 0.3952, "step": 8754 }, { "epoch": 0.38446930195286344, "grad_norm": 2.09375, "learning_rate": 3.4012729110015715e-05, "loss": 0.3952, "step": 8756 }, { "epoch": 0.3845571204320669, "grad_norm": 1.9296875, "learning_rate": 3.4006275696590394e-05, "loss": 0.4012, "step": 8758 }, { "epoch": 0.3846449389112704, "grad_norm": 1.8203125, "learning_rate": 3.399982159346232e-05, "loss": 0.3867, "step": 8760 }, { "epoch": 0.3847327573904739, "grad_norm": 2.046875, "learning_rate": 3.3993366801125766e-05, "loss": 0.3622, "step": 8762 }, { "epoch": 0.3848205758696774, "grad_norm": 2.015625, "learning_rate": 3.398691132007501e-05, "loss": 0.4084, "step": 8764 }, { "epoch": 0.38490839434888086, "grad_norm": 1.921875, "learning_rate": 3.398045515080443e-05, "loss": 0.4114, "step": 8766 }, { "epoch": 0.3849962128280843, "grad_norm": 2.171875, "learning_rate": 3.397399829380845e-05, "loss": 0.3966, "step": 8768 }, { "epoch": 0.38508403130728786, "grad_norm": 2.203125, "learning_rate": 3.3967540749581535e-05, "loss": 0.4238, "step": 8770 }, { "epoch": 0.3851718497864913, "grad_norm": 2.5625, "learning_rate": 3.3961082518618195e-05, "loss": 0.3761, "step": 8772 }, { "epoch": 0.3852596682656948, "grad_norm": 2.03125, "learning_rate": 3.395462360141301e-05, "loss": 0.368, "step": 8774 }, { "epoch": 0.38534748674489827, "grad_norm": 1.90625, "learning_rate": 3.394816399846059e-05, "loss": 0.4066, "step": 8776 }, { "epoch": 0.3854353052241018, "grad_norm": 1.8828125, "learning_rate": 3.3941703710255634e-05, "loss": 0.4185, "step": 8778 }, { "epoch": 0.38552312370330527, "grad_norm": 1.9140625, "learning_rate": 3.393524273729286e-05, "loss": 0.3871, "step": 8780 }, { "epoch": 0.38561094218250874, "grad_norm": 2.5, "learning_rate": 3.3928781080067064e-05, "loss": 0.4065, "step": 8782 }, { "epoch": 0.3856987606617122, "grad_norm": 2.21875, "learning_rate": 3.392231873907307e-05, "loss": 0.4016, "step": 8784 }, { "epoch": 0.38578657914091574, "grad_norm": 2.109375, "learning_rate": 3.3915855714805766e-05, "loss": 0.3787, "step": 8786 }, { "epoch": 0.3858743976201192, "grad_norm": 2.109375, "learning_rate": 3.39093920077601e-05, "loss": 0.3677, "step": 8788 }, { "epoch": 0.3859622160993227, "grad_norm": 1.9609375, "learning_rate": 3.3902927618431044e-05, "loss": 0.3667, "step": 8790 }, { "epoch": 0.3860500345785262, "grad_norm": 1.875, "learning_rate": 3.3896462547313665e-05, "loss": 0.3921, "step": 8792 }, { "epoch": 0.3861378530577297, "grad_norm": 2.421875, "learning_rate": 3.3889996794903055e-05, "loss": 0.4078, "step": 8794 }, { "epoch": 0.38622567153693316, "grad_norm": 2.328125, "learning_rate": 3.3883530361694355e-05, "loss": 0.388, "step": 8796 }, { "epoch": 0.38631349001613663, "grad_norm": 2.4375, "learning_rate": 3.3877063248182775e-05, "loss": 0.386, "step": 8798 }, { "epoch": 0.38640130849534016, "grad_norm": 1.9453125, "learning_rate": 3.3870595454863564e-05, "loss": 0.4035, "step": 8800 }, { "epoch": 0.38648912697454363, "grad_norm": 2.4375, "learning_rate": 3.386412698223202e-05, "loss": 0.3703, "step": 8802 }, { "epoch": 0.3865769454537471, "grad_norm": 2.1875, "learning_rate": 3.385765783078351e-05, "loss": 0.4085, "step": 8804 }, { "epoch": 0.3866647639329506, "grad_norm": 2.25, "learning_rate": 3.385118800101344e-05, "loss": 0.3968, "step": 8806 }, { "epoch": 0.3867525824121541, "grad_norm": 2.15625, "learning_rate": 3.384471749341727e-05, "loss": 0.3996, "step": 8808 }, { "epoch": 0.38684040089135757, "grad_norm": 1.890625, "learning_rate": 3.383824630849052e-05, "loss": 0.4039, "step": 8810 }, { "epoch": 0.38692821937056104, "grad_norm": 1.9453125, "learning_rate": 3.383177444672874e-05, "loss": 0.3871, "step": 8812 }, { "epoch": 0.3870160378497645, "grad_norm": 2.078125, "learning_rate": 3.3825301908627556e-05, "loss": 0.4155, "step": 8814 }, { "epoch": 0.38710385632896804, "grad_norm": 1.8515625, "learning_rate": 3.381882869468264e-05, "loss": 0.374, "step": 8816 }, { "epoch": 0.3871916748081715, "grad_norm": 1.8046875, "learning_rate": 3.3812354805389713e-05, "loss": 0.4077, "step": 8818 }, { "epoch": 0.387279493287375, "grad_norm": 1.8359375, "learning_rate": 3.380588024124454e-05, "loss": 0.3662, "step": 8820 }, { "epoch": 0.38736731176657846, "grad_norm": 2.078125, "learning_rate": 3.379940500274294e-05, "loss": 0.4216, "step": 8822 }, { "epoch": 0.387455130245782, "grad_norm": 2.015625, "learning_rate": 3.3792929090380806e-05, "loss": 0.4041, "step": 8824 }, { "epoch": 0.38754294872498546, "grad_norm": 1.9609375, "learning_rate": 3.3786452504654045e-05, "loss": 0.3596, "step": 8826 }, { "epoch": 0.38763076720418893, "grad_norm": 1.75, "learning_rate": 3.377997524605865e-05, "loss": 0.3894, "step": 8828 }, { "epoch": 0.3877185856833924, "grad_norm": 1.9453125, "learning_rate": 3.377349731509064e-05, "loss": 0.3847, "step": 8830 }, { "epoch": 0.38780640416259593, "grad_norm": 2.265625, "learning_rate": 3.376701871224611e-05, "loss": 0.39, "step": 8832 }, { "epoch": 0.3878942226417994, "grad_norm": 2.1875, "learning_rate": 3.3760539438021184e-05, "loss": 0.4149, "step": 8834 }, { "epoch": 0.3879820411210029, "grad_norm": 2.09375, "learning_rate": 3.375405949291205e-05, "loss": 0.3674, "step": 8836 }, { "epoch": 0.38806985960020635, "grad_norm": 1.90625, "learning_rate": 3.374757887741494e-05, "loss": 0.3604, "step": 8838 }, { "epoch": 0.3881576780794099, "grad_norm": 1.921875, "learning_rate": 3.3741097592026136e-05, "loss": 0.392, "step": 8840 }, { "epoch": 0.38824549655861335, "grad_norm": 2.03125, "learning_rate": 3.373461563724198e-05, "loss": 0.411, "step": 8842 }, { "epoch": 0.3883333150378168, "grad_norm": 2.109375, "learning_rate": 3.372813301355888e-05, "loss": 0.4219, "step": 8844 }, { "epoch": 0.38842113351702034, "grad_norm": 2.3125, "learning_rate": 3.3721649721473255e-05, "loss": 0.4035, "step": 8846 }, { "epoch": 0.3885089519962238, "grad_norm": 1.9140625, "learning_rate": 3.3715165761481606e-05, "loss": 0.3774, "step": 8848 }, { "epoch": 0.3885967704754273, "grad_norm": 1.9453125, "learning_rate": 3.370868113408047e-05, "loss": 0.3749, "step": 8850 }, { "epoch": 0.38868458895463076, "grad_norm": 1.8984375, "learning_rate": 3.3702195839766445e-05, "loss": 0.4078, "step": 8852 }, { "epoch": 0.3887724074338343, "grad_norm": 1.9609375, "learning_rate": 3.369570987903618e-05, "loss": 0.3886, "step": 8854 }, { "epoch": 0.38886022591303776, "grad_norm": 2.421875, "learning_rate": 3.368922325238636e-05, "loss": 0.3892, "step": 8856 }, { "epoch": 0.38894804439224123, "grad_norm": 1.9140625, "learning_rate": 3.368273596031374e-05, "loss": 0.388, "step": 8858 }, { "epoch": 0.3890358628714447, "grad_norm": 2.140625, "learning_rate": 3.367624800331513e-05, "loss": 0.3847, "step": 8860 }, { "epoch": 0.38912368135064823, "grad_norm": 2.40625, "learning_rate": 3.366975938188737e-05, "loss": 0.3817, "step": 8862 }, { "epoch": 0.3892114998298517, "grad_norm": 2.109375, "learning_rate": 3.3663270096527344e-05, "loss": 0.3949, "step": 8864 }, { "epoch": 0.3892993183090552, "grad_norm": 2.078125, "learning_rate": 3.3656780147732024e-05, "loss": 0.3954, "step": 8866 }, { "epoch": 0.38938713678825865, "grad_norm": 2.078125, "learning_rate": 3.3650289535998406e-05, "loss": 0.3788, "step": 8868 }, { "epoch": 0.3894749552674622, "grad_norm": 1.90625, "learning_rate": 3.364379826182354e-05, "loss": 0.3814, "step": 8870 }, { "epoch": 0.38956277374666565, "grad_norm": 2.125, "learning_rate": 3.363730632570453e-05, "loss": 0.4086, "step": 8872 }, { "epoch": 0.3896505922258691, "grad_norm": 1.7578125, "learning_rate": 3.363081372813853e-05, "loss": 0.3798, "step": 8874 }, { "epoch": 0.3897384107050726, "grad_norm": 1.9296875, "learning_rate": 3.362432046962275e-05, "loss": 0.4, "step": 8876 }, { "epoch": 0.3898262291842761, "grad_norm": 1.8515625, "learning_rate": 3.3617826550654445e-05, "loss": 0.4174, "step": 8878 }, { "epoch": 0.3899140476634796, "grad_norm": 2.0, "learning_rate": 3.361133197173091e-05, "loss": 0.388, "step": 8880 }, { "epoch": 0.39000186614268306, "grad_norm": 2.03125, "learning_rate": 3.360483673334951e-05, "loss": 0.3948, "step": 8882 }, { "epoch": 0.39008968462188653, "grad_norm": 2.078125, "learning_rate": 3.359834083600765e-05, "loss": 0.3934, "step": 8884 }, { "epoch": 0.39017750310109006, "grad_norm": 1.7421875, "learning_rate": 3.359184428020279e-05, "loss": 0.3894, "step": 8886 }, { "epoch": 0.39026532158029353, "grad_norm": 1.8515625, "learning_rate": 3.3585347066432435e-05, "loss": 0.4051, "step": 8888 }, { "epoch": 0.390353140059497, "grad_norm": 1.828125, "learning_rate": 3.357884919519414e-05, "loss": 0.3849, "step": 8890 }, { "epoch": 0.39044095853870053, "grad_norm": 1.78125, "learning_rate": 3.357235066698552e-05, "loss": 0.4023, "step": 8892 }, { "epoch": 0.390528777017904, "grad_norm": 1.90625, "learning_rate": 3.356585148230423e-05, "loss": 0.3851, "step": 8894 }, { "epoch": 0.3906165954971075, "grad_norm": 1.890625, "learning_rate": 3.355935164164798e-05, "loss": 0.4059, "step": 8896 }, { "epoch": 0.39070441397631095, "grad_norm": 1.8046875, "learning_rate": 3.355285114551453e-05, "loss": 0.3912, "step": 8898 }, { "epoch": 0.3907922324555145, "grad_norm": 2.015625, "learning_rate": 3.3546349994401686e-05, "loss": 0.4149, "step": 8900 }, { "epoch": 0.39088005093471795, "grad_norm": 2.046875, "learning_rate": 3.3539848188807315e-05, "loss": 0.3998, "step": 8902 }, { "epoch": 0.3909678694139214, "grad_norm": 1.890625, "learning_rate": 3.3533345729229315e-05, "loss": 0.3907, "step": 8904 }, { "epoch": 0.3910556878931249, "grad_norm": 2.109375, "learning_rate": 3.352684261616566e-05, "loss": 0.4005, "step": 8906 }, { "epoch": 0.3911435063723284, "grad_norm": 2.0, "learning_rate": 3.352033885011436e-05, "loss": 0.3899, "step": 8908 }, { "epoch": 0.3912313248515319, "grad_norm": 2.078125, "learning_rate": 3.351383443157347e-05, "loss": 0.3959, "step": 8910 }, { "epoch": 0.39131914333073536, "grad_norm": 2.40625, "learning_rate": 3.350732936104108e-05, "loss": 0.3978, "step": 8912 }, { "epoch": 0.39140696180993884, "grad_norm": 2.078125, "learning_rate": 3.3500823639015376e-05, "loss": 0.3955, "step": 8914 }, { "epoch": 0.39149478028914236, "grad_norm": 2.484375, "learning_rate": 3.3494317265994565e-05, "loss": 0.4163, "step": 8916 }, { "epoch": 0.39158259876834584, "grad_norm": 2.34375, "learning_rate": 3.348781024247689e-05, "loss": 0.3811, "step": 8918 }, { "epoch": 0.3916704172475493, "grad_norm": 2.09375, "learning_rate": 3.3481302568960684e-05, "loss": 0.3828, "step": 8920 }, { "epoch": 0.3917582357267528, "grad_norm": 1.8125, "learning_rate": 3.3474794245944294e-05, "loss": 0.3912, "step": 8922 }, { "epoch": 0.3918460542059563, "grad_norm": 1.9765625, "learning_rate": 3.3468285273926123e-05, "loss": 0.3943, "step": 8924 }, { "epoch": 0.3919338726851598, "grad_norm": 1.7578125, "learning_rate": 3.346177565340464e-05, "loss": 0.3705, "step": 8926 }, { "epoch": 0.39202169116436325, "grad_norm": 2.078125, "learning_rate": 3.345526538487834e-05, "loss": 0.4072, "step": 8928 }, { "epoch": 0.3921095096435667, "grad_norm": 2.0625, "learning_rate": 3.34487544688458e-05, "loss": 0.4101, "step": 8930 }, { "epoch": 0.39219732812277025, "grad_norm": 1.875, "learning_rate": 3.3442242905805614e-05, "loss": 0.3849, "step": 8932 }, { "epoch": 0.3922851466019737, "grad_norm": 2.046875, "learning_rate": 3.343573069625645e-05, "loss": 0.3763, "step": 8934 }, { "epoch": 0.3923729650811772, "grad_norm": 2.0625, "learning_rate": 3.3429217840697e-05, "loss": 0.3715, "step": 8936 }, { "epoch": 0.39246078356038067, "grad_norm": 1.953125, "learning_rate": 3.3422704339626024e-05, "loss": 0.4216, "step": 8938 }, { "epoch": 0.3925486020395842, "grad_norm": 1.875, "learning_rate": 3.341619019354233e-05, "loss": 0.3712, "step": 8940 }, { "epoch": 0.39263642051878767, "grad_norm": 1.8359375, "learning_rate": 3.3409675402944786e-05, "loss": 0.3723, "step": 8942 }, { "epoch": 0.39272423899799114, "grad_norm": 1.9921875, "learning_rate": 3.3403159968332273e-05, "loss": 0.3759, "step": 8944 }, { "epoch": 0.39281205747719466, "grad_norm": 2.3125, "learning_rate": 3.339664389020376e-05, "loss": 0.3894, "step": 8946 }, { "epoch": 0.39289987595639814, "grad_norm": 1.9375, "learning_rate": 3.339012716905824e-05, "loss": 0.3792, "step": 8948 }, { "epoch": 0.3929876944356016, "grad_norm": 2.109375, "learning_rate": 3.338360980539477e-05, "loss": 0.4002, "step": 8950 }, { "epoch": 0.3930755129148051, "grad_norm": 2.140625, "learning_rate": 3.3377091799712454e-05, "loss": 0.3979, "step": 8952 }, { "epoch": 0.3931633313940086, "grad_norm": 1.8125, "learning_rate": 3.3370573152510445e-05, "loss": 0.4109, "step": 8954 }, { "epoch": 0.3932511498732121, "grad_norm": 1.96875, "learning_rate": 3.336405386428792e-05, "loss": 0.4168, "step": 8956 }, { "epoch": 0.39333896835241555, "grad_norm": 1.78125, "learning_rate": 3.335753393554416e-05, "loss": 0.3975, "step": 8958 }, { "epoch": 0.393426786831619, "grad_norm": 1.984375, "learning_rate": 3.335101336677845e-05, "loss": 0.388, "step": 8960 }, { "epoch": 0.39351460531082255, "grad_norm": 1.9296875, "learning_rate": 3.3344492158490134e-05, "loss": 0.4136, "step": 8962 }, { "epoch": 0.393602423790026, "grad_norm": 2.171875, "learning_rate": 3.33379703111786e-05, "loss": 0.4321, "step": 8964 }, { "epoch": 0.3936902422692295, "grad_norm": 2.015625, "learning_rate": 3.3331447825343306e-05, "loss": 0.3847, "step": 8966 }, { "epoch": 0.39377806074843297, "grad_norm": 1.984375, "learning_rate": 3.3324924701483734e-05, "loss": 0.3922, "step": 8968 }, { "epoch": 0.3938658792276365, "grad_norm": 1.890625, "learning_rate": 3.331840094009944e-05, "loss": 0.3798, "step": 8970 }, { "epoch": 0.39395369770683997, "grad_norm": 1.921875, "learning_rate": 3.3311876541690014e-05, "loss": 0.3968, "step": 8972 }, { "epoch": 0.39404151618604344, "grad_norm": 2.046875, "learning_rate": 3.330535150675508e-05, "loss": 0.4071, "step": 8974 }, { "epoch": 0.3941293346652469, "grad_norm": 2.140625, "learning_rate": 3.329882583579433e-05, "loss": 0.381, "step": 8976 }, { "epoch": 0.39421715314445044, "grad_norm": 2.421875, "learning_rate": 3.329229952930752e-05, "loss": 0.3968, "step": 8978 }, { "epoch": 0.3943049716236539, "grad_norm": 2.296875, "learning_rate": 3.3285772587794417e-05, "loss": 0.3864, "step": 8980 }, { "epoch": 0.3943927901028574, "grad_norm": 2.03125, "learning_rate": 3.3279245011754874e-05, "loss": 0.4029, "step": 8982 }, { "epoch": 0.39448060858206085, "grad_norm": 2.046875, "learning_rate": 3.3272716801688754e-05, "loss": 0.3987, "step": 8984 }, { "epoch": 0.3945684270612644, "grad_norm": 1.859375, "learning_rate": 3.3266187958096e-05, "loss": 0.4145, "step": 8986 }, { "epoch": 0.39465624554046785, "grad_norm": 1.96875, "learning_rate": 3.325965848147659e-05, "loss": 0.3853, "step": 8988 }, { "epoch": 0.3947440640196713, "grad_norm": 2.015625, "learning_rate": 3.3253128372330556e-05, "loss": 0.3784, "step": 8990 }, { "epoch": 0.39483188249887485, "grad_norm": 1.7734375, "learning_rate": 3.324659763115797e-05, "loss": 0.3869, "step": 8992 }, { "epoch": 0.3949197009780783, "grad_norm": 1.78125, "learning_rate": 3.324006625845896e-05, "loss": 0.4024, "step": 8994 }, { "epoch": 0.3950075194572818, "grad_norm": 2.046875, "learning_rate": 3.3233534254733706e-05, "loss": 0.4002, "step": 8996 }, { "epoch": 0.39509533793648527, "grad_norm": 1.875, "learning_rate": 3.322700162048242e-05, "loss": 0.3972, "step": 8998 }, { "epoch": 0.3951831564156888, "grad_norm": 1.84375, "learning_rate": 3.322046835620538e-05, "loss": 0.3898, "step": 9000 }, { "epoch": 0.39527097489489227, "grad_norm": 2.046875, "learning_rate": 3.32139344624029e-05, "loss": 0.3801, "step": 9002 }, { "epoch": 0.39535879337409574, "grad_norm": 1.9453125, "learning_rate": 3.320739993957535e-05, "loss": 0.4005, "step": 9004 }, { "epoch": 0.3954466118532992, "grad_norm": 2.078125, "learning_rate": 3.320086478822315e-05, "loss": 0.3623, "step": 9006 }, { "epoch": 0.39553443033250274, "grad_norm": 1.953125, "learning_rate": 3.319432900884676e-05, "loss": 0.379, "step": 9008 }, { "epoch": 0.3956222488117062, "grad_norm": 1.8515625, "learning_rate": 3.318779260194668e-05, "loss": 0.3844, "step": 9010 }, { "epoch": 0.3957100672909097, "grad_norm": 1.8359375, "learning_rate": 3.318125556802348e-05, "loss": 0.3694, "step": 9012 }, { "epoch": 0.39579788577011316, "grad_norm": 2.078125, "learning_rate": 3.317471790757778e-05, "loss": 0.3808, "step": 9014 }, { "epoch": 0.3958857042493167, "grad_norm": 1.8984375, "learning_rate": 3.316817962111022e-05, "loss": 0.4207, "step": 9016 }, { "epoch": 0.39597352272852016, "grad_norm": 1.9296875, "learning_rate": 3.31616407091215e-05, "loss": 0.3916, "step": 9018 }, { "epoch": 0.3960613412077236, "grad_norm": 1.96875, "learning_rate": 3.315510117211238e-05, "loss": 0.379, "step": 9020 }, { "epoch": 0.3961491596869271, "grad_norm": 1.7734375, "learning_rate": 3.314856101058366e-05, "loss": 0.3991, "step": 9022 }, { "epoch": 0.3962369781661306, "grad_norm": 2.03125, "learning_rate": 3.314202022503618e-05, "loss": 0.3913, "step": 9024 }, { "epoch": 0.3963247966453341, "grad_norm": 1.90625, "learning_rate": 3.313547881597084e-05, "loss": 0.3706, "step": 9026 }, { "epoch": 0.39641261512453757, "grad_norm": 1.9375, "learning_rate": 3.312893678388858e-05, "loss": 0.3919, "step": 9028 }, { "epoch": 0.39650043360374104, "grad_norm": 1.8671875, "learning_rate": 3.31223941292904e-05, "loss": 0.3523, "step": 9030 }, { "epoch": 0.39658825208294457, "grad_norm": 2.125, "learning_rate": 3.3115850852677327e-05, "loss": 0.3827, "step": 9032 }, { "epoch": 0.39667607056214804, "grad_norm": 2.0, "learning_rate": 3.310930695455046e-05, "loss": 0.3588, "step": 9034 }, { "epoch": 0.3967638890413515, "grad_norm": 2.03125, "learning_rate": 3.3102762435410904e-05, "loss": 0.4117, "step": 9036 }, { "epoch": 0.396851707520555, "grad_norm": 1.9921875, "learning_rate": 3.3096217295759866e-05, "loss": 0.3824, "step": 9038 }, { "epoch": 0.3969395259997585, "grad_norm": 2.09375, "learning_rate": 3.308967153609857e-05, "loss": 0.3642, "step": 9040 }, { "epoch": 0.397027344478962, "grad_norm": 2.15625, "learning_rate": 3.308312515692828e-05, "loss": 0.3674, "step": 9042 }, { "epoch": 0.39711516295816546, "grad_norm": 1.8671875, "learning_rate": 3.307657815875034e-05, "loss": 0.4016, "step": 9044 }, { "epoch": 0.397202981437369, "grad_norm": 2.15625, "learning_rate": 3.30700305420661e-05, "loss": 0.372, "step": 9046 }, { "epoch": 0.39729079991657246, "grad_norm": 2.203125, "learning_rate": 3.306348230737699e-05, "loss": 0.426, "step": 9048 }, { "epoch": 0.39737861839577593, "grad_norm": 1.859375, "learning_rate": 3.305693345518447e-05, "loss": 0.3717, "step": 9050 }, { "epoch": 0.3974664368749794, "grad_norm": 2.171875, "learning_rate": 3.3050383985990056e-05, "loss": 0.4166, "step": 9052 }, { "epoch": 0.39755425535418293, "grad_norm": 2.328125, "learning_rate": 3.304383390029531e-05, "loss": 0.4124, "step": 9054 }, { "epoch": 0.3976420738333864, "grad_norm": 2.21875, "learning_rate": 3.3037283198601826e-05, "loss": 0.4108, "step": 9056 }, { "epoch": 0.39772989231258987, "grad_norm": 1.8515625, "learning_rate": 3.303073188141128e-05, "loss": 0.3818, "step": 9058 }, { "epoch": 0.39781771079179334, "grad_norm": 2.046875, "learning_rate": 3.3024179949225355e-05, "loss": 0.3845, "step": 9060 }, { "epoch": 0.39790552927099687, "grad_norm": 2.078125, "learning_rate": 3.30176274025458e-05, "loss": 0.4132, "step": 9062 }, { "epoch": 0.39799334775020034, "grad_norm": 2.3125, "learning_rate": 3.301107424187443e-05, "loss": 0.3977, "step": 9064 }, { "epoch": 0.3980811662294038, "grad_norm": 2.265625, "learning_rate": 3.300452046771306e-05, "loss": 0.3447, "step": 9066 }, { "epoch": 0.3981689847086073, "grad_norm": 1.953125, "learning_rate": 3.29979660805636e-05, "loss": 0.3903, "step": 9068 }, { "epoch": 0.3982568031878108, "grad_norm": 1.984375, "learning_rate": 3.299141108092799e-05, "loss": 0.3841, "step": 9070 }, { "epoch": 0.3983446216670143, "grad_norm": 2.125, "learning_rate": 3.298485546930819e-05, "loss": 0.3854, "step": 9072 }, { "epoch": 0.39843244014621776, "grad_norm": 1.8125, "learning_rate": 3.2978299246206246e-05, "loss": 0.3778, "step": 9074 }, { "epoch": 0.39852025862542123, "grad_norm": 2.125, "learning_rate": 3.297174241212424e-05, "loss": 0.4199, "step": 9076 }, { "epoch": 0.39860807710462476, "grad_norm": 2.09375, "learning_rate": 3.296518496756428e-05, "loss": 0.3954, "step": 9078 }, { "epoch": 0.39869589558382823, "grad_norm": 1.984375, "learning_rate": 3.295862691302855e-05, "loss": 0.3856, "step": 9080 }, { "epoch": 0.3987837140630317, "grad_norm": 1.84375, "learning_rate": 3.295206824901926e-05, "loss": 0.3974, "step": 9082 }, { "epoch": 0.3988715325422352, "grad_norm": 2.046875, "learning_rate": 3.2945508976038694e-05, "loss": 0.4178, "step": 9084 }, { "epoch": 0.3989593510214387, "grad_norm": 1.9609375, "learning_rate": 3.293894909458913e-05, "loss": 0.3664, "step": 9086 }, { "epoch": 0.3990471695006422, "grad_norm": 1.984375, "learning_rate": 3.2932388605172946e-05, "loss": 0.3838, "step": 9088 }, { "epoch": 0.39913498797984565, "grad_norm": 2.625, "learning_rate": 3.2925827508292535e-05, "loss": 0.3785, "step": 9090 }, { "epoch": 0.3992228064590491, "grad_norm": 1.96875, "learning_rate": 3.2919265804450364e-05, "loss": 0.3911, "step": 9092 }, { "epoch": 0.39931062493825265, "grad_norm": 2.109375, "learning_rate": 3.291270349414891e-05, "loss": 0.3595, "step": 9094 }, { "epoch": 0.3993984434174561, "grad_norm": 1.921875, "learning_rate": 3.290614057789073e-05, "loss": 0.3703, "step": 9096 }, { "epoch": 0.3994862618966596, "grad_norm": 1.9453125, "learning_rate": 3.289957705617841e-05, "loss": 0.3909, "step": 9098 }, { "epoch": 0.3995740803758631, "grad_norm": 2.015625, "learning_rate": 3.2893012929514574e-05, "loss": 0.391, "step": 9100 }, { "epoch": 0.3996618988550666, "grad_norm": 2.53125, "learning_rate": 3.288644819840193e-05, "loss": 0.3775, "step": 9102 }, { "epoch": 0.39974971733427006, "grad_norm": 2.0, "learning_rate": 3.2879882863343184e-05, "loss": 0.4158, "step": 9104 }, { "epoch": 0.39983753581347353, "grad_norm": 2.15625, "learning_rate": 3.287331692484113e-05, "loss": 0.37, "step": 9106 }, { "epoch": 0.39992535429267706, "grad_norm": 2.109375, "learning_rate": 3.286675038339857e-05, "loss": 0.3767, "step": 9108 }, { "epoch": 0.40001317277188053, "grad_norm": 2.1875, "learning_rate": 3.286018323951838e-05, "loss": 0.3919, "step": 9110 }, { "epoch": 0.400100991251084, "grad_norm": 1.9375, "learning_rate": 3.2853615493703475e-05, "loss": 0.4056, "step": 9112 }, { "epoch": 0.4001888097302875, "grad_norm": 1.8046875, "learning_rate": 3.284704714645681e-05, "loss": 0.4012, "step": 9114 }, { "epoch": 0.400276628209491, "grad_norm": 1.9453125, "learning_rate": 3.28404781982814e-05, "loss": 0.4101, "step": 9116 }, { "epoch": 0.4003644466886945, "grad_norm": 1.8359375, "learning_rate": 3.283390864968029e-05, "loss": 0.3904, "step": 9118 }, { "epoch": 0.40045226516789795, "grad_norm": 1.8359375, "learning_rate": 3.282733850115657e-05, "loss": 0.4022, "step": 9120 }, { "epoch": 0.4005400836471014, "grad_norm": 1.796875, "learning_rate": 3.28207677532134e-05, "loss": 0.3885, "step": 9122 }, { "epoch": 0.40062790212630495, "grad_norm": 1.8046875, "learning_rate": 3.281419640635395e-05, "loss": 0.4061, "step": 9124 }, { "epoch": 0.4007157206055084, "grad_norm": 1.875, "learning_rate": 3.2807624461081477e-05, "loss": 0.3681, "step": 9126 }, { "epoch": 0.4008035390847119, "grad_norm": 1.8515625, "learning_rate": 3.280105191789925e-05, "loss": 0.3851, "step": 9128 }, { "epoch": 0.40089135756391536, "grad_norm": 1.8125, "learning_rate": 3.279447877731058e-05, "loss": 0.3873, "step": 9130 }, { "epoch": 0.4009791760431189, "grad_norm": 1.8828125, "learning_rate": 3.2787905039818875e-05, "loss": 0.3851, "step": 9132 }, { "epoch": 0.40106699452232236, "grad_norm": 2.125, "learning_rate": 3.278133070592753e-05, "loss": 0.3864, "step": 9134 }, { "epoch": 0.40115481300152583, "grad_norm": 2.0625, "learning_rate": 3.277475577614002e-05, "loss": 0.3968, "step": 9136 }, { "epoch": 0.4012426314807293, "grad_norm": 1.8671875, "learning_rate": 3.276818025095984e-05, "loss": 0.3765, "step": 9138 }, { "epoch": 0.40133044995993283, "grad_norm": 1.859375, "learning_rate": 3.276160413089056e-05, "loss": 0.3751, "step": 9140 }, { "epoch": 0.4014182684391363, "grad_norm": 1.7890625, "learning_rate": 3.275502741643577e-05, "loss": 0.4117, "step": 9142 }, { "epoch": 0.4015060869183398, "grad_norm": 1.8046875, "learning_rate": 3.274845010809913e-05, "loss": 0.3599, "step": 9144 }, { "epoch": 0.4015939053975433, "grad_norm": 1.96875, "learning_rate": 3.274187220638431e-05, "loss": 0.4034, "step": 9146 }, { "epoch": 0.4016817238767468, "grad_norm": 2.15625, "learning_rate": 3.273529371179507e-05, "loss": 0.3909, "step": 9148 }, { "epoch": 0.40176954235595025, "grad_norm": 2.171875, "learning_rate": 3.2728714624835174e-05, "loss": 0.3765, "step": 9150 }, { "epoch": 0.4018573608351537, "grad_norm": 2.359375, "learning_rate": 3.272213494600847e-05, "loss": 0.3848, "step": 9152 }, { "epoch": 0.40194517931435725, "grad_norm": 1.984375, "learning_rate": 3.271555467581882e-05, "loss": 0.3683, "step": 9154 }, { "epoch": 0.4020329977935607, "grad_norm": 2.078125, "learning_rate": 3.270897381477014e-05, "loss": 0.376, "step": 9156 }, { "epoch": 0.4021208162727642, "grad_norm": 2.265625, "learning_rate": 3.27023923633664e-05, "loss": 0.4018, "step": 9158 }, { "epoch": 0.40220863475196766, "grad_norm": 1.9609375, "learning_rate": 3.26958103221116e-05, "loss": 0.4136, "step": 9160 }, { "epoch": 0.4022964532311712, "grad_norm": 1.875, "learning_rate": 3.26892276915098e-05, "loss": 0.3917, "step": 9162 }, { "epoch": 0.40238427171037466, "grad_norm": 1.8359375, "learning_rate": 3.268264447206511e-05, "loss": 0.3996, "step": 9164 }, { "epoch": 0.40247209018957814, "grad_norm": 2.0, "learning_rate": 3.267606066428166e-05, "loss": 0.3714, "step": 9166 }, { "epoch": 0.4025599086687816, "grad_norm": 2.25, "learning_rate": 3.266947626866365e-05, "loss": 0.3664, "step": 9168 }, { "epoch": 0.40264772714798513, "grad_norm": 1.984375, "learning_rate": 3.26628912857153e-05, "loss": 0.3979, "step": 9170 }, { "epoch": 0.4027355456271886, "grad_norm": 1.796875, "learning_rate": 3.2656305715940905e-05, "loss": 0.3641, "step": 9172 }, { "epoch": 0.4028233641063921, "grad_norm": 2.1875, "learning_rate": 3.264971955984478e-05, "loss": 0.3947, "step": 9174 }, { "epoch": 0.40291118258559555, "grad_norm": 1.9140625, "learning_rate": 3.2643132817931294e-05, "loss": 0.3627, "step": 9176 }, { "epoch": 0.4029990010647991, "grad_norm": 1.96875, "learning_rate": 3.263654549070486e-05, "loss": 0.3983, "step": 9178 }, { "epoch": 0.40308681954400255, "grad_norm": 1.8671875, "learning_rate": 3.262995757866996e-05, "loss": 0.4208, "step": 9180 }, { "epoch": 0.403174638023206, "grad_norm": 2.09375, "learning_rate": 3.262336908233106e-05, "loss": 0.4071, "step": 9182 }, { "epoch": 0.4032624565024095, "grad_norm": 2.1875, "learning_rate": 3.2616780002192746e-05, "loss": 0.4131, "step": 9184 }, { "epoch": 0.403350274981613, "grad_norm": 1.9375, "learning_rate": 3.2610190338759586e-05, "loss": 0.4001, "step": 9186 }, { "epoch": 0.4034380934608165, "grad_norm": 1.875, "learning_rate": 3.2603600092536216e-05, "loss": 0.4055, "step": 9188 }, { "epoch": 0.40352591194001997, "grad_norm": 1.8046875, "learning_rate": 3.259700926402734e-05, "loss": 0.3863, "step": 9190 }, { "epoch": 0.40361373041922344, "grad_norm": 1.8828125, "learning_rate": 3.2590417853737666e-05, "loss": 0.4105, "step": 9192 }, { "epoch": 0.40370154889842697, "grad_norm": 1.8828125, "learning_rate": 3.258382586217198e-05, "loss": 0.4068, "step": 9194 }, { "epoch": 0.40378936737763044, "grad_norm": 1.9453125, "learning_rate": 3.2577233289835085e-05, "loss": 0.3828, "step": 9196 }, { "epoch": 0.4038771858568339, "grad_norm": 1.9609375, "learning_rate": 3.257064013723185e-05, "loss": 0.3971, "step": 9198 }, { "epoch": 0.40396500433603744, "grad_norm": 1.984375, "learning_rate": 3.256404640486719e-05, "loss": 0.4, "step": 9200 }, { "epoch": 0.4040528228152409, "grad_norm": 1.875, "learning_rate": 3.2557452093246025e-05, "loss": 0.3995, "step": 9202 }, { "epoch": 0.4041406412944444, "grad_norm": 1.828125, "learning_rate": 3.255085720287337e-05, "loss": 0.3784, "step": 9204 }, { "epoch": 0.40422845977364785, "grad_norm": 2.140625, "learning_rate": 3.254426173425428e-05, "loss": 0.4098, "step": 9206 }, { "epoch": 0.4043162782528514, "grad_norm": 2.234375, "learning_rate": 3.2537665687893796e-05, "loss": 0.4037, "step": 9208 }, { "epoch": 0.40440409673205485, "grad_norm": 2.15625, "learning_rate": 3.253106906429707e-05, "loss": 0.3656, "step": 9210 }, { "epoch": 0.4044919152112583, "grad_norm": 2.09375, "learning_rate": 3.2524471863969274e-05, "loss": 0.382, "step": 9212 }, { "epoch": 0.4045797336904618, "grad_norm": 1.828125, "learning_rate": 3.251787408741562e-05, "loss": 0.3949, "step": 9214 }, { "epoch": 0.4046675521696653, "grad_norm": 2.40625, "learning_rate": 3.2511275735141365e-05, "loss": 0.4174, "step": 9216 }, { "epoch": 0.4047553706488688, "grad_norm": 1.9609375, "learning_rate": 3.250467680765181e-05, "loss": 0.4042, "step": 9218 }, { "epoch": 0.40484318912807227, "grad_norm": 1.9140625, "learning_rate": 3.2498077305452316e-05, "loss": 0.3811, "step": 9220 }, { "epoch": 0.40493100760727574, "grad_norm": 1.796875, "learning_rate": 3.249147722904826e-05, "loss": 0.3887, "step": 9222 }, { "epoch": 0.40501882608647927, "grad_norm": 1.8828125, "learning_rate": 3.248487657894508e-05, "loss": 0.3687, "step": 9224 }, { "epoch": 0.40510664456568274, "grad_norm": 1.90625, "learning_rate": 3.247827535564826e-05, "loss": 0.377, "step": 9226 }, { "epoch": 0.4051944630448862, "grad_norm": 2.046875, "learning_rate": 3.2471673559663314e-05, "loss": 0.382, "step": 9228 }, { "epoch": 0.4052822815240897, "grad_norm": 1.828125, "learning_rate": 3.246507119149582e-05, "loss": 0.3965, "step": 9230 }, { "epoch": 0.4053701000032932, "grad_norm": 1.859375, "learning_rate": 3.245846825165139e-05, "loss": 0.3912, "step": 9232 }, { "epoch": 0.4054579184824967, "grad_norm": 2.203125, "learning_rate": 3.245186474063566e-05, "loss": 0.3826, "step": 9234 }, { "epoch": 0.40554573696170015, "grad_norm": 2.3125, "learning_rate": 3.244526065895436e-05, "loss": 0.3925, "step": 9236 }, { "epoch": 0.4056335554409036, "grad_norm": 2.0625, "learning_rate": 3.24386560071132e-05, "loss": 0.3818, "step": 9238 }, { "epoch": 0.40572137392010715, "grad_norm": 1.7890625, "learning_rate": 3.243205078561798e-05, "loss": 0.3815, "step": 9240 }, { "epoch": 0.4058091923993106, "grad_norm": 1.875, "learning_rate": 3.242544499497453e-05, "loss": 0.3767, "step": 9242 }, { "epoch": 0.4058970108785141, "grad_norm": 2.125, "learning_rate": 3.241883863568873e-05, "loss": 0.3857, "step": 9244 }, { "epoch": 0.4059848293577176, "grad_norm": 1.9453125, "learning_rate": 3.241223170826648e-05, "loss": 0.393, "step": 9246 }, { "epoch": 0.4060726478369211, "grad_norm": 2.234375, "learning_rate": 3.240562421321376e-05, "loss": 0.3945, "step": 9248 }, { "epoch": 0.40616046631612457, "grad_norm": 1.8671875, "learning_rate": 3.2399016151036555e-05, "loss": 0.3851, "step": 9250 }, { "epoch": 0.40624828479532804, "grad_norm": 2.4375, "learning_rate": 3.239240752224091e-05, "loss": 0.377, "step": 9252 }, { "epoch": 0.40633610327453157, "grad_norm": 2.03125, "learning_rate": 3.238579832733294e-05, "loss": 0.3738, "step": 9254 }, { "epoch": 0.40642392175373504, "grad_norm": 2.125, "learning_rate": 3.2379188566818765e-05, "loss": 0.3562, "step": 9256 }, { "epoch": 0.4065117402329385, "grad_norm": 1.9765625, "learning_rate": 3.237257824120455e-05, "loss": 0.3817, "step": 9258 }, { "epoch": 0.406599558712142, "grad_norm": 2.078125, "learning_rate": 3.2365967350996526e-05, "loss": 0.3536, "step": 9260 }, { "epoch": 0.4066873771913455, "grad_norm": 2.4375, "learning_rate": 3.2359355896700964e-05, "loss": 0.3519, "step": 9262 }, { "epoch": 0.406775195670549, "grad_norm": 1.859375, "learning_rate": 3.235274387882416e-05, "loss": 0.3692, "step": 9264 }, { "epoch": 0.40686301414975246, "grad_norm": 1.9375, "learning_rate": 3.234613129787246e-05, "loss": 0.3724, "step": 9266 }, { "epoch": 0.4069508326289559, "grad_norm": 1.75, "learning_rate": 3.233951815435228e-05, "loss": 0.3574, "step": 9268 }, { "epoch": 0.40703865110815945, "grad_norm": 2.25, "learning_rate": 3.233290444877003e-05, "loss": 0.3948, "step": 9270 }, { "epoch": 0.4071264695873629, "grad_norm": 2.0625, "learning_rate": 3.2326290181632204e-05, "loss": 0.4047, "step": 9272 }, { "epoch": 0.4072142880665664, "grad_norm": 1.953125, "learning_rate": 3.2319675353445314e-05, "loss": 0.3698, "step": 9274 }, { "epoch": 0.40730210654576987, "grad_norm": 1.875, "learning_rate": 3.231305996471593e-05, "loss": 0.3717, "step": 9276 }, { "epoch": 0.4073899250249734, "grad_norm": 2.015625, "learning_rate": 3.230644401595067e-05, "loss": 0.4248, "step": 9278 }, { "epoch": 0.40747774350417687, "grad_norm": 2.140625, "learning_rate": 3.2299827507656165e-05, "loss": 0.3944, "step": 9280 }, { "epoch": 0.40756556198338034, "grad_norm": 2.46875, "learning_rate": 3.229321044033913e-05, "loss": 0.3982, "step": 9282 }, { "epoch": 0.4076533804625838, "grad_norm": 3.28125, "learning_rate": 3.228659281450628e-05, "loss": 0.3924, "step": 9284 }, { "epoch": 0.40774119894178734, "grad_norm": 2.125, "learning_rate": 3.227997463066441e-05, "loss": 0.3693, "step": 9286 }, { "epoch": 0.4078290174209908, "grad_norm": 2.3125, "learning_rate": 3.227335588932034e-05, "loss": 0.3468, "step": 9288 }, { "epoch": 0.4079168359001943, "grad_norm": 1.6875, "learning_rate": 3.226673659098093e-05, "loss": 0.4128, "step": 9290 }, { "epoch": 0.40800465437939776, "grad_norm": 1.828125, "learning_rate": 3.226011673615309e-05, "loss": 0.3843, "step": 9292 }, { "epoch": 0.4080924728586013, "grad_norm": 2.15625, "learning_rate": 3.225349632534378e-05, "loss": 0.4072, "step": 9294 }, { "epoch": 0.40818029133780476, "grad_norm": 1.765625, "learning_rate": 3.2246875359059966e-05, "loss": 0.3747, "step": 9296 }, { "epoch": 0.40826810981700823, "grad_norm": 2.15625, "learning_rate": 3.2240253837808706e-05, "loss": 0.3809, "step": 9298 }, { "epoch": 0.40835592829621176, "grad_norm": 2.234375, "learning_rate": 3.223363176209708e-05, "loss": 0.3934, "step": 9300 }, { "epoch": 0.40844374677541523, "grad_norm": 1.8828125, "learning_rate": 3.222700913243219e-05, "loss": 0.4021, "step": 9302 }, { "epoch": 0.4085315652546187, "grad_norm": 1.796875, "learning_rate": 3.2220385949321215e-05, "loss": 0.379, "step": 9304 }, { "epoch": 0.4086193837338222, "grad_norm": 1.859375, "learning_rate": 3.221376221327135e-05, "loss": 0.4074, "step": 9306 }, { "epoch": 0.4087072022130257, "grad_norm": 1.9765625, "learning_rate": 3.220713792478984e-05, "loss": 0.4007, "step": 9308 }, { "epoch": 0.40879502069222917, "grad_norm": 1.8515625, "learning_rate": 3.220051308438399e-05, "loss": 0.3983, "step": 9310 }, { "epoch": 0.40888283917143264, "grad_norm": 1.8828125, "learning_rate": 3.2193887692561115e-05, "loss": 0.3699, "step": 9312 }, { "epoch": 0.4089706576506361, "grad_norm": 2.0, "learning_rate": 3.2187261749828594e-05, "loss": 0.3806, "step": 9314 }, { "epoch": 0.40905847612983964, "grad_norm": 2.296875, "learning_rate": 3.218063525669385e-05, "loss": 0.3909, "step": 9316 }, { "epoch": 0.4091462946090431, "grad_norm": 1.8984375, "learning_rate": 3.2174008213664335e-05, "loss": 0.3955, "step": 9318 }, { "epoch": 0.4092341130882466, "grad_norm": 2.15625, "learning_rate": 3.216738062124756e-05, "loss": 0.3532, "step": 9320 }, { "epoch": 0.40932193156745006, "grad_norm": 1.9375, "learning_rate": 3.216075247995105e-05, "loss": 0.375, "step": 9322 }, { "epoch": 0.4094097500466536, "grad_norm": 1.9140625, "learning_rate": 3.21541237902824e-05, "loss": 0.378, "step": 9324 }, { "epoch": 0.40949756852585706, "grad_norm": 1.9296875, "learning_rate": 3.214749455274923e-05, "loss": 0.3989, "step": 9326 }, { "epoch": 0.40958538700506053, "grad_norm": 2.28125, "learning_rate": 3.2140864767859216e-05, "loss": 0.3949, "step": 9328 }, { "epoch": 0.409673205484264, "grad_norm": 1.9453125, "learning_rate": 3.213423443612007e-05, "loss": 0.4091, "step": 9330 }, { "epoch": 0.40976102396346753, "grad_norm": 1.9296875, "learning_rate": 3.2127603558039545e-05, "loss": 0.4058, "step": 9332 }, { "epoch": 0.409848842442671, "grad_norm": 1.890625, "learning_rate": 3.212097213412542e-05, "loss": 0.3779, "step": 9334 }, { "epoch": 0.4099366609218745, "grad_norm": 1.9921875, "learning_rate": 3.211434016488555e-05, "loss": 0.3734, "step": 9336 }, { "epoch": 0.41002447940107795, "grad_norm": 2.0625, "learning_rate": 3.2107707650827804e-05, "loss": 0.3795, "step": 9338 }, { "epoch": 0.4101122978802815, "grad_norm": 1.921875, "learning_rate": 3.2101074592460094e-05, "loss": 0.3807, "step": 9340 }, { "epoch": 0.41020011635948495, "grad_norm": 1.8359375, "learning_rate": 3.2094440990290395e-05, "loss": 0.3797, "step": 9342 }, { "epoch": 0.4102879348386884, "grad_norm": 1.8515625, "learning_rate": 3.208780684482671e-05, "loss": 0.3895, "step": 9344 }, { "epoch": 0.41037575331789194, "grad_norm": 2.015625, "learning_rate": 3.208117215657707e-05, "loss": 0.3704, "step": 9346 }, { "epoch": 0.4104635717970954, "grad_norm": 1.9453125, "learning_rate": 3.207453692604957e-05, "loss": 0.3739, "step": 9348 }, { "epoch": 0.4105513902762989, "grad_norm": 2.03125, "learning_rate": 3.206790115375234e-05, "loss": 0.3953, "step": 9350 }, { "epoch": 0.41063920875550236, "grad_norm": 1.9453125, "learning_rate": 3.206126484019354e-05, "loss": 0.3564, "step": 9352 }, { "epoch": 0.4107270272347059, "grad_norm": 2.015625, "learning_rate": 3.205462798588139e-05, "loss": 0.3534, "step": 9354 }, { "epoch": 0.41081484571390936, "grad_norm": 1.8828125, "learning_rate": 3.204799059132414e-05, "loss": 0.4022, "step": 9356 }, { "epoch": 0.41090266419311283, "grad_norm": 2.28125, "learning_rate": 3.204135265703008e-05, "loss": 0.385, "step": 9358 }, { "epoch": 0.4109904826723163, "grad_norm": 2.203125, "learning_rate": 3.203471418350754e-05, "loss": 0.3867, "step": 9360 }, { "epoch": 0.41107830115151983, "grad_norm": 2.0625, "learning_rate": 3.20280751712649e-05, "loss": 0.4418, "step": 9362 }, { "epoch": 0.4111661196307233, "grad_norm": 2.21875, "learning_rate": 3.2021435620810587e-05, "loss": 0.3827, "step": 9364 }, { "epoch": 0.4112539381099268, "grad_norm": 1.796875, "learning_rate": 3.2014795532653054e-05, "loss": 0.4018, "step": 9366 }, { "epoch": 0.41134175658913025, "grad_norm": 1.9921875, "learning_rate": 3.200815490730079e-05, "loss": 0.4129, "step": 9368 }, { "epoch": 0.4114295750683338, "grad_norm": 2.046875, "learning_rate": 3.200151374526234e-05, "loss": 0.3971, "step": 9370 }, { "epoch": 0.41151739354753725, "grad_norm": 1.984375, "learning_rate": 3.19948720470463e-05, "loss": 0.3694, "step": 9372 }, { "epoch": 0.4116052120267407, "grad_norm": 1.9921875, "learning_rate": 3.198822981316127e-05, "loss": 0.3975, "step": 9374 }, { "epoch": 0.4116930305059442, "grad_norm": 1.984375, "learning_rate": 3.198158704411593e-05, "loss": 0.3705, "step": 9376 }, { "epoch": 0.4117808489851477, "grad_norm": 2.0, "learning_rate": 3.1974943740418986e-05, "loss": 0.4057, "step": 9378 }, { "epoch": 0.4118686674643512, "grad_norm": 1.953125, "learning_rate": 3.1968299902579166e-05, "loss": 0.3794, "step": 9380 }, { "epoch": 0.41195648594355466, "grad_norm": 2.265625, "learning_rate": 3.196165553110528e-05, "loss": 0.3638, "step": 9382 }, { "epoch": 0.41204430442275813, "grad_norm": 2.1875, "learning_rate": 3.1955010626506146e-05, "loss": 0.3815, "step": 9384 }, { "epoch": 0.41213212290196166, "grad_norm": 2.109375, "learning_rate": 3.1948365189290625e-05, "loss": 0.379, "step": 9386 }, { "epoch": 0.41221994138116513, "grad_norm": 1.8828125, "learning_rate": 3.194171921996763e-05, "loss": 0.3453, "step": 9388 }, { "epoch": 0.4123077598603686, "grad_norm": 1.65625, "learning_rate": 3.1935072719046115e-05, "loss": 0.3958, "step": 9390 }, { "epoch": 0.4123955783395721, "grad_norm": 1.9375, "learning_rate": 3.192842568703508e-05, "loss": 0.4017, "step": 9392 }, { "epoch": 0.4124833968187756, "grad_norm": 2.140625, "learning_rate": 3.192177812444353e-05, "loss": 0.4008, "step": 9394 }, { "epoch": 0.4125712152979791, "grad_norm": 1.8359375, "learning_rate": 3.191513003178055e-05, "loss": 0.3786, "step": 9396 }, { "epoch": 0.41265903377718255, "grad_norm": 2.015625, "learning_rate": 3.1908481409555266e-05, "loss": 0.3702, "step": 9398 }, { "epoch": 0.4127468522563861, "grad_norm": 1.8671875, "learning_rate": 3.190183225827682e-05, "loss": 0.3878, "step": 9400 }, { "epoch": 0.41283467073558955, "grad_norm": 2.171875, "learning_rate": 3.1895182578454395e-05, "loss": 0.3722, "step": 9402 }, { "epoch": 0.412922489214793, "grad_norm": 1.8671875, "learning_rate": 3.188853237059725e-05, "loss": 0.3754, "step": 9404 }, { "epoch": 0.4130103076939965, "grad_norm": 2.25, "learning_rate": 3.188188163521463e-05, "loss": 0.4122, "step": 9406 }, { "epoch": 0.4130981261732, "grad_norm": 2.03125, "learning_rate": 3.1875230372815864e-05, "loss": 0.3988, "step": 9408 }, { "epoch": 0.4131859446524035, "grad_norm": 1.9296875, "learning_rate": 3.1868578583910316e-05, "loss": 0.364, "step": 9410 }, { "epoch": 0.41327376313160696, "grad_norm": 2.0625, "learning_rate": 3.186192626900737e-05, "loss": 0.379, "step": 9412 }, { "epoch": 0.41336158161081044, "grad_norm": 2.0, "learning_rate": 3.185527342861647e-05, "loss": 0.4191, "step": 9414 }, { "epoch": 0.41344940009001396, "grad_norm": 2.25, "learning_rate": 3.184862006324709e-05, "loss": 0.414, "step": 9416 }, { "epoch": 0.41353721856921744, "grad_norm": 1.984375, "learning_rate": 3.184196617340874e-05, "loss": 0.4011, "step": 9418 }, { "epoch": 0.4136250370484209, "grad_norm": 1.7421875, "learning_rate": 3.1835311759610975e-05, "loss": 0.3796, "step": 9420 }, { "epoch": 0.4137128555276244, "grad_norm": 1.9765625, "learning_rate": 3.18286568223634e-05, "loss": 0.3948, "step": 9422 }, { "epoch": 0.4138006740068279, "grad_norm": 1.9140625, "learning_rate": 3.1822001362175646e-05, "loss": 0.3655, "step": 9424 }, { "epoch": 0.4138884924860314, "grad_norm": 2.109375, "learning_rate": 3.18153453795574e-05, "loss": 0.3793, "step": 9426 }, { "epoch": 0.41397631096523485, "grad_norm": 1.8125, "learning_rate": 3.180868887501837e-05, "loss": 0.3982, "step": 9428 }, { "epoch": 0.4140641294444383, "grad_norm": 1.96875, "learning_rate": 3.1802031849068316e-05, "loss": 0.3917, "step": 9430 }, { "epoch": 0.41415194792364185, "grad_norm": 1.9453125, "learning_rate": 3.1795374302217025e-05, "loss": 0.3937, "step": 9432 }, { "epoch": 0.4142397664028453, "grad_norm": 2.0, "learning_rate": 3.178871623497434e-05, "loss": 0.3886, "step": 9434 }, { "epoch": 0.4143275848820488, "grad_norm": 2.046875, "learning_rate": 3.178205764785014e-05, "loss": 0.3583, "step": 9436 }, { "epoch": 0.41441540336125227, "grad_norm": 1.8359375, "learning_rate": 3.177539854135434e-05, "loss": 0.3792, "step": 9438 }, { "epoch": 0.4145032218404558, "grad_norm": 1.8828125, "learning_rate": 3.1768738915996896e-05, "loss": 0.3909, "step": 9440 }, { "epoch": 0.41459104031965927, "grad_norm": 2.015625, "learning_rate": 3.1762078772287804e-05, "loss": 0.3741, "step": 9442 }, { "epoch": 0.41467885879886274, "grad_norm": 2.3125, "learning_rate": 3.17554181107371e-05, "loss": 0.3985, "step": 9444 }, { "epoch": 0.41476667727806626, "grad_norm": 1.921875, "learning_rate": 3.174875693185486e-05, "loss": 0.3948, "step": 9446 }, { "epoch": 0.41485449575726974, "grad_norm": 2.078125, "learning_rate": 3.174209523615119e-05, "loss": 0.3797, "step": 9448 }, { "epoch": 0.4149423142364732, "grad_norm": 2.234375, "learning_rate": 3.173543302413625e-05, "loss": 0.3699, "step": 9450 }, { "epoch": 0.4150301327156767, "grad_norm": 1.953125, "learning_rate": 3.172877029632023e-05, "loss": 0.3681, "step": 9452 }, { "epoch": 0.4151179511948802, "grad_norm": 1.90625, "learning_rate": 3.1722107053213386e-05, "loss": 0.361, "step": 9454 }, { "epoch": 0.4152057696740837, "grad_norm": 1.9921875, "learning_rate": 3.171544329532596e-05, "loss": 0.3652, "step": 9456 }, { "epoch": 0.41529358815328715, "grad_norm": 2.265625, "learning_rate": 3.1708779023168275e-05, "loss": 0.3872, "step": 9458 }, { "epoch": 0.4153814066324906, "grad_norm": 1.984375, "learning_rate": 3.170211423725069e-05, "loss": 0.3984, "step": 9460 }, { "epoch": 0.41546922511169415, "grad_norm": 2.09375, "learning_rate": 3.169544893808359e-05, "loss": 0.4018, "step": 9462 }, { "epoch": 0.4155570435908976, "grad_norm": 1.9296875, "learning_rate": 3.16887831261774e-05, "loss": 0.4096, "step": 9464 }, { "epoch": 0.4156448620701011, "grad_norm": 1.859375, "learning_rate": 3.16821168020426e-05, "loss": 0.3844, "step": 9466 }, { "epoch": 0.41573268054930457, "grad_norm": 1.9921875, "learning_rate": 3.167544996618969e-05, "loss": 0.4051, "step": 9468 }, { "epoch": 0.4158204990285081, "grad_norm": 2.09375, "learning_rate": 3.166878261912922e-05, "loss": 0.3821, "step": 9470 }, { "epoch": 0.41590831750771157, "grad_norm": 1.9140625, "learning_rate": 3.166211476137178e-05, "loss": 0.4056, "step": 9472 }, { "epoch": 0.41599613598691504, "grad_norm": 1.9765625, "learning_rate": 3.1655446393427994e-05, "loss": 0.3796, "step": 9474 }, { "epoch": 0.4160839544661185, "grad_norm": 2.484375, "learning_rate": 3.164877751580853e-05, "loss": 0.3845, "step": 9476 }, { "epoch": 0.41617177294532204, "grad_norm": 2.078125, "learning_rate": 3.164210812902409e-05, "loss": 0.3913, "step": 9478 }, { "epoch": 0.4162595914245255, "grad_norm": 2.09375, "learning_rate": 3.1635438233585425e-05, "loss": 0.401, "step": 9480 }, { "epoch": 0.416347409903729, "grad_norm": 1.9140625, "learning_rate": 3.162876783000329e-05, "loss": 0.3596, "step": 9482 }, { "epoch": 0.41643522838293245, "grad_norm": 2.15625, "learning_rate": 3.162209691878854e-05, "loss": 0.3905, "step": 9484 }, { "epoch": 0.416523046862136, "grad_norm": 1.828125, "learning_rate": 3.161542550045202e-05, "loss": 0.3869, "step": 9486 }, { "epoch": 0.41661086534133945, "grad_norm": 2.015625, "learning_rate": 3.160875357550462e-05, "loss": 0.3911, "step": 9488 }, { "epoch": 0.4166986838205429, "grad_norm": 1.7890625, "learning_rate": 3.1602081144457297e-05, "loss": 0.3836, "step": 9490 }, { "epoch": 0.4167865022997464, "grad_norm": 1.9296875, "learning_rate": 3.159540820782102e-05, "loss": 0.3814, "step": 9492 }, { "epoch": 0.4168743207789499, "grad_norm": 1.8828125, "learning_rate": 3.1588734766106794e-05, "loss": 0.3724, "step": 9494 }, { "epoch": 0.4169621392581534, "grad_norm": 1.7421875, "learning_rate": 3.1582060819825674e-05, "loss": 0.387, "step": 9496 }, { "epoch": 0.41704995773735687, "grad_norm": 1.7890625, "learning_rate": 3.1575386369488766e-05, "loss": 0.3574, "step": 9498 }, { "epoch": 0.4171377762165604, "grad_norm": 1.9609375, "learning_rate": 3.15687114156072e-05, "loss": 0.3769, "step": 9500 }, { "epoch": 0.41722559469576387, "grad_norm": 1.7734375, "learning_rate": 3.156203595869213e-05, "loss": 0.4157, "step": 9502 }, { "epoch": 0.41731341317496734, "grad_norm": 2.015625, "learning_rate": 3.155535999925478e-05, "loss": 0.401, "step": 9504 }, { "epoch": 0.4174012316541708, "grad_norm": 1.8203125, "learning_rate": 3.1548683537806384e-05, "loss": 0.3857, "step": 9506 }, { "epoch": 0.41748905013337434, "grad_norm": 1.90625, "learning_rate": 3.1542006574858236e-05, "loss": 0.365, "step": 9508 }, { "epoch": 0.4175768686125778, "grad_norm": 2.140625, "learning_rate": 3.153532911092165e-05, "loss": 0.3915, "step": 9510 }, { "epoch": 0.4176646870917813, "grad_norm": 2.125, "learning_rate": 3.1528651146508e-05, "loss": 0.3779, "step": 9512 }, { "epoch": 0.41775250557098476, "grad_norm": 2.1875, "learning_rate": 3.1521972682128674e-05, "loss": 0.3807, "step": 9514 }, { "epoch": 0.4178403240501883, "grad_norm": 2.1875, "learning_rate": 3.151529371829513e-05, "loss": 0.3603, "step": 9516 }, { "epoch": 0.41792814252939176, "grad_norm": 1.8828125, "learning_rate": 3.150861425551882e-05, "loss": 0.3942, "step": 9518 }, { "epoch": 0.4180159610085952, "grad_norm": 1.890625, "learning_rate": 3.150193429431127e-05, "loss": 0.3824, "step": 9520 }, { "epoch": 0.4181037794877987, "grad_norm": 1.984375, "learning_rate": 3.149525383518404e-05, "loss": 0.4087, "step": 9522 }, { "epoch": 0.4181915979670022, "grad_norm": 1.953125, "learning_rate": 3.148857287864871e-05, "loss": 0.3705, "step": 9524 }, { "epoch": 0.4182794164462057, "grad_norm": 1.8984375, "learning_rate": 3.148189142521691e-05, "loss": 0.3858, "step": 9526 }, { "epoch": 0.41836723492540917, "grad_norm": 1.984375, "learning_rate": 3.1475209475400316e-05, "loss": 0.3966, "step": 9528 }, { "epoch": 0.41845505340461264, "grad_norm": 1.8828125, "learning_rate": 3.146852702971063e-05, "loss": 0.3847, "step": 9530 }, { "epoch": 0.41854287188381617, "grad_norm": 1.9296875, "learning_rate": 3.146184408865959e-05, "loss": 0.4065, "step": 9532 }, { "epoch": 0.41863069036301964, "grad_norm": 1.8515625, "learning_rate": 3.1455160652758975e-05, "loss": 0.3839, "step": 9534 }, { "epoch": 0.4187185088422231, "grad_norm": 1.8359375, "learning_rate": 3.1448476722520625e-05, "loss": 0.3895, "step": 9536 }, { "epoch": 0.4188063273214266, "grad_norm": 2.125, "learning_rate": 3.144179229845637e-05, "loss": 0.4202, "step": 9538 }, { "epoch": 0.4188941458006301, "grad_norm": 1.953125, "learning_rate": 3.143510738107812e-05, "loss": 0.4095, "step": 9540 }, { "epoch": 0.4189819642798336, "grad_norm": 1.6484375, "learning_rate": 3.1428421970897804e-05, "loss": 0.4062, "step": 9542 }, { "epoch": 0.41906978275903706, "grad_norm": 1.9609375, "learning_rate": 3.142173606842739e-05, "loss": 0.3639, "step": 9544 }, { "epoch": 0.41915760123824053, "grad_norm": 1.9140625, "learning_rate": 3.1415049674178884e-05, "loss": 0.3899, "step": 9546 }, { "epoch": 0.41924541971744406, "grad_norm": 2.03125, "learning_rate": 3.1408362788664346e-05, "loss": 0.3779, "step": 9548 }, { "epoch": 0.41933323819664753, "grad_norm": 2.0, "learning_rate": 3.1401675412395845e-05, "loss": 0.4029, "step": 9550 }, { "epoch": 0.419421056675851, "grad_norm": 1.9140625, "learning_rate": 3.139498754588551e-05, "loss": 0.3936, "step": 9552 }, { "epoch": 0.41950887515505453, "grad_norm": 1.7578125, "learning_rate": 3.1388299189645496e-05, "loss": 0.407, "step": 9554 }, { "epoch": 0.419596693634258, "grad_norm": 1.9140625, "learning_rate": 3.1381610344187995e-05, "loss": 0.3474, "step": 9556 }, { "epoch": 0.41968451211346147, "grad_norm": 2.296875, "learning_rate": 3.1374921010025246e-05, "loss": 0.4169, "step": 9558 }, { "epoch": 0.41977233059266494, "grad_norm": 1.9921875, "learning_rate": 3.136823118766951e-05, "loss": 0.3908, "step": 9560 }, { "epoch": 0.41986014907186847, "grad_norm": 1.859375, "learning_rate": 3.1361540877633114e-05, "loss": 0.3901, "step": 9562 }, { "epoch": 0.41994796755107194, "grad_norm": 2.09375, "learning_rate": 3.135485008042839e-05, "loss": 0.3783, "step": 9564 }, { "epoch": 0.4200357860302754, "grad_norm": 1.8359375, "learning_rate": 3.134815879656772e-05, "loss": 0.3723, "step": 9566 }, { "epoch": 0.4201236045094789, "grad_norm": 1.7734375, "learning_rate": 3.134146702656353e-05, "loss": 0.391, "step": 9568 }, { "epoch": 0.4202114229886824, "grad_norm": 1.828125, "learning_rate": 3.1334774770928276e-05, "loss": 0.3982, "step": 9570 }, { "epoch": 0.4202992414678859, "grad_norm": 1.9375, "learning_rate": 3.132808203017445e-05, "loss": 0.3727, "step": 9572 }, { "epoch": 0.42038705994708936, "grad_norm": 1.8984375, "learning_rate": 3.1321388804814584e-05, "loss": 0.365, "step": 9574 }, { "epoch": 0.42047487842629283, "grad_norm": 1.703125, "learning_rate": 3.131469509536125e-05, "loss": 0.4185, "step": 9576 }, { "epoch": 0.42056269690549636, "grad_norm": 1.953125, "learning_rate": 3.130800090232705e-05, "loss": 0.3825, "step": 9578 }, { "epoch": 0.42065051538469983, "grad_norm": 1.8515625, "learning_rate": 3.1301306226224625e-05, "loss": 0.3819, "step": 9580 }, { "epoch": 0.4207383338639033, "grad_norm": 1.828125, "learning_rate": 3.129461106756667e-05, "loss": 0.3885, "step": 9582 }, { "epoch": 0.4208261523431068, "grad_norm": 1.9140625, "learning_rate": 3.128791542686588e-05, "loss": 0.3542, "step": 9584 }, { "epoch": 0.4209139708223103, "grad_norm": 2.140625, "learning_rate": 3.128121930463502e-05, "loss": 0.3729, "step": 9586 }, { "epoch": 0.4210017893015138, "grad_norm": 1.96875, "learning_rate": 3.1274522701386886e-05, "loss": 0.4038, "step": 9588 }, { "epoch": 0.42108960778071725, "grad_norm": 2.40625, "learning_rate": 3.12678256176343e-05, "loss": 0.3794, "step": 9590 }, { "epoch": 0.4211774262599207, "grad_norm": 1.9140625, "learning_rate": 3.126112805389012e-05, "loss": 0.3976, "step": 9592 }, { "epoch": 0.42126524473912424, "grad_norm": 1.7890625, "learning_rate": 3.1254430010667254e-05, "loss": 0.3973, "step": 9594 }, { "epoch": 0.4213530632183277, "grad_norm": 1.96875, "learning_rate": 3.1247731488478636e-05, "loss": 0.3854, "step": 9596 }, { "epoch": 0.4214408816975312, "grad_norm": 2.140625, "learning_rate": 3.124103248783725e-05, "loss": 0.4122, "step": 9598 }, { "epoch": 0.4215287001767347, "grad_norm": 1.9453125, "learning_rate": 3.1234333009256095e-05, "loss": 0.3758, "step": 9600 }, { "epoch": 0.4216165186559382, "grad_norm": 2.09375, "learning_rate": 3.122763305324823e-05, "loss": 0.374, "step": 9602 }, { "epoch": 0.42170433713514166, "grad_norm": 1.9453125, "learning_rate": 3.1220932620326726e-05, "loss": 0.3778, "step": 9604 }, { "epoch": 0.42179215561434513, "grad_norm": 2.09375, "learning_rate": 3.1214231711004716e-05, "loss": 0.3878, "step": 9606 }, { "epoch": 0.42187997409354866, "grad_norm": 1.90625, "learning_rate": 3.1207530325795344e-05, "loss": 0.3833, "step": 9608 }, { "epoch": 0.42196779257275213, "grad_norm": 1.7890625, "learning_rate": 3.120082846521181e-05, "loss": 0.3614, "step": 9610 }, { "epoch": 0.4220556110519556, "grad_norm": 1.90625, "learning_rate": 3.1194126129767356e-05, "loss": 0.3733, "step": 9612 }, { "epoch": 0.4221434295311591, "grad_norm": 1.875, "learning_rate": 3.1187423319975236e-05, "loss": 0.3895, "step": 9614 }, { "epoch": 0.4222312480103626, "grad_norm": 1.8515625, "learning_rate": 3.118072003634876e-05, "loss": 0.3825, "step": 9616 }, { "epoch": 0.4223190664895661, "grad_norm": 1.9453125, "learning_rate": 3.117401627940125e-05, "loss": 0.3708, "step": 9618 }, { "epoch": 0.42240688496876955, "grad_norm": 1.890625, "learning_rate": 3.1167312049646094e-05, "loss": 0.3634, "step": 9620 }, { "epoch": 0.422494703447973, "grad_norm": 1.7734375, "learning_rate": 3.116060734759671e-05, "loss": 0.3742, "step": 9622 }, { "epoch": 0.42258252192717655, "grad_norm": 1.96875, "learning_rate": 3.115390217376654e-05, "loss": 0.3826, "step": 9624 }, { "epoch": 0.42267034040638, "grad_norm": 1.8984375, "learning_rate": 3.1147196528669056e-05, "loss": 0.3936, "step": 9626 }, { "epoch": 0.4227581588855835, "grad_norm": 1.9453125, "learning_rate": 3.11404904128178e-05, "loss": 0.3715, "step": 9628 }, { "epoch": 0.42284597736478696, "grad_norm": 1.7578125, "learning_rate": 3.113378382672631e-05, "loss": 0.3611, "step": 9630 }, { "epoch": 0.4229337958439905, "grad_norm": 1.71875, "learning_rate": 3.112707677090818e-05, "loss": 0.3716, "step": 9632 }, { "epoch": 0.42302161432319396, "grad_norm": 1.7890625, "learning_rate": 3.1120369245877045e-05, "loss": 0.3768, "step": 9634 }, { "epoch": 0.42310943280239743, "grad_norm": 1.890625, "learning_rate": 3.111366125214657e-05, "loss": 0.356, "step": 9636 }, { "epoch": 0.4231972512816009, "grad_norm": 1.8671875, "learning_rate": 3.110695279023045e-05, "loss": 0.3886, "step": 9638 }, { "epoch": 0.42328506976080443, "grad_norm": 2.46875, "learning_rate": 3.110024386064242e-05, "loss": 0.3937, "step": 9640 }, { "epoch": 0.4233728882400079, "grad_norm": 1.9609375, "learning_rate": 3.109353446389625e-05, "loss": 0.3737, "step": 9642 }, { "epoch": 0.4234607067192114, "grad_norm": 1.8984375, "learning_rate": 3.108682460050576e-05, "loss": 0.3738, "step": 9644 }, { "epoch": 0.42354852519841485, "grad_norm": 2.125, "learning_rate": 3.108011427098477e-05, "loss": 0.38, "step": 9646 }, { "epoch": 0.4236363436776184, "grad_norm": 2.171875, "learning_rate": 3.107340347584718e-05, "loss": 0.3693, "step": 9648 }, { "epoch": 0.42372416215682185, "grad_norm": 2.0625, "learning_rate": 3.1066692215606894e-05, "loss": 0.3689, "step": 9650 }, { "epoch": 0.4238119806360253, "grad_norm": 2.15625, "learning_rate": 3.1059980490777865e-05, "loss": 0.3757, "step": 9652 }, { "epoch": 0.42389979911522885, "grad_norm": 1.8046875, "learning_rate": 3.1053268301874074e-05, "loss": 0.3936, "step": 9654 }, { "epoch": 0.4239876175944323, "grad_norm": 2.25, "learning_rate": 3.104655564940954e-05, "loss": 0.3597, "step": 9656 }, { "epoch": 0.4240754360736358, "grad_norm": 2.046875, "learning_rate": 3.103984253389833e-05, "loss": 0.3453, "step": 9658 }, { "epoch": 0.42416325455283926, "grad_norm": 2.390625, "learning_rate": 3.103312895585454e-05, "loss": 0.3657, "step": 9660 }, { "epoch": 0.4242510730320428, "grad_norm": 2.3125, "learning_rate": 3.102641491579228e-05, "loss": 0.3713, "step": 9662 }, { "epoch": 0.42433889151124626, "grad_norm": 1.859375, "learning_rate": 3.101970041422572e-05, "loss": 0.3867, "step": 9664 }, { "epoch": 0.42442670999044974, "grad_norm": 1.875, "learning_rate": 3.1012985451669065e-05, "loss": 0.4136, "step": 9666 }, { "epoch": 0.4245145284696532, "grad_norm": 1.890625, "learning_rate": 3.100627002863654e-05, "loss": 0.4042, "step": 9668 }, { "epoch": 0.42460234694885673, "grad_norm": 1.953125, "learning_rate": 3.099955414564241e-05, "loss": 0.3814, "step": 9670 }, { "epoch": 0.4246901654280602, "grad_norm": 1.84375, "learning_rate": 3.099283780320099e-05, "loss": 0.3804, "step": 9672 }, { "epoch": 0.4247779839072637, "grad_norm": 2.171875, "learning_rate": 3.098612100182662e-05, "loss": 0.3602, "step": 9674 }, { "epoch": 0.42486580238646715, "grad_norm": 2.0625, "learning_rate": 3.0979403742033664e-05, "loss": 0.3581, "step": 9676 }, { "epoch": 0.4249536208656707, "grad_norm": 1.8125, "learning_rate": 3.097268602433654e-05, "loss": 0.3673, "step": 9678 }, { "epoch": 0.42504143934487415, "grad_norm": 1.8515625, "learning_rate": 3.096596784924968e-05, "loss": 0.3822, "step": 9680 }, { "epoch": 0.4251292578240776, "grad_norm": 1.8984375, "learning_rate": 3.0959249217287574e-05, "loss": 0.3619, "step": 9682 }, { "epoch": 0.4252170763032811, "grad_norm": 1.8359375, "learning_rate": 3.095253012896474e-05, "loss": 0.4173, "step": 9684 }, { "epoch": 0.4253048947824846, "grad_norm": 1.984375, "learning_rate": 3.094581058479571e-05, "loss": 0.3643, "step": 9686 }, { "epoch": 0.4253927132616881, "grad_norm": 2.046875, "learning_rate": 3.0939090585295094e-05, "loss": 0.3801, "step": 9688 }, { "epoch": 0.42548053174089157, "grad_norm": 1.78125, "learning_rate": 3.09323701309775e-05, "loss": 0.4019, "step": 9690 }, { "epoch": 0.42556835022009504, "grad_norm": 1.78125, "learning_rate": 3.092564922235757e-05, "loss": 0.4079, "step": 9692 }, { "epoch": 0.42565616869929856, "grad_norm": 1.703125, "learning_rate": 3.091892785995e-05, "loss": 0.3587, "step": 9694 }, { "epoch": 0.42574398717850204, "grad_norm": 1.8125, "learning_rate": 3.0912206044269515e-05, "loss": 0.4073, "step": 9696 }, { "epoch": 0.4258318056577055, "grad_norm": 2.203125, "learning_rate": 3.090548377583088e-05, "loss": 0.3923, "step": 9698 }, { "epoch": 0.42591962413690904, "grad_norm": 1.984375, "learning_rate": 3.089876105514888e-05, "loss": 0.4088, "step": 9700 }, { "epoch": 0.4260074426161125, "grad_norm": 1.921875, "learning_rate": 3.089203788273834e-05, "loss": 0.3799, "step": 9702 }, { "epoch": 0.426095261095316, "grad_norm": 2.28125, "learning_rate": 3.088531425911413e-05, "loss": 0.3668, "step": 9704 }, { "epoch": 0.42618307957451945, "grad_norm": 2.1875, "learning_rate": 3.087859018479115e-05, "loss": 0.3907, "step": 9706 }, { "epoch": 0.426270898053723, "grad_norm": 1.8046875, "learning_rate": 3.0871865660284316e-05, "loss": 0.4238, "step": 9708 }, { "epoch": 0.42635871653292645, "grad_norm": 2.71875, "learning_rate": 3.0865140686108606e-05, "loss": 0.3756, "step": 9710 }, { "epoch": 0.4264465350121299, "grad_norm": 2.078125, "learning_rate": 3.085841526277901e-05, "loss": 0.3871, "step": 9712 }, { "epoch": 0.4265343534913334, "grad_norm": 1.875, "learning_rate": 3.085168939081058e-05, "loss": 0.3876, "step": 9714 }, { "epoch": 0.4266221719705369, "grad_norm": 2.03125, "learning_rate": 3.0844963070718366e-05, "loss": 0.3558, "step": 9716 }, { "epoch": 0.4267099904497404, "grad_norm": 2.0625, "learning_rate": 3.0838236303017476e-05, "loss": 0.3686, "step": 9718 }, { "epoch": 0.42679780892894387, "grad_norm": 2.1875, "learning_rate": 3.083150908822306e-05, "loss": 0.3684, "step": 9720 }, { "epoch": 0.42688562740814734, "grad_norm": 1.9765625, "learning_rate": 3.082478142685027e-05, "loss": 0.3866, "step": 9722 }, { "epoch": 0.42697344588735087, "grad_norm": 1.9453125, "learning_rate": 3.081805331941433e-05, "loss": 0.3644, "step": 9724 }, { "epoch": 0.42706126436655434, "grad_norm": 1.8515625, "learning_rate": 3.081132476643047e-05, "loss": 0.4389, "step": 9726 }, { "epoch": 0.4271490828457578, "grad_norm": 1.8125, "learning_rate": 3.0804595768413964e-05, "loss": 0.3857, "step": 9728 }, { "epoch": 0.4272369013249613, "grad_norm": 1.890625, "learning_rate": 3.079786632588012e-05, "loss": 0.3536, "step": 9730 }, { "epoch": 0.4273247198041648, "grad_norm": 1.9609375, "learning_rate": 3.079113643934429e-05, "loss": 0.4163, "step": 9732 }, { "epoch": 0.4274125382833683, "grad_norm": 1.9765625, "learning_rate": 3.078440610932184e-05, "loss": 0.3987, "step": 9734 }, { "epoch": 0.42750035676257175, "grad_norm": 2.015625, "learning_rate": 3.077767533632818e-05, "loss": 0.3931, "step": 9736 }, { "epoch": 0.4275881752417752, "grad_norm": 1.8046875, "learning_rate": 3.077094412087877e-05, "loss": 0.3957, "step": 9738 }, { "epoch": 0.42767599372097875, "grad_norm": 1.921875, "learning_rate": 3.076421246348906e-05, "loss": 0.342, "step": 9740 }, { "epoch": 0.4277638122001822, "grad_norm": 1.9140625, "learning_rate": 3.075748036467458e-05, "loss": 0.3825, "step": 9742 }, { "epoch": 0.4278516306793857, "grad_norm": 1.8828125, "learning_rate": 3.0750747824950885e-05, "loss": 0.3787, "step": 9744 }, { "epoch": 0.42793944915858917, "grad_norm": 1.8671875, "learning_rate": 3.0744014844833535e-05, "loss": 0.3855, "step": 9746 }, { "epoch": 0.4280272676377927, "grad_norm": 1.703125, "learning_rate": 3.0737281424838146e-05, "loss": 0.3554, "step": 9748 }, { "epoch": 0.42811508611699617, "grad_norm": 1.9609375, "learning_rate": 3.073054756548038e-05, "loss": 0.4223, "step": 9750 }, { "epoch": 0.42820290459619964, "grad_norm": 1.9375, "learning_rate": 3.0723813267275915e-05, "loss": 0.3837, "step": 9752 }, { "epoch": 0.42829072307540317, "grad_norm": 1.9609375, "learning_rate": 3.071707853074045e-05, "loss": 0.4027, "step": 9754 }, { "epoch": 0.42837854155460664, "grad_norm": 1.875, "learning_rate": 3.071034335638973e-05, "loss": 0.3697, "step": 9756 }, { "epoch": 0.4284663600338101, "grad_norm": 2.234375, "learning_rate": 3.070360774473956e-05, "loss": 0.3743, "step": 9758 }, { "epoch": 0.4285541785130136, "grad_norm": 2.03125, "learning_rate": 3.0696871696305726e-05, "loss": 0.3974, "step": 9760 }, { "epoch": 0.4286419969922171, "grad_norm": 1.8984375, "learning_rate": 3.069013521160411e-05, "loss": 0.3907, "step": 9762 }, { "epoch": 0.4287298154714206, "grad_norm": 1.859375, "learning_rate": 3.068339829115057e-05, "loss": 0.427, "step": 9764 }, { "epoch": 0.42881763395062406, "grad_norm": 1.875, "learning_rate": 3.067666093546102e-05, "loss": 0.3888, "step": 9766 }, { "epoch": 0.4289054524298275, "grad_norm": 2.0, "learning_rate": 3.066992314505142e-05, "loss": 0.3796, "step": 9768 }, { "epoch": 0.42899327090903105, "grad_norm": 2.03125, "learning_rate": 3.066318492043774e-05, "loss": 0.357, "step": 9770 }, { "epoch": 0.4290810893882345, "grad_norm": 1.78125, "learning_rate": 3.065644626213601e-05, "loss": 0.3919, "step": 9772 }, { "epoch": 0.429168907867438, "grad_norm": 1.890625, "learning_rate": 3.064970717066227e-05, "loss": 0.3633, "step": 9774 }, { "epoch": 0.42925672634664147, "grad_norm": 1.8359375, "learning_rate": 3.064296764653259e-05, "loss": 0.3975, "step": 9776 }, { "epoch": 0.429344544825845, "grad_norm": 2.015625, "learning_rate": 3.0636227690263104e-05, "loss": 0.3941, "step": 9778 }, { "epoch": 0.42943236330504847, "grad_norm": 1.984375, "learning_rate": 3.0629487302369945e-05, "loss": 0.3861, "step": 9780 }, { "epoch": 0.42952018178425194, "grad_norm": 1.9609375, "learning_rate": 3.0622746483369306e-05, "loss": 0.3714, "step": 9782 }, { "epoch": 0.4296080002634554, "grad_norm": 1.9609375, "learning_rate": 3.061600523377739e-05, "loss": 0.3706, "step": 9784 }, { "epoch": 0.42969581874265894, "grad_norm": 2.046875, "learning_rate": 3.0609263554110445e-05, "loss": 0.3677, "step": 9786 }, { "epoch": 0.4297836372218624, "grad_norm": 1.875, "learning_rate": 3.060252144488476e-05, "loss": 0.3784, "step": 9788 }, { "epoch": 0.4298714557010659, "grad_norm": 2.203125, "learning_rate": 3.059577890661663e-05, "loss": 0.3611, "step": 9790 }, { "epoch": 0.42995927418026936, "grad_norm": 2.328125, "learning_rate": 3.058903593982241e-05, "loss": 0.41, "step": 9792 }, { "epoch": 0.4300470926594729, "grad_norm": 2.0625, "learning_rate": 3.058229254501848e-05, "loss": 0.371, "step": 9794 }, { "epoch": 0.43013491113867636, "grad_norm": 1.6953125, "learning_rate": 3.057554872272125e-05, "loss": 0.3496, "step": 9796 }, { "epoch": 0.43022272961787983, "grad_norm": 2.0, "learning_rate": 3.0568804473447164e-05, "loss": 0.3618, "step": 9798 }, { "epoch": 0.43031054809708336, "grad_norm": 1.9140625, "learning_rate": 3.05620597977127e-05, "loss": 0.3626, "step": 9800 }, { "epoch": 0.43039836657628683, "grad_norm": 2.078125, "learning_rate": 3.0555314696034356e-05, "loss": 0.3567, "step": 9802 }, { "epoch": 0.4304861850554903, "grad_norm": 1.875, "learning_rate": 3.054856916892868e-05, "loss": 0.3825, "step": 9804 }, { "epoch": 0.43057400353469377, "grad_norm": 1.7265625, "learning_rate": 3.0541823216912245e-05, "loss": 0.3721, "step": 9806 }, { "epoch": 0.4306618220138973, "grad_norm": 1.796875, "learning_rate": 3.0535076840501665e-05, "loss": 0.3852, "step": 9808 }, { "epoch": 0.43074964049310077, "grad_norm": 2.1875, "learning_rate": 3.052833004021357e-05, "loss": 0.3748, "step": 9810 }, { "epoch": 0.43083745897230424, "grad_norm": 1.90625, "learning_rate": 3.052158281656465e-05, "loss": 0.4204, "step": 9812 }, { "epoch": 0.4309252774515077, "grad_norm": 1.7578125, "learning_rate": 3.0514835170071582e-05, "loss": 0.3771, "step": 9814 }, { "epoch": 0.43101309593071124, "grad_norm": 1.9375, "learning_rate": 3.0508087101251115e-05, "loss": 0.3823, "step": 9816 }, { "epoch": 0.4311009144099147, "grad_norm": 2.234375, "learning_rate": 3.0501338610620017e-05, "loss": 0.377, "step": 9818 }, { "epoch": 0.4311887328891182, "grad_norm": 1.8984375, "learning_rate": 3.0494589698695087e-05, "loss": 0.405, "step": 9820 }, { "epoch": 0.43127655136832166, "grad_norm": 1.96875, "learning_rate": 3.0487840365993164e-05, "loss": 0.3876, "step": 9822 }, { "epoch": 0.4313643698475252, "grad_norm": 1.9453125, "learning_rate": 3.0481090613031115e-05, "loss": 0.3827, "step": 9824 }, { "epoch": 0.43145218832672866, "grad_norm": 2.015625, "learning_rate": 3.0474340440325822e-05, "loss": 0.4024, "step": 9826 }, { "epoch": 0.43154000680593213, "grad_norm": 1.859375, "learning_rate": 3.046758984839424e-05, "loss": 0.3622, "step": 9828 }, { "epoch": 0.4316278252851356, "grad_norm": 1.765625, "learning_rate": 3.0460838837753304e-05, "loss": 0.4005, "step": 9830 }, { "epoch": 0.43171564376433913, "grad_norm": 1.7890625, "learning_rate": 3.0454087408920024e-05, "loss": 0.3643, "step": 9832 }, { "epoch": 0.4318034622435426, "grad_norm": 2.03125, "learning_rate": 3.0447335562411423e-05, "loss": 0.3626, "step": 9834 }, { "epoch": 0.4318912807227461, "grad_norm": 1.953125, "learning_rate": 3.044058329874456e-05, "loss": 0.3631, "step": 9836 }, { "epoch": 0.43197909920194955, "grad_norm": 2.03125, "learning_rate": 3.0433830618436528e-05, "loss": 0.413, "step": 9838 }, { "epoch": 0.4320669176811531, "grad_norm": 2.15625, "learning_rate": 3.042707752200444e-05, "loss": 0.3812, "step": 9840 }, { "epoch": 0.43215473616035655, "grad_norm": 1.65625, "learning_rate": 3.042032400996545e-05, "loss": 0.3471, "step": 9842 }, { "epoch": 0.43224255463956, "grad_norm": 2.15625, "learning_rate": 3.0413570082836757e-05, "loss": 0.3952, "step": 9844 }, { "epoch": 0.4323303731187635, "grad_norm": 1.8046875, "learning_rate": 3.0406815741135563e-05, "loss": 0.3993, "step": 9846 }, { "epoch": 0.432418191597967, "grad_norm": 2.171875, "learning_rate": 3.0400060985379124e-05, "loss": 0.3543, "step": 9848 }, { "epoch": 0.4325060100771705, "grad_norm": 1.8359375, "learning_rate": 3.0393305816084728e-05, "loss": 0.4002, "step": 9850 }, { "epoch": 0.43259382855637396, "grad_norm": 2.171875, "learning_rate": 3.0386550233769673e-05, "loss": 0.3702, "step": 9852 }, { "epoch": 0.4326816470355775, "grad_norm": 1.8515625, "learning_rate": 3.037979423895131e-05, "loss": 0.402, "step": 9854 }, { "epoch": 0.43276946551478096, "grad_norm": 1.875, "learning_rate": 3.037303783214701e-05, "loss": 0.3626, "step": 9856 }, { "epoch": 0.43285728399398443, "grad_norm": 2.03125, "learning_rate": 3.03662810138742e-05, "loss": 0.3723, "step": 9858 }, { "epoch": 0.4329451024731879, "grad_norm": 2.15625, "learning_rate": 3.035952378465029e-05, "loss": 0.3783, "step": 9860 }, { "epoch": 0.43303292095239143, "grad_norm": 2.03125, "learning_rate": 3.0352766144992768e-05, "loss": 0.3801, "step": 9862 }, { "epoch": 0.4331207394315949, "grad_norm": 1.9296875, "learning_rate": 3.034600809541913e-05, "loss": 0.3734, "step": 9864 }, { "epoch": 0.4332085579107984, "grad_norm": 1.859375, "learning_rate": 3.033924963644691e-05, "loss": 0.3632, "step": 9866 }, { "epoch": 0.43329637639000185, "grad_norm": 1.8359375, "learning_rate": 3.0332490768593675e-05, "loss": 0.3899, "step": 9868 }, { "epoch": 0.4333841948692054, "grad_norm": 1.921875, "learning_rate": 3.0325731492377015e-05, "loss": 0.3708, "step": 9870 }, { "epoch": 0.43347201334840885, "grad_norm": 2.109375, "learning_rate": 3.0318971808314566e-05, "loss": 0.3751, "step": 9872 }, { "epoch": 0.4335598318276123, "grad_norm": 1.8515625, "learning_rate": 3.0312211716923978e-05, "loss": 0.3686, "step": 9874 }, { "epoch": 0.4336476503068158, "grad_norm": 2.265625, "learning_rate": 3.0305451218722947e-05, "loss": 0.3846, "step": 9876 }, { "epoch": 0.4337354687860193, "grad_norm": 1.9453125, "learning_rate": 3.0298690314229184e-05, "loss": 0.3559, "step": 9878 }, { "epoch": 0.4338232872652228, "grad_norm": 2.28125, "learning_rate": 3.029192900396045e-05, "loss": 0.4058, "step": 9880 }, { "epoch": 0.43391110574442626, "grad_norm": 1.9609375, "learning_rate": 3.0285167288434518e-05, "loss": 0.3496, "step": 9882 }, { "epoch": 0.43399892422362973, "grad_norm": 1.8515625, "learning_rate": 3.0278405168169215e-05, "loss": 0.3949, "step": 9884 }, { "epoch": 0.43408674270283326, "grad_norm": 1.828125, "learning_rate": 3.0271642643682378e-05, "loss": 0.3738, "step": 9886 }, { "epoch": 0.43417456118203673, "grad_norm": 1.75, "learning_rate": 3.0264879715491883e-05, "loss": 0.3839, "step": 9888 }, { "epoch": 0.4342623796612402, "grad_norm": 2.015625, "learning_rate": 3.0258116384115643e-05, "loss": 0.3591, "step": 9890 }, { "epoch": 0.4343501981404437, "grad_norm": 1.8359375, "learning_rate": 3.0251352650071578e-05, "loss": 0.3743, "step": 9892 }, { "epoch": 0.4344380166196472, "grad_norm": 2.21875, "learning_rate": 3.0244588513877676e-05, "loss": 0.3767, "step": 9894 }, { "epoch": 0.4345258350988507, "grad_norm": 1.890625, "learning_rate": 3.0237823976051925e-05, "loss": 0.3498, "step": 9896 }, { "epoch": 0.43461365357805415, "grad_norm": 1.78125, "learning_rate": 3.0231059037112363e-05, "loss": 0.3935, "step": 9898 }, { "epoch": 0.4347014720572576, "grad_norm": 1.734375, "learning_rate": 3.0224293697577045e-05, "loss": 0.3661, "step": 9900 }, { "epoch": 0.43478929053646115, "grad_norm": 2.046875, "learning_rate": 3.021752795796406e-05, "loss": 0.4047, "step": 9902 }, { "epoch": 0.4348771090156646, "grad_norm": 1.984375, "learning_rate": 3.021076181879154e-05, "loss": 0.3855, "step": 9904 }, { "epoch": 0.4349649274948681, "grad_norm": 1.984375, "learning_rate": 3.0203995280577618e-05, "loss": 0.3811, "step": 9906 }, { "epoch": 0.4350527459740716, "grad_norm": 1.7890625, "learning_rate": 3.0197228343840502e-05, "loss": 0.3624, "step": 9908 }, { "epoch": 0.4351405644532751, "grad_norm": 1.953125, "learning_rate": 3.0190461009098382e-05, "loss": 0.3668, "step": 9910 }, { "epoch": 0.43522838293247856, "grad_norm": 1.859375, "learning_rate": 3.018369327686953e-05, "loss": 0.3465, "step": 9912 }, { "epoch": 0.43531620141168204, "grad_norm": 1.984375, "learning_rate": 3.0176925147672192e-05, "loss": 0.4023, "step": 9914 }, { "epoch": 0.43540401989088556, "grad_norm": 1.921875, "learning_rate": 3.017015662202468e-05, "loss": 0.3568, "step": 9916 }, { "epoch": 0.43549183837008903, "grad_norm": 1.7265625, "learning_rate": 3.0163387700445345e-05, "loss": 0.3598, "step": 9918 }, { "epoch": 0.4355796568492925, "grad_norm": 1.796875, "learning_rate": 3.0156618383452545e-05, "loss": 0.3737, "step": 9920 }, { "epoch": 0.435667475328496, "grad_norm": 1.8203125, "learning_rate": 3.0149848671564663e-05, "loss": 0.4093, "step": 9922 }, { "epoch": 0.4357552938076995, "grad_norm": 1.734375, "learning_rate": 3.014307856530015e-05, "loss": 0.3829, "step": 9924 }, { "epoch": 0.435843112286903, "grad_norm": 1.8828125, "learning_rate": 3.0136308065177434e-05, "loss": 0.3661, "step": 9926 }, { "epoch": 0.43593093076610645, "grad_norm": 2.078125, "learning_rate": 3.0129537171715016e-05, "loss": 0.389, "step": 9928 }, { "epoch": 0.4360187492453099, "grad_norm": 2.21875, "learning_rate": 3.0122765885431414e-05, "loss": 0.4172, "step": 9930 }, { "epoch": 0.43610656772451345, "grad_norm": 1.84375, "learning_rate": 3.0115994206845173e-05, "loss": 0.3846, "step": 9932 }, { "epoch": 0.4361943862037169, "grad_norm": 1.8828125, "learning_rate": 3.010922213647487e-05, "loss": 0.3701, "step": 9934 }, { "epoch": 0.4362822046829204, "grad_norm": 2.09375, "learning_rate": 3.0102449674839117e-05, "loss": 0.3837, "step": 9936 }, { "epoch": 0.43637002316212387, "grad_norm": 2.0625, "learning_rate": 3.0095676822456532e-05, "loss": 0.394, "step": 9938 }, { "epoch": 0.4364578416413274, "grad_norm": 1.890625, "learning_rate": 3.00889035798458e-05, "loss": 0.3849, "step": 9940 }, { "epoch": 0.43654566012053087, "grad_norm": 1.796875, "learning_rate": 3.008212994752561e-05, "loss": 0.3773, "step": 9942 }, { "epoch": 0.43663347859973434, "grad_norm": 1.9375, "learning_rate": 3.007535592601469e-05, "loss": 0.3691, "step": 9944 }, { "epoch": 0.4367212970789378, "grad_norm": 1.8125, "learning_rate": 3.00685815158318e-05, "loss": 0.3364, "step": 9946 }, { "epoch": 0.43680911555814134, "grad_norm": 2.09375, "learning_rate": 3.0061806717495728e-05, "loss": 0.3628, "step": 9948 }, { "epoch": 0.4368969340373448, "grad_norm": 1.828125, "learning_rate": 3.005503153152528e-05, "loss": 0.3841, "step": 9950 }, { "epoch": 0.4369847525165483, "grad_norm": 2.296875, "learning_rate": 3.0048255958439303e-05, "loss": 0.347, "step": 9952 }, { "epoch": 0.4370725709957518, "grad_norm": 1.8828125, "learning_rate": 3.0041479998756673e-05, "loss": 0.386, "step": 9954 }, { "epoch": 0.4371603894749553, "grad_norm": 1.84375, "learning_rate": 3.00347036529963e-05, "loss": 0.3891, "step": 9956 }, { "epoch": 0.43724820795415875, "grad_norm": 1.8671875, "learning_rate": 3.0027926921677108e-05, "loss": 0.4011, "step": 9958 }, { "epoch": 0.4373360264333622, "grad_norm": 1.9921875, "learning_rate": 3.0021149805318072e-05, "loss": 0.4107, "step": 9960 }, { "epoch": 0.43742384491256575, "grad_norm": 2.078125, "learning_rate": 3.001437230443818e-05, "loss": 0.3708, "step": 9962 }, { "epoch": 0.4375116633917692, "grad_norm": 1.9140625, "learning_rate": 3.0007594419556456e-05, "loss": 0.3526, "step": 9964 }, { "epoch": 0.4375994818709727, "grad_norm": 2.3125, "learning_rate": 3.0000816151191952e-05, "loss": 0.3973, "step": 9966 }, { "epoch": 0.43768730035017617, "grad_norm": 1.953125, "learning_rate": 2.9994037499863747e-05, "loss": 0.3634, "step": 9968 }, { "epoch": 0.4377751188293797, "grad_norm": 1.875, "learning_rate": 2.998725846609095e-05, "loss": 0.3703, "step": 9970 }, { "epoch": 0.43786293730858317, "grad_norm": 1.9296875, "learning_rate": 2.9980479050392702e-05, "loss": 0.382, "step": 9972 }, { "epoch": 0.43795075578778664, "grad_norm": 2.234375, "learning_rate": 2.9973699253288186e-05, "loss": 0.3804, "step": 9974 }, { "epoch": 0.4380385742669901, "grad_norm": 2.109375, "learning_rate": 2.996691907529658e-05, "loss": 0.3785, "step": 9976 }, { "epoch": 0.43812639274619364, "grad_norm": 1.7578125, "learning_rate": 2.996013851693712e-05, "loss": 0.3478, "step": 9978 }, { "epoch": 0.4382142112253971, "grad_norm": 1.7890625, "learning_rate": 2.9953357578729064e-05, "loss": 0.3744, "step": 9980 }, { "epoch": 0.4383020297046006, "grad_norm": 1.9140625, "learning_rate": 2.99465762611917e-05, "loss": 0.3685, "step": 9982 }, { "epoch": 0.43838984818380405, "grad_norm": 2.125, "learning_rate": 2.9939794564844335e-05, "loss": 0.3512, "step": 9984 }, { "epoch": 0.4384776666630076, "grad_norm": 2.546875, "learning_rate": 2.993301249020633e-05, "loss": 0.3636, "step": 9986 }, { "epoch": 0.43856548514221105, "grad_norm": 2.125, "learning_rate": 2.9926230037797036e-05, "loss": 0.3955, "step": 9988 }, { "epoch": 0.4386533036214145, "grad_norm": 1.875, "learning_rate": 2.9919447208135865e-05, "loss": 0.383, "step": 9990 }, { "epoch": 0.438741122100618, "grad_norm": 2.84375, "learning_rate": 2.9912664001742246e-05, "loss": 0.3825, "step": 9992 }, { "epoch": 0.4388289405798215, "grad_norm": 2.390625, "learning_rate": 2.9905880419135646e-05, "loss": 0.3879, "step": 9994 }, { "epoch": 0.438916759059025, "grad_norm": 2.109375, "learning_rate": 2.9899096460835545e-05, "loss": 0.3831, "step": 9996 }, { "epoch": 0.43900457753822847, "grad_norm": 1.6796875, "learning_rate": 2.9892312127361464e-05, "loss": 0.3886, "step": 9998 }, { "epoch": 0.43909239601743194, "grad_norm": 2.953125, "learning_rate": 2.988552741923295e-05, "loss": 0.3905, "step": 10000 }, { "epoch": 0.43918021449663547, "grad_norm": 2.46875, "learning_rate": 2.9878742336969568e-05, "loss": 0.367, "step": 10002 }, { "epoch": 0.43926803297583894, "grad_norm": 2.15625, "learning_rate": 2.987195688109093e-05, "loss": 0.4148, "step": 10004 }, { "epoch": 0.4393558514550424, "grad_norm": 1.9453125, "learning_rate": 2.9865171052116664e-05, "loss": 0.3781, "step": 10006 }, { "epoch": 0.43944366993424594, "grad_norm": 2.359375, "learning_rate": 2.9858384850566435e-05, "loss": 0.3622, "step": 10008 }, { "epoch": 0.4395314884134494, "grad_norm": 2.25, "learning_rate": 2.9851598276959935e-05, "loss": 0.3743, "step": 10010 }, { "epoch": 0.4396193068926529, "grad_norm": 1.765625, "learning_rate": 2.984481133181688e-05, "loss": 0.4079, "step": 10012 }, { "epoch": 0.43970712537185636, "grad_norm": 1.9765625, "learning_rate": 2.9838024015657e-05, "loss": 0.3881, "step": 10014 }, { "epoch": 0.4397949438510599, "grad_norm": 2.0625, "learning_rate": 2.9831236329000084e-05, "loss": 0.3975, "step": 10016 }, { "epoch": 0.43988276233026335, "grad_norm": 2.140625, "learning_rate": 2.9824448272365928e-05, "loss": 0.3801, "step": 10018 }, { "epoch": 0.4399705808094668, "grad_norm": 2.046875, "learning_rate": 2.981765984627437e-05, "loss": 0.3606, "step": 10020 }, { "epoch": 0.4400583992886703, "grad_norm": 2.046875, "learning_rate": 2.981087105124527e-05, "loss": 0.3692, "step": 10022 }, { "epoch": 0.4401462177678738, "grad_norm": 1.8828125, "learning_rate": 2.9804081887798508e-05, "loss": 0.3693, "step": 10024 }, { "epoch": 0.4402340362470773, "grad_norm": 1.6875, "learning_rate": 2.9797292356454004e-05, "loss": 0.3681, "step": 10026 }, { "epoch": 0.44032185472628077, "grad_norm": 1.671875, "learning_rate": 2.9790502457731706e-05, "loss": 0.3738, "step": 10028 }, { "epoch": 0.44040967320548424, "grad_norm": 1.8671875, "learning_rate": 2.9783712192151576e-05, "loss": 0.3856, "step": 10030 }, { "epoch": 0.44049749168468777, "grad_norm": 2.03125, "learning_rate": 2.9776921560233616e-05, "loss": 0.3701, "step": 10032 }, { "epoch": 0.44058531016389124, "grad_norm": 2.0, "learning_rate": 2.9770130562497867e-05, "loss": 0.3902, "step": 10034 }, { "epoch": 0.4406731286430947, "grad_norm": 1.7734375, "learning_rate": 2.9763339199464374e-05, "loss": 0.3585, "step": 10036 }, { "epoch": 0.4407609471222982, "grad_norm": 1.8828125, "learning_rate": 2.9756547471653218e-05, "loss": 0.3656, "step": 10038 }, { "epoch": 0.4408487656015017, "grad_norm": 1.9140625, "learning_rate": 2.9749755379584515e-05, "loss": 0.3603, "step": 10040 }, { "epoch": 0.4409365840807052, "grad_norm": 1.828125, "learning_rate": 2.9742962923778417e-05, "loss": 0.4048, "step": 10042 }, { "epoch": 0.44102440255990866, "grad_norm": 2.09375, "learning_rate": 2.9736170104755075e-05, "loss": 0.4039, "step": 10044 }, { "epoch": 0.44111222103911213, "grad_norm": 1.921875, "learning_rate": 2.9729376923034684e-05, "loss": 0.3617, "step": 10046 }, { "epoch": 0.44120003951831566, "grad_norm": 2.1875, "learning_rate": 2.9722583379137493e-05, "loss": 0.4059, "step": 10048 }, { "epoch": 0.44128785799751913, "grad_norm": 1.9609375, "learning_rate": 2.9715789473583715e-05, "loss": 0.376, "step": 10050 }, { "epoch": 0.4413756764767226, "grad_norm": 1.859375, "learning_rate": 2.9708995206893658e-05, "loss": 0.351, "step": 10052 }, { "epoch": 0.44146349495592613, "grad_norm": 1.875, "learning_rate": 2.970220057958762e-05, "loss": 0.3672, "step": 10054 }, { "epoch": 0.4415513134351296, "grad_norm": 1.9375, "learning_rate": 2.9695405592185925e-05, "loss": 0.36, "step": 10056 }, { "epoch": 0.44163913191433307, "grad_norm": 2.1875, "learning_rate": 2.968861024520896e-05, "loss": 0.3663, "step": 10058 }, { "epoch": 0.44172695039353654, "grad_norm": 1.9609375, "learning_rate": 2.9681814539177094e-05, "loss": 0.3859, "step": 10060 }, { "epoch": 0.44181476887274007, "grad_norm": 2.25, "learning_rate": 2.9675018474610743e-05, "loss": 0.3617, "step": 10062 }, { "epoch": 0.44190258735194354, "grad_norm": 1.75, "learning_rate": 2.9668222052030353e-05, "loss": 0.364, "step": 10064 }, { "epoch": 0.441990405831147, "grad_norm": 1.9453125, "learning_rate": 2.9661425271956406e-05, "loss": 0.3872, "step": 10066 }, { "epoch": 0.4420782243103505, "grad_norm": 1.859375, "learning_rate": 2.965462813490939e-05, "loss": 0.3916, "step": 10068 }, { "epoch": 0.442166042789554, "grad_norm": 1.8046875, "learning_rate": 2.964783064140984e-05, "loss": 0.3791, "step": 10070 }, { "epoch": 0.4422538612687575, "grad_norm": 2.171875, "learning_rate": 2.9641032791978307e-05, "loss": 0.3698, "step": 10072 }, { "epoch": 0.44234167974796096, "grad_norm": 1.75, "learning_rate": 2.9634234587135366e-05, "loss": 0.3751, "step": 10074 }, { "epoch": 0.44242949822716443, "grad_norm": 1.921875, "learning_rate": 2.9627436027401633e-05, "loss": 0.3814, "step": 10076 }, { "epoch": 0.44251731670636796, "grad_norm": 1.828125, "learning_rate": 2.9620637113297735e-05, "loss": 0.3538, "step": 10078 }, { "epoch": 0.44260513518557143, "grad_norm": 2.40625, "learning_rate": 2.961383784534434e-05, "loss": 0.3811, "step": 10080 }, { "epoch": 0.4426929536647749, "grad_norm": 1.7890625, "learning_rate": 2.9607038224062133e-05, "loss": 0.3739, "step": 10082 }, { "epoch": 0.4427807721439784, "grad_norm": 1.984375, "learning_rate": 2.9600238249971846e-05, "loss": 0.3835, "step": 10084 }, { "epoch": 0.4428685906231819, "grad_norm": 1.984375, "learning_rate": 2.9593437923594204e-05, "loss": 0.3694, "step": 10086 }, { "epoch": 0.4429564091023854, "grad_norm": 1.890625, "learning_rate": 2.958663724544999e-05, "loss": 0.3621, "step": 10088 }, { "epoch": 0.44304422758158885, "grad_norm": 1.734375, "learning_rate": 2.9579836216059988e-05, "loss": 0.3912, "step": 10090 }, { "epoch": 0.4431320460607923, "grad_norm": 2.28125, "learning_rate": 2.9573034835945028e-05, "loss": 0.3912, "step": 10092 }, { "epoch": 0.44321986453999584, "grad_norm": 1.8984375, "learning_rate": 2.9566233105625973e-05, "loss": 0.3883, "step": 10094 }, { "epoch": 0.4433076830191993, "grad_norm": 1.9140625, "learning_rate": 2.955943102562369e-05, "loss": 0.361, "step": 10096 }, { "epoch": 0.4433955014984028, "grad_norm": 1.9140625, "learning_rate": 2.9552628596459086e-05, "loss": 0.3964, "step": 10098 }, { "epoch": 0.44348331997760626, "grad_norm": 1.9453125, "learning_rate": 2.9545825818653087e-05, "loss": 0.3786, "step": 10100 }, { "epoch": 0.4435711384568098, "grad_norm": 1.859375, "learning_rate": 2.9539022692726665e-05, "loss": 0.3958, "step": 10102 }, { "epoch": 0.44365895693601326, "grad_norm": 1.90625, "learning_rate": 2.9532219219200797e-05, "loss": 0.3772, "step": 10104 }, { "epoch": 0.44374677541521673, "grad_norm": 1.921875, "learning_rate": 2.952541539859649e-05, "loss": 0.3753, "step": 10106 }, { "epoch": 0.44383459389442026, "grad_norm": 1.7890625, "learning_rate": 2.951861123143479e-05, "loss": 0.373, "step": 10108 }, { "epoch": 0.44392241237362373, "grad_norm": 2.03125, "learning_rate": 2.9511806718236764e-05, "loss": 0.3747, "step": 10110 }, { "epoch": 0.4440102308528272, "grad_norm": 1.8203125, "learning_rate": 2.9505001859523484e-05, "loss": 0.357, "step": 10112 }, { "epoch": 0.4440980493320307, "grad_norm": 1.84375, "learning_rate": 2.949819665581609e-05, "loss": 0.3729, "step": 10114 }, { "epoch": 0.4441858678112342, "grad_norm": 2.03125, "learning_rate": 2.9491391107635715e-05, "loss": 0.3629, "step": 10116 }, { "epoch": 0.4442736862904377, "grad_norm": 1.953125, "learning_rate": 2.9484585215503537e-05, "loss": 0.3556, "step": 10118 }, { "epoch": 0.44436150476964115, "grad_norm": 2.046875, "learning_rate": 2.9477778979940745e-05, "loss": 0.3578, "step": 10120 }, { "epoch": 0.4444493232488446, "grad_norm": 1.734375, "learning_rate": 2.947097240146857e-05, "loss": 0.3788, "step": 10122 }, { "epoch": 0.44453714172804815, "grad_norm": 1.9296875, "learning_rate": 2.9464165480608252e-05, "loss": 0.3716, "step": 10124 }, { "epoch": 0.4446249602072516, "grad_norm": 2.078125, "learning_rate": 2.945735821788107e-05, "loss": 0.3521, "step": 10126 }, { "epoch": 0.4447127786864551, "grad_norm": 1.9453125, "learning_rate": 2.945055061380833e-05, "loss": 0.3624, "step": 10128 }, { "epoch": 0.44480059716565856, "grad_norm": 1.9296875, "learning_rate": 2.9443742668911357e-05, "loss": 0.3788, "step": 10130 }, { "epoch": 0.4448884156448621, "grad_norm": 2.03125, "learning_rate": 2.9436934383711508e-05, "loss": 0.3681, "step": 10132 }, { "epoch": 0.44497623412406556, "grad_norm": 1.9140625, "learning_rate": 2.943012575873016e-05, "loss": 0.3704, "step": 10134 }, { "epoch": 0.44506405260326903, "grad_norm": 1.8828125, "learning_rate": 2.9423316794488716e-05, "loss": 0.4046, "step": 10136 }, { "epoch": 0.4451518710824725, "grad_norm": 1.6640625, "learning_rate": 2.9416507491508606e-05, "loss": 0.3611, "step": 10138 }, { "epoch": 0.44523968956167603, "grad_norm": 1.9296875, "learning_rate": 2.9409697850311296e-05, "loss": 0.3797, "step": 10140 }, { "epoch": 0.4453275080408795, "grad_norm": 2.171875, "learning_rate": 2.940288787141827e-05, "loss": 0.4003, "step": 10142 }, { "epoch": 0.445415326520083, "grad_norm": 2.1875, "learning_rate": 2.9396077555351038e-05, "loss": 0.3505, "step": 10144 }, { "epoch": 0.44550314499928645, "grad_norm": 2.265625, "learning_rate": 2.9389266902631137e-05, "loss": 0.3681, "step": 10146 }, { "epoch": 0.44559096347849, "grad_norm": 2.140625, "learning_rate": 2.9382455913780115e-05, "loss": 0.3881, "step": 10148 }, { "epoch": 0.44567878195769345, "grad_norm": 1.765625, "learning_rate": 2.9375644589319572e-05, "loss": 0.3798, "step": 10150 }, { "epoch": 0.4457666004368969, "grad_norm": 2.15625, "learning_rate": 2.936883292977112e-05, "loss": 0.39, "step": 10152 }, { "epoch": 0.44585441891610045, "grad_norm": 2.09375, "learning_rate": 2.936202093565639e-05, "loss": 0.3701, "step": 10154 }, { "epoch": 0.4459422373953039, "grad_norm": 2.0625, "learning_rate": 2.9355208607497053e-05, "loss": 0.3647, "step": 10156 }, { "epoch": 0.4460300558745074, "grad_norm": 1.9609375, "learning_rate": 2.93483959458148e-05, "loss": 0.3879, "step": 10158 }, { "epoch": 0.44611787435371086, "grad_norm": 1.875, "learning_rate": 2.9341582951131343e-05, "loss": 0.3899, "step": 10160 }, { "epoch": 0.4462056928329144, "grad_norm": 2.015625, "learning_rate": 2.9334769623968417e-05, "loss": 0.3776, "step": 10162 }, { "epoch": 0.44629351131211786, "grad_norm": 2.0625, "learning_rate": 2.9327955964847798e-05, "loss": 0.3546, "step": 10164 }, { "epoch": 0.44638132979132134, "grad_norm": 2.15625, "learning_rate": 2.9321141974291277e-05, "loss": 0.3971, "step": 10166 }, { "epoch": 0.4464691482705248, "grad_norm": 2.1875, "learning_rate": 2.931432765282066e-05, "loss": 0.3852, "step": 10168 }, { "epoch": 0.44655696674972833, "grad_norm": 1.9921875, "learning_rate": 2.9307513000957797e-05, "loss": 0.3891, "step": 10170 }, { "epoch": 0.4466447852289318, "grad_norm": 1.9296875, "learning_rate": 2.930069801922457e-05, "loss": 0.3529, "step": 10172 }, { "epoch": 0.4467326037081353, "grad_norm": 1.9453125, "learning_rate": 2.9293882708142846e-05, "loss": 0.3754, "step": 10174 }, { "epoch": 0.44682042218733875, "grad_norm": 1.71875, "learning_rate": 2.9287067068234554e-05, "loss": 0.3734, "step": 10176 }, { "epoch": 0.4469082406665423, "grad_norm": 1.859375, "learning_rate": 2.928025110002164e-05, "loss": 0.378, "step": 10178 }, { "epoch": 0.44699605914574575, "grad_norm": 1.7578125, "learning_rate": 2.9273434804026072e-05, "loss": 0.3614, "step": 10180 }, { "epoch": 0.4470838776249492, "grad_norm": 1.75, "learning_rate": 2.9266618180769846e-05, "loss": 0.3388, "step": 10182 }, { "epoch": 0.4471716961041527, "grad_norm": 1.875, "learning_rate": 2.9259801230774974e-05, "loss": 0.3852, "step": 10184 }, { "epoch": 0.4472595145833562, "grad_norm": 1.7890625, "learning_rate": 2.92529839545635e-05, "loss": 0.3585, "step": 10186 }, { "epoch": 0.4473473330625597, "grad_norm": 1.8984375, "learning_rate": 2.9246166352657494e-05, "loss": 0.3929, "step": 10188 }, { "epoch": 0.44743515154176317, "grad_norm": 2.1875, "learning_rate": 2.923934842557905e-05, "loss": 0.3593, "step": 10190 }, { "epoch": 0.44752297002096664, "grad_norm": 1.703125, "learning_rate": 2.923253017385029e-05, "loss": 0.4071, "step": 10192 }, { "epoch": 0.44761078850017016, "grad_norm": 1.984375, "learning_rate": 2.9225711597993362e-05, "loss": 0.34, "step": 10194 }, { "epoch": 0.44769860697937364, "grad_norm": 2.140625, "learning_rate": 2.9218892698530427e-05, "loss": 0.3652, "step": 10196 }, { "epoch": 0.4477864254585771, "grad_norm": 1.9140625, "learning_rate": 2.9212073475983663e-05, "loss": 0.398, "step": 10198 }, { "epoch": 0.4478742439377806, "grad_norm": 2.140625, "learning_rate": 2.9205253930875315e-05, "loss": 0.3866, "step": 10200 }, { "epoch": 0.4479620624169841, "grad_norm": 1.8828125, "learning_rate": 2.9198434063727602e-05, "loss": 0.4063, "step": 10202 }, { "epoch": 0.4480498808961876, "grad_norm": 2.25, "learning_rate": 2.919161387506281e-05, "loss": 0.4008, "step": 10204 }, { "epoch": 0.44813769937539105, "grad_norm": 1.671875, "learning_rate": 2.9184793365403217e-05, "loss": 0.3405, "step": 10206 }, { "epoch": 0.4482255178545946, "grad_norm": 2.125, "learning_rate": 2.917797253527116e-05, "loss": 0.4057, "step": 10208 }, { "epoch": 0.44831333633379805, "grad_norm": 1.671875, "learning_rate": 2.917115138518895e-05, "loss": 0.3616, "step": 10210 }, { "epoch": 0.4484011548130015, "grad_norm": 1.6640625, "learning_rate": 2.916432991567897e-05, "loss": 0.3644, "step": 10212 }, { "epoch": 0.448488973292205, "grad_norm": 1.90625, "learning_rate": 2.9157508127263612e-05, "loss": 0.3851, "step": 10214 }, { "epoch": 0.4485767917714085, "grad_norm": 1.859375, "learning_rate": 2.915068602046528e-05, "loss": 0.3828, "step": 10216 }, { "epoch": 0.448664610250612, "grad_norm": 1.8515625, "learning_rate": 2.9143863595806413e-05, "loss": 0.382, "step": 10218 }, { "epoch": 0.44875242872981547, "grad_norm": 2.03125, "learning_rate": 2.9137040853809487e-05, "loss": 0.3827, "step": 10220 }, { "epoch": 0.44884024720901894, "grad_norm": 1.90625, "learning_rate": 2.9130217794996977e-05, "loss": 0.3805, "step": 10222 }, { "epoch": 0.44892806568822247, "grad_norm": 1.734375, "learning_rate": 2.9123394419891396e-05, "loss": 0.3908, "step": 10224 }, { "epoch": 0.44901588416742594, "grad_norm": 2.03125, "learning_rate": 2.911657072901529e-05, "loss": 0.3937, "step": 10226 }, { "epoch": 0.4491037026466294, "grad_norm": 1.8984375, "learning_rate": 2.91097467228912e-05, "loss": 0.3825, "step": 10228 }, { "epoch": 0.4491915211258329, "grad_norm": 2.078125, "learning_rate": 2.9102922402041728e-05, "loss": 0.388, "step": 10230 }, { "epoch": 0.4492793396050364, "grad_norm": 1.859375, "learning_rate": 2.9096097766989478e-05, "loss": 0.3475, "step": 10232 }, { "epoch": 0.4493671580842399, "grad_norm": 1.7734375, "learning_rate": 2.9089272818257073e-05, "loss": 0.4138, "step": 10234 }, { "epoch": 0.44945497656344335, "grad_norm": 2.0, "learning_rate": 2.908244755636717e-05, "loss": 0.3681, "step": 10236 }, { "epoch": 0.4495427950426468, "grad_norm": 1.75, "learning_rate": 2.907562198184246e-05, "loss": 0.3713, "step": 10238 }, { "epoch": 0.44963061352185035, "grad_norm": 1.765625, "learning_rate": 2.906879609520564e-05, "loss": 0.3717, "step": 10240 }, { "epoch": 0.4497184320010538, "grad_norm": 1.859375, "learning_rate": 2.9061969896979447e-05, "loss": 0.3774, "step": 10242 }, { "epoch": 0.4498062504802573, "grad_norm": 1.84375, "learning_rate": 2.9055143387686624e-05, "loss": 0.3603, "step": 10244 }, { "epoch": 0.44989406895946077, "grad_norm": 1.796875, "learning_rate": 2.9048316567849947e-05, "loss": 0.3547, "step": 10246 }, { "epoch": 0.4499818874386643, "grad_norm": 1.765625, "learning_rate": 2.9041489437992215e-05, "loss": 0.3901, "step": 10248 }, { "epoch": 0.45006970591786777, "grad_norm": 2.09375, "learning_rate": 2.9034661998636248e-05, "loss": 0.3638, "step": 10250 }, { "epoch": 0.45015752439707124, "grad_norm": 1.8671875, "learning_rate": 2.9027834250304904e-05, "loss": 0.3831, "step": 10252 }, { "epoch": 0.45024534287627477, "grad_norm": 1.8203125, "learning_rate": 2.9021006193521043e-05, "loss": 0.3749, "step": 10254 }, { "epoch": 0.45033316135547824, "grad_norm": 1.890625, "learning_rate": 2.901417782880757e-05, "loss": 0.3908, "step": 10256 }, { "epoch": 0.4504209798346817, "grad_norm": 1.8125, "learning_rate": 2.9007349156687404e-05, "loss": 0.3754, "step": 10258 }, { "epoch": 0.4505087983138852, "grad_norm": 1.8203125, "learning_rate": 2.900052017768346e-05, "loss": 0.375, "step": 10260 }, { "epoch": 0.4505966167930887, "grad_norm": 2.015625, "learning_rate": 2.899369089231873e-05, "loss": 0.3856, "step": 10262 }, { "epoch": 0.4506844352722922, "grad_norm": 1.8515625, "learning_rate": 2.8986861301116196e-05, "loss": 0.3618, "step": 10264 }, { "epoch": 0.45077225375149566, "grad_norm": 2.171875, "learning_rate": 2.8980031404598862e-05, "loss": 0.414, "step": 10266 }, { "epoch": 0.4508600722306991, "grad_norm": 1.8203125, "learning_rate": 2.897320120328978e-05, "loss": 0.4064, "step": 10268 }, { "epoch": 0.45094789070990265, "grad_norm": 1.828125, "learning_rate": 2.8966370697711988e-05, "loss": 0.3708, "step": 10270 }, { "epoch": 0.4510357091891061, "grad_norm": 2.09375, "learning_rate": 2.895953988838859e-05, "loss": 0.3726, "step": 10272 }, { "epoch": 0.4511235276683096, "grad_norm": 1.703125, "learning_rate": 2.8952708775842664e-05, "loss": 0.3504, "step": 10274 }, { "epoch": 0.45121134614751307, "grad_norm": 1.8203125, "learning_rate": 2.8945877360597352e-05, "loss": 0.3642, "step": 10276 }, { "epoch": 0.4512991646267166, "grad_norm": 2.15625, "learning_rate": 2.8939045643175812e-05, "loss": 0.3775, "step": 10278 }, { "epoch": 0.45138698310592007, "grad_norm": 1.828125, "learning_rate": 2.8932213624101207e-05, "loss": 0.3676, "step": 10280 }, { "epoch": 0.45147480158512354, "grad_norm": 2.921875, "learning_rate": 2.8925381303896747e-05, "loss": 0.3688, "step": 10282 }, { "epoch": 0.451562620064327, "grad_norm": 2.0625, "learning_rate": 2.8918548683085643e-05, "loss": 0.3705, "step": 10284 }, { "epoch": 0.45165043854353054, "grad_norm": 2.015625, "learning_rate": 2.891171576219114e-05, "loss": 0.3961, "step": 10286 }, { "epoch": 0.451738257022734, "grad_norm": 1.7890625, "learning_rate": 2.8904882541736512e-05, "loss": 0.3567, "step": 10288 }, { "epoch": 0.4518260755019375, "grad_norm": 2.484375, "learning_rate": 2.8898049022245034e-05, "loss": 0.3565, "step": 10290 }, { "epoch": 0.45191389398114096, "grad_norm": 1.8671875, "learning_rate": 2.889121520424003e-05, "loss": 0.3552, "step": 10292 }, { "epoch": 0.4520017124603445, "grad_norm": 1.8984375, "learning_rate": 2.888438108824484e-05, "loss": 0.3441, "step": 10294 }, { "epoch": 0.45208953093954796, "grad_norm": 1.75, "learning_rate": 2.8877546674782806e-05, "loss": 0.3561, "step": 10296 }, { "epoch": 0.45217734941875143, "grad_norm": 1.734375, "learning_rate": 2.8870711964377322e-05, "loss": 0.365, "step": 10298 }, { "epoch": 0.4522651678979549, "grad_norm": 2.078125, "learning_rate": 2.8863876957551784e-05, "loss": 0.381, "step": 10300 }, { "epoch": 0.45235298637715843, "grad_norm": 2.203125, "learning_rate": 2.8857041654829625e-05, "loss": 0.3885, "step": 10302 }, { "epoch": 0.4524408048563619, "grad_norm": 2.390625, "learning_rate": 2.8850206056734297e-05, "loss": 0.3848, "step": 10304 }, { "epoch": 0.45252862333556537, "grad_norm": 1.9140625, "learning_rate": 2.8843370163789264e-05, "loss": 0.3426, "step": 10306 }, { "epoch": 0.4526164418147689, "grad_norm": 1.6328125, "learning_rate": 2.883653397651802e-05, "loss": 0.3648, "step": 10308 }, { "epoch": 0.45270426029397237, "grad_norm": 1.7265625, "learning_rate": 2.8829697495444087e-05, "loss": 0.365, "step": 10310 }, { "epoch": 0.45279207877317584, "grad_norm": 1.8984375, "learning_rate": 2.8822860721090995e-05, "loss": 0.3502, "step": 10312 }, { "epoch": 0.4528798972523793, "grad_norm": 1.828125, "learning_rate": 2.8816023653982317e-05, "loss": 0.3714, "step": 10314 }, { "epoch": 0.45296771573158284, "grad_norm": 1.8046875, "learning_rate": 2.8809186294641634e-05, "loss": 0.3823, "step": 10316 }, { "epoch": 0.4530555342107863, "grad_norm": 1.9765625, "learning_rate": 2.8802348643592552e-05, "loss": 0.3837, "step": 10318 }, { "epoch": 0.4531433526899898, "grad_norm": 1.921875, "learning_rate": 2.8795510701358703e-05, "loss": 0.3675, "step": 10320 }, { "epoch": 0.45323117116919326, "grad_norm": 1.921875, "learning_rate": 2.878867246846373e-05, "loss": 0.373, "step": 10322 }, { "epoch": 0.4533189896483968, "grad_norm": 1.8125, "learning_rate": 2.87818339454313e-05, "loss": 0.3807, "step": 10324 }, { "epoch": 0.45340680812760026, "grad_norm": 1.9375, "learning_rate": 2.877499513278513e-05, "loss": 0.3547, "step": 10326 }, { "epoch": 0.45349462660680373, "grad_norm": 1.9609375, "learning_rate": 2.876815603104893e-05, "loss": 0.3703, "step": 10328 }, { "epoch": 0.4535824450860072, "grad_norm": 1.796875, "learning_rate": 2.8761316640746437e-05, "loss": 0.3889, "step": 10330 }, { "epoch": 0.45367026356521073, "grad_norm": 1.796875, "learning_rate": 2.8754476962401418e-05, "loss": 0.384, "step": 10332 }, { "epoch": 0.4537580820444142, "grad_norm": 1.8046875, "learning_rate": 2.874763699653765e-05, "loss": 0.3686, "step": 10334 }, { "epoch": 0.4538459005236177, "grad_norm": 1.796875, "learning_rate": 2.874079674367894e-05, "loss": 0.3623, "step": 10336 }, { "epoch": 0.45393371900282115, "grad_norm": 1.7734375, "learning_rate": 2.8733956204349117e-05, "loss": 0.3689, "step": 10338 }, { "epoch": 0.4540215374820247, "grad_norm": 1.828125, "learning_rate": 2.8727115379072034e-05, "loss": 0.3677, "step": 10340 }, { "epoch": 0.45410935596122814, "grad_norm": 1.8359375, "learning_rate": 2.872027426837156e-05, "loss": 0.3815, "step": 10342 }, { "epoch": 0.4541971744404316, "grad_norm": 1.984375, "learning_rate": 2.87134328727716e-05, "loss": 0.3619, "step": 10344 }, { "epoch": 0.4542849929196351, "grad_norm": 1.8203125, "learning_rate": 2.870659119279605e-05, "loss": 0.3892, "step": 10346 }, { "epoch": 0.4543728113988386, "grad_norm": 1.8203125, "learning_rate": 2.8699749228968865e-05, "loss": 0.4045, "step": 10348 }, { "epoch": 0.4544606298780421, "grad_norm": 2.0625, "learning_rate": 2.8692906981813993e-05, "loss": 0.4142, "step": 10350 }, { "epoch": 0.45454844835724556, "grad_norm": 1.65625, "learning_rate": 2.8686064451855422e-05, "loss": 0.3741, "step": 10352 }, { "epoch": 0.45463626683644903, "grad_norm": 1.9921875, "learning_rate": 2.867922163961715e-05, "loss": 0.3552, "step": 10354 }, { "epoch": 0.45472408531565256, "grad_norm": 2.109375, "learning_rate": 2.86723785456232e-05, "loss": 0.398, "step": 10356 }, { "epoch": 0.45481190379485603, "grad_norm": 1.8203125, "learning_rate": 2.866553517039763e-05, "loss": 0.3416, "step": 10358 }, { "epoch": 0.4548997222740595, "grad_norm": 1.9375, "learning_rate": 2.8658691514464488e-05, "loss": 0.3898, "step": 10360 }, { "epoch": 0.45498754075326303, "grad_norm": 1.7890625, "learning_rate": 2.8651847578347873e-05, "loss": 0.3765, "step": 10362 }, { "epoch": 0.4550753592324665, "grad_norm": 2.0, "learning_rate": 2.8645003362571897e-05, "loss": 0.3381, "step": 10364 }, { "epoch": 0.45516317771167, "grad_norm": 1.8671875, "learning_rate": 2.863815886766069e-05, "loss": 0.3649, "step": 10366 }, { "epoch": 0.45525099619087345, "grad_norm": 1.9296875, "learning_rate": 2.8631314094138405e-05, "loss": 0.3858, "step": 10368 }, { "epoch": 0.455338814670077, "grad_norm": 1.9921875, "learning_rate": 2.862446904252922e-05, "loss": 0.3454, "step": 10370 }, { "epoch": 0.45542663314928045, "grad_norm": 1.953125, "learning_rate": 2.8617623713357326e-05, "loss": 0.3886, "step": 10372 }, { "epoch": 0.4555144516284839, "grad_norm": 1.7109375, "learning_rate": 2.8610778107146934e-05, "loss": 0.3711, "step": 10374 }, { "epoch": 0.4556022701076874, "grad_norm": 1.890625, "learning_rate": 2.8603932224422297e-05, "loss": 0.3676, "step": 10376 }, { "epoch": 0.4556900885868909, "grad_norm": 1.8671875, "learning_rate": 2.8597086065707655e-05, "loss": 0.3693, "step": 10378 }, { "epoch": 0.4557779070660944, "grad_norm": 2.03125, "learning_rate": 2.8590239631527314e-05, "loss": 0.34, "step": 10380 }, { "epoch": 0.45586572554529786, "grad_norm": 1.7578125, "learning_rate": 2.858339292240556e-05, "loss": 0.3826, "step": 10382 }, { "epoch": 0.45595354402450133, "grad_norm": 1.875, "learning_rate": 2.857654593886671e-05, "loss": 0.3651, "step": 10384 }, { "epoch": 0.45604136250370486, "grad_norm": 1.9765625, "learning_rate": 2.856969868143512e-05, "loss": 0.3667, "step": 10386 }, { "epoch": 0.45612918098290833, "grad_norm": 2.03125, "learning_rate": 2.856285115063514e-05, "loss": 0.3768, "step": 10388 }, { "epoch": 0.4562169994621118, "grad_norm": 1.9765625, "learning_rate": 2.8556003346991174e-05, "loss": 0.3444, "step": 10390 }, { "epoch": 0.4563048179413153, "grad_norm": 1.84375, "learning_rate": 2.8549155271027617e-05, "loss": 0.3787, "step": 10392 }, { "epoch": 0.4563926364205188, "grad_norm": 2.0, "learning_rate": 2.8542306923268897e-05, "loss": 0.379, "step": 10394 }, { "epoch": 0.4564804548997223, "grad_norm": 1.90625, "learning_rate": 2.853545830423947e-05, "loss": 0.373, "step": 10396 }, { "epoch": 0.45656827337892575, "grad_norm": 1.9375, "learning_rate": 2.8528609414463793e-05, "loss": 0.3874, "step": 10398 }, { "epoch": 0.4566560918581292, "grad_norm": 1.765625, "learning_rate": 2.8521760254466355e-05, "loss": 0.3398, "step": 10400 }, { "epoch": 0.45674391033733275, "grad_norm": 1.8359375, "learning_rate": 2.851491082477168e-05, "loss": 0.3801, "step": 10402 }, { "epoch": 0.4568317288165362, "grad_norm": 1.8515625, "learning_rate": 2.8508061125904284e-05, "loss": 0.3614, "step": 10404 }, { "epoch": 0.4569195472957397, "grad_norm": 1.9765625, "learning_rate": 2.850121115838874e-05, "loss": 0.3851, "step": 10406 }, { "epoch": 0.4570073657749432, "grad_norm": 2.046875, "learning_rate": 2.8494360922749595e-05, "loss": 0.3495, "step": 10408 }, { "epoch": 0.4570951842541467, "grad_norm": 1.7734375, "learning_rate": 2.848751041951146e-05, "loss": 0.3773, "step": 10410 }, { "epoch": 0.45718300273335016, "grad_norm": 1.8828125, "learning_rate": 2.8480659649198937e-05, "loss": 0.3699, "step": 10412 }, { "epoch": 0.45727082121255364, "grad_norm": 2.0, "learning_rate": 2.8473808612336662e-05, "loss": 0.3567, "step": 10414 }, { "epoch": 0.45735863969175716, "grad_norm": 1.84375, "learning_rate": 2.8466957309449287e-05, "loss": 0.3647, "step": 10416 }, { "epoch": 0.45744645817096063, "grad_norm": 1.9453125, "learning_rate": 2.8460105741061505e-05, "loss": 0.3707, "step": 10418 }, { "epoch": 0.4575342766501641, "grad_norm": 2.25, "learning_rate": 2.8453253907697985e-05, "loss": 0.3728, "step": 10420 }, { "epoch": 0.4576220951293676, "grad_norm": 2.0, "learning_rate": 2.844640180988345e-05, "loss": 0.3649, "step": 10422 }, { "epoch": 0.4577099136085711, "grad_norm": 1.9296875, "learning_rate": 2.8439549448142644e-05, "loss": 0.3704, "step": 10424 }, { "epoch": 0.4577977320877746, "grad_norm": 2.09375, "learning_rate": 2.8432696823000314e-05, "loss": 0.3709, "step": 10426 }, { "epoch": 0.45788555056697805, "grad_norm": 2.0625, "learning_rate": 2.8425843934981245e-05, "loss": 0.3849, "step": 10428 }, { "epoch": 0.4579733690461815, "grad_norm": 2.046875, "learning_rate": 2.8418990784610223e-05, "loss": 0.388, "step": 10430 }, { "epoch": 0.45806118752538505, "grad_norm": 2.125, "learning_rate": 2.8412137372412062e-05, "loss": 0.3925, "step": 10432 }, { "epoch": 0.4581490060045885, "grad_norm": 1.890625, "learning_rate": 2.8405283698911605e-05, "loss": 0.3642, "step": 10434 }, { "epoch": 0.458236824483792, "grad_norm": 1.921875, "learning_rate": 2.8398429764633706e-05, "loss": 0.3798, "step": 10436 }, { "epoch": 0.45832464296299547, "grad_norm": 1.8046875, "learning_rate": 2.839157557010324e-05, "loss": 0.3691, "step": 10438 }, { "epoch": 0.458412461442199, "grad_norm": 1.71875, "learning_rate": 2.838472111584511e-05, "loss": 0.3631, "step": 10440 }, { "epoch": 0.45850027992140246, "grad_norm": 2.0625, "learning_rate": 2.8377866402384223e-05, "loss": 0.4049, "step": 10442 }, { "epoch": 0.45858809840060594, "grad_norm": 1.8984375, "learning_rate": 2.837101143024552e-05, "loss": 0.374, "step": 10444 }, { "epoch": 0.4586759168798094, "grad_norm": 1.8828125, "learning_rate": 2.836415619995395e-05, "loss": 0.3938, "step": 10446 }, { "epoch": 0.45876373535901294, "grad_norm": 1.7421875, "learning_rate": 2.835730071203449e-05, "loss": 0.3711, "step": 10448 }, { "epoch": 0.4588515538382164, "grad_norm": 2.1875, "learning_rate": 2.8350444967012134e-05, "loss": 0.3739, "step": 10450 }, { "epoch": 0.4589393723174199, "grad_norm": 1.859375, "learning_rate": 2.8343588965411905e-05, "loss": 0.3799, "step": 10452 }, { "epoch": 0.45902719079662335, "grad_norm": 2.015625, "learning_rate": 2.833673270775883e-05, "loss": 0.3414, "step": 10454 }, { "epoch": 0.4591150092758269, "grad_norm": 1.7578125, "learning_rate": 2.832987619457797e-05, "loss": 0.3798, "step": 10456 }, { "epoch": 0.45920282775503035, "grad_norm": 2.390625, "learning_rate": 2.832301942639439e-05, "loss": 0.3828, "step": 10458 }, { "epoch": 0.4592906462342338, "grad_norm": 1.984375, "learning_rate": 2.8316162403733177e-05, "loss": 0.3562, "step": 10460 }, { "epoch": 0.45937846471343735, "grad_norm": 2.140625, "learning_rate": 2.8309305127119456e-05, "loss": 0.3902, "step": 10462 }, { "epoch": 0.4594662831926408, "grad_norm": 2.203125, "learning_rate": 2.8302447597078353e-05, "loss": 0.3925, "step": 10464 }, { "epoch": 0.4595541016718443, "grad_norm": 2.09375, "learning_rate": 2.8295589814135032e-05, "loss": 0.3432, "step": 10466 }, { "epoch": 0.45964192015104777, "grad_norm": 2.078125, "learning_rate": 2.8288731778814642e-05, "loss": 0.3987, "step": 10468 }, { "epoch": 0.4597297386302513, "grad_norm": 1.9140625, "learning_rate": 2.8281873491642392e-05, "loss": 0.3686, "step": 10470 }, { "epoch": 0.45981755710945477, "grad_norm": 1.9765625, "learning_rate": 2.827501495314348e-05, "loss": 0.3607, "step": 10472 }, { "epoch": 0.45990537558865824, "grad_norm": 2.109375, "learning_rate": 2.8268156163843136e-05, "loss": 0.3818, "step": 10474 }, { "epoch": 0.4599931940678617, "grad_norm": 2.1875, "learning_rate": 2.8261297124266613e-05, "loss": 0.3941, "step": 10476 }, { "epoch": 0.46008101254706524, "grad_norm": 1.84375, "learning_rate": 2.825443783493917e-05, "loss": 0.3418, "step": 10478 }, { "epoch": 0.4601688310262687, "grad_norm": 1.859375, "learning_rate": 2.8247578296386102e-05, "loss": 0.3479, "step": 10480 }, { "epoch": 0.4602566495054722, "grad_norm": 1.7265625, "learning_rate": 2.824071850913271e-05, "loss": 0.3828, "step": 10482 }, { "epoch": 0.46034446798467565, "grad_norm": 1.78125, "learning_rate": 2.823385847370431e-05, "loss": 0.3475, "step": 10484 }, { "epoch": 0.4604322864638792, "grad_norm": 2.015625, "learning_rate": 2.822699819062626e-05, "loss": 0.3772, "step": 10486 }, { "epoch": 0.46052010494308265, "grad_norm": 1.7578125, "learning_rate": 2.822013766042391e-05, "loss": 0.3601, "step": 10488 }, { "epoch": 0.4606079234222861, "grad_norm": 1.8828125, "learning_rate": 2.8213276883622654e-05, "loss": 0.383, "step": 10490 }, { "epoch": 0.4606957419014896, "grad_norm": 1.8984375, "learning_rate": 2.820641586074788e-05, "loss": 0.3687, "step": 10492 }, { "epoch": 0.4607835603806931, "grad_norm": 1.8125, "learning_rate": 2.8199554592325005e-05, "loss": 0.3863, "step": 10494 }, { "epoch": 0.4608713788598966, "grad_norm": 2.015625, "learning_rate": 2.819269307887948e-05, "loss": 0.3222, "step": 10496 }, { "epoch": 0.46095919733910007, "grad_norm": 1.7421875, "learning_rate": 2.818583132093675e-05, "loss": 0.3738, "step": 10498 }, { "epoch": 0.46104701581830354, "grad_norm": 2.0625, "learning_rate": 2.8178969319022292e-05, "loss": 0.3666, "step": 10500 }, { "epoch": 0.46113483429750707, "grad_norm": 1.859375, "learning_rate": 2.8172107073661607e-05, "loss": 0.3504, "step": 10502 }, { "epoch": 0.46122265277671054, "grad_norm": 1.7578125, "learning_rate": 2.8165244585380197e-05, "loss": 0.3886, "step": 10504 }, { "epoch": 0.461310471255914, "grad_norm": 1.8515625, "learning_rate": 2.815838185470361e-05, "loss": 0.3935, "step": 10506 }, { "epoch": 0.46139828973511754, "grad_norm": 1.8125, "learning_rate": 2.815151888215737e-05, "loss": 0.3804, "step": 10508 }, { "epoch": 0.461486108214321, "grad_norm": 1.6640625, "learning_rate": 2.8144655668267056e-05, "loss": 0.3321, "step": 10510 }, { "epoch": 0.4615739266935245, "grad_norm": 2.015625, "learning_rate": 2.813779221355826e-05, "loss": 0.3835, "step": 10512 }, { "epoch": 0.46166174517272796, "grad_norm": 1.828125, "learning_rate": 2.8130928518556588e-05, "loss": 0.3922, "step": 10514 }, { "epoch": 0.4617495636519315, "grad_norm": 1.734375, "learning_rate": 2.8124064583787662e-05, "loss": 0.3696, "step": 10516 }, { "epoch": 0.46183738213113495, "grad_norm": 2.15625, "learning_rate": 2.8117200409777124e-05, "loss": 0.3481, "step": 10518 }, { "epoch": 0.4619252006103384, "grad_norm": 1.7890625, "learning_rate": 2.8110335997050624e-05, "loss": 0.3764, "step": 10520 }, { "epoch": 0.4620130190895419, "grad_norm": 1.859375, "learning_rate": 2.8103471346133848e-05, "loss": 0.3645, "step": 10522 }, { "epoch": 0.4621008375687454, "grad_norm": 1.9375, "learning_rate": 2.8096606457552488e-05, "loss": 0.3537, "step": 10524 }, { "epoch": 0.4621886560479489, "grad_norm": 1.703125, "learning_rate": 2.808974133183227e-05, "loss": 0.3692, "step": 10526 }, { "epoch": 0.46227647452715237, "grad_norm": 1.90625, "learning_rate": 2.8082875969498922e-05, "loss": 0.3552, "step": 10528 }, { "epoch": 0.46236429300635584, "grad_norm": 1.8671875, "learning_rate": 2.8076010371078186e-05, "loss": 0.3821, "step": 10530 }, { "epoch": 0.46245211148555937, "grad_norm": 2.109375, "learning_rate": 2.8069144537095842e-05, "loss": 0.3728, "step": 10532 }, { "epoch": 0.46253992996476284, "grad_norm": 1.859375, "learning_rate": 2.8062278468077678e-05, "loss": 0.3553, "step": 10534 }, { "epoch": 0.4626277484439663, "grad_norm": 1.875, "learning_rate": 2.8055412164549488e-05, "loss": 0.36, "step": 10536 }, { "epoch": 0.4627155669231698, "grad_norm": 1.8984375, "learning_rate": 2.8048545627037102e-05, "loss": 0.3602, "step": 10538 }, { "epoch": 0.4628033854023733, "grad_norm": 2.140625, "learning_rate": 2.8041678856066367e-05, "loss": 0.3827, "step": 10540 }, { "epoch": 0.4628912038815768, "grad_norm": 2.015625, "learning_rate": 2.8034811852163136e-05, "loss": 0.3562, "step": 10542 }, { "epoch": 0.46297902236078026, "grad_norm": 1.828125, "learning_rate": 2.802794461585328e-05, "loss": 0.3726, "step": 10544 }, { "epoch": 0.46306684083998373, "grad_norm": 2.140625, "learning_rate": 2.80210771476627e-05, "loss": 0.3806, "step": 10546 }, { "epoch": 0.46315465931918726, "grad_norm": 1.9140625, "learning_rate": 2.8014209448117317e-05, "loss": 0.3816, "step": 10548 }, { "epoch": 0.46324247779839073, "grad_norm": 1.921875, "learning_rate": 2.8007341517743042e-05, "loss": 0.3968, "step": 10550 }, { "epoch": 0.4633302962775942, "grad_norm": 1.8515625, "learning_rate": 2.8000473357065838e-05, "loss": 0.3709, "step": 10552 }, { "epoch": 0.46341811475679767, "grad_norm": 2.203125, "learning_rate": 2.7993604966611665e-05, "loss": 0.3425, "step": 10554 }, { "epoch": 0.4635059332360012, "grad_norm": 1.75, "learning_rate": 2.79867363469065e-05, "loss": 0.3793, "step": 10556 }, { "epoch": 0.46359375171520467, "grad_norm": 2.3125, "learning_rate": 2.7979867498476354e-05, "loss": 0.3728, "step": 10558 }, { "epoch": 0.46368157019440814, "grad_norm": 1.828125, "learning_rate": 2.7972998421847235e-05, "loss": 0.382, "step": 10560 }, { "epoch": 0.46376938867361167, "grad_norm": 2.484375, "learning_rate": 2.7966129117545194e-05, "loss": 0.3628, "step": 10562 }, { "epoch": 0.46385720715281514, "grad_norm": 1.828125, "learning_rate": 2.7959259586096272e-05, "loss": 0.372, "step": 10564 }, { "epoch": 0.4639450256320186, "grad_norm": 2.28125, "learning_rate": 2.7952389828026538e-05, "loss": 0.3911, "step": 10566 }, { "epoch": 0.4640328441112221, "grad_norm": 1.734375, "learning_rate": 2.7945519843862083e-05, "loss": 0.3848, "step": 10568 }, { "epoch": 0.4641206625904256, "grad_norm": 1.890625, "learning_rate": 2.7938649634129015e-05, "loss": 0.3828, "step": 10570 }, { "epoch": 0.4642084810696291, "grad_norm": 1.8359375, "learning_rate": 2.7931779199353448e-05, "loss": 0.3414, "step": 10572 }, { "epoch": 0.46429629954883256, "grad_norm": 1.953125, "learning_rate": 2.792490854006153e-05, "loss": 0.3428, "step": 10574 }, { "epoch": 0.46438411802803603, "grad_norm": 1.84375, "learning_rate": 2.7918037656779417e-05, "loss": 0.3689, "step": 10576 }, { "epoch": 0.46447193650723956, "grad_norm": 2.09375, "learning_rate": 2.791116655003328e-05, "loss": 0.3652, "step": 10578 }, { "epoch": 0.46455975498644303, "grad_norm": 1.984375, "learning_rate": 2.790429522034932e-05, "loss": 0.3702, "step": 10580 }, { "epoch": 0.4646475734656465, "grad_norm": 2.40625, "learning_rate": 2.789742366825372e-05, "loss": 0.3559, "step": 10582 }, { "epoch": 0.46473539194485, "grad_norm": 2.25, "learning_rate": 2.7890551894272726e-05, "loss": 0.3607, "step": 10584 }, { "epoch": 0.4648232104240535, "grad_norm": 1.9453125, "learning_rate": 2.7883679898932575e-05, "loss": 0.3528, "step": 10586 }, { "epoch": 0.464911028903257, "grad_norm": 2.1875, "learning_rate": 2.7876807682759526e-05, "loss": 0.3599, "step": 10588 }, { "epoch": 0.46499884738246045, "grad_norm": 2.203125, "learning_rate": 2.786993524627986e-05, "loss": 0.3848, "step": 10590 }, { "epoch": 0.4650866658616639, "grad_norm": 2.046875, "learning_rate": 2.786306259001986e-05, "loss": 0.3684, "step": 10592 }, { "epoch": 0.46517448434086744, "grad_norm": 2.09375, "learning_rate": 2.7856189714505843e-05, "loss": 0.38, "step": 10594 }, { "epoch": 0.4652623028200709, "grad_norm": 1.9140625, "learning_rate": 2.7849316620264133e-05, "loss": 0.3607, "step": 10596 }, { "epoch": 0.4653501212992744, "grad_norm": 1.8359375, "learning_rate": 2.784244330782107e-05, "loss": 0.3588, "step": 10598 }, { "epoch": 0.46543793977847786, "grad_norm": 2.015625, "learning_rate": 2.783556977770302e-05, "loss": 0.3734, "step": 10600 }, { "epoch": 0.4655257582576814, "grad_norm": 1.984375, "learning_rate": 2.7828696030436353e-05, "loss": 0.3406, "step": 10602 }, { "epoch": 0.46561357673688486, "grad_norm": 2.171875, "learning_rate": 2.782182206654747e-05, "loss": 0.3711, "step": 10604 }, { "epoch": 0.46570139521608833, "grad_norm": 1.8515625, "learning_rate": 2.7814947886562774e-05, "loss": 0.3544, "step": 10606 }, { "epoch": 0.46578921369529186, "grad_norm": 1.6484375, "learning_rate": 2.7808073491008698e-05, "loss": 0.3632, "step": 10608 }, { "epoch": 0.46587703217449533, "grad_norm": 1.8828125, "learning_rate": 2.780119888041168e-05, "loss": 0.3707, "step": 10610 }, { "epoch": 0.4659648506536988, "grad_norm": 2.296875, "learning_rate": 2.7794324055298178e-05, "loss": 0.3833, "step": 10612 }, { "epoch": 0.4660526691329023, "grad_norm": 2.203125, "learning_rate": 2.7787449016194665e-05, "loss": 0.3892, "step": 10614 }, { "epoch": 0.4661404876121058, "grad_norm": 1.78125, "learning_rate": 2.7780573763627648e-05, "loss": 0.3694, "step": 10616 }, { "epoch": 0.4662283060913093, "grad_norm": 1.8671875, "learning_rate": 2.777369829812362e-05, "loss": 0.3798, "step": 10618 }, { "epoch": 0.46631612457051275, "grad_norm": 1.9140625, "learning_rate": 2.776682262020911e-05, "loss": 0.388, "step": 10620 }, { "epoch": 0.4664039430497162, "grad_norm": 2.015625, "learning_rate": 2.7759946730410663e-05, "loss": 0.3559, "step": 10622 }, { "epoch": 0.46649176152891975, "grad_norm": 2.09375, "learning_rate": 2.7753070629254835e-05, "loss": 0.3877, "step": 10624 }, { "epoch": 0.4665795800081232, "grad_norm": 1.7734375, "learning_rate": 2.7746194317268197e-05, "loss": 0.3885, "step": 10626 }, { "epoch": 0.4666673984873267, "grad_norm": 1.7578125, "learning_rate": 2.773931779497735e-05, "loss": 0.3809, "step": 10628 }, { "epoch": 0.46675521696653016, "grad_norm": 1.734375, "learning_rate": 2.7732441062908877e-05, "loss": 0.3759, "step": 10630 }, { "epoch": 0.4668430354457337, "grad_norm": 2.3125, "learning_rate": 2.772556412158941e-05, "loss": 0.3576, "step": 10632 }, { "epoch": 0.46693085392493716, "grad_norm": 1.6953125, "learning_rate": 2.7718686971545592e-05, "loss": 0.3965, "step": 10634 }, { "epoch": 0.46701867240414063, "grad_norm": 1.78125, "learning_rate": 2.771180961330408e-05, "loss": 0.3518, "step": 10636 }, { "epoch": 0.4671064908833441, "grad_norm": 1.7421875, "learning_rate": 2.770493204739153e-05, "loss": 0.3663, "step": 10638 }, { "epoch": 0.46719430936254763, "grad_norm": 1.8203125, "learning_rate": 2.7698054274334645e-05, "loss": 0.3468, "step": 10640 }, { "epoch": 0.4672821278417511, "grad_norm": 1.859375, "learning_rate": 2.7691176294660114e-05, "loss": 0.3605, "step": 10642 }, { "epoch": 0.4673699463209546, "grad_norm": 1.7734375, "learning_rate": 2.768429810889465e-05, "loss": 0.3526, "step": 10644 }, { "epoch": 0.46745776480015805, "grad_norm": 2.078125, "learning_rate": 2.7677419717565e-05, "loss": 0.3834, "step": 10646 }, { "epoch": 0.4675455832793616, "grad_norm": 2.140625, "learning_rate": 2.76705411211979e-05, "loss": 0.3985, "step": 10648 }, { "epoch": 0.46763340175856505, "grad_norm": 1.78125, "learning_rate": 2.766366232032013e-05, "loss": 0.3632, "step": 10650 }, { "epoch": 0.4677212202377685, "grad_norm": 1.7109375, "learning_rate": 2.7656783315458457e-05, "loss": 0.341, "step": 10652 }, { "epoch": 0.467809038716972, "grad_norm": 2.546875, "learning_rate": 2.7649904107139675e-05, "loss": 0.3662, "step": 10654 }, { "epoch": 0.4678968571961755, "grad_norm": 2.3125, "learning_rate": 2.7643024695890613e-05, "loss": 0.3479, "step": 10656 }, { "epoch": 0.467984675675379, "grad_norm": 1.7265625, "learning_rate": 2.7636145082238074e-05, "loss": 0.3481, "step": 10658 }, { "epoch": 0.46807249415458246, "grad_norm": 1.9140625, "learning_rate": 2.762926526670892e-05, "loss": 0.3795, "step": 10660 }, { "epoch": 0.468160312633786, "grad_norm": 1.9296875, "learning_rate": 2.7622385249829997e-05, "loss": 0.3054, "step": 10662 }, { "epoch": 0.46824813111298946, "grad_norm": 2.453125, "learning_rate": 2.761550503212818e-05, "loss": 0.3707, "step": 10664 }, { "epoch": 0.46833594959219293, "grad_norm": 1.8984375, "learning_rate": 2.7608624614130374e-05, "loss": 0.3639, "step": 10666 }, { "epoch": 0.4684237680713964, "grad_norm": 2.265625, "learning_rate": 2.760174399636346e-05, "loss": 0.3831, "step": 10668 }, { "epoch": 0.46851158655059993, "grad_norm": 2.03125, "learning_rate": 2.759486317935437e-05, "loss": 0.3721, "step": 10670 }, { "epoch": 0.4685994050298034, "grad_norm": 1.8984375, "learning_rate": 2.7587982163630033e-05, "loss": 0.365, "step": 10672 }, { "epoch": 0.4686872235090069, "grad_norm": 1.9765625, "learning_rate": 2.7581100949717402e-05, "loss": 0.3823, "step": 10674 }, { "epoch": 0.46877504198821035, "grad_norm": 1.796875, "learning_rate": 2.7574219538143443e-05, "loss": 0.3744, "step": 10676 }, { "epoch": 0.4688628604674139, "grad_norm": 1.8984375, "learning_rate": 2.756733792943514e-05, "loss": 0.3758, "step": 10678 }, { "epoch": 0.46895067894661735, "grad_norm": 1.890625, "learning_rate": 2.7560456124119473e-05, "loss": 0.3831, "step": 10680 }, { "epoch": 0.4690384974258208, "grad_norm": 1.7109375, "learning_rate": 2.7553574122723462e-05, "loss": 0.3569, "step": 10682 }, { "epoch": 0.4691263159050243, "grad_norm": 1.9609375, "learning_rate": 2.7546691925774137e-05, "loss": 0.3486, "step": 10684 }, { "epoch": 0.4692141343842278, "grad_norm": 1.78125, "learning_rate": 2.753980953379854e-05, "loss": 0.3897, "step": 10686 }, { "epoch": 0.4693019528634313, "grad_norm": 1.875, "learning_rate": 2.753292694732371e-05, "loss": 0.3905, "step": 10688 }, { "epoch": 0.46938977134263477, "grad_norm": 1.84375, "learning_rate": 2.7526044166876737e-05, "loss": 0.3352, "step": 10690 }, { "epoch": 0.46947758982183824, "grad_norm": 1.875, "learning_rate": 2.751916119298469e-05, "loss": 0.3845, "step": 10692 }, { "epoch": 0.46956540830104176, "grad_norm": 1.6640625, "learning_rate": 2.751227802617467e-05, "loss": 0.3655, "step": 10694 }, { "epoch": 0.46965322678024524, "grad_norm": 1.7265625, "learning_rate": 2.7505394666973803e-05, "loss": 0.3813, "step": 10696 }, { "epoch": 0.4697410452594487, "grad_norm": 1.9609375, "learning_rate": 2.749851111590921e-05, "loss": 0.4094, "step": 10698 }, { "epoch": 0.4698288637386522, "grad_norm": 1.7890625, "learning_rate": 2.7491627373508035e-05, "loss": 0.3623, "step": 10700 }, { "epoch": 0.4699166822178557, "grad_norm": 1.921875, "learning_rate": 2.7484743440297445e-05, "loss": 0.3805, "step": 10702 }, { "epoch": 0.4700045006970592, "grad_norm": 1.875, "learning_rate": 2.7477859316804604e-05, "loss": 0.3795, "step": 10704 }, { "epoch": 0.47009231917626265, "grad_norm": 1.953125, "learning_rate": 2.7470975003556705e-05, "loss": 0.3744, "step": 10706 }, { "epoch": 0.4701801376554661, "grad_norm": 1.671875, "learning_rate": 2.746409050108094e-05, "loss": 0.3915, "step": 10708 }, { "epoch": 0.47026795613466965, "grad_norm": 1.8828125, "learning_rate": 2.7457205809904534e-05, "loss": 0.3823, "step": 10710 }, { "epoch": 0.4703557746138731, "grad_norm": 1.890625, "learning_rate": 2.7450320930554724e-05, "loss": 0.3686, "step": 10712 }, { "epoch": 0.4704435930930766, "grad_norm": 1.9921875, "learning_rate": 2.7443435863558748e-05, "loss": 0.3657, "step": 10714 }, { "epoch": 0.4705314115722801, "grad_norm": 1.8125, "learning_rate": 2.743655060944387e-05, "loss": 0.3807, "step": 10716 }, { "epoch": 0.4706192300514836, "grad_norm": 2.015625, "learning_rate": 2.7429665168737363e-05, "loss": 0.3675, "step": 10718 }, { "epoch": 0.47070704853068707, "grad_norm": 1.6875, "learning_rate": 2.742277954196651e-05, "loss": 0.3704, "step": 10720 }, { "epoch": 0.47079486700989054, "grad_norm": 2.0, "learning_rate": 2.7415893729658616e-05, "loss": 0.3794, "step": 10722 }, { "epoch": 0.47088268548909407, "grad_norm": 1.90625, "learning_rate": 2.7409007732341004e-05, "loss": 0.3534, "step": 10724 }, { "epoch": 0.47097050396829754, "grad_norm": 2.046875, "learning_rate": 2.740212155054101e-05, "loss": 0.3631, "step": 10726 }, { "epoch": 0.471058322447501, "grad_norm": 1.8515625, "learning_rate": 2.739523518478596e-05, "loss": 0.3783, "step": 10728 }, { "epoch": 0.4711461409267045, "grad_norm": 2.03125, "learning_rate": 2.7388348635603235e-05, "loss": 0.3625, "step": 10730 }, { "epoch": 0.471233959405908, "grad_norm": 1.8671875, "learning_rate": 2.7381461903520194e-05, "loss": 0.3539, "step": 10732 }, { "epoch": 0.4713217778851115, "grad_norm": 1.9296875, "learning_rate": 2.7374574989064234e-05, "loss": 0.3576, "step": 10734 }, { "epoch": 0.47140959636431495, "grad_norm": 1.8125, "learning_rate": 2.7367687892762745e-05, "loss": 0.3955, "step": 10736 }, { "epoch": 0.4714974148435184, "grad_norm": 1.8125, "learning_rate": 2.736080061514315e-05, "loss": 0.3655, "step": 10738 }, { "epoch": 0.47158523332272195, "grad_norm": 1.859375, "learning_rate": 2.7353913156732884e-05, "loss": 0.3624, "step": 10740 }, { "epoch": 0.4716730518019254, "grad_norm": 1.984375, "learning_rate": 2.7347025518059383e-05, "loss": 0.3643, "step": 10742 }, { "epoch": 0.4717608702811289, "grad_norm": 1.7890625, "learning_rate": 2.7340137699650103e-05, "loss": 0.353, "step": 10744 }, { "epoch": 0.47184868876033237, "grad_norm": 2.03125, "learning_rate": 2.7333249702032516e-05, "loss": 0.3639, "step": 10746 }, { "epoch": 0.4719365072395359, "grad_norm": 1.875, "learning_rate": 2.7326361525734112e-05, "loss": 0.3855, "step": 10748 }, { "epoch": 0.47202432571873937, "grad_norm": 1.8125, "learning_rate": 2.731947317128238e-05, "loss": 0.3408, "step": 10750 }, { "epoch": 0.47211214419794284, "grad_norm": 1.7578125, "learning_rate": 2.731258463920484e-05, "loss": 0.3931, "step": 10752 }, { "epoch": 0.4721999626771463, "grad_norm": 1.890625, "learning_rate": 2.730569593002901e-05, "loss": 0.3563, "step": 10754 }, { "epoch": 0.47228778115634984, "grad_norm": 2.0625, "learning_rate": 2.7298807044282438e-05, "loss": 0.3742, "step": 10756 }, { "epoch": 0.4723755996355533, "grad_norm": 2.140625, "learning_rate": 2.7291917982492664e-05, "loss": 0.4072, "step": 10758 }, { "epoch": 0.4724634181147568, "grad_norm": 2.0625, "learning_rate": 2.7285028745187268e-05, "loss": 0.3598, "step": 10760 }, { "epoch": 0.4725512365939603, "grad_norm": 1.8828125, "learning_rate": 2.7278139332893827e-05, "loss": 0.3398, "step": 10762 }, { "epoch": 0.4726390550731638, "grad_norm": 2.0, "learning_rate": 2.727124974613992e-05, "loss": 0.3882, "step": 10764 }, { "epoch": 0.47272687355236725, "grad_norm": 1.75, "learning_rate": 2.7264359985453168e-05, "loss": 0.391, "step": 10766 }, { "epoch": 0.4728146920315707, "grad_norm": 1.7578125, "learning_rate": 2.7257470051361184e-05, "loss": 0.3629, "step": 10768 }, { "epoch": 0.47290251051077425, "grad_norm": 2.0, "learning_rate": 2.72505799443916e-05, "loss": 0.3444, "step": 10770 }, { "epoch": 0.4729903289899777, "grad_norm": 2.390625, "learning_rate": 2.724368966507207e-05, "loss": 0.3814, "step": 10772 }, { "epoch": 0.4730781474691812, "grad_norm": 1.8984375, "learning_rate": 2.7236799213930243e-05, "loss": 0.3891, "step": 10774 }, { "epoch": 0.47316596594838467, "grad_norm": 1.8984375, "learning_rate": 2.7229908591493804e-05, "loss": 0.3767, "step": 10776 }, { "epoch": 0.4732537844275882, "grad_norm": 1.890625, "learning_rate": 2.7223017798290423e-05, "loss": 0.3763, "step": 10778 }, { "epoch": 0.47334160290679167, "grad_norm": 1.8125, "learning_rate": 2.7216126834847805e-05, "loss": 0.3715, "step": 10780 }, { "epoch": 0.47342942138599514, "grad_norm": 1.8203125, "learning_rate": 2.7209235701693663e-05, "loss": 0.3626, "step": 10782 }, { "epoch": 0.4735172398651986, "grad_norm": 1.796875, "learning_rate": 2.7202344399355722e-05, "loss": 0.3342, "step": 10784 }, { "epoch": 0.47360505834440214, "grad_norm": 1.84375, "learning_rate": 2.719545292836172e-05, "loss": 0.3692, "step": 10786 }, { "epoch": 0.4736928768236056, "grad_norm": 1.7421875, "learning_rate": 2.7188561289239413e-05, "loss": 0.3625, "step": 10788 }, { "epoch": 0.4737806953028091, "grad_norm": 1.859375, "learning_rate": 2.7181669482516546e-05, "loss": 0.3626, "step": 10790 }, { "epoch": 0.47386851378201256, "grad_norm": 1.890625, "learning_rate": 2.7174777508720917e-05, "loss": 0.3878, "step": 10792 }, { "epoch": 0.4739563322612161, "grad_norm": 1.8125, "learning_rate": 2.7167885368380304e-05, "loss": 0.347, "step": 10794 }, { "epoch": 0.47404415074041956, "grad_norm": 1.921875, "learning_rate": 2.7160993062022504e-05, "loss": 0.3518, "step": 10796 }, { "epoch": 0.47413196921962303, "grad_norm": 1.953125, "learning_rate": 2.715410059017534e-05, "loss": 0.3369, "step": 10798 }, { "epoch": 0.4742197876988265, "grad_norm": 1.890625, "learning_rate": 2.7147207953366633e-05, "loss": 0.3438, "step": 10800 }, { "epoch": 0.47430760617803003, "grad_norm": 1.859375, "learning_rate": 2.7140315152124233e-05, "loss": 0.3813, "step": 10802 }, { "epoch": 0.4743954246572335, "grad_norm": 1.8828125, "learning_rate": 2.713342218697598e-05, "loss": 0.3613, "step": 10804 }, { "epoch": 0.47448324313643697, "grad_norm": 1.625, "learning_rate": 2.7126529058449747e-05, "loss": 0.3814, "step": 10806 }, { "epoch": 0.47457106161564044, "grad_norm": 2.078125, "learning_rate": 2.711963576707341e-05, "loss": 0.3403, "step": 10808 }, { "epoch": 0.47465888009484397, "grad_norm": 2.0, "learning_rate": 2.7112742313374855e-05, "loss": 0.3498, "step": 10810 }, { "epoch": 0.47474669857404744, "grad_norm": 1.84375, "learning_rate": 2.710584869788199e-05, "loss": 0.3991, "step": 10812 }, { "epoch": 0.4748345170532509, "grad_norm": 1.734375, "learning_rate": 2.7098954921122726e-05, "loss": 0.3418, "step": 10814 }, { "epoch": 0.47492233553245444, "grad_norm": 1.75, "learning_rate": 2.709206098362499e-05, "loss": 0.3639, "step": 10816 }, { "epoch": 0.4750101540116579, "grad_norm": 1.9609375, "learning_rate": 2.7085166885916723e-05, "loss": 0.3521, "step": 10818 }, { "epoch": 0.4750979724908614, "grad_norm": 1.90625, "learning_rate": 2.7078272628525875e-05, "loss": 0.3859, "step": 10820 }, { "epoch": 0.47518579097006486, "grad_norm": 1.78125, "learning_rate": 2.707137821198041e-05, "loss": 0.3664, "step": 10822 }, { "epoch": 0.4752736094492684, "grad_norm": 1.890625, "learning_rate": 2.7064483636808313e-05, "loss": 0.3766, "step": 10824 }, { "epoch": 0.47536142792847186, "grad_norm": 1.7265625, "learning_rate": 2.705758890353756e-05, "loss": 0.3534, "step": 10826 }, { "epoch": 0.47544924640767533, "grad_norm": 1.859375, "learning_rate": 2.7050694012696155e-05, "loss": 0.3678, "step": 10828 }, { "epoch": 0.4755370648868788, "grad_norm": 1.8359375, "learning_rate": 2.7043798964812107e-05, "loss": 0.3803, "step": 10830 }, { "epoch": 0.47562488336608233, "grad_norm": 1.890625, "learning_rate": 2.7036903760413447e-05, "loss": 0.3502, "step": 10832 }, { "epoch": 0.4757127018452858, "grad_norm": 1.9453125, "learning_rate": 2.7030008400028205e-05, "loss": 0.381, "step": 10834 }, { "epoch": 0.4758005203244893, "grad_norm": 1.859375, "learning_rate": 2.702311288418443e-05, "loss": 0.3547, "step": 10836 }, { "epoch": 0.47588833880369275, "grad_norm": 1.734375, "learning_rate": 2.70162172134102e-05, "loss": 0.3324, "step": 10838 }, { "epoch": 0.4759761572828963, "grad_norm": 2.125, "learning_rate": 2.7009321388233567e-05, "loss": 0.3896, "step": 10840 }, { "epoch": 0.47606397576209974, "grad_norm": 2.265625, "learning_rate": 2.700242540918262e-05, "loss": 0.3554, "step": 10842 }, { "epoch": 0.4761517942413032, "grad_norm": 1.9609375, "learning_rate": 2.6995529276785446e-05, "loss": 0.3728, "step": 10844 }, { "epoch": 0.4762396127205067, "grad_norm": 1.7421875, "learning_rate": 2.6988632991570174e-05, "loss": 0.364, "step": 10846 }, { "epoch": 0.4763274311997102, "grad_norm": 1.875, "learning_rate": 2.6981736554064903e-05, "loss": 0.4019, "step": 10848 }, { "epoch": 0.4764152496789137, "grad_norm": 1.9921875, "learning_rate": 2.6974839964797776e-05, "loss": 0.3606, "step": 10850 }, { "epoch": 0.47650306815811716, "grad_norm": 1.828125, "learning_rate": 2.6967943224296927e-05, "loss": 0.3407, "step": 10852 }, { "epoch": 0.47659088663732063, "grad_norm": 1.828125, "learning_rate": 2.6961046333090517e-05, "loss": 0.3656, "step": 10854 }, { "epoch": 0.47667870511652416, "grad_norm": 1.9375, "learning_rate": 2.695414929170671e-05, "loss": 0.3354, "step": 10856 }, { "epoch": 0.47676652359572763, "grad_norm": 1.8984375, "learning_rate": 2.694725210067368e-05, "loss": 0.3991, "step": 10858 }, { "epoch": 0.4768543420749311, "grad_norm": 2.15625, "learning_rate": 2.6940354760519616e-05, "loss": 0.3437, "step": 10860 }, { "epoch": 0.47694216055413463, "grad_norm": 1.9140625, "learning_rate": 2.6933457271772718e-05, "loss": 0.3562, "step": 10862 }, { "epoch": 0.4770299790333381, "grad_norm": 1.921875, "learning_rate": 2.692655963496121e-05, "loss": 0.4094, "step": 10864 }, { "epoch": 0.4771177975125416, "grad_norm": 1.859375, "learning_rate": 2.6919661850613287e-05, "loss": 0.3828, "step": 10866 }, { "epoch": 0.47720561599174505, "grad_norm": 1.8984375, "learning_rate": 2.6912763919257207e-05, "loss": 0.3572, "step": 10868 }, { "epoch": 0.4772934344709486, "grad_norm": 2.28125, "learning_rate": 2.690586584142121e-05, "loss": 0.3654, "step": 10870 }, { "epoch": 0.47738125295015205, "grad_norm": 2.375, "learning_rate": 2.6898967617633546e-05, "loss": 0.3713, "step": 10872 }, { "epoch": 0.4774690714293555, "grad_norm": 1.953125, "learning_rate": 2.6892069248422486e-05, "loss": 0.3445, "step": 10874 }, { "epoch": 0.477556889908559, "grad_norm": 1.8203125, "learning_rate": 2.6885170734316317e-05, "loss": 0.36, "step": 10876 }, { "epoch": 0.4776447083877625, "grad_norm": 1.9765625, "learning_rate": 2.6878272075843313e-05, "loss": 0.334, "step": 10878 }, { "epoch": 0.477732526866966, "grad_norm": 1.875, "learning_rate": 2.6871373273531785e-05, "loss": 0.3442, "step": 10880 }, { "epoch": 0.47782034534616946, "grad_norm": 1.890625, "learning_rate": 2.6864474327910038e-05, "loss": 0.3635, "step": 10882 }, { "epoch": 0.47790816382537293, "grad_norm": 1.6796875, "learning_rate": 2.6857575239506406e-05, "loss": 0.3776, "step": 10884 }, { "epoch": 0.47799598230457646, "grad_norm": 1.6953125, "learning_rate": 2.6850676008849222e-05, "loss": 0.3681, "step": 10886 }, { "epoch": 0.47808380078377993, "grad_norm": 1.78125, "learning_rate": 2.6843776636466828e-05, "loss": 0.3486, "step": 10888 }, { "epoch": 0.4781716192629834, "grad_norm": 1.75, "learning_rate": 2.6836877122887565e-05, "loss": 0.3536, "step": 10890 }, { "epoch": 0.4782594377421869, "grad_norm": 1.7421875, "learning_rate": 2.6829977468639815e-05, "loss": 0.3377, "step": 10892 }, { "epoch": 0.4783472562213904, "grad_norm": 2.4375, "learning_rate": 2.682307767425195e-05, "loss": 0.357, "step": 10894 }, { "epoch": 0.4784350747005939, "grad_norm": 1.96875, "learning_rate": 2.6816177740252362e-05, "loss": 0.3781, "step": 10896 }, { "epoch": 0.47852289317979735, "grad_norm": 1.71875, "learning_rate": 2.6809277667169446e-05, "loss": 0.3761, "step": 10898 }, { "epoch": 0.4786107116590008, "grad_norm": 1.78125, "learning_rate": 2.680237745553162e-05, "loss": 0.3505, "step": 10900 }, { "epoch": 0.47869853013820435, "grad_norm": 1.9765625, "learning_rate": 2.6795477105867295e-05, "loss": 0.3985, "step": 10902 }, { "epoch": 0.4787863486174078, "grad_norm": 2.03125, "learning_rate": 2.6788576618704897e-05, "loss": 0.382, "step": 10904 }, { "epoch": 0.4788741670966113, "grad_norm": 1.6875, "learning_rate": 2.6781675994572876e-05, "loss": 0.3692, "step": 10906 }, { "epoch": 0.47896198557581476, "grad_norm": 2.046875, "learning_rate": 2.677477523399967e-05, "loss": 0.3521, "step": 10908 }, { "epoch": 0.4790498040550183, "grad_norm": 2.328125, "learning_rate": 2.676787433751376e-05, "loss": 0.3662, "step": 10910 }, { "epoch": 0.47913762253422176, "grad_norm": 2.234375, "learning_rate": 2.6760973305643616e-05, "loss": 0.3562, "step": 10912 }, { "epoch": 0.47922544101342524, "grad_norm": 1.75, "learning_rate": 2.6754072138917708e-05, "loss": 0.362, "step": 10914 }, { "epoch": 0.47931325949262876, "grad_norm": 2.265625, "learning_rate": 2.674717083786454e-05, "loss": 0.3507, "step": 10916 }, { "epoch": 0.47940107797183223, "grad_norm": 3.125, "learning_rate": 2.6740269403012595e-05, "loss": 0.3584, "step": 10918 }, { "epoch": 0.4794888964510357, "grad_norm": 1.796875, "learning_rate": 2.6733367834890415e-05, "loss": 0.376, "step": 10920 }, { "epoch": 0.4795767149302392, "grad_norm": 1.75, "learning_rate": 2.6726466134026507e-05, "loss": 0.3462, "step": 10922 }, { "epoch": 0.4796645334094427, "grad_norm": 1.7265625, "learning_rate": 2.6719564300949413e-05, "loss": 0.3701, "step": 10924 }, { "epoch": 0.4797523518886462, "grad_norm": 1.9921875, "learning_rate": 2.6712662336187667e-05, "loss": 0.3719, "step": 10926 }, { "epoch": 0.47984017036784965, "grad_norm": 1.9921875, "learning_rate": 2.6705760240269828e-05, "loss": 0.3555, "step": 10928 }, { "epoch": 0.4799279888470531, "grad_norm": 1.78125, "learning_rate": 2.6698858013724466e-05, "loss": 0.365, "step": 10930 }, { "epoch": 0.48001580732625665, "grad_norm": 1.7734375, "learning_rate": 2.669195565708015e-05, "loss": 0.3934, "step": 10932 }, { "epoch": 0.4801036258054601, "grad_norm": 1.96875, "learning_rate": 2.668505317086546e-05, "loss": 0.3618, "step": 10934 }, { "epoch": 0.4801914442846636, "grad_norm": 2.171875, "learning_rate": 2.6678150555608993e-05, "loss": 0.3615, "step": 10936 }, { "epoch": 0.48027926276386707, "grad_norm": 2.25, "learning_rate": 2.667124781183936e-05, "loss": 0.3968, "step": 10938 }, { "epoch": 0.4803670812430706, "grad_norm": 1.671875, "learning_rate": 2.6664344940085166e-05, "loss": 0.3858, "step": 10940 }, { "epoch": 0.48045489972227406, "grad_norm": 2.046875, "learning_rate": 2.665744194087503e-05, "loss": 0.3488, "step": 10942 }, { "epoch": 0.48054271820147754, "grad_norm": 2.40625, "learning_rate": 2.6650538814737603e-05, "loss": 0.3385, "step": 10944 }, { "epoch": 0.480630536680681, "grad_norm": 1.890625, "learning_rate": 2.6643635562201513e-05, "loss": 0.3834, "step": 10946 }, { "epoch": 0.48071835515988454, "grad_norm": 1.9375, "learning_rate": 2.6636732183795426e-05, "loss": 0.3588, "step": 10948 }, { "epoch": 0.480806173639088, "grad_norm": 1.9140625, "learning_rate": 2.6629828680047998e-05, "loss": 0.3857, "step": 10950 }, { "epoch": 0.4808939921182915, "grad_norm": 2.234375, "learning_rate": 2.6622925051487884e-05, "loss": 0.3765, "step": 10952 }, { "epoch": 0.48098181059749495, "grad_norm": 2.421875, "learning_rate": 2.6616021298643788e-05, "loss": 0.3642, "step": 10954 }, { "epoch": 0.4810696290766985, "grad_norm": 1.8046875, "learning_rate": 2.6609117422044394e-05, "loss": 0.3503, "step": 10956 }, { "epoch": 0.48115744755590195, "grad_norm": 2.125, "learning_rate": 2.66022134222184e-05, "loss": 0.3387, "step": 10958 }, { "epoch": 0.4812452660351054, "grad_norm": 2.109375, "learning_rate": 2.659530929969452e-05, "loss": 0.3796, "step": 10960 }, { "epoch": 0.48133308451430895, "grad_norm": 1.9296875, "learning_rate": 2.658840505500147e-05, "loss": 0.3883, "step": 10962 }, { "epoch": 0.4814209029935124, "grad_norm": 2.4375, "learning_rate": 2.658150068866798e-05, "loss": 0.3505, "step": 10964 }, { "epoch": 0.4815087214727159, "grad_norm": 1.5859375, "learning_rate": 2.657459620122279e-05, "loss": 0.3476, "step": 10966 }, { "epoch": 0.48159653995191937, "grad_norm": 2.046875, "learning_rate": 2.6567691593194644e-05, "loss": 0.3604, "step": 10968 }, { "epoch": 0.4816843584311229, "grad_norm": 1.8828125, "learning_rate": 2.6560786865112296e-05, "loss": 0.3394, "step": 10970 }, { "epoch": 0.48177217691032637, "grad_norm": 1.890625, "learning_rate": 2.6553882017504523e-05, "loss": 0.3632, "step": 10972 }, { "epoch": 0.48185999538952984, "grad_norm": 1.765625, "learning_rate": 2.654697705090009e-05, "loss": 0.3815, "step": 10974 }, { "epoch": 0.4819478138687333, "grad_norm": 1.9296875, "learning_rate": 2.6540071965827784e-05, "loss": 0.3448, "step": 10976 }, { "epoch": 0.48203563234793684, "grad_norm": 1.8671875, "learning_rate": 2.6533166762816404e-05, "loss": 0.3669, "step": 10978 }, { "epoch": 0.4821234508271403, "grad_norm": 2.109375, "learning_rate": 2.6526261442394734e-05, "loss": 0.3838, "step": 10980 }, { "epoch": 0.4822112693063438, "grad_norm": 2.03125, "learning_rate": 2.65193560050916e-05, "loss": 0.3641, "step": 10982 }, { "epoch": 0.48229908778554725, "grad_norm": 1.7421875, "learning_rate": 2.6512450451435822e-05, "loss": 0.3629, "step": 10984 }, { "epoch": 0.4823869062647508, "grad_norm": 2.03125, "learning_rate": 2.6505544781956225e-05, "loss": 0.3732, "step": 10986 }, { "epoch": 0.48247472474395425, "grad_norm": 1.7734375, "learning_rate": 2.6498638997181648e-05, "loss": 0.3918, "step": 10988 }, { "epoch": 0.4825625432231577, "grad_norm": 1.8515625, "learning_rate": 2.649173309764093e-05, "loss": 0.3696, "step": 10990 }, { "epoch": 0.4826503617023612, "grad_norm": 1.8515625, "learning_rate": 2.648482708386294e-05, "loss": 0.3544, "step": 10992 }, { "epoch": 0.4827381801815647, "grad_norm": 1.734375, "learning_rate": 2.647792095637654e-05, "loss": 0.3607, "step": 10994 }, { "epoch": 0.4828259986607682, "grad_norm": 2.140625, "learning_rate": 2.6471014715710595e-05, "loss": 0.3559, "step": 10996 }, { "epoch": 0.48291381713997167, "grad_norm": 1.8515625, "learning_rate": 2.646410836239399e-05, "loss": 0.3294, "step": 10998 }, { "epoch": 0.48300163561917514, "grad_norm": 1.734375, "learning_rate": 2.645720189695562e-05, "loss": 0.3633, "step": 11000 }, { "epoch": 0.48308945409837867, "grad_norm": 1.9140625, "learning_rate": 2.6450295319924374e-05, "loss": 0.359, "step": 11002 }, { "epoch": 0.48317727257758214, "grad_norm": 1.953125, "learning_rate": 2.6443388631829162e-05, "loss": 0.3626, "step": 11004 }, { "epoch": 0.4832650910567856, "grad_norm": 1.84375, "learning_rate": 2.6436481833198905e-05, "loss": 0.3442, "step": 11006 }, { "epoch": 0.4833529095359891, "grad_norm": 1.8203125, "learning_rate": 2.6429574924562534e-05, "loss": 0.3785, "step": 11008 }, { "epoch": 0.4834407280151926, "grad_norm": 1.71875, "learning_rate": 2.642266790644896e-05, "loss": 0.3794, "step": 11010 }, { "epoch": 0.4835285464943961, "grad_norm": 2.015625, "learning_rate": 2.641576077938715e-05, "loss": 0.3482, "step": 11012 }, { "epoch": 0.48361636497359956, "grad_norm": 1.875, "learning_rate": 2.640885354390603e-05, "loss": 0.3612, "step": 11014 }, { "epoch": 0.4837041834528031, "grad_norm": 1.875, "learning_rate": 2.6401946200534567e-05, "loss": 0.3577, "step": 11016 }, { "epoch": 0.48379200193200655, "grad_norm": 1.9296875, "learning_rate": 2.639503874980173e-05, "loss": 0.3491, "step": 11018 }, { "epoch": 0.48387982041121, "grad_norm": 1.78125, "learning_rate": 2.6388131192236493e-05, "loss": 0.372, "step": 11020 }, { "epoch": 0.4839676388904135, "grad_norm": 1.7421875, "learning_rate": 2.6381223528367836e-05, "loss": 0.3848, "step": 11022 }, { "epoch": 0.484055457369617, "grad_norm": 1.8359375, "learning_rate": 2.6374315758724755e-05, "loss": 0.3491, "step": 11024 }, { "epoch": 0.4841432758488205, "grad_norm": 1.859375, "learning_rate": 2.6367407883836238e-05, "loss": 0.3776, "step": 11026 }, { "epoch": 0.48423109432802397, "grad_norm": 1.8359375, "learning_rate": 2.6360499904231297e-05, "loss": 0.3584, "step": 11028 }, { "epoch": 0.48431891280722744, "grad_norm": 1.703125, "learning_rate": 2.6353591820438945e-05, "loss": 0.356, "step": 11030 }, { "epoch": 0.48440673128643097, "grad_norm": 1.8515625, "learning_rate": 2.6346683632988213e-05, "loss": 0.3673, "step": 11032 }, { "epoch": 0.48449454976563444, "grad_norm": 1.8671875, "learning_rate": 2.633977534240812e-05, "loss": 0.3659, "step": 11034 }, { "epoch": 0.4845823682448379, "grad_norm": 1.84375, "learning_rate": 2.6332866949227713e-05, "loss": 0.3441, "step": 11036 }, { "epoch": 0.4846701867240414, "grad_norm": 1.828125, "learning_rate": 2.6325958453976036e-05, "loss": 0.374, "step": 11038 }, { "epoch": 0.4847580052032449, "grad_norm": 1.7265625, "learning_rate": 2.6319049857182144e-05, "loss": 0.3385, "step": 11040 }, { "epoch": 0.4848458236824484, "grad_norm": 2.03125, "learning_rate": 2.631214115937509e-05, "loss": 0.3632, "step": 11042 }, { "epoch": 0.48493364216165186, "grad_norm": 1.859375, "learning_rate": 2.630523236108396e-05, "loss": 0.3257, "step": 11044 }, { "epoch": 0.48502146064085533, "grad_norm": 1.7578125, "learning_rate": 2.629832346283782e-05, "loss": 0.3693, "step": 11046 }, { "epoch": 0.48510927912005886, "grad_norm": 2.15625, "learning_rate": 2.629141446516576e-05, "loss": 0.3815, "step": 11048 }, { "epoch": 0.48519709759926233, "grad_norm": 1.984375, "learning_rate": 2.628450536859687e-05, "loss": 0.3517, "step": 11050 }, { "epoch": 0.4852849160784658, "grad_norm": 1.75, "learning_rate": 2.6277596173660246e-05, "loss": 0.3754, "step": 11052 }, { "epoch": 0.48537273455766927, "grad_norm": 1.75, "learning_rate": 2.627068688088501e-05, "loss": 0.401, "step": 11054 }, { "epoch": 0.4854605530368728, "grad_norm": 1.7578125, "learning_rate": 2.6263777490800263e-05, "loss": 0.3613, "step": 11056 }, { "epoch": 0.48554837151607627, "grad_norm": 1.6875, "learning_rate": 2.6256868003935127e-05, "loss": 0.3688, "step": 11058 }, { "epoch": 0.48563618999527974, "grad_norm": 1.75, "learning_rate": 2.6249958420818744e-05, "loss": 0.3926, "step": 11060 }, { "epoch": 0.48572400847448327, "grad_norm": 1.6796875, "learning_rate": 2.6243048741980246e-05, "loss": 0.3964, "step": 11062 }, { "epoch": 0.48581182695368674, "grad_norm": 1.7578125, "learning_rate": 2.6236138967948776e-05, "loss": 0.3771, "step": 11064 }, { "epoch": 0.4858996454328902, "grad_norm": 1.859375, "learning_rate": 2.622922909925349e-05, "loss": 0.3446, "step": 11066 }, { "epoch": 0.4859874639120937, "grad_norm": 1.9140625, "learning_rate": 2.6222319136423533e-05, "loss": 0.337, "step": 11068 }, { "epoch": 0.4860752823912972, "grad_norm": 1.8125, "learning_rate": 2.62154090799881e-05, "loss": 0.3735, "step": 11070 }, { "epoch": 0.4861631008705007, "grad_norm": 1.7421875, "learning_rate": 2.6208498930476337e-05, "loss": 0.3807, "step": 11072 }, { "epoch": 0.48625091934970416, "grad_norm": 1.671875, "learning_rate": 2.620158868841745e-05, "loss": 0.3486, "step": 11074 }, { "epoch": 0.48633873782890763, "grad_norm": 1.6640625, "learning_rate": 2.6194678354340602e-05, "loss": 0.3378, "step": 11076 }, { "epoch": 0.48642655630811116, "grad_norm": 1.8359375, "learning_rate": 2.6187767928774998e-05, "loss": 0.3508, "step": 11078 }, { "epoch": 0.48651437478731463, "grad_norm": 1.8359375, "learning_rate": 2.6180857412249842e-05, "loss": 0.3512, "step": 11080 }, { "epoch": 0.4866021932665181, "grad_norm": 1.75, "learning_rate": 2.6173946805294348e-05, "loss": 0.338, "step": 11082 }, { "epoch": 0.4866900117457216, "grad_norm": 1.859375, "learning_rate": 2.6167036108437724e-05, "loss": 0.3863, "step": 11084 }, { "epoch": 0.4867778302249251, "grad_norm": 1.7734375, "learning_rate": 2.6160125322209194e-05, "loss": 0.3513, "step": 11086 }, { "epoch": 0.4868656487041286, "grad_norm": 1.8515625, "learning_rate": 2.615321444713799e-05, "loss": 0.3806, "step": 11088 }, { "epoch": 0.48695346718333204, "grad_norm": 2.0, "learning_rate": 2.6146303483753343e-05, "loss": 0.3856, "step": 11090 }, { "epoch": 0.4870412856625355, "grad_norm": 1.71875, "learning_rate": 2.6139392432584504e-05, "loss": 0.3408, "step": 11092 }, { "epoch": 0.48712910414173904, "grad_norm": 1.8203125, "learning_rate": 2.6132481294160715e-05, "loss": 0.3723, "step": 11094 }, { "epoch": 0.4872169226209425, "grad_norm": 2.125, "learning_rate": 2.612557006901124e-05, "loss": 0.3541, "step": 11096 }, { "epoch": 0.487304741100146, "grad_norm": 1.7109375, "learning_rate": 2.6118658757665343e-05, "loss": 0.37, "step": 11098 }, { "epoch": 0.48739255957934946, "grad_norm": 1.9453125, "learning_rate": 2.6111747360652295e-05, "loss": 0.3398, "step": 11100 }, { "epoch": 0.487480378058553, "grad_norm": 1.671875, "learning_rate": 2.6104835878501353e-05, "loss": 0.3591, "step": 11102 }, { "epoch": 0.48756819653775646, "grad_norm": 1.9453125, "learning_rate": 2.609792431174182e-05, "loss": 0.3655, "step": 11104 }, { "epoch": 0.48765601501695993, "grad_norm": 1.7109375, "learning_rate": 2.609101266090298e-05, "loss": 0.3481, "step": 11106 }, { "epoch": 0.4877438334961634, "grad_norm": 1.9765625, "learning_rate": 2.608410092651413e-05, "loss": 0.372, "step": 11108 }, { "epoch": 0.48783165197536693, "grad_norm": 1.9140625, "learning_rate": 2.6077189109104577e-05, "loss": 0.3566, "step": 11110 }, { "epoch": 0.4879194704545704, "grad_norm": 2.09375, "learning_rate": 2.6070277209203613e-05, "loss": 0.352, "step": 11112 }, { "epoch": 0.4880072889337739, "grad_norm": 1.828125, "learning_rate": 2.606336522734057e-05, "loss": 0.3427, "step": 11114 }, { "epoch": 0.4880951074129774, "grad_norm": 1.8203125, "learning_rate": 2.605645316404477e-05, "loss": 0.3579, "step": 11116 }, { "epoch": 0.4881829258921809, "grad_norm": 2.015625, "learning_rate": 2.604954101984552e-05, "loss": 0.3444, "step": 11118 }, { "epoch": 0.48827074437138435, "grad_norm": 1.8671875, "learning_rate": 2.6042628795272174e-05, "loss": 0.345, "step": 11120 }, { "epoch": 0.4883585628505878, "grad_norm": 1.8046875, "learning_rate": 2.6035716490854067e-05, "loss": 0.3319, "step": 11122 }, { "epoch": 0.48844638132979135, "grad_norm": 1.75, "learning_rate": 2.6028804107120535e-05, "loss": 0.3964, "step": 11124 }, { "epoch": 0.4885341998089948, "grad_norm": 1.6875, "learning_rate": 2.6021891644600944e-05, "loss": 0.3471, "step": 11126 }, { "epoch": 0.4886220182881983, "grad_norm": 2.015625, "learning_rate": 2.6014979103824637e-05, "loss": 0.3365, "step": 11128 }, { "epoch": 0.48870983676740176, "grad_norm": 1.640625, "learning_rate": 2.6008066485320998e-05, "loss": 0.3383, "step": 11130 }, { "epoch": 0.4887976552466053, "grad_norm": 2.125, "learning_rate": 2.6001153789619383e-05, "loss": 0.3568, "step": 11132 }, { "epoch": 0.48888547372580876, "grad_norm": 1.8359375, "learning_rate": 2.5994241017249167e-05, "loss": 0.3439, "step": 11134 }, { "epoch": 0.48897329220501223, "grad_norm": 2.1875, "learning_rate": 2.5987328168739745e-05, "loss": 0.3543, "step": 11136 }, { "epoch": 0.4890611106842157, "grad_norm": 1.8984375, "learning_rate": 2.5980415244620486e-05, "loss": 0.3801, "step": 11138 }, { "epoch": 0.48914892916341923, "grad_norm": 1.8359375, "learning_rate": 2.5973502245420793e-05, "loss": 0.3759, "step": 11140 }, { "epoch": 0.4892367476426227, "grad_norm": 1.6953125, "learning_rate": 2.5966589171670064e-05, "loss": 0.3677, "step": 11142 }, { "epoch": 0.4893245661218262, "grad_norm": 1.9453125, "learning_rate": 2.5959676023897706e-05, "loss": 0.3884, "step": 11144 }, { "epoch": 0.48941238460102965, "grad_norm": 1.75, "learning_rate": 2.5952762802633136e-05, "loss": 0.3424, "step": 11146 }, { "epoch": 0.4895002030802332, "grad_norm": 1.734375, "learning_rate": 2.5945849508405755e-05, "loss": 0.3576, "step": 11148 }, { "epoch": 0.48958802155943665, "grad_norm": 1.7421875, "learning_rate": 2.593893614174499e-05, "loss": 0.3672, "step": 11150 }, { "epoch": 0.4896758400386401, "grad_norm": 1.796875, "learning_rate": 2.5932022703180277e-05, "loss": 0.3371, "step": 11152 }, { "epoch": 0.4897636585178436, "grad_norm": 1.6953125, "learning_rate": 2.592510919324103e-05, "loss": 0.3762, "step": 11154 }, { "epoch": 0.4898514769970471, "grad_norm": 1.65625, "learning_rate": 2.591819561245671e-05, "loss": 0.3559, "step": 11156 }, { "epoch": 0.4899392954762506, "grad_norm": 1.8515625, "learning_rate": 2.591128196135675e-05, "loss": 0.3713, "step": 11158 }, { "epoch": 0.49002711395545406, "grad_norm": 2.046875, "learning_rate": 2.5904368240470595e-05, "loss": 0.3572, "step": 11160 }, { "epoch": 0.49011493243465754, "grad_norm": 1.8984375, "learning_rate": 2.5897454450327707e-05, "loss": 0.3401, "step": 11162 }, { "epoch": 0.49020275091386106, "grad_norm": 2.15625, "learning_rate": 2.5890540591457535e-05, "loss": 0.3697, "step": 11164 }, { "epoch": 0.49029056939306453, "grad_norm": 1.7265625, "learning_rate": 2.5883626664389556e-05, "loss": 0.3574, "step": 11166 }, { "epoch": 0.490378387872268, "grad_norm": 1.7578125, "learning_rate": 2.5876712669653232e-05, "loss": 0.3785, "step": 11168 }, { "epoch": 0.49046620635147153, "grad_norm": 1.7578125, "learning_rate": 2.586979860777804e-05, "loss": 0.3473, "step": 11170 }, { "epoch": 0.490554024830675, "grad_norm": 1.609375, "learning_rate": 2.586288447929347e-05, "loss": 0.349, "step": 11172 }, { "epoch": 0.4906418433098785, "grad_norm": 1.8671875, "learning_rate": 2.5855970284728993e-05, "loss": 0.3541, "step": 11174 }, { "epoch": 0.49072966178908195, "grad_norm": 1.8515625, "learning_rate": 2.584905602461411e-05, "loss": 0.3555, "step": 11176 }, { "epoch": 0.4908174802682855, "grad_norm": 1.8125, "learning_rate": 2.5842141699478317e-05, "loss": 0.3449, "step": 11178 }, { "epoch": 0.49090529874748895, "grad_norm": 1.6953125, "learning_rate": 2.58352273098511e-05, "loss": 0.3378, "step": 11180 }, { "epoch": 0.4909931172266924, "grad_norm": 1.7890625, "learning_rate": 2.5828312856261982e-05, "loss": 0.3507, "step": 11182 }, { "epoch": 0.4910809357058959, "grad_norm": 1.90625, "learning_rate": 2.582139833924047e-05, "loss": 0.3718, "step": 11184 }, { "epoch": 0.4911687541850994, "grad_norm": 2.03125, "learning_rate": 2.581448375931607e-05, "loss": 0.3828, "step": 11186 }, { "epoch": 0.4912565726643029, "grad_norm": 1.8515625, "learning_rate": 2.580756911701831e-05, "loss": 0.3782, "step": 11188 }, { "epoch": 0.49134439114350636, "grad_norm": 2.0, "learning_rate": 2.5800654412876713e-05, "loss": 0.3224, "step": 11190 }, { "epoch": 0.49143220962270984, "grad_norm": 1.8671875, "learning_rate": 2.5793739647420817e-05, "loss": 0.3679, "step": 11192 }, { "epoch": 0.49152002810191336, "grad_norm": 1.765625, "learning_rate": 2.578682482118014e-05, "loss": 0.3788, "step": 11194 }, { "epoch": 0.49160784658111684, "grad_norm": 1.9609375, "learning_rate": 2.5779909934684227e-05, "loss": 0.3789, "step": 11196 }, { "epoch": 0.4916956650603203, "grad_norm": 1.9609375, "learning_rate": 2.5772994988462634e-05, "loss": 0.3832, "step": 11198 }, { "epoch": 0.4917834835395238, "grad_norm": 1.8671875, "learning_rate": 2.5766079983044893e-05, "loss": 0.3572, "step": 11200 }, { "epoch": 0.4918713020187273, "grad_norm": 1.703125, "learning_rate": 2.5759164918960567e-05, "loss": 0.3766, "step": 11202 }, { "epoch": 0.4919591204979308, "grad_norm": 1.75, "learning_rate": 2.5752249796739202e-05, "loss": 0.3484, "step": 11204 }, { "epoch": 0.49204693897713425, "grad_norm": 1.9921875, "learning_rate": 2.574533461691037e-05, "loss": 0.3933, "step": 11206 }, { "epoch": 0.4921347574563377, "grad_norm": 2.078125, "learning_rate": 2.5738419380003642e-05, "loss": 0.346, "step": 11208 }, { "epoch": 0.49222257593554125, "grad_norm": 2.28125, "learning_rate": 2.573150408654858e-05, "loss": 0.3779, "step": 11210 }, { "epoch": 0.4923103944147447, "grad_norm": 1.7734375, "learning_rate": 2.572458873707475e-05, "loss": 0.3431, "step": 11212 }, { "epoch": 0.4923982128939482, "grad_norm": 1.765625, "learning_rate": 2.5717673332111748e-05, "loss": 0.3638, "step": 11214 }, { "epoch": 0.4924860313731517, "grad_norm": 1.8515625, "learning_rate": 2.5710757872189145e-05, "loss": 0.3546, "step": 11216 }, { "epoch": 0.4925738498523552, "grad_norm": 1.5859375, "learning_rate": 2.5703842357836537e-05, "loss": 0.3372, "step": 11218 }, { "epoch": 0.49266166833155867, "grad_norm": 1.7421875, "learning_rate": 2.5696926789583513e-05, "loss": 0.3499, "step": 11220 }, { "epoch": 0.49274948681076214, "grad_norm": 2.0, "learning_rate": 2.569001116795967e-05, "loss": 0.3435, "step": 11222 }, { "epoch": 0.49283730528996567, "grad_norm": 1.875, "learning_rate": 2.5683095493494607e-05, "loss": 0.3628, "step": 11224 }, { "epoch": 0.49292512376916914, "grad_norm": 1.8203125, "learning_rate": 2.5676179766717923e-05, "loss": 0.3492, "step": 11226 }, { "epoch": 0.4930129422483726, "grad_norm": 1.6640625, "learning_rate": 2.566926398815923e-05, "loss": 0.3667, "step": 11228 }, { "epoch": 0.4931007607275761, "grad_norm": 1.6484375, "learning_rate": 2.5662348158348142e-05, "loss": 0.3326, "step": 11230 }, { "epoch": 0.4931885792067796, "grad_norm": 1.78125, "learning_rate": 2.5655432277814273e-05, "loss": 0.3653, "step": 11232 }, { "epoch": 0.4932763976859831, "grad_norm": 1.7109375, "learning_rate": 2.5648516347087248e-05, "loss": 0.3682, "step": 11234 }, { "epoch": 0.49336421616518655, "grad_norm": 1.96875, "learning_rate": 2.564160036669668e-05, "loss": 0.3406, "step": 11236 }, { "epoch": 0.49345203464439, "grad_norm": 1.90625, "learning_rate": 2.5634684337172205e-05, "loss": 0.3647, "step": 11238 }, { "epoch": 0.49353985312359355, "grad_norm": 1.8046875, "learning_rate": 2.562776825904345e-05, "loss": 0.3419, "step": 11240 }, { "epoch": 0.493627671602797, "grad_norm": 1.828125, "learning_rate": 2.562085213284005e-05, "loss": 0.3472, "step": 11242 }, { "epoch": 0.4937154900820005, "grad_norm": 1.8125, "learning_rate": 2.5613935959091647e-05, "loss": 0.3834, "step": 11244 }, { "epoch": 0.49380330856120397, "grad_norm": 1.9609375, "learning_rate": 2.5607019738327882e-05, "loss": 0.3647, "step": 11246 }, { "epoch": 0.4938911270404075, "grad_norm": 1.78125, "learning_rate": 2.5600103471078397e-05, "loss": 0.3764, "step": 11248 }, { "epoch": 0.49397894551961097, "grad_norm": 1.8203125, "learning_rate": 2.5593187157872845e-05, "loss": 0.364, "step": 11250 }, { "epoch": 0.49406676399881444, "grad_norm": 1.6953125, "learning_rate": 2.5586270799240876e-05, "loss": 0.3456, "step": 11252 }, { "epoch": 0.4941545824780179, "grad_norm": 1.6640625, "learning_rate": 2.557935439571215e-05, "loss": 0.3279, "step": 11254 }, { "epoch": 0.49424240095722144, "grad_norm": 1.6875, "learning_rate": 2.557243794781633e-05, "loss": 0.3656, "step": 11256 }, { "epoch": 0.4943302194364249, "grad_norm": 1.6953125, "learning_rate": 2.556552145608307e-05, "loss": 0.3393, "step": 11258 }, { "epoch": 0.4944180379156284, "grad_norm": 1.6796875, "learning_rate": 2.5558604921042045e-05, "loss": 0.3982, "step": 11260 }, { "epoch": 0.49450585639483186, "grad_norm": 1.703125, "learning_rate": 2.555168834322292e-05, "loss": 0.3508, "step": 11262 }, { "epoch": 0.4945936748740354, "grad_norm": 1.7890625, "learning_rate": 2.5544771723155365e-05, "loss": 0.3574, "step": 11264 }, { "epoch": 0.49468149335323885, "grad_norm": 1.703125, "learning_rate": 2.553785506136906e-05, "loss": 0.3464, "step": 11266 }, { "epoch": 0.4947693118324423, "grad_norm": 1.8125, "learning_rate": 2.553093835839369e-05, "loss": 0.3589, "step": 11268 }, { "epoch": 0.49485713031164585, "grad_norm": 1.78125, "learning_rate": 2.5524021614758934e-05, "loss": 0.3419, "step": 11270 }, { "epoch": 0.4949449487908493, "grad_norm": 1.734375, "learning_rate": 2.551710483099448e-05, "loss": 0.3627, "step": 11272 }, { "epoch": 0.4950327672700528, "grad_norm": 1.6796875, "learning_rate": 2.551018800763001e-05, "loss": 0.3796, "step": 11274 }, { "epoch": 0.49512058574925627, "grad_norm": 1.625, "learning_rate": 2.5503271145195217e-05, "loss": 0.3389, "step": 11276 }, { "epoch": 0.4952084042284598, "grad_norm": 1.609375, "learning_rate": 2.5496354244219805e-05, "loss": 0.3406, "step": 11278 }, { "epoch": 0.49529622270766327, "grad_norm": 1.734375, "learning_rate": 2.548943730523346e-05, "loss": 0.3669, "step": 11280 }, { "epoch": 0.49538404118686674, "grad_norm": 1.7890625, "learning_rate": 2.5482520328765898e-05, "loss": 0.3812, "step": 11282 }, { "epoch": 0.4954718596660702, "grad_norm": 1.8125, "learning_rate": 2.5475603315346807e-05, "loss": 0.3608, "step": 11284 }, { "epoch": 0.49555967814527374, "grad_norm": 1.765625, "learning_rate": 2.5468686265505903e-05, "loss": 0.3618, "step": 11286 }, { "epoch": 0.4956474966244772, "grad_norm": 1.7265625, "learning_rate": 2.5461769179772886e-05, "loss": 0.3699, "step": 11288 }, { "epoch": 0.4957353151036807, "grad_norm": 1.84375, "learning_rate": 2.545485205867748e-05, "loss": 0.3482, "step": 11290 }, { "epoch": 0.49582313358288416, "grad_norm": 1.765625, "learning_rate": 2.5447934902749393e-05, "loss": 0.3649, "step": 11292 }, { "epoch": 0.4959109520620877, "grad_norm": 2.078125, "learning_rate": 2.5441017712518337e-05, "loss": 0.3537, "step": 11294 }, { "epoch": 0.49599877054129116, "grad_norm": 2.0, "learning_rate": 2.5434100488514053e-05, "loss": 0.3684, "step": 11296 }, { "epoch": 0.49608658902049463, "grad_norm": 1.8046875, "learning_rate": 2.542718323126624e-05, "loss": 0.3429, "step": 11298 }, { "epoch": 0.4961744074996981, "grad_norm": 1.6796875, "learning_rate": 2.542026594130464e-05, "loss": 0.3532, "step": 11300 }, { "epoch": 0.4962622259789016, "grad_norm": 1.796875, "learning_rate": 2.5413348619158967e-05, "loss": 0.3644, "step": 11302 }, { "epoch": 0.4963500444581051, "grad_norm": 1.7734375, "learning_rate": 2.5406431265358955e-05, "loss": 0.3644, "step": 11304 }, { "epoch": 0.49643786293730857, "grad_norm": 1.78125, "learning_rate": 2.5399513880434345e-05, "loss": 0.3695, "step": 11306 }, { "epoch": 0.49652568141651204, "grad_norm": 1.8203125, "learning_rate": 2.5392596464914864e-05, "loss": 0.3724, "step": 11308 }, { "epoch": 0.49661349989571557, "grad_norm": 1.8515625, "learning_rate": 2.5385679019330257e-05, "loss": 0.3812, "step": 11310 }, { "epoch": 0.49670131837491904, "grad_norm": 2.015625, "learning_rate": 2.537876154421025e-05, "loss": 0.3289, "step": 11312 }, { "epoch": 0.4967891368541225, "grad_norm": 1.828125, "learning_rate": 2.537184404008459e-05, "loss": 0.3673, "step": 11314 }, { "epoch": 0.49687695533332604, "grad_norm": 1.9609375, "learning_rate": 2.536492650748304e-05, "loss": 0.3545, "step": 11316 }, { "epoch": 0.4969647738125295, "grad_norm": 2.0, "learning_rate": 2.535800894693532e-05, "loss": 0.3674, "step": 11318 }, { "epoch": 0.497052592291733, "grad_norm": 1.9765625, "learning_rate": 2.535109135897119e-05, "loss": 0.3536, "step": 11320 }, { "epoch": 0.49714041077093646, "grad_norm": 1.7265625, "learning_rate": 2.5344173744120402e-05, "loss": 0.3547, "step": 11322 }, { "epoch": 0.49722822925014, "grad_norm": 1.8359375, "learning_rate": 2.53372561029127e-05, "loss": 0.3864, "step": 11324 }, { "epoch": 0.49731604772934346, "grad_norm": 1.6796875, "learning_rate": 2.533033843587785e-05, "loss": 0.3391, "step": 11326 }, { "epoch": 0.49740386620854693, "grad_norm": 1.890625, "learning_rate": 2.53234207435456e-05, "loss": 0.374, "step": 11328 }, { "epoch": 0.4974916846877504, "grad_norm": 1.796875, "learning_rate": 2.531650302644572e-05, "loss": 0.3793, "step": 11330 }, { "epoch": 0.49757950316695393, "grad_norm": 1.7890625, "learning_rate": 2.530958528510795e-05, "loss": 0.3879, "step": 11332 }, { "epoch": 0.4976673216461574, "grad_norm": 1.90625, "learning_rate": 2.530266752006208e-05, "loss": 0.3623, "step": 11334 }, { "epoch": 0.4977551401253609, "grad_norm": 1.9296875, "learning_rate": 2.529574973183785e-05, "loss": 0.3668, "step": 11336 }, { "epoch": 0.49784295860456435, "grad_norm": 2.21875, "learning_rate": 2.5288831920965028e-05, "loss": 0.3766, "step": 11338 }, { "epoch": 0.4979307770837679, "grad_norm": 2.171875, "learning_rate": 2.5281914087973395e-05, "loss": 0.375, "step": 11340 }, { "epoch": 0.49801859556297134, "grad_norm": 1.6484375, "learning_rate": 2.5274996233392712e-05, "loss": 0.3424, "step": 11342 }, { "epoch": 0.4981064140421748, "grad_norm": 1.8359375, "learning_rate": 2.5268078357752757e-05, "loss": 0.3336, "step": 11344 }, { "epoch": 0.4981942325213783, "grad_norm": 1.984375, "learning_rate": 2.52611604615833e-05, "loss": 0.3356, "step": 11346 }, { "epoch": 0.4982820510005818, "grad_norm": 1.8046875, "learning_rate": 2.5254242545414104e-05, "loss": 0.3336, "step": 11348 }, { "epoch": 0.4983698694797853, "grad_norm": 2.171875, "learning_rate": 2.5247324609774953e-05, "loss": 0.3769, "step": 11350 }, { "epoch": 0.49845768795898876, "grad_norm": 1.9375, "learning_rate": 2.524040665519562e-05, "loss": 0.3574, "step": 11352 }, { "epoch": 0.49854550643819223, "grad_norm": 1.734375, "learning_rate": 2.523348868220589e-05, "loss": 0.3693, "step": 11354 }, { "epoch": 0.49863332491739576, "grad_norm": 1.8671875, "learning_rate": 2.522657069133555e-05, "loss": 0.3505, "step": 11356 }, { "epoch": 0.49872114339659923, "grad_norm": 1.9453125, "learning_rate": 2.5219652683114363e-05, "loss": 0.3886, "step": 11358 }, { "epoch": 0.4988089618758027, "grad_norm": 2.234375, "learning_rate": 2.5212734658072124e-05, "loss": 0.384, "step": 11360 }, { "epoch": 0.4988967803550062, "grad_norm": 1.7734375, "learning_rate": 2.520581661673862e-05, "loss": 0.3613, "step": 11362 }, { "epoch": 0.4989845988342097, "grad_norm": 1.8515625, "learning_rate": 2.5198898559643618e-05, "loss": 0.3673, "step": 11364 }, { "epoch": 0.4990724173134132, "grad_norm": 1.953125, "learning_rate": 2.5191980487316924e-05, "loss": 0.353, "step": 11366 }, { "epoch": 0.49916023579261665, "grad_norm": 1.9609375, "learning_rate": 2.5185062400288316e-05, "loss": 0.3536, "step": 11368 }, { "epoch": 0.4992480542718202, "grad_norm": 1.8359375, "learning_rate": 2.517814429908759e-05, "loss": 0.35, "step": 11370 }, { "epoch": 0.49933587275102365, "grad_norm": 1.8125, "learning_rate": 2.5171226184244525e-05, "loss": 0.3362, "step": 11372 }, { "epoch": 0.4994236912302271, "grad_norm": 1.7734375, "learning_rate": 2.5164308056288915e-05, "loss": 0.3457, "step": 11374 }, { "epoch": 0.4995115097094306, "grad_norm": 1.828125, "learning_rate": 2.515738991575057e-05, "loss": 0.3596, "step": 11376 }, { "epoch": 0.4995993281886341, "grad_norm": 2.078125, "learning_rate": 2.515047176315925e-05, "loss": 0.3743, "step": 11378 }, { "epoch": 0.4996871466678376, "grad_norm": 2.140625, "learning_rate": 2.5143553599044773e-05, "loss": 0.3283, "step": 11380 }, { "epoch": 0.49977496514704106, "grad_norm": 1.8203125, "learning_rate": 2.513663542393693e-05, "loss": 0.3711, "step": 11382 }, { "epoch": 0.49986278362624453, "grad_norm": 2.296875, "learning_rate": 2.5129717238365508e-05, "loss": 0.3386, "step": 11384 }, { "epoch": 0.49995060210544806, "grad_norm": 1.8125, "learning_rate": 2.512279904286031e-05, "loss": 0.3563, "step": 11386 }, { "epoch": 0.5000384205846515, "grad_norm": 1.890625, "learning_rate": 2.5115880837951134e-05, "loss": 0.3429, "step": 11388 }, { "epoch": 0.500126239063855, "grad_norm": 1.84375, "learning_rate": 2.510896262416777e-05, "loss": 0.3711, "step": 11390 }, { "epoch": 0.5002140575430585, "grad_norm": 1.671875, "learning_rate": 2.5102044402040033e-05, "loss": 0.3793, "step": 11392 }, { "epoch": 0.500301876022262, "grad_norm": 1.9296875, "learning_rate": 2.5095126172097705e-05, "loss": 0.3527, "step": 11394 }, { "epoch": 0.5003896945014654, "grad_norm": 1.6953125, "learning_rate": 2.5088207934870593e-05, "loss": 0.3562, "step": 11396 }, { "epoch": 0.500477512980669, "grad_norm": 1.9609375, "learning_rate": 2.5081289690888495e-05, "loss": 0.3647, "step": 11398 }, { "epoch": 0.5005653314598725, "grad_norm": 1.734375, "learning_rate": 2.507437144068121e-05, "loss": 0.344, "step": 11400 }, { "epoch": 0.500653149939076, "grad_norm": 1.8515625, "learning_rate": 2.5067453184778545e-05, "loss": 0.3791, "step": 11402 }, { "epoch": 0.5007409684182794, "grad_norm": 1.953125, "learning_rate": 2.5060534923710298e-05, "loss": 0.3648, "step": 11404 }, { "epoch": 0.5008287868974829, "grad_norm": 1.703125, "learning_rate": 2.505361665800628e-05, "loss": 0.3255, "step": 11406 }, { "epoch": 0.5009166053766864, "grad_norm": 1.75, "learning_rate": 2.5046698388196284e-05, "loss": 0.3838, "step": 11408 }, { "epoch": 0.5010044238558898, "grad_norm": 1.6796875, "learning_rate": 2.50397801148101e-05, "loss": 0.3467, "step": 11410 }, { "epoch": 0.5010922423350933, "grad_norm": 1.8984375, "learning_rate": 2.5032861838377557e-05, "loss": 0.3475, "step": 11412 }, { "epoch": 0.5011800608142969, "grad_norm": 1.734375, "learning_rate": 2.5025943559428444e-05, "loss": 0.3663, "step": 11414 }, { "epoch": 0.5012678792935004, "grad_norm": 1.7265625, "learning_rate": 2.5019025278492565e-05, "loss": 0.3312, "step": 11416 }, { "epoch": 0.5013556977727038, "grad_norm": 1.7265625, "learning_rate": 2.501210699609973e-05, "loss": 0.3312, "step": 11418 }, { "epoch": 0.5014435162519073, "grad_norm": 1.90625, "learning_rate": 2.5005188712779736e-05, "loss": 0.3418, "step": 11420 }, { "epoch": 0.5015313347311108, "grad_norm": 1.796875, "learning_rate": 2.4998270429062394e-05, "loss": 0.3551, "step": 11422 }, { "epoch": 0.5016191532103143, "grad_norm": 1.7265625, "learning_rate": 2.4991352145477497e-05, "loss": 0.3624, "step": 11424 }, { "epoch": 0.5017069716895177, "grad_norm": 1.8671875, "learning_rate": 2.498443386255485e-05, "loss": 0.3742, "step": 11426 }, { "epoch": 0.5017947901687213, "grad_norm": 2.015625, "learning_rate": 2.497751558082427e-05, "loss": 0.3658, "step": 11428 }, { "epoch": 0.5018826086479248, "grad_norm": 1.7734375, "learning_rate": 2.4970597300815545e-05, "loss": 0.3503, "step": 11430 }, { "epoch": 0.5019704271271282, "grad_norm": 1.6875, "learning_rate": 2.4963679023058495e-05, "loss": 0.3415, "step": 11432 }, { "epoch": 0.5020582456063317, "grad_norm": 1.9140625, "learning_rate": 2.495676074808291e-05, "loss": 0.3646, "step": 11434 }, { "epoch": 0.5021460640855352, "grad_norm": 2.0625, "learning_rate": 2.4949842476418592e-05, "loss": 0.3604, "step": 11436 }, { "epoch": 0.5022338825647387, "grad_norm": 1.8359375, "learning_rate": 2.4942924208595352e-05, "loss": 0.371, "step": 11438 }, { "epoch": 0.5023217010439421, "grad_norm": 1.859375, "learning_rate": 2.493600594514299e-05, "loss": 0.3412, "step": 11440 }, { "epoch": 0.5024095195231456, "grad_norm": 1.7734375, "learning_rate": 2.4929087686591317e-05, "loss": 0.344, "step": 11442 }, { "epoch": 0.5024973380023492, "grad_norm": 1.6875, "learning_rate": 2.4922169433470112e-05, "loss": 0.3685, "step": 11444 }, { "epoch": 0.5025851564815527, "grad_norm": 1.9765625, "learning_rate": 2.491525118630921e-05, "loss": 0.3741, "step": 11446 }, { "epoch": 0.5026729749607561, "grad_norm": 2.125, "learning_rate": 2.4908332945638378e-05, "loss": 0.3448, "step": 11448 }, { "epoch": 0.5027607934399596, "grad_norm": 1.8046875, "learning_rate": 2.4901414711987432e-05, "loss": 0.3645, "step": 11450 }, { "epoch": 0.5028486119191631, "grad_norm": 1.78125, "learning_rate": 2.489449648588617e-05, "loss": 0.3583, "step": 11452 }, { "epoch": 0.5029364303983666, "grad_norm": 1.625, "learning_rate": 2.4887578267864393e-05, "loss": 0.341, "step": 11454 }, { "epoch": 0.50302424887757, "grad_norm": 1.703125, "learning_rate": 2.4880660058451895e-05, "loss": 0.3411, "step": 11456 }, { "epoch": 0.5031120673567735, "grad_norm": 1.828125, "learning_rate": 2.487374185817849e-05, "loss": 0.3633, "step": 11458 }, { "epoch": 0.5031998858359771, "grad_norm": 1.8203125, "learning_rate": 2.4866823667573954e-05, "loss": 0.3506, "step": 11460 }, { "epoch": 0.5032877043151806, "grad_norm": 2.0, "learning_rate": 2.4859905487168095e-05, "loss": 0.3882, "step": 11462 }, { "epoch": 0.503375522794384, "grad_norm": 1.78125, "learning_rate": 2.485298731749071e-05, "loss": 0.3581, "step": 11464 }, { "epoch": 0.5034633412735875, "grad_norm": 1.671875, "learning_rate": 2.4846069159071586e-05, "loss": 0.3581, "step": 11466 }, { "epoch": 0.503551159752791, "grad_norm": 2.328125, "learning_rate": 2.4839151012440532e-05, "loss": 0.356, "step": 11468 }, { "epoch": 0.5036389782319944, "grad_norm": 1.734375, "learning_rate": 2.4832232878127327e-05, "loss": 0.3814, "step": 11470 }, { "epoch": 0.5037267967111979, "grad_norm": 1.9765625, "learning_rate": 2.4825314756661765e-05, "loss": 0.3626, "step": 11472 }, { "epoch": 0.5038146151904015, "grad_norm": 1.6796875, "learning_rate": 2.4818396648573645e-05, "loss": 0.3471, "step": 11474 }, { "epoch": 0.503902433669605, "grad_norm": 1.765625, "learning_rate": 2.4811478554392762e-05, "loss": 0.3433, "step": 11476 }, { "epoch": 0.5039902521488084, "grad_norm": 1.6171875, "learning_rate": 2.4804560474648885e-05, "loss": 0.3593, "step": 11478 }, { "epoch": 0.5040780706280119, "grad_norm": 1.6875, "learning_rate": 2.4797642409871816e-05, "loss": 0.3472, "step": 11480 }, { "epoch": 0.5041658891072154, "grad_norm": 1.921875, "learning_rate": 2.479072436059134e-05, "loss": 0.3718, "step": 11482 }, { "epoch": 0.5042537075864189, "grad_norm": 1.6953125, "learning_rate": 2.4783806327337242e-05, "loss": 0.3797, "step": 11484 }, { "epoch": 0.5043415260656223, "grad_norm": 1.78125, "learning_rate": 2.477688831063931e-05, "loss": 0.3475, "step": 11486 }, { "epoch": 0.5044293445448258, "grad_norm": 1.7265625, "learning_rate": 2.476997031102732e-05, "loss": 0.4019, "step": 11488 }, { "epoch": 0.5045171630240294, "grad_norm": 1.7265625, "learning_rate": 2.4763052329031055e-05, "loss": 0.3631, "step": 11490 }, { "epoch": 0.5046049815032329, "grad_norm": 1.796875, "learning_rate": 2.4756134365180306e-05, "loss": 0.3593, "step": 11492 }, { "epoch": 0.5046927999824363, "grad_norm": 1.7734375, "learning_rate": 2.4749216420004852e-05, "loss": 0.357, "step": 11494 }, { "epoch": 0.5047806184616398, "grad_norm": 1.7265625, "learning_rate": 2.4742298494034453e-05, "loss": 0.3796, "step": 11496 }, { "epoch": 0.5048684369408433, "grad_norm": 1.7421875, "learning_rate": 2.4735380587798902e-05, "loss": 0.354, "step": 11498 }, { "epoch": 0.5049562554200467, "grad_norm": 1.734375, "learning_rate": 2.4728462701827968e-05, "loss": 0.3939, "step": 11500 }, { "epoch": 0.5050440738992502, "grad_norm": 1.8515625, "learning_rate": 2.472154483665142e-05, "loss": 0.376, "step": 11502 }, { "epoch": 0.5051318923784537, "grad_norm": 1.78125, "learning_rate": 2.4714626992799047e-05, "loss": 0.3363, "step": 11504 }, { "epoch": 0.5052197108576573, "grad_norm": 2.28125, "learning_rate": 2.4707709170800608e-05, "loss": 0.3781, "step": 11506 }, { "epoch": 0.5053075293368607, "grad_norm": 1.75, "learning_rate": 2.4700791371185868e-05, "loss": 0.3709, "step": 11508 }, { "epoch": 0.5053953478160642, "grad_norm": 1.8125, "learning_rate": 2.469387359448459e-05, "loss": 0.345, "step": 11510 }, { "epoch": 0.5054831662952677, "grad_norm": 1.9296875, "learning_rate": 2.4686955841226546e-05, "loss": 0.3357, "step": 11512 }, { "epoch": 0.5055709847744712, "grad_norm": 1.921875, "learning_rate": 2.4680038111941504e-05, "loss": 0.3312, "step": 11514 }, { "epoch": 0.5056588032536746, "grad_norm": 2.109375, "learning_rate": 2.4673120407159214e-05, "loss": 0.3445, "step": 11516 }, { "epoch": 0.5057466217328781, "grad_norm": 1.828125, "learning_rate": 2.466620272740945e-05, "loss": 0.3678, "step": 11518 }, { "epoch": 0.5058344402120817, "grad_norm": 1.6796875, "learning_rate": 2.4659285073221955e-05, "loss": 0.3549, "step": 11520 }, { "epoch": 0.5059222586912852, "grad_norm": 2.21875, "learning_rate": 2.465236744512649e-05, "loss": 0.3821, "step": 11522 }, { "epoch": 0.5060100771704886, "grad_norm": 2.15625, "learning_rate": 2.464544984365281e-05, "loss": 0.358, "step": 11524 }, { "epoch": 0.5060978956496921, "grad_norm": 1.953125, "learning_rate": 2.4638532269330665e-05, "loss": 0.3459, "step": 11526 }, { "epoch": 0.5061857141288956, "grad_norm": 1.8671875, "learning_rate": 2.4631614722689806e-05, "loss": 0.3657, "step": 11528 }, { "epoch": 0.506273532608099, "grad_norm": 1.7265625, "learning_rate": 2.4624697204259986e-05, "loss": 0.3525, "step": 11530 }, { "epoch": 0.5063613510873025, "grad_norm": 1.90625, "learning_rate": 2.461777971457094e-05, "loss": 0.3466, "step": 11532 }, { "epoch": 0.506449169566506, "grad_norm": 2.0, "learning_rate": 2.461086225415241e-05, "loss": 0.3451, "step": 11534 }, { "epoch": 0.5065369880457096, "grad_norm": 1.6875, "learning_rate": 2.4603944823534146e-05, "loss": 0.3576, "step": 11536 }, { "epoch": 0.506624806524913, "grad_norm": 1.6640625, "learning_rate": 2.4597027423245893e-05, "loss": 0.3392, "step": 11538 }, { "epoch": 0.5067126250041165, "grad_norm": 1.7265625, "learning_rate": 2.4590110053817364e-05, "loss": 0.3546, "step": 11540 }, { "epoch": 0.50680044348332, "grad_norm": 1.921875, "learning_rate": 2.4583192715778304e-05, "loss": 0.3363, "step": 11542 }, { "epoch": 0.5068882619625235, "grad_norm": 1.7890625, "learning_rate": 2.4576275409658454e-05, "loss": 0.359, "step": 11544 }, { "epoch": 0.5069760804417269, "grad_norm": 1.75, "learning_rate": 2.456935813598753e-05, "loss": 0.3529, "step": 11546 }, { "epoch": 0.5070638989209304, "grad_norm": 1.890625, "learning_rate": 2.4562440895295265e-05, "loss": 0.3435, "step": 11548 }, { "epoch": 0.5071517174001339, "grad_norm": 1.8515625, "learning_rate": 2.4555523688111377e-05, "loss": 0.3664, "step": 11550 }, { "epoch": 0.5072395358793375, "grad_norm": 1.9453125, "learning_rate": 2.454860651496559e-05, "loss": 0.3318, "step": 11552 }, { "epoch": 0.5073273543585409, "grad_norm": 1.71875, "learning_rate": 2.4541689376387627e-05, "loss": 0.3551, "step": 11554 }, { "epoch": 0.5074151728377444, "grad_norm": 1.796875, "learning_rate": 2.453477227290721e-05, "loss": 0.3396, "step": 11556 }, { "epoch": 0.5075029913169479, "grad_norm": 1.8125, "learning_rate": 2.452785520505403e-05, "loss": 0.3486, "step": 11558 }, { "epoch": 0.5075908097961513, "grad_norm": 1.8828125, "learning_rate": 2.452093817335782e-05, "loss": 0.357, "step": 11560 }, { "epoch": 0.5076786282753548, "grad_norm": 1.6640625, "learning_rate": 2.4514021178348276e-05, "loss": 0.356, "step": 11562 }, { "epoch": 0.5077664467545583, "grad_norm": 1.7265625, "learning_rate": 2.4507104220555106e-05, "loss": 0.3339, "step": 11564 }, { "epoch": 0.5078542652337618, "grad_norm": 2.015625, "learning_rate": 2.450018730050802e-05, "loss": 0.3823, "step": 11566 }, { "epoch": 0.5079420837129653, "grad_norm": 1.59375, "learning_rate": 2.4493270418736707e-05, "loss": 0.3737, "step": 11568 }, { "epoch": 0.5080299021921688, "grad_norm": 1.953125, "learning_rate": 2.448635357577087e-05, "loss": 0.3471, "step": 11570 }, { "epoch": 0.5081177206713723, "grad_norm": 2.0, "learning_rate": 2.4479436772140195e-05, "loss": 0.3523, "step": 11572 }, { "epoch": 0.5082055391505758, "grad_norm": 2.015625, "learning_rate": 2.4472520008374375e-05, "loss": 0.3427, "step": 11574 }, { "epoch": 0.5082933576297792, "grad_norm": 1.875, "learning_rate": 2.44656032850031e-05, "loss": 0.34, "step": 11576 }, { "epoch": 0.5083811761089827, "grad_norm": 1.9453125, "learning_rate": 2.4458686602556052e-05, "loss": 0.3527, "step": 11578 }, { "epoch": 0.5084689945881862, "grad_norm": 1.734375, "learning_rate": 2.445176996156292e-05, "loss": 0.3461, "step": 11580 }, { "epoch": 0.5085568130673898, "grad_norm": 1.9296875, "learning_rate": 2.444485336255337e-05, "loss": 0.3473, "step": 11582 }, { "epoch": 0.5086446315465932, "grad_norm": 1.703125, "learning_rate": 2.4437936806057082e-05, "loss": 0.3454, "step": 11584 }, { "epoch": 0.5087324500257967, "grad_norm": 1.8828125, "learning_rate": 2.443102029260373e-05, "loss": 0.371, "step": 11586 }, { "epoch": 0.5088202685050002, "grad_norm": 1.7890625, "learning_rate": 2.442410382272298e-05, "loss": 0.3645, "step": 11588 }, { "epoch": 0.5089080869842036, "grad_norm": 1.859375, "learning_rate": 2.4417187396944496e-05, "loss": 0.3277, "step": 11590 }, { "epoch": 0.5089959054634071, "grad_norm": 1.8359375, "learning_rate": 2.4410271015797947e-05, "loss": 0.3338, "step": 11592 }, { "epoch": 0.5090837239426106, "grad_norm": 1.828125, "learning_rate": 2.440335467981298e-05, "loss": 0.3514, "step": 11594 }, { "epoch": 0.5091715424218141, "grad_norm": 1.8125, "learning_rate": 2.439643838951925e-05, "loss": 0.365, "step": 11596 }, { "epoch": 0.5092593609010176, "grad_norm": 1.828125, "learning_rate": 2.438952214544643e-05, "loss": 0.3909, "step": 11598 }, { "epoch": 0.5093471793802211, "grad_norm": 1.9609375, "learning_rate": 2.4382605948124137e-05, "loss": 0.3652, "step": 11600 }, { "epoch": 0.5094349978594246, "grad_norm": 1.75, "learning_rate": 2.437568979808203e-05, "loss": 0.3538, "step": 11602 }, { "epoch": 0.5095228163386281, "grad_norm": 1.7578125, "learning_rate": 2.436877369584975e-05, "loss": 0.3569, "step": 11604 }, { "epoch": 0.5096106348178315, "grad_norm": 1.890625, "learning_rate": 2.436185764195693e-05, "loss": 0.3627, "step": 11606 }, { "epoch": 0.509698453297035, "grad_norm": 1.7890625, "learning_rate": 2.4354941636933203e-05, "loss": 0.3579, "step": 11608 }, { "epoch": 0.5097862717762385, "grad_norm": 1.9765625, "learning_rate": 2.43480256813082e-05, "loss": 0.3583, "step": 11610 }, { "epoch": 0.509874090255442, "grad_norm": 1.7734375, "learning_rate": 2.4341109775611547e-05, "loss": 0.3649, "step": 11612 }, { "epoch": 0.5099619087346455, "grad_norm": 1.84375, "learning_rate": 2.4334193920372867e-05, "loss": 0.3639, "step": 11614 }, { "epoch": 0.510049727213849, "grad_norm": 1.6875, "learning_rate": 2.432727811612178e-05, "loss": 0.3859, "step": 11616 }, { "epoch": 0.5101375456930525, "grad_norm": 1.7578125, "learning_rate": 2.4320362363387893e-05, "loss": 0.3314, "step": 11618 }, { "epoch": 0.510225364172256, "grad_norm": 1.734375, "learning_rate": 2.4313446662700813e-05, "loss": 0.3727, "step": 11620 }, { "epoch": 0.5103131826514594, "grad_norm": 1.8125, "learning_rate": 2.4306531014590156e-05, "loss": 0.3434, "step": 11622 }, { "epoch": 0.5104010011306629, "grad_norm": 1.75, "learning_rate": 2.429961541958552e-05, "loss": 0.3612, "step": 11624 }, { "epoch": 0.5104888196098664, "grad_norm": 1.65625, "learning_rate": 2.4292699878216505e-05, "loss": 0.3464, "step": 11626 }, { "epoch": 0.51057663808907, "grad_norm": 1.75, "learning_rate": 2.4285784391012706e-05, "loss": 0.337, "step": 11628 }, { "epoch": 0.5106644565682734, "grad_norm": 1.6875, "learning_rate": 2.4278868958503708e-05, "loss": 0.3909, "step": 11630 }, { "epoch": 0.5107522750474769, "grad_norm": 1.828125, "learning_rate": 2.42719535812191e-05, "loss": 0.3323, "step": 11632 }, { "epoch": 0.5108400935266804, "grad_norm": 1.6875, "learning_rate": 2.4265038259688456e-05, "loss": 0.3545, "step": 11634 }, { "epoch": 0.5109279120058838, "grad_norm": 1.859375, "learning_rate": 2.425812299444136e-05, "loss": 0.3492, "step": 11636 }, { "epoch": 0.5110157304850873, "grad_norm": 1.6875, "learning_rate": 2.425120778600738e-05, "loss": 0.3437, "step": 11638 }, { "epoch": 0.5111035489642908, "grad_norm": 1.8515625, "learning_rate": 2.424429263491609e-05, "loss": 0.3651, "step": 11640 }, { "epoch": 0.5111913674434942, "grad_norm": 1.6015625, "learning_rate": 2.4237377541697055e-05, "loss": 0.3285, "step": 11642 }, { "epoch": 0.5112791859226978, "grad_norm": 1.78125, "learning_rate": 2.4230462506879824e-05, "loss": 0.338, "step": 11644 }, { "epoch": 0.5113670044019013, "grad_norm": 1.9453125, "learning_rate": 2.4223547530993958e-05, "loss": 0.3836, "step": 11646 }, { "epoch": 0.5114548228811048, "grad_norm": 1.875, "learning_rate": 2.4216632614569012e-05, "loss": 0.382, "step": 11648 }, { "epoch": 0.5115426413603082, "grad_norm": 1.734375, "learning_rate": 2.4209717758134525e-05, "loss": 0.3605, "step": 11650 }, { "epoch": 0.5116304598395117, "grad_norm": 1.7265625, "learning_rate": 2.4202802962220047e-05, "loss": 0.3905, "step": 11652 }, { "epoch": 0.5117182783187152, "grad_norm": 1.9140625, "learning_rate": 2.4195888227355105e-05, "loss": 0.329, "step": 11654 }, { "epoch": 0.5118060967979187, "grad_norm": 1.75, "learning_rate": 2.4188973554069236e-05, "loss": 0.3697, "step": 11656 }, { "epoch": 0.5118939152771221, "grad_norm": 1.7421875, "learning_rate": 2.4182058942891966e-05, "loss": 0.3456, "step": 11658 }, { "epoch": 0.5119817337563257, "grad_norm": 1.828125, "learning_rate": 2.417514439435283e-05, "loss": 0.3294, "step": 11660 }, { "epoch": 0.5120695522355292, "grad_norm": 1.7109375, "learning_rate": 2.416822990898132e-05, "loss": 0.3599, "step": 11662 }, { "epoch": 0.5121573707147327, "grad_norm": 1.8671875, "learning_rate": 2.4161315487306965e-05, "loss": 0.3617, "step": 11664 }, { "epoch": 0.5122451891939361, "grad_norm": 1.765625, "learning_rate": 2.415440112985927e-05, "loss": 0.3578, "step": 11666 }, { "epoch": 0.5123330076731396, "grad_norm": 1.7734375, "learning_rate": 2.4147486837167748e-05, "loss": 0.3638, "step": 11668 }, { "epoch": 0.5124208261523431, "grad_norm": 1.9140625, "learning_rate": 2.414057260976188e-05, "loss": 0.3481, "step": 11670 }, { "epoch": 0.5125086446315466, "grad_norm": 1.734375, "learning_rate": 2.413365844817117e-05, "loss": 0.3752, "step": 11672 }, { "epoch": 0.5125964631107501, "grad_norm": 1.8203125, "learning_rate": 2.41267443529251e-05, "loss": 0.353, "step": 11674 }, { "epoch": 0.5126842815899536, "grad_norm": 1.65625, "learning_rate": 2.411983032455316e-05, "loss": 0.3513, "step": 11676 }, { "epoch": 0.5127721000691571, "grad_norm": 1.8515625, "learning_rate": 2.4112916363584828e-05, "loss": 0.3692, "step": 11678 }, { "epoch": 0.5128599185483605, "grad_norm": 1.9609375, "learning_rate": 2.410600247054957e-05, "loss": 0.3459, "step": 11680 }, { "epoch": 0.512947737027564, "grad_norm": 1.7421875, "learning_rate": 2.4099088645976855e-05, "loss": 0.3597, "step": 11682 }, { "epoch": 0.5130355555067675, "grad_norm": 1.921875, "learning_rate": 2.409217489039615e-05, "loss": 0.3649, "step": 11684 }, { "epoch": 0.513123373985971, "grad_norm": 1.7890625, "learning_rate": 2.4085261204336905e-05, "loss": 0.3304, "step": 11686 }, { "epoch": 0.5132111924651744, "grad_norm": 2.0, "learning_rate": 2.407834758832858e-05, "loss": 0.3645, "step": 11688 }, { "epoch": 0.513299010944378, "grad_norm": 2.109375, "learning_rate": 2.4071434042900627e-05, "loss": 0.4048, "step": 11690 }, { "epoch": 0.5133868294235815, "grad_norm": 1.7265625, "learning_rate": 2.4064520568582468e-05, "loss": 0.3575, "step": 11692 }, { "epoch": 0.513474647902785, "grad_norm": 1.84375, "learning_rate": 2.4057607165903557e-05, "loss": 0.3888, "step": 11694 }, { "epoch": 0.5135624663819884, "grad_norm": 2.03125, "learning_rate": 2.4050693835393303e-05, "loss": 0.3731, "step": 11696 }, { "epoch": 0.5136502848611919, "grad_norm": 1.8125, "learning_rate": 2.4043780577581145e-05, "loss": 0.3508, "step": 11698 }, { "epoch": 0.5137381033403954, "grad_norm": 1.734375, "learning_rate": 2.40368673929965e-05, "loss": 0.359, "step": 11700 }, { "epoch": 0.5138259218195989, "grad_norm": 1.8359375, "learning_rate": 2.4029954282168782e-05, "loss": 0.3378, "step": 11702 }, { "epoch": 0.5139137402988023, "grad_norm": 1.6953125, "learning_rate": 2.40230412456274e-05, "loss": 0.3509, "step": 11704 }, { "epoch": 0.5140015587780059, "grad_norm": 1.8984375, "learning_rate": 2.401612828390175e-05, "loss": 0.3493, "step": 11706 }, { "epoch": 0.5140893772572094, "grad_norm": 1.7734375, "learning_rate": 2.4009215397521234e-05, "loss": 0.3398, "step": 11708 }, { "epoch": 0.5141771957364129, "grad_norm": 1.8203125, "learning_rate": 2.4002302587015236e-05, "loss": 0.3232, "step": 11710 }, { "epoch": 0.5142650142156163, "grad_norm": 1.8828125, "learning_rate": 2.399538985291315e-05, "loss": 0.3542, "step": 11712 }, { "epoch": 0.5143528326948198, "grad_norm": 1.75, "learning_rate": 2.3988477195744353e-05, "loss": 0.353, "step": 11714 }, { "epoch": 0.5144406511740233, "grad_norm": 1.8828125, "learning_rate": 2.398156461603821e-05, "loss": 0.3577, "step": 11716 }, { "epoch": 0.5145284696532267, "grad_norm": 1.9296875, "learning_rate": 2.397465211432409e-05, "loss": 0.3709, "step": 11718 }, { "epoch": 0.5146162881324303, "grad_norm": 2.015625, "learning_rate": 2.396773969113136e-05, "loss": 0.3302, "step": 11720 }, { "epoch": 0.5147041066116338, "grad_norm": 1.9140625, "learning_rate": 2.396082734698938e-05, "loss": 0.3461, "step": 11722 }, { "epoch": 0.5147919250908373, "grad_norm": 1.7734375, "learning_rate": 2.3953915082427482e-05, "loss": 0.3456, "step": 11724 }, { "epoch": 0.5148797435700407, "grad_norm": 1.9375, "learning_rate": 2.3947002897975018e-05, "loss": 0.3699, "step": 11726 }, { "epoch": 0.5149675620492442, "grad_norm": 1.7421875, "learning_rate": 2.3940090794161324e-05, "loss": 0.3854, "step": 11728 }, { "epoch": 0.5150553805284477, "grad_norm": 1.796875, "learning_rate": 2.3933178771515735e-05, "loss": 0.3623, "step": 11730 }, { "epoch": 0.5151431990076512, "grad_norm": 1.8671875, "learning_rate": 2.3926266830567567e-05, "loss": 0.3466, "step": 11732 }, { "epoch": 0.5152310174868546, "grad_norm": 1.734375, "learning_rate": 2.3919354971846143e-05, "loss": 0.3677, "step": 11734 }, { "epoch": 0.5153188359660582, "grad_norm": 1.8125, "learning_rate": 2.3912443195880776e-05, "loss": 0.3551, "step": 11736 }, { "epoch": 0.5154066544452617, "grad_norm": 1.8515625, "learning_rate": 2.3905531503200768e-05, "loss": 0.3481, "step": 11738 }, { "epoch": 0.5154944729244652, "grad_norm": 2.109375, "learning_rate": 2.3898619894335425e-05, "loss": 0.3936, "step": 11740 }, { "epoch": 0.5155822914036686, "grad_norm": 1.75, "learning_rate": 2.3891708369814028e-05, "loss": 0.3743, "step": 11742 }, { "epoch": 0.5156701098828721, "grad_norm": 1.7421875, "learning_rate": 2.3884796930165875e-05, "loss": 0.3537, "step": 11744 }, { "epoch": 0.5157579283620756, "grad_norm": 1.6796875, "learning_rate": 2.3877885575920235e-05, "loss": 0.3446, "step": 11746 }, { "epoch": 0.515845746841279, "grad_norm": 1.8359375, "learning_rate": 2.3870974307606386e-05, "loss": 0.3612, "step": 11748 }, { "epoch": 0.5159335653204825, "grad_norm": 1.90625, "learning_rate": 2.38640631257536e-05, "loss": 0.3155, "step": 11750 }, { "epoch": 0.5160213837996861, "grad_norm": 2.03125, "learning_rate": 2.385715203089114e-05, "loss": 0.3664, "step": 11752 }, { "epoch": 0.5161092022788896, "grad_norm": 1.9140625, "learning_rate": 2.385024102354824e-05, "loss": 0.3454, "step": 11754 }, { "epoch": 0.516197020758093, "grad_norm": 1.6796875, "learning_rate": 2.3843330104254165e-05, "loss": 0.3397, "step": 11756 }, { "epoch": 0.5162848392372965, "grad_norm": 1.796875, "learning_rate": 2.383641927353814e-05, "loss": 0.3574, "step": 11758 }, { "epoch": 0.5163726577165, "grad_norm": 1.7734375, "learning_rate": 2.3829508531929408e-05, "loss": 0.3543, "step": 11760 }, { "epoch": 0.5164604761957035, "grad_norm": 2.0, "learning_rate": 2.3822597879957192e-05, "loss": 0.3621, "step": 11762 }, { "epoch": 0.5165482946749069, "grad_norm": 1.765625, "learning_rate": 2.3815687318150716e-05, "loss": 0.3541, "step": 11764 }, { "epoch": 0.5166361131541104, "grad_norm": 1.765625, "learning_rate": 2.3808776847039187e-05, "loss": 0.3748, "step": 11766 }, { "epoch": 0.516723931633314, "grad_norm": 2.4375, "learning_rate": 2.3801866467151813e-05, "loss": 0.3511, "step": 11768 }, { "epoch": 0.5168117501125175, "grad_norm": 1.75, "learning_rate": 2.3794956179017792e-05, "loss": 0.3926, "step": 11770 }, { "epoch": 0.5168995685917209, "grad_norm": 1.6640625, "learning_rate": 2.3788045983166317e-05, "loss": 0.3538, "step": 11772 }, { "epoch": 0.5169873870709244, "grad_norm": 1.765625, "learning_rate": 2.378113588012657e-05, "loss": 0.334, "step": 11774 }, { "epoch": 0.5170752055501279, "grad_norm": 1.5859375, "learning_rate": 2.377422587042773e-05, "loss": 0.3454, "step": 11776 }, { "epoch": 0.5171630240293313, "grad_norm": 1.578125, "learning_rate": 2.376731595459897e-05, "loss": 0.3581, "step": 11778 }, { "epoch": 0.5172508425085348, "grad_norm": 1.75, "learning_rate": 2.3760406133169443e-05, "loss": 0.3241, "step": 11780 }, { "epoch": 0.5173386609877384, "grad_norm": 1.8203125, "learning_rate": 2.3753496406668325e-05, "loss": 0.3687, "step": 11782 }, { "epoch": 0.5174264794669419, "grad_norm": 1.6015625, "learning_rate": 2.3746586775624738e-05, "loss": 0.3487, "step": 11784 }, { "epoch": 0.5175142979461453, "grad_norm": 1.7265625, "learning_rate": 2.3739677240567836e-05, "loss": 0.349, "step": 11786 }, { "epoch": 0.5176021164253488, "grad_norm": 1.6328125, "learning_rate": 2.3732767802026757e-05, "loss": 0.3642, "step": 11788 }, { "epoch": 0.5176899349045523, "grad_norm": 1.734375, "learning_rate": 2.372585846053062e-05, "loss": 0.3601, "step": 11790 }, { "epoch": 0.5177777533837558, "grad_norm": 2.015625, "learning_rate": 2.3718949216608556e-05, "loss": 0.3388, "step": 11792 }, { "epoch": 0.5178655718629592, "grad_norm": 1.7265625, "learning_rate": 2.371204007078966e-05, "loss": 0.3462, "step": 11794 }, { "epoch": 0.5179533903421627, "grad_norm": 1.7421875, "learning_rate": 2.3705131023603043e-05, "loss": 0.3661, "step": 11796 }, { "epoch": 0.5180412088213663, "grad_norm": 1.78125, "learning_rate": 2.3698222075577805e-05, "loss": 0.3453, "step": 11798 }, { "epoch": 0.5181290273005698, "grad_norm": 1.90625, "learning_rate": 2.3691313227243033e-05, "loss": 0.3558, "step": 11800 }, { "epoch": 0.5182168457797732, "grad_norm": 1.9609375, "learning_rate": 2.3684404479127813e-05, "loss": 0.3436, "step": 11802 }, { "epoch": 0.5183046642589767, "grad_norm": 1.6875, "learning_rate": 2.3677495831761205e-05, "loss": 0.3589, "step": 11804 }, { "epoch": 0.5183924827381802, "grad_norm": 1.703125, "learning_rate": 2.3670587285672284e-05, "loss": 0.3264, "step": 11806 }, { "epoch": 0.5184803012173836, "grad_norm": 1.703125, "learning_rate": 2.366367884139011e-05, "loss": 0.3578, "step": 11808 }, { "epoch": 0.5185681196965871, "grad_norm": 1.6796875, "learning_rate": 2.365677049944373e-05, "loss": 0.3635, "step": 11810 }, { "epoch": 0.5186559381757906, "grad_norm": 1.6953125, "learning_rate": 2.364986226036219e-05, "loss": 0.3389, "step": 11812 }, { "epoch": 0.5187437566549942, "grad_norm": 1.7578125, "learning_rate": 2.3642954124674523e-05, "loss": 0.3554, "step": 11814 }, { "epoch": 0.5188315751341976, "grad_norm": 1.875, "learning_rate": 2.3636046092909754e-05, "loss": 0.3582, "step": 11816 }, { "epoch": 0.5189193936134011, "grad_norm": 1.9140625, "learning_rate": 2.36291381655969e-05, "loss": 0.3479, "step": 11818 }, { "epoch": 0.5190072120926046, "grad_norm": 1.8515625, "learning_rate": 2.362223034326497e-05, "loss": 0.3522, "step": 11820 }, { "epoch": 0.5190950305718081, "grad_norm": 1.9609375, "learning_rate": 2.3615322626442977e-05, "loss": 0.3455, "step": 11822 }, { "epoch": 0.5191828490510115, "grad_norm": 2.109375, "learning_rate": 2.36084150156599e-05, "loss": 0.3522, "step": 11824 }, { "epoch": 0.519270667530215, "grad_norm": 1.8984375, "learning_rate": 2.360150751144474e-05, "loss": 0.3604, "step": 11826 }, { "epoch": 0.5193584860094186, "grad_norm": 1.625, "learning_rate": 2.3594600114326476e-05, "loss": 0.3782, "step": 11828 }, { "epoch": 0.5194463044886221, "grad_norm": 1.6015625, "learning_rate": 2.3587692824834066e-05, "loss": 0.3688, "step": 11830 }, { "epoch": 0.5195341229678255, "grad_norm": 1.8984375, "learning_rate": 2.358078564349648e-05, "loss": 0.3603, "step": 11832 }, { "epoch": 0.519621941447029, "grad_norm": 1.75, "learning_rate": 2.3573878570842664e-05, "loss": 0.3842, "step": 11834 }, { "epoch": 0.5197097599262325, "grad_norm": 1.7890625, "learning_rate": 2.3566971607401574e-05, "loss": 0.3662, "step": 11836 }, { "epoch": 0.519797578405436, "grad_norm": 1.8125, "learning_rate": 2.3560064753702144e-05, "loss": 0.3501, "step": 11838 }, { "epoch": 0.5198853968846394, "grad_norm": 1.625, "learning_rate": 2.3553158010273295e-05, "loss": 0.3431, "step": 11840 }, { "epoch": 0.5199732153638429, "grad_norm": 1.75, "learning_rate": 2.3546251377643955e-05, "loss": 0.3624, "step": 11842 }, { "epoch": 0.5200610338430465, "grad_norm": 1.828125, "learning_rate": 2.353934485634304e-05, "loss": 0.3681, "step": 11844 }, { "epoch": 0.5201488523222499, "grad_norm": 1.8828125, "learning_rate": 2.3532438446899437e-05, "loss": 0.3293, "step": 11846 }, { "epoch": 0.5202366708014534, "grad_norm": 1.75, "learning_rate": 2.3525532149842054e-05, "loss": 0.351, "step": 11848 }, { "epoch": 0.5203244892806569, "grad_norm": 1.6328125, "learning_rate": 2.3518625965699767e-05, "loss": 0.3449, "step": 11850 }, { "epoch": 0.5204123077598604, "grad_norm": 1.8984375, "learning_rate": 2.351171989500146e-05, "loss": 0.3673, "step": 11852 }, { "epoch": 0.5205001262390638, "grad_norm": 1.6875, "learning_rate": 2.3504813938276005e-05, "loss": 0.3409, "step": 11854 }, { "epoch": 0.5205879447182673, "grad_norm": 1.8671875, "learning_rate": 2.349790809605225e-05, "loss": 0.3701, "step": 11856 }, { "epoch": 0.5206757631974708, "grad_norm": 1.859375, "learning_rate": 2.349100236885906e-05, "loss": 0.3485, "step": 11858 }, { "epoch": 0.5207635816766744, "grad_norm": 1.7265625, "learning_rate": 2.3484096757225263e-05, "loss": 0.3583, "step": 11860 }, { "epoch": 0.5208514001558778, "grad_norm": 2.0625, "learning_rate": 2.3477191261679704e-05, "loss": 0.35, "step": 11862 }, { "epoch": 0.5209392186350813, "grad_norm": 1.8125, "learning_rate": 2.3470285882751205e-05, "loss": 0.3593, "step": 11864 }, { "epoch": 0.5210270371142848, "grad_norm": 1.8359375, "learning_rate": 2.3463380620968576e-05, "loss": 0.3636, "step": 11866 }, { "epoch": 0.5211148555934882, "grad_norm": 1.75, "learning_rate": 2.345647547686063e-05, "loss": 0.3752, "step": 11868 }, { "epoch": 0.5212026740726917, "grad_norm": 1.84375, "learning_rate": 2.344957045095616e-05, "loss": 0.3505, "step": 11870 }, { "epoch": 0.5212904925518952, "grad_norm": 2.140625, "learning_rate": 2.3442665543783957e-05, "loss": 0.3737, "step": 11872 }, { "epoch": 0.5213783110310988, "grad_norm": 2.09375, "learning_rate": 2.343576075587281e-05, "loss": 0.3533, "step": 11874 }, { "epoch": 0.5214661295103022, "grad_norm": 1.8359375, "learning_rate": 2.342885608775147e-05, "loss": 0.3613, "step": 11876 }, { "epoch": 0.5215539479895057, "grad_norm": 1.9140625, "learning_rate": 2.342195153994871e-05, "loss": 0.342, "step": 11878 }, { "epoch": 0.5216417664687092, "grad_norm": 1.7421875, "learning_rate": 2.3415047112993274e-05, "loss": 0.3746, "step": 11880 }, { "epoch": 0.5217295849479127, "grad_norm": 1.7734375, "learning_rate": 2.340814280741391e-05, "loss": 0.3362, "step": 11882 }, { "epoch": 0.5218174034271161, "grad_norm": 1.8671875, "learning_rate": 2.3401238623739354e-05, "loss": 0.3482, "step": 11884 }, { "epoch": 0.5219052219063196, "grad_norm": 1.6015625, "learning_rate": 2.3394334562498328e-05, "loss": 0.3655, "step": 11886 }, { "epoch": 0.5219930403855231, "grad_norm": 1.609375, "learning_rate": 2.3387430624219546e-05, "loss": 0.3439, "step": 11888 }, { "epoch": 0.5220808588647267, "grad_norm": 2.109375, "learning_rate": 2.3380526809431717e-05, "loss": 0.3549, "step": 11890 }, { "epoch": 0.5221686773439301, "grad_norm": 1.8515625, "learning_rate": 2.3373623118663528e-05, "loss": 0.3444, "step": 11892 }, { "epoch": 0.5222564958231336, "grad_norm": 1.796875, "learning_rate": 2.336671955244367e-05, "loss": 0.3373, "step": 11894 }, { "epoch": 0.5223443143023371, "grad_norm": 1.734375, "learning_rate": 2.335981611130082e-05, "loss": 0.3576, "step": 11896 }, { "epoch": 0.5224321327815405, "grad_norm": 2.046875, "learning_rate": 2.3352912795763646e-05, "loss": 0.3655, "step": 11898 }, { "epoch": 0.522519951260744, "grad_norm": 2.0, "learning_rate": 2.334600960636081e-05, "loss": 0.376, "step": 11900 }, { "epoch": 0.5226077697399475, "grad_norm": 1.84375, "learning_rate": 2.333910654362095e-05, "loss": 0.3281, "step": 11902 }, { "epoch": 0.522695588219151, "grad_norm": 1.8203125, "learning_rate": 2.333220360807271e-05, "loss": 0.3497, "step": 11904 }, { "epoch": 0.5227834066983545, "grad_norm": 1.828125, "learning_rate": 2.3325300800244726e-05, "loss": 0.3409, "step": 11906 }, { "epoch": 0.522871225177558, "grad_norm": 1.890625, "learning_rate": 2.3318398120665598e-05, "loss": 0.3553, "step": 11908 }, { "epoch": 0.5229590436567615, "grad_norm": 1.6875, "learning_rate": 2.3311495569863945e-05, "loss": 0.3339, "step": 11910 }, { "epoch": 0.523046862135965, "grad_norm": 1.859375, "learning_rate": 2.3304593148368366e-05, "loss": 0.3571, "step": 11912 }, { "epoch": 0.5231346806151684, "grad_norm": 1.8515625, "learning_rate": 2.3297690856707458e-05, "loss": 0.3476, "step": 11914 }, { "epoch": 0.5232224990943719, "grad_norm": 1.8203125, "learning_rate": 2.3290788695409785e-05, "loss": 0.3489, "step": 11916 }, { "epoch": 0.5233103175735754, "grad_norm": 1.6875, "learning_rate": 2.3283886665003924e-05, "loss": 0.3602, "step": 11918 }, { "epoch": 0.5233981360527789, "grad_norm": 1.8671875, "learning_rate": 2.327698476601843e-05, "loss": 0.3553, "step": 11920 }, { "epoch": 0.5234859545319824, "grad_norm": 1.7421875, "learning_rate": 2.327008299898186e-05, "loss": 0.3447, "step": 11922 }, { "epoch": 0.5235737730111859, "grad_norm": 1.7578125, "learning_rate": 2.3263181364422748e-05, "loss": 0.3234, "step": 11924 }, { "epoch": 0.5236615914903894, "grad_norm": 1.828125, "learning_rate": 2.3256279862869626e-05, "loss": 0.3697, "step": 11926 }, { "epoch": 0.5237494099695928, "grad_norm": 1.7265625, "learning_rate": 2.3249378494851008e-05, "loss": 0.3523, "step": 11928 }, { "epoch": 0.5238372284487963, "grad_norm": 1.8203125, "learning_rate": 2.3242477260895404e-05, "loss": 0.356, "step": 11930 }, { "epoch": 0.5239250469279998, "grad_norm": 1.7109375, "learning_rate": 2.323557616153131e-05, "loss": 0.3551, "step": 11932 }, { "epoch": 0.5240128654072033, "grad_norm": 1.8359375, "learning_rate": 2.322867519728722e-05, "loss": 0.3631, "step": 11934 }, { "epoch": 0.5241006838864068, "grad_norm": 1.8046875, "learning_rate": 2.3221774368691616e-05, "loss": 0.34, "step": 11936 }, { "epoch": 0.5241885023656103, "grad_norm": 1.578125, "learning_rate": 2.3214873676272948e-05, "loss": 0.3139, "step": 11938 }, { "epoch": 0.5242763208448138, "grad_norm": 1.765625, "learning_rate": 2.320797312055969e-05, "loss": 0.3523, "step": 11940 }, { "epoch": 0.5243641393240173, "grad_norm": 1.7734375, "learning_rate": 2.3201072702080267e-05, "loss": 0.3513, "step": 11942 }, { "epoch": 0.5244519578032207, "grad_norm": 1.6015625, "learning_rate": 2.3194172421363132e-05, "loss": 0.3215, "step": 11944 }, { "epoch": 0.5245397762824242, "grad_norm": 1.75, "learning_rate": 2.3187272278936705e-05, "loss": 0.3471, "step": 11946 }, { "epoch": 0.5246275947616277, "grad_norm": 1.9921875, "learning_rate": 2.3180372275329404e-05, "loss": 0.3485, "step": 11948 }, { "epoch": 0.5247154132408312, "grad_norm": 1.9140625, "learning_rate": 2.317347241106963e-05, "loss": 0.3585, "step": 11950 }, { "epoch": 0.5248032317200347, "grad_norm": 1.65625, "learning_rate": 2.316657268668578e-05, "loss": 0.3414, "step": 11952 }, { "epoch": 0.5248910501992382, "grad_norm": 1.96875, "learning_rate": 2.315967310270623e-05, "loss": 0.3344, "step": 11954 }, { "epoch": 0.5249788686784417, "grad_norm": 2.1875, "learning_rate": 2.315277365965935e-05, "loss": 0.3659, "step": 11956 }, { "epoch": 0.5250666871576452, "grad_norm": 2.046875, "learning_rate": 2.314587435807351e-05, "loss": 0.3661, "step": 11958 }, { "epoch": 0.5251545056368486, "grad_norm": 2.1875, "learning_rate": 2.3138975198477056e-05, "loss": 0.3642, "step": 11960 }, { "epoch": 0.5252423241160521, "grad_norm": 1.9140625, "learning_rate": 2.3132076181398332e-05, "loss": 0.3845, "step": 11962 }, { "epoch": 0.5253301425952556, "grad_norm": 1.703125, "learning_rate": 2.3125177307365658e-05, "loss": 0.3446, "step": 11964 }, { "epoch": 0.525417961074459, "grad_norm": 1.9296875, "learning_rate": 2.3118278576907366e-05, "loss": 0.3537, "step": 11966 }, { "epoch": 0.5255057795536626, "grad_norm": 2.0625, "learning_rate": 2.3111379990551736e-05, "loss": 0.3397, "step": 11968 }, { "epoch": 0.5255935980328661, "grad_norm": 1.6015625, "learning_rate": 2.3104481548827084e-05, "loss": 0.3742, "step": 11970 }, { "epoch": 0.5256814165120696, "grad_norm": 1.828125, "learning_rate": 2.309758325226169e-05, "loss": 0.3405, "step": 11972 }, { "epoch": 0.525769234991273, "grad_norm": 1.8828125, "learning_rate": 2.3090685101383824e-05, "loss": 0.333, "step": 11974 }, { "epoch": 0.5258570534704765, "grad_norm": 1.7421875, "learning_rate": 2.3083787096721756e-05, "loss": 0.3498, "step": 11976 }, { "epoch": 0.52594487194968, "grad_norm": 1.9765625, "learning_rate": 2.3076889238803727e-05, "loss": 0.3476, "step": 11978 }, { "epoch": 0.5260326904288835, "grad_norm": 1.671875, "learning_rate": 2.3069991528157982e-05, "loss": 0.3544, "step": 11980 }, { "epoch": 0.526120508908087, "grad_norm": 1.90625, "learning_rate": 2.3063093965312747e-05, "loss": 0.3416, "step": 11982 }, { "epoch": 0.5262083273872905, "grad_norm": 1.75, "learning_rate": 2.305619655079624e-05, "loss": 0.3546, "step": 11984 }, { "epoch": 0.526296145866494, "grad_norm": 1.984375, "learning_rate": 2.3049299285136667e-05, "loss": 0.3546, "step": 11986 }, { "epoch": 0.5263839643456975, "grad_norm": 1.796875, "learning_rate": 2.304240216886223e-05, "loss": 0.3563, "step": 11988 }, { "epoch": 0.5264717828249009, "grad_norm": 1.5546875, "learning_rate": 2.3035505202501095e-05, "loss": 0.3371, "step": 11990 }, { "epoch": 0.5265596013041044, "grad_norm": 1.8046875, "learning_rate": 2.3028608386581446e-05, "loss": 0.3759, "step": 11992 }, { "epoch": 0.5266474197833079, "grad_norm": 1.609375, "learning_rate": 2.302171172163144e-05, "loss": 0.3462, "step": 11994 }, { "epoch": 0.5267352382625113, "grad_norm": 1.7890625, "learning_rate": 2.301481520817922e-05, "loss": 0.3657, "step": 11996 }, { "epoch": 0.5268230567417149, "grad_norm": 1.65625, "learning_rate": 2.300791884675294e-05, "loss": 0.3392, "step": 11998 }, { "epoch": 0.5269108752209184, "grad_norm": 1.640625, "learning_rate": 2.3001022637880705e-05, "loss": 0.3366, "step": 12000 }, { "epoch": 0.5269986937001219, "grad_norm": 1.65625, "learning_rate": 2.2994126582090635e-05, "loss": 0.3486, "step": 12002 }, { "epoch": 0.5270865121793253, "grad_norm": 1.71875, "learning_rate": 2.298723067991083e-05, "loss": 0.3687, "step": 12004 }, { "epoch": 0.5271743306585288, "grad_norm": 1.7421875, "learning_rate": 2.2980334931869386e-05, "loss": 0.3494, "step": 12006 }, { "epoch": 0.5272621491377323, "grad_norm": 1.8671875, "learning_rate": 2.2973439338494372e-05, "loss": 0.3455, "step": 12008 }, { "epoch": 0.5273499676169358, "grad_norm": 1.6484375, "learning_rate": 2.296654390031386e-05, "loss": 0.3771, "step": 12010 }, { "epoch": 0.5274377860961392, "grad_norm": 1.671875, "learning_rate": 2.295964861785591e-05, "loss": 0.3492, "step": 12012 }, { "epoch": 0.5275256045753428, "grad_norm": 1.7890625, "learning_rate": 2.295275349164855e-05, "loss": 0.3332, "step": 12014 }, { "epoch": 0.5276134230545463, "grad_norm": 1.6953125, "learning_rate": 2.2945858522219822e-05, "loss": 0.3673, "step": 12016 }, { "epoch": 0.5277012415337498, "grad_norm": 1.734375, "learning_rate": 2.293896371009774e-05, "loss": 0.3439, "step": 12018 }, { "epoch": 0.5277890600129532, "grad_norm": 1.671875, "learning_rate": 2.293206905581031e-05, "loss": 0.3268, "step": 12020 }, { "epoch": 0.5278768784921567, "grad_norm": 1.7265625, "learning_rate": 2.2925174559885526e-05, "loss": 0.3308, "step": 12022 }, { "epoch": 0.5279646969713602, "grad_norm": 2.0, "learning_rate": 2.291828022285138e-05, "loss": 0.3547, "step": 12024 }, { "epoch": 0.5280525154505636, "grad_norm": 1.703125, "learning_rate": 2.2911386045235826e-05, "loss": 0.3416, "step": 12026 }, { "epoch": 0.5281403339297672, "grad_norm": 2.171875, "learning_rate": 2.290449202756684e-05, "loss": 0.3571, "step": 12028 }, { "epoch": 0.5282281524089707, "grad_norm": 2.0625, "learning_rate": 2.2897598170372346e-05, "loss": 0.3452, "step": 12030 }, { "epoch": 0.5283159708881742, "grad_norm": 1.7578125, "learning_rate": 2.289070447418029e-05, "loss": 0.3385, "step": 12032 }, { "epoch": 0.5284037893673776, "grad_norm": 1.6796875, "learning_rate": 2.288381093951859e-05, "loss": 0.3539, "step": 12034 }, { "epoch": 0.5284916078465811, "grad_norm": 1.953125, "learning_rate": 2.2876917566915158e-05, "loss": 0.3663, "step": 12036 }, { "epoch": 0.5285794263257846, "grad_norm": 1.640625, "learning_rate": 2.2870024356897887e-05, "loss": 0.3517, "step": 12038 }, { "epoch": 0.5286672448049881, "grad_norm": 1.765625, "learning_rate": 2.286313130999466e-05, "loss": 0.321, "step": 12040 }, { "epoch": 0.5287550632841915, "grad_norm": 2.09375, "learning_rate": 2.285623842673335e-05, "loss": 0.3667, "step": 12042 }, { "epoch": 0.5288428817633951, "grad_norm": 1.6640625, "learning_rate": 2.2849345707641815e-05, "loss": 0.3561, "step": 12044 }, { "epoch": 0.5289307002425986, "grad_norm": 1.71875, "learning_rate": 2.28424531532479e-05, "loss": 0.3227, "step": 12046 }, { "epoch": 0.5290185187218021, "grad_norm": 1.7578125, "learning_rate": 2.2835560764079446e-05, "loss": 0.3416, "step": 12048 }, { "epoch": 0.5291063372010055, "grad_norm": 1.8828125, "learning_rate": 2.282866854066426e-05, "loss": 0.3529, "step": 12050 }, { "epoch": 0.529194155680209, "grad_norm": 2.125, "learning_rate": 2.282177648353016e-05, "loss": 0.3454, "step": 12052 }, { "epoch": 0.5292819741594125, "grad_norm": 1.7578125, "learning_rate": 2.281488459320494e-05, "loss": 0.3768, "step": 12054 }, { "epoch": 0.5293697926386159, "grad_norm": 1.8828125, "learning_rate": 2.280799287021638e-05, "loss": 0.3357, "step": 12056 }, { "epoch": 0.5294576111178194, "grad_norm": 1.796875, "learning_rate": 2.280110131509226e-05, "loss": 0.3533, "step": 12058 }, { "epoch": 0.529545429597023, "grad_norm": 1.671875, "learning_rate": 2.2794209928360322e-05, "loss": 0.3469, "step": 12060 }, { "epoch": 0.5296332480762265, "grad_norm": 1.875, "learning_rate": 2.2787318710548313e-05, "loss": 0.3259, "step": 12062 }, { "epoch": 0.5297210665554299, "grad_norm": 1.890625, "learning_rate": 2.2780427662183975e-05, "loss": 0.3613, "step": 12064 }, { "epoch": 0.5298088850346334, "grad_norm": 1.859375, "learning_rate": 2.2773536783795012e-05, "loss": 0.3602, "step": 12066 }, { "epoch": 0.5298967035138369, "grad_norm": 1.6015625, "learning_rate": 2.2766646075909137e-05, "loss": 0.3444, "step": 12068 }, { "epoch": 0.5299845219930404, "grad_norm": 1.8046875, "learning_rate": 2.275975553905404e-05, "loss": 0.3097, "step": 12070 }, { "epoch": 0.5300723404722438, "grad_norm": 1.734375, "learning_rate": 2.2752865173757403e-05, "loss": 0.3388, "step": 12072 }, { "epoch": 0.5301601589514474, "grad_norm": 1.78125, "learning_rate": 2.2745974980546893e-05, "loss": 0.3593, "step": 12074 }, { "epoch": 0.5302479774306509, "grad_norm": 1.6484375, "learning_rate": 2.273908495995015e-05, "loss": 0.3607, "step": 12076 }, { "epoch": 0.5303357959098544, "grad_norm": 1.75, "learning_rate": 2.273219511249483e-05, "loss": 0.3437, "step": 12078 }, { "epoch": 0.5304236143890578, "grad_norm": 1.703125, "learning_rate": 2.272530543870855e-05, "loss": 0.3302, "step": 12080 }, { "epoch": 0.5305114328682613, "grad_norm": 1.84375, "learning_rate": 2.2718415939118924e-05, "loss": 0.3521, "step": 12082 }, { "epoch": 0.5305992513474648, "grad_norm": 1.859375, "learning_rate": 2.271152661425355e-05, "loss": 0.3512, "step": 12084 }, { "epoch": 0.5306870698266682, "grad_norm": 1.6640625, "learning_rate": 2.2704637464640026e-05, "loss": 0.3374, "step": 12086 }, { "epoch": 0.5307748883058717, "grad_norm": 1.859375, "learning_rate": 2.2697748490805905e-05, "loss": 0.3573, "step": 12088 }, { "epoch": 0.5308627067850753, "grad_norm": 1.5625, "learning_rate": 2.2690859693278772e-05, "loss": 0.3552, "step": 12090 }, { "epoch": 0.5309505252642788, "grad_norm": 1.8671875, "learning_rate": 2.2683971072586145e-05, "loss": 0.3585, "step": 12092 }, { "epoch": 0.5310383437434822, "grad_norm": 1.828125, "learning_rate": 2.2677082629255566e-05, "loss": 0.3361, "step": 12094 }, { "epoch": 0.5311261622226857, "grad_norm": 1.734375, "learning_rate": 2.2670194363814558e-05, "loss": 0.3917, "step": 12096 }, { "epoch": 0.5312139807018892, "grad_norm": 1.6484375, "learning_rate": 2.2663306276790625e-05, "loss": 0.3448, "step": 12098 }, { "epoch": 0.5313017991810927, "grad_norm": 1.7109375, "learning_rate": 2.265641836871126e-05, "loss": 0.3332, "step": 12100 }, { "epoch": 0.5313896176602961, "grad_norm": 1.765625, "learning_rate": 2.2649530640103936e-05, "loss": 0.3468, "step": 12102 }, { "epoch": 0.5314774361394996, "grad_norm": 1.8671875, "learning_rate": 2.264264309149612e-05, "loss": 0.362, "step": 12104 }, { "epoch": 0.5315652546187032, "grad_norm": 1.7734375, "learning_rate": 2.2635755723415256e-05, "loss": 0.3369, "step": 12106 }, { "epoch": 0.5316530730979067, "grad_norm": 1.9375, "learning_rate": 2.262886853638879e-05, "loss": 0.3461, "step": 12108 }, { "epoch": 0.5317408915771101, "grad_norm": 1.625, "learning_rate": 2.262198153094414e-05, "loss": 0.3503, "step": 12110 }, { "epoch": 0.5318287100563136, "grad_norm": 1.8359375, "learning_rate": 2.2615094707608715e-05, "loss": 0.3132, "step": 12112 }, { "epoch": 0.5319165285355171, "grad_norm": 1.8359375, "learning_rate": 2.2608208066909904e-05, "loss": 0.3519, "step": 12114 }, { "epoch": 0.5320043470147205, "grad_norm": 2.046875, "learning_rate": 2.2601321609375097e-05, "loss": 0.3466, "step": 12116 }, { "epoch": 0.532092165493924, "grad_norm": 2.078125, "learning_rate": 2.2594435335531655e-05, "loss": 0.3607, "step": 12118 }, { "epoch": 0.5321799839731275, "grad_norm": 1.5859375, "learning_rate": 2.258754924590694e-05, "loss": 0.3258, "step": 12120 }, { "epoch": 0.5322678024523311, "grad_norm": 1.859375, "learning_rate": 2.2580663341028273e-05, "loss": 0.3427, "step": 12122 }, { "epoch": 0.5323556209315345, "grad_norm": 1.7734375, "learning_rate": 2.2573777621422985e-05, "loss": 0.3639, "step": 12124 }, { "epoch": 0.532443439410738, "grad_norm": 1.9765625, "learning_rate": 2.25668920876184e-05, "loss": 0.3382, "step": 12126 }, { "epoch": 0.5325312578899415, "grad_norm": 1.734375, "learning_rate": 2.256000674014179e-05, "loss": 0.3362, "step": 12128 }, { "epoch": 0.532619076369145, "grad_norm": 1.78125, "learning_rate": 2.2553121579520454e-05, "loss": 0.3644, "step": 12130 }, { "epoch": 0.5327068948483484, "grad_norm": 1.7578125, "learning_rate": 2.254623660628165e-05, "loss": 0.3392, "step": 12132 }, { "epoch": 0.5327947133275519, "grad_norm": 1.6875, "learning_rate": 2.2539351820952636e-05, "loss": 0.3422, "step": 12134 }, { "epoch": 0.5328825318067555, "grad_norm": 1.890625, "learning_rate": 2.2532467224060656e-05, "loss": 0.3382, "step": 12136 }, { "epoch": 0.532970350285959, "grad_norm": 1.7265625, "learning_rate": 2.2525582816132922e-05, "loss": 0.3704, "step": 12138 }, { "epoch": 0.5330581687651624, "grad_norm": 1.890625, "learning_rate": 2.2518698597696646e-05, "loss": 0.3222, "step": 12140 }, { "epoch": 0.5331459872443659, "grad_norm": 1.640625, "learning_rate": 2.251181456927903e-05, "loss": 0.3759, "step": 12142 }, { "epoch": 0.5332338057235694, "grad_norm": 1.8671875, "learning_rate": 2.2504930731407252e-05, "loss": 0.3332, "step": 12144 }, { "epoch": 0.5333216242027728, "grad_norm": 1.75, "learning_rate": 2.2498047084608477e-05, "loss": 0.343, "step": 12146 }, { "epoch": 0.5334094426819763, "grad_norm": 1.6953125, "learning_rate": 2.2491163629409852e-05, "loss": 0.3348, "step": 12148 }, { "epoch": 0.5334972611611798, "grad_norm": 1.6640625, "learning_rate": 2.2484280366338527e-05, "loss": 0.3569, "step": 12150 }, { "epoch": 0.5335850796403834, "grad_norm": 1.71875, "learning_rate": 2.247739729592161e-05, "loss": 0.357, "step": 12152 }, { "epoch": 0.5336728981195868, "grad_norm": 1.6875, "learning_rate": 2.247051441868621e-05, "loss": 0.3752, "step": 12154 }, { "epoch": 0.5337607165987903, "grad_norm": 1.6328125, "learning_rate": 2.2463631735159422e-05, "loss": 0.3608, "step": 12156 }, { "epoch": 0.5338485350779938, "grad_norm": 1.5546875, "learning_rate": 2.2456749245868323e-05, "loss": 0.3554, "step": 12158 }, { "epoch": 0.5339363535571973, "grad_norm": 1.6328125, "learning_rate": 2.2449866951339976e-05, "loss": 0.3527, "step": 12160 }, { "epoch": 0.5340241720364007, "grad_norm": 1.8671875, "learning_rate": 2.2442984852101435e-05, "loss": 0.3513, "step": 12162 }, { "epoch": 0.5341119905156042, "grad_norm": 1.546875, "learning_rate": 2.2436102948679722e-05, "loss": 0.3651, "step": 12164 }, { "epoch": 0.5341998089948077, "grad_norm": 1.671875, "learning_rate": 2.242922124160186e-05, "loss": 0.3336, "step": 12166 }, { "epoch": 0.5342876274740113, "grad_norm": 1.859375, "learning_rate": 2.2422339731394848e-05, "loss": 0.3534, "step": 12168 }, { "epoch": 0.5343754459532147, "grad_norm": 1.8125, "learning_rate": 2.2415458418585682e-05, "loss": 0.3558, "step": 12170 }, { "epoch": 0.5344632644324182, "grad_norm": 1.8671875, "learning_rate": 2.2408577303701334e-05, "loss": 0.3285, "step": 12172 }, { "epoch": 0.5345510829116217, "grad_norm": 1.9453125, "learning_rate": 2.240169638726875e-05, "loss": 0.3693, "step": 12174 }, { "epoch": 0.5346389013908251, "grad_norm": 1.8515625, "learning_rate": 2.239481566981488e-05, "loss": 0.3854, "step": 12176 }, { "epoch": 0.5347267198700286, "grad_norm": 1.7578125, "learning_rate": 2.2387935151866653e-05, "loss": 0.3369, "step": 12178 }, { "epoch": 0.5348145383492321, "grad_norm": 1.9609375, "learning_rate": 2.2381054833950976e-05, "loss": 0.3574, "step": 12180 }, { "epoch": 0.5349023568284357, "grad_norm": 1.8671875, "learning_rate": 2.2374174716594758e-05, "loss": 0.3278, "step": 12182 }, { "epoch": 0.5349901753076391, "grad_norm": 1.6484375, "learning_rate": 2.2367294800324862e-05, "loss": 0.3384, "step": 12184 }, { "epoch": 0.5350779937868426, "grad_norm": 1.8671875, "learning_rate": 2.236041508566816e-05, "loss": 0.3491, "step": 12186 }, { "epoch": 0.5351658122660461, "grad_norm": 1.734375, "learning_rate": 2.2353535573151506e-05, "loss": 0.3535, "step": 12188 }, { "epoch": 0.5352536307452496, "grad_norm": 1.84375, "learning_rate": 2.234665626330173e-05, "loss": 0.3765, "step": 12190 }, { "epoch": 0.535341449224453, "grad_norm": 1.765625, "learning_rate": 2.233977715664565e-05, "loss": 0.3506, "step": 12192 }, { "epoch": 0.5354292677036565, "grad_norm": 1.578125, "learning_rate": 2.2332898253710077e-05, "loss": 0.3636, "step": 12194 }, { "epoch": 0.53551708618286, "grad_norm": 1.6796875, "learning_rate": 2.2326019555021793e-05, "loss": 0.331, "step": 12196 }, { "epoch": 0.5356049046620636, "grad_norm": 1.6484375, "learning_rate": 2.231914106110758e-05, "loss": 0.3456, "step": 12198 }, { "epoch": 0.535692723141267, "grad_norm": 1.8203125, "learning_rate": 2.231226277249418e-05, "loss": 0.3505, "step": 12200 }, { "epoch": 0.5357805416204705, "grad_norm": 1.90625, "learning_rate": 2.2305384689708342e-05, "loss": 0.3472, "step": 12202 }, { "epoch": 0.535868360099674, "grad_norm": 1.84375, "learning_rate": 2.229850681327679e-05, "loss": 0.3306, "step": 12204 }, { "epoch": 0.5359561785788775, "grad_norm": 1.9375, "learning_rate": 2.2291629143726232e-05, "loss": 0.3345, "step": 12206 }, { "epoch": 0.5360439970580809, "grad_norm": 1.6328125, "learning_rate": 2.228475168158337e-05, "loss": 0.3393, "step": 12208 }, { "epoch": 0.5361318155372844, "grad_norm": 1.671875, "learning_rate": 2.227787442737487e-05, "loss": 0.327, "step": 12210 }, { "epoch": 0.5362196340164879, "grad_norm": 1.8828125, "learning_rate": 2.2270997381627407e-05, "loss": 0.3662, "step": 12212 }, { "epoch": 0.5363074524956915, "grad_norm": 1.6640625, "learning_rate": 2.2264120544867615e-05, "loss": 0.365, "step": 12214 }, { "epoch": 0.5363952709748949, "grad_norm": 1.7265625, "learning_rate": 2.2257243917622124e-05, "loss": 0.3556, "step": 12216 }, { "epoch": 0.5364830894540984, "grad_norm": 1.6875, "learning_rate": 2.2250367500417547e-05, "loss": 0.3615, "step": 12218 }, { "epoch": 0.5365709079333019, "grad_norm": 2.53125, "learning_rate": 2.2243491293780493e-05, "loss": 0.3571, "step": 12220 }, { "epoch": 0.5366587264125053, "grad_norm": 1.6875, "learning_rate": 2.223661529823753e-05, "loss": 0.3442, "step": 12222 }, { "epoch": 0.5367465448917088, "grad_norm": 1.984375, "learning_rate": 2.222973951431524e-05, "loss": 0.3453, "step": 12224 }, { "epoch": 0.5368343633709123, "grad_norm": 1.796875, "learning_rate": 2.2222863942540153e-05, "loss": 0.3537, "step": 12226 }, { "epoch": 0.5369221818501159, "grad_norm": 1.8046875, "learning_rate": 2.2215988583438814e-05, "loss": 0.3566, "step": 12228 }, { "epoch": 0.5370100003293193, "grad_norm": 1.796875, "learning_rate": 2.2209113437537738e-05, "loss": 0.3607, "step": 12230 }, { "epoch": 0.5370978188085228, "grad_norm": 1.75, "learning_rate": 2.220223850536342e-05, "loss": 0.3418, "step": 12232 }, { "epoch": 0.5371856372877263, "grad_norm": 1.625, "learning_rate": 2.2195363787442356e-05, "loss": 0.3311, "step": 12234 }, { "epoch": 0.5372734557669298, "grad_norm": 1.8984375, "learning_rate": 2.2188489284300997e-05, "loss": 0.3293, "step": 12236 }, { "epoch": 0.5373612742461332, "grad_norm": 1.671875, "learning_rate": 2.2181614996465805e-05, "loss": 0.3569, "step": 12238 }, { "epoch": 0.5374490927253367, "grad_norm": 2.015625, "learning_rate": 2.217474092446321e-05, "loss": 0.3676, "step": 12240 }, { "epoch": 0.5375369112045402, "grad_norm": 1.78125, "learning_rate": 2.2167867068819646e-05, "loss": 0.3687, "step": 12242 }, { "epoch": 0.5376247296837438, "grad_norm": 1.7421875, "learning_rate": 2.2160993430061488e-05, "loss": 0.3539, "step": 12244 }, { "epoch": 0.5377125481629472, "grad_norm": 1.703125, "learning_rate": 2.2154120008715135e-05, "loss": 0.3229, "step": 12246 }, { "epoch": 0.5378003666421507, "grad_norm": 1.8203125, "learning_rate": 2.2147246805306955e-05, "loss": 0.3524, "step": 12248 }, { "epoch": 0.5378881851213542, "grad_norm": 1.921875, "learning_rate": 2.21403738203633e-05, "loss": 0.3487, "step": 12250 }, { "epoch": 0.5379760036005576, "grad_norm": 1.8125, "learning_rate": 2.2133501054410503e-05, "loss": 0.3325, "step": 12252 }, { "epoch": 0.5380638220797611, "grad_norm": 1.6875, "learning_rate": 2.2126628507974877e-05, "loss": 0.315, "step": 12254 }, { "epoch": 0.5381516405589646, "grad_norm": 1.7421875, "learning_rate": 2.2119756181582733e-05, "loss": 0.3404, "step": 12256 }, { "epoch": 0.538239459038168, "grad_norm": 1.6484375, "learning_rate": 2.2112884075760347e-05, "loss": 0.3195, "step": 12258 }, { "epoch": 0.5383272775173716, "grad_norm": 1.75, "learning_rate": 2.2106012191033998e-05, "loss": 0.3416, "step": 12260 }, { "epoch": 0.5384150959965751, "grad_norm": 1.6875, "learning_rate": 2.2099140527929926e-05, "loss": 0.3675, "step": 12262 }, { "epoch": 0.5385029144757786, "grad_norm": 1.6015625, "learning_rate": 2.2092269086974367e-05, "loss": 0.3592, "step": 12264 }, { "epoch": 0.538590732954982, "grad_norm": 1.5859375, "learning_rate": 2.2085397868693537e-05, "loss": 0.3565, "step": 12266 }, { "epoch": 0.5386785514341855, "grad_norm": 1.875, "learning_rate": 2.2078526873613637e-05, "loss": 0.3504, "step": 12268 }, { "epoch": 0.538766369913389, "grad_norm": 1.859375, "learning_rate": 2.207165610226085e-05, "loss": 0.3347, "step": 12270 }, { "epoch": 0.5388541883925925, "grad_norm": 1.71875, "learning_rate": 2.2064785555161343e-05, "loss": 0.3376, "step": 12272 }, { "epoch": 0.5389420068717959, "grad_norm": 2.015625, "learning_rate": 2.205791523284127e-05, "loss": 0.366, "step": 12274 }, { "epoch": 0.5390298253509995, "grad_norm": 2.3125, "learning_rate": 2.2051045135826743e-05, "loss": 0.3163, "step": 12276 }, { "epoch": 0.539117643830203, "grad_norm": 1.6953125, "learning_rate": 2.2044175264643884e-05, "loss": 0.3666, "step": 12278 }, { "epoch": 0.5392054623094065, "grad_norm": 1.8671875, "learning_rate": 2.2037305619818792e-05, "loss": 0.3586, "step": 12280 }, { "epoch": 0.5392932807886099, "grad_norm": 2.25, "learning_rate": 2.203043620187755e-05, "loss": 0.3081, "step": 12282 }, { "epoch": 0.5393810992678134, "grad_norm": 1.984375, "learning_rate": 2.2023567011346213e-05, "loss": 0.3113, "step": 12284 }, { "epoch": 0.5394689177470169, "grad_norm": 2.140625, "learning_rate": 2.2016698048750832e-05, "loss": 0.339, "step": 12286 }, { "epoch": 0.5395567362262204, "grad_norm": 1.6953125, "learning_rate": 2.200982931461743e-05, "loss": 0.3853, "step": 12288 }, { "epoch": 0.5396445547054239, "grad_norm": 2.09375, "learning_rate": 2.200296080947201e-05, "loss": 0.3577, "step": 12290 }, { "epoch": 0.5397323731846274, "grad_norm": 1.6484375, "learning_rate": 2.199609253384057e-05, "loss": 0.3771, "step": 12292 }, { "epoch": 0.5398201916638309, "grad_norm": 1.71875, "learning_rate": 2.198922448824908e-05, "loss": 0.342, "step": 12294 }, { "epoch": 0.5399080101430344, "grad_norm": 1.9765625, "learning_rate": 2.1982356673223516e-05, "loss": 0.3599, "step": 12296 }, { "epoch": 0.5399958286222378, "grad_norm": 1.7890625, "learning_rate": 2.1975489089289788e-05, "loss": 0.3807, "step": 12298 }, { "epoch": 0.5400836471014413, "grad_norm": 1.75, "learning_rate": 2.1968621736973836e-05, "loss": 0.3404, "step": 12300 }, { "epoch": 0.5401714655806448, "grad_norm": 1.765625, "learning_rate": 2.1961754616801553e-05, "loss": 0.3492, "step": 12302 }, { "epoch": 0.5402592840598482, "grad_norm": 1.5703125, "learning_rate": 2.1954887729298844e-05, "loss": 0.3583, "step": 12304 }, { "epoch": 0.5403471025390518, "grad_norm": 1.65625, "learning_rate": 2.1948021074991552e-05, "loss": 0.3626, "step": 12306 }, { "epoch": 0.5404349210182553, "grad_norm": 2.015625, "learning_rate": 2.1941154654405536e-05, "loss": 0.3354, "step": 12308 }, { "epoch": 0.5405227394974588, "grad_norm": 1.6796875, "learning_rate": 2.1934288468066627e-05, "loss": 0.3288, "step": 12310 }, { "epoch": 0.5406105579766622, "grad_norm": 1.6484375, "learning_rate": 2.192742251650065e-05, "loss": 0.3443, "step": 12312 }, { "epoch": 0.5406983764558657, "grad_norm": 1.8671875, "learning_rate": 2.1920556800233392e-05, "loss": 0.3248, "step": 12314 }, { "epoch": 0.5407861949350692, "grad_norm": 1.75, "learning_rate": 2.1913691319790627e-05, "loss": 0.3393, "step": 12316 }, { "epoch": 0.5408740134142727, "grad_norm": 1.640625, "learning_rate": 2.190682607569812e-05, "loss": 0.3671, "step": 12318 }, { "epoch": 0.5409618318934761, "grad_norm": 1.6796875, "learning_rate": 2.189996106848162e-05, "loss": 0.3408, "step": 12320 }, { "epoch": 0.5410496503726797, "grad_norm": 1.7265625, "learning_rate": 2.1893096298666844e-05, "loss": 0.3222, "step": 12322 }, { "epoch": 0.5411374688518832, "grad_norm": 1.7109375, "learning_rate": 2.1886231766779495e-05, "loss": 0.3502, "step": 12324 }, { "epoch": 0.5412252873310867, "grad_norm": 1.6875, "learning_rate": 2.1879367473345263e-05, "loss": 0.3585, "step": 12326 }, { "epoch": 0.5413131058102901, "grad_norm": 1.640625, "learning_rate": 2.187250341888982e-05, "loss": 0.3369, "step": 12328 }, { "epoch": 0.5414009242894936, "grad_norm": 1.828125, "learning_rate": 2.186563960393881e-05, "loss": 0.3656, "step": 12330 }, { "epoch": 0.5414887427686971, "grad_norm": 1.78125, "learning_rate": 2.185877602901788e-05, "loss": 0.3492, "step": 12332 }, { "epoch": 0.5415765612479005, "grad_norm": 1.75, "learning_rate": 2.1851912694652634e-05, "loss": 0.3264, "step": 12334 }, { "epoch": 0.5416643797271041, "grad_norm": 1.71875, "learning_rate": 2.184504960136867e-05, "loss": 0.3433, "step": 12336 }, { "epoch": 0.5417521982063076, "grad_norm": 1.859375, "learning_rate": 2.1838186749691557e-05, "loss": 0.3419, "step": 12338 }, { "epoch": 0.5418400166855111, "grad_norm": 2.03125, "learning_rate": 2.183132414014686e-05, "loss": 0.3602, "step": 12340 }, { "epoch": 0.5419278351647145, "grad_norm": 1.7890625, "learning_rate": 2.1824461773260122e-05, "loss": 0.3398, "step": 12342 }, { "epoch": 0.542015653643918, "grad_norm": 1.671875, "learning_rate": 2.1817599649556858e-05, "loss": 0.3386, "step": 12344 }, { "epoch": 0.5421034721231215, "grad_norm": 1.671875, "learning_rate": 2.181073776956258e-05, "loss": 0.3365, "step": 12346 }, { "epoch": 0.542191290602325, "grad_norm": 1.9453125, "learning_rate": 2.1803876133802773e-05, "loss": 0.316, "step": 12348 }, { "epoch": 0.5422791090815284, "grad_norm": 1.9140625, "learning_rate": 2.179701474280289e-05, "loss": 0.357, "step": 12350 }, { "epoch": 0.542366927560732, "grad_norm": 1.9375, "learning_rate": 2.179015359708839e-05, "loss": 0.3212, "step": 12352 }, { "epoch": 0.5424547460399355, "grad_norm": 1.6953125, "learning_rate": 2.1783292697184694e-05, "loss": 0.3282, "step": 12354 }, { "epoch": 0.542542564519139, "grad_norm": 1.890625, "learning_rate": 2.1776432043617214e-05, "loss": 0.3623, "step": 12356 }, { "epoch": 0.5426303829983424, "grad_norm": 1.7578125, "learning_rate": 2.1769571636911347e-05, "loss": 0.3273, "step": 12358 }, { "epoch": 0.5427182014775459, "grad_norm": 1.6171875, "learning_rate": 2.1762711477592454e-05, "loss": 0.3147, "step": 12360 }, { "epoch": 0.5428060199567494, "grad_norm": 1.9140625, "learning_rate": 2.1755851566185888e-05, "loss": 0.3477, "step": 12362 }, { "epoch": 0.5428938384359528, "grad_norm": 2.125, "learning_rate": 2.174899190321699e-05, "loss": 0.3381, "step": 12364 }, { "epoch": 0.5429816569151563, "grad_norm": 1.7578125, "learning_rate": 2.1742132489211082e-05, "loss": 0.35, "step": 12366 }, { "epoch": 0.5430694753943599, "grad_norm": 1.6328125, "learning_rate": 2.173527332469344e-05, "loss": 0.3457, "step": 12368 }, { "epoch": 0.5431572938735634, "grad_norm": 1.8515625, "learning_rate": 2.1728414410189347e-05, "loss": 0.3632, "step": 12370 }, { "epoch": 0.5432451123527668, "grad_norm": 1.859375, "learning_rate": 2.172155574622407e-05, "loss": 0.3292, "step": 12372 }, { "epoch": 0.5433329308319703, "grad_norm": 1.734375, "learning_rate": 2.1714697333322832e-05, "loss": 0.3531, "step": 12374 }, { "epoch": 0.5434207493111738, "grad_norm": 1.828125, "learning_rate": 2.1707839172010862e-05, "loss": 0.3582, "step": 12376 }, { "epoch": 0.5435085677903773, "grad_norm": 1.703125, "learning_rate": 2.1700981262813357e-05, "loss": 0.3639, "step": 12378 }, { "epoch": 0.5435963862695807, "grad_norm": 1.734375, "learning_rate": 2.1694123606255496e-05, "loss": 0.3343, "step": 12380 }, { "epoch": 0.5436842047487843, "grad_norm": 1.8046875, "learning_rate": 2.1687266202862445e-05, "loss": 0.3665, "step": 12382 }, { "epoch": 0.5437720232279878, "grad_norm": 1.75, "learning_rate": 2.1680409053159345e-05, "loss": 0.3628, "step": 12384 }, { "epoch": 0.5438598417071913, "grad_norm": 1.703125, "learning_rate": 2.1673552157671308e-05, "loss": 0.3443, "step": 12386 }, { "epoch": 0.5439476601863947, "grad_norm": 1.640625, "learning_rate": 2.1666695516923445e-05, "loss": 0.3487, "step": 12388 }, { "epoch": 0.5440354786655982, "grad_norm": 1.7421875, "learning_rate": 2.1659839131440844e-05, "loss": 0.3419, "step": 12390 }, { "epoch": 0.5441232971448017, "grad_norm": 1.65625, "learning_rate": 2.1652983001748558e-05, "loss": 0.3457, "step": 12392 }, { "epoch": 0.5442111156240051, "grad_norm": 1.796875, "learning_rate": 2.1646127128371644e-05, "loss": 0.3612, "step": 12394 }, { "epoch": 0.5442989341032086, "grad_norm": 1.6484375, "learning_rate": 2.1639271511835117e-05, "loss": 0.3232, "step": 12396 }, { "epoch": 0.5443867525824122, "grad_norm": 1.8359375, "learning_rate": 2.163241615266398e-05, "loss": 0.3363, "step": 12398 }, { "epoch": 0.5444745710616157, "grad_norm": 1.765625, "learning_rate": 2.162556105138322e-05, "loss": 0.3479, "step": 12400 }, { "epoch": 0.5445623895408191, "grad_norm": 1.8203125, "learning_rate": 2.1618706208517804e-05, "loss": 0.3632, "step": 12402 }, { "epoch": 0.5446502080200226, "grad_norm": 1.78125, "learning_rate": 2.1611851624592678e-05, "loss": 0.3729, "step": 12404 }, { "epoch": 0.5447380264992261, "grad_norm": 1.6953125, "learning_rate": 2.160499730013276e-05, "loss": 0.3483, "step": 12406 }, { "epoch": 0.5448258449784296, "grad_norm": 1.6875, "learning_rate": 2.1598143235662977e-05, "loss": 0.3499, "step": 12408 }, { "epoch": 0.544913663457633, "grad_norm": 1.7734375, "learning_rate": 2.1591289431708188e-05, "loss": 0.3555, "step": 12410 }, { "epoch": 0.5450014819368365, "grad_norm": 1.765625, "learning_rate": 2.1584435888793276e-05, "loss": 0.3504, "step": 12412 }, { "epoch": 0.5450893004160401, "grad_norm": 1.9765625, "learning_rate": 2.157758260744308e-05, "loss": 0.34, "step": 12414 }, { "epoch": 0.5451771188952436, "grad_norm": 1.84375, "learning_rate": 2.157072958818243e-05, "loss": 0.3406, "step": 12416 }, { "epoch": 0.545264937374447, "grad_norm": 1.84375, "learning_rate": 2.1563876831536127e-05, "loss": 0.3288, "step": 12418 }, { "epoch": 0.5453527558536505, "grad_norm": 1.8984375, "learning_rate": 2.1557024338028968e-05, "loss": 0.3326, "step": 12420 }, { "epoch": 0.545440574332854, "grad_norm": 1.78125, "learning_rate": 2.1550172108185703e-05, "loss": 0.3643, "step": 12422 }, { "epoch": 0.5455283928120574, "grad_norm": 1.7734375, "learning_rate": 2.1543320142531087e-05, "loss": 0.368, "step": 12424 }, { "epoch": 0.5456162112912609, "grad_norm": 1.6640625, "learning_rate": 2.1536468441589855e-05, "loss": 0.3537, "step": 12426 }, { "epoch": 0.5457040297704645, "grad_norm": 1.8359375, "learning_rate": 2.1529617005886687e-05, "loss": 0.3417, "step": 12428 }, { "epoch": 0.545791848249668, "grad_norm": 1.921875, "learning_rate": 2.1522765835946283e-05, "loss": 0.3658, "step": 12430 }, { "epoch": 0.5458796667288714, "grad_norm": 1.765625, "learning_rate": 2.1515914932293302e-05, "loss": 0.3662, "step": 12432 }, { "epoch": 0.5459674852080749, "grad_norm": 1.8125, "learning_rate": 2.1509064295452395e-05, "loss": 0.3521, "step": 12434 }, { "epoch": 0.5460553036872784, "grad_norm": 1.8125, "learning_rate": 2.150221392594818e-05, "loss": 0.3552, "step": 12436 }, { "epoch": 0.5461431221664819, "grad_norm": 1.8984375, "learning_rate": 2.149536382430526e-05, "loss": 0.333, "step": 12438 }, { "epoch": 0.5462309406456853, "grad_norm": 1.875, "learning_rate": 2.1488513991048215e-05, "loss": 0.3332, "step": 12440 }, { "epoch": 0.5463187591248888, "grad_norm": 1.7421875, "learning_rate": 2.1481664426701615e-05, "loss": 0.3674, "step": 12442 }, { "epoch": 0.5464065776040924, "grad_norm": 1.6875, "learning_rate": 2.1474815131789993e-05, "loss": 0.361, "step": 12444 }, { "epoch": 0.5464943960832959, "grad_norm": 1.6953125, "learning_rate": 2.1467966106837877e-05, "loss": 0.3478, "step": 12446 }, { "epoch": 0.5465822145624993, "grad_norm": 1.96875, "learning_rate": 2.1461117352369765e-05, "loss": 0.3433, "step": 12448 }, { "epoch": 0.5466700330417028, "grad_norm": 1.75, "learning_rate": 2.1454268868910126e-05, "loss": 0.3391, "step": 12450 }, { "epoch": 0.5467578515209063, "grad_norm": 1.9765625, "learning_rate": 2.1447420656983432e-05, "loss": 0.3514, "step": 12452 }, { "epoch": 0.5468456700001098, "grad_norm": 1.671875, "learning_rate": 2.1440572717114115e-05, "loss": 0.35, "step": 12454 }, { "epoch": 0.5469334884793132, "grad_norm": 1.75, "learning_rate": 2.1433725049826597e-05, "loss": 0.3498, "step": 12456 }, { "epoch": 0.5470213069585167, "grad_norm": 1.625, "learning_rate": 2.1426877655645274e-05, "loss": 0.3254, "step": 12458 }, { "epoch": 0.5471091254377203, "grad_norm": 1.796875, "learning_rate": 2.142003053509451e-05, "loss": 0.3818, "step": 12460 }, { "epoch": 0.5471969439169238, "grad_norm": 1.8125, "learning_rate": 2.1413183688698664e-05, "loss": 0.3622, "step": 12462 }, { "epoch": 0.5472847623961272, "grad_norm": 1.953125, "learning_rate": 2.1406337116982074e-05, "loss": 0.3316, "step": 12464 }, { "epoch": 0.5473725808753307, "grad_norm": 1.671875, "learning_rate": 2.1399490820469042e-05, "loss": 0.3456, "step": 12466 }, { "epoch": 0.5474603993545342, "grad_norm": 1.765625, "learning_rate": 2.139264479968387e-05, "loss": 0.3177, "step": 12468 }, { "epoch": 0.5475482178337376, "grad_norm": 1.6875, "learning_rate": 2.138579905515083e-05, "loss": 0.3627, "step": 12470 }, { "epoch": 0.5476360363129411, "grad_norm": 1.640625, "learning_rate": 2.137895358739416e-05, "loss": 0.3348, "step": 12472 }, { "epoch": 0.5477238547921446, "grad_norm": 1.6875, "learning_rate": 2.137210839693809e-05, "loss": 0.3557, "step": 12474 }, { "epoch": 0.5478116732713482, "grad_norm": 1.8984375, "learning_rate": 2.1365263484306828e-05, "loss": 0.3701, "step": 12476 }, { "epoch": 0.5478994917505516, "grad_norm": 1.859375, "learning_rate": 2.135841885002456e-05, "loss": 0.3403, "step": 12478 }, { "epoch": 0.5479873102297551, "grad_norm": 1.7265625, "learning_rate": 2.135157449461545e-05, "loss": 0.3498, "step": 12480 }, { "epoch": 0.5480751287089586, "grad_norm": 1.6015625, "learning_rate": 2.1344730418603646e-05, "loss": 0.3349, "step": 12482 }, { "epoch": 0.548162947188162, "grad_norm": 1.8203125, "learning_rate": 2.1337886622513257e-05, "loss": 0.3614, "step": 12484 }, { "epoch": 0.5482507656673655, "grad_norm": 1.921875, "learning_rate": 2.1331043106868386e-05, "loss": 0.3407, "step": 12486 }, { "epoch": 0.548338584146569, "grad_norm": 1.703125, "learning_rate": 2.1324199872193128e-05, "loss": 0.3853, "step": 12488 }, { "epoch": 0.5484264026257726, "grad_norm": 1.6328125, "learning_rate": 2.1317356919011513e-05, "loss": 0.3742, "step": 12490 }, { "epoch": 0.548514221104976, "grad_norm": 1.6171875, "learning_rate": 2.1310514247847587e-05, "loss": 0.3465, "step": 12492 }, { "epoch": 0.5486020395841795, "grad_norm": 1.8046875, "learning_rate": 2.1303671859225364e-05, "loss": 0.3242, "step": 12494 }, { "epoch": 0.548689858063383, "grad_norm": 1.90625, "learning_rate": 2.1296829753668844e-05, "loss": 0.3473, "step": 12496 }, { "epoch": 0.5487776765425865, "grad_norm": 1.7265625, "learning_rate": 2.1289987931701983e-05, "loss": 0.3608, "step": 12498 }, { "epoch": 0.5488654950217899, "grad_norm": 1.4921875, "learning_rate": 2.1283146393848733e-05, "loss": 0.3289, "step": 12500 }, { "epoch": 0.5489533135009934, "grad_norm": 1.6796875, "learning_rate": 2.1276305140633024e-05, "loss": 0.349, "step": 12502 }, { "epoch": 0.5490411319801969, "grad_norm": 1.640625, "learning_rate": 2.1269464172578764e-05, "loss": 0.3402, "step": 12504 }, { "epoch": 0.5491289504594005, "grad_norm": 1.890625, "learning_rate": 2.1262623490209834e-05, "loss": 0.3441, "step": 12506 }, { "epoch": 0.5492167689386039, "grad_norm": 1.734375, "learning_rate": 2.1255783094050087e-05, "loss": 0.3782, "step": 12508 }, { "epoch": 0.5493045874178074, "grad_norm": 1.6796875, "learning_rate": 2.1248942984623367e-05, "loss": 0.3275, "step": 12510 }, { "epoch": 0.5493924058970109, "grad_norm": 1.8671875, "learning_rate": 2.1242103162453495e-05, "loss": 0.3401, "step": 12512 }, { "epoch": 0.5494802243762144, "grad_norm": 1.8359375, "learning_rate": 2.123526362806426e-05, "loss": 0.3422, "step": 12514 }, { "epoch": 0.5495680428554178, "grad_norm": 1.703125, "learning_rate": 2.1228424381979446e-05, "loss": 0.3726, "step": 12516 }, { "epoch": 0.5496558613346213, "grad_norm": 1.5625, "learning_rate": 2.12215854247228e-05, "loss": 0.3331, "step": 12518 }, { "epoch": 0.5497436798138248, "grad_norm": 1.78125, "learning_rate": 2.1214746756818044e-05, "loss": 0.3523, "step": 12520 }, { "epoch": 0.5498314982930284, "grad_norm": 1.9765625, "learning_rate": 2.1207908378788888e-05, "loss": 0.3616, "step": 12522 }, { "epoch": 0.5499193167722318, "grad_norm": 1.7265625, "learning_rate": 2.1201070291159014e-05, "loss": 0.3123, "step": 12524 }, { "epoch": 0.5500071352514353, "grad_norm": 1.8125, "learning_rate": 2.119423249445209e-05, "loss": 0.3212, "step": 12526 }, { "epoch": 0.5500949537306388, "grad_norm": 1.84375, "learning_rate": 2.1187394989191754e-05, "loss": 0.3603, "step": 12528 }, { "epoch": 0.5501827722098422, "grad_norm": 1.78125, "learning_rate": 2.118055777590162e-05, "loss": 0.3227, "step": 12530 }, { "epoch": 0.5502705906890457, "grad_norm": 1.875, "learning_rate": 2.1173720855105295e-05, "loss": 0.3258, "step": 12532 }, { "epoch": 0.5503584091682492, "grad_norm": 1.875, "learning_rate": 2.1166884227326338e-05, "loss": 0.3497, "step": 12534 }, { "epoch": 0.5504462276474528, "grad_norm": 1.8203125, "learning_rate": 2.116004789308831e-05, "loss": 0.3479, "step": 12536 }, { "epoch": 0.5505340461266562, "grad_norm": 1.84375, "learning_rate": 2.115321185291473e-05, "loss": 0.3353, "step": 12538 }, { "epoch": 0.5506218646058597, "grad_norm": 1.6953125, "learning_rate": 2.114637610732911e-05, "loss": 0.3391, "step": 12540 }, { "epoch": 0.5507096830850632, "grad_norm": 1.9296875, "learning_rate": 2.1139540656854934e-05, "loss": 0.3559, "step": 12542 }, { "epoch": 0.5507975015642667, "grad_norm": 1.828125, "learning_rate": 2.1132705502015657e-05, "loss": 0.359, "step": 12544 }, { "epoch": 0.5508853200434701, "grad_norm": 1.78125, "learning_rate": 2.112587064333472e-05, "loss": 0.3664, "step": 12546 }, { "epoch": 0.5509731385226736, "grad_norm": 1.7109375, "learning_rate": 2.1119036081335536e-05, "loss": 0.3311, "step": 12548 }, { "epoch": 0.5510609570018771, "grad_norm": 1.7109375, "learning_rate": 2.111220181654151e-05, "loss": 0.3288, "step": 12550 }, { "epoch": 0.5511487754810807, "grad_norm": 1.703125, "learning_rate": 2.1105367849475993e-05, "loss": 0.331, "step": 12552 }, { "epoch": 0.5512365939602841, "grad_norm": 1.6796875, "learning_rate": 2.1098534180662337e-05, "loss": 0.3219, "step": 12554 }, { "epoch": 0.5513244124394876, "grad_norm": 1.8359375, "learning_rate": 2.109170081062387e-05, "loss": 0.3198, "step": 12556 }, { "epoch": 0.5514122309186911, "grad_norm": 1.7421875, "learning_rate": 2.1084867739883894e-05, "loss": 0.339, "step": 12558 }, { "epoch": 0.5515000493978945, "grad_norm": 1.6875, "learning_rate": 2.1078034968965685e-05, "loss": 0.3557, "step": 12560 }, { "epoch": 0.551587867877098, "grad_norm": 1.84375, "learning_rate": 2.1071202498392493e-05, "loss": 0.3697, "step": 12562 }, { "epoch": 0.5516756863563015, "grad_norm": 1.640625, "learning_rate": 2.106437032868756e-05, "loss": 0.3533, "step": 12564 }, { "epoch": 0.551763504835505, "grad_norm": 1.84375, "learning_rate": 2.105753846037409e-05, "loss": 0.3537, "step": 12566 }, { "epoch": 0.5518513233147085, "grad_norm": 1.7578125, "learning_rate": 2.1050706893975274e-05, "loss": 0.3369, "step": 12568 }, { "epoch": 0.551939141793912, "grad_norm": 1.640625, "learning_rate": 2.1043875630014265e-05, "loss": 0.3325, "step": 12570 }, { "epoch": 0.5520269602731155, "grad_norm": 1.609375, "learning_rate": 2.103704466901421e-05, "loss": 0.3235, "step": 12572 }, { "epoch": 0.552114778752319, "grad_norm": 1.8515625, "learning_rate": 2.1030214011498224e-05, "loss": 0.3459, "step": 12574 }, { "epoch": 0.5522025972315224, "grad_norm": 1.8671875, "learning_rate": 2.10233836579894e-05, "loss": 0.3483, "step": 12576 }, { "epoch": 0.5522904157107259, "grad_norm": 1.6875, "learning_rate": 2.1016553609010807e-05, "loss": 0.326, "step": 12578 }, { "epoch": 0.5523782341899294, "grad_norm": 1.6875, "learning_rate": 2.1009723865085506e-05, "loss": 0.3566, "step": 12580 }, { "epoch": 0.552466052669133, "grad_norm": 1.7421875, "learning_rate": 2.10028944267365e-05, "loss": 0.3525, "step": 12582 }, { "epoch": 0.5525538711483364, "grad_norm": 1.703125, "learning_rate": 2.0996065294486798e-05, "loss": 0.3291, "step": 12584 }, { "epoch": 0.5526416896275399, "grad_norm": 1.53125, "learning_rate": 2.098923646885937e-05, "loss": 0.3655, "step": 12586 }, { "epoch": 0.5527295081067434, "grad_norm": 1.5625, "learning_rate": 2.0982407950377177e-05, "loss": 0.3266, "step": 12588 }, { "epoch": 0.5528173265859468, "grad_norm": 1.71875, "learning_rate": 2.0975579739563144e-05, "loss": 0.348, "step": 12590 }, { "epoch": 0.5529051450651503, "grad_norm": 1.8046875, "learning_rate": 2.096875183694018e-05, "loss": 0.3656, "step": 12592 }, { "epoch": 0.5529929635443538, "grad_norm": 1.7578125, "learning_rate": 2.0961924243031177e-05, "loss": 0.3601, "step": 12594 }, { "epoch": 0.5530807820235573, "grad_norm": 1.6328125, "learning_rate": 2.0955096958358973e-05, "loss": 0.365, "step": 12596 }, { "epoch": 0.5531686005027608, "grad_norm": 1.734375, "learning_rate": 2.0948269983446413e-05, "loss": 0.3289, "step": 12598 }, { "epoch": 0.5532564189819643, "grad_norm": 1.703125, "learning_rate": 2.0941443318816313e-05, "loss": 0.3671, "step": 12600 }, { "epoch": 0.5533442374611678, "grad_norm": 1.90625, "learning_rate": 2.093461696499145e-05, "loss": 0.34, "step": 12602 }, { "epoch": 0.5534320559403713, "grad_norm": 1.9765625, "learning_rate": 2.0927790922494607e-05, "loss": 0.3728, "step": 12604 }, { "epoch": 0.5535198744195747, "grad_norm": 1.84375, "learning_rate": 2.0920965191848503e-05, "loss": 0.3345, "step": 12606 }, { "epoch": 0.5536076928987782, "grad_norm": 1.9765625, "learning_rate": 2.091413977357586e-05, "loss": 0.3539, "step": 12608 }, { "epoch": 0.5536955113779817, "grad_norm": 1.828125, "learning_rate": 2.0907314668199386e-05, "loss": 0.3434, "step": 12610 }, { "epoch": 0.5537833298571851, "grad_norm": 1.7890625, "learning_rate": 2.0900489876241724e-05, "loss": 0.3751, "step": 12612 }, { "epoch": 0.5538711483363887, "grad_norm": 1.96875, "learning_rate": 2.089366539822553e-05, "loss": 0.3286, "step": 12614 }, { "epoch": 0.5539589668155922, "grad_norm": 1.6484375, "learning_rate": 2.0886841234673426e-05, "loss": 0.3511, "step": 12616 }, { "epoch": 0.5540467852947957, "grad_norm": 1.96875, "learning_rate": 2.0880017386108005e-05, "loss": 0.3467, "step": 12618 }, { "epoch": 0.5541346037739991, "grad_norm": 1.765625, "learning_rate": 2.0873193853051844e-05, "loss": 0.3602, "step": 12620 }, { "epoch": 0.5542224222532026, "grad_norm": 1.8046875, "learning_rate": 2.0866370636027484e-05, "loss": 0.3364, "step": 12622 }, { "epoch": 0.5543102407324061, "grad_norm": 1.8671875, "learning_rate": 2.0859547735557453e-05, "loss": 0.3398, "step": 12624 }, { "epoch": 0.5543980592116096, "grad_norm": 1.6875, "learning_rate": 2.0852725152164247e-05, "loss": 0.3592, "step": 12626 }, { "epoch": 0.5544858776908131, "grad_norm": 1.84375, "learning_rate": 2.0845902886370342e-05, "loss": 0.364, "step": 12628 }, { "epoch": 0.5545736961700166, "grad_norm": 1.6953125, "learning_rate": 2.0839080938698193e-05, "loss": 0.3523, "step": 12630 }, { "epoch": 0.5546615146492201, "grad_norm": 1.734375, "learning_rate": 2.0832259309670223e-05, "loss": 0.3408, "step": 12632 }, { "epoch": 0.5547493331284236, "grad_norm": 2.015625, "learning_rate": 2.082543799980883e-05, "loss": 0.3592, "step": 12634 }, { "epoch": 0.554837151607627, "grad_norm": 1.828125, "learning_rate": 2.08186170096364e-05, "loss": 0.3799, "step": 12636 }, { "epoch": 0.5549249700868305, "grad_norm": 1.765625, "learning_rate": 2.0811796339675277e-05, "loss": 0.3433, "step": 12638 }, { "epoch": 0.555012788566034, "grad_norm": 1.765625, "learning_rate": 2.0804975990447802e-05, "loss": 0.3505, "step": 12640 }, { "epoch": 0.5551006070452374, "grad_norm": 1.984375, "learning_rate": 2.079815596247627e-05, "loss": 0.3466, "step": 12642 }, { "epoch": 0.555188425524441, "grad_norm": 1.7421875, "learning_rate": 2.0791336256282958e-05, "loss": 0.3317, "step": 12644 }, { "epoch": 0.5552762440036445, "grad_norm": 1.703125, "learning_rate": 2.078451687239013e-05, "loss": 0.3306, "step": 12646 }, { "epoch": 0.555364062482848, "grad_norm": 1.921875, "learning_rate": 2.0777697811320003e-05, "loss": 0.3522, "step": 12648 }, { "epoch": 0.5554518809620514, "grad_norm": 1.671875, "learning_rate": 2.0770879073594794e-05, "loss": 0.3407, "step": 12650 }, { "epoch": 0.5555396994412549, "grad_norm": 1.765625, "learning_rate": 2.0764060659736674e-05, "loss": 0.3543, "step": 12652 }, { "epoch": 0.5556275179204584, "grad_norm": 1.6875, "learning_rate": 2.0757242570267808e-05, "loss": 0.3488, "step": 12654 }, { "epoch": 0.5557153363996619, "grad_norm": 1.640625, "learning_rate": 2.0750424805710326e-05, "loss": 0.344, "step": 12656 }, { "epoch": 0.5558031548788653, "grad_norm": 1.734375, "learning_rate": 2.0743607366586327e-05, "loss": 0.3543, "step": 12658 }, { "epoch": 0.5558909733580689, "grad_norm": 1.640625, "learning_rate": 2.0736790253417893e-05, "loss": 0.3518, "step": 12660 }, { "epoch": 0.5559787918372724, "grad_norm": 1.734375, "learning_rate": 2.0729973466727086e-05, "loss": 0.3597, "step": 12662 }, { "epoch": 0.5560666103164759, "grad_norm": 1.6484375, "learning_rate": 2.0723157007035938e-05, "loss": 0.3324, "step": 12664 }, { "epoch": 0.5561544287956793, "grad_norm": 1.6484375, "learning_rate": 2.0716340874866453e-05, "loss": 0.3592, "step": 12666 }, { "epoch": 0.5562422472748828, "grad_norm": 1.90625, "learning_rate": 2.0709525070740604e-05, "loss": 0.3427, "step": 12668 }, { "epoch": 0.5563300657540863, "grad_norm": 1.609375, "learning_rate": 2.0702709595180357e-05, "loss": 0.3546, "step": 12670 }, { "epoch": 0.5564178842332898, "grad_norm": 1.6640625, "learning_rate": 2.0695894448707647e-05, "loss": 0.3577, "step": 12672 }, { "epoch": 0.5565057027124932, "grad_norm": 1.6953125, "learning_rate": 2.0689079631844363e-05, "loss": 0.3441, "step": 12674 }, { "epoch": 0.5565935211916968, "grad_norm": 1.8203125, "learning_rate": 2.0682265145112395e-05, "loss": 0.3469, "step": 12676 }, { "epoch": 0.5566813396709003, "grad_norm": 1.7421875, "learning_rate": 2.0675450989033595e-05, "loss": 0.368, "step": 12678 }, { "epoch": 0.5567691581501037, "grad_norm": 1.671875, "learning_rate": 2.0668637164129796e-05, "loss": 0.333, "step": 12680 }, { "epoch": 0.5568569766293072, "grad_norm": 1.6953125, "learning_rate": 2.0661823670922806e-05, "loss": 0.3168, "step": 12682 }, { "epoch": 0.5569447951085107, "grad_norm": 1.6875, "learning_rate": 2.0655010509934395e-05, "loss": 0.3233, "step": 12684 }, { "epoch": 0.5570326135877142, "grad_norm": 2.046875, "learning_rate": 2.064819768168632e-05, "loss": 0.3385, "step": 12686 }, { "epoch": 0.5571204320669176, "grad_norm": 1.71875, "learning_rate": 2.064138518670031e-05, "loss": 0.3491, "step": 12688 }, { "epoch": 0.5572082505461212, "grad_norm": 1.6875, "learning_rate": 2.0634573025498067e-05, "loss": 0.329, "step": 12690 }, { "epoch": 0.5572960690253247, "grad_norm": 1.8203125, "learning_rate": 2.0627761198601273e-05, "loss": 0.3355, "step": 12692 }, { "epoch": 0.5573838875045282, "grad_norm": 1.6328125, "learning_rate": 2.062094970653157e-05, "loss": 0.349, "step": 12694 }, { "epoch": 0.5574717059837316, "grad_norm": 1.75, "learning_rate": 2.0614138549810587e-05, "loss": 0.3676, "step": 12696 }, { "epoch": 0.5575595244629351, "grad_norm": 1.71875, "learning_rate": 2.0607327728959928e-05, "loss": 0.3912, "step": 12698 }, { "epoch": 0.5576473429421386, "grad_norm": 1.9375, "learning_rate": 2.060051724450116e-05, "loss": 0.3439, "step": 12700 }, { "epoch": 0.557735161421342, "grad_norm": 1.7578125, "learning_rate": 2.059370709695585e-05, "loss": 0.3781, "step": 12702 }, { "epoch": 0.5578229799005455, "grad_norm": 1.6953125, "learning_rate": 2.0586897286845498e-05, "loss": 0.3375, "step": 12704 }, { "epoch": 0.5579107983797491, "grad_norm": 1.6796875, "learning_rate": 2.0580087814691607e-05, "loss": 0.3421, "step": 12706 }, { "epoch": 0.5579986168589526, "grad_norm": 1.875, "learning_rate": 2.0573278681015657e-05, "loss": 0.3377, "step": 12708 }, { "epoch": 0.558086435338156, "grad_norm": 1.671875, "learning_rate": 2.056646988633908e-05, "loss": 0.3251, "step": 12710 }, { "epoch": 0.5581742538173595, "grad_norm": 1.640625, "learning_rate": 2.0559661431183303e-05, "loss": 0.3299, "step": 12712 }, { "epoch": 0.558262072296563, "grad_norm": 1.65625, "learning_rate": 2.0552853316069717e-05, "loss": 0.3172, "step": 12714 }, { "epoch": 0.5583498907757665, "grad_norm": 1.7109375, "learning_rate": 2.054604554151969e-05, "loss": 0.3287, "step": 12716 }, { "epoch": 0.5584377092549699, "grad_norm": 1.7890625, "learning_rate": 2.053923810805457e-05, "loss": 0.3309, "step": 12718 }, { "epoch": 0.5585255277341734, "grad_norm": 1.71875, "learning_rate": 2.0532431016195658e-05, "loss": 0.3412, "step": 12720 }, { "epoch": 0.558613346213377, "grad_norm": 1.859375, "learning_rate": 2.052562426646425e-05, "loss": 0.3359, "step": 12722 }, { "epoch": 0.5587011646925805, "grad_norm": 1.734375, "learning_rate": 2.0518817859381607e-05, "loss": 0.351, "step": 12724 }, { "epoch": 0.5587889831717839, "grad_norm": 2.28125, "learning_rate": 2.0512011795468972e-05, "loss": 0.3586, "step": 12726 }, { "epoch": 0.5588768016509874, "grad_norm": 1.6640625, "learning_rate": 2.0505206075247552e-05, "loss": 0.3383, "step": 12728 }, { "epoch": 0.5589646201301909, "grad_norm": 1.625, "learning_rate": 2.0498400699238522e-05, "loss": 0.3388, "step": 12730 }, { "epoch": 0.5590524386093944, "grad_norm": 1.7109375, "learning_rate": 2.0491595667963042e-05, "loss": 0.3217, "step": 12732 }, { "epoch": 0.5591402570885978, "grad_norm": 1.9453125, "learning_rate": 2.0484790981942263e-05, "loss": 0.3533, "step": 12734 }, { "epoch": 0.5592280755678014, "grad_norm": 1.59375, "learning_rate": 2.047798664169726e-05, "loss": 0.3279, "step": 12736 }, { "epoch": 0.5593158940470049, "grad_norm": 1.8671875, "learning_rate": 2.0471182647749123e-05, "loss": 0.3693, "step": 12738 }, { "epoch": 0.5594037125262084, "grad_norm": 1.6796875, "learning_rate": 2.0464379000618906e-05, "loss": 0.3364, "step": 12740 }, { "epoch": 0.5594915310054118, "grad_norm": 1.6875, "learning_rate": 2.0457575700827634e-05, "loss": 0.3385, "step": 12742 }, { "epoch": 0.5595793494846153, "grad_norm": 1.625, "learning_rate": 2.0450772748896307e-05, "loss": 0.3681, "step": 12744 }, { "epoch": 0.5596671679638188, "grad_norm": 1.6015625, "learning_rate": 2.0443970145345887e-05, "loss": 0.3346, "step": 12746 }, { "epoch": 0.5597549864430222, "grad_norm": 1.8359375, "learning_rate": 2.0437167890697324e-05, "loss": 0.328, "step": 12748 }, { "epoch": 0.5598428049222257, "grad_norm": 1.8046875, "learning_rate": 2.0430365985471542e-05, "loss": 0.3155, "step": 12750 }, { "epoch": 0.5599306234014293, "grad_norm": 1.6484375, "learning_rate": 2.0423564430189427e-05, "loss": 0.3309, "step": 12752 }, { "epoch": 0.5600184418806328, "grad_norm": 1.8671875, "learning_rate": 2.0416763225371848e-05, "loss": 0.3572, "step": 12754 }, { "epoch": 0.5601062603598362, "grad_norm": 1.8203125, "learning_rate": 2.0409962371539637e-05, "loss": 0.341, "step": 12756 }, { "epoch": 0.5601940788390397, "grad_norm": 1.78125, "learning_rate": 2.0403161869213608e-05, "loss": 0.3409, "step": 12758 }, { "epoch": 0.5602818973182432, "grad_norm": 1.78125, "learning_rate": 2.0396361718914545e-05, "loss": 0.3501, "step": 12760 }, { "epoch": 0.5603697157974467, "grad_norm": 1.8671875, "learning_rate": 2.0389561921163203e-05, "loss": 0.3461, "step": 12762 }, { "epoch": 0.5604575342766501, "grad_norm": 1.8671875, "learning_rate": 2.0382762476480326e-05, "loss": 0.3411, "step": 12764 }, { "epoch": 0.5605453527558536, "grad_norm": 1.8984375, "learning_rate": 2.03759633853866e-05, "loss": 0.3693, "step": 12766 }, { "epoch": 0.5606331712350572, "grad_norm": 1.90625, "learning_rate": 2.0369164648402704e-05, "loss": 0.3263, "step": 12768 }, { "epoch": 0.5607209897142607, "grad_norm": 1.6640625, "learning_rate": 2.0362366266049288e-05, "loss": 0.3549, "step": 12770 }, { "epoch": 0.5608088081934641, "grad_norm": 1.6953125, "learning_rate": 2.0355568238846972e-05, "loss": 0.3589, "step": 12772 }, { "epoch": 0.5608966266726676, "grad_norm": 1.6015625, "learning_rate": 2.0348770567316354e-05, "loss": 0.336, "step": 12774 }, { "epoch": 0.5609844451518711, "grad_norm": 1.7734375, "learning_rate": 2.0341973251978003e-05, "loss": 0.3383, "step": 12776 }, { "epoch": 0.5610722636310745, "grad_norm": 1.6171875, "learning_rate": 2.0335176293352456e-05, "loss": 0.331, "step": 12778 }, { "epoch": 0.561160082110278, "grad_norm": 1.875, "learning_rate": 2.0328379691960235e-05, "loss": 0.3607, "step": 12780 }, { "epoch": 0.5612479005894816, "grad_norm": 1.7109375, "learning_rate": 2.032158344832181e-05, "loss": 0.3026, "step": 12782 }, { "epoch": 0.5613357190686851, "grad_norm": 1.90625, "learning_rate": 2.0314787562957645e-05, "loss": 0.3605, "step": 12784 }, { "epoch": 0.5614235375478885, "grad_norm": 2.015625, "learning_rate": 2.0307992036388167e-05, "loss": 0.3378, "step": 12786 }, { "epoch": 0.561511356027092, "grad_norm": 1.796875, "learning_rate": 2.030119686913379e-05, "loss": 0.3292, "step": 12788 }, { "epoch": 0.5615991745062955, "grad_norm": 1.734375, "learning_rate": 2.0294402061714887e-05, "loss": 0.3457, "step": 12790 }, { "epoch": 0.561686992985499, "grad_norm": 1.6640625, "learning_rate": 2.0287607614651797e-05, "loss": 0.3479, "step": 12792 }, { "epoch": 0.5617748114647024, "grad_norm": 1.65625, "learning_rate": 2.0280813528464855e-05, "loss": 0.3728, "step": 12794 }, { "epoch": 0.5618626299439059, "grad_norm": 1.84375, "learning_rate": 2.0274019803674335e-05, "loss": 0.3711, "step": 12796 }, { "epoch": 0.5619504484231095, "grad_norm": 1.8671875, "learning_rate": 2.026722644080051e-05, "loss": 0.3665, "step": 12798 }, { "epoch": 0.562038266902313, "grad_norm": 1.6953125, "learning_rate": 2.0260433440363617e-05, "loss": 0.3475, "step": 12800 }, { "epoch": 0.5621260853815164, "grad_norm": 1.6328125, "learning_rate": 2.025364080288387e-05, "loss": 0.3542, "step": 12802 }, { "epoch": 0.5622139038607199, "grad_norm": 1.9609375, "learning_rate": 2.0246848528881455e-05, "loss": 0.3597, "step": 12804 }, { "epoch": 0.5623017223399234, "grad_norm": 1.9453125, "learning_rate": 2.0240056618876514e-05, "loss": 0.3296, "step": 12806 }, { "epoch": 0.5623895408191268, "grad_norm": 1.671875, "learning_rate": 2.0233265073389173e-05, "loss": 0.3503, "step": 12808 }, { "epoch": 0.5624773592983303, "grad_norm": 1.609375, "learning_rate": 2.022647389293954e-05, "loss": 0.3532, "step": 12810 }, { "epoch": 0.5625651777775338, "grad_norm": 1.640625, "learning_rate": 2.0219683078047678e-05, "loss": 0.3695, "step": 12812 }, { "epoch": 0.5626529962567374, "grad_norm": 1.6875, "learning_rate": 2.0212892629233633e-05, "loss": 0.3369, "step": 12814 }, { "epoch": 0.5627408147359408, "grad_norm": 1.734375, "learning_rate": 2.0206102547017426e-05, "loss": 0.3601, "step": 12816 }, { "epoch": 0.5628286332151443, "grad_norm": 1.671875, "learning_rate": 2.0199312831919024e-05, "loss": 0.3639, "step": 12818 }, { "epoch": 0.5629164516943478, "grad_norm": 1.796875, "learning_rate": 2.0192523484458397e-05, "loss": 0.3868, "step": 12820 }, { "epoch": 0.5630042701735513, "grad_norm": 1.7578125, "learning_rate": 2.018573450515548e-05, "loss": 0.3354, "step": 12822 }, { "epoch": 0.5630920886527547, "grad_norm": 1.734375, "learning_rate": 2.0178945894530165e-05, "loss": 0.3583, "step": 12824 }, { "epoch": 0.5631799071319582, "grad_norm": 1.71875, "learning_rate": 2.0172157653102337e-05, "loss": 0.3593, "step": 12826 }, { "epoch": 0.5632677256111617, "grad_norm": 1.703125, "learning_rate": 2.0165369781391824e-05, "loss": 0.3227, "step": 12828 }, { "epoch": 0.5633555440903653, "grad_norm": 1.9453125, "learning_rate": 2.0158582279918458e-05, "loss": 0.3631, "step": 12830 }, { "epoch": 0.5634433625695687, "grad_norm": 1.78125, "learning_rate": 2.015179514920202e-05, "loss": 0.3561, "step": 12832 }, { "epoch": 0.5635311810487722, "grad_norm": 1.7109375, "learning_rate": 2.0145008389762265e-05, "loss": 0.3265, "step": 12834 }, { "epoch": 0.5636189995279757, "grad_norm": 1.6953125, "learning_rate": 2.0138222002118934e-05, "loss": 0.3476, "step": 12836 }, { "epoch": 0.5637068180071791, "grad_norm": 1.859375, "learning_rate": 2.013143598679173e-05, "loss": 0.3589, "step": 12838 }, { "epoch": 0.5637946364863826, "grad_norm": 1.703125, "learning_rate": 2.0124650344300323e-05, "loss": 0.3578, "step": 12840 }, { "epoch": 0.5638824549655861, "grad_norm": 1.71875, "learning_rate": 2.0117865075164366e-05, "loss": 0.3233, "step": 12842 }, { "epoch": 0.5639702734447897, "grad_norm": 1.609375, "learning_rate": 2.011108017990347e-05, "loss": 0.3266, "step": 12844 }, { "epoch": 0.5640580919239931, "grad_norm": 1.703125, "learning_rate": 2.010429565903722e-05, "loss": 0.3269, "step": 12846 }, { "epoch": 0.5641459104031966, "grad_norm": 1.921875, "learning_rate": 2.009751151308519e-05, "loss": 0.3282, "step": 12848 }, { "epoch": 0.5642337288824001, "grad_norm": 1.828125, "learning_rate": 2.0090727742566894e-05, "loss": 0.3494, "step": 12850 }, { "epoch": 0.5643215473616036, "grad_norm": 1.875, "learning_rate": 2.0083944348001856e-05, "loss": 0.3248, "step": 12852 }, { "epoch": 0.564409365840807, "grad_norm": 1.9296875, "learning_rate": 2.0077161329909533e-05, "loss": 0.3379, "step": 12854 }, { "epoch": 0.5644971843200105, "grad_norm": 1.5859375, "learning_rate": 2.007037868880938e-05, "loss": 0.3335, "step": 12856 }, { "epoch": 0.564585002799214, "grad_norm": 1.734375, "learning_rate": 2.0063596425220803e-05, "loss": 0.3479, "step": 12858 }, { "epoch": 0.5646728212784176, "grad_norm": 1.8828125, "learning_rate": 2.0056814539663196e-05, "loss": 0.3678, "step": 12860 }, { "epoch": 0.564760639757621, "grad_norm": 1.53125, "learning_rate": 2.0050033032655915e-05, "loss": 0.3086, "step": 12862 }, { "epoch": 0.5648484582368245, "grad_norm": 1.65625, "learning_rate": 2.004325190471829e-05, "loss": 0.354, "step": 12864 }, { "epoch": 0.564936276716028, "grad_norm": 1.6015625, "learning_rate": 2.0036471156369632e-05, "loss": 0.3582, "step": 12866 }, { "epoch": 0.5650240951952314, "grad_norm": 1.6953125, "learning_rate": 2.0029690788129194e-05, "loss": 0.3552, "step": 12868 }, { "epoch": 0.5651119136744349, "grad_norm": 1.6484375, "learning_rate": 2.0022910800516227e-05, "loss": 0.3399, "step": 12870 }, { "epoch": 0.5651997321536384, "grad_norm": 1.6796875, "learning_rate": 2.001613119404994e-05, "loss": 0.3803, "step": 12872 }, { "epoch": 0.5652875506328419, "grad_norm": 1.609375, "learning_rate": 2.000935196924953e-05, "loss": 0.3496, "step": 12874 }, { "epoch": 0.5653753691120454, "grad_norm": 1.640625, "learning_rate": 2.0002573126634136e-05, "loss": 0.3368, "step": 12876 }, { "epoch": 0.5654631875912489, "grad_norm": 1.7265625, "learning_rate": 1.99957946667229e-05, "loss": 0.3569, "step": 12878 }, { "epoch": 0.5655510060704524, "grad_norm": 1.6328125, "learning_rate": 1.9989016590034896e-05, "loss": 0.3643, "step": 12880 }, { "epoch": 0.5656388245496559, "grad_norm": 1.7109375, "learning_rate": 1.9982238897089207e-05, "loss": 0.3609, "step": 12882 }, { "epoch": 0.5657266430288593, "grad_norm": 1.8671875, "learning_rate": 1.9975461588404866e-05, "loss": 0.3383, "step": 12884 }, { "epoch": 0.5658144615080628, "grad_norm": 1.671875, "learning_rate": 1.9968684664500888e-05, "loss": 0.3505, "step": 12886 }, { "epoch": 0.5659022799872663, "grad_norm": 1.7578125, "learning_rate": 1.996190812589624e-05, "loss": 0.3292, "step": 12888 }, { "epoch": 0.5659900984664699, "grad_norm": 1.6171875, "learning_rate": 1.995513197310987e-05, "loss": 0.3501, "step": 12890 }, { "epoch": 0.5660779169456733, "grad_norm": 1.6484375, "learning_rate": 1.994835620666071e-05, "loss": 0.3541, "step": 12892 }, { "epoch": 0.5661657354248768, "grad_norm": 1.546875, "learning_rate": 1.994158082706764e-05, "loss": 0.3566, "step": 12894 }, { "epoch": 0.5662535539040803, "grad_norm": 1.6875, "learning_rate": 1.9934805834849518e-05, "loss": 0.3237, "step": 12896 }, { "epoch": 0.5663413723832837, "grad_norm": 1.65625, "learning_rate": 1.992803123052518e-05, "loss": 0.3658, "step": 12898 }, { "epoch": 0.5664291908624872, "grad_norm": 1.8515625, "learning_rate": 1.992125701461343e-05, "loss": 0.3631, "step": 12900 }, { "epoch": 0.5665170093416907, "grad_norm": 1.59375, "learning_rate": 1.9914483187633042e-05, "loss": 0.3414, "step": 12902 }, { "epoch": 0.5666048278208942, "grad_norm": 1.6875, "learning_rate": 1.990770975010274e-05, "loss": 0.34, "step": 12904 }, { "epoch": 0.5666926463000977, "grad_norm": 1.7578125, "learning_rate": 1.990093670254125e-05, "loss": 0.3301, "step": 12906 }, { "epoch": 0.5667804647793012, "grad_norm": 1.6875, "learning_rate": 1.9894164045467246e-05, "loss": 0.3471, "step": 12908 }, { "epoch": 0.5668682832585047, "grad_norm": 1.7421875, "learning_rate": 1.9887391779399383e-05, "loss": 0.3316, "step": 12910 }, { "epoch": 0.5669561017377082, "grad_norm": 1.75, "learning_rate": 1.9880619904856288e-05, "loss": 0.3578, "step": 12912 }, { "epoch": 0.5670439202169116, "grad_norm": 1.5625, "learning_rate": 1.987384842235655e-05, "loss": 0.339, "step": 12914 }, { "epoch": 0.5671317386961151, "grad_norm": 1.6796875, "learning_rate": 1.9867077332418723e-05, "loss": 0.3202, "step": 12916 }, { "epoch": 0.5672195571753186, "grad_norm": 1.8203125, "learning_rate": 1.9860306635561353e-05, "loss": 0.3416, "step": 12918 }, { "epoch": 0.567307375654522, "grad_norm": 1.6328125, "learning_rate": 1.985353633230293e-05, "loss": 0.3111, "step": 12920 }, { "epoch": 0.5673951941337256, "grad_norm": 1.71875, "learning_rate": 1.984676642316192e-05, "loss": 0.3586, "step": 12922 }, { "epoch": 0.5674830126129291, "grad_norm": 1.6328125, "learning_rate": 1.983999690865678e-05, "loss": 0.3161, "step": 12924 }, { "epoch": 0.5675708310921326, "grad_norm": 1.6953125, "learning_rate": 1.9833227789305906e-05, "loss": 0.3488, "step": 12926 }, { "epoch": 0.567658649571336, "grad_norm": 1.8203125, "learning_rate": 1.9826459065627694e-05, "loss": 0.3508, "step": 12928 }, { "epoch": 0.5677464680505395, "grad_norm": 2.03125, "learning_rate": 1.9819690738140484e-05, "loss": 0.332, "step": 12930 }, { "epoch": 0.567834286529743, "grad_norm": 1.703125, "learning_rate": 1.9812922807362598e-05, "loss": 0.3561, "step": 12932 }, { "epoch": 0.5679221050089465, "grad_norm": 1.6171875, "learning_rate": 1.980615527381233e-05, "loss": 0.3248, "step": 12934 }, { "epoch": 0.56800992348815, "grad_norm": 1.9765625, "learning_rate": 1.9799388138007928e-05, "loss": 0.3775, "step": 12936 }, { "epoch": 0.5680977419673535, "grad_norm": 1.75, "learning_rate": 1.979262140046764e-05, "loss": 0.3259, "step": 12938 }, { "epoch": 0.568185560446557, "grad_norm": 1.84375, "learning_rate": 1.978585506170965e-05, "loss": 0.3159, "step": 12940 }, { "epoch": 0.5682733789257605, "grad_norm": 1.8984375, "learning_rate": 1.9779089122252125e-05, "loss": 0.3324, "step": 12942 }, { "epoch": 0.5683611974049639, "grad_norm": 1.6875, "learning_rate": 1.977232358261321e-05, "loss": 0.3292, "step": 12944 }, { "epoch": 0.5684490158841674, "grad_norm": 1.8125, "learning_rate": 1.9765558443311004e-05, "loss": 0.3388, "step": 12946 }, { "epoch": 0.5685368343633709, "grad_norm": 1.9375, "learning_rate": 1.9758793704863606e-05, "loss": 0.334, "step": 12948 }, { "epoch": 0.5686246528425744, "grad_norm": 1.7109375, "learning_rate": 1.9752029367789023e-05, "loss": 0.3225, "step": 12950 }, { "epoch": 0.5687124713217779, "grad_norm": 1.75, "learning_rate": 1.9745265432605293e-05, "loss": 0.3441, "step": 12952 }, { "epoch": 0.5688002898009814, "grad_norm": 1.8984375, "learning_rate": 1.97385018998304e-05, "loss": 0.3379, "step": 12954 }, { "epoch": 0.5688881082801849, "grad_norm": 1.75, "learning_rate": 1.9731738769982287e-05, "loss": 0.3578, "step": 12956 }, { "epoch": 0.5689759267593884, "grad_norm": 2.15625, "learning_rate": 1.9724976043578882e-05, "loss": 0.3437, "step": 12958 }, { "epoch": 0.5690637452385918, "grad_norm": 1.640625, "learning_rate": 1.971821372113808e-05, "loss": 0.3526, "step": 12960 }, { "epoch": 0.5691515637177953, "grad_norm": 1.7109375, "learning_rate": 1.9711451803177734e-05, "loss": 0.3622, "step": 12962 }, { "epoch": 0.5692393821969988, "grad_norm": 2.140625, "learning_rate": 1.970469029021568e-05, "loss": 0.3646, "step": 12964 }, { "epoch": 0.5693272006762022, "grad_norm": 1.703125, "learning_rate": 1.9697929182769712e-05, "loss": 0.3192, "step": 12966 }, { "epoch": 0.5694150191554058, "grad_norm": 1.671875, "learning_rate": 1.9691168481357594e-05, "loss": 0.3408, "step": 12968 }, { "epoch": 0.5695028376346093, "grad_norm": 1.7421875, "learning_rate": 1.9684408186497068e-05, "loss": 0.3438, "step": 12970 }, { "epoch": 0.5695906561138128, "grad_norm": 1.703125, "learning_rate": 1.9677648298705838e-05, "loss": 0.3413, "step": 12972 }, { "epoch": 0.5696784745930162, "grad_norm": 1.9453125, "learning_rate": 1.9670888818501576e-05, "loss": 0.35, "step": 12974 }, { "epoch": 0.5697662930722197, "grad_norm": 1.6640625, "learning_rate": 1.9664129746401933e-05, "loss": 0.327, "step": 12976 }, { "epoch": 0.5698541115514232, "grad_norm": 1.8125, "learning_rate": 1.9657371082924515e-05, "loss": 0.3242, "step": 12978 }, { "epoch": 0.5699419300306267, "grad_norm": 1.859375, "learning_rate": 1.9650612828586895e-05, "loss": 0.3106, "step": 12980 }, { "epoch": 0.5700297485098302, "grad_norm": 1.6328125, "learning_rate": 1.964385498390662e-05, "loss": 0.3208, "step": 12982 }, { "epoch": 0.5701175669890337, "grad_norm": 1.8046875, "learning_rate": 1.9637097549401223e-05, "loss": 0.3237, "step": 12984 }, { "epoch": 0.5702053854682372, "grad_norm": 1.734375, "learning_rate": 1.9630340525588176e-05, "loss": 0.3198, "step": 12986 }, { "epoch": 0.5702932039474407, "grad_norm": 1.6953125, "learning_rate": 1.9623583912984938e-05, "loss": 0.3426, "step": 12988 }, { "epoch": 0.5703810224266441, "grad_norm": 1.7890625, "learning_rate": 1.961682771210894e-05, "loss": 0.3656, "step": 12990 }, { "epoch": 0.5704688409058476, "grad_norm": 1.7421875, "learning_rate": 1.961007192347756e-05, "loss": 0.3708, "step": 12992 }, { "epoch": 0.5705566593850511, "grad_norm": 1.6328125, "learning_rate": 1.9603316547608165e-05, "loss": 0.3506, "step": 12994 }, { "epoch": 0.5706444778642545, "grad_norm": 1.71875, "learning_rate": 1.9596561585018082e-05, "loss": 0.3227, "step": 12996 }, { "epoch": 0.5707322963434581, "grad_norm": 1.8984375, "learning_rate": 1.9589807036224607e-05, "loss": 0.3619, "step": 12998 }, { "epoch": 0.5708201148226616, "grad_norm": 1.6015625, "learning_rate": 1.9583052901745012e-05, "loss": 0.3424, "step": 13000 }, { "epoch": 0.5709079333018651, "grad_norm": 1.7890625, "learning_rate": 1.957629918209652e-05, "loss": 0.3276, "step": 13002 }, { "epoch": 0.5709957517810685, "grad_norm": 1.53125, "learning_rate": 1.9569545877796336e-05, "loss": 0.3318, "step": 13004 }, { "epoch": 0.571083570260272, "grad_norm": 1.7734375, "learning_rate": 1.9562792989361628e-05, "loss": 0.3371, "step": 13006 }, { "epoch": 0.5711713887394755, "grad_norm": 1.703125, "learning_rate": 1.955604051730954e-05, "loss": 0.3476, "step": 13008 }, { "epoch": 0.571259207218679, "grad_norm": 1.8671875, "learning_rate": 1.9549288462157185e-05, "loss": 0.3263, "step": 13010 }, { "epoch": 0.5713470256978824, "grad_norm": 1.671875, "learning_rate": 1.9542536824421613e-05, "loss": 0.3142, "step": 13012 }, { "epoch": 0.571434844177086, "grad_norm": 1.7109375, "learning_rate": 1.953578560461988e-05, "loss": 0.356, "step": 13014 }, { "epoch": 0.5715226626562895, "grad_norm": 1.6328125, "learning_rate": 1.9529034803269e-05, "loss": 0.3482, "step": 13016 }, { "epoch": 0.571610481135493, "grad_norm": 1.9296875, "learning_rate": 1.9522284420885942e-05, "loss": 0.3243, "step": 13018 }, { "epoch": 0.5716982996146964, "grad_norm": 1.9140625, "learning_rate": 1.9515534457987655e-05, "loss": 0.3365, "step": 13020 }, { "epoch": 0.5717861180938999, "grad_norm": 1.671875, "learning_rate": 1.9508784915091057e-05, "loss": 0.3138, "step": 13022 }, { "epoch": 0.5718739365731034, "grad_norm": 1.609375, "learning_rate": 1.9502035792713026e-05, "loss": 0.3385, "step": 13024 }, { "epoch": 0.5719617550523068, "grad_norm": 1.609375, "learning_rate": 1.9495287091370414e-05, "loss": 0.3446, "step": 13026 }, { "epoch": 0.5720495735315103, "grad_norm": 1.640625, "learning_rate": 1.9488538811580033e-05, "loss": 0.3483, "step": 13028 }, { "epoch": 0.5721373920107139, "grad_norm": 1.546875, "learning_rate": 1.948179095385867e-05, "loss": 0.3579, "step": 13030 }, { "epoch": 0.5722252104899174, "grad_norm": 1.671875, "learning_rate": 1.947504351872308e-05, "loss": 0.3803, "step": 13032 }, { "epoch": 0.5723130289691208, "grad_norm": 1.75, "learning_rate": 1.946829650668998e-05, "loss": 0.3694, "step": 13034 }, { "epoch": 0.5724008474483243, "grad_norm": 1.90625, "learning_rate": 1.9461549918276066e-05, "loss": 0.3297, "step": 13036 }, { "epoch": 0.5724886659275278, "grad_norm": 1.640625, "learning_rate": 1.945480375399798e-05, "loss": 0.3655, "step": 13038 }, { "epoch": 0.5725764844067313, "grad_norm": 2.03125, "learning_rate": 1.944805801437236e-05, "loss": 0.3315, "step": 13040 }, { "epoch": 0.5726643028859347, "grad_norm": 1.9609375, "learning_rate": 1.944131269991579e-05, "loss": 0.3349, "step": 13042 }, { "epoch": 0.5727521213651383, "grad_norm": 1.71875, "learning_rate": 1.9434567811144813e-05, "loss": 0.3429, "step": 13044 }, { "epoch": 0.5728399398443418, "grad_norm": 1.828125, "learning_rate": 1.9427823348575972e-05, "loss": 0.3571, "step": 13046 }, { "epoch": 0.5729277583235453, "grad_norm": 1.6953125, "learning_rate": 1.9421079312725755e-05, "loss": 0.3535, "step": 13048 }, { "epoch": 0.5730155768027487, "grad_norm": 1.9140625, "learning_rate": 1.941433570411062e-05, "loss": 0.298, "step": 13050 }, { "epoch": 0.5731033952819522, "grad_norm": 1.75, "learning_rate": 1.9407592523247002e-05, "loss": 0.3675, "step": 13052 }, { "epoch": 0.5731912137611557, "grad_norm": 1.6015625, "learning_rate": 1.940084977065128e-05, "loss": 0.3698, "step": 13054 }, { "epoch": 0.5732790322403591, "grad_norm": 1.7109375, "learning_rate": 1.9394107446839828e-05, "loss": 0.3116, "step": 13056 }, { "epoch": 0.5733668507195626, "grad_norm": 1.703125, "learning_rate": 1.9387365552328973e-05, "loss": 0.3263, "step": 13058 }, { "epoch": 0.5734546691987662, "grad_norm": 1.6484375, "learning_rate": 1.938062408763501e-05, "loss": 0.3317, "step": 13060 }, { "epoch": 0.5735424876779697, "grad_norm": 1.703125, "learning_rate": 1.9373883053274206e-05, "loss": 0.346, "step": 13062 }, { "epoch": 0.5736303061571731, "grad_norm": 1.671875, "learning_rate": 1.936714244976278e-05, "loss": 0.3404, "step": 13064 }, { "epoch": 0.5737181246363766, "grad_norm": 1.7578125, "learning_rate": 1.9360402277616936e-05, "loss": 0.3712, "step": 13066 }, { "epoch": 0.5738059431155801, "grad_norm": 1.671875, "learning_rate": 1.9353662537352833e-05, "loss": 0.3626, "step": 13068 }, { "epoch": 0.5738937615947836, "grad_norm": 1.5859375, "learning_rate": 1.9346923229486625e-05, "loss": 0.3185, "step": 13070 }, { "epoch": 0.573981580073987, "grad_norm": 1.5234375, "learning_rate": 1.9340184354534376e-05, "loss": 0.4062, "step": 13072 }, { "epoch": 0.5740693985531905, "grad_norm": 1.8203125, "learning_rate": 1.9333445913012165e-05, "loss": 0.3457, "step": 13074 }, { "epoch": 0.5741572170323941, "grad_norm": 1.6015625, "learning_rate": 1.9326707905436026e-05, "loss": 0.3193, "step": 13076 }, { "epoch": 0.5742450355115976, "grad_norm": 1.8046875, "learning_rate": 1.931997033232196e-05, "loss": 0.3622, "step": 13078 }, { "epoch": 0.574332853990801, "grad_norm": 1.765625, "learning_rate": 1.9313233194185925e-05, "loss": 0.336, "step": 13080 }, { "epoch": 0.5744206724700045, "grad_norm": 1.6015625, "learning_rate": 1.9306496491543853e-05, "loss": 0.3208, "step": 13082 }, { "epoch": 0.574508490949208, "grad_norm": 1.7578125, "learning_rate": 1.9299760224911644e-05, "loss": 0.3113, "step": 13084 }, { "epoch": 0.5745963094284114, "grad_norm": 1.8125, "learning_rate": 1.9293024394805166e-05, "loss": 0.3527, "step": 13086 }, { "epoch": 0.5746841279076149, "grad_norm": 1.546875, "learning_rate": 1.928628900174025e-05, "loss": 0.3387, "step": 13088 }, { "epoch": 0.5747719463868185, "grad_norm": 1.7109375, "learning_rate": 1.927955404623269e-05, "loss": 0.3336, "step": 13090 }, { "epoch": 0.574859764866022, "grad_norm": 1.765625, "learning_rate": 1.927281952879825e-05, "loss": 0.3422, "step": 13092 }, { "epoch": 0.5749475833452254, "grad_norm": 1.59375, "learning_rate": 1.9266085449952664e-05, "loss": 0.3387, "step": 13094 }, { "epoch": 0.5750354018244289, "grad_norm": 1.65625, "learning_rate": 1.925935181021163e-05, "loss": 0.3232, "step": 13096 }, { "epoch": 0.5751232203036324, "grad_norm": 1.8203125, "learning_rate": 1.925261861009081e-05, "loss": 0.3756, "step": 13098 }, { "epoch": 0.5752110387828359, "grad_norm": 1.734375, "learning_rate": 1.9245885850105834e-05, "loss": 0.3472, "step": 13100 }, { "epoch": 0.5752988572620393, "grad_norm": 1.5859375, "learning_rate": 1.9239153530772307e-05, "loss": 0.3247, "step": 13102 }, { "epoch": 0.5753866757412428, "grad_norm": 1.7578125, "learning_rate": 1.923242165260578e-05, "loss": 0.3371, "step": 13104 }, { "epoch": 0.5754744942204464, "grad_norm": 1.8515625, "learning_rate": 1.9225690216121782e-05, "loss": 0.3503, "step": 13106 }, { "epoch": 0.5755623126996499, "grad_norm": 1.7265625, "learning_rate": 1.921895922183581e-05, "loss": 0.3884, "step": 13108 }, { "epoch": 0.5756501311788533, "grad_norm": 1.875, "learning_rate": 1.9212228670263326e-05, "loss": 0.322, "step": 13110 }, { "epoch": 0.5757379496580568, "grad_norm": 1.546875, "learning_rate": 1.920549856191976e-05, "loss": 0.3361, "step": 13112 }, { "epoch": 0.5758257681372603, "grad_norm": 1.640625, "learning_rate": 1.9198768897320503e-05, "loss": 0.3715, "step": 13114 }, { "epoch": 0.5759135866164637, "grad_norm": 1.71875, "learning_rate": 1.9192039676980917e-05, "loss": 0.3504, "step": 13116 }, { "epoch": 0.5760014050956672, "grad_norm": 1.640625, "learning_rate": 1.918531090141632e-05, "loss": 0.3315, "step": 13118 }, { "epoch": 0.5760892235748707, "grad_norm": 1.7109375, "learning_rate": 1.9178582571142008e-05, "loss": 0.3459, "step": 13120 }, { "epoch": 0.5761770420540743, "grad_norm": 1.734375, "learning_rate": 1.917185468667324e-05, "loss": 0.3454, "step": 13122 }, { "epoch": 0.5762648605332777, "grad_norm": 1.78125, "learning_rate": 1.916512724852524e-05, "loss": 0.3098, "step": 13124 }, { "epoch": 0.5763526790124812, "grad_norm": 1.6015625, "learning_rate": 1.9158400257213195e-05, "loss": 0.321, "step": 13126 }, { "epoch": 0.5764404974916847, "grad_norm": 1.84375, "learning_rate": 1.9151673713252253e-05, "loss": 0.3499, "step": 13128 }, { "epoch": 0.5765283159708882, "grad_norm": 1.84375, "learning_rate": 1.914494761715754e-05, "loss": 0.3356, "step": 13130 }, { "epoch": 0.5766161344500916, "grad_norm": 1.859375, "learning_rate": 1.9138221969444153e-05, "loss": 0.3424, "step": 13132 }, { "epoch": 0.5767039529292951, "grad_norm": 1.6640625, "learning_rate": 1.9131496770627123e-05, "loss": 0.3522, "step": 13134 }, { "epoch": 0.5767917714084987, "grad_norm": 1.65625, "learning_rate": 1.9124772021221476e-05, "loss": 0.3515, "step": 13136 }, { "epoch": 0.5768795898877022, "grad_norm": 1.7421875, "learning_rate": 1.91180477217422e-05, "loss": 0.3192, "step": 13138 }, { "epoch": 0.5769674083669056, "grad_norm": 1.75, "learning_rate": 1.911132387270424e-05, "loss": 0.353, "step": 13140 }, { "epoch": 0.5770552268461091, "grad_norm": 1.671875, "learning_rate": 1.9104600474622505e-05, "loss": 0.3523, "step": 13142 }, { "epoch": 0.5771430453253126, "grad_norm": 1.6015625, "learning_rate": 1.9097877528011882e-05, "loss": 0.3311, "step": 13144 }, { "epoch": 0.577230863804516, "grad_norm": 1.7890625, "learning_rate": 1.9091155033387208e-05, "loss": 0.3442, "step": 13146 }, { "epoch": 0.5773186822837195, "grad_norm": 1.8125, "learning_rate": 1.9084432991263302e-05, "loss": 0.3278, "step": 13148 }, { "epoch": 0.577406500762923, "grad_norm": 1.6796875, "learning_rate": 1.9077711402154942e-05, "loss": 0.3689, "step": 13150 }, { "epoch": 0.5774943192421266, "grad_norm": 1.640625, "learning_rate": 1.907099026657685e-05, "loss": 0.3226, "step": 13152 }, { "epoch": 0.57758213772133, "grad_norm": 1.7734375, "learning_rate": 1.906426958504375e-05, "loss": 0.3529, "step": 13154 }, { "epoch": 0.5776699562005335, "grad_norm": 1.734375, "learning_rate": 1.905754935807031e-05, "loss": 0.3441, "step": 13156 }, { "epoch": 0.577757774679737, "grad_norm": 1.6171875, "learning_rate": 1.905082958617116e-05, "loss": 0.3536, "step": 13158 }, { "epoch": 0.5778455931589405, "grad_norm": 1.703125, "learning_rate": 1.9044110269860916e-05, "loss": 0.3286, "step": 13160 }, { "epoch": 0.5779334116381439, "grad_norm": 1.6796875, "learning_rate": 1.9037391409654133e-05, "loss": 0.3429, "step": 13162 }, { "epoch": 0.5780212301173474, "grad_norm": 1.71875, "learning_rate": 1.9030673006065346e-05, "loss": 0.3406, "step": 13164 }, { "epoch": 0.5781090485965509, "grad_norm": 1.6796875, "learning_rate": 1.9023955059609042e-05, "loss": 0.3143, "step": 13166 }, { "epoch": 0.5781968670757545, "grad_norm": 1.6484375, "learning_rate": 1.9017237570799697e-05, "loss": 0.3395, "step": 13168 }, { "epoch": 0.5782846855549579, "grad_norm": 1.8671875, "learning_rate": 1.901052054015173e-05, "loss": 0.3471, "step": 13170 }, { "epoch": 0.5783725040341614, "grad_norm": 1.65625, "learning_rate": 1.9003803968179537e-05, "loss": 0.3708, "step": 13172 }, { "epoch": 0.5784603225133649, "grad_norm": 1.53125, "learning_rate": 1.899708785539747e-05, "loss": 0.3453, "step": 13174 }, { "epoch": 0.5785481409925683, "grad_norm": 1.671875, "learning_rate": 1.8990372202319864e-05, "loss": 0.3442, "step": 13176 }, { "epoch": 0.5786359594717718, "grad_norm": 1.65625, "learning_rate": 1.898365700946099e-05, "loss": 0.3297, "step": 13178 }, { "epoch": 0.5787237779509753, "grad_norm": 1.65625, "learning_rate": 1.8976942277335103e-05, "loss": 0.3381, "step": 13180 }, { "epoch": 0.5788115964301788, "grad_norm": 1.703125, "learning_rate": 1.897022800645642e-05, "loss": 0.335, "step": 13182 }, { "epoch": 0.5788994149093823, "grad_norm": 1.8671875, "learning_rate": 1.896351419733912e-05, "loss": 0.3333, "step": 13184 }, { "epoch": 0.5789872333885858, "grad_norm": 1.6328125, "learning_rate": 1.895680085049736e-05, "loss": 0.3333, "step": 13186 }, { "epoch": 0.5790750518677893, "grad_norm": 1.734375, "learning_rate": 1.895008796644523e-05, "loss": 0.3366, "step": 13188 }, { "epoch": 0.5791628703469928, "grad_norm": 1.609375, "learning_rate": 1.8943375545696816e-05, "loss": 0.3414, "step": 13190 }, { "epoch": 0.5792506888261962, "grad_norm": 1.7890625, "learning_rate": 1.8936663588766154e-05, "loss": 0.3546, "step": 13192 }, { "epoch": 0.5793385073053997, "grad_norm": 1.7109375, "learning_rate": 1.8929952096167258e-05, "loss": 0.3291, "step": 13194 }, { "epoch": 0.5794263257846032, "grad_norm": 1.65625, "learning_rate": 1.8923241068414076e-05, "loss": 0.3341, "step": 13196 }, { "epoch": 0.5795141442638068, "grad_norm": 1.6875, "learning_rate": 1.891653050602055e-05, "loss": 0.3349, "step": 13198 }, { "epoch": 0.5796019627430102, "grad_norm": 1.5625, "learning_rate": 1.8909820409500573e-05, "loss": 0.3762, "step": 13200 }, { "epoch": 0.5796897812222137, "grad_norm": 1.734375, "learning_rate": 1.8903110779368018e-05, "loss": 0.3333, "step": 13202 }, { "epoch": 0.5797775997014172, "grad_norm": 1.6796875, "learning_rate": 1.8896401616136692e-05, "loss": 0.3159, "step": 13204 }, { "epoch": 0.5798654181806207, "grad_norm": 1.6640625, "learning_rate": 1.8889692920320397e-05, "loss": 0.3489, "step": 13206 }, { "epoch": 0.5799532366598241, "grad_norm": 1.7265625, "learning_rate": 1.888298469243288e-05, "loss": 0.3647, "step": 13208 }, { "epoch": 0.5800410551390276, "grad_norm": 1.6640625, "learning_rate": 1.8876276932987864e-05, "loss": 0.3274, "step": 13210 }, { "epoch": 0.5801288736182311, "grad_norm": 1.6171875, "learning_rate": 1.8869569642499032e-05, "loss": 0.328, "step": 13212 }, { "epoch": 0.5802166920974346, "grad_norm": 1.65625, "learning_rate": 1.8862862821480025e-05, "loss": 0.3536, "step": 13214 }, { "epoch": 0.5803045105766381, "grad_norm": 1.71875, "learning_rate": 1.885615647044445e-05, "loss": 0.331, "step": 13216 }, { "epoch": 0.5803923290558416, "grad_norm": 1.6953125, "learning_rate": 1.8849450589905887e-05, "loss": 0.3362, "step": 13218 }, { "epoch": 0.5804801475350451, "grad_norm": 1.65625, "learning_rate": 1.884274518037787e-05, "loss": 0.3125, "step": 13220 }, { "epoch": 0.5805679660142485, "grad_norm": 1.8828125, "learning_rate": 1.883604024237391e-05, "loss": 0.3284, "step": 13222 }, { "epoch": 0.580655784493452, "grad_norm": 1.7109375, "learning_rate": 1.8829335776407464e-05, "loss": 0.3651, "step": 13224 }, { "epoch": 0.5807436029726555, "grad_norm": 1.8671875, "learning_rate": 1.8822631782991967e-05, "loss": 0.337, "step": 13226 }, { "epoch": 0.580831421451859, "grad_norm": 1.5703125, "learning_rate": 1.8815928262640804e-05, "loss": 0.3373, "step": 13228 }, { "epoch": 0.5809192399310625, "grad_norm": 1.7109375, "learning_rate": 1.8809225215867334e-05, "loss": 0.3681, "step": 13230 }, { "epoch": 0.581007058410266, "grad_norm": 1.8046875, "learning_rate": 1.8802522643184882e-05, "loss": 0.3509, "step": 13232 }, { "epoch": 0.5810948768894695, "grad_norm": 1.5546875, "learning_rate": 1.879582054510673e-05, "loss": 0.3459, "step": 13234 }, { "epoch": 0.581182695368673, "grad_norm": 1.8046875, "learning_rate": 1.878911892214613e-05, "loss": 0.3446, "step": 13236 }, { "epoch": 0.5812705138478764, "grad_norm": 1.6796875, "learning_rate": 1.8782417774816295e-05, "loss": 0.3414, "step": 13238 }, { "epoch": 0.5813583323270799, "grad_norm": 1.5859375, "learning_rate": 1.8775717103630395e-05, "loss": 0.3279, "step": 13240 }, { "epoch": 0.5814461508062834, "grad_norm": 1.59375, "learning_rate": 1.876901690910157e-05, "loss": 0.3356, "step": 13242 }, { "epoch": 0.581533969285487, "grad_norm": 1.6328125, "learning_rate": 1.8762317191742924e-05, "loss": 0.3271, "step": 13244 }, { "epoch": 0.5816217877646904, "grad_norm": 1.671875, "learning_rate": 1.875561795206752e-05, "loss": 0.3237, "step": 13246 }, { "epoch": 0.5817096062438939, "grad_norm": 1.7578125, "learning_rate": 1.8748919190588398e-05, "loss": 0.3382, "step": 13248 }, { "epoch": 0.5817974247230974, "grad_norm": 1.7421875, "learning_rate": 1.8742220907818535e-05, "loss": 0.3319, "step": 13250 }, { "epoch": 0.5818852432023008, "grad_norm": 1.6875, "learning_rate": 1.87355231042709e-05, "loss": 0.3463, "step": 13252 }, { "epoch": 0.5819730616815043, "grad_norm": 1.6484375, "learning_rate": 1.8728825780458415e-05, "loss": 0.3481, "step": 13254 }, { "epoch": 0.5820608801607078, "grad_norm": 1.640625, "learning_rate": 1.8722128936893946e-05, "loss": 0.3337, "step": 13256 }, { "epoch": 0.5821486986399113, "grad_norm": 1.671875, "learning_rate": 1.8715432574090344e-05, "loss": 0.3141, "step": 13258 }, { "epoch": 0.5822365171191148, "grad_norm": 1.640625, "learning_rate": 1.8708736692560424e-05, "loss": 0.3838, "step": 13260 }, { "epoch": 0.5823243355983183, "grad_norm": 1.6796875, "learning_rate": 1.8702041292816963e-05, "loss": 0.3102, "step": 13262 }, { "epoch": 0.5824121540775218, "grad_norm": 1.796875, "learning_rate": 1.8695346375372686e-05, "loss": 0.3297, "step": 13264 }, { "epoch": 0.5824999725567253, "grad_norm": 1.7734375, "learning_rate": 1.868865194074029e-05, "loss": 0.3576, "step": 13266 }, { "epoch": 0.5825877910359287, "grad_norm": 1.953125, "learning_rate": 1.868195798943244e-05, "loss": 0.3449, "step": 13268 }, { "epoch": 0.5826756095151322, "grad_norm": 1.6640625, "learning_rate": 1.8675264521961765e-05, "loss": 0.3412, "step": 13270 }, { "epoch": 0.5827634279943357, "grad_norm": 1.71875, "learning_rate": 1.866857153884085e-05, "loss": 0.3659, "step": 13272 }, { "epoch": 0.5828512464735391, "grad_norm": 1.7734375, "learning_rate": 1.8661879040582254e-05, "loss": 0.3646, "step": 13274 }, { "epoch": 0.5829390649527427, "grad_norm": 1.7265625, "learning_rate": 1.8655187027698467e-05, "loss": 0.3396, "step": 13276 }, { "epoch": 0.5830268834319462, "grad_norm": 1.6484375, "learning_rate": 1.864849550070198e-05, "loss": 0.3503, "step": 13278 }, { "epoch": 0.5831147019111497, "grad_norm": 1.8125, "learning_rate": 1.8641804460105233e-05, "loss": 0.3492, "step": 13280 }, { "epoch": 0.5832025203903531, "grad_norm": 1.6484375, "learning_rate": 1.8635113906420622e-05, "loss": 0.3658, "step": 13282 }, { "epoch": 0.5832903388695566, "grad_norm": 1.796875, "learning_rate": 1.862842384016052e-05, "loss": 0.3581, "step": 13284 }, { "epoch": 0.5833781573487601, "grad_norm": 1.6640625, "learning_rate": 1.862173426183725e-05, "loss": 0.3505, "step": 13286 }, { "epoch": 0.5834659758279636, "grad_norm": 1.9765625, "learning_rate": 1.8615045171963098e-05, "loss": 0.3254, "step": 13288 }, { "epoch": 0.5835537943071671, "grad_norm": 1.8515625, "learning_rate": 1.860835657105031e-05, "loss": 0.318, "step": 13290 }, { "epoch": 0.5836416127863706, "grad_norm": 1.671875, "learning_rate": 1.8601668459611106e-05, "loss": 0.3535, "step": 13292 }, { "epoch": 0.5837294312655741, "grad_norm": 1.859375, "learning_rate": 1.859498083815767e-05, "loss": 0.3671, "step": 13294 }, { "epoch": 0.5838172497447776, "grad_norm": 1.65625, "learning_rate": 1.8588293707202133e-05, "loss": 0.3476, "step": 13296 }, { "epoch": 0.583905068223981, "grad_norm": 1.6953125, "learning_rate": 1.858160706725661e-05, "loss": 0.3515, "step": 13298 }, { "epoch": 0.5839928867031845, "grad_norm": 1.6875, "learning_rate": 1.857492091883315e-05, "loss": 0.3621, "step": 13300 }, { "epoch": 0.584080705182388, "grad_norm": 1.6171875, "learning_rate": 1.8568235262443782e-05, "loss": 0.3171, "step": 13302 }, { "epoch": 0.5841685236615914, "grad_norm": 1.6171875, "learning_rate": 1.8561550098600505e-05, "loss": 0.351, "step": 13304 }, { "epoch": 0.584256342140795, "grad_norm": 2.046875, "learning_rate": 1.8554865427815262e-05, "loss": 0.3387, "step": 13306 }, { "epoch": 0.5843441606199985, "grad_norm": 1.765625, "learning_rate": 1.854818125059997e-05, "loss": 0.3562, "step": 13308 }, { "epoch": 0.584431979099202, "grad_norm": 1.8984375, "learning_rate": 1.8541497567466508e-05, "loss": 0.3394, "step": 13310 }, { "epoch": 0.5845197975784054, "grad_norm": 1.65625, "learning_rate": 1.853481437892671e-05, "loss": 0.3271, "step": 13312 }, { "epoch": 0.5846076160576089, "grad_norm": 1.6171875, "learning_rate": 1.852813168549237e-05, "loss": 0.3523, "step": 13314 }, { "epoch": 0.5846954345368124, "grad_norm": 1.75, "learning_rate": 1.8521449487675275e-05, "loss": 0.3484, "step": 13316 }, { "epoch": 0.5847832530160159, "grad_norm": 1.7734375, "learning_rate": 1.851476778598712e-05, "loss": 0.3407, "step": 13318 }, { "epoch": 0.5848710714952193, "grad_norm": 1.6328125, "learning_rate": 1.8508086580939598e-05, "loss": 0.3632, "step": 13320 }, { "epoch": 0.5849588899744229, "grad_norm": 1.859375, "learning_rate": 1.8501405873044363e-05, "loss": 0.3391, "step": 13322 }, { "epoch": 0.5850467084536264, "grad_norm": 1.5859375, "learning_rate": 1.8494725662813028e-05, "loss": 0.3268, "step": 13324 }, { "epoch": 0.5851345269328299, "grad_norm": 1.9609375, "learning_rate": 1.848804595075716e-05, "loss": 0.3522, "step": 13326 }, { "epoch": 0.5852223454120333, "grad_norm": 1.6171875, "learning_rate": 1.848136673738829e-05, "loss": 0.316, "step": 13328 }, { "epoch": 0.5853101638912368, "grad_norm": 1.8515625, "learning_rate": 1.847468802321792e-05, "loss": 0.3361, "step": 13330 }, { "epoch": 0.5853979823704403, "grad_norm": 1.7734375, "learning_rate": 1.8468009808757505e-05, "loss": 0.3726, "step": 13332 }, { "epoch": 0.5854858008496437, "grad_norm": 1.828125, "learning_rate": 1.8461332094518465e-05, "loss": 0.345, "step": 13334 }, { "epoch": 0.5855736193288473, "grad_norm": 1.6640625, "learning_rate": 1.8454654881012184e-05, "loss": 0.3274, "step": 13336 }, { "epoch": 0.5856614378080508, "grad_norm": 1.6640625, "learning_rate": 1.8447978168750003e-05, "loss": 0.3359, "step": 13338 }, { "epoch": 0.5857492562872543, "grad_norm": 1.90625, "learning_rate": 1.844130195824322e-05, "loss": 0.387, "step": 13340 }, { "epoch": 0.5858370747664577, "grad_norm": 1.6796875, "learning_rate": 1.84346262500031e-05, "loss": 0.3119, "step": 13342 }, { "epoch": 0.5859248932456612, "grad_norm": 1.90625, "learning_rate": 1.842795104454088e-05, "loss": 0.3082, "step": 13344 }, { "epoch": 0.5860127117248647, "grad_norm": 1.6875, "learning_rate": 1.8421276342367756e-05, "loss": 0.3793, "step": 13346 }, { "epoch": 0.5861005302040682, "grad_norm": 1.640625, "learning_rate": 1.8414602143994856e-05, "loss": 0.3599, "step": 13348 }, { "epoch": 0.5861883486832716, "grad_norm": 1.6796875, "learning_rate": 1.8407928449933304e-05, "loss": 0.3041, "step": 13350 }, { "epoch": 0.5862761671624752, "grad_norm": 1.6015625, "learning_rate": 1.8401255260694173e-05, "loss": 0.315, "step": 13352 }, { "epoch": 0.5863639856416787, "grad_norm": 1.6953125, "learning_rate": 1.8394582576788487e-05, "loss": 0.3373, "step": 13354 }, { "epoch": 0.5864518041208822, "grad_norm": 1.7578125, "learning_rate": 1.838791039872726e-05, "loss": 0.3263, "step": 13356 }, { "epoch": 0.5865396226000856, "grad_norm": 1.90625, "learning_rate": 1.8381238727021433e-05, "loss": 0.3309, "step": 13358 }, { "epoch": 0.5866274410792891, "grad_norm": 1.8515625, "learning_rate": 1.837456756218194e-05, "loss": 0.3174, "step": 13360 }, { "epoch": 0.5867152595584926, "grad_norm": 1.5546875, "learning_rate": 1.8367896904719645e-05, "loss": 0.3269, "step": 13362 }, { "epoch": 0.586803078037696, "grad_norm": 1.8515625, "learning_rate": 1.8361226755145394e-05, "loss": 0.3741, "step": 13364 }, { "epoch": 0.5868908965168995, "grad_norm": 1.59375, "learning_rate": 1.835455711396999e-05, "loss": 0.3335, "step": 13366 }, { "epoch": 0.5869787149961031, "grad_norm": 1.578125, "learning_rate": 1.8347887981704195e-05, "loss": 0.3346, "step": 13368 }, { "epoch": 0.5870665334753066, "grad_norm": 1.7890625, "learning_rate": 1.8341219358858734e-05, "loss": 0.3419, "step": 13370 }, { "epoch": 0.58715435195451, "grad_norm": 1.5390625, "learning_rate": 1.833455124594429e-05, "loss": 0.3505, "step": 13372 }, { "epoch": 0.5872421704337135, "grad_norm": 1.71875, "learning_rate": 1.8327883643471514e-05, "loss": 0.3448, "step": 13374 }, { "epoch": 0.587329988912917, "grad_norm": 1.765625, "learning_rate": 1.8321216551951002e-05, "loss": 0.3481, "step": 13376 }, { "epoch": 0.5874178073921205, "grad_norm": 1.828125, "learning_rate": 1.8314549971893342e-05, "loss": 0.3309, "step": 13378 }, { "epoch": 0.5875056258713239, "grad_norm": 1.609375, "learning_rate": 1.8307883903809035e-05, "loss": 0.3391, "step": 13380 }, { "epoch": 0.5875934443505274, "grad_norm": 1.71875, "learning_rate": 1.8301218348208583e-05, "loss": 0.3251, "step": 13382 }, { "epoch": 0.587681262829731, "grad_norm": 1.640625, "learning_rate": 1.8294553305602434e-05, "loss": 0.3132, "step": 13384 }, { "epoch": 0.5877690813089345, "grad_norm": 1.6015625, "learning_rate": 1.8287888776501007e-05, "loss": 0.3206, "step": 13386 }, { "epoch": 0.5878568997881379, "grad_norm": 1.65625, "learning_rate": 1.828122476141466e-05, "loss": 0.3317, "step": 13388 }, { "epoch": 0.5879447182673414, "grad_norm": 1.6875, "learning_rate": 1.8274561260853733e-05, "loss": 0.3096, "step": 13390 }, { "epoch": 0.5880325367465449, "grad_norm": 1.6796875, "learning_rate": 1.8267898275328516e-05, "loss": 0.3219, "step": 13392 }, { "epoch": 0.5881203552257483, "grad_norm": 1.6796875, "learning_rate": 1.8261235805349263e-05, "loss": 0.322, "step": 13394 }, { "epoch": 0.5882081737049518, "grad_norm": 1.6875, "learning_rate": 1.8254573851426195e-05, "loss": 0.354, "step": 13396 }, { "epoch": 0.5882959921841554, "grad_norm": 1.6171875, "learning_rate": 1.824791241406947e-05, "loss": 0.3446, "step": 13398 }, { "epoch": 0.5883838106633589, "grad_norm": 1.75, "learning_rate": 1.824125149378923e-05, "loss": 0.3736, "step": 13400 }, { "epoch": 0.5884716291425623, "grad_norm": 1.65625, "learning_rate": 1.823459109109557e-05, "loss": 0.3377, "step": 13402 }, { "epoch": 0.5885594476217658, "grad_norm": 1.640625, "learning_rate": 1.822793120649855e-05, "loss": 0.3308, "step": 13404 }, { "epoch": 0.5886472661009693, "grad_norm": 1.828125, "learning_rate": 1.822127184050818e-05, "loss": 0.3304, "step": 13406 }, { "epoch": 0.5887350845801728, "grad_norm": 1.7109375, "learning_rate": 1.821461299363445e-05, "loss": 0.3367, "step": 13408 }, { "epoch": 0.5888229030593762, "grad_norm": 1.8046875, "learning_rate": 1.820795466638727e-05, "loss": 0.3298, "step": 13410 }, { "epoch": 0.5889107215385797, "grad_norm": 1.7734375, "learning_rate": 1.8201296859276555e-05, "loss": 0.3305, "step": 13412 }, { "epoch": 0.5889985400177833, "grad_norm": 1.6640625, "learning_rate": 1.8194639572812157e-05, "loss": 0.3244, "step": 13414 }, { "epoch": 0.5890863584969868, "grad_norm": 1.5625, "learning_rate": 1.8187982807503885e-05, "loss": 0.3538, "step": 13416 }, { "epoch": 0.5891741769761902, "grad_norm": 1.671875, "learning_rate": 1.818132656386152e-05, "loss": 0.3192, "step": 13418 }, { "epoch": 0.5892619954553937, "grad_norm": 1.671875, "learning_rate": 1.817467084239481e-05, "loss": 0.3559, "step": 13420 }, { "epoch": 0.5893498139345972, "grad_norm": 1.578125, "learning_rate": 1.8168015643613445e-05, "loss": 0.333, "step": 13422 }, { "epoch": 0.5894376324138006, "grad_norm": 1.6953125, "learning_rate": 1.8161360968027073e-05, "loss": 0.324, "step": 13424 }, { "epoch": 0.5895254508930041, "grad_norm": 1.5703125, "learning_rate": 1.8154706816145315e-05, "loss": 0.3465, "step": 13426 }, { "epoch": 0.5896132693722076, "grad_norm": 1.671875, "learning_rate": 1.8148053188477754e-05, "loss": 0.3364, "step": 13428 }, { "epoch": 0.5897010878514112, "grad_norm": 1.7265625, "learning_rate": 1.814140008553392e-05, "loss": 0.3523, "step": 13430 }, { "epoch": 0.5897889063306146, "grad_norm": 1.9375, "learning_rate": 1.8134747507823323e-05, "loss": 0.3177, "step": 13432 }, { "epoch": 0.5898767248098181, "grad_norm": 1.7734375, "learning_rate": 1.8128095455855395e-05, "loss": 0.3485, "step": 13434 }, { "epoch": 0.5899645432890216, "grad_norm": 1.6640625, "learning_rate": 1.8121443930139568e-05, "loss": 0.3294, "step": 13436 }, { "epoch": 0.5900523617682251, "grad_norm": 1.8046875, "learning_rate": 1.8114792931185223e-05, "loss": 0.3434, "step": 13438 }, { "epoch": 0.5901401802474285, "grad_norm": 1.6640625, "learning_rate": 1.8108142459501674e-05, "loss": 0.3251, "step": 13440 }, { "epoch": 0.590227998726632, "grad_norm": 1.7421875, "learning_rate": 1.810149251559823e-05, "loss": 0.3167, "step": 13442 }, { "epoch": 0.5903158172058356, "grad_norm": 1.6953125, "learning_rate": 1.8094843099984144e-05, "loss": 0.3115, "step": 13444 }, { "epoch": 0.5904036356850391, "grad_norm": 1.6484375, "learning_rate": 1.8088194213168626e-05, "loss": 0.34, "step": 13446 }, { "epoch": 0.5904914541642425, "grad_norm": 1.6328125, "learning_rate": 1.8081545855660858e-05, "loss": 0.3425, "step": 13448 }, { "epoch": 0.590579272643446, "grad_norm": 1.6640625, "learning_rate": 1.807489802796996e-05, "loss": 0.3633, "step": 13450 }, { "epoch": 0.5906670911226495, "grad_norm": 1.703125, "learning_rate": 1.8068250730605034e-05, "loss": 0.332, "step": 13452 }, { "epoch": 0.590754909601853, "grad_norm": 1.6796875, "learning_rate": 1.8061603964075125e-05, "loss": 0.3062, "step": 13454 }, { "epoch": 0.5908427280810564, "grad_norm": 1.6328125, "learning_rate": 1.805495772888925e-05, "loss": 0.3278, "step": 13456 }, { "epoch": 0.5909305465602599, "grad_norm": 1.5390625, "learning_rate": 1.8048312025556384e-05, "loss": 0.3581, "step": 13458 }, { "epoch": 0.5910183650394635, "grad_norm": 1.703125, "learning_rate": 1.8041666854585443e-05, "loss": 0.3449, "step": 13460 }, { "epoch": 0.591106183518667, "grad_norm": 1.8203125, "learning_rate": 1.803502221648532e-05, "loss": 0.3533, "step": 13462 }, { "epoch": 0.5911940019978704, "grad_norm": 1.7109375, "learning_rate": 1.8028378111764864e-05, "loss": 0.2833, "step": 13464 }, { "epoch": 0.5912818204770739, "grad_norm": 1.5859375, "learning_rate": 1.802173454093289e-05, "loss": 0.338, "step": 13466 }, { "epoch": 0.5913696389562774, "grad_norm": 1.8828125, "learning_rate": 1.801509150449815e-05, "loss": 0.3585, "step": 13468 }, { "epoch": 0.5914574574354808, "grad_norm": 1.6953125, "learning_rate": 1.8008449002969395e-05, "loss": 0.3445, "step": 13470 }, { "epoch": 0.5915452759146843, "grad_norm": 1.65625, "learning_rate": 1.8001807036855278e-05, "loss": 0.3491, "step": 13472 }, { "epoch": 0.5916330943938878, "grad_norm": 1.59375, "learning_rate": 1.799516560666446e-05, "loss": 0.3108, "step": 13474 }, { "epoch": 0.5917209128730914, "grad_norm": 1.7421875, "learning_rate": 1.7988524712905532e-05, "loss": 0.3521, "step": 13476 }, { "epoch": 0.5918087313522948, "grad_norm": 1.7890625, "learning_rate": 1.7981884356087065e-05, "loss": 0.3642, "step": 13478 }, { "epoch": 0.5918965498314983, "grad_norm": 1.7421875, "learning_rate": 1.7975244536717576e-05, "loss": 0.3198, "step": 13480 }, { "epoch": 0.5919843683107018, "grad_norm": 1.6484375, "learning_rate": 1.7968605255305542e-05, "loss": 0.344, "step": 13482 }, { "epoch": 0.5920721867899053, "grad_norm": 1.671875, "learning_rate": 1.7961966512359414e-05, "loss": 0.3449, "step": 13484 }, { "epoch": 0.5921600052691087, "grad_norm": 1.53125, "learning_rate": 1.7955328308387563e-05, "loss": 0.3294, "step": 13486 }, { "epoch": 0.5922478237483122, "grad_norm": 1.6015625, "learning_rate": 1.794869064389837e-05, "loss": 0.3317, "step": 13488 }, { "epoch": 0.5923356422275158, "grad_norm": 1.703125, "learning_rate": 1.7942053519400133e-05, "loss": 0.3468, "step": 13490 }, { "epoch": 0.5924234607067193, "grad_norm": 1.53125, "learning_rate": 1.7935416935401128e-05, "loss": 0.3326, "step": 13492 }, { "epoch": 0.5925112791859227, "grad_norm": 1.609375, "learning_rate": 1.7928780892409592e-05, "loss": 0.32, "step": 13494 }, { "epoch": 0.5925990976651262, "grad_norm": 1.6015625, "learning_rate": 1.7922145390933707e-05, "loss": 0.3368, "step": 13496 }, { "epoch": 0.5926869161443297, "grad_norm": 1.71875, "learning_rate": 1.7915510431481625e-05, "loss": 0.3298, "step": 13498 }, { "epoch": 0.5927747346235331, "grad_norm": 1.796875, "learning_rate": 1.790887601456146e-05, "loss": 0.3706, "step": 13500 }, { "epoch": 0.5928625531027366, "grad_norm": 1.6875, "learning_rate": 1.7902242140681264e-05, "loss": 0.3379, "step": 13502 }, { "epoch": 0.5929503715819401, "grad_norm": 1.578125, "learning_rate": 1.7895608810349062e-05, "loss": 0.3249, "step": 13504 }, { "epoch": 0.5930381900611437, "grad_norm": 1.6953125, "learning_rate": 1.788897602407284e-05, "loss": 0.3453, "step": 13506 }, { "epoch": 0.5931260085403471, "grad_norm": 1.6640625, "learning_rate": 1.7882343782360546e-05, "loss": 0.3356, "step": 13508 }, { "epoch": 0.5932138270195506, "grad_norm": 1.59375, "learning_rate": 1.787571208572007e-05, "loss": 0.3301, "step": 13510 }, { "epoch": 0.5933016454987541, "grad_norm": 1.6640625, "learning_rate": 1.7869080934659265e-05, "loss": 0.3048, "step": 13512 }, { "epoch": 0.5933894639779576, "grad_norm": 1.7890625, "learning_rate": 1.7862450329685952e-05, "loss": 0.3584, "step": 13514 }, { "epoch": 0.593477282457161, "grad_norm": 1.625, "learning_rate": 1.7855820271307906e-05, "loss": 0.3314, "step": 13516 }, { "epoch": 0.5935651009363645, "grad_norm": 1.5859375, "learning_rate": 1.7849190760032853e-05, "loss": 0.3615, "step": 13518 }, { "epoch": 0.593652919415568, "grad_norm": 1.65625, "learning_rate": 1.7842561796368496e-05, "loss": 0.3443, "step": 13520 }, { "epoch": 0.5937407378947716, "grad_norm": 1.8828125, "learning_rate": 1.7835933380822462e-05, "loss": 0.3515, "step": 13522 }, { "epoch": 0.593828556373975, "grad_norm": 1.703125, "learning_rate": 1.7829305513902366e-05, "loss": 0.3048, "step": 13524 }, { "epoch": 0.5939163748531785, "grad_norm": 1.6640625, "learning_rate": 1.782267819611578e-05, "loss": 0.3268, "step": 13526 }, { "epoch": 0.594004193332382, "grad_norm": 1.5234375, "learning_rate": 1.7816051427970213e-05, "loss": 0.3424, "step": 13528 }, { "epoch": 0.5940920118115854, "grad_norm": 1.6484375, "learning_rate": 1.780942520997316e-05, "loss": 0.3329, "step": 13530 }, { "epoch": 0.5941798302907889, "grad_norm": 1.5625, "learning_rate": 1.780279954263204e-05, "loss": 0.3417, "step": 13532 }, { "epoch": 0.5942676487699924, "grad_norm": 1.859375, "learning_rate": 1.7796174426454255e-05, "loss": 0.3385, "step": 13534 }, { "epoch": 0.5943554672491959, "grad_norm": 1.5859375, "learning_rate": 1.7789549861947165e-05, "loss": 0.3405, "step": 13536 }, { "epoch": 0.5944432857283994, "grad_norm": 1.5625, "learning_rate": 1.778292584961807e-05, "loss": 0.3766, "step": 13538 }, { "epoch": 0.5945311042076029, "grad_norm": 1.6484375, "learning_rate": 1.7776302389974243e-05, "loss": 0.3446, "step": 13540 }, { "epoch": 0.5946189226868064, "grad_norm": 1.71875, "learning_rate": 1.776967948352291e-05, "loss": 0.318, "step": 13542 }, { "epoch": 0.5947067411660099, "grad_norm": 1.6796875, "learning_rate": 1.7763057130771254e-05, "loss": 0.353, "step": 13544 }, { "epoch": 0.5947945596452133, "grad_norm": 1.6328125, "learning_rate": 1.7756435332226424e-05, "loss": 0.3147, "step": 13546 }, { "epoch": 0.5948823781244168, "grad_norm": 1.7265625, "learning_rate": 1.774981408839551e-05, "loss": 0.3275, "step": 13548 }, { "epoch": 0.5949701966036203, "grad_norm": 1.578125, "learning_rate": 1.774319339978557e-05, "loss": 0.3295, "step": 13550 }, { "epoch": 0.5950580150828239, "grad_norm": 1.75, "learning_rate": 1.7736573266903617e-05, "loss": 0.3415, "step": 13552 }, { "epoch": 0.5951458335620273, "grad_norm": 1.6875, "learning_rate": 1.7729953690256625e-05, "loss": 0.3136, "step": 13554 }, { "epoch": 0.5952336520412308, "grad_norm": 1.6328125, "learning_rate": 1.772333467035153e-05, "loss": 0.3453, "step": 13556 }, { "epoch": 0.5953214705204343, "grad_norm": 1.609375, "learning_rate": 1.7716716207695202e-05, "loss": 0.3261, "step": 13558 }, { "epoch": 0.5954092889996377, "grad_norm": 2.015625, "learning_rate": 1.7710098302794495e-05, "loss": 0.3554, "step": 13560 }, { "epoch": 0.5954971074788412, "grad_norm": 1.671875, "learning_rate": 1.7703480956156215e-05, "loss": 0.3133, "step": 13562 }, { "epoch": 0.5955849259580447, "grad_norm": 1.875, "learning_rate": 1.7696864168287105e-05, "loss": 0.3392, "step": 13564 }, { "epoch": 0.5956727444372482, "grad_norm": 1.7734375, "learning_rate": 1.7690247939693887e-05, "loss": 0.3364, "step": 13566 }, { "epoch": 0.5957605629164517, "grad_norm": 1.7421875, "learning_rate": 1.7683632270883233e-05, "loss": 0.3271, "step": 13568 }, { "epoch": 0.5958483813956552, "grad_norm": 1.7109375, "learning_rate": 1.7677017162361776e-05, "loss": 0.3657, "step": 13570 }, { "epoch": 0.5959361998748587, "grad_norm": 1.609375, "learning_rate": 1.7670402614636104e-05, "loss": 0.327, "step": 13572 }, { "epoch": 0.5960240183540622, "grad_norm": 1.625, "learning_rate": 1.7663788628212752e-05, "loss": 0.3258, "step": 13574 }, { "epoch": 0.5961118368332656, "grad_norm": 1.6796875, "learning_rate": 1.7657175203598222e-05, "loss": 0.3347, "step": 13576 }, { "epoch": 0.5961996553124691, "grad_norm": 1.7578125, "learning_rate": 1.765056234129898e-05, "loss": 0.3133, "step": 13578 }, { "epoch": 0.5962874737916726, "grad_norm": 1.84375, "learning_rate": 1.7643950041821434e-05, "loss": 0.3587, "step": 13580 }, { "epoch": 0.596375292270876, "grad_norm": 1.8125, "learning_rate": 1.763733830567196e-05, "loss": 0.3578, "step": 13582 }, { "epoch": 0.5964631107500796, "grad_norm": 1.5546875, "learning_rate": 1.763072713335688e-05, "loss": 0.3336, "step": 13584 }, { "epoch": 0.5965509292292831, "grad_norm": 1.7421875, "learning_rate": 1.7624116525382482e-05, "loss": 0.3035, "step": 13586 }, { "epoch": 0.5966387477084866, "grad_norm": 1.546875, "learning_rate": 1.761750648225501e-05, "loss": 0.33, "step": 13588 }, { "epoch": 0.59672656618769, "grad_norm": 1.65625, "learning_rate": 1.7610897004480658e-05, "loss": 0.2962, "step": 13590 }, { "epoch": 0.5968143846668935, "grad_norm": 1.9453125, "learning_rate": 1.76042880925656e-05, "loss": 0.3426, "step": 13592 }, { "epoch": 0.596902203146097, "grad_norm": 1.6875, "learning_rate": 1.7597679747015922e-05, "loss": 0.3492, "step": 13594 }, { "epoch": 0.5969900216253005, "grad_norm": 1.96875, "learning_rate": 1.75910719683377e-05, "loss": 0.3161, "step": 13596 }, { "epoch": 0.597077840104504, "grad_norm": 1.6171875, "learning_rate": 1.758446475703697e-05, "loss": 0.3393, "step": 13598 }, { "epoch": 0.5971656585837075, "grad_norm": 1.921875, "learning_rate": 1.75778581136197e-05, "loss": 0.3211, "step": 13600 }, { "epoch": 0.597253477062911, "grad_norm": 1.828125, "learning_rate": 1.7571252038591835e-05, "loss": 0.3378, "step": 13602 }, { "epoch": 0.5973412955421145, "grad_norm": 1.6328125, "learning_rate": 1.7564646532459273e-05, "loss": 0.34, "step": 13604 }, { "epoch": 0.5974291140213179, "grad_norm": 1.734375, "learning_rate": 1.755804159572786e-05, "loss": 0.3273, "step": 13606 }, { "epoch": 0.5975169325005214, "grad_norm": 1.921875, "learning_rate": 1.7551437228903407e-05, "loss": 0.3273, "step": 13608 }, { "epoch": 0.5976047509797249, "grad_norm": 1.796875, "learning_rate": 1.7544833432491674e-05, "loss": 0.3469, "step": 13610 }, { "epoch": 0.5976925694589283, "grad_norm": 2.015625, "learning_rate": 1.7538230206998386e-05, "loss": 0.3423, "step": 13612 }, { "epoch": 0.5977803879381319, "grad_norm": 1.609375, "learning_rate": 1.7531627552929214e-05, "loss": 0.3512, "step": 13614 }, { "epoch": 0.5978682064173354, "grad_norm": 1.8984375, "learning_rate": 1.7525025470789797e-05, "loss": 0.3472, "step": 13616 }, { "epoch": 0.5979560248965389, "grad_norm": 1.6171875, "learning_rate": 1.7518423961085725e-05, "loss": 0.3479, "step": 13618 }, { "epoch": 0.5980438433757423, "grad_norm": 1.890625, "learning_rate": 1.7511823024322534e-05, "loss": 0.3252, "step": 13620 }, { "epoch": 0.5981316618549458, "grad_norm": 1.9453125, "learning_rate": 1.7505222661005745e-05, "loss": 0.3326, "step": 13622 }, { "epoch": 0.5982194803341493, "grad_norm": 1.96875, "learning_rate": 1.7498622871640785e-05, "loss": 0.3116, "step": 13624 }, { "epoch": 0.5983072988133528, "grad_norm": 1.6875, "learning_rate": 1.7492023656733085e-05, "loss": 0.3341, "step": 13626 }, { "epoch": 0.5983951172925562, "grad_norm": 1.6484375, "learning_rate": 1.7485425016788016e-05, "loss": 0.3458, "step": 13628 }, { "epoch": 0.5984829357717598, "grad_norm": 1.609375, "learning_rate": 1.74788269523109e-05, "loss": 0.3681, "step": 13630 }, { "epoch": 0.5985707542509633, "grad_norm": 1.71875, "learning_rate": 1.747222946380702e-05, "loss": 0.3796, "step": 13632 }, { "epoch": 0.5986585727301668, "grad_norm": 1.609375, "learning_rate": 1.7465632551781614e-05, "loss": 0.3366, "step": 13634 }, { "epoch": 0.5987463912093702, "grad_norm": 1.765625, "learning_rate": 1.745903621673987e-05, "loss": 0.3106, "step": 13636 }, { "epoch": 0.5988342096885737, "grad_norm": 1.5859375, "learning_rate": 1.745244045918694e-05, "loss": 0.3482, "step": 13638 }, { "epoch": 0.5989220281677772, "grad_norm": 1.6953125, "learning_rate": 1.744584527962793e-05, "loss": 0.3297, "step": 13640 }, { "epoch": 0.5990098466469806, "grad_norm": 1.6328125, "learning_rate": 1.7439250678567897e-05, "loss": 0.3305, "step": 13642 }, { "epoch": 0.5990976651261842, "grad_norm": 1.6484375, "learning_rate": 1.7432656656511866e-05, "loss": 0.3571, "step": 13644 }, { "epoch": 0.5991854836053877, "grad_norm": 1.6171875, "learning_rate": 1.7426063213964796e-05, "loss": 0.3437, "step": 13646 }, { "epoch": 0.5992733020845912, "grad_norm": 1.609375, "learning_rate": 1.741947035143162e-05, "loss": 0.3357, "step": 13648 }, { "epoch": 0.5993611205637946, "grad_norm": 1.765625, "learning_rate": 1.7412878069417227e-05, "loss": 0.3361, "step": 13650 }, { "epoch": 0.5994489390429981, "grad_norm": 1.6796875, "learning_rate": 1.7406286368426445e-05, "loss": 0.3687, "step": 13652 }, { "epoch": 0.5995367575222016, "grad_norm": 1.625, "learning_rate": 1.7399695248964086e-05, "loss": 0.3249, "step": 13654 }, { "epoch": 0.5996245760014051, "grad_norm": 1.59375, "learning_rate": 1.7393104711534874e-05, "loss": 0.3255, "step": 13656 }, { "epoch": 0.5997123944806085, "grad_norm": 1.796875, "learning_rate": 1.7386514756643536e-05, "loss": 0.3569, "step": 13658 }, { "epoch": 0.5998002129598121, "grad_norm": 1.5, "learning_rate": 1.7379925384794716e-05, "loss": 0.3281, "step": 13660 }, { "epoch": 0.5998880314390156, "grad_norm": 1.609375, "learning_rate": 1.7373336596493033e-05, "loss": 0.3503, "step": 13662 }, { "epoch": 0.5999758499182191, "grad_norm": 1.75, "learning_rate": 1.7366748392243064e-05, "loss": 0.3302, "step": 13664 }, { "epoch": 0.6000636683974225, "grad_norm": 1.6875, "learning_rate": 1.7360160772549333e-05, "loss": 0.3629, "step": 13666 }, { "epoch": 0.600151486876626, "grad_norm": 1.6328125, "learning_rate": 1.7353573737916322e-05, "loss": 0.369, "step": 13668 }, { "epoch": 0.6002393053558295, "grad_norm": 1.65625, "learning_rate": 1.7346987288848473e-05, "loss": 0.3343, "step": 13670 }, { "epoch": 0.600327123835033, "grad_norm": 1.734375, "learning_rate": 1.7340401425850168e-05, "loss": 0.3406, "step": 13672 }, { "epoch": 0.6004149423142364, "grad_norm": 1.625, "learning_rate": 1.7333816149425753e-05, "loss": 0.3344, "step": 13674 }, { "epoch": 0.60050276079344, "grad_norm": 1.6015625, "learning_rate": 1.732723146007954e-05, "loss": 0.3399, "step": 13676 }, { "epoch": 0.6005905792726435, "grad_norm": 1.5625, "learning_rate": 1.7320647358315777e-05, "loss": 0.3329, "step": 13678 }, { "epoch": 0.600678397751847, "grad_norm": 1.6796875, "learning_rate": 1.731406384463869e-05, "loss": 0.345, "step": 13680 }, { "epoch": 0.6007662162310504, "grad_norm": 1.671875, "learning_rate": 1.7307480919552427e-05, "loss": 0.3567, "step": 13682 }, { "epoch": 0.6008540347102539, "grad_norm": 1.671875, "learning_rate": 1.730089858356113e-05, "loss": 0.3426, "step": 13684 }, { "epoch": 0.6009418531894574, "grad_norm": 1.828125, "learning_rate": 1.7294316837168857e-05, "loss": 0.2965, "step": 13686 }, { "epoch": 0.6010296716686608, "grad_norm": 1.6171875, "learning_rate": 1.7287735680879645e-05, "loss": 0.3596, "step": 13688 }, { "epoch": 0.6011174901478644, "grad_norm": 1.6953125, "learning_rate": 1.7281155115197484e-05, "loss": 0.3422, "step": 13690 }, { "epoch": 0.6012053086270679, "grad_norm": 1.828125, "learning_rate": 1.7274575140626318e-05, "loss": 0.3203, "step": 13692 }, { "epoch": 0.6012931271062714, "grad_norm": 1.6953125, "learning_rate": 1.726799575767004e-05, "loss": 0.3258, "step": 13694 }, { "epoch": 0.6013809455854748, "grad_norm": 1.6484375, "learning_rate": 1.72614169668325e-05, "loss": 0.3454, "step": 13696 }, { "epoch": 0.6014687640646783, "grad_norm": 1.6640625, "learning_rate": 1.7254838768617497e-05, "loss": 0.3279, "step": 13698 }, { "epoch": 0.6015565825438818, "grad_norm": 1.71875, "learning_rate": 1.7248261163528806e-05, "loss": 0.3692, "step": 13700 }, { "epoch": 0.6016444010230853, "grad_norm": 1.8671875, "learning_rate": 1.724168415207013e-05, "loss": 0.366, "step": 13702 }, { "epoch": 0.6017322195022887, "grad_norm": 1.65625, "learning_rate": 1.7235107734745136e-05, "loss": 0.3487, "step": 13704 }, { "epoch": 0.6018200379814923, "grad_norm": 1.6484375, "learning_rate": 1.7228531912057465e-05, "loss": 0.3285, "step": 13706 }, { "epoch": 0.6019078564606958, "grad_norm": 1.765625, "learning_rate": 1.7221956684510677e-05, "loss": 0.3312, "step": 13708 }, { "epoch": 0.6019956749398993, "grad_norm": 1.6484375, "learning_rate": 1.721538205260831e-05, "loss": 0.3263, "step": 13710 }, { "epoch": 0.6020834934191027, "grad_norm": 1.7421875, "learning_rate": 1.720880801685385e-05, "loss": 0.3295, "step": 13712 }, { "epoch": 0.6021713118983062, "grad_norm": 1.765625, "learning_rate": 1.720223457775075e-05, "loss": 0.3395, "step": 13714 }, { "epoch": 0.6022591303775097, "grad_norm": 1.8671875, "learning_rate": 1.719566173580239e-05, "loss": 0.3319, "step": 13716 }, { "epoch": 0.6023469488567131, "grad_norm": 1.7421875, "learning_rate": 1.7189089491512116e-05, "loss": 0.3508, "step": 13718 }, { "epoch": 0.6024347673359166, "grad_norm": 1.796875, "learning_rate": 1.7182517845383252e-05, "loss": 0.3301, "step": 13720 }, { "epoch": 0.6025225858151202, "grad_norm": 1.7578125, "learning_rate": 1.717594679791904e-05, "loss": 0.3258, "step": 13722 }, { "epoch": 0.6026104042943237, "grad_norm": 1.8125, "learning_rate": 1.7169376349622698e-05, "loss": 0.3483, "step": 13724 }, { "epoch": 0.6026982227735271, "grad_norm": 1.8125, "learning_rate": 1.716280650099739e-05, "loss": 0.3264, "step": 13726 }, { "epoch": 0.6027860412527306, "grad_norm": 1.84375, "learning_rate": 1.7156237252546242e-05, "loss": 0.3246, "step": 13728 }, { "epoch": 0.6028738597319341, "grad_norm": 1.8203125, "learning_rate": 1.7149668604772324e-05, "loss": 0.3438, "step": 13730 }, { "epoch": 0.6029616782111376, "grad_norm": 1.671875, "learning_rate": 1.714310055817867e-05, "loss": 0.3036, "step": 13732 }, { "epoch": 0.603049496690341, "grad_norm": 1.9296875, "learning_rate": 1.7136533113268256e-05, "loss": 0.35, "step": 13734 }, { "epoch": 0.6031373151695445, "grad_norm": 1.640625, "learning_rate": 1.712996627054402e-05, "loss": 0.3397, "step": 13736 }, { "epoch": 0.6032251336487481, "grad_norm": 1.6328125, "learning_rate": 1.7123400030508852e-05, "loss": 0.3716, "step": 13738 }, { "epoch": 0.6033129521279516, "grad_norm": 1.6796875, "learning_rate": 1.71168343936656e-05, "loss": 0.3035, "step": 13740 }, { "epoch": 0.603400770607155, "grad_norm": 1.6171875, "learning_rate": 1.7110269360517066e-05, "loss": 0.3052, "step": 13742 }, { "epoch": 0.6034885890863585, "grad_norm": 1.7890625, "learning_rate": 1.710370493156599e-05, "loss": 0.3412, "step": 13744 }, { "epoch": 0.603576407565562, "grad_norm": 1.6953125, "learning_rate": 1.709714110731509e-05, "loss": 0.3488, "step": 13746 }, { "epoch": 0.6036642260447654, "grad_norm": 1.640625, "learning_rate": 1.709057788826701e-05, "loss": 0.324, "step": 13748 }, { "epoch": 0.6037520445239689, "grad_norm": 1.6484375, "learning_rate": 1.7084015274924373e-05, "loss": 0.3332, "step": 13750 }, { "epoch": 0.6038398630031725, "grad_norm": 1.7734375, "learning_rate": 1.7077453267789746e-05, "loss": 0.3286, "step": 13752 }, { "epoch": 0.603927681482376, "grad_norm": 1.6796875, "learning_rate": 1.707089186736564e-05, "loss": 0.3647, "step": 13754 }, { "epoch": 0.6040154999615794, "grad_norm": 1.7421875, "learning_rate": 1.7064331074154543e-05, "loss": 0.33, "step": 13756 }, { "epoch": 0.6041033184407829, "grad_norm": 1.765625, "learning_rate": 1.7057770888658873e-05, "loss": 0.3588, "step": 13758 }, { "epoch": 0.6041911369199864, "grad_norm": 1.484375, "learning_rate": 1.705121131138101e-05, "loss": 0.3133, "step": 13760 }, { "epoch": 0.6042789553991899, "grad_norm": 1.6875, "learning_rate": 1.7044652342823292e-05, "loss": 0.3258, "step": 13762 }, { "epoch": 0.6043667738783933, "grad_norm": 1.8515625, "learning_rate": 1.7038093983488003e-05, "loss": 0.3074, "step": 13764 }, { "epoch": 0.6044545923575968, "grad_norm": 1.59375, "learning_rate": 1.703153623387738e-05, "loss": 0.3299, "step": 13766 }, { "epoch": 0.6045424108368004, "grad_norm": 1.671875, "learning_rate": 1.7024979094493637e-05, "loss": 0.3334, "step": 13768 }, { "epoch": 0.6046302293160039, "grad_norm": 1.5390625, "learning_rate": 1.7018422565838896e-05, "loss": 0.3128, "step": 13770 }, { "epoch": 0.6047180477952073, "grad_norm": 1.59375, "learning_rate": 1.701186664841527e-05, "loss": 0.3236, "step": 13772 }, { "epoch": 0.6048058662744108, "grad_norm": 1.5625, "learning_rate": 1.7005311342724812e-05, "loss": 0.347, "step": 13774 }, { "epoch": 0.6048936847536143, "grad_norm": 1.8046875, "learning_rate": 1.6998756649269535e-05, "loss": 0.359, "step": 13776 }, { "epoch": 0.6049815032328177, "grad_norm": 1.640625, "learning_rate": 1.6992202568551383e-05, "loss": 0.322, "step": 13778 }, { "epoch": 0.6050693217120212, "grad_norm": 1.7421875, "learning_rate": 1.6985649101072277e-05, "loss": 0.3372, "step": 13780 }, { "epoch": 0.6051571401912247, "grad_norm": 1.625, "learning_rate": 1.6979096247334092e-05, "loss": 0.3177, "step": 13782 }, { "epoch": 0.6052449586704283, "grad_norm": 1.6328125, "learning_rate": 1.697254400783863e-05, "loss": 0.3389, "step": 13784 }, { "epoch": 0.6053327771496317, "grad_norm": 1.5859375, "learning_rate": 1.6965992383087677e-05, "loss": 0.3674, "step": 13786 }, { "epoch": 0.6054205956288352, "grad_norm": 1.84375, "learning_rate": 1.6959441373582947e-05, "loss": 0.3298, "step": 13788 }, { "epoch": 0.6055084141080387, "grad_norm": 1.515625, "learning_rate": 1.6952890979826132e-05, "loss": 0.3318, "step": 13790 }, { "epoch": 0.6055962325872422, "grad_norm": 1.90625, "learning_rate": 1.6946341202318854e-05, "loss": 0.3524, "step": 13792 }, { "epoch": 0.6056840510664456, "grad_norm": 1.6328125, "learning_rate": 1.6939792041562695e-05, "loss": 0.3478, "step": 13794 }, { "epoch": 0.6057718695456491, "grad_norm": 1.75, "learning_rate": 1.693324349805919e-05, "loss": 0.3476, "step": 13796 }, { "epoch": 0.6058596880248527, "grad_norm": 1.6875, "learning_rate": 1.6926695572309837e-05, "loss": 0.3191, "step": 13798 }, { "epoch": 0.6059475065040562, "grad_norm": 1.6484375, "learning_rate": 1.6920148264816064e-05, "loss": 0.3439, "step": 13800 }, { "epoch": 0.6060353249832596, "grad_norm": 1.8125, "learning_rate": 1.691360157607928e-05, "loss": 0.3311, "step": 13802 }, { "epoch": 0.6061231434624631, "grad_norm": 1.5703125, "learning_rate": 1.6907055506600834e-05, "loss": 0.347, "step": 13804 }, { "epoch": 0.6062109619416666, "grad_norm": 1.6171875, "learning_rate": 1.6900510056882012e-05, "loss": 0.3227, "step": 13806 }, { "epoch": 0.60629878042087, "grad_norm": 1.6171875, "learning_rate": 1.6893965227424073e-05, "loss": 0.3598, "step": 13808 }, { "epoch": 0.6063865989000735, "grad_norm": 1.7421875, "learning_rate": 1.6887421018728215e-05, "loss": 0.3397, "step": 13810 }, { "epoch": 0.606474417379277, "grad_norm": 1.6171875, "learning_rate": 1.68808774312956e-05, "loss": 0.3515, "step": 13812 }, { "epoch": 0.6065622358584806, "grad_norm": 1.703125, "learning_rate": 1.6874334465627335e-05, "loss": 0.3346, "step": 13814 }, { "epoch": 0.606650054337684, "grad_norm": 1.5859375, "learning_rate": 1.686779212222449e-05, "loss": 0.3284, "step": 13816 }, { "epoch": 0.6067378728168875, "grad_norm": 1.6328125, "learning_rate": 1.6861250401588075e-05, "loss": 0.3169, "step": 13818 }, { "epoch": 0.606825691296091, "grad_norm": 1.7421875, "learning_rate": 1.685470930421905e-05, "loss": 0.3321, "step": 13820 }, { "epoch": 0.6069135097752945, "grad_norm": 1.578125, "learning_rate": 1.6848168830618338e-05, "loss": 0.3241, "step": 13822 }, { "epoch": 0.6070013282544979, "grad_norm": 1.6015625, "learning_rate": 1.6841628981286814e-05, "loss": 0.3567, "step": 13824 }, { "epoch": 0.6070891467337014, "grad_norm": 1.6484375, "learning_rate": 1.6835089756725297e-05, "loss": 0.3199, "step": 13826 }, { "epoch": 0.6071769652129049, "grad_norm": 1.8515625, "learning_rate": 1.6828551157434568e-05, "loss": 0.3317, "step": 13828 }, { "epoch": 0.6072647836921085, "grad_norm": 1.609375, "learning_rate": 1.6822013183915347e-05, "loss": 0.3371, "step": 13830 }, { "epoch": 0.6073526021713119, "grad_norm": 1.6953125, "learning_rate": 1.6815475836668317e-05, "loss": 0.3358, "step": 13832 }, { "epoch": 0.6074404206505154, "grad_norm": 1.7734375, "learning_rate": 1.6808939116194107e-05, "loss": 0.3335, "step": 13834 }, { "epoch": 0.6075282391297189, "grad_norm": 1.6796875, "learning_rate": 1.6802403022993304e-05, "loss": 0.3321, "step": 13836 }, { "epoch": 0.6076160576089223, "grad_norm": 1.65625, "learning_rate": 1.6795867557566454e-05, "loss": 0.3463, "step": 13838 }, { "epoch": 0.6077038760881258, "grad_norm": 1.7265625, "learning_rate": 1.6789332720414023e-05, "loss": 0.3516, "step": 13840 }, { "epoch": 0.6077916945673293, "grad_norm": 1.5390625, "learning_rate": 1.6782798512036457e-05, "loss": 0.3261, "step": 13842 }, { "epoch": 0.6078795130465329, "grad_norm": 1.640625, "learning_rate": 1.677626493293416e-05, "loss": 0.3435, "step": 13844 }, { "epoch": 0.6079673315257363, "grad_norm": 1.6328125, "learning_rate": 1.676973198360746e-05, "loss": 0.3584, "step": 13846 }, { "epoch": 0.6080551500049398, "grad_norm": 1.6015625, "learning_rate": 1.6763199664556656e-05, "loss": 0.3631, "step": 13848 }, { "epoch": 0.6081429684841433, "grad_norm": 1.6640625, "learning_rate": 1.6756667976281997e-05, "loss": 0.3259, "step": 13850 }, { "epoch": 0.6082307869633468, "grad_norm": 1.953125, "learning_rate": 1.6750136919283678e-05, "loss": 0.3486, "step": 13852 }, { "epoch": 0.6083186054425502, "grad_norm": 1.625, "learning_rate": 1.674360649406186e-05, "loss": 0.3215, "step": 13854 }, { "epoch": 0.6084064239217537, "grad_norm": 1.9609375, "learning_rate": 1.673707670111663e-05, "loss": 0.3284, "step": 13856 }, { "epoch": 0.6084942424009572, "grad_norm": 1.7265625, "learning_rate": 1.6730547540948048e-05, "loss": 0.336, "step": 13858 }, { "epoch": 0.6085820608801608, "grad_norm": 1.671875, "learning_rate": 1.6724019014056115e-05, "loss": 0.3323, "step": 13860 }, { "epoch": 0.6086698793593642, "grad_norm": 1.671875, "learning_rate": 1.6717491120940793e-05, "loss": 0.358, "step": 13862 }, { "epoch": 0.6087576978385677, "grad_norm": 1.7421875, "learning_rate": 1.671096386210198e-05, "loss": 0.331, "step": 13864 }, { "epoch": 0.6088455163177712, "grad_norm": 1.5859375, "learning_rate": 1.670443723803955e-05, "loss": 0.3196, "step": 13866 }, { "epoch": 0.6089333347969746, "grad_norm": 1.6484375, "learning_rate": 1.669791124925331e-05, "loss": 0.3557, "step": 13868 }, { "epoch": 0.6090211532761781, "grad_norm": 1.84375, "learning_rate": 1.669138589624301e-05, "loss": 0.3677, "step": 13870 }, { "epoch": 0.6091089717553816, "grad_norm": 1.578125, "learning_rate": 1.668486117950837e-05, "loss": 0.2952, "step": 13872 }, { "epoch": 0.6091967902345851, "grad_norm": 1.78125, "learning_rate": 1.6678337099549052e-05, "loss": 0.3623, "step": 13874 }, { "epoch": 0.6092846087137886, "grad_norm": 1.546875, "learning_rate": 1.667181365686467e-05, "loss": 0.3241, "step": 13876 }, { "epoch": 0.6093724271929921, "grad_norm": 1.609375, "learning_rate": 1.66652908519548e-05, "loss": 0.3287, "step": 13878 }, { "epoch": 0.6094602456721956, "grad_norm": 1.765625, "learning_rate": 1.6658768685318955e-05, "loss": 0.337, "step": 13880 }, { "epoch": 0.6095480641513991, "grad_norm": 1.6171875, "learning_rate": 1.6652247157456603e-05, "loss": 0.3112, "step": 13882 }, { "epoch": 0.6096358826306025, "grad_norm": 1.59375, "learning_rate": 1.6645726268867163e-05, "loss": 0.3299, "step": 13884 }, { "epoch": 0.609723701109806, "grad_norm": 1.6796875, "learning_rate": 1.6639206020050006e-05, "loss": 0.322, "step": 13886 }, { "epoch": 0.6098115195890095, "grad_norm": 1.6015625, "learning_rate": 1.6632686411504455e-05, "loss": 0.3119, "step": 13888 }, { "epoch": 0.609899338068213, "grad_norm": 1.8984375, "learning_rate": 1.6626167443729797e-05, "loss": 0.3334, "step": 13890 }, { "epoch": 0.6099871565474165, "grad_norm": 1.5390625, "learning_rate": 1.6619649117225233e-05, "loss": 0.354, "step": 13892 }, { "epoch": 0.61007497502662, "grad_norm": 1.6796875, "learning_rate": 1.6613131432489947e-05, "loss": 0.3137, "step": 13894 }, { "epoch": 0.6101627935058235, "grad_norm": 1.6171875, "learning_rate": 1.6606614390023066e-05, "loss": 0.3491, "step": 13896 }, { "epoch": 0.610250611985027, "grad_norm": 1.703125, "learning_rate": 1.660009799032368e-05, "loss": 0.3024, "step": 13898 }, { "epoch": 0.6103384304642304, "grad_norm": 1.609375, "learning_rate": 1.65935822338908e-05, "loss": 0.3068, "step": 13900 }, { "epoch": 0.6104262489434339, "grad_norm": 1.6328125, "learning_rate": 1.6587067121223397e-05, "loss": 0.3341, "step": 13902 }, { "epoch": 0.6105140674226374, "grad_norm": 1.8203125, "learning_rate": 1.6580552652820412e-05, "loss": 0.3257, "step": 13904 }, { "epoch": 0.610601885901841, "grad_norm": 1.6328125, "learning_rate": 1.6574038829180733e-05, "loss": 0.3143, "step": 13906 }, { "epoch": 0.6106897043810444, "grad_norm": 1.640625, "learning_rate": 1.6567525650803174e-05, "loss": 0.3188, "step": 13908 }, { "epoch": 0.6107775228602479, "grad_norm": 1.703125, "learning_rate": 1.656101311818652e-05, "loss": 0.3143, "step": 13910 }, { "epoch": 0.6108653413394514, "grad_norm": 1.59375, "learning_rate": 1.6554501231829504e-05, "loss": 0.3411, "step": 13912 }, { "epoch": 0.6109531598186548, "grad_norm": 1.6328125, "learning_rate": 1.654798999223081e-05, "loss": 0.3352, "step": 13914 }, { "epoch": 0.6110409782978583, "grad_norm": 1.6171875, "learning_rate": 1.654147939988907e-05, "loss": 0.3489, "step": 13916 }, { "epoch": 0.6111287967770618, "grad_norm": 1.71875, "learning_rate": 1.6534969455302864e-05, "loss": 0.351, "step": 13918 }, { "epoch": 0.6112166152562652, "grad_norm": 1.609375, "learning_rate": 1.6528460158970727e-05, "loss": 0.3228, "step": 13920 }, { "epoch": 0.6113044337354688, "grad_norm": 1.6171875, "learning_rate": 1.652195151139114e-05, "loss": 0.3586, "step": 13922 }, { "epoch": 0.6113922522146723, "grad_norm": 1.8125, "learning_rate": 1.651544351306254e-05, "loss": 0.326, "step": 13924 }, { "epoch": 0.6114800706938758, "grad_norm": 1.6328125, "learning_rate": 1.6508936164483314e-05, "loss": 0.3237, "step": 13926 }, { "epoch": 0.6115678891730792, "grad_norm": 1.6015625, "learning_rate": 1.6502429466151788e-05, "loss": 0.3357, "step": 13928 }, { "epoch": 0.6116557076522827, "grad_norm": 1.65625, "learning_rate": 1.649592341856625e-05, "loss": 0.33, "step": 13930 }, { "epoch": 0.6117435261314862, "grad_norm": 1.6328125, "learning_rate": 1.648941802222494e-05, "loss": 0.3332, "step": 13932 }, { "epoch": 0.6118313446106897, "grad_norm": 1.671875, "learning_rate": 1.6482913277626033e-05, "loss": 0.3155, "step": 13934 }, { "epoch": 0.6119191630898931, "grad_norm": 1.7890625, "learning_rate": 1.6476409185267666e-05, "loss": 0.3584, "step": 13936 }, { "epoch": 0.6120069815690967, "grad_norm": 1.6015625, "learning_rate": 1.6469905745647928e-05, "loss": 0.343, "step": 13938 }, { "epoch": 0.6120948000483002, "grad_norm": 1.640625, "learning_rate": 1.6463402959264858e-05, "loss": 0.3573, "step": 13940 }, { "epoch": 0.6121826185275037, "grad_norm": 1.53125, "learning_rate": 1.6456900826616433e-05, "loss": 0.3155, "step": 13942 }, { "epoch": 0.6122704370067071, "grad_norm": 1.7109375, "learning_rate": 1.645039934820059e-05, "loss": 0.3426, "step": 13944 }, { "epoch": 0.6123582554859106, "grad_norm": 1.6171875, "learning_rate": 1.644389852451521e-05, "loss": 0.3317, "step": 13946 }, { "epoch": 0.6124460739651141, "grad_norm": 1.5, "learning_rate": 1.6437398356058137e-05, "loss": 0.3334, "step": 13948 }, { "epoch": 0.6125338924443176, "grad_norm": 1.609375, "learning_rate": 1.643089884332715e-05, "loss": 0.3079, "step": 13950 }, { "epoch": 0.6126217109235211, "grad_norm": 1.7421875, "learning_rate": 1.642439998681999e-05, "loss": 0.3297, "step": 13952 }, { "epoch": 0.6127095294027246, "grad_norm": 1.7265625, "learning_rate": 1.6417901787034324e-05, "loss": 0.309, "step": 13954 }, { "epoch": 0.6127973478819281, "grad_norm": 1.6328125, "learning_rate": 1.64114042444678e-05, "loss": 0.3131, "step": 13956 }, { "epoch": 0.6128851663611316, "grad_norm": 1.8203125, "learning_rate": 1.6404907359618e-05, "loss": 0.3473, "step": 13958 }, { "epoch": 0.612972984840335, "grad_norm": 1.84375, "learning_rate": 1.639841113298246e-05, "loss": 0.3242, "step": 13960 }, { "epoch": 0.6130608033195385, "grad_norm": 1.640625, "learning_rate": 1.6391915565058653e-05, "loss": 0.3567, "step": 13962 }, { "epoch": 0.613148621798742, "grad_norm": 1.625, "learning_rate": 1.6385420656344007e-05, "loss": 0.3253, "step": 13964 }, { "epoch": 0.6132364402779454, "grad_norm": 1.7890625, "learning_rate": 1.637892640733592e-05, "loss": 0.3385, "step": 13966 }, { "epoch": 0.613324258757149, "grad_norm": 1.703125, "learning_rate": 1.637243281853172e-05, "loss": 0.3117, "step": 13968 }, { "epoch": 0.6134120772363525, "grad_norm": 1.625, "learning_rate": 1.6365939890428673e-05, "loss": 0.3177, "step": 13970 }, { "epoch": 0.613499895715556, "grad_norm": 1.6171875, "learning_rate": 1.6359447623524022e-05, "loss": 0.3271, "step": 13972 }, { "epoch": 0.6135877141947594, "grad_norm": 1.640625, "learning_rate": 1.635295601831494e-05, "loss": 0.3415, "step": 13974 }, { "epoch": 0.6136755326739629, "grad_norm": 1.6328125, "learning_rate": 1.6346465075298564e-05, "loss": 0.3051, "step": 13976 }, { "epoch": 0.6137633511531664, "grad_norm": 1.59375, "learning_rate": 1.633997479497197e-05, "loss": 0.3285, "step": 13978 }, { "epoch": 0.6138511696323699, "grad_norm": 1.578125, "learning_rate": 1.6333485177832176e-05, "loss": 0.3322, "step": 13980 }, { "epoch": 0.6139389881115733, "grad_norm": 1.6484375, "learning_rate": 1.632699622437617e-05, "loss": 0.3341, "step": 13982 }, { "epoch": 0.6140268065907769, "grad_norm": 1.6328125, "learning_rate": 1.6320507935100863e-05, "loss": 0.3281, "step": 13984 }, { "epoch": 0.6141146250699804, "grad_norm": 1.5234375, "learning_rate": 1.6314020310503144e-05, "loss": 0.3057, "step": 13986 }, { "epoch": 0.6142024435491839, "grad_norm": 1.8359375, "learning_rate": 1.630753335107984e-05, "loss": 0.3259, "step": 13988 }, { "epoch": 0.6142902620283873, "grad_norm": 1.6796875, "learning_rate": 1.630104705732771e-05, "loss": 0.3332, "step": 13990 }, { "epoch": 0.6143780805075908, "grad_norm": 1.6875, "learning_rate": 1.6294561429743475e-05, "loss": 0.3268, "step": 13992 }, { "epoch": 0.6144658989867943, "grad_norm": 1.703125, "learning_rate": 1.6288076468823827e-05, "loss": 0.311, "step": 13994 }, { "epoch": 0.6145537174659977, "grad_norm": 1.875, "learning_rate": 1.6281592175065357e-05, "loss": 0.3465, "step": 13996 }, { "epoch": 0.6146415359452013, "grad_norm": 1.7109375, "learning_rate": 1.6275108548964653e-05, "loss": 0.3331, "step": 13998 }, { "epoch": 0.6147293544244048, "grad_norm": 1.640625, "learning_rate": 1.626862559101823e-05, "loss": 0.3438, "step": 14000 }, { "epoch": 0.6148171729036083, "grad_norm": 1.5703125, "learning_rate": 1.6262143301722547e-05, "loss": 0.3152, "step": 14002 }, { "epoch": 0.6149049913828117, "grad_norm": 1.8125, "learning_rate": 1.625566168157403e-05, "loss": 0.3089, "step": 14004 }, { "epoch": 0.6149928098620152, "grad_norm": 1.6171875, "learning_rate": 1.6249180731069036e-05, "loss": 0.3415, "step": 14006 }, { "epoch": 0.6150806283412187, "grad_norm": 1.65625, "learning_rate": 1.6242700450703876e-05, "loss": 0.3317, "step": 14008 }, { "epoch": 0.6151684468204222, "grad_norm": 1.84375, "learning_rate": 1.6236220840974815e-05, "loss": 0.3361, "step": 14010 }, { "epoch": 0.6152562652996256, "grad_norm": 1.6796875, "learning_rate": 1.6229741902378063e-05, "loss": 0.3206, "step": 14012 }, { "epoch": 0.6153440837788292, "grad_norm": 1.9765625, "learning_rate": 1.6223263635409785e-05, "loss": 0.3096, "step": 14014 }, { "epoch": 0.6154319022580327, "grad_norm": 1.84375, "learning_rate": 1.621678604056608e-05, "loss": 0.3521, "step": 14016 }, { "epoch": 0.6155197207372362, "grad_norm": 1.6328125, "learning_rate": 1.6210309118343e-05, "loss": 0.3101, "step": 14018 }, { "epoch": 0.6156075392164396, "grad_norm": 1.6953125, "learning_rate": 1.6203832869236557e-05, "loss": 0.3519, "step": 14020 }, { "epoch": 0.6156953576956431, "grad_norm": 1.546875, "learning_rate": 1.619735729374271e-05, "loss": 0.3418, "step": 14022 }, { "epoch": 0.6157831761748466, "grad_norm": 1.6328125, "learning_rate": 1.6190882392357342e-05, "loss": 0.3315, "step": 14024 }, { "epoch": 0.61587099465405, "grad_norm": 1.546875, "learning_rate": 1.6184408165576316e-05, "loss": 0.3609, "step": 14026 }, { "epoch": 0.6159588131332535, "grad_norm": 1.78125, "learning_rate": 1.6177934613895422e-05, "loss": 0.3276, "step": 14028 }, { "epoch": 0.6160466316124571, "grad_norm": 1.7578125, "learning_rate": 1.6171461737810413e-05, "loss": 0.3124, "step": 14030 }, { "epoch": 0.6161344500916606, "grad_norm": 1.671875, "learning_rate": 1.616498953781698e-05, "loss": 0.3149, "step": 14032 }, { "epoch": 0.616222268570864, "grad_norm": 1.609375, "learning_rate": 1.6158518014410762e-05, "loss": 0.3245, "step": 14034 }, { "epoch": 0.6163100870500675, "grad_norm": 1.5390625, "learning_rate": 1.615204716808736e-05, "loss": 0.341, "step": 14036 }, { "epoch": 0.616397905529271, "grad_norm": 1.5546875, "learning_rate": 1.61455769993423e-05, "loss": 0.3343, "step": 14038 }, { "epoch": 0.6164857240084745, "grad_norm": 1.59375, "learning_rate": 1.6139107508671086e-05, "loss": 0.3379, "step": 14040 }, { "epoch": 0.6165735424876779, "grad_norm": 1.640625, "learning_rate": 1.6132638696569134e-05, "loss": 0.3453, "step": 14042 }, { "epoch": 0.6166613609668815, "grad_norm": 1.6796875, "learning_rate": 1.612617056353184e-05, "loss": 0.3367, "step": 14044 }, { "epoch": 0.616749179446085, "grad_norm": 1.59375, "learning_rate": 1.611970311005453e-05, "loss": 0.3481, "step": 14046 }, { "epoch": 0.6168369979252885, "grad_norm": 1.6484375, "learning_rate": 1.611323633663248e-05, "loss": 0.3198, "step": 14048 }, { "epoch": 0.6169248164044919, "grad_norm": 2.015625, "learning_rate": 1.610677024376093e-05, "loss": 0.3636, "step": 14050 }, { "epoch": 0.6170126348836954, "grad_norm": 1.609375, "learning_rate": 1.6100304831935052e-05, "loss": 0.316, "step": 14052 }, { "epoch": 0.6171004533628989, "grad_norm": 1.6796875, "learning_rate": 1.609384010164996e-05, "loss": 0.3195, "step": 14054 }, { "epoch": 0.6171882718421023, "grad_norm": 1.6484375, "learning_rate": 1.608737605340072e-05, "loss": 0.3629, "step": 14056 }, { "epoch": 0.6172760903213058, "grad_norm": 1.546875, "learning_rate": 1.608091268768236e-05, "loss": 0.2921, "step": 14058 }, { "epoch": 0.6173639088005094, "grad_norm": 1.609375, "learning_rate": 1.6074450004989844e-05, "loss": 0.3225, "step": 14060 }, { "epoch": 0.6174517272797129, "grad_norm": 1.5546875, "learning_rate": 1.606798800581809e-05, "loss": 0.3192, "step": 14062 }, { "epoch": 0.6175395457589163, "grad_norm": 1.7265625, "learning_rate": 1.6061526690661947e-05, "loss": 0.3356, "step": 14064 }, { "epoch": 0.6176273642381198, "grad_norm": 1.6953125, "learning_rate": 1.6055066060016247e-05, "loss": 0.3436, "step": 14066 }, { "epoch": 0.6177151827173233, "grad_norm": 1.875, "learning_rate": 1.6048606114375723e-05, "loss": 0.3204, "step": 14068 }, { "epoch": 0.6178030011965268, "grad_norm": 1.5, "learning_rate": 1.604214685423509e-05, "loss": 0.3223, "step": 14070 }, { "epoch": 0.6178908196757302, "grad_norm": 1.53125, "learning_rate": 1.6035688280088995e-05, "loss": 0.3394, "step": 14072 }, { "epoch": 0.6179786381549337, "grad_norm": 1.515625, "learning_rate": 1.6029230392432043e-05, "loss": 0.3128, "step": 14074 }, { "epoch": 0.6180664566341373, "grad_norm": 1.6640625, "learning_rate": 1.6022773191758784e-05, "loss": 0.3205, "step": 14076 }, { "epoch": 0.6181542751133408, "grad_norm": 1.6328125, "learning_rate": 1.60163166785637e-05, "loss": 0.343, "step": 14078 }, { "epoch": 0.6182420935925442, "grad_norm": 1.640625, "learning_rate": 1.6009860853341237e-05, "loss": 0.3274, "step": 14080 }, { "epoch": 0.6183299120717477, "grad_norm": 1.640625, "learning_rate": 1.6003405716585798e-05, "loss": 0.3696, "step": 14082 }, { "epoch": 0.6184177305509512, "grad_norm": 1.5546875, "learning_rate": 1.5996951268791695e-05, "loss": 0.3001, "step": 14084 }, { "epoch": 0.6185055490301546, "grad_norm": 1.578125, "learning_rate": 1.599049751045322e-05, "loss": 0.3314, "step": 14086 }, { "epoch": 0.6185933675093581, "grad_norm": 1.59375, "learning_rate": 1.5984044442064606e-05, "loss": 0.3451, "step": 14088 }, { "epoch": 0.6186811859885616, "grad_norm": 1.6796875, "learning_rate": 1.5977592064120027e-05, "loss": 0.3189, "step": 14090 }, { "epoch": 0.6187690044677652, "grad_norm": 1.625, "learning_rate": 1.5971140377113623e-05, "loss": 0.3403, "step": 14092 }, { "epoch": 0.6188568229469686, "grad_norm": 1.703125, "learning_rate": 1.5964689381539445e-05, "loss": 0.3287, "step": 14094 }, { "epoch": 0.6189446414261721, "grad_norm": 1.734375, "learning_rate": 1.595823907789152e-05, "loss": 0.3246, "step": 14096 }, { "epoch": 0.6190324599053756, "grad_norm": 1.5625, "learning_rate": 1.595178946666381e-05, "loss": 0.3468, "step": 14098 }, { "epoch": 0.6191202783845791, "grad_norm": 1.75, "learning_rate": 1.5945340548350235e-05, "loss": 0.329, "step": 14100 }, { "epoch": 0.6192080968637825, "grad_norm": 1.6640625, "learning_rate": 1.5938892323444653e-05, "loss": 0.3288, "step": 14102 }, { "epoch": 0.619295915342986, "grad_norm": 1.75, "learning_rate": 1.593244479244087e-05, "loss": 0.3357, "step": 14104 }, { "epoch": 0.6193837338221896, "grad_norm": 1.6484375, "learning_rate": 1.5925997955832633e-05, "loss": 0.3161, "step": 14106 }, { "epoch": 0.6194715523013931, "grad_norm": 1.7578125, "learning_rate": 1.591955181411365e-05, "loss": 0.3194, "step": 14108 }, { "epoch": 0.6195593707805965, "grad_norm": 1.5234375, "learning_rate": 1.5913106367777567e-05, "loss": 0.3085, "step": 14110 }, { "epoch": 0.6196471892598, "grad_norm": 1.8828125, "learning_rate": 1.590666161731798e-05, "loss": 0.3346, "step": 14112 }, { "epoch": 0.6197350077390035, "grad_norm": 1.7578125, "learning_rate": 1.5900217563228426e-05, "loss": 0.3722, "step": 14114 }, { "epoch": 0.619822826218207, "grad_norm": 1.6953125, "learning_rate": 1.5893774206002393e-05, "loss": 0.3329, "step": 14116 }, { "epoch": 0.6199106446974104, "grad_norm": 1.8203125, "learning_rate": 1.588733154613331e-05, "loss": 0.367, "step": 14118 }, { "epoch": 0.6199984631766139, "grad_norm": 1.7265625, "learning_rate": 1.588088958411456e-05, "loss": 0.3305, "step": 14120 }, { "epoch": 0.6200862816558175, "grad_norm": 1.7421875, "learning_rate": 1.5874448320439475e-05, "loss": 0.3372, "step": 14122 }, { "epoch": 0.620174100135021, "grad_norm": 1.578125, "learning_rate": 1.586800775560132e-05, "loss": 0.3231, "step": 14124 }, { "epoch": 0.6202619186142244, "grad_norm": 1.6171875, "learning_rate": 1.5861567890093328e-05, "loss": 0.3355, "step": 14126 }, { "epoch": 0.6203497370934279, "grad_norm": 1.6953125, "learning_rate": 1.5855128724408655e-05, "loss": 0.3658, "step": 14128 }, { "epoch": 0.6204375555726314, "grad_norm": 1.5390625, "learning_rate": 1.5848690259040414e-05, "loss": 0.3452, "step": 14130 }, { "epoch": 0.6205253740518348, "grad_norm": 1.6796875, "learning_rate": 1.5842252494481664e-05, "loss": 0.3205, "step": 14132 }, { "epoch": 0.6206131925310383, "grad_norm": 1.6953125, "learning_rate": 1.5835815431225418e-05, "loss": 0.3294, "step": 14134 }, { "epoch": 0.6207010110102418, "grad_norm": 1.921875, "learning_rate": 1.5829379069764622e-05, "loss": 0.3441, "step": 14136 }, { "epoch": 0.6207888294894454, "grad_norm": 1.765625, "learning_rate": 1.582294341059218e-05, "loss": 0.3576, "step": 14138 }, { "epoch": 0.6208766479686488, "grad_norm": 1.796875, "learning_rate": 1.5816508454200922e-05, "loss": 0.3253, "step": 14140 }, { "epoch": 0.6209644664478523, "grad_norm": 1.6328125, "learning_rate": 1.581007420108365e-05, "loss": 0.3422, "step": 14142 }, { "epoch": 0.6210522849270558, "grad_norm": 1.9140625, "learning_rate": 1.5803640651733115e-05, "loss": 0.329, "step": 14144 }, { "epoch": 0.6211401034062592, "grad_norm": 1.6171875, "learning_rate": 1.579720780664197e-05, "loss": 0.3193, "step": 14146 }, { "epoch": 0.6212279218854627, "grad_norm": 1.6796875, "learning_rate": 1.5790775666302855e-05, "loss": 0.3378, "step": 14148 }, { "epoch": 0.6213157403646662, "grad_norm": 1.609375, "learning_rate": 1.5784344231208347e-05, "loss": 0.3437, "step": 14150 }, { "epoch": 0.6214035588438698, "grad_norm": 1.5390625, "learning_rate": 1.577791350185097e-05, "loss": 0.3464, "step": 14152 }, { "epoch": 0.6214913773230732, "grad_norm": 1.6484375, "learning_rate": 1.5771483478723188e-05, "loss": 0.3521, "step": 14154 }, { "epoch": 0.6215791958022767, "grad_norm": 1.6015625, "learning_rate": 1.576505416231741e-05, "loss": 0.344, "step": 14156 }, { "epoch": 0.6216670142814802, "grad_norm": 1.6171875, "learning_rate": 1.5758625553126e-05, "loss": 0.3501, "step": 14158 }, { "epoch": 0.6217548327606837, "grad_norm": 1.6640625, "learning_rate": 1.575219765164126e-05, "loss": 0.3525, "step": 14160 }, { "epoch": 0.6218426512398871, "grad_norm": 1.640625, "learning_rate": 1.5745770458355442e-05, "loss": 0.3339, "step": 14162 }, { "epoch": 0.6219304697190906, "grad_norm": 1.671875, "learning_rate": 1.5739343973760743e-05, "loss": 0.3257, "step": 14164 }, { "epoch": 0.6220182881982941, "grad_norm": 1.6328125, "learning_rate": 1.57329181983493e-05, "loss": 0.3317, "step": 14166 }, { "epoch": 0.6221061066774977, "grad_norm": 1.796875, "learning_rate": 1.5726493132613203e-05, "loss": 0.3308, "step": 14168 }, { "epoch": 0.6221939251567011, "grad_norm": 1.5546875, "learning_rate": 1.5720068777044476e-05, "loss": 0.3298, "step": 14170 }, { "epoch": 0.6222817436359046, "grad_norm": 1.5703125, "learning_rate": 1.5713645132135118e-05, "loss": 0.3412, "step": 14172 }, { "epoch": 0.6223695621151081, "grad_norm": 1.6328125, "learning_rate": 1.570722219837705e-05, "loss": 0.3044, "step": 14174 }, { "epoch": 0.6224573805943115, "grad_norm": 1.6875, "learning_rate": 1.570079997626212e-05, "loss": 0.3277, "step": 14176 }, { "epoch": 0.622545199073515, "grad_norm": 1.5859375, "learning_rate": 1.569437846628216e-05, "loss": 0.3342, "step": 14178 }, { "epoch": 0.6226330175527185, "grad_norm": 1.6875, "learning_rate": 1.5687957668928927e-05, "loss": 0.3118, "step": 14180 }, { "epoch": 0.622720836031922, "grad_norm": 1.640625, "learning_rate": 1.5681537584694128e-05, "loss": 0.3523, "step": 14182 }, { "epoch": 0.6228086545111255, "grad_norm": 1.5546875, "learning_rate": 1.567511821406941e-05, "loss": 0.3283, "step": 14184 }, { "epoch": 0.622896472990329, "grad_norm": 1.484375, "learning_rate": 1.566869955754638e-05, "loss": 0.324, "step": 14186 }, { "epoch": 0.6229842914695325, "grad_norm": 1.6328125, "learning_rate": 1.5662281615616582e-05, "loss": 0.3403, "step": 14188 }, { "epoch": 0.623072109948736, "grad_norm": 1.609375, "learning_rate": 1.5655864388771486e-05, "loss": 0.3209, "step": 14190 }, { "epoch": 0.6231599284279394, "grad_norm": 1.5546875, "learning_rate": 1.5649447877502537e-05, "loss": 0.3108, "step": 14192 }, { "epoch": 0.6232477469071429, "grad_norm": 1.671875, "learning_rate": 1.5643032082301106e-05, "loss": 0.3284, "step": 14194 }, { "epoch": 0.6233355653863464, "grad_norm": 1.671875, "learning_rate": 1.5636617003658527e-05, "loss": 0.3531, "step": 14196 }, { "epoch": 0.62342338386555, "grad_norm": 1.5625, "learning_rate": 1.5630202642066062e-05, "loss": 0.3265, "step": 14198 }, { "epoch": 0.6235112023447534, "grad_norm": 1.6875, "learning_rate": 1.5623788998014925e-05, "loss": 0.3469, "step": 14200 }, { "epoch": 0.6235990208239569, "grad_norm": 1.703125, "learning_rate": 1.5617376071996277e-05, "loss": 0.3165, "step": 14202 }, { "epoch": 0.6236868393031604, "grad_norm": 1.53125, "learning_rate": 1.5610963864501212e-05, "loss": 0.3144, "step": 14204 }, { "epoch": 0.6237746577823639, "grad_norm": 1.625, "learning_rate": 1.5604552376020797e-05, "loss": 0.3202, "step": 14206 }, { "epoch": 0.6238624762615673, "grad_norm": 1.6875, "learning_rate": 1.5598141607046004e-05, "loss": 0.3172, "step": 14208 }, { "epoch": 0.6239502947407708, "grad_norm": 1.5625, "learning_rate": 1.559173155806778e-05, "loss": 0.3439, "step": 14210 }, { "epoch": 0.6240381132199743, "grad_norm": 1.671875, "learning_rate": 1.558532222957701e-05, "loss": 0.292, "step": 14212 }, { "epoch": 0.6241259316991778, "grad_norm": 1.71875, "learning_rate": 1.5578913622064523e-05, "loss": 0.3282, "step": 14214 }, { "epoch": 0.6242137501783813, "grad_norm": 1.671875, "learning_rate": 1.5572505736021088e-05, "loss": 0.3597, "step": 14216 }, { "epoch": 0.6243015686575848, "grad_norm": 1.5234375, "learning_rate": 1.5566098571937416e-05, "loss": 0.3474, "step": 14218 }, { "epoch": 0.6243893871367883, "grad_norm": 2.1875, "learning_rate": 1.5559692130304185e-05, "loss": 0.3342, "step": 14220 }, { "epoch": 0.6244772056159917, "grad_norm": 1.671875, "learning_rate": 1.555328641161199e-05, "loss": 0.3256, "step": 14222 }, { "epoch": 0.6245650240951952, "grad_norm": 1.59375, "learning_rate": 1.5546881416351385e-05, "loss": 0.3209, "step": 14224 }, { "epoch": 0.6246528425743987, "grad_norm": 1.53125, "learning_rate": 1.5540477145012876e-05, "loss": 0.3217, "step": 14226 }, { "epoch": 0.6247406610536022, "grad_norm": 1.6328125, "learning_rate": 1.5534073598086888e-05, "loss": 0.3278, "step": 14228 }, { "epoch": 0.6248284795328057, "grad_norm": 1.8046875, "learning_rate": 1.5527670776063812e-05, "loss": 0.3114, "step": 14230 }, { "epoch": 0.6249162980120092, "grad_norm": 1.765625, "learning_rate": 1.552126867943398e-05, "loss": 0.3408, "step": 14232 }, { "epoch": 0.6250041164912127, "grad_norm": 1.65625, "learning_rate": 1.5514867308687665e-05, "loss": 0.3077, "step": 14234 }, { "epoch": 0.6250919349704162, "grad_norm": 1.703125, "learning_rate": 1.5508466664315092e-05, "loss": 0.3432, "step": 14236 }, { "epoch": 0.6251797534496196, "grad_norm": 1.6484375, "learning_rate": 1.550206674680641e-05, "loss": 0.3146, "step": 14238 }, { "epoch": 0.6252675719288231, "grad_norm": 1.8203125, "learning_rate": 1.5495667556651738e-05, "loss": 0.3549, "step": 14240 }, { "epoch": 0.6253553904080266, "grad_norm": 1.4765625, "learning_rate": 1.548926909434112e-05, "loss": 0.3519, "step": 14242 }, { "epoch": 0.62544320888723, "grad_norm": 1.59375, "learning_rate": 1.5482871360364548e-05, "loss": 0.3228, "step": 14244 }, { "epoch": 0.6255310273664336, "grad_norm": 1.8046875, "learning_rate": 1.5476474355211973e-05, "loss": 0.3409, "step": 14246 }, { "epoch": 0.6256188458456371, "grad_norm": 1.7421875, "learning_rate": 1.5470078079373275e-05, "loss": 0.3254, "step": 14248 }, { "epoch": 0.6257066643248406, "grad_norm": 1.6484375, "learning_rate": 1.5463682533338286e-05, "loss": 0.3055, "step": 14250 }, { "epoch": 0.625794482804044, "grad_norm": 1.578125, "learning_rate": 1.545728771759677e-05, "loss": 0.3273, "step": 14252 }, { "epoch": 0.6258823012832475, "grad_norm": 1.53125, "learning_rate": 1.545089363263845e-05, "loss": 0.3481, "step": 14254 }, { "epoch": 0.625970119762451, "grad_norm": 1.7734375, "learning_rate": 1.5444500278952982e-05, "loss": 0.3302, "step": 14256 }, { "epoch": 0.6260579382416545, "grad_norm": 1.5546875, "learning_rate": 1.5438107657029975e-05, "loss": 0.3343, "step": 14258 }, { "epoch": 0.626145756720858, "grad_norm": 1.8984375, "learning_rate": 1.543171576735898e-05, "loss": 0.3062, "step": 14260 }, { "epoch": 0.6262335752000615, "grad_norm": 1.6171875, "learning_rate": 1.542532461042948e-05, "loss": 0.3297, "step": 14262 }, { "epoch": 0.626321393679265, "grad_norm": 1.7265625, "learning_rate": 1.5418934186730923e-05, "loss": 0.3328, "step": 14264 }, { "epoch": 0.6264092121584685, "grad_norm": 1.6171875, "learning_rate": 1.5412544496752686e-05, "loss": 0.3082, "step": 14266 }, { "epoch": 0.6264970306376719, "grad_norm": 1.5625, "learning_rate": 1.540615554098408e-05, "loss": 0.3179, "step": 14268 }, { "epoch": 0.6265848491168754, "grad_norm": 1.59375, "learning_rate": 1.539976731991438e-05, "loss": 0.3258, "step": 14270 }, { "epoch": 0.6266726675960789, "grad_norm": 1.6484375, "learning_rate": 1.5393379834032804e-05, "loss": 0.3596, "step": 14272 }, { "epoch": 0.6267604860752823, "grad_norm": 1.671875, "learning_rate": 1.53869930838285e-05, "loss": 0.3398, "step": 14274 }, { "epoch": 0.6268483045544859, "grad_norm": 1.890625, "learning_rate": 1.5380607069790577e-05, "loss": 0.3567, "step": 14276 }, { "epoch": 0.6269361230336894, "grad_norm": 1.5703125, "learning_rate": 1.5374221792408067e-05, "loss": 0.3124, "step": 14278 }, { "epoch": 0.6270239415128929, "grad_norm": 1.625, "learning_rate": 1.536783725216996e-05, "loss": 0.3209, "step": 14280 }, { "epoch": 0.6271117599920963, "grad_norm": 1.7109375, "learning_rate": 1.5361453449565183e-05, "loss": 0.3161, "step": 14282 }, { "epoch": 0.6271995784712998, "grad_norm": 1.53125, "learning_rate": 1.535507038508261e-05, "loss": 0.35, "step": 14284 }, { "epoch": 0.6272873969505033, "grad_norm": 1.7578125, "learning_rate": 1.5348688059211067e-05, "loss": 0.3225, "step": 14286 }, { "epoch": 0.6273752154297068, "grad_norm": 1.515625, "learning_rate": 1.53423064724393e-05, "loss": 0.3094, "step": 14288 }, { "epoch": 0.6274630339089102, "grad_norm": 1.546875, "learning_rate": 1.5335925625256017e-05, "loss": 0.3151, "step": 14290 }, { "epoch": 0.6275508523881138, "grad_norm": 1.7734375, "learning_rate": 1.5329545518149867e-05, "loss": 0.3424, "step": 14292 }, { "epoch": 0.6276386708673173, "grad_norm": 1.65625, "learning_rate": 1.532316615160944e-05, "loss": 0.3404, "step": 14294 }, { "epoch": 0.6277264893465208, "grad_norm": 2.0, "learning_rate": 1.5316787526123273e-05, "loss": 0.3184, "step": 14296 }, { "epoch": 0.6278143078257242, "grad_norm": 1.6953125, "learning_rate": 1.531040964217984e-05, "loss": 0.3307, "step": 14298 }, { "epoch": 0.6279021263049277, "grad_norm": 1.7890625, "learning_rate": 1.5304032500267557e-05, "loss": 0.3201, "step": 14300 }, { "epoch": 0.6279899447841312, "grad_norm": 1.6875, "learning_rate": 1.529765610087479e-05, "loss": 0.3219, "step": 14302 }, { "epoch": 0.6280777632633346, "grad_norm": 1.8515625, "learning_rate": 1.529128044448984e-05, "loss": 0.3523, "step": 14304 }, { "epoch": 0.6281655817425382, "grad_norm": 1.6796875, "learning_rate": 1.528490553160096e-05, "loss": 0.3338, "step": 14306 }, { "epoch": 0.6282534002217417, "grad_norm": 1.859375, "learning_rate": 1.5278531362696348e-05, "loss": 0.3669, "step": 14308 }, { "epoch": 0.6283412187009452, "grad_norm": 1.671875, "learning_rate": 1.5272157938264127e-05, "loss": 0.3476, "step": 14310 }, { "epoch": 0.6284290371801486, "grad_norm": 1.5625, "learning_rate": 1.5265785258792395e-05, "loss": 0.3176, "step": 14312 }, { "epoch": 0.6285168556593521, "grad_norm": 1.59375, "learning_rate": 1.5259413324769153e-05, "loss": 0.311, "step": 14314 }, { "epoch": 0.6286046741385556, "grad_norm": 1.921875, "learning_rate": 1.5253042136682374e-05, "loss": 0.338, "step": 14316 }, { "epoch": 0.6286924926177591, "grad_norm": 1.71875, "learning_rate": 1.5246671695019966e-05, "loss": 0.3778, "step": 14318 }, { "epoch": 0.6287803110969625, "grad_norm": 1.7734375, "learning_rate": 1.5240302000269774e-05, "loss": 0.3273, "step": 14320 }, { "epoch": 0.6288681295761661, "grad_norm": 1.6875, "learning_rate": 1.5233933052919602e-05, "loss": 0.3136, "step": 14322 }, { "epoch": 0.6289559480553696, "grad_norm": 1.6796875, "learning_rate": 1.5227564853457173e-05, "loss": 0.335, "step": 14324 }, { "epoch": 0.6290437665345731, "grad_norm": 1.6796875, "learning_rate": 1.5221197402370172e-05, "loss": 0.3299, "step": 14326 }, { "epoch": 0.6291315850137765, "grad_norm": 1.640625, "learning_rate": 1.5214830700146227e-05, "loss": 0.3594, "step": 14328 }, { "epoch": 0.62921940349298, "grad_norm": 1.5390625, "learning_rate": 1.520846474727288e-05, "loss": 0.3397, "step": 14330 }, { "epoch": 0.6293072219721835, "grad_norm": 1.6171875, "learning_rate": 1.5202099544237653e-05, "loss": 0.3367, "step": 14332 }, { "epoch": 0.629395040451387, "grad_norm": 1.6484375, "learning_rate": 1.519573509152799e-05, "loss": 0.3105, "step": 14334 }, { "epoch": 0.6294828589305904, "grad_norm": 1.5234375, "learning_rate": 1.5189371389631284e-05, "loss": 0.3246, "step": 14336 }, { "epoch": 0.629570677409794, "grad_norm": 1.609375, "learning_rate": 1.5183008439034873e-05, "loss": 0.3085, "step": 14338 }, { "epoch": 0.6296584958889975, "grad_norm": 1.5546875, "learning_rate": 1.5176646240226025e-05, "loss": 0.3442, "step": 14340 }, { "epoch": 0.6297463143682009, "grad_norm": 1.59375, "learning_rate": 1.5170284793691963e-05, "loss": 0.3371, "step": 14342 }, { "epoch": 0.6298341328474044, "grad_norm": 1.6640625, "learning_rate": 1.5163924099919846e-05, "loss": 0.3339, "step": 14344 }, { "epoch": 0.6299219513266079, "grad_norm": 1.6484375, "learning_rate": 1.5157564159396781e-05, "loss": 0.3161, "step": 14346 }, { "epoch": 0.6300097698058114, "grad_norm": 1.546875, "learning_rate": 1.5151204972609818e-05, "loss": 0.3199, "step": 14348 }, { "epoch": 0.6300975882850148, "grad_norm": 1.59375, "learning_rate": 1.5144846540045932e-05, "loss": 0.3202, "step": 14350 }, { "epoch": 0.6301854067642184, "grad_norm": 1.5390625, "learning_rate": 1.5138488862192063e-05, "loss": 0.3289, "step": 14352 }, { "epoch": 0.6302732252434219, "grad_norm": 1.796875, "learning_rate": 1.5132131939535076e-05, "loss": 0.3225, "step": 14354 }, { "epoch": 0.6303610437226254, "grad_norm": 1.609375, "learning_rate": 1.5125775772561795e-05, "loss": 0.3161, "step": 14356 }, { "epoch": 0.6304488622018288, "grad_norm": 1.6484375, "learning_rate": 1.5119420361758982e-05, "loss": 0.3352, "step": 14358 }, { "epoch": 0.6305366806810323, "grad_norm": 1.65625, "learning_rate": 1.5113065707613317e-05, "loss": 0.3359, "step": 14360 }, { "epoch": 0.6306244991602358, "grad_norm": 1.671875, "learning_rate": 1.5106711810611446e-05, "loss": 0.3457, "step": 14362 }, { "epoch": 0.6307123176394392, "grad_norm": 1.71875, "learning_rate": 1.5100358671239964e-05, "loss": 0.3188, "step": 14364 }, { "epoch": 0.6308001361186427, "grad_norm": 1.71875, "learning_rate": 1.5094006289985385e-05, "loss": 0.3278, "step": 14366 }, { "epoch": 0.6308879545978463, "grad_norm": 1.6953125, "learning_rate": 1.5087654667334174e-05, "loss": 0.3287, "step": 14368 }, { "epoch": 0.6309757730770498, "grad_norm": 1.890625, "learning_rate": 1.5081303803772751e-05, "loss": 0.3183, "step": 14370 }, { "epoch": 0.6310635915562532, "grad_norm": 1.640625, "learning_rate": 1.5074953699787452e-05, "loss": 0.3278, "step": 14372 }, { "epoch": 0.6311514100354567, "grad_norm": 1.7109375, "learning_rate": 1.506860435586459e-05, "loss": 0.2985, "step": 14374 }, { "epoch": 0.6312392285146602, "grad_norm": 1.609375, "learning_rate": 1.506225577249038e-05, "loss": 0.3348, "step": 14376 }, { "epoch": 0.6313270469938637, "grad_norm": 1.609375, "learning_rate": 1.5055907950151004e-05, "loss": 0.3477, "step": 14378 }, { "epoch": 0.6314148654730671, "grad_norm": 1.59375, "learning_rate": 1.5049560889332581e-05, "loss": 0.3124, "step": 14380 }, { "epoch": 0.6315026839522706, "grad_norm": 1.59375, "learning_rate": 1.5043214590521174e-05, "loss": 0.3292, "step": 14382 }, { "epoch": 0.6315905024314742, "grad_norm": 1.6875, "learning_rate": 1.5036869054202782e-05, "loss": 0.3574, "step": 14384 }, { "epoch": 0.6316783209106777, "grad_norm": 1.6328125, "learning_rate": 1.5030524280863342e-05, "loss": 0.2946, "step": 14386 }, { "epoch": 0.6317661393898811, "grad_norm": 1.6328125, "learning_rate": 1.5024180270988741e-05, "loss": 0.352, "step": 14388 }, { "epoch": 0.6318539578690846, "grad_norm": 1.6328125, "learning_rate": 1.5017837025064818e-05, "loss": 0.3146, "step": 14390 }, { "epoch": 0.6319417763482881, "grad_norm": 1.671875, "learning_rate": 1.501149454357732e-05, "loss": 0.3527, "step": 14392 }, { "epoch": 0.6320295948274915, "grad_norm": 1.859375, "learning_rate": 1.5005152827011962e-05, "loss": 0.3064, "step": 14394 }, { "epoch": 0.632117413306695, "grad_norm": 1.6875, "learning_rate": 1.4998811875854396e-05, "loss": 0.3194, "step": 14396 }, { "epoch": 0.6322052317858986, "grad_norm": 1.6328125, "learning_rate": 1.4992471690590216e-05, "loss": 0.3234, "step": 14398 }, { "epoch": 0.6322930502651021, "grad_norm": 1.6875, "learning_rate": 1.4986132271704955e-05, "loss": 0.3459, "step": 14400 }, { "epoch": 0.6323808687443055, "grad_norm": 1.703125, "learning_rate": 1.4979793619684082e-05, "loss": 0.3138, "step": 14402 }, { "epoch": 0.632468687223509, "grad_norm": 1.6484375, "learning_rate": 1.4973455735013015e-05, "loss": 0.327, "step": 14404 }, { "epoch": 0.6325565057027125, "grad_norm": 1.640625, "learning_rate": 1.4967118618177112e-05, "loss": 0.3255, "step": 14406 }, { "epoch": 0.632644324181916, "grad_norm": 1.546875, "learning_rate": 1.4960782269661672e-05, "loss": 0.3421, "step": 14408 }, { "epoch": 0.6327321426611194, "grad_norm": 1.6640625, "learning_rate": 1.4954446689951934e-05, "loss": 0.3058, "step": 14410 }, { "epoch": 0.6328199611403229, "grad_norm": 1.5859375, "learning_rate": 1.4948111879533071e-05, "loss": 0.3665, "step": 14412 }, { "epoch": 0.6329077796195265, "grad_norm": 1.6953125, "learning_rate": 1.4941777838890215e-05, "loss": 0.3343, "step": 14414 }, { "epoch": 0.63299559809873, "grad_norm": 1.6484375, "learning_rate": 1.4935444568508419e-05, "loss": 0.3379, "step": 14416 }, { "epoch": 0.6330834165779334, "grad_norm": 1.6875, "learning_rate": 1.4929112068872691e-05, "loss": 0.3326, "step": 14418 }, { "epoch": 0.6331712350571369, "grad_norm": 1.65625, "learning_rate": 1.4922780340467984e-05, "loss": 0.3167, "step": 14420 }, { "epoch": 0.6332590535363404, "grad_norm": 1.53125, "learning_rate": 1.4916449383779169e-05, "loss": 0.3186, "step": 14422 }, { "epoch": 0.6333468720155438, "grad_norm": 1.8125, "learning_rate": 1.4910119199291072e-05, "loss": 0.3316, "step": 14424 }, { "epoch": 0.6334346904947473, "grad_norm": 1.8359375, "learning_rate": 1.4903789787488474e-05, "loss": 0.3321, "step": 14426 }, { "epoch": 0.6335225089739508, "grad_norm": 1.6328125, "learning_rate": 1.4897461148856068e-05, "loss": 0.3481, "step": 14428 }, { "epoch": 0.6336103274531544, "grad_norm": 1.6953125, "learning_rate": 1.4891133283878509e-05, "loss": 0.3035, "step": 14430 }, { "epoch": 0.6336981459323578, "grad_norm": 1.6875, "learning_rate": 1.4884806193040384e-05, "loss": 0.3554, "step": 14432 }, { "epoch": 0.6337859644115613, "grad_norm": 1.6640625, "learning_rate": 1.487847987682623e-05, "loss": 0.3422, "step": 14434 }, { "epoch": 0.6338737828907648, "grad_norm": 1.6015625, "learning_rate": 1.4872154335720518e-05, "loss": 0.3272, "step": 14436 }, { "epoch": 0.6339616013699683, "grad_norm": 1.65625, "learning_rate": 1.4865829570207645e-05, "loss": 0.3523, "step": 14438 }, { "epoch": 0.6340494198491717, "grad_norm": 1.59375, "learning_rate": 1.4859505580771977e-05, "loss": 0.3274, "step": 14440 }, { "epoch": 0.6341372383283752, "grad_norm": 1.625, "learning_rate": 1.48531823678978e-05, "loss": 0.3391, "step": 14442 }, { "epoch": 0.6342250568075787, "grad_norm": 1.640625, "learning_rate": 1.484685993206935e-05, "loss": 0.3422, "step": 14444 }, { "epoch": 0.6343128752867823, "grad_norm": 1.703125, "learning_rate": 1.4840538273770807e-05, "loss": 0.3105, "step": 14446 }, { "epoch": 0.6344006937659857, "grad_norm": 1.6953125, "learning_rate": 1.4834217393486272e-05, "loss": 0.3629, "step": 14448 }, { "epoch": 0.6344885122451892, "grad_norm": 1.6171875, "learning_rate": 1.4827897291699816e-05, "loss": 0.3606, "step": 14450 }, { "epoch": 0.6345763307243927, "grad_norm": 1.640625, "learning_rate": 1.4821577968895414e-05, "loss": 0.3118, "step": 14452 }, { "epoch": 0.6346641492035962, "grad_norm": 1.6640625, "learning_rate": 1.4815259425557013e-05, "loss": 0.3374, "step": 14454 }, { "epoch": 0.6347519676827996, "grad_norm": 1.6640625, "learning_rate": 1.4808941662168485e-05, "loss": 0.3343, "step": 14456 }, { "epoch": 0.6348397861620031, "grad_norm": 1.625, "learning_rate": 1.4802624679213645e-05, "loss": 0.3287, "step": 14458 }, { "epoch": 0.6349276046412067, "grad_norm": 1.78125, "learning_rate": 1.4796308477176258e-05, "loss": 0.3231, "step": 14460 }, { "epoch": 0.6350154231204101, "grad_norm": 1.6171875, "learning_rate": 1.4789993056540013e-05, "loss": 0.3116, "step": 14462 }, { "epoch": 0.6351032415996136, "grad_norm": 1.5859375, "learning_rate": 1.4783678417788544e-05, "loss": 0.3213, "step": 14464 }, { "epoch": 0.6351910600788171, "grad_norm": 1.65625, "learning_rate": 1.477736456140543e-05, "loss": 0.313, "step": 14466 }, { "epoch": 0.6352788785580206, "grad_norm": 1.6484375, "learning_rate": 1.4771051487874189e-05, "loss": 0.321, "step": 14468 }, { "epoch": 0.635366697037224, "grad_norm": 1.609375, "learning_rate": 1.4764739197678279e-05, "loss": 0.332, "step": 14470 }, { "epoch": 0.6354545155164275, "grad_norm": 1.6015625, "learning_rate": 1.4758427691301096e-05, "loss": 0.3222, "step": 14472 }, { "epoch": 0.635542333995631, "grad_norm": 1.65625, "learning_rate": 1.475211696922597e-05, "loss": 0.3467, "step": 14474 }, { "epoch": 0.6356301524748346, "grad_norm": 1.7421875, "learning_rate": 1.4745807031936188e-05, "loss": 0.3384, "step": 14476 }, { "epoch": 0.635717970954038, "grad_norm": 1.6875, "learning_rate": 1.473949787991496e-05, "loss": 0.3454, "step": 14478 }, { "epoch": 0.6358057894332415, "grad_norm": 1.6953125, "learning_rate": 1.4733189513645443e-05, "loss": 0.3301, "step": 14480 }, { "epoch": 0.635893607912445, "grad_norm": 1.65625, "learning_rate": 1.4726881933610742e-05, "loss": 0.3258, "step": 14482 }, { "epoch": 0.6359814263916485, "grad_norm": 1.5859375, "learning_rate": 1.472057514029388e-05, "loss": 0.3416, "step": 14484 }, { "epoch": 0.6360692448708519, "grad_norm": 1.5, "learning_rate": 1.4714269134177836e-05, "loss": 0.3131, "step": 14486 }, { "epoch": 0.6361570633500554, "grad_norm": 1.734375, "learning_rate": 1.4707963915745531e-05, "loss": 0.3115, "step": 14488 }, { "epoch": 0.6362448818292589, "grad_norm": 1.515625, "learning_rate": 1.4701659485479813e-05, "loss": 0.3337, "step": 14490 }, { "epoch": 0.6363327003084625, "grad_norm": 1.7421875, "learning_rate": 1.4695355843863479e-05, "loss": 0.348, "step": 14492 }, { "epoch": 0.6364205187876659, "grad_norm": 1.7109375, "learning_rate": 1.4689052991379266e-05, "loss": 0.3505, "step": 14494 }, { "epoch": 0.6365083372668694, "grad_norm": 1.9296875, "learning_rate": 1.4682750928509845e-05, "loss": 0.3427, "step": 14496 }, { "epoch": 0.6365961557460729, "grad_norm": 1.6171875, "learning_rate": 1.4676449655737837e-05, "loss": 0.3425, "step": 14498 }, { "epoch": 0.6366839742252763, "grad_norm": 1.7890625, "learning_rate": 1.4670149173545783e-05, "loss": 0.3313, "step": 14500 }, { "epoch": 0.6367717927044798, "grad_norm": 1.625, "learning_rate": 1.4663849482416184e-05, "loss": 0.3294, "step": 14502 }, { "epoch": 0.6368596111836833, "grad_norm": 1.59375, "learning_rate": 1.4657550582831467e-05, "loss": 0.3425, "step": 14504 }, { "epoch": 0.6369474296628869, "grad_norm": 1.828125, "learning_rate": 1.4651252475274007e-05, "loss": 0.3109, "step": 14506 }, { "epoch": 0.6370352481420903, "grad_norm": 1.6328125, "learning_rate": 1.4644955160226118e-05, "loss": 0.3427, "step": 14508 }, { "epoch": 0.6371230666212938, "grad_norm": 1.8125, "learning_rate": 1.4638658638170038e-05, "loss": 0.3422, "step": 14510 }, { "epoch": 0.6372108851004973, "grad_norm": 1.9453125, "learning_rate": 1.4632362909587977e-05, "loss": 0.3501, "step": 14512 }, { "epoch": 0.6372987035797008, "grad_norm": 1.8828125, "learning_rate": 1.4626067974962038e-05, "loss": 0.3177, "step": 14514 }, { "epoch": 0.6373865220589042, "grad_norm": 1.5, "learning_rate": 1.46197738347743e-05, "loss": 0.3215, "step": 14516 }, { "epoch": 0.6374743405381077, "grad_norm": 1.59375, "learning_rate": 1.461348048950677e-05, "loss": 0.3276, "step": 14518 }, { "epoch": 0.6375621590173112, "grad_norm": 1.9140625, "learning_rate": 1.4607187939641393e-05, "loss": 0.3418, "step": 14520 }, { "epoch": 0.6376499774965148, "grad_norm": 1.7578125, "learning_rate": 1.4600896185660057e-05, "loss": 0.3179, "step": 14522 }, { "epoch": 0.6377377959757182, "grad_norm": 1.609375, "learning_rate": 1.459460522804459e-05, "loss": 0.328, "step": 14524 }, { "epoch": 0.6378256144549217, "grad_norm": 1.9453125, "learning_rate": 1.4588315067276737e-05, "loss": 0.3368, "step": 14526 }, { "epoch": 0.6379134329341252, "grad_norm": 1.734375, "learning_rate": 1.458202570383822e-05, "loss": 0.3467, "step": 14528 }, { "epoch": 0.6380012514133286, "grad_norm": 1.71875, "learning_rate": 1.4575737138210674e-05, "loss": 0.2989, "step": 14530 }, { "epoch": 0.6380890698925321, "grad_norm": 1.7734375, "learning_rate": 1.4569449370875665e-05, "loss": 0.3633, "step": 14532 }, { "epoch": 0.6381768883717356, "grad_norm": 1.578125, "learning_rate": 1.4563162402314737e-05, "loss": 0.3455, "step": 14534 }, { "epoch": 0.6382647068509391, "grad_norm": 1.78125, "learning_rate": 1.4556876233009323e-05, "loss": 0.334, "step": 14536 }, { "epoch": 0.6383525253301426, "grad_norm": 1.5546875, "learning_rate": 1.4550590863440838e-05, "loss": 0.3335, "step": 14538 }, { "epoch": 0.6384403438093461, "grad_norm": 1.6875, "learning_rate": 1.4544306294090612e-05, "loss": 0.3423, "step": 14540 }, { "epoch": 0.6385281622885496, "grad_norm": 1.5234375, "learning_rate": 1.4538022525439915e-05, "loss": 0.3175, "step": 14542 }, { "epoch": 0.638615980767753, "grad_norm": 1.4921875, "learning_rate": 1.4531739557969964e-05, "loss": 0.3087, "step": 14544 }, { "epoch": 0.6387037992469565, "grad_norm": 1.59375, "learning_rate": 1.4525457392161895e-05, "loss": 0.3289, "step": 14546 }, { "epoch": 0.63879161772616, "grad_norm": 1.6640625, "learning_rate": 1.4519176028496817e-05, "loss": 0.3231, "step": 14548 }, { "epoch": 0.6388794362053635, "grad_norm": 1.796875, "learning_rate": 1.4512895467455745e-05, "loss": 0.3068, "step": 14550 }, { "epoch": 0.638967254684567, "grad_norm": 1.71875, "learning_rate": 1.4506615709519661e-05, "loss": 0.3506, "step": 14552 }, { "epoch": 0.6390550731637705, "grad_norm": 1.671875, "learning_rate": 1.4500336755169464e-05, "loss": 0.3405, "step": 14554 }, { "epoch": 0.639142891642974, "grad_norm": 1.671875, "learning_rate": 1.449405860488598e-05, "loss": 0.3079, "step": 14556 }, { "epoch": 0.6392307101221775, "grad_norm": 1.6015625, "learning_rate": 1.4487781259150018e-05, "loss": 0.3093, "step": 14558 }, { "epoch": 0.6393185286013809, "grad_norm": 1.9609375, "learning_rate": 1.448150471844228e-05, "loss": 0.3463, "step": 14560 }, { "epoch": 0.6394063470805844, "grad_norm": 1.609375, "learning_rate": 1.4475228983243438e-05, "loss": 0.3015, "step": 14562 }, { "epoch": 0.6394941655597879, "grad_norm": 1.6796875, "learning_rate": 1.4468954054034087e-05, "loss": 0.3246, "step": 14564 }, { "epoch": 0.6395819840389914, "grad_norm": 1.703125, "learning_rate": 1.4462679931294749e-05, "loss": 0.3138, "step": 14566 }, { "epoch": 0.6396698025181949, "grad_norm": 1.546875, "learning_rate": 1.4456406615505916e-05, "loss": 0.3133, "step": 14568 }, { "epoch": 0.6397576209973984, "grad_norm": 1.9609375, "learning_rate": 1.4450134107147983e-05, "loss": 0.3602, "step": 14570 }, { "epoch": 0.6398454394766019, "grad_norm": 1.6796875, "learning_rate": 1.4443862406701325e-05, "loss": 0.331, "step": 14572 }, { "epoch": 0.6399332579558054, "grad_norm": 1.7109375, "learning_rate": 1.4437591514646198e-05, "loss": 0.3548, "step": 14574 }, { "epoch": 0.6400210764350088, "grad_norm": 1.8671875, "learning_rate": 1.4431321431462858e-05, "loss": 0.3461, "step": 14576 }, { "epoch": 0.6401088949142123, "grad_norm": 1.671875, "learning_rate": 1.4425052157631441e-05, "loss": 0.3374, "step": 14578 }, { "epoch": 0.6401967133934158, "grad_norm": 1.6796875, "learning_rate": 1.4418783693632077e-05, "loss": 0.3284, "step": 14580 }, { "epoch": 0.6402845318726192, "grad_norm": 1.5625, "learning_rate": 1.4412516039944793e-05, "loss": 0.3184, "step": 14582 }, { "epoch": 0.6403723503518228, "grad_norm": 1.8828125, "learning_rate": 1.4406249197049559e-05, "loss": 0.32, "step": 14584 }, { "epoch": 0.6404601688310263, "grad_norm": 1.71875, "learning_rate": 1.4399983165426312e-05, "loss": 0.3213, "step": 14586 }, { "epoch": 0.6405479873102298, "grad_norm": 1.609375, "learning_rate": 1.4393717945554885e-05, "loss": 0.3198, "step": 14588 }, { "epoch": 0.6406358057894332, "grad_norm": 1.5546875, "learning_rate": 1.438745353791509e-05, "loss": 0.3314, "step": 14590 }, { "epoch": 0.6407236242686367, "grad_norm": 1.484375, "learning_rate": 1.4381189942986644e-05, "loss": 0.3409, "step": 14592 }, { "epoch": 0.6408114427478402, "grad_norm": 1.5078125, "learning_rate": 1.4374927161249212e-05, "loss": 0.3322, "step": 14594 }, { "epoch": 0.6408992612270437, "grad_norm": 1.640625, "learning_rate": 1.4368665193182416e-05, "loss": 0.3318, "step": 14596 }, { "epoch": 0.6409870797062472, "grad_norm": 1.6875, "learning_rate": 1.4362404039265775e-05, "loss": 0.3138, "step": 14598 }, { "epoch": 0.6410748981854507, "grad_norm": 1.6875, "learning_rate": 1.435614369997879e-05, "loss": 0.3314, "step": 14600 }, { "epoch": 0.6411627166646542, "grad_norm": 1.5, "learning_rate": 1.4349884175800876e-05, "loss": 0.2977, "step": 14602 }, { "epoch": 0.6412505351438577, "grad_norm": 1.703125, "learning_rate": 1.4343625467211386e-05, "loss": 0.3434, "step": 14604 }, { "epoch": 0.6413383536230611, "grad_norm": 1.8125, "learning_rate": 1.4337367574689609e-05, "loss": 0.3048, "step": 14606 }, { "epoch": 0.6414261721022646, "grad_norm": 1.8359375, "learning_rate": 1.4331110498714773e-05, "loss": 0.3218, "step": 14608 }, { "epoch": 0.6415139905814681, "grad_norm": 1.59375, "learning_rate": 1.4324854239766059e-05, "loss": 0.3395, "step": 14610 }, { "epoch": 0.6416018090606715, "grad_norm": 1.5390625, "learning_rate": 1.4318598798322557e-05, "loss": 0.3108, "step": 14612 }, { "epoch": 0.6416896275398751, "grad_norm": 1.578125, "learning_rate": 1.4312344174863329e-05, "loss": 0.3237, "step": 14614 }, { "epoch": 0.6417774460190786, "grad_norm": 1.625, "learning_rate": 1.4306090369867348e-05, "loss": 0.343, "step": 14616 }, { "epoch": 0.6418652644982821, "grad_norm": 1.5, "learning_rate": 1.429983738381352e-05, "loss": 0.3108, "step": 14618 }, { "epoch": 0.6419530829774855, "grad_norm": 1.6484375, "learning_rate": 1.4293585217180717e-05, "loss": 0.3313, "step": 14620 }, { "epoch": 0.642040901456689, "grad_norm": 1.6015625, "learning_rate": 1.4287333870447716e-05, "loss": 0.3604, "step": 14622 }, { "epoch": 0.6421287199358925, "grad_norm": 1.6640625, "learning_rate": 1.4281083344093265e-05, "loss": 0.3235, "step": 14624 }, { "epoch": 0.642216538415096, "grad_norm": 1.6015625, "learning_rate": 1.4274833638596024e-05, "loss": 0.3281, "step": 14626 }, { "epoch": 0.6423043568942994, "grad_norm": 1.59375, "learning_rate": 1.4268584754434583e-05, "loss": 0.309, "step": 14628 }, { "epoch": 0.642392175373503, "grad_norm": 1.6484375, "learning_rate": 1.4262336692087503e-05, "loss": 0.3422, "step": 14630 }, { "epoch": 0.6424799938527065, "grad_norm": 1.6796875, "learning_rate": 1.4256089452033241e-05, "loss": 0.3389, "step": 14632 }, { "epoch": 0.64256781233191, "grad_norm": 1.65625, "learning_rate": 1.4249843034750246e-05, "loss": 0.3091, "step": 14634 }, { "epoch": 0.6426556308111134, "grad_norm": 1.625, "learning_rate": 1.4243597440716827e-05, "loss": 0.3495, "step": 14636 }, { "epoch": 0.6427434492903169, "grad_norm": 1.6875, "learning_rate": 1.4237352670411308e-05, "loss": 0.3063, "step": 14638 }, { "epoch": 0.6428312677695204, "grad_norm": 1.6640625, "learning_rate": 1.423110872431189e-05, "loss": 0.3091, "step": 14640 }, { "epoch": 0.6429190862487238, "grad_norm": 1.6796875, "learning_rate": 1.4224865602896757e-05, "loss": 0.3372, "step": 14642 }, { "epoch": 0.6430069047279273, "grad_norm": 1.5078125, "learning_rate": 1.4218623306643997e-05, "loss": 0.3061, "step": 14644 }, { "epoch": 0.6430947232071309, "grad_norm": 1.703125, "learning_rate": 1.421238183603164e-05, "loss": 0.3507, "step": 14646 }, { "epoch": 0.6431825416863344, "grad_norm": 1.640625, "learning_rate": 1.4206141191537682e-05, "loss": 0.3205, "step": 14648 }, { "epoch": 0.6432703601655378, "grad_norm": 1.65625, "learning_rate": 1.4199901373640005e-05, "loss": 0.3764, "step": 14650 }, { "epoch": 0.6433581786447413, "grad_norm": 1.5625, "learning_rate": 1.419366238281648e-05, "loss": 0.3491, "step": 14652 }, { "epoch": 0.6434459971239448, "grad_norm": 1.578125, "learning_rate": 1.4187424219544882e-05, "loss": 0.3463, "step": 14654 }, { "epoch": 0.6435338156031483, "grad_norm": 1.6796875, "learning_rate": 1.4181186884302916e-05, "loss": 0.343, "step": 14656 }, { "epoch": 0.6436216340823517, "grad_norm": 1.8984375, "learning_rate": 1.4174950377568264e-05, "loss": 0.373, "step": 14658 }, { "epoch": 0.6437094525615553, "grad_norm": 1.7109375, "learning_rate": 1.4168714699818498e-05, "loss": 0.3412, "step": 14660 }, { "epoch": 0.6437972710407588, "grad_norm": 1.90625, "learning_rate": 1.4162479851531163e-05, "loss": 0.3298, "step": 14662 }, { "epoch": 0.6438850895199623, "grad_norm": 1.625, "learning_rate": 1.4156245833183723e-05, "loss": 0.3244, "step": 14664 }, { "epoch": 0.6439729079991657, "grad_norm": 1.640625, "learning_rate": 1.4150012645253575e-05, "loss": 0.3533, "step": 14666 }, { "epoch": 0.6440607264783692, "grad_norm": 1.8125, "learning_rate": 1.4143780288218058e-05, "loss": 0.3331, "step": 14668 }, { "epoch": 0.6441485449575727, "grad_norm": 1.609375, "learning_rate": 1.4137548762554443e-05, "loss": 0.3425, "step": 14670 }, { "epoch": 0.6442363634367761, "grad_norm": 1.640625, "learning_rate": 1.4131318068739951e-05, "loss": 0.3373, "step": 14672 }, { "epoch": 0.6443241819159796, "grad_norm": 1.546875, "learning_rate": 1.4125088207251722e-05, "loss": 0.3299, "step": 14674 }, { "epoch": 0.6444120003951832, "grad_norm": 1.609375, "learning_rate": 1.4118859178566853e-05, "loss": 0.3592, "step": 14676 }, { "epoch": 0.6444998188743867, "grad_norm": 1.53125, "learning_rate": 1.4112630983162356e-05, "loss": 0.3314, "step": 14678 }, { "epoch": 0.6445876373535901, "grad_norm": 1.6015625, "learning_rate": 1.410640362151518e-05, "loss": 0.3222, "step": 14680 }, { "epoch": 0.6446754558327936, "grad_norm": 1.578125, "learning_rate": 1.4100177094102235e-05, "loss": 0.3264, "step": 14682 }, { "epoch": 0.6447632743119971, "grad_norm": 1.640625, "learning_rate": 1.4093951401400335e-05, "loss": 0.3309, "step": 14684 }, { "epoch": 0.6448510927912006, "grad_norm": 1.6953125, "learning_rate": 1.4087726543886254e-05, "loss": 0.3072, "step": 14686 }, { "epoch": 0.644938911270404, "grad_norm": 1.59375, "learning_rate": 1.4081502522036693e-05, "loss": 0.3241, "step": 14688 }, { "epoch": 0.6450267297496075, "grad_norm": 1.6640625, "learning_rate": 1.4075279336328279e-05, "loss": 0.3302, "step": 14690 }, { "epoch": 0.6451145482288111, "grad_norm": 1.578125, "learning_rate": 1.40690569872376e-05, "loss": 0.3425, "step": 14692 }, { "epoch": 0.6452023667080146, "grad_norm": 1.5390625, "learning_rate": 1.4062835475241148e-05, "loss": 0.3169, "step": 14694 }, { "epoch": 0.645290185187218, "grad_norm": 1.6171875, "learning_rate": 1.4056614800815396e-05, "loss": 0.3517, "step": 14696 }, { "epoch": 0.6453780036664215, "grad_norm": 1.59375, "learning_rate": 1.4050394964436686e-05, "loss": 0.3589, "step": 14698 }, { "epoch": 0.645465822145625, "grad_norm": 1.6328125, "learning_rate": 1.4044175966581363e-05, "loss": 0.3343, "step": 14700 }, { "epoch": 0.6455536406248285, "grad_norm": 1.6171875, "learning_rate": 1.4037957807725666e-05, "loss": 0.3384, "step": 14702 }, { "epoch": 0.6456414591040319, "grad_norm": 1.6875, "learning_rate": 1.403174048834579e-05, "loss": 0.3348, "step": 14704 }, { "epoch": 0.6457292775832355, "grad_norm": 1.6640625, "learning_rate": 1.4025524008917861e-05, "loss": 0.3253, "step": 14706 }, { "epoch": 0.645817096062439, "grad_norm": 1.703125, "learning_rate": 1.4019308369917928e-05, "loss": 0.3263, "step": 14708 }, { "epoch": 0.6459049145416424, "grad_norm": 1.6796875, "learning_rate": 1.4013093571821994e-05, "loss": 0.3211, "step": 14710 }, { "epoch": 0.6459927330208459, "grad_norm": 1.6484375, "learning_rate": 1.4006879615105984e-05, "loss": 0.3175, "step": 14712 }, { "epoch": 0.6460805515000494, "grad_norm": 1.7421875, "learning_rate": 1.400066650024578e-05, "loss": 0.3165, "step": 14714 }, { "epoch": 0.6461683699792529, "grad_norm": 1.453125, "learning_rate": 1.3994454227717168e-05, "loss": 0.3272, "step": 14716 }, { "epoch": 0.6462561884584563, "grad_norm": 1.609375, "learning_rate": 1.398824279799588e-05, "loss": 0.3444, "step": 14718 }, { "epoch": 0.6463440069376598, "grad_norm": 1.6328125, "learning_rate": 1.3982032211557609e-05, "loss": 0.3389, "step": 14720 }, { "epoch": 0.6464318254168634, "grad_norm": 1.6171875, "learning_rate": 1.3975822468877942e-05, "loss": 0.3294, "step": 14722 }, { "epoch": 0.6465196438960669, "grad_norm": 1.5390625, "learning_rate": 1.396961357043244e-05, "loss": 0.3239, "step": 14724 }, { "epoch": 0.6466074623752703, "grad_norm": 1.5625, "learning_rate": 1.3963405516696579e-05, "loss": 0.3458, "step": 14726 }, { "epoch": 0.6466952808544738, "grad_norm": 1.6328125, "learning_rate": 1.3957198308145769e-05, "loss": 0.3198, "step": 14728 }, { "epoch": 0.6467830993336773, "grad_norm": 1.5625, "learning_rate": 1.395099194525536e-05, "loss": 0.3392, "step": 14730 }, { "epoch": 0.6468709178128808, "grad_norm": 1.671875, "learning_rate": 1.3944786428500623e-05, "loss": 0.3456, "step": 14732 }, { "epoch": 0.6469587362920842, "grad_norm": 1.578125, "learning_rate": 1.3938581758356806e-05, "loss": 0.3485, "step": 14734 }, { "epoch": 0.6470465547712877, "grad_norm": 1.5703125, "learning_rate": 1.3932377935299035e-05, "loss": 0.3474, "step": 14736 }, { "epoch": 0.6471343732504913, "grad_norm": 1.75, "learning_rate": 1.3926174959802429e-05, "loss": 0.3339, "step": 14738 }, { "epoch": 0.6472221917296948, "grad_norm": 1.7109375, "learning_rate": 1.3919972832341997e-05, "loss": 0.3329, "step": 14740 }, { "epoch": 0.6473100102088982, "grad_norm": 1.7109375, "learning_rate": 1.391377155339269e-05, "loss": 0.3519, "step": 14742 }, { "epoch": 0.6473978286881017, "grad_norm": 1.609375, "learning_rate": 1.3907571123429427e-05, "loss": 0.3232, "step": 14744 }, { "epoch": 0.6474856471673052, "grad_norm": 1.734375, "learning_rate": 1.3901371542927016e-05, "loss": 0.3239, "step": 14746 }, { "epoch": 0.6475734656465086, "grad_norm": 1.5234375, "learning_rate": 1.3895172812360244e-05, "loss": 0.3496, "step": 14748 }, { "epoch": 0.6476612841257121, "grad_norm": 1.7421875, "learning_rate": 1.3888974932203797e-05, "loss": 0.3183, "step": 14750 }, { "epoch": 0.6477491026049157, "grad_norm": 1.53125, "learning_rate": 1.3882777902932306e-05, "loss": 0.3365, "step": 14752 }, { "epoch": 0.6478369210841192, "grad_norm": 1.6953125, "learning_rate": 1.387658172502036e-05, "loss": 0.314, "step": 14754 }, { "epoch": 0.6479247395633226, "grad_norm": 1.6953125, "learning_rate": 1.3870386398942447e-05, "loss": 0.3087, "step": 14756 }, { "epoch": 0.6480125580425261, "grad_norm": 1.5546875, "learning_rate": 1.3864191925173015e-05, "loss": 0.3027, "step": 14758 }, { "epoch": 0.6481003765217296, "grad_norm": 1.7890625, "learning_rate": 1.3857998304186423e-05, "loss": 0.3307, "step": 14760 }, { "epoch": 0.648188195000933, "grad_norm": 1.703125, "learning_rate": 1.3851805536457003e-05, "loss": 0.3298, "step": 14762 }, { "epoch": 0.6482760134801365, "grad_norm": 1.8046875, "learning_rate": 1.3845613622458986e-05, "loss": 0.3379, "step": 14764 }, { "epoch": 0.64836383195934, "grad_norm": 1.625, "learning_rate": 1.3839422562666543e-05, "loss": 0.3376, "step": 14766 }, { "epoch": 0.6484516504385436, "grad_norm": 1.6953125, "learning_rate": 1.3833232357553804e-05, "loss": 0.3247, "step": 14768 }, { "epoch": 0.648539468917747, "grad_norm": 1.6328125, "learning_rate": 1.3827043007594798e-05, "loss": 0.2991, "step": 14770 }, { "epoch": 0.6486272873969505, "grad_norm": 1.7578125, "learning_rate": 1.3820854513263532e-05, "loss": 0.3261, "step": 14772 }, { "epoch": 0.648715105876154, "grad_norm": 1.6640625, "learning_rate": 1.381466687503389e-05, "loss": 0.3293, "step": 14774 }, { "epoch": 0.6488029243553575, "grad_norm": 1.515625, "learning_rate": 1.3808480093379755e-05, "loss": 0.3416, "step": 14776 }, { "epoch": 0.6488907428345609, "grad_norm": 1.6171875, "learning_rate": 1.3802294168774893e-05, "loss": 0.3469, "step": 14778 }, { "epoch": 0.6489785613137644, "grad_norm": 1.609375, "learning_rate": 1.3796109101693022e-05, "loss": 0.3225, "step": 14780 }, { "epoch": 0.6490663797929679, "grad_norm": 1.90625, "learning_rate": 1.3789924892607808e-05, "loss": 0.3461, "step": 14782 }, { "epoch": 0.6491541982721715, "grad_norm": 1.625, "learning_rate": 1.3783741541992826e-05, "loss": 0.332, "step": 14784 }, { "epoch": 0.6492420167513749, "grad_norm": 1.890625, "learning_rate": 1.3777559050321615e-05, "loss": 0.3151, "step": 14786 }, { "epoch": 0.6493298352305784, "grad_norm": 1.7734375, "learning_rate": 1.3771377418067621e-05, "loss": 0.2975, "step": 14788 }, { "epoch": 0.6494176537097819, "grad_norm": 1.5546875, "learning_rate": 1.3765196645704236e-05, "loss": 0.3272, "step": 14790 }, { "epoch": 0.6495054721889854, "grad_norm": 1.6015625, "learning_rate": 1.3759016733704783e-05, "loss": 0.3647, "step": 14792 }, { "epoch": 0.6495932906681888, "grad_norm": 1.703125, "learning_rate": 1.375283768254252e-05, "loss": 0.3253, "step": 14794 }, { "epoch": 0.6496811091473923, "grad_norm": 1.59375, "learning_rate": 1.3746659492690645e-05, "loss": 0.3017, "step": 14796 }, { "epoch": 0.6497689276265958, "grad_norm": 1.6171875, "learning_rate": 1.3740482164622279e-05, "loss": 0.3483, "step": 14798 }, { "epoch": 0.6498567461057994, "grad_norm": 1.6796875, "learning_rate": 1.3734305698810496e-05, "loss": 0.3406, "step": 14800 }, { "epoch": 0.6499445645850028, "grad_norm": 1.59375, "learning_rate": 1.3728130095728284e-05, "loss": 0.3054, "step": 14802 }, { "epoch": 0.6500323830642063, "grad_norm": 1.6484375, "learning_rate": 1.3721955355848562e-05, "loss": 0.335, "step": 14804 }, { "epoch": 0.6501202015434098, "grad_norm": 1.5234375, "learning_rate": 1.371578147964421e-05, "loss": 0.3368, "step": 14806 }, { "epoch": 0.6502080200226132, "grad_norm": 1.6484375, "learning_rate": 1.3709608467588008e-05, "loss": 0.3173, "step": 14808 }, { "epoch": 0.6502958385018167, "grad_norm": 1.6015625, "learning_rate": 1.3703436320152708e-05, "loss": 0.3083, "step": 14810 }, { "epoch": 0.6503836569810202, "grad_norm": 1.65625, "learning_rate": 1.3697265037810964e-05, "loss": 0.3077, "step": 14812 }, { "epoch": 0.6504714754602238, "grad_norm": 1.515625, "learning_rate": 1.3691094621035358e-05, "loss": 0.3309, "step": 14814 }, { "epoch": 0.6505592939394272, "grad_norm": 1.640625, "learning_rate": 1.368492507029845e-05, "loss": 0.3468, "step": 14816 }, { "epoch": 0.6506471124186307, "grad_norm": 1.6875, "learning_rate": 1.367875638607269e-05, "loss": 0.3506, "step": 14818 }, { "epoch": 0.6507349308978342, "grad_norm": 1.703125, "learning_rate": 1.367258856883048e-05, "loss": 0.3203, "step": 14820 }, { "epoch": 0.6508227493770377, "grad_norm": 1.765625, "learning_rate": 1.3666421619044146e-05, "loss": 0.3387, "step": 14822 }, { "epoch": 0.6509105678562411, "grad_norm": 1.625, "learning_rate": 1.366025553718597e-05, "loss": 0.335, "step": 14824 }, { "epoch": 0.6509983863354446, "grad_norm": 1.703125, "learning_rate": 1.3654090323728142e-05, "loss": 0.3325, "step": 14826 }, { "epoch": 0.6510862048146481, "grad_norm": 1.6328125, "learning_rate": 1.3647925979142789e-05, "loss": 0.326, "step": 14828 }, { "epoch": 0.6511740232938517, "grad_norm": 1.625, "learning_rate": 1.3641762503901994e-05, "loss": 0.3521, "step": 14830 }, { "epoch": 0.6512618417730551, "grad_norm": 1.8671875, "learning_rate": 1.3635599898477738e-05, "loss": 0.348, "step": 14832 }, { "epoch": 0.6513496602522586, "grad_norm": 1.8046875, "learning_rate": 1.3629438163341978e-05, "loss": 0.3248, "step": 14834 }, { "epoch": 0.6514374787314621, "grad_norm": 1.6484375, "learning_rate": 1.3623277298966558e-05, "loss": 0.3101, "step": 14836 }, { "epoch": 0.6515252972106655, "grad_norm": 1.6953125, "learning_rate": 1.36171173058233e-05, "loss": 0.3505, "step": 14838 }, { "epoch": 0.651613115689869, "grad_norm": 1.671875, "learning_rate": 1.3610958184383928e-05, "loss": 0.2986, "step": 14840 }, { "epoch": 0.6517009341690725, "grad_norm": 1.53125, "learning_rate": 1.3604799935120099e-05, "loss": 0.307, "step": 14842 }, { "epoch": 0.651788752648276, "grad_norm": 1.625, "learning_rate": 1.3598642558503432e-05, "loss": 0.3515, "step": 14844 }, { "epoch": 0.6518765711274795, "grad_norm": 1.5546875, "learning_rate": 1.3592486055005441e-05, "loss": 0.3184, "step": 14846 }, { "epoch": 0.651964389606683, "grad_norm": 1.7890625, "learning_rate": 1.3586330425097621e-05, "loss": 0.3355, "step": 14848 }, { "epoch": 0.6520522080858865, "grad_norm": 1.65625, "learning_rate": 1.3580175669251336e-05, "loss": 0.3188, "step": 14850 }, { "epoch": 0.65214002656509, "grad_norm": 1.671875, "learning_rate": 1.3574021787937944e-05, "loss": 0.3224, "step": 14852 }, { "epoch": 0.6522278450442934, "grad_norm": 1.6328125, "learning_rate": 1.35678687816287e-05, "loss": 0.3439, "step": 14854 }, { "epoch": 0.6523156635234969, "grad_norm": 1.5234375, "learning_rate": 1.3561716650794798e-05, "loss": 0.356, "step": 14856 }, { "epoch": 0.6524034820027004, "grad_norm": 1.578125, "learning_rate": 1.3555565395907388e-05, "loss": 0.3344, "step": 14858 }, { "epoch": 0.652491300481904, "grad_norm": 1.625, "learning_rate": 1.3549415017437512e-05, "loss": 0.3208, "step": 14860 }, { "epoch": 0.6525791189611074, "grad_norm": 1.625, "learning_rate": 1.3543265515856191e-05, "loss": 0.3234, "step": 14862 }, { "epoch": 0.6526669374403109, "grad_norm": 1.671875, "learning_rate": 1.3537116891634338e-05, "loss": 0.3535, "step": 14864 }, { "epoch": 0.6527547559195144, "grad_norm": 1.6484375, "learning_rate": 1.3530969145242816e-05, "loss": 0.3412, "step": 14866 }, { "epoch": 0.6528425743987178, "grad_norm": 1.6875, "learning_rate": 1.3524822277152433e-05, "loss": 0.328, "step": 14868 }, { "epoch": 0.6529303928779213, "grad_norm": 1.578125, "learning_rate": 1.3518676287833904e-05, "loss": 0.3258, "step": 14870 }, { "epoch": 0.6530182113571248, "grad_norm": 1.546875, "learning_rate": 1.3512531177757904e-05, "loss": 0.3029, "step": 14872 }, { "epoch": 0.6531060298363283, "grad_norm": 1.59375, "learning_rate": 1.3506386947395022e-05, "loss": 0.3254, "step": 14874 }, { "epoch": 0.6531938483155318, "grad_norm": 1.6171875, "learning_rate": 1.3500243597215773e-05, "loss": 0.3469, "step": 14876 }, { "epoch": 0.6532816667947353, "grad_norm": 1.6640625, "learning_rate": 1.3494101127690633e-05, "loss": 0.3451, "step": 14878 }, { "epoch": 0.6533694852739388, "grad_norm": 1.6640625, "learning_rate": 1.348795953928999e-05, "loss": 0.3097, "step": 14880 }, { "epoch": 0.6534573037531423, "grad_norm": 1.6328125, "learning_rate": 1.3481818832484163e-05, "loss": 0.3298, "step": 14882 }, { "epoch": 0.6535451222323457, "grad_norm": 1.5859375, "learning_rate": 1.3475679007743402e-05, "loss": 0.3281, "step": 14884 }, { "epoch": 0.6536329407115492, "grad_norm": 1.546875, "learning_rate": 1.3469540065537917e-05, "loss": 0.3299, "step": 14886 }, { "epoch": 0.6537207591907527, "grad_norm": 1.6484375, "learning_rate": 1.3463402006337817e-05, "loss": 0.3511, "step": 14888 }, { "epoch": 0.6538085776699561, "grad_norm": 1.5859375, "learning_rate": 1.3457264830613141e-05, "loss": 0.3132, "step": 14890 }, { "epoch": 0.6538963961491597, "grad_norm": 1.6640625, "learning_rate": 1.3451128538833906e-05, "loss": 0.3346, "step": 14892 }, { "epoch": 0.6539842146283632, "grad_norm": 1.609375, "learning_rate": 1.3444993131470006e-05, "loss": 0.3232, "step": 14894 }, { "epoch": 0.6540720331075667, "grad_norm": 1.609375, "learning_rate": 1.3438858608991315e-05, "loss": 0.3156, "step": 14896 }, { "epoch": 0.6541598515867701, "grad_norm": 1.7265625, "learning_rate": 1.3432724971867599e-05, "loss": 0.3412, "step": 14898 }, { "epoch": 0.6542476700659736, "grad_norm": 1.7578125, "learning_rate": 1.3426592220568568e-05, "loss": 0.3014, "step": 14900 }, { "epoch": 0.6543354885451771, "grad_norm": 1.65625, "learning_rate": 1.3420460355563891e-05, "loss": 0.3055, "step": 14902 }, { "epoch": 0.6544233070243806, "grad_norm": 1.515625, "learning_rate": 1.3414329377323126e-05, "loss": 0.3393, "step": 14904 }, { "epoch": 0.6545111255035841, "grad_norm": 1.78125, "learning_rate": 1.3408199286315803e-05, "loss": 0.344, "step": 14906 }, { "epoch": 0.6545989439827876, "grad_norm": 1.7421875, "learning_rate": 1.3402070083011348e-05, "loss": 0.3015, "step": 14908 }, { "epoch": 0.6546867624619911, "grad_norm": 1.6328125, "learning_rate": 1.3395941767879164e-05, "loss": 0.3608, "step": 14910 }, { "epoch": 0.6547745809411946, "grad_norm": 1.5234375, "learning_rate": 1.3389814341388523e-05, "loss": 0.3229, "step": 14912 }, { "epoch": 0.654862399420398, "grad_norm": 1.6796875, "learning_rate": 1.3383687804008693e-05, "loss": 0.3233, "step": 14914 }, { "epoch": 0.6549502178996015, "grad_norm": 1.7265625, "learning_rate": 1.3377562156208833e-05, "loss": 0.3179, "step": 14916 }, { "epoch": 0.655038036378805, "grad_norm": 1.5625, "learning_rate": 1.3371437398458042e-05, "loss": 0.3171, "step": 14918 }, { "epoch": 0.6551258548580084, "grad_norm": 1.5859375, "learning_rate": 1.3365313531225374e-05, "loss": 0.3321, "step": 14920 }, { "epoch": 0.655213673337212, "grad_norm": 1.59375, "learning_rate": 1.3359190554979772e-05, "loss": 0.3269, "step": 14922 }, { "epoch": 0.6553014918164155, "grad_norm": 1.5234375, "learning_rate": 1.3353068470190161e-05, "loss": 0.3241, "step": 14924 }, { "epoch": 0.655389310295619, "grad_norm": 1.59375, "learning_rate": 1.3346947277325356e-05, "loss": 0.2946, "step": 14926 }, { "epoch": 0.6554771287748224, "grad_norm": 1.703125, "learning_rate": 1.334082697685411e-05, "loss": 0.3364, "step": 14928 }, { "epoch": 0.6555649472540259, "grad_norm": 1.6640625, "learning_rate": 1.3334707569245142e-05, "loss": 0.35, "step": 14930 }, { "epoch": 0.6556527657332294, "grad_norm": 1.640625, "learning_rate": 1.3328589054967056e-05, "loss": 0.3134, "step": 14932 }, { "epoch": 0.6557405842124329, "grad_norm": 1.6171875, "learning_rate": 1.3322471434488424e-05, "loss": 0.3198, "step": 14934 }, { "epoch": 0.6558284026916363, "grad_norm": 1.6171875, "learning_rate": 1.3316354708277728e-05, "loss": 0.3519, "step": 14936 }, { "epoch": 0.6559162211708399, "grad_norm": 1.765625, "learning_rate": 1.3310238876803383e-05, "loss": 0.3331, "step": 14938 }, { "epoch": 0.6560040396500434, "grad_norm": 1.6484375, "learning_rate": 1.3304123940533767e-05, "loss": 0.339, "step": 14940 }, { "epoch": 0.6560918581292469, "grad_norm": 1.625, "learning_rate": 1.3298009899937123e-05, "loss": 0.3369, "step": 14942 }, { "epoch": 0.6561796766084503, "grad_norm": 1.8046875, "learning_rate": 1.3291896755481694e-05, "loss": 0.3124, "step": 14944 }, { "epoch": 0.6562674950876538, "grad_norm": 1.6875, "learning_rate": 1.3285784507635609e-05, "loss": 0.3247, "step": 14946 }, { "epoch": 0.6563553135668573, "grad_norm": 1.6953125, "learning_rate": 1.3279673156866967e-05, "loss": 0.3425, "step": 14948 }, { "epoch": 0.6564431320460608, "grad_norm": 1.6015625, "learning_rate": 1.327356270364376e-05, "loss": 0.3239, "step": 14950 }, { "epoch": 0.6565309505252643, "grad_norm": 1.7734375, "learning_rate": 1.3267453148433926e-05, "loss": 0.322, "step": 14952 }, { "epoch": 0.6566187690044678, "grad_norm": 1.5703125, "learning_rate": 1.326134449170535e-05, "loss": 0.3282, "step": 14954 }, { "epoch": 0.6567065874836713, "grad_norm": 1.734375, "learning_rate": 1.3255236733925819e-05, "loss": 0.3349, "step": 14956 }, { "epoch": 0.6567944059628747, "grad_norm": 1.75, "learning_rate": 1.3249129875563083e-05, "loss": 0.3605, "step": 14958 }, { "epoch": 0.6568822244420782, "grad_norm": 1.484375, "learning_rate": 1.3243023917084796e-05, "loss": 0.3174, "step": 14960 }, { "epoch": 0.6569700429212817, "grad_norm": 1.6640625, "learning_rate": 1.3236918858958547e-05, "loss": 0.3258, "step": 14962 }, { "epoch": 0.6570578614004852, "grad_norm": 1.578125, "learning_rate": 1.3230814701651884e-05, "loss": 0.2999, "step": 14964 }, { "epoch": 0.6571456798796886, "grad_norm": 1.734375, "learning_rate": 1.3224711445632237e-05, "loss": 0.2968, "step": 14966 }, { "epoch": 0.6572334983588922, "grad_norm": 1.65625, "learning_rate": 1.3218609091367024e-05, "loss": 0.3169, "step": 14968 }, { "epoch": 0.6573213168380957, "grad_norm": 1.625, "learning_rate": 1.321250763932354e-05, "loss": 0.3351, "step": 14970 }, { "epoch": 0.6574091353172992, "grad_norm": 1.5625, "learning_rate": 1.320640708996907e-05, "loss": 0.3572, "step": 14972 }, { "epoch": 0.6574969537965026, "grad_norm": 1.609375, "learning_rate": 1.3200307443770748e-05, "loss": 0.3086, "step": 14974 }, { "epoch": 0.6575847722757061, "grad_norm": 1.5390625, "learning_rate": 1.319420870119572e-05, "loss": 0.3494, "step": 14976 }, { "epoch": 0.6576725907549096, "grad_norm": 1.609375, "learning_rate": 1.3188110862711023e-05, "loss": 0.3088, "step": 14978 }, { "epoch": 0.657760409234113, "grad_norm": 1.5234375, "learning_rate": 1.3182013928783618e-05, "loss": 0.3322, "step": 14980 }, { "epoch": 0.6578482277133165, "grad_norm": 1.6796875, "learning_rate": 1.3175917899880427e-05, "loss": 0.3124, "step": 14982 }, { "epoch": 0.6579360461925201, "grad_norm": 1.609375, "learning_rate": 1.3169822776468268e-05, "loss": 0.3098, "step": 14984 }, { "epoch": 0.6580238646717236, "grad_norm": 1.5, "learning_rate": 1.3163728559013928e-05, "loss": 0.2701, "step": 14986 }, { "epoch": 0.658111683150927, "grad_norm": 1.625, "learning_rate": 1.3157635247984091e-05, "loss": 0.3256, "step": 14988 }, { "epoch": 0.6581995016301305, "grad_norm": 1.53125, "learning_rate": 1.3151542843845377e-05, "loss": 0.3451, "step": 14990 }, { "epoch": 0.658287320109334, "grad_norm": 1.59375, "learning_rate": 1.3145451347064358e-05, "loss": 0.3386, "step": 14992 }, { "epoch": 0.6583751385885375, "grad_norm": 1.7109375, "learning_rate": 1.313936075810751e-05, "loss": 0.3218, "step": 14994 }, { "epoch": 0.6584629570677409, "grad_norm": 1.5078125, "learning_rate": 1.313327107744127e-05, "loss": 0.3355, "step": 14996 }, { "epoch": 0.6585507755469444, "grad_norm": 1.703125, "learning_rate": 1.3127182305531971e-05, "loss": 0.3392, "step": 14998 }, { "epoch": 0.658638594026148, "grad_norm": 1.609375, "learning_rate": 1.3121094442845893e-05, "loss": 0.3311, "step": 15000 }, { "epoch": 0.6587264125053515, "grad_norm": 1.640625, "learning_rate": 1.3115007489849265e-05, "loss": 0.3168, "step": 15002 }, { "epoch": 0.6588142309845549, "grad_norm": 1.640625, "learning_rate": 1.3108921447008194e-05, "loss": 0.3516, "step": 15004 }, { "epoch": 0.6589020494637584, "grad_norm": 1.7109375, "learning_rate": 1.310283631478878e-05, "loss": 0.3232, "step": 15006 }, { "epoch": 0.6589898679429619, "grad_norm": 1.671875, "learning_rate": 1.3096752093657002e-05, "loss": 0.3072, "step": 15008 }, { "epoch": 0.6590776864221654, "grad_norm": 1.6015625, "learning_rate": 1.3090668784078813e-05, "loss": 0.3238, "step": 15010 }, { "epoch": 0.6591655049013688, "grad_norm": 1.640625, "learning_rate": 1.3084586386520062e-05, "loss": 0.3492, "step": 15012 }, { "epoch": 0.6592533233805724, "grad_norm": 1.671875, "learning_rate": 1.3078504901446533e-05, "loss": 0.3287, "step": 15014 }, { "epoch": 0.6593411418597759, "grad_norm": 1.6796875, "learning_rate": 1.3072424329323968e-05, "loss": 0.3733, "step": 15016 }, { "epoch": 0.6594289603389794, "grad_norm": 1.53125, "learning_rate": 1.3066344670617991e-05, "loss": 0.3267, "step": 15018 }, { "epoch": 0.6595167788181828, "grad_norm": 1.6875, "learning_rate": 1.3060265925794218e-05, "loss": 0.326, "step": 15020 }, { "epoch": 0.6596045972973863, "grad_norm": 1.5859375, "learning_rate": 1.3054188095318137e-05, "loss": 0.2889, "step": 15022 }, { "epoch": 0.6596924157765898, "grad_norm": 1.5, "learning_rate": 1.3048111179655186e-05, "loss": 0.2946, "step": 15024 }, { "epoch": 0.6597802342557932, "grad_norm": 1.6171875, "learning_rate": 1.3042035179270756e-05, "loss": 0.3259, "step": 15026 }, { "epoch": 0.6598680527349967, "grad_norm": 1.5703125, "learning_rate": 1.3035960094630132e-05, "loss": 0.3238, "step": 15028 }, { "epoch": 0.6599558712142003, "grad_norm": 1.7265625, "learning_rate": 1.302988592619856e-05, "loss": 0.3381, "step": 15030 }, { "epoch": 0.6600436896934038, "grad_norm": 1.7109375, "learning_rate": 1.3023812674441189e-05, "loss": 0.3074, "step": 15032 }, { "epoch": 0.6601315081726072, "grad_norm": 1.5, "learning_rate": 1.301774033982312e-05, "loss": 0.3234, "step": 15034 }, { "epoch": 0.6602193266518107, "grad_norm": 1.5703125, "learning_rate": 1.3011668922809355e-05, "loss": 0.3027, "step": 15036 }, { "epoch": 0.6603071451310142, "grad_norm": 1.6875, "learning_rate": 1.3005598423864868e-05, "loss": 0.3438, "step": 15038 }, { "epoch": 0.6603949636102177, "grad_norm": 1.6328125, "learning_rate": 1.2999528843454528e-05, "loss": 0.3359, "step": 15040 }, { "epoch": 0.6604827820894211, "grad_norm": 1.609375, "learning_rate": 1.2993460182043138e-05, "loss": 0.3283, "step": 15042 }, { "epoch": 0.6605706005686246, "grad_norm": 1.65625, "learning_rate": 1.2987392440095455e-05, "loss": 0.3231, "step": 15044 }, { "epoch": 0.6606584190478282, "grad_norm": 1.59375, "learning_rate": 1.2981325618076129e-05, "loss": 0.302, "step": 15046 }, { "epoch": 0.6607462375270317, "grad_norm": 1.625, "learning_rate": 1.2975259716449778e-05, "loss": 0.3357, "step": 15048 }, { "epoch": 0.6608340560062351, "grad_norm": 1.6015625, "learning_rate": 1.2969194735680917e-05, "loss": 0.3215, "step": 15050 }, { "epoch": 0.6609218744854386, "grad_norm": 1.5546875, "learning_rate": 1.2963130676234003e-05, "loss": 0.3265, "step": 15052 }, { "epoch": 0.6610096929646421, "grad_norm": 1.75, "learning_rate": 1.2957067538573434e-05, "loss": 0.3357, "step": 15054 }, { "epoch": 0.6610975114438455, "grad_norm": 1.5546875, "learning_rate": 1.2951005323163509e-05, "loss": 0.3197, "step": 15056 }, { "epoch": 0.661185329923049, "grad_norm": 1.4765625, "learning_rate": 1.2944944030468498e-05, "loss": 0.3311, "step": 15058 }, { "epoch": 0.6612731484022526, "grad_norm": 1.609375, "learning_rate": 1.2938883660952558e-05, "loss": 0.3386, "step": 15060 }, { "epoch": 0.6613609668814561, "grad_norm": 1.6484375, "learning_rate": 1.2932824215079792e-05, "loss": 0.2935, "step": 15062 }, { "epoch": 0.6614487853606595, "grad_norm": 1.6015625, "learning_rate": 1.2926765693314257e-05, "loss": 0.324, "step": 15064 }, { "epoch": 0.661536603839863, "grad_norm": 1.6796875, "learning_rate": 1.2920708096119883e-05, "loss": 0.3579, "step": 15066 }, { "epoch": 0.6616244223190665, "grad_norm": 1.6015625, "learning_rate": 1.291465142396059e-05, "loss": 0.3081, "step": 15068 }, { "epoch": 0.66171224079827, "grad_norm": 1.546875, "learning_rate": 1.2908595677300172e-05, "loss": 0.3196, "step": 15070 }, { "epoch": 0.6618000592774734, "grad_norm": 1.625, "learning_rate": 1.2902540856602414e-05, "loss": 0.319, "step": 15072 }, { "epoch": 0.6618878777566769, "grad_norm": 1.6953125, "learning_rate": 1.289648696233097e-05, "loss": 0.3505, "step": 15074 }, { "epoch": 0.6619756962358805, "grad_norm": 1.6015625, "learning_rate": 1.2890433994949447e-05, "loss": 0.3762, "step": 15076 }, { "epoch": 0.662063514715084, "grad_norm": 1.6171875, "learning_rate": 1.2884381954921404e-05, "loss": 0.3002, "step": 15078 }, { "epoch": 0.6621513331942874, "grad_norm": 1.546875, "learning_rate": 1.2878330842710284e-05, "loss": 0.317, "step": 15080 }, { "epoch": 0.6622391516734909, "grad_norm": 1.53125, "learning_rate": 1.2872280658779501e-05, "loss": 0.329, "step": 15082 }, { "epoch": 0.6623269701526944, "grad_norm": 1.546875, "learning_rate": 1.2866231403592377e-05, "loss": 0.2896, "step": 15084 }, { "epoch": 0.6624147886318978, "grad_norm": 1.5703125, "learning_rate": 1.2860183077612148e-05, "loss": 0.3293, "step": 15086 }, { "epoch": 0.6625026071111013, "grad_norm": 1.8046875, "learning_rate": 1.285413568130202e-05, "loss": 0.3361, "step": 15088 }, { "epoch": 0.6625904255903048, "grad_norm": 1.6875, "learning_rate": 1.2848089215125084e-05, "loss": 0.3096, "step": 15090 }, { "epoch": 0.6626782440695084, "grad_norm": 1.5546875, "learning_rate": 1.2842043679544397e-05, "loss": 0.3276, "step": 15092 }, { "epoch": 0.6627660625487118, "grad_norm": 1.5625, "learning_rate": 1.283599907502292e-05, "loss": 0.3178, "step": 15094 }, { "epoch": 0.6628538810279153, "grad_norm": 1.7421875, "learning_rate": 1.2829955402023549e-05, "loss": 0.3348, "step": 15096 }, { "epoch": 0.6629416995071188, "grad_norm": 1.515625, "learning_rate": 1.2823912661009102e-05, "loss": 0.2979, "step": 15098 }, { "epoch": 0.6630295179863223, "grad_norm": 1.59375, "learning_rate": 1.2817870852442355e-05, "loss": 0.3386, "step": 15100 }, { "epoch": 0.6631173364655257, "grad_norm": 1.625, "learning_rate": 1.2811829976785971e-05, "loss": 0.314, "step": 15102 }, { "epoch": 0.6632051549447292, "grad_norm": 1.59375, "learning_rate": 1.2805790034502565e-05, "loss": 0.3095, "step": 15104 }, { "epoch": 0.6632929734239328, "grad_norm": 1.578125, "learning_rate": 1.2799751026054691e-05, "loss": 0.3297, "step": 15106 }, { "epoch": 0.6633807919031363, "grad_norm": 1.4921875, "learning_rate": 1.2793712951904796e-05, "loss": 0.3125, "step": 15108 }, { "epoch": 0.6634686103823397, "grad_norm": 1.546875, "learning_rate": 1.2787675812515299e-05, "loss": 0.3212, "step": 15110 }, { "epoch": 0.6635564288615432, "grad_norm": 1.546875, "learning_rate": 1.2781639608348517e-05, "loss": 0.3171, "step": 15112 }, { "epoch": 0.6636442473407467, "grad_norm": 1.6953125, "learning_rate": 1.2775604339866692e-05, "loss": 0.3389, "step": 15114 }, { "epoch": 0.6637320658199501, "grad_norm": 1.6484375, "learning_rate": 1.2769570007532027e-05, "loss": 0.3024, "step": 15116 }, { "epoch": 0.6638198842991536, "grad_norm": 1.6796875, "learning_rate": 1.2763536611806615e-05, "loss": 0.3248, "step": 15118 }, { "epoch": 0.6639077027783571, "grad_norm": 1.8203125, "learning_rate": 1.275750415315251e-05, "loss": 0.3536, "step": 15120 }, { "epoch": 0.6639955212575607, "grad_norm": 1.6171875, "learning_rate": 1.2751472632031672e-05, "loss": 0.3393, "step": 15122 }, { "epoch": 0.6640833397367641, "grad_norm": 1.4765625, "learning_rate": 1.2745442048905998e-05, "loss": 0.3293, "step": 15124 }, { "epoch": 0.6641711582159676, "grad_norm": 1.796875, "learning_rate": 1.2739412404237306e-05, "loss": 0.3239, "step": 15126 }, { "epoch": 0.6642589766951711, "grad_norm": 1.59375, "learning_rate": 1.2733383698487344e-05, "loss": 0.3344, "step": 15128 }, { "epoch": 0.6643467951743746, "grad_norm": 1.6484375, "learning_rate": 1.2727355932117806e-05, "loss": 0.3109, "step": 15130 }, { "epoch": 0.664434613653578, "grad_norm": 1.6484375, "learning_rate": 1.2721329105590284e-05, "loss": 0.3262, "step": 15132 }, { "epoch": 0.6645224321327815, "grad_norm": 1.75, "learning_rate": 1.2715303219366337e-05, "loss": 0.3469, "step": 15134 }, { "epoch": 0.664610250611985, "grad_norm": 1.6484375, "learning_rate": 1.2709278273907408e-05, "loss": 0.3071, "step": 15136 }, { "epoch": 0.6646980690911886, "grad_norm": 1.6875, "learning_rate": 1.2703254269674885e-05, "loss": 0.3076, "step": 15138 }, { "epoch": 0.664785887570392, "grad_norm": 1.6953125, "learning_rate": 1.269723120713011e-05, "loss": 0.3035, "step": 15140 }, { "epoch": 0.6648737060495955, "grad_norm": 1.7890625, "learning_rate": 1.2691209086734313e-05, "loss": 0.3316, "step": 15142 }, { "epoch": 0.664961524528799, "grad_norm": 1.6875, "learning_rate": 1.2685187908948678e-05, "loss": 0.3274, "step": 15144 }, { "epoch": 0.6650493430080024, "grad_norm": 1.6875, "learning_rate": 1.2679167674234308e-05, "loss": 0.305, "step": 15146 }, { "epoch": 0.6651371614872059, "grad_norm": 1.640625, "learning_rate": 1.267314838305222e-05, "loss": 0.3264, "step": 15148 }, { "epoch": 0.6652249799664094, "grad_norm": 1.5625, "learning_rate": 1.2667130035863395e-05, "loss": 0.3286, "step": 15150 }, { "epoch": 0.6653127984456129, "grad_norm": 1.59375, "learning_rate": 1.2661112633128696e-05, "loss": 0.3141, "step": 15152 }, { "epoch": 0.6654006169248164, "grad_norm": 1.5625, "learning_rate": 1.2655096175308962e-05, "loss": 0.3165, "step": 15154 }, { "epoch": 0.6654884354040199, "grad_norm": 1.5859375, "learning_rate": 1.264908066286492e-05, "loss": 0.3351, "step": 15156 }, { "epoch": 0.6655762538832234, "grad_norm": 1.59375, "learning_rate": 1.2643066096257244e-05, "loss": 0.3032, "step": 15158 }, { "epoch": 0.6656640723624269, "grad_norm": 1.6015625, "learning_rate": 1.2637052475946526e-05, "loss": 0.303, "step": 15160 }, { "epoch": 0.6657518908416303, "grad_norm": 1.546875, "learning_rate": 1.2631039802393286e-05, "loss": 0.3241, "step": 15162 }, { "epoch": 0.6658397093208338, "grad_norm": 1.6171875, "learning_rate": 1.2625028076057987e-05, "loss": 0.3363, "step": 15164 }, { "epoch": 0.6659275278000373, "grad_norm": 1.53125, "learning_rate": 1.2619017297400998e-05, "loss": 0.3022, "step": 15166 }, { "epoch": 0.6660153462792409, "grad_norm": 1.53125, "learning_rate": 1.2613007466882643e-05, "loss": 0.3297, "step": 15168 }, { "epoch": 0.6661031647584443, "grad_norm": 1.5234375, "learning_rate": 1.2606998584963136e-05, "loss": 0.3167, "step": 15170 }, { "epoch": 0.6661909832376478, "grad_norm": 1.75, "learning_rate": 1.2600990652102656e-05, "loss": 0.3309, "step": 15172 }, { "epoch": 0.6662788017168513, "grad_norm": 1.7421875, "learning_rate": 1.2594983668761286e-05, "loss": 0.3045, "step": 15174 }, { "epoch": 0.6663666201960547, "grad_norm": 1.6484375, "learning_rate": 1.2588977635399029e-05, "loss": 0.3155, "step": 15176 }, { "epoch": 0.6664544386752582, "grad_norm": 1.65625, "learning_rate": 1.2582972552475852e-05, "loss": 0.3326, "step": 15178 }, { "epoch": 0.6665422571544617, "grad_norm": 1.65625, "learning_rate": 1.2576968420451601e-05, "loss": 0.3442, "step": 15180 }, { "epoch": 0.6666300756336652, "grad_norm": 1.7578125, "learning_rate": 1.2570965239786098e-05, "loss": 0.313, "step": 15182 }, { "epoch": 0.6667178941128687, "grad_norm": 1.75, "learning_rate": 1.2564963010939057e-05, "loss": 0.3379, "step": 15184 }, { "epoch": 0.6668057125920722, "grad_norm": 1.6015625, "learning_rate": 1.2558961734370128e-05, "loss": 0.3332, "step": 15186 }, { "epoch": 0.6668935310712757, "grad_norm": 1.7890625, "learning_rate": 1.2552961410538894e-05, "loss": 0.3414, "step": 15188 }, { "epoch": 0.6669813495504792, "grad_norm": 1.5625, "learning_rate": 1.2546962039904847e-05, "loss": 0.334, "step": 15190 }, { "epoch": 0.6670691680296826, "grad_norm": 1.5859375, "learning_rate": 1.254096362292744e-05, "loss": 0.3496, "step": 15192 }, { "epoch": 0.6671569865088861, "grad_norm": 1.4921875, "learning_rate": 1.253496616006602e-05, "loss": 0.3553, "step": 15194 }, { "epoch": 0.6672448049880896, "grad_norm": 1.6796875, "learning_rate": 1.2528969651779888e-05, "loss": 0.3289, "step": 15196 }, { "epoch": 0.667332623467293, "grad_norm": 1.5546875, "learning_rate": 1.2522974098528245e-05, "loss": 0.2986, "step": 15198 }, { "epoch": 0.6674204419464966, "grad_norm": 1.5625, "learning_rate": 1.2516979500770232e-05, "loss": 0.3219, "step": 15200 }, { "epoch": 0.6675082604257001, "grad_norm": 1.6015625, "learning_rate": 1.251098585896493e-05, "loss": 0.3009, "step": 15202 }, { "epoch": 0.6675960789049036, "grad_norm": 1.5234375, "learning_rate": 1.250499317357131e-05, "loss": 0.3442, "step": 15204 }, { "epoch": 0.667683897384107, "grad_norm": 1.640625, "learning_rate": 1.2499001445048325e-05, "loss": 0.3178, "step": 15206 }, { "epoch": 0.6677717158633105, "grad_norm": 1.7265625, "learning_rate": 1.2493010673854803e-05, "loss": 0.3308, "step": 15208 }, { "epoch": 0.667859534342514, "grad_norm": 1.421875, "learning_rate": 1.2487020860449511e-05, "loss": 0.3033, "step": 15210 }, { "epoch": 0.6679473528217175, "grad_norm": 1.5546875, "learning_rate": 1.2481032005291173e-05, "loss": 0.3283, "step": 15212 }, { "epoch": 0.668035171300921, "grad_norm": 1.5234375, "learning_rate": 1.2475044108838393e-05, "loss": 0.2983, "step": 15214 }, { "epoch": 0.6681229897801245, "grad_norm": 1.6640625, "learning_rate": 1.2469057171549759e-05, "loss": 0.3213, "step": 15216 }, { "epoch": 0.668210808259328, "grad_norm": 1.671875, "learning_rate": 1.246307119388371e-05, "loss": 0.3042, "step": 15218 }, { "epoch": 0.6682986267385315, "grad_norm": 1.5625, "learning_rate": 1.2457086176298685e-05, "loss": 0.3172, "step": 15220 }, { "epoch": 0.6683864452177349, "grad_norm": 1.6171875, "learning_rate": 1.2451102119253009e-05, "loss": 0.3245, "step": 15222 }, { "epoch": 0.6684742636969384, "grad_norm": 1.6171875, "learning_rate": 1.2445119023204926e-05, "loss": 0.32, "step": 15224 }, { "epoch": 0.6685620821761419, "grad_norm": 1.7734375, "learning_rate": 1.2439136888612652e-05, "loss": 0.3245, "step": 15226 }, { "epoch": 0.6686499006553454, "grad_norm": 1.625, "learning_rate": 1.2433155715934275e-05, "loss": 0.3037, "step": 15228 }, { "epoch": 0.6687377191345489, "grad_norm": 1.5078125, "learning_rate": 1.2427175505627856e-05, "loss": 0.3555, "step": 15230 }, { "epoch": 0.6688255376137524, "grad_norm": 1.53125, "learning_rate": 1.2421196258151337e-05, "loss": 0.3333, "step": 15232 }, { "epoch": 0.6689133560929559, "grad_norm": 1.6640625, "learning_rate": 1.241521797396264e-05, "loss": 0.3125, "step": 15234 }, { "epoch": 0.6690011745721594, "grad_norm": 1.6953125, "learning_rate": 1.2409240653519564e-05, "loss": 0.3263, "step": 15236 }, { "epoch": 0.6690889930513628, "grad_norm": 1.5625, "learning_rate": 1.2403264297279849e-05, "loss": 0.3191, "step": 15238 }, { "epoch": 0.6691768115305663, "grad_norm": 1.640625, "learning_rate": 1.239728890570118e-05, "loss": 0.3162, "step": 15240 }, { "epoch": 0.6692646300097698, "grad_norm": 1.640625, "learning_rate": 1.239131447924114e-05, "loss": 0.3191, "step": 15242 }, { "epoch": 0.6693524484889732, "grad_norm": 1.5703125, "learning_rate": 1.238534101835727e-05, "loss": 0.3004, "step": 15244 }, { "epoch": 0.6694402669681768, "grad_norm": 1.859375, "learning_rate": 1.2379368523507007e-05, "loss": 0.3416, "step": 15246 }, { "epoch": 0.6695280854473803, "grad_norm": 1.6640625, "learning_rate": 1.2373396995147729e-05, "loss": 0.3377, "step": 15248 }, { "epoch": 0.6696159039265838, "grad_norm": 1.5703125, "learning_rate": 1.2367426433736737e-05, "loss": 0.3255, "step": 15250 }, { "epoch": 0.6697037224057872, "grad_norm": 1.7890625, "learning_rate": 1.2361456839731245e-05, "loss": 0.3023, "step": 15252 }, { "epoch": 0.6697915408849907, "grad_norm": 1.75, "learning_rate": 1.2355488213588429e-05, "loss": 0.3236, "step": 15254 }, { "epoch": 0.6698793593641942, "grad_norm": 1.65625, "learning_rate": 1.2349520555765348e-05, "loss": 0.3063, "step": 15256 }, { "epoch": 0.6699671778433977, "grad_norm": 1.6953125, "learning_rate": 1.2343553866719024e-05, "loss": 0.3225, "step": 15258 }, { "epoch": 0.6700549963226012, "grad_norm": 1.6328125, "learning_rate": 1.2337588146906378e-05, "loss": 0.2899, "step": 15260 }, { "epoch": 0.6701428148018047, "grad_norm": 1.5703125, "learning_rate": 1.2331623396784258e-05, "loss": 0.3267, "step": 15262 }, { "epoch": 0.6702306332810082, "grad_norm": 1.5546875, "learning_rate": 1.2325659616809466e-05, "loss": 0.299, "step": 15264 }, { "epoch": 0.6703184517602117, "grad_norm": 1.609375, "learning_rate": 1.2319696807438686e-05, "loss": 0.3019, "step": 15266 }, { "epoch": 0.6704062702394151, "grad_norm": 1.5703125, "learning_rate": 1.2313734969128576e-05, "loss": 0.2836, "step": 15268 }, { "epoch": 0.6704940887186186, "grad_norm": 1.5703125, "learning_rate": 1.2307774102335685e-05, "loss": 0.3468, "step": 15270 }, { "epoch": 0.6705819071978221, "grad_norm": 1.5390625, "learning_rate": 1.2301814207516482e-05, "loss": 0.3267, "step": 15272 }, { "epoch": 0.6706697256770255, "grad_norm": 1.6171875, "learning_rate": 1.2295855285127403e-05, "loss": 0.3108, "step": 15274 }, { "epoch": 0.6707575441562291, "grad_norm": 1.7109375, "learning_rate": 1.2289897335624761e-05, "loss": 0.3076, "step": 15276 }, { "epoch": 0.6708453626354326, "grad_norm": 1.578125, "learning_rate": 1.2283940359464849e-05, "loss": 0.3156, "step": 15278 }, { "epoch": 0.6709331811146361, "grad_norm": 1.625, "learning_rate": 1.2277984357103811e-05, "loss": 0.3288, "step": 15280 }, { "epoch": 0.6710209995938395, "grad_norm": 1.640625, "learning_rate": 1.2272029328997791e-05, "loss": 0.3448, "step": 15282 }, { "epoch": 0.671108818073043, "grad_norm": 1.6640625, "learning_rate": 1.2266075275602818e-05, "loss": 0.3192, "step": 15284 }, { "epoch": 0.6711966365522465, "grad_norm": 1.5625, "learning_rate": 1.226012219737484e-05, "loss": 0.3315, "step": 15286 }, { "epoch": 0.67128445503145, "grad_norm": 1.5703125, "learning_rate": 1.2254170094769771e-05, "loss": 0.3169, "step": 15288 }, { "epoch": 0.6713722735106534, "grad_norm": 1.640625, "learning_rate": 1.22482189682434e-05, "loss": 0.3263, "step": 15290 }, { "epoch": 0.671460091989857, "grad_norm": 1.53125, "learning_rate": 1.2242268818251486e-05, "loss": 0.3187, "step": 15292 }, { "epoch": 0.6715479104690605, "grad_norm": 1.84375, "learning_rate": 1.2236319645249677e-05, "loss": 0.3476, "step": 15294 }, { "epoch": 0.671635728948264, "grad_norm": 1.6484375, "learning_rate": 1.2230371449693578e-05, "loss": 0.3365, "step": 15296 }, { "epoch": 0.6717235474274674, "grad_norm": 1.5859375, "learning_rate": 1.2224424232038692e-05, "loss": 0.299, "step": 15298 }, { "epoch": 0.6718113659066709, "grad_norm": 1.625, "learning_rate": 1.2218477992740455e-05, "loss": 0.3148, "step": 15300 }, { "epoch": 0.6718991843858744, "grad_norm": 1.6015625, "learning_rate": 1.2212532732254245e-05, "loss": 0.3289, "step": 15302 }, { "epoch": 0.6719870028650778, "grad_norm": 1.7265625, "learning_rate": 1.2206588451035334e-05, "loss": 0.3381, "step": 15304 }, { "epoch": 0.6720748213442814, "grad_norm": 1.546875, "learning_rate": 1.2200645149538955e-05, "loss": 0.3261, "step": 15306 }, { "epoch": 0.6721626398234849, "grad_norm": 1.8125, "learning_rate": 1.2194702828220241e-05, "loss": 0.3341, "step": 15308 }, { "epoch": 0.6722504583026884, "grad_norm": 1.5546875, "learning_rate": 1.2188761487534254e-05, "loss": 0.3285, "step": 15310 }, { "epoch": 0.6723382767818918, "grad_norm": 1.71875, "learning_rate": 1.218282112793598e-05, "loss": 0.3094, "step": 15312 }, { "epoch": 0.6724260952610953, "grad_norm": 1.5625, "learning_rate": 1.2176881749880328e-05, "loss": 0.3299, "step": 15314 }, { "epoch": 0.6725139137402988, "grad_norm": 1.5390625, "learning_rate": 1.2170943353822155e-05, "loss": 0.3249, "step": 15316 }, { "epoch": 0.6726017322195023, "grad_norm": 1.625, "learning_rate": 1.2165005940216209e-05, "loss": 0.3287, "step": 15318 }, { "epoch": 0.6726895506987057, "grad_norm": 1.6875, "learning_rate": 1.2159069509517193e-05, "loss": 0.3378, "step": 15320 }, { "epoch": 0.6727773691779093, "grad_norm": 1.515625, "learning_rate": 1.2153134062179711e-05, "loss": 0.3203, "step": 15322 }, { "epoch": 0.6728651876571128, "grad_norm": 1.4921875, "learning_rate": 1.2147199598658293e-05, "loss": 0.3337, "step": 15324 }, { "epoch": 0.6729530061363163, "grad_norm": 1.609375, "learning_rate": 1.2141266119407421e-05, "loss": 0.3331, "step": 15326 }, { "epoch": 0.6730408246155197, "grad_norm": 1.65625, "learning_rate": 1.2135333624881463e-05, "loss": 0.3097, "step": 15328 }, { "epoch": 0.6731286430947232, "grad_norm": 1.6484375, "learning_rate": 1.2129402115534747e-05, "loss": 0.3504, "step": 15330 }, { "epoch": 0.6732164615739267, "grad_norm": 1.75, "learning_rate": 1.2123471591821503e-05, "loss": 0.337, "step": 15332 }, { "epoch": 0.6733042800531301, "grad_norm": 1.6796875, "learning_rate": 1.2117542054195882e-05, "loss": 0.2912, "step": 15334 }, { "epoch": 0.6733920985323336, "grad_norm": 1.578125, "learning_rate": 1.2111613503111987e-05, "loss": 0.3178, "step": 15336 }, { "epoch": 0.6734799170115372, "grad_norm": 1.6796875, "learning_rate": 1.2105685939023811e-05, "loss": 0.3123, "step": 15338 }, { "epoch": 0.6735677354907407, "grad_norm": 1.5546875, "learning_rate": 1.2099759362385318e-05, "loss": 0.3066, "step": 15340 }, { "epoch": 0.6736555539699441, "grad_norm": 1.6328125, "learning_rate": 1.2093833773650324e-05, "loss": 0.3333, "step": 15342 }, { "epoch": 0.6737433724491476, "grad_norm": 1.5625, "learning_rate": 1.2087909173272644e-05, "loss": 0.3225, "step": 15344 }, { "epoch": 0.6738311909283511, "grad_norm": 1.59375, "learning_rate": 1.2081985561705975e-05, "loss": 0.3382, "step": 15346 }, { "epoch": 0.6739190094075546, "grad_norm": 1.6796875, "learning_rate": 1.2076062939403937e-05, "loss": 0.2952, "step": 15348 }, { "epoch": 0.674006827886758, "grad_norm": 1.6015625, "learning_rate": 1.2070141306820106e-05, "loss": 0.3388, "step": 15350 }, { "epoch": 0.6740946463659615, "grad_norm": 1.5703125, "learning_rate": 1.2064220664407946e-05, "loss": 0.3128, "step": 15352 }, { "epoch": 0.6741824648451651, "grad_norm": 1.5625, "learning_rate": 1.205830101262088e-05, "loss": 0.3194, "step": 15354 }, { "epoch": 0.6742702833243686, "grad_norm": 1.546875, "learning_rate": 1.2052382351912219e-05, "loss": 0.3389, "step": 15356 }, { "epoch": 0.674358101803572, "grad_norm": 1.5546875, "learning_rate": 1.2046464682735217e-05, "loss": 0.3324, "step": 15358 }, { "epoch": 0.6744459202827755, "grad_norm": 1.5234375, "learning_rate": 1.2040548005543062e-05, "loss": 0.327, "step": 15360 }, { "epoch": 0.674533738761979, "grad_norm": 1.5859375, "learning_rate": 1.2034632320788839e-05, "loss": 0.3616, "step": 15362 }, { "epoch": 0.6746215572411824, "grad_norm": 1.6171875, "learning_rate": 1.2028717628925587e-05, "loss": 0.3157, "step": 15364 }, { "epoch": 0.6747093757203859, "grad_norm": 1.640625, "learning_rate": 1.2022803930406242e-05, "loss": 0.3295, "step": 15366 }, { "epoch": 0.6747971941995895, "grad_norm": 1.8046875, "learning_rate": 1.2016891225683691e-05, "loss": 0.3492, "step": 15368 }, { "epoch": 0.674885012678793, "grad_norm": 1.7109375, "learning_rate": 1.2010979515210724e-05, "loss": 0.3435, "step": 15370 }, { "epoch": 0.6749728311579964, "grad_norm": 1.625, "learning_rate": 1.2005068799440059e-05, "loss": 0.3334, "step": 15372 }, { "epoch": 0.6750606496371999, "grad_norm": 1.734375, "learning_rate": 1.1999159078824337e-05, "loss": 0.3308, "step": 15374 }, { "epoch": 0.6751484681164034, "grad_norm": 1.765625, "learning_rate": 1.1993250353816124e-05, "loss": 0.3166, "step": 15376 }, { "epoch": 0.6752362865956069, "grad_norm": 1.796875, "learning_rate": 1.1987342624867926e-05, "loss": 0.3151, "step": 15378 }, { "epoch": 0.6753241050748103, "grad_norm": 2.015625, "learning_rate": 1.1981435892432139e-05, "loss": 0.3483, "step": 15380 }, { "epoch": 0.6754119235540138, "grad_norm": 1.5234375, "learning_rate": 1.1975530156961119e-05, "loss": 0.3139, "step": 15382 }, { "epoch": 0.6754997420332174, "grad_norm": 1.5, "learning_rate": 1.1969625418907123e-05, "loss": 0.3511, "step": 15384 }, { "epoch": 0.6755875605124209, "grad_norm": 1.71875, "learning_rate": 1.1963721678722328e-05, "loss": 0.332, "step": 15386 }, { "epoch": 0.6756753789916243, "grad_norm": 1.875, "learning_rate": 1.1957818936858862e-05, "loss": 0.3403, "step": 15388 }, { "epoch": 0.6757631974708278, "grad_norm": 1.5078125, "learning_rate": 1.1951917193768736e-05, "loss": 0.3234, "step": 15390 }, { "epoch": 0.6758510159500313, "grad_norm": 1.765625, "learning_rate": 1.194601644990393e-05, "loss": 0.3602, "step": 15392 }, { "epoch": 0.6759388344292347, "grad_norm": 1.609375, "learning_rate": 1.1940116705716315e-05, "loss": 0.3491, "step": 15394 }, { "epoch": 0.6760266529084382, "grad_norm": 1.640625, "learning_rate": 1.1934217961657682e-05, "loss": 0.335, "step": 15396 }, { "epoch": 0.6761144713876417, "grad_norm": 1.640625, "learning_rate": 1.1928320218179779e-05, "loss": 0.3248, "step": 15398 }, { "epoch": 0.6762022898668453, "grad_norm": 1.6953125, "learning_rate": 1.1922423475734248e-05, "loss": 0.3105, "step": 15400 }, { "epoch": 0.6762901083460487, "grad_norm": 1.6953125, "learning_rate": 1.1916527734772661e-05, "loss": 0.3222, "step": 15402 }, { "epoch": 0.6763779268252522, "grad_norm": 1.53125, "learning_rate": 1.191063299574651e-05, "loss": 0.3442, "step": 15404 }, { "epoch": 0.6764657453044557, "grad_norm": 1.703125, "learning_rate": 1.1904739259107228e-05, "loss": 0.3104, "step": 15406 }, { "epoch": 0.6765535637836592, "grad_norm": 1.5234375, "learning_rate": 1.1898846525306154e-05, "loss": 0.3163, "step": 15408 }, { "epoch": 0.6766413822628626, "grad_norm": 1.5546875, "learning_rate": 1.1892954794794545e-05, "loss": 0.3274, "step": 15410 }, { "epoch": 0.6767292007420661, "grad_norm": 1.5390625, "learning_rate": 1.1887064068023607e-05, "loss": 0.336, "step": 15412 }, { "epoch": 0.6768170192212697, "grad_norm": 1.640625, "learning_rate": 1.1881174345444437e-05, "loss": 0.3256, "step": 15414 }, { "epoch": 0.6769048377004732, "grad_norm": 1.609375, "learning_rate": 1.187528562750809e-05, "loss": 0.3031, "step": 15416 }, { "epoch": 0.6769926561796766, "grad_norm": 1.6875, "learning_rate": 1.1869397914665516e-05, "loss": 0.355, "step": 15418 }, { "epoch": 0.6770804746588801, "grad_norm": 1.5625, "learning_rate": 1.186351120736759e-05, "loss": 0.3176, "step": 15420 }, { "epoch": 0.6771682931380836, "grad_norm": 1.625, "learning_rate": 1.1857625506065135e-05, "loss": 0.3048, "step": 15422 }, { "epoch": 0.677256111617287, "grad_norm": 1.625, "learning_rate": 1.1851740811208856e-05, "loss": 0.3217, "step": 15424 }, { "epoch": 0.6773439300964905, "grad_norm": 1.75, "learning_rate": 1.1845857123249427e-05, "loss": 0.3281, "step": 15426 }, { "epoch": 0.677431748575694, "grad_norm": 1.5390625, "learning_rate": 1.1839974442637406e-05, "loss": 0.3514, "step": 15428 }, { "epoch": 0.6775195670548976, "grad_norm": 1.4921875, "learning_rate": 1.1834092769823304e-05, "loss": 0.3142, "step": 15430 }, { "epoch": 0.677607385534101, "grad_norm": 1.6484375, "learning_rate": 1.1828212105257536e-05, "loss": 0.3072, "step": 15432 }, { "epoch": 0.6776952040133045, "grad_norm": 1.6484375, "learning_rate": 1.1822332449390441e-05, "loss": 0.3315, "step": 15434 }, { "epoch": 0.677783022492508, "grad_norm": 1.515625, "learning_rate": 1.1816453802672286e-05, "loss": 0.2974, "step": 15436 }, { "epoch": 0.6778708409717115, "grad_norm": 1.609375, "learning_rate": 1.181057616555325e-05, "loss": 0.3113, "step": 15438 }, { "epoch": 0.6779586594509149, "grad_norm": 1.5625, "learning_rate": 1.1804699538483462e-05, "loss": 0.3069, "step": 15440 }, { "epoch": 0.6780464779301184, "grad_norm": 1.6171875, "learning_rate": 1.1798823921912937e-05, "loss": 0.3202, "step": 15442 }, { "epoch": 0.6781342964093219, "grad_norm": 1.671875, "learning_rate": 1.1792949316291651e-05, "loss": 0.3149, "step": 15444 }, { "epoch": 0.6782221148885255, "grad_norm": 1.578125, "learning_rate": 1.1787075722069471e-05, "loss": 0.3165, "step": 15446 }, { "epoch": 0.6783099333677289, "grad_norm": 1.6640625, "learning_rate": 1.1781203139696192e-05, "loss": 0.348, "step": 15448 }, { "epoch": 0.6783977518469324, "grad_norm": 1.5546875, "learning_rate": 1.1775331569621553e-05, "loss": 0.3292, "step": 15450 }, { "epoch": 0.6784855703261359, "grad_norm": 1.5625, "learning_rate": 1.1769461012295183e-05, "loss": 0.3128, "step": 15452 }, { "epoch": 0.6785733888053394, "grad_norm": 1.65625, "learning_rate": 1.1763591468166671e-05, "loss": 0.3219, "step": 15454 }, { "epoch": 0.6786612072845428, "grad_norm": 1.78125, "learning_rate": 1.1757722937685498e-05, "loss": 0.337, "step": 15456 }, { "epoch": 0.6787490257637463, "grad_norm": 1.6015625, "learning_rate": 1.1751855421301064e-05, "loss": 0.3249, "step": 15458 }, { "epoch": 0.6788368442429499, "grad_norm": 1.640625, "learning_rate": 1.174598891946273e-05, "loss": 0.31, "step": 15460 }, { "epoch": 0.6789246627221533, "grad_norm": 1.6640625, "learning_rate": 1.1740123432619741e-05, "loss": 0.3139, "step": 15462 }, { "epoch": 0.6790124812013568, "grad_norm": 1.5859375, "learning_rate": 1.1734258961221278e-05, "loss": 0.3365, "step": 15464 }, { "epoch": 0.6791002996805603, "grad_norm": 1.6171875, "learning_rate": 1.1728395505716433e-05, "loss": 0.3293, "step": 15466 }, { "epoch": 0.6791881181597638, "grad_norm": 1.5546875, "learning_rate": 1.1722533066554254e-05, "loss": 0.336, "step": 15468 }, { "epoch": 0.6792759366389672, "grad_norm": 1.4453125, "learning_rate": 1.1716671644183674e-05, "loss": 0.3193, "step": 15470 }, { "epoch": 0.6793637551181707, "grad_norm": 1.6640625, "learning_rate": 1.1710811239053553e-05, "loss": 0.3072, "step": 15472 }, { "epoch": 0.6794515735973742, "grad_norm": 1.59375, "learning_rate": 1.1704951851612705e-05, "loss": 0.3524, "step": 15474 }, { "epoch": 0.6795393920765778, "grad_norm": 1.5546875, "learning_rate": 1.169909348230982e-05, "loss": 0.323, "step": 15476 }, { "epoch": 0.6796272105557812, "grad_norm": 1.4921875, "learning_rate": 1.1693236131593555e-05, "loss": 0.3359, "step": 15478 }, { "epoch": 0.6797150290349847, "grad_norm": 1.5703125, "learning_rate": 1.1687379799912457e-05, "loss": 0.3165, "step": 15480 }, { "epoch": 0.6798028475141882, "grad_norm": 1.6171875, "learning_rate": 1.1681524487714995e-05, "loss": 0.3152, "step": 15482 }, { "epoch": 0.6798906659933917, "grad_norm": 1.6640625, "learning_rate": 1.167567019544959e-05, "loss": 0.3278, "step": 15484 }, { "epoch": 0.6799784844725951, "grad_norm": 1.6015625, "learning_rate": 1.1669816923564544e-05, "loss": 0.3325, "step": 15486 }, { "epoch": 0.6800663029517986, "grad_norm": 1.546875, "learning_rate": 1.1663964672508126e-05, "loss": 0.327, "step": 15488 }, { "epoch": 0.6801541214310021, "grad_norm": 1.5546875, "learning_rate": 1.1658113442728489e-05, "loss": 0.3238, "step": 15490 }, { "epoch": 0.6802419399102057, "grad_norm": 1.453125, "learning_rate": 1.1652263234673725e-05, "loss": 0.3403, "step": 15492 }, { "epoch": 0.6803297583894091, "grad_norm": 1.5625, "learning_rate": 1.164641404879183e-05, "loss": 0.321, "step": 15494 }, { "epoch": 0.6804175768686126, "grad_norm": 1.5234375, "learning_rate": 1.1640565885530758e-05, "loss": 0.3183, "step": 15496 }, { "epoch": 0.6805053953478161, "grad_norm": 1.734375, "learning_rate": 1.1634718745338353e-05, "loss": 0.3223, "step": 15498 }, { "epoch": 0.6805932138270195, "grad_norm": 1.625, "learning_rate": 1.1628872628662381e-05, "loss": 0.3443, "step": 15500 }, { "epoch": 0.680681032306223, "grad_norm": 1.546875, "learning_rate": 1.1623027535950559e-05, "loss": 0.3374, "step": 15502 }, { "epoch": 0.6807688507854265, "grad_norm": 1.5625, "learning_rate": 1.1617183467650483e-05, "loss": 0.33, "step": 15504 }, { "epoch": 0.68085666926463, "grad_norm": 1.53125, "learning_rate": 1.1611340424209715e-05, "loss": 0.3205, "step": 15506 }, { "epoch": 0.6809444877438335, "grad_norm": 1.75, "learning_rate": 1.160549840607571e-05, "loss": 0.3222, "step": 15508 }, { "epoch": 0.681032306223037, "grad_norm": 1.6875, "learning_rate": 1.1599657413695836e-05, "loss": 0.362, "step": 15510 }, { "epoch": 0.6811201247022405, "grad_norm": 1.6875, "learning_rate": 1.1593817447517419e-05, "loss": 0.3281, "step": 15512 }, { "epoch": 0.681207943181444, "grad_norm": 1.4296875, "learning_rate": 1.1587978507987667e-05, "loss": 0.2968, "step": 15514 }, { "epoch": 0.6812957616606474, "grad_norm": 1.5703125, "learning_rate": 1.1582140595553746e-05, "loss": 0.3268, "step": 15516 }, { "epoch": 0.6813835801398509, "grad_norm": 1.6015625, "learning_rate": 1.157630371066271e-05, "loss": 0.312, "step": 15518 }, { "epoch": 0.6814713986190544, "grad_norm": 1.65625, "learning_rate": 1.1570467853761552e-05, "loss": 0.3154, "step": 15520 }, { "epoch": 0.681559217098258, "grad_norm": 1.609375, "learning_rate": 1.156463302529719e-05, "loss": 0.2936, "step": 15522 }, { "epoch": 0.6816470355774614, "grad_norm": 1.4609375, "learning_rate": 1.1558799225716451e-05, "loss": 0.3225, "step": 15524 }, { "epoch": 0.6817348540566649, "grad_norm": 1.65625, "learning_rate": 1.155296645546609e-05, "loss": 0.3127, "step": 15526 }, { "epoch": 0.6818226725358684, "grad_norm": 1.5625, "learning_rate": 1.1547134714992772e-05, "loss": 0.3079, "step": 15528 }, { "epoch": 0.6819104910150718, "grad_norm": 1.6796875, "learning_rate": 1.1541304004743112e-05, "loss": 0.3163, "step": 15530 }, { "epoch": 0.6819983094942753, "grad_norm": 1.6171875, "learning_rate": 1.1535474325163618e-05, "loss": 0.2959, "step": 15532 }, { "epoch": 0.6820861279734788, "grad_norm": 1.703125, "learning_rate": 1.1529645676700717e-05, "loss": 0.3165, "step": 15534 }, { "epoch": 0.6821739464526823, "grad_norm": 1.53125, "learning_rate": 1.1523818059800793e-05, "loss": 0.3352, "step": 15536 }, { "epoch": 0.6822617649318858, "grad_norm": 1.6328125, "learning_rate": 1.1517991474910097e-05, "loss": 0.321, "step": 15538 }, { "epoch": 0.6823495834110893, "grad_norm": 1.5703125, "learning_rate": 1.1512165922474857e-05, "loss": 0.3393, "step": 15540 }, { "epoch": 0.6824374018902928, "grad_norm": 1.6171875, "learning_rate": 1.1506341402941187e-05, "loss": 0.3403, "step": 15542 }, { "epoch": 0.6825252203694963, "grad_norm": 1.59375, "learning_rate": 1.1500517916755115e-05, "loss": 0.316, "step": 15544 }, { "epoch": 0.6826130388486997, "grad_norm": 1.578125, "learning_rate": 1.1494695464362627e-05, "loss": 0.3239, "step": 15546 }, { "epoch": 0.6827008573279032, "grad_norm": 1.625, "learning_rate": 1.1488874046209588e-05, "loss": 0.3235, "step": 15548 }, { "epoch": 0.6827886758071067, "grad_norm": 1.5078125, "learning_rate": 1.1483053662741822e-05, "loss": 0.3239, "step": 15550 }, { "epoch": 0.6828764942863101, "grad_norm": 1.6171875, "learning_rate": 1.1477234314405048e-05, "loss": 0.304, "step": 15552 }, { "epoch": 0.6829643127655137, "grad_norm": 1.6640625, "learning_rate": 1.1471416001644911e-05, "loss": 0.3451, "step": 15554 }, { "epoch": 0.6830521312447172, "grad_norm": 1.5625, "learning_rate": 1.146559872490697e-05, "loss": 0.2889, "step": 15556 }, { "epoch": 0.6831399497239207, "grad_norm": 1.765625, "learning_rate": 1.1459782484636734e-05, "loss": 0.3077, "step": 15558 }, { "epoch": 0.6832277682031241, "grad_norm": 1.625, "learning_rate": 1.1453967281279601e-05, "loss": 0.346, "step": 15560 }, { "epoch": 0.6833155866823276, "grad_norm": 1.578125, "learning_rate": 1.144815311528089e-05, "loss": 0.3292, "step": 15562 }, { "epoch": 0.6834034051615311, "grad_norm": 1.578125, "learning_rate": 1.1442339987085873e-05, "loss": 0.3335, "step": 15564 }, { "epoch": 0.6834912236407346, "grad_norm": 1.5546875, "learning_rate": 1.1436527897139698e-05, "loss": 0.2855, "step": 15566 }, { "epoch": 0.6835790421199381, "grad_norm": 1.6171875, "learning_rate": 1.1430716845887478e-05, "loss": 0.322, "step": 15568 }, { "epoch": 0.6836668605991416, "grad_norm": 1.5859375, "learning_rate": 1.1424906833774218e-05, "loss": 0.297, "step": 15570 }, { "epoch": 0.6837546790783451, "grad_norm": 1.578125, "learning_rate": 1.1419097861244834e-05, "loss": 0.299, "step": 15572 }, { "epoch": 0.6838424975575486, "grad_norm": 1.625, "learning_rate": 1.1413289928744203e-05, "loss": 0.3015, "step": 15574 }, { "epoch": 0.683930316036752, "grad_norm": 1.640625, "learning_rate": 1.1407483036717076e-05, "loss": 0.3119, "step": 15576 }, { "epoch": 0.6840181345159555, "grad_norm": 1.4453125, "learning_rate": 1.1401677185608165e-05, "loss": 0.3085, "step": 15578 }, { "epoch": 0.684105952995159, "grad_norm": 1.625, "learning_rate": 1.1395872375862074e-05, "loss": 0.3267, "step": 15580 }, { "epoch": 0.6841937714743624, "grad_norm": 1.8671875, "learning_rate": 1.139006860792333e-05, "loss": 0.3639, "step": 15582 }, { "epoch": 0.684281589953566, "grad_norm": 1.6875, "learning_rate": 1.1384265882236414e-05, "loss": 0.3307, "step": 15584 }, { "epoch": 0.6843694084327695, "grad_norm": 1.6796875, "learning_rate": 1.1378464199245659e-05, "loss": 0.3185, "step": 15586 }, { "epoch": 0.684457226911973, "grad_norm": 1.6875, "learning_rate": 1.137266355939539e-05, "loss": 0.2997, "step": 15588 }, { "epoch": 0.6845450453911764, "grad_norm": 1.5234375, "learning_rate": 1.1366863963129805e-05, "loss": 0.3336, "step": 15590 }, { "epoch": 0.6846328638703799, "grad_norm": 1.5390625, "learning_rate": 1.1361065410893057e-05, "loss": 0.2917, "step": 15592 }, { "epoch": 0.6847206823495834, "grad_norm": 1.59375, "learning_rate": 1.1355267903129187e-05, "loss": 0.3479, "step": 15594 }, { "epoch": 0.6848085008287869, "grad_norm": 1.53125, "learning_rate": 1.1349471440282164e-05, "loss": 0.2962, "step": 15596 }, { "epoch": 0.6848963193079903, "grad_norm": 1.59375, "learning_rate": 1.1343676022795898e-05, "loss": 0.3089, "step": 15598 }, { "epoch": 0.6849841377871939, "grad_norm": 1.625, "learning_rate": 1.1337881651114188e-05, "loss": 0.3061, "step": 15600 }, { "epoch": 0.6850719562663974, "grad_norm": 1.6484375, "learning_rate": 1.1332088325680783e-05, "loss": 0.3318, "step": 15602 }, { "epoch": 0.6851597747456009, "grad_norm": 1.546875, "learning_rate": 1.1326296046939333e-05, "loss": 0.2978, "step": 15604 }, { "epoch": 0.6852475932248043, "grad_norm": 1.4921875, "learning_rate": 1.1320504815333399e-05, "loss": 0.3091, "step": 15606 }, { "epoch": 0.6853354117040078, "grad_norm": 1.671875, "learning_rate": 1.1314714631306495e-05, "loss": 0.3161, "step": 15608 }, { "epoch": 0.6854232301832113, "grad_norm": 1.6171875, "learning_rate": 1.1308925495302017e-05, "loss": 0.3394, "step": 15610 }, { "epoch": 0.6855110486624147, "grad_norm": 1.5703125, "learning_rate": 1.1303137407763314e-05, "loss": 0.3152, "step": 15612 }, { "epoch": 0.6855988671416183, "grad_norm": 1.671875, "learning_rate": 1.1297350369133632e-05, "loss": 0.3281, "step": 15614 }, { "epoch": 0.6856866856208218, "grad_norm": 1.6015625, "learning_rate": 1.1291564379856145e-05, "loss": 0.3239, "step": 15616 }, { "epoch": 0.6857745041000253, "grad_norm": 1.6875, "learning_rate": 1.1285779440373943e-05, "loss": 0.3456, "step": 15618 }, { "epoch": 0.6858623225792287, "grad_norm": 1.5625, "learning_rate": 1.1279995551130029e-05, "loss": 0.3397, "step": 15620 }, { "epoch": 0.6859501410584322, "grad_norm": 1.5, "learning_rate": 1.1274212712567354e-05, "loss": 0.3237, "step": 15622 }, { "epoch": 0.6860379595376357, "grad_norm": 1.5, "learning_rate": 1.126843092512875e-05, "loss": 0.3207, "step": 15624 }, { "epoch": 0.6861257780168392, "grad_norm": 1.5546875, "learning_rate": 1.126265018925701e-05, "loss": 0.3607, "step": 15626 }, { "epoch": 0.6862135964960426, "grad_norm": 1.5234375, "learning_rate": 1.1256870505394798e-05, "loss": 0.2892, "step": 15628 }, { "epoch": 0.6863014149752462, "grad_norm": 1.515625, "learning_rate": 1.1251091873984748e-05, "loss": 0.326, "step": 15630 }, { "epoch": 0.6863892334544497, "grad_norm": 1.59375, "learning_rate": 1.1245314295469379e-05, "loss": 0.3374, "step": 15632 }, { "epoch": 0.6864770519336532, "grad_norm": 1.5078125, "learning_rate": 1.1239537770291128e-05, "loss": 0.3499, "step": 15634 }, { "epoch": 0.6865648704128566, "grad_norm": 1.53125, "learning_rate": 1.1233762298892384e-05, "loss": 0.3229, "step": 15636 }, { "epoch": 0.6866526888920601, "grad_norm": 1.5546875, "learning_rate": 1.1227987881715412e-05, "loss": 0.3101, "step": 15638 }, { "epoch": 0.6867405073712636, "grad_norm": 1.609375, "learning_rate": 1.1222214519202439e-05, "loss": 0.326, "step": 15640 }, { "epoch": 0.686828325850467, "grad_norm": 1.6328125, "learning_rate": 1.1216442211795582e-05, "loss": 0.3178, "step": 15642 }, { "epoch": 0.6869161443296705, "grad_norm": 1.5234375, "learning_rate": 1.1210670959936875e-05, "loss": 0.2925, "step": 15644 }, { "epoch": 0.6870039628088741, "grad_norm": 1.46875, "learning_rate": 1.120490076406831e-05, "loss": 0.2749, "step": 15646 }, { "epoch": 0.6870917812880776, "grad_norm": 1.59375, "learning_rate": 1.1199131624631734e-05, "loss": 0.313, "step": 15648 }, { "epoch": 0.687179599767281, "grad_norm": 1.609375, "learning_rate": 1.1193363542068974e-05, "loss": 0.3055, "step": 15650 }, { "epoch": 0.6872674182464845, "grad_norm": 1.484375, "learning_rate": 1.1187596516821734e-05, "loss": 0.3377, "step": 15652 }, { "epoch": 0.687355236725688, "grad_norm": 1.484375, "learning_rate": 1.1181830549331674e-05, "loss": 0.3177, "step": 15654 }, { "epoch": 0.6874430552048915, "grad_norm": 1.578125, "learning_rate": 1.1176065640040342e-05, "loss": 0.3335, "step": 15656 }, { "epoch": 0.6875308736840949, "grad_norm": 1.5859375, "learning_rate": 1.1170301789389209e-05, "loss": 0.2978, "step": 15658 }, { "epoch": 0.6876186921632985, "grad_norm": 1.6328125, "learning_rate": 1.116453899781969e-05, "loss": 0.3564, "step": 15660 }, { "epoch": 0.687706510642502, "grad_norm": 1.734375, "learning_rate": 1.115877726577308e-05, "loss": 0.3511, "step": 15662 }, { "epoch": 0.6877943291217055, "grad_norm": 1.53125, "learning_rate": 1.1153016593690634e-05, "loss": 0.3411, "step": 15664 }, { "epoch": 0.6878821476009089, "grad_norm": 1.6953125, "learning_rate": 1.11472569820135e-05, "loss": 0.333, "step": 15666 }, { "epoch": 0.6879699660801124, "grad_norm": 2.015625, "learning_rate": 1.1141498431182735e-05, "loss": 0.3437, "step": 15668 }, { "epoch": 0.6880577845593159, "grad_norm": 1.578125, "learning_rate": 1.1135740941639353e-05, "loss": 0.3365, "step": 15670 }, { "epoch": 0.6881456030385193, "grad_norm": 1.6640625, "learning_rate": 1.1129984513824241e-05, "loss": 0.3471, "step": 15672 }, { "epoch": 0.6882334215177228, "grad_norm": 1.6328125, "learning_rate": 1.1124229148178253e-05, "loss": 0.3298, "step": 15674 }, { "epoch": 0.6883212399969264, "grad_norm": 1.6875, "learning_rate": 1.1118474845142122e-05, "loss": 0.3135, "step": 15676 }, { "epoch": 0.6884090584761299, "grad_norm": 1.78125, "learning_rate": 1.1112721605156511e-05, "loss": 0.3343, "step": 15678 }, { "epoch": 0.6884968769553333, "grad_norm": 1.5625, "learning_rate": 1.1106969428662011e-05, "loss": 0.3215, "step": 15680 }, { "epoch": 0.6885846954345368, "grad_norm": 1.5546875, "learning_rate": 1.1101218316099115e-05, "loss": 0.3083, "step": 15682 }, { "epoch": 0.6886725139137403, "grad_norm": 1.546875, "learning_rate": 1.109546826790826e-05, "loss": 0.3814, "step": 15684 }, { "epoch": 0.6887603323929438, "grad_norm": 1.765625, "learning_rate": 1.108971928452977e-05, "loss": 0.3308, "step": 15686 }, { "epoch": 0.6888481508721472, "grad_norm": 1.5703125, "learning_rate": 1.108397136640392e-05, "loss": 0.3188, "step": 15688 }, { "epoch": 0.6889359693513507, "grad_norm": 1.578125, "learning_rate": 1.107822451397087e-05, "loss": 0.3336, "step": 15690 }, { "epoch": 0.6890237878305543, "grad_norm": 1.6640625, "learning_rate": 1.1072478727670732e-05, "loss": 0.3128, "step": 15692 }, { "epoch": 0.6891116063097578, "grad_norm": 1.71875, "learning_rate": 1.1066734007943514e-05, "loss": 0.3277, "step": 15694 }, { "epoch": 0.6891994247889612, "grad_norm": 1.546875, "learning_rate": 1.1060990355229134e-05, "loss": 0.3215, "step": 15696 }, { "epoch": 0.6892872432681647, "grad_norm": 1.6328125, "learning_rate": 1.1055247769967465e-05, "loss": 0.3183, "step": 15698 }, { "epoch": 0.6893750617473682, "grad_norm": 1.5078125, "learning_rate": 1.1049506252598255e-05, "loss": 0.3155, "step": 15700 }, { "epoch": 0.6894628802265717, "grad_norm": 1.59375, "learning_rate": 1.1043765803561207e-05, "loss": 0.3152, "step": 15702 }, { "epoch": 0.6895506987057751, "grad_norm": 1.59375, "learning_rate": 1.1038026423295923e-05, "loss": 0.3361, "step": 15704 }, { "epoch": 0.6896385171849786, "grad_norm": 1.5234375, "learning_rate": 1.103228811224191e-05, "loss": 0.3342, "step": 15706 }, { "epoch": 0.6897263356641822, "grad_norm": 1.5390625, "learning_rate": 1.1026550870838643e-05, "loss": 0.3308, "step": 15708 }, { "epoch": 0.6898141541433856, "grad_norm": 1.59375, "learning_rate": 1.1020814699525439e-05, "loss": 0.3376, "step": 15710 }, { "epoch": 0.6899019726225891, "grad_norm": 1.546875, "learning_rate": 1.1015079598741607e-05, "loss": 0.3261, "step": 15712 }, { "epoch": 0.6899897911017926, "grad_norm": 1.625, "learning_rate": 1.1009345568926321e-05, "loss": 0.3297, "step": 15714 }, { "epoch": 0.6900776095809961, "grad_norm": 1.53125, "learning_rate": 1.1003612610518718e-05, "loss": 0.3, "step": 15716 }, { "epoch": 0.6901654280601995, "grad_norm": 1.65625, "learning_rate": 1.0997880723957812e-05, "loss": 0.2955, "step": 15718 }, { "epoch": 0.690253246539403, "grad_norm": 1.5859375, "learning_rate": 1.099214990968255e-05, "loss": 0.3062, "step": 15720 }, { "epoch": 0.6903410650186066, "grad_norm": 1.640625, "learning_rate": 1.0986420168131817e-05, "loss": 0.3384, "step": 15722 }, { "epoch": 0.6904288834978101, "grad_norm": 1.5078125, "learning_rate": 1.0980691499744375e-05, "loss": 0.3259, "step": 15724 }, { "epoch": 0.6905167019770135, "grad_norm": 1.65625, "learning_rate": 1.0974963904958947e-05, "loss": 0.3113, "step": 15726 }, { "epoch": 0.690604520456217, "grad_norm": 1.5234375, "learning_rate": 1.0969237384214146e-05, "loss": 0.3524, "step": 15728 }, { "epoch": 0.6906923389354205, "grad_norm": 1.546875, "learning_rate": 1.0963511937948501e-05, "loss": 0.2989, "step": 15730 }, { "epoch": 0.690780157414624, "grad_norm": 1.546875, "learning_rate": 1.0957787566600486e-05, "loss": 0.3107, "step": 15732 }, { "epoch": 0.6908679758938274, "grad_norm": 1.5859375, "learning_rate": 1.0952064270608452e-05, "loss": 0.3216, "step": 15734 }, { "epoch": 0.6909557943730309, "grad_norm": 1.515625, "learning_rate": 1.0946342050410719e-05, "loss": 0.3078, "step": 15736 }, { "epoch": 0.6910436128522345, "grad_norm": 1.515625, "learning_rate": 1.0940620906445478e-05, "loss": 0.323, "step": 15738 }, { "epoch": 0.691131431331438, "grad_norm": 1.5390625, "learning_rate": 1.0934900839150858e-05, "loss": 0.2987, "step": 15740 }, { "epoch": 0.6912192498106414, "grad_norm": 1.5859375, "learning_rate": 1.0929181848964904e-05, "loss": 0.3072, "step": 15742 }, { "epoch": 0.6913070682898449, "grad_norm": 1.515625, "learning_rate": 1.0923463936325568e-05, "loss": 0.2987, "step": 15744 }, { "epoch": 0.6913948867690484, "grad_norm": 1.6171875, "learning_rate": 1.0917747101670744e-05, "loss": 0.2951, "step": 15746 }, { "epoch": 0.6914827052482518, "grad_norm": 1.484375, "learning_rate": 1.0912031345438218e-05, "loss": 0.3291, "step": 15748 }, { "epoch": 0.6915705237274553, "grad_norm": 1.5390625, "learning_rate": 1.0906316668065717e-05, "loss": 0.3089, "step": 15750 }, { "epoch": 0.6916583422066588, "grad_norm": 1.5625, "learning_rate": 1.0900603069990861e-05, "loss": 0.3456, "step": 15752 }, { "epoch": 0.6917461606858624, "grad_norm": 1.6015625, "learning_rate": 1.0894890551651197e-05, "loss": 0.337, "step": 15754 }, { "epoch": 0.6918339791650658, "grad_norm": 1.5234375, "learning_rate": 1.0889179113484202e-05, "loss": 0.3076, "step": 15756 }, { "epoch": 0.6919217976442693, "grad_norm": 1.484375, "learning_rate": 1.0883468755927245e-05, "loss": 0.322, "step": 15758 }, { "epoch": 0.6920096161234728, "grad_norm": 1.7109375, "learning_rate": 1.0877759479417643e-05, "loss": 0.3365, "step": 15760 }, { "epoch": 0.6920974346026763, "grad_norm": 1.53125, "learning_rate": 1.0872051284392596e-05, "loss": 0.3051, "step": 15762 }, { "epoch": 0.6921852530818797, "grad_norm": 1.53125, "learning_rate": 1.0866344171289259e-05, "loss": 0.2926, "step": 15764 }, { "epoch": 0.6922730715610832, "grad_norm": 1.5703125, "learning_rate": 1.0860638140544672e-05, "loss": 0.302, "step": 15766 }, { "epoch": 0.6923608900402868, "grad_norm": 1.6171875, "learning_rate": 1.0854933192595806e-05, "loss": 0.3183, "step": 15768 }, { "epoch": 0.6924487085194903, "grad_norm": 1.6484375, "learning_rate": 1.0849229327879548e-05, "loss": 0.3038, "step": 15770 }, { "epoch": 0.6925365269986937, "grad_norm": 1.6015625, "learning_rate": 1.0843526546832688e-05, "loss": 0.3107, "step": 15772 }, { "epoch": 0.6926243454778972, "grad_norm": 1.4921875, "learning_rate": 1.0837824849891972e-05, "loss": 0.32, "step": 15774 }, { "epoch": 0.6927121639571007, "grad_norm": 1.5234375, "learning_rate": 1.0832124237494013e-05, "loss": 0.3063, "step": 15776 }, { "epoch": 0.6927999824363041, "grad_norm": 1.5546875, "learning_rate": 1.0826424710075383e-05, "loss": 0.3032, "step": 15778 }, { "epoch": 0.6928878009155076, "grad_norm": 1.6171875, "learning_rate": 1.082072626807255e-05, "loss": 0.3558, "step": 15780 }, { "epoch": 0.6929756193947111, "grad_norm": 1.5625, "learning_rate": 1.0815028911921887e-05, "loss": 0.2991, "step": 15782 }, { "epoch": 0.6930634378739147, "grad_norm": 1.578125, "learning_rate": 1.0809332642059721e-05, "loss": 0.3309, "step": 15784 }, { "epoch": 0.6931512563531181, "grad_norm": 1.7421875, "learning_rate": 1.0803637458922253e-05, "loss": 0.313, "step": 15786 }, { "epoch": 0.6932390748323216, "grad_norm": 1.5546875, "learning_rate": 1.079794336294564e-05, "loss": 0.2886, "step": 15788 }, { "epoch": 0.6933268933115251, "grad_norm": 1.6875, "learning_rate": 1.079225035456593e-05, "loss": 0.3351, "step": 15790 }, { "epoch": 0.6934147117907286, "grad_norm": 1.6015625, "learning_rate": 1.0786558434219082e-05, "loss": 0.3275, "step": 15792 }, { "epoch": 0.693502530269932, "grad_norm": 1.5859375, "learning_rate": 1.0780867602341007e-05, "loss": 0.2797, "step": 15794 }, { "epoch": 0.6935903487491355, "grad_norm": 1.6796875, "learning_rate": 1.0775177859367492e-05, "loss": 0.319, "step": 15796 }, { "epoch": 0.693678167228339, "grad_norm": 1.609375, "learning_rate": 1.0769489205734276e-05, "loss": 0.3419, "step": 15798 }, { "epoch": 0.6937659857075426, "grad_norm": 1.453125, "learning_rate": 1.0763801641876986e-05, "loss": 0.2933, "step": 15800 }, { "epoch": 0.693853804186746, "grad_norm": 1.5859375, "learning_rate": 1.0758115168231178e-05, "loss": 0.2888, "step": 15802 }, { "epoch": 0.6939416226659495, "grad_norm": 1.8046875, "learning_rate": 1.0752429785232326e-05, "loss": 0.3036, "step": 15804 }, { "epoch": 0.694029441145153, "grad_norm": 1.6484375, "learning_rate": 1.0746745493315807e-05, "loss": 0.316, "step": 15806 }, { "epoch": 0.6941172596243564, "grad_norm": 1.6640625, "learning_rate": 1.0741062292916943e-05, "loss": 0.3299, "step": 15808 }, { "epoch": 0.6942050781035599, "grad_norm": 1.703125, "learning_rate": 1.073538018447094e-05, "loss": 0.3164, "step": 15810 }, { "epoch": 0.6942928965827634, "grad_norm": 1.734375, "learning_rate": 1.072969916841295e-05, "loss": 0.3163, "step": 15812 }, { "epoch": 0.694380715061967, "grad_norm": 1.5390625, "learning_rate": 1.0724019245178016e-05, "loss": 0.3286, "step": 15814 }, { "epoch": 0.6944685335411704, "grad_norm": 1.6328125, "learning_rate": 1.0718340415201104e-05, "loss": 0.3321, "step": 15816 }, { "epoch": 0.6945563520203739, "grad_norm": 1.5078125, "learning_rate": 1.0712662678917115e-05, "loss": 0.3012, "step": 15818 }, { "epoch": 0.6946441704995774, "grad_norm": 1.5, "learning_rate": 1.0706986036760833e-05, "loss": 0.3009, "step": 15820 }, { "epoch": 0.6947319889787809, "grad_norm": 1.5546875, "learning_rate": 1.0701310489166997e-05, "loss": 0.3225, "step": 15822 }, { "epoch": 0.6948198074579843, "grad_norm": 1.6484375, "learning_rate": 1.0695636036570222e-05, "loss": 0.3273, "step": 15824 }, { "epoch": 0.6949076259371878, "grad_norm": 1.546875, "learning_rate": 1.0689962679405077e-05, "loss": 0.3408, "step": 15826 }, { "epoch": 0.6949954444163913, "grad_norm": 1.5625, "learning_rate": 1.0684290418106022e-05, "loss": 0.2963, "step": 15828 }, { "epoch": 0.6950832628955949, "grad_norm": 1.5859375, "learning_rate": 1.0678619253107436e-05, "loss": 0.3406, "step": 15830 }, { "epoch": 0.6951710813747983, "grad_norm": 1.515625, "learning_rate": 1.0672949184843622e-05, "loss": 0.3546, "step": 15832 }, { "epoch": 0.6952588998540018, "grad_norm": 1.5625, "learning_rate": 1.0667280213748784e-05, "loss": 0.3345, "step": 15834 }, { "epoch": 0.6953467183332053, "grad_norm": 1.625, "learning_rate": 1.0661612340257071e-05, "loss": 0.3073, "step": 15836 }, { "epoch": 0.6954345368124087, "grad_norm": 1.6171875, "learning_rate": 1.0655945564802517e-05, "loss": 0.3455, "step": 15838 }, { "epoch": 0.6955223552916122, "grad_norm": 1.5234375, "learning_rate": 1.0650279887819094e-05, "loss": 0.3232, "step": 15840 }, { "epoch": 0.6956101737708157, "grad_norm": 1.578125, "learning_rate": 1.0644615309740683e-05, "loss": 0.3087, "step": 15842 }, { "epoch": 0.6956979922500192, "grad_norm": 1.53125, "learning_rate": 1.063895183100106e-05, "loss": 0.3733, "step": 15844 }, { "epoch": 0.6957858107292227, "grad_norm": 1.6015625, "learning_rate": 1.0633289452033957e-05, "loss": 0.3111, "step": 15846 }, { "epoch": 0.6958736292084262, "grad_norm": 1.5703125, "learning_rate": 1.0627628173272986e-05, "loss": 0.3288, "step": 15848 }, { "epoch": 0.6959614476876297, "grad_norm": 1.5703125, "learning_rate": 1.0621967995151699e-05, "loss": 0.3649, "step": 15850 }, { "epoch": 0.6960492661668332, "grad_norm": 1.6015625, "learning_rate": 1.0616308918103554e-05, "loss": 0.3319, "step": 15852 }, { "epoch": 0.6961370846460366, "grad_norm": 1.578125, "learning_rate": 1.0610650942561909e-05, "loss": 0.3189, "step": 15854 }, { "epoch": 0.6962249031252401, "grad_norm": 1.609375, "learning_rate": 1.0604994068960072e-05, "loss": 0.3257, "step": 15856 }, { "epoch": 0.6963127216044436, "grad_norm": 1.5703125, "learning_rate": 1.0599338297731231e-05, "loss": 0.3204, "step": 15858 }, { "epoch": 0.696400540083647, "grad_norm": 1.640625, "learning_rate": 1.0593683629308537e-05, "loss": 0.3182, "step": 15860 }, { "epoch": 0.6964883585628506, "grad_norm": 1.546875, "learning_rate": 1.0588030064124981e-05, "loss": 0.3205, "step": 15862 }, { "epoch": 0.6965761770420541, "grad_norm": 1.5625, "learning_rate": 1.058237760261355e-05, "loss": 0.3209, "step": 15864 }, { "epoch": 0.6966639955212576, "grad_norm": 1.4921875, "learning_rate": 1.05767262452071e-05, "loss": 0.2977, "step": 15866 }, { "epoch": 0.696751814000461, "grad_norm": 1.5625, "learning_rate": 1.0571075992338398e-05, "loss": 0.3416, "step": 15868 }, { "epoch": 0.6968396324796645, "grad_norm": 1.515625, "learning_rate": 1.0565426844440166e-05, "loss": 0.3263, "step": 15870 }, { "epoch": 0.696927450958868, "grad_norm": 1.5625, "learning_rate": 1.0559778801945e-05, "loss": 0.3241, "step": 15872 }, { "epoch": 0.6970152694380715, "grad_norm": 1.4296875, "learning_rate": 1.0554131865285441e-05, "loss": 0.332, "step": 15874 }, { "epoch": 0.697103087917275, "grad_norm": 1.6328125, "learning_rate": 1.0548486034893926e-05, "loss": 0.3466, "step": 15876 }, { "epoch": 0.6971909063964785, "grad_norm": 1.4609375, "learning_rate": 1.0542841311202809e-05, "loss": 0.3141, "step": 15878 }, { "epoch": 0.697278724875682, "grad_norm": 1.5625, "learning_rate": 1.0537197694644376e-05, "loss": 0.3049, "step": 15880 }, { "epoch": 0.6973665433548855, "grad_norm": 1.4375, "learning_rate": 1.0531555185650802e-05, "loss": 0.3313, "step": 15882 }, { "epoch": 0.6974543618340889, "grad_norm": 1.578125, "learning_rate": 1.052591378465421e-05, "loss": 0.3541, "step": 15884 }, { "epoch": 0.6975421803132924, "grad_norm": 1.59375, "learning_rate": 1.052027349208661e-05, "loss": 0.3362, "step": 15886 }, { "epoch": 0.6976299987924959, "grad_norm": 1.578125, "learning_rate": 1.0514634308379928e-05, "loss": 0.3457, "step": 15888 }, { "epoch": 0.6977178172716993, "grad_norm": 1.609375, "learning_rate": 1.050899623396603e-05, "loss": 0.3524, "step": 15890 }, { "epoch": 0.6978056357509029, "grad_norm": 1.5703125, "learning_rate": 1.0503359269276678e-05, "loss": 0.3214, "step": 15892 }, { "epoch": 0.6978934542301064, "grad_norm": 1.6484375, "learning_rate": 1.0497723414743546e-05, "loss": 0.2927, "step": 15894 }, { "epoch": 0.6979812727093099, "grad_norm": 1.453125, "learning_rate": 1.0492088670798223e-05, "loss": 0.317, "step": 15896 }, { "epoch": 0.6980690911885133, "grad_norm": 1.6015625, "learning_rate": 1.0486455037872236e-05, "loss": 0.3112, "step": 15898 }, { "epoch": 0.6981569096677168, "grad_norm": 1.5703125, "learning_rate": 1.0480822516396994e-05, "loss": 0.3209, "step": 15900 }, { "epoch": 0.6982447281469203, "grad_norm": 1.4765625, "learning_rate": 1.047519110680385e-05, "loss": 0.3126, "step": 15902 }, { "epoch": 0.6983325466261238, "grad_norm": 1.59375, "learning_rate": 1.0469560809524056e-05, "loss": 0.2958, "step": 15904 }, { "epoch": 0.6984203651053272, "grad_norm": 1.5625, "learning_rate": 1.0463931624988769e-05, "loss": 0.308, "step": 15906 }, { "epoch": 0.6985081835845308, "grad_norm": 1.65625, "learning_rate": 1.045830355362909e-05, "loss": 0.3281, "step": 15908 }, { "epoch": 0.6985960020637343, "grad_norm": 1.5546875, "learning_rate": 1.0452676595876001e-05, "loss": 0.3242, "step": 15910 }, { "epoch": 0.6986838205429378, "grad_norm": 1.625, "learning_rate": 1.0447050752160436e-05, "loss": 0.3326, "step": 15912 }, { "epoch": 0.6987716390221412, "grad_norm": 1.6015625, "learning_rate": 1.0441426022913211e-05, "loss": 0.3141, "step": 15914 }, { "epoch": 0.6988594575013447, "grad_norm": 1.640625, "learning_rate": 1.0435802408565065e-05, "loss": 0.3224, "step": 15916 }, { "epoch": 0.6989472759805482, "grad_norm": 1.546875, "learning_rate": 1.043017990954667e-05, "loss": 0.3246, "step": 15918 }, { "epoch": 0.6990350944597516, "grad_norm": 1.7421875, "learning_rate": 1.042455852628858e-05, "loss": 0.318, "step": 15920 }, { "epoch": 0.6991229129389552, "grad_norm": 1.7890625, "learning_rate": 1.0418938259221311e-05, "loss": 0.3173, "step": 15922 }, { "epoch": 0.6992107314181587, "grad_norm": 1.640625, "learning_rate": 1.041331910877523e-05, "loss": 0.2939, "step": 15924 }, { "epoch": 0.6992985498973622, "grad_norm": 1.5078125, "learning_rate": 1.0407701075380674e-05, "loss": 0.313, "step": 15926 }, { "epoch": 0.6993863683765656, "grad_norm": 1.578125, "learning_rate": 1.0402084159467867e-05, "loss": 0.3124, "step": 15928 }, { "epoch": 0.6994741868557691, "grad_norm": 1.625, "learning_rate": 1.0396468361466947e-05, "loss": 0.3338, "step": 15930 }, { "epoch": 0.6995620053349726, "grad_norm": 1.59375, "learning_rate": 1.0390853681807989e-05, "loss": 0.2974, "step": 15932 }, { "epoch": 0.6996498238141761, "grad_norm": 1.5546875, "learning_rate": 1.038524012092095e-05, "loss": 0.3551, "step": 15934 }, { "epoch": 0.6997376422933795, "grad_norm": 1.609375, "learning_rate": 1.0379627679235734e-05, "loss": 0.3138, "step": 15936 }, { "epoch": 0.6998254607725831, "grad_norm": 1.5234375, "learning_rate": 1.0374016357182137e-05, "loss": 0.3576, "step": 15938 }, { "epoch": 0.6999132792517866, "grad_norm": 1.6171875, "learning_rate": 1.0368406155189862e-05, "loss": 0.3219, "step": 15940 }, { "epoch": 0.7000010977309901, "grad_norm": 1.6796875, "learning_rate": 1.036279707368856e-05, "loss": 0.3306, "step": 15942 }, { "epoch": 0.7000889162101935, "grad_norm": 1.6015625, "learning_rate": 1.035718911310776e-05, "loss": 0.3197, "step": 15944 }, { "epoch": 0.700176734689397, "grad_norm": 1.625, "learning_rate": 1.0351582273876936e-05, "loss": 0.2982, "step": 15946 }, { "epoch": 0.7002645531686005, "grad_norm": 1.5, "learning_rate": 1.0345976556425452e-05, "loss": 0.3038, "step": 15948 }, { "epoch": 0.700352371647804, "grad_norm": 1.71875, "learning_rate": 1.0340371961182588e-05, "loss": 0.3031, "step": 15950 }, { "epoch": 0.7004401901270074, "grad_norm": 1.4765625, "learning_rate": 1.0334768488577563e-05, "loss": 0.3268, "step": 15952 }, { "epoch": 0.700528008606211, "grad_norm": 1.8671875, "learning_rate": 1.032916613903948e-05, "loss": 0.317, "step": 15954 }, { "epoch": 0.7006158270854145, "grad_norm": 1.640625, "learning_rate": 1.0323564912997371e-05, "loss": 0.3188, "step": 15956 }, { "epoch": 0.700703645564618, "grad_norm": 1.6328125, "learning_rate": 1.0317964810880173e-05, "loss": 0.33, "step": 15958 }, { "epoch": 0.7007914640438214, "grad_norm": 1.53125, "learning_rate": 1.0312365833116757e-05, "loss": 0.3089, "step": 15960 }, { "epoch": 0.7008792825230249, "grad_norm": 1.5625, "learning_rate": 1.0306767980135878e-05, "loss": 0.3248, "step": 15962 }, { "epoch": 0.7009671010022284, "grad_norm": 1.5, "learning_rate": 1.0301171252366238e-05, "loss": 0.3073, "step": 15964 }, { "epoch": 0.7010549194814318, "grad_norm": 1.4609375, "learning_rate": 1.0295575650236428e-05, "loss": 0.339, "step": 15966 }, { "epoch": 0.7011427379606354, "grad_norm": 1.5546875, "learning_rate": 1.0289981174174947e-05, "loss": 0.3299, "step": 15968 }, { "epoch": 0.7012305564398389, "grad_norm": 1.609375, "learning_rate": 1.0284387824610247e-05, "loss": 0.3266, "step": 15970 }, { "epoch": 0.7013183749190424, "grad_norm": 1.640625, "learning_rate": 1.0278795601970646e-05, "loss": 0.3139, "step": 15972 }, { "epoch": 0.7014061933982458, "grad_norm": 1.6796875, "learning_rate": 1.027320450668441e-05, "loss": 0.3182, "step": 15974 }, { "epoch": 0.7014940118774493, "grad_norm": 1.640625, "learning_rate": 1.026761453917971e-05, "loss": 0.3215, "step": 15976 }, { "epoch": 0.7015818303566528, "grad_norm": 1.4609375, "learning_rate": 1.026202569988461e-05, "loss": 0.3387, "step": 15978 }, { "epoch": 0.7016696488358563, "grad_norm": 1.5546875, "learning_rate": 1.025643798922712e-05, "loss": 0.3404, "step": 15980 }, { "epoch": 0.7017574673150597, "grad_norm": 1.5546875, "learning_rate": 1.0250851407635137e-05, "loss": 0.3075, "step": 15982 }, { "epoch": 0.7018452857942633, "grad_norm": 1.53125, "learning_rate": 1.0245265955536503e-05, "loss": 0.3429, "step": 15984 }, { "epoch": 0.7019331042734668, "grad_norm": 1.6015625, "learning_rate": 1.0239681633358924e-05, "loss": 0.3293, "step": 15986 }, { "epoch": 0.7020209227526703, "grad_norm": 1.5859375, "learning_rate": 1.0234098441530075e-05, "loss": 0.3247, "step": 15988 }, { "epoch": 0.7021087412318737, "grad_norm": 1.6953125, "learning_rate": 1.0228516380477504e-05, "loss": 0.3329, "step": 15990 }, { "epoch": 0.7021965597110772, "grad_norm": 1.4609375, "learning_rate": 1.0222935450628681e-05, "loss": 0.2793, "step": 15992 }, { "epoch": 0.7022843781902807, "grad_norm": 1.609375, "learning_rate": 1.0217355652411015e-05, "loss": 0.3479, "step": 15994 }, { "epoch": 0.7023721966694841, "grad_norm": 1.796875, "learning_rate": 1.0211776986251784e-05, "loss": 0.3216, "step": 15996 }, { "epoch": 0.7024600151486876, "grad_norm": 1.625, "learning_rate": 1.0206199452578228e-05, "loss": 0.3071, "step": 15998 }, { "epoch": 0.7025478336278912, "grad_norm": 1.625, "learning_rate": 1.0200623051817462e-05, "loss": 0.303, "step": 16000 }, { "epoch": 0.7026356521070947, "grad_norm": 1.7421875, "learning_rate": 1.0195047784396524e-05, "loss": 0.3267, "step": 16002 }, { "epoch": 0.7027234705862981, "grad_norm": 1.6171875, "learning_rate": 1.0189473650742385e-05, "loss": 0.3194, "step": 16004 }, { "epoch": 0.7028112890655016, "grad_norm": 1.796875, "learning_rate": 1.018390065128189e-05, "loss": 0.3244, "step": 16006 }, { "epoch": 0.7028991075447051, "grad_norm": 1.7265625, "learning_rate": 1.0178328786441848e-05, "loss": 0.3149, "step": 16008 }, { "epoch": 0.7029869260239086, "grad_norm": 1.4921875, "learning_rate": 1.017275805664894e-05, "loss": 0.3069, "step": 16010 }, { "epoch": 0.703074744503112, "grad_norm": 1.8046875, "learning_rate": 1.0167188462329767e-05, "loss": 0.3143, "step": 16012 }, { "epoch": 0.7031625629823156, "grad_norm": 1.765625, "learning_rate": 1.016162000391087e-05, "loss": 0.3319, "step": 16014 }, { "epoch": 0.7032503814615191, "grad_norm": 1.5703125, "learning_rate": 1.0156052681818659e-05, "loss": 0.3057, "step": 16016 }, { "epoch": 0.7033381999407226, "grad_norm": 1.5703125, "learning_rate": 1.0150486496479498e-05, "loss": 0.2966, "step": 16018 }, { "epoch": 0.703426018419926, "grad_norm": 1.546875, "learning_rate": 1.014492144831963e-05, "loss": 0.3022, "step": 16020 }, { "epoch": 0.7035138368991295, "grad_norm": 1.5625, "learning_rate": 1.0139357537765249e-05, "loss": 0.2942, "step": 16022 }, { "epoch": 0.703601655378333, "grad_norm": 1.6484375, "learning_rate": 1.013379476524242e-05, "loss": 0.3157, "step": 16024 }, { "epoch": 0.7036894738575364, "grad_norm": 1.421875, "learning_rate": 1.0128233131177161e-05, "loss": 0.3504, "step": 16026 }, { "epoch": 0.7037772923367399, "grad_norm": 1.515625, "learning_rate": 1.0122672635995375e-05, "loss": 0.2993, "step": 16028 }, { "epoch": 0.7038651108159435, "grad_norm": 1.921875, "learning_rate": 1.0117113280122875e-05, "loss": 0.3401, "step": 16030 }, { "epoch": 0.703952929295147, "grad_norm": 1.5546875, "learning_rate": 1.0111555063985418e-05, "loss": 0.3181, "step": 16032 }, { "epoch": 0.7040407477743504, "grad_norm": 1.5625, "learning_rate": 1.0105997988008631e-05, "loss": 0.3173, "step": 16034 }, { "epoch": 0.7041285662535539, "grad_norm": 1.4921875, "learning_rate": 1.01004420526181e-05, "loss": 0.3183, "step": 16036 }, { "epoch": 0.7042163847327574, "grad_norm": 1.4765625, "learning_rate": 1.0094887258239288e-05, "loss": 0.3163, "step": 16038 }, { "epoch": 0.7043042032119609, "grad_norm": 1.5390625, "learning_rate": 1.0089333605297574e-05, "loss": 0.3289, "step": 16040 }, { "epoch": 0.7043920216911643, "grad_norm": 1.4609375, "learning_rate": 1.0083781094218275e-05, "loss": 0.2794, "step": 16042 }, { "epoch": 0.7044798401703678, "grad_norm": 1.6484375, "learning_rate": 1.0078229725426594e-05, "loss": 0.3269, "step": 16044 }, { "epoch": 0.7045676586495714, "grad_norm": 1.609375, "learning_rate": 1.0072679499347663e-05, "loss": 0.3037, "step": 16046 }, { "epoch": 0.7046554771287749, "grad_norm": 1.6015625, "learning_rate": 1.00671304164065e-05, "loss": 0.3021, "step": 16048 }, { "epoch": 0.7047432956079783, "grad_norm": 1.5390625, "learning_rate": 1.0061582477028078e-05, "loss": 0.3212, "step": 16050 }, { "epoch": 0.7048311140871818, "grad_norm": 1.59375, "learning_rate": 1.0056035681637254e-05, "loss": 0.313, "step": 16052 }, { "epoch": 0.7049189325663853, "grad_norm": 1.5390625, "learning_rate": 1.005049003065879e-05, "loss": 0.3348, "step": 16054 }, { "epoch": 0.7050067510455887, "grad_norm": 1.578125, "learning_rate": 1.0044945524517391e-05, "loss": 0.324, "step": 16056 }, { "epoch": 0.7050945695247922, "grad_norm": 1.8046875, "learning_rate": 1.003940216363764e-05, "loss": 0.3451, "step": 16058 }, { "epoch": 0.7051823880039957, "grad_norm": 1.5625, "learning_rate": 1.0033859948444069e-05, "loss": 0.32, "step": 16060 }, { "epoch": 0.7052702064831993, "grad_norm": 1.546875, "learning_rate": 1.0028318879361087e-05, "loss": 0.3068, "step": 16062 }, { "epoch": 0.7053580249624027, "grad_norm": 1.515625, "learning_rate": 1.0022778956813028e-05, "loss": 0.3484, "step": 16064 }, { "epoch": 0.7054458434416062, "grad_norm": 1.5390625, "learning_rate": 1.0017240181224155e-05, "loss": 0.325, "step": 16066 }, { "epoch": 0.7055336619208097, "grad_norm": 1.671875, "learning_rate": 1.0011702553018612e-05, "loss": 0.3345, "step": 16068 }, { "epoch": 0.7056214804000132, "grad_norm": 1.5, "learning_rate": 1.0006166072620488e-05, "loss": 0.3413, "step": 16070 }, { "epoch": 0.7057092988792166, "grad_norm": 1.609375, "learning_rate": 1.0000630740453762e-05, "loss": 0.2878, "step": 16072 }, { "epoch": 0.7057971173584201, "grad_norm": 1.59375, "learning_rate": 9.995096556942319e-06, "loss": 0.3334, "step": 16074 }, { "epoch": 0.7058849358376237, "grad_norm": 1.5, "learning_rate": 9.989563522509997e-06, "loss": 0.3158, "step": 16076 }, { "epoch": 0.7059727543168272, "grad_norm": 1.515625, "learning_rate": 9.984031637580483e-06, "loss": 0.2987, "step": 16078 }, { "epoch": 0.7060605727960306, "grad_norm": 1.5078125, "learning_rate": 9.978500902577432e-06, "loss": 0.3314, "step": 16080 }, { "epoch": 0.7061483912752341, "grad_norm": 1.6328125, "learning_rate": 9.972971317924374e-06, "loss": 0.3223, "step": 16082 }, { "epoch": 0.7062362097544376, "grad_norm": 1.6328125, "learning_rate": 9.967442884044784e-06, "loss": 0.3217, "step": 16084 }, { "epoch": 0.706324028233641, "grad_norm": 1.453125, "learning_rate": 9.961915601362013e-06, "loss": 0.3206, "step": 16086 }, { "epoch": 0.7064118467128445, "grad_norm": 1.4453125, "learning_rate": 9.95638947029936e-06, "loss": 0.3122, "step": 16088 }, { "epoch": 0.706499665192048, "grad_norm": 1.4765625, "learning_rate": 9.950864491280004e-06, "loss": 0.2964, "step": 16090 }, { "epoch": 0.7065874836712516, "grad_norm": 1.6015625, "learning_rate": 9.945340664727048e-06, "loss": 0.3622, "step": 16092 }, { "epoch": 0.706675302150455, "grad_norm": 1.421875, "learning_rate": 9.939817991063518e-06, "loss": 0.3222, "step": 16094 }, { "epoch": 0.7067631206296585, "grad_norm": 1.671875, "learning_rate": 9.934296470712331e-06, "loss": 0.307, "step": 16096 }, { "epoch": 0.706850939108862, "grad_norm": 1.609375, "learning_rate": 9.928776104096338e-06, "loss": 0.298, "step": 16098 }, { "epoch": 0.7069387575880655, "grad_norm": 1.5, "learning_rate": 9.923256891638285e-06, "loss": 0.3036, "step": 16100 }, { "epoch": 0.7070265760672689, "grad_norm": 1.5234375, "learning_rate": 9.917738833760826e-06, "loss": 0.3059, "step": 16102 }, { "epoch": 0.7071143945464724, "grad_norm": 1.4921875, "learning_rate": 9.91222193088655e-06, "loss": 0.325, "step": 16104 }, { "epoch": 0.7072022130256759, "grad_norm": 1.796875, "learning_rate": 9.906706183437933e-06, "loss": 0.3055, "step": 16106 }, { "epoch": 0.7072900315048795, "grad_norm": 1.5546875, "learning_rate": 9.901191591837378e-06, "loss": 0.3273, "step": 16108 }, { "epoch": 0.7073778499840829, "grad_norm": 1.53125, "learning_rate": 9.89567815650718e-06, "loss": 0.2972, "step": 16110 }, { "epoch": 0.7074656684632864, "grad_norm": 1.5234375, "learning_rate": 9.890165877869578e-06, "loss": 0.3642, "step": 16112 }, { "epoch": 0.7075534869424899, "grad_norm": 1.4375, "learning_rate": 9.884654756346698e-06, "loss": 0.3163, "step": 16114 }, { "epoch": 0.7076413054216933, "grad_norm": 1.5859375, "learning_rate": 9.879144792360567e-06, "loss": 0.3013, "step": 16116 }, { "epoch": 0.7077291239008968, "grad_norm": 1.5234375, "learning_rate": 9.873635986333162e-06, "loss": 0.3158, "step": 16118 }, { "epoch": 0.7078169423801003, "grad_norm": 1.53125, "learning_rate": 9.868128338686334e-06, "loss": 0.3253, "step": 16120 }, { "epoch": 0.7079047608593039, "grad_norm": 1.609375, "learning_rate": 9.862621849841871e-06, "loss": 0.3257, "step": 16122 }, { "epoch": 0.7079925793385073, "grad_norm": 1.4140625, "learning_rate": 9.857116520221457e-06, "loss": 0.3153, "step": 16124 }, { "epoch": 0.7080803978177108, "grad_norm": 1.5703125, "learning_rate": 9.85161235024668e-06, "loss": 0.3007, "step": 16126 }, { "epoch": 0.7081682162969143, "grad_norm": 1.6875, "learning_rate": 9.846109340339068e-06, "loss": 0.334, "step": 16128 }, { "epoch": 0.7082560347761178, "grad_norm": 1.59375, "learning_rate": 9.840607490920031e-06, "loss": 0.3236, "step": 16130 }, { "epoch": 0.7083438532553212, "grad_norm": 1.578125, "learning_rate": 9.835106802410913e-06, "loss": 0.2867, "step": 16132 }, { "epoch": 0.7084316717345247, "grad_norm": 1.5390625, "learning_rate": 9.829607275232949e-06, "loss": 0.294, "step": 16134 }, { "epoch": 0.7085194902137282, "grad_norm": 1.4453125, "learning_rate": 9.824108909807297e-06, "loss": 0.3177, "step": 16136 }, { "epoch": 0.7086073086929318, "grad_norm": 1.5078125, "learning_rate": 9.818611706555026e-06, "loss": 0.3327, "step": 16138 }, { "epoch": 0.7086951271721352, "grad_norm": 1.5078125, "learning_rate": 9.813115665897096e-06, "loss": 0.3253, "step": 16140 }, { "epoch": 0.7087829456513387, "grad_norm": 1.5234375, "learning_rate": 9.807620788254421e-06, "loss": 0.3269, "step": 16142 }, { "epoch": 0.7088707641305422, "grad_norm": 1.5078125, "learning_rate": 9.802127074047779e-06, "loss": 0.3377, "step": 16144 }, { "epoch": 0.7089585826097456, "grad_norm": 1.65625, "learning_rate": 9.796634523697898e-06, "loss": 0.3253, "step": 16146 }, { "epoch": 0.7090464010889491, "grad_norm": 1.5859375, "learning_rate": 9.79114313762539e-06, "loss": 0.3284, "step": 16148 }, { "epoch": 0.7091342195681526, "grad_norm": 1.6015625, "learning_rate": 9.785652916250773e-06, "loss": 0.3334, "step": 16150 }, { "epoch": 0.7092220380473561, "grad_norm": 1.5859375, "learning_rate": 9.780163859994515e-06, "loss": 0.2905, "step": 16152 }, { "epoch": 0.7093098565265596, "grad_norm": 1.5546875, "learning_rate": 9.77467596927695e-06, "loss": 0.3026, "step": 16154 }, { "epoch": 0.7093976750057631, "grad_norm": 1.578125, "learning_rate": 9.769189244518354e-06, "loss": 0.3614, "step": 16156 }, { "epoch": 0.7094854934849666, "grad_norm": 1.6484375, "learning_rate": 9.763703686138892e-06, "loss": 0.3132, "step": 16158 }, { "epoch": 0.7095733119641701, "grad_norm": 1.5625, "learning_rate": 9.75821929455866e-06, "loss": 0.2955, "step": 16160 }, { "epoch": 0.7096611304433735, "grad_norm": 1.6484375, "learning_rate": 9.75273607019765e-06, "loss": 0.3125, "step": 16162 }, { "epoch": 0.709748948922577, "grad_norm": 1.5, "learning_rate": 9.747254013475754e-06, "loss": 0.3066, "step": 16164 }, { "epoch": 0.7098367674017805, "grad_norm": 1.78125, "learning_rate": 9.741773124812814e-06, "loss": 0.3302, "step": 16166 }, { "epoch": 0.7099245858809841, "grad_norm": 1.5703125, "learning_rate": 9.736293404628546e-06, "loss": 0.3334, "step": 16168 }, { "epoch": 0.7100124043601875, "grad_norm": 1.4765625, "learning_rate": 9.730814853342587e-06, "loss": 0.3064, "step": 16170 }, { "epoch": 0.710100222839391, "grad_norm": 1.5546875, "learning_rate": 9.72533747137448e-06, "loss": 0.3152, "step": 16172 }, { "epoch": 0.7101880413185945, "grad_norm": 1.578125, "learning_rate": 9.719861259143698e-06, "loss": 0.3324, "step": 16174 }, { "epoch": 0.710275859797798, "grad_norm": 1.7578125, "learning_rate": 9.714386217069604e-06, "loss": 0.347, "step": 16176 }, { "epoch": 0.7103636782770014, "grad_norm": 1.625, "learning_rate": 9.708912345571469e-06, "loss": 0.3197, "step": 16178 }, { "epoch": 0.7104514967562049, "grad_norm": 1.515625, "learning_rate": 9.7034396450685e-06, "loss": 0.3114, "step": 16180 }, { "epoch": 0.7105393152354084, "grad_norm": 1.703125, "learning_rate": 9.69796811597978e-06, "loss": 0.3416, "step": 16182 }, { "epoch": 0.710627133714612, "grad_norm": 1.8125, "learning_rate": 9.692497758724342e-06, "loss": 0.3315, "step": 16184 }, { "epoch": 0.7107149521938154, "grad_norm": 1.5, "learning_rate": 9.687028573721094e-06, "loss": 0.3204, "step": 16186 }, { "epoch": 0.7108027706730189, "grad_norm": 1.5859375, "learning_rate": 9.681560561388858e-06, "loss": 0.3229, "step": 16188 }, { "epoch": 0.7108905891522224, "grad_norm": 1.5390625, "learning_rate": 9.676093722146399e-06, "loss": 0.3268, "step": 16190 }, { "epoch": 0.7109784076314258, "grad_norm": 1.5703125, "learning_rate": 9.670628056412342e-06, "loss": 0.2892, "step": 16192 }, { "epoch": 0.7110662261106293, "grad_norm": 1.5234375, "learning_rate": 9.665163564605275e-06, "loss": 0.3335, "step": 16194 }, { "epoch": 0.7111540445898328, "grad_norm": 1.71875, "learning_rate": 9.659700247143658e-06, "loss": 0.3094, "step": 16196 }, { "epoch": 0.7112418630690363, "grad_norm": 1.515625, "learning_rate": 9.654238104445873e-06, "loss": 0.3172, "step": 16198 }, { "epoch": 0.7113296815482398, "grad_norm": 1.546875, "learning_rate": 9.648777136930215e-06, "loss": 0.3063, "step": 16200 }, { "epoch": 0.7114175000274433, "grad_norm": 1.53125, "learning_rate": 9.64331734501487e-06, "loss": 0.3165, "step": 16202 }, { "epoch": 0.7115053185066468, "grad_norm": 1.5, "learning_rate": 9.637858729117977e-06, "loss": 0.3141, "step": 16204 }, { "epoch": 0.7115931369858502, "grad_norm": 1.5078125, "learning_rate": 9.632401289657537e-06, "loss": 0.3346, "step": 16206 }, { "epoch": 0.7116809554650537, "grad_norm": 1.59375, "learning_rate": 9.626945027051495e-06, "loss": 0.3514, "step": 16208 }, { "epoch": 0.7117687739442572, "grad_norm": 1.5625, "learning_rate": 9.621489941717691e-06, "loss": 0.3265, "step": 16210 }, { "epoch": 0.7118565924234607, "grad_norm": 1.546875, "learning_rate": 9.616036034073863e-06, "loss": 0.3285, "step": 16212 }, { "epoch": 0.7119444109026641, "grad_norm": 1.5703125, "learning_rate": 9.610583304537693e-06, "loss": 0.3249, "step": 16214 }, { "epoch": 0.7120322293818677, "grad_norm": 1.4765625, "learning_rate": 9.605131753526733e-06, "loss": 0.3098, "step": 16216 }, { "epoch": 0.7121200478610712, "grad_norm": 1.546875, "learning_rate": 9.599681381458483e-06, "loss": 0.3362, "step": 16218 }, { "epoch": 0.7122078663402747, "grad_norm": 1.6328125, "learning_rate": 9.594232188750316e-06, "loss": 0.3121, "step": 16220 }, { "epoch": 0.7122956848194781, "grad_norm": 1.546875, "learning_rate": 9.58878417581955e-06, "loss": 0.3073, "step": 16222 }, { "epoch": 0.7123835032986816, "grad_norm": 1.515625, "learning_rate": 9.583337343083387e-06, "loss": 0.3131, "step": 16224 }, { "epoch": 0.7124713217778851, "grad_norm": 1.484375, "learning_rate": 9.577891690958935e-06, "loss": 0.2908, "step": 16226 }, { "epoch": 0.7125591402570886, "grad_norm": 1.578125, "learning_rate": 9.572447219863253e-06, "loss": 0.3163, "step": 16228 }, { "epoch": 0.7126469587362921, "grad_norm": 1.640625, "learning_rate": 9.567003930213241e-06, "loss": 0.3299, "step": 16230 }, { "epoch": 0.7127347772154956, "grad_norm": 1.484375, "learning_rate": 9.56156182242578e-06, "loss": 0.3156, "step": 16232 }, { "epoch": 0.7128225956946991, "grad_norm": 1.5859375, "learning_rate": 9.556120896917605e-06, "loss": 0.3299, "step": 16234 }, { "epoch": 0.7129104141739026, "grad_norm": 1.4921875, "learning_rate": 9.550681154105403e-06, "loss": 0.3117, "step": 16236 }, { "epoch": 0.712998232653106, "grad_norm": 1.5625, "learning_rate": 9.545242594405743e-06, "loss": 0.295, "step": 16238 }, { "epoch": 0.7130860511323095, "grad_norm": 1.5625, "learning_rate": 9.539805218235101e-06, "loss": 0.3388, "step": 16240 }, { "epoch": 0.713173869611513, "grad_norm": 1.546875, "learning_rate": 9.534369026009888e-06, "loss": 0.3074, "step": 16242 }, { "epoch": 0.7132616880907164, "grad_norm": 1.625, "learning_rate": 9.528934018146396e-06, "loss": 0.3924, "step": 16244 }, { "epoch": 0.71334950656992, "grad_norm": 1.515625, "learning_rate": 9.523500195060852e-06, "loss": 0.308, "step": 16246 }, { "epoch": 0.7134373250491235, "grad_norm": 1.5546875, "learning_rate": 9.518067557169375e-06, "loss": 0.3017, "step": 16248 }, { "epoch": 0.713525143528327, "grad_norm": 1.5078125, "learning_rate": 9.512636104887984e-06, "loss": 0.3188, "step": 16250 }, { "epoch": 0.7136129620075304, "grad_norm": 1.578125, "learning_rate": 9.507205838632643e-06, "loss": 0.3262, "step": 16252 }, { "epoch": 0.7137007804867339, "grad_norm": 1.5625, "learning_rate": 9.501776758819186e-06, "loss": 0.3345, "step": 16254 }, { "epoch": 0.7137885989659374, "grad_norm": 1.5078125, "learning_rate": 9.496348865863386e-06, "loss": 0.3339, "step": 16256 }, { "epoch": 0.7138764174451409, "grad_norm": 1.5390625, "learning_rate": 9.49092216018091e-06, "loss": 0.3174, "step": 16258 }, { "epoch": 0.7139642359243443, "grad_norm": 1.6015625, "learning_rate": 9.485496642187328e-06, "loss": 0.3242, "step": 16260 }, { "epoch": 0.7140520544035479, "grad_norm": 1.5625, "learning_rate": 9.480072312298135e-06, "loss": 0.3445, "step": 16262 }, { "epoch": 0.7141398728827514, "grad_norm": 1.671875, "learning_rate": 9.474649170928714e-06, "loss": 0.3035, "step": 16264 }, { "epoch": 0.7142276913619549, "grad_norm": 1.5, "learning_rate": 9.469227218494391e-06, "loss": 0.305, "step": 16266 }, { "epoch": 0.7143155098411583, "grad_norm": 1.71875, "learning_rate": 9.463806455410365e-06, "loss": 0.3175, "step": 16268 }, { "epoch": 0.7144033283203618, "grad_norm": 1.5859375, "learning_rate": 9.458386882091769e-06, "loss": 0.3037, "step": 16270 }, { "epoch": 0.7144911467995653, "grad_norm": 1.53125, "learning_rate": 9.452968498953634e-06, "loss": 0.3173, "step": 16272 }, { "epoch": 0.7145789652787687, "grad_norm": 1.8125, "learning_rate": 9.447551306410887e-06, "loss": 0.3008, "step": 16274 }, { "epoch": 0.7146667837579723, "grad_norm": 1.515625, "learning_rate": 9.442135304878403e-06, "loss": 0.31, "step": 16276 }, { "epoch": 0.7147546022371758, "grad_norm": 1.546875, "learning_rate": 9.436720494770912e-06, "loss": 0.3117, "step": 16278 }, { "epoch": 0.7148424207163793, "grad_norm": 1.5390625, "learning_rate": 9.431306876503108e-06, "loss": 0.3323, "step": 16280 }, { "epoch": 0.7149302391955827, "grad_norm": 1.6171875, "learning_rate": 9.425894450489556e-06, "loss": 0.3338, "step": 16282 }, { "epoch": 0.7150180576747862, "grad_norm": 1.53125, "learning_rate": 9.420483217144729e-06, "loss": 0.3296, "step": 16284 }, { "epoch": 0.7151058761539897, "grad_norm": 1.5078125, "learning_rate": 9.415073176883043e-06, "loss": 0.3117, "step": 16286 }, { "epoch": 0.7151936946331932, "grad_norm": 1.5859375, "learning_rate": 9.409664330118778e-06, "loss": 0.3294, "step": 16288 }, { "epoch": 0.7152815131123966, "grad_norm": 1.65625, "learning_rate": 9.404256677266176e-06, "loss": 0.2995, "step": 16290 }, { "epoch": 0.7153693315916002, "grad_norm": 1.5859375, "learning_rate": 9.398850218739319e-06, "loss": 0.3314, "step": 16292 }, { "epoch": 0.7154571500708037, "grad_norm": 1.5078125, "learning_rate": 9.393444954952257e-06, "loss": 0.3079, "step": 16294 }, { "epoch": 0.7155449685500072, "grad_norm": 1.5703125, "learning_rate": 9.388040886318916e-06, "loss": 0.3207, "step": 16296 }, { "epoch": 0.7156327870292106, "grad_norm": 1.578125, "learning_rate": 9.382638013253156e-06, "loss": 0.3263, "step": 16298 }, { "epoch": 0.7157206055084141, "grad_norm": 1.4765625, "learning_rate": 9.377236336168717e-06, "loss": 0.2916, "step": 16300 }, { "epoch": 0.7158084239876176, "grad_norm": 1.546875, "learning_rate": 9.371835855479258e-06, "loss": 0.3527, "step": 16302 }, { "epoch": 0.715896242466821, "grad_norm": 1.5703125, "learning_rate": 9.366436571598364e-06, "loss": 0.2921, "step": 16304 }, { "epoch": 0.7159840609460245, "grad_norm": 1.5703125, "learning_rate": 9.361038484939496e-06, "loss": 0.3471, "step": 16306 }, { "epoch": 0.7160718794252281, "grad_norm": 1.609375, "learning_rate": 9.355641595916059e-06, "loss": 0.3398, "step": 16308 }, { "epoch": 0.7161596979044316, "grad_norm": 1.5, "learning_rate": 9.350245904941338e-06, "loss": 0.3452, "step": 16310 }, { "epoch": 0.716247516383635, "grad_norm": 1.5625, "learning_rate": 9.34485141242853e-06, "loss": 0.3264, "step": 16312 }, { "epoch": 0.7163353348628385, "grad_norm": 1.5390625, "learning_rate": 9.339458118790761e-06, "loss": 0.2958, "step": 16314 }, { "epoch": 0.716423153342042, "grad_norm": 1.7109375, "learning_rate": 9.334066024441035e-06, "loss": 0.3358, "step": 16316 }, { "epoch": 0.7165109718212455, "grad_norm": 1.6875, "learning_rate": 9.328675129792298e-06, "loss": 0.318, "step": 16318 }, { "epoch": 0.7165987903004489, "grad_norm": 1.703125, "learning_rate": 9.323285435257373e-06, "loss": 0.3243, "step": 16320 }, { "epoch": 0.7166866087796525, "grad_norm": 1.640625, "learning_rate": 9.31789694124901e-06, "loss": 0.3271, "step": 16322 }, { "epoch": 0.716774427258856, "grad_norm": 1.53125, "learning_rate": 9.312509648179856e-06, "loss": 0.3216, "step": 16324 }, { "epoch": 0.7168622457380595, "grad_norm": 1.4765625, "learning_rate": 9.30712355646247e-06, "loss": 0.3215, "step": 16326 }, { "epoch": 0.7169500642172629, "grad_norm": 1.5390625, "learning_rate": 9.301738666509327e-06, "loss": 0.2933, "step": 16328 }, { "epoch": 0.7170378826964664, "grad_norm": 1.78125, "learning_rate": 9.296354978732793e-06, "loss": 0.3125, "step": 16330 }, { "epoch": 0.7171257011756699, "grad_norm": 1.515625, "learning_rate": 9.29097249354517e-06, "loss": 0.3495, "step": 16332 }, { "epoch": 0.7172135196548733, "grad_norm": 1.6796875, "learning_rate": 9.285591211358637e-06, "loss": 0.3285, "step": 16334 }, { "epoch": 0.7173013381340768, "grad_norm": 1.4921875, "learning_rate": 9.28021113258529e-06, "loss": 0.3444, "step": 16336 }, { "epoch": 0.7173891566132804, "grad_norm": 1.6171875, "learning_rate": 9.274832257637148e-06, "loss": 0.3218, "step": 16338 }, { "epoch": 0.7174769750924839, "grad_norm": 1.5, "learning_rate": 9.269454586926116e-06, "loss": 0.3116, "step": 16340 }, { "epoch": 0.7175647935716873, "grad_norm": 1.5625, "learning_rate": 9.264078120864029e-06, "loss": 0.3402, "step": 16342 }, { "epoch": 0.7176526120508908, "grad_norm": 1.546875, "learning_rate": 9.258702859862612e-06, "loss": 0.3325, "step": 16344 }, { "epoch": 0.7177404305300943, "grad_norm": 1.6484375, "learning_rate": 9.253328804333495e-06, "loss": 0.3402, "step": 16346 }, { "epoch": 0.7178282490092978, "grad_norm": 1.5234375, "learning_rate": 9.247955954688242e-06, "loss": 0.349, "step": 16348 }, { "epoch": 0.7179160674885012, "grad_norm": 1.59375, "learning_rate": 9.242584311338288e-06, "loss": 0.3494, "step": 16350 }, { "epoch": 0.7180038859677047, "grad_norm": 1.5625, "learning_rate": 9.237213874695021e-06, "loss": 0.3199, "step": 16352 }, { "epoch": 0.7180917044469083, "grad_norm": 1.6484375, "learning_rate": 9.231844645169679e-06, "loss": 0.3132, "step": 16354 }, { "epoch": 0.7181795229261118, "grad_norm": 1.515625, "learning_rate": 9.226476623173464e-06, "loss": 0.3385, "step": 16356 }, { "epoch": 0.7182673414053152, "grad_norm": 1.6015625, "learning_rate": 9.22110980911744e-06, "loss": 0.3345, "step": 16358 }, { "epoch": 0.7183551598845187, "grad_norm": 1.46875, "learning_rate": 9.215744203412619e-06, "loss": 0.3218, "step": 16360 }, { "epoch": 0.7184429783637222, "grad_norm": 1.5546875, "learning_rate": 9.210379806469888e-06, "loss": 0.317, "step": 16362 }, { "epoch": 0.7185307968429256, "grad_norm": 1.5234375, "learning_rate": 9.205016618700049e-06, "loss": 0.3086, "step": 16364 }, { "epoch": 0.7186186153221291, "grad_norm": 1.5, "learning_rate": 9.199654640513833e-06, "loss": 0.3399, "step": 16366 }, { "epoch": 0.7187064338013327, "grad_norm": 1.484375, "learning_rate": 9.194293872321843e-06, "loss": 0.3437, "step": 16368 }, { "epoch": 0.7187942522805362, "grad_norm": 1.59375, "learning_rate": 9.188934314534625e-06, "loss": 0.3387, "step": 16370 }, { "epoch": 0.7188820707597396, "grad_norm": 1.59375, "learning_rate": 9.18357596756261e-06, "loss": 0.2981, "step": 16372 }, { "epoch": 0.7189698892389431, "grad_norm": 1.5390625, "learning_rate": 9.178218831816126e-06, "loss": 0.315, "step": 16374 }, { "epoch": 0.7190577077181466, "grad_norm": 1.5859375, "learning_rate": 9.17286290770545e-06, "loss": 0.2909, "step": 16376 }, { "epoch": 0.7191455261973501, "grad_norm": 1.4921875, "learning_rate": 9.167508195640714e-06, "loss": 0.3028, "step": 16378 }, { "epoch": 0.7192333446765535, "grad_norm": 1.5078125, "learning_rate": 9.162154696032007e-06, "loss": 0.3327, "step": 16380 }, { "epoch": 0.719321163155757, "grad_norm": 1.5625, "learning_rate": 9.156802409289289e-06, "loss": 0.3227, "step": 16382 }, { "epoch": 0.7194089816349606, "grad_norm": 1.5625, "learning_rate": 9.151451335822442e-06, "loss": 0.311, "step": 16384 }, { "epoch": 0.7194968001141641, "grad_norm": 1.484375, "learning_rate": 9.146101476041249e-06, "loss": 0.3146, "step": 16386 }, { "epoch": 0.7195846185933675, "grad_norm": 1.65625, "learning_rate": 9.140752830355395e-06, "loss": 0.3515, "step": 16388 }, { "epoch": 0.719672437072571, "grad_norm": 1.703125, "learning_rate": 9.135405399174504e-06, "loss": 0.3435, "step": 16390 }, { "epoch": 0.7197602555517745, "grad_norm": 1.4921875, "learning_rate": 9.13005918290806e-06, "loss": 0.3042, "step": 16392 }, { "epoch": 0.719848074030978, "grad_norm": 1.6328125, "learning_rate": 9.124714181965497e-06, "loss": 0.3166, "step": 16394 }, { "epoch": 0.7199358925101814, "grad_norm": 1.5859375, "learning_rate": 9.119370396756125e-06, "loss": 0.3493, "step": 16396 }, { "epoch": 0.7200237109893849, "grad_norm": 1.6328125, "learning_rate": 9.114027827689168e-06, "loss": 0.3101, "step": 16398 }, { "epoch": 0.7201115294685885, "grad_norm": 1.671875, "learning_rate": 9.108686475173777e-06, "loss": 0.3509, "step": 16400 }, { "epoch": 0.720199347947792, "grad_norm": 1.5390625, "learning_rate": 9.103346339618975e-06, "loss": 0.3276, "step": 16402 }, { "epoch": 0.7202871664269954, "grad_norm": 1.515625, "learning_rate": 9.098007421433732e-06, "loss": 0.3511, "step": 16404 }, { "epoch": 0.7203749849061989, "grad_norm": 1.8125, "learning_rate": 9.092669721026892e-06, "loss": 0.3317, "step": 16406 }, { "epoch": 0.7204628033854024, "grad_norm": 1.5390625, "learning_rate": 9.087333238807208e-06, "loss": 0.3172, "step": 16408 }, { "epoch": 0.7205506218646058, "grad_norm": 1.53125, "learning_rate": 9.081997975183368e-06, "loss": 0.3195, "step": 16410 }, { "epoch": 0.7206384403438093, "grad_norm": 1.6328125, "learning_rate": 9.07666393056394e-06, "loss": 0.3213, "step": 16412 }, { "epoch": 0.7207262588230128, "grad_norm": 1.6328125, "learning_rate": 9.071331105357406e-06, "loss": 0.3312, "step": 16414 }, { "epoch": 0.7208140773022164, "grad_norm": 1.515625, "learning_rate": 9.065999499972144e-06, "loss": 0.3171, "step": 16416 }, { "epoch": 0.7209018957814198, "grad_norm": 1.5, "learning_rate": 9.06066911481647e-06, "loss": 0.3196, "step": 16418 }, { "epoch": 0.7209897142606233, "grad_norm": 1.625, "learning_rate": 9.055339950298564e-06, "loss": 0.3272, "step": 16420 }, { "epoch": 0.7210775327398268, "grad_norm": 1.5859375, "learning_rate": 9.050012006826558e-06, "loss": 0.2998, "step": 16422 }, { "epoch": 0.7211653512190302, "grad_norm": 1.625, "learning_rate": 9.044685284808458e-06, "loss": 0.3191, "step": 16424 }, { "epoch": 0.7212531696982337, "grad_norm": 1.5546875, "learning_rate": 9.03935978465217e-06, "loss": 0.3121, "step": 16426 }, { "epoch": 0.7213409881774372, "grad_norm": 1.5859375, "learning_rate": 9.034035506765548e-06, "loss": 0.3239, "step": 16428 }, { "epoch": 0.7214288066566408, "grad_norm": 1.5703125, "learning_rate": 9.028712451556307e-06, "loss": 0.2768, "step": 16430 }, { "epoch": 0.7215166251358442, "grad_norm": 1.53125, "learning_rate": 9.023390619432101e-06, "loss": 0.3418, "step": 16432 }, { "epoch": 0.7216044436150477, "grad_norm": 1.59375, "learning_rate": 9.018070010800472e-06, "loss": 0.3248, "step": 16434 }, { "epoch": 0.7216922620942512, "grad_norm": 1.53125, "learning_rate": 9.012750626068864e-06, "loss": 0.2989, "step": 16436 }, { "epoch": 0.7217800805734547, "grad_norm": 1.578125, "learning_rate": 9.007432465644652e-06, "loss": 0.3299, "step": 16438 }, { "epoch": 0.7218678990526581, "grad_norm": 1.6328125, "learning_rate": 9.00211552993509e-06, "loss": 0.3024, "step": 16440 }, { "epoch": 0.7219557175318616, "grad_norm": 1.609375, "learning_rate": 8.996799819347363e-06, "loss": 0.3125, "step": 16442 }, { "epoch": 0.7220435360110651, "grad_norm": 1.6796875, "learning_rate": 8.991485334288542e-06, "loss": 0.3349, "step": 16444 }, { "epoch": 0.7221313544902687, "grad_norm": 1.578125, "learning_rate": 8.986172075165611e-06, "loss": 0.3068, "step": 16446 }, { "epoch": 0.7222191729694721, "grad_norm": 1.59375, "learning_rate": 8.98086004238546e-06, "loss": 0.3324, "step": 16448 }, { "epoch": 0.7223069914486756, "grad_norm": 1.6328125, "learning_rate": 8.975549236354882e-06, "loss": 0.3361, "step": 16450 }, { "epoch": 0.7223948099278791, "grad_norm": 1.6015625, "learning_rate": 8.970239657480592e-06, "loss": 0.3373, "step": 16452 }, { "epoch": 0.7224826284070825, "grad_norm": 1.6484375, "learning_rate": 8.964931306169182e-06, "loss": 0.3342, "step": 16454 }, { "epoch": 0.722570446886286, "grad_norm": 1.6875, "learning_rate": 8.959624182827187e-06, "loss": 0.311, "step": 16456 }, { "epoch": 0.7226582653654895, "grad_norm": 1.515625, "learning_rate": 8.954318287861016e-06, "loss": 0.3384, "step": 16458 }, { "epoch": 0.722746083844693, "grad_norm": 1.6015625, "learning_rate": 8.949013621676988e-06, "loss": 0.3401, "step": 16460 }, { "epoch": 0.7228339023238965, "grad_norm": 1.5859375, "learning_rate": 8.943710184681353e-06, "loss": 0.3354, "step": 16462 }, { "epoch": 0.7229217208031, "grad_norm": 1.484375, "learning_rate": 8.938407977280233e-06, "loss": 0.3297, "step": 16464 }, { "epoch": 0.7230095392823035, "grad_norm": 1.53125, "learning_rate": 8.93310699987969e-06, "loss": 0.2986, "step": 16466 }, { "epoch": 0.723097357761507, "grad_norm": 1.6015625, "learning_rate": 8.927807252885664e-06, "loss": 0.3087, "step": 16468 }, { "epoch": 0.7231851762407104, "grad_norm": 1.59375, "learning_rate": 8.922508736704002e-06, "loss": 0.3098, "step": 16470 }, { "epoch": 0.7232729947199139, "grad_norm": 1.59375, "learning_rate": 8.917211451740484e-06, "loss": 0.3198, "step": 16472 }, { "epoch": 0.7233608131991174, "grad_norm": 1.5078125, "learning_rate": 8.911915398400767e-06, "loss": 0.3407, "step": 16474 }, { "epoch": 0.723448631678321, "grad_norm": 1.75, "learning_rate": 8.906620577090427e-06, "loss": 0.3181, "step": 16476 }, { "epoch": 0.7235364501575244, "grad_norm": 1.546875, "learning_rate": 8.90132698821493e-06, "loss": 0.3371, "step": 16478 }, { "epoch": 0.7236242686367279, "grad_norm": 1.5390625, "learning_rate": 8.896034632179683e-06, "loss": 0.342, "step": 16480 }, { "epoch": 0.7237120871159314, "grad_norm": 1.640625, "learning_rate": 8.890743509389953e-06, "loss": 0.3339, "step": 16482 }, { "epoch": 0.7237999055951349, "grad_norm": 1.5703125, "learning_rate": 8.885453620250958e-06, "loss": 0.3264, "step": 16484 }, { "epoch": 0.7238877240743383, "grad_norm": 1.515625, "learning_rate": 8.880164965167787e-06, "loss": 0.3196, "step": 16486 }, { "epoch": 0.7239755425535418, "grad_norm": 1.5703125, "learning_rate": 8.874877544545438e-06, "loss": 0.3019, "step": 16488 }, { "epoch": 0.7240633610327453, "grad_norm": 1.5234375, "learning_rate": 8.86959135878884e-06, "loss": 0.3177, "step": 16490 }, { "epoch": 0.7241511795119489, "grad_norm": 1.59375, "learning_rate": 8.864306408302795e-06, "loss": 0.3494, "step": 16492 }, { "epoch": 0.7242389979911523, "grad_norm": 1.6328125, "learning_rate": 8.859022693492042e-06, "loss": 0.3201, "step": 16494 }, { "epoch": 0.7243268164703558, "grad_norm": 1.59375, "learning_rate": 8.8537402147612e-06, "loss": 0.2976, "step": 16496 }, { "epoch": 0.7244146349495593, "grad_norm": 1.4765625, "learning_rate": 8.848458972514792e-06, "loss": 0.332, "step": 16498 }, { "epoch": 0.7245024534287627, "grad_norm": 1.609375, "learning_rate": 8.843178967157278e-06, "loss": 0.312, "step": 16500 }, { "epoch": 0.7245902719079662, "grad_norm": 1.625, "learning_rate": 8.837900199092986e-06, "loss": 0.3438, "step": 16502 }, { "epoch": 0.7246780903871697, "grad_norm": 1.484375, "learning_rate": 8.832622668726184e-06, "loss": 0.3145, "step": 16504 }, { "epoch": 0.7247659088663732, "grad_norm": 1.4765625, "learning_rate": 8.827346376460998e-06, "loss": 0.3059, "step": 16506 }, { "epoch": 0.7248537273455767, "grad_norm": 1.59375, "learning_rate": 8.822071322701513e-06, "loss": 0.3079, "step": 16508 }, { "epoch": 0.7249415458247802, "grad_norm": 1.5703125, "learning_rate": 8.816797507851682e-06, "loss": 0.3218, "step": 16510 }, { "epoch": 0.7250293643039837, "grad_norm": 1.640625, "learning_rate": 8.811524932315371e-06, "loss": 0.31, "step": 16512 }, { "epoch": 0.7251171827831872, "grad_norm": 1.4609375, "learning_rate": 8.806253596496369e-06, "loss": 0.3252, "step": 16514 }, { "epoch": 0.7252050012623906, "grad_norm": 1.4765625, "learning_rate": 8.800983500798341e-06, "loss": 0.3262, "step": 16516 }, { "epoch": 0.7252928197415941, "grad_norm": 1.5625, "learning_rate": 8.795714645624887e-06, "loss": 0.3043, "step": 16518 }, { "epoch": 0.7253806382207976, "grad_norm": 1.53125, "learning_rate": 8.79044703137949e-06, "loss": 0.3325, "step": 16520 }, { "epoch": 0.7254684567000012, "grad_norm": 1.46875, "learning_rate": 8.785180658465536e-06, "loss": 0.3342, "step": 16522 }, { "epoch": 0.7255562751792046, "grad_norm": 1.578125, "learning_rate": 8.779915527286343e-06, "loss": 0.3068, "step": 16524 }, { "epoch": 0.7256440936584081, "grad_norm": 1.5625, "learning_rate": 8.7746516382451e-06, "loss": 0.3271, "step": 16526 }, { "epoch": 0.7257319121376116, "grad_norm": 1.6171875, "learning_rate": 8.769388991744928e-06, "loss": 0.2964, "step": 16528 }, { "epoch": 0.725819730616815, "grad_norm": 1.6953125, "learning_rate": 8.764127588188842e-06, "loss": 0.3511, "step": 16530 }, { "epoch": 0.7259075490960185, "grad_norm": 1.640625, "learning_rate": 8.758867427979748e-06, "loss": 0.3357, "step": 16532 }, { "epoch": 0.725995367575222, "grad_norm": 1.5859375, "learning_rate": 8.753608511520489e-06, "loss": 0.348, "step": 16534 }, { "epoch": 0.7260831860544255, "grad_norm": 1.5546875, "learning_rate": 8.748350839213782e-06, "loss": 0.3523, "step": 16536 }, { "epoch": 0.726171004533629, "grad_norm": 1.5390625, "learning_rate": 8.743094411462266e-06, "loss": 0.3257, "step": 16538 }, { "epoch": 0.7262588230128325, "grad_norm": 1.453125, "learning_rate": 8.737839228668468e-06, "loss": 0.307, "step": 16540 }, { "epoch": 0.726346641492036, "grad_norm": 1.4765625, "learning_rate": 8.73258529123485e-06, "loss": 0.3303, "step": 16542 }, { "epoch": 0.7264344599712395, "grad_norm": 1.6953125, "learning_rate": 8.727332599563745e-06, "loss": 0.3162, "step": 16544 }, { "epoch": 0.7265222784504429, "grad_norm": 1.5546875, "learning_rate": 8.722081154057408e-06, "loss": 0.3351, "step": 16546 }, { "epoch": 0.7266100969296464, "grad_norm": 1.6484375, "learning_rate": 8.716830955118002e-06, "loss": 0.329, "step": 16548 }, { "epoch": 0.7266979154088499, "grad_norm": 1.6796875, "learning_rate": 8.711582003147578e-06, "loss": 0.3177, "step": 16550 }, { "epoch": 0.7267857338880533, "grad_norm": 1.5078125, "learning_rate": 8.706334298548119e-06, "loss": 0.3, "step": 16552 }, { "epoch": 0.7268735523672569, "grad_norm": 1.484375, "learning_rate": 8.701087841721475e-06, "loss": 0.3191, "step": 16554 }, { "epoch": 0.7269613708464604, "grad_norm": 1.484375, "learning_rate": 8.69584263306944e-06, "loss": 0.3198, "step": 16556 }, { "epoch": 0.7270491893256639, "grad_norm": 1.5703125, "learning_rate": 8.690598672993683e-06, "loss": 0.3061, "step": 16558 }, { "epoch": 0.7271370078048673, "grad_norm": 1.4375, "learning_rate": 8.685355961895784e-06, "loss": 0.3073, "step": 16560 }, { "epoch": 0.7272248262840708, "grad_norm": 1.6328125, "learning_rate": 8.680114500177241e-06, "loss": 0.3, "step": 16562 }, { "epoch": 0.7273126447632743, "grad_norm": 1.5, "learning_rate": 8.674874288239438e-06, "loss": 0.3184, "step": 16564 }, { "epoch": 0.7274004632424778, "grad_norm": 1.4453125, "learning_rate": 8.669635326483688e-06, "loss": 0.2938, "step": 16566 }, { "epoch": 0.7274882817216813, "grad_norm": 1.5703125, "learning_rate": 8.664397615311165e-06, "loss": 0.3619, "step": 16568 }, { "epoch": 0.7275761002008848, "grad_norm": 1.5859375, "learning_rate": 8.659161155122997e-06, "loss": 0.3037, "step": 16570 }, { "epoch": 0.7276639186800883, "grad_norm": 1.5546875, "learning_rate": 8.653925946320183e-06, "loss": 0.3034, "step": 16572 }, { "epoch": 0.7277517371592918, "grad_norm": 1.4375, "learning_rate": 8.648691989303631e-06, "loss": 0.3027, "step": 16574 }, { "epoch": 0.7278395556384952, "grad_norm": 1.515625, "learning_rate": 8.643459284474176e-06, "loss": 0.3192, "step": 16576 }, { "epoch": 0.7279273741176987, "grad_norm": 1.53125, "learning_rate": 8.638227832232517e-06, "loss": 0.3333, "step": 16578 }, { "epoch": 0.7280151925969022, "grad_norm": 1.6015625, "learning_rate": 8.632997632979306e-06, "loss": 0.3472, "step": 16580 }, { "epoch": 0.7281030110761056, "grad_norm": 1.5859375, "learning_rate": 8.62776868711506e-06, "loss": 0.3078, "step": 16582 }, { "epoch": 0.7281908295553092, "grad_norm": 1.6484375, "learning_rate": 8.622540995040202e-06, "loss": 0.3002, "step": 16584 }, { "epoch": 0.7282786480345127, "grad_norm": 1.5546875, "learning_rate": 8.617314557155087e-06, "loss": 0.3192, "step": 16586 }, { "epoch": 0.7283664665137162, "grad_norm": 1.5234375, "learning_rate": 8.612089373859945e-06, "loss": 0.3664, "step": 16588 }, { "epoch": 0.7284542849929196, "grad_norm": 1.703125, "learning_rate": 8.606865445554934e-06, "loss": 0.3197, "step": 16590 }, { "epoch": 0.7285421034721231, "grad_norm": 1.578125, "learning_rate": 8.601642772640097e-06, "loss": 0.3189, "step": 16592 }, { "epoch": 0.7286299219513266, "grad_norm": 1.578125, "learning_rate": 8.596421355515383e-06, "loss": 0.3084, "step": 16594 }, { "epoch": 0.7287177404305301, "grad_norm": 1.5859375, "learning_rate": 8.591201194580667e-06, "loss": 0.3358, "step": 16596 }, { "epoch": 0.7288055589097335, "grad_norm": 1.5859375, "learning_rate": 8.585982290235681e-06, "loss": 0.3179, "step": 16598 }, { "epoch": 0.7288933773889371, "grad_norm": 1.515625, "learning_rate": 8.580764642880113e-06, "loss": 0.3107, "step": 16600 }, { "epoch": 0.7289811958681406, "grad_norm": 1.5078125, "learning_rate": 8.575548252913515e-06, "loss": 0.3655, "step": 16602 }, { "epoch": 0.7290690143473441, "grad_norm": 1.515625, "learning_rate": 8.57033312073538e-06, "loss": 0.3162, "step": 16604 }, { "epoch": 0.7291568328265475, "grad_norm": 1.5625, "learning_rate": 8.565119246745074e-06, "loss": 0.3325, "step": 16606 }, { "epoch": 0.729244651305751, "grad_norm": 1.5625, "learning_rate": 8.559906631341866e-06, "loss": 0.3174, "step": 16608 }, { "epoch": 0.7293324697849545, "grad_norm": 1.4921875, "learning_rate": 8.554695274924956e-06, "loss": 0.3142, "step": 16610 }, { "epoch": 0.729420288264158, "grad_norm": 1.5625, "learning_rate": 8.549485177893418e-06, "loss": 0.3224, "step": 16612 }, { "epoch": 0.7295081067433614, "grad_norm": 1.6640625, "learning_rate": 8.544276340646256e-06, "loss": 0.3582, "step": 16614 }, { "epoch": 0.729595925222565, "grad_norm": 1.53125, "learning_rate": 8.539068763582347e-06, "loss": 0.3463, "step": 16616 }, { "epoch": 0.7296837437017685, "grad_norm": 1.5078125, "learning_rate": 8.533862447100511e-06, "loss": 0.297, "step": 16618 }, { "epoch": 0.7297715621809719, "grad_norm": 1.5546875, "learning_rate": 8.528657391599431e-06, "loss": 0.3059, "step": 16620 }, { "epoch": 0.7298593806601754, "grad_norm": 1.5625, "learning_rate": 8.52345359747771e-06, "loss": 0.3107, "step": 16622 }, { "epoch": 0.7299471991393789, "grad_norm": 1.5078125, "learning_rate": 8.51825106513387e-06, "loss": 0.3003, "step": 16624 }, { "epoch": 0.7300350176185824, "grad_norm": 1.515625, "learning_rate": 8.513049794966305e-06, "loss": 0.3071, "step": 16626 }, { "epoch": 0.7301228360977858, "grad_norm": 1.46875, "learning_rate": 8.507849787373356e-06, "loss": 0.3396, "step": 16628 }, { "epoch": 0.7302106545769894, "grad_norm": 1.484375, "learning_rate": 8.50265104275321e-06, "loss": 0.3191, "step": 16630 }, { "epoch": 0.7302984730561929, "grad_norm": 1.5546875, "learning_rate": 8.497453561504007e-06, "loss": 0.2972, "step": 16632 }, { "epoch": 0.7303862915353964, "grad_norm": 1.71875, "learning_rate": 8.492257344023769e-06, "loss": 0.3168, "step": 16634 }, { "epoch": 0.7304741100145998, "grad_norm": 1.5078125, "learning_rate": 8.48706239071041e-06, "loss": 0.3114, "step": 16636 }, { "epoch": 0.7305619284938033, "grad_norm": 1.4375, "learning_rate": 8.481868701961782e-06, "loss": 0.3148, "step": 16638 }, { "epoch": 0.7306497469730068, "grad_norm": 1.515625, "learning_rate": 8.476676278175597e-06, "loss": 0.3238, "step": 16640 }, { "epoch": 0.7307375654522102, "grad_norm": 1.6953125, "learning_rate": 8.471485119749514e-06, "loss": 0.3176, "step": 16642 }, { "epoch": 0.7308253839314137, "grad_norm": 1.53125, "learning_rate": 8.466295227081061e-06, "loss": 0.3225, "step": 16644 }, { "epoch": 0.7309132024106173, "grad_norm": 1.671875, "learning_rate": 8.461106600567679e-06, "loss": 0.3377, "step": 16646 }, { "epoch": 0.7310010208898208, "grad_norm": 1.5859375, "learning_rate": 8.455919240606722e-06, "loss": 0.3063, "step": 16648 }, { "epoch": 0.7310888393690242, "grad_norm": 1.5546875, "learning_rate": 8.450733147595427e-06, "loss": 0.3358, "step": 16650 }, { "epoch": 0.7311766578482277, "grad_norm": 1.6015625, "learning_rate": 8.445548321930966e-06, "loss": 0.3395, "step": 16652 }, { "epoch": 0.7312644763274312, "grad_norm": 1.5390625, "learning_rate": 8.44036476401038e-06, "loss": 0.3459, "step": 16654 }, { "epoch": 0.7313522948066347, "grad_norm": 1.59375, "learning_rate": 8.435182474230624e-06, "loss": 0.3312, "step": 16656 }, { "epoch": 0.7314401132858381, "grad_norm": 1.6328125, "learning_rate": 8.430001452988582e-06, "loss": 0.3066, "step": 16658 }, { "epoch": 0.7315279317650416, "grad_norm": 1.546875, "learning_rate": 8.424821700680982e-06, "loss": 0.3105, "step": 16660 }, { "epoch": 0.7316157502442452, "grad_norm": 1.53125, "learning_rate": 8.419643217704517e-06, "loss": 0.3185, "step": 16662 }, { "epoch": 0.7317035687234487, "grad_norm": 1.5546875, "learning_rate": 8.414466004455743e-06, "loss": 0.3262, "step": 16664 }, { "epoch": 0.7317913872026521, "grad_norm": 1.5625, "learning_rate": 8.409290061331145e-06, "loss": 0.3022, "step": 16666 }, { "epoch": 0.7318792056818556, "grad_norm": 1.5078125, "learning_rate": 8.404115388727093e-06, "loss": 0.3006, "step": 16668 }, { "epoch": 0.7319670241610591, "grad_norm": 1.5234375, "learning_rate": 8.398941987039854e-06, "loss": 0.3115, "step": 16670 }, { "epoch": 0.7320548426402625, "grad_norm": 1.5703125, "learning_rate": 8.393769856665626e-06, "loss": 0.3167, "step": 16672 }, { "epoch": 0.732142661119466, "grad_norm": 1.4453125, "learning_rate": 8.388598998000472e-06, "loss": 0.284, "step": 16674 }, { "epoch": 0.7322304795986696, "grad_norm": 1.5078125, "learning_rate": 8.383429411440399e-06, "loss": 0.2953, "step": 16676 }, { "epoch": 0.7323182980778731, "grad_norm": 1.546875, "learning_rate": 8.378261097381285e-06, "loss": 0.3249, "step": 16678 }, { "epoch": 0.7324061165570765, "grad_norm": 1.6171875, "learning_rate": 8.373094056218913e-06, "loss": 0.2998, "step": 16680 }, { "epoch": 0.73249393503628, "grad_norm": 1.53125, "learning_rate": 8.367928288348992e-06, "loss": 0.3415, "step": 16682 }, { "epoch": 0.7325817535154835, "grad_norm": 1.46875, "learning_rate": 8.3627637941671e-06, "loss": 0.2889, "step": 16684 }, { "epoch": 0.732669571994687, "grad_norm": 1.5859375, "learning_rate": 8.357600574068755e-06, "loss": 0.3368, "step": 16686 }, { "epoch": 0.7327573904738904, "grad_norm": 1.5546875, "learning_rate": 8.352438628449347e-06, "loss": 0.2997, "step": 16688 }, { "epoch": 0.7328452089530939, "grad_norm": 1.53125, "learning_rate": 8.347277957704178e-06, "loss": 0.3069, "step": 16690 }, { "epoch": 0.7329330274322975, "grad_norm": 1.5078125, "learning_rate": 8.34211856222845e-06, "loss": 0.3195, "step": 16692 }, { "epoch": 0.733020845911501, "grad_norm": 1.59375, "learning_rate": 8.33696044241728e-06, "loss": 0.3095, "step": 16694 }, { "epoch": 0.7331086643907044, "grad_norm": 1.5078125, "learning_rate": 8.331803598665674e-06, "loss": 0.2801, "step": 16696 }, { "epoch": 0.7331964828699079, "grad_norm": 1.5, "learning_rate": 8.326648031368536e-06, "loss": 0.3288, "step": 16698 }, { "epoch": 0.7332843013491114, "grad_norm": 1.5703125, "learning_rate": 8.321493740920699e-06, "loss": 0.3092, "step": 16700 }, { "epoch": 0.7333721198283149, "grad_norm": 1.5625, "learning_rate": 8.316340727716862e-06, "loss": 0.3149, "step": 16702 }, { "epoch": 0.7334599383075183, "grad_norm": 1.6484375, "learning_rate": 8.311188992151656e-06, "loss": 0.3189, "step": 16704 }, { "epoch": 0.7335477567867218, "grad_norm": 1.4296875, "learning_rate": 8.3060385346196e-06, "loss": 0.3034, "step": 16706 }, { "epoch": 0.7336355752659254, "grad_norm": 1.5078125, "learning_rate": 8.300889355515107e-06, "loss": 0.3473, "step": 16708 }, { "epoch": 0.7337233937451288, "grad_norm": 1.453125, "learning_rate": 8.295741455232517e-06, "loss": 0.3084, "step": 16710 }, { "epoch": 0.7338112122243323, "grad_norm": 1.546875, "learning_rate": 8.290594834166043e-06, "loss": 0.332, "step": 16712 }, { "epoch": 0.7338990307035358, "grad_norm": 1.515625, "learning_rate": 8.285449492709829e-06, "loss": 0.325, "step": 16714 }, { "epoch": 0.7339868491827393, "grad_norm": 1.5078125, "learning_rate": 8.2803054312579e-06, "loss": 0.3286, "step": 16716 }, { "epoch": 0.7340746676619427, "grad_norm": 1.5078125, "learning_rate": 8.275162650204182e-06, "loss": 0.3368, "step": 16718 }, { "epoch": 0.7341624861411462, "grad_norm": 1.78125, "learning_rate": 8.270021149942536e-06, "loss": 0.3279, "step": 16720 }, { "epoch": 0.7342503046203498, "grad_norm": 1.609375, "learning_rate": 8.26488093086666e-06, "loss": 0.3313, "step": 16722 }, { "epoch": 0.7343381230995533, "grad_norm": 1.59375, "learning_rate": 8.259741993370227e-06, "loss": 0.3095, "step": 16724 }, { "epoch": 0.7344259415787567, "grad_norm": 1.5625, "learning_rate": 8.254604337846753e-06, "loss": 0.3407, "step": 16726 }, { "epoch": 0.7345137600579602, "grad_norm": 1.671875, "learning_rate": 8.2494679646897e-06, "loss": 0.3175, "step": 16728 }, { "epoch": 0.7346015785371637, "grad_norm": 1.6328125, "learning_rate": 8.24433287429241e-06, "loss": 0.3401, "step": 16730 }, { "epoch": 0.7346893970163672, "grad_norm": 1.4453125, "learning_rate": 8.239199067048114e-06, "loss": 0.316, "step": 16732 }, { "epoch": 0.7347772154955706, "grad_norm": 1.484375, "learning_rate": 8.23406654334998e-06, "loss": 0.3004, "step": 16734 }, { "epoch": 0.7348650339747741, "grad_norm": 1.515625, "learning_rate": 8.22893530359104e-06, "loss": 0.319, "step": 16736 }, { "epoch": 0.7349528524539777, "grad_norm": 1.546875, "learning_rate": 8.223805348164265e-06, "loss": 0.319, "step": 16738 }, { "epoch": 0.7350406709331812, "grad_norm": 1.53125, "learning_rate": 8.218676677462497e-06, "loss": 0.3369, "step": 16740 }, { "epoch": 0.7351284894123846, "grad_norm": 1.5703125, "learning_rate": 8.213549291878483e-06, "loss": 0.3231, "step": 16742 }, { "epoch": 0.7352163078915881, "grad_norm": 1.4921875, "learning_rate": 8.208423191804899e-06, "loss": 0.3173, "step": 16744 }, { "epoch": 0.7353041263707916, "grad_norm": 1.3984375, "learning_rate": 8.20329837763428e-06, "loss": 0.3021, "step": 16746 }, { "epoch": 0.735391944849995, "grad_norm": 1.7578125, "learning_rate": 8.19817484975911e-06, "loss": 0.32, "step": 16748 }, { "epoch": 0.7354797633291985, "grad_norm": 1.578125, "learning_rate": 8.193052608571736e-06, "loss": 0.309, "step": 16750 }, { "epoch": 0.735567581808402, "grad_norm": 1.5859375, "learning_rate": 8.18793165446442e-06, "loss": 0.2867, "step": 16752 }, { "epoch": 0.7356554002876056, "grad_norm": 1.46875, "learning_rate": 8.182811987829323e-06, "loss": 0.3224, "step": 16754 }, { "epoch": 0.735743218766809, "grad_norm": 1.421875, "learning_rate": 8.177693609058521e-06, "loss": 0.3212, "step": 16756 }, { "epoch": 0.7358310372460125, "grad_norm": 1.578125, "learning_rate": 8.172576518543976e-06, "loss": 0.3462, "step": 16758 }, { "epoch": 0.735918855725216, "grad_norm": 1.5390625, "learning_rate": 8.167460716677546e-06, "loss": 0.3368, "step": 16760 }, { "epoch": 0.7360066742044195, "grad_norm": 1.5234375, "learning_rate": 8.16234620385102e-06, "loss": 0.3412, "step": 16762 }, { "epoch": 0.7360944926836229, "grad_norm": 1.4921875, "learning_rate": 8.157232980456047e-06, "loss": 0.3209, "step": 16764 }, { "epoch": 0.7361823111628264, "grad_norm": 1.5234375, "learning_rate": 8.15212104688422e-06, "loss": 0.2946, "step": 16766 }, { "epoch": 0.7362701296420299, "grad_norm": 1.5234375, "learning_rate": 8.147010403527003e-06, "loss": 0.3094, "step": 16768 }, { "epoch": 0.7363579481212335, "grad_norm": 1.515625, "learning_rate": 8.141901050775758e-06, "loss": 0.3224, "step": 16770 }, { "epoch": 0.7364457666004369, "grad_norm": 1.53125, "learning_rate": 8.136792989021783e-06, "loss": 0.3271, "step": 16772 }, { "epoch": 0.7365335850796404, "grad_norm": 1.7265625, "learning_rate": 8.131686218656231e-06, "loss": 0.3339, "step": 16774 }, { "epoch": 0.7366214035588439, "grad_norm": 1.5078125, "learning_rate": 8.126580740070202e-06, "loss": 0.311, "step": 16776 }, { "epoch": 0.7367092220380473, "grad_norm": 1.6328125, "learning_rate": 8.121476553654666e-06, "loss": 0.309, "step": 16778 }, { "epoch": 0.7367970405172508, "grad_norm": 1.5703125, "learning_rate": 8.116373659800502e-06, "loss": 0.3169, "step": 16780 }, { "epoch": 0.7368848589964543, "grad_norm": 1.5, "learning_rate": 8.11127205889849e-06, "loss": 0.331, "step": 16782 }, { "epoch": 0.7369726774756579, "grad_norm": 1.578125, "learning_rate": 8.106171751339303e-06, "loss": 0.3081, "step": 16784 }, { "epoch": 0.7370604959548613, "grad_norm": 1.5234375, "learning_rate": 8.10107273751354e-06, "loss": 0.3114, "step": 16786 }, { "epoch": 0.7371483144340648, "grad_norm": 1.546875, "learning_rate": 8.095975017811671e-06, "loss": 0.332, "step": 16788 }, { "epoch": 0.7372361329132683, "grad_norm": 1.546875, "learning_rate": 8.090878592624097e-06, "loss": 0.3164, "step": 16790 }, { "epoch": 0.7373239513924718, "grad_norm": 1.4921875, "learning_rate": 8.085783462341093e-06, "loss": 0.3269, "step": 16792 }, { "epoch": 0.7374117698716752, "grad_norm": 1.5078125, "learning_rate": 8.080689627352837e-06, "loss": 0.3515, "step": 16794 }, { "epoch": 0.7374995883508787, "grad_norm": 1.5390625, "learning_rate": 8.075597088049434e-06, "loss": 0.2985, "step": 16796 }, { "epoch": 0.7375874068300822, "grad_norm": 1.59375, "learning_rate": 8.070505844820853e-06, "loss": 0.3259, "step": 16798 }, { "epoch": 0.7376752253092858, "grad_norm": 1.5078125, "learning_rate": 8.065415898057003e-06, "loss": 0.328, "step": 16800 }, { "epoch": 0.7377630437884892, "grad_norm": 1.4765625, "learning_rate": 8.060327248147662e-06, "loss": 0.2985, "step": 16802 }, { "epoch": 0.7378508622676927, "grad_norm": 1.4765625, "learning_rate": 8.055239895482514e-06, "loss": 0.3036, "step": 16804 }, { "epoch": 0.7379386807468962, "grad_norm": 1.484375, "learning_rate": 8.050153840451163e-06, "loss": 0.3052, "step": 16806 }, { "epoch": 0.7380264992260996, "grad_norm": 1.5234375, "learning_rate": 8.045069083443088e-06, "loss": 0.339, "step": 16808 }, { "epoch": 0.7381143177053031, "grad_norm": 1.4609375, "learning_rate": 8.039985624847692e-06, "loss": 0.3002, "step": 16810 }, { "epoch": 0.7382021361845066, "grad_norm": 1.546875, "learning_rate": 8.034903465054266e-06, "loss": 0.3592, "step": 16812 }, { "epoch": 0.7382899546637101, "grad_norm": 1.53125, "learning_rate": 8.029822604451997e-06, "loss": 0.3023, "step": 16814 }, { "epoch": 0.7383777731429136, "grad_norm": 1.5390625, "learning_rate": 8.024743043429975e-06, "loss": 0.3548, "step": 16816 }, { "epoch": 0.7384655916221171, "grad_norm": 1.6015625, "learning_rate": 8.019664782377207e-06, "loss": 0.3256, "step": 16818 }, { "epoch": 0.7385534101013206, "grad_norm": 1.59375, "learning_rate": 8.014587821682578e-06, "loss": 0.3362, "step": 16820 }, { "epoch": 0.7386412285805241, "grad_norm": 1.53125, "learning_rate": 8.009512161734881e-06, "loss": 0.2868, "step": 16822 }, { "epoch": 0.7387290470597275, "grad_norm": 1.453125, "learning_rate": 8.004437802922823e-06, "loss": 0.3509, "step": 16824 }, { "epoch": 0.738816865538931, "grad_norm": 1.5234375, "learning_rate": 7.999364745634982e-06, "loss": 0.3372, "step": 16826 }, { "epoch": 0.7389046840181345, "grad_norm": 1.5546875, "learning_rate": 7.994292990259875e-06, "loss": 0.3092, "step": 16828 }, { "epoch": 0.738992502497338, "grad_norm": 1.4609375, "learning_rate": 7.989222537185886e-06, "loss": 0.3271, "step": 16830 }, { "epoch": 0.7390803209765415, "grad_norm": 1.6640625, "learning_rate": 7.984153386801304e-06, "loss": 0.3161, "step": 16832 }, { "epoch": 0.739168139455745, "grad_norm": 1.546875, "learning_rate": 7.979085539494344e-06, "loss": 0.3168, "step": 16834 }, { "epoch": 0.7392559579349485, "grad_norm": 1.5078125, "learning_rate": 7.974018995653085e-06, "loss": 0.3135, "step": 16836 }, { "epoch": 0.7393437764141519, "grad_norm": 1.4609375, "learning_rate": 7.96895375566554e-06, "loss": 0.3193, "step": 16838 }, { "epoch": 0.7394315948933554, "grad_norm": 1.515625, "learning_rate": 7.963889819919599e-06, "loss": 0.3187, "step": 16840 }, { "epoch": 0.7395194133725589, "grad_norm": 1.578125, "learning_rate": 7.95882718880306e-06, "loss": 0.3404, "step": 16842 }, { "epoch": 0.7396072318517624, "grad_norm": 1.53125, "learning_rate": 7.953765862703622e-06, "loss": 0.3411, "step": 16844 }, { "epoch": 0.7396950503309659, "grad_norm": 1.5078125, "learning_rate": 7.948705842008869e-06, "loss": 0.3225, "step": 16846 }, { "epoch": 0.7397828688101694, "grad_norm": 1.5234375, "learning_rate": 7.943647127106318e-06, "loss": 0.346, "step": 16848 }, { "epoch": 0.7398706872893729, "grad_norm": 1.515625, "learning_rate": 7.938589718383354e-06, "loss": 0.2811, "step": 16850 }, { "epoch": 0.7399585057685764, "grad_norm": 1.59375, "learning_rate": 7.933533616227284e-06, "loss": 0.3548, "step": 16852 }, { "epoch": 0.7400463242477798, "grad_norm": 1.4453125, "learning_rate": 7.928478821025304e-06, "loss": 0.2942, "step": 16854 }, { "epoch": 0.7401341427269833, "grad_norm": 1.5078125, "learning_rate": 7.923425333164497e-06, "loss": 0.3173, "step": 16856 }, { "epoch": 0.7402219612061868, "grad_norm": 1.6484375, "learning_rate": 7.918373153031882e-06, "loss": 0.2976, "step": 16858 }, { "epoch": 0.7403097796853902, "grad_norm": 1.5390625, "learning_rate": 7.913322281014337e-06, "loss": 0.3343, "step": 16860 }, { "epoch": 0.7403975981645938, "grad_norm": 1.5546875, "learning_rate": 7.908272717498674e-06, "loss": 0.3248, "step": 16862 }, { "epoch": 0.7404854166437973, "grad_norm": 1.4921875, "learning_rate": 7.903224462871586e-06, "loss": 0.3092, "step": 16864 }, { "epoch": 0.7405732351230008, "grad_norm": 1.453125, "learning_rate": 7.898177517519659e-06, "loss": 0.3267, "step": 16866 }, { "epoch": 0.7406610536022042, "grad_norm": 1.5078125, "learning_rate": 7.893131881829405e-06, "loss": 0.3049, "step": 16868 }, { "epoch": 0.7407488720814077, "grad_norm": 1.4296875, "learning_rate": 7.888087556187201e-06, "loss": 0.3228, "step": 16870 }, { "epoch": 0.7408366905606112, "grad_norm": 1.546875, "learning_rate": 7.883044540979373e-06, "loss": 0.2953, "step": 16872 }, { "epoch": 0.7409245090398147, "grad_norm": 1.625, "learning_rate": 7.878002836592082e-06, "loss": 0.331, "step": 16874 }, { "epoch": 0.7410123275190182, "grad_norm": 1.640625, "learning_rate": 7.872962443411445e-06, "loss": 0.3062, "step": 16876 }, { "epoch": 0.7411001459982217, "grad_norm": 1.4921875, "learning_rate": 7.86792336182344e-06, "loss": 0.3085, "step": 16878 }, { "epoch": 0.7411879644774252, "grad_norm": 1.7265625, "learning_rate": 7.86288559221398e-06, "loss": 0.3122, "step": 16880 }, { "epoch": 0.7412757829566287, "grad_norm": 1.5859375, "learning_rate": 7.85784913496885e-06, "loss": 0.3141, "step": 16882 }, { "epoch": 0.7413636014358321, "grad_norm": 1.5703125, "learning_rate": 7.852813990473734e-06, "loss": 0.3276, "step": 16884 }, { "epoch": 0.7414514199150356, "grad_norm": 1.5, "learning_rate": 7.847780159114243e-06, "loss": 0.3191, "step": 16886 }, { "epoch": 0.7415392383942391, "grad_norm": 1.7734375, "learning_rate": 7.84274764127585e-06, "loss": 0.3259, "step": 16888 }, { "epoch": 0.7416270568734425, "grad_norm": 1.640625, "learning_rate": 7.837716437343961e-06, "loss": 0.31, "step": 16890 }, { "epoch": 0.7417148753526461, "grad_norm": 1.5078125, "learning_rate": 7.832686547703866e-06, "loss": 0.344, "step": 16892 }, { "epoch": 0.7418026938318496, "grad_norm": 1.4453125, "learning_rate": 7.827657972740738e-06, "loss": 0.3119, "step": 16894 }, { "epoch": 0.7418905123110531, "grad_norm": 1.6015625, "learning_rate": 7.82263071283969e-06, "loss": 0.3151, "step": 16896 }, { "epoch": 0.7419783307902565, "grad_norm": 1.5078125, "learning_rate": 7.81760476838569e-06, "loss": 0.3009, "step": 16898 }, { "epoch": 0.74206614926946, "grad_norm": 1.4453125, "learning_rate": 7.812580139763646e-06, "loss": 0.3434, "step": 16900 }, { "epoch": 0.7421539677486635, "grad_norm": 1.6171875, "learning_rate": 7.807556827358331e-06, "loss": 0.3139, "step": 16902 }, { "epoch": 0.742241786227867, "grad_norm": 1.46875, "learning_rate": 7.802534831554437e-06, "loss": 0.3411, "step": 16904 }, { "epoch": 0.7423296047070704, "grad_norm": 1.4453125, "learning_rate": 7.797514152736548e-06, "loss": 0.3141, "step": 16906 }, { "epoch": 0.742417423186274, "grad_norm": 1.4921875, "learning_rate": 7.792494791289142e-06, "loss": 0.301, "step": 16908 }, { "epoch": 0.7425052416654775, "grad_norm": 1.4609375, "learning_rate": 7.787476747596618e-06, "loss": 0.3115, "step": 16910 }, { "epoch": 0.742593060144681, "grad_norm": 1.5234375, "learning_rate": 7.782460022043242e-06, "loss": 0.3402, "step": 16912 }, { "epoch": 0.7426808786238844, "grad_norm": 1.53125, "learning_rate": 7.777444615013213e-06, "loss": 0.3368, "step": 16914 }, { "epoch": 0.7427686971030879, "grad_norm": 1.6015625, "learning_rate": 7.772430526890603e-06, "loss": 0.3026, "step": 16916 }, { "epoch": 0.7428565155822914, "grad_norm": 1.5078125, "learning_rate": 7.767417758059386e-06, "loss": 0.3252, "step": 16918 }, { "epoch": 0.7429443340614948, "grad_norm": 1.4921875, "learning_rate": 7.762406308903458e-06, "loss": 0.3343, "step": 16920 }, { "epoch": 0.7430321525406984, "grad_norm": 1.5625, "learning_rate": 7.757396179806576e-06, "loss": 0.3512, "step": 16922 }, { "epoch": 0.7431199710199019, "grad_norm": 1.734375, "learning_rate": 7.752387371152436e-06, "loss": 0.3558, "step": 16924 }, { "epoch": 0.7432077894991054, "grad_norm": 1.4609375, "learning_rate": 7.747379883324606e-06, "loss": 0.3414, "step": 16926 }, { "epoch": 0.7432956079783088, "grad_norm": 1.5, "learning_rate": 7.742373716706556e-06, "loss": 0.3374, "step": 16928 }, { "epoch": 0.7433834264575123, "grad_norm": 1.5859375, "learning_rate": 7.737368871681666e-06, "loss": 0.3306, "step": 16930 }, { "epoch": 0.7434712449367158, "grad_norm": 1.5390625, "learning_rate": 7.732365348633203e-06, "loss": 0.2976, "step": 16932 }, { "epoch": 0.7435590634159193, "grad_norm": 1.46875, "learning_rate": 7.727363147944352e-06, "loss": 0.3012, "step": 16934 }, { "epoch": 0.7436468818951227, "grad_norm": 1.46875, "learning_rate": 7.722362269998159e-06, "loss": 0.2872, "step": 16936 }, { "epoch": 0.7437347003743263, "grad_norm": 1.5, "learning_rate": 7.717362715177611e-06, "loss": 0.3526, "step": 16938 }, { "epoch": 0.7438225188535298, "grad_norm": 1.671875, "learning_rate": 7.712364483865564e-06, "loss": 0.3291, "step": 16940 }, { "epoch": 0.7439103373327333, "grad_norm": 1.5546875, "learning_rate": 7.707367576444796e-06, "loss": 0.3077, "step": 16942 }, { "epoch": 0.7439981558119367, "grad_norm": 1.4609375, "learning_rate": 7.702371993297963e-06, "loss": 0.3227, "step": 16944 }, { "epoch": 0.7440859742911402, "grad_norm": 1.515625, "learning_rate": 7.697377734807623e-06, "loss": 0.319, "step": 16946 }, { "epoch": 0.7441737927703437, "grad_norm": 1.53125, "learning_rate": 7.69238480135625e-06, "loss": 0.3222, "step": 16948 }, { "epoch": 0.7442616112495472, "grad_norm": 1.5390625, "learning_rate": 7.68739319332619e-06, "loss": 0.3384, "step": 16950 }, { "epoch": 0.7443494297287506, "grad_norm": 1.53125, "learning_rate": 7.682402911099717e-06, "loss": 0.2967, "step": 16952 }, { "epoch": 0.7444372482079542, "grad_norm": 1.5390625, "learning_rate": 7.677413955058982e-06, "loss": 0.3151, "step": 16954 }, { "epoch": 0.7445250666871577, "grad_norm": 1.5, "learning_rate": 7.67242632558603e-06, "loss": 0.309, "step": 16956 }, { "epoch": 0.7446128851663611, "grad_norm": 1.5234375, "learning_rate": 7.667440023062833e-06, "loss": 0.3118, "step": 16958 }, { "epoch": 0.7447007036455646, "grad_norm": 1.5703125, "learning_rate": 7.662455047871226e-06, "loss": 0.3044, "step": 16960 }, { "epoch": 0.7447885221247681, "grad_norm": 1.5390625, "learning_rate": 7.657471400392974e-06, "loss": 0.3071, "step": 16962 }, { "epoch": 0.7448763406039716, "grad_norm": 1.609375, "learning_rate": 7.652489081009718e-06, "loss": 0.3313, "step": 16964 }, { "epoch": 0.744964159083175, "grad_norm": 1.453125, "learning_rate": 7.647508090103009e-06, "loss": 0.3014, "step": 16966 }, { "epoch": 0.7450519775623785, "grad_norm": 1.5234375, "learning_rate": 7.642528428054288e-06, "loss": 0.3124, "step": 16968 }, { "epoch": 0.7451397960415821, "grad_norm": 1.59375, "learning_rate": 7.637550095244894e-06, "loss": 0.2996, "step": 16970 }, { "epoch": 0.7452276145207856, "grad_norm": 1.6328125, "learning_rate": 7.632573092056086e-06, "loss": 0.3438, "step": 16972 }, { "epoch": 0.745315432999989, "grad_norm": 1.53125, "learning_rate": 7.627597418868984e-06, "loss": 0.3348, "step": 16974 }, { "epoch": 0.7454032514791925, "grad_norm": 1.546875, "learning_rate": 7.622623076064645e-06, "loss": 0.3366, "step": 16976 }, { "epoch": 0.745491069958396, "grad_norm": 1.6640625, "learning_rate": 7.617650064023996e-06, "loss": 0.3047, "step": 16978 }, { "epoch": 0.7455788884375995, "grad_norm": 1.5546875, "learning_rate": 7.6126783831278605e-06, "loss": 0.3447, "step": 16980 }, { "epoch": 0.7456667069168029, "grad_norm": 1.5625, "learning_rate": 7.607708033756994e-06, "loss": 0.3101, "step": 16982 }, { "epoch": 0.7457545253960065, "grad_norm": 1.546875, "learning_rate": 7.602739016292007e-06, "loss": 0.31, "step": 16984 }, { "epoch": 0.74584234387521, "grad_norm": 1.5625, "learning_rate": 7.5977713311134454e-06, "loss": 0.3373, "step": 16986 }, { "epoch": 0.7459301623544135, "grad_norm": 1.640625, "learning_rate": 7.592804978601725e-06, "loss": 0.3055, "step": 16988 }, { "epoch": 0.7460179808336169, "grad_norm": 1.53125, "learning_rate": 7.587839959137166e-06, "loss": 0.3276, "step": 16990 }, { "epoch": 0.7461057993128204, "grad_norm": 1.6015625, "learning_rate": 7.582876273100004e-06, "loss": 0.3149, "step": 16992 }, { "epoch": 0.7461936177920239, "grad_norm": 1.5703125, "learning_rate": 7.5779139208703446e-06, "loss": 0.3327, "step": 16994 }, { "epoch": 0.7462814362712273, "grad_norm": 1.5078125, "learning_rate": 7.572952902828229e-06, "loss": 0.3176, "step": 16996 }, { "epoch": 0.7463692547504308, "grad_norm": 1.53125, "learning_rate": 7.567993219353542e-06, "loss": 0.3338, "step": 16998 }, { "epoch": 0.7464570732296344, "grad_norm": 1.5078125, "learning_rate": 7.563034870826121e-06, "loss": 0.3115, "step": 17000 }, { "epoch": 0.7465448917088379, "grad_norm": 1.578125, "learning_rate": 7.55807785762567e-06, "loss": 0.3558, "step": 17002 }, { "epoch": 0.7466327101880413, "grad_norm": 1.5234375, "learning_rate": 7.553122180131788e-06, "loss": 0.3067, "step": 17004 }, { "epoch": 0.7467205286672448, "grad_norm": 1.5390625, "learning_rate": 7.548167838724002e-06, "loss": 0.3201, "step": 17006 }, { "epoch": 0.7468083471464483, "grad_norm": 1.5390625, "learning_rate": 7.543214833781695e-06, "loss": 0.3435, "step": 17008 }, { "epoch": 0.7468961656256518, "grad_norm": 1.671875, "learning_rate": 7.538263165684192e-06, "loss": 0.3126, "step": 17010 }, { "epoch": 0.7469839841048552, "grad_norm": 1.5078125, "learning_rate": 7.533312834810672e-06, "loss": 0.3172, "step": 17012 }, { "epoch": 0.7470718025840587, "grad_norm": 1.578125, "learning_rate": 7.5283638415402505e-06, "loss": 0.3332, "step": 17014 }, { "epoch": 0.7471596210632623, "grad_norm": 1.5625, "learning_rate": 7.523416186251917e-06, "loss": 0.3223, "step": 17016 }, { "epoch": 0.7472474395424658, "grad_norm": 1.484375, "learning_rate": 7.518469869324548e-06, "loss": 0.3223, "step": 17018 }, { "epoch": 0.7473352580216692, "grad_norm": 1.5546875, "learning_rate": 7.513524891136958e-06, "loss": 0.3237, "step": 17020 }, { "epoch": 0.7474230765008727, "grad_norm": 1.546875, "learning_rate": 7.5085812520678174e-06, "loss": 0.3332, "step": 17022 }, { "epoch": 0.7475108949800762, "grad_norm": 1.5390625, "learning_rate": 7.503638952495723e-06, "loss": 0.3227, "step": 17024 }, { "epoch": 0.7475987134592796, "grad_norm": 1.609375, "learning_rate": 7.498697992799153e-06, "loss": 0.3195, "step": 17026 }, { "epoch": 0.7476865319384831, "grad_norm": 1.53125, "learning_rate": 7.4937583733564855e-06, "loss": 0.3361, "step": 17028 }, { "epoch": 0.7477743504176867, "grad_norm": 1.6328125, "learning_rate": 7.488820094545998e-06, "loss": 0.2991, "step": 17030 }, { "epoch": 0.7478621688968902, "grad_norm": 1.5390625, "learning_rate": 7.483883156745858e-06, "loss": 0.3425, "step": 17032 }, { "epoch": 0.7479499873760936, "grad_norm": 1.484375, "learning_rate": 7.478947560334151e-06, "loss": 0.3203, "step": 17034 }, { "epoch": 0.7480378058552971, "grad_norm": 1.5390625, "learning_rate": 7.4740133056888345e-06, "loss": 0.3273, "step": 17036 }, { "epoch": 0.7481256243345006, "grad_norm": 1.515625, "learning_rate": 7.469080393187786e-06, "loss": 0.3265, "step": 17038 }, { "epoch": 0.748213442813704, "grad_norm": 1.71875, "learning_rate": 7.464148823208764e-06, "loss": 0.3364, "step": 17040 }, { "epoch": 0.7483012612929075, "grad_norm": 1.4921875, "learning_rate": 7.459218596129422e-06, "loss": 0.2967, "step": 17042 }, { "epoch": 0.748389079772111, "grad_norm": 1.5546875, "learning_rate": 7.454289712327333e-06, "loss": 0.3519, "step": 17044 }, { "epoch": 0.7484768982513146, "grad_norm": 1.5859375, "learning_rate": 7.449362172179936e-06, "loss": 0.3004, "step": 17046 }, { "epoch": 0.748564716730518, "grad_norm": 1.6171875, "learning_rate": 7.444435976064595e-06, "loss": 0.2956, "step": 17048 }, { "epoch": 0.7486525352097215, "grad_norm": 1.4609375, "learning_rate": 7.439511124358558e-06, "loss": 0.3085, "step": 17050 }, { "epoch": 0.748740353688925, "grad_norm": 1.6171875, "learning_rate": 7.434587617438962e-06, "loss": 0.3035, "step": 17052 }, { "epoch": 0.7488281721681285, "grad_norm": 1.484375, "learning_rate": 7.4296654556828635e-06, "loss": 0.307, "step": 17054 }, { "epoch": 0.7489159906473319, "grad_norm": 1.53125, "learning_rate": 7.424744639467196e-06, "loss": 0.3258, "step": 17056 }, { "epoch": 0.7490038091265354, "grad_norm": 1.515625, "learning_rate": 7.419825169168798e-06, "loss": 0.3134, "step": 17058 }, { "epoch": 0.7490916276057389, "grad_norm": 1.46875, "learning_rate": 7.4149070451643955e-06, "loss": 0.3302, "step": 17060 }, { "epoch": 0.7491794460849425, "grad_norm": 1.484375, "learning_rate": 7.4099902678306324e-06, "loss": 0.3126, "step": 17062 }, { "epoch": 0.7492672645641459, "grad_norm": 1.5859375, "learning_rate": 7.405074837544035e-06, "loss": 0.3404, "step": 17064 }, { "epoch": 0.7493550830433494, "grad_norm": 1.4609375, "learning_rate": 7.400160754681012e-06, "loss": 0.3275, "step": 17066 }, { "epoch": 0.7494429015225529, "grad_norm": 1.578125, "learning_rate": 7.3952480196179094e-06, "loss": 0.3284, "step": 17068 }, { "epoch": 0.7495307200017564, "grad_norm": 1.484375, "learning_rate": 7.3903366327309235e-06, "loss": 0.3025, "step": 17070 }, { "epoch": 0.7496185384809598, "grad_norm": 1.4765625, "learning_rate": 7.385426594396186e-06, "loss": 0.3241, "step": 17072 }, { "epoch": 0.7497063569601633, "grad_norm": 1.59375, "learning_rate": 7.3805179049896975e-06, "loss": 0.3365, "step": 17074 }, { "epoch": 0.7497941754393669, "grad_norm": 1.65625, "learning_rate": 7.375610564887378e-06, "loss": 0.3483, "step": 17076 }, { "epoch": 0.7498819939185704, "grad_norm": 1.46875, "learning_rate": 7.3707045744650265e-06, "loss": 0.31, "step": 17078 }, { "epoch": 0.7499698123977738, "grad_norm": 1.6640625, "learning_rate": 7.365799934098336e-06, "loss": 0.3239, "step": 17080 }, { "epoch": 0.7500576308769773, "grad_norm": 1.515625, "learning_rate": 7.360896644162924e-06, "loss": 0.3389, "step": 17082 }, { "epoch": 0.7501454493561808, "grad_norm": 1.5625, "learning_rate": 7.355994705034267e-06, "loss": 0.3029, "step": 17084 }, { "epoch": 0.7502332678353842, "grad_norm": 1.40625, "learning_rate": 7.35109411708777e-06, "loss": 0.3065, "step": 17086 }, { "epoch": 0.7503210863145877, "grad_norm": 1.578125, "learning_rate": 7.346194880698718e-06, "loss": 0.3347, "step": 17088 }, { "epoch": 0.7504089047937912, "grad_norm": 1.6484375, "learning_rate": 7.341296996242295e-06, "loss": 0.3161, "step": 17090 }, { "epoch": 0.7504967232729948, "grad_norm": 1.5625, "learning_rate": 7.336400464093579e-06, "loss": 0.318, "step": 17092 }, { "epoch": 0.7505845417521982, "grad_norm": 1.5078125, "learning_rate": 7.331505284627543e-06, "loss": 0.3086, "step": 17094 }, { "epoch": 0.7506723602314017, "grad_norm": 1.578125, "learning_rate": 7.326611458219077e-06, "loss": 0.3083, "step": 17096 }, { "epoch": 0.7507601787106052, "grad_norm": 1.546875, "learning_rate": 7.321718985242931e-06, "loss": 0.3355, "step": 17098 }, { "epoch": 0.7508479971898087, "grad_norm": 1.828125, "learning_rate": 7.316827866073794e-06, "loss": 0.3235, "step": 17100 }, { "epoch": 0.7509358156690121, "grad_norm": 1.59375, "learning_rate": 7.3119381010862155e-06, "loss": 0.2957, "step": 17102 }, { "epoch": 0.7510236341482156, "grad_norm": 1.484375, "learning_rate": 7.307049690654649e-06, "loss": 0.3141, "step": 17104 }, { "epoch": 0.7511114526274191, "grad_norm": 1.5390625, "learning_rate": 7.30216263515347e-06, "loss": 0.3129, "step": 17106 }, { "epoch": 0.7511992711066227, "grad_norm": 1.53125, "learning_rate": 7.297276934956909e-06, "loss": 0.3318, "step": 17108 }, { "epoch": 0.7512870895858261, "grad_norm": 1.6640625, "learning_rate": 7.292392590439132e-06, "loss": 0.3171, "step": 17110 }, { "epoch": 0.7513749080650296, "grad_norm": 1.625, "learning_rate": 7.287509601974174e-06, "loss": 0.3326, "step": 17112 }, { "epoch": 0.7514627265442331, "grad_norm": 1.53125, "learning_rate": 7.28262796993597e-06, "loss": 0.3218, "step": 17114 }, { "epoch": 0.7515505450234365, "grad_norm": 1.5546875, "learning_rate": 7.2777476946983696e-06, "loss": 0.3128, "step": 17116 }, { "epoch": 0.75163836350264, "grad_norm": 1.5703125, "learning_rate": 7.2728687766351e-06, "loss": 0.3404, "step": 17118 }, { "epoch": 0.7517261819818435, "grad_norm": 1.6953125, "learning_rate": 7.267991216119791e-06, "loss": 0.3249, "step": 17120 }, { "epoch": 0.751814000461047, "grad_norm": 1.65625, "learning_rate": 7.263115013525956e-06, "loss": 0.3063, "step": 17122 }, { "epoch": 0.7519018189402505, "grad_norm": 1.640625, "learning_rate": 7.258240169227032e-06, "loss": 0.3196, "step": 17124 }, { "epoch": 0.751989637419454, "grad_norm": 1.71875, "learning_rate": 7.25336668359633e-06, "loss": 0.3088, "step": 17126 }, { "epoch": 0.7520774558986575, "grad_norm": 1.4609375, "learning_rate": 7.248494557007051e-06, "loss": 0.3015, "step": 17128 }, { "epoch": 0.752165274377861, "grad_norm": 1.65625, "learning_rate": 7.2436237898323236e-06, "loss": 0.3299, "step": 17130 }, { "epoch": 0.7522530928570644, "grad_norm": 1.5234375, "learning_rate": 7.238754382445137e-06, "loss": 0.3355, "step": 17132 }, { "epoch": 0.7523409113362679, "grad_norm": 1.4921875, "learning_rate": 7.233886335218404e-06, "loss": 0.3233, "step": 17134 }, { "epoch": 0.7524287298154714, "grad_norm": 1.5390625, "learning_rate": 7.2290196485249155e-06, "loss": 0.3121, "step": 17136 }, { "epoch": 0.752516548294675, "grad_norm": 1.453125, "learning_rate": 7.22415432273735e-06, "loss": 0.3085, "step": 17138 }, { "epoch": 0.7526043667738784, "grad_norm": 1.5390625, "learning_rate": 7.21929035822832e-06, "loss": 0.3517, "step": 17140 }, { "epoch": 0.7526921852530819, "grad_norm": 1.484375, "learning_rate": 7.214427755370287e-06, "loss": 0.3077, "step": 17142 }, { "epoch": 0.7527800037322854, "grad_norm": 1.4453125, "learning_rate": 7.209566514535648e-06, "loss": 0.3223, "step": 17144 }, { "epoch": 0.7528678222114888, "grad_norm": 1.5234375, "learning_rate": 7.204706636096664e-06, "loss": 0.3242, "step": 17146 }, { "epoch": 0.7529556406906923, "grad_norm": 1.9296875, "learning_rate": 7.199848120425526e-06, "loss": 0.3434, "step": 17148 }, { "epoch": 0.7530434591698958, "grad_norm": 1.546875, "learning_rate": 7.194990967894269e-06, "loss": 0.298, "step": 17150 }, { "epoch": 0.7531312776490993, "grad_norm": 1.640625, "learning_rate": 7.1901351788748795e-06, "loss": 0.288, "step": 17152 }, { "epoch": 0.7532190961283028, "grad_norm": 1.5234375, "learning_rate": 7.1852807537392095e-06, "loss": 0.3107, "step": 17154 }, { "epoch": 0.7533069146075063, "grad_norm": 1.5234375, "learning_rate": 7.1804276928590015e-06, "loss": 0.2997, "step": 17156 }, { "epoch": 0.7533947330867098, "grad_norm": 1.6015625, "learning_rate": 7.175575996605918e-06, "loss": 0.2898, "step": 17158 }, { "epoch": 0.7534825515659133, "grad_norm": 1.4765625, "learning_rate": 7.170725665351493e-06, "loss": 0.3254, "step": 17160 }, { "epoch": 0.7535703700451167, "grad_norm": 1.5703125, "learning_rate": 7.165876699467175e-06, "loss": 0.3258, "step": 17162 }, { "epoch": 0.7536581885243202, "grad_norm": 1.4765625, "learning_rate": 7.161029099324299e-06, "loss": 0.3426, "step": 17164 }, { "epoch": 0.7537460070035237, "grad_norm": 1.4375, "learning_rate": 7.156182865294078e-06, "loss": 0.2992, "step": 17166 }, { "epoch": 0.7538338254827271, "grad_norm": 1.5078125, "learning_rate": 7.151337997747662e-06, "loss": 0.3336, "step": 17168 }, { "epoch": 0.7539216439619307, "grad_norm": 1.546875, "learning_rate": 7.14649449705605e-06, "loss": 0.3151, "step": 17170 }, { "epoch": 0.7540094624411342, "grad_norm": 1.515625, "learning_rate": 7.1416523635901785e-06, "loss": 0.2882, "step": 17172 }, { "epoch": 0.7540972809203377, "grad_norm": 1.5, "learning_rate": 7.136811597720852e-06, "loss": 0.3184, "step": 17174 }, { "epoch": 0.7541850993995411, "grad_norm": 1.515625, "learning_rate": 7.131972199818765e-06, "loss": 0.3324, "step": 17176 }, { "epoch": 0.7542729178787446, "grad_norm": 1.46875, "learning_rate": 7.12713417025454e-06, "loss": 0.323, "step": 17178 }, { "epoch": 0.7543607363579481, "grad_norm": 1.6328125, "learning_rate": 7.122297509398662e-06, "loss": 0.3348, "step": 17180 }, { "epoch": 0.7544485548371516, "grad_norm": 1.546875, "learning_rate": 7.117462217621529e-06, "loss": 0.3198, "step": 17182 }, { "epoch": 0.7545363733163551, "grad_norm": 1.5234375, "learning_rate": 7.112628295293417e-06, "loss": 0.318, "step": 17184 }, { "epoch": 0.7546241917955586, "grad_norm": 1.4609375, "learning_rate": 7.107795742784526e-06, "loss": 0.2928, "step": 17186 }, { "epoch": 0.7547120102747621, "grad_norm": 1.53125, "learning_rate": 7.102964560464925e-06, "loss": 0.3126, "step": 17188 }, { "epoch": 0.7547998287539656, "grad_norm": 1.7421875, "learning_rate": 7.0981347487045825e-06, "loss": 0.3456, "step": 17190 }, { "epoch": 0.754887647233169, "grad_norm": 1.6484375, "learning_rate": 7.093306307873376e-06, "loss": 0.3137, "step": 17192 }, { "epoch": 0.7549754657123725, "grad_norm": 1.5859375, "learning_rate": 7.0884792383410615e-06, "loss": 0.3314, "step": 17194 }, { "epoch": 0.755063284191576, "grad_norm": 1.625, "learning_rate": 7.083653540477306e-06, "loss": 0.3567, "step": 17196 }, { "epoch": 0.7551511026707795, "grad_norm": 1.484375, "learning_rate": 7.078829214651658e-06, "loss": 0.308, "step": 17198 }, { "epoch": 0.755238921149983, "grad_norm": 1.4453125, "learning_rate": 7.074006261233559e-06, "loss": 0.3441, "step": 17200 }, { "epoch": 0.7553267396291865, "grad_norm": 1.5703125, "learning_rate": 7.0691846805923635e-06, "loss": 0.3215, "step": 17202 }, { "epoch": 0.75541455810839, "grad_norm": 1.5, "learning_rate": 7.064364473097296e-06, "loss": 0.3256, "step": 17204 }, { "epoch": 0.7555023765875934, "grad_norm": 1.4453125, "learning_rate": 7.059545639117504e-06, "loss": 0.3148, "step": 17206 }, { "epoch": 0.7555901950667969, "grad_norm": 1.46875, "learning_rate": 7.054728179021999e-06, "loss": 0.3193, "step": 17208 }, { "epoch": 0.7556780135460004, "grad_norm": 1.5859375, "learning_rate": 7.049912093179728e-06, "loss": 0.2858, "step": 17210 }, { "epoch": 0.7557658320252039, "grad_norm": 1.515625, "learning_rate": 7.0450973819594785e-06, "loss": 0.3166, "step": 17212 }, { "epoch": 0.7558536505044073, "grad_norm": 1.609375, "learning_rate": 7.04028404572998e-06, "loss": 0.3183, "step": 17214 }, { "epoch": 0.7559414689836109, "grad_norm": 1.53125, "learning_rate": 7.035472084859837e-06, "loss": 0.299, "step": 17216 }, { "epoch": 0.7560292874628144, "grad_norm": 1.5546875, "learning_rate": 7.030661499717539e-06, "loss": 0.3222, "step": 17218 }, { "epoch": 0.7561171059420179, "grad_norm": 1.484375, "learning_rate": 7.0258522906715e-06, "loss": 0.3416, "step": 17220 }, { "epoch": 0.7562049244212213, "grad_norm": 1.4921875, "learning_rate": 7.0210444580899925e-06, "loss": 0.3432, "step": 17222 }, { "epoch": 0.7562927429004248, "grad_norm": 1.5625, "learning_rate": 7.016238002341219e-06, "loss": 0.316, "step": 17224 }, { "epoch": 0.7563805613796283, "grad_norm": 1.4375, "learning_rate": 7.0114329237932485e-06, "loss": 0.3184, "step": 17226 }, { "epoch": 0.7564683798588318, "grad_norm": 1.53125, "learning_rate": 7.006629222814048e-06, "loss": 0.3126, "step": 17228 }, { "epoch": 0.7565561983380353, "grad_norm": 1.4453125, "learning_rate": 7.001826899771505e-06, "loss": 0.3099, "step": 17230 }, { "epoch": 0.7566440168172388, "grad_norm": 1.4609375, "learning_rate": 6.997025955033365e-06, "loss": 0.3084, "step": 17232 }, { "epoch": 0.7567318352964423, "grad_norm": 1.6171875, "learning_rate": 6.992226388967302e-06, "loss": 0.3072, "step": 17234 }, { "epoch": 0.7568196537756458, "grad_norm": 1.5078125, "learning_rate": 6.987428201940854e-06, "loss": 0.3085, "step": 17236 }, { "epoch": 0.7569074722548492, "grad_norm": 1.5546875, "learning_rate": 6.982631394321468e-06, "loss": 0.3344, "step": 17238 }, { "epoch": 0.7569952907340527, "grad_norm": 1.6171875, "learning_rate": 6.977835966476503e-06, "loss": 0.3099, "step": 17240 }, { "epoch": 0.7570831092132562, "grad_norm": 1.5703125, "learning_rate": 6.973041918773168e-06, "loss": 0.3596, "step": 17242 }, { "epoch": 0.7571709276924596, "grad_norm": 1.4765625, "learning_rate": 6.9682492515786096e-06, "loss": 0.3381, "step": 17244 }, { "epoch": 0.7572587461716632, "grad_norm": 1.5546875, "learning_rate": 6.963457965259837e-06, "loss": 0.3254, "step": 17246 }, { "epoch": 0.7573465646508667, "grad_norm": 1.5625, "learning_rate": 6.958668060183785e-06, "loss": 0.3155, "step": 17248 }, { "epoch": 0.7574343831300702, "grad_norm": 1.515625, "learning_rate": 6.953879536717259e-06, "loss": 0.3348, "step": 17250 }, { "epoch": 0.7575222016092736, "grad_norm": 1.578125, "learning_rate": 6.949092395226955e-06, "loss": 0.3602, "step": 17252 }, { "epoch": 0.7576100200884771, "grad_norm": 1.4765625, "learning_rate": 6.944306636079492e-06, "loss": 0.2985, "step": 17254 }, { "epoch": 0.7576978385676806, "grad_norm": 1.53125, "learning_rate": 6.939522259641346e-06, "loss": 0.2893, "step": 17256 }, { "epoch": 0.757785657046884, "grad_norm": 1.5390625, "learning_rate": 6.934739266278923e-06, "loss": 0.3334, "step": 17258 }, { "epoch": 0.7578734755260875, "grad_norm": 1.53125, "learning_rate": 6.929957656358496e-06, "loss": 0.3047, "step": 17260 }, { "epoch": 0.7579612940052911, "grad_norm": 1.4921875, "learning_rate": 6.925177430246238e-06, "loss": 0.3304, "step": 17262 }, { "epoch": 0.7580491124844946, "grad_norm": 1.5859375, "learning_rate": 6.920398588308233e-06, "loss": 0.3467, "step": 17264 }, { "epoch": 0.758136930963698, "grad_norm": 1.515625, "learning_rate": 6.915621130910427e-06, "loss": 0.3235, "step": 17266 }, { "epoch": 0.7582247494429015, "grad_norm": 1.515625, "learning_rate": 6.9108450584187e-06, "loss": 0.3322, "step": 17268 }, { "epoch": 0.758312567922105, "grad_norm": 1.546875, "learning_rate": 6.9060703711987944e-06, "loss": 0.3114, "step": 17270 }, { "epoch": 0.7584003864013085, "grad_norm": 1.5, "learning_rate": 6.9012970696163585e-06, "loss": 0.3164, "step": 17272 }, { "epoch": 0.7584882048805119, "grad_norm": 1.6328125, "learning_rate": 6.896525154036923e-06, "loss": 0.323, "step": 17274 }, { "epoch": 0.7585760233597155, "grad_norm": 1.578125, "learning_rate": 6.891754624825939e-06, "loss": 0.3231, "step": 17276 }, { "epoch": 0.758663841838919, "grad_norm": 1.4921875, "learning_rate": 6.886985482348726e-06, "loss": 0.3322, "step": 17278 }, { "epoch": 0.7587516603181225, "grad_norm": 1.609375, "learning_rate": 6.8822177269704965e-06, "loss": 0.3385, "step": 17280 }, { "epoch": 0.7588394787973259, "grad_norm": 1.609375, "learning_rate": 6.877451359056389e-06, "loss": 0.3327, "step": 17282 }, { "epoch": 0.7589272972765294, "grad_norm": 1.59375, "learning_rate": 6.872686378971391e-06, "loss": 0.3015, "step": 17284 }, { "epoch": 0.7590151157557329, "grad_norm": 1.640625, "learning_rate": 6.867922787080422e-06, "loss": 0.3224, "step": 17286 }, { "epoch": 0.7591029342349364, "grad_norm": 1.515625, "learning_rate": 6.863160583748274e-06, "loss": 0.3328, "step": 17288 }, { "epoch": 0.7591907527141398, "grad_norm": 1.6875, "learning_rate": 6.858399769339627e-06, "loss": 0.3141, "step": 17290 }, { "epoch": 0.7592785711933434, "grad_norm": 1.5546875, "learning_rate": 6.853640344219084e-06, "loss": 0.3176, "step": 17292 }, { "epoch": 0.7593663896725469, "grad_norm": 1.4921875, "learning_rate": 6.848882308751106e-06, "loss": 0.2885, "step": 17294 }, { "epoch": 0.7594542081517504, "grad_norm": 1.484375, "learning_rate": 6.844125663300077e-06, "loss": 0.3246, "step": 17296 }, { "epoch": 0.7595420266309538, "grad_norm": 1.5859375, "learning_rate": 6.839370408230259e-06, "loss": 0.3065, "step": 17298 }, { "epoch": 0.7596298451101573, "grad_norm": 1.5078125, "learning_rate": 6.8346165439058e-06, "loss": 0.3019, "step": 17300 }, { "epoch": 0.7597176635893608, "grad_norm": 1.5859375, "learning_rate": 6.829864070690778e-06, "loss": 0.3003, "step": 17302 }, { "epoch": 0.7598054820685642, "grad_norm": 1.53125, "learning_rate": 6.825112988949103e-06, "loss": 0.3029, "step": 17304 }, { "epoch": 0.7598933005477677, "grad_norm": 1.5234375, "learning_rate": 6.820363299044641e-06, "loss": 0.3294, "step": 17306 }, { "epoch": 0.7599811190269713, "grad_norm": 1.5625, "learning_rate": 6.815615001341108e-06, "loss": 0.2908, "step": 17308 }, { "epoch": 0.7600689375061748, "grad_norm": 1.53125, "learning_rate": 6.810868096202144e-06, "loss": 0.3225, "step": 17310 }, { "epoch": 0.7601567559853782, "grad_norm": 1.46875, "learning_rate": 6.806122583991264e-06, "loss": 0.3106, "step": 17312 }, { "epoch": 0.7602445744645817, "grad_norm": 1.6171875, "learning_rate": 6.801378465071867e-06, "loss": 0.3209, "step": 17314 }, { "epoch": 0.7603323929437852, "grad_norm": 1.5546875, "learning_rate": 6.7966357398072804e-06, "loss": 0.3088, "step": 17316 }, { "epoch": 0.7604202114229887, "grad_norm": 1.5, "learning_rate": 6.791894408560681e-06, "loss": 0.3274, "step": 17318 }, { "epoch": 0.7605080299021921, "grad_norm": 1.5703125, "learning_rate": 6.787154471695184e-06, "loss": 0.3167, "step": 17320 }, { "epoch": 0.7605958483813956, "grad_norm": 1.4609375, "learning_rate": 6.7824159295737625e-06, "loss": 0.3075, "step": 17322 }, { "epoch": 0.7606836668605992, "grad_norm": 1.515625, "learning_rate": 6.777678782559288e-06, "loss": 0.3066, "step": 17324 }, { "epoch": 0.7607714853398027, "grad_norm": 1.453125, "learning_rate": 6.772943031014548e-06, "loss": 0.2794, "step": 17326 }, { "epoch": 0.7608593038190061, "grad_norm": 1.625, "learning_rate": 6.768208675302193e-06, "loss": 0.3188, "step": 17328 }, { "epoch": 0.7609471222982096, "grad_norm": 1.4296875, "learning_rate": 6.763475715784795e-06, "loss": 0.2992, "step": 17330 }, { "epoch": 0.7610349407774131, "grad_norm": 1.625, "learning_rate": 6.758744152824798e-06, "loss": 0.3023, "step": 17332 }, { "epoch": 0.7611227592566165, "grad_norm": 1.609375, "learning_rate": 6.754013986784546e-06, "loss": 0.3005, "step": 17334 }, { "epoch": 0.76121057773582, "grad_norm": 1.578125, "learning_rate": 6.749285218026272e-06, "loss": 0.3198, "step": 17336 }, { "epoch": 0.7612983962150236, "grad_norm": 1.5390625, "learning_rate": 6.744557846912114e-06, "loss": 0.3374, "step": 17338 }, { "epoch": 0.7613862146942271, "grad_norm": 1.625, "learning_rate": 6.739831873804095e-06, "loss": 0.3041, "step": 17340 }, { "epoch": 0.7614740331734305, "grad_norm": 1.609375, "learning_rate": 6.7351072990641225e-06, "loss": 0.3155, "step": 17342 }, { "epoch": 0.761561851652634, "grad_norm": 1.4609375, "learning_rate": 6.730384123054018e-06, "loss": 0.311, "step": 17344 }, { "epoch": 0.7616496701318375, "grad_norm": 1.59375, "learning_rate": 6.725662346135467e-06, "loss": 0.3149, "step": 17346 }, { "epoch": 0.761737488611041, "grad_norm": 1.4375, "learning_rate": 6.720941968670083e-06, "loss": 0.309, "step": 17348 }, { "epoch": 0.7618253070902444, "grad_norm": 1.453125, "learning_rate": 6.716222991019347e-06, "loss": 0.3154, "step": 17350 }, { "epoch": 0.7619131255694479, "grad_norm": 1.6171875, "learning_rate": 6.711505413544628e-06, "loss": 0.3152, "step": 17352 }, { "epoch": 0.7620009440486515, "grad_norm": 1.4375, "learning_rate": 6.706789236607214e-06, "loss": 0.298, "step": 17354 }, { "epoch": 0.762088762527855, "grad_norm": 1.609375, "learning_rate": 6.7020744605682616e-06, "loss": 0.351, "step": 17356 }, { "epoch": 0.7621765810070584, "grad_norm": 1.5546875, "learning_rate": 6.697361085788839e-06, "loss": 0.3015, "step": 17358 }, { "epoch": 0.7622643994862619, "grad_norm": 1.5859375, "learning_rate": 6.69264911262989e-06, "loss": 0.3124, "step": 17360 }, { "epoch": 0.7623522179654654, "grad_norm": 1.390625, "learning_rate": 6.687938541452257e-06, "loss": 0.3441, "step": 17362 }, { "epoch": 0.7624400364446688, "grad_norm": 1.5625, "learning_rate": 6.6832293726166926e-06, "loss": 0.3319, "step": 17364 }, { "epoch": 0.7625278549238723, "grad_norm": 1.4765625, "learning_rate": 6.678521606483798e-06, "loss": 0.3132, "step": 17366 }, { "epoch": 0.7626156734030758, "grad_norm": 1.5078125, "learning_rate": 6.673815243414119e-06, "loss": 0.3261, "step": 17368 }, { "epoch": 0.7627034918822794, "grad_norm": 1.515625, "learning_rate": 6.669110283768057e-06, "loss": 0.3018, "step": 17370 }, { "epoch": 0.7627913103614828, "grad_norm": 1.71875, "learning_rate": 6.664406727905928e-06, "loss": 0.3038, "step": 17372 }, { "epoch": 0.7628791288406863, "grad_norm": 1.5, "learning_rate": 6.659704576187928e-06, "loss": 0.3124, "step": 17374 }, { "epoch": 0.7629669473198898, "grad_norm": 1.578125, "learning_rate": 6.655003828974141e-06, "loss": 0.3278, "step": 17376 }, { "epoch": 0.7630547657990933, "grad_norm": 1.4921875, "learning_rate": 6.650304486624565e-06, "loss": 0.3151, "step": 17378 }, { "epoch": 0.7631425842782967, "grad_norm": 1.5546875, "learning_rate": 6.645606549499062e-06, "loss": 0.3277, "step": 17380 }, { "epoch": 0.7632304027575002, "grad_norm": 1.453125, "learning_rate": 6.640910017957419e-06, "loss": 0.296, "step": 17382 }, { "epoch": 0.7633182212367038, "grad_norm": 1.5703125, "learning_rate": 6.6362148923592854e-06, "loss": 0.3152, "step": 17384 }, { "epoch": 0.7634060397159073, "grad_norm": 1.4453125, "learning_rate": 6.6315211730642114e-06, "loss": 0.3105, "step": 17386 }, { "epoch": 0.7634938581951107, "grad_norm": 1.625, "learning_rate": 6.626828860431658e-06, "loss": 0.3076, "step": 17388 }, { "epoch": 0.7635816766743142, "grad_norm": 1.59375, "learning_rate": 6.622137954820945e-06, "loss": 0.3395, "step": 17390 }, { "epoch": 0.7636694951535177, "grad_norm": 1.5078125, "learning_rate": 6.617448456591321e-06, "loss": 0.3213, "step": 17392 }, { "epoch": 0.7637573136327211, "grad_norm": 1.59375, "learning_rate": 6.612760366101902e-06, "loss": 0.2909, "step": 17394 }, { "epoch": 0.7638451321119246, "grad_norm": 1.515625, "learning_rate": 6.608073683711699e-06, "loss": 0.3157, "step": 17396 }, { "epoch": 0.7639329505911281, "grad_norm": 1.5234375, "learning_rate": 6.603388409779626e-06, "loss": 0.3192, "step": 17398 }, { "epoch": 0.7640207690703317, "grad_norm": 1.640625, "learning_rate": 6.598704544664469e-06, "loss": 0.3447, "step": 17400 }, { "epoch": 0.7641085875495351, "grad_norm": 1.53125, "learning_rate": 6.594022088724935e-06, "loss": 0.3035, "step": 17402 }, { "epoch": 0.7641964060287386, "grad_norm": 1.4921875, "learning_rate": 6.589341042319597e-06, "loss": 0.3009, "step": 17404 }, { "epoch": 0.7642842245079421, "grad_norm": 1.515625, "learning_rate": 6.5846614058069415e-06, "loss": 0.296, "step": 17406 }, { "epoch": 0.7643720429871456, "grad_norm": 1.5390625, "learning_rate": 6.579983179545324e-06, "loss": 0.3131, "step": 17408 }, { "epoch": 0.764459861466349, "grad_norm": 1.578125, "learning_rate": 6.5753063638930165e-06, "loss": 0.3251, "step": 17410 }, { "epoch": 0.7645476799455525, "grad_norm": 1.765625, "learning_rate": 6.570630959208163e-06, "loss": 0.3128, "step": 17412 }, { "epoch": 0.764635498424756, "grad_norm": 1.5703125, "learning_rate": 6.5659569658488015e-06, "loss": 0.3027, "step": 17414 }, { "epoch": 0.7647233169039596, "grad_norm": 1.5546875, "learning_rate": 6.561284384172883e-06, "loss": 0.3251, "step": 17416 }, { "epoch": 0.764811135383163, "grad_norm": 1.453125, "learning_rate": 6.556613214538218e-06, "loss": 0.3083, "step": 17418 }, { "epoch": 0.7648989538623665, "grad_norm": 1.484375, "learning_rate": 6.551943457302543e-06, "loss": 0.3032, "step": 17420 }, { "epoch": 0.76498677234157, "grad_norm": 1.6015625, "learning_rate": 6.547275112823459e-06, "loss": 0.2952, "step": 17422 }, { "epoch": 0.7650745908207734, "grad_norm": 1.578125, "learning_rate": 6.542608181458471e-06, "loss": 0.3373, "step": 17424 }, { "epoch": 0.7651624092999769, "grad_norm": 1.46875, "learning_rate": 6.537942663564975e-06, "loss": 0.324, "step": 17426 }, { "epoch": 0.7652502277791804, "grad_norm": 1.625, "learning_rate": 6.5332785595002446e-06, "loss": 0.3154, "step": 17428 }, { "epoch": 0.765338046258384, "grad_norm": 1.578125, "learning_rate": 6.528615869621477e-06, "loss": 0.3149, "step": 17430 }, { "epoch": 0.7654258647375874, "grad_norm": 1.5234375, "learning_rate": 6.523954594285728e-06, "loss": 0.3135, "step": 17432 }, { "epoch": 0.7655136832167909, "grad_norm": 1.5078125, "learning_rate": 6.51929473384997e-06, "loss": 0.3317, "step": 17434 }, { "epoch": 0.7656015016959944, "grad_norm": 1.5, "learning_rate": 6.514636288671056e-06, "loss": 0.3129, "step": 17436 }, { "epoch": 0.7656893201751979, "grad_norm": 1.5234375, "learning_rate": 6.509979259105714e-06, "loss": 0.2847, "step": 17438 }, { "epoch": 0.7657771386544013, "grad_norm": 1.5859375, "learning_rate": 6.505323645510603e-06, "loss": 0.3047, "step": 17440 }, { "epoch": 0.7658649571336048, "grad_norm": 1.53125, "learning_rate": 6.500669448242233e-06, "loss": 0.3203, "step": 17442 }, { "epoch": 0.7659527756128083, "grad_norm": 1.5546875, "learning_rate": 6.496016667657037e-06, "loss": 0.2908, "step": 17444 }, { "epoch": 0.7660405940920119, "grad_norm": 1.5546875, "learning_rate": 6.491365304111322e-06, "loss": 0.3149, "step": 17446 }, { "epoch": 0.7661284125712153, "grad_norm": 1.53125, "learning_rate": 6.486715357961281e-06, "loss": 0.3405, "step": 17448 }, { "epoch": 0.7662162310504188, "grad_norm": 1.4375, "learning_rate": 6.4820668295630245e-06, "loss": 0.2915, "step": 17450 }, { "epoch": 0.7663040495296223, "grad_norm": 1.4453125, "learning_rate": 6.47741971927252e-06, "loss": 0.3329, "step": 17452 }, { "epoch": 0.7663918680088257, "grad_norm": 1.5546875, "learning_rate": 6.4727740274456605e-06, "loss": 0.3177, "step": 17454 }, { "epoch": 0.7664796864880292, "grad_norm": 1.4609375, "learning_rate": 6.46812975443821e-06, "loss": 0.3042, "step": 17456 }, { "epoch": 0.7665675049672327, "grad_norm": 1.5078125, "learning_rate": 6.463486900605822e-06, "loss": 0.3394, "step": 17458 }, { "epoch": 0.7666553234464362, "grad_norm": 1.3828125, "learning_rate": 6.458845466304053e-06, "loss": 0.2948, "step": 17460 }, { "epoch": 0.7667431419256397, "grad_norm": 1.5234375, "learning_rate": 6.454205451888335e-06, "loss": 0.3091, "step": 17462 }, { "epoch": 0.7668309604048432, "grad_norm": 1.5859375, "learning_rate": 6.449566857714015e-06, "loss": 0.3186, "step": 17464 }, { "epoch": 0.7669187788840467, "grad_norm": 1.625, "learning_rate": 6.444929684136306e-06, "loss": 0.3018, "step": 17466 }, { "epoch": 0.7670065973632502, "grad_norm": 1.484375, "learning_rate": 6.440293931510336e-06, "loss": 0.3279, "step": 17468 }, { "epoch": 0.7670944158424536, "grad_norm": 1.5, "learning_rate": 6.435659600191099e-06, "loss": 0.3197, "step": 17470 }, { "epoch": 0.7671822343216571, "grad_norm": 1.5078125, "learning_rate": 6.43102669053351e-06, "loss": 0.2848, "step": 17472 }, { "epoch": 0.7672700528008606, "grad_norm": 1.484375, "learning_rate": 6.4263952028923435e-06, "loss": 0.2845, "step": 17474 }, { "epoch": 0.767357871280064, "grad_norm": 1.546875, "learning_rate": 6.421765137622282e-06, "loss": 0.3126, "step": 17476 }, { "epoch": 0.7674456897592676, "grad_norm": 1.5859375, "learning_rate": 6.417136495077905e-06, "loss": 0.3325, "step": 17478 }, { "epoch": 0.7675335082384711, "grad_norm": 1.4765625, "learning_rate": 6.4125092756136625e-06, "loss": 0.3037, "step": 17480 }, { "epoch": 0.7676213267176746, "grad_norm": 1.546875, "learning_rate": 6.407883479583921e-06, "loss": 0.3043, "step": 17482 }, { "epoch": 0.767709145196878, "grad_norm": 1.5078125, "learning_rate": 6.403259107342921e-06, "loss": 0.34, "step": 17484 }, { "epoch": 0.7677969636760815, "grad_norm": 1.5703125, "learning_rate": 6.398636159244797e-06, "loss": 0.3392, "step": 17486 }, { "epoch": 0.767884782155285, "grad_norm": 1.5, "learning_rate": 6.394014635643575e-06, "loss": 0.3265, "step": 17488 }, { "epoch": 0.7679726006344885, "grad_norm": 1.5546875, "learning_rate": 6.389394536893165e-06, "loss": 0.3089, "step": 17490 }, { "epoch": 0.768060419113692, "grad_norm": 1.4921875, "learning_rate": 6.384775863347389e-06, "loss": 0.2844, "step": 17492 }, { "epoch": 0.7681482375928955, "grad_norm": 1.7265625, "learning_rate": 6.380158615359932e-06, "loss": 0.3237, "step": 17494 }, { "epoch": 0.768236056072099, "grad_norm": 1.4375, "learning_rate": 6.3755427932844005e-06, "loss": 0.2955, "step": 17496 }, { "epoch": 0.7683238745513025, "grad_norm": 1.40625, "learning_rate": 6.3709283974742654e-06, "loss": 0.3182, "step": 17498 }, { "epoch": 0.7684116930305059, "grad_norm": 1.4921875, "learning_rate": 6.366315428282893e-06, "loss": 0.3128, "step": 17500 }, { "epoch": 0.7684995115097094, "grad_norm": 1.4375, "learning_rate": 6.361703886063561e-06, "loss": 0.2936, "step": 17502 }, { "epoch": 0.7685873299889129, "grad_norm": 1.4921875, "learning_rate": 6.357093771169403e-06, "loss": 0.3087, "step": 17504 }, { "epoch": 0.7686751484681164, "grad_norm": 1.625, "learning_rate": 6.35248508395348e-06, "loss": 0.3173, "step": 17506 }, { "epoch": 0.7687629669473199, "grad_norm": 1.484375, "learning_rate": 6.347877824768722e-06, "loss": 0.307, "step": 17508 }, { "epoch": 0.7688507854265234, "grad_norm": 1.5703125, "learning_rate": 6.343271993967942e-06, "loss": 0.3284, "step": 17510 }, { "epoch": 0.7689386039057269, "grad_norm": 1.546875, "learning_rate": 6.338667591903874e-06, "loss": 0.3296, "step": 17512 }, { "epoch": 0.7690264223849304, "grad_norm": 1.4921875, "learning_rate": 6.334064618929106e-06, "loss": 0.3028, "step": 17514 }, { "epoch": 0.7691142408641338, "grad_norm": 1.609375, "learning_rate": 6.329463075396161e-06, "loss": 0.3115, "step": 17516 }, { "epoch": 0.7692020593433373, "grad_norm": 1.453125, "learning_rate": 6.324862961657393e-06, "loss": 0.3134, "step": 17518 }, { "epoch": 0.7692898778225408, "grad_norm": 1.5546875, "learning_rate": 6.320264278065103e-06, "loss": 0.3194, "step": 17520 }, { "epoch": 0.7693776963017442, "grad_norm": 1.5546875, "learning_rate": 6.315667024971453e-06, "loss": 0.2776, "step": 17522 }, { "epoch": 0.7694655147809478, "grad_norm": 1.421875, "learning_rate": 6.311071202728494e-06, "loss": 0.3067, "step": 17524 }, { "epoch": 0.7695533332601513, "grad_norm": 1.5, "learning_rate": 6.306476811688189e-06, "loss": 0.3116, "step": 17526 }, { "epoch": 0.7696411517393548, "grad_norm": 1.546875, "learning_rate": 6.301883852202365e-06, "loss": 0.2875, "step": 17528 }, { "epoch": 0.7697289702185582, "grad_norm": 1.5, "learning_rate": 6.2972923246227635e-06, "loss": 0.297, "step": 17530 }, { "epoch": 0.7698167886977617, "grad_norm": 1.546875, "learning_rate": 6.292702229301001e-06, "loss": 0.2835, "step": 17532 }, { "epoch": 0.7699046071769652, "grad_norm": 1.546875, "learning_rate": 6.288113566588577e-06, "loss": 0.2944, "step": 17534 }, { "epoch": 0.7699924256561687, "grad_norm": 1.453125, "learning_rate": 6.283526336836912e-06, "loss": 0.3091, "step": 17536 }, { "epoch": 0.7700802441353722, "grad_norm": 1.53125, "learning_rate": 6.2789405403972765e-06, "loss": 0.3228, "step": 17538 }, { "epoch": 0.7701680626145757, "grad_norm": 1.5, "learning_rate": 6.274356177620871e-06, "loss": 0.3132, "step": 17540 }, { "epoch": 0.7702558810937792, "grad_norm": 1.5546875, "learning_rate": 6.269773248858748e-06, "loss": 0.3091, "step": 17542 }, { "epoch": 0.7703436995729827, "grad_norm": 1.5859375, "learning_rate": 6.265191754461891e-06, "loss": 0.3336, "step": 17544 }, { "epoch": 0.7704315180521861, "grad_norm": 1.4375, "learning_rate": 6.260611694781138e-06, "loss": 0.2966, "step": 17546 }, { "epoch": 0.7705193365313896, "grad_norm": 1.5390625, "learning_rate": 6.256033070167236e-06, "loss": 0.2936, "step": 17548 }, { "epoch": 0.7706071550105931, "grad_norm": 1.5703125, "learning_rate": 6.251455880970811e-06, "loss": 0.3251, "step": 17550 }, { "epoch": 0.7706949734897965, "grad_norm": 1.5078125, "learning_rate": 6.246880127542385e-06, "loss": 0.3251, "step": 17552 }, { "epoch": 0.7707827919690001, "grad_norm": 1.5078125, "learning_rate": 6.242305810232379e-06, "loss": 0.3179, "step": 17554 }, { "epoch": 0.7708706104482036, "grad_norm": 1.53125, "learning_rate": 6.237732929391085e-06, "loss": 0.3123, "step": 17556 }, { "epoch": 0.7709584289274071, "grad_norm": 1.5390625, "learning_rate": 6.233161485368707e-06, "loss": 0.3289, "step": 17558 }, { "epoch": 0.7710462474066105, "grad_norm": 1.46875, "learning_rate": 6.228591478515322e-06, "loss": 0.308, "step": 17560 }, { "epoch": 0.771134065885814, "grad_norm": 1.4296875, "learning_rate": 6.224022909180893e-06, "loss": 0.3368, "step": 17562 }, { "epoch": 0.7712218843650175, "grad_norm": 1.5, "learning_rate": 6.219455777715299e-06, "loss": 0.3168, "step": 17564 }, { "epoch": 0.771309702844221, "grad_norm": 1.5390625, "learning_rate": 6.2148900844682775e-06, "loss": 0.3278, "step": 17566 }, { "epoch": 0.7713975213234244, "grad_norm": 1.4765625, "learning_rate": 6.210325829789481e-06, "loss": 0.3131, "step": 17568 }, { "epoch": 0.771485339802628, "grad_norm": 1.5625, "learning_rate": 6.205763014028437e-06, "loss": 0.2855, "step": 17570 }, { "epoch": 0.7715731582818315, "grad_norm": 1.5625, "learning_rate": 6.201201637534562e-06, "loss": 0.3164, "step": 17572 }, { "epoch": 0.771660976761035, "grad_norm": 1.6484375, "learning_rate": 6.196641700657177e-06, "loss": 0.3106, "step": 17574 }, { "epoch": 0.7717487952402384, "grad_norm": 1.4609375, "learning_rate": 6.192083203745472e-06, "loss": 0.2999, "step": 17576 }, { "epoch": 0.7718366137194419, "grad_norm": 1.515625, "learning_rate": 6.187526147148557e-06, "loss": 0.3328, "step": 17578 }, { "epoch": 0.7719244321986454, "grad_norm": 1.5078125, "learning_rate": 6.182970531215384e-06, "loss": 0.3015, "step": 17580 }, { "epoch": 0.7720122506778488, "grad_norm": 1.4609375, "learning_rate": 6.1784163562948476e-06, "loss": 0.3401, "step": 17582 }, { "epoch": 0.7721000691570524, "grad_norm": 1.515625, "learning_rate": 6.173863622735698e-06, "loss": 0.3226, "step": 17584 }, { "epoch": 0.7721878876362559, "grad_norm": 1.4296875, "learning_rate": 6.169312330886578e-06, "loss": 0.3179, "step": 17586 }, { "epoch": 0.7722757061154594, "grad_norm": 1.53125, "learning_rate": 6.164762481096042e-06, "loss": 0.3081, "step": 17588 }, { "epoch": 0.7723635245946628, "grad_norm": 1.484375, "learning_rate": 6.1602140737125e-06, "loss": 0.2958, "step": 17590 }, { "epoch": 0.7724513430738663, "grad_norm": 1.5234375, "learning_rate": 6.15566710908429e-06, "loss": 0.3202, "step": 17592 }, { "epoch": 0.7725391615530698, "grad_norm": 1.5625, "learning_rate": 6.151121587559611e-06, "loss": 0.2966, "step": 17594 }, { "epoch": 0.7726269800322733, "grad_norm": 1.4609375, "learning_rate": 6.146577509486551e-06, "loss": 0.3459, "step": 17596 }, { "epoch": 0.7727147985114767, "grad_norm": 1.5, "learning_rate": 6.1420348752131095e-06, "loss": 0.3385, "step": 17598 }, { "epoch": 0.7728026169906803, "grad_norm": 1.46875, "learning_rate": 6.137493685087154e-06, "loss": 0.2862, "step": 17600 }, { "epoch": 0.7728904354698838, "grad_norm": 1.4140625, "learning_rate": 6.1329539394564596e-06, "loss": 0.2925, "step": 17602 }, { "epoch": 0.7729782539490873, "grad_norm": 1.65625, "learning_rate": 6.128415638668669e-06, "loss": 0.2776, "step": 17604 }, { "epoch": 0.7730660724282907, "grad_norm": 1.578125, "learning_rate": 6.123878783071338e-06, "loss": 0.3116, "step": 17606 }, { "epoch": 0.7731538909074942, "grad_norm": 1.5625, "learning_rate": 6.119343373011896e-06, "loss": 0.3309, "step": 17608 }, { "epoch": 0.7732417093866977, "grad_norm": 1.484375, "learning_rate": 6.114809408837665e-06, "loss": 0.3367, "step": 17610 }, { "epoch": 0.7733295278659011, "grad_norm": 1.5234375, "learning_rate": 6.1102768908958555e-06, "loss": 0.326, "step": 17612 }, { "epoch": 0.7734173463451046, "grad_norm": 1.5, "learning_rate": 6.105745819533562e-06, "loss": 0.3058, "step": 17614 }, { "epoch": 0.7735051648243082, "grad_norm": 1.3984375, "learning_rate": 6.101216195097792e-06, "loss": 0.3065, "step": 17616 }, { "epoch": 0.7735929833035117, "grad_norm": 1.609375, "learning_rate": 6.096688017935406e-06, "loss": 0.3105, "step": 17618 }, { "epoch": 0.7736808017827151, "grad_norm": 1.625, "learning_rate": 6.0921612883931944e-06, "loss": 0.3188, "step": 17620 }, { "epoch": 0.7737686202619186, "grad_norm": 1.59375, "learning_rate": 6.087636006817801e-06, "loss": 0.3217, "step": 17622 }, { "epoch": 0.7738564387411221, "grad_norm": 1.5078125, "learning_rate": 6.083112173555769e-06, "loss": 0.3665, "step": 17624 }, { "epoch": 0.7739442572203256, "grad_norm": 1.5859375, "learning_rate": 6.0785897889535485e-06, "loss": 0.2938, "step": 17626 }, { "epoch": 0.774032075699529, "grad_norm": 1.546875, "learning_rate": 6.074068853357451e-06, "loss": 0.3167, "step": 17628 }, { "epoch": 0.7741198941787326, "grad_norm": 1.6171875, "learning_rate": 6.069549367113706e-06, "loss": 0.3021, "step": 17630 }, { "epoch": 0.7742077126579361, "grad_norm": 1.4296875, "learning_rate": 6.065031330568408e-06, "loss": 0.2957, "step": 17632 }, { "epoch": 0.7742955311371396, "grad_norm": 1.5078125, "learning_rate": 6.0605147440675415e-06, "loss": 0.3229, "step": 17634 }, { "epoch": 0.774383349616343, "grad_norm": 1.5234375, "learning_rate": 6.0559996079570025e-06, "loss": 0.2996, "step": 17636 }, { "epoch": 0.7744711680955465, "grad_norm": 1.4765625, "learning_rate": 6.051485922582548e-06, "loss": 0.315, "step": 17638 }, { "epoch": 0.77455898657475, "grad_norm": 1.5, "learning_rate": 6.046973688289859e-06, "loss": 0.3164, "step": 17640 }, { "epoch": 0.7746468050539534, "grad_norm": 1.546875, "learning_rate": 6.042462905424454e-06, "loss": 0.3116, "step": 17642 }, { "epoch": 0.7747346235331569, "grad_norm": 1.5234375, "learning_rate": 6.0379535743317896e-06, "loss": 0.3056, "step": 17644 }, { "epoch": 0.7748224420123605, "grad_norm": 1.625, "learning_rate": 6.033445695357187e-06, "loss": 0.2799, "step": 17646 }, { "epoch": 0.774910260491564, "grad_norm": 1.5390625, "learning_rate": 6.0289392688458544e-06, "loss": 0.3192, "step": 17648 }, { "epoch": 0.7749980789707674, "grad_norm": 1.5625, "learning_rate": 6.024434295142905e-06, "loss": 0.3216, "step": 17650 }, { "epoch": 0.7750858974499709, "grad_norm": 1.4296875, "learning_rate": 6.019930774593318e-06, "loss": 0.3274, "step": 17652 }, { "epoch": 0.7751737159291744, "grad_norm": 1.5234375, "learning_rate": 6.015428707541993e-06, "loss": 0.325, "step": 17654 }, { "epoch": 0.7752615344083779, "grad_norm": 1.5625, "learning_rate": 6.010928094333684e-06, "loss": 0.336, "step": 17656 }, { "epoch": 0.7753493528875813, "grad_norm": 1.6875, "learning_rate": 6.006428935313049e-06, "loss": 0.3216, "step": 17658 }, { "epoch": 0.7754371713667848, "grad_norm": 1.5078125, "learning_rate": 6.001931230824648e-06, "loss": 0.2961, "step": 17660 }, { "epoch": 0.7755249898459884, "grad_norm": 1.5078125, "learning_rate": 5.997434981212896e-06, "loss": 0.2915, "step": 17662 }, { "epoch": 0.7756128083251919, "grad_norm": 1.546875, "learning_rate": 5.992940186822138e-06, "loss": 0.2998, "step": 17664 }, { "epoch": 0.7757006268043953, "grad_norm": 1.5234375, "learning_rate": 5.988446847996579e-06, "loss": 0.3264, "step": 17666 }, { "epoch": 0.7757884452835988, "grad_norm": 1.484375, "learning_rate": 5.983954965080307e-06, "loss": 0.3098, "step": 17668 }, { "epoch": 0.7758762637628023, "grad_norm": 1.4453125, "learning_rate": 5.9794645384173314e-06, "loss": 0.2944, "step": 17670 }, { "epoch": 0.7759640822420057, "grad_norm": 1.6171875, "learning_rate": 5.974975568351521e-06, "loss": 0.3112, "step": 17672 }, { "epoch": 0.7760519007212092, "grad_norm": 1.46875, "learning_rate": 5.970488055226642e-06, "loss": 0.3, "step": 17674 }, { "epoch": 0.7761397192004127, "grad_norm": 1.6796875, "learning_rate": 5.966001999386339e-06, "loss": 0.3396, "step": 17676 }, { "epoch": 0.7762275376796163, "grad_norm": 1.46875, "learning_rate": 5.9615174011741774e-06, "loss": 0.3165, "step": 17678 }, { "epoch": 0.7763153561588197, "grad_norm": 1.671875, "learning_rate": 5.957034260933567e-06, "loss": 0.3076, "step": 17680 }, { "epoch": 0.7764031746380232, "grad_norm": 1.65625, "learning_rate": 5.952552579007847e-06, "loss": 0.2645, "step": 17682 }, { "epoch": 0.7764909931172267, "grad_norm": 1.5859375, "learning_rate": 5.948072355740214e-06, "loss": 0.3043, "step": 17684 }, { "epoch": 0.7765788115964302, "grad_norm": 1.6328125, "learning_rate": 5.943593591473762e-06, "loss": 0.295, "step": 17686 }, { "epoch": 0.7766666300756336, "grad_norm": 1.4296875, "learning_rate": 5.939116286551488e-06, "loss": 0.3101, "step": 17688 }, { "epoch": 0.7767544485548371, "grad_norm": 1.53125, "learning_rate": 5.9346404413162494e-06, "loss": 0.3455, "step": 17690 }, { "epoch": 0.7768422670340407, "grad_norm": 1.4609375, "learning_rate": 5.930166056110825e-06, "loss": 0.3092, "step": 17692 }, { "epoch": 0.7769300855132442, "grad_norm": 1.4921875, "learning_rate": 5.925693131277854e-06, "loss": 0.328, "step": 17694 }, { "epoch": 0.7770179039924476, "grad_norm": 1.5078125, "learning_rate": 5.921221667159868e-06, "loss": 0.3365, "step": 17696 }, { "epoch": 0.7771057224716511, "grad_norm": 1.4296875, "learning_rate": 5.91675166409931e-06, "loss": 0.2962, "step": 17698 }, { "epoch": 0.7771935409508546, "grad_norm": 1.5078125, "learning_rate": 5.912283122438481e-06, "loss": 0.3077, "step": 17700 }, { "epoch": 0.777281359430058, "grad_norm": 1.5234375, "learning_rate": 5.907816042519587e-06, "loss": 0.3033, "step": 17702 }, { "epoch": 0.7773691779092615, "grad_norm": 1.5, "learning_rate": 5.903350424684712e-06, "loss": 0.3449, "step": 17704 }, { "epoch": 0.777456996388465, "grad_norm": 1.7578125, "learning_rate": 5.898886269275844e-06, "loss": 0.2861, "step": 17706 }, { "epoch": 0.7775448148676686, "grad_norm": 1.6875, "learning_rate": 5.894423576634847e-06, "loss": 0.3205, "step": 17708 }, { "epoch": 0.777632633346872, "grad_norm": 1.46875, "learning_rate": 5.889962347103461e-06, "loss": 0.3092, "step": 17710 }, { "epoch": 0.7777204518260755, "grad_norm": 1.4453125, "learning_rate": 5.88550258102335e-06, "loss": 0.3223, "step": 17712 }, { "epoch": 0.777808270305279, "grad_norm": 1.7265625, "learning_rate": 5.881044278736025e-06, "loss": 0.3207, "step": 17714 }, { "epoch": 0.7778960887844825, "grad_norm": 1.5859375, "learning_rate": 5.87658744058292e-06, "loss": 0.3131, "step": 17716 }, { "epoch": 0.7779839072636859, "grad_norm": 1.5, "learning_rate": 5.8721320669053335e-06, "loss": 0.3225, "step": 17718 }, { "epoch": 0.7780717257428894, "grad_norm": 1.515625, "learning_rate": 5.867678158044451e-06, "loss": 0.3102, "step": 17720 }, { "epoch": 0.7781595442220929, "grad_norm": 1.5, "learning_rate": 5.863225714341367e-06, "loss": 0.3148, "step": 17722 }, { "epoch": 0.7782473627012965, "grad_norm": 1.484375, "learning_rate": 5.85877473613704e-06, "loss": 0.324, "step": 17724 }, { "epoch": 0.7783351811804999, "grad_norm": 1.484375, "learning_rate": 5.854325223772339e-06, "loss": 0.3006, "step": 17726 }, { "epoch": 0.7784229996597034, "grad_norm": 1.5234375, "learning_rate": 5.849877177588e-06, "loss": 0.3068, "step": 17728 }, { "epoch": 0.7785108181389069, "grad_norm": 1.515625, "learning_rate": 5.845430597924653e-06, "loss": 0.3072, "step": 17730 }, { "epoch": 0.7785986366181104, "grad_norm": 1.5546875, "learning_rate": 5.840985485122829e-06, "loss": 0.3408, "step": 17732 }, { "epoch": 0.7786864550973138, "grad_norm": 1.4609375, "learning_rate": 5.836541839522927e-06, "loss": 0.3022, "step": 17734 }, { "epoch": 0.7787742735765173, "grad_norm": 1.578125, "learning_rate": 5.832099661465248e-06, "loss": 0.3409, "step": 17736 }, { "epoch": 0.7788620920557209, "grad_norm": 1.5234375, "learning_rate": 5.827658951289963e-06, "loss": 0.3165, "step": 17738 }, { "epoch": 0.7789499105349244, "grad_norm": 1.515625, "learning_rate": 5.823219709337158e-06, "loss": 0.3617, "step": 17740 }, { "epoch": 0.7790377290141278, "grad_norm": 1.453125, "learning_rate": 5.818781935946779e-06, "loss": 0.2971, "step": 17742 }, { "epoch": 0.7791255474933313, "grad_norm": 1.5078125, "learning_rate": 5.814345631458684e-06, "loss": 0.3356, "step": 17744 }, { "epoch": 0.7792133659725348, "grad_norm": 1.5625, "learning_rate": 5.8099107962125975e-06, "loss": 0.3173, "step": 17746 }, { "epoch": 0.7793011844517382, "grad_norm": 1.4609375, "learning_rate": 5.80547743054814e-06, "loss": 0.3085, "step": 17748 }, { "epoch": 0.7793890029309417, "grad_norm": 1.4765625, "learning_rate": 5.801045534804825e-06, "loss": 0.3229, "step": 17750 }, { "epoch": 0.7794768214101452, "grad_norm": 1.484375, "learning_rate": 5.7966151093220396e-06, "loss": 0.318, "step": 17752 }, { "epoch": 0.7795646398893488, "grad_norm": 1.4921875, "learning_rate": 5.7921861544390805e-06, "loss": 0.3038, "step": 17754 }, { "epoch": 0.7796524583685522, "grad_norm": 1.546875, "learning_rate": 5.787758670495108e-06, "loss": 0.334, "step": 17756 }, { "epoch": 0.7797402768477557, "grad_norm": 1.484375, "learning_rate": 5.783332657829177e-06, "loss": 0.2941, "step": 17758 }, { "epoch": 0.7798280953269592, "grad_norm": 1.484375, "learning_rate": 5.778908116780244e-06, "loss": 0.3116, "step": 17760 }, { "epoch": 0.7799159138061627, "grad_norm": 1.59375, "learning_rate": 5.7744850476871335e-06, "loss": 0.3276, "step": 17762 }, { "epoch": 0.7800037322853661, "grad_norm": 1.5546875, "learning_rate": 5.770063450888569e-06, "loss": 0.3172, "step": 17764 }, { "epoch": 0.7800915507645696, "grad_norm": 1.3984375, "learning_rate": 5.765643326723147e-06, "loss": 0.3122, "step": 17766 }, { "epoch": 0.7801793692437731, "grad_norm": 1.6015625, "learning_rate": 5.761224675529375e-06, "loss": 0.3361, "step": 17768 }, { "epoch": 0.7802671877229767, "grad_norm": 1.4765625, "learning_rate": 5.756807497645633e-06, "loss": 0.3319, "step": 17770 }, { "epoch": 0.7803550062021801, "grad_norm": 1.4375, "learning_rate": 5.752391793410175e-06, "loss": 0.3019, "step": 17772 }, { "epoch": 0.7804428246813836, "grad_norm": 1.453125, "learning_rate": 5.7479775631611775e-06, "loss": 0.3042, "step": 17774 }, { "epoch": 0.7805306431605871, "grad_norm": 1.4375, "learning_rate": 5.743564807236665e-06, "loss": 0.2863, "step": 17776 }, { "epoch": 0.7806184616397905, "grad_norm": 1.578125, "learning_rate": 5.739153525974583e-06, "loss": 0.3002, "step": 17778 }, { "epoch": 0.780706280118994, "grad_norm": 1.421875, "learning_rate": 5.73474371971274e-06, "loss": 0.3111, "step": 17780 }, { "epoch": 0.7807940985981975, "grad_norm": 1.5234375, "learning_rate": 5.730335388788835e-06, "loss": 0.318, "step": 17782 }, { "epoch": 0.7808819170774011, "grad_norm": 1.453125, "learning_rate": 5.725928533540473e-06, "loss": 0.3323, "step": 17784 }, { "epoch": 0.7809697355566045, "grad_norm": 1.5390625, "learning_rate": 5.721523154305117e-06, "loss": 0.3277, "step": 17786 }, { "epoch": 0.781057554035808, "grad_norm": 1.4765625, "learning_rate": 5.717119251420145e-06, "loss": 0.3311, "step": 17788 }, { "epoch": 0.7811453725150115, "grad_norm": 1.4375, "learning_rate": 5.712716825222803e-06, "loss": 0.319, "step": 17790 }, { "epoch": 0.781233190994215, "grad_norm": 1.65625, "learning_rate": 5.7083158760502295e-06, "loss": 0.3544, "step": 17792 }, { "epoch": 0.7813210094734184, "grad_norm": 1.5, "learning_rate": 5.703916404239454e-06, "loss": 0.31, "step": 17794 }, { "epoch": 0.7814088279526219, "grad_norm": 1.5078125, "learning_rate": 5.699518410127375e-06, "loss": 0.3007, "step": 17796 }, { "epoch": 0.7814966464318254, "grad_norm": 1.515625, "learning_rate": 5.695121894050812e-06, "loss": 0.3031, "step": 17798 }, { "epoch": 0.781584464911029, "grad_norm": 1.5078125, "learning_rate": 5.690726856346434e-06, "loss": 0.3103, "step": 17800 }, { "epoch": 0.7816722833902324, "grad_norm": 1.40625, "learning_rate": 5.686333297350832e-06, "loss": 0.3037, "step": 17802 }, { "epoch": 0.7817601018694359, "grad_norm": 1.484375, "learning_rate": 5.681941217400446e-06, "loss": 0.3205, "step": 17804 }, { "epoch": 0.7818479203486394, "grad_norm": 1.5, "learning_rate": 5.677550616831639e-06, "loss": 0.2974, "step": 17806 }, { "epoch": 0.7819357388278428, "grad_norm": 1.5390625, "learning_rate": 5.673161495980639e-06, "loss": 0.3513, "step": 17808 }, { "epoch": 0.7820235573070463, "grad_norm": 1.4921875, "learning_rate": 5.668773855183557e-06, "loss": 0.3199, "step": 17810 }, { "epoch": 0.7821113757862498, "grad_norm": 1.5234375, "learning_rate": 5.664387694776416e-06, "loss": 0.327, "step": 17812 }, { "epoch": 0.7821991942654533, "grad_norm": 1.4765625, "learning_rate": 5.660003015095092e-06, "loss": 0.3015, "step": 17814 }, { "epoch": 0.7822870127446568, "grad_norm": 1.59375, "learning_rate": 5.65561981647538e-06, "loss": 0.2809, "step": 17816 }, { "epoch": 0.7823748312238603, "grad_norm": 1.5546875, "learning_rate": 5.65123809925294e-06, "loss": 0.3464, "step": 17818 }, { "epoch": 0.7824626497030638, "grad_norm": 1.5390625, "learning_rate": 5.646857863763317e-06, "loss": 0.335, "step": 17820 }, { "epoch": 0.7825504681822673, "grad_norm": 1.4921875, "learning_rate": 5.642479110341964e-06, "loss": 0.3084, "step": 17822 }, { "epoch": 0.7826382866614707, "grad_norm": 1.6015625, "learning_rate": 5.638101839324203e-06, "loss": 0.3195, "step": 17824 }, { "epoch": 0.7827261051406742, "grad_norm": 1.5078125, "learning_rate": 5.633726051045243e-06, "loss": 0.3401, "step": 17826 }, { "epoch": 0.7828139236198777, "grad_norm": 1.5546875, "learning_rate": 5.629351745840181e-06, "loss": 0.3235, "step": 17828 }, { "epoch": 0.7829017420990811, "grad_norm": 1.515625, "learning_rate": 5.624978924044008e-06, "loss": 0.3002, "step": 17830 }, { "epoch": 0.7829895605782847, "grad_norm": 1.46875, "learning_rate": 5.620607585991597e-06, "loss": 0.288, "step": 17832 }, { "epoch": 0.7830773790574882, "grad_norm": 1.453125, "learning_rate": 5.616237732017693e-06, "loss": 0.3017, "step": 17834 }, { "epoch": 0.7831651975366917, "grad_norm": 1.46875, "learning_rate": 5.611869362456959e-06, "loss": 0.3192, "step": 17836 }, { "epoch": 0.7832530160158951, "grad_norm": 1.4609375, "learning_rate": 5.607502477643908e-06, "loss": 0.2933, "step": 17838 }, { "epoch": 0.7833408344950986, "grad_norm": 1.4296875, "learning_rate": 5.603137077912976e-06, "loss": 0.3554, "step": 17840 }, { "epoch": 0.7834286529743021, "grad_norm": 1.53125, "learning_rate": 5.598773163598456e-06, "loss": 0.2942, "step": 17842 }, { "epoch": 0.7835164714535056, "grad_norm": 1.4765625, "learning_rate": 5.5944107350345274e-06, "loss": 0.3343, "step": 17844 }, { "epoch": 0.7836042899327091, "grad_norm": 1.4609375, "learning_rate": 5.590049792555285e-06, "loss": 0.3123, "step": 17846 }, { "epoch": 0.7836921084119126, "grad_norm": 1.4453125, "learning_rate": 5.5856903364946785e-06, "loss": 0.3106, "step": 17848 }, { "epoch": 0.7837799268911161, "grad_norm": 1.5234375, "learning_rate": 5.581332367186562e-06, "loss": 0.3005, "step": 17850 }, { "epoch": 0.7838677453703196, "grad_norm": 1.546875, "learning_rate": 5.576975884964672e-06, "loss": 0.3089, "step": 17852 }, { "epoch": 0.783955563849523, "grad_norm": 1.7734375, "learning_rate": 5.572620890162622e-06, "loss": 0.3218, "step": 17854 }, { "epoch": 0.7840433823287265, "grad_norm": 1.4609375, "learning_rate": 5.568267383113923e-06, "loss": 0.3051, "step": 17856 }, { "epoch": 0.78413120080793, "grad_norm": 1.546875, "learning_rate": 5.563915364151959e-06, "loss": 0.2974, "step": 17858 }, { "epoch": 0.7842190192871334, "grad_norm": 1.5546875, "learning_rate": 5.55956483361002e-06, "loss": 0.3184, "step": 17860 }, { "epoch": 0.784306837766337, "grad_norm": 1.484375, "learning_rate": 5.555215791821261e-06, "loss": 0.2942, "step": 17862 }, { "epoch": 0.7843946562455405, "grad_norm": 1.578125, "learning_rate": 5.550868239118745e-06, "loss": 0.3245, "step": 17864 }, { "epoch": 0.784482474724744, "grad_norm": 1.5390625, "learning_rate": 5.5465221758353945e-06, "loss": 0.2906, "step": 17866 }, { "epoch": 0.7845702932039474, "grad_norm": 1.53125, "learning_rate": 5.542177602304047e-06, "loss": 0.3384, "step": 17868 }, { "epoch": 0.7846581116831509, "grad_norm": 1.4140625, "learning_rate": 5.537834518857401e-06, "loss": 0.3252, "step": 17870 }, { "epoch": 0.7847459301623544, "grad_norm": 1.5, "learning_rate": 5.5334929258280485e-06, "loss": 0.3389, "step": 17872 }, { "epoch": 0.7848337486415579, "grad_norm": 1.4453125, "learning_rate": 5.5291528235484776e-06, "loss": 0.3351, "step": 17874 }, { "epoch": 0.7849215671207613, "grad_norm": 1.4453125, "learning_rate": 5.524814212351048e-06, "loss": 0.3114, "step": 17876 }, { "epoch": 0.7850093855999649, "grad_norm": 1.4296875, "learning_rate": 5.520477092568019e-06, "loss": 0.3462, "step": 17878 }, { "epoch": 0.7850972040791684, "grad_norm": 1.4765625, "learning_rate": 5.516141464531524e-06, "loss": 0.3045, "step": 17880 }, { "epoch": 0.7851850225583719, "grad_norm": 1.5625, "learning_rate": 5.511807328573579e-06, "loss": 0.3109, "step": 17882 }, { "epoch": 0.7852728410375753, "grad_norm": 1.4921875, "learning_rate": 5.507474685026115e-06, "loss": 0.3091, "step": 17884 }, { "epoch": 0.7853606595167788, "grad_norm": 1.484375, "learning_rate": 5.503143534220901e-06, "loss": 0.3276, "step": 17886 }, { "epoch": 0.7854484779959823, "grad_norm": 1.421875, "learning_rate": 5.4988138764896305e-06, "loss": 0.3302, "step": 17888 }, { "epoch": 0.7855362964751857, "grad_norm": 1.5390625, "learning_rate": 5.494485712163866e-06, "loss": 0.3134, "step": 17890 }, { "epoch": 0.7856241149543893, "grad_norm": 1.4375, "learning_rate": 5.490159041575066e-06, "loss": 0.3505, "step": 17892 }, { "epoch": 0.7857119334335928, "grad_norm": 1.546875, "learning_rate": 5.485833865054563e-06, "loss": 0.3337, "step": 17894 }, { "epoch": 0.7857997519127963, "grad_norm": 1.46875, "learning_rate": 5.481510182933575e-06, "loss": 0.2966, "step": 17896 }, { "epoch": 0.7858875703919997, "grad_norm": 1.5625, "learning_rate": 5.4771879955432205e-06, "loss": 0.316, "step": 17898 }, { "epoch": 0.7859753888712032, "grad_norm": 1.5078125, "learning_rate": 5.472867303214485e-06, "loss": 0.3278, "step": 17900 }, { "epoch": 0.7860632073504067, "grad_norm": 1.4140625, "learning_rate": 5.46854810627826e-06, "loss": 0.2878, "step": 17902 }, { "epoch": 0.7861510258296102, "grad_norm": 1.484375, "learning_rate": 5.464230405065301e-06, "loss": 0.3051, "step": 17904 }, { "epoch": 0.7862388443088136, "grad_norm": 1.46875, "learning_rate": 5.459914199906252e-06, "loss": 0.3092, "step": 17906 }, { "epoch": 0.7863266627880172, "grad_norm": 1.5546875, "learning_rate": 5.455599491131669e-06, "loss": 0.3125, "step": 17908 }, { "epoch": 0.7864144812672207, "grad_norm": 1.4921875, "learning_rate": 5.451286279071952e-06, "loss": 0.3291, "step": 17910 }, { "epoch": 0.7865022997464242, "grad_norm": 1.5625, "learning_rate": 5.446974564057425e-06, "loss": 0.3274, "step": 17912 }, { "epoch": 0.7865901182256276, "grad_norm": 1.578125, "learning_rate": 5.442664346418275e-06, "loss": 0.3265, "step": 17914 }, { "epoch": 0.7866779367048311, "grad_norm": 1.453125, "learning_rate": 5.438355626484576e-06, "loss": 0.3356, "step": 17916 }, { "epoch": 0.7867657551840346, "grad_norm": 1.5, "learning_rate": 5.434048404586292e-06, "loss": 0.3155, "step": 17918 }, { "epoch": 0.786853573663238, "grad_norm": 1.578125, "learning_rate": 5.429742681053266e-06, "loss": 0.3409, "step": 17920 }, { "epoch": 0.7869413921424415, "grad_norm": 1.4296875, "learning_rate": 5.4254384562152424e-06, "loss": 0.3067, "step": 17922 }, { "epoch": 0.7870292106216451, "grad_norm": 1.4921875, "learning_rate": 5.421135730401828e-06, "loss": 0.3168, "step": 17924 }, { "epoch": 0.7871170291008486, "grad_norm": 1.5625, "learning_rate": 5.416834503942539e-06, "loss": 0.3115, "step": 17926 }, { "epoch": 0.787204847580052, "grad_norm": 1.609375, "learning_rate": 5.4125347771667585e-06, "loss": 0.3487, "step": 17928 }, { "epoch": 0.7872926660592555, "grad_norm": 1.546875, "learning_rate": 5.408236550403753e-06, "loss": 0.3071, "step": 17930 }, { "epoch": 0.787380484538459, "grad_norm": 1.4453125, "learning_rate": 5.403939823982698e-06, "loss": 0.3221, "step": 17932 }, { "epoch": 0.7874683030176625, "grad_norm": 1.5390625, "learning_rate": 5.39964459823262e-06, "loss": 0.299, "step": 17934 }, { "epoch": 0.7875561214968659, "grad_norm": 1.421875, "learning_rate": 5.395350873482463e-06, "loss": 0.2888, "step": 17936 }, { "epoch": 0.7876439399760695, "grad_norm": 1.484375, "learning_rate": 5.391058650061032e-06, "loss": 0.3037, "step": 17938 }, { "epoch": 0.787731758455273, "grad_norm": 1.484375, "learning_rate": 5.3867679282970345e-06, "loss": 0.3282, "step": 17940 }, { "epoch": 0.7878195769344765, "grad_norm": 1.46875, "learning_rate": 5.382478708519051e-06, "loss": 0.3471, "step": 17942 }, { "epoch": 0.7879073954136799, "grad_norm": 1.4609375, "learning_rate": 5.378190991055543e-06, "loss": 0.292, "step": 17944 }, { "epoch": 0.7879952138928834, "grad_norm": 1.625, "learning_rate": 5.373904776234886e-06, "loss": 0.3057, "step": 17946 }, { "epoch": 0.7880830323720869, "grad_norm": 1.5078125, "learning_rate": 5.369620064385294e-06, "loss": 0.3136, "step": 17948 }, { "epoch": 0.7881708508512903, "grad_norm": 1.59375, "learning_rate": 5.36533685583491e-06, "loss": 0.3183, "step": 17950 }, { "epoch": 0.7882586693304938, "grad_norm": 1.5625, "learning_rate": 5.361055150911729e-06, "loss": 0.3312, "step": 17952 }, { "epoch": 0.7883464878096974, "grad_norm": 1.4765625, "learning_rate": 5.3567749499436605e-06, "loss": 0.3276, "step": 17954 }, { "epoch": 0.7884343062889009, "grad_norm": 1.5390625, "learning_rate": 5.352496253258474e-06, "loss": 0.3156, "step": 17956 }, { "epoch": 0.7885221247681043, "grad_norm": 1.4921875, "learning_rate": 5.348219061183826e-06, "loss": 0.3053, "step": 17958 }, { "epoch": 0.7886099432473078, "grad_norm": 1.515625, "learning_rate": 5.34394337404728e-06, "loss": 0.3108, "step": 17960 }, { "epoch": 0.7886977617265113, "grad_norm": 1.484375, "learning_rate": 5.339669192176258e-06, "loss": 0.3091, "step": 17962 }, { "epoch": 0.7887855802057148, "grad_norm": 1.4453125, "learning_rate": 5.335396515898086e-06, "loss": 0.2981, "step": 17964 }, { "epoch": 0.7888733986849182, "grad_norm": 1.5078125, "learning_rate": 5.331125345539967e-06, "loss": 0.344, "step": 17966 }, { "epoch": 0.7889612171641217, "grad_norm": 1.46875, "learning_rate": 5.326855681428974e-06, "loss": 0.3075, "step": 17968 }, { "epoch": 0.7890490356433253, "grad_norm": 1.4921875, "learning_rate": 5.3225875238920945e-06, "loss": 0.311, "step": 17970 }, { "epoch": 0.7891368541225288, "grad_norm": 1.5, "learning_rate": 5.318320873256174e-06, "loss": 0.3374, "step": 17972 }, { "epoch": 0.7892246726017322, "grad_norm": 1.4609375, "learning_rate": 5.314055729847967e-06, "loss": 0.2925, "step": 17974 }, { "epoch": 0.7893124910809357, "grad_norm": 1.5, "learning_rate": 5.309792093994093e-06, "loss": 0.3213, "step": 17976 }, { "epoch": 0.7894003095601392, "grad_norm": 1.5546875, "learning_rate": 5.3055299660210555e-06, "loss": 0.3087, "step": 17978 }, { "epoch": 0.7894881280393427, "grad_norm": 1.46875, "learning_rate": 5.301269346255258e-06, "loss": 0.3155, "step": 17980 }, { "epoch": 0.7895759465185461, "grad_norm": 1.4375, "learning_rate": 5.2970102350229675e-06, "loss": 0.3026, "step": 17982 }, { "epoch": 0.7896637649977497, "grad_norm": 1.546875, "learning_rate": 5.292752632650363e-06, "loss": 0.3308, "step": 17984 }, { "epoch": 0.7897515834769532, "grad_norm": 1.53125, "learning_rate": 5.288496539463481e-06, "loss": 0.3074, "step": 17986 }, { "epoch": 0.7898394019561567, "grad_norm": 1.578125, "learning_rate": 5.284241955788266e-06, "loss": 0.3303, "step": 17988 }, { "epoch": 0.7899272204353601, "grad_norm": 1.546875, "learning_rate": 5.279988881950526e-06, "loss": 0.3099, "step": 17990 }, { "epoch": 0.7900150389145636, "grad_norm": 1.4609375, "learning_rate": 5.2757373182759585e-06, "loss": 0.3013, "step": 17992 }, { "epoch": 0.7901028573937671, "grad_norm": 1.671875, "learning_rate": 5.271487265090163e-06, "loss": 0.3248, "step": 17994 }, { "epoch": 0.7901906758729705, "grad_norm": 1.8984375, "learning_rate": 5.2672387227185954e-06, "loss": 0.3134, "step": 17996 }, { "epoch": 0.790278494352174, "grad_norm": 1.5, "learning_rate": 5.262991691486624e-06, "loss": 0.3233, "step": 17998 }, { "epoch": 0.7903663128313776, "grad_norm": 1.484375, "learning_rate": 5.25874617171947e-06, "loss": 0.31, "step": 18000 }, { "epoch": 0.7904541313105811, "grad_norm": 1.5703125, "learning_rate": 5.254502163742275e-06, "loss": 0.2897, "step": 18002 }, { "epoch": 0.7905419497897845, "grad_norm": 1.4765625, "learning_rate": 5.250259667880039e-06, "loss": 0.317, "step": 18004 }, { "epoch": 0.790629768268988, "grad_norm": 1.4921875, "learning_rate": 5.246018684457646e-06, "loss": 0.2996, "step": 18006 }, { "epoch": 0.7907175867481915, "grad_norm": 1.4609375, "learning_rate": 5.241779213799888e-06, "loss": 0.3249, "step": 18008 }, { "epoch": 0.790805405227395, "grad_norm": 1.515625, "learning_rate": 5.237541256231402e-06, "loss": 0.3213, "step": 18010 }, { "epoch": 0.7908932237065984, "grad_norm": 1.5546875, "learning_rate": 5.23330481207675e-06, "loss": 0.3026, "step": 18012 }, { "epoch": 0.7909810421858019, "grad_norm": 1.546875, "learning_rate": 5.22906988166035e-06, "loss": 0.3241, "step": 18014 }, { "epoch": 0.7910688606650055, "grad_norm": 1.4921875, "learning_rate": 5.224836465306521e-06, "loss": 0.3207, "step": 18016 }, { "epoch": 0.791156679144209, "grad_norm": 1.4296875, "learning_rate": 5.22060456333946e-06, "loss": 0.3222, "step": 18018 }, { "epoch": 0.7912444976234124, "grad_norm": 1.453125, "learning_rate": 5.216374176083233e-06, "loss": 0.3284, "step": 18020 }, { "epoch": 0.7913323161026159, "grad_norm": 1.5625, "learning_rate": 5.212145303861821e-06, "loss": 0.311, "step": 18022 }, { "epoch": 0.7914201345818194, "grad_norm": 1.5078125, "learning_rate": 5.207917946999058e-06, "loss": 0.3093, "step": 18024 }, { "epoch": 0.7915079530610228, "grad_norm": 1.5546875, "learning_rate": 5.2036921058186915e-06, "loss": 0.3083, "step": 18026 }, { "epoch": 0.7915957715402263, "grad_norm": 1.59375, "learning_rate": 5.199467780644329e-06, "loss": 0.3385, "step": 18028 }, { "epoch": 0.7916835900194298, "grad_norm": 1.5546875, "learning_rate": 5.195244971799462e-06, "loss": 0.3325, "step": 18030 }, { "epoch": 0.7917714084986334, "grad_norm": 1.4921875, "learning_rate": 5.19102367960749e-06, "loss": 0.3155, "step": 18032 }, { "epoch": 0.7918592269778368, "grad_norm": 1.5625, "learning_rate": 5.186803904391669e-06, "loss": 0.3371, "step": 18034 }, { "epoch": 0.7919470454570403, "grad_norm": 1.421875, "learning_rate": 5.1825856464751575e-06, "loss": 0.3148, "step": 18036 }, { "epoch": 0.7920348639362438, "grad_norm": 1.5, "learning_rate": 5.178368906180989e-06, "loss": 0.2917, "step": 18038 }, { "epoch": 0.7921226824154473, "grad_norm": 1.484375, "learning_rate": 5.174153683832081e-06, "loss": 0.3068, "step": 18040 }, { "epoch": 0.7922105008946507, "grad_norm": 1.4453125, "learning_rate": 5.1699399797512375e-06, "loss": 0.3011, "step": 18042 }, { "epoch": 0.7922983193738542, "grad_norm": 1.59375, "learning_rate": 5.165727794261135e-06, "loss": 0.3002, "step": 18044 }, { "epoch": 0.7923861378530578, "grad_norm": 1.5234375, "learning_rate": 5.161517127684362e-06, "loss": 0.3056, "step": 18046 }, { "epoch": 0.7924739563322613, "grad_norm": 1.59375, "learning_rate": 5.157307980343357e-06, "loss": 0.3399, "step": 18048 }, { "epoch": 0.7925617748114647, "grad_norm": 1.578125, "learning_rate": 5.153100352560467e-06, "loss": 0.2994, "step": 18050 }, { "epoch": 0.7926495932906682, "grad_norm": 1.4921875, "learning_rate": 5.148894244657912e-06, "loss": 0.3117, "step": 18052 }, { "epoch": 0.7927374117698717, "grad_norm": 1.4765625, "learning_rate": 5.144689656957785e-06, "loss": 0.3101, "step": 18054 }, { "epoch": 0.7928252302490751, "grad_norm": 1.4765625, "learning_rate": 5.140486589782092e-06, "loss": 0.3079, "step": 18056 }, { "epoch": 0.7929130487282786, "grad_norm": 1.6015625, "learning_rate": 5.13628504345269e-06, "loss": 0.3244, "step": 18058 }, { "epoch": 0.7930008672074821, "grad_norm": 1.4765625, "learning_rate": 5.13208501829135e-06, "loss": 0.3635, "step": 18060 }, { "epoch": 0.7930886856866857, "grad_norm": 1.4609375, "learning_rate": 5.127886514619698e-06, "loss": 0.3079, "step": 18062 }, { "epoch": 0.7931765041658891, "grad_norm": 1.4140625, "learning_rate": 5.123689532759254e-06, "loss": 0.2855, "step": 18064 }, { "epoch": 0.7932643226450926, "grad_norm": 1.5703125, "learning_rate": 5.119494073031439e-06, "loss": 0.3265, "step": 18066 }, { "epoch": 0.7933521411242961, "grad_norm": 1.4140625, "learning_rate": 5.115300135757534e-06, "loss": 0.3127, "step": 18068 }, { "epoch": 0.7934399596034996, "grad_norm": 1.5625, "learning_rate": 5.11110772125871e-06, "loss": 0.3004, "step": 18070 }, { "epoch": 0.793527778082703, "grad_norm": 1.5, "learning_rate": 5.1069168298560176e-06, "loss": 0.3214, "step": 18072 }, { "epoch": 0.7936155965619065, "grad_norm": 1.453125, "learning_rate": 5.10272746187041e-06, "loss": 0.3196, "step": 18074 }, { "epoch": 0.79370341504111, "grad_norm": 1.4375, "learning_rate": 5.098539617622697e-06, "loss": 0.3235, "step": 18076 }, { "epoch": 0.7937912335203136, "grad_norm": 1.484375, "learning_rate": 5.094353297433596e-06, "loss": 0.3163, "step": 18078 }, { "epoch": 0.793879051999517, "grad_norm": 1.4921875, "learning_rate": 5.090168501623693e-06, "loss": 0.3085, "step": 18080 }, { "epoch": 0.7939668704787205, "grad_norm": 1.4453125, "learning_rate": 5.085985230513451e-06, "loss": 0.2908, "step": 18082 }, { "epoch": 0.794054688957924, "grad_norm": 1.53125, "learning_rate": 5.081803484423242e-06, "loss": 0.3026, "step": 18084 }, { "epoch": 0.7941425074371274, "grad_norm": 1.546875, "learning_rate": 5.077623263673289e-06, "loss": 0.3191, "step": 18086 }, { "epoch": 0.7942303259163309, "grad_norm": 1.5078125, "learning_rate": 5.07344456858373e-06, "loss": 0.3342, "step": 18088 }, { "epoch": 0.7943181443955344, "grad_norm": 1.546875, "learning_rate": 5.069267399474559e-06, "loss": 0.3048, "step": 18090 }, { "epoch": 0.794405962874738, "grad_norm": 1.5625, "learning_rate": 5.0650917566656656e-06, "loss": 0.2963, "step": 18092 }, { "epoch": 0.7944937813539414, "grad_norm": 1.484375, "learning_rate": 5.0609176404768285e-06, "loss": 0.3213, "step": 18094 }, { "epoch": 0.7945815998331449, "grad_norm": 1.515625, "learning_rate": 5.056745051227693e-06, "loss": 0.3337, "step": 18096 }, { "epoch": 0.7946694183123484, "grad_norm": 1.53125, "learning_rate": 5.052573989237808e-06, "loss": 0.3239, "step": 18098 }, { "epoch": 0.7947572367915519, "grad_norm": 1.53125, "learning_rate": 5.048404454826588e-06, "loss": 0.3279, "step": 18100 }, { "epoch": 0.7948450552707553, "grad_norm": 1.484375, "learning_rate": 5.044236448313339e-06, "loss": 0.3332, "step": 18102 }, { "epoch": 0.7949328737499588, "grad_norm": 1.578125, "learning_rate": 5.040069970017247e-06, "loss": 0.3256, "step": 18104 }, { "epoch": 0.7950206922291623, "grad_norm": 1.4765625, "learning_rate": 5.035905020257373e-06, "loss": 0.3154, "step": 18106 }, { "epoch": 0.7951085107083659, "grad_norm": 1.5546875, "learning_rate": 5.031741599352685e-06, "loss": 0.3131, "step": 18108 }, { "epoch": 0.7951963291875693, "grad_norm": 1.4375, "learning_rate": 5.027579707622007e-06, "loss": 0.3013, "step": 18110 }, { "epoch": 0.7952841476667728, "grad_norm": 1.6171875, "learning_rate": 5.02341934538407e-06, "loss": 0.3147, "step": 18112 }, { "epoch": 0.7953719661459763, "grad_norm": 1.5546875, "learning_rate": 5.019260512957466e-06, "loss": 0.3319, "step": 18114 }, { "epoch": 0.7954597846251797, "grad_norm": 1.4921875, "learning_rate": 5.0151032106606764e-06, "loss": 0.3055, "step": 18116 }, { "epoch": 0.7955476031043832, "grad_norm": 1.484375, "learning_rate": 5.010947438812078e-06, "loss": 0.3211, "step": 18118 }, { "epoch": 0.7956354215835867, "grad_norm": 1.5859375, "learning_rate": 5.006793197729912e-06, "loss": 0.3466, "step": 18120 }, { "epoch": 0.7957232400627902, "grad_norm": 1.484375, "learning_rate": 5.002640487732321e-06, "loss": 0.3254, "step": 18122 }, { "epoch": 0.7958110585419937, "grad_norm": 1.4375, "learning_rate": 4.9984893091373165e-06, "loss": 0.3338, "step": 18124 }, { "epoch": 0.7958988770211972, "grad_norm": 1.53125, "learning_rate": 4.994339662262787e-06, "loss": 0.3281, "step": 18126 }, { "epoch": 0.7959866955004007, "grad_norm": 1.453125, "learning_rate": 4.990191547426531e-06, "loss": 0.3051, "step": 18128 }, { "epoch": 0.7960745139796042, "grad_norm": 1.4453125, "learning_rate": 4.986044964946201e-06, "loss": 0.3234, "step": 18130 }, { "epoch": 0.7961623324588076, "grad_norm": 1.515625, "learning_rate": 4.981899915139346e-06, "loss": 0.3067, "step": 18132 }, { "epoch": 0.7962501509380111, "grad_norm": 1.5703125, "learning_rate": 4.977756398323388e-06, "loss": 0.2966, "step": 18134 }, { "epoch": 0.7963379694172146, "grad_norm": 1.4765625, "learning_rate": 4.97361441481565e-06, "loss": 0.3272, "step": 18136 }, { "epoch": 0.7964257878964182, "grad_norm": 1.546875, "learning_rate": 4.969473964933313e-06, "loss": 0.2979, "step": 18138 }, { "epoch": 0.7965136063756216, "grad_norm": 1.484375, "learning_rate": 4.965335048993472e-06, "loss": 0.2876, "step": 18140 }, { "epoch": 0.7966014248548251, "grad_norm": 1.515625, "learning_rate": 4.961197667313072e-06, "loss": 0.3546, "step": 18142 }, { "epoch": 0.7966892433340286, "grad_norm": 1.5546875, "learning_rate": 4.957061820208952e-06, "loss": 0.3265, "step": 18144 }, { "epoch": 0.796777061813232, "grad_norm": 1.453125, "learning_rate": 4.952927507997851e-06, "loss": 0.3119, "step": 18146 }, { "epoch": 0.7968648802924355, "grad_norm": 1.5, "learning_rate": 4.948794730996359e-06, "loss": 0.3151, "step": 18148 }, { "epoch": 0.796952698771639, "grad_norm": 1.4375, "learning_rate": 4.9446634895209815e-06, "loss": 0.3152, "step": 18150 }, { "epoch": 0.7970405172508425, "grad_norm": 1.5234375, "learning_rate": 4.940533783888079e-06, "loss": 0.3161, "step": 18152 }, { "epoch": 0.797128335730046, "grad_norm": 1.5390625, "learning_rate": 4.936405614413903e-06, "loss": 0.3029, "step": 18154 }, { "epoch": 0.7972161542092495, "grad_norm": 1.5, "learning_rate": 4.932278981414601e-06, "loss": 0.3507, "step": 18156 }, { "epoch": 0.797303972688453, "grad_norm": 1.4921875, "learning_rate": 4.92815388520618e-06, "loss": 0.3181, "step": 18158 }, { "epoch": 0.7973917911676565, "grad_norm": 1.453125, "learning_rate": 4.924030326104556e-06, "loss": 0.303, "step": 18160 }, { "epoch": 0.7974796096468599, "grad_norm": 1.5625, "learning_rate": 4.9199083044254915e-06, "loss": 0.3098, "step": 18162 }, { "epoch": 0.7975674281260634, "grad_norm": 1.453125, "learning_rate": 4.915787820484669e-06, "loss": 0.3075, "step": 18164 }, { "epoch": 0.7976552466052669, "grad_norm": 1.4921875, "learning_rate": 4.911668874597628e-06, "loss": 0.326, "step": 18166 }, { "epoch": 0.7977430650844703, "grad_norm": 1.5234375, "learning_rate": 4.9075514670797935e-06, "loss": 0.3283, "step": 18168 }, { "epoch": 0.7978308835636739, "grad_norm": 1.484375, "learning_rate": 4.903435598246492e-06, "loss": 0.3154, "step": 18170 }, { "epoch": 0.7979187020428774, "grad_norm": 1.484375, "learning_rate": 4.899321268412904e-06, "loss": 0.3441, "step": 18172 }, { "epoch": 0.7980065205220809, "grad_norm": 1.6015625, "learning_rate": 4.895208477894117e-06, "loss": 0.3069, "step": 18174 }, { "epoch": 0.7980943390012843, "grad_norm": 1.5234375, "learning_rate": 4.891097227005085e-06, "loss": 0.307, "step": 18176 }, { "epoch": 0.7981821574804878, "grad_norm": 1.46875, "learning_rate": 4.88698751606064e-06, "loss": 0.3102, "step": 18178 }, { "epoch": 0.7982699759596913, "grad_norm": 1.5, "learning_rate": 4.882879345375521e-06, "loss": 0.3094, "step": 18180 }, { "epoch": 0.7983577944388948, "grad_norm": 1.515625, "learning_rate": 4.878772715264315e-06, "loss": 0.3016, "step": 18182 }, { "epoch": 0.7984456129180982, "grad_norm": 1.4453125, "learning_rate": 4.874667626041526e-06, "loss": 0.3108, "step": 18184 }, { "epoch": 0.7985334313973018, "grad_norm": 1.5703125, "learning_rate": 4.870564078021514e-06, "loss": 0.3263, "step": 18186 }, { "epoch": 0.7986212498765053, "grad_norm": 1.46875, "learning_rate": 4.866462071518524e-06, "loss": 0.2997, "step": 18188 }, { "epoch": 0.7987090683557088, "grad_norm": 1.59375, "learning_rate": 4.862361606846702e-06, "loss": 0.3249, "step": 18190 }, { "epoch": 0.7987968868349122, "grad_norm": 1.671875, "learning_rate": 4.858262684320056e-06, "loss": 0.3326, "step": 18192 }, { "epoch": 0.7988847053141157, "grad_norm": 1.5546875, "learning_rate": 4.854165304252481e-06, "loss": 0.3095, "step": 18194 }, { "epoch": 0.7989725237933192, "grad_norm": 1.46875, "learning_rate": 4.850069466957749e-06, "loss": 0.3536, "step": 18196 }, { "epoch": 0.7990603422725227, "grad_norm": 1.5078125, "learning_rate": 4.8459751727495335e-06, "loss": 0.3048, "step": 18198 }, { "epoch": 0.7991481607517262, "grad_norm": 1.5390625, "learning_rate": 4.841882421941365e-06, "loss": 0.3148, "step": 18200 }, { "epoch": 0.7992359792309297, "grad_norm": 1.53125, "learning_rate": 4.837791214846679e-06, "loss": 0.3154, "step": 18202 }, { "epoch": 0.7993237977101332, "grad_norm": 1.515625, "learning_rate": 4.833701551778777e-06, "loss": 0.3204, "step": 18204 }, { "epoch": 0.7994116161893366, "grad_norm": 1.4375, "learning_rate": 4.829613433050837e-06, "loss": 0.3105, "step": 18206 }, { "epoch": 0.7994994346685401, "grad_norm": 1.4453125, "learning_rate": 4.82552685897594e-06, "loss": 0.2983, "step": 18208 }, { "epoch": 0.7995872531477436, "grad_norm": 1.4921875, "learning_rate": 4.8214418298670264e-06, "loss": 0.3086, "step": 18210 }, { "epoch": 0.7996750716269471, "grad_norm": 1.5, "learning_rate": 4.8173583460369435e-06, "loss": 0.3359, "step": 18212 }, { "epoch": 0.7997628901061505, "grad_norm": 1.484375, "learning_rate": 4.813276407798395e-06, "loss": 0.2954, "step": 18214 }, { "epoch": 0.7998507085853541, "grad_norm": 1.4921875, "learning_rate": 4.809196015463971e-06, "loss": 0.3317, "step": 18216 }, { "epoch": 0.7999385270645576, "grad_norm": 1.515625, "learning_rate": 4.805117169346163e-06, "loss": 0.3251, "step": 18218 }, { "epoch": 0.8000263455437611, "grad_norm": 1.4609375, "learning_rate": 4.801039869757318e-06, "loss": 0.3396, "step": 18220 }, { "epoch": 0.8001141640229645, "grad_norm": 1.609375, "learning_rate": 4.7969641170096944e-06, "loss": 0.3243, "step": 18222 }, { "epoch": 0.800201982502168, "grad_norm": 1.5, "learning_rate": 4.792889911415388e-06, "loss": 0.2899, "step": 18224 }, { "epoch": 0.8002898009813715, "grad_norm": 1.4296875, "learning_rate": 4.788817253286424e-06, "loss": 0.3137, "step": 18226 }, { "epoch": 0.800377619460575, "grad_norm": 1.4296875, "learning_rate": 4.784746142934676e-06, "loss": 0.2939, "step": 18228 }, { "epoch": 0.8004654379397784, "grad_norm": 1.5234375, "learning_rate": 4.780676580671911e-06, "loss": 0.3131, "step": 18230 }, { "epoch": 0.800553256418982, "grad_norm": 1.5078125, "learning_rate": 4.776608566809787e-06, "loss": 0.3141, "step": 18232 }, { "epoch": 0.8006410748981855, "grad_norm": 1.4609375, "learning_rate": 4.772542101659819e-06, "loss": 0.3322, "step": 18234 }, { "epoch": 0.800728893377389, "grad_norm": 1.4609375, "learning_rate": 4.7684771855334324e-06, "loss": 0.3079, "step": 18236 }, { "epoch": 0.8008167118565924, "grad_norm": 1.4453125, "learning_rate": 4.764413818741914e-06, "loss": 0.3413, "step": 18238 }, { "epoch": 0.8009045303357959, "grad_norm": 1.53125, "learning_rate": 4.760352001596427e-06, "loss": 0.3347, "step": 18240 }, { "epoch": 0.8009923488149994, "grad_norm": 1.5, "learning_rate": 4.756291734408044e-06, "loss": 0.3092, "step": 18242 }, { "epoch": 0.8010801672942028, "grad_norm": 1.421875, "learning_rate": 4.752233017487687e-06, "loss": 0.3148, "step": 18244 }, { "epoch": 0.8011679857734064, "grad_norm": 1.53125, "learning_rate": 4.748175851146186e-06, "loss": 0.3175, "step": 18246 }, { "epoch": 0.8012558042526099, "grad_norm": 1.453125, "learning_rate": 4.74412023569423e-06, "loss": 0.2983, "step": 18248 }, { "epoch": 0.8013436227318134, "grad_norm": 1.4296875, "learning_rate": 4.740066171442398e-06, "loss": 0.3073, "step": 18250 }, { "epoch": 0.8014314412110168, "grad_norm": 1.6171875, "learning_rate": 4.7360136587011665e-06, "loss": 0.321, "step": 18252 }, { "epoch": 0.8015192596902203, "grad_norm": 1.4921875, "learning_rate": 4.731962697780856e-06, "loss": 0.3364, "step": 18254 }, { "epoch": 0.8016070781694238, "grad_norm": 1.421875, "learning_rate": 4.727913288991706e-06, "loss": 0.2831, "step": 18256 }, { "epoch": 0.8016948966486273, "grad_norm": 1.5546875, "learning_rate": 4.723865432643809e-06, "loss": 0.3107, "step": 18258 }, { "epoch": 0.8017827151278307, "grad_norm": 1.59375, "learning_rate": 4.719819129047165e-06, "loss": 0.322, "step": 18260 }, { "epoch": 0.8018705336070343, "grad_norm": 1.6640625, "learning_rate": 4.7157743785116255e-06, "loss": 0.2842, "step": 18262 }, { "epoch": 0.8019583520862378, "grad_norm": 1.546875, "learning_rate": 4.711731181346954e-06, "loss": 0.3266, "step": 18264 }, { "epoch": 0.8020461705654413, "grad_norm": 1.4296875, "learning_rate": 4.707689537862772e-06, "loss": 0.2794, "step": 18266 }, { "epoch": 0.8021339890446447, "grad_norm": 1.484375, "learning_rate": 4.703649448368583e-06, "loss": 0.3098, "step": 18268 }, { "epoch": 0.8022218075238482, "grad_norm": 1.5859375, "learning_rate": 4.699610913173791e-06, "loss": 0.3132, "step": 18270 }, { "epoch": 0.8023096260030517, "grad_norm": 1.5078125, "learning_rate": 4.695573932587657e-06, "loss": 0.3035, "step": 18272 }, { "epoch": 0.8023974444822551, "grad_norm": 1.4140625, "learning_rate": 4.691538506919344e-06, "loss": 0.3148, "step": 18274 }, { "epoch": 0.8024852629614586, "grad_norm": 1.4921875, "learning_rate": 4.68750463647788e-06, "loss": 0.3232, "step": 18276 }, { "epoch": 0.8025730814406622, "grad_norm": 1.4765625, "learning_rate": 4.683472321572172e-06, "loss": 0.3216, "step": 18278 }, { "epoch": 0.8026608999198657, "grad_norm": 1.40625, "learning_rate": 4.679441562511033e-06, "loss": 0.3349, "step": 18280 }, { "epoch": 0.8027487183990691, "grad_norm": 1.59375, "learning_rate": 4.675412359603121e-06, "loss": 0.2902, "step": 18282 }, { "epoch": 0.8028365368782726, "grad_norm": 1.6015625, "learning_rate": 4.671384713157018e-06, "loss": 0.3061, "step": 18284 }, { "epoch": 0.8029243553574761, "grad_norm": 1.5078125, "learning_rate": 4.667358623481132e-06, "loss": 0.3198, "step": 18286 }, { "epoch": 0.8030121738366796, "grad_norm": 1.4921875, "learning_rate": 4.663334090883806e-06, "loss": 0.2927, "step": 18288 }, { "epoch": 0.803099992315883, "grad_norm": 1.4921875, "learning_rate": 4.659311115673229e-06, "loss": 0.3303, "step": 18290 }, { "epoch": 0.8031878107950866, "grad_norm": 1.4921875, "learning_rate": 4.655289698157475e-06, "loss": 0.3044, "step": 18292 }, { "epoch": 0.8032756292742901, "grad_norm": 1.53125, "learning_rate": 4.65126983864452e-06, "loss": 0.3317, "step": 18294 }, { "epoch": 0.8033634477534936, "grad_norm": 1.484375, "learning_rate": 4.647251537442193e-06, "loss": 0.2918, "step": 18296 }, { "epoch": 0.803451266232697, "grad_norm": 1.46875, "learning_rate": 4.643234794858229e-06, "loss": 0.3144, "step": 18298 }, { "epoch": 0.8035390847119005, "grad_norm": 1.5, "learning_rate": 4.639219611200221e-06, "loss": 0.3063, "step": 18300 }, { "epoch": 0.803626903191104, "grad_norm": 1.53125, "learning_rate": 4.635205986775654e-06, "loss": 0.3148, "step": 18302 }, { "epoch": 0.8037147216703074, "grad_norm": 1.453125, "learning_rate": 4.6311939218918995e-06, "loss": 0.3087, "step": 18304 }, { "epoch": 0.8038025401495109, "grad_norm": 1.546875, "learning_rate": 4.627183416856187e-06, "loss": 0.2936, "step": 18306 }, { "epoch": 0.8038903586287145, "grad_norm": 1.53125, "learning_rate": 4.623174471975664e-06, "loss": 0.2981, "step": 18308 }, { "epoch": 0.803978177107918, "grad_norm": 1.578125, "learning_rate": 4.619167087557322e-06, "loss": 0.2947, "step": 18310 }, { "epoch": 0.8040659955871214, "grad_norm": 1.5390625, "learning_rate": 4.615161263908044e-06, "loss": 0.3003, "step": 18312 }, { "epoch": 0.8041538140663249, "grad_norm": 1.5078125, "learning_rate": 4.611157001334615e-06, "loss": 0.3225, "step": 18314 }, { "epoch": 0.8042416325455284, "grad_norm": 1.5078125, "learning_rate": 4.607154300143657e-06, "loss": 0.2862, "step": 18316 }, { "epoch": 0.8043294510247319, "grad_norm": 1.4765625, "learning_rate": 4.603153160641719e-06, "loss": 0.29, "step": 18318 }, { "epoch": 0.8044172695039353, "grad_norm": 1.5234375, "learning_rate": 4.5991535831351964e-06, "loss": 0.3264, "step": 18320 }, { "epoch": 0.8045050879831388, "grad_norm": 1.484375, "learning_rate": 4.595155567930387e-06, "loss": 0.2995, "step": 18322 }, { "epoch": 0.8045929064623424, "grad_norm": 1.4765625, "learning_rate": 4.591159115333454e-06, "loss": 0.3019, "step": 18324 }, { "epoch": 0.8046807249415459, "grad_norm": 1.4140625, "learning_rate": 4.587164225650445e-06, "loss": 0.3189, "step": 18326 }, { "epoch": 0.8047685434207493, "grad_norm": 1.53125, "learning_rate": 4.583170899187298e-06, "loss": 0.3148, "step": 18328 }, { "epoch": 0.8048563618999528, "grad_norm": 1.4609375, "learning_rate": 4.579179136249812e-06, "loss": 0.318, "step": 18330 }, { "epoch": 0.8049441803791563, "grad_norm": 1.5859375, "learning_rate": 4.5751889371436905e-06, "loss": 0.3232, "step": 18332 }, { "epoch": 0.8050319988583597, "grad_norm": 1.4296875, "learning_rate": 4.571200302174489e-06, "loss": 0.3035, "step": 18334 }, { "epoch": 0.8051198173375632, "grad_norm": 1.625, "learning_rate": 4.567213231647669e-06, "loss": 0.3367, "step": 18336 }, { "epoch": 0.8052076358167668, "grad_norm": 1.4921875, "learning_rate": 4.563227725868561e-06, "loss": 0.3073, "step": 18338 }, { "epoch": 0.8052954542959703, "grad_norm": 1.4921875, "learning_rate": 4.559243785142367e-06, "loss": 0.3291, "step": 18340 }, { "epoch": 0.8053832727751737, "grad_norm": 1.4453125, "learning_rate": 4.555261409774187e-06, "loss": 0.3237, "step": 18342 }, { "epoch": 0.8054710912543772, "grad_norm": 1.4609375, "learning_rate": 4.5512806000689916e-06, "loss": 0.2984, "step": 18344 }, { "epoch": 0.8055589097335807, "grad_norm": 1.3828125, "learning_rate": 4.547301356331629e-06, "loss": 0.3249, "step": 18346 }, { "epoch": 0.8056467282127842, "grad_norm": 1.4765625, "learning_rate": 4.543323678866826e-06, "loss": 0.3269, "step": 18348 }, { "epoch": 0.8057345466919876, "grad_norm": 1.453125, "learning_rate": 4.539347567979205e-06, "loss": 0.3146, "step": 18350 }, { "epoch": 0.8058223651711911, "grad_norm": 1.59375, "learning_rate": 4.535373023973253e-06, "loss": 0.3335, "step": 18352 }, { "epoch": 0.8059101836503947, "grad_norm": 1.53125, "learning_rate": 4.531400047153331e-06, "loss": 0.2805, "step": 18354 }, { "epoch": 0.8059980021295982, "grad_norm": 1.53125, "learning_rate": 4.52742863782371e-06, "loss": 0.3067, "step": 18356 }, { "epoch": 0.8060858206088016, "grad_norm": 1.5234375, "learning_rate": 4.5234587962885045e-06, "loss": 0.2892, "step": 18358 }, { "epoch": 0.8061736390880051, "grad_norm": 1.5546875, "learning_rate": 4.519490522851738e-06, "loss": 0.3257, "step": 18360 }, { "epoch": 0.8062614575672086, "grad_norm": 1.5234375, "learning_rate": 4.515523817817297e-06, "loss": 0.3211, "step": 18362 }, { "epoch": 0.806349276046412, "grad_norm": 1.5078125, "learning_rate": 4.511558681488945e-06, "loss": 0.2904, "step": 18364 }, { "epoch": 0.8064370945256155, "grad_norm": 1.4765625, "learning_rate": 4.507595114170349e-06, "loss": 0.3069, "step": 18366 }, { "epoch": 0.806524913004819, "grad_norm": 1.515625, "learning_rate": 4.503633116165026e-06, "loss": 0.323, "step": 18368 }, { "epoch": 0.8066127314840226, "grad_norm": 1.484375, "learning_rate": 4.499672687776396e-06, "loss": 0.3047, "step": 18370 }, { "epoch": 0.806700549963226, "grad_norm": 1.5546875, "learning_rate": 4.495713829307749e-06, "loss": 0.3107, "step": 18372 }, { "epoch": 0.8067883684424295, "grad_norm": 1.546875, "learning_rate": 4.491756541062242e-06, "loss": 0.3108, "step": 18374 }, { "epoch": 0.806876186921633, "grad_norm": 1.4765625, "learning_rate": 4.48780082334295e-06, "loss": 0.3186, "step": 18376 }, { "epoch": 0.8069640054008365, "grad_norm": 1.546875, "learning_rate": 4.483846676452777e-06, "loss": 0.3069, "step": 18378 }, { "epoch": 0.8070518238800399, "grad_norm": 1.4765625, "learning_rate": 4.479894100694545e-06, "loss": 0.3001, "step": 18380 }, { "epoch": 0.8071396423592434, "grad_norm": 1.5078125, "learning_rate": 4.4759430963709406e-06, "loss": 0.3085, "step": 18382 }, { "epoch": 0.8072274608384469, "grad_norm": 1.7578125, "learning_rate": 4.471993663784538e-06, "loss": 0.3099, "step": 18384 }, { "epoch": 0.8073152793176505, "grad_norm": 1.5625, "learning_rate": 4.468045803237783e-06, "loss": 0.2982, "step": 18386 }, { "epoch": 0.8074030977968539, "grad_norm": 1.5, "learning_rate": 4.464099515032993e-06, "loss": 0.3134, "step": 18388 }, { "epoch": 0.8074909162760574, "grad_norm": 1.46875, "learning_rate": 4.460154799472394e-06, "loss": 0.3161, "step": 18390 }, { "epoch": 0.8075787347552609, "grad_norm": 1.4375, "learning_rate": 4.456211656858056e-06, "loss": 0.3432, "step": 18392 }, { "epoch": 0.8076665532344643, "grad_norm": 1.5703125, "learning_rate": 4.452270087491961e-06, "loss": 0.317, "step": 18394 }, { "epoch": 0.8077543717136678, "grad_norm": 1.4609375, "learning_rate": 4.448330091675943e-06, "loss": 0.3255, "step": 18396 }, { "epoch": 0.8078421901928713, "grad_norm": 1.515625, "learning_rate": 4.444391669711737e-06, "loss": 0.2994, "step": 18398 }, { "epoch": 0.8079300086720749, "grad_norm": 1.4609375, "learning_rate": 4.440454821900947e-06, "loss": 0.3137, "step": 18400 }, { "epoch": 0.8080178271512783, "grad_norm": 1.46875, "learning_rate": 4.436519548545049e-06, "loss": 0.3011, "step": 18402 }, { "epoch": 0.8081056456304818, "grad_norm": 1.4609375, "learning_rate": 4.432585849945417e-06, "loss": 0.3021, "step": 18404 }, { "epoch": 0.8081934641096853, "grad_norm": 1.4453125, "learning_rate": 4.428653726403292e-06, "loss": 0.321, "step": 18406 }, { "epoch": 0.8082812825888888, "grad_norm": 1.421875, "learning_rate": 4.424723178219798e-06, "loss": 0.304, "step": 18408 }, { "epoch": 0.8083691010680922, "grad_norm": 1.5078125, "learning_rate": 4.4207942056959275e-06, "loss": 0.3304, "step": 18410 }, { "epoch": 0.8084569195472957, "grad_norm": 1.4453125, "learning_rate": 4.416866809132575e-06, "loss": 0.2906, "step": 18412 }, { "epoch": 0.8085447380264992, "grad_norm": 1.53125, "learning_rate": 4.412940988830497e-06, "loss": 0.3207, "step": 18414 }, { "epoch": 0.8086325565057028, "grad_norm": 1.515625, "learning_rate": 4.409016745090327e-06, "loss": 0.3424, "step": 18416 }, { "epoch": 0.8087203749849062, "grad_norm": 1.453125, "learning_rate": 4.405094078212599e-06, "loss": 0.3357, "step": 18418 }, { "epoch": 0.8088081934641097, "grad_norm": 1.53125, "learning_rate": 4.4011729884976955e-06, "loss": 0.3276, "step": 18420 }, { "epoch": 0.8088960119433132, "grad_norm": 1.46875, "learning_rate": 4.397253476245908e-06, "loss": 0.3036, "step": 18422 }, { "epoch": 0.8089838304225166, "grad_norm": 1.484375, "learning_rate": 4.393335541757387e-06, "loss": 0.3055, "step": 18424 }, { "epoch": 0.8090716489017201, "grad_norm": 1.515625, "learning_rate": 4.389419185332167e-06, "loss": 0.3363, "step": 18426 }, { "epoch": 0.8091594673809236, "grad_norm": 1.5546875, "learning_rate": 4.3855044072701715e-06, "loss": 0.3146, "step": 18428 }, { "epoch": 0.8092472858601271, "grad_norm": 1.375, "learning_rate": 4.381591207871183e-06, "loss": 0.311, "step": 18430 }, { "epoch": 0.8093351043393306, "grad_norm": 1.53125, "learning_rate": 4.377679587434888e-06, "loss": 0.3498, "step": 18432 }, { "epoch": 0.8094229228185341, "grad_norm": 1.5, "learning_rate": 4.373769546260836e-06, "loss": 0.3082, "step": 18434 }, { "epoch": 0.8095107412977376, "grad_norm": 1.640625, "learning_rate": 4.369861084648455e-06, "loss": 0.3477, "step": 18436 }, { "epoch": 0.8095985597769411, "grad_norm": 1.5390625, "learning_rate": 4.365954202897058e-06, "loss": 0.3237, "step": 18438 }, { "epoch": 0.8096863782561445, "grad_norm": 1.5546875, "learning_rate": 4.362048901305829e-06, "loss": 0.3204, "step": 18440 }, { "epoch": 0.809774196735348, "grad_norm": 1.4609375, "learning_rate": 4.358145180173847e-06, "loss": 0.3125, "step": 18442 }, { "epoch": 0.8098620152145515, "grad_norm": 1.5, "learning_rate": 4.354243039800049e-06, "loss": 0.2819, "step": 18444 }, { "epoch": 0.8099498336937551, "grad_norm": 1.4375, "learning_rate": 4.350342480483277e-06, "loss": 0.3345, "step": 18446 }, { "epoch": 0.8100376521729585, "grad_norm": 1.515625, "learning_rate": 4.346443502522226e-06, "loss": 0.3361, "step": 18448 }, { "epoch": 0.810125470652162, "grad_norm": 1.4609375, "learning_rate": 4.3425461062154755e-06, "loss": 0.3344, "step": 18450 }, { "epoch": 0.8102132891313655, "grad_norm": 1.4453125, "learning_rate": 4.338650291861504e-06, "loss": 0.2927, "step": 18452 }, { "epoch": 0.810301107610569, "grad_norm": 1.46875, "learning_rate": 4.334756059758638e-06, "loss": 0.3279, "step": 18454 }, { "epoch": 0.8103889260897724, "grad_norm": 1.5234375, "learning_rate": 4.330863410205116e-06, "loss": 0.327, "step": 18456 }, { "epoch": 0.8104767445689759, "grad_norm": 1.484375, "learning_rate": 4.326972343499025e-06, "loss": 0.3323, "step": 18458 }, { "epoch": 0.8105645630481794, "grad_norm": 1.5, "learning_rate": 4.323082859938343e-06, "loss": 0.3349, "step": 18460 }, { "epoch": 0.810652381527383, "grad_norm": 1.5078125, "learning_rate": 4.319194959820941e-06, "loss": 0.312, "step": 18462 }, { "epoch": 0.8107402000065864, "grad_norm": 1.5625, "learning_rate": 4.315308643444537e-06, "loss": 0.3173, "step": 18464 }, { "epoch": 0.8108280184857899, "grad_norm": 1.5, "learning_rate": 4.3114239111067625e-06, "loss": 0.3198, "step": 18466 }, { "epoch": 0.8109158369649934, "grad_norm": 1.46875, "learning_rate": 4.307540763105103e-06, "loss": 0.3422, "step": 18468 }, { "epoch": 0.8110036554441968, "grad_norm": 1.4609375, "learning_rate": 4.303659199736934e-06, "loss": 0.3171, "step": 18470 }, { "epoch": 0.8110914739234003, "grad_norm": 1.5078125, "learning_rate": 4.299779221299499e-06, "loss": 0.3135, "step": 18472 }, { "epoch": 0.8111792924026038, "grad_norm": 1.546875, "learning_rate": 4.295900828089938e-06, "loss": 0.3031, "step": 18474 }, { "epoch": 0.8112671108818073, "grad_norm": 1.625, "learning_rate": 4.292024020405255e-06, "loss": 0.3092, "step": 18476 }, { "epoch": 0.8113549293610108, "grad_norm": 1.59375, "learning_rate": 4.288148798542332e-06, "loss": 0.3243, "step": 18478 }, { "epoch": 0.8114427478402143, "grad_norm": 1.59375, "learning_rate": 4.284275162797943e-06, "loss": 0.3145, "step": 18480 }, { "epoch": 0.8115305663194178, "grad_norm": 1.484375, "learning_rate": 4.2804031134687255e-06, "loss": 0.2968, "step": 18482 }, { "epoch": 0.8116183847986213, "grad_norm": 1.4609375, "learning_rate": 4.276532650851206e-06, "loss": 0.3296, "step": 18484 }, { "epoch": 0.8117062032778247, "grad_norm": 1.484375, "learning_rate": 4.272663775241787e-06, "loss": 0.3132, "step": 18486 }, { "epoch": 0.8117940217570282, "grad_norm": 1.5, "learning_rate": 4.268796486936738e-06, "loss": 0.3123, "step": 18488 }, { "epoch": 0.8118818402362317, "grad_norm": 1.4921875, "learning_rate": 4.264930786232227e-06, "loss": 0.3005, "step": 18490 }, { "epoch": 0.8119696587154352, "grad_norm": 1.5, "learning_rate": 4.2610666734242825e-06, "loss": 0.3299, "step": 18492 }, { "epoch": 0.8120574771946387, "grad_norm": 1.5, "learning_rate": 4.2572041488088325e-06, "loss": 0.2917, "step": 18494 }, { "epoch": 0.8121452956738422, "grad_norm": 1.5546875, "learning_rate": 4.253343212681657e-06, "loss": 0.335, "step": 18496 }, { "epoch": 0.8122331141530457, "grad_norm": 1.5078125, "learning_rate": 4.249483865338435e-06, "loss": 0.324, "step": 18498 }, { "epoch": 0.8123209326322491, "grad_norm": 1.546875, "learning_rate": 4.24562610707471e-06, "loss": 0.3325, "step": 18500 }, { "epoch": 0.8124087511114526, "grad_norm": 1.4375, "learning_rate": 4.241769938185907e-06, "loss": 0.3297, "step": 18502 }, { "epoch": 0.8124965695906561, "grad_norm": 1.4296875, "learning_rate": 4.237915358967348e-06, "loss": 0.3245, "step": 18504 }, { "epoch": 0.8125843880698596, "grad_norm": 1.453125, "learning_rate": 4.234062369714198e-06, "loss": 0.2839, "step": 18506 }, { "epoch": 0.8126722065490631, "grad_norm": 1.4609375, "learning_rate": 4.230210970721538e-06, "loss": 0.3228, "step": 18508 }, { "epoch": 0.8127600250282666, "grad_norm": 1.4375, "learning_rate": 4.226361162284298e-06, "loss": 0.3114, "step": 18510 }, { "epoch": 0.8128478435074701, "grad_norm": 1.5859375, "learning_rate": 4.222512944697296e-06, "loss": 0.3034, "step": 18512 }, { "epoch": 0.8129356619866736, "grad_norm": 1.546875, "learning_rate": 4.218666318255238e-06, "loss": 0.2847, "step": 18514 }, { "epoch": 0.813023480465877, "grad_norm": 1.453125, "learning_rate": 4.21482128325269e-06, "loss": 0.3017, "step": 18516 }, { "epoch": 0.8131112989450805, "grad_norm": 1.6171875, "learning_rate": 4.210977839984117e-06, "loss": 0.3169, "step": 18518 }, { "epoch": 0.813199117424284, "grad_norm": 1.5625, "learning_rate": 4.207135988743844e-06, "loss": 0.3233, "step": 18520 }, { "epoch": 0.8132869359034874, "grad_norm": 1.5625, "learning_rate": 4.203295729826076e-06, "loss": 0.3397, "step": 18522 }, { "epoch": 0.813374754382691, "grad_norm": 1.453125, "learning_rate": 4.199457063524911e-06, "loss": 0.3195, "step": 18524 }, { "epoch": 0.8134625728618945, "grad_norm": 1.53125, "learning_rate": 4.1956199901343055e-06, "loss": 0.3229, "step": 18526 }, { "epoch": 0.813550391341098, "grad_norm": 1.53125, "learning_rate": 4.191784509948121e-06, "loss": 0.2953, "step": 18528 }, { "epoch": 0.8136382098203014, "grad_norm": 1.515625, "learning_rate": 4.187950623260053e-06, "loss": 0.3143, "step": 18530 }, { "epoch": 0.8137260282995049, "grad_norm": 1.4921875, "learning_rate": 4.184118330363721e-06, "loss": 0.3555, "step": 18532 }, { "epoch": 0.8138138467787084, "grad_norm": 1.4296875, "learning_rate": 4.180287631552593e-06, "loss": 0.3137, "step": 18534 }, { "epoch": 0.8139016652579119, "grad_norm": 1.46875, "learning_rate": 4.176458527120034e-06, "loss": 0.3324, "step": 18536 }, { "epoch": 0.8139894837371154, "grad_norm": 1.5546875, "learning_rate": 4.172631017359274e-06, "loss": 0.3093, "step": 18538 }, { "epoch": 0.8140773022163189, "grad_norm": 1.5390625, "learning_rate": 4.168805102563414e-06, "loss": 0.3012, "step": 18540 }, { "epoch": 0.8141651206955224, "grad_norm": 1.484375, "learning_rate": 4.164980783025463e-06, "loss": 0.2968, "step": 18542 }, { "epoch": 0.8142529391747259, "grad_norm": 1.5234375, "learning_rate": 4.1611580590382695e-06, "loss": 0.3131, "step": 18544 }, { "epoch": 0.8143407576539293, "grad_norm": 1.4453125, "learning_rate": 4.157336930894593e-06, "loss": 0.3175, "step": 18546 }, { "epoch": 0.8144285761331328, "grad_norm": 1.484375, "learning_rate": 4.153517398887053e-06, "loss": 0.2955, "step": 18548 }, { "epoch": 0.8145163946123363, "grad_norm": 1.5, "learning_rate": 4.14969946330814e-06, "loss": 0.3392, "step": 18550 }, { "epoch": 0.8146042130915397, "grad_norm": 1.5703125, "learning_rate": 4.145883124450245e-06, "loss": 0.3102, "step": 18552 }, { "epoch": 0.8146920315707433, "grad_norm": 1.4453125, "learning_rate": 4.142068382605615e-06, "loss": 0.3169, "step": 18554 }, { "epoch": 0.8147798500499468, "grad_norm": 1.5, "learning_rate": 4.138255238066397e-06, "loss": 0.3488, "step": 18556 }, { "epoch": 0.8148676685291503, "grad_norm": 1.6015625, "learning_rate": 4.13444369112459e-06, "loss": 0.3287, "step": 18558 }, { "epoch": 0.8149554870083537, "grad_norm": 1.40625, "learning_rate": 4.130633742072087e-06, "loss": 0.3307, "step": 18560 }, { "epoch": 0.8150433054875572, "grad_norm": 1.5703125, "learning_rate": 4.126825391200656e-06, "loss": 0.3352, "step": 18562 }, { "epoch": 0.8151311239667607, "grad_norm": 1.5234375, "learning_rate": 4.123018638801935e-06, "loss": 0.3291, "step": 18564 }, { "epoch": 0.8152189424459642, "grad_norm": 1.4609375, "learning_rate": 4.119213485167456e-06, "loss": 0.3104, "step": 18566 }, { "epoch": 0.8153067609251676, "grad_norm": 1.4765625, "learning_rate": 4.115409930588606e-06, "loss": 0.312, "step": 18568 }, { "epoch": 0.8153945794043712, "grad_norm": 1.5390625, "learning_rate": 4.111607975356679e-06, "loss": 0.3169, "step": 18570 }, { "epoch": 0.8154823978835747, "grad_norm": 1.5625, "learning_rate": 4.1078076197628214e-06, "loss": 0.3016, "step": 18572 }, { "epoch": 0.8155702163627782, "grad_norm": 1.4609375, "learning_rate": 4.104008864098055e-06, "loss": 0.3218, "step": 18574 }, { "epoch": 0.8156580348419816, "grad_norm": 1.4765625, "learning_rate": 4.100211708653306e-06, "loss": 0.305, "step": 18576 }, { "epoch": 0.8157458533211851, "grad_norm": 1.5, "learning_rate": 4.0964161537193486e-06, "loss": 0.3193, "step": 18578 }, { "epoch": 0.8158336718003886, "grad_norm": 1.4609375, "learning_rate": 4.092622199586859e-06, "loss": 0.3265, "step": 18580 }, { "epoch": 0.815921490279592, "grad_norm": 1.5390625, "learning_rate": 4.088829846546374e-06, "loss": 0.2984, "step": 18582 }, { "epoch": 0.8160093087587955, "grad_norm": 1.6015625, "learning_rate": 4.085039094888307e-06, "loss": 0.3128, "step": 18584 }, { "epoch": 0.8160971272379991, "grad_norm": 1.5078125, "learning_rate": 4.0812499449029624e-06, "loss": 0.3125, "step": 18586 }, { "epoch": 0.8161849457172026, "grad_norm": 1.46875, "learning_rate": 4.077462396880508e-06, "loss": 0.3213, "step": 18588 }, { "epoch": 0.816272764196406, "grad_norm": 1.4375, "learning_rate": 4.073676451111011e-06, "loss": 0.2875, "step": 18590 }, { "epoch": 0.8163605826756095, "grad_norm": 1.4609375, "learning_rate": 4.069892107884374e-06, "loss": 0.3203, "step": 18592 }, { "epoch": 0.816448401154813, "grad_norm": 1.4921875, "learning_rate": 4.066109367490426e-06, "loss": 0.3497, "step": 18594 }, { "epoch": 0.8165362196340165, "grad_norm": 1.4921875, "learning_rate": 4.062328230218831e-06, "loss": 0.2914, "step": 18596 }, { "epoch": 0.8166240381132199, "grad_norm": 1.640625, "learning_rate": 4.0585486963591655e-06, "loss": 0.3261, "step": 18598 }, { "epoch": 0.8167118565924235, "grad_norm": 1.46875, "learning_rate": 4.0547707662008634e-06, "loss": 0.3163, "step": 18600 }, { "epoch": 0.816799675071627, "grad_norm": 1.5, "learning_rate": 4.050994440033229e-06, "loss": 0.3222, "step": 18602 }, { "epoch": 0.8168874935508305, "grad_norm": 1.5234375, "learning_rate": 4.04721971814547e-06, "loss": 0.3136, "step": 18604 }, { "epoch": 0.8169753120300339, "grad_norm": 1.4921875, "learning_rate": 4.0434466008266395e-06, "loss": 0.3103, "step": 18606 }, { "epoch": 0.8170631305092374, "grad_norm": 1.4296875, "learning_rate": 4.0396750883657e-06, "loss": 0.3185, "step": 18608 }, { "epoch": 0.8171509489884409, "grad_norm": 1.5, "learning_rate": 4.035905181051464e-06, "loss": 0.3172, "step": 18610 }, { "epoch": 0.8172387674676443, "grad_norm": 1.515625, "learning_rate": 4.0321368791726325e-06, "loss": 0.3128, "step": 18612 }, { "epoch": 0.8173265859468478, "grad_norm": 1.3984375, "learning_rate": 4.028370183017788e-06, "loss": 0.3079, "step": 18614 }, { "epoch": 0.8174144044260514, "grad_norm": 1.578125, "learning_rate": 4.024605092875378e-06, "loss": 0.3365, "step": 18616 }, { "epoch": 0.8175022229052549, "grad_norm": 1.46875, "learning_rate": 4.020841609033743e-06, "loss": 0.3228, "step": 18618 }, { "epoch": 0.8175900413844583, "grad_norm": 1.4296875, "learning_rate": 4.0170797317810875e-06, "loss": 0.3432, "step": 18620 }, { "epoch": 0.8176778598636618, "grad_norm": 1.46875, "learning_rate": 4.013319461405493e-06, "loss": 0.3294, "step": 18622 }, { "epoch": 0.8177656783428653, "grad_norm": 1.4296875, "learning_rate": 4.009560798194928e-06, "loss": 0.3218, "step": 18624 }, { "epoch": 0.8178534968220688, "grad_norm": 1.4375, "learning_rate": 4.005803742437222e-06, "loss": 0.3066, "step": 18626 }, { "epoch": 0.8179413153012722, "grad_norm": 1.5234375, "learning_rate": 4.002048294420105e-06, "loss": 0.3282, "step": 18628 }, { "epoch": 0.8180291337804757, "grad_norm": 1.4453125, "learning_rate": 3.998294454431157e-06, "loss": 0.3375, "step": 18630 }, { "epoch": 0.8181169522596793, "grad_norm": 1.5625, "learning_rate": 3.99454222275786e-06, "loss": 0.3129, "step": 18632 }, { "epoch": 0.8182047707388828, "grad_norm": 1.3828125, "learning_rate": 3.990791599687554e-06, "loss": 0.332, "step": 18634 }, { "epoch": 0.8182925892180862, "grad_norm": 1.578125, "learning_rate": 3.987042585507458e-06, "loss": 0.325, "step": 18636 }, { "epoch": 0.8183804076972897, "grad_norm": 1.5625, "learning_rate": 3.983295180504685e-06, "loss": 0.2998, "step": 18638 }, { "epoch": 0.8184682261764932, "grad_norm": 1.4921875, "learning_rate": 3.979549384966197e-06, "loss": 0.2963, "step": 18640 }, { "epoch": 0.8185560446556966, "grad_norm": 1.4765625, "learning_rate": 3.975805199178865e-06, "loss": 0.3278, "step": 18642 }, { "epoch": 0.8186438631349001, "grad_norm": 1.4765625, "learning_rate": 3.972062623429409e-06, "loss": 0.3575, "step": 18644 }, { "epoch": 0.8187316816141037, "grad_norm": 1.453125, "learning_rate": 3.968321658004431e-06, "loss": 0.3186, "step": 18646 }, { "epoch": 0.8188195000933072, "grad_norm": 1.5859375, "learning_rate": 3.964582303190428e-06, "loss": 0.3378, "step": 18648 }, { "epoch": 0.8189073185725106, "grad_norm": 1.578125, "learning_rate": 3.9608445592737575e-06, "loss": 0.311, "step": 18650 }, { "epoch": 0.8189951370517141, "grad_norm": 1.515625, "learning_rate": 3.957108426540654e-06, "loss": 0.2803, "step": 18652 }, { "epoch": 0.8190829555309176, "grad_norm": 1.4453125, "learning_rate": 3.953373905277222e-06, "loss": 0.2866, "step": 18654 }, { "epoch": 0.8191707740101211, "grad_norm": 1.5, "learning_rate": 3.949640995769471e-06, "loss": 0.3107, "step": 18656 }, { "epoch": 0.8192585924893245, "grad_norm": 1.4375, "learning_rate": 3.945909698303249e-06, "loss": 0.321, "step": 18658 }, { "epoch": 0.819346410968528, "grad_norm": 1.5625, "learning_rate": 3.942180013164318e-06, "loss": 0.305, "step": 18660 }, { "epoch": 0.8194342294477316, "grad_norm": 1.4453125, "learning_rate": 3.938451940638291e-06, "loss": 0.3157, "step": 18662 }, { "epoch": 0.8195220479269351, "grad_norm": 1.5, "learning_rate": 3.934725481010653e-06, "loss": 0.3003, "step": 18664 }, { "epoch": 0.8196098664061385, "grad_norm": 1.4921875, "learning_rate": 3.931000634566798e-06, "loss": 0.343, "step": 18666 }, { "epoch": 0.819697684885342, "grad_norm": 1.4921875, "learning_rate": 3.927277401591956e-06, "loss": 0.3026, "step": 18668 }, { "epoch": 0.8197855033645455, "grad_norm": 1.5, "learning_rate": 3.923555782371269e-06, "loss": 0.3104, "step": 18670 }, { "epoch": 0.819873321843749, "grad_norm": 1.5703125, "learning_rate": 3.919835777189732e-06, "loss": 0.3062, "step": 18672 }, { "epoch": 0.8199611403229524, "grad_norm": 1.4921875, "learning_rate": 3.916117386332219e-06, "loss": 0.31, "step": 18674 }, { "epoch": 0.8200489588021559, "grad_norm": 1.5390625, "learning_rate": 3.912400610083494e-06, "loss": 0.2994, "step": 18676 }, { "epoch": 0.8201367772813595, "grad_norm": 1.5703125, "learning_rate": 3.908685448728183e-06, "loss": 0.3164, "step": 18678 }, { "epoch": 0.820224595760563, "grad_norm": 1.5, "learning_rate": 3.9049719025508e-06, "loss": 0.3125, "step": 18680 }, { "epoch": 0.8203124142397664, "grad_norm": 1.5234375, "learning_rate": 3.901259971835728e-06, "loss": 0.3175, "step": 18682 }, { "epoch": 0.8204002327189699, "grad_norm": 1.5234375, "learning_rate": 3.897549656867222e-06, "loss": 0.3205, "step": 18684 }, { "epoch": 0.8204880511981734, "grad_norm": 1.484375, "learning_rate": 3.893840957929423e-06, "loss": 0.3137, "step": 18686 }, { "epoch": 0.8205758696773768, "grad_norm": 1.578125, "learning_rate": 3.8901338753063375e-06, "loss": 0.3512, "step": 18688 }, { "epoch": 0.8206636881565803, "grad_norm": 1.4609375, "learning_rate": 3.886428409281867e-06, "loss": 0.3016, "step": 18690 }, { "epoch": 0.8207515066357839, "grad_norm": 1.578125, "learning_rate": 3.882724560139764e-06, "loss": 0.3361, "step": 18692 }, { "epoch": 0.8208393251149874, "grad_norm": 1.5, "learning_rate": 3.879022328163681e-06, "loss": 0.3345, "step": 18694 }, { "epoch": 0.8209271435941908, "grad_norm": 1.6328125, "learning_rate": 3.875321713637131e-06, "loss": 0.3463, "step": 18696 }, { "epoch": 0.8210149620733943, "grad_norm": 1.5703125, "learning_rate": 3.8716227168435035e-06, "loss": 0.3222, "step": 18698 }, { "epoch": 0.8211027805525978, "grad_norm": 1.5, "learning_rate": 3.867925338066078e-06, "loss": 0.3165, "step": 18700 }, { "epoch": 0.8211905990318012, "grad_norm": 1.53125, "learning_rate": 3.864229577587991e-06, "loss": 0.2992, "step": 18702 }, { "epoch": 0.8212784175110047, "grad_norm": 1.5234375, "learning_rate": 3.860535435692275e-06, "loss": 0.3192, "step": 18704 }, { "epoch": 0.8213662359902082, "grad_norm": 1.5078125, "learning_rate": 3.856842912661823e-06, "loss": 0.3102, "step": 18706 }, { "epoch": 0.8214540544694118, "grad_norm": 1.4609375, "learning_rate": 3.853152008779401e-06, "loss": 0.3308, "step": 18708 }, { "epoch": 0.8215418729486152, "grad_norm": 1.453125, "learning_rate": 3.8494627243276764e-06, "loss": 0.3053, "step": 18710 }, { "epoch": 0.8216296914278187, "grad_norm": 1.4453125, "learning_rate": 3.8457750595891656e-06, "loss": 0.3065, "step": 18712 }, { "epoch": 0.8217175099070222, "grad_norm": 1.4609375, "learning_rate": 3.84208901484627e-06, "loss": 0.3132, "step": 18714 }, { "epoch": 0.8218053283862257, "grad_norm": 1.53125, "learning_rate": 3.838404590381267e-06, "loss": 0.3347, "step": 18716 }, { "epoch": 0.8218931468654291, "grad_norm": 1.46875, "learning_rate": 3.834721786476317e-06, "loss": 0.2712, "step": 18718 }, { "epoch": 0.8219809653446326, "grad_norm": 1.4921875, "learning_rate": 3.831040603413441e-06, "loss": 0.3004, "step": 18720 }, { "epoch": 0.8220687838238361, "grad_norm": 1.5078125, "learning_rate": 3.827361041474556e-06, "loss": 0.3115, "step": 18722 }, { "epoch": 0.8221566023030397, "grad_norm": 1.46875, "learning_rate": 3.823683100941436e-06, "loss": 0.3081, "step": 18724 }, { "epoch": 0.8222444207822431, "grad_norm": 1.5, "learning_rate": 3.820006782095736e-06, "loss": 0.3414, "step": 18726 }, { "epoch": 0.8223322392614466, "grad_norm": 1.4921875, "learning_rate": 3.816332085218999e-06, "loss": 0.3236, "step": 18728 }, { "epoch": 0.8224200577406501, "grad_norm": 1.4921875, "learning_rate": 3.812659010592626e-06, "loss": 0.3281, "step": 18730 }, { "epoch": 0.8225078762198536, "grad_norm": 1.4453125, "learning_rate": 3.808987558497906e-06, "loss": 0.302, "step": 18732 }, { "epoch": 0.822595694699057, "grad_norm": 1.4765625, "learning_rate": 3.8053177292160015e-06, "loss": 0.3059, "step": 18734 }, { "epoch": 0.8226835131782605, "grad_norm": 1.4765625, "learning_rate": 3.801649523027942e-06, "loss": 0.325, "step": 18736 }, { "epoch": 0.822771331657464, "grad_norm": 1.5703125, "learning_rate": 3.7979829402146477e-06, "loss": 0.3035, "step": 18738 }, { "epoch": 0.8228591501366675, "grad_norm": 1.46875, "learning_rate": 3.794317981056894e-06, "loss": 0.3197, "step": 18740 }, { "epoch": 0.822946968615871, "grad_norm": 1.4765625, "learning_rate": 3.7906546458353677e-06, "loss": 0.3018, "step": 18742 }, { "epoch": 0.8230347870950745, "grad_norm": 1.4375, "learning_rate": 3.78699293483058e-06, "loss": 0.3275, "step": 18744 }, { "epoch": 0.823122605574278, "grad_norm": 1.484375, "learning_rate": 3.783332848322965e-06, "loss": 0.3183, "step": 18746 }, { "epoch": 0.8232104240534814, "grad_norm": 1.484375, "learning_rate": 3.7796743865928045e-06, "loss": 0.3437, "step": 18748 }, { "epoch": 0.8232982425326849, "grad_norm": 1.3984375, "learning_rate": 3.776017549920263e-06, "loss": 0.3303, "step": 18750 }, { "epoch": 0.8233860610118884, "grad_norm": 1.453125, "learning_rate": 3.772362338585389e-06, "loss": 0.3062, "step": 18752 }, { "epoch": 0.823473879491092, "grad_norm": 1.4609375, "learning_rate": 3.7687087528680915e-06, "loss": 0.3194, "step": 18754 }, { "epoch": 0.8235616979702954, "grad_norm": 1.546875, "learning_rate": 3.7650567930481716e-06, "loss": 0.3231, "step": 18756 }, { "epoch": 0.8236495164494989, "grad_norm": 1.5, "learning_rate": 3.761406459405292e-06, "loss": 0.3186, "step": 18758 }, { "epoch": 0.8237373349287024, "grad_norm": 1.453125, "learning_rate": 3.7577577522189935e-06, "loss": 0.3101, "step": 18760 }, { "epoch": 0.8238251534079059, "grad_norm": 1.5703125, "learning_rate": 3.754110671768704e-06, "loss": 0.313, "step": 18762 }, { "epoch": 0.8239129718871093, "grad_norm": 1.4140625, "learning_rate": 3.750465218333704e-06, "loss": 0.3161, "step": 18764 }, { "epoch": 0.8240007903663128, "grad_norm": 1.546875, "learning_rate": 3.746821392193181e-06, "loss": 0.3347, "step": 18766 }, { "epoch": 0.8240886088455163, "grad_norm": 1.484375, "learning_rate": 3.7431791936261672e-06, "loss": 0.3277, "step": 18768 }, { "epoch": 0.8241764273247199, "grad_norm": 1.5546875, "learning_rate": 3.739538622911584e-06, "loss": 0.3013, "step": 18770 }, { "epoch": 0.8242642458039233, "grad_norm": 1.5, "learning_rate": 3.7358996803282335e-06, "loss": 0.3254, "step": 18772 }, { "epoch": 0.8243520642831268, "grad_norm": 1.4296875, "learning_rate": 3.7322623661547816e-06, "loss": 0.3235, "step": 18774 }, { "epoch": 0.8244398827623303, "grad_norm": 1.4375, "learning_rate": 3.7286266806697777e-06, "loss": 0.3175, "step": 18776 }, { "epoch": 0.8245277012415337, "grad_norm": 1.4375, "learning_rate": 3.7249926241516353e-06, "loss": 0.2844, "step": 18778 }, { "epoch": 0.8246155197207372, "grad_norm": 1.5234375, "learning_rate": 3.721360196878662e-06, "loss": 0.3042, "step": 18780 }, { "epoch": 0.8247033381999407, "grad_norm": 1.4765625, "learning_rate": 3.7177293991290273e-06, "loss": 0.3065, "step": 18782 }, { "epoch": 0.8247911566791442, "grad_norm": 1.5078125, "learning_rate": 3.7141002311807698e-06, "loss": 0.2974, "step": 18784 }, { "epoch": 0.8248789751583477, "grad_norm": 1.4375, "learning_rate": 3.7104726933118282e-06, "loss": 0.3056, "step": 18786 }, { "epoch": 0.8249667936375512, "grad_norm": 1.421875, "learning_rate": 3.7068467857999832e-06, "loss": 0.3465, "step": 18788 }, { "epoch": 0.8250546121167547, "grad_norm": 1.515625, "learning_rate": 3.703222508922921e-06, "loss": 0.3242, "step": 18790 }, { "epoch": 0.8251424305959582, "grad_norm": 1.5625, "learning_rate": 3.699599862958178e-06, "loss": 0.3172, "step": 18792 }, { "epoch": 0.8252302490751616, "grad_norm": 1.6484375, "learning_rate": 3.69597884818319e-06, "loss": 0.3146, "step": 18794 }, { "epoch": 0.8253180675543651, "grad_norm": 1.4296875, "learning_rate": 3.69235946487525e-06, "loss": 0.3353, "step": 18796 }, { "epoch": 0.8254058860335686, "grad_norm": 1.4609375, "learning_rate": 3.688741713311522e-06, "loss": 0.3312, "step": 18798 }, { "epoch": 0.8254937045127722, "grad_norm": 1.453125, "learning_rate": 3.6851255937690704e-06, "loss": 0.3205, "step": 18800 }, { "epoch": 0.8255815229919756, "grad_norm": 1.4375, "learning_rate": 3.6815111065248043e-06, "loss": 0.2822, "step": 18802 }, { "epoch": 0.8256693414711791, "grad_norm": 1.671875, "learning_rate": 3.677898251855538e-06, "loss": 0.3132, "step": 18804 }, { "epoch": 0.8257571599503826, "grad_norm": 1.53125, "learning_rate": 3.6742870300379232e-06, "loss": 0.3054, "step": 18806 }, { "epoch": 0.825844978429586, "grad_norm": 1.4453125, "learning_rate": 3.6706774413485275e-06, "loss": 0.2958, "step": 18808 }, { "epoch": 0.8259327969087895, "grad_norm": 1.5078125, "learning_rate": 3.667069486063765e-06, "loss": 0.3208, "step": 18810 }, { "epoch": 0.826020615387993, "grad_norm": 1.515625, "learning_rate": 3.6634631644599295e-06, "loss": 0.3191, "step": 18812 }, { "epoch": 0.8261084338671965, "grad_norm": 1.5, "learning_rate": 3.659858476813205e-06, "loss": 0.3186, "step": 18814 }, { "epoch": 0.8261962523464, "grad_norm": 1.46875, "learning_rate": 3.6562554233996295e-06, "loss": 0.3111, "step": 18816 }, { "epoch": 0.8262840708256035, "grad_norm": 1.4453125, "learning_rate": 3.6526540044951346e-06, "loss": 0.3027, "step": 18818 }, { "epoch": 0.826371889304807, "grad_norm": 1.4140625, "learning_rate": 3.649054220375514e-06, "loss": 0.2807, "step": 18820 }, { "epoch": 0.8264597077840105, "grad_norm": 1.5, "learning_rate": 3.6454560713164334e-06, "loss": 0.3191, "step": 18822 }, { "epoch": 0.8265475262632139, "grad_norm": 1.484375, "learning_rate": 3.6418595575934524e-06, "loss": 0.3282, "step": 18824 }, { "epoch": 0.8266353447424174, "grad_norm": 1.4609375, "learning_rate": 3.6382646794819785e-06, "loss": 0.3097, "step": 18826 }, { "epoch": 0.8267231632216209, "grad_norm": 1.578125, "learning_rate": 3.6346714372573224e-06, "loss": 0.3229, "step": 18828 }, { "epoch": 0.8268109817008243, "grad_norm": 1.46875, "learning_rate": 3.6310798311946503e-06, "loss": 0.3392, "step": 18830 }, { "epoch": 0.8268988001800279, "grad_norm": 1.421875, "learning_rate": 3.627489861569003e-06, "loss": 0.2793, "step": 18832 }, { "epoch": 0.8269866186592314, "grad_norm": 1.484375, "learning_rate": 3.623901528655313e-06, "loss": 0.3158, "step": 18834 }, { "epoch": 0.8270744371384349, "grad_norm": 1.4375, "learning_rate": 3.620314832728361e-06, "loss": 0.3022, "step": 18836 }, { "epoch": 0.8271622556176383, "grad_norm": 1.515625, "learning_rate": 3.616729774062827e-06, "loss": 0.2894, "step": 18838 }, { "epoch": 0.8272500740968418, "grad_norm": 1.53125, "learning_rate": 3.613146352933247e-06, "loss": 0.2916, "step": 18840 }, { "epoch": 0.8273378925760453, "grad_norm": 1.3828125, "learning_rate": 3.6095645696140547e-06, "loss": 0.3082, "step": 18842 }, { "epoch": 0.8274257110552488, "grad_norm": 1.3984375, "learning_rate": 3.6059844243795327e-06, "loss": 0.3201, "step": 18844 }, { "epoch": 0.8275135295344523, "grad_norm": 1.765625, "learning_rate": 3.6024059175038455e-06, "loss": 0.293, "step": 18846 }, { "epoch": 0.8276013480136558, "grad_norm": 1.4453125, "learning_rate": 3.5988290492610488e-06, "loss": 0.3128, "step": 18848 }, { "epoch": 0.8276891664928593, "grad_norm": 1.40625, "learning_rate": 3.5952538199250515e-06, "loss": 0.2913, "step": 18850 }, { "epoch": 0.8277769849720628, "grad_norm": 1.46875, "learning_rate": 3.5916802297696506e-06, "loss": 0.3064, "step": 18852 }, { "epoch": 0.8278648034512662, "grad_norm": 1.546875, "learning_rate": 3.5881082790685026e-06, "loss": 0.322, "step": 18854 }, { "epoch": 0.8279526219304697, "grad_norm": 1.4765625, "learning_rate": 3.5845379680951614e-06, "loss": 0.3225, "step": 18856 }, { "epoch": 0.8280404404096732, "grad_norm": 1.5390625, "learning_rate": 3.580969297123038e-06, "loss": 0.3068, "step": 18858 }, { "epoch": 0.8281282588888766, "grad_norm": 1.7578125, "learning_rate": 3.577402266425414e-06, "loss": 0.3302, "step": 18860 }, { "epoch": 0.8282160773680802, "grad_norm": 1.5078125, "learning_rate": 3.573836876275466e-06, "loss": 0.3012, "step": 18862 }, { "epoch": 0.8283038958472837, "grad_norm": 1.5, "learning_rate": 3.5702731269462193e-06, "loss": 0.338, "step": 18864 }, { "epoch": 0.8283917143264872, "grad_norm": 1.4609375, "learning_rate": 3.5667110187106056e-06, "loss": 0.3062, "step": 18866 }, { "epoch": 0.8284795328056906, "grad_norm": 1.4140625, "learning_rate": 3.5631505518413904e-06, "loss": 0.2983, "step": 18868 }, { "epoch": 0.8285673512848941, "grad_norm": 1.5234375, "learning_rate": 3.5595917266112474e-06, "loss": 0.309, "step": 18870 }, { "epoch": 0.8286551697640976, "grad_norm": 1.546875, "learning_rate": 3.5560345432927104e-06, "loss": 0.3137, "step": 18872 }, { "epoch": 0.8287429882433011, "grad_norm": 1.4453125, "learning_rate": 3.552479002158185e-06, "loss": 0.3057, "step": 18874 }, { "epoch": 0.8288308067225045, "grad_norm": 1.4765625, "learning_rate": 3.5489251034799608e-06, "loss": 0.328, "step": 18876 }, { "epoch": 0.8289186252017081, "grad_norm": 1.46875, "learning_rate": 3.545372847530193e-06, "loss": 0.3488, "step": 18878 }, { "epoch": 0.8290064436809116, "grad_norm": 1.5390625, "learning_rate": 3.5418222345809186e-06, "loss": 0.3083, "step": 18880 }, { "epoch": 0.8290942621601151, "grad_norm": 1.6640625, "learning_rate": 3.5382732649040405e-06, "loss": 0.3395, "step": 18882 }, { "epoch": 0.8291820806393185, "grad_norm": 1.53125, "learning_rate": 3.5347259387713354e-06, "loss": 0.314, "step": 18884 }, { "epoch": 0.829269899118522, "grad_norm": 1.5078125, "learning_rate": 3.5311802564544723e-06, "loss": 0.2924, "step": 18886 }, { "epoch": 0.8293577175977255, "grad_norm": 1.46875, "learning_rate": 3.527636218224961e-06, "loss": 0.3303, "step": 18888 }, { "epoch": 0.829445536076929, "grad_norm": 1.578125, "learning_rate": 3.5240938243542244e-06, "loss": 0.3289, "step": 18890 }, { "epoch": 0.8295333545561325, "grad_norm": 1.4375, "learning_rate": 3.5205530751135307e-06, "loss": 0.3181, "step": 18892 }, { "epoch": 0.829621173035336, "grad_norm": 1.6015625, "learning_rate": 3.5170139707740247e-06, "loss": 0.3109, "step": 18894 }, { "epoch": 0.8297089915145395, "grad_norm": 1.5625, "learning_rate": 3.5134765116067504e-06, "loss": 0.3041, "step": 18896 }, { "epoch": 0.829796809993743, "grad_norm": 1.453125, "learning_rate": 3.5099406978825857e-06, "loss": 0.3023, "step": 18898 }, { "epoch": 0.8298846284729464, "grad_norm": 1.578125, "learning_rate": 3.5064065298723163e-06, "loss": 0.3104, "step": 18900 }, { "epoch": 0.8299724469521499, "grad_norm": 1.46875, "learning_rate": 3.502874007846585e-06, "loss": 0.3267, "step": 18902 }, { "epoch": 0.8300602654313534, "grad_norm": 1.4609375, "learning_rate": 3.499343132075919e-06, "loss": 0.2919, "step": 18904 }, { "epoch": 0.8301480839105568, "grad_norm": 1.59375, "learning_rate": 3.495813902830711e-06, "loss": 0.3234, "step": 18906 }, { "epoch": 0.8302359023897604, "grad_norm": 1.4609375, "learning_rate": 3.492286320381222e-06, "loss": 0.3001, "step": 18908 }, { "epoch": 0.8303237208689639, "grad_norm": 1.5859375, "learning_rate": 3.488760384997608e-06, "loss": 0.2751, "step": 18910 }, { "epoch": 0.8304115393481674, "grad_norm": 1.53125, "learning_rate": 3.485236096949876e-06, "loss": 0.2929, "step": 18912 }, { "epoch": 0.8304993578273708, "grad_norm": 1.4453125, "learning_rate": 3.4817134565079264e-06, "loss": 0.3345, "step": 18914 }, { "epoch": 0.8305871763065743, "grad_norm": 1.53125, "learning_rate": 3.478192463941518e-06, "loss": 0.3347, "step": 18916 }, { "epoch": 0.8306749947857778, "grad_norm": 1.546875, "learning_rate": 3.4746731195202857e-06, "loss": 0.3297, "step": 18918 }, { "epoch": 0.8307628132649812, "grad_norm": 1.484375, "learning_rate": 3.47115542351375e-06, "loss": 0.2782, "step": 18920 }, { "epoch": 0.8308506317441847, "grad_norm": 1.375, "learning_rate": 3.467639376191287e-06, "loss": 0.3137, "step": 18922 }, { "epoch": 0.8309384502233883, "grad_norm": 1.4140625, "learning_rate": 3.4641249778221694e-06, "loss": 0.3205, "step": 18924 }, { "epoch": 0.8310262687025918, "grad_norm": 1.4609375, "learning_rate": 3.460612228675522e-06, "loss": 0.3171, "step": 18926 }, { "epoch": 0.8311140871817952, "grad_norm": 1.484375, "learning_rate": 3.4571011290203543e-06, "loss": 0.2997, "step": 18928 }, { "epoch": 0.8312019056609987, "grad_norm": 1.46875, "learning_rate": 3.4535916791255424e-06, "loss": 0.3184, "step": 18930 }, { "epoch": 0.8312897241402022, "grad_norm": 1.53125, "learning_rate": 3.4500838792598467e-06, "loss": 0.3275, "step": 18932 }, { "epoch": 0.8313775426194057, "grad_norm": 1.5, "learning_rate": 3.4465777296918965e-06, "loss": 0.3294, "step": 18934 }, { "epoch": 0.8314653610986091, "grad_norm": 1.4765625, "learning_rate": 3.443073230690183e-06, "loss": 0.3117, "step": 18936 }, { "epoch": 0.8315531795778126, "grad_norm": 1.484375, "learning_rate": 3.439570382523094e-06, "loss": 0.2968, "step": 18938 }, { "epoch": 0.8316409980570162, "grad_norm": 1.484375, "learning_rate": 3.4360691854588707e-06, "loss": 0.3137, "step": 18940 }, { "epoch": 0.8317288165362197, "grad_norm": 1.4765625, "learning_rate": 3.4325696397656425e-06, "loss": 0.3005, "step": 18942 }, { "epoch": 0.8318166350154231, "grad_norm": 1.4765625, "learning_rate": 3.429071745711401e-06, "loss": 0.3195, "step": 18944 }, { "epoch": 0.8319044534946266, "grad_norm": 1.5078125, "learning_rate": 3.42557550356401e-06, "loss": 0.3318, "step": 18946 }, { "epoch": 0.8319922719738301, "grad_norm": 1.453125, "learning_rate": 3.4220809135912247e-06, "loss": 0.3077, "step": 18948 }, { "epoch": 0.8320800904530335, "grad_norm": 1.390625, "learning_rate": 3.418587976060653e-06, "loss": 0.2822, "step": 18950 }, { "epoch": 0.832167908932237, "grad_norm": 1.5234375, "learning_rate": 3.4150966912397888e-06, "loss": 0.329, "step": 18952 }, { "epoch": 0.8322557274114406, "grad_norm": 1.4296875, "learning_rate": 3.4116070593959963e-06, "loss": 0.3254, "step": 18954 }, { "epoch": 0.8323435458906441, "grad_norm": 1.5234375, "learning_rate": 3.4081190807965043e-06, "loss": 0.3085, "step": 18956 }, { "epoch": 0.8324313643698475, "grad_norm": 1.46875, "learning_rate": 3.4046327557084396e-06, "loss": 0.3283, "step": 18958 }, { "epoch": 0.832519182849051, "grad_norm": 1.40625, "learning_rate": 3.4011480843987643e-06, "loss": 0.3184, "step": 18960 }, { "epoch": 0.8326070013282545, "grad_norm": 1.4453125, "learning_rate": 3.3976650671343535e-06, "loss": 0.3031, "step": 18962 }, { "epoch": 0.832694819807458, "grad_norm": 1.609375, "learning_rate": 3.3941837041819247e-06, "loss": 0.3436, "step": 18964 }, { "epoch": 0.8327826382866614, "grad_norm": 1.4921875, "learning_rate": 3.3907039958080895e-06, "loss": 0.3419, "step": 18966 }, { "epoch": 0.8328704567658649, "grad_norm": 1.5078125, "learning_rate": 3.3872259422793263e-06, "loss": 0.3346, "step": 18968 }, { "epoch": 0.8329582752450685, "grad_norm": 1.5234375, "learning_rate": 3.383749543861972e-06, "loss": 0.3143, "step": 18970 }, { "epoch": 0.833046093724272, "grad_norm": 1.4765625, "learning_rate": 3.3802748008222667e-06, "loss": 0.3023, "step": 18972 }, { "epoch": 0.8331339122034754, "grad_norm": 1.6015625, "learning_rate": 3.3768017134262945e-06, "loss": 0.3151, "step": 18974 }, { "epoch": 0.8332217306826789, "grad_norm": 1.5390625, "learning_rate": 3.3733302819400376e-06, "loss": 0.3068, "step": 18976 }, { "epoch": 0.8333095491618824, "grad_norm": 1.3984375, "learning_rate": 3.36986050662933e-06, "loss": 0.3282, "step": 18978 }, { "epoch": 0.8333973676410859, "grad_norm": 1.3984375, "learning_rate": 3.366392387759884e-06, "loss": 0.3174, "step": 18980 }, { "epoch": 0.8334851861202893, "grad_norm": 1.4453125, "learning_rate": 3.3629259255973017e-06, "loss": 0.3048, "step": 18982 }, { "epoch": 0.8335730045994928, "grad_norm": 1.4140625, "learning_rate": 3.3594611204070313e-06, "loss": 0.3117, "step": 18984 }, { "epoch": 0.8336608230786964, "grad_norm": 1.46875, "learning_rate": 3.355997972454425e-06, "loss": 0.3045, "step": 18986 }, { "epoch": 0.8337486415578998, "grad_norm": 1.5390625, "learning_rate": 3.3525364820046782e-06, "loss": 0.3071, "step": 18988 }, { "epoch": 0.8338364600371033, "grad_norm": 1.46875, "learning_rate": 3.34907664932288e-06, "loss": 0.2948, "step": 18990 }, { "epoch": 0.8339242785163068, "grad_norm": 1.546875, "learning_rate": 3.3456184746739753e-06, "loss": 0.3179, "step": 18992 }, { "epoch": 0.8340120969955103, "grad_norm": 1.453125, "learning_rate": 3.3421619583228036e-06, "loss": 0.3317, "step": 18994 }, { "epoch": 0.8340999154747137, "grad_norm": 1.4296875, "learning_rate": 3.338707100534061e-06, "loss": 0.2953, "step": 18996 }, { "epoch": 0.8341877339539172, "grad_norm": 1.4140625, "learning_rate": 3.335253901572316e-06, "loss": 0.2883, "step": 18998 }, { "epoch": 0.8342755524331208, "grad_norm": 1.5625, "learning_rate": 3.3318023617020273e-06, "loss": 0.3376, "step": 19000 }, { "epoch": 0.8343633709123243, "grad_norm": 1.4375, "learning_rate": 3.3283524811875023e-06, "loss": 0.3111, "step": 19002 }, { "epoch": 0.8344511893915277, "grad_norm": 1.421875, "learning_rate": 3.3249042602929437e-06, "loss": 0.3274, "step": 19004 }, { "epoch": 0.8345390078707312, "grad_norm": 1.4921875, "learning_rate": 3.3214576992824125e-06, "loss": 0.336, "step": 19006 }, { "epoch": 0.8346268263499347, "grad_norm": 1.46875, "learning_rate": 3.3180127984198423e-06, "loss": 0.3123, "step": 19008 }, { "epoch": 0.8347146448291382, "grad_norm": 1.4921875, "learning_rate": 3.314569557969055e-06, "loss": 0.3108, "step": 19010 }, { "epoch": 0.8348024633083416, "grad_norm": 1.40625, "learning_rate": 3.311127978193726e-06, "loss": 0.3298, "step": 19012 }, { "epoch": 0.8348902817875451, "grad_norm": 1.4765625, "learning_rate": 3.3076880593574196e-06, "loss": 0.3208, "step": 19014 }, { "epoch": 0.8349781002667487, "grad_norm": 1.6328125, "learning_rate": 3.3042498017235607e-06, "loss": 0.342, "step": 19016 }, { "epoch": 0.8350659187459522, "grad_norm": 1.4375, "learning_rate": 3.300813205555453e-06, "loss": 0.2855, "step": 19018 }, { "epoch": 0.8351537372251556, "grad_norm": 1.46875, "learning_rate": 3.2973782711162722e-06, "loss": 0.3106, "step": 19020 }, { "epoch": 0.8352415557043591, "grad_norm": 1.46875, "learning_rate": 3.29394499866906e-06, "loss": 0.3279, "step": 19022 }, { "epoch": 0.8353293741835626, "grad_norm": 1.4375, "learning_rate": 3.2905133884767486e-06, "loss": 0.3162, "step": 19024 }, { "epoch": 0.835417192662766, "grad_norm": 1.4609375, "learning_rate": 3.287083440802122e-06, "loss": 0.3103, "step": 19026 }, { "epoch": 0.8355050111419695, "grad_norm": 1.4453125, "learning_rate": 3.2836551559078525e-06, "loss": 0.2965, "step": 19028 }, { "epoch": 0.835592829621173, "grad_norm": 1.40625, "learning_rate": 3.280228534056479e-06, "loss": 0.3235, "step": 19030 }, { "epoch": 0.8356806481003766, "grad_norm": 1.46875, "learning_rate": 3.2768035755104064e-06, "loss": 0.29, "step": 19032 }, { "epoch": 0.83576846657958, "grad_norm": 1.453125, "learning_rate": 3.273380280531929e-06, "loss": 0.3043, "step": 19034 }, { "epoch": 0.8358562850587835, "grad_norm": 1.5078125, "learning_rate": 3.2699586493831895e-06, "loss": 0.3144, "step": 19036 }, { "epoch": 0.835944103537987, "grad_norm": 1.4375, "learning_rate": 3.266538682326234e-06, "loss": 0.3357, "step": 19038 }, { "epoch": 0.8360319220171905, "grad_norm": 1.4453125, "learning_rate": 3.2631203796229555e-06, "loss": 0.3015, "step": 19040 }, { "epoch": 0.8361197404963939, "grad_norm": 1.4140625, "learning_rate": 3.259703741535122e-06, "loss": 0.2977, "step": 19042 }, { "epoch": 0.8362075589755974, "grad_norm": 1.3984375, "learning_rate": 3.256288768324395e-06, "loss": 0.299, "step": 19044 }, { "epoch": 0.836295377454801, "grad_norm": 1.5234375, "learning_rate": 3.2528754602522804e-06, "loss": 0.3027, "step": 19046 }, { "epoch": 0.8363831959340045, "grad_norm": 1.6015625, "learning_rate": 3.249463817580181e-06, "loss": 0.3362, "step": 19048 }, { "epoch": 0.8364710144132079, "grad_norm": 1.5234375, "learning_rate": 3.2460538405693573e-06, "loss": 0.31, "step": 19050 }, { "epoch": 0.8365588328924114, "grad_norm": 1.46875, "learning_rate": 3.242645529480945e-06, "loss": 0.3407, "step": 19052 }, { "epoch": 0.8366466513716149, "grad_norm": 1.5390625, "learning_rate": 3.239238884575949e-06, "loss": 0.3327, "step": 19054 }, { "epoch": 0.8367344698508183, "grad_norm": 1.4609375, "learning_rate": 3.23583390611526e-06, "loss": 0.3321, "step": 19056 }, { "epoch": 0.8368222883300218, "grad_norm": 1.4765625, "learning_rate": 3.2324305943596256e-06, "loss": 0.2949, "step": 19058 }, { "epoch": 0.8369101068092253, "grad_norm": 1.5078125, "learning_rate": 3.2290289495696705e-06, "loss": 0.2996, "step": 19060 }, { "epoch": 0.8369979252884289, "grad_norm": 1.5390625, "learning_rate": 3.2256289720059035e-06, "loss": 0.3195, "step": 19062 }, { "epoch": 0.8370857437676323, "grad_norm": 1.5390625, "learning_rate": 3.2222306619286852e-06, "loss": 0.2868, "step": 19064 }, { "epoch": 0.8371735622468358, "grad_norm": 1.4765625, "learning_rate": 3.2188340195982685e-06, "loss": 0.3008, "step": 19066 }, { "epoch": 0.8372613807260393, "grad_norm": 1.453125, "learning_rate": 3.2154390452747625e-06, "loss": 0.3051, "step": 19068 }, { "epoch": 0.8373491992052428, "grad_norm": 1.546875, "learning_rate": 3.2120457392181503e-06, "loss": 0.3128, "step": 19070 }, { "epoch": 0.8374370176844462, "grad_norm": 1.46875, "learning_rate": 3.208654101688305e-06, "loss": 0.3044, "step": 19072 }, { "epoch": 0.8375248361636497, "grad_norm": 1.3984375, "learning_rate": 3.205264132944946e-06, "loss": 0.3149, "step": 19074 }, { "epoch": 0.8376126546428532, "grad_norm": 1.4765625, "learning_rate": 3.2018758332476916e-06, "loss": 0.3165, "step": 19076 }, { "epoch": 0.8377004731220568, "grad_norm": 1.4375, "learning_rate": 3.198489202856009e-06, "loss": 0.2965, "step": 19078 }, { "epoch": 0.8377882916012602, "grad_norm": 1.5078125, "learning_rate": 3.195104242029251e-06, "loss": 0.3335, "step": 19080 }, { "epoch": 0.8378761100804637, "grad_norm": 1.6171875, "learning_rate": 3.191720951026636e-06, "loss": 0.3275, "step": 19082 }, { "epoch": 0.8379639285596672, "grad_norm": 1.515625, "learning_rate": 3.188339330107254e-06, "loss": 0.3249, "step": 19084 }, { "epoch": 0.8380517470388706, "grad_norm": 1.4609375, "learning_rate": 3.184959379530081e-06, "loss": 0.3148, "step": 19086 }, { "epoch": 0.8381395655180741, "grad_norm": 1.46875, "learning_rate": 3.1815810995539404e-06, "loss": 0.3339, "step": 19088 }, { "epoch": 0.8382273839972776, "grad_norm": 1.40625, "learning_rate": 3.178204490437556e-06, "loss": 0.3135, "step": 19090 }, { "epoch": 0.8383152024764811, "grad_norm": 1.4765625, "learning_rate": 3.174829552439504e-06, "loss": 0.3102, "step": 19092 }, { "epoch": 0.8384030209556846, "grad_norm": 1.5703125, "learning_rate": 3.1714562858182277e-06, "loss": 0.3405, "step": 19094 }, { "epoch": 0.8384908394348881, "grad_norm": 1.5546875, "learning_rate": 3.168084690832071e-06, "loss": 0.3078, "step": 19096 }, { "epoch": 0.8385786579140916, "grad_norm": 1.4765625, "learning_rate": 3.1647147677392157e-06, "loss": 0.3009, "step": 19098 }, { "epoch": 0.8386664763932951, "grad_norm": 1.515625, "learning_rate": 3.161346516797742e-06, "loss": 0.3162, "step": 19100 }, { "epoch": 0.8387542948724985, "grad_norm": 1.53125, "learning_rate": 3.157979938265587e-06, "loss": 0.3176, "step": 19102 }, { "epoch": 0.838842113351702, "grad_norm": 1.5546875, "learning_rate": 3.1546150324005595e-06, "loss": 0.3332, "step": 19104 }, { "epoch": 0.8389299318309055, "grad_norm": 1.4453125, "learning_rate": 3.1512517994603556e-06, "loss": 0.2953, "step": 19106 }, { "epoch": 0.8390177503101091, "grad_norm": 1.4140625, "learning_rate": 3.1478902397025196e-06, "loss": 0.3121, "step": 19108 }, { "epoch": 0.8391055687893125, "grad_norm": 1.4375, "learning_rate": 3.1445303533845005e-06, "loss": 0.303, "step": 19110 }, { "epoch": 0.839193387268516, "grad_norm": 1.5, "learning_rate": 3.1411721407635738e-06, "loss": 0.3029, "step": 19112 }, { "epoch": 0.8392812057477195, "grad_norm": 1.5703125, "learning_rate": 3.13781560209693e-06, "loss": 0.3016, "step": 19114 }, { "epoch": 0.8393690242269229, "grad_norm": 1.4609375, "learning_rate": 3.1344607376416008e-06, "loss": 0.3275, "step": 19116 }, { "epoch": 0.8394568427061264, "grad_norm": 1.4453125, "learning_rate": 3.131107547654519e-06, "loss": 0.2944, "step": 19118 }, { "epoch": 0.8395446611853299, "grad_norm": 1.515625, "learning_rate": 3.1277560323924593e-06, "loss": 0.3159, "step": 19120 }, { "epoch": 0.8396324796645334, "grad_norm": 1.4453125, "learning_rate": 3.1244061921120838e-06, "loss": 0.3307, "step": 19122 }, { "epoch": 0.8397202981437369, "grad_norm": 1.53125, "learning_rate": 3.1210580270699285e-06, "loss": 0.3018, "step": 19124 }, { "epoch": 0.8398081166229404, "grad_norm": 1.4453125, "learning_rate": 3.1177115375223915e-06, "loss": 0.3169, "step": 19126 }, { "epoch": 0.8398959351021439, "grad_norm": 1.5546875, "learning_rate": 3.1143667237257563e-06, "loss": 0.3231, "step": 19128 }, { "epoch": 0.8399837535813474, "grad_norm": 1.4296875, "learning_rate": 3.1110235859361597e-06, "loss": 0.3246, "step": 19130 }, { "epoch": 0.8400715720605508, "grad_norm": 1.4375, "learning_rate": 3.107682124409622e-06, "loss": 0.2926, "step": 19132 }, { "epoch": 0.8401593905397543, "grad_norm": 1.625, "learning_rate": 3.1043423394020417e-06, "loss": 0.3063, "step": 19134 }, { "epoch": 0.8402472090189578, "grad_norm": 1.4453125, "learning_rate": 3.1010042311691663e-06, "loss": 0.3252, "step": 19136 }, { "epoch": 0.8403350274981612, "grad_norm": 1.4921875, "learning_rate": 3.0976677999666414e-06, "loss": 0.3084, "step": 19138 }, { "epoch": 0.8404228459773648, "grad_norm": 1.4765625, "learning_rate": 3.094333046049966e-06, "loss": 0.3037, "step": 19140 }, { "epoch": 0.8405106644565683, "grad_norm": 1.5859375, "learning_rate": 3.0909999696745185e-06, "loss": 0.3232, "step": 19142 }, { "epoch": 0.8405984829357718, "grad_norm": 1.4453125, "learning_rate": 3.0876685710955476e-06, "loss": 0.3094, "step": 19144 }, { "epoch": 0.8406863014149752, "grad_norm": 1.53125, "learning_rate": 3.084338850568161e-06, "loss": 0.2993, "step": 19146 }, { "epoch": 0.8407741198941787, "grad_norm": 1.546875, "learning_rate": 3.081010808347365e-06, "loss": 0.314, "step": 19148 }, { "epoch": 0.8408619383733822, "grad_norm": 1.4765625, "learning_rate": 3.0776844446880115e-06, "loss": 0.3055, "step": 19150 }, { "epoch": 0.8409497568525857, "grad_norm": 1.40625, "learning_rate": 3.074359759844844e-06, "loss": 0.2917, "step": 19152 }, { "epoch": 0.8410375753317892, "grad_norm": 1.5078125, "learning_rate": 3.071036754072462e-06, "loss": 0.3387, "step": 19154 }, { "epoch": 0.8411253938109927, "grad_norm": 1.453125, "learning_rate": 3.067715427625334e-06, "loss": 0.3268, "step": 19156 }, { "epoch": 0.8412132122901962, "grad_norm": 1.4296875, "learning_rate": 3.0643957807578253e-06, "loss": 0.2954, "step": 19158 }, { "epoch": 0.8413010307693997, "grad_norm": 1.359375, "learning_rate": 3.061077813724139e-06, "loss": 0.3172, "step": 19160 }, { "epoch": 0.8413888492486031, "grad_norm": 1.453125, "learning_rate": 3.0577615267783773e-06, "loss": 0.2994, "step": 19162 }, { "epoch": 0.8414766677278066, "grad_norm": 1.453125, "learning_rate": 3.0544469201744976e-06, "loss": 0.3058, "step": 19164 }, { "epoch": 0.8415644862070101, "grad_norm": 1.4765625, "learning_rate": 3.0511339941663303e-06, "loss": 0.313, "step": 19166 }, { "epoch": 0.8416523046862135, "grad_norm": 1.4296875, "learning_rate": 3.0478227490075866e-06, "loss": 0.313, "step": 19168 }, { "epoch": 0.8417401231654171, "grad_norm": 1.4453125, "learning_rate": 3.044513184951833e-06, "loss": 0.3098, "step": 19170 }, { "epoch": 0.8418279416446206, "grad_norm": 1.515625, "learning_rate": 3.0412053022525367e-06, "loss": 0.2796, "step": 19172 }, { "epoch": 0.8419157601238241, "grad_norm": 1.4921875, "learning_rate": 3.037899101162989e-06, "loss": 0.3176, "step": 19174 }, { "epoch": 0.8420035786030275, "grad_norm": 1.46875, "learning_rate": 3.034594581936398e-06, "loss": 0.2792, "step": 19176 }, { "epoch": 0.842091397082231, "grad_norm": 1.5078125, "learning_rate": 3.0312917448258205e-06, "loss": 0.3012, "step": 19178 }, { "epoch": 0.8421792155614345, "grad_norm": 1.375, "learning_rate": 3.0279905900841815e-06, "loss": 0.3114, "step": 19180 }, { "epoch": 0.842267034040638, "grad_norm": 1.4765625, "learning_rate": 3.024691117964298e-06, "loss": 0.3407, "step": 19182 }, { "epoch": 0.8423548525198414, "grad_norm": 1.5, "learning_rate": 3.0213933287188272e-06, "loss": 0.2972, "step": 19184 }, { "epoch": 0.842442670999045, "grad_norm": 1.5, "learning_rate": 3.0180972226003327e-06, "loss": 0.2992, "step": 19186 }, { "epoch": 0.8425304894782485, "grad_norm": 1.4296875, "learning_rate": 3.014802799861216e-06, "loss": 0.2968, "step": 19188 }, { "epoch": 0.842618307957452, "grad_norm": 1.484375, "learning_rate": 3.011510060753775e-06, "loss": 0.3046, "step": 19190 }, { "epoch": 0.8427061264366554, "grad_norm": 1.4375, "learning_rate": 3.008219005530166e-06, "loss": 0.3093, "step": 19192 }, { "epoch": 0.8427939449158589, "grad_norm": 1.5234375, "learning_rate": 3.0049296344424103e-06, "loss": 0.3324, "step": 19194 }, { "epoch": 0.8428817633950624, "grad_norm": 1.5625, "learning_rate": 3.001641947742423e-06, "loss": 0.3031, "step": 19196 }, { "epoch": 0.8429695818742658, "grad_norm": 1.53125, "learning_rate": 2.9983559456819633e-06, "loss": 0.3161, "step": 19198 }, { "epoch": 0.8430574003534694, "grad_norm": 1.4375, "learning_rate": 2.9950716285126827e-06, "loss": 0.3365, "step": 19200 }, { "epoch": 0.8431452188326729, "grad_norm": 1.390625, "learning_rate": 2.9917889964860917e-06, "loss": 0.3279, "step": 19202 }, { "epoch": 0.8432330373118764, "grad_norm": 1.5, "learning_rate": 2.9885080498535777e-06, "loss": 0.3001, "step": 19204 }, { "epoch": 0.8433208557910798, "grad_norm": 1.5, "learning_rate": 2.9852287888663928e-06, "loss": 0.2887, "step": 19206 }, { "epoch": 0.8434086742702833, "grad_norm": 1.5703125, "learning_rate": 2.9819512137756577e-06, "loss": 0.3053, "step": 19208 }, { "epoch": 0.8434964927494868, "grad_norm": 1.4453125, "learning_rate": 2.9786753248323833e-06, "loss": 0.3567, "step": 19210 }, { "epoch": 0.8435843112286903, "grad_norm": 1.5234375, "learning_rate": 2.9754011222874275e-06, "loss": 0.2944, "step": 19212 }, { "epoch": 0.8436721297078937, "grad_norm": 1.4609375, "learning_rate": 2.972128606391536e-06, "loss": 0.3047, "step": 19214 }, { "epoch": 0.8437599481870973, "grad_norm": 1.4609375, "learning_rate": 2.9688577773953176e-06, "loss": 0.2938, "step": 19216 }, { "epoch": 0.8438477666663008, "grad_norm": 1.453125, "learning_rate": 2.965588635549249e-06, "loss": 0.3215, "step": 19218 }, { "epoch": 0.8439355851455043, "grad_norm": 1.6171875, "learning_rate": 2.9623211811036862e-06, "loss": 0.3035, "step": 19220 }, { "epoch": 0.8440234036247077, "grad_norm": 1.4609375, "learning_rate": 2.959055414308845e-06, "loss": 0.3194, "step": 19222 }, { "epoch": 0.8441112221039112, "grad_norm": 1.453125, "learning_rate": 2.9557913354148316e-06, "loss": 0.3443, "step": 19224 }, { "epoch": 0.8441990405831147, "grad_norm": 1.4765625, "learning_rate": 2.952528944671601e-06, "loss": 0.3636, "step": 19226 }, { "epoch": 0.8442868590623182, "grad_norm": 1.5703125, "learning_rate": 2.9492682423289843e-06, "loss": 0.2937, "step": 19228 }, { "epoch": 0.8443746775415216, "grad_norm": 1.421875, "learning_rate": 2.9460092286366955e-06, "loss": 0.3119, "step": 19230 }, { "epoch": 0.8444624960207252, "grad_norm": 1.4921875, "learning_rate": 2.9427519038443014e-06, "loss": 0.2929, "step": 19232 }, { "epoch": 0.8445503144999287, "grad_norm": 1.515625, "learning_rate": 2.939496268201264e-06, "loss": 0.3141, "step": 19234 }, { "epoch": 0.8446381329791322, "grad_norm": 1.4453125, "learning_rate": 2.936242321956881e-06, "loss": 0.3232, "step": 19236 }, { "epoch": 0.8447259514583356, "grad_norm": 1.4765625, "learning_rate": 2.9329900653603553e-06, "loss": 0.3165, "step": 19238 }, { "epoch": 0.8448137699375391, "grad_norm": 1.609375, "learning_rate": 2.929739498660741e-06, "loss": 0.3192, "step": 19240 }, { "epoch": 0.8449015884167426, "grad_norm": 1.4375, "learning_rate": 2.9264906221069586e-06, "loss": 0.3393, "step": 19242 }, { "epoch": 0.844989406895946, "grad_norm": 1.5234375, "learning_rate": 2.9232434359478256e-06, "loss": 0.3182, "step": 19244 }, { "epoch": 0.8450772253751496, "grad_norm": 1.4296875, "learning_rate": 2.919997940431993e-06, "loss": 0.2901, "step": 19246 }, { "epoch": 0.8451650438543531, "grad_norm": 1.5234375, "learning_rate": 2.916754135808017e-06, "loss": 0.2945, "step": 19248 }, { "epoch": 0.8452528623335566, "grad_norm": 1.4921875, "learning_rate": 2.9135120223242996e-06, "loss": 0.3016, "step": 19250 }, { "epoch": 0.84534068081276, "grad_norm": 1.4375, "learning_rate": 2.9102716002291307e-06, "loss": 0.2988, "step": 19252 }, { "epoch": 0.8454284992919635, "grad_norm": 1.7109375, "learning_rate": 2.907032869770657e-06, "loss": 0.3238, "step": 19254 }, { "epoch": 0.845516317771167, "grad_norm": 1.5078125, "learning_rate": 2.903795831196898e-06, "loss": 0.3231, "step": 19256 }, { "epoch": 0.8456041362503705, "grad_norm": 1.4375, "learning_rate": 2.900560484755754e-06, "loss": 0.3126, "step": 19258 }, { "epoch": 0.8456919547295739, "grad_norm": 1.4609375, "learning_rate": 2.8973268306949824e-06, "loss": 0.3204, "step": 19260 }, { "epoch": 0.8457797732087775, "grad_norm": 1.5703125, "learning_rate": 2.894094869262226e-06, "loss": 0.3058, "step": 19262 }, { "epoch": 0.845867591687981, "grad_norm": 1.3984375, "learning_rate": 2.8908646007049843e-06, "loss": 0.3002, "step": 19264 }, { "epoch": 0.8459554101671845, "grad_norm": 1.5234375, "learning_rate": 2.887636025270629e-06, "loss": 0.3175, "step": 19266 }, { "epoch": 0.8460432286463879, "grad_norm": 1.5234375, "learning_rate": 2.8844091432064097e-06, "loss": 0.3171, "step": 19268 }, { "epoch": 0.8461310471255914, "grad_norm": 1.4375, "learning_rate": 2.8811839547594337e-06, "loss": 0.3075, "step": 19270 }, { "epoch": 0.8462188656047949, "grad_norm": 1.4921875, "learning_rate": 2.8779604601766957e-06, "loss": 0.3231, "step": 19272 }, { "epoch": 0.8463066840839983, "grad_norm": 1.59375, "learning_rate": 2.874738659705048e-06, "loss": 0.3302, "step": 19274 }, { "epoch": 0.8463945025632018, "grad_norm": 1.484375, "learning_rate": 2.8715185535912182e-06, "loss": 0.3165, "step": 19276 }, { "epoch": 0.8464823210424054, "grad_norm": 1.5390625, "learning_rate": 2.868300142081806e-06, "loss": 0.3322, "step": 19278 }, { "epoch": 0.8465701395216089, "grad_norm": 1.46875, "learning_rate": 2.865083425423265e-06, "loss": 0.3214, "step": 19280 }, { "epoch": 0.8466579580008123, "grad_norm": 1.4765625, "learning_rate": 2.8618684038619504e-06, "loss": 0.327, "step": 19282 }, { "epoch": 0.8467457764800158, "grad_norm": 1.484375, "learning_rate": 2.858655077644054e-06, "loss": 0.314, "step": 19284 }, { "epoch": 0.8468335949592193, "grad_norm": 1.609375, "learning_rate": 2.855443447015663e-06, "loss": 0.3064, "step": 19286 }, { "epoch": 0.8469214134384228, "grad_norm": 1.4765625, "learning_rate": 2.8522335122227214e-06, "loss": 0.29, "step": 19288 }, { "epoch": 0.8470092319176262, "grad_norm": 1.6953125, "learning_rate": 2.849025273511044e-06, "loss": 0.3206, "step": 19290 }, { "epoch": 0.8470970503968297, "grad_norm": 1.453125, "learning_rate": 2.8458187311263233e-06, "loss": 0.327, "step": 19292 }, { "epoch": 0.8471848688760333, "grad_norm": 1.5078125, "learning_rate": 2.842613885314116e-06, "loss": 0.3152, "step": 19294 }, { "epoch": 0.8472726873552368, "grad_norm": 1.5, "learning_rate": 2.83941073631985e-06, "loss": 0.3103, "step": 19296 }, { "epoch": 0.8473605058344402, "grad_norm": 1.5234375, "learning_rate": 2.836209284388816e-06, "loss": 0.3559, "step": 19298 }, { "epoch": 0.8474483243136437, "grad_norm": 1.4453125, "learning_rate": 2.8330095297661925e-06, "loss": 0.3054, "step": 19300 }, { "epoch": 0.8475361427928472, "grad_norm": 1.453125, "learning_rate": 2.8298114726970138e-06, "loss": 0.2983, "step": 19302 }, { "epoch": 0.8476239612720506, "grad_norm": 1.421875, "learning_rate": 2.8266151134261815e-06, "loss": 0.2949, "step": 19304 }, { "epoch": 0.8477117797512541, "grad_norm": 1.546875, "learning_rate": 2.8234204521984857e-06, "loss": 0.3187, "step": 19306 }, { "epoch": 0.8477995982304577, "grad_norm": 1.4765625, "learning_rate": 2.8202274892585617e-06, "loss": 0.3119, "step": 19308 }, { "epoch": 0.8478874167096612, "grad_norm": 1.546875, "learning_rate": 2.8170362248509386e-06, "loss": 0.3204, "step": 19310 }, { "epoch": 0.8479752351888646, "grad_norm": 1.53125, "learning_rate": 2.8138466592199986e-06, "loss": 0.3137, "step": 19312 }, { "epoch": 0.8480630536680681, "grad_norm": 1.484375, "learning_rate": 2.8106587926099963e-06, "loss": 0.2915, "step": 19314 }, { "epoch": 0.8481508721472716, "grad_norm": 1.453125, "learning_rate": 2.8074726252650645e-06, "loss": 0.322, "step": 19316 }, { "epoch": 0.8482386906264751, "grad_norm": 1.484375, "learning_rate": 2.804288157429197e-06, "loss": 0.3051, "step": 19318 }, { "epoch": 0.8483265091056785, "grad_norm": 1.5234375, "learning_rate": 2.8011053893462676e-06, "loss": 0.3313, "step": 19320 }, { "epoch": 0.848414327584882, "grad_norm": 1.515625, "learning_rate": 2.7979243212600035e-06, "loss": 0.331, "step": 19322 }, { "epoch": 0.8485021460640856, "grad_norm": 1.4921875, "learning_rate": 2.7947449534140217e-06, "loss": 0.3045, "step": 19324 }, { "epoch": 0.848589964543289, "grad_norm": 1.40625, "learning_rate": 2.7915672860517937e-06, "loss": 0.3273, "step": 19326 }, { "epoch": 0.8486777830224925, "grad_norm": 1.484375, "learning_rate": 2.7883913194166693e-06, "loss": 0.3184, "step": 19328 }, { "epoch": 0.848765601501696, "grad_norm": 1.4765625, "learning_rate": 2.7852170537518594e-06, "loss": 0.3319, "step": 19330 }, { "epoch": 0.8488534199808995, "grad_norm": 1.6953125, "learning_rate": 2.782044489300448e-06, "loss": 0.313, "step": 19332 }, { "epoch": 0.8489412384601029, "grad_norm": 1.546875, "learning_rate": 2.7788736263054033e-06, "loss": 0.3191, "step": 19334 }, { "epoch": 0.8490290569393064, "grad_norm": 1.5078125, "learning_rate": 2.775704465009535e-06, "loss": 0.3005, "step": 19336 }, { "epoch": 0.8491168754185099, "grad_norm": 1.40625, "learning_rate": 2.772537005655554e-06, "loss": 0.2972, "step": 19338 }, { "epoch": 0.8492046938977135, "grad_norm": 1.46875, "learning_rate": 2.7693712484860135e-06, "loss": 0.3037, "step": 19340 }, { "epoch": 0.8492925123769169, "grad_norm": 1.484375, "learning_rate": 2.7662071937433502e-06, "loss": 0.3017, "step": 19342 }, { "epoch": 0.8493803308561204, "grad_norm": 1.53125, "learning_rate": 2.7630448416698734e-06, "loss": 0.304, "step": 19344 }, { "epoch": 0.8494681493353239, "grad_norm": 1.46875, "learning_rate": 2.75988419250775e-06, "loss": 0.2853, "step": 19346 }, { "epoch": 0.8495559678145274, "grad_norm": 1.53125, "learning_rate": 2.7567252464990283e-06, "loss": 0.3149, "step": 19348 }, { "epoch": 0.8496437862937308, "grad_norm": 1.4453125, "learning_rate": 2.753568003885623e-06, "loss": 0.2996, "step": 19350 }, { "epoch": 0.8497316047729343, "grad_norm": 1.421875, "learning_rate": 2.750412464909305e-06, "loss": 0.3227, "step": 19352 }, { "epoch": 0.8498194232521379, "grad_norm": 1.46875, "learning_rate": 2.7472586298117385e-06, "loss": 0.311, "step": 19354 }, { "epoch": 0.8499072417313414, "grad_norm": 1.4453125, "learning_rate": 2.7441064988344424e-06, "loss": 0.3119, "step": 19356 }, { "epoch": 0.8499950602105448, "grad_norm": 1.671875, "learning_rate": 2.740956072218806e-06, "loss": 0.3566, "step": 19358 }, { "epoch": 0.8500828786897483, "grad_norm": 1.453125, "learning_rate": 2.737807350206084e-06, "loss": 0.3247, "step": 19360 }, { "epoch": 0.8501706971689518, "grad_norm": 1.4921875, "learning_rate": 2.734660333037417e-06, "loss": 0.3071, "step": 19362 }, { "epoch": 0.8502585156481552, "grad_norm": 1.4375, "learning_rate": 2.7315150209537984e-06, "loss": 0.3137, "step": 19364 }, { "epoch": 0.8503463341273587, "grad_norm": 1.5625, "learning_rate": 2.7283714141960905e-06, "loss": 0.3234, "step": 19366 }, { "epoch": 0.8504341526065622, "grad_norm": 1.515625, "learning_rate": 2.7252295130050483e-06, "loss": 0.2924, "step": 19368 }, { "epoch": 0.8505219710857658, "grad_norm": 1.4609375, "learning_rate": 2.7220893176212595e-06, "loss": 0.3097, "step": 19370 }, { "epoch": 0.8506097895649692, "grad_norm": 1.4921875, "learning_rate": 2.718950828285219e-06, "loss": 0.2995, "step": 19372 }, { "epoch": 0.8506976080441727, "grad_norm": 1.5078125, "learning_rate": 2.7158140452372666e-06, "loss": 0.3103, "step": 19374 }, { "epoch": 0.8507854265233762, "grad_norm": 1.4765625, "learning_rate": 2.712678968717608e-06, "loss": 0.2902, "step": 19376 }, { "epoch": 0.8508732450025797, "grad_norm": 1.484375, "learning_rate": 2.7095455989663452e-06, "loss": 0.315, "step": 19378 }, { "epoch": 0.8509610634817831, "grad_norm": 1.5546875, "learning_rate": 2.7064139362234174e-06, "loss": 0.3209, "step": 19380 }, { "epoch": 0.8510488819609866, "grad_norm": 1.59375, "learning_rate": 2.7032839807286575e-06, "loss": 0.3109, "step": 19382 }, { "epoch": 0.8511367004401901, "grad_norm": 1.6640625, "learning_rate": 2.700155732721751e-06, "loss": 0.3309, "step": 19384 }, { "epoch": 0.8512245189193937, "grad_norm": 1.515625, "learning_rate": 2.6970291924422768e-06, "loss": 0.2905, "step": 19386 }, { "epoch": 0.8513123373985971, "grad_norm": 1.453125, "learning_rate": 2.6939043601296425e-06, "loss": 0.3297, "step": 19388 }, { "epoch": 0.8514001558778006, "grad_norm": 1.625, "learning_rate": 2.690781236023163e-06, "loss": 0.347, "step": 19390 }, { "epoch": 0.8514879743570041, "grad_norm": 1.4296875, "learning_rate": 2.687659820362004e-06, "loss": 0.2977, "step": 19392 }, { "epoch": 0.8515757928362075, "grad_norm": 1.53125, "learning_rate": 2.6845401133852005e-06, "loss": 0.3306, "step": 19394 }, { "epoch": 0.851663611315411, "grad_norm": 1.4375, "learning_rate": 2.6814221153316694e-06, "loss": 0.3197, "step": 19396 }, { "epoch": 0.8517514297946145, "grad_norm": 1.5390625, "learning_rate": 2.6783058264401776e-06, "loss": 0.2946, "step": 19398 }, { "epoch": 0.8518392482738181, "grad_norm": 1.5078125, "learning_rate": 2.675191246949382e-06, "loss": 0.3442, "step": 19400 }, { "epoch": 0.8519270667530215, "grad_norm": 1.4296875, "learning_rate": 2.672078377097792e-06, "loss": 0.292, "step": 19402 }, { "epoch": 0.852014885232225, "grad_norm": 1.4296875, "learning_rate": 2.6689672171237885e-06, "loss": 0.3111, "step": 19404 }, { "epoch": 0.8521027037114285, "grad_norm": 1.515625, "learning_rate": 2.6658577672656337e-06, "loss": 0.2968, "step": 19406 }, { "epoch": 0.852190522190632, "grad_norm": 1.4921875, "learning_rate": 2.6627500277614376e-06, "loss": 0.303, "step": 19408 }, { "epoch": 0.8522783406698354, "grad_norm": 1.46875, "learning_rate": 2.6596439988492065e-06, "loss": 0.2896, "step": 19410 }, { "epoch": 0.8523661591490389, "grad_norm": 1.546875, "learning_rate": 2.6565396807667895e-06, "loss": 0.3206, "step": 19412 }, { "epoch": 0.8524539776282424, "grad_norm": 1.546875, "learning_rate": 2.6534370737519188e-06, "loss": 0.3197, "step": 19414 }, { "epoch": 0.852541796107446, "grad_norm": 1.5, "learning_rate": 2.6503361780421983e-06, "loss": 0.2924, "step": 19416 }, { "epoch": 0.8526296145866494, "grad_norm": 1.46875, "learning_rate": 2.647236993875088e-06, "loss": 0.3023, "step": 19418 }, { "epoch": 0.8527174330658529, "grad_norm": 1.421875, "learning_rate": 2.644139521487929e-06, "loss": 0.316, "step": 19420 }, { "epoch": 0.8528052515450564, "grad_norm": 1.5078125, "learning_rate": 2.64104376111792e-06, "loss": 0.3262, "step": 19422 }, { "epoch": 0.8528930700242598, "grad_norm": 1.5078125, "learning_rate": 2.6379497130021443e-06, "loss": 0.322, "step": 19424 }, { "epoch": 0.8529808885034633, "grad_norm": 1.609375, "learning_rate": 2.6348573773775394e-06, "loss": 0.31, "step": 19426 }, { "epoch": 0.8530687069826668, "grad_norm": 1.4375, "learning_rate": 2.6317667544809134e-06, "loss": 0.3069, "step": 19428 }, { "epoch": 0.8531565254618703, "grad_norm": 1.5859375, "learning_rate": 2.6286778445489574e-06, "loss": 0.3245, "step": 19430 }, { "epoch": 0.8532443439410738, "grad_norm": 1.515625, "learning_rate": 2.6255906478182075e-06, "loss": 0.3098, "step": 19432 }, { "epoch": 0.8533321624202773, "grad_norm": 1.5, "learning_rate": 2.6225051645250965e-06, "loss": 0.2982, "step": 19434 }, { "epoch": 0.8534199808994808, "grad_norm": 1.484375, "learning_rate": 2.619421394905902e-06, "loss": 0.3077, "step": 19436 }, { "epoch": 0.8535077993786843, "grad_norm": 1.515625, "learning_rate": 2.61633933919678e-06, "loss": 0.2876, "step": 19438 }, { "epoch": 0.8535956178578877, "grad_norm": 1.4296875, "learning_rate": 2.613258997633761e-06, "loss": 0.3006, "step": 19440 }, { "epoch": 0.8536834363370912, "grad_norm": 1.5234375, "learning_rate": 2.610180370452728e-06, "loss": 0.3323, "step": 19442 }, { "epoch": 0.8537712548162947, "grad_norm": 1.484375, "learning_rate": 2.6071034578894567e-06, "loss": 0.3108, "step": 19444 }, { "epoch": 0.8538590732954981, "grad_norm": 1.5546875, "learning_rate": 2.6040282601795697e-06, "loss": 0.3092, "step": 19446 }, { "epoch": 0.8539468917747017, "grad_norm": 1.4609375, "learning_rate": 2.6009547775585648e-06, "loss": 0.3037, "step": 19448 }, { "epoch": 0.8540347102539052, "grad_norm": 1.4765625, "learning_rate": 2.597883010261809e-06, "loss": 0.3239, "step": 19450 }, { "epoch": 0.8541225287331087, "grad_norm": 1.453125, "learning_rate": 2.594812958524548e-06, "loss": 0.3089, "step": 19452 }, { "epoch": 0.8542103472123121, "grad_norm": 1.4765625, "learning_rate": 2.591744622581882e-06, "loss": 0.2894, "step": 19454 }, { "epoch": 0.8542981656915156, "grad_norm": 1.390625, "learning_rate": 2.588678002668779e-06, "loss": 0.2996, "step": 19456 }, { "epoch": 0.8543859841707191, "grad_norm": 1.5546875, "learning_rate": 2.585613099020093e-06, "loss": 0.32, "step": 19458 }, { "epoch": 0.8544738026499226, "grad_norm": 1.46875, "learning_rate": 2.5825499118705213e-06, "loss": 0.2837, "step": 19460 }, { "epoch": 0.8545616211291261, "grad_norm": 1.4765625, "learning_rate": 2.5794884414546584e-06, "loss": 0.3122, "step": 19462 }, { "epoch": 0.8546494396083296, "grad_norm": 1.6328125, "learning_rate": 2.5764286880069434e-06, "loss": 0.3227, "step": 19464 }, { "epoch": 0.8547372580875331, "grad_norm": 1.4296875, "learning_rate": 2.573370651761692e-06, "loss": 0.3458, "step": 19466 }, { "epoch": 0.8548250765667366, "grad_norm": 1.609375, "learning_rate": 2.5703143329530943e-06, "loss": 0.3311, "step": 19468 }, { "epoch": 0.85491289504594, "grad_norm": 1.46875, "learning_rate": 2.5672597318152e-06, "loss": 0.2945, "step": 19470 }, { "epoch": 0.8550007135251435, "grad_norm": 1.4609375, "learning_rate": 2.5642068485819347e-06, "loss": 0.3096, "step": 19472 }, { "epoch": 0.855088532004347, "grad_norm": 1.5, "learning_rate": 2.5611556834870905e-06, "loss": 0.3302, "step": 19474 }, { "epoch": 0.8551763504835505, "grad_norm": 1.53125, "learning_rate": 2.5581062367643154e-06, "loss": 0.3125, "step": 19476 }, { "epoch": 0.855264168962754, "grad_norm": 1.71875, "learning_rate": 2.555058508647154e-06, "loss": 0.2861, "step": 19478 }, { "epoch": 0.8553519874419575, "grad_norm": 1.4375, "learning_rate": 2.5520124993689858e-06, "loss": 0.3093, "step": 19480 }, { "epoch": 0.855439805921161, "grad_norm": 1.578125, "learning_rate": 2.548968209163083e-06, "loss": 0.2958, "step": 19482 }, { "epoch": 0.8555276244003645, "grad_norm": 1.4921875, "learning_rate": 2.5459256382625723e-06, "loss": 0.3501, "step": 19484 }, { "epoch": 0.8556154428795679, "grad_norm": 1.5625, "learning_rate": 2.542884786900465e-06, "loss": 0.3296, "step": 19486 }, { "epoch": 0.8557032613587714, "grad_norm": 1.5234375, "learning_rate": 2.539845655309625e-06, "loss": 0.3145, "step": 19488 }, { "epoch": 0.8557910798379749, "grad_norm": 1.4375, "learning_rate": 2.5368082437227825e-06, "loss": 0.3172, "step": 19490 }, { "epoch": 0.8558788983171783, "grad_norm": 1.5234375, "learning_rate": 2.533772552372554e-06, "loss": 0.3262, "step": 19492 }, { "epoch": 0.8559667167963819, "grad_norm": 1.421875, "learning_rate": 2.5307385814914043e-06, "loss": 0.307, "step": 19494 }, { "epoch": 0.8560545352755854, "grad_norm": 1.484375, "learning_rate": 2.5277063313116854e-06, "loss": 0.3223, "step": 19496 }, { "epoch": 0.8561423537547889, "grad_norm": 1.4453125, "learning_rate": 2.5246758020656013e-06, "loss": 0.3303, "step": 19498 }, { "epoch": 0.8562301722339923, "grad_norm": 1.484375, "learning_rate": 2.52164699398523e-06, "loss": 0.3126, "step": 19500 }, { "epoch": 0.8563179907131958, "grad_norm": 1.5078125, "learning_rate": 2.518619907302522e-06, "loss": 0.2908, "step": 19502 }, { "epoch": 0.8564058091923993, "grad_norm": 1.3984375, "learning_rate": 2.515594542249289e-06, "loss": 0.2945, "step": 19504 }, { "epoch": 0.8564936276716028, "grad_norm": 1.5, "learning_rate": 2.512570899057218e-06, "loss": 0.3543, "step": 19506 }, { "epoch": 0.8565814461508063, "grad_norm": 1.4765625, "learning_rate": 2.5095489779578595e-06, "loss": 0.3082, "step": 19508 }, { "epoch": 0.8566692646300098, "grad_norm": 1.4609375, "learning_rate": 2.506528779182632e-06, "loss": 0.3024, "step": 19510 }, { "epoch": 0.8567570831092133, "grad_norm": 1.5546875, "learning_rate": 2.5035103029628166e-06, "loss": 0.3181, "step": 19512 }, { "epoch": 0.8568449015884168, "grad_norm": 1.4453125, "learning_rate": 2.5004935495295813e-06, "loss": 0.2928, "step": 19514 }, { "epoch": 0.8569327200676202, "grad_norm": 1.453125, "learning_rate": 2.497478519113941e-06, "loss": 0.295, "step": 19516 }, { "epoch": 0.8570205385468237, "grad_norm": 1.5234375, "learning_rate": 2.4944652119467864e-06, "loss": 0.3238, "step": 19518 }, { "epoch": 0.8571083570260272, "grad_norm": 1.5390625, "learning_rate": 2.4914536282588856e-06, "loss": 0.3081, "step": 19520 }, { "epoch": 0.8571961755052306, "grad_norm": 1.5, "learning_rate": 2.488443768280857e-06, "loss": 0.3123, "step": 19522 }, { "epoch": 0.8572839939844342, "grad_norm": 1.4375, "learning_rate": 2.4854356322432078e-06, "loss": 0.3266, "step": 19524 }, { "epoch": 0.8573718124636377, "grad_norm": 1.515625, "learning_rate": 2.482429220376292e-06, "loss": 0.3134, "step": 19526 }, { "epoch": 0.8574596309428412, "grad_norm": 1.4296875, "learning_rate": 2.4794245329103406e-06, "loss": 0.3128, "step": 19528 }, { "epoch": 0.8575474494220446, "grad_norm": 1.4765625, "learning_rate": 2.476421570075463e-06, "loss": 0.3205, "step": 19530 }, { "epoch": 0.8576352679012481, "grad_norm": 1.421875, "learning_rate": 2.4734203321016143e-06, "loss": 0.3162, "step": 19532 }, { "epoch": 0.8577230863804516, "grad_norm": 1.4609375, "learning_rate": 2.470420819218641e-06, "loss": 0.3417, "step": 19534 }, { "epoch": 0.857810904859655, "grad_norm": 1.5078125, "learning_rate": 2.4674230316562435e-06, "loss": 0.3085, "step": 19536 }, { "epoch": 0.8578987233388585, "grad_norm": 1.453125, "learning_rate": 2.4644269696439847e-06, "loss": 0.3028, "step": 19538 }, { "epoch": 0.8579865418180621, "grad_norm": 1.4296875, "learning_rate": 2.461432633411323e-06, "loss": 0.3199, "step": 19540 }, { "epoch": 0.8580743602972656, "grad_norm": 1.546875, "learning_rate": 2.4584400231875416e-06, "loss": 0.3034, "step": 19542 }, { "epoch": 0.858162178776469, "grad_norm": 1.4921875, "learning_rate": 2.4554491392018347e-06, "loss": 0.3213, "step": 19544 }, { "epoch": 0.8582499972556725, "grad_norm": 1.4765625, "learning_rate": 2.4524599816832276e-06, "loss": 0.3014, "step": 19546 }, { "epoch": 0.858337815734876, "grad_norm": 1.4453125, "learning_rate": 2.4494725508606488e-06, "loss": 0.2933, "step": 19548 }, { "epoch": 0.8584256342140795, "grad_norm": 1.4765625, "learning_rate": 2.446486846962867e-06, "loss": 0.3237, "step": 19550 }, { "epoch": 0.8585134526932829, "grad_norm": 1.484375, "learning_rate": 2.4435028702185227e-06, "loss": 0.3021, "step": 19552 }, { "epoch": 0.8586012711724865, "grad_norm": 1.4453125, "learning_rate": 2.4405206208561403e-06, "loss": 0.2859, "step": 19554 }, { "epoch": 0.85868908965169, "grad_norm": 1.4296875, "learning_rate": 2.437540099104094e-06, "loss": 0.3134, "step": 19556 }, { "epoch": 0.8587769081308935, "grad_norm": 1.5078125, "learning_rate": 2.4345613051906384e-06, "loss": 0.3277, "step": 19558 }, { "epoch": 0.8588647266100969, "grad_norm": 1.453125, "learning_rate": 2.431584239343887e-06, "loss": 0.2996, "step": 19560 }, { "epoch": 0.8589525450893004, "grad_norm": 1.4921875, "learning_rate": 2.4286089017918233e-06, "loss": 0.3341, "step": 19562 }, { "epoch": 0.8590403635685039, "grad_norm": 1.484375, "learning_rate": 2.4256352927623015e-06, "loss": 0.2953, "step": 19564 }, { "epoch": 0.8591281820477074, "grad_norm": 1.484375, "learning_rate": 2.4226634124830366e-06, "loss": 0.2946, "step": 19566 }, { "epoch": 0.8592160005269108, "grad_norm": 1.40625, "learning_rate": 2.419693261181627e-06, "loss": 0.3183, "step": 19568 }, { "epoch": 0.8593038190061144, "grad_norm": 1.4765625, "learning_rate": 2.4167248390855187e-06, "loss": 0.2981, "step": 19570 }, { "epoch": 0.8593916374853179, "grad_norm": 1.4609375, "learning_rate": 2.413758146422035e-06, "loss": 0.3042, "step": 19572 }, { "epoch": 0.8594794559645214, "grad_norm": 1.4921875, "learning_rate": 2.4107931834183667e-06, "loss": 0.321, "step": 19574 }, { "epoch": 0.8595672744437248, "grad_norm": 1.515625, "learning_rate": 2.4078299503015654e-06, "loss": 0.3173, "step": 19576 }, { "epoch": 0.8596550929229283, "grad_norm": 1.421875, "learning_rate": 2.4048684472985694e-06, "loss": 0.3045, "step": 19578 }, { "epoch": 0.8597429114021318, "grad_norm": 1.59375, "learning_rate": 2.401908674636158e-06, "loss": 0.3217, "step": 19580 }, { "epoch": 0.8598307298813352, "grad_norm": 1.484375, "learning_rate": 2.398950632541003e-06, "loss": 0.3452, "step": 19582 }, { "epoch": 0.8599185483605387, "grad_norm": 1.40625, "learning_rate": 2.3959943212396223e-06, "loss": 0.312, "step": 19584 }, { "epoch": 0.8600063668397423, "grad_norm": 1.4921875, "learning_rate": 2.3930397409584194e-06, "loss": 0.3037, "step": 19586 }, { "epoch": 0.8600941853189458, "grad_norm": 1.3984375, "learning_rate": 2.3900868919236515e-06, "loss": 0.2944, "step": 19588 }, { "epoch": 0.8601820037981492, "grad_norm": 1.515625, "learning_rate": 2.387135774361446e-06, "loss": 0.3224, "step": 19590 }, { "epoch": 0.8602698222773527, "grad_norm": 1.5, "learning_rate": 2.384186388497808e-06, "loss": 0.3191, "step": 19592 }, { "epoch": 0.8603576407565562, "grad_norm": 1.4921875, "learning_rate": 2.3812387345585914e-06, "loss": 0.311, "step": 19594 }, { "epoch": 0.8604454592357597, "grad_norm": 1.515625, "learning_rate": 2.378292812769542e-06, "loss": 0.3169, "step": 19596 }, { "epoch": 0.8605332777149631, "grad_norm": 1.4765625, "learning_rate": 2.3753486233562496e-06, "loss": 0.2885, "step": 19598 }, { "epoch": 0.8606210961941667, "grad_norm": 1.515625, "learning_rate": 2.3724061665441777e-06, "loss": 0.33, "step": 19600 }, { "epoch": 0.8607089146733702, "grad_norm": 1.46875, "learning_rate": 2.3694654425586772e-06, "loss": 0.2909, "step": 19602 }, { "epoch": 0.8607967331525737, "grad_norm": 1.53125, "learning_rate": 2.3665264516249287e-06, "loss": 0.292, "step": 19604 }, { "epoch": 0.8608845516317771, "grad_norm": 1.4453125, "learning_rate": 2.363589193968013e-06, "loss": 0.2853, "step": 19606 }, { "epoch": 0.8609723701109806, "grad_norm": 1.5859375, "learning_rate": 2.3606536698128595e-06, "loss": 0.2869, "step": 19608 }, { "epoch": 0.8610601885901841, "grad_norm": 1.4765625, "learning_rate": 2.357719879384279e-06, "loss": 0.2986, "step": 19610 }, { "epoch": 0.8611480070693875, "grad_norm": 1.375, "learning_rate": 2.3547878229069397e-06, "loss": 0.2786, "step": 19612 }, { "epoch": 0.861235825548591, "grad_norm": 1.484375, "learning_rate": 2.35185750060537e-06, "loss": 0.2973, "step": 19614 }, { "epoch": 0.8613236440277946, "grad_norm": 1.5, "learning_rate": 2.3489289127039906e-06, "loss": 0.3174, "step": 19616 }, { "epoch": 0.8614114625069981, "grad_norm": 1.5234375, "learning_rate": 2.3460020594270575e-06, "loss": 0.3406, "step": 19618 }, { "epoch": 0.8614992809862015, "grad_norm": 1.5390625, "learning_rate": 2.343076940998723e-06, "loss": 0.3065, "step": 19620 }, { "epoch": 0.861587099465405, "grad_norm": 1.4609375, "learning_rate": 2.340153557642985e-06, "loss": 0.335, "step": 19622 }, { "epoch": 0.8616749179446085, "grad_norm": 1.5546875, "learning_rate": 2.3372319095837174e-06, "loss": 0.2874, "step": 19624 }, { "epoch": 0.861762736423812, "grad_norm": 1.4765625, "learning_rate": 2.334311997044666e-06, "loss": 0.2934, "step": 19626 }, { "epoch": 0.8618505549030154, "grad_norm": 1.453125, "learning_rate": 2.3313938202494307e-06, "loss": 0.3114, "step": 19628 }, { "epoch": 0.8619383733822189, "grad_norm": 1.53125, "learning_rate": 2.328477379421498e-06, "loss": 0.3105, "step": 19630 }, { "epoch": 0.8620261918614225, "grad_norm": 1.5234375, "learning_rate": 2.3255626747841985e-06, "loss": 0.319, "step": 19632 }, { "epoch": 0.862114010340626, "grad_norm": 1.4296875, "learning_rate": 2.3226497065607478e-06, "loss": 0.2853, "step": 19634 }, { "epoch": 0.8622018288198294, "grad_norm": 1.46875, "learning_rate": 2.3197384749742175e-06, "loss": 0.3208, "step": 19636 }, { "epoch": 0.8622896472990329, "grad_norm": 1.484375, "learning_rate": 2.316828980247546e-06, "loss": 0.3086, "step": 19638 }, { "epoch": 0.8623774657782364, "grad_norm": 1.4375, "learning_rate": 2.3139212226035543e-06, "loss": 0.2994, "step": 19640 }, { "epoch": 0.8624652842574398, "grad_norm": 1.53125, "learning_rate": 2.3110152022649103e-06, "loss": 0.3229, "step": 19642 }, { "epoch": 0.8625531027366433, "grad_norm": 1.5390625, "learning_rate": 2.308110919454165e-06, "loss": 0.3214, "step": 19644 }, { "epoch": 0.8626409212158468, "grad_norm": 1.515625, "learning_rate": 2.305208374393719e-06, "loss": 0.3302, "step": 19646 }, { "epoch": 0.8627287396950504, "grad_norm": 1.4765625, "learning_rate": 2.3023075673058615e-06, "loss": 0.3134, "step": 19648 }, { "epoch": 0.8628165581742538, "grad_norm": 1.4609375, "learning_rate": 2.299408498412731e-06, "loss": 0.319, "step": 19650 }, { "epoch": 0.8629043766534573, "grad_norm": 1.5234375, "learning_rate": 2.2965111679363355e-06, "loss": 0.2917, "step": 19652 }, { "epoch": 0.8629921951326608, "grad_norm": 1.703125, "learning_rate": 2.293615576098562e-06, "loss": 0.3199, "step": 19654 }, { "epoch": 0.8630800136118643, "grad_norm": 1.4296875, "learning_rate": 2.290721723121145e-06, "loss": 0.3266, "step": 19656 }, { "epoch": 0.8631678320910677, "grad_norm": 1.5, "learning_rate": 2.2878296092257085e-06, "loss": 0.323, "step": 19658 }, { "epoch": 0.8632556505702712, "grad_norm": 1.390625, "learning_rate": 2.2849392346337235e-06, "loss": 0.328, "step": 19660 }, { "epoch": 0.8633434690494748, "grad_norm": 1.4765625, "learning_rate": 2.2820505995665378e-06, "loss": 0.3261, "step": 19662 }, { "epoch": 0.8634312875286783, "grad_norm": 1.453125, "learning_rate": 2.2791637042453628e-06, "loss": 0.3211, "step": 19664 }, { "epoch": 0.8635191060078817, "grad_norm": 1.5390625, "learning_rate": 2.2762785488912724e-06, "loss": 0.3188, "step": 19666 }, { "epoch": 0.8636069244870852, "grad_norm": 1.4609375, "learning_rate": 2.273395133725223e-06, "loss": 0.3157, "step": 19668 }, { "epoch": 0.8636947429662887, "grad_norm": 1.359375, "learning_rate": 2.270513458968018e-06, "loss": 0.3233, "step": 19670 }, { "epoch": 0.8637825614454921, "grad_norm": 1.3984375, "learning_rate": 2.267633524840346e-06, "loss": 0.3448, "step": 19672 }, { "epoch": 0.8638703799246956, "grad_norm": 1.484375, "learning_rate": 2.2647553315627485e-06, "loss": 0.3084, "step": 19674 }, { "epoch": 0.8639581984038991, "grad_norm": 1.578125, "learning_rate": 2.261878879355633e-06, "loss": 0.297, "step": 19676 }, { "epoch": 0.8640460168831027, "grad_norm": 1.5625, "learning_rate": 2.2590041684392854e-06, "loss": 0.3149, "step": 19678 }, { "epoch": 0.8641338353623061, "grad_norm": 1.4609375, "learning_rate": 2.256131199033848e-06, "loss": 0.3068, "step": 19680 }, { "epoch": 0.8642216538415096, "grad_norm": 1.5390625, "learning_rate": 2.2532599713593396e-06, "loss": 0.3197, "step": 19682 }, { "epoch": 0.8643094723207131, "grad_norm": 1.4609375, "learning_rate": 2.2503904856356366e-06, "loss": 0.3218, "step": 19684 }, { "epoch": 0.8643972907999166, "grad_norm": 1.3984375, "learning_rate": 2.247522742082478e-06, "loss": 0.3111, "step": 19686 }, { "epoch": 0.86448510927912, "grad_norm": 1.421875, "learning_rate": 2.244656740919485e-06, "loss": 0.3329, "step": 19688 }, { "epoch": 0.8645729277583235, "grad_norm": 1.4921875, "learning_rate": 2.241792482366131e-06, "loss": 0.3249, "step": 19690 }, { "epoch": 0.864660746237527, "grad_norm": 1.4765625, "learning_rate": 2.238929966641767e-06, "loss": 0.3023, "step": 19692 }, { "epoch": 0.8647485647167306, "grad_norm": 1.453125, "learning_rate": 2.2360691939656036e-06, "loss": 0.3127, "step": 19694 }, { "epoch": 0.864836383195934, "grad_norm": 1.4609375, "learning_rate": 2.2332101645567176e-06, "loss": 0.3544, "step": 19696 }, { "epoch": 0.8649242016751375, "grad_norm": 1.4375, "learning_rate": 2.2303528786340565e-06, "loss": 0.31, "step": 19698 }, { "epoch": 0.865012020154341, "grad_norm": 1.421875, "learning_rate": 2.227497336416423e-06, "loss": 0.3246, "step": 19700 }, { "epoch": 0.8650998386335444, "grad_norm": 1.5234375, "learning_rate": 2.224643538122509e-06, "loss": 0.3037, "step": 19702 }, { "epoch": 0.8651876571127479, "grad_norm": 1.4375, "learning_rate": 2.2217914839708477e-06, "loss": 0.3307, "step": 19704 }, { "epoch": 0.8652754755919514, "grad_norm": 1.4453125, "learning_rate": 2.2189411741798587e-06, "loss": 0.3167, "step": 19706 }, { "epoch": 0.865363294071155, "grad_norm": 1.53125, "learning_rate": 2.216092608967818e-06, "loss": 0.3497, "step": 19708 }, { "epoch": 0.8654511125503584, "grad_norm": 1.46875, "learning_rate": 2.213245788552859e-06, "loss": 0.3256, "step": 19710 }, { "epoch": 0.8655389310295619, "grad_norm": 1.4375, "learning_rate": 2.2104007131530046e-06, "loss": 0.2998, "step": 19712 }, { "epoch": 0.8656267495087654, "grad_norm": 1.421875, "learning_rate": 2.207557382986125e-06, "loss": 0.3256, "step": 19714 }, { "epoch": 0.8657145679879689, "grad_norm": 1.5546875, "learning_rate": 2.2047157982699686e-06, "loss": 0.3113, "step": 19716 }, { "epoch": 0.8658023864671723, "grad_norm": 1.4453125, "learning_rate": 2.201875959222133e-06, "loss": 0.2938, "step": 19718 }, { "epoch": 0.8658902049463758, "grad_norm": 1.5546875, "learning_rate": 2.199037866060108e-06, "loss": 0.3194, "step": 19720 }, { "epoch": 0.8659780234255793, "grad_norm": 1.4765625, "learning_rate": 2.1962015190012315e-06, "loss": 0.2859, "step": 19722 }, { "epoch": 0.8660658419047829, "grad_norm": 1.4453125, "learning_rate": 2.1933669182627044e-06, "loss": 0.3604, "step": 19724 }, { "epoch": 0.8661536603839863, "grad_norm": 1.46875, "learning_rate": 2.1905340640616085e-06, "loss": 0.2944, "step": 19726 }, { "epoch": 0.8662414788631898, "grad_norm": 1.390625, "learning_rate": 2.187702956614879e-06, "loss": 0.2785, "step": 19728 }, { "epoch": 0.8663292973423933, "grad_norm": 1.515625, "learning_rate": 2.184873596139328e-06, "loss": 0.3243, "step": 19730 }, { "epoch": 0.8664171158215968, "grad_norm": 1.5, "learning_rate": 2.1820459828516215e-06, "loss": 0.3116, "step": 19732 }, { "epoch": 0.8665049343008002, "grad_norm": 1.453125, "learning_rate": 2.1792201169683085e-06, "loss": 0.3112, "step": 19734 }, { "epoch": 0.8665927527800037, "grad_norm": 1.390625, "learning_rate": 2.1763959987057882e-06, "loss": 0.3129, "step": 19736 }, { "epoch": 0.8666805712592072, "grad_norm": 1.40625, "learning_rate": 2.1735736282803314e-06, "loss": 0.3219, "step": 19738 }, { "epoch": 0.8667683897384107, "grad_norm": 1.4609375, "learning_rate": 2.17075300590808e-06, "loss": 0.295, "step": 19740 }, { "epoch": 0.8668562082176142, "grad_norm": 1.4375, "learning_rate": 2.1679341318050327e-06, "loss": 0.2926, "step": 19742 }, { "epoch": 0.8669440266968177, "grad_norm": 1.515625, "learning_rate": 2.1651170061870675e-06, "loss": 0.3007, "step": 19744 }, { "epoch": 0.8670318451760212, "grad_norm": 1.46875, "learning_rate": 2.1623016292699137e-06, "loss": 0.3406, "step": 19746 }, { "epoch": 0.8671196636552246, "grad_norm": 1.4296875, "learning_rate": 2.159488001269175e-06, "loss": 0.3446, "step": 19748 }, { "epoch": 0.8672074821344281, "grad_norm": 1.515625, "learning_rate": 2.156676122400322e-06, "loss": 0.3072, "step": 19750 }, { "epoch": 0.8672953006136316, "grad_norm": 1.4765625, "learning_rate": 2.153865992878684e-06, "loss": 0.3402, "step": 19752 }, { "epoch": 0.8673831190928352, "grad_norm": 1.4375, "learning_rate": 2.151057612919474e-06, "loss": 0.2902, "step": 19754 }, { "epoch": 0.8674709375720386, "grad_norm": 1.4921875, "learning_rate": 2.14825098273774e-06, "loss": 0.286, "step": 19756 }, { "epoch": 0.8675587560512421, "grad_norm": 1.5390625, "learning_rate": 2.1454461025484292e-06, "loss": 0.2923, "step": 19758 }, { "epoch": 0.8676465745304456, "grad_norm": 1.5546875, "learning_rate": 2.1426429725663335e-06, "loss": 0.3102, "step": 19760 }, { "epoch": 0.867734393009649, "grad_norm": 1.515625, "learning_rate": 2.1398415930061146e-06, "loss": 0.3185, "step": 19762 }, { "epoch": 0.8678222114888525, "grad_norm": 1.3984375, "learning_rate": 2.1370419640823094e-06, "loss": 0.305, "step": 19764 }, { "epoch": 0.867910029968056, "grad_norm": 1.4296875, "learning_rate": 2.134244086009307e-06, "loss": 0.3049, "step": 19766 }, { "epoch": 0.8679978484472595, "grad_norm": 1.5078125, "learning_rate": 2.1314479590013786e-06, "loss": 0.3173, "step": 19768 }, { "epoch": 0.868085666926463, "grad_norm": 1.46875, "learning_rate": 2.1286535832726467e-06, "loss": 0.3196, "step": 19770 }, { "epoch": 0.8681734854056665, "grad_norm": 1.5390625, "learning_rate": 2.125860959037104e-06, "loss": 0.3416, "step": 19772 }, { "epoch": 0.86826130388487, "grad_norm": 1.5234375, "learning_rate": 2.1230700865086134e-06, "loss": 0.3315, "step": 19774 }, { "epoch": 0.8683491223640735, "grad_norm": 1.4375, "learning_rate": 2.1202809659008976e-06, "loss": 0.317, "step": 19776 }, { "epoch": 0.8684369408432769, "grad_norm": 1.390625, "learning_rate": 2.1174935974275556e-06, "loss": 0.3034, "step": 19778 }, { "epoch": 0.8685247593224804, "grad_norm": 1.453125, "learning_rate": 2.114707981302033e-06, "loss": 0.3161, "step": 19780 }, { "epoch": 0.8686125778016839, "grad_norm": 1.546875, "learning_rate": 2.1119241177376626e-06, "loss": 0.2952, "step": 19782 }, { "epoch": 0.8687003962808874, "grad_norm": 1.4609375, "learning_rate": 2.1091420069476316e-06, "loss": 0.3344, "step": 19784 }, { "epoch": 0.8687882147600909, "grad_norm": 1.4609375, "learning_rate": 2.1063616491449917e-06, "loss": 0.3011, "step": 19786 }, { "epoch": 0.8688760332392944, "grad_norm": 1.4375, "learning_rate": 2.1035830445426648e-06, "loss": 0.3266, "step": 19788 }, { "epoch": 0.8689638517184979, "grad_norm": 1.4296875, "learning_rate": 2.1008061933534327e-06, "loss": 0.3156, "step": 19790 }, { "epoch": 0.8690516701977014, "grad_norm": 1.546875, "learning_rate": 2.098031095789957e-06, "loss": 0.3302, "step": 19792 }, { "epoch": 0.8691394886769048, "grad_norm": 1.5078125, "learning_rate": 2.095257752064747e-06, "loss": 0.309, "step": 19794 }, { "epoch": 0.8692273071561083, "grad_norm": 1.5703125, "learning_rate": 2.092486162390192e-06, "loss": 0.3126, "step": 19796 }, { "epoch": 0.8693151256353118, "grad_norm": 1.5390625, "learning_rate": 2.089716326978536e-06, "loss": 0.3339, "step": 19798 }, { "epoch": 0.8694029441145152, "grad_norm": 1.4765625, "learning_rate": 2.086948246041892e-06, "loss": 0.3164, "step": 19800 }, { "epoch": 0.8694907625937188, "grad_norm": 1.484375, "learning_rate": 2.0841819197922494e-06, "loss": 0.3027, "step": 19802 }, { "epoch": 0.8695785810729223, "grad_norm": 1.46875, "learning_rate": 2.0814173484414447e-06, "loss": 0.3284, "step": 19804 }, { "epoch": 0.8696663995521258, "grad_norm": 1.4453125, "learning_rate": 2.0786545322011965e-06, "loss": 0.3101, "step": 19806 }, { "epoch": 0.8697542180313292, "grad_norm": 1.5703125, "learning_rate": 2.07589347128308e-06, "loss": 0.3168, "step": 19808 }, { "epoch": 0.8698420365105327, "grad_norm": 1.5703125, "learning_rate": 2.0731341658985324e-06, "loss": 0.3165, "step": 19810 }, { "epoch": 0.8699298549897362, "grad_norm": 1.5078125, "learning_rate": 2.0703766162588706e-06, "loss": 0.3176, "step": 19812 }, { "epoch": 0.8700176734689397, "grad_norm": 1.4765625, "learning_rate": 2.0676208225752585e-06, "loss": 0.3196, "step": 19814 }, { "epoch": 0.8701054919481432, "grad_norm": 1.515625, "learning_rate": 2.06486678505875e-06, "loss": 0.3524, "step": 19816 }, { "epoch": 0.8701933104273467, "grad_norm": 1.4609375, "learning_rate": 2.0621145039202343e-06, "loss": 0.2923, "step": 19818 }, { "epoch": 0.8702811289065502, "grad_norm": 1.4140625, "learning_rate": 2.0593639793704905e-06, "loss": 0.3128, "step": 19820 }, { "epoch": 0.8703689473857537, "grad_norm": 1.4921875, "learning_rate": 2.0566152116201528e-06, "loss": 0.3216, "step": 19822 }, { "epoch": 0.8704567658649571, "grad_norm": 1.4375, "learning_rate": 2.053868200879719e-06, "loss": 0.2935, "step": 19824 }, { "epoch": 0.8705445843441606, "grad_norm": 1.4765625, "learning_rate": 2.0511229473595626e-06, "loss": 0.3261, "step": 19826 }, { "epoch": 0.8706324028233641, "grad_norm": 1.5546875, "learning_rate": 2.0483794512699077e-06, "loss": 0.305, "step": 19828 }, { "epoch": 0.8707202213025675, "grad_norm": 1.421875, "learning_rate": 2.045637712820861e-06, "loss": 0.3249, "step": 19830 }, { "epoch": 0.8708080397817711, "grad_norm": 1.453125, "learning_rate": 2.042897732222379e-06, "loss": 0.3237, "step": 19832 }, { "epoch": 0.8708958582609746, "grad_norm": 1.4140625, "learning_rate": 2.040159509684289e-06, "loss": 0.3407, "step": 19834 }, { "epoch": 0.8709836767401781, "grad_norm": 1.4765625, "learning_rate": 2.0374230454162924e-06, "loss": 0.3136, "step": 19836 }, { "epoch": 0.8710714952193815, "grad_norm": 1.4453125, "learning_rate": 2.034688339627938e-06, "loss": 0.3151, "step": 19838 }, { "epoch": 0.871159313698585, "grad_norm": 1.390625, "learning_rate": 2.031955392528662e-06, "loss": 0.3123, "step": 19840 }, { "epoch": 0.8712471321777885, "grad_norm": 1.5234375, "learning_rate": 2.0292242043277455e-06, "loss": 0.3193, "step": 19842 }, { "epoch": 0.871334950656992, "grad_norm": 1.484375, "learning_rate": 2.0264947752343446e-06, "loss": 0.2988, "step": 19844 }, { "epoch": 0.8714227691361954, "grad_norm": 1.5, "learning_rate": 2.023767105457483e-06, "loss": 0.3288, "step": 19846 }, { "epoch": 0.871510587615399, "grad_norm": 1.4453125, "learning_rate": 2.0210411952060444e-06, "loss": 0.2927, "step": 19848 }, { "epoch": 0.8715984060946025, "grad_norm": 1.53125, "learning_rate": 2.0183170446887807e-06, "loss": 0.2949, "step": 19850 }, { "epoch": 0.871686224573806, "grad_norm": 1.4609375, "learning_rate": 2.0155946541143e-06, "loss": 0.3076, "step": 19852 }, { "epoch": 0.8717740430530094, "grad_norm": 1.6796875, "learning_rate": 2.012874023691097e-06, "loss": 0.3349, "step": 19854 }, { "epoch": 0.8718618615322129, "grad_norm": 1.5703125, "learning_rate": 2.010155153627505e-06, "loss": 0.3102, "step": 19856 }, { "epoch": 0.8719496800114164, "grad_norm": 1.5, "learning_rate": 2.0074380441317488e-06, "loss": 0.3419, "step": 19858 }, { "epoch": 0.8720374984906198, "grad_norm": 1.4375, "learning_rate": 2.004722695411898e-06, "loss": 0.3193, "step": 19860 }, { "epoch": 0.8721253169698234, "grad_norm": 1.484375, "learning_rate": 2.0020091076758922e-06, "loss": 0.3176, "step": 19862 }, { "epoch": 0.8722131354490269, "grad_norm": 1.421875, "learning_rate": 1.9992972811315458e-06, "loss": 0.2944, "step": 19864 }, { "epoch": 0.8723009539282304, "grad_norm": 1.421875, "learning_rate": 1.9965872159865227e-06, "loss": 0.3135, "step": 19866 }, { "epoch": 0.8723887724074338, "grad_norm": 1.4609375, "learning_rate": 1.9938789124483687e-06, "loss": 0.3138, "step": 19868 }, { "epoch": 0.8724765908866373, "grad_norm": 1.5078125, "learning_rate": 1.991172370724484e-06, "loss": 0.322, "step": 19870 }, { "epoch": 0.8725644093658408, "grad_norm": 1.546875, "learning_rate": 1.988467591022131e-06, "loss": 0.2898, "step": 19872 }, { "epoch": 0.8726522278450443, "grad_norm": 1.46875, "learning_rate": 1.9857645735484486e-06, "loss": 0.3098, "step": 19874 }, { "epoch": 0.8727400463242477, "grad_norm": 1.421875, "learning_rate": 1.9830633185104282e-06, "loss": 0.3381, "step": 19876 }, { "epoch": 0.8728278648034513, "grad_norm": 1.4609375, "learning_rate": 1.980363826114945e-06, "loss": 0.2921, "step": 19878 }, { "epoch": 0.8729156832826548, "grad_norm": 1.5078125, "learning_rate": 1.977666096568709e-06, "loss": 0.2974, "step": 19880 }, { "epoch": 0.8730035017618583, "grad_norm": 1.4921875, "learning_rate": 1.974970130078327e-06, "loss": 0.3526, "step": 19882 }, { "epoch": 0.8730913202410617, "grad_norm": 1.4453125, "learning_rate": 1.9722759268502507e-06, "loss": 0.3244, "step": 19884 }, { "epoch": 0.8731791387202652, "grad_norm": 1.4375, "learning_rate": 1.9695834870908007e-06, "loss": 0.3194, "step": 19886 }, { "epoch": 0.8732669571994687, "grad_norm": 1.5, "learning_rate": 1.966892811006174e-06, "loss": 0.3328, "step": 19888 }, { "epoch": 0.8733547756786721, "grad_norm": 1.4453125, "learning_rate": 1.964203898802411e-06, "loss": 0.3276, "step": 19890 }, { "epoch": 0.8734425941578756, "grad_norm": 1.46875, "learning_rate": 1.961516750685441e-06, "loss": 0.2992, "step": 19892 }, { "epoch": 0.8735304126370792, "grad_norm": 1.421875, "learning_rate": 1.9588313668610385e-06, "loss": 0.3253, "step": 19894 }, { "epoch": 0.8736182311162827, "grad_norm": 1.4921875, "learning_rate": 1.9561477475348504e-06, "loss": 0.329, "step": 19896 }, { "epoch": 0.8737060495954861, "grad_norm": 1.46875, "learning_rate": 1.9534658929123984e-06, "loss": 0.2999, "step": 19898 }, { "epoch": 0.8737938680746896, "grad_norm": 1.4609375, "learning_rate": 1.9507858031990454e-06, "loss": 0.3202, "step": 19900 }, { "epoch": 0.8738816865538931, "grad_norm": 1.4296875, "learning_rate": 1.9481074786000473e-06, "loss": 0.2992, "step": 19902 }, { "epoch": 0.8739695050330966, "grad_norm": 1.5078125, "learning_rate": 1.945430919320504e-06, "loss": 0.3101, "step": 19904 }, { "epoch": 0.8740573235123, "grad_norm": 1.40625, "learning_rate": 1.9427561255653816e-06, "loss": 0.3208, "step": 19906 }, { "epoch": 0.8741451419915036, "grad_norm": 1.4296875, "learning_rate": 1.9400830975395272e-06, "loss": 0.3327, "step": 19908 }, { "epoch": 0.8742329604707071, "grad_norm": 1.5390625, "learning_rate": 1.937411835447639e-06, "loss": 0.3426, "step": 19910 }, { "epoch": 0.8743207789499106, "grad_norm": 1.3984375, "learning_rate": 1.9347423394942774e-06, "loss": 0.299, "step": 19912 }, { "epoch": 0.874408597429114, "grad_norm": 1.4765625, "learning_rate": 1.9320746098838744e-06, "loss": 0.3203, "step": 19914 }, { "epoch": 0.8744964159083175, "grad_norm": 1.484375, "learning_rate": 1.9294086468207325e-06, "loss": 0.3169, "step": 19916 }, { "epoch": 0.874584234387521, "grad_norm": 1.5078125, "learning_rate": 1.9267444505090022e-06, "loss": 0.3429, "step": 19918 }, { "epoch": 0.8746720528667244, "grad_norm": 1.4375, "learning_rate": 1.9240820211527148e-06, "loss": 0.2993, "step": 19920 }, { "epoch": 0.8747598713459279, "grad_norm": 1.59375, "learning_rate": 1.9214213589557608e-06, "loss": 0.3321, "step": 19922 }, { "epoch": 0.8748476898251315, "grad_norm": 1.4609375, "learning_rate": 1.918762464121887e-06, "loss": 0.2825, "step": 19924 }, { "epoch": 0.874935508304335, "grad_norm": 1.5625, "learning_rate": 1.9161053368547207e-06, "loss": 0.3315, "step": 19926 }, { "epoch": 0.8750233267835384, "grad_norm": 1.5078125, "learning_rate": 1.913449977357737e-06, "loss": 0.3192, "step": 19928 }, { "epoch": 0.8751111452627419, "grad_norm": 1.4609375, "learning_rate": 1.9107963858342905e-06, "loss": 0.323, "step": 19930 }, { "epoch": 0.8751989637419454, "grad_norm": 1.4921875, "learning_rate": 1.9081445624875933e-06, "loss": 0.3143, "step": 19932 }, { "epoch": 0.8752867822211489, "grad_norm": 1.453125, "learning_rate": 1.9054945075207164e-06, "loss": 0.3259, "step": 19934 }, { "epoch": 0.8753746007003523, "grad_norm": 1.546875, "learning_rate": 1.9028462211366117e-06, "loss": 0.3299, "step": 19936 }, { "epoch": 0.8754624191795558, "grad_norm": 1.4453125, "learning_rate": 1.9001997035380776e-06, "loss": 0.3065, "step": 19938 }, { "epoch": 0.8755502376587594, "grad_norm": 1.5859375, "learning_rate": 1.897554954927791e-06, "loss": 0.3281, "step": 19940 }, { "epoch": 0.8756380561379629, "grad_norm": 1.546875, "learning_rate": 1.894911975508276e-06, "loss": 0.3459, "step": 19942 }, { "epoch": 0.8757258746171663, "grad_norm": 1.515625, "learning_rate": 1.8922707654819488e-06, "loss": 0.3059, "step": 19944 }, { "epoch": 0.8758136930963698, "grad_norm": 1.5234375, "learning_rate": 1.8896313250510612e-06, "loss": 0.2982, "step": 19946 }, { "epoch": 0.8759015115755733, "grad_norm": 1.53125, "learning_rate": 1.8869936544177458e-06, "loss": 0.3176, "step": 19948 }, { "epoch": 0.8759893300547767, "grad_norm": 1.4921875, "learning_rate": 1.8843577537839996e-06, "loss": 0.2997, "step": 19950 }, { "epoch": 0.8760771485339802, "grad_norm": 1.515625, "learning_rate": 1.881723623351675e-06, "loss": 0.3199, "step": 19952 }, { "epoch": 0.8761649670131838, "grad_norm": 1.4453125, "learning_rate": 1.8790912633224994e-06, "loss": 0.3363, "step": 19954 }, { "epoch": 0.8762527854923873, "grad_norm": 1.5625, "learning_rate": 1.876460673898059e-06, "loss": 0.3124, "step": 19956 }, { "epoch": 0.8763406039715907, "grad_norm": 1.421875, "learning_rate": 1.8738318552797978e-06, "loss": 0.2969, "step": 19958 }, { "epoch": 0.8764284224507942, "grad_norm": 1.5, "learning_rate": 1.8712048076690442e-06, "loss": 0.2909, "step": 19960 }, { "epoch": 0.8765162409299977, "grad_norm": 1.4453125, "learning_rate": 1.8685795312669619e-06, "loss": 0.3397, "step": 19962 }, { "epoch": 0.8766040594092012, "grad_norm": 1.4921875, "learning_rate": 1.8659560262746123e-06, "loss": 0.3175, "step": 19964 }, { "epoch": 0.8766918778884046, "grad_norm": 1.546875, "learning_rate": 1.8633342928928931e-06, "loss": 0.2963, "step": 19966 }, { "epoch": 0.8767796963676081, "grad_norm": 1.5, "learning_rate": 1.8607143313225773e-06, "loss": 0.296, "step": 19968 }, { "epoch": 0.8768675148468117, "grad_norm": 1.5078125, "learning_rate": 1.8580961417643123e-06, "loss": 0.3398, "step": 19970 }, { "epoch": 0.8769553333260152, "grad_norm": 1.6015625, "learning_rate": 1.855479724418585e-06, "loss": 0.3342, "step": 19972 }, { "epoch": 0.8770431518052186, "grad_norm": 1.515625, "learning_rate": 1.8528650794857716e-06, "loss": 0.3249, "step": 19974 }, { "epoch": 0.8771309702844221, "grad_norm": 1.4453125, "learning_rate": 1.850252207166095e-06, "loss": 0.3134, "step": 19976 }, { "epoch": 0.8772187887636256, "grad_norm": 1.4921875, "learning_rate": 1.847641107659659e-06, "loss": 0.323, "step": 19978 }, { "epoch": 0.877306607242829, "grad_norm": 1.5546875, "learning_rate": 1.845031781166412e-06, "loss": 0.2874, "step": 19980 }, { "epoch": 0.8773944257220325, "grad_norm": 1.4140625, "learning_rate": 1.8424242278861858e-06, "loss": 0.3032, "step": 19982 }, { "epoch": 0.877482244201236, "grad_norm": 1.4609375, "learning_rate": 1.8398184480186654e-06, "loss": 0.3245, "step": 19984 }, { "epoch": 0.8775700626804396, "grad_norm": 1.4453125, "learning_rate": 1.8372144417633935e-06, "loss": 0.3182, "step": 19986 }, { "epoch": 0.877657881159643, "grad_norm": 1.5078125, "learning_rate": 1.834612209319797e-06, "loss": 0.3319, "step": 19988 }, { "epoch": 0.8777456996388465, "grad_norm": 1.453125, "learning_rate": 1.832011750887147e-06, "loss": 0.2969, "step": 19990 }, { "epoch": 0.87783351811805, "grad_norm": 1.46875, "learning_rate": 1.8294130666645926e-06, "loss": 0.3246, "step": 19992 }, { "epoch": 0.8779213365972535, "grad_norm": 1.5, "learning_rate": 1.8268161568511438e-06, "loss": 0.3301, "step": 19994 }, { "epoch": 0.8780091550764569, "grad_norm": 1.5078125, "learning_rate": 1.8242210216456612e-06, "loss": 0.2898, "step": 19996 }, { "epoch": 0.8780969735556604, "grad_norm": 1.4296875, "learning_rate": 1.8216276612468941e-06, "loss": 0.3018, "step": 19998 }, { "epoch": 0.8781847920348639, "grad_norm": 1.609375, "learning_rate": 1.8190360758534392e-06, "loss": 0.277, "step": 20000 }, { "epoch": 0.8782726105140675, "grad_norm": 1.5859375, "learning_rate": 1.8164462656637544e-06, "loss": 0.3097, "step": 20002 }, { "epoch": 0.8783604289932709, "grad_norm": 1.453125, "learning_rate": 1.8138582308761698e-06, "loss": 0.3035, "step": 20004 }, { "epoch": 0.8784482474724744, "grad_norm": 1.4375, "learning_rate": 1.8112719716888853e-06, "loss": 0.3346, "step": 20006 }, { "epoch": 0.8785360659516779, "grad_norm": 1.4296875, "learning_rate": 1.808687488299951e-06, "loss": 0.3034, "step": 20008 }, { "epoch": 0.8786238844308814, "grad_norm": 1.375, "learning_rate": 1.8061047809072834e-06, "loss": 0.3205, "step": 20010 }, { "epoch": 0.8787117029100848, "grad_norm": 1.5546875, "learning_rate": 1.8035238497086743e-06, "loss": 0.3309, "step": 20012 }, { "epoch": 0.8787995213892883, "grad_norm": 1.4609375, "learning_rate": 1.8009446949017683e-06, "loss": 0.3232, "step": 20014 }, { "epoch": 0.8788873398684919, "grad_norm": 1.5078125, "learning_rate": 1.7983673166840825e-06, "loss": 0.3017, "step": 20016 }, { "epoch": 0.8789751583476954, "grad_norm": 1.5078125, "learning_rate": 1.7957917152529869e-06, "loss": 0.3277, "step": 20018 }, { "epoch": 0.8790629768268988, "grad_norm": 1.5, "learning_rate": 1.7932178908057208e-06, "loss": 0.2818, "step": 20020 }, { "epoch": 0.8791507953061023, "grad_norm": 1.5078125, "learning_rate": 1.7906458435393986e-06, "loss": 0.3283, "step": 20022 }, { "epoch": 0.8792386137853058, "grad_norm": 1.3984375, "learning_rate": 1.7880755736509741e-06, "loss": 0.3126, "step": 20024 }, { "epoch": 0.8793264322645092, "grad_norm": 1.4375, "learning_rate": 1.7855070813372926e-06, "loss": 0.3258, "step": 20026 }, { "epoch": 0.8794142507437127, "grad_norm": 1.53125, "learning_rate": 1.7829403667950412e-06, "loss": 0.3533, "step": 20028 }, { "epoch": 0.8795020692229162, "grad_norm": 1.4921875, "learning_rate": 1.7803754302207848e-06, "loss": 0.3357, "step": 20030 }, { "epoch": 0.8795898877021198, "grad_norm": 1.453125, "learning_rate": 1.7778122718109441e-06, "loss": 0.318, "step": 20032 }, { "epoch": 0.8796777061813232, "grad_norm": 1.453125, "learning_rate": 1.7752508917618011e-06, "loss": 0.3047, "step": 20034 }, { "epoch": 0.8797655246605267, "grad_norm": 1.4453125, "learning_rate": 1.7726912902695152e-06, "loss": 0.3174, "step": 20036 }, { "epoch": 0.8798533431397302, "grad_norm": 1.5859375, "learning_rate": 1.770133467530094e-06, "loss": 0.3134, "step": 20038 }, { "epoch": 0.8799411616189337, "grad_norm": 1.3984375, "learning_rate": 1.7675774237394278e-06, "loss": 0.2914, "step": 20040 }, { "epoch": 0.8800289800981371, "grad_norm": 1.5546875, "learning_rate": 1.765023159093246e-06, "loss": 0.3218, "step": 20042 }, { "epoch": 0.8801167985773406, "grad_norm": 1.484375, "learning_rate": 1.7624706737871643e-06, "loss": 0.3444, "step": 20044 }, { "epoch": 0.8802046170565441, "grad_norm": 1.4765625, "learning_rate": 1.7599199680166462e-06, "loss": 0.3358, "step": 20046 }, { "epoch": 0.8802924355357477, "grad_norm": 1.5546875, "learning_rate": 1.7573710419770267e-06, "loss": 0.3213, "step": 20048 }, { "epoch": 0.8803802540149511, "grad_norm": 1.4921875, "learning_rate": 1.7548238958635082e-06, "loss": 0.2897, "step": 20050 }, { "epoch": 0.8804680724941546, "grad_norm": 1.453125, "learning_rate": 1.7522785298711457e-06, "loss": 0.3444, "step": 20052 }, { "epoch": 0.8805558909733581, "grad_norm": 1.5390625, "learning_rate": 1.7497349441948668e-06, "loss": 0.2972, "step": 20054 }, { "epoch": 0.8806437094525615, "grad_norm": 1.46875, "learning_rate": 1.7471931390294627e-06, "loss": 0.3189, "step": 20056 }, { "epoch": 0.880731527931765, "grad_norm": 1.484375, "learning_rate": 1.744653114569575e-06, "loss": 0.3152, "step": 20058 }, { "epoch": 0.8808193464109685, "grad_norm": 1.5, "learning_rate": 1.7421148710097312e-06, "loss": 0.3031, "step": 20060 }, { "epoch": 0.8809071648901721, "grad_norm": 1.4921875, "learning_rate": 1.7395784085443068e-06, "loss": 0.3525, "step": 20062 }, { "epoch": 0.8809949833693755, "grad_norm": 1.4375, "learning_rate": 1.737043727367546e-06, "loss": 0.3036, "step": 20064 }, { "epoch": 0.881082801848579, "grad_norm": 1.4921875, "learning_rate": 1.7345108276735467e-06, "loss": 0.3163, "step": 20066 }, { "epoch": 0.8811706203277825, "grad_norm": 1.4375, "learning_rate": 1.7319797096562867e-06, "loss": 0.3123, "step": 20068 }, { "epoch": 0.881258438806986, "grad_norm": 1.4765625, "learning_rate": 1.729450373509603e-06, "loss": 0.2987, "step": 20070 }, { "epoch": 0.8813462572861894, "grad_norm": 1.546875, "learning_rate": 1.726922819427182e-06, "loss": 0.3379, "step": 20072 }, { "epoch": 0.8814340757653929, "grad_norm": 1.484375, "learning_rate": 1.724397047602594e-06, "loss": 0.3099, "step": 20074 }, { "epoch": 0.8815218942445964, "grad_norm": 1.4296875, "learning_rate": 1.7218730582292563e-06, "loss": 0.3147, "step": 20076 }, { "epoch": 0.8816097127238, "grad_norm": 1.5390625, "learning_rate": 1.719350851500462e-06, "loss": 0.3137, "step": 20078 }, { "epoch": 0.8816975312030034, "grad_norm": 1.4375, "learning_rate": 1.7168304276093616e-06, "loss": 0.3025, "step": 20080 }, { "epoch": 0.8817853496822069, "grad_norm": 1.390625, "learning_rate": 1.714311786748965e-06, "loss": 0.3322, "step": 20082 }, { "epoch": 0.8818731681614104, "grad_norm": 1.4609375, "learning_rate": 1.7117949291121565e-06, "loss": 0.3313, "step": 20084 }, { "epoch": 0.8819609866406138, "grad_norm": 1.4453125, "learning_rate": 1.7092798548916738e-06, "loss": 0.3306, "step": 20086 }, { "epoch": 0.8820488051198173, "grad_norm": 1.5078125, "learning_rate": 1.706766564280124e-06, "loss": 0.3208, "step": 20088 }, { "epoch": 0.8821366235990208, "grad_norm": 1.4296875, "learning_rate": 1.704255057469975e-06, "loss": 0.3011, "step": 20090 }, { "epoch": 0.8822244420782243, "grad_norm": 1.4375, "learning_rate": 1.7017453346535595e-06, "loss": 0.3052, "step": 20092 }, { "epoch": 0.8823122605574278, "grad_norm": 1.453125, "learning_rate": 1.6992373960230707e-06, "loss": 0.3114, "step": 20094 }, { "epoch": 0.8824000790366313, "grad_norm": 1.484375, "learning_rate": 1.6967312417705634e-06, "loss": 0.2927, "step": 20096 }, { "epoch": 0.8824878975158348, "grad_norm": 1.4921875, "learning_rate": 1.6942268720879672e-06, "loss": 0.2854, "step": 20098 }, { "epoch": 0.8825757159950383, "grad_norm": 1.5078125, "learning_rate": 1.6917242871670596e-06, "loss": 0.3352, "step": 20100 }, { "epoch": 0.8826635344742417, "grad_norm": 1.453125, "learning_rate": 1.6892234871994983e-06, "loss": 0.3241, "step": 20102 }, { "epoch": 0.8827513529534452, "grad_norm": 1.5234375, "learning_rate": 1.686724472376791e-06, "loss": 0.3028, "step": 20104 }, { "epoch": 0.8828391714326487, "grad_norm": 1.4921875, "learning_rate": 1.6842272428903073e-06, "loss": 0.3198, "step": 20106 }, { "epoch": 0.8829269899118523, "grad_norm": 1.484375, "learning_rate": 1.6817317989312935e-06, "loss": 0.3084, "step": 20108 }, { "epoch": 0.8830148083910557, "grad_norm": 1.484375, "learning_rate": 1.6792381406908475e-06, "loss": 0.3003, "step": 20110 }, { "epoch": 0.8831026268702592, "grad_norm": 1.4765625, "learning_rate": 1.6767462683599355e-06, "loss": 0.3089, "step": 20112 }, { "epoch": 0.8831904453494627, "grad_norm": 1.4765625, "learning_rate": 1.6742561821293829e-06, "loss": 0.2854, "step": 20114 }, { "epoch": 0.8832782638286661, "grad_norm": 1.5, "learning_rate": 1.671767882189887e-06, "loss": 0.3033, "step": 20116 }, { "epoch": 0.8833660823078696, "grad_norm": 1.46875, "learning_rate": 1.6692813687320008e-06, "loss": 0.33, "step": 20118 }, { "epoch": 0.8834539007870731, "grad_norm": 1.625, "learning_rate": 1.6667966419461333e-06, "loss": 0.3175, "step": 20120 }, { "epoch": 0.8835417192662766, "grad_norm": 1.46875, "learning_rate": 1.6643137020225824e-06, "loss": 0.3049, "step": 20122 }, { "epoch": 0.8836295377454801, "grad_norm": 1.3984375, "learning_rate": 1.6618325491514736e-06, "loss": 0.3186, "step": 20124 }, { "epoch": 0.8837173562246836, "grad_norm": 1.5390625, "learning_rate": 1.6593531835228244e-06, "loss": 0.3149, "step": 20126 }, { "epoch": 0.8838051747038871, "grad_norm": 1.5, "learning_rate": 1.6568756053265023e-06, "loss": 0.3189, "step": 20128 }, { "epoch": 0.8838929931830906, "grad_norm": 1.71875, "learning_rate": 1.6543998147522444e-06, "loss": 0.3124, "step": 20130 }, { "epoch": 0.883980811662294, "grad_norm": 1.40625, "learning_rate": 1.6519258119896463e-06, "loss": 0.2992, "step": 20132 }, { "epoch": 0.8840686301414975, "grad_norm": 1.46875, "learning_rate": 1.6494535972281623e-06, "loss": 0.3163, "step": 20134 }, { "epoch": 0.884156448620701, "grad_norm": 1.4921875, "learning_rate": 1.6469831706571237e-06, "loss": 0.3111, "step": 20136 }, { "epoch": 0.8842442670999044, "grad_norm": 1.4453125, "learning_rate": 1.6445145324657075e-06, "loss": 0.3072, "step": 20138 }, { "epoch": 0.884332085579108, "grad_norm": 1.5, "learning_rate": 1.6420476828429704e-06, "loss": 0.3367, "step": 20140 }, { "epoch": 0.8844199040583115, "grad_norm": 1.453125, "learning_rate": 1.6395826219778226e-06, "loss": 0.3162, "step": 20142 }, { "epoch": 0.884507722537515, "grad_norm": 1.4921875, "learning_rate": 1.6371193500590325e-06, "loss": 0.3177, "step": 20144 }, { "epoch": 0.8845955410167184, "grad_norm": 1.4140625, "learning_rate": 1.6346578672752467e-06, "loss": 0.3382, "step": 20146 }, { "epoch": 0.8846833594959219, "grad_norm": 1.59375, "learning_rate": 1.6321981738149584e-06, "loss": 0.3199, "step": 20148 }, { "epoch": 0.8847711779751254, "grad_norm": 1.4921875, "learning_rate": 1.6297402698665392e-06, "loss": 0.3121, "step": 20150 }, { "epoch": 0.8848589964543289, "grad_norm": 1.4921875, "learning_rate": 1.6272841556182139e-06, "loss": 0.3434, "step": 20152 }, { "epoch": 0.8849468149335324, "grad_norm": 1.5, "learning_rate": 1.6248298312580706e-06, "loss": 0.2925, "step": 20154 }, { "epoch": 0.8850346334127359, "grad_norm": 1.5390625, "learning_rate": 1.6223772969740592e-06, "loss": 0.3513, "step": 20156 }, { "epoch": 0.8851224518919394, "grad_norm": 1.4765625, "learning_rate": 1.6199265529539987e-06, "loss": 0.3147, "step": 20158 }, { "epoch": 0.8852102703711429, "grad_norm": 1.4296875, "learning_rate": 1.6174775993855695e-06, "loss": 0.303, "step": 20160 }, { "epoch": 0.8852980888503463, "grad_norm": 1.4765625, "learning_rate": 1.615030436456308e-06, "loss": 0.3321, "step": 20162 }, { "epoch": 0.8853859073295498, "grad_norm": 1.3828125, "learning_rate": 1.6125850643536221e-06, "loss": 0.3239, "step": 20164 }, { "epoch": 0.8854737258087533, "grad_norm": 1.4765625, "learning_rate": 1.6101414832647822e-06, "loss": 0.3308, "step": 20166 }, { "epoch": 0.8855615442879567, "grad_norm": 1.46875, "learning_rate": 1.6076996933769073e-06, "loss": 0.3111, "step": 20168 }, { "epoch": 0.8856493627671603, "grad_norm": 1.5078125, "learning_rate": 1.6052596948770037e-06, "loss": 0.3035, "step": 20170 }, { "epoch": 0.8857371812463638, "grad_norm": 1.5, "learning_rate": 1.6028214879519166e-06, "loss": 0.3051, "step": 20172 }, { "epoch": 0.8858249997255673, "grad_norm": 1.5234375, "learning_rate": 1.6003850727883713e-06, "loss": 0.2882, "step": 20174 }, { "epoch": 0.8859128182047707, "grad_norm": 1.4296875, "learning_rate": 1.5979504495729436e-06, "loss": 0.3004, "step": 20176 }, { "epoch": 0.8860006366839742, "grad_norm": 1.578125, "learning_rate": 1.5955176184920844e-06, "loss": 0.3127, "step": 20178 }, { "epoch": 0.8860884551631777, "grad_norm": 1.4765625, "learning_rate": 1.5930865797320948e-06, "loss": 0.3208, "step": 20180 }, { "epoch": 0.8861762736423812, "grad_norm": 1.5859375, "learning_rate": 1.5906573334791424e-06, "loss": 0.321, "step": 20182 }, { "epoch": 0.8862640921215846, "grad_norm": 1.5234375, "learning_rate": 1.588229879919273e-06, "loss": 0.3277, "step": 20184 }, { "epoch": 0.8863519106007882, "grad_norm": 1.46875, "learning_rate": 1.5858042192383626e-06, "loss": 0.3402, "step": 20186 }, { "epoch": 0.8864397290799917, "grad_norm": 1.5078125, "learning_rate": 1.5833803516221795e-06, "loss": 0.321, "step": 20188 }, { "epoch": 0.8865275475591952, "grad_norm": 1.3984375, "learning_rate": 1.580958277256342e-06, "loss": 0.3062, "step": 20190 }, { "epoch": 0.8866153660383986, "grad_norm": 1.5703125, "learning_rate": 1.5785379963263347e-06, "loss": 0.3405, "step": 20192 }, { "epoch": 0.8867031845176021, "grad_norm": 1.6484375, "learning_rate": 1.5761195090175041e-06, "loss": 0.3248, "step": 20194 }, { "epoch": 0.8867910029968056, "grad_norm": 1.453125, "learning_rate": 1.5737028155150517e-06, "loss": 0.3085, "step": 20196 }, { "epoch": 0.886878821476009, "grad_norm": 1.546875, "learning_rate": 1.5712879160040578e-06, "loss": 0.3068, "step": 20198 }, { "epoch": 0.8869666399552125, "grad_norm": 1.4609375, "learning_rate": 1.568874810669449e-06, "loss": 0.3134, "step": 20200 }, { "epoch": 0.8870544584344161, "grad_norm": 1.46875, "learning_rate": 1.5664634996960248e-06, "loss": 0.3131, "step": 20202 }, { "epoch": 0.8871422769136196, "grad_norm": 1.4609375, "learning_rate": 1.564053983268446e-06, "loss": 0.318, "step": 20204 }, { "epoch": 0.887230095392823, "grad_norm": 1.515625, "learning_rate": 1.5616462615712257e-06, "loss": 0.2956, "step": 20206 }, { "epoch": 0.8873179138720265, "grad_norm": 1.421875, "learning_rate": 1.5592403347887558e-06, "loss": 0.2928, "step": 20208 }, { "epoch": 0.88740573235123, "grad_norm": 1.4296875, "learning_rate": 1.5568362031052803e-06, "loss": 0.3236, "step": 20210 }, { "epoch": 0.8874935508304335, "grad_norm": 1.4453125, "learning_rate": 1.5544338667049075e-06, "loss": 0.3086, "step": 20212 }, { "epoch": 0.8875813693096369, "grad_norm": 1.5859375, "learning_rate": 1.5520333257716125e-06, "loss": 0.3167, "step": 20214 }, { "epoch": 0.8876691877888405, "grad_norm": 1.5, "learning_rate": 1.549634580489226e-06, "loss": 0.3088, "step": 20216 }, { "epoch": 0.887757006268044, "grad_norm": 1.4921875, "learning_rate": 1.5472376310414428e-06, "loss": 0.2893, "step": 20218 }, { "epoch": 0.8878448247472475, "grad_norm": 1.546875, "learning_rate": 1.5448424776118215e-06, "loss": 0.3047, "step": 20220 }, { "epoch": 0.8879326432264509, "grad_norm": 1.4609375, "learning_rate": 1.5424491203837903e-06, "loss": 0.2981, "step": 20222 }, { "epoch": 0.8880204617056544, "grad_norm": 1.46875, "learning_rate": 1.5400575595406226e-06, "loss": 0.3243, "step": 20224 }, { "epoch": 0.8881082801848579, "grad_norm": 1.4921875, "learning_rate": 1.5376677952654767e-06, "loss": 0.3125, "step": 20226 }, { "epoch": 0.8881960986640614, "grad_norm": 1.53125, "learning_rate": 1.5352798277413539e-06, "loss": 0.3246, "step": 20228 }, { "epoch": 0.8882839171432648, "grad_norm": 1.5546875, "learning_rate": 1.5328936571511244e-06, "loss": 0.3198, "step": 20230 }, { "epoch": 0.8883717356224684, "grad_norm": 1.5703125, "learning_rate": 1.530509283677528e-06, "loss": 0.3416, "step": 20232 }, { "epoch": 0.8884595541016719, "grad_norm": 1.5234375, "learning_rate": 1.5281267075031497e-06, "loss": 0.313, "step": 20234 }, { "epoch": 0.8885473725808753, "grad_norm": 1.4765625, "learning_rate": 1.5257459288104625e-06, "loss": 0.3235, "step": 20236 }, { "epoch": 0.8886351910600788, "grad_norm": 1.4296875, "learning_rate": 1.5233669477817737e-06, "loss": 0.2899, "step": 20238 }, { "epoch": 0.8887230095392823, "grad_norm": 1.4453125, "learning_rate": 1.520989764599276e-06, "loss": 0.3083, "step": 20240 }, { "epoch": 0.8888108280184858, "grad_norm": 1.4921875, "learning_rate": 1.5186143794450103e-06, "loss": 0.3121, "step": 20242 }, { "epoch": 0.8888986464976892, "grad_norm": 1.4140625, "learning_rate": 1.516240792500878e-06, "loss": 0.3316, "step": 20244 }, { "epoch": 0.8889864649768927, "grad_norm": 1.515625, "learning_rate": 1.5138690039486675e-06, "loss": 0.2773, "step": 20246 }, { "epoch": 0.8890742834560963, "grad_norm": 1.4609375, "learning_rate": 1.5114990139699886e-06, "loss": 0.3228, "step": 20248 }, { "epoch": 0.8891621019352998, "grad_norm": 1.53125, "learning_rate": 1.5091308227463492e-06, "loss": 0.3265, "step": 20250 }, { "epoch": 0.8892499204145032, "grad_norm": 1.4453125, "learning_rate": 1.5067644304590983e-06, "loss": 0.3161, "step": 20252 }, { "epoch": 0.8893377388937067, "grad_norm": 1.4375, "learning_rate": 1.5043998372894634e-06, "loss": 0.3223, "step": 20254 }, { "epoch": 0.8894255573729102, "grad_norm": 1.4765625, "learning_rate": 1.5020370434185221e-06, "loss": 0.3077, "step": 20256 }, { "epoch": 0.8895133758521137, "grad_norm": 1.5390625, "learning_rate": 1.4996760490272127e-06, "loss": 0.3164, "step": 20258 }, { "epoch": 0.8896011943313171, "grad_norm": 1.5234375, "learning_rate": 1.4973168542963489e-06, "loss": 0.299, "step": 20260 }, { "epoch": 0.8896890128105207, "grad_norm": 1.484375, "learning_rate": 1.494959459406589e-06, "loss": 0.3543, "step": 20262 }, { "epoch": 0.8897768312897242, "grad_norm": 1.515625, "learning_rate": 1.4926038645384749e-06, "loss": 0.3334, "step": 20264 }, { "epoch": 0.8898646497689277, "grad_norm": 1.46875, "learning_rate": 1.4902500698723897e-06, "loss": 0.3223, "step": 20266 }, { "epoch": 0.8899524682481311, "grad_norm": 1.5234375, "learning_rate": 1.4878980755885868e-06, "loss": 0.3124, "step": 20268 }, { "epoch": 0.8900402867273346, "grad_norm": 1.46875, "learning_rate": 1.4855478818671887e-06, "loss": 0.3265, "step": 20270 }, { "epoch": 0.8901281052065381, "grad_norm": 1.40625, "learning_rate": 1.483199488888168e-06, "loss": 0.3368, "step": 20272 }, { "epoch": 0.8902159236857415, "grad_norm": 1.4453125, "learning_rate": 1.4808528968313695e-06, "loss": 0.3398, "step": 20274 }, { "epoch": 0.890303742164945, "grad_norm": 1.3828125, "learning_rate": 1.4785081058764972e-06, "loss": 0.3304, "step": 20276 }, { "epoch": 0.8903915606441486, "grad_norm": 1.4609375, "learning_rate": 1.4761651162031099e-06, "loss": 0.3183, "step": 20278 }, { "epoch": 0.8904793791233521, "grad_norm": 1.5234375, "learning_rate": 1.473823927990639e-06, "loss": 0.3324, "step": 20280 }, { "epoch": 0.8905671976025555, "grad_norm": 1.4375, "learning_rate": 1.4714845414183665e-06, "loss": 0.3068, "step": 20282 }, { "epoch": 0.890655016081759, "grad_norm": 1.4453125, "learning_rate": 1.4691469566654514e-06, "loss": 0.3171, "step": 20284 }, { "epoch": 0.8907428345609625, "grad_norm": 1.5078125, "learning_rate": 1.466811173910898e-06, "loss": 0.3047, "step": 20286 }, { "epoch": 0.890830653040166, "grad_norm": 1.53125, "learning_rate": 1.4644771933335938e-06, "loss": 0.3207, "step": 20288 }, { "epoch": 0.8909184715193694, "grad_norm": 1.53125, "learning_rate": 1.4621450151122652e-06, "loss": 0.3226, "step": 20290 }, { "epoch": 0.8910062899985729, "grad_norm": 1.4140625, "learning_rate": 1.4598146394255109e-06, "loss": 0.3179, "step": 20292 }, { "epoch": 0.8910941084777765, "grad_norm": 1.46875, "learning_rate": 1.4574860664517964e-06, "loss": 0.2931, "step": 20294 }, { "epoch": 0.89118192695698, "grad_norm": 1.4921875, "learning_rate": 1.4551592963694404e-06, "loss": 0.305, "step": 20296 }, { "epoch": 0.8912697454361834, "grad_norm": 1.6171875, "learning_rate": 1.4528343293566337e-06, "loss": 0.288, "step": 20298 }, { "epoch": 0.8913575639153869, "grad_norm": 1.5390625, "learning_rate": 1.4505111655914199e-06, "loss": 0.3151, "step": 20300 }, { "epoch": 0.8914453823945904, "grad_norm": 1.484375, "learning_rate": 1.4481898052517013e-06, "loss": 0.339, "step": 20302 }, { "epoch": 0.8915332008737938, "grad_norm": 1.4375, "learning_rate": 1.4458702485152549e-06, "loss": 0.2906, "step": 20304 }, { "epoch": 0.8916210193529973, "grad_norm": 1.5859375, "learning_rate": 1.4435524955597134e-06, "loss": 0.3058, "step": 20306 }, { "epoch": 0.8917088378322009, "grad_norm": 1.4609375, "learning_rate": 1.4412365465625689e-06, "loss": 0.3228, "step": 20308 }, { "epoch": 0.8917966563114044, "grad_norm": 1.5078125, "learning_rate": 1.4389224017011704e-06, "loss": 0.3381, "step": 20310 }, { "epoch": 0.8918844747906078, "grad_norm": 1.5, "learning_rate": 1.436610061152749e-06, "loss": 0.323, "step": 20312 }, { "epoch": 0.8919722932698113, "grad_norm": 1.5625, "learning_rate": 1.4342995250943735e-06, "loss": 0.3117, "step": 20314 }, { "epoch": 0.8920601117490148, "grad_norm": 1.4375, "learning_rate": 1.431990793702992e-06, "loss": 0.3206, "step": 20316 }, { "epoch": 0.8921479302282183, "grad_norm": 1.4921875, "learning_rate": 1.429683867155407e-06, "loss": 0.2996, "step": 20318 }, { "epoch": 0.8922357487074217, "grad_norm": 1.515625, "learning_rate": 1.4273787456282771e-06, "loss": 0.3233, "step": 20320 }, { "epoch": 0.8923235671866252, "grad_norm": 1.4453125, "learning_rate": 1.4250754292981366e-06, "loss": 0.3058, "step": 20322 }, { "epoch": 0.8924113856658288, "grad_norm": 1.453125, "learning_rate": 1.4227739183413663e-06, "loss": 0.3134, "step": 20324 }, { "epoch": 0.8924992041450323, "grad_norm": 1.484375, "learning_rate": 1.420474212934228e-06, "loss": 0.3056, "step": 20326 }, { "epoch": 0.8925870226242357, "grad_norm": 1.515625, "learning_rate": 1.4181763132528253e-06, "loss": 0.3122, "step": 20328 }, { "epoch": 0.8926748411034392, "grad_norm": 1.4609375, "learning_rate": 1.4158802194731285e-06, "loss": 0.2905, "step": 20330 }, { "epoch": 0.8927626595826427, "grad_norm": 1.515625, "learning_rate": 1.4135859317709832e-06, "loss": 0.3222, "step": 20332 }, { "epoch": 0.8928504780618461, "grad_norm": 1.4140625, "learning_rate": 1.4112934503220766e-06, "loss": 0.2815, "step": 20334 }, { "epoch": 0.8929382965410496, "grad_norm": 1.484375, "learning_rate": 1.4090027753019763e-06, "loss": 0.3155, "step": 20336 }, { "epoch": 0.8930261150202531, "grad_norm": 1.453125, "learning_rate": 1.4067139068861007e-06, "loss": 0.3158, "step": 20338 }, { "epoch": 0.8931139334994567, "grad_norm": 1.625, "learning_rate": 1.404426845249729e-06, "loss": 0.3042, "step": 20340 }, { "epoch": 0.8932017519786601, "grad_norm": 1.34375, "learning_rate": 1.4021415905680042e-06, "loss": 0.3318, "step": 20342 }, { "epoch": 0.8932895704578636, "grad_norm": 1.453125, "learning_rate": 1.399858143015928e-06, "loss": 0.3627, "step": 20344 }, { "epoch": 0.8933773889370671, "grad_norm": 1.4453125, "learning_rate": 1.39757650276838e-06, "loss": 0.3007, "step": 20346 }, { "epoch": 0.8934652074162706, "grad_norm": 1.5, "learning_rate": 1.3952966700000764e-06, "loss": 0.3236, "step": 20348 }, { "epoch": 0.893553025895474, "grad_norm": 1.53125, "learning_rate": 1.393018644885613e-06, "loss": 0.3086, "step": 20350 }, { "epoch": 0.8936408443746775, "grad_norm": 1.5, "learning_rate": 1.3907424275994452e-06, "loss": 0.284, "step": 20352 }, { "epoch": 0.893728662853881, "grad_norm": 1.421875, "learning_rate": 1.3884680183158748e-06, "loss": 0.2977, "step": 20354 }, { "epoch": 0.8938164813330846, "grad_norm": 1.4296875, "learning_rate": 1.3861954172090907e-06, "loss": 0.301, "step": 20356 }, { "epoch": 0.893904299812288, "grad_norm": 1.546875, "learning_rate": 1.3839246244531145e-06, "loss": 0.3359, "step": 20358 }, { "epoch": 0.8939921182914915, "grad_norm": 1.53125, "learning_rate": 1.3816556402218573e-06, "loss": 0.3188, "step": 20360 }, { "epoch": 0.894079936770695, "grad_norm": 1.4140625, "learning_rate": 1.3793884646890742e-06, "loss": 0.3213, "step": 20362 }, { "epoch": 0.8941677552498984, "grad_norm": 1.4921875, "learning_rate": 1.3771230980283795e-06, "loss": 0.3277, "step": 20364 }, { "epoch": 0.8942555737291019, "grad_norm": 1.5859375, "learning_rate": 1.3748595404132648e-06, "loss": 0.3196, "step": 20366 }, { "epoch": 0.8943433922083054, "grad_norm": 1.453125, "learning_rate": 1.372597792017069e-06, "loss": 0.3485, "step": 20368 }, { "epoch": 0.894431210687509, "grad_norm": 1.4765625, "learning_rate": 1.3703378530129984e-06, "loss": 0.3237, "step": 20370 }, { "epoch": 0.8945190291667124, "grad_norm": 1.4296875, "learning_rate": 1.3680797235741171e-06, "loss": 0.3003, "step": 20372 }, { "epoch": 0.8946068476459159, "grad_norm": 1.4453125, "learning_rate": 1.3658234038733591e-06, "loss": 0.2984, "step": 20374 }, { "epoch": 0.8946946661251194, "grad_norm": 1.4453125, "learning_rate": 1.3635688940835056e-06, "loss": 0.306, "step": 20376 }, { "epoch": 0.8947824846043229, "grad_norm": 1.53125, "learning_rate": 1.3613161943772156e-06, "loss": 0.3302, "step": 20378 }, { "epoch": 0.8948703030835263, "grad_norm": 1.4140625, "learning_rate": 1.3590653049269985e-06, "loss": 0.2974, "step": 20380 }, { "epoch": 0.8949581215627298, "grad_norm": 1.4765625, "learning_rate": 1.3568162259052247e-06, "loss": 0.3034, "step": 20382 }, { "epoch": 0.8950459400419333, "grad_norm": 1.4453125, "learning_rate": 1.3545689574841342e-06, "loss": 0.3418, "step": 20384 }, { "epoch": 0.8951337585211369, "grad_norm": 1.53125, "learning_rate": 1.352323499835817e-06, "loss": 0.3485, "step": 20386 }, { "epoch": 0.8952215770003403, "grad_norm": 1.484375, "learning_rate": 1.3500798531322412e-06, "loss": 0.3248, "step": 20388 }, { "epoch": 0.8953093954795438, "grad_norm": 1.484375, "learning_rate": 1.3478380175452165e-06, "loss": 0.3054, "step": 20390 }, { "epoch": 0.8953972139587473, "grad_norm": 1.53125, "learning_rate": 1.3455979932464253e-06, "loss": 0.3101, "step": 20392 }, { "epoch": 0.8954850324379507, "grad_norm": 1.4453125, "learning_rate": 1.3433597804074132e-06, "loss": 0.2924, "step": 20394 }, { "epoch": 0.8955728509171542, "grad_norm": 1.4296875, "learning_rate": 1.3411233791995743e-06, "loss": 0.2853, "step": 20396 }, { "epoch": 0.8956606693963577, "grad_norm": 1.578125, "learning_rate": 1.3388887897941877e-06, "loss": 0.3011, "step": 20398 }, { "epoch": 0.8957484878755612, "grad_norm": 1.484375, "learning_rate": 1.336656012362361e-06, "loss": 0.3603, "step": 20400 }, { "epoch": 0.8958363063547647, "grad_norm": 1.4765625, "learning_rate": 1.3344250470750941e-06, "loss": 0.3254, "step": 20402 }, { "epoch": 0.8959241248339682, "grad_norm": 1.453125, "learning_rate": 1.3321958941032303e-06, "loss": 0.294, "step": 20404 }, { "epoch": 0.8960119433131717, "grad_norm": 1.5078125, "learning_rate": 1.3299685536174749e-06, "loss": 0.3111, "step": 20406 }, { "epoch": 0.8960997617923752, "grad_norm": 1.5234375, "learning_rate": 1.3277430257884055e-06, "loss": 0.3373, "step": 20408 }, { "epoch": 0.8961875802715786, "grad_norm": 1.546875, "learning_rate": 1.3255193107864438e-06, "loss": 0.3106, "step": 20410 }, { "epoch": 0.8962753987507821, "grad_norm": 1.3828125, "learning_rate": 1.3232974087818955e-06, "loss": 0.328, "step": 20412 }, { "epoch": 0.8963632172299856, "grad_norm": 1.453125, "learning_rate": 1.321077319944905e-06, "loss": 0.3167, "step": 20414 }, { "epoch": 0.8964510357091892, "grad_norm": 1.5390625, "learning_rate": 1.3188590444454863e-06, "loss": 0.3185, "step": 20416 }, { "epoch": 0.8965388541883926, "grad_norm": 1.5, "learning_rate": 1.3166425824535227e-06, "loss": 0.2994, "step": 20418 }, { "epoch": 0.8966266726675961, "grad_norm": 1.46875, "learning_rate": 1.3144279341387427e-06, "loss": 0.3108, "step": 20420 }, { "epoch": 0.8967144911467996, "grad_norm": 1.4921875, "learning_rate": 1.312215099670755e-06, "loss": 0.3096, "step": 20422 }, { "epoch": 0.896802309626003, "grad_norm": 1.5390625, "learning_rate": 1.3100040792190127e-06, "loss": 0.3016, "step": 20424 }, { "epoch": 0.8968901281052065, "grad_norm": 1.4140625, "learning_rate": 1.3077948729528333e-06, "loss": 0.3203, "step": 20426 }, { "epoch": 0.89697794658441, "grad_norm": 1.484375, "learning_rate": 1.3055874810414037e-06, "loss": 0.3113, "step": 20428 }, { "epoch": 0.8970657650636135, "grad_norm": 1.421875, "learning_rate": 1.3033819036537665e-06, "loss": 0.3138, "step": 20430 }, { "epoch": 0.897153583542817, "grad_norm": 1.484375, "learning_rate": 1.3011781409588225e-06, "loss": 0.336, "step": 20432 }, { "epoch": 0.8972414020220205, "grad_norm": 1.546875, "learning_rate": 1.298976193125334e-06, "loss": 0.304, "step": 20434 }, { "epoch": 0.897329220501224, "grad_norm": 1.453125, "learning_rate": 1.2967760603219358e-06, "loss": 0.3237, "step": 20436 }, { "epoch": 0.8974170389804275, "grad_norm": 1.453125, "learning_rate": 1.294577742717104e-06, "loss": 0.3243, "step": 20438 }, { "epoch": 0.8975048574596309, "grad_norm": 1.4609375, "learning_rate": 1.2923812404791958e-06, "loss": 0.2941, "step": 20440 }, { "epoch": 0.8975926759388344, "grad_norm": 1.4765625, "learning_rate": 1.2901865537764124e-06, "loss": 0.3097, "step": 20442 }, { "epoch": 0.8976804944180379, "grad_norm": 1.453125, "learning_rate": 1.2879936827768253e-06, "loss": 0.3195, "step": 20444 }, { "epoch": 0.8977683128972413, "grad_norm": 1.4296875, "learning_rate": 1.2858026276483691e-06, "loss": 0.3152, "step": 20446 }, { "epoch": 0.8978561313764449, "grad_norm": 1.3828125, "learning_rate": 1.2836133885588297e-06, "loss": 0.3098, "step": 20448 }, { "epoch": 0.8979439498556484, "grad_norm": 1.484375, "learning_rate": 1.2814259656758643e-06, "loss": 0.322, "step": 20450 }, { "epoch": 0.8980317683348519, "grad_norm": 1.4765625, "learning_rate": 1.2792403591669832e-06, "loss": 0.2979, "step": 20452 }, { "epoch": 0.8981195868140553, "grad_norm": 1.5234375, "learning_rate": 1.277056569199561e-06, "loss": 0.2918, "step": 20454 }, { "epoch": 0.8982074052932588, "grad_norm": 1.4765625, "learning_rate": 1.2748745959408365e-06, "loss": 0.3074, "step": 20456 }, { "epoch": 0.8982952237724623, "grad_norm": 1.40625, "learning_rate": 1.2726944395578978e-06, "loss": 0.2898, "step": 20458 }, { "epoch": 0.8983830422516658, "grad_norm": 1.546875, "learning_rate": 1.270516100217714e-06, "loss": 0.3297, "step": 20460 }, { "epoch": 0.8984708607308693, "grad_norm": 1.5390625, "learning_rate": 1.2683395780870883e-06, "loss": 0.3182, "step": 20462 }, { "epoch": 0.8985586792100728, "grad_norm": 1.578125, "learning_rate": 1.266164873332712e-06, "loss": 0.3075, "step": 20464 }, { "epoch": 0.8986464976892763, "grad_norm": 1.515625, "learning_rate": 1.2639919861211158e-06, "loss": 0.3312, "step": 20466 }, { "epoch": 0.8987343161684798, "grad_norm": 1.53125, "learning_rate": 1.261820916618703e-06, "loss": 0.3273, "step": 20468 }, { "epoch": 0.8988221346476832, "grad_norm": 1.4296875, "learning_rate": 1.2596516649917373e-06, "loss": 0.3323, "step": 20470 }, { "epoch": 0.8989099531268867, "grad_norm": 1.4609375, "learning_rate": 1.2574842314063335e-06, "loss": 0.3316, "step": 20472 }, { "epoch": 0.8989977716060902, "grad_norm": 1.46875, "learning_rate": 1.2553186160284837e-06, "loss": 0.3054, "step": 20474 }, { "epoch": 0.8990855900852937, "grad_norm": 1.5390625, "learning_rate": 1.2531548190240244e-06, "loss": 0.2856, "step": 20476 }, { "epoch": 0.8991734085644972, "grad_norm": 1.4140625, "learning_rate": 1.2509928405586596e-06, "loss": 0.2936, "step": 20478 }, { "epoch": 0.8992612270437007, "grad_norm": 1.4375, "learning_rate": 1.2488326807979594e-06, "loss": 0.3209, "step": 20480 }, { "epoch": 0.8993490455229042, "grad_norm": 1.4921875, "learning_rate": 1.2466743399073415e-06, "loss": 0.3034, "step": 20482 }, { "epoch": 0.8994368640021076, "grad_norm": 1.5546875, "learning_rate": 1.2445178180521016e-06, "loss": 0.3014, "step": 20484 }, { "epoch": 0.8995246824813111, "grad_norm": 1.4921875, "learning_rate": 1.2423631153973824e-06, "loss": 0.3369, "step": 20486 }, { "epoch": 0.8996125009605146, "grad_norm": 1.421875, "learning_rate": 1.2402102321081854e-06, "loss": 0.3253, "step": 20488 }, { "epoch": 0.8997003194397181, "grad_norm": 1.46875, "learning_rate": 1.2380591683493926e-06, "loss": 0.3104, "step": 20490 }, { "epoch": 0.8997881379189215, "grad_norm": 1.46875, "learning_rate": 1.2359099242857164e-06, "loss": 0.2955, "step": 20492 }, { "epoch": 0.8998759563981251, "grad_norm": 1.4296875, "learning_rate": 1.2337625000817616e-06, "loss": 0.3055, "step": 20494 }, { "epoch": 0.8999637748773286, "grad_norm": 1.5078125, "learning_rate": 1.2316168959019658e-06, "loss": 0.3278, "step": 20496 }, { "epoch": 0.9000515933565321, "grad_norm": 1.484375, "learning_rate": 1.2294731119106479e-06, "loss": 0.319, "step": 20498 }, { "epoch": 0.9001394118357355, "grad_norm": 1.5078125, "learning_rate": 1.2273311482719764e-06, "loss": 0.3076, "step": 20500 }, { "epoch": 0.900227230314939, "grad_norm": 1.40625, "learning_rate": 1.2251910051499865e-06, "loss": 0.2989, "step": 20502 }, { "epoch": 0.9003150487941425, "grad_norm": 1.5703125, "learning_rate": 1.2230526827085698e-06, "loss": 0.3129, "step": 20504 }, { "epoch": 0.900402867273346, "grad_norm": 1.3984375, "learning_rate": 1.2209161811114727e-06, "loss": 0.3273, "step": 20506 }, { "epoch": 0.9004906857525495, "grad_norm": 1.75, "learning_rate": 1.2187815005223202e-06, "loss": 0.3165, "step": 20508 }, { "epoch": 0.900578504231753, "grad_norm": 1.546875, "learning_rate": 1.2166486411045786e-06, "loss": 0.33, "step": 20510 }, { "epoch": 0.9006663227109565, "grad_norm": 1.484375, "learning_rate": 1.2145176030215866e-06, "loss": 0.3129, "step": 20512 }, { "epoch": 0.90075414119016, "grad_norm": 1.421875, "learning_rate": 1.2123883864365388e-06, "loss": 0.3177, "step": 20514 }, { "epoch": 0.9008419596693634, "grad_norm": 1.546875, "learning_rate": 1.210260991512488e-06, "loss": 0.3488, "step": 20516 }, { "epoch": 0.9009297781485669, "grad_norm": 1.5625, "learning_rate": 1.208135418412354e-06, "loss": 0.3142, "step": 20518 }, { "epoch": 0.9010175966277704, "grad_norm": 1.5390625, "learning_rate": 1.2060116672989118e-06, "loss": 0.3306, "step": 20520 }, { "epoch": 0.9011054151069738, "grad_norm": 1.453125, "learning_rate": 1.203889738334807e-06, "loss": 0.3044, "step": 20522 }, { "epoch": 0.9011932335861774, "grad_norm": 1.515625, "learning_rate": 1.2017696316825228e-06, "loss": 0.3305, "step": 20524 }, { "epoch": 0.9012810520653809, "grad_norm": 1.453125, "learning_rate": 1.1996513475044297e-06, "loss": 0.3275, "step": 20526 }, { "epoch": 0.9013688705445844, "grad_norm": 1.515625, "learning_rate": 1.1975348859627395e-06, "loss": 0.3127, "step": 20528 }, { "epoch": 0.9014566890237878, "grad_norm": 1.5078125, "learning_rate": 1.195420247219531e-06, "loss": 0.3118, "step": 20530 }, { "epoch": 0.9015445075029913, "grad_norm": 1.453125, "learning_rate": 1.193307431436752e-06, "loss": 0.3125, "step": 20532 }, { "epoch": 0.9016323259821948, "grad_norm": 1.4296875, "learning_rate": 1.1911964387761904e-06, "loss": 0.2804, "step": 20534 }, { "epoch": 0.9017201444613983, "grad_norm": 1.4765625, "learning_rate": 1.1890872693995165e-06, "loss": 0.3351, "step": 20536 }, { "epoch": 0.9018079629406017, "grad_norm": 1.4375, "learning_rate": 1.1869799234682482e-06, "loss": 0.3175, "step": 20538 }, { "epoch": 0.9018957814198053, "grad_norm": 1.5078125, "learning_rate": 1.1848744011437623e-06, "loss": 0.3165, "step": 20540 }, { "epoch": 0.9019835998990088, "grad_norm": 1.5625, "learning_rate": 1.1827707025873074e-06, "loss": 0.3231, "step": 20542 }, { "epoch": 0.9020714183782123, "grad_norm": 1.453125, "learning_rate": 1.1806688279599798e-06, "loss": 0.3045, "step": 20544 }, { "epoch": 0.9021592368574157, "grad_norm": 1.5078125, "learning_rate": 1.1785687774227422e-06, "loss": 0.3261, "step": 20546 }, { "epoch": 0.9022470553366192, "grad_norm": 1.484375, "learning_rate": 1.1764705511364215e-06, "loss": 0.2926, "step": 20548 }, { "epoch": 0.9023348738158227, "grad_norm": 1.453125, "learning_rate": 1.1743741492616922e-06, "loss": 0.2985, "step": 20550 }, { "epoch": 0.9024226922950261, "grad_norm": 1.4296875, "learning_rate": 1.172279571959109e-06, "loss": 0.2882, "step": 20552 }, { "epoch": 0.9025105107742296, "grad_norm": 1.484375, "learning_rate": 1.170186819389063e-06, "loss": 0.3183, "step": 20554 }, { "epoch": 0.9025983292534332, "grad_norm": 1.5625, "learning_rate": 1.1680958917118235e-06, "loss": 0.2962, "step": 20556 }, { "epoch": 0.9026861477326367, "grad_norm": 1.4765625, "learning_rate": 1.1660067890875092e-06, "loss": 0.3091, "step": 20558 }, { "epoch": 0.9027739662118401, "grad_norm": 1.5625, "learning_rate": 1.1639195116761148e-06, "loss": 0.328, "step": 20560 }, { "epoch": 0.9028617846910436, "grad_norm": 1.46875, "learning_rate": 1.161834059637476e-06, "loss": 0.2942, "step": 20562 }, { "epoch": 0.9029496031702471, "grad_norm": 1.5, "learning_rate": 1.1597504331312986e-06, "loss": 0.2917, "step": 20564 }, { "epoch": 0.9030374216494506, "grad_norm": 1.4296875, "learning_rate": 1.1576686323171493e-06, "loss": 0.297, "step": 20566 }, { "epoch": 0.903125240128654, "grad_norm": 1.5546875, "learning_rate": 1.1555886573544478e-06, "loss": 0.336, "step": 20568 }, { "epoch": 0.9032130586078576, "grad_norm": 1.625, "learning_rate": 1.1535105084024862e-06, "loss": 0.3405, "step": 20570 }, { "epoch": 0.9033008770870611, "grad_norm": 1.5234375, "learning_rate": 1.1514341856204037e-06, "loss": 0.3324, "step": 20572 }, { "epoch": 0.9033886955662646, "grad_norm": 1.65625, "learning_rate": 1.149359689167212e-06, "loss": 0.3195, "step": 20574 }, { "epoch": 0.903476514045468, "grad_norm": 1.578125, "learning_rate": 1.1472870192017732e-06, "loss": 0.319, "step": 20576 }, { "epoch": 0.9035643325246715, "grad_norm": 1.484375, "learning_rate": 1.1452161758828074e-06, "loss": 0.3324, "step": 20578 }, { "epoch": 0.903652151003875, "grad_norm": 1.4921875, "learning_rate": 1.1431471593689097e-06, "loss": 0.3128, "step": 20580 }, { "epoch": 0.9037399694830784, "grad_norm": 1.4765625, "learning_rate": 1.1410799698185204e-06, "loss": 0.3055, "step": 20582 }, { "epoch": 0.9038277879622819, "grad_norm": 1.46875, "learning_rate": 1.139014607389946e-06, "loss": 0.2876, "step": 20584 }, { "epoch": 0.9039156064414855, "grad_norm": 1.4140625, "learning_rate": 1.136951072241349e-06, "loss": 0.3179, "step": 20586 }, { "epoch": 0.904003424920689, "grad_norm": 1.46875, "learning_rate": 1.134889364530764e-06, "loss": 0.3113, "step": 20588 }, { "epoch": 0.9040912433998924, "grad_norm": 1.5625, "learning_rate": 1.1328294844160732e-06, "loss": 0.3197, "step": 20590 }, { "epoch": 0.9041790618790959, "grad_norm": 1.4453125, "learning_rate": 1.1307714320550167e-06, "loss": 0.3094, "step": 20592 }, { "epoch": 0.9042668803582994, "grad_norm": 1.53125, "learning_rate": 1.1287152076052104e-06, "loss": 0.2775, "step": 20594 }, { "epoch": 0.9043546988375029, "grad_norm": 1.4765625, "learning_rate": 1.1266608112241118e-06, "loss": 0.3146, "step": 20596 }, { "epoch": 0.9044425173167063, "grad_norm": 1.3984375, "learning_rate": 1.1246082430690558e-06, "loss": 0.2975, "step": 20598 }, { "epoch": 0.9045303357959098, "grad_norm": 1.453125, "learning_rate": 1.1225575032972223e-06, "loss": 0.306, "step": 20600 }, { "epoch": 0.9046181542751134, "grad_norm": 1.4609375, "learning_rate": 1.1205085920656556e-06, "loss": 0.3098, "step": 20602 }, { "epoch": 0.9047059727543169, "grad_norm": 1.453125, "learning_rate": 1.1184615095312684e-06, "loss": 0.3128, "step": 20604 }, { "epoch": 0.9047937912335203, "grad_norm": 1.5078125, "learning_rate": 1.1164162558508217e-06, "loss": 0.2778, "step": 20606 }, { "epoch": 0.9048816097127238, "grad_norm": 1.4453125, "learning_rate": 1.114372831180946e-06, "loss": 0.3463, "step": 20608 }, { "epoch": 0.9049694281919273, "grad_norm": 1.515625, "learning_rate": 1.112331235678124e-06, "loss": 0.3002, "step": 20610 }, { "epoch": 0.9050572466711307, "grad_norm": 1.4765625, "learning_rate": 1.1102914694987004e-06, "loss": 0.3101, "step": 20612 }, { "epoch": 0.9051450651503342, "grad_norm": 1.5078125, "learning_rate": 1.1082535327988864e-06, "loss": 0.313, "step": 20614 }, { "epoch": 0.9052328836295378, "grad_norm": 1.4765625, "learning_rate": 1.1062174257347402e-06, "loss": 0.3067, "step": 20616 }, { "epoch": 0.9053207021087413, "grad_norm": 1.515625, "learning_rate": 1.1041831484621956e-06, "loss": 0.3436, "step": 20618 }, { "epoch": 0.9054085205879447, "grad_norm": 1.59375, "learning_rate": 1.102150701137028e-06, "loss": 0.3385, "step": 20620 }, { "epoch": 0.9054963390671482, "grad_norm": 1.4765625, "learning_rate": 1.1001200839148934e-06, "loss": 0.3233, "step": 20622 }, { "epoch": 0.9055841575463517, "grad_norm": 1.484375, "learning_rate": 1.0980912969512897e-06, "loss": 0.3154, "step": 20624 }, { "epoch": 0.9056719760255552, "grad_norm": 1.4921875, "learning_rate": 1.0960643404015813e-06, "loss": 0.3367, "step": 20626 }, { "epoch": 0.9057597945047586, "grad_norm": 1.515625, "learning_rate": 1.0940392144210027e-06, "loss": 0.3401, "step": 20628 }, { "epoch": 0.9058476129839621, "grad_norm": 1.4765625, "learning_rate": 1.092015919164624e-06, "loss": 0.3034, "step": 20630 }, { "epoch": 0.9059354314631657, "grad_norm": 1.46875, "learning_rate": 1.089994454787402e-06, "loss": 0.3058, "step": 20632 }, { "epoch": 0.9060232499423692, "grad_norm": 1.4765625, "learning_rate": 1.0879748214441348e-06, "loss": 0.3177, "step": 20634 }, { "epoch": 0.9061110684215726, "grad_norm": 1.515625, "learning_rate": 1.0859570192894908e-06, "loss": 0.32, "step": 20636 }, { "epoch": 0.9061988869007761, "grad_norm": 1.375, "learning_rate": 1.0839410484779876e-06, "loss": 0.2909, "step": 20638 }, { "epoch": 0.9062867053799796, "grad_norm": 1.453125, "learning_rate": 1.0819269091640134e-06, "loss": 0.3363, "step": 20640 }, { "epoch": 0.906374523859183, "grad_norm": 1.421875, "learning_rate": 1.0799146015018109e-06, "loss": 0.3271, "step": 20642 }, { "epoch": 0.9064623423383865, "grad_norm": 1.546875, "learning_rate": 1.077904125645482e-06, "loss": 0.3047, "step": 20644 }, { "epoch": 0.90655016081759, "grad_norm": 1.671875, "learning_rate": 1.0758954817489897e-06, "loss": 0.3081, "step": 20646 }, { "epoch": 0.9066379792967936, "grad_norm": 1.546875, "learning_rate": 1.0738886699661528e-06, "loss": 0.2996, "step": 20648 }, { "epoch": 0.906725797775997, "grad_norm": 1.4296875, "learning_rate": 1.0718836904506617e-06, "loss": 0.3251, "step": 20650 }, { "epoch": 0.9068136162552005, "grad_norm": 1.4609375, "learning_rate": 1.0698805433560528e-06, "loss": 0.3055, "step": 20652 }, { "epoch": 0.906901434734404, "grad_norm": 1.5, "learning_rate": 1.0678792288357249e-06, "loss": 0.3177, "step": 20654 }, { "epoch": 0.9069892532136075, "grad_norm": 1.484375, "learning_rate": 1.0658797470429443e-06, "loss": 0.3137, "step": 20656 }, { "epoch": 0.9070770716928109, "grad_norm": 1.4609375, "learning_rate": 1.0638820981308305e-06, "loss": 0.3099, "step": 20658 }, { "epoch": 0.9071648901720144, "grad_norm": 1.4609375, "learning_rate": 1.0618862822523639e-06, "loss": 0.3337, "step": 20660 }, { "epoch": 0.907252708651218, "grad_norm": 1.4375, "learning_rate": 1.0598922995603861e-06, "loss": 0.3277, "step": 20662 }, { "epoch": 0.9073405271304215, "grad_norm": 1.46875, "learning_rate": 1.0579001502075887e-06, "loss": 0.334, "step": 20664 }, { "epoch": 0.9074283456096249, "grad_norm": 1.46875, "learning_rate": 1.055909834346544e-06, "loss": 0.3086, "step": 20666 }, { "epoch": 0.9075161640888284, "grad_norm": 1.453125, "learning_rate": 1.0539213521296582e-06, "loss": 0.3132, "step": 20668 }, { "epoch": 0.9076039825680319, "grad_norm": 1.4296875, "learning_rate": 1.0519347037092175e-06, "loss": 0.3115, "step": 20670 }, { "epoch": 0.9076918010472353, "grad_norm": 1.4765625, "learning_rate": 1.0499498892373616e-06, "loss": 0.3169, "step": 20672 }, { "epoch": 0.9077796195264388, "grad_norm": 1.515625, "learning_rate": 1.0479669088660827e-06, "loss": 0.3559, "step": 20674 }, { "epoch": 0.9078674380056423, "grad_norm": 1.46875, "learning_rate": 1.0459857627472396e-06, "loss": 0.3287, "step": 20676 }, { "epoch": 0.9079552564848459, "grad_norm": 1.515625, "learning_rate": 1.0440064510325448e-06, "loss": 0.3209, "step": 20678 }, { "epoch": 0.9080430749640493, "grad_norm": 1.4765625, "learning_rate": 1.0420289738735822e-06, "loss": 0.3123, "step": 20680 }, { "epoch": 0.9081308934432528, "grad_norm": 1.4375, "learning_rate": 1.0400533314217837e-06, "loss": 0.332, "step": 20682 }, { "epoch": 0.9082187119224563, "grad_norm": 1.484375, "learning_rate": 1.0380795238284446e-06, "loss": 0.299, "step": 20684 }, { "epoch": 0.9083065304016598, "grad_norm": 1.4921875, "learning_rate": 1.0361075512447193e-06, "loss": 0.3013, "step": 20686 }, { "epoch": 0.9083943488808632, "grad_norm": 1.4921875, "learning_rate": 1.0341374138216203e-06, "loss": 0.314, "step": 20688 }, { "epoch": 0.9084821673600667, "grad_norm": 1.4921875, "learning_rate": 1.0321691117100268e-06, "loss": 0.3044, "step": 20690 }, { "epoch": 0.9085699858392702, "grad_norm": 1.4296875, "learning_rate": 1.0302026450606656e-06, "loss": 0.3105, "step": 20692 }, { "epoch": 0.9086578043184738, "grad_norm": 1.484375, "learning_rate": 1.0282380140241326e-06, "loss": 0.3198, "step": 20694 }, { "epoch": 0.9087456227976772, "grad_norm": 1.421875, "learning_rate": 1.02627521875088e-06, "loss": 0.3752, "step": 20696 }, { "epoch": 0.9088334412768807, "grad_norm": 1.4453125, "learning_rate": 1.0243142593912153e-06, "loss": 0.3131, "step": 20698 }, { "epoch": 0.9089212597560842, "grad_norm": 1.421875, "learning_rate": 1.0223551360953154e-06, "loss": 0.2975, "step": 20700 }, { "epoch": 0.9090090782352876, "grad_norm": 1.421875, "learning_rate": 1.020397849013205e-06, "loss": 0.3036, "step": 20702 }, { "epoch": 0.9090968967144911, "grad_norm": 1.5546875, "learning_rate": 1.018442398294775e-06, "loss": 0.285, "step": 20704 }, { "epoch": 0.9091847151936946, "grad_norm": 1.5234375, "learning_rate": 1.016488784089778e-06, "loss": 0.3174, "step": 20706 }, { "epoch": 0.9092725336728981, "grad_norm": 1.5078125, "learning_rate": 1.0145370065478194e-06, "loss": 0.3333, "step": 20708 }, { "epoch": 0.9093603521521016, "grad_norm": 1.5703125, "learning_rate": 1.01258706581836e-06, "loss": 0.311, "step": 20710 }, { "epoch": 0.9094481706313051, "grad_norm": 1.6171875, "learning_rate": 1.010638962050739e-06, "loss": 0.3376, "step": 20712 }, { "epoch": 0.9095359891105086, "grad_norm": 1.46875, "learning_rate": 1.0086926953941368e-06, "loss": 0.3444, "step": 20714 }, { "epoch": 0.9096238075897121, "grad_norm": 1.59375, "learning_rate": 1.0067482659975953e-06, "loss": 0.2992, "step": 20716 }, { "epoch": 0.9097116260689155, "grad_norm": 1.4609375, "learning_rate": 1.0048056740100286e-06, "loss": 0.337, "step": 20718 }, { "epoch": 0.909799444548119, "grad_norm": 1.3984375, "learning_rate": 1.0028649195801903e-06, "loss": 0.3046, "step": 20720 }, { "epoch": 0.9098872630273225, "grad_norm": 1.4453125, "learning_rate": 1.0009260028567113e-06, "loss": 0.3162, "step": 20722 }, { "epoch": 0.9099750815065261, "grad_norm": 1.421875, "learning_rate": 9.989889239880729e-07, "loss": 0.301, "step": 20724 }, { "epoch": 0.9100628999857295, "grad_norm": 1.4453125, "learning_rate": 9.970536831226145e-07, "loss": 0.301, "step": 20726 }, { "epoch": 0.910150718464933, "grad_norm": 1.484375, "learning_rate": 9.951202804085402e-07, "loss": 0.3301, "step": 20728 }, { "epoch": 0.9102385369441365, "grad_norm": 1.5546875, "learning_rate": 9.931887159939062e-07, "loss": 0.302, "step": 20730 }, { "epoch": 0.91032635542334, "grad_norm": 1.5234375, "learning_rate": 9.91258990026639e-07, "loss": 0.292, "step": 20732 }, { "epoch": 0.9104141739025434, "grad_norm": 1.4921875, "learning_rate": 9.893311026545116e-07, "loss": 0.33, "step": 20734 }, { "epoch": 0.9105019923817469, "grad_norm": 1.453125, "learning_rate": 9.874050540251672e-07, "loss": 0.2974, "step": 20736 }, { "epoch": 0.9105898108609504, "grad_norm": 1.4921875, "learning_rate": 9.85480844286099e-07, "loss": 0.3126, "step": 20738 }, { "epoch": 0.910677629340154, "grad_norm": 1.4609375, "learning_rate": 9.835584735846588e-07, "loss": 0.3131, "step": 20740 }, { "epoch": 0.9107654478193574, "grad_norm": 1.375, "learning_rate": 9.816379420680727e-07, "loss": 0.3123, "step": 20742 }, { "epoch": 0.9108532662985609, "grad_norm": 1.4609375, "learning_rate": 9.797192498834096e-07, "loss": 0.3226, "step": 20744 }, { "epoch": 0.9109410847777644, "grad_norm": 1.453125, "learning_rate": 9.778023971776045e-07, "loss": 0.324, "step": 20746 }, { "epoch": 0.9110289032569678, "grad_norm": 1.53125, "learning_rate": 9.758873840974514e-07, "loss": 0.3356, "step": 20748 }, { "epoch": 0.9111167217361713, "grad_norm": 1.53125, "learning_rate": 9.739742107895994e-07, "loss": 0.3209, "step": 20750 }, { "epoch": 0.9112045402153748, "grad_norm": 1.546875, "learning_rate": 9.720628774005647e-07, "loss": 0.2962, "step": 20752 }, { "epoch": 0.9112923586945783, "grad_norm": 1.5078125, "learning_rate": 9.701533840767108e-07, "loss": 0.3132, "step": 20754 }, { "epoch": 0.9113801771737818, "grad_norm": 1.5, "learning_rate": 9.682457309642735e-07, "loss": 0.2998, "step": 20756 }, { "epoch": 0.9114679956529853, "grad_norm": 1.4140625, "learning_rate": 9.663399182093386e-07, "loss": 0.338, "step": 20758 }, { "epoch": 0.9115558141321888, "grad_norm": 1.4140625, "learning_rate": 9.644359459578533e-07, "loss": 0.329, "step": 20760 }, { "epoch": 0.9116436326113923, "grad_norm": 1.4921875, "learning_rate": 9.62533814355626e-07, "loss": 0.2874, "step": 20762 }, { "epoch": 0.9117314510905957, "grad_norm": 1.46875, "learning_rate": 9.606335235483182e-07, "loss": 0.3186, "step": 20764 }, { "epoch": 0.9118192695697992, "grad_norm": 1.453125, "learning_rate": 9.58735073681466e-07, "loss": 0.3224, "step": 20766 }, { "epoch": 0.9119070880490027, "grad_norm": 1.453125, "learning_rate": 9.568384649004363e-07, "loss": 0.337, "step": 20768 }, { "epoch": 0.9119949065282063, "grad_norm": 1.4375, "learning_rate": 9.549436973504855e-07, "loss": 0.2885, "step": 20770 }, { "epoch": 0.9120827250074097, "grad_norm": 1.53125, "learning_rate": 9.530507711767056e-07, "loss": 0.3455, "step": 20772 }, { "epoch": 0.9121705434866132, "grad_norm": 1.4921875, "learning_rate": 9.511596865240669e-07, "loss": 0.3165, "step": 20774 }, { "epoch": 0.9122583619658167, "grad_norm": 1.4609375, "learning_rate": 9.49270443537384e-07, "loss": 0.294, "step": 20776 }, { "epoch": 0.9123461804450201, "grad_norm": 1.4296875, "learning_rate": 9.473830423613328e-07, "loss": 0.339, "step": 20778 }, { "epoch": 0.9124339989242236, "grad_norm": 1.4296875, "learning_rate": 9.454974831404561e-07, "loss": 0.3194, "step": 20780 }, { "epoch": 0.9125218174034271, "grad_norm": 1.421875, "learning_rate": 9.436137660191469e-07, "loss": 0.3044, "step": 20782 }, { "epoch": 0.9126096358826306, "grad_norm": 1.53125, "learning_rate": 9.417318911416644e-07, "loss": 0.315, "step": 20784 }, { "epoch": 0.9126974543618341, "grad_norm": 1.5, "learning_rate": 9.398518586521188e-07, "loss": 0.2958, "step": 20786 }, { "epoch": 0.9127852728410376, "grad_norm": 1.4296875, "learning_rate": 9.379736686944862e-07, "loss": 0.305, "step": 20788 }, { "epoch": 0.9128730913202411, "grad_norm": 1.5234375, "learning_rate": 9.36097321412599e-07, "loss": 0.322, "step": 20790 }, { "epoch": 0.9129609097994446, "grad_norm": 1.4453125, "learning_rate": 9.342228169501449e-07, "loss": 0.3145, "step": 20792 }, { "epoch": 0.913048728278648, "grad_norm": 1.53125, "learning_rate": 9.323501554506786e-07, "loss": 0.2959, "step": 20794 }, { "epoch": 0.9131365467578515, "grad_norm": 1.484375, "learning_rate": 9.304793370576076e-07, "loss": 0.311, "step": 20796 }, { "epoch": 0.913224365237055, "grad_norm": 1.546875, "learning_rate": 9.286103619141978e-07, "loss": 0.3461, "step": 20798 }, { "epoch": 0.9133121837162584, "grad_norm": 1.4765625, "learning_rate": 9.267432301635792e-07, "loss": 0.2936, "step": 20800 }, { "epoch": 0.913400002195462, "grad_norm": 1.421875, "learning_rate": 9.248779419487292e-07, "loss": 0.289, "step": 20802 }, { "epoch": 0.9134878206746655, "grad_norm": 1.453125, "learning_rate": 9.230144974125027e-07, "loss": 0.3029, "step": 20804 }, { "epoch": 0.913575639153869, "grad_norm": 1.453125, "learning_rate": 9.211528966975941e-07, "loss": 0.3303, "step": 20806 }, { "epoch": 0.9136634576330724, "grad_norm": 1.4375, "learning_rate": 9.192931399465698e-07, "loss": 0.3011, "step": 20808 }, { "epoch": 0.9137512761122759, "grad_norm": 1.46875, "learning_rate": 9.174352273018521e-07, "loss": 0.3299, "step": 20810 }, { "epoch": 0.9138390945914794, "grad_norm": 1.4375, "learning_rate": 9.155791589057133e-07, "loss": 0.3313, "step": 20812 }, { "epoch": 0.9139269130706829, "grad_norm": 1.3984375, "learning_rate": 9.137249349002979e-07, "loss": 0.3033, "step": 20814 }, { "epoch": 0.9140147315498864, "grad_norm": 1.4453125, "learning_rate": 9.118725554276008e-07, "loss": 0.3177, "step": 20816 }, { "epoch": 0.9141025500290899, "grad_norm": 1.546875, "learning_rate": 9.10022020629478e-07, "loss": 0.3122, "step": 20818 }, { "epoch": 0.9141903685082934, "grad_norm": 1.5234375, "learning_rate": 9.081733306476437e-07, "loss": 0.3234, "step": 20820 }, { "epoch": 0.9142781869874969, "grad_norm": 1.5, "learning_rate": 9.063264856236708e-07, "loss": 0.3054, "step": 20822 }, { "epoch": 0.9143660054667003, "grad_norm": 1.4296875, "learning_rate": 9.044814856989908e-07, "loss": 0.3372, "step": 20824 }, { "epoch": 0.9144538239459038, "grad_norm": 1.4296875, "learning_rate": 9.026383310148933e-07, "loss": 0.3083, "step": 20826 }, { "epoch": 0.9145416424251073, "grad_norm": 1.4375, "learning_rate": 9.00797021712535e-07, "loss": 0.323, "step": 20828 }, { "epoch": 0.9146294609043107, "grad_norm": 1.640625, "learning_rate": 8.989575579329113e-07, "loss": 0.3073, "step": 20830 }, { "epoch": 0.9147172793835143, "grad_norm": 1.484375, "learning_rate": 8.971199398168983e-07, "loss": 0.3294, "step": 20832 }, { "epoch": 0.9148050978627178, "grad_norm": 1.4921875, "learning_rate": 8.95284167505217e-07, "loss": 0.3088, "step": 20834 }, { "epoch": 0.9148929163419213, "grad_norm": 1.4765625, "learning_rate": 8.934502411384549e-07, "loss": 0.3042, "step": 20836 }, { "epoch": 0.9149807348211247, "grad_norm": 1.4921875, "learning_rate": 8.916181608570495e-07, "loss": 0.3142, "step": 20838 }, { "epoch": 0.9150685533003282, "grad_norm": 1.4296875, "learning_rate": 8.897879268013027e-07, "loss": 0.3137, "step": 20840 }, { "epoch": 0.9151563717795317, "grad_norm": 1.484375, "learning_rate": 8.879595391113798e-07, "loss": 0.3189, "step": 20842 }, { "epoch": 0.9152441902587352, "grad_norm": 1.4453125, "learning_rate": 8.861329979272915e-07, "loss": 0.3058, "step": 20844 }, { "epoch": 0.9153320087379386, "grad_norm": 1.390625, "learning_rate": 8.843083033889227e-07, "loss": 0.3186, "step": 20846 }, { "epoch": 0.9154198272171422, "grad_norm": 1.46875, "learning_rate": 8.824854556360062e-07, "loss": 0.3289, "step": 20848 }, { "epoch": 0.9155076456963457, "grad_norm": 1.40625, "learning_rate": 8.806644548081305e-07, "loss": 0.3385, "step": 20850 }, { "epoch": 0.9155954641755492, "grad_norm": 1.53125, "learning_rate": 8.788453010447534e-07, "loss": 0.3497, "step": 20852 }, { "epoch": 0.9156832826547526, "grad_norm": 1.4375, "learning_rate": 8.770279944851856e-07, "loss": 0.3319, "step": 20854 }, { "epoch": 0.9157711011339561, "grad_norm": 1.4765625, "learning_rate": 8.752125352685992e-07, "loss": 0.3535, "step": 20856 }, { "epoch": 0.9158589196131596, "grad_norm": 1.4921875, "learning_rate": 8.73398923534019e-07, "loss": 0.3008, "step": 20858 }, { "epoch": 0.915946738092363, "grad_norm": 1.421875, "learning_rate": 8.715871594203312e-07, "loss": 0.3212, "step": 20860 }, { "epoch": 0.9160345565715666, "grad_norm": 1.53125, "learning_rate": 8.697772430662859e-07, "loss": 0.2935, "step": 20862 }, { "epoch": 0.9161223750507701, "grad_norm": 1.4140625, "learning_rate": 8.679691746104807e-07, "loss": 0.3038, "step": 20864 }, { "epoch": 0.9162101935299736, "grad_norm": 1.46875, "learning_rate": 8.661629541913824e-07, "loss": 0.2944, "step": 20866 }, { "epoch": 0.916298012009177, "grad_norm": 1.5546875, "learning_rate": 8.643585819473055e-07, "loss": 0.3247, "step": 20868 }, { "epoch": 0.9163858304883805, "grad_norm": 1.4765625, "learning_rate": 8.625560580164394e-07, "loss": 0.3316, "step": 20870 }, { "epoch": 0.916473648967584, "grad_norm": 1.5390625, "learning_rate": 8.60755382536818e-07, "loss": 0.2913, "step": 20872 }, { "epoch": 0.9165614674467875, "grad_norm": 1.578125, "learning_rate": 8.589565556463314e-07, "loss": 0.346, "step": 20874 }, { "epoch": 0.9166492859259909, "grad_norm": 1.53125, "learning_rate": 8.571595774827413e-07, "loss": 0.3234, "step": 20876 }, { "epoch": 0.9167371044051945, "grad_norm": 1.515625, "learning_rate": 8.553644481836542e-07, "loss": 0.2973, "step": 20878 }, { "epoch": 0.916824922884398, "grad_norm": 1.609375, "learning_rate": 8.535711678865493e-07, "loss": 0.2843, "step": 20880 }, { "epoch": 0.9169127413636015, "grad_norm": 1.4921875, "learning_rate": 8.517797367287555e-07, "loss": 0.3122, "step": 20882 }, { "epoch": 0.9170005598428049, "grad_norm": 1.4296875, "learning_rate": 8.49990154847452e-07, "loss": 0.2951, "step": 20884 }, { "epoch": 0.9170883783220084, "grad_norm": 1.4453125, "learning_rate": 8.482024223796958e-07, "loss": 0.3027, "step": 20886 }, { "epoch": 0.9171761968012119, "grad_norm": 1.4375, "learning_rate": 8.464165394623829e-07, "loss": 0.3013, "step": 20888 }, { "epoch": 0.9172640152804153, "grad_norm": 1.5, "learning_rate": 8.446325062322902e-07, "loss": 0.3374, "step": 20890 }, { "epoch": 0.9173518337596188, "grad_norm": 1.46875, "learning_rate": 8.428503228260221e-07, "loss": 0.3171, "step": 20892 }, { "epoch": 0.9174396522388224, "grad_norm": 1.421875, "learning_rate": 8.410699893800722e-07, "loss": 0.3274, "step": 20894 }, { "epoch": 0.9175274707180259, "grad_norm": 1.4921875, "learning_rate": 8.392915060307704e-07, "loss": 0.2995, "step": 20896 }, { "epoch": 0.9176152891972293, "grad_norm": 1.453125, "learning_rate": 8.37514872914319e-07, "loss": 0.3186, "step": 20898 }, { "epoch": 0.9177031076764328, "grad_norm": 1.4765625, "learning_rate": 8.3574009016677e-07, "loss": 0.3063, "step": 20900 }, { "epoch": 0.9177909261556363, "grad_norm": 1.546875, "learning_rate": 8.339671579240371e-07, "loss": 0.3366, "step": 20902 }, { "epoch": 0.9178787446348398, "grad_norm": 1.546875, "learning_rate": 8.321960763218922e-07, "loss": 0.3206, "step": 20904 }, { "epoch": 0.9179665631140432, "grad_norm": 1.484375, "learning_rate": 8.304268454959657e-07, "loss": 0.3006, "step": 20906 }, { "epoch": 0.9180543815932467, "grad_norm": 1.453125, "learning_rate": 8.286594655817465e-07, "loss": 0.3366, "step": 20908 }, { "epoch": 0.9181422000724503, "grad_norm": 1.40625, "learning_rate": 8.268939367145789e-07, "loss": 0.3098, "step": 20910 }, { "epoch": 0.9182300185516538, "grad_norm": 1.4765625, "learning_rate": 8.251302590296661e-07, "loss": 0.3572, "step": 20912 }, { "epoch": 0.9183178370308572, "grad_norm": 1.4296875, "learning_rate": 8.23368432662075e-07, "loss": 0.271, "step": 20914 }, { "epoch": 0.9184056555100607, "grad_norm": 1.515625, "learning_rate": 8.216084577467226e-07, "loss": 0.3109, "step": 20916 }, { "epoch": 0.9184934739892642, "grad_norm": 1.46875, "learning_rate": 8.198503344183955e-07, "loss": 0.3085, "step": 20918 }, { "epoch": 0.9185812924684676, "grad_norm": 1.4921875, "learning_rate": 8.180940628117223e-07, "loss": 0.3106, "step": 20920 }, { "epoch": 0.9186691109476711, "grad_norm": 1.484375, "learning_rate": 8.163396430612063e-07, "loss": 0.3375, "step": 20922 }, { "epoch": 0.9187569294268747, "grad_norm": 1.5, "learning_rate": 8.145870753011958e-07, "loss": 0.3231, "step": 20924 }, { "epoch": 0.9188447479060782, "grad_norm": 1.5, "learning_rate": 8.128363596659e-07, "loss": 0.2954, "step": 20926 }, { "epoch": 0.9189325663852816, "grad_norm": 1.46875, "learning_rate": 8.110874962894005e-07, "loss": 0.3181, "step": 20928 }, { "epoch": 0.9190203848644851, "grad_norm": 1.484375, "learning_rate": 8.093404853056125e-07, "loss": 0.3185, "step": 20930 }, { "epoch": 0.9191082033436886, "grad_norm": 1.4296875, "learning_rate": 8.075953268483344e-07, "loss": 0.3389, "step": 20932 }, { "epoch": 0.9191960218228921, "grad_norm": 1.5078125, "learning_rate": 8.058520210512066e-07, "loss": 0.3182, "step": 20934 }, { "epoch": 0.9192838403020955, "grad_norm": 1.59375, "learning_rate": 8.041105680477251e-07, "loss": 0.3405, "step": 20936 }, { "epoch": 0.919371658781299, "grad_norm": 1.453125, "learning_rate": 8.02370967971261e-07, "loss": 0.3015, "step": 20938 }, { "epoch": 0.9194594772605026, "grad_norm": 1.4296875, "learning_rate": 8.006332209550244e-07, "loss": 0.297, "step": 20940 }, { "epoch": 0.9195472957397061, "grad_norm": 1.4453125, "learning_rate": 7.988973271321004e-07, "loss": 0.3436, "step": 20942 }, { "epoch": 0.9196351142189095, "grad_norm": 1.421875, "learning_rate": 7.971632866354189e-07, "loss": 0.3226, "step": 20944 }, { "epoch": 0.919722932698113, "grad_norm": 1.4296875, "learning_rate": 7.954310995977737e-07, "loss": 0.3318, "step": 20946 }, { "epoch": 0.9198107511773165, "grad_norm": 1.421875, "learning_rate": 7.937007661518198e-07, "loss": 0.3187, "step": 20948 }, { "epoch": 0.91989856965652, "grad_norm": 1.4609375, "learning_rate": 7.919722864300649e-07, "loss": 0.3513, "step": 20950 }, { "epoch": 0.9199863881357234, "grad_norm": 1.4609375, "learning_rate": 7.902456605648756e-07, "loss": 0.3281, "step": 20952 }, { "epoch": 0.9200742066149269, "grad_norm": 1.4140625, "learning_rate": 7.885208886884709e-07, "loss": 0.2876, "step": 20954 }, { "epoch": 0.9201620250941305, "grad_norm": 1.5, "learning_rate": 7.867979709329454e-07, "loss": 0.2869, "step": 20956 }, { "epoch": 0.920249843573334, "grad_norm": 1.4609375, "learning_rate": 7.85076907430235e-07, "loss": 0.3085, "step": 20958 }, { "epoch": 0.9203376620525374, "grad_norm": 1.5625, "learning_rate": 7.833576983121399e-07, "loss": 0.3395, "step": 20960 }, { "epoch": 0.9204254805317409, "grad_norm": 1.453125, "learning_rate": 7.816403437103159e-07, "loss": 0.3473, "step": 20962 }, { "epoch": 0.9205132990109444, "grad_norm": 1.4921875, "learning_rate": 7.799248437562801e-07, "loss": 0.2932, "step": 20964 }, { "epoch": 0.9206011174901478, "grad_norm": 1.40625, "learning_rate": 7.782111985814077e-07, "loss": 0.31, "step": 20966 }, { "epoch": 0.9206889359693513, "grad_norm": 1.53125, "learning_rate": 7.764994083169247e-07, "loss": 0.3037, "step": 20968 }, { "epoch": 0.9207767544485549, "grad_norm": 1.4296875, "learning_rate": 7.747894730939259e-07, "loss": 0.3276, "step": 20970 }, { "epoch": 0.9208645729277584, "grad_norm": 1.40625, "learning_rate": 7.730813930433567e-07, "loss": 0.285, "step": 20972 }, { "epoch": 0.9209523914069618, "grad_norm": 1.484375, "learning_rate": 7.713751682960207e-07, "loss": 0.3125, "step": 20974 }, { "epoch": 0.9210402098861653, "grad_norm": 1.4375, "learning_rate": 7.69670798982583e-07, "loss": 0.3246, "step": 20976 }, { "epoch": 0.9211280283653688, "grad_norm": 1.6015625, "learning_rate": 7.67968285233564e-07, "loss": 0.3272, "step": 20978 }, { "epoch": 0.9212158468445723, "grad_norm": 1.609375, "learning_rate": 7.662676271793429e-07, "loss": 0.2933, "step": 20980 }, { "epoch": 0.9213036653237757, "grad_norm": 1.5625, "learning_rate": 7.64568824950157e-07, "loss": 0.3296, "step": 20982 }, { "epoch": 0.9213914838029792, "grad_norm": 1.46875, "learning_rate": 7.628718786760997e-07, "loss": 0.3073, "step": 20984 }, { "epoch": 0.9214793022821828, "grad_norm": 1.4375, "learning_rate": 7.611767884871251e-07, "loss": 0.3272, "step": 20986 }, { "epoch": 0.9215671207613862, "grad_norm": 1.5703125, "learning_rate": 7.59483554513038e-07, "loss": 0.3255, "step": 20988 }, { "epoch": 0.9216549392405897, "grad_norm": 1.453125, "learning_rate": 7.57792176883515e-07, "loss": 0.3086, "step": 20990 }, { "epoch": 0.9217427577197932, "grad_norm": 1.453125, "learning_rate": 7.561026557280748e-07, "loss": 0.3066, "step": 20992 }, { "epoch": 0.9218305761989967, "grad_norm": 1.46875, "learning_rate": 7.544149911761084e-07, "loss": 0.3216, "step": 20994 }, { "epoch": 0.9219183946782001, "grad_norm": 1.4140625, "learning_rate": 7.527291833568539e-07, "loss": 0.3202, "step": 20996 }, { "epoch": 0.9220062131574036, "grad_norm": 1.4296875, "learning_rate": 7.510452323994083e-07, "loss": 0.3295, "step": 20998 }, { "epoch": 0.9220940316366071, "grad_norm": 1.4296875, "learning_rate": 7.493631384327348e-07, "loss": 0.2978, "step": 21000 }, { "epoch": 0.9221818501158107, "grad_norm": 1.46875, "learning_rate": 7.476829015856446e-07, "loss": 0.3027, "step": 21002 }, { "epoch": 0.9222696685950141, "grad_norm": 1.4453125, "learning_rate": 7.460045219868095e-07, "loss": 0.3039, "step": 21004 }, { "epoch": 0.9223574870742176, "grad_norm": 1.59375, "learning_rate": 7.443279997647657e-07, "loss": 0.3202, "step": 21006 }, { "epoch": 0.9224453055534211, "grad_norm": 1.4453125, "learning_rate": 7.426533350478937e-07, "loss": 0.2813, "step": 21008 }, { "epoch": 0.9225331240326246, "grad_norm": 1.46875, "learning_rate": 7.409805279644494e-07, "loss": 0.3119, "step": 21010 }, { "epoch": 0.922620942511828, "grad_norm": 1.453125, "learning_rate": 7.393095786425275e-07, "loss": 0.3038, "step": 21012 }, { "epoch": 0.9227087609910315, "grad_norm": 1.46875, "learning_rate": 7.376404872100978e-07, "loss": 0.3249, "step": 21014 }, { "epoch": 0.9227965794702351, "grad_norm": 1.53125, "learning_rate": 7.359732537949693e-07, "loss": 0.3023, "step": 21016 }, { "epoch": 0.9228843979494386, "grad_norm": 1.3984375, "learning_rate": 7.343078785248315e-07, "loss": 0.3033, "step": 21018 }, { "epoch": 0.922972216428642, "grad_norm": 1.5, "learning_rate": 7.326443615272099e-07, "loss": 0.3115, "step": 21020 }, { "epoch": 0.9230600349078455, "grad_norm": 1.546875, "learning_rate": 7.309827029295002e-07, "loss": 0.3266, "step": 21022 }, { "epoch": 0.923147853387049, "grad_norm": 1.4609375, "learning_rate": 7.29322902858956e-07, "loss": 0.3321, "step": 21024 }, { "epoch": 0.9232356718662524, "grad_norm": 1.6640625, "learning_rate": 7.276649614426784e-07, "loss": 0.3087, "step": 21026 }, { "epoch": 0.9233234903454559, "grad_norm": 1.390625, "learning_rate": 7.26008878807638e-07, "loss": 0.3076, "step": 21028 }, { "epoch": 0.9234113088246594, "grad_norm": 1.4765625, "learning_rate": 7.243546550806557e-07, "loss": 0.3151, "step": 21030 }, { "epoch": 0.923499127303863, "grad_norm": 1.5078125, "learning_rate": 7.227022903884134e-07, "loss": 0.2999, "step": 21032 }, { "epoch": 0.9235869457830664, "grad_norm": 1.4453125, "learning_rate": 7.210517848574516e-07, "loss": 0.335, "step": 21034 }, { "epoch": 0.9236747642622699, "grad_norm": 1.5078125, "learning_rate": 7.194031386141608e-07, "loss": 0.3007, "step": 21036 }, { "epoch": 0.9237625827414734, "grad_norm": 1.3828125, "learning_rate": 7.177563517848013e-07, "loss": 0.2695, "step": 21038 }, { "epoch": 0.9238504012206769, "grad_norm": 1.515625, "learning_rate": 7.161114244954775e-07, "loss": 0.3298, "step": 21040 }, { "epoch": 0.9239382196998803, "grad_norm": 1.4296875, "learning_rate": 7.144683568721694e-07, "loss": 0.3179, "step": 21042 }, { "epoch": 0.9240260381790838, "grad_norm": 1.5234375, "learning_rate": 7.128271490406873e-07, "loss": 0.3385, "step": 21044 }, { "epoch": 0.9241138566582873, "grad_norm": 1.4375, "learning_rate": 7.111878011267309e-07, "loss": 0.3113, "step": 21046 }, { "epoch": 0.9242016751374909, "grad_norm": 1.453125, "learning_rate": 7.095503132558329e-07, "loss": 0.3134, "step": 21048 }, { "epoch": 0.9242894936166943, "grad_norm": 1.40625, "learning_rate": 7.07914685553393e-07, "loss": 0.2692, "step": 21050 }, { "epoch": 0.9243773120958978, "grad_norm": 1.5078125, "learning_rate": 7.062809181446695e-07, "loss": 0.3174, "step": 21052 }, { "epoch": 0.9244651305751013, "grad_norm": 1.5, "learning_rate": 7.046490111547788e-07, "loss": 0.3495, "step": 21054 }, { "epoch": 0.9245529490543047, "grad_norm": 1.4453125, "learning_rate": 7.030189647086904e-07, "loss": 0.3221, "step": 21056 }, { "epoch": 0.9246407675335082, "grad_norm": 1.46875, "learning_rate": 7.013907789312352e-07, "loss": 0.3218, "step": 21058 }, { "epoch": 0.9247285860127117, "grad_norm": 1.390625, "learning_rate": 6.997644539470938e-07, "loss": 0.3023, "step": 21060 }, { "epoch": 0.9248164044919152, "grad_norm": 1.515625, "learning_rate": 6.981399898808222e-07, "loss": 0.3328, "step": 21062 }, { "epoch": 0.9249042229711187, "grad_norm": 1.5234375, "learning_rate": 6.965173868568098e-07, "loss": 0.3167, "step": 21064 }, { "epoch": 0.9249920414503222, "grad_norm": 1.4921875, "learning_rate": 6.948966449993266e-07, "loss": 0.3245, "step": 21066 }, { "epoch": 0.9250798599295257, "grad_norm": 1.4453125, "learning_rate": 6.932777644324844e-07, "loss": 0.3237, "step": 21068 }, { "epoch": 0.9251676784087292, "grad_norm": 1.4453125, "learning_rate": 6.916607452802538e-07, "loss": 0.319, "step": 21070 }, { "epoch": 0.9252554968879326, "grad_norm": 1.5, "learning_rate": 6.90045587666474e-07, "loss": 0.3122, "step": 21072 }, { "epoch": 0.9253433153671361, "grad_norm": 1.5859375, "learning_rate": 6.884322917148328e-07, "loss": 0.3193, "step": 21074 }, { "epoch": 0.9254311338463396, "grad_norm": 1.46875, "learning_rate": 6.868208575488699e-07, "loss": 0.3228, "step": 21076 }, { "epoch": 0.9255189523255432, "grad_norm": 1.4765625, "learning_rate": 6.852112852919951e-07, "loss": 0.3195, "step": 21078 }, { "epoch": 0.9256067708047466, "grad_norm": 1.5234375, "learning_rate": 6.836035750674708e-07, "loss": 0.3395, "step": 21080 }, { "epoch": 0.9256945892839501, "grad_norm": 1.4765625, "learning_rate": 6.819977269984123e-07, "loss": 0.3088, "step": 21082 }, { "epoch": 0.9257824077631536, "grad_norm": 1.5234375, "learning_rate": 6.803937412077965e-07, "loss": 0.3288, "step": 21084 }, { "epoch": 0.925870226242357, "grad_norm": 1.5, "learning_rate": 6.787916178184583e-07, "loss": 0.3166, "step": 21086 }, { "epoch": 0.9259580447215605, "grad_norm": 1.4140625, "learning_rate": 6.771913569530857e-07, "loss": 0.2714, "step": 21088 }, { "epoch": 0.926045863200764, "grad_norm": 1.453125, "learning_rate": 6.755929587342336e-07, "loss": 0.3068, "step": 21090 }, { "epoch": 0.9261336816799675, "grad_norm": 1.453125, "learning_rate": 6.739964232843038e-07, "loss": 0.3145, "step": 21092 }, { "epoch": 0.926221500159171, "grad_norm": 1.4921875, "learning_rate": 6.72401750725557e-07, "loss": 0.3062, "step": 21094 }, { "epoch": 0.9263093186383745, "grad_norm": 1.5078125, "learning_rate": 6.708089411801177e-07, "loss": 0.2989, "step": 21096 }, { "epoch": 0.926397137117578, "grad_norm": 1.421875, "learning_rate": 6.692179947699579e-07, "loss": 0.2991, "step": 21098 }, { "epoch": 0.9264849555967815, "grad_norm": 1.5703125, "learning_rate": 6.676289116169188e-07, "loss": 0.342, "step": 21100 }, { "epoch": 0.9265727740759849, "grad_norm": 1.4765625, "learning_rate": 6.660416918426892e-07, "loss": 0.2854, "step": 21102 }, { "epoch": 0.9266605925551884, "grad_norm": 1.4296875, "learning_rate": 6.644563355688277e-07, "loss": 0.3511, "step": 21104 }, { "epoch": 0.9267484110343919, "grad_norm": 1.53125, "learning_rate": 6.628728429167258e-07, "loss": 0.3465, "step": 21106 }, { "epoch": 0.9268362295135953, "grad_norm": 1.578125, "learning_rate": 6.612912140076588e-07, "loss": 0.3519, "step": 21108 }, { "epoch": 0.9269240479927989, "grad_norm": 1.515625, "learning_rate": 6.597114489627437e-07, "loss": 0.3272, "step": 21110 }, { "epoch": 0.9270118664720024, "grad_norm": 1.4453125, "learning_rate": 6.581335479029588e-07, "loss": 0.2801, "step": 21112 }, { "epoch": 0.9270996849512059, "grad_norm": 1.484375, "learning_rate": 6.565575109491462e-07, "loss": 0.3199, "step": 21114 }, { "epoch": 0.9271875034304093, "grad_norm": 1.46875, "learning_rate": 6.5498333822199e-07, "loss": 0.2785, "step": 21116 }, { "epoch": 0.9272753219096128, "grad_norm": 1.40625, "learning_rate": 6.534110298420493e-07, "loss": 0.3323, "step": 21118 }, { "epoch": 0.9273631403888163, "grad_norm": 1.453125, "learning_rate": 6.518405859297277e-07, "loss": 0.3108, "step": 21120 }, { "epoch": 0.9274509588680198, "grad_norm": 1.53125, "learning_rate": 6.502720066052903e-07, "loss": 0.315, "step": 21122 }, { "epoch": 0.9275387773472233, "grad_norm": 1.421875, "learning_rate": 6.487052919888603e-07, "loss": 0.3075, "step": 21124 }, { "epoch": 0.9276265958264268, "grad_norm": 1.4140625, "learning_rate": 6.47140442200414e-07, "loss": 0.3272, "step": 21126 }, { "epoch": 0.9277144143056303, "grad_norm": 1.3984375, "learning_rate": 6.455774573597917e-07, "loss": 0.3082, "step": 21128 }, { "epoch": 0.9278022327848338, "grad_norm": 1.4765625, "learning_rate": 6.440163375866892e-07, "loss": 0.3059, "step": 21130 }, { "epoch": 0.9278900512640372, "grad_norm": 1.546875, "learning_rate": 6.424570830006498e-07, "loss": 0.3184, "step": 21132 }, { "epoch": 0.9279778697432407, "grad_norm": 1.46875, "learning_rate": 6.408996937210892e-07, "loss": 0.3014, "step": 21134 }, { "epoch": 0.9280656882224442, "grad_norm": 1.4453125, "learning_rate": 6.393441698672647e-07, "loss": 0.3257, "step": 21136 }, { "epoch": 0.9281535067016476, "grad_norm": 1.4296875, "learning_rate": 6.377905115583088e-07, "loss": 0.2957, "step": 21138 }, { "epoch": 0.9282413251808512, "grad_norm": 1.484375, "learning_rate": 6.362387189131902e-07, "loss": 0.2928, "step": 21140 }, { "epoch": 0.9283291436600547, "grad_norm": 1.4765625, "learning_rate": 6.346887920507555e-07, "loss": 0.3254, "step": 21142 }, { "epoch": 0.9284169621392582, "grad_norm": 1.4921875, "learning_rate": 6.33140731089693e-07, "loss": 0.3156, "step": 21144 }, { "epoch": 0.9285047806184616, "grad_norm": 1.4375, "learning_rate": 6.315945361485498e-07, "loss": 0.3211, "step": 21146 }, { "epoch": 0.9285925990976651, "grad_norm": 1.5078125, "learning_rate": 6.300502073457448e-07, "loss": 0.3218, "step": 21148 }, { "epoch": 0.9286804175768686, "grad_norm": 1.515625, "learning_rate": 6.285077447995307e-07, "loss": 0.2848, "step": 21150 }, { "epoch": 0.9287682360560721, "grad_norm": 1.453125, "learning_rate": 6.26967148628041e-07, "loss": 0.306, "step": 21152 }, { "epoch": 0.9288560545352755, "grad_norm": 1.515625, "learning_rate": 6.254284189492476e-07, "loss": 0.3131, "step": 21154 }, { "epoch": 0.9289438730144791, "grad_norm": 1.4609375, "learning_rate": 6.238915558809899e-07, "loss": 0.3059, "step": 21156 }, { "epoch": 0.9290316914936826, "grad_norm": 1.46875, "learning_rate": 6.223565595409597e-07, "loss": 0.2828, "step": 21158 }, { "epoch": 0.9291195099728861, "grad_norm": 1.53125, "learning_rate": 6.208234300467048e-07, "loss": 0.3251, "step": 21160 }, { "epoch": 0.9292073284520895, "grad_norm": 1.46875, "learning_rate": 6.192921675156394e-07, "loss": 0.3106, "step": 21162 }, { "epoch": 0.929295146931293, "grad_norm": 1.4765625, "learning_rate": 6.177627720650226e-07, "loss": 0.3275, "step": 21164 }, { "epoch": 0.9293829654104965, "grad_norm": 1.5078125, "learning_rate": 6.1623524381198e-07, "loss": 0.2936, "step": 21166 }, { "epoch": 0.9294707838897, "grad_norm": 1.453125, "learning_rate": 6.14709582873485e-07, "loss": 0.2886, "step": 21168 }, { "epoch": 0.9295586023689035, "grad_norm": 1.46875, "learning_rate": 6.131857893663772e-07, "loss": 0.3234, "step": 21170 }, { "epoch": 0.929646420848107, "grad_norm": 1.515625, "learning_rate": 6.116638634073496e-07, "loss": 0.3274, "step": 21172 }, { "epoch": 0.9297342393273105, "grad_norm": 1.4921875, "learning_rate": 6.101438051129449e-07, "loss": 0.3255, "step": 21174 }, { "epoch": 0.929822057806514, "grad_norm": 1.5078125, "learning_rate": 6.086256145995783e-07, "loss": 0.3073, "step": 21176 }, { "epoch": 0.9299098762857174, "grad_norm": 1.5234375, "learning_rate": 6.071092919835042e-07, "loss": 0.3081, "step": 21178 }, { "epoch": 0.9299976947649209, "grad_norm": 1.46875, "learning_rate": 6.055948373808517e-07, "loss": 0.3165, "step": 21180 }, { "epoch": 0.9300855132441244, "grad_norm": 1.46875, "learning_rate": 6.04082250907595e-07, "loss": 0.3216, "step": 21182 }, { "epoch": 0.9301733317233278, "grad_norm": 1.4375, "learning_rate": 6.025715326795633e-07, "loss": 0.3113, "step": 21184 }, { "epoch": 0.9302611502025314, "grad_norm": 1.6015625, "learning_rate": 6.01062682812456e-07, "loss": 0.3103, "step": 21186 }, { "epoch": 0.9303489686817349, "grad_norm": 1.4453125, "learning_rate": 5.995557014218168e-07, "loss": 0.2916, "step": 21188 }, { "epoch": 0.9304367871609384, "grad_norm": 1.484375, "learning_rate": 5.980505886230503e-07, "loss": 0.2992, "step": 21190 }, { "epoch": 0.9305246056401418, "grad_norm": 1.5390625, "learning_rate": 5.965473445314201e-07, "loss": 0.2912, "step": 21192 }, { "epoch": 0.9306124241193453, "grad_norm": 1.515625, "learning_rate": 5.950459692620425e-07, "loss": 0.2946, "step": 21194 }, { "epoch": 0.9307002425985488, "grad_norm": 1.5546875, "learning_rate": 5.935464629298975e-07, "loss": 0.3201, "step": 21196 }, { "epoch": 0.9307880610777522, "grad_norm": 1.4765625, "learning_rate": 5.920488256498131e-07, "loss": 0.3221, "step": 21198 }, { "epoch": 0.9308758795569557, "grad_norm": 1.4609375, "learning_rate": 5.905530575364831e-07, "loss": 0.3377, "step": 21200 }, { "epoch": 0.9309636980361593, "grad_norm": 1.484375, "learning_rate": 5.89059158704447e-07, "loss": 0.3286, "step": 21202 }, { "epoch": 0.9310515165153628, "grad_norm": 1.6640625, "learning_rate": 5.875671292681157e-07, "loss": 0.2927, "step": 21204 }, { "epoch": 0.9311393349945662, "grad_norm": 1.5, "learning_rate": 5.860769693417451e-07, "loss": 0.3109, "step": 21206 }, { "epoch": 0.9312271534737697, "grad_norm": 1.546875, "learning_rate": 5.845886790394495e-07, "loss": 0.3093, "step": 21208 }, { "epoch": 0.9313149719529732, "grad_norm": 1.5234375, "learning_rate": 5.8310225847521e-07, "loss": 0.3427, "step": 21210 }, { "epoch": 0.9314027904321767, "grad_norm": 1.4921875, "learning_rate": 5.816177077628493e-07, "loss": 0.3308, "step": 21212 }, { "epoch": 0.9314906089113801, "grad_norm": 1.5546875, "learning_rate": 5.801350270160599e-07, "loss": 0.3064, "step": 21214 }, { "epoch": 0.9315784273905837, "grad_norm": 1.515625, "learning_rate": 5.786542163483843e-07, "loss": 0.3105, "step": 21216 }, { "epoch": 0.9316662458697872, "grad_norm": 1.5, "learning_rate": 5.771752758732207e-07, "loss": 0.3202, "step": 21218 }, { "epoch": 0.9317540643489907, "grad_norm": 1.421875, "learning_rate": 5.756982057038312e-07, "loss": 0.3233, "step": 21220 }, { "epoch": 0.9318418828281941, "grad_norm": 1.453125, "learning_rate": 5.742230059533255e-07, "loss": 0.3054, "step": 21222 }, { "epoch": 0.9319297013073976, "grad_norm": 1.46875, "learning_rate": 5.727496767346796e-07, "loss": 0.3086, "step": 21224 }, { "epoch": 0.9320175197866011, "grad_norm": 1.4375, "learning_rate": 5.712782181607202e-07, "loss": 0.3207, "step": 21226 }, { "epoch": 0.9321053382658046, "grad_norm": 1.5703125, "learning_rate": 5.698086303441292e-07, "loss": 0.3119, "step": 21228 }, { "epoch": 0.932193156745008, "grad_norm": 1.515625, "learning_rate": 5.683409133974499e-07, "loss": 0.304, "step": 21230 }, { "epoch": 0.9322809752242116, "grad_norm": 1.546875, "learning_rate": 5.668750674330786e-07, "loss": 0.3022, "step": 21232 }, { "epoch": 0.9323687937034151, "grad_norm": 1.578125, "learning_rate": 5.654110925632756e-07, "loss": 0.2937, "step": 21234 }, { "epoch": 0.9324566121826185, "grad_norm": 1.5703125, "learning_rate": 5.639489889001426e-07, "loss": 0.3227, "step": 21236 }, { "epoch": 0.932544430661822, "grad_norm": 1.4921875, "learning_rate": 5.624887565556596e-07, "loss": 0.3311, "step": 21238 }, { "epoch": 0.9326322491410255, "grad_norm": 1.4375, "learning_rate": 5.610303956416402e-07, "loss": 0.3161, "step": 21240 }, { "epoch": 0.932720067620229, "grad_norm": 1.5, "learning_rate": 5.595739062697752e-07, "loss": 0.2872, "step": 21242 }, { "epoch": 0.9328078860994324, "grad_norm": 1.4609375, "learning_rate": 5.581192885516006e-07, "loss": 0.3168, "step": 21244 }, { "epoch": 0.9328957045786359, "grad_norm": 1.515625, "learning_rate": 5.566665425985052e-07, "loss": 0.3393, "step": 21246 }, { "epoch": 0.9329835230578395, "grad_norm": 1.4140625, "learning_rate": 5.552156685217497e-07, "loss": 0.3114, "step": 21248 }, { "epoch": 0.933071341537043, "grad_norm": 1.46875, "learning_rate": 5.537666664324342e-07, "loss": 0.2953, "step": 21250 }, { "epoch": 0.9331591600162464, "grad_norm": 1.4296875, "learning_rate": 5.523195364415312e-07, "loss": 0.3083, "step": 21252 }, { "epoch": 0.9332469784954499, "grad_norm": 1.5, "learning_rate": 5.508742786598575e-07, "loss": 0.3182, "step": 21254 }, { "epoch": 0.9333347969746534, "grad_norm": 1.484375, "learning_rate": 5.494308931980913e-07, "loss": 0.3156, "step": 21256 }, { "epoch": 0.9334226154538569, "grad_norm": 1.4375, "learning_rate": 5.479893801667718e-07, "loss": 0.321, "step": 21258 }, { "epoch": 0.9335104339330603, "grad_norm": 1.4921875, "learning_rate": 5.465497396762831e-07, "loss": 0.3114, "step": 21260 }, { "epoch": 0.9335982524122638, "grad_norm": 1.484375, "learning_rate": 5.451119718368786e-07, "loss": 0.3205, "step": 21262 }, { "epoch": 0.9336860708914674, "grad_norm": 1.4921875, "learning_rate": 5.436760767586618e-07, "loss": 0.3173, "step": 21264 }, { "epoch": 0.9337738893706709, "grad_norm": 1.4921875, "learning_rate": 5.422420545515949e-07, "loss": 0.2962, "step": 21266 }, { "epoch": 0.9338617078498743, "grad_norm": 1.421875, "learning_rate": 5.408099053254929e-07, "loss": 0.3042, "step": 21268 }, { "epoch": 0.9339495263290778, "grad_norm": 1.4140625, "learning_rate": 5.393796291900316e-07, "loss": 0.3332, "step": 21270 }, { "epoch": 0.9340373448082813, "grad_norm": 1.5078125, "learning_rate": 5.379512262547431e-07, "loss": 0.3221, "step": 21272 }, { "epoch": 0.9341251632874847, "grad_norm": 1.4609375, "learning_rate": 5.365246966290094e-07, "loss": 0.3113, "step": 21274 }, { "epoch": 0.9342129817666882, "grad_norm": 1.5546875, "learning_rate": 5.351000404220846e-07, "loss": 0.2909, "step": 21276 }, { "epoch": 0.9343008002458918, "grad_norm": 1.453125, "learning_rate": 5.336772577430593e-07, "loss": 0.2881, "step": 21278 }, { "epoch": 0.9343886187250953, "grad_norm": 1.4453125, "learning_rate": 5.322563487008964e-07, "loss": 0.3233, "step": 21280 }, { "epoch": 0.9344764372042987, "grad_norm": 1.40625, "learning_rate": 5.308373134044059e-07, "loss": 0.297, "step": 21282 }, { "epoch": 0.9345642556835022, "grad_norm": 1.421875, "learning_rate": 5.294201519622594e-07, "loss": 0.3024, "step": 21284 }, { "epoch": 0.9346520741627057, "grad_norm": 1.4296875, "learning_rate": 5.280048644829866e-07, "loss": 0.2991, "step": 21286 }, { "epoch": 0.9347398926419092, "grad_norm": 1.453125, "learning_rate": 5.265914510749676e-07, "loss": 0.325, "step": 21288 }, { "epoch": 0.9348277111211126, "grad_norm": 1.421875, "learning_rate": 5.251799118464407e-07, "loss": 0.311, "step": 21290 }, { "epoch": 0.9349155296003161, "grad_norm": 1.5, "learning_rate": 5.237702469055028e-07, "loss": 0.3254, "step": 21292 }, { "epoch": 0.9350033480795197, "grad_norm": 1.4296875, "learning_rate": 5.223624563601065e-07, "loss": 0.3102, "step": 21294 }, { "epoch": 0.9350911665587232, "grad_norm": 1.5234375, "learning_rate": 5.209565403180627e-07, "loss": 0.3131, "step": 21296 }, { "epoch": 0.9351789850379266, "grad_norm": 1.46875, "learning_rate": 5.195524988870326e-07, "loss": 0.3322, "step": 21298 }, { "epoch": 0.9352668035171301, "grad_norm": 1.625, "learning_rate": 5.18150332174544e-07, "loss": 0.3213, "step": 21300 }, { "epoch": 0.9353546219963336, "grad_norm": 1.4765625, "learning_rate": 5.167500402879665e-07, "loss": 0.2772, "step": 21302 }, { "epoch": 0.935442440475537, "grad_norm": 1.421875, "learning_rate": 5.153516233345451e-07, "loss": 0.3016, "step": 21304 }, { "epoch": 0.9355302589547405, "grad_norm": 1.4453125, "learning_rate": 5.139550814213634e-07, "loss": 0.2926, "step": 21306 }, { "epoch": 0.935618077433944, "grad_norm": 1.5546875, "learning_rate": 5.125604146553692e-07, "loss": 0.3425, "step": 21308 }, { "epoch": 0.9357058959131476, "grad_norm": 1.4921875, "learning_rate": 5.111676231433715e-07, "loss": 0.3165, "step": 21310 }, { "epoch": 0.935793714392351, "grad_norm": 1.4140625, "learning_rate": 5.097767069920267e-07, "loss": 0.313, "step": 21312 }, { "epoch": 0.9358815328715545, "grad_norm": 1.4375, "learning_rate": 5.083876663078523e-07, "loss": 0.3176, "step": 21314 }, { "epoch": 0.935969351350758, "grad_norm": 1.4140625, "learning_rate": 5.070005011972218e-07, "loss": 0.3126, "step": 21316 }, { "epoch": 0.9360571698299615, "grad_norm": 1.46875, "learning_rate": 5.056152117663665e-07, "loss": 0.3096, "step": 21318 }, { "epoch": 0.9361449883091649, "grad_norm": 1.4609375, "learning_rate": 5.042317981213684e-07, "loss": 0.3051, "step": 21320 }, { "epoch": 0.9362328067883684, "grad_norm": 1.46875, "learning_rate": 5.028502603681678e-07, "loss": 0.3365, "step": 21322 }, { "epoch": 0.936320625267572, "grad_norm": 1.546875, "learning_rate": 5.01470598612569e-07, "loss": 0.3218, "step": 21324 }, { "epoch": 0.9364084437467755, "grad_norm": 1.578125, "learning_rate": 5.000928129602234e-07, "loss": 0.2954, "step": 21326 }, { "epoch": 0.9364962622259789, "grad_norm": 1.6875, "learning_rate": 4.987169035166467e-07, "loss": 0.3334, "step": 21328 }, { "epoch": 0.9365840807051824, "grad_norm": 1.5, "learning_rate": 4.973428703872018e-07, "loss": 0.3243, "step": 21330 }, { "epoch": 0.9366718991843859, "grad_norm": 1.453125, "learning_rate": 4.959707136771103e-07, "loss": 0.297, "step": 21332 }, { "epoch": 0.9367597176635893, "grad_norm": 1.5546875, "learning_rate": 4.9460043349146e-07, "loss": 0.3222, "step": 21334 }, { "epoch": 0.9368475361427928, "grad_norm": 1.5546875, "learning_rate": 4.932320299351784e-07, "loss": 0.3216, "step": 21336 }, { "epoch": 0.9369353546219963, "grad_norm": 1.4921875, "learning_rate": 4.918655031130648e-07, "loss": 0.3081, "step": 21338 }, { "epoch": 0.9370231731011999, "grad_norm": 1.4296875, "learning_rate": 4.905008531297661e-07, "loss": 0.3371, "step": 21340 }, { "epoch": 0.9371109915804033, "grad_norm": 1.53125, "learning_rate": 4.891380800897877e-07, "loss": 0.3587, "step": 21342 }, { "epoch": 0.9371988100596068, "grad_norm": 1.3984375, "learning_rate": 4.877771840974904e-07, "loss": 0.2993, "step": 21344 }, { "epoch": 0.9372866285388103, "grad_norm": 1.546875, "learning_rate": 4.86418165257091e-07, "loss": 0.3142, "step": 21346 }, { "epoch": 0.9373744470180138, "grad_norm": 1.4140625, "learning_rate": 4.850610236726672e-07, "loss": 0.3118, "step": 21348 }, { "epoch": 0.9374622654972172, "grad_norm": 1.484375, "learning_rate": 4.83705759448147e-07, "loss": 0.2905, "step": 21350 }, { "epoch": 0.9375500839764207, "grad_norm": 1.5390625, "learning_rate": 4.823523726873169e-07, "loss": 0.3095, "step": 21352 }, { "epoch": 0.9376379024556242, "grad_norm": 1.46875, "learning_rate": 4.810008634938163e-07, "loss": 0.3168, "step": 21354 }, { "epoch": 0.9377257209348278, "grad_norm": 1.4453125, "learning_rate": 4.796512319711482e-07, "loss": 0.3462, "step": 21356 }, { "epoch": 0.9378135394140312, "grad_norm": 1.515625, "learning_rate": 4.783034782226691e-07, "loss": 0.3285, "step": 21358 }, { "epoch": 0.9379013578932347, "grad_norm": 1.421875, "learning_rate": 4.769576023515854e-07, "loss": 0.2981, "step": 21360 }, { "epoch": 0.9379891763724382, "grad_norm": 1.421875, "learning_rate": 4.7561360446096714e-07, "loss": 0.3216, "step": 21362 }, { "epoch": 0.9380769948516416, "grad_norm": 1.421875, "learning_rate": 4.742714846537377e-07, "loss": 0.298, "step": 21364 }, { "epoch": 0.9381648133308451, "grad_norm": 1.515625, "learning_rate": 4.729312430326788e-07, "loss": 0.3255, "step": 21366 }, { "epoch": 0.9382526318100486, "grad_norm": 1.5546875, "learning_rate": 4.7159287970042485e-07, "loss": 0.3263, "step": 21368 }, { "epoch": 0.9383404502892522, "grad_norm": 1.46875, "learning_rate": 4.702563947594663e-07, "loss": 0.3215, "step": 21370 }, { "epoch": 0.9384282687684556, "grad_norm": 1.4375, "learning_rate": 4.6892178831215437e-07, "loss": 0.3175, "step": 21372 }, { "epoch": 0.9385160872476591, "grad_norm": 1.40625, "learning_rate": 4.6758906046069084e-07, "loss": 0.3307, "step": 21374 }, { "epoch": 0.9386039057268626, "grad_norm": 1.4609375, "learning_rate": 4.662582113071412e-07, "loss": 0.313, "step": 21376 }, { "epoch": 0.9386917242060661, "grad_norm": 1.46875, "learning_rate": 4.649292409534184e-07, "loss": 0.2962, "step": 21378 }, { "epoch": 0.9387795426852695, "grad_norm": 1.5, "learning_rate": 4.6360214950129375e-07, "loss": 0.3318, "step": 21380 }, { "epoch": 0.938867361164473, "grad_norm": 1.4765625, "learning_rate": 4.6227693705239993e-07, "loss": 0.2868, "step": 21382 }, { "epoch": 0.9389551796436765, "grad_norm": 1.515625, "learning_rate": 4.6095360370821685e-07, "loss": 0.3004, "step": 21384 }, { "epoch": 0.9390429981228801, "grad_norm": 1.53125, "learning_rate": 4.5963214957009394e-07, "loss": 0.3312, "step": 21386 }, { "epoch": 0.9391308166020835, "grad_norm": 1.3984375, "learning_rate": 4.5831257473921973e-07, "loss": 0.3139, "step": 21388 }, { "epoch": 0.939218635081287, "grad_norm": 1.4765625, "learning_rate": 4.569948793166551e-07, "loss": 0.2907, "step": 21390 }, { "epoch": 0.9393064535604905, "grad_norm": 1.4765625, "learning_rate": 4.5567906340330537e-07, "loss": 0.3269, "step": 21392 }, { "epoch": 0.939394272039694, "grad_norm": 1.515625, "learning_rate": 4.543651270999344e-07, "loss": 0.3064, "step": 21394 }, { "epoch": 0.9394820905188974, "grad_norm": 1.4765625, "learning_rate": 4.530530705071673e-07, "loss": 0.3008, "step": 21396 }, { "epoch": 0.9395699089981009, "grad_norm": 1.375, "learning_rate": 4.5174289372547917e-07, "loss": 0.3103, "step": 21398 }, { "epoch": 0.9396577274773044, "grad_norm": 1.421875, "learning_rate": 4.504345968552065e-07, "loss": 0.2845, "step": 21400 }, { "epoch": 0.9397455459565079, "grad_norm": 1.3828125, "learning_rate": 4.4912817999653576e-07, "loss": 0.3474, "step": 21402 }, { "epoch": 0.9398333644357114, "grad_norm": 1.484375, "learning_rate": 4.4782364324951476e-07, "loss": 0.2995, "step": 21404 }, { "epoch": 0.9399211829149149, "grad_norm": 1.5234375, "learning_rate": 4.4652098671404685e-07, "loss": 0.2957, "step": 21406 }, { "epoch": 0.9400090013941184, "grad_norm": 1.46875, "learning_rate": 4.452202104898828e-07, "loss": 0.305, "step": 21408 }, { "epoch": 0.9400968198733218, "grad_norm": 1.453125, "learning_rate": 4.4392131467664853e-07, "loss": 0.3167, "step": 21410 }, { "epoch": 0.9401846383525253, "grad_norm": 1.5234375, "learning_rate": 4.4262429937380057e-07, "loss": 0.3332, "step": 21412 }, { "epoch": 0.9402724568317288, "grad_norm": 1.46875, "learning_rate": 4.4132916468067333e-07, "loss": 0.2977, "step": 21414 }, { "epoch": 0.9403602753109322, "grad_norm": 1.4609375, "learning_rate": 4.400359106964458e-07, "loss": 0.3122, "step": 21416 }, { "epoch": 0.9404480937901358, "grad_norm": 1.484375, "learning_rate": 4.387445375201527e-07, "loss": 0.3073, "step": 21418 }, { "epoch": 0.9405359122693393, "grad_norm": 1.4765625, "learning_rate": 4.374550452506926e-07, "loss": 0.3325, "step": 21420 }, { "epoch": 0.9406237307485428, "grad_norm": 1.5, "learning_rate": 4.3616743398681157e-07, "loss": 0.283, "step": 21422 }, { "epoch": 0.9407115492277462, "grad_norm": 1.484375, "learning_rate": 4.348817038271197e-07, "loss": 0.3195, "step": 21424 }, { "epoch": 0.9407993677069497, "grad_norm": 1.453125, "learning_rate": 4.335978548700742e-07, "loss": 0.3162, "step": 21426 }, { "epoch": 0.9408871861861532, "grad_norm": 1.4375, "learning_rate": 4.323158872139937e-07, "loss": 0.2864, "step": 21428 }, { "epoch": 0.9409750046653567, "grad_norm": 1.453125, "learning_rate": 4.3103580095705256e-07, "loss": 0.3208, "step": 21430 }, { "epoch": 0.9410628231445602, "grad_norm": 1.421875, "learning_rate": 4.2975759619727775e-07, "loss": 0.3209, "step": 21432 }, { "epoch": 0.9411506416237637, "grad_norm": 1.40625, "learning_rate": 4.284812730325577e-07, "loss": 0.3148, "step": 21434 }, { "epoch": 0.9412384601029672, "grad_norm": 1.5234375, "learning_rate": 4.272068315606309e-07, "loss": 0.3439, "step": 21436 }, { "epoch": 0.9413262785821707, "grad_norm": 1.5, "learning_rate": 4.2593427187909705e-07, "loss": 0.2771, "step": 21438 }, { "epoch": 0.9414140970613741, "grad_norm": 1.46875, "learning_rate": 4.246635940854088e-07, "loss": 0.3518, "step": 21440 }, { "epoch": 0.9415019155405776, "grad_norm": 1.484375, "learning_rate": 4.233947982768716e-07, "loss": 0.3015, "step": 21442 }, { "epoch": 0.9415897340197811, "grad_norm": 1.46875, "learning_rate": 4.2212788455065213e-07, "loss": 0.3217, "step": 21444 }, { "epoch": 0.9416775524989845, "grad_norm": 1.4609375, "learning_rate": 4.2086285300377004e-07, "loss": 0.2966, "step": 21446 }, { "epoch": 0.9417653709781881, "grad_norm": 1.4375, "learning_rate": 4.195997037331034e-07, "loss": 0.3075, "step": 21448 }, { "epoch": 0.9418531894573916, "grad_norm": 1.3984375, "learning_rate": 4.183384368353832e-07, "loss": 0.3088, "step": 21450 }, { "epoch": 0.9419410079365951, "grad_norm": 1.46875, "learning_rate": 4.170790524071988e-07, "loss": 0.3209, "step": 21452 }, { "epoch": 0.9420288264157985, "grad_norm": 1.4140625, "learning_rate": 4.158215505449953e-07, "loss": 0.2918, "step": 21454 }, { "epoch": 0.942116644895002, "grad_norm": 1.484375, "learning_rate": 4.145659313450678e-07, "loss": 0.3079, "step": 21456 }, { "epoch": 0.9422044633742055, "grad_norm": 1.4921875, "learning_rate": 4.1331219490357563e-07, "loss": 0.298, "step": 21458 }, { "epoch": 0.942292281853409, "grad_norm": 1.5234375, "learning_rate": 4.12060341316528e-07, "loss": 0.31, "step": 21460 }, { "epoch": 0.9423801003326124, "grad_norm": 1.515625, "learning_rate": 4.1081037067979553e-07, "loss": 0.329, "step": 21462 }, { "epoch": 0.942467918811816, "grad_norm": 1.5, "learning_rate": 4.095622830891016e-07, "loss": 0.3058, "step": 21464 }, { "epoch": 0.9425557372910195, "grad_norm": 1.4609375, "learning_rate": 4.08316078640017e-07, "loss": 0.3173, "step": 21466 }, { "epoch": 0.942643555770223, "grad_norm": 1.5859375, "learning_rate": 4.070717574279875e-07, "loss": 0.339, "step": 21468 }, { "epoch": 0.9427313742494264, "grad_norm": 1.4375, "learning_rate": 4.058293195482954e-07, "loss": 0.3143, "step": 21470 }, { "epoch": 0.9428191927286299, "grad_norm": 1.5, "learning_rate": 4.045887650960922e-07, "loss": 0.3114, "step": 21472 }, { "epoch": 0.9429070112078334, "grad_norm": 1.4609375, "learning_rate": 4.0335009416637426e-07, "loss": 0.3175, "step": 21474 }, { "epoch": 0.9429948296870369, "grad_norm": 1.484375, "learning_rate": 4.021133068540045e-07, "loss": 0.312, "step": 21476 }, { "epoch": 0.9430826481662404, "grad_norm": 1.515625, "learning_rate": 4.008784032536933e-07, "loss": 0.3322, "step": 21478 }, { "epoch": 0.9431704666454439, "grad_norm": 1.4609375, "learning_rate": 3.9964538346000945e-07, "loss": 0.2827, "step": 21480 }, { "epoch": 0.9432582851246474, "grad_norm": 1.4375, "learning_rate": 3.9841424756738023e-07, "loss": 0.3179, "step": 21482 }, { "epoch": 0.9433461036038508, "grad_norm": 1.6171875, "learning_rate": 3.9718499567008573e-07, "loss": 0.3429, "step": 21484 }, { "epoch": 0.9434339220830543, "grad_norm": 1.46875, "learning_rate": 3.959576278622618e-07, "loss": 0.3006, "step": 21486 }, { "epoch": 0.9435217405622578, "grad_norm": 1.5234375, "learning_rate": 3.9473214423789983e-07, "loss": 0.3245, "step": 21488 }, { "epoch": 0.9436095590414613, "grad_norm": 1.4765625, "learning_rate": 3.9350854489084985e-07, "loss": 0.3254, "step": 21490 }, { "epoch": 0.9436973775206647, "grad_norm": 1.4609375, "learning_rate": 3.922868299148119e-07, "loss": 0.3228, "step": 21492 }, { "epoch": 0.9437851959998683, "grad_norm": 1.4921875, "learning_rate": 3.9106699940335004e-07, "loss": 0.3083, "step": 21494 }, { "epoch": 0.9438730144790718, "grad_norm": 1.4765625, "learning_rate": 3.898490534498755e-07, "loss": 0.2993, "step": 21496 }, { "epoch": 0.9439608329582753, "grad_norm": 1.453125, "learning_rate": 3.8863299214765834e-07, "loss": 0.3122, "step": 21498 }, { "epoch": 0.9440486514374787, "grad_norm": 1.4296875, "learning_rate": 3.874188155898295e-07, "loss": 0.3229, "step": 21500 }, { "epoch": 0.9441364699166822, "grad_norm": 1.5390625, "learning_rate": 3.862065238693674e-07, "loss": 0.3165, "step": 21502 }, { "epoch": 0.9442242883958857, "grad_norm": 1.4921875, "learning_rate": 3.8499611707910887e-07, "loss": 0.3188, "step": 21504 }, { "epoch": 0.9443121068750892, "grad_norm": 1.5234375, "learning_rate": 3.8378759531174924e-07, "loss": 0.2803, "step": 21506 }, { "epoch": 0.9443999253542926, "grad_norm": 1.421875, "learning_rate": 3.825809586598339e-07, "loss": 0.3319, "step": 21508 }, { "epoch": 0.9444877438334962, "grad_norm": 1.484375, "learning_rate": 3.8137620721577227e-07, "loss": 0.3344, "step": 21510 }, { "epoch": 0.9445755623126997, "grad_norm": 1.4609375, "learning_rate": 3.8017334107182113e-07, "loss": 0.2868, "step": 21512 }, { "epoch": 0.9446633807919032, "grad_norm": 1.4140625, "learning_rate": 3.789723603200984e-07, "loss": 0.2926, "step": 21514 }, { "epoch": 0.9447511992711066, "grad_norm": 1.40625, "learning_rate": 3.7777326505257503e-07, "loss": 0.2918, "step": 21516 }, { "epoch": 0.9448390177503101, "grad_norm": 1.546875, "learning_rate": 3.765760553610748e-07, "loss": 0.2923, "step": 21518 }, { "epoch": 0.9449268362295136, "grad_norm": 1.5390625, "learning_rate": 3.7538073133728256e-07, "loss": 0.3119, "step": 21520 }, { "epoch": 0.945014654708717, "grad_norm": 1.53125, "learning_rate": 3.7418729307273913e-07, "loss": 0.3021, "step": 21522 }, { "epoch": 0.9451024731879206, "grad_norm": 1.4921875, "learning_rate": 3.7299574065883527e-07, "loss": 0.3275, "step": 21524 }, { "epoch": 0.9451902916671241, "grad_norm": 1.4921875, "learning_rate": 3.718060741868229e-07, "loss": 0.3329, "step": 21526 }, { "epoch": 0.9452781101463276, "grad_norm": 1.5390625, "learning_rate": 3.706182937478014e-07, "loss": 0.3302, "step": 21528 }, { "epoch": 0.945365928625531, "grad_norm": 1.4765625, "learning_rate": 3.69432399432737e-07, "loss": 0.3176, "step": 21530 }, { "epoch": 0.9454537471047345, "grad_norm": 1.484375, "learning_rate": 3.6824839133244303e-07, "loss": 0.313, "step": 21532 }, { "epoch": 0.945541565583938, "grad_norm": 1.484375, "learning_rate": 3.6706626953759427e-07, "loss": 0.3476, "step": 21534 }, { "epoch": 0.9456293840631415, "grad_norm": 1.4296875, "learning_rate": 3.6588603413871267e-07, "loss": 0.321, "step": 21536 }, { "epoch": 0.9457172025423449, "grad_norm": 1.5234375, "learning_rate": 3.6470768522618713e-07, "loss": 0.3198, "step": 21538 }, { "epoch": 0.9458050210215485, "grad_norm": 1.4296875, "learning_rate": 3.6353122289025096e-07, "loss": 0.3369, "step": 21540 }, { "epoch": 0.945892839500752, "grad_norm": 1.53125, "learning_rate": 3.623566472209988e-07, "loss": 0.304, "step": 21542 }, { "epoch": 0.9459806579799555, "grad_norm": 1.4453125, "learning_rate": 3.6118395830838095e-07, "loss": 0.3101, "step": 21544 }, { "epoch": 0.9460684764591589, "grad_norm": 1.46875, "learning_rate": 3.6001315624220046e-07, "loss": 0.3317, "step": 21546 }, { "epoch": 0.9461562949383624, "grad_norm": 1.5078125, "learning_rate": 3.588442411121218e-07, "loss": 0.2873, "step": 21548 }, { "epoch": 0.9462441134175659, "grad_norm": 1.4296875, "learning_rate": 3.5767721300765666e-07, "loss": 0.3251, "step": 21550 }, { "epoch": 0.9463319318967693, "grad_norm": 1.5, "learning_rate": 3.5651207201817527e-07, "loss": 0.3384, "step": 21552 }, { "epoch": 0.9464197503759728, "grad_norm": 1.4296875, "learning_rate": 3.553488182329118e-07, "loss": 0.3002, "step": 21554 }, { "epoch": 0.9465075688551764, "grad_norm": 1.4921875, "learning_rate": 3.5418745174093936e-07, "loss": 0.3427, "step": 21556 }, { "epoch": 0.9465953873343799, "grad_norm": 1.546875, "learning_rate": 3.530279726312008e-07, "loss": 0.3356, "step": 21558 }, { "epoch": 0.9466832058135833, "grad_norm": 1.4296875, "learning_rate": 3.5187038099248893e-07, "loss": 0.2836, "step": 21560 }, { "epoch": 0.9467710242927868, "grad_norm": 1.4609375, "learning_rate": 3.5071467691345226e-07, "loss": 0.3062, "step": 21562 }, { "epoch": 0.9468588427719903, "grad_norm": 1.6328125, "learning_rate": 3.495608604825951e-07, "loss": 0.297, "step": 21564 }, { "epoch": 0.9469466612511938, "grad_norm": 1.4453125, "learning_rate": 3.4840893178827715e-07, "loss": 0.3281, "step": 21566 }, { "epoch": 0.9470344797303972, "grad_norm": 1.6015625, "learning_rate": 3.472588909187113e-07, "loss": 0.3245, "step": 21568 }, { "epoch": 0.9471222982096008, "grad_norm": 1.46875, "learning_rate": 3.461107379619688e-07, "loss": 0.3271, "step": 21570 }, { "epoch": 0.9472101166888043, "grad_norm": 1.453125, "learning_rate": 3.4496447300597647e-07, "loss": 0.297, "step": 21572 }, { "epoch": 0.9472979351680078, "grad_norm": 1.5, "learning_rate": 3.438200961385141e-07, "loss": 0.2927, "step": 21574 }, { "epoch": 0.9473857536472112, "grad_norm": 1.453125, "learning_rate": 3.426776074472199e-07, "loss": 0.3157, "step": 21576 }, { "epoch": 0.9474735721264147, "grad_norm": 1.5234375, "learning_rate": 3.41537007019585e-07, "loss": 0.3015, "step": 21578 }, { "epoch": 0.9475613906056182, "grad_norm": 1.5234375, "learning_rate": 3.4039829494295626e-07, "loss": 0.3131, "step": 21580 }, { "epoch": 0.9476492090848216, "grad_norm": 1.546875, "learning_rate": 3.392614713045389e-07, "loss": 0.3216, "step": 21582 }, { "epoch": 0.9477370275640251, "grad_norm": 1.4921875, "learning_rate": 3.381265361913882e-07, "loss": 0.3391, "step": 21584 }, { "epoch": 0.9478248460432287, "grad_norm": 1.5859375, "learning_rate": 3.369934896904209e-07, "loss": 0.2709, "step": 21586 }, { "epoch": 0.9479126645224322, "grad_norm": 1.5703125, "learning_rate": 3.3586233188840355e-07, "loss": 0.2907, "step": 21588 }, { "epoch": 0.9480004830016356, "grad_norm": 1.4375, "learning_rate": 3.3473306287196136e-07, "loss": 0.3163, "step": 21590 }, { "epoch": 0.9480883014808391, "grad_norm": 1.453125, "learning_rate": 3.336056827275752e-07, "loss": 0.2975, "step": 21592 }, { "epoch": 0.9481761199600426, "grad_norm": 1.6015625, "learning_rate": 3.3248019154157595e-07, "loss": 0.3388, "step": 21594 }, { "epoch": 0.9482639384392461, "grad_norm": 1.5390625, "learning_rate": 3.3135658940015857e-07, "loss": 0.2906, "step": 21596 }, { "epoch": 0.9483517569184495, "grad_norm": 1.453125, "learning_rate": 3.302348763893681e-07, "loss": 0.3181, "step": 21598 }, { "epoch": 0.948439575397653, "grad_norm": 1.4609375, "learning_rate": 3.2911505259510255e-07, "loss": 0.3341, "step": 21600 }, { "epoch": 0.9485273938768566, "grad_norm": 1.421875, "learning_rate": 3.2799711810312107e-07, "loss": 0.3411, "step": 21602 }, { "epoch": 0.9486152123560601, "grad_norm": 1.5625, "learning_rate": 3.268810729990329e-07, "loss": 0.3265, "step": 21604 }, { "epoch": 0.9487030308352635, "grad_norm": 1.453125, "learning_rate": 3.2576691736831144e-07, "loss": 0.3259, "step": 21606 }, { "epoch": 0.948790849314467, "grad_norm": 1.53125, "learning_rate": 3.246546512962689e-07, "loss": 0.3057, "step": 21608 }, { "epoch": 0.9488786677936705, "grad_norm": 1.4453125, "learning_rate": 3.2354427486809e-07, "loss": 0.3533, "step": 21610 }, { "epoch": 0.9489664862728739, "grad_norm": 1.5234375, "learning_rate": 3.224357881688067e-07, "loss": 0.3012, "step": 21612 }, { "epoch": 0.9490543047520774, "grad_norm": 1.453125, "learning_rate": 3.2132919128330664e-07, "loss": 0.3049, "step": 21614 }, { "epoch": 0.9491421232312809, "grad_norm": 1.46875, "learning_rate": 3.202244842963331e-07, "loss": 0.2996, "step": 21616 }, { "epoch": 0.9492299417104845, "grad_norm": 1.5703125, "learning_rate": 3.191216672924824e-07, "loss": 0.2839, "step": 21618 }, { "epoch": 0.9493177601896879, "grad_norm": 1.4375, "learning_rate": 3.180207403562119e-07, "loss": 0.3177, "step": 21620 }, { "epoch": 0.9494055786688914, "grad_norm": 1.53125, "learning_rate": 3.1692170357183193e-07, "loss": 0.3231, "step": 21622 }, { "epoch": 0.9494933971480949, "grad_norm": 1.421875, "learning_rate": 3.15824557023503e-07, "loss": 0.3116, "step": 21624 }, { "epoch": 0.9495812156272984, "grad_norm": 1.546875, "learning_rate": 3.1472930079524674e-07, "loss": 0.2935, "step": 21626 }, { "epoch": 0.9496690341065018, "grad_norm": 1.5234375, "learning_rate": 3.136359349709378e-07, "loss": 0.3204, "step": 21628 }, { "epoch": 0.9497568525857053, "grad_norm": 1.546875, "learning_rate": 3.1254445963430914e-07, "loss": 0.31, "step": 21630 }, { "epoch": 0.9498446710649089, "grad_norm": 1.5, "learning_rate": 3.114548748689411e-07, "loss": 0.3214, "step": 21632 }, { "epoch": 0.9499324895441124, "grad_norm": 1.4453125, "learning_rate": 3.1036718075827806e-07, "loss": 0.3303, "step": 21634 }, { "epoch": 0.9500203080233158, "grad_norm": 1.4453125, "learning_rate": 3.092813773856118e-07, "loss": 0.3243, "step": 21636 }, { "epoch": 0.9501081265025193, "grad_norm": 1.5078125, "learning_rate": 3.0819746483410075e-07, "loss": 0.317, "step": 21638 }, { "epoch": 0.9501959449817228, "grad_norm": 1.5078125, "learning_rate": 3.0711544318674514e-07, "loss": 0.3354, "step": 21640 }, { "epoch": 0.9502837634609262, "grad_norm": 1.46875, "learning_rate": 3.060353125264065e-07, "loss": 0.2811, "step": 21642 }, { "epoch": 0.9503715819401297, "grad_norm": 1.4375, "learning_rate": 3.049570729358076e-07, "loss": 0.3008, "step": 21644 }, { "epoch": 0.9504594004193332, "grad_norm": 1.453125, "learning_rate": 3.0388072449751e-07, "loss": 0.2968, "step": 21646 }, { "epoch": 0.9505472188985368, "grad_norm": 1.515625, "learning_rate": 3.028062672939508e-07, "loss": 0.3307, "step": 21648 }, { "epoch": 0.9506350373777402, "grad_norm": 1.484375, "learning_rate": 3.017337014074084e-07, "loss": 0.3145, "step": 21650 }, { "epoch": 0.9507228558569437, "grad_norm": 1.4296875, "learning_rate": 3.0066302692001724e-07, "loss": 0.3367, "step": 21652 }, { "epoch": 0.9508106743361472, "grad_norm": 1.5078125, "learning_rate": 2.995942439137728e-07, "loss": 0.3307, "step": 21654 }, { "epoch": 0.9508984928153507, "grad_norm": 1.5, "learning_rate": 2.9852735247052346e-07, "loss": 0.3209, "step": 21656 }, { "epoch": 0.9509863112945541, "grad_norm": 1.4609375, "learning_rate": 2.9746235267197053e-07, "loss": 0.3235, "step": 21658 }, { "epoch": 0.9510741297737576, "grad_norm": 1.421875, "learning_rate": 2.9639924459967105e-07, "loss": 0.3129, "step": 21660 }, { "epoch": 0.9511619482529611, "grad_norm": 1.4921875, "learning_rate": 2.9533802833504043e-07, "loss": 0.3338, "step": 21662 }, { "epoch": 0.9512497667321647, "grad_norm": 1.46875, "learning_rate": 2.94278703959347e-07, "loss": 0.3112, "step": 21664 }, { "epoch": 0.9513375852113681, "grad_norm": 1.4375, "learning_rate": 2.932212715537092e-07, "loss": 0.311, "step": 21666 }, { "epoch": 0.9514254036905716, "grad_norm": 1.3828125, "learning_rate": 2.9216573119911217e-07, "loss": 0.3196, "step": 21668 }, { "epoch": 0.9515132221697751, "grad_norm": 1.5078125, "learning_rate": 2.9111208297638303e-07, "loss": 0.2995, "step": 21670 }, { "epoch": 0.9516010406489785, "grad_norm": 1.5390625, "learning_rate": 2.9006032696621833e-07, "loss": 0.3024, "step": 21672 }, { "epoch": 0.951688859128182, "grad_norm": 1.5078125, "learning_rate": 2.890104632491536e-07, "loss": 0.3242, "step": 21674 }, { "epoch": 0.9517766776073855, "grad_norm": 1.4140625, "learning_rate": 2.879624919055912e-07, "loss": 0.3083, "step": 21676 }, { "epoch": 0.9518644960865891, "grad_norm": 1.640625, "learning_rate": 2.869164130157864e-07, "loss": 0.3083, "step": 21678 }, { "epoch": 0.9519523145657925, "grad_norm": 1.4609375, "learning_rate": 2.858722266598474e-07, "loss": 0.287, "step": 21680 }, { "epoch": 0.952040133044996, "grad_norm": 1.3984375, "learning_rate": 2.8482993291773506e-07, "loss": 0.3142, "step": 21682 }, { "epoch": 0.9521279515241995, "grad_norm": 1.5546875, "learning_rate": 2.8378953186927457e-07, "loss": 0.3063, "step": 21684 }, { "epoch": 0.952215770003403, "grad_norm": 1.4765625, "learning_rate": 2.827510235941355e-07, "loss": 0.3172, "step": 21686 }, { "epoch": 0.9523035884826064, "grad_norm": 1.484375, "learning_rate": 2.817144081718459e-07, "loss": 0.3375, "step": 21688 }, { "epoch": 0.9523914069618099, "grad_norm": 1.5078125, "learning_rate": 2.8067968568179505e-07, "loss": 0.283, "step": 21690 }, { "epoch": 0.9524792254410134, "grad_norm": 1.453125, "learning_rate": 2.7964685620321953e-07, "loss": 0.3213, "step": 21692 }, { "epoch": 0.952567043920217, "grad_norm": 1.5234375, "learning_rate": 2.786159198152116e-07, "loss": 0.3321, "step": 21694 }, { "epoch": 0.9526548623994204, "grad_norm": 1.46875, "learning_rate": 2.775868765967221e-07, "loss": 0.3309, "step": 21696 }, { "epoch": 0.9527426808786239, "grad_norm": 1.4296875, "learning_rate": 2.7655972662655736e-07, "loss": 0.301, "step": 21698 }, { "epoch": 0.9528304993578274, "grad_norm": 1.4296875, "learning_rate": 2.755344699833767e-07, "loss": 0.3061, "step": 21700 }, { "epoch": 0.9529183178370308, "grad_norm": 1.5390625, "learning_rate": 2.7451110674569237e-07, "loss": 0.3429, "step": 21702 }, { "epoch": 0.9530061363162343, "grad_norm": 1.4453125, "learning_rate": 2.7348963699187214e-07, "loss": 0.3205, "step": 21704 }, { "epoch": 0.9530939547954378, "grad_norm": 1.4921875, "learning_rate": 2.7247006080014513e-07, "loss": 0.324, "step": 21706 }, { "epoch": 0.9531817732746413, "grad_norm": 1.53125, "learning_rate": 2.714523782485878e-07, "loss": 0.3208, "step": 21708 }, { "epoch": 0.9532695917538448, "grad_norm": 1.4921875, "learning_rate": 2.7043658941513783e-07, "loss": 0.3202, "step": 21710 }, { "epoch": 0.9533574102330483, "grad_norm": 1.5390625, "learning_rate": 2.6942269437758015e-07, "loss": 0.2927, "step": 21712 }, { "epoch": 0.9534452287122518, "grad_norm": 1.4765625, "learning_rate": 2.6841069321355827e-07, "loss": 0.337, "step": 21714 }, { "epoch": 0.9535330471914553, "grad_norm": 1.46875, "learning_rate": 2.674005860005768e-07, "loss": 0.2998, "step": 21716 }, { "epoch": 0.9536208656706587, "grad_norm": 1.515625, "learning_rate": 2.6639237281598783e-07, "loss": 0.2922, "step": 21718 }, { "epoch": 0.9537086841498622, "grad_norm": 1.40625, "learning_rate": 2.6538605373699897e-07, "loss": 0.2955, "step": 21720 }, { "epoch": 0.9537965026290657, "grad_norm": 1.46875, "learning_rate": 2.6438162884067365e-07, "loss": 0.3054, "step": 21722 }, { "epoch": 0.9538843211082693, "grad_norm": 1.53125, "learning_rate": 2.6337909820393634e-07, "loss": 0.326, "step": 21724 }, { "epoch": 0.9539721395874727, "grad_norm": 1.546875, "learning_rate": 2.623784619035535e-07, "loss": 0.2911, "step": 21726 }, { "epoch": 0.9540599580666762, "grad_norm": 1.46875, "learning_rate": 2.613797200161611e-07, "loss": 0.3035, "step": 21728 }, { "epoch": 0.9541477765458797, "grad_norm": 1.4921875, "learning_rate": 2.6038287261823944e-07, "loss": 0.3146, "step": 21730 }, { "epoch": 0.9542355950250831, "grad_norm": 1.4375, "learning_rate": 2.593879197861249e-07, "loss": 0.298, "step": 21732 }, { "epoch": 0.9543234135042866, "grad_norm": 1.484375, "learning_rate": 2.5839486159601746e-07, "loss": 0.2863, "step": 21734 }, { "epoch": 0.9544112319834901, "grad_norm": 1.4609375, "learning_rate": 2.5740369812396193e-07, "loss": 0.2981, "step": 21736 }, { "epoch": 0.9544990504626936, "grad_norm": 1.4453125, "learning_rate": 2.5641442944586144e-07, "loss": 0.2854, "step": 21738 }, { "epoch": 0.9545868689418971, "grad_norm": 1.515625, "learning_rate": 2.554270556374777e-07, "loss": 0.2901, "step": 21740 }, { "epoch": 0.9546746874211006, "grad_norm": 1.453125, "learning_rate": 2.544415767744196e-07, "loss": 0.3035, "step": 21742 }, { "epoch": 0.9547625059003041, "grad_norm": 1.4765625, "learning_rate": 2.5345799293215734e-07, "loss": 0.3136, "step": 21744 }, { "epoch": 0.9548503243795076, "grad_norm": 1.546875, "learning_rate": 2.5247630418601673e-07, "loss": 0.3432, "step": 21746 }, { "epoch": 0.954938142858711, "grad_norm": 1.46875, "learning_rate": 2.5149651061117105e-07, "loss": 0.3208, "step": 21748 }, { "epoch": 0.9550259613379145, "grad_norm": 1.515625, "learning_rate": 2.505186122826547e-07, "loss": 0.3199, "step": 21750 }, { "epoch": 0.955113779817118, "grad_norm": 1.53125, "learning_rate": 2.495426092753578e-07, "loss": 0.32, "step": 21752 }, { "epoch": 0.9552015982963215, "grad_norm": 1.4921875, "learning_rate": 2.485685016640177e-07, "loss": 0.3055, "step": 21754 }, { "epoch": 0.955289416775525, "grad_norm": 1.421875, "learning_rate": 2.4759628952323867e-07, "loss": 0.2988, "step": 21756 }, { "epoch": 0.9553772352547285, "grad_norm": 1.484375, "learning_rate": 2.466259729274667e-07, "loss": 0.3319, "step": 21758 }, { "epoch": 0.955465053733932, "grad_norm": 1.4375, "learning_rate": 2.456575519510118e-07, "loss": 0.3322, "step": 21760 }, { "epoch": 0.9555528722131355, "grad_norm": 1.421875, "learning_rate": 2.446910266680369e-07, "loss": 0.3233, "step": 21762 }, { "epoch": 0.9556406906923389, "grad_norm": 1.421875, "learning_rate": 2.4372639715255776e-07, "loss": 0.3157, "step": 21764 }, { "epoch": 0.9557285091715424, "grad_norm": 1.5625, "learning_rate": 2.4276366347844305e-07, "loss": 0.2984, "step": 21766 }, { "epoch": 0.9558163276507459, "grad_norm": 1.484375, "learning_rate": 2.4180282571942546e-07, "loss": 0.3368, "step": 21768 }, { "epoch": 0.9559041461299493, "grad_norm": 1.4296875, "learning_rate": 2.4084388394907954e-07, "loss": 0.3091, "step": 21770 }, { "epoch": 0.9559919646091529, "grad_norm": 1.6796875, "learning_rate": 2.398868382408437e-07, "loss": 0.3215, "step": 21772 }, { "epoch": 0.9560797830883564, "grad_norm": 1.484375, "learning_rate": 2.389316886680121e-07, "loss": 0.315, "step": 21774 }, { "epoch": 0.9561676015675599, "grad_norm": 1.4609375, "learning_rate": 2.3797843530372344e-07, "loss": 0.3483, "step": 21776 }, { "epoch": 0.9562554200467633, "grad_norm": 1.5, "learning_rate": 2.370270782209888e-07, "loss": 0.3088, "step": 21778 }, { "epoch": 0.9563432385259668, "grad_norm": 1.4765625, "learning_rate": 2.360776174926499e-07, "loss": 0.3143, "step": 21780 }, { "epoch": 0.9564310570051703, "grad_norm": 1.4375, "learning_rate": 2.3513005319142634e-07, "loss": 0.3293, "step": 21782 }, { "epoch": 0.9565188754843738, "grad_norm": 1.4765625, "learning_rate": 2.3418438538987952e-07, "loss": 0.3208, "step": 21784 }, { "epoch": 0.9566066939635773, "grad_norm": 1.4609375, "learning_rate": 2.332406141604293e-07, "loss": 0.3239, "step": 21786 }, { "epoch": 0.9566945124427808, "grad_norm": 1.546875, "learning_rate": 2.3229873957534841e-07, "loss": 0.3204, "step": 21788 }, { "epoch": 0.9567823309219843, "grad_norm": 1.46875, "learning_rate": 2.3135876170676806e-07, "loss": 0.3359, "step": 21790 }, { "epoch": 0.9568701494011878, "grad_norm": 1.5390625, "learning_rate": 2.3042068062667232e-07, "loss": 0.3261, "step": 21792 }, { "epoch": 0.9569579678803912, "grad_norm": 1.53125, "learning_rate": 2.294844964068954e-07, "loss": 0.3228, "step": 21794 }, { "epoch": 0.9570457863595947, "grad_norm": 1.484375, "learning_rate": 2.2855020911913826e-07, "loss": 0.3152, "step": 21796 }, { "epoch": 0.9571336048387982, "grad_norm": 1.4921875, "learning_rate": 2.2761781883494094e-07, "loss": 0.316, "step": 21798 }, { "epoch": 0.9572214233180016, "grad_norm": 1.46875, "learning_rate": 2.2668732562571016e-07, "loss": 0.283, "step": 21800 }, { "epoch": 0.9573092417972052, "grad_norm": 1.484375, "learning_rate": 2.2575872956270283e-07, "loss": 0.3198, "step": 21802 }, { "epoch": 0.9573970602764087, "grad_norm": 1.453125, "learning_rate": 2.248320307170315e-07, "loss": 0.3325, "step": 21804 }, { "epoch": 0.9574848787556122, "grad_norm": 1.4296875, "learning_rate": 2.2390722915966167e-07, "loss": 0.3085, "step": 21806 }, { "epoch": 0.9575726972348156, "grad_norm": 1.53125, "learning_rate": 2.2298432496141441e-07, "loss": 0.346, "step": 21808 }, { "epoch": 0.9576605157140191, "grad_norm": 1.5, "learning_rate": 2.2206331819296934e-07, "loss": 0.3222, "step": 21810 }, { "epoch": 0.9577483341932226, "grad_norm": 1.46875, "learning_rate": 2.2114420892485333e-07, "loss": 0.3223, "step": 21812 }, { "epoch": 0.957836152672426, "grad_norm": 1.515625, "learning_rate": 2.2022699722745454e-07, "loss": 0.3287, "step": 21814 }, { "epoch": 0.9579239711516295, "grad_norm": 1.515625, "learning_rate": 2.1931168317101125e-07, "loss": 0.3536, "step": 21816 }, { "epoch": 0.9580117896308331, "grad_norm": 1.4140625, "learning_rate": 2.1839826682562015e-07, "loss": 0.3204, "step": 21818 }, { "epoch": 0.9580996081100366, "grad_norm": 1.5, "learning_rate": 2.1748674826123084e-07, "loss": 0.3263, "step": 21820 }, { "epoch": 0.95818742658924, "grad_norm": 1.546875, "learning_rate": 2.165771275476458e-07, "loss": 0.3258, "step": 21822 }, { "epoch": 0.9582752450684435, "grad_norm": 1.5078125, "learning_rate": 2.1566940475452602e-07, "loss": 0.3304, "step": 21824 }, { "epoch": 0.958363063547647, "grad_norm": 1.3984375, "learning_rate": 2.147635799513853e-07, "loss": 0.304, "step": 21826 }, { "epoch": 0.9584508820268505, "grad_norm": 1.453125, "learning_rate": 2.1385965320759038e-07, "loss": 0.3063, "step": 21828 }, { "epoch": 0.9585387005060539, "grad_norm": 1.5, "learning_rate": 2.1295762459236368e-07, "loss": 0.3192, "step": 21830 }, { "epoch": 0.9586265189852575, "grad_norm": 1.4453125, "learning_rate": 2.1205749417478604e-07, "loss": 0.3364, "step": 21832 }, { "epoch": 0.958714337464461, "grad_norm": 1.453125, "learning_rate": 2.1115926202378565e-07, "loss": 0.3031, "step": 21834 }, { "epoch": 0.9588021559436645, "grad_norm": 1.546875, "learning_rate": 2.1026292820815195e-07, "loss": 0.3354, "step": 21836 }, { "epoch": 0.9588899744228679, "grad_norm": 1.453125, "learning_rate": 2.0936849279652727e-07, "loss": 0.2824, "step": 21838 }, { "epoch": 0.9589777929020714, "grad_norm": 1.4375, "learning_rate": 2.0847595585740676e-07, "loss": 0.3315, "step": 21840 }, { "epoch": 0.9590656113812749, "grad_norm": 1.5078125, "learning_rate": 2.075853174591358e-07, "loss": 0.288, "step": 21842 }, { "epoch": 0.9591534298604784, "grad_norm": 1.4765625, "learning_rate": 2.0669657766992923e-07, "loss": 0.317, "step": 21844 }, { "epoch": 0.9592412483396818, "grad_norm": 1.3984375, "learning_rate": 2.058097365578382e-07, "loss": 0.2892, "step": 21846 }, { "epoch": 0.9593290668188854, "grad_norm": 1.4765625, "learning_rate": 2.0492479419078336e-07, "loss": 0.3132, "step": 21848 }, { "epoch": 0.9594168852980889, "grad_norm": 1.5390625, "learning_rate": 2.0404175063653275e-07, "loss": 0.3096, "step": 21850 }, { "epoch": 0.9595047037772924, "grad_norm": 1.4453125, "learning_rate": 2.0316060596270726e-07, "loss": 0.3124, "step": 21852 }, { "epoch": 0.9595925222564958, "grad_norm": 1.5859375, "learning_rate": 2.0228136023678623e-07, "loss": 0.3051, "step": 21854 }, { "epoch": 0.9596803407356993, "grad_norm": 1.4765625, "learning_rate": 2.0140401352610195e-07, "loss": 0.2949, "step": 21856 }, { "epoch": 0.9597681592149028, "grad_norm": 1.46875, "learning_rate": 2.0052856589784507e-07, "loss": 0.3101, "step": 21858 }, { "epoch": 0.9598559776941062, "grad_norm": 1.4453125, "learning_rate": 1.9965501741905645e-07, "loss": 0.302, "step": 21860 }, { "epoch": 0.9599437961733097, "grad_norm": 1.5078125, "learning_rate": 1.9878336815662978e-07, "loss": 0.2926, "step": 21862 }, { "epoch": 0.9600316146525133, "grad_norm": 1.5078125, "learning_rate": 1.9791361817732002e-07, "loss": 0.3298, "step": 21864 }, { "epoch": 0.9601194331317168, "grad_norm": 1.578125, "learning_rate": 1.9704576754772663e-07, "loss": 0.3247, "step": 21866 }, { "epoch": 0.9602072516109202, "grad_norm": 1.5546875, "learning_rate": 1.961798163343187e-07, "loss": 0.2966, "step": 21868 }, { "epoch": 0.9602950700901237, "grad_norm": 1.515625, "learning_rate": 1.953157646034043e-07, "loss": 0.3175, "step": 21870 }, { "epoch": 0.9603828885693272, "grad_norm": 1.453125, "learning_rate": 1.9445361242115545e-07, "loss": 0.3292, "step": 21872 }, { "epoch": 0.9604707070485307, "grad_norm": 1.5078125, "learning_rate": 1.935933598535944e-07, "loss": 0.3042, "step": 21874 }, { "epoch": 0.9605585255277341, "grad_norm": 1.4296875, "learning_rate": 1.9273500696659896e-07, "loss": 0.3043, "step": 21876 }, { "epoch": 0.9606463440069377, "grad_norm": 1.4765625, "learning_rate": 1.9187855382590547e-07, "loss": 0.3103, "step": 21878 }, { "epoch": 0.9607341624861412, "grad_norm": 1.4375, "learning_rate": 1.9102400049710035e-07, "loss": 0.3412, "step": 21880 }, { "epoch": 0.9608219809653447, "grad_norm": 1.4765625, "learning_rate": 1.901713470456229e-07, "loss": 0.3092, "step": 21882 }, { "epoch": 0.9609097994445481, "grad_norm": 1.5859375, "learning_rate": 1.893205935367709e-07, "loss": 0.3328, "step": 21884 }, { "epoch": 0.9609976179237516, "grad_norm": 1.4140625, "learning_rate": 1.88471740035695e-07, "loss": 0.3059, "step": 21886 }, { "epoch": 0.9610854364029551, "grad_norm": 1.5078125, "learning_rate": 1.8762478660740156e-07, "loss": 0.3126, "step": 21888 }, { "epoch": 0.9611732548821585, "grad_norm": 1.4140625, "learning_rate": 1.8677973331674982e-07, "loss": 0.3218, "step": 21890 }, { "epoch": 0.961261073361362, "grad_norm": 1.3984375, "learning_rate": 1.8593658022845462e-07, "loss": 0.2991, "step": 21892 }, { "epoch": 0.9613488918405656, "grad_norm": 1.453125, "learning_rate": 1.8509532740708102e-07, "loss": 0.3121, "step": 21894 }, { "epoch": 0.9614367103197691, "grad_norm": 1.5078125, "learning_rate": 1.842559749170608e-07, "loss": 0.2948, "step": 21896 }, { "epoch": 0.9615245287989725, "grad_norm": 1.5, "learning_rate": 1.834185228226648e-07, "loss": 0.3117, "step": 21898 }, { "epoch": 0.961612347278176, "grad_norm": 1.5, "learning_rate": 1.8258297118802502e-07, "loss": 0.3318, "step": 21900 }, { "epoch": 0.9617001657573795, "grad_norm": 1.453125, "learning_rate": 1.8174932007713476e-07, "loss": 0.3209, "step": 21902 }, { "epoch": 0.961787984236583, "grad_norm": 1.515625, "learning_rate": 1.80917569553829e-07, "loss": 0.3037, "step": 21904 }, { "epoch": 0.9618758027157864, "grad_norm": 1.4296875, "learning_rate": 1.8008771968180403e-07, "loss": 0.2937, "step": 21906 }, { "epoch": 0.9619636211949899, "grad_norm": 1.5390625, "learning_rate": 1.7925977052461186e-07, "loss": 0.3217, "step": 21908 }, { "epoch": 0.9620514396741935, "grad_norm": 1.4921875, "learning_rate": 1.7843372214565723e-07, "loss": 0.3387, "step": 21910 }, { "epoch": 0.962139258153397, "grad_norm": 1.46875, "learning_rate": 1.7760957460819793e-07, "loss": 0.3066, "step": 21912 }, { "epoch": 0.9622270766326004, "grad_norm": 1.5078125, "learning_rate": 1.7678732797534735e-07, "loss": 0.3127, "step": 21914 }, { "epoch": 0.9623148951118039, "grad_norm": 1.515625, "learning_rate": 1.7596698231007459e-07, "loss": 0.3274, "step": 21916 }, { "epoch": 0.9624027135910074, "grad_norm": 1.4765625, "learning_rate": 1.7514853767519878e-07, "loss": 0.3352, "step": 21918 }, { "epoch": 0.9624905320702108, "grad_norm": 1.5, "learning_rate": 1.7433199413340317e-07, "loss": 0.2812, "step": 21920 }, { "epoch": 0.9625783505494143, "grad_norm": 1.4140625, "learning_rate": 1.7351735174721274e-07, "loss": 0.286, "step": 21922 }, { "epoch": 0.9626661690286179, "grad_norm": 1.4375, "learning_rate": 1.7270461057901367e-07, "loss": 0.3296, "step": 21924 }, { "epoch": 0.9627539875078214, "grad_norm": 1.4140625, "learning_rate": 1.7189377069104784e-07, "loss": 0.306, "step": 21926 }, { "epoch": 0.9628418059870248, "grad_norm": 1.4765625, "learning_rate": 1.7108483214540726e-07, "loss": 0.3067, "step": 21928 }, { "epoch": 0.9629296244662283, "grad_norm": 1.46875, "learning_rate": 1.702777950040424e-07, "loss": 0.3172, "step": 21930 }, { "epoch": 0.9630174429454318, "grad_norm": 1.5390625, "learning_rate": 1.6947265932875655e-07, "loss": 0.3347, "step": 21932 }, { "epoch": 0.9631052614246353, "grad_norm": 1.4375, "learning_rate": 1.6866942518120877e-07, "loss": 0.2929, "step": 21934 }, { "epoch": 0.9631930799038387, "grad_norm": 1.3984375, "learning_rate": 1.6786809262290816e-07, "loss": 0.3185, "step": 21936 }, { "epoch": 0.9632808983830422, "grad_norm": 1.4609375, "learning_rate": 1.6706866171521952e-07, "loss": 0.3116, "step": 21938 }, { "epoch": 0.9633687168622458, "grad_norm": 1.484375, "learning_rate": 1.662711325193689e-07, "loss": 0.307, "step": 21940 }, { "epoch": 0.9634565353414493, "grad_norm": 1.515625, "learning_rate": 1.6547550509642406e-07, "loss": 0.3371, "step": 21942 }, { "epoch": 0.9635443538206527, "grad_norm": 1.421875, "learning_rate": 1.6468177950731967e-07, "loss": 0.3152, "step": 21944 }, { "epoch": 0.9636321722998562, "grad_norm": 1.421875, "learning_rate": 1.638899558128404e-07, "loss": 0.317, "step": 21946 }, { "epoch": 0.9637199907790597, "grad_norm": 1.4140625, "learning_rate": 1.631000340736183e-07, "loss": 0.3107, "step": 21948 }, { "epoch": 0.9638078092582631, "grad_norm": 1.484375, "learning_rate": 1.623120143501522e-07, "loss": 0.3131, "step": 21950 }, { "epoch": 0.9638956277374666, "grad_norm": 1.46875, "learning_rate": 1.6152589670278552e-07, "loss": 0.3149, "step": 21952 }, { "epoch": 0.9639834462166701, "grad_norm": 1.5234375, "learning_rate": 1.6074168119172006e-07, "loss": 0.3243, "step": 21954 }, { "epoch": 0.9640712646958737, "grad_norm": 1.3984375, "learning_rate": 1.5995936787700782e-07, "loss": 0.3016, "step": 21956 }, { "epoch": 0.9641590831750771, "grad_norm": 1.4140625, "learning_rate": 1.5917895681856475e-07, "loss": 0.3022, "step": 21958 }, { "epoch": 0.9642469016542806, "grad_norm": 1.4921875, "learning_rate": 1.5840044807615138e-07, "loss": 0.3238, "step": 21960 }, { "epoch": 0.9643347201334841, "grad_norm": 1.453125, "learning_rate": 1.5762384170938947e-07, "loss": 0.3044, "step": 21962 }, { "epoch": 0.9644225386126876, "grad_norm": 1.4765625, "learning_rate": 1.5684913777774536e-07, "loss": 0.3587, "step": 21964 }, { "epoch": 0.964510357091891, "grad_norm": 1.46875, "learning_rate": 1.560763363405493e-07, "loss": 0.306, "step": 21966 }, { "epoch": 0.9645981755710945, "grad_norm": 1.421875, "learning_rate": 1.5530543745698457e-07, "loss": 0.3222, "step": 21968 }, { "epoch": 0.964685994050298, "grad_norm": 1.59375, "learning_rate": 1.5453644118608447e-07, "loss": 0.3017, "step": 21970 }, { "epoch": 0.9647738125295016, "grad_norm": 1.546875, "learning_rate": 1.5376934758674077e-07, "loss": 0.3307, "step": 21972 }, { "epoch": 0.964861631008705, "grad_norm": 1.453125, "learning_rate": 1.530041567176954e-07, "loss": 0.3025, "step": 21974 }, { "epoch": 0.9649494494879085, "grad_norm": 1.4609375, "learning_rate": 1.5224086863754594e-07, "loss": 0.3043, "step": 21976 }, { "epoch": 0.965037267967112, "grad_norm": 1.421875, "learning_rate": 1.5147948340475115e-07, "loss": 0.3172, "step": 21978 }, { "epoch": 0.9651250864463154, "grad_norm": 1.484375, "learning_rate": 1.5072000107761164e-07, "loss": 0.3221, "step": 21980 }, { "epoch": 0.9652129049255189, "grad_norm": 1.515625, "learning_rate": 1.4996242171429197e-07, "loss": 0.3245, "step": 21982 }, { "epoch": 0.9653007234047224, "grad_norm": 1.3984375, "learning_rate": 1.4920674537280688e-07, "loss": 0.3153, "step": 21984 }, { "epoch": 0.965388541883926, "grad_norm": 1.4609375, "learning_rate": 1.4845297211102393e-07, "loss": 0.3186, "step": 21986 }, { "epoch": 0.9654763603631294, "grad_norm": 1.328125, "learning_rate": 1.4770110198667197e-07, "loss": 0.3236, "step": 21988 }, { "epoch": 0.9655641788423329, "grad_norm": 1.5703125, "learning_rate": 1.4695113505732715e-07, "loss": 0.3182, "step": 21990 }, { "epoch": 0.9656519973215364, "grad_norm": 1.5, "learning_rate": 1.4620307138042412e-07, "loss": 0.303, "step": 21992 }, { "epoch": 0.9657398158007399, "grad_norm": 1.5546875, "learning_rate": 1.4545691101324476e-07, "loss": 0.337, "step": 21994 }, { "epoch": 0.9658276342799433, "grad_norm": 1.5625, "learning_rate": 1.447126540129351e-07, "loss": 0.3146, "step": 21996 }, { "epoch": 0.9659154527591468, "grad_norm": 1.4453125, "learning_rate": 1.439703004364884e-07, "loss": 0.3166, "step": 21998 }, { "epoch": 0.9660032712383503, "grad_norm": 1.421875, "learning_rate": 1.4322985034075366e-07, "loss": 0.2911, "step": 22000 } ], "logging_steps": 2, "max_steps": 22774, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1.498954685106157e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }