{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999889013440473, "eval_steps": 500, "global_step": 45050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001109865595276412, "grad_norm": 3.869100332260132, "learning_rate": 1.9999999392116768e-05, "loss": 1.2, "step": 5 }, { "epoch": 0.0002219731190552824, "grad_norm": 4.66916561126709, "learning_rate": 1.9999997568467152e-05, "loss": 0.9035, "step": 10 }, { "epoch": 0.0003329596785829236, "grad_norm": 1.8067188262939453, "learning_rate": 1.9999994529051363e-05, "loss": 0.8638, "step": 15 }, { "epoch": 0.0004439462381105648, "grad_norm": 1.7449718713760376, "learning_rate": 1.999999027386978e-05, "loss": 0.7258, "step": 20 }, { "epoch": 0.000554932797638206, "grad_norm": 2.081190586090088, "learning_rate": 1.9999984802922915e-05, "loss": 0.7764, "step": 25 }, { "epoch": 0.0006659193571658472, "grad_norm": 4.897317886352539, "learning_rate": 1.9999978116211434e-05, "loss": 0.8836, "step": 30 }, { "epoch": 0.0007769059166934884, "grad_norm": 2.0032882690429688, "learning_rate": 1.9999970213736153e-05, "loss": 0.7637, "step": 35 }, { "epoch": 0.0008878924762211296, "grad_norm": 1.2999836206436157, "learning_rate": 1.999996109549803e-05, "loss": 0.6827, "step": 40 }, { "epoch": 0.0009988790357487707, "grad_norm": 2.4639241695404053, "learning_rate": 1.9999950761498176e-05, "loss": 0.8187, "step": 45 }, { "epoch": 0.001109865595276412, "grad_norm": 1.5862199068069458, "learning_rate": 1.9999939211737842e-05, "loss": 0.826, "step": 50 }, { "epoch": 0.0012208521548040532, "grad_norm": 2.156992197036743, "learning_rate": 1.999992644621844e-05, "loss": 0.6859, "step": 55 }, { "epoch": 0.0013318387143316944, "grad_norm": 1.9007676839828491, "learning_rate": 1.9999912464941515e-05, "loss": 0.8637, "step": 60 }, { "epoch": 0.0014428252738593356, "grad_norm": 1.9827724695205688, "learning_rate": 1.999989726790877e-05, "loss": 0.8192, "step": 65 }, { "epoch": 0.0015538118333869768, "grad_norm": 1.9432623386383057, "learning_rate": 1.999988085512205e-05, "loss": 0.7394, "step": 70 }, { "epoch": 0.001664798392914618, "grad_norm": 1.9758037328720093, "learning_rate": 1.9999863226583357e-05, "loss": 0.5363, "step": 75 }, { "epoch": 0.0017757849524422593, "grad_norm": 1.2889810800552368, "learning_rate": 1.9999844382294827e-05, "loss": 0.7151, "step": 80 }, { "epoch": 0.0018867715119699005, "grad_norm": 1.4423288106918335, "learning_rate": 1.9999824322258754e-05, "loss": 0.7741, "step": 85 }, { "epoch": 0.0019977580714975415, "grad_norm": 1.5920696258544922, "learning_rate": 1.9999803046477578e-05, "loss": 0.7286, "step": 90 }, { "epoch": 0.0021087446310251827, "grad_norm": 1.4489409923553467, "learning_rate": 1.9999780554953886e-05, "loss": 0.6861, "step": 95 }, { "epoch": 0.002219731190552824, "grad_norm": 1.9922000169754028, "learning_rate": 1.9999756847690408e-05, "loss": 0.6803, "step": 100 }, { "epoch": 0.002330717750080465, "grad_norm": 1.5229793787002563, "learning_rate": 1.9999731924690028e-05, "loss": 0.7924, "step": 105 }, { "epoch": 0.0024417043096081063, "grad_norm": 1.939287543296814, "learning_rate": 1.999970578595578e-05, "loss": 0.7265, "step": 110 }, { "epoch": 0.0025526908691357476, "grad_norm": 2.4952993392944336, "learning_rate": 1.999967843149084e-05, "loss": 0.66, "step": 115 }, { "epoch": 0.002663677428663389, "grad_norm": 1.9537633657455444, "learning_rate": 1.999964986129853e-05, "loss": 0.6382, "step": 120 }, { "epoch": 0.00277466398819103, "grad_norm": 1.8744527101516724, "learning_rate": 1.9999620075382326e-05, "loss": 0.7525, "step": 125 }, { "epoch": 0.0028856505477186712, "grad_norm": 2.0237603187561035, "learning_rate": 1.999958907374585e-05, "loss": 0.7356, "step": 130 }, { "epoch": 0.0029966371072463124, "grad_norm": 1.49530827999115, "learning_rate": 1.999955685639287e-05, "loss": 0.5226, "step": 135 }, { "epoch": 0.0031076236667739537, "grad_norm": 2.271005153656006, "learning_rate": 1.9999523423327304e-05, "loss": 0.7534, "step": 140 }, { "epoch": 0.003218610226301595, "grad_norm": 1.3122650384902954, "learning_rate": 1.9999488774553213e-05, "loss": 0.6388, "step": 145 }, { "epoch": 0.003329596785829236, "grad_norm": 2.2374744415283203, "learning_rate": 1.9999452910074817e-05, "loss": 0.637, "step": 150 }, { "epoch": 0.0034405833453568773, "grad_norm": 1.688567042350769, "learning_rate": 1.9999415829896466e-05, "loss": 0.6446, "step": 155 }, { "epoch": 0.0035515699048845185, "grad_norm": 1.5541659593582153, "learning_rate": 1.999937753402268e-05, "loss": 0.744, "step": 160 }, { "epoch": 0.0036625564644121597, "grad_norm": 1.51518976688385, "learning_rate": 1.9999338022458107e-05, "loss": 0.6366, "step": 165 }, { "epoch": 0.003773543023939801, "grad_norm": 2.0031962394714355, "learning_rate": 1.999929729520755e-05, "loss": 0.6538, "step": 170 }, { "epoch": 0.003884529583467442, "grad_norm": 2.005131721496582, "learning_rate": 1.9999255352275965e-05, "loss": 0.5916, "step": 175 }, { "epoch": 0.003995516142995083, "grad_norm": 1.7754985094070435, "learning_rate": 1.9999212193668448e-05, "loss": 0.6784, "step": 180 }, { "epoch": 0.004106502702522725, "grad_norm": 1.4435465335845947, "learning_rate": 1.999916781939025e-05, "loss": 0.7264, "step": 185 }, { "epoch": 0.004217489262050365, "grad_norm": 1.649026870727539, "learning_rate": 1.9999122229446758e-05, "loss": 0.6314, "step": 190 }, { "epoch": 0.004328475821578007, "grad_norm": 2.0196125507354736, "learning_rate": 1.9999075423843525e-05, "loss": 0.7619, "step": 195 }, { "epoch": 0.004439462381105648, "grad_norm": 1.6189427375793457, "learning_rate": 1.9999027402586235e-05, "loss": 0.7378, "step": 200 }, { "epoch": 0.0045504489406332895, "grad_norm": 1.5108414888381958, "learning_rate": 1.9998978165680728e-05, "loss": 0.558, "step": 205 }, { "epoch": 0.00466143550016093, "grad_norm": 1.4623931646347046, "learning_rate": 1.9998927713132986e-05, "loss": 0.8298, "step": 210 }, { "epoch": 0.004772422059688572, "grad_norm": 1.4418224096298218, "learning_rate": 1.999887604494915e-05, "loss": 0.5532, "step": 215 }, { "epoch": 0.004883408619216213, "grad_norm": 1.648908257484436, "learning_rate": 1.9998823161135498e-05, "loss": 0.6328, "step": 220 }, { "epoch": 0.004994395178743854, "grad_norm": 1.5702770948410034, "learning_rate": 1.9998769061698457e-05, "loss": 0.658, "step": 225 }, { "epoch": 0.005105381738271495, "grad_norm": 1.683289647102356, "learning_rate": 1.9998713746644606e-05, "loss": 0.565, "step": 230 }, { "epoch": 0.005216368297799137, "grad_norm": 2.054819345474243, "learning_rate": 1.9998657215980674e-05, "loss": 0.7035, "step": 235 }, { "epoch": 0.005327354857326778, "grad_norm": 1.2517290115356445, "learning_rate": 1.9998599469713528e-05, "loss": 0.5752, "step": 240 }, { "epoch": 0.005438341416854419, "grad_norm": 2.055407762527466, "learning_rate": 1.9998540507850195e-05, "loss": 0.7533, "step": 245 }, { "epoch": 0.00554932797638206, "grad_norm": 1.7456523180007935, "learning_rate": 1.9998480330397836e-05, "loss": 0.5713, "step": 250 }, { "epoch": 0.005660314535909702, "grad_norm": 1.9250489473342896, "learning_rate": 1.999841893736377e-05, "loss": 0.8103, "step": 255 }, { "epoch": 0.0057713010954373424, "grad_norm": 1.3744711875915527, "learning_rate": 1.9998356328755467e-05, "loss": 0.6879, "step": 260 }, { "epoch": 0.005882287654964984, "grad_norm": 1.9396206140518188, "learning_rate": 1.9998292504580528e-05, "loss": 0.5112, "step": 265 }, { "epoch": 0.005993274214492625, "grad_norm": 1.8470498323440552, "learning_rate": 1.999822746484672e-05, "loss": 0.8324, "step": 270 }, { "epoch": 0.0061042607740202665, "grad_norm": 1.656162142753601, "learning_rate": 1.999816120956195e-05, "loss": 0.7627, "step": 275 }, { "epoch": 0.006215247333547907, "grad_norm": 1.5918843746185303, "learning_rate": 1.9998093738734267e-05, "loss": 0.5311, "step": 280 }, { "epoch": 0.006326233893075549, "grad_norm": 1.779537558555603, "learning_rate": 1.999802505237188e-05, "loss": 0.8021, "step": 285 }, { "epoch": 0.00643722045260319, "grad_norm": 1.4962438344955444, "learning_rate": 1.9997955150483142e-05, "loss": 0.6775, "step": 290 }, { "epoch": 0.0065482070121308305, "grad_norm": 1.7058830261230469, "learning_rate": 1.9997884033076542e-05, "loss": 0.6195, "step": 295 }, { "epoch": 0.006659193571658472, "grad_norm": 1.3699376583099365, "learning_rate": 1.9997811700160735e-05, "loss": 0.5338, "step": 300 }, { "epoch": 0.006770180131186113, "grad_norm": 20.352764129638672, "learning_rate": 1.999773815174451e-05, "loss": 0.7159, "step": 305 }, { "epoch": 0.006881166690713755, "grad_norm": 1.6667920351028442, "learning_rate": 1.999766338783681e-05, "loss": 0.6633, "step": 310 }, { "epoch": 0.006992153250241395, "grad_norm": 1.438814401626587, "learning_rate": 1.9997587408446725e-05, "loss": 0.6731, "step": 315 }, { "epoch": 0.007103139809769037, "grad_norm": 1.7060329914093018, "learning_rate": 1.999751021358349e-05, "loss": 0.8527, "step": 320 }, { "epoch": 0.007214126369296678, "grad_norm": 1.8527767658233643, "learning_rate": 1.9997431803256496e-05, "loss": 0.673, "step": 325 }, { "epoch": 0.0073251129288243195, "grad_norm": 2.1131327152252197, "learning_rate": 1.9997352177475274e-05, "loss": 0.6467, "step": 330 }, { "epoch": 0.00743609948835196, "grad_norm": 1.617082118988037, "learning_rate": 1.99972713362495e-05, "loss": 0.4799, "step": 335 }, { "epoch": 0.007547086047879602, "grad_norm": 1.6008981466293335, "learning_rate": 1.9997189279589003e-05, "loss": 0.5518, "step": 340 }, { "epoch": 0.007658072607407243, "grad_norm": 1.9231702089309692, "learning_rate": 1.9997106007503765e-05, "loss": 0.5805, "step": 345 }, { "epoch": 0.007769059166934884, "grad_norm": 2.0241827964782715, "learning_rate": 1.9997021520003903e-05, "loss": 0.806, "step": 350 }, { "epoch": 0.007880045726462526, "grad_norm": 1.3821715116500854, "learning_rate": 1.9996935817099695e-05, "loss": 0.7182, "step": 355 }, { "epoch": 0.007991032285990166, "grad_norm": 1.4263622760772705, "learning_rate": 1.9996848898801554e-05, "loss": 0.7668, "step": 360 }, { "epoch": 0.008102018845517808, "grad_norm": 1.726434588432312, "learning_rate": 1.9996760765120052e-05, "loss": 0.7493, "step": 365 }, { "epoch": 0.00821300540504545, "grad_norm": 1.2283848524093628, "learning_rate": 1.9996671416065906e-05, "loss": 0.6539, "step": 370 }, { "epoch": 0.008323991964573091, "grad_norm": 1.572723627090454, "learning_rate": 1.999658085164997e-05, "loss": 0.6009, "step": 375 }, { "epoch": 0.00843497852410073, "grad_norm": 1.8220628499984741, "learning_rate": 1.9996489071883265e-05, "loss": 0.6171, "step": 380 }, { "epoch": 0.008545965083628372, "grad_norm": 3.054668664932251, "learning_rate": 1.9996396076776943e-05, "loss": 0.7583, "step": 385 }, { "epoch": 0.008656951643156014, "grad_norm": 10.986349105834961, "learning_rate": 1.999630186634231e-05, "loss": 0.6993, "step": 390 }, { "epoch": 0.008767938202683656, "grad_norm": 1.3439058065414429, "learning_rate": 1.999620644059082e-05, "loss": 0.593, "step": 395 }, { "epoch": 0.008878924762211296, "grad_norm": 1.6584223508834839, "learning_rate": 1.999610979953408e-05, "loss": 0.7222, "step": 400 }, { "epoch": 0.008989911321738937, "grad_norm": 1.4647761583328247, "learning_rate": 1.999601194318383e-05, "loss": 0.7219, "step": 405 }, { "epoch": 0.009100897881266579, "grad_norm": 1.648807168006897, "learning_rate": 1.9995912871551975e-05, "loss": 0.5759, "step": 410 }, { "epoch": 0.00921188444079422, "grad_norm": 1.1908730268478394, "learning_rate": 1.9995812584650555e-05, "loss": 0.4634, "step": 415 }, { "epoch": 0.00932287100032186, "grad_norm": 1.2616108655929565, "learning_rate": 1.9995711082491765e-05, "loss": 0.6941, "step": 420 }, { "epoch": 0.009433857559849502, "grad_norm": 1.5647404193878174, "learning_rate": 1.9995608365087945e-05, "loss": 0.6583, "step": 425 }, { "epoch": 0.009544844119377144, "grad_norm": 1.7183318138122559, "learning_rate": 1.9995504432451583e-05, "loss": 0.7446, "step": 430 }, { "epoch": 0.009655830678904785, "grad_norm": 1.254790186882019, "learning_rate": 1.9995399284595314e-05, "loss": 0.7101, "step": 435 }, { "epoch": 0.009766817238432425, "grad_norm": 1.774034023284912, "learning_rate": 1.999529292153192e-05, "loss": 0.7042, "step": 440 }, { "epoch": 0.009877803797960067, "grad_norm": 1.9237412214279175, "learning_rate": 1.9995185343274336e-05, "loss": 0.69, "step": 445 }, { "epoch": 0.009988790357487709, "grad_norm": 1.4360047578811646, "learning_rate": 1.9995076549835638e-05, "loss": 0.6877, "step": 450 }, { "epoch": 0.010099776917015349, "grad_norm": 1.3835448026657104, "learning_rate": 1.9994966541229057e-05, "loss": 0.7267, "step": 455 }, { "epoch": 0.01021076347654299, "grad_norm": 1.4885082244873047, "learning_rate": 1.9994855317467963e-05, "loss": 0.5813, "step": 460 }, { "epoch": 0.010321750036070632, "grad_norm": 1.4832953214645386, "learning_rate": 1.9994742878565878e-05, "loss": 0.6522, "step": 465 }, { "epoch": 0.010432736595598274, "grad_norm": 1.3984161615371704, "learning_rate": 1.9994629224536477e-05, "loss": 0.6394, "step": 470 }, { "epoch": 0.010543723155125913, "grad_norm": 1.5427662134170532, "learning_rate": 1.999451435539357e-05, "loss": 0.6314, "step": 475 }, { "epoch": 0.010654709714653555, "grad_norm": 3.5436573028564453, "learning_rate": 1.999439827115113e-05, "loss": 0.7408, "step": 480 }, { "epoch": 0.010765696274181197, "grad_norm": 1.8555324077606201, "learning_rate": 1.9994280971823267e-05, "loss": 0.661, "step": 485 }, { "epoch": 0.010876682833708838, "grad_norm": 1.6472523212432861, "learning_rate": 1.9994162457424238e-05, "loss": 0.4967, "step": 490 }, { "epoch": 0.010987669393236478, "grad_norm": 1.7284140586853027, "learning_rate": 1.999404272796846e-05, "loss": 0.6819, "step": 495 }, { "epoch": 0.01109865595276412, "grad_norm": 2.7859838008880615, "learning_rate": 1.999392178347048e-05, "loss": 0.7583, "step": 500 }, { "epoch": 0.011209642512291762, "grad_norm": 2.0837457180023193, "learning_rate": 1.999379962394501e-05, "loss": 0.6279, "step": 505 }, { "epoch": 0.011320629071819403, "grad_norm": 2.999171733856201, "learning_rate": 1.9993676249406895e-05, "loss": 0.6355, "step": 510 }, { "epoch": 0.011431615631347043, "grad_norm": 2.121840238571167, "learning_rate": 1.9993551659871138e-05, "loss": 0.742, "step": 515 }, { "epoch": 0.011542602190874685, "grad_norm": 1.540635585784912, "learning_rate": 1.9993425855352887e-05, "loss": 0.6726, "step": 520 }, { "epoch": 0.011653588750402327, "grad_norm": 1.9277762174606323, "learning_rate": 1.999329883586744e-05, "loss": 0.608, "step": 525 }, { "epoch": 0.011764575309929968, "grad_norm": 1.9572395086288452, "learning_rate": 1.9993170601430233e-05, "loss": 0.657, "step": 530 }, { "epoch": 0.011875561869457608, "grad_norm": 1.517907977104187, "learning_rate": 1.9993041152056856e-05, "loss": 0.5259, "step": 535 }, { "epoch": 0.01198654842898525, "grad_norm": 1.538671612739563, "learning_rate": 1.9992910487763052e-05, "loss": 0.7345, "step": 540 }, { "epoch": 0.012097534988512891, "grad_norm": 1.4196572303771973, "learning_rate": 1.99927786085647e-05, "loss": 0.6745, "step": 545 }, { "epoch": 0.012208521548040533, "grad_norm": 1.7517229318618774, "learning_rate": 1.9992645514477843e-05, "loss": 0.5735, "step": 550 }, { "epoch": 0.012319508107568173, "grad_norm": 2.0889716148376465, "learning_rate": 1.9992511205518656e-05, "loss": 0.6405, "step": 555 }, { "epoch": 0.012430494667095815, "grad_norm": 1.3219823837280273, "learning_rate": 1.999237568170347e-05, "loss": 0.6927, "step": 560 }, { "epoch": 0.012541481226623456, "grad_norm": 1.0619243383407593, "learning_rate": 1.999223894304876e-05, "loss": 0.6309, "step": 565 }, { "epoch": 0.012652467786151098, "grad_norm": 1.5480470657348633, "learning_rate": 1.999210098957115e-05, "loss": 0.5383, "step": 570 }, { "epoch": 0.012763454345678738, "grad_norm": 1.854567289352417, "learning_rate": 1.9991961821287412e-05, "loss": 0.6096, "step": 575 }, { "epoch": 0.01287444090520638, "grad_norm": 1.3984990119934082, "learning_rate": 1.999182143821447e-05, "loss": 0.5172, "step": 580 }, { "epoch": 0.012985427464734021, "grad_norm": 1.5796399116516113, "learning_rate": 1.9991679840369383e-05, "loss": 0.6999, "step": 585 }, { "epoch": 0.013096414024261661, "grad_norm": 1.6039270162582397, "learning_rate": 1.999153702776937e-05, "loss": 0.7429, "step": 590 }, { "epoch": 0.013207400583789303, "grad_norm": 1.6594629287719727, "learning_rate": 1.9991393000431798e-05, "loss": 0.5898, "step": 595 }, { "epoch": 0.013318387143316944, "grad_norm": 1.4121723175048828, "learning_rate": 1.999124775837417e-05, "loss": 0.9492, "step": 600 }, { "epoch": 0.013429373702844586, "grad_norm": 1.264452576637268, "learning_rate": 1.999110130161415e-05, "loss": 0.5776, "step": 605 }, { "epoch": 0.013540360262372226, "grad_norm": 1.4184213876724243, "learning_rate": 1.999095363016954e-05, "loss": 0.6565, "step": 610 }, { "epoch": 0.013651346821899868, "grad_norm": 1.5732219219207764, "learning_rate": 1.9990804744058294e-05, "loss": 0.491, "step": 615 }, { "epoch": 0.01376233338142751, "grad_norm": 2.139857053756714, "learning_rate": 1.9990654643298514e-05, "loss": 0.608, "step": 620 }, { "epoch": 0.013873319940955151, "grad_norm": 1.4825429916381836, "learning_rate": 1.9990503327908452e-05, "loss": 0.6254, "step": 625 }, { "epoch": 0.01398430650048279, "grad_norm": 1.615256905555725, "learning_rate": 1.9990350797906497e-05, "loss": 0.628, "step": 630 }, { "epoch": 0.014095293060010432, "grad_norm": 1.2488242387771606, "learning_rate": 1.99901970533112e-05, "loss": 0.6429, "step": 635 }, { "epoch": 0.014206279619538074, "grad_norm": 3.8135104179382324, "learning_rate": 1.9990042094141246e-05, "loss": 0.5945, "step": 640 }, { "epoch": 0.014317266179065716, "grad_norm": 1.429972767829895, "learning_rate": 1.9989885920415483e-05, "loss": 0.7532, "step": 645 }, { "epoch": 0.014428252738593356, "grad_norm": 1.3050944805145264, "learning_rate": 1.998972853215289e-05, "loss": 0.5597, "step": 650 }, { "epoch": 0.014539239298120997, "grad_norm": 1.4591888189315796, "learning_rate": 1.9989569929372604e-05, "loss": 0.6986, "step": 655 }, { "epoch": 0.014650225857648639, "grad_norm": 1.4118572473526, "learning_rate": 1.9989410112093914e-05, "loss": 0.71, "step": 660 }, { "epoch": 0.01476121241717628, "grad_norm": 1.1846052408218384, "learning_rate": 1.9989249080336236e-05, "loss": 0.6567, "step": 665 }, { "epoch": 0.01487219897670392, "grad_norm": 1.5836447477340698, "learning_rate": 1.9989086834119164e-05, "loss": 0.7658, "step": 670 }, { "epoch": 0.014983185536231562, "grad_norm": 1.3657773733139038, "learning_rate": 1.998892337346241e-05, "loss": 0.4862, "step": 675 }, { "epoch": 0.015094172095759204, "grad_norm": 1.6087766885757446, "learning_rate": 1.9988758698385854e-05, "loss": 0.5732, "step": 680 }, { "epoch": 0.015205158655286845, "grad_norm": 1.1223222017288208, "learning_rate": 1.9988592808909514e-05, "loss": 0.6215, "step": 685 }, { "epoch": 0.015316145214814485, "grad_norm": 1.4434326887130737, "learning_rate": 1.998842570505356e-05, "loss": 0.6406, "step": 690 }, { "epoch": 0.015427131774342127, "grad_norm": 1.4812527894973755, "learning_rate": 1.9988257386838313e-05, "loss": 0.7588, "step": 695 }, { "epoch": 0.015538118333869769, "grad_norm": 1.7070807218551636, "learning_rate": 1.9988087854284224e-05, "loss": 0.6366, "step": 700 }, { "epoch": 0.01564910489339741, "grad_norm": 1.4044088125228882, "learning_rate": 1.9987917107411915e-05, "loss": 0.6937, "step": 705 }, { "epoch": 0.015760091452925052, "grad_norm": 1.299124836921692, "learning_rate": 1.998774514624214e-05, "loss": 0.5945, "step": 710 }, { "epoch": 0.015871078012452692, "grad_norm": 1.1433302164077759, "learning_rate": 1.9987571970795807e-05, "loss": 0.6471, "step": 715 }, { "epoch": 0.015982064571980332, "grad_norm": 1.9760017395019531, "learning_rate": 1.9987397581093966e-05, "loss": 0.6213, "step": 720 }, { "epoch": 0.016093051131507975, "grad_norm": 1.4171857833862305, "learning_rate": 1.9987221977157826e-05, "loss": 0.7268, "step": 725 }, { "epoch": 0.016204037691035615, "grad_norm": 1.2093497514724731, "learning_rate": 1.998704515900873e-05, "loss": 0.6168, "step": 730 }, { "epoch": 0.016315024250563255, "grad_norm": 1.9509270191192627, "learning_rate": 1.998686712666818e-05, "loss": 0.6815, "step": 735 }, { "epoch": 0.0164260108100909, "grad_norm": 1.7262238264083862, "learning_rate": 1.9986687880157815e-05, "loss": 0.6855, "step": 740 }, { "epoch": 0.01653699736961854, "grad_norm": 1.6272399425506592, "learning_rate": 1.9986507419499435e-05, "loss": 0.4647, "step": 745 }, { "epoch": 0.016647983929146182, "grad_norm": 1.5864605903625488, "learning_rate": 1.998632574471497e-05, "loss": 0.5712, "step": 750 }, { "epoch": 0.01675897048867382, "grad_norm": 2.9724490642547607, "learning_rate": 1.9986142855826515e-05, "loss": 0.618, "step": 755 }, { "epoch": 0.01686995704820146, "grad_norm": 1.1995495557785034, "learning_rate": 1.9985958752856304e-05, "loss": 0.4329, "step": 760 }, { "epoch": 0.016980943607729105, "grad_norm": 1.5253748893737793, "learning_rate": 1.9985773435826716e-05, "loss": 0.416, "step": 765 }, { "epoch": 0.017091930167256745, "grad_norm": 1.8420664072036743, "learning_rate": 1.9985586904760285e-05, "loss": 0.6585, "step": 770 }, { "epoch": 0.017202916726784385, "grad_norm": 1.2143425941467285, "learning_rate": 1.9985399159679684e-05, "loss": 0.5646, "step": 775 }, { "epoch": 0.017313903286312028, "grad_norm": 1.7242611646652222, "learning_rate": 1.9985210200607743e-05, "loss": 0.6403, "step": 780 }, { "epoch": 0.017424889845839668, "grad_norm": 1.7307857275009155, "learning_rate": 1.9985020027567433e-05, "loss": 0.651, "step": 785 }, { "epoch": 0.01753587640536731, "grad_norm": 1.5188896656036377, "learning_rate": 1.998482864058188e-05, "loss": 0.6383, "step": 790 }, { "epoch": 0.01764686296489495, "grad_norm": 1.3000458478927612, "learning_rate": 1.9984636039674342e-05, "loss": 0.5734, "step": 795 }, { "epoch": 0.01775784952442259, "grad_norm": 2.1906466484069824, "learning_rate": 1.998444222486824e-05, "loss": 0.6683, "step": 800 }, { "epoch": 0.017868836083950235, "grad_norm": 1.3485846519470215, "learning_rate": 1.998424719618714e-05, "loss": 0.5797, "step": 805 }, { "epoch": 0.017979822643477875, "grad_norm": 1.6266671419143677, "learning_rate": 1.998405095365475e-05, "loss": 0.5214, "step": 810 }, { "epoch": 0.018090809203005515, "grad_norm": 1.570114016532898, "learning_rate": 1.998385349729493e-05, "loss": 0.6316, "step": 815 }, { "epoch": 0.018201795762533158, "grad_norm": 1.1716421842575073, "learning_rate": 1.9983654827131685e-05, "loss": 0.6731, "step": 820 }, { "epoch": 0.018312782322060798, "grad_norm": 0.9852983951568604, "learning_rate": 1.9983454943189168e-05, "loss": 0.4708, "step": 825 }, { "epoch": 0.01842376888158844, "grad_norm": 1.6441328525543213, "learning_rate": 1.9983253845491676e-05, "loss": 0.5872, "step": 830 }, { "epoch": 0.01853475544111608, "grad_norm": 1.401671290397644, "learning_rate": 1.998305153406367e-05, "loss": 0.4897, "step": 835 }, { "epoch": 0.01864574200064372, "grad_norm": 1.5671908855438232, "learning_rate": 1.9982848008929736e-05, "loss": 0.7187, "step": 840 }, { "epoch": 0.018756728560171364, "grad_norm": 1.1266714334487915, "learning_rate": 1.9982643270114617e-05, "loss": 0.6468, "step": 845 }, { "epoch": 0.018867715119699004, "grad_norm": 1.074792504310608, "learning_rate": 1.9982437317643218e-05, "loss": 0.7326, "step": 850 }, { "epoch": 0.018978701679226644, "grad_norm": 1.2255818843841553, "learning_rate": 1.998223015154056e-05, "loss": 0.6069, "step": 855 }, { "epoch": 0.019089688238754288, "grad_norm": 1.5439560413360596, "learning_rate": 1.9982021771831845e-05, "loss": 0.7673, "step": 860 }, { "epoch": 0.019200674798281928, "grad_norm": 1.186937928199768, "learning_rate": 1.9981812178542394e-05, "loss": 0.5751, "step": 865 }, { "epoch": 0.01931166135780957, "grad_norm": 6.929201602935791, "learning_rate": 1.9981601371697693e-05, "loss": 0.5142, "step": 870 }, { "epoch": 0.01942264791733721, "grad_norm": 1.579282522201538, "learning_rate": 1.998138935132338e-05, "loss": 0.7182, "step": 875 }, { "epoch": 0.01953363447686485, "grad_norm": 1.0298939943313599, "learning_rate": 1.998117611744522e-05, "loss": 0.5654, "step": 880 }, { "epoch": 0.019644621036392494, "grad_norm": 1.312228798866272, "learning_rate": 1.9980961670089144e-05, "loss": 0.5333, "step": 885 }, { "epoch": 0.019755607595920134, "grad_norm": 1.3584381341934204, "learning_rate": 1.998074600928122e-05, "loss": 0.5383, "step": 890 }, { "epoch": 0.019866594155447774, "grad_norm": 1.1420618295669556, "learning_rate": 1.998052913504767e-05, "loss": 0.6038, "step": 895 }, { "epoch": 0.019977580714975417, "grad_norm": 1.7165864706039429, "learning_rate": 1.998031104741486e-05, "loss": 0.6515, "step": 900 }, { "epoch": 0.020088567274503057, "grad_norm": 1.3264087438583374, "learning_rate": 1.9980091746409303e-05, "loss": 0.6116, "step": 905 }, { "epoch": 0.020199553834030697, "grad_norm": 1.443520188331604, "learning_rate": 1.9979871232057665e-05, "loss": 0.5549, "step": 910 }, { "epoch": 0.02031054039355834, "grad_norm": 1.1463545560836792, "learning_rate": 1.997964950438675e-05, "loss": 0.6164, "step": 915 }, { "epoch": 0.02042152695308598, "grad_norm": 1.5313615798950195, "learning_rate": 1.997942656342352e-05, "loss": 0.6918, "step": 920 }, { "epoch": 0.020532513512613624, "grad_norm": 1.35427725315094, "learning_rate": 1.9979202409195073e-05, "loss": 0.5805, "step": 925 }, { "epoch": 0.020643500072141264, "grad_norm": 1.8106743097305298, "learning_rate": 1.9978977041728665e-05, "loss": 0.7505, "step": 930 }, { "epoch": 0.020754486631668904, "grad_norm": 1.3848850727081299, "learning_rate": 1.9978750461051698e-05, "loss": 0.8439, "step": 935 }, { "epoch": 0.020865473191196547, "grad_norm": 0.8859273195266724, "learning_rate": 1.9978522667191714e-05, "loss": 0.7039, "step": 940 }, { "epoch": 0.020976459750724187, "grad_norm": 1.45253324508667, "learning_rate": 1.997829366017641e-05, "loss": 0.5935, "step": 945 }, { "epoch": 0.021087446310251827, "grad_norm": 1.2368254661560059, "learning_rate": 1.997806344003363e-05, "loss": 0.585, "step": 950 }, { "epoch": 0.02119843286977947, "grad_norm": 1.277491807937622, "learning_rate": 1.997783200679136e-05, "loss": 0.6367, "step": 955 }, { "epoch": 0.02130941942930711, "grad_norm": 1.7970563173294067, "learning_rate": 1.997759936047773e-05, "loss": 0.4719, "step": 960 }, { "epoch": 0.021420405988834754, "grad_norm": 2.2373642921447754, "learning_rate": 1.997736550112104e-05, "loss": 0.5275, "step": 965 }, { "epoch": 0.021531392548362394, "grad_norm": 1.474535346031189, "learning_rate": 1.9977130428749715e-05, "loss": 0.5877, "step": 970 }, { "epoch": 0.021642379107890033, "grad_norm": 1.587221384048462, "learning_rate": 1.9976894143392326e-05, "loss": 0.617, "step": 975 }, { "epoch": 0.021753365667417677, "grad_norm": 1.6356884241104126, "learning_rate": 1.9976656645077613e-05, "loss": 0.6357, "step": 980 }, { "epoch": 0.021864352226945317, "grad_norm": 1.512257695198059, "learning_rate": 1.997641793383444e-05, "loss": 0.5766, "step": 985 }, { "epoch": 0.021975338786472957, "grad_norm": 1.3856985569000244, "learning_rate": 1.9976178009691836e-05, "loss": 0.5846, "step": 990 }, { "epoch": 0.0220863253460006, "grad_norm": 1.6915297508239746, "learning_rate": 1.997593687267897e-05, "loss": 0.608, "step": 995 }, { "epoch": 0.02219731190552824, "grad_norm": 1.709142804145813, "learning_rate": 1.997569452282515e-05, "loss": 0.6902, "step": 1000 }, { "epoch": 0.022308298465055883, "grad_norm": 1.3087146282196045, "learning_rate": 1.9975450960159847e-05, "loss": 0.648, "step": 1005 }, { "epoch": 0.022419285024583523, "grad_norm": 1.3179740905761719, "learning_rate": 1.9975206184712673e-05, "loss": 0.6586, "step": 1010 }, { "epoch": 0.022530271584111163, "grad_norm": 1.5364478826522827, "learning_rate": 1.9974960196513383e-05, "loss": 0.6554, "step": 1015 }, { "epoch": 0.022641258143638807, "grad_norm": 1.292578935623169, "learning_rate": 1.9974712995591887e-05, "loss": 0.6711, "step": 1020 }, { "epoch": 0.022752244703166447, "grad_norm": 1.5487346649169922, "learning_rate": 1.997446458197824e-05, "loss": 0.7997, "step": 1025 }, { "epoch": 0.022863231262694086, "grad_norm": 1.3361338376998901, "learning_rate": 1.9974214955702637e-05, "loss": 0.712, "step": 1030 }, { "epoch": 0.02297421782222173, "grad_norm": 1.3260635137557983, "learning_rate": 1.9973964116795432e-05, "loss": 0.643, "step": 1035 }, { "epoch": 0.02308520438174937, "grad_norm": 1.4649790525436401, "learning_rate": 1.997371206528712e-05, "loss": 0.6557, "step": 1040 }, { "epoch": 0.02319619094127701, "grad_norm": 1.4138814210891724, "learning_rate": 1.9973458801208342e-05, "loss": 0.6153, "step": 1045 }, { "epoch": 0.023307177500804653, "grad_norm": 1.391961932182312, "learning_rate": 1.9973204324589895e-05, "loss": 0.7188, "step": 1050 }, { "epoch": 0.023418164060332293, "grad_norm": 1.024633765220642, "learning_rate": 1.9972948635462712e-05, "loss": 0.6313, "step": 1055 }, { "epoch": 0.023529150619859936, "grad_norm": 1.5817911624908447, "learning_rate": 1.997269173385788e-05, "loss": 0.6091, "step": 1060 }, { "epoch": 0.023640137179387576, "grad_norm": 2.2141048908233643, "learning_rate": 1.9972433619806634e-05, "loss": 0.7489, "step": 1065 }, { "epoch": 0.023751123738915216, "grad_norm": 1.731985330581665, "learning_rate": 1.9972174293340355e-05, "loss": 0.4683, "step": 1070 }, { "epoch": 0.02386211029844286, "grad_norm": 1.7920399904251099, "learning_rate": 1.997191375449057e-05, "loss": 0.6375, "step": 1075 }, { "epoch": 0.0239730968579705, "grad_norm": 1.3139867782592773, "learning_rate": 1.9971652003288947e-05, "loss": 0.5733, "step": 1080 }, { "epoch": 0.02408408341749814, "grad_norm": 2.353349208831787, "learning_rate": 1.9971389039767323e-05, "loss": 0.6415, "step": 1085 }, { "epoch": 0.024195069977025783, "grad_norm": 1.2308602333068848, "learning_rate": 1.997112486395766e-05, "loss": 0.4836, "step": 1090 }, { "epoch": 0.024306056536553423, "grad_norm": 1.1983168125152588, "learning_rate": 1.997085947589207e-05, "loss": 0.5186, "step": 1095 }, { "epoch": 0.024417043096081066, "grad_norm": 1.446942687034607, "learning_rate": 1.9970592875602833e-05, "loss": 0.6611, "step": 1100 }, { "epoch": 0.024528029655608706, "grad_norm": 2.764981746673584, "learning_rate": 1.9970325063122348e-05, "loss": 0.7114, "step": 1105 }, { "epoch": 0.024639016215136346, "grad_norm": 1.2632938623428345, "learning_rate": 1.9970056038483184e-05, "loss": 0.5719, "step": 1110 }, { "epoch": 0.02475000277466399, "grad_norm": 1.2802735567092896, "learning_rate": 1.996978580171804e-05, "loss": 0.5928, "step": 1115 }, { "epoch": 0.02486098933419163, "grad_norm": 0.9999619722366333, "learning_rate": 1.9969514352859774e-05, "loss": 0.5643, "step": 1120 }, { "epoch": 0.02497197589371927, "grad_norm": 1.2582635879516602, "learning_rate": 1.996924169194139e-05, "loss": 0.5307, "step": 1125 }, { "epoch": 0.025082962453246913, "grad_norm": 1.2746455669403076, "learning_rate": 1.9968967818996036e-05, "loss": 0.6455, "step": 1130 }, { "epoch": 0.025193949012774552, "grad_norm": 1.2235496044158936, "learning_rate": 1.9968692734057006e-05, "loss": 0.7033, "step": 1135 }, { "epoch": 0.025304935572302196, "grad_norm": 1.1782463788986206, "learning_rate": 1.9968416437157743e-05, "loss": 0.4876, "step": 1140 }, { "epoch": 0.025415922131829836, "grad_norm": 1.56901216506958, "learning_rate": 1.9968138928331847e-05, "loss": 0.5141, "step": 1145 }, { "epoch": 0.025526908691357476, "grad_norm": 1.7434866428375244, "learning_rate": 1.9967860207613047e-05, "loss": 0.5785, "step": 1150 }, { "epoch": 0.02563789525088512, "grad_norm": 1.423734188079834, "learning_rate": 1.9967580275035234e-05, "loss": 0.5057, "step": 1155 }, { "epoch": 0.02574888181041276, "grad_norm": 1.0542765855789185, "learning_rate": 1.996729913063244e-05, "loss": 0.6828, "step": 1160 }, { "epoch": 0.0258598683699404, "grad_norm": 1.0192484855651855, "learning_rate": 1.9967016774438847e-05, "loss": 0.5117, "step": 1165 }, { "epoch": 0.025970854929468042, "grad_norm": 1.1470212936401367, "learning_rate": 1.9966733206488777e-05, "loss": 0.4174, "step": 1170 }, { "epoch": 0.026081841488995682, "grad_norm": 2.4811060428619385, "learning_rate": 1.996644842681671e-05, "loss": 0.4642, "step": 1175 }, { "epoch": 0.026192828048523322, "grad_norm": 1.4627830982208252, "learning_rate": 1.996616243545727e-05, "loss": 0.7485, "step": 1180 }, { "epoch": 0.026303814608050965, "grad_norm": 1.440354347229004, "learning_rate": 1.9965875232445227e-05, "loss": 0.5197, "step": 1185 }, { "epoch": 0.026414801167578605, "grad_norm": 1.37411630153656, "learning_rate": 1.9965586817815494e-05, "loss": 0.7286, "step": 1190 }, { "epoch": 0.02652578772710625, "grad_norm": 1.1004072427749634, "learning_rate": 1.996529719160314e-05, "loss": 0.6062, "step": 1195 }, { "epoch": 0.02663677428663389, "grad_norm": 1.3563573360443115, "learning_rate": 1.996500635384337e-05, "loss": 0.5376, "step": 1200 }, { "epoch": 0.02674776084616153, "grad_norm": 1.093072533607483, "learning_rate": 1.996471430457155e-05, "loss": 0.6083, "step": 1205 }, { "epoch": 0.026858747405689172, "grad_norm": 1.385481357574463, "learning_rate": 1.9964421043823186e-05, "loss": 0.5489, "step": 1210 }, { "epoch": 0.026969733965216812, "grad_norm": 1.5628325939178467, "learning_rate": 1.9964126571633925e-05, "loss": 0.5826, "step": 1215 }, { "epoch": 0.027080720524744452, "grad_norm": 1.501821517944336, "learning_rate": 1.9963830888039576e-05, "loss": 0.6989, "step": 1220 }, { "epoch": 0.027191707084272095, "grad_norm": 0.900786817073822, "learning_rate": 1.996353399307608e-05, "loss": 0.6027, "step": 1225 }, { "epoch": 0.027302693643799735, "grad_norm": 1.074896216392517, "learning_rate": 1.996323588677954e-05, "loss": 0.5475, "step": 1230 }, { "epoch": 0.02741368020332738, "grad_norm": 1.3888300657272339, "learning_rate": 1.9962936569186195e-05, "loss": 0.5201, "step": 1235 }, { "epoch": 0.02752466676285502, "grad_norm": 1.707251787185669, "learning_rate": 1.9962636040332432e-05, "loss": 0.7034, "step": 1240 }, { "epoch": 0.02763565332238266, "grad_norm": 1.8080699443817139, "learning_rate": 1.9962334300254796e-05, "loss": 0.7599, "step": 1245 }, { "epoch": 0.027746639881910302, "grad_norm": 1.0971603393554688, "learning_rate": 1.9962031348989962e-05, "loss": 0.587, "step": 1250 }, { "epoch": 0.02785762644143794, "grad_norm": 1.1529098749160767, "learning_rate": 1.9961727186574768e-05, "loss": 0.5845, "step": 1255 }, { "epoch": 0.02796861300096558, "grad_norm": 1.1753336191177368, "learning_rate": 1.9961421813046193e-05, "loss": 0.4196, "step": 1260 }, { "epoch": 0.028079599560493225, "grad_norm": 1.2487094402313232, "learning_rate": 1.9961115228441363e-05, "loss": 0.6387, "step": 1265 }, { "epoch": 0.028190586120020865, "grad_norm": 1.2633018493652344, "learning_rate": 1.9960807432797545e-05, "loss": 0.5321, "step": 1270 }, { "epoch": 0.02830157267954851, "grad_norm": 1.3093974590301514, "learning_rate": 1.996049842615217e-05, "loss": 0.4955, "step": 1275 }, { "epoch": 0.028412559239076148, "grad_norm": 1.301692247390747, "learning_rate": 1.99601882085428e-05, "loss": 0.7267, "step": 1280 }, { "epoch": 0.028523545798603788, "grad_norm": 1.0108011960983276, "learning_rate": 1.995987678000715e-05, "loss": 0.5799, "step": 1285 }, { "epoch": 0.02863453235813143, "grad_norm": 1.1895384788513184, "learning_rate": 1.9959564140583088e-05, "loss": 0.5349, "step": 1290 }, { "epoch": 0.02874551891765907, "grad_norm": 1.179839015007019, "learning_rate": 1.9959250290308617e-05, "loss": 0.6611, "step": 1295 }, { "epoch": 0.02885650547718671, "grad_norm": 1.1100138425827026, "learning_rate": 1.99589352292219e-05, "loss": 0.4495, "step": 1300 }, { "epoch": 0.028967492036714355, "grad_norm": 1.2159394025802612, "learning_rate": 1.9958618957361233e-05, "loss": 0.6009, "step": 1305 }, { "epoch": 0.029078478596241995, "grad_norm": 0.953895092010498, "learning_rate": 1.995830147476507e-05, "loss": 0.5071, "step": 1310 }, { "epoch": 0.029189465155769638, "grad_norm": 1.41812002658844, "learning_rate": 1.9957982781472016e-05, "loss": 0.603, "step": 1315 }, { "epoch": 0.029300451715297278, "grad_norm": 1.746551752090454, "learning_rate": 1.995766287752081e-05, "loss": 0.7819, "step": 1320 }, { "epoch": 0.029411438274824918, "grad_norm": 1.238605260848999, "learning_rate": 1.9957341762950346e-05, "loss": 0.6801, "step": 1325 }, { "epoch": 0.02952242483435256, "grad_norm": 1.1259163618087769, "learning_rate": 1.9957019437799666e-05, "loss": 0.6514, "step": 1330 }, { "epoch": 0.0296334113938802, "grad_norm": 1.6120734214782715, "learning_rate": 1.9956695902107956e-05, "loss": 0.5272, "step": 1335 }, { "epoch": 0.02974439795340784, "grad_norm": 1.0889034271240234, "learning_rate": 1.9956371155914552e-05, "loss": 0.6868, "step": 1340 }, { "epoch": 0.029855384512935484, "grad_norm": 1.2904348373413086, "learning_rate": 1.9956045199258927e-05, "loss": 0.6264, "step": 1345 }, { "epoch": 0.029966371072463124, "grad_norm": 1.2338993549346924, "learning_rate": 1.9955718032180725e-05, "loss": 0.5667, "step": 1350 }, { "epoch": 0.030077357631990764, "grad_norm": 1.6639360189437866, "learning_rate": 1.995538965471971e-05, "loss": 0.5418, "step": 1355 }, { "epoch": 0.030188344191518408, "grad_norm": 1.203410267829895, "learning_rate": 1.995506006691581e-05, "loss": 0.4512, "step": 1360 }, { "epoch": 0.030299330751046048, "grad_norm": 1.343341588973999, "learning_rate": 1.995472926880909e-05, "loss": 0.499, "step": 1365 }, { "epoch": 0.03041031731057369, "grad_norm": 1.2672549486160278, "learning_rate": 1.9954397260439777e-05, "loss": 0.6129, "step": 1370 }, { "epoch": 0.03052130387010133, "grad_norm": 1.2296212911605835, "learning_rate": 1.9954064041848223e-05, "loss": 0.607, "step": 1375 }, { "epoch": 0.03063229042962897, "grad_norm": 1.239903211593628, "learning_rate": 1.995372961307495e-05, "loss": 0.5965, "step": 1380 }, { "epoch": 0.030743276989156614, "grad_norm": 1.1556373834609985, "learning_rate": 1.995339397416061e-05, "loss": 0.5994, "step": 1385 }, { "epoch": 0.030854263548684254, "grad_norm": 0.9935384392738342, "learning_rate": 1.9953057125146017e-05, "loss": 0.4611, "step": 1390 }, { "epoch": 0.030965250108211894, "grad_norm": 0.9856865406036377, "learning_rate": 1.9952719066072115e-05, "loss": 0.4357, "step": 1395 }, { "epoch": 0.031076236667739537, "grad_norm": 1.1673684120178223, "learning_rate": 1.9952379796980007e-05, "loss": 0.5264, "step": 1400 }, { "epoch": 0.031187223227267177, "grad_norm": 1.4866957664489746, "learning_rate": 1.995203931791094e-05, "loss": 0.6887, "step": 1405 }, { "epoch": 0.03129820978679482, "grad_norm": 1.1869754791259766, "learning_rate": 1.9951697628906316e-05, "loss": 0.6099, "step": 1410 }, { "epoch": 0.03140919634632246, "grad_norm": 1.7387142181396484, "learning_rate": 1.9951354730007662e-05, "loss": 0.7121, "step": 1415 }, { "epoch": 0.031520182905850104, "grad_norm": 1.1650878190994263, "learning_rate": 1.9951010621256678e-05, "loss": 0.6531, "step": 1420 }, { "epoch": 0.031631169465377744, "grad_norm": 1.3697797060012817, "learning_rate": 1.9950665302695195e-05, "loss": 0.7252, "step": 1425 }, { "epoch": 0.031742156024905384, "grad_norm": 1.2272151708602905, "learning_rate": 1.9950318774365195e-05, "loss": 0.5068, "step": 1430 }, { "epoch": 0.031853142584433024, "grad_norm": 1.00801420211792, "learning_rate": 1.9949971036308814e-05, "loss": 0.6587, "step": 1435 }, { "epoch": 0.031964129143960664, "grad_norm": 0.9060214757919312, "learning_rate": 1.9949622088568323e-05, "loss": 0.7408, "step": 1440 }, { "epoch": 0.03207511570348831, "grad_norm": 1.053296685218811, "learning_rate": 1.994927193118614e-05, "loss": 0.678, "step": 1445 }, { "epoch": 0.03218610226301595, "grad_norm": 1.3420132398605347, "learning_rate": 1.994892056420485e-05, "loss": 0.7067, "step": 1450 }, { "epoch": 0.03229708882254359, "grad_norm": 1.5862083435058594, "learning_rate": 1.994856798766716e-05, "loss": 0.6518, "step": 1455 }, { "epoch": 0.03240807538207123, "grad_norm": 1.2782789468765259, "learning_rate": 1.994821420161594e-05, "loss": 0.4185, "step": 1460 }, { "epoch": 0.03251906194159887, "grad_norm": 2.185882568359375, "learning_rate": 1.9947859206094202e-05, "loss": 0.6727, "step": 1465 }, { "epoch": 0.03263004850112651, "grad_norm": 1.0432374477386475, "learning_rate": 1.9947503001145104e-05, "loss": 0.4949, "step": 1470 }, { "epoch": 0.03274103506065416, "grad_norm": 1.2504695653915405, "learning_rate": 1.9947145586811955e-05, "loss": 0.6297, "step": 1475 }, { "epoch": 0.0328520216201818, "grad_norm": 1.5974503755569458, "learning_rate": 1.99467869631382e-05, "loss": 0.5971, "step": 1480 }, { "epoch": 0.03296300817970944, "grad_norm": 1.4650466442108154, "learning_rate": 1.9946427130167446e-05, "loss": 0.564, "step": 1485 }, { "epoch": 0.03307399473923708, "grad_norm": 1.6814020872116089, "learning_rate": 1.9946066087943442e-05, "loss": 0.509, "step": 1490 }, { "epoch": 0.03318498129876472, "grad_norm": 1.4288887977600098, "learning_rate": 1.994570383651008e-05, "loss": 0.5716, "step": 1495 }, { "epoch": 0.033295967858292363, "grad_norm": 0.8717050552368164, "learning_rate": 1.99453403759114e-05, "loss": 0.5378, "step": 1500 }, { "epoch": 0.03340695441782, "grad_norm": 1.4430837631225586, "learning_rate": 1.994497570619159e-05, "loss": 0.6293, "step": 1505 }, { "epoch": 0.03351794097734764, "grad_norm": 1.6973576545715332, "learning_rate": 1.9944609827394986e-05, "loss": 0.6011, "step": 1510 }, { "epoch": 0.03362892753687528, "grad_norm": 1.0696516036987305, "learning_rate": 1.9944242739566072e-05, "loss": 0.5927, "step": 1515 }, { "epoch": 0.03373991409640292, "grad_norm": 1.1888644695281982, "learning_rate": 1.9943874442749478e-05, "loss": 0.6889, "step": 1520 }, { "epoch": 0.03385090065593057, "grad_norm": 1.1972169876098633, "learning_rate": 1.9943504936989978e-05, "loss": 0.6074, "step": 1525 }, { "epoch": 0.03396188721545821, "grad_norm": 1.407692313194275, "learning_rate": 1.9943134222332493e-05, "loss": 0.6392, "step": 1530 }, { "epoch": 0.03407287377498585, "grad_norm": 1.3288302421569824, "learning_rate": 1.9942762298822095e-05, "loss": 0.5331, "step": 1535 }, { "epoch": 0.03418386033451349, "grad_norm": 1.2641630172729492, "learning_rate": 1.9942389166504005e-05, "loss": 0.5805, "step": 1540 }, { "epoch": 0.03429484689404113, "grad_norm": 1.5590423345565796, "learning_rate": 1.9942014825423583e-05, "loss": 0.5848, "step": 1545 }, { "epoch": 0.03440583345356877, "grad_norm": 1.3845590353012085, "learning_rate": 1.9941639275626343e-05, "loss": 0.6635, "step": 1550 }, { "epoch": 0.034516820013096416, "grad_norm": 1.493355393409729, "learning_rate": 1.994126251715794e-05, "loss": 0.494, "step": 1555 }, { "epoch": 0.034627806572624056, "grad_norm": 1.3604371547698975, "learning_rate": 1.9940884550064182e-05, "loss": 0.6372, "step": 1560 }, { "epoch": 0.034738793132151696, "grad_norm": 1.330060601234436, "learning_rate": 1.994050537439102e-05, "loss": 0.6928, "step": 1565 }, { "epoch": 0.034849779691679336, "grad_norm": 1.2125601768493652, "learning_rate": 1.994012499018455e-05, "loss": 0.6707, "step": 1570 }, { "epoch": 0.034960766251206976, "grad_norm": 1.265838623046875, "learning_rate": 1.993974339749102e-05, "loss": 0.4771, "step": 1575 }, { "epoch": 0.03507175281073462, "grad_norm": 1.1168771982192993, "learning_rate": 1.9939360596356824e-05, "loss": 0.628, "step": 1580 }, { "epoch": 0.03518273937026226, "grad_norm": 1.2629988193511963, "learning_rate": 1.9938976586828503e-05, "loss": 0.766, "step": 1585 }, { "epoch": 0.0352937259297899, "grad_norm": 1.5460317134857178, "learning_rate": 1.993859136895274e-05, "loss": 0.6064, "step": 1590 }, { "epoch": 0.03540471248931754, "grad_norm": 1.1441147327423096, "learning_rate": 1.9938204942776367e-05, "loss": 0.5786, "step": 1595 }, { "epoch": 0.03551569904884518, "grad_norm": 1.3351715803146362, "learning_rate": 1.993781730834637e-05, "loss": 0.6855, "step": 1600 }, { "epoch": 0.03562668560837283, "grad_norm": 1.2413687705993652, "learning_rate": 1.9937428465709875e-05, "loss": 0.7534, "step": 1605 }, { "epoch": 0.03573767216790047, "grad_norm": 1.6053916215896606, "learning_rate": 1.993703841491415e-05, "loss": 0.6457, "step": 1610 }, { "epoch": 0.03584865872742811, "grad_norm": 1.6114028692245483, "learning_rate": 1.9936647156006623e-05, "loss": 0.5664, "step": 1615 }, { "epoch": 0.03595964528695575, "grad_norm": 1.053146243095398, "learning_rate": 1.9936254689034863e-05, "loss": 0.6224, "step": 1620 }, { "epoch": 0.03607063184648339, "grad_norm": 1.386884093284607, "learning_rate": 1.9935861014046578e-05, "loss": 0.677, "step": 1625 }, { "epoch": 0.03618161840601103, "grad_norm": 1.376198410987854, "learning_rate": 1.993546613108963e-05, "loss": 0.8053, "step": 1630 }, { "epoch": 0.036292604965538676, "grad_norm": 1.3916345834732056, "learning_rate": 1.9935070040212038e-05, "loss": 0.6456, "step": 1635 }, { "epoch": 0.036403591525066316, "grad_norm": 1.714755654335022, "learning_rate": 1.9934672741461946e-05, "loss": 0.579, "step": 1640 }, { "epoch": 0.036514578084593956, "grad_norm": 1.1382675170898438, "learning_rate": 1.993427423488766e-05, "loss": 0.5205, "step": 1645 }, { "epoch": 0.036625564644121596, "grad_norm": 3.2711870670318604, "learning_rate": 1.993387452053763e-05, "loss": 0.7955, "step": 1650 }, { "epoch": 0.036736551203649236, "grad_norm": 1.1714918613433838, "learning_rate": 1.9933473598460454e-05, "loss": 0.6621, "step": 1655 }, { "epoch": 0.03684753776317688, "grad_norm": 1.2545034885406494, "learning_rate": 1.993307146870487e-05, "loss": 0.6243, "step": 1660 }, { "epoch": 0.03695852432270452, "grad_norm": 1.188735008239746, "learning_rate": 1.993266813131977e-05, "loss": 0.7094, "step": 1665 }, { "epoch": 0.03706951088223216, "grad_norm": 1.543319582939148, "learning_rate": 1.993226358635419e-05, "loss": 0.6565, "step": 1670 }, { "epoch": 0.0371804974417598, "grad_norm": 0.9534319043159485, "learning_rate": 1.9931857833857313e-05, "loss": 0.4431, "step": 1675 }, { "epoch": 0.03729148400128744, "grad_norm": 1.7737338542938232, "learning_rate": 1.993145087387847e-05, "loss": 0.735, "step": 1680 }, { "epoch": 0.03740247056081508, "grad_norm": 1.04007089138031, "learning_rate": 1.993104270646714e-05, "loss": 0.5504, "step": 1685 }, { "epoch": 0.03751345712034273, "grad_norm": 1.4022916555404663, "learning_rate": 1.993063333167294e-05, "loss": 0.5101, "step": 1690 }, { "epoch": 0.03762444367987037, "grad_norm": 1.1499395370483398, "learning_rate": 1.9930222749545643e-05, "loss": 0.5378, "step": 1695 }, { "epoch": 0.03773543023939801, "grad_norm": 1.1989136934280396, "learning_rate": 1.992981096013517e-05, "loss": 0.7049, "step": 1700 }, { "epoch": 0.03784641679892565, "grad_norm": 1.572718620300293, "learning_rate": 1.9929397963491583e-05, "loss": 0.6205, "step": 1705 }, { "epoch": 0.03795740335845329, "grad_norm": 1.2384471893310547, "learning_rate": 1.9928983759665092e-05, "loss": 0.6183, "step": 1710 }, { "epoch": 0.038068389917980935, "grad_norm": 1.4257042407989502, "learning_rate": 1.9928568348706053e-05, "loss": 0.5604, "step": 1715 }, { "epoch": 0.038179376477508575, "grad_norm": 0.9981622695922852, "learning_rate": 1.9928151730664975e-05, "loss": 0.656, "step": 1720 }, { "epoch": 0.038290363037036215, "grad_norm": 1.3974087238311768, "learning_rate": 1.9927733905592505e-05, "loss": 0.7721, "step": 1725 }, { "epoch": 0.038401349596563855, "grad_norm": 1.5321563482284546, "learning_rate": 1.992731487353944e-05, "loss": 0.6978, "step": 1730 }, { "epoch": 0.038512336156091495, "grad_norm": 1.3335909843444824, "learning_rate": 1.9926894634556726e-05, "loss": 0.651, "step": 1735 }, { "epoch": 0.03862332271561914, "grad_norm": 1.35151207447052, "learning_rate": 1.992647318869546e-05, "loss": 0.5383, "step": 1740 }, { "epoch": 0.03873430927514678, "grad_norm": 1.6774109601974487, "learning_rate": 1.992605053600687e-05, "loss": 0.6962, "step": 1745 }, { "epoch": 0.03884529583467442, "grad_norm": 0.9621152877807617, "learning_rate": 1.992562667654234e-05, "loss": 0.5274, "step": 1750 }, { "epoch": 0.03895628239420206, "grad_norm": 2.0539510250091553, "learning_rate": 1.9925201610353415e-05, "loss": 0.7013, "step": 1755 }, { "epoch": 0.0390672689537297, "grad_norm": 1.0954846143722534, "learning_rate": 1.992477533749176e-05, "loss": 0.5413, "step": 1760 }, { "epoch": 0.03917825551325734, "grad_norm": 1.2433022260665894, "learning_rate": 1.992434785800921e-05, "loss": 0.5258, "step": 1765 }, { "epoch": 0.03928924207278499, "grad_norm": 1.1122106313705444, "learning_rate": 1.992391917195773e-05, "loss": 0.5136, "step": 1770 }, { "epoch": 0.03940022863231263, "grad_norm": 1.3719093799591064, "learning_rate": 1.9923489279389433e-05, "loss": 0.5683, "step": 1775 }, { "epoch": 0.03951121519184027, "grad_norm": 1.1773172616958618, "learning_rate": 1.9923058180356595e-05, "loss": 0.5897, "step": 1780 }, { "epoch": 0.03962220175136791, "grad_norm": 1.156784176826477, "learning_rate": 1.9922625874911624e-05, "loss": 0.4755, "step": 1785 }, { "epoch": 0.03973318831089555, "grad_norm": 1.1960108280181885, "learning_rate": 1.9922192363107075e-05, "loss": 0.7606, "step": 1790 }, { "epoch": 0.039844174870423195, "grad_norm": 1.7010750770568848, "learning_rate": 1.9921757644995656e-05, "loss": 0.5427, "step": 1795 }, { "epoch": 0.039955161429950835, "grad_norm": 1.354822039604187, "learning_rate": 1.9921321720630216e-05, "loss": 0.5386, "step": 1800 }, { "epoch": 0.040066147989478475, "grad_norm": 1.4349123239517212, "learning_rate": 1.9920884590063755e-05, "loss": 0.6347, "step": 1805 }, { "epoch": 0.040177134549006115, "grad_norm": 1.2014262676239014, "learning_rate": 1.9920446253349417e-05, "loss": 0.7894, "step": 1810 }, { "epoch": 0.040288121108533755, "grad_norm": 1.3427788019180298, "learning_rate": 1.9920006710540495e-05, "loss": 0.6631, "step": 1815 }, { "epoch": 0.040399107668061394, "grad_norm": 1.188988447189331, "learning_rate": 1.9919565961690426e-05, "loss": 0.598, "step": 1820 }, { "epoch": 0.04051009422758904, "grad_norm": 1.388492226600647, "learning_rate": 1.9919124006852794e-05, "loss": 0.6441, "step": 1825 }, { "epoch": 0.04062108078711668, "grad_norm": 1.0690385103225708, "learning_rate": 1.9918680846081334e-05, "loss": 0.6082, "step": 1830 }, { "epoch": 0.04073206734664432, "grad_norm": 1.3798823356628418, "learning_rate": 1.991823647942992e-05, "loss": 0.7001, "step": 1835 }, { "epoch": 0.04084305390617196, "grad_norm": 1.3722220659255981, "learning_rate": 1.991779090695258e-05, "loss": 0.5362, "step": 1840 }, { "epoch": 0.0409540404656996, "grad_norm": 1.292083501815796, "learning_rate": 1.991734412870348e-05, "loss": 0.508, "step": 1845 }, { "epoch": 0.04106502702522725, "grad_norm": 1.1517761945724487, "learning_rate": 1.9916896144736943e-05, "loss": 0.3987, "step": 1850 }, { "epoch": 0.04117601358475489, "grad_norm": 1.3085436820983887, "learning_rate": 1.991644695510743e-05, "loss": 0.748, "step": 1855 }, { "epoch": 0.04128700014428253, "grad_norm": 1.3864504098892212, "learning_rate": 1.9915996559869553e-05, "loss": 0.7465, "step": 1860 }, { "epoch": 0.04139798670381017, "grad_norm": 1.2560830116271973, "learning_rate": 1.9915544959078072e-05, "loss": 0.5718, "step": 1865 }, { "epoch": 0.04150897326333781, "grad_norm": 1.312229871749878, "learning_rate": 1.9915092152787888e-05, "loss": 0.5943, "step": 1870 }, { "epoch": 0.041619959822865454, "grad_norm": 1.272802710533142, "learning_rate": 1.9914638141054053e-05, "loss": 0.5184, "step": 1875 }, { "epoch": 0.041730946382393094, "grad_norm": 1.5974937677383423, "learning_rate": 1.9914182923931766e-05, "loss": 0.6155, "step": 1880 }, { "epoch": 0.041841932941920734, "grad_norm": 1.1618620157241821, "learning_rate": 1.9913726501476366e-05, "loss": 0.5379, "step": 1885 }, { "epoch": 0.041952919501448374, "grad_norm": 0.9664960503578186, "learning_rate": 1.9913268873743342e-05, "loss": 0.6048, "step": 1890 }, { "epoch": 0.042063906060976014, "grad_norm": 1.270060420036316, "learning_rate": 1.991281004078834e-05, "loss": 0.59, "step": 1895 }, { "epoch": 0.042174892620503654, "grad_norm": 1.2572544813156128, "learning_rate": 1.9912350002667137e-05, "loss": 0.7689, "step": 1900 }, { "epoch": 0.0422858791800313, "grad_norm": 1.559762954711914, "learning_rate": 1.9911888759435665e-05, "loss": 0.5408, "step": 1905 }, { "epoch": 0.04239686573955894, "grad_norm": 1.2439285516738892, "learning_rate": 1.991142631115e-05, "loss": 0.5589, "step": 1910 }, { "epoch": 0.04250785229908658, "grad_norm": 1.8407187461853027, "learning_rate": 1.9910962657866366e-05, "loss": 0.4827, "step": 1915 }, { "epoch": 0.04261883885861422, "grad_norm": 1.6191024780273438, "learning_rate": 1.9910497799641126e-05, "loss": 0.5419, "step": 1920 }, { "epoch": 0.04272982541814186, "grad_norm": 1.4953969717025757, "learning_rate": 1.9910031736530803e-05, "loss": 0.5792, "step": 1925 }, { "epoch": 0.04284081197766951, "grad_norm": 1.3562936782836914, "learning_rate": 1.990956446859206e-05, "loss": 0.5718, "step": 1930 }, { "epoch": 0.04295179853719715, "grad_norm": 1.5445222854614258, "learning_rate": 1.9909095995881697e-05, "loss": 0.5163, "step": 1935 }, { "epoch": 0.04306278509672479, "grad_norm": 1.349182367324829, "learning_rate": 1.990862631845668e-05, "loss": 0.6125, "step": 1940 }, { "epoch": 0.04317377165625243, "grad_norm": 1.4996899366378784, "learning_rate": 1.9908155436374102e-05, "loss": 0.5883, "step": 1945 }, { "epoch": 0.04328475821578007, "grad_norm": 1.2809373140335083, "learning_rate": 1.990768334969122e-05, "loss": 0.5395, "step": 1950 }, { "epoch": 0.04339574477530771, "grad_norm": 2.493131399154663, "learning_rate": 1.990721005846542e-05, "loss": 0.5445, "step": 1955 }, { "epoch": 0.043506731334835354, "grad_norm": 0.9056739211082458, "learning_rate": 1.9906735562754253e-05, "loss": 0.5177, "step": 1960 }, { "epoch": 0.043617717894362994, "grad_norm": 1.4022185802459717, "learning_rate": 1.9906259862615396e-05, "loss": 0.6715, "step": 1965 }, { "epoch": 0.043728704453890634, "grad_norm": 1.3620365858078003, "learning_rate": 1.990578295810669e-05, "loss": 0.6178, "step": 1970 }, { "epoch": 0.043839691013418274, "grad_norm": 1.3010473251342773, "learning_rate": 1.9905304849286114e-05, "loss": 0.6329, "step": 1975 }, { "epoch": 0.04395067757294591, "grad_norm": 1.640214443206787, "learning_rate": 1.9904825536211793e-05, "loss": 0.6614, "step": 1980 }, { "epoch": 0.04406166413247356, "grad_norm": 1.3884638547897339, "learning_rate": 1.9904345018942e-05, "loss": 0.6241, "step": 1985 }, { "epoch": 0.0441726506920012, "grad_norm": 1.1741259098052979, "learning_rate": 1.990386329753516e-05, "loss": 0.5398, "step": 1990 }, { "epoch": 0.04428363725152884, "grad_norm": 1.1659259796142578, "learning_rate": 1.9903380372049832e-05, "loss": 0.6726, "step": 1995 }, { "epoch": 0.04439462381105648, "grad_norm": 1.649901270866394, "learning_rate": 1.990289624254473e-05, "loss": 0.5993, "step": 2000 }, { "epoch": 0.04450561037058412, "grad_norm": 1.0309951305389404, "learning_rate": 1.990241090907872e-05, "loss": 0.6359, "step": 2005 }, { "epoch": 0.04461659693011177, "grad_norm": 1.2367216348648071, "learning_rate": 1.99019243717108e-05, "loss": 0.6339, "step": 2010 }, { "epoch": 0.04472758348963941, "grad_norm": 2.2584967613220215, "learning_rate": 1.9901436630500122e-05, "loss": 0.7368, "step": 2015 }, { "epoch": 0.04483857004916705, "grad_norm": 1.4870028495788574, "learning_rate": 1.9900947685505983e-05, "loss": 0.5566, "step": 2020 }, { "epoch": 0.04494955660869469, "grad_norm": 1.9198880195617676, "learning_rate": 1.9900457536787834e-05, "loss": 0.582, "step": 2025 }, { "epoch": 0.045060543168222326, "grad_norm": 1.3042656183242798, "learning_rate": 1.9899966184405255e-05, "loss": 0.552, "step": 2030 }, { "epoch": 0.045171529727749966, "grad_norm": 1.6364234685897827, "learning_rate": 1.9899473628417997e-05, "loss": 0.6122, "step": 2035 }, { "epoch": 0.04528251628727761, "grad_norm": 1.215649962425232, "learning_rate": 1.9898979868885933e-05, "loss": 0.4283, "step": 2040 }, { "epoch": 0.04539350284680525, "grad_norm": 1.1958799362182617, "learning_rate": 1.9898484905869095e-05, "loss": 0.6035, "step": 2045 }, { "epoch": 0.04550448940633289, "grad_norm": 1.2653090953826904, "learning_rate": 1.989798873942766e-05, "loss": 0.6591, "step": 2050 }, { "epoch": 0.04561547596586053, "grad_norm": 1.3977992534637451, "learning_rate": 1.9897491369621945e-05, "loss": 0.5056, "step": 2055 }, { "epoch": 0.04572646252538817, "grad_norm": 1.7142751216888428, "learning_rate": 1.9896992796512427e-05, "loss": 0.7221, "step": 2060 }, { "epoch": 0.04583744908491582, "grad_norm": 1.326062560081482, "learning_rate": 1.9896493020159715e-05, "loss": 0.5673, "step": 2065 }, { "epoch": 0.04594843564444346, "grad_norm": 1.3358633518218994, "learning_rate": 1.9895992040624573e-05, "loss": 0.4271, "step": 2070 }, { "epoch": 0.0460594222039711, "grad_norm": 1.0794224739074707, "learning_rate": 1.9895489857967908e-05, "loss": 0.5734, "step": 2075 }, { "epoch": 0.04617040876349874, "grad_norm": 1.3193761110305786, "learning_rate": 1.989498647225077e-05, "loss": 0.4369, "step": 2080 }, { "epoch": 0.04628139532302638, "grad_norm": 1.1502362489700317, "learning_rate": 1.9894481883534364e-05, "loss": 0.5416, "step": 2085 }, { "epoch": 0.04639238188255402, "grad_norm": 1.6109846830368042, "learning_rate": 1.9893976091880033e-05, "loss": 0.5072, "step": 2090 }, { "epoch": 0.046503368442081666, "grad_norm": 1.4186127185821533, "learning_rate": 1.989346909734927e-05, "loss": 0.4665, "step": 2095 }, { "epoch": 0.046614355001609306, "grad_norm": 1.2119293212890625, "learning_rate": 1.9892960900003716e-05, "loss": 0.5227, "step": 2100 }, { "epoch": 0.046725341561136946, "grad_norm": 1.1812269687652588, "learning_rate": 1.9892451499905153e-05, "loss": 0.5563, "step": 2105 }, { "epoch": 0.046836328120664586, "grad_norm": 1.3973302841186523, "learning_rate": 1.9891940897115513e-05, "loss": 0.4548, "step": 2110 }, { "epoch": 0.046947314680192226, "grad_norm": 2.288679361343384, "learning_rate": 1.9891429091696873e-05, "loss": 0.4953, "step": 2115 }, { "epoch": 0.04705830123971987, "grad_norm": 1.3049923181533813, "learning_rate": 1.9890916083711463e-05, "loss": 0.5927, "step": 2120 }, { "epoch": 0.04716928779924751, "grad_norm": 1.1998517513275146, "learning_rate": 1.9890401873221642e-05, "loss": 0.6343, "step": 2125 }, { "epoch": 0.04728027435877515, "grad_norm": 1.413918375968933, "learning_rate": 1.988988646028993e-05, "loss": 0.5395, "step": 2130 }, { "epoch": 0.04739126091830279, "grad_norm": 1.0503027439117432, "learning_rate": 1.9889369844978996e-05, "loss": 0.6966, "step": 2135 }, { "epoch": 0.04750224747783043, "grad_norm": 0.8282071352005005, "learning_rate": 1.9888852027351636e-05, "loss": 0.535, "step": 2140 }, { "epoch": 0.04761323403735808, "grad_norm": 3.530345916748047, "learning_rate": 1.9888333007470815e-05, "loss": 0.6562, "step": 2145 }, { "epoch": 0.04772422059688572, "grad_norm": 1.287862777709961, "learning_rate": 1.988781278539963e-05, "loss": 0.749, "step": 2150 }, { "epoch": 0.04783520715641336, "grad_norm": 1.1997264623641968, "learning_rate": 1.9887291361201328e-05, "loss": 0.7251, "step": 2155 }, { "epoch": 0.047946193715941, "grad_norm": 1.6142421960830688, "learning_rate": 1.9886768734939297e-05, "loss": 0.5778, "step": 2160 }, { "epoch": 0.04805718027546864, "grad_norm": 1.3144336938858032, "learning_rate": 1.9886244906677087e-05, "loss": 0.6244, "step": 2165 }, { "epoch": 0.04816816683499628, "grad_norm": 1.6684969663619995, "learning_rate": 1.9885719876478374e-05, "loss": 0.6855, "step": 2170 }, { "epoch": 0.048279153394523926, "grad_norm": 1.1297353506088257, "learning_rate": 1.9885193644406994e-05, "loss": 0.5669, "step": 2175 }, { "epoch": 0.048390139954051566, "grad_norm": 13.155599594116211, "learning_rate": 1.988466621052692e-05, "loss": 0.6243, "step": 2180 }, { "epoch": 0.048501126513579206, "grad_norm": 1.2790757417678833, "learning_rate": 1.988413757490228e-05, "loss": 0.499, "step": 2185 }, { "epoch": 0.048612113073106845, "grad_norm": 3.179291248321533, "learning_rate": 1.9883607737597344e-05, "loss": 0.6489, "step": 2190 }, { "epoch": 0.048723099632634485, "grad_norm": 1.96470046043396, "learning_rate": 1.9883076698676523e-05, "loss": 0.6279, "step": 2195 }, { "epoch": 0.04883408619216213, "grad_norm": 2.6033077239990234, "learning_rate": 1.9882544458204386e-05, "loss": 0.5934, "step": 2200 }, { "epoch": 0.04894507275168977, "grad_norm": 1.5848404169082642, "learning_rate": 1.988201101624564e-05, "loss": 0.5393, "step": 2205 }, { "epoch": 0.04905605931121741, "grad_norm": 1.7415422201156616, "learning_rate": 1.988147637286513e-05, "loss": 0.7068, "step": 2210 }, { "epoch": 0.04916704587074505, "grad_norm": 1.6085535287857056, "learning_rate": 1.9880940528127866e-05, "loss": 0.6785, "step": 2215 }, { "epoch": 0.04927803243027269, "grad_norm": 1.283326506614685, "learning_rate": 1.9880403482098985e-05, "loss": 0.4254, "step": 2220 }, { "epoch": 0.04938901898980033, "grad_norm": 1.446057915687561, "learning_rate": 1.9879865234843795e-05, "loss": 0.5926, "step": 2225 }, { "epoch": 0.04950000554932798, "grad_norm": 1.6062350273132324, "learning_rate": 1.9879325786427716e-05, "loss": 0.7388, "step": 2230 }, { "epoch": 0.04961099210885562, "grad_norm": 1.3095510005950928, "learning_rate": 1.9878785136916343e-05, "loss": 0.6358, "step": 2235 }, { "epoch": 0.04972197866838326, "grad_norm": 1.1560978889465332, "learning_rate": 1.9878243286375405e-05, "loss": 0.5217, "step": 2240 }, { "epoch": 0.0498329652279109, "grad_norm": 1.2814440727233887, "learning_rate": 1.9877700234870775e-05, "loss": 0.6681, "step": 2245 }, { "epoch": 0.04994395178743854, "grad_norm": 1.244834065437317, "learning_rate": 1.9877155982468478e-05, "loss": 0.6368, "step": 2250 }, { "epoch": 0.050054938346966185, "grad_norm": 1.2957700490951538, "learning_rate": 1.9876610529234686e-05, "loss": 0.6633, "step": 2255 }, { "epoch": 0.050165924906493825, "grad_norm": 1.300630807876587, "learning_rate": 1.987606387523571e-05, "loss": 0.5377, "step": 2260 }, { "epoch": 0.050276911466021465, "grad_norm": 1.4381990432739258, "learning_rate": 1.9875516020538e-05, "loss": 0.4867, "step": 2265 }, { "epoch": 0.050387898025549105, "grad_norm": 1.7026110887527466, "learning_rate": 1.987496696520818e-05, "loss": 0.6134, "step": 2270 }, { "epoch": 0.050498884585076745, "grad_norm": 1.3532987833023071, "learning_rate": 1.9874416709312994e-05, "loss": 0.8066, "step": 2275 }, { "epoch": 0.05060987114460439, "grad_norm": 1.2888226509094238, "learning_rate": 1.987386525291934e-05, "loss": 0.5613, "step": 2280 }, { "epoch": 0.05072085770413203, "grad_norm": 1.1642128229141235, "learning_rate": 1.9873312596094264e-05, "loss": 0.5735, "step": 2285 }, { "epoch": 0.05083184426365967, "grad_norm": 1.2660547494888306, "learning_rate": 1.9872758738904952e-05, "loss": 0.6068, "step": 2290 }, { "epoch": 0.05094283082318731, "grad_norm": 1.2771880626678467, "learning_rate": 1.9872203681418745e-05, "loss": 0.575, "step": 2295 }, { "epoch": 0.05105381738271495, "grad_norm": 0.9723901152610779, "learning_rate": 1.9871647423703126e-05, "loss": 0.5135, "step": 2300 }, { "epoch": 0.05116480394224259, "grad_norm": 1.1216762065887451, "learning_rate": 1.9871089965825713e-05, "loss": 0.5082, "step": 2305 }, { "epoch": 0.05127579050177024, "grad_norm": 1.0608837604522705, "learning_rate": 1.987053130785429e-05, "loss": 0.4207, "step": 2310 }, { "epoch": 0.05138677706129788, "grad_norm": 1.3452649116516113, "learning_rate": 1.9869971449856778e-05, "loss": 0.6087, "step": 2315 }, { "epoch": 0.05149776362082552, "grad_norm": 1.0215181112289429, "learning_rate": 1.9869410391901237e-05, "loss": 0.4663, "step": 2320 }, { "epoch": 0.05160875018035316, "grad_norm": 1.0570729970932007, "learning_rate": 1.986884813405588e-05, "loss": 0.5788, "step": 2325 }, { "epoch": 0.0517197367398808, "grad_norm": 1.1869689226150513, "learning_rate": 1.986828467638906e-05, "loss": 0.6118, "step": 2330 }, { "epoch": 0.051830723299408445, "grad_norm": 0.9934154748916626, "learning_rate": 1.9867720018969287e-05, "loss": 0.7029, "step": 2335 }, { "epoch": 0.051941709858936085, "grad_norm": 1.4993884563446045, "learning_rate": 1.9867154161865207e-05, "loss": 0.6805, "step": 2340 }, { "epoch": 0.052052696418463724, "grad_norm": 1.1254603862762451, "learning_rate": 1.9866587105145617e-05, "loss": 0.4247, "step": 2345 }, { "epoch": 0.052163682977991364, "grad_norm": 1.353786587715149, "learning_rate": 1.986601884887946e-05, "loss": 0.6416, "step": 2350 }, { "epoch": 0.052274669537519004, "grad_norm": 1.1726566553115845, "learning_rate": 1.9865449393135816e-05, "loss": 0.7834, "step": 2355 }, { "epoch": 0.052385656097046644, "grad_norm": 1.455482006072998, "learning_rate": 1.986487873798392e-05, "loss": 0.6141, "step": 2360 }, { "epoch": 0.05249664265657429, "grad_norm": 2.1738498210906982, "learning_rate": 1.9864306883493154e-05, "loss": 0.5314, "step": 2365 }, { "epoch": 0.05260762921610193, "grad_norm": 1.433218002319336, "learning_rate": 1.986373382973304e-05, "loss": 0.5529, "step": 2370 }, { "epoch": 0.05271861577562957, "grad_norm": 1.2258576154708862, "learning_rate": 1.9863159576773243e-05, "loss": 0.5788, "step": 2375 }, { "epoch": 0.05282960233515721, "grad_norm": 1.0413432121276855, "learning_rate": 1.9862584124683587e-05, "loss": 0.5304, "step": 2380 }, { "epoch": 0.05294058889468485, "grad_norm": 1.2066630125045776, "learning_rate": 1.9862007473534026e-05, "loss": 0.5745, "step": 2385 }, { "epoch": 0.0530515754542125, "grad_norm": 0.985165536403656, "learning_rate": 1.9861429623394676e-05, "loss": 0.5299, "step": 2390 }, { "epoch": 0.05316256201374014, "grad_norm": 1.5206027030944824, "learning_rate": 1.986085057433578e-05, "loss": 0.3735, "step": 2395 }, { "epoch": 0.05327354857326778, "grad_norm": 1.7408087253570557, "learning_rate": 1.9860270326427743e-05, "loss": 0.6242, "step": 2400 }, { "epoch": 0.05338453513279542, "grad_norm": 1.4238530397415161, "learning_rate": 1.985968887974111e-05, "loss": 0.764, "step": 2405 }, { "epoch": 0.05349552169232306, "grad_norm": 1.4702534675598145, "learning_rate": 1.985910623434657e-05, "loss": 0.5784, "step": 2410 }, { "epoch": 0.053606508251850704, "grad_norm": 1.1298567056655884, "learning_rate": 1.985852239031496e-05, "loss": 0.6189, "step": 2415 }, { "epoch": 0.053717494811378344, "grad_norm": 0.9069703221321106, "learning_rate": 1.985793734771726e-05, "loss": 0.5496, "step": 2420 }, { "epoch": 0.053828481370905984, "grad_norm": 1.1809360980987549, "learning_rate": 1.9857351106624595e-05, "loss": 0.3049, "step": 2425 }, { "epoch": 0.053939467930433624, "grad_norm": 2.4820363521575928, "learning_rate": 1.9856763667108243e-05, "loss": 0.484, "step": 2430 }, { "epoch": 0.054050454489961264, "grad_norm": 1.0644019842147827, "learning_rate": 1.9856175029239624e-05, "loss": 0.5912, "step": 2435 }, { "epoch": 0.054161441049488904, "grad_norm": 1.3724855184555054, "learning_rate": 1.9855585193090297e-05, "loss": 0.4491, "step": 2440 }, { "epoch": 0.05427242760901655, "grad_norm": 1.0048718452453613, "learning_rate": 1.9854994158731978e-05, "loss": 0.4543, "step": 2445 }, { "epoch": 0.05438341416854419, "grad_norm": 1.835546612739563, "learning_rate": 1.9854401926236518e-05, "loss": 0.5102, "step": 2450 }, { "epoch": 0.05449440072807183, "grad_norm": 1.3178610801696777, "learning_rate": 1.985380849567592e-05, "loss": 0.6391, "step": 2455 }, { "epoch": 0.05460538728759947, "grad_norm": 1.2502537965774536, "learning_rate": 1.9853213867122333e-05, "loss": 0.4346, "step": 2460 }, { "epoch": 0.05471637384712711, "grad_norm": 1.0760316848754883, "learning_rate": 1.985261804064805e-05, "loss": 0.5103, "step": 2465 }, { "epoch": 0.05482736040665476, "grad_norm": 0.797814130783081, "learning_rate": 1.985202101632551e-05, "loss": 0.4749, "step": 2470 }, { "epoch": 0.0549383469661824, "grad_norm": 1.1549592018127441, "learning_rate": 1.9851422794227295e-05, "loss": 0.5325, "step": 2475 }, { "epoch": 0.05504933352571004, "grad_norm": 1.5827157497406006, "learning_rate": 1.9850823374426136e-05, "loss": 0.5886, "step": 2480 }, { "epoch": 0.05516032008523768, "grad_norm": 1.293227195739746, "learning_rate": 1.985022275699491e-05, "loss": 0.6185, "step": 2485 }, { "epoch": 0.05527130664476532, "grad_norm": 1.5905836820602417, "learning_rate": 1.984962094200663e-05, "loss": 0.6508, "step": 2490 }, { "epoch": 0.05538229320429296, "grad_norm": 1.8819011449813843, "learning_rate": 1.9849017929534474e-05, "loss": 0.7041, "step": 2495 }, { "epoch": 0.055493279763820604, "grad_norm": 1.5715081691741943, "learning_rate": 1.9848413719651745e-05, "loss": 0.5766, "step": 2500 }, { "epoch": 0.05560426632334824, "grad_norm": 1.348044514656067, "learning_rate": 1.984780831243191e-05, "loss": 0.751, "step": 2505 }, { "epoch": 0.05571525288287588, "grad_norm": 1.3613786697387695, "learning_rate": 1.9847201707948567e-05, "loss": 0.5741, "step": 2510 }, { "epoch": 0.05582623944240352, "grad_norm": 1.4741175174713135, "learning_rate": 1.9846593906275463e-05, "loss": 0.5456, "step": 2515 }, { "epoch": 0.05593722600193116, "grad_norm": 1.2884457111358643, "learning_rate": 1.9845984907486494e-05, "loss": 0.6757, "step": 2520 }, { "epoch": 0.05604821256145881, "grad_norm": 1.2677499055862427, "learning_rate": 1.9845374711655703e-05, "loss": 0.6415, "step": 2525 }, { "epoch": 0.05615919912098645, "grad_norm": 1.4850881099700928, "learning_rate": 1.9844763318857275e-05, "loss": 0.6886, "step": 2530 }, { "epoch": 0.05627018568051409, "grad_norm": 1.1189000606536865, "learning_rate": 1.9844150729165536e-05, "loss": 0.5813, "step": 2535 }, { "epoch": 0.05638117224004173, "grad_norm": 1.2875266075134277, "learning_rate": 1.9843536942654967e-05, "loss": 0.4793, "step": 2540 }, { "epoch": 0.05649215879956937, "grad_norm": 1.367203950881958, "learning_rate": 1.984292195940019e-05, "loss": 0.4914, "step": 2545 }, { "epoch": 0.05660314535909702, "grad_norm": 1.3838480710983276, "learning_rate": 1.984230577947597e-05, "loss": 0.6269, "step": 2550 }, { "epoch": 0.056714131918624656, "grad_norm": 1.0736616849899292, "learning_rate": 1.9841688402957223e-05, "loss": 0.7574, "step": 2555 }, { "epoch": 0.056825118478152296, "grad_norm": 1.273168683052063, "learning_rate": 1.9841069829919006e-05, "loss": 0.5862, "step": 2560 }, { "epoch": 0.056936105037679936, "grad_norm": 1.210391640663147, "learning_rate": 1.9840450060436523e-05, "loss": 0.5604, "step": 2565 }, { "epoch": 0.057047091597207576, "grad_norm": 1.031866431236267, "learning_rate": 1.9839829094585125e-05, "loss": 0.5356, "step": 2570 }, { "epoch": 0.057158078156735216, "grad_norm": 1.0833243131637573, "learning_rate": 1.9839206932440307e-05, "loss": 0.5123, "step": 2575 }, { "epoch": 0.05726906471626286, "grad_norm": 1.3849217891693115, "learning_rate": 1.983858357407771e-05, "loss": 0.6217, "step": 2580 }, { "epoch": 0.0573800512757905, "grad_norm": 1.1831570863723755, "learning_rate": 1.983795901957311e-05, "loss": 0.704, "step": 2585 }, { "epoch": 0.05749103783531814, "grad_norm": 1.2546558380126953, "learning_rate": 1.9837333269002452e-05, "loss": 0.5062, "step": 2590 }, { "epoch": 0.05760202439484578, "grad_norm": 1.8262821435928345, "learning_rate": 1.9836706322441806e-05, "loss": 0.7465, "step": 2595 }, { "epoch": 0.05771301095437342, "grad_norm": 1.107417345046997, "learning_rate": 1.9836078179967394e-05, "loss": 0.5504, "step": 2600 }, { "epoch": 0.05782399751390107, "grad_norm": 1.3223316669464111, "learning_rate": 1.983544884165559e-05, "loss": 0.6295, "step": 2605 }, { "epoch": 0.05793498407342871, "grad_norm": 1.1613926887512207, "learning_rate": 1.9834818307582896e-05, "loss": 0.5908, "step": 2610 }, { "epoch": 0.05804597063295635, "grad_norm": 1.026995301246643, "learning_rate": 1.9834186577825977e-05, "loss": 0.5483, "step": 2615 }, { "epoch": 0.05815695719248399, "grad_norm": 1.147704005241394, "learning_rate": 1.9833553652461636e-05, "loss": 0.5194, "step": 2620 }, { "epoch": 0.05826794375201163, "grad_norm": 1.3523058891296387, "learning_rate": 1.9832919531566822e-05, "loss": 0.6718, "step": 2625 }, { "epoch": 0.058378930311539276, "grad_norm": 1.3443208932876587, "learning_rate": 1.9832284215218623e-05, "loss": 0.7098, "step": 2630 }, { "epoch": 0.058489916871066916, "grad_norm": 1.56340491771698, "learning_rate": 1.9831647703494287e-05, "loss": 0.6716, "step": 2635 }, { "epoch": 0.058600903430594556, "grad_norm": 1.7553986310958862, "learning_rate": 1.9831009996471197e-05, "loss": 0.5632, "step": 2640 }, { "epoch": 0.058711889990122196, "grad_norm": 1.7398900985717773, "learning_rate": 1.9830371094226882e-05, "loss": 0.5898, "step": 2645 }, { "epoch": 0.058822876549649836, "grad_norm": 1.50764799118042, "learning_rate": 1.982973099683902e-05, "loss": 0.6524, "step": 2650 }, { "epoch": 0.058933863109177476, "grad_norm": 1.5842028856277466, "learning_rate": 1.9829089704385426e-05, "loss": 0.5745, "step": 2655 }, { "epoch": 0.05904484966870512, "grad_norm": 1.5821632146835327, "learning_rate": 1.982844721694407e-05, "loss": 0.7652, "step": 2660 }, { "epoch": 0.05915583622823276, "grad_norm": 2.011687755584717, "learning_rate": 1.982780353459307e-05, "loss": 0.5884, "step": 2665 }, { "epoch": 0.0592668227877604, "grad_norm": 1.7357864379882812, "learning_rate": 1.9827158657410667e-05, "loss": 0.6314, "step": 2670 }, { "epoch": 0.05937780934728804, "grad_norm": 1.1531578302383423, "learning_rate": 1.982651258547528e-05, "loss": 0.5567, "step": 2675 }, { "epoch": 0.05948879590681568, "grad_norm": 1.2788385152816772, "learning_rate": 1.982586531886544e-05, "loss": 0.4528, "step": 2680 }, { "epoch": 0.05959978246634333, "grad_norm": 1.8556158542633057, "learning_rate": 1.9825216857659855e-05, "loss": 0.5766, "step": 2685 }, { "epoch": 0.05971076902587097, "grad_norm": 1.9483803510665894, "learning_rate": 1.9824567201937354e-05, "loss": 0.584, "step": 2690 }, { "epoch": 0.05982175558539861, "grad_norm": 1.6182191371917725, "learning_rate": 1.9823916351776922e-05, "loss": 0.6361, "step": 2695 }, { "epoch": 0.05993274214492625, "grad_norm": 1.289291501045227, "learning_rate": 1.9823264307257683e-05, "loss": 0.4522, "step": 2700 }, { "epoch": 0.06004372870445389, "grad_norm": 1.3541606664657593, "learning_rate": 1.9822611068458916e-05, "loss": 0.7097, "step": 2705 }, { "epoch": 0.06015471526398153, "grad_norm": 2.0710556507110596, "learning_rate": 1.982195663546004e-05, "loss": 0.5523, "step": 2710 }, { "epoch": 0.060265701823509175, "grad_norm": 1.5621989965438843, "learning_rate": 1.9821301008340614e-05, "loss": 0.5443, "step": 2715 }, { "epoch": 0.060376688383036815, "grad_norm": 1.361464500427246, "learning_rate": 1.9820644187180354e-05, "loss": 0.7128, "step": 2720 }, { "epoch": 0.060487674942564455, "grad_norm": 1.134830355644226, "learning_rate": 1.9819986172059105e-05, "loss": 0.6254, "step": 2725 }, { "epoch": 0.060598661502092095, "grad_norm": 1.1800482273101807, "learning_rate": 1.981932696305687e-05, "loss": 0.5438, "step": 2730 }, { "epoch": 0.060709648061619735, "grad_norm": 1.2705154418945312, "learning_rate": 1.98186665602538e-05, "loss": 0.5399, "step": 2735 }, { "epoch": 0.06082063462114738, "grad_norm": 1.5200409889221191, "learning_rate": 1.9818004963730174e-05, "loss": 0.6409, "step": 2740 }, { "epoch": 0.06093162118067502, "grad_norm": 1.0456277132034302, "learning_rate": 1.9817342173566435e-05, "loss": 0.5863, "step": 2745 }, { "epoch": 0.06104260774020266, "grad_norm": 1.1642462015151978, "learning_rate": 1.9816678189843156e-05, "loss": 0.5681, "step": 2750 }, { "epoch": 0.0611535942997303, "grad_norm": 1.3238295316696167, "learning_rate": 1.9816013012641066e-05, "loss": 0.7381, "step": 2755 }, { "epoch": 0.06126458085925794, "grad_norm": 1.229519009590149, "learning_rate": 1.9815346642041032e-05, "loss": 0.8392, "step": 2760 }, { "epoch": 0.06137556741878559, "grad_norm": 1.1470119953155518, "learning_rate": 1.9814679078124076e-05, "loss": 0.6714, "step": 2765 }, { "epoch": 0.06148655397831323, "grad_norm": 1.3214852809906006, "learning_rate": 1.9814010320971353e-05, "loss": 0.5478, "step": 2770 }, { "epoch": 0.06159754053784087, "grad_norm": 1.2812148332595825, "learning_rate": 1.9813340370664167e-05, "loss": 0.6498, "step": 2775 }, { "epoch": 0.06170852709736851, "grad_norm": 1.038292646408081, "learning_rate": 1.981266922728397e-05, "loss": 0.4434, "step": 2780 }, { "epoch": 0.06181951365689615, "grad_norm": 0.957730233669281, "learning_rate": 1.981199689091236e-05, "loss": 0.5538, "step": 2785 }, { "epoch": 0.06193050021642379, "grad_norm": 1.2136549949645996, "learning_rate": 1.981132336163107e-05, "loss": 0.5467, "step": 2790 }, { "epoch": 0.062041486775951435, "grad_norm": 1.0682697296142578, "learning_rate": 1.9810648639521996e-05, "loss": 0.4941, "step": 2795 }, { "epoch": 0.062152473335479075, "grad_norm": 1.0867773294448853, "learning_rate": 1.9809972724667158e-05, "loss": 0.5495, "step": 2800 }, { "epoch": 0.062263459895006715, "grad_norm": 0.9544880986213684, "learning_rate": 1.980929561714874e-05, "loss": 0.5601, "step": 2805 }, { "epoch": 0.062374446454534355, "grad_norm": 3.496631145477295, "learning_rate": 1.9808617317049055e-05, "loss": 0.6168, "step": 2810 }, { "epoch": 0.062485433014061995, "grad_norm": 1.2551007270812988, "learning_rate": 1.9807937824450576e-05, "loss": 0.3769, "step": 2815 }, { "epoch": 0.06259641957358963, "grad_norm": 1.3092126846313477, "learning_rate": 1.9807257139435906e-05, "loss": 0.6537, "step": 2820 }, { "epoch": 0.06270740613311727, "grad_norm": 1.0793254375457764, "learning_rate": 1.9806575262087806e-05, "loss": 0.686, "step": 2825 }, { "epoch": 0.06281839269264491, "grad_norm": 1.1246548891067505, "learning_rate": 1.9805892192489177e-05, "loss": 0.5737, "step": 2830 }, { "epoch": 0.06292937925217257, "grad_norm": 1.2988542318344116, "learning_rate": 1.9805207930723056e-05, "loss": 0.6251, "step": 2835 }, { "epoch": 0.06304036581170021, "grad_norm": 1.405332326889038, "learning_rate": 1.9804522476872644e-05, "loss": 0.5313, "step": 2840 }, { "epoch": 0.06315135237122785, "grad_norm": 1.222214937210083, "learning_rate": 1.9803835831021264e-05, "loss": 0.4689, "step": 2845 }, { "epoch": 0.06326233893075549, "grad_norm": 1.2090331315994263, "learning_rate": 1.980314799325241e-05, "loss": 0.6677, "step": 2850 }, { "epoch": 0.06337332549028313, "grad_norm": 1.2813634872436523, "learning_rate": 1.9802458963649696e-05, "loss": 0.4276, "step": 2855 }, { "epoch": 0.06348431204981077, "grad_norm": 1.3058404922485352, "learning_rate": 1.98017687422969e-05, "loss": 0.5755, "step": 2860 }, { "epoch": 0.06359529860933841, "grad_norm": 1.4632521867752075, "learning_rate": 1.9801077329277932e-05, "loss": 0.4694, "step": 2865 }, { "epoch": 0.06370628516886605, "grad_norm": 1.2117215394973755, "learning_rate": 1.980038472467685e-05, "loss": 0.6069, "step": 2870 }, { "epoch": 0.06381727172839369, "grad_norm": 0.6046699285507202, "learning_rate": 1.9799690928577865e-05, "loss": 0.4746, "step": 2875 }, { "epoch": 0.06392825828792133, "grad_norm": 1.6112840175628662, "learning_rate": 1.9798995941065318e-05, "loss": 0.4418, "step": 2880 }, { "epoch": 0.06403924484744897, "grad_norm": 1.112791895866394, "learning_rate": 1.9798299762223713e-05, "loss": 0.7471, "step": 2885 }, { "epoch": 0.06415023140697662, "grad_norm": 2.807286262512207, "learning_rate": 1.9797602392137678e-05, "loss": 0.4693, "step": 2890 }, { "epoch": 0.06426121796650426, "grad_norm": 1.076808214187622, "learning_rate": 1.9796903830892008e-05, "loss": 0.479, "step": 2895 }, { "epoch": 0.0643722045260319, "grad_norm": 0.9873941540718079, "learning_rate": 1.9796204078571623e-05, "loss": 0.6405, "step": 2900 }, { "epoch": 0.06448319108555954, "grad_norm": 1.0882630348205566, "learning_rate": 1.97955031352616e-05, "loss": 0.5057, "step": 2905 }, { "epoch": 0.06459417764508718, "grad_norm": 1.2589354515075684, "learning_rate": 1.9794801001047158e-05, "loss": 0.5496, "step": 2910 }, { "epoch": 0.06470516420461482, "grad_norm": 1.3534345626831055, "learning_rate": 1.979409767601366e-05, "loss": 0.6252, "step": 2915 }, { "epoch": 0.06481615076414246, "grad_norm": 1.2870107889175415, "learning_rate": 1.9793393160246613e-05, "loss": 0.572, "step": 2920 }, { "epoch": 0.0649271373236701, "grad_norm": 1.279741883277893, "learning_rate": 1.9792687453831673e-05, "loss": 0.5784, "step": 2925 }, { "epoch": 0.06503812388319774, "grad_norm": 1.2085089683532715, "learning_rate": 1.979198055685463e-05, "loss": 0.5717, "step": 2930 }, { "epoch": 0.06514911044272538, "grad_norm": 1.5447397232055664, "learning_rate": 1.9791272469401432e-05, "loss": 0.6508, "step": 2935 }, { "epoch": 0.06526009700225302, "grad_norm": 1.1712005138397217, "learning_rate": 1.9790563191558167e-05, "loss": 0.5406, "step": 2940 }, { "epoch": 0.06537108356178067, "grad_norm": 1.8229413032531738, "learning_rate": 1.978985272341106e-05, "loss": 0.5619, "step": 2945 }, { "epoch": 0.06548207012130831, "grad_norm": 3.0190622806549072, "learning_rate": 1.9789141065046495e-05, "loss": 0.5988, "step": 2950 }, { "epoch": 0.06559305668083595, "grad_norm": 1.4421160221099854, "learning_rate": 1.9788428216550988e-05, "loss": 0.6442, "step": 2955 }, { "epoch": 0.0657040432403636, "grad_norm": 1.2881371974945068, "learning_rate": 1.9787714178011206e-05, "loss": 0.5356, "step": 2960 }, { "epoch": 0.06581502979989123, "grad_norm": 1.5065562725067139, "learning_rate": 1.978699894951396e-05, "loss": 0.5315, "step": 2965 }, { "epoch": 0.06592601635941887, "grad_norm": 1.547042965888977, "learning_rate": 1.9786282531146207e-05, "loss": 0.5213, "step": 2970 }, { "epoch": 0.06603700291894651, "grad_norm": 1.183430790901184, "learning_rate": 1.9785564922995042e-05, "loss": 0.4528, "step": 2975 }, { "epoch": 0.06614798947847415, "grad_norm": 0.9143704771995544, "learning_rate": 1.9784846125147712e-05, "loss": 0.6407, "step": 2980 }, { "epoch": 0.0662589760380018, "grad_norm": 0.9499570727348328, "learning_rate": 1.9784126137691606e-05, "loss": 0.2765, "step": 2985 }, { "epoch": 0.06636996259752943, "grad_norm": 1.2335505485534668, "learning_rate": 1.9783404960714258e-05, "loss": 0.6352, "step": 2990 }, { "epoch": 0.06648094915705709, "grad_norm": 1.1898120641708374, "learning_rate": 1.9782682594303348e-05, "loss": 0.4826, "step": 2995 }, { "epoch": 0.06659193571658473, "grad_norm": 1.3998817205429077, "learning_rate": 1.9781959038546693e-05, "loss": 0.5894, "step": 3000 }, { "epoch": 0.06670292227611237, "grad_norm": 1.0510846376419067, "learning_rate": 1.9781234293532264e-05, "loss": 0.6697, "step": 3005 }, { "epoch": 0.06681390883564, "grad_norm": 1.3648271560668945, "learning_rate": 1.9780508359348175e-05, "loss": 0.6685, "step": 3010 }, { "epoch": 0.06692489539516765, "grad_norm": 1.1597286462783813, "learning_rate": 1.9779781236082683e-05, "loss": 0.589, "step": 3015 }, { "epoch": 0.06703588195469529, "grad_norm": 1.1021475791931152, "learning_rate": 1.9779052923824186e-05, "loss": 0.6973, "step": 3020 }, { "epoch": 0.06714686851422293, "grad_norm": 1.321478247642517, "learning_rate": 1.977832342266123e-05, "loss": 0.6676, "step": 3025 }, { "epoch": 0.06725785507375057, "grad_norm": 1.3852922916412354, "learning_rate": 1.9777592732682507e-05, "loss": 0.579, "step": 3030 }, { "epoch": 0.0673688416332782, "grad_norm": 1.547518253326416, "learning_rate": 1.977686085397685e-05, "loss": 0.5779, "step": 3035 }, { "epoch": 0.06747982819280585, "grad_norm": 0.9579359889030457, "learning_rate": 1.977612778663324e-05, "loss": 0.5756, "step": 3040 }, { "epoch": 0.06759081475233349, "grad_norm": 1.5292962789535522, "learning_rate": 1.9775393530740797e-05, "loss": 0.6468, "step": 3045 }, { "epoch": 0.06770180131186114, "grad_norm": 1.042737364768982, "learning_rate": 1.97746580863888e-05, "loss": 0.5905, "step": 3050 }, { "epoch": 0.06781278787138878, "grad_norm": 1.162990927696228, "learning_rate": 1.9773921453666647e-05, "loss": 0.5959, "step": 3055 }, { "epoch": 0.06792377443091642, "grad_norm": 1.2333893775939941, "learning_rate": 1.9773183632663907e-05, "loss": 0.6628, "step": 3060 }, { "epoch": 0.06803476099044406, "grad_norm": 0.8473436236381531, "learning_rate": 1.9772444623470277e-05, "loss": 0.5039, "step": 3065 }, { "epoch": 0.0681457475499717, "grad_norm": 1.0550405979156494, "learning_rate": 1.9771704426175605e-05, "loss": 0.5449, "step": 3070 }, { "epoch": 0.06825673410949934, "grad_norm": 1.0942840576171875, "learning_rate": 1.9770963040869878e-05, "loss": 0.5346, "step": 3075 }, { "epoch": 0.06836772066902698, "grad_norm": 1.232731819152832, "learning_rate": 1.9770220467643235e-05, "loss": 0.47, "step": 3080 }, { "epoch": 0.06847870722855462, "grad_norm": 1.2322814464569092, "learning_rate": 1.9769476706585956e-05, "loss": 0.4938, "step": 3085 }, { "epoch": 0.06858969378808226, "grad_norm": 1.3374191522598267, "learning_rate": 1.9768731757788462e-05, "loss": 0.755, "step": 3090 }, { "epoch": 0.0687006803476099, "grad_norm": 0.9323263168334961, "learning_rate": 1.976798562134132e-05, "loss": 0.5455, "step": 3095 }, { "epoch": 0.06881166690713754, "grad_norm": 1.150692105293274, "learning_rate": 1.976723829733525e-05, "loss": 0.5345, "step": 3100 }, { "epoch": 0.06892265346666519, "grad_norm": 1.3430273532867432, "learning_rate": 1.97664897858611e-05, "loss": 0.6931, "step": 3105 }, { "epoch": 0.06903364002619283, "grad_norm": 1.2106941938400269, "learning_rate": 1.976574008700988e-05, "loss": 0.4909, "step": 3110 }, { "epoch": 0.06914462658572047, "grad_norm": 1.5072520971298218, "learning_rate": 1.976498920087273e-05, "loss": 0.5841, "step": 3115 }, { "epoch": 0.06925561314524811, "grad_norm": 1.5390129089355469, "learning_rate": 1.9764237127540943e-05, "loss": 0.6413, "step": 3120 }, { "epoch": 0.06936659970477575, "grad_norm": 1.5383436679840088, "learning_rate": 1.976348386710595e-05, "loss": 0.5086, "step": 3125 }, { "epoch": 0.06947758626430339, "grad_norm": 1.0449554920196533, "learning_rate": 1.9762729419659335e-05, "loss": 0.4917, "step": 3130 }, { "epoch": 0.06958857282383103, "grad_norm": 1.008422613143921, "learning_rate": 1.9761973785292822e-05, "loss": 0.5789, "step": 3135 }, { "epoch": 0.06969955938335867, "grad_norm": 1.426741600036621, "learning_rate": 1.976121696409827e-05, "loss": 0.4162, "step": 3140 }, { "epoch": 0.06981054594288631, "grad_norm": 1.1669197082519531, "learning_rate": 1.9760458956167698e-05, "loss": 0.6258, "step": 3145 }, { "epoch": 0.06992153250241395, "grad_norm": 1.39824378490448, "learning_rate": 1.975969976159326e-05, "loss": 0.7105, "step": 3150 }, { "epoch": 0.07003251906194159, "grad_norm": 1.38515043258667, "learning_rate": 1.975893938046726e-05, "loss": 0.5529, "step": 3155 }, { "epoch": 0.07014350562146925, "grad_norm": 1.5153920650482178, "learning_rate": 1.9758177812882134e-05, "loss": 0.7217, "step": 3160 }, { "epoch": 0.07025449218099689, "grad_norm": 1.4021879434585571, "learning_rate": 1.9757415058930477e-05, "loss": 0.4648, "step": 3165 }, { "epoch": 0.07036547874052453, "grad_norm": 1.093927264213562, "learning_rate": 1.9756651118705023e-05, "loss": 0.3276, "step": 3170 }, { "epoch": 0.07047646530005217, "grad_norm": 1.226275086402893, "learning_rate": 1.9755885992298648e-05, "loss": 0.6317, "step": 3175 }, { "epoch": 0.0705874518595798, "grad_norm": 1.1503514051437378, "learning_rate": 1.975511967980437e-05, "loss": 0.5181, "step": 3180 }, { "epoch": 0.07069843841910745, "grad_norm": 1.0098612308502197, "learning_rate": 1.9754352181315358e-05, "loss": 0.542, "step": 3185 }, { "epoch": 0.07080942497863509, "grad_norm": 1.1693663597106934, "learning_rate": 1.975358349692492e-05, "loss": 0.6094, "step": 3190 }, { "epoch": 0.07092041153816273, "grad_norm": 1.1020994186401367, "learning_rate": 1.9752813626726512e-05, "loss": 0.6664, "step": 3195 }, { "epoch": 0.07103139809769037, "grad_norm": 0.7710314393043518, "learning_rate": 1.9752042570813733e-05, "loss": 0.4683, "step": 3200 }, { "epoch": 0.071142384657218, "grad_norm": 1.5390743017196655, "learning_rate": 1.9751270329280324e-05, "loss": 0.653, "step": 3205 }, { "epoch": 0.07125337121674566, "grad_norm": 1.0856455564498901, "learning_rate": 1.9750496902220172e-05, "loss": 0.6865, "step": 3210 }, { "epoch": 0.0713643577762733, "grad_norm": 1.365512490272522, "learning_rate": 1.9749722289727303e-05, "loss": 0.5476, "step": 3215 }, { "epoch": 0.07147534433580094, "grad_norm": 1.1265281438827515, "learning_rate": 1.97489464918959e-05, "loss": 0.5567, "step": 3220 }, { "epoch": 0.07158633089532858, "grad_norm": 2.017101287841797, "learning_rate": 1.974816950882028e-05, "loss": 0.4984, "step": 3225 }, { "epoch": 0.07169731745485622, "grad_norm": 1.2441028356552124, "learning_rate": 1.97473913405949e-05, "loss": 0.5496, "step": 3230 }, { "epoch": 0.07180830401438386, "grad_norm": 1.2447353601455688, "learning_rate": 1.9746611987314375e-05, "loss": 0.4673, "step": 3235 }, { "epoch": 0.0719192905739115, "grad_norm": 0.9409399628639221, "learning_rate": 1.9745831449073448e-05, "loss": 0.4932, "step": 3240 }, { "epoch": 0.07203027713343914, "grad_norm": 1.3958779573440552, "learning_rate": 1.974504972596702e-05, "loss": 0.6988, "step": 3245 }, { "epoch": 0.07214126369296678, "grad_norm": 1.4717780351638794, "learning_rate": 1.9744266818090127e-05, "loss": 0.6872, "step": 3250 }, { "epoch": 0.07225225025249442, "grad_norm": 1.0255435705184937, "learning_rate": 1.9743482725537956e-05, "loss": 0.4785, "step": 3255 }, { "epoch": 0.07236323681202206, "grad_norm": 1.3051496744155884, "learning_rate": 1.9742697448405834e-05, "loss": 0.5958, "step": 3260 }, { "epoch": 0.07247422337154971, "grad_norm": 1.1969037055969238, "learning_rate": 1.974191098678923e-05, "loss": 0.473, "step": 3265 }, { "epoch": 0.07258520993107735, "grad_norm": 1.1456329822540283, "learning_rate": 1.9741123340783756e-05, "loss": 0.5494, "step": 3270 }, { "epoch": 0.07269619649060499, "grad_norm": 1.8885293006896973, "learning_rate": 1.974033451048518e-05, "loss": 0.4941, "step": 3275 }, { "epoch": 0.07280718305013263, "grad_norm": 1.1382877826690674, "learning_rate": 1.97395444959894e-05, "loss": 0.5873, "step": 3280 }, { "epoch": 0.07291816960966027, "grad_norm": 0.9775564670562744, "learning_rate": 1.973875329739246e-05, "loss": 0.5017, "step": 3285 }, { "epoch": 0.07302915616918791, "grad_norm": 1.51008141040802, "learning_rate": 1.9737960914790562e-05, "loss": 0.5956, "step": 3290 }, { "epoch": 0.07314014272871555, "grad_norm": 1.453376293182373, "learning_rate": 1.973716734828003e-05, "loss": 0.7438, "step": 3295 }, { "epoch": 0.07325112928824319, "grad_norm": 1.4281340837478638, "learning_rate": 1.973637259795735e-05, "loss": 0.5661, "step": 3300 }, { "epoch": 0.07336211584777083, "grad_norm": 1.2644604444503784, "learning_rate": 1.9735576663919138e-05, "loss": 0.7236, "step": 3305 }, { "epoch": 0.07347310240729847, "grad_norm": 1.2021888494491577, "learning_rate": 1.973477954626217e-05, "loss": 0.5634, "step": 3310 }, { "epoch": 0.07358408896682611, "grad_norm": 1.2512317895889282, "learning_rate": 1.9733981245083355e-05, "loss": 0.4663, "step": 3315 }, { "epoch": 0.07369507552635376, "grad_norm": 1.073941946029663, "learning_rate": 1.973318176047974e-05, "loss": 0.6247, "step": 3320 }, { "epoch": 0.0738060620858814, "grad_norm": 1.1235560178756714, "learning_rate": 1.973238109254853e-05, "loss": 0.6401, "step": 3325 }, { "epoch": 0.07391704864540904, "grad_norm": 1.138898491859436, "learning_rate": 1.9731579241387068e-05, "loss": 0.5697, "step": 3330 }, { "epoch": 0.07402803520493668, "grad_norm": 1.557690143585205, "learning_rate": 1.9730776207092842e-05, "loss": 0.5626, "step": 3335 }, { "epoch": 0.07413902176446432, "grad_norm": 1.0606776475906372, "learning_rate": 1.9729971989763474e-05, "loss": 0.5082, "step": 3340 }, { "epoch": 0.07425000832399196, "grad_norm": 1.6427574157714844, "learning_rate": 1.9729166589496748e-05, "loss": 0.5541, "step": 3345 }, { "epoch": 0.0743609948835196, "grad_norm": 1.224596619606018, "learning_rate": 1.9728360006390575e-05, "loss": 0.4651, "step": 3350 }, { "epoch": 0.07447198144304724, "grad_norm": 1.2702159881591797, "learning_rate": 1.9727552240543018e-05, "loss": 0.5905, "step": 3355 }, { "epoch": 0.07458296800257488, "grad_norm": 0.9899827241897583, "learning_rate": 1.972674329205228e-05, "loss": 0.5382, "step": 3360 }, { "epoch": 0.07469395456210252, "grad_norm": 1.2235766649246216, "learning_rate": 1.972593316101672e-05, "loss": 0.53, "step": 3365 }, { "epoch": 0.07480494112163016, "grad_norm": 1.7147908210754395, "learning_rate": 1.972512184753482e-05, "loss": 0.4942, "step": 3370 }, { "epoch": 0.07491592768115782, "grad_norm": 1.137715458869934, "learning_rate": 1.9724309351705225e-05, "loss": 0.6969, "step": 3375 }, { "epoch": 0.07502691424068546, "grad_norm": 1.1159820556640625, "learning_rate": 1.972349567362671e-05, "loss": 0.7205, "step": 3380 }, { "epoch": 0.0751379008002131, "grad_norm": 1.1172508001327515, "learning_rate": 1.97226808133982e-05, "loss": 0.6243, "step": 3385 }, { "epoch": 0.07524888735974074, "grad_norm": 1.3358166217803955, "learning_rate": 1.9721864771118764e-05, "loss": 0.542, "step": 3390 }, { "epoch": 0.07535987391926838, "grad_norm": 1.0049517154693604, "learning_rate": 1.9721047546887617e-05, "loss": 0.6114, "step": 3395 }, { "epoch": 0.07547086047879602, "grad_norm": 1.2963844537734985, "learning_rate": 1.972022914080411e-05, "loss": 0.4937, "step": 3400 }, { "epoch": 0.07558184703832366, "grad_norm": 1.0634539127349854, "learning_rate": 1.9719409552967744e-05, "loss": 0.3353, "step": 3405 }, { "epoch": 0.0756928335978513, "grad_norm": 0.957071840763092, "learning_rate": 1.9718588783478156e-05, "loss": 0.7273, "step": 3410 }, { "epoch": 0.07580382015737894, "grad_norm": 1.2983900308609009, "learning_rate": 1.971776683243514e-05, "loss": 0.6289, "step": 3415 }, { "epoch": 0.07591480671690658, "grad_norm": 0.9925946593284607, "learning_rate": 1.9716943699938624e-05, "loss": 0.5156, "step": 3420 }, { "epoch": 0.07602579327643422, "grad_norm": 1.1367206573486328, "learning_rate": 1.971611938608868e-05, "loss": 0.5861, "step": 3425 }, { "epoch": 0.07613677983596187, "grad_norm": 1.0216517448425293, "learning_rate": 1.971529389098553e-05, "loss": 0.6049, "step": 3430 }, { "epoch": 0.07624776639548951, "grad_norm": 1.3960784673690796, "learning_rate": 1.971446721472953e-05, "loss": 0.7103, "step": 3435 }, { "epoch": 0.07635875295501715, "grad_norm": 1.090830683708191, "learning_rate": 1.9713639357421182e-05, "loss": 0.5972, "step": 3440 }, { "epoch": 0.07646973951454479, "grad_norm": 1.1981265544891357, "learning_rate": 1.971281031916114e-05, "loss": 0.5033, "step": 3445 }, { "epoch": 0.07658072607407243, "grad_norm": 1.483801007270813, "learning_rate": 1.9711980100050196e-05, "loss": 0.445, "step": 3450 }, { "epoch": 0.07669171263360007, "grad_norm": 1.3446348905563354, "learning_rate": 1.971114870018928e-05, "loss": 0.6291, "step": 3455 }, { "epoch": 0.07680269919312771, "grad_norm": 1.2881728410720825, "learning_rate": 1.9710316119679474e-05, "loss": 0.4737, "step": 3460 }, { "epoch": 0.07691368575265535, "grad_norm": 1.2318109273910522, "learning_rate": 1.9709482358622002e-05, "loss": 0.4223, "step": 3465 }, { "epoch": 0.07702467231218299, "grad_norm": 1.3753089904785156, "learning_rate": 1.9708647417118225e-05, "loss": 0.4553, "step": 3470 }, { "epoch": 0.07713565887171063, "grad_norm": 2.218982219696045, "learning_rate": 1.9707811295269656e-05, "loss": 0.6495, "step": 3475 }, { "epoch": 0.07724664543123828, "grad_norm": 1.3118571043014526, "learning_rate": 1.9706973993177948e-05, "loss": 0.507, "step": 3480 }, { "epoch": 0.07735763199076592, "grad_norm": 1.707163691520691, "learning_rate": 1.9706135510944894e-05, "loss": 0.6421, "step": 3485 }, { "epoch": 0.07746861855029356, "grad_norm": 1.357856035232544, "learning_rate": 1.9705295848672443e-05, "loss": 0.7232, "step": 3490 }, { "epoch": 0.0775796051098212, "grad_norm": 0.8372821807861328, "learning_rate": 1.9704455006462666e-05, "loss": 0.6043, "step": 3495 }, { "epoch": 0.07769059166934884, "grad_norm": 1.6511869430541992, "learning_rate": 1.9703612984417797e-05, "loss": 0.6053, "step": 3500 }, { "epoch": 0.07780157822887648, "grad_norm": 1.1607153415679932, "learning_rate": 1.9702769782640204e-05, "loss": 0.5912, "step": 3505 }, { "epoch": 0.07791256478840412, "grad_norm": 1.3973238468170166, "learning_rate": 1.9701925401232406e-05, "loss": 0.5424, "step": 3510 }, { "epoch": 0.07802355134793176, "grad_norm": 1.4439544677734375, "learning_rate": 1.970107984029705e-05, "loss": 0.7179, "step": 3515 }, { "epoch": 0.0781345379074594, "grad_norm": 1.0689724683761597, "learning_rate": 1.9700233099936944e-05, "loss": 0.6357, "step": 3520 }, { "epoch": 0.07824552446698704, "grad_norm": 1.298864483833313, "learning_rate": 1.9699385180255027e-05, "loss": 0.6049, "step": 3525 }, { "epoch": 0.07835651102651468, "grad_norm": 0.9402378797531128, "learning_rate": 1.969853608135439e-05, "loss": 0.5692, "step": 3530 }, { "epoch": 0.07846749758604234, "grad_norm": 1.4564235210418701, "learning_rate": 1.9697685803338267e-05, "loss": 0.5447, "step": 3535 }, { "epoch": 0.07857848414556998, "grad_norm": 1.2981603145599365, "learning_rate": 1.9696834346310024e-05, "loss": 0.5633, "step": 3540 }, { "epoch": 0.07868947070509762, "grad_norm": 0.8520748019218445, "learning_rate": 1.969598171037318e-05, "loss": 0.5538, "step": 3545 }, { "epoch": 0.07880045726462526, "grad_norm": 1.0657398700714111, "learning_rate": 1.9695127895631403e-05, "loss": 0.6826, "step": 3550 }, { "epoch": 0.0789114438241529, "grad_norm": 1.444388747215271, "learning_rate": 1.9694272902188486e-05, "loss": 0.5651, "step": 3555 }, { "epoch": 0.07902243038368054, "grad_norm": 1.5944229364395142, "learning_rate": 1.9693416730148388e-05, "loss": 0.5611, "step": 3560 }, { "epoch": 0.07913341694320818, "grad_norm": 1.327964425086975, "learning_rate": 1.969255937961519e-05, "loss": 0.6285, "step": 3565 }, { "epoch": 0.07924440350273582, "grad_norm": 1.199369192123413, "learning_rate": 1.9691700850693126e-05, "loss": 0.5577, "step": 3570 }, { "epoch": 0.07935539006226346, "grad_norm": 1.0459039211273193, "learning_rate": 1.9690841143486575e-05, "loss": 0.5764, "step": 3575 }, { "epoch": 0.0794663766217911, "grad_norm": 1.2481250762939453, "learning_rate": 1.9689980258100065e-05, "loss": 0.6886, "step": 3580 }, { "epoch": 0.07957736318131874, "grad_norm": 1.0888584852218628, "learning_rate": 1.9689118194638248e-05, "loss": 0.5258, "step": 3585 }, { "epoch": 0.07968834974084639, "grad_norm": 1.121347188949585, "learning_rate": 1.9688254953205935e-05, "loss": 0.531, "step": 3590 }, { "epoch": 0.07979933630037403, "grad_norm": 0.9125674962997437, "learning_rate": 1.968739053390808e-05, "loss": 0.5368, "step": 3595 }, { "epoch": 0.07991032285990167, "grad_norm": 1.4685077667236328, "learning_rate": 1.968652493684977e-05, "loss": 0.6108, "step": 3600 }, { "epoch": 0.08002130941942931, "grad_norm": 1.0957640409469604, "learning_rate": 1.9685658162136247e-05, "loss": 0.6653, "step": 3605 }, { "epoch": 0.08013229597895695, "grad_norm": 1.6297954320907593, "learning_rate": 1.9684790209872885e-05, "loss": 0.4481, "step": 3610 }, { "epoch": 0.08024328253848459, "grad_norm": 0.9564645886421204, "learning_rate": 1.9683921080165215e-05, "loss": 0.523, "step": 3615 }, { "epoch": 0.08035426909801223, "grad_norm": 0.9796757102012634, "learning_rate": 1.9683050773118892e-05, "loss": 0.7131, "step": 3620 }, { "epoch": 0.08046525565753987, "grad_norm": 0.8716163039207458, "learning_rate": 1.968217928883973e-05, "loss": 0.463, "step": 3625 }, { "epoch": 0.08057624221706751, "grad_norm": 1.0294742584228516, "learning_rate": 1.9681306627433683e-05, "loss": 0.6369, "step": 3630 }, { "epoch": 0.08068722877659515, "grad_norm": 1.519837498664856, "learning_rate": 1.9680432789006846e-05, "loss": 0.5785, "step": 3635 }, { "epoch": 0.08079821533612279, "grad_norm": 0.9069929718971252, "learning_rate": 1.9679557773665457e-05, "loss": 0.6427, "step": 3640 }, { "epoch": 0.08090920189565044, "grad_norm": 1.3364211320877075, "learning_rate": 1.9678681581515894e-05, "loss": 0.5294, "step": 3645 }, { "epoch": 0.08102018845517808, "grad_norm": 1.2609714269638062, "learning_rate": 1.9677804212664685e-05, "loss": 0.5018, "step": 3650 }, { "epoch": 0.08113117501470572, "grad_norm": 1.5361000299453735, "learning_rate": 1.9676925667218498e-05, "loss": 0.5415, "step": 3655 }, { "epoch": 0.08124216157423336, "grad_norm": 1.722002625465393, "learning_rate": 1.967604594528414e-05, "loss": 0.5294, "step": 3660 }, { "epoch": 0.081353148133761, "grad_norm": 1.276304006576538, "learning_rate": 1.967516504696857e-05, "loss": 0.6848, "step": 3665 }, { "epoch": 0.08146413469328864, "grad_norm": 1.476733922958374, "learning_rate": 1.9674282972378878e-05, "loss": 0.388, "step": 3670 }, { "epoch": 0.08157512125281628, "grad_norm": 1.4457347393035889, "learning_rate": 1.9673399721622307e-05, "loss": 0.4752, "step": 3675 }, { "epoch": 0.08168610781234392, "grad_norm": 1.381885290145874, "learning_rate": 1.9672515294806242e-05, "loss": 0.4878, "step": 3680 }, { "epoch": 0.08179709437187156, "grad_norm": 1.1364555358886719, "learning_rate": 1.9671629692038206e-05, "loss": 0.4585, "step": 3685 }, { "epoch": 0.0819080809313992, "grad_norm": 1.1215564012527466, "learning_rate": 1.967074291342587e-05, "loss": 0.5158, "step": 3690 }, { "epoch": 0.08201906749092684, "grad_norm": 1.0232094526290894, "learning_rate": 1.9669854959077043e-05, "loss": 0.6409, "step": 3695 }, { "epoch": 0.0821300540504545, "grad_norm": 1.3816514015197754, "learning_rate": 1.966896582909968e-05, "loss": 0.5302, "step": 3700 }, { "epoch": 0.08224104060998214, "grad_norm": 1.197551965713501, "learning_rate": 1.9668075523601877e-05, "loss": 0.5766, "step": 3705 }, { "epoch": 0.08235202716950978, "grad_norm": 1.2883515357971191, "learning_rate": 1.9667184042691877e-05, "loss": 0.4869, "step": 3710 }, { "epoch": 0.08246301372903742, "grad_norm": 1.4811487197875977, "learning_rate": 1.966629138647806e-05, "loss": 0.5379, "step": 3715 }, { "epoch": 0.08257400028856506, "grad_norm": 1.2199665307998657, "learning_rate": 1.966539755506896e-05, "loss": 0.5752, "step": 3720 }, { "epoch": 0.0826849868480927, "grad_norm": 1.278082251548767, "learning_rate": 1.9664502548573234e-05, "loss": 0.3786, "step": 3725 }, { "epoch": 0.08279597340762034, "grad_norm": 1.4458006620407104, "learning_rate": 1.9663606367099704e-05, "loss": 0.4773, "step": 3730 }, { "epoch": 0.08290695996714798, "grad_norm": 1.4444694519042969, "learning_rate": 1.966270901075732e-05, "loss": 0.6588, "step": 3735 }, { "epoch": 0.08301794652667562, "grad_norm": 0.9793612360954285, "learning_rate": 1.9661810479655184e-05, "loss": 0.5976, "step": 3740 }, { "epoch": 0.08312893308620325, "grad_norm": 1.261999249458313, "learning_rate": 1.9660910773902532e-05, "loss": 0.6603, "step": 3745 }, { "epoch": 0.08323991964573091, "grad_norm": 1.2012444734573364, "learning_rate": 1.9660009893608744e-05, "loss": 0.7029, "step": 3750 }, { "epoch": 0.08335090620525855, "grad_norm": 1.2707288265228271, "learning_rate": 1.9659107838883354e-05, "loss": 0.5535, "step": 3755 }, { "epoch": 0.08346189276478619, "grad_norm": 1.252837061882019, "learning_rate": 1.9658204609836026e-05, "loss": 0.6405, "step": 3760 }, { "epoch": 0.08357287932431383, "grad_norm": 1.2707806825637817, "learning_rate": 1.9657300206576573e-05, "loss": 0.5725, "step": 3765 }, { "epoch": 0.08368386588384147, "grad_norm": 1.62176513671875, "learning_rate": 1.965639462921495e-05, "loss": 0.5944, "step": 3770 }, { "epoch": 0.08379485244336911, "grad_norm": 1.136454701423645, "learning_rate": 1.965548787786125e-05, "loss": 0.6375, "step": 3775 }, { "epoch": 0.08390583900289675, "grad_norm": 1.5717942714691162, "learning_rate": 1.965457995262572e-05, "loss": 0.5957, "step": 3780 }, { "epoch": 0.08401682556242439, "grad_norm": 0.764004111289978, "learning_rate": 1.9653670853618733e-05, "loss": 0.4561, "step": 3785 }, { "epoch": 0.08412781212195203, "grad_norm": 2.068720579147339, "learning_rate": 1.9652760580950825e-05, "loss": 0.4438, "step": 3790 }, { "epoch": 0.08423879868147967, "grad_norm": 0.9934617877006531, "learning_rate": 1.9651849134732653e-05, "loss": 0.6083, "step": 3795 }, { "epoch": 0.08434978524100731, "grad_norm": 1.0511016845703125, "learning_rate": 1.965093651507504e-05, "loss": 0.4666, "step": 3800 }, { "epoch": 0.08446077180053496, "grad_norm": 1.0432274341583252, "learning_rate": 1.9650022722088927e-05, "loss": 0.5368, "step": 3805 }, { "epoch": 0.0845717583600626, "grad_norm": 1.2682653665542603, "learning_rate": 1.9649107755885416e-05, "loss": 0.4956, "step": 3810 }, { "epoch": 0.08468274491959024, "grad_norm": 0.9422754645347595, "learning_rate": 1.9648191616575745e-05, "loss": 0.5083, "step": 3815 }, { "epoch": 0.08479373147911788, "grad_norm": 1.4312782287597656, "learning_rate": 1.9647274304271297e-05, "loss": 0.4671, "step": 3820 }, { "epoch": 0.08490471803864552, "grad_norm": 0.8657413125038147, "learning_rate": 1.964635581908359e-05, "loss": 0.4275, "step": 3825 }, { "epoch": 0.08501570459817316, "grad_norm": 1.6890164613723755, "learning_rate": 1.9645436161124293e-05, "loss": 0.6362, "step": 3830 }, { "epoch": 0.0851266911577008, "grad_norm": 0.9134665131568909, "learning_rate": 1.9644515330505218e-05, "loss": 0.6467, "step": 3835 }, { "epoch": 0.08523767771722844, "grad_norm": 1.2863361835479736, "learning_rate": 1.9643593327338312e-05, "loss": 0.4728, "step": 3840 }, { "epoch": 0.08534866427675608, "grad_norm": 1.3430936336517334, "learning_rate": 1.9642670151735675e-05, "loss": 0.7618, "step": 3845 }, { "epoch": 0.08545965083628372, "grad_norm": 1.1958832740783691, "learning_rate": 1.964174580380954e-05, "loss": 0.612, "step": 3850 }, { "epoch": 0.08557063739581136, "grad_norm": 1.1422450542449951, "learning_rate": 1.9640820283672284e-05, "loss": 0.5366, "step": 3855 }, { "epoch": 0.08568162395533901, "grad_norm": 1.3729885816574097, "learning_rate": 1.963989359143643e-05, "loss": 0.6063, "step": 3860 }, { "epoch": 0.08579261051486665, "grad_norm": 0.8676313161849976, "learning_rate": 1.9638965727214645e-05, "loss": 0.4358, "step": 3865 }, { "epoch": 0.0859035970743943, "grad_norm": 1.2360765933990479, "learning_rate": 1.9638036691119734e-05, "loss": 0.4915, "step": 3870 }, { "epoch": 0.08601458363392193, "grad_norm": 0.9501989483833313, "learning_rate": 1.963710648326464e-05, "loss": 0.5179, "step": 3875 }, { "epoch": 0.08612557019344957, "grad_norm": 1.4758225679397583, "learning_rate": 1.9636175103762466e-05, "loss": 0.5411, "step": 3880 }, { "epoch": 0.08623655675297721, "grad_norm": 1.0443713665008545, "learning_rate": 1.963524255272644e-05, "loss": 0.487, "step": 3885 }, { "epoch": 0.08634754331250485, "grad_norm": 1.2120336294174194, "learning_rate": 1.9634308830269936e-05, "loss": 0.6933, "step": 3890 }, { "epoch": 0.0864585298720325, "grad_norm": 1.120863676071167, "learning_rate": 1.9633373936506478e-05, "loss": 0.5542, "step": 3895 }, { "epoch": 0.08656951643156013, "grad_norm": 1.210645079612732, "learning_rate": 1.963243787154972e-05, "loss": 0.612, "step": 3900 }, { "epoch": 0.08668050299108777, "grad_norm": 1.8348194360733032, "learning_rate": 1.9631500635513475e-05, "loss": 0.4468, "step": 3905 }, { "epoch": 0.08679148955061541, "grad_norm": 1.100720763206482, "learning_rate": 1.9630562228511682e-05, "loss": 0.5213, "step": 3910 }, { "epoch": 0.08690247611014307, "grad_norm": 1.0100977420806885, "learning_rate": 1.9629622650658434e-05, "loss": 0.539, "step": 3915 }, { "epoch": 0.08701346266967071, "grad_norm": 1.1190155744552612, "learning_rate": 1.9628681902067956e-05, "loss": 0.4504, "step": 3920 }, { "epoch": 0.08712444922919835, "grad_norm": 0.9896363019943237, "learning_rate": 1.9627739982854625e-05, "loss": 0.5673, "step": 3925 }, { "epoch": 0.08723543578872599, "grad_norm": 0.9710890054702759, "learning_rate": 1.9626796893132955e-05, "loss": 0.4689, "step": 3930 }, { "epoch": 0.08734642234825363, "grad_norm": 1.296876311302185, "learning_rate": 1.9625852633017608e-05, "loss": 0.6207, "step": 3935 }, { "epoch": 0.08745740890778127, "grad_norm": 1.5721899271011353, "learning_rate": 1.9624907202623378e-05, "loss": 0.6262, "step": 3940 }, { "epoch": 0.08756839546730891, "grad_norm": 1.4297751188278198, "learning_rate": 1.9623960602065213e-05, "loss": 0.4816, "step": 3945 }, { "epoch": 0.08767938202683655, "grad_norm": 1.1989187002182007, "learning_rate": 1.962301283145819e-05, "loss": 0.6097, "step": 3950 }, { "epoch": 0.08779036858636419, "grad_norm": 1.1087952852249146, "learning_rate": 1.9622063890917543e-05, "loss": 0.5414, "step": 3955 }, { "epoch": 0.08790135514589183, "grad_norm": 1.1103991270065308, "learning_rate": 1.9621113780558635e-05, "loss": 0.6886, "step": 3960 }, { "epoch": 0.08801234170541947, "grad_norm": 1.356634259223938, "learning_rate": 1.962016250049698e-05, "loss": 0.5059, "step": 3965 }, { "epoch": 0.08812332826494712, "grad_norm": 1.3991785049438477, "learning_rate": 1.9619210050848236e-05, "loss": 0.5072, "step": 3970 }, { "epoch": 0.08823431482447476, "grad_norm": 1.5779005289077759, "learning_rate": 1.961825643172819e-05, "loss": 0.7137, "step": 3975 }, { "epoch": 0.0883453013840024, "grad_norm": 1.3491464853286743, "learning_rate": 1.9617301643252787e-05, "loss": 0.8226, "step": 3980 }, { "epoch": 0.08845628794353004, "grad_norm": 1.3575505018234253, "learning_rate": 1.9616345685538107e-05, "loss": 0.5308, "step": 3985 }, { "epoch": 0.08856727450305768, "grad_norm": 1.6040680408477783, "learning_rate": 1.9615388558700363e-05, "loss": 0.414, "step": 3990 }, { "epoch": 0.08867826106258532, "grad_norm": 1.4553582668304443, "learning_rate": 1.961443026285593e-05, "loss": 0.7059, "step": 3995 }, { "epoch": 0.08878924762211296, "grad_norm": 1.59603750705719, "learning_rate": 1.9613470798121307e-05, "loss": 0.6761, "step": 4000 }, { "epoch": 0.0889002341816406, "grad_norm": 1.192799687385559, "learning_rate": 1.9612510164613145e-05, "loss": 0.5735, "step": 4005 }, { "epoch": 0.08901122074116824, "grad_norm": 1.0729697942733765, "learning_rate": 1.961154836244824e-05, "loss": 0.6648, "step": 4010 }, { "epoch": 0.08912220730069588, "grad_norm": 1.526694893836975, "learning_rate": 1.9610585391743516e-05, "loss": 0.6473, "step": 4015 }, { "epoch": 0.08923319386022353, "grad_norm": 1.1947791576385498, "learning_rate": 1.960962125261605e-05, "loss": 0.6093, "step": 4020 }, { "epoch": 0.08934418041975117, "grad_norm": 1.1378854513168335, "learning_rate": 1.960865594518306e-05, "loss": 0.6483, "step": 4025 }, { "epoch": 0.08945516697927881, "grad_norm": 1.3792065382003784, "learning_rate": 1.9607689469561908e-05, "loss": 0.8147, "step": 4030 }, { "epoch": 0.08956615353880645, "grad_norm": 1.592315673828125, "learning_rate": 1.960672182587009e-05, "loss": 0.6142, "step": 4035 }, { "epoch": 0.0896771400983341, "grad_norm": 1.6274422407150269, "learning_rate": 1.9605753014225254e-05, "loss": 0.8193, "step": 4040 }, { "epoch": 0.08978812665786173, "grad_norm": 1.5865644216537476, "learning_rate": 1.9604783034745182e-05, "loss": 0.6778, "step": 4045 }, { "epoch": 0.08989911321738937, "grad_norm": 1.2761083841323853, "learning_rate": 1.9603811887547797e-05, "loss": 0.6685, "step": 4050 }, { "epoch": 0.09001009977691701, "grad_norm": 1.147282600402832, "learning_rate": 1.960283957275117e-05, "loss": 0.5395, "step": 4055 }, { "epoch": 0.09012108633644465, "grad_norm": 1.1976675987243652, "learning_rate": 1.9601866090473517e-05, "loss": 0.5897, "step": 4060 }, { "epoch": 0.09023207289597229, "grad_norm": 1.3472319841384888, "learning_rate": 1.9600891440833187e-05, "loss": 0.5855, "step": 4065 }, { "epoch": 0.09034305945549993, "grad_norm": 0.9857687950134277, "learning_rate": 1.9599915623948673e-05, "loss": 0.6031, "step": 4070 }, { "epoch": 0.09045404601502759, "grad_norm": 1.6844218969345093, "learning_rate": 1.9598938639938613e-05, "loss": 0.6718, "step": 4075 }, { "epoch": 0.09056503257455523, "grad_norm": 0.9948323965072632, "learning_rate": 1.9597960488921785e-05, "loss": 0.4619, "step": 4080 }, { "epoch": 0.09067601913408287, "grad_norm": 0.9627229571342468, "learning_rate": 1.959698117101711e-05, "loss": 0.43, "step": 4085 }, { "epoch": 0.0907870056936105, "grad_norm": 0.9202539324760437, "learning_rate": 1.959600068634365e-05, "loss": 0.5921, "step": 4090 }, { "epoch": 0.09089799225313815, "grad_norm": 1.9609408378601074, "learning_rate": 1.959501903502061e-05, "loss": 0.6661, "step": 4095 }, { "epoch": 0.09100897881266579, "grad_norm": 1.3210110664367676, "learning_rate": 1.9594036217167336e-05, "loss": 0.5077, "step": 4100 }, { "epoch": 0.09111996537219343, "grad_norm": 1.2981141805648804, "learning_rate": 1.9593052232903312e-05, "loss": 0.5604, "step": 4105 }, { "epoch": 0.09123095193172107, "grad_norm": 0.9609360098838806, "learning_rate": 1.9592067082348172e-05, "loss": 0.5489, "step": 4110 }, { "epoch": 0.0913419384912487, "grad_norm": 1.3565384149551392, "learning_rate": 1.9591080765621685e-05, "loss": 0.6582, "step": 4115 }, { "epoch": 0.09145292505077635, "grad_norm": 1.7355798482894897, "learning_rate": 1.9590093282843764e-05, "loss": 0.5174, "step": 4120 }, { "epoch": 0.09156391161030399, "grad_norm": 1.0244948863983154, "learning_rate": 1.9589104634134465e-05, "loss": 0.5201, "step": 4125 }, { "epoch": 0.09167489816983164, "grad_norm": 1.2683207988739014, "learning_rate": 1.9588114819613984e-05, "loss": 0.6843, "step": 4130 }, { "epoch": 0.09178588472935928, "grad_norm": 1.62088942527771, "learning_rate": 1.9587123839402658e-05, "loss": 0.7301, "step": 4135 }, { "epoch": 0.09189687128888692, "grad_norm": 1.758149266242981, "learning_rate": 1.9586131693620973e-05, "loss": 0.5188, "step": 4140 }, { "epoch": 0.09200785784841456, "grad_norm": 1.5698381662368774, "learning_rate": 1.9585138382389538e-05, "loss": 0.5827, "step": 4145 }, { "epoch": 0.0921188444079422, "grad_norm": 1.6577712297439575, "learning_rate": 1.9584143905829128e-05, "loss": 0.6137, "step": 4150 }, { "epoch": 0.09222983096746984, "grad_norm": 1.1802431344985962, "learning_rate": 1.9583148264060648e-05, "loss": 0.6676, "step": 4155 }, { "epoch": 0.09234081752699748, "grad_norm": 1.1364336013793945, "learning_rate": 1.9582151457205135e-05, "loss": 0.4683, "step": 4160 }, { "epoch": 0.09245180408652512, "grad_norm": 0.9396209716796875, "learning_rate": 1.958115348538379e-05, "loss": 0.5817, "step": 4165 }, { "epoch": 0.09256279064605276, "grad_norm": 1.372672438621521, "learning_rate": 1.9580154348717935e-05, "loss": 0.567, "step": 4170 }, { "epoch": 0.0926737772055804, "grad_norm": 1.1881585121154785, "learning_rate": 1.9579154047329045e-05, "loss": 0.5306, "step": 4175 }, { "epoch": 0.09278476376510804, "grad_norm": 1.2390387058258057, "learning_rate": 1.9578152581338726e-05, "loss": 0.5603, "step": 4180 }, { "epoch": 0.09289575032463569, "grad_norm": 1.2957234382629395, "learning_rate": 1.9577149950868744e-05, "loss": 0.4922, "step": 4185 }, { "epoch": 0.09300673688416333, "grad_norm": 1.6602461338043213, "learning_rate": 1.957614615604099e-05, "loss": 0.5974, "step": 4190 }, { "epoch": 0.09311772344369097, "grad_norm": 1.2627344131469727, "learning_rate": 1.9575141196977502e-05, "loss": 0.6923, "step": 4195 }, { "epoch": 0.09322871000321861, "grad_norm": 1.0888328552246094, "learning_rate": 1.957413507380046e-05, "loss": 0.614, "step": 4200 }, { "epoch": 0.09333969656274625, "grad_norm": 1.0514628887176514, "learning_rate": 1.9573127786632184e-05, "loss": 0.5061, "step": 4205 }, { "epoch": 0.09345068312227389, "grad_norm": 1.1055153608322144, "learning_rate": 1.9572119335595135e-05, "loss": 0.4432, "step": 4210 }, { "epoch": 0.09356166968180153, "grad_norm": 1.2091655731201172, "learning_rate": 1.9571109720811924e-05, "loss": 0.6272, "step": 4215 }, { "epoch": 0.09367265624132917, "grad_norm": 1.4013484716415405, "learning_rate": 1.957009894240529e-05, "loss": 0.5315, "step": 4220 }, { "epoch": 0.09378364280085681, "grad_norm": 1.4002299308776855, "learning_rate": 1.9569087000498123e-05, "loss": 0.5467, "step": 4225 }, { "epoch": 0.09389462936038445, "grad_norm": 1.728796124458313, "learning_rate": 1.956807389521345e-05, "loss": 0.6269, "step": 4230 }, { "epoch": 0.0940056159199121, "grad_norm": 1.7954356670379639, "learning_rate": 1.956705962667444e-05, "loss": 0.4745, "step": 4235 }, { "epoch": 0.09411660247943975, "grad_norm": 0.8677371740341187, "learning_rate": 1.956604419500441e-05, "loss": 0.6557, "step": 4240 }, { "epoch": 0.09422758903896739, "grad_norm": 1.9286059141159058, "learning_rate": 1.9565027600326806e-05, "loss": 0.5488, "step": 4245 }, { "epoch": 0.09433857559849503, "grad_norm": 1.1704398393630981, "learning_rate": 1.9564009842765225e-05, "loss": 0.4972, "step": 4250 }, { "epoch": 0.09444956215802267, "grad_norm": 1.3009753227233887, "learning_rate": 1.9562990922443404e-05, "loss": 0.5304, "step": 4255 }, { "epoch": 0.0945605487175503, "grad_norm": 0.9224164485931396, "learning_rate": 1.9561970839485218e-05, "loss": 0.5462, "step": 4260 }, { "epoch": 0.09467153527707795, "grad_norm": 1.1949145793914795, "learning_rate": 1.9560949594014684e-05, "loss": 0.5034, "step": 4265 }, { "epoch": 0.09478252183660558, "grad_norm": 1.560428261756897, "learning_rate": 1.9559927186155967e-05, "loss": 0.6407, "step": 4270 }, { "epoch": 0.09489350839613322, "grad_norm": 1.1759483814239502, "learning_rate": 1.955890361603336e-05, "loss": 0.5272, "step": 4275 }, { "epoch": 0.09500449495566086, "grad_norm": 1.8360085487365723, "learning_rate": 1.9557878883771312e-05, "loss": 0.5146, "step": 4280 }, { "epoch": 0.0951154815151885, "grad_norm": 1.4351441860198975, "learning_rate": 1.9556852989494403e-05, "loss": 0.5396, "step": 4285 }, { "epoch": 0.09522646807471616, "grad_norm": 1.1562596559524536, "learning_rate": 1.9555825933327358e-05, "loss": 0.4966, "step": 4290 }, { "epoch": 0.0953374546342438, "grad_norm": 1.140037178993225, "learning_rate": 1.9554797715395045e-05, "loss": 0.5588, "step": 4295 }, { "epoch": 0.09544844119377144, "grad_norm": 1.1700094938278198, "learning_rate": 1.955376833582247e-05, "loss": 0.4458, "step": 4300 }, { "epoch": 0.09555942775329908, "grad_norm": 1.3324131965637207, "learning_rate": 1.955273779473478e-05, "loss": 0.5194, "step": 4305 }, { "epoch": 0.09567041431282672, "grad_norm": 1.4174566268920898, "learning_rate": 1.9551706092257268e-05, "loss": 0.5176, "step": 4310 }, { "epoch": 0.09578140087235436, "grad_norm": 1.7331970930099487, "learning_rate": 1.9550673228515366e-05, "loss": 0.5411, "step": 4315 }, { "epoch": 0.095892387431882, "grad_norm": 1.608151912689209, "learning_rate": 1.954963920363464e-05, "loss": 0.5237, "step": 4320 }, { "epoch": 0.09600337399140964, "grad_norm": 1.16243314743042, "learning_rate": 1.9548604017740806e-05, "loss": 0.6231, "step": 4325 }, { "epoch": 0.09611436055093728, "grad_norm": 1.1721985340118408, "learning_rate": 1.954756767095972e-05, "loss": 0.6126, "step": 4330 }, { "epoch": 0.09622534711046492, "grad_norm": 1.2511540651321411, "learning_rate": 1.9546530163417374e-05, "loss": 0.6161, "step": 4335 }, { "epoch": 0.09633633366999256, "grad_norm": 1.3616172075271606, "learning_rate": 1.9545491495239913e-05, "loss": 0.6517, "step": 4340 }, { "epoch": 0.09644732022952021, "grad_norm": 1.3992600440979004, "learning_rate": 1.9544451666553603e-05, "loss": 0.5141, "step": 4345 }, { "epoch": 0.09655830678904785, "grad_norm": 1.0629327297210693, "learning_rate": 1.9543410677484873e-05, "loss": 0.6008, "step": 4350 }, { "epoch": 0.09666929334857549, "grad_norm": 1.3158684968948364, "learning_rate": 1.9542368528160276e-05, "loss": 0.5928, "step": 4355 }, { "epoch": 0.09678027990810313, "grad_norm": 1.9079759120941162, "learning_rate": 1.954132521870652e-05, "loss": 0.4011, "step": 4360 }, { "epoch": 0.09689126646763077, "grad_norm": 1.2915329933166504, "learning_rate": 1.954028074925044e-05, "loss": 0.4968, "step": 4365 }, { "epoch": 0.09700225302715841, "grad_norm": 0.9710379838943481, "learning_rate": 1.9539235119919025e-05, "loss": 0.4912, "step": 4370 }, { "epoch": 0.09711323958668605, "grad_norm": 1.1526905298233032, "learning_rate": 1.9538188330839393e-05, "loss": 0.4315, "step": 4375 }, { "epoch": 0.09722422614621369, "grad_norm": 1.2658145427703857, "learning_rate": 1.953714038213881e-05, "loss": 0.664, "step": 4380 }, { "epoch": 0.09733521270574133, "grad_norm": 1.0740182399749756, "learning_rate": 1.953609127394469e-05, "loss": 0.4432, "step": 4385 }, { "epoch": 0.09744619926526897, "grad_norm": 1.336108922958374, "learning_rate": 1.9535041006384572e-05, "loss": 0.5297, "step": 4390 }, { "epoch": 0.09755718582479661, "grad_norm": 1.3726085424423218, "learning_rate": 1.953398957958615e-05, "loss": 0.6767, "step": 4395 }, { "epoch": 0.09766817238432426, "grad_norm": 1.1402976512908936, "learning_rate": 1.9532936993677243e-05, "loss": 0.5696, "step": 4400 }, { "epoch": 0.0977791589438519, "grad_norm": 1.2232763767242432, "learning_rate": 1.9531883248785833e-05, "loss": 0.672, "step": 4405 }, { "epoch": 0.09789014550337954, "grad_norm": 1.2305266857147217, "learning_rate": 1.9530828345040022e-05, "loss": 0.5717, "step": 4410 }, { "epoch": 0.09800113206290718, "grad_norm": 0.9083310961723328, "learning_rate": 1.9529772282568064e-05, "loss": 0.6324, "step": 4415 }, { "epoch": 0.09811211862243482, "grad_norm": 0.9469552636146545, "learning_rate": 1.9528715061498355e-05, "loss": 0.4787, "step": 4420 }, { "epoch": 0.09822310518196246, "grad_norm": 1.191145896911621, "learning_rate": 1.9527656681959425e-05, "loss": 0.5145, "step": 4425 }, { "epoch": 0.0983340917414901, "grad_norm": 1.0690206289291382, "learning_rate": 1.9526597144079947e-05, "loss": 0.3813, "step": 4430 }, { "epoch": 0.09844507830101774, "grad_norm": 0.9900261163711548, "learning_rate": 1.952553644798874e-05, "loss": 0.5721, "step": 4435 }, { "epoch": 0.09855606486054538, "grad_norm": 1.2516738176345825, "learning_rate": 1.9524474593814756e-05, "loss": 0.5769, "step": 4440 }, { "epoch": 0.09866705142007302, "grad_norm": 1.4266631603240967, "learning_rate": 1.9523411581687097e-05, "loss": 0.4992, "step": 4445 }, { "epoch": 0.09877803797960066, "grad_norm": 1.3769086599349976, "learning_rate": 1.952234741173499e-05, "loss": 0.5517, "step": 4450 }, { "epoch": 0.09888902453912832, "grad_norm": 1.4339829683303833, "learning_rate": 1.9521282084087823e-05, "loss": 0.605, "step": 4455 }, { "epoch": 0.09900001109865596, "grad_norm": 1.2619751691818237, "learning_rate": 1.9520215598875117e-05, "loss": 0.7132, "step": 4460 }, { "epoch": 0.0991109976581836, "grad_norm": 1.1638000011444092, "learning_rate": 1.9519147956226522e-05, "loss": 0.4482, "step": 4465 }, { "epoch": 0.09922198421771124, "grad_norm": 1.5981131792068481, "learning_rate": 1.9518079156271844e-05, "loss": 0.6933, "step": 4470 }, { "epoch": 0.09933297077723888, "grad_norm": 1.3889281749725342, "learning_rate": 1.9517009199141025e-05, "loss": 0.553, "step": 4475 }, { "epoch": 0.09944395733676652, "grad_norm": 0.9670382738113403, "learning_rate": 1.9515938084964145e-05, "loss": 0.5892, "step": 4480 }, { "epoch": 0.09955494389629416, "grad_norm": 1.1507785320281982, "learning_rate": 1.9514865813871427e-05, "loss": 0.4446, "step": 4485 }, { "epoch": 0.0996659304558218, "grad_norm": 1.9023184776306152, "learning_rate": 1.9513792385993233e-05, "loss": 0.5241, "step": 4490 }, { "epoch": 0.09977691701534944, "grad_norm": 1.3594270944595337, "learning_rate": 1.9512717801460064e-05, "loss": 0.5864, "step": 4495 }, { "epoch": 0.09988790357487708, "grad_norm": 1.3646306991577148, "learning_rate": 1.9511642060402573e-05, "loss": 0.6054, "step": 4500 }, { "epoch": 0.09999889013440473, "grad_norm": 1.2112932205200195, "learning_rate": 1.9510565162951538e-05, "loss": 0.6177, "step": 4505 }, { "epoch": 0.10010987669393237, "grad_norm": 1.1686182022094727, "learning_rate": 1.9509487109237887e-05, "loss": 0.4071, "step": 4510 }, { "epoch": 0.10022086325346001, "grad_norm": 1.53573739528656, "learning_rate": 1.9508407899392682e-05, "loss": 0.6475, "step": 4515 }, { "epoch": 0.10033184981298765, "grad_norm": 1.5155771970748901, "learning_rate": 1.9507327533547137e-05, "loss": 0.6208, "step": 4520 }, { "epoch": 0.10044283637251529, "grad_norm": 1.6997084617614746, "learning_rate": 1.9506246011832595e-05, "loss": 0.5137, "step": 4525 }, { "epoch": 0.10055382293204293, "grad_norm": 1.683323621749878, "learning_rate": 1.9505163334380547e-05, "loss": 0.5684, "step": 4530 }, { "epoch": 0.10066480949157057, "grad_norm": 1.1800998449325562, "learning_rate": 1.9504079501322614e-05, "loss": 0.4743, "step": 4535 }, { "epoch": 0.10077579605109821, "grad_norm": 3.659878969192505, "learning_rate": 1.9502994512790568e-05, "loss": 0.51, "step": 4540 }, { "epoch": 0.10088678261062585, "grad_norm": 3.420764446258545, "learning_rate": 1.9501908368916326e-05, "loss": 0.5603, "step": 4545 }, { "epoch": 0.10099776917015349, "grad_norm": 1.8362491130828857, "learning_rate": 1.950082106983193e-05, "loss": 0.455, "step": 4550 }, { "epoch": 0.10110875572968113, "grad_norm": 1.11005699634552, "learning_rate": 1.9499732615669567e-05, "loss": 0.3984, "step": 4555 }, { "epoch": 0.10121974228920878, "grad_norm": 1.2431269884109497, "learning_rate": 1.9498643006561576e-05, "loss": 0.5654, "step": 4560 }, { "epoch": 0.10133072884873642, "grad_norm": 1.6340370178222656, "learning_rate": 1.9497552242640424e-05, "loss": 0.4444, "step": 4565 }, { "epoch": 0.10144171540826406, "grad_norm": 0.8741961121559143, "learning_rate": 1.949646032403872e-05, "loss": 0.6445, "step": 4570 }, { "epoch": 0.1015527019677917, "grad_norm": 1.056175708770752, "learning_rate": 1.949536725088922e-05, "loss": 0.7095, "step": 4575 }, { "epoch": 0.10166368852731934, "grad_norm": 1.3246148824691772, "learning_rate": 1.9494273023324814e-05, "loss": 0.4938, "step": 4580 }, { "epoch": 0.10177467508684698, "grad_norm": 1.1658315658569336, "learning_rate": 1.9493177641478538e-05, "loss": 0.6678, "step": 4585 }, { "epoch": 0.10188566164637462, "grad_norm": 1.5370222330093384, "learning_rate": 1.949208110548356e-05, "loss": 0.5387, "step": 4590 }, { "epoch": 0.10199664820590226, "grad_norm": 1.4635233879089355, "learning_rate": 1.9490983415473198e-05, "loss": 0.5722, "step": 4595 }, { "epoch": 0.1021076347654299, "grad_norm": 1.1367441415786743, "learning_rate": 1.9489884571580896e-05, "loss": 0.643, "step": 4600 }, { "epoch": 0.10221862132495754, "grad_norm": 1.5496591329574585, "learning_rate": 1.948878457394026e-05, "loss": 0.5476, "step": 4605 }, { "epoch": 0.10232960788448518, "grad_norm": 0.7282701134681702, "learning_rate": 1.9487683422685016e-05, "loss": 0.4071, "step": 4610 }, { "epoch": 0.10244059444401284, "grad_norm": 1.3425761461257935, "learning_rate": 1.9486581117949042e-05, "loss": 0.5461, "step": 4615 }, { "epoch": 0.10255158100354048, "grad_norm": 1.3550171852111816, "learning_rate": 1.9485477659866352e-05, "loss": 0.5746, "step": 4620 }, { "epoch": 0.10266256756306812, "grad_norm": 1.4267661571502686, "learning_rate": 1.9484373048571097e-05, "loss": 0.728, "step": 4625 }, { "epoch": 0.10277355412259576, "grad_norm": 0.9552726745605469, "learning_rate": 1.948326728419758e-05, "loss": 0.6333, "step": 4630 }, { "epoch": 0.1028845406821234, "grad_norm": 0.883658766746521, "learning_rate": 1.9482160366880224e-05, "loss": 0.5297, "step": 4635 }, { "epoch": 0.10299552724165104, "grad_norm": 1.2870208024978638, "learning_rate": 1.9481052296753617e-05, "loss": 0.5575, "step": 4640 }, { "epoch": 0.10310651380117868, "grad_norm": 1.1301369667053223, "learning_rate": 1.9479943073952462e-05, "loss": 0.5955, "step": 4645 }, { "epoch": 0.10321750036070632, "grad_norm": 1.2876123189926147, "learning_rate": 1.947883269861163e-05, "loss": 0.6565, "step": 4650 }, { "epoch": 0.10332848692023396, "grad_norm": 1.3741137981414795, "learning_rate": 1.94777211708661e-05, "loss": 0.4552, "step": 4655 }, { "epoch": 0.1034394734797616, "grad_norm": 1.6406534910202026, "learning_rate": 1.9476608490851023e-05, "loss": 0.6859, "step": 4660 }, { "epoch": 0.10355046003928924, "grad_norm": 1.4455381631851196, "learning_rate": 1.9475494658701663e-05, "loss": 0.5057, "step": 4665 }, { "epoch": 0.10366144659881689, "grad_norm": 1.0080723762512207, "learning_rate": 1.9474379674553445e-05, "loss": 0.5527, "step": 4670 }, { "epoch": 0.10377243315834453, "grad_norm": 1.3177790641784668, "learning_rate": 1.9473263538541916e-05, "loss": 0.7814, "step": 4675 }, { "epoch": 0.10388341971787217, "grad_norm": 0.9700146317481995, "learning_rate": 1.9472146250802778e-05, "loss": 0.5032, "step": 4680 }, { "epoch": 0.10399440627739981, "grad_norm": 3.535909414291382, "learning_rate": 1.947102781147187e-05, "loss": 0.4977, "step": 4685 }, { "epoch": 0.10410539283692745, "grad_norm": 1.0130237340927124, "learning_rate": 1.9469908220685158e-05, "loss": 0.4246, "step": 4690 }, { "epoch": 0.10421637939645509, "grad_norm": 1.2062435150146484, "learning_rate": 1.9468787478578765e-05, "loss": 0.5761, "step": 4695 }, { "epoch": 0.10432736595598273, "grad_norm": 1.0337311029434204, "learning_rate": 1.946766558528895e-05, "loss": 0.5987, "step": 4700 }, { "epoch": 0.10443835251551037, "grad_norm": 1.33479642868042, "learning_rate": 1.9466542540952105e-05, "loss": 0.426, "step": 4705 }, { "epoch": 0.10454933907503801, "grad_norm": 0.9896667003631592, "learning_rate": 1.9465418345704762e-05, "loss": 0.5159, "step": 4710 }, { "epoch": 0.10466032563456565, "grad_norm": 1.5902118682861328, "learning_rate": 1.9464292999683603e-05, "loss": 0.6882, "step": 4715 }, { "epoch": 0.10477131219409329, "grad_norm": 1.0780678987503052, "learning_rate": 1.9463166503025444e-05, "loss": 0.5413, "step": 4720 }, { "epoch": 0.10488229875362094, "grad_norm": 1.0648760795593262, "learning_rate": 1.9462038855867238e-05, "loss": 0.5374, "step": 4725 }, { "epoch": 0.10499328531314858, "grad_norm": 1.0896109342575073, "learning_rate": 1.9460910058346082e-05, "loss": 0.5873, "step": 4730 }, { "epoch": 0.10510427187267622, "grad_norm": 1.179976224899292, "learning_rate": 1.9459780110599204e-05, "loss": 0.5956, "step": 4735 }, { "epoch": 0.10521525843220386, "grad_norm": 1.718687891960144, "learning_rate": 1.945864901276399e-05, "loss": 0.6551, "step": 4740 }, { "epoch": 0.1053262449917315, "grad_norm": 1.0115402936935425, "learning_rate": 1.9457516764977954e-05, "loss": 0.5906, "step": 4745 }, { "epoch": 0.10543723155125914, "grad_norm": 1.2545442581176758, "learning_rate": 1.9456383367378744e-05, "loss": 0.5814, "step": 4750 }, { "epoch": 0.10554821811078678, "grad_norm": 0.939911425113678, "learning_rate": 1.945524882010416e-05, "loss": 0.5721, "step": 4755 }, { "epoch": 0.10565920467031442, "grad_norm": 1.1681970357894897, "learning_rate": 1.9454113123292133e-05, "loss": 0.6557, "step": 4760 }, { "epoch": 0.10577019122984206, "grad_norm": 1.1056734323501587, "learning_rate": 1.9452976277080743e-05, "loss": 0.622, "step": 4765 }, { "epoch": 0.1058811777893697, "grad_norm": 1.0342731475830078, "learning_rate": 1.94518382816082e-05, "loss": 0.5997, "step": 4770 }, { "epoch": 0.10599216434889736, "grad_norm": 1.1820440292358398, "learning_rate": 1.9450699137012852e-05, "loss": 0.5982, "step": 4775 }, { "epoch": 0.106103150908425, "grad_norm": 1.011205792427063, "learning_rate": 1.9449558843433202e-05, "loss": 0.4623, "step": 4780 }, { "epoch": 0.10621413746795264, "grad_norm": 1.4157917499542236, "learning_rate": 1.944841740100788e-05, "loss": 0.5038, "step": 4785 }, { "epoch": 0.10632512402748028, "grad_norm": 1.3615347146987915, "learning_rate": 1.9447274809875653e-05, "loss": 0.6751, "step": 4790 }, { "epoch": 0.10643611058700791, "grad_norm": 1.102033257484436, "learning_rate": 1.944613107017544e-05, "loss": 0.5384, "step": 4795 }, { "epoch": 0.10654709714653555, "grad_norm": 1.1616215705871582, "learning_rate": 1.9444986182046296e-05, "loss": 0.642, "step": 4800 }, { "epoch": 0.1066580837060632, "grad_norm": 1.0611157417297363, "learning_rate": 1.9443840145627408e-05, "loss": 0.6964, "step": 4805 }, { "epoch": 0.10676907026559083, "grad_norm": 1.1561607122421265, "learning_rate": 1.9442692961058104e-05, "loss": 0.5766, "step": 4810 }, { "epoch": 0.10688005682511847, "grad_norm": 1.2140254974365234, "learning_rate": 1.9441544628477858e-05, "loss": 0.5897, "step": 4815 }, { "epoch": 0.10699104338464611, "grad_norm": 1.4890080690383911, "learning_rate": 1.9440395148026283e-05, "loss": 0.6208, "step": 4820 }, { "epoch": 0.10710202994417375, "grad_norm": 1.263723373413086, "learning_rate": 1.9439244519843123e-05, "loss": 0.4529, "step": 4825 }, { "epoch": 0.10721301650370141, "grad_norm": 1.4165737628936768, "learning_rate": 1.943809274406827e-05, "loss": 0.6019, "step": 4830 }, { "epoch": 0.10732400306322905, "grad_norm": 0.9435001611709595, "learning_rate": 1.9436939820841757e-05, "loss": 0.7244, "step": 4835 }, { "epoch": 0.10743498962275669, "grad_norm": 1.3421839475631714, "learning_rate": 1.943578575030375e-05, "loss": 0.6204, "step": 4840 }, { "epoch": 0.10754597618228433, "grad_norm": 0.8362665176391602, "learning_rate": 1.9434630532594555e-05, "loss": 0.4819, "step": 4845 }, { "epoch": 0.10765696274181197, "grad_norm": 1.0521749258041382, "learning_rate": 1.9433474167854624e-05, "loss": 0.6576, "step": 4850 }, { "epoch": 0.10776794930133961, "grad_norm": 0.8353959321975708, "learning_rate": 1.943231665622454e-05, "loss": 0.7751, "step": 4855 }, { "epoch": 0.10787893586086725, "grad_norm": 0.8259255290031433, "learning_rate": 1.943115799784503e-05, "loss": 0.4372, "step": 4860 }, { "epoch": 0.10798992242039489, "grad_norm": 1.1912258863449097, "learning_rate": 1.9429998192856957e-05, "loss": 0.407, "step": 4865 }, { "epoch": 0.10810090897992253, "grad_norm": 0.8998766541481018, "learning_rate": 1.9428837241401334e-05, "loss": 0.5316, "step": 4870 }, { "epoch": 0.10821189553945017, "grad_norm": 1.0864561796188354, "learning_rate": 1.94276751436193e-05, "loss": 0.6402, "step": 4875 }, { "epoch": 0.10832288209897781, "grad_norm": 1.6458591222763062, "learning_rate": 1.942651189965214e-05, "loss": 0.4848, "step": 4880 }, { "epoch": 0.10843386865850546, "grad_norm": 1.5769037008285522, "learning_rate": 1.9425347509641276e-05, "loss": 0.5189, "step": 4885 }, { "epoch": 0.1085448552180331, "grad_norm": 1.1521767377853394, "learning_rate": 1.9424181973728274e-05, "loss": 0.5762, "step": 4890 }, { "epoch": 0.10865584177756074, "grad_norm": 1.4296083450317383, "learning_rate": 1.9423015292054834e-05, "loss": 0.6333, "step": 4895 }, { "epoch": 0.10876682833708838, "grad_norm": 1.4438923597335815, "learning_rate": 1.9421847464762793e-05, "loss": 0.7679, "step": 4900 }, { "epoch": 0.10887781489661602, "grad_norm": 0.8514357805252075, "learning_rate": 1.942067849199414e-05, "loss": 0.5387, "step": 4905 }, { "epoch": 0.10898880145614366, "grad_norm": 0.9357392191886902, "learning_rate": 1.941950837389099e-05, "loss": 0.5576, "step": 4910 }, { "epoch": 0.1090997880156713, "grad_norm": 1.3870668411254883, "learning_rate": 1.94183371105956e-05, "loss": 0.5933, "step": 4915 }, { "epoch": 0.10921077457519894, "grad_norm": 1.166416883468628, "learning_rate": 1.9417164702250374e-05, "loss": 0.5293, "step": 4920 }, { "epoch": 0.10932176113472658, "grad_norm": 1.2793128490447998, "learning_rate": 1.9415991148997843e-05, "loss": 0.6781, "step": 4925 }, { "epoch": 0.10943274769425422, "grad_norm": 1.0564727783203125, "learning_rate": 1.9414816450980686e-05, "loss": 0.6231, "step": 4930 }, { "epoch": 0.10954373425378186, "grad_norm": 1.6393249034881592, "learning_rate": 1.9413640608341725e-05, "loss": 0.7114, "step": 4935 }, { "epoch": 0.10965472081330951, "grad_norm": 1.1309027671813965, "learning_rate": 1.9412463621223904e-05, "loss": 0.4825, "step": 4940 }, { "epoch": 0.10976570737283715, "grad_norm": 1.133995771408081, "learning_rate": 1.9411285489770328e-05, "loss": 0.5351, "step": 4945 }, { "epoch": 0.1098766939323648, "grad_norm": 0.8685352802276611, "learning_rate": 1.941010621412422e-05, "loss": 0.499, "step": 4950 }, { "epoch": 0.10998768049189243, "grad_norm": 1.7553036212921143, "learning_rate": 1.9408925794428964e-05, "loss": 0.5974, "step": 4955 }, { "epoch": 0.11009866705142007, "grad_norm": 1.2436516284942627, "learning_rate": 1.940774423082806e-05, "loss": 0.5045, "step": 4960 }, { "epoch": 0.11020965361094771, "grad_norm": 1.2464370727539062, "learning_rate": 1.9406561523465164e-05, "loss": 0.5085, "step": 4965 }, { "epoch": 0.11032064017047535, "grad_norm": 1.2938464879989624, "learning_rate": 1.9405377672484068e-05, "loss": 0.4118, "step": 4970 }, { "epoch": 0.110431626730003, "grad_norm": 1.4582078456878662, "learning_rate": 1.9404192678028693e-05, "loss": 0.7055, "step": 4975 }, { "epoch": 0.11054261328953063, "grad_norm": 1.1133296489715576, "learning_rate": 1.9403006540243113e-05, "loss": 0.5686, "step": 4980 }, { "epoch": 0.11065359984905827, "grad_norm": 1.1432008743286133, "learning_rate": 1.9401819259271537e-05, "loss": 0.5702, "step": 4985 }, { "epoch": 0.11076458640858591, "grad_norm": 0.8886599540710449, "learning_rate": 1.9400630835258302e-05, "loss": 0.7432, "step": 4990 }, { "epoch": 0.11087557296811357, "grad_norm": 1.0646532773971558, "learning_rate": 1.93994412683479e-05, "loss": 0.5704, "step": 4995 }, { "epoch": 0.11098655952764121, "grad_norm": 1.7483314275741577, "learning_rate": 1.939825055868495e-05, "loss": 0.4863, "step": 5000 }, { "epoch": 0.11109754608716885, "grad_norm": 1.3594627380371094, "learning_rate": 1.939705870641422e-05, "loss": 0.4865, "step": 5005 }, { "epoch": 0.11120853264669649, "grad_norm": 1.101257085800171, "learning_rate": 1.9395865711680605e-05, "loss": 0.4982, "step": 5010 }, { "epoch": 0.11131951920622413, "grad_norm": 1.4391264915466309, "learning_rate": 1.9394671574629147e-05, "loss": 0.4815, "step": 5015 }, { "epoch": 0.11143050576575177, "grad_norm": 1.1944100856781006, "learning_rate": 1.9393476295405028e-05, "loss": 0.5674, "step": 5020 }, { "epoch": 0.1115414923252794, "grad_norm": 1.2506046295166016, "learning_rate": 1.9392279874153563e-05, "loss": 0.4759, "step": 5025 }, { "epoch": 0.11165247888480705, "grad_norm": 1.2169004678726196, "learning_rate": 1.9391082311020214e-05, "loss": 0.5196, "step": 5030 }, { "epoch": 0.11176346544433469, "grad_norm": 1.5347559452056885, "learning_rate": 1.938988360615057e-05, "loss": 0.5685, "step": 5035 }, { "epoch": 0.11187445200386233, "grad_norm": 1.0205435752868652, "learning_rate": 1.9388683759690365e-05, "loss": 0.4319, "step": 5040 }, { "epoch": 0.11198543856338998, "grad_norm": 1.3055386543273926, "learning_rate": 1.938748277178548e-05, "loss": 0.4752, "step": 5045 }, { "epoch": 0.11209642512291762, "grad_norm": 1.2094687223434448, "learning_rate": 1.938628064258192e-05, "loss": 0.5108, "step": 5050 }, { "epoch": 0.11220741168244526, "grad_norm": 1.497977614402771, "learning_rate": 1.938507737222584e-05, "loss": 0.6327, "step": 5055 }, { "epoch": 0.1123183982419729, "grad_norm": 1.4827046394348145, "learning_rate": 1.938387296086353e-05, "loss": 0.7167, "step": 5060 }, { "epoch": 0.11242938480150054, "grad_norm": 1.149739146232605, "learning_rate": 1.9382667408641413e-05, "loss": 0.4916, "step": 5065 }, { "epoch": 0.11254037136102818, "grad_norm": 1.4573441743850708, "learning_rate": 1.9381460715706064e-05, "loss": 0.5497, "step": 5070 }, { "epoch": 0.11265135792055582, "grad_norm": 1.1646956205368042, "learning_rate": 1.938025288220418e-05, "loss": 0.6231, "step": 5075 }, { "epoch": 0.11276234448008346, "grad_norm": 1.2401013374328613, "learning_rate": 1.937904390828261e-05, "loss": 0.5464, "step": 5080 }, { "epoch": 0.1128733310396111, "grad_norm": 0.9036456346511841, "learning_rate": 1.937783379408834e-05, "loss": 0.5749, "step": 5085 }, { "epoch": 0.11298431759913874, "grad_norm": 1.4067643880844116, "learning_rate": 1.9376622539768487e-05, "loss": 0.4189, "step": 5090 }, { "epoch": 0.11309530415866638, "grad_norm": 1.0962282419204712, "learning_rate": 1.9375410145470307e-05, "loss": 0.5467, "step": 5095 }, { "epoch": 0.11320629071819403, "grad_norm": 1.381117343902588, "learning_rate": 1.9374196611341212e-05, "loss": 0.5089, "step": 5100 }, { "epoch": 0.11331727727772167, "grad_norm": 1.3301773071289062, "learning_rate": 1.9372981937528728e-05, "loss": 0.7172, "step": 5105 }, { "epoch": 0.11342826383724931, "grad_norm": 1.3714052438735962, "learning_rate": 1.9371766124180532e-05, "loss": 0.6936, "step": 5110 }, { "epoch": 0.11353925039677695, "grad_norm": 1.2375777959823608, "learning_rate": 1.9370549171444443e-05, "loss": 0.5489, "step": 5115 }, { "epoch": 0.11365023695630459, "grad_norm": 1.1200803518295288, "learning_rate": 1.9369331079468413e-05, "loss": 0.5341, "step": 5120 }, { "epoch": 0.11376122351583223, "grad_norm": 1.386127233505249, "learning_rate": 1.936811184840053e-05, "loss": 0.3938, "step": 5125 }, { "epoch": 0.11387221007535987, "grad_norm": 1.595672607421875, "learning_rate": 1.9366891478389034e-05, "loss": 0.5444, "step": 5130 }, { "epoch": 0.11398319663488751, "grad_norm": 0.7065406441688538, "learning_rate": 1.936566996958228e-05, "loss": 0.4774, "step": 5135 }, { "epoch": 0.11409418319441515, "grad_norm": 1.3757734298706055, "learning_rate": 1.9364447322128784e-05, "loss": 0.594, "step": 5140 }, { "epoch": 0.11420516975394279, "grad_norm": 1.257797122001648, "learning_rate": 1.9363223536177186e-05, "loss": 0.6493, "step": 5145 }, { "epoch": 0.11431615631347043, "grad_norm": 1.1501637697219849, "learning_rate": 1.9361998611876272e-05, "loss": 0.4744, "step": 5150 }, { "epoch": 0.11442714287299809, "grad_norm": 1.1146260499954224, "learning_rate": 1.9360772549374968e-05, "loss": 0.4714, "step": 5155 }, { "epoch": 0.11453812943252573, "grad_norm": 1.1389085054397583, "learning_rate": 1.9359545348822326e-05, "loss": 0.602, "step": 5160 }, { "epoch": 0.11464911599205337, "grad_norm": 1.3476996421813965, "learning_rate": 1.935831701036755e-05, "loss": 0.677, "step": 5165 }, { "epoch": 0.114760102551581, "grad_norm": 1.0346547365188599, "learning_rate": 1.9357087534159982e-05, "loss": 0.7114, "step": 5170 }, { "epoch": 0.11487108911110865, "grad_norm": 1.2413575649261475, "learning_rate": 1.9355856920349092e-05, "loss": 0.6657, "step": 5175 }, { "epoch": 0.11498207567063629, "grad_norm": 1.1822632551193237, "learning_rate": 1.9354625169084494e-05, "loss": 0.5644, "step": 5180 }, { "epoch": 0.11509306223016393, "grad_norm": 1.2111988067626953, "learning_rate": 1.9353392280515938e-05, "loss": 0.505, "step": 5185 }, { "epoch": 0.11520404878969157, "grad_norm": 1.4630290269851685, "learning_rate": 1.935215825479332e-05, "loss": 0.4985, "step": 5190 }, { "epoch": 0.1153150353492192, "grad_norm": 1.3057409524917603, "learning_rate": 1.9350923092066668e-05, "loss": 0.541, "step": 5195 }, { "epoch": 0.11542602190874685, "grad_norm": 1.3751634359359741, "learning_rate": 1.9349686792486143e-05, "loss": 0.6219, "step": 5200 }, { "epoch": 0.11553700846827449, "grad_norm": 0.8946861028671265, "learning_rate": 1.9348449356202054e-05, "loss": 0.4943, "step": 5205 }, { "epoch": 0.11564799502780214, "grad_norm": 1.3821775913238525, "learning_rate": 1.9347210783364846e-05, "loss": 0.5903, "step": 5210 }, { "epoch": 0.11575898158732978, "grad_norm": 1.5298506021499634, "learning_rate": 1.93459710741251e-05, "loss": 0.6664, "step": 5215 }, { "epoch": 0.11586996814685742, "grad_norm": 1.3290419578552246, "learning_rate": 1.9344730228633535e-05, "loss": 0.681, "step": 5220 }, { "epoch": 0.11598095470638506, "grad_norm": 0.906023383140564, "learning_rate": 1.934348824704101e-05, "loss": 0.4962, "step": 5225 }, { "epoch": 0.1160919412659127, "grad_norm": 1.286221981048584, "learning_rate": 1.9342245129498516e-05, "loss": 0.6054, "step": 5230 }, { "epoch": 0.11620292782544034, "grad_norm": 1.4231698513031006, "learning_rate": 1.9341000876157193e-05, "loss": 0.487, "step": 5235 }, { "epoch": 0.11631391438496798, "grad_norm": 1.2439308166503906, "learning_rate": 1.933975548716831e-05, "loss": 0.5635, "step": 5240 }, { "epoch": 0.11642490094449562, "grad_norm": 1.6440762281417847, "learning_rate": 1.9338508962683278e-05, "loss": 0.6192, "step": 5245 }, { "epoch": 0.11653588750402326, "grad_norm": 1.43458092212677, "learning_rate": 1.9337261302853644e-05, "loss": 0.6437, "step": 5250 }, { "epoch": 0.1166468740635509, "grad_norm": 1.1676011085510254, "learning_rate": 1.9336012507831097e-05, "loss": 0.4349, "step": 5255 }, { "epoch": 0.11675786062307855, "grad_norm": 1.3660240173339844, "learning_rate": 1.9334762577767458e-05, "loss": 0.4539, "step": 5260 }, { "epoch": 0.11686884718260619, "grad_norm": 1.2643660306930542, "learning_rate": 1.9333511512814692e-05, "loss": 0.6076, "step": 5265 }, { "epoch": 0.11697983374213383, "grad_norm": 0.8496934175491333, "learning_rate": 1.93322593131249e-05, "loss": 0.6067, "step": 5270 }, { "epoch": 0.11709082030166147, "grad_norm": 0.9153058528900146, "learning_rate": 1.933100597885032e-05, "loss": 0.4675, "step": 5275 }, { "epoch": 0.11720180686118911, "grad_norm": 1.0530915260314941, "learning_rate": 1.932975151014332e-05, "loss": 0.5721, "step": 5280 }, { "epoch": 0.11731279342071675, "grad_norm": 1.0556403398513794, "learning_rate": 1.932849590715643e-05, "loss": 0.5501, "step": 5285 }, { "epoch": 0.11742377998024439, "grad_norm": 1.5842150449752808, "learning_rate": 1.9327239170042288e-05, "loss": 0.3836, "step": 5290 }, { "epoch": 0.11753476653977203, "grad_norm": 0.7407785654067993, "learning_rate": 1.9325981298953688e-05, "loss": 0.5094, "step": 5295 }, { "epoch": 0.11764575309929967, "grad_norm": 1.1622848510742188, "learning_rate": 1.932472229404356e-05, "loss": 0.5122, "step": 5300 }, { "epoch": 0.11775673965882731, "grad_norm": 1.4510892629623413, "learning_rate": 1.9323462155464967e-05, "loss": 0.6759, "step": 5305 }, { "epoch": 0.11786772621835495, "grad_norm": 1.4540632963180542, "learning_rate": 1.9322200883371118e-05, "loss": 0.575, "step": 5310 }, { "epoch": 0.1179787127778826, "grad_norm": 1.0026957988739014, "learning_rate": 1.9320938477915346e-05, "loss": 0.5546, "step": 5315 }, { "epoch": 0.11808969933741024, "grad_norm": 1.5292295217514038, "learning_rate": 1.931967493925113e-05, "loss": 0.4083, "step": 5320 }, { "epoch": 0.11820068589693788, "grad_norm": 1.1803737878799438, "learning_rate": 1.9318410267532096e-05, "loss": 0.581, "step": 5325 }, { "epoch": 0.11831167245646552, "grad_norm": 0.9704664945602417, "learning_rate": 1.9317144462911992e-05, "loss": 0.5058, "step": 5330 }, { "epoch": 0.11842265901599316, "grad_norm": 0.9553489089012146, "learning_rate": 1.9315877525544712e-05, "loss": 0.486, "step": 5335 }, { "epoch": 0.1185336455755208, "grad_norm": 1.0737321376800537, "learning_rate": 1.9314609455584285e-05, "loss": 0.5671, "step": 5340 }, { "epoch": 0.11864463213504844, "grad_norm": 1.0113078355789185, "learning_rate": 1.931334025318488e-05, "loss": 0.4365, "step": 5345 }, { "epoch": 0.11875561869457608, "grad_norm": 1.2628003358840942, "learning_rate": 1.93120699185008e-05, "loss": 0.5765, "step": 5350 }, { "epoch": 0.11886660525410372, "grad_norm": 1.0682296752929688, "learning_rate": 1.9310798451686488e-05, "loss": 0.7266, "step": 5355 }, { "epoch": 0.11897759181363136, "grad_norm": 1.291569471359253, "learning_rate": 1.9309525852896533e-05, "loss": 0.4749, "step": 5360 }, { "epoch": 0.119088578373159, "grad_norm": 1.0546334981918335, "learning_rate": 1.9308252122285643e-05, "loss": 0.353, "step": 5365 }, { "epoch": 0.11919956493268666, "grad_norm": 1.0950523614883423, "learning_rate": 1.9306977260008676e-05, "loss": 0.5275, "step": 5370 }, { "epoch": 0.1193105514922143, "grad_norm": 1.3744865655899048, "learning_rate": 1.9305701266220626e-05, "loss": 0.552, "step": 5375 }, { "epoch": 0.11942153805174194, "grad_norm": 1.0956076383590698, "learning_rate": 1.9304424141076627e-05, "loss": 0.5896, "step": 5380 }, { "epoch": 0.11953252461126958, "grad_norm": 1.3505736589431763, "learning_rate": 1.9303145884731946e-05, "loss": 0.5449, "step": 5385 }, { "epoch": 0.11964351117079722, "grad_norm": 1.0760904550552368, "learning_rate": 1.9301866497341984e-05, "loss": 0.5097, "step": 5390 }, { "epoch": 0.11975449773032486, "grad_norm": 1.2607932090759277, "learning_rate": 1.9300585979062295e-05, "loss": 0.5546, "step": 5395 }, { "epoch": 0.1198654842898525, "grad_norm": 1.0783039331436157, "learning_rate": 1.9299304330048554e-05, "loss": 0.5221, "step": 5400 }, { "epoch": 0.11997647084938014, "grad_norm": 1.6365145444869995, "learning_rate": 1.929802155045658e-05, "loss": 0.5797, "step": 5405 }, { "epoch": 0.12008745740890778, "grad_norm": 1.2198350429534912, "learning_rate": 1.9296737640442325e-05, "loss": 0.5849, "step": 5410 }, { "epoch": 0.12019844396843542, "grad_norm": 1.1546142101287842, "learning_rate": 1.929545260016189e-05, "loss": 0.5482, "step": 5415 }, { "epoch": 0.12030943052796306, "grad_norm": 1.031053900718689, "learning_rate": 1.92941664297715e-05, "loss": 0.6086, "step": 5420 }, { "epoch": 0.12042041708749071, "grad_norm": 1.257300615310669, "learning_rate": 1.9292879129427528e-05, "loss": 0.5569, "step": 5425 }, { "epoch": 0.12053140364701835, "grad_norm": 1.3408029079437256, "learning_rate": 1.9291590699286474e-05, "loss": 0.5987, "step": 5430 }, { "epoch": 0.12064239020654599, "grad_norm": 1.0354056358337402, "learning_rate": 1.9290301139504988e-05, "loss": 0.5358, "step": 5435 }, { "epoch": 0.12075337676607363, "grad_norm": 1.4962389469146729, "learning_rate": 1.9289010450239843e-05, "loss": 0.5749, "step": 5440 }, { "epoch": 0.12086436332560127, "grad_norm": 0.8980013132095337, "learning_rate": 1.9287718631647964e-05, "loss": 0.4667, "step": 5445 }, { "epoch": 0.12097534988512891, "grad_norm": 1.0994272232055664, "learning_rate": 1.9286425683886403e-05, "loss": 0.453, "step": 5450 }, { "epoch": 0.12108633644465655, "grad_norm": 1.1135613918304443, "learning_rate": 1.928513160711235e-05, "loss": 0.5495, "step": 5455 }, { "epoch": 0.12119732300418419, "grad_norm": 1.1412851810455322, "learning_rate": 1.9283836401483132e-05, "loss": 0.4949, "step": 5460 }, { "epoch": 0.12130830956371183, "grad_norm": 1.3352820873260498, "learning_rate": 1.9282540067156224e-05, "loss": 0.6287, "step": 5465 }, { "epoch": 0.12141929612323947, "grad_norm": 1.2998685836791992, "learning_rate": 1.9281242604289228e-05, "loss": 0.4789, "step": 5470 }, { "epoch": 0.12153028268276711, "grad_norm": 0.9999222755432129, "learning_rate": 1.927994401303988e-05, "loss": 0.4718, "step": 5475 }, { "epoch": 0.12164126924229476, "grad_norm": 1.091381549835205, "learning_rate": 1.9278644293566064e-05, "loss": 0.5984, "step": 5480 }, { "epoch": 0.1217522558018224, "grad_norm": 1.0779790878295898, "learning_rate": 1.9277343446025788e-05, "loss": 0.5483, "step": 5485 }, { "epoch": 0.12186324236135004, "grad_norm": 1.052999496459961, "learning_rate": 1.9276041470577213e-05, "loss": 0.5924, "step": 5490 }, { "epoch": 0.12197422892087768, "grad_norm": 1.0639392137527466, "learning_rate": 1.9274738367378627e-05, "loss": 0.5566, "step": 5495 }, { "epoch": 0.12208521548040532, "grad_norm": 1.0299577713012695, "learning_rate": 1.927343413658845e-05, "loss": 0.4247, "step": 5500 }, { "epoch": 0.12219620203993296, "grad_norm": 0.8614324331283569, "learning_rate": 1.9272128778365258e-05, "loss": 0.7018, "step": 5505 }, { "epoch": 0.1223071885994606, "grad_norm": 1.3008124828338623, "learning_rate": 1.9270822292867742e-05, "loss": 0.5816, "step": 5510 }, { "epoch": 0.12241817515898824, "grad_norm": 0.9771310687065125, "learning_rate": 1.9269514680254742e-05, "loss": 0.5727, "step": 5515 }, { "epoch": 0.12252916171851588, "grad_norm": 0.9506747126579285, "learning_rate": 1.9268205940685236e-05, "loss": 0.5689, "step": 5520 }, { "epoch": 0.12264014827804352, "grad_norm": 0.9057345986366272, "learning_rate": 1.9266896074318335e-05, "loss": 0.5966, "step": 5525 }, { "epoch": 0.12275113483757118, "grad_norm": 1.8075003623962402, "learning_rate": 1.926558508131329e-05, "loss": 0.5467, "step": 5530 }, { "epoch": 0.12286212139709882, "grad_norm": 1.395269751548767, "learning_rate": 1.9264272961829484e-05, "loss": 0.4531, "step": 5535 }, { "epoch": 0.12297310795662646, "grad_norm": 0.9755896329879761, "learning_rate": 1.926295971602644e-05, "loss": 0.4991, "step": 5540 }, { "epoch": 0.1230840945161541, "grad_norm": 1.3618781566619873, "learning_rate": 1.926164534406382e-05, "loss": 0.4559, "step": 5545 }, { "epoch": 0.12319508107568174, "grad_norm": 1.4191176891326904, "learning_rate": 1.926032984610142e-05, "loss": 0.5268, "step": 5550 }, { "epoch": 0.12330606763520938, "grad_norm": 1.3266234397888184, "learning_rate": 1.9259013222299174e-05, "loss": 0.6116, "step": 5555 }, { "epoch": 0.12341705419473702, "grad_norm": 1.6718363761901855, "learning_rate": 1.9257695472817152e-05, "loss": 0.4408, "step": 5560 }, { "epoch": 0.12352804075426466, "grad_norm": 1.0698461532592773, "learning_rate": 1.9256376597815565e-05, "loss": 0.3938, "step": 5565 }, { "epoch": 0.1236390273137923, "grad_norm": 0.9428433179855347, "learning_rate": 1.9255056597454755e-05, "loss": 0.6155, "step": 5570 }, { "epoch": 0.12375001387331994, "grad_norm": 1.1381133794784546, "learning_rate": 1.9253735471895198e-05, "loss": 0.5286, "step": 5575 }, { "epoch": 0.12386100043284758, "grad_norm": 0.7855455279350281, "learning_rate": 1.925241322129752e-05, "loss": 0.578, "step": 5580 }, { "epoch": 0.12397198699237523, "grad_norm": 0.7438721060752869, "learning_rate": 1.9251089845822472e-05, "loss": 0.452, "step": 5585 }, { "epoch": 0.12408297355190287, "grad_norm": 1.136475682258606, "learning_rate": 1.9249765345630948e-05, "loss": 0.469, "step": 5590 }, { "epoch": 0.12419396011143051, "grad_norm": 1.2477785348892212, "learning_rate": 1.9248439720883975e-05, "loss": 0.4939, "step": 5595 }, { "epoch": 0.12430494667095815, "grad_norm": 1.3942158222198486, "learning_rate": 1.9247112971742713e-05, "loss": 0.5784, "step": 5600 }, { "epoch": 0.12441593323048579, "grad_norm": 1.3450487852096558, "learning_rate": 1.9245785098368474e-05, "loss": 0.5385, "step": 5605 }, { "epoch": 0.12452691979001343, "grad_norm": 0.8705973029136658, "learning_rate": 1.924445610092269e-05, "loss": 0.3492, "step": 5610 }, { "epoch": 0.12463790634954107, "grad_norm": 2.236539602279663, "learning_rate": 1.9243125979566933e-05, "loss": 0.634, "step": 5615 }, { "epoch": 0.12474889290906871, "grad_norm": 1.088524580001831, "learning_rate": 1.924179473446292e-05, "loss": 0.4404, "step": 5620 }, { "epoch": 0.12485987946859635, "grad_norm": 1.0636274814605713, "learning_rate": 1.9240462365772495e-05, "loss": 0.3987, "step": 5625 }, { "epoch": 0.12497086602812399, "grad_norm": 1.0778905153274536, "learning_rate": 1.923912887365765e-05, "loss": 0.6523, "step": 5630 }, { "epoch": 0.12508185258765164, "grad_norm": 1.0517983436584473, "learning_rate": 1.9237794258280503e-05, "loss": 0.5392, "step": 5635 }, { "epoch": 0.12519283914717927, "grad_norm": 1.3265271186828613, "learning_rate": 1.923645851980331e-05, "loss": 0.7403, "step": 5640 }, { "epoch": 0.12530382570670692, "grad_norm": 1.2673416137695312, "learning_rate": 1.9235121658388463e-05, "loss": 0.364, "step": 5645 }, { "epoch": 0.12541481226623455, "grad_norm": 1.2772183418273926, "learning_rate": 1.9233783674198502e-05, "loss": 0.6218, "step": 5650 }, { "epoch": 0.1255257988257622, "grad_norm": 0.9975202083587646, "learning_rate": 1.9232444567396088e-05, "loss": 0.4819, "step": 5655 }, { "epoch": 0.12563678538528983, "grad_norm": 1.3848180770874023, "learning_rate": 1.9231104338144027e-05, "loss": 0.6272, "step": 5660 }, { "epoch": 0.12574777194481748, "grad_norm": 1.2946040630340576, "learning_rate": 1.9229762986605257e-05, "loss": 0.6036, "step": 5665 }, { "epoch": 0.12585875850434514, "grad_norm": 1.101122498512268, "learning_rate": 1.922842051294286e-05, "loss": 0.5324, "step": 5670 }, { "epoch": 0.12596974506387276, "grad_norm": 1.2069895267486572, "learning_rate": 1.9227076917320045e-05, "loss": 0.5448, "step": 5675 }, { "epoch": 0.12608073162340042, "grad_norm": 0.9136874675750732, "learning_rate": 1.9225732199900164e-05, "loss": 0.5141, "step": 5680 }, { "epoch": 0.12619171818292804, "grad_norm": 1.1953130960464478, "learning_rate": 1.92243863608467e-05, "loss": 0.5794, "step": 5685 }, { "epoch": 0.1263027047424557, "grad_norm": 0.9920978546142578, "learning_rate": 1.9223039400323284e-05, "loss": 0.6553, "step": 5690 }, { "epoch": 0.12641369130198332, "grad_norm": 1.174495816230774, "learning_rate": 1.9221691318493666e-05, "loss": 0.4533, "step": 5695 }, { "epoch": 0.12652467786151098, "grad_norm": 1.5274707078933716, "learning_rate": 1.9220342115521746e-05, "loss": 0.6146, "step": 5700 }, { "epoch": 0.1266356644210386, "grad_norm": 1.2732661962509155, "learning_rate": 1.9218991791571553e-05, "loss": 0.4841, "step": 5705 }, { "epoch": 0.12674665098056626, "grad_norm": 1.6695985794067383, "learning_rate": 1.921764034680726e-05, "loss": 0.5141, "step": 5710 }, { "epoch": 0.12685763754009388, "grad_norm": 0.9126291275024414, "learning_rate": 1.9216287781393165e-05, "loss": 0.6414, "step": 5715 }, { "epoch": 0.12696862409962154, "grad_norm": 1.1736328601837158, "learning_rate": 1.9214934095493706e-05, "loss": 0.5365, "step": 5720 }, { "epoch": 0.1270796106591492, "grad_norm": 1.4496265649795532, "learning_rate": 1.921357928927347e-05, "loss": 0.6288, "step": 5725 }, { "epoch": 0.12719059721867682, "grad_norm": 1.6686094999313354, "learning_rate": 1.921222336289716e-05, "loss": 0.5421, "step": 5730 }, { "epoch": 0.12730158377820447, "grad_norm": 1.0650641918182373, "learning_rate": 1.921086631652963e-05, "loss": 0.5287, "step": 5735 }, { "epoch": 0.1274125703377321, "grad_norm": 1.0161991119384766, "learning_rate": 1.9209508150335864e-05, "loss": 0.4341, "step": 5740 }, { "epoch": 0.12752355689725975, "grad_norm": 1.4244519472122192, "learning_rate": 1.9208148864480987e-05, "loss": 0.5102, "step": 5745 }, { "epoch": 0.12763454345678737, "grad_norm": 1.0328584909439087, "learning_rate": 1.920678845913025e-05, "loss": 0.2526, "step": 5750 }, { "epoch": 0.12774553001631503, "grad_norm": 1.2607382535934448, "learning_rate": 1.9205426934449047e-05, "loss": 0.5606, "step": 5755 }, { "epoch": 0.12785651657584265, "grad_norm": 1.7482237815856934, "learning_rate": 1.9204064290602912e-05, "loss": 0.4606, "step": 5760 }, { "epoch": 0.1279675031353703, "grad_norm": 1.1892881393432617, "learning_rate": 1.920270052775751e-05, "loss": 0.4634, "step": 5765 }, { "epoch": 0.12807848969489793, "grad_norm": 1.5729913711547852, "learning_rate": 1.920133564607864e-05, "loss": 0.6063, "step": 5770 }, { "epoch": 0.1281894762544256, "grad_norm": 1.244259238243103, "learning_rate": 1.9199969645732238e-05, "loss": 0.5447, "step": 5775 }, { "epoch": 0.12830046281395324, "grad_norm": 1.4631567001342773, "learning_rate": 1.9198602526884388e-05, "loss": 0.5313, "step": 5780 }, { "epoch": 0.12841144937348087, "grad_norm": 1.0688591003417969, "learning_rate": 1.9197234289701286e-05, "loss": 0.5582, "step": 5785 }, { "epoch": 0.12852243593300852, "grad_norm": 1.647247552871704, "learning_rate": 1.9195864934349286e-05, "loss": 0.5988, "step": 5790 }, { "epoch": 0.12863342249253615, "grad_norm": 1.6146548986434937, "learning_rate": 1.919449446099487e-05, "loss": 0.5862, "step": 5795 }, { "epoch": 0.1287444090520638, "grad_norm": 1.2010599374771118, "learning_rate": 1.919312286980465e-05, "loss": 0.6445, "step": 5800 }, { "epoch": 0.12885539561159143, "grad_norm": 1.0428661108016968, "learning_rate": 1.9191750160945382e-05, "loss": 0.4673, "step": 5805 }, { "epoch": 0.12896638217111908, "grad_norm": 0.9652555584907532, "learning_rate": 1.9190376334583963e-05, "loss": 0.4984, "step": 5810 }, { "epoch": 0.1290773687306467, "grad_norm": 1.1768306493759155, "learning_rate": 1.9189001390887404e-05, "loss": 0.5078, "step": 5815 }, { "epoch": 0.12918835529017436, "grad_norm": 1.0328947305679321, "learning_rate": 1.918762533002288e-05, "loss": 0.5983, "step": 5820 }, { "epoch": 0.129299341849702, "grad_norm": 1.2240079641342163, "learning_rate": 1.9186248152157676e-05, "loss": 0.6412, "step": 5825 }, { "epoch": 0.12941032840922964, "grad_norm": 1.1072537899017334, "learning_rate": 1.9184869857459233e-05, "loss": 0.45, "step": 5830 }, { "epoch": 0.1295213149687573, "grad_norm": 1.5246691703796387, "learning_rate": 1.9183490446095116e-05, "loss": 0.6749, "step": 5835 }, { "epoch": 0.12963230152828492, "grad_norm": 1.4271918535232544, "learning_rate": 1.9182109918233024e-05, "loss": 0.626, "step": 5840 }, { "epoch": 0.12974328808781257, "grad_norm": 1.091291904449463, "learning_rate": 1.918072827404081e-05, "loss": 0.4738, "step": 5845 }, { "epoch": 0.1298542746473402, "grad_norm": 1.253570795059204, "learning_rate": 1.9179345513686442e-05, "loss": 0.6955, "step": 5850 }, { "epoch": 0.12996526120686785, "grad_norm": 0.9824383854866028, "learning_rate": 1.9177961637338027e-05, "loss": 0.6052, "step": 5855 }, { "epoch": 0.13007624776639548, "grad_norm": 1.1693501472473145, "learning_rate": 1.9176576645163816e-05, "loss": 0.5972, "step": 5860 }, { "epoch": 0.13018723432592313, "grad_norm": 1.0172988176345825, "learning_rate": 1.9175190537332198e-05, "loss": 0.4938, "step": 5865 }, { "epoch": 0.13029822088545076, "grad_norm": 1.4785795211791992, "learning_rate": 1.9173803314011682e-05, "loss": 0.5899, "step": 5870 }, { "epoch": 0.13040920744497841, "grad_norm": 1.265539526939392, "learning_rate": 1.9172414975370925e-05, "loss": 0.5751, "step": 5875 }, { "epoch": 0.13052019400450604, "grad_norm": 1.1638941764831543, "learning_rate": 1.917102552157872e-05, "loss": 0.6387, "step": 5880 }, { "epoch": 0.1306311805640337, "grad_norm": 1.3803809881210327, "learning_rate": 1.9169634952803988e-05, "loss": 0.5157, "step": 5885 }, { "epoch": 0.13074216712356135, "grad_norm": 1.2643953561782837, "learning_rate": 1.916824326921579e-05, "loss": 0.4576, "step": 5890 }, { "epoch": 0.13085315368308897, "grad_norm": 1.0698553323745728, "learning_rate": 1.9166850470983323e-05, "loss": 0.5362, "step": 5895 }, { "epoch": 0.13096414024261663, "grad_norm": 1.2042971849441528, "learning_rate": 1.916545655827592e-05, "loss": 0.5263, "step": 5900 }, { "epoch": 0.13107512680214425, "grad_norm": 1.6081792116165161, "learning_rate": 1.9164061531263047e-05, "loss": 0.4717, "step": 5905 }, { "epoch": 0.1311861133616719, "grad_norm": 1.3085589408874512, "learning_rate": 1.9162665390114305e-05, "loss": 0.4643, "step": 5910 }, { "epoch": 0.13129709992119953, "grad_norm": 1.3212761878967285, "learning_rate": 1.916126813499944e-05, "loss": 0.5564, "step": 5915 }, { "epoch": 0.1314080864807272, "grad_norm": 0.9593100547790527, "learning_rate": 1.9159869766088315e-05, "loss": 0.37, "step": 5920 }, { "epoch": 0.1315190730402548, "grad_norm": 1.0401902198791504, "learning_rate": 1.9158470283550944e-05, "loss": 0.6124, "step": 5925 }, { "epoch": 0.13163005959978247, "grad_norm": 1.2800883054733276, "learning_rate": 1.915706968755747e-05, "loss": 0.4894, "step": 5930 }, { "epoch": 0.13174104615931012, "grad_norm": 1.1242331266403198, "learning_rate": 1.9155667978278175e-05, "loss": 0.5963, "step": 5935 }, { "epoch": 0.13185203271883775, "grad_norm": 1.1818042993545532, "learning_rate": 1.9154265155883473e-05, "loss": 0.4622, "step": 5940 }, { "epoch": 0.1319630192783654, "grad_norm": 0.9774150848388672, "learning_rate": 1.9152861220543918e-05, "loss": 0.5023, "step": 5945 }, { "epoch": 0.13207400583789303, "grad_norm": 0.8884637951850891, "learning_rate": 1.9151456172430186e-05, "loss": 0.3321, "step": 5950 }, { "epoch": 0.13218499239742068, "grad_norm": 1.5701459646224976, "learning_rate": 1.9150050011713105e-05, "loss": 0.5133, "step": 5955 }, { "epoch": 0.1322959789569483, "grad_norm": 0.9315885901451111, "learning_rate": 1.9148642738563636e-05, "loss": 0.5302, "step": 5960 }, { "epoch": 0.13240696551647596, "grad_norm": 1.5833088159561157, "learning_rate": 1.9147234353152862e-05, "loss": 0.5988, "step": 5965 }, { "epoch": 0.1325179520760036, "grad_norm": 1.3734363317489624, "learning_rate": 1.914582485565201e-05, "loss": 0.5986, "step": 5970 }, { "epoch": 0.13262893863553124, "grad_norm": 1.3130841255187988, "learning_rate": 1.9144414246232448e-05, "loss": 0.4492, "step": 5975 }, { "epoch": 0.13273992519505887, "grad_norm": 1.4003158807754517, "learning_rate": 1.914300252506567e-05, "loss": 0.3576, "step": 5980 }, { "epoch": 0.13285091175458652, "grad_norm": 0.9939400553703308, "learning_rate": 1.9141589692323304e-05, "loss": 0.413, "step": 5985 }, { "epoch": 0.13296189831411417, "grad_norm": 1.0590107440948486, "learning_rate": 1.9140175748177126e-05, "loss": 0.5614, "step": 5990 }, { "epoch": 0.1330728848736418, "grad_norm": 1.2037274837493896, "learning_rate": 1.9138760692799033e-05, "loss": 0.4923, "step": 5995 }, { "epoch": 0.13318387143316945, "grad_norm": 1.8287067413330078, "learning_rate": 1.9137344526361064e-05, "loss": 0.4642, "step": 6000 }, { "epoch": 0.13329485799269708, "grad_norm": 1.330986499786377, "learning_rate": 1.9135927249035393e-05, "loss": 0.5365, "step": 6005 }, { "epoch": 0.13340584455222473, "grad_norm": 0.9285293817520142, "learning_rate": 1.9134508860994323e-05, "loss": 0.4484, "step": 6010 }, { "epoch": 0.13351683111175236, "grad_norm": 0.8908197283744812, "learning_rate": 1.9133089362410305e-05, "loss": 0.4485, "step": 6015 }, { "epoch": 0.13362781767128, "grad_norm": 1.3462244272232056, "learning_rate": 1.9131668753455906e-05, "loss": 0.5788, "step": 6020 }, { "epoch": 0.13373880423080764, "grad_norm": 0.9598202109336853, "learning_rate": 1.9130247034303852e-05, "loss": 0.4366, "step": 6025 }, { "epoch": 0.1338497907903353, "grad_norm": 1.2950756549835205, "learning_rate": 1.912882420512698e-05, "loss": 0.4562, "step": 6030 }, { "epoch": 0.13396077734986292, "grad_norm": 1.2489832639694214, "learning_rate": 1.912740026609828e-05, "loss": 0.5912, "step": 6035 }, { "epoch": 0.13407176390939057, "grad_norm": 1.3526968955993652, "learning_rate": 1.9125975217390865e-05, "loss": 0.4472, "step": 6040 }, { "epoch": 0.13418275046891823, "grad_norm": 0.9325576424598694, "learning_rate": 1.9124549059177988e-05, "loss": 0.4222, "step": 6045 }, { "epoch": 0.13429373702844585, "grad_norm": 0.9659907817840576, "learning_rate": 1.912312179163304e-05, "loss": 0.4955, "step": 6050 }, { "epoch": 0.1344047235879735, "grad_norm": 0.9910063147544861, "learning_rate": 1.912169341492954e-05, "loss": 0.6083, "step": 6055 }, { "epoch": 0.13451571014750113, "grad_norm": 1.6210241317749023, "learning_rate": 1.9120263929241147e-05, "loss": 0.5013, "step": 6060 }, { "epoch": 0.1346266967070288, "grad_norm": 1.1847623586654663, "learning_rate": 1.911883333474165e-05, "loss": 0.6173, "step": 6065 }, { "epoch": 0.1347376832665564, "grad_norm": 1.136588454246521, "learning_rate": 1.9117401631604978e-05, "loss": 0.6391, "step": 6070 }, { "epoch": 0.13484866982608407, "grad_norm": 1.2647699117660522, "learning_rate": 1.9115968820005197e-05, "loss": 0.5167, "step": 6075 }, { "epoch": 0.1349596563856117, "grad_norm": 1.39508056640625, "learning_rate": 1.9114534900116496e-05, "loss": 0.5092, "step": 6080 }, { "epoch": 0.13507064294513935, "grad_norm": 1.0421615839004517, "learning_rate": 1.9113099872113212e-05, "loss": 0.5522, "step": 6085 }, { "epoch": 0.13518162950466697, "grad_norm": 1.1894711256027222, "learning_rate": 1.9111663736169806e-05, "loss": 0.5877, "step": 6090 }, { "epoch": 0.13529261606419463, "grad_norm": 1.133510708808899, "learning_rate": 1.9110226492460886e-05, "loss": 0.3857, "step": 6095 }, { "epoch": 0.13540360262372228, "grad_norm": 1.1787534952163696, "learning_rate": 1.9108788141161178e-05, "loss": 0.5668, "step": 6100 }, { "epoch": 0.1355145891832499, "grad_norm": 0.940147340297699, "learning_rate": 1.9107348682445556e-05, "loss": 0.4328, "step": 6105 }, { "epoch": 0.13562557574277756, "grad_norm": 1.5695501565933228, "learning_rate": 1.910590811648903e-05, "loss": 0.5116, "step": 6110 }, { "epoch": 0.13573656230230519, "grad_norm": 0.8534506559371948, "learning_rate": 1.910446644346673e-05, "loss": 0.5906, "step": 6115 }, { "epoch": 0.13584754886183284, "grad_norm": 1.2275046110153198, "learning_rate": 1.910302366355393e-05, "loss": 0.6306, "step": 6120 }, { "epoch": 0.13595853542136047, "grad_norm": 1.118098497390747, "learning_rate": 1.910157977692605e-05, "loss": 0.4972, "step": 6125 }, { "epoch": 0.13606952198088812, "grad_norm": 1.447643518447876, "learning_rate": 1.910013478375862e-05, "loss": 0.5456, "step": 6130 }, { "epoch": 0.13618050854041575, "grad_norm": 1.6203237771987915, "learning_rate": 1.9098688684227324e-05, "loss": 0.491, "step": 6135 }, { "epoch": 0.1362914950999434, "grad_norm": 1.125411033630371, "learning_rate": 1.9097241478507973e-05, "loss": 0.4878, "step": 6140 }, { "epoch": 0.13640248165947103, "grad_norm": 1.341929316520691, "learning_rate": 1.9095793166776513e-05, "loss": 0.4489, "step": 6145 }, { "epoch": 0.13651346821899868, "grad_norm": 0.8598207235336304, "learning_rate": 1.909434374920902e-05, "loss": 0.4814, "step": 6150 }, { "epoch": 0.13662445477852633, "grad_norm": 1.4603968858718872, "learning_rate": 1.909289322598172e-05, "loss": 0.6108, "step": 6155 }, { "epoch": 0.13673544133805396, "grad_norm": 1.2341361045837402, "learning_rate": 1.9091441597270955e-05, "loss": 0.5515, "step": 6160 }, { "epoch": 0.1368464278975816, "grad_norm": 0.923833429813385, "learning_rate": 1.908998886325321e-05, "loss": 0.3516, "step": 6165 }, { "epoch": 0.13695741445710924, "grad_norm": 0.9731249213218689, "learning_rate": 1.9088535024105105e-05, "loss": 0.414, "step": 6170 }, { "epoch": 0.1370684010166369, "grad_norm": 1.5650506019592285, "learning_rate": 1.9087080080003394e-05, "loss": 0.6774, "step": 6175 }, { "epoch": 0.13717938757616452, "grad_norm": 1.0991238355636597, "learning_rate": 1.908562403112496e-05, "loss": 0.4536, "step": 6180 }, { "epoch": 0.13729037413569217, "grad_norm": 1.1593619585037231, "learning_rate": 1.9084166877646825e-05, "loss": 0.5965, "step": 6185 }, { "epoch": 0.1374013606952198, "grad_norm": 1.6231162548065186, "learning_rate": 1.908270861974615e-05, "loss": 0.6396, "step": 6190 }, { "epoch": 0.13751234725474745, "grad_norm": 1.1264643669128418, "learning_rate": 1.9081249257600226e-05, "loss": 0.5087, "step": 6195 }, { "epoch": 0.13762333381427508, "grad_norm": 1.6626774072647095, "learning_rate": 1.9079788791386468e-05, "loss": 0.4835, "step": 6200 }, { "epoch": 0.13773432037380273, "grad_norm": 1.7249454259872437, "learning_rate": 1.907832722128244e-05, "loss": 0.5835, "step": 6205 }, { "epoch": 0.13784530693333039, "grad_norm": 0.7329836487770081, "learning_rate": 1.9076864547465836e-05, "loss": 0.6058, "step": 6210 }, { "epoch": 0.137956293492858, "grad_norm": 1.4405927658081055, "learning_rate": 1.9075400770114482e-05, "loss": 0.7529, "step": 6215 }, { "epoch": 0.13806728005238567, "grad_norm": 1.5921906232833862, "learning_rate": 1.9073935889406343e-05, "loss": 0.4808, "step": 6220 }, { "epoch": 0.1381782666119133, "grad_norm": 1.237058162689209, "learning_rate": 1.90724699055195e-05, "loss": 0.5102, "step": 6225 }, { "epoch": 0.13828925317144095, "grad_norm": 1.3936007022857666, "learning_rate": 1.9071002818632203e-05, "loss": 0.6837, "step": 6230 }, { "epoch": 0.13840023973096857, "grad_norm": 1.1461912393569946, "learning_rate": 1.9069534628922797e-05, "loss": 0.4168, "step": 6235 }, { "epoch": 0.13851122629049623, "grad_norm": 1.069848656654358, "learning_rate": 1.906806533656979e-05, "loss": 0.4901, "step": 6240 }, { "epoch": 0.13862221285002385, "grad_norm": 1.3110460042953491, "learning_rate": 1.906659494175182e-05, "loss": 0.6338, "step": 6245 }, { "epoch": 0.1387331994095515, "grad_norm": 1.3842331171035767, "learning_rate": 1.9065123444647633e-05, "loss": 0.4519, "step": 6250 }, { "epoch": 0.13884418596907913, "grad_norm": 1.457643747329712, "learning_rate": 1.9063650845436143e-05, "loss": 0.4812, "step": 6255 }, { "epoch": 0.13895517252860679, "grad_norm": 0.7833005785942078, "learning_rate": 1.906217714429638e-05, "loss": 0.4751, "step": 6260 }, { "epoch": 0.13906615908813444, "grad_norm": 1.5647377967834473, "learning_rate": 1.9060702341407516e-05, "loss": 0.5807, "step": 6265 }, { "epoch": 0.13917714564766206, "grad_norm": 0.883370041847229, "learning_rate": 1.9059226436948844e-05, "loss": 0.4939, "step": 6270 }, { "epoch": 0.13928813220718972, "grad_norm": 0.9018153548240662, "learning_rate": 1.9057749431099807e-05, "loss": 0.4014, "step": 6275 }, { "epoch": 0.13939911876671734, "grad_norm": 0.9330711364746094, "learning_rate": 1.905627132403997e-05, "loss": 0.5366, "step": 6280 }, { "epoch": 0.139510105326245, "grad_norm": 1.2406800985336304, "learning_rate": 1.9054792115949033e-05, "loss": 0.4553, "step": 6285 }, { "epoch": 0.13962109188577262, "grad_norm": 1.1809135675430298, "learning_rate": 1.9053311807006845e-05, "loss": 0.6236, "step": 6290 }, { "epoch": 0.13973207844530028, "grad_norm": 1.25314462184906, "learning_rate": 1.9051830397393366e-05, "loss": 0.6285, "step": 6295 }, { "epoch": 0.1398430650048279, "grad_norm": 1.5022709369659424, "learning_rate": 1.9050347887288708e-05, "loss": 0.4595, "step": 6300 }, { "epoch": 0.13995405156435556, "grad_norm": 0.9192030429840088, "learning_rate": 1.9048864276873103e-05, "loss": 0.5529, "step": 6305 }, { "epoch": 0.14006503812388318, "grad_norm": 1.0889347791671753, "learning_rate": 1.904737956632693e-05, "loss": 0.5217, "step": 6310 }, { "epoch": 0.14017602468341084, "grad_norm": 1.2225440740585327, "learning_rate": 1.9045893755830688e-05, "loss": 0.5005, "step": 6315 }, { "epoch": 0.1402870112429385, "grad_norm": 1.1170283555984497, "learning_rate": 1.9044406845565025e-05, "loss": 0.5876, "step": 6320 }, { "epoch": 0.14039799780246612, "grad_norm": 1.2859587669372559, "learning_rate": 1.9042918835710708e-05, "loss": 0.6355, "step": 6325 }, { "epoch": 0.14050898436199377, "grad_norm": 1.228201985359192, "learning_rate": 1.9041429726448645e-05, "loss": 0.6205, "step": 6330 }, { "epoch": 0.1406199709215214, "grad_norm": 1.601422667503357, "learning_rate": 1.9039939517959882e-05, "loss": 0.6231, "step": 6335 }, { "epoch": 0.14073095748104905, "grad_norm": 1.0911506414413452, "learning_rate": 1.9038448210425588e-05, "loss": 0.6034, "step": 6340 }, { "epoch": 0.14084194404057668, "grad_norm": 1.59726083278656, "learning_rate": 1.9036955804027073e-05, "loss": 0.6577, "step": 6345 }, { "epoch": 0.14095293060010433, "grad_norm": 1.157368779182434, "learning_rate": 1.903546229894578e-05, "loss": 0.6007, "step": 6350 }, { "epoch": 0.14106391715963196, "grad_norm": 1.2398027181625366, "learning_rate": 1.9033967695363283e-05, "loss": 0.5543, "step": 6355 }, { "epoch": 0.1411749037191596, "grad_norm": 1.2566787004470825, "learning_rate": 1.903247199346129e-05, "loss": 0.5806, "step": 6360 }, { "epoch": 0.14128589027868724, "grad_norm": 1.6907583475112915, "learning_rate": 1.9030975193421647e-05, "loss": 0.5773, "step": 6365 }, { "epoch": 0.1413968768382149, "grad_norm": 1.620954155921936, "learning_rate": 1.9029477295426324e-05, "loss": 0.5538, "step": 6370 }, { "epoch": 0.14150786339774254, "grad_norm": 1.1079115867614746, "learning_rate": 1.9027978299657436e-05, "loss": 0.5525, "step": 6375 }, { "epoch": 0.14161884995727017, "grad_norm": 1.0491001605987549, "learning_rate": 1.9026478206297224e-05, "loss": 0.4703, "step": 6380 }, { "epoch": 0.14172983651679782, "grad_norm": 3.4920918941497803, "learning_rate": 1.9024977015528064e-05, "loss": 0.6538, "step": 6385 }, { "epoch": 0.14184082307632545, "grad_norm": 1.1903600692749023, "learning_rate": 1.9023474727532466e-05, "loss": 0.4085, "step": 6390 }, { "epoch": 0.1419518096358531, "grad_norm": 1.0123170614242554, "learning_rate": 1.9021971342493072e-05, "loss": 0.4996, "step": 6395 }, { "epoch": 0.14206279619538073, "grad_norm": 0.8986278176307678, "learning_rate": 1.9020466860592663e-05, "loss": 0.5676, "step": 6400 }, { "epoch": 0.14217378275490838, "grad_norm": 1.3018028736114502, "learning_rate": 1.901896128201414e-05, "loss": 0.504, "step": 6405 }, { "epoch": 0.142284769314436, "grad_norm": 1.483491063117981, "learning_rate": 1.9017454606940557e-05, "loss": 0.6871, "step": 6410 }, { "epoch": 0.14239575587396366, "grad_norm": 1.6943657398223877, "learning_rate": 1.9015946835555083e-05, "loss": 0.4479, "step": 6415 }, { "epoch": 0.14250674243349132, "grad_norm": 1.2507892847061157, "learning_rate": 1.9014437968041026e-05, "loss": 0.5551, "step": 6420 }, { "epoch": 0.14261772899301894, "grad_norm": 0.8554363250732422, "learning_rate": 1.9012928004581837e-05, "loss": 0.6889, "step": 6425 }, { "epoch": 0.1427287155525466, "grad_norm": 1.5508023500442505, "learning_rate": 1.9011416945361088e-05, "loss": 0.6158, "step": 6430 }, { "epoch": 0.14283970211207422, "grad_norm": 0.8269447684288025, "learning_rate": 1.9009904790562487e-05, "loss": 0.5652, "step": 6435 }, { "epoch": 0.14295068867160188, "grad_norm": 0.9781570434570312, "learning_rate": 1.900839154036988e-05, "loss": 0.5546, "step": 6440 }, { "epoch": 0.1430616752311295, "grad_norm": 0.9161341190338135, "learning_rate": 1.900687719496724e-05, "loss": 0.514, "step": 6445 }, { "epoch": 0.14317266179065716, "grad_norm": 1.2776657342910767, "learning_rate": 1.9005361754538677e-05, "loss": 0.5783, "step": 6450 }, { "epoch": 0.14328364835018478, "grad_norm": 1.1995465755462646, "learning_rate": 1.9003845219268436e-05, "loss": 0.4091, "step": 6455 }, { "epoch": 0.14339463490971244, "grad_norm": 1.4316763877868652, "learning_rate": 1.900232758934089e-05, "loss": 0.5167, "step": 6460 }, { "epoch": 0.14350562146924006, "grad_norm": 1.3804174661636353, "learning_rate": 1.9000808864940543e-05, "loss": 0.4177, "step": 6465 }, { "epoch": 0.14361660802876772, "grad_norm": 1.1941735744476318, "learning_rate": 1.8999289046252044e-05, "loss": 0.591, "step": 6470 }, { "epoch": 0.14372759458829537, "grad_norm": 1.327609658241272, "learning_rate": 1.8997768133460163e-05, "loss": 0.4782, "step": 6475 }, { "epoch": 0.143838581147823, "grad_norm": 1.247695803642273, "learning_rate": 1.899624612674981e-05, "loss": 0.6462, "step": 6480 }, { "epoch": 0.14394956770735065, "grad_norm": 0.884784460067749, "learning_rate": 1.8994723026306024e-05, "loss": 0.568, "step": 6485 }, { "epoch": 0.14406055426687828, "grad_norm": 1.2470338344573975, "learning_rate": 1.899319883231398e-05, "loss": 0.4931, "step": 6490 }, { "epoch": 0.14417154082640593, "grad_norm": 1.626511573791504, "learning_rate": 1.8991673544958975e-05, "loss": 0.4637, "step": 6495 }, { "epoch": 0.14428252738593356, "grad_norm": 1.8598002195358276, "learning_rate": 1.899014716442646e-05, "loss": 0.6426, "step": 6500 }, { "epoch": 0.1443935139454612, "grad_norm": 1.0883430242538452, "learning_rate": 1.8988619690902005e-05, "loss": 0.5175, "step": 6505 }, { "epoch": 0.14450450050498884, "grad_norm": 1.258001685142517, "learning_rate": 1.8987091124571315e-05, "loss": 0.4383, "step": 6510 }, { "epoch": 0.1446154870645165, "grad_norm": 0.7219902276992798, "learning_rate": 1.8985561465620225e-05, "loss": 0.4931, "step": 6515 }, { "epoch": 0.14472647362404412, "grad_norm": 0.8572632074356079, "learning_rate": 1.8984030714234704e-05, "loss": 0.6104, "step": 6520 }, { "epoch": 0.14483746018357177, "grad_norm": 1.0302547216415405, "learning_rate": 1.8982498870600864e-05, "loss": 0.5779, "step": 6525 }, { "epoch": 0.14494844674309942, "grad_norm": 1.225276231765747, "learning_rate": 1.8980965934904932e-05, "loss": 0.4816, "step": 6530 }, { "epoch": 0.14505943330262705, "grad_norm": 1.3861422538757324, "learning_rate": 1.8979431907333282e-05, "loss": 0.4285, "step": 6535 }, { "epoch": 0.1451704198621547, "grad_norm": 1.709928035736084, "learning_rate": 1.8977896788072416e-05, "loss": 0.5852, "step": 6540 }, { "epoch": 0.14528140642168233, "grad_norm": 1.502562403678894, "learning_rate": 1.897636057730897e-05, "loss": 0.5503, "step": 6545 }, { "epoch": 0.14539239298120998, "grad_norm": 0.9345400929450989, "learning_rate": 1.897482327522971e-05, "loss": 0.4356, "step": 6550 }, { "epoch": 0.1455033795407376, "grad_norm": 0.9455825686454773, "learning_rate": 1.897328488202153e-05, "loss": 0.4978, "step": 6555 }, { "epoch": 0.14561436610026526, "grad_norm": 1.2369747161865234, "learning_rate": 1.8971745397871473e-05, "loss": 0.6224, "step": 6560 }, { "epoch": 0.1457253526597929, "grad_norm": 0.9470257759094238, "learning_rate": 1.89702048229667e-05, "loss": 0.4977, "step": 6565 }, { "epoch": 0.14583633921932054, "grad_norm": 2.130319595336914, "learning_rate": 1.8968663157494503e-05, "loss": 0.5318, "step": 6570 }, { "epoch": 0.14594732577884817, "grad_norm": 1.1602154970169067, "learning_rate": 1.8967120401642324e-05, "loss": 0.5466, "step": 6575 }, { "epoch": 0.14605831233837582, "grad_norm": 1.5695816278457642, "learning_rate": 1.8965576555597717e-05, "loss": 0.5732, "step": 6580 }, { "epoch": 0.14616929889790348, "grad_norm": 1.0248501300811768, "learning_rate": 1.896403161954838e-05, "loss": 0.404, "step": 6585 }, { "epoch": 0.1462802854574311, "grad_norm": 1.1695456504821777, "learning_rate": 1.896248559368214e-05, "loss": 0.5657, "step": 6590 }, { "epoch": 0.14639127201695876, "grad_norm": 0.8363463878631592, "learning_rate": 1.8960938478186962e-05, "loss": 0.5698, "step": 6595 }, { "epoch": 0.14650225857648638, "grad_norm": 1.3290842771530151, "learning_rate": 1.8959390273250938e-05, "loss": 0.5124, "step": 6600 }, { "epoch": 0.14661324513601404, "grad_norm": 1.2733120918273926, "learning_rate": 1.895784097906229e-05, "loss": 0.5128, "step": 6605 }, { "epoch": 0.14672423169554166, "grad_norm": 1.1366569995880127, "learning_rate": 1.8956290595809378e-05, "loss": 0.4937, "step": 6610 }, { "epoch": 0.14683521825506932, "grad_norm": 1.2039333581924438, "learning_rate": 1.895473912368069e-05, "loss": 0.5817, "step": 6615 }, { "epoch": 0.14694620481459694, "grad_norm": 0.963930606842041, "learning_rate": 1.8953186562864857e-05, "loss": 0.5287, "step": 6620 }, { "epoch": 0.1470571913741246, "grad_norm": 1.2638262510299683, "learning_rate": 1.8951632913550625e-05, "loss": 0.6006, "step": 6625 }, { "epoch": 0.14716817793365222, "grad_norm": 1.433885097503662, "learning_rate": 1.8950078175926886e-05, "loss": 0.5561, "step": 6630 }, { "epoch": 0.14727916449317988, "grad_norm": 1.211980938911438, "learning_rate": 1.8948522350182655e-05, "loss": 0.5017, "step": 6635 }, { "epoch": 0.14739015105270753, "grad_norm": 0.7199383974075317, "learning_rate": 1.8946965436507094e-05, "loss": 0.5482, "step": 6640 }, { "epoch": 0.14750113761223516, "grad_norm": 1.4644221067428589, "learning_rate": 1.8945407435089477e-05, "loss": 0.5931, "step": 6645 }, { "epoch": 0.1476121241717628, "grad_norm": 0.8318582773208618, "learning_rate": 1.8943848346119225e-05, "loss": 0.4987, "step": 6650 }, { "epoch": 0.14772311073129044, "grad_norm": 0.7859492301940918, "learning_rate": 1.8942288169785884e-05, "loss": 0.485, "step": 6655 }, { "epoch": 0.1478340972908181, "grad_norm": 0.8700778484344482, "learning_rate": 1.8940726906279142e-05, "loss": 0.4495, "step": 6660 }, { "epoch": 0.14794508385034572, "grad_norm": 1.5189528465270996, "learning_rate": 1.8939164555788805e-05, "loss": 0.8257, "step": 6665 }, { "epoch": 0.14805607040987337, "grad_norm": 0.9018239974975586, "learning_rate": 1.893760111850482e-05, "loss": 0.4749, "step": 6670 }, { "epoch": 0.148167056969401, "grad_norm": 0.8321853876113892, "learning_rate": 1.893603659461727e-05, "loss": 0.5725, "step": 6675 }, { "epoch": 0.14827804352892865, "grad_norm": 1.372806191444397, "learning_rate": 1.8934470984316352e-05, "loss": 0.5039, "step": 6680 }, { "epoch": 0.14838903008845628, "grad_norm": 1.2954940795898438, "learning_rate": 1.893290428779242e-05, "loss": 0.5461, "step": 6685 }, { "epoch": 0.14850001664798393, "grad_norm": 1.6831293106079102, "learning_rate": 1.8931336505235947e-05, "loss": 0.5783, "step": 6690 }, { "epoch": 0.14861100320751158, "grad_norm": 1.0016860961914062, "learning_rate": 1.892976763683753e-05, "loss": 0.5389, "step": 6695 }, { "epoch": 0.1487219897670392, "grad_norm": 1.2121920585632324, "learning_rate": 1.8928197682787914e-05, "loss": 0.5567, "step": 6700 }, { "epoch": 0.14883297632656686, "grad_norm": 1.14114511013031, "learning_rate": 1.8926626643277966e-05, "loss": 0.4992, "step": 6705 }, { "epoch": 0.1489439628860945, "grad_norm": 1.1122453212738037, "learning_rate": 1.892505451849869e-05, "loss": 0.614, "step": 6710 }, { "epoch": 0.14905494944562214, "grad_norm": 1.7418065071105957, "learning_rate": 1.8923481308641216e-05, "loss": 0.5832, "step": 6715 }, { "epoch": 0.14916593600514977, "grad_norm": 1.6564451456069946, "learning_rate": 1.892190701389681e-05, "loss": 0.4955, "step": 6720 }, { "epoch": 0.14927692256467742, "grad_norm": 1.2826859951019287, "learning_rate": 1.8920331634456874e-05, "loss": 0.5107, "step": 6725 }, { "epoch": 0.14938790912420505, "grad_norm": 1.092296838760376, "learning_rate": 1.8918755170512932e-05, "loss": 0.5884, "step": 6730 }, { "epoch": 0.1494988956837327, "grad_norm": 1.3704930543899536, "learning_rate": 1.8917177622256647e-05, "loss": 0.4831, "step": 6735 }, { "epoch": 0.14960988224326033, "grad_norm": 1.782360553741455, "learning_rate": 1.8915598989879816e-05, "loss": 0.6033, "step": 6740 }, { "epoch": 0.14972086880278798, "grad_norm": 1.6985117197036743, "learning_rate": 1.891401927357436e-05, "loss": 0.644, "step": 6745 }, { "epoch": 0.14983185536231564, "grad_norm": 1.4931162595748901, "learning_rate": 1.8912438473532335e-05, "loss": 0.5186, "step": 6750 }, { "epoch": 0.14994284192184326, "grad_norm": 0.8564159870147705, "learning_rate": 1.891085658994593e-05, "loss": 0.5087, "step": 6755 }, { "epoch": 0.15005382848137092, "grad_norm": 1.126528024673462, "learning_rate": 1.8909273623007466e-05, "loss": 0.4766, "step": 6760 }, { "epoch": 0.15016481504089854, "grad_norm": 0.8299586772918701, "learning_rate": 1.8907689572909394e-05, "loss": 0.5409, "step": 6765 }, { "epoch": 0.1502758016004262, "grad_norm": 1.3447434902191162, "learning_rate": 1.8906104439844297e-05, "loss": 0.5607, "step": 6770 }, { "epoch": 0.15038678815995382, "grad_norm": 1.2309798002243042, "learning_rate": 1.8904518224004894e-05, "loss": 0.6018, "step": 6775 }, { "epoch": 0.15049777471948148, "grad_norm": 0.9505862593650818, "learning_rate": 1.8902930925584025e-05, "loss": 0.4819, "step": 6780 }, { "epoch": 0.1506087612790091, "grad_norm": 1.3469936847686768, "learning_rate": 1.8901342544774674e-05, "loss": 0.5222, "step": 6785 }, { "epoch": 0.15071974783853676, "grad_norm": 1.1433323621749878, "learning_rate": 1.8899753081769948e-05, "loss": 0.4524, "step": 6790 }, { "epoch": 0.15083073439806438, "grad_norm": 1.206285834312439, "learning_rate": 1.8898162536763092e-05, "loss": 0.5026, "step": 6795 }, { "epoch": 0.15094172095759203, "grad_norm": 1.9741181135177612, "learning_rate": 1.8896570909947477e-05, "loss": 0.3818, "step": 6800 }, { "epoch": 0.1510527075171197, "grad_norm": 1.362396478652954, "learning_rate": 1.8894978201516603e-05, "loss": 0.7034, "step": 6805 }, { "epoch": 0.15116369407664731, "grad_norm": 1.2455583810806274, "learning_rate": 1.8893384411664115e-05, "loss": 0.4963, "step": 6810 }, { "epoch": 0.15127468063617497, "grad_norm": 1.1156567335128784, "learning_rate": 1.8891789540583777e-05, "loss": 0.4454, "step": 6815 }, { "epoch": 0.1513856671957026, "grad_norm": 1.0928586721420288, "learning_rate": 1.8890193588469484e-05, "loss": 0.5486, "step": 6820 }, { "epoch": 0.15149665375523025, "grad_norm": 1.0376923084259033, "learning_rate": 1.888859655551527e-05, "loss": 0.4751, "step": 6825 }, { "epoch": 0.15160764031475787, "grad_norm": 1.1090154647827148, "learning_rate": 1.8886998441915298e-05, "loss": 0.3916, "step": 6830 }, { "epoch": 0.15171862687428553, "grad_norm": 1.081882119178772, "learning_rate": 1.888539924786386e-05, "loss": 0.4398, "step": 6835 }, { "epoch": 0.15182961343381315, "grad_norm": 2.7401556968688965, "learning_rate": 1.888379897355538e-05, "loss": 0.56, "step": 6840 }, { "epoch": 0.1519405999933408, "grad_norm": 0.9954392313957214, "learning_rate": 1.8882197619184417e-05, "loss": 0.4477, "step": 6845 }, { "epoch": 0.15205158655286843, "grad_norm": 3.0417439937591553, "learning_rate": 1.8880595184945653e-05, "loss": 0.6061, "step": 6850 }, { "epoch": 0.1521625731123961, "grad_norm": 1.5988818407058716, "learning_rate": 1.8878991671033913e-05, "loss": 0.5861, "step": 6855 }, { "epoch": 0.15227355967192374, "grad_norm": 1.2149866819381714, "learning_rate": 1.8877387077644143e-05, "loss": 0.4352, "step": 6860 }, { "epoch": 0.15238454623145137, "grad_norm": 1.172912359237671, "learning_rate": 1.8875781404971424e-05, "loss": 0.5736, "step": 6865 }, { "epoch": 0.15249553279097902, "grad_norm": 1.0310333967208862, "learning_rate": 1.8874174653210967e-05, "loss": 0.4938, "step": 6870 }, { "epoch": 0.15260651935050665, "grad_norm": 2.8296666145324707, "learning_rate": 1.887256682255812e-05, "loss": 0.5292, "step": 6875 }, { "epoch": 0.1527175059100343, "grad_norm": 1.6240949630737305, "learning_rate": 1.8870957913208354e-05, "loss": 0.6376, "step": 6880 }, { "epoch": 0.15282849246956193, "grad_norm": 2.6802828311920166, "learning_rate": 1.8869347925357275e-05, "loss": 0.4638, "step": 6885 }, { "epoch": 0.15293947902908958, "grad_norm": 1.5533713102340698, "learning_rate": 1.886773685920062e-05, "loss": 0.44, "step": 6890 }, { "epoch": 0.1530504655886172, "grad_norm": 2.4309866428375244, "learning_rate": 1.886612471493426e-05, "loss": 0.6499, "step": 6895 }, { "epoch": 0.15316145214814486, "grad_norm": 1.664516568183899, "learning_rate": 1.886451149275419e-05, "loss": 0.4546, "step": 6900 }, { "epoch": 0.1532724387076725, "grad_norm": 1.5281312465667725, "learning_rate": 1.8862897192856545e-05, "loss": 0.5696, "step": 6905 }, { "epoch": 0.15338342526720014, "grad_norm": 1.1347835063934326, "learning_rate": 1.8861281815437578e-05, "loss": 0.5733, "step": 6910 }, { "epoch": 0.1534944118267278, "grad_norm": 1.2266205549240112, "learning_rate": 1.885966536069369e-05, "loss": 0.6103, "step": 6915 }, { "epoch": 0.15360539838625542, "grad_norm": 1.4315506219863892, "learning_rate": 1.88580478288214e-05, "loss": 0.6006, "step": 6920 }, { "epoch": 0.15371638494578307, "grad_norm": 2.4873852729797363, "learning_rate": 1.8856429220017364e-05, "loss": 0.5671, "step": 6925 }, { "epoch": 0.1538273715053107, "grad_norm": 1.1945925951004028, "learning_rate": 1.885480953447836e-05, "loss": 0.57, "step": 6930 }, { "epoch": 0.15393835806483835, "grad_norm": 1.2514160871505737, "learning_rate": 1.8853188772401316e-05, "loss": 0.6649, "step": 6935 }, { "epoch": 0.15404934462436598, "grad_norm": 1.9401576519012451, "learning_rate": 1.8851566933983266e-05, "loss": 0.5246, "step": 6940 }, { "epoch": 0.15416033118389363, "grad_norm": 1.5348749160766602, "learning_rate": 1.88499440194214e-05, "loss": 0.6484, "step": 6945 }, { "epoch": 0.15427131774342126, "grad_norm": 1.4494093656539917, "learning_rate": 1.8848320028913017e-05, "loss": 0.6021, "step": 6950 }, { "epoch": 0.15438230430294891, "grad_norm": 1.3363004922866821, "learning_rate": 1.8846694962655564e-05, "loss": 0.7334, "step": 6955 }, { "epoch": 0.15449329086247657, "grad_norm": 1.1602723598480225, "learning_rate": 1.88450688208466e-05, "loss": 0.5157, "step": 6960 }, { "epoch": 0.1546042774220042, "grad_norm": 1.3268187046051025, "learning_rate": 1.884344160368384e-05, "loss": 0.4829, "step": 6965 }, { "epoch": 0.15471526398153185, "grad_norm": 1.1891200542449951, "learning_rate": 1.8841813311365105e-05, "loss": 0.6283, "step": 6970 }, { "epoch": 0.15482625054105947, "grad_norm": 1.3743573427200317, "learning_rate": 1.884018394408836e-05, "loss": 0.5518, "step": 6975 }, { "epoch": 0.15493723710058713, "grad_norm": 1.314160704612732, "learning_rate": 1.88385535020517e-05, "loss": 0.4715, "step": 6980 }, { "epoch": 0.15504822366011475, "grad_norm": 0.9060925245285034, "learning_rate": 1.8836921985453347e-05, "loss": 0.5421, "step": 6985 }, { "epoch": 0.1551592102196424, "grad_norm": 0.9292426705360413, "learning_rate": 1.8835289394491655e-05, "loss": 0.6992, "step": 6990 }, { "epoch": 0.15527019677917003, "grad_norm": 1.238547682762146, "learning_rate": 1.883365572936511e-05, "loss": 0.438, "step": 6995 }, { "epoch": 0.1553811833386977, "grad_norm": 1.2419242858886719, "learning_rate": 1.883202099027233e-05, "loss": 0.6075, "step": 7000 }, { "epoch": 0.1554921698982253, "grad_norm": 1.0661461353302002, "learning_rate": 1.8830385177412054e-05, "loss": 0.4628, "step": 7005 }, { "epoch": 0.15560315645775297, "grad_norm": 1.3712948560714722, "learning_rate": 1.8828748290983166e-05, "loss": 0.6168, "step": 7010 }, { "epoch": 0.15571414301728062, "grad_norm": 1.3899610042572021, "learning_rate": 1.8827110331184667e-05, "loss": 0.596, "step": 7015 }, { "epoch": 0.15582512957680825, "grad_norm": 1.5190640687942505, "learning_rate": 1.88254712982157e-05, "loss": 0.592, "step": 7020 }, { "epoch": 0.1559361161363359, "grad_norm": 1.1492244005203247, "learning_rate": 1.8823831192275533e-05, "loss": 0.5391, "step": 7025 }, { "epoch": 0.15604710269586353, "grad_norm": 1.307806134223938, "learning_rate": 1.8822190013563562e-05, "loss": 0.4242, "step": 7030 }, { "epoch": 0.15615808925539118, "grad_norm": 1.2493864297866821, "learning_rate": 1.882054776227931e-05, "loss": 0.5422, "step": 7035 }, { "epoch": 0.1562690758149188, "grad_norm": 1.2601972818374634, "learning_rate": 1.881890443862245e-05, "loss": 0.5582, "step": 7040 }, { "epoch": 0.15638006237444646, "grad_norm": 0.8862437605857849, "learning_rate": 1.8817260042792763e-05, "loss": 0.4275, "step": 7045 }, { "epoch": 0.1564910489339741, "grad_norm": 1.2929998636245728, "learning_rate": 1.881561457499017e-05, "loss": 0.4504, "step": 7050 }, { "epoch": 0.15660203549350174, "grad_norm": 2.1380231380462646, "learning_rate": 1.881396803541472e-05, "loss": 0.5916, "step": 7055 }, { "epoch": 0.15671302205302937, "grad_norm": 1.1888693571090698, "learning_rate": 1.88123204242666e-05, "loss": 0.5793, "step": 7060 }, { "epoch": 0.15682400861255702, "grad_norm": 1.548318862915039, "learning_rate": 1.8810671741746115e-05, "loss": 0.505, "step": 7065 }, { "epoch": 0.15693499517208467, "grad_norm": 1.4964656829833984, "learning_rate": 1.8809021988053707e-05, "loss": 0.3319, "step": 7070 }, { "epoch": 0.1570459817316123, "grad_norm": 1.2946470975875854, "learning_rate": 1.8807371163389955e-05, "loss": 0.6479, "step": 7075 }, { "epoch": 0.15715696829113995, "grad_norm": 1.5597033500671387, "learning_rate": 1.880571926795555e-05, "loss": 0.4953, "step": 7080 }, { "epoch": 0.15726795485066758, "grad_norm": 0.9103341102600098, "learning_rate": 1.8804066301951324e-05, "loss": 0.5956, "step": 7085 }, { "epoch": 0.15737894141019523, "grad_norm": 0.9429594874382019, "learning_rate": 1.880241226557825e-05, "loss": 0.5973, "step": 7090 }, { "epoch": 0.15748992796972286, "grad_norm": 1.2466639280319214, "learning_rate": 1.880075715903741e-05, "loss": 0.5657, "step": 7095 }, { "epoch": 0.1576009145292505, "grad_norm": 1.170981764793396, "learning_rate": 1.8799100982530034e-05, "loss": 0.6455, "step": 7100 }, { "epoch": 0.15771190108877814, "grad_norm": 1.7617019414901733, "learning_rate": 1.879744373625747e-05, "loss": 0.5598, "step": 7105 }, { "epoch": 0.1578228876483058, "grad_norm": 0.8788993954658508, "learning_rate": 1.8795785420421198e-05, "loss": 0.4858, "step": 7110 }, { "epoch": 0.15793387420783342, "grad_norm": 0.8216165900230408, "learning_rate": 1.8794126035222833e-05, "loss": 0.4852, "step": 7115 }, { "epoch": 0.15804486076736107, "grad_norm": 1.0266494750976562, "learning_rate": 1.879246558086412e-05, "loss": 0.5108, "step": 7120 }, { "epoch": 0.15815584732688873, "grad_norm": 1.1185437440872192, "learning_rate": 1.879080405754693e-05, "loss": 0.3881, "step": 7125 }, { "epoch": 0.15826683388641635, "grad_norm": 1.2968014478683472, "learning_rate": 1.8789141465473263e-05, "loss": 0.4948, "step": 7130 }, { "epoch": 0.158377820445944, "grad_norm": 0.8087155818939209, "learning_rate": 1.8787477804845255e-05, "loss": 0.5251, "step": 7135 }, { "epoch": 0.15848880700547163, "grad_norm": 1.3040249347686768, "learning_rate": 1.8785813075865164e-05, "loss": 0.7207, "step": 7140 }, { "epoch": 0.1585997935649993, "grad_norm": 1.23062002658844, "learning_rate": 1.8784147278735386e-05, "loss": 0.4471, "step": 7145 }, { "epoch": 0.1587107801245269, "grad_norm": 1.129224181175232, "learning_rate": 1.878248041365844e-05, "loss": 0.4338, "step": 7150 }, { "epoch": 0.15882176668405457, "grad_norm": 2.8109686374664307, "learning_rate": 1.878081248083698e-05, "loss": 0.568, "step": 7155 }, { "epoch": 0.1589327532435822, "grad_norm": 1.2243515253067017, "learning_rate": 1.8779143480473787e-05, "loss": 0.4675, "step": 7160 }, { "epoch": 0.15904373980310985, "grad_norm": 1.5151373147964478, "learning_rate": 1.8777473412771777e-05, "loss": 0.5826, "step": 7165 }, { "epoch": 0.15915472636263747, "grad_norm": 0.8092393279075623, "learning_rate": 1.877580227793398e-05, "loss": 0.6009, "step": 7170 }, { "epoch": 0.15926571292216513, "grad_norm": 1.2688989639282227, "learning_rate": 1.8774130076163575e-05, "loss": 0.4677, "step": 7175 }, { "epoch": 0.15937669948169278, "grad_norm": 1.2532013654708862, "learning_rate": 1.877245680766387e-05, "loss": 0.3996, "step": 7180 }, { "epoch": 0.1594876860412204, "grad_norm": 1.0712350606918335, "learning_rate": 1.8770782472638276e-05, "loss": 0.4464, "step": 7185 }, { "epoch": 0.15959867260074806, "grad_norm": 1.1912342309951782, "learning_rate": 1.8769107071290367e-05, "loss": 0.497, "step": 7190 }, { "epoch": 0.15970965916027569, "grad_norm": 1.4670413732528687, "learning_rate": 1.8767430603823833e-05, "loss": 0.5811, "step": 7195 }, { "epoch": 0.15982064571980334, "grad_norm": 1.3657194375991821, "learning_rate": 1.8765753070442486e-05, "loss": 0.5604, "step": 7200 }, { "epoch": 0.15993163227933097, "grad_norm": 1.1779944896697998, "learning_rate": 1.8764074471350282e-05, "loss": 0.5235, "step": 7205 }, { "epoch": 0.16004261883885862, "grad_norm": 1.0121064186096191, "learning_rate": 1.8762394806751295e-05, "loss": 0.4274, "step": 7210 }, { "epoch": 0.16015360539838625, "grad_norm": 1.2211484909057617, "learning_rate": 1.8760714076849734e-05, "loss": 0.5711, "step": 7215 }, { "epoch": 0.1602645919579139, "grad_norm": 1.132825493812561, "learning_rate": 1.8759032281849937e-05, "loss": 0.5821, "step": 7220 }, { "epoch": 0.16037557851744152, "grad_norm": 1.1440246105194092, "learning_rate": 1.875734942195637e-05, "loss": 0.5773, "step": 7225 }, { "epoch": 0.16048656507696918, "grad_norm": 1.1753207445144653, "learning_rate": 1.8755665497373628e-05, "loss": 0.5393, "step": 7230 }, { "epoch": 0.16059755163649683, "grad_norm": 1.1559597253799438, "learning_rate": 1.8753980508306442e-05, "loss": 0.4608, "step": 7235 }, { "epoch": 0.16070853819602446, "grad_norm": 1.5974079370498657, "learning_rate": 1.8752294454959665e-05, "loss": 0.4511, "step": 7240 }, { "epoch": 0.1608195247555521, "grad_norm": 0.7395541667938232, "learning_rate": 1.875060733753828e-05, "loss": 0.2952, "step": 7245 }, { "epoch": 0.16093051131507974, "grad_norm": 1.159072756767273, "learning_rate": 1.8748919156247402e-05, "loss": 0.5593, "step": 7250 }, { "epoch": 0.1610414978746074, "grad_norm": 2.472195625305176, "learning_rate": 1.8747229911292273e-05, "loss": 0.5588, "step": 7255 }, { "epoch": 0.16115248443413502, "grad_norm": 1.0677604675292969, "learning_rate": 1.874553960287827e-05, "loss": 0.3486, "step": 7260 }, { "epoch": 0.16126347099366267, "grad_norm": 1.0419409275054932, "learning_rate": 1.8743848231210894e-05, "loss": 0.521, "step": 7265 }, { "epoch": 0.1613744575531903, "grad_norm": 1.076755404472351, "learning_rate": 1.874215579649577e-05, "loss": 0.6069, "step": 7270 }, { "epoch": 0.16148544411271795, "grad_norm": 0.9147228598594666, "learning_rate": 1.8740462298938666e-05, "loss": 0.5681, "step": 7275 }, { "epoch": 0.16159643067224558, "grad_norm": 1.19989812374115, "learning_rate": 1.8738767738745467e-05, "loss": 0.7323, "step": 7280 }, { "epoch": 0.16170741723177323, "grad_norm": 1.425941824913025, "learning_rate": 1.8737072116122194e-05, "loss": 0.5968, "step": 7285 }, { "epoch": 0.16181840379130089, "grad_norm": 1.1933568716049194, "learning_rate": 1.8735375431275e-05, "loss": 0.4999, "step": 7290 }, { "epoch": 0.1619293903508285, "grad_norm": 1.1296350955963135, "learning_rate": 1.873367768441015e-05, "loss": 0.4927, "step": 7295 }, { "epoch": 0.16204037691035617, "grad_norm": 1.342839002609253, "learning_rate": 1.8731978875734062e-05, "loss": 0.5207, "step": 7300 }, { "epoch": 0.1621513634698838, "grad_norm": 1.3544893264770508, "learning_rate": 1.8730279005453264e-05, "loss": 0.4706, "step": 7305 }, { "epoch": 0.16226235002941145, "grad_norm": 1.0300699472427368, "learning_rate": 1.8728578073774427e-05, "loss": 0.4753, "step": 7310 }, { "epoch": 0.16237333658893907, "grad_norm": 1.0216705799102783, "learning_rate": 1.8726876080904338e-05, "loss": 0.5686, "step": 7315 }, { "epoch": 0.16248432314846672, "grad_norm": 1.362499713897705, "learning_rate": 1.8725173027049927e-05, "loss": 0.5845, "step": 7320 }, { "epoch": 0.16259530970799435, "grad_norm": 1.2206189632415771, "learning_rate": 1.8723468912418233e-05, "loss": 0.5174, "step": 7325 }, { "epoch": 0.162706296267522, "grad_norm": 1.0743138790130615, "learning_rate": 1.8721763737216453e-05, "loss": 0.5493, "step": 7330 }, { "epoch": 0.16281728282704963, "grad_norm": 1.1450023651123047, "learning_rate": 1.8720057501651885e-05, "loss": 0.4913, "step": 7335 }, { "epoch": 0.16292826938657728, "grad_norm": 1.2171339988708496, "learning_rate": 1.8718350205931975e-05, "loss": 0.4827, "step": 7340 }, { "epoch": 0.16303925594610494, "grad_norm": 1.5790528059005737, "learning_rate": 1.871664185026428e-05, "loss": 0.4709, "step": 7345 }, { "epoch": 0.16315024250563256, "grad_norm": 1.3697818517684937, "learning_rate": 1.8714932434856507e-05, "loss": 0.3998, "step": 7350 }, { "epoch": 0.16326122906516022, "grad_norm": 1.1617748737335205, "learning_rate": 1.8713221959916472e-05, "loss": 0.5325, "step": 7355 }, { "epoch": 0.16337221562468784, "grad_norm": 1.0637340545654297, "learning_rate": 1.8711510425652134e-05, "loss": 0.5762, "step": 7360 }, { "epoch": 0.1634832021842155, "grad_norm": 1.0662593841552734, "learning_rate": 1.8709797832271575e-05, "loss": 0.4272, "step": 7365 }, { "epoch": 0.16359418874374312, "grad_norm": 1.1800469160079956, "learning_rate": 1.870808417998301e-05, "loss": 0.6615, "step": 7370 }, { "epoch": 0.16370517530327078, "grad_norm": 1.1630209684371948, "learning_rate": 1.870636946899477e-05, "loss": 0.4881, "step": 7375 }, { "epoch": 0.1638161618627984, "grad_norm": 1.7020683288574219, "learning_rate": 1.8704653699515328e-05, "loss": 0.527, "step": 7380 }, { "epoch": 0.16392714842232606, "grad_norm": 1.147692322731018, "learning_rate": 1.8702936871753284e-05, "loss": 0.4046, "step": 7385 }, { "epoch": 0.16403813498185368, "grad_norm": 1.619726538658142, "learning_rate": 1.8701218985917364e-05, "loss": 0.7949, "step": 7390 }, { "epoch": 0.16414912154138134, "grad_norm": 1.1566890478134155, "learning_rate": 1.8699500042216423e-05, "loss": 0.766, "step": 7395 }, { "epoch": 0.164260108100909, "grad_norm": 1.1725993156433105, "learning_rate": 1.869778004085944e-05, "loss": 0.3988, "step": 7400 }, { "epoch": 0.16437109466043662, "grad_norm": 0.9177306294441223, "learning_rate": 1.8696058982055532e-05, "loss": 0.4696, "step": 7405 }, { "epoch": 0.16448208121996427, "grad_norm": 1.2283203601837158, "learning_rate": 1.8694336866013932e-05, "loss": 0.5214, "step": 7410 }, { "epoch": 0.1645930677794919, "grad_norm": 1.3590775728225708, "learning_rate": 1.869261369294402e-05, "loss": 0.4894, "step": 7415 }, { "epoch": 0.16470405433901955, "grad_norm": 1.28773832321167, "learning_rate": 1.8690889463055285e-05, "loss": 0.5813, "step": 7420 }, { "epoch": 0.16481504089854718, "grad_norm": 1.0126246213912964, "learning_rate": 1.868916417655736e-05, "loss": 0.4549, "step": 7425 }, { "epoch": 0.16492602745807483, "grad_norm": 1.1124584674835205, "learning_rate": 1.8687437833659986e-05, "loss": 0.5777, "step": 7430 }, { "epoch": 0.16503701401760246, "grad_norm": 1.226993441581726, "learning_rate": 1.8685710434573066e-05, "loss": 0.5007, "step": 7435 }, { "epoch": 0.1651480005771301, "grad_norm": 1.3269422054290771, "learning_rate": 1.8683981979506597e-05, "loss": 0.4307, "step": 7440 }, { "epoch": 0.16525898713665776, "grad_norm": 1.6435010433197021, "learning_rate": 1.868225246867072e-05, "loss": 0.5969, "step": 7445 }, { "epoch": 0.1653699736961854, "grad_norm": 1.360838770866394, "learning_rate": 1.868052190227571e-05, "loss": 0.4791, "step": 7450 }, { "epoch": 0.16548096025571304, "grad_norm": 1.045859932899475, "learning_rate": 1.8678790280531956e-05, "loss": 0.5986, "step": 7455 }, { "epoch": 0.16559194681524067, "grad_norm": 1.305838942527771, "learning_rate": 1.867705760364999e-05, "loss": 0.4359, "step": 7460 }, { "epoch": 0.16570293337476832, "grad_norm": 0.9243895411491394, "learning_rate": 1.8675323871840462e-05, "loss": 0.4628, "step": 7465 }, { "epoch": 0.16581391993429595, "grad_norm": 1.281638264656067, "learning_rate": 1.8673589085314145e-05, "loss": 0.4752, "step": 7470 }, { "epoch": 0.1659249064938236, "grad_norm": 1.163022756576538, "learning_rate": 1.8671853244281962e-05, "loss": 0.6009, "step": 7475 }, { "epoch": 0.16603589305335123, "grad_norm": 1.445329189300537, "learning_rate": 1.8670116348954945e-05, "loss": 0.364, "step": 7480 }, { "epoch": 0.16614687961287888, "grad_norm": 1.2617442607879639, "learning_rate": 1.8668378399544254e-05, "loss": 0.644, "step": 7485 }, { "epoch": 0.1662578661724065, "grad_norm": 0.9319959282875061, "learning_rate": 1.866663939626119e-05, "loss": 0.493, "step": 7490 }, { "epoch": 0.16636885273193416, "grad_norm": 0.986315906047821, "learning_rate": 1.866489933931718e-05, "loss": 0.4185, "step": 7495 }, { "epoch": 0.16647983929146182, "grad_norm": 0.9533819556236267, "learning_rate": 1.8663158228923762e-05, "loss": 0.5356, "step": 7500 }, { "epoch": 0.16659082585098944, "grad_norm": 1.27620267868042, "learning_rate": 1.8661416065292624e-05, "loss": 0.5966, "step": 7505 }, { "epoch": 0.1667018124105171, "grad_norm": 1.3088321685791016, "learning_rate": 1.8659672848635568e-05, "loss": 0.5771, "step": 7510 }, { "epoch": 0.16681279897004472, "grad_norm": 0.8196889758110046, "learning_rate": 1.865792857916453e-05, "loss": 0.3497, "step": 7515 }, { "epoch": 0.16692378552957238, "grad_norm": 1.3427016735076904, "learning_rate": 1.8656183257091572e-05, "loss": 0.6173, "step": 7520 }, { "epoch": 0.1670347720891, "grad_norm": 1.2403075695037842, "learning_rate": 1.865443688262888e-05, "loss": 0.5343, "step": 7525 }, { "epoch": 0.16714575864862766, "grad_norm": 1.0151913166046143, "learning_rate": 1.8652689455988784e-05, "loss": 0.3825, "step": 7530 }, { "epoch": 0.16725674520815528, "grad_norm": 1.1205071210861206, "learning_rate": 1.865094097738372e-05, "loss": 0.5593, "step": 7535 }, { "epoch": 0.16736773176768294, "grad_norm": 1.0770914554595947, "learning_rate": 1.864919144702626e-05, "loss": 0.4712, "step": 7540 }, { "epoch": 0.16747871832721056, "grad_norm": 1.0981290340423584, "learning_rate": 1.8647440865129115e-05, "loss": 0.526, "step": 7545 }, { "epoch": 0.16758970488673822, "grad_norm": 1.3834798336029053, "learning_rate": 1.8645689231905112e-05, "loss": 0.5559, "step": 7550 }, { "epoch": 0.16770069144626587, "grad_norm": 1.1029125452041626, "learning_rate": 1.8643936547567205e-05, "loss": 0.6514, "step": 7555 }, { "epoch": 0.1678116780057935, "grad_norm": 1.110398530960083, "learning_rate": 1.8642182812328483e-05, "loss": 0.6274, "step": 7560 }, { "epoch": 0.16792266456532115, "grad_norm": 1.2129943370819092, "learning_rate": 1.8640428026402158e-05, "loss": 0.6039, "step": 7565 }, { "epoch": 0.16803365112484878, "grad_norm": 1.3205411434173584, "learning_rate": 1.863867219000157e-05, "loss": 0.5309, "step": 7570 }, { "epoch": 0.16814463768437643, "grad_norm": 0.8426812291145325, "learning_rate": 1.8636915303340193e-05, "loss": 0.4231, "step": 7575 }, { "epoch": 0.16825562424390406, "grad_norm": 1.1089075803756714, "learning_rate": 1.8635157366631614e-05, "loss": 0.5547, "step": 7580 }, { "epoch": 0.1683666108034317, "grad_norm": 1.6381417512893677, "learning_rate": 1.8633398380089567e-05, "loss": 0.4485, "step": 7585 }, { "epoch": 0.16847759736295934, "grad_norm": 1.5178996324539185, "learning_rate": 1.86316383439279e-05, "loss": 0.4218, "step": 7590 }, { "epoch": 0.168588583922487, "grad_norm": 1.508217692375183, "learning_rate": 1.8629877258360587e-05, "loss": 0.5992, "step": 7595 }, { "epoch": 0.16869957048201462, "grad_norm": 1.512963056564331, "learning_rate": 1.862811512360174e-05, "loss": 0.4963, "step": 7600 }, { "epoch": 0.16881055704154227, "grad_norm": 0.9588901996612549, "learning_rate": 1.8626351939865594e-05, "loss": 0.5764, "step": 7605 }, { "epoch": 0.16892154360106992, "grad_norm": 1.1882424354553223, "learning_rate": 1.862458770736651e-05, "loss": 0.4822, "step": 7610 }, { "epoch": 0.16903253016059755, "grad_norm": 0.9214734435081482, "learning_rate": 1.8622822426318978e-05, "loss": 0.474, "step": 7615 }, { "epoch": 0.1691435167201252, "grad_norm": 0.8436447381973267, "learning_rate": 1.862105609693761e-05, "loss": 0.5628, "step": 7620 }, { "epoch": 0.16925450327965283, "grad_norm": 1.0661908388137817, "learning_rate": 1.8619288719437158e-05, "loss": 0.5611, "step": 7625 }, { "epoch": 0.16936548983918048, "grad_norm": 1.3087199926376343, "learning_rate": 1.861752029403249e-05, "loss": 0.529, "step": 7630 }, { "epoch": 0.1694764763987081, "grad_norm": 1.2095814943313599, "learning_rate": 1.8615750820938605e-05, "loss": 0.5776, "step": 7635 }, { "epoch": 0.16958746295823576, "grad_norm": 1.132745623588562, "learning_rate": 1.861398030037063e-05, "loss": 0.5001, "step": 7640 }, { "epoch": 0.1696984495177634, "grad_norm": 1.1624484062194824, "learning_rate": 1.8612208732543823e-05, "loss": 0.6071, "step": 7645 }, { "epoch": 0.16980943607729104, "grad_norm": 2.140655517578125, "learning_rate": 1.8610436117673557e-05, "loss": 0.4338, "step": 7650 }, { "epoch": 0.16992042263681867, "grad_norm": 1.5044407844543457, "learning_rate": 1.8608662455975345e-05, "loss": 0.5192, "step": 7655 }, { "epoch": 0.17003140919634632, "grad_norm": 0.6824148297309875, "learning_rate": 1.8606887747664823e-05, "loss": 0.4568, "step": 7660 }, { "epoch": 0.17014239575587398, "grad_norm": 1.387648105621338, "learning_rate": 1.8605111992957757e-05, "loss": 0.4171, "step": 7665 }, { "epoch": 0.1702533823154016, "grad_norm": 1.3381781578063965, "learning_rate": 1.860333519207003e-05, "loss": 0.4753, "step": 7670 }, { "epoch": 0.17036436887492926, "grad_norm": 1.1267739534378052, "learning_rate": 1.8601557345217667e-05, "loss": 0.597, "step": 7675 }, { "epoch": 0.17047535543445688, "grad_norm": 1.2873826026916504, "learning_rate": 1.8599778452616806e-05, "loss": 0.4989, "step": 7680 }, { "epoch": 0.17058634199398454, "grad_norm": 0.9000810980796814, "learning_rate": 1.8597998514483724e-05, "loss": 0.4732, "step": 7685 }, { "epoch": 0.17069732855351216, "grad_norm": 0.9879721403121948, "learning_rate": 1.859621753103482e-05, "loss": 0.535, "step": 7690 }, { "epoch": 0.17080831511303982, "grad_norm": 0.6511263251304626, "learning_rate": 1.8594435502486618e-05, "loss": 0.3743, "step": 7695 }, { "epoch": 0.17091930167256744, "grad_norm": 1.4531750679016113, "learning_rate": 1.859265242905577e-05, "loss": 0.6158, "step": 7700 }, { "epoch": 0.1710302882320951, "grad_norm": 0.7871989011764526, "learning_rate": 1.8590868310959054e-05, "loss": 0.7176, "step": 7705 }, { "epoch": 0.17114127479162272, "grad_norm": 0.882152795791626, "learning_rate": 1.8589083148413384e-05, "loss": 0.339, "step": 7710 }, { "epoch": 0.17125226135115038, "grad_norm": 0.832227349281311, "learning_rate": 1.8587296941635787e-05, "loss": 0.5013, "step": 7715 }, { "epoch": 0.17136324791067803, "grad_norm": 1.1753653287887573, "learning_rate": 1.858550969084343e-05, "loss": 0.343, "step": 7720 }, { "epoch": 0.17147423447020566, "grad_norm": 1.0593284368515015, "learning_rate": 1.8583721396253597e-05, "loss": 0.5349, "step": 7725 }, { "epoch": 0.1715852210297333, "grad_norm": 1.1809357404708862, "learning_rate": 1.8581932058083705e-05, "loss": 0.5084, "step": 7730 }, { "epoch": 0.17169620758926094, "grad_norm": 1.158944010734558, "learning_rate": 1.8580141676551298e-05, "loss": 0.4514, "step": 7735 }, { "epoch": 0.1718071941487886, "grad_norm": 1.3387936353683472, "learning_rate": 1.8578350251874037e-05, "loss": 0.5451, "step": 7740 }, { "epoch": 0.17191818070831622, "grad_norm": 1.0001437664031982, "learning_rate": 1.8576557784269724e-05, "loss": 0.467, "step": 7745 }, { "epoch": 0.17202916726784387, "grad_norm": 1.3797436952590942, "learning_rate": 1.8574764273956278e-05, "loss": 0.5427, "step": 7750 }, { "epoch": 0.1721401538273715, "grad_norm": 1.282599925994873, "learning_rate": 1.857296972115175e-05, "loss": 0.5228, "step": 7755 }, { "epoch": 0.17225114038689915, "grad_norm": 1.4311726093292236, "learning_rate": 1.8571174126074313e-05, "loss": 0.5726, "step": 7760 }, { "epoch": 0.17236212694642677, "grad_norm": 1.367200493812561, "learning_rate": 1.8569377488942273e-05, "loss": 0.5565, "step": 7765 }, { "epoch": 0.17247311350595443, "grad_norm": 1.4041142463684082, "learning_rate": 1.856757980997406e-05, "loss": 0.4476, "step": 7770 }, { "epoch": 0.17258410006548208, "grad_norm": 1.3798754215240479, "learning_rate": 1.8565781089388223e-05, "loss": 0.5945, "step": 7775 }, { "epoch": 0.1726950866250097, "grad_norm": 1.224753737449646, "learning_rate": 1.856398132740345e-05, "loss": 0.4528, "step": 7780 }, { "epoch": 0.17280607318453736, "grad_norm": 1.2565453052520752, "learning_rate": 1.856218052423855e-05, "loss": 0.4699, "step": 7785 }, { "epoch": 0.172917059744065, "grad_norm": 1.1906086206436157, "learning_rate": 1.8560378680112453e-05, "loss": 0.5168, "step": 7790 }, { "epoch": 0.17302804630359264, "grad_norm": 1.0104856491088867, "learning_rate": 1.855857579524423e-05, "loss": 0.5057, "step": 7795 }, { "epoch": 0.17313903286312027, "grad_norm": 1.1050394773483276, "learning_rate": 1.855677186985306e-05, "loss": 0.5146, "step": 7800 }, { "epoch": 0.17325001942264792, "grad_norm": 1.3951995372772217, "learning_rate": 1.855496690415827e-05, "loss": 0.446, "step": 7805 }, { "epoch": 0.17336100598217555, "grad_norm": 1.1135098934173584, "learning_rate": 1.8553160898379286e-05, "loss": 0.5184, "step": 7810 }, { "epoch": 0.1734719925417032, "grad_norm": 1.2136260271072388, "learning_rate": 1.8551353852735693e-05, "loss": 0.5071, "step": 7815 }, { "epoch": 0.17358297910123083, "grad_norm": 1.019028663635254, "learning_rate": 1.8549545767447174e-05, "loss": 0.6108, "step": 7820 }, { "epoch": 0.17369396566075848, "grad_norm": 1.4090794324874878, "learning_rate": 1.8547736642733554e-05, "loss": 0.3684, "step": 7825 }, { "epoch": 0.17380495222028614, "grad_norm": 0.944814920425415, "learning_rate": 1.854592647881478e-05, "loss": 0.4649, "step": 7830 }, { "epoch": 0.17391593877981376, "grad_norm": 1.4081575870513916, "learning_rate": 1.8544115275910925e-05, "loss": 0.5574, "step": 7835 }, { "epoch": 0.17402692533934142, "grad_norm": 1.8278511762619019, "learning_rate": 1.854230303424219e-05, "loss": 0.5835, "step": 7840 }, { "epoch": 0.17413791189886904, "grad_norm": 1.2721360921859741, "learning_rate": 1.8540489754028902e-05, "loss": 0.4323, "step": 7845 }, { "epoch": 0.1742488984583967, "grad_norm": 1.2163597345352173, "learning_rate": 1.8538675435491515e-05, "loss": 0.4292, "step": 7850 }, { "epoch": 0.17435988501792432, "grad_norm": 1.269092321395874, "learning_rate": 1.8536860078850598e-05, "loss": 0.3953, "step": 7855 }, { "epoch": 0.17447087157745197, "grad_norm": 1.0180573463439941, "learning_rate": 1.853504368432687e-05, "loss": 0.3855, "step": 7860 }, { "epoch": 0.1745818581369796, "grad_norm": 0.8039788603782654, "learning_rate": 1.8533226252141148e-05, "loss": 0.5043, "step": 7865 }, { "epoch": 0.17469284469650725, "grad_norm": 1.2647064924240112, "learning_rate": 1.85314077825144e-05, "loss": 0.6227, "step": 7870 }, { "epoch": 0.17480383125603488, "grad_norm": 1.075291395187378, "learning_rate": 1.8529588275667706e-05, "loss": 0.4935, "step": 7875 }, { "epoch": 0.17491481781556253, "grad_norm": 1.4665888547897339, "learning_rate": 1.8527767731822276e-05, "loss": 0.7483, "step": 7880 }, { "epoch": 0.1750258043750902, "grad_norm": 1.266965627670288, "learning_rate": 1.8525946151199444e-05, "loss": 0.5532, "step": 7885 }, { "epoch": 0.17513679093461781, "grad_norm": 1.1272302865982056, "learning_rate": 1.8524123534020674e-05, "loss": 0.343, "step": 7890 }, { "epoch": 0.17524777749414547, "grad_norm": 1.407818078994751, "learning_rate": 1.852229988050755e-05, "loss": 0.6022, "step": 7895 }, { "epoch": 0.1753587640536731, "grad_norm": 1.155456304550171, "learning_rate": 1.852047519088179e-05, "loss": 0.411, "step": 7900 }, { "epoch": 0.17546975061320075, "grad_norm": 1.0109899044036865, "learning_rate": 1.851864946536523e-05, "loss": 0.5448, "step": 7905 }, { "epoch": 0.17558073717272837, "grad_norm": 1.2911863327026367, "learning_rate": 1.851682270417984e-05, "loss": 0.4231, "step": 7910 }, { "epoch": 0.17569172373225603, "grad_norm": 1.4397233724594116, "learning_rate": 1.8514994907547707e-05, "loss": 0.4755, "step": 7915 }, { "epoch": 0.17580271029178365, "grad_norm": 1.1584182977676392, "learning_rate": 1.8513166075691052e-05, "loss": 0.4597, "step": 7920 }, { "epoch": 0.1759136968513113, "grad_norm": 1.5332622528076172, "learning_rate": 1.8511336208832214e-05, "loss": 0.5899, "step": 7925 }, { "epoch": 0.17602468341083893, "grad_norm": 1.0838502645492554, "learning_rate": 1.8509505307193666e-05, "loss": 0.573, "step": 7930 }, { "epoch": 0.1761356699703666, "grad_norm": 1.9301401376724243, "learning_rate": 1.8507673370998e-05, "loss": 0.5122, "step": 7935 }, { "epoch": 0.17624665652989424, "grad_norm": 1.3569397926330566, "learning_rate": 1.850584040046794e-05, "loss": 0.5364, "step": 7940 }, { "epoch": 0.17635764308942187, "grad_norm": 1.0816103219985962, "learning_rate": 1.850400639582633e-05, "loss": 0.661, "step": 7945 }, { "epoch": 0.17646862964894952, "grad_norm": 0.9141400456428528, "learning_rate": 1.8502171357296144e-05, "loss": 0.4285, "step": 7950 }, { "epoch": 0.17657961620847715, "grad_norm": 0.9878506064414978, "learning_rate": 1.8500335285100477e-05, "loss": 0.4673, "step": 7955 }, { "epoch": 0.1766906027680048, "grad_norm": 1.0910661220550537, "learning_rate": 1.849849817946255e-05, "loss": 0.577, "step": 7960 }, { "epoch": 0.17680158932753243, "grad_norm": 1.1815053224563599, "learning_rate": 1.8496660040605722e-05, "loss": 0.487, "step": 7965 }, { "epoch": 0.17691257588706008, "grad_norm": 1.0612215995788574, "learning_rate": 1.849482086875346e-05, "loss": 0.5338, "step": 7970 }, { "epoch": 0.1770235624465877, "grad_norm": 1.564182162284851, "learning_rate": 1.8492980664129368e-05, "loss": 0.4815, "step": 7975 }, { "epoch": 0.17713454900611536, "grad_norm": 1.1242620944976807, "learning_rate": 1.849113942695717e-05, "loss": 0.4546, "step": 7980 }, { "epoch": 0.17724553556564301, "grad_norm": 1.010068655014038, "learning_rate": 1.8489297157460712e-05, "loss": 0.4586, "step": 7985 }, { "epoch": 0.17735652212517064, "grad_norm": 1.251988172531128, "learning_rate": 1.848745385586398e-05, "loss": 0.5537, "step": 7990 }, { "epoch": 0.1774675086846983, "grad_norm": 1.282753825187683, "learning_rate": 1.8485609522391073e-05, "loss": 0.491, "step": 7995 }, { "epoch": 0.17757849524422592, "grad_norm": 1.0400362014770508, "learning_rate": 1.8483764157266218e-05, "loss": 0.5087, "step": 8000 }, { "epoch": 0.17768948180375357, "grad_norm": 2.066152334213257, "learning_rate": 1.848191776071377e-05, "loss": 0.4898, "step": 8005 }, { "epoch": 0.1778004683632812, "grad_norm": 0.9378485083580017, "learning_rate": 1.8480070332958207e-05, "loss": 0.4535, "step": 8010 }, { "epoch": 0.17791145492280885, "grad_norm": 1.3070062398910522, "learning_rate": 1.847822187422413e-05, "loss": 0.4423, "step": 8015 }, { "epoch": 0.17802244148233648, "grad_norm": 1.1166318655014038, "learning_rate": 1.8476372384736278e-05, "loss": 0.4741, "step": 8020 }, { "epoch": 0.17813342804186413, "grad_norm": 1.023356556892395, "learning_rate": 1.847452186471949e-05, "loss": 0.5005, "step": 8025 }, { "epoch": 0.17824441460139176, "grad_norm": 1.0579627752304077, "learning_rate": 1.8472670314398763e-05, "loss": 0.5418, "step": 8030 }, { "epoch": 0.1783554011609194, "grad_norm": 1.3721271753311157, "learning_rate": 1.847081773399919e-05, "loss": 0.5112, "step": 8035 }, { "epoch": 0.17846638772044707, "grad_norm": 1.1013174057006836, "learning_rate": 1.8468964123746008e-05, "loss": 0.4563, "step": 8040 }, { "epoch": 0.1785773742799747, "grad_norm": 1.1645931005477905, "learning_rate": 1.846710948386457e-05, "loss": 0.4572, "step": 8045 }, { "epoch": 0.17868836083950235, "grad_norm": 1.0203478336334229, "learning_rate": 1.8465253814580356e-05, "loss": 0.4665, "step": 8050 }, { "epoch": 0.17879934739902997, "grad_norm": 1.235601782798767, "learning_rate": 1.8463397116118976e-05, "loss": 0.5339, "step": 8055 }, { "epoch": 0.17891033395855763, "grad_norm": 0.7012696266174316, "learning_rate": 1.8461539388706156e-05, "loss": 0.4055, "step": 8060 }, { "epoch": 0.17902132051808525, "grad_norm": 0.9720817804336548, "learning_rate": 1.8459680632567757e-05, "loss": 0.4616, "step": 8065 }, { "epoch": 0.1791323070776129, "grad_norm": 1.1411256790161133, "learning_rate": 1.8457820847929755e-05, "loss": 0.6752, "step": 8070 }, { "epoch": 0.17924329363714053, "grad_norm": 0.9367457032203674, "learning_rate": 1.845596003501826e-05, "loss": 0.4181, "step": 8075 }, { "epoch": 0.1793542801966682, "grad_norm": 1.1422454118728638, "learning_rate": 1.845409819405951e-05, "loss": 0.4357, "step": 8080 }, { "epoch": 0.1794652667561958, "grad_norm": 1.0749188661575317, "learning_rate": 1.8452235325279847e-05, "loss": 0.5333, "step": 8085 }, { "epoch": 0.17957625331572347, "grad_norm": 1.013289213180542, "learning_rate": 1.845037142890576e-05, "loss": 0.5039, "step": 8090 }, { "epoch": 0.17968723987525112, "grad_norm": 1.0286688804626465, "learning_rate": 1.8448506505163858e-05, "loss": 0.549, "step": 8095 }, { "epoch": 0.17979822643477875, "grad_norm": 1.7231297492980957, "learning_rate": 1.844664055428087e-05, "loss": 0.6414, "step": 8100 }, { "epoch": 0.1799092129943064, "grad_norm": 1.311028242111206, "learning_rate": 1.8444773576483647e-05, "loss": 0.5827, "step": 8105 }, { "epoch": 0.18002019955383403, "grad_norm": 1.5582536458969116, "learning_rate": 1.844290557199918e-05, "loss": 0.7342, "step": 8110 }, { "epoch": 0.18013118611336168, "grad_norm": 1.0582084655761719, "learning_rate": 1.8441036541054564e-05, "loss": 0.5613, "step": 8115 }, { "epoch": 0.1802421726728893, "grad_norm": 1.0997862815856934, "learning_rate": 1.8439166483877032e-05, "loss": 0.5699, "step": 8120 }, { "epoch": 0.18035315923241696, "grad_norm": 0.9655759334564209, "learning_rate": 1.843729540069395e-05, "loss": 0.3168, "step": 8125 }, { "epoch": 0.18046414579194459, "grad_norm": 1.0601000785827637, "learning_rate": 1.8435423291732783e-05, "loss": 0.4598, "step": 8130 }, { "epoch": 0.18057513235147224, "grad_norm": 1.1877996921539307, "learning_rate": 1.8433550157221145e-05, "loss": 0.5095, "step": 8135 }, { "epoch": 0.18068611891099987, "grad_norm": 1.2203222513198853, "learning_rate": 1.8431675997386764e-05, "loss": 0.4699, "step": 8140 }, { "epoch": 0.18079710547052752, "grad_norm": 2.0087506771087646, "learning_rate": 1.842980081245749e-05, "loss": 0.5853, "step": 8145 }, { "epoch": 0.18090809203005517, "grad_norm": 1.2762497663497925, "learning_rate": 1.8427924602661305e-05, "loss": 0.4663, "step": 8150 }, { "epoch": 0.1810190785895828, "grad_norm": 0.9818155765533447, "learning_rate": 1.842604736822631e-05, "loss": 0.472, "step": 8155 }, { "epoch": 0.18113006514911045, "grad_norm": 1.3976755142211914, "learning_rate": 1.842416910938074e-05, "loss": 0.3402, "step": 8160 }, { "epoch": 0.18124105170863808, "grad_norm": 1.2582039833068848, "learning_rate": 1.842228982635294e-05, "loss": 0.4522, "step": 8165 }, { "epoch": 0.18135203826816573, "grad_norm": 3.259056568145752, "learning_rate": 1.842040951937139e-05, "loss": 0.4833, "step": 8170 }, { "epoch": 0.18146302482769336, "grad_norm": 0.9759329557418823, "learning_rate": 1.841852818866469e-05, "loss": 0.3992, "step": 8175 }, { "epoch": 0.181574011387221, "grad_norm": 1.2619388103485107, "learning_rate": 1.8416645834461564e-05, "loss": 0.5316, "step": 8180 }, { "epoch": 0.18168499794674864, "grad_norm": 0.9483252167701721, "learning_rate": 1.8414762456990868e-05, "loss": 0.4188, "step": 8185 }, { "epoch": 0.1817959845062763, "grad_norm": 0.9606418013572693, "learning_rate": 1.8412878056481567e-05, "loss": 0.5083, "step": 8190 }, { "epoch": 0.18190697106580392, "grad_norm": 1.6246932744979858, "learning_rate": 1.841099263316277e-05, "loss": 0.4212, "step": 8195 }, { "epoch": 0.18201795762533157, "grad_norm": 0.8691072463989258, "learning_rate": 1.84091061872637e-05, "loss": 0.4484, "step": 8200 }, { "epoch": 0.18212894418485923, "grad_norm": 1.4100844860076904, "learning_rate": 1.84072187190137e-05, "loss": 0.65, "step": 8205 }, { "epoch": 0.18223993074438685, "grad_norm": 1.1791785955429077, "learning_rate": 1.8405330228642246e-05, "loss": 0.4917, "step": 8210 }, { "epoch": 0.1823509173039145, "grad_norm": 1.2096881866455078, "learning_rate": 1.840344071637893e-05, "loss": 0.617, "step": 8215 }, { "epoch": 0.18246190386344213, "grad_norm": 1.1849205493927002, "learning_rate": 1.8401550182453475e-05, "loss": 0.4585, "step": 8220 }, { "epoch": 0.18257289042296979, "grad_norm": 1.3436826467514038, "learning_rate": 1.839965862709572e-05, "loss": 0.3399, "step": 8225 }, { "epoch": 0.1826838769824974, "grad_norm": 1.2477469444274902, "learning_rate": 1.8397766050535648e-05, "loss": 0.565, "step": 8230 }, { "epoch": 0.18279486354202507, "grad_norm": 1.2094818353652954, "learning_rate": 1.839587245300334e-05, "loss": 0.536, "step": 8235 }, { "epoch": 0.1829058501015527, "grad_norm": 1.6377440690994263, "learning_rate": 1.8393977834729012e-05, "loss": 0.5195, "step": 8240 }, { "epoch": 0.18301683666108035, "grad_norm": 1.2369369268417358, "learning_rate": 1.8392082195943017e-05, "loss": 0.7542, "step": 8245 }, { "epoch": 0.18312782322060797, "grad_norm": 1.154937744140625, "learning_rate": 1.8390185536875812e-05, "loss": 0.4717, "step": 8250 }, { "epoch": 0.18323880978013563, "grad_norm": 1.0476173162460327, "learning_rate": 1.8388287857757986e-05, "loss": 0.5616, "step": 8255 }, { "epoch": 0.18334979633966328, "grad_norm": 1.2952115535736084, "learning_rate": 1.8386389158820254e-05, "loss": 0.491, "step": 8260 }, { "epoch": 0.1834607828991909, "grad_norm": 1.501037359237671, "learning_rate": 1.8384489440293455e-05, "loss": 0.5717, "step": 8265 }, { "epoch": 0.18357176945871856, "grad_norm": 1.3697893619537354, "learning_rate": 1.838258870240855e-05, "loss": 0.5399, "step": 8270 }, { "epoch": 0.18368275601824618, "grad_norm": 1.2369073629379272, "learning_rate": 1.838068694539662e-05, "loss": 0.5484, "step": 8275 }, { "epoch": 0.18379374257777384, "grad_norm": 1.4244351387023926, "learning_rate": 1.8378784169488884e-05, "loss": 0.5621, "step": 8280 }, { "epoch": 0.18390472913730146, "grad_norm": 1.5416419506072998, "learning_rate": 1.8376880374916666e-05, "loss": 0.4107, "step": 8285 }, { "epoch": 0.18401571569682912, "grad_norm": 0.9979259967803955, "learning_rate": 1.8374975561911426e-05, "loss": 0.5422, "step": 8290 }, { "epoch": 0.18412670225635674, "grad_norm": 1.306522250175476, "learning_rate": 1.8373069730704743e-05, "loss": 0.4937, "step": 8295 }, { "epoch": 0.1842376888158844, "grad_norm": 1.3945233821868896, "learning_rate": 1.8371162881528324e-05, "loss": 0.7341, "step": 8300 }, { "epoch": 0.18434867537541202, "grad_norm": 1.2541005611419678, "learning_rate": 1.8369255014613996e-05, "loss": 0.4913, "step": 8305 }, { "epoch": 0.18445966193493968, "grad_norm": 1.2350218296051025, "learning_rate": 1.8367346130193713e-05, "loss": 0.5077, "step": 8310 }, { "epoch": 0.18457064849446733, "grad_norm": 1.4759026765823364, "learning_rate": 1.836543622849955e-05, "loss": 0.5094, "step": 8315 }, { "epoch": 0.18468163505399496, "grad_norm": 1.2139075994491577, "learning_rate": 1.8363525309763703e-05, "loss": 0.3951, "step": 8320 }, { "epoch": 0.1847926216135226, "grad_norm": 1.1291913986206055, "learning_rate": 1.83616133742185e-05, "loss": 0.4726, "step": 8325 }, { "epoch": 0.18490360817305024, "grad_norm": 1.0854945182800293, "learning_rate": 1.8359700422096385e-05, "loss": 0.5243, "step": 8330 }, { "epoch": 0.1850145947325779, "grad_norm": 1.083794116973877, "learning_rate": 1.8357786453629932e-05, "loss": 0.4193, "step": 8335 }, { "epoch": 0.18512558129210552, "grad_norm": 0.9918060898780823, "learning_rate": 1.8355871469051825e-05, "loss": 0.2593, "step": 8340 }, { "epoch": 0.18523656785163317, "grad_norm": 1.0464586019515991, "learning_rate": 1.8353955468594894e-05, "loss": 0.6056, "step": 8345 }, { "epoch": 0.1853475544111608, "grad_norm": 1.2094873189926147, "learning_rate": 1.8352038452492075e-05, "loss": 0.5531, "step": 8350 }, { "epoch": 0.18545854097068845, "grad_norm": 1.3338085412979126, "learning_rate": 1.8350120420976426e-05, "loss": 0.4514, "step": 8355 }, { "epoch": 0.18556952753021608, "grad_norm": 1.1383191347122192, "learning_rate": 1.8348201374281146e-05, "loss": 0.5114, "step": 8360 }, { "epoch": 0.18568051408974373, "grad_norm": 1.254416584968567, "learning_rate": 1.8346281312639534e-05, "loss": 0.6463, "step": 8365 }, { "epoch": 0.18579150064927138, "grad_norm": 1.138234257698059, "learning_rate": 1.834436023628504e-05, "loss": 0.3798, "step": 8370 }, { "epoch": 0.185902487208799, "grad_norm": 1.7597484588623047, "learning_rate": 1.8342438145451207e-05, "loss": 0.4756, "step": 8375 }, { "epoch": 0.18601347376832666, "grad_norm": 0.9508825540542603, "learning_rate": 1.8340515040371724e-05, "loss": 0.446, "step": 8380 }, { "epoch": 0.1861244603278543, "grad_norm": 1.4030269384384155, "learning_rate": 1.8338590921280396e-05, "loss": 0.688, "step": 8385 }, { "epoch": 0.18623544688738194, "grad_norm": 1.4024935960769653, "learning_rate": 1.8336665788411147e-05, "loss": 0.4606, "step": 8390 }, { "epoch": 0.18634643344690957, "grad_norm": 1.5650089979171753, "learning_rate": 1.833473964199803e-05, "loss": 0.4514, "step": 8395 }, { "epoch": 0.18645742000643722, "grad_norm": 1.0336397886276245, "learning_rate": 1.833281248227522e-05, "loss": 0.5878, "step": 8400 }, { "epoch": 0.18656840656596485, "grad_norm": 0.9039632081985474, "learning_rate": 1.8330884309477017e-05, "loss": 0.6019, "step": 8405 }, { "epoch": 0.1866793931254925, "grad_norm": 1.5472464561462402, "learning_rate": 1.8328955123837837e-05, "loss": 0.5514, "step": 8410 }, { "epoch": 0.18679037968502013, "grad_norm": 0.9944090843200684, "learning_rate": 1.8327024925592226e-05, "loss": 0.5208, "step": 8415 }, { "epoch": 0.18690136624454778, "grad_norm": 1.2867696285247803, "learning_rate": 1.8325093714974852e-05, "loss": 0.4037, "step": 8420 }, { "epoch": 0.18701235280407544, "grad_norm": 1.4909098148345947, "learning_rate": 1.8323161492220506e-05, "loss": 0.5026, "step": 8425 }, { "epoch": 0.18712333936360306, "grad_norm": 1.0257625579833984, "learning_rate": 1.8321228257564098e-05, "loss": 0.5233, "step": 8430 }, { "epoch": 0.18723432592313072, "grad_norm": 1.1350610256195068, "learning_rate": 1.8319294011240662e-05, "loss": 0.6545, "step": 8435 }, { "epoch": 0.18734531248265834, "grad_norm": 1.3230243921279907, "learning_rate": 1.8317358753485365e-05, "loss": 0.5876, "step": 8440 }, { "epoch": 0.187456299042186, "grad_norm": 1.0084996223449707, "learning_rate": 1.8315422484533486e-05, "loss": 0.5972, "step": 8445 }, { "epoch": 0.18756728560171362, "grad_norm": 0.8630398511886597, "learning_rate": 1.8313485204620428e-05, "loss": 0.4348, "step": 8450 }, { "epoch": 0.18767827216124128, "grad_norm": 1.212876558303833, "learning_rate": 1.8311546913981718e-05, "loss": 0.5198, "step": 8455 }, { "epoch": 0.1877892587207689, "grad_norm": 1.1673882007598877, "learning_rate": 1.830960761285301e-05, "loss": 0.5926, "step": 8460 }, { "epoch": 0.18790024528029656, "grad_norm": 1.3223752975463867, "learning_rate": 1.830766730147008e-05, "loss": 0.5681, "step": 8465 }, { "epoch": 0.1880112318398242, "grad_norm": 1.0794979333877563, "learning_rate": 1.8305725980068814e-05, "loss": 0.4693, "step": 8470 }, { "epoch": 0.18812221839935184, "grad_norm": 0.965379536151886, "learning_rate": 1.8303783648885245e-05, "loss": 0.4256, "step": 8475 }, { "epoch": 0.1882332049588795, "grad_norm": 1.4646849632263184, "learning_rate": 1.8301840308155507e-05, "loss": 0.4721, "step": 8480 }, { "epoch": 0.18834419151840712, "grad_norm": 0.721495509147644, "learning_rate": 1.8299895958115867e-05, "loss": 0.4394, "step": 8485 }, { "epoch": 0.18845517807793477, "grad_norm": 1.1117589473724365, "learning_rate": 1.8297950599002713e-05, "loss": 0.6224, "step": 8490 }, { "epoch": 0.1885661646374624, "grad_norm": 1.2575618028640747, "learning_rate": 1.829600423105255e-05, "loss": 0.5526, "step": 8495 }, { "epoch": 0.18867715119699005, "grad_norm": 0.9530577659606934, "learning_rate": 1.829405685450202e-05, "loss": 0.6299, "step": 8500 }, { "epoch": 0.18878813775651768, "grad_norm": 1.205317735671997, "learning_rate": 1.829210846958787e-05, "loss": 0.5405, "step": 8505 }, { "epoch": 0.18889912431604533, "grad_norm": 1.6385353803634644, "learning_rate": 1.8290159076546985e-05, "loss": 0.414, "step": 8510 }, { "epoch": 0.18901011087557296, "grad_norm": 0.900580883026123, "learning_rate": 1.8288208675616363e-05, "loss": 0.5026, "step": 8515 }, { "epoch": 0.1891210974351006, "grad_norm": 1.253099799156189, "learning_rate": 1.8286257267033124e-05, "loss": 0.4916, "step": 8520 }, { "epoch": 0.18923208399462826, "grad_norm": 1.2847565412521362, "learning_rate": 1.828430485103452e-05, "loss": 0.6544, "step": 8525 }, { "epoch": 0.1893430705541559, "grad_norm": 1.2562917470932007, "learning_rate": 1.8282351427857906e-05, "loss": 0.3892, "step": 8530 }, { "epoch": 0.18945405711368354, "grad_norm": 1.5068742036819458, "learning_rate": 1.828039699774079e-05, "loss": 0.4453, "step": 8535 }, { "epoch": 0.18956504367321117, "grad_norm": 1.0743848085403442, "learning_rate": 1.827844156092078e-05, "loss": 0.5158, "step": 8540 }, { "epoch": 0.18967603023273882, "grad_norm": 1.0758819580078125, "learning_rate": 1.8276485117635603e-05, "loss": 0.6229, "step": 8545 }, { "epoch": 0.18978701679226645, "grad_norm": 1.0991002321243286, "learning_rate": 1.8274527668123126e-05, "loss": 0.5309, "step": 8550 }, { "epoch": 0.1898980033517941, "grad_norm": 1.1984364986419678, "learning_rate": 1.8272569212621323e-05, "loss": 0.4739, "step": 8555 }, { "epoch": 0.19000898991132173, "grad_norm": 1.335144281387329, "learning_rate": 1.82706097513683e-05, "loss": 0.5791, "step": 8560 }, { "epoch": 0.19011997647084938, "grad_norm": 0.9494961500167847, "learning_rate": 1.826864928460228e-05, "loss": 0.4352, "step": 8565 }, { "epoch": 0.190230963030377, "grad_norm": 1.3024598360061646, "learning_rate": 1.8266687812561614e-05, "loss": 0.4986, "step": 8570 }, { "epoch": 0.19034194958990466, "grad_norm": 1.7000435590744019, "learning_rate": 1.8264725335484766e-05, "loss": 0.5095, "step": 8575 }, { "epoch": 0.19045293614943232, "grad_norm": 1.4852977991104126, "learning_rate": 1.826276185361033e-05, "loss": 0.5957, "step": 8580 }, { "epoch": 0.19056392270895994, "grad_norm": 1.6019827127456665, "learning_rate": 1.826079736717702e-05, "loss": 0.4828, "step": 8585 }, { "epoch": 0.1906749092684876, "grad_norm": 1.0514659881591797, "learning_rate": 1.825883187642367e-05, "loss": 0.4597, "step": 8590 }, { "epoch": 0.19078589582801522, "grad_norm": 1.223839282989502, "learning_rate": 1.825686538158924e-05, "loss": 0.708, "step": 8595 }, { "epoch": 0.19089688238754288, "grad_norm": 1.0082738399505615, "learning_rate": 1.8254897882912804e-05, "loss": 0.4521, "step": 8600 }, { "epoch": 0.1910078689470705, "grad_norm": 1.421229600906372, "learning_rate": 1.825292938063357e-05, "loss": 0.6726, "step": 8605 }, { "epoch": 0.19111885550659816, "grad_norm": 1.07749605178833, "learning_rate": 1.8250959874990862e-05, "loss": 0.5096, "step": 8610 }, { "epoch": 0.19122984206612578, "grad_norm": 1.0139070749282837, "learning_rate": 1.824898936622412e-05, "loss": 0.5871, "step": 8615 }, { "epoch": 0.19134082862565344, "grad_norm": 1.6385419368743896, "learning_rate": 1.824701785457292e-05, "loss": 0.577, "step": 8620 }, { "epoch": 0.19145181518518106, "grad_norm": 1.14400053024292, "learning_rate": 1.8245045340276945e-05, "loss": 0.6675, "step": 8625 }, { "epoch": 0.19156280174470872, "grad_norm": 1.2335268259048462, "learning_rate": 1.8243071823576012e-05, "loss": 0.5573, "step": 8630 }, { "epoch": 0.19167378830423637, "grad_norm": 1.0109930038452148, "learning_rate": 1.824109730471005e-05, "loss": 0.5114, "step": 8635 }, { "epoch": 0.191784774863764, "grad_norm": 1.3708138465881348, "learning_rate": 1.8239121783919117e-05, "loss": 0.4552, "step": 8640 }, { "epoch": 0.19189576142329165, "grad_norm": 1.415886640548706, "learning_rate": 1.823714526144339e-05, "loss": 0.6616, "step": 8645 }, { "epoch": 0.19200674798281928, "grad_norm": 1.3275566101074219, "learning_rate": 1.8235167737523162e-05, "loss": 0.4096, "step": 8650 }, { "epoch": 0.19211773454234693, "grad_norm": 1.259953260421753, "learning_rate": 1.823318921239886e-05, "loss": 0.6252, "step": 8655 }, { "epoch": 0.19222872110187456, "grad_norm": 1.3756461143493652, "learning_rate": 1.823120968631103e-05, "loss": 0.4376, "step": 8660 }, { "epoch": 0.1923397076614022, "grad_norm": 1.2732354402542114, "learning_rate": 1.8229229159500333e-05, "loss": 0.4112, "step": 8665 }, { "epoch": 0.19245069422092984, "grad_norm": 0.9623165130615234, "learning_rate": 1.822724763220755e-05, "loss": 0.589, "step": 8670 }, { "epoch": 0.1925616807804575, "grad_norm": 0.9481779336929321, "learning_rate": 1.822526510467359e-05, "loss": 0.2917, "step": 8675 }, { "epoch": 0.19267266733998512, "grad_norm": 0.9726129174232483, "learning_rate": 1.822328157713949e-05, "loss": 0.4917, "step": 8680 }, { "epoch": 0.19278365389951277, "grad_norm": 1.8361598253250122, "learning_rate": 1.8221297049846388e-05, "loss": 0.6041, "step": 8685 }, { "epoch": 0.19289464045904042, "grad_norm": 1.1872971057891846, "learning_rate": 1.8219311523035568e-05, "loss": 0.3696, "step": 8690 }, { "epoch": 0.19300562701856805, "grad_norm": 1.2972650527954102, "learning_rate": 1.8217324996948416e-05, "loss": 0.4888, "step": 8695 }, { "epoch": 0.1931166135780957, "grad_norm": 1.157384991645813, "learning_rate": 1.821533747182645e-05, "loss": 0.4873, "step": 8700 }, { "epoch": 0.19322760013762333, "grad_norm": 0.9881042242050171, "learning_rate": 1.8213348947911304e-05, "loss": 0.4809, "step": 8705 }, { "epoch": 0.19333858669715098, "grad_norm": 0.7585896253585815, "learning_rate": 1.8211359425444742e-05, "loss": 0.3606, "step": 8710 }, { "epoch": 0.1934495732566786, "grad_norm": 0.9092647433280945, "learning_rate": 1.8209368904668638e-05, "loss": 0.4519, "step": 8715 }, { "epoch": 0.19356055981620626, "grad_norm": 1.3298051357269287, "learning_rate": 1.8207377385824997e-05, "loss": 0.5164, "step": 8720 }, { "epoch": 0.1936715463757339, "grad_norm": 0.9970882534980774, "learning_rate": 1.8205384869155937e-05, "loss": 0.5339, "step": 8725 }, { "epoch": 0.19378253293526154, "grad_norm": 1.05615234375, "learning_rate": 1.8203391354903703e-05, "loss": 0.5448, "step": 8730 }, { "epoch": 0.19389351949478917, "grad_norm": 1.287419319152832, "learning_rate": 1.8201396843310658e-05, "loss": 0.5216, "step": 8735 }, { "epoch": 0.19400450605431682, "grad_norm": 0.873881459236145, "learning_rate": 1.8199401334619295e-05, "loss": 0.546, "step": 8740 }, { "epoch": 0.19411549261384448, "grad_norm": 0.8294119238853455, "learning_rate": 1.8197404829072214e-05, "loss": 0.5422, "step": 8745 }, { "epoch": 0.1942264791733721, "grad_norm": 1.0979552268981934, "learning_rate": 1.8195407326912144e-05, "loss": 0.4362, "step": 8750 }, { "epoch": 0.19433746573289976, "grad_norm": 1.397440791130066, "learning_rate": 1.819340882838194e-05, "loss": 0.5967, "step": 8755 }, { "epoch": 0.19444845229242738, "grad_norm": 1.3883029222488403, "learning_rate": 1.819140933372457e-05, "loss": 0.695, "step": 8760 }, { "epoch": 0.19455943885195504, "grad_norm": 1.1809195280075073, "learning_rate": 1.818940884318312e-05, "loss": 0.5256, "step": 8765 }, { "epoch": 0.19467042541148266, "grad_norm": 1.1587636470794678, "learning_rate": 1.818740735700081e-05, "loss": 0.5528, "step": 8770 }, { "epoch": 0.19478141197101032, "grad_norm": 1.2209142446517944, "learning_rate": 1.818540487542097e-05, "loss": 0.3887, "step": 8775 }, { "epoch": 0.19489239853053794, "grad_norm": 1.2413638830184937, "learning_rate": 1.818340139868706e-05, "loss": 0.6362, "step": 8780 }, { "epoch": 0.1950033850900656, "grad_norm": 0.9728131294250488, "learning_rate": 1.818139692704265e-05, "loss": 0.3275, "step": 8785 }, { "epoch": 0.19511437164959322, "grad_norm": 0.8453243970870972, "learning_rate": 1.8179391460731445e-05, "loss": 0.504, "step": 8790 }, { "epoch": 0.19522535820912088, "grad_norm": 1.1394267082214355, "learning_rate": 1.8177384999997258e-05, "loss": 0.4273, "step": 8795 }, { "epoch": 0.19533634476864853, "grad_norm": 1.447892665863037, "learning_rate": 1.817537754508402e-05, "loss": 0.5201, "step": 8800 }, { "epoch": 0.19544733132817615, "grad_norm": 1.434080719947815, "learning_rate": 1.8173369096235804e-05, "loss": 0.5835, "step": 8805 }, { "epoch": 0.1955583178877038, "grad_norm": 1.1676700115203857, "learning_rate": 1.8171359653696784e-05, "loss": 0.7007, "step": 8810 }, { "epoch": 0.19566930444723143, "grad_norm": 0.8635067343711853, "learning_rate": 1.8169349217711262e-05, "loss": 0.4438, "step": 8815 }, { "epoch": 0.1957802910067591, "grad_norm": 1.5453852415084839, "learning_rate": 1.8167337788523654e-05, "loss": 0.5355, "step": 8820 }, { "epoch": 0.19589127756628671, "grad_norm": 1.5389227867126465, "learning_rate": 1.8165325366378516e-05, "loss": 0.3978, "step": 8825 }, { "epoch": 0.19600226412581437, "grad_norm": 0.9028152823448181, "learning_rate": 1.8163311951520505e-05, "loss": 0.5507, "step": 8830 }, { "epoch": 0.196113250685342, "grad_norm": 1.4843121767044067, "learning_rate": 1.81612975441944e-05, "loss": 0.459, "step": 8835 }, { "epoch": 0.19622423724486965, "grad_norm": 1.0722191333770752, "learning_rate": 1.815928214464511e-05, "loss": 0.4453, "step": 8840 }, { "epoch": 0.19633522380439727, "grad_norm": 1.3621715307235718, "learning_rate": 1.8157265753117665e-05, "loss": 0.4542, "step": 8845 }, { "epoch": 0.19644621036392493, "grad_norm": 1.274541974067688, "learning_rate": 1.8155248369857207e-05, "loss": 0.459, "step": 8850 }, { "epoch": 0.19655719692345258, "grad_norm": 0.9879027009010315, "learning_rate": 1.8153229995109e-05, "loss": 0.4796, "step": 8855 }, { "epoch": 0.1966681834829802, "grad_norm": 1.4747463464736938, "learning_rate": 1.8151210629118435e-05, "loss": 0.5642, "step": 8860 }, { "epoch": 0.19677917004250786, "grad_norm": 1.3962020874023438, "learning_rate": 1.814919027213102e-05, "loss": 0.4157, "step": 8865 }, { "epoch": 0.1968901566020355, "grad_norm": 0.8555780053138733, "learning_rate": 1.814716892439238e-05, "loss": 0.5106, "step": 8870 }, { "epoch": 0.19700114316156314, "grad_norm": 1.1619625091552734, "learning_rate": 1.8145146586148266e-05, "loss": 0.3707, "step": 8875 }, { "epoch": 0.19711212972109077, "grad_norm": 0.9830631017684937, "learning_rate": 1.8143123257644548e-05, "loss": 0.4867, "step": 8880 }, { "epoch": 0.19722311628061842, "grad_norm": 0.9953300356864929, "learning_rate": 1.8141098939127214e-05, "loss": 0.5274, "step": 8885 }, { "epoch": 0.19733410284014605, "grad_norm": 1.415956735610962, "learning_rate": 1.8139073630842373e-05, "loss": 0.4372, "step": 8890 }, { "epoch": 0.1974450893996737, "grad_norm": 0.6621501445770264, "learning_rate": 1.8137047333036256e-05, "loss": 0.4938, "step": 8895 }, { "epoch": 0.19755607595920133, "grad_norm": 1.6966521739959717, "learning_rate": 1.8135020045955217e-05, "loss": 0.5423, "step": 8900 }, { "epoch": 0.19766706251872898, "grad_norm": 1.799787998199463, "learning_rate": 1.8132991769845717e-05, "loss": 0.5886, "step": 8905 }, { "epoch": 0.19777804907825663, "grad_norm": 1.1175798177719116, "learning_rate": 1.813096250495436e-05, "loss": 0.4745, "step": 8910 }, { "epoch": 0.19788903563778426, "grad_norm": 1.0063509941101074, "learning_rate": 1.812893225152785e-05, "loss": 0.5334, "step": 8915 }, { "epoch": 0.19800002219731191, "grad_norm": 0.8781239986419678, "learning_rate": 1.8126901009813016e-05, "loss": 0.4526, "step": 8920 }, { "epoch": 0.19811100875683954, "grad_norm": 1.2490911483764648, "learning_rate": 1.8124868780056814e-05, "loss": 0.5334, "step": 8925 }, { "epoch": 0.1982219953163672, "grad_norm": 1.2448034286499023, "learning_rate": 1.8122835562506314e-05, "loss": 0.4468, "step": 8930 }, { "epoch": 0.19833298187589482, "grad_norm": 1.0076313018798828, "learning_rate": 1.812080135740871e-05, "loss": 0.4711, "step": 8935 }, { "epoch": 0.19844396843542247, "grad_norm": 1.3471753597259521, "learning_rate": 1.811876616501131e-05, "loss": 0.5166, "step": 8940 }, { "epoch": 0.1985549549949501, "grad_norm": 1.514022946357727, "learning_rate": 1.811672998556155e-05, "loss": 0.5828, "step": 8945 }, { "epoch": 0.19866594155447775, "grad_norm": 1.2273184061050415, "learning_rate": 1.811469281930698e-05, "loss": 0.4267, "step": 8950 }, { "epoch": 0.19877692811400538, "grad_norm": 0.7829887270927429, "learning_rate": 1.811265466649527e-05, "loss": 0.4773, "step": 8955 }, { "epoch": 0.19888791467353303, "grad_norm": 2.0429697036743164, "learning_rate": 1.8110615527374212e-05, "loss": 0.5285, "step": 8960 }, { "epoch": 0.1989989012330607, "grad_norm": 1.5543320178985596, "learning_rate": 1.810857540219172e-05, "loss": 0.5802, "step": 8965 }, { "epoch": 0.1991098877925883, "grad_norm": 1.1182522773742676, "learning_rate": 1.8106534291195826e-05, "loss": 0.3691, "step": 8970 }, { "epoch": 0.19922087435211597, "grad_norm": 1.3988370895385742, "learning_rate": 1.810449219463468e-05, "loss": 0.3581, "step": 8975 }, { "epoch": 0.1993318609116436, "grad_norm": 1.0593804121017456, "learning_rate": 1.8102449112756554e-05, "loss": 0.4253, "step": 8980 }, { "epoch": 0.19944284747117125, "grad_norm": 0.8948121666908264, "learning_rate": 1.8100405045809836e-05, "loss": 0.4184, "step": 8985 }, { "epoch": 0.19955383403069887, "grad_norm": 1.1369259357452393, "learning_rate": 1.809835999404304e-05, "loss": 0.4053, "step": 8990 }, { "epoch": 0.19966482059022653, "grad_norm": 1.3200005292892456, "learning_rate": 1.8096313957704795e-05, "loss": 0.5784, "step": 8995 }, { "epoch": 0.19977580714975415, "grad_norm": 1.4808971881866455, "learning_rate": 1.8094266937043853e-05, "loss": 0.5999, "step": 9000 }, { "epoch": 0.1998867937092818, "grad_norm": 0.8824801445007324, "learning_rate": 1.8092218932309086e-05, "loss": 0.4985, "step": 9005 }, { "epoch": 0.19999778026880946, "grad_norm": 1.0153508186340332, "learning_rate": 1.8090169943749477e-05, "loss": 0.5107, "step": 9010 }, { "epoch": 0.2001087668283371, "grad_norm": 1.2240535020828247, "learning_rate": 1.808811997161414e-05, "loss": 0.6717, "step": 9015 }, { "epoch": 0.20021975338786474, "grad_norm": 1.374133586883545, "learning_rate": 1.80860690161523e-05, "loss": 0.6474, "step": 9020 }, { "epoch": 0.20033073994739237, "grad_norm": 1.1667531728744507, "learning_rate": 1.808401707761331e-05, "loss": 0.4666, "step": 9025 }, { "epoch": 0.20044172650692002, "grad_norm": 1.0652427673339844, "learning_rate": 1.808196415624663e-05, "loss": 0.426, "step": 9030 }, { "epoch": 0.20055271306644765, "grad_norm": 0.9931832551956177, "learning_rate": 1.807991025230186e-05, "loss": 0.3811, "step": 9035 }, { "epoch": 0.2006636996259753, "grad_norm": 1.2800239324569702, "learning_rate": 1.8077855366028695e-05, "loss": 0.4609, "step": 9040 }, { "epoch": 0.20077468618550293, "grad_norm": 1.4617905616760254, "learning_rate": 1.8075799497676967e-05, "loss": 0.4553, "step": 9045 }, { "epoch": 0.20088567274503058, "grad_norm": 1.4740259647369385, "learning_rate": 1.807374264749662e-05, "loss": 0.5532, "step": 9050 }, { "epoch": 0.2009966593045582, "grad_norm": 0.8939075469970703, "learning_rate": 1.8071684815737717e-05, "loss": 0.4947, "step": 9055 }, { "epoch": 0.20110764586408586, "grad_norm": 1.3319915533065796, "learning_rate": 1.806962600265045e-05, "loss": 0.5024, "step": 9060 }, { "epoch": 0.2012186324236135, "grad_norm": 0.9438144564628601, "learning_rate": 1.8067566208485112e-05, "loss": 0.4628, "step": 9065 }, { "epoch": 0.20132961898314114, "grad_norm": 1.2884577512741089, "learning_rate": 1.8065505433492135e-05, "loss": 0.5252, "step": 9070 }, { "epoch": 0.2014406055426688, "grad_norm": 0.9813497066497803, "learning_rate": 1.8063443677922052e-05, "loss": 0.5909, "step": 9075 }, { "epoch": 0.20155159210219642, "grad_norm": 1.1486997604370117, "learning_rate": 1.8061380942025532e-05, "loss": 0.3774, "step": 9080 }, { "epoch": 0.20166257866172407, "grad_norm": 1.1693344116210938, "learning_rate": 1.8059317226053353e-05, "loss": 0.495, "step": 9085 }, { "epoch": 0.2017735652212517, "grad_norm": 1.0905404090881348, "learning_rate": 1.8057252530256414e-05, "loss": 0.4009, "step": 9090 }, { "epoch": 0.20188455178077935, "grad_norm": 0.8068780303001404, "learning_rate": 1.8055186854885733e-05, "loss": 0.4361, "step": 9095 }, { "epoch": 0.20199553834030698, "grad_norm": 0.8546350598335266, "learning_rate": 1.8053120200192452e-05, "loss": 0.6194, "step": 9100 }, { "epoch": 0.20210652489983463, "grad_norm": 1.1348129510879517, "learning_rate": 1.8051052566427824e-05, "loss": 0.5497, "step": 9105 }, { "epoch": 0.20221751145936226, "grad_norm": 1.852582573890686, "learning_rate": 1.8048983953843226e-05, "loss": 0.5829, "step": 9110 }, { "epoch": 0.2023284980188899, "grad_norm": 1.4869229793548584, "learning_rate": 1.8046914362690153e-05, "loss": 0.4338, "step": 9115 }, { "epoch": 0.20243948457841757, "grad_norm": 1.2775733470916748, "learning_rate": 1.804484379322022e-05, "loss": 0.5854, "step": 9120 }, { "epoch": 0.2025504711379452, "grad_norm": 0.8230618834495544, "learning_rate": 1.804277224568516e-05, "loss": 0.4142, "step": 9125 }, { "epoch": 0.20266145769747285, "grad_norm": 1.7515913248062134, "learning_rate": 1.8040699720336817e-05, "loss": 0.5701, "step": 9130 }, { "epoch": 0.20277244425700047, "grad_norm": 1.0117206573486328, "learning_rate": 1.8038626217427176e-05, "loss": 0.4278, "step": 9135 }, { "epoch": 0.20288343081652813, "grad_norm": 1.2293572425842285, "learning_rate": 1.8036551737208314e-05, "loss": 0.6121, "step": 9140 }, { "epoch": 0.20299441737605575, "grad_norm": 0.8745748400688171, "learning_rate": 1.803447627993245e-05, "loss": 0.5783, "step": 9145 }, { "epoch": 0.2031054039355834, "grad_norm": 1.1734461784362793, "learning_rate": 1.8032399845851896e-05, "loss": 0.6365, "step": 9150 }, { "epoch": 0.20321639049511103, "grad_norm": 1.4227900505065918, "learning_rate": 1.8030322435219117e-05, "loss": 0.4876, "step": 9155 }, { "epoch": 0.20332737705463869, "grad_norm": 0.7563652396202087, "learning_rate": 1.8028244048286663e-05, "loss": 0.4521, "step": 9160 }, { "epoch": 0.2034383636141663, "grad_norm": 1.1069258451461792, "learning_rate": 1.8026164685307224e-05, "loss": 0.583, "step": 9165 }, { "epoch": 0.20354935017369397, "grad_norm": 1.4049474000930786, "learning_rate": 1.8024084346533598e-05, "loss": 0.4168, "step": 9170 }, { "epoch": 0.20366033673322162, "grad_norm": 1.0486278533935547, "learning_rate": 1.802200303221871e-05, "loss": 0.3907, "step": 9175 }, { "epoch": 0.20377132329274925, "grad_norm": 2.22003173828125, "learning_rate": 1.8019920742615596e-05, "loss": 0.4856, "step": 9180 }, { "epoch": 0.2038823098522769, "grad_norm": 1.4012434482574463, "learning_rate": 1.8017837477977416e-05, "loss": 0.5096, "step": 9185 }, { "epoch": 0.20399329641180453, "grad_norm": 0.994441568851471, "learning_rate": 1.8015753238557444e-05, "loss": 0.4237, "step": 9190 }, { "epoch": 0.20410428297133218, "grad_norm": 1.2336442470550537, "learning_rate": 1.8013668024609078e-05, "loss": 0.479, "step": 9195 }, { "epoch": 0.2042152695308598, "grad_norm": 0.9091594815254211, "learning_rate": 1.8011581836385828e-05, "loss": 0.5212, "step": 9200 }, { "epoch": 0.20432625609038746, "grad_norm": 1.4611879587173462, "learning_rate": 1.8009494674141327e-05, "loss": 0.5236, "step": 9205 }, { "epoch": 0.20443724264991509, "grad_norm": 1.1127291917800903, "learning_rate": 1.800740653812932e-05, "loss": 0.5092, "step": 9210 }, { "epoch": 0.20454822920944274, "grad_norm": 1.1224825382232666, "learning_rate": 1.8005317428603687e-05, "loss": 0.4901, "step": 9215 }, { "epoch": 0.20465921576897037, "grad_norm": 1.2642055749893188, "learning_rate": 1.8003227345818407e-05, "loss": 0.6672, "step": 9220 }, { "epoch": 0.20477020232849802, "grad_norm": 1.1066789627075195, "learning_rate": 1.800113629002759e-05, "loss": 0.4579, "step": 9225 }, { "epoch": 0.20488118888802567, "grad_norm": 0.9728310108184814, "learning_rate": 1.7999044261485453e-05, "loss": 0.3675, "step": 9230 }, { "epoch": 0.2049921754475533, "grad_norm": 1.205916404724121, "learning_rate": 1.799695126044634e-05, "loss": 0.4924, "step": 9235 }, { "epoch": 0.20510316200708095, "grad_norm": 1.0212023258209229, "learning_rate": 1.799485728716472e-05, "loss": 0.4738, "step": 9240 }, { "epoch": 0.20521414856660858, "grad_norm": 1.249037742614746, "learning_rate": 1.7992762341895157e-05, "loss": 0.4826, "step": 9245 }, { "epoch": 0.20532513512613623, "grad_norm": 1.789096713066101, "learning_rate": 1.7990666424892354e-05, "loss": 0.4677, "step": 9250 }, { "epoch": 0.20543612168566386, "grad_norm": 1.6068724393844604, "learning_rate": 1.798856953641113e-05, "loss": 0.4278, "step": 9255 }, { "epoch": 0.2055471082451915, "grad_norm": 1.5494028329849243, "learning_rate": 1.798647167670641e-05, "loss": 0.4621, "step": 9260 }, { "epoch": 0.20565809480471914, "grad_norm": 0.8551934957504272, "learning_rate": 1.7984372846033252e-05, "loss": 0.5474, "step": 9265 }, { "epoch": 0.2057690813642468, "grad_norm": 1.3189067840576172, "learning_rate": 1.7982273044646817e-05, "loss": 0.4884, "step": 9270 }, { "epoch": 0.20588006792377442, "grad_norm": 1.0660463571548462, "learning_rate": 1.7980172272802398e-05, "loss": 0.5418, "step": 9275 }, { "epoch": 0.20599105448330207, "grad_norm": 1.1619466543197632, "learning_rate": 1.7978070530755393e-05, "loss": 0.4406, "step": 9280 }, { "epoch": 0.20610204104282973, "grad_norm": 1.0269361734390259, "learning_rate": 1.7975967818761334e-05, "loss": 0.4529, "step": 9285 }, { "epoch": 0.20621302760235735, "grad_norm": 1.061600923538208, "learning_rate": 1.7973864137075856e-05, "loss": 0.4159, "step": 9290 }, { "epoch": 0.206324014161885, "grad_norm": 1.8789093494415283, "learning_rate": 1.797175948595472e-05, "loss": 0.5636, "step": 9295 }, { "epoch": 0.20643500072141263, "grad_norm": 1.2407394647598267, "learning_rate": 1.7969653865653794e-05, "loss": 0.5232, "step": 9300 }, { "epoch": 0.20654598728094029, "grad_norm": 1.0408992767333984, "learning_rate": 1.7967547276429086e-05, "loss": 0.5879, "step": 9305 }, { "epoch": 0.2066569738404679, "grad_norm": 1.20352041721344, "learning_rate": 1.79654397185367e-05, "loss": 0.4962, "step": 9310 }, { "epoch": 0.20676796039999557, "grad_norm": 1.2517502307891846, "learning_rate": 1.7963331192232863e-05, "loss": 0.3936, "step": 9315 }, { "epoch": 0.2068789469595232, "grad_norm": 1.6825429201126099, "learning_rate": 1.7961221697773932e-05, "loss": 0.5146, "step": 9320 }, { "epoch": 0.20698993351905084, "grad_norm": 1.1055762767791748, "learning_rate": 1.7959111235416364e-05, "loss": 0.5971, "step": 9325 }, { "epoch": 0.20710092007857847, "grad_norm": 1.408706784248352, "learning_rate": 1.7956999805416746e-05, "loss": 0.4739, "step": 9330 }, { "epoch": 0.20721190663810612, "grad_norm": 1.5727156400680542, "learning_rate": 1.7954887408031777e-05, "loss": 0.4651, "step": 9335 }, { "epoch": 0.20732289319763378, "grad_norm": 1.158872365951538, "learning_rate": 1.7952774043518273e-05, "loss": 0.4464, "step": 9340 }, { "epoch": 0.2074338797571614, "grad_norm": 1.1882591247558594, "learning_rate": 1.7950659712133178e-05, "loss": 0.5427, "step": 9345 }, { "epoch": 0.20754486631668906, "grad_norm": 0.9776345491409302, "learning_rate": 1.7948544414133534e-05, "loss": 0.5132, "step": 9350 }, { "epoch": 0.20765585287621668, "grad_norm": 1.3551688194274902, "learning_rate": 1.794642814977652e-05, "loss": 0.5469, "step": 9355 }, { "epoch": 0.20776683943574434, "grad_norm": 1.2750028371810913, "learning_rate": 1.794431091931942e-05, "loss": 0.4309, "step": 9360 }, { "epoch": 0.20787782599527196, "grad_norm": 1.3404597043991089, "learning_rate": 1.7942192723019643e-05, "loss": 0.6556, "step": 9365 }, { "epoch": 0.20798881255479962, "grad_norm": 1.260688066482544, "learning_rate": 1.7940073561134713e-05, "loss": 0.5327, "step": 9370 }, { "epoch": 0.20809979911432724, "grad_norm": 1.068941354751587, "learning_rate": 1.7937953433922265e-05, "loss": 0.5154, "step": 9375 }, { "epoch": 0.2082107856738549, "grad_norm": 0.9720483422279358, "learning_rate": 1.793583234164006e-05, "loss": 0.3935, "step": 9380 }, { "epoch": 0.20832177223338252, "grad_norm": 1.0248860120773315, "learning_rate": 1.793371028454598e-05, "loss": 0.6002, "step": 9385 }, { "epoch": 0.20843275879291018, "grad_norm": 1.149561882019043, "learning_rate": 1.7931587262898004e-05, "loss": 0.6452, "step": 9390 }, { "epoch": 0.20854374535243783, "grad_norm": 1.3280117511749268, "learning_rate": 1.792946327695425e-05, "loss": 0.575, "step": 9395 }, { "epoch": 0.20865473191196546, "grad_norm": 1.1656508445739746, "learning_rate": 1.7927338326972947e-05, "loss": 0.4884, "step": 9400 }, { "epoch": 0.2087657184714931, "grad_norm": 1.3832805156707764, "learning_rate": 1.7925212413212435e-05, "loss": 0.4673, "step": 9405 }, { "epoch": 0.20887670503102074, "grad_norm": 1.300185203552246, "learning_rate": 1.7923085535931176e-05, "loss": 0.4559, "step": 9410 }, { "epoch": 0.2089876915905484, "grad_norm": 1.452893614768982, "learning_rate": 1.792095769538775e-05, "loss": 0.6083, "step": 9415 }, { "epoch": 0.20909867815007602, "grad_norm": 1.1308960914611816, "learning_rate": 1.7918828891840853e-05, "loss": 0.3727, "step": 9420 }, { "epoch": 0.20920966470960367, "grad_norm": 1.103422999382019, "learning_rate": 1.79166991255493e-05, "loss": 0.3363, "step": 9425 }, { "epoch": 0.2093206512691313, "grad_norm": 1.3565524816513062, "learning_rate": 1.791456839677201e-05, "loss": 0.5528, "step": 9430 }, { "epoch": 0.20943163782865895, "grad_norm": 0.6596664786338806, "learning_rate": 1.7912436705768045e-05, "loss": 0.423, "step": 9435 }, { "epoch": 0.20954262438818658, "grad_norm": 1.1548718214035034, "learning_rate": 1.7910304052796558e-05, "loss": 0.5097, "step": 9440 }, { "epoch": 0.20965361094771423, "grad_norm": 1.3421484231948853, "learning_rate": 1.7908170438116835e-05, "loss": 0.4689, "step": 9445 }, { "epoch": 0.20976459750724188, "grad_norm": 0.8850771188735962, "learning_rate": 1.790603586198827e-05, "loss": 0.4149, "step": 9450 }, { "epoch": 0.2098755840667695, "grad_norm": 1.0380505323410034, "learning_rate": 1.790390032467038e-05, "loss": 0.5725, "step": 9455 }, { "epoch": 0.20998657062629716, "grad_norm": 1.1664137840270996, "learning_rate": 1.7901763826422797e-05, "loss": 0.5316, "step": 9460 }, { "epoch": 0.2100975571858248, "grad_norm": 1.3658771514892578, "learning_rate": 1.7899626367505266e-05, "loss": 0.4926, "step": 9465 }, { "epoch": 0.21020854374535244, "grad_norm": 1.3364830017089844, "learning_rate": 1.789748794817766e-05, "loss": 0.599, "step": 9470 }, { "epoch": 0.21031953030488007, "grad_norm": 1.1915723085403442, "learning_rate": 1.7895348568699953e-05, "loss": 0.5151, "step": 9475 }, { "epoch": 0.21043051686440772, "grad_norm": 1.026668667793274, "learning_rate": 1.7893208229332245e-05, "loss": 0.5308, "step": 9480 }, { "epoch": 0.21054150342393535, "grad_norm": 1.2627990245819092, "learning_rate": 1.789106693033475e-05, "loss": 0.5808, "step": 9485 }, { "epoch": 0.210652489983463, "grad_norm": 1.0648430585861206, "learning_rate": 1.7888924671967808e-05, "loss": 0.4118, "step": 9490 }, { "epoch": 0.21076347654299066, "grad_norm": 1.0836087465286255, "learning_rate": 1.7886781454491856e-05, "loss": 0.637, "step": 9495 }, { "epoch": 0.21087446310251828, "grad_norm": 0.976684033870697, "learning_rate": 1.788463727816747e-05, "loss": 0.3573, "step": 9500 }, { "epoch": 0.21098544966204594, "grad_norm": 1.099665641784668, "learning_rate": 1.7882492143255323e-05, "loss": 0.5833, "step": 9505 }, { "epoch": 0.21109643622157356, "grad_norm": 1.015887975692749, "learning_rate": 1.788034605001622e-05, "loss": 0.49, "step": 9510 }, { "epoch": 0.21120742278110122, "grad_norm": 1.1677489280700684, "learning_rate": 1.7878198998711068e-05, "loss": 0.2998, "step": 9515 }, { "epoch": 0.21131840934062884, "grad_norm": 1.1017683744430542, "learning_rate": 1.7876050989600908e-05, "loss": 0.4565, "step": 9520 }, { "epoch": 0.2114293959001565, "grad_norm": 1.3350118398666382, "learning_rate": 1.7873902022946882e-05, "loss": 0.3332, "step": 9525 }, { "epoch": 0.21154038245968412, "grad_norm": 1.3715494871139526, "learning_rate": 1.7871752099010256e-05, "loss": 0.4485, "step": 9530 }, { "epoch": 0.21165136901921178, "grad_norm": 1.1493537425994873, "learning_rate": 1.7869601218052405e-05, "loss": 0.4861, "step": 9535 }, { "epoch": 0.2117623555787394, "grad_norm": 1.5250881910324097, "learning_rate": 1.7867449380334834e-05, "loss": 0.625, "step": 9540 }, { "epoch": 0.21187334213826706, "grad_norm": 1.3963158130645752, "learning_rate": 1.786529658611915e-05, "loss": 0.546, "step": 9545 }, { "epoch": 0.2119843286977947, "grad_norm": 1.4127143621444702, "learning_rate": 1.786314283566709e-05, "loss": 0.5782, "step": 9550 }, { "epoch": 0.21209531525732234, "grad_norm": 1.5299954414367676, "learning_rate": 1.786098812924049e-05, "loss": 0.6186, "step": 9555 }, { "epoch": 0.21220630181685, "grad_norm": 1.4167520999908447, "learning_rate": 1.785883246710132e-05, "loss": 0.4006, "step": 9560 }, { "epoch": 0.21231728837637762, "grad_norm": 0.9981635212898254, "learning_rate": 1.7856675849511657e-05, "loss": 0.5463, "step": 9565 }, { "epoch": 0.21242827493590527, "grad_norm": 1.4359947443008423, "learning_rate": 1.785451827673369e-05, "loss": 0.5462, "step": 9570 }, { "epoch": 0.2125392614954329, "grad_norm": 1.2355036735534668, "learning_rate": 1.7852359749029734e-05, "loss": 0.5466, "step": 9575 }, { "epoch": 0.21265024805496055, "grad_norm": 1.7637877464294434, "learning_rate": 1.7850200266662212e-05, "loss": 0.5638, "step": 9580 }, { "epoch": 0.21276123461448818, "grad_norm": 1.4671874046325684, "learning_rate": 1.7848039829893672e-05, "loss": 0.4638, "step": 9585 }, { "epoch": 0.21287222117401583, "grad_norm": 1.0217411518096924, "learning_rate": 1.784587843898677e-05, "loss": 0.6062, "step": 9590 }, { "epoch": 0.21298320773354346, "grad_norm": 1.0460752248764038, "learning_rate": 1.784371609420428e-05, "loss": 0.5773, "step": 9595 }, { "epoch": 0.2130941942930711, "grad_norm": 1.8018296957015991, "learning_rate": 1.7841552795809095e-05, "loss": 0.4543, "step": 9600 }, { "epoch": 0.21320518085259876, "grad_norm": 0.9428887367248535, "learning_rate": 1.7839388544064215e-05, "loss": 0.4272, "step": 9605 }, { "epoch": 0.2133161674121264, "grad_norm": 0.9210885763168335, "learning_rate": 1.7837223339232767e-05, "loss": 0.5215, "step": 9610 }, { "epoch": 0.21342715397165404, "grad_norm": 1.9459435939788818, "learning_rate": 1.7835057181577996e-05, "loss": 0.6155, "step": 9615 }, { "epoch": 0.21353814053118167, "grad_norm": 1.2487971782684326, "learning_rate": 1.7832890071363243e-05, "loss": 0.5127, "step": 9620 }, { "epoch": 0.21364912709070932, "grad_norm": 0.7779967784881592, "learning_rate": 1.7830722008851988e-05, "loss": 0.4642, "step": 9625 }, { "epoch": 0.21376011365023695, "grad_norm": 1.3998373746871948, "learning_rate": 1.7828552994307812e-05, "loss": 0.6303, "step": 9630 }, { "epoch": 0.2138711002097646, "grad_norm": 1.4580212831497192, "learning_rate": 1.7826383027994415e-05, "loss": 0.4693, "step": 9635 }, { "epoch": 0.21398208676929223, "grad_norm": 1.251250147819519, "learning_rate": 1.7824212110175623e-05, "loss": 0.4477, "step": 9640 }, { "epoch": 0.21409307332881988, "grad_norm": 1.3137695789337158, "learning_rate": 1.7822040241115358e-05, "loss": 0.4949, "step": 9645 }, { "epoch": 0.2142040598883475, "grad_norm": 1.1955955028533936, "learning_rate": 1.7819867421077678e-05, "loss": 0.4285, "step": 9650 }, { "epoch": 0.21431504644787516, "grad_norm": 0.9138591289520264, "learning_rate": 1.781769365032674e-05, "loss": 0.5015, "step": 9655 }, { "epoch": 0.21442603300740282, "grad_norm": 1.0733075141906738, "learning_rate": 1.7815518929126827e-05, "loss": 0.5615, "step": 9660 }, { "epoch": 0.21453701956693044, "grad_norm": 1.475874423980713, "learning_rate": 1.7813343257742333e-05, "loss": 0.4636, "step": 9665 }, { "epoch": 0.2146480061264581, "grad_norm": 1.453596830368042, "learning_rate": 1.7811166636437775e-05, "loss": 0.4062, "step": 9670 }, { "epoch": 0.21475899268598572, "grad_norm": 1.1406927108764648, "learning_rate": 1.7808989065477766e-05, "loss": 0.6224, "step": 9675 }, { "epoch": 0.21486997924551338, "grad_norm": 1.306417465209961, "learning_rate": 1.780681054512706e-05, "loss": 0.6121, "step": 9680 }, { "epoch": 0.214980965805041, "grad_norm": 1.2868911027908325, "learning_rate": 1.780463107565051e-05, "loss": 0.5233, "step": 9685 }, { "epoch": 0.21509195236456866, "grad_norm": 1.5072758197784424, "learning_rate": 1.7802450657313086e-05, "loss": 0.3335, "step": 9690 }, { "epoch": 0.21520293892409628, "grad_norm": 1.284999966621399, "learning_rate": 1.780026929037988e-05, "loss": 0.4095, "step": 9695 }, { "epoch": 0.21531392548362394, "grad_norm": 1.141349196434021, "learning_rate": 1.7798086975116096e-05, "loss": 0.4452, "step": 9700 }, { "epoch": 0.21542491204315156, "grad_norm": 1.745187759399414, "learning_rate": 1.7795903711787046e-05, "loss": 0.5534, "step": 9705 }, { "epoch": 0.21553589860267922, "grad_norm": 1.3602417707443237, "learning_rate": 1.779371950065817e-05, "loss": 0.5419, "step": 9710 }, { "epoch": 0.21564688516220687, "grad_norm": 1.2203381061553955, "learning_rate": 1.7791534341995018e-05, "loss": 0.5834, "step": 9715 }, { "epoch": 0.2157578717217345, "grad_norm": 1.0616192817687988, "learning_rate": 1.7789348236063245e-05, "loss": 0.4124, "step": 9720 }, { "epoch": 0.21586885828126215, "grad_norm": 1.2460315227508545, "learning_rate": 1.7787161183128643e-05, "loss": 0.5519, "step": 9725 }, { "epoch": 0.21597984484078978, "grad_norm": 1.378010869026184, "learning_rate": 1.7784973183457097e-05, "loss": 0.5782, "step": 9730 }, { "epoch": 0.21609083140031743, "grad_norm": 1.131535291671753, "learning_rate": 1.778278423731462e-05, "loss": 0.5816, "step": 9735 }, { "epoch": 0.21620181795984506, "grad_norm": 1.2070375680923462, "learning_rate": 1.778059434496734e-05, "loss": 0.3977, "step": 9740 }, { "epoch": 0.2163128045193727, "grad_norm": 1.092206358909607, "learning_rate": 1.7778403506681493e-05, "loss": 0.4687, "step": 9745 }, { "epoch": 0.21642379107890033, "grad_norm": 1.1560908555984497, "learning_rate": 1.7776211722723437e-05, "loss": 0.4611, "step": 9750 }, { "epoch": 0.216534777638428, "grad_norm": 0.817129909992218, "learning_rate": 1.7774018993359633e-05, "loss": 0.4608, "step": 9755 }, { "epoch": 0.21664576419795561, "grad_norm": 1.1447831392288208, "learning_rate": 1.7771825318856676e-05, "loss": 0.5585, "step": 9760 }, { "epoch": 0.21675675075748327, "grad_norm": 1.21500825881958, "learning_rate": 1.776963069948126e-05, "loss": 0.485, "step": 9765 }, { "epoch": 0.21686773731701092, "grad_norm": 0.931507408618927, "learning_rate": 1.77674351355002e-05, "loss": 0.4524, "step": 9770 }, { "epoch": 0.21697872387653855, "grad_norm": 1.2368406057357788, "learning_rate": 1.7765238627180424e-05, "loss": 0.5286, "step": 9775 }, { "epoch": 0.2170897104360662, "grad_norm": 1.8320626020431519, "learning_rate": 1.7763041174788984e-05, "loss": 0.5937, "step": 9780 }, { "epoch": 0.21720069699559383, "grad_norm": 0.9292985796928406, "learning_rate": 1.7760842778593027e-05, "loss": 0.4569, "step": 9785 }, { "epoch": 0.21731168355512148, "grad_norm": 1.2462888956069946, "learning_rate": 1.7758643438859836e-05, "loss": 0.5901, "step": 9790 }, { "epoch": 0.2174226701146491, "grad_norm": 1.6621441841125488, "learning_rate": 1.7756443155856796e-05, "loss": 0.6449, "step": 9795 }, { "epoch": 0.21753365667417676, "grad_norm": 0.8483054041862488, "learning_rate": 1.7754241929851413e-05, "loss": 0.5905, "step": 9800 }, { "epoch": 0.2176446432337044, "grad_norm": 0.9630061388015747, "learning_rate": 1.77520397611113e-05, "loss": 0.3271, "step": 9805 }, { "epoch": 0.21775562979323204, "grad_norm": 0.8938013315200806, "learning_rate": 1.7749836649904192e-05, "loss": 0.4271, "step": 9810 }, { "epoch": 0.21786661635275967, "grad_norm": 1.181822657585144, "learning_rate": 1.7747632596497932e-05, "loss": 0.537, "step": 9815 }, { "epoch": 0.21797760291228732, "grad_norm": 0.8262178897857666, "learning_rate": 1.7745427601160487e-05, "loss": 0.5994, "step": 9820 }, { "epoch": 0.21808858947181498, "grad_norm": 1.1472630500793457, "learning_rate": 1.7743221664159927e-05, "loss": 0.5166, "step": 9825 }, { "epoch": 0.2181995760313426, "grad_norm": 1.1295238733291626, "learning_rate": 1.774101478576445e-05, "loss": 0.6013, "step": 9830 }, { "epoch": 0.21831056259087026, "grad_norm": 1.3434393405914307, "learning_rate": 1.7738806966242355e-05, "loss": 0.5125, "step": 9835 }, { "epoch": 0.21842154915039788, "grad_norm": 0.963297426700592, "learning_rate": 1.7736598205862064e-05, "loss": 0.6378, "step": 9840 }, { "epoch": 0.21853253570992554, "grad_norm": 1.5868712663650513, "learning_rate": 1.773438850489211e-05, "loss": 0.4859, "step": 9845 }, { "epoch": 0.21864352226945316, "grad_norm": 1.6960935592651367, "learning_rate": 1.7732177863601135e-05, "loss": 0.4339, "step": 9850 }, { "epoch": 0.21875450882898081, "grad_norm": 1.0453115701675415, "learning_rate": 1.7729966282257912e-05, "loss": 0.4424, "step": 9855 }, { "epoch": 0.21886549538850844, "grad_norm": 0.9868830442428589, "learning_rate": 1.7727753761131312e-05, "loss": 0.4167, "step": 9860 }, { "epoch": 0.2189764819480361, "grad_norm": 1.3542011976242065, "learning_rate": 1.7725540300490326e-05, "loss": 0.4964, "step": 9865 }, { "epoch": 0.21908746850756372, "grad_norm": 0.8640686273574829, "learning_rate": 1.7723325900604063e-05, "loss": 0.4559, "step": 9870 }, { "epoch": 0.21919845506709137, "grad_norm": 1.0085946321487427, "learning_rate": 1.7721110561741737e-05, "loss": 0.3331, "step": 9875 }, { "epoch": 0.21930944162661903, "grad_norm": 1.2680177688598633, "learning_rate": 1.7718894284172684e-05, "loss": 0.5908, "step": 9880 }, { "epoch": 0.21942042818614665, "grad_norm": 1.1593209505081177, "learning_rate": 1.771667706816635e-05, "loss": 0.6322, "step": 9885 }, { "epoch": 0.2195314147456743, "grad_norm": 1.3372281789779663, "learning_rate": 1.7714458913992297e-05, "loss": 0.5001, "step": 9890 }, { "epoch": 0.21964240130520193, "grad_norm": 1.2422916889190674, "learning_rate": 1.7712239821920202e-05, "loss": 0.499, "step": 9895 }, { "epoch": 0.2197533878647296, "grad_norm": 1.1619045734405518, "learning_rate": 1.7710019792219856e-05, "loss": 0.5293, "step": 9900 }, { "epoch": 0.21986437442425721, "grad_norm": 0.9704828262329102, "learning_rate": 1.7707798825161155e-05, "loss": 0.4543, "step": 9905 }, { "epoch": 0.21997536098378487, "grad_norm": 1.4115480184555054, "learning_rate": 1.770557692101413e-05, "loss": 0.3108, "step": 9910 }, { "epoch": 0.2200863475433125, "grad_norm": 0.9354934692382812, "learning_rate": 1.77033540800489e-05, "loss": 0.5608, "step": 9915 }, { "epoch": 0.22019733410284015, "grad_norm": 1.1399041414260864, "learning_rate": 1.770113030253572e-05, "loss": 0.4043, "step": 9920 }, { "epoch": 0.22030832066236777, "grad_norm": 1.5980947017669678, "learning_rate": 1.7698905588744946e-05, "loss": 0.4469, "step": 9925 }, { "epoch": 0.22041930722189543, "grad_norm": 1.166395664215088, "learning_rate": 1.769667993894705e-05, "loss": 0.4852, "step": 9930 }, { "epoch": 0.22053029378142308, "grad_norm": 0.9159113168716431, "learning_rate": 1.7694453353412618e-05, "loss": 0.6554, "step": 9935 }, { "epoch": 0.2206412803409507, "grad_norm": 1.1559749841690063, "learning_rate": 1.7692225832412354e-05, "loss": 0.4948, "step": 9940 }, { "epoch": 0.22075226690047836, "grad_norm": 1.3015704154968262, "learning_rate": 1.768999737621707e-05, "loss": 0.7356, "step": 9945 }, { "epoch": 0.220863253460006, "grad_norm": 2.7410922050476074, "learning_rate": 1.7687767985097695e-05, "loss": 0.4758, "step": 9950 }, { "epoch": 0.22097424001953364, "grad_norm": 1.1932713985443115, "learning_rate": 1.7685537659325272e-05, "loss": 0.4975, "step": 9955 }, { "epoch": 0.22108522657906127, "grad_norm": 1.03585946559906, "learning_rate": 1.768330639917095e-05, "loss": 0.4919, "step": 9960 }, { "epoch": 0.22119621313858892, "grad_norm": 1.0269675254821777, "learning_rate": 1.7681074204906013e-05, "loss": 0.5063, "step": 9965 }, { "epoch": 0.22130719969811655, "grad_norm": 1.6854188442230225, "learning_rate": 1.767884107680183e-05, "loss": 0.5432, "step": 9970 }, { "epoch": 0.2214181862576442, "grad_norm": 1.463315725326538, "learning_rate": 1.7676607015129904e-05, "loss": 0.6055, "step": 9975 }, { "epoch": 0.22152917281717183, "grad_norm": 1.0149184465408325, "learning_rate": 1.767437202016184e-05, "loss": 0.3995, "step": 9980 }, { "epoch": 0.22164015937669948, "grad_norm": 1.0655211210250854, "learning_rate": 1.767213609216936e-05, "loss": 0.4974, "step": 9985 }, { "epoch": 0.22175114593622713, "grad_norm": 0.9974083304405212, "learning_rate": 1.766989923142431e-05, "loss": 0.4742, "step": 9990 }, { "epoch": 0.22186213249575476, "grad_norm": 1.0866353511810303, "learning_rate": 1.7667661438198635e-05, "loss": 0.5871, "step": 9995 }, { "epoch": 0.22197311905528241, "grad_norm": 0.9190327525138855, "learning_rate": 1.7665422712764394e-05, "loss": 0.4662, "step": 10000 }, { "epoch": 0.22208410561481004, "grad_norm": 1.1974272727966309, "learning_rate": 1.766318305539377e-05, "loss": 0.5835, "step": 10005 }, { "epoch": 0.2221950921743377, "grad_norm": 1.0719481706619263, "learning_rate": 1.766094246635905e-05, "loss": 0.4291, "step": 10010 }, { "epoch": 0.22230607873386532, "grad_norm": 0.9069860577583313, "learning_rate": 1.7658700945932637e-05, "loss": 0.4985, "step": 10015 }, { "epoch": 0.22241706529339297, "grad_norm": 1.5030319690704346, "learning_rate": 1.7656458494387047e-05, "loss": 0.4545, "step": 10020 }, { "epoch": 0.2225280518529206, "grad_norm": 1.5551732778549194, "learning_rate": 1.7654215111994912e-05, "loss": 0.454, "step": 10025 }, { "epoch": 0.22263903841244825, "grad_norm": 0.9678833484649658, "learning_rate": 1.7651970799028976e-05, "loss": 0.4953, "step": 10030 }, { "epoch": 0.2227500249719759, "grad_norm": 1.0727559328079224, "learning_rate": 1.764972555576209e-05, "loss": 0.6616, "step": 10035 }, { "epoch": 0.22286101153150353, "grad_norm": 1.8546661138534546, "learning_rate": 1.7647479382467227e-05, "loss": 0.6262, "step": 10040 }, { "epoch": 0.2229719980910312, "grad_norm": 1.2231254577636719, "learning_rate": 1.764523227941747e-05, "loss": 0.56, "step": 10045 }, { "epoch": 0.2230829846505588, "grad_norm": 0.9730484485626221, "learning_rate": 1.764298424688601e-05, "loss": 0.5865, "step": 10050 }, { "epoch": 0.22319397121008647, "grad_norm": 1.9160155057907104, "learning_rate": 1.764073528514616e-05, "loss": 0.6857, "step": 10055 }, { "epoch": 0.2233049577696141, "grad_norm": 0.9943063259124756, "learning_rate": 1.7638485394471337e-05, "loss": 0.4019, "step": 10060 }, { "epoch": 0.22341594432914175, "grad_norm": 1.3601323366165161, "learning_rate": 1.7636234575135082e-05, "loss": 0.3918, "step": 10065 }, { "epoch": 0.22352693088866937, "grad_norm": 1.2443233728408813, "learning_rate": 1.763398282741103e-05, "loss": 0.5831, "step": 10070 }, { "epoch": 0.22363791744819703, "grad_norm": 0.9492146968841553, "learning_rate": 1.7631730151572952e-05, "loss": 0.4517, "step": 10075 }, { "epoch": 0.22374890400772465, "grad_norm": 1.3512073755264282, "learning_rate": 1.7629476547894716e-05, "loss": 0.5045, "step": 10080 }, { "epoch": 0.2238598905672523, "grad_norm": 1.1920839548110962, "learning_rate": 1.7627222016650313e-05, "loss": 0.6147, "step": 10085 }, { "epoch": 0.22397087712677996, "grad_norm": 1.264012098312378, "learning_rate": 1.7624966558113833e-05, "loss": 0.4869, "step": 10090 }, { "epoch": 0.2240818636863076, "grad_norm": 1.0589563846588135, "learning_rate": 1.762271017255949e-05, "loss": 0.4367, "step": 10095 }, { "epoch": 0.22419285024583524, "grad_norm": 1.3212761878967285, "learning_rate": 1.7620452860261614e-05, "loss": 0.488, "step": 10100 }, { "epoch": 0.22430383680536287, "grad_norm": 1.3312410116195679, "learning_rate": 1.761819462149463e-05, "loss": 0.4991, "step": 10105 }, { "epoch": 0.22441482336489052, "grad_norm": 1.1420173645019531, "learning_rate": 1.7615935456533094e-05, "loss": 0.4348, "step": 10110 }, { "epoch": 0.22452580992441815, "grad_norm": 1.0570411682128906, "learning_rate": 1.761367536565167e-05, "loss": 0.4494, "step": 10115 }, { "epoch": 0.2246367964839458, "grad_norm": 1.0421570539474487, "learning_rate": 1.7611414349125128e-05, "loss": 0.4582, "step": 10120 }, { "epoch": 0.22474778304347343, "grad_norm": 0.9665072560310364, "learning_rate": 1.7609152407228358e-05, "loss": 0.5182, "step": 10125 }, { "epoch": 0.22485876960300108, "grad_norm": 1.0663903951644897, "learning_rate": 1.7606889540236352e-05, "loss": 0.4897, "step": 10130 }, { "epoch": 0.2249697561625287, "grad_norm": 1.3296186923980713, "learning_rate": 1.7604625748424237e-05, "loss": 0.5323, "step": 10135 }, { "epoch": 0.22508074272205636, "grad_norm": 1.2479513883590698, "learning_rate": 1.760236103206722e-05, "loss": 0.5191, "step": 10140 }, { "epoch": 0.225191729281584, "grad_norm": 1.2304879426956177, "learning_rate": 1.7600095391440645e-05, "loss": 0.4828, "step": 10145 }, { "epoch": 0.22530271584111164, "grad_norm": 1.3983750343322754, "learning_rate": 1.7597828826819966e-05, "loss": 0.5423, "step": 10150 }, { "epoch": 0.2254137024006393, "grad_norm": 0.6361042261123657, "learning_rate": 1.7595561338480733e-05, "loss": 0.3157, "step": 10155 }, { "epoch": 0.22552468896016692, "grad_norm": 0.9633803367614746, "learning_rate": 1.759329292669863e-05, "loss": 0.4553, "step": 10160 }, { "epoch": 0.22563567551969457, "grad_norm": 1.2229855060577393, "learning_rate": 1.7591023591749436e-05, "loss": 0.4053, "step": 10165 }, { "epoch": 0.2257466620792222, "grad_norm": 1.512622356414795, "learning_rate": 1.7588753333909053e-05, "loss": 0.3636, "step": 10170 }, { "epoch": 0.22585764863874985, "grad_norm": 1.1153056621551514, "learning_rate": 1.7586482153453492e-05, "loss": 0.6119, "step": 10175 }, { "epoch": 0.22596863519827748, "grad_norm": 1.1370549201965332, "learning_rate": 1.7584210050658873e-05, "loss": 0.6283, "step": 10180 }, { "epoch": 0.22607962175780513, "grad_norm": 1.1045695543289185, "learning_rate": 1.7581937025801433e-05, "loss": 0.2812, "step": 10185 }, { "epoch": 0.22619060831733276, "grad_norm": 1.1070390939712524, "learning_rate": 1.757966307915752e-05, "loss": 0.4798, "step": 10190 }, { "epoch": 0.2263015948768604, "grad_norm": 1.2510517835617065, "learning_rate": 1.7577388211003584e-05, "loss": 0.4774, "step": 10195 }, { "epoch": 0.22641258143638807, "grad_norm": 1.1064683198928833, "learning_rate": 1.7575112421616203e-05, "loss": 0.4421, "step": 10200 }, { "epoch": 0.2265235679959157, "grad_norm": 1.3662172555923462, "learning_rate": 1.757283571127206e-05, "loss": 0.5346, "step": 10205 }, { "epoch": 0.22663455455544335, "grad_norm": 1.113659143447876, "learning_rate": 1.7570558080247945e-05, "loss": 0.4849, "step": 10210 }, { "epoch": 0.22674554111497097, "grad_norm": 1.5465953350067139, "learning_rate": 1.7568279528820774e-05, "loss": 0.5242, "step": 10215 }, { "epoch": 0.22685652767449863, "grad_norm": 0.9810808897018433, "learning_rate": 1.756600005726756e-05, "loss": 0.4816, "step": 10220 }, { "epoch": 0.22696751423402625, "grad_norm": 1.075408935546875, "learning_rate": 1.7563719665865425e-05, "loss": 0.4855, "step": 10225 }, { "epoch": 0.2270785007935539, "grad_norm": 1.4035685062408447, "learning_rate": 1.7561438354891628e-05, "loss": 0.5537, "step": 10230 }, { "epoch": 0.22718948735308153, "grad_norm": 0.9641634821891785, "learning_rate": 1.7559156124623513e-05, "loss": 0.414, "step": 10235 }, { "epoch": 0.22730047391260919, "grad_norm": 1.221092700958252, "learning_rate": 1.7556872975338545e-05, "loss": 0.483, "step": 10240 }, { "epoch": 0.2274114604721368, "grad_norm": 1.4603421688079834, "learning_rate": 1.755458890731431e-05, "loss": 0.3984, "step": 10245 }, { "epoch": 0.22752244703166447, "grad_norm": 1.086022138595581, "learning_rate": 1.755230392082849e-05, "loss": 0.499, "step": 10250 }, { "epoch": 0.22763343359119212, "grad_norm": 1.0802093744277954, "learning_rate": 1.7550018016158884e-05, "loss": 0.5606, "step": 10255 }, { "epoch": 0.22774442015071975, "grad_norm": 1.469793677330017, "learning_rate": 1.754773119358341e-05, "loss": 0.4977, "step": 10260 }, { "epoch": 0.2278554067102474, "grad_norm": 1.184227705001831, "learning_rate": 1.754544345338009e-05, "loss": 0.4903, "step": 10265 }, { "epoch": 0.22796639326977503, "grad_norm": 1.258518099784851, "learning_rate": 1.754315479582706e-05, "loss": 0.5834, "step": 10270 }, { "epoch": 0.22807737982930268, "grad_norm": 1.2839202880859375, "learning_rate": 1.7540865221202574e-05, "loss": 0.6525, "step": 10275 }, { "epoch": 0.2281883663888303, "grad_norm": 1.084051489830017, "learning_rate": 1.7538574729784977e-05, "loss": 0.4572, "step": 10280 }, { "epoch": 0.22829935294835796, "grad_norm": 1.2756541967391968, "learning_rate": 1.753628332185275e-05, "loss": 0.4312, "step": 10285 }, { "epoch": 0.22841033950788558, "grad_norm": 1.2992979288101196, "learning_rate": 1.7533990997684473e-05, "loss": 0.3662, "step": 10290 }, { "epoch": 0.22852132606741324, "grad_norm": 1.274697184562683, "learning_rate": 1.7531697757558833e-05, "loss": 0.5757, "step": 10295 }, { "epoch": 0.22863231262694086, "grad_norm": 1.0342131853103638, "learning_rate": 1.7529403601754645e-05, "loss": 0.3428, "step": 10300 }, { "epoch": 0.22874329918646852, "grad_norm": 1.0127534866333008, "learning_rate": 1.7527108530550815e-05, "loss": 0.2972, "step": 10305 }, { "epoch": 0.22885428574599617, "grad_norm": 1.185622215270996, "learning_rate": 1.752481254422637e-05, "loss": 0.4231, "step": 10310 }, { "epoch": 0.2289652723055238, "grad_norm": 1.262616753578186, "learning_rate": 1.752251564306046e-05, "loss": 0.5794, "step": 10315 }, { "epoch": 0.22907625886505145, "grad_norm": 1.0973269939422607, "learning_rate": 1.752021782733232e-05, "loss": 0.4948, "step": 10320 }, { "epoch": 0.22918724542457908, "grad_norm": 0.6889248490333557, "learning_rate": 1.7517919097321323e-05, "loss": 0.6251, "step": 10325 }, { "epoch": 0.22929823198410673, "grad_norm": 1.0883080959320068, "learning_rate": 1.751561945330693e-05, "loss": 0.7084, "step": 10330 }, { "epoch": 0.22940921854363436, "grad_norm": 0.9828934669494629, "learning_rate": 1.7513318895568734e-05, "loss": 0.495, "step": 10335 }, { "epoch": 0.229520205103162, "grad_norm": 1.5232964754104614, "learning_rate": 1.7511017424386423e-05, "loss": 0.5004, "step": 10340 }, { "epoch": 0.22963119166268964, "grad_norm": 1.1135151386260986, "learning_rate": 1.7508715040039805e-05, "loss": 0.4907, "step": 10345 }, { "epoch": 0.2297421782222173, "grad_norm": 1.2410166263580322, "learning_rate": 1.750641174280879e-05, "loss": 0.5564, "step": 10350 }, { "epoch": 0.22985316478174492, "grad_norm": 1.2214136123657227, "learning_rate": 1.750410753297341e-05, "loss": 0.6031, "step": 10355 }, { "epoch": 0.22996415134127257, "grad_norm": 1.2132198810577393, "learning_rate": 1.750180241081381e-05, "loss": 0.5265, "step": 10360 }, { "epoch": 0.23007513790080023, "grad_norm": 1.0986378192901611, "learning_rate": 1.7499496376610225e-05, "loss": 0.5982, "step": 10365 }, { "epoch": 0.23018612446032785, "grad_norm": 1.078561544418335, "learning_rate": 1.7497189430643025e-05, "loss": 0.374, "step": 10370 }, { "epoch": 0.2302971110198555, "grad_norm": 1.3416097164154053, "learning_rate": 1.749488157319268e-05, "loss": 0.4589, "step": 10375 }, { "epoch": 0.23040809757938313, "grad_norm": 1.3649656772613525, "learning_rate": 1.7492572804539763e-05, "loss": 0.4703, "step": 10380 }, { "epoch": 0.23051908413891078, "grad_norm": 1.029783844947815, "learning_rate": 1.7490263124964976e-05, "loss": 0.4931, "step": 10385 }, { "epoch": 0.2306300706984384, "grad_norm": 0.9972415566444397, "learning_rate": 1.7487952534749116e-05, "loss": 0.4425, "step": 10390 }, { "epoch": 0.23074105725796606, "grad_norm": 1.2904571294784546, "learning_rate": 1.7485641034173103e-05, "loss": 0.4746, "step": 10395 }, { "epoch": 0.2308520438174937, "grad_norm": 1.2893401384353638, "learning_rate": 1.748332862351796e-05, "loss": 0.5975, "step": 10400 }, { "epoch": 0.23096303037702134, "grad_norm": 1.3970812559127808, "learning_rate": 1.7481015303064816e-05, "loss": 0.4489, "step": 10405 }, { "epoch": 0.23107401693654897, "grad_norm": 1.0396674871444702, "learning_rate": 1.747870107309492e-05, "loss": 0.5176, "step": 10410 }, { "epoch": 0.23118500349607662, "grad_norm": 0.7239720225334167, "learning_rate": 1.7476385933889633e-05, "loss": 0.4588, "step": 10415 }, { "epoch": 0.23129599005560428, "grad_norm": 1.1714787483215332, "learning_rate": 1.7474069885730414e-05, "loss": 0.5548, "step": 10420 }, { "epoch": 0.2314069766151319, "grad_norm": 1.6496156454086304, "learning_rate": 1.7471752928898847e-05, "loss": 0.6559, "step": 10425 }, { "epoch": 0.23151796317465956, "grad_norm": 1.0505659580230713, "learning_rate": 1.7469435063676615e-05, "loss": 0.5648, "step": 10430 }, { "epoch": 0.23162894973418718, "grad_norm": 0.9392535090446472, "learning_rate": 1.746711629034552e-05, "loss": 0.6279, "step": 10435 }, { "epoch": 0.23173993629371484, "grad_norm": 1.05547034740448, "learning_rate": 1.746479660918747e-05, "loss": 0.511, "step": 10440 }, { "epoch": 0.23185092285324246, "grad_norm": 0.9287381768226624, "learning_rate": 1.7462476020484484e-05, "loss": 0.4376, "step": 10445 }, { "epoch": 0.23196190941277012, "grad_norm": 1.5444824695587158, "learning_rate": 1.7460154524518688e-05, "loss": 0.4187, "step": 10450 }, { "epoch": 0.23207289597229774, "grad_norm": 1.0833466053009033, "learning_rate": 1.7457832121572323e-05, "loss": 0.5985, "step": 10455 }, { "epoch": 0.2321838825318254, "grad_norm": 1.059387445449829, "learning_rate": 1.7455508811927746e-05, "loss": 0.3834, "step": 10460 }, { "epoch": 0.23229486909135302, "grad_norm": 1.185230016708374, "learning_rate": 1.7453184595867404e-05, "loss": 0.5965, "step": 10465 }, { "epoch": 0.23240585565088068, "grad_norm": 0.722567081451416, "learning_rate": 1.7450859473673882e-05, "loss": 0.5079, "step": 10470 }, { "epoch": 0.23251684221040833, "grad_norm": 1.3505364656448364, "learning_rate": 1.744853344562985e-05, "loss": 0.4602, "step": 10475 }, { "epoch": 0.23262782876993596, "grad_norm": 1.1881821155548096, "learning_rate": 1.7446206512018103e-05, "loss": 0.3281, "step": 10480 }, { "epoch": 0.2327388153294636, "grad_norm": 1.5479166507720947, "learning_rate": 1.744387867312154e-05, "loss": 0.531, "step": 10485 }, { "epoch": 0.23284980188899124, "grad_norm": 1.6832934617996216, "learning_rate": 1.7441549929223173e-05, "loss": 0.6222, "step": 10490 }, { "epoch": 0.2329607884485189, "grad_norm": 1.35410475730896, "learning_rate": 1.743922028060612e-05, "loss": 0.545, "step": 10495 }, { "epoch": 0.23307177500804652, "grad_norm": 1.073199987411499, "learning_rate": 1.743688972755362e-05, "loss": 0.5057, "step": 10500 }, { "epoch": 0.23318276156757417, "grad_norm": 1.2246018648147583, "learning_rate": 1.7434558270349006e-05, "loss": 0.386, "step": 10505 }, { "epoch": 0.2332937481271018, "grad_norm": 1.15493643283844, "learning_rate": 1.743222590927573e-05, "loss": 0.6794, "step": 10510 }, { "epoch": 0.23340473468662945, "grad_norm": 1.451859712600708, "learning_rate": 1.7429892644617354e-05, "loss": 0.5025, "step": 10515 }, { "epoch": 0.2335157212461571, "grad_norm": 1.6235580444335938, "learning_rate": 1.742755847665755e-05, "loss": 0.4602, "step": 10520 }, { "epoch": 0.23362670780568473, "grad_norm": 1.356918454170227, "learning_rate": 1.7425223405680098e-05, "loss": 0.4022, "step": 10525 }, { "epoch": 0.23373769436521238, "grad_norm": 1.1652427911758423, "learning_rate": 1.742288743196888e-05, "loss": 0.5255, "step": 10530 }, { "epoch": 0.23384868092474, "grad_norm": 1.1124521493911743, "learning_rate": 1.7420550555807906e-05, "loss": 0.462, "step": 10535 }, { "epoch": 0.23395966748426766, "grad_norm": 1.566153645515442, "learning_rate": 1.741821277748128e-05, "loss": 0.4599, "step": 10540 }, { "epoch": 0.2340706540437953, "grad_norm": 1.2258182764053345, "learning_rate": 1.741587409727323e-05, "loss": 0.457, "step": 10545 }, { "epoch": 0.23418164060332294, "grad_norm": 1.0857577323913574, "learning_rate": 1.7413534515468075e-05, "loss": 0.5158, "step": 10550 }, { "epoch": 0.23429262716285057, "grad_norm": 0.9319967031478882, "learning_rate": 1.7411194032350252e-05, "loss": 0.454, "step": 10555 }, { "epoch": 0.23440361372237822, "grad_norm": 1.503821849822998, "learning_rate": 1.7408852648204317e-05, "loss": 0.5528, "step": 10560 }, { "epoch": 0.23451460028190585, "grad_norm": 0.9573317766189575, "learning_rate": 1.7406510363314922e-05, "loss": 0.5481, "step": 10565 }, { "epoch": 0.2346255868414335, "grad_norm": 1.5251328945159912, "learning_rate": 1.740416717796684e-05, "loss": 0.4372, "step": 10570 }, { "epoch": 0.23473657340096116, "grad_norm": 0.8173489570617676, "learning_rate": 1.7401823092444945e-05, "loss": 0.3856, "step": 10575 }, { "epoch": 0.23484755996048878, "grad_norm": 1.7951514720916748, "learning_rate": 1.739947810703422e-05, "loss": 0.5434, "step": 10580 }, { "epoch": 0.23495854652001644, "grad_norm": 0.8231841325759888, "learning_rate": 1.739713222201976e-05, "loss": 0.5926, "step": 10585 }, { "epoch": 0.23506953307954406, "grad_norm": 1.47267746925354, "learning_rate": 1.739478543768678e-05, "loss": 0.4497, "step": 10590 }, { "epoch": 0.23518051963907172, "grad_norm": 1.2468472719192505, "learning_rate": 1.7392437754320577e-05, "loss": 0.437, "step": 10595 }, { "epoch": 0.23529150619859934, "grad_norm": 1.423754334449768, "learning_rate": 1.7390089172206594e-05, "loss": 0.5477, "step": 10600 }, { "epoch": 0.235402492758127, "grad_norm": 1.2863191366195679, "learning_rate": 1.7387739691630346e-05, "loss": 0.4375, "step": 10605 }, { "epoch": 0.23551347931765462, "grad_norm": 1.250435471534729, "learning_rate": 1.738538931287749e-05, "loss": 0.6207, "step": 10610 }, { "epoch": 0.23562446587718228, "grad_norm": 1.0339642763137817, "learning_rate": 1.7383038036233762e-05, "loss": 0.5425, "step": 10615 }, { "epoch": 0.2357354524367099, "grad_norm": 1.1191165447235107, "learning_rate": 1.7380685861985037e-05, "loss": 0.4339, "step": 10620 }, { "epoch": 0.23584643899623756, "grad_norm": 1.4968637228012085, "learning_rate": 1.7378332790417275e-05, "loss": 0.4038, "step": 10625 }, { "epoch": 0.2359574255557652, "grad_norm": 1.324038028717041, "learning_rate": 1.7375978821816557e-05, "loss": 0.4687, "step": 10630 }, { "epoch": 0.23606841211529284, "grad_norm": 1.475159764289856, "learning_rate": 1.737362395646907e-05, "loss": 0.5988, "step": 10635 }, { "epoch": 0.2361793986748205, "grad_norm": 1.462770700454712, "learning_rate": 1.7371268194661114e-05, "loss": 0.5812, "step": 10640 }, { "epoch": 0.23629038523434812, "grad_norm": 1.8520547151565552, "learning_rate": 1.7368911536679092e-05, "loss": 0.4466, "step": 10645 }, { "epoch": 0.23640137179387577, "grad_norm": 1.1543737649917603, "learning_rate": 1.736655398280952e-05, "loss": 0.4219, "step": 10650 }, { "epoch": 0.2365123583534034, "grad_norm": 1.0772531032562256, "learning_rate": 1.7364195533339017e-05, "loss": 0.6008, "step": 10655 }, { "epoch": 0.23662334491293105, "grad_norm": 1.693520426750183, "learning_rate": 1.736183618855432e-05, "loss": 0.5874, "step": 10660 }, { "epoch": 0.23673433147245868, "grad_norm": 0.9491427540779114, "learning_rate": 1.735947594874227e-05, "loss": 0.4984, "step": 10665 }, { "epoch": 0.23684531803198633, "grad_norm": 1.327712893486023, "learning_rate": 1.7357114814189812e-05, "loss": 0.4518, "step": 10670 }, { "epoch": 0.23695630459151396, "grad_norm": 1.220430850982666, "learning_rate": 1.735475278518401e-05, "loss": 0.3435, "step": 10675 }, { "epoch": 0.2370672911510416, "grad_norm": 1.355727195739746, "learning_rate": 1.7352389862012034e-05, "loss": 0.5366, "step": 10680 }, { "epoch": 0.23717827771056926, "grad_norm": 0.9620851278305054, "learning_rate": 1.7350026044961155e-05, "loss": 0.2839, "step": 10685 }, { "epoch": 0.2372892642700969, "grad_norm": 1.1203703880310059, "learning_rate": 1.734766133431876e-05, "loss": 0.4745, "step": 10690 }, { "epoch": 0.23740025082962454, "grad_norm": 1.1614298820495605, "learning_rate": 1.734529573037234e-05, "loss": 0.5457, "step": 10695 }, { "epoch": 0.23751123738915217, "grad_norm": 1.0189214944839478, "learning_rate": 1.73429292334095e-05, "loss": 0.4508, "step": 10700 }, { "epoch": 0.23762222394867982, "grad_norm": 1.196600317955017, "learning_rate": 1.734056184371795e-05, "loss": 0.4637, "step": 10705 }, { "epoch": 0.23773321050820745, "grad_norm": 1.19878089427948, "learning_rate": 1.7338193561585507e-05, "loss": 0.4069, "step": 10710 }, { "epoch": 0.2378441970677351, "grad_norm": 1.5032724142074585, "learning_rate": 1.7335824387300106e-05, "loss": 0.3725, "step": 10715 }, { "epoch": 0.23795518362726273, "grad_norm": 1.6843661069869995, "learning_rate": 1.7333454321149777e-05, "loss": 0.4016, "step": 10720 }, { "epoch": 0.23806617018679038, "grad_norm": 1.3617744445800781, "learning_rate": 1.7331083363422665e-05, "loss": 0.5763, "step": 10725 }, { "epoch": 0.238177156746318, "grad_norm": 1.376902461051941, "learning_rate": 1.7328711514407025e-05, "loss": 0.3615, "step": 10730 }, { "epoch": 0.23828814330584566, "grad_norm": 1.3558409214019775, "learning_rate": 1.732633877439122e-05, "loss": 0.5211, "step": 10735 }, { "epoch": 0.23839912986537332, "grad_norm": 1.5150203704833984, "learning_rate": 1.7323965143663713e-05, "loss": 0.5227, "step": 10740 }, { "epoch": 0.23851011642490094, "grad_norm": 0.9877330660820007, "learning_rate": 1.7321590622513088e-05, "loss": 0.4526, "step": 10745 }, { "epoch": 0.2386211029844286, "grad_norm": 0.9728530049324036, "learning_rate": 1.731921521122803e-05, "loss": 0.4788, "step": 10750 }, { "epoch": 0.23873208954395622, "grad_norm": 1.4340505599975586, "learning_rate": 1.7316838910097332e-05, "loss": 0.4781, "step": 10755 }, { "epoch": 0.23884307610348388, "grad_norm": 0.9711979031562805, "learning_rate": 1.7314461719409902e-05, "loss": 0.427, "step": 10760 }, { "epoch": 0.2389540626630115, "grad_norm": 1.3422971963882446, "learning_rate": 1.7312083639454743e-05, "loss": 0.5741, "step": 10765 }, { "epoch": 0.23906504922253916, "grad_norm": 0.8390370607376099, "learning_rate": 1.730970467052098e-05, "loss": 0.4605, "step": 10770 }, { "epoch": 0.23917603578206678, "grad_norm": 1.2565598487854004, "learning_rate": 1.7307324812897836e-05, "loss": 0.4963, "step": 10775 }, { "epoch": 0.23928702234159444, "grad_norm": 1.854711890220642, "learning_rate": 1.730494406687465e-05, "loss": 0.3802, "step": 10780 }, { "epoch": 0.23939800890112206, "grad_norm": 1.0575193166732788, "learning_rate": 1.7302562432740864e-05, "loss": 0.5513, "step": 10785 }, { "epoch": 0.23950899546064972, "grad_norm": 1.5515862703323364, "learning_rate": 1.7300179910786027e-05, "loss": 0.6286, "step": 10790 }, { "epoch": 0.23961998202017737, "grad_norm": 1.8107975721359253, "learning_rate": 1.72977965012998e-05, "loss": 0.5424, "step": 10795 }, { "epoch": 0.239730968579705, "grad_norm": 1.4742685556411743, "learning_rate": 1.7295412204571945e-05, "loss": 0.4547, "step": 10800 }, { "epoch": 0.23984195513923265, "grad_norm": 1.0533596277236938, "learning_rate": 1.7293027020892348e-05, "loss": 0.4738, "step": 10805 }, { "epoch": 0.23995294169876027, "grad_norm": 1.5168896913528442, "learning_rate": 1.7290640950550985e-05, "loss": 0.4324, "step": 10810 }, { "epoch": 0.24006392825828793, "grad_norm": 1.048251748085022, "learning_rate": 1.7288253993837936e-05, "loss": 0.5434, "step": 10815 }, { "epoch": 0.24017491481781555, "grad_norm": 0.968961238861084, "learning_rate": 1.7285866151043417e-05, "loss": 0.2887, "step": 10820 }, { "epoch": 0.2402859013773432, "grad_norm": 1.3361490964889526, "learning_rate": 1.728347742245773e-05, "loss": 0.5679, "step": 10825 }, { "epoch": 0.24039688793687083, "grad_norm": 1.0251753330230713, "learning_rate": 1.7281087808371278e-05, "loss": 0.5579, "step": 10830 }, { "epoch": 0.2405078744963985, "grad_norm": 1.205301284790039, "learning_rate": 1.727869730907459e-05, "loss": 0.554, "step": 10835 }, { "epoch": 0.24061886105592611, "grad_norm": 1.1939102411270142, "learning_rate": 1.7276305924858297e-05, "loss": 0.443, "step": 10840 }, { "epoch": 0.24072984761545377, "grad_norm": 1.080856204032898, "learning_rate": 1.727391365601313e-05, "loss": 0.3908, "step": 10845 }, { "epoch": 0.24084083417498142, "grad_norm": 1.2553077936172485, "learning_rate": 1.727152050282994e-05, "loss": 0.4487, "step": 10850 }, { "epoch": 0.24095182073450905, "grad_norm": 1.3635225296020508, "learning_rate": 1.7269126465599667e-05, "loss": 0.4693, "step": 10855 }, { "epoch": 0.2410628072940367, "grad_norm": 1.2379069328308105, "learning_rate": 1.726673154461338e-05, "loss": 0.5879, "step": 10860 }, { "epoch": 0.24117379385356433, "grad_norm": 1.0275567770004272, "learning_rate": 1.7264335740162244e-05, "loss": 0.7738, "step": 10865 }, { "epoch": 0.24128478041309198, "grad_norm": 1.408523678779602, "learning_rate": 1.726193905253753e-05, "loss": 0.4684, "step": 10870 }, { "epoch": 0.2413957669726196, "grad_norm": 0.8828668594360352, "learning_rate": 1.7259541482030623e-05, "loss": 0.473, "step": 10875 }, { "epoch": 0.24150675353214726, "grad_norm": 1.1700650453567505, "learning_rate": 1.7257143028933004e-05, "loss": 0.4779, "step": 10880 }, { "epoch": 0.2416177400916749, "grad_norm": 1.1337982416152954, "learning_rate": 1.7254743693536276e-05, "loss": 0.5127, "step": 10885 }, { "epoch": 0.24172872665120254, "grad_norm": 1.749471664428711, "learning_rate": 1.7252343476132143e-05, "loss": 0.4679, "step": 10890 }, { "epoch": 0.24183971321073017, "grad_norm": 1.214697241783142, "learning_rate": 1.724994237701241e-05, "loss": 0.5016, "step": 10895 }, { "epoch": 0.24195069977025782, "grad_norm": 1.2082881927490234, "learning_rate": 1.7247540396469e-05, "loss": 0.6128, "step": 10900 }, { "epoch": 0.24206168632978547, "grad_norm": 1.2025545835494995, "learning_rate": 1.7245137534793933e-05, "loss": 0.4832, "step": 10905 }, { "epoch": 0.2421726728893131, "grad_norm": 1.2197436094284058, "learning_rate": 1.7242733792279342e-05, "loss": 0.5686, "step": 10910 }, { "epoch": 0.24228365944884075, "grad_norm": 1.3093212842941284, "learning_rate": 1.7240329169217468e-05, "loss": 0.5299, "step": 10915 }, { "epoch": 0.24239464600836838, "grad_norm": 1.033205270767212, "learning_rate": 1.7237923665900656e-05, "loss": 0.5039, "step": 10920 }, { "epoch": 0.24250563256789603, "grad_norm": 1.2853460311889648, "learning_rate": 1.723551728262136e-05, "loss": 0.4971, "step": 10925 }, { "epoch": 0.24261661912742366, "grad_norm": 1.6411842107772827, "learning_rate": 1.723311001967214e-05, "loss": 0.4512, "step": 10930 }, { "epoch": 0.24272760568695131, "grad_norm": 1.0631998777389526, "learning_rate": 1.7230701877345658e-05, "loss": 0.4894, "step": 10935 }, { "epoch": 0.24283859224647894, "grad_norm": 1.1609796285629272, "learning_rate": 1.722829285593469e-05, "loss": 0.5161, "step": 10940 }, { "epoch": 0.2429495788060066, "grad_norm": 1.4562970399856567, "learning_rate": 1.7225882955732124e-05, "loss": 0.6736, "step": 10945 }, { "epoch": 0.24306056536553422, "grad_norm": 1.154300332069397, "learning_rate": 1.722347217703094e-05, "loss": 0.6721, "step": 10950 }, { "epoch": 0.24317155192506187, "grad_norm": 1.2888139486312866, "learning_rate": 1.722106052012423e-05, "loss": 0.4758, "step": 10955 }, { "epoch": 0.24328253848458953, "grad_norm": 1.2322896718978882, "learning_rate": 1.7218647985305204e-05, "loss": 0.6146, "step": 10960 }, { "epoch": 0.24339352504411715, "grad_norm": 1.266477108001709, "learning_rate": 1.7216234572867165e-05, "loss": 0.4997, "step": 10965 }, { "epoch": 0.2435045116036448, "grad_norm": 1.1659526824951172, "learning_rate": 1.7213820283103526e-05, "loss": 0.4684, "step": 10970 }, { "epoch": 0.24361549816317243, "grad_norm": 1.1297948360443115, "learning_rate": 1.7211405116307815e-05, "loss": 0.4648, "step": 10975 }, { "epoch": 0.2437264847227001, "grad_norm": 1.3273589611053467, "learning_rate": 1.720898907277365e-05, "loss": 0.5179, "step": 10980 }, { "epoch": 0.2438374712822277, "grad_norm": 0.9976261258125305, "learning_rate": 1.720657215279477e-05, "loss": 0.5025, "step": 10985 }, { "epoch": 0.24394845784175537, "grad_norm": 0.9172224998474121, "learning_rate": 1.7204154356665023e-05, "loss": 0.5558, "step": 10990 }, { "epoch": 0.244059444401283, "grad_norm": 1.1094406843185425, "learning_rate": 1.7201735684678348e-05, "loss": 0.5454, "step": 10995 }, { "epoch": 0.24417043096081065, "grad_norm": 1.3654953241348267, "learning_rate": 1.7199316137128797e-05, "loss": 0.5252, "step": 11000 }, { "epoch": 0.24428141752033827, "grad_norm": 1.7020251750946045, "learning_rate": 1.7196895714310536e-05, "loss": 0.4773, "step": 11005 }, { "epoch": 0.24439240407986593, "grad_norm": 1.4368350505828857, "learning_rate": 1.7194474416517832e-05, "loss": 0.3861, "step": 11010 }, { "epoch": 0.24450339063939358, "grad_norm": 1.4007991552352905, "learning_rate": 1.719205224404506e-05, "loss": 0.4601, "step": 11015 }, { "epoch": 0.2446143771989212, "grad_norm": 1.3285207748413086, "learning_rate": 1.718962919718669e-05, "loss": 0.4993, "step": 11020 }, { "epoch": 0.24472536375844886, "grad_norm": 1.0307044982910156, "learning_rate": 1.7187205276237316e-05, "loss": 0.3924, "step": 11025 }, { "epoch": 0.2448363503179765, "grad_norm": 0.9056999683380127, "learning_rate": 1.718478048149163e-05, "loss": 0.4292, "step": 11030 }, { "epoch": 0.24494733687750414, "grad_norm": 0.9134597778320312, "learning_rate": 1.718235481324443e-05, "loss": 0.3722, "step": 11035 }, { "epoch": 0.24505832343703177, "grad_norm": 1.4173113107681274, "learning_rate": 1.7179928271790617e-05, "loss": 0.5344, "step": 11040 }, { "epoch": 0.24516930999655942, "grad_norm": 1.6890437602996826, "learning_rate": 1.7177500857425207e-05, "loss": 0.4563, "step": 11045 }, { "epoch": 0.24528029655608705, "grad_norm": 1.0443166494369507, "learning_rate": 1.717507257044331e-05, "loss": 0.4444, "step": 11050 }, { "epoch": 0.2453912831156147, "grad_norm": 1.5586121082305908, "learning_rate": 1.717264341114016e-05, "loss": 0.5696, "step": 11055 }, { "epoch": 0.24550226967514235, "grad_norm": 0.7577741742134094, "learning_rate": 1.7170213379811077e-05, "loss": 0.361, "step": 11060 }, { "epoch": 0.24561325623466998, "grad_norm": 1.1730875968933105, "learning_rate": 1.7167782476751494e-05, "loss": 0.4033, "step": 11065 }, { "epoch": 0.24572424279419763, "grad_norm": 2.1000208854675293, "learning_rate": 1.716535070225696e-05, "loss": 0.4252, "step": 11070 }, { "epoch": 0.24583522935372526, "grad_norm": 1.5296849012374878, "learning_rate": 1.7162918056623116e-05, "loss": 0.4881, "step": 11075 }, { "epoch": 0.2459462159132529, "grad_norm": 1.5113800764083862, "learning_rate": 1.716048454014572e-05, "loss": 0.5569, "step": 11080 }, { "epoch": 0.24605720247278054, "grad_norm": 1.1769105195999146, "learning_rate": 1.7158050153120623e-05, "loss": 0.4193, "step": 11085 }, { "epoch": 0.2461681890323082, "grad_norm": 1.3997236490249634, "learning_rate": 1.71556148958438e-05, "loss": 0.5501, "step": 11090 }, { "epoch": 0.24627917559183582, "grad_norm": 1.0395276546478271, "learning_rate": 1.7153178768611317e-05, "loss": 0.3598, "step": 11095 }, { "epoch": 0.24639016215136347, "grad_norm": 0.9971006512641907, "learning_rate": 1.7150741771719345e-05, "loss": 0.5354, "step": 11100 }, { "epoch": 0.2465011487108911, "grad_norm": 1.4031944274902344, "learning_rate": 1.714830390546417e-05, "loss": 0.4695, "step": 11105 }, { "epoch": 0.24661213527041875, "grad_norm": 0.9989936947822571, "learning_rate": 1.7145865170142186e-05, "loss": 0.4709, "step": 11110 }, { "epoch": 0.2467231218299464, "grad_norm": 1.3992575407028198, "learning_rate": 1.7143425566049873e-05, "loss": 0.5232, "step": 11115 }, { "epoch": 0.24683410838947403, "grad_norm": 1.3434380292892456, "learning_rate": 1.714098509348384e-05, "loss": 0.6072, "step": 11120 }, { "epoch": 0.2469450949490017, "grad_norm": 1.2431334257125854, "learning_rate": 1.7138543752740785e-05, "loss": 0.4156, "step": 11125 }, { "epoch": 0.2470560815085293, "grad_norm": 1.1508046388626099, "learning_rate": 1.7136101544117526e-05, "loss": 0.4603, "step": 11130 }, { "epoch": 0.24716706806805697, "grad_norm": 1.0870983600616455, "learning_rate": 1.7133658467910968e-05, "loss": 0.4438, "step": 11135 }, { "epoch": 0.2472780546275846, "grad_norm": 1.3615703582763672, "learning_rate": 1.7131214524418146e-05, "loss": 0.5001, "step": 11140 }, { "epoch": 0.24738904118711225, "grad_norm": 1.087088942527771, "learning_rate": 1.7128769713936173e-05, "loss": 0.4491, "step": 11145 }, { "epoch": 0.24750002774663987, "grad_norm": 1.6181623935699463, "learning_rate": 1.712632403676229e-05, "loss": 0.4658, "step": 11150 }, { "epoch": 0.24761101430616753, "grad_norm": 1.419737696647644, "learning_rate": 1.7123877493193825e-05, "loss": 0.4683, "step": 11155 }, { "epoch": 0.24772200086569515, "grad_norm": 1.0363826751708984, "learning_rate": 1.7121430083528227e-05, "loss": 0.4743, "step": 11160 }, { "epoch": 0.2478329874252228, "grad_norm": 1.0170842409133911, "learning_rate": 1.7118981808063043e-05, "loss": 0.3609, "step": 11165 }, { "epoch": 0.24794397398475046, "grad_norm": 1.1048935651779175, "learning_rate": 1.7116532667095928e-05, "loss": 0.4537, "step": 11170 }, { "epoch": 0.24805496054427809, "grad_norm": 1.2576793432235718, "learning_rate": 1.711408266092464e-05, "loss": 0.4805, "step": 11175 }, { "epoch": 0.24816594710380574, "grad_norm": 1.1600898504257202, "learning_rate": 1.7111631789847038e-05, "loss": 0.4923, "step": 11180 }, { "epoch": 0.24827693366333337, "grad_norm": 1.6441643238067627, "learning_rate": 1.7109180054161093e-05, "loss": 0.5338, "step": 11185 }, { "epoch": 0.24838792022286102, "grad_norm": 0.9537017345428467, "learning_rate": 1.710672745416488e-05, "loss": 0.6072, "step": 11190 }, { "epoch": 0.24849890678238865, "grad_norm": 0.8803986310958862, "learning_rate": 1.710427399015658e-05, "loss": 0.5011, "step": 11195 }, { "epoch": 0.2486098933419163, "grad_norm": 1.057271957397461, "learning_rate": 1.710181966243447e-05, "loss": 0.33, "step": 11200 }, { "epoch": 0.24872087990144393, "grad_norm": 0.9839698672294617, "learning_rate": 1.7099364471296947e-05, "loss": 0.5186, "step": 11205 }, { "epoch": 0.24883186646097158, "grad_norm": 1.279700517654419, "learning_rate": 1.70969084170425e-05, "loss": 0.5572, "step": 11210 }, { "epoch": 0.2489428530204992, "grad_norm": 1.7117375135421753, "learning_rate": 1.7094451499969725e-05, "loss": 0.4079, "step": 11215 }, { "epoch": 0.24905383958002686, "grad_norm": 1.1009594202041626, "learning_rate": 1.7091993720377336e-05, "loss": 0.4195, "step": 11220 }, { "epoch": 0.2491648261395545, "grad_norm": 1.4836571216583252, "learning_rate": 1.708953507856413e-05, "loss": 0.4876, "step": 11225 }, { "epoch": 0.24927581269908214, "grad_norm": 1.6875817775726318, "learning_rate": 1.708707557482903e-05, "loss": 0.4789, "step": 11230 }, { "epoch": 0.2493867992586098, "grad_norm": 1.110202670097351, "learning_rate": 1.7084615209471045e-05, "loss": 0.4394, "step": 11235 }, { "epoch": 0.24949778581813742, "grad_norm": 1.2237122058868408, "learning_rate": 1.7082153982789305e-05, "loss": 0.4599, "step": 11240 }, { "epoch": 0.24960877237766507, "grad_norm": 1.3197574615478516, "learning_rate": 1.7079691895083036e-05, "loss": 0.4665, "step": 11245 }, { "epoch": 0.2497197589371927, "grad_norm": 1.1246968507766724, "learning_rate": 1.7077228946651567e-05, "loss": 0.5569, "step": 11250 }, { "epoch": 0.24983074549672035, "grad_norm": 1.3298134803771973, "learning_rate": 1.7074765137794343e-05, "loss": 0.5649, "step": 11255 }, { "epoch": 0.24994173205624798, "grad_norm": 1.000502109527588, "learning_rate": 1.7072300468810896e-05, "loss": 0.568, "step": 11260 }, { "epoch": 0.2500527186157756, "grad_norm": 1.1524144411087036, "learning_rate": 1.7069834940000878e-05, "loss": 0.4625, "step": 11265 }, { "epoch": 0.2501637051753033, "grad_norm": 0.940650224685669, "learning_rate": 1.706736855166404e-05, "loss": 0.6018, "step": 11270 }, { "epoch": 0.2502746917348309, "grad_norm": 1.473748803138733, "learning_rate": 1.7064901304100233e-05, "loss": 0.5434, "step": 11275 }, { "epoch": 0.25038567829435854, "grad_norm": 1.3512476682662964, "learning_rate": 1.706243319760942e-05, "loss": 0.438, "step": 11280 }, { "epoch": 0.2504966648538862, "grad_norm": 1.119840145111084, "learning_rate": 1.7059964232491666e-05, "loss": 0.5395, "step": 11285 }, { "epoch": 0.25060765141341385, "grad_norm": 2.1528420448303223, "learning_rate": 1.7057494409047136e-05, "loss": 0.4493, "step": 11290 }, { "epoch": 0.25071863797294147, "grad_norm": 1.6110953092575073, "learning_rate": 1.7055023727576106e-05, "loss": 0.4385, "step": 11295 }, { "epoch": 0.2508296245324691, "grad_norm": 1.4127633571624756, "learning_rate": 1.7052552188378954e-05, "loss": 0.5768, "step": 11300 }, { "epoch": 0.2509406110919968, "grad_norm": 1.075862169265747, "learning_rate": 1.7050079791756157e-05, "loss": 0.4207, "step": 11305 }, { "epoch": 0.2510515976515244, "grad_norm": 1.2337510585784912, "learning_rate": 1.70476065380083e-05, "loss": 0.3383, "step": 11310 }, { "epoch": 0.25116258421105203, "grad_norm": 1.2561253309249878, "learning_rate": 1.704513242743608e-05, "loss": 0.3997, "step": 11315 }, { "epoch": 0.25127357077057966, "grad_norm": 1.2801952362060547, "learning_rate": 1.7042657460340283e-05, "loss": 0.4344, "step": 11320 }, { "epoch": 0.25138455733010734, "grad_norm": 1.136273980140686, "learning_rate": 1.7040181637021812e-05, "loss": 0.4397, "step": 11325 }, { "epoch": 0.25149554388963496, "grad_norm": 1.1857062578201294, "learning_rate": 1.7037704957781674e-05, "loss": 0.4559, "step": 11330 }, { "epoch": 0.2516065304491626, "grad_norm": 1.4461593627929688, "learning_rate": 1.7035227422920965e-05, "loss": 0.4814, "step": 11335 }, { "epoch": 0.2517175170086903, "grad_norm": 1.012604832649231, "learning_rate": 1.7032749032740904e-05, "loss": 0.5578, "step": 11340 }, { "epoch": 0.2518285035682179, "grad_norm": 1.427864670753479, "learning_rate": 1.7030269787542798e-05, "loss": 0.5969, "step": 11345 }, { "epoch": 0.2519394901277455, "grad_norm": 0.997259259223938, "learning_rate": 1.702778968762807e-05, "loss": 0.4033, "step": 11350 }, { "epoch": 0.25205047668727315, "grad_norm": 1.1704946756362915, "learning_rate": 1.702530873329824e-05, "loss": 0.6685, "step": 11355 }, { "epoch": 0.25216146324680083, "grad_norm": 1.036297082901001, "learning_rate": 1.702282692485494e-05, "loss": 0.3092, "step": 11360 }, { "epoch": 0.25227244980632846, "grad_norm": 1.4215106964111328, "learning_rate": 1.702034426259989e-05, "loss": 0.3612, "step": 11365 }, { "epoch": 0.2523834363658561, "grad_norm": 1.420967698097229, "learning_rate": 1.7017860746834932e-05, "loss": 0.5245, "step": 11370 }, { "epoch": 0.2524944229253837, "grad_norm": 1.196954607963562, "learning_rate": 1.7015376377861998e-05, "loss": 0.5223, "step": 11375 }, { "epoch": 0.2526054094849114, "grad_norm": 1.2215323448181152, "learning_rate": 1.7012891155983133e-05, "loss": 0.3851, "step": 11380 }, { "epoch": 0.252716396044439, "grad_norm": 1.6690142154693604, "learning_rate": 1.701040508150048e-05, "loss": 0.5762, "step": 11385 }, { "epoch": 0.25282738260396664, "grad_norm": 1.261279582977295, "learning_rate": 1.7007918154716286e-05, "loss": 0.505, "step": 11390 }, { "epoch": 0.2529383691634943, "grad_norm": 1.0459377765655518, "learning_rate": 1.700543037593291e-05, "loss": 0.4814, "step": 11395 }, { "epoch": 0.25304935572302195, "grad_norm": 1.0348485708236694, "learning_rate": 1.7002941745452804e-05, "loss": 0.345, "step": 11400 }, { "epoch": 0.2531603422825496, "grad_norm": 1.0944418907165527, "learning_rate": 1.7000452263578523e-05, "loss": 0.4802, "step": 11405 }, { "epoch": 0.2532713288420772, "grad_norm": 1.30760657787323, "learning_rate": 1.6997961930612733e-05, "loss": 0.439, "step": 11410 }, { "epoch": 0.2533823154016049, "grad_norm": 1.5078095197677612, "learning_rate": 1.6995470746858204e-05, "loss": 0.6, "step": 11415 }, { "epoch": 0.2534933019611325, "grad_norm": 1.358195185661316, "learning_rate": 1.6992978712617802e-05, "loss": 0.6164, "step": 11420 }, { "epoch": 0.25360428852066014, "grad_norm": 0.7815698981285095, "learning_rate": 1.69904858281945e-05, "loss": 0.464, "step": 11425 }, { "epoch": 0.25371527508018776, "grad_norm": 0.9305065274238586, "learning_rate": 1.6987992093891375e-05, "loss": 0.4365, "step": 11430 }, { "epoch": 0.25382626163971544, "grad_norm": 0.8990150094032288, "learning_rate": 1.6985497510011606e-05, "loss": 0.4459, "step": 11435 }, { "epoch": 0.25393724819924307, "grad_norm": 1.0652459859848022, "learning_rate": 1.698300207685848e-05, "loss": 0.3107, "step": 11440 }, { "epoch": 0.2540482347587707, "grad_norm": 0.9058518409729004, "learning_rate": 1.698050579473538e-05, "loss": 0.4161, "step": 11445 }, { "epoch": 0.2541592213182984, "grad_norm": 1.298906922340393, "learning_rate": 1.6978008663945794e-05, "loss": 0.5682, "step": 11450 }, { "epoch": 0.254270207877826, "grad_norm": 1.582791805267334, "learning_rate": 1.6975510684793318e-05, "loss": 0.6219, "step": 11455 }, { "epoch": 0.25438119443735363, "grad_norm": 0.9926007986068726, "learning_rate": 1.697301185758165e-05, "loss": 0.4707, "step": 11460 }, { "epoch": 0.25449218099688126, "grad_norm": 1.685634732246399, "learning_rate": 1.697051218261458e-05, "loss": 0.4601, "step": 11465 }, { "epoch": 0.25460316755640894, "grad_norm": 1.1291474103927612, "learning_rate": 1.696801166019602e-05, "loss": 0.5155, "step": 11470 }, { "epoch": 0.25471415411593656, "grad_norm": 0.8793083429336548, "learning_rate": 1.6965510290629973e-05, "loss": 0.3604, "step": 11475 }, { "epoch": 0.2548251406754642, "grad_norm": 1.654395341873169, "learning_rate": 1.6963008074220542e-05, "loss": 0.5149, "step": 11480 }, { "epoch": 0.2549361272349918, "grad_norm": 1.1508978605270386, "learning_rate": 1.696050501127194e-05, "loss": 0.4138, "step": 11485 }, { "epoch": 0.2550471137945195, "grad_norm": 1.3007711172103882, "learning_rate": 1.6958001102088485e-05, "loss": 0.5267, "step": 11490 }, { "epoch": 0.2551581003540471, "grad_norm": 1.4527742862701416, "learning_rate": 1.6955496346974595e-05, "loss": 0.2901, "step": 11495 }, { "epoch": 0.25526908691357475, "grad_norm": 0.9203034043312073, "learning_rate": 1.695299074623478e-05, "loss": 0.4912, "step": 11500 }, { "epoch": 0.25538007347310243, "grad_norm": 1.4323278665542603, "learning_rate": 1.6950484300173676e-05, "loss": 0.4698, "step": 11505 }, { "epoch": 0.25549106003263006, "grad_norm": 1.0388230085372925, "learning_rate": 1.6947977009095994e-05, "loss": 0.458, "step": 11510 }, { "epoch": 0.2556020465921577, "grad_norm": 1.0585956573486328, "learning_rate": 1.694546887330657e-05, "loss": 0.575, "step": 11515 }, { "epoch": 0.2557130331516853, "grad_norm": 1.583940863609314, "learning_rate": 1.6942959893110335e-05, "loss": 0.4435, "step": 11520 }, { "epoch": 0.255824019711213, "grad_norm": 1.193070650100708, "learning_rate": 1.694045006881232e-05, "loss": 0.4773, "step": 11525 }, { "epoch": 0.2559350062707406, "grad_norm": 1.2020277976989746, "learning_rate": 1.6937939400717663e-05, "loss": 0.377, "step": 11530 }, { "epoch": 0.25604599283026824, "grad_norm": 1.052960753440857, "learning_rate": 1.69354278891316e-05, "loss": 0.7132, "step": 11535 }, { "epoch": 0.25615697938979587, "grad_norm": 1.1283267736434937, "learning_rate": 1.693291553435948e-05, "loss": 0.4637, "step": 11540 }, { "epoch": 0.25626796594932355, "grad_norm": 1.459197998046875, "learning_rate": 1.6930402336706735e-05, "loss": 0.4351, "step": 11545 }, { "epoch": 0.2563789525088512, "grad_norm": 1.013622760772705, "learning_rate": 1.6927888296478918e-05, "loss": 0.486, "step": 11550 }, { "epoch": 0.2564899390683788, "grad_norm": 0.8087404370307922, "learning_rate": 1.6925373413981673e-05, "loss": 0.4736, "step": 11555 }, { "epoch": 0.2566009256279065, "grad_norm": 0.8877795338630676, "learning_rate": 1.692285768952076e-05, "loss": 0.4087, "step": 11560 }, { "epoch": 0.2567119121874341, "grad_norm": 0.8639494776725769, "learning_rate": 1.692034112340202e-05, "loss": 0.4806, "step": 11565 }, { "epoch": 0.25682289874696174, "grad_norm": 0.7945966720581055, "learning_rate": 1.691782371593142e-05, "loss": 0.5977, "step": 11570 }, { "epoch": 0.25693388530648936, "grad_norm": 1.2927353382110596, "learning_rate": 1.6915305467415014e-05, "loss": 0.5544, "step": 11575 }, { "epoch": 0.25704487186601704, "grad_norm": 0.9172951579093933, "learning_rate": 1.6912786378158957e-05, "loss": 0.6539, "step": 11580 }, { "epoch": 0.25715585842554467, "grad_norm": 1.2856504917144775, "learning_rate": 1.691026644846952e-05, "loss": 0.4804, "step": 11585 }, { "epoch": 0.2572668449850723, "grad_norm": 1.0011804103851318, "learning_rate": 1.6907745678653064e-05, "loss": 0.4702, "step": 11590 }, { "epoch": 0.2573778315445999, "grad_norm": 1.5778934955596924, "learning_rate": 1.690522406901605e-05, "loss": 0.4763, "step": 11595 }, { "epoch": 0.2574888181041276, "grad_norm": 1.3447763919830322, "learning_rate": 1.6902701619865056e-05, "loss": 0.547, "step": 11600 }, { "epoch": 0.25759980466365523, "grad_norm": 1.4077445268630981, "learning_rate": 1.690017833150675e-05, "loss": 0.4881, "step": 11605 }, { "epoch": 0.25771079122318286, "grad_norm": 0.993047297000885, "learning_rate": 1.6897654204247897e-05, "loss": 0.6155, "step": 11610 }, { "epoch": 0.25782177778271054, "grad_norm": 0.7152522802352905, "learning_rate": 1.6895129238395386e-05, "loss": 0.3392, "step": 11615 }, { "epoch": 0.25793276434223816, "grad_norm": 1.6415187120437622, "learning_rate": 1.6892603434256184e-05, "loss": 0.4007, "step": 11620 }, { "epoch": 0.2580437509017658, "grad_norm": 1.0869807004928589, "learning_rate": 1.6890076792137373e-05, "loss": 0.55, "step": 11625 }, { "epoch": 0.2581547374612934, "grad_norm": 1.0225802659988403, "learning_rate": 1.688754931234613e-05, "loss": 0.3417, "step": 11630 }, { "epoch": 0.2582657240208211, "grad_norm": 1.0578125715255737, "learning_rate": 1.6885020995189743e-05, "loss": 0.4709, "step": 11635 }, { "epoch": 0.2583767105803487, "grad_norm": 1.2576895952224731, "learning_rate": 1.6882491840975593e-05, "loss": 0.504, "step": 11640 }, { "epoch": 0.25848769713987635, "grad_norm": 1.2828280925750732, "learning_rate": 1.6879961850011174e-05, "loss": 0.4219, "step": 11645 }, { "epoch": 0.258598683699404, "grad_norm": 1.12638521194458, "learning_rate": 1.6877431022604057e-05, "loss": 0.556, "step": 11650 }, { "epoch": 0.25870967025893166, "grad_norm": 1.4913954734802246, "learning_rate": 1.6874899359061946e-05, "loss": 0.6028, "step": 11655 }, { "epoch": 0.2588206568184593, "grad_norm": 1.3855817317962646, "learning_rate": 1.687236685969263e-05, "loss": 0.5162, "step": 11660 }, { "epoch": 0.2589316433779869, "grad_norm": 1.228534460067749, "learning_rate": 1.6869833524803995e-05, "loss": 0.6225, "step": 11665 }, { "epoch": 0.2590426299375146, "grad_norm": 1.5325289964675903, "learning_rate": 1.686729935470404e-05, "loss": 0.5332, "step": 11670 }, { "epoch": 0.2591536164970422, "grad_norm": 1.2924435138702393, "learning_rate": 1.6864764349700866e-05, "loss": 0.4786, "step": 11675 }, { "epoch": 0.25926460305656984, "grad_norm": 1.0833882093429565, "learning_rate": 1.6862228510102657e-05, "loss": 0.5727, "step": 11680 }, { "epoch": 0.25937558961609747, "grad_norm": 1.1746231317520142, "learning_rate": 1.6859691836217725e-05, "loss": 0.2722, "step": 11685 }, { "epoch": 0.25948657617562515, "grad_norm": 5.168801784515381, "learning_rate": 1.6857154328354463e-05, "loss": 0.6013, "step": 11690 }, { "epoch": 0.2595975627351528, "grad_norm": 1.4809683561325073, "learning_rate": 1.6854615986821377e-05, "loss": 0.5135, "step": 11695 }, { "epoch": 0.2597085492946804, "grad_norm": 1.0130324363708496, "learning_rate": 1.6852076811927066e-05, "loss": 0.3937, "step": 11700 }, { "epoch": 0.25981953585420803, "grad_norm": 1.0422590970993042, "learning_rate": 1.6849536803980238e-05, "loss": 0.4183, "step": 11705 }, { "epoch": 0.2599305224137357, "grad_norm": 1.2698982954025269, "learning_rate": 1.6846995963289696e-05, "loss": 0.4465, "step": 11710 }, { "epoch": 0.26004150897326334, "grad_norm": 1.3013725280761719, "learning_rate": 1.684445429016435e-05, "loss": 0.5689, "step": 11715 }, { "epoch": 0.26015249553279096, "grad_norm": 1.5303215980529785, "learning_rate": 1.68419117849132e-05, "loss": 0.4574, "step": 11720 }, { "epoch": 0.26026348209231864, "grad_norm": 1.2155689001083374, "learning_rate": 1.6839368447845366e-05, "loss": 0.3824, "step": 11725 }, { "epoch": 0.26037446865184627, "grad_norm": 1.172390341758728, "learning_rate": 1.6836824279270053e-05, "loss": 0.379, "step": 11730 }, { "epoch": 0.2604854552113739, "grad_norm": 1.3164101839065552, "learning_rate": 1.683427927949657e-05, "loss": 0.4448, "step": 11735 }, { "epoch": 0.2605964417709015, "grad_norm": 1.1558310985565186, "learning_rate": 1.6831733448834336e-05, "loss": 0.521, "step": 11740 }, { "epoch": 0.2607074283304292, "grad_norm": 1.3138155937194824, "learning_rate": 1.682918678759286e-05, "loss": 0.5066, "step": 11745 }, { "epoch": 0.26081841488995683, "grad_norm": 1.0508272647857666, "learning_rate": 1.682663929608176e-05, "loss": 0.3492, "step": 11750 }, { "epoch": 0.26092940144948445, "grad_norm": 0.9779253005981445, "learning_rate": 1.6824090974610742e-05, "loss": 0.3875, "step": 11755 }, { "epoch": 0.2610403880090121, "grad_norm": 1.0405664443969727, "learning_rate": 1.6821541823489636e-05, "loss": 0.4794, "step": 11760 }, { "epoch": 0.26115137456853976, "grad_norm": 1.8929942846298218, "learning_rate": 1.6818991843028353e-05, "loss": 0.5157, "step": 11765 }, { "epoch": 0.2612623611280674, "grad_norm": 1.7605929374694824, "learning_rate": 1.681644103353691e-05, "loss": 0.3625, "step": 11770 }, { "epoch": 0.261373347687595, "grad_norm": 1.064549207687378, "learning_rate": 1.6813889395325423e-05, "loss": 0.4564, "step": 11775 }, { "epoch": 0.2614843342471227, "grad_norm": 0.8532626032829285, "learning_rate": 1.681133692870412e-05, "loss": 0.4323, "step": 11780 }, { "epoch": 0.2615953208066503, "grad_norm": 1.594598650932312, "learning_rate": 1.6808783633983315e-05, "loss": 0.4004, "step": 11785 }, { "epoch": 0.26170630736617795, "grad_norm": 1.5274829864501953, "learning_rate": 1.680622951147343e-05, "loss": 0.3958, "step": 11790 }, { "epoch": 0.2618172939257056, "grad_norm": 1.0514847040176392, "learning_rate": 1.6803674561484987e-05, "loss": 0.4269, "step": 11795 }, { "epoch": 0.26192828048523326, "grad_norm": 1.3296819925308228, "learning_rate": 1.680111878432861e-05, "loss": 0.6155, "step": 11800 }, { "epoch": 0.2620392670447609, "grad_norm": 0.8670737147331238, "learning_rate": 1.679856218031502e-05, "loss": 0.4943, "step": 11805 }, { "epoch": 0.2621502536042885, "grad_norm": 0.9139857888221741, "learning_rate": 1.6796004749755043e-05, "loss": 0.4629, "step": 11810 }, { "epoch": 0.2622612401638162, "grad_norm": 1.5310050249099731, "learning_rate": 1.6793446492959596e-05, "loss": 0.3929, "step": 11815 }, { "epoch": 0.2623722267233438, "grad_norm": 1.3357696533203125, "learning_rate": 1.679088741023971e-05, "loss": 0.4999, "step": 11820 }, { "epoch": 0.26248321328287144, "grad_norm": 1.1147435903549194, "learning_rate": 1.6788327501906507e-05, "loss": 0.6116, "step": 11825 }, { "epoch": 0.26259419984239907, "grad_norm": 1.315943717956543, "learning_rate": 1.678576676827121e-05, "loss": 0.3868, "step": 11830 }, { "epoch": 0.26270518640192675, "grad_norm": 0.9401881098747253, "learning_rate": 1.678320520964515e-05, "loss": 0.5025, "step": 11835 }, { "epoch": 0.2628161729614544, "grad_norm": 1.2512296438217163, "learning_rate": 1.678064282633975e-05, "loss": 0.4544, "step": 11840 }, { "epoch": 0.262927159520982, "grad_norm": 0.9268614649772644, "learning_rate": 1.6778079618666536e-05, "loss": 0.4752, "step": 11845 }, { "epoch": 0.2630381460805096, "grad_norm": 1.4356162548065186, "learning_rate": 1.6775515586937135e-05, "loss": 0.6672, "step": 11850 }, { "epoch": 0.2631491326400373, "grad_norm": 1.5182090997695923, "learning_rate": 1.677295073146327e-05, "loss": 0.6735, "step": 11855 }, { "epoch": 0.26326011919956493, "grad_norm": 1.3008809089660645, "learning_rate": 1.677038505255677e-05, "loss": 0.5279, "step": 11860 }, { "epoch": 0.26337110575909256, "grad_norm": 0.9265232086181641, "learning_rate": 1.6767818550529564e-05, "loss": 0.3631, "step": 11865 }, { "epoch": 0.26348209231862024, "grad_norm": 1.1864442825317383, "learning_rate": 1.676525122569367e-05, "loss": 0.4952, "step": 11870 }, { "epoch": 0.26359307887814787, "grad_norm": 1.2192386388778687, "learning_rate": 1.676268307836123e-05, "loss": 0.5234, "step": 11875 }, { "epoch": 0.2637040654376755, "grad_norm": 0.9491934180259705, "learning_rate": 1.6760114108844453e-05, "loss": 0.5122, "step": 11880 }, { "epoch": 0.2638150519972031, "grad_norm": 1.0653088092803955, "learning_rate": 1.6757544317455677e-05, "loss": 0.5234, "step": 11885 }, { "epoch": 0.2639260385567308, "grad_norm": 0.9259722828865051, "learning_rate": 1.6754973704507325e-05, "loss": 0.5128, "step": 11890 }, { "epoch": 0.26403702511625843, "grad_norm": 1.3802366256713867, "learning_rate": 1.6752402270311928e-05, "loss": 0.4486, "step": 11895 }, { "epoch": 0.26414801167578605, "grad_norm": 1.1017757654190063, "learning_rate": 1.6749830015182106e-05, "loss": 0.5004, "step": 11900 }, { "epoch": 0.2642589982353137, "grad_norm": 1.4015121459960938, "learning_rate": 1.674725693943059e-05, "loss": 0.3029, "step": 11905 }, { "epoch": 0.26436998479484136, "grad_norm": 1.651033878326416, "learning_rate": 1.6744683043370204e-05, "loss": 0.514, "step": 11910 }, { "epoch": 0.264480971354369, "grad_norm": 1.0948195457458496, "learning_rate": 1.6742108327313872e-05, "loss": 0.6031, "step": 11915 }, { "epoch": 0.2645919579138966, "grad_norm": 0.9787044525146484, "learning_rate": 1.673953279157462e-05, "loss": 0.5408, "step": 11920 }, { "epoch": 0.2647029444734243, "grad_norm": 1.5225952863693237, "learning_rate": 1.6736956436465573e-05, "loss": 0.5068, "step": 11925 }, { "epoch": 0.2648139310329519, "grad_norm": 0.8884637951850891, "learning_rate": 1.6734379262299957e-05, "loss": 0.4416, "step": 11930 }, { "epoch": 0.26492491759247955, "grad_norm": 1.1771692037582397, "learning_rate": 1.6731801269391098e-05, "loss": 0.518, "step": 11935 }, { "epoch": 0.2650359041520072, "grad_norm": 0.838868260383606, "learning_rate": 1.672922245805242e-05, "loss": 0.5394, "step": 11940 }, { "epoch": 0.26514689071153486, "grad_norm": 1.4811780452728271, "learning_rate": 1.6726642828597436e-05, "loss": 0.5489, "step": 11945 }, { "epoch": 0.2652578772710625, "grad_norm": 0.8113346099853516, "learning_rate": 1.672406238133978e-05, "loss": 0.4482, "step": 11950 }, { "epoch": 0.2653688638305901, "grad_norm": 1.0179511308670044, "learning_rate": 1.672148111659317e-05, "loss": 0.382, "step": 11955 }, { "epoch": 0.26547985039011773, "grad_norm": 1.4388052225112915, "learning_rate": 1.671889903467143e-05, "loss": 0.3759, "step": 11960 }, { "epoch": 0.2655908369496454, "grad_norm": 1.3310468196868896, "learning_rate": 1.6716316135888478e-05, "loss": 0.3242, "step": 11965 }, { "epoch": 0.26570182350917304, "grad_norm": 1.3747960329055786, "learning_rate": 1.6713732420558333e-05, "loss": 0.4654, "step": 11970 }, { "epoch": 0.26581281006870067, "grad_norm": 1.1084215641021729, "learning_rate": 1.6711147888995117e-05, "loss": 0.4765, "step": 11975 }, { "epoch": 0.26592379662822835, "grad_norm": 1.0055367946624756, "learning_rate": 1.670856254151305e-05, "loss": 0.4503, "step": 11980 }, { "epoch": 0.266034783187756, "grad_norm": 1.1023660898208618, "learning_rate": 1.6705976378426447e-05, "loss": 0.4315, "step": 11985 }, { "epoch": 0.2661457697472836, "grad_norm": 1.4151127338409424, "learning_rate": 1.6703389400049724e-05, "loss": 0.4301, "step": 11990 }, { "epoch": 0.2662567563068112, "grad_norm": 1.0657737255096436, "learning_rate": 1.67008016066974e-05, "loss": 0.4067, "step": 11995 }, { "epoch": 0.2663677428663389, "grad_norm": 1.3572338819503784, "learning_rate": 1.669821299868409e-05, "loss": 0.504, "step": 12000 }, { "epoch": 0.26647872942586653, "grad_norm": 1.3516322374343872, "learning_rate": 1.669562357632451e-05, "loss": 0.3876, "step": 12005 }, { "epoch": 0.26658971598539416, "grad_norm": 1.1288280487060547, "learning_rate": 1.669303333993347e-05, "loss": 0.5649, "step": 12010 }, { "epoch": 0.2667007025449218, "grad_norm": 1.294328212738037, "learning_rate": 1.6690442289825882e-05, "loss": 0.6067, "step": 12015 }, { "epoch": 0.26681168910444947, "grad_norm": 0.8820907473564148, "learning_rate": 1.6687850426316758e-05, "loss": 0.4497, "step": 12020 }, { "epoch": 0.2669226756639771, "grad_norm": 0.8859009742736816, "learning_rate": 1.668525774972121e-05, "loss": 0.4719, "step": 12025 }, { "epoch": 0.2670336622235047, "grad_norm": 1.128180742263794, "learning_rate": 1.6682664260354445e-05, "loss": 0.5357, "step": 12030 }, { "epoch": 0.2671446487830324, "grad_norm": 1.1565715074539185, "learning_rate": 1.6680069958531772e-05, "loss": 0.4663, "step": 12035 }, { "epoch": 0.26725563534256, "grad_norm": 1.0250076055526733, "learning_rate": 1.6677474844568593e-05, "loss": 0.5358, "step": 12040 }, { "epoch": 0.26736662190208765, "grad_norm": 1.1731986999511719, "learning_rate": 1.667487891878042e-05, "loss": 0.4901, "step": 12045 }, { "epoch": 0.2674776084616153, "grad_norm": 1.184151291847229, "learning_rate": 1.667228218148285e-05, "loss": 0.3815, "step": 12050 }, { "epoch": 0.26758859502114296, "grad_norm": 1.1311311721801758, "learning_rate": 1.6669684632991594e-05, "loss": 0.5596, "step": 12055 }, { "epoch": 0.2676995815806706, "grad_norm": 1.0905286073684692, "learning_rate": 1.6667086273622447e-05, "loss": 0.4146, "step": 12060 }, { "epoch": 0.2678105681401982, "grad_norm": 1.4904059171676636, "learning_rate": 1.666448710369131e-05, "loss": 0.6837, "step": 12065 }, { "epoch": 0.26792155469972584, "grad_norm": 1.134896159172058, "learning_rate": 1.6661887123514183e-05, "loss": 0.3944, "step": 12070 }, { "epoch": 0.2680325412592535, "grad_norm": 1.0507289171218872, "learning_rate": 1.665928633340716e-05, "loss": 0.3522, "step": 12075 }, { "epoch": 0.26814352781878115, "grad_norm": 1.3701528310775757, "learning_rate": 1.6656684733686443e-05, "loss": 0.4823, "step": 12080 }, { "epoch": 0.2682545143783088, "grad_norm": 1.105454683303833, "learning_rate": 1.6654082324668316e-05, "loss": 0.4494, "step": 12085 }, { "epoch": 0.26836550093783645, "grad_norm": 1.7847298383712769, "learning_rate": 1.6651479106669177e-05, "loss": 0.3987, "step": 12090 }, { "epoch": 0.2684764874973641, "grad_norm": 1.2652286291122437, "learning_rate": 1.6648875080005515e-05, "loss": 0.5751, "step": 12095 }, { "epoch": 0.2685874740568917, "grad_norm": 1.2714149951934814, "learning_rate": 1.664627024499392e-05, "loss": 0.5911, "step": 12100 }, { "epoch": 0.26869846061641933, "grad_norm": 1.3231234550476074, "learning_rate": 1.664366460195108e-05, "loss": 0.5036, "step": 12105 }, { "epoch": 0.268809447175947, "grad_norm": 1.1678462028503418, "learning_rate": 1.6641058151193776e-05, "loss": 0.5526, "step": 12110 }, { "epoch": 0.26892043373547464, "grad_norm": 1.274032711982727, "learning_rate": 1.6638450893038895e-05, "loss": 0.5519, "step": 12115 }, { "epoch": 0.26903142029500227, "grad_norm": 1.2381004095077515, "learning_rate": 1.663584282780342e-05, "loss": 0.4028, "step": 12120 }, { "epoch": 0.2691424068545299, "grad_norm": 1.223659873008728, "learning_rate": 1.6633233955804428e-05, "loss": 0.4646, "step": 12125 }, { "epoch": 0.2692533934140576, "grad_norm": 1.3925018310546875, "learning_rate": 1.66306242773591e-05, "loss": 0.3454, "step": 12130 }, { "epoch": 0.2693643799735852, "grad_norm": 1.2066676616668701, "learning_rate": 1.6628013792784705e-05, "loss": 0.5239, "step": 12135 }, { "epoch": 0.2694753665331128, "grad_norm": 1.4494436979293823, "learning_rate": 1.6625402502398623e-05, "loss": 0.4883, "step": 12140 }, { "epoch": 0.2695863530926405, "grad_norm": 1.3394190073013306, "learning_rate": 1.6622790406518327e-05, "loss": 0.508, "step": 12145 }, { "epoch": 0.26969733965216813, "grad_norm": 0.7215554714202881, "learning_rate": 1.6620177505461383e-05, "loss": 0.4276, "step": 12150 }, { "epoch": 0.26980832621169576, "grad_norm": 1.1991218328475952, "learning_rate": 1.6617563799545462e-05, "loss": 0.3537, "step": 12155 }, { "epoch": 0.2699193127712234, "grad_norm": 0.8841428756713867, "learning_rate": 1.6614949289088323e-05, "loss": 0.3806, "step": 12160 }, { "epoch": 0.27003029933075107, "grad_norm": 1.0124881267547607, "learning_rate": 1.661233397440784e-05, "loss": 0.6561, "step": 12165 }, { "epoch": 0.2701412858902787, "grad_norm": 1.4342435598373413, "learning_rate": 1.6609717855821965e-05, "loss": 0.3026, "step": 12170 }, { "epoch": 0.2702522724498063, "grad_norm": 1.2713871002197266, "learning_rate": 1.6607100933648763e-05, "loss": 0.489, "step": 12175 }, { "epoch": 0.27036325900933394, "grad_norm": 1.5005031824111938, "learning_rate": 1.6604483208206387e-05, "loss": 0.4706, "step": 12180 }, { "epoch": 0.2704742455688616, "grad_norm": 1.0889896154403687, "learning_rate": 1.6601864679813088e-05, "loss": 0.4989, "step": 12185 }, { "epoch": 0.27058523212838925, "grad_norm": 1.6298508644104004, "learning_rate": 1.659924534878723e-05, "loss": 0.4919, "step": 12190 }, { "epoch": 0.2706962186879169, "grad_norm": 1.2517374753952026, "learning_rate": 1.659662521544725e-05, "loss": 0.4966, "step": 12195 }, { "epoch": 0.27080720524744456, "grad_norm": 1.2407162189483643, "learning_rate": 1.6594004280111697e-05, "loss": 0.6297, "step": 12200 }, { "epoch": 0.2709181918069722, "grad_norm": 1.2246013879776, "learning_rate": 1.6591382543099222e-05, "loss": 0.5794, "step": 12205 }, { "epoch": 0.2710291783664998, "grad_norm": 1.1958905458450317, "learning_rate": 1.6588760004728565e-05, "loss": 0.4974, "step": 12210 }, { "epoch": 0.27114016492602744, "grad_norm": 1.1367417573928833, "learning_rate": 1.658613666531856e-05, "loss": 0.4294, "step": 12215 }, { "epoch": 0.2712511514855551, "grad_norm": 0.8926473259925842, "learning_rate": 1.6583512525188146e-05, "loss": 0.4302, "step": 12220 }, { "epoch": 0.27136213804508275, "grad_norm": 1.131288766860962, "learning_rate": 1.658088758465636e-05, "loss": 0.528, "step": 12225 }, { "epoch": 0.27147312460461037, "grad_norm": 1.3164432048797607, "learning_rate": 1.6578261844042335e-05, "loss": 0.6973, "step": 12230 }, { "epoch": 0.271584111164138, "grad_norm": 1.7050387859344482, "learning_rate": 1.6575635303665296e-05, "loss": 0.4737, "step": 12235 }, { "epoch": 0.2716950977236657, "grad_norm": 1.5529253482818604, "learning_rate": 1.657300796384457e-05, "loss": 0.5328, "step": 12240 }, { "epoch": 0.2718060842831933, "grad_norm": 0.7697571516036987, "learning_rate": 1.6570379824899576e-05, "loss": 0.6543, "step": 12245 }, { "epoch": 0.27191707084272093, "grad_norm": 0.7909753322601318, "learning_rate": 1.656775088714984e-05, "loss": 0.4149, "step": 12250 }, { "epoch": 0.2720280574022486, "grad_norm": 1.0096886157989502, "learning_rate": 1.656512115091498e-05, "loss": 0.4909, "step": 12255 }, { "epoch": 0.27213904396177624, "grad_norm": 0.891508162021637, "learning_rate": 1.6562490616514705e-05, "loss": 0.3828, "step": 12260 }, { "epoch": 0.27225003052130387, "grad_norm": 2.050764560699463, "learning_rate": 1.6559859284268833e-05, "loss": 0.5661, "step": 12265 }, { "epoch": 0.2723610170808315, "grad_norm": 1.347254991531372, "learning_rate": 1.6557227154497266e-05, "loss": 0.5759, "step": 12270 }, { "epoch": 0.2724720036403592, "grad_norm": 0.8337706327438354, "learning_rate": 1.6554594227520015e-05, "loss": 0.5092, "step": 12275 }, { "epoch": 0.2725829901998868, "grad_norm": 1.2662127017974854, "learning_rate": 1.6551960503657182e-05, "loss": 0.4536, "step": 12280 }, { "epoch": 0.2726939767594144, "grad_norm": 1.0730518102645874, "learning_rate": 1.654932598322896e-05, "loss": 0.4121, "step": 12285 }, { "epoch": 0.27280496331894205, "grad_norm": 1.139347791671753, "learning_rate": 1.6546690666555652e-05, "loss": 0.4884, "step": 12290 }, { "epoch": 0.27291594987846973, "grad_norm": 1.3560361862182617, "learning_rate": 1.654405455395765e-05, "loss": 0.4048, "step": 12295 }, { "epoch": 0.27302693643799736, "grad_norm": 0.7781074643135071, "learning_rate": 1.654141764575544e-05, "loss": 0.3949, "step": 12300 }, { "epoch": 0.273137922997525, "grad_norm": 0.5743057131767273, "learning_rate": 1.6538779942269613e-05, "loss": 0.3579, "step": 12305 }, { "epoch": 0.27324890955705267, "grad_norm": 1.3679219484329224, "learning_rate": 1.6536141443820844e-05, "loss": 0.557, "step": 12310 }, { "epoch": 0.2733598961165803, "grad_norm": 1.3961282968521118, "learning_rate": 1.6533502150729925e-05, "loss": 0.4722, "step": 12315 }, { "epoch": 0.2734708826761079, "grad_norm": 1.3438667058944702, "learning_rate": 1.6530862063317726e-05, "loss": 0.465, "step": 12320 }, { "epoch": 0.27358186923563554, "grad_norm": 1.1423835754394531, "learning_rate": 1.6528221181905217e-05, "loss": 0.5002, "step": 12325 }, { "epoch": 0.2736928557951632, "grad_norm": 0.8907164335250854, "learning_rate": 1.6525579506813472e-05, "loss": 0.3263, "step": 12330 }, { "epoch": 0.27380384235469085, "grad_norm": 1.622192144393921, "learning_rate": 1.652293703836366e-05, "loss": 0.5461, "step": 12335 }, { "epoch": 0.2739148289142185, "grad_norm": 0.7274703979492188, "learning_rate": 1.6520293776877033e-05, "loss": 0.2692, "step": 12340 }, { "epoch": 0.2740258154737461, "grad_norm": 1.012320876121521, "learning_rate": 1.6517649722674958e-05, "loss": 0.4936, "step": 12345 }, { "epoch": 0.2741368020332738, "grad_norm": 1.2902557849884033, "learning_rate": 1.6515004876078887e-05, "loss": 0.5255, "step": 12350 }, { "epoch": 0.2742477885928014, "grad_norm": 1.0321226119995117, "learning_rate": 1.6512359237410375e-05, "loss": 0.37, "step": 12355 }, { "epoch": 0.27435877515232904, "grad_norm": 1.5266025066375732, "learning_rate": 1.650971280699107e-05, "loss": 0.4362, "step": 12360 }, { "epoch": 0.2744697617118567, "grad_norm": 1.1383706331253052, "learning_rate": 1.6507065585142707e-05, "loss": 0.3219, "step": 12365 }, { "epoch": 0.27458074827138435, "grad_norm": 1.4877493381500244, "learning_rate": 1.6504417572187138e-05, "loss": 0.6639, "step": 12370 }, { "epoch": 0.27469173483091197, "grad_norm": 1.5238726139068604, "learning_rate": 1.6501768768446292e-05, "loss": 0.5665, "step": 12375 }, { "epoch": 0.2748027213904396, "grad_norm": 0.9921168684959412, "learning_rate": 1.6499119174242207e-05, "loss": 0.4326, "step": 12380 }, { "epoch": 0.2749137079499673, "grad_norm": 1.0095458030700684, "learning_rate": 1.649646878989701e-05, "loss": 0.3943, "step": 12385 }, { "epoch": 0.2750246945094949, "grad_norm": 1.2044517993927002, "learning_rate": 1.649381761573292e-05, "loss": 0.4818, "step": 12390 }, { "epoch": 0.27513568106902253, "grad_norm": 0.9758161306381226, "learning_rate": 1.6491165652072268e-05, "loss": 0.4202, "step": 12395 }, { "epoch": 0.27524666762855016, "grad_norm": 1.5961153507232666, "learning_rate": 1.648851289923746e-05, "loss": 0.4546, "step": 12400 }, { "epoch": 0.27535765418807784, "grad_norm": 1.5342501401901245, "learning_rate": 1.648585935755102e-05, "loss": 0.5894, "step": 12405 }, { "epoch": 0.27546864074760546, "grad_norm": 1.1912126541137695, "learning_rate": 1.648320502733555e-05, "loss": 0.4693, "step": 12410 }, { "epoch": 0.2755796273071331, "grad_norm": 1.8135324716567993, "learning_rate": 1.6480549908913756e-05, "loss": 0.3781, "step": 12415 }, { "epoch": 0.27569061386666077, "grad_norm": 1.4575508832931519, "learning_rate": 1.6477894002608435e-05, "loss": 0.331, "step": 12420 }, { "epoch": 0.2758016004261884, "grad_norm": 1.269692063331604, "learning_rate": 1.647523730874249e-05, "loss": 0.4155, "step": 12425 }, { "epoch": 0.275912586985716, "grad_norm": 1.1125141382217407, "learning_rate": 1.6472579827638906e-05, "loss": 0.4862, "step": 12430 }, { "epoch": 0.27602357354524365, "grad_norm": 1.6777321100234985, "learning_rate": 1.6469921559620777e-05, "loss": 0.3672, "step": 12435 }, { "epoch": 0.27613456010477133, "grad_norm": 1.0261355638504028, "learning_rate": 1.6467262505011282e-05, "loss": 0.3913, "step": 12440 }, { "epoch": 0.27624554666429896, "grad_norm": 1.3293694257736206, "learning_rate": 1.64646026641337e-05, "loss": 0.5825, "step": 12445 }, { "epoch": 0.2763565332238266, "grad_norm": 1.1149784326553345, "learning_rate": 1.6461942037311406e-05, "loss": 0.348, "step": 12450 }, { "epoch": 0.2764675197833542, "grad_norm": 0.995347261428833, "learning_rate": 1.6459280624867876e-05, "loss": 0.421, "step": 12455 }, { "epoch": 0.2765785063428819, "grad_norm": 0.7128069400787354, "learning_rate": 1.6456618427126664e-05, "loss": 0.4621, "step": 12460 }, { "epoch": 0.2766894929024095, "grad_norm": 1.2790063619613647, "learning_rate": 1.645395544441144e-05, "loss": 0.554, "step": 12465 }, { "epoch": 0.27680047946193714, "grad_norm": 1.239193320274353, "learning_rate": 1.645129167704596e-05, "loss": 0.3666, "step": 12470 }, { "epoch": 0.2769114660214648, "grad_norm": 1.1735997200012207, "learning_rate": 1.644862712535407e-05, "loss": 0.6032, "step": 12475 }, { "epoch": 0.27702245258099245, "grad_norm": 1.070043683052063, "learning_rate": 1.6445961789659724e-05, "loss": 0.5279, "step": 12480 }, { "epoch": 0.2771334391405201, "grad_norm": 2.237879991531372, "learning_rate": 1.644329567028696e-05, "loss": 0.5118, "step": 12485 }, { "epoch": 0.2772444257000477, "grad_norm": 0.990967333316803, "learning_rate": 1.644062876755992e-05, "loss": 0.5334, "step": 12490 }, { "epoch": 0.2773554122595754, "grad_norm": 1.605442762374878, "learning_rate": 1.6437961081802835e-05, "loss": 0.5114, "step": 12495 }, { "epoch": 0.277466398819103, "grad_norm": 1.337741494178772, "learning_rate": 1.643529261334003e-05, "loss": 0.6015, "step": 12500 }, { "epoch": 0.27757738537863064, "grad_norm": 1.5959599018096924, "learning_rate": 1.643262336249593e-05, "loss": 0.5041, "step": 12505 }, { "epoch": 0.27768837193815826, "grad_norm": 1.140795350074768, "learning_rate": 1.642995332959506e-05, "loss": 0.5169, "step": 12510 }, { "epoch": 0.27779935849768594, "grad_norm": 1.01712965965271, "learning_rate": 1.6427282514962027e-05, "loss": 0.3743, "step": 12515 }, { "epoch": 0.27791034505721357, "grad_norm": 1.3541737794876099, "learning_rate": 1.642461091892154e-05, "loss": 0.595, "step": 12520 }, { "epoch": 0.2780213316167412, "grad_norm": 0.8120675683021545, "learning_rate": 1.64219385417984e-05, "loss": 0.4762, "step": 12525 }, { "epoch": 0.2781323181762689, "grad_norm": 1.5176632404327393, "learning_rate": 1.6419265383917515e-05, "loss": 0.5956, "step": 12530 }, { "epoch": 0.2782433047357965, "grad_norm": 2.3387813568115234, "learning_rate": 1.641659144560387e-05, "loss": 0.4821, "step": 12535 }, { "epoch": 0.27835429129532413, "grad_norm": 1.152773380279541, "learning_rate": 1.6413916727182562e-05, "loss": 0.4751, "step": 12540 }, { "epoch": 0.27846527785485176, "grad_norm": 0.9320288300514221, "learning_rate": 1.6411241228978764e-05, "loss": 0.4971, "step": 12545 }, { "epoch": 0.27857626441437944, "grad_norm": 1.1640987396240234, "learning_rate": 1.640856495131776e-05, "loss": 0.4763, "step": 12550 }, { "epoch": 0.27868725097390706, "grad_norm": 1.433164119720459, "learning_rate": 1.6405887894524925e-05, "loss": 0.4243, "step": 12555 }, { "epoch": 0.2787982375334347, "grad_norm": 1.2848843336105347, "learning_rate": 1.640321005892572e-05, "loss": 0.4844, "step": 12560 }, { "epoch": 0.2789092240929623, "grad_norm": 1.153416633605957, "learning_rate": 1.640053144484571e-05, "loss": 0.5273, "step": 12565 }, { "epoch": 0.27902021065249, "grad_norm": 1.2133926153182983, "learning_rate": 1.6397852052610554e-05, "loss": 0.6296, "step": 12570 }, { "epoch": 0.2791311972120176, "grad_norm": 1.1986991167068481, "learning_rate": 1.6395171882546002e-05, "loss": 0.5515, "step": 12575 }, { "epoch": 0.27924218377154525, "grad_norm": 0.9386587142944336, "learning_rate": 1.63924909349779e-05, "loss": 0.5135, "step": 12580 }, { "epoch": 0.27935317033107293, "grad_norm": 0.9406764507293701, "learning_rate": 1.6389809210232193e-05, "loss": 0.4616, "step": 12585 }, { "epoch": 0.27946415689060056, "grad_norm": 1.2825262546539307, "learning_rate": 1.6387126708634905e-05, "loss": 0.5227, "step": 12590 }, { "epoch": 0.2795751434501282, "grad_norm": 0.9845243096351624, "learning_rate": 1.6384443430512176e-05, "loss": 0.5347, "step": 12595 }, { "epoch": 0.2796861300096558, "grad_norm": 1.4052627086639404, "learning_rate": 1.638175937619023e-05, "loss": 0.3989, "step": 12600 }, { "epoch": 0.2797971165691835, "grad_norm": 1.7123472690582275, "learning_rate": 1.6379074545995374e-05, "loss": 0.5085, "step": 12605 }, { "epoch": 0.2799081031287111, "grad_norm": 0.9626907706260681, "learning_rate": 1.6376388940254034e-05, "loss": 0.5472, "step": 12610 }, { "epoch": 0.28001908968823874, "grad_norm": 1.1217817068099976, "learning_rate": 1.6373702559292712e-05, "loss": 0.4536, "step": 12615 }, { "epoch": 0.28013007624776637, "grad_norm": 1.1611618995666504, "learning_rate": 1.6371015403438006e-05, "loss": 0.4647, "step": 12620 }, { "epoch": 0.28024106280729405, "grad_norm": 1.1444199085235596, "learning_rate": 1.6368327473016613e-05, "loss": 0.3815, "step": 12625 }, { "epoch": 0.2803520493668217, "grad_norm": 1.4538722038269043, "learning_rate": 1.6365638768355325e-05, "loss": 0.4752, "step": 12630 }, { "epoch": 0.2804630359263493, "grad_norm": 1.1787018775939941, "learning_rate": 1.6362949289781026e-05, "loss": 0.4783, "step": 12635 }, { "epoch": 0.280574022485877, "grad_norm": 1.4149473905563354, "learning_rate": 1.6360259037620688e-05, "loss": 0.4009, "step": 12640 }, { "epoch": 0.2806850090454046, "grad_norm": 1.4412766695022583, "learning_rate": 1.635756801220139e-05, "loss": 0.5886, "step": 12645 }, { "epoch": 0.28079599560493224, "grad_norm": 1.1926918029785156, "learning_rate": 1.6354876213850296e-05, "loss": 0.4492, "step": 12650 }, { "epoch": 0.28090698216445986, "grad_norm": 1.1497153043746948, "learning_rate": 1.6352183642894662e-05, "loss": 0.4405, "step": 12655 }, { "epoch": 0.28101796872398754, "grad_norm": 1.1040529012680054, "learning_rate": 1.6349490299661845e-05, "loss": 0.4971, "step": 12660 }, { "epoch": 0.28112895528351517, "grad_norm": 1.356378197669983, "learning_rate": 1.6346796184479293e-05, "loss": 0.4433, "step": 12665 }, { "epoch": 0.2812399418430428, "grad_norm": 1.1610051393508911, "learning_rate": 1.6344101297674545e-05, "loss": 0.5633, "step": 12670 }, { "epoch": 0.2813509284025704, "grad_norm": 1.1507076025009155, "learning_rate": 1.6341405639575235e-05, "loss": 0.4373, "step": 12675 }, { "epoch": 0.2814619149620981, "grad_norm": 1.0662920475006104, "learning_rate": 1.6338709210509098e-05, "loss": 0.6102, "step": 12680 }, { "epoch": 0.28157290152162573, "grad_norm": 1.0761206150054932, "learning_rate": 1.6336012010803953e-05, "loss": 0.664, "step": 12685 }, { "epoch": 0.28168388808115336, "grad_norm": 0.75835782289505, "learning_rate": 1.6333314040787716e-05, "loss": 0.3878, "step": 12690 }, { "epoch": 0.28179487464068104, "grad_norm": 1.2015706300735474, "learning_rate": 1.6330615300788403e-05, "loss": 0.4494, "step": 12695 }, { "epoch": 0.28190586120020866, "grad_norm": 1.2543123960494995, "learning_rate": 1.6327915791134107e-05, "loss": 0.4978, "step": 12700 }, { "epoch": 0.2820168477597363, "grad_norm": 1.263872504234314, "learning_rate": 1.6325215512153035e-05, "loss": 0.6351, "step": 12705 }, { "epoch": 0.2821278343192639, "grad_norm": 1.4159135818481445, "learning_rate": 1.6322514464173472e-05, "loss": 0.4785, "step": 12710 }, { "epoch": 0.2822388208787916, "grad_norm": 1.1585519313812256, "learning_rate": 1.6319812647523805e-05, "loss": 0.513, "step": 12715 }, { "epoch": 0.2823498074383192, "grad_norm": 1.630321979522705, "learning_rate": 1.631711006253251e-05, "loss": 0.5437, "step": 12720 }, { "epoch": 0.28246079399784685, "grad_norm": 1.5868574380874634, "learning_rate": 1.6314406709528164e-05, "loss": 0.3609, "step": 12725 }, { "epoch": 0.2825717805573745, "grad_norm": 1.2006999254226685, "learning_rate": 1.6311702588839423e-05, "loss": 0.4274, "step": 12730 }, { "epoch": 0.28268276711690216, "grad_norm": 1.2211520671844482, "learning_rate": 1.630899770079505e-05, "loss": 0.4321, "step": 12735 }, { "epoch": 0.2827937536764298, "grad_norm": 0.8724921941757202, "learning_rate": 1.6306292045723894e-05, "loss": 0.3865, "step": 12740 }, { "epoch": 0.2829047402359574, "grad_norm": 1.4219202995300293, "learning_rate": 1.6303585623954904e-05, "loss": 0.5281, "step": 12745 }, { "epoch": 0.2830157267954851, "grad_norm": 1.7321420907974243, "learning_rate": 1.6300878435817115e-05, "loss": 0.5838, "step": 12750 }, { "epoch": 0.2831267133550127, "grad_norm": 1.188020944595337, "learning_rate": 1.629817048163965e-05, "loss": 0.6092, "step": 12755 }, { "epoch": 0.28323769991454034, "grad_norm": 1.03322434425354, "learning_rate": 1.629546176175175e-05, "loss": 0.3896, "step": 12760 }, { "epoch": 0.28334868647406797, "grad_norm": 1.1465661525726318, "learning_rate": 1.6292752276482714e-05, "loss": 0.4183, "step": 12765 }, { "epoch": 0.28345967303359565, "grad_norm": 1.3475068807601929, "learning_rate": 1.6290042026161964e-05, "loss": 0.4569, "step": 12770 }, { "epoch": 0.2835706595931233, "grad_norm": 0.868467390537262, "learning_rate": 1.6287331011119002e-05, "loss": 0.3988, "step": 12775 }, { "epoch": 0.2836816461526509, "grad_norm": 1.208096981048584, "learning_rate": 1.6284619231683418e-05, "loss": 0.417, "step": 12780 }, { "epoch": 0.2837926327121785, "grad_norm": 1.2644482851028442, "learning_rate": 1.6281906688184905e-05, "loss": 0.4912, "step": 12785 }, { "epoch": 0.2839036192717062, "grad_norm": 1.1698830127716064, "learning_rate": 1.6279193380953247e-05, "loss": 0.4079, "step": 12790 }, { "epoch": 0.28401460583123384, "grad_norm": 1.3247534036636353, "learning_rate": 1.6276479310318315e-05, "loss": 0.5211, "step": 12795 }, { "epoch": 0.28412559239076146, "grad_norm": 1.4367425441741943, "learning_rate": 1.627376447661008e-05, "loss": 0.4516, "step": 12800 }, { "epoch": 0.28423657895028914, "grad_norm": 1.5465408563613892, "learning_rate": 1.62710488801586e-05, "loss": 0.4393, "step": 12805 }, { "epoch": 0.28434756550981677, "grad_norm": 1.4768105745315552, "learning_rate": 1.626833252129403e-05, "loss": 0.4672, "step": 12810 }, { "epoch": 0.2844585520693444, "grad_norm": 0.8354441523551941, "learning_rate": 1.626561540034661e-05, "loss": 0.5801, "step": 12815 }, { "epoch": 0.284569538628872, "grad_norm": 1.3228951692581177, "learning_rate": 1.6262897517646684e-05, "loss": 0.52, "step": 12820 }, { "epoch": 0.2846805251883997, "grad_norm": 1.2378075122833252, "learning_rate": 1.6260178873524682e-05, "loss": 0.5024, "step": 12825 }, { "epoch": 0.28479151174792733, "grad_norm": 0.9964748024940491, "learning_rate": 1.625745946831113e-05, "loss": 0.4219, "step": 12830 }, { "epoch": 0.28490249830745495, "grad_norm": 1.212092638015747, "learning_rate": 1.625473930233664e-05, "loss": 0.6788, "step": 12835 }, { "epoch": 0.28501348486698264, "grad_norm": 1.2740265130996704, "learning_rate": 1.6252018375931923e-05, "loss": 0.5355, "step": 12840 }, { "epoch": 0.28512447142651026, "grad_norm": 1.424972653388977, "learning_rate": 1.624929668942778e-05, "loss": 0.4398, "step": 12845 }, { "epoch": 0.2852354579860379, "grad_norm": 1.238060474395752, "learning_rate": 1.62465742431551e-05, "loss": 0.6256, "step": 12850 }, { "epoch": 0.2853464445455655, "grad_norm": 1.0929948091506958, "learning_rate": 1.624385103744488e-05, "loss": 0.5098, "step": 12855 }, { "epoch": 0.2854574311050932, "grad_norm": 1.0573160648345947, "learning_rate": 1.6241127072628186e-05, "loss": 0.4568, "step": 12860 }, { "epoch": 0.2855684176646208, "grad_norm": 0.8821727633476257, "learning_rate": 1.623840234903619e-05, "loss": 0.4309, "step": 12865 }, { "epoch": 0.28567940422414845, "grad_norm": 1.5702420473098755, "learning_rate": 1.623567686700017e-05, "loss": 0.7043, "step": 12870 }, { "epoch": 0.2857903907836761, "grad_norm": 1.2420892715454102, "learning_rate": 1.6232950626851458e-05, "loss": 0.3862, "step": 12875 }, { "epoch": 0.28590137734320376, "grad_norm": 1.5035573244094849, "learning_rate": 1.6230223628921518e-05, "loss": 0.4591, "step": 12880 }, { "epoch": 0.2860123639027314, "grad_norm": 1.1416951417922974, "learning_rate": 1.6227495873541883e-05, "loss": 0.4764, "step": 12885 }, { "epoch": 0.286123350462259, "grad_norm": 3.584693193435669, "learning_rate": 1.6224767361044186e-05, "loss": 0.416, "step": 12890 }, { "epoch": 0.2862343370217867, "grad_norm": 1.1235774755477905, "learning_rate": 1.6222038091760145e-05, "loss": 0.474, "step": 12895 }, { "epoch": 0.2863453235813143, "grad_norm": 0.9305126667022705, "learning_rate": 1.6219308066021584e-05, "loss": 0.4715, "step": 12900 }, { "epoch": 0.28645631014084194, "grad_norm": 1.4266836643218994, "learning_rate": 1.6216577284160408e-05, "loss": 0.4006, "step": 12905 }, { "epoch": 0.28656729670036957, "grad_norm": 1.0521955490112305, "learning_rate": 1.6213845746508612e-05, "loss": 0.6069, "step": 12910 }, { "epoch": 0.28667828325989725, "grad_norm": 1.033524990081787, "learning_rate": 1.621111345339829e-05, "loss": 0.4645, "step": 12915 }, { "epoch": 0.2867892698194249, "grad_norm": 1.3503462076187134, "learning_rate": 1.6208380405161623e-05, "loss": 0.5717, "step": 12920 }, { "epoch": 0.2869002563789525, "grad_norm": 1.0602374076843262, "learning_rate": 1.6205646602130893e-05, "loss": 0.3502, "step": 12925 }, { "epoch": 0.2870112429384801, "grad_norm": 1.0799397230148315, "learning_rate": 1.6202912044638453e-05, "loss": 0.4389, "step": 12930 }, { "epoch": 0.2871222294980078, "grad_norm": 1.3221521377563477, "learning_rate": 1.6200176733016775e-05, "loss": 0.5155, "step": 12935 }, { "epoch": 0.28723321605753543, "grad_norm": 1.3143270015716553, "learning_rate": 1.6197440667598404e-05, "loss": 0.6275, "step": 12940 }, { "epoch": 0.28734420261706306, "grad_norm": 0.879645049571991, "learning_rate": 1.619470384871598e-05, "loss": 0.539, "step": 12945 }, { "epoch": 0.28745518917659074, "grad_norm": 1.2223107814788818, "learning_rate": 1.6191966276702235e-05, "loss": 0.648, "step": 12950 }, { "epoch": 0.28756617573611837, "grad_norm": 1.7858378887176514, "learning_rate": 1.618922795189e-05, "loss": 0.5519, "step": 12955 }, { "epoch": 0.287677162295646, "grad_norm": 1.4419081211090088, "learning_rate": 1.6186488874612186e-05, "loss": 0.4212, "step": 12960 }, { "epoch": 0.2877881488551736, "grad_norm": 1.1229172945022583, "learning_rate": 1.6183749045201804e-05, "loss": 0.3266, "step": 12965 }, { "epoch": 0.2878991354147013, "grad_norm": 1.1010665893554688, "learning_rate": 1.6181008463991948e-05, "loss": 0.4354, "step": 12970 }, { "epoch": 0.2880101219742289, "grad_norm": 1.0741912126541138, "learning_rate": 1.6178267131315816e-05, "loss": 0.5179, "step": 12975 }, { "epoch": 0.28812110853375655, "grad_norm": 1.205074667930603, "learning_rate": 1.6175525047506686e-05, "loss": 0.5594, "step": 12980 }, { "epoch": 0.2882320950932842, "grad_norm": 1.9375433921813965, "learning_rate": 1.617278221289793e-05, "loss": 0.4357, "step": 12985 }, { "epoch": 0.28834308165281186, "grad_norm": 1.1160500049591064, "learning_rate": 1.6170038627823016e-05, "loss": 0.4056, "step": 12990 }, { "epoch": 0.2884540682123395, "grad_norm": 1.011465311050415, "learning_rate": 1.6167294292615498e-05, "loss": 0.4658, "step": 12995 }, { "epoch": 0.2885650547718671, "grad_norm": 1.1458672285079956, "learning_rate": 1.6164549207609024e-05, "loss": 0.4624, "step": 13000 }, { "epoch": 0.2886760413313948, "grad_norm": 1.1138023138046265, "learning_rate": 1.616180337313733e-05, "loss": 0.5013, "step": 13005 }, { "epoch": 0.2887870278909224, "grad_norm": 1.3170747756958008, "learning_rate": 1.615905678953425e-05, "loss": 0.4251, "step": 13010 }, { "epoch": 0.28889801445045005, "grad_norm": 1.0966858863830566, "learning_rate": 1.6156309457133698e-05, "loss": 0.5265, "step": 13015 }, { "epoch": 0.2890090010099777, "grad_norm": 1.7116811275482178, "learning_rate": 1.615356137626969e-05, "loss": 0.4759, "step": 13020 }, { "epoch": 0.28911998756950535, "grad_norm": 0.9442894458770752, "learning_rate": 1.615081254727633e-05, "loss": 0.3931, "step": 13025 }, { "epoch": 0.289230974129033, "grad_norm": 1.521214485168457, "learning_rate": 1.6148062970487804e-05, "loss": 0.5104, "step": 13030 }, { "epoch": 0.2893419606885606, "grad_norm": 1.3783694505691528, "learning_rate": 1.6145312646238406e-05, "loss": 0.5016, "step": 13035 }, { "epoch": 0.28945294724808823, "grad_norm": 1.7637170553207397, "learning_rate": 1.6142561574862505e-05, "loss": 0.4152, "step": 13040 }, { "epoch": 0.2895639338076159, "grad_norm": 1.185720443725586, "learning_rate": 1.6139809756694565e-05, "loss": 0.4615, "step": 13045 }, { "epoch": 0.28967492036714354, "grad_norm": 1.0681581497192383, "learning_rate": 1.6137057192069146e-05, "loss": 0.5975, "step": 13050 }, { "epoch": 0.28978590692667117, "grad_norm": 1.145227313041687, "learning_rate": 1.61343038813209e-05, "loss": 0.7614, "step": 13055 }, { "epoch": 0.28989689348619885, "grad_norm": 2.2884984016418457, "learning_rate": 1.6131549824784557e-05, "loss": 0.503, "step": 13060 }, { "epoch": 0.2900078800457265, "grad_norm": 1.2518259286880493, "learning_rate": 1.6128795022794954e-05, "loss": 0.4066, "step": 13065 }, { "epoch": 0.2901188666052541, "grad_norm": 1.4038550853729248, "learning_rate": 1.6126039475687006e-05, "loss": 0.4795, "step": 13070 }, { "epoch": 0.2902298531647817, "grad_norm": 1.1985700130462646, "learning_rate": 1.612328318379572e-05, "loss": 0.4906, "step": 13075 }, { "epoch": 0.2903408397243094, "grad_norm": 1.0185683965682983, "learning_rate": 1.6120526147456202e-05, "loss": 0.3498, "step": 13080 }, { "epoch": 0.29045182628383703, "grad_norm": 1.2648568153381348, "learning_rate": 1.611776836700364e-05, "loss": 0.5293, "step": 13085 }, { "epoch": 0.29056281284336466, "grad_norm": 1.1476002931594849, "learning_rate": 1.6115009842773322e-05, "loss": 0.6391, "step": 13090 }, { "epoch": 0.2906737994028923, "grad_norm": 1.592239260673523, "learning_rate": 1.611225057510061e-05, "loss": 0.5028, "step": 13095 }, { "epoch": 0.29078478596241997, "grad_norm": 1.238527536392212, "learning_rate": 1.6109490564320974e-05, "loss": 0.3906, "step": 13100 }, { "epoch": 0.2908957725219476, "grad_norm": 1.1238560676574707, "learning_rate": 1.6106729810769968e-05, "loss": 0.4653, "step": 13105 }, { "epoch": 0.2910067590814752, "grad_norm": 1.2527645826339722, "learning_rate": 1.610396831478323e-05, "loss": 0.4405, "step": 13110 }, { "epoch": 0.2911177456410029, "grad_norm": 1.5676945447921753, "learning_rate": 1.6101206076696496e-05, "loss": 0.4006, "step": 13115 }, { "epoch": 0.2912287322005305, "grad_norm": 1.342868447303772, "learning_rate": 1.609844309684559e-05, "loss": 0.5548, "step": 13120 }, { "epoch": 0.29133971876005815, "grad_norm": 1.2602522373199463, "learning_rate": 1.609567937556642e-05, "loss": 0.4057, "step": 13125 }, { "epoch": 0.2914507053195858, "grad_norm": 1.3834385871887207, "learning_rate": 1.6092914913194997e-05, "loss": 0.5533, "step": 13130 }, { "epoch": 0.29156169187911346, "grad_norm": 1.2576740980148315, "learning_rate": 1.6090149710067412e-05, "loss": 0.5351, "step": 13135 }, { "epoch": 0.2916726784386411, "grad_norm": 1.4116957187652588, "learning_rate": 1.608738376651985e-05, "loss": 0.3774, "step": 13140 }, { "epoch": 0.2917836649981687, "grad_norm": 1.215957760810852, "learning_rate": 1.608461708288859e-05, "loss": 0.3787, "step": 13145 }, { "epoch": 0.29189465155769634, "grad_norm": 1.041744351387024, "learning_rate": 1.608184965950999e-05, "loss": 0.4878, "step": 13150 }, { "epoch": 0.292005638117224, "grad_norm": 0.9204108119010925, "learning_rate": 1.60790814967205e-05, "loss": 0.5201, "step": 13155 }, { "epoch": 0.29211662467675165, "grad_norm": 0.9996805787086487, "learning_rate": 1.6076312594856673e-05, "loss": 0.3782, "step": 13160 }, { "epoch": 0.29222761123627927, "grad_norm": 0.9885636568069458, "learning_rate": 1.6073542954255137e-05, "loss": 0.5069, "step": 13165 }, { "epoch": 0.29233859779580695, "grad_norm": 0.8402591347694397, "learning_rate": 1.6070772575252623e-05, "loss": 0.3521, "step": 13170 }, { "epoch": 0.2924495843553346, "grad_norm": 2.6965503692626953, "learning_rate": 1.6068001458185934e-05, "loss": 0.2871, "step": 13175 }, { "epoch": 0.2925605709148622, "grad_norm": 1.0516703128814697, "learning_rate": 1.606522960339198e-05, "loss": 0.5634, "step": 13180 }, { "epoch": 0.29267155747438983, "grad_norm": 1.8383911848068237, "learning_rate": 1.6062457011207753e-05, "loss": 0.5725, "step": 13185 }, { "epoch": 0.2927825440339175, "grad_norm": 1.5198522806167603, "learning_rate": 1.6059683681970334e-05, "loss": 0.3936, "step": 13190 }, { "epoch": 0.29289353059344514, "grad_norm": 1.1313891410827637, "learning_rate": 1.6056909616016895e-05, "loss": 0.5721, "step": 13195 }, { "epoch": 0.29300451715297277, "grad_norm": 1.2808656692504883, "learning_rate": 1.6054134813684697e-05, "loss": 0.5572, "step": 13200 }, { "epoch": 0.2931155037125004, "grad_norm": 1.470443844795227, "learning_rate": 1.6051359275311093e-05, "loss": 0.589, "step": 13205 }, { "epoch": 0.2932264902720281, "grad_norm": 1.6670235395431519, "learning_rate": 1.6048583001233525e-05, "loss": 0.5116, "step": 13210 }, { "epoch": 0.2933374768315557, "grad_norm": 1.2965604066848755, "learning_rate": 1.6045805991789518e-05, "loss": 0.3875, "step": 13215 }, { "epoch": 0.2934484633910833, "grad_norm": 1.1231025457382202, "learning_rate": 1.6043028247316696e-05, "loss": 0.3527, "step": 13220 }, { "epoch": 0.293559449950611, "grad_norm": 1.045440912246704, "learning_rate": 1.6040249768152767e-05, "loss": 0.4948, "step": 13225 }, { "epoch": 0.29367043651013863, "grad_norm": 1.2011396884918213, "learning_rate": 1.603747055463553e-05, "loss": 0.4802, "step": 13230 }, { "epoch": 0.29378142306966626, "grad_norm": 1.2609519958496094, "learning_rate": 1.603469060710287e-05, "loss": 0.5255, "step": 13235 }, { "epoch": 0.2938924096291939, "grad_norm": 1.356063961982727, "learning_rate": 1.603190992589276e-05, "loss": 0.4837, "step": 13240 }, { "epoch": 0.29400339618872157, "grad_norm": 1.5714696645736694, "learning_rate": 1.6029128511343276e-05, "loss": 0.5208, "step": 13245 }, { "epoch": 0.2941143827482492, "grad_norm": 1.3724098205566406, "learning_rate": 1.6026346363792565e-05, "loss": 0.6037, "step": 13250 }, { "epoch": 0.2942253693077768, "grad_norm": 1.2276989221572876, "learning_rate": 1.6023563483578874e-05, "loss": 0.5841, "step": 13255 }, { "epoch": 0.29433635586730444, "grad_norm": 1.3937835693359375, "learning_rate": 1.6020779871040538e-05, "loss": 0.4431, "step": 13260 }, { "epoch": 0.2944473424268321, "grad_norm": 0.9244763851165771, "learning_rate": 1.6017995526515976e-05, "loss": 0.4486, "step": 13265 }, { "epoch": 0.29455832898635975, "grad_norm": 1.135107159614563, "learning_rate": 1.60152104503437e-05, "loss": 0.5151, "step": 13270 }, { "epoch": 0.2946693155458874, "grad_norm": 1.617790937423706, "learning_rate": 1.6012424642862315e-05, "loss": 0.3665, "step": 13275 }, { "epoch": 0.29478030210541506, "grad_norm": 1.2885212898254395, "learning_rate": 1.6009638104410503e-05, "loss": 0.5053, "step": 13280 }, { "epoch": 0.2948912886649427, "grad_norm": 0.9920433759689331, "learning_rate": 1.6006850835327044e-05, "loss": 0.4176, "step": 13285 }, { "epoch": 0.2950022752244703, "grad_norm": 1.2480064630508423, "learning_rate": 1.600406283595081e-05, "loss": 0.5412, "step": 13290 }, { "epoch": 0.29511326178399794, "grad_norm": 1.3954321146011353, "learning_rate": 1.600127410662075e-05, "loss": 0.461, "step": 13295 }, { "epoch": 0.2952242483435256, "grad_norm": 1.1513477563858032, "learning_rate": 1.599848464767591e-05, "loss": 0.5022, "step": 13300 }, { "epoch": 0.29533523490305325, "grad_norm": 1.1079208850860596, "learning_rate": 1.599569445945542e-05, "loss": 0.7639, "step": 13305 }, { "epoch": 0.29544622146258087, "grad_norm": 1.159168004989624, "learning_rate": 1.599290354229851e-05, "loss": 0.5514, "step": 13310 }, { "epoch": 0.2955572080221085, "grad_norm": 0.9496512413024902, "learning_rate": 1.5990111896544488e-05, "loss": 0.4599, "step": 13315 }, { "epoch": 0.2956681945816362, "grad_norm": 1.3379199504852295, "learning_rate": 1.598731952253275e-05, "loss": 0.44, "step": 13320 }, { "epoch": 0.2957791811411638, "grad_norm": 0.7901808619499207, "learning_rate": 1.5984526420602782e-05, "loss": 0.322, "step": 13325 }, { "epoch": 0.29589016770069143, "grad_norm": 1.442884922027588, "learning_rate": 1.5981732591094164e-05, "loss": 0.4746, "step": 13330 }, { "epoch": 0.2960011542602191, "grad_norm": 1.3721030950546265, "learning_rate": 1.5978938034346557e-05, "loss": 0.4736, "step": 13335 }, { "epoch": 0.29611214081974674, "grad_norm": 1.2049025297164917, "learning_rate": 1.597614275069972e-05, "loss": 0.6011, "step": 13340 }, { "epoch": 0.29622312737927436, "grad_norm": 1.2768781185150146, "learning_rate": 1.5973346740493486e-05, "loss": 0.4977, "step": 13345 }, { "epoch": 0.296334113938802, "grad_norm": 1.061110258102417, "learning_rate": 1.597055000406779e-05, "loss": 0.4888, "step": 13350 }, { "epoch": 0.2964451004983297, "grad_norm": 1.1296803951263428, "learning_rate": 1.5967752541762648e-05, "loss": 0.377, "step": 13355 }, { "epoch": 0.2965560870578573, "grad_norm": 1.1014056205749512, "learning_rate": 1.5964954353918163e-05, "loss": 0.5997, "step": 13360 }, { "epoch": 0.2966670736173849, "grad_norm": 0.9890385270118713, "learning_rate": 1.5962155440874535e-05, "loss": 0.5013, "step": 13365 }, { "epoch": 0.29677806017691255, "grad_norm": 1.223296880722046, "learning_rate": 1.5959355802972044e-05, "loss": 0.4773, "step": 13370 }, { "epoch": 0.29688904673644023, "grad_norm": 0.9817351698875427, "learning_rate": 1.595655544055106e-05, "loss": 0.4686, "step": 13375 }, { "epoch": 0.29700003329596786, "grad_norm": 1.1673156023025513, "learning_rate": 1.5953754353952043e-05, "loss": 0.3406, "step": 13380 }, { "epoch": 0.2971110198554955, "grad_norm": 1.035629391670227, "learning_rate": 1.595095254351554e-05, "loss": 0.3881, "step": 13385 }, { "epoch": 0.29722200641502317, "grad_norm": 1.143067479133606, "learning_rate": 1.5948150009582183e-05, "loss": 0.5803, "step": 13390 }, { "epoch": 0.2973329929745508, "grad_norm": 1.0391279458999634, "learning_rate": 1.5945346752492697e-05, "loss": 0.4273, "step": 13395 }, { "epoch": 0.2974439795340784, "grad_norm": 1.1800872087478638, "learning_rate": 1.5942542772587893e-05, "loss": 0.4803, "step": 13400 }, { "epoch": 0.29755496609360604, "grad_norm": 1.7651307582855225, "learning_rate": 1.5939738070208667e-05, "loss": 0.4946, "step": 13405 }, { "epoch": 0.2976659526531337, "grad_norm": 1.0853071212768555, "learning_rate": 1.5936932645696005e-05, "loss": 0.3652, "step": 13410 }, { "epoch": 0.29777693921266135, "grad_norm": 1.0994185209274292, "learning_rate": 1.5934126499390986e-05, "loss": 0.4054, "step": 13415 }, { "epoch": 0.297887925772189, "grad_norm": 0.9863249659538269, "learning_rate": 1.593131963163477e-05, "loss": 0.4278, "step": 13420 }, { "epoch": 0.2979989123317166, "grad_norm": 0.9838482141494751, "learning_rate": 1.59285120427686e-05, "loss": 0.4615, "step": 13425 }, { "epoch": 0.2981098988912443, "grad_norm": 1.1166720390319824, "learning_rate": 1.5925703733133823e-05, "loss": 0.5157, "step": 13430 }, { "epoch": 0.2982208854507719, "grad_norm": 1.2304351329803467, "learning_rate": 1.5922894703071858e-05, "loss": 0.5007, "step": 13435 }, { "epoch": 0.29833187201029954, "grad_norm": 1.102946162223816, "learning_rate": 1.592008495292422e-05, "loss": 0.4565, "step": 13440 }, { "epoch": 0.2984428585698272, "grad_norm": 1.1732909679412842, "learning_rate": 1.5917274483032505e-05, "loss": 0.3954, "step": 13445 }, { "epoch": 0.29855384512935484, "grad_norm": 1.0969271659851074, "learning_rate": 1.5914463293738402e-05, "loss": 0.4611, "step": 13450 }, { "epoch": 0.29866483168888247, "grad_norm": 0.8274635672569275, "learning_rate": 1.5911651385383692e-05, "loss": 0.3072, "step": 13455 }, { "epoch": 0.2987758182484101, "grad_norm": 1.5720192193984985, "learning_rate": 1.5908838758310234e-05, "loss": 0.5685, "step": 13460 }, { "epoch": 0.2988868048079378, "grad_norm": 0.8137319684028625, "learning_rate": 1.590602541285997e-05, "loss": 0.4301, "step": 13465 }, { "epoch": 0.2989977913674654, "grad_norm": 1.040312647819519, "learning_rate": 1.590321134937495e-05, "loss": 0.5921, "step": 13470 }, { "epoch": 0.29910877792699303, "grad_norm": 1.202492117881775, "learning_rate": 1.5900396568197287e-05, "loss": 0.4665, "step": 13475 }, { "epoch": 0.29921976448652066, "grad_norm": 1.6085517406463623, "learning_rate": 1.58975810696692e-05, "loss": 0.4096, "step": 13480 }, { "epoch": 0.29933075104604834, "grad_norm": 0.8945106267929077, "learning_rate": 1.5894764854132985e-05, "loss": 0.4441, "step": 13485 }, { "epoch": 0.29944173760557596, "grad_norm": 1.0774484872817993, "learning_rate": 1.5891947921931027e-05, "loss": 0.4949, "step": 13490 }, { "epoch": 0.2995527241651036, "grad_norm": 0.8812748789787292, "learning_rate": 1.5889130273405805e-05, "loss": 0.5329, "step": 13495 }, { "epoch": 0.29966371072463127, "grad_norm": 1.6877503395080566, "learning_rate": 1.588631190889987e-05, "loss": 0.3313, "step": 13500 }, { "epoch": 0.2997746972841589, "grad_norm": 1.2463515996932983, "learning_rate": 1.5883492828755876e-05, "loss": 0.3487, "step": 13505 }, { "epoch": 0.2998856838436865, "grad_norm": 1.5215736627578735, "learning_rate": 1.5880673033316555e-05, "loss": 0.4009, "step": 13510 }, { "epoch": 0.29999667040321415, "grad_norm": 1.2792853116989136, "learning_rate": 1.5877852522924733e-05, "loss": 0.4295, "step": 13515 }, { "epoch": 0.30010765696274183, "grad_norm": 1.0436149835586548, "learning_rate": 1.587503129792331e-05, "loss": 0.4025, "step": 13520 }, { "epoch": 0.30021864352226946, "grad_norm": 0.9356552958488464, "learning_rate": 1.5872209358655286e-05, "loss": 0.2919, "step": 13525 }, { "epoch": 0.3003296300817971, "grad_norm": 1.2793176174163818, "learning_rate": 1.5869386705463742e-05, "loss": 0.3657, "step": 13530 }, { "epoch": 0.3004406166413247, "grad_norm": 1.219204306602478, "learning_rate": 1.586656333869185e-05, "loss": 0.5315, "step": 13535 }, { "epoch": 0.3005516032008524, "grad_norm": 1.4818681478500366, "learning_rate": 1.5863739258682858e-05, "loss": 0.4937, "step": 13540 }, { "epoch": 0.30066258976038, "grad_norm": 1.1443345546722412, "learning_rate": 1.5860914465780112e-05, "loss": 0.4912, "step": 13545 }, { "epoch": 0.30077357631990764, "grad_norm": 1.6327272653579712, "learning_rate": 1.5858088960327043e-05, "loss": 0.3595, "step": 13550 }, { "epoch": 0.3008845628794353, "grad_norm": 1.1001689434051514, "learning_rate": 1.5855262742667165e-05, "loss": 0.526, "step": 13555 }, { "epoch": 0.30099554943896295, "grad_norm": 0.9390714168548584, "learning_rate": 1.585243581314408e-05, "loss": 0.5033, "step": 13560 }, { "epoch": 0.3011065359984906, "grad_norm": 1.3417963981628418, "learning_rate": 1.5849608172101472e-05, "loss": 0.4682, "step": 13565 }, { "epoch": 0.3012175225580182, "grad_norm": 0.9747509360313416, "learning_rate": 1.5846779819883127e-05, "loss": 0.6363, "step": 13570 }, { "epoch": 0.3013285091175459, "grad_norm": 1.1777548789978027, "learning_rate": 1.58439507568329e-05, "loss": 0.4182, "step": 13575 }, { "epoch": 0.3014394956770735, "grad_norm": 1.6634676456451416, "learning_rate": 1.5841120983294732e-05, "loss": 0.4847, "step": 13580 }, { "epoch": 0.30155048223660114, "grad_norm": 1.0142072439193726, "learning_rate": 1.583829049961267e-05, "loss": 0.4785, "step": 13585 }, { "epoch": 0.30166146879612876, "grad_norm": 1.115145206451416, "learning_rate": 1.5835459306130828e-05, "loss": 0.5335, "step": 13590 }, { "epoch": 0.30177245535565644, "grad_norm": 1.2043323516845703, "learning_rate": 1.5832627403193414e-05, "loss": 0.5331, "step": 13595 }, { "epoch": 0.30188344191518407, "grad_norm": 0.8347172737121582, "learning_rate": 1.5829794791144723e-05, "loss": 0.4027, "step": 13600 }, { "epoch": 0.3019944284747117, "grad_norm": 0.7217290997505188, "learning_rate": 1.582696147032913e-05, "loss": 0.4824, "step": 13605 }, { "epoch": 0.3021054150342394, "grad_norm": 0.854871928691864, "learning_rate": 1.5824127441091107e-05, "loss": 0.3582, "step": 13610 }, { "epoch": 0.302216401593767, "grad_norm": 1.1389976739883423, "learning_rate": 1.58212927037752e-05, "loss": 0.3694, "step": 13615 }, { "epoch": 0.30232738815329463, "grad_norm": 1.5510504245758057, "learning_rate": 1.5818457258726048e-05, "loss": 0.5034, "step": 13620 }, { "epoch": 0.30243837471282226, "grad_norm": 1.252467155456543, "learning_rate": 1.5815621106288377e-05, "loss": 0.4467, "step": 13625 }, { "epoch": 0.30254936127234994, "grad_norm": 1.3585219383239746, "learning_rate": 1.5812784246806998e-05, "loss": 0.3812, "step": 13630 }, { "epoch": 0.30266034783187756, "grad_norm": 1.4387974739074707, "learning_rate": 1.5809946680626804e-05, "loss": 0.5139, "step": 13635 }, { "epoch": 0.3027713343914052, "grad_norm": 1.2116645574569702, "learning_rate": 1.5807108408092778e-05, "loss": 0.4336, "step": 13640 }, { "epoch": 0.3028823209509328, "grad_norm": 0.818150520324707, "learning_rate": 1.5804269429549983e-05, "loss": 0.444, "step": 13645 }, { "epoch": 0.3029933075104605, "grad_norm": 0.7917804718017578, "learning_rate": 1.5801429745343583e-05, "loss": 0.3267, "step": 13650 }, { "epoch": 0.3031042940699881, "grad_norm": 2.8502650260925293, "learning_rate": 1.5798589355818807e-05, "loss": 0.4336, "step": 13655 }, { "epoch": 0.30321528062951575, "grad_norm": 0.9612025618553162, "learning_rate": 1.5795748261320984e-05, "loss": 0.5098, "step": 13660 }, { "epoch": 0.30332626718904343, "grad_norm": 0.6825042366981506, "learning_rate": 1.5792906462195524e-05, "loss": 0.4194, "step": 13665 }, { "epoch": 0.30343725374857106, "grad_norm": 1.3684370517730713, "learning_rate": 1.579006395878793e-05, "loss": 0.63, "step": 13670 }, { "epoch": 0.3035482403080987, "grad_norm": 1.1626808643341064, "learning_rate": 1.5787220751443773e-05, "loss": 0.4685, "step": 13675 }, { "epoch": 0.3036592268676263, "grad_norm": 1.3281182050704956, "learning_rate": 1.5784376840508725e-05, "loss": 0.5736, "step": 13680 }, { "epoch": 0.303770213427154, "grad_norm": 1.0269482135772705, "learning_rate": 1.5781532226328544e-05, "loss": 0.3554, "step": 13685 }, { "epoch": 0.3038811999866816, "grad_norm": 1.3451710939407349, "learning_rate": 1.5778686909249062e-05, "loss": 0.4192, "step": 13690 }, { "epoch": 0.30399218654620924, "grad_norm": 1.0704500675201416, "learning_rate": 1.577584088961621e-05, "loss": 0.3632, "step": 13695 }, { "epoch": 0.30410317310573687, "grad_norm": 1.3877739906311035, "learning_rate": 1.5772994167775986e-05, "loss": 0.5938, "step": 13700 }, { "epoch": 0.30421415966526455, "grad_norm": 1.3011753559112549, "learning_rate": 1.57701467440745e-05, "loss": 0.5205, "step": 13705 }, { "epoch": 0.3043251462247922, "grad_norm": 2.0894100666046143, "learning_rate": 1.576729861885792e-05, "loss": 0.4538, "step": 13710 }, { "epoch": 0.3044361327843198, "grad_norm": 1.0299859046936035, "learning_rate": 1.5764449792472518e-05, "loss": 0.4944, "step": 13715 }, { "epoch": 0.3045471193438475, "grad_norm": 1.1739716529846191, "learning_rate": 1.576160026526464e-05, "loss": 0.4768, "step": 13720 }, { "epoch": 0.3046581059033751, "grad_norm": 0.999975860118866, "learning_rate": 1.5758750037580726e-05, "loss": 0.4753, "step": 13725 }, { "epoch": 0.30476909246290274, "grad_norm": 1.3396409749984741, "learning_rate": 1.5755899109767298e-05, "loss": 0.6012, "step": 13730 }, { "epoch": 0.30488007902243036, "grad_norm": 0.9428462982177734, "learning_rate": 1.5753047482170956e-05, "loss": 0.4727, "step": 13735 }, { "epoch": 0.30499106558195804, "grad_norm": 1.2645313739776611, "learning_rate": 1.5750195155138394e-05, "loss": 0.4488, "step": 13740 }, { "epoch": 0.30510205214148567, "grad_norm": 0.9989715814590454, "learning_rate": 1.5747342129016395e-05, "loss": 0.3837, "step": 13745 }, { "epoch": 0.3052130387010133, "grad_norm": 1.5138322114944458, "learning_rate": 1.574448840415181e-05, "loss": 0.5546, "step": 13750 }, { "epoch": 0.3053240252605409, "grad_norm": 1.6559644937515259, "learning_rate": 1.5741633980891596e-05, "loss": 0.5383, "step": 13755 }, { "epoch": 0.3054350118200686, "grad_norm": 0.8841571807861328, "learning_rate": 1.5738778859582776e-05, "loss": 0.3331, "step": 13760 }, { "epoch": 0.30554599837959623, "grad_norm": 1.1933643817901611, "learning_rate": 1.5735923040572467e-05, "loss": 0.5135, "step": 13765 }, { "epoch": 0.30565698493912385, "grad_norm": 1.1483535766601562, "learning_rate": 1.5733066524207875e-05, "loss": 0.3668, "step": 13770 }, { "epoch": 0.30576797149865154, "grad_norm": 1.5365492105484009, "learning_rate": 1.573020931083628e-05, "loss": 0.3984, "step": 13775 }, { "epoch": 0.30587895805817916, "grad_norm": 1.2241919040679932, "learning_rate": 1.5727351400805054e-05, "loss": 0.4336, "step": 13780 }, { "epoch": 0.3059899446177068, "grad_norm": 1.256589651107788, "learning_rate": 1.572449279446165e-05, "loss": 0.5923, "step": 13785 }, { "epoch": 0.3061009311772344, "grad_norm": 0.9442324638366699, "learning_rate": 1.572163349215362e-05, "loss": 0.3944, "step": 13790 }, { "epoch": 0.3062119177367621, "grad_norm": 1.2820990085601807, "learning_rate": 1.5718773494228572e-05, "loss": 0.6538, "step": 13795 }, { "epoch": 0.3063229042962897, "grad_norm": 1.3526954650878906, "learning_rate": 1.5715912801034223e-05, "loss": 0.5573, "step": 13800 }, { "epoch": 0.30643389085581735, "grad_norm": 2.695969343185425, "learning_rate": 1.5713051412918363e-05, "loss": 0.4623, "step": 13805 }, { "epoch": 0.306544877415345, "grad_norm": 1.3611737489700317, "learning_rate": 1.5710189330228873e-05, "loss": 0.369, "step": 13810 }, { "epoch": 0.30665586397487266, "grad_norm": 1.6916635036468506, "learning_rate": 1.5707326553313714e-05, "loss": 0.5734, "step": 13815 }, { "epoch": 0.3067668505344003, "grad_norm": 1.3984599113464355, "learning_rate": 1.570446308252094e-05, "loss": 0.4552, "step": 13820 }, { "epoch": 0.3068778370939279, "grad_norm": 1.157272219657898, "learning_rate": 1.5701598918198667e-05, "loss": 0.6662, "step": 13825 }, { "epoch": 0.3069888236534556, "grad_norm": 1.7061251401901245, "learning_rate": 1.5698734060695127e-05, "loss": 0.5608, "step": 13830 }, { "epoch": 0.3070998102129832, "grad_norm": 1.0720163583755493, "learning_rate": 1.5695868510358607e-05, "loss": 0.6503, "step": 13835 }, { "epoch": 0.30721079677251084, "grad_norm": 0.9545885324478149, "learning_rate": 1.5693002267537497e-05, "loss": 0.509, "step": 13840 }, { "epoch": 0.30732178333203847, "grad_norm": 1.1842597723007202, "learning_rate": 1.5690135332580266e-05, "loss": 0.2632, "step": 13845 }, { "epoch": 0.30743276989156615, "grad_norm": 1.174399733543396, "learning_rate": 1.5687267705835463e-05, "loss": 0.342, "step": 13850 }, { "epoch": 0.3075437564510938, "grad_norm": 1.443686842918396, "learning_rate": 1.5684399387651725e-05, "loss": 0.5161, "step": 13855 }, { "epoch": 0.3076547430106214, "grad_norm": 0.8438587784767151, "learning_rate": 1.5681530378377777e-05, "loss": 0.3586, "step": 13860 }, { "epoch": 0.3077657295701491, "grad_norm": 0.9586126208305359, "learning_rate": 1.5678660678362416e-05, "loss": 0.4851, "step": 13865 }, { "epoch": 0.3078767161296767, "grad_norm": 1.103760838508606, "learning_rate": 1.5675790287954535e-05, "loss": 0.5001, "step": 13870 }, { "epoch": 0.30798770268920433, "grad_norm": 1.0907628536224365, "learning_rate": 1.5672919207503108e-05, "loss": 0.4651, "step": 13875 }, { "epoch": 0.30809868924873196, "grad_norm": 0.6905645132064819, "learning_rate": 1.5670047437357188e-05, "loss": 0.4394, "step": 13880 }, { "epoch": 0.30820967580825964, "grad_norm": 1.0371253490447998, "learning_rate": 1.5667174977865917e-05, "loss": 0.6306, "step": 13885 }, { "epoch": 0.30832066236778727, "grad_norm": 1.486870288848877, "learning_rate": 1.5664301829378515e-05, "loss": 0.3986, "step": 13890 }, { "epoch": 0.3084316489273149, "grad_norm": 1.2162060737609863, "learning_rate": 1.5661427992244297e-05, "loss": 0.6083, "step": 13895 }, { "epoch": 0.3085426354868425, "grad_norm": 1.528130292892456, "learning_rate": 1.5658553466812652e-05, "loss": 0.5284, "step": 13900 }, { "epoch": 0.3086536220463702, "grad_norm": 1.2805726528167725, "learning_rate": 1.565567825343305e-05, "loss": 0.3335, "step": 13905 }, { "epoch": 0.30876460860589783, "grad_norm": 1.0317713022232056, "learning_rate": 1.5652802352455057e-05, "loss": 0.5244, "step": 13910 }, { "epoch": 0.30887559516542545, "grad_norm": 1.0753859281539917, "learning_rate": 1.564992576422831e-05, "loss": 0.4692, "step": 13915 }, { "epoch": 0.30898658172495314, "grad_norm": 1.336464762687683, "learning_rate": 1.5647048489102535e-05, "loss": 0.4959, "step": 13920 }, { "epoch": 0.30909756828448076, "grad_norm": 1.2710378170013428, "learning_rate": 1.5644170527427545e-05, "loss": 0.377, "step": 13925 }, { "epoch": 0.3092085548440084, "grad_norm": 1.192382574081421, "learning_rate": 1.5641291879553233e-05, "loss": 0.4898, "step": 13930 }, { "epoch": 0.309319541403536, "grad_norm": 1.1654630899429321, "learning_rate": 1.5638412545829575e-05, "loss": 0.4206, "step": 13935 }, { "epoch": 0.3094305279630637, "grad_norm": 1.0210871696472168, "learning_rate": 1.5635532526606625e-05, "loss": 0.5103, "step": 13940 }, { "epoch": 0.3095415145225913, "grad_norm": 1.240159034729004, "learning_rate": 1.5632651822234533e-05, "loss": 0.465, "step": 13945 }, { "epoch": 0.30965250108211895, "grad_norm": 1.2842824459075928, "learning_rate": 1.5629770433063523e-05, "loss": 0.413, "step": 13950 }, { "epoch": 0.3097634876416466, "grad_norm": 1.2681161165237427, "learning_rate": 1.5626888359443905e-05, "loss": 0.5637, "step": 13955 }, { "epoch": 0.30987447420117425, "grad_norm": 1.4573901891708374, "learning_rate": 1.5624005601726068e-05, "loss": 0.5249, "step": 13960 }, { "epoch": 0.3099854607607019, "grad_norm": 1.5680787563323975, "learning_rate": 1.5621122160260496e-05, "loss": 0.5479, "step": 13965 }, { "epoch": 0.3100964473202295, "grad_norm": 1.434630036354065, "learning_rate": 1.561823803539774e-05, "loss": 0.5073, "step": 13970 }, { "epoch": 0.3102074338797572, "grad_norm": 1.25895094871521, "learning_rate": 1.561535322748845e-05, "loss": 0.412, "step": 13975 }, { "epoch": 0.3103184204392848, "grad_norm": 1.2706303596496582, "learning_rate": 1.5612467736883343e-05, "loss": 0.5886, "step": 13980 }, { "epoch": 0.31042940699881244, "grad_norm": 1.2274819612503052, "learning_rate": 1.560958156393323e-05, "loss": 0.4487, "step": 13985 }, { "epoch": 0.31054039355834007, "grad_norm": 0.989825427532196, "learning_rate": 1.5606694708989007e-05, "loss": 0.4792, "step": 13990 }, { "epoch": 0.31065138011786775, "grad_norm": 1.1437995433807373, "learning_rate": 1.5603807172401644e-05, "loss": 0.4597, "step": 13995 }, { "epoch": 0.3107623666773954, "grad_norm": 1.5155508518218994, "learning_rate": 1.5600918954522198e-05, "loss": 0.5427, "step": 14000 }, { "epoch": 0.310873353236923, "grad_norm": 1.084276556968689, "learning_rate": 1.559803005570181e-05, "loss": 0.512, "step": 14005 }, { "epoch": 0.3109843397964506, "grad_norm": 0.9007726311683655, "learning_rate": 1.55951404762917e-05, "loss": 0.4989, "step": 14010 }, { "epoch": 0.3110953263559783, "grad_norm": 1.1757463216781616, "learning_rate": 1.559225021664318e-05, "loss": 0.4425, "step": 14015 }, { "epoch": 0.31120631291550593, "grad_norm": 2.251450538635254, "learning_rate": 1.558935927710763e-05, "loss": 0.5534, "step": 14020 }, { "epoch": 0.31131729947503356, "grad_norm": 1.3141568899154663, "learning_rate": 1.5586467658036526e-05, "loss": 0.5211, "step": 14025 }, { "epoch": 0.31142828603456124, "grad_norm": 1.1067291498184204, "learning_rate": 1.558357535978142e-05, "loss": 0.4725, "step": 14030 }, { "epoch": 0.31153927259408887, "grad_norm": 1.132810115814209, "learning_rate": 1.5580682382693947e-05, "loss": 0.4732, "step": 14035 }, { "epoch": 0.3116502591536165, "grad_norm": 0.9449180364608765, "learning_rate": 1.5577788727125824e-05, "loss": 0.4662, "step": 14040 }, { "epoch": 0.3117612457131441, "grad_norm": 1.2642930746078491, "learning_rate": 1.5574894393428856e-05, "loss": 0.5886, "step": 14045 }, { "epoch": 0.3118722322726718, "grad_norm": 1.315994381904602, "learning_rate": 1.5571999381954925e-05, "loss": 0.4998, "step": 14050 }, { "epoch": 0.3119832188321994, "grad_norm": 1.6780071258544922, "learning_rate": 1.5569103693055996e-05, "loss": 0.3815, "step": 14055 }, { "epoch": 0.31209420539172705, "grad_norm": 0.7860261797904968, "learning_rate": 1.5566207327084116e-05, "loss": 0.3847, "step": 14060 }, { "epoch": 0.3122051919512547, "grad_norm": 1.3429057598114014, "learning_rate": 1.556331028439142e-05, "loss": 0.4024, "step": 14065 }, { "epoch": 0.31231617851078236, "grad_norm": 1.730366587638855, "learning_rate": 1.5560412565330115e-05, "loss": 0.3026, "step": 14070 }, { "epoch": 0.31242716507031, "grad_norm": 1.083951711654663, "learning_rate": 1.5557514170252497e-05, "loss": 0.4883, "step": 14075 }, { "epoch": 0.3125381516298376, "grad_norm": 1.4283391237258911, "learning_rate": 1.5554615099510945e-05, "loss": 0.4442, "step": 14080 }, { "epoch": 0.3126491381893653, "grad_norm": 1.7201144695281982, "learning_rate": 1.5551715353457918e-05, "loss": 0.5614, "step": 14085 }, { "epoch": 0.3127601247488929, "grad_norm": 1.1675817966461182, "learning_rate": 1.5548814932445958e-05, "loss": 0.3333, "step": 14090 }, { "epoch": 0.31287111130842055, "grad_norm": 1.1178109645843506, "learning_rate": 1.554591383682769e-05, "loss": 0.5362, "step": 14095 }, { "epoch": 0.3129820978679482, "grad_norm": 0.872790515422821, "learning_rate": 1.5543012066955816e-05, "loss": 0.3317, "step": 14100 }, { "epoch": 0.31309308442747585, "grad_norm": 1.2365795373916626, "learning_rate": 1.5540109623183127e-05, "loss": 0.4653, "step": 14105 }, { "epoch": 0.3132040709870035, "grad_norm": 1.199913501739502, "learning_rate": 1.5537206505862486e-05, "loss": 0.5698, "step": 14110 }, { "epoch": 0.3133150575465311, "grad_norm": 1.396148443222046, "learning_rate": 1.553430271534685e-05, "loss": 0.5484, "step": 14115 }, { "epoch": 0.31342604410605873, "grad_norm": 0.9446309208869934, "learning_rate": 1.553139825198925e-05, "loss": 0.5779, "step": 14120 }, { "epoch": 0.3135370306655864, "grad_norm": 1.3726730346679688, "learning_rate": 1.55284931161428e-05, "loss": 0.3918, "step": 14125 }, { "epoch": 0.31364801722511404, "grad_norm": 1.2780450582504272, "learning_rate": 1.55255873081607e-05, "loss": 0.3834, "step": 14130 }, { "epoch": 0.31375900378464167, "grad_norm": 1.1577582359313965, "learning_rate": 1.5522680828396225e-05, "loss": 0.3983, "step": 14135 }, { "epoch": 0.31386999034416935, "grad_norm": 1.1583194732666016, "learning_rate": 1.551977367720274e-05, "loss": 0.4249, "step": 14140 }, { "epoch": 0.313980976903697, "grad_norm": 0.9807047247886658, "learning_rate": 1.551686585493368e-05, "loss": 0.4428, "step": 14145 }, { "epoch": 0.3140919634632246, "grad_norm": 1.1752185821533203, "learning_rate": 1.5513957361942572e-05, "loss": 0.3854, "step": 14150 }, { "epoch": 0.3142029500227522, "grad_norm": 1.083425760269165, "learning_rate": 1.551104819858302e-05, "loss": 0.4976, "step": 14155 }, { "epoch": 0.3143139365822799, "grad_norm": 1.3348230123519897, "learning_rate": 1.550813836520871e-05, "loss": 0.4052, "step": 14160 }, { "epoch": 0.31442492314180753, "grad_norm": 1.4912233352661133, "learning_rate": 1.5505227862173416e-05, "loss": 0.4185, "step": 14165 }, { "epoch": 0.31453590970133516, "grad_norm": 1.264873743057251, "learning_rate": 1.5502316689830977e-05, "loss": 0.4244, "step": 14170 }, { "epoch": 0.3146468962608628, "grad_norm": 1.6147271394729614, "learning_rate": 1.5499404848535323e-05, "loss": 0.6673, "step": 14175 }, { "epoch": 0.31475788282039047, "grad_norm": 0.8825428485870361, "learning_rate": 1.549649233864048e-05, "loss": 0.5818, "step": 14180 }, { "epoch": 0.3148688693799181, "grad_norm": 1.313036561012268, "learning_rate": 1.549357916050053e-05, "loss": 0.5293, "step": 14185 }, { "epoch": 0.3149798559394457, "grad_norm": 1.228447437286377, "learning_rate": 1.5490665314469647e-05, "loss": 0.6636, "step": 14190 }, { "epoch": 0.3150908424989734, "grad_norm": 0.9629234671592712, "learning_rate": 1.5487750800902094e-05, "loss": 0.4886, "step": 14195 }, { "epoch": 0.315201829058501, "grad_norm": 0.9011967182159424, "learning_rate": 1.5484835620152198e-05, "loss": 0.4487, "step": 14200 }, { "epoch": 0.31531281561802865, "grad_norm": 0.8641685843467712, "learning_rate": 1.5481919772574384e-05, "loss": 0.518, "step": 14205 }, { "epoch": 0.3154238021775563, "grad_norm": 0.7143692374229431, "learning_rate": 1.547900325852315e-05, "loss": 0.3314, "step": 14210 }, { "epoch": 0.31553478873708396, "grad_norm": 1.0830315351486206, "learning_rate": 1.5476086078353073e-05, "loss": 0.6533, "step": 14215 }, { "epoch": 0.3156457752966116, "grad_norm": 1.2461847066879272, "learning_rate": 1.547316823241882e-05, "loss": 0.5277, "step": 14220 }, { "epoch": 0.3157567618561392, "grad_norm": 1.260133147239685, "learning_rate": 1.5470249721075123e-05, "loss": 0.3431, "step": 14225 }, { "epoch": 0.31586774841566684, "grad_norm": 1.3078408241271973, "learning_rate": 1.5467330544676814e-05, "loss": 0.3828, "step": 14230 }, { "epoch": 0.3159787349751945, "grad_norm": 1.5358682870864868, "learning_rate": 1.5464410703578788e-05, "loss": 0.5465, "step": 14235 }, { "epoch": 0.31608972153472215, "grad_norm": 1.4887757301330566, "learning_rate": 1.5461490198136043e-05, "loss": 0.4876, "step": 14240 }, { "epoch": 0.31620070809424977, "grad_norm": 1.0123707056045532, "learning_rate": 1.5458569028703632e-05, "loss": 0.5419, "step": 14245 }, { "epoch": 0.31631169465377745, "grad_norm": 1.4290823936462402, "learning_rate": 1.5455647195636706e-05, "loss": 0.5567, "step": 14250 }, { "epoch": 0.3164226812133051, "grad_norm": 1.0080236196517944, "learning_rate": 1.5452724699290494e-05, "loss": 0.4064, "step": 14255 }, { "epoch": 0.3165336677728327, "grad_norm": 1.5305603742599487, "learning_rate": 1.5449801540020294e-05, "loss": 0.3978, "step": 14260 }, { "epoch": 0.31664465433236033, "grad_norm": 1.0586655139923096, "learning_rate": 1.5446877718181502e-05, "loss": 0.3429, "step": 14265 }, { "epoch": 0.316755640891888, "grad_norm": 1.0884453058242798, "learning_rate": 1.5443953234129588e-05, "loss": 0.5679, "step": 14270 }, { "epoch": 0.31686662745141564, "grad_norm": 1.7376161813735962, "learning_rate": 1.5441028088220094e-05, "loss": 0.4128, "step": 14275 }, { "epoch": 0.31697761401094326, "grad_norm": 1.216302752494812, "learning_rate": 1.5438102280808653e-05, "loss": 0.4103, "step": 14280 }, { "epoch": 0.3170886005704709, "grad_norm": 1.2784180641174316, "learning_rate": 1.5435175812250975e-05, "loss": 0.567, "step": 14285 }, { "epoch": 0.3171995871299986, "grad_norm": 1.0405032634735107, "learning_rate": 1.543224868290285e-05, "loss": 0.4103, "step": 14290 }, { "epoch": 0.3173105736895262, "grad_norm": 0.7695423364639282, "learning_rate": 1.542932089312015e-05, "loss": 0.4365, "step": 14295 }, { "epoch": 0.3174215602490538, "grad_norm": 1.6393778324127197, "learning_rate": 1.5426392443258823e-05, "loss": 0.5539, "step": 14300 }, { "epoch": 0.3175325468085815, "grad_norm": 1.1428499221801758, "learning_rate": 1.54234633336749e-05, "loss": 0.3819, "step": 14305 }, { "epoch": 0.31764353336810913, "grad_norm": 2.0584399700164795, "learning_rate": 1.5420533564724495e-05, "loss": 0.5535, "step": 14310 }, { "epoch": 0.31775451992763676, "grad_norm": 0.7380337715148926, "learning_rate": 1.5417603136763797e-05, "loss": 0.524, "step": 14315 }, { "epoch": 0.3178655064871644, "grad_norm": 1.0620688199996948, "learning_rate": 1.5414672050149084e-05, "loss": 0.4992, "step": 14320 }, { "epoch": 0.31797649304669207, "grad_norm": 1.6038435697555542, "learning_rate": 1.5411740305236698e-05, "loss": 0.5696, "step": 14325 }, { "epoch": 0.3180874796062197, "grad_norm": 1.363045573234558, "learning_rate": 1.5408807902383074e-05, "loss": 0.6078, "step": 14330 }, { "epoch": 0.3181984661657473, "grad_norm": 1.191996693611145, "learning_rate": 1.540587484194473e-05, "loss": 0.5009, "step": 14335 }, { "epoch": 0.31830945272527494, "grad_norm": 1.5661468505859375, "learning_rate": 1.540294112427825e-05, "loss": 0.3821, "step": 14340 }, { "epoch": 0.3184204392848026, "grad_norm": 0.9835692644119263, "learning_rate": 1.5400006749740305e-05, "loss": 0.4166, "step": 14345 }, { "epoch": 0.31853142584433025, "grad_norm": 1.3363474607467651, "learning_rate": 1.539707171868765e-05, "loss": 0.349, "step": 14350 }, { "epoch": 0.3186424124038579, "grad_norm": 1.2180577516555786, "learning_rate": 1.539413603147712e-05, "loss": 0.4327, "step": 14355 }, { "epoch": 0.31875339896338556, "grad_norm": 1.1217113733291626, "learning_rate": 1.539119968846562e-05, "loss": 0.4633, "step": 14360 }, { "epoch": 0.3188643855229132, "grad_norm": 1.0430909395217896, "learning_rate": 1.538826269001014e-05, "loss": 0.5603, "step": 14365 }, { "epoch": 0.3189753720824408, "grad_norm": 1.2054392099380493, "learning_rate": 1.538532503646776e-05, "loss": 0.6168, "step": 14370 }, { "epoch": 0.31908635864196844, "grad_norm": 1.1659835577011108, "learning_rate": 1.5382386728195616e-05, "loss": 0.4366, "step": 14375 }, { "epoch": 0.3191973452014961, "grad_norm": 1.2844847440719604, "learning_rate": 1.537944776555095e-05, "loss": 0.5524, "step": 14380 }, { "epoch": 0.31930833176102374, "grad_norm": 0.9078483581542969, "learning_rate": 1.537650814889106e-05, "loss": 0.4722, "step": 14385 }, { "epoch": 0.31941931832055137, "grad_norm": 1.4450150728225708, "learning_rate": 1.5373567878573345e-05, "loss": 0.3376, "step": 14390 }, { "epoch": 0.319530304880079, "grad_norm": 1.5254474878311157, "learning_rate": 1.5370626954955268e-05, "loss": 0.3074, "step": 14395 }, { "epoch": 0.3196412914396067, "grad_norm": 1.281997561454773, "learning_rate": 1.5367685378394376e-05, "loss": 0.5479, "step": 14400 }, { "epoch": 0.3197522779991343, "grad_norm": 1.493674874305725, "learning_rate": 1.53647431492483e-05, "loss": 0.3875, "step": 14405 }, { "epoch": 0.31986326455866193, "grad_norm": 1.1070647239685059, "learning_rate": 1.536180026787474e-05, "loss": 0.3339, "step": 14410 }, { "epoch": 0.3199742511181896, "grad_norm": 2.0354061126708984, "learning_rate": 1.5358856734631488e-05, "loss": 0.4553, "step": 14415 }, { "epoch": 0.32008523767771724, "grad_norm": 1.0139501094818115, "learning_rate": 1.5355912549876408e-05, "loss": 0.5503, "step": 14420 }, { "epoch": 0.32019622423724486, "grad_norm": 1.764954924583435, "learning_rate": 1.5352967713967442e-05, "loss": 0.4297, "step": 14425 }, { "epoch": 0.3203072107967725, "grad_norm": 1.7344118356704712, "learning_rate": 1.5350022227262613e-05, "loss": 0.4172, "step": 14430 }, { "epoch": 0.32041819735630017, "grad_norm": 0.7399135828018188, "learning_rate": 1.5347076090120025e-05, "loss": 0.5299, "step": 14435 }, { "epoch": 0.3205291839158278, "grad_norm": 1.514784812927246, "learning_rate": 1.5344129302897857e-05, "loss": 0.6035, "step": 14440 }, { "epoch": 0.3206401704753554, "grad_norm": 2.186124801635742, "learning_rate": 1.5341181865954372e-05, "loss": 0.3716, "step": 14445 }, { "epoch": 0.32075115703488305, "grad_norm": 1.0691059827804565, "learning_rate": 1.533823377964791e-05, "loss": 0.4006, "step": 14450 }, { "epoch": 0.32086214359441073, "grad_norm": 1.0893067121505737, "learning_rate": 1.5335285044336887e-05, "loss": 0.4, "step": 14455 }, { "epoch": 0.32097313015393836, "grad_norm": 1.1478650569915771, "learning_rate": 1.53323356603798e-05, "loss": 0.4683, "step": 14460 }, { "epoch": 0.321084116713466, "grad_norm": 1.1583954095840454, "learning_rate": 1.5329385628135227e-05, "loss": 0.5464, "step": 14465 }, { "epoch": 0.32119510327299367, "grad_norm": 1.4609736204147339, "learning_rate": 1.5326434947961825e-05, "loss": 0.4552, "step": 14470 }, { "epoch": 0.3213060898325213, "grad_norm": 1.2604814767837524, "learning_rate": 1.5323483620218324e-05, "loss": 0.3968, "step": 14475 }, { "epoch": 0.3214170763920489, "grad_norm": 0.8882197737693787, "learning_rate": 1.5320531645263538e-05, "loss": 0.5652, "step": 14480 }, { "epoch": 0.32152806295157654, "grad_norm": 1.5108416080474854, "learning_rate": 1.5317579023456355e-05, "loss": 0.3902, "step": 14485 }, { "epoch": 0.3216390495111042, "grad_norm": 1.3611268997192383, "learning_rate": 1.5314625755155753e-05, "loss": 0.401, "step": 14490 }, { "epoch": 0.32175003607063185, "grad_norm": 1.0313974618911743, "learning_rate": 1.5311671840720775e-05, "loss": 0.2965, "step": 14495 }, { "epoch": 0.3218610226301595, "grad_norm": 0.9944019913673401, "learning_rate": 1.5308717280510547e-05, "loss": 0.5465, "step": 14500 }, { "epoch": 0.3219720091896871, "grad_norm": 1.3820387125015259, "learning_rate": 1.5305762074884276e-05, "loss": 0.4807, "step": 14505 }, { "epoch": 0.3220829957492148, "grad_norm": 1.0571720600128174, "learning_rate": 1.5302806224201247e-05, "loss": 0.5615, "step": 14510 }, { "epoch": 0.3221939823087424, "grad_norm": 1.5374435186386108, "learning_rate": 1.529984972882082e-05, "loss": 0.5059, "step": 14515 }, { "epoch": 0.32230496886827004, "grad_norm": 1.101186752319336, "learning_rate": 1.529689258910244e-05, "loss": 0.3979, "step": 14520 }, { "epoch": 0.3224159554277977, "grad_norm": 1.292052984237671, "learning_rate": 1.529393480540562e-05, "loss": 0.442, "step": 14525 }, { "epoch": 0.32252694198732534, "grad_norm": 0.9494848251342773, "learning_rate": 1.5290976378089962e-05, "loss": 0.4632, "step": 14530 }, { "epoch": 0.32263792854685297, "grad_norm": 1.0273785591125488, "learning_rate": 1.5288017307515142e-05, "loss": 0.2587, "step": 14535 }, { "epoch": 0.3227489151063806, "grad_norm": 1.0562678575515747, "learning_rate": 1.5285057594040912e-05, "loss": 0.4169, "step": 14540 }, { "epoch": 0.3228599016659083, "grad_norm": 3.380312204360962, "learning_rate": 1.5282097238027106e-05, "loss": 0.3716, "step": 14545 }, { "epoch": 0.3229708882254359, "grad_norm": 1.0881527662277222, "learning_rate": 1.527913623983363e-05, "loss": 0.5425, "step": 14550 }, { "epoch": 0.32308187478496353, "grad_norm": 1.6390355825424194, "learning_rate": 1.5276174599820476e-05, "loss": 0.6704, "step": 14555 }, { "epoch": 0.32319286134449116, "grad_norm": 2.2281415462493896, "learning_rate": 1.527321231834771e-05, "loss": 0.4503, "step": 14560 }, { "epoch": 0.32330384790401884, "grad_norm": 0.8207435011863708, "learning_rate": 1.5270249395775473e-05, "loss": 0.357, "step": 14565 }, { "epoch": 0.32341483446354646, "grad_norm": 1.023201823234558, "learning_rate": 1.526728583246399e-05, "loss": 0.436, "step": 14570 }, { "epoch": 0.3235258210230741, "grad_norm": 0.9416097402572632, "learning_rate": 1.526432162877356e-05, "loss": 0.5039, "step": 14575 }, { "epoch": 0.32363680758260177, "grad_norm": 1.2997766733169556, "learning_rate": 1.526135678506456e-05, "loss": 0.437, "step": 14580 }, { "epoch": 0.3237477941421294, "grad_norm": 1.6290065050125122, "learning_rate": 1.5258391301697452e-05, "loss": 0.4522, "step": 14585 }, { "epoch": 0.323858780701657, "grad_norm": 1.3406147956848145, "learning_rate": 1.5255425179032763e-05, "loss": 0.4906, "step": 14590 }, { "epoch": 0.32396976726118465, "grad_norm": 1.1843397617340088, "learning_rate": 1.5252458417431106e-05, "loss": 0.5041, "step": 14595 }, { "epoch": 0.32408075382071233, "grad_norm": 1.036899209022522, "learning_rate": 1.5249491017253166e-05, "loss": 0.5141, "step": 14600 }, { "epoch": 0.32419174038023996, "grad_norm": 1.191863775253296, "learning_rate": 1.524652297885972e-05, "loss": 0.6475, "step": 14605 }, { "epoch": 0.3243027269397676, "grad_norm": 1.0216777324676514, "learning_rate": 1.52435543026116e-05, "loss": 0.4453, "step": 14610 }, { "epoch": 0.3244137134992952, "grad_norm": 1.1279789209365845, "learning_rate": 1.5240584988869738e-05, "loss": 0.3538, "step": 14615 }, { "epoch": 0.3245247000588229, "grad_norm": 1.2324609756469727, "learning_rate": 1.5237615037995129e-05, "loss": 0.2331, "step": 14620 }, { "epoch": 0.3246356866183505, "grad_norm": 0.8681323528289795, "learning_rate": 1.5234644450348848e-05, "loss": 0.3599, "step": 14625 }, { "epoch": 0.32474667317787814, "grad_norm": 0.8179816603660583, "learning_rate": 1.5231673226292048e-05, "loss": 0.4646, "step": 14630 }, { "epoch": 0.3248576597374058, "grad_norm": 0.84742271900177, "learning_rate": 1.5228701366185963e-05, "loss": 0.466, "step": 14635 }, { "epoch": 0.32496864629693345, "grad_norm": 1.1196081638336182, "learning_rate": 1.5225728870391902e-05, "loss": 0.6054, "step": 14640 }, { "epoch": 0.3250796328564611, "grad_norm": 0.9078447818756104, "learning_rate": 1.522275573927125e-05, "loss": 0.4472, "step": 14645 }, { "epoch": 0.3251906194159887, "grad_norm": 1.1502723693847656, "learning_rate": 1.5219781973185477e-05, "loss": 0.4347, "step": 14650 }, { "epoch": 0.3253016059755164, "grad_norm": 0.9937777519226074, "learning_rate": 1.521680757249611e-05, "loss": 0.5165, "step": 14655 }, { "epoch": 0.325412592535044, "grad_norm": 1.1031843423843384, "learning_rate": 1.5213832537564778e-05, "loss": 0.6251, "step": 14660 }, { "epoch": 0.32552357909457164, "grad_norm": 1.3701297044754028, "learning_rate": 1.5210856868753173e-05, "loss": 0.6917, "step": 14665 }, { "epoch": 0.32563456565409926, "grad_norm": 0.9994934797286987, "learning_rate": 1.5207880566423064e-05, "loss": 0.4558, "step": 14670 }, { "epoch": 0.32574555221362694, "grad_norm": 1.2605825662612915, "learning_rate": 1.5204903630936301e-05, "loss": 0.365, "step": 14675 }, { "epoch": 0.32585653877315457, "grad_norm": 1.5471420288085938, "learning_rate": 1.5201926062654812e-05, "loss": 0.3223, "step": 14680 }, { "epoch": 0.3259675253326822, "grad_norm": 1.0369585752487183, "learning_rate": 1.5198947861940596e-05, "loss": 0.4513, "step": 14685 }, { "epoch": 0.3260785118922099, "grad_norm": 1.1226929426193237, "learning_rate": 1.5195969029155735e-05, "loss": 0.5279, "step": 14690 }, { "epoch": 0.3261894984517375, "grad_norm": 2.179290294647217, "learning_rate": 1.5192989564662388e-05, "loss": 0.5105, "step": 14695 }, { "epoch": 0.32630048501126513, "grad_norm": 1.558292269706726, "learning_rate": 1.5190009468822782e-05, "loss": 0.6155, "step": 14700 }, { "epoch": 0.32641147157079275, "grad_norm": 0.9227891564369202, "learning_rate": 1.5187028741999234e-05, "loss": 0.5226, "step": 14705 }, { "epoch": 0.32652245813032044, "grad_norm": 1.2568672895431519, "learning_rate": 1.5184047384554128e-05, "loss": 0.3348, "step": 14710 }, { "epoch": 0.32663344468984806, "grad_norm": 0.7160055637359619, "learning_rate": 1.5181065396849924e-05, "loss": 0.4504, "step": 14715 }, { "epoch": 0.3267444312493757, "grad_norm": 0.9879772663116455, "learning_rate": 1.5178082779249166e-05, "loss": 0.4325, "step": 14720 }, { "epoch": 0.3268554178089033, "grad_norm": 0.9792838096618652, "learning_rate": 1.5175099532114468e-05, "loss": 0.451, "step": 14725 }, { "epoch": 0.326966404368431, "grad_norm": 1.3058290481567383, "learning_rate": 1.5172115655808527e-05, "loss": 0.5641, "step": 14730 }, { "epoch": 0.3270773909279586, "grad_norm": 1.0594699382781982, "learning_rate": 1.5169131150694112e-05, "loss": 0.4265, "step": 14735 }, { "epoch": 0.32718837748748625, "grad_norm": 1.0784043073654175, "learning_rate": 1.5166146017134063e-05, "loss": 0.4794, "step": 14740 }, { "epoch": 0.32729936404701393, "grad_norm": 1.2410465478897095, "learning_rate": 1.5163160255491312e-05, "loss": 0.5655, "step": 14745 }, { "epoch": 0.32741035060654156, "grad_norm": 1.521320104598999, "learning_rate": 1.5160173866128848e-05, "loss": 0.5817, "step": 14750 }, { "epoch": 0.3275213371660692, "grad_norm": 1.2554552555084229, "learning_rate": 1.5157186849409755e-05, "loss": 0.4564, "step": 14755 }, { "epoch": 0.3276323237255968, "grad_norm": 1.2104326486587524, "learning_rate": 1.515419920569718e-05, "loss": 0.6301, "step": 14760 }, { "epoch": 0.3277433102851245, "grad_norm": 1.1501840353012085, "learning_rate": 1.5151210935354352e-05, "loss": 0.5422, "step": 14765 }, { "epoch": 0.3278542968446521, "grad_norm": 1.3156615495681763, "learning_rate": 1.5148222038744571e-05, "loss": 0.6681, "step": 14770 }, { "epoch": 0.32796528340417974, "grad_norm": 1.120080590248108, "learning_rate": 1.5145232516231226e-05, "loss": 0.4336, "step": 14775 }, { "epoch": 0.32807626996370737, "grad_norm": 1.1407369375228882, "learning_rate": 1.5142242368177762e-05, "loss": 0.6098, "step": 14780 }, { "epoch": 0.32818725652323505, "grad_norm": 0.8843163251876831, "learning_rate": 1.5139251594947721e-05, "loss": 0.4413, "step": 14785 }, { "epoch": 0.3282982430827627, "grad_norm": 1.3494120836257935, "learning_rate": 1.5136260196904704e-05, "loss": 0.3554, "step": 14790 }, { "epoch": 0.3284092296422903, "grad_norm": 1.1606281995773315, "learning_rate": 1.5133268174412399e-05, "loss": 0.3569, "step": 14795 }, { "epoch": 0.328520216201818, "grad_norm": 1.158096194267273, "learning_rate": 1.5130275527834566e-05, "loss": 0.606, "step": 14800 }, { "epoch": 0.3286312027613456, "grad_norm": 1.2391105890274048, "learning_rate": 1.5127282257535037e-05, "loss": 0.3439, "step": 14805 }, { "epoch": 0.32874218932087323, "grad_norm": 1.2935023307800293, "learning_rate": 1.5124288363877728e-05, "loss": 0.4074, "step": 14810 }, { "epoch": 0.32885317588040086, "grad_norm": 1.0960984230041504, "learning_rate": 1.5121293847226626e-05, "loss": 0.5407, "step": 14815 }, { "epoch": 0.32896416243992854, "grad_norm": 1.383482575416565, "learning_rate": 1.5118298707945794e-05, "loss": 0.4033, "step": 14820 }, { "epoch": 0.32907514899945617, "grad_norm": 1.285130262374878, "learning_rate": 1.5115302946399368e-05, "loss": 0.5377, "step": 14825 }, { "epoch": 0.3291861355589838, "grad_norm": 1.1885371208190918, "learning_rate": 1.5112306562951569e-05, "loss": 0.4775, "step": 14830 }, { "epoch": 0.3292971221185114, "grad_norm": 0.8962212204933167, "learning_rate": 1.510930955796668e-05, "loss": 0.2948, "step": 14835 }, { "epoch": 0.3294081086780391, "grad_norm": 1.2280473709106445, "learning_rate": 1.510631193180907e-05, "loss": 0.4923, "step": 14840 }, { "epoch": 0.32951909523756673, "grad_norm": 1.0373233556747437, "learning_rate": 1.5103313684843184e-05, "loss": 0.6092, "step": 14845 }, { "epoch": 0.32963008179709435, "grad_norm": 1.2166337966918945, "learning_rate": 1.5100314817433535e-05, "loss": 0.624, "step": 14850 }, { "epoch": 0.32974106835662204, "grad_norm": 1.1595144271850586, "learning_rate": 1.5097315329944711e-05, "loss": 0.5713, "step": 14855 }, { "epoch": 0.32985205491614966, "grad_norm": 1.23126220703125, "learning_rate": 1.5094315222741388e-05, "loss": 0.6027, "step": 14860 }, { "epoch": 0.3299630414756773, "grad_norm": 0.8551965951919556, "learning_rate": 1.5091314496188303e-05, "loss": 0.4671, "step": 14865 }, { "epoch": 0.3300740280352049, "grad_norm": 1.1392122507095337, "learning_rate": 1.5088313150650278e-05, "loss": 0.426, "step": 14870 }, { "epoch": 0.3301850145947326, "grad_norm": 1.1065609455108643, "learning_rate": 1.5085311186492206e-05, "loss": 0.3221, "step": 14875 }, { "epoch": 0.3302960011542602, "grad_norm": 2.416872262954712, "learning_rate": 1.5082308604079054e-05, "loss": 0.3214, "step": 14880 }, { "epoch": 0.33040698771378785, "grad_norm": 0.9736378788948059, "learning_rate": 1.5079305403775866e-05, "loss": 0.483, "step": 14885 }, { "epoch": 0.33051797427331553, "grad_norm": 1.625980019569397, "learning_rate": 1.5076301585947763e-05, "loss": 0.4877, "step": 14890 }, { "epoch": 0.33062896083284316, "grad_norm": 1.5329805612564087, "learning_rate": 1.5073297150959935e-05, "loss": 0.4652, "step": 14895 }, { "epoch": 0.3307399473923708, "grad_norm": 0.7575278282165527, "learning_rate": 1.5070292099177656e-05, "loss": 0.5156, "step": 14900 }, { "epoch": 0.3308509339518984, "grad_norm": 0.901342511177063, "learning_rate": 1.506728643096627e-05, "loss": 0.471, "step": 14905 }, { "epoch": 0.3309619205114261, "grad_norm": 1.2140843868255615, "learning_rate": 1.506428014669119e-05, "loss": 0.4416, "step": 14910 }, { "epoch": 0.3310729070709537, "grad_norm": 0.9275099635124207, "learning_rate": 1.5061273246717918e-05, "loss": 0.4653, "step": 14915 }, { "epoch": 0.33118389363048134, "grad_norm": 1.206339716911316, "learning_rate": 1.5058265731412017e-05, "loss": 0.5535, "step": 14920 }, { "epoch": 0.33129488019000897, "grad_norm": 1.1294726133346558, "learning_rate": 1.5055257601139132e-05, "loss": 0.2508, "step": 14925 }, { "epoch": 0.33140586674953665, "grad_norm": 1.9953529834747314, "learning_rate": 1.5052248856264982e-05, "loss": 0.5702, "step": 14930 }, { "epoch": 0.3315168533090643, "grad_norm": 1.2275333404541016, "learning_rate": 1.504923949715536e-05, "loss": 0.5817, "step": 14935 }, { "epoch": 0.3316278398685919, "grad_norm": 0.9535608887672424, "learning_rate": 1.5046229524176132e-05, "loss": 0.4339, "step": 14940 }, { "epoch": 0.3317388264281196, "grad_norm": 1.269870400428772, "learning_rate": 1.5043218937693245e-05, "loss": 0.5943, "step": 14945 }, { "epoch": 0.3318498129876472, "grad_norm": 1.8588420152664185, "learning_rate": 1.5040207738072714e-05, "loss": 0.4375, "step": 14950 }, { "epoch": 0.33196079954717483, "grad_norm": 1.0522677898406982, "learning_rate": 1.5037195925680626e-05, "loss": 0.3727, "step": 14955 }, { "epoch": 0.33207178610670246, "grad_norm": 1.5340313911437988, "learning_rate": 1.5034183500883153e-05, "loss": 0.4697, "step": 14960 }, { "epoch": 0.33218277266623014, "grad_norm": 0.8807029128074646, "learning_rate": 1.5031170464046532e-05, "loss": 0.437, "step": 14965 }, { "epoch": 0.33229375922575777, "grad_norm": 1.765564203262329, "learning_rate": 1.5028156815537083e-05, "loss": 0.3982, "step": 14970 }, { "epoch": 0.3324047457852854, "grad_norm": 1.0525873899459839, "learning_rate": 1.5025142555721189e-05, "loss": 0.5576, "step": 14975 }, { "epoch": 0.332515732344813, "grad_norm": 1.1160537004470825, "learning_rate": 1.5022127684965316e-05, "loss": 0.5933, "step": 14980 }, { "epoch": 0.3326267189043407, "grad_norm": 1.0868256092071533, "learning_rate": 1.5019112203636002e-05, "loss": 0.5242, "step": 14985 }, { "epoch": 0.3327377054638683, "grad_norm": 0.989216685295105, "learning_rate": 1.5016096112099858e-05, "loss": 0.6588, "step": 14990 }, { "epoch": 0.33284869202339595, "grad_norm": 1.5227062702178955, "learning_rate": 1.501307941072357e-05, "loss": 0.4789, "step": 14995 }, { "epoch": 0.33295967858292363, "grad_norm": 0.7849462628364563, "learning_rate": 1.5010062099873904e-05, "loss": 0.4131, "step": 15000 }, { "epoch": 0.33307066514245126, "grad_norm": 1.0590498447418213, "learning_rate": 1.5007044179917686e-05, "loss": 0.5614, "step": 15005 }, { "epoch": 0.3331816517019789, "grad_norm": 1.6085760593414307, "learning_rate": 1.5004025651221833e-05, "loss": 0.4746, "step": 15010 }, { "epoch": 0.3332926382615065, "grad_norm": 1.1510807275772095, "learning_rate": 1.5001006514153323e-05, "loss": 0.5765, "step": 15015 }, { "epoch": 0.3334036248210342, "grad_norm": 1.6156750917434692, "learning_rate": 1.4997986769079212e-05, "loss": 0.4793, "step": 15020 }, { "epoch": 0.3335146113805618, "grad_norm": 1.2351129055023193, "learning_rate": 1.499496641636663e-05, "loss": 0.3932, "step": 15025 }, { "epoch": 0.33362559794008945, "grad_norm": 0.9428719282150269, "learning_rate": 1.4991945456382784e-05, "loss": 0.5519, "step": 15030 }, { "epoch": 0.3337365844996171, "grad_norm": 1.2243869304656982, "learning_rate": 1.4988923889494952e-05, "loss": 0.2783, "step": 15035 }, { "epoch": 0.33384757105914475, "grad_norm": 1.2313982248306274, "learning_rate": 1.4985901716070486e-05, "loss": 0.5672, "step": 15040 }, { "epoch": 0.3339585576186724, "grad_norm": 0.9632226228713989, "learning_rate": 1.4982878936476808e-05, "loss": 0.4697, "step": 15045 }, { "epoch": 0.3340695441782, "grad_norm": 1.2731530666351318, "learning_rate": 1.4979855551081422e-05, "loss": 0.4888, "step": 15050 }, { "epoch": 0.3341805307377277, "grad_norm": 1.4409986734390259, "learning_rate": 1.4976831560251901e-05, "loss": 0.3715, "step": 15055 }, { "epoch": 0.3342915172972553, "grad_norm": 0.7494924068450928, "learning_rate": 1.4973806964355886e-05, "loss": 0.3139, "step": 15060 }, { "epoch": 0.33440250385678294, "grad_norm": 1.039964199066162, "learning_rate": 1.4970781763761105e-05, "loss": 0.3249, "step": 15065 }, { "epoch": 0.33451349041631057, "grad_norm": 1.3432955741882324, "learning_rate": 1.4967755958835346e-05, "loss": 0.4206, "step": 15070 }, { "epoch": 0.33462447697583825, "grad_norm": 1.152287483215332, "learning_rate": 1.4964729549946477e-05, "loss": 0.5159, "step": 15075 }, { "epoch": 0.3347354635353659, "grad_norm": 1.3000831604003906, "learning_rate": 1.4961702537462439e-05, "loss": 0.5009, "step": 15080 }, { "epoch": 0.3348464500948935, "grad_norm": 1.213396668434143, "learning_rate": 1.4958674921751248e-05, "loss": 0.3251, "step": 15085 }, { "epoch": 0.3349574366544211, "grad_norm": 1.6604156494140625, "learning_rate": 1.495564670318099e-05, "loss": 0.6201, "step": 15090 }, { "epoch": 0.3350684232139488, "grad_norm": 0.8826848864555359, "learning_rate": 1.4952617882119826e-05, "loss": 0.5158, "step": 15095 }, { "epoch": 0.33517940977347643, "grad_norm": 1.5868339538574219, "learning_rate": 1.4949588458935994e-05, "loss": 0.4477, "step": 15100 }, { "epoch": 0.33529039633300406, "grad_norm": 2.079285144805908, "learning_rate": 1.4946558433997792e-05, "loss": 0.5307, "step": 15105 }, { "epoch": 0.33540138289253174, "grad_norm": 1.3494343757629395, "learning_rate": 1.4943527807673604e-05, "loss": 0.4364, "step": 15110 }, { "epoch": 0.33551236945205937, "grad_norm": 1.2706613540649414, "learning_rate": 1.4940496580331884e-05, "loss": 0.3495, "step": 15115 }, { "epoch": 0.335623356011587, "grad_norm": 1.4336494207382202, "learning_rate": 1.4937464752341163e-05, "loss": 0.5021, "step": 15120 }, { "epoch": 0.3357343425711146, "grad_norm": 1.0443059206008911, "learning_rate": 1.4934432324070033e-05, "loss": 0.5054, "step": 15125 }, { "epoch": 0.3358453291306423, "grad_norm": 1.4348981380462646, "learning_rate": 1.4931399295887172e-05, "loss": 0.585, "step": 15130 }, { "epoch": 0.3359563156901699, "grad_norm": 1.1642272472381592, "learning_rate": 1.4928365668161322e-05, "loss": 0.5025, "step": 15135 }, { "epoch": 0.33606730224969755, "grad_norm": 1.3693106174468994, "learning_rate": 1.4925331441261303e-05, "loss": 0.5125, "step": 15140 }, { "epoch": 0.3361782888092252, "grad_norm": 1.2210627794265747, "learning_rate": 1.4922296615556007e-05, "loss": 0.4841, "step": 15145 }, { "epoch": 0.33628927536875286, "grad_norm": 0.6630014181137085, "learning_rate": 1.4919261191414394e-05, "loss": 0.4659, "step": 15150 }, { "epoch": 0.3364002619282805, "grad_norm": 1.1604273319244385, "learning_rate": 1.4916225169205505e-05, "loss": 0.5551, "step": 15155 }, { "epoch": 0.3365112484878081, "grad_norm": 0.9893293976783752, "learning_rate": 1.4913188549298447e-05, "loss": 0.541, "step": 15160 }, { "epoch": 0.3366222350473358, "grad_norm": 1.03878915309906, "learning_rate": 1.4910151332062404e-05, "loss": 0.4045, "step": 15165 }, { "epoch": 0.3367332216068634, "grad_norm": 1.2045263051986694, "learning_rate": 1.4907113517866629e-05, "loss": 0.556, "step": 15170 }, { "epoch": 0.33684420816639105, "grad_norm": 1.4657323360443115, "learning_rate": 1.4904075107080448e-05, "loss": 0.4307, "step": 15175 }, { "epoch": 0.33695519472591867, "grad_norm": 1.265890121459961, "learning_rate": 1.4901036100073265e-05, "loss": 0.5265, "step": 15180 }, { "epoch": 0.33706618128544635, "grad_norm": 1.269054651260376, "learning_rate": 1.4897996497214548e-05, "loss": 0.3963, "step": 15185 }, { "epoch": 0.337177167844974, "grad_norm": 1.269852876663208, "learning_rate": 1.4894956298873844e-05, "loss": 0.4867, "step": 15190 }, { "epoch": 0.3372881544045016, "grad_norm": 1.182908296585083, "learning_rate": 1.4891915505420768e-05, "loss": 0.3747, "step": 15195 }, { "epoch": 0.33739914096402923, "grad_norm": 1.119896650314331, "learning_rate": 1.4888874117225013e-05, "loss": 0.3979, "step": 15200 }, { "epoch": 0.3375101275235569, "grad_norm": 1.0146291255950928, "learning_rate": 1.488583213465634e-05, "loss": 0.457, "step": 15205 }, { "epoch": 0.33762111408308454, "grad_norm": 0.9620803594589233, "learning_rate": 1.4882789558084578e-05, "loss": 0.6078, "step": 15210 }, { "epoch": 0.33773210064261217, "grad_norm": 1.4261409044265747, "learning_rate": 1.487974638787964e-05, "loss": 0.449, "step": 15215 }, { "epoch": 0.33784308720213985, "grad_norm": 1.0503255128860474, "learning_rate": 1.48767026244115e-05, "loss": 0.3487, "step": 15220 }, { "epoch": 0.3379540737616675, "grad_norm": 1.3295342922210693, "learning_rate": 1.487365826805021e-05, "loss": 0.5331, "step": 15225 }, { "epoch": 0.3380650603211951, "grad_norm": 1.5140436887741089, "learning_rate": 1.4870613319165894e-05, "loss": 0.61, "step": 15230 }, { "epoch": 0.3381760468807227, "grad_norm": 1.2723734378814697, "learning_rate": 1.4867567778128744e-05, "loss": 0.532, "step": 15235 }, { "epoch": 0.3382870334402504, "grad_norm": 0.8640614151954651, "learning_rate": 1.4864521645309031e-05, "loss": 0.4707, "step": 15240 }, { "epoch": 0.33839801999977803, "grad_norm": 1.4589073657989502, "learning_rate": 1.4861474921077088e-05, "loss": 0.5183, "step": 15245 }, { "epoch": 0.33850900655930566, "grad_norm": 0.8808674812316895, "learning_rate": 1.485842760580333e-05, "loss": 0.6101, "step": 15250 }, { "epoch": 0.3386199931188333, "grad_norm": 1.252490520477295, "learning_rate": 1.4855379699858236e-05, "loss": 0.3703, "step": 15255 }, { "epoch": 0.33873097967836097, "grad_norm": 1.2436802387237549, "learning_rate": 1.4852331203612363e-05, "loss": 0.3798, "step": 15260 }, { "epoch": 0.3388419662378886, "grad_norm": 1.3613600730895996, "learning_rate": 1.4849282117436335e-05, "loss": 0.2952, "step": 15265 }, { "epoch": 0.3389529527974162, "grad_norm": 1.2618591785430908, "learning_rate": 1.4846232441700849e-05, "loss": 0.5093, "step": 15270 }, { "epoch": 0.3390639393569439, "grad_norm": 1.2628250122070312, "learning_rate": 1.4843182176776679e-05, "loss": 0.4677, "step": 15275 }, { "epoch": 0.3391749259164715, "grad_norm": 1.2692986726760864, "learning_rate": 1.4840131323034661e-05, "loss": 0.5347, "step": 15280 }, { "epoch": 0.33928591247599915, "grad_norm": 0.9842135906219482, "learning_rate": 1.4837079880845711e-05, "loss": 0.5112, "step": 15285 }, { "epoch": 0.3393968990355268, "grad_norm": 1.024045467376709, "learning_rate": 1.4834027850580809e-05, "loss": 0.4785, "step": 15290 }, { "epoch": 0.33950788559505446, "grad_norm": 1.2909910678863525, "learning_rate": 1.4830975232611013e-05, "loss": 0.4522, "step": 15295 }, { "epoch": 0.3396188721545821, "grad_norm": 1.6360613107681274, "learning_rate": 1.482792202730745e-05, "loss": 0.487, "step": 15300 }, { "epoch": 0.3397298587141097, "grad_norm": 0.9143996834754944, "learning_rate": 1.482486823504132e-05, "loss": 0.3799, "step": 15305 }, { "epoch": 0.33984084527363734, "grad_norm": 1.7492982149124146, "learning_rate": 1.4821813856183891e-05, "loss": 0.5018, "step": 15310 }, { "epoch": 0.339951831833165, "grad_norm": 1.0792031288146973, "learning_rate": 1.4818758891106504e-05, "loss": 0.3132, "step": 15315 }, { "epoch": 0.34006281839269265, "grad_norm": 1.0121617317199707, "learning_rate": 1.4815703340180572e-05, "loss": 0.3193, "step": 15320 }, { "epoch": 0.34017380495222027, "grad_norm": 1.20917546749115, "learning_rate": 1.4812647203777578e-05, "loss": 0.3868, "step": 15325 }, { "epoch": 0.34028479151174795, "grad_norm": 2.5683934688568115, "learning_rate": 1.4809590482269078e-05, "loss": 0.4244, "step": 15330 }, { "epoch": 0.3403957780712756, "grad_norm": 1.3045350313186646, "learning_rate": 1.4806533176026696e-05, "loss": 0.6908, "step": 15335 }, { "epoch": 0.3405067646308032, "grad_norm": 1.223612904548645, "learning_rate": 1.480347528542213e-05, "loss": 0.3019, "step": 15340 }, { "epoch": 0.34061775119033083, "grad_norm": 1.101333498954773, "learning_rate": 1.4800416810827151e-05, "loss": 0.6385, "step": 15345 }, { "epoch": 0.3407287377498585, "grad_norm": 1.2106317281723022, "learning_rate": 1.4797357752613594e-05, "loss": 0.3609, "step": 15350 }, { "epoch": 0.34083972430938614, "grad_norm": 0.8643994927406311, "learning_rate": 1.4794298111153374e-05, "loss": 0.4171, "step": 15355 }, { "epoch": 0.34095071086891376, "grad_norm": 1.0502965450286865, "learning_rate": 1.4791237886818464e-05, "loss": 0.3617, "step": 15360 }, { "epoch": 0.3410616974284414, "grad_norm": 0.955426812171936, "learning_rate": 1.4788177079980919e-05, "loss": 0.3525, "step": 15365 }, { "epoch": 0.34117268398796907, "grad_norm": 0.9375084638595581, "learning_rate": 1.4785115691012866e-05, "loss": 0.6322, "step": 15370 }, { "epoch": 0.3412836705474967, "grad_norm": 0.9736770391464233, "learning_rate": 1.4782053720286496e-05, "loss": 0.3913, "step": 15375 }, { "epoch": 0.3413946571070243, "grad_norm": 1.5599290132522583, "learning_rate": 1.477899116817407e-05, "loss": 0.4838, "step": 15380 }, { "epoch": 0.341505643666552, "grad_norm": 1.8174604177474976, "learning_rate": 1.4775928035047928e-05, "loss": 0.5936, "step": 15385 }, { "epoch": 0.34161663022607963, "grad_norm": 1.320636510848999, "learning_rate": 1.477286432128047e-05, "loss": 0.4545, "step": 15390 }, { "epoch": 0.34172761678560726, "grad_norm": 0.8202735781669617, "learning_rate": 1.4769800027244175e-05, "loss": 0.5133, "step": 15395 }, { "epoch": 0.3418386033451349, "grad_norm": 0.8367480039596558, "learning_rate": 1.476673515331159e-05, "loss": 0.4065, "step": 15400 }, { "epoch": 0.34194958990466257, "grad_norm": 1.1306264400482178, "learning_rate": 1.4763669699855334e-05, "loss": 0.4893, "step": 15405 }, { "epoch": 0.3420605764641902, "grad_norm": 1.102778434753418, "learning_rate": 1.4760603667248087e-05, "loss": 0.3346, "step": 15410 }, { "epoch": 0.3421715630237178, "grad_norm": 1.1968376636505127, "learning_rate": 1.4757537055862616e-05, "loss": 0.4742, "step": 15415 }, { "epoch": 0.34228254958324544, "grad_norm": 1.0032037496566772, "learning_rate": 1.4754469866071745e-05, "loss": 0.4358, "step": 15420 }, { "epoch": 0.3423935361427731, "grad_norm": 1.210485816001892, "learning_rate": 1.4751402098248373e-05, "loss": 0.4164, "step": 15425 }, { "epoch": 0.34250452270230075, "grad_norm": 1.4902434349060059, "learning_rate": 1.4748333752765467e-05, "loss": 0.4337, "step": 15430 }, { "epoch": 0.3426155092618284, "grad_norm": 1.163905382156372, "learning_rate": 1.4745264829996071e-05, "loss": 0.5235, "step": 15435 }, { "epoch": 0.34272649582135606, "grad_norm": 1.2259149551391602, "learning_rate": 1.4742195330313287e-05, "loss": 0.2444, "step": 15440 }, { "epoch": 0.3428374823808837, "grad_norm": 1.1223088502883911, "learning_rate": 1.4739125254090303e-05, "loss": 0.5696, "step": 15445 }, { "epoch": 0.3429484689404113, "grad_norm": 1.2788912057876587, "learning_rate": 1.4736054601700361e-05, "loss": 0.4696, "step": 15450 }, { "epoch": 0.34305945549993894, "grad_norm": 1.5745298862457275, "learning_rate": 1.4732983373516784e-05, "loss": 0.399, "step": 15455 }, { "epoch": 0.3431704420594666, "grad_norm": 0.7371529340744019, "learning_rate": 1.4729911569912965e-05, "loss": 0.5272, "step": 15460 }, { "epoch": 0.34328142861899424, "grad_norm": 0.6537275910377502, "learning_rate": 1.4726839191262358e-05, "loss": 0.4604, "step": 15465 }, { "epoch": 0.34339241517852187, "grad_norm": 1.454412579536438, "learning_rate": 1.4723766237938495e-05, "loss": 0.4575, "step": 15470 }, { "epoch": 0.3435034017380495, "grad_norm": 1.6370235681533813, "learning_rate": 1.4720692710314975e-05, "loss": 0.4841, "step": 15475 }, { "epoch": 0.3436143882975772, "grad_norm": 2.204667568206787, "learning_rate": 1.4717618608765465e-05, "loss": 0.6183, "step": 15480 }, { "epoch": 0.3437253748571048, "grad_norm": 1.4957976341247559, "learning_rate": 1.471454393366371e-05, "loss": 0.6387, "step": 15485 }, { "epoch": 0.34383636141663243, "grad_norm": 1.743932843208313, "learning_rate": 1.4711468685383515e-05, "loss": 0.4208, "step": 15490 }, { "epoch": 0.3439473479761601, "grad_norm": 1.1519712209701538, "learning_rate": 1.4708392864298754e-05, "loss": 0.5323, "step": 15495 }, { "epoch": 0.34405833453568774, "grad_norm": 1.2590768337249756, "learning_rate": 1.4705316470783384e-05, "loss": 0.2691, "step": 15500 }, { "epoch": 0.34416932109521536, "grad_norm": 1.0717517137527466, "learning_rate": 1.4702239505211414e-05, "loss": 0.5341, "step": 15505 }, { "epoch": 0.344280307654743, "grad_norm": 1.0902793407440186, "learning_rate": 1.4699161967956936e-05, "loss": 0.5581, "step": 15510 }, { "epoch": 0.34439129421427067, "grad_norm": 1.8656681776046753, "learning_rate": 1.4696083859394107e-05, "loss": 0.6056, "step": 15515 }, { "epoch": 0.3445022807737983, "grad_norm": 0.9933974742889404, "learning_rate": 1.4693005179897154e-05, "loss": 0.4728, "step": 15520 }, { "epoch": 0.3446132673333259, "grad_norm": 1.2458628416061401, "learning_rate": 1.4689925929840367e-05, "loss": 0.5591, "step": 15525 }, { "epoch": 0.34472425389285355, "grad_norm": 0.7363001704216003, "learning_rate": 1.4686846109598114e-05, "loss": 0.4925, "step": 15530 }, { "epoch": 0.34483524045238123, "grad_norm": 1.4438176155090332, "learning_rate": 1.4683765719544832e-05, "loss": 0.6418, "step": 15535 }, { "epoch": 0.34494622701190886, "grad_norm": 0.6764224767684937, "learning_rate": 1.468068476005502e-05, "loss": 0.4647, "step": 15540 }, { "epoch": 0.3450572135714365, "grad_norm": 1.0960233211517334, "learning_rate": 1.4677603231503254e-05, "loss": 0.443, "step": 15545 }, { "epoch": 0.34516820013096416, "grad_norm": 1.5263903141021729, "learning_rate": 1.4674521134264174e-05, "loss": 0.4602, "step": 15550 }, { "epoch": 0.3452791866904918, "grad_norm": 1.222199559211731, "learning_rate": 1.467143846871249e-05, "loss": 0.4396, "step": 15555 }, { "epoch": 0.3453901732500194, "grad_norm": 0.9828495979309082, "learning_rate": 1.4668355235222986e-05, "loss": 0.4504, "step": 15560 }, { "epoch": 0.34550115980954704, "grad_norm": 1.0530000925064087, "learning_rate": 1.4665271434170507e-05, "loss": 0.5981, "step": 15565 }, { "epoch": 0.3456121463690747, "grad_norm": 1.3413766622543335, "learning_rate": 1.4662187065929976e-05, "loss": 0.4722, "step": 15570 }, { "epoch": 0.34572313292860235, "grad_norm": 1.097169041633606, "learning_rate": 1.4659102130876374e-05, "loss": 0.5017, "step": 15575 }, { "epoch": 0.34583411948813, "grad_norm": 1.3739992380142212, "learning_rate": 1.4656016629384762e-05, "loss": 0.4514, "step": 15580 }, { "epoch": 0.3459451060476576, "grad_norm": 1.134070634841919, "learning_rate": 1.4652930561830263e-05, "loss": 0.4381, "step": 15585 }, { "epoch": 0.3460560926071853, "grad_norm": 1.2391133308410645, "learning_rate": 1.464984392858807e-05, "loss": 0.4815, "step": 15590 }, { "epoch": 0.3461670791667129, "grad_norm": 1.32672917842865, "learning_rate": 1.464675673003345e-05, "loss": 0.4924, "step": 15595 }, { "epoch": 0.34627806572624054, "grad_norm": 1.2489418983459473, "learning_rate": 1.4643668966541726e-05, "loss": 0.5202, "step": 15600 }, { "epoch": 0.3463890522857682, "grad_norm": 1.1635617017745972, "learning_rate": 1.4640580638488306e-05, "loss": 0.4877, "step": 15605 }, { "epoch": 0.34650003884529584, "grad_norm": 1.0924367904663086, "learning_rate": 1.4637491746248653e-05, "loss": 0.3345, "step": 15610 }, { "epoch": 0.34661102540482347, "grad_norm": 1.1278338432312012, "learning_rate": 1.4634402290198306e-05, "loss": 0.4457, "step": 15615 }, { "epoch": 0.3467220119643511, "grad_norm": 0.7023510336875916, "learning_rate": 1.4631312270712875e-05, "loss": 0.5518, "step": 15620 }, { "epoch": 0.3468329985238788, "grad_norm": 1.3885475397109985, "learning_rate": 1.462822168816803e-05, "loss": 0.3658, "step": 15625 }, { "epoch": 0.3469439850834064, "grad_norm": 1.0028605461120605, "learning_rate": 1.4625130542939512e-05, "loss": 0.3945, "step": 15630 }, { "epoch": 0.34705497164293403, "grad_norm": 1.6290814876556396, "learning_rate": 1.4622038835403135e-05, "loss": 0.6108, "step": 15635 }, { "epoch": 0.34716595820246166, "grad_norm": 1.0613174438476562, "learning_rate": 1.4618946565934775e-05, "loss": 0.4755, "step": 15640 }, { "epoch": 0.34727694476198934, "grad_norm": 1.064847469329834, "learning_rate": 1.4615853734910386e-05, "loss": 0.3752, "step": 15645 }, { "epoch": 0.34738793132151696, "grad_norm": 1.3954349756240845, "learning_rate": 1.4612760342705979e-05, "loss": 0.5212, "step": 15650 }, { "epoch": 0.3474989178810446, "grad_norm": 1.5847194194793701, "learning_rate": 1.4609666389697638e-05, "loss": 0.4606, "step": 15655 }, { "epoch": 0.34760990444057227, "grad_norm": 1.386644721031189, "learning_rate": 1.4606571876261517e-05, "loss": 0.4912, "step": 15660 }, { "epoch": 0.3477208910000999, "grad_norm": 1.5342525243759155, "learning_rate": 1.4603476802773839e-05, "loss": 0.4848, "step": 15665 }, { "epoch": 0.3478318775596275, "grad_norm": 1.0431338548660278, "learning_rate": 1.4600381169610888e-05, "loss": 0.4559, "step": 15670 }, { "epoch": 0.34794286411915515, "grad_norm": 1.5182932615280151, "learning_rate": 1.4597284977149022e-05, "loss": 0.5943, "step": 15675 }, { "epoch": 0.34805385067868283, "grad_norm": 1.3352900743484497, "learning_rate": 1.4594188225764667e-05, "loss": 0.5782, "step": 15680 }, { "epoch": 0.34816483723821046, "grad_norm": 1.3885362148284912, "learning_rate": 1.4591090915834319e-05, "loss": 0.3971, "step": 15685 }, { "epoch": 0.3482758237977381, "grad_norm": 1.206699252128601, "learning_rate": 1.458799304773453e-05, "loss": 0.4441, "step": 15690 }, { "epoch": 0.3483868103572657, "grad_norm": 2.033198833465576, "learning_rate": 1.4584894621841937e-05, "loss": 0.3343, "step": 15695 }, { "epoch": 0.3484977969167934, "grad_norm": 1.1778301000595093, "learning_rate": 1.4581795638533227e-05, "loss": 0.5982, "step": 15700 }, { "epoch": 0.348608783476321, "grad_norm": 1.5190293788909912, "learning_rate": 1.4578696098185175e-05, "loss": 0.6049, "step": 15705 }, { "epoch": 0.34871977003584864, "grad_norm": 1.171130657196045, "learning_rate": 1.4575596001174605e-05, "loss": 0.5663, "step": 15710 }, { "epoch": 0.3488307565953763, "grad_norm": 2.257117509841919, "learning_rate": 1.457249534787842e-05, "loss": 0.5115, "step": 15715 }, { "epoch": 0.34894174315490395, "grad_norm": 1.0101255178451538, "learning_rate": 1.4569394138673583e-05, "loss": 0.388, "step": 15720 }, { "epoch": 0.3490527297144316, "grad_norm": 2.2193045616149902, "learning_rate": 1.4566292373937133e-05, "loss": 0.5509, "step": 15725 }, { "epoch": 0.3491637162739592, "grad_norm": 0.6599202752113342, "learning_rate": 1.4563190054046168e-05, "loss": 0.6527, "step": 15730 }, { "epoch": 0.3492747028334869, "grad_norm": 1.3797396421432495, "learning_rate": 1.4560087179377862e-05, "loss": 0.7525, "step": 15735 }, { "epoch": 0.3493856893930145, "grad_norm": 1.2089630365371704, "learning_rate": 1.455698375030945e-05, "loss": 0.4175, "step": 15740 }, { "epoch": 0.34949667595254214, "grad_norm": 1.3355190753936768, "learning_rate": 1.4553879767218238e-05, "loss": 0.4175, "step": 15745 }, { "epoch": 0.34960766251206976, "grad_norm": 0.9695683121681213, "learning_rate": 1.4550775230481593e-05, "loss": 0.5127, "step": 15750 }, { "epoch": 0.34971864907159744, "grad_norm": 1.308209776878357, "learning_rate": 1.454767014047696e-05, "loss": 0.4467, "step": 15755 }, { "epoch": 0.34982963563112507, "grad_norm": 1.0755424499511719, "learning_rate": 1.454456449758184e-05, "loss": 0.3588, "step": 15760 }, { "epoch": 0.3499406221906527, "grad_norm": 1.162462830543518, "learning_rate": 1.4541458302173815e-05, "loss": 0.4285, "step": 15765 }, { "epoch": 0.3500516087501804, "grad_norm": 1.1130118370056152, "learning_rate": 1.4538351554630517e-05, "loss": 0.3654, "step": 15770 }, { "epoch": 0.350162595309708, "grad_norm": 1.501671314239502, "learning_rate": 1.4535244255329657e-05, "loss": 0.4821, "step": 15775 }, { "epoch": 0.35027358186923563, "grad_norm": 1.2213767766952515, "learning_rate": 1.4532136404649012e-05, "loss": 0.4606, "step": 15780 }, { "epoch": 0.35038456842876325, "grad_norm": 1.1781730651855469, "learning_rate": 1.4529028002966424e-05, "loss": 0.4491, "step": 15785 }, { "epoch": 0.35049555498829094, "grad_norm": 1.2890111207962036, "learning_rate": 1.4525919050659798e-05, "loss": 0.4341, "step": 15790 }, { "epoch": 0.35060654154781856, "grad_norm": 1.1150555610656738, "learning_rate": 1.4522809548107113e-05, "loss": 0.4862, "step": 15795 }, { "epoch": 0.3507175281073462, "grad_norm": 1.4423096179962158, "learning_rate": 1.4519699495686413e-05, "loss": 0.5472, "step": 15800 }, { "epoch": 0.3508285146668738, "grad_norm": 0.8447060585021973, "learning_rate": 1.4516588893775805e-05, "loss": 0.4617, "step": 15805 }, { "epoch": 0.3509395012264015, "grad_norm": 1.3163111209869385, "learning_rate": 1.4513477742753465e-05, "loss": 0.4202, "step": 15810 }, { "epoch": 0.3510504877859291, "grad_norm": 1.0188243389129639, "learning_rate": 1.4510366042997638e-05, "loss": 0.5308, "step": 15815 }, { "epoch": 0.35116147434545675, "grad_norm": 0.8433900475502014, "learning_rate": 1.4507253794886638e-05, "loss": 0.436, "step": 15820 }, { "epoch": 0.35127246090498443, "grad_norm": 1.1491411924362183, "learning_rate": 1.4504140998798834e-05, "loss": 0.5348, "step": 15825 }, { "epoch": 0.35138344746451206, "grad_norm": 1.0281944274902344, "learning_rate": 1.4501027655112675e-05, "loss": 0.3657, "step": 15830 }, { "epoch": 0.3514944340240397, "grad_norm": 1.3137558698654175, "learning_rate": 1.449791376420667e-05, "loss": 0.5407, "step": 15835 }, { "epoch": 0.3516054205835673, "grad_norm": 1.239391803741455, "learning_rate": 1.4494799326459393e-05, "loss": 0.6412, "step": 15840 }, { "epoch": 0.351716407143095, "grad_norm": 1.202092170715332, "learning_rate": 1.4491684342249485e-05, "loss": 0.4441, "step": 15845 }, { "epoch": 0.3518273937026226, "grad_norm": 1.3718825578689575, "learning_rate": 1.4488568811955663e-05, "loss": 0.4353, "step": 15850 }, { "epoch": 0.35193838026215024, "grad_norm": 1.1037505865097046, "learning_rate": 1.4485452735956698e-05, "loss": 0.4914, "step": 15855 }, { "epoch": 0.35204936682167787, "grad_norm": 1.0239901542663574, "learning_rate": 1.4482336114631432e-05, "loss": 0.315, "step": 15860 }, { "epoch": 0.35216035338120555, "grad_norm": 0.8123399019241333, "learning_rate": 1.4479218948358773e-05, "loss": 0.4516, "step": 15865 }, { "epoch": 0.3522713399407332, "grad_norm": 0.8140292167663574, "learning_rate": 1.4476101237517694e-05, "loss": 0.3005, "step": 15870 }, { "epoch": 0.3523823265002608, "grad_norm": 1.1909701824188232, "learning_rate": 1.4472982982487242e-05, "loss": 0.2595, "step": 15875 }, { "epoch": 0.3524933130597885, "grad_norm": 1.754469394683838, "learning_rate": 1.4469864183646521e-05, "loss": 0.4105, "step": 15880 }, { "epoch": 0.3526042996193161, "grad_norm": 0.990523099899292, "learning_rate": 1.44667448413747e-05, "loss": 0.4436, "step": 15885 }, { "epoch": 0.35271528617884373, "grad_norm": 1.1670485734939575, "learning_rate": 1.4463624956051023e-05, "loss": 0.6312, "step": 15890 }, { "epoch": 0.35282627273837136, "grad_norm": 1.3339301347732544, "learning_rate": 1.4460504528054796e-05, "loss": 0.3657, "step": 15895 }, { "epoch": 0.35293725929789904, "grad_norm": 1.3710170984268188, "learning_rate": 1.4457383557765385e-05, "loss": 0.5359, "step": 15900 }, { "epoch": 0.35304824585742667, "grad_norm": 1.1399394273757935, "learning_rate": 1.445426204556223e-05, "loss": 0.6212, "step": 15905 }, { "epoch": 0.3531592324169543, "grad_norm": 1.5016330480575562, "learning_rate": 1.4451139991824833e-05, "loss": 0.5507, "step": 15910 }, { "epoch": 0.353270218976482, "grad_norm": 0.6789126396179199, "learning_rate": 1.4448017396932767e-05, "loss": 0.4242, "step": 15915 }, { "epoch": 0.3533812055360096, "grad_norm": 1.0379139184951782, "learning_rate": 1.444489426126566e-05, "loss": 0.4029, "step": 15920 }, { "epoch": 0.3534921920955372, "grad_norm": 1.112060546875, "learning_rate": 1.444177058520322e-05, "loss": 0.4358, "step": 15925 }, { "epoch": 0.35360317865506485, "grad_norm": 1.1852864027023315, "learning_rate": 1.4438646369125205e-05, "loss": 0.4653, "step": 15930 }, { "epoch": 0.35371416521459254, "grad_norm": 1.0151760578155518, "learning_rate": 1.443552161341145e-05, "loss": 0.5065, "step": 15935 }, { "epoch": 0.35382515177412016, "grad_norm": 0.7877997756004333, "learning_rate": 1.4432396318441855e-05, "loss": 0.512, "step": 15940 }, { "epoch": 0.3539361383336478, "grad_norm": 1.031214952468872, "learning_rate": 1.442927048459638e-05, "loss": 0.3855, "step": 15945 }, { "epoch": 0.3540471248931754, "grad_norm": 1.2018985748291016, "learning_rate": 1.4426144112255057e-05, "loss": 0.5145, "step": 15950 }, { "epoch": 0.3541581114527031, "grad_norm": 1.386949062347412, "learning_rate": 1.442301720179797e-05, "loss": 0.512, "step": 15955 }, { "epoch": 0.3542690980122307, "grad_norm": 1.0878076553344727, "learning_rate": 1.441988975360529e-05, "loss": 0.5614, "step": 15960 }, { "epoch": 0.35438008457175835, "grad_norm": 1.7431411743164062, "learning_rate": 1.4416761768057237e-05, "loss": 0.3427, "step": 15965 }, { "epoch": 0.35449107113128603, "grad_norm": 1.3923200368881226, "learning_rate": 1.44136332455341e-05, "loss": 0.6949, "step": 15970 }, { "epoch": 0.35460205769081365, "grad_norm": 1.394872784614563, "learning_rate": 1.4410504186416237e-05, "loss": 0.3061, "step": 15975 }, { "epoch": 0.3547130442503413, "grad_norm": 1.056589126586914, "learning_rate": 1.4407374591084064e-05, "loss": 0.3958, "step": 15980 }, { "epoch": 0.3548240308098689, "grad_norm": 1.1440049409866333, "learning_rate": 1.4404244459918069e-05, "loss": 0.4387, "step": 15985 }, { "epoch": 0.3549350173693966, "grad_norm": 0.9853442907333374, "learning_rate": 1.4401113793298804e-05, "loss": 0.4085, "step": 15990 }, { "epoch": 0.3550460039289242, "grad_norm": 1.08008873462677, "learning_rate": 1.4397982591606887e-05, "loss": 0.4988, "step": 15995 }, { "epoch": 0.35515699048845184, "grad_norm": 1.3674938678741455, "learning_rate": 1.4394850855222995e-05, "loss": 0.338, "step": 16000 }, { "epoch": 0.35526797704797947, "grad_norm": 1.0351570844650269, "learning_rate": 1.4391718584527871e-05, "loss": 0.4379, "step": 16005 }, { "epoch": 0.35537896360750715, "grad_norm": 1.1538983583450317, "learning_rate": 1.4388585779902336e-05, "loss": 0.5366, "step": 16010 }, { "epoch": 0.3554899501670348, "grad_norm": 1.7466696500778198, "learning_rate": 1.4385452441727257e-05, "loss": 0.595, "step": 16015 }, { "epoch": 0.3556009367265624, "grad_norm": 1.2843554019927979, "learning_rate": 1.4382318570383578e-05, "loss": 0.4549, "step": 16020 }, { "epoch": 0.3557119232860901, "grad_norm": 1.1115984916687012, "learning_rate": 1.4379184166252304e-05, "loss": 0.5201, "step": 16025 }, { "epoch": 0.3558229098456177, "grad_norm": 1.5879791975021362, "learning_rate": 1.4376049229714509e-05, "loss": 0.3875, "step": 16030 }, { "epoch": 0.35593389640514533, "grad_norm": 1.3459078073501587, "learning_rate": 1.4372913761151321e-05, "loss": 0.7422, "step": 16035 }, { "epoch": 0.35604488296467296, "grad_norm": 2.2278897762298584, "learning_rate": 1.4369777760943944e-05, "loss": 0.3918, "step": 16040 }, { "epoch": 0.35615586952420064, "grad_norm": 1.5131078958511353, "learning_rate": 1.4366641229473644e-05, "loss": 0.3198, "step": 16045 }, { "epoch": 0.35626685608372827, "grad_norm": 1.5506442785263062, "learning_rate": 1.4363504167121747e-05, "loss": 0.4538, "step": 16050 }, { "epoch": 0.3563778426432559, "grad_norm": 1.6898088455200195, "learning_rate": 1.4360366574269647e-05, "loss": 0.564, "step": 16055 }, { "epoch": 0.3564888292027835, "grad_norm": 1.203200340270996, "learning_rate": 1.4357228451298802e-05, "loss": 0.4952, "step": 16060 }, { "epoch": 0.3565998157623112, "grad_norm": 1.6116600036621094, "learning_rate": 1.4354089798590735e-05, "loss": 0.5373, "step": 16065 }, { "epoch": 0.3567108023218388, "grad_norm": 0.919315755367279, "learning_rate": 1.4350950616527032e-05, "loss": 0.427, "step": 16070 }, { "epoch": 0.35682178888136645, "grad_norm": 1.246697187423706, "learning_rate": 1.4347810905489348e-05, "loss": 0.4758, "step": 16075 }, { "epoch": 0.35693277544089413, "grad_norm": 0.885575532913208, "learning_rate": 1.4344670665859393e-05, "loss": 0.4152, "step": 16080 }, { "epoch": 0.35704376200042176, "grad_norm": 1.086563229560852, "learning_rate": 1.434152989801895e-05, "loss": 0.506, "step": 16085 }, { "epoch": 0.3571547485599494, "grad_norm": 1.346968412399292, "learning_rate": 1.4338388602349862e-05, "loss": 0.5173, "step": 16090 }, { "epoch": 0.357265735119477, "grad_norm": 1.4068158864974976, "learning_rate": 1.433524677923404e-05, "loss": 0.481, "step": 16095 }, { "epoch": 0.3573767216790047, "grad_norm": 1.0199639797210693, "learning_rate": 1.4332104429053449e-05, "loss": 0.4408, "step": 16100 }, { "epoch": 0.3574877082385323, "grad_norm": 1.341516375541687, "learning_rate": 1.4328961552190132e-05, "loss": 0.3486, "step": 16105 }, { "epoch": 0.35759869479805995, "grad_norm": 0.6724761724472046, "learning_rate": 1.432581814902619e-05, "loss": 0.2141, "step": 16110 }, { "epoch": 0.35770968135758757, "grad_norm": 1.1561756134033203, "learning_rate": 1.4322674219943783e-05, "loss": 0.4822, "step": 16115 }, { "epoch": 0.35782066791711525, "grad_norm": 1.3430947065353394, "learning_rate": 1.4319529765325144e-05, "loss": 0.4901, "step": 16120 }, { "epoch": 0.3579316544766429, "grad_norm": 0.9767167568206787, "learning_rate": 1.431638478555256e-05, "loss": 0.5472, "step": 16125 }, { "epoch": 0.3580426410361705, "grad_norm": 1.5211210250854492, "learning_rate": 1.4313239281008388e-05, "loss": 0.6586, "step": 16130 }, { "epoch": 0.3581536275956982, "grad_norm": 1.7379239797592163, "learning_rate": 1.4310093252075055e-05, "loss": 0.4366, "step": 16135 }, { "epoch": 0.3582646141552258, "grad_norm": 0.8871414661407471, "learning_rate": 1.4306946699135038e-05, "loss": 0.4634, "step": 16140 }, { "epoch": 0.35837560071475344, "grad_norm": 1.0361415147781372, "learning_rate": 1.4303799622570884e-05, "loss": 0.4303, "step": 16145 }, { "epoch": 0.35848658727428107, "grad_norm": 1.4143927097320557, "learning_rate": 1.4300652022765207e-05, "loss": 0.5521, "step": 16150 }, { "epoch": 0.35859757383380875, "grad_norm": 1.8394910097122192, "learning_rate": 1.4297503900100678e-05, "loss": 0.5932, "step": 16155 }, { "epoch": 0.3587085603933364, "grad_norm": 1.1367939710617065, "learning_rate": 1.429435525496004e-05, "loss": 0.4967, "step": 16160 }, { "epoch": 0.358819546952864, "grad_norm": 0.9071010947227478, "learning_rate": 1.429120608772609e-05, "loss": 0.5516, "step": 16165 }, { "epoch": 0.3589305335123916, "grad_norm": 1.2656437158584595, "learning_rate": 1.4288056398781698e-05, "loss": 0.632, "step": 16170 }, { "epoch": 0.3590415200719193, "grad_norm": 0.85821133852005, "learning_rate": 1.428490618850979e-05, "loss": 0.4313, "step": 16175 }, { "epoch": 0.35915250663144693, "grad_norm": 0.7876311540603638, "learning_rate": 1.4281755457293359e-05, "loss": 0.3428, "step": 16180 }, { "epoch": 0.35926349319097456, "grad_norm": 1.28394615650177, "learning_rate": 1.4278604205515453e-05, "loss": 0.5349, "step": 16185 }, { "epoch": 0.35937447975050224, "grad_norm": 1.3520690202713013, "learning_rate": 1.4275452433559202e-05, "loss": 0.454, "step": 16190 }, { "epoch": 0.35948546631002987, "grad_norm": 1.587080717086792, "learning_rate": 1.4272300141807782e-05, "loss": 0.469, "step": 16195 }, { "epoch": 0.3595964528695575, "grad_norm": 1.1184484958648682, "learning_rate": 1.426914733064444e-05, "loss": 0.3818, "step": 16200 }, { "epoch": 0.3597074394290851, "grad_norm": 1.2177501916885376, "learning_rate": 1.4265994000452484e-05, "loss": 0.5151, "step": 16205 }, { "epoch": 0.3598184259886128, "grad_norm": 1.274096965789795, "learning_rate": 1.426284015161528e-05, "loss": 0.5162, "step": 16210 }, { "epoch": 0.3599294125481404, "grad_norm": 1.4250680208206177, "learning_rate": 1.425968578451627e-05, "loss": 0.4129, "step": 16215 }, { "epoch": 0.36004039910766805, "grad_norm": 1.0498861074447632, "learning_rate": 1.4256530899538948e-05, "loss": 0.5664, "step": 16220 }, { "epoch": 0.3601513856671957, "grad_norm": 1.3277438879013062, "learning_rate": 1.4253375497066875e-05, "loss": 0.43, "step": 16225 }, { "epoch": 0.36026237222672336, "grad_norm": 1.4574748277664185, "learning_rate": 1.4250219577483673e-05, "loss": 0.5163, "step": 16230 }, { "epoch": 0.360373358786251, "grad_norm": 1.4078586101531982, "learning_rate": 1.424706314117303e-05, "loss": 0.422, "step": 16235 }, { "epoch": 0.3604843453457786, "grad_norm": 1.536993384361267, "learning_rate": 1.4243906188518691e-05, "loss": 0.4067, "step": 16240 }, { "epoch": 0.3605953319053063, "grad_norm": 1.1171602010726929, "learning_rate": 1.4240748719904471e-05, "loss": 0.5737, "step": 16245 }, { "epoch": 0.3607063184648339, "grad_norm": 1.253387689590454, "learning_rate": 1.4237590735714246e-05, "loss": 0.3864, "step": 16250 }, { "epoch": 0.36081730502436155, "grad_norm": 1.6348520517349243, "learning_rate": 1.4234432236331951e-05, "loss": 0.4945, "step": 16255 }, { "epoch": 0.36092829158388917, "grad_norm": 0.8581211566925049, "learning_rate": 1.4231273222141587e-05, "loss": 0.5606, "step": 16260 }, { "epoch": 0.36103927814341685, "grad_norm": 1.2018139362335205, "learning_rate": 1.4228113693527212e-05, "loss": 0.3867, "step": 16265 }, { "epoch": 0.3611502647029445, "grad_norm": 0.9835402369499207, "learning_rate": 1.4224953650872958e-05, "loss": 0.5186, "step": 16270 }, { "epoch": 0.3612612512624721, "grad_norm": 1.4427926540374756, "learning_rate": 1.4221793094563006e-05, "loss": 0.5518, "step": 16275 }, { "epoch": 0.36137223782199973, "grad_norm": 0.8743858933448792, "learning_rate": 1.421863202498161e-05, "loss": 0.4663, "step": 16280 }, { "epoch": 0.3614832243815274, "grad_norm": 2.119997978210449, "learning_rate": 1.4215470442513077e-05, "loss": 0.3596, "step": 16285 }, { "epoch": 0.36159421094105504, "grad_norm": 1.464104413986206, "learning_rate": 1.4212308347541787e-05, "loss": 0.4975, "step": 16290 }, { "epoch": 0.36170519750058266, "grad_norm": 1.2174603939056396, "learning_rate": 1.4209145740452175e-05, "loss": 0.4806, "step": 16295 }, { "epoch": 0.36181618406011035, "grad_norm": 1.1920909881591797, "learning_rate": 1.4205982621628742e-05, "loss": 0.4288, "step": 16300 }, { "epoch": 0.361927170619638, "grad_norm": 1.8685282468795776, "learning_rate": 1.4202818991456047e-05, "loss": 0.381, "step": 16305 }, { "epoch": 0.3620381571791656, "grad_norm": 1.1709849834442139, "learning_rate": 1.4199654850318713e-05, "loss": 0.4441, "step": 16310 }, { "epoch": 0.3621491437386932, "grad_norm": 1.5978822708129883, "learning_rate": 1.4196490198601426e-05, "loss": 0.4736, "step": 16315 }, { "epoch": 0.3622601302982209, "grad_norm": 1.6112041473388672, "learning_rate": 1.419332503668894e-05, "loss": 0.4517, "step": 16320 }, { "epoch": 0.36237111685774853, "grad_norm": 1.0208383798599243, "learning_rate": 1.4190159364966053e-05, "loss": 0.4732, "step": 16325 }, { "epoch": 0.36248210341727616, "grad_norm": 0.9899531006813049, "learning_rate": 1.4186993183817643e-05, "loss": 0.5406, "step": 16330 }, { "epoch": 0.3625930899768038, "grad_norm": 1.1739122867584229, "learning_rate": 1.4183826493628647e-05, "loss": 0.5429, "step": 16335 }, { "epoch": 0.36270407653633147, "grad_norm": 1.1442511081695557, "learning_rate": 1.4180659294784058e-05, "loss": 0.4618, "step": 16340 }, { "epoch": 0.3628150630958591, "grad_norm": 1.8962513208389282, "learning_rate": 1.4177491587668933e-05, "loss": 0.4247, "step": 16345 }, { "epoch": 0.3629260496553867, "grad_norm": 1.5891103744506836, "learning_rate": 1.4174323372668387e-05, "loss": 0.5651, "step": 16350 }, { "epoch": 0.3630370362149144, "grad_norm": 1.5759340524673462, "learning_rate": 1.4171154650167606e-05, "loss": 0.4986, "step": 16355 }, { "epoch": 0.363148022774442, "grad_norm": 1.0296753644943237, "learning_rate": 1.4167985420551836e-05, "loss": 0.4137, "step": 16360 }, { "epoch": 0.36325900933396965, "grad_norm": 1.6463236808776855, "learning_rate": 1.4164815684206372e-05, "loss": 0.4071, "step": 16365 }, { "epoch": 0.3633699958934973, "grad_norm": 0.9370015263557434, "learning_rate": 1.4161645441516588e-05, "loss": 0.2914, "step": 16370 }, { "epoch": 0.36348098245302496, "grad_norm": 1.150620460510254, "learning_rate": 1.4158474692867907e-05, "loss": 0.4136, "step": 16375 }, { "epoch": 0.3635919690125526, "grad_norm": 1.129516363143921, "learning_rate": 1.4155303438645818e-05, "loss": 0.4786, "step": 16380 }, { "epoch": 0.3637029555720802, "grad_norm": 1.2091377973556519, "learning_rate": 1.4152131679235872e-05, "loss": 0.5573, "step": 16385 }, { "epoch": 0.36381394213160784, "grad_norm": 1.0632437467575073, "learning_rate": 1.4148959415023687e-05, "loss": 0.3591, "step": 16390 }, { "epoch": 0.3639249286911355, "grad_norm": 0.9506019949913025, "learning_rate": 1.4145786646394926e-05, "loss": 0.4244, "step": 16395 }, { "epoch": 0.36403591525066314, "grad_norm": 1.217382550239563, "learning_rate": 1.414261337373533e-05, "loss": 0.3737, "step": 16400 }, { "epoch": 0.36414690181019077, "grad_norm": 1.446199655532837, "learning_rate": 1.4139439597430693e-05, "loss": 0.3826, "step": 16405 }, { "epoch": 0.36425788836971845, "grad_norm": 1.6045228242874146, "learning_rate": 1.4136265317866874e-05, "loss": 0.6589, "step": 16410 }, { "epoch": 0.3643688749292461, "grad_norm": 1.313880443572998, "learning_rate": 1.4133090535429788e-05, "loss": 0.5849, "step": 16415 }, { "epoch": 0.3644798614887737, "grad_norm": 1.2983330488204956, "learning_rate": 1.4129915250505418e-05, "loss": 0.709, "step": 16420 }, { "epoch": 0.36459084804830133, "grad_norm": 4.271213054656982, "learning_rate": 1.41267394634798e-05, "loss": 0.4601, "step": 16425 }, { "epoch": 0.364701834607829, "grad_norm": 0.890982985496521, "learning_rate": 1.4123563174739036e-05, "loss": 0.5058, "step": 16430 }, { "epoch": 0.36481282116735664, "grad_norm": 0.7690356969833374, "learning_rate": 1.4120386384669294e-05, "loss": 0.4065, "step": 16435 }, { "epoch": 0.36492380772688426, "grad_norm": 0.7626538276672363, "learning_rate": 1.4117209093656791e-05, "loss": 0.4242, "step": 16440 }, { "epoch": 0.3650347942864119, "grad_norm": 1.2675507068634033, "learning_rate": 1.4114031302087818e-05, "loss": 0.4668, "step": 16445 }, { "epoch": 0.36514578084593957, "grad_norm": 1.6725234985351562, "learning_rate": 1.4110853010348717e-05, "loss": 0.519, "step": 16450 }, { "epoch": 0.3652567674054672, "grad_norm": 1.8293321132659912, "learning_rate": 1.4107674218825893e-05, "loss": 0.5037, "step": 16455 }, { "epoch": 0.3653677539649948, "grad_norm": 1.29794180393219, "learning_rate": 1.4104494927905812e-05, "loss": 0.4575, "step": 16460 }, { "epoch": 0.3654787405245225, "grad_norm": 1.054591417312622, "learning_rate": 1.4101315137975002e-05, "loss": 0.5259, "step": 16465 }, { "epoch": 0.36558972708405013, "grad_norm": 1.571337103843689, "learning_rate": 1.4098134849420055e-05, "loss": 0.4236, "step": 16470 }, { "epoch": 0.36570071364357776, "grad_norm": 0.854489803314209, "learning_rate": 1.4094954062627614e-05, "loss": 0.384, "step": 16475 }, { "epoch": 0.3658117002031054, "grad_norm": 1.1105810403823853, "learning_rate": 1.4091772777984396e-05, "loss": 0.4972, "step": 16480 }, { "epoch": 0.36592268676263306, "grad_norm": 1.42030930519104, "learning_rate": 1.4088590995877165e-05, "loss": 0.4479, "step": 16485 }, { "epoch": 0.3660336733221607, "grad_norm": 1.3728995323181152, "learning_rate": 1.408540871669275e-05, "loss": 0.5138, "step": 16490 }, { "epoch": 0.3661446598816883, "grad_norm": 0.9788410067558289, "learning_rate": 1.4082225940818047e-05, "loss": 0.4445, "step": 16495 }, { "epoch": 0.36625564644121594, "grad_norm": 1.1084668636322021, "learning_rate": 1.4079042668640002e-05, "loss": 0.4348, "step": 16500 }, { "epoch": 0.3663666330007436, "grad_norm": 1.8628714084625244, "learning_rate": 1.4075858900545631e-05, "loss": 0.4036, "step": 16505 }, { "epoch": 0.36647761956027125, "grad_norm": 1.1906403303146362, "learning_rate": 1.4072674636922009e-05, "loss": 0.376, "step": 16510 }, { "epoch": 0.3665886061197989, "grad_norm": 1.1637814044952393, "learning_rate": 1.4069489878156258e-05, "loss": 0.4762, "step": 16515 }, { "epoch": 0.36669959267932656, "grad_norm": 0.9485587477684021, "learning_rate": 1.4066304624635576e-05, "loss": 0.4694, "step": 16520 }, { "epoch": 0.3668105792388542, "grad_norm": 1.3874183893203735, "learning_rate": 1.4063118876747217e-05, "loss": 0.5263, "step": 16525 }, { "epoch": 0.3669215657983818, "grad_norm": 1.4688796997070312, "learning_rate": 1.405993263487849e-05, "loss": 0.4912, "step": 16530 }, { "epoch": 0.36703255235790944, "grad_norm": 1.0320883989334106, "learning_rate": 1.405674589941677e-05, "loss": 0.4507, "step": 16535 }, { "epoch": 0.3671435389174371, "grad_norm": 1.0644927024841309, "learning_rate": 1.405355867074949e-05, "loss": 0.5375, "step": 16540 }, { "epoch": 0.36725452547696474, "grad_norm": 1.249047875404358, "learning_rate": 1.405037094926414e-05, "loss": 0.297, "step": 16545 }, { "epoch": 0.36736551203649237, "grad_norm": 1.9036270380020142, "learning_rate": 1.4047182735348273e-05, "loss": 0.5561, "step": 16550 }, { "epoch": 0.36747649859602, "grad_norm": 2.7875096797943115, "learning_rate": 1.40439940293895e-05, "loss": 0.5732, "step": 16555 }, { "epoch": 0.3675874851555477, "grad_norm": 1.2983033657073975, "learning_rate": 1.40408048317755e-05, "loss": 0.3889, "step": 16560 }, { "epoch": 0.3676984717150753, "grad_norm": 1.0702457427978516, "learning_rate": 1.4037615142894e-05, "loss": 0.5595, "step": 16565 }, { "epoch": 0.36780945827460293, "grad_norm": 1.1802325248718262, "learning_rate": 1.4034424963132792e-05, "loss": 0.3399, "step": 16570 }, { "epoch": 0.3679204448341306, "grad_norm": 1.1893055438995361, "learning_rate": 1.4031234292879726e-05, "loss": 0.379, "step": 16575 }, { "epoch": 0.36803143139365824, "grad_norm": 1.0848939418792725, "learning_rate": 1.402804313252271e-05, "loss": 0.4896, "step": 16580 }, { "epoch": 0.36814241795318586, "grad_norm": 1.2174549102783203, "learning_rate": 1.4024851482449726e-05, "loss": 0.5846, "step": 16585 }, { "epoch": 0.3682534045127135, "grad_norm": 1.244012713432312, "learning_rate": 1.4021659343048795e-05, "loss": 0.4235, "step": 16590 }, { "epoch": 0.36836439107224117, "grad_norm": 0.7126348614692688, "learning_rate": 1.4018466714708008e-05, "loss": 0.4914, "step": 16595 }, { "epoch": 0.3684753776317688, "grad_norm": 1.1667653322219849, "learning_rate": 1.4015273597815516e-05, "loss": 0.4292, "step": 16600 }, { "epoch": 0.3685863641912964, "grad_norm": 0.7720460295677185, "learning_rate": 1.4012079992759521e-05, "loss": 0.415, "step": 16605 }, { "epoch": 0.36869735075082405, "grad_norm": 1.1298047304153442, "learning_rate": 1.4008885899928301e-05, "loss": 0.4099, "step": 16610 }, { "epoch": 0.36880833731035173, "grad_norm": 1.6862759590148926, "learning_rate": 1.4005691319710178e-05, "loss": 0.471, "step": 16615 }, { "epoch": 0.36891932386987936, "grad_norm": 0.9013825058937073, "learning_rate": 1.4002496252493538e-05, "loss": 0.4378, "step": 16620 }, { "epoch": 0.369030310429407, "grad_norm": 1.32625412940979, "learning_rate": 1.3999300698666827e-05, "loss": 0.447, "step": 16625 }, { "epoch": 0.36914129698893466, "grad_norm": 1.1559810638427734, "learning_rate": 1.399610465861855e-05, "loss": 0.3937, "step": 16630 }, { "epoch": 0.3692522835484623, "grad_norm": 1.547504186630249, "learning_rate": 1.3992908132737269e-05, "loss": 0.3431, "step": 16635 }, { "epoch": 0.3693632701079899, "grad_norm": 0.8603490591049194, "learning_rate": 1.3989711121411608e-05, "loss": 0.3781, "step": 16640 }, { "epoch": 0.36947425666751754, "grad_norm": 1.6657804250717163, "learning_rate": 1.3986513625030251e-05, "loss": 0.4678, "step": 16645 }, { "epoch": 0.3695852432270452, "grad_norm": 0.9877973198890686, "learning_rate": 1.3983315643981938e-05, "loss": 0.4872, "step": 16650 }, { "epoch": 0.36969622978657285, "grad_norm": 2.1902594566345215, "learning_rate": 1.3980117178655466e-05, "loss": 0.5882, "step": 16655 }, { "epoch": 0.3698072163461005, "grad_norm": 0.9402801394462585, "learning_rate": 1.3976918229439698e-05, "loss": 0.4461, "step": 16660 }, { "epoch": 0.3699182029056281, "grad_norm": 0.9539381861686707, "learning_rate": 1.3973718796723546e-05, "loss": 0.338, "step": 16665 }, { "epoch": 0.3700291894651558, "grad_norm": 0.9745631814002991, "learning_rate": 1.3970518880895992e-05, "loss": 0.5081, "step": 16670 }, { "epoch": 0.3701401760246834, "grad_norm": 1.3075889348983765, "learning_rate": 1.3967318482346066e-05, "loss": 0.4365, "step": 16675 }, { "epoch": 0.37025116258421104, "grad_norm": 1.23758864402771, "learning_rate": 1.3964117601462865e-05, "loss": 0.4368, "step": 16680 }, { "epoch": 0.3703621491437387, "grad_norm": 1.430646300315857, "learning_rate": 1.3960916238635542e-05, "loss": 0.3723, "step": 16685 }, { "epoch": 0.37047313570326634, "grad_norm": 1.2952028512954712, "learning_rate": 1.3957714394253305e-05, "loss": 0.2682, "step": 16690 }, { "epoch": 0.37058412226279397, "grad_norm": 1.1494914293289185, "learning_rate": 1.3954512068705425e-05, "loss": 0.397, "step": 16695 }, { "epoch": 0.3706951088223216, "grad_norm": 1.0773718357086182, "learning_rate": 1.3951309262381231e-05, "loss": 0.3331, "step": 16700 }, { "epoch": 0.3708060953818493, "grad_norm": 1.6925926208496094, "learning_rate": 1.3948105975670113e-05, "loss": 0.4808, "step": 16705 }, { "epoch": 0.3709170819413769, "grad_norm": 0.9642808437347412, "learning_rate": 1.3944902208961507e-05, "loss": 0.4677, "step": 16710 }, { "epoch": 0.37102806850090453, "grad_norm": 0.9606306552886963, "learning_rate": 1.394169796264492e-05, "loss": 0.4367, "step": 16715 }, { "epoch": 0.37113905506043215, "grad_norm": 1.0823974609375, "learning_rate": 1.3938493237109914e-05, "loss": 0.4162, "step": 16720 }, { "epoch": 0.37125004161995984, "grad_norm": 2.175527811050415, "learning_rate": 1.3935288032746108e-05, "loss": 0.4954, "step": 16725 }, { "epoch": 0.37136102817948746, "grad_norm": 1.030967116355896, "learning_rate": 1.3932082349943184e-05, "loss": 0.6295, "step": 16730 }, { "epoch": 0.3714720147390151, "grad_norm": 1.0239731073379517, "learning_rate": 1.3928876189090874e-05, "loss": 0.4318, "step": 16735 }, { "epoch": 0.37158300129854277, "grad_norm": 1.2536979913711548, "learning_rate": 1.3925669550578973e-05, "loss": 0.3785, "step": 16740 }, { "epoch": 0.3716939878580704, "grad_norm": 1.4033725261688232, "learning_rate": 1.3922462434797335e-05, "loss": 0.5818, "step": 16745 }, { "epoch": 0.371804974417598, "grad_norm": 1.1884605884552002, "learning_rate": 1.3919254842135865e-05, "loss": 0.4141, "step": 16750 }, { "epoch": 0.37191596097712565, "grad_norm": 1.4534409046173096, "learning_rate": 1.3916046772984539e-05, "loss": 0.4574, "step": 16755 }, { "epoch": 0.37202694753665333, "grad_norm": 1.5618983507156372, "learning_rate": 1.391283822773338e-05, "loss": 0.4347, "step": 16760 }, { "epoch": 0.37213793409618096, "grad_norm": 1.5037851333618164, "learning_rate": 1.390962920677247e-05, "loss": 0.3729, "step": 16765 }, { "epoch": 0.3722489206557086, "grad_norm": 0.8089569807052612, "learning_rate": 1.3906419710491954e-05, "loss": 0.4113, "step": 16770 }, { "epoch": 0.3723599072152362, "grad_norm": 0.9394587278366089, "learning_rate": 1.390320973928203e-05, "loss": 0.4543, "step": 16775 }, { "epoch": 0.3724708937747639, "grad_norm": 1.4900579452514648, "learning_rate": 1.389999929353296e-05, "loss": 0.6413, "step": 16780 }, { "epoch": 0.3725818803342915, "grad_norm": 1.5743154287338257, "learning_rate": 1.3896788373635053e-05, "loss": 0.4143, "step": 16785 }, { "epoch": 0.37269286689381914, "grad_norm": 1.6529701948165894, "learning_rate": 1.3893576979978683e-05, "loss": 0.3333, "step": 16790 }, { "epoch": 0.3728038534533468, "grad_norm": 1.025791883468628, "learning_rate": 1.3890365112954282e-05, "loss": 0.4461, "step": 16795 }, { "epoch": 0.37291484001287445, "grad_norm": 1.5343742370605469, "learning_rate": 1.3887152772952339e-05, "loss": 0.386, "step": 16800 }, { "epoch": 0.3730258265724021, "grad_norm": 1.3995214700698853, "learning_rate": 1.3883939960363397e-05, "loss": 0.5093, "step": 16805 }, { "epoch": 0.3731368131319297, "grad_norm": 2.0946216583251953, "learning_rate": 1.3880726675578063e-05, "loss": 0.3601, "step": 16810 }, { "epoch": 0.3732477996914574, "grad_norm": 1.2544093132019043, "learning_rate": 1.387751291898699e-05, "loss": 0.4319, "step": 16815 }, { "epoch": 0.373358786250985, "grad_norm": 1.000132441520691, "learning_rate": 1.3874298690980904e-05, "loss": 0.3898, "step": 16820 }, { "epoch": 0.37346977281051263, "grad_norm": 1.495603322982788, "learning_rate": 1.3871083991950576e-05, "loss": 0.4117, "step": 16825 }, { "epoch": 0.37358075937004026, "grad_norm": 0.9588593244552612, "learning_rate": 1.3867868822286838e-05, "loss": 0.4232, "step": 16830 }, { "epoch": 0.37369174592956794, "grad_norm": 1.1229437589645386, "learning_rate": 1.386465318238058e-05, "loss": 0.3853, "step": 16835 }, { "epoch": 0.37380273248909557, "grad_norm": 1.3521358966827393, "learning_rate": 1.3861437072622752e-05, "loss": 0.4987, "step": 16840 }, { "epoch": 0.3739137190486232, "grad_norm": 0.968280017375946, "learning_rate": 1.3858220493404353e-05, "loss": 0.3644, "step": 16845 }, { "epoch": 0.3740247056081509, "grad_norm": 1.8695068359375, "learning_rate": 1.3855003445116446e-05, "loss": 0.5406, "step": 16850 }, { "epoch": 0.3741356921676785, "grad_norm": 1.0490186214447021, "learning_rate": 1.385178592815015e-05, "loss": 0.4572, "step": 16855 }, { "epoch": 0.37424667872720613, "grad_norm": 0.7611112594604492, "learning_rate": 1.3848567942896636e-05, "loss": 0.4575, "step": 16860 }, { "epoch": 0.37435766528673375, "grad_norm": 1.1203293800354004, "learning_rate": 1.384534948974714e-05, "loss": 0.4189, "step": 16865 }, { "epoch": 0.37446865184626144, "grad_norm": 1.5158090591430664, "learning_rate": 1.3842130569092951e-05, "loss": 0.5554, "step": 16870 }, { "epoch": 0.37457963840578906, "grad_norm": 1.3477773666381836, "learning_rate": 1.383891118132541e-05, "loss": 0.5403, "step": 16875 }, { "epoch": 0.3746906249653167, "grad_norm": 1.0920310020446777, "learning_rate": 1.3835691326835925e-05, "loss": 0.3967, "step": 16880 }, { "epoch": 0.3748016115248443, "grad_norm": 0.9676805138587952, "learning_rate": 1.383247100601595e-05, "loss": 0.4951, "step": 16885 }, { "epoch": 0.374912598084372, "grad_norm": 1.1773241758346558, "learning_rate": 1.3829250219257007e-05, "loss": 0.4838, "step": 16890 }, { "epoch": 0.3750235846438996, "grad_norm": 1.895973801612854, "learning_rate": 1.3826028966950662e-05, "loss": 0.6721, "step": 16895 }, { "epoch": 0.37513457120342725, "grad_norm": 1.123016357421875, "learning_rate": 1.3822807249488545e-05, "loss": 0.5281, "step": 16900 }, { "epoch": 0.37524555776295493, "grad_norm": 1.2969212532043457, "learning_rate": 1.3819585067262347e-05, "loss": 0.4464, "step": 16905 }, { "epoch": 0.37535654432248255, "grad_norm": 1.220132827758789, "learning_rate": 1.3816362420663805e-05, "loss": 0.4504, "step": 16910 }, { "epoch": 0.3754675308820102, "grad_norm": 1.846871018409729, "learning_rate": 1.3813139310084715e-05, "loss": 0.5145, "step": 16915 }, { "epoch": 0.3755785174415378, "grad_norm": 1.588097095489502, "learning_rate": 1.3809915735916942e-05, "loss": 0.5535, "step": 16920 }, { "epoch": 0.3756895040010655, "grad_norm": 0.79483962059021, "learning_rate": 1.3806691698552388e-05, "loss": 0.5354, "step": 16925 }, { "epoch": 0.3758004905605931, "grad_norm": 1.1435658931732178, "learning_rate": 1.3803467198383025e-05, "loss": 0.5275, "step": 16930 }, { "epoch": 0.37591147712012074, "grad_norm": 1.563844919204712, "learning_rate": 1.3800242235800876e-05, "loss": 0.3377, "step": 16935 }, { "epoch": 0.3760224636796484, "grad_norm": 1.347374439239502, "learning_rate": 1.3797016811198018e-05, "loss": 0.312, "step": 16940 }, { "epoch": 0.37613345023917605, "grad_norm": 1.2091847658157349, "learning_rate": 1.3793790924966593e-05, "loss": 0.5454, "step": 16945 }, { "epoch": 0.3762444367987037, "grad_norm": 1.4304174184799194, "learning_rate": 1.3790564577498791e-05, "loss": 0.3816, "step": 16950 }, { "epoch": 0.3763554233582313, "grad_norm": 0.9188779592514038, "learning_rate": 1.3787337769186859e-05, "loss": 0.4192, "step": 16955 }, { "epoch": 0.376466409917759, "grad_norm": 1.6021742820739746, "learning_rate": 1.3784110500423104e-05, "loss": 0.5217, "step": 16960 }, { "epoch": 0.3765773964772866, "grad_norm": 1.2071577310562134, "learning_rate": 1.3780882771599886e-05, "loss": 0.524, "step": 16965 }, { "epoch": 0.37668838303681423, "grad_norm": 0.8562250733375549, "learning_rate": 1.3777654583109618e-05, "loss": 0.4608, "step": 16970 }, { "epoch": 0.37679936959634186, "grad_norm": 1.6524200439453125, "learning_rate": 1.3774425935344775e-05, "loss": 0.5862, "step": 16975 }, { "epoch": 0.37691035615586954, "grad_norm": 1.103670597076416, "learning_rate": 1.3771196828697888e-05, "loss": 0.3822, "step": 16980 }, { "epoch": 0.37702134271539717, "grad_norm": 0.7237701416015625, "learning_rate": 1.3767967263561538e-05, "loss": 0.3177, "step": 16985 }, { "epoch": 0.3771323292749248, "grad_norm": 0.7688775062561035, "learning_rate": 1.3764737240328363e-05, "loss": 0.5407, "step": 16990 }, { "epoch": 0.3772433158344525, "grad_norm": 1.1897283792495728, "learning_rate": 1.3761506759391061e-05, "loss": 0.5392, "step": 16995 }, { "epoch": 0.3773543023939801, "grad_norm": 1.4582651853561401, "learning_rate": 1.3758275821142382e-05, "loss": 0.4387, "step": 17000 }, { "epoch": 0.3774652889535077, "grad_norm": 1.1028008460998535, "learning_rate": 1.3755044425975132e-05, "loss": 0.4452, "step": 17005 }, { "epoch": 0.37757627551303535, "grad_norm": 1.4182307720184326, "learning_rate": 1.3751812574282177e-05, "loss": 0.4905, "step": 17010 }, { "epoch": 0.37768726207256303, "grad_norm": 1.1496057510375977, "learning_rate": 1.374858026645643e-05, "loss": 0.4954, "step": 17015 }, { "epoch": 0.37779824863209066, "grad_norm": 1.2465380430221558, "learning_rate": 1.3745347502890866e-05, "loss": 0.5762, "step": 17020 }, { "epoch": 0.3779092351916183, "grad_norm": 1.1184829473495483, "learning_rate": 1.3742114283978514e-05, "loss": 0.4166, "step": 17025 }, { "epoch": 0.3780202217511459, "grad_norm": 0.7921366691589355, "learning_rate": 1.373888061011246e-05, "loss": 0.3554, "step": 17030 }, { "epoch": 0.3781312083106736, "grad_norm": 0.7676562666893005, "learning_rate": 1.3735646481685836e-05, "loss": 0.3768, "step": 17035 }, { "epoch": 0.3782421948702012, "grad_norm": 2.179152488708496, "learning_rate": 1.3732411899091844e-05, "loss": 0.4078, "step": 17040 }, { "epoch": 0.37835318142972885, "grad_norm": 1.5713518857955933, "learning_rate": 1.372917686272373e-05, "loss": 0.5496, "step": 17045 }, { "epoch": 0.37846416798925653, "grad_norm": 2.061424732208252, "learning_rate": 1.3725941372974801e-05, "loss": 0.5781, "step": 17050 }, { "epoch": 0.37857515454878415, "grad_norm": 0.8337643146514893, "learning_rate": 1.3722705430238414e-05, "loss": 0.5363, "step": 17055 }, { "epoch": 0.3786861411083118, "grad_norm": 1.2876572608947754, "learning_rate": 1.3719469034907984e-05, "loss": 0.5306, "step": 17060 }, { "epoch": 0.3787971276678394, "grad_norm": 1.8232654333114624, "learning_rate": 1.3716232187376985e-05, "loss": 0.5426, "step": 17065 }, { "epoch": 0.3789081142273671, "grad_norm": 1.371899127960205, "learning_rate": 1.371299488803894e-05, "loss": 0.6546, "step": 17070 }, { "epoch": 0.3790191007868947, "grad_norm": 1.1025066375732422, "learning_rate": 1.370975713728743e-05, "loss": 0.4045, "step": 17075 }, { "epoch": 0.37913008734642234, "grad_norm": 1.0858376026153564, "learning_rate": 1.3706518935516087e-05, "loss": 0.4726, "step": 17080 }, { "epoch": 0.37924107390594997, "grad_norm": 1.4351253509521484, "learning_rate": 1.3703280283118601e-05, "loss": 0.3277, "step": 17085 }, { "epoch": 0.37935206046547765, "grad_norm": 0.8693327903747559, "learning_rate": 1.3700041180488721e-05, "loss": 0.5173, "step": 17090 }, { "epoch": 0.3794630470250053, "grad_norm": 0.8443187475204468, "learning_rate": 1.3696801628020243e-05, "loss": 0.3616, "step": 17095 }, { "epoch": 0.3795740335845329, "grad_norm": 1.3002277612686157, "learning_rate": 1.3693561626107021e-05, "loss": 0.5085, "step": 17100 }, { "epoch": 0.3796850201440606, "grad_norm": 1.221724271774292, "learning_rate": 1.3690321175142965e-05, "loss": 0.3993, "step": 17105 }, { "epoch": 0.3797960067035882, "grad_norm": 0.9888561964035034, "learning_rate": 1.3687080275522034e-05, "loss": 0.4385, "step": 17110 }, { "epoch": 0.37990699326311583, "grad_norm": 1.6944843530654907, "learning_rate": 1.3683838927638249e-05, "loss": 0.4659, "step": 17115 }, { "epoch": 0.38001797982264346, "grad_norm": 0.8631107211112976, "learning_rate": 1.368059713188568e-05, "loss": 0.5218, "step": 17120 }, { "epoch": 0.38012896638217114, "grad_norm": 1.0087155103683472, "learning_rate": 1.3677354888658458e-05, "loss": 0.5457, "step": 17125 }, { "epoch": 0.38023995294169877, "grad_norm": 1.0191081762313843, "learning_rate": 1.3674112198350764e-05, "loss": 0.3824, "step": 17130 }, { "epoch": 0.3803509395012264, "grad_norm": 1.5957056283950806, "learning_rate": 1.3670869061356829e-05, "loss": 0.4322, "step": 17135 }, { "epoch": 0.380461926060754, "grad_norm": 1.5974568128585815, "learning_rate": 1.3667625478070945e-05, "loss": 0.3819, "step": 17140 }, { "epoch": 0.3805729126202817, "grad_norm": 0.8775105476379395, "learning_rate": 1.3664381448887454e-05, "loss": 0.5077, "step": 17145 }, { "epoch": 0.3806838991798093, "grad_norm": 1.3796273469924927, "learning_rate": 1.3661136974200757e-05, "loss": 0.3658, "step": 17150 }, { "epoch": 0.38079488573933695, "grad_norm": 1.0044777393341064, "learning_rate": 1.3657892054405304e-05, "loss": 0.4779, "step": 17155 }, { "epoch": 0.38090587229886463, "grad_norm": 0.881597101688385, "learning_rate": 1.3654646689895605e-05, "loss": 0.4481, "step": 17160 }, { "epoch": 0.38101685885839226, "grad_norm": 1.505136489868164, "learning_rate": 1.3651400881066217e-05, "loss": 0.6237, "step": 17165 }, { "epoch": 0.3811278454179199, "grad_norm": 0.7536532282829285, "learning_rate": 1.3648154628311754e-05, "loss": 0.5762, "step": 17170 }, { "epoch": 0.3812388319774475, "grad_norm": 1.034916639328003, "learning_rate": 1.3644907932026887e-05, "loss": 0.3791, "step": 17175 }, { "epoch": 0.3813498185369752, "grad_norm": 1.6484735012054443, "learning_rate": 1.364166079260634e-05, "loss": 0.2924, "step": 17180 }, { "epoch": 0.3814608050965028, "grad_norm": 0.9013214111328125, "learning_rate": 1.3638413210444885e-05, "loss": 0.357, "step": 17185 }, { "epoch": 0.38157179165603045, "grad_norm": 1.3303017616271973, "learning_rate": 1.3635165185937354e-05, "loss": 0.5829, "step": 17190 }, { "epoch": 0.38168277821555807, "grad_norm": 0.9839069247245789, "learning_rate": 1.3631916719478633e-05, "loss": 0.5365, "step": 17195 }, { "epoch": 0.38179376477508575, "grad_norm": 1.1940916776657104, "learning_rate": 1.3628667811463654e-05, "loss": 0.3673, "step": 17200 }, { "epoch": 0.3819047513346134, "grad_norm": 1.1215497255325317, "learning_rate": 1.3625418462287414e-05, "loss": 0.3505, "step": 17205 }, { "epoch": 0.382015737894141, "grad_norm": 1.1891454458236694, "learning_rate": 1.3622168672344957e-05, "loss": 0.6437, "step": 17210 }, { "epoch": 0.3821267244536687, "grad_norm": 1.0986990928649902, "learning_rate": 1.361891844203138e-05, "loss": 0.427, "step": 17215 }, { "epoch": 0.3822377110131963, "grad_norm": 1.3629257678985596, "learning_rate": 1.3615667771741836e-05, "loss": 0.4027, "step": 17220 }, { "epoch": 0.38234869757272394, "grad_norm": 1.3671947717666626, "learning_rate": 1.3612416661871532e-05, "loss": 0.4503, "step": 17225 }, { "epoch": 0.38245968413225157, "grad_norm": 1.2490761280059814, "learning_rate": 1.3609165112815721e-05, "loss": 0.3434, "step": 17230 }, { "epoch": 0.38257067069177925, "grad_norm": 1.1291104555130005, "learning_rate": 1.3605913124969723e-05, "loss": 0.4443, "step": 17235 }, { "epoch": 0.3826816572513069, "grad_norm": 1.559004306793213, "learning_rate": 1.36026606987289e-05, "loss": 0.5403, "step": 17240 }, { "epoch": 0.3827926438108345, "grad_norm": 1.1736834049224854, "learning_rate": 1.3599407834488672e-05, "loss": 0.4011, "step": 17245 }, { "epoch": 0.3829036303703621, "grad_norm": 2.812455654144287, "learning_rate": 1.359615453264451e-05, "loss": 0.5226, "step": 17250 }, { "epoch": 0.3830146169298898, "grad_norm": 1.8055833578109741, "learning_rate": 1.3592900793591941e-05, "loss": 0.4333, "step": 17255 }, { "epoch": 0.38312560348941743, "grad_norm": 1.520922064781189, "learning_rate": 1.3589646617726545e-05, "loss": 0.5924, "step": 17260 }, { "epoch": 0.38323659004894506, "grad_norm": 1.0311449766159058, "learning_rate": 1.358639200544395e-05, "loss": 0.4824, "step": 17265 }, { "epoch": 0.38334757660847274, "grad_norm": 1.1037408113479614, "learning_rate": 1.3583136957139841e-05, "loss": 0.4822, "step": 17270 }, { "epoch": 0.38345856316800037, "grad_norm": 1.3447061777114868, "learning_rate": 1.3579881473209962e-05, "loss": 0.4643, "step": 17275 }, { "epoch": 0.383569549727528, "grad_norm": 1.1121246814727783, "learning_rate": 1.3576625554050095e-05, "loss": 0.4628, "step": 17280 }, { "epoch": 0.3836805362870556, "grad_norm": 0.7411977052688599, "learning_rate": 1.357336920005609e-05, "loss": 0.4589, "step": 17285 }, { "epoch": 0.3837915228465833, "grad_norm": 1.5663127899169922, "learning_rate": 1.3570112411623843e-05, "loss": 0.4138, "step": 17290 }, { "epoch": 0.3839025094061109, "grad_norm": 1.0521498918533325, "learning_rate": 1.3566855189149302e-05, "loss": 0.4277, "step": 17295 }, { "epoch": 0.38401349596563855, "grad_norm": 1.4922767877578735, "learning_rate": 1.3563597533028467e-05, "loss": 0.473, "step": 17300 }, { "epoch": 0.3841244825251662, "grad_norm": 0.9321845173835754, "learning_rate": 1.35603394436574e-05, "loss": 0.5097, "step": 17305 }, { "epoch": 0.38423546908469386, "grad_norm": 1.0778831243515015, "learning_rate": 1.35570809214322e-05, "loss": 0.4739, "step": 17310 }, { "epoch": 0.3843464556442215, "grad_norm": 1.1382580995559692, "learning_rate": 1.355382196674903e-05, "loss": 0.4179, "step": 17315 }, { "epoch": 0.3844574422037491, "grad_norm": 1.2955654859542847, "learning_rate": 1.3550562580004108e-05, "loss": 0.2823, "step": 17320 }, { "epoch": 0.3845684287632768, "grad_norm": 1.0938900709152222, "learning_rate": 1.3547302761593693e-05, "loss": 0.3228, "step": 17325 }, { "epoch": 0.3846794153228044, "grad_norm": 1.351600170135498, "learning_rate": 1.3544042511914104e-05, "loss": 0.6427, "step": 17330 }, { "epoch": 0.38479040188233204, "grad_norm": 0.9713222980499268, "learning_rate": 1.3540781831361713e-05, "loss": 0.3738, "step": 17335 }, { "epoch": 0.38490138844185967, "grad_norm": 1.0599182844161987, "learning_rate": 1.3537520720332943e-05, "loss": 0.4398, "step": 17340 }, { "epoch": 0.38501237500138735, "grad_norm": 0.9963597059249878, "learning_rate": 1.3534259179224265e-05, "loss": 0.3719, "step": 17345 }, { "epoch": 0.385123361560915, "grad_norm": 1.0552200078964233, "learning_rate": 1.3530997208432211e-05, "loss": 0.3478, "step": 17350 }, { "epoch": 0.3852343481204426, "grad_norm": 1.0920829772949219, "learning_rate": 1.3527734808353356e-05, "loss": 0.4186, "step": 17355 }, { "epoch": 0.38534533467997023, "grad_norm": 1.1607277393341064, "learning_rate": 1.3524471979384339e-05, "loss": 0.469, "step": 17360 }, { "epoch": 0.3854563212394979, "grad_norm": 1.2528059482574463, "learning_rate": 1.3521208721921836e-05, "loss": 0.4884, "step": 17365 }, { "epoch": 0.38556730779902554, "grad_norm": 1.1077717542648315, "learning_rate": 1.3517945036362585e-05, "loss": 0.5902, "step": 17370 }, { "epoch": 0.38567829435855316, "grad_norm": 0.9106099605560303, "learning_rate": 1.3514680923103373e-05, "loss": 0.5072, "step": 17375 }, { "epoch": 0.38578928091808085, "grad_norm": 1.0978457927703857, "learning_rate": 1.3511416382541048e-05, "loss": 0.4089, "step": 17380 }, { "epoch": 0.38590026747760847, "grad_norm": 0.7796788215637207, "learning_rate": 1.350815141507249e-05, "loss": 0.4714, "step": 17385 }, { "epoch": 0.3860112540371361, "grad_norm": 0.7110152244567871, "learning_rate": 1.3504886021094653e-05, "loss": 0.4504, "step": 17390 }, { "epoch": 0.3861222405966637, "grad_norm": 1.5071749687194824, "learning_rate": 1.3501620201004524e-05, "loss": 0.5551, "step": 17395 }, { "epoch": 0.3862332271561914, "grad_norm": 1.5758322477340698, "learning_rate": 1.3498353955199157e-05, "loss": 0.4305, "step": 17400 }, { "epoch": 0.38634421371571903, "grad_norm": 1.0656967163085938, "learning_rate": 1.3495087284075648e-05, "loss": 0.5435, "step": 17405 }, { "epoch": 0.38645520027524666, "grad_norm": 1.3301317691802979, "learning_rate": 1.3491820188031148e-05, "loss": 0.4687, "step": 17410 }, { "epoch": 0.3865661868347743, "grad_norm": 1.1281219720840454, "learning_rate": 1.348855266746286e-05, "loss": 0.4022, "step": 17415 }, { "epoch": 0.38667717339430197, "grad_norm": 1.6176022291183472, "learning_rate": 1.348528472276804e-05, "loss": 0.482, "step": 17420 }, { "epoch": 0.3867881599538296, "grad_norm": 1.332358717918396, "learning_rate": 1.348201635434399e-05, "loss": 0.4974, "step": 17425 }, { "epoch": 0.3868991465133572, "grad_norm": 0.8697658777236938, "learning_rate": 1.3478747562588068e-05, "loss": 0.5393, "step": 17430 }, { "epoch": 0.3870101330728849, "grad_norm": 1.16774582862854, "learning_rate": 1.3475478347897688e-05, "loss": 0.6004, "step": 17435 }, { "epoch": 0.3871211196324125, "grad_norm": 1.2171958684921265, "learning_rate": 1.3472208710670305e-05, "loss": 0.3667, "step": 17440 }, { "epoch": 0.38723210619194015, "grad_norm": 1.1921159029006958, "learning_rate": 1.3468938651303431e-05, "loss": 0.488, "step": 17445 }, { "epoch": 0.3873430927514678, "grad_norm": 0.9221675395965576, "learning_rate": 1.3465668170194633e-05, "loss": 0.5209, "step": 17450 }, { "epoch": 0.38745407931099546, "grad_norm": 1.400107502937317, "learning_rate": 1.3462397267741516e-05, "loss": 0.6247, "step": 17455 }, { "epoch": 0.3875650658705231, "grad_norm": 1.6427634954452515, "learning_rate": 1.3459125944341755e-05, "loss": 0.3778, "step": 17460 }, { "epoch": 0.3876760524300507, "grad_norm": 1.0119582414627075, "learning_rate": 1.3455854200393064e-05, "loss": 0.4266, "step": 17465 }, { "epoch": 0.38778703898957834, "grad_norm": 1.327309489250183, "learning_rate": 1.3452582036293205e-05, "loss": 0.5284, "step": 17470 }, { "epoch": 0.387898025549106, "grad_norm": 1.0754491090774536, "learning_rate": 1.3449309452440004e-05, "loss": 0.5902, "step": 17475 }, { "epoch": 0.38800901210863364, "grad_norm": 1.4490890502929688, "learning_rate": 1.3446036449231328e-05, "loss": 0.5134, "step": 17480 }, { "epoch": 0.38811999866816127, "grad_norm": 0.7128272652626038, "learning_rate": 1.3442763027065096e-05, "loss": 0.5247, "step": 17485 }, { "epoch": 0.38823098522768895, "grad_norm": 0.9914596676826477, "learning_rate": 1.3439489186339283e-05, "loss": 0.3952, "step": 17490 }, { "epoch": 0.3883419717872166, "grad_norm": 1.220529556274414, "learning_rate": 1.3436214927451907e-05, "loss": 0.4257, "step": 17495 }, { "epoch": 0.3884529583467442, "grad_norm": 1.3346257209777832, "learning_rate": 1.3432940250801047e-05, "loss": 0.4396, "step": 17500 }, { "epoch": 0.38856394490627183, "grad_norm": 1.3106735944747925, "learning_rate": 1.3429665156784825e-05, "loss": 0.4336, "step": 17505 }, { "epoch": 0.3886749314657995, "grad_norm": 1.7288190126419067, "learning_rate": 1.3426389645801415e-05, "loss": 0.3259, "step": 17510 }, { "epoch": 0.38878591802532714, "grad_norm": 1.962158441543579, "learning_rate": 1.3423113718249044e-05, "loss": 0.422, "step": 17515 }, { "epoch": 0.38889690458485476, "grad_norm": 0.9909548163414001, "learning_rate": 1.3419837374525986e-05, "loss": 0.4822, "step": 17520 }, { "epoch": 0.3890078911443824, "grad_norm": 1.0369338989257812, "learning_rate": 1.3416560615030567e-05, "loss": 0.4333, "step": 17525 }, { "epoch": 0.38911887770391007, "grad_norm": 0.9148257970809937, "learning_rate": 1.3413283440161168e-05, "loss": 0.386, "step": 17530 }, { "epoch": 0.3892298642634377, "grad_norm": 1.404958724975586, "learning_rate": 1.3410005850316219e-05, "loss": 0.4468, "step": 17535 }, { "epoch": 0.3893408508229653, "grad_norm": 1.2354094982147217, "learning_rate": 1.340672784589419e-05, "loss": 0.4362, "step": 17540 }, { "epoch": 0.389451837382493, "grad_norm": 0.912862241268158, "learning_rate": 1.3403449427293617e-05, "loss": 0.7274, "step": 17545 }, { "epoch": 0.38956282394202063, "grad_norm": 1.044878602027893, "learning_rate": 1.3400170594913078e-05, "loss": 0.3184, "step": 17550 }, { "epoch": 0.38967381050154826, "grad_norm": 1.0582435131072998, "learning_rate": 1.33968913491512e-05, "loss": 0.3822, "step": 17555 }, { "epoch": 0.3897847970610759, "grad_norm": 1.0423102378845215, "learning_rate": 1.3393611690406665e-05, "loss": 0.4363, "step": 17560 }, { "epoch": 0.38989578362060356, "grad_norm": 1.4456026554107666, "learning_rate": 1.3390331619078198e-05, "loss": 0.4593, "step": 17565 }, { "epoch": 0.3900067701801312, "grad_norm": 1.5314327478408813, "learning_rate": 1.3387051135564588e-05, "loss": 0.3941, "step": 17570 }, { "epoch": 0.3901177567396588, "grad_norm": 1.1662486791610718, "learning_rate": 1.338377024026466e-05, "loss": 0.424, "step": 17575 }, { "epoch": 0.39022874329918644, "grad_norm": 1.3233836889266968, "learning_rate": 1.3380488933577294e-05, "loss": 0.4227, "step": 17580 }, { "epoch": 0.3903397298587141, "grad_norm": 0.7573814988136292, "learning_rate": 1.337720721590142e-05, "loss": 0.3918, "step": 17585 }, { "epoch": 0.39045071641824175, "grad_norm": 1.548936367034912, "learning_rate": 1.3373925087636017e-05, "loss": 0.5082, "step": 17590 }, { "epoch": 0.3905617029777694, "grad_norm": 0.9623432755470276, "learning_rate": 1.3370642549180117e-05, "loss": 0.3315, "step": 17595 }, { "epoch": 0.39067268953729706, "grad_norm": 0.876105546951294, "learning_rate": 1.3367359600932803e-05, "loss": 0.4066, "step": 17600 }, { "epoch": 0.3907836760968247, "grad_norm": 1.3079777956008911, "learning_rate": 1.3364076243293203e-05, "loss": 0.5509, "step": 17605 }, { "epoch": 0.3908946626563523, "grad_norm": 1.271996021270752, "learning_rate": 1.3360792476660494e-05, "loss": 0.429, "step": 17610 }, { "epoch": 0.39100564921587994, "grad_norm": 0.795352578163147, "learning_rate": 1.3357508301433905e-05, "loss": 0.3334, "step": 17615 }, { "epoch": 0.3911166357754076, "grad_norm": 1.3621175289154053, "learning_rate": 1.335422371801272e-05, "loss": 0.5516, "step": 17620 }, { "epoch": 0.39122762233493524, "grad_norm": 1.1170103549957275, "learning_rate": 1.3350938726796261e-05, "loss": 0.3054, "step": 17625 }, { "epoch": 0.39133860889446287, "grad_norm": 1.2189823389053345, "learning_rate": 1.3347653328183912e-05, "loss": 0.5544, "step": 17630 }, { "epoch": 0.3914495954539905, "grad_norm": 1.1982585191726685, "learning_rate": 1.3344367522575098e-05, "loss": 0.3228, "step": 17635 }, { "epoch": 0.3915605820135182, "grad_norm": 1.4040985107421875, "learning_rate": 1.3341081310369297e-05, "loss": 0.5198, "step": 17640 }, { "epoch": 0.3916715685730458, "grad_norm": 1.0702539682388306, "learning_rate": 1.3337794691966034e-05, "loss": 0.3367, "step": 17645 }, { "epoch": 0.39178255513257343, "grad_norm": 1.5340244770050049, "learning_rate": 1.3334507667764892e-05, "loss": 0.5923, "step": 17650 }, { "epoch": 0.3918935416921011, "grad_norm": 1.4048302173614502, "learning_rate": 1.3331220238165485e-05, "loss": 0.5435, "step": 17655 }, { "epoch": 0.39200452825162874, "grad_norm": 1.0287718772888184, "learning_rate": 1.3327932403567493e-05, "loss": 0.5771, "step": 17660 }, { "epoch": 0.39211551481115636, "grad_norm": 1.8984160423278809, "learning_rate": 1.3324644164370643e-05, "loss": 0.4725, "step": 17665 }, { "epoch": 0.392226501370684, "grad_norm": 1.6385459899902344, "learning_rate": 1.3321355520974708e-05, "loss": 0.5825, "step": 17670 }, { "epoch": 0.39233748793021167, "grad_norm": 1.5618573427200317, "learning_rate": 1.3318066473779504e-05, "loss": 0.4333, "step": 17675 }, { "epoch": 0.3924484744897393, "grad_norm": 1.079800009727478, "learning_rate": 1.3314777023184907e-05, "loss": 0.4186, "step": 17680 }, { "epoch": 0.3925594610492669, "grad_norm": 0.9325678944587708, "learning_rate": 1.3311487169590835e-05, "loss": 0.496, "step": 17685 }, { "epoch": 0.39267044760879455, "grad_norm": 1.4722448587417603, "learning_rate": 1.330819691339726e-05, "loss": 0.4277, "step": 17690 }, { "epoch": 0.39278143416832223, "grad_norm": 0.955633282661438, "learning_rate": 1.33049062550042e-05, "loss": 0.4529, "step": 17695 }, { "epoch": 0.39289242072784986, "grad_norm": 1.27122962474823, "learning_rate": 1.330161519481172e-05, "loss": 0.462, "step": 17700 }, { "epoch": 0.3930034072873775, "grad_norm": 0.7992278933525085, "learning_rate": 1.3298323733219938e-05, "loss": 0.5928, "step": 17705 }, { "epoch": 0.39311439384690516, "grad_norm": 1.091391921043396, "learning_rate": 1.3295031870629016e-05, "loss": 0.5228, "step": 17710 }, { "epoch": 0.3932253804064328, "grad_norm": 1.3356877565383911, "learning_rate": 1.3291739607439173e-05, "loss": 0.4639, "step": 17715 }, { "epoch": 0.3933363669659604, "grad_norm": 1.7986055612564087, "learning_rate": 1.3288446944050668e-05, "loss": 0.484, "step": 17720 }, { "epoch": 0.39344735352548804, "grad_norm": 1.2770730257034302, "learning_rate": 1.328515388086381e-05, "loss": 0.5049, "step": 17725 }, { "epoch": 0.3935583400850157, "grad_norm": 1.0809767246246338, "learning_rate": 1.3281860418278962e-05, "loss": 0.438, "step": 17730 }, { "epoch": 0.39366932664454335, "grad_norm": 1.3970037698745728, "learning_rate": 1.327856655669653e-05, "loss": 0.4304, "step": 17735 }, { "epoch": 0.393780313204071, "grad_norm": 1.239540934562683, "learning_rate": 1.3275272296516973e-05, "loss": 0.5295, "step": 17740 }, { "epoch": 0.3938912997635986, "grad_norm": 2.00631046295166, "learning_rate": 1.3271977638140794e-05, "loss": 0.395, "step": 17745 }, { "epoch": 0.3940022863231263, "grad_norm": 0.9444000124931335, "learning_rate": 1.3268682581968547e-05, "loss": 0.4573, "step": 17750 }, { "epoch": 0.3941132728826539, "grad_norm": 1.2916285991668701, "learning_rate": 1.3265387128400833e-05, "loss": 0.4818, "step": 17755 }, { "epoch": 0.39422425944218153, "grad_norm": 0.8935806751251221, "learning_rate": 1.3262091277838304e-05, "loss": 0.4236, "step": 17760 }, { "epoch": 0.3943352460017092, "grad_norm": 1.304722547531128, "learning_rate": 1.325879503068166e-05, "loss": 0.5102, "step": 17765 }, { "epoch": 0.39444623256123684, "grad_norm": 1.0897523164749146, "learning_rate": 1.3255498387331643e-05, "loss": 0.4259, "step": 17770 }, { "epoch": 0.39455721912076447, "grad_norm": 0.8314265608787537, "learning_rate": 1.325220134818905e-05, "loss": 0.4265, "step": 17775 }, { "epoch": 0.3946682056802921, "grad_norm": 1.312457799911499, "learning_rate": 1.3248903913654726e-05, "loss": 0.5147, "step": 17780 }, { "epoch": 0.3947791922398198, "grad_norm": 1.258514165878296, "learning_rate": 1.3245606084129559e-05, "loss": 0.352, "step": 17785 }, { "epoch": 0.3948901787993474, "grad_norm": 1.0305403470993042, "learning_rate": 1.3242307860014487e-05, "loss": 0.3029, "step": 17790 }, { "epoch": 0.39500116535887503, "grad_norm": 1.2700746059417725, "learning_rate": 1.32390092417105e-05, "loss": 0.3088, "step": 17795 }, { "epoch": 0.39511215191840265, "grad_norm": 1.1628782749176025, "learning_rate": 1.3235710229618635e-05, "loss": 0.5507, "step": 17800 }, { "epoch": 0.39522313847793034, "grad_norm": 1.2276886701583862, "learning_rate": 1.3232410824139969e-05, "loss": 0.4989, "step": 17805 }, { "epoch": 0.39533412503745796, "grad_norm": 1.2096561193466187, "learning_rate": 1.3229111025675639e-05, "loss": 0.425, "step": 17810 }, { "epoch": 0.3954451115969856, "grad_norm": 1.6849702596664429, "learning_rate": 1.3225810834626817e-05, "loss": 0.411, "step": 17815 }, { "epoch": 0.39555609815651327, "grad_norm": 0.8754458427429199, "learning_rate": 1.3222510251394732e-05, "loss": 0.5221, "step": 17820 }, { "epoch": 0.3956670847160409, "grad_norm": 1.1738617420196533, "learning_rate": 1.3219209276380657e-05, "loss": 0.3602, "step": 17825 }, { "epoch": 0.3957780712755685, "grad_norm": 1.1277354955673218, "learning_rate": 1.3215907909985919e-05, "loss": 0.5628, "step": 17830 }, { "epoch": 0.39588905783509615, "grad_norm": 1.6044995784759521, "learning_rate": 1.321260615261188e-05, "loss": 0.4562, "step": 17835 }, { "epoch": 0.39600004439462383, "grad_norm": 1.0552541017532349, "learning_rate": 1.320930400465996e-05, "loss": 0.433, "step": 17840 }, { "epoch": 0.39611103095415146, "grad_norm": 0.8654063940048218, "learning_rate": 1.3206001466531624e-05, "loss": 0.2673, "step": 17845 }, { "epoch": 0.3962220175136791, "grad_norm": 1.105943202972412, "learning_rate": 1.3202698538628376e-05, "loss": 0.417, "step": 17850 }, { "epoch": 0.3963330040732067, "grad_norm": 1.0926660299301147, "learning_rate": 1.3199395221351785e-05, "loss": 0.4506, "step": 17855 }, { "epoch": 0.3964439906327344, "grad_norm": 1.0280393362045288, "learning_rate": 1.3196091515103454e-05, "loss": 0.4719, "step": 17860 }, { "epoch": 0.396554977192262, "grad_norm": 1.2806470394134521, "learning_rate": 1.3192787420285034e-05, "loss": 0.5129, "step": 17865 }, { "epoch": 0.39666596375178964, "grad_norm": 0.8270732760429382, "learning_rate": 1.3189482937298225e-05, "loss": 0.5978, "step": 17870 }, { "epoch": 0.3967769503113173, "grad_norm": 0.8961861729621887, "learning_rate": 1.3186178066544781e-05, "loss": 0.3647, "step": 17875 }, { "epoch": 0.39688793687084495, "grad_norm": 0.9055027961730957, "learning_rate": 1.3182872808426492e-05, "loss": 0.4148, "step": 17880 }, { "epoch": 0.3969989234303726, "grad_norm": 1.5910625457763672, "learning_rate": 1.3179567163345201e-05, "loss": 0.4073, "step": 17885 }, { "epoch": 0.3971099099899002, "grad_norm": 1.038095235824585, "learning_rate": 1.3176261131702796e-05, "loss": 0.4585, "step": 17890 }, { "epoch": 0.3972208965494279, "grad_norm": 1.1451902389526367, "learning_rate": 1.3172954713901218e-05, "loss": 0.5159, "step": 17895 }, { "epoch": 0.3973318831089555, "grad_norm": 0.8876758813858032, "learning_rate": 1.3169647910342447e-05, "loss": 0.4819, "step": 17900 }, { "epoch": 0.39744286966848313, "grad_norm": 1.7914245128631592, "learning_rate": 1.316634072142851e-05, "loss": 0.4021, "step": 17905 }, { "epoch": 0.39755385622801076, "grad_norm": 0.9543294310569763, "learning_rate": 1.316303314756149e-05, "loss": 0.4622, "step": 17910 }, { "epoch": 0.39766484278753844, "grad_norm": 1.5023553371429443, "learning_rate": 1.3159725189143506e-05, "loss": 0.7284, "step": 17915 }, { "epoch": 0.39777582934706607, "grad_norm": 1.5158809423446655, "learning_rate": 1.3156416846576732e-05, "loss": 0.3533, "step": 17920 }, { "epoch": 0.3978868159065937, "grad_norm": 1.2066943645477295, "learning_rate": 1.3153108120263384e-05, "loss": 0.6102, "step": 17925 }, { "epoch": 0.3979978024661214, "grad_norm": 1.2224715948104858, "learning_rate": 1.3149799010605726e-05, "loss": 0.6959, "step": 17930 }, { "epoch": 0.398108789025649, "grad_norm": 1.9479608535766602, "learning_rate": 1.3146489518006065e-05, "loss": 0.5941, "step": 17935 }, { "epoch": 0.3982197755851766, "grad_norm": 1.061508059501648, "learning_rate": 1.314317964286676e-05, "loss": 0.5794, "step": 17940 }, { "epoch": 0.39833076214470425, "grad_norm": 1.1498526334762573, "learning_rate": 1.313986938559022e-05, "loss": 0.5621, "step": 17945 }, { "epoch": 0.39844174870423194, "grad_norm": 1.051237940788269, "learning_rate": 1.3136558746578888e-05, "loss": 0.3175, "step": 17950 }, { "epoch": 0.39855273526375956, "grad_norm": 0.903063952922821, "learning_rate": 1.3133247726235263e-05, "loss": 0.4213, "step": 17955 }, { "epoch": 0.3986637218232872, "grad_norm": 1.0225557088851929, "learning_rate": 1.312993632496189e-05, "loss": 0.3809, "step": 17960 }, { "epoch": 0.39877470838281487, "grad_norm": 2.102311372756958, "learning_rate": 1.3126624543161351e-05, "loss": 0.3249, "step": 17965 }, { "epoch": 0.3988856949423425, "grad_norm": 1.8216239213943481, "learning_rate": 1.3123312381236287e-05, "loss": 0.5173, "step": 17970 }, { "epoch": 0.3989966815018701, "grad_norm": 1.2820842266082764, "learning_rate": 1.311999983958938e-05, "loss": 0.2883, "step": 17975 }, { "epoch": 0.39910766806139775, "grad_norm": 1.3804726600646973, "learning_rate": 1.3116686918623356e-05, "loss": 0.5963, "step": 17980 }, { "epoch": 0.39921865462092543, "grad_norm": 1.2053264379501343, "learning_rate": 1.3113373618740988e-05, "loss": 0.4112, "step": 17985 }, { "epoch": 0.39932964118045305, "grad_norm": 0.9135293364524841, "learning_rate": 1.3110059940345096e-05, "loss": 0.4919, "step": 17990 }, { "epoch": 0.3994406277399807, "grad_norm": 1.1950745582580566, "learning_rate": 1.3106745883838549e-05, "loss": 0.5597, "step": 17995 }, { "epoch": 0.3995516142995083, "grad_norm": 1.2758936882019043, "learning_rate": 1.3103431449624257e-05, "loss": 0.432, "step": 18000 }, { "epoch": 0.399662600859036, "grad_norm": 1.616534948348999, "learning_rate": 1.3100116638105177e-05, "loss": 0.5755, "step": 18005 }, { "epoch": 0.3997735874185636, "grad_norm": 1.3022828102111816, "learning_rate": 1.3096801449684314e-05, "loss": 0.4205, "step": 18010 }, { "epoch": 0.39988457397809124, "grad_norm": 1.1768077611923218, "learning_rate": 1.3093485884764714e-05, "loss": 0.4766, "step": 18015 }, { "epoch": 0.3999955605376189, "grad_norm": 0.7835399508476257, "learning_rate": 1.3090169943749475e-05, "loss": 0.4221, "step": 18020 }, { "epoch": 0.40010654709714655, "grad_norm": 1.2957817316055298, "learning_rate": 1.308685362704174e-05, "loss": 0.4314, "step": 18025 }, { "epoch": 0.4002175336566742, "grad_norm": 1.2330552339553833, "learning_rate": 1.3083536935044695e-05, "loss": 0.4486, "step": 18030 }, { "epoch": 0.4003285202162018, "grad_norm": 1.5200679302215576, "learning_rate": 1.3080219868161565e-05, "loss": 0.408, "step": 18035 }, { "epoch": 0.4004395067757295, "grad_norm": 0.9305362105369568, "learning_rate": 1.3076902426795639e-05, "loss": 0.6147, "step": 18040 }, { "epoch": 0.4005504933352571, "grad_norm": 0.6245304942131042, "learning_rate": 1.3073584611350234e-05, "loss": 0.3921, "step": 18045 }, { "epoch": 0.40066147989478473, "grad_norm": 1.8916888236999512, "learning_rate": 1.3070266422228717e-05, "loss": 0.588, "step": 18050 }, { "epoch": 0.40077246645431236, "grad_norm": 1.5298385620117188, "learning_rate": 1.3066947859834507e-05, "loss": 0.5068, "step": 18055 }, { "epoch": 0.40088345301384004, "grad_norm": 1.188046932220459, "learning_rate": 1.3063628924571061e-05, "loss": 0.5606, "step": 18060 }, { "epoch": 0.40099443957336767, "grad_norm": 1.1807680130004883, "learning_rate": 1.3060309616841887e-05, "loss": 0.4235, "step": 18065 }, { "epoch": 0.4011054261328953, "grad_norm": 1.1999351978302002, "learning_rate": 1.3056989937050532e-05, "loss": 0.4043, "step": 18070 }, { "epoch": 0.401216412692423, "grad_norm": 0.7646277546882629, "learning_rate": 1.3053669885600592e-05, "loss": 0.513, "step": 18075 }, { "epoch": 0.4013273992519506, "grad_norm": 1.1868324279785156, "learning_rate": 1.3050349462895711e-05, "loss": 0.2457, "step": 18080 }, { "epoch": 0.4014383858114782, "grad_norm": 1.2941502332687378, "learning_rate": 1.3047028669339569e-05, "loss": 0.4342, "step": 18085 }, { "epoch": 0.40154937237100585, "grad_norm": 0.8198782205581665, "learning_rate": 1.3043707505335905e-05, "loss": 0.3848, "step": 18090 }, { "epoch": 0.40166035893053353, "grad_norm": 1.302763819694519, "learning_rate": 1.3040385971288486e-05, "loss": 0.4415, "step": 18095 }, { "epoch": 0.40177134549006116, "grad_norm": 1.5501524209976196, "learning_rate": 1.3037064067601141e-05, "loss": 0.4466, "step": 18100 }, { "epoch": 0.4018823320495888, "grad_norm": 0.9922233819961548, "learning_rate": 1.3033741794677728e-05, "loss": 0.5519, "step": 18105 }, { "epoch": 0.4019933186091164, "grad_norm": 0.9999378323554993, "learning_rate": 1.3030419152922165e-05, "loss": 0.316, "step": 18110 }, { "epoch": 0.4021043051686441, "grad_norm": 1.383109211921692, "learning_rate": 1.3027096142738404e-05, "loss": 0.4103, "step": 18115 }, { "epoch": 0.4022152917281717, "grad_norm": 1.4409019947052002, "learning_rate": 1.3023772764530446e-05, "loss": 0.4338, "step": 18120 }, { "epoch": 0.40232627828769935, "grad_norm": 0.8448358774185181, "learning_rate": 1.3020449018702337e-05, "loss": 0.468, "step": 18125 }, { "epoch": 0.402437264847227, "grad_norm": 1.57224702835083, "learning_rate": 1.3017124905658164e-05, "loss": 0.5316, "step": 18130 }, { "epoch": 0.40254825140675465, "grad_norm": 1.8472278118133545, "learning_rate": 1.3013800425802066e-05, "loss": 0.4976, "step": 18135 }, { "epoch": 0.4026592379662823, "grad_norm": 1.0904327630996704, "learning_rate": 1.301047557953822e-05, "loss": 0.5188, "step": 18140 }, { "epoch": 0.4027702245258099, "grad_norm": 1.1760843992233276, "learning_rate": 1.300715036727085e-05, "loss": 0.6897, "step": 18145 }, { "epoch": 0.4028812110853376, "grad_norm": 1.4732872247695923, "learning_rate": 1.300382478940422e-05, "loss": 0.2838, "step": 18150 }, { "epoch": 0.4029921976448652, "grad_norm": 1.4299676418304443, "learning_rate": 1.3000498846342646e-05, "loss": 0.5486, "step": 18155 }, { "epoch": 0.40310318420439284, "grad_norm": 1.4404714107513428, "learning_rate": 1.2997172538490486e-05, "loss": 0.48, "step": 18160 }, { "epoch": 0.40321417076392047, "grad_norm": 1.078039526939392, "learning_rate": 1.2993845866252142e-05, "loss": 0.3799, "step": 18165 }, { "epoch": 0.40332515732344815, "grad_norm": 1.5176726579666138, "learning_rate": 1.2990518830032057e-05, "loss": 0.3537, "step": 18170 }, { "epoch": 0.4034361438829758, "grad_norm": 0.7993068695068359, "learning_rate": 1.2987191430234724e-05, "loss": 0.3949, "step": 18175 }, { "epoch": 0.4035471304425034, "grad_norm": 0.994655430316925, "learning_rate": 1.2983863667264673e-05, "loss": 0.4176, "step": 18180 }, { "epoch": 0.4036581170020311, "grad_norm": 1.150771975517273, "learning_rate": 1.2980535541526487e-05, "loss": 0.5571, "step": 18185 }, { "epoch": 0.4037691035615587, "grad_norm": 1.3434624671936035, "learning_rate": 1.2977207053424781e-05, "loss": 0.7217, "step": 18190 }, { "epoch": 0.40388009012108633, "grad_norm": 0.6132928133010864, "learning_rate": 1.297387820336423e-05, "loss": 0.4322, "step": 18195 }, { "epoch": 0.40399107668061396, "grad_norm": 1.3462475538253784, "learning_rate": 1.2970548991749538e-05, "loss": 0.4018, "step": 18200 }, { "epoch": 0.40410206324014164, "grad_norm": 1.393395185470581, "learning_rate": 1.2967219418985466e-05, "loss": 0.4862, "step": 18205 }, { "epoch": 0.40421304979966927, "grad_norm": 1.1231212615966797, "learning_rate": 1.2963889485476806e-05, "loss": 0.3355, "step": 18210 }, { "epoch": 0.4043240363591969, "grad_norm": 1.0703603029251099, "learning_rate": 1.2960559191628403e-05, "loss": 0.5421, "step": 18215 }, { "epoch": 0.4044350229187245, "grad_norm": 1.6870838403701782, "learning_rate": 1.2957228537845139e-05, "loss": 0.5258, "step": 18220 }, { "epoch": 0.4045460094782522, "grad_norm": 1.1466280221939087, "learning_rate": 1.295389752453195e-05, "loss": 0.4478, "step": 18225 }, { "epoch": 0.4046569960377798, "grad_norm": 1.7697092294692993, "learning_rate": 1.2950566152093808e-05, "loss": 0.4538, "step": 18230 }, { "epoch": 0.40476798259730745, "grad_norm": 1.9452863931655884, "learning_rate": 1.2947234420935729e-05, "loss": 0.4496, "step": 18235 }, { "epoch": 0.40487896915683513, "grad_norm": 1.3969025611877441, "learning_rate": 1.2943902331462771e-05, "loss": 0.5327, "step": 18240 }, { "epoch": 0.40498995571636276, "grad_norm": 1.274922251701355, "learning_rate": 1.2940569884080044e-05, "loss": 0.359, "step": 18245 }, { "epoch": 0.4051009422758904, "grad_norm": 1.0482635498046875, "learning_rate": 1.2937237079192688e-05, "loss": 0.462, "step": 18250 }, { "epoch": 0.405211928835418, "grad_norm": 0.9442659020423889, "learning_rate": 1.2933903917205902e-05, "loss": 0.3367, "step": 18255 }, { "epoch": 0.4053229153949457, "grad_norm": 0.9624006748199463, "learning_rate": 1.2930570398524915e-05, "loss": 0.3738, "step": 18260 }, { "epoch": 0.4054339019544733, "grad_norm": 1.4392725229263306, "learning_rate": 1.2927236523555009e-05, "loss": 0.346, "step": 18265 }, { "epoch": 0.40554488851400095, "grad_norm": 1.205470323562622, "learning_rate": 1.2923902292701502e-05, "loss": 0.354, "step": 18270 }, { "epoch": 0.40565587507352857, "grad_norm": 0.8593617677688599, "learning_rate": 1.292056770636976e-05, "loss": 0.5462, "step": 18275 }, { "epoch": 0.40576686163305625, "grad_norm": 1.7613037824630737, "learning_rate": 1.2917232764965194e-05, "loss": 0.5203, "step": 18280 }, { "epoch": 0.4058778481925839, "grad_norm": 1.4268548488616943, "learning_rate": 1.2913897468893249e-05, "loss": 0.565, "step": 18285 }, { "epoch": 0.4059888347521115, "grad_norm": 1.5129190683364868, "learning_rate": 1.2910561818559421e-05, "loss": 0.5035, "step": 18290 }, { "epoch": 0.4060998213116392, "grad_norm": 1.219570517539978, "learning_rate": 1.2907225814369254e-05, "loss": 0.418, "step": 18295 }, { "epoch": 0.4062108078711668, "grad_norm": 1.4354437589645386, "learning_rate": 1.2903889456728315e-05, "loss": 0.3389, "step": 18300 }, { "epoch": 0.40632179443069444, "grad_norm": 1.1195069551467896, "learning_rate": 1.290055274604224e-05, "loss": 0.5354, "step": 18305 }, { "epoch": 0.40643278099022206, "grad_norm": 0.6017734408378601, "learning_rate": 1.289721568271669e-05, "loss": 0.5803, "step": 18310 }, { "epoch": 0.40654376754974975, "grad_norm": 1.248761773109436, "learning_rate": 1.2893878267157373e-05, "loss": 0.5133, "step": 18315 }, { "epoch": 0.40665475410927737, "grad_norm": 1.238337755203247, "learning_rate": 1.2890540499770041e-05, "loss": 0.6132, "step": 18320 }, { "epoch": 0.406765740668805, "grad_norm": 1.2553455829620361, "learning_rate": 1.2887202380960491e-05, "loss": 0.3653, "step": 18325 }, { "epoch": 0.4068767272283326, "grad_norm": 1.3628250360488892, "learning_rate": 1.2883863911134553e-05, "loss": 0.4313, "step": 18330 }, { "epoch": 0.4069877137878603, "grad_norm": 1.0972529649734497, "learning_rate": 1.2880525090698118e-05, "loss": 0.3519, "step": 18335 }, { "epoch": 0.40709870034738793, "grad_norm": 1.4026445150375366, "learning_rate": 1.2877185920057101e-05, "loss": 0.4353, "step": 18340 }, { "epoch": 0.40720968690691556, "grad_norm": 1.2224009037017822, "learning_rate": 1.2873846399617469e-05, "loss": 0.3717, "step": 18345 }, { "epoch": 0.40732067346644324, "grad_norm": 1.2186386585235596, "learning_rate": 1.2870506529785232e-05, "loss": 0.4258, "step": 18350 }, { "epoch": 0.40743166002597087, "grad_norm": 1.1470417976379395, "learning_rate": 1.2867166310966437e-05, "loss": 0.5244, "step": 18355 }, { "epoch": 0.4075426465854985, "grad_norm": 1.1212327480316162, "learning_rate": 1.2863825743567174e-05, "loss": 0.3101, "step": 18360 }, { "epoch": 0.4076536331450261, "grad_norm": 1.2823984622955322, "learning_rate": 1.2860484827993584e-05, "loss": 0.4485, "step": 18365 }, { "epoch": 0.4077646197045538, "grad_norm": 0.9786013960838318, "learning_rate": 1.2857143564651845e-05, "loss": 0.3765, "step": 18370 }, { "epoch": 0.4078756062640814, "grad_norm": 0.9708194732666016, "learning_rate": 1.2853801953948171e-05, "loss": 0.3235, "step": 18375 }, { "epoch": 0.40798659282360905, "grad_norm": 1.119699478149414, "learning_rate": 1.2850459996288826e-05, "loss": 0.5239, "step": 18380 }, { "epoch": 0.4080975793831367, "grad_norm": 1.5546756982803345, "learning_rate": 1.2847117692080115e-05, "loss": 0.549, "step": 18385 }, { "epoch": 0.40820856594266436, "grad_norm": 1.2498154640197754, "learning_rate": 1.2843775041728384e-05, "loss": 0.4397, "step": 18390 }, { "epoch": 0.408319552502192, "grad_norm": 1.4244163036346436, "learning_rate": 1.2840432045640018e-05, "loss": 0.3831, "step": 18395 }, { "epoch": 0.4084305390617196, "grad_norm": 1.7624845504760742, "learning_rate": 1.2837088704221454e-05, "loss": 0.3975, "step": 18400 }, { "epoch": 0.4085415256212473, "grad_norm": 1.2871631383895874, "learning_rate": 1.2833745017879158e-05, "loss": 0.3667, "step": 18405 }, { "epoch": 0.4086525121807749, "grad_norm": 1.5793527364730835, "learning_rate": 1.2830400987019646e-05, "loss": 0.5073, "step": 18410 }, { "epoch": 0.40876349874030254, "grad_norm": 1.3336777687072754, "learning_rate": 1.2827056612049473e-05, "loss": 0.3303, "step": 18415 }, { "epoch": 0.40887448529983017, "grad_norm": 1.7279059886932373, "learning_rate": 1.2823711893375242e-05, "loss": 0.4877, "step": 18420 }, { "epoch": 0.40898547185935785, "grad_norm": 1.0418862104415894, "learning_rate": 1.2820366831403587e-05, "loss": 0.5104, "step": 18425 }, { "epoch": 0.4090964584188855, "grad_norm": 1.9648549556732178, "learning_rate": 1.2817021426541191e-05, "loss": 0.3704, "step": 18430 }, { "epoch": 0.4092074449784131, "grad_norm": 1.1906790733337402, "learning_rate": 1.2813675679194779e-05, "loss": 0.323, "step": 18435 }, { "epoch": 0.40931843153794073, "grad_norm": 1.1181881427764893, "learning_rate": 1.2810329589771114e-05, "loss": 0.5711, "step": 18440 }, { "epoch": 0.4094294180974684, "grad_norm": 1.0041084289550781, "learning_rate": 1.2806983158677e-05, "loss": 0.319, "step": 18445 }, { "epoch": 0.40954040465699604, "grad_norm": 1.1120296716690063, "learning_rate": 1.2803636386319288e-05, "loss": 0.4846, "step": 18450 }, { "epoch": 0.40965139121652366, "grad_norm": 0.7871121168136597, "learning_rate": 1.2800289273104869e-05, "loss": 0.3841, "step": 18455 }, { "epoch": 0.40976237777605135, "grad_norm": 1.2689319849014282, "learning_rate": 1.2796941819440671e-05, "loss": 0.2925, "step": 18460 }, { "epoch": 0.40987336433557897, "grad_norm": 1.0140430927276611, "learning_rate": 1.2793594025733668e-05, "loss": 0.3438, "step": 18465 }, { "epoch": 0.4099843508951066, "grad_norm": 0.9305757880210876, "learning_rate": 1.2790245892390868e-05, "loss": 0.2941, "step": 18470 }, { "epoch": 0.4100953374546342, "grad_norm": 1.158919334411621, "learning_rate": 1.2786897419819335e-05, "loss": 0.4539, "step": 18475 }, { "epoch": 0.4102063240141619, "grad_norm": 0.8390790224075317, "learning_rate": 1.278354860842616e-05, "loss": 0.3339, "step": 18480 }, { "epoch": 0.41031731057368953, "grad_norm": 1.2888376712799072, "learning_rate": 1.2780199458618478e-05, "loss": 0.2988, "step": 18485 }, { "epoch": 0.41042829713321716, "grad_norm": 1.410506248474121, "learning_rate": 1.2776849970803472e-05, "loss": 0.4381, "step": 18490 }, { "epoch": 0.4105392836927448, "grad_norm": 0.8905935883522034, "learning_rate": 1.2773500145388358e-05, "loss": 0.4926, "step": 18495 }, { "epoch": 0.41065027025227246, "grad_norm": 1.7534258365631104, "learning_rate": 1.2770149982780398e-05, "loss": 0.4428, "step": 18500 }, { "epoch": 0.4107612568118001, "grad_norm": 1.1103402376174927, "learning_rate": 1.2766799483386894e-05, "loss": 0.3413, "step": 18505 }, { "epoch": 0.4108722433713277, "grad_norm": 1.3265504837036133, "learning_rate": 1.276344864761519e-05, "loss": 0.3296, "step": 18510 }, { "epoch": 0.4109832299308554, "grad_norm": 1.7852246761322021, "learning_rate": 1.2760097475872661e-05, "loss": 0.4809, "step": 18515 }, { "epoch": 0.411094216490383, "grad_norm": 1.636186957359314, "learning_rate": 1.2756745968566743e-05, "loss": 0.3488, "step": 18520 }, { "epoch": 0.41120520304991065, "grad_norm": 0.9312105774879456, "learning_rate": 1.2753394126104894e-05, "loss": 0.5638, "step": 18525 }, { "epoch": 0.4113161896094383, "grad_norm": 1.4691455364227295, "learning_rate": 1.2750041948894621e-05, "loss": 0.4399, "step": 18530 }, { "epoch": 0.41142717616896596, "grad_norm": 1.0564097166061401, "learning_rate": 1.274668943734347e-05, "loss": 0.4582, "step": 18535 }, { "epoch": 0.4115381627284936, "grad_norm": 1.2533094882965088, "learning_rate": 1.274333659185903e-05, "loss": 0.349, "step": 18540 }, { "epoch": 0.4116491492880212, "grad_norm": 1.1874221563339233, "learning_rate": 1.2739983412848926e-05, "loss": 0.4179, "step": 18545 }, { "epoch": 0.41176013584754884, "grad_norm": 1.1301754713058472, "learning_rate": 1.2736629900720832e-05, "loss": 0.3151, "step": 18550 }, { "epoch": 0.4118711224070765, "grad_norm": 1.4599363803863525, "learning_rate": 1.2733276055882446e-05, "loss": 0.4141, "step": 18555 }, { "epoch": 0.41198210896660414, "grad_norm": 1.0648486614227295, "learning_rate": 1.272992187874153e-05, "loss": 0.5459, "step": 18560 }, { "epoch": 0.41209309552613177, "grad_norm": 1.3196961879730225, "learning_rate": 1.2726567369705864e-05, "loss": 0.4486, "step": 18565 }, { "epoch": 0.41220408208565945, "grad_norm": 1.1008305549621582, "learning_rate": 1.2723212529183285e-05, "loss": 0.4476, "step": 18570 }, { "epoch": 0.4123150686451871, "grad_norm": 1.0683090686798096, "learning_rate": 1.2719857357581656e-05, "loss": 0.4345, "step": 18575 }, { "epoch": 0.4124260552047147, "grad_norm": 0.8868213295936584, "learning_rate": 1.2716501855308892e-05, "loss": 0.3925, "step": 18580 }, { "epoch": 0.41253704176424233, "grad_norm": 1.6421164274215698, "learning_rate": 1.2713146022772943e-05, "loss": 0.3936, "step": 18585 }, { "epoch": 0.41264802832377, "grad_norm": 1.1328929662704468, "learning_rate": 1.27097898603818e-05, "loss": 0.4957, "step": 18590 }, { "epoch": 0.41275901488329764, "grad_norm": 0.95980304479599, "learning_rate": 1.2706433368543494e-05, "loss": 0.3633, "step": 18595 }, { "epoch": 0.41287000144282526, "grad_norm": 0.9733328819274902, "learning_rate": 1.2703076547666096e-05, "loss": 0.3381, "step": 18600 }, { "epoch": 0.4129809880023529, "grad_norm": 1.4620132446289062, "learning_rate": 1.2699719398157715e-05, "loss": 0.5159, "step": 18605 }, { "epoch": 0.41309197456188057, "grad_norm": 1.0303751230239868, "learning_rate": 1.2696361920426505e-05, "loss": 0.4639, "step": 18610 }, { "epoch": 0.4132029611214082, "grad_norm": 1.0312970876693726, "learning_rate": 1.2693004114880654e-05, "loss": 0.3479, "step": 18615 }, { "epoch": 0.4133139476809358, "grad_norm": 1.142751693725586, "learning_rate": 1.2689645981928395e-05, "loss": 0.3001, "step": 18620 }, { "epoch": 0.4134249342404635, "grad_norm": 0.9720245003700256, "learning_rate": 1.2686287521978e-05, "loss": 0.4678, "step": 18625 }, { "epoch": 0.41353592079999113, "grad_norm": 0.8710252642631531, "learning_rate": 1.2682928735437776e-05, "loss": 0.4558, "step": 18630 }, { "epoch": 0.41364690735951876, "grad_norm": 1.5070589780807495, "learning_rate": 1.2679569622716075e-05, "loss": 0.4395, "step": 18635 }, { "epoch": 0.4137578939190464, "grad_norm": 1.492600917816162, "learning_rate": 1.2676210184221285e-05, "loss": 0.5579, "step": 18640 }, { "epoch": 0.41386888047857406, "grad_norm": 0.7719084024429321, "learning_rate": 1.2672850420361837e-05, "loss": 0.4032, "step": 18645 }, { "epoch": 0.4139798670381017, "grad_norm": 1.246576189994812, "learning_rate": 1.2669490331546198e-05, "loss": 0.6183, "step": 18650 }, { "epoch": 0.4140908535976293, "grad_norm": 1.1218833923339844, "learning_rate": 1.2666129918182876e-05, "loss": 0.4348, "step": 18655 }, { "epoch": 0.41420184015715694, "grad_norm": 0.9847343564033508, "learning_rate": 1.2662769180680424e-05, "loss": 0.4673, "step": 18660 }, { "epoch": 0.4143128267166846, "grad_norm": 1.0954967737197876, "learning_rate": 1.2659408119447422e-05, "loss": 0.4823, "step": 18665 }, { "epoch": 0.41442381327621225, "grad_norm": 1.2822248935699463, "learning_rate": 1.2656046734892498e-05, "loss": 0.5339, "step": 18670 }, { "epoch": 0.4145347998357399, "grad_norm": 1.1070445775985718, "learning_rate": 1.2652685027424324e-05, "loss": 0.4468, "step": 18675 }, { "epoch": 0.41464578639526756, "grad_norm": 1.1634217500686646, "learning_rate": 1.2649322997451599e-05, "loss": 0.5181, "step": 18680 }, { "epoch": 0.4147567729547952, "grad_norm": 0.889788031578064, "learning_rate": 1.2645960645383069e-05, "loss": 0.3611, "step": 18685 }, { "epoch": 0.4148677595143228, "grad_norm": 1.3153811693191528, "learning_rate": 1.2642597971627518e-05, "loss": 0.4127, "step": 18690 }, { "epoch": 0.41497874607385044, "grad_norm": 0.9526509046554565, "learning_rate": 1.2639234976593766e-05, "loss": 0.474, "step": 18695 }, { "epoch": 0.4150897326333781, "grad_norm": 1.2924917936325073, "learning_rate": 1.2635871660690677e-05, "loss": 0.4069, "step": 18700 }, { "epoch": 0.41520071919290574, "grad_norm": 1.0292452573776245, "learning_rate": 1.2632508024327152e-05, "loss": 0.3366, "step": 18705 }, { "epoch": 0.41531170575243337, "grad_norm": 1.0939360857009888, "learning_rate": 1.2629144067912133e-05, "loss": 0.4079, "step": 18710 }, { "epoch": 0.415422692311961, "grad_norm": 1.4312463998794556, "learning_rate": 1.2625779791854593e-05, "loss": 0.4504, "step": 18715 }, { "epoch": 0.4155336788714887, "grad_norm": 1.6537981033325195, "learning_rate": 1.2622415196563554e-05, "loss": 0.4369, "step": 18720 }, { "epoch": 0.4156446654310163, "grad_norm": 1.350915789604187, "learning_rate": 1.2619050282448067e-05, "loss": 0.532, "step": 18725 }, { "epoch": 0.41575565199054393, "grad_norm": 1.0188899040222168, "learning_rate": 1.2615685049917233e-05, "loss": 0.4303, "step": 18730 }, { "epoch": 0.4158666385500716, "grad_norm": 1.0881189107894897, "learning_rate": 1.2612319499380183e-05, "loss": 0.2445, "step": 18735 }, { "epoch": 0.41597762510959924, "grad_norm": 1.1773778200149536, "learning_rate": 1.260895363124609e-05, "loss": 0.6159, "step": 18740 }, { "epoch": 0.41608861166912686, "grad_norm": 1.1648191213607788, "learning_rate": 1.2605587445924164e-05, "loss": 0.4181, "step": 18745 }, { "epoch": 0.4161995982286545, "grad_norm": 1.302767276763916, "learning_rate": 1.2602220943823654e-05, "loss": 0.5449, "step": 18750 }, { "epoch": 0.41631058478818217, "grad_norm": 0.9167510271072388, "learning_rate": 1.2598854125353847e-05, "loss": 0.3292, "step": 18755 }, { "epoch": 0.4164215713477098, "grad_norm": 1.0387072563171387, "learning_rate": 1.2595486990924075e-05, "loss": 0.3367, "step": 18760 }, { "epoch": 0.4165325579072374, "grad_norm": 1.5445338487625122, "learning_rate": 1.2592119540943697e-05, "loss": 0.4267, "step": 18765 }, { "epoch": 0.41664354446676505, "grad_norm": 1.1272823810577393, "learning_rate": 1.258875177582212e-05, "loss": 0.4504, "step": 18770 }, { "epoch": 0.41675453102629273, "grad_norm": 0.8140206933021545, "learning_rate": 1.2585383695968782e-05, "loss": 0.4006, "step": 18775 }, { "epoch": 0.41686551758582036, "grad_norm": 1.2129508256912231, "learning_rate": 1.2582015301793167e-05, "loss": 0.5976, "step": 18780 }, { "epoch": 0.416976504145348, "grad_norm": 1.2662301063537598, "learning_rate": 1.2578646593704786e-05, "loss": 0.4862, "step": 18785 }, { "epoch": 0.41708749070487566, "grad_norm": 1.3142824172973633, "learning_rate": 1.2575277572113205e-05, "loss": 0.5799, "step": 18790 }, { "epoch": 0.4171984772644033, "grad_norm": 1.0552418231964111, "learning_rate": 1.2571908237428012e-05, "loss": 0.3674, "step": 18795 }, { "epoch": 0.4173094638239309, "grad_norm": 0.9380001425743103, "learning_rate": 1.2568538590058844e-05, "loss": 0.3359, "step": 18800 }, { "epoch": 0.41742045038345854, "grad_norm": 1.519612193107605, "learning_rate": 1.2565168630415366e-05, "loss": 0.4007, "step": 18805 }, { "epoch": 0.4175314369429862, "grad_norm": 1.1087913513183594, "learning_rate": 1.2561798358907287e-05, "loss": 0.3141, "step": 18810 }, { "epoch": 0.41764242350251385, "grad_norm": 1.1452858448028564, "learning_rate": 1.2558427775944357e-05, "loss": 0.5899, "step": 18815 }, { "epoch": 0.4177534100620415, "grad_norm": 1.290701150894165, "learning_rate": 1.2555056881936359e-05, "loss": 0.3658, "step": 18820 }, { "epoch": 0.4178643966215691, "grad_norm": 1.1495695114135742, "learning_rate": 1.2551685677293112e-05, "loss": 0.392, "step": 18825 }, { "epoch": 0.4179753831810968, "grad_norm": 1.0973929166793823, "learning_rate": 1.2548314162424481e-05, "loss": 0.4206, "step": 18830 }, { "epoch": 0.4180863697406244, "grad_norm": 1.074574589729309, "learning_rate": 1.254494233774036e-05, "loss": 0.4358, "step": 18835 }, { "epoch": 0.41819735630015203, "grad_norm": 1.424739956855774, "learning_rate": 1.2541570203650681e-05, "loss": 0.473, "step": 18840 }, { "epoch": 0.4183083428596797, "grad_norm": 1.4399970769882202, "learning_rate": 1.2538197760565425e-05, "loss": 0.4795, "step": 18845 }, { "epoch": 0.41841932941920734, "grad_norm": 1.6367219686508179, "learning_rate": 1.2534825008894595e-05, "loss": 0.4966, "step": 18850 }, { "epoch": 0.41853031597873497, "grad_norm": 1.3322153091430664, "learning_rate": 1.2531451949048243e-05, "loss": 0.6716, "step": 18855 }, { "epoch": 0.4186413025382626, "grad_norm": 1.3036226034164429, "learning_rate": 1.2528078581436454e-05, "loss": 0.5527, "step": 18860 }, { "epoch": 0.4187522890977903, "grad_norm": 0.9684581756591797, "learning_rate": 1.2524704906469347e-05, "loss": 0.38, "step": 18865 }, { "epoch": 0.4188632756573179, "grad_norm": 0.9556499719619751, "learning_rate": 1.2521330924557087e-05, "loss": 0.366, "step": 18870 }, { "epoch": 0.4189742622168455, "grad_norm": 0.9147199392318726, "learning_rate": 1.2517956636109867e-05, "loss": 0.4608, "step": 18875 }, { "epoch": 0.41908524877637315, "grad_norm": 1.5343172550201416, "learning_rate": 1.2514582041537926e-05, "loss": 0.417, "step": 18880 }, { "epoch": 0.41919623533590084, "grad_norm": 1.2957946062088013, "learning_rate": 1.2511207141251532e-05, "loss": 0.6602, "step": 18885 }, { "epoch": 0.41930722189542846, "grad_norm": 1.3479381799697876, "learning_rate": 1.2507831935660995e-05, "loss": 0.3779, "step": 18890 }, { "epoch": 0.4194182084549561, "grad_norm": 1.4515689611434937, "learning_rate": 1.2504456425176662e-05, "loss": 0.5103, "step": 18895 }, { "epoch": 0.41952919501448377, "grad_norm": 1.2327100038528442, "learning_rate": 1.2501080610208915e-05, "loss": 0.5223, "step": 18900 }, { "epoch": 0.4196401815740114, "grad_norm": 1.5770394802093506, "learning_rate": 1.2497704491168178e-05, "loss": 0.4282, "step": 18905 }, { "epoch": 0.419751168133539, "grad_norm": 1.4935261011123657, "learning_rate": 1.2494328068464907e-05, "loss": 0.4712, "step": 18910 }, { "epoch": 0.41986215469306665, "grad_norm": 1.3158011436462402, "learning_rate": 1.2490951342509592e-05, "loss": 0.3454, "step": 18915 }, { "epoch": 0.41997314125259433, "grad_norm": 0.869740903377533, "learning_rate": 1.2487574313712766e-05, "loss": 0.4122, "step": 18920 }, { "epoch": 0.42008412781212195, "grad_norm": 0.9241218566894531, "learning_rate": 1.2484196982484997e-05, "loss": 0.3949, "step": 18925 }, { "epoch": 0.4201951143716496, "grad_norm": 1.3600034713745117, "learning_rate": 1.2480819349236895e-05, "loss": 0.5165, "step": 18930 }, { "epoch": 0.4203061009311772, "grad_norm": 1.205753207206726, "learning_rate": 1.2477441414379093e-05, "loss": 0.5086, "step": 18935 }, { "epoch": 0.4204170874907049, "grad_norm": 1.6294773817062378, "learning_rate": 1.2474063178322274e-05, "loss": 0.3416, "step": 18940 }, { "epoch": 0.4205280740502325, "grad_norm": 1.817699909210205, "learning_rate": 1.247068464147715e-05, "loss": 0.3767, "step": 18945 }, { "epoch": 0.42063906060976014, "grad_norm": 1.5021382570266724, "learning_rate": 1.2467305804254472e-05, "loss": 0.3796, "step": 18950 }, { "epoch": 0.4207500471692878, "grad_norm": 1.0630805492401123, "learning_rate": 1.2463926667065031e-05, "loss": 0.5853, "step": 18955 }, { "epoch": 0.42086103372881545, "grad_norm": 0.8353287577629089, "learning_rate": 1.246054723031965e-05, "loss": 0.2753, "step": 18960 }, { "epoch": 0.4209720202883431, "grad_norm": 0.8969990611076355, "learning_rate": 1.2457167494429187e-05, "loss": 0.4887, "step": 18965 }, { "epoch": 0.4210830068478707, "grad_norm": 1.1911489963531494, "learning_rate": 1.2453787459804543e-05, "loss": 0.4958, "step": 18970 }, { "epoch": 0.4211939934073984, "grad_norm": 0.843773365020752, "learning_rate": 1.2450407126856648e-05, "loss": 0.4585, "step": 18975 }, { "epoch": 0.421304979966926, "grad_norm": 0.9726853370666504, "learning_rate": 1.2447026495996469e-05, "loss": 0.2542, "step": 18980 }, { "epoch": 0.42141596652645363, "grad_norm": 0.9821330904960632, "learning_rate": 1.2443645567635018e-05, "loss": 0.3909, "step": 18985 }, { "epoch": 0.4215269530859813, "grad_norm": 1.09699285030365, "learning_rate": 1.2440264342183335e-05, "loss": 0.351, "step": 18990 }, { "epoch": 0.42163793964550894, "grad_norm": 1.2256817817687988, "learning_rate": 1.2436882820052498e-05, "loss": 0.4693, "step": 18995 }, { "epoch": 0.42174892620503657, "grad_norm": 1.5253905057907104, "learning_rate": 1.2433501001653618e-05, "loss": 0.451, "step": 19000 }, { "epoch": 0.4218599127645642, "grad_norm": 1.5205445289611816, "learning_rate": 1.243011888739785e-05, "loss": 0.3386, "step": 19005 }, { "epoch": 0.4219708993240919, "grad_norm": 1.7538121938705444, "learning_rate": 1.2426736477696378e-05, "loss": 0.5199, "step": 19010 }, { "epoch": 0.4220818858836195, "grad_norm": 0.802723228931427, "learning_rate": 1.2423353772960421e-05, "loss": 0.4092, "step": 19015 }, { "epoch": 0.4221928724431471, "grad_norm": 1.3640711307525635, "learning_rate": 1.2419970773601241e-05, "loss": 0.4255, "step": 19020 }, { "epoch": 0.42230385900267475, "grad_norm": 0.8367384076118469, "learning_rate": 1.241658748003013e-05, "loss": 0.3952, "step": 19025 }, { "epoch": 0.42241484556220243, "grad_norm": 1.389653205871582, "learning_rate": 1.241320389265842e-05, "loss": 0.3961, "step": 19030 }, { "epoch": 0.42252583212173006, "grad_norm": 1.5635536909103394, "learning_rate": 1.240982001189747e-05, "loss": 0.4853, "step": 19035 }, { "epoch": 0.4226368186812577, "grad_norm": 1.2197387218475342, "learning_rate": 1.2406435838158686e-05, "loss": 0.478, "step": 19040 }, { "epoch": 0.42274780524078537, "grad_norm": 0.9387150406837463, "learning_rate": 1.2403051371853502e-05, "loss": 0.4831, "step": 19045 }, { "epoch": 0.422858791800313, "grad_norm": 1.2293328046798706, "learning_rate": 1.2399666613393396e-05, "loss": 0.4918, "step": 19050 }, { "epoch": 0.4229697783598406, "grad_norm": 1.0011482238769531, "learning_rate": 1.2396281563189867e-05, "loss": 0.5591, "step": 19055 }, { "epoch": 0.42308076491936825, "grad_norm": 1.3648799657821655, "learning_rate": 1.2392896221654465e-05, "loss": 0.5343, "step": 19060 }, { "epoch": 0.42319175147889593, "grad_norm": 1.37348210811615, "learning_rate": 1.238951058919876e-05, "loss": 0.6581, "step": 19065 }, { "epoch": 0.42330273803842355, "grad_norm": 1.1474547386169434, "learning_rate": 1.2386124666234377e-05, "loss": 0.6939, "step": 19070 }, { "epoch": 0.4234137245979512, "grad_norm": 1.3844075202941895, "learning_rate": 1.2382738453172957e-05, "loss": 0.4332, "step": 19075 }, { "epoch": 0.4235247111574788, "grad_norm": 1.0813393592834473, "learning_rate": 1.2379351950426188e-05, "loss": 0.4339, "step": 19080 }, { "epoch": 0.4236356977170065, "grad_norm": 0.8828607797622681, "learning_rate": 1.2375965158405789e-05, "loss": 0.3954, "step": 19085 }, { "epoch": 0.4237466842765341, "grad_norm": 1.3992273807525635, "learning_rate": 1.2372578077523514e-05, "loss": 0.4668, "step": 19090 }, { "epoch": 0.42385767083606174, "grad_norm": 1.2180794477462769, "learning_rate": 1.2369190708191151e-05, "loss": 0.5227, "step": 19095 }, { "epoch": 0.4239686573955894, "grad_norm": 1.0402294397354126, "learning_rate": 1.2365803050820531e-05, "loss": 0.4719, "step": 19100 }, { "epoch": 0.42407964395511705, "grad_norm": 1.536978006362915, "learning_rate": 1.2362415105823509e-05, "loss": 0.5169, "step": 19105 }, { "epoch": 0.4241906305146447, "grad_norm": 1.119295358657837, "learning_rate": 1.2359026873611981e-05, "loss": 0.5108, "step": 19110 }, { "epoch": 0.4243016170741723, "grad_norm": 0.9652313590049744, "learning_rate": 1.2355638354597878e-05, "loss": 0.4447, "step": 19115 }, { "epoch": 0.4244126036337, "grad_norm": 1.04342520236969, "learning_rate": 1.2352249549193165e-05, "loss": 0.4986, "step": 19120 }, { "epoch": 0.4245235901932276, "grad_norm": 1.2141095399856567, "learning_rate": 1.234886045780984e-05, "loss": 0.5623, "step": 19125 }, { "epoch": 0.42463457675275523, "grad_norm": 1.7910752296447754, "learning_rate": 1.2345471080859937e-05, "loss": 0.469, "step": 19130 }, { "epoch": 0.42474556331228286, "grad_norm": 1.4534924030303955, "learning_rate": 1.2342081418755525e-05, "loss": 0.4874, "step": 19135 }, { "epoch": 0.42485654987181054, "grad_norm": 0.9207220673561096, "learning_rate": 1.233869147190871e-05, "loss": 0.3644, "step": 19140 }, { "epoch": 0.42496753643133817, "grad_norm": 0.8127537369728088, "learning_rate": 1.233530124073163e-05, "loss": 0.4096, "step": 19145 }, { "epoch": 0.4250785229908658, "grad_norm": 1.811837911605835, "learning_rate": 1.2331910725636455e-05, "loss": 0.5663, "step": 19150 }, { "epoch": 0.4251895095503935, "grad_norm": 1.11298668384552, "learning_rate": 1.2328519927035396e-05, "loss": 0.3821, "step": 19155 }, { "epoch": 0.4253004961099211, "grad_norm": 1.6024359464645386, "learning_rate": 1.2325128845340696e-05, "loss": 0.5324, "step": 19160 }, { "epoch": 0.4254114826694487, "grad_norm": 1.2056422233581543, "learning_rate": 1.2321737480964625e-05, "loss": 0.3337, "step": 19165 }, { "epoch": 0.42552246922897635, "grad_norm": 0.8898048400878906, "learning_rate": 1.2318345834319501e-05, "loss": 0.4795, "step": 19170 }, { "epoch": 0.42563345578850403, "grad_norm": 1.0445302724838257, "learning_rate": 1.2314953905817662e-05, "loss": 0.4654, "step": 19175 }, { "epoch": 0.42574444234803166, "grad_norm": 1.4014710187911987, "learning_rate": 1.231156169587149e-05, "loss": 0.5228, "step": 19180 }, { "epoch": 0.4258554289075593, "grad_norm": 1.1380759477615356, "learning_rate": 1.2308169204893403e-05, "loss": 0.4072, "step": 19185 }, { "epoch": 0.4259664154670869, "grad_norm": 1.5649347305297852, "learning_rate": 1.2304776433295844e-05, "loss": 0.2593, "step": 19190 }, { "epoch": 0.4260774020266146, "grad_norm": 1.3048160076141357, "learning_rate": 1.2301383381491297e-05, "loss": 0.3555, "step": 19195 }, { "epoch": 0.4261883885861422, "grad_norm": 1.260620355606079, "learning_rate": 1.2297990049892274e-05, "loss": 0.3316, "step": 19200 }, { "epoch": 0.42629937514566985, "grad_norm": 0.98924720287323, "learning_rate": 1.2294596438911328e-05, "loss": 0.6583, "step": 19205 }, { "epoch": 0.4264103617051975, "grad_norm": 1.6030290126800537, "learning_rate": 1.2291202548961042e-05, "loss": 0.4335, "step": 19210 }, { "epoch": 0.42652134826472515, "grad_norm": 1.4659713506698608, "learning_rate": 1.2287808380454038e-05, "loss": 0.3872, "step": 19215 }, { "epoch": 0.4266323348242528, "grad_norm": 1.2156850099563599, "learning_rate": 1.2284413933802961e-05, "loss": 0.3163, "step": 19220 }, { "epoch": 0.4267433213837804, "grad_norm": 1.2371267080307007, "learning_rate": 1.2281019209420502e-05, "loss": 0.4625, "step": 19225 }, { "epoch": 0.4268543079433081, "grad_norm": 1.0696591138839722, "learning_rate": 1.2277624207719373e-05, "loss": 0.3984, "step": 19230 }, { "epoch": 0.4269652945028357, "grad_norm": 1.0581618547439575, "learning_rate": 1.2274228929112336e-05, "loss": 0.4227, "step": 19235 }, { "epoch": 0.42707628106236334, "grad_norm": 0.9675800800323486, "learning_rate": 1.227083337401217e-05, "loss": 0.6256, "step": 19240 }, { "epoch": 0.42718726762189096, "grad_norm": 1.111681342124939, "learning_rate": 1.22674375428317e-05, "loss": 0.4658, "step": 19245 }, { "epoch": 0.42729825418141865, "grad_norm": 1.5244219303131104, "learning_rate": 1.2264041435983776e-05, "loss": 0.4315, "step": 19250 }, { "epoch": 0.4274092407409463, "grad_norm": 0.9702281355857849, "learning_rate": 1.2260645053881288e-05, "loss": 0.4759, "step": 19255 }, { "epoch": 0.4275202273004739, "grad_norm": 0.8706420063972473, "learning_rate": 1.2257248396937156e-05, "loss": 0.457, "step": 19260 }, { "epoch": 0.4276312138600016, "grad_norm": 1.9086284637451172, "learning_rate": 1.2253851465564333e-05, "loss": 0.5152, "step": 19265 }, { "epoch": 0.4277422004195292, "grad_norm": 1.522344946861267, "learning_rate": 1.2250454260175809e-05, "loss": 0.6024, "step": 19270 }, { "epoch": 0.42785318697905683, "grad_norm": 1.0553314685821533, "learning_rate": 1.2247056781184604e-05, "loss": 0.3522, "step": 19275 }, { "epoch": 0.42796417353858446, "grad_norm": 1.4162936210632324, "learning_rate": 1.2243659029003769e-05, "loss": 0.3912, "step": 19280 }, { "epoch": 0.42807516009811214, "grad_norm": 1.8890293836593628, "learning_rate": 1.2240261004046397e-05, "loss": 0.4187, "step": 19285 }, { "epoch": 0.42818614665763977, "grad_norm": 1.1863138675689697, "learning_rate": 1.2236862706725603e-05, "loss": 0.3151, "step": 19290 }, { "epoch": 0.4282971332171674, "grad_norm": 0.9996054172515869, "learning_rate": 1.2233464137454542e-05, "loss": 0.4022, "step": 19295 }, { "epoch": 0.428408119776695, "grad_norm": 2.290318489074707, "learning_rate": 1.2230065296646406e-05, "loss": 0.4952, "step": 19300 }, { "epoch": 0.4285191063362227, "grad_norm": 0.921788215637207, "learning_rate": 1.2226666184714409e-05, "loss": 0.3769, "step": 19305 }, { "epoch": 0.4286300928957503, "grad_norm": 1.0003505945205688, "learning_rate": 1.2223266802071802e-05, "loss": 0.379, "step": 19310 }, { "epoch": 0.42874107945527795, "grad_norm": 1.5152891874313354, "learning_rate": 1.2219867149131876e-05, "loss": 0.5854, "step": 19315 }, { "epoch": 0.42885206601480563, "grad_norm": 1.3230036497116089, "learning_rate": 1.2216467226307944e-05, "loss": 0.4412, "step": 19320 }, { "epoch": 0.42896305257433326, "grad_norm": 1.2009028196334839, "learning_rate": 1.2213067034013363e-05, "loss": 0.3158, "step": 19325 }, { "epoch": 0.4290740391338609, "grad_norm": 1.3071898221969604, "learning_rate": 1.2209666572661515e-05, "loss": 0.4378, "step": 19330 }, { "epoch": 0.4291850256933885, "grad_norm": 0.9393930435180664, "learning_rate": 1.2206265842665814e-05, "loss": 0.4835, "step": 19335 }, { "epoch": 0.4292960122529162, "grad_norm": 1.3003790378570557, "learning_rate": 1.220286484443971e-05, "loss": 0.4946, "step": 19340 }, { "epoch": 0.4294069988124438, "grad_norm": 1.2015130519866943, "learning_rate": 1.2199463578396688e-05, "loss": 0.4431, "step": 19345 }, { "epoch": 0.42951798537197144, "grad_norm": 1.0020374059677124, "learning_rate": 1.2196062044950259e-05, "loss": 0.2633, "step": 19350 }, { "epoch": 0.42962897193149907, "grad_norm": 0.7587225437164307, "learning_rate": 1.2192660244513971e-05, "loss": 0.4256, "step": 19355 }, { "epoch": 0.42973995849102675, "grad_norm": 1.2856824398040771, "learning_rate": 1.2189258177501406e-05, "loss": 0.4935, "step": 19360 }, { "epoch": 0.4298509450505544, "grad_norm": 0.6996091604232788, "learning_rate": 1.2185855844326174e-05, "loss": 0.45, "step": 19365 }, { "epoch": 0.429961931610082, "grad_norm": 1.1408183574676514, "learning_rate": 1.218245324540192e-05, "loss": 0.493, "step": 19370 }, { "epoch": 0.4300729181696097, "grad_norm": 1.2416564226150513, "learning_rate": 1.2179050381142319e-05, "loss": 0.6317, "step": 19375 }, { "epoch": 0.4301839047291373, "grad_norm": 1.2081810235977173, "learning_rate": 1.217564725196108e-05, "loss": 0.3698, "step": 19380 }, { "epoch": 0.43029489128866494, "grad_norm": 1.1831772327423096, "learning_rate": 1.2172243858271944e-05, "loss": 0.5561, "step": 19385 }, { "epoch": 0.43040587784819256, "grad_norm": 1.0908524990081787, "learning_rate": 1.2168840200488686e-05, "loss": 0.5915, "step": 19390 }, { "epoch": 0.43051686440772025, "grad_norm": 1.0368895530700684, "learning_rate": 1.216543627902511e-05, "loss": 0.6316, "step": 19395 }, { "epoch": 0.43062785096724787, "grad_norm": 1.4189674854278564, "learning_rate": 1.2162032094295052e-05, "loss": 0.4226, "step": 19400 }, { "epoch": 0.4307388375267755, "grad_norm": 0.9197772741317749, "learning_rate": 1.2158627646712384e-05, "loss": 0.3057, "step": 19405 }, { "epoch": 0.4308498240863031, "grad_norm": 1.11489737033844, "learning_rate": 1.2155222936691007e-05, "loss": 0.4414, "step": 19410 }, { "epoch": 0.4309608106458308, "grad_norm": 0.9230841398239136, "learning_rate": 1.2151817964644852e-05, "loss": 0.3631, "step": 19415 }, { "epoch": 0.43107179720535843, "grad_norm": 1.5710375308990479, "learning_rate": 1.2148412730987887e-05, "loss": 0.446, "step": 19420 }, { "epoch": 0.43118278376488606, "grad_norm": 0.9980664849281311, "learning_rate": 1.2145007236134108e-05, "loss": 0.1992, "step": 19425 }, { "epoch": 0.43129377032441374, "grad_norm": 1.850558876991272, "learning_rate": 1.214160148049754e-05, "loss": 0.3733, "step": 19430 }, { "epoch": 0.43140475688394136, "grad_norm": 1.4530669450759888, "learning_rate": 1.2138195464492246e-05, "loss": 0.5507, "step": 19435 }, { "epoch": 0.431515743443469, "grad_norm": 1.877859115600586, "learning_rate": 1.2134789188532322e-05, "loss": 0.4828, "step": 19440 }, { "epoch": 0.4316267300029966, "grad_norm": 0.936596691608429, "learning_rate": 1.2131382653031887e-05, "loss": 0.6039, "step": 19445 }, { "epoch": 0.4317377165625243, "grad_norm": 1.5196179151535034, "learning_rate": 1.2127975858405096e-05, "loss": 0.5006, "step": 19450 }, { "epoch": 0.4318487031220519, "grad_norm": 1.0282752513885498, "learning_rate": 1.2124568805066137e-05, "loss": 0.4285, "step": 19455 }, { "epoch": 0.43195968968157955, "grad_norm": 0.9904384016990662, "learning_rate": 1.2121161493429225e-05, "loss": 0.3089, "step": 19460 }, { "epoch": 0.4320706762411072, "grad_norm": 1.1516846418380737, "learning_rate": 1.2117753923908617e-05, "loss": 0.4541, "step": 19465 }, { "epoch": 0.43218166280063486, "grad_norm": 1.0985053777694702, "learning_rate": 1.211434609691859e-05, "loss": 0.5971, "step": 19470 }, { "epoch": 0.4322926493601625, "grad_norm": 1.2207547426223755, "learning_rate": 1.2110938012873453e-05, "loss": 0.545, "step": 19475 }, { "epoch": 0.4324036359196901, "grad_norm": 1.536851406097412, "learning_rate": 1.2107529672187552e-05, "loss": 0.5405, "step": 19480 }, { "epoch": 0.4325146224792178, "grad_norm": 1.8614027500152588, "learning_rate": 1.2104121075275263e-05, "loss": 0.5477, "step": 19485 }, { "epoch": 0.4326256090387454, "grad_norm": 1.1792525053024292, "learning_rate": 1.210071222255099e-05, "loss": 0.5541, "step": 19490 }, { "epoch": 0.43273659559827304, "grad_norm": 1.6490572690963745, "learning_rate": 1.2097303114429169e-05, "loss": 0.4629, "step": 19495 }, { "epoch": 0.43284758215780067, "grad_norm": 1.219548225402832, "learning_rate": 1.209389375132427e-05, "loss": 0.4757, "step": 19500 }, { "epoch": 0.43295856871732835, "grad_norm": 1.2101728916168213, "learning_rate": 1.2090484133650791e-05, "loss": 0.3978, "step": 19505 }, { "epoch": 0.433069555276856, "grad_norm": 0.8404216766357422, "learning_rate": 1.2087074261823261e-05, "loss": 0.5797, "step": 19510 }, { "epoch": 0.4331805418363836, "grad_norm": 1.5160115957260132, "learning_rate": 1.2083664136256243e-05, "loss": 0.4582, "step": 19515 }, { "epoch": 0.43329152839591123, "grad_norm": 2.2496602535247803, "learning_rate": 1.2080253757364327e-05, "loss": 0.352, "step": 19520 }, { "epoch": 0.4334025149554389, "grad_norm": 0.9950528740882874, "learning_rate": 1.2076843125562135e-05, "loss": 0.3788, "step": 19525 }, { "epoch": 0.43351350151496654, "grad_norm": 0.9879639148712158, "learning_rate": 1.2073432241264322e-05, "loss": 0.4802, "step": 19530 }, { "epoch": 0.43362448807449416, "grad_norm": 1.0998682975769043, "learning_rate": 1.2070021104885571e-05, "loss": 0.4783, "step": 19535 }, { "epoch": 0.43373547463402184, "grad_norm": 1.5225579738616943, "learning_rate": 1.2066609716840595e-05, "loss": 0.2778, "step": 19540 }, { "epoch": 0.43384646119354947, "grad_norm": 1.4201195240020752, "learning_rate": 1.206319807754414e-05, "loss": 0.521, "step": 19545 }, { "epoch": 0.4339574477530771, "grad_norm": 1.3159350156784058, "learning_rate": 1.2059786187410984e-05, "loss": 0.4117, "step": 19550 }, { "epoch": 0.4340684343126047, "grad_norm": 1.2883739471435547, "learning_rate": 1.2056374046855932e-05, "loss": 0.3565, "step": 19555 }, { "epoch": 0.4341794208721324, "grad_norm": 1.8150126934051514, "learning_rate": 1.205296165629382e-05, "loss": 0.4719, "step": 19560 }, { "epoch": 0.43429040743166003, "grad_norm": 1.3383867740631104, "learning_rate": 1.2049549016139513e-05, "loss": 0.3684, "step": 19565 }, { "epoch": 0.43440139399118766, "grad_norm": 1.4191855192184448, "learning_rate": 1.2046136126807913e-05, "loss": 0.3516, "step": 19570 }, { "epoch": 0.4345123805507153, "grad_norm": 1.0860313177108765, "learning_rate": 1.204272298871394e-05, "loss": 0.4765, "step": 19575 }, { "epoch": 0.43462336711024296, "grad_norm": 0.9528011679649353, "learning_rate": 1.203930960227256e-05, "loss": 0.6358, "step": 19580 }, { "epoch": 0.4347343536697706, "grad_norm": 1.3005439043045044, "learning_rate": 1.203589596789876e-05, "loss": 0.3419, "step": 19585 }, { "epoch": 0.4348453402292982, "grad_norm": 1.0321930646896362, "learning_rate": 1.203248208600755e-05, "loss": 0.3879, "step": 19590 }, { "epoch": 0.4349563267888259, "grad_norm": 1.3699491024017334, "learning_rate": 1.2029067957013992e-05, "loss": 0.4039, "step": 19595 }, { "epoch": 0.4350673133483535, "grad_norm": 1.516921877861023, "learning_rate": 1.2025653581333149e-05, "loss": 0.4654, "step": 19600 }, { "epoch": 0.43517829990788115, "grad_norm": 1.036165714263916, "learning_rate": 1.2022238959380142e-05, "loss": 0.3697, "step": 19605 }, { "epoch": 0.4352892864674088, "grad_norm": 1.4566125869750977, "learning_rate": 1.2018824091570103e-05, "loss": 0.3274, "step": 19610 }, { "epoch": 0.43540027302693646, "grad_norm": 1.4865602254867554, "learning_rate": 1.2015408978318201e-05, "loss": 0.4777, "step": 19615 }, { "epoch": 0.4355112595864641, "grad_norm": 0.8841688632965088, "learning_rate": 1.2011993620039637e-05, "loss": 0.5436, "step": 19620 }, { "epoch": 0.4356222461459917, "grad_norm": 1.1306862831115723, "learning_rate": 1.2008578017149634e-05, "loss": 0.571, "step": 19625 }, { "epoch": 0.43573323270551934, "grad_norm": 1.3157212734222412, "learning_rate": 1.2005162170063454e-05, "loss": 0.5216, "step": 19630 }, { "epoch": 0.435844219265047, "grad_norm": 0.6145315170288086, "learning_rate": 1.2001746079196381e-05, "loss": 0.4309, "step": 19635 }, { "epoch": 0.43595520582457464, "grad_norm": 1.297216534614563, "learning_rate": 1.1998329744963733e-05, "loss": 0.483, "step": 19640 }, { "epoch": 0.43606619238410227, "grad_norm": 1.5347627401351929, "learning_rate": 1.1994913167780857e-05, "loss": 0.4924, "step": 19645 }, { "epoch": 0.43617717894362995, "grad_norm": 1.5418304204940796, "learning_rate": 1.1991496348063127e-05, "loss": 0.4475, "step": 19650 }, { "epoch": 0.4362881655031576, "grad_norm": 1.326873540878296, "learning_rate": 1.1988079286225954e-05, "loss": 0.4511, "step": 19655 }, { "epoch": 0.4363991520626852, "grad_norm": 1.0645344257354736, "learning_rate": 1.1984661982684763e-05, "loss": 0.4377, "step": 19660 }, { "epoch": 0.43651013862221283, "grad_norm": 1.4390782117843628, "learning_rate": 1.1981244437855027e-05, "loss": 0.3424, "step": 19665 }, { "epoch": 0.4366211251817405, "grad_norm": 1.2534408569335938, "learning_rate": 1.1977826652152235e-05, "loss": 0.4035, "step": 19670 }, { "epoch": 0.43673211174126814, "grad_norm": 0.9360421895980835, "learning_rate": 1.1974408625991916e-05, "loss": 0.347, "step": 19675 }, { "epoch": 0.43684309830079576, "grad_norm": 1.5838125944137573, "learning_rate": 1.1970990359789616e-05, "loss": 0.4984, "step": 19680 }, { "epoch": 0.4369540848603234, "grad_norm": 1.7336652278900146, "learning_rate": 1.1967571853960916e-05, "loss": 0.4712, "step": 19685 }, { "epoch": 0.43706507141985107, "grad_norm": 1.5626726150512695, "learning_rate": 1.196415310892143e-05, "loss": 0.2976, "step": 19690 }, { "epoch": 0.4371760579793787, "grad_norm": 1.5406701564788818, "learning_rate": 1.1960734125086797e-05, "loss": 0.4693, "step": 19695 }, { "epoch": 0.4372870445389063, "grad_norm": 0.7011148929595947, "learning_rate": 1.1957314902872686e-05, "loss": 0.5277, "step": 19700 }, { "epoch": 0.437398031098434, "grad_norm": 1.4581482410430908, "learning_rate": 1.1953895442694789e-05, "loss": 0.4809, "step": 19705 }, { "epoch": 0.43750901765796163, "grad_norm": 1.0448225736618042, "learning_rate": 1.1950475744968842e-05, "loss": 0.457, "step": 19710 }, { "epoch": 0.43762000421748926, "grad_norm": 0.9020761251449585, "learning_rate": 1.1947055810110591e-05, "loss": 0.3995, "step": 19715 }, { "epoch": 0.4377309907770169, "grad_norm": 1.1282302141189575, "learning_rate": 1.1943635638535827e-05, "loss": 0.5794, "step": 19720 }, { "epoch": 0.43784197733654456, "grad_norm": 1.2756569385528564, "learning_rate": 1.1940215230660362e-05, "loss": 0.641, "step": 19725 }, { "epoch": 0.4379529638960722, "grad_norm": 1.2321871519088745, "learning_rate": 1.1936794586900033e-05, "loss": 0.4981, "step": 19730 }, { "epoch": 0.4380639504555998, "grad_norm": 1.5373114347457886, "learning_rate": 1.1933373707670714e-05, "loss": 0.417, "step": 19735 }, { "epoch": 0.43817493701512744, "grad_norm": 0.9480398297309875, "learning_rate": 1.1929952593388307e-05, "loss": 0.4449, "step": 19740 }, { "epoch": 0.4382859235746551, "grad_norm": 1.2366368770599365, "learning_rate": 1.1926531244468733e-05, "loss": 0.3991, "step": 19745 }, { "epoch": 0.43839691013418275, "grad_norm": 0.8997697234153748, "learning_rate": 1.1923109661327954e-05, "loss": 0.4469, "step": 19750 }, { "epoch": 0.4385078966937104, "grad_norm": 1.6303012371063232, "learning_rate": 1.191968784438195e-05, "loss": 0.4095, "step": 19755 }, { "epoch": 0.43861888325323806, "grad_norm": 1.4117064476013184, "learning_rate": 1.1916265794046738e-05, "loss": 0.4293, "step": 19760 }, { "epoch": 0.4387298698127657, "grad_norm": 0.8353791832923889, "learning_rate": 1.1912843510738355e-05, "loss": 0.4011, "step": 19765 }, { "epoch": 0.4388408563722933, "grad_norm": 1.3969166278839111, "learning_rate": 1.1909420994872871e-05, "loss": 0.3568, "step": 19770 }, { "epoch": 0.43895184293182093, "grad_norm": 1.2692325115203857, "learning_rate": 1.190599824686639e-05, "loss": 0.3129, "step": 19775 }, { "epoch": 0.4390628294913486, "grad_norm": 0.765633761882782, "learning_rate": 1.1902575267135035e-05, "loss": 0.4564, "step": 19780 }, { "epoch": 0.43917381605087624, "grad_norm": 0.9388693571090698, "learning_rate": 1.1899152056094958e-05, "loss": 0.4518, "step": 19785 }, { "epoch": 0.43928480261040387, "grad_norm": 1.4808331727981567, "learning_rate": 1.1895728614162343e-05, "loss": 0.4618, "step": 19790 }, { "epoch": 0.4393957891699315, "grad_norm": 0.7243686318397522, "learning_rate": 1.1892304941753403e-05, "loss": 0.388, "step": 19795 }, { "epoch": 0.4395067757294592, "grad_norm": 1.2276579141616821, "learning_rate": 1.188888103928437e-05, "loss": 0.3931, "step": 19800 }, { "epoch": 0.4396177622889868, "grad_norm": 1.0823590755462646, "learning_rate": 1.1885456907171517e-05, "loss": 0.5161, "step": 19805 }, { "epoch": 0.43972874884851443, "grad_norm": 0.8903690576553345, "learning_rate": 1.188203254583114e-05, "loss": 0.3907, "step": 19810 }, { "epoch": 0.4398397354080421, "grad_norm": 1.7568098306655884, "learning_rate": 1.1878607955679555e-05, "loss": 0.5345, "step": 19815 }, { "epoch": 0.43995072196756974, "grad_norm": 1.1826956272125244, "learning_rate": 1.1875183137133114e-05, "loss": 0.4775, "step": 19820 }, { "epoch": 0.44006170852709736, "grad_norm": 1.2964848279953003, "learning_rate": 1.1871758090608199e-05, "loss": 0.3452, "step": 19825 }, { "epoch": 0.440172695086625, "grad_norm": 0.9392102360725403, "learning_rate": 1.1868332816521208e-05, "loss": 0.3947, "step": 19830 }, { "epoch": 0.44028368164615267, "grad_norm": 0.9794422388076782, "learning_rate": 1.1864907315288585e-05, "loss": 0.4547, "step": 19835 }, { "epoch": 0.4403946682056803, "grad_norm": 0.8995769023895264, "learning_rate": 1.1861481587326782e-05, "loss": 0.5318, "step": 19840 }, { "epoch": 0.4405056547652079, "grad_norm": 1.2317299842834473, "learning_rate": 1.1858055633052292e-05, "loss": 0.4632, "step": 19845 }, { "epoch": 0.44061664132473555, "grad_norm": 1.25813627243042, "learning_rate": 1.1854629452881628e-05, "loss": 0.488, "step": 19850 }, { "epoch": 0.44072762788426323, "grad_norm": 1.1181604862213135, "learning_rate": 1.1851203047231337e-05, "loss": 0.3962, "step": 19855 }, { "epoch": 0.44083861444379085, "grad_norm": 1.2032124996185303, "learning_rate": 1.1847776416517987e-05, "loss": 0.4725, "step": 19860 }, { "epoch": 0.4409496010033185, "grad_norm": 0.9831379652023315, "learning_rate": 1.1844349561158176e-05, "loss": 0.3965, "step": 19865 }, { "epoch": 0.44106058756284616, "grad_norm": 1.1643702983856201, "learning_rate": 1.1840922481568531e-05, "loss": 0.4862, "step": 19870 }, { "epoch": 0.4411715741223738, "grad_norm": 1.2106220722198486, "learning_rate": 1.1837495178165706e-05, "loss": 0.3527, "step": 19875 }, { "epoch": 0.4412825606819014, "grad_norm": 1.1105222702026367, "learning_rate": 1.1834067651366379e-05, "loss": 0.5157, "step": 19880 }, { "epoch": 0.44139354724142904, "grad_norm": 1.241958737373352, "learning_rate": 1.1830639901587256e-05, "loss": 0.4843, "step": 19885 }, { "epoch": 0.4415045338009567, "grad_norm": 1.510998249053955, "learning_rate": 1.1827211929245075e-05, "loss": 0.4317, "step": 19890 }, { "epoch": 0.44161552036048435, "grad_norm": 0.9848299622535706, "learning_rate": 1.1823783734756598e-05, "loss": 0.367, "step": 19895 }, { "epoch": 0.441726506920012, "grad_norm": 0.8628853559494019, "learning_rate": 1.1820355318538608e-05, "loss": 0.5189, "step": 19900 }, { "epoch": 0.4418374934795396, "grad_norm": 1.2922097444534302, "learning_rate": 1.1816926681007925e-05, "loss": 0.4615, "step": 19905 }, { "epoch": 0.4419484800390673, "grad_norm": 1.526689887046814, "learning_rate": 1.1813497822581388e-05, "loss": 0.4063, "step": 19910 }, { "epoch": 0.4420594665985949, "grad_norm": 2.21928334236145, "learning_rate": 1.1810068743675866e-05, "loss": 0.3148, "step": 19915 }, { "epoch": 0.44217045315812253, "grad_norm": 1.0265175104141235, "learning_rate": 1.180663944470826e-05, "loss": 0.5255, "step": 19920 }, { "epoch": 0.4422814397176502, "grad_norm": 1.005786418914795, "learning_rate": 1.1803209926095489e-05, "loss": 0.4243, "step": 19925 }, { "epoch": 0.44239242627717784, "grad_norm": 1.3304482698440552, "learning_rate": 1.1799780188254504e-05, "loss": 0.5117, "step": 19930 }, { "epoch": 0.44250341283670547, "grad_norm": 1.0991795063018799, "learning_rate": 1.1796350231602278e-05, "loss": 0.3571, "step": 19935 }, { "epoch": 0.4426143993962331, "grad_norm": 0.973331868648529, "learning_rate": 1.1792920056555812e-05, "loss": 0.328, "step": 19940 }, { "epoch": 0.4427253859557608, "grad_norm": 0.9038109183311462, "learning_rate": 1.1789489663532142e-05, "loss": 0.4416, "step": 19945 }, { "epoch": 0.4428363725152884, "grad_norm": 1.6230500936508179, "learning_rate": 1.178605905294832e-05, "loss": 0.5181, "step": 19950 }, { "epoch": 0.442947359074816, "grad_norm": 1.0690028667449951, "learning_rate": 1.1782628225221429e-05, "loss": 0.2601, "step": 19955 }, { "epoch": 0.44305834563434365, "grad_norm": 1.025299072265625, "learning_rate": 1.1779197180768575e-05, "loss": 0.4718, "step": 19960 }, { "epoch": 0.44316933219387133, "grad_norm": 1.301763653755188, "learning_rate": 1.1775765920006898e-05, "loss": 0.4844, "step": 19965 }, { "epoch": 0.44328031875339896, "grad_norm": 0.6717504262924194, "learning_rate": 1.177233444335355e-05, "loss": 0.3417, "step": 19970 }, { "epoch": 0.4433913053129266, "grad_norm": 0.9575620889663696, "learning_rate": 1.176890275122573e-05, "loss": 0.4368, "step": 19975 }, { "epoch": 0.44350229187245427, "grad_norm": 1.4361292123794556, "learning_rate": 1.1765470844040645e-05, "loss": 0.339, "step": 19980 }, { "epoch": 0.4436132784319819, "grad_norm": 1.0805773735046387, "learning_rate": 1.1762038722215533e-05, "loss": 0.4149, "step": 19985 }, { "epoch": 0.4437242649915095, "grad_norm": 0.904903769493103, "learning_rate": 1.1758606386167666e-05, "loss": 0.3387, "step": 19990 }, { "epoch": 0.44383525155103715, "grad_norm": 1.5252553224563599, "learning_rate": 1.1755173836314331e-05, "loss": 0.4768, "step": 19995 }, { "epoch": 0.44394623811056483, "grad_norm": 0.7814958691596985, "learning_rate": 1.1751741073072846e-05, "loss": 0.4594, "step": 20000 }, { "epoch": 0.44405722467009245, "grad_norm": 0.8147719502449036, "learning_rate": 1.174830809686056e-05, "loss": 0.4328, "step": 20005 }, { "epoch": 0.4441682112296201, "grad_norm": 1.363770604133606, "learning_rate": 1.1744874908094835e-05, "loss": 0.4362, "step": 20010 }, { "epoch": 0.44427919778914776, "grad_norm": 1.544438362121582, "learning_rate": 1.1741441507193073e-05, "loss": 0.6222, "step": 20015 }, { "epoch": 0.4443901843486754, "grad_norm": 1.065306305885315, "learning_rate": 1.1738007894572691e-05, "loss": 0.3821, "step": 20020 }, { "epoch": 0.444501170908203, "grad_norm": 1.0254193544387817, "learning_rate": 1.1734574070651137e-05, "loss": 0.3301, "step": 20025 }, { "epoch": 0.44461215746773064, "grad_norm": 0.7435585856437683, "learning_rate": 1.1731140035845887e-05, "loss": 0.3025, "step": 20030 }, { "epoch": 0.4447231440272583, "grad_norm": 0.9800282716751099, "learning_rate": 1.1727705790574437e-05, "loss": 0.319, "step": 20035 }, { "epoch": 0.44483413058678595, "grad_norm": 1.4175928831100464, "learning_rate": 1.1724271335254312e-05, "loss": 0.5204, "step": 20040 }, { "epoch": 0.4449451171463136, "grad_norm": 1.1740878820419312, "learning_rate": 1.172083667030306e-05, "loss": 0.4353, "step": 20045 }, { "epoch": 0.4450561037058412, "grad_norm": 2.3309788703918457, "learning_rate": 1.1717401796138256e-05, "loss": 0.3666, "step": 20050 }, { "epoch": 0.4451670902653689, "grad_norm": 1.2700037956237793, "learning_rate": 1.17139667131775e-05, "loss": 0.5611, "step": 20055 }, { "epoch": 0.4452780768248965, "grad_norm": 0.7723988890647888, "learning_rate": 1.1710531421838422e-05, "loss": 0.333, "step": 20060 }, { "epoch": 0.44538906338442413, "grad_norm": 1.2870498895645142, "learning_rate": 1.170709592253867e-05, "loss": 0.3527, "step": 20065 }, { "epoch": 0.4455000499439518, "grad_norm": 1.2130166292190552, "learning_rate": 1.170366021569592e-05, "loss": 0.4413, "step": 20070 }, { "epoch": 0.44561103650347944, "grad_norm": 0.8243342041969299, "learning_rate": 1.1700224301727877e-05, "loss": 0.5166, "step": 20075 }, { "epoch": 0.44572202306300707, "grad_norm": 1.8635729551315308, "learning_rate": 1.1696788181052263e-05, "loss": 0.5611, "step": 20080 }, { "epoch": 0.4458330096225347, "grad_norm": 0.9250847697257996, "learning_rate": 1.1693351854086833e-05, "loss": 0.3723, "step": 20085 }, { "epoch": 0.4459439961820624, "grad_norm": 1.3857841491699219, "learning_rate": 1.1689915321249364e-05, "loss": 0.5136, "step": 20090 }, { "epoch": 0.44605498274159, "grad_norm": 1.1505711078643799, "learning_rate": 1.1686478582957657e-05, "loss": 0.4465, "step": 20095 }, { "epoch": 0.4461659693011176, "grad_norm": 1.2375531196594238, "learning_rate": 1.168304163962954e-05, "loss": 0.5294, "step": 20100 }, { "epoch": 0.44627695586064525, "grad_norm": 1.0499058961868286, "learning_rate": 1.1679604491682865e-05, "loss": 0.5367, "step": 20105 }, { "epoch": 0.44638794242017293, "grad_norm": 0.9685129523277283, "learning_rate": 1.167616713953551e-05, "loss": 0.4059, "step": 20110 }, { "epoch": 0.44649892897970056, "grad_norm": 1.0699036121368408, "learning_rate": 1.1672729583605376e-05, "loss": 0.4751, "step": 20115 }, { "epoch": 0.4466099155392282, "grad_norm": 1.2520678043365479, "learning_rate": 1.1669291824310388e-05, "loss": 0.4419, "step": 20120 }, { "epoch": 0.44672090209875587, "grad_norm": 1.1786726713180542, "learning_rate": 1.16658538620685e-05, "loss": 0.512, "step": 20125 }, { "epoch": 0.4468318886582835, "grad_norm": 0.972960352897644, "learning_rate": 1.1662415697297685e-05, "loss": 0.4704, "step": 20130 }, { "epoch": 0.4469428752178111, "grad_norm": 1.3304414749145508, "learning_rate": 1.1658977330415943e-05, "loss": 0.2956, "step": 20135 }, { "epoch": 0.44705386177733875, "grad_norm": 1.6226036548614502, "learning_rate": 1.16555387618413e-05, "loss": 0.6865, "step": 20140 }, { "epoch": 0.4471648483368664, "grad_norm": 1.115662932395935, "learning_rate": 1.165209999199181e-05, "loss": 0.4262, "step": 20145 }, { "epoch": 0.44727583489639405, "grad_norm": 1.3321551084518433, "learning_rate": 1.1648661021285544e-05, "loss": 0.4129, "step": 20150 }, { "epoch": 0.4473868214559217, "grad_norm": 0.812921941280365, "learning_rate": 1.1645221850140598e-05, "loss": 0.563, "step": 20155 }, { "epoch": 0.4474978080154493, "grad_norm": 1.030159831047058, "learning_rate": 1.1641782478975099e-05, "loss": 0.4808, "step": 20160 }, { "epoch": 0.447608794574977, "grad_norm": 0.8396998047828674, "learning_rate": 1.1638342908207191e-05, "loss": 0.481, "step": 20165 }, { "epoch": 0.4477197811345046, "grad_norm": 0.9397024512290955, "learning_rate": 1.1634903138255045e-05, "loss": 0.4517, "step": 20170 }, { "epoch": 0.44783076769403224, "grad_norm": 1.6640818119049072, "learning_rate": 1.163146316953686e-05, "loss": 0.4182, "step": 20175 }, { "epoch": 0.4479417542535599, "grad_norm": 1.1481603384017944, "learning_rate": 1.1628023002470857e-05, "loss": 0.5254, "step": 20180 }, { "epoch": 0.44805274081308755, "grad_norm": 1.1192456483840942, "learning_rate": 1.1624582637475274e-05, "loss": 0.4221, "step": 20185 }, { "epoch": 0.4481637273726152, "grad_norm": 1.560524582862854, "learning_rate": 1.1621142074968385e-05, "loss": 0.4385, "step": 20190 }, { "epoch": 0.4482747139321428, "grad_norm": 1.255010724067688, "learning_rate": 1.1617701315368478e-05, "loss": 0.4787, "step": 20195 }, { "epoch": 0.4483857004916705, "grad_norm": 0.8947778344154358, "learning_rate": 1.1614260359093869e-05, "loss": 0.5235, "step": 20200 }, { "epoch": 0.4484966870511981, "grad_norm": 1.2940188646316528, "learning_rate": 1.1610819206562902e-05, "loss": 0.4674, "step": 20205 }, { "epoch": 0.44860767361072573, "grad_norm": 1.851446270942688, "learning_rate": 1.1607377858193938e-05, "loss": 0.3948, "step": 20210 }, { "epoch": 0.44871866017025336, "grad_norm": 0.9747987389564514, "learning_rate": 1.1603936314405365e-05, "loss": 0.3417, "step": 20215 }, { "epoch": 0.44882964672978104, "grad_norm": 1.1763579845428467, "learning_rate": 1.160049457561559e-05, "loss": 0.4035, "step": 20220 }, { "epoch": 0.44894063328930867, "grad_norm": 1.1478216648101807, "learning_rate": 1.1597052642243054e-05, "loss": 0.6614, "step": 20225 }, { "epoch": 0.4490516198488363, "grad_norm": 1.6329827308654785, "learning_rate": 1.1593610514706217e-05, "loss": 0.4021, "step": 20230 }, { "epoch": 0.449162606408364, "grad_norm": 1.0295683145523071, "learning_rate": 1.1590168193423557e-05, "loss": 0.5769, "step": 20235 }, { "epoch": 0.4492735929678916, "grad_norm": 1.0577813386917114, "learning_rate": 1.158672567881358e-05, "loss": 0.5333, "step": 20240 }, { "epoch": 0.4493845795274192, "grad_norm": 1.0659865140914917, "learning_rate": 1.1583282971294816e-05, "loss": 0.468, "step": 20245 }, { "epoch": 0.44949556608694685, "grad_norm": 1.8417197465896606, "learning_rate": 1.1579840071285817e-05, "loss": 0.4453, "step": 20250 }, { "epoch": 0.44960655264647453, "grad_norm": 1.0317349433898926, "learning_rate": 1.1576396979205162e-05, "loss": 0.3537, "step": 20255 }, { "epoch": 0.44971753920600216, "grad_norm": 0.8817946910858154, "learning_rate": 1.1572953695471449e-05, "loss": 0.4846, "step": 20260 }, { "epoch": 0.4498285257655298, "grad_norm": 1.245755672454834, "learning_rate": 1.1569510220503304e-05, "loss": 0.5292, "step": 20265 }, { "epoch": 0.4499395123250574, "grad_norm": 1.0409419536590576, "learning_rate": 1.1566066554719366e-05, "loss": 0.2456, "step": 20270 }, { "epoch": 0.4500504988845851, "grad_norm": 1.1645506620407104, "learning_rate": 1.156262269853831e-05, "loss": 0.3571, "step": 20275 }, { "epoch": 0.4501614854441127, "grad_norm": 1.2852460145950317, "learning_rate": 1.1559178652378826e-05, "loss": 0.503, "step": 20280 }, { "epoch": 0.45027247200364034, "grad_norm": 1.27859628200531, "learning_rate": 1.1555734416659632e-05, "loss": 0.4348, "step": 20285 }, { "epoch": 0.450383458563168, "grad_norm": 1.7184007167816162, "learning_rate": 1.1552289991799466e-05, "loss": 0.4694, "step": 20290 }, { "epoch": 0.45049444512269565, "grad_norm": 1.8372256755828857, "learning_rate": 1.1548845378217086e-05, "loss": 0.426, "step": 20295 }, { "epoch": 0.4506054316822233, "grad_norm": 1.3854246139526367, "learning_rate": 1.154540057633128e-05, "loss": 0.4741, "step": 20300 }, { "epoch": 0.4507164182417509, "grad_norm": 1.5537136793136597, "learning_rate": 1.1541955586560855e-05, "loss": 0.5976, "step": 20305 }, { "epoch": 0.4508274048012786, "grad_norm": 0.862808346748352, "learning_rate": 1.1538510409324642e-05, "loss": 0.4937, "step": 20310 }, { "epoch": 0.4509383913608062, "grad_norm": 1.1472190618515015, "learning_rate": 1.1535065045041492e-05, "loss": 0.3002, "step": 20315 }, { "epoch": 0.45104937792033384, "grad_norm": 0.9795900583267212, "learning_rate": 1.1531619494130283e-05, "loss": 0.3339, "step": 20320 }, { "epoch": 0.45116036447986146, "grad_norm": 1.3006950616836548, "learning_rate": 1.1528173757009913e-05, "loss": 0.5872, "step": 20325 }, { "epoch": 0.45127135103938915, "grad_norm": 1.2611806392669678, "learning_rate": 1.1524727834099304e-05, "loss": 0.4502, "step": 20330 }, { "epoch": 0.45138233759891677, "grad_norm": 0.8779106736183167, "learning_rate": 1.1521281725817393e-05, "loss": 0.3637, "step": 20335 }, { "epoch": 0.4514933241584444, "grad_norm": 1.3904417753219604, "learning_rate": 1.1517835432583156e-05, "loss": 0.455, "step": 20340 }, { "epoch": 0.4516043107179721, "grad_norm": 1.4688048362731934, "learning_rate": 1.1514388954815576e-05, "loss": 0.559, "step": 20345 }, { "epoch": 0.4517152972774997, "grad_norm": 1.2480225563049316, "learning_rate": 1.1510942292933667e-05, "loss": 0.3412, "step": 20350 }, { "epoch": 0.45182628383702733, "grad_norm": 1.2750505208969116, "learning_rate": 1.1507495447356462e-05, "loss": 0.3771, "step": 20355 }, { "epoch": 0.45193727039655496, "grad_norm": 0.7525187730789185, "learning_rate": 1.1504048418503016e-05, "loss": 0.4345, "step": 20360 }, { "epoch": 0.45204825695608264, "grad_norm": 1.280582308769226, "learning_rate": 1.1500601206792405e-05, "loss": 0.3903, "step": 20365 }, { "epoch": 0.45215924351561027, "grad_norm": 1.211562156677246, "learning_rate": 1.1497153812643735e-05, "loss": 0.3751, "step": 20370 }, { "epoch": 0.4522702300751379, "grad_norm": 1.9452505111694336, "learning_rate": 1.1493706236476123e-05, "loss": 0.4946, "step": 20375 }, { "epoch": 0.4523812166346655, "grad_norm": 0.9842869639396667, "learning_rate": 1.1490258478708718e-05, "loss": 0.3214, "step": 20380 }, { "epoch": 0.4524922031941932, "grad_norm": 0.8979201912879944, "learning_rate": 1.1486810539760684e-05, "loss": 0.4406, "step": 20385 }, { "epoch": 0.4526031897537208, "grad_norm": 1.5928150415420532, "learning_rate": 1.1483362420051211e-05, "loss": 0.4085, "step": 20390 }, { "epoch": 0.45271417631324845, "grad_norm": 1.6551522016525269, "learning_rate": 1.1479914119999508e-05, "loss": 0.3635, "step": 20395 }, { "epoch": 0.45282516287277613, "grad_norm": 1.5658823251724243, "learning_rate": 1.1476465640024814e-05, "loss": 0.4973, "step": 20400 }, { "epoch": 0.45293614943230376, "grad_norm": 1.3159431219100952, "learning_rate": 1.1473016980546377e-05, "loss": 0.4794, "step": 20405 }, { "epoch": 0.4530471359918314, "grad_norm": 2.1857190132141113, "learning_rate": 1.1469568141983476e-05, "loss": 0.4942, "step": 20410 }, { "epoch": 0.453158122551359, "grad_norm": 1.0008069276809692, "learning_rate": 1.1466119124755407e-05, "loss": 0.4464, "step": 20415 }, { "epoch": 0.4532691091108867, "grad_norm": 1.4019452333450317, "learning_rate": 1.1462669929281491e-05, "loss": 0.1733, "step": 20420 }, { "epoch": 0.4533800956704143, "grad_norm": 1.0772136449813843, "learning_rate": 1.1459220555981075e-05, "loss": 0.4846, "step": 20425 }, { "epoch": 0.45349108222994194, "grad_norm": 1.3797098398208618, "learning_rate": 1.1455771005273516e-05, "loss": 0.461, "step": 20430 }, { "epoch": 0.45360206878946957, "grad_norm": 1.1715055704116821, "learning_rate": 1.1452321277578197e-05, "loss": 0.4189, "step": 20435 }, { "epoch": 0.45371305534899725, "grad_norm": 1.3069055080413818, "learning_rate": 1.1448871373314532e-05, "loss": 0.5456, "step": 20440 }, { "epoch": 0.4538240419085249, "grad_norm": 2.144012928009033, "learning_rate": 1.1445421292901943e-05, "loss": 0.4793, "step": 20445 }, { "epoch": 0.4539350284680525, "grad_norm": 0.875789225101471, "learning_rate": 1.144197103675988e-05, "loss": 0.4647, "step": 20450 }, { "epoch": 0.4540460150275802, "grad_norm": 1.1556012630462646, "learning_rate": 1.1438520605307815e-05, "loss": 0.3527, "step": 20455 }, { "epoch": 0.4541570015871078, "grad_norm": 0.8506287932395935, "learning_rate": 1.1435069998965239e-05, "loss": 0.2872, "step": 20460 }, { "epoch": 0.45426798814663544, "grad_norm": 0.9600818157196045, "learning_rate": 1.1431619218151666e-05, "loss": 0.4843, "step": 20465 }, { "epoch": 0.45437897470616306, "grad_norm": 0.8915436267852783, "learning_rate": 1.142816826328663e-05, "loss": 0.5575, "step": 20470 }, { "epoch": 0.45448996126569075, "grad_norm": 1.268429160118103, "learning_rate": 1.1424717134789685e-05, "loss": 0.3821, "step": 20475 }, { "epoch": 0.45460094782521837, "grad_norm": 1.1212326288223267, "learning_rate": 1.142126583308041e-05, "loss": 0.3212, "step": 20480 }, { "epoch": 0.454711934384746, "grad_norm": 1.3861826658248901, "learning_rate": 1.14178143585784e-05, "loss": 0.4671, "step": 20485 }, { "epoch": 0.4548229209442736, "grad_norm": 1.224517822265625, "learning_rate": 1.1414362711703277e-05, "loss": 0.4713, "step": 20490 }, { "epoch": 0.4549339075038013, "grad_norm": 1.119930624961853, "learning_rate": 1.141091089287468e-05, "loss": 0.4724, "step": 20495 }, { "epoch": 0.45504489406332893, "grad_norm": 1.015840768814087, "learning_rate": 1.1407458902512268e-05, "loss": 0.4609, "step": 20500 }, { "epoch": 0.45515588062285656, "grad_norm": 0.9772009253501892, "learning_rate": 1.1404006741035718e-05, "loss": 0.4207, "step": 20505 }, { "epoch": 0.45526686718238424, "grad_norm": 0.799676775932312, "learning_rate": 1.1400554408864741e-05, "loss": 0.3784, "step": 20510 }, { "epoch": 0.45537785374191186, "grad_norm": 1.0177497863769531, "learning_rate": 1.1397101906419056e-05, "loss": 0.5354, "step": 20515 }, { "epoch": 0.4554888403014395, "grad_norm": 1.6851955652236938, "learning_rate": 1.1393649234118407e-05, "loss": 0.4534, "step": 20520 }, { "epoch": 0.4555998268609671, "grad_norm": 1.011048436164856, "learning_rate": 1.139019639238256e-05, "loss": 0.4355, "step": 20525 }, { "epoch": 0.4557108134204948, "grad_norm": 1.0171626806259155, "learning_rate": 1.1386743381631296e-05, "loss": 0.556, "step": 20530 }, { "epoch": 0.4558217999800224, "grad_norm": 1.0538846254348755, "learning_rate": 1.1383290202284418e-05, "loss": 0.5416, "step": 20535 }, { "epoch": 0.45593278653955005, "grad_norm": 1.385033369064331, "learning_rate": 1.1379836854761761e-05, "loss": 0.5533, "step": 20540 }, { "epoch": 0.4560437730990777, "grad_norm": 0.8482803702354431, "learning_rate": 1.1376383339483165e-05, "loss": 0.2547, "step": 20545 }, { "epoch": 0.45615475965860536, "grad_norm": 1.0015593767166138, "learning_rate": 1.1372929656868501e-05, "loss": 0.4572, "step": 20550 }, { "epoch": 0.456265746218133, "grad_norm": 1.6447019577026367, "learning_rate": 1.1369475807337653e-05, "loss": 0.3652, "step": 20555 }, { "epoch": 0.4563767327776606, "grad_norm": 1.3405704498291016, "learning_rate": 1.136602179131053e-05, "loss": 0.3431, "step": 20560 }, { "epoch": 0.4564877193371883, "grad_norm": 0.7782473564147949, "learning_rate": 1.1362567609207056e-05, "loss": 0.393, "step": 20565 }, { "epoch": 0.4565987058967159, "grad_norm": 0.8959970474243164, "learning_rate": 1.1359113261447183e-05, "loss": 0.5545, "step": 20570 }, { "epoch": 0.45670969245624354, "grad_norm": 1.2282785177230835, "learning_rate": 1.135565874845088e-05, "loss": 0.5127, "step": 20575 }, { "epoch": 0.45682067901577117, "grad_norm": 1.4772942066192627, "learning_rate": 1.135220407063813e-05, "loss": 0.5955, "step": 20580 }, { "epoch": 0.45693166557529885, "grad_norm": 1.02876615524292, "learning_rate": 1.1348749228428946e-05, "loss": 0.5075, "step": 20585 }, { "epoch": 0.4570426521348265, "grad_norm": 1.2462966442108154, "learning_rate": 1.1345294222243351e-05, "loss": 0.2885, "step": 20590 }, { "epoch": 0.4571536386943541, "grad_norm": 1.0734065771102905, "learning_rate": 1.13418390525014e-05, "loss": 0.4402, "step": 20595 }, { "epoch": 0.45726462525388173, "grad_norm": 1.3125464916229248, "learning_rate": 1.1338383719623156e-05, "loss": 0.4559, "step": 20600 }, { "epoch": 0.4573756118134094, "grad_norm": 1.280454397201538, "learning_rate": 1.1334928224028707e-05, "loss": 0.4138, "step": 20605 }, { "epoch": 0.45748659837293704, "grad_norm": 1.2182717323303223, "learning_rate": 1.1331472566138162e-05, "loss": 0.4755, "step": 20610 }, { "epoch": 0.45759758493246466, "grad_norm": 1.3797804117202759, "learning_rate": 1.1328016746371648e-05, "loss": 0.4237, "step": 20615 }, { "epoch": 0.45770857149199234, "grad_norm": 0.990795910358429, "learning_rate": 1.1324560765149312e-05, "loss": 0.4135, "step": 20620 }, { "epoch": 0.45781955805151997, "grad_norm": 1.1758495569229126, "learning_rate": 1.1321104622891321e-05, "loss": 0.5617, "step": 20625 }, { "epoch": 0.4579305446110476, "grad_norm": 0.8563075661659241, "learning_rate": 1.131764832001786e-05, "loss": 0.442, "step": 20630 }, { "epoch": 0.4580415311705752, "grad_norm": 1.0942500829696655, "learning_rate": 1.1314191856949134e-05, "loss": 0.4125, "step": 20635 }, { "epoch": 0.4581525177301029, "grad_norm": 1.0212494134902954, "learning_rate": 1.1310735234105372e-05, "loss": 0.5584, "step": 20640 }, { "epoch": 0.45826350428963053, "grad_norm": 0.9054468274116516, "learning_rate": 1.1307278451906817e-05, "loss": 0.6214, "step": 20645 }, { "epoch": 0.45837449084915816, "grad_norm": 1.5153844356536865, "learning_rate": 1.1303821510773728e-05, "loss": 0.5178, "step": 20650 }, { "epoch": 0.4584854774086858, "grad_norm": 1.2968688011169434, "learning_rate": 1.1300364411126395e-05, "loss": 0.3395, "step": 20655 }, { "epoch": 0.45859646396821346, "grad_norm": 1.2937582731246948, "learning_rate": 1.1296907153385115e-05, "loss": 0.4076, "step": 20660 }, { "epoch": 0.4587074505277411, "grad_norm": 1.468260645866394, "learning_rate": 1.1293449737970217e-05, "loss": 0.3309, "step": 20665 }, { "epoch": 0.4588184370872687, "grad_norm": 1.098471999168396, "learning_rate": 1.1289992165302036e-05, "loss": 0.4811, "step": 20670 }, { "epoch": 0.4589294236467964, "grad_norm": 2.470288038253784, "learning_rate": 1.128653443580093e-05, "loss": 0.4609, "step": 20675 }, { "epoch": 0.459040410206324, "grad_norm": 1.0436688661575317, "learning_rate": 1.1283076549887286e-05, "loss": 0.4925, "step": 20680 }, { "epoch": 0.45915139676585165, "grad_norm": 1.34546959400177, "learning_rate": 1.1279618507981498e-05, "loss": 0.5932, "step": 20685 }, { "epoch": 0.4592623833253793, "grad_norm": 1.2342839241027832, "learning_rate": 1.1276160310503984e-05, "loss": 0.3635, "step": 20690 }, { "epoch": 0.45937336988490696, "grad_norm": 0.7874952554702759, "learning_rate": 1.1272701957875178e-05, "loss": 0.3375, "step": 20695 }, { "epoch": 0.4594843564444346, "grad_norm": 0.848264217376709, "learning_rate": 1.1269243450515537e-05, "loss": 0.3849, "step": 20700 }, { "epoch": 0.4595953430039622, "grad_norm": 1.1894251108169556, "learning_rate": 1.1265784788845534e-05, "loss": 0.464, "step": 20705 }, { "epoch": 0.45970632956348983, "grad_norm": 1.3765366077423096, "learning_rate": 1.126232597328566e-05, "loss": 0.4449, "step": 20710 }, { "epoch": 0.4598173161230175, "grad_norm": 1.2355886697769165, "learning_rate": 1.1258867004256428e-05, "loss": 0.2947, "step": 20715 }, { "epoch": 0.45992830268254514, "grad_norm": 0.8822180032730103, "learning_rate": 1.1255407882178368e-05, "loss": 0.4502, "step": 20720 }, { "epoch": 0.46003928924207277, "grad_norm": 1.4964523315429688, "learning_rate": 1.1251948607472029e-05, "loss": 0.5549, "step": 20725 }, { "epoch": 0.46015027580160045, "grad_norm": 1.5759141445159912, "learning_rate": 1.1248489180557977e-05, "loss": 0.6028, "step": 20730 }, { "epoch": 0.4602612623611281, "grad_norm": 1.3286418914794922, "learning_rate": 1.1245029601856798e-05, "loss": 0.3963, "step": 20735 }, { "epoch": 0.4603722489206557, "grad_norm": 1.1409542560577393, "learning_rate": 1.1241569871789096e-05, "loss": 0.3983, "step": 20740 }, { "epoch": 0.46048323548018333, "grad_norm": 1.1353600025177002, "learning_rate": 1.123810999077549e-05, "loss": 0.491, "step": 20745 }, { "epoch": 0.460594222039711, "grad_norm": 1.1560803651809692, "learning_rate": 1.1234649959236625e-05, "loss": 0.6121, "step": 20750 }, { "epoch": 0.46070520859923864, "grad_norm": 1.4681437015533447, "learning_rate": 1.123118977759316e-05, "loss": 0.5314, "step": 20755 }, { "epoch": 0.46081619515876626, "grad_norm": 1.4478296041488647, "learning_rate": 1.1227729446265766e-05, "loss": 0.4961, "step": 20760 }, { "epoch": 0.4609271817182939, "grad_norm": 1.1718122959136963, "learning_rate": 1.1224268965675149e-05, "loss": 0.3873, "step": 20765 }, { "epoch": 0.46103816827782157, "grad_norm": 1.1868184804916382, "learning_rate": 1.1220808336242015e-05, "loss": 0.5384, "step": 20770 }, { "epoch": 0.4611491548373492, "grad_norm": 1.2744981050491333, "learning_rate": 1.1217347558387098e-05, "loss": 0.3696, "step": 20775 }, { "epoch": 0.4612601413968768, "grad_norm": 1.1455271244049072, "learning_rate": 1.1213886632531147e-05, "loss": 0.4095, "step": 20780 }, { "epoch": 0.4613711279564045, "grad_norm": 1.3924636840820312, "learning_rate": 1.121042555909493e-05, "loss": 0.4655, "step": 20785 }, { "epoch": 0.46148211451593213, "grad_norm": 1.2234874963760376, "learning_rate": 1.120696433849923e-05, "loss": 0.4949, "step": 20790 }, { "epoch": 0.46159310107545976, "grad_norm": 1.050239086151123, "learning_rate": 1.1203502971164859e-05, "loss": 0.4225, "step": 20795 }, { "epoch": 0.4617040876349874, "grad_norm": 1.2952483892440796, "learning_rate": 1.1200041457512632e-05, "loss": 0.592, "step": 20800 }, { "epoch": 0.46181507419451506, "grad_norm": 1.2631264925003052, "learning_rate": 1.1196579797963386e-05, "loss": 0.3866, "step": 20805 }, { "epoch": 0.4619260607540427, "grad_norm": 1.1427369117736816, "learning_rate": 1.1193117992937985e-05, "loss": 0.3557, "step": 20810 }, { "epoch": 0.4620370473135703, "grad_norm": 1.1873159408569336, "learning_rate": 1.11896560428573e-05, "loss": 0.4925, "step": 20815 }, { "epoch": 0.46214803387309794, "grad_norm": 1.403205156326294, "learning_rate": 1.1186193948142219e-05, "loss": 0.5569, "step": 20820 }, { "epoch": 0.4622590204326256, "grad_norm": 1.2612415552139282, "learning_rate": 1.1182731709213658e-05, "loss": 0.5531, "step": 20825 }, { "epoch": 0.46237000699215325, "grad_norm": 1.758579969406128, "learning_rate": 1.1179269326492543e-05, "loss": 0.514, "step": 20830 }, { "epoch": 0.4624809935516809, "grad_norm": 1.4168121814727783, "learning_rate": 1.117580680039982e-05, "loss": 0.4506, "step": 20835 }, { "epoch": 0.46259198011120856, "grad_norm": 1.189832091331482, "learning_rate": 1.1172344131356447e-05, "loss": 0.4042, "step": 20840 }, { "epoch": 0.4627029666707362, "grad_norm": 0.9151319861412048, "learning_rate": 1.1168881319783407e-05, "loss": 0.4344, "step": 20845 }, { "epoch": 0.4628139532302638, "grad_norm": 0.8878720998764038, "learning_rate": 1.1165418366101696e-05, "loss": 0.2991, "step": 20850 }, { "epoch": 0.46292493978979143, "grad_norm": 1.068393349647522, "learning_rate": 1.116195527073233e-05, "loss": 0.4902, "step": 20855 }, { "epoch": 0.4630359263493191, "grad_norm": 0.8120904564857483, "learning_rate": 1.1158492034096337e-05, "loss": 0.444, "step": 20860 }, { "epoch": 0.46314691290884674, "grad_norm": 1.1619365215301514, "learning_rate": 1.1155028656614768e-05, "loss": 0.3862, "step": 20865 }, { "epoch": 0.46325789946837437, "grad_norm": 1.2095001935958862, "learning_rate": 1.1151565138708688e-05, "loss": 0.4603, "step": 20870 }, { "epoch": 0.463368886027902, "grad_norm": 1.2450065612792969, "learning_rate": 1.1148101480799181e-05, "loss": 0.4101, "step": 20875 }, { "epoch": 0.4634798725874297, "grad_norm": 2.0599148273468018, "learning_rate": 1.1144637683307346e-05, "loss": 0.3749, "step": 20880 }, { "epoch": 0.4635908591469573, "grad_norm": 0.8745701909065247, "learning_rate": 1.1141173746654304e-05, "loss": 0.398, "step": 20885 }, { "epoch": 0.4637018457064849, "grad_norm": 1.383885383605957, "learning_rate": 1.1137709671261181e-05, "loss": 0.4914, "step": 20890 }, { "epoch": 0.4638128322660126, "grad_norm": 1.2333803176879883, "learning_rate": 1.1134245457549133e-05, "loss": 0.3697, "step": 20895 }, { "epoch": 0.46392381882554024, "grad_norm": 1.6844825744628906, "learning_rate": 1.1130781105939325e-05, "loss": 0.5081, "step": 20900 }, { "epoch": 0.46403480538506786, "grad_norm": 0.9865526556968689, "learning_rate": 1.1127316616852943e-05, "loss": 0.3997, "step": 20905 }, { "epoch": 0.4641457919445955, "grad_norm": 1.1580348014831543, "learning_rate": 1.112385199071119e-05, "loss": 0.4482, "step": 20910 }, { "epoch": 0.46425677850412317, "grad_norm": 0.8300138711929321, "learning_rate": 1.112038722793528e-05, "loss": 0.5253, "step": 20915 }, { "epoch": 0.4643677650636508, "grad_norm": 1.238762378692627, "learning_rate": 1.1116922328946447e-05, "loss": 0.481, "step": 20920 }, { "epoch": 0.4644787516231784, "grad_norm": 1.333196997642517, "learning_rate": 1.1113457294165944e-05, "loss": 0.3692, "step": 20925 }, { "epoch": 0.46458973818270605, "grad_norm": 1.5084500312805176, "learning_rate": 1.1109992124015036e-05, "loss": 0.4737, "step": 20930 }, { "epoch": 0.46470072474223373, "grad_norm": 1.103947401046753, "learning_rate": 1.1106526818915008e-05, "loss": 0.5106, "step": 20935 }, { "epoch": 0.46481171130176135, "grad_norm": 1.341492772102356, "learning_rate": 1.1103061379287163e-05, "loss": 0.4256, "step": 20940 }, { "epoch": 0.464922697861289, "grad_norm": 1.0959070920944214, "learning_rate": 1.1099595805552815e-05, "loss": 0.3839, "step": 20945 }, { "epoch": 0.46503368442081666, "grad_norm": 1.2406764030456543, "learning_rate": 1.1096130098133296e-05, "loss": 0.4045, "step": 20950 }, { "epoch": 0.4651446709803443, "grad_norm": 1.3345435857772827, "learning_rate": 1.1092664257449955e-05, "loss": 0.3243, "step": 20955 }, { "epoch": 0.4652556575398719, "grad_norm": 1.7433699369430542, "learning_rate": 1.1089198283924155e-05, "loss": 0.466, "step": 20960 }, { "epoch": 0.46536664409939954, "grad_norm": 1.3542841672897339, "learning_rate": 1.1085732177977284e-05, "loss": 0.3902, "step": 20965 }, { "epoch": 0.4654776306589272, "grad_norm": 1.3806394338607788, "learning_rate": 1.1082265940030736e-05, "loss": 0.4725, "step": 20970 }, { "epoch": 0.46558861721845485, "grad_norm": 1.1428102254867554, "learning_rate": 1.1078799570505925e-05, "loss": 0.3389, "step": 20975 }, { "epoch": 0.4656996037779825, "grad_norm": 1.1814501285552979, "learning_rate": 1.107533306982428e-05, "loss": 0.3445, "step": 20980 }, { "epoch": 0.4658105903375101, "grad_norm": 1.3944542407989502, "learning_rate": 1.1071866438407245e-05, "loss": 0.5482, "step": 20985 }, { "epoch": 0.4659215768970378, "grad_norm": 0.8299020528793335, "learning_rate": 1.1068399676676286e-05, "loss": 0.4399, "step": 20990 }, { "epoch": 0.4660325634565654, "grad_norm": 1.0346838235855103, "learning_rate": 1.1064932785052877e-05, "loss": 0.3264, "step": 20995 }, { "epoch": 0.46614355001609303, "grad_norm": 1.1892578601837158, "learning_rate": 1.106146576395851e-05, "loss": 0.5251, "step": 21000 }, { "epoch": 0.4662545365756207, "grad_norm": 1.5619806051254272, "learning_rate": 1.1057998613814695e-05, "loss": 0.3387, "step": 21005 }, { "epoch": 0.46636552313514834, "grad_norm": 1.4932775497436523, "learning_rate": 1.105453133504296e-05, "loss": 0.5554, "step": 21010 }, { "epoch": 0.46647650969467597, "grad_norm": 1.0520886182785034, "learning_rate": 1.1051063928064836e-05, "loss": 0.4444, "step": 21015 }, { "epoch": 0.4665874962542036, "grad_norm": 1.2631596326828003, "learning_rate": 1.104759639330189e-05, "loss": 0.3723, "step": 21020 }, { "epoch": 0.4666984828137313, "grad_norm": 1.4202393293380737, "learning_rate": 1.1044128731175686e-05, "loss": 0.4709, "step": 21025 }, { "epoch": 0.4668094693732589, "grad_norm": 1.4321917295455933, "learning_rate": 1.1040660942107813e-05, "loss": 0.4961, "step": 21030 }, { "epoch": 0.4669204559327865, "grad_norm": 1.6301039457321167, "learning_rate": 1.1037193026519872e-05, "loss": 0.3466, "step": 21035 }, { "epoch": 0.4670314424923142, "grad_norm": 0.9708942174911499, "learning_rate": 1.1033724984833483e-05, "loss": 0.5608, "step": 21040 }, { "epoch": 0.46714242905184183, "grad_norm": 1.3298207521438599, "learning_rate": 1.1030256817470272e-05, "loss": 0.5311, "step": 21045 }, { "epoch": 0.46725341561136946, "grad_norm": 0.9955623149871826, "learning_rate": 1.1026788524851897e-05, "loss": 0.4202, "step": 21050 }, { "epoch": 0.4673644021708971, "grad_norm": 0.8595684766769409, "learning_rate": 1.1023320107400017e-05, "loss": 0.3733, "step": 21055 }, { "epoch": 0.46747538873042477, "grad_norm": 1.570600152015686, "learning_rate": 1.101985156553631e-05, "loss": 0.5551, "step": 21060 }, { "epoch": 0.4675863752899524, "grad_norm": 0.6361602544784546, "learning_rate": 1.101638289968247e-05, "loss": 0.5578, "step": 21065 }, { "epoch": 0.46769736184948, "grad_norm": 1.6245297193527222, "learning_rate": 1.1012914110260202e-05, "loss": 0.5088, "step": 21070 }, { "epoch": 0.46780834840900765, "grad_norm": 1.0813826322555542, "learning_rate": 1.1009445197691237e-05, "loss": 0.4791, "step": 21075 }, { "epoch": 0.4679193349685353, "grad_norm": 0.7604579329490662, "learning_rate": 1.1005976162397309e-05, "loss": 0.3741, "step": 21080 }, { "epoch": 0.46803032152806295, "grad_norm": 1.3884882926940918, "learning_rate": 1.1002507004800174e-05, "loss": 0.5016, "step": 21085 }, { "epoch": 0.4681413080875906, "grad_norm": 1.620674967765808, "learning_rate": 1.09990377253216e-05, "loss": 0.5009, "step": 21090 }, { "epoch": 0.46825229464711826, "grad_norm": 0.7146098017692566, "learning_rate": 1.099556832438337e-05, "loss": 0.4999, "step": 21095 }, { "epoch": 0.4683632812066459, "grad_norm": 0.9891538023948669, "learning_rate": 1.0992098802407281e-05, "loss": 0.4181, "step": 21100 }, { "epoch": 0.4684742677661735, "grad_norm": 1.9593673944473267, "learning_rate": 1.0988629159815147e-05, "loss": 0.4987, "step": 21105 }, { "epoch": 0.46858525432570114, "grad_norm": 0.9993635416030884, "learning_rate": 1.0985159397028795e-05, "loss": 0.2836, "step": 21110 }, { "epoch": 0.4686962408852288, "grad_norm": 0.8817034959793091, "learning_rate": 1.0981689514470069e-05, "loss": 0.4039, "step": 21115 }, { "epoch": 0.46880722744475645, "grad_norm": 1.5623383522033691, "learning_rate": 1.0978219512560824e-05, "loss": 0.3283, "step": 21120 }, { "epoch": 0.4689182140042841, "grad_norm": 1.112033724784851, "learning_rate": 1.097474939172293e-05, "loss": 0.3188, "step": 21125 }, { "epoch": 0.4690292005638117, "grad_norm": 0.9545212984085083, "learning_rate": 1.0971279152378271e-05, "loss": 0.4523, "step": 21130 }, { "epoch": 0.4691401871233394, "grad_norm": 1.1092740297317505, "learning_rate": 1.0967808794948756e-05, "loss": 0.36, "step": 21135 }, { "epoch": 0.469251173682867, "grad_norm": 1.4138292074203491, "learning_rate": 1.0964338319856288e-05, "loss": 0.3877, "step": 21140 }, { "epoch": 0.46936216024239463, "grad_norm": 1.7056288719177246, "learning_rate": 1.0960867727522806e-05, "loss": 0.4093, "step": 21145 }, { "epoch": 0.4694731468019223, "grad_norm": 0.9393314123153687, "learning_rate": 1.0957397018370247e-05, "loss": 0.4628, "step": 21150 }, { "epoch": 0.46958413336144994, "grad_norm": 0.9642075300216675, "learning_rate": 1.0953926192820565e-05, "loss": 0.5429, "step": 21155 }, { "epoch": 0.46969511992097757, "grad_norm": 1.1112103462219238, "learning_rate": 1.0950455251295739e-05, "loss": 0.4087, "step": 21160 }, { "epoch": 0.4698061064805052, "grad_norm": 1.0092273950576782, "learning_rate": 1.0946984194217753e-05, "loss": 0.4188, "step": 21165 }, { "epoch": 0.4699170930400329, "grad_norm": 1.1835898160934448, "learning_rate": 1.0943513022008603e-05, "loss": 0.4485, "step": 21170 }, { "epoch": 0.4700280795995605, "grad_norm": 1.524971604347229, "learning_rate": 1.0940041735090305e-05, "loss": 0.4301, "step": 21175 }, { "epoch": 0.4701390661590881, "grad_norm": 1.383873701095581, "learning_rate": 1.0936570333884885e-05, "loss": 0.3277, "step": 21180 }, { "epoch": 0.47025005271861575, "grad_norm": 1.8832981586456299, "learning_rate": 1.0933098818814383e-05, "loss": 0.4364, "step": 21185 }, { "epoch": 0.47036103927814343, "grad_norm": 1.0231654644012451, "learning_rate": 1.0929627190300858e-05, "loss": 0.4135, "step": 21190 }, { "epoch": 0.47047202583767106, "grad_norm": 1.4642794132232666, "learning_rate": 1.0926155448766376e-05, "loss": 0.3931, "step": 21195 }, { "epoch": 0.4705830123971987, "grad_norm": 0.6988185048103333, "learning_rate": 1.092268359463302e-05, "loss": 0.3946, "step": 21200 }, { "epoch": 0.47069399895672637, "grad_norm": 1.163567304611206, "learning_rate": 1.091921162832289e-05, "loss": 0.3978, "step": 21205 }, { "epoch": 0.470804985516254, "grad_norm": 1.002905249595642, "learning_rate": 1.0915739550258091e-05, "loss": 0.3521, "step": 21210 }, { "epoch": 0.4709159720757816, "grad_norm": 0.9412310719490051, "learning_rate": 1.0912267360860747e-05, "loss": 0.5536, "step": 21215 }, { "epoch": 0.47102695863530925, "grad_norm": 1.228294014930725, "learning_rate": 1.0908795060552997e-05, "loss": 0.3347, "step": 21220 }, { "epoch": 0.4711379451948369, "grad_norm": 1.4175302982330322, "learning_rate": 1.0905322649756992e-05, "loss": 0.4958, "step": 21225 }, { "epoch": 0.47124893175436455, "grad_norm": 0.6760334968566895, "learning_rate": 1.0901850128894899e-05, "loss": 0.4364, "step": 21230 }, { "epoch": 0.4713599183138922, "grad_norm": 1.2598497867584229, "learning_rate": 1.089837749838889e-05, "loss": 0.4319, "step": 21235 }, { "epoch": 0.4714709048734198, "grad_norm": 1.8593010902404785, "learning_rate": 1.0894904758661155e-05, "loss": 0.3682, "step": 21240 }, { "epoch": 0.4715818914329475, "grad_norm": 1.5405446290969849, "learning_rate": 1.0891431910133905e-05, "loss": 0.4079, "step": 21245 }, { "epoch": 0.4716928779924751, "grad_norm": 1.7913031578063965, "learning_rate": 1.0887958953229349e-05, "loss": 0.4243, "step": 21250 }, { "epoch": 0.47180386455200274, "grad_norm": 0.7782161831855774, "learning_rate": 1.0884485888369725e-05, "loss": 0.3811, "step": 21255 }, { "epoch": 0.4719148511115304, "grad_norm": 0.8864994049072266, "learning_rate": 1.0881012715977271e-05, "loss": 0.5465, "step": 21260 }, { "epoch": 0.47202583767105805, "grad_norm": 1.6385595798492432, "learning_rate": 1.0877539436474245e-05, "loss": 0.3958, "step": 21265 }, { "epoch": 0.47213682423058567, "grad_norm": 0.8608049750328064, "learning_rate": 1.0874066050282917e-05, "loss": 0.4109, "step": 21270 }, { "epoch": 0.4722478107901133, "grad_norm": 1.0551360845565796, "learning_rate": 1.087059255782557e-05, "loss": 0.6298, "step": 21275 }, { "epoch": 0.472358797349641, "grad_norm": 1.1309964656829834, "learning_rate": 1.08671189595245e-05, "loss": 0.3551, "step": 21280 }, { "epoch": 0.4724697839091686, "grad_norm": 1.741068959236145, "learning_rate": 1.0863645255802017e-05, "loss": 0.4292, "step": 21285 }, { "epoch": 0.47258077046869623, "grad_norm": 1.0462664365768433, "learning_rate": 1.086017144708044e-05, "loss": 0.5168, "step": 21290 }, { "epoch": 0.47269175702822386, "grad_norm": 1.3243409395217896, "learning_rate": 1.0856697533782102e-05, "loss": 0.4631, "step": 21295 }, { "epoch": 0.47280274358775154, "grad_norm": 1.2467161417007446, "learning_rate": 1.085322351632935e-05, "loss": 0.4451, "step": 21300 }, { "epoch": 0.47291373014727917, "grad_norm": 1.0557029247283936, "learning_rate": 1.0849749395144544e-05, "loss": 0.3586, "step": 21305 }, { "epoch": 0.4730247167068068, "grad_norm": 0.7982147336006165, "learning_rate": 1.084627517065006e-05, "loss": 0.3789, "step": 21310 }, { "epoch": 0.4731357032663345, "grad_norm": 1.3924659490585327, "learning_rate": 1.0842800843268274e-05, "loss": 0.4243, "step": 21315 }, { "epoch": 0.4732466898258621, "grad_norm": 0.9715703725814819, "learning_rate": 1.0839326413421593e-05, "loss": 0.3626, "step": 21320 }, { "epoch": 0.4733576763853897, "grad_norm": 1.034337043762207, "learning_rate": 1.0835851881532418e-05, "loss": 0.4058, "step": 21325 }, { "epoch": 0.47346866294491735, "grad_norm": 1.2233444452285767, "learning_rate": 1.0832377248023175e-05, "loss": 0.3141, "step": 21330 }, { "epoch": 0.47357964950444503, "grad_norm": 1.4177346229553223, "learning_rate": 1.0828902513316299e-05, "loss": 0.4295, "step": 21335 }, { "epoch": 0.47369063606397266, "grad_norm": 1.0765271186828613, "learning_rate": 1.0825427677834235e-05, "loss": 0.3157, "step": 21340 }, { "epoch": 0.4738016226235003, "grad_norm": 0.633413553237915, "learning_rate": 1.0821952741999443e-05, "loss": 0.3753, "step": 21345 }, { "epoch": 0.4739126091830279, "grad_norm": 2.0716490745544434, "learning_rate": 1.0818477706234394e-05, "loss": 0.4344, "step": 21350 }, { "epoch": 0.4740235957425556, "grad_norm": 0.8376625180244446, "learning_rate": 1.0815002570961568e-05, "loss": 0.5318, "step": 21355 }, { "epoch": 0.4741345823020832, "grad_norm": 0.8807407021522522, "learning_rate": 1.0811527336603465e-05, "loss": 0.3093, "step": 21360 }, { "epoch": 0.47424556886161084, "grad_norm": 1.2454125881195068, "learning_rate": 1.0808052003582588e-05, "loss": 0.4386, "step": 21365 }, { "epoch": 0.4743565554211385, "grad_norm": 1.1970107555389404, "learning_rate": 1.080457657232146e-05, "loss": 0.2683, "step": 21370 }, { "epoch": 0.47446754198066615, "grad_norm": 0.8811781406402588, "learning_rate": 1.0801101043242607e-05, "loss": 0.377, "step": 21375 }, { "epoch": 0.4745785285401938, "grad_norm": 1.0804111957550049, "learning_rate": 1.079762541676858e-05, "loss": 0.4174, "step": 21380 }, { "epoch": 0.4746895150997214, "grad_norm": 1.7538986206054688, "learning_rate": 1.0794149693321927e-05, "loss": 0.5051, "step": 21385 }, { "epoch": 0.4748005016592491, "grad_norm": 1.358725905418396, "learning_rate": 1.0790673873325219e-05, "loss": 0.659, "step": 21390 }, { "epoch": 0.4749114882187767, "grad_norm": 1.6165276765823364, "learning_rate": 1.0787197957201035e-05, "loss": 0.306, "step": 21395 }, { "epoch": 0.47502247477830434, "grad_norm": 1.8659031391143799, "learning_rate": 1.0783721945371962e-05, "loss": 0.4602, "step": 21400 }, { "epoch": 0.47513346133783196, "grad_norm": 1.331539511680603, "learning_rate": 1.0780245838260602e-05, "loss": 0.3587, "step": 21405 }, { "epoch": 0.47524444789735965, "grad_norm": 1.6114097833633423, "learning_rate": 1.0776769636289568e-05, "loss": 0.3664, "step": 21410 }, { "epoch": 0.47535543445688727, "grad_norm": 1.5674644708633423, "learning_rate": 1.077329333988149e-05, "loss": 0.3443, "step": 21415 }, { "epoch": 0.4754664210164149, "grad_norm": 1.9007574319839478, "learning_rate": 1.0769816949459002e-05, "loss": 0.2939, "step": 21420 }, { "epoch": 0.4755774075759426, "grad_norm": 1.640196681022644, "learning_rate": 1.0766340465444749e-05, "loss": 0.3501, "step": 21425 }, { "epoch": 0.4756883941354702, "grad_norm": 1.8425315618515015, "learning_rate": 1.0762863888261392e-05, "loss": 0.4537, "step": 21430 }, { "epoch": 0.47579938069499783, "grad_norm": 1.562214970588684, "learning_rate": 1.0759387218331606e-05, "loss": 0.3477, "step": 21435 }, { "epoch": 0.47591036725452546, "grad_norm": 0.9589049816131592, "learning_rate": 1.0755910456078062e-05, "loss": 0.3283, "step": 21440 }, { "epoch": 0.47602135381405314, "grad_norm": 1.2559269666671753, "learning_rate": 1.0752433601923465e-05, "loss": 0.3608, "step": 21445 }, { "epoch": 0.47613234037358076, "grad_norm": 1.0764013528823853, "learning_rate": 1.0748956656290512e-05, "loss": 0.5934, "step": 21450 }, { "epoch": 0.4762433269331084, "grad_norm": 1.1700280904769897, "learning_rate": 1.0745479619601923e-05, "loss": 0.3546, "step": 21455 }, { "epoch": 0.476354313492636, "grad_norm": 1.494066834449768, "learning_rate": 1.074200249228042e-05, "loss": 0.476, "step": 21460 }, { "epoch": 0.4764653000521637, "grad_norm": 1.316925287246704, "learning_rate": 1.073852527474874e-05, "loss": 0.3571, "step": 21465 }, { "epoch": 0.4765762866116913, "grad_norm": 1.3482223749160767, "learning_rate": 1.0735047967429638e-05, "loss": 0.3626, "step": 21470 }, { "epoch": 0.47668727317121895, "grad_norm": 0.9858172535896301, "learning_rate": 1.0731570570745869e-05, "loss": 0.4228, "step": 21475 }, { "epoch": 0.47679825973074663, "grad_norm": 1.263492226600647, "learning_rate": 1.0728093085120202e-05, "loss": 0.3674, "step": 21480 }, { "epoch": 0.47690924629027426, "grad_norm": 1.7882106304168701, "learning_rate": 1.072461551097542e-05, "loss": 0.291, "step": 21485 }, { "epoch": 0.4770202328498019, "grad_norm": 1.1271024942398071, "learning_rate": 1.0721137848734316e-05, "loss": 0.483, "step": 21490 }, { "epoch": 0.4771312194093295, "grad_norm": 1.754003643989563, "learning_rate": 1.0717660098819685e-05, "loss": 0.4653, "step": 21495 }, { "epoch": 0.4772422059688572, "grad_norm": 1.011060357093811, "learning_rate": 1.0714182261654351e-05, "loss": 0.4512, "step": 21500 }, { "epoch": 0.4773531925283848, "grad_norm": 1.6048130989074707, "learning_rate": 1.0710704337661131e-05, "loss": 0.2906, "step": 21505 }, { "epoch": 0.47746417908791244, "grad_norm": 0.7402877807617188, "learning_rate": 1.0707226327262862e-05, "loss": 0.516, "step": 21510 }, { "epoch": 0.47757516564744007, "grad_norm": 1.0845234394073486, "learning_rate": 1.070374823088239e-05, "loss": 0.2873, "step": 21515 }, { "epoch": 0.47768615220696775, "grad_norm": 1.0958060026168823, "learning_rate": 1.0700270048942568e-05, "loss": 0.2853, "step": 21520 }, { "epoch": 0.4777971387664954, "grad_norm": 1.5488417148590088, "learning_rate": 1.0696791781866255e-05, "loss": 0.459, "step": 21525 }, { "epoch": 0.477908125326023, "grad_norm": 1.8981561660766602, "learning_rate": 1.0693313430076343e-05, "loss": 0.6263, "step": 21530 }, { "epoch": 0.4780191118855507, "grad_norm": 1.2021952867507935, "learning_rate": 1.0689834993995705e-05, "loss": 0.4644, "step": 21535 }, { "epoch": 0.4781300984450783, "grad_norm": 1.1099729537963867, "learning_rate": 1.0686356474047242e-05, "loss": 0.4083, "step": 21540 }, { "epoch": 0.47824108500460594, "grad_norm": 1.1983321905136108, "learning_rate": 1.068287787065386e-05, "loss": 0.4784, "step": 21545 }, { "epoch": 0.47835207156413356, "grad_norm": 1.0222690105438232, "learning_rate": 1.0679399184238477e-05, "loss": 0.4406, "step": 21550 }, { "epoch": 0.47846305812366124, "grad_norm": 1.0962550640106201, "learning_rate": 1.0675920415224021e-05, "loss": 0.4584, "step": 21555 }, { "epoch": 0.47857404468318887, "grad_norm": 1.4673035144805908, "learning_rate": 1.0672441564033429e-05, "loss": 0.3869, "step": 21560 }, { "epoch": 0.4786850312427165, "grad_norm": 1.1756876707077026, "learning_rate": 1.0668962631089646e-05, "loss": 0.4112, "step": 21565 }, { "epoch": 0.4787960178022441, "grad_norm": 1.4724787473678589, "learning_rate": 1.0665483616815628e-05, "loss": 0.4768, "step": 21570 }, { "epoch": 0.4789070043617718, "grad_norm": 1.4865951538085938, "learning_rate": 1.0662004521634346e-05, "loss": 0.2881, "step": 21575 }, { "epoch": 0.47901799092129943, "grad_norm": 2.133817434310913, "learning_rate": 1.065852534596877e-05, "loss": 0.4258, "step": 21580 }, { "epoch": 0.47912897748082706, "grad_norm": 1.590226411819458, "learning_rate": 1.0655046090241895e-05, "loss": 0.4986, "step": 21585 }, { "epoch": 0.47923996404035474, "grad_norm": 1.794185996055603, "learning_rate": 1.0651566754876715e-05, "loss": 0.5605, "step": 21590 }, { "epoch": 0.47935095059988236, "grad_norm": 1.2741948366165161, "learning_rate": 1.0648087340296232e-05, "loss": 0.508, "step": 21595 }, { "epoch": 0.47946193715941, "grad_norm": 2.1430916786193848, "learning_rate": 1.0644607846923462e-05, "loss": 0.3775, "step": 21600 }, { "epoch": 0.4795729237189376, "grad_norm": 1.19858980178833, "learning_rate": 1.0641128275181433e-05, "loss": 0.4721, "step": 21605 }, { "epoch": 0.4796839102784653, "grad_norm": 1.104694128036499, "learning_rate": 1.063764862549318e-05, "loss": 0.4713, "step": 21610 }, { "epoch": 0.4797948968379929, "grad_norm": 1.2688933610916138, "learning_rate": 1.0634168898281745e-05, "loss": 0.3581, "step": 21615 }, { "epoch": 0.47990588339752055, "grad_norm": 0.7898721098899841, "learning_rate": 1.0630689093970182e-05, "loss": 0.5328, "step": 21620 }, { "epoch": 0.4800168699570482, "grad_norm": 1.2295295000076294, "learning_rate": 1.0627209212981552e-05, "loss": 0.4056, "step": 21625 }, { "epoch": 0.48012785651657586, "grad_norm": 1.2840009927749634, "learning_rate": 1.0623729255738932e-05, "loss": 0.4107, "step": 21630 }, { "epoch": 0.4802388430761035, "grad_norm": 0.8035621643066406, "learning_rate": 1.0620249222665398e-05, "loss": 0.3382, "step": 21635 }, { "epoch": 0.4803498296356311, "grad_norm": 1.6547472476959229, "learning_rate": 1.0616769114184044e-05, "loss": 0.3411, "step": 21640 }, { "epoch": 0.4804608161951588, "grad_norm": 0.7649518251419067, "learning_rate": 1.0613288930717974e-05, "loss": 0.4424, "step": 21645 }, { "epoch": 0.4805718027546864, "grad_norm": 1.1245989799499512, "learning_rate": 1.060980867269029e-05, "loss": 0.5485, "step": 21650 }, { "epoch": 0.48068278931421404, "grad_norm": 0.8971288800239563, "learning_rate": 1.0606328340524113e-05, "loss": 0.4177, "step": 21655 }, { "epoch": 0.48079377587374167, "grad_norm": 1.5661104917526245, "learning_rate": 1.0602847934642568e-05, "loss": 0.3255, "step": 21660 }, { "epoch": 0.48090476243326935, "grad_norm": 0.85600745677948, "learning_rate": 1.0599367455468793e-05, "loss": 0.4899, "step": 21665 }, { "epoch": 0.481015748992797, "grad_norm": 0.8572877049446106, "learning_rate": 1.0595886903425934e-05, "loss": 0.4959, "step": 21670 }, { "epoch": 0.4811267355523246, "grad_norm": 1.0525004863739014, "learning_rate": 1.0592406278937143e-05, "loss": 0.392, "step": 21675 }, { "epoch": 0.48123772211185223, "grad_norm": 1.3853223323822021, "learning_rate": 1.0588925582425585e-05, "loss": 0.3902, "step": 21680 }, { "epoch": 0.4813487086713799, "grad_norm": 1.3365157842636108, "learning_rate": 1.058544481431443e-05, "loss": 0.433, "step": 21685 }, { "epoch": 0.48145969523090754, "grad_norm": 1.196568489074707, "learning_rate": 1.0581963975026856e-05, "loss": 0.4933, "step": 21690 }, { "epoch": 0.48157068179043516, "grad_norm": 1.1724721193313599, "learning_rate": 1.0578483064986054e-05, "loss": 0.5655, "step": 21695 }, { "epoch": 0.48168166834996284, "grad_norm": 1.0338935852050781, "learning_rate": 1.057500208461522e-05, "loss": 0.4333, "step": 21700 }, { "epoch": 0.48179265490949047, "grad_norm": 1.549454689025879, "learning_rate": 1.0571521034337565e-05, "loss": 0.6056, "step": 21705 }, { "epoch": 0.4819036414690181, "grad_norm": 1.3995952606201172, "learning_rate": 1.0568039914576296e-05, "loss": 0.5526, "step": 21710 }, { "epoch": 0.4820146280285457, "grad_norm": 1.5786106586456299, "learning_rate": 1.0564558725754642e-05, "loss": 0.5402, "step": 21715 }, { "epoch": 0.4821256145880734, "grad_norm": 1.1411170959472656, "learning_rate": 1.0561077468295828e-05, "loss": 0.3592, "step": 21720 }, { "epoch": 0.48223660114760103, "grad_norm": 1.2534387111663818, "learning_rate": 1.0557596142623099e-05, "loss": 0.4262, "step": 21725 }, { "epoch": 0.48234758770712866, "grad_norm": 1.2995420694351196, "learning_rate": 1.05541147491597e-05, "loss": 0.3863, "step": 21730 }, { "epoch": 0.4824585742666563, "grad_norm": 1.1867161989212036, "learning_rate": 1.0550633288328891e-05, "loss": 0.393, "step": 21735 }, { "epoch": 0.48256956082618396, "grad_norm": 1.3839069604873657, "learning_rate": 1.0547151760553932e-05, "loss": 0.5031, "step": 21740 }, { "epoch": 0.4826805473857116, "grad_norm": 1.813341498374939, "learning_rate": 1.0543670166258095e-05, "loss": 0.3658, "step": 21745 }, { "epoch": 0.4827915339452392, "grad_norm": 0.6340671181678772, "learning_rate": 1.0540188505864664e-05, "loss": 0.4365, "step": 21750 }, { "epoch": 0.4829025205047669, "grad_norm": 1.045729398727417, "learning_rate": 1.0536706779796925e-05, "loss": 0.4977, "step": 21755 }, { "epoch": 0.4830135070642945, "grad_norm": 0.9235963225364685, "learning_rate": 1.0533224988478176e-05, "loss": 0.5887, "step": 21760 }, { "epoch": 0.48312449362382215, "grad_norm": 1.3279688358306885, "learning_rate": 1.0529743132331725e-05, "loss": 0.3984, "step": 21765 }, { "epoch": 0.4832354801833498, "grad_norm": 1.1080505847930908, "learning_rate": 1.0526261211780877e-05, "loss": 0.3861, "step": 21770 }, { "epoch": 0.48334646674287746, "grad_norm": 1.468857765197754, "learning_rate": 1.0522779227248956e-05, "loss": 0.5409, "step": 21775 }, { "epoch": 0.4834574533024051, "grad_norm": 1.0246763229370117, "learning_rate": 1.0519297179159289e-05, "loss": 0.3904, "step": 21780 }, { "epoch": 0.4835684398619327, "grad_norm": 0.8548372387886047, "learning_rate": 1.0515815067935213e-05, "loss": 0.4115, "step": 21785 }, { "epoch": 0.48367942642146033, "grad_norm": 3.396918535232544, "learning_rate": 1.0512332894000072e-05, "loss": 0.4362, "step": 21790 }, { "epoch": 0.483790412980988, "grad_norm": 0.9646040797233582, "learning_rate": 1.0508850657777217e-05, "loss": 0.4026, "step": 21795 }, { "epoch": 0.48390139954051564, "grad_norm": 1.2373651266098022, "learning_rate": 1.0505368359690002e-05, "loss": 0.3865, "step": 21800 }, { "epoch": 0.48401238610004327, "grad_norm": 1.5203875303268433, "learning_rate": 1.0501886000161799e-05, "loss": 0.611, "step": 21805 }, { "epoch": 0.48412337265957095, "grad_norm": 0.7656844854354858, "learning_rate": 1.0498403579615978e-05, "loss": 0.5243, "step": 21810 }, { "epoch": 0.4842343592190986, "grad_norm": 1.5859229564666748, "learning_rate": 1.0494921098475923e-05, "loss": 0.4365, "step": 21815 }, { "epoch": 0.4843453457786262, "grad_norm": 1.1974952220916748, "learning_rate": 1.0491438557165022e-05, "loss": 0.3849, "step": 21820 }, { "epoch": 0.48445633233815383, "grad_norm": 1.2102409601211548, "learning_rate": 1.0487955956106666e-05, "loss": 0.4075, "step": 21825 }, { "epoch": 0.4845673188976815, "grad_norm": 1.5311321020126343, "learning_rate": 1.0484473295724264e-05, "loss": 0.4312, "step": 21830 }, { "epoch": 0.48467830545720914, "grad_norm": 1.7898871898651123, "learning_rate": 1.0480990576441223e-05, "loss": 0.4472, "step": 21835 }, { "epoch": 0.48478929201673676, "grad_norm": 1.4970860481262207, "learning_rate": 1.047750779868096e-05, "loss": 0.3975, "step": 21840 }, { "epoch": 0.4849002785762644, "grad_norm": 1.2085856199264526, "learning_rate": 1.04740249628669e-05, "loss": 0.3633, "step": 21845 }, { "epoch": 0.48501126513579207, "grad_norm": 1.2464152574539185, "learning_rate": 1.0470542069422475e-05, "loss": 0.4639, "step": 21850 }, { "epoch": 0.4851222516953197, "grad_norm": 0.94156414270401, "learning_rate": 1.0467059118771126e-05, "loss": 0.363, "step": 21855 }, { "epoch": 0.4852332382548473, "grad_norm": 1.0619620084762573, "learning_rate": 1.0463576111336293e-05, "loss": 0.3719, "step": 21860 }, { "epoch": 0.485344224814375, "grad_norm": 1.4810116291046143, "learning_rate": 1.046009304754143e-05, "loss": 0.27, "step": 21865 }, { "epoch": 0.48545521137390263, "grad_norm": 1.1808918714523315, "learning_rate": 1.0456609927809997e-05, "loss": 0.361, "step": 21870 }, { "epoch": 0.48556619793343025, "grad_norm": 0.9369193315505981, "learning_rate": 1.0453126752565463e-05, "loss": 0.4352, "step": 21875 }, { "epoch": 0.4856771844929579, "grad_norm": 1.1464390754699707, "learning_rate": 1.0449643522231296e-05, "loss": 0.5644, "step": 21880 }, { "epoch": 0.48578817105248556, "grad_norm": 1.6998164653778076, "learning_rate": 1.044616023723098e-05, "loss": 0.5686, "step": 21885 }, { "epoch": 0.4858991576120132, "grad_norm": 0.9991883635520935, "learning_rate": 1.0442676897987995e-05, "loss": 0.3721, "step": 21890 }, { "epoch": 0.4860101441715408, "grad_norm": 1.3870561122894287, "learning_rate": 1.043919350492584e-05, "loss": 0.3989, "step": 21895 }, { "epoch": 0.48612113073106844, "grad_norm": 0.7130857110023499, "learning_rate": 1.0435710058468011e-05, "loss": 0.5284, "step": 21900 }, { "epoch": 0.4862321172905961, "grad_norm": 1.0134477615356445, "learning_rate": 1.0432226559038018e-05, "loss": 0.3538, "step": 21905 }, { "epoch": 0.48634310385012375, "grad_norm": 1.3501771688461304, "learning_rate": 1.0428743007059366e-05, "loss": 0.3259, "step": 21910 }, { "epoch": 0.4864540904096514, "grad_norm": 1.179742455482483, "learning_rate": 1.0425259402955577e-05, "loss": 0.4111, "step": 21915 }, { "epoch": 0.48656507696917906, "grad_norm": 0.9390612244606018, "learning_rate": 1.0421775747150174e-05, "loss": 0.3326, "step": 21920 }, { "epoch": 0.4866760635287067, "grad_norm": 1.1103237867355347, "learning_rate": 1.0418292040066693e-05, "loss": 0.4458, "step": 21925 }, { "epoch": 0.4867870500882343, "grad_norm": 1.2311724424362183, "learning_rate": 1.0414808282128668e-05, "loss": 0.3293, "step": 21930 }, { "epoch": 0.48689803664776193, "grad_norm": 1.5785497426986694, "learning_rate": 1.0411324473759643e-05, "loss": 0.4245, "step": 21935 }, { "epoch": 0.4870090232072896, "grad_norm": 1.5965907573699951, "learning_rate": 1.0407840615383168e-05, "loss": 0.3961, "step": 21940 }, { "epoch": 0.48712000976681724, "grad_norm": 1.7988016605377197, "learning_rate": 1.04043567074228e-05, "loss": 0.4447, "step": 21945 }, { "epoch": 0.48723099632634487, "grad_norm": 0.9706549048423767, "learning_rate": 1.0400872750302095e-05, "loss": 0.4787, "step": 21950 }, { "epoch": 0.4873419828858725, "grad_norm": 1.2334511280059814, "learning_rate": 1.039738874444463e-05, "loss": 0.4045, "step": 21955 }, { "epoch": 0.4874529694454002, "grad_norm": 1.216166377067566, "learning_rate": 1.0393904690273974e-05, "loss": 0.2541, "step": 21960 }, { "epoch": 0.4875639560049278, "grad_norm": 0.8982766270637512, "learning_rate": 1.0390420588213708e-05, "loss": 0.4672, "step": 21965 }, { "epoch": 0.4876749425644554, "grad_norm": 1.6147069931030273, "learning_rate": 1.0386936438687414e-05, "loss": 0.4673, "step": 21970 }, { "epoch": 0.4877859291239831, "grad_norm": 1.2869994640350342, "learning_rate": 1.0383452242118686e-05, "loss": 0.4443, "step": 21975 }, { "epoch": 0.48789691568351073, "grad_norm": 1.865621566772461, "learning_rate": 1.037996799893112e-05, "loss": 0.3578, "step": 21980 }, { "epoch": 0.48800790224303836, "grad_norm": 1.2217044830322266, "learning_rate": 1.0376483709548319e-05, "loss": 0.4546, "step": 21985 }, { "epoch": 0.488118888802566, "grad_norm": 1.1836082935333252, "learning_rate": 1.0372999374393893e-05, "loss": 0.4257, "step": 21990 }, { "epoch": 0.48822987536209367, "grad_norm": 0.815844714641571, "learning_rate": 1.0369514993891451e-05, "loss": 0.372, "step": 21995 }, { "epoch": 0.4883408619216213, "grad_norm": 1.2024153470993042, "learning_rate": 1.0366030568464618e-05, "loss": 0.4395, "step": 22000 }, { "epoch": 0.4884518484811489, "grad_norm": 1.3669815063476562, "learning_rate": 1.0362546098537012e-05, "loss": 0.442, "step": 22005 }, { "epoch": 0.48856283504067655, "grad_norm": 0.7903822064399719, "learning_rate": 1.035906158453227e-05, "loss": 0.3806, "step": 22010 }, { "epoch": 0.48867382160020423, "grad_norm": 1.359349012374878, "learning_rate": 1.0355577026874026e-05, "loss": 0.4904, "step": 22015 }, { "epoch": 0.48878480815973185, "grad_norm": 1.09807288646698, "learning_rate": 1.0352092425985919e-05, "loss": 0.5047, "step": 22020 }, { "epoch": 0.4888957947192595, "grad_norm": 1.374467134475708, "learning_rate": 1.0348607782291595e-05, "loss": 0.4666, "step": 22025 }, { "epoch": 0.48900678127878716, "grad_norm": 0.9287629127502441, "learning_rate": 1.0345123096214706e-05, "loss": 0.3533, "step": 22030 }, { "epoch": 0.4891177678383148, "grad_norm": 1.2469364404678345, "learning_rate": 1.0341638368178905e-05, "loss": 0.4242, "step": 22035 }, { "epoch": 0.4892287543978424, "grad_norm": 1.1202641725540161, "learning_rate": 1.0338153598607862e-05, "loss": 0.4986, "step": 22040 }, { "epoch": 0.48933974095737004, "grad_norm": 1.2364095449447632, "learning_rate": 1.0334668787925237e-05, "loss": 0.4598, "step": 22045 }, { "epoch": 0.4894507275168977, "grad_norm": 1.5487759113311768, "learning_rate": 1.0331183936554703e-05, "loss": 0.4866, "step": 22050 }, { "epoch": 0.48956171407642535, "grad_norm": 1.3600040674209595, "learning_rate": 1.0327699044919936e-05, "loss": 0.4836, "step": 22055 }, { "epoch": 0.489672700635953, "grad_norm": 1.1588906049728394, "learning_rate": 1.0324214113444619e-05, "loss": 0.5456, "step": 22060 }, { "epoch": 0.48978368719548065, "grad_norm": 1.1482707262039185, "learning_rate": 1.0320729142552437e-05, "loss": 0.331, "step": 22065 }, { "epoch": 0.4898946737550083, "grad_norm": 1.0912998914718628, "learning_rate": 1.0317244132667081e-05, "loss": 0.3431, "step": 22070 }, { "epoch": 0.4900056603145359, "grad_norm": 1.4887139797210693, "learning_rate": 1.031375908421225e-05, "loss": 0.5176, "step": 22075 }, { "epoch": 0.49011664687406353, "grad_norm": 1.220666527748108, "learning_rate": 1.0310273997611639e-05, "loss": 0.4706, "step": 22080 }, { "epoch": 0.4902276334335912, "grad_norm": 1.1723802089691162, "learning_rate": 1.0306788873288958e-05, "loss": 0.3419, "step": 22085 }, { "epoch": 0.49033861999311884, "grad_norm": 0.8831339478492737, "learning_rate": 1.0303303711667914e-05, "loss": 0.1974, "step": 22090 }, { "epoch": 0.49044960655264647, "grad_norm": 0.974607527256012, "learning_rate": 1.0299818513172221e-05, "loss": 0.4939, "step": 22095 }, { "epoch": 0.4905605931121741, "grad_norm": 1.4574120044708252, "learning_rate": 1.0296333278225599e-05, "loss": 0.4942, "step": 22100 }, { "epoch": 0.4906715796717018, "grad_norm": 1.623812198638916, "learning_rate": 1.0292848007251773e-05, "loss": 0.3821, "step": 22105 }, { "epoch": 0.4907825662312294, "grad_norm": 1.2309707403182983, "learning_rate": 1.0289362700674466e-05, "loss": 0.473, "step": 22110 }, { "epoch": 0.490893552790757, "grad_norm": 1.5308640003204346, "learning_rate": 1.0285877358917414e-05, "loss": 0.3446, "step": 22115 }, { "epoch": 0.4910045393502847, "grad_norm": 1.2372568845748901, "learning_rate": 1.0282391982404347e-05, "loss": 0.5556, "step": 22120 }, { "epoch": 0.49111552590981233, "grad_norm": 1.0952364206314087, "learning_rate": 1.0278906571559012e-05, "loss": 0.452, "step": 22125 }, { "epoch": 0.49122651246933996, "grad_norm": 1.801071286201477, "learning_rate": 1.0275421126805154e-05, "loss": 0.4498, "step": 22130 }, { "epoch": 0.4913374990288676, "grad_norm": 1.126573085784912, "learning_rate": 1.0271935648566517e-05, "loss": 0.2875, "step": 22135 }, { "epoch": 0.49144848558839527, "grad_norm": 0.9358459711074829, "learning_rate": 1.0268450137266856e-05, "loss": 0.4407, "step": 22140 }, { "epoch": 0.4915594721479229, "grad_norm": 0.8276937007904053, "learning_rate": 1.0264964593329928e-05, "loss": 0.4608, "step": 22145 }, { "epoch": 0.4916704587074505, "grad_norm": 0.99130779504776, "learning_rate": 1.0261479017179494e-05, "loss": 0.466, "step": 22150 }, { "epoch": 0.49178144526697815, "grad_norm": 1.2398957014083862, "learning_rate": 1.0257993409239318e-05, "loss": 0.3074, "step": 22155 }, { "epoch": 0.4918924318265058, "grad_norm": 1.6642937660217285, "learning_rate": 1.0254507769933166e-05, "loss": 0.3289, "step": 22160 }, { "epoch": 0.49200341838603345, "grad_norm": 0.7789919376373291, "learning_rate": 1.0251022099684815e-05, "loss": 0.5229, "step": 22165 }, { "epoch": 0.4921144049455611, "grad_norm": 2.206631660461426, "learning_rate": 1.024753639891804e-05, "loss": 0.4176, "step": 22170 }, { "epoch": 0.49222539150508876, "grad_norm": 1.417659044265747, "learning_rate": 1.0244050668056617e-05, "loss": 0.3517, "step": 22175 }, { "epoch": 0.4923363780646164, "grad_norm": 1.23202645778656, "learning_rate": 1.0240564907524335e-05, "loss": 0.5035, "step": 22180 }, { "epoch": 0.492447364624144, "grad_norm": 1.1205260753631592, "learning_rate": 1.0237079117744977e-05, "loss": 0.4148, "step": 22185 }, { "epoch": 0.49255835118367164, "grad_norm": 0.7706506848335266, "learning_rate": 1.0233593299142336e-05, "loss": 0.3719, "step": 22190 }, { "epoch": 0.4926693377431993, "grad_norm": 1.1921621561050415, "learning_rate": 1.0230107452140203e-05, "loss": 0.4742, "step": 22195 }, { "epoch": 0.49278032430272695, "grad_norm": 1.1580519676208496, "learning_rate": 1.0226621577162377e-05, "loss": 0.4158, "step": 22200 }, { "epoch": 0.4928913108622546, "grad_norm": 1.3027304410934448, "learning_rate": 1.0223135674632663e-05, "loss": 0.4033, "step": 22205 }, { "epoch": 0.4930022974217822, "grad_norm": 1.076047420501709, "learning_rate": 1.021964974497486e-05, "loss": 0.5984, "step": 22210 }, { "epoch": 0.4931132839813099, "grad_norm": 1.8300280570983887, "learning_rate": 1.021616378861278e-05, "loss": 0.3093, "step": 22215 }, { "epoch": 0.4932242705408375, "grad_norm": 0.9792448282241821, "learning_rate": 1.021267780597023e-05, "loss": 0.3822, "step": 22220 }, { "epoch": 0.49333525710036513, "grad_norm": 1.382432222366333, "learning_rate": 1.0209191797471026e-05, "loss": 0.3343, "step": 22225 }, { "epoch": 0.4934462436598928, "grad_norm": 1.1926004886627197, "learning_rate": 1.0205705763538985e-05, "loss": 0.4739, "step": 22230 }, { "epoch": 0.49355723021942044, "grad_norm": 1.1565606594085693, "learning_rate": 1.0202219704597924e-05, "loss": 0.4212, "step": 22235 }, { "epoch": 0.49366821677894807, "grad_norm": 0.8615053296089172, "learning_rate": 1.0198733621071672e-05, "loss": 0.5037, "step": 22240 }, { "epoch": 0.4937792033384757, "grad_norm": 1.8336201906204224, "learning_rate": 1.0195247513384054e-05, "loss": 0.582, "step": 22245 }, { "epoch": 0.4938901898980034, "grad_norm": 1.6957383155822754, "learning_rate": 1.0191761381958897e-05, "loss": 0.4026, "step": 22250 }, { "epoch": 0.494001176457531, "grad_norm": 1.1199365854263306, "learning_rate": 1.0188275227220034e-05, "loss": 0.4577, "step": 22255 }, { "epoch": 0.4941121630170586, "grad_norm": 1.173117756843567, "learning_rate": 1.01847890495913e-05, "loss": 0.266, "step": 22260 }, { "epoch": 0.49422314957658625, "grad_norm": 0.8515411615371704, "learning_rate": 1.0181302849496535e-05, "loss": 0.3795, "step": 22265 }, { "epoch": 0.49433413613611393, "grad_norm": 1.5342164039611816, "learning_rate": 1.0177816627359575e-05, "loss": 0.4661, "step": 22270 }, { "epoch": 0.49444512269564156, "grad_norm": 1.0877432823181152, "learning_rate": 1.0174330383604266e-05, "loss": 0.3531, "step": 22275 }, { "epoch": 0.4945561092551692, "grad_norm": 0.9727147817611694, "learning_rate": 1.0170844118654457e-05, "loss": 0.4371, "step": 22280 }, { "epoch": 0.49466709581469687, "grad_norm": 0.9545711874961853, "learning_rate": 1.0167357832933988e-05, "loss": 0.4294, "step": 22285 }, { "epoch": 0.4947780823742245, "grad_norm": 0.8435078263282776, "learning_rate": 1.0163871526866718e-05, "loss": 0.5171, "step": 22290 }, { "epoch": 0.4948890689337521, "grad_norm": 1.1975470781326294, "learning_rate": 1.0160385200876497e-05, "loss": 0.442, "step": 22295 }, { "epoch": 0.49500005549327974, "grad_norm": 1.1052860021591187, "learning_rate": 1.0156898855387183e-05, "loss": 0.3691, "step": 22300 }, { "epoch": 0.4951110420528074, "grad_norm": 0.8964700698852539, "learning_rate": 1.015341249082263e-05, "loss": 0.3977, "step": 22305 }, { "epoch": 0.49522202861233505, "grad_norm": 1.4109879732131958, "learning_rate": 1.0149926107606702e-05, "loss": 0.5525, "step": 22310 }, { "epoch": 0.4953330151718627, "grad_norm": 1.1375197172164917, "learning_rate": 1.0146439706163259e-05, "loss": 0.507, "step": 22315 }, { "epoch": 0.4954440017313903, "grad_norm": 0.9993934035301208, "learning_rate": 1.0142953286916166e-05, "loss": 0.3335, "step": 22320 }, { "epoch": 0.495554988290918, "grad_norm": 1.6546269655227661, "learning_rate": 1.0139466850289296e-05, "loss": 0.3553, "step": 22325 }, { "epoch": 0.4956659748504456, "grad_norm": 1.108886957168579, "learning_rate": 1.0135980396706513e-05, "loss": 0.4506, "step": 22330 }, { "epoch": 0.49577696140997324, "grad_norm": 1.3343003988265991, "learning_rate": 1.0132493926591688e-05, "loss": 0.417, "step": 22335 }, { "epoch": 0.4958879479695009, "grad_norm": 1.0910420417785645, "learning_rate": 1.0129007440368699e-05, "loss": 0.372, "step": 22340 }, { "epoch": 0.49599893452902855, "grad_norm": 1.4987506866455078, "learning_rate": 1.0125520938461415e-05, "loss": 0.4328, "step": 22345 }, { "epoch": 0.49610992108855617, "grad_norm": 1.265060305595398, "learning_rate": 1.0122034421293719e-05, "loss": 0.4198, "step": 22350 }, { "epoch": 0.4962209076480838, "grad_norm": 1.5524022579193115, "learning_rate": 1.0118547889289485e-05, "loss": 0.4624, "step": 22355 }, { "epoch": 0.4963318942076115, "grad_norm": 0.7765639424324036, "learning_rate": 1.0115061342872597e-05, "loss": 0.438, "step": 22360 }, { "epoch": 0.4964428807671391, "grad_norm": 0.9765235781669617, "learning_rate": 1.0111574782466935e-05, "loss": 0.4708, "step": 22365 }, { "epoch": 0.49655386732666673, "grad_norm": 1.64158034324646, "learning_rate": 1.0108088208496385e-05, "loss": 0.5551, "step": 22370 }, { "epoch": 0.49666485388619436, "grad_norm": 1.1808514595031738, "learning_rate": 1.0104601621384835e-05, "loss": 0.3945, "step": 22375 }, { "epoch": 0.49677584044572204, "grad_norm": 1.438317894935608, "learning_rate": 1.0101115021556172e-05, "loss": 0.4569, "step": 22380 }, { "epoch": 0.49688682700524966, "grad_norm": 1.202447533607483, "learning_rate": 1.0097628409434281e-05, "loss": 0.4311, "step": 22385 }, { "epoch": 0.4969978135647773, "grad_norm": 1.1385836601257324, "learning_rate": 1.0094141785443057e-05, "loss": 0.2666, "step": 22390 }, { "epoch": 0.497108800124305, "grad_norm": 1.2178776264190674, "learning_rate": 1.0090655150006389e-05, "loss": 0.5786, "step": 22395 }, { "epoch": 0.4972197866838326, "grad_norm": 1.0078877210617065, "learning_rate": 1.0087168503548173e-05, "loss": 0.3878, "step": 22400 }, { "epoch": 0.4973307732433602, "grad_norm": 1.3112704753875732, "learning_rate": 1.00836818464923e-05, "loss": 0.4361, "step": 22405 }, { "epoch": 0.49744175980288785, "grad_norm": 0.9972655773162842, "learning_rate": 1.0080195179262673e-05, "loss": 0.3988, "step": 22410 }, { "epoch": 0.49755274636241553, "grad_norm": 0.8976503014564514, "learning_rate": 1.0076708502283184e-05, "loss": 0.4958, "step": 22415 }, { "epoch": 0.49766373292194316, "grad_norm": 1.2193833589553833, "learning_rate": 1.0073221815977732e-05, "loss": 0.2802, "step": 22420 }, { "epoch": 0.4977747194814708, "grad_norm": 1.1301801204681396, "learning_rate": 1.0069735120770217e-05, "loss": 0.402, "step": 22425 }, { "epoch": 0.4978857060409984, "grad_norm": 1.3754868507385254, "learning_rate": 1.0066248417084539e-05, "loss": 0.3872, "step": 22430 }, { "epoch": 0.4979966926005261, "grad_norm": 1.152056336402893, "learning_rate": 1.0062761705344601e-05, "loss": 0.4077, "step": 22435 }, { "epoch": 0.4981076791600537, "grad_norm": 1.1381980180740356, "learning_rate": 1.0059274985974305e-05, "loss": 0.4486, "step": 22440 }, { "epoch": 0.49821866571958134, "grad_norm": 0.9616106748580933, "learning_rate": 1.0055788259397559e-05, "loss": 0.4419, "step": 22445 }, { "epoch": 0.498329652279109, "grad_norm": 1.1387845277786255, "learning_rate": 1.005230152603826e-05, "loss": 0.411, "step": 22450 }, { "epoch": 0.49844063883863665, "grad_norm": 1.3904991149902344, "learning_rate": 1.0048814786320319e-05, "loss": 0.4552, "step": 22455 }, { "epoch": 0.4985516253981643, "grad_norm": 1.3449314832687378, "learning_rate": 1.0045328040667638e-05, "loss": 0.4975, "step": 22460 }, { "epoch": 0.4986626119576919, "grad_norm": 1.5593739748001099, "learning_rate": 1.0041841289504125e-05, "loss": 0.4867, "step": 22465 }, { "epoch": 0.4987735985172196, "grad_norm": 0.911896824836731, "learning_rate": 1.003835453325369e-05, "loss": 0.3393, "step": 22470 }, { "epoch": 0.4988845850767472, "grad_norm": 1.7970991134643555, "learning_rate": 1.003486777234024e-05, "loss": 0.4731, "step": 22475 }, { "epoch": 0.49899557163627484, "grad_norm": 1.274659514427185, "learning_rate": 1.0031381007187681e-05, "loss": 0.4363, "step": 22480 }, { "epoch": 0.49910655819580246, "grad_norm": 1.4955745935440063, "learning_rate": 1.0027894238219923e-05, "loss": 0.3854, "step": 22485 }, { "epoch": 0.49921754475533014, "grad_norm": 1.678066611289978, "learning_rate": 1.0024407465860881e-05, "loss": 0.5047, "step": 22490 }, { "epoch": 0.49932853131485777, "grad_norm": 1.081360101699829, "learning_rate": 1.002092069053446e-05, "loss": 0.4052, "step": 22495 }, { "epoch": 0.4994395178743854, "grad_norm": 0.9500686526298523, "learning_rate": 1.0017433912664572e-05, "loss": 0.3784, "step": 22500 }, { "epoch": 0.4995505044339131, "grad_norm": 1.5203646421432495, "learning_rate": 1.0013947132675125e-05, "loss": 0.4019, "step": 22505 }, { "epoch": 0.4996614909934407, "grad_norm": 1.4040472507476807, "learning_rate": 1.0010460350990037e-05, "loss": 0.3283, "step": 22510 }, { "epoch": 0.49977247755296833, "grad_norm": 1.0540430545806885, "learning_rate": 1.0006973568033209e-05, "loss": 0.4228, "step": 22515 }, { "epoch": 0.49988346411249596, "grad_norm": 0.9384956359863281, "learning_rate": 1.000348678422856e-05, "loss": 0.4279, "step": 22520 }, { "epoch": 0.49999445067202364, "grad_norm": 1.4751801490783691, "learning_rate": 1e-05, "loss": 0.5231, "step": 22525 }, { "epoch": 0.5001054372315512, "grad_norm": 1.00909423828125, "learning_rate": 9.996513215771439e-06, "loss": 0.3545, "step": 22530 }, { "epoch": 0.5002164237910789, "grad_norm": 1.2268149852752686, "learning_rate": 9.993026431966793e-06, "loss": 0.4632, "step": 22535 }, { "epoch": 0.5003274103506066, "grad_norm": 1.1826709508895874, "learning_rate": 9.989539649009968e-06, "loss": 0.3902, "step": 22540 }, { "epoch": 0.5004383969101341, "grad_norm": 1.2434492111206055, "learning_rate": 9.986052867324878e-06, "loss": 0.3739, "step": 22545 }, { "epoch": 0.5005493834696618, "grad_norm": 1.4091449975967407, "learning_rate": 9.982566087335431e-06, "loss": 0.59, "step": 22550 }, { "epoch": 0.5006603700291895, "grad_norm": 0.9223693609237671, "learning_rate": 9.97907930946554e-06, "loss": 0.3983, "step": 22555 }, { "epoch": 0.5007713565887171, "grad_norm": 1.0859547853469849, "learning_rate": 9.975592534139122e-06, "loss": 0.4797, "step": 22560 }, { "epoch": 0.5008823431482448, "grad_norm": 2.139925479888916, "learning_rate": 9.972105761780077e-06, "loss": 0.7093, "step": 22565 }, { "epoch": 0.5009933297077724, "grad_norm": 1.3303583860397339, "learning_rate": 9.968618992812324e-06, "loss": 0.3643, "step": 22570 }, { "epoch": 0.5011043162673, "grad_norm": 0.7934449315071106, "learning_rate": 9.965132227659764e-06, "loss": 0.3783, "step": 22575 }, { "epoch": 0.5012153028268277, "grad_norm": 1.7177858352661133, "learning_rate": 9.961645466746314e-06, "loss": 0.4049, "step": 22580 }, { "epoch": 0.5013262893863553, "grad_norm": 1.3820732831954956, "learning_rate": 9.958158710495877e-06, "loss": 0.3094, "step": 22585 }, { "epoch": 0.5014372759458829, "grad_norm": 1.27067232131958, "learning_rate": 9.954671959332366e-06, "loss": 0.5818, "step": 22590 }, { "epoch": 0.5015482625054106, "grad_norm": 0.9377214312553406, "learning_rate": 9.951185213679686e-06, "loss": 0.439, "step": 22595 }, { "epoch": 0.5016592490649382, "grad_norm": 1.2688758373260498, "learning_rate": 9.947698473961744e-06, "loss": 0.4648, "step": 22600 }, { "epoch": 0.5017702356244659, "grad_norm": 1.2199465036392212, "learning_rate": 9.944211740602445e-06, "loss": 0.3057, "step": 22605 }, { "epoch": 0.5018812221839936, "grad_norm": 1.1889398097991943, "learning_rate": 9.940725014025696e-06, "loss": 0.4765, "step": 22610 }, { "epoch": 0.5019922087435211, "grad_norm": 1.2021925449371338, "learning_rate": 9.937238294655399e-06, "loss": 0.2815, "step": 22615 }, { "epoch": 0.5021031953030488, "grad_norm": 1.5429582595825195, "learning_rate": 9.933751582915464e-06, "loss": 0.262, "step": 22620 }, { "epoch": 0.5022141818625765, "grad_norm": 1.4658925533294678, "learning_rate": 9.930264879229785e-06, "loss": 0.4531, "step": 22625 }, { "epoch": 0.5023251684221041, "grad_norm": 1.2303109169006348, "learning_rate": 9.926778184022273e-06, "loss": 0.3372, "step": 22630 }, { "epoch": 0.5024361549816317, "grad_norm": 1.1760908365249634, "learning_rate": 9.92329149771682e-06, "loss": 0.3905, "step": 22635 }, { "epoch": 0.5025471415411593, "grad_norm": 1.3378584384918213, "learning_rate": 9.919804820737328e-06, "loss": 0.2739, "step": 22640 }, { "epoch": 0.502658128100687, "grad_norm": 1.1107934713363647, "learning_rate": 9.916318153507701e-06, "loss": 0.3587, "step": 22645 }, { "epoch": 0.5027691146602147, "grad_norm": 1.070551872253418, "learning_rate": 9.912831496451829e-06, "loss": 0.3671, "step": 22650 }, { "epoch": 0.5028801012197422, "grad_norm": 1.170880675315857, "learning_rate": 9.909344849993616e-06, "loss": 0.3604, "step": 22655 }, { "epoch": 0.5029910877792699, "grad_norm": 0.8394003510475159, "learning_rate": 9.905858214556947e-06, "loss": 0.5054, "step": 22660 }, { "epoch": 0.5031020743387976, "grad_norm": 1.767619013786316, "learning_rate": 9.902371590565724e-06, "loss": 0.3654, "step": 22665 }, { "epoch": 0.5032130608983252, "grad_norm": 1.0583374500274658, "learning_rate": 9.898884978443833e-06, "loss": 0.4577, "step": 22670 }, { "epoch": 0.5033240474578529, "grad_norm": 1.1993390321731567, "learning_rate": 9.895398378615165e-06, "loss": 0.3257, "step": 22675 }, { "epoch": 0.5034350340173805, "grad_norm": 1.3034275770187378, "learning_rate": 9.891911791503618e-06, "loss": 0.4339, "step": 22680 }, { "epoch": 0.5035460205769081, "grad_norm": 1.5261765718460083, "learning_rate": 9.888425217533067e-06, "loss": 0.3689, "step": 22685 }, { "epoch": 0.5036570071364358, "grad_norm": 1.1259262561798096, "learning_rate": 9.88493865712741e-06, "loss": 0.4287, "step": 22690 }, { "epoch": 0.5037679936959634, "grad_norm": 1.1270091533660889, "learning_rate": 9.881452110710519e-06, "loss": 0.4501, "step": 22695 }, { "epoch": 0.503878980255491, "grad_norm": 1.2836631536483765, "learning_rate": 9.877965578706286e-06, "loss": 0.4244, "step": 22700 }, { "epoch": 0.5039899668150187, "grad_norm": 1.300953984260559, "learning_rate": 9.874479061538588e-06, "loss": 0.2626, "step": 22705 }, { "epoch": 0.5041009533745463, "grad_norm": 1.2757043838500977, "learning_rate": 9.870992559631304e-06, "loss": 0.514, "step": 22710 }, { "epoch": 0.504211939934074, "grad_norm": 1.0749907493591309, "learning_rate": 9.867506073408313e-06, "loss": 0.4767, "step": 22715 }, { "epoch": 0.5043229264936017, "grad_norm": 1.3691682815551758, "learning_rate": 9.86401960329349e-06, "loss": 0.5215, "step": 22720 }, { "epoch": 0.5044339130531292, "grad_norm": 1.7371306419372559, "learning_rate": 9.860533149710705e-06, "loss": 0.5877, "step": 22725 }, { "epoch": 0.5045448996126569, "grad_norm": 1.204622745513916, "learning_rate": 9.857046713083836e-06, "loss": 0.434, "step": 22730 }, { "epoch": 0.5046558861721846, "grad_norm": 0.9504795074462891, "learning_rate": 9.853560293836743e-06, "loss": 0.4497, "step": 22735 }, { "epoch": 0.5047668727317122, "grad_norm": 1.0582622289657593, "learning_rate": 9.850073892393303e-06, "loss": 0.3307, "step": 22740 }, { "epoch": 0.5048778592912398, "grad_norm": 1.0019503831863403, "learning_rate": 9.846587509177374e-06, "loss": 0.3662, "step": 22745 }, { "epoch": 0.5049888458507674, "grad_norm": 1.3375298976898193, "learning_rate": 9.843101144612824e-06, "loss": 0.4374, "step": 22750 }, { "epoch": 0.5050998324102951, "grad_norm": 1.2192867994308472, "learning_rate": 9.839614799123507e-06, "loss": 0.5075, "step": 22755 }, { "epoch": 0.5052108189698228, "grad_norm": 1.1226353645324707, "learning_rate": 9.836128473133282e-06, "loss": 0.4266, "step": 22760 }, { "epoch": 0.5053218055293504, "grad_norm": 1.436882495880127, "learning_rate": 9.832642167066015e-06, "loss": 0.4404, "step": 22765 }, { "epoch": 0.505432792088878, "grad_norm": 0.9391142725944519, "learning_rate": 9.829155881345546e-06, "loss": 0.3448, "step": 22770 }, { "epoch": 0.5055437786484057, "grad_norm": 1.5629217624664307, "learning_rate": 9.825669616395737e-06, "loss": 0.3353, "step": 22775 }, { "epoch": 0.5056547652079333, "grad_norm": 1.2283124923706055, "learning_rate": 9.822183372640426e-06, "loss": 0.4115, "step": 22780 }, { "epoch": 0.505765751767461, "grad_norm": 1.7312462329864502, "learning_rate": 9.818697150503467e-06, "loss": 0.5001, "step": 22785 }, { "epoch": 0.5058767383269887, "grad_norm": 1.1549898386001587, "learning_rate": 9.815210950408703e-06, "loss": 0.4316, "step": 22790 }, { "epoch": 0.5059877248865162, "grad_norm": 1.2889511585235596, "learning_rate": 9.811724772779968e-06, "loss": 0.3246, "step": 22795 }, { "epoch": 0.5060987114460439, "grad_norm": 1.1245282888412476, "learning_rate": 9.808238618041108e-06, "loss": 0.4711, "step": 22800 }, { "epoch": 0.5062096980055715, "grad_norm": 1.3970367908477783, "learning_rate": 9.804752486615947e-06, "loss": 0.4878, "step": 22805 }, { "epoch": 0.5063206845650992, "grad_norm": 0.7514325380325317, "learning_rate": 9.801266378928326e-06, "loss": 0.3886, "step": 22810 }, { "epoch": 0.5064316711246268, "grad_norm": 0.8837243914604187, "learning_rate": 9.797780295402078e-06, "loss": 0.3596, "step": 22815 }, { "epoch": 0.5065426576841544, "grad_norm": 1.0351483821868896, "learning_rate": 9.79429423646102e-06, "loss": 0.2448, "step": 22820 }, { "epoch": 0.5066536442436821, "grad_norm": 1.0443217754364014, "learning_rate": 9.790808202528977e-06, "loss": 0.484, "step": 22825 }, { "epoch": 0.5067646308032098, "grad_norm": 1.6663671731948853, "learning_rate": 9.787322194029773e-06, "loss": 0.4237, "step": 22830 }, { "epoch": 0.5068756173627373, "grad_norm": 1.1216480731964111, "learning_rate": 9.783836211387224e-06, "loss": 0.429, "step": 22835 }, { "epoch": 0.506986603922265, "grad_norm": 0.8126741647720337, "learning_rate": 9.780350255025143e-06, "loss": 0.6123, "step": 22840 }, { "epoch": 0.5070975904817927, "grad_norm": 1.0116031169891357, "learning_rate": 9.776864325367338e-06, "loss": 0.3102, "step": 22845 }, { "epoch": 0.5072085770413203, "grad_norm": 1.1866459846496582, "learning_rate": 9.773378422837624e-06, "loss": 0.4162, "step": 22850 }, { "epoch": 0.507319563600848, "grad_norm": 1.530235767364502, "learning_rate": 9.7698925478598e-06, "loss": 0.4966, "step": 22855 }, { "epoch": 0.5074305501603755, "grad_norm": 1.1207528114318848, "learning_rate": 9.76640670085767e-06, "loss": 0.3392, "step": 22860 }, { "epoch": 0.5075415367199032, "grad_norm": 0.8439724445343018, "learning_rate": 9.762920882255026e-06, "loss": 0.4822, "step": 22865 }, { "epoch": 0.5076525232794309, "grad_norm": 1.509404182434082, "learning_rate": 9.759435092475667e-06, "loss": 0.4871, "step": 22870 }, { "epoch": 0.5077635098389585, "grad_norm": 0.8720496892929077, "learning_rate": 9.755949331943386e-06, "loss": 0.3836, "step": 22875 }, { "epoch": 0.5078744963984861, "grad_norm": 1.2868167161941528, "learning_rate": 9.752463601081963e-06, "loss": 0.3545, "step": 22880 }, { "epoch": 0.5079854829580138, "grad_norm": 1.5097538232803345, "learning_rate": 9.748977900315189e-06, "loss": 0.5467, "step": 22885 }, { "epoch": 0.5080964695175414, "grad_norm": 1.5139718055725098, "learning_rate": 9.745492230066835e-06, "loss": 0.3364, "step": 22890 }, { "epoch": 0.5082074560770691, "grad_norm": 0.7690618634223938, "learning_rate": 9.742006590760683e-06, "loss": 0.2862, "step": 22895 }, { "epoch": 0.5083184426365968, "grad_norm": 1.22612464427948, "learning_rate": 9.738520982820509e-06, "loss": 0.3642, "step": 22900 }, { "epoch": 0.5084294291961243, "grad_norm": 1.3420028686523438, "learning_rate": 9.735035406670072e-06, "loss": 0.4987, "step": 22905 }, { "epoch": 0.508540415755652, "grad_norm": 1.1621700525283813, "learning_rate": 9.731549862733147e-06, "loss": 0.4764, "step": 22910 }, { "epoch": 0.5086514023151796, "grad_norm": 1.2968989610671997, "learning_rate": 9.728064351433484e-06, "loss": 0.4678, "step": 22915 }, { "epoch": 0.5087623888747073, "grad_norm": 1.6852279901504517, "learning_rate": 9.72457887319485e-06, "loss": 0.4159, "step": 22920 }, { "epoch": 0.5088733754342349, "grad_norm": 0.8907633423805237, "learning_rate": 9.72109342844099e-06, "loss": 0.4015, "step": 22925 }, { "epoch": 0.5089843619937625, "grad_norm": 1.4889419078826904, "learning_rate": 9.717608017595653e-06, "loss": 0.3983, "step": 22930 }, { "epoch": 0.5090953485532902, "grad_norm": 1.1271584033966064, "learning_rate": 9.714122641082593e-06, "loss": 0.5151, "step": 22935 }, { "epoch": 0.5092063351128179, "grad_norm": 0.8067299723625183, "learning_rate": 9.710637299325537e-06, "loss": 0.4571, "step": 22940 }, { "epoch": 0.5093173216723454, "grad_norm": 1.6813609600067139, "learning_rate": 9.707151992748232e-06, "loss": 0.5164, "step": 22945 }, { "epoch": 0.5094283082318731, "grad_norm": 1.0879247188568115, "learning_rate": 9.703666721774403e-06, "loss": 0.4272, "step": 22950 }, { "epoch": 0.5095392947914008, "grad_norm": 1.1162388324737549, "learning_rate": 9.70018148682778e-06, "loss": 0.4953, "step": 22955 }, { "epoch": 0.5096502813509284, "grad_norm": 0.8649173378944397, "learning_rate": 9.69669628833209e-06, "loss": 0.3846, "step": 22960 }, { "epoch": 0.5097612679104561, "grad_norm": 1.5884920358657837, "learning_rate": 9.693211126711046e-06, "loss": 0.2942, "step": 22965 }, { "epoch": 0.5098722544699836, "grad_norm": 1.2138025760650635, "learning_rate": 9.689726002388363e-06, "loss": 0.4575, "step": 22970 }, { "epoch": 0.5099832410295113, "grad_norm": 1.2881455421447754, "learning_rate": 9.686240915787753e-06, "loss": 0.4061, "step": 22975 }, { "epoch": 0.510094227589039, "grad_norm": 1.0300076007843018, "learning_rate": 9.682755867332919e-06, "loss": 0.2838, "step": 22980 }, { "epoch": 0.5102052141485666, "grad_norm": 1.7775558233261108, "learning_rate": 9.679270857447567e-06, "loss": 0.4665, "step": 22985 }, { "epoch": 0.5103162007080942, "grad_norm": 0.9555680751800537, "learning_rate": 9.675785886555383e-06, "loss": 0.5005, "step": 22990 }, { "epoch": 0.5104271872676219, "grad_norm": 1.1331521272659302, "learning_rate": 9.672300955080067e-06, "loss": 0.3951, "step": 22995 }, { "epoch": 0.5105381738271495, "grad_norm": 0.889107346534729, "learning_rate": 9.6688160634453e-06, "loss": 0.3451, "step": 23000 }, { "epoch": 0.5106491603866772, "grad_norm": 3.3628461360931396, "learning_rate": 9.665331212074768e-06, "loss": 0.3267, "step": 23005 }, { "epoch": 0.5107601469462049, "grad_norm": 1.115371584892273, "learning_rate": 9.661846401392141e-06, "loss": 0.3528, "step": 23010 }, { "epoch": 0.5108711335057324, "grad_norm": 1.2750897407531738, "learning_rate": 9.658361631821095e-06, "loss": 0.4573, "step": 23015 }, { "epoch": 0.5109821200652601, "grad_norm": 1.5250240564346313, "learning_rate": 9.654876903785299e-06, "loss": 0.5291, "step": 23020 }, { "epoch": 0.5110931066247877, "grad_norm": 1.0960813760757446, "learning_rate": 9.651392217708408e-06, "loss": 0.3362, "step": 23025 }, { "epoch": 0.5112040931843154, "grad_norm": 1.3788139820098877, "learning_rate": 9.647907574014086e-06, "loss": 0.4561, "step": 23030 }, { "epoch": 0.511315079743843, "grad_norm": 0.8054192662239075, "learning_rate": 9.644422973125977e-06, "loss": 0.401, "step": 23035 }, { "epoch": 0.5114260663033706, "grad_norm": 1.1127040386199951, "learning_rate": 9.64093841546773e-06, "loss": 0.4227, "step": 23040 }, { "epoch": 0.5115370528628983, "grad_norm": 1.0898431539535522, "learning_rate": 9.637453901462992e-06, "loss": 0.4615, "step": 23045 }, { "epoch": 0.511648039422426, "grad_norm": 1.0687729120254517, "learning_rate": 9.633969431535385e-06, "loss": 0.4972, "step": 23050 }, { "epoch": 0.5117590259819536, "grad_norm": 1.4021871089935303, "learning_rate": 9.630485006108554e-06, "loss": 0.4042, "step": 23055 }, { "epoch": 0.5118700125414812, "grad_norm": 1.0496760606765747, "learning_rate": 9.627000625606112e-06, "loss": 0.4764, "step": 23060 }, { "epoch": 0.5119809991010089, "grad_norm": 1.0399717092514038, "learning_rate": 9.623516290451683e-06, "loss": 0.3277, "step": 23065 }, { "epoch": 0.5120919856605365, "grad_norm": 1.9745243787765503, "learning_rate": 9.620032001068883e-06, "loss": 0.4506, "step": 23070 }, { "epoch": 0.5122029722200642, "grad_norm": 1.5703458786010742, "learning_rate": 9.616547757881317e-06, "loss": 0.5289, "step": 23075 }, { "epoch": 0.5123139587795917, "grad_norm": 1.2810927629470825, "learning_rate": 9.61306356131259e-06, "loss": 0.4078, "step": 23080 }, { "epoch": 0.5124249453391194, "grad_norm": 1.4439598321914673, "learning_rate": 9.609579411786297e-06, "loss": 0.445, "step": 23085 }, { "epoch": 0.5125359318986471, "grad_norm": 1.3506888151168823, "learning_rate": 9.60609530972603e-06, "loss": 0.4064, "step": 23090 }, { "epoch": 0.5126469184581747, "grad_norm": 1.1572383642196655, "learning_rate": 9.602611255555372e-06, "loss": 0.5398, "step": 23095 }, { "epoch": 0.5127579050177024, "grad_norm": 1.0995047092437744, "learning_rate": 9.599127249697905e-06, "loss": 0.4621, "step": 23100 }, { "epoch": 0.51286889157723, "grad_norm": 1.1626933813095093, "learning_rate": 9.595643292577206e-06, "loss": 0.3887, "step": 23105 }, { "epoch": 0.5129798781367576, "grad_norm": 0.9370729327201843, "learning_rate": 9.592159384616835e-06, "loss": 0.3973, "step": 23110 }, { "epoch": 0.5130908646962853, "grad_norm": 2.0881588459014893, "learning_rate": 9.588675526240362e-06, "loss": 0.4405, "step": 23115 }, { "epoch": 0.513201851255813, "grad_norm": 1.0966885089874268, "learning_rate": 9.585191717871336e-06, "loss": 0.3349, "step": 23120 }, { "epoch": 0.5133128378153405, "grad_norm": 1.0974148511886597, "learning_rate": 9.581707959933309e-06, "loss": 0.3571, "step": 23125 }, { "epoch": 0.5134238243748682, "grad_norm": 1.4036043882369995, "learning_rate": 9.57822425284983e-06, "loss": 0.4259, "step": 23130 }, { "epoch": 0.5135348109343958, "grad_norm": 1.3437319993972778, "learning_rate": 9.574740597044427e-06, "loss": 0.5187, "step": 23135 }, { "epoch": 0.5136457974939235, "grad_norm": 0.8622636795043945, "learning_rate": 9.571256992940641e-06, "loss": 0.4853, "step": 23140 }, { "epoch": 0.5137567840534512, "grad_norm": 1.7558482885360718, "learning_rate": 9.567773440961988e-06, "loss": 0.4349, "step": 23145 }, { "epoch": 0.5138677706129787, "grad_norm": 1.3710284233093262, "learning_rate": 9.564289941531987e-06, "loss": 0.4344, "step": 23150 }, { "epoch": 0.5139787571725064, "grad_norm": 1.8275679349899292, "learning_rate": 9.560806495074162e-06, "loss": 0.336, "step": 23155 }, { "epoch": 0.5140897437320341, "grad_norm": 1.0730940103530884, "learning_rate": 9.557323102012005e-06, "loss": 0.5358, "step": 23160 }, { "epoch": 0.5142007302915617, "grad_norm": 1.1426434516906738, "learning_rate": 9.553839762769025e-06, "loss": 0.3901, "step": 23165 }, { "epoch": 0.5143117168510893, "grad_norm": 1.0768260955810547, "learning_rate": 9.550356477768706e-06, "loss": 0.3633, "step": 23170 }, { "epoch": 0.514422703410617, "grad_norm": 1.2068915367126465, "learning_rate": 9.546873247434542e-06, "loss": 0.3501, "step": 23175 }, { "epoch": 0.5145336899701446, "grad_norm": 1.3856827020645142, "learning_rate": 9.543390072190006e-06, "loss": 0.2788, "step": 23180 }, { "epoch": 0.5146446765296723, "grad_norm": 1.6092084646224976, "learning_rate": 9.539906952458572e-06, "loss": 0.5391, "step": 23185 }, { "epoch": 0.5147556630891998, "grad_norm": 0.6838003396987915, "learning_rate": 9.536423888663712e-06, "loss": 0.3466, "step": 23190 }, { "epoch": 0.5148666496487275, "grad_norm": 2.4816436767578125, "learning_rate": 9.532940881228878e-06, "loss": 0.5389, "step": 23195 }, { "epoch": 0.5149776362082552, "grad_norm": 1.3245611190795898, "learning_rate": 9.52945793057753e-06, "loss": 0.5037, "step": 23200 }, { "epoch": 0.5150886227677828, "grad_norm": 1.5784523487091064, "learning_rate": 9.525975037133101e-06, "loss": 0.4671, "step": 23205 }, { "epoch": 0.5151996093273105, "grad_norm": 1.3718194961547852, "learning_rate": 9.522492201319042e-06, "loss": 0.3642, "step": 23210 }, { "epoch": 0.5153105958868381, "grad_norm": 0.9248862266540527, "learning_rate": 9.51900942355878e-06, "loss": 0.3951, "step": 23215 }, { "epoch": 0.5154215824463657, "grad_norm": 1.1761683225631714, "learning_rate": 9.515526704275739e-06, "loss": 0.4326, "step": 23220 }, { "epoch": 0.5155325690058934, "grad_norm": 1.9514886140823364, "learning_rate": 9.512044043893336e-06, "loss": 0.3304, "step": 23225 }, { "epoch": 0.5156435555654211, "grad_norm": 1.2721954584121704, "learning_rate": 9.508561442834982e-06, "loss": 0.4436, "step": 23230 }, { "epoch": 0.5157545421249486, "grad_norm": 1.279233455657959, "learning_rate": 9.505078901524077e-06, "loss": 0.4154, "step": 23235 }, { "epoch": 0.5158655286844763, "grad_norm": 1.6182982921600342, "learning_rate": 9.501596420384024e-06, "loss": 0.4688, "step": 23240 }, { "epoch": 0.5159765152440039, "grad_norm": 1.204156517982483, "learning_rate": 9.498113999838203e-06, "loss": 0.4464, "step": 23245 }, { "epoch": 0.5160875018035316, "grad_norm": 1.3307151794433594, "learning_rate": 9.494631640310001e-06, "loss": 0.3903, "step": 23250 }, { "epoch": 0.5161984883630593, "grad_norm": 1.3102271556854248, "learning_rate": 9.491149342222787e-06, "loss": 0.4727, "step": 23255 }, { "epoch": 0.5163094749225868, "grad_norm": 0.9829373955726624, "learning_rate": 9.48766710599993e-06, "loss": 0.4298, "step": 23260 }, { "epoch": 0.5164204614821145, "grad_norm": 0.9249544739723206, "learning_rate": 9.48418493206479e-06, "loss": 0.4166, "step": 23265 }, { "epoch": 0.5165314480416422, "grad_norm": 1.1289701461791992, "learning_rate": 9.480702820840713e-06, "loss": 0.3981, "step": 23270 }, { "epoch": 0.5166424346011698, "grad_norm": 1.086345911026001, "learning_rate": 9.477220772751049e-06, "loss": 0.3643, "step": 23275 }, { "epoch": 0.5167534211606974, "grad_norm": 1.0040391683578491, "learning_rate": 9.473738788219128e-06, "loss": 0.5242, "step": 23280 }, { "epoch": 0.5168644077202251, "grad_norm": 1.2268530130386353, "learning_rate": 9.470256867668282e-06, "loss": 0.5265, "step": 23285 }, { "epoch": 0.5169753942797527, "grad_norm": 1.5280417203903198, "learning_rate": 9.466775011521825e-06, "loss": 0.5185, "step": 23290 }, { "epoch": 0.5170863808392804, "grad_norm": 1.1195710897445679, "learning_rate": 9.463293220203075e-06, "loss": 0.2102, "step": 23295 }, { "epoch": 0.517197367398808, "grad_norm": 1.3801215887069702, "learning_rate": 9.459811494135339e-06, "loss": 0.3567, "step": 23300 }, { "epoch": 0.5173083539583356, "grad_norm": 1.0789028406143188, "learning_rate": 9.456329833741907e-06, "loss": 0.4993, "step": 23305 }, { "epoch": 0.5174193405178633, "grad_norm": 1.283454179763794, "learning_rate": 9.452848239446074e-06, "loss": 0.688, "step": 23310 }, { "epoch": 0.5175303270773909, "grad_norm": 0.9683428406715393, "learning_rate": 9.449366711671112e-06, "loss": 0.4205, "step": 23315 }, { "epoch": 0.5176413136369186, "grad_norm": 1.541322112083435, "learning_rate": 9.445885250840301e-06, "loss": 0.344, "step": 23320 }, { "epoch": 0.5177523001964462, "grad_norm": 1.2456687688827515, "learning_rate": 9.442403857376903e-06, "loss": 0.4871, "step": 23325 }, { "epoch": 0.5178632867559738, "grad_norm": 1.0860142707824707, "learning_rate": 9.438922531704174e-06, "loss": 0.53, "step": 23330 }, { "epoch": 0.5179742733155015, "grad_norm": 1.7298486232757568, "learning_rate": 9.435441274245363e-06, "loss": 0.4706, "step": 23335 }, { "epoch": 0.5180852598750292, "grad_norm": 1.22835373878479, "learning_rate": 9.431960085423707e-06, "loss": 0.6449, "step": 23340 }, { "epoch": 0.5181962464345568, "grad_norm": 1.4939440488815308, "learning_rate": 9.428478965662437e-06, "loss": 0.4136, "step": 23345 }, { "epoch": 0.5183072329940844, "grad_norm": 0.8647384643554688, "learning_rate": 9.424997915384781e-06, "loss": 0.3932, "step": 23350 }, { "epoch": 0.518418219553612, "grad_norm": 1.2243454456329346, "learning_rate": 9.421516935013948e-06, "loss": 0.4535, "step": 23355 }, { "epoch": 0.5185292061131397, "grad_norm": 0.7740560173988342, "learning_rate": 9.418036024973149e-06, "loss": 0.362, "step": 23360 }, { "epoch": 0.5186401926726674, "grad_norm": 0.9760211110115051, "learning_rate": 9.414555185685573e-06, "loss": 0.3387, "step": 23365 }, { "epoch": 0.5187511792321949, "grad_norm": 0.9076090455055237, "learning_rate": 9.41107441757442e-06, "loss": 0.4295, "step": 23370 }, { "epoch": 0.5188621657917226, "grad_norm": 1.1980648040771484, "learning_rate": 9.407593721062858e-06, "loss": 0.4772, "step": 23375 }, { "epoch": 0.5189731523512503, "grad_norm": 1.1833443641662598, "learning_rate": 9.404113096574066e-06, "loss": 0.4988, "step": 23380 }, { "epoch": 0.5190841389107779, "grad_norm": 1.2384533882141113, "learning_rate": 9.40063254453121e-06, "loss": 0.4169, "step": 23385 }, { "epoch": 0.5191951254703056, "grad_norm": 1.1089009046554565, "learning_rate": 9.397152065357434e-06, "loss": 0.4497, "step": 23390 }, { "epoch": 0.5193061120298332, "grad_norm": 1.2220206260681152, "learning_rate": 9.393671659475894e-06, "loss": 0.4402, "step": 23395 }, { "epoch": 0.5194170985893608, "grad_norm": 1.0658073425292969, "learning_rate": 9.390191327309713e-06, "loss": 0.366, "step": 23400 }, { "epoch": 0.5195280851488885, "grad_norm": 1.9952574968338013, "learning_rate": 9.386711069282028e-06, "loss": 0.4334, "step": 23405 }, { "epoch": 0.5196390717084161, "grad_norm": 1.4178756475448608, "learning_rate": 9.383230885815957e-06, "loss": 0.4913, "step": 23410 }, { "epoch": 0.5197500582679437, "grad_norm": 0.7791183590888977, "learning_rate": 9.379750777334603e-06, "loss": 0.4209, "step": 23415 }, { "epoch": 0.5198610448274714, "grad_norm": 1.835775375366211, "learning_rate": 9.376270744261073e-06, "loss": 0.5712, "step": 23420 }, { "epoch": 0.519972031386999, "grad_norm": 0.8781411051750183, "learning_rate": 9.37279078701845e-06, "loss": 0.5048, "step": 23425 }, { "epoch": 0.5200830179465267, "grad_norm": 1.235450267791748, "learning_rate": 9.369310906029823e-06, "loss": 0.5186, "step": 23430 }, { "epoch": 0.5201940045060544, "grad_norm": 1.0174843072891235, "learning_rate": 9.36583110171826e-06, "loss": 0.4388, "step": 23435 }, { "epoch": 0.5203049910655819, "grad_norm": 0.8445138335227966, "learning_rate": 9.362351374506823e-06, "loss": 0.5178, "step": 23440 }, { "epoch": 0.5204159776251096, "grad_norm": 1.0303021669387817, "learning_rate": 9.358871724818568e-06, "loss": 0.3982, "step": 23445 }, { "epoch": 0.5205269641846373, "grad_norm": 1.1887991428375244, "learning_rate": 9.355392153076541e-06, "loss": 0.3977, "step": 23450 }, { "epoch": 0.5206379507441649, "grad_norm": 1.1209839582443237, "learning_rate": 9.351912659703773e-06, "loss": 0.3891, "step": 23455 }, { "epoch": 0.5207489373036925, "grad_norm": 1.235081672668457, "learning_rate": 9.34843324512329e-06, "loss": 0.3527, "step": 23460 }, { "epoch": 0.5208599238632201, "grad_norm": 0.8877482414245605, "learning_rate": 9.344953909758106e-06, "loss": 0.3534, "step": 23465 }, { "epoch": 0.5209709104227478, "grad_norm": 1.4008976221084595, "learning_rate": 9.341474654031231e-06, "loss": 0.5007, "step": 23470 }, { "epoch": 0.5210818969822755, "grad_norm": 1.8766608238220215, "learning_rate": 9.337995478365657e-06, "loss": 0.5887, "step": 23475 }, { "epoch": 0.521192883541803, "grad_norm": 0.7073838710784912, "learning_rate": 9.334516383184377e-06, "loss": 0.4969, "step": 23480 }, { "epoch": 0.5213038701013307, "grad_norm": 1.209763526916504, "learning_rate": 9.331037368910359e-06, "loss": 0.5199, "step": 23485 }, { "epoch": 0.5214148566608584, "grad_norm": 1.488824725151062, "learning_rate": 9.327558435966573e-06, "loss": 0.3552, "step": 23490 }, { "epoch": 0.521525843220386, "grad_norm": 1.5087478160858154, "learning_rate": 9.324079584775982e-06, "loss": 0.4104, "step": 23495 }, { "epoch": 0.5216368297799137, "grad_norm": 1.2469892501831055, "learning_rate": 9.320600815761523e-06, "loss": 0.4061, "step": 23500 }, { "epoch": 0.5217478163394413, "grad_norm": 1.4992282390594482, "learning_rate": 9.317122129346144e-06, "loss": 0.3954, "step": 23505 }, { "epoch": 0.5218588028989689, "grad_norm": 1.198625922203064, "learning_rate": 9.313643525952762e-06, "loss": 0.5065, "step": 23510 }, { "epoch": 0.5219697894584966, "grad_norm": 1.102835774421692, "learning_rate": 9.310165006004297e-06, "loss": 0.6921, "step": 23515 }, { "epoch": 0.5220807760180242, "grad_norm": 1.4361196756362915, "learning_rate": 9.306686569923662e-06, "loss": 0.4449, "step": 23520 }, { "epoch": 0.5221917625775518, "grad_norm": 1.5560322999954224, "learning_rate": 9.303208218133745e-06, "loss": 0.4447, "step": 23525 }, { "epoch": 0.5223027491370795, "grad_norm": 1.9868260622024536, "learning_rate": 9.299729951057439e-06, "loss": 0.4669, "step": 23530 }, { "epoch": 0.5224137356966071, "grad_norm": 1.1894471645355225, "learning_rate": 9.296251769117614e-06, "loss": 0.3816, "step": 23535 }, { "epoch": 0.5225247222561348, "grad_norm": 1.239475965499878, "learning_rate": 9.292773672737143e-06, "loss": 0.2769, "step": 23540 }, { "epoch": 0.5226357088156625, "grad_norm": 1.0350260734558105, "learning_rate": 9.28929566233887e-06, "loss": 0.3005, "step": 23545 }, { "epoch": 0.52274669537519, "grad_norm": 0.9946759343147278, "learning_rate": 9.28581773834565e-06, "loss": 0.5116, "step": 23550 }, { "epoch": 0.5228576819347177, "grad_norm": 0.9383373856544495, "learning_rate": 9.282339901180317e-06, "loss": 0.3541, "step": 23555 }, { "epoch": 0.5229686684942454, "grad_norm": 0.6263040900230408, "learning_rate": 9.278862151265688e-06, "loss": 0.4045, "step": 23560 }, { "epoch": 0.523079655053773, "grad_norm": 0.7122305631637573, "learning_rate": 9.275384489024586e-06, "loss": 0.5649, "step": 23565 }, { "epoch": 0.5231906416133006, "grad_norm": 0.9679470062255859, "learning_rate": 9.271906914879802e-06, "loss": 0.5078, "step": 23570 }, { "epoch": 0.5233016281728283, "grad_norm": 1.3679300546646118, "learning_rate": 9.268429429254134e-06, "loss": 0.3559, "step": 23575 }, { "epoch": 0.5234126147323559, "grad_norm": 1.5585976839065552, "learning_rate": 9.264952032570364e-06, "loss": 0.4081, "step": 23580 }, { "epoch": 0.5235236012918836, "grad_norm": 1.3559560775756836, "learning_rate": 9.261474725251261e-06, "loss": 0.5403, "step": 23585 }, { "epoch": 0.5236345878514111, "grad_norm": 1.2353577613830566, "learning_rate": 9.257997507719585e-06, "loss": 0.5122, "step": 23590 }, { "epoch": 0.5237455744109388, "grad_norm": 1.4342765808105469, "learning_rate": 9.254520380398083e-06, "loss": 0.4143, "step": 23595 }, { "epoch": 0.5238565609704665, "grad_norm": 1.2309331893920898, "learning_rate": 9.25104334370949e-06, "loss": 0.4069, "step": 23600 }, { "epoch": 0.5239675475299941, "grad_norm": 1.0727909803390503, "learning_rate": 9.24756639807654e-06, "loss": 0.438, "step": 23605 }, { "epoch": 0.5240785340895218, "grad_norm": 0.7829542756080627, "learning_rate": 9.24408954392194e-06, "loss": 0.4547, "step": 23610 }, { "epoch": 0.5241895206490494, "grad_norm": 1.0878058671951294, "learning_rate": 9.240612781668401e-06, "loss": 0.3775, "step": 23615 }, { "epoch": 0.524300507208577, "grad_norm": 0.8925349116325378, "learning_rate": 9.23713611173861e-06, "loss": 0.3224, "step": 23620 }, { "epoch": 0.5244114937681047, "grad_norm": 1.7960782051086426, "learning_rate": 9.233659534555257e-06, "loss": 0.387, "step": 23625 }, { "epoch": 0.5245224803276324, "grad_norm": 1.554244041442871, "learning_rate": 9.230183050541001e-06, "loss": 0.4478, "step": 23630 }, { "epoch": 0.52463346688716, "grad_norm": 1.7812159061431885, "learning_rate": 9.226706660118511e-06, "loss": 0.4356, "step": 23635 }, { "epoch": 0.5247444534466876, "grad_norm": 1.4758063554763794, "learning_rate": 9.223230363710434e-06, "loss": 0.388, "step": 23640 }, { "epoch": 0.5248554400062152, "grad_norm": 1.4203014373779297, "learning_rate": 9.2197541617394e-06, "loss": 0.5469, "step": 23645 }, { "epoch": 0.5249664265657429, "grad_norm": 1.188704252243042, "learning_rate": 9.216278054628043e-06, "loss": 0.3986, "step": 23650 }, { "epoch": 0.5250774131252706, "grad_norm": 1.4540413618087769, "learning_rate": 9.21280204279897e-06, "loss": 0.2821, "step": 23655 }, { "epoch": 0.5251883996847981, "grad_norm": 1.3421919345855713, "learning_rate": 9.209326126674781e-06, "loss": 0.5352, "step": 23660 }, { "epoch": 0.5252993862443258, "grad_norm": 0.8097754716873169, "learning_rate": 9.205850306678076e-06, "loss": 0.5054, "step": 23665 }, { "epoch": 0.5254103728038535, "grad_norm": 1.0205293893814087, "learning_rate": 9.202374583231423e-06, "loss": 0.4522, "step": 23670 }, { "epoch": 0.5255213593633811, "grad_norm": 1.4187626838684082, "learning_rate": 9.198898956757396e-06, "loss": 0.4245, "step": 23675 }, { "epoch": 0.5256323459229088, "grad_norm": 1.542188048362732, "learning_rate": 9.195423427678544e-06, "loss": 0.4067, "step": 23680 }, { "epoch": 0.5257433324824364, "grad_norm": 1.1413275003433228, "learning_rate": 9.191947996417415e-06, "loss": 0.3822, "step": 23685 }, { "epoch": 0.525854319041964, "grad_norm": 1.4354232549667358, "learning_rate": 9.18847266339654e-06, "loss": 0.3561, "step": 23690 }, { "epoch": 0.5259653056014917, "grad_norm": 1.5432720184326172, "learning_rate": 9.184997429038435e-06, "loss": 0.4168, "step": 23695 }, { "epoch": 0.5260762921610193, "grad_norm": 1.0400127172470093, "learning_rate": 9.18152229376561e-06, "loss": 0.2103, "step": 23700 }, { "epoch": 0.5261872787205469, "grad_norm": 1.6075176000595093, "learning_rate": 9.17804725800056e-06, "loss": 0.4054, "step": 23705 }, { "epoch": 0.5262982652800746, "grad_norm": 1.1011056900024414, "learning_rate": 9.174572322165766e-06, "loss": 0.3888, "step": 23710 }, { "epoch": 0.5264092518396022, "grad_norm": 1.4790599346160889, "learning_rate": 9.171097486683703e-06, "loss": 0.2997, "step": 23715 }, { "epoch": 0.5265202383991299, "grad_norm": 0.9740801453590393, "learning_rate": 9.167622751976825e-06, "loss": 0.4052, "step": 23720 }, { "epoch": 0.5266312249586576, "grad_norm": 1.1656138896942139, "learning_rate": 9.164148118467585e-06, "loss": 0.3117, "step": 23725 }, { "epoch": 0.5267422115181851, "grad_norm": 1.1819837093353271, "learning_rate": 9.16067358657841e-06, "loss": 0.4508, "step": 23730 }, { "epoch": 0.5268531980777128, "grad_norm": 1.0367882251739502, "learning_rate": 9.157199156731729e-06, "loss": 0.4348, "step": 23735 }, { "epoch": 0.5269641846372405, "grad_norm": 1.3061459064483643, "learning_rate": 9.153724829349944e-06, "loss": 0.5079, "step": 23740 }, { "epoch": 0.527075171196768, "grad_norm": 1.1266971826553345, "learning_rate": 9.150250604855454e-06, "loss": 0.4737, "step": 23745 }, { "epoch": 0.5271861577562957, "grad_norm": 1.4855886697769165, "learning_rate": 9.146776483670654e-06, "loss": 0.4979, "step": 23750 }, { "epoch": 0.5272971443158233, "grad_norm": 1.0671223402023315, "learning_rate": 9.1433024662179e-06, "loss": 0.4063, "step": 23755 }, { "epoch": 0.527408130875351, "grad_norm": 1.0044794082641602, "learning_rate": 9.139828552919566e-06, "loss": 0.3773, "step": 23760 }, { "epoch": 0.5275191174348787, "grad_norm": 1.251092791557312, "learning_rate": 9.136354744197984e-06, "loss": 0.4365, "step": 23765 }, { "epoch": 0.5276301039944062, "grad_norm": 1.231126308441162, "learning_rate": 9.132881040475498e-06, "loss": 0.2866, "step": 23770 }, { "epoch": 0.5277410905539339, "grad_norm": 1.3552109003067017, "learning_rate": 9.129407442174431e-06, "loss": 0.3533, "step": 23775 }, { "epoch": 0.5278520771134616, "grad_norm": 1.3020673990249634, "learning_rate": 9.125933949717083e-06, "loss": 0.4461, "step": 23780 }, { "epoch": 0.5279630636729892, "grad_norm": 1.9571168422698975, "learning_rate": 9.12246056352576e-06, "loss": 0.4935, "step": 23785 }, { "epoch": 0.5280740502325169, "grad_norm": 1.401061773300171, "learning_rate": 9.118987284022732e-06, "loss": 0.2573, "step": 23790 }, { "epoch": 0.5281850367920445, "grad_norm": 1.519338846206665, "learning_rate": 9.115514111630282e-06, "loss": 0.3944, "step": 23795 }, { "epoch": 0.5282960233515721, "grad_norm": 1.5136542320251465, "learning_rate": 9.112041046770653e-06, "loss": 0.3774, "step": 23800 }, { "epoch": 0.5284070099110998, "grad_norm": 1.6715372800827026, "learning_rate": 9.108568089866096e-06, "loss": 0.3998, "step": 23805 }, { "epoch": 0.5285179964706274, "grad_norm": 1.1824544668197632, "learning_rate": 9.105095241338846e-06, "loss": 0.4635, "step": 23810 }, { "epoch": 0.528628983030155, "grad_norm": 1.4660857915878296, "learning_rate": 9.101622501611112e-06, "loss": 0.4204, "step": 23815 }, { "epoch": 0.5287399695896827, "grad_norm": 2.133476734161377, "learning_rate": 9.098149871105104e-06, "loss": 0.2596, "step": 23820 }, { "epoch": 0.5288509561492103, "grad_norm": 1.5634102821350098, "learning_rate": 9.09467735024301e-06, "loss": 0.4877, "step": 23825 }, { "epoch": 0.528961942708738, "grad_norm": 1.0197075605392456, "learning_rate": 9.091204939447003e-06, "loss": 0.5129, "step": 23830 }, { "epoch": 0.5290729292682657, "grad_norm": 0.8712895512580872, "learning_rate": 9.087732639139256e-06, "loss": 0.4002, "step": 23835 }, { "epoch": 0.5291839158277932, "grad_norm": 1.3366756439208984, "learning_rate": 9.084260449741912e-06, "loss": 0.4197, "step": 23840 }, { "epoch": 0.5292949023873209, "grad_norm": 1.2106136083602905, "learning_rate": 9.080788371677115e-06, "loss": 0.3646, "step": 23845 }, { "epoch": 0.5294058889468486, "grad_norm": 1.1375362873077393, "learning_rate": 9.07731640536698e-06, "loss": 0.5453, "step": 23850 }, { "epoch": 0.5295168755063762, "grad_norm": 1.8555535078048706, "learning_rate": 9.073844551233624e-06, "loss": 0.3916, "step": 23855 }, { "epoch": 0.5296278620659038, "grad_norm": 0.942084550857544, "learning_rate": 9.070372809699146e-06, "loss": 0.4782, "step": 23860 }, { "epoch": 0.5297388486254314, "grad_norm": 0.90847247838974, "learning_rate": 9.066901181185618e-06, "loss": 0.3912, "step": 23865 }, { "epoch": 0.5298498351849591, "grad_norm": 1.3108025789260864, "learning_rate": 9.06342966611512e-06, "loss": 0.4641, "step": 23870 }, { "epoch": 0.5299608217444868, "grad_norm": 1.1245001554489136, "learning_rate": 9.059958264909698e-06, "loss": 0.3802, "step": 23875 }, { "epoch": 0.5300718083040143, "grad_norm": 1.2725918292999268, "learning_rate": 9.0564869779914e-06, "loss": 0.4285, "step": 23880 }, { "epoch": 0.530182794863542, "grad_norm": 1.2415249347686768, "learning_rate": 9.053015805782249e-06, "loss": 0.385, "step": 23885 }, { "epoch": 0.5302937814230697, "grad_norm": 1.4148523807525635, "learning_rate": 9.04954474870426e-06, "loss": 0.4127, "step": 23890 }, { "epoch": 0.5304047679825973, "grad_norm": 0.6649506092071533, "learning_rate": 9.046073807179436e-06, "loss": 0.3126, "step": 23895 }, { "epoch": 0.530515754542125, "grad_norm": 1.101078748703003, "learning_rate": 9.042602981629756e-06, "loss": 0.2615, "step": 23900 }, { "epoch": 0.5306267411016526, "grad_norm": 1.0729953050613403, "learning_rate": 9.0391322724772e-06, "loss": 0.4776, "step": 23905 }, { "epoch": 0.5307377276611802, "grad_norm": 1.1954920291900635, "learning_rate": 9.035661680143713e-06, "loss": 0.3606, "step": 23910 }, { "epoch": 0.5308487142207079, "grad_norm": 1.2097233533859253, "learning_rate": 9.032191205051245e-06, "loss": 0.52, "step": 23915 }, { "epoch": 0.5309597007802355, "grad_norm": 1.2129782438278198, "learning_rate": 9.028720847621732e-06, "loss": 0.5183, "step": 23920 }, { "epoch": 0.5310706873397631, "grad_norm": 0.9085155725479126, "learning_rate": 9.025250608277075e-06, "loss": 0.4627, "step": 23925 }, { "epoch": 0.5311816738992908, "grad_norm": 0.8985617756843567, "learning_rate": 9.021780487439183e-06, "loss": 0.4049, "step": 23930 }, { "epoch": 0.5312926604588184, "grad_norm": 1.4433565139770508, "learning_rate": 9.018310485529935e-06, "loss": 0.5358, "step": 23935 }, { "epoch": 0.5314036470183461, "grad_norm": 0.9419113397598267, "learning_rate": 9.014840602971208e-06, "loss": 0.3272, "step": 23940 }, { "epoch": 0.5315146335778738, "grad_norm": 1.5108088254928589, "learning_rate": 9.011370840184855e-06, "loss": 0.4735, "step": 23945 }, { "epoch": 0.5316256201374013, "grad_norm": 1.1959902048110962, "learning_rate": 9.007901197592722e-06, "loss": 0.4795, "step": 23950 }, { "epoch": 0.531736606696929, "grad_norm": 1.501643419265747, "learning_rate": 9.004431675616634e-06, "loss": 0.4664, "step": 23955 }, { "epoch": 0.5318475932564567, "grad_norm": 0.9131843447685242, "learning_rate": 9.000962274678402e-06, "loss": 0.5461, "step": 23960 }, { "epoch": 0.5319585798159843, "grad_norm": 1.1181492805480957, "learning_rate": 8.997492995199828e-06, "loss": 0.4563, "step": 23965 }, { "epoch": 0.532069566375512, "grad_norm": 1.3199212551116943, "learning_rate": 8.994023837602694e-06, "loss": 0.3744, "step": 23970 }, { "epoch": 0.5321805529350395, "grad_norm": 1.2438446283340454, "learning_rate": 8.990554802308765e-06, "loss": 0.5131, "step": 23975 }, { "epoch": 0.5322915394945672, "grad_norm": 1.3528581857681274, "learning_rate": 8.987085889739801e-06, "loss": 0.3848, "step": 23980 }, { "epoch": 0.5324025260540949, "grad_norm": 0.8180687427520752, "learning_rate": 8.983617100317534e-06, "loss": 0.3982, "step": 23985 }, { "epoch": 0.5325135126136225, "grad_norm": 0.9565178751945496, "learning_rate": 8.980148434463695e-06, "loss": 0.4377, "step": 23990 }, { "epoch": 0.5326244991731501, "grad_norm": 1.3885096311569214, "learning_rate": 8.976679892599987e-06, "loss": 0.3927, "step": 23995 }, { "epoch": 0.5327354857326778, "grad_norm": 1.039081335067749, "learning_rate": 8.973211475148103e-06, "loss": 0.4103, "step": 24000 }, { "epoch": 0.5328464722922054, "grad_norm": 1.3395166397094727, "learning_rate": 8.96974318252973e-06, "loss": 0.3978, "step": 24005 }, { "epoch": 0.5329574588517331, "grad_norm": 1.1190773248672485, "learning_rate": 8.96627501516652e-06, "loss": 0.42, "step": 24010 }, { "epoch": 0.5330684454112607, "grad_norm": 1.1331000328063965, "learning_rate": 8.962806973480133e-06, "loss": 0.477, "step": 24015 }, { "epoch": 0.5331794319707883, "grad_norm": 1.137101650238037, "learning_rate": 8.95933905789219e-06, "loss": 0.4234, "step": 24020 }, { "epoch": 0.533290418530316, "grad_norm": 0.9587161540985107, "learning_rate": 8.955871268824316e-06, "loss": 0.3629, "step": 24025 }, { "epoch": 0.5334014050898436, "grad_norm": 1.3958914279937744, "learning_rate": 8.952403606698113e-06, "loss": 0.3726, "step": 24030 }, { "epoch": 0.5335123916493713, "grad_norm": 0.9416199326515198, "learning_rate": 8.948936071935164e-06, "loss": 0.4429, "step": 24035 }, { "epoch": 0.5336233782088989, "grad_norm": 1.9556666612625122, "learning_rate": 8.945468664957046e-06, "loss": 0.4545, "step": 24040 }, { "epoch": 0.5337343647684265, "grad_norm": 1.3999742269515991, "learning_rate": 8.942001386185307e-06, "loss": 0.5151, "step": 24045 }, { "epoch": 0.5338453513279542, "grad_norm": 1.2332944869995117, "learning_rate": 8.938534236041495e-06, "loss": 0.3034, "step": 24050 }, { "epoch": 0.5339563378874819, "grad_norm": 2.9347760677337646, "learning_rate": 8.935067214947126e-06, "loss": 0.3782, "step": 24055 }, { "epoch": 0.5340673244470094, "grad_norm": 1.1664717197418213, "learning_rate": 8.931600323323717e-06, "loss": 0.4993, "step": 24060 }, { "epoch": 0.5341783110065371, "grad_norm": 1.102626919746399, "learning_rate": 8.928133561592756e-06, "loss": 0.4186, "step": 24065 }, { "epoch": 0.5342892975660648, "grad_norm": 1.6264910697937012, "learning_rate": 8.924666930175722e-06, "loss": 0.332, "step": 24070 }, { "epoch": 0.5344002841255924, "grad_norm": 1.000712513923645, "learning_rate": 8.921200429494079e-06, "loss": 0.488, "step": 24075 }, { "epoch": 0.53451127068512, "grad_norm": 0.8100029826164246, "learning_rate": 8.917734059969266e-06, "loss": 0.3343, "step": 24080 }, { "epoch": 0.5346222572446476, "grad_norm": 0.926163911819458, "learning_rate": 8.914267822022716e-06, "loss": 0.2985, "step": 24085 }, { "epoch": 0.5347332438041753, "grad_norm": 1.284947395324707, "learning_rate": 8.910801716075847e-06, "loss": 0.3884, "step": 24090 }, { "epoch": 0.534844230363703, "grad_norm": 0.9958680272102356, "learning_rate": 8.90733574255005e-06, "loss": 0.5513, "step": 24095 }, { "epoch": 0.5349552169232306, "grad_norm": 1.1211278438568115, "learning_rate": 8.90386990186671e-06, "loss": 0.3841, "step": 24100 }, { "epoch": 0.5350662034827582, "grad_norm": 1.7093493938446045, "learning_rate": 8.900404194447189e-06, "loss": 0.5569, "step": 24105 }, { "epoch": 0.5351771900422859, "grad_norm": 0.7877238988876343, "learning_rate": 8.896938620712837e-06, "loss": 0.3767, "step": 24110 }, { "epoch": 0.5352881766018135, "grad_norm": 0.9894497394561768, "learning_rate": 8.893473181084993e-06, "loss": 0.5499, "step": 24115 }, { "epoch": 0.5353991631613412, "grad_norm": 1.3464109897613525, "learning_rate": 8.890007875984966e-06, "loss": 0.2911, "step": 24120 }, { "epoch": 0.5355101497208689, "grad_norm": 0.7942631244659424, "learning_rate": 8.88654270583406e-06, "loss": 0.5159, "step": 24125 }, { "epoch": 0.5356211362803964, "grad_norm": 1.2688034772872925, "learning_rate": 8.883077671053557e-06, "loss": 0.4691, "step": 24130 }, { "epoch": 0.5357321228399241, "grad_norm": 1.5835150480270386, "learning_rate": 8.879612772064726e-06, "loss": 0.364, "step": 24135 }, { "epoch": 0.5358431093994517, "grad_norm": 1.22607421875, "learning_rate": 8.876148009288813e-06, "loss": 0.47, "step": 24140 }, { "epoch": 0.5359540959589794, "grad_norm": 1.0492281913757324, "learning_rate": 8.872683383147055e-06, "loss": 0.3973, "step": 24145 }, { "epoch": 0.536065082518507, "grad_norm": 1.0997753143310547, "learning_rate": 8.869218894060678e-06, "loss": 0.3275, "step": 24150 }, { "epoch": 0.5361760690780346, "grad_norm": 1.5366569757461548, "learning_rate": 8.865754542450868e-06, "loss": 0.3678, "step": 24155 }, { "epoch": 0.5362870556375623, "grad_norm": 1.67007315158844, "learning_rate": 8.862290328738822e-06, "loss": 0.3455, "step": 24160 }, { "epoch": 0.53639804219709, "grad_norm": 1.1318846940994263, "learning_rate": 8.8588262533457e-06, "loss": 0.4769, "step": 24165 }, { "epoch": 0.5365090287566175, "grad_norm": 0.8410813808441162, "learning_rate": 8.855362316692654e-06, "loss": 0.36, "step": 24170 }, { "epoch": 0.5366200153161452, "grad_norm": 1.8174649477005005, "learning_rate": 8.85189851920082e-06, "loss": 0.395, "step": 24175 }, { "epoch": 0.5367310018756729, "grad_norm": 0.8335534930229187, "learning_rate": 8.848434861291313e-06, "loss": 0.4751, "step": 24180 }, { "epoch": 0.5368419884352005, "grad_norm": 1.2102363109588623, "learning_rate": 8.844971343385237e-06, "loss": 0.3887, "step": 24185 }, { "epoch": 0.5369529749947282, "grad_norm": 1.066596269607544, "learning_rate": 8.841507965903666e-06, "loss": 0.3254, "step": 24190 }, { "epoch": 0.5370639615542557, "grad_norm": 0.9416897892951965, "learning_rate": 8.838044729267674e-06, "loss": 0.4878, "step": 24195 }, { "epoch": 0.5371749481137834, "grad_norm": 0.8740838170051575, "learning_rate": 8.834581633898307e-06, "loss": 0.2981, "step": 24200 }, { "epoch": 0.5372859346733111, "grad_norm": 1.625891089439392, "learning_rate": 8.831118680216597e-06, "loss": 0.5089, "step": 24205 }, { "epoch": 0.5373969212328387, "grad_norm": 0.9911015033721924, "learning_rate": 8.827655868643557e-06, "loss": 0.5279, "step": 24210 }, { "epoch": 0.5375079077923663, "grad_norm": 1.2692562341690063, "learning_rate": 8.824193199600184e-06, "loss": 0.4197, "step": 24215 }, { "epoch": 0.537618894351894, "grad_norm": 1.1671028137207031, "learning_rate": 8.820730673507459e-06, "loss": 0.4664, "step": 24220 }, { "epoch": 0.5377298809114216, "grad_norm": 1.1915336847305298, "learning_rate": 8.817268290786343e-06, "loss": 0.3918, "step": 24225 }, { "epoch": 0.5378408674709493, "grad_norm": 1.9181326627731323, "learning_rate": 8.813806051857781e-06, "loss": 0.4549, "step": 24230 }, { "epoch": 0.537951854030477, "grad_norm": 1.3655357360839844, "learning_rate": 8.810343957142706e-06, "loss": 0.4827, "step": 24235 }, { "epoch": 0.5380628405900045, "grad_norm": 0.8995305299758911, "learning_rate": 8.806882007062016e-06, "loss": 0.4682, "step": 24240 }, { "epoch": 0.5381738271495322, "grad_norm": 1.4401637315750122, "learning_rate": 8.803420202036617e-06, "loss": 0.4811, "step": 24245 }, { "epoch": 0.5382848137090598, "grad_norm": 1.2994670867919922, "learning_rate": 8.799958542487371e-06, "loss": 0.4077, "step": 24250 }, { "epoch": 0.5383958002685875, "grad_norm": 1.2455394268035889, "learning_rate": 8.79649702883514e-06, "loss": 0.3975, "step": 24255 }, { "epoch": 0.5385067868281151, "grad_norm": 1.3992613554000854, "learning_rate": 8.793035661500771e-06, "loss": 0.3986, "step": 24260 }, { "epoch": 0.5386177733876427, "grad_norm": 1.7506078481674194, "learning_rate": 8.789574440905073e-06, "loss": 0.441, "step": 24265 }, { "epoch": 0.5387287599471704, "grad_norm": 0.9146484732627869, "learning_rate": 8.786113367468859e-06, "loss": 0.3456, "step": 24270 }, { "epoch": 0.5388397465066981, "grad_norm": 1.160560965538025, "learning_rate": 8.782652441612906e-06, "loss": 0.3486, "step": 24275 }, { "epoch": 0.5389507330662257, "grad_norm": 1.0188573598861694, "learning_rate": 8.779191663757987e-06, "loss": 0.3588, "step": 24280 }, { "epoch": 0.5390617196257533, "grad_norm": 0.8397150635719299, "learning_rate": 8.775731034324854e-06, "loss": 0.5183, "step": 24285 }, { "epoch": 0.539172706185281, "grad_norm": 1.5384353399276733, "learning_rate": 8.772270553734234e-06, "loss": 0.3716, "step": 24290 }, { "epoch": 0.5392836927448086, "grad_norm": 0.579929769039154, "learning_rate": 8.768810222406846e-06, "loss": 0.4645, "step": 24295 }, { "epoch": 0.5393946793043363, "grad_norm": 1.4148974418640137, "learning_rate": 8.765350040763377e-06, "loss": 0.471, "step": 24300 }, { "epoch": 0.5395056658638638, "grad_norm": 0.8738304972648621, "learning_rate": 8.761890009224515e-06, "loss": 0.4906, "step": 24305 }, { "epoch": 0.5396166524233915, "grad_norm": 1.0829598903656006, "learning_rate": 8.758430128210908e-06, "loss": 0.3976, "step": 24310 }, { "epoch": 0.5397276389829192, "grad_norm": 1.7203539609909058, "learning_rate": 8.754970398143205e-06, "loss": 0.5641, "step": 24315 }, { "epoch": 0.5398386255424468, "grad_norm": 1.3489069938659668, "learning_rate": 8.751510819442025e-06, "loss": 0.4008, "step": 24320 }, { "epoch": 0.5399496121019745, "grad_norm": 1.8932770490646362, "learning_rate": 8.748051392527973e-06, "loss": 0.6124, "step": 24325 }, { "epoch": 0.5400605986615021, "grad_norm": 1.372402310371399, "learning_rate": 8.744592117821633e-06, "loss": 0.4919, "step": 24330 }, { "epoch": 0.5401715852210297, "grad_norm": 0.9137066602706909, "learning_rate": 8.741132995743573e-06, "loss": 0.3805, "step": 24335 }, { "epoch": 0.5402825717805574, "grad_norm": 0.8052771091461182, "learning_rate": 8.737674026714342e-06, "loss": 0.4316, "step": 24340 }, { "epoch": 0.5403935583400851, "grad_norm": 1.2825324535369873, "learning_rate": 8.734215211154471e-06, "loss": 0.2739, "step": 24345 }, { "epoch": 0.5405045448996126, "grad_norm": 1.194736361503601, "learning_rate": 8.730756549484465e-06, "loss": 0.3092, "step": 24350 }, { "epoch": 0.5406155314591403, "grad_norm": 1.0905753374099731, "learning_rate": 8.727298042124827e-06, "loss": 0.4223, "step": 24355 }, { "epoch": 0.5407265180186679, "grad_norm": 1.0970087051391602, "learning_rate": 8.723839689496018e-06, "loss": 0.5198, "step": 24360 }, { "epoch": 0.5408375045781956, "grad_norm": 1.4189403057098389, "learning_rate": 8.720381492018501e-06, "loss": 0.4348, "step": 24365 }, { "epoch": 0.5409484911377233, "grad_norm": 1.2112865447998047, "learning_rate": 8.716923450112716e-06, "loss": 0.4805, "step": 24370 }, { "epoch": 0.5410594776972508, "grad_norm": 0.8020737767219543, "learning_rate": 8.713465564199071e-06, "loss": 0.504, "step": 24375 }, { "epoch": 0.5411704642567785, "grad_norm": 1.3366249799728394, "learning_rate": 8.71000783469797e-06, "loss": 0.3047, "step": 24380 }, { "epoch": 0.5412814508163062, "grad_norm": 1.6268224716186523, "learning_rate": 8.706550262029787e-06, "loss": 0.5713, "step": 24385 }, { "epoch": 0.5413924373758338, "grad_norm": 1.1691818237304688, "learning_rate": 8.703092846614889e-06, "loss": 0.4409, "step": 24390 }, { "epoch": 0.5415034239353614, "grad_norm": 1.7712862491607666, "learning_rate": 8.69963558887361e-06, "loss": 0.4349, "step": 24395 }, { "epoch": 0.5416144104948891, "grad_norm": 1.100163221359253, "learning_rate": 8.696178489226274e-06, "loss": 0.6371, "step": 24400 }, { "epoch": 0.5417253970544167, "grad_norm": 1.5967742204666138, "learning_rate": 8.692721548093188e-06, "loss": 0.3944, "step": 24405 }, { "epoch": 0.5418363836139444, "grad_norm": 1.168125867843628, "learning_rate": 8.68926476589463e-06, "loss": 0.5811, "step": 24410 }, { "epoch": 0.5419473701734719, "grad_norm": 1.3047908544540405, "learning_rate": 8.685808143050869e-06, "loss": 0.4678, "step": 24415 }, { "epoch": 0.5420583567329996, "grad_norm": 1.6398917436599731, "learning_rate": 8.682351679982142e-06, "loss": 0.4024, "step": 24420 }, { "epoch": 0.5421693432925273, "grad_norm": 1.2872521877288818, "learning_rate": 8.67889537710868e-06, "loss": 0.4321, "step": 24425 }, { "epoch": 0.5422803298520549, "grad_norm": 1.3213415145874023, "learning_rate": 8.67543923485069e-06, "loss": 0.5972, "step": 24430 }, { "epoch": 0.5423913164115826, "grad_norm": 1.914083480834961, "learning_rate": 8.671983253628354e-06, "loss": 0.5109, "step": 24435 }, { "epoch": 0.5425023029711102, "grad_norm": 1.3763399124145508, "learning_rate": 8.66852743386184e-06, "loss": 0.3901, "step": 24440 }, { "epoch": 0.5426132895306378, "grad_norm": 1.1899441480636597, "learning_rate": 8.665071775971296e-06, "loss": 0.4049, "step": 24445 }, { "epoch": 0.5427242760901655, "grad_norm": 1.1333017349243164, "learning_rate": 8.661616280376846e-06, "loss": 0.555, "step": 24450 }, { "epoch": 0.5428352626496932, "grad_norm": 1.456155776977539, "learning_rate": 8.658160947498604e-06, "loss": 0.3794, "step": 24455 }, { "epoch": 0.5429462492092207, "grad_norm": 1.4425901174545288, "learning_rate": 8.654705777756649e-06, "loss": 0.5578, "step": 24460 }, { "epoch": 0.5430572357687484, "grad_norm": 1.1383377313613892, "learning_rate": 8.65125077157106e-06, "loss": 0.456, "step": 24465 }, { "epoch": 0.543168222328276, "grad_norm": 1.1584062576293945, "learning_rate": 8.647795929361873e-06, "loss": 0.3621, "step": 24470 }, { "epoch": 0.5432792088878037, "grad_norm": 1.4009616374969482, "learning_rate": 8.644341251549126e-06, "loss": 0.4397, "step": 24475 }, { "epoch": 0.5433901954473314, "grad_norm": 1.229703426361084, "learning_rate": 8.64088673855282e-06, "loss": 0.6272, "step": 24480 }, { "epoch": 0.5435011820068589, "grad_norm": 1.0358545780181885, "learning_rate": 8.637432390792945e-06, "loss": 0.4596, "step": 24485 }, { "epoch": 0.5436121685663866, "grad_norm": 0.9032576084136963, "learning_rate": 8.633978208689476e-06, "loss": 0.4377, "step": 24490 }, { "epoch": 0.5437231551259143, "grad_norm": 1.1318433284759521, "learning_rate": 8.630524192662349e-06, "loss": 0.3985, "step": 24495 }, { "epoch": 0.5438341416854419, "grad_norm": 1.161857008934021, "learning_rate": 8.627070343131502e-06, "loss": 0.4137, "step": 24500 }, { "epoch": 0.5439451282449695, "grad_norm": 1.4461456537246704, "learning_rate": 8.623616660516836e-06, "loss": 0.3999, "step": 24505 }, { "epoch": 0.5440561148044972, "grad_norm": 1.6744439601898193, "learning_rate": 8.620163145238239e-06, "loss": 0.5026, "step": 24510 }, { "epoch": 0.5441671013640248, "grad_norm": 1.600347638130188, "learning_rate": 8.616709797715585e-06, "loss": 0.5446, "step": 24515 }, { "epoch": 0.5442780879235525, "grad_norm": 1.0115714073181152, "learning_rate": 8.613256618368708e-06, "loss": 0.3249, "step": 24520 }, { "epoch": 0.54438907448308, "grad_norm": 1.2178457975387573, "learning_rate": 8.609803607617447e-06, "loss": 0.5456, "step": 24525 }, { "epoch": 0.5445000610426077, "grad_norm": 1.0085428953170776, "learning_rate": 8.606350765881595e-06, "loss": 0.5988, "step": 24530 }, { "epoch": 0.5446110476021354, "grad_norm": 1.0458776950836182, "learning_rate": 8.602898093580943e-06, "loss": 0.4891, "step": 24535 }, { "epoch": 0.544722034161663, "grad_norm": 1.2219630479812622, "learning_rate": 8.599445591135262e-06, "loss": 0.4179, "step": 24540 }, { "epoch": 0.5448330207211907, "grad_norm": 1.1068999767303467, "learning_rate": 8.595993258964282e-06, "loss": 0.3851, "step": 24545 }, { "epoch": 0.5449440072807183, "grad_norm": 1.4395174980163574, "learning_rate": 8.592541097487737e-06, "loss": 0.3795, "step": 24550 }, { "epoch": 0.5450549938402459, "grad_norm": 0.8915678858757019, "learning_rate": 8.589089107125323e-06, "loss": 0.4981, "step": 24555 }, { "epoch": 0.5451659803997736, "grad_norm": 1.1956037282943726, "learning_rate": 8.585637288296724e-06, "loss": 0.549, "step": 24560 }, { "epoch": 0.5452769669593013, "grad_norm": 1.6773871183395386, "learning_rate": 8.582185641421601e-06, "loss": 0.5066, "step": 24565 }, { "epoch": 0.5453879535188288, "grad_norm": 1.8565099239349365, "learning_rate": 8.578734166919592e-06, "loss": 0.438, "step": 24570 }, { "epoch": 0.5454989400783565, "grad_norm": 1.4970033168792725, "learning_rate": 8.575282865210316e-06, "loss": 0.4107, "step": 24575 }, { "epoch": 0.5456099266378841, "grad_norm": 0.9346946477890015, "learning_rate": 8.571831736713373e-06, "loss": 0.3067, "step": 24580 }, { "epoch": 0.5457209131974118, "grad_norm": 1.2216112613677979, "learning_rate": 8.568380781848337e-06, "loss": 0.3981, "step": 24585 }, { "epoch": 0.5458318997569395, "grad_norm": 0.9755334854125977, "learning_rate": 8.564930001034763e-06, "loss": 0.2897, "step": 24590 }, { "epoch": 0.545942886316467, "grad_norm": 1.269582748413086, "learning_rate": 8.561479394692187e-06, "loss": 0.4555, "step": 24595 }, { "epoch": 0.5460538728759947, "grad_norm": 1.3922683000564575, "learning_rate": 8.558028963240123e-06, "loss": 0.4091, "step": 24600 }, { "epoch": 0.5461648594355224, "grad_norm": 1.26840078830719, "learning_rate": 8.55457870709806e-06, "loss": 0.5134, "step": 24605 }, { "epoch": 0.54627584599505, "grad_norm": 1.013315200805664, "learning_rate": 8.551128626685473e-06, "loss": 0.2049, "step": 24610 }, { "epoch": 0.5463868325545777, "grad_norm": 1.287469506263733, "learning_rate": 8.547678722421805e-06, "loss": 0.3325, "step": 24615 }, { "epoch": 0.5464978191141053, "grad_norm": 1.38962984085083, "learning_rate": 8.544228994726486e-06, "loss": 0.4033, "step": 24620 }, { "epoch": 0.5466088056736329, "grad_norm": 1.2414928674697876, "learning_rate": 8.540779444018929e-06, "loss": 0.4281, "step": 24625 }, { "epoch": 0.5467197922331606, "grad_norm": 1.0304001569747925, "learning_rate": 8.537330070718509e-06, "loss": 0.5581, "step": 24630 }, { "epoch": 0.5468307787926882, "grad_norm": 0.9019938111305237, "learning_rate": 8.533880875244596e-06, "loss": 0.4139, "step": 24635 }, { "epoch": 0.5469417653522158, "grad_norm": 1.1788930892944336, "learning_rate": 8.530431858016527e-06, "loss": 0.5491, "step": 24640 }, { "epoch": 0.5470527519117435, "grad_norm": 1.1308344602584839, "learning_rate": 8.526983019453624e-06, "loss": 0.4609, "step": 24645 }, { "epoch": 0.5471637384712711, "grad_norm": 0.87598717212677, "learning_rate": 8.52353435997519e-06, "loss": 0.3522, "step": 24650 }, { "epoch": 0.5472747250307988, "grad_norm": 1.2333834171295166, "learning_rate": 8.52008588000049e-06, "loss": 0.5089, "step": 24655 }, { "epoch": 0.5473857115903265, "grad_norm": 2.247903347015381, "learning_rate": 8.516637579948792e-06, "loss": 0.3212, "step": 24660 }, { "epoch": 0.547496698149854, "grad_norm": 1.563804030418396, "learning_rate": 8.513189460239318e-06, "loss": 0.4435, "step": 24665 }, { "epoch": 0.5476076847093817, "grad_norm": 1.2471294403076172, "learning_rate": 8.509741521291287e-06, "loss": 0.2912, "step": 24670 }, { "epoch": 0.5477186712689094, "grad_norm": 1.4435144662857056, "learning_rate": 8.506293763523879e-06, "loss": 0.4713, "step": 24675 }, { "epoch": 0.547829657828437, "grad_norm": 0.9990353584289551, "learning_rate": 8.502846187356268e-06, "loss": 0.4863, "step": 24680 }, { "epoch": 0.5479406443879646, "grad_norm": 1.8638103008270264, "learning_rate": 8.499398793207596e-06, "loss": 0.532, "step": 24685 }, { "epoch": 0.5480516309474922, "grad_norm": 1.349532961845398, "learning_rate": 8.495951581496989e-06, "loss": 0.4164, "step": 24690 }, { "epoch": 0.5481626175070199, "grad_norm": 1.3890430927276611, "learning_rate": 8.492504552643541e-06, "loss": 0.4659, "step": 24695 }, { "epoch": 0.5482736040665476, "grad_norm": 0.9236255288124084, "learning_rate": 8.489057707066335e-06, "loss": 0.3011, "step": 24700 }, { "epoch": 0.5483845906260751, "grad_norm": 0.7612236738204956, "learning_rate": 8.485611045184424e-06, "loss": 0.3232, "step": 24705 }, { "epoch": 0.5484955771856028, "grad_norm": 1.6489447355270386, "learning_rate": 8.482164567416848e-06, "loss": 0.5851, "step": 24710 }, { "epoch": 0.5486065637451305, "grad_norm": 1.068479299545288, "learning_rate": 8.478718274182608e-06, "loss": 0.4415, "step": 24715 }, { "epoch": 0.5487175503046581, "grad_norm": 1.324263334274292, "learning_rate": 8.475272165900703e-06, "loss": 0.3814, "step": 24720 }, { "epoch": 0.5488285368641858, "grad_norm": 1.062963604927063, "learning_rate": 8.47182624299009e-06, "loss": 0.5722, "step": 24725 }, { "epoch": 0.5489395234237134, "grad_norm": 1.2563762664794922, "learning_rate": 8.468380505869718e-06, "loss": 0.4247, "step": 24730 }, { "epoch": 0.549050509983241, "grad_norm": 1.1818538904190063, "learning_rate": 8.46493495495851e-06, "loss": 0.4213, "step": 24735 }, { "epoch": 0.5491614965427687, "grad_norm": 1.265612006187439, "learning_rate": 8.46148959067536e-06, "loss": 0.3626, "step": 24740 }, { "epoch": 0.5492724831022963, "grad_norm": 1.954348087310791, "learning_rate": 8.458044413439148e-06, "loss": 0.5429, "step": 24745 }, { "epoch": 0.5493834696618239, "grad_norm": 0.8584548234939575, "learning_rate": 8.454599423668722e-06, "loss": 0.3159, "step": 24750 }, { "epoch": 0.5494944562213516, "grad_norm": 1.0633316040039062, "learning_rate": 8.45115462178292e-06, "loss": 0.3613, "step": 24755 }, { "epoch": 0.5496054427808792, "grad_norm": 1.023334264755249, "learning_rate": 8.447710008200539e-06, "loss": 0.3953, "step": 24760 }, { "epoch": 0.5497164293404069, "grad_norm": 0.8943719267845154, "learning_rate": 8.444265583340368e-06, "loss": 0.5067, "step": 24765 }, { "epoch": 0.5498274158999346, "grad_norm": 0.8232437968254089, "learning_rate": 8.440821347621176e-06, "loss": 0.2908, "step": 24770 }, { "epoch": 0.5499384024594621, "grad_norm": 1.4632898569107056, "learning_rate": 8.437377301461691e-06, "loss": 0.4248, "step": 24775 }, { "epoch": 0.5500493890189898, "grad_norm": 0.9818194508552551, "learning_rate": 8.433933445280637e-06, "loss": 0.5157, "step": 24780 }, { "epoch": 0.5501603755785175, "grad_norm": 1.169661283493042, "learning_rate": 8.4304897794967e-06, "loss": 0.4002, "step": 24785 }, { "epoch": 0.5502713621380451, "grad_norm": 1.7490116357803345, "learning_rate": 8.42704630452855e-06, "loss": 0.4003, "step": 24790 }, { "epoch": 0.5503823486975727, "grad_norm": 1.6208126544952393, "learning_rate": 8.42360302079484e-06, "loss": 0.4971, "step": 24795 }, { "epoch": 0.5504933352571003, "grad_norm": 1.2586880922317505, "learning_rate": 8.420159928714183e-06, "loss": 0.3776, "step": 24800 }, { "epoch": 0.550604321816628, "grad_norm": 1.1551201343536377, "learning_rate": 8.416717028705188e-06, "loss": 0.459, "step": 24805 }, { "epoch": 0.5507153083761557, "grad_norm": 1.0400720834732056, "learning_rate": 8.413274321186423e-06, "loss": 0.4561, "step": 24810 }, { "epoch": 0.5508262949356832, "grad_norm": 1.4724727869033813, "learning_rate": 8.409831806576446e-06, "loss": 0.3669, "step": 24815 }, { "epoch": 0.5509372814952109, "grad_norm": 2.1651415824890137, "learning_rate": 8.406389485293786e-06, "loss": 0.4854, "step": 24820 }, { "epoch": 0.5510482680547386, "grad_norm": 1.3159157037734985, "learning_rate": 8.402947357756946e-06, "loss": 0.4823, "step": 24825 }, { "epoch": 0.5511592546142662, "grad_norm": 1.1210155487060547, "learning_rate": 8.399505424384415e-06, "loss": 0.443, "step": 24830 }, { "epoch": 0.5512702411737939, "grad_norm": 0.9298530220985413, "learning_rate": 8.39606368559464e-06, "loss": 0.3296, "step": 24835 }, { "epoch": 0.5513812277333215, "grad_norm": 1.265432357788086, "learning_rate": 8.392622141806068e-06, "loss": 0.4532, "step": 24840 }, { "epoch": 0.5514922142928491, "grad_norm": 0.821119487285614, "learning_rate": 8.389180793437101e-06, "loss": 0.3021, "step": 24845 }, { "epoch": 0.5516032008523768, "grad_norm": 0.9582390785217285, "learning_rate": 8.385739640906131e-06, "loss": 0.2717, "step": 24850 }, { "epoch": 0.5517141874119044, "grad_norm": 1.3956959247589111, "learning_rate": 8.382298684631528e-06, "loss": 0.3915, "step": 24855 }, { "epoch": 0.551825173971432, "grad_norm": 1.9195780754089355, "learning_rate": 8.378857925031616e-06, "loss": 0.4014, "step": 24860 }, { "epoch": 0.5519361605309597, "grad_norm": 1.3991817235946655, "learning_rate": 8.375417362524729e-06, "loss": 0.3306, "step": 24865 }, { "epoch": 0.5520471470904873, "grad_norm": 1.4770393371582031, "learning_rate": 8.371976997529145e-06, "loss": 0.552, "step": 24870 }, { "epoch": 0.552158133650015, "grad_norm": 1.588649034500122, "learning_rate": 8.36853683046314e-06, "loss": 0.565, "step": 24875 }, { "epoch": 0.5522691202095427, "grad_norm": 1.552659273147583, "learning_rate": 8.365096861744957e-06, "loss": 0.5495, "step": 24880 }, { "epoch": 0.5523801067690702, "grad_norm": 1.6279903650283813, "learning_rate": 8.361657091792812e-06, "loss": 0.5373, "step": 24885 }, { "epoch": 0.5524910933285979, "grad_norm": 0.727022647857666, "learning_rate": 8.358217521024906e-06, "loss": 0.4088, "step": 24890 }, { "epoch": 0.5526020798881256, "grad_norm": 0.8844607472419739, "learning_rate": 8.354778149859403e-06, "loss": 0.5545, "step": 24895 }, { "epoch": 0.5527130664476532, "grad_norm": 0.9243135452270508, "learning_rate": 8.351338978714458e-06, "loss": 0.3905, "step": 24900 }, { "epoch": 0.5528240530071808, "grad_norm": 0.919755756855011, "learning_rate": 8.347900008008194e-06, "loss": 0.564, "step": 24905 }, { "epoch": 0.5529350395667084, "grad_norm": 1.214043140411377, "learning_rate": 8.3444612381587e-06, "loss": 0.384, "step": 24910 }, { "epoch": 0.5530460261262361, "grad_norm": 1.4756897687911987, "learning_rate": 8.341022669584062e-06, "loss": 0.6571, "step": 24915 }, { "epoch": 0.5531570126857638, "grad_norm": 1.5681345462799072, "learning_rate": 8.33758430270232e-06, "loss": 0.379, "step": 24920 }, { "epoch": 0.5532679992452914, "grad_norm": 0.8127152323722839, "learning_rate": 8.334146137931507e-06, "loss": 0.5053, "step": 24925 }, { "epoch": 0.553378985804819, "grad_norm": 2.043302536010742, "learning_rate": 8.330708175689614e-06, "loss": 0.469, "step": 24930 }, { "epoch": 0.5534899723643467, "grad_norm": 1.2709745168685913, "learning_rate": 8.327270416394628e-06, "loss": 0.4525, "step": 24935 }, { "epoch": 0.5536009589238743, "grad_norm": 1.3546684980392456, "learning_rate": 8.323832860464492e-06, "loss": 0.3388, "step": 24940 }, { "epoch": 0.553711945483402, "grad_norm": 1.3516091108322144, "learning_rate": 8.320395508317137e-06, "loss": 0.4119, "step": 24945 }, { "epoch": 0.5538229320429296, "grad_norm": 1.1552678346633911, "learning_rate": 8.316958360370462e-06, "loss": 0.3053, "step": 24950 }, { "epoch": 0.5539339186024572, "grad_norm": 1.4363343715667725, "learning_rate": 8.313521417042347e-06, "loss": 0.4092, "step": 24955 }, { "epoch": 0.5540449051619849, "grad_norm": 1.314101219177246, "learning_rate": 8.310084678750638e-06, "loss": 0.3924, "step": 24960 }, { "epoch": 0.5541558917215125, "grad_norm": 1.3979650735855103, "learning_rate": 8.306648145913173e-06, "loss": 0.3895, "step": 24965 }, { "epoch": 0.5542668782810402, "grad_norm": 1.220910906791687, "learning_rate": 8.303211818947739e-06, "loss": 0.5069, "step": 24970 }, { "epoch": 0.5543778648405678, "grad_norm": 0.6868875622749329, "learning_rate": 8.299775698272128e-06, "loss": 0.3418, "step": 24975 }, { "epoch": 0.5544888514000954, "grad_norm": 1.1837313175201416, "learning_rate": 8.296339784304081e-06, "loss": 0.3593, "step": 24980 }, { "epoch": 0.5545998379596231, "grad_norm": 1.2916624546051025, "learning_rate": 8.292904077461332e-06, "loss": 0.2936, "step": 24985 }, { "epoch": 0.5547108245191508, "grad_norm": 0.7648731470108032, "learning_rate": 8.289468578161581e-06, "loss": 0.428, "step": 24990 }, { "epoch": 0.5548218110786783, "grad_norm": 1.3653825521469116, "learning_rate": 8.2860332868225e-06, "loss": 0.3027, "step": 24995 }, { "epoch": 0.554932797638206, "grad_norm": 1.024545669555664, "learning_rate": 8.282598203861749e-06, "loss": 0.4062, "step": 25000 }, { "epoch": 0.5550437841977337, "grad_norm": 1.0971325635910034, "learning_rate": 8.279163329696944e-06, "loss": 0.341, "step": 25005 }, { "epoch": 0.5551547707572613, "grad_norm": 1.2084906101226807, "learning_rate": 8.275728664745693e-06, "loss": 0.4603, "step": 25010 }, { "epoch": 0.555265757316789, "grad_norm": 1.4418290853500366, "learning_rate": 8.272294209425566e-06, "loss": 0.4499, "step": 25015 }, { "epoch": 0.5553767438763165, "grad_norm": 1.2812556028366089, "learning_rate": 8.268859964154113e-06, "loss": 0.4002, "step": 25020 }, { "epoch": 0.5554877304358442, "grad_norm": 1.1037013530731201, "learning_rate": 8.265425929348867e-06, "loss": 0.3524, "step": 25025 }, { "epoch": 0.5555987169953719, "grad_norm": 1.4665725231170654, "learning_rate": 8.261992105427312e-06, "loss": 0.4524, "step": 25030 }, { "epoch": 0.5557097035548995, "grad_norm": 1.751202940940857, "learning_rate": 8.258558492806932e-06, "loss": 0.527, "step": 25035 }, { "epoch": 0.5558206901144271, "grad_norm": 1.0393388271331787, "learning_rate": 8.255125091905167e-06, "loss": 0.2466, "step": 25040 }, { "epoch": 0.5559316766739548, "grad_norm": 1.357147455215454, "learning_rate": 8.251691903139445e-06, "loss": 0.577, "step": 25045 }, { "epoch": 0.5560426632334824, "grad_norm": 1.1873313188552856, "learning_rate": 8.248258926927157e-06, "loss": 0.5742, "step": 25050 }, { "epoch": 0.5561536497930101, "grad_norm": 1.418981671333313, "learning_rate": 8.244826163685672e-06, "loss": 0.3359, "step": 25055 }, { "epoch": 0.5562646363525378, "grad_norm": 0.8048796057701111, "learning_rate": 8.241393613832337e-06, "loss": 0.3413, "step": 25060 }, { "epoch": 0.5563756229120653, "grad_norm": 1.4124388694763184, "learning_rate": 8.237961277784468e-06, "loss": 0.4055, "step": 25065 }, { "epoch": 0.556486609471593, "grad_norm": 1.5513757467269897, "learning_rate": 8.234529155959357e-06, "loss": 0.508, "step": 25070 }, { "epoch": 0.5565975960311206, "grad_norm": 0.8069791793823242, "learning_rate": 8.231097248774273e-06, "loss": 0.353, "step": 25075 }, { "epoch": 0.5567085825906483, "grad_norm": 0.7790321707725525, "learning_rate": 8.22766555664645e-06, "loss": 0.3161, "step": 25080 }, { "epoch": 0.5568195691501759, "grad_norm": 1.2847238779067993, "learning_rate": 8.224234079993107e-06, "loss": 0.376, "step": 25085 }, { "epoch": 0.5569305557097035, "grad_norm": 1.4938470125198364, "learning_rate": 8.220802819231427e-06, "loss": 0.4056, "step": 25090 }, { "epoch": 0.5570415422692312, "grad_norm": 1.4761202335357666, "learning_rate": 8.217371774778575e-06, "loss": 0.6105, "step": 25095 }, { "epoch": 0.5571525288287589, "grad_norm": 1.031489372253418, "learning_rate": 8.213940947051682e-06, "loss": 0.4533, "step": 25100 }, { "epoch": 0.5572635153882864, "grad_norm": 1.3615245819091797, "learning_rate": 8.21051033646786e-06, "loss": 0.4889, "step": 25105 }, { "epoch": 0.5573745019478141, "grad_norm": 1.0796312093734741, "learning_rate": 8.207079943444191e-06, "loss": 0.3195, "step": 25110 }, { "epoch": 0.5574854885073418, "grad_norm": 1.623977541923523, "learning_rate": 8.203649768397725e-06, "loss": 0.4583, "step": 25115 }, { "epoch": 0.5575964750668694, "grad_norm": 1.6875170469284058, "learning_rate": 8.200219811745502e-06, "loss": 0.4174, "step": 25120 }, { "epoch": 0.5577074616263971, "grad_norm": 1.4233814477920532, "learning_rate": 8.196790073904514e-06, "loss": 0.548, "step": 25125 }, { "epoch": 0.5578184481859246, "grad_norm": 1.2150535583496094, "learning_rate": 8.193360555291741e-06, "loss": 0.6045, "step": 25130 }, { "epoch": 0.5579294347454523, "grad_norm": 1.5399670600891113, "learning_rate": 8.189931256324136e-06, "loss": 0.3197, "step": 25135 }, { "epoch": 0.55804042130498, "grad_norm": 1.1746127605438232, "learning_rate": 8.186502177418614e-06, "loss": 0.4447, "step": 25140 }, { "epoch": 0.5581514078645076, "grad_norm": 1.565848708152771, "learning_rate": 8.18307331899208e-06, "loss": 0.2881, "step": 25145 }, { "epoch": 0.5582623944240352, "grad_norm": 1.0995526313781738, "learning_rate": 8.179644681461395e-06, "loss": 0.6268, "step": 25150 }, { "epoch": 0.5583733809835629, "grad_norm": 1.115090012550354, "learning_rate": 8.176216265243404e-06, "loss": 0.4727, "step": 25155 }, { "epoch": 0.5584843675430905, "grad_norm": 1.7932703495025635, "learning_rate": 8.172788070754927e-06, "loss": 0.4323, "step": 25160 }, { "epoch": 0.5585953541026182, "grad_norm": 1.0676770210266113, "learning_rate": 8.169360098412744e-06, "loss": 0.4214, "step": 25165 }, { "epoch": 0.5587063406621459, "grad_norm": 1.1755608320236206, "learning_rate": 8.165932348633625e-06, "loss": 0.4027, "step": 25170 }, { "epoch": 0.5588173272216734, "grad_norm": 1.457756519317627, "learning_rate": 8.162504821834296e-06, "loss": 0.4976, "step": 25175 }, { "epoch": 0.5589283137812011, "grad_norm": 0.9333487749099731, "learning_rate": 8.159077518431474e-06, "loss": 0.3416, "step": 25180 }, { "epoch": 0.5590393003407287, "grad_norm": 1.0539964437484741, "learning_rate": 8.155650438841826e-06, "loss": 0.3759, "step": 25185 }, { "epoch": 0.5591502869002564, "grad_norm": 1.611405849456787, "learning_rate": 8.152223583482018e-06, "loss": 0.3181, "step": 25190 }, { "epoch": 0.559261273459784, "grad_norm": 1.2849960327148438, "learning_rate": 8.148796952768668e-06, "loss": 0.3716, "step": 25195 }, { "epoch": 0.5593722600193116, "grad_norm": 1.1448283195495605, "learning_rate": 8.145370547118374e-06, "loss": 0.3764, "step": 25200 }, { "epoch": 0.5594832465788393, "grad_norm": 1.0560680627822876, "learning_rate": 8.141944366947711e-06, "loss": 0.4771, "step": 25205 }, { "epoch": 0.559594233138367, "grad_norm": 0.9812813401222229, "learning_rate": 8.138518412673221e-06, "loss": 0.2162, "step": 25210 }, { "epoch": 0.5597052196978946, "grad_norm": 1.6056973934173584, "learning_rate": 8.135092684711417e-06, "loss": 0.4784, "step": 25215 }, { "epoch": 0.5598162062574222, "grad_norm": 0.9755131006240845, "learning_rate": 8.131667183478793e-06, "loss": 0.3506, "step": 25220 }, { "epoch": 0.5599271928169499, "grad_norm": 0.9014113545417786, "learning_rate": 8.128241909391803e-06, "loss": 0.3755, "step": 25225 }, { "epoch": 0.5600381793764775, "grad_norm": 1.5927777290344238, "learning_rate": 8.12481686286689e-06, "loss": 0.5215, "step": 25230 }, { "epoch": 0.5601491659360052, "grad_norm": 1.598618745803833, "learning_rate": 8.121392044320448e-06, "loss": 0.5198, "step": 25235 }, { "epoch": 0.5602601524955327, "grad_norm": 1.4176770448684692, "learning_rate": 8.117967454168862e-06, "loss": 0.4328, "step": 25240 }, { "epoch": 0.5603711390550604, "grad_norm": 1.2567905187606812, "learning_rate": 8.114543092828485e-06, "loss": 0.3619, "step": 25245 }, { "epoch": 0.5604821256145881, "grad_norm": 1.7040385007858276, "learning_rate": 8.11111896071563e-06, "loss": 0.3801, "step": 25250 }, { "epoch": 0.5605931121741157, "grad_norm": 1.7464607954025269, "learning_rate": 8.107695058246603e-06, "loss": 0.5361, "step": 25255 }, { "epoch": 0.5607040987336434, "grad_norm": 1.2700963020324707, "learning_rate": 8.104271385837658e-06, "loss": 0.4105, "step": 25260 }, { "epoch": 0.560815085293171, "grad_norm": 1.1427721977233887, "learning_rate": 8.100847943905047e-06, "loss": 0.3996, "step": 25265 }, { "epoch": 0.5609260718526986, "grad_norm": 1.2108415365219116, "learning_rate": 8.09742473286497e-06, "loss": 0.4433, "step": 25270 }, { "epoch": 0.5610370584122263, "grad_norm": 0.8741481900215149, "learning_rate": 8.09400175313361e-06, "loss": 0.5403, "step": 25275 }, { "epoch": 0.561148044971754, "grad_norm": 1.054548740386963, "learning_rate": 8.09057900512713e-06, "loss": 0.447, "step": 25280 }, { "epoch": 0.5612590315312815, "grad_norm": 1.8012455701828003, "learning_rate": 8.087156489261647e-06, "loss": 0.644, "step": 25285 }, { "epoch": 0.5613700180908092, "grad_norm": 1.0386770963668823, "learning_rate": 8.08373420595327e-06, "loss": 0.4729, "step": 25290 }, { "epoch": 0.5614810046503368, "grad_norm": 1.0843547582626343, "learning_rate": 8.080312155618053e-06, "loss": 0.389, "step": 25295 }, { "epoch": 0.5615919912098645, "grad_norm": 1.1872903108596802, "learning_rate": 8.07689033867205e-06, "loss": 0.3114, "step": 25300 }, { "epoch": 0.5617029777693922, "grad_norm": 1.244188666343689, "learning_rate": 8.073468755531269e-06, "loss": 0.4731, "step": 25305 }, { "epoch": 0.5618139643289197, "grad_norm": 1.497534990310669, "learning_rate": 8.070047406611696e-06, "loss": 0.3794, "step": 25310 }, { "epoch": 0.5619249508884474, "grad_norm": 0.9751390814781189, "learning_rate": 8.066626292329288e-06, "loss": 0.4799, "step": 25315 }, { "epoch": 0.5620359374479751, "grad_norm": 2.0736711025238037, "learning_rate": 8.063205413099969e-06, "loss": 0.3642, "step": 25320 }, { "epoch": 0.5621469240075027, "grad_norm": 0.9690294861793518, "learning_rate": 8.05978476933964e-06, "loss": 0.4201, "step": 25325 }, { "epoch": 0.5622579105670303, "grad_norm": 1.3464387655258179, "learning_rate": 8.056364361464176e-06, "loss": 0.3445, "step": 25330 }, { "epoch": 0.562368897126558, "grad_norm": 1.2715051174163818, "learning_rate": 8.052944189889409e-06, "loss": 0.3768, "step": 25335 }, { "epoch": 0.5624798836860856, "grad_norm": 1.1041648387908936, "learning_rate": 8.049524255031163e-06, "loss": 0.4955, "step": 25340 }, { "epoch": 0.5625908702456133, "grad_norm": 1.3899486064910889, "learning_rate": 8.046104557305212e-06, "loss": 0.3873, "step": 25345 }, { "epoch": 0.5627018568051408, "grad_norm": 1.2393910884857178, "learning_rate": 8.04268509712732e-06, "loss": 0.4267, "step": 25350 }, { "epoch": 0.5628128433646685, "grad_norm": 1.4444127082824707, "learning_rate": 8.039265874913206e-06, "loss": 0.3685, "step": 25355 }, { "epoch": 0.5629238299241962, "grad_norm": 1.5184959173202515, "learning_rate": 8.035846891078572e-06, "loss": 0.248, "step": 25360 }, { "epoch": 0.5630348164837238, "grad_norm": 1.041031002998352, "learning_rate": 8.032428146039088e-06, "loss": 0.3472, "step": 25365 }, { "epoch": 0.5631458030432515, "grad_norm": 1.2190139293670654, "learning_rate": 8.029009640210389e-06, "loss": 0.2602, "step": 25370 }, { "epoch": 0.5632567896027791, "grad_norm": 0.991255521774292, "learning_rate": 8.02559137400809e-06, "loss": 0.435, "step": 25375 }, { "epoch": 0.5633677761623067, "grad_norm": 2.327293634414673, "learning_rate": 8.022173347847766e-06, "loss": 0.3849, "step": 25380 }, { "epoch": 0.5634787627218344, "grad_norm": 1.307950735092163, "learning_rate": 8.018755562144975e-06, "loss": 0.4234, "step": 25385 }, { "epoch": 0.5635897492813621, "grad_norm": 0.7813902497291565, "learning_rate": 8.01533801731524e-06, "loss": 0.3237, "step": 25390 }, { "epoch": 0.5637007358408896, "grad_norm": 1.0847278833389282, "learning_rate": 8.011920713774051e-06, "loss": 0.3685, "step": 25395 }, { "epoch": 0.5638117224004173, "grad_norm": 0.9492262601852417, "learning_rate": 8.008503651936876e-06, "loss": 0.4609, "step": 25400 }, { "epoch": 0.5639227089599449, "grad_norm": 1.9693423509597778, "learning_rate": 8.005086832219145e-06, "loss": 0.4801, "step": 25405 }, { "epoch": 0.5640336955194726, "grad_norm": 1.6223005056381226, "learning_rate": 8.001670255036268e-06, "loss": 0.3899, "step": 25410 }, { "epoch": 0.5641446820790003, "grad_norm": 1.0886939764022827, "learning_rate": 7.99825392080362e-06, "loss": 0.4616, "step": 25415 }, { "epoch": 0.5642556686385278, "grad_norm": 1.0888535976409912, "learning_rate": 7.994837829936549e-06, "loss": 0.4002, "step": 25420 }, { "epoch": 0.5643666551980555, "grad_norm": 1.322079062461853, "learning_rate": 7.991421982850367e-06, "loss": 0.4154, "step": 25425 }, { "epoch": 0.5644776417575832, "grad_norm": 1.256758451461792, "learning_rate": 7.988006379960366e-06, "loss": 0.533, "step": 25430 }, { "epoch": 0.5645886283171108, "grad_norm": 1.1814241409301758, "learning_rate": 7.9845910216818e-06, "loss": 0.574, "step": 25435 }, { "epoch": 0.5646996148766384, "grad_norm": 1.4561266899108887, "learning_rate": 7.9811759084299e-06, "loss": 0.4807, "step": 25440 }, { "epoch": 0.5648106014361661, "grad_norm": 1.137708306312561, "learning_rate": 7.97776104061986e-06, "loss": 0.5643, "step": 25445 }, { "epoch": 0.5649215879956937, "grad_norm": 0.908723771572113, "learning_rate": 7.974346418666854e-06, "loss": 0.4804, "step": 25450 }, { "epoch": 0.5650325745552214, "grad_norm": 2.1305763721466064, "learning_rate": 7.970932042986013e-06, "loss": 0.5224, "step": 25455 }, { "epoch": 0.565143561114749, "grad_norm": 1.0807911157608032, "learning_rate": 7.967517913992453e-06, "loss": 0.4506, "step": 25460 }, { "epoch": 0.5652545476742766, "grad_norm": 1.67317533493042, "learning_rate": 7.964104032101246e-06, "loss": 0.4928, "step": 25465 }, { "epoch": 0.5653655342338043, "grad_norm": 1.2551069259643555, "learning_rate": 7.96069039772744e-06, "loss": 0.4584, "step": 25470 }, { "epoch": 0.5654765207933319, "grad_norm": 1.3132424354553223, "learning_rate": 7.957277011286063e-06, "loss": 0.4113, "step": 25475 }, { "epoch": 0.5655875073528596, "grad_norm": 1.1066019535064697, "learning_rate": 7.953863873192092e-06, "loss": 0.4033, "step": 25480 }, { "epoch": 0.5656984939123872, "grad_norm": 1.9208440780639648, "learning_rate": 7.95045098386049e-06, "loss": 0.4711, "step": 25485 }, { "epoch": 0.5658094804719148, "grad_norm": 0.8026219606399536, "learning_rate": 7.947038343706184e-06, "loss": 0.3814, "step": 25490 }, { "epoch": 0.5659204670314425, "grad_norm": 1.6581600904464722, "learning_rate": 7.943625953144068e-06, "loss": 0.3491, "step": 25495 }, { "epoch": 0.5660314535909702, "grad_norm": 0.9231352806091309, "learning_rate": 7.940213812589018e-06, "loss": 0.453, "step": 25500 }, { "epoch": 0.5661424401504977, "grad_norm": 1.7178947925567627, "learning_rate": 7.93680192245586e-06, "loss": 0.4339, "step": 25505 }, { "epoch": 0.5662534267100254, "grad_norm": 1.2096792459487915, "learning_rate": 7.93339028315941e-06, "loss": 0.3569, "step": 25510 }, { "epoch": 0.566364413269553, "grad_norm": 0.9888404607772827, "learning_rate": 7.929978895114432e-06, "loss": 0.406, "step": 25515 }, { "epoch": 0.5664753998290807, "grad_norm": 1.1972068548202515, "learning_rate": 7.926567758735683e-06, "loss": 0.4937, "step": 25520 }, { "epoch": 0.5665863863886084, "grad_norm": 1.4918951988220215, "learning_rate": 7.923156874437867e-06, "loss": 0.4326, "step": 25525 }, { "epoch": 0.5666973729481359, "grad_norm": 1.0668883323669434, "learning_rate": 7.919746242635675e-06, "loss": 0.1992, "step": 25530 }, { "epoch": 0.5668083595076636, "grad_norm": 1.5377274751663208, "learning_rate": 7.91633586374376e-06, "loss": 0.3767, "step": 25535 }, { "epoch": 0.5669193460671913, "grad_norm": 1.5494482517242432, "learning_rate": 7.91292573817674e-06, "loss": 0.3608, "step": 25540 }, { "epoch": 0.5670303326267189, "grad_norm": 1.1204816102981567, "learning_rate": 7.909515866349214e-06, "loss": 0.442, "step": 25545 }, { "epoch": 0.5671413191862466, "grad_norm": 1.6153379678726196, "learning_rate": 7.906106248675733e-06, "loss": 0.3891, "step": 25550 }, { "epoch": 0.5672523057457742, "grad_norm": 1.0235836505889893, "learning_rate": 7.902696885570833e-06, "loss": 0.4467, "step": 25555 }, { "epoch": 0.5673632923053018, "grad_norm": 1.214629888534546, "learning_rate": 7.899287777449015e-06, "loss": 0.4197, "step": 25560 }, { "epoch": 0.5674742788648295, "grad_norm": 1.6324222087860107, "learning_rate": 7.89587892472474e-06, "loss": 0.2196, "step": 25565 }, { "epoch": 0.567585265424357, "grad_norm": 1.2325080633163452, "learning_rate": 7.892470327812451e-06, "loss": 0.4715, "step": 25570 }, { "epoch": 0.5676962519838847, "grad_norm": 1.5055568218231201, "learning_rate": 7.889061987126549e-06, "loss": 0.444, "step": 25575 }, { "epoch": 0.5678072385434124, "grad_norm": 1.1074658632278442, "learning_rate": 7.885653903081412e-06, "loss": 0.495, "step": 25580 }, { "epoch": 0.56791822510294, "grad_norm": 0.9098854660987854, "learning_rate": 7.882246076091385e-06, "loss": 0.4111, "step": 25585 }, { "epoch": 0.5680292116624677, "grad_norm": 0.773410975933075, "learning_rate": 7.878838506570774e-06, "loss": 0.3386, "step": 25590 }, { "epoch": 0.5681401982219954, "grad_norm": 0.5425692796707153, "learning_rate": 7.875431194933868e-06, "loss": 0.3496, "step": 25595 }, { "epoch": 0.5682511847815229, "grad_norm": 1.6536232233047485, "learning_rate": 7.872024141594907e-06, "loss": 0.3964, "step": 25600 }, { "epoch": 0.5683621713410506, "grad_norm": 0.9511072039604187, "learning_rate": 7.86861734696812e-06, "loss": 0.5014, "step": 25605 }, { "epoch": 0.5684731579005783, "grad_norm": 1.6080678701400757, "learning_rate": 7.865210811467682e-06, "loss": 0.5391, "step": 25610 }, { "epoch": 0.5685841444601059, "grad_norm": 1.163704514503479, "learning_rate": 7.861804535507754e-06, "loss": 0.3368, "step": 25615 }, { "epoch": 0.5686951310196335, "grad_norm": 1.0903295278549194, "learning_rate": 7.858398519502464e-06, "loss": 0.649, "step": 25620 }, { "epoch": 0.5688061175791612, "grad_norm": 1.1664574146270752, "learning_rate": 7.854992763865896e-06, "loss": 0.3017, "step": 25625 }, { "epoch": 0.5689171041386888, "grad_norm": 0.8933138251304626, "learning_rate": 7.851587269012118e-06, "loss": 0.4096, "step": 25630 }, { "epoch": 0.5690280906982165, "grad_norm": 1.3496272563934326, "learning_rate": 7.848182035355151e-06, "loss": 0.5146, "step": 25635 }, { "epoch": 0.569139077257744, "grad_norm": 1.5749702453613281, "learning_rate": 7.844777063308994e-06, "loss": 0.4096, "step": 25640 }, { "epoch": 0.5692500638172717, "grad_norm": 1.6230230331420898, "learning_rate": 7.84137235328762e-06, "loss": 0.3783, "step": 25645 }, { "epoch": 0.5693610503767994, "grad_norm": 0.7861883640289307, "learning_rate": 7.83796790570495e-06, "loss": 0.4608, "step": 25650 }, { "epoch": 0.569472036936327, "grad_norm": 2.24285888671875, "learning_rate": 7.834563720974895e-06, "loss": 0.4869, "step": 25655 }, { "epoch": 0.5695830234958547, "grad_norm": 1.4274076223373413, "learning_rate": 7.831159799511316e-06, "loss": 0.2404, "step": 25660 }, { "epoch": 0.5696940100553823, "grad_norm": 1.2562860250473022, "learning_rate": 7.82775614172806e-06, "loss": 0.426, "step": 25665 }, { "epoch": 0.5698049966149099, "grad_norm": 1.2366671562194824, "learning_rate": 7.824352748038924e-06, "loss": 0.5719, "step": 25670 }, { "epoch": 0.5699159831744376, "grad_norm": 1.4575527906417847, "learning_rate": 7.820949618857685e-06, "loss": 0.4365, "step": 25675 }, { "epoch": 0.5700269697339653, "grad_norm": 1.5027035474777222, "learning_rate": 7.817546754598085e-06, "loss": 0.4668, "step": 25680 }, { "epoch": 0.5701379562934928, "grad_norm": 1.3077846765518188, "learning_rate": 7.814144155673828e-06, "loss": 0.3834, "step": 25685 }, { "epoch": 0.5702489428530205, "grad_norm": 1.1927096843719482, "learning_rate": 7.810741822498596e-06, "loss": 0.4916, "step": 25690 }, { "epoch": 0.5703599294125481, "grad_norm": 1.380219578742981, "learning_rate": 7.80733975548603e-06, "loss": 0.3316, "step": 25695 }, { "epoch": 0.5704709159720758, "grad_norm": 0.9102444052696228, "learning_rate": 7.803937955049743e-06, "loss": 0.3905, "step": 25700 }, { "epoch": 0.5705819025316035, "grad_norm": 0.704429566860199, "learning_rate": 7.800536421603317e-06, "loss": 0.4945, "step": 25705 }, { "epoch": 0.570692889091131, "grad_norm": 0.7455553412437439, "learning_rate": 7.797135155560292e-06, "loss": 0.5326, "step": 25710 }, { "epoch": 0.5708038756506587, "grad_norm": 0.8614157438278198, "learning_rate": 7.793734157334192e-06, "loss": 0.5087, "step": 25715 }, { "epoch": 0.5709148622101864, "grad_norm": 1.1032811403274536, "learning_rate": 7.790333427338489e-06, "loss": 0.4716, "step": 25720 }, { "epoch": 0.571025848769714, "grad_norm": 0.9284597039222717, "learning_rate": 7.786932965986638e-06, "loss": 0.5088, "step": 25725 }, { "epoch": 0.5711368353292416, "grad_norm": 1.2898480892181396, "learning_rate": 7.783532773692058e-06, "loss": 0.4708, "step": 25730 }, { "epoch": 0.5712478218887693, "grad_norm": 0.7317522764205933, "learning_rate": 7.780132850868126e-06, "loss": 0.3739, "step": 25735 }, { "epoch": 0.5713588084482969, "grad_norm": 0.6803992986679077, "learning_rate": 7.776733197928203e-06, "loss": 0.534, "step": 25740 }, { "epoch": 0.5714697950078246, "grad_norm": 0.9768413305282593, "learning_rate": 7.773333815285596e-06, "loss": 0.4836, "step": 25745 }, { "epoch": 0.5715807815673521, "grad_norm": 1.4873603582382202, "learning_rate": 7.769934703353594e-06, "loss": 0.3999, "step": 25750 }, { "epoch": 0.5716917681268798, "grad_norm": 1.1292402744293213, "learning_rate": 7.76653586254546e-06, "loss": 0.3039, "step": 25755 }, { "epoch": 0.5718027546864075, "grad_norm": 1.3553985357284546, "learning_rate": 7.763137293274399e-06, "loss": 0.3609, "step": 25760 }, { "epoch": 0.5719137412459351, "grad_norm": 1.3553942441940308, "learning_rate": 7.759738995953608e-06, "loss": 0.5826, "step": 25765 }, { "epoch": 0.5720247278054628, "grad_norm": 0.895066499710083, "learning_rate": 7.756340970996233e-06, "loss": 0.5257, "step": 25770 }, { "epoch": 0.5721357143649904, "grad_norm": 1.4519301652908325, "learning_rate": 7.752943218815401e-06, "loss": 0.387, "step": 25775 }, { "epoch": 0.572246700924518, "grad_norm": 3.0451765060424805, "learning_rate": 7.749545739824193e-06, "loss": 0.4637, "step": 25780 }, { "epoch": 0.5723576874840457, "grad_norm": 1.217422366142273, "learning_rate": 7.746148534435668e-06, "loss": 0.2863, "step": 25785 }, { "epoch": 0.5724686740435734, "grad_norm": 1.5442456007003784, "learning_rate": 7.742751603062848e-06, "loss": 0.3668, "step": 25790 }, { "epoch": 0.572579660603101, "grad_norm": 1.1498961448669434, "learning_rate": 7.739354946118714e-06, "loss": 0.3538, "step": 25795 }, { "epoch": 0.5726906471626286, "grad_norm": 1.1722846031188965, "learning_rate": 7.735958564016228e-06, "loss": 0.4353, "step": 25800 }, { "epoch": 0.5728016337221562, "grad_norm": 1.1115599870681763, "learning_rate": 7.732562457168304e-06, "loss": 0.5448, "step": 25805 }, { "epoch": 0.5729126202816839, "grad_norm": 1.3585245609283447, "learning_rate": 7.729166625987834e-06, "loss": 0.2745, "step": 25810 }, { "epoch": 0.5730236068412116, "grad_norm": 1.2924643754959106, "learning_rate": 7.725771070887668e-06, "loss": 0.423, "step": 25815 }, { "epoch": 0.5731345934007391, "grad_norm": 1.2379727363586426, "learning_rate": 7.722375792280628e-06, "loss": 0.4376, "step": 25820 }, { "epoch": 0.5732455799602668, "grad_norm": 1.567542552947998, "learning_rate": 7.718980790579503e-06, "loss": 0.4743, "step": 25825 }, { "epoch": 0.5733565665197945, "grad_norm": 1.2755229473114014, "learning_rate": 7.71558606619704e-06, "loss": 0.542, "step": 25830 }, { "epoch": 0.5734675530793221, "grad_norm": 0.7536981701850891, "learning_rate": 7.712191619545962e-06, "loss": 0.4327, "step": 25835 }, { "epoch": 0.5735785396388497, "grad_norm": 0.9931631088256836, "learning_rate": 7.70879745103896e-06, "loss": 0.3605, "step": 25840 }, { "epoch": 0.5736895261983774, "grad_norm": 1.2342039346694946, "learning_rate": 7.705403561088672e-06, "loss": 0.3614, "step": 25845 }, { "epoch": 0.573800512757905, "grad_norm": 1.337133526802063, "learning_rate": 7.702009950107729e-06, "loss": 0.4008, "step": 25850 }, { "epoch": 0.5739114993174327, "grad_norm": 0.7864338755607605, "learning_rate": 7.698616618508706e-06, "loss": 0.4875, "step": 25855 }, { "epoch": 0.5740224858769603, "grad_norm": 1.7323805093765259, "learning_rate": 7.695223566704156e-06, "loss": 0.278, "step": 25860 }, { "epoch": 0.5741334724364879, "grad_norm": 2.2891860008239746, "learning_rate": 7.691830795106599e-06, "loss": 0.4967, "step": 25865 }, { "epoch": 0.5742444589960156, "grad_norm": 1.6649677753448486, "learning_rate": 7.68843830412851e-06, "loss": 0.3181, "step": 25870 }, { "epoch": 0.5743554455555432, "grad_norm": 1.1986839771270752, "learning_rate": 7.685046094182343e-06, "loss": 0.4243, "step": 25875 }, { "epoch": 0.5744664321150709, "grad_norm": 1.3807337284088135, "learning_rate": 7.681654165680504e-06, "loss": 0.5999, "step": 25880 }, { "epoch": 0.5745774186745986, "grad_norm": 1.0951980352401733, "learning_rate": 7.67826251903538e-06, "loss": 0.4205, "step": 25885 }, { "epoch": 0.5746884052341261, "grad_norm": 1.0187554359436035, "learning_rate": 7.67487115465931e-06, "loss": 0.4065, "step": 25890 }, { "epoch": 0.5747993917936538, "grad_norm": 1.6819921731948853, "learning_rate": 7.671480072964605e-06, "loss": 0.4441, "step": 25895 }, { "epoch": 0.5749103783531815, "grad_norm": 0.9652168154716492, "learning_rate": 7.668089274363548e-06, "loss": 0.3745, "step": 25900 }, { "epoch": 0.575021364912709, "grad_norm": 1.1394695043563843, "learning_rate": 7.664698759268371e-06, "loss": 0.5128, "step": 25905 }, { "epoch": 0.5751323514722367, "grad_norm": 1.1738479137420654, "learning_rate": 7.661308528091294e-06, "loss": 0.4227, "step": 25910 }, { "epoch": 0.5752433380317643, "grad_norm": 0.7554298043251038, "learning_rate": 7.657918581244477e-06, "loss": 0.407, "step": 25915 }, { "epoch": 0.575354324591292, "grad_norm": 1.571569800376892, "learning_rate": 7.654528919140067e-06, "loss": 0.4258, "step": 25920 }, { "epoch": 0.5754653111508197, "grad_norm": 1.7037241458892822, "learning_rate": 7.651139542190164e-06, "loss": 0.5274, "step": 25925 }, { "epoch": 0.5755762977103472, "grad_norm": 1.4097694158554077, "learning_rate": 7.647750450806838e-06, "loss": 0.3991, "step": 25930 }, { "epoch": 0.5756872842698749, "grad_norm": 1.1634163856506348, "learning_rate": 7.644361645402124e-06, "loss": 0.3743, "step": 25935 }, { "epoch": 0.5757982708294026, "grad_norm": 0.6868215799331665, "learning_rate": 7.64097312638802e-06, "loss": 0.5497, "step": 25940 }, { "epoch": 0.5759092573889302, "grad_norm": 1.4639344215393066, "learning_rate": 7.637584894176491e-06, "loss": 0.5139, "step": 25945 }, { "epoch": 0.5760202439484579, "grad_norm": 1.0975699424743652, "learning_rate": 7.634196949179472e-06, "loss": 0.452, "step": 25950 }, { "epoch": 0.5761312305079855, "grad_norm": 0.7695174217224121, "learning_rate": 7.630809291808848e-06, "loss": 0.3719, "step": 25955 }, { "epoch": 0.5762422170675131, "grad_norm": 1.2575563192367554, "learning_rate": 7.6274219224764905e-06, "loss": 0.3246, "step": 25960 }, { "epoch": 0.5763532036270408, "grad_norm": 1.0166893005371094, "learning_rate": 7.624034841594212e-06, "loss": 0.401, "step": 25965 }, { "epoch": 0.5764641901865684, "grad_norm": 1.1126619577407837, "learning_rate": 7.620648049573815e-06, "loss": 0.4612, "step": 25970 }, { "epoch": 0.576575176746096, "grad_norm": 0.6467011570930481, "learning_rate": 7.617261546827045e-06, "loss": 0.4013, "step": 25975 }, { "epoch": 0.5766861633056237, "grad_norm": 1.3136359453201294, "learning_rate": 7.613875333765623e-06, "loss": 0.37, "step": 25980 }, { "epoch": 0.5767971498651513, "grad_norm": 1.0311453342437744, "learning_rate": 7.610489410801242e-06, "loss": 0.3963, "step": 25985 }, { "epoch": 0.576908136424679, "grad_norm": 1.1149797439575195, "learning_rate": 7.607103778345538e-06, "loss": 0.4246, "step": 25990 }, { "epoch": 0.5770191229842067, "grad_norm": 0.9374891519546509, "learning_rate": 7.603718436810137e-06, "loss": 0.4815, "step": 25995 }, { "epoch": 0.5771301095437342, "grad_norm": 1.7929052114486694, "learning_rate": 7.600333386606607e-06, "loss": 0.4759, "step": 26000 }, { "epoch": 0.5772410961032619, "grad_norm": 1.3042231798171997, "learning_rate": 7.596948628146498e-06, "loss": 0.4221, "step": 26005 }, { "epoch": 0.5773520826627896, "grad_norm": 0.7556316256523132, "learning_rate": 7.593564161841318e-06, "loss": 0.3902, "step": 26010 }, { "epoch": 0.5774630692223172, "grad_norm": 1.3005378246307373, "learning_rate": 7.590179988102533e-06, "loss": 0.4377, "step": 26015 }, { "epoch": 0.5775740557818448, "grad_norm": 0.8551700115203857, "learning_rate": 7.586796107341587e-06, "loss": 0.471, "step": 26020 }, { "epoch": 0.5776850423413724, "grad_norm": 0.9651265740394592, "learning_rate": 7.5834125199698725e-06, "loss": 0.5191, "step": 26025 }, { "epoch": 0.5777960289009001, "grad_norm": 1.8282874822616577, "learning_rate": 7.580029226398762e-06, "loss": 0.4154, "step": 26030 }, { "epoch": 0.5779070154604278, "grad_norm": 1.118324875831604, "learning_rate": 7.5766462270395815e-06, "loss": 0.2921, "step": 26035 }, { "epoch": 0.5780180020199553, "grad_norm": 1.2642825841903687, "learning_rate": 7.573263522303627e-06, "loss": 0.4322, "step": 26040 }, { "epoch": 0.578128988579483, "grad_norm": 1.2556785345077515, "learning_rate": 7.569881112602153e-06, "loss": 0.2382, "step": 26045 }, { "epoch": 0.5782399751390107, "grad_norm": 0.7829084992408752, "learning_rate": 7.566498998346384e-06, "loss": 0.4101, "step": 26050 }, { "epoch": 0.5783509616985383, "grad_norm": 1.2316957712173462, "learning_rate": 7.563117179947506e-06, "loss": 0.4872, "step": 26055 }, { "epoch": 0.578461948258066, "grad_norm": 1.0883417129516602, "learning_rate": 7.559735657816667e-06, "loss": 0.4142, "step": 26060 }, { "epoch": 0.5785729348175936, "grad_norm": 1.3580420017242432, "learning_rate": 7.5563544323649815e-06, "loss": 0.4207, "step": 26065 }, { "epoch": 0.5786839213771212, "grad_norm": 1.2232900857925415, "learning_rate": 7.552973504003534e-06, "loss": 0.5603, "step": 26070 }, { "epoch": 0.5787949079366489, "grad_norm": 2.7719151973724365, "learning_rate": 7.5495928731433565e-06, "loss": 0.4197, "step": 26075 }, { "epoch": 0.5789058944961765, "grad_norm": 1.237747073173523, "learning_rate": 7.546212540195463e-06, "loss": 0.4251, "step": 26080 }, { "epoch": 0.5790168810557041, "grad_norm": 1.1366463899612427, "learning_rate": 7.542832505570815e-06, "loss": 0.5521, "step": 26085 }, { "epoch": 0.5791278676152318, "grad_norm": 1.3835487365722656, "learning_rate": 7.539452769680351e-06, "loss": 0.47, "step": 26090 }, { "epoch": 0.5792388541747594, "grad_norm": 0.8140641450881958, "learning_rate": 7.536073332934972e-06, "loss": 0.5301, "step": 26095 }, { "epoch": 0.5793498407342871, "grad_norm": 1.0987571477890015, "learning_rate": 7.532694195745529e-06, "loss": 0.3106, "step": 26100 }, { "epoch": 0.5794608272938148, "grad_norm": 0.8439056277275085, "learning_rate": 7.529315358522855e-06, "loss": 0.3396, "step": 26105 }, { "epoch": 0.5795718138533423, "grad_norm": 1.406260371208191, "learning_rate": 7.5259368216777296e-06, "loss": 0.4741, "step": 26110 }, { "epoch": 0.57968280041287, "grad_norm": 1.0653043985366821, "learning_rate": 7.522558585620907e-06, "loss": 0.3736, "step": 26115 }, { "epoch": 0.5797937869723977, "grad_norm": 1.3564051389694214, "learning_rate": 7.51918065076311e-06, "loss": 0.4633, "step": 26120 }, { "epoch": 0.5799047735319253, "grad_norm": 1.2771178483963013, "learning_rate": 7.515803017515002e-06, "loss": 0.4959, "step": 26125 }, { "epoch": 0.580015760091453, "grad_norm": 1.5175601243972778, "learning_rate": 7.512425686287237e-06, "loss": 0.4971, "step": 26130 }, { "epoch": 0.5801267466509805, "grad_norm": 1.8545664548873901, "learning_rate": 7.509048657490411e-06, "loss": 0.4164, "step": 26135 }, { "epoch": 0.5802377332105082, "grad_norm": 1.141951084136963, "learning_rate": 7.505671931535099e-06, "loss": 0.3089, "step": 26140 }, { "epoch": 0.5803487197700359, "grad_norm": 1.5077598094940186, "learning_rate": 7.502295508831824e-06, "loss": 0.3922, "step": 26145 }, { "epoch": 0.5804597063295635, "grad_norm": 1.1959303617477417, "learning_rate": 7.498919389791084e-06, "loss": 0.4636, "step": 26150 }, { "epoch": 0.5805706928890911, "grad_norm": 1.1563524007797241, "learning_rate": 7.495543574823341e-06, "loss": 0.4562, "step": 26155 }, { "epoch": 0.5806816794486188, "grad_norm": 1.376280426979065, "learning_rate": 7.492168064339007e-06, "loss": 0.444, "step": 26160 }, { "epoch": 0.5807926660081464, "grad_norm": 1.0079575777053833, "learning_rate": 7.488792858748473e-06, "loss": 0.3961, "step": 26165 }, { "epoch": 0.5809036525676741, "grad_norm": 1.5137135982513428, "learning_rate": 7.485417958462078e-06, "loss": 0.5764, "step": 26170 }, { "epoch": 0.5810146391272017, "grad_norm": 1.3020367622375488, "learning_rate": 7.4820433638901355e-06, "loss": 0.3482, "step": 26175 }, { "epoch": 0.5811256256867293, "grad_norm": 1.1549218893051147, "learning_rate": 7.478669075442917e-06, "loss": 0.2838, "step": 26180 }, { "epoch": 0.581236612246257, "grad_norm": 1.3579342365264893, "learning_rate": 7.475295093530655e-06, "loss": 0.4663, "step": 26185 }, { "epoch": 0.5813475988057846, "grad_norm": 0.9994341135025024, "learning_rate": 7.4719214185635505e-06, "loss": 0.3361, "step": 26190 }, { "epoch": 0.5814585853653123, "grad_norm": 0.8745097517967224, "learning_rate": 7.4685480509517594e-06, "loss": 0.2108, "step": 26195 }, { "epoch": 0.5815695719248399, "grad_norm": 0.8067927360534668, "learning_rate": 7.465174991105405e-06, "loss": 0.3894, "step": 26200 }, { "epoch": 0.5816805584843675, "grad_norm": 1.0950978994369507, "learning_rate": 7.461802239434579e-06, "loss": 0.4599, "step": 26205 }, { "epoch": 0.5817915450438952, "grad_norm": 1.183995246887207, "learning_rate": 7.4584297963493195e-06, "loss": 0.3717, "step": 26210 }, { "epoch": 0.5819025316034229, "grad_norm": 1.009460687637329, "learning_rate": 7.455057662259645e-06, "loss": 0.4878, "step": 26215 }, { "epoch": 0.5820135181629504, "grad_norm": 0.9349616765975952, "learning_rate": 7.451685837575522e-06, "loss": 0.4331, "step": 26220 }, { "epoch": 0.5821245047224781, "grad_norm": 1.2902761697769165, "learning_rate": 7.448314322706891e-06, "loss": 0.4988, "step": 26225 }, { "epoch": 0.5822354912820058, "grad_norm": 1.7355648279190063, "learning_rate": 7.444943118063645e-06, "loss": 0.5101, "step": 26230 }, { "epoch": 0.5823464778415334, "grad_norm": 0.7856650352478027, "learning_rate": 7.441572224055644e-06, "loss": 0.3294, "step": 26235 }, { "epoch": 0.582457464401061, "grad_norm": 1.197662353515625, "learning_rate": 7.438201641092716e-06, "loss": 0.3985, "step": 26240 }, { "epoch": 0.5825684509605886, "grad_norm": 0.7614005208015442, "learning_rate": 7.4348313695846375e-06, "loss": 0.3994, "step": 26245 }, { "epoch": 0.5826794375201163, "grad_norm": 1.2452610731124878, "learning_rate": 7.431461409941162e-06, "loss": 0.3622, "step": 26250 }, { "epoch": 0.582790424079644, "grad_norm": 1.1531463861465454, "learning_rate": 7.428091762571989e-06, "loss": 0.4351, "step": 26255 }, { "epoch": 0.5829014106391716, "grad_norm": 1.2325093746185303, "learning_rate": 7.424722427886795e-06, "loss": 0.4805, "step": 26260 }, { "epoch": 0.5830123971986992, "grad_norm": 1.3518481254577637, "learning_rate": 7.421353406295215e-06, "loss": 0.5129, "step": 26265 }, { "epoch": 0.5831233837582269, "grad_norm": 1.5780023336410522, "learning_rate": 7.417984698206837e-06, "loss": 0.4087, "step": 26270 }, { "epoch": 0.5832343703177545, "grad_norm": 1.3883979320526123, "learning_rate": 7.414616304031223e-06, "loss": 0.4416, "step": 26275 }, { "epoch": 0.5833453568772822, "grad_norm": 1.5068031549453735, "learning_rate": 7.411248224177884e-06, "loss": 0.3407, "step": 26280 }, { "epoch": 0.5834563434368099, "grad_norm": 1.6327471733093262, "learning_rate": 7.407880459056306e-06, "loss": 0.4364, "step": 26285 }, { "epoch": 0.5835673299963374, "grad_norm": 1.1161293983459473, "learning_rate": 7.404513009075929e-06, "loss": 0.4822, "step": 26290 }, { "epoch": 0.5836783165558651, "grad_norm": 1.0726834535598755, "learning_rate": 7.401145874646154e-06, "loss": 0.5733, "step": 26295 }, { "epoch": 0.5837893031153927, "grad_norm": 1.6274734735488892, "learning_rate": 7.39777905617635e-06, "loss": 0.5559, "step": 26300 }, { "epoch": 0.5839002896749204, "grad_norm": 1.3102668523788452, "learning_rate": 7.39441255407584e-06, "loss": 0.4602, "step": 26305 }, { "epoch": 0.584011276234448, "grad_norm": 1.2450367212295532, "learning_rate": 7.391046368753913e-06, "loss": 0.6068, "step": 26310 }, { "epoch": 0.5841222627939756, "grad_norm": 1.6839604377746582, "learning_rate": 7.387680500619819e-06, "loss": 0.4881, "step": 26315 }, { "epoch": 0.5842332493535033, "grad_norm": 1.215613603591919, "learning_rate": 7.384314950082767e-06, "loss": 0.4433, "step": 26320 }, { "epoch": 0.584344235913031, "grad_norm": 1.1325950622558594, "learning_rate": 7.380949717551935e-06, "loss": 0.4274, "step": 26325 }, { "epoch": 0.5844552224725585, "grad_norm": 1.5983508825302124, "learning_rate": 7.3775848034364485e-06, "loss": 0.5021, "step": 26330 }, { "epoch": 0.5845662090320862, "grad_norm": 0.9946767091751099, "learning_rate": 7.37422020814541e-06, "loss": 0.3135, "step": 26335 }, { "epoch": 0.5846771955916139, "grad_norm": 1.2925642728805542, "learning_rate": 7.37085593208787e-06, "loss": 0.4129, "step": 26340 }, { "epoch": 0.5847881821511415, "grad_norm": 1.2564030885696411, "learning_rate": 7.367491975672847e-06, "loss": 0.3438, "step": 26345 }, { "epoch": 0.5848991687106692, "grad_norm": 1.0621161460876465, "learning_rate": 7.364128339309326e-06, "loss": 0.4698, "step": 26350 }, { "epoch": 0.5850101552701967, "grad_norm": 1.0059372186660767, "learning_rate": 7.360765023406237e-06, "loss": 0.4562, "step": 26355 }, { "epoch": 0.5851211418297244, "grad_norm": 1.0170514583587646, "learning_rate": 7.357402028372489e-06, "loss": 0.3451, "step": 26360 }, { "epoch": 0.5852321283892521, "grad_norm": 0.9726334810256958, "learning_rate": 7.354039354616935e-06, "loss": 0.3805, "step": 26365 }, { "epoch": 0.5853431149487797, "grad_norm": 1.5828025341033936, "learning_rate": 7.350677002548403e-06, "loss": 0.4252, "step": 26370 }, { "epoch": 0.5854541015083073, "grad_norm": 1.3867665529251099, "learning_rate": 7.347314972575681e-06, "loss": 0.4395, "step": 26375 }, { "epoch": 0.585565088067835, "grad_norm": 1.0094491243362427, "learning_rate": 7.343953265107502e-06, "loss": 0.5013, "step": 26380 }, { "epoch": 0.5856760746273626, "grad_norm": 1.0547956228256226, "learning_rate": 7.340591880552583e-06, "loss": 0.4529, "step": 26385 }, { "epoch": 0.5857870611868903, "grad_norm": 1.1832739114761353, "learning_rate": 7.33723081931958e-06, "loss": 0.2863, "step": 26390 }, { "epoch": 0.585898047746418, "grad_norm": 0.9504315257072449, "learning_rate": 7.333870081817127e-06, "loss": 0.4596, "step": 26395 }, { "epoch": 0.5860090343059455, "grad_norm": 1.5113601684570312, "learning_rate": 7.330509668453805e-06, "loss": 0.3081, "step": 26400 }, { "epoch": 0.5861200208654732, "grad_norm": 0.8444293737411499, "learning_rate": 7.3271495796381666e-06, "loss": 0.3587, "step": 26405 }, { "epoch": 0.5862310074250008, "grad_norm": 2.0980753898620605, "learning_rate": 7.323789815778718e-06, "loss": 0.3862, "step": 26410 }, { "epoch": 0.5863419939845285, "grad_norm": 0.6181811690330505, "learning_rate": 7.320430377283928e-06, "loss": 0.2972, "step": 26415 }, { "epoch": 0.5864529805440561, "grad_norm": 1.166435718536377, "learning_rate": 7.317071264562226e-06, "loss": 0.5111, "step": 26420 }, { "epoch": 0.5865639671035837, "grad_norm": 1.4199657440185547, "learning_rate": 7.313712478022002e-06, "loss": 0.3927, "step": 26425 }, { "epoch": 0.5866749536631114, "grad_norm": 1.1458617448806763, "learning_rate": 7.310354018071604e-06, "loss": 0.423, "step": 26430 }, { "epoch": 0.5867859402226391, "grad_norm": 0.5648505091667175, "learning_rate": 7.306995885119349e-06, "loss": 0.5081, "step": 26435 }, { "epoch": 0.5868969267821667, "grad_norm": 0.9345181584358215, "learning_rate": 7.303638079573498e-06, "loss": 0.3507, "step": 26440 }, { "epoch": 0.5870079133416943, "grad_norm": 1.25766122341156, "learning_rate": 7.300280601842289e-06, "loss": 0.4505, "step": 26445 }, { "epoch": 0.587118899901222, "grad_norm": 1.497269868850708, "learning_rate": 7.296923452333908e-06, "loss": 0.4878, "step": 26450 }, { "epoch": 0.5872298864607496, "grad_norm": 1.4120640754699707, "learning_rate": 7.293566631456508e-06, "loss": 0.4281, "step": 26455 }, { "epoch": 0.5873408730202773, "grad_norm": 1.3898377418518066, "learning_rate": 7.290210139618203e-06, "loss": 0.3614, "step": 26460 }, { "epoch": 0.5874518595798048, "grad_norm": 1.8048535585403442, "learning_rate": 7.286853977227059e-06, "loss": 0.4069, "step": 26465 }, { "epoch": 0.5875628461393325, "grad_norm": 1.8236231803894043, "learning_rate": 7.283498144691113e-06, "loss": 0.3288, "step": 26470 }, { "epoch": 0.5876738326988602, "grad_norm": 0.779463529586792, "learning_rate": 7.2801426424183465e-06, "loss": 0.4077, "step": 26475 }, { "epoch": 0.5877848192583878, "grad_norm": 0.8819475769996643, "learning_rate": 7.276787470816722e-06, "loss": 0.4519, "step": 26480 }, { "epoch": 0.5878958058179155, "grad_norm": 1.0383362770080566, "learning_rate": 7.273432630294139e-06, "loss": 0.3105, "step": 26485 }, { "epoch": 0.5880067923774431, "grad_norm": 1.1080772876739502, "learning_rate": 7.270078121258471e-06, "loss": 0.3799, "step": 26490 }, { "epoch": 0.5881177789369707, "grad_norm": 2.0721535682678223, "learning_rate": 7.266723944117555e-06, "loss": 0.3617, "step": 26495 }, { "epoch": 0.5882287654964984, "grad_norm": 0.8842170238494873, "learning_rate": 7.263370099279173e-06, "loss": 0.3916, "step": 26500 }, { "epoch": 0.5883397520560261, "grad_norm": 3.7884020805358887, "learning_rate": 7.260016587151078e-06, "loss": 0.3228, "step": 26505 }, { "epoch": 0.5884507386155536, "grad_norm": 1.390315055847168, "learning_rate": 7.256663408140974e-06, "loss": 0.5956, "step": 26510 }, { "epoch": 0.5885617251750813, "grad_norm": 0.9888474345207214, "learning_rate": 7.253310562656531e-06, "loss": 0.3319, "step": 26515 }, { "epoch": 0.5886727117346089, "grad_norm": 1.2436730861663818, "learning_rate": 7.249958051105383e-06, "loss": 0.639, "step": 26520 }, { "epoch": 0.5887836982941366, "grad_norm": 0.937451183795929, "learning_rate": 7.246605873895109e-06, "loss": 0.3847, "step": 26525 }, { "epoch": 0.5888946848536643, "grad_norm": 0.7500023245811462, "learning_rate": 7.243254031433262e-06, "loss": 0.4246, "step": 26530 }, { "epoch": 0.5890056714131918, "grad_norm": 1.2583023309707642, "learning_rate": 7.2399025241273406e-06, "loss": 0.4899, "step": 26535 }, { "epoch": 0.5891166579727195, "grad_norm": 1.3657556772232056, "learning_rate": 7.2365513523848156e-06, "loss": 0.4028, "step": 26540 }, { "epoch": 0.5892276445322472, "grad_norm": 1.4807183742523193, "learning_rate": 7.233200516613109e-06, "loss": 0.3921, "step": 26545 }, { "epoch": 0.5893386310917748, "grad_norm": 1.4240939617156982, "learning_rate": 7.2298500172196054e-06, "loss": 0.3524, "step": 26550 }, { "epoch": 0.5894496176513024, "grad_norm": 1.1841248273849487, "learning_rate": 7.226499854611646e-06, "loss": 0.4088, "step": 26555 }, { "epoch": 0.5895606042108301, "grad_norm": 1.4342392683029175, "learning_rate": 7.223150029196532e-06, "loss": 0.4176, "step": 26560 }, { "epoch": 0.5896715907703577, "grad_norm": 1.3130017518997192, "learning_rate": 7.219800541381526e-06, "loss": 0.4723, "step": 26565 }, { "epoch": 0.5897825773298854, "grad_norm": 1.9191733598709106, "learning_rate": 7.216451391573844e-06, "loss": 0.3313, "step": 26570 }, { "epoch": 0.5898935638894129, "grad_norm": 1.1599457263946533, "learning_rate": 7.213102580180666e-06, "loss": 0.4146, "step": 26575 }, { "epoch": 0.5900045504489406, "grad_norm": 1.4810781478881836, "learning_rate": 7.209754107609132e-06, "loss": 0.4848, "step": 26580 }, { "epoch": 0.5901155370084683, "grad_norm": 0.3967765271663666, "learning_rate": 7.206405974266335e-06, "loss": 0.297, "step": 26585 }, { "epoch": 0.5902265235679959, "grad_norm": 1.0816624164581299, "learning_rate": 7.203058180559332e-06, "loss": 0.472, "step": 26590 }, { "epoch": 0.5903375101275236, "grad_norm": 1.3460806608200073, "learning_rate": 7.199710726895133e-06, "loss": 0.5931, "step": 26595 }, { "epoch": 0.5904484966870512, "grad_norm": 1.2487653493881226, "learning_rate": 7.19636361368071e-06, "loss": 0.454, "step": 26600 }, { "epoch": 0.5905594832465788, "grad_norm": 1.078736424446106, "learning_rate": 7.1930168413230025e-06, "loss": 0.3498, "step": 26605 }, { "epoch": 0.5906704698061065, "grad_norm": 1.2618157863616943, "learning_rate": 7.189670410228889e-06, "loss": 0.4946, "step": 26610 }, { "epoch": 0.5907814563656342, "grad_norm": 0.9752281308174133, "learning_rate": 7.186324320805226e-06, "loss": 0.3529, "step": 26615 }, { "epoch": 0.5908924429251617, "grad_norm": 0.8908410668373108, "learning_rate": 7.182978573458811e-06, "loss": 0.4021, "step": 26620 }, { "epoch": 0.5910034294846894, "grad_norm": 1.5097835063934326, "learning_rate": 7.1796331685964136e-06, "loss": 0.3922, "step": 26625 }, { "epoch": 0.591114416044217, "grad_norm": 1.578121304512024, "learning_rate": 7.176288106624761e-06, "loss": 0.2987, "step": 26630 }, { "epoch": 0.5912254026037447, "grad_norm": 0.7530714273452759, "learning_rate": 7.172943387950526e-06, "loss": 0.3963, "step": 26635 }, { "epoch": 0.5913363891632724, "grad_norm": 2.377854585647583, "learning_rate": 7.169599012980359e-06, "loss": 0.4138, "step": 26640 }, { "epoch": 0.5914473757227999, "grad_norm": 0.8900545239448547, "learning_rate": 7.166254982120845e-06, "loss": 0.4248, "step": 26645 }, { "epoch": 0.5915583622823276, "grad_norm": 1.3987830877304077, "learning_rate": 7.162911295778552e-06, "loss": 0.2741, "step": 26650 }, { "epoch": 0.5916693488418553, "grad_norm": 1.3931851387023926, "learning_rate": 7.159567954359983e-06, "loss": 0.4107, "step": 26655 }, { "epoch": 0.5917803354013829, "grad_norm": 0.9386917948722839, "learning_rate": 7.15622495827162e-06, "loss": 0.5291, "step": 26660 }, { "epoch": 0.5918913219609105, "grad_norm": 0.991595983505249, "learning_rate": 7.152882307919888e-06, "loss": 0.4553, "step": 26665 }, { "epoch": 0.5920023085204382, "grad_norm": 1.2766691446304321, "learning_rate": 7.149540003711178e-06, "loss": 0.6014, "step": 26670 }, { "epoch": 0.5921132950799658, "grad_norm": 1.4079058170318604, "learning_rate": 7.1461980460518335e-06, "loss": 0.3876, "step": 26675 }, { "epoch": 0.5922242816394935, "grad_norm": 2.7287020683288574, "learning_rate": 7.142856435348159e-06, "loss": 0.4923, "step": 26680 }, { "epoch": 0.592335268199021, "grad_norm": 1.208175539970398, "learning_rate": 7.139515172006416e-06, "loss": 0.365, "step": 26685 }, { "epoch": 0.5924462547585487, "grad_norm": 2.4387660026550293, "learning_rate": 7.136174256432828e-06, "loss": 0.5239, "step": 26690 }, { "epoch": 0.5925572413180764, "grad_norm": 0.8286358714103699, "learning_rate": 7.132833689033567e-06, "loss": 0.449, "step": 26695 }, { "epoch": 0.592668227877604, "grad_norm": 1.651318073272705, "learning_rate": 7.129493470214775e-06, "loss": 0.4033, "step": 26700 }, { "epoch": 0.5927792144371317, "grad_norm": 0.869933009147644, "learning_rate": 7.126153600382533e-06, "loss": 0.3771, "step": 26705 }, { "epoch": 0.5928902009966593, "grad_norm": 1.569030523300171, "learning_rate": 7.122814079942899e-06, "loss": 0.4554, "step": 26710 }, { "epoch": 0.5930011875561869, "grad_norm": 0.9556633234024048, "learning_rate": 7.119474909301886e-06, "loss": 0.4001, "step": 26715 }, { "epoch": 0.5931121741157146, "grad_norm": 1.1097278594970703, "learning_rate": 7.1161360888654466e-06, "loss": 0.4616, "step": 26720 }, { "epoch": 0.5932231606752423, "grad_norm": 1.8948615789413452, "learning_rate": 7.112797619039516e-06, "loss": 0.3212, "step": 26725 }, { "epoch": 0.5933341472347698, "grad_norm": 1.9563379287719727, "learning_rate": 7.109459500229961e-06, "loss": 0.3579, "step": 26730 }, { "epoch": 0.5934451337942975, "grad_norm": 1.5273131132125854, "learning_rate": 7.106121732842633e-06, "loss": 0.4067, "step": 26735 }, { "epoch": 0.5935561203538251, "grad_norm": 1.0542635917663574, "learning_rate": 7.102784317283314e-06, "loss": 0.5198, "step": 26740 }, { "epoch": 0.5936671069133528, "grad_norm": 0.851750910282135, "learning_rate": 7.09944725395776e-06, "loss": 0.3944, "step": 26745 }, { "epoch": 0.5937780934728805, "grad_norm": 1.0166863203048706, "learning_rate": 7.096110543271686e-06, "loss": 0.5288, "step": 26750 }, { "epoch": 0.593889080032408, "grad_norm": 1.1966583728790283, "learning_rate": 7.09277418563075e-06, "loss": 0.421, "step": 26755 }, { "epoch": 0.5940000665919357, "grad_norm": 0.9916155338287354, "learning_rate": 7.089438181440582e-06, "loss": 0.5078, "step": 26760 }, { "epoch": 0.5941110531514634, "grad_norm": 0.8691633939743042, "learning_rate": 7.086102531106755e-06, "loss": 0.4073, "step": 26765 }, { "epoch": 0.594222039710991, "grad_norm": 1.0698593854904175, "learning_rate": 7.082767235034809e-06, "loss": 0.3515, "step": 26770 }, { "epoch": 0.5943330262705186, "grad_norm": 0.8931669592857361, "learning_rate": 7.079432293630244e-06, "loss": 0.4208, "step": 26775 }, { "epoch": 0.5944440128300463, "grad_norm": 1.258162260055542, "learning_rate": 7.0760977072985005e-06, "loss": 0.4374, "step": 26780 }, { "epoch": 0.5945549993895739, "grad_norm": 0.9825992584228516, "learning_rate": 7.072763476444997e-06, "loss": 0.4695, "step": 26785 }, { "epoch": 0.5946659859491016, "grad_norm": 1.4318424463272095, "learning_rate": 7.069429601475088e-06, "loss": 0.3576, "step": 26790 }, { "epoch": 0.5947769725086292, "grad_norm": 0.6295336484909058, "learning_rate": 7.066096082794102e-06, "loss": 0.3883, "step": 26795 }, { "epoch": 0.5948879590681568, "grad_norm": 1.4859687089920044, "learning_rate": 7.0627629208073144e-06, "loss": 0.3997, "step": 26800 }, { "epoch": 0.5949989456276845, "grad_norm": 0.9783358573913574, "learning_rate": 7.0594301159199606e-06, "loss": 0.3838, "step": 26805 }, { "epoch": 0.5951099321872121, "grad_norm": 0.9241087436676025, "learning_rate": 7.056097668537232e-06, "loss": 0.4825, "step": 26810 }, { "epoch": 0.5952209187467398, "grad_norm": 0.8490179777145386, "learning_rate": 7.052765579064273e-06, "loss": 0.3689, "step": 26815 }, { "epoch": 0.5953319053062675, "grad_norm": 1.491245985031128, "learning_rate": 7.049433847906194e-06, "loss": 0.4896, "step": 26820 }, { "epoch": 0.595442891865795, "grad_norm": 1.1904845237731934, "learning_rate": 7.046102475468051e-06, "loss": 0.2843, "step": 26825 }, { "epoch": 0.5955538784253227, "grad_norm": 0.8467894792556763, "learning_rate": 7.04277146215486e-06, "loss": 0.4668, "step": 26830 }, { "epoch": 0.5956648649848504, "grad_norm": 1.1466150283813477, "learning_rate": 7.039440808371602e-06, "loss": 0.4891, "step": 26835 }, { "epoch": 0.595775851544378, "grad_norm": 0.8960450291633606, "learning_rate": 7.036110514523197e-06, "loss": 0.3969, "step": 26840 }, { "epoch": 0.5958868381039056, "grad_norm": 1.353668212890625, "learning_rate": 7.03278058101454e-06, "loss": 0.4128, "step": 26845 }, { "epoch": 0.5959978246634332, "grad_norm": 1.8645679950714111, "learning_rate": 7.029451008250463e-06, "loss": 0.3549, "step": 26850 }, { "epoch": 0.5961088112229609, "grad_norm": 1.427398681640625, "learning_rate": 7.026121796635772e-06, "loss": 0.3455, "step": 26855 }, { "epoch": 0.5962197977824886, "grad_norm": 0.7911583781242371, "learning_rate": 7.022792946575222e-06, "loss": 0.4177, "step": 26860 }, { "epoch": 0.5963307843420161, "grad_norm": 1.1445287466049194, "learning_rate": 7.019464458473518e-06, "loss": 0.6095, "step": 26865 }, { "epoch": 0.5964417709015438, "grad_norm": 0.8763558268547058, "learning_rate": 7.016136332735332e-06, "loss": 0.4996, "step": 26870 }, { "epoch": 0.5965527574610715, "grad_norm": 1.2170817852020264, "learning_rate": 7.012808569765279e-06, "loss": 0.3265, "step": 26875 }, { "epoch": 0.5966637440205991, "grad_norm": 1.1268202066421509, "learning_rate": 7.009481169967943e-06, "loss": 0.5045, "step": 26880 }, { "epoch": 0.5967747305801268, "grad_norm": 1.6648198366165161, "learning_rate": 7.006154133747861e-06, "loss": 0.455, "step": 26885 }, { "epoch": 0.5968857171396544, "grad_norm": 1.6047817468643188, "learning_rate": 7.002827461509514e-06, "loss": 0.266, "step": 26890 }, { "epoch": 0.596996703699182, "grad_norm": 0.7733944058418274, "learning_rate": 6.999501153657358e-06, "loss": 0.3569, "step": 26895 }, { "epoch": 0.5971076902587097, "grad_norm": 1.3187754154205322, "learning_rate": 6.996175210595784e-06, "loss": 0.2942, "step": 26900 }, { "epoch": 0.5972186768182373, "grad_norm": 1.2069423198699951, "learning_rate": 6.992849632729157e-06, "loss": 0.4206, "step": 26905 }, { "epoch": 0.5973296633777649, "grad_norm": 1.752915620803833, "learning_rate": 6.989524420461784e-06, "loss": 0.4684, "step": 26910 }, { "epoch": 0.5974406499372926, "grad_norm": 1.001297950744629, "learning_rate": 6.986199574197936e-06, "loss": 0.4267, "step": 26915 }, { "epoch": 0.5975516364968202, "grad_norm": 1.145909070968628, "learning_rate": 6.982875094341838e-06, "loss": 0.3435, "step": 26920 }, { "epoch": 0.5976626230563479, "grad_norm": 1.3165746927261353, "learning_rate": 6.979550981297666e-06, "loss": 0.3841, "step": 26925 }, { "epoch": 0.5977736096158756, "grad_norm": 1.3009177446365356, "learning_rate": 6.976227235469557e-06, "loss": 0.4375, "step": 26930 }, { "epoch": 0.5978845961754031, "grad_norm": 1.1894065141677856, "learning_rate": 6.972903857261599e-06, "loss": 0.4092, "step": 26935 }, { "epoch": 0.5979955827349308, "grad_norm": 0.9577630758285522, "learning_rate": 6.969580847077836e-06, "loss": 0.3938, "step": 26940 }, { "epoch": 0.5981065692944585, "grad_norm": 0.8460142612457275, "learning_rate": 6.966258205322274e-06, "loss": 0.2974, "step": 26945 }, { "epoch": 0.5982175558539861, "grad_norm": 1.2991278171539307, "learning_rate": 6.962935932398862e-06, "loss": 0.3644, "step": 26950 }, { "epoch": 0.5983285424135137, "grad_norm": 0.9580723643302917, "learning_rate": 6.959614028711517e-06, "loss": 0.2362, "step": 26955 }, { "epoch": 0.5984395289730413, "grad_norm": 0.9842463731765747, "learning_rate": 6.956292494664098e-06, "loss": 0.4265, "step": 26960 }, { "epoch": 0.598550515532569, "grad_norm": 1.0971413850784302, "learning_rate": 6.952971330660429e-06, "loss": 0.3913, "step": 26965 }, { "epoch": 0.5986615020920967, "grad_norm": 1.3643193244934082, "learning_rate": 6.949650537104292e-06, "loss": 0.4197, "step": 26970 }, { "epoch": 0.5987724886516242, "grad_norm": 1.7623727321624756, "learning_rate": 6.946330114399409e-06, "loss": 0.4682, "step": 26975 }, { "epoch": 0.5988834752111519, "grad_norm": 0.7582911849021912, "learning_rate": 6.943010062949471e-06, "loss": 0.3793, "step": 26980 }, { "epoch": 0.5989944617706796, "grad_norm": 1.4799220561981201, "learning_rate": 6.939690383158115e-06, "loss": 0.3809, "step": 26985 }, { "epoch": 0.5991054483302072, "grad_norm": 1.0741524696350098, "learning_rate": 6.936371075428943e-06, "loss": 0.3723, "step": 26990 }, { "epoch": 0.5992164348897349, "grad_norm": 1.4326813220977783, "learning_rate": 6.933052140165496e-06, "loss": 0.5451, "step": 26995 }, { "epoch": 0.5993274214492625, "grad_norm": 1.158925175666809, "learning_rate": 6.9297335777712845e-06, "loss": 0.4122, "step": 27000 }, { "epoch": 0.5994384080087901, "grad_norm": 1.623766303062439, "learning_rate": 6.926415388649772e-06, "loss": 0.4549, "step": 27005 }, { "epoch": 0.5995493945683178, "grad_norm": 0.7449182271957397, "learning_rate": 6.923097573204365e-06, "loss": 0.3833, "step": 27010 }, { "epoch": 0.5996603811278454, "grad_norm": 1.0153058767318726, "learning_rate": 6.919780131838438e-06, "loss": 0.445, "step": 27015 }, { "epoch": 0.599771367687373, "grad_norm": 1.8542829751968384, "learning_rate": 6.91646306495531e-06, "loss": 0.4383, "step": 27020 }, { "epoch": 0.5998823542469007, "grad_norm": 1.3858826160430908, "learning_rate": 6.913146372958263e-06, "loss": 0.331, "step": 27025 }, { "epoch": 0.5999933408064283, "grad_norm": 1.2759438753128052, "learning_rate": 6.909830056250527e-06, "loss": 0.334, "step": 27030 }, { "epoch": 0.600104327365956, "grad_norm": 1.3611114025115967, "learning_rate": 6.90651411523529e-06, "loss": 0.3268, "step": 27035 }, { "epoch": 0.6002153139254837, "grad_norm": 1.6142209768295288, "learning_rate": 6.90319855031569e-06, "loss": 0.407, "step": 27040 }, { "epoch": 0.6003263004850112, "grad_norm": 1.0603861808776855, "learning_rate": 6.899883361894827e-06, "loss": 0.3074, "step": 27045 }, { "epoch": 0.6004372870445389, "grad_norm": 1.0023366212844849, "learning_rate": 6.896568550375744e-06, "loss": 0.4157, "step": 27050 }, { "epoch": 0.6005482736040666, "grad_norm": 1.9315853118896484, "learning_rate": 6.893254116161454e-06, "loss": 0.3693, "step": 27055 }, { "epoch": 0.6006592601635942, "grad_norm": 0.5876302123069763, "learning_rate": 6.889940059654905e-06, "loss": 0.2404, "step": 27060 }, { "epoch": 0.6007702467231218, "grad_norm": 1.1511262655258179, "learning_rate": 6.886626381259016e-06, "loss": 0.302, "step": 27065 }, { "epoch": 0.6008812332826494, "grad_norm": 0.9467554092407227, "learning_rate": 6.883313081376647e-06, "loss": 0.5602, "step": 27070 }, { "epoch": 0.6009922198421771, "grad_norm": 1.7370318174362183, "learning_rate": 6.8800001604106246e-06, "loss": 0.4617, "step": 27075 }, { "epoch": 0.6011032064017048, "grad_norm": 2.1454873085021973, "learning_rate": 6.876687618763716e-06, "loss": 0.415, "step": 27080 }, { "epoch": 0.6012141929612324, "grad_norm": 1.8225048780441284, "learning_rate": 6.873375456838652e-06, "loss": 0.4102, "step": 27085 }, { "epoch": 0.60132517952076, "grad_norm": 1.0236406326293945, "learning_rate": 6.870063675038117e-06, "loss": 0.349, "step": 27090 }, { "epoch": 0.6014361660802877, "grad_norm": 0.8470390439033508, "learning_rate": 6.8667522737647395e-06, "loss": 0.5173, "step": 27095 }, { "epoch": 0.6015471526398153, "grad_norm": 1.1244659423828125, "learning_rate": 6.863441253421117e-06, "loss": 0.4413, "step": 27100 }, { "epoch": 0.601658139199343, "grad_norm": 1.3510165214538574, "learning_rate": 6.860130614409784e-06, "loss": 0.4308, "step": 27105 }, { "epoch": 0.6017691257588706, "grad_norm": 1.0276979207992554, "learning_rate": 6.856820357133239e-06, "loss": 0.5655, "step": 27110 }, { "epoch": 0.6018801123183982, "grad_norm": 0.9559340476989746, "learning_rate": 6.853510481993939e-06, "loss": 0.3937, "step": 27115 }, { "epoch": 0.6019910988779259, "grad_norm": 1.581098198890686, "learning_rate": 6.850200989394278e-06, "loss": 0.3655, "step": 27120 }, { "epoch": 0.6021020854374535, "grad_norm": 0.9266625642776489, "learning_rate": 6.846891879736622e-06, "loss": 0.4016, "step": 27125 }, { "epoch": 0.6022130719969812, "grad_norm": 1.1542038917541504, "learning_rate": 6.84358315342327e-06, "loss": 0.4539, "step": 27130 }, { "epoch": 0.6023240585565088, "grad_norm": 1.1682536602020264, "learning_rate": 6.840274810856493e-06, "loss": 0.4021, "step": 27135 }, { "epoch": 0.6024350451160364, "grad_norm": 1.201403021812439, "learning_rate": 6.836966852438514e-06, "loss": 0.396, "step": 27140 }, { "epoch": 0.6025460316755641, "grad_norm": 1.5981374979019165, "learning_rate": 6.833659278571491e-06, "loss": 0.4464, "step": 27145 }, { "epoch": 0.6026570182350918, "grad_norm": 1.264754056930542, "learning_rate": 6.830352089657557e-06, "loss": 0.4714, "step": 27150 }, { "epoch": 0.6027680047946193, "grad_norm": 1.026802659034729, "learning_rate": 6.827045286098784e-06, "loss": 0.5208, "step": 27155 }, { "epoch": 0.602878991354147, "grad_norm": 1.1696611642837524, "learning_rate": 6.823738868297207e-06, "loss": 0.4067, "step": 27160 }, { "epoch": 0.6029899779136747, "grad_norm": 1.1774004697799683, "learning_rate": 6.820432836654802e-06, "loss": 0.3677, "step": 27165 }, { "epoch": 0.6031009644732023, "grad_norm": 1.192420482635498, "learning_rate": 6.817127191573511e-06, "loss": 0.3367, "step": 27170 }, { "epoch": 0.60321195103273, "grad_norm": 0.8283929824829102, "learning_rate": 6.813821933455222e-06, "loss": 0.4364, "step": 27175 }, { "epoch": 0.6033229375922575, "grad_norm": 1.1864694356918335, "learning_rate": 6.810517062701776e-06, "loss": 0.4281, "step": 27180 }, { "epoch": 0.6034339241517852, "grad_norm": 1.2099741697311401, "learning_rate": 6.80721257971497e-06, "loss": 0.4737, "step": 27185 }, { "epoch": 0.6035449107113129, "grad_norm": 1.3351482152938843, "learning_rate": 6.80390848489655e-06, "loss": 0.5003, "step": 27190 }, { "epoch": 0.6036558972708405, "grad_norm": 1.6452354192733765, "learning_rate": 6.800604778648216e-06, "loss": 0.4329, "step": 27195 }, { "epoch": 0.6037668838303681, "grad_norm": 1.4642529487609863, "learning_rate": 6.797301461371626e-06, "loss": 0.4045, "step": 27200 }, { "epoch": 0.6038778703898958, "grad_norm": 1.5834846496582031, "learning_rate": 6.79399853346838e-06, "loss": 0.3889, "step": 27205 }, { "epoch": 0.6039888569494234, "grad_norm": 1.0351002216339111, "learning_rate": 6.790695995340044e-06, "loss": 0.301, "step": 27210 }, { "epoch": 0.6040998435089511, "grad_norm": 0.9682518243789673, "learning_rate": 6.787393847388122e-06, "loss": 0.2879, "step": 27215 }, { "epoch": 0.6042108300684788, "grad_norm": 1.2477974891662598, "learning_rate": 6.784092090014083e-06, "loss": 0.4827, "step": 27220 }, { "epoch": 0.6043218166280063, "grad_norm": 0.8728592991828918, "learning_rate": 6.7807907236193436e-06, "loss": 0.4047, "step": 27225 }, { "epoch": 0.604432803187534, "grad_norm": 1.7829116582870483, "learning_rate": 6.777489748605271e-06, "loss": 0.4008, "step": 27230 }, { "epoch": 0.6045437897470616, "grad_norm": 1.3560502529144287, "learning_rate": 6.774189165373188e-06, "loss": 0.3258, "step": 27235 }, { "epoch": 0.6046547763065893, "grad_norm": 0.855907142162323, "learning_rate": 6.770888974324365e-06, "loss": 0.4336, "step": 27240 }, { "epoch": 0.6047657628661169, "grad_norm": 1.0651112794876099, "learning_rate": 6.767589175860032e-06, "loss": 0.4607, "step": 27245 }, { "epoch": 0.6048767494256445, "grad_norm": 1.178040862083435, "learning_rate": 6.7642897703813695e-06, "loss": 0.3974, "step": 27250 }, { "epoch": 0.6049877359851722, "grad_norm": 1.5252591371536255, "learning_rate": 6.7609907582895005e-06, "loss": 0.4059, "step": 27255 }, { "epoch": 0.6050987225446999, "grad_norm": 1.074078917503357, "learning_rate": 6.757692139985517e-06, "loss": 0.4739, "step": 27260 }, { "epoch": 0.6052097091042274, "grad_norm": 1.1060175895690918, "learning_rate": 6.754393915870445e-06, "loss": 0.3433, "step": 27265 }, { "epoch": 0.6053206956637551, "grad_norm": 0.9834054112434387, "learning_rate": 6.751096086345279e-06, "loss": 0.3567, "step": 27270 }, { "epoch": 0.6054316822232828, "grad_norm": 1.5103505849838257, "learning_rate": 6.747798651810953e-06, "loss": 0.4983, "step": 27275 }, { "epoch": 0.6055426687828104, "grad_norm": 1.1536871194839478, "learning_rate": 6.74450161266836e-06, "loss": 0.3651, "step": 27280 }, { "epoch": 0.6056536553423381, "grad_norm": 1.0520485639572144, "learning_rate": 6.741204969318343e-06, "loss": 0.4183, "step": 27285 }, { "epoch": 0.6057646419018656, "grad_norm": 1.0779529809951782, "learning_rate": 6.7379087221616965e-06, "loss": 0.4373, "step": 27290 }, { "epoch": 0.6058756284613933, "grad_norm": 1.3279848098754883, "learning_rate": 6.734612871599169e-06, "loss": 0.4022, "step": 27295 }, { "epoch": 0.605986615020921, "grad_norm": 1.6855266094207764, "learning_rate": 6.731317418031456e-06, "loss": 0.4741, "step": 27300 }, { "epoch": 0.6060976015804486, "grad_norm": 1.8417173624038696, "learning_rate": 6.728022361859208e-06, "loss": 0.3657, "step": 27305 }, { "epoch": 0.6062085881399762, "grad_norm": 0.34610217809677124, "learning_rate": 6.72472770348303e-06, "loss": 0.3711, "step": 27310 }, { "epoch": 0.6063195746995039, "grad_norm": 0.9657518863677979, "learning_rate": 6.721433443303471e-06, "loss": 0.4311, "step": 27315 }, { "epoch": 0.6064305612590315, "grad_norm": 1.4196455478668213, "learning_rate": 6.7181395817210415e-06, "loss": 0.4459, "step": 27320 }, { "epoch": 0.6065415478185592, "grad_norm": 0.5634430646896362, "learning_rate": 6.714846119136192e-06, "loss": 0.4148, "step": 27325 }, { "epoch": 0.6066525343780869, "grad_norm": 0.9256958365440369, "learning_rate": 6.711553055949333e-06, "loss": 0.4587, "step": 27330 }, { "epoch": 0.6067635209376144, "grad_norm": 1.0506219863891602, "learning_rate": 6.70826039256083e-06, "loss": 0.3011, "step": 27335 }, { "epoch": 0.6068745074971421, "grad_norm": 0.6727412939071655, "learning_rate": 6.7049681293709836e-06, "loss": 0.4911, "step": 27340 }, { "epoch": 0.6069854940566697, "grad_norm": 1.085914969444275, "learning_rate": 6.701676266780066e-06, "loss": 0.3931, "step": 27345 }, { "epoch": 0.6070964806161974, "grad_norm": 2.1826765537261963, "learning_rate": 6.698384805188283e-06, "loss": 0.3863, "step": 27350 }, { "epoch": 0.607207467175725, "grad_norm": 0.9371523261070251, "learning_rate": 6.695093744995806e-06, "loss": 0.4066, "step": 27355 }, { "epoch": 0.6073184537352526, "grad_norm": 0.7349005937576294, "learning_rate": 6.6918030866027415e-06, "loss": 0.3822, "step": 27360 }, { "epoch": 0.6074294402947803, "grad_norm": 0.9338274002075195, "learning_rate": 6.688512830409167e-06, "loss": 0.3196, "step": 27365 }, { "epoch": 0.607540426854308, "grad_norm": 1.0718753337860107, "learning_rate": 6.6852229768150976e-06, "loss": 0.3626, "step": 27370 }, { "epoch": 0.6076514134138356, "grad_norm": 1.2759748697280884, "learning_rate": 6.681933526220499e-06, "loss": 0.3641, "step": 27375 }, { "epoch": 0.6077623999733632, "grad_norm": 1.4225863218307495, "learning_rate": 6.678644479025298e-06, "loss": 0.3547, "step": 27380 }, { "epoch": 0.6078733865328909, "grad_norm": 0.8354029059410095, "learning_rate": 6.675355835629358e-06, "loss": 0.4423, "step": 27385 }, { "epoch": 0.6079843730924185, "grad_norm": 1.19036865234375, "learning_rate": 6.672067596432506e-06, "loss": 0.5164, "step": 27390 }, { "epoch": 0.6080953596519462, "grad_norm": 1.3374768495559692, "learning_rate": 6.668779761834518e-06, "loss": 0.5144, "step": 27395 }, { "epoch": 0.6082063462114737, "grad_norm": 1.3295682668685913, "learning_rate": 6.665492332235111e-06, "loss": 0.4183, "step": 27400 }, { "epoch": 0.6083173327710014, "grad_norm": 1.1211193799972534, "learning_rate": 6.6622053080339666e-06, "loss": 0.4292, "step": 27405 }, { "epoch": 0.6084283193305291, "grad_norm": 1.4494692087173462, "learning_rate": 6.658918689630706e-06, "loss": 0.4577, "step": 27410 }, { "epoch": 0.6085393058900567, "grad_norm": 0.9899317026138306, "learning_rate": 6.6556324774249025e-06, "loss": 0.2767, "step": 27415 }, { "epoch": 0.6086502924495844, "grad_norm": 1.7873963117599487, "learning_rate": 6.652346671816092e-06, "loss": 0.5098, "step": 27420 }, { "epoch": 0.608761279009112, "grad_norm": 0.9044897556304932, "learning_rate": 6.649061273203741e-06, "loss": 0.329, "step": 27425 }, { "epoch": 0.6088722655686396, "grad_norm": 1.0911178588867188, "learning_rate": 6.645776281987286e-06, "loss": 0.2071, "step": 27430 }, { "epoch": 0.6089832521281673, "grad_norm": 1.748152256011963, "learning_rate": 6.642491698566098e-06, "loss": 0.4249, "step": 27435 }, { "epoch": 0.609094238687695, "grad_norm": 1.0939466953277588, "learning_rate": 6.639207523339512e-06, "loss": 0.3096, "step": 27440 }, { "epoch": 0.6092052252472225, "grad_norm": 1.5443814992904663, "learning_rate": 6.635923756706801e-06, "loss": 0.4344, "step": 27445 }, { "epoch": 0.6093162118067502, "grad_norm": 0.6851821541786194, "learning_rate": 6.632640399067197e-06, "loss": 0.3742, "step": 27450 }, { "epoch": 0.6094271983662778, "grad_norm": 0.9900246262550354, "learning_rate": 6.629357450819885e-06, "loss": 0.5785, "step": 27455 }, { "epoch": 0.6095381849258055, "grad_norm": 1.3923543691635132, "learning_rate": 6.626074912363985e-06, "loss": 0.3563, "step": 27460 }, { "epoch": 0.6096491714853332, "grad_norm": 1.3146992921829224, "learning_rate": 6.622792784098586e-06, "loss": 0.2434, "step": 27465 }, { "epoch": 0.6097601580448607, "grad_norm": 0.7010953426361084, "learning_rate": 6.61951106642271e-06, "loss": 0.3297, "step": 27470 }, { "epoch": 0.6098711446043884, "grad_norm": 0.9393205642700195, "learning_rate": 6.616229759735342e-06, "loss": 0.3292, "step": 27475 }, { "epoch": 0.6099821311639161, "grad_norm": 1.6617045402526855, "learning_rate": 6.612948864435415e-06, "loss": 0.4001, "step": 27480 }, { "epoch": 0.6100931177234437, "grad_norm": 1.3801769018173218, "learning_rate": 6.609668380921801e-06, "loss": 0.3048, "step": 27485 }, { "epoch": 0.6102041042829713, "grad_norm": 2.336073398590088, "learning_rate": 6.6063883095933405e-06, "loss": 0.4305, "step": 27490 }, { "epoch": 0.610315090842499, "grad_norm": 1.2219001054763794, "learning_rate": 6.603108650848802e-06, "loss": 0.4475, "step": 27495 }, { "epoch": 0.6104260774020266, "grad_norm": 1.3967128992080688, "learning_rate": 6.599829405086924e-06, "loss": 0.3382, "step": 27500 }, { "epoch": 0.6105370639615543, "grad_norm": 0.959255039691925, "learning_rate": 6.596550572706386e-06, "loss": 0.4155, "step": 27505 }, { "epoch": 0.6106480505210818, "grad_norm": 1.337578296661377, "learning_rate": 6.593272154105811e-06, "loss": 0.564, "step": 27510 }, { "epoch": 0.6107590370806095, "grad_norm": 1.1137542724609375, "learning_rate": 6.589994149683787e-06, "loss": 0.5698, "step": 27515 }, { "epoch": 0.6108700236401372, "grad_norm": 1.5539424419403076, "learning_rate": 6.586716559838832e-06, "loss": 0.4567, "step": 27520 }, { "epoch": 0.6109810101996648, "grad_norm": 0.9463354349136353, "learning_rate": 6.583439384969437e-06, "loss": 0.5832, "step": 27525 }, { "epoch": 0.6110919967591925, "grad_norm": 2.1490676403045654, "learning_rate": 6.580162625474018e-06, "loss": 0.5059, "step": 27530 }, { "epoch": 0.6112029833187201, "grad_norm": 1.1205565929412842, "learning_rate": 6.57688628175096e-06, "loss": 0.4286, "step": 27535 }, { "epoch": 0.6113139698782477, "grad_norm": 1.2223470211029053, "learning_rate": 6.573610354198587e-06, "loss": 0.4103, "step": 27540 }, { "epoch": 0.6114249564377754, "grad_norm": 1.0583183765411377, "learning_rate": 6.5703348432151784e-06, "loss": 0.4206, "step": 27545 }, { "epoch": 0.6115359429973031, "grad_norm": 1.509799599647522, "learning_rate": 6.567059749198954e-06, "loss": 0.3284, "step": 27550 }, { "epoch": 0.6116469295568306, "grad_norm": 0.9263482093811035, "learning_rate": 6.5637850725480945e-06, "loss": 0.4801, "step": 27555 }, { "epoch": 0.6117579161163583, "grad_norm": 0.9162907600402832, "learning_rate": 6.560510813660719e-06, "loss": 0.5102, "step": 27560 }, { "epoch": 0.6118689026758859, "grad_norm": 1.2033370733261108, "learning_rate": 6.557236972934907e-06, "loss": 0.3699, "step": 27565 }, { "epoch": 0.6119798892354136, "grad_norm": 1.1790424585342407, "learning_rate": 6.5539635507686735e-06, "loss": 0.5855, "step": 27570 }, { "epoch": 0.6120908757949413, "grad_norm": 1.3403029441833496, "learning_rate": 6.55069054756e-06, "loss": 0.4091, "step": 27575 }, { "epoch": 0.6122018623544688, "grad_norm": 0.9827760457992554, "learning_rate": 6.547417963706797e-06, "loss": 0.4158, "step": 27580 }, { "epoch": 0.6123128489139965, "grad_norm": 0.9233430624008179, "learning_rate": 6.544145799606938e-06, "loss": 0.3699, "step": 27585 }, { "epoch": 0.6124238354735242, "grad_norm": 0.7052626609802246, "learning_rate": 6.540874055658249e-06, "loss": 0.2455, "step": 27590 }, { "epoch": 0.6125348220330518, "grad_norm": 1.3502213954925537, "learning_rate": 6.537602732258485e-06, "loss": 0.497, "step": 27595 }, { "epoch": 0.6126458085925794, "grad_norm": 1.0900074243545532, "learning_rate": 6.534331829805373e-06, "loss": 0.4483, "step": 27600 }, { "epoch": 0.6127567951521071, "grad_norm": 1.11318838596344, "learning_rate": 6.53106134869657e-06, "loss": 0.3535, "step": 27605 }, { "epoch": 0.6128677817116347, "grad_norm": 0.7351371049880981, "learning_rate": 6.527791289329699e-06, "loss": 0.3415, "step": 27610 }, { "epoch": 0.6129787682711624, "grad_norm": 1.8652602434158325, "learning_rate": 6.524521652102315e-06, "loss": 0.4084, "step": 27615 }, { "epoch": 0.61308975483069, "grad_norm": 0.9447879195213318, "learning_rate": 6.5212524374119315e-06, "loss": 0.3778, "step": 27620 }, { "epoch": 0.6132007413902176, "grad_norm": 2.6146841049194336, "learning_rate": 6.517983645656014e-06, "loss": 0.463, "step": 27625 }, { "epoch": 0.6133117279497453, "grad_norm": 1.130544900894165, "learning_rate": 6.514715277231963e-06, "loss": 0.5154, "step": 27630 }, { "epoch": 0.6134227145092729, "grad_norm": 0.8905532956123352, "learning_rate": 6.5114473325371445e-06, "loss": 0.444, "step": 27635 }, { "epoch": 0.6135337010688006, "grad_norm": 1.8041813373565674, "learning_rate": 6.508179811968855e-06, "loss": 0.4868, "step": 27640 }, { "epoch": 0.6136446876283282, "grad_norm": 0.8101031184196472, "learning_rate": 6.504912715924355e-06, "loss": 0.6036, "step": 27645 }, { "epoch": 0.6137556741878558, "grad_norm": 1.235679268836975, "learning_rate": 6.501646044800847e-06, "loss": 0.5193, "step": 27650 }, { "epoch": 0.6138666607473835, "grad_norm": 0.6435564160346985, "learning_rate": 6.498379798995478e-06, "loss": 0.3598, "step": 27655 }, { "epoch": 0.6139776473069112, "grad_norm": 0.9466953277587891, "learning_rate": 6.495113978905351e-06, "loss": 0.3198, "step": 27660 }, { "epoch": 0.6140886338664387, "grad_norm": 1.6178840398788452, "learning_rate": 6.4918485849275116e-06, "loss": 0.4214, "step": 27665 }, { "epoch": 0.6141996204259664, "grad_norm": 1.2968192100524902, "learning_rate": 6.488583617458955e-06, "loss": 0.3609, "step": 27670 }, { "epoch": 0.6143106069854941, "grad_norm": 1.3874645233154297, "learning_rate": 6.485319076896628e-06, "loss": 0.4386, "step": 27675 }, { "epoch": 0.6144215935450217, "grad_norm": 0.7128292322158813, "learning_rate": 6.482054963637416e-06, "loss": 0.5341, "step": 27680 }, { "epoch": 0.6145325801045494, "grad_norm": 1.555774211883545, "learning_rate": 6.478791278078169e-06, "loss": 0.4607, "step": 27685 }, { "epoch": 0.6146435666640769, "grad_norm": 1.5064046382904053, "learning_rate": 6.475528020615665e-06, "loss": 0.4431, "step": 27690 }, { "epoch": 0.6147545532236046, "grad_norm": 1.5725188255310059, "learning_rate": 6.472265191646647e-06, "loss": 0.4654, "step": 27695 }, { "epoch": 0.6148655397831323, "grad_norm": 1.4065055847167969, "learning_rate": 6.469002791567792e-06, "loss": 0.5809, "step": 27700 }, { "epoch": 0.6149765263426599, "grad_norm": 1.1861436367034912, "learning_rate": 6.4657408207757365e-06, "loss": 0.3904, "step": 27705 }, { "epoch": 0.6150875129021875, "grad_norm": 1.070533037185669, "learning_rate": 6.4624792796670624e-06, "loss": 0.3536, "step": 27710 }, { "epoch": 0.6151984994617152, "grad_norm": 1.5751197338104248, "learning_rate": 6.459218168638291e-06, "loss": 0.5824, "step": 27715 }, { "epoch": 0.6153094860212428, "grad_norm": 0.9858487248420715, "learning_rate": 6.4559574880859015e-06, "loss": 0.4948, "step": 27720 }, { "epoch": 0.6154204725807705, "grad_norm": 1.114532709121704, "learning_rate": 6.452697238406311e-06, "loss": 0.5044, "step": 27725 }, { "epoch": 0.6155314591402982, "grad_norm": 1.7676235437393188, "learning_rate": 6.449437419995894e-06, "loss": 0.3233, "step": 27730 }, { "epoch": 0.6156424456998257, "grad_norm": 1.1269664764404297, "learning_rate": 6.446178033250973e-06, "loss": 0.3921, "step": 27735 }, { "epoch": 0.6157534322593534, "grad_norm": 1.5101771354675293, "learning_rate": 6.442919078567803e-06, "loss": 0.4236, "step": 27740 }, { "epoch": 0.615864418818881, "grad_norm": 0.6449528336524963, "learning_rate": 6.439660556342606e-06, "loss": 0.3422, "step": 27745 }, { "epoch": 0.6159754053784087, "grad_norm": 1.5987694263458252, "learning_rate": 6.436402466971534e-06, "loss": 0.4795, "step": 27750 }, { "epoch": 0.6160863919379364, "grad_norm": 1.3472490310668945, "learning_rate": 6.4331448108507e-06, "loss": 0.5204, "step": 27755 }, { "epoch": 0.6161973784974639, "grad_norm": 1.8186256885528564, "learning_rate": 6.42988758837616e-06, "loss": 0.3725, "step": 27760 }, { "epoch": 0.6163083650569916, "grad_norm": 0.9955191016197205, "learning_rate": 6.426630799943911e-06, "loss": 0.3704, "step": 27765 }, { "epoch": 0.6164193516165193, "grad_norm": 1.6446115970611572, "learning_rate": 6.423374445949908e-06, "loss": 0.5087, "step": 27770 }, { "epoch": 0.6165303381760469, "grad_norm": 0.7298675179481506, "learning_rate": 6.420118526790041e-06, "loss": 0.4841, "step": 27775 }, { "epoch": 0.6166413247355745, "grad_norm": 3.0948612689971924, "learning_rate": 6.416863042860162e-06, "loss": 0.6477, "step": 27780 }, { "epoch": 0.6167523112951022, "grad_norm": 0.7705976366996765, "learning_rate": 6.4136079945560524e-06, "loss": 0.3469, "step": 27785 }, { "epoch": 0.6168632978546298, "grad_norm": 1.0038697719573975, "learning_rate": 6.410353382273458e-06, "loss": 0.4518, "step": 27790 }, { "epoch": 0.6169742844141575, "grad_norm": 1.2131325006484985, "learning_rate": 6.4070992064080606e-06, "loss": 0.4135, "step": 27795 }, { "epoch": 0.617085270973685, "grad_norm": 1.26372230052948, "learning_rate": 6.4038454673554915e-06, "loss": 0.2634, "step": 27800 }, { "epoch": 0.6171962575332127, "grad_norm": 1.0183274745941162, "learning_rate": 6.4005921655113305e-06, "loss": 0.3646, "step": 27805 }, { "epoch": 0.6173072440927404, "grad_norm": 1.1538991928100586, "learning_rate": 6.397339301271103e-06, "loss": 0.4179, "step": 27810 }, { "epoch": 0.617418230652268, "grad_norm": 0.9333328604698181, "learning_rate": 6.3940868750302774e-06, "loss": 0.2912, "step": 27815 }, { "epoch": 0.6175292172117957, "grad_norm": 1.7044806480407715, "learning_rate": 6.39083488718428e-06, "loss": 0.2643, "step": 27820 }, { "epoch": 0.6176402037713233, "grad_norm": 1.2348930835723877, "learning_rate": 6.387583338128471e-06, "loss": 0.4042, "step": 27825 }, { "epoch": 0.6177511903308509, "grad_norm": 1.1704115867614746, "learning_rate": 6.384332228258168e-06, "loss": 0.4946, "step": 27830 }, { "epoch": 0.6178621768903786, "grad_norm": 1.7322683334350586, "learning_rate": 6.3810815579686225e-06, "loss": 0.4124, "step": 27835 }, { "epoch": 0.6179731634499063, "grad_norm": 1.5869555473327637, "learning_rate": 6.377831327655043e-06, "loss": 0.4499, "step": 27840 }, { "epoch": 0.6180841500094338, "grad_norm": 0.9546343684196472, "learning_rate": 6.374581537712588e-06, "loss": 0.4926, "step": 27845 }, { "epoch": 0.6181951365689615, "grad_norm": 1.1583161354064941, "learning_rate": 6.371332188536347e-06, "loss": 0.4863, "step": 27850 }, { "epoch": 0.6183061231284891, "grad_norm": 0.680205225944519, "learning_rate": 6.368083280521372e-06, "loss": 0.3792, "step": 27855 }, { "epoch": 0.6184171096880168, "grad_norm": 1.0736056566238403, "learning_rate": 6.364834814062648e-06, "loss": 0.5166, "step": 27860 }, { "epoch": 0.6185280962475445, "grad_norm": 1.0722342729568481, "learning_rate": 6.361586789555121e-06, "loss": 0.337, "step": 27865 }, { "epoch": 0.618639082807072, "grad_norm": 1.02963125705719, "learning_rate": 6.358339207393663e-06, "loss": 0.1989, "step": 27870 }, { "epoch": 0.6187500693665997, "grad_norm": 0.7019577026367188, "learning_rate": 6.3550920679731134e-06, "loss": 0.492, "step": 27875 }, { "epoch": 0.6188610559261274, "grad_norm": 0.9072814583778381, "learning_rate": 6.35184537168825e-06, "loss": 0.5211, "step": 27880 }, { "epoch": 0.618972042485655, "grad_norm": 1.3982828855514526, "learning_rate": 6.348599118933786e-06, "loss": 0.4369, "step": 27885 }, { "epoch": 0.6190830290451826, "grad_norm": 2.1749801635742188, "learning_rate": 6.3453533101044e-06, "loss": 0.3617, "step": 27890 }, { "epoch": 0.6191940156047103, "grad_norm": 1.2338169813156128, "learning_rate": 6.342107945594698e-06, "loss": 0.5012, "step": 27895 }, { "epoch": 0.6193050021642379, "grad_norm": 1.5822906494140625, "learning_rate": 6.3388630257992455e-06, "loss": 0.4334, "step": 27900 }, { "epoch": 0.6194159887237656, "grad_norm": 1.246267318725586, "learning_rate": 6.335618551112548e-06, "loss": 0.327, "step": 27905 }, { "epoch": 0.6195269752832931, "grad_norm": 1.0501974821090698, "learning_rate": 6.332374521929059e-06, "loss": 0.4558, "step": 27910 }, { "epoch": 0.6196379618428208, "grad_norm": 1.8456707000732422, "learning_rate": 6.3291309386431744e-06, "loss": 0.3494, "step": 27915 }, { "epoch": 0.6197489484023485, "grad_norm": 1.3171080350875854, "learning_rate": 6.32588780164924e-06, "loss": 0.2822, "step": 27920 }, { "epoch": 0.6198599349618761, "grad_norm": 1.881778597831726, "learning_rate": 6.322645111341541e-06, "loss": 0.4102, "step": 27925 }, { "epoch": 0.6199709215214038, "grad_norm": 0.7797800898551941, "learning_rate": 6.319402868114321e-06, "loss": 0.3568, "step": 27930 }, { "epoch": 0.6200819080809314, "grad_norm": 1.3908886909484863, "learning_rate": 6.3161610723617525e-06, "loss": 0.5446, "step": 27935 }, { "epoch": 0.620192894640459, "grad_norm": 0.75746089220047, "learning_rate": 6.3129197244779715e-06, "loss": 0.3518, "step": 27940 }, { "epoch": 0.6203038811999867, "grad_norm": 0.9157155156135559, "learning_rate": 6.309678824857039e-06, "loss": 0.3124, "step": 27945 }, { "epoch": 0.6204148677595144, "grad_norm": 1.303605556488037, "learning_rate": 6.306438373892985e-06, "loss": 0.4871, "step": 27950 }, { "epoch": 0.620525854319042, "grad_norm": 1.2030810117721558, "learning_rate": 6.30319837197976e-06, "loss": 0.2725, "step": 27955 }, { "epoch": 0.6206368408785696, "grad_norm": 1.1164860725402832, "learning_rate": 6.2999588195112806e-06, "loss": 0.4663, "step": 27960 }, { "epoch": 0.6207478274380972, "grad_norm": 1.380496859550476, "learning_rate": 6.296719716881401e-06, "loss": 0.4018, "step": 27965 }, { "epoch": 0.6208588139976249, "grad_norm": 1.0364583730697632, "learning_rate": 6.293481064483915e-06, "loss": 0.4486, "step": 27970 }, { "epoch": 0.6209698005571526, "grad_norm": 1.5763059854507446, "learning_rate": 6.290242862712576e-06, "loss": 0.4054, "step": 27975 }, { "epoch": 0.6210807871166801, "grad_norm": 0.7757239937782288, "learning_rate": 6.287005111961062e-06, "loss": 0.3067, "step": 27980 }, { "epoch": 0.6211917736762078, "grad_norm": 1.8326990604400635, "learning_rate": 6.283767812623016e-06, "loss": 0.4763, "step": 27985 }, { "epoch": 0.6213027602357355, "grad_norm": 1.3120774030685425, "learning_rate": 6.280530965092019e-06, "loss": 0.3022, "step": 27990 }, { "epoch": 0.6214137467952631, "grad_norm": 1.223878264427185, "learning_rate": 6.2772945697615895e-06, "loss": 0.442, "step": 27995 }, { "epoch": 0.6215247333547907, "grad_norm": 1.3699520826339722, "learning_rate": 6.274058627025205e-06, "loss": 0.4172, "step": 28000 }, { "epoch": 0.6216357199143184, "grad_norm": 0.9021301865577698, "learning_rate": 6.270823137276271e-06, "loss": 0.3973, "step": 28005 }, { "epoch": 0.621746706473846, "grad_norm": 1.7066882848739624, "learning_rate": 6.267588100908159e-06, "loss": 0.4331, "step": 28010 }, { "epoch": 0.6218576930333737, "grad_norm": 1.2832542657852173, "learning_rate": 6.264353518314166e-06, "loss": 0.4259, "step": 28015 }, { "epoch": 0.6219686795929013, "grad_norm": 1.3464566469192505, "learning_rate": 6.261119389887545e-06, "loss": 0.5368, "step": 28020 }, { "epoch": 0.6220796661524289, "grad_norm": 1.6499924659729004, "learning_rate": 6.257885716021488e-06, "loss": 0.5052, "step": 28025 }, { "epoch": 0.6221906527119566, "grad_norm": 1.5775763988494873, "learning_rate": 6.254652497109136e-06, "loss": 0.4077, "step": 28030 }, { "epoch": 0.6223016392714842, "grad_norm": 1.848100185394287, "learning_rate": 6.251419733543572e-06, "loss": 0.5273, "step": 28035 }, { "epoch": 0.6224126258310119, "grad_norm": 1.6376484632492065, "learning_rate": 6.248187425717827e-06, "loss": 0.3518, "step": 28040 }, { "epoch": 0.6225236123905395, "grad_norm": 1.0268816947937012, "learning_rate": 6.244955574024867e-06, "loss": 0.4048, "step": 28045 }, { "epoch": 0.6226345989500671, "grad_norm": 1.2212193012237549, "learning_rate": 6.241724178857621e-06, "loss": 0.5282, "step": 28050 }, { "epoch": 0.6227455855095948, "grad_norm": 1.729117751121521, "learning_rate": 6.23849324060894e-06, "loss": 0.5671, "step": 28055 }, { "epoch": 0.6228565720691225, "grad_norm": 1.5485546588897705, "learning_rate": 6.235262759671641e-06, "loss": 0.4374, "step": 28060 }, { "epoch": 0.62296755862865, "grad_norm": 0.8996152877807617, "learning_rate": 6.232032736438465e-06, "loss": 0.4608, "step": 28065 }, { "epoch": 0.6230785451881777, "grad_norm": 1.1960076093673706, "learning_rate": 6.228803171302112e-06, "loss": 0.3264, "step": 28070 }, { "epoch": 0.6231895317477053, "grad_norm": 1.4368724822998047, "learning_rate": 6.225574064655227e-06, "loss": 0.3819, "step": 28075 }, { "epoch": 0.623300518307233, "grad_norm": 1.8827331066131592, "learning_rate": 6.222345416890383e-06, "loss": 0.3901, "step": 28080 }, { "epoch": 0.6234115048667607, "grad_norm": 1.6428965330123901, "learning_rate": 6.21911722840012e-06, "loss": 0.5045, "step": 28085 }, { "epoch": 0.6235224914262882, "grad_norm": 0.8102307319641113, "learning_rate": 6.215889499576898e-06, "loss": 0.4805, "step": 28090 }, { "epoch": 0.6236334779858159, "grad_norm": 0.8937715888023376, "learning_rate": 6.212662230813141e-06, "loss": 0.4285, "step": 28095 }, { "epoch": 0.6237444645453436, "grad_norm": 1.6061286926269531, "learning_rate": 6.2094354225012124e-06, "loss": 0.484, "step": 28100 }, { "epoch": 0.6238554511048712, "grad_norm": 0.9164207577705383, "learning_rate": 6.206209075033408e-06, "loss": 0.4131, "step": 28105 }, { "epoch": 0.6239664376643989, "grad_norm": 0.8834840059280396, "learning_rate": 6.202983188801985e-06, "loss": 0.3085, "step": 28110 }, { "epoch": 0.6240774242239265, "grad_norm": 1.0660358667373657, "learning_rate": 6.199757764199128e-06, "loss": 0.393, "step": 28115 }, { "epoch": 0.6241884107834541, "grad_norm": 2.5209226608276367, "learning_rate": 6.196532801616981e-06, "loss": 0.4907, "step": 28120 }, { "epoch": 0.6242993973429818, "grad_norm": 0.8467637896537781, "learning_rate": 6.193308301447616e-06, "loss": 0.4862, "step": 28125 }, { "epoch": 0.6244103839025094, "grad_norm": 1.031442403793335, "learning_rate": 6.190084264083061e-06, "loss": 0.4154, "step": 28130 }, { "epoch": 0.624521370462037, "grad_norm": 0.9046064615249634, "learning_rate": 6.186860689915286e-06, "loss": 0.3292, "step": 28135 }, { "epoch": 0.6246323570215647, "grad_norm": 0.8271287679672241, "learning_rate": 6.183637579336199e-06, "loss": 0.4532, "step": 28140 }, { "epoch": 0.6247433435810923, "grad_norm": 0.8403948545455933, "learning_rate": 6.180414932737659e-06, "loss": 0.3869, "step": 28145 }, { "epoch": 0.62485433014062, "grad_norm": 1.2737436294555664, "learning_rate": 6.177192750511456e-06, "loss": 0.475, "step": 28150 }, { "epoch": 0.6249653167001477, "grad_norm": 1.0544812679290771, "learning_rate": 6.173971033049342e-06, "loss": 0.511, "step": 28155 }, { "epoch": 0.6250763032596752, "grad_norm": 0.8840219974517822, "learning_rate": 6.170749780742998e-06, "loss": 0.3926, "step": 28160 }, { "epoch": 0.6251872898192029, "grad_norm": 1.263260006904602, "learning_rate": 6.167528993984051e-06, "loss": 0.4556, "step": 28165 }, { "epoch": 0.6252982763787306, "grad_norm": 1.5352908372879028, "learning_rate": 6.164308673164078e-06, "loss": 0.4933, "step": 28170 }, { "epoch": 0.6254092629382582, "grad_norm": 0.9619081616401672, "learning_rate": 6.161088818674592e-06, "loss": 0.5107, "step": 28175 }, { "epoch": 0.6255202494977858, "grad_norm": 0.8135992884635925, "learning_rate": 6.1578694309070505e-06, "loss": 0.3525, "step": 28180 }, { "epoch": 0.6256312360573134, "grad_norm": 1.5407414436340332, "learning_rate": 6.154650510252862e-06, "loss": 0.5522, "step": 28185 }, { "epoch": 0.6257422226168411, "grad_norm": 0.9878733158111572, "learning_rate": 6.151432057103366e-06, "loss": 0.4503, "step": 28190 }, { "epoch": 0.6258532091763688, "grad_norm": 0.9099797606468201, "learning_rate": 6.148214071849855e-06, "loss": 0.381, "step": 28195 }, { "epoch": 0.6259641957358963, "grad_norm": 1.2787854671478271, "learning_rate": 6.144996554883556e-06, "loss": 0.3622, "step": 28200 }, { "epoch": 0.626075182295424, "grad_norm": 1.287074089050293, "learning_rate": 6.141779506595651e-06, "loss": 0.4097, "step": 28205 }, { "epoch": 0.6261861688549517, "grad_norm": 1.020998239517212, "learning_rate": 6.138562927377251e-06, "loss": 0.3028, "step": 28210 }, { "epoch": 0.6262971554144793, "grad_norm": 0.7408778667449951, "learning_rate": 6.135346817619419e-06, "loss": 0.3358, "step": 28215 }, { "epoch": 0.626408141974007, "grad_norm": 0.669231653213501, "learning_rate": 6.132131177713165e-06, "loss": 0.4524, "step": 28220 }, { "epoch": 0.6265191285335346, "grad_norm": 0.7824404239654541, "learning_rate": 6.1289160080494256e-06, "loss": 0.421, "step": 28225 }, { "epoch": 0.6266301150930622, "grad_norm": 1.124798059463501, "learning_rate": 6.125701309019101e-06, "loss": 0.2571, "step": 28230 }, { "epoch": 0.6267411016525899, "grad_norm": 1.0454397201538086, "learning_rate": 6.122487081013011e-06, "loss": 0.4768, "step": 28235 }, { "epoch": 0.6268520882121175, "grad_norm": 1.7600301504135132, "learning_rate": 6.1192733244219395e-06, "loss": 0.4956, "step": 28240 }, { "epoch": 0.6269630747716451, "grad_norm": 1.7674944400787354, "learning_rate": 6.1160600396366064e-06, "loss": 0.4959, "step": 28245 }, { "epoch": 0.6270740613311728, "grad_norm": 1.1841814517974854, "learning_rate": 6.112847227047662e-06, "loss": 0.4012, "step": 28250 }, { "epoch": 0.6271850478907004, "grad_norm": 0.8376629948616028, "learning_rate": 6.109634887045721e-06, "loss": 0.3011, "step": 28255 }, { "epoch": 0.6272960344502281, "grad_norm": 0.7868844270706177, "learning_rate": 6.1064230200213196e-06, "loss": 0.2649, "step": 28260 }, { "epoch": 0.6274070210097558, "grad_norm": 1.001660943031311, "learning_rate": 6.103211626364951e-06, "loss": 0.374, "step": 28265 }, { "epoch": 0.6275180075692833, "grad_norm": 0.9349133968353271, "learning_rate": 6.1000007064670445e-06, "loss": 0.5471, "step": 28270 }, { "epoch": 0.627628994128811, "grad_norm": 1.4925248622894287, "learning_rate": 6.096790260717971e-06, "loss": 0.3443, "step": 28275 }, { "epoch": 0.6277399806883387, "grad_norm": 0.9076890349388123, "learning_rate": 6.093580289508047e-06, "loss": 0.4022, "step": 28280 }, { "epoch": 0.6278509672478663, "grad_norm": 0.7487587332725525, "learning_rate": 6.090370793227531e-06, "loss": 0.3874, "step": 28285 }, { "epoch": 0.627961953807394, "grad_norm": 1.1190788745880127, "learning_rate": 6.087161772266623e-06, "loss": 0.2739, "step": 28290 }, { "epoch": 0.6280729403669215, "grad_norm": 1.0714386701583862, "learning_rate": 6.083953227015463e-06, "loss": 0.4865, "step": 28295 }, { "epoch": 0.6281839269264492, "grad_norm": 1.3088133335113525, "learning_rate": 6.080745157864135e-06, "loss": 0.4213, "step": 28300 }, { "epoch": 0.6282949134859769, "grad_norm": 1.300983190536499, "learning_rate": 6.07753756520267e-06, "loss": 0.4912, "step": 28305 }, { "epoch": 0.6284059000455045, "grad_norm": 0.9377284049987793, "learning_rate": 6.074330449421029e-06, "loss": 0.3563, "step": 28310 }, { "epoch": 0.6285168866050321, "grad_norm": 1.3748418092727661, "learning_rate": 6.071123810909131e-06, "loss": 0.5195, "step": 28315 }, { "epoch": 0.6286278731645598, "grad_norm": 1.298658847808838, "learning_rate": 6.067917650056818e-06, "loss": 0.4443, "step": 28320 }, { "epoch": 0.6287388597240874, "grad_norm": 1.233130931854248, "learning_rate": 6.064711967253891e-06, "loss": 0.4408, "step": 28325 }, { "epoch": 0.6288498462836151, "grad_norm": 2.0042026042938232, "learning_rate": 6.06150676289009e-06, "loss": 0.6012, "step": 28330 }, { "epoch": 0.6289608328431427, "grad_norm": 1.11312997341156, "learning_rate": 6.058302037355084e-06, "loss": 0.3594, "step": 28335 }, { "epoch": 0.6290718194026703, "grad_norm": 1.4051095247268677, "learning_rate": 6.055097791038499e-06, "loss": 0.35, "step": 28340 }, { "epoch": 0.629182805962198, "grad_norm": 1.0128973722457886, "learning_rate": 6.051894024329892e-06, "loss": 0.366, "step": 28345 }, { "epoch": 0.6292937925217256, "grad_norm": 0.9693575501441956, "learning_rate": 6.048690737618768e-06, "loss": 0.4185, "step": 28350 }, { "epoch": 0.6294047790812533, "grad_norm": 0.9025382995605469, "learning_rate": 6.0454879312945755e-06, "loss": 0.3497, "step": 28355 }, { "epoch": 0.6295157656407809, "grad_norm": 1.336121916770935, "learning_rate": 6.042285605746696e-06, "loss": 0.4256, "step": 28360 }, { "epoch": 0.6296267522003085, "grad_norm": 1.1140364408493042, "learning_rate": 6.0390837613644615e-06, "loss": 0.2469, "step": 28365 }, { "epoch": 0.6297377387598362, "grad_norm": 1.1654084920883179, "learning_rate": 6.035882398537137e-06, "loss": 0.4281, "step": 28370 }, { "epoch": 0.6298487253193639, "grad_norm": 1.7681803703308105, "learning_rate": 6.032681517653938e-06, "loss": 0.5058, "step": 28375 }, { "epoch": 0.6299597118788914, "grad_norm": 0.9355083107948303, "learning_rate": 6.0294811191040125e-06, "loss": 0.4615, "step": 28380 }, { "epoch": 0.6300706984384191, "grad_norm": 1.3876092433929443, "learning_rate": 6.026281203276456e-06, "loss": 0.4634, "step": 28385 }, { "epoch": 0.6301816849979468, "grad_norm": 1.6106153726577759, "learning_rate": 6.023081770560307e-06, "loss": 0.4213, "step": 28390 }, { "epoch": 0.6302926715574744, "grad_norm": 0.8123453259468079, "learning_rate": 6.019882821344536e-06, "loss": 0.4301, "step": 28395 }, { "epoch": 0.630403658117002, "grad_norm": 1.408504843711853, "learning_rate": 6.016684356018066e-06, "loss": 0.6509, "step": 28400 }, { "epoch": 0.6305146446765296, "grad_norm": 1.090542197227478, "learning_rate": 6.01348637496975e-06, "loss": 0.403, "step": 28405 }, { "epoch": 0.6306256312360573, "grad_norm": 1.3139768838882446, "learning_rate": 6.010288878588393e-06, "loss": 0.3842, "step": 28410 }, { "epoch": 0.630736617795585, "grad_norm": 0.8315967321395874, "learning_rate": 6.007091867262735e-06, "loss": 0.4683, "step": 28415 }, { "epoch": 0.6308476043551126, "grad_norm": 1.1125186681747437, "learning_rate": 6.003895341381454e-06, "loss": 0.4052, "step": 28420 }, { "epoch": 0.6309585909146402, "grad_norm": 1.2615845203399658, "learning_rate": 6.000699301333177e-06, "loss": 0.4602, "step": 28425 }, { "epoch": 0.6310695774741679, "grad_norm": 1.5077705383300781, "learning_rate": 5.997503747506465e-06, "loss": 0.3776, "step": 28430 }, { "epoch": 0.6311805640336955, "grad_norm": 0.9946312308311462, "learning_rate": 5.994308680289822e-06, "loss": 0.5199, "step": 28435 }, { "epoch": 0.6312915505932232, "grad_norm": 1.2056828737258911, "learning_rate": 5.991114100071701e-06, "loss": 0.3758, "step": 28440 }, { "epoch": 0.6314025371527509, "grad_norm": 1.2828701734542847, "learning_rate": 5.987920007240478e-06, "loss": 0.5064, "step": 28445 }, { "epoch": 0.6315135237122784, "grad_norm": 1.6678309440612793, "learning_rate": 5.98472640218449e-06, "loss": 0.4126, "step": 28450 }, { "epoch": 0.6316245102718061, "grad_norm": 1.8954167366027832, "learning_rate": 5.981533285291995e-06, "loss": 0.5003, "step": 28455 }, { "epoch": 0.6317354968313337, "grad_norm": 1.7117716073989868, "learning_rate": 5.9783406569512105e-06, "loss": 0.3651, "step": 28460 }, { "epoch": 0.6318464833908614, "grad_norm": 0.7891132831573486, "learning_rate": 5.975148517550278e-06, "loss": 0.2982, "step": 28465 }, { "epoch": 0.631957469950389, "grad_norm": 1.4389150142669678, "learning_rate": 5.971956867477289e-06, "loss": 0.5224, "step": 28470 }, { "epoch": 0.6320684565099166, "grad_norm": 1.39657461643219, "learning_rate": 5.96876570712028e-06, "loss": 0.472, "step": 28475 }, { "epoch": 0.6321794430694443, "grad_norm": 1.6805533170700073, "learning_rate": 5.965575036867212e-06, "loss": 0.505, "step": 28480 }, { "epoch": 0.632290429628972, "grad_norm": 1.7978190183639526, "learning_rate": 5.962384857106005e-06, "loss": 0.3385, "step": 28485 }, { "epoch": 0.6324014161884995, "grad_norm": 1.0432755947113037, "learning_rate": 5.9591951682245034e-06, "loss": 0.5049, "step": 28490 }, { "epoch": 0.6325124027480272, "grad_norm": 1.0674772262573242, "learning_rate": 5.956005970610499e-06, "loss": 0.4788, "step": 28495 }, { "epoch": 0.6326233893075549, "grad_norm": 1.2680522203445435, "learning_rate": 5.952817264651732e-06, "loss": 0.4034, "step": 28500 }, { "epoch": 0.6327343758670825, "grad_norm": 1.232718586921692, "learning_rate": 5.949629050735863e-06, "loss": 0.5217, "step": 28505 }, { "epoch": 0.6328453624266102, "grad_norm": 0.9667626023292542, "learning_rate": 5.946441329250517e-06, "loss": 0.3919, "step": 28510 }, { "epoch": 0.6329563489861377, "grad_norm": 1.1591609716415405, "learning_rate": 5.9432541005832324e-06, "loss": 0.4037, "step": 28515 }, { "epoch": 0.6330673355456654, "grad_norm": 1.1515146493911743, "learning_rate": 5.940067365121512e-06, "loss": 0.3504, "step": 28520 }, { "epoch": 0.6331783221051931, "grad_norm": 1.1757744550704956, "learning_rate": 5.936881123252787e-06, "loss": 0.4657, "step": 28525 }, { "epoch": 0.6332893086647207, "grad_norm": 0.8316757082939148, "learning_rate": 5.933695375364425e-06, "loss": 0.4254, "step": 28530 }, { "epoch": 0.6334002952242483, "grad_norm": 1.2674232721328735, "learning_rate": 5.930510121843746e-06, "loss": 0.4108, "step": 28535 }, { "epoch": 0.633511281783776, "grad_norm": 0.9032902121543884, "learning_rate": 5.927325363077996e-06, "loss": 0.5447, "step": 28540 }, { "epoch": 0.6336222683433036, "grad_norm": 1.034072995185852, "learning_rate": 5.924141099454368e-06, "loss": 0.4106, "step": 28545 }, { "epoch": 0.6337332549028313, "grad_norm": 0.8327234983444214, "learning_rate": 5.92095733136e-06, "loss": 0.3196, "step": 28550 }, { "epoch": 0.633844241462359, "grad_norm": 0.9522653818130493, "learning_rate": 5.917774059181956e-06, "loss": 0.3871, "step": 28555 }, { "epoch": 0.6339552280218865, "grad_norm": 1.6604143381118774, "learning_rate": 5.9145912833072535e-06, "loss": 0.3178, "step": 28560 }, { "epoch": 0.6340662145814142, "grad_norm": 0.8660258650779724, "learning_rate": 5.911409004122839e-06, "loss": 0.1758, "step": 28565 }, { "epoch": 0.6341772011409418, "grad_norm": 1.1857221126556396, "learning_rate": 5.90822722201561e-06, "loss": 0.4051, "step": 28570 }, { "epoch": 0.6342881877004695, "grad_norm": 1.7750307321548462, "learning_rate": 5.9050459373723865e-06, "loss": 0.4322, "step": 28575 }, { "epoch": 0.6343991742599971, "grad_norm": 1.1627682447433472, "learning_rate": 5.901865150579946e-06, "loss": 0.39, "step": 28580 }, { "epoch": 0.6345101608195247, "grad_norm": 1.180820107460022, "learning_rate": 5.898684862025001e-06, "loss": 0.2816, "step": 28585 }, { "epoch": 0.6346211473790524, "grad_norm": 1.285776138305664, "learning_rate": 5.895505072094191e-06, "loss": 0.4874, "step": 28590 }, { "epoch": 0.6347321339385801, "grad_norm": 0.9625634551048279, "learning_rate": 5.892325781174113e-06, "loss": 0.3683, "step": 28595 }, { "epoch": 0.6348431204981076, "grad_norm": 0.9896165728569031, "learning_rate": 5.889146989651286e-06, "loss": 0.3098, "step": 28600 }, { "epoch": 0.6349541070576353, "grad_norm": 1.09481680393219, "learning_rate": 5.885968697912181e-06, "loss": 0.3626, "step": 28605 }, { "epoch": 0.635065093617163, "grad_norm": 1.0791270732879639, "learning_rate": 5.88279090634321e-06, "loss": 0.4849, "step": 28610 }, { "epoch": 0.6351760801766906, "grad_norm": 0.7296260595321655, "learning_rate": 5.879613615330708e-06, "loss": 0.323, "step": 28615 }, { "epoch": 0.6352870667362183, "grad_norm": 1.3319703340530396, "learning_rate": 5.876436825260967e-06, "loss": 0.4253, "step": 28620 }, { "epoch": 0.6353980532957458, "grad_norm": 1.4313448667526245, "learning_rate": 5.873260536520205e-06, "loss": 0.5015, "step": 28625 }, { "epoch": 0.6355090398552735, "grad_norm": 1.895022988319397, "learning_rate": 5.870084749494586e-06, "loss": 0.3823, "step": 28630 }, { "epoch": 0.6356200264148012, "grad_norm": 1.3738524913787842, "learning_rate": 5.866909464570215e-06, "loss": 0.4114, "step": 28635 }, { "epoch": 0.6357310129743288, "grad_norm": 0.9916070103645325, "learning_rate": 5.863734682133129e-06, "loss": 0.4405, "step": 28640 }, { "epoch": 0.6358419995338565, "grad_norm": 1.1903655529022217, "learning_rate": 5.860560402569308e-06, "loss": 0.4088, "step": 28645 }, { "epoch": 0.6359529860933841, "grad_norm": 1.4209836721420288, "learning_rate": 5.857386626264673e-06, "loss": 0.4614, "step": 28650 }, { "epoch": 0.6360639726529117, "grad_norm": 1.390576720237732, "learning_rate": 5.854213353605076e-06, "loss": 0.3247, "step": 28655 }, { "epoch": 0.6361749592124394, "grad_norm": 1.4189549684524536, "learning_rate": 5.8510405849763175e-06, "loss": 0.3953, "step": 28660 }, { "epoch": 0.6362859457719671, "grad_norm": 1.592666506767273, "learning_rate": 5.847868320764128e-06, "loss": 0.3987, "step": 28665 }, { "epoch": 0.6363969323314946, "grad_norm": 1.0677518844604492, "learning_rate": 5.844696561354186e-06, "loss": 0.2998, "step": 28670 }, { "epoch": 0.6365079188910223, "grad_norm": 1.2202013731002808, "learning_rate": 5.841525307132097e-06, "loss": 0.4315, "step": 28675 }, { "epoch": 0.6366189054505499, "grad_norm": 1.3202593326568604, "learning_rate": 5.838354558483418e-06, "loss": 0.4257, "step": 28680 }, { "epoch": 0.6367298920100776, "grad_norm": 1.6365586519241333, "learning_rate": 5.8351843157936305e-06, "loss": 0.3262, "step": 28685 }, { "epoch": 0.6368408785696053, "grad_norm": 1.246142029762268, "learning_rate": 5.832014579448167e-06, "loss": 0.4667, "step": 28690 }, { "epoch": 0.6369518651291328, "grad_norm": 1.7104053497314453, "learning_rate": 5.828845349832396e-06, "loss": 0.5282, "step": 28695 }, { "epoch": 0.6370628516886605, "grad_norm": 1.6780058145523071, "learning_rate": 5.825676627331614e-06, "loss": 0.543, "step": 28700 }, { "epoch": 0.6371738382481882, "grad_norm": 1.1392569541931152, "learning_rate": 5.822508412331074e-06, "loss": 0.3653, "step": 28705 }, { "epoch": 0.6372848248077158, "grad_norm": 1.0654574632644653, "learning_rate": 5.819340705215946e-06, "loss": 0.3235, "step": 28710 }, { "epoch": 0.6373958113672434, "grad_norm": 1.223675012588501, "learning_rate": 5.816173506371352e-06, "loss": 0.2812, "step": 28715 }, { "epoch": 0.6375067979267711, "grad_norm": 1.6331626176834106, "learning_rate": 5.813006816182358e-06, "loss": 0.4586, "step": 28720 }, { "epoch": 0.6376177844862987, "grad_norm": 2.492760419845581, "learning_rate": 5.80984063503395e-06, "loss": 0.4431, "step": 28725 }, { "epoch": 0.6377287710458264, "grad_norm": 1.1395188570022583, "learning_rate": 5.8066749633110675e-06, "loss": 0.3516, "step": 28730 }, { "epoch": 0.6378397576053539, "grad_norm": 0.9498520493507385, "learning_rate": 5.803509801398575e-06, "loss": 0.289, "step": 28735 }, { "epoch": 0.6379507441648816, "grad_norm": 1.5028700828552246, "learning_rate": 5.800345149681293e-06, "loss": 0.4283, "step": 28740 }, { "epoch": 0.6380617307244093, "grad_norm": 1.2373186349868774, "learning_rate": 5.797181008543958e-06, "loss": 0.4102, "step": 28745 }, { "epoch": 0.6381727172839369, "grad_norm": 0.6199971437454224, "learning_rate": 5.79401737837126e-06, "loss": 0.1848, "step": 28750 }, { "epoch": 0.6382837038434646, "grad_norm": 1.6247268915176392, "learning_rate": 5.790854259547827e-06, "loss": 0.4643, "step": 28755 }, { "epoch": 0.6383946904029922, "grad_norm": 0.8452243804931641, "learning_rate": 5.787691652458214e-06, "loss": 0.4384, "step": 28760 }, { "epoch": 0.6385056769625198, "grad_norm": 1.413805365562439, "learning_rate": 5.784529557486927e-06, "loss": 0.5771, "step": 28765 }, { "epoch": 0.6386166635220475, "grad_norm": 1.5562477111816406, "learning_rate": 5.781367975018395e-06, "loss": 0.3771, "step": 28770 }, { "epoch": 0.6387276500815752, "grad_norm": 1.1243140697479248, "learning_rate": 5.778206905436996e-06, "loss": 0.4371, "step": 28775 }, { "epoch": 0.6388386366411027, "grad_norm": 0.9571521282196045, "learning_rate": 5.775046349127046e-06, "loss": 0.3678, "step": 28780 }, { "epoch": 0.6389496232006304, "grad_norm": 0.8463472127914429, "learning_rate": 5.771886306472788e-06, "loss": 0.4502, "step": 28785 }, { "epoch": 0.639060609760158, "grad_norm": 1.0742371082305908, "learning_rate": 5.768726777858417e-06, "loss": 0.3888, "step": 28790 }, { "epoch": 0.6391715963196857, "grad_norm": 1.8010051250457764, "learning_rate": 5.765567763668049e-06, "loss": 0.4078, "step": 28795 }, { "epoch": 0.6392825828792134, "grad_norm": 1.1428115367889404, "learning_rate": 5.762409264285752e-06, "loss": 0.3807, "step": 28800 }, { "epoch": 0.6393935694387409, "grad_norm": 0.9962915182113647, "learning_rate": 5.759251280095529e-06, "loss": 0.5052, "step": 28805 }, { "epoch": 0.6395045559982686, "grad_norm": 1.4247018098831177, "learning_rate": 5.756093811481309e-06, "loss": 0.555, "step": 28810 }, { "epoch": 0.6396155425577963, "grad_norm": 1.413870930671692, "learning_rate": 5.7529368588269745e-06, "loss": 0.4547, "step": 28815 }, { "epoch": 0.6397265291173239, "grad_norm": 1.667945146560669, "learning_rate": 5.7497804225163275e-06, "loss": 0.3261, "step": 28820 }, { "epoch": 0.6398375156768515, "grad_norm": 1.1330857276916504, "learning_rate": 5.746624502933128e-06, "loss": 0.3848, "step": 28825 }, { "epoch": 0.6399485022363792, "grad_norm": 0.8767269849777222, "learning_rate": 5.743469100461052e-06, "loss": 0.1863, "step": 28830 }, { "epoch": 0.6400594887959068, "grad_norm": 1.1383758783340454, "learning_rate": 5.740314215483733e-06, "loss": 0.3286, "step": 28835 }, { "epoch": 0.6401704753554345, "grad_norm": 0.9295101165771484, "learning_rate": 5.7371598483847214e-06, "loss": 0.4101, "step": 28840 }, { "epoch": 0.640281461914962, "grad_norm": 0.9914706945419312, "learning_rate": 5.734005999547522e-06, "loss": 0.3852, "step": 28845 }, { "epoch": 0.6403924484744897, "grad_norm": 0.812248945236206, "learning_rate": 5.730852669355562e-06, "loss": 0.4883, "step": 28850 }, { "epoch": 0.6405034350340174, "grad_norm": 2.2017431259155273, "learning_rate": 5.727699858192222e-06, "loss": 0.5746, "step": 28855 }, { "epoch": 0.640614421593545, "grad_norm": 1.232469916343689, "learning_rate": 5.7245475664408e-06, "loss": 0.2837, "step": 28860 }, { "epoch": 0.6407254081530727, "grad_norm": 0.9875843524932861, "learning_rate": 5.721395794484551e-06, "loss": 0.5556, "step": 28865 }, { "epoch": 0.6408363947126003, "grad_norm": 0.9763182997703552, "learning_rate": 5.718244542706648e-06, "loss": 0.5721, "step": 28870 }, { "epoch": 0.6409473812721279, "grad_norm": 0.9100670218467712, "learning_rate": 5.715093811490216e-06, "loss": 0.4426, "step": 28875 }, { "epoch": 0.6410583678316556, "grad_norm": 1.6159127950668335, "learning_rate": 5.711943601218306e-06, "loss": 0.3191, "step": 28880 }, { "epoch": 0.6411693543911833, "grad_norm": 1.117018699645996, "learning_rate": 5.708793912273911e-06, "loss": 0.3323, "step": 28885 }, { "epoch": 0.6412803409507108, "grad_norm": 1.3021478652954102, "learning_rate": 5.705644745039965e-06, "loss": 0.4529, "step": 28890 }, { "epoch": 0.6413913275102385, "grad_norm": 0.9184788465499878, "learning_rate": 5.702496099899324e-06, "loss": 0.4596, "step": 28895 }, { "epoch": 0.6415023140697661, "grad_norm": 0.7418227195739746, "learning_rate": 5.699347977234799e-06, "loss": 0.3201, "step": 28900 }, { "epoch": 0.6416133006292938, "grad_norm": 0.9961920380592346, "learning_rate": 5.696200377429119e-06, "loss": 0.4405, "step": 28905 }, { "epoch": 0.6417242871888215, "grad_norm": 0.6618875861167908, "learning_rate": 5.693053300864968e-06, "loss": 0.4857, "step": 28910 }, { "epoch": 0.641835273748349, "grad_norm": 0.9733692407608032, "learning_rate": 5.6899067479249485e-06, "loss": 0.3799, "step": 28915 }, { "epoch": 0.6419462603078767, "grad_norm": 1.4784406423568726, "learning_rate": 5.686760718991611e-06, "loss": 0.4574, "step": 28920 }, { "epoch": 0.6420572468674044, "grad_norm": 1.488350510597229, "learning_rate": 5.683615214447445e-06, "loss": 0.3339, "step": 28925 }, { "epoch": 0.642168233426932, "grad_norm": 0.9722474813461304, "learning_rate": 5.680470234674859e-06, "loss": 0.3292, "step": 28930 }, { "epoch": 0.6422792199864596, "grad_norm": 0.7445114850997925, "learning_rate": 5.677325780056221e-06, "loss": 0.37, "step": 28935 }, { "epoch": 0.6423902065459873, "grad_norm": 1.06137216091156, "learning_rate": 5.6741818509738124e-06, "loss": 0.4396, "step": 28940 }, { "epoch": 0.6425011931055149, "grad_norm": 0.7704484462738037, "learning_rate": 5.6710384478098675e-06, "loss": 0.3744, "step": 28945 }, { "epoch": 0.6426121796650426, "grad_norm": 2.215862274169922, "learning_rate": 5.667895570946554e-06, "loss": 0.5016, "step": 28950 }, { "epoch": 0.6427231662245702, "grad_norm": 1.1816397905349731, "learning_rate": 5.664753220765964e-06, "loss": 0.3079, "step": 28955 }, { "epoch": 0.6428341527840978, "grad_norm": 1.280444860458374, "learning_rate": 5.661611397650142e-06, "loss": 0.4385, "step": 28960 }, { "epoch": 0.6429451393436255, "grad_norm": 1.6912497282028198, "learning_rate": 5.658470101981053e-06, "loss": 0.3826, "step": 28965 }, { "epoch": 0.6430561259031531, "grad_norm": 1.6810418367385864, "learning_rate": 5.655329334140608e-06, "loss": 0.3615, "step": 28970 }, { "epoch": 0.6431671124626808, "grad_norm": 1.3643230199813843, "learning_rate": 5.652189094510656e-06, "loss": 0.3644, "step": 28975 }, { "epoch": 0.6432780990222084, "grad_norm": 1.0266542434692383, "learning_rate": 5.6490493834729685e-06, "loss": 0.396, "step": 28980 }, { "epoch": 0.643389085581736, "grad_norm": 1.3530707359313965, "learning_rate": 5.645910201409268e-06, "loss": 0.3436, "step": 28985 }, { "epoch": 0.6435000721412637, "grad_norm": 0.9574394226074219, "learning_rate": 5.6427715487012e-06, "loss": 0.2981, "step": 28990 }, { "epoch": 0.6436110587007914, "grad_norm": 1.4541423320770264, "learning_rate": 5.639633425730357e-06, "loss": 0.3871, "step": 28995 }, { "epoch": 0.643722045260319, "grad_norm": 0.9210712909698486, "learning_rate": 5.636495832878257e-06, "loss": 0.5351, "step": 29000 }, { "epoch": 0.6438330318198466, "grad_norm": 0.9838594794273376, "learning_rate": 5.633358770526357e-06, "loss": 0.3142, "step": 29005 }, { "epoch": 0.6439440183793742, "grad_norm": 1.0086106061935425, "learning_rate": 5.630222239056058e-06, "loss": 0.3137, "step": 29010 }, { "epoch": 0.6440550049389019, "grad_norm": 1.020031452178955, "learning_rate": 5.6270862388486806e-06, "loss": 0.3719, "step": 29015 }, { "epoch": 0.6441659914984296, "grad_norm": 1.0523961782455444, "learning_rate": 5.623950770285496e-06, "loss": 0.4242, "step": 29020 }, { "epoch": 0.6442769780579571, "grad_norm": 0.6960680484771729, "learning_rate": 5.620815833747697e-06, "loss": 0.3194, "step": 29025 }, { "epoch": 0.6443879646174848, "grad_norm": 1.0132477283477783, "learning_rate": 5.617681429616421e-06, "loss": 0.4739, "step": 29030 }, { "epoch": 0.6444989511770125, "grad_norm": 1.0914894342422485, "learning_rate": 5.614547558272745e-06, "loss": 0.39, "step": 29035 }, { "epoch": 0.6446099377365401, "grad_norm": 1.1537106037139893, "learning_rate": 5.611414220097665e-06, "loss": 0.4725, "step": 29040 }, { "epoch": 0.6447209242960678, "grad_norm": 1.0115886926651, "learning_rate": 5.6082814154721296e-06, "loss": 0.3318, "step": 29045 }, { "epoch": 0.6448319108555954, "grad_norm": 1.0585254430770874, "learning_rate": 5.6051491447770065e-06, "loss": 0.4163, "step": 29050 }, { "epoch": 0.644942897415123, "grad_norm": 1.8216021060943604, "learning_rate": 5.602017408393113e-06, "loss": 0.5211, "step": 29055 }, { "epoch": 0.6450538839746507, "grad_norm": 1.7823377847671509, "learning_rate": 5.598886206701195e-06, "loss": 0.3622, "step": 29060 }, { "epoch": 0.6451648705341783, "grad_norm": 1.1798115968704224, "learning_rate": 5.59575554008193e-06, "loss": 0.2396, "step": 29065 }, { "epoch": 0.6452758570937059, "grad_norm": 1.2847548723220825, "learning_rate": 5.592625408915939e-06, "loss": 0.4482, "step": 29070 }, { "epoch": 0.6453868436532336, "grad_norm": 1.348514437675476, "learning_rate": 5.589495813583765e-06, "loss": 0.4592, "step": 29075 }, { "epoch": 0.6454978302127612, "grad_norm": 2.274677276611328, "learning_rate": 5.586366754465903e-06, "loss": 0.4655, "step": 29080 }, { "epoch": 0.6456088167722889, "grad_norm": 1.4002689123153687, "learning_rate": 5.583238231942765e-06, "loss": 0.3983, "step": 29085 }, { "epoch": 0.6457198033318166, "grad_norm": 1.5544315576553345, "learning_rate": 5.580110246394712e-06, "loss": 0.3103, "step": 29090 }, { "epoch": 0.6458307898913441, "grad_norm": 2.2772841453552246, "learning_rate": 5.576982798202031e-06, "loss": 0.3768, "step": 29095 }, { "epoch": 0.6459417764508718, "grad_norm": 1.393365740776062, "learning_rate": 5.57385588774495e-06, "loss": 0.4128, "step": 29100 }, { "epoch": 0.6460527630103995, "grad_norm": 1.1971317529678345, "learning_rate": 5.5707295154036225e-06, "loss": 0.4362, "step": 29105 }, { "epoch": 0.6461637495699271, "grad_norm": 0.9054796695709229, "learning_rate": 5.567603681558149e-06, "loss": 0.4237, "step": 29110 }, { "epoch": 0.6462747361294547, "grad_norm": 1.1730214357376099, "learning_rate": 5.564478386588552e-06, "loss": 0.3831, "step": 29115 }, { "epoch": 0.6463857226889823, "grad_norm": 1.0970360040664673, "learning_rate": 5.561353630874802e-06, "loss": 0.4006, "step": 29120 }, { "epoch": 0.64649670924851, "grad_norm": 1.798247218132019, "learning_rate": 5.558229414796785e-06, "loss": 0.3468, "step": 29125 }, { "epoch": 0.6466076958080377, "grad_norm": 1.501258134841919, "learning_rate": 5.555105738734345e-06, "loss": 0.4938, "step": 29130 }, { "epoch": 0.6467186823675652, "grad_norm": 1.220799207687378, "learning_rate": 5.5519826030672375e-06, "loss": 0.4188, "step": 29135 }, { "epoch": 0.6468296689270929, "grad_norm": 1.2013776302337646, "learning_rate": 5.548860008175167e-06, "loss": 0.3992, "step": 29140 }, { "epoch": 0.6469406554866206, "grad_norm": 0.6283308863639832, "learning_rate": 5.545737954437774e-06, "loss": 0.2735, "step": 29145 }, { "epoch": 0.6470516420461482, "grad_norm": 1.1020041704177856, "learning_rate": 5.542616442234618e-06, "loss": 0.3537, "step": 29150 }, { "epoch": 0.6471626286056759, "grad_norm": 0.9292207956314087, "learning_rate": 5.53949547194521e-06, "loss": 0.487, "step": 29155 }, { "epoch": 0.6472736151652035, "grad_norm": 1.0567008256912231, "learning_rate": 5.536375043948979e-06, "loss": 0.453, "step": 29160 }, { "epoch": 0.6473846017247311, "grad_norm": 1.5478672981262207, "learning_rate": 5.533255158625304e-06, "loss": 0.5066, "step": 29165 }, { "epoch": 0.6474955882842588, "grad_norm": 1.153273344039917, "learning_rate": 5.530135816353484e-06, "loss": 0.372, "step": 29170 }, { "epoch": 0.6476065748437864, "grad_norm": 1.4719431400299072, "learning_rate": 5.527017017512759e-06, "loss": 0.5028, "step": 29175 }, { "epoch": 0.647717561403314, "grad_norm": 1.5583471059799194, "learning_rate": 5.5238987624823075e-06, "loss": 0.4037, "step": 29180 }, { "epoch": 0.6478285479628417, "grad_norm": 1.2767226696014404, "learning_rate": 5.52078105164123e-06, "loss": 0.4889, "step": 29185 }, { "epoch": 0.6479395345223693, "grad_norm": 1.0163935422897339, "learning_rate": 5.5176638853685736e-06, "loss": 0.4455, "step": 29190 }, { "epoch": 0.648050521081897, "grad_norm": 1.9621665477752686, "learning_rate": 5.514547264043305e-06, "loss": 0.319, "step": 29195 }, { "epoch": 0.6481615076414247, "grad_norm": 1.4272702932357788, "learning_rate": 5.5114311880443374e-06, "loss": 0.4779, "step": 29200 }, { "epoch": 0.6482724942009522, "grad_norm": 1.5791912078857422, "learning_rate": 5.508315657750516e-06, "loss": 0.4263, "step": 29205 }, { "epoch": 0.6483834807604799, "grad_norm": 1.9292148351669312, "learning_rate": 5.505200673540609e-06, "loss": 0.3609, "step": 29210 }, { "epoch": 0.6484944673200076, "grad_norm": 0.8313891887664795, "learning_rate": 5.502086235793336e-06, "loss": 0.3403, "step": 29215 }, { "epoch": 0.6486054538795352, "grad_norm": 1.1140210628509521, "learning_rate": 5.498972344887328e-06, "loss": 0.4964, "step": 29220 }, { "epoch": 0.6487164404390628, "grad_norm": 1.0608550310134888, "learning_rate": 5.495859001201166e-06, "loss": 0.3615, "step": 29225 }, { "epoch": 0.6488274269985904, "grad_norm": 1.2314826250076294, "learning_rate": 5.492746205113367e-06, "loss": 0.4928, "step": 29230 }, { "epoch": 0.6489384135581181, "grad_norm": 1.6340162754058838, "learning_rate": 5.489633957002362e-06, "loss": 0.2452, "step": 29235 }, { "epoch": 0.6490494001176458, "grad_norm": 1.148646354675293, "learning_rate": 5.486522257246538e-06, "loss": 0.3765, "step": 29240 }, { "epoch": 0.6491603866771734, "grad_norm": 1.5189963579177856, "learning_rate": 5.483411106224199e-06, "loss": 0.4144, "step": 29245 }, { "epoch": 0.649271373236701, "grad_norm": 1.792006254196167, "learning_rate": 5.480300504313593e-06, "loss": 0.2704, "step": 29250 }, { "epoch": 0.6493823597962287, "grad_norm": 0.776985764503479, "learning_rate": 5.47719045189289e-06, "loss": 0.4363, "step": 29255 }, { "epoch": 0.6494933463557563, "grad_norm": 1.555522084236145, "learning_rate": 5.474080949340203e-06, "loss": 0.2998, "step": 29260 }, { "epoch": 0.649604332915284, "grad_norm": 1.0401331186294556, "learning_rate": 5.47097199703358e-06, "loss": 0.4357, "step": 29265 }, { "epoch": 0.6497153194748116, "grad_norm": 1.3011021614074707, "learning_rate": 5.467863595350988e-06, "loss": 0.4046, "step": 29270 }, { "epoch": 0.6498263060343392, "grad_norm": 0.8262555003166199, "learning_rate": 5.4647557446703446e-06, "loss": 0.4975, "step": 29275 }, { "epoch": 0.6499372925938669, "grad_norm": 0.6609529852867126, "learning_rate": 5.461648445369485e-06, "loss": 0.3821, "step": 29280 }, { "epoch": 0.6500482791533945, "grad_norm": 1.106066346168518, "learning_rate": 5.458541697826185e-06, "loss": 0.3786, "step": 29285 }, { "epoch": 0.6501592657129222, "grad_norm": 1.384567141532898, "learning_rate": 5.4554355024181596e-06, "loss": 0.4237, "step": 29290 }, { "epoch": 0.6502702522724498, "grad_norm": 1.3681679964065552, "learning_rate": 5.45232985952304e-06, "loss": 0.2711, "step": 29295 }, { "epoch": 0.6503812388319774, "grad_norm": 0.8765036463737488, "learning_rate": 5.4492247695184085e-06, "loss": 0.336, "step": 29300 }, { "epoch": 0.6504922253915051, "grad_norm": 1.1128723621368408, "learning_rate": 5.446120232781764e-06, "loss": 0.5218, "step": 29305 }, { "epoch": 0.6506032119510328, "grad_norm": 0.899052619934082, "learning_rate": 5.443016249690552e-06, "loss": 0.3567, "step": 29310 }, { "epoch": 0.6507141985105603, "grad_norm": 0.8907622694969177, "learning_rate": 5.439912820622139e-06, "loss": 0.3628, "step": 29315 }, { "epoch": 0.650825185070088, "grad_norm": 0.8252400755882263, "learning_rate": 5.436809945953835e-06, "loss": 0.412, "step": 29320 }, { "epoch": 0.6509361716296157, "grad_norm": 1.7461494207382202, "learning_rate": 5.43370762606287e-06, "loss": 0.4964, "step": 29325 }, { "epoch": 0.6510471581891433, "grad_norm": 1.049271583557129, "learning_rate": 5.430605861326421e-06, "loss": 0.3316, "step": 29330 }, { "epoch": 0.651158144748671, "grad_norm": 1.3792396783828735, "learning_rate": 5.4275046521215844e-06, "loss": 0.5365, "step": 29335 }, { "epoch": 0.6512691313081985, "grad_norm": 1.2729400396347046, "learning_rate": 5.4244039988254e-06, "loss": 0.4268, "step": 29340 }, { "epoch": 0.6513801178677262, "grad_norm": 1.1661580801010132, "learning_rate": 5.4213039018148285e-06, "loss": 0.3322, "step": 29345 }, { "epoch": 0.6514911044272539, "grad_norm": 0.8208044767379761, "learning_rate": 5.418204361466777e-06, "loss": 0.416, "step": 29350 }, { "epoch": 0.6516020909867815, "grad_norm": 0.8626852631568909, "learning_rate": 5.415105378158069e-06, "loss": 0.3311, "step": 29355 }, { "epoch": 0.6517130775463091, "grad_norm": 1.2224349975585938, "learning_rate": 5.412006952265476e-06, "loss": 0.3479, "step": 29360 }, { "epoch": 0.6518240641058368, "grad_norm": 0.9776822924613953, "learning_rate": 5.408909084165688e-06, "loss": 0.5522, "step": 29365 }, { "epoch": 0.6519350506653644, "grad_norm": 1.2499409914016724, "learning_rate": 5.405811774235334e-06, "loss": 0.5577, "step": 29370 }, { "epoch": 0.6520460372248921, "grad_norm": 1.3289955854415894, "learning_rate": 5.402715022850983e-06, "loss": 0.3796, "step": 29375 }, { "epoch": 0.6521570237844198, "grad_norm": 1.8029693365097046, "learning_rate": 5.3996188303891175e-06, "loss": 0.4136, "step": 29380 }, { "epoch": 0.6522680103439473, "grad_norm": 1.359915852546692, "learning_rate": 5.396523197226169e-06, "loss": 0.3164, "step": 29385 }, { "epoch": 0.652378996903475, "grad_norm": 0.7354469299316406, "learning_rate": 5.393428123738487e-06, "loss": 0.5063, "step": 29390 }, { "epoch": 0.6524899834630026, "grad_norm": 1.1061774492263794, "learning_rate": 5.390333610302365e-06, "loss": 0.383, "step": 29395 }, { "epoch": 0.6526009700225303, "grad_norm": 1.4231762886047363, "learning_rate": 5.387239657294028e-06, "loss": 0.4511, "step": 29400 }, { "epoch": 0.6527119565820579, "grad_norm": 1.2462953329086304, "learning_rate": 5.384146265089618e-06, "loss": 0.3371, "step": 29405 }, { "epoch": 0.6528229431415855, "grad_norm": 1.5884225368499756, "learning_rate": 5.381053434065229e-06, "loss": 0.3727, "step": 29410 }, { "epoch": 0.6529339297011132, "grad_norm": 0.7938171625137329, "learning_rate": 5.3779611645968696e-06, "loss": 0.2553, "step": 29415 }, { "epoch": 0.6530449162606409, "grad_norm": 0.7266703844070435, "learning_rate": 5.374869457060494e-06, "loss": 0.3257, "step": 29420 }, { "epoch": 0.6531559028201684, "grad_norm": 1.5917963981628418, "learning_rate": 5.371778311831974e-06, "loss": 0.4968, "step": 29425 }, { "epoch": 0.6532668893796961, "grad_norm": 0.8227339386940002, "learning_rate": 5.368687729287125e-06, "loss": 0.4034, "step": 29430 }, { "epoch": 0.6533778759392238, "grad_norm": 1.4282219409942627, "learning_rate": 5.3655977098016955e-06, "loss": 0.5062, "step": 29435 }, { "epoch": 0.6534888624987514, "grad_norm": 1.2380449771881104, "learning_rate": 5.362508253751349e-06, "loss": 0.4836, "step": 29440 }, { "epoch": 0.6535998490582791, "grad_norm": 1.3846499919891357, "learning_rate": 5.3594193615116995e-06, "loss": 0.4116, "step": 29445 }, { "epoch": 0.6537108356178066, "grad_norm": 1.1998333930969238, "learning_rate": 5.356331033458276e-06, "loss": 0.4964, "step": 29450 }, { "epoch": 0.6538218221773343, "grad_norm": 0.9864094257354736, "learning_rate": 5.353243269966553e-06, "loss": 0.4673, "step": 29455 }, { "epoch": 0.653932808736862, "grad_norm": 1.1273391246795654, "learning_rate": 5.350156071411933e-06, "loss": 0.3415, "step": 29460 }, { "epoch": 0.6540437952963896, "grad_norm": 1.0939232110977173, "learning_rate": 5.347069438169739e-06, "loss": 0.2618, "step": 29465 }, { "epoch": 0.6541547818559172, "grad_norm": 1.1240630149841309, "learning_rate": 5.343983370615242e-06, "loss": 0.2867, "step": 29470 }, { "epoch": 0.6542657684154449, "grad_norm": 0.9784446358680725, "learning_rate": 5.340897869123629e-06, "loss": 0.3292, "step": 29475 }, { "epoch": 0.6543767549749725, "grad_norm": 1.2527360916137695, "learning_rate": 5.3378129340700256e-06, "loss": 0.3011, "step": 29480 }, { "epoch": 0.6544877415345002, "grad_norm": 1.3284943103790283, "learning_rate": 5.334728565829495e-06, "loss": 0.5744, "step": 29485 }, { "epoch": 0.6545987280940279, "grad_norm": 0.7942169308662415, "learning_rate": 5.331644764777016e-06, "loss": 0.413, "step": 29490 }, { "epoch": 0.6547097146535554, "grad_norm": 1.040993094444275, "learning_rate": 5.328561531287513e-06, "loss": 0.2812, "step": 29495 }, { "epoch": 0.6548207012130831, "grad_norm": 1.427320957183838, "learning_rate": 5.325478865735829e-06, "loss": 0.4896, "step": 29500 }, { "epoch": 0.6549316877726107, "grad_norm": 1.2465837001800537, "learning_rate": 5.32239676849675e-06, "loss": 0.5024, "step": 29505 }, { "epoch": 0.6550426743321384, "grad_norm": 1.5954735279083252, "learning_rate": 5.319315239944982e-06, "loss": 0.4399, "step": 29510 }, { "epoch": 0.655153660891666, "grad_norm": 0.9695634841918945, "learning_rate": 5.316234280455168e-06, "loss": 0.3623, "step": 29515 }, { "epoch": 0.6552646474511936, "grad_norm": 1.064803957939148, "learning_rate": 5.313153890401888e-06, "loss": 0.4928, "step": 29520 }, { "epoch": 0.6553756340107213, "grad_norm": 0.8261331915855408, "learning_rate": 5.310074070159634e-06, "loss": 0.2688, "step": 29525 }, { "epoch": 0.655486620570249, "grad_norm": 1.123943567276001, "learning_rate": 5.30699482010285e-06, "loss": 0.2963, "step": 29530 }, { "epoch": 0.6555976071297765, "grad_norm": 1.4670149087905884, "learning_rate": 5.303916140605893e-06, "loss": 0.3191, "step": 29535 }, { "epoch": 0.6557085936893042, "grad_norm": 1.5768437385559082, "learning_rate": 5.300838032043061e-06, "loss": 0.5026, "step": 29540 }, { "epoch": 0.6558195802488319, "grad_norm": 0.774504542350769, "learning_rate": 5.297760494788586e-06, "loss": 0.357, "step": 29545 }, { "epoch": 0.6559305668083595, "grad_norm": 1.2776967287063599, "learning_rate": 5.294683529216616e-06, "loss": 0.5908, "step": 29550 }, { "epoch": 0.6560415533678872, "grad_norm": 1.3623162508010864, "learning_rate": 5.291607135701246e-06, "loss": 0.3338, "step": 29555 }, { "epoch": 0.6561525399274147, "grad_norm": 0.9033370614051819, "learning_rate": 5.288531314616488e-06, "loss": 0.3197, "step": 29560 }, { "epoch": 0.6562635264869424, "grad_norm": 1.1467854976654053, "learning_rate": 5.285456066336292e-06, "loss": 0.2737, "step": 29565 }, { "epoch": 0.6563745130464701, "grad_norm": 1.61380136013031, "learning_rate": 5.2823813912345345e-06, "loss": 0.3388, "step": 29570 }, { "epoch": 0.6564854996059977, "grad_norm": 0.3687746822834015, "learning_rate": 5.2793072896850295e-06, "loss": 0.4065, "step": 29575 }, { "epoch": 0.6565964861655254, "grad_norm": 1.5772136449813843, "learning_rate": 5.276233762061507e-06, "loss": 0.4353, "step": 29580 }, { "epoch": 0.656707472725053, "grad_norm": 1.5397409200668335, "learning_rate": 5.273160808737647e-06, "loss": 0.3549, "step": 29585 }, { "epoch": 0.6568184592845806, "grad_norm": 1.4259532690048218, "learning_rate": 5.270088430087039e-06, "loss": 0.3488, "step": 29590 }, { "epoch": 0.6569294458441083, "grad_norm": 1.0682930946350098, "learning_rate": 5.267016626483219e-06, "loss": 0.4933, "step": 29595 }, { "epoch": 0.657040432403636, "grad_norm": 1.2446573972702026, "learning_rate": 5.263945398299642e-06, "loss": 0.3519, "step": 29600 }, { "epoch": 0.6571514189631635, "grad_norm": 1.5529674291610718, "learning_rate": 5.260874745909704e-06, "loss": 0.4463, "step": 29605 }, { "epoch": 0.6572624055226912, "grad_norm": 1.337281584739685, "learning_rate": 5.2578046696867165e-06, "loss": 0.4352, "step": 29610 }, { "epoch": 0.6573733920822188, "grad_norm": 1.5599809885025024, "learning_rate": 5.254735170003937e-06, "loss": 0.321, "step": 29615 }, { "epoch": 0.6574843786417465, "grad_norm": 1.1805211305618286, "learning_rate": 5.251666247234537e-06, "loss": 0.3249, "step": 29620 }, { "epoch": 0.6575953652012742, "grad_norm": 1.4224441051483154, "learning_rate": 5.248597901751631e-06, "loss": 0.5011, "step": 29625 }, { "epoch": 0.6577063517608017, "grad_norm": 1.2685762643814087, "learning_rate": 5.245530133928259e-06, "loss": 0.2142, "step": 29630 }, { "epoch": 0.6578173383203294, "grad_norm": 1.105307936668396, "learning_rate": 5.242462944137385e-06, "loss": 0.4682, "step": 29635 }, { "epoch": 0.6579283248798571, "grad_norm": 0.9354825019836426, "learning_rate": 5.239396332751916e-06, "loss": 0.2499, "step": 29640 }, { "epoch": 0.6580393114393847, "grad_norm": 0.9176279306411743, "learning_rate": 5.236330300144669e-06, "loss": 0.6083, "step": 29645 }, { "epoch": 0.6581502979989123, "grad_norm": 1.7297823429107666, "learning_rate": 5.233264846688409e-06, "loss": 0.3017, "step": 29650 }, { "epoch": 0.65826128455844, "grad_norm": 1.1669957637786865, "learning_rate": 5.230199972755828e-06, "loss": 0.4691, "step": 29655 }, { "epoch": 0.6583722711179676, "grad_norm": 1.277626633644104, "learning_rate": 5.227135678719531e-06, "loss": 0.395, "step": 29660 }, { "epoch": 0.6584832576774953, "grad_norm": 1.1681313514709473, "learning_rate": 5.224071964952078e-06, "loss": 0.3731, "step": 29665 }, { "epoch": 0.6585942442370228, "grad_norm": 1.8174617290496826, "learning_rate": 5.221008831825931e-06, "loss": 0.4484, "step": 29670 }, { "epoch": 0.6587052307965505, "grad_norm": 0.9886231422424316, "learning_rate": 5.2179462797135095e-06, "loss": 0.3804, "step": 29675 }, { "epoch": 0.6588162173560782, "grad_norm": 1.7291259765625, "learning_rate": 5.214884308987136e-06, "loss": 0.4318, "step": 29680 }, { "epoch": 0.6589272039156058, "grad_norm": 1.1902395486831665, "learning_rate": 5.211822920019081e-06, "loss": 0.3705, "step": 29685 }, { "epoch": 0.6590381904751335, "grad_norm": 1.5060759782791138, "learning_rate": 5.2087621131815404e-06, "loss": 0.4569, "step": 29690 }, { "epoch": 0.6591491770346611, "grad_norm": 1.078432321548462, "learning_rate": 5.205701888846631e-06, "loss": 0.3947, "step": 29695 }, { "epoch": 0.6592601635941887, "grad_norm": 0.9717170000076294, "learning_rate": 5.202642247386409e-06, "loss": 0.4034, "step": 29700 }, { "epoch": 0.6593711501537164, "grad_norm": 1.2641414403915405, "learning_rate": 5.199583189172851e-06, "loss": 0.5615, "step": 29705 }, { "epoch": 0.6594821367132441, "grad_norm": 1.0129261016845703, "learning_rate": 5.1965247145778685e-06, "loss": 0.3732, "step": 29710 }, { "epoch": 0.6595931232727716, "grad_norm": 1.067817211151123, "learning_rate": 5.193466823973307e-06, "loss": 0.3279, "step": 29715 }, { "epoch": 0.6597041098322993, "grad_norm": 1.3536492586135864, "learning_rate": 5.190409517730924e-06, "loss": 0.4702, "step": 29720 }, { "epoch": 0.659815096391827, "grad_norm": 1.6192893981933594, "learning_rate": 5.1873527962224266e-06, "loss": 0.4683, "step": 29725 }, { "epoch": 0.6599260829513546, "grad_norm": 0.9836245775222778, "learning_rate": 5.184296659819431e-06, "loss": 0.3923, "step": 29730 }, { "epoch": 0.6600370695108823, "grad_norm": 1.0475212335586548, "learning_rate": 5.181241108893498e-06, "loss": 0.4763, "step": 29735 }, { "epoch": 0.6601480560704098, "grad_norm": 0.9123460054397583, "learning_rate": 5.178186143816113e-06, "loss": 0.4424, "step": 29740 }, { "epoch": 0.6602590426299375, "grad_norm": 2.083481788635254, "learning_rate": 5.175131764958681e-06, "loss": 0.3078, "step": 29745 }, { "epoch": 0.6603700291894652, "grad_norm": 1.2369792461395264, "learning_rate": 5.172077972692553e-06, "loss": 0.3764, "step": 29750 }, { "epoch": 0.6604810157489928, "grad_norm": 1.2039142847061157, "learning_rate": 5.169024767388989e-06, "loss": 0.3861, "step": 29755 }, { "epoch": 0.6605920023085204, "grad_norm": 1.1370278596878052, "learning_rate": 5.1659721494191964e-06, "loss": 0.4308, "step": 29760 }, { "epoch": 0.6607029888680481, "grad_norm": 1.3318910598754883, "learning_rate": 5.162920119154293e-06, "loss": 0.3658, "step": 29765 }, { "epoch": 0.6608139754275757, "grad_norm": 1.2840640544891357, "learning_rate": 5.1598686769653395e-06, "loss": 0.3036, "step": 29770 }, { "epoch": 0.6609249619871034, "grad_norm": 1.0112098455429077, "learning_rate": 5.156817823223323e-06, "loss": 0.4429, "step": 29775 }, { "epoch": 0.6610359485466311, "grad_norm": 0.8643494844436646, "learning_rate": 5.15376755829915e-06, "loss": 0.4527, "step": 29780 }, { "epoch": 0.6611469351061586, "grad_norm": 1.8895761966705322, "learning_rate": 5.150717882563668e-06, "loss": 0.4424, "step": 29785 }, { "epoch": 0.6612579216656863, "grad_norm": 1.0386717319488525, "learning_rate": 5.147668796387639e-06, "loss": 0.4194, "step": 29790 }, { "epoch": 0.6613689082252139, "grad_norm": 0.8823903203010559, "learning_rate": 5.144620300141763e-06, "loss": 0.2653, "step": 29795 }, { "epoch": 0.6614798947847416, "grad_norm": 1.2153304815292358, "learning_rate": 5.141572394196672e-06, "loss": 0.2755, "step": 29800 }, { "epoch": 0.6615908813442692, "grad_norm": 1.1399264335632324, "learning_rate": 5.1385250789229116e-06, "loss": 0.4392, "step": 29805 }, { "epoch": 0.6617018679037968, "grad_norm": 1.8947248458862305, "learning_rate": 5.1354783546909725e-06, "loss": 0.3716, "step": 29810 }, { "epoch": 0.6618128544633245, "grad_norm": 1.0885425806045532, "learning_rate": 5.132432221871256e-06, "loss": 0.4363, "step": 29815 }, { "epoch": 0.6619238410228522, "grad_norm": 2.46814227104187, "learning_rate": 5.1293866808341084e-06, "loss": 0.5248, "step": 29820 }, { "epoch": 0.6620348275823797, "grad_norm": 0.9989572167396545, "learning_rate": 5.126341731949791e-06, "loss": 0.371, "step": 29825 }, { "epoch": 0.6621458141419074, "grad_norm": 0.8480471968650818, "learning_rate": 5.123297375588503e-06, "loss": 0.4898, "step": 29830 }, { "epoch": 0.6622568007014351, "grad_norm": 1.4133449792861938, "learning_rate": 5.120253612120363e-06, "loss": 0.3531, "step": 29835 }, { "epoch": 0.6623677872609627, "grad_norm": 1.0268564224243164, "learning_rate": 5.117210441915426e-06, "loss": 0.4025, "step": 29840 }, { "epoch": 0.6624787738204904, "grad_norm": 0.7333199977874756, "learning_rate": 5.114167865343664e-06, "loss": 0.3128, "step": 29845 }, { "epoch": 0.6625897603800179, "grad_norm": 1.1543068885803223, "learning_rate": 5.1111258827749925e-06, "loss": 0.4092, "step": 29850 }, { "epoch": 0.6627007469395456, "grad_norm": 1.7395691871643066, "learning_rate": 5.108084494579235e-06, "loss": 0.5329, "step": 29855 }, { "epoch": 0.6628117334990733, "grad_norm": 1.5491636991500854, "learning_rate": 5.1050437011261624e-06, "loss": 0.306, "step": 29860 }, { "epoch": 0.6629227200586009, "grad_norm": 0.8757246136665344, "learning_rate": 5.102003502785456e-06, "loss": 0.3236, "step": 29865 }, { "epoch": 0.6630337066181285, "grad_norm": 1.34422767162323, "learning_rate": 5.098963899926741e-06, "loss": 0.5338, "step": 29870 }, { "epoch": 0.6631446931776562, "grad_norm": 0.969270646572113, "learning_rate": 5.095924892919556e-06, "loss": 0.3907, "step": 29875 }, { "epoch": 0.6632556797371838, "grad_norm": 2.467583417892456, "learning_rate": 5.0928864821333745e-06, "loss": 0.3063, "step": 29880 }, { "epoch": 0.6633666662967115, "grad_norm": 1.0197619199752808, "learning_rate": 5.0898486679376e-06, "loss": 0.4827, "step": 29885 }, { "epoch": 0.6634776528562392, "grad_norm": 1.4381027221679688, "learning_rate": 5.086811450701554e-06, "loss": 0.4524, "step": 29890 }, { "epoch": 0.6635886394157667, "grad_norm": 1.236048698425293, "learning_rate": 5.083774830794499e-06, "loss": 0.3053, "step": 29895 }, { "epoch": 0.6636996259752944, "grad_norm": 1.6473312377929688, "learning_rate": 5.080738808585608e-06, "loss": 0.3745, "step": 29900 }, { "epoch": 0.663810612534822, "grad_norm": 0.7598027586936951, "learning_rate": 5.077703384443995e-06, "loss": 0.5744, "step": 29905 }, { "epoch": 0.6639215990943497, "grad_norm": 1.2780729532241821, "learning_rate": 5.0746685587387e-06, "loss": 0.3281, "step": 29910 }, { "epoch": 0.6640325856538773, "grad_norm": 1.2586196660995483, "learning_rate": 5.0716343318386795e-06, "loss": 0.4889, "step": 29915 }, { "epoch": 0.6641435722134049, "grad_norm": 0.7518161535263062, "learning_rate": 5.068600704112832e-06, "loss": 0.2159, "step": 29920 }, { "epoch": 0.6642545587729326, "grad_norm": 1.3559149503707886, "learning_rate": 5.065567675929968e-06, "loss": 0.4216, "step": 29925 }, { "epoch": 0.6643655453324603, "grad_norm": 1.2818373441696167, "learning_rate": 5.062535247658838e-06, "loss": 0.3533, "step": 29930 }, { "epoch": 0.6644765318919879, "grad_norm": 1.5211478471755981, "learning_rate": 5.059503419668117e-06, "loss": 0.4355, "step": 29935 }, { "epoch": 0.6645875184515155, "grad_norm": 2.1579790115356445, "learning_rate": 5.056472192326398e-06, "loss": 0.3259, "step": 29940 }, { "epoch": 0.6646985050110432, "grad_norm": 0.821919858455658, "learning_rate": 5.053441566002214e-06, "loss": 0.3938, "step": 29945 }, { "epoch": 0.6648094915705708, "grad_norm": 1.3272850513458252, "learning_rate": 5.0504115410640105e-06, "loss": 0.3472, "step": 29950 }, { "epoch": 0.6649204781300985, "grad_norm": 1.5676440000534058, "learning_rate": 5.047382117880178e-06, "loss": 0.4516, "step": 29955 }, { "epoch": 0.665031464689626, "grad_norm": 1.3177131414413452, "learning_rate": 5.044353296819011e-06, "loss": 0.452, "step": 29960 }, { "epoch": 0.6651424512491537, "grad_norm": 1.7546396255493164, "learning_rate": 5.0413250782487524e-06, "loss": 0.4441, "step": 29965 }, { "epoch": 0.6652534378086814, "grad_norm": 0.9149209856987, "learning_rate": 5.0382974625375635e-06, "loss": 0.1941, "step": 29970 }, { "epoch": 0.665364424368209, "grad_norm": 1.5751773118972778, "learning_rate": 5.035270450053526e-06, "loss": 0.6836, "step": 29975 }, { "epoch": 0.6654754109277367, "grad_norm": 1.3157830238342285, "learning_rate": 5.03224404116466e-06, "loss": 0.4885, "step": 29980 }, { "epoch": 0.6655863974872643, "grad_norm": 1.436733365058899, "learning_rate": 5.029218236238899e-06, "loss": 0.4401, "step": 29985 }, { "epoch": 0.6656973840467919, "grad_norm": 2.5588650703430176, "learning_rate": 5.026193035644113e-06, "loss": 0.4649, "step": 29990 }, { "epoch": 0.6658083706063196, "grad_norm": 1.2236137390136719, "learning_rate": 5.023168439748103e-06, "loss": 0.3297, "step": 29995 }, { "epoch": 0.6659193571658473, "grad_norm": 1.1968693733215332, "learning_rate": 5.020144448918578e-06, "loss": 0.3276, "step": 30000 }, { "epoch": 0.6660303437253748, "grad_norm": 1.0230461359024048, "learning_rate": 5.017121063523194e-06, "loss": 0.4819, "step": 30005 }, { "epoch": 0.6661413302849025, "grad_norm": 1.333814024925232, "learning_rate": 5.014098283929516e-06, "loss": 0.3443, "step": 30010 }, { "epoch": 0.6662523168444301, "grad_norm": 1.472735047340393, "learning_rate": 5.011076110505047e-06, "loss": 0.3886, "step": 30015 }, { "epoch": 0.6663633034039578, "grad_norm": 0.960411012172699, "learning_rate": 5.0080545436172155e-06, "loss": 0.3675, "step": 30020 }, { "epoch": 0.6664742899634855, "grad_norm": 1.6177083253860474, "learning_rate": 5.00503358363337e-06, "loss": 0.3223, "step": 30025 }, { "epoch": 0.666585276523013, "grad_norm": 1.1531990766525269, "learning_rate": 5.0020132309207905e-06, "loss": 0.3309, "step": 30030 }, { "epoch": 0.6666962630825407, "grad_norm": 0.785301685333252, "learning_rate": 4.998993485846678e-06, "loss": 0.3819, "step": 30035 }, { "epoch": 0.6668072496420684, "grad_norm": 1.5275787115097046, "learning_rate": 4.99597434877817e-06, "loss": 0.4338, "step": 30040 }, { "epoch": 0.666918236201596, "grad_norm": 2.108081579208374, "learning_rate": 4.9929558200823135e-06, "loss": 0.405, "step": 30045 }, { "epoch": 0.6670292227611236, "grad_norm": 1.3582932949066162, "learning_rate": 4.989937900126096e-06, "loss": 0.4276, "step": 30050 }, { "epoch": 0.6671402093206513, "grad_norm": 1.2807767391204834, "learning_rate": 4.9869205892764306e-06, "loss": 0.5332, "step": 30055 }, { "epoch": 0.6672511958801789, "grad_norm": 1.020280122756958, "learning_rate": 4.983903887900144e-06, "loss": 0.3643, "step": 30060 }, { "epoch": 0.6673621824397066, "grad_norm": 1.1606618165969849, "learning_rate": 4.9808877963640025e-06, "loss": 0.3958, "step": 30065 }, { "epoch": 0.6674731689992341, "grad_norm": 0.8008097410202026, "learning_rate": 4.977872315034687e-06, "loss": 0.4277, "step": 30070 }, { "epoch": 0.6675841555587618, "grad_norm": 1.4365850687026978, "learning_rate": 4.974857444278816e-06, "loss": 0.4398, "step": 30075 }, { "epoch": 0.6676951421182895, "grad_norm": 1.1749114990234375, "learning_rate": 4.97184318446292e-06, "loss": 0.4682, "step": 30080 }, { "epoch": 0.6678061286778171, "grad_norm": 1.1037579774856567, "learning_rate": 4.96882953595347e-06, "loss": 0.2849, "step": 30085 }, { "epoch": 0.6679171152373448, "grad_norm": 1.5772374868392944, "learning_rate": 4.965816499116849e-06, "loss": 0.559, "step": 30090 }, { "epoch": 0.6680281017968724, "grad_norm": 1.2721027135849, "learning_rate": 4.9628040743193775e-06, "loss": 0.5381, "step": 30095 }, { "epoch": 0.6681390883564, "grad_norm": 1.6205239295959473, "learning_rate": 4.9597922619272894e-06, "loss": 0.4568, "step": 30100 }, { "epoch": 0.6682500749159277, "grad_norm": 1.4295194149017334, "learning_rate": 4.956781062306759e-06, "loss": 0.3646, "step": 30105 }, { "epoch": 0.6683610614754554, "grad_norm": 0.6828015446662903, "learning_rate": 4.9537704758238705e-06, "loss": 0.4187, "step": 30110 }, { "epoch": 0.668472048034983, "grad_norm": 1.455264925956726, "learning_rate": 4.950760502844646e-06, "loss": 0.4342, "step": 30115 }, { "epoch": 0.6685830345945106, "grad_norm": 0.7783439755439758, "learning_rate": 4.947751143735022e-06, "loss": 0.3632, "step": 30120 }, { "epoch": 0.6686940211540382, "grad_norm": 1.1136553287506104, "learning_rate": 4.9447423988608744e-06, "loss": 0.3266, "step": 30125 }, { "epoch": 0.6688050077135659, "grad_norm": 1.053621530532837, "learning_rate": 4.941734268587987e-06, "loss": 0.4097, "step": 30130 }, { "epoch": 0.6689159942730936, "grad_norm": 1.3171651363372803, "learning_rate": 4.938726753282085e-06, "loss": 0.5856, "step": 30135 }, { "epoch": 0.6690269808326211, "grad_norm": 1.715846061706543, "learning_rate": 4.935719853308814e-06, "loss": 0.5022, "step": 30140 }, { "epoch": 0.6691379673921488, "grad_norm": 1.1749157905578613, "learning_rate": 4.932713569033734e-06, "loss": 0.399, "step": 30145 }, { "epoch": 0.6692489539516765, "grad_norm": 1.3666599988937378, "learning_rate": 4.929707900822348e-06, "loss": 0.4338, "step": 30150 }, { "epoch": 0.6693599405112041, "grad_norm": 1.496510624885559, "learning_rate": 4.926702849040067e-06, "loss": 0.4298, "step": 30155 }, { "epoch": 0.6694709270707317, "grad_norm": 0.9879202842712402, "learning_rate": 4.923698414052239e-06, "loss": 0.355, "step": 30160 }, { "epoch": 0.6695819136302594, "grad_norm": 0.870068371295929, "learning_rate": 4.920694596224137e-06, "loss": 0.407, "step": 30165 }, { "epoch": 0.669692900189787, "grad_norm": 1.1352063417434692, "learning_rate": 4.917691395920948e-06, "loss": 0.4668, "step": 30170 }, { "epoch": 0.6698038867493147, "grad_norm": 1.1441079378128052, "learning_rate": 4.914688813507798e-06, "loss": 0.4633, "step": 30175 }, { "epoch": 0.6699148733088423, "grad_norm": 1.7710208892822266, "learning_rate": 4.911686849349723e-06, "loss": 0.3025, "step": 30180 }, { "epoch": 0.6700258598683699, "grad_norm": 1.5889769792556763, "learning_rate": 4.908685503811696e-06, "loss": 0.4249, "step": 30185 }, { "epoch": 0.6701368464278976, "grad_norm": 1.0026971101760864, "learning_rate": 4.905684777258616e-06, "loss": 0.3464, "step": 30190 }, { "epoch": 0.6702478329874252, "grad_norm": 1.486332654953003, "learning_rate": 4.90268467005529e-06, "loss": 0.4357, "step": 30195 }, { "epoch": 0.6703588195469529, "grad_norm": 1.3142849206924438, "learning_rate": 4.899685182566472e-06, "loss": 0.2584, "step": 30200 }, { "epoch": 0.6704698061064805, "grad_norm": 1.6392567157745361, "learning_rate": 4.896686315156819e-06, "loss": 0.5574, "step": 30205 }, { "epoch": 0.6705807926660081, "grad_norm": 1.4760245084762573, "learning_rate": 4.893688068190933e-06, "loss": 0.3984, "step": 30210 }, { "epoch": 0.6706917792255358, "grad_norm": 0.6990063786506653, "learning_rate": 4.890690442033323e-06, "loss": 0.3127, "step": 30215 }, { "epoch": 0.6708027657850635, "grad_norm": 0.9935014843940735, "learning_rate": 4.887693437048433e-06, "loss": 0.4458, "step": 30220 }, { "epoch": 0.670913752344591, "grad_norm": 1.9423924684524536, "learning_rate": 4.884697053600635e-06, "loss": 0.5444, "step": 30225 }, { "epoch": 0.6710247389041187, "grad_norm": 0.797426164150238, "learning_rate": 4.881701292054209e-06, "loss": 0.2913, "step": 30230 }, { "epoch": 0.6711357254636463, "grad_norm": 1.5976841449737549, "learning_rate": 4.878706152773377e-06, "loss": 0.5609, "step": 30235 }, { "epoch": 0.671246712023174, "grad_norm": 1.7722688913345337, "learning_rate": 4.8757116361222735e-06, "loss": 0.3951, "step": 30240 }, { "epoch": 0.6713576985827017, "grad_norm": 1.9161877632141113, "learning_rate": 4.872717742464963e-06, "loss": 0.4617, "step": 30245 }, { "epoch": 0.6714686851422292, "grad_norm": 0.7850003242492676, "learning_rate": 4.869724472165438e-06, "loss": 0.3621, "step": 30250 }, { "epoch": 0.6715796717017569, "grad_norm": 1.3206015825271606, "learning_rate": 4.866731825587602e-06, "loss": 0.2995, "step": 30255 }, { "epoch": 0.6716906582612846, "grad_norm": 0.9397589564323425, "learning_rate": 4.863739803095299e-06, "loss": 0.3133, "step": 30260 }, { "epoch": 0.6718016448208122, "grad_norm": 1.0778348445892334, "learning_rate": 4.8607484050522815e-06, "loss": 0.3775, "step": 30265 }, { "epoch": 0.6719126313803399, "grad_norm": 0.6114378571510315, "learning_rate": 4.8577576318222365e-06, "loss": 0.3259, "step": 30270 }, { "epoch": 0.6720236179398675, "grad_norm": 2.020146608352661, "learning_rate": 4.854767483768776e-06, "loss": 0.2728, "step": 30275 }, { "epoch": 0.6721346044993951, "grad_norm": 1.1776325702667236, "learning_rate": 4.851777961255427e-06, "loss": 0.5557, "step": 30280 }, { "epoch": 0.6722455910589228, "grad_norm": 1.3874727487564087, "learning_rate": 4.84878906464565e-06, "loss": 0.3102, "step": 30285 }, { "epoch": 0.6723565776184504, "grad_norm": 0.9559499621391296, "learning_rate": 4.845800794302821e-06, "loss": 0.4892, "step": 30290 }, { "epoch": 0.672467564177978, "grad_norm": 2.322970151901245, "learning_rate": 4.842813150590247e-06, "loss": 0.4686, "step": 30295 }, { "epoch": 0.6725785507375057, "grad_norm": 0.8557878732681274, "learning_rate": 4.839826133871152e-06, "loss": 0.3854, "step": 30300 }, { "epoch": 0.6726895372970333, "grad_norm": 0.7575249671936035, "learning_rate": 4.836839744508693e-06, "loss": 0.3209, "step": 30305 }, { "epoch": 0.672800523856561, "grad_norm": 1.4156855344772339, "learning_rate": 4.8338539828659384e-06, "loss": 0.5021, "step": 30310 }, { "epoch": 0.6729115104160887, "grad_norm": 1.846325159072876, "learning_rate": 4.830868849305894e-06, "loss": 0.3873, "step": 30315 }, { "epoch": 0.6730224969756162, "grad_norm": 0.8236002326011658, "learning_rate": 4.827884344191474e-06, "loss": 0.4415, "step": 30320 }, { "epoch": 0.6731334835351439, "grad_norm": 0.9851003289222717, "learning_rate": 4.824900467885536e-06, "loss": 0.4356, "step": 30325 }, { "epoch": 0.6732444700946716, "grad_norm": 0.9721106290817261, "learning_rate": 4.821917220750838e-06, "loss": 0.5158, "step": 30330 }, { "epoch": 0.6733554566541992, "grad_norm": 0.8726085424423218, "learning_rate": 4.818934603150082e-06, "loss": 0.3236, "step": 30335 }, { "epoch": 0.6734664432137268, "grad_norm": 1.2427781820297241, "learning_rate": 4.815952615445878e-06, "loss": 0.2958, "step": 30340 }, { "epoch": 0.6735774297732544, "grad_norm": 1.540948510169983, "learning_rate": 4.8129712580007725e-06, "loss": 0.45, "step": 30345 }, { "epoch": 0.6736884163327821, "grad_norm": 1.114790916442871, "learning_rate": 4.809990531177221e-06, "loss": 0.2742, "step": 30350 }, { "epoch": 0.6737994028923098, "grad_norm": 1.4274752140045166, "learning_rate": 4.807010435337616e-06, "loss": 0.4615, "step": 30355 }, { "epoch": 0.6739103894518373, "grad_norm": 0.9064056277275085, "learning_rate": 4.804030970844269e-06, "loss": 0.3181, "step": 30360 }, { "epoch": 0.674021376011365, "grad_norm": 1.431104063987732, "learning_rate": 4.801052138059408e-06, "loss": 0.2833, "step": 30365 }, { "epoch": 0.6741323625708927, "grad_norm": 1.2696224451065063, "learning_rate": 4.798073937345194e-06, "loss": 0.2398, "step": 30370 }, { "epoch": 0.6742433491304203, "grad_norm": 1.2749775648117065, "learning_rate": 4.795096369063703e-06, "loss": 0.4965, "step": 30375 }, { "epoch": 0.674354335689948, "grad_norm": 1.1306232213974, "learning_rate": 4.792119433576943e-06, "loss": 0.3975, "step": 30380 }, { "epoch": 0.6744653222494756, "grad_norm": 0.717216432094574, "learning_rate": 4.789143131246832e-06, "loss": 0.5037, "step": 30385 }, { "epoch": 0.6745763088090032, "grad_norm": 1.363181710243225, "learning_rate": 4.786167462435224e-06, "loss": 0.4823, "step": 30390 }, { "epoch": 0.6746872953685309, "grad_norm": 1.6704401969909668, "learning_rate": 4.783192427503893e-06, "loss": 0.2458, "step": 30395 }, { "epoch": 0.6747982819280585, "grad_norm": 1.2120403051376343, "learning_rate": 4.780218026814527e-06, "loss": 0.4551, "step": 30400 }, { "epoch": 0.6749092684875861, "grad_norm": 0.7928174734115601, "learning_rate": 4.777244260728751e-06, "loss": 0.3698, "step": 30405 }, { "epoch": 0.6750202550471138, "grad_norm": 1.511757493019104, "learning_rate": 4.7742711296081e-06, "loss": 0.4478, "step": 30410 }, { "epoch": 0.6751312416066414, "grad_norm": 1.3559863567352295, "learning_rate": 4.771298633814038e-06, "loss": 0.3981, "step": 30415 }, { "epoch": 0.6752422281661691, "grad_norm": 1.2549550533294678, "learning_rate": 4.768326773707956e-06, "loss": 0.399, "step": 30420 }, { "epoch": 0.6753532147256968, "grad_norm": 2.3049769401550293, "learning_rate": 4.765355549651156e-06, "loss": 0.3148, "step": 30425 }, { "epoch": 0.6754642012852243, "grad_norm": 1.6337782144546509, "learning_rate": 4.762384962004877e-06, "loss": 0.3418, "step": 30430 }, { "epoch": 0.675575187844752, "grad_norm": 1.4367377758026123, "learning_rate": 4.7594150111302635e-06, "loss": 0.3908, "step": 30435 }, { "epoch": 0.6756861744042797, "grad_norm": 0.9786241054534912, "learning_rate": 4.7564456973883984e-06, "loss": 0.4951, "step": 30440 }, { "epoch": 0.6757971609638073, "grad_norm": 1.0460742712020874, "learning_rate": 4.753477021140284e-06, "loss": 0.333, "step": 30445 }, { "epoch": 0.675908147523335, "grad_norm": 1.367433786392212, "learning_rate": 4.7505089827468335e-06, "loss": 0.3857, "step": 30450 }, { "epoch": 0.6760191340828625, "grad_norm": 0.8574208617210388, "learning_rate": 4.747541582568899e-06, "loss": 0.3719, "step": 30455 }, { "epoch": 0.6761301206423902, "grad_norm": 0.9311047792434692, "learning_rate": 4.74457482096724e-06, "loss": 0.4381, "step": 30460 }, { "epoch": 0.6762411072019179, "grad_norm": 0.7453902959823608, "learning_rate": 4.741608698302552e-06, "loss": 0.3637, "step": 30465 }, { "epoch": 0.6763520937614454, "grad_norm": 1.4380278587341309, "learning_rate": 4.73864321493544e-06, "loss": 0.3262, "step": 30470 }, { "epoch": 0.6764630803209731, "grad_norm": 1.0542434453964233, "learning_rate": 4.7356783712264405e-06, "loss": 0.452, "step": 30475 }, { "epoch": 0.6765740668805008, "grad_norm": 1.6459670066833496, "learning_rate": 4.732714167536014e-06, "loss": 0.4058, "step": 30480 }, { "epoch": 0.6766850534400284, "grad_norm": 1.1828376054763794, "learning_rate": 4.72975060422453e-06, "loss": 0.3055, "step": 30485 }, { "epoch": 0.6767960399995561, "grad_norm": 0.7649294137954712, "learning_rate": 4.7267876816522966e-06, "loss": 0.3618, "step": 30490 }, { "epoch": 0.6769070265590837, "grad_norm": 1.0336248874664307, "learning_rate": 4.723825400179527e-06, "loss": 0.464, "step": 30495 }, { "epoch": 0.6770180131186113, "grad_norm": 1.0955878496170044, "learning_rate": 4.720863760166371e-06, "loss": 0.4166, "step": 30500 }, { "epoch": 0.677128999678139, "grad_norm": 1.1246545314788818, "learning_rate": 4.717902761972898e-06, "loss": 0.3503, "step": 30505 }, { "epoch": 0.6772399862376666, "grad_norm": 1.230867624282837, "learning_rate": 4.714942405959088e-06, "loss": 0.2898, "step": 30510 }, { "epoch": 0.6773509727971943, "grad_norm": 1.5435923337936401, "learning_rate": 4.71198269248486e-06, "loss": 0.4346, "step": 30515 }, { "epoch": 0.6774619593567219, "grad_norm": 0.9442398548126221, "learning_rate": 4.709023621910037e-06, "loss": 0.5059, "step": 30520 }, { "epoch": 0.6775729459162495, "grad_norm": 1.648020625114441, "learning_rate": 4.706065194594378e-06, "loss": 0.5491, "step": 30525 }, { "epoch": 0.6776839324757772, "grad_norm": 1.0996757745742798, "learning_rate": 4.703107410897563e-06, "loss": 0.471, "step": 30530 }, { "epoch": 0.6777949190353049, "grad_norm": 1.7918943166732788, "learning_rate": 4.700150271179179e-06, "loss": 0.2379, "step": 30535 }, { "epoch": 0.6779059055948324, "grad_norm": 1.1911612749099731, "learning_rate": 4.697193775798755e-06, "loss": 0.3775, "step": 30540 }, { "epoch": 0.6780168921543601, "grad_norm": 1.0882309675216675, "learning_rate": 4.694237925115724e-06, "loss": 0.5021, "step": 30545 }, { "epoch": 0.6781278787138878, "grad_norm": 1.1152145862579346, "learning_rate": 4.691282719489456e-06, "loss": 0.4538, "step": 30550 }, { "epoch": 0.6782388652734154, "grad_norm": 0.7347661852836609, "learning_rate": 4.688328159279228e-06, "loss": 0.5172, "step": 30555 }, { "epoch": 0.678349851832943, "grad_norm": 1.4539144039154053, "learning_rate": 4.68537424484425e-06, "loss": 0.4374, "step": 30560 }, { "epoch": 0.6784608383924706, "grad_norm": 1.5657992362976074, "learning_rate": 4.6824209765436445e-06, "loss": 0.3194, "step": 30565 }, { "epoch": 0.6785718249519983, "grad_norm": 1.41443932056427, "learning_rate": 4.679468354736467e-06, "loss": 0.4329, "step": 30570 }, { "epoch": 0.678682811511526, "grad_norm": 1.2728972434997559, "learning_rate": 4.6765163797816795e-06, "loss": 0.422, "step": 30575 }, { "epoch": 0.6787937980710536, "grad_norm": 0.8089253306388855, "learning_rate": 4.673565052038181e-06, "loss": 0.5511, "step": 30580 }, { "epoch": 0.6789047846305812, "grad_norm": 1.1690912246704102, "learning_rate": 4.670614371864775e-06, "loss": 0.4561, "step": 30585 }, { "epoch": 0.6790157711901089, "grad_norm": 1.8266934156417847, "learning_rate": 4.667664339620206e-06, "loss": 0.3425, "step": 30590 }, { "epoch": 0.6791267577496365, "grad_norm": 1.1937004327774048, "learning_rate": 4.664714955663118e-06, "loss": 0.3204, "step": 30595 }, { "epoch": 0.6792377443091642, "grad_norm": 0.8955053687095642, "learning_rate": 4.661766220352098e-06, "loss": 0.4385, "step": 30600 }, { "epoch": 0.6793487308686919, "grad_norm": 1.1877119541168213, "learning_rate": 4.6588181340456315e-06, "loss": 0.2442, "step": 30605 }, { "epoch": 0.6794597174282194, "grad_norm": 1.1351243257522583, "learning_rate": 4.655870697102145e-06, "loss": 0.5295, "step": 30610 }, { "epoch": 0.6795707039877471, "grad_norm": 0.9337393641471863, "learning_rate": 4.65292390987998e-06, "loss": 0.3629, "step": 30615 }, { "epoch": 0.6796816905472747, "grad_norm": 1.265181303024292, "learning_rate": 4.649977772737389e-06, "loss": 0.3897, "step": 30620 }, { "epoch": 0.6797926771068024, "grad_norm": 1.2284386157989502, "learning_rate": 4.647032286032563e-06, "loss": 0.2598, "step": 30625 }, { "epoch": 0.67990366366633, "grad_norm": 1.3195282220840454, "learning_rate": 4.644087450123594e-06, "loss": 0.3161, "step": 30630 }, { "epoch": 0.6800146502258576, "grad_norm": 1.4926730394363403, "learning_rate": 4.641143265368515e-06, "loss": 0.3973, "step": 30635 }, { "epoch": 0.6801256367853853, "grad_norm": 3.5536727905273438, "learning_rate": 4.638199732125261e-06, "loss": 0.3454, "step": 30640 }, { "epoch": 0.680236623344913, "grad_norm": 1.021014928817749, "learning_rate": 4.635256850751702e-06, "loss": 0.2805, "step": 30645 }, { "epoch": 0.6803476099044405, "grad_norm": 1.885343074798584, "learning_rate": 4.632314621605627e-06, "loss": 0.4288, "step": 30650 }, { "epoch": 0.6804585964639682, "grad_norm": 1.2568762302398682, "learning_rate": 4.629373045044735e-06, "loss": 0.4563, "step": 30655 }, { "epoch": 0.6805695830234959, "grad_norm": 1.048861026763916, "learning_rate": 4.626432121426659e-06, "loss": 0.3522, "step": 30660 }, { "epoch": 0.6806805695830235, "grad_norm": 1.5068540573120117, "learning_rate": 4.623491851108942e-06, "loss": 0.2918, "step": 30665 }, { "epoch": 0.6807915561425512, "grad_norm": 1.1883301734924316, "learning_rate": 4.620552234449052e-06, "loss": 0.363, "step": 30670 }, { "epoch": 0.6809025427020787, "grad_norm": 1.6479636430740356, "learning_rate": 4.6176132718043866e-06, "loss": 0.481, "step": 30675 }, { "epoch": 0.6810135292616064, "grad_norm": 0.9919922351837158, "learning_rate": 4.614674963532244e-06, "loss": 0.37, "step": 30680 }, { "epoch": 0.6811245158211341, "grad_norm": 1.2481129169464111, "learning_rate": 4.611737309989861e-06, "loss": 0.434, "step": 30685 }, { "epoch": 0.6812355023806617, "grad_norm": 1.077807068824768, "learning_rate": 4.608800311534383e-06, "loss": 0.4506, "step": 30690 }, { "epoch": 0.6813464889401893, "grad_norm": 0.9274584650993347, "learning_rate": 4.60586396852288e-06, "loss": 0.4033, "step": 30695 }, { "epoch": 0.681457475499717, "grad_norm": 2.024184226989746, "learning_rate": 4.602928281312351e-06, "loss": 0.3622, "step": 30700 }, { "epoch": 0.6815684620592446, "grad_norm": 1.019030213356018, "learning_rate": 4.599993250259697e-06, "loss": 0.3212, "step": 30705 }, { "epoch": 0.6816794486187723, "grad_norm": 1.0451632738113403, "learning_rate": 4.597058875721756e-06, "loss": 0.2417, "step": 30710 }, { "epoch": 0.6817904351783, "grad_norm": 0.8908951878547668, "learning_rate": 4.594125158055275e-06, "loss": 0.5534, "step": 30715 }, { "epoch": 0.6819014217378275, "grad_norm": 1.572871446609497, "learning_rate": 4.59119209761693e-06, "loss": 0.5749, "step": 30720 }, { "epoch": 0.6820124082973552, "grad_norm": 1.3295234441757202, "learning_rate": 4.588259694763307e-06, "loss": 0.4703, "step": 30725 }, { "epoch": 0.6821233948568828, "grad_norm": 1.2267779111862183, "learning_rate": 4.5853279498509196e-06, "loss": 0.2847, "step": 30730 }, { "epoch": 0.6822343814164105, "grad_norm": 1.4097859859466553, "learning_rate": 4.582396863236205e-06, "loss": 0.2772, "step": 30735 }, { "epoch": 0.6823453679759381, "grad_norm": 1.4641203880310059, "learning_rate": 4.579466435275506e-06, "loss": 0.3562, "step": 30740 }, { "epoch": 0.6824563545354657, "grad_norm": 1.4551869630813599, "learning_rate": 4.576536666325103e-06, "loss": 0.4807, "step": 30745 }, { "epoch": 0.6825673410949934, "grad_norm": 1.5356292724609375, "learning_rate": 4.57360755674118e-06, "loss": 0.3482, "step": 30750 }, { "epoch": 0.6826783276545211, "grad_norm": 1.2408578395843506, "learning_rate": 4.570679106879852e-06, "loss": 0.3142, "step": 30755 }, { "epoch": 0.6827893142140486, "grad_norm": 1.8057538270950317, "learning_rate": 4.567751317097152e-06, "loss": 0.4854, "step": 30760 }, { "epoch": 0.6829003007735763, "grad_norm": 1.3727633953094482, "learning_rate": 4.564824187749025e-06, "loss": 0.4088, "step": 30765 }, { "epoch": 0.683011287333104, "grad_norm": 1.9439268112182617, "learning_rate": 4.561897719191349e-06, "loss": 0.3903, "step": 30770 }, { "epoch": 0.6831222738926316, "grad_norm": 1.17835533618927, "learning_rate": 4.558971911779908e-06, "loss": 0.535, "step": 30775 }, { "epoch": 0.6832332604521593, "grad_norm": 1.093266487121582, "learning_rate": 4.556046765870413e-06, "loss": 0.5418, "step": 30780 }, { "epoch": 0.6833442470116868, "grad_norm": 1.1753915548324585, "learning_rate": 4.5531222818184984e-06, "loss": 0.3903, "step": 30785 }, { "epoch": 0.6834552335712145, "grad_norm": 1.338931679725647, "learning_rate": 4.550198459979706e-06, "loss": 0.4379, "step": 30790 }, { "epoch": 0.6835662201307422, "grad_norm": 1.7753686904907227, "learning_rate": 4.547275300709511e-06, "loss": 0.3737, "step": 30795 }, { "epoch": 0.6836772066902698, "grad_norm": 0.8611935973167419, "learning_rate": 4.544352804363294e-06, "loss": 0.5054, "step": 30800 }, { "epoch": 0.6837881932497974, "grad_norm": 1.2256321907043457, "learning_rate": 4.54143097129637e-06, "loss": 0.3536, "step": 30805 }, { "epoch": 0.6838991798093251, "grad_norm": 1.5533641576766968, "learning_rate": 4.5385098018639585e-06, "loss": 0.6279, "step": 30810 }, { "epoch": 0.6840101663688527, "grad_norm": 1.3186376094818115, "learning_rate": 4.535589296421212e-06, "loss": 0.4289, "step": 30815 }, { "epoch": 0.6841211529283804, "grad_norm": 1.0984452962875366, "learning_rate": 4.5326694553231885e-06, "loss": 0.3821, "step": 30820 }, { "epoch": 0.6842321394879081, "grad_norm": 1.0532371997833252, "learning_rate": 4.529750278924882e-06, "loss": 0.3864, "step": 30825 }, { "epoch": 0.6843431260474356, "grad_norm": 1.2279421091079712, "learning_rate": 4.526831767581186e-06, "loss": 0.4095, "step": 30830 }, { "epoch": 0.6844541126069633, "grad_norm": 0.8931465744972229, "learning_rate": 4.5239139216469316e-06, "loss": 0.4348, "step": 30835 }, { "epoch": 0.6845650991664909, "grad_norm": 0.9575490355491638, "learning_rate": 4.5209967414768545e-06, "loss": 0.4169, "step": 30840 }, { "epoch": 0.6846760857260186, "grad_norm": 1.4071946144104004, "learning_rate": 4.518080227425621e-06, "loss": 0.4897, "step": 30845 }, { "epoch": 0.6847870722855462, "grad_norm": 0.7204203605651855, "learning_rate": 4.515164379847806e-06, "loss": 0.3523, "step": 30850 }, { "epoch": 0.6848980588450738, "grad_norm": 1.372045636177063, "learning_rate": 4.512249199097914e-06, "loss": 0.5325, "step": 30855 }, { "epoch": 0.6850090454046015, "grad_norm": 1.1533608436584473, "learning_rate": 4.509334685530357e-06, "loss": 0.4836, "step": 30860 }, { "epoch": 0.6851200319641292, "grad_norm": 1.05873703956604, "learning_rate": 4.506420839499474e-06, "loss": 0.5, "step": 30865 }, { "epoch": 0.6852310185236568, "grad_norm": 3.0505340099334717, "learning_rate": 4.503507661359524e-06, "loss": 0.4532, "step": 30870 }, { "epoch": 0.6853420050831844, "grad_norm": 0.9786636233329773, "learning_rate": 4.500595151464676e-06, "loss": 0.2672, "step": 30875 }, { "epoch": 0.6854529916427121, "grad_norm": 0.9993107914924622, "learning_rate": 4.49768331016903e-06, "loss": 0.4078, "step": 30880 }, { "epoch": 0.6855639782022397, "grad_norm": 1.8332587480545044, "learning_rate": 4.4947721378265896e-06, "loss": 0.3187, "step": 30885 }, { "epoch": 0.6856749647617674, "grad_norm": 0.5899979472160339, "learning_rate": 4.491861634791294e-06, "loss": 0.4247, "step": 30890 }, { "epoch": 0.6857859513212949, "grad_norm": 1.7995885610580444, "learning_rate": 4.488951801416983e-06, "loss": 0.3427, "step": 30895 }, { "epoch": 0.6858969378808226, "grad_norm": 1.1848108768463135, "learning_rate": 4.4860426380574295e-06, "loss": 0.4209, "step": 30900 }, { "epoch": 0.6860079244403503, "grad_norm": 1.3083794116973877, "learning_rate": 4.483134145066324e-06, "loss": 0.3509, "step": 30905 }, { "epoch": 0.6861189109998779, "grad_norm": 1.1152405738830566, "learning_rate": 4.480226322797263e-06, "loss": 0.3331, "step": 30910 }, { "epoch": 0.6862298975594056, "grad_norm": 1.3240209817886353, "learning_rate": 4.4773191716037774e-06, "loss": 0.4114, "step": 30915 }, { "epoch": 0.6863408841189332, "grad_norm": 0.7910842895507812, "learning_rate": 4.474412691839302e-06, "loss": 0.3904, "step": 30920 }, { "epoch": 0.6864518706784608, "grad_norm": 1.6772698163986206, "learning_rate": 4.471506883857201e-06, "loss": 0.3804, "step": 30925 }, { "epoch": 0.6865628572379885, "grad_norm": 0.955270528793335, "learning_rate": 4.468601748010755e-06, "loss": 0.3493, "step": 30930 }, { "epoch": 0.6866738437975162, "grad_norm": 1.2743643522262573, "learning_rate": 4.465697284653153e-06, "loss": 0.4314, "step": 30935 }, { "epoch": 0.6867848303570437, "grad_norm": 1.24860680103302, "learning_rate": 4.4627934941375185e-06, "loss": 0.3745, "step": 30940 }, { "epoch": 0.6868958169165714, "grad_norm": 0.7497403025627136, "learning_rate": 4.459890376816878e-06, "loss": 0.4471, "step": 30945 }, { "epoch": 0.687006803476099, "grad_norm": 1.4058095216751099, "learning_rate": 4.456987933044185e-06, "loss": 0.2944, "step": 30950 }, { "epoch": 0.6871177900356267, "grad_norm": 1.4874502420425415, "learning_rate": 4.454086163172312e-06, "loss": 0.2856, "step": 30955 }, { "epoch": 0.6872287765951544, "grad_norm": 0.9197191596031189, "learning_rate": 4.45118506755404e-06, "loss": 0.3419, "step": 30960 }, { "epoch": 0.6873397631546819, "grad_norm": 1.030403733253479, "learning_rate": 4.448284646542084e-06, "loss": 0.3863, "step": 30965 }, { "epoch": 0.6874507497142096, "grad_norm": 1.351831078529358, "learning_rate": 4.445384900489056e-06, "loss": 0.277, "step": 30970 }, { "epoch": 0.6875617362737373, "grad_norm": 1.2920376062393188, "learning_rate": 4.442485829747507e-06, "loss": 0.3688, "step": 30975 }, { "epoch": 0.6876727228332649, "grad_norm": 1.0285531282424927, "learning_rate": 4.4395874346698885e-06, "loss": 0.3805, "step": 30980 }, { "epoch": 0.6877837093927925, "grad_norm": 1.517197608947754, "learning_rate": 4.436689715608583e-06, "loss": 0.4708, "step": 30985 }, { "epoch": 0.6878946959523202, "grad_norm": 1.133748173713684, "learning_rate": 4.433792672915886e-06, "loss": 0.3964, "step": 30990 }, { "epoch": 0.6880056825118478, "grad_norm": 1.029487133026123, "learning_rate": 4.430896306944006e-06, "loss": 0.3344, "step": 30995 }, { "epoch": 0.6881166690713755, "grad_norm": 0.752305805683136, "learning_rate": 4.428000618045078e-06, "loss": 0.434, "step": 31000 }, { "epoch": 0.688227655630903, "grad_norm": 1.6484147310256958, "learning_rate": 4.425105606571145e-06, "loss": 0.3976, "step": 31005 }, { "epoch": 0.6883386421904307, "grad_norm": 1.2067406177520752, "learning_rate": 4.422211272874175e-06, "loss": 0.3293, "step": 31010 }, { "epoch": 0.6884496287499584, "grad_norm": 1.2860989570617676, "learning_rate": 4.419317617306056e-06, "loss": 0.3547, "step": 31015 }, { "epoch": 0.688560615309486, "grad_norm": 1.2995693683624268, "learning_rate": 4.416424640218582e-06, "loss": 0.3672, "step": 31020 }, { "epoch": 0.6886716018690137, "grad_norm": 1.966412901878357, "learning_rate": 4.413532341963477e-06, "loss": 0.6209, "step": 31025 }, { "epoch": 0.6887825884285413, "grad_norm": 1.75698983669281, "learning_rate": 4.410640722892371e-06, "loss": 0.3503, "step": 31030 }, { "epoch": 0.6888935749880689, "grad_norm": 0.894189178943634, "learning_rate": 4.40774978335682e-06, "loss": 0.4089, "step": 31035 }, { "epoch": 0.6890045615475966, "grad_norm": 0.8134837746620178, "learning_rate": 4.404859523708301e-06, "loss": 0.2775, "step": 31040 }, { "epoch": 0.6891155481071243, "grad_norm": 1.6088848114013672, "learning_rate": 4.40196994429819e-06, "loss": 0.459, "step": 31045 }, { "epoch": 0.6892265346666518, "grad_norm": 1.016860008239746, "learning_rate": 4.399081045477804e-06, "loss": 0.4462, "step": 31050 }, { "epoch": 0.6893375212261795, "grad_norm": 1.5716601610183716, "learning_rate": 4.396192827598357e-06, "loss": 0.804, "step": 31055 }, { "epoch": 0.6894485077857071, "grad_norm": 0.7367437481880188, "learning_rate": 4.393305291010995e-06, "loss": 0.4039, "step": 31060 }, { "epoch": 0.6895594943452348, "grad_norm": 1.4050825834274292, "learning_rate": 4.39041843606677e-06, "loss": 0.3258, "step": 31065 }, { "epoch": 0.6896704809047625, "grad_norm": 1.144169569015503, "learning_rate": 4.387532263116662e-06, "loss": 0.2019, "step": 31070 }, { "epoch": 0.68978146746429, "grad_norm": 1.2167930603027344, "learning_rate": 4.384646772511554e-06, "loss": 0.357, "step": 31075 }, { "epoch": 0.6898924540238177, "grad_norm": 1.3271721601486206, "learning_rate": 4.381761964602264e-06, "loss": 0.5452, "step": 31080 }, { "epoch": 0.6900034405833454, "grad_norm": 0.8778615593910217, "learning_rate": 4.3788778397395075e-06, "loss": 0.4604, "step": 31085 }, { "epoch": 0.690114427142873, "grad_norm": 1.1979990005493164, "learning_rate": 4.375994398273935e-06, "loss": 0.3149, "step": 31090 }, { "epoch": 0.6902254137024006, "grad_norm": 1.1055999994277954, "learning_rate": 4.3731116405560996e-06, "loss": 0.5063, "step": 31095 }, { "epoch": 0.6903364002619283, "grad_norm": 1.1913548707962036, "learning_rate": 4.370229566936482e-06, "loss": 0.5055, "step": 31100 }, { "epoch": 0.6904473868214559, "grad_norm": 1.7680208683013916, "learning_rate": 4.36734817776547e-06, "loss": 0.5424, "step": 31105 }, { "epoch": 0.6905583733809836, "grad_norm": 1.0807723999023438, "learning_rate": 4.36446747339338e-06, "loss": 0.335, "step": 31110 }, { "epoch": 0.6906693599405112, "grad_norm": 1.183213472366333, "learning_rate": 4.361587454170431e-06, "loss": 0.5329, "step": 31115 }, { "epoch": 0.6907803465000388, "grad_norm": 1.3761305809020996, "learning_rate": 4.3587081204467685e-06, "loss": 0.3529, "step": 31120 }, { "epoch": 0.6908913330595665, "grad_norm": 1.7719734907150269, "learning_rate": 4.355829472572457e-06, "loss": 0.4814, "step": 31125 }, { "epoch": 0.6910023196190941, "grad_norm": 0.8037557601928711, "learning_rate": 4.352951510897466e-06, "loss": 0.497, "step": 31130 }, { "epoch": 0.6911133061786218, "grad_norm": 1.0056993961334229, "learning_rate": 4.350074235771695e-06, "loss": 0.4597, "step": 31135 }, { "epoch": 0.6912242927381494, "grad_norm": 1.2936779260635376, "learning_rate": 4.347197647544947e-06, "loss": 0.3356, "step": 31140 }, { "epoch": 0.691335279297677, "grad_norm": 0.9828980565071106, "learning_rate": 4.3443217465669505e-06, "loss": 0.3686, "step": 31145 }, { "epoch": 0.6914462658572047, "grad_norm": 1.1120939254760742, "learning_rate": 4.3414465331873524e-06, "loss": 0.4776, "step": 31150 }, { "epoch": 0.6915572524167324, "grad_norm": 1.0941098928451538, "learning_rate": 4.338572007755703e-06, "loss": 0.3537, "step": 31155 }, { "epoch": 0.69166823897626, "grad_norm": 1.1693826913833618, "learning_rate": 4.335698170621487e-06, "loss": 0.3013, "step": 31160 }, { "epoch": 0.6917792255357876, "grad_norm": 1.5390578508377075, "learning_rate": 4.332825022134086e-06, "loss": 0.5256, "step": 31165 }, { "epoch": 0.6918902120953152, "grad_norm": 1.020473837852478, "learning_rate": 4.329952562642816e-06, "loss": 0.4088, "step": 31170 }, { "epoch": 0.6920011986548429, "grad_norm": 1.4370976686477661, "learning_rate": 4.327080792496895e-06, "loss": 0.3802, "step": 31175 }, { "epoch": 0.6921121852143706, "grad_norm": 1.3019744157791138, "learning_rate": 4.324209712045465e-06, "loss": 0.5318, "step": 31180 }, { "epoch": 0.6922231717738981, "grad_norm": 1.436402678489685, "learning_rate": 4.321339321637587e-06, "loss": 0.4362, "step": 31185 }, { "epoch": 0.6923341583334258, "grad_norm": 1.1188828945159912, "learning_rate": 4.318469621622226e-06, "loss": 0.5995, "step": 31190 }, { "epoch": 0.6924451448929535, "grad_norm": 1.3164883852005005, "learning_rate": 4.315600612348278e-06, "loss": 0.4404, "step": 31195 }, { "epoch": 0.6925561314524811, "grad_norm": 1.9575704336166382, "learning_rate": 4.3127322941645385e-06, "loss": 0.4189, "step": 31200 }, { "epoch": 0.6926671180120088, "grad_norm": 1.4906368255615234, "learning_rate": 4.309864667419735e-06, "loss": 0.4129, "step": 31205 }, { "epoch": 0.6927781045715364, "grad_norm": 0.632857620716095, "learning_rate": 4.306997732462505e-06, "loss": 0.2905, "step": 31210 }, { "epoch": 0.692889091131064, "grad_norm": 1.4873570203781128, "learning_rate": 4.304131489641393e-06, "loss": 0.2703, "step": 31215 }, { "epoch": 0.6930000776905917, "grad_norm": 1.2380766868591309, "learning_rate": 4.301265939304877e-06, "loss": 0.4143, "step": 31220 }, { "epoch": 0.6931110642501193, "grad_norm": 1.487708568572998, "learning_rate": 4.298401081801332e-06, "loss": 0.3418, "step": 31225 }, { "epoch": 0.6932220508096469, "grad_norm": 1.3513848781585693, "learning_rate": 4.295536917479062e-06, "loss": 0.3183, "step": 31230 }, { "epoch": 0.6933330373691746, "grad_norm": 1.166556715965271, "learning_rate": 4.292673446686285e-06, "loss": 0.4073, "step": 31235 }, { "epoch": 0.6934440239287022, "grad_norm": 1.4981805086135864, "learning_rate": 4.2898106697711266e-06, "loss": 0.5024, "step": 31240 }, { "epoch": 0.6935550104882299, "grad_norm": 1.0821495056152344, "learning_rate": 4.286948587081639e-06, "loss": 0.3748, "step": 31245 }, { "epoch": 0.6936659970477576, "grad_norm": 0.773173987865448, "learning_rate": 4.284087198965781e-06, "loss": 0.4054, "step": 31250 }, { "epoch": 0.6937769836072851, "grad_norm": 1.5648664236068726, "learning_rate": 4.281226505771433e-06, "loss": 0.4982, "step": 31255 }, { "epoch": 0.6938879701668128, "grad_norm": 1.5265167951583862, "learning_rate": 4.278366507846384e-06, "loss": 0.4065, "step": 31260 }, { "epoch": 0.6939989567263405, "grad_norm": 1.2185603380203247, "learning_rate": 4.275507205538348e-06, "loss": 0.3195, "step": 31265 }, { "epoch": 0.6941099432858681, "grad_norm": 0.8565247058868408, "learning_rate": 4.272648599194948e-06, "loss": 0.3741, "step": 31270 }, { "epoch": 0.6942209298453957, "grad_norm": 1.493923544883728, "learning_rate": 4.269790689163722e-06, "loss": 0.5898, "step": 31275 }, { "epoch": 0.6943319164049233, "grad_norm": 0.9452623724937439, "learning_rate": 4.2669334757921284e-06, "loss": 0.4826, "step": 31280 }, { "epoch": 0.694442902964451, "grad_norm": 1.0773069858551025, "learning_rate": 4.2640769594275335e-06, "loss": 0.3968, "step": 31285 }, { "epoch": 0.6945538895239787, "grad_norm": 1.870210886001587, "learning_rate": 4.261221140417228e-06, "loss": 0.3045, "step": 31290 }, { "epoch": 0.6946648760835062, "grad_norm": 1.6059725284576416, "learning_rate": 4.258366019108405e-06, "loss": 0.4136, "step": 31295 }, { "epoch": 0.6947758626430339, "grad_norm": 1.3067309856414795, "learning_rate": 4.255511595848191e-06, "loss": 0.3463, "step": 31300 }, { "epoch": 0.6948868492025616, "grad_norm": 1.3735544681549072, "learning_rate": 4.2526578709836075e-06, "loss": 0.5428, "step": 31305 }, { "epoch": 0.6949978357620892, "grad_norm": 1.036281704902649, "learning_rate": 4.2498048448616084e-06, "loss": 0.2687, "step": 31310 }, { "epoch": 0.6951088223216169, "grad_norm": 1.331540584564209, "learning_rate": 4.2469525178290485e-06, "loss": 0.4058, "step": 31315 }, { "epoch": 0.6952198088811445, "grad_norm": 1.4195505380630493, "learning_rate": 4.24410089023271e-06, "loss": 0.3653, "step": 31320 }, { "epoch": 0.6953307954406721, "grad_norm": 1.5275979042053223, "learning_rate": 4.241249962419278e-06, "loss": 0.5446, "step": 31325 }, { "epoch": 0.6954417820001998, "grad_norm": 1.3449691534042358, "learning_rate": 4.238399734735365e-06, "loss": 0.351, "step": 31330 }, { "epoch": 0.6955527685597274, "grad_norm": 1.099867343902588, "learning_rate": 4.235550207527488e-06, "loss": 0.5494, "step": 31335 }, { "epoch": 0.695663755119255, "grad_norm": 0.9885258078575134, "learning_rate": 4.2327013811420855e-06, "loss": 0.5013, "step": 31340 }, { "epoch": 0.6957747416787827, "grad_norm": 1.9952067136764526, "learning_rate": 4.229853255925506e-06, "loss": 0.4587, "step": 31345 }, { "epoch": 0.6958857282383103, "grad_norm": 1.0655864477157593, "learning_rate": 4.2270058322240134e-06, "loss": 0.4163, "step": 31350 }, { "epoch": 0.695996714797838, "grad_norm": 1.3053919076919556, "learning_rate": 4.224159110383797e-06, "loss": 0.6184, "step": 31355 }, { "epoch": 0.6961077013573657, "grad_norm": 1.7516372203826904, "learning_rate": 4.221313090750939e-06, "loss": 0.5149, "step": 31360 }, { "epoch": 0.6962186879168932, "grad_norm": 0.7757472991943359, "learning_rate": 4.218467773671461e-06, "loss": 0.2833, "step": 31365 }, { "epoch": 0.6963296744764209, "grad_norm": 0.9427791237831116, "learning_rate": 4.215623159491276e-06, "loss": 0.4318, "step": 31370 }, { "epoch": 0.6964406610359486, "grad_norm": 1.348248839378357, "learning_rate": 4.212779248556229e-06, "loss": 0.3954, "step": 31375 }, { "epoch": 0.6965516475954762, "grad_norm": 1.0021616220474243, "learning_rate": 4.209936041212076e-06, "loss": 0.2566, "step": 31380 }, { "epoch": 0.6966626341550038, "grad_norm": 1.6703379154205322, "learning_rate": 4.207093537804476e-06, "loss": 0.6691, "step": 31385 }, { "epoch": 0.6967736207145314, "grad_norm": 0.5511424541473389, "learning_rate": 4.20425173867902e-06, "loss": 0.3375, "step": 31390 }, { "epoch": 0.6968846072740591, "grad_norm": 1.0685218572616577, "learning_rate": 4.201410644181197e-06, "loss": 0.3661, "step": 31395 }, { "epoch": 0.6969955938335868, "grad_norm": 1.3645395040512085, "learning_rate": 4.19857025465642e-06, "loss": 0.3223, "step": 31400 }, { "epoch": 0.6971065803931144, "grad_norm": 1.014066457748413, "learning_rate": 4.195730570450019e-06, "loss": 0.3221, "step": 31405 }, { "epoch": 0.697217566952642, "grad_norm": 1.2080676555633545, "learning_rate": 4.1928915919072254e-06, "loss": 0.4337, "step": 31410 }, { "epoch": 0.6973285535121697, "grad_norm": 1.6252745389938354, "learning_rate": 4.190053319373201e-06, "loss": 0.6076, "step": 31415 }, { "epoch": 0.6974395400716973, "grad_norm": 1.463215708732605, "learning_rate": 4.187215753193004e-06, "loss": 0.3474, "step": 31420 }, { "epoch": 0.697550526631225, "grad_norm": 1.4297840595245361, "learning_rate": 4.184378893711626e-06, "loss": 0.5259, "step": 31425 }, { "epoch": 0.6976615131907526, "grad_norm": 1.3269591331481934, "learning_rate": 4.181542741273954e-06, "loss": 0.4864, "step": 31430 }, { "epoch": 0.6977724997502802, "grad_norm": 1.156131386756897, "learning_rate": 4.178707296224802e-06, "loss": 0.502, "step": 31435 }, { "epoch": 0.6978834863098079, "grad_norm": 1.3167673349380493, "learning_rate": 4.175872558908898e-06, "loss": 0.3402, "step": 31440 }, { "epoch": 0.6979944728693355, "grad_norm": 0.9266581535339355, "learning_rate": 4.173038529670871e-06, "loss": 0.3809, "step": 31445 }, { "epoch": 0.6981054594288632, "grad_norm": 0.9850370287895203, "learning_rate": 4.170205208855281e-06, "loss": 0.4619, "step": 31450 }, { "epoch": 0.6982164459883908, "grad_norm": 1.2709016799926758, "learning_rate": 4.167372596806587e-06, "loss": 0.2927, "step": 31455 }, { "epoch": 0.6983274325479184, "grad_norm": 0.5235553979873657, "learning_rate": 4.1645406938691725e-06, "loss": 0.3464, "step": 31460 }, { "epoch": 0.6984384191074461, "grad_norm": 1.417148470878601, "learning_rate": 4.161709500387332e-06, "loss": 0.3279, "step": 31465 }, { "epoch": 0.6985494056669738, "grad_norm": 1.2142740488052368, "learning_rate": 4.158879016705267e-06, "loss": 0.4259, "step": 31470 }, { "epoch": 0.6986603922265013, "grad_norm": 0.8356972336769104, "learning_rate": 4.156049243167105e-06, "loss": 0.44, "step": 31475 }, { "epoch": 0.698771378786029, "grad_norm": 1.6601563692092896, "learning_rate": 4.153220180116874e-06, "loss": 0.3932, "step": 31480 }, { "epoch": 0.6988823653455567, "grad_norm": 1.4118906259536743, "learning_rate": 4.150391827898524e-06, "loss": 0.3759, "step": 31485 }, { "epoch": 0.6989933519050843, "grad_norm": 1.6847814321517944, "learning_rate": 4.147564186855923e-06, "loss": 0.3568, "step": 31490 }, { "epoch": 0.699104338464612, "grad_norm": 1.3887543678283691, "learning_rate": 4.144737257332835e-06, "loss": 0.3413, "step": 31495 }, { "epoch": 0.6992153250241395, "grad_norm": 1.4843730926513672, "learning_rate": 4.141911039672959e-06, "loss": 0.4609, "step": 31500 }, { "epoch": 0.6993263115836672, "grad_norm": 1.0380373001098633, "learning_rate": 4.139085534219887e-06, "loss": 0.47, "step": 31505 }, { "epoch": 0.6994372981431949, "grad_norm": 1.1411293745040894, "learning_rate": 4.1362607413171455e-06, "loss": 0.4828, "step": 31510 }, { "epoch": 0.6995482847027225, "grad_norm": 1.434410572052002, "learning_rate": 4.133436661308153e-06, "loss": 0.3755, "step": 31515 }, { "epoch": 0.6996592712622501, "grad_norm": 1.2564046382904053, "learning_rate": 4.130613294536257e-06, "loss": 0.3389, "step": 31520 }, { "epoch": 0.6997702578217778, "grad_norm": 1.2875218391418457, "learning_rate": 4.127790641344715e-06, "loss": 0.4025, "step": 31525 }, { "epoch": 0.6998812443813054, "grad_norm": 1.2832059860229492, "learning_rate": 4.124968702076689e-06, "loss": 0.5064, "step": 31530 }, { "epoch": 0.6999922309408331, "grad_norm": 1.0163439512252808, "learning_rate": 4.12214747707527e-06, "loss": 0.3183, "step": 31535 }, { "epoch": 0.7001032175003608, "grad_norm": 1.3421192169189453, "learning_rate": 4.119326966683443e-06, "loss": 0.3161, "step": 31540 }, { "epoch": 0.7002142040598883, "grad_norm": 0.8114428520202637, "learning_rate": 4.116507171244125e-06, "loss": 0.3758, "step": 31545 }, { "epoch": 0.700325190619416, "grad_norm": 1.2384015321731567, "learning_rate": 4.1136880911001305e-06, "loss": 0.4125, "step": 31550 }, { "epoch": 0.7004361771789436, "grad_norm": 1.3213547468185425, "learning_rate": 4.1108697265942e-06, "loss": 0.3732, "step": 31555 }, { "epoch": 0.7005471637384713, "grad_norm": 1.2653157711029053, "learning_rate": 4.108052078068974e-06, "loss": 0.4037, "step": 31560 }, { "epoch": 0.7006581502979989, "grad_norm": 0.8571048378944397, "learning_rate": 4.1052351458670195e-06, "loss": 0.3677, "step": 31565 }, { "epoch": 0.7007691368575265, "grad_norm": 1.0218335390090942, "learning_rate": 4.1024189303308025e-06, "loss": 0.276, "step": 31570 }, { "epoch": 0.7008801234170542, "grad_norm": 2.0458157062530518, "learning_rate": 4.099603431802718e-06, "loss": 0.3352, "step": 31575 }, { "epoch": 0.7009911099765819, "grad_norm": 0.4724593162536621, "learning_rate": 4.096788650625056e-06, "loss": 0.3873, "step": 31580 }, { "epoch": 0.7011020965361094, "grad_norm": 0.9088649749755859, "learning_rate": 4.0939745871400335e-06, "loss": 0.3816, "step": 31585 }, { "epoch": 0.7012130830956371, "grad_norm": 1.1194968223571777, "learning_rate": 4.091161241689771e-06, "loss": 0.3933, "step": 31590 }, { "epoch": 0.7013240696551648, "grad_norm": 1.356024146080017, "learning_rate": 4.088348614616313e-06, "loss": 0.4728, "step": 31595 }, { "epoch": 0.7014350562146924, "grad_norm": 1.4687360525131226, "learning_rate": 4.085536706261599e-06, "loss": 0.4513, "step": 31600 }, { "epoch": 0.7015460427742201, "grad_norm": 1.7567170858383179, "learning_rate": 4.0827255169674985e-06, "loss": 0.2785, "step": 31605 }, { "epoch": 0.7016570293337476, "grad_norm": 1.7780139446258545, "learning_rate": 4.079915047075786e-06, "loss": 0.5441, "step": 31610 }, { "epoch": 0.7017680158932753, "grad_norm": 1.035780906677246, "learning_rate": 4.077105296928146e-06, "loss": 0.4645, "step": 31615 }, { "epoch": 0.701879002452803, "grad_norm": 1.972312092781067, "learning_rate": 4.0742962668661826e-06, "loss": 0.5715, "step": 31620 }, { "epoch": 0.7019899890123306, "grad_norm": 1.128519892692566, "learning_rate": 4.071487957231403e-06, "loss": 0.4997, "step": 31625 }, { "epoch": 0.7021009755718582, "grad_norm": 1.760668396949768, "learning_rate": 4.068680368365234e-06, "loss": 0.3383, "step": 31630 }, { "epoch": 0.7022119621313859, "grad_norm": 1.2317535877227783, "learning_rate": 4.065873500609018e-06, "loss": 0.5581, "step": 31635 }, { "epoch": 0.7023229486909135, "grad_norm": 1.2062588930130005, "learning_rate": 4.063067354303997e-06, "loss": 0.2875, "step": 31640 }, { "epoch": 0.7024339352504412, "grad_norm": 1.362601637840271, "learning_rate": 4.060261929791338e-06, "loss": 0.4041, "step": 31645 }, { "epoch": 0.7025449218099689, "grad_norm": 0.7741042971611023, "learning_rate": 4.057457227412112e-06, "loss": 0.2976, "step": 31650 }, { "epoch": 0.7026559083694964, "grad_norm": 1.195422887802124, "learning_rate": 4.054653247507304e-06, "loss": 0.4414, "step": 31655 }, { "epoch": 0.7027668949290241, "grad_norm": 1.0758905410766602, "learning_rate": 4.0518499904178195e-06, "loss": 0.4778, "step": 31660 }, { "epoch": 0.7028778814885517, "grad_norm": 0.9068155288696289, "learning_rate": 4.049047456484463e-06, "loss": 0.3964, "step": 31665 }, { "epoch": 0.7029888680480794, "grad_norm": 1.274548888206482, "learning_rate": 4.046245646047961e-06, "loss": 0.5064, "step": 31670 }, { "epoch": 0.703099854607607, "grad_norm": 1.3194080591201782, "learning_rate": 4.0434445594489415e-06, "loss": 0.5299, "step": 31675 }, { "epoch": 0.7032108411671346, "grad_norm": 0.789570152759552, "learning_rate": 4.04064419702796e-06, "loss": 0.2611, "step": 31680 }, { "epoch": 0.7033218277266623, "grad_norm": 0.9924457669258118, "learning_rate": 4.037844559125468e-06, "loss": 0.4622, "step": 31685 }, { "epoch": 0.70343281428619, "grad_norm": 1.1734859943389893, "learning_rate": 4.035045646081838e-06, "loss": 0.4851, "step": 31690 }, { "epoch": 0.7035438008457175, "grad_norm": 1.1078006029129028, "learning_rate": 4.032247458237357e-06, "loss": 0.2741, "step": 31695 }, { "epoch": 0.7036547874052452, "grad_norm": 0.924501895904541, "learning_rate": 4.029449995932213e-06, "loss": 0.3489, "step": 31700 }, { "epoch": 0.7037657739647729, "grad_norm": 1.0132527351379395, "learning_rate": 4.026653259506518e-06, "loss": 0.2577, "step": 31705 }, { "epoch": 0.7038767605243005, "grad_norm": 0.8732908368110657, "learning_rate": 4.023857249300283e-06, "loss": 0.3736, "step": 31710 }, { "epoch": 0.7039877470838282, "grad_norm": 0.8892420530319214, "learning_rate": 4.021061965653441e-06, "loss": 0.4148, "step": 31715 }, { "epoch": 0.7040987336433557, "grad_norm": 0.8567600250244141, "learning_rate": 4.018267408905838e-06, "loss": 0.4137, "step": 31720 }, { "epoch": 0.7042097202028834, "grad_norm": 1.7506471872329712, "learning_rate": 4.015473579397218e-06, "loss": 0.4998, "step": 31725 }, { "epoch": 0.7043207067624111, "grad_norm": 1.1711151599884033, "learning_rate": 4.012680477467254e-06, "loss": 0.5172, "step": 31730 }, { "epoch": 0.7044316933219387, "grad_norm": 0.9681052565574646, "learning_rate": 4.009888103455512e-06, "loss": 0.4736, "step": 31735 }, { "epoch": 0.7045426798814663, "grad_norm": 0.9805378317832947, "learning_rate": 4.007096457701487e-06, "loss": 0.4674, "step": 31740 }, { "epoch": 0.704653666440994, "grad_norm": 1.5319339036941528, "learning_rate": 4.004305540544579e-06, "loss": 0.3196, "step": 31745 }, { "epoch": 0.7047646530005216, "grad_norm": 1.4754489660263062, "learning_rate": 4.001515352324091e-06, "loss": 0.5432, "step": 31750 }, { "epoch": 0.7048756395600493, "grad_norm": 1.703891396522522, "learning_rate": 3.998725893379254e-06, "loss": 0.4281, "step": 31755 }, { "epoch": 0.704986626119577, "grad_norm": 0.9801030158996582, "learning_rate": 3.995937164049192e-06, "loss": 0.3976, "step": 31760 }, { "epoch": 0.7050976126791045, "grad_norm": 0.8765810132026672, "learning_rate": 3.993149164672957e-06, "loss": 0.4155, "step": 31765 }, { "epoch": 0.7052085992386322, "grad_norm": 1.5806583166122437, "learning_rate": 3.990361895589499e-06, "loss": 0.4737, "step": 31770 }, { "epoch": 0.7053195857981599, "grad_norm": 0.9778581857681274, "learning_rate": 3.987575357137685e-06, "loss": 0.251, "step": 31775 }, { "epoch": 0.7054305723576875, "grad_norm": 0.9157988429069519, "learning_rate": 3.984789549656299e-06, "loss": 0.4607, "step": 31780 }, { "epoch": 0.7055415589172152, "grad_norm": 0.6539163589477539, "learning_rate": 3.982004473484022e-06, "loss": 0.3835, "step": 31785 }, { "epoch": 0.7056525454767427, "grad_norm": 1.8265697956085205, "learning_rate": 3.979220128959463e-06, "loss": 0.4101, "step": 31790 }, { "epoch": 0.7057635320362704, "grad_norm": 1.7749618291854858, "learning_rate": 3.976436516421125e-06, "loss": 0.4697, "step": 31795 }, { "epoch": 0.7058745185957981, "grad_norm": 1.127279281616211, "learning_rate": 3.973653636207437e-06, "loss": 0.4854, "step": 31800 }, { "epoch": 0.7059855051553257, "grad_norm": 1.56232750415802, "learning_rate": 3.970871488656727e-06, "loss": 0.3458, "step": 31805 }, { "epoch": 0.7060964917148533, "grad_norm": 1.2407728433609009, "learning_rate": 3.968090074107242e-06, "loss": 0.359, "step": 31810 }, { "epoch": 0.706207478274381, "grad_norm": 0.9445065259933472, "learning_rate": 3.965309392897135e-06, "loss": 0.3034, "step": 31815 }, { "epoch": 0.7063184648339086, "grad_norm": 0.8194674849510193, "learning_rate": 3.9625294453644755e-06, "loss": 0.2554, "step": 31820 }, { "epoch": 0.7064294513934363, "grad_norm": 1.5787767171859741, "learning_rate": 3.9597502318472356e-06, "loss": 0.4429, "step": 31825 }, { "epoch": 0.706540437952964, "grad_norm": 1.8735803365707397, "learning_rate": 3.956971752683309e-06, "loss": 0.4748, "step": 31830 }, { "epoch": 0.7066514245124915, "grad_norm": 1.4561665058135986, "learning_rate": 3.954194008210485e-06, "loss": 0.3635, "step": 31835 }, { "epoch": 0.7067624110720192, "grad_norm": 1.3835692405700684, "learning_rate": 3.951416998766481e-06, "loss": 0.4811, "step": 31840 }, { "epoch": 0.7068733976315468, "grad_norm": 1.6009646654129028, "learning_rate": 3.94864072468891e-06, "loss": 0.216, "step": 31845 }, { "epoch": 0.7069843841910745, "grad_norm": 1.3886228799819946, "learning_rate": 3.945865186315308e-06, "loss": 0.193, "step": 31850 }, { "epoch": 0.7070953707506021, "grad_norm": 1.8874969482421875, "learning_rate": 3.9430903839831104e-06, "loss": 0.4452, "step": 31855 }, { "epoch": 0.7072063573101297, "grad_norm": 1.0513811111450195, "learning_rate": 3.9403163180296685e-06, "loss": 0.474, "step": 31860 }, { "epoch": 0.7073173438696574, "grad_norm": 0.8507716655731201, "learning_rate": 3.937542988792251e-06, "loss": 0.2627, "step": 31865 }, { "epoch": 0.7074283304291851, "grad_norm": 0.38229304552078247, "learning_rate": 3.934770396608022e-06, "loss": 0.3952, "step": 31870 }, { "epoch": 0.7075393169887126, "grad_norm": 1.3319016695022583, "learning_rate": 3.931998541814069e-06, "loss": 0.4774, "step": 31875 }, { "epoch": 0.7076503035482403, "grad_norm": 1.4282726049423218, "learning_rate": 3.92922742474738e-06, "loss": 0.5038, "step": 31880 }, { "epoch": 0.707761290107768, "grad_norm": 1.7863751649856567, "learning_rate": 3.926457045744862e-06, "loss": 0.4292, "step": 31885 }, { "epoch": 0.7078722766672956, "grad_norm": 0.9300488829612732, "learning_rate": 3.923687405143329e-06, "loss": 0.5032, "step": 31890 }, { "epoch": 0.7079832632268233, "grad_norm": 1.699332594871521, "learning_rate": 3.9209185032795004e-06, "loss": 0.4435, "step": 31895 }, { "epoch": 0.7080942497863508, "grad_norm": 1.6415492296218872, "learning_rate": 3.918150340490015e-06, "loss": 0.4167, "step": 31900 }, { "epoch": 0.7082052363458785, "grad_norm": 0.8185513615608215, "learning_rate": 3.915382917111412e-06, "loss": 0.3837, "step": 31905 }, { "epoch": 0.7083162229054062, "grad_norm": 1.4475336074829102, "learning_rate": 3.912616233480148e-06, "loss": 0.2266, "step": 31910 }, { "epoch": 0.7084272094649338, "grad_norm": 1.804500937461853, "learning_rate": 3.909850289932589e-06, "loss": 0.4735, "step": 31915 }, { "epoch": 0.7085381960244614, "grad_norm": 1.0260645151138306, "learning_rate": 3.907085086805005e-06, "loss": 0.4578, "step": 31920 }, { "epoch": 0.7086491825839891, "grad_norm": 1.2490384578704834, "learning_rate": 3.904320624433584e-06, "loss": 0.3645, "step": 31925 }, { "epoch": 0.7087601691435167, "grad_norm": 1.592247486114502, "learning_rate": 3.901556903154415e-06, "loss": 0.3791, "step": 31930 }, { "epoch": 0.7088711557030444, "grad_norm": 1.0241756439208984, "learning_rate": 3.89879392330351e-06, "loss": 0.423, "step": 31935 }, { "epoch": 0.7089821422625721, "grad_norm": 1.805743932723999, "learning_rate": 3.896031685216774e-06, "loss": 0.5112, "step": 31940 }, { "epoch": 0.7090931288220996, "grad_norm": 1.302553653717041, "learning_rate": 3.893270189230033e-06, "loss": 0.4258, "step": 31945 }, { "epoch": 0.7092041153816273, "grad_norm": 1.4034984111785889, "learning_rate": 3.890509435679026e-06, "loss": 0.5204, "step": 31950 }, { "epoch": 0.7093151019411549, "grad_norm": 1.0881425142288208, "learning_rate": 3.8877494248993895e-06, "loss": 0.4758, "step": 31955 }, { "epoch": 0.7094260885006826, "grad_norm": 1.1435565948486328, "learning_rate": 3.884990157226683e-06, "loss": 0.312, "step": 31960 }, { "epoch": 0.7095370750602102, "grad_norm": 1.5867775678634644, "learning_rate": 3.882231632996361e-06, "loss": 0.4257, "step": 31965 }, { "epoch": 0.7096480616197378, "grad_norm": 1.1169955730438232, "learning_rate": 3.879473852543799e-06, "loss": 0.5656, "step": 31970 }, { "epoch": 0.7097590481792655, "grad_norm": 1.6792324781417847, "learning_rate": 3.876716816204284e-06, "loss": 0.4188, "step": 31975 }, { "epoch": 0.7098700347387932, "grad_norm": 1.1958585977554321, "learning_rate": 3.873960524312997e-06, "loss": 0.3275, "step": 31980 }, { "epoch": 0.7099810212983207, "grad_norm": 0.9273353219032288, "learning_rate": 3.87120497720505e-06, "loss": 0.2232, "step": 31985 }, { "epoch": 0.7100920078578484, "grad_norm": 1.430031180381775, "learning_rate": 3.8684501752154425e-06, "loss": 0.4956, "step": 31990 }, { "epoch": 0.7102029944173761, "grad_norm": 1.3313651084899902, "learning_rate": 3.8656961186791e-06, "loss": 0.5053, "step": 31995 }, { "epoch": 0.7103139809769037, "grad_norm": 1.2391557693481445, "learning_rate": 3.862942807930854e-06, "loss": 0.5513, "step": 32000 }, { "epoch": 0.7104249675364314, "grad_norm": 1.1085586547851562, "learning_rate": 3.860190243305435e-06, "loss": 0.516, "step": 32005 }, { "epoch": 0.7105359540959589, "grad_norm": 1.0961024761199951, "learning_rate": 3.857438425137499e-06, "loss": 0.419, "step": 32010 }, { "epoch": 0.7106469406554866, "grad_norm": 1.7764806747436523, "learning_rate": 3.854687353761596e-06, "loss": 0.3715, "step": 32015 }, { "epoch": 0.7107579272150143, "grad_norm": 0.960142970085144, "learning_rate": 3.851937029512197e-06, "loss": 0.4334, "step": 32020 }, { "epoch": 0.7108689137745419, "grad_norm": 1.5467451810836792, "learning_rate": 3.849187452723672e-06, "loss": 0.3228, "step": 32025 }, { "epoch": 0.7109799003340695, "grad_norm": 0.7945905327796936, "learning_rate": 3.846438623730309e-06, "loss": 0.3627, "step": 32030 }, { "epoch": 0.7110908868935972, "grad_norm": 1.7557940483093262, "learning_rate": 3.843690542866303e-06, "loss": 0.3534, "step": 32035 }, { "epoch": 0.7112018734531248, "grad_norm": 0.9791882634162903, "learning_rate": 3.840943210465751e-06, "loss": 0.3775, "step": 32040 }, { "epoch": 0.7113128600126525, "grad_norm": 1.2559431791305542, "learning_rate": 3.83819662686267e-06, "loss": 0.3808, "step": 32045 }, { "epoch": 0.7114238465721802, "grad_norm": 1.400604248046875, "learning_rate": 3.835450792390977e-06, "loss": 0.3739, "step": 32050 }, { "epoch": 0.7115348331317077, "grad_norm": 1.0400586128234863, "learning_rate": 3.832705707384504e-06, "loss": 0.3604, "step": 32055 }, { "epoch": 0.7116458196912354, "grad_norm": 1.6382838487625122, "learning_rate": 3.829961372176985e-06, "loss": 0.2549, "step": 32060 }, { "epoch": 0.711756806250763, "grad_norm": 1.6023155450820923, "learning_rate": 3.827217787102072e-06, "loss": 0.3423, "step": 32065 }, { "epoch": 0.7118677928102907, "grad_norm": 4.825948238372803, "learning_rate": 3.8244749524933155e-06, "loss": 0.3798, "step": 32070 }, { "epoch": 0.7119787793698183, "grad_norm": 1.3694977760314941, "learning_rate": 3.821732868684187e-06, "loss": 0.4026, "step": 32075 }, { "epoch": 0.7120897659293459, "grad_norm": 1.321022629737854, "learning_rate": 3.8189915360080536e-06, "loss": 0.4889, "step": 32080 }, { "epoch": 0.7122007524888736, "grad_norm": 1.2078578472137451, "learning_rate": 3.8162509547982015e-06, "loss": 0.4411, "step": 32085 }, { "epoch": 0.7123117390484013, "grad_norm": 1.4181798696517944, "learning_rate": 3.8135111253878166e-06, "loss": 0.4319, "step": 32090 }, { "epoch": 0.7124227256079289, "grad_norm": 1.0716606378555298, "learning_rate": 3.8107720481100053e-06, "loss": 0.3785, "step": 32095 }, { "epoch": 0.7125337121674565, "grad_norm": 2.094991445541382, "learning_rate": 3.808033723297767e-06, "loss": 0.53, "step": 32100 }, { "epoch": 0.7126446987269842, "grad_norm": 1.6106618642807007, "learning_rate": 3.805296151284027e-06, "loss": 0.4087, "step": 32105 }, { "epoch": 0.7127556852865118, "grad_norm": 1.186604619026184, "learning_rate": 3.802559332401601e-06, "loss": 0.5331, "step": 32110 }, { "epoch": 0.7128666718460395, "grad_norm": 0.5464258193969727, "learning_rate": 3.799823266983227e-06, "loss": 0.3207, "step": 32115 }, { "epoch": 0.712977658405567, "grad_norm": 1.6606342792510986, "learning_rate": 3.79708795536155e-06, "loss": 0.4592, "step": 32120 }, { "epoch": 0.7130886449650947, "grad_norm": 1.799006462097168, "learning_rate": 3.794353397869113e-06, "loss": 0.2939, "step": 32125 }, { "epoch": 0.7131996315246224, "grad_norm": 0.9671017527580261, "learning_rate": 3.7916195948383817e-06, "loss": 0.2851, "step": 32130 }, { "epoch": 0.71331061808415, "grad_norm": 1.8571856021881104, "learning_rate": 3.7888865466017144e-06, "loss": 0.3613, "step": 32135 }, { "epoch": 0.7134216046436777, "grad_norm": 0.34334808588027954, "learning_rate": 3.7861542534913907e-06, "loss": 0.2196, "step": 32140 }, { "epoch": 0.7135325912032053, "grad_norm": 0.8765976428985596, "learning_rate": 3.7834227158395964e-06, "loss": 0.4297, "step": 32145 }, { "epoch": 0.7136435777627329, "grad_norm": 1.156449317932129, "learning_rate": 3.7806919339784166e-06, "loss": 0.3554, "step": 32150 }, { "epoch": 0.7137545643222606, "grad_norm": 0.9955435395240784, "learning_rate": 3.777961908239857e-06, "loss": 0.6089, "step": 32155 }, { "epoch": 0.7138655508817883, "grad_norm": 2.179382801055908, "learning_rate": 3.775232638955818e-06, "loss": 0.2496, "step": 32160 }, { "epoch": 0.7139765374413158, "grad_norm": 0.9458391070365906, "learning_rate": 3.7725041264581184e-06, "loss": 0.3709, "step": 32165 }, { "epoch": 0.7140875240008435, "grad_norm": 1.672772765159607, "learning_rate": 3.769776371078485e-06, "loss": 0.3334, "step": 32170 }, { "epoch": 0.7141985105603711, "grad_norm": 1.3009119033813477, "learning_rate": 3.7670493731485424e-06, "loss": 0.393, "step": 32175 }, { "epoch": 0.7143094971198988, "grad_norm": 1.1832098960876465, "learning_rate": 3.7643231329998366e-06, "loss": 0.1881, "step": 32180 }, { "epoch": 0.7144204836794265, "grad_norm": 0.9928916692733765, "learning_rate": 3.7615976509638086e-06, "loss": 0.4001, "step": 32185 }, { "epoch": 0.714531470238954, "grad_norm": 0.813753604888916, "learning_rate": 3.7588729273718194e-06, "loss": 0.3298, "step": 32190 }, { "epoch": 0.7146424567984817, "grad_norm": 0.9435087442398071, "learning_rate": 3.756148962555125e-06, "loss": 0.4893, "step": 32195 }, { "epoch": 0.7147534433580094, "grad_norm": 1.2980031967163086, "learning_rate": 3.7534257568448995e-06, "loss": 0.4273, "step": 32200 }, { "epoch": 0.714864429917537, "grad_norm": 1.0607577562332153, "learning_rate": 3.7507033105722244e-06, "loss": 0.3802, "step": 32205 }, { "epoch": 0.7149754164770646, "grad_norm": 1.1984096765518188, "learning_rate": 3.7479816240680788e-06, "loss": 0.4617, "step": 32210 }, { "epoch": 0.7150864030365923, "grad_norm": 1.4583815336227417, "learning_rate": 3.7452606976633644e-06, "loss": 0.3474, "step": 32215 }, { "epoch": 0.7151973895961199, "grad_norm": 1.0872869491577148, "learning_rate": 3.742540531688873e-06, "loss": 0.4781, "step": 32220 }, { "epoch": 0.7153083761556476, "grad_norm": 0.5375029444694519, "learning_rate": 3.739821126475318e-06, "loss": 0.4053, "step": 32225 }, { "epoch": 0.7154193627151751, "grad_norm": 0.8395071625709534, "learning_rate": 3.7371024823533187e-06, "loss": 0.4364, "step": 32230 }, { "epoch": 0.7155303492747028, "grad_norm": 0.849446713924408, "learning_rate": 3.7343845996533922e-06, "loss": 0.4214, "step": 32235 }, { "epoch": 0.7156413358342305, "grad_norm": 1.2325609922409058, "learning_rate": 3.731667478705976e-06, "loss": 0.5089, "step": 32240 }, { "epoch": 0.7157523223937581, "grad_norm": 1.1667782068252563, "learning_rate": 3.728951119841403e-06, "loss": 0.626, "step": 32245 }, { "epoch": 0.7158633089532858, "grad_norm": 1.1611791849136353, "learning_rate": 3.7262355233899204e-06, "loss": 0.4001, "step": 32250 }, { "epoch": 0.7159742955128134, "grad_norm": 1.2110391855239868, "learning_rate": 3.7235206896816858e-06, "loss": 0.5051, "step": 32255 }, { "epoch": 0.716085282072341, "grad_norm": 1.197821021080017, "learning_rate": 3.720806619046753e-06, "loss": 0.3824, "step": 32260 }, { "epoch": 0.7161962686318687, "grad_norm": 1.2906665802001953, "learning_rate": 3.718093311815095e-06, "loss": 0.4665, "step": 32265 }, { "epoch": 0.7163072551913964, "grad_norm": 1.2199299335479736, "learning_rate": 3.715380768316582e-06, "loss": 0.3052, "step": 32270 }, { "epoch": 0.716418241750924, "grad_norm": 1.3464202880859375, "learning_rate": 3.7126689888810017e-06, "loss": 0.463, "step": 32275 }, { "epoch": 0.7165292283104516, "grad_norm": 1.0648338794708252, "learning_rate": 3.7099579738380366e-06, "loss": 0.3672, "step": 32280 }, { "epoch": 0.7166402148699792, "grad_norm": 1.0626704692840576, "learning_rate": 3.7072477235172875e-06, "loss": 0.3367, "step": 32285 }, { "epoch": 0.7167512014295069, "grad_norm": 1.1101748943328857, "learning_rate": 3.704538238248254e-06, "loss": 0.3515, "step": 32290 }, { "epoch": 0.7168621879890346, "grad_norm": 1.251792311668396, "learning_rate": 3.7018295183603515e-06, "loss": 0.474, "step": 32295 }, { "epoch": 0.7169731745485621, "grad_norm": 1.3575869798660278, "learning_rate": 3.6991215641828903e-06, "loss": 0.3501, "step": 32300 }, { "epoch": 0.7170841611080898, "grad_norm": 1.2677310705184937, "learning_rate": 3.696414376045101e-06, "loss": 0.4466, "step": 32305 }, { "epoch": 0.7171951476676175, "grad_norm": 1.0338740348815918, "learning_rate": 3.693707954276108e-06, "loss": 0.3098, "step": 32310 }, { "epoch": 0.7173061342271451, "grad_norm": 2.0934510231018066, "learning_rate": 3.6910022992049556e-06, "loss": 0.3614, "step": 32315 }, { "epoch": 0.7174171207866727, "grad_norm": 1.3029704093933105, "learning_rate": 3.688297411160581e-06, "loss": 0.4913, "step": 32320 }, { "epoch": 0.7175281073462004, "grad_norm": 1.1890994310379028, "learning_rate": 3.6855932904718426e-06, "loss": 0.3007, "step": 32325 }, { "epoch": 0.717639093905728, "grad_norm": 1.3361420631408691, "learning_rate": 3.6828899374674933e-06, "loss": 0.3256, "step": 32330 }, { "epoch": 0.7177500804652557, "grad_norm": 1.8069764375686646, "learning_rate": 3.680187352476198e-06, "loss": 0.3186, "step": 32335 }, { "epoch": 0.7178610670247833, "grad_norm": 1.300967812538147, "learning_rate": 3.6774855358265327e-06, "loss": 0.3302, "step": 32340 }, { "epoch": 0.7179720535843109, "grad_norm": 1.4119071960449219, "learning_rate": 3.6747844878469695e-06, "loss": 0.4967, "step": 32345 }, { "epoch": 0.7180830401438386, "grad_norm": 1.0396215915679932, "learning_rate": 3.672084208865898e-06, "loss": 0.5122, "step": 32350 }, { "epoch": 0.7181940267033662, "grad_norm": 1.1072118282318115, "learning_rate": 3.6693846992116024e-06, "loss": 0.3959, "step": 32355 }, { "epoch": 0.7183050132628939, "grad_norm": 1.172977089881897, "learning_rate": 3.6666859592122885e-06, "loss": 0.2945, "step": 32360 }, { "epoch": 0.7184159998224215, "grad_norm": 1.402540683746338, "learning_rate": 3.663987989196051e-06, "loss": 0.4883, "step": 32365 }, { "epoch": 0.7185269863819491, "grad_norm": 0.952644944190979, "learning_rate": 3.6612907894909042e-06, "loss": 0.2974, "step": 32370 }, { "epoch": 0.7186379729414768, "grad_norm": 1.0303863286972046, "learning_rate": 3.6585943604247687e-06, "loss": 0.4951, "step": 32375 }, { "epoch": 0.7187489595010045, "grad_norm": 0.9419283270835876, "learning_rate": 3.65589870232546e-06, "loss": 0.3822, "step": 32380 }, { "epoch": 0.718859946060532, "grad_norm": 1.5716652870178223, "learning_rate": 3.653203815520714e-06, "loss": 0.5331, "step": 32385 }, { "epoch": 0.7189709326200597, "grad_norm": 1.2303674221038818, "learning_rate": 3.6505097003381585e-06, "loss": 0.4361, "step": 32390 }, { "epoch": 0.7190819191795873, "grad_norm": 2.1736645698547363, "learning_rate": 3.6478163571053404e-06, "loss": 0.4631, "step": 32395 }, { "epoch": 0.719192905739115, "grad_norm": 1.194144368171692, "learning_rate": 3.645123786149708e-06, "loss": 0.5266, "step": 32400 }, { "epoch": 0.7193038922986427, "grad_norm": 1.0125007629394531, "learning_rate": 3.642431987798611e-06, "loss": 0.4056, "step": 32405 }, { "epoch": 0.7194148788581702, "grad_norm": 1.3125933408737183, "learning_rate": 3.6397409623793147e-06, "loss": 0.3606, "step": 32410 }, { "epoch": 0.7195258654176979, "grad_norm": 1.499477744102478, "learning_rate": 3.6370507102189767e-06, "loss": 0.4938, "step": 32415 }, { "epoch": 0.7196368519772256, "grad_norm": 1.4758806228637695, "learning_rate": 3.634361231644675e-06, "loss": 0.4187, "step": 32420 }, { "epoch": 0.7197478385367532, "grad_norm": 1.444891095161438, "learning_rate": 3.6316725269833887e-06, "loss": 0.4928, "step": 32425 }, { "epoch": 0.7198588250962809, "grad_norm": 1.2352997064590454, "learning_rate": 3.628984596561996e-06, "loss": 0.387, "step": 32430 }, { "epoch": 0.7199698116558085, "grad_norm": 1.1362451314926147, "learning_rate": 3.6262974407072928e-06, "loss": 0.3679, "step": 32435 }, { "epoch": 0.7200807982153361, "grad_norm": 1.1246973276138306, "learning_rate": 3.6236110597459674e-06, "loss": 0.5203, "step": 32440 }, { "epoch": 0.7201917847748638, "grad_norm": 0.9550732970237732, "learning_rate": 3.620925454004628e-06, "loss": 0.5526, "step": 32445 }, { "epoch": 0.7203027713343914, "grad_norm": 1.293099045753479, "learning_rate": 3.6182406238097745e-06, "loss": 0.5008, "step": 32450 }, { "epoch": 0.720413757893919, "grad_norm": 1.2851907014846802, "learning_rate": 3.6155565694878237e-06, "loss": 0.4955, "step": 32455 }, { "epoch": 0.7205247444534467, "grad_norm": 1.3594646453857422, "learning_rate": 3.6128732913650966e-06, "loss": 0.4474, "step": 32460 }, { "epoch": 0.7206357310129743, "grad_norm": 1.6697702407836914, "learning_rate": 3.61019078976781e-06, "loss": 0.3925, "step": 32465 }, { "epoch": 0.720746717572502, "grad_norm": 1.8893567323684692, "learning_rate": 3.607509065022101e-06, "loss": 0.4252, "step": 32470 }, { "epoch": 0.7208577041320297, "grad_norm": 1.0012366771697998, "learning_rate": 3.604828117453999e-06, "loss": 0.4189, "step": 32475 }, { "epoch": 0.7209686906915572, "grad_norm": 1.221556544303894, "learning_rate": 3.602147947389446e-06, "loss": 0.4251, "step": 32480 }, { "epoch": 0.7210796772510849, "grad_norm": 1.0607415437698364, "learning_rate": 3.5994685551542917e-06, "loss": 0.5034, "step": 32485 }, { "epoch": 0.7211906638106126, "grad_norm": 0.768079936504364, "learning_rate": 3.5967899410742812e-06, "loss": 0.3316, "step": 32490 }, { "epoch": 0.7213016503701402, "grad_norm": 0.9735240340232849, "learning_rate": 3.5941121054750794e-06, "loss": 0.4515, "step": 32495 }, { "epoch": 0.7214126369296678, "grad_norm": 1.3301113843917847, "learning_rate": 3.5914350486822403e-06, "loss": 0.2459, "step": 32500 }, { "epoch": 0.7215236234891954, "grad_norm": 1.4246984720230103, "learning_rate": 3.5887587710212346e-06, "loss": 0.3347, "step": 32505 }, { "epoch": 0.7216346100487231, "grad_norm": 0.778403103351593, "learning_rate": 3.58608327281744e-06, "loss": 0.3325, "step": 32510 }, { "epoch": 0.7217455966082508, "grad_norm": 1.7449480295181274, "learning_rate": 3.5834085543961274e-06, "loss": 0.5193, "step": 32515 }, { "epoch": 0.7218565831677783, "grad_norm": 1.47147798538208, "learning_rate": 3.5807346160824863e-06, "loss": 0.3736, "step": 32520 }, { "epoch": 0.721967569727306, "grad_norm": 1.3669594526290894, "learning_rate": 3.5780614582015983e-06, "loss": 0.4206, "step": 32525 }, { "epoch": 0.7220785562868337, "grad_norm": 1.250982403755188, "learning_rate": 3.5753890810784643e-06, "loss": 0.3494, "step": 32530 }, { "epoch": 0.7221895428463613, "grad_norm": 2.3692402839660645, "learning_rate": 3.5727174850379766e-06, "loss": 0.3719, "step": 32535 }, { "epoch": 0.722300529405889, "grad_norm": 0.8750824928283691, "learning_rate": 3.5700466704049442e-06, "loss": 0.4329, "step": 32540 }, { "epoch": 0.7224115159654166, "grad_norm": 1.0827548503875732, "learning_rate": 3.5673766375040695e-06, "loss": 0.3744, "step": 32545 }, { "epoch": 0.7225225025249442, "grad_norm": 1.5364011526107788, "learning_rate": 3.5647073866599736e-06, "loss": 0.4377, "step": 32550 }, { "epoch": 0.7226334890844719, "grad_norm": 2.185579776763916, "learning_rate": 3.562038918197168e-06, "loss": 0.3987, "step": 32555 }, { "epoch": 0.7227444756439995, "grad_norm": 1.3342119455337524, "learning_rate": 3.559371232440083e-06, "loss": 0.4989, "step": 32560 }, { "epoch": 0.7228554622035271, "grad_norm": 1.243114948272705, "learning_rate": 3.55670432971304e-06, "loss": 0.2273, "step": 32565 }, { "epoch": 0.7229664487630548, "grad_norm": 1.3207345008850098, "learning_rate": 3.5540382103402795e-06, "loss": 0.5867, "step": 32570 }, { "epoch": 0.7230774353225824, "grad_norm": 1.442366600036621, "learning_rate": 3.551372874645931e-06, "loss": 0.3574, "step": 32575 }, { "epoch": 0.7231884218821101, "grad_norm": 1.133528232574463, "learning_rate": 3.5487083229540453e-06, "loss": 0.3784, "step": 32580 }, { "epoch": 0.7232994084416378, "grad_norm": 0.8515037894248962, "learning_rate": 3.5460445555885612e-06, "loss": 0.3293, "step": 32585 }, { "epoch": 0.7234103950011653, "grad_norm": 0.9453320503234863, "learning_rate": 3.5433815728733366e-06, "loss": 0.3655, "step": 32590 }, { "epoch": 0.723521381560693, "grad_norm": 1.4416009187698364, "learning_rate": 3.540719375132129e-06, "loss": 0.4351, "step": 32595 }, { "epoch": 0.7236323681202207, "grad_norm": 0.6182461977005005, "learning_rate": 3.538057962688595e-06, "loss": 0.3649, "step": 32600 }, { "epoch": 0.7237433546797483, "grad_norm": 0.5999155640602112, "learning_rate": 3.535397335866304e-06, "loss": 0.307, "step": 32605 }, { "epoch": 0.723854341239276, "grad_norm": 0.9559226036071777, "learning_rate": 3.5327374949887216e-06, "loss": 0.45, "step": 32610 }, { "epoch": 0.7239653277988035, "grad_norm": 1.6150270700454712, "learning_rate": 3.5300784403792256e-06, "loss": 0.4936, "step": 32615 }, { "epoch": 0.7240763143583312, "grad_norm": 1.1039475202560425, "learning_rate": 3.5274201723610967e-06, "loss": 0.3893, "step": 32620 }, { "epoch": 0.7241873009178589, "grad_norm": 1.903681993484497, "learning_rate": 3.524762691257513e-06, "loss": 0.5153, "step": 32625 }, { "epoch": 0.7242982874773864, "grad_norm": 0.9269314408302307, "learning_rate": 3.5221059973915683e-06, "loss": 0.5013, "step": 32630 }, { "epoch": 0.7244092740369141, "grad_norm": 1.893462896347046, "learning_rate": 3.5194500910862485e-06, "loss": 0.3573, "step": 32635 }, { "epoch": 0.7245202605964418, "grad_norm": 1.3693534135818481, "learning_rate": 3.5167949726644545e-06, "loss": 0.4122, "step": 32640 }, { "epoch": 0.7246312471559694, "grad_norm": 1.0553785562515259, "learning_rate": 3.5141406424489823e-06, "loss": 0.3246, "step": 32645 }, { "epoch": 0.7247422337154971, "grad_norm": 0.5601930618286133, "learning_rate": 3.5114871007625397e-06, "loss": 0.1944, "step": 32650 }, { "epoch": 0.7248532202750247, "grad_norm": 0.5924381613731384, "learning_rate": 3.5088343479277365e-06, "loss": 0.4126, "step": 32655 }, { "epoch": 0.7249642068345523, "grad_norm": 1.915968418121338, "learning_rate": 3.506182384267082e-06, "loss": 0.411, "step": 32660 }, { "epoch": 0.72507519339408, "grad_norm": 1.2195994853973389, "learning_rate": 3.503531210102996e-06, "loss": 0.4658, "step": 32665 }, { "epoch": 0.7251861799536076, "grad_norm": 1.0181869268417358, "learning_rate": 3.5008808257577955e-06, "loss": 0.4259, "step": 32670 }, { "epoch": 0.7252971665131352, "grad_norm": 1.091694712638855, "learning_rate": 3.498231231553708e-06, "loss": 0.5182, "step": 32675 }, { "epoch": 0.7254081530726629, "grad_norm": 1.29982590675354, "learning_rate": 3.4955824278128657e-06, "loss": 0.4597, "step": 32680 }, { "epoch": 0.7255191396321905, "grad_norm": 1.1753360033035278, "learning_rate": 3.492934414857294e-06, "loss": 0.3575, "step": 32685 }, { "epoch": 0.7256301261917182, "grad_norm": 1.8706086874008179, "learning_rate": 3.4902871930089365e-06, "loss": 0.5171, "step": 32690 }, { "epoch": 0.7257411127512459, "grad_norm": 0.8465336561203003, "learning_rate": 3.487640762589627e-06, "loss": 0.3946, "step": 32695 }, { "epoch": 0.7258520993107734, "grad_norm": 1.5317870378494263, "learning_rate": 3.484995123921112e-06, "loss": 0.4923, "step": 32700 }, { "epoch": 0.7259630858703011, "grad_norm": 1.4120965003967285, "learning_rate": 3.482350277325045e-06, "loss": 0.472, "step": 32705 }, { "epoch": 0.7260740724298288, "grad_norm": 1.5469073057174683, "learning_rate": 3.479706223122968e-06, "loss": 0.5002, "step": 32710 }, { "epoch": 0.7261850589893564, "grad_norm": 1.0146976709365845, "learning_rate": 3.477062961636346e-06, "loss": 0.3272, "step": 32715 }, { "epoch": 0.726296045548884, "grad_norm": 1.6725594997406006, "learning_rate": 3.474420493186528e-06, "loss": 0.278, "step": 32720 }, { "epoch": 0.7264070321084116, "grad_norm": 1.9448822736740112, "learning_rate": 3.4717788180947855e-06, "loss": 0.2973, "step": 32725 }, { "epoch": 0.7265180186679393, "grad_norm": 1.0481772422790527, "learning_rate": 3.4691379366822765e-06, "loss": 0.2087, "step": 32730 }, { "epoch": 0.726629005227467, "grad_norm": 1.0238654613494873, "learning_rate": 3.466497849270075e-06, "loss": 0.4737, "step": 32735 }, { "epoch": 0.7267399917869946, "grad_norm": 1.1857450008392334, "learning_rate": 3.463858556179156e-06, "loss": 0.3494, "step": 32740 }, { "epoch": 0.7268509783465222, "grad_norm": 1.1542832851409912, "learning_rate": 3.46122005773039e-06, "loss": 0.3563, "step": 32745 }, { "epoch": 0.7269619649060499, "grad_norm": 0.7841120362281799, "learning_rate": 3.458582354244564e-06, "loss": 0.2948, "step": 32750 }, { "epoch": 0.7270729514655775, "grad_norm": 1.2427359819412231, "learning_rate": 3.4559454460423535e-06, "loss": 0.2064, "step": 32755 }, { "epoch": 0.7271839380251052, "grad_norm": 2.157987594604492, "learning_rate": 3.453309333444349e-06, "loss": 0.3572, "step": 32760 }, { "epoch": 0.7272949245846329, "grad_norm": 1.419998049736023, "learning_rate": 3.450674016771042e-06, "loss": 0.4259, "step": 32765 }, { "epoch": 0.7274059111441604, "grad_norm": 1.2902168035507202, "learning_rate": 3.448039496342821e-06, "loss": 0.3996, "step": 32770 }, { "epoch": 0.7275168977036881, "grad_norm": 1.4809598922729492, "learning_rate": 3.445405772479987e-06, "loss": 0.3526, "step": 32775 }, { "epoch": 0.7276278842632157, "grad_norm": 1.3631724119186401, "learning_rate": 3.4427728455027343e-06, "loss": 0.3453, "step": 32780 }, { "epoch": 0.7277388708227434, "grad_norm": 1.4944729804992676, "learning_rate": 3.4401407157311706e-06, "loss": 0.3023, "step": 32785 }, { "epoch": 0.727849857382271, "grad_norm": 1.2906522750854492, "learning_rate": 3.4375093834852956e-06, "loss": 0.4278, "step": 32790 }, { "epoch": 0.7279608439417986, "grad_norm": 1.471267580986023, "learning_rate": 3.4348788490850236e-06, "loss": 0.2057, "step": 32795 }, { "epoch": 0.7280718305013263, "grad_norm": 1.5331370830535889, "learning_rate": 3.4322491128501613e-06, "loss": 0.3482, "step": 32800 }, { "epoch": 0.728182817060854, "grad_norm": 1.3465474843978882, "learning_rate": 3.429620175100428e-06, "loss": 0.5177, "step": 32805 }, { "epoch": 0.7282938036203815, "grad_norm": 0.895503580570221, "learning_rate": 3.4269920361554342e-06, "loss": 0.3538, "step": 32810 }, { "epoch": 0.7284047901799092, "grad_norm": 1.9255492687225342, "learning_rate": 3.424364696334709e-06, "loss": 0.4316, "step": 32815 }, { "epoch": 0.7285157767394369, "grad_norm": 1.0728861093521118, "learning_rate": 3.421738155957668e-06, "loss": 0.411, "step": 32820 }, { "epoch": 0.7286267632989645, "grad_norm": 1.22779381275177, "learning_rate": 3.419112415343643e-06, "loss": 0.3588, "step": 32825 }, { "epoch": 0.7287377498584922, "grad_norm": 1.1258268356323242, "learning_rate": 3.416487474811856e-06, "loss": 0.3582, "step": 32830 }, { "epoch": 0.7288487364180197, "grad_norm": 1.030422568321228, "learning_rate": 3.4138633346814463e-06, "loss": 0.5491, "step": 32835 }, { "epoch": 0.7289597229775474, "grad_norm": 1.0262194871902466, "learning_rate": 3.4112399952714414e-06, "loss": 0.3523, "step": 32840 }, { "epoch": 0.7290707095370751, "grad_norm": 1.7964067459106445, "learning_rate": 3.4086174569007802e-06, "loss": 0.4073, "step": 32845 }, { "epoch": 0.7291816960966027, "grad_norm": 1.8312066793441772, "learning_rate": 3.4059957198883067e-06, "loss": 0.5042, "step": 32850 }, { "epoch": 0.7292926826561303, "grad_norm": 0.892008900642395, "learning_rate": 3.403374784552754e-06, "loss": 0.3988, "step": 32855 }, { "epoch": 0.729403669215658, "grad_norm": 1.1622059345245361, "learning_rate": 3.4007546512127764e-06, "loss": 0.3452, "step": 32860 }, { "epoch": 0.7295146557751856, "grad_norm": 0.48767390847206116, "learning_rate": 3.3981353201869126e-06, "loss": 0.254, "step": 32865 }, { "epoch": 0.7296256423347133, "grad_norm": 1.3434008359909058, "learning_rate": 3.395516791793616e-06, "loss": 0.3915, "step": 32870 }, { "epoch": 0.729736628894241, "grad_norm": 1.2038825750350952, "learning_rate": 3.3928990663512416e-06, "loss": 0.5588, "step": 32875 }, { "epoch": 0.7298476154537685, "grad_norm": 1.5192091464996338, "learning_rate": 3.3902821441780366e-06, "loss": 0.249, "step": 32880 }, { "epoch": 0.7299586020132962, "grad_norm": 0.9307333827018738, "learning_rate": 3.3876660255921646e-06, "loss": 0.4169, "step": 32885 }, { "epoch": 0.7300695885728238, "grad_norm": 1.7946795225143433, "learning_rate": 3.385050710911677e-06, "loss": 0.615, "step": 32890 }, { "epoch": 0.7301805751323515, "grad_norm": 1.5713242292404175, "learning_rate": 3.382436200454543e-06, "loss": 0.4895, "step": 32895 }, { "epoch": 0.7302915616918791, "grad_norm": 1.2957006692886353, "learning_rate": 3.3798224945386192e-06, "loss": 0.4357, "step": 32900 }, { "epoch": 0.7304025482514067, "grad_norm": 1.557170033454895, "learning_rate": 3.377209593481674e-06, "loss": 0.3959, "step": 32905 }, { "epoch": 0.7305135348109344, "grad_norm": 1.1738910675048828, "learning_rate": 3.3745974976013785e-06, "loss": 0.4669, "step": 32910 }, { "epoch": 0.7306245213704621, "grad_norm": 1.196847915649414, "learning_rate": 3.3719862072152964e-06, "loss": 0.353, "step": 32915 }, { "epoch": 0.7307355079299896, "grad_norm": 2.7891581058502197, "learning_rate": 3.369375722640905e-06, "loss": 0.3949, "step": 32920 }, { "epoch": 0.7308464944895173, "grad_norm": 1.387994408607483, "learning_rate": 3.366766044195574e-06, "loss": 0.3498, "step": 32925 }, { "epoch": 0.730957481049045, "grad_norm": 1.214930534362793, "learning_rate": 3.3641571721965802e-06, "loss": 0.4307, "step": 32930 }, { "epoch": 0.7310684676085726, "grad_norm": 1.103918194770813, "learning_rate": 3.3615491069611062e-06, "loss": 0.2793, "step": 32935 }, { "epoch": 0.7311794541681003, "grad_norm": 0.9579012989997864, "learning_rate": 3.358941848806224e-06, "loss": 0.4801, "step": 32940 }, { "epoch": 0.7312904407276278, "grad_norm": 1.5926653146743774, "learning_rate": 3.3563353980489244e-06, "loss": 0.4244, "step": 32945 }, { "epoch": 0.7314014272871555, "grad_norm": 0.8913977742195129, "learning_rate": 3.353729755006081e-06, "loss": 0.3668, "step": 32950 }, { "epoch": 0.7315124138466832, "grad_norm": 0.952574610710144, "learning_rate": 3.351124919994485e-06, "loss": 0.3864, "step": 32955 }, { "epoch": 0.7316234004062108, "grad_norm": 0.9258487224578857, "learning_rate": 3.3485208933308253e-06, "loss": 0.3548, "step": 32960 }, { "epoch": 0.7317343869657384, "grad_norm": 1.7011442184448242, "learning_rate": 3.3459176753316857e-06, "loss": 0.3893, "step": 32965 }, { "epoch": 0.7318453735252661, "grad_norm": 1.0624943971633911, "learning_rate": 3.3433152663135614e-06, "loss": 0.442, "step": 32970 }, { "epoch": 0.7319563600847937, "grad_norm": 1.2660802602767944, "learning_rate": 3.3407136665928395e-06, "loss": 0.3552, "step": 32975 }, { "epoch": 0.7320673466443214, "grad_norm": 1.497206449508667, "learning_rate": 3.338112876485821e-06, "loss": 0.3593, "step": 32980 }, { "epoch": 0.7321783332038491, "grad_norm": 0.6715766787528992, "learning_rate": 3.3355128963086913e-06, "loss": 0.2989, "step": 32985 }, { "epoch": 0.7322893197633766, "grad_norm": 0.7257074117660522, "learning_rate": 3.3329137263775534e-06, "loss": 0.3485, "step": 32990 }, { "epoch": 0.7324003063229043, "grad_norm": 0.8963240385055542, "learning_rate": 3.3303153670084086e-06, "loss": 0.3927, "step": 32995 }, { "epoch": 0.7325112928824319, "grad_norm": 0.8797928690910339, "learning_rate": 3.32771781851715e-06, "loss": 0.4239, "step": 33000 }, { "epoch": 0.7326222794419596, "grad_norm": 0.583372175693512, "learning_rate": 3.3251210812195843e-06, "loss": 0.3269, "step": 33005 }, { "epoch": 0.7327332660014872, "grad_norm": 1.7786016464233398, "learning_rate": 3.32252515543141e-06, "loss": 0.4063, "step": 33010 }, { "epoch": 0.7328442525610148, "grad_norm": 1.127631425857544, "learning_rate": 3.319930041468231e-06, "loss": 0.5401, "step": 33015 }, { "epoch": 0.7329552391205425, "grad_norm": 1.1388825178146362, "learning_rate": 3.3173357396455587e-06, "loss": 0.3118, "step": 33020 }, { "epoch": 0.7330662256800702, "grad_norm": 0.7228020429611206, "learning_rate": 3.314742250278792e-06, "loss": 0.3522, "step": 33025 }, { "epoch": 0.7331772122395978, "grad_norm": 1.0773768424987793, "learning_rate": 3.3121495736832445e-06, "loss": 0.4434, "step": 33030 }, { "epoch": 0.7332881987991254, "grad_norm": 1.291050910949707, "learning_rate": 3.3095577101741192e-06, "loss": 0.3147, "step": 33035 }, { "epoch": 0.7333991853586531, "grad_norm": 1.4104204177856445, "learning_rate": 3.306966660066534e-06, "loss": 0.4284, "step": 33040 }, { "epoch": 0.7335101719181807, "grad_norm": 1.1272939443588257, "learning_rate": 3.3043764236754916e-06, "loss": 0.4002, "step": 33045 }, { "epoch": 0.7336211584777084, "grad_norm": 1.4456478357315063, "learning_rate": 3.3017870013159116e-06, "loss": 0.4438, "step": 33050 }, { "epoch": 0.7337321450372359, "grad_norm": 1.3265424966812134, "learning_rate": 3.2991983933025997e-06, "loss": 0.3728, "step": 33055 }, { "epoch": 0.7338431315967636, "grad_norm": 1.2507997751235962, "learning_rate": 3.2966105999502786e-06, "loss": 0.3079, "step": 33060 }, { "epoch": 0.7339541181562913, "grad_norm": 0.9457982778549194, "learning_rate": 3.2940236215735554e-06, "loss": 0.2878, "step": 33065 }, { "epoch": 0.7340651047158189, "grad_norm": 1.8294035196304321, "learning_rate": 3.2914374584869547e-06, "loss": 0.3948, "step": 33070 }, { "epoch": 0.7341760912753466, "grad_norm": 0.5478452444076538, "learning_rate": 3.2888521110048844e-06, "loss": 0.4006, "step": 33075 }, { "epoch": 0.7342870778348742, "grad_norm": 1.1786937713623047, "learning_rate": 3.286267579441671e-06, "loss": 0.4344, "step": 33080 }, { "epoch": 0.7343980643944018, "grad_norm": 0.8153396844863892, "learning_rate": 3.2836838641115266e-06, "loss": 0.4621, "step": 33085 }, { "epoch": 0.7345090509539295, "grad_norm": 1.1007554531097412, "learning_rate": 3.2811009653285753e-06, "loss": 0.4171, "step": 33090 }, { "epoch": 0.7346200375134572, "grad_norm": 1.808050274848938, "learning_rate": 3.2785188834068325e-06, "loss": 0.3717, "step": 33095 }, { "epoch": 0.7347310240729847, "grad_norm": 1.3318843841552734, "learning_rate": 3.275937618660221e-06, "loss": 0.4008, "step": 33100 }, { "epoch": 0.7348420106325124, "grad_norm": 0.7449874877929688, "learning_rate": 3.273357171402567e-06, "loss": 0.3371, "step": 33105 }, { "epoch": 0.73495299719204, "grad_norm": 1.0369887351989746, "learning_rate": 3.270777541947586e-06, "loss": 0.4029, "step": 33110 }, { "epoch": 0.7350639837515677, "grad_norm": 0.887763261795044, "learning_rate": 3.268198730608906e-06, "loss": 0.3888, "step": 33115 }, { "epoch": 0.7351749703110954, "grad_norm": 0.9809614419937134, "learning_rate": 3.265620737700044e-06, "loss": 0.4863, "step": 33120 }, { "epoch": 0.7352859568706229, "grad_norm": 1.8363839387893677, "learning_rate": 3.2630435635344283e-06, "loss": 0.4954, "step": 33125 }, { "epoch": 0.7353969434301506, "grad_norm": 1.0824159383773804, "learning_rate": 3.260467208425384e-06, "loss": 0.3013, "step": 33130 }, { "epoch": 0.7355079299896783, "grad_norm": 1.8370351791381836, "learning_rate": 3.257891672686132e-06, "loss": 0.2907, "step": 33135 }, { "epoch": 0.7356189165492059, "grad_norm": 1.621275782585144, "learning_rate": 3.2553169566298017e-06, "loss": 0.4826, "step": 33140 }, { "epoch": 0.7357299031087335, "grad_norm": 1.2516789436340332, "learning_rate": 3.2527430605694134e-06, "loss": 0.3353, "step": 33145 }, { "epoch": 0.7358408896682612, "grad_norm": 1.3625743389129639, "learning_rate": 3.250169984817897e-06, "loss": 0.2859, "step": 33150 }, { "epoch": 0.7359518762277888, "grad_norm": 1.683611512184143, "learning_rate": 3.2475977296880747e-06, "loss": 0.5756, "step": 33155 }, { "epoch": 0.7360628627873165, "grad_norm": 0.8776915669441223, "learning_rate": 3.2450262954926746e-06, "loss": 0.3158, "step": 33160 }, { "epoch": 0.736173849346844, "grad_norm": 0.9705437421798706, "learning_rate": 3.2424556825443252e-06, "loss": 0.418, "step": 33165 }, { "epoch": 0.7362848359063717, "grad_norm": 1.0836580991744995, "learning_rate": 3.2398858911555486e-06, "loss": 0.3985, "step": 33170 }, { "epoch": 0.7363958224658994, "grad_norm": 1.2586573362350464, "learning_rate": 3.237316921638777e-06, "loss": 0.3557, "step": 33175 }, { "epoch": 0.736506809025427, "grad_norm": 0.9279100298881531, "learning_rate": 3.23474877430633e-06, "loss": 0.3226, "step": 33180 }, { "epoch": 0.7366177955849547, "grad_norm": 1.4810030460357666, "learning_rate": 3.2321814494704384e-06, "loss": 0.3564, "step": 33185 }, { "epoch": 0.7367287821444823, "grad_norm": 0.5548216104507446, "learning_rate": 3.2296149474432325e-06, "loss": 0.3007, "step": 33190 }, { "epoch": 0.7368397687040099, "grad_norm": 0.4443269968032837, "learning_rate": 3.2270492685367315e-06, "loss": 0.4224, "step": 33195 }, { "epoch": 0.7369507552635376, "grad_norm": 1.2632858753204346, "learning_rate": 3.2244844130628684e-06, "loss": 0.4178, "step": 33200 }, { "epoch": 0.7370617418230653, "grad_norm": 1.9343825578689575, "learning_rate": 3.2219203813334643e-06, "loss": 0.3557, "step": 33205 }, { "epoch": 0.7371727283825928, "grad_norm": 1.1359671354293823, "learning_rate": 3.2193571736602482e-06, "loss": 0.4858, "step": 33210 }, { "epoch": 0.7372837149421205, "grad_norm": 1.0493714809417725, "learning_rate": 3.2167947903548503e-06, "loss": 0.5146, "step": 33215 }, { "epoch": 0.7373947015016481, "grad_norm": 1.2342268228530884, "learning_rate": 3.2142332317287884e-06, "loss": 0.4715, "step": 33220 }, { "epoch": 0.7375056880611758, "grad_norm": 1.3913966417312622, "learning_rate": 3.2116724980934964e-06, "loss": 0.2505, "step": 33225 }, { "epoch": 0.7376166746207035, "grad_norm": 1.4098409414291382, "learning_rate": 3.2091125897602927e-06, "loss": 0.447, "step": 33230 }, { "epoch": 0.737727661180231, "grad_norm": 0.5722621083259583, "learning_rate": 3.2065535070404085e-06, "loss": 0.4938, "step": 33235 }, { "epoch": 0.7378386477397587, "grad_norm": 1.978156328201294, "learning_rate": 3.2039952502449624e-06, "loss": 0.3224, "step": 33240 }, { "epoch": 0.7379496342992864, "grad_norm": 1.9196401834487915, "learning_rate": 3.2014378196849803e-06, "loss": 0.4027, "step": 33245 }, { "epoch": 0.738060620858814, "grad_norm": 1.0648365020751953, "learning_rate": 3.1988812156713923e-06, "loss": 0.3652, "step": 33250 }, { "epoch": 0.7381716074183416, "grad_norm": 1.3732563257217407, "learning_rate": 3.1963254385150133e-06, "loss": 0.3899, "step": 33255 }, { "epoch": 0.7382825939778693, "grad_norm": 1.086742877960205, "learning_rate": 3.193770488526573e-06, "loss": 0.2905, "step": 33260 }, { "epoch": 0.7383935805373969, "grad_norm": 1.2955430746078491, "learning_rate": 3.1912163660166873e-06, "loss": 0.4638, "step": 33265 }, { "epoch": 0.7385045670969246, "grad_norm": 0.7780910134315491, "learning_rate": 3.18866307129588e-06, "loss": 0.4211, "step": 33270 }, { "epoch": 0.7386155536564522, "grad_norm": 2.104405403137207, "learning_rate": 3.1861106046745773e-06, "loss": 0.4243, "step": 33275 }, { "epoch": 0.7387265402159798, "grad_norm": 1.490172028541565, "learning_rate": 3.183558966463092e-06, "loss": 0.4406, "step": 33280 }, { "epoch": 0.7388375267755075, "grad_norm": 0.8644965291023254, "learning_rate": 3.18100815697165e-06, "loss": 0.1803, "step": 33285 }, { "epoch": 0.7389485133350351, "grad_norm": 0.9825798869132996, "learning_rate": 3.178458176510367e-06, "loss": 0.3847, "step": 33290 }, { "epoch": 0.7390594998945628, "grad_norm": 0.8776482343673706, "learning_rate": 3.1759090253892578e-06, "loss": 0.4212, "step": 33295 }, { "epoch": 0.7391704864540904, "grad_norm": 1.527934193611145, "learning_rate": 3.1733607039182467e-06, "loss": 0.3646, "step": 33300 }, { "epoch": 0.739281473013618, "grad_norm": 1.00637686252594, "learning_rate": 3.170813212407143e-06, "loss": 0.4107, "step": 33305 }, { "epoch": 0.7393924595731457, "grad_norm": 1.2116798162460327, "learning_rate": 3.1682665511656696e-06, "loss": 0.373, "step": 33310 }, { "epoch": 0.7395034461326734, "grad_norm": 1.912797451019287, "learning_rate": 3.1657207205034326e-06, "loss": 0.412, "step": 33315 }, { "epoch": 0.739614432692201, "grad_norm": 0.8689662218093872, "learning_rate": 3.163175720729954e-06, "loss": 0.3351, "step": 33320 }, { "epoch": 0.7397254192517286, "grad_norm": 1.001035451889038, "learning_rate": 3.1606315521546394e-06, "loss": 0.4791, "step": 33325 }, { "epoch": 0.7398364058112562, "grad_norm": 1.1467145681381226, "learning_rate": 3.158088215086802e-06, "loss": 0.2781, "step": 33330 }, { "epoch": 0.7399473923707839, "grad_norm": 1.4117281436920166, "learning_rate": 3.155545709835658e-06, "loss": 0.2971, "step": 33335 }, { "epoch": 0.7400583789303116, "grad_norm": 2.128990888595581, "learning_rate": 3.153004036710308e-06, "loss": 0.4511, "step": 33340 }, { "epoch": 0.7401693654898391, "grad_norm": 1.1109024286270142, "learning_rate": 3.1504631960197673e-06, "loss": 0.3601, "step": 33345 }, { "epoch": 0.7402803520493668, "grad_norm": 0.9951190948486328, "learning_rate": 3.147923188072938e-06, "loss": 0.3177, "step": 33350 }, { "epoch": 0.7403913386088945, "grad_norm": 0.598811686038971, "learning_rate": 3.145384013178625e-06, "loss": 0.3142, "step": 33355 }, { "epoch": 0.7405023251684221, "grad_norm": 1.0349372625350952, "learning_rate": 3.1428456716455403e-06, "loss": 0.4896, "step": 33360 }, { "epoch": 0.7406133117279498, "grad_norm": 0.933586835861206, "learning_rate": 3.1403081637822776e-06, "loss": 0.4462, "step": 33365 }, { "epoch": 0.7407242982874774, "grad_norm": 0.9369057416915894, "learning_rate": 3.1377714898973468e-06, "loss": 0.2947, "step": 33370 }, { "epoch": 0.740835284847005, "grad_norm": 0.9451455473899841, "learning_rate": 3.13523565029914e-06, "loss": 0.3599, "step": 33375 }, { "epoch": 0.7409462714065327, "grad_norm": 1.2992775440216064, "learning_rate": 3.1327006452959595e-06, "loss": 0.3006, "step": 33380 }, { "epoch": 0.7410572579660603, "grad_norm": 1.442800521850586, "learning_rate": 3.1301664751960082e-06, "loss": 0.4617, "step": 33385 }, { "epoch": 0.7411682445255879, "grad_norm": 1.0337368249893188, "learning_rate": 3.1276331403073733e-06, "loss": 0.5779, "step": 33390 }, { "epoch": 0.7412792310851156, "grad_norm": 1.5513899326324463, "learning_rate": 3.1251006409380557e-06, "loss": 0.3455, "step": 33395 }, { "epoch": 0.7413902176446432, "grad_norm": 0.8598487973213196, "learning_rate": 3.1225689773959434e-06, "loss": 0.2977, "step": 33400 }, { "epoch": 0.7415012042041709, "grad_norm": 2.030599594116211, "learning_rate": 3.120038149988832e-06, "loss": 0.308, "step": 33405 }, { "epoch": 0.7416121907636986, "grad_norm": 1.0822179317474365, "learning_rate": 3.1175081590244063e-06, "loss": 0.4726, "step": 33410 }, { "epoch": 0.7417231773232261, "grad_norm": 0.7643406391143799, "learning_rate": 3.1149790048102568e-06, "loss": 0.4757, "step": 33415 }, { "epoch": 0.7418341638827538, "grad_norm": 0.6753915548324585, "learning_rate": 3.112450687653872e-06, "loss": 0.2489, "step": 33420 }, { "epoch": 0.7419451504422815, "grad_norm": 1.1946618556976318, "learning_rate": 3.1099232078626294e-06, "loss": 0.4191, "step": 33425 }, { "epoch": 0.7420561370018091, "grad_norm": 0.712417721748352, "learning_rate": 3.107396565743821e-06, "loss": 0.3746, "step": 33430 }, { "epoch": 0.7421671235613367, "grad_norm": 0.8652156591415405, "learning_rate": 3.104870761604617e-06, "loss": 0.4765, "step": 33435 }, { "epoch": 0.7422781101208643, "grad_norm": 0.856709361076355, "learning_rate": 3.102345795752102e-06, "loss": 0.2438, "step": 33440 }, { "epoch": 0.742389096680392, "grad_norm": 2.4367215633392334, "learning_rate": 3.099821668493256e-06, "loss": 0.4003, "step": 33445 }, { "epoch": 0.7425000832399197, "grad_norm": 1.0651171207427979, "learning_rate": 3.0972983801349464e-06, "loss": 0.4672, "step": 33450 }, { "epoch": 0.7426110697994472, "grad_norm": 0.8328818082809448, "learning_rate": 3.094775930983953e-06, "loss": 0.3191, "step": 33455 }, { "epoch": 0.7427220563589749, "grad_norm": 1.5253877639770508, "learning_rate": 3.0922543213469403e-06, "loss": 0.4132, "step": 33460 }, { "epoch": 0.7428330429185026, "grad_norm": 1.3908597230911255, "learning_rate": 3.0897335515304803e-06, "loss": 0.4988, "step": 33465 }, { "epoch": 0.7429440294780302, "grad_norm": 1.2378095388412476, "learning_rate": 3.087213621841044e-06, "loss": 0.4961, "step": 33470 }, { "epoch": 0.7430550160375579, "grad_norm": 0.9367465972900391, "learning_rate": 3.0846945325849884e-06, "loss": 0.5043, "step": 33475 }, { "epoch": 0.7431660025970855, "grad_norm": 1.15652596950531, "learning_rate": 3.082176284068582e-06, "loss": 0.4179, "step": 33480 }, { "epoch": 0.7432769891566131, "grad_norm": 1.3893247842788696, "learning_rate": 3.0796588765979797e-06, "loss": 0.2992, "step": 33485 }, { "epoch": 0.7433879757161408, "grad_norm": 0.5285239219665527, "learning_rate": 3.0771423104792454e-06, "loss": 0.3843, "step": 33490 }, { "epoch": 0.7434989622756684, "grad_norm": 1.2001694440841675, "learning_rate": 3.074626586018328e-06, "loss": 0.4564, "step": 33495 }, { "epoch": 0.743609948835196, "grad_norm": 1.9519604444503784, "learning_rate": 3.0721117035210845e-06, "loss": 0.53, "step": 33500 }, { "epoch": 0.7437209353947237, "grad_norm": 0.930395245552063, "learning_rate": 3.069597663293269e-06, "loss": 0.5216, "step": 33505 }, { "epoch": 0.7438319219542513, "grad_norm": 1.4803013801574707, "learning_rate": 3.067084465640523e-06, "loss": 0.265, "step": 33510 }, { "epoch": 0.743942908513779, "grad_norm": 1.587602138519287, "learning_rate": 3.0645721108684003e-06, "loss": 0.3635, "step": 33515 }, { "epoch": 0.7440538950733067, "grad_norm": 1.2842870950698853, "learning_rate": 3.062060599282337e-06, "loss": 0.3305, "step": 33520 }, { "epoch": 0.7441648816328342, "grad_norm": 1.3928651809692383, "learning_rate": 3.059549931187682e-06, "loss": 0.5274, "step": 33525 }, { "epoch": 0.7442758681923619, "grad_norm": 1.439814567565918, "learning_rate": 3.057040106889666e-06, "loss": 0.3928, "step": 33530 }, { "epoch": 0.7443868547518896, "grad_norm": 0.9000222086906433, "learning_rate": 3.054531126693433e-06, "loss": 0.428, "step": 33535 }, { "epoch": 0.7444978413114172, "grad_norm": 1.4337481260299683, "learning_rate": 3.052022990904009e-06, "loss": 0.4436, "step": 33540 }, { "epoch": 0.7446088278709448, "grad_norm": 1.6256216764450073, "learning_rate": 3.0495156998263307e-06, "loss": 0.3843, "step": 33545 }, { "epoch": 0.7447198144304724, "grad_norm": 0.9762675762176514, "learning_rate": 3.047009253765221e-06, "loss": 0.4457, "step": 33550 }, { "epoch": 0.7448308009900001, "grad_norm": 1.7330665588378906, "learning_rate": 3.04450365302541e-06, "loss": 0.4577, "step": 33555 }, { "epoch": 0.7449417875495278, "grad_norm": 2.0685441493988037, "learning_rate": 3.0419988979115146e-06, "loss": 0.3594, "step": 33560 }, { "epoch": 0.7450527741090553, "grad_norm": 1.9236466884613037, "learning_rate": 3.0394949887280624e-06, "loss": 0.2987, "step": 33565 }, { "epoch": 0.745163760668583, "grad_norm": 1.2355669736862183, "learning_rate": 3.036991925779461e-06, "loss": 0.288, "step": 33570 }, { "epoch": 0.7452747472281107, "grad_norm": 0.5316757559776306, "learning_rate": 3.0344897093700333e-06, "loss": 0.2921, "step": 33575 }, { "epoch": 0.7453857337876383, "grad_norm": 1.079461693763733, "learning_rate": 3.031988339803983e-06, "loss": 0.4051, "step": 33580 }, { "epoch": 0.745496720347166, "grad_norm": 0.6526192426681519, "learning_rate": 3.0294878173854213e-06, "loss": 0.2468, "step": 33585 }, { "epoch": 0.7456077069066936, "grad_norm": 1.38423752784729, "learning_rate": 3.0269881424183567e-06, "loss": 0.4037, "step": 33590 }, { "epoch": 0.7457186934662212, "grad_norm": 1.5122541189193726, "learning_rate": 3.0244893152066844e-06, "loss": 0.3324, "step": 33595 }, { "epoch": 0.7458296800257489, "grad_norm": 1.4059768915176392, "learning_rate": 3.021991336054211e-06, "loss": 0.3236, "step": 33600 }, { "epoch": 0.7459406665852765, "grad_norm": 1.2875112295150757, "learning_rate": 3.0194942052646246e-06, "loss": 0.441, "step": 33605 }, { "epoch": 0.7460516531448041, "grad_norm": 1.081081748008728, "learning_rate": 3.0169979231415225e-06, "loss": 0.3409, "step": 33610 }, { "epoch": 0.7461626397043318, "grad_norm": 0.8130430579185486, "learning_rate": 3.014502489988397e-06, "loss": 0.3626, "step": 33615 }, { "epoch": 0.7462736262638594, "grad_norm": 0.7352177500724792, "learning_rate": 3.0120079061086284e-06, "loss": 0.3773, "step": 33620 }, { "epoch": 0.7463846128233871, "grad_norm": 1.0624009370803833, "learning_rate": 3.0095141718055055e-06, "loss": 0.3875, "step": 33625 }, { "epoch": 0.7464955993829148, "grad_norm": 0.5698983073234558, "learning_rate": 3.007021287382201e-06, "loss": 0.4605, "step": 33630 }, { "epoch": 0.7466065859424423, "grad_norm": 1.0034706592559814, "learning_rate": 3.004529253141797e-06, "loss": 0.306, "step": 33635 }, { "epoch": 0.74671757250197, "grad_norm": 1.4835110902786255, "learning_rate": 3.0020380693872687e-06, "loss": 0.5002, "step": 33640 }, { "epoch": 0.7468285590614977, "grad_norm": 0.8922720551490784, "learning_rate": 2.9995477364214787e-06, "loss": 0.3605, "step": 33645 }, { "epoch": 0.7469395456210253, "grad_norm": 2.575284719467163, "learning_rate": 2.9970582545472015e-06, "loss": 0.4445, "step": 33650 }, { "epoch": 0.747050532180553, "grad_norm": 1.1334099769592285, "learning_rate": 2.9945696240670905e-06, "loss": 0.3478, "step": 33655 }, { "epoch": 0.7471615187400805, "grad_norm": 1.2259379625320435, "learning_rate": 2.992081845283715e-06, "loss": 0.2761, "step": 33660 }, { "epoch": 0.7472725052996082, "grad_norm": 0.957604706287384, "learning_rate": 2.9895949184995234e-06, "loss": 0.39, "step": 33665 }, { "epoch": 0.7473834918591359, "grad_norm": 2.265244483947754, "learning_rate": 2.9871088440168696e-06, "loss": 0.3312, "step": 33670 }, { "epoch": 0.7474944784186635, "grad_norm": 1.3157933950424194, "learning_rate": 2.9846236221380055e-06, "loss": 0.3522, "step": 33675 }, { "epoch": 0.7476054649781911, "grad_norm": 1.5606595277786255, "learning_rate": 2.9821392531650717e-06, "loss": 0.3771, "step": 33680 }, { "epoch": 0.7477164515377188, "grad_norm": 1.7389330863952637, "learning_rate": 2.9796557374001145e-06, "loss": 0.3752, "step": 33685 }, { "epoch": 0.7478274380972464, "grad_norm": 1.3296996355056763, "learning_rate": 2.9771730751450645e-06, "loss": 0.3257, "step": 33690 }, { "epoch": 0.7479384246567741, "grad_norm": 1.2829476594924927, "learning_rate": 2.974691266701759e-06, "loss": 0.4326, "step": 33695 }, { "epoch": 0.7480494112163018, "grad_norm": 1.3557809591293335, "learning_rate": 2.9722103123719324e-06, "loss": 0.368, "step": 33700 }, { "epoch": 0.7481603977758293, "grad_norm": 1.0110480785369873, "learning_rate": 2.9697302124572034e-06, "loss": 0.2584, "step": 33705 }, { "epoch": 0.748271384335357, "grad_norm": 1.133504867553711, "learning_rate": 2.967250967259101e-06, "loss": 0.4974, "step": 33710 }, { "epoch": 0.7483823708948846, "grad_norm": 1.103011131286621, "learning_rate": 2.9647725770790357e-06, "loss": 0.5845, "step": 33715 }, { "epoch": 0.7484933574544123, "grad_norm": 0.8758270144462585, "learning_rate": 2.962295042218327e-06, "loss": 0.3712, "step": 33720 }, { "epoch": 0.7486043440139399, "grad_norm": 1.137786626815796, "learning_rate": 2.9598183629781875e-06, "loss": 0.4716, "step": 33725 }, { "epoch": 0.7487153305734675, "grad_norm": 0.7884349822998047, "learning_rate": 2.9573425396597166e-06, "loss": 0.4341, "step": 33730 }, { "epoch": 0.7488263171329952, "grad_norm": 1.3493396043777466, "learning_rate": 2.954867572563924e-06, "loss": 0.532, "step": 33735 }, { "epoch": 0.7489373036925229, "grad_norm": 0.6397843956947327, "learning_rate": 2.9523934619917017e-06, "loss": 0.4425, "step": 33740 }, { "epoch": 0.7490482902520504, "grad_norm": 0.7703794836997986, "learning_rate": 2.9499202082438493e-06, "loss": 0.3525, "step": 33745 }, { "epoch": 0.7491592768115781, "grad_norm": 1.2579290866851807, "learning_rate": 2.9474478116210503e-06, "loss": 0.2793, "step": 33750 }, { "epoch": 0.7492702633711058, "grad_norm": 0.9212802052497864, "learning_rate": 2.944976272423895e-06, "loss": 0.3236, "step": 33755 }, { "epoch": 0.7493812499306334, "grad_norm": 1.295153260231018, "learning_rate": 2.9425055909528654e-06, "loss": 0.3656, "step": 33760 }, { "epoch": 0.7494922364901611, "grad_norm": 1.2512061595916748, "learning_rate": 2.940035767508336e-06, "loss": 0.3759, "step": 33765 }, { "epoch": 0.7496032230496886, "grad_norm": 1.540828824043274, "learning_rate": 2.9375668023905823e-06, "loss": 0.5052, "step": 33770 }, { "epoch": 0.7497142096092163, "grad_norm": 1.717550277709961, "learning_rate": 2.9350986958997685e-06, "loss": 0.3682, "step": 33775 }, { "epoch": 0.749825196168744, "grad_norm": 0.9938486814498901, "learning_rate": 2.932631448335964e-06, "loss": 0.3751, "step": 33780 }, { "epoch": 0.7499361827282716, "grad_norm": 0.6627386212348938, "learning_rate": 2.9301650599991227e-06, "loss": 0.3568, "step": 33785 }, { "epoch": 0.7500471692877992, "grad_norm": 1.0992333889007568, "learning_rate": 2.9276995311891078e-06, "loss": 0.4247, "step": 33790 }, { "epoch": 0.7501581558473269, "grad_norm": 1.372779369354248, "learning_rate": 2.9252348622056605e-06, "loss": 0.2754, "step": 33795 }, { "epoch": 0.7502691424068545, "grad_norm": 1.7185051441192627, "learning_rate": 2.9227710533484356e-06, "loss": 0.3183, "step": 33800 }, { "epoch": 0.7503801289663822, "grad_norm": 0.9331538677215576, "learning_rate": 2.920308104916967e-06, "loss": 0.2455, "step": 33805 }, { "epoch": 0.7504911155259099, "grad_norm": 1.317017674446106, "learning_rate": 2.9178460172106992e-06, "loss": 0.4605, "step": 33810 }, { "epoch": 0.7506021020854374, "grad_norm": 1.4651226997375488, "learning_rate": 2.915384790528958e-06, "loss": 0.4375, "step": 33815 }, { "epoch": 0.7507130886449651, "grad_norm": 1.4238923788070679, "learning_rate": 2.9129244251709766e-06, "loss": 0.3796, "step": 33820 }, { "epoch": 0.7508240752044928, "grad_norm": 1.2364790439605713, "learning_rate": 2.9104649214358726e-06, "loss": 0.3157, "step": 33825 }, { "epoch": 0.7509350617640204, "grad_norm": 1.8082529306411743, "learning_rate": 2.908006279622667e-06, "loss": 0.3494, "step": 33830 }, { "epoch": 0.751046048323548, "grad_norm": 1.0577956438064575, "learning_rate": 2.9055485000302765e-06, "loss": 0.4521, "step": 33835 }, { "epoch": 0.7511570348830756, "grad_norm": 1.3225284814834595, "learning_rate": 2.9030915829575034e-06, "loss": 0.3558, "step": 33840 }, { "epoch": 0.7512680214426033, "grad_norm": 1.5698176622390747, "learning_rate": 2.9006355287030576e-06, "loss": 0.3399, "step": 33845 }, { "epoch": 0.751379008002131, "grad_norm": 0.7082293033599854, "learning_rate": 2.89818033756553e-06, "loss": 0.3808, "step": 33850 }, { "epoch": 0.7514899945616585, "grad_norm": 0.8936519622802734, "learning_rate": 2.895726009843425e-06, "loss": 0.3318, "step": 33855 }, { "epoch": 0.7516009811211862, "grad_norm": 1.417548656463623, "learning_rate": 2.893272545835121e-06, "loss": 0.5136, "step": 33860 }, { "epoch": 0.7517119676807139, "grad_norm": 1.2555241584777832, "learning_rate": 2.8908199458389075e-06, "loss": 0.4926, "step": 33865 }, { "epoch": 0.7518229542402415, "grad_norm": 1.7509957551956177, "learning_rate": 2.8883682101529655e-06, "loss": 0.2201, "step": 33870 }, { "epoch": 0.7519339407997692, "grad_norm": 1.6181340217590332, "learning_rate": 2.8859173390753627e-06, "loss": 0.3649, "step": 33875 }, { "epoch": 0.7520449273592968, "grad_norm": 1.0291986465454102, "learning_rate": 2.883467332904074e-06, "loss": 0.3769, "step": 33880 }, { "epoch": 0.7521559139188244, "grad_norm": 1.4332181215286255, "learning_rate": 2.8810181919369574e-06, "loss": 0.3164, "step": 33885 }, { "epoch": 0.7522669004783521, "grad_norm": 1.5019235610961914, "learning_rate": 2.878569916471774e-06, "loss": 0.2946, "step": 33890 }, { "epoch": 0.7523778870378797, "grad_norm": 1.932149887084961, "learning_rate": 2.8761225068061793e-06, "loss": 0.4063, "step": 33895 }, { "epoch": 0.7524888735974073, "grad_norm": 1.2563505172729492, "learning_rate": 2.8736759632377154e-06, "loss": 0.394, "step": 33900 }, { "epoch": 0.752599860156935, "grad_norm": 0.8643641471862793, "learning_rate": 2.871230286063832e-06, "loss": 0.3462, "step": 33905 }, { "epoch": 0.7527108467164626, "grad_norm": 1.325053095817566, "learning_rate": 2.8687854755818577e-06, "loss": 0.4378, "step": 33910 }, { "epoch": 0.7528218332759903, "grad_norm": 0.9804831147193909, "learning_rate": 2.866341532089031e-06, "loss": 0.4027, "step": 33915 }, { "epoch": 0.752932819835518, "grad_norm": 0.7422093749046326, "learning_rate": 2.8638984558824777e-06, "loss": 0.3409, "step": 33920 }, { "epoch": 0.7530438063950455, "grad_norm": 1.1952039003372192, "learning_rate": 2.8614562472592156e-06, "loss": 0.4869, "step": 33925 }, { "epoch": 0.7531547929545732, "grad_norm": 1.3097890615463257, "learning_rate": 2.8590149065161655e-06, "loss": 0.4153, "step": 33930 }, { "epoch": 0.7532657795141009, "grad_norm": 1.0756065845489502, "learning_rate": 2.85657443395013e-06, "loss": 0.279, "step": 33935 }, { "epoch": 0.7533767660736285, "grad_norm": 0.832936704158783, "learning_rate": 2.8541348298578207e-06, "loss": 0.4459, "step": 33940 }, { "epoch": 0.7534877526331561, "grad_norm": 1.2226253747940063, "learning_rate": 2.8516960945358307e-06, "loss": 0.4666, "step": 33945 }, { "epoch": 0.7535987391926837, "grad_norm": 0.6219229102134705, "learning_rate": 2.849258228280656e-06, "loss": 0.1928, "step": 33950 }, { "epoch": 0.7537097257522114, "grad_norm": 0.9533223509788513, "learning_rate": 2.846821231388688e-06, "loss": 0.2028, "step": 33955 }, { "epoch": 0.7538207123117391, "grad_norm": 1.187782883644104, "learning_rate": 2.8443851041561996e-06, "loss": 0.4268, "step": 33960 }, { "epoch": 0.7539316988712667, "grad_norm": 1.5726901292800903, "learning_rate": 2.841949846879377e-06, "loss": 0.4969, "step": 33965 }, { "epoch": 0.7540426854307943, "grad_norm": 1.2846169471740723, "learning_rate": 2.839515459854283e-06, "loss": 0.4282, "step": 33970 }, { "epoch": 0.754153671990322, "grad_norm": 1.3724855184555054, "learning_rate": 2.8370819433768837e-06, "loss": 0.2923, "step": 33975 }, { "epoch": 0.7542646585498496, "grad_norm": 0.7159687876701355, "learning_rate": 2.834649297743043e-06, "loss": 0.4449, "step": 33980 }, { "epoch": 0.7543756451093773, "grad_norm": 0.85330730676651, "learning_rate": 2.832217523248507e-06, "loss": 0.2278, "step": 33985 }, { "epoch": 0.754486631668905, "grad_norm": 1.3416775465011597, "learning_rate": 2.829786620188928e-06, "loss": 0.5682, "step": 33990 }, { "epoch": 0.7545976182284325, "grad_norm": 1.798352599143982, "learning_rate": 2.827356588859842e-06, "loss": 0.4677, "step": 33995 }, { "epoch": 0.7547086047879602, "grad_norm": 1.3195164203643799, "learning_rate": 2.8249274295566863e-06, "loss": 0.3804, "step": 34000 }, { "epoch": 0.7548195913474878, "grad_norm": 1.8729616403579712, "learning_rate": 2.822499142574795e-06, "loss": 0.4065, "step": 34005 }, { "epoch": 0.7549305779070155, "grad_norm": 1.4058563709259033, "learning_rate": 2.8200717282093813e-06, "loss": 0.2909, "step": 34010 }, { "epoch": 0.7550415644665431, "grad_norm": 1.6897121667861938, "learning_rate": 2.817645186755572e-06, "loss": 0.3288, "step": 34015 }, { "epoch": 0.7551525510260707, "grad_norm": 1.0009219646453857, "learning_rate": 2.8152195185083697e-06, "loss": 0.4974, "step": 34020 }, { "epoch": 0.7552635375855984, "grad_norm": 0.9866798520088196, "learning_rate": 2.812794723762685e-06, "loss": 0.3526, "step": 34025 }, { "epoch": 0.7553745241451261, "grad_norm": 2.1823205947875977, "learning_rate": 2.8103708028133113e-06, "loss": 0.4714, "step": 34030 }, { "epoch": 0.7554855107046536, "grad_norm": 1.2664459943771362, "learning_rate": 2.807947755954946e-06, "loss": 0.3761, "step": 34035 }, { "epoch": 0.7555964972641813, "grad_norm": 0.8264433145523071, "learning_rate": 2.8055255834821695e-06, "loss": 0.3104, "step": 34040 }, { "epoch": 0.755707483823709, "grad_norm": 0.9597057700157166, "learning_rate": 2.8031042856894663e-06, "loss": 0.4608, "step": 34045 }, { "epoch": 0.7558184703832366, "grad_norm": 1.1200875043869019, "learning_rate": 2.8006838628712054e-06, "loss": 0.3461, "step": 34050 }, { "epoch": 0.7559294569427643, "grad_norm": 0.4323360025882721, "learning_rate": 2.798264315321658e-06, "loss": 0.4009, "step": 34055 }, { "epoch": 0.7560404435022918, "grad_norm": 1.4403953552246094, "learning_rate": 2.79584564333498e-06, "loss": 0.3895, "step": 34060 }, { "epoch": 0.7561514300618195, "grad_norm": 1.3160420656204224, "learning_rate": 2.793427847205231e-06, "loss": 0.3576, "step": 34065 }, { "epoch": 0.7562624166213472, "grad_norm": 1.1515971422195435, "learning_rate": 2.791010927226353e-06, "loss": 0.52, "step": 34070 }, { "epoch": 0.7563734031808748, "grad_norm": 1.2929975986480713, "learning_rate": 2.7885948836921916e-06, "loss": 0.4144, "step": 34075 }, { "epoch": 0.7564843897404024, "grad_norm": 1.1152117252349854, "learning_rate": 2.7861797168964753e-06, "loss": 0.392, "step": 34080 }, { "epoch": 0.7565953762999301, "grad_norm": 0.6945995092391968, "learning_rate": 2.783765427132837e-06, "loss": 0.3099, "step": 34085 }, { "epoch": 0.7567063628594577, "grad_norm": 0.6502373218536377, "learning_rate": 2.781352014694799e-06, "loss": 0.414, "step": 34090 }, { "epoch": 0.7568173494189854, "grad_norm": 1.6204890012741089, "learning_rate": 2.7789394798757706e-06, "loss": 0.4609, "step": 34095 }, { "epoch": 0.7569283359785131, "grad_norm": 1.5944551229476929, "learning_rate": 2.776527822969066e-06, "loss": 0.2841, "step": 34100 }, { "epoch": 0.7570393225380406, "grad_norm": 1.1407469511032104, "learning_rate": 2.77411704426788e-06, "loss": 0.4497, "step": 34105 }, { "epoch": 0.7571503090975683, "grad_norm": 1.1005648374557495, "learning_rate": 2.771707144065313e-06, "loss": 0.3722, "step": 34110 }, { "epoch": 0.7572612956570959, "grad_norm": 1.2683881521224976, "learning_rate": 2.769298122654347e-06, "loss": 0.3378, "step": 34115 }, { "epoch": 0.7573722822166236, "grad_norm": 1.8083860874176025, "learning_rate": 2.7668899803278646e-06, "loss": 0.6097, "step": 34120 }, { "epoch": 0.7574832687761512, "grad_norm": 1.6240153312683105, "learning_rate": 2.764482717378644e-06, "loss": 0.3813, "step": 34125 }, { "epoch": 0.7575942553356788, "grad_norm": 1.3337078094482422, "learning_rate": 2.7620763340993452e-06, "loss": 0.3571, "step": 34130 }, { "epoch": 0.7577052418952065, "grad_norm": 1.110442042350769, "learning_rate": 2.7596708307825347e-06, "loss": 0.3753, "step": 34135 }, { "epoch": 0.7578162284547342, "grad_norm": 0.9959350824356079, "learning_rate": 2.757266207720659e-06, "loss": 0.4788, "step": 34140 }, { "epoch": 0.7579272150142617, "grad_norm": 1.6347358226776123, "learning_rate": 2.7548624652060672e-06, "loss": 0.2915, "step": 34145 }, { "epoch": 0.7580382015737894, "grad_norm": 1.3603200912475586, "learning_rate": 2.7524596035310037e-06, "loss": 0.383, "step": 34150 }, { "epoch": 0.7581491881333171, "grad_norm": 1.4851304292678833, "learning_rate": 2.7500576229875895e-06, "loss": 0.4998, "step": 34155 }, { "epoch": 0.7582601746928447, "grad_norm": 1.3021334409713745, "learning_rate": 2.7476565238678597e-06, "loss": 0.5353, "step": 34160 }, { "epoch": 0.7583711612523724, "grad_norm": 0.8948301672935486, "learning_rate": 2.7452563064637238e-06, "loss": 0.3798, "step": 34165 }, { "epoch": 0.7584821478118999, "grad_norm": 1.1484118700027466, "learning_rate": 2.742856971066996e-06, "loss": 0.3008, "step": 34170 }, { "epoch": 0.7585931343714276, "grad_norm": 1.1975305080413818, "learning_rate": 2.7404585179693822e-06, "loss": 0.3417, "step": 34175 }, { "epoch": 0.7587041209309553, "grad_norm": 1.2970155477523804, "learning_rate": 2.738060947462472e-06, "loss": 0.5035, "step": 34180 }, { "epoch": 0.7588151074904829, "grad_norm": 1.2377759218215942, "learning_rate": 2.7356642598377604e-06, "loss": 0.3494, "step": 34185 }, { "epoch": 0.7589260940500105, "grad_norm": 1.0850483179092407, "learning_rate": 2.7332684553866216e-06, "loss": 0.4834, "step": 34190 }, { "epoch": 0.7590370806095382, "grad_norm": 1.1056970357894897, "learning_rate": 2.730873534400337e-06, "loss": 0.4547, "step": 34195 }, { "epoch": 0.7591480671690658, "grad_norm": 1.525825023651123, "learning_rate": 2.728479497170066e-06, "loss": 0.3762, "step": 34200 }, { "epoch": 0.7592590537285935, "grad_norm": 1.0932745933532715, "learning_rate": 2.726086343986871e-06, "loss": 0.3346, "step": 34205 }, { "epoch": 0.7593700402881212, "grad_norm": 1.8926199674606323, "learning_rate": 2.723694075141706e-06, "loss": 0.4164, "step": 34210 }, { "epoch": 0.7594810268476487, "grad_norm": 1.1629252433776855, "learning_rate": 2.7213026909254105e-06, "loss": 0.388, "step": 34215 }, { "epoch": 0.7595920134071764, "grad_norm": 0.8054100871086121, "learning_rate": 2.7189121916287252e-06, "loss": 0.2926, "step": 34220 }, { "epoch": 0.759702999966704, "grad_norm": 1.3957380056381226, "learning_rate": 2.7165225775422745e-06, "loss": 0.3635, "step": 34225 }, { "epoch": 0.7598139865262317, "grad_norm": 1.75041925907135, "learning_rate": 2.7141338489565818e-06, "loss": 0.3999, "step": 34230 }, { "epoch": 0.7599249730857593, "grad_norm": 1.5254703760147095, "learning_rate": 2.7117460061620624e-06, "loss": 0.4837, "step": 34235 }, { "epoch": 0.7600359596452869, "grad_norm": 1.7756999731063843, "learning_rate": 2.7093590494490196e-06, "loss": 0.5549, "step": 34240 }, { "epoch": 0.7601469462048146, "grad_norm": 1.2557058334350586, "learning_rate": 2.706972979107655e-06, "loss": 0.3825, "step": 34245 }, { "epoch": 0.7602579327643423, "grad_norm": 0.9370241761207581, "learning_rate": 2.704587795428053e-06, "loss": 0.2929, "step": 34250 }, { "epoch": 0.7603689193238699, "grad_norm": 2.2293343544006348, "learning_rate": 2.702203498700201e-06, "loss": 0.2746, "step": 34255 }, { "epoch": 0.7604799058833975, "grad_norm": 1.6066590547561646, "learning_rate": 2.699820089213975e-06, "loss": 0.3987, "step": 34260 }, { "epoch": 0.7605908924429252, "grad_norm": 1.2838315963745117, "learning_rate": 2.697437567259137e-06, "loss": 0.3886, "step": 34265 }, { "epoch": 0.7607018790024528, "grad_norm": 1.2681989669799805, "learning_rate": 2.695055933125351e-06, "loss": 0.4308, "step": 34270 }, { "epoch": 0.7608128655619805, "grad_norm": 0.8416971564292908, "learning_rate": 2.692675187102164e-06, "loss": 0.4893, "step": 34275 }, { "epoch": 0.760923852121508, "grad_norm": 0.8352290987968445, "learning_rate": 2.6902953294790223e-06, "loss": 0.2652, "step": 34280 }, { "epoch": 0.7610348386810357, "grad_norm": 1.274704098701477, "learning_rate": 2.6879163605452573e-06, "loss": 0.3504, "step": 34285 }, { "epoch": 0.7611458252405634, "grad_norm": 0.6752955913543701, "learning_rate": 2.685538280590102e-06, "loss": 0.275, "step": 34290 }, { "epoch": 0.761256811800091, "grad_norm": 0.892386794090271, "learning_rate": 2.6831610899026718e-06, "loss": 0.4368, "step": 34295 }, { "epoch": 0.7613677983596187, "grad_norm": 1.7363653182983398, "learning_rate": 2.680784788771974e-06, "loss": 0.4177, "step": 34300 }, { "epoch": 0.7614787849191463, "grad_norm": 1.6341389417648315, "learning_rate": 2.678409377486918e-06, "loss": 0.3493, "step": 34305 }, { "epoch": 0.7615897714786739, "grad_norm": 1.1072078943252563, "learning_rate": 2.6760348563362912e-06, "loss": 0.3427, "step": 34310 }, { "epoch": 0.7617007580382016, "grad_norm": 1.033800482749939, "learning_rate": 2.6736612256087848e-06, "loss": 0.5172, "step": 34315 }, { "epoch": 0.7618117445977293, "grad_norm": 1.2220630645751953, "learning_rate": 2.6712884855929788e-06, "loss": 0.2853, "step": 34320 }, { "epoch": 0.7619227311572568, "grad_norm": 1.1428450345993042, "learning_rate": 2.668916636577338e-06, "loss": 0.3438, "step": 34325 }, { "epoch": 0.7620337177167845, "grad_norm": 1.0615078210830688, "learning_rate": 2.6665456788502276e-06, "loss": 0.5136, "step": 34330 }, { "epoch": 0.7621447042763121, "grad_norm": 1.0006989240646362, "learning_rate": 2.6641756126998964e-06, "loss": 0.3704, "step": 34335 }, { "epoch": 0.7622556908358398, "grad_norm": 1.4435336589813232, "learning_rate": 2.6618064384144925e-06, "loss": 0.3761, "step": 34340 }, { "epoch": 0.7623666773953675, "grad_norm": 1.9680712223052979, "learning_rate": 2.6594381562820537e-06, "loss": 0.4304, "step": 34345 }, { "epoch": 0.762477663954895, "grad_norm": 2.007110834121704, "learning_rate": 2.6570707665905026e-06, "loss": 0.4103, "step": 34350 }, { "epoch": 0.7625886505144227, "grad_norm": 1.2605485916137695, "learning_rate": 2.654704269627665e-06, "loss": 0.3096, "step": 34355 }, { "epoch": 0.7626996370739504, "grad_norm": 1.315887451171875, "learning_rate": 2.6523386656812444e-06, "loss": 0.4719, "step": 34360 }, { "epoch": 0.762810623633478, "grad_norm": 2.135716676712036, "learning_rate": 2.6499739550388505e-06, "loss": 0.3175, "step": 34365 }, { "epoch": 0.7629216101930056, "grad_norm": 0.8498549461364746, "learning_rate": 2.647610137987969e-06, "loss": 0.2739, "step": 34370 }, { "epoch": 0.7630325967525333, "grad_norm": 0.4586606025695801, "learning_rate": 2.64524721481599e-06, "loss": 0.297, "step": 34375 }, { "epoch": 0.7631435833120609, "grad_norm": 0.8612726330757141, "learning_rate": 2.6428851858101913e-06, "loss": 0.4003, "step": 34380 }, { "epoch": 0.7632545698715886, "grad_norm": 1.0840905904769897, "learning_rate": 2.6405240512577344e-06, "loss": 0.4075, "step": 34385 }, { "epoch": 0.7633655564311161, "grad_norm": 1.3596967458724976, "learning_rate": 2.638163811445685e-06, "loss": 0.414, "step": 34390 }, { "epoch": 0.7634765429906438, "grad_norm": 1.3669801950454712, "learning_rate": 2.635804466660986e-06, "loss": 0.3763, "step": 34395 }, { "epoch": 0.7635875295501715, "grad_norm": 1.8525625467300415, "learning_rate": 2.633446017190484e-06, "loss": 0.2287, "step": 34400 }, { "epoch": 0.7636985161096991, "grad_norm": 0.44575735926628113, "learning_rate": 2.631088463320911e-06, "loss": 0.4444, "step": 34405 }, { "epoch": 0.7638095026692268, "grad_norm": 0.9332336187362671, "learning_rate": 2.6287318053388877e-06, "loss": 0.2368, "step": 34410 }, { "epoch": 0.7639204892287544, "grad_norm": 1.2206798791885376, "learning_rate": 2.6263760435309317e-06, "loss": 0.4207, "step": 34415 }, { "epoch": 0.764031475788282, "grad_norm": 1.5997384786605835, "learning_rate": 2.624021178183446e-06, "loss": 0.3723, "step": 34420 }, { "epoch": 0.7641424623478097, "grad_norm": 1.4401050806045532, "learning_rate": 2.6216672095827267e-06, "loss": 0.4989, "step": 34425 }, { "epoch": 0.7642534489073374, "grad_norm": 1.4782438278198242, "learning_rate": 2.6193141380149665e-06, "loss": 0.2464, "step": 34430 }, { "epoch": 0.7643644354668649, "grad_norm": 1.1002386808395386, "learning_rate": 2.616961963766237e-06, "loss": 0.3429, "step": 34435 }, { "epoch": 0.7644754220263926, "grad_norm": 0.8980961441993713, "learning_rate": 2.614610687122515e-06, "loss": 0.4068, "step": 34440 }, { "epoch": 0.7645864085859202, "grad_norm": 0.9613227248191833, "learning_rate": 2.612260308369654e-06, "loss": 0.436, "step": 34445 }, { "epoch": 0.7646973951454479, "grad_norm": 1.1408544778823853, "learning_rate": 2.6099108277934105e-06, "loss": 0.3591, "step": 34450 }, { "epoch": 0.7648083817049756, "grad_norm": 0.9857153296470642, "learning_rate": 2.607562245679421e-06, "loss": 0.3469, "step": 34455 }, { "epoch": 0.7649193682645031, "grad_norm": 1.092454195022583, "learning_rate": 2.605214562313222e-06, "loss": 0.4233, "step": 34460 }, { "epoch": 0.7650303548240308, "grad_norm": 1.2938358783721924, "learning_rate": 2.602867777980239e-06, "loss": 0.4979, "step": 34465 }, { "epoch": 0.7651413413835585, "grad_norm": 1.9508321285247803, "learning_rate": 2.6005218929657816e-06, "loss": 0.5095, "step": 34470 }, { "epoch": 0.7652523279430861, "grad_norm": 1.2797309160232544, "learning_rate": 2.598176907555058e-06, "loss": 0.3765, "step": 34475 }, { "epoch": 0.7653633145026137, "grad_norm": 1.148760437965393, "learning_rate": 2.5958328220331597e-06, "loss": 0.3695, "step": 34480 }, { "epoch": 0.7654743010621414, "grad_norm": 0.8093987703323364, "learning_rate": 2.593489636685076e-06, "loss": 0.3263, "step": 34485 }, { "epoch": 0.765585287621669, "grad_norm": 0.899083137512207, "learning_rate": 2.5911473517956854e-06, "loss": 0.4253, "step": 34490 }, { "epoch": 0.7656962741811967, "grad_norm": 1.1059026718139648, "learning_rate": 2.588805967649749e-06, "loss": 0.3879, "step": 34495 }, { "epoch": 0.7658072607407242, "grad_norm": 1.5844160318374634, "learning_rate": 2.58646548453193e-06, "loss": 0.4274, "step": 34500 }, { "epoch": 0.7659182473002519, "grad_norm": 1.7420692443847656, "learning_rate": 2.584125902726773e-06, "loss": 0.3555, "step": 34505 }, { "epoch": 0.7660292338597796, "grad_norm": 1.1865463256835938, "learning_rate": 2.58178722251872e-06, "loss": 0.4135, "step": 34510 }, { "epoch": 0.7661402204193072, "grad_norm": 1.4520313739776611, "learning_rate": 2.579449444192095e-06, "loss": 0.5398, "step": 34515 }, { "epoch": 0.7662512069788349, "grad_norm": 1.1610015630722046, "learning_rate": 2.5771125680311227e-06, "loss": 0.4224, "step": 34520 }, { "epoch": 0.7663621935383625, "grad_norm": 1.9998167753219604, "learning_rate": 2.5747765943199065e-06, "loss": 0.3173, "step": 34525 }, { "epoch": 0.7664731800978901, "grad_norm": 1.0180845260620117, "learning_rate": 2.572441523342454e-06, "loss": 0.3311, "step": 34530 }, { "epoch": 0.7665841666574178, "grad_norm": 1.30564546585083, "learning_rate": 2.5701073553826474e-06, "loss": 0.408, "step": 34535 }, { "epoch": 0.7666951532169455, "grad_norm": 1.6283055543899536, "learning_rate": 2.5677740907242733e-06, "loss": 0.4124, "step": 34540 }, { "epoch": 0.766806139776473, "grad_norm": 1.1568033695220947, "learning_rate": 2.565441729650997e-06, "loss": 0.2596, "step": 34545 }, { "epoch": 0.7669171263360007, "grad_norm": 0.9474804401397705, "learning_rate": 2.5631102724463843e-06, "loss": 0.407, "step": 34550 }, { "epoch": 0.7670281128955283, "grad_norm": 1.356563687324524, "learning_rate": 2.56077971939388e-06, "loss": 0.4522, "step": 34555 }, { "epoch": 0.767139099455056, "grad_norm": 1.5708154439926147, "learning_rate": 2.5584500707768314e-06, "loss": 0.4666, "step": 34560 }, { "epoch": 0.7672500860145837, "grad_norm": 0.9258326292037964, "learning_rate": 2.5561213268784634e-06, "loss": 0.3345, "step": 34565 }, { "epoch": 0.7673610725741112, "grad_norm": 1.1713894605636597, "learning_rate": 2.5537934879818994e-06, "loss": 0.3471, "step": 34570 }, { "epoch": 0.7674720591336389, "grad_norm": 2.1963934898376465, "learning_rate": 2.5514665543701535e-06, "loss": 0.4385, "step": 34575 }, { "epoch": 0.7675830456931666, "grad_norm": 1.4954826831817627, "learning_rate": 2.5491405263261205e-06, "loss": 0.3973, "step": 34580 }, { "epoch": 0.7676940322526942, "grad_norm": 0.7373457551002502, "learning_rate": 2.546815404132598e-06, "loss": 0.2641, "step": 34585 }, { "epoch": 0.7678050188122219, "grad_norm": 0.7391909956932068, "learning_rate": 2.544491188072258e-06, "loss": 0.2995, "step": 34590 }, { "epoch": 0.7679160053717495, "grad_norm": 1.2540416717529297, "learning_rate": 2.5421678784276772e-06, "loss": 0.3834, "step": 34595 }, { "epoch": 0.7680269919312771, "grad_norm": 1.5087982416152954, "learning_rate": 2.539845475481316e-06, "loss": 0.4047, "step": 34600 }, { "epoch": 0.7681379784908048, "grad_norm": 0.9404889345169067, "learning_rate": 2.537523979515519e-06, "loss": 0.4289, "step": 34605 }, { "epoch": 0.7682489650503324, "grad_norm": 1.318764090538025, "learning_rate": 2.535203390812534e-06, "loss": 0.3573, "step": 34610 }, { "epoch": 0.76835995160986, "grad_norm": 0.981238603591919, "learning_rate": 2.532883709654481e-06, "loss": 0.2748, "step": 34615 }, { "epoch": 0.7684709381693877, "grad_norm": 1.5967328548431396, "learning_rate": 2.5305649363233885e-06, "loss": 0.3664, "step": 34620 }, { "epoch": 0.7685819247289153, "grad_norm": 1.0770137310028076, "learning_rate": 2.5282470711011564e-06, "loss": 0.3657, "step": 34625 }, { "epoch": 0.768692911288443, "grad_norm": 1.1292991638183594, "learning_rate": 2.525930114269587e-06, "loss": 0.4482, "step": 34630 }, { "epoch": 0.7688038978479707, "grad_norm": 0.9217401742935181, "learning_rate": 2.523614066110371e-06, "loss": 0.5845, "step": 34635 }, { "epoch": 0.7689148844074982, "grad_norm": 1.4295508861541748, "learning_rate": 2.5212989269050814e-06, "loss": 0.467, "step": 34640 }, { "epoch": 0.7690258709670259, "grad_norm": 1.5857596397399902, "learning_rate": 2.5189846969351882e-06, "loss": 0.3764, "step": 34645 }, { "epoch": 0.7691368575265536, "grad_norm": 0.6380545496940613, "learning_rate": 2.516671376482045e-06, "loss": 0.4617, "step": 34650 }, { "epoch": 0.7692478440860812, "grad_norm": 1.4921594858169556, "learning_rate": 2.5143589658268974e-06, "loss": 0.3563, "step": 34655 }, { "epoch": 0.7693588306456088, "grad_norm": 0.9895206689834595, "learning_rate": 2.5120474652508843e-06, "loss": 0.3998, "step": 34660 }, { "epoch": 0.7694698172051364, "grad_norm": 0.9758628010749817, "learning_rate": 2.509736875035026e-06, "loss": 0.4492, "step": 34665 }, { "epoch": 0.7695808037646641, "grad_norm": 1.2848100662231445, "learning_rate": 2.5074271954602404e-06, "loss": 0.4602, "step": 34670 }, { "epoch": 0.7696917903241918, "grad_norm": 1.179426670074463, "learning_rate": 2.5051184268073246e-06, "loss": 0.4438, "step": 34675 }, { "epoch": 0.7698027768837193, "grad_norm": 1.0144343376159668, "learning_rate": 2.502810569356976e-06, "loss": 0.2947, "step": 34680 }, { "epoch": 0.769913763443247, "grad_norm": 2.486269235610962, "learning_rate": 2.5005036233897763e-06, "loss": 0.3491, "step": 34685 }, { "epoch": 0.7700247500027747, "grad_norm": 1.8819888830184937, "learning_rate": 2.498197589186193e-06, "loss": 0.416, "step": 34690 }, { "epoch": 0.7701357365623023, "grad_norm": 1.1190801858901978, "learning_rate": 2.4958924670265905e-06, "loss": 0.5151, "step": 34695 }, { "epoch": 0.77024672312183, "grad_norm": 0.8236473798751831, "learning_rate": 2.4935882571912107e-06, "loss": 0.3966, "step": 34700 }, { "epoch": 0.7703577096813576, "grad_norm": 1.3798598051071167, "learning_rate": 2.4912849599602007e-06, "loss": 0.3029, "step": 34705 }, { "epoch": 0.7704686962408852, "grad_norm": 1.6030954122543335, "learning_rate": 2.4889825756135786e-06, "loss": 0.515, "step": 34710 }, { "epoch": 0.7705796828004129, "grad_norm": 0.8956512808799744, "learning_rate": 2.4866811044312667e-06, "loss": 0.4533, "step": 34715 }, { "epoch": 0.7706906693599405, "grad_norm": 1.3995407819747925, "learning_rate": 2.4843805466930706e-06, "loss": 0.4383, "step": 34720 }, { "epoch": 0.7708016559194681, "grad_norm": 0.7382140159606934, "learning_rate": 2.4820809026786787e-06, "loss": 0.3899, "step": 34725 }, { "epoch": 0.7709126424789958, "grad_norm": 1.231865406036377, "learning_rate": 2.4797821726676806e-06, "loss": 0.2791, "step": 34730 }, { "epoch": 0.7710236290385234, "grad_norm": 1.6912078857421875, "learning_rate": 2.4774843569395425e-06, "loss": 0.3465, "step": 34735 }, { "epoch": 0.7711346155980511, "grad_norm": 1.707521677017212, "learning_rate": 2.4751874557736278e-06, "loss": 0.6154, "step": 34740 }, { "epoch": 0.7712456021575788, "grad_norm": 0.8998667001724243, "learning_rate": 2.472891469449188e-06, "loss": 0.3077, "step": 34745 }, { "epoch": 0.7713565887171063, "grad_norm": 1.4956085681915283, "learning_rate": 2.4705963982453575e-06, "loss": 0.424, "step": 34750 }, { "epoch": 0.771467575276634, "grad_norm": 1.070346474647522, "learning_rate": 2.4683022424411674e-06, "loss": 0.412, "step": 34755 }, { "epoch": 0.7715785618361617, "grad_norm": 0.7793159484863281, "learning_rate": 2.466009002315529e-06, "loss": 0.4079, "step": 34760 }, { "epoch": 0.7716895483956893, "grad_norm": 0.7783902287483215, "learning_rate": 2.463716678147251e-06, "loss": 0.4412, "step": 34765 }, { "epoch": 0.7718005349552169, "grad_norm": 1.293110966682434, "learning_rate": 2.4614252702150234e-06, "loss": 0.5132, "step": 34770 }, { "epoch": 0.7719115215147445, "grad_norm": 0.8567259311676025, "learning_rate": 2.4591347787974307e-06, "loss": 0.3509, "step": 34775 }, { "epoch": 0.7720225080742722, "grad_norm": 1.187793254852295, "learning_rate": 2.4568452041729383e-06, "loss": 0.436, "step": 34780 }, { "epoch": 0.7721334946337999, "grad_norm": 1.8018677234649658, "learning_rate": 2.4545565466199115e-06, "loss": 0.4194, "step": 34785 }, { "epoch": 0.7722444811933274, "grad_norm": 2.446875810623169, "learning_rate": 2.4522688064165923e-06, "loss": 0.2922, "step": 34790 }, { "epoch": 0.7723554677528551, "grad_norm": 1.2683809995651245, "learning_rate": 2.44998198384112e-06, "loss": 0.3612, "step": 34795 }, { "epoch": 0.7724664543123828, "grad_norm": 1.1614607572555542, "learning_rate": 2.4476960791715154e-06, "loss": 0.3466, "step": 34800 }, { "epoch": 0.7725774408719104, "grad_norm": 1.0021891593933105, "learning_rate": 2.4454110926856955e-06, "loss": 0.4664, "step": 34805 }, { "epoch": 0.7726884274314381, "grad_norm": 1.3776967525482178, "learning_rate": 2.443127024661456e-06, "loss": 0.5079, "step": 34810 }, { "epoch": 0.7727994139909657, "grad_norm": 0.9636569619178772, "learning_rate": 2.4408438753764918e-06, "loss": 0.3864, "step": 34815 }, { "epoch": 0.7729104005504933, "grad_norm": 0.6618173122406006, "learning_rate": 2.438561645108375e-06, "loss": 0.4138, "step": 34820 }, { "epoch": 0.773021387110021, "grad_norm": 1.2725285291671753, "learning_rate": 2.4362803341345744e-06, "loss": 0.4558, "step": 34825 }, { "epoch": 0.7731323736695486, "grad_norm": 1.3965803384780884, "learning_rate": 2.4339999427324467e-06, "loss": 0.4042, "step": 34830 }, { "epoch": 0.7732433602290762, "grad_norm": 1.222990870475769, "learning_rate": 2.4317204711792286e-06, "loss": 0.3529, "step": 34835 }, { "epoch": 0.7733543467886039, "grad_norm": 1.7446800470352173, "learning_rate": 2.429441919752057e-06, "loss": 0.4438, "step": 34840 }, { "epoch": 0.7734653333481315, "grad_norm": 0.9652834534645081, "learning_rate": 2.4271642887279434e-06, "loss": 0.2622, "step": 34845 }, { "epoch": 0.7735763199076592, "grad_norm": 0.9489607810974121, "learning_rate": 2.424887578383799e-06, "loss": 0.4876, "step": 34850 }, { "epoch": 0.7736873064671869, "grad_norm": 1.6105998754501343, "learning_rate": 2.4226117889964206e-06, "loss": 0.3108, "step": 34855 }, { "epoch": 0.7737982930267144, "grad_norm": 1.1085927486419678, "learning_rate": 2.4203369208424853e-06, "loss": 0.3468, "step": 34860 }, { "epoch": 0.7739092795862421, "grad_norm": 1.0164639949798584, "learning_rate": 2.4180629741985707e-06, "loss": 0.4253, "step": 34865 }, { "epoch": 0.7740202661457698, "grad_norm": 1.6611783504486084, "learning_rate": 2.4157899493411274e-06, "loss": 0.3143, "step": 34870 }, { "epoch": 0.7741312527052974, "grad_norm": 1.1299711465835571, "learning_rate": 2.4135178465465103e-06, "loss": 0.4837, "step": 34875 }, { "epoch": 0.774242239264825, "grad_norm": 0.9263424873352051, "learning_rate": 2.411246666090947e-06, "loss": 0.4062, "step": 34880 }, { "epoch": 0.7743532258243526, "grad_norm": 1.4223159551620483, "learning_rate": 2.408976408250564e-06, "loss": 0.4009, "step": 34885 }, { "epoch": 0.7744642123838803, "grad_norm": 0.8300201892852783, "learning_rate": 2.4067070733013742e-06, "loss": 0.4451, "step": 34890 }, { "epoch": 0.774575198943408, "grad_norm": 1.0428576469421387, "learning_rate": 2.4044386615192682e-06, "loss": 0.3676, "step": 34895 }, { "epoch": 0.7746861855029356, "grad_norm": 0.931232750415802, "learning_rate": 2.4021711731800402e-06, "loss": 0.4564, "step": 34900 }, { "epoch": 0.7747971720624632, "grad_norm": 1.0562961101531982, "learning_rate": 2.3999046085593567e-06, "loss": 0.3875, "step": 34905 }, { "epoch": 0.7749081586219909, "grad_norm": 1.2111852169036865, "learning_rate": 2.397638967932783e-06, "loss": 0.3402, "step": 34910 }, { "epoch": 0.7750191451815185, "grad_norm": 1.5517101287841797, "learning_rate": 2.3953742515757684e-06, "loss": 0.367, "step": 34915 }, { "epoch": 0.7751301317410462, "grad_norm": 0.7939963340759277, "learning_rate": 2.3931104597636467e-06, "loss": 0.3447, "step": 34920 }, { "epoch": 0.7752411183005739, "grad_norm": 1.2602977752685547, "learning_rate": 2.3908475927716456e-06, "loss": 0.3851, "step": 34925 }, { "epoch": 0.7753521048601014, "grad_norm": 0.8524995446205139, "learning_rate": 2.388585650874873e-06, "loss": 0.3435, "step": 34930 }, { "epoch": 0.7754630914196291, "grad_norm": 1.17127525806427, "learning_rate": 2.3863246343483306e-06, "loss": 0.4395, "step": 34935 }, { "epoch": 0.7755740779791567, "grad_norm": 0.8931377530097961, "learning_rate": 2.384064543466906e-06, "loss": 0.3894, "step": 34940 }, { "epoch": 0.7756850645386844, "grad_norm": 1.257630467414856, "learning_rate": 2.3818053785053717e-06, "loss": 0.3443, "step": 34945 }, { "epoch": 0.775796051098212, "grad_norm": 1.4244590997695923, "learning_rate": 2.379547139738392e-06, "loss": 0.4465, "step": 34950 }, { "epoch": 0.7759070376577396, "grad_norm": 1.145723581314087, "learning_rate": 2.377289827440511e-06, "loss": 0.4198, "step": 34955 }, { "epoch": 0.7760180242172673, "grad_norm": 0.8434739112854004, "learning_rate": 2.3750334418861707e-06, "loss": 0.3091, "step": 34960 }, { "epoch": 0.776129010776795, "grad_norm": 1.065153956413269, "learning_rate": 2.37277798334969e-06, "loss": 0.4492, "step": 34965 }, { "epoch": 0.7762399973363225, "grad_norm": 0.8990171551704407, "learning_rate": 2.3705234521052823e-06, "loss": 0.2161, "step": 34970 }, { "epoch": 0.7763509838958502, "grad_norm": 1.2386252880096436, "learning_rate": 2.3682698484270496e-06, "loss": 0.3579, "step": 34975 }, { "epoch": 0.7764619704553779, "grad_norm": 1.7970882654190063, "learning_rate": 2.3660171725889703e-06, "loss": 0.4856, "step": 34980 }, { "epoch": 0.7765729570149055, "grad_norm": 1.218748927116394, "learning_rate": 2.363765424864923e-06, "loss": 0.4453, "step": 34985 }, { "epoch": 0.7766839435744332, "grad_norm": 0.9332694411277771, "learning_rate": 2.361514605528663e-06, "loss": 0.4588, "step": 34990 }, { "epoch": 0.7767949301339607, "grad_norm": 1.112435221672058, "learning_rate": 2.3592647148538407e-06, "loss": 0.4827, "step": 34995 }, { "epoch": 0.7769059166934884, "grad_norm": 0.8251892924308777, "learning_rate": 2.3570157531139915e-06, "loss": 0.3976, "step": 35000 }, { "epoch": 0.7770169032530161, "grad_norm": 1.2258474826812744, "learning_rate": 2.3547677205825313e-06, "loss": 0.4968, "step": 35005 }, { "epoch": 0.7771278898125437, "grad_norm": 1.9798097610473633, "learning_rate": 2.352520617532774e-06, "loss": 0.4555, "step": 35010 }, { "epoch": 0.7772388763720713, "grad_norm": 2.0439610481262207, "learning_rate": 2.350274444237911e-06, "loss": 0.2519, "step": 35015 }, { "epoch": 0.777349862931599, "grad_norm": 1.3134821653366089, "learning_rate": 2.3480292009710282e-06, "loss": 0.4432, "step": 35020 }, { "epoch": 0.7774608494911266, "grad_norm": 0.9833825826644897, "learning_rate": 2.345784888005088e-06, "loss": 0.4134, "step": 35025 }, { "epoch": 0.7775718360506543, "grad_norm": 1.082362174987793, "learning_rate": 2.3435415056129564e-06, "loss": 0.4299, "step": 35030 }, { "epoch": 0.777682822610182, "grad_norm": 1.2098311185836792, "learning_rate": 2.3412990540673663e-06, "loss": 0.3032, "step": 35035 }, { "epoch": 0.7777938091697095, "grad_norm": 1.1878077983856201, "learning_rate": 2.3390575336409547e-06, "loss": 0.2941, "step": 35040 }, { "epoch": 0.7779047957292372, "grad_norm": 0.8910760879516602, "learning_rate": 2.3368169446062328e-06, "loss": 0.4637, "step": 35045 }, { "epoch": 0.7780157822887648, "grad_norm": 1.0687915086746216, "learning_rate": 2.334577287235609e-06, "loss": 0.4609, "step": 35050 }, { "epoch": 0.7781267688482925, "grad_norm": 1.5093152523040771, "learning_rate": 2.3323385618013682e-06, "loss": 0.4844, "step": 35055 }, { "epoch": 0.7782377554078201, "grad_norm": 1.6555454730987549, "learning_rate": 2.3301007685756925e-06, "loss": 0.4811, "step": 35060 }, { "epoch": 0.7783487419673477, "grad_norm": 0.8645807504653931, "learning_rate": 2.3278639078306397e-06, "loss": 0.2037, "step": 35065 }, { "epoch": 0.7784597285268754, "grad_norm": 1.1185818910598755, "learning_rate": 2.3256279798381664e-06, "loss": 0.3339, "step": 35070 }, { "epoch": 0.7785707150864031, "grad_norm": 1.836459994316101, "learning_rate": 2.323392984870101e-06, "loss": 0.5161, "step": 35075 }, { "epoch": 0.7786817016459306, "grad_norm": 0.5720643997192383, "learning_rate": 2.3211589231981723e-06, "loss": 0.3593, "step": 35080 }, { "epoch": 0.7787926882054583, "grad_norm": 1.4042539596557617, "learning_rate": 2.3189257950939915e-06, "loss": 0.4549, "step": 35085 }, { "epoch": 0.778903674764986, "grad_norm": 1.7690019607543945, "learning_rate": 2.3166936008290486e-06, "loss": 0.3279, "step": 35090 }, { "epoch": 0.7790146613245136, "grad_norm": 3.0548744201660156, "learning_rate": 2.3144623406747335e-06, "loss": 0.3572, "step": 35095 }, { "epoch": 0.7791256478840413, "grad_norm": 1.4523555040359497, "learning_rate": 2.312232014902309e-06, "loss": 0.5051, "step": 35100 }, { "epoch": 0.7792366344435688, "grad_norm": 1.3138129711151123, "learning_rate": 2.310002623782933e-06, "loss": 0.5978, "step": 35105 }, { "epoch": 0.7793476210030965, "grad_norm": 1.1594641208648682, "learning_rate": 2.307774167587651e-06, "loss": 0.3501, "step": 35110 }, { "epoch": 0.7794586075626242, "grad_norm": 1.2157979011535645, "learning_rate": 2.3055466465873845e-06, "loss": 0.4071, "step": 35115 }, { "epoch": 0.7795695941221518, "grad_norm": 0.8386463522911072, "learning_rate": 2.303320061052955e-06, "loss": 0.4692, "step": 35120 }, { "epoch": 0.7796805806816794, "grad_norm": 1.299099087715149, "learning_rate": 2.301094411255057e-06, "loss": 0.3723, "step": 35125 }, { "epoch": 0.7797915672412071, "grad_norm": 1.3308439254760742, "learning_rate": 2.2988696974642797e-06, "loss": 0.4427, "step": 35130 }, { "epoch": 0.7799025538007347, "grad_norm": 0.8495783805847168, "learning_rate": 2.2966459199511002e-06, "loss": 0.3364, "step": 35135 }, { "epoch": 0.7800135403602624, "grad_norm": 1.6486972570419312, "learning_rate": 2.2944230789858723e-06, "loss": 0.3838, "step": 35140 }, { "epoch": 0.7801245269197901, "grad_norm": 0.9651938080787659, "learning_rate": 2.292201174838846e-06, "loss": 0.3624, "step": 35145 }, { "epoch": 0.7802355134793176, "grad_norm": 0.7932876944541931, "learning_rate": 2.2899802077801482e-06, "loss": 0.3292, "step": 35150 }, { "epoch": 0.7803465000388453, "grad_norm": 1.7208325862884521, "learning_rate": 2.2877601780798033e-06, "loss": 0.3253, "step": 35155 }, { "epoch": 0.7804574865983729, "grad_norm": 1.4360439777374268, "learning_rate": 2.2855410860077065e-06, "loss": 0.3437, "step": 35160 }, { "epoch": 0.7805684731579006, "grad_norm": 1.357067346572876, "learning_rate": 2.2833229318336537e-06, "loss": 0.3154, "step": 35165 }, { "epoch": 0.7806794597174282, "grad_norm": 1.123355507850647, "learning_rate": 2.281105715827321e-06, "loss": 0.4595, "step": 35170 }, { "epoch": 0.7807904462769558, "grad_norm": 3.049179792404175, "learning_rate": 2.278889438258266e-06, "loss": 0.4447, "step": 35175 }, { "epoch": 0.7809014328364835, "grad_norm": 0.8946663737297058, "learning_rate": 2.2766740993959404e-06, "loss": 0.3773, "step": 35180 }, { "epoch": 0.7810124193960112, "grad_norm": 1.7017005681991577, "learning_rate": 2.2744596995096733e-06, "loss": 0.4379, "step": 35185 }, { "epoch": 0.7811234059555388, "grad_norm": 1.0567814111709595, "learning_rate": 2.272246238868687e-06, "loss": 0.485, "step": 35190 }, { "epoch": 0.7812343925150664, "grad_norm": 1.122822642326355, "learning_rate": 2.2700337177420895e-06, "loss": 0.6283, "step": 35195 }, { "epoch": 0.7813453790745941, "grad_norm": 1.1927663087844849, "learning_rate": 2.267822136398864e-06, "loss": 0.4545, "step": 35200 }, { "epoch": 0.7814563656341217, "grad_norm": 1.7498384714126587, "learning_rate": 2.2656114951078957e-06, "loss": 0.2906, "step": 35205 }, { "epoch": 0.7815673521936494, "grad_norm": 1.5318573713302612, "learning_rate": 2.263401794137938e-06, "loss": 0.5141, "step": 35210 }, { "epoch": 0.7816783387531769, "grad_norm": 0.884480357170105, "learning_rate": 2.261193033757645e-06, "loss": 0.5263, "step": 35215 }, { "epoch": 0.7817893253127046, "grad_norm": 1.082189679145813, "learning_rate": 2.2589852142355516e-06, "loss": 0.4145, "step": 35220 }, { "epoch": 0.7819003118722323, "grad_norm": 1.2107759714126587, "learning_rate": 2.256778335840072e-06, "loss": 0.4267, "step": 35225 }, { "epoch": 0.7820112984317599, "grad_norm": 0.9035611152648926, "learning_rate": 2.2545723988395164e-06, "loss": 0.3775, "step": 35230 }, { "epoch": 0.7821222849912876, "grad_norm": 0.9946116209030151, "learning_rate": 2.2523674035020693e-06, "loss": 0.3925, "step": 35235 }, { "epoch": 0.7822332715508152, "grad_norm": 0.7790983319282532, "learning_rate": 2.250163350095812e-06, "loss": 0.3855, "step": 35240 }, { "epoch": 0.7823442581103428, "grad_norm": 1.2134404182434082, "learning_rate": 2.2479602388887013e-06, "loss": 0.2843, "step": 35245 }, { "epoch": 0.7824552446698705, "grad_norm": 1.49185049533844, "learning_rate": 2.245758070148587e-06, "loss": 0.3673, "step": 35250 }, { "epoch": 0.7825662312293982, "grad_norm": 0.7419693470001221, "learning_rate": 2.2435568441432034e-06, "loss": 0.3048, "step": 35255 }, { "epoch": 0.7826772177889257, "grad_norm": 1.4088214635849, "learning_rate": 2.241356561140162e-06, "loss": 0.3416, "step": 35260 }, { "epoch": 0.7827882043484534, "grad_norm": 1.0905264616012573, "learning_rate": 2.2391572214069725e-06, "loss": 0.292, "step": 35265 }, { "epoch": 0.782899190907981, "grad_norm": 1.7834142446517944, "learning_rate": 2.2369588252110175e-06, "loss": 0.3821, "step": 35270 }, { "epoch": 0.7830101774675087, "grad_norm": 0.674659788608551, "learning_rate": 2.234761372819577e-06, "loss": 0.2597, "step": 35275 }, { "epoch": 0.7831211640270364, "grad_norm": 1.2916734218597412, "learning_rate": 2.232564864499802e-06, "loss": 0.4008, "step": 35280 }, { "epoch": 0.7832321505865639, "grad_norm": 0.8347023725509644, "learning_rate": 2.2303693005187445e-06, "loss": 0.3105, "step": 35285 }, { "epoch": 0.7833431371460916, "grad_norm": 0.6374974846839905, "learning_rate": 2.228174681143327e-06, "loss": 0.3454, "step": 35290 }, { "epoch": 0.7834541237056193, "grad_norm": 0.8511948585510254, "learning_rate": 2.22598100664037e-06, "loss": 0.2354, "step": 35295 }, { "epoch": 0.7835651102651469, "grad_norm": 1.0302071571350098, "learning_rate": 2.223788277276567e-06, "loss": 0.3177, "step": 35300 }, { "epoch": 0.7836760968246745, "grad_norm": 1.5168780088424683, "learning_rate": 2.2215964933185097e-06, "loss": 0.4536, "step": 35305 }, { "epoch": 0.7837870833842022, "grad_norm": 0.7155570387840271, "learning_rate": 2.2194056550326605e-06, "loss": 0.505, "step": 35310 }, { "epoch": 0.7838980699437298, "grad_norm": 1.4938472509384155, "learning_rate": 2.217215762685381e-06, "loss": 0.4783, "step": 35315 }, { "epoch": 0.7840090565032575, "grad_norm": 1.1102867126464844, "learning_rate": 2.2150268165429035e-06, "loss": 0.1669, "step": 35320 }, { "epoch": 0.784120043062785, "grad_norm": 0.8757681250572205, "learning_rate": 2.212838816871361e-06, "loss": 0.3824, "step": 35325 }, { "epoch": 0.7842310296223127, "grad_norm": 1.2006827592849731, "learning_rate": 2.2106517639367552e-06, "loss": 0.5301, "step": 35330 }, { "epoch": 0.7843420161818404, "grad_norm": 1.375402808189392, "learning_rate": 2.208465658004986e-06, "loss": 0.4124, "step": 35335 }, { "epoch": 0.784453002741368, "grad_norm": 1.2572929859161377, "learning_rate": 2.206280499341833e-06, "loss": 0.4598, "step": 35340 }, { "epoch": 0.7845639893008957, "grad_norm": 0.8071463108062744, "learning_rate": 2.204096288212956e-06, "loss": 0.2912, "step": 35345 }, { "epoch": 0.7846749758604233, "grad_norm": 0.8503620028495789, "learning_rate": 2.2019130248839092e-06, "loss": 0.4762, "step": 35350 }, { "epoch": 0.7847859624199509, "grad_norm": 0.8228245377540588, "learning_rate": 2.1997307096201228e-06, "loss": 0.2941, "step": 35355 }, { "epoch": 0.7848969489794786, "grad_norm": 1.9255098104476929, "learning_rate": 2.1975493426869155e-06, "loss": 0.4365, "step": 35360 }, { "epoch": 0.7850079355390063, "grad_norm": 1.1783138513565063, "learning_rate": 2.195368924349495e-06, "loss": 0.5566, "step": 35365 }, { "epoch": 0.7851189220985338, "grad_norm": 1.8189682960510254, "learning_rate": 2.1931894548729425e-06, "loss": 0.3299, "step": 35370 }, { "epoch": 0.7852299086580615, "grad_norm": 1.408636212348938, "learning_rate": 2.1910109345222377e-06, "loss": 0.3573, "step": 35375 }, { "epoch": 0.7853408952175891, "grad_norm": 1.0059270858764648, "learning_rate": 2.1888333635622305e-06, "loss": 0.3375, "step": 35380 }, { "epoch": 0.7854518817771168, "grad_norm": 1.2585111856460571, "learning_rate": 2.1866567422576667e-06, "loss": 0.4384, "step": 35385 }, { "epoch": 0.7855628683366445, "grad_norm": 1.1098383665084839, "learning_rate": 2.1844810708731755e-06, "loss": 0.4599, "step": 35390 }, { "epoch": 0.785673854896172, "grad_norm": 1.0346242189407349, "learning_rate": 2.182306349673261e-06, "loss": 0.4093, "step": 35395 }, { "epoch": 0.7857848414556997, "grad_norm": 0.9923444390296936, "learning_rate": 2.180132578922326e-06, "loss": 0.3588, "step": 35400 }, { "epoch": 0.7858958280152274, "grad_norm": 0.7084795236587524, "learning_rate": 2.1779597588846426e-06, "loss": 0.2708, "step": 35405 }, { "epoch": 0.786006814574755, "grad_norm": 0.9460107088088989, "learning_rate": 2.175787889824381e-06, "loss": 0.434, "step": 35410 }, { "epoch": 0.7861178011342826, "grad_norm": 1.1864134073257446, "learning_rate": 2.1736169720055853e-06, "loss": 0.3858, "step": 35415 }, { "epoch": 0.7862287876938103, "grad_norm": 1.140289068222046, "learning_rate": 2.17144700569219e-06, "loss": 0.3212, "step": 35420 }, { "epoch": 0.7863397742533379, "grad_norm": 1.0600495338439941, "learning_rate": 2.1692779911480156e-06, "loss": 0.3844, "step": 35425 }, { "epoch": 0.7864507608128656, "grad_norm": 2.1447246074676514, "learning_rate": 2.167109928636759e-06, "loss": 0.4349, "step": 35430 }, { "epoch": 0.7865617473723931, "grad_norm": 0.8802706003189087, "learning_rate": 2.16494281842201e-06, "loss": 0.3403, "step": 35435 }, { "epoch": 0.7866727339319208, "grad_norm": 0.8940117359161377, "learning_rate": 2.162776660767233e-06, "loss": 0.5823, "step": 35440 }, { "epoch": 0.7867837204914485, "grad_norm": 1.7119009494781494, "learning_rate": 2.160611455935786e-06, "loss": 0.4244, "step": 35445 }, { "epoch": 0.7868947070509761, "grad_norm": 1.043269157409668, "learning_rate": 2.15844720419091e-06, "loss": 0.3863, "step": 35450 }, { "epoch": 0.7870056936105038, "grad_norm": 1.3950551748275757, "learning_rate": 2.1562839057957218e-06, "loss": 0.4137, "step": 35455 }, { "epoch": 0.7871166801700314, "grad_norm": 1.2941358089447021, "learning_rate": 2.154121561013233e-06, "loss": 0.4987, "step": 35460 }, { "epoch": 0.787227666729559, "grad_norm": 0.9239951372146606, "learning_rate": 2.1519601701063285e-06, "loss": 0.4469, "step": 35465 }, { "epoch": 0.7873386532890867, "grad_norm": 1.5648642778396606, "learning_rate": 2.149799733337786e-06, "loss": 0.3853, "step": 35470 }, { "epoch": 0.7874496398486144, "grad_norm": 1.719732403755188, "learning_rate": 2.1476402509702687e-06, "loss": 0.3693, "step": 35475 }, { "epoch": 0.787560626408142, "grad_norm": 1.4296544790267944, "learning_rate": 2.1454817232663117e-06, "loss": 0.3076, "step": 35480 }, { "epoch": 0.7876716129676696, "grad_norm": 1.3797460794448853, "learning_rate": 2.1433241504883463e-06, "loss": 0.477, "step": 35485 }, { "epoch": 0.7877825995271972, "grad_norm": 1.2195448875427246, "learning_rate": 2.1411675328986802e-06, "loss": 0.3835, "step": 35490 }, { "epoch": 0.7878935860867249, "grad_norm": 1.3560128211975098, "learning_rate": 2.139011870759511e-06, "loss": 0.3778, "step": 35495 }, { "epoch": 0.7880045726462526, "grad_norm": 1.120679259300232, "learning_rate": 2.1368571643329118e-06, "loss": 0.4081, "step": 35500 }, { "epoch": 0.7881155592057801, "grad_norm": 1.4044411182403564, "learning_rate": 2.134703413880851e-06, "loss": 0.5043, "step": 35505 }, { "epoch": 0.7882265457653078, "grad_norm": 1.3516502380371094, "learning_rate": 2.132550619665168e-06, "loss": 0.5035, "step": 35510 }, { "epoch": 0.7883375323248355, "grad_norm": 1.0625264644622803, "learning_rate": 2.130398781947598e-06, "loss": 0.2935, "step": 35515 }, { "epoch": 0.7884485188843631, "grad_norm": 1.0616987943649292, "learning_rate": 2.128247900989748e-06, "loss": 0.429, "step": 35520 }, { "epoch": 0.7885595054438908, "grad_norm": 1.1464285850524902, "learning_rate": 2.126097977053122e-06, "loss": 0.4682, "step": 35525 }, { "epoch": 0.7886704920034184, "grad_norm": 1.5215123891830444, "learning_rate": 2.1239490103990946e-06, "loss": 0.4505, "step": 35530 }, { "epoch": 0.788781478562946, "grad_norm": 1.3544058799743652, "learning_rate": 2.1218010012889347e-06, "loss": 0.4219, "step": 35535 }, { "epoch": 0.7888924651224737, "grad_norm": 1.2148548364639282, "learning_rate": 2.1196539499837842e-06, "loss": 0.3494, "step": 35540 }, { "epoch": 0.7890034516820013, "grad_norm": 1.4827840328216553, "learning_rate": 2.1175078567446815e-06, "loss": 0.2958, "step": 35545 }, { "epoch": 0.7891144382415289, "grad_norm": 1.3042678833007812, "learning_rate": 2.1153627218325346e-06, "loss": 0.3881, "step": 35550 }, { "epoch": 0.7892254248010566, "grad_norm": 1.068055272102356, "learning_rate": 2.1132185455081446e-06, "loss": 0.5395, "step": 35555 }, { "epoch": 0.7893364113605842, "grad_norm": 1.0008199214935303, "learning_rate": 2.1110753280321973e-06, "loss": 0.3095, "step": 35560 }, { "epoch": 0.7894473979201119, "grad_norm": 1.100245714187622, "learning_rate": 2.1089330696652498e-06, "loss": 0.3576, "step": 35565 }, { "epoch": 0.7895583844796396, "grad_norm": 2.6166369915008545, "learning_rate": 2.10679177066776e-06, "loss": 0.3863, "step": 35570 }, { "epoch": 0.7896693710391671, "grad_norm": 1.4175260066986084, "learning_rate": 2.104651431300051e-06, "loss": 0.2881, "step": 35575 }, { "epoch": 0.7897803575986948, "grad_norm": 1.2780652046203613, "learning_rate": 2.102512051822344e-06, "loss": 0.4031, "step": 35580 }, { "epoch": 0.7898913441582225, "grad_norm": 1.2362151145935059, "learning_rate": 2.1003736324947345e-06, "loss": 0.4989, "step": 35585 }, { "epoch": 0.7900023307177501, "grad_norm": 1.7409553527832031, "learning_rate": 2.098236173577205e-06, "loss": 0.377, "step": 35590 }, { "epoch": 0.7901133172772777, "grad_norm": 0.9475632905960083, "learning_rate": 2.0960996753296236e-06, "loss": 0.4269, "step": 35595 }, { "epoch": 0.7902243038368053, "grad_norm": 0.9686654210090637, "learning_rate": 2.0939641380117326e-06, "loss": 0.5675, "step": 35600 }, { "epoch": 0.790335290396333, "grad_norm": 1.8504366874694824, "learning_rate": 2.0918295618831708e-06, "loss": 0.4839, "step": 35605 }, { "epoch": 0.7904462769558607, "grad_norm": 1.4183422327041626, "learning_rate": 2.089695947203445e-06, "loss": 0.2792, "step": 35610 }, { "epoch": 0.7905572635153882, "grad_norm": 1.3813730478286743, "learning_rate": 2.087563294231958e-06, "loss": 0.2715, "step": 35615 }, { "epoch": 0.7906682500749159, "grad_norm": 0.8090710639953613, "learning_rate": 2.085431603227992e-06, "loss": 0.3649, "step": 35620 }, { "epoch": 0.7907792366344436, "grad_norm": 1.393152117729187, "learning_rate": 2.0833008744507054e-06, "loss": 0.4255, "step": 35625 }, { "epoch": 0.7908902231939712, "grad_norm": 1.1389708518981934, "learning_rate": 2.08117110815915e-06, "loss": 0.354, "step": 35630 }, { "epoch": 0.7910012097534989, "grad_norm": 1.4676787853240967, "learning_rate": 2.079042304612252e-06, "loss": 0.4842, "step": 35635 }, { "epoch": 0.7911121963130265, "grad_norm": 1.428437352180481, "learning_rate": 2.0769144640688256e-06, "loss": 0.2615, "step": 35640 }, { "epoch": 0.7912231828725541, "grad_norm": 1.4401766061782837, "learning_rate": 2.074787586787569e-06, "loss": 0.3501, "step": 35645 }, { "epoch": 0.7913341694320818, "grad_norm": 1.6893134117126465, "learning_rate": 2.0726616730270554e-06, "loss": 0.3072, "step": 35650 }, { "epoch": 0.7914451559916094, "grad_norm": 1.5719894170761108, "learning_rate": 2.070536723045752e-06, "loss": 0.5454, "step": 35655 }, { "epoch": 0.791556142551137, "grad_norm": 0.9744945168495178, "learning_rate": 2.068412737101998e-06, "loss": 0.3632, "step": 35660 }, { "epoch": 0.7916671291106647, "grad_norm": 1.5975704193115234, "learning_rate": 2.0662897154540263e-06, "loss": 0.3699, "step": 35665 }, { "epoch": 0.7917781156701923, "grad_norm": 0.754593551158905, "learning_rate": 2.06416765835994e-06, "loss": 0.2998, "step": 35670 }, { "epoch": 0.79188910222972, "grad_norm": 0.7576720118522644, "learning_rate": 2.0620465660777357e-06, "loss": 0.3634, "step": 35675 }, { "epoch": 0.7920000887892477, "grad_norm": 0.9368253946304321, "learning_rate": 2.0599264388652907e-06, "loss": 0.4255, "step": 35680 }, { "epoch": 0.7921110753487752, "grad_norm": 0.8757548332214355, "learning_rate": 2.057807276980357e-06, "loss": 0.2294, "step": 35685 }, { "epoch": 0.7922220619083029, "grad_norm": 1.1793277263641357, "learning_rate": 2.055689080680582e-06, "loss": 0.3232, "step": 35690 }, { "epoch": 0.7923330484678306, "grad_norm": 1.2925007343292236, "learning_rate": 2.0535718502234823e-06, "loss": 0.4537, "step": 35695 }, { "epoch": 0.7924440350273582, "grad_norm": 1.0207651853561401, "learning_rate": 2.0514555858664663e-06, "loss": 0.4051, "step": 35700 }, { "epoch": 0.7925550215868858, "grad_norm": 1.2424222230911255, "learning_rate": 2.0493402878668266e-06, "loss": 0.3735, "step": 35705 }, { "epoch": 0.7926660081464134, "grad_norm": 1.308300256729126, "learning_rate": 2.0472259564817265e-06, "loss": 0.5448, "step": 35710 }, { "epoch": 0.7927769947059411, "grad_norm": 1.087999701499939, "learning_rate": 2.045112591968227e-06, "loss": 0.3331, "step": 35715 }, { "epoch": 0.7928879812654688, "grad_norm": 1.1336021423339844, "learning_rate": 2.0430001945832557e-06, "loss": 0.4715, "step": 35720 }, { "epoch": 0.7929989678249963, "grad_norm": 1.1399098634719849, "learning_rate": 2.0408887645836363e-06, "loss": 0.2863, "step": 35725 }, { "epoch": 0.793109954384524, "grad_norm": 2.4566142559051514, "learning_rate": 2.03877830222607e-06, "loss": 0.5767, "step": 35730 }, { "epoch": 0.7932209409440517, "grad_norm": 1.3776051998138428, "learning_rate": 2.036668807767136e-06, "loss": 0.4465, "step": 35735 }, { "epoch": 0.7933319275035793, "grad_norm": 0.8232426047325134, "learning_rate": 2.0345602814633035e-06, "loss": 0.2845, "step": 35740 }, { "epoch": 0.793442914063107, "grad_norm": 0.9974232316017151, "learning_rate": 2.0324527235709148e-06, "loss": 0.3041, "step": 35745 }, { "epoch": 0.7935539006226346, "grad_norm": 1.5925170183181763, "learning_rate": 2.0303461343462062e-06, "loss": 0.5265, "step": 35750 }, { "epoch": 0.7936648871821622, "grad_norm": 1.1427022218704224, "learning_rate": 2.028240514045284e-06, "loss": 0.2463, "step": 35755 }, { "epoch": 0.7937758737416899, "grad_norm": 0.8311687111854553, "learning_rate": 2.0261358629241466e-06, "loss": 0.5457, "step": 35760 }, { "epoch": 0.7938868603012175, "grad_norm": 1.539837121963501, "learning_rate": 2.024032181238668e-06, "loss": 0.3874, "step": 35765 }, { "epoch": 0.7939978468607451, "grad_norm": 1.208118200302124, "learning_rate": 2.021929469244608e-06, "loss": 0.3899, "step": 35770 }, { "epoch": 0.7941088334202728, "grad_norm": 0.8833453059196472, "learning_rate": 2.019827727197605e-06, "loss": 0.4849, "step": 35775 }, { "epoch": 0.7942198199798004, "grad_norm": 1.987878680229187, "learning_rate": 2.0177269553531863e-06, "loss": 0.2693, "step": 35780 }, { "epoch": 0.7943308065393281, "grad_norm": 1.4484559297561646, "learning_rate": 2.0156271539667517e-06, "loss": 0.3459, "step": 35785 }, { "epoch": 0.7944417930988558, "grad_norm": 1.3147300481796265, "learning_rate": 2.013528323293592e-06, "loss": 0.2581, "step": 35790 }, { "epoch": 0.7945527796583833, "grad_norm": 1.0331820249557495, "learning_rate": 2.0114304635888717e-06, "loss": 0.2734, "step": 35795 }, { "epoch": 0.794663766217911, "grad_norm": 1.2817115783691406, "learning_rate": 2.009333575107647e-06, "loss": 0.3658, "step": 35800 }, { "epoch": 0.7947747527774387, "grad_norm": 1.2282030582427979, "learning_rate": 2.0072376581048445e-06, "loss": 0.3646, "step": 35805 }, { "epoch": 0.7948857393369663, "grad_norm": 1.2596557140350342, "learning_rate": 2.005142712835283e-06, "loss": 0.4325, "step": 35810 }, { "epoch": 0.794996725896494, "grad_norm": 1.4031026363372803, "learning_rate": 2.0030487395536593e-06, "loss": 0.4099, "step": 35815 }, { "epoch": 0.7951077124560215, "grad_norm": 0.8215189576148987, "learning_rate": 2.0009557385145485e-06, "loss": 0.3786, "step": 35820 }, { "epoch": 0.7952186990155492, "grad_norm": 0.9108057022094727, "learning_rate": 1.998863709972414e-06, "loss": 0.3869, "step": 35825 }, { "epoch": 0.7953296855750769, "grad_norm": 0.8067658543586731, "learning_rate": 1.9967726541815935e-06, "loss": 0.5273, "step": 35830 }, { "epoch": 0.7954406721346045, "grad_norm": 0.9091742038726807, "learning_rate": 1.994682571396316e-06, "loss": 0.4029, "step": 35835 }, { "epoch": 0.7955516586941321, "grad_norm": 1.6972312927246094, "learning_rate": 1.99259346187068e-06, "loss": 0.4327, "step": 35840 }, { "epoch": 0.7956626452536598, "grad_norm": 0.9916061162948608, "learning_rate": 1.990505325858677e-06, "loss": 0.5104, "step": 35845 }, { "epoch": 0.7957736318131874, "grad_norm": 0.9939529299736023, "learning_rate": 1.9884181636141775e-06, "loss": 0.3834, "step": 35850 }, { "epoch": 0.7958846183727151, "grad_norm": 1.5403352975845337, "learning_rate": 1.986331975390926e-06, "loss": 0.3761, "step": 35855 }, { "epoch": 0.7959956049322428, "grad_norm": 1.4493037462234497, "learning_rate": 1.98424676144256e-06, "loss": 0.4491, "step": 35860 }, { "epoch": 0.7961065914917703, "grad_norm": 1.16340172290802, "learning_rate": 1.982162522022587e-06, "loss": 0.3466, "step": 35865 }, { "epoch": 0.796217578051298, "grad_norm": 1.4656342267990112, "learning_rate": 1.980079257384405e-06, "loss": 0.4166, "step": 35870 }, { "epoch": 0.7963285646108257, "grad_norm": 1.0838220119476318, "learning_rate": 1.9779969677812927e-06, "loss": 0.58, "step": 35875 }, { "epoch": 0.7964395511703533, "grad_norm": 1.0586246252059937, "learning_rate": 1.975915653466404e-06, "loss": 0.302, "step": 35880 }, { "epoch": 0.7965505377298809, "grad_norm": 1.5136168003082275, "learning_rate": 1.9738353146927802e-06, "loss": 0.3081, "step": 35885 }, { "epoch": 0.7966615242894085, "grad_norm": 0.7670632004737854, "learning_rate": 1.97175595171334e-06, "loss": 0.3214, "step": 35890 }, { "epoch": 0.7967725108489362, "grad_norm": 0.9023318886756897, "learning_rate": 1.969677564780885e-06, "loss": 0.3999, "step": 35895 }, { "epoch": 0.7968834974084639, "grad_norm": 1.1757646799087524, "learning_rate": 1.9676001541481037e-06, "loss": 0.4676, "step": 35900 }, { "epoch": 0.7969944839679914, "grad_norm": 1.283319115638733, "learning_rate": 1.965523720067555e-06, "loss": 0.3683, "step": 35905 }, { "epoch": 0.7971054705275191, "grad_norm": 1.4499841928482056, "learning_rate": 1.9634482627916883e-06, "loss": 0.3305, "step": 35910 }, { "epoch": 0.7972164570870468, "grad_norm": 0.9750193357467651, "learning_rate": 1.9613737825728276e-06, "loss": 0.4215, "step": 35915 }, { "epoch": 0.7973274436465744, "grad_norm": 1.2049821615219116, "learning_rate": 1.9593002796631856e-06, "loss": 0.4339, "step": 35920 }, { "epoch": 0.7974384302061021, "grad_norm": 0.8911698460578918, "learning_rate": 1.9572277543148453e-06, "loss": 0.4305, "step": 35925 }, { "epoch": 0.7975494167656297, "grad_norm": 1.4148341417312622, "learning_rate": 1.9551562067797824e-06, "loss": 0.3123, "step": 35930 }, { "epoch": 0.7976604033251573, "grad_norm": 2.4391891956329346, "learning_rate": 1.9530856373098496e-06, "loss": 0.3437, "step": 35935 }, { "epoch": 0.797771389884685, "grad_norm": 2.3069112300872803, "learning_rate": 1.951016046156776e-06, "loss": 0.3807, "step": 35940 }, { "epoch": 0.7978823764442126, "grad_norm": 2.131568193435669, "learning_rate": 1.9489474335721793e-06, "loss": 0.3841, "step": 35945 }, { "epoch": 0.7979933630037402, "grad_norm": 0.7713155150413513, "learning_rate": 1.9468797998075494e-06, "loss": 0.325, "step": 35950 }, { "epoch": 0.7981043495632679, "grad_norm": 0.8977245092391968, "learning_rate": 1.944813145114266e-06, "loss": 0.3944, "step": 35955 }, { "epoch": 0.7982153361227955, "grad_norm": 1.6785709857940674, "learning_rate": 1.942747469743589e-06, "loss": 0.4234, "step": 35960 }, { "epoch": 0.7983263226823232, "grad_norm": 1.0968211889266968, "learning_rate": 1.9406827739466482e-06, "loss": 0.4327, "step": 35965 }, { "epoch": 0.7984373092418509, "grad_norm": 1.513850450515747, "learning_rate": 1.9386190579744703e-06, "loss": 0.3457, "step": 35970 }, { "epoch": 0.7985482958013784, "grad_norm": 1.183782935142517, "learning_rate": 1.9365563220779494e-06, "loss": 0.2454, "step": 35975 }, { "epoch": 0.7986592823609061, "grad_norm": 0.9433440566062927, "learning_rate": 1.9344945665078672e-06, "loss": 0.4454, "step": 35980 }, { "epoch": 0.7987702689204338, "grad_norm": 0.9357894062995911, "learning_rate": 1.9324337915148895e-06, "loss": 0.3218, "step": 35985 }, { "epoch": 0.7988812554799614, "grad_norm": 1.7372990846633911, "learning_rate": 1.930373997349553e-06, "loss": 0.3769, "step": 35990 }, { "epoch": 0.798992242039489, "grad_norm": 1.4230307340621948, "learning_rate": 1.928315184262284e-06, "loss": 0.3372, "step": 35995 }, { "epoch": 0.7991032285990166, "grad_norm": 1.1874383687973022, "learning_rate": 1.926257352503381e-06, "loss": 0.4081, "step": 36000 }, { "epoch": 0.7992142151585443, "grad_norm": 0.7755220532417297, "learning_rate": 1.924200502323036e-06, "loss": 0.3184, "step": 36005 }, { "epoch": 0.799325201718072, "grad_norm": 1.1535636186599731, "learning_rate": 1.922144633971307e-06, "loss": 0.4537, "step": 36010 }, { "epoch": 0.7994361882775995, "grad_norm": 0.8753936886787415, "learning_rate": 1.920089747698144e-06, "loss": 0.2761, "step": 36015 }, { "epoch": 0.7995471748371272, "grad_norm": 0.8846555948257446, "learning_rate": 1.9180358437533695e-06, "loss": 0.3525, "step": 36020 }, { "epoch": 0.7996581613966549, "grad_norm": 1.0711405277252197, "learning_rate": 1.9159829223866956e-06, "loss": 0.5587, "step": 36025 }, { "epoch": 0.7997691479561825, "grad_norm": 1.5819116830825806, "learning_rate": 1.913930983847703e-06, "loss": 0.2664, "step": 36030 }, { "epoch": 0.7998801345157102, "grad_norm": 1.0081666707992554, "learning_rate": 1.911880028385866e-06, "loss": 0.4461, "step": 36035 }, { "epoch": 0.7999911210752378, "grad_norm": 1.3341885805130005, "learning_rate": 1.9098300562505266e-06, "loss": 0.4054, "step": 36040 }, { "epoch": 0.8001021076347654, "grad_norm": 1.4828771352767944, "learning_rate": 1.907781067690919e-06, "loss": 0.3104, "step": 36045 }, { "epoch": 0.8002130941942931, "grad_norm": 1.285024881362915, "learning_rate": 1.9057330629561476e-06, "loss": 0.4829, "step": 36050 }, { "epoch": 0.8003240807538207, "grad_norm": 1.3645647764205933, "learning_rate": 1.9036860422952076e-06, "loss": 0.435, "step": 36055 }, { "epoch": 0.8004350673133483, "grad_norm": 0.6595139503479004, "learning_rate": 1.9016400059569629e-06, "loss": 0.2595, "step": 36060 }, { "epoch": 0.800546053872876, "grad_norm": 1.4788298606872559, "learning_rate": 1.899594954190166e-06, "loss": 0.3523, "step": 36065 }, { "epoch": 0.8006570404324036, "grad_norm": 2.774763822555542, "learning_rate": 1.89755088724345e-06, "loss": 0.5137, "step": 36070 }, { "epoch": 0.8007680269919313, "grad_norm": 1.1998732089996338, "learning_rate": 1.895507805365322e-06, "loss": 0.3658, "step": 36075 }, { "epoch": 0.800879013551459, "grad_norm": 1.142486333847046, "learning_rate": 1.8934657088041763e-06, "loss": 0.4606, "step": 36080 }, { "epoch": 0.8009900001109865, "grad_norm": 1.127084732055664, "learning_rate": 1.8914245978082812e-06, "loss": 0.456, "step": 36085 }, { "epoch": 0.8011009866705142, "grad_norm": 1.3502229452133179, "learning_rate": 1.8893844726257914e-06, "loss": 0.4376, "step": 36090 }, { "epoch": 0.8012119732300419, "grad_norm": 1.8028035163879395, "learning_rate": 1.8873453335047342e-06, "loss": 0.4925, "step": 36095 }, { "epoch": 0.8013229597895695, "grad_norm": 1.4846652746200562, "learning_rate": 1.8853071806930235e-06, "loss": 0.4125, "step": 36100 }, { "epoch": 0.8014339463490971, "grad_norm": 0.7727453708648682, "learning_rate": 1.883270014438453e-06, "loss": 0.4342, "step": 36105 }, { "epoch": 0.8015449329086247, "grad_norm": 1.8496626615524292, "learning_rate": 1.8812338349886905e-06, "loss": 0.375, "step": 36110 }, { "epoch": 0.8016559194681524, "grad_norm": 0.7790250778198242, "learning_rate": 1.8791986425912935e-06, "loss": 0.3095, "step": 36115 }, { "epoch": 0.8017669060276801, "grad_norm": 1.1624494791030884, "learning_rate": 1.877164437493687e-06, "loss": 0.3837, "step": 36120 }, { "epoch": 0.8018778925872077, "grad_norm": 1.23572838306427, "learning_rate": 1.875131219943187e-06, "loss": 0.5883, "step": 36125 }, { "epoch": 0.8019888791467353, "grad_norm": 1.4058516025543213, "learning_rate": 1.8730989901869868e-06, "loss": 0.2798, "step": 36130 }, { "epoch": 0.802099865706263, "grad_norm": 1.2532075643539429, "learning_rate": 1.871067748472154e-06, "loss": 0.4864, "step": 36135 }, { "epoch": 0.8022108522657906, "grad_norm": 0.9430113434791565, "learning_rate": 1.8690374950456436e-06, "loss": 0.2935, "step": 36140 }, { "epoch": 0.8023218388253183, "grad_norm": 1.6031843423843384, "learning_rate": 1.8670082301542835e-06, "loss": 0.3808, "step": 36145 }, { "epoch": 0.802432825384846, "grad_norm": 1.3810571432113647, "learning_rate": 1.8649799540447865e-06, "loss": 0.3689, "step": 36150 }, { "epoch": 0.8025438119443735, "grad_norm": 1.1026618480682373, "learning_rate": 1.8629526669637465e-06, "loss": 0.3452, "step": 36155 }, { "epoch": 0.8026547985039012, "grad_norm": 1.6488431692123413, "learning_rate": 1.8609263691576285e-06, "loss": 0.4822, "step": 36160 }, { "epoch": 0.8027657850634288, "grad_norm": 1.0668485164642334, "learning_rate": 1.8589010608727897e-06, "loss": 0.494, "step": 36165 }, { "epoch": 0.8028767716229565, "grad_norm": 0.8742284178733826, "learning_rate": 1.8568767423554545e-06, "loss": 0.2853, "step": 36170 }, { "epoch": 0.8029877581824841, "grad_norm": 1.244224190711975, "learning_rate": 1.854853413851737e-06, "loss": 0.3572, "step": 36175 }, { "epoch": 0.8030987447420117, "grad_norm": 1.4055209159851074, "learning_rate": 1.8528310756076217e-06, "loss": 0.3008, "step": 36180 }, { "epoch": 0.8032097313015394, "grad_norm": 1.29067862033844, "learning_rate": 1.8508097278689819e-06, "loss": 0.35, "step": 36185 }, { "epoch": 0.8033207178610671, "grad_norm": 1.2676900625228882, "learning_rate": 1.8487893708815675e-06, "loss": 0.3111, "step": 36190 }, { "epoch": 0.8034317044205946, "grad_norm": 1.301653504371643, "learning_rate": 1.8467700048910008e-06, "loss": 0.386, "step": 36195 }, { "epoch": 0.8035426909801223, "grad_norm": 1.308471918106079, "learning_rate": 1.844751630142797e-06, "loss": 0.4339, "step": 36200 }, { "epoch": 0.80365367753965, "grad_norm": 1.5464400053024292, "learning_rate": 1.842734246882336e-06, "loss": 0.4505, "step": 36205 }, { "epoch": 0.8037646640991776, "grad_norm": 0.9431395530700684, "learning_rate": 1.8407178553548876e-06, "loss": 0.3395, "step": 36210 }, { "epoch": 0.8038756506587053, "grad_norm": 1.0454790592193604, "learning_rate": 1.8387024558056022e-06, "loss": 0.3386, "step": 36215 }, { "epoch": 0.8039866372182328, "grad_norm": 0.7487240433692932, "learning_rate": 1.8366880484794969e-06, "loss": 0.231, "step": 36220 }, { "epoch": 0.8040976237777605, "grad_norm": 1.3686524629592896, "learning_rate": 1.834674633621485e-06, "loss": 0.4947, "step": 36225 }, { "epoch": 0.8042086103372882, "grad_norm": 1.4362846612930298, "learning_rate": 1.8326622114763437e-06, "loss": 0.3767, "step": 36230 }, { "epoch": 0.8043195968968158, "grad_norm": 0.6831071972846985, "learning_rate": 1.8306507822887398e-06, "loss": 0.3878, "step": 36235 }, { "epoch": 0.8044305834563434, "grad_norm": 0.9789371490478516, "learning_rate": 1.8286403463032187e-06, "loss": 0.3105, "step": 36240 }, { "epoch": 0.8045415700158711, "grad_norm": 1.684880256652832, "learning_rate": 1.826630903764196e-06, "loss": 0.3869, "step": 36245 }, { "epoch": 0.8046525565753987, "grad_norm": 0.9941912293434143, "learning_rate": 1.8246224549159808e-06, "loss": 0.3643, "step": 36250 }, { "epoch": 0.8047635431349264, "grad_norm": 1.7709214687347412, "learning_rate": 1.8226150000027464e-06, "loss": 0.4625, "step": 36255 }, { "epoch": 0.804874529694454, "grad_norm": 0.734659731388092, "learning_rate": 1.8206085392685568e-06, "loss": 0.3929, "step": 36260 }, { "epoch": 0.8049855162539816, "grad_norm": 1.0187081098556519, "learning_rate": 1.818603072957349e-06, "loss": 0.3132, "step": 36265 }, { "epoch": 0.8050965028135093, "grad_norm": 1.8822877407073975, "learning_rate": 1.8165986013129423e-06, "loss": 0.2947, "step": 36270 }, { "epoch": 0.8052074893730369, "grad_norm": 0.9087295532226562, "learning_rate": 1.8145951245790305e-06, "loss": 0.3782, "step": 36275 }, { "epoch": 0.8053184759325646, "grad_norm": 1.003465175628662, "learning_rate": 1.8125926429991946e-06, "loss": 0.3677, "step": 36280 }, { "epoch": 0.8054294624920922, "grad_norm": 1.4164167642593384, "learning_rate": 1.8105911568168832e-06, "loss": 0.4365, "step": 36285 }, { "epoch": 0.8055404490516198, "grad_norm": 1.4984828233718872, "learning_rate": 1.808590666275437e-06, "loss": 0.4024, "step": 36290 }, { "epoch": 0.8056514356111475, "grad_norm": 0.8606235980987549, "learning_rate": 1.8065911716180639e-06, "loss": 0.5163, "step": 36295 }, { "epoch": 0.8057624221706752, "grad_norm": 1.0989290475845337, "learning_rate": 1.8045926730878594e-06, "loss": 0.5101, "step": 36300 }, { "epoch": 0.8058734087302027, "grad_norm": 1.3459844589233398, "learning_rate": 1.80259517092779e-06, "loss": 0.4239, "step": 36305 }, { "epoch": 0.8059843952897304, "grad_norm": 1.6788448095321655, "learning_rate": 1.8005986653807107e-06, "loss": 0.189, "step": 36310 }, { "epoch": 0.8060953818492581, "grad_norm": 1.3815594911575317, "learning_rate": 1.7986031566893436e-06, "loss": 0.385, "step": 36315 }, { "epoch": 0.8062063684087857, "grad_norm": 1.7086074352264404, "learning_rate": 1.7966086450962994e-06, "loss": 0.3657, "step": 36320 }, { "epoch": 0.8063173549683134, "grad_norm": 1.1373320817947388, "learning_rate": 1.7946151308440675e-06, "loss": 0.4686, "step": 36325 }, { "epoch": 0.8064283415278409, "grad_norm": 1.21728515625, "learning_rate": 1.7926226141750058e-06, "loss": 0.237, "step": 36330 }, { "epoch": 0.8065393280873686, "grad_norm": 1.0383799076080322, "learning_rate": 1.7906310953313645e-06, "loss": 0.416, "step": 36335 }, { "epoch": 0.8066503146468963, "grad_norm": 0.803288459777832, "learning_rate": 1.7886405745552605e-06, "loss": 0.3733, "step": 36340 }, { "epoch": 0.8067613012064239, "grad_norm": 1.2933439016342163, "learning_rate": 1.7866510520886982e-06, "loss": 0.3213, "step": 36345 }, { "epoch": 0.8068722877659515, "grad_norm": 1.3617157936096191, "learning_rate": 1.784662528173553e-06, "loss": 0.3961, "step": 36350 }, { "epoch": 0.8069832743254792, "grad_norm": 1.522436261177063, "learning_rate": 1.782675003051586e-06, "loss": 0.3179, "step": 36355 }, { "epoch": 0.8070942608850068, "grad_norm": 0.7228170037269592, "learning_rate": 1.7806884769644362e-06, "loss": 0.3856, "step": 36360 }, { "epoch": 0.8072052474445345, "grad_norm": 2.0226802825927734, "learning_rate": 1.7787029501536123e-06, "loss": 0.3473, "step": 36365 }, { "epoch": 0.8073162340040622, "grad_norm": 1.0841708183288574, "learning_rate": 1.776718422860515e-06, "loss": 0.4584, "step": 36370 }, { "epoch": 0.8074272205635897, "grad_norm": 0.9569538831710815, "learning_rate": 1.774734895326411e-06, "loss": 0.3909, "step": 36375 }, { "epoch": 0.8075382071231174, "grad_norm": 1.6677311658859253, "learning_rate": 1.772752367792452e-06, "loss": 0.4026, "step": 36380 }, { "epoch": 0.807649193682645, "grad_norm": 1.107144832611084, "learning_rate": 1.7707708404996715e-06, "loss": 0.3025, "step": 36385 }, { "epoch": 0.8077601802421727, "grad_norm": 0.5250940322875977, "learning_rate": 1.7687903136889705e-06, "loss": 0.3486, "step": 36390 }, { "epoch": 0.8078711668017003, "grad_norm": 1.1956672668457031, "learning_rate": 1.7668107876011397e-06, "loss": 0.2646, "step": 36395 }, { "epoch": 0.8079821533612279, "grad_norm": 1.6405327320098877, "learning_rate": 1.76483226247684e-06, "loss": 0.3972, "step": 36400 }, { "epoch": 0.8080931399207556, "grad_norm": 2.861914873123169, "learning_rate": 1.7628547385566152e-06, "loss": 0.3234, "step": 36405 }, { "epoch": 0.8082041264802833, "grad_norm": 0.755532443523407, "learning_rate": 1.760878216080888e-06, "loss": 0.4652, "step": 36410 }, { "epoch": 0.8083151130398109, "grad_norm": 1.5427390336990356, "learning_rate": 1.7589026952899524e-06, "loss": 0.2767, "step": 36415 }, { "epoch": 0.8084260995993385, "grad_norm": 1.0020582675933838, "learning_rate": 1.756928176423992e-06, "loss": 0.3635, "step": 36420 }, { "epoch": 0.8085370861588662, "grad_norm": 1.156604528427124, "learning_rate": 1.754954659723056e-06, "loss": 0.4422, "step": 36425 }, { "epoch": 0.8086480727183938, "grad_norm": 1.1243420839309692, "learning_rate": 1.7529821454270835e-06, "loss": 0.4141, "step": 36430 }, { "epoch": 0.8087590592779215, "grad_norm": 1.0397528409957886, "learning_rate": 1.7510106337758802e-06, "loss": 0.4895, "step": 36435 }, { "epoch": 0.808870045837449, "grad_norm": 1.0990221500396729, "learning_rate": 1.7490401250091404e-06, "loss": 0.4166, "step": 36440 }, { "epoch": 0.8089810323969767, "grad_norm": 1.041306734085083, "learning_rate": 1.7470706193664322e-06, "loss": 0.4988, "step": 36445 }, { "epoch": 0.8090920189565044, "grad_norm": 1.0351485013961792, "learning_rate": 1.7451021170871974e-06, "loss": 0.3052, "step": 36450 }, { "epoch": 0.809203005516032, "grad_norm": 0.9568383097648621, "learning_rate": 1.7431346184107645e-06, "loss": 0.5788, "step": 36455 }, { "epoch": 0.8093139920755597, "grad_norm": 0.883678138256073, "learning_rate": 1.7411681235763323e-06, "loss": 0.3323, "step": 36460 }, { "epoch": 0.8094249786350873, "grad_norm": 0.6918429732322693, "learning_rate": 1.7392026328229804e-06, "loss": 0.2976, "step": 36465 }, { "epoch": 0.8095359651946149, "grad_norm": 1.1069185733795166, "learning_rate": 1.7372381463896703e-06, "loss": 0.4608, "step": 36470 }, { "epoch": 0.8096469517541426, "grad_norm": 1.3932291269302368, "learning_rate": 1.735274664515233e-06, "loss": 0.4438, "step": 36475 }, { "epoch": 0.8097579383136703, "grad_norm": 0.6072965264320374, "learning_rate": 1.7333121874383875e-06, "loss": 0.377, "step": 36480 }, { "epoch": 0.8098689248731978, "grad_norm": 1.8820232152938843, "learning_rate": 1.7313507153977183e-06, "loss": 0.4978, "step": 36485 }, { "epoch": 0.8099799114327255, "grad_norm": 1.3516621589660645, "learning_rate": 1.7293902486317016e-06, "loss": 0.454, "step": 36490 }, { "epoch": 0.8100908979922531, "grad_norm": 0.7736095786094666, "learning_rate": 1.7274307873786777e-06, "loss": 0.4383, "step": 36495 }, { "epoch": 0.8102018845517808, "grad_norm": 1.2522380352020264, "learning_rate": 1.7254723318768785e-06, "loss": 0.3762, "step": 36500 }, { "epoch": 0.8103128711113085, "grad_norm": 1.4323714971542358, "learning_rate": 1.7235148823643987e-06, "loss": 0.377, "step": 36505 }, { "epoch": 0.810423857670836, "grad_norm": 0.9815630316734314, "learning_rate": 1.721558439079225e-06, "loss": 0.2898, "step": 36510 }, { "epoch": 0.8105348442303637, "grad_norm": 1.7804838418960571, "learning_rate": 1.7196030022592102e-06, "loss": 0.3777, "step": 36515 }, { "epoch": 0.8106458307898914, "grad_norm": 1.0361943244934082, "learning_rate": 1.7176485721420943e-06, "loss": 0.4349, "step": 36520 }, { "epoch": 0.810756817349419, "grad_norm": 1.1842056512832642, "learning_rate": 1.7156951489654872e-06, "loss": 0.3378, "step": 36525 }, { "epoch": 0.8108678039089466, "grad_norm": 1.0792927742004395, "learning_rate": 1.713742732966881e-06, "loss": 0.3917, "step": 36530 }, { "epoch": 0.8109787904684743, "grad_norm": 1.4058988094329834, "learning_rate": 1.7117913243836415e-06, "loss": 0.3641, "step": 36535 }, { "epoch": 0.8110897770280019, "grad_norm": 1.1576017141342163, "learning_rate": 1.70984092345302e-06, "loss": 0.4311, "step": 36540 }, { "epoch": 0.8112007635875296, "grad_norm": 0.43751659989356995, "learning_rate": 1.7078915304121323e-06, "loss": 0.3346, "step": 36545 }, { "epoch": 0.8113117501470571, "grad_norm": 1.3185070753097534, "learning_rate": 1.7059431454979825e-06, "loss": 0.3505, "step": 36550 }, { "epoch": 0.8114227367065848, "grad_norm": 2.0647809505462646, "learning_rate": 1.7039957689474517e-06, "loss": 0.4157, "step": 36555 }, { "epoch": 0.8115337232661125, "grad_norm": 1.7556878328323364, "learning_rate": 1.7020494009972909e-06, "loss": 0.3379, "step": 36560 }, { "epoch": 0.8116447098256401, "grad_norm": 1.018389105796814, "learning_rate": 1.7001040418841364e-06, "loss": 0.3126, "step": 36565 }, { "epoch": 0.8117556963851678, "grad_norm": 1.5789307355880737, "learning_rate": 1.6981596918444953e-06, "loss": 0.4389, "step": 36570 }, { "epoch": 0.8118666829446954, "grad_norm": 1.1881147623062134, "learning_rate": 1.6962163511147557e-06, "loss": 0.4891, "step": 36575 }, { "epoch": 0.811977669504223, "grad_norm": 0.9965287446975708, "learning_rate": 1.6942740199311858e-06, "loss": 0.4181, "step": 36580 }, { "epoch": 0.8120886560637507, "grad_norm": 1.7986716032028198, "learning_rate": 1.6923326985299238e-06, "loss": 0.3028, "step": 36585 }, { "epoch": 0.8121996426232784, "grad_norm": 1.148728370666504, "learning_rate": 1.6903923871469917e-06, "loss": 0.1677, "step": 36590 }, { "epoch": 0.8123106291828059, "grad_norm": 1.5958105325698853, "learning_rate": 1.6884530860182835e-06, "loss": 0.4435, "step": 36595 }, { "epoch": 0.8124216157423336, "grad_norm": 0.8438432812690735, "learning_rate": 1.6865147953795746e-06, "loss": 0.4816, "step": 36600 }, { "epoch": 0.8125326023018612, "grad_norm": 1.058948278427124, "learning_rate": 1.6845775154665167e-06, "loss": 0.415, "step": 36605 }, { "epoch": 0.8126435888613889, "grad_norm": 1.3056766986846924, "learning_rate": 1.6826412465146357e-06, "loss": 0.3453, "step": 36610 }, { "epoch": 0.8127545754209166, "grad_norm": 1.4271174669265747, "learning_rate": 1.6807059887593392e-06, "loss": 0.3211, "step": 36615 }, { "epoch": 0.8128655619804441, "grad_norm": 0.9875726103782654, "learning_rate": 1.6787717424359061e-06, "loss": 0.4205, "step": 36620 }, { "epoch": 0.8129765485399718, "grad_norm": 1.2538775205612183, "learning_rate": 1.676838507779499e-06, "loss": 0.6146, "step": 36625 }, { "epoch": 0.8130875350994995, "grad_norm": 0.8289942145347595, "learning_rate": 1.6749062850251508e-06, "loss": 0.4664, "step": 36630 }, { "epoch": 0.8131985216590271, "grad_norm": 0.9525663256645203, "learning_rate": 1.6729750744077755e-06, "loss": 0.4705, "step": 36635 }, { "epoch": 0.8133095082185547, "grad_norm": 0.8990917801856995, "learning_rate": 1.6710448761621667e-06, "loss": 0.422, "step": 36640 }, { "epoch": 0.8134204947780824, "grad_norm": 0.651301920413971, "learning_rate": 1.6691156905229865e-06, "loss": 0.4145, "step": 36645 }, { "epoch": 0.81353148133761, "grad_norm": 0.46783456206321716, "learning_rate": 1.6671875177247833e-06, "loss": 0.302, "step": 36650 }, { "epoch": 0.8136424678971377, "grad_norm": 1.1785321235656738, "learning_rate": 1.6652603580019733e-06, "loss": 0.3042, "step": 36655 }, { "epoch": 0.8137534544566652, "grad_norm": 1.3166145086288452, "learning_rate": 1.6633342115888562e-06, "loss": 0.3713, "step": 36660 }, { "epoch": 0.8138644410161929, "grad_norm": 1.2056806087493896, "learning_rate": 1.6614090787196091e-06, "loss": 0.5188, "step": 36665 }, { "epoch": 0.8139754275757206, "grad_norm": 1.5675289630889893, "learning_rate": 1.6594849596282781e-06, "loss": 0.4461, "step": 36670 }, { "epoch": 0.8140864141352482, "grad_norm": 0.9947167038917542, "learning_rate": 1.657561854548797e-06, "loss": 0.4885, "step": 36675 }, { "epoch": 0.8141974006947759, "grad_norm": 1.071495771408081, "learning_rate": 1.6556397637149646e-06, "loss": 0.4027, "step": 36680 }, { "epoch": 0.8143083872543035, "grad_norm": 1.2620582580566406, "learning_rate": 1.6537186873604638e-06, "loss": 0.3634, "step": 36685 }, { "epoch": 0.8144193738138311, "grad_norm": 1.3768949508666992, "learning_rate": 1.6517986257188578e-06, "loss": 0.4012, "step": 36690 }, { "epoch": 0.8145303603733588, "grad_norm": 1.1447209119796753, "learning_rate": 1.6498795790235734e-06, "loss": 0.3399, "step": 36695 }, { "epoch": 0.8146413469328865, "grad_norm": 1.545142412185669, "learning_rate": 1.6479615475079291e-06, "loss": 0.4296, "step": 36700 }, { "epoch": 0.814752333492414, "grad_norm": 1.6830742359161377, "learning_rate": 1.6460445314051065e-06, "loss": 0.2984, "step": 36705 }, { "epoch": 0.8148633200519417, "grad_norm": 1.2970561981201172, "learning_rate": 1.6441285309481746e-06, "loss": 0.5463, "step": 36710 }, { "epoch": 0.8149743066114693, "grad_norm": 0.8436324596405029, "learning_rate": 1.6422135463700705e-06, "loss": 0.2658, "step": 36715 }, { "epoch": 0.815085293170997, "grad_norm": 1.074008584022522, "learning_rate": 1.6402995779036146e-06, "loss": 0.3757, "step": 36720 }, { "epoch": 0.8151962797305247, "grad_norm": 1.4187140464782715, "learning_rate": 1.6383866257815007e-06, "loss": 0.3975, "step": 36725 }, { "epoch": 0.8153072662900522, "grad_norm": 1.1671775579452515, "learning_rate": 1.6364746902362972e-06, "loss": 0.4162, "step": 36730 }, { "epoch": 0.8154182528495799, "grad_norm": 1.0524426698684692, "learning_rate": 1.6345637715004524e-06, "loss": 0.4194, "step": 36735 }, { "epoch": 0.8155292394091076, "grad_norm": 1.8277971744537354, "learning_rate": 1.6326538698062878e-06, "loss": 0.4083, "step": 36740 }, { "epoch": 0.8156402259686352, "grad_norm": 0.9649227857589722, "learning_rate": 1.6307449853860058e-06, "loss": 0.3563, "step": 36745 }, { "epoch": 0.8157512125281629, "grad_norm": 1.5071200132369995, "learning_rate": 1.6288371184716779e-06, "loss": 0.3864, "step": 36750 }, { "epoch": 0.8158621990876905, "grad_norm": 0.7660090923309326, "learning_rate": 1.6269302692952605e-06, "loss": 0.2733, "step": 36755 }, { "epoch": 0.8159731856472181, "grad_norm": 1.075348138809204, "learning_rate": 1.625024438088577e-06, "loss": 0.3427, "step": 36760 }, { "epoch": 0.8160841722067458, "grad_norm": 0.3273741602897644, "learning_rate": 1.623119625083338e-06, "loss": 0.3185, "step": 36765 }, { "epoch": 0.8161951587662734, "grad_norm": 1.2251849174499512, "learning_rate": 1.6212158305111192e-06, "loss": 0.4443, "step": 36770 }, { "epoch": 0.816306145325801, "grad_norm": 1.5640612840652466, "learning_rate": 1.6193130546033808e-06, "loss": 0.5205, "step": 36775 }, { "epoch": 0.8164171318853287, "grad_norm": 1.0030092000961304, "learning_rate": 1.6174112975914524e-06, "loss": 0.5655, "step": 36780 }, { "epoch": 0.8165281184448563, "grad_norm": 1.602996587753296, "learning_rate": 1.615510559706548e-06, "loss": 0.3742, "step": 36785 }, { "epoch": 0.816639105004384, "grad_norm": 1.4866420030593872, "learning_rate": 1.613610841179748e-06, "loss": 0.3605, "step": 36790 }, { "epoch": 0.8167500915639117, "grad_norm": 1.5150279998779297, "learning_rate": 1.611712142242019e-06, "loss": 0.4498, "step": 36795 }, { "epoch": 0.8168610781234392, "grad_norm": 1.7357852458953857, "learning_rate": 1.6098144631241918e-06, "loss": 0.3914, "step": 36800 }, { "epoch": 0.8169720646829669, "grad_norm": 0.8778465986251831, "learning_rate": 1.6079178040569853e-06, "loss": 0.3118, "step": 36805 }, { "epoch": 0.8170830512424946, "grad_norm": 1.068134069442749, "learning_rate": 1.6060221652709885e-06, "loss": 0.5221, "step": 36810 }, { "epoch": 0.8171940378020222, "grad_norm": 1.6891485452651978, "learning_rate": 1.6041275469966645e-06, "loss": 0.3636, "step": 36815 }, { "epoch": 0.8173050243615498, "grad_norm": 1.3472400903701782, "learning_rate": 1.602233949464357e-06, "loss": 0.3646, "step": 36820 }, { "epoch": 0.8174160109210774, "grad_norm": 1.4167143106460571, "learning_rate": 1.6003413729042804e-06, "loss": 0.3788, "step": 36825 }, { "epoch": 0.8175269974806051, "grad_norm": 1.301668643951416, "learning_rate": 1.5984498175465292e-06, "loss": 0.4136, "step": 36830 }, { "epoch": 0.8176379840401328, "grad_norm": 1.205451488494873, "learning_rate": 1.596559283621074e-06, "loss": 0.3997, "step": 36835 }, { "epoch": 0.8177489705996603, "grad_norm": 0.7667840719223022, "learning_rate": 1.5946697713577574e-06, "loss": 0.3981, "step": 36840 }, { "epoch": 0.817859957159188, "grad_norm": 1.0957651138305664, "learning_rate": 1.592781280986302e-06, "loss": 0.3886, "step": 36845 }, { "epoch": 0.8179709437187157, "grad_norm": 1.3430111408233643, "learning_rate": 1.5908938127363004e-06, "loss": 0.5454, "step": 36850 }, { "epoch": 0.8180819302782433, "grad_norm": 1.6858025789260864, "learning_rate": 1.5890073668372275e-06, "loss": 0.4572, "step": 36855 }, { "epoch": 0.818192916837771, "grad_norm": 0.7941359877586365, "learning_rate": 1.5871219435184325e-06, "loss": 0.2364, "step": 36860 }, { "epoch": 0.8183039033972986, "grad_norm": 1.369829773902893, "learning_rate": 1.585237543009136e-06, "loss": 0.4684, "step": 36865 }, { "epoch": 0.8184148899568262, "grad_norm": 1.1318687200546265, "learning_rate": 1.5833541655384387e-06, "loss": 0.3791, "step": 36870 }, { "epoch": 0.8185258765163539, "grad_norm": 1.6038217544555664, "learning_rate": 1.5814718113353134e-06, "loss": 0.4192, "step": 36875 }, { "epoch": 0.8186368630758815, "grad_norm": 1.0083684921264648, "learning_rate": 1.5795904806286144e-06, "loss": 0.2792, "step": 36880 }, { "epoch": 0.8187478496354091, "grad_norm": 1.6192983388900757, "learning_rate": 1.5777101736470623e-06, "loss": 0.4293, "step": 36885 }, { "epoch": 0.8188588361949368, "grad_norm": 0.9264241456985474, "learning_rate": 1.575830890619261e-06, "loss": 0.384, "step": 36890 }, { "epoch": 0.8189698227544644, "grad_norm": 1.500458002090454, "learning_rate": 1.5739526317736897e-06, "loss": 0.4064, "step": 36895 }, { "epoch": 0.8190808093139921, "grad_norm": 1.7399510145187378, "learning_rate": 1.572075397338696e-06, "loss": 0.4525, "step": 36900 }, { "epoch": 0.8191917958735198, "grad_norm": 1.5163832902908325, "learning_rate": 1.5701991875425137e-06, "loss": 0.3745, "step": 36905 }, { "epoch": 0.8193027824330473, "grad_norm": 1.2246482372283936, "learning_rate": 1.5683240026132395e-06, "loss": 0.42, "step": 36910 }, { "epoch": 0.819413768992575, "grad_norm": 0.7396544218063354, "learning_rate": 1.5664498427788554e-06, "loss": 0.3279, "step": 36915 }, { "epoch": 0.8195247555521027, "grad_norm": 1.3506535291671753, "learning_rate": 1.5645767082672192e-06, "loss": 0.4584, "step": 36920 }, { "epoch": 0.8196357421116303, "grad_norm": 1.3335028886795044, "learning_rate": 1.562704599306053e-06, "loss": 0.4633, "step": 36925 }, { "epoch": 0.8197467286711579, "grad_norm": 0.8453715443611145, "learning_rate": 1.5608335161229682e-06, "loss": 0.325, "step": 36930 }, { "epoch": 0.8198577152306855, "grad_norm": 0.909500241279602, "learning_rate": 1.5589634589454383e-06, "loss": 0.3557, "step": 36935 }, { "epoch": 0.8199687017902132, "grad_norm": 1.10624361038208, "learning_rate": 1.5570944280008227e-06, "loss": 0.3512, "step": 36940 }, { "epoch": 0.8200796883497409, "grad_norm": 1.1543127298355103, "learning_rate": 1.5552264235163538e-06, "loss": 0.3086, "step": 36945 }, { "epoch": 0.8201906749092684, "grad_norm": 1.129363775253296, "learning_rate": 1.5533594457191326e-06, "loss": 0.4401, "step": 36950 }, { "epoch": 0.8203016614687961, "grad_norm": 0.8860126733779907, "learning_rate": 1.5514934948361437e-06, "loss": 0.3586, "step": 36955 }, { "epoch": 0.8204126480283238, "grad_norm": 1.5794183015823364, "learning_rate": 1.5496285710942393e-06, "loss": 0.3656, "step": 36960 }, { "epoch": 0.8205236345878514, "grad_norm": 1.1547932624816895, "learning_rate": 1.5477646747201559e-06, "loss": 0.3935, "step": 36965 }, { "epoch": 0.8206346211473791, "grad_norm": 1.3218568563461304, "learning_rate": 1.545901805940494e-06, "loss": 0.4343, "step": 36970 }, { "epoch": 0.8207456077069067, "grad_norm": 1.116638422012329, "learning_rate": 1.5440399649817384e-06, "loss": 0.4022, "step": 36975 }, { "epoch": 0.8208565942664343, "grad_norm": 1.830057978630066, "learning_rate": 1.5421791520702468e-06, "loss": 0.4358, "step": 36980 }, { "epoch": 0.820967580825962, "grad_norm": 1.1473445892333984, "learning_rate": 1.540319367432246e-06, "loss": 0.2289, "step": 36985 }, { "epoch": 0.8210785673854896, "grad_norm": 1.3270286321640015, "learning_rate": 1.538460611293847e-06, "loss": 0.3231, "step": 36990 }, { "epoch": 0.8211895539450172, "grad_norm": 0.9333168268203735, "learning_rate": 1.5366028838810265e-06, "loss": 0.2854, "step": 36995 }, { "epoch": 0.8213005405045449, "grad_norm": 1.4975849390029907, "learning_rate": 1.5347461854196466e-06, "loss": 0.5078, "step": 37000 }, { "epoch": 0.8214115270640725, "grad_norm": 1.1126400232315063, "learning_rate": 1.5328905161354324e-06, "loss": 0.4876, "step": 37005 }, { "epoch": 0.8215225136236002, "grad_norm": 0.8321356177330017, "learning_rate": 1.5310358762539957e-06, "loss": 0.375, "step": 37010 }, { "epoch": 0.8216335001831279, "grad_norm": 2.101627826690674, "learning_rate": 1.5291822660008116e-06, "loss": 0.416, "step": 37015 }, { "epoch": 0.8217444867426554, "grad_norm": 1.0231560468673706, "learning_rate": 1.527329685601241e-06, "loss": 0.4258, "step": 37020 }, { "epoch": 0.8218554733021831, "grad_norm": 0.7642114162445068, "learning_rate": 1.5254781352805092e-06, "loss": 0.2772, "step": 37025 }, { "epoch": 0.8219664598617108, "grad_norm": 1.4178411960601807, "learning_rate": 1.5236276152637275e-06, "loss": 0.3743, "step": 37030 }, { "epoch": 0.8220774464212384, "grad_norm": 1.4414584636688232, "learning_rate": 1.52177812577587e-06, "loss": 0.4376, "step": 37035 }, { "epoch": 0.822188432980766, "grad_norm": 1.2922779321670532, "learning_rate": 1.5199296670417973e-06, "loss": 0.3136, "step": 37040 }, { "epoch": 0.8222994195402936, "grad_norm": 0.901348352432251, "learning_rate": 1.5180822392862327e-06, "loss": 0.331, "step": 37045 }, { "epoch": 0.8224104060998213, "grad_norm": 1.0336065292358398, "learning_rate": 1.5162358427337853e-06, "loss": 0.5121, "step": 37050 }, { "epoch": 0.822521392659349, "grad_norm": 1.231096625328064, "learning_rate": 1.5143904776089302e-06, "loss": 0.3965, "step": 37055 }, { "epoch": 0.8226323792188766, "grad_norm": 1.0460683107376099, "learning_rate": 1.5125461441360223e-06, "loss": 0.3874, "step": 37060 }, { "epoch": 0.8227433657784042, "grad_norm": 1.1694492101669312, "learning_rate": 1.5107028425392923e-06, "loss": 0.499, "step": 37065 }, { "epoch": 0.8228543523379319, "grad_norm": 1.3804575204849243, "learning_rate": 1.5088605730428362e-06, "loss": 0.3613, "step": 37070 }, { "epoch": 0.8229653388974595, "grad_norm": 0.6450167894363403, "learning_rate": 1.5070193358706375e-06, "loss": 0.3518, "step": 37075 }, { "epoch": 0.8230763254569872, "grad_norm": 0.9184905290603638, "learning_rate": 1.5051791312465425e-06, "loss": 0.2628, "step": 37080 }, { "epoch": 0.8231873120165148, "grad_norm": 0.5770201683044434, "learning_rate": 1.5033399593942789e-06, "loss": 0.2979, "step": 37085 }, { "epoch": 0.8232982985760424, "grad_norm": 1.383227825164795, "learning_rate": 1.5015018205374498e-06, "loss": 0.4115, "step": 37090 }, { "epoch": 0.8234092851355701, "grad_norm": 1.2248444557189941, "learning_rate": 1.4996647148995258e-06, "loss": 0.3667, "step": 37095 }, { "epoch": 0.8235202716950977, "grad_norm": 1.2955788373947144, "learning_rate": 1.4978286427038602e-06, "loss": 0.5362, "step": 37100 }, { "epoch": 0.8236312582546254, "grad_norm": 1.5456572771072388, "learning_rate": 1.495993604173671e-06, "loss": 0.4109, "step": 37105 }, { "epoch": 0.823742244814153, "grad_norm": 1.211588978767395, "learning_rate": 1.49415959953206e-06, "loss": 0.4457, "step": 37110 }, { "epoch": 0.8238532313736806, "grad_norm": 1.0468361377716064, "learning_rate": 1.4923266290020011e-06, "loss": 0.2724, "step": 37115 }, { "epoch": 0.8239642179332083, "grad_norm": 1.3627668619155884, "learning_rate": 1.4904946928063347e-06, "loss": 0.3655, "step": 37120 }, { "epoch": 0.824075204492736, "grad_norm": 1.1421093940734863, "learning_rate": 1.4886637911677882e-06, "loss": 0.3082, "step": 37125 }, { "epoch": 0.8241861910522635, "grad_norm": 0.8973304629325867, "learning_rate": 1.4868339243089503e-06, "loss": 0.3593, "step": 37130 }, { "epoch": 0.8242971776117912, "grad_norm": 0.8185334205627441, "learning_rate": 1.4850050924522953e-06, "loss": 0.43, "step": 37135 }, { "epoch": 0.8244081641713189, "grad_norm": 1.235817313194275, "learning_rate": 1.4831772958201618e-06, "loss": 0.4316, "step": 37140 }, { "epoch": 0.8245191507308465, "grad_norm": 0.8884466886520386, "learning_rate": 1.4813505346347701e-06, "loss": 0.3893, "step": 37145 }, { "epoch": 0.8246301372903742, "grad_norm": 0.9675939679145813, "learning_rate": 1.4795248091182124e-06, "loss": 0.4503, "step": 37150 }, { "epoch": 0.8247411238499017, "grad_norm": 0.9958243370056152, "learning_rate": 1.4777001194924512e-06, "loss": 0.296, "step": 37155 }, { "epoch": 0.8248521104094294, "grad_norm": 1.5261372327804565, "learning_rate": 1.4758764659793302e-06, "loss": 0.335, "step": 37160 }, { "epoch": 0.8249630969689571, "grad_norm": 1.335727572441101, "learning_rate": 1.4740538488005584e-06, "loss": 0.4203, "step": 37165 }, { "epoch": 0.8250740835284847, "grad_norm": 0.9879004955291748, "learning_rate": 1.4722322681777257e-06, "loss": 0.4227, "step": 37170 }, { "epoch": 0.8251850700880123, "grad_norm": 1.6839021444320679, "learning_rate": 1.4704117243322969e-06, "loss": 0.4345, "step": 37175 }, { "epoch": 0.82529605664754, "grad_norm": 1.8196085691452026, "learning_rate": 1.4685922174856015e-06, "loss": 0.3528, "step": 37180 }, { "epoch": 0.8254070432070676, "grad_norm": 1.3297110795974731, "learning_rate": 1.466773747858854e-06, "loss": 0.5426, "step": 37185 }, { "epoch": 0.8255180297665953, "grad_norm": 1.045060157775879, "learning_rate": 1.464956315673135e-06, "loss": 0.3207, "step": 37190 }, { "epoch": 0.825629016326123, "grad_norm": 1.4947199821472168, "learning_rate": 1.4631399211494023e-06, "loss": 0.5682, "step": 37195 }, { "epoch": 0.8257400028856505, "grad_norm": 1.687486171722412, "learning_rate": 1.4613245645084894e-06, "loss": 0.322, "step": 37200 }, { "epoch": 0.8258509894451782, "grad_norm": 1.0408520698547363, "learning_rate": 1.4595102459710987e-06, "loss": 0.5531, "step": 37205 }, { "epoch": 0.8259619760047058, "grad_norm": 1.493778944015503, "learning_rate": 1.45769696575781e-06, "loss": 0.4164, "step": 37210 }, { "epoch": 0.8260729625642335, "grad_norm": 0.9806472659111023, "learning_rate": 1.455884724089075e-06, "loss": 0.3899, "step": 37215 }, { "epoch": 0.8261839491237611, "grad_norm": 1.0444679260253906, "learning_rate": 1.454073521185222e-06, "loss": 0.4276, "step": 37220 }, { "epoch": 0.8262949356832887, "grad_norm": 1.407180666923523, "learning_rate": 1.452263357266447e-06, "loss": 0.3518, "step": 37225 }, { "epoch": 0.8264059222428164, "grad_norm": 1.4814797639846802, "learning_rate": 1.450454232552826e-06, "loss": 0.3628, "step": 37230 }, { "epoch": 0.8265169088023441, "grad_norm": 1.703507661819458, "learning_rate": 1.4486461472643088e-06, "loss": 0.4244, "step": 37235 }, { "epoch": 0.8266278953618716, "grad_norm": 0.640510618686676, "learning_rate": 1.4468391016207129e-06, "loss": 0.3671, "step": 37240 }, { "epoch": 0.8267388819213993, "grad_norm": 1.0508774518966675, "learning_rate": 1.4450330958417348e-06, "loss": 0.3553, "step": 37245 }, { "epoch": 0.826849868480927, "grad_norm": 1.3584023714065552, "learning_rate": 1.4432281301469397e-06, "loss": 0.3616, "step": 37250 }, { "epoch": 0.8269608550404546, "grad_norm": 1.5076439380645752, "learning_rate": 1.4414242047557747e-06, "loss": 0.2058, "step": 37255 }, { "epoch": 0.8270718415999823, "grad_norm": 0.9351935386657715, "learning_rate": 1.4396213198875485e-06, "loss": 0.4854, "step": 37260 }, { "epoch": 0.8271828281595098, "grad_norm": 1.1554254293441772, "learning_rate": 1.437819475761455e-06, "loss": 0.5623, "step": 37265 }, { "epoch": 0.8272938147190375, "grad_norm": 1.1598232984542847, "learning_rate": 1.4360186725965518e-06, "loss": 0.5052, "step": 37270 }, { "epoch": 0.8274048012785652, "grad_norm": 1.4718127250671387, "learning_rate": 1.434218910611781e-06, "loss": 0.3818, "step": 37275 }, { "epoch": 0.8275157878380928, "grad_norm": 0.9634243249893188, "learning_rate": 1.4324201900259438e-06, "loss": 0.3148, "step": 37280 }, { "epoch": 0.8276267743976204, "grad_norm": 1.3595343828201294, "learning_rate": 1.4306225110577288e-06, "loss": 0.4535, "step": 37285 }, { "epoch": 0.8277377609571481, "grad_norm": 0.9844816327095032, "learning_rate": 1.4288258739256877e-06, "loss": 0.2749, "step": 37290 }, { "epoch": 0.8278487475166757, "grad_norm": 2.4494025707244873, "learning_rate": 1.4270302788482537e-06, "loss": 0.4749, "step": 37295 }, { "epoch": 0.8279597340762034, "grad_norm": 1.2348016500473022, "learning_rate": 1.4252357260437244e-06, "loss": 0.4774, "step": 37300 }, { "epoch": 0.8280707206357311, "grad_norm": 0.7983580827713013, "learning_rate": 1.4234422157302808e-06, "loss": 0.2663, "step": 37305 }, { "epoch": 0.8281817071952586, "grad_norm": 1.6383472681045532, "learning_rate": 1.4216497481259662e-06, "loss": 0.2833, "step": 37310 }, { "epoch": 0.8282926937547863, "grad_norm": 1.3078876733779907, "learning_rate": 1.4198583234487052e-06, "loss": 0.3705, "step": 37315 }, { "epoch": 0.8284036803143139, "grad_norm": 0.9692357182502747, "learning_rate": 1.4180679419162968e-06, "loss": 0.3132, "step": 37320 }, { "epoch": 0.8285146668738416, "grad_norm": 1.1948881149291992, "learning_rate": 1.4162786037464038e-06, "loss": 0.3414, "step": 37325 }, { "epoch": 0.8286256534333692, "grad_norm": 2.3686370849609375, "learning_rate": 1.414490309156573e-06, "loss": 0.4313, "step": 37330 }, { "epoch": 0.8287366399928968, "grad_norm": 1.1939934492111206, "learning_rate": 1.4127030583642143e-06, "loss": 0.4808, "step": 37335 }, { "epoch": 0.8288476265524245, "grad_norm": 1.2011733055114746, "learning_rate": 1.410916851586619e-06, "loss": 0.418, "step": 37340 }, { "epoch": 0.8289586131119522, "grad_norm": 1.4537737369537354, "learning_rate": 1.409131689040949e-06, "loss": 0.4427, "step": 37345 }, { "epoch": 0.8290695996714798, "grad_norm": 0.9123027920722961, "learning_rate": 1.407347570944234e-06, "loss": 0.4614, "step": 37350 }, { "epoch": 0.8291805862310074, "grad_norm": 1.0160402059555054, "learning_rate": 1.4055644975133875e-06, "loss": 0.443, "step": 37355 }, { "epoch": 0.8292915727905351, "grad_norm": 1.07515287399292, "learning_rate": 1.4037824689651825e-06, "loss": 0.3687, "step": 37360 }, { "epoch": 0.8294025593500627, "grad_norm": 2.9584853649139404, "learning_rate": 1.4020014855162755e-06, "loss": 0.3932, "step": 37365 }, { "epoch": 0.8295135459095904, "grad_norm": 0.940378725528717, "learning_rate": 1.4002215473831948e-06, "loss": 0.4168, "step": 37370 }, { "epoch": 0.8296245324691179, "grad_norm": 1.1896016597747803, "learning_rate": 1.3984426547823348e-06, "loss": 0.454, "step": 37375 }, { "epoch": 0.8297355190286456, "grad_norm": 1.081487774848938, "learning_rate": 1.3966648079299717e-06, "loss": 0.2407, "step": 37380 }, { "epoch": 0.8298465055881733, "grad_norm": 1.4853719472885132, "learning_rate": 1.3948880070422455e-06, "loss": 0.3657, "step": 37385 }, { "epoch": 0.8299574921477009, "grad_norm": 0.9414169788360596, "learning_rate": 1.3931122523351792e-06, "loss": 0.443, "step": 37390 }, { "epoch": 0.8300684787072286, "grad_norm": 0.7520697712898254, "learning_rate": 1.3913375440246569e-06, "loss": 0.3519, "step": 37395 }, { "epoch": 0.8301794652667562, "grad_norm": 0.8525213599205017, "learning_rate": 1.3895638823264447e-06, "loss": 0.2664, "step": 37400 }, { "epoch": 0.8302904518262838, "grad_norm": 5.166147708892822, "learning_rate": 1.387791267456181e-06, "loss": 0.3077, "step": 37405 }, { "epoch": 0.8304014383858115, "grad_norm": 0.8263622522354126, "learning_rate": 1.3860196996293696e-06, "loss": 0.4258, "step": 37410 }, { "epoch": 0.8305124249453392, "grad_norm": 1.5888712406158447, "learning_rate": 1.3842491790613966e-06, "loss": 0.5111, "step": 37415 }, { "epoch": 0.8306234115048667, "grad_norm": 1.092028021812439, "learning_rate": 1.382479705967511e-06, "loss": 0.3655, "step": 37420 }, { "epoch": 0.8307343980643944, "grad_norm": 1.4173848628997803, "learning_rate": 1.380711280562841e-06, "loss": 0.4612, "step": 37425 }, { "epoch": 0.830845384623922, "grad_norm": 0.5405141711235046, "learning_rate": 1.3789439030623896e-06, "loss": 0.2213, "step": 37430 }, { "epoch": 0.8309563711834497, "grad_norm": 1.3125280141830444, "learning_rate": 1.3771775736810244e-06, "loss": 0.3059, "step": 37435 }, { "epoch": 0.8310673577429774, "grad_norm": 1.3626099824905396, "learning_rate": 1.3754122926334922e-06, "loss": 0.3351, "step": 37440 }, { "epoch": 0.8311783443025049, "grad_norm": 0.6582348942756653, "learning_rate": 1.3736480601344071e-06, "loss": 0.288, "step": 37445 }, { "epoch": 0.8312893308620326, "grad_norm": 1.2736144065856934, "learning_rate": 1.3718848763982596e-06, "loss": 0.4283, "step": 37450 }, { "epoch": 0.8314003174215603, "grad_norm": 0.8599841594696045, "learning_rate": 1.3701227416394146e-06, "loss": 0.3242, "step": 37455 }, { "epoch": 0.8315113039810879, "grad_norm": 1.3656071424484253, "learning_rate": 1.3683616560721036e-06, "loss": 0.3931, "step": 37460 }, { "epoch": 0.8316222905406155, "grad_norm": 1.123210072517395, "learning_rate": 1.3666016199104349e-06, "loss": 0.3652, "step": 37465 }, { "epoch": 0.8317332771001432, "grad_norm": 0.8573991656303406, "learning_rate": 1.364842633368385e-06, "loss": 0.4622, "step": 37470 }, { "epoch": 0.8318442636596708, "grad_norm": 1.0115129947662354, "learning_rate": 1.363084696659811e-06, "loss": 0.3035, "step": 37475 }, { "epoch": 0.8319552502191985, "grad_norm": 1.4920989274978638, "learning_rate": 1.3613278099984305e-06, "loss": 0.2896, "step": 37480 }, { "epoch": 0.832066236778726, "grad_norm": 1.930938959121704, "learning_rate": 1.3595719735978451e-06, "loss": 0.2667, "step": 37485 }, { "epoch": 0.8321772233382537, "grad_norm": 1.1663861274719238, "learning_rate": 1.3578171876715196e-06, "loss": 0.3067, "step": 37490 }, { "epoch": 0.8322882098977814, "grad_norm": 1.9616607427597046, "learning_rate": 1.3560634524327987e-06, "loss": 0.4342, "step": 37495 }, { "epoch": 0.832399196457309, "grad_norm": 0.8635976910591125, "learning_rate": 1.354310768094892e-06, "loss": 0.3984, "step": 37500 }, { "epoch": 0.8325101830168367, "grad_norm": 1.164693832397461, "learning_rate": 1.352559134870889e-06, "loss": 0.5515, "step": 37505 }, { "epoch": 0.8326211695763643, "grad_norm": 1.0937644243240356, "learning_rate": 1.3508085529737425e-06, "loss": 0.3391, "step": 37510 }, { "epoch": 0.8327321561358919, "grad_norm": 2.0383598804473877, "learning_rate": 1.349059022616287e-06, "loss": 0.5429, "step": 37515 }, { "epoch": 0.8328431426954196, "grad_norm": 1.3086220026016235, "learning_rate": 1.34731054401122e-06, "loss": 0.2063, "step": 37520 }, { "epoch": 0.8329541292549473, "grad_norm": 1.2310152053833008, "learning_rate": 1.3455631173711214e-06, "loss": 0.2309, "step": 37525 }, { "epoch": 0.8330651158144748, "grad_norm": 1.0935806035995483, "learning_rate": 1.3438167429084315e-06, "loss": 0.4173, "step": 37530 }, { "epoch": 0.8331761023740025, "grad_norm": 1.3279857635498047, "learning_rate": 1.3420714208354713e-06, "loss": 0.4536, "step": 37535 }, { "epoch": 0.8332870889335301, "grad_norm": 0.9432138800621033, "learning_rate": 1.3403271513644334e-06, "loss": 0.5366, "step": 37540 }, { "epoch": 0.8333980754930578, "grad_norm": 1.0061371326446533, "learning_rate": 1.338583934707377e-06, "loss": 0.4347, "step": 37545 }, { "epoch": 0.8335090620525855, "grad_norm": 2.5035080909729004, "learning_rate": 1.3368417710762394e-06, "loss": 0.3435, "step": 37550 }, { "epoch": 0.833620048612113, "grad_norm": 0.8789152503013611, "learning_rate": 1.335100660682822e-06, "loss": 0.3996, "step": 37555 }, { "epoch": 0.8337310351716407, "grad_norm": 0.9698935151100159, "learning_rate": 1.3333606037388102e-06, "loss": 0.2393, "step": 37560 }, { "epoch": 0.8338420217311684, "grad_norm": 0.7996877431869507, "learning_rate": 1.331621600455747e-06, "loss": 0.3614, "step": 37565 }, { "epoch": 0.833953008290696, "grad_norm": 2.1126487255096436, "learning_rate": 1.3298836510450597e-06, "loss": 0.3795, "step": 37570 }, { "epoch": 0.8340639948502236, "grad_norm": 2.0766563415527344, "learning_rate": 1.3281467557180416e-06, "loss": 0.5423, "step": 37575 }, { "epoch": 0.8341749814097513, "grad_norm": 1.3232349157333374, "learning_rate": 1.3264109146858562e-06, "loss": 0.3806, "step": 37580 }, { "epoch": 0.8342859679692789, "grad_norm": 1.0379611253738403, "learning_rate": 1.3246761281595454e-06, "loss": 0.3126, "step": 37585 }, { "epoch": 0.8343969545288066, "grad_norm": 0.6964349746704102, "learning_rate": 1.3229423963500132e-06, "loss": 0.3547, "step": 37590 }, { "epoch": 0.8345079410883341, "grad_norm": 0.9996328949928284, "learning_rate": 1.321209719468044e-06, "loss": 0.4493, "step": 37595 }, { "epoch": 0.8346189276478618, "grad_norm": 1.6771372556686401, "learning_rate": 1.3194780977242927e-06, "loss": 0.5017, "step": 37600 }, { "epoch": 0.8347299142073895, "grad_norm": 1.0705715417861938, "learning_rate": 1.317747531329281e-06, "loss": 0.2607, "step": 37605 }, { "epoch": 0.8348409007669171, "grad_norm": 1.3443447351455688, "learning_rate": 1.316018020493408e-06, "loss": 0.4812, "step": 37610 }, { "epoch": 0.8349518873264448, "grad_norm": 1.5578769445419312, "learning_rate": 1.3142895654269372e-06, "loss": 0.4023, "step": 37615 }, { "epoch": 0.8350628738859724, "grad_norm": 1.4091074466705322, "learning_rate": 1.3125621663400123e-06, "loss": 0.5718, "step": 37620 }, { "epoch": 0.8351738604455, "grad_norm": 1.1248959302902222, "learning_rate": 1.3108358234426455e-06, "loss": 0.3817, "step": 37625 }, { "epoch": 0.8352848470050277, "grad_norm": 1.4390947818756104, "learning_rate": 1.3091105369447166e-06, "loss": 0.328, "step": 37630 }, { "epoch": 0.8353958335645554, "grad_norm": 1.4196460247039795, "learning_rate": 1.3073863070559833e-06, "loss": 0.3957, "step": 37635 }, { "epoch": 0.835506820124083, "grad_norm": 2.1124303340911865, "learning_rate": 1.3056631339860682e-06, "loss": 0.4624, "step": 37640 }, { "epoch": 0.8356178066836106, "grad_norm": 1.3274662494659424, "learning_rate": 1.3039410179444734e-06, "loss": 0.413, "step": 37645 }, { "epoch": 0.8357287932431382, "grad_norm": 1.625228762626648, "learning_rate": 1.3022199591405616e-06, "loss": 0.3787, "step": 37650 }, { "epoch": 0.8358397798026659, "grad_norm": 0.7213447690010071, "learning_rate": 1.3004999577835786e-06, "loss": 0.3415, "step": 37655 }, { "epoch": 0.8359507663621936, "grad_norm": 0.8313508629798889, "learning_rate": 1.2987810140826362e-06, "loss": 0.4463, "step": 37660 }, { "epoch": 0.8360617529217211, "grad_norm": 1.5393515825271606, "learning_rate": 1.297063128246715e-06, "loss": 0.3254, "step": 37665 }, { "epoch": 0.8361727394812488, "grad_norm": 0.8955177068710327, "learning_rate": 1.2953463004846722e-06, "loss": 0.4756, "step": 37670 }, { "epoch": 0.8362837260407765, "grad_norm": 0.8730138540267944, "learning_rate": 1.2936305310052322e-06, "loss": 0.2815, "step": 37675 }, { "epoch": 0.8363947126003041, "grad_norm": 1.8725333213806152, "learning_rate": 1.2919158200169924e-06, "loss": 0.3197, "step": 37680 }, { "epoch": 0.8365056991598318, "grad_norm": 1.0760202407836914, "learning_rate": 1.290202167728426e-06, "loss": 0.2608, "step": 37685 }, { "epoch": 0.8366166857193594, "grad_norm": 1.7121400833129883, "learning_rate": 1.2884895743478664e-06, "loss": 0.4663, "step": 37690 }, { "epoch": 0.836727672278887, "grad_norm": 1.1320337057113647, "learning_rate": 1.2867780400835307e-06, "loss": 0.4716, "step": 37695 }, { "epoch": 0.8368386588384147, "grad_norm": 0.8330530524253845, "learning_rate": 1.2850675651434962e-06, "loss": 0.3685, "step": 37700 }, { "epoch": 0.8369496453979423, "grad_norm": 0.9803527593612671, "learning_rate": 1.2833581497357205e-06, "loss": 0.3924, "step": 37705 }, { "epoch": 0.8370606319574699, "grad_norm": 1.5887954235076904, "learning_rate": 1.2816497940680294e-06, "loss": 0.3234, "step": 37710 }, { "epoch": 0.8371716185169976, "grad_norm": 1.1901915073394775, "learning_rate": 1.2799424983481145e-06, "loss": 0.393, "step": 37715 }, { "epoch": 0.8372826050765252, "grad_norm": 1.1848924160003662, "learning_rate": 1.2782362627835488e-06, "loss": 0.4492, "step": 37720 }, { "epoch": 0.8373935916360529, "grad_norm": 0.7305712103843689, "learning_rate": 1.2765310875817649e-06, "loss": 0.3837, "step": 37725 }, { "epoch": 0.8375045781955806, "grad_norm": 1.7613849639892578, "learning_rate": 1.2748269729500784e-06, "loss": 0.5601, "step": 37730 }, { "epoch": 0.8376155647551081, "grad_norm": 1.5227969884872437, "learning_rate": 1.2731239190956635e-06, "loss": 0.3904, "step": 37735 }, { "epoch": 0.8377265513146358, "grad_norm": 1.541243076324463, "learning_rate": 1.2714219262255777e-06, "loss": 0.2467, "step": 37740 }, { "epoch": 0.8378375378741635, "grad_norm": 0.8089504241943359, "learning_rate": 1.2697209945467382e-06, "loss": 0.4567, "step": 37745 }, { "epoch": 0.837948524433691, "grad_norm": 1.0389708280563354, "learning_rate": 1.2680211242659425e-06, "loss": 0.2745, "step": 37750 }, { "epoch": 0.8380595109932187, "grad_norm": 0.951557457447052, "learning_rate": 1.266322315589853e-06, "loss": 0.4042, "step": 37755 }, { "epoch": 0.8381704975527463, "grad_norm": 1.3259742259979248, "learning_rate": 1.264624568725007e-06, "loss": 0.353, "step": 37760 }, { "epoch": 0.838281484112274, "grad_norm": 0.8747276067733765, "learning_rate": 1.262927883877808e-06, "loss": 0.2623, "step": 37765 }, { "epoch": 0.8383924706718017, "grad_norm": 2.2142369747161865, "learning_rate": 1.261232261254537e-06, "loss": 0.3924, "step": 37770 }, { "epoch": 0.8385034572313292, "grad_norm": 1.2620859146118164, "learning_rate": 1.2595377010613375e-06, "loss": 0.4454, "step": 37775 }, { "epoch": 0.8386144437908569, "grad_norm": 1.658715844154358, "learning_rate": 1.2578442035042338e-06, "loss": 0.3923, "step": 37780 }, { "epoch": 0.8387254303503846, "grad_norm": 1.0938314199447632, "learning_rate": 1.2561517687891112e-06, "loss": 0.4819, "step": 37785 }, { "epoch": 0.8388364169099122, "grad_norm": 1.25962233543396, "learning_rate": 1.2544603971217318e-06, "loss": 0.3127, "step": 37790 }, { "epoch": 0.8389474034694399, "grad_norm": 1.0734721422195435, "learning_rate": 1.2527700887077289e-06, "loss": 0.203, "step": 37795 }, { "epoch": 0.8390583900289675, "grad_norm": 1.505988597869873, "learning_rate": 1.2510808437526e-06, "loss": 0.5183, "step": 37800 }, { "epoch": 0.8391693765884951, "grad_norm": 1.4044837951660156, "learning_rate": 1.2493926624617237e-06, "loss": 0.2907, "step": 37805 }, { "epoch": 0.8392803631480228, "grad_norm": 0.949424147605896, "learning_rate": 1.2477055450403374e-06, "loss": 0.4513, "step": 37810 }, { "epoch": 0.8393913497075504, "grad_norm": 1.0142112970352173, "learning_rate": 1.2460194916935587e-06, "loss": 0.4101, "step": 37815 }, { "epoch": 0.839502336267078, "grad_norm": 1.2752467393875122, "learning_rate": 1.2443345026263731e-06, "loss": 0.5054, "step": 37820 }, { "epoch": 0.8396133228266057, "grad_norm": 1.5303524732589722, "learning_rate": 1.2426505780436326e-06, "loss": 0.5011, "step": 37825 }, { "epoch": 0.8397243093861333, "grad_norm": 1.807246208190918, "learning_rate": 1.2409677181500668e-06, "loss": 0.2896, "step": 37830 }, { "epoch": 0.839835295945661, "grad_norm": 0.9525343179702759, "learning_rate": 1.2392859231502685e-06, "loss": 0.3024, "step": 37835 }, { "epoch": 0.8399462825051887, "grad_norm": 1.0603681802749634, "learning_rate": 1.2376051932487087e-06, "loss": 0.4048, "step": 37840 }, { "epoch": 0.8400572690647162, "grad_norm": 1.1888055801391602, "learning_rate": 1.2359255286497195e-06, "loss": 0.4145, "step": 37845 }, { "epoch": 0.8401682556242439, "grad_norm": 0.9829010963439941, "learning_rate": 1.2342469295575133e-06, "loss": 0.4637, "step": 37850 }, { "epoch": 0.8402792421837716, "grad_norm": 1.1636102199554443, "learning_rate": 1.2325693961761697e-06, "loss": 0.3748, "step": 37855 }, { "epoch": 0.8403902287432992, "grad_norm": 1.3435735702514648, "learning_rate": 1.2308929287096327e-06, "loss": 0.3393, "step": 37860 }, { "epoch": 0.8405012153028268, "grad_norm": 0.8890703320503235, "learning_rate": 1.2292175273617258e-06, "loss": 0.3868, "step": 37865 }, { "epoch": 0.8406122018623544, "grad_norm": 1.4685038328170776, "learning_rate": 1.2275431923361358e-06, "loss": 0.4727, "step": 37870 }, { "epoch": 0.8407231884218821, "grad_norm": 1.1564308404922485, "learning_rate": 1.2258699238364224e-06, "loss": 0.3571, "step": 37875 }, { "epoch": 0.8408341749814098, "grad_norm": 0.8358651399612427, "learning_rate": 1.2241977220660206e-06, "loss": 0.3261, "step": 37880 }, { "epoch": 0.8409451615409373, "grad_norm": 1.0563150644302368, "learning_rate": 1.2225265872282266e-06, "loss": 0.4553, "step": 37885 }, { "epoch": 0.841056148100465, "grad_norm": 1.1132227182388306, "learning_rate": 1.2208565195262145e-06, "loss": 0.2361, "step": 37890 }, { "epoch": 0.8411671346599927, "grad_norm": 1.431659460067749, "learning_rate": 1.2191875191630209e-06, "loss": 0.3593, "step": 37895 }, { "epoch": 0.8412781212195203, "grad_norm": 1.1609866619110107, "learning_rate": 1.21751958634156e-06, "loss": 0.333, "step": 37900 }, { "epoch": 0.841389107779048, "grad_norm": 1.087960958480835, "learning_rate": 1.2158527212646165e-06, "loss": 0.3605, "step": 37905 }, { "epoch": 0.8415000943385756, "grad_norm": 1.426638126373291, "learning_rate": 1.214186924134838e-06, "loss": 0.4866, "step": 37910 }, { "epoch": 0.8416110808981032, "grad_norm": 3.363008499145508, "learning_rate": 1.2125221951547495e-06, "loss": 0.4721, "step": 37915 }, { "epoch": 0.8417220674576309, "grad_norm": 1.3448435068130493, "learning_rate": 1.2108585345267387e-06, "loss": 0.3232, "step": 37920 }, { "epoch": 0.8418330540171586, "grad_norm": 1.0982211828231812, "learning_rate": 1.209195942453073e-06, "loss": 0.3121, "step": 37925 }, { "epoch": 0.8419440405766861, "grad_norm": 1.3792861700057983, "learning_rate": 1.2075344191358818e-06, "loss": 0.2409, "step": 37930 }, { "epoch": 0.8420550271362138, "grad_norm": 1.3266873359680176, "learning_rate": 1.2058739647771667e-06, "loss": 0.2559, "step": 37935 }, { "epoch": 0.8421660136957414, "grad_norm": 0.886651873588562, "learning_rate": 1.204214579578804e-06, "loss": 0.3027, "step": 37940 }, { "epoch": 0.8422770002552691, "grad_norm": 0.8672597408294678, "learning_rate": 1.2025562637425326e-06, "loss": 0.4554, "step": 37945 }, { "epoch": 0.8423879868147968, "grad_norm": 1.3894035816192627, "learning_rate": 1.2008990174699685e-06, "loss": 0.3483, "step": 37950 }, { "epoch": 0.8424989733743243, "grad_norm": 0.844137966632843, "learning_rate": 1.1992428409625901e-06, "loss": 0.2818, "step": 37955 }, { "epoch": 0.842609959933852, "grad_norm": 1.4286129474639893, "learning_rate": 1.1975877344217501e-06, "loss": 0.3868, "step": 37960 }, { "epoch": 0.8427209464933797, "grad_norm": 0.6080193519592285, "learning_rate": 1.1959336980486757e-06, "loss": 0.4645, "step": 37965 }, { "epoch": 0.8428319330529073, "grad_norm": 0.7501819133758545, "learning_rate": 1.194280732044454e-06, "loss": 0.3131, "step": 37970 }, { "epoch": 0.842942919612435, "grad_norm": 1.4122878313064575, "learning_rate": 1.1926288366100503e-06, "loss": 0.3991, "step": 37975 }, { "epoch": 0.8430539061719626, "grad_norm": 1.70725679397583, "learning_rate": 1.1909780119462922e-06, "loss": 0.4149, "step": 37980 }, { "epoch": 0.8431648927314902, "grad_norm": 0.9734331369400024, "learning_rate": 1.1893282582538867e-06, "loss": 0.307, "step": 37985 }, { "epoch": 0.8432758792910179, "grad_norm": 1.3157240152359009, "learning_rate": 1.1876795757334014e-06, "loss": 0.3889, "step": 37990 }, { "epoch": 0.8433868658505455, "grad_norm": 1.4884155988693237, "learning_rate": 1.1860319645852814e-06, "loss": 0.4665, "step": 37995 }, { "epoch": 0.8434978524100731, "grad_norm": 1.281921625137329, "learning_rate": 1.184385425009832e-06, "loss": 0.4709, "step": 38000 }, { "epoch": 0.8436088389696008, "grad_norm": 1.284393310546875, "learning_rate": 1.1827399572072407e-06, "loss": 0.3922, "step": 38005 }, { "epoch": 0.8437198255291284, "grad_norm": 1.273414134979248, "learning_rate": 1.1810955613775521e-06, "loss": 0.3056, "step": 38010 }, { "epoch": 0.8438308120886561, "grad_norm": 1.4604092836380005, "learning_rate": 1.1794522377206907e-06, "loss": 0.3527, "step": 38015 }, { "epoch": 0.8439417986481837, "grad_norm": 0.8474437594413757, "learning_rate": 1.1778099864364422e-06, "loss": 0.4289, "step": 38020 }, { "epoch": 0.8440527852077113, "grad_norm": 1.360791802406311, "learning_rate": 1.1761688077244703e-06, "loss": 0.2715, "step": 38025 }, { "epoch": 0.844163771767239, "grad_norm": 1.2367948293685913, "learning_rate": 1.174528701784301e-06, "loss": 0.5403, "step": 38030 }, { "epoch": 0.8442747583267667, "grad_norm": 1.419376015663147, "learning_rate": 1.1728896688153347e-06, "loss": 0.5606, "step": 38035 }, { "epoch": 0.8443857448862943, "grad_norm": 1.1906652450561523, "learning_rate": 1.1712517090168373e-06, "loss": 0.4601, "step": 38040 }, { "epoch": 0.8444967314458219, "grad_norm": 1.702460765838623, "learning_rate": 1.1696148225879467e-06, "loss": 0.4131, "step": 38045 }, { "epoch": 0.8446077180053495, "grad_norm": 0.9393609166145325, "learning_rate": 1.1679790097276744e-06, "loss": 0.3865, "step": 38050 }, { "epoch": 0.8447187045648772, "grad_norm": 1.3559194803237915, "learning_rate": 1.1663442706348915e-06, "loss": 0.4174, "step": 38055 }, { "epoch": 0.8448296911244049, "grad_norm": 2.1292991638183594, "learning_rate": 1.164710605508348e-06, "loss": 0.4488, "step": 38060 }, { "epoch": 0.8449406776839324, "grad_norm": 1.1899800300598145, "learning_rate": 1.1630780145466558e-06, "loss": 0.5035, "step": 38065 }, { "epoch": 0.8450516642434601, "grad_norm": 1.1950067281723022, "learning_rate": 1.161446497948302e-06, "loss": 0.3659, "step": 38070 }, { "epoch": 0.8451626508029878, "grad_norm": 1.067469596862793, "learning_rate": 1.1598160559116423e-06, "loss": 0.2644, "step": 38075 }, { "epoch": 0.8452736373625154, "grad_norm": 0.9901965856552124, "learning_rate": 1.158186688634898e-06, "loss": 0.3783, "step": 38080 }, { "epoch": 0.845384623922043, "grad_norm": 0.9443339109420776, "learning_rate": 1.156558396316164e-06, "loss": 0.4083, "step": 38085 }, { "epoch": 0.8454956104815707, "grad_norm": 1.2968891859054565, "learning_rate": 1.1549311791534e-06, "loss": 0.4318, "step": 38090 }, { "epoch": 0.8456065970410983, "grad_norm": 1.3473148345947266, "learning_rate": 1.153305037344441e-06, "loss": 0.4181, "step": 38095 }, { "epoch": 0.845717583600626, "grad_norm": 1.1904234886169434, "learning_rate": 1.1516799710869841e-06, "loss": 0.3757, "step": 38100 }, { "epoch": 0.8458285701601536, "grad_norm": 1.0559203624725342, "learning_rate": 1.1500559805786016e-06, "loss": 0.3172, "step": 38105 }, { "epoch": 0.8459395567196812, "grad_norm": 0.5630988478660583, "learning_rate": 1.1484330660167342e-06, "loss": 0.3873, "step": 38110 }, { "epoch": 0.8460505432792089, "grad_norm": 0.8863928914070129, "learning_rate": 1.1468112275986864e-06, "loss": 0.3741, "step": 38115 }, { "epoch": 0.8461615298387365, "grad_norm": 0.9940023422241211, "learning_rate": 1.1451904655216417e-06, "loss": 0.3822, "step": 38120 }, { "epoch": 0.8462725163982642, "grad_norm": 1.2200144529342651, "learning_rate": 1.14357077998264e-06, "loss": 0.3438, "step": 38125 }, { "epoch": 0.8463835029577919, "grad_norm": 0.8880993127822876, "learning_rate": 1.1419521711786018e-06, "loss": 0.377, "step": 38130 }, { "epoch": 0.8464944895173194, "grad_norm": 0.7141773700714111, "learning_rate": 1.140334639306312e-06, "loss": 0.3334, "step": 38135 }, { "epoch": 0.8466054760768471, "grad_norm": 1.2401177883148193, "learning_rate": 1.138718184562423e-06, "loss": 0.5925, "step": 38140 }, { "epoch": 0.8467164626363748, "grad_norm": 2.065546751022339, "learning_rate": 1.1371028071434599e-06, "loss": 0.4347, "step": 38145 }, { "epoch": 0.8468274491959024, "grad_norm": 1.3787494897842407, "learning_rate": 1.135488507245811e-06, "loss": 0.3479, "step": 38150 }, { "epoch": 0.84693843575543, "grad_norm": 2.0585546493530273, "learning_rate": 1.1338752850657409e-06, "loss": 0.4585, "step": 38155 }, { "epoch": 0.8470494223149576, "grad_norm": 1.160445213317871, "learning_rate": 1.132263140799381e-06, "loss": 0.3359, "step": 38160 }, { "epoch": 0.8471604088744853, "grad_norm": 1.3824493885040283, "learning_rate": 1.1306520746427263e-06, "loss": 0.489, "step": 38165 }, { "epoch": 0.847271395434013, "grad_norm": 1.5092135667800903, "learning_rate": 1.1290420867916496e-06, "loss": 0.3813, "step": 38170 }, { "epoch": 0.8473823819935405, "grad_norm": 1.6400960683822632, "learning_rate": 1.1274331774418822e-06, "loss": 0.3731, "step": 38175 }, { "epoch": 0.8474933685530682, "grad_norm": 0.9592393636703491, "learning_rate": 1.1258253467890346e-06, "loss": 0.4253, "step": 38180 }, { "epoch": 0.8476043551125959, "grad_norm": 1.5121009349822998, "learning_rate": 1.1242185950285778e-06, "loss": 0.5113, "step": 38185 }, { "epoch": 0.8477153416721235, "grad_norm": 0.8395439982414246, "learning_rate": 1.122612922355858e-06, "loss": 0.4075, "step": 38190 }, { "epoch": 0.8478263282316512, "grad_norm": 0.9275767803192139, "learning_rate": 1.1210083289660877e-06, "loss": 0.4377, "step": 38195 }, { "epoch": 0.8479373147911788, "grad_norm": 1.0100282430648804, "learning_rate": 1.1194048150543457e-06, "loss": 0.499, "step": 38200 }, { "epoch": 0.8480483013507064, "grad_norm": 2.3323545455932617, "learning_rate": 1.1178023808155846e-06, "loss": 0.5272, "step": 38205 }, { "epoch": 0.8481592879102341, "grad_norm": 2.287036418914795, "learning_rate": 1.11620102644462e-06, "loss": 0.5176, "step": 38210 }, { "epoch": 0.8482702744697617, "grad_norm": 1.46099853515625, "learning_rate": 1.11460075213614e-06, "loss": 0.4802, "step": 38215 }, { "epoch": 0.8483812610292893, "grad_norm": 1.0827932357788086, "learning_rate": 1.1130015580847032e-06, "loss": 0.5283, "step": 38220 }, { "epoch": 0.848492247588817, "grad_norm": 1.2606589794158936, "learning_rate": 1.111403444484731e-06, "loss": 0.3741, "step": 38225 }, { "epoch": 0.8486032341483446, "grad_norm": 1.5086512565612793, "learning_rate": 1.1098064115305196e-06, "loss": 0.3827, "step": 38230 }, { "epoch": 0.8487142207078723, "grad_norm": 1.9647616147994995, "learning_rate": 1.1082104594162269e-06, "loss": 0.4122, "step": 38235 }, { "epoch": 0.8488252072674, "grad_norm": 0.9889862537384033, "learning_rate": 1.1066155883358877e-06, "loss": 0.3284, "step": 38240 }, { "epoch": 0.8489361938269275, "grad_norm": 1.1836154460906982, "learning_rate": 1.1050217984833978e-06, "loss": 0.4974, "step": 38245 }, { "epoch": 0.8490471803864552, "grad_norm": 1.180612325668335, "learning_rate": 1.1034290900525279e-06, "loss": 0.585, "step": 38250 }, { "epoch": 0.8491581669459829, "grad_norm": 1.3655891418457031, "learning_rate": 1.1018374632369111e-06, "loss": 0.3475, "step": 38255 }, { "epoch": 0.8492691535055105, "grad_norm": 0.8092775940895081, "learning_rate": 1.1002469182300546e-06, "loss": 0.2952, "step": 38260 }, { "epoch": 0.8493801400650381, "grad_norm": 1.331992268562317, "learning_rate": 1.0986574552253282e-06, "loss": 0.4558, "step": 38265 }, { "epoch": 0.8494911266245657, "grad_norm": 1.3252182006835938, "learning_rate": 1.0970690744159784e-06, "loss": 0.2301, "step": 38270 }, { "epoch": 0.8496021131840934, "grad_norm": 0.5097599029541016, "learning_rate": 1.0954817759951098e-06, "loss": 0.3065, "step": 38275 }, { "epoch": 0.8497130997436211, "grad_norm": 1.2783838510513306, "learning_rate": 1.0938955601557056e-06, "loss": 0.3088, "step": 38280 }, { "epoch": 0.8498240863031487, "grad_norm": 1.0956603288650513, "learning_rate": 1.092310427090608e-06, "loss": 0.3869, "step": 38285 }, { "epoch": 0.8499350728626763, "grad_norm": 1.4360817670822144, "learning_rate": 1.0907263769925381e-06, "loss": 0.5542, "step": 38290 }, { "epoch": 0.850046059422204, "grad_norm": 1.2553974390029907, "learning_rate": 1.089143410054072e-06, "loss": 0.3406, "step": 38295 }, { "epoch": 0.8501570459817316, "grad_norm": 1.1232776641845703, "learning_rate": 1.0875615264676665e-06, "loss": 0.5081, "step": 38300 }, { "epoch": 0.8502680325412593, "grad_norm": 1.687680959701538, "learning_rate": 1.0859807264256428e-06, "loss": 0.3252, "step": 38305 }, { "epoch": 0.850379019100787, "grad_norm": 2.0268588066101074, "learning_rate": 1.084401010120185e-06, "loss": 0.2039, "step": 38310 }, { "epoch": 0.8504900056603145, "grad_norm": 1.1189128160476685, "learning_rate": 1.0828223777433534e-06, "loss": 0.4119, "step": 38315 }, { "epoch": 0.8506009922198422, "grad_norm": 1.8909300565719604, "learning_rate": 1.0812448294870692e-06, "loss": 0.5072, "step": 38320 }, { "epoch": 0.8507119787793698, "grad_norm": 1.4315944910049438, "learning_rate": 1.0796683655431272e-06, "loss": 0.3991, "step": 38325 }, { "epoch": 0.8508229653388975, "grad_norm": 1.3107892274856567, "learning_rate": 1.078092986103192e-06, "loss": 0.3847, "step": 38330 }, { "epoch": 0.8509339518984251, "grad_norm": 1.8488363027572632, "learning_rate": 1.0765186913587866e-06, "loss": 0.2773, "step": 38335 }, { "epoch": 0.8510449384579527, "grad_norm": 1.318145513534546, "learning_rate": 1.074945481501314e-06, "loss": 0.401, "step": 38340 }, { "epoch": 0.8511559250174804, "grad_norm": 0.9807799458503723, "learning_rate": 1.0733733567220362e-06, "loss": 0.3081, "step": 38345 }, { "epoch": 0.8512669115770081, "grad_norm": 1.1080820560455322, "learning_rate": 1.0718023172120895e-06, "loss": 0.2787, "step": 38350 }, { "epoch": 0.8513778981365356, "grad_norm": 2.1662795543670654, "learning_rate": 1.0702323631624723e-06, "loss": 0.3918, "step": 38355 }, { "epoch": 0.8514888846960633, "grad_norm": 1.3688949346542358, "learning_rate": 1.068663494764056e-06, "loss": 0.3132, "step": 38360 }, { "epoch": 0.851599871255591, "grad_norm": 0.9906854033470154, "learning_rate": 1.067095712207581e-06, "loss": 0.2986, "step": 38365 }, { "epoch": 0.8517108578151186, "grad_norm": 1.1726410388946533, "learning_rate": 1.0655290156836485e-06, "loss": 0.3745, "step": 38370 }, { "epoch": 0.8518218443746463, "grad_norm": 0.900324285030365, "learning_rate": 1.0639634053827363e-06, "loss": 0.3802, "step": 38375 }, { "epoch": 0.8519328309341738, "grad_norm": 1.3714967966079712, "learning_rate": 1.0623988814951812e-06, "loss": 0.2792, "step": 38380 }, { "epoch": 0.8520438174937015, "grad_norm": 0.9182549118995667, "learning_rate": 1.0608354442111968e-06, "loss": 0.4102, "step": 38385 }, { "epoch": 0.8521548040532292, "grad_norm": 1.286138892173767, "learning_rate": 1.0592730937208618e-06, "loss": 0.2673, "step": 38390 }, { "epoch": 0.8522657906127568, "grad_norm": 1.5091959238052368, "learning_rate": 1.0577118302141166e-06, "loss": 0.4845, "step": 38395 }, { "epoch": 0.8523767771722844, "grad_norm": 1.234449863433838, "learning_rate": 1.0561516538807792e-06, "loss": 0.2381, "step": 38400 }, { "epoch": 0.8524877637318121, "grad_norm": 2.3573572635650635, "learning_rate": 1.054592564910526e-06, "loss": 0.3428, "step": 38405 }, { "epoch": 0.8525987502913397, "grad_norm": 1.1645255088806152, "learning_rate": 1.0530345634929084e-06, "loss": 0.4389, "step": 38410 }, { "epoch": 0.8527097368508674, "grad_norm": 1.2179172039031982, "learning_rate": 1.0514776498173452e-06, "loss": 0.2181, "step": 38415 }, { "epoch": 0.852820723410395, "grad_norm": 1.5778353214263916, "learning_rate": 1.0499218240731157e-06, "loss": 0.5121, "step": 38420 }, { "epoch": 0.8529317099699226, "grad_norm": 1.748390555381775, "learning_rate": 1.0483670864493777e-06, "loss": 0.3426, "step": 38425 }, { "epoch": 0.8530426965294503, "grad_norm": 1.0926501750946045, "learning_rate": 1.0468134371351445e-06, "loss": 0.4044, "step": 38430 }, { "epoch": 0.8531536830889779, "grad_norm": 0.5647231340408325, "learning_rate": 1.0452608763193095e-06, "loss": 0.3543, "step": 38435 }, { "epoch": 0.8532646696485056, "grad_norm": 0.8507322669029236, "learning_rate": 1.0437094041906238e-06, "loss": 0.3336, "step": 38440 }, { "epoch": 0.8533756562080332, "grad_norm": 1.3485618829727173, "learning_rate": 1.0421590209377107e-06, "loss": 0.419, "step": 38445 }, { "epoch": 0.8534866427675608, "grad_norm": 2.003201961517334, "learning_rate": 1.0406097267490644e-06, "loss": 0.455, "step": 38450 }, { "epoch": 0.8535976293270885, "grad_norm": 1.314960241317749, "learning_rate": 1.0390615218130383e-06, "loss": 0.4184, "step": 38455 }, { "epoch": 0.8537086158866162, "grad_norm": 1.501952052116394, "learning_rate": 1.037514406317861e-06, "loss": 0.345, "step": 38460 }, { "epoch": 0.8538196024461437, "grad_norm": 1.1983728408813477, "learning_rate": 1.0359683804516219e-06, "loss": 0.368, "step": 38465 }, { "epoch": 0.8539305890056714, "grad_norm": 0.7054364085197449, "learning_rate": 1.0344234444022872e-06, "loss": 0.4098, "step": 38470 }, { "epoch": 0.8540415755651991, "grad_norm": 0.7602006196975708, "learning_rate": 1.032879598357679e-06, "loss": 0.3234, "step": 38475 }, { "epoch": 0.8541525621247267, "grad_norm": 1.3115113973617554, "learning_rate": 1.0313368425054983e-06, "loss": 0.4597, "step": 38480 }, { "epoch": 0.8542635486842544, "grad_norm": 1.0144224166870117, "learning_rate": 1.0297951770333037e-06, "loss": 0.1787, "step": 38485 }, { "epoch": 0.8543745352437819, "grad_norm": 1.2411842346191406, "learning_rate": 1.02825460212853e-06, "loss": 0.5086, "step": 38490 }, { "epoch": 0.8544855218033096, "grad_norm": 1.0099564790725708, "learning_rate": 1.0267151179784706e-06, "loss": 0.4363, "step": 38495 }, { "epoch": 0.8545965083628373, "grad_norm": 1.160954475402832, "learning_rate": 1.025176724770295e-06, "loss": 0.2975, "step": 38500 }, { "epoch": 0.8547074949223649, "grad_norm": 1.771726369857788, "learning_rate": 1.0236394226910329e-06, "loss": 0.3246, "step": 38505 }, { "epoch": 0.8548184814818925, "grad_norm": 1.7871524095535278, "learning_rate": 1.0221032119275864e-06, "loss": 0.4353, "step": 38510 }, { "epoch": 0.8549294680414202, "grad_norm": 0.8769774436950684, "learning_rate": 1.0205680926667194e-06, "loss": 0.4317, "step": 38515 }, { "epoch": 0.8550404546009478, "grad_norm": 1.4010024070739746, "learning_rate": 1.0190340650950726e-06, "loss": 0.2572, "step": 38520 }, { "epoch": 0.8551514411604755, "grad_norm": 1.1399223804473877, "learning_rate": 1.017501129399141e-06, "loss": 0.3274, "step": 38525 }, { "epoch": 0.8552624277200032, "grad_norm": 1.0759845972061157, "learning_rate": 1.015969285765297e-06, "loss": 0.3051, "step": 38530 }, { "epoch": 0.8553734142795307, "grad_norm": 1.2395176887512207, "learning_rate": 1.0144385343797801e-06, "loss": 0.3784, "step": 38535 }, { "epoch": 0.8554844008390584, "grad_norm": 2.213679790496826, "learning_rate": 1.0129088754286886e-06, "loss": 0.4289, "step": 38540 }, { "epoch": 0.855595387398586, "grad_norm": 0.756289005279541, "learning_rate": 1.011380309097998e-06, "loss": 0.2351, "step": 38545 }, { "epoch": 0.8557063739581137, "grad_norm": 0.951809823513031, "learning_rate": 1.0098528355735414e-06, "loss": 0.2969, "step": 38550 }, { "epoch": 0.8558173605176413, "grad_norm": 0.8504477143287659, "learning_rate": 1.0083264550410266e-06, "loss": 0.4501, "step": 38555 }, { "epoch": 0.8559283470771689, "grad_norm": 1.6416808366775513, "learning_rate": 1.0068011676860278e-06, "loss": 0.43, "step": 38560 }, { "epoch": 0.8560393336366966, "grad_norm": 1.2744433879852295, "learning_rate": 1.0052769736939793e-06, "loss": 0.4125, "step": 38565 }, { "epoch": 0.8561503201962243, "grad_norm": 0.7917124032974243, "learning_rate": 1.003753873250194e-06, "loss": 0.4791, "step": 38570 }, { "epoch": 0.8562613067557518, "grad_norm": 1.0418739318847656, "learning_rate": 1.002231866539839e-06, "loss": 0.3657, "step": 38575 }, { "epoch": 0.8563722933152795, "grad_norm": 1.2840015888214111, "learning_rate": 1.0007109537479564e-06, "loss": 0.3881, "step": 38580 }, { "epoch": 0.8564832798748072, "grad_norm": 1.4934595823287964, "learning_rate": 9.991911350594586e-07, "loss": 0.3519, "step": 38585 }, { "epoch": 0.8565942664343348, "grad_norm": 1.3786568641662598, "learning_rate": 9.976724106591128e-07, "loss": 0.3896, "step": 38590 }, { "epoch": 0.8567052529938625, "grad_norm": 1.0313783884048462, "learning_rate": 9.961547807315664e-07, "loss": 0.2541, "step": 38595 }, { "epoch": 0.85681623955339, "grad_norm": 1.1191176176071167, "learning_rate": 9.946382454613235e-07, "loss": 0.5072, "step": 38600 }, { "epoch": 0.8569272261129177, "grad_norm": 1.5176087617874146, "learning_rate": 9.931228050327623e-07, "loss": 0.4345, "step": 38605 }, { "epoch": 0.8570382126724454, "grad_norm": 0.8821091651916504, "learning_rate": 9.916084596301223e-07, "loss": 0.4797, "step": 38610 }, { "epoch": 0.857149199231973, "grad_norm": 1.2651344537734985, "learning_rate": 9.90095209437515e-07, "loss": 0.4668, "step": 38615 }, { "epoch": 0.8572601857915007, "grad_norm": 1.8215205669403076, "learning_rate": 9.885830546389151e-07, "loss": 0.5254, "step": 38620 }, { "epoch": 0.8573711723510283, "grad_norm": 1.7566852569580078, "learning_rate": 9.870719954181651e-07, "loss": 0.4328, "step": 38625 }, { "epoch": 0.8574821589105559, "grad_norm": 1.0727424621582031, "learning_rate": 9.855620319589766e-07, "loss": 0.2483, "step": 38630 }, { "epoch": 0.8575931454700836, "grad_norm": 1.2513246536254883, "learning_rate": 9.840531644449214e-07, "loss": 0.4956, "step": 38635 }, { "epoch": 0.8577041320296113, "grad_norm": 1.3015859127044678, "learning_rate": 9.825453930594464e-07, "loss": 0.2856, "step": 38640 }, { "epoch": 0.8578151185891388, "grad_norm": 1.3504365682601929, "learning_rate": 9.810387179858616e-07, "loss": 0.4944, "step": 38645 }, { "epoch": 0.8579261051486665, "grad_norm": 0.6242560744285583, "learning_rate": 9.795331394073404e-07, "loss": 0.3423, "step": 38650 }, { "epoch": 0.8580370917081941, "grad_norm": 1.6594665050506592, "learning_rate": 9.780286575069298e-07, "loss": 0.2734, "step": 38655 }, { "epoch": 0.8581480782677218, "grad_norm": 1.602768063545227, "learning_rate": 9.765252724675356e-07, "loss": 0.5262, "step": 38660 }, { "epoch": 0.8582590648272495, "grad_norm": 1.629512071609497, "learning_rate": 9.750229844719362e-07, "loss": 0.4276, "step": 38665 }, { "epoch": 0.858370051386777, "grad_norm": 1.0607631206512451, "learning_rate": 9.735217937027774e-07, "loss": 0.2883, "step": 38670 }, { "epoch": 0.8584810379463047, "grad_norm": 1.34122896194458, "learning_rate": 9.720217003425648e-07, "loss": 0.3398, "step": 38675 }, { "epoch": 0.8585920245058324, "grad_norm": 0.9040728211402893, "learning_rate": 9.705227045736777e-07, "loss": 0.4113, "step": 38680 }, { "epoch": 0.85870301106536, "grad_norm": 0.8782297968864441, "learning_rate": 9.69024806578356e-07, "loss": 0.389, "step": 38685 }, { "epoch": 0.8588139976248876, "grad_norm": 1.0483667850494385, "learning_rate": 9.675280065387117e-07, "loss": 0.4214, "step": 38690 }, { "epoch": 0.8589249841844153, "grad_norm": 1.4147143363952637, "learning_rate": 9.660323046367193e-07, "loss": 0.3655, "step": 38695 }, { "epoch": 0.8590359707439429, "grad_norm": 1.479657530784607, "learning_rate": 9.645377010542212e-07, "loss": 0.3764, "step": 38700 }, { "epoch": 0.8591469573034706, "grad_norm": 2.3263349533081055, "learning_rate": 9.630441959729286e-07, "loss": 0.4449, "step": 38705 }, { "epoch": 0.8592579438629981, "grad_norm": 1.145320177078247, "learning_rate": 9.61551789574413e-07, "loss": 0.292, "step": 38710 }, { "epoch": 0.8593689304225258, "grad_norm": 1.9598171710968018, "learning_rate": 9.600604820401205e-07, "loss": 0.3929, "step": 38715 }, { "epoch": 0.8594799169820535, "grad_norm": 0.999853789806366, "learning_rate": 9.585702735513546e-07, "loss": 0.2936, "step": 38720 }, { "epoch": 0.8595909035415811, "grad_norm": 0.9991888403892517, "learning_rate": 9.570811642892952e-07, "loss": 0.3136, "step": 38725 }, { "epoch": 0.8597018901011088, "grad_norm": 1.3448801040649414, "learning_rate": 9.555931544349772e-07, "loss": 0.3744, "step": 38730 }, { "epoch": 0.8598128766606364, "grad_norm": 1.1844966411590576, "learning_rate": 9.54106244169314e-07, "loss": 0.4347, "step": 38735 }, { "epoch": 0.859923863220164, "grad_norm": 1.6972241401672363, "learning_rate": 9.526204336730727e-07, "loss": 0.3532, "step": 38740 }, { "epoch": 0.8600348497796917, "grad_norm": 1.1772350072860718, "learning_rate": 9.511357231268992e-07, "loss": 0.3568, "step": 38745 }, { "epoch": 0.8601458363392194, "grad_norm": 1.950363278388977, "learning_rate": 9.496521127112956e-07, "loss": 0.3482, "step": 38750 }, { "epoch": 0.8602568228987469, "grad_norm": 0.92097008228302, "learning_rate": 9.481696026066367e-07, "loss": 0.2089, "step": 38755 }, { "epoch": 0.8603678094582746, "grad_norm": 1.196353793144226, "learning_rate": 9.466881929931582e-07, "loss": 0.5417, "step": 38760 }, { "epoch": 0.8604787960178022, "grad_norm": 1.7988780736923218, "learning_rate": 9.452078840509693e-07, "loss": 0.2771, "step": 38765 }, { "epoch": 0.8605897825773299, "grad_norm": 1.4759079217910767, "learning_rate": 9.43728675960035e-07, "loss": 0.4198, "step": 38770 }, { "epoch": 0.8607007691368576, "grad_norm": 1.9383196830749512, "learning_rate": 9.422505689001993e-07, "loss": 0.539, "step": 38775 }, { "epoch": 0.8608117556963851, "grad_norm": 0.9065805673599243, "learning_rate": 9.407735630511594e-07, "loss": 0.4382, "step": 38780 }, { "epoch": 0.8609227422559128, "grad_norm": 0.8202492594718933, "learning_rate": 9.392976585924885e-07, "loss": 0.4727, "step": 38785 }, { "epoch": 0.8610337288154405, "grad_norm": 1.5601189136505127, "learning_rate": 9.378228557036217e-07, "loss": 0.497, "step": 38790 }, { "epoch": 0.8611447153749681, "grad_norm": 0.7903019785881042, "learning_rate": 9.363491545638592e-07, "loss": 0.362, "step": 38795 }, { "epoch": 0.8612557019344957, "grad_norm": 0.5757574439048767, "learning_rate": 9.348765553523697e-07, "loss": 0.4147, "step": 38800 }, { "epoch": 0.8613666884940234, "grad_norm": 0.9555240273475647, "learning_rate": 9.334050582481857e-07, "loss": 0.3742, "step": 38805 }, { "epoch": 0.861477675053551, "grad_norm": 0.8194215297698975, "learning_rate": 9.319346634302084e-07, "loss": 0.2755, "step": 38810 }, { "epoch": 0.8615886616130787, "grad_norm": 1.1851353645324707, "learning_rate": 9.304653710772038e-07, "loss": 0.3956, "step": 38815 }, { "epoch": 0.8616996481726062, "grad_norm": 1.3655849695205688, "learning_rate": 9.289971813678001e-07, "loss": 0.5262, "step": 38820 }, { "epoch": 0.8618106347321339, "grad_norm": 1.137489676475525, "learning_rate": 9.275300944804999e-07, "loss": 0.3999, "step": 38825 }, { "epoch": 0.8619216212916616, "grad_norm": 1.7556220293045044, "learning_rate": 9.260641105936618e-07, "loss": 0.5324, "step": 38830 }, { "epoch": 0.8620326078511892, "grad_norm": 1.0561107397079468, "learning_rate": 9.245992298855177e-07, "loss": 0.3947, "step": 38835 }, { "epoch": 0.8621435944107169, "grad_norm": 0.8867374658584595, "learning_rate": 9.231354525341652e-07, "loss": 0.4675, "step": 38840 }, { "epoch": 0.8622545809702445, "grad_norm": 1.4009345769882202, "learning_rate": 9.216727787175605e-07, "loss": 0.3704, "step": 38845 }, { "epoch": 0.8623655675297721, "grad_norm": 1.6466008424758911, "learning_rate": 9.202112086135351e-07, "loss": 0.356, "step": 38850 }, { "epoch": 0.8624765540892998, "grad_norm": 1.4668787717819214, "learning_rate": 9.187507423997777e-07, "loss": 0.3356, "step": 38855 }, { "epoch": 0.8625875406488275, "grad_norm": 1.3413883447647095, "learning_rate": 9.172913802538508e-07, "loss": 0.3936, "step": 38860 }, { "epoch": 0.862698527208355, "grad_norm": 0.8753415942192078, "learning_rate": 9.158331223531747e-07, "loss": 0.2515, "step": 38865 }, { "epoch": 0.8628095137678827, "grad_norm": 0.8159883618354797, "learning_rate": 9.143759688750419e-07, "loss": 0.3006, "step": 38870 }, { "epoch": 0.8629205003274103, "grad_norm": 1.0602672100067139, "learning_rate": 9.129199199966099e-07, "loss": 0.4705, "step": 38875 }, { "epoch": 0.863031486886938, "grad_norm": 0.6323870420455933, "learning_rate": 9.114649758948967e-07, "loss": 0.4241, "step": 38880 }, { "epoch": 0.8631424734464657, "grad_norm": 1.046294093132019, "learning_rate": 9.100111367467923e-07, "loss": 0.3208, "step": 38885 }, { "epoch": 0.8632534600059932, "grad_norm": 0.940803587436676, "learning_rate": 9.085584027290472e-07, "loss": 0.358, "step": 38890 }, { "epoch": 0.8633644465655209, "grad_norm": 1.8877294063568115, "learning_rate": 9.071067740182815e-07, "loss": 0.2846, "step": 38895 }, { "epoch": 0.8634754331250486, "grad_norm": 1.3504570722579956, "learning_rate": 9.056562507909805e-07, "loss": 0.3206, "step": 38900 }, { "epoch": 0.8635864196845762, "grad_norm": 1.4714055061340332, "learning_rate": 9.042068332234899e-07, "loss": 0.5496, "step": 38905 }, { "epoch": 0.8636974062441038, "grad_norm": 1.0898418426513672, "learning_rate": 9.027585214920298e-07, "loss": 0.4504, "step": 38910 }, { "epoch": 0.8638083928036315, "grad_norm": 1.6247862577438354, "learning_rate": 9.013113157726771e-07, "loss": 0.3826, "step": 38915 }, { "epoch": 0.8639193793631591, "grad_norm": 1.524096131324768, "learning_rate": 8.998652162413801e-07, "loss": 0.3156, "step": 38920 }, { "epoch": 0.8640303659226868, "grad_norm": 1.3673064708709717, "learning_rate": 8.984202230739536e-07, "loss": 0.3662, "step": 38925 }, { "epoch": 0.8641413524822144, "grad_norm": 1.0908459424972534, "learning_rate": 8.969763364460682e-07, "loss": 0.4091, "step": 38930 }, { "epoch": 0.864252339041742, "grad_norm": 1.3286373615264893, "learning_rate": 8.955335565332734e-07, "loss": 0.4986, "step": 38935 }, { "epoch": 0.8643633256012697, "grad_norm": 1.986507534980774, "learning_rate": 8.940918835109735e-07, "loss": 0.5036, "step": 38940 }, { "epoch": 0.8644743121607973, "grad_norm": 0.984667956829071, "learning_rate": 8.926513175544448e-07, "loss": 0.4329, "step": 38945 }, { "epoch": 0.864585298720325, "grad_norm": 0.7082566022872925, "learning_rate": 8.91211858838823e-07, "loss": 0.4022, "step": 38950 }, { "epoch": 0.8646962852798526, "grad_norm": 1.6974024772644043, "learning_rate": 8.897735075391156e-07, "loss": 0.4663, "step": 38955 }, { "epoch": 0.8648072718393802, "grad_norm": 1.3038138151168823, "learning_rate": 8.88336263830194e-07, "loss": 0.4664, "step": 38960 }, { "epoch": 0.8649182583989079, "grad_norm": 1.2903189659118652, "learning_rate": 8.869001278867884e-07, "loss": 0.4917, "step": 38965 }, { "epoch": 0.8650292449584356, "grad_norm": 1.3441555500030518, "learning_rate": 8.854650998835046e-07, "loss": 0.3813, "step": 38970 }, { "epoch": 0.8651402315179632, "grad_norm": 1.2959764003753662, "learning_rate": 8.840311799948042e-07, "loss": 0.551, "step": 38975 }, { "epoch": 0.8652512180774908, "grad_norm": 0.7261309027671814, "learning_rate": 8.825983683950223e-07, "loss": 0.318, "step": 38980 }, { "epoch": 0.8653622046370184, "grad_norm": 1.0499922037124634, "learning_rate": 8.811666652583517e-07, "loss": 0.482, "step": 38985 }, { "epoch": 0.8654731911965461, "grad_norm": 1.1763601303100586, "learning_rate": 8.797360707588576e-07, "loss": 0.5707, "step": 38990 }, { "epoch": 0.8655841777560738, "grad_norm": 0.9862807393074036, "learning_rate": 8.783065850704631e-07, "loss": 0.4042, "step": 38995 }, { "epoch": 0.8656951643156013, "grad_norm": 0.8914280533790588, "learning_rate": 8.768782083669647e-07, "loss": 0.4066, "step": 39000 }, { "epoch": 0.865806150875129, "grad_norm": 1.3279800415039062, "learning_rate": 8.754509408220146e-07, "loss": 0.5931, "step": 39005 }, { "epoch": 0.8659171374346567, "grad_norm": 1.4973981380462646, "learning_rate": 8.740247826091397e-07, "loss": 0.4384, "step": 39010 }, { "epoch": 0.8660281239941843, "grad_norm": 1.2501366138458252, "learning_rate": 8.725997339017233e-07, "loss": 0.3055, "step": 39015 }, { "epoch": 0.866139110553712, "grad_norm": 1.1514090299606323, "learning_rate": 8.711757948730227e-07, "loss": 0.4909, "step": 39020 }, { "epoch": 0.8662500971132396, "grad_norm": 1.296980857849121, "learning_rate": 8.697529656961512e-07, "loss": 0.4126, "step": 39025 }, { "epoch": 0.8663610836727672, "grad_norm": 0.9663153290748596, "learning_rate": 8.683312465440952e-07, "loss": 0.3933, "step": 39030 }, { "epoch": 0.8664720702322949, "grad_norm": 1.295825719833374, "learning_rate": 8.669106375896996e-07, "loss": 0.3961, "step": 39035 }, { "epoch": 0.8665830567918225, "grad_norm": 1.4817475080490112, "learning_rate": 8.654911390056786e-07, "loss": 0.5709, "step": 39040 }, { "epoch": 0.8666940433513501, "grad_norm": 1.2556267976760864, "learning_rate": 8.640727509646119e-07, "loss": 0.4379, "step": 39045 }, { "epoch": 0.8668050299108778, "grad_norm": 0.8033402562141418, "learning_rate": 8.626554736389393e-07, "loss": 0.399, "step": 39050 }, { "epoch": 0.8669160164704054, "grad_norm": 0.9582207798957825, "learning_rate": 8.612393072009706e-07, "loss": 0.2177, "step": 39055 }, { "epoch": 0.8670270030299331, "grad_norm": 0.9874535799026489, "learning_rate": 8.598242518228773e-07, "loss": 0.5628, "step": 39060 }, { "epoch": 0.8671379895894608, "grad_norm": 1.2723040580749512, "learning_rate": 8.58410307676697e-07, "loss": 0.4486, "step": 39065 }, { "epoch": 0.8672489761489883, "grad_norm": 1.10847008228302, "learning_rate": 8.569974749343357e-07, "loss": 0.441, "step": 39070 }, { "epoch": 0.867359962708516, "grad_norm": 0.913597822189331, "learning_rate": 8.555857537675549e-07, "loss": 0.3791, "step": 39075 }, { "epoch": 0.8674709492680437, "grad_norm": 1.1242194175720215, "learning_rate": 8.541751443479928e-07, "loss": 0.3358, "step": 39080 }, { "epoch": 0.8675819358275713, "grad_norm": 1.259722113609314, "learning_rate": 8.527656468471423e-07, "loss": 0.4287, "step": 39085 }, { "epoch": 0.8676929223870989, "grad_norm": 0.994596004486084, "learning_rate": 8.513572614363674e-07, "loss": 0.5339, "step": 39090 }, { "epoch": 0.8678039089466265, "grad_norm": 1.569893717765808, "learning_rate": 8.499499882868955e-07, "loss": 0.3643, "step": 39095 }, { "epoch": 0.8679148955061542, "grad_norm": 1.5473707914352417, "learning_rate": 8.485438275698154e-07, "loss": 0.433, "step": 39100 }, { "epoch": 0.8680258820656819, "grad_norm": 1.753111720085144, "learning_rate": 8.47138779456087e-07, "loss": 0.3161, "step": 39105 }, { "epoch": 0.8681368686252094, "grad_norm": 0.9786942601203918, "learning_rate": 8.457348441165281e-07, "loss": 0.4065, "step": 39110 }, { "epoch": 0.8682478551847371, "grad_norm": 1.395298957824707, "learning_rate": 8.443320217218254e-07, "loss": 0.4125, "step": 39115 }, { "epoch": 0.8683588417442648, "grad_norm": 1.0039939880371094, "learning_rate": 8.429303124425315e-07, "loss": 0.4161, "step": 39120 }, { "epoch": 0.8684698283037924, "grad_norm": 1.6132234334945679, "learning_rate": 8.415297164490577e-07, "loss": 0.417, "step": 39125 }, { "epoch": 0.8685808148633201, "grad_norm": 1.0659582614898682, "learning_rate": 8.40130233911689e-07, "loss": 0.3051, "step": 39130 }, { "epoch": 0.8686918014228477, "grad_norm": 0.9651395678520203, "learning_rate": 8.387318650005638e-07, "loss": 0.4814, "step": 39135 }, { "epoch": 0.8688027879823753, "grad_norm": 0.6835569143295288, "learning_rate": 8.373346098856961e-07, "loss": 0.3561, "step": 39140 }, { "epoch": 0.868913774541903, "grad_norm": 1.12702214717865, "learning_rate": 8.359384687369554e-07, "loss": 0.4681, "step": 39145 }, { "epoch": 0.8690247611014306, "grad_norm": 0.813792884349823, "learning_rate": 8.345434417240816e-07, "loss": 0.3715, "step": 39150 }, { "epoch": 0.8691357476609582, "grad_norm": 0.6443630456924438, "learning_rate": 8.331495290166791e-07, "loss": 0.3748, "step": 39155 }, { "epoch": 0.8692467342204859, "grad_norm": 1.2461071014404297, "learning_rate": 8.317567307842123e-07, "loss": 0.467, "step": 39160 }, { "epoch": 0.8693577207800135, "grad_norm": 1.268563151359558, "learning_rate": 8.303650471960157e-07, "loss": 0.3786, "step": 39165 }, { "epoch": 0.8694687073395412, "grad_norm": 1.2659401893615723, "learning_rate": 8.289744784212827e-07, "loss": 0.4257, "step": 39170 }, { "epoch": 0.8695796938990689, "grad_norm": 1.381678819656372, "learning_rate": 8.275850246290762e-07, "loss": 0.4126, "step": 39175 }, { "epoch": 0.8696906804585964, "grad_norm": 1.0406432151794434, "learning_rate": 8.261966859883208e-07, "loss": 0.4649, "step": 39180 }, { "epoch": 0.8698016670181241, "grad_norm": 1.262363076210022, "learning_rate": 8.24809462667805e-07, "loss": 0.3267, "step": 39185 }, { "epoch": 0.8699126535776518, "grad_norm": 1.6432816982269287, "learning_rate": 8.234233548361847e-07, "loss": 0.296, "step": 39190 }, { "epoch": 0.8700236401371794, "grad_norm": 0.5735118389129639, "learning_rate": 8.220383626619755e-07, "loss": 0.2755, "step": 39195 }, { "epoch": 0.870134626696707, "grad_norm": 0.5151550769805908, "learning_rate": 8.206544863135612e-07, "loss": 0.3769, "step": 39200 }, { "epoch": 0.8702456132562346, "grad_norm": 1.830293893814087, "learning_rate": 8.19271725959192e-07, "loss": 0.3203, "step": 39205 }, { "epoch": 0.8703565998157623, "grad_norm": 1.3666870594024658, "learning_rate": 8.178900817669744e-07, "loss": 0.3906, "step": 39210 }, { "epoch": 0.87046758637529, "grad_norm": 1.4511600732803345, "learning_rate": 8.165095539048884e-07, "loss": 0.4858, "step": 39215 }, { "epoch": 0.8705785729348176, "grad_norm": 1.1984554529190063, "learning_rate": 8.151301425407699e-07, "loss": 0.3846, "step": 39220 }, { "epoch": 0.8706895594943452, "grad_norm": 0.7747187614440918, "learning_rate": 8.137518478423256e-07, "loss": 0.3232, "step": 39225 }, { "epoch": 0.8708005460538729, "grad_norm": 1.4917807579040527, "learning_rate": 8.123746699771229e-07, "loss": 0.2611, "step": 39230 }, { "epoch": 0.8709115326134005, "grad_norm": 1.4434287548065186, "learning_rate": 8.109986091125965e-07, "loss": 0.3076, "step": 39235 }, { "epoch": 0.8710225191729282, "grad_norm": 1.3788912296295166, "learning_rate": 8.096236654160394e-07, "loss": 0.2503, "step": 39240 }, { "epoch": 0.8711335057324558, "grad_norm": 1.336180329322815, "learning_rate": 8.082498390546178e-07, "loss": 0.5352, "step": 39245 }, { "epoch": 0.8712444922919834, "grad_norm": 1.1848613023757935, "learning_rate": 8.068771301953515e-07, "loss": 0.5586, "step": 39250 }, { "epoch": 0.8713554788515111, "grad_norm": 1.0970314741134644, "learning_rate": 8.055055390051336e-07, "loss": 0.2981, "step": 39255 }, { "epoch": 0.8714664654110387, "grad_norm": 1.1610400676727295, "learning_rate": 8.041350656507152e-07, "loss": 0.3591, "step": 39260 }, { "epoch": 0.8715774519705664, "grad_norm": 1.0197187662124634, "learning_rate": 8.027657102987163e-07, "loss": 0.3865, "step": 39265 }, { "epoch": 0.871688438530094, "grad_norm": 0.8454812169075012, "learning_rate": 8.01397473115616e-07, "loss": 0.2558, "step": 39270 }, { "epoch": 0.8717994250896216, "grad_norm": 1.5173369646072388, "learning_rate": 8.000303542677635e-07, "loss": 0.3468, "step": 39275 }, { "epoch": 0.8719104116491493, "grad_norm": 1.057705283164978, "learning_rate": 7.986643539213634e-07, "loss": 0.3252, "step": 39280 }, { "epoch": 0.872021398208677, "grad_norm": 0.6779467463493347, "learning_rate": 7.97299472242492e-07, "loss": 0.459, "step": 39285 }, { "epoch": 0.8721323847682045, "grad_norm": 0.7705076336860657, "learning_rate": 7.959357093970899e-07, "loss": 0.3955, "step": 39290 }, { "epoch": 0.8722433713277322, "grad_norm": 0.6741588115692139, "learning_rate": 7.945730655509543e-07, "loss": 0.3321, "step": 39295 }, { "epoch": 0.8723543578872599, "grad_norm": 1.1833090782165527, "learning_rate": 7.932115408697549e-07, "loss": 0.35, "step": 39300 }, { "epoch": 0.8724653444467875, "grad_norm": 1.345542073249817, "learning_rate": 7.918511355190173e-07, "loss": 0.4641, "step": 39305 }, { "epoch": 0.8725763310063152, "grad_norm": 1.673676609992981, "learning_rate": 7.904918496641379e-07, "loss": 0.3948, "step": 39310 }, { "epoch": 0.8726873175658427, "grad_norm": 3.6500816345214844, "learning_rate": 7.891336834703722e-07, "loss": 0.4197, "step": 39315 }, { "epoch": 0.8727983041253704, "grad_norm": 1.4190216064453125, "learning_rate": 7.877766371028417e-07, "loss": 0.455, "step": 39320 }, { "epoch": 0.8729092906848981, "grad_norm": 2.2108571529388428, "learning_rate": 7.864207107265342e-07, "loss": 0.421, "step": 39325 }, { "epoch": 0.8730202772444257, "grad_norm": 1.1898353099822998, "learning_rate": 7.850659045062958e-07, "loss": 0.466, "step": 39330 }, { "epoch": 0.8731312638039533, "grad_norm": 1.808774709701538, "learning_rate": 7.837122186068414e-07, "loss": 0.357, "step": 39335 }, { "epoch": 0.873242250363481, "grad_norm": 0.5657018423080444, "learning_rate": 7.823596531927447e-07, "loss": 0.3987, "step": 39340 }, { "epoch": 0.8733532369230086, "grad_norm": 1.325925588607788, "learning_rate": 7.810082084284476e-07, "loss": 0.4304, "step": 39345 }, { "epoch": 0.8734642234825363, "grad_norm": 0.9428622722625732, "learning_rate": 7.796578844782554e-07, "loss": 0.3992, "step": 39350 }, { "epoch": 0.873575210042064, "grad_norm": 0.9125751256942749, "learning_rate": 7.783086815063346e-07, "loss": 0.4402, "step": 39355 }, { "epoch": 0.8736861966015915, "grad_norm": 1.4098918437957764, "learning_rate": 7.769605996767182e-07, "loss": 0.4035, "step": 39360 }, { "epoch": 0.8737971831611192, "grad_norm": 1.6082866191864014, "learning_rate": 7.756136391532998e-07, "loss": 0.3726, "step": 39365 }, { "epoch": 0.8739081697206468, "grad_norm": 0.6247255802154541, "learning_rate": 7.742678000998372e-07, "loss": 0.4634, "step": 39370 }, { "epoch": 0.8740191562801745, "grad_norm": 1.5552891492843628, "learning_rate": 7.729230826799583e-07, "loss": 0.443, "step": 39375 }, { "epoch": 0.8741301428397021, "grad_norm": 1.285137414932251, "learning_rate": 7.715794870571425e-07, "loss": 0.3397, "step": 39380 }, { "epoch": 0.8742411293992297, "grad_norm": 1.1897644996643066, "learning_rate": 7.702370133947457e-07, "loss": 0.3414, "step": 39385 }, { "epoch": 0.8743521159587574, "grad_norm": 1.2517973184585571, "learning_rate": 7.688956618559762e-07, "loss": 0.1984, "step": 39390 }, { "epoch": 0.8744631025182851, "grad_norm": 1.8219321966171265, "learning_rate": 7.675554326039158e-07, "loss": 0.4278, "step": 39395 }, { "epoch": 0.8745740890778126, "grad_norm": 0.8656405806541443, "learning_rate": 7.66216325801501e-07, "loss": 0.4873, "step": 39400 }, { "epoch": 0.8746850756373403, "grad_norm": 1.532086730003357, "learning_rate": 7.648783416115369e-07, "loss": 0.4338, "step": 39405 }, { "epoch": 0.874796062196868, "grad_norm": 0.9961473941802979, "learning_rate": 7.635414801966934e-07, "loss": 0.3696, "step": 39410 }, { "epoch": 0.8749070487563956, "grad_norm": 1.5317226648330688, "learning_rate": 7.622057417194995e-07, "loss": 0.346, "step": 39415 }, { "epoch": 0.8750180353159233, "grad_norm": 1.653637409210205, "learning_rate": 7.608711263423507e-07, "loss": 0.4753, "step": 39420 }, { "epoch": 0.8751290218754508, "grad_norm": 0.7220828533172607, "learning_rate": 7.595376342275041e-07, "loss": 0.3362, "step": 39425 }, { "epoch": 0.8752400084349785, "grad_norm": 2.434098958969116, "learning_rate": 7.582052655370809e-07, "loss": 0.4546, "step": 39430 }, { "epoch": 0.8753509949945062, "grad_norm": 1.1487387418746948, "learning_rate": 7.568740204330693e-07, "loss": 0.2843, "step": 39435 }, { "epoch": 0.8754619815540338, "grad_norm": 1.684936285018921, "learning_rate": 7.555438990773134e-07, "loss": 0.3151, "step": 39440 }, { "epoch": 0.8755729681135614, "grad_norm": 0.7425820231437683, "learning_rate": 7.542149016315292e-07, "loss": 0.3909, "step": 39445 }, { "epoch": 0.8756839546730891, "grad_norm": 1.5853488445281982, "learning_rate": 7.528870282572864e-07, "loss": 0.263, "step": 39450 }, { "epoch": 0.8757949412326167, "grad_norm": 1.7768914699554443, "learning_rate": 7.515602791160281e-07, "loss": 0.4729, "step": 39455 }, { "epoch": 0.8759059277921444, "grad_norm": 1.2601383924484253, "learning_rate": 7.502346543690531e-07, "loss": 0.3555, "step": 39460 }, { "epoch": 0.8760169143516721, "grad_norm": 1.3894011974334717, "learning_rate": 7.48910154177529e-07, "loss": 0.3625, "step": 39465 }, { "epoch": 0.8761279009111996, "grad_norm": 0.9332782626152039, "learning_rate": 7.475867787024815e-07, "loss": 0.3834, "step": 39470 }, { "epoch": 0.8762388874707273, "grad_norm": 2.1381521224975586, "learning_rate": 7.462645281048043e-07, "loss": 0.2857, "step": 39475 }, { "epoch": 0.8763498740302549, "grad_norm": 0.9826429486274719, "learning_rate": 7.449434025452496e-07, "loss": 0.4026, "step": 39480 }, { "epoch": 0.8764608605897826, "grad_norm": 1.0933654308319092, "learning_rate": 7.43623402184438e-07, "loss": 0.4764, "step": 39485 }, { "epoch": 0.8765718471493102, "grad_norm": 0.9672232270240784, "learning_rate": 7.423045271828489e-07, "loss": 0.3762, "step": 39490 }, { "epoch": 0.8766828337088378, "grad_norm": 0.4871096909046173, "learning_rate": 7.409867777008295e-07, "loss": 0.4321, "step": 39495 }, { "epoch": 0.8767938202683655, "grad_norm": 1.155806541442871, "learning_rate": 7.396701538985829e-07, "loss": 0.3544, "step": 39500 }, { "epoch": 0.8769048068278932, "grad_norm": 1.418287992477417, "learning_rate": 7.383546559361843e-07, "loss": 0.3252, "step": 39505 }, { "epoch": 0.8770157933874208, "grad_norm": 0.6687827706336975, "learning_rate": 7.370402839735635e-07, "loss": 0.4073, "step": 39510 }, { "epoch": 0.8771267799469484, "grad_norm": 1.328175663948059, "learning_rate": 7.357270381705195e-07, "loss": 0.3662, "step": 39515 }, { "epoch": 0.8772377665064761, "grad_norm": 1.973333477973938, "learning_rate": 7.344149186867133e-07, "loss": 0.3604, "step": 39520 }, { "epoch": 0.8773487530660037, "grad_norm": 1.0833680629730225, "learning_rate": 7.331039256816664e-07, "loss": 0.5241, "step": 39525 }, { "epoch": 0.8774597396255314, "grad_norm": 1.6844494342803955, "learning_rate": 7.317940593147665e-07, "loss": 0.4995, "step": 39530 }, { "epoch": 0.8775707261850589, "grad_norm": 0.6150209307670593, "learning_rate": 7.3048531974526e-07, "loss": 0.3504, "step": 39535 }, { "epoch": 0.8776817127445866, "grad_norm": 1.4000403881072998, "learning_rate": 7.291777071322614e-07, "loss": 0.2883, "step": 39540 }, { "epoch": 0.8777926993041143, "grad_norm": 1.3559322357177734, "learning_rate": 7.278712216347461e-07, "loss": 0.2883, "step": 39545 }, { "epoch": 0.8779036858636419, "grad_norm": 1.0683934688568115, "learning_rate": 7.265658634115502e-07, "loss": 0.3154, "step": 39550 }, { "epoch": 0.8780146724231696, "grad_norm": 0.9396424293518066, "learning_rate": 7.25261632621378e-07, "loss": 0.3369, "step": 39555 }, { "epoch": 0.8781256589826972, "grad_norm": 1.2671051025390625, "learning_rate": 7.239585294227891e-07, "loss": 0.3504, "step": 39560 }, { "epoch": 0.8782366455422248, "grad_norm": 1.2282811403274536, "learning_rate": 7.226565539742148e-07, "loss": 0.4722, "step": 39565 }, { "epoch": 0.8783476321017525, "grad_norm": 1.1958354711532593, "learning_rate": 7.213557064339405e-07, "loss": 0.4123, "step": 39570 }, { "epoch": 0.8784586186612802, "grad_norm": 1.2750756740570068, "learning_rate": 7.20055986960122e-07, "loss": 0.2987, "step": 39575 }, { "epoch": 0.8785696052208077, "grad_norm": 0.874458372592926, "learning_rate": 7.187573957107751e-07, "loss": 0.3363, "step": 39580 }, { "epoch": 0.8786805917803354, "grad_norm": 0.7889037132263184, "learning_rate": 7.174599328437759e-07, "loss": 0.593, "step": 39585 }, { "epoch": 0.878791578339863, "grad_norm": 1.0096137523651123, "learning_rate": 7.161635985168691e-07, "loss": 0.3501, "step": 39590 }, { "epoch": 0.8789025648993907, "grad_norm": 1.4937151670455933, "learning_rate": 7.148683928876544e-07, "loss": 0.3758, "step": 39595 }, { "epoch": 0.8790135514589184, "grad_norm": 0.5935758948326111, "learning_rate": 7.135743161136e-07, "loss": 0.3388, "step": 39600 }, { "epoch": 0.8791245380184459, "grad_norm": 2.038949489593506, "learning_rate": 7.12281368352038e-07, "loss": 0.3533, "step": 39605 }, { "epoch": 0.8792355245779736, "grad_norm": 0.6185315847396851, "learning_rate": 7.109895497601571e-07, "loss": 0.2537, "step": 39610 }, { "epoch": 0.8793465111375013, "grad_norm": 1.051365613937378, "learning_rate": 7.09698860495015e-07, "loss": 0.3532, "step": 39615 }, { "epoch": 0.8794574976970289, "grad_norm": 1.2595382928848267, "learning_rate": 7.084093007135274e-07, "loss": 0.4788, "step": 39620 }, { "epoch": 0.8795684842565565, "grad_norm": 1.1612679958343506, "learning_rate": 7.071208705724742e-07, "loss": 0.5306, "step": 39625 }, { "epoch": 0.8796794708160842, "grad_norm": 1.0881028175354004, "learning_rate": 7.058335702285024e-07, "loss": 0.366, "step": 39630 }, { "epoch": 0.8797904573756118, "grad_norm": 1.3085839748382568, "learning_rate": 7.045473998381136e-07, "loss": 0.2082, "step": 39635 }, { "epoch": 0.8799014439351395, "grad_norm": 1.039229154586792, "learning_rate": 7.032623595576782e-07, "loss": 0.4965, "step": 39640 }, { "epoch": 0.880012430494667, "grad_norm": 1.720349907875061, "learning_rate": 7.019784495434246e-07, "loss": 0.4264, "step": 39645 }, { "epoch": 0.8801234170541947, "grad_norm": 1.182712435722351, "learning_rate": 7.0069566995145e-07, "loss": 0.5752, "step": 39650 }, { "epoch": 0.8802344036137224, "grad_norm": 1.3283008337020874, "learning_rate": 6.994140209377065e-07, "loss": 0.4175, "step": 39655 }, { "epoch": 0.88034539017325, "grad_norm": 0.8335916996002197, "learning_rate": 6.981335026580149e-07, "loss": 0.3787, "step": 39660 }, { "epoch": 0.8804563767327777, "grad_norm": 1.896451711654663, "learning_rate": 6.968541152680575e-07, "loss": 0.4486, "step": 39665 }, { "epoch": 0.8805673632923053, "grad_norm": 0.5865107774734497, "learning_rate": 6.955758589233741e-07, "loss": 0.4943, "step": 39670 }, { "epoch": 0.8806783498518329, "grad_norm": 2.0769338607788086, "learning_rate": 6.942987337793761e-07, "loss": 0.3804, "step": 39675 }, { "epoch": 0.8807893364113606, "grad_norm": 1.878135323524475, "learning_rate": 6.93022739991327e-07, "loss": 0.3271, "step": 39680 }, { "epoch": 0.8809003229708883, "grad_norm": 0.7034597992897034, "learning_rate": 6.917478777143593e-07, "loss": 0.3703, "step": 39685 }, { "epoch": 0.8810113095304158, "grad_norm": 1.7691963911056519, "learning_rate": 6.904741471034692e-07, "loss": 0.5127, "step": 39690 }, { "epoch": 0.8811222960899435, "grad_norm": 0.9068023562431335, "learning_rate": 6.892015483135095e-07, "loss": 0.3244, "step": 39695 }, { "epoch": 0.8812332826494711, "grad_norm": 0.825576663017273, "learning_rate": 6.879300814992007e-07, "loss": 0.4179, "step": 39700 }, { "epoch": 0.8813442692089988, "grad_norm": 2.7324061393737793, "learning_rate": 6.866597468151204e-07, "loss": 0.3272, "step": 39705 }, { "epoch": 0.8814552557685265, "grad_norm": 1.538509726524353, "learning_rate": 6.853905444157161e-07, "loss": 0.4511, "step": 39710 }, { "epoch": 0.881566242328054, "grad_norm": 1.2030854225158691, "learning_rate": 6.841224744552888e-07, "loss": 0.4274, "step": 39715 }, { "epoch": 0.8816772288875817, "grad_norm": 1.1677093505859375, "learning_rate": 6.828555370880085e-07, "loss": 0.4939, "step": 39720 }, { "epoch": 0.8817882154471094, "grad_norm": 1.0679088830947876, "learning_rate": 6.815897324679044e-07, "loss": 0.4146, "step": 39725 }, { "epoch": 0.881899202006637, "grad_norm": 0.8127391338348389, "learning_rate": 6.80325060748871e-07, "loss": 0.3293, "step": 39730 }, { "epoch": 0.8820101885661646, "grad_norm": 1.3469727039337158, "learning_rate": 6.790615220846586e-07, "loss": 0.4906, "step": 39735 }, { "epoch": 0.8821211751256923, "grad_norm": 1.0445455312728882, "learning_rate": 6.777991166288877e-07, "loss": 0.2626, "step": 39740 }, { "epoch": 0.8822321616852199, "grad_norm": 1.063741683959961, "learning_rate": 6.765378445350346e-07, "loss": 0.2856, "step": 39745 }, { "epoch": 0.8823431482447476, "grad_norm": 0.642105758190155, "learning_rate": 6.752777059564431e-07, "loss": 0.3201, "step": 39750 }, { "epoch": 0.8824541348042751, "grad_norm": 1.2013835906982422, "learning_rate": 6.74018701046315e-07, "loss": 0.323, "step": 39755 }, { "epoch": 0.8825651213638028, "grad_norm": 1.887306571006775, "learning_rate": 6.727608299577171e-07, "loss": 0.3755, "step": 39760 }, { "epoch": 0.8826761079233305, "grad_norm": 1.15422523021698, "learning_rate": 6.715040928435746e-07, "loss": 0.4409, "step": 39765 }, { "epoch": 0.8827870944828581, "grad_norm": 1.1066185235977173, "learning_rate": 6.702484898566797e-07, "loss": 0.3627, "step": 39770 }, { "epoch": 0.8828980810423858, "grad_norm": 0.8012137413024902, "learning_rate": 6.689940211496848e-07, "loss": 0.4263, "step": 39775 }, { "epoch": 0.8830090676019134, "grad_norm": 0.863029956817627, "learning_rate": 6.677406868751013e-07, "loss": 0.424, "step": 39780 }, { "epoch": 0.883120054161441, "grad_norm": 1.1876236200332642, "learning_rate": 6.664884871853095e-07, "loss": 0.2413, "step": 39785 }, { "epoch": 0.8832310407209687, "grad_norm": 1.074812412261963, "learning_rate": 6.652374222325441e-07, "loss": 0.3695, "step": 39790 }, { "epoch": 0.8833420272804964, "grad_norm": 1.769379734992981, "learning_rate": 6.639874921689049e-07, "loss": 0.3624, "step": 39795 }, { "epoch": 0.883453013840024, "grad_norm": 1.0724666118621826, "learning_rate": 6.627386971463589e-07, "loss": 0.5713, "step": 39800 }, { "epoch": 0.8835640003995516, "grad_norm": 1.0125855207443237, "learning_rate": 6.614910373167249e-07, "loss": 0.4042, "step": 39805 }, { "epoch": 0.8836749869590792, "grad_norm": 0.8673639297485352, "learning_rate": 6.602445128316937e-07, "loss": 0.37, "step": 39810 }, { "epoch": 0.8837859735186069, "grad_norm": 0.8695536851882935, "learning_rate": 6.589991238428095e-07, "loss": 0.3782, "step": 39815 }, { "epoch": 0.8838969600781346, "grad_norm": 1.2568012475967407, "learning_rate": 6.577548705014869e-07, "loss": 0.3347, "step": 39820 }, { "epoch": 0.8840079466376621, "grad_norm": 1.2032254934310913, "learning_rate": 6.565117529589937e-07, "loss": 0.2547, "step": 39825 }, { "epoch": 0.8841189331971898, "grad_norm": 0.8552618622779846, "learning_rate": 6.552697713664658e-07, "loss": 0.3563, "step": 39830 }, { "epoch": 0.8842299197567175, "grad_norm": 1.2995036840438843, "learning_rate": 6.540289258749011e-07, "loss": 0.4856, "step": 39835 }, { "epoch": 0.8843409063162451, "grad_norm": 2.986711263656616, "learning_rate": 6.527892166351535e-07, "loss": 0.3476, "step": 39840 }, { "epoch": 0.8844518928757727, "grad_norm": 1.8609137535095215, "learning_rate": 6.515506437979469e-07, "loss": 0.3801, "step": 39845 }, { "epoch": 0.8845628794353004, "grad_norm": 1.7628650665283203, "learning_rate": 6.503132075138596e-07, "loss": 0.332, "step": 39850 }, { "epoch": 0.884673865994828, "grad_norm": 1.6240646839141846, "learning_rate": 6.490769079333359e-07, "loss": 0.4001, "step": 39855 }, { "epoch": 0.8847848525543557, "grad_norm": 1.0301650762557983, "learning_rate": 6.478417452066821e-07, "loss": 0.3662, "step": 39860 }, { "epoch": 0.8848958391138833, "grad_norm": 1.349965214729309, "learning_rate": 6.466077194840637e-07, "loss": 0.3889, "step": 39865 }, { "epoch": 0.8850068256734109, "grad_norm": 1.5930453538894653, "learning_rate": 6.453748309155105e-07, "loss": 0.5254, "step": 39870 }, { "epoch": 0.8851178122329386, "grad_norm": 0.877350389957428, "learning_rate": 6.441430796509107e-07, "loss": 0.3623, "step": 39875 }, { "epoch": 0.8852287987924662, "grad_norm": 1.5074859857559204, "learning_rate": 6.429124658400188e-07, "loss": 0.39, "step": 39880 }, { "epoch": 0.8853397853519939, "grad_norm": 0.7831709980964661, "learning_rate": 6.416829896324495e-07, "loss": 0.4656, "step": 39885 }, { "epoch": 0.8854507719115216, "grad_norm": 1.270965576171875, "learning_rate": 6.404546511776755e-07, "loss": 0.4003, "step": 39890 }, { "epoch": 0.8855617584710491, "grad_norm": 1.269120693206787, "learning_rate": 6.392274506250374e-07, "loss": 0.4082, "step": 39895 }, { "epoch": 0.8856727450305768, "grad_norm": 1.6134681701660156, "learning_rate": 6.380013881237302e-07, "loss": 0.5901, "step": 39900 }, { "epoch": 0.8857837315901045, "grad_norm": 0.9332907199859619, "learning_rate": 6.36776463822818e-07, "loss": 0.3308, "step": 39905 }, { "epoch": 0.885894718149632, "grad_norm": 2.3193821907043457, "learning_rate": 6.355526778712195e-07, "loss": 0.5457, "step": 39910 }, { "epoch": 0.8860057047091597, "grad_norm": 0.9229357838630676, "learning_rate": 6.343300304177214e-07, "loss": 0.3415, "step": 39915 }, { "epoch": 0.8861166912686873, "grad_norm": 0.9777660369873047, "learning_rate": 6.331085216109701e-07, "loss": 0.4594, "step": 39920 }, { "epoch": 0.886227677828215, "grad_norm": 1.237738847732544, "learning_rate": 6.318881515994679e-07, "loss": 0.3276, "step": 39925 }, { "epoch": 0.8863386643877427, "grad_norm": 1.1996231079101562, "learning_rate": 6.306689205315885e-07, "loss": 0.5043, "step": 39930 }, { "epoch": 0.8864496509472702, "grad_norm": 1.0719642639160156, "learning_rate": 6.294508285555567e-07, "loss": 0.3521, "step": 39935 }, { "epoch": 0.8865606375067979, "grad_norm": 0.7645555734634399, "learning_rate": 6.282338758194684e-07, "loss": 0.3757, "step": 39940 }, { "epoch": 0.8866716240663256, "grad_norm": 1.6293964385986328, "learning_rate": 6.270180624712751e-07, "loss": 0.4791, "step": 39945 }, { "epoch": 0.8867826106258532, "grad_norm": 1.2439913749694824, "learning_rate": 6.258033886587911e-07, "loss": 0.375, "step": 39950 }, { "epoch": 0.8868935971853809, "grad_norm": 1.689984917640686, "learning_rate": 6.245898545296924e-07, "loss": 0.4247, "step": 39955 }, { "epoch": 0.8870045837449085, "grad_norm": 1.1024510860443115, "learning_rate": 6.233774602315157e-07, "loss": 0.2614, "step": 39960 }, { "epoch": 0.8871155703044361, "grad_norm": 1.1792956590652466, "learning_rate": 6.221662059116629e-07, "loss": 0.3694, "step": 39965 }, { "epoch": 0.8872265568639638, "grad_norm": 1.1179901361465454, "learning_rate": 6.209560917173896e-07, "loss": 0.3596, "step": 39970 }, { "epoch": 0.8873375434234915, "grad_norm": 1.654262900352478, "learning_rate": 6.197471177958214e-07, "loss": 0.3389, "step": 39975 }, { "epoch": 0.887448529983019, "grad_norm": 1.3884893655776978, "learning_rate": 6.185392842939386e-07, "loss": 0.499, "step": 39980 }, { "epoch": 0.8875595165425467, "grad_norm": 1.1281630992889404, "learning_rate": 6.173325913585882e-07, "loss": 0.2934, "step": 39985 }, { "epoch": 0.8876705031020743, "grad_norm": 1.5607919692993164, "learning_rate": 6.161270391364726e-07, "loss": 0.4504, "step": 39990 }, { "epoch": 0.887781489661602, "grad_norm": 1.0889188051223755, "learning_rate": 6.149226277741616e-07, "loss": 0.3168, "step": 39995 }, { "epoch": 0.8878924762211297, "grad_norm": 1.2045074701309204, "learning_rate": 6.137193574180811e-07, "loss": 0.3516, "step": 40000 }, { "epoch": 0.8880034627806572, "grad_norm": 1.0601027011871338, "learning_rate": 6.125172282145242e-07, "loss": 0.4217, "step": 40005 }, { "epoch": 0.8881144493401849, "grad_norm": 0.6195953488349915, "learning_rate": 6.113162403096374e-07, "loss": 0.398, "step": 40010 }, { "epoch": 0.8882254358997126, "grad_norm": 0.8855378031730652, "learning_rate": 6.101163938494359e-07, "loss": 0.3358, "step": 40015 }, { "epoch": 0.8883364224592402, "grad_norm": 1.527886152267456, "learning_rate": 6.08917688979791e-07, "loss": 0.3484, "step": 40020 }, { "epoch": 0.8884474090187678, "grad_norm": 1.2936069965362549, "learning_rate": 6.077201258464383e-07, "loss": 0.263, "step": 40025 }, { "epoch": 0.8885583955782955, "grad_norm": 1.3309067487716675, "learning_rate": 6.065237045949757e-07, "loss": 0.419, "step": 40030 }, { "epoch": 0.8886693821378231, "grad_norm": 1.2705096006393433, "learning_rate": 6.053284253708547e-07, "loss": 0.4035, "step": 40035 }, { "epoch": 0.8887803686973508, "grad_norm": 2.917478561401367, "learning_rate": 6.04134288319399e-07, "loss": 0.5202, "step": 40040 }, { "epoch": 0.8888913552568783, "grad_norm": 0.9519320726394653, "learning_rate": 6.029412935857837e-07, "loss": 0.4442, "step": 40045 }, { "epoch": 0.889002341816406, "grad_norm": 0.9434588551521301, "learning_rate": 6.017494413150504e-07, "loss": 0.4018, "step": 40050 }, { "epoch": 0.8891133283759337, "grad_norm": 0.7919776439666748, "learning_rate": 6.005587316521022e-07, "loss": 0.4546, "step": 40055 }, { "epoch": 0.8892243149354613, "grad_norm": 1.9365350008010864, "learning_rate": 5.993691647416988e-07, "loss": 0.4293, "step": 40060 }, { "epoch": 0.889335301494989, "grad_norm": 1.0461664199829102, "learning_rate": 5.981807407284668e-07, "loss": 0.3527, "step": 40065 }, { "epoch": 0.8894462880545166, "grad_norm": 0.9392116069793701, "learning_rate": 5.969934597568872e-07, "loss": 0.4463, "step": 40070 }, { "epoch": 0.8895572746140442, "grad_norm": 0.9327507615089417, "learning_rate": 5.958073219713089e-07, "loss": 0.3969, "step": 40075 }, { "epoch": 0.8896682611735719, "grad_norm": 1.5740067958831787, "learning_rate": 5.946223275159369e-07, "loss": 0.6056, "step": 40080 }, { "epoch": 0.8897792477330996, "grad_norm": 1.093630075454712, "learning_rate": 5.934384765348378e-07, "loss": 0.3468, "step": 40085 }, { "epoch": 0.8898902342926271, "grad_norm": 0.9992703199386597, "learning_rate": 5.922557691719432e-07, "loss": 0.2102, "step": 40090 }, { "epoch": 0.8900012208521548, "grad_norm": 1.1181128025054932, "learning_rate": 5.910742055710394e-07, "loss": 0.2883, "step": 40095 }, { "epoch": 0.8901122074116824, "grad_norm": 0.8682889342308044, "learning_rate": 5.898937858757814e-07, "loss": 0.3134, "step": 40100 }, { "epoch": 0.8902231939712101, "grad_norm": 1.079490065574646, "learning_rate": 5.887145102296754e-07, "loss": 0.4043, "step": 40105 }, { "epoch": 0.8903341805307378, "grad_norm": 1.373574137687683, "learning_rate": 5.875363787760957e-07, "loss": 0.3318, "step": 40110 }, { "epoch": 0.8904451670902653, "grad_norm": 1.2262922525405884, "learning_rate": 5.863593916582788e-07, "loss": 0.3128, "step": 40115 }, { "epoch": 0.890556153649793, "grad_norm": 0.48372411727905273, "learning_rate": 5.851835490193136e-07, "loss": 0.3235, "step": 40120 }, { "epoch": 0.8906671402093207, "grad_norm": 1.1640418767929077, "learning_rate": 5.840088510021602e-07, "loss": 0.4542, "step": 40125 }, { "epoch": 0.8907781267688483, "grad_norm": 0.7971268892288208, "learning_rate": 5.828352977496299e-07, "loss": 0.4302, "step": 40130 }, { "epoch": 0.890889113328376, "grad_norm": 1.0506165027618408, "learning_rate": 5.816628894044018e-07, "loss": 0.3768, "step": 40135 }, { "epoch": 0.8910000998879036, "grad_norm": 1.1588351726531982, "learning_rate": 5.804916261090132e-07, "loss": 0.2719, "step": 40140 }, { "epoch": 0.8911110864474312, "grad_norm": 1.5512523651123047, "learning_rate": 5.793215080058623e-07, "loss": 0.317, "step": 40145 }, { "epoch": 0.8912220730069589, "grad_norm": 1.2580381631851196, "learning_rate": 5.781525352372086e-07, "loss": 0.3762, "step": 40150 }, { "epoch": 0.8913330595664865, "grad_norm": 1.8583688735961914, "learning_rate": 5.769847079451696e-07, "loss": 0.2636, "step": 40155 }, { "epoch": 0.8914440461260141, "grad_norm": 1.5035998821258545, "learning_rate": 5.758180262717283e-07, "loss": 0.4279, "step": 40160 }, { "epoch": 0.8915550326855418, "grad_norm": 1.1198701858520508, "learning_rate": 5.746524903587247e-07, "loss": 0.4272, "step": 40165 }, { "epoch": 0.8916660192450694, "grad_norm": 1.359174132347107, "learning_rate": 5.73488100347861e-07, "loss": 0.416, "step": 40170 }, { "epoch": 0.8917770058045971, "grad_norm": 1.6771479845046997, "learning_rate": 5.723248563807016e-07, "loss": 0.41, "step": 40175 }, { "epoch": 0.8918879923641247, "grad_norm": 2.4248905181884766, "learning_rate": 5.711627585986667e-07, "loss": 0.4567, "step": 40180 }, { "epoch": 0.8919989789236523, "grad_norm": 0.9985633492469788, "learning_rate": 5.700018071430424e-07, "loss": 0.3216, "step": 40185 }, { "epoch": 0.89210996548318, "grad_norm": 0.6354491114616394, "learning_rate": 5.688420021549724e-07, "loss": 0.3433, "step": 40190 }, { "epoch": 0.8922209520427077, "grad_norm": 1.0853493213653564, "learning_rate": 5.676833437754614e-07, "loss": 0.4728, "step": 40195 }, { "epoch": 0.8923319386022353, "grad_norm": 0.627981960773468, "learning_rate": 5.66525832145377e-07, "loss": 0.4467, "step": 40200 }, { "epoch": 0.8924429251617629, "grad_norm": 0.9562311172485352, "learning_rate": 5.653694674054444e-07, "loss": 0.3171, "step": 40205 }, { "epoch": 0.8925539117212905, "grad_norm": 0.48237770795822144, "learning_rate": 5.642142496962511e-07, "loss": 0.3237, "step": 40210 }, { "epoch": 0.8926648982808182, "grad_norm": 0.9646100401878357, "learning_rate": 5.630601791582424e-07, "loss": 0.2887, "step": 40215 }, { "epoch": 0.8927758848403459, "grad_norm": 1.7289109230041504, "learning_rate": 5.619072559317307e-07, "loss": 0.4087, "step": 40220 }, { "epoch": 0.8928868713998734, "grad_norm": 1.0684244632720947, "learning_rate": 5.607554801568794e-07, "loss": 0.4546, "step": 40225 }, { "epoch": 0.8929978579594011, "grad_norm": 1.2578312158584595, "learning_rate": 5.596048519737218e-07, "loss": 0.353, "step": 40230 }, { "epoch": 0.8931088445189288, "grad_norm": 0.9892885088920593, "learning_rate": 5.584553715221442e-07, "loss": 0.4642, "step": 40235 }, { "epoch": 0.8932198310784564, "grad_norm": 0.9489505887031555, "learning_rate": 5.573070389419e-07, "loss": 0.3711, "step": 40240 }, { "epoch": 0.893330817637984, "grad_norm": 1.6525774002075195, "learning_rate": 5.561598543725954e-07, "loss": 0.4137, "step": 40245 }, { "epoch": 0.8934418041975117, "grad_norm": 1.2819950580596924, "learning_rate": 5.550138179537057e-07, "loss": 0.3999, "step": 40250 }, { "epoch": 0.8935527907570393, "grad_norm": 0.81032794713974, "learning_rate": 5.53868929824558e-07, "loss": 0.2751, "step": 40255 }, { "epoch": 0.893663777316567, "grad_norm": 1.0968986749649048, "learning_rate": 5.52725190124348e-07, "loss": 0.4661, "step": 40260 }, { "epoch": 0.8937747638760946, "grad_norm": 1.1706798076629639, "learning_rate": 5.515825989921242e-07, "loss": 0.3541, "step": 40265 }, { "epoch": 0.8938857504356222, "grad_norm": 1.1918624639511108, "learning_rate": 5.50441156566801e-07, "loss": 0.4062, "step": 40270 }, { "epoch": 0.8939967369951499, "grad_norm": 1.0423552989959717, "learning_rate": 5.493008629871499e-07, "loss": 0.319, "step": 40275 }, { "epoch": 0.8941077235546775, "grad_norm": 1.1794507503509521, "learning_rate": 5.481617183918053e-07, "loss": 0.2986, "step": 40280 }, { "epoch": 0.8942187101142052, "grad_norm": 1.3555355072021484, "learning_rate": 5.470237229192599e-07, "loss": 0.4681, "step": 40285 }, { "epoch": 0.8943296966737329, "grad_norm": 1.5522536039352417, "learning_rate": 5.458868767078673e-07, "loss": 0.4119, "step": 40290 }, { "epoch": 0.8944406832332604, "grad_norm": 1.1344321966171265, "learning_rate": 5.447511798958427e-07, "loss": 0.3222, "step": 40295 }, { "epoch": 0.8945516697927881, "grad_norm": 1.0988397598266602, "learning_rate": 5.436166326212577e-07, "loss": 0.4429, "step": 40300 }, { "epoch": 0.8946626563523158, "grad_norm": 1.102994680404663, "learning_rate": 5.424832350220477e-07, "loss": 0.5143, "step": 40305 }, { "epoch": 0.8947736429118434, "grad_norm": 0.7307510375976562, "learning_rate": 5.413509872360101e-07, "loss": 0.3052, "step": 40310 }, { "epoch": 0.894884629471371, "grad_norm": 1.411036491394043, "learning_rate": 5.40219889400796e-07, "loss": 0.3787, "step": 40315 }, { "epoch": 0.8949956160308986, "grad_norm": 0.9514254331588745, "learning_rate": 5.390899416539231e-07, "loss": 0.3885, "step": 40320 }, { "epoch": 0.8951066025904263, "grad_norm": 1.9836772680282593, "learning_rate": 5.379611441327647e-07, "loss": 0.4674, "step": 40325 }, { "epoch": 0.895217589149954, "grad_norm": 1.1093319654464722, "learning_rate": 5.368334969745592e-07, "loss": 0.4865, "step": 40330 }, { "epoch": 0.8953285757094815, "grad_norm": 1.0533522367477417, "learning_rate": 5.357070003163978e-07, "loss": 0.5483, "step": 40335 }, { "epoch": 0.8954395622690092, "grad_norm": 0.6712168455123901, "learning_rate": 5.345816542952387e-07, "loss": 0.3108, "step": 40340 }, { "epoch": 0.8955505488285369, "grad_norm": 1.435325026512146, "learning_rate": 5.334574590478992e-07, "loss": 0.3496, "step": 40345 }, { "epoch": 0.8956615353880645, "grad_norm": 0.8815795183181763, "learning_rate": 5.32334414711052e-07, "loss": 0.3578, "step": 40350 }, { "epoch": 0.8957725219475922, "grad_norm": 1.6313246488571167, "learning_rate": 5.31212521421236e-07, "loss": 0.3128, "step": 40355 }, { "epoch": 0.8958835085071198, "grad_norm": 1.9099875688552856, "learning_rate": 5.300917793148441e-07, "loss": 0.3975, "step": 40360 }, { "epoch": 0.8959944950666474, "grad_norm": 1.2062870264053345, "learning_rate": 5.28972188528134e-07, "loss": 0.3287, "step": 40365 }, { "epoch": 0.8961054816261751, "grad_norm": 1.5065999031066895, "learning_rate": 5.278537491972236e-07, "loss": 0.4504, "step": 40370 }, { "epoch": 0.8962164681857027, "grad_norm": 1.3372350931167603, "learning_rate": 5.267364614580861e-07, "loss": 0.3728, "step": 40375 }, { "epoch": 0.8963274547452303, "grad_norm": 1.2467929124832153, "learning_rate": 5.256203254465597e-07, "loss": 0.3721, "step": 40380 }, { "epoch": 0.896438441304758, "grad_norm": 1.1268647909164429, "learning_rate": 5.24505341298338e-07, "loss": 0.3695, "step": 40385 }, { "epoch": 0.8965494278642856, "grad_norm": 0.6092180013656616, "learning_rate": 5.233915091489794e-07, "loss": 0.3494, "step": 40390 }, { "epoch": 0.8966604144238133, "grad_norm": 1.103093147277832, "learning_rate": 5.222788291338998e-07, "loss": 0.4334, "step": 40395 }, { "epoch": 0.896771400983341, "grad_norm": 2.1130781173706055, "learning_rate": 5.211673013883733e-07, "loss": 0.3672, "step": 40400 }, { "epoch": 0.8968823875428685, "grad_norm": 1.6719002723693848, "learning_rate": 5.200569260475374e-07, "loss": 0.5602, "step": 40405 }, { "epoch": 0.8969933741023962, "grad_norm": 1.2825921773910522, "learning_rate": 5.189477032463864e-07, "loss": 0.3651, "step": 40410 }, { "epoch": 0.8971043606619239, "grad_norm": 1.1338067054748535, "learning_rate": 5.178396331197766e-07, "loss": 0.1951, "step": 40415 }, { "epoch": 0.8972153472214515, "grad_norm": 1.777740478515625, "learning_rate": 5.167327158024249e-07, "loss": 0.493, "step": 40420 }, { "epoch": 0.8973263337809791, "grad_norm": 1.1521728038787842, "learning_rate": 5.156269514289036e-07, "loss": 0.4128, "step": 40425 }, { "epoch": 0.8974373203405067, "grad_norm": 1.4409505128860474, "learning_rate": 5.145223401336507e-07, "loss": 0.4833, "step": 40430 }, { "epoch": 0.8975483069000344, "grad_norm": 1.0183593034744263, "learning_rate": 5.134188820509589e-07, "loss": 0.2831, "step": 40435 }, { "epoch": 0.8976592934595621, "grad_norm": 0.7380016446113586, "learning_rate": 5.123165773149852e-07, "loss": 0.3006, "step": 40440 }, { "epoch": 0.8977702800190897, "grad_norm": 1.1958386898040771, "learning_rate": 5.112154260597413e-07, "loss": 0.4989, "step": 40445 }, { "epoch": 0.8978812665786173, "grad_norm": 1.2959365844726562, "learning_rate": 5.101154284191035e-07, "loss": 0.3115, "step": 40450 }, { "epoch": 0.897992253138145, "grad_norm": 0.7626371383666992, "learning_rate": 5.090165845268058e-07, "loss": 0.3649, "step": 40455 }, { "epoch": 0.8981032396976726, "grad_norm": 1.715620517730713, "learning_rate": 5.079188945164426e-07, "loss": 0.4139, "step": 40460 }, { "epoch": 0.8982142262572003, "grad_norm": 1.511171817779541, "learning_rate": 5.068223585214637e-07, "loss": 0.5195, "step": 40465 }, { "epoch": 0.898325212816728, "grad_norm": 1.423026204109192, "learning_rate": 5.057269766751871e-07, "loss": 0.4019, "step": 40470 }, { "epoch": 0.8984361993762555, "grad_norm": 2.012284278869629, "learning_rate": 5.046327491107816e-07, "loss": 0.3232, "step": 40475 }, { "epoch": 0.8985471859357832, "grad_norm": 1.1970844268798828, "learning_rate": 5.035396759612831e-07, "loss": 0.3283, "step": 40480 }, { "epoch": 0.8986581724953108, "grad_norm": 1.5900375843048096, "learning_rate": 5.024477573595799e-07, "loss": 0.4416, "step": 40485 }, { "epoch": 0.8987691590548385, "grad_norm": 1.5092462301254272, "learning_rate": 5.013569934384277e-07, "loss": 0.5254, "step": 40490 }, { "epoch": 0.8988801456143661, "grad_norm": 1.6395747661590576, "learning_rate": 5.002673843304351e-07, "loss": 0.5007, "step": 40495 }, { "epoch": 0.8989911321738937, "grad_norm": 2.0644948482513428, "learning_rate": 4.99178930168075e-07, "loss": 0.3698, "step": 40500 }, { "epoch": 0.8991021187334214, "grad_norm": 1.0750409364700317, "learning_rate": 4.98091631083678e-07, "loss": 0.4849, "step": 40505 }, { "epoch": 0.8992131052929491, "grad_norm": 0.9656897783279419, "learning_rate": 4.970054872094321e-07, "loss": 0.412, "step": 40510 }, { "epoch": 0.8993240918524766, "grad_norm": 1.18376624584198, "learning_rate": 4.959204986773903e-07, "loss": 0.3912, "step": 40515 }, { "epoch": 0.8994350784120043, "grad_norm": 1.7295215129852295, "learning_rate": 4.948366656194581e-07, "loss": 0.4733, "step": 40520 }, { "epoch": 0.899546064971532, "grad_norm": 1.199609637260437, "learning_rate": 4.93753988167408e-07, "loss": 0.4633, "step": 40525 }, { "epoch": 0.8996570515310596, "grad_norm": 1.0033913850784302, "learning_rate": 4.926724664528648e-07, "loss": 0.3885, "step": 40530 }, { "epoch": 0.8997680380905873, "grad_norm": 0.852364182472229, "learning_rate": 4.915921006073187e-07, "loss": 0.2825, "step": 40535 }, { "epoch": 0.8998790246501148, "grad_norm": 1.3140791654586792, "learning_rate": 4.90512890762117e-07, "loss": 0.3215, "step": 40540 }, { "epoch": 0.8999900112096425, "grad_norm": 1.3916743993759155, "learning_rate": 4.894348370484648e-07, "loss": 0.2732, "step": 40545 }, { "epoch": 0.9001009977691702, "grad_norm": 0.7842644453048706, "learning_rate": 4.883579395974303e-07, "loss": 0.3304, "step": 40550 }, { "epoch": 0.9002119843286978, "grad_norm": 1.6732553243637085, "learning_rate": 4.872821985399368e-07, "loss": 0.3536, "step": 40555 }, { "epoch": 0.9003229708882254, "grad_norm": 1.2899658679962158, "learning_rate": 4.862076140067695e-07, "loss": 0.3919, "step": 40560 }, { "epoch": 0.9004339574477531, "grad_norm": 1.014262080192566, "learning_rate": 4.851341861285764e-07, "loss": 0.2967, "step": 40565 }, { "epoch": 0.9005449440072807, "grad_norm": 1.1751197576522827, "learning_rate": 4.840619150358561e-07, "loss": 0.3216, "step": 40570 }, { "epoch": 0.9006559305668084, "grad_norm": 0.9085738062858582, "learning_rate": 4.829908008589768e-07, "loss": 0.4418, "step": 40575 }, { "epoch": 0.900766917126336, "grad_norm": 1.2819691896438599, "learning_rate": 4.819208437281553e-07, "loss": 0.4594, "step": 40580 }, { "epoch": 0.9008779036858636, "grad_norm": 0.861376941204071, "learning_rate": 4.808520437734776e-07, "loss": 0.3508, "step": 40585 }, { "epoch": 0.9009888902453913, "grad_norm": 1.106539249420166, "learning_rate": 4.797844011248854e-07, "loss": 0.3888, "step": 40590 }, { "epoch": 0.9010998768049189, "grad_norm": 1.338828444480896, "learning_rate": 4.787179159121758e-07, "loss": 0.3314, "step": 40595 }, { "epoch": 0.9012108633644466, "grad_norm": 0.7250569462776184, "learning_rate": 4.776525882650107e-07, "loss": 0.4059, "step": 40600 }, { "epoch": 0.9013218499239742, "grad_norm": 1.144252061843872, "learning_rate": 4.765884183129077e-07, "loss": 0.534, "step": 40605 }, { "epoch": 0.9014328364835018, "grad_norm": 1.1259852647781372, "learning_rate": 4.7552540618524656e-07, "loss": 0.4234, "step": 40610 }, { "epoch": 0.9015438230430295, "grad_norm": 0.9830173254013062, "learning_rate": 4.7446355201126283e-07, "loss": 0.2786, "step": 40615 }, { "epoch": 0.9016548096025572, "grad_norm": 1.644303798675537, "learning_rate": 4.734028559200543e-07, "loss": 0.3073, "step": 40620 }, { "epoch": 0.9017657961620847, "grad_norm": 1.8261100053787231, "learning_rate": 4.72343318040579e-07, "loss": 0.4793, "step": 40625 }, { "epoch": 0.9018767827216124, "grad_norm": 1.2500735521316528, "learning_rate": 4.7128493850164715e-07, "loss": 0.2982, "step": 40630 }, { "epoch": 0.9019877692811401, "grad_norm": 1.7409520149230957, "learning_rate": 4.7022771743193807e-07, "loss": 0.3677, "step": 40635 }, { "epoch": 0.9020987558406677, "grad_norm": 1.2236367464065552, "learning_rate": 4.6917165495998006e-07, "loss": 0.4784, "step": 40640 }, { "epoch": 0.9022097424001954, "grad_norm": 0.9592740535736084, "learning_rate": 4.681167512141693e-07, "loss": 0.314, "step": 40645 }, { "epoch": 0.9023207289597229, "grad_norm": 1.1023225784301758, "learning_rate": 4.6706300632275767e-07, "loss": 0.3069, "step": 40650 }, { "epoch": 0.9024317155192506, "grad_norm": 2.137260913848877, "learning_rate": 4.660104204138538e-07, "loss": 0.3771, "step": 40655 }, { "epoch": 0.9025427020787783, "grad_norm": 1.2477887868881226, "learning_rate": 4.6495899361542974e-07, "loss": 0.5641, "step": 40660 }, { "epoch": 0.9026536886383059, "grad_norm": 1.1454601287841797, "learning_rate": 4.63908726055311e-07, "loss": 0.4877, "step": 40665 }, { "epoch": 0.9027646751978335, "grad_norm": 1.7181624174118042, "learning_rate": 4.628596178611888e-07, "loss": 0.3043, "step": 40670 }, { "epoch": 0.9028756617573612, "grad_norm": 1.327402114868164, "learning_rate": 4.6181166916061004e-07, "loss": 0.3787, "step": 40675 }, { "epoch": 0.9029866483168888, "grad_norm": 1.639182209968567, "learning_rate": 4.607648800809783e-07, "loss": 0.312, "step": 40680 }, { "epoch": 0.9030976348764165, "grad_norm": 0.5940906405448914, "learning_rate": 4.597192507495618e-07, "loss": 0.3517, "step": 40685 }, { "epoch": 0.9032086214359442, "grad_norm": 1.1408140659332275, "learning_rate": 4.586747812934822e-07, "loss": 0.3383, "step": 40690 }, { "epoch": 0.9033196079954717, "grad_norm": 0.9758588075637817, "learning_rate": 4.576314718397246e-07, "loss": 0.4833, "step": 40695 }, { "epoch": 0.9034305945549994, "grad_norm": 2.091536521911621, "learning_rate": 4.5658932251512856e-07, "loss": 0.278, "step": 40700 }, { "epoch": 0.903541581114527, "grad_norm": 1.074259638786316, "learning_rate": 4.555483334463984e-07, "loss": 0.396, "step": 40705 }, { "epoch": 0.9036525676740547, "grad_norm": 0.7181487083435059, "learning_rate": 4.5450850476009056e-07, "loss": 0.3388, "step": 40710 }, { "epoch": 0.9037635542335823, "grad_norm": 1.2724895477294922, "learning_rate": 4.534698365826273e-07, "loss": 0.3813, "step": 40715 }, { "epoch": 0.9038745407931099, "grad_norm": 1.474095106124878, "learning_rate": 4.524323290402832e-07, "loss": 0.1913, "step": 40720 }, { "epoch": 0.9039855273526376, "grad_norm": 1.1835238933563232, "learning_rate": 4.5139598225919845e-07, "loss": 0.2618, "step": 40725 }, { "epoch": 0.9040965139121653, "grad_norm": 0.8935763239860535, "learning_rate": 4.503607963653644e-07, "loss": 0.2087, "step": 40730 }, { "epoch": 0.9042075004716928, "grad_norm": 1.2680327892303467, "learning_rate": 4.4932677148463943e-07, "loss": 0.431, "step": 40735 }, { "epoch": 0.9043184870312205, "grad_norm": 0.9342009425163269, "learning_rate": 4.4829390774273395e-07, "loss": 0.4155, "step": 40740 }, { "epoch": 0.9044294735907482, "grad_norm": 0.9703999161720276, "learning_rate": 4.47262205265222e-07, "loss": 0.572, "step": 40745 }, { "epoch": 0.9045404601502758, "grad_norm": 0.8090248107910156, "learning_rate": 4.4623166417753217e-07, "loss": 0.4299, "step": 40750 }, { "epoch": 0.9046514467098035, "grad_norm": 1.2748615741729736, "learning_rate": 4.452022846049564e-07, "loss": 0.4863, "step": 40755 }, { "epoch": 0.904762433269331, "grad_norm": 1.3430474996566772, "learning_rate": 4.441740666726446e-07, "loss": 0.3988, "step": 40760 }, { "epoch": 0.9048734198288587, "grad_norm": 1.024856448173523, "learning_rate": 4.4314701050559905e-07, "loss": 0.3997, "step": 40765 }, { "epoch": 0.9049844063883864, "grad_norm": 0.7125641703605652, "learning_rate": 4.421211162286909e-07, "loss": 0.2898, "step": 40770 }, { "epoch": 0.905095392947914, "grad_norm": 1.450653076171875, "learning_rate": 4.410963839666416e-07, "loss": 0.2538, "step": 40775 }, { "epoch": 0.9052063795074416, "grad_norm": 1.2268130779266357, "learning_rate": 4.40072813844038e-07, "loss": 0.3693, "step": 40780 }, { "epoch": 0.9053173660669693, "grad_norm": 1.2509618997573853, "learning_rate": 4.3905040598531737e-07, "loss": 0.6456, "step": 40785 }, { "epoch": 0.9054283526264969, "grad_norm": 1.0354459285736084, "learning_rate": 4.3802916051478463e-07, "loss": 0.2563, "step": 40790 }, { "epoch": 0.9055393391860246, "grad_norm": 1.0008553266525269, "learning_rate": 4.370090775565983e-07, "loss": 0.4748, "step": 40795 }, { "epoch": 0.9056503257455523, "grad_norm": 0.9893780946731567, "learning_rate": 4.359901572347758e-07, "loss": 0.339, "step": 40800 }, { "epoch": 0.9057613123050798, "grad_norm": 1.1091101169586182, "learning_rate": 4.349723996731969e-07, "loss": 0.4297, "step": 40805 }, { "epoch": 0.9058722988646075, "grad_norm": 0.9167705178260803, "learning_rate": 4.3395580499559276e-07, "loss": 0.3564, "step": 40810 }, { "epoch": 0.9059832854241351, "grad_norm": 1.1292577981948853, "learning_rate": 4.3294037332555996e-07, "loss": 0.3398, "step": 40815 }, { "epoch": 0.9060942719836628, "grad_norm": 1.3591548204421997, "learning_rate": 4.3192610478655197e-07, "loss": 0.3313, "step": 40820 }, { "epoch": 0.9062052585431905, "grad_norm": 0.561805009841919, "learning_rate": 4.3091299950187905e-07, "loss": 0.3893, "step": 40825 }, { "epoch": 0.906316245102718, "grad_norm": 0.9161701798439026, "learning_rate": 4.2990105759471266e-07, "loss": 0.4769, "step": 40830 }, { "epoch": 0.9064272316622457, "grad_norm": 1.3063408136367798, "learning_rate": 4.288902791880778e-07, "loss": 0.6025, "step": 40835 }, { "epoch": 0.9065382182217734, "grad_norm": 0.502839207649231, "learning_rate": 4.27880664404865e-07, "loss": 0.3625, "step": 40840 }, { "epoch": 0.906649204781301, "grad_norm": 1.1110161542892456, "learning_rate": 4.268722133678194e-07, "loss": 0.3522, "step": 40845 }, { "epoch": 0.9067601913408286, "grad_norm": 2.8650565147399902, "learning_rate": 4.258649261995429e-07, "loss": 0.3195, "step": 40850 }, { "epoch": 0.9068711779003563, "grad_norm": 0.9162212610244751, "learning_rate": 4.248588030225009e-07, "loss": 0.374, "step": 40855 }, { "epoch": 0.9069821644598839, "grad_norm": 1.5972285270690918, "learning_rate": 4.2385384395901117e-07, "loss": 0.2971, "step": 40860 }, { "epoch": 0.9070931510194116, "grad_norm": 1.1207962036132812, "learning_rate": 4.2285004913125704e-07, "loss": 0.3178, "step": 40865 }, { "epoch": 0.9072041375789391, "grad_norm": 1.0846506357192993, "learning_rate": 4.218474186612731e-07, "loss": 0.3724, "step": 40870 }, { "epoch": 0.9073151241384668, "grad_norm": 1.6842200756072998, "learning_rate": 4.208459526709585e-07, "loss": 0.4633, "step": 40875 }, { "epoch": 0.9074261106979945, "grad_norm": 1.2570232152938843, "learning_rate": 4.19845651282067e-07, "loss": 0.3244, "step": 40880 }, { "epoch": 0.9075370972575221, "grad_norm": 0.7616875767707825, "learning_rate": 4.188465146162113e-07, "loss": 0.4373, "step": 40885 }, { "epoch": 0.9076480838170498, "grad_norm": 1.3283637762069702, "learning_rate": 4.178485427948642e-07, "loss": 0.2744, "step": 40890 }, { "epoch": 0.9077590703765774, "grad_norm": 1.1224035024642944, "learning_rate": 4.168517359393542e-07, "loss": 0.2493, "step": 40895 }, { "epoch": 0.907870056936105, "grad_norm": 1.5765423774719238, "learning_rate": 4.1585609417087115e-07, "loss": 0.3856, "step": 40900 }, { "epoch": 0.9079810434956327, "grad_norm": 0.6863722801208496, "learning_rate": 4.148616176104625e-07, "loss": 0.2628, "step": 40905 }, { "epoch": 0.9080920300551604, "grad_norm": 1.0331417322158813, "learning_rate": 4.1386830637903166e-07, "loss": 0.3926, "step": 40910 }, { "epoch": 0.9082030166146879, "grad_norm": 1.2012207508087158, "learning_rate": 4.12876160597343e-07, "loss": 0.4389, "step": 40915 }, { "epoch": 0.9083140031742156, "grad_norm": 0.9674161672592163, "learning_rate": 4.11885180386018e-07, "loss": 0.5956, "step": 40920 }, { "epoch": 0.9084249897337432, "grad_norm": 1.5293216705322266, "learning_rate": 4.1089536586553567e-07, "loss": 0.419, "step": 40925 }, { "epoch": 0.9085359762932709, "grad_norm": 1.6467329263687134, "learning_rate": 4.0990671715623655e-07, "loss": 0.3022, "step": 40930 }, { "epoch": 0.9086469628527986, "grad_norm": 1.517439842224121, "learning_rate": 4.0891923437831547e-07, "loss": 0.4942, "step": 40935 }, { "epoch": 0.9087579494123261, "grad_norm": 1.8788321018218994, "learning_rate": 4.079329176518287e-07, "loss": 0.3285, "step": 40940 }, { "epoch": 0.9088689359718538, "grad_norm": 1.032759428024292, "learning_rate": 4.0694776709668815e-07, "loss": 0.3184, "step": 40945 }, { "epoch": 0.9089799225313815, "grad_norm": 1.741289496421814, "learning_rate": 4.059637828326657e-07, "loss": 0.2962, "step": 40950 }, { "epoch": 0.9090909090909091, "grad_norm": 1.205772042274475, "learning_rate": 4.049809649793901e-07, "loss": 0.3886, "step": 40955 }, { "epoch": 0.9092018956504367, "grad_norm": 1.3863246440887451, "learning_rate": 4.039993136563503e-07, "loss": 0.2554, "step": 40960 }, { "epoch": 0.9093128822099644, "grad_norm": 1.532273530960083, "learning_rate": 4.030188289828907e-07, "loss": 0.3074, "step": 40965 }, { "epoch": 0.909423868769492, "grad_norm": 2.732170581817627, "learning_rate": 4.02039511078216e-07, "loss": 0.3315, "step": 40970 }, { "epoch": 0.9095348553290197, "grad_norm": 1.513314127922058, "learning_rate": 4.0106136006138885e-07, "loss": 0.4582, "step": 40975 }, { "epoch": 0.9096458418885472, "grad_norm": 2.2053635120391846, "learning_rate": 4.0008437605132957e-07, "loss": 0.359, "step": 40980 }, { "epoch": 0.9097568284480749, "grad_norm": 1.4839268922805786, "learning_rate": 3.9910855916681535e-07, "loss": 0.3554, "step": 40985 }, { "epoch": 0.9098678150076026, "grad_norm": 0.9925002455711365, "learning_rate": 3.981339095264847e-07, "loss": 0.1972, "step": 40990 }, { "epoch": 0.9099788015671302, "grad_norm": 1.1903655529022217, "learning_rate": 3.9716042724883054e-07, "loss": 0.5049, "step": 40995 }, { "epoch": 0.9100897881266579, "grad_norm": 1.2450945377349854, "learning_rate": 3.96188112452206e-07, "loss": 0.3352, "step": 41000 }, { "epoch": 0.9102007746861855, "grad_norm": 1.1269035339355469, "learning_rate": 3.952169652548221e-07, "loss": 0.3713, "step": 41005 }, { "epoch": 0.9103117612457131, "grad_norm": 1.070256233215332, "learning_rate": 3.9424698577474773e-07, "loss": 0.3097, "step": 41010 }, { "epoch": 0.9104227478052408, "grad_norm": 1.2544130086898804, "learning_rate": 3.9327817412990963e-07, "loss": 0.3488, "step": 41015 }, { "epoch": 0.9105337343647685, "grad_norm": 1.162050485610962, "learning_rate": 3.9231053043809255e-07, "loss": 0.3, "step": 41020 }, { "epoch": 0.910644720924296, "grad_norm": 0.7748410105705261, "learning_rate": 3.9134405481694115e-07, "loss": 0.2911, "step": 41025 }, { "epoch": 0.9107557074838237, "grad_norm": 1.0052975416183472, "learning_rate": 3.9037874738395266e-07, "loss": 0.3194, "step": 41030 }, { "epoch": 0.9108666940433513, "grad_norm": 1.3958162069320679, "learning_rate": 3.894146082564887e-07, "loss": 0.2533, "step": 41035 }, { "epoch": 0.910977680602879, "grad_norm": 1.427446961402893, "learning_rate": 3.884516375517644e-07, "loss": 0.4574, "step": 41040 }, { "epoch": 0.9110886671624067, "grad_norm": 1.627304196357727, "learning_rate": 3.87489835386855e-07, "loss": 0.295, "step": 41045 }, { "epoch": 0.9111996537219342, "grad_norm": 1.1479729413986206, "learning_rate": 3.865292018786959e-07, "loss": 0.2985, "step": 41050 }, { "epoch": 0.9113106402814619, "grad_norm": 1.2646464109420776, "learning_rate": 3.8556973714407366e-07, "loss": 0.3134, "step": 41055 }, { "epoch": 0.9114216268409896, "grad_norm": 1.2283658981323242, "learning_rate": 3.846114412996393e-07, "loss": 0.3191, "step": 41060 }, { "epoch": 0.9115326134005172, "grad_norm": 1.0735255479812622, "learning_rate": 3.836543144618976e-07, "loss": 0.3153, "step": 41065 }, { "epoch": 0.9116435999600448, "grad_norm": 1.5772662162780762, "learning_rate": 3.826983567472131e-07, "loss": 0.3514, "step": 41070 }, { "epoch": 0.9117545865195725, "grad_norm": 1.1826890707015991, "learning_rate": 3.817435682718096e-07, "loss": 0.4131, "step": 41075 }, { "epoch": 0.9118655730791001, "grad_norm": 1.1610921621322632, "learning_rate": 3.8078994915176526e-07, "loss": 0.506, "step": 41080 }, { "epoch": 0.9119765596386278, "grad_norm": 0.877407431602478, "learning_rate": 3.7983749950301965e-07, "loss": 0.3275, "step": 41085 }, { "epoch": 0.9120875461981554, "grad_norm": 1.2805155515670776, "learning_rate": 3.788862194413667e-07, "loss": 0.4192, "step": 41090 }, { "epoch": 0.912198532757683, "grad_norm": 0.8939440250396729, "learning_rate": 3.779361090824596e-07, "loss": 0.3885, "step": 41095 }, { "epoch": 0.9123095193172107, "grad_norm": 1.0157679319381714, "learning_rate": 3.769871685418114e-07, "loss": 0.3205, "step": 41100 }, { "epoch": 0.9124205058767383, "grad_norm": 1.2212369441986084, "learning_rate": 3.760393979347898e-07, "loss": 0.33, "step": 41105 }, { "epoch": 0.912531492436266, "grad_norm": 1.968551516532898, "learning_rate": 3.7509279737662276e-07, "loss": 0.2711, "step": 41110 }, { "epoch": 0.9126424789957936, "grad_norm": 1.060575008392334, "learning_rate": 3.741473669823936e-07, "loss": 0.3427, "step": 41115 }, { "epoch": 0.9127534655553212, "grad_norm": 1.3390034437179565, "learning_rate": 3.7320310686704497e-07, "loss": 0.3595, "step": 41120 }, { "epoch": 0.9128644521148489, "grad_norm": 1.1845945119857788, "learning_rate": 3.722600171453761e-07, "loss": 0.4191, "step": 41125 }, { "epoch": 0.9129754386743766, "grad_norm": 0.7942050099372864, "learning_rate": 3.713180979320463e-07, "loss": 0.3634, "step": 41130 }, { "epoch": 0.9130864252339042, "grad_norm": 1.055257797241211, "learning_rate": 3.7037734934156967e-07, "loss": 0.4133, "step": 41135 }, { "epoch": 0.9131974117934318, "grad_norm": 0.9896296858787537, "learning_rate": 3.6943777148831907e-07, "loss": 0.4523, "step": 41140 }, { "epoch": 0.9133083983529594, "grad_norm": 1.3810358047485352, "learning_rate": 3.684993644865276e-07, "loss": 0.5251, "step": 41145 }, { "epoch": 0.9134193849124871, "grad_norm": 1.073945164680481, "learning_rate": 3.6756212845028063e-07, "loss": 0.4773, "step": 41150 }, { "epoch": 0.9135303714720148, "grad_norm": 1.60965096950531, "learning_rate": 3.666260634935248e-07, "loss": 0.3653, "step": 41155 }, { "epoch": 0.9136413580315423, "grad_norm": 1.5314462184906006, "learning_rate": 3.656911697300658e-07, "loss": 0.3599, "step": 41160 }, { "epoch": 0.91375234459107, "grad_norm": 0.9583492279052734, "learning_rate": 3.6475744727356157e-07, "loss": 0.3483, "step": 41165 }, { "epoch": 0.9138633311505977, "grad_norm": 1.8657221794128418, "learning_rate": 3.6382489623753457e-07, "loss": 0.3817, "step": 41170 }, { "epoch": 0.9139743177101253, "grad_norm": 0.9618664979934692, "learning_rate": 3.628935167353587e-07, "loss": 0.2427, "step": 41175 }, { "epoch": 0.914085304269653, "grad_norm": 1.2513151168823242, "learning_rate": 3.6196330888026764e-07, "loss": 0.4282, "step": 41180 }, { "epoch": 0.9141962908291806, "grad_norm": 1.128990888595581, "learning_rate": 3.6103427278535665e-07, "loss": 0.4141, "step": 41185 }, { "epoch": 0.9143072773887082, "grad_norm": 0.7350396513938904, "learning_rate": 3.601064085635697e-07, "loss": 0.2665, "step": 41190 }, { "epoch": 0.9144182639482359, "grad_norm": 1.0335029363632202, "learning_rate": 3.5917971632771773e-07, "loss": 0.4452, "step": 41195 }, { "epoch": 0.9145292505077635, "grad_norm": 1.4310749769210815, "learning_rate": 3.5825419619046176e-07, "loss": 0.2763, "step": 41200 }, { "epoch": 0.9146402370672911, "grad_norm": 1.1692556142807007, "learning_rate": 3.573298482643262e-07, "loss": 0.4406, "step": 41205 }, { "epoch": 0.9147512236268188, "grad_norm": 1.2683709859848022, "learning_rate": 3.564066726616866e-07, "loss": 0.4272, "step": 41210 }, { "epoch": 0.9148622101863464, "grad_norm": 1.0134937763214111, "learning_rate": 3.5548466949478445e-07, "loss": 0.1871, "step": 41215 }, { "epoch": 0.9149731967458741, "grad_norm": 1.2475417852401733, "learning_rate": 3.545638388757078e-07, "loss": 0.3767, "step": 41220 }, { "epoch": 0.9150841833054018, "grad_norm": 1.4357985258102417, "learning_rate": 3.5364418091641374e-07, "loss": 0.4015, "step": 41225 }, { "epoch": 0.9151951698649293, "grad_norm": 0.8965421319007874, "learning_rate": 3.5272569572870727e-07, "loss": 0.3689, "step": 41230 }, { "epoch": 0.915306156424457, "grad_norm": 1.8212131261825562, "learning_rate": 3.5180838342425803e-07, "loss": 0.4141, "step": 41235 }, { "epoch": 0.9154171429839847, "grad_norm": 1.3277180194854736, "learning_rate": 3.508922441145857e-07, "loss": 0.1806, "step": 41240 }, { "epoch": 0.9155281295435123, "grad_norm": 0.8737463355064392, "learning_rate": 3.499772779110766e-07, "loss": 0.291, "step": 41245 }, { "epoch": 0.9156391161030399, "grad_norm": 0.8136932253837585, "learning_rate": 3.490634849249641e-07, "loss": 0.4348, "step": 41250 }, { "epoch": 0.9157501026625675, "grad_norm": 1.7655400037765503, "learning_rate": 3.4815086526734707e-07, "loss": 0.4404, "step": 41255 }, { "epoch": 0.9158610892220952, "grad_norm": 1.2167655229568481, "learning_rate": 3.4723941904917793e-07, "loss": 0.3053, "step": 41260 }, { "epoch": 0.9159720757816229, "grad_norm": 1.5969772338867188, "learning_rate": 3.463291463812668e-07, "loss": 0.3528, "step": 41265 }, { "epoch": 0.9160830623411504, "grad_norm": 1.587766408920288, "learning_rate": 3.4542004737428304e-07, "loss": 0.34, "step": 41270 }, { "epoch": 0.9161940489006781, "grad_norm": 2.0868446826934814, "learning_rate": 3.4451212213875043e-07, "loss": 0.2913, "step": 41275 }, { "epoch": 0.9163050354602058, "grad_norm": 1.0023850202560425, "learning_rate": 3.436053707850528e-07, "loss": 0.32, "step": 41280 }, { "epoch": 0.9164160220197334, "grad_norm": 3.024143934249878, "learning_rate": 3.4269979342342776e-07, "loss": 0.3368, "step": 41285 }, { "epoch": 0.9165270085792611, "grad_norm": 1.2240880727767944, "learning_rate": 3.417953901639759e-07, "loss": 0.2696, "step": 41290 }, { "epoch": 0.9166379951387887, "grad_norm": 1.0215989351272583, "learning_rate": 3.408921611166471e-07, "loss": 0.329, "step": 41295 }, { "epoch": 0.9167489816983163, "grad_norm": 1.2344481945037842, "learning_rate": 3.399901063912558e-07, "loss": 0.4592, "step": 41300 }, { "epoch": 0.916859968257844, "grad_norm": 1.5032986402511597, "learning_rate": 3.390892260974721e-07, "loss": 0.5914, "step": 41305 }, { "epoch": 0.9169709548173716, "grad_norm": 1.5667091608047485, "learning_rate": 3.381895203448182e-07, "loss": 0.4586, "step": 41310 }, { "epoch": 0.9170819413768992, "grad_norm": 1.53379225730896, "learning_rate": 3.372909892426801e-07, "loss": 0.3376, "step": 41315 }, { "epoch": 0.9171929279364269, "grad_norm": 1.1867161989212036, "learning_rate": 3.363936329002959e-07, "loss": 0.4196, "step": 41320 }, { "epoch": 0.9173039144959545, "grad_norm": 1.3190207481384277, "learning_rate": 3.3549745142676614e-07, "loss": 0.2724, "step": 41325 }, { "epoch": 0.9174149010554822, "grad_norm": 1.7920886278152466, "learning_rate": 3.3460244493104365e-07, "loss": 0.2532, "step": 41330 }, { "epoch": 0.9175258876150099, "grad_norm": 1.1654620170593262, "learning_rate": 3.3370861352194026e-07, "loss": 0.3344, "step": 41335 }, { "epoch": 0.9176368741745374, "grad_norm": 0.8272110223770142, "learning_rate": 3.328159573081258e-07, "loss": 0.4368, "step": 41340 }, { "epoch": 0.9177478607340651, "grad_norm": 0.7133996486663818, "learning_rate": 3.319244763981255e-07, "loss": 0.3415, "step": 41345 }, { "epoch": 0.9178588472935928, "grad_norm": 0.7516515851020813, "learning_rate": 3.3103417090032287e-07, "loss": 0.4044, "step": 41350 }, { "epoch": 0.9179698338531204, "grad_norm": 0.8074901103973389, "learning_rate": 3.3014504092296006e-07, "loss": 0.2569, "step": 41355 }, { "epoch": 0.918080820412648, "grad_norm": 0.8953653573989868, "learning_rate": 3.292570865741318e-07, "loss": 0.5623, "step": 41360 }, { "epoch": 0.9181918069721756, "grad_norm": 2.03694748878479, "learning_rate": 3.2837030796179503e-07, "loss": 0.5488, "step": 41365 }, { "epoch": 0.9183027935317033, "grad_norm": 1.2388249635696411, "learning_rate": 3.274847051937591e-07, "loss": 0.4, "step": 41370 }, { "epoch": 0.918413780091231, "grad_norm": 0.5898800492286682, "learning_rate": 3.2660027837769446e-07, "loss": 0.3577, "step": 41375 }, { "epoch": 0.9185247666507586, "grad_norm": 0.9966252446174622, "learning_rate": 3.257170276211241e-07, "loss": 0.3671, "step": 41380 }, { "epoch": 0.9186357532102862, "grad_norm": 0.8349930047988892, "learning_rate": 3.248349530314332e-07, "loss": 0.4678, "step": 41385 }, { "epoch": 0.9187467397698139, "grad_norm": 1.071436882019043, "learning_rate": 3.239540547158626e-07, "loss": 0.4007, "step": 41390 }, { "epoch": 0.9188577263293415, "grad_norm": 0.9444680213928223, "learning_rate": 3.2307433278150445e-07, "loss": 0.3086, "step": 41395 }, { "epoch": 0.9189687128888692, "grad_norm": 1.670590877532959, "learning_rate": 3.2219578733531655e-07, "loss": 0.2858, "step": 41400 }, { "epoch": 0.9190796994483968, "grad_norm": 1.7639275789260864, "learning_rate": 3.213184184841078e-07, "loss": 0.4105, "step": 41405 }, { "epoch": 0.9191906860079244, "grad_norm": 1.3352954387664795, "learning_rate": 3.2044222633454525e-07, "loss": 0.4735, "step": 41410 }, { "epoch": 0.9193016725674521, "grad_norm": 0.9515475034713745, "learning_rate": 3.195672109931558e-07, "loss": 0.2875, "step": 41415 }, { "epoch": 0.9194126591269797, "grad_norm": 1.4635220766067505, "learning_rate": 3.186933725663166e-07, "loss": 0.3685, "step": 41420 }, { "epoch": 0.9195236456865074, "grad_norm": 1.1768747568130493, "learning_rate": 3.1782071116027156e-07, "loss": 0.4472, "step": 41425 }, { "epoch": 0.919634632246035, "grad_norm": 1.2579537630081177, "learning_rate": 3.169492268811103e-07, "loss": 0.2951, "step": 41430 }, { "epoch": 0.9197456188055626, "grad_norm": 1.3200901746749878, "learning_rate": 3.160789198347891e-07, "loss": 0.4369, "step": 41435 }, { "epoch": 0.9198566053650903, "grad_norm": 1.4001281261444092, "learning_rate": 3.152097901271156e-07, "loss": 0.3694, "step": 41440 }, { "epoch": 0.919967591924618, "grad_norm": 1.1858893632888794, "learning_rate": 3.1434183786375416e-07, "loss": 0.5221, "step": 41445 }, { "epoch": 0.9200785784841455, "grad_norm": 0.99980628490448, "learning_rate": 3.1347506315023036e-07, "loss": 0.3342, "step": 41450 }, { "epoch": 0.9201895650436732, "grad_norm": 1.8966047763824463, "learning_rate": 3.12609466091921e-07, "loss": 0.3557, "step": 41455 }, { "epoch": 0.9203005516032009, "grad_norm": 1.1274603605270386, "learning_rate": 3.117450467940653e-07, "loss": 0.3224, "step": 41460 }, { "epoch": 0.9204115381627285, "grad_norm": 1.2839633226394653, "learning_rate": 3.108818053617557e-07, "loss": 0.3317, "step": 41465 }, { "epoch": 0.9205225247222562, "grad_norm": 0.5879912972450256, "learning_rate": 3.1001974189993845e-07, "loss": 0.3073, "step": 41470 }, { "epoch": 0.9206335112817837, "grad_norm": 1.3780027627944946, "learning_rate": 3.0915885651342624e-07, "loss": 0.5207, "step": 41475 }, { "epoch": 0.9207444978413114, "grad_norm": 1.2077401876449585, "learning_rate": 3.0829914930687767e-07, "loss": 0.3862, "step": 41480 }, { "epoch": 0.9208554844008391, "grad_norm": 1.1930087804794312, "learning_rate": 3.0744062038481573e-07, "loss": 0.3539, "step": 41485 }, { "epoch": 0.9209664709603667, "grad_norm": 1.0110259056091309, "learning_rate": 3.065832698516169e-07, "loss": 0.4615, "step": 41490 }, { "epoch": 0.9210774575198943, "grad_norm": 0.975702702999115, "learning_rate": 3.0572709781151344e-07, "loss": 0.3641, "step": 41495 }, { "epoch": 0.921188444079422, "grad_norm": 0.9686071872711182, "learning_rate": 3.0487210436859985e-07, "loss": 0.4104, "step": 41500 }, { "epoch": 0.9212994306389496, "grad_norm": 1.383853554725647, "learning_rate": 3.0401828962681955e-07, "loss": 0.4135, "step": 41505 }, { "epoch": 0.9214104171984773, "grad_norm": 1.5861616134643555, "learning_rate": 3.031656536899785e-07, "loss": 0.5269, "step": 41510 }, { "epoch": 0.921521403758005, "grad_norm": 2.3221077919006348, "learning_rate": 3.023141966617349e-07, "loss": 0.384, "step": 41515 }, { "epoch": 0.9216323903175325, "grad_norm": 1.989410638809204, "learning_rate": 3.0146391864560917e-07, "loss": 0.5662, "step": 41520 }, { "epoch": 0.9217433768770602, "grad_norm": 1.0644265413284302, "learning_rate": 3.006148197449743e-07, "loss": 0.5402, "step": 41525 }, { "epoch": 0.9218543634365878, "grad_norm": 1.6977014541625977, "learning_rate": 2.9976690006305877e-07, "loss": 0.336, "step": 41530 }, { "epoch": 0.9219653499961155, "grad_norm": 1.9462661743164062, "learning_rate": 2.989201597029534e-07, "loss": 0.4512, "step": 41535 }, { "epoch": 0.9220763365556431, "grad_norm": 1.5462268590927124, "learning_rate": 2.9807459876759817e-07, "loss": 0.4801, "step": 41540 }, { "epoch": 0.9221873231151707, "grad_norm": 1.7612812519073486, "learning_rate": 2.972302173597974e-07, "loss": 0.5732, "step": 41545 }, { "epoch": 0.9222983096746984, "grad_norm": 1.0951625108718872, "learning_rate": 2.963870155822046e-07, "loss": 0.4075, "step": 41550 }, { "epoch": 0.9224092962342261, "grad_norm": 1.443281888961792, "learning_rate": 2.955449935373356e-07, "loss": 0.2509, "step": 41555 }, { "epoch": 0.9225202827937536, "grad_norm": 0.3948124349117279, "learning_rate": 2.947041513275606e-07, "loss": 0.3246, "step": 41560 }, { "epoch": 0.9226312693532813, "grad_norm": 1.4538019895553589, "learning_rate": 2.938644890551057e-07, "loss": 0.4642, "step": 41565 }, { "epoch": 0.922742255912809, "grad_norm": 1.6253353357315063, "learning_rate": 2.9302600682205473e-07, "loss": 0.4874, "step": 41570 }, { "epoch": 0.9228532424723366, "grad_norm": 1.0660721063613892, "learning_rate": 2.921887047303462e-07, "loss": 0.4034, "step": 41575 }, { "epoch": 0.9229642290318643, "grad_norm": 1.0873395204544067, "learning_rate": 2.9135258288177757e-07, "loss": 0.4524, "step": 41580 }, { "epoch": 0.9230752155913918, "grad_norm": 1.0553845167160034, "learning_rate": 2.9051764137800195e-07, "loss": 0.5371, "step": 41585 }, { "epoch": 0.9231862021509195, "grad_norm": 1.0221503973007202, "learning_rate": 2.896838803205282e-07, "loss": 0.4071, "step": 41590 }, { "epoch": 0.9232971887104472, "grad_norm": 0.7316520810127258, "learning_rate": 2.8885129981072403e-07, "loss": 0.3178, "step": 41595 }, { "epoch": 0.9234081752699748, "grad_norm": 1.321444034576416, "learning_rate": 2.880198999498074e-07, "loss": 0.3049, "step": 41600 }, { "epoch": 0.9235191618295024, "grad_norm": 1.0900516510009766, "learning_rate": 2.871896808388608e-07, "loss": 0.4049, "step": 41605 }, { "epoch": 0.9236301483890301, "grad_norm": 1.1955881118774414, "learning_rate": 2.863606425788201e-07, "loss": 0.2784, "step": 41610 }, { "epoch": 0.9237411349485577, "grad_norm": 1.5617562532424927, "learning_rate": 2.855327852704737e-07, "loss": 0.4223, "step": 41615 }, { "epoch": 0.9238521215080854, "grad_norm": 1.5575618743896484, "learning_rate": 2.847061090144732e-07, "loss": 0.5853, "step": 41620 }, { "epoch": 0.9239631080676131, "grad_norm": 0.6329960227012634, "learning_rate": 2.838806139113204e-07, "loss": 0.3444, "step": 41625 }, { "epoch": 0.9240740946271406, "grad_norm": 1.4175231456756592, "learning_rate": 2.830563000613773e-07, "loss": 0.4397, "step": 41630 }, { "epoch": 0.9241850811866683, "grad_norm": 0.7703967690467834, "learning_rate": 2.822331675648604e-07, "loss": 0.3143, "step": 41635 }, { "epoch": 0.9242960677461959, "grad_norm": 0.6176158785820007, "learning_rate": 2.8141121652184413e-07, "loss": 0.2804, "step": 41640 }, { "epoch": 0.9244070543057236, "grad_norm": 0.8418532609939575, "learning_rate": 2.8059044703226066e-07, "loss": 0.3661, "step": 41645 }, { "epoch": 0.9245180408652512, "grad_norm": 1.2525098323822021, "learning_rate": 2.7977085919589253e-07, "loss": 0.3764, "step": 41650 }, { "epoch": 0.9246290274247788, "grad_norm": 2.123753547668457, "learning_rate": 2.789524531123844e-07, "loss": 0.5501, "step": 41655 }, { "epoch": 0.9247400139843065, "grad_norm": 1.645007848739624, "learning_rate": 2.781352288812356e-07, "loss": 0.447, "step": 41660 }, { "epoch": 0.9248510005438342, "grad_norm": 1.2815983295440674, "learning_rate": 2.773191866017999e-07, "loss": 0.555, "step": 41665 }, { "epoch": 0.9249619871033617, "grad_norm": 1.142004132270813, "learning_rate": 2.765043263732914e-07, "loss": 0.3564, "step": 41670 }, { "epoch": 0.9250729736628894, "grad_norm": 1.6616061925888062, "learning_rate": 2.7569064829477633e-07, "loss": 0.449, "step": 41675 }, { "epoch": 0.9251839602224171, "grad_norm": 0.8066574335098267, "learning_rate": 2.7487815246518e-07, "loss": 0.3865, "step": 41680 }, { "epoch": 0.9252949467819447, "grad_norm": 1.1873153448104858, "learning_rate": 2.7406683898328125e-07, "loss": 0.2746, "step": 41685 }, { "epoch": 0.9254059333414724, "grad_norm": 1.0035192966461182, "learning_rate": 2.7325670794771887e-07, "loss": 0.2461, "step": 41690 }, { "epoch": 0.9255169199009999, "grad_norm": 0.9507527351379395, "learning_rate": 2.724477594569852e-07, "loss": 0.4659, "step": 41695 }, { "epoch": 0.9256279064605276, "grad_norm": 0.8072733879089355, "learning_rate": 2.716399936094294e-07, "loss": 0.3659, "step": 41700 }, { "epoch": 0.9257388930200553, "grad_norm": 1.3433468341827393, "learning_rate": 2.708334105032551e-07, "loss": 0.5524, "step": 41705 }, { "epoch": 0.9258498795795829, "grad_norm": 1.2383370399475098, "learning_rate": 2.7002801023652715e-07, "loss": 0.2805, "step": 41710 }, { "epoch": 0.9259608661391105, "grad_norm": 0.6843294501304626, "learning_rate": 2.692237929071617e-07, "loss": 0.4555, "step": 41715 }, { "epoch": 0.9260718526986382, "grad_norm": 1.0873684883117676, "learning_rate": 2.684207586129328e-07, "loss": 0.4586, "step": 41720 }, { "epoch": 0.9261828392581658, "grad_norm": 0.7387077212333679, "learning_rate": 2.676189074514712e-07, "loss": 0.2936, "step": 41725 }, { "epoch": 0.9262938258176935, "grad_norm": 1.1081559658050537, "learning_rate": 2.6681823952026343e-07, "loss": 0.3782, "step": 41730 }, { "epoch": 0.9264048123772212, "grad_norm": 0.7738690376281738, "learning_rate": 2.660187549166504e-07, "loss": 0.3472, "step": 41735 }, { "epoch": 0.9265157989367487, "grad_norm": 0.8234603404998779, "learning_rate": 2.6522045373783223e-07, "loss": 0.3328, "step": 41740 }, { "epoch": 0.9266267854962764, "grad_norm": 1.5815513134002686, "learning_rate": 2.6442333608086344e-07, "loss": 0.3223, "step": 41745 }, { "epoch": 0.926737772055804, "grad_norm": 1.6585302352905273, "learning_rate": 2.6362740204265434e-07, "loss": 0.5519, "step": 41750 }, { "epoch": 0.9268487586153317, "grad_norm": 1.0758693218231201, "learning_rate": 2.628326517199731e-07, "loss": 0.3138, "step": 41755 }, { "epoch": 0.9269597451748594, "grad_norm": 2.1765646934509277, "learning_rate": 2.6203908520944234e-07, "loss": 0.36, "step": 41760 }, { "epoch": 0.9270707317343869, "grad_norm": 1.0552098751068115, "learning_rate": 2.6124670260754046e-07, "loss": 0.3435, "step": 41765 }, { "epoch": 0.9271817182939146, "grad_norm": 1.592545986175537, "learning_rate": 2.6045550401060383e-07, "loss": 0.4286, "step": 41770 }, { "epoch": 0.9272927048534423, "grad_norm": 1.1659295558929443, "learning_rate": 2.5966548951482206e-07, "loss": 0.4308, "step": 41775 }, { "epoch": 0.9274036914129699, "grad_norm": 0.8281853199005127, "learning_rate": 2.58876659216245e-07, "loss": 0.361, "step": 41780 }, { "epoch": 0.9275146779724975, "grad_norm": 0.868346095085144, "learning_rate": 2.5808901321077274e-07, "loss": 0.3813, "step": 41785 }, { "epoch": 0.9276256645320252, "grad_norm": 1.7827457189559937, "learning_rate": 2.573025515941685e-07, "loss": 0.5315, "step": 41790 }, { "epoch": 0.9277366510915528, "grad_norm": 0.8157330751419067, "learning_rate": 2.5651727446204364e-07, "loss": 0.3371, "step": 41795 }, { "epoch": 0.9278476376510805, "grad_norm": 1.5221593379974365, "learning_rate": 2.5573318190987186e-07, "loss": 0.3624, "step": 41800 }, { "epoch": 0.927958624210608, "grad_norm": 1.3963264226913452, "learning_rate": 2.549502740329812e-07, "loss": 0.3376, "step": 41805 }, { "epoch": 0.9280696107701357, "grad_norm": 1.3674664497375488, "learning_rate": 2.5416855092655345e-07, "loss": 0.4054, "step": 41810 }, { "epoch": 0.9281805973296634, "grad_norm": 1.2279380559921265, "learning_rate": 2.533880126856281e-07, "loss": 0.3089, "step": 41815 }, { "epoch": 0.928291583889191, "grad_norm": 0.9081880450248718, "learning_rate": 2.5260865940510027e-07, "loss": 0.3848, "step": 41820 }, { "epoch": 0.9284025704487187, "grad_norm": 1.3663815259933472, "learning_rate": 2.51830491179722e-07, "loss": 0.3177, "step": 41825 }, { "epoch": 0.9285135570082463, "grad_norm": 1.113681674003601, "learning_rate": 2.510535081040999e-07, "loss": 0.3978, "step": 41830 }, { "epoch": 0.9286245435677739, "grad_norm": 1.3013014793395996, "learning_rate": 2.502777102726961e-07, "loss": 0.3886, "step": 41835 }, { "epoch": 0.9287355301273016, "grad_norm": 1.1549102067947388, "learning_rate": 2.4950309777983074e-07, "loss": 0.2329, "step": 41840 }, { "epoch": 0.9288465166868293, "grad_norm": 1.560652256011963, "learning_rate": 2.4872967071967736e-07, "loss": 0.4282, "step": 41845 }, { "epoch": 0.9289575032463568, "grad_norm": 1.8950015306472778, "learning_rate": 2.479574291862685e-07, "loss": 0.4252, "step": 41850 }, { "epoch": 0.9290684898058845, "grad_norm": 1.6299108266830444, "learning_rate": 2.4718637327348915e-07, "loss": 0.4783, "step": 41855 }, { "epoch": 0.9291794763654121, "grad_norm": 1.442406415939331, "learning_rate": 2.4641650307508203e-07, "loss": 0.3578, "step": 41860 }, { "epoch": 0.9292904629249398, "grad_norm": 1.77949059009552, "learning_rate": 2.456478186846456e-07, "loss": 0.4453, "step": 41865 }, { "epoch": 0.9294014494844675, "grad_norm": 1.6224652528762817, "learning_rate": 2.44880320195634e-07, "loss": 0.4472, "step": 41870 }, { "epoch": 0.929512436043995, "grad_norm": 1.282048225402832, "learning_rate": 2.4411400770135705e-07, "loss": 0.3831, "step": 41875 }, { "epoch": 0.9296234226035227, "grad_norm": 1.2435473203659058, "learning_rate": 2.4334888129497915e-07, "loss": 0.4324, "step": 41880 }, { "epoch": 0.9297344091630504, "grad_norm": 1.9706709384918213, "learning_rate": 2.425849410695236e-07, "loss": 0.3785, "step": 41885 }, { "epoch": 0.929845395722578, "grad_norm": 1.8323307037353516, "learning_rate": 2.418221871178683e-07, "loss": 0.3998, "step": 41890 }, { "epoch": 0.9299563822821056, "grad_norm": 0.8738846778869629, "learning_rate": 2.4106061953274363e-07, "loss": 0.1959, "step": 41895 }, { "epoch": 0.9300673688416333, "grad_norm": 1.2526519298553467, "learning_rate": 2.40300238406741e-07, "loss": 0.4916, "step": 41900 }, { "epoch": 0.9301783554011609, "grad_norm": 1.3293545246124268, "learning_rate": 2.3954104383230316e-07, "loss": 0.3191, "step": 41905 }, { "epoch": 0.9302893419606886, "grad_norm": 1.7949703931808472, "learning_rate": 2.387830359017318e-07, "loss": 0.4433, "step": 41910 }, { "epoch": 0.9304003285202161, "grad_norm": 1.6385241746902466, "learning_rate": 2.38026214707181e-07, "loss": 0.2801, "step": 41915 }, { "epoch": 0.9305113150797438, "grad_norm": 1.4388099908828735, "learning_rate": 2.3727058034066497e-07, "loss": 0.4695, "step": 41920 }, { "epoch": 0.9306223016392715, "grad_norm": 1.20249342918396, "learning_rate": 2.3651613289405019e-07, "loss": 0.4453, "step": 41925 }, { "epoch": 0.9307332881987991, "grad_norm": 1.9365683794021606, "learning_rate": 2.3576287245905883e-07, "loss": 0.352, "step": 41930 }, { "epoch": 0.9308442747583268, "grad_norm": 0.9989916086196899, "learning_rate": 2.350107991272721e-07, "loss": 0.4682, "step": 41935 }, { "epoch": 0.9309552613178544, "grad_norm": 1.4359912872314453, "learning_rate": 2.3425991299012241e-07, "loss": 0.3343, "step": 41940 }, { "epoch": 0.931066247877382, "grad_norm": 1.1598297357559204, "learning_rate": 2.335102141389012e-07, "loss": 0.3479, "step": 41945 }, { "epoch": 0.9311772344369097, "grad_norm": 1.731669306755066, "learning_rate": 2.327617026647533e-07, "loss": 0.4167, "step": 41950 }, { "epoch": 0.9312882209964374, "grad_norm": 1.6868865489959717, "learning_rate": 2.3201437865868148e-07, "loss": 0.3485, "step": 41955 }, { "epoch": 0.931399207555965, "grad_norm": 0.9299235939979553, "learning_rate": 2.3126824221154199e-07, "loss": 0.3797, "step": 41960 }, { "epoch": 0.9315101941154926, "grad_norm": 1.5356875658035278, "learning_rate": 2.3052329341404777e-07, "loss": 0.2368, "step": 41965 }, { "epoch": 0.9316211806750202, "grad_norm": 1.2975677251815796, "learning_rate": 2.2977953235676642e-07, "loss": 0.4181, "step": 41970 }, { "epoch": 0.9317321672345479, "grad_norm": 0.5879126191139221, "learning_rate": 2.2903695913012448e-07, "loss": 0.1615, "step": 41975 }, { "epoch": 0.9318431537940756, "grad_norm": 1.8237122297286987, "learning_rate": 2.282955738243986e-07, "loss": 0.2599, "step": 41980 }, { "epoch": 0.9319541403536031, "grad_norm": 0.8162139058113098, "learning_rate": 2.2755537652972558e-07, "loss": 0.3054, "step": 41985 }, { "epoch": 0.9320651269131308, "grad_norm": 1.2494746446609497, "learning_rate": 2.2681636733609457e-07, "loss": 0.3428, "step": 41990 }, { "epoch": 0.9321761134726585, "grad_norm": 0.41989076137542725, "learning_rate": 2.260785463333548e-07, "loss": 0.3983, "step": 41995 }, { "epoch": 0.9322871000321861, "grad_norm": 0.9777684807777405, "learning_rate": 2.253419136112045e-07, "loss": 0.3267, "step": 42000 }, { "epoch": 0.9323980865917137, "grad_norm": 1.2072054147720337, "learning_rate": 2.2460646925920315e-07, "loss": 0.4389, "step": 42005 }, { "epoch": 0.9325090731512414, "grad_norm": 1.2655891180038452, "learning_rate": 2.238722133667637e-07, "loss": 0.4491, "step": 42010 }, { "epoch": 0.932620059710769, "grad_norm": 1.3673769235610962, "learning_rate": 2.2313914602315245e-07, "loss": 0.3805, "step": 42015 }, { "epoch": 0.9327310462702967, "grad_norm": 1.3959051370620728, "learning_rate": 2.2240726731749707e-07, "loss": 0.3748, "step": 42020 }, { "epoch": 0.9328420328298244, "grad_norm": 1.0409404039382935, "learning_rate": 2.21676577338773e-07, "loss": 0.3834, "step": 42025 }, { "epoch": 0.9329530193893519, "grad_norm": 1.689075231552124, "learning_rate": 2.20947076175817e-07, "loss": 0.2738, "step": 42030 }, { "epoch": 0.9330640059488796, "grad_norm": 0.9436668157577515, "learning_rate": 2.2021876391731922e-07, "loss": 0.5671, "step": 42035 }, { "epoch": 0.9331749925084072, "grad_norm": 1.3573400974273682, "learning_rate": 2.1949164065182548e-07, "loss": 0.4767, "step": 42040 }, { "epoch": 0.9332859790679349, "grad_norm": 0.9591343998908997, "learning_rate": 2.187657064677362e-07, "loss": 0.2737, "step": 42045 }, { "epoch": 0.9333969656274625, "grad_norm": 0.7570490837097168, "learning_rate": 2.1804096145330856e-07, "loss": 0.4176, "step": 42050 }, { "epoch": 0.9335079521869901, "grad_norm": 1.4021674394607544, "learning_rate": 2.1731740569665538e-07, "loss": 0.4511, "step": 42055 }, { "epoch": 0.9336189387465178, "grad_norm": 1.2997996807098389, "learning_rate": 2.165950392857441e-07, "loss": 0.3495, "step": 42060 }, { "epoch": 0.9337299253060455, "grad_norm": 1.744541049003601, "learning_rate": 2.1587386230839558e-07, "loss": 0.3737, "step": 42065 }, { "epoch": 0.933840911865573, "grad_norm": 1.6680949926376343, "learning_rate": 2.1515387485228968e-07, "loss": 0.3786, "step": 42070 }, { "epoch": 0.9339518984251007, "grad_norm": 2.040933847427368, "learning_rate": 2.1443507700495968e-07, "loss": 0.313, "step": 42075 }, { "epoch": 0.9340628849846284, "grad_norm": 1.7324872016906738, "learning_rate": 2.1371746885379575e-07, "loss": 0.4825, "step": 42080 }, { "epoch": 0.934173871544156, "grad_norm": 1.2083805799484253, "learning_rate": 2.1300105048604136e-07, "loss": 0.473, "step": 42085 }, { "epoch": 0.9342848581036837, "grad_norm": 1.1241426467895508, "learning_rate": 2.1228582198879467e-07, "loss": 0.3938, "step": 42090 }, { "epoch": 0.9343958446632112, "grad_norm": 1.544179916381836, "learning_rate": 2.1157178344901385e-07, "loss": 0.4666, "step": 42095 }, { "epoch": 0.9345068312227389, "grad_norm": 0.7302343249320984, "learning_rate": 2.1085893495350729e-07, "loss": 0.4213, "step": 42100 }, { "epoch": 0.9346178177822666, "grad_norm": 1.4219212532043457, "learning_rate": 2.101472765889412e-07, "loss": 0.4218, "step": 42105 }, { "epoch": 0.9347288043417942, "grad_norm": 1.3587608337402344, "learning_rate": 2.0943680844183633e-07, "loss": 0.4437, "step": 42110 }, { "epoch": 0.9348397909013219, "grad_norm": 0.9834860563278198, "learning_rate": 2.087275305985692e-07, "loss": 0.3333, "step": 42115 }, { "epoch": 0.9349507774608495, "grad_norm": 1.191887617111206, "learning_rate": 2.0801944314537192e-07, "loss": 0.3715, "step": 42120 }, { "epoch": 0.9350617640203771, "grad_norm": 1.459552526473999, "learning_rate": 2.0731254616833008e-07, "loss": 0.3668, "step": 42125 }, { "epoch": 0.9351727505799048, "grad_norm": 0.7708008885383606, "learning_rate": 2.0660683975338826e-07, "loss": 0.3064, "step": 42130 }, { "epoch": 0.9352837371394325, "grad_norm": 1.4403290748596191, "learning_rate": 2.0590232398634114e-07, "loss": 0.4275, "step": 42135 }, { "epoch": 0.93539472369896, "grad_norm": 1.713077187538147, "learning_rate": 2.0519899895284245e-07, "loss": 0.3068, "step": 42140 }, { "epoch": 0.9355057102584877, "grad_norm": 1.082633137702942, "learning_rate": 2.0449686473840157e-07, "loss": 0.3676, "step": 42145 }, { "epoch": 0.9356166968180153, "grad_norm": 1.344906210899353, "learning_rate": 2.0379592142837912e-07, "loss": 0.3333, "step": 42150 }, { "epoch": 0.935727683377543, "grad_norm": 0.6421421766281128, "learning_rate": 2.0309616910799579e-07, "loss": 0.3735, "step": 42155 }, { "epoch": 0.9358386699370707, "grad_norm": 1.8165996074676514, "learning_rate": 2.0239760786232355e-07, "loss": 0.3604, "step": 42160 }, { "epoch": 0.9359496564965982, "grad_norm": 1.6352641582489014, "learning_rate": 2.017002377762911e-07, "loss": 0.399, "step": 42165 }, { "epoch": 0.9360606430561259, "grad_norm": 1.11709463596344, "learning_rate": 2.010040589346829e-07, "loss": 0.388, "step": 42170 }, { "epoch": 0.9361716296156536, "grad_norm": 1.6944082975387573, "learning_rate": 2.0030907142213784e-07, "loss": 0.4607, "step": 42175 }, { "epoch": 0.9362826161751812, "grad_norm": 1.1754425764083862, "learning_rate": 1.996152753231506e-07, "loss": 0.4429, "step": 42180 }, { "epoch": 0.9363936027347088, "grad_norm": 0.9020445942878723, "learning_rate": 1.989226707220704e-07, "loss": 0.284, "step": 42185 }, { "epoch": 0.9365045892942365, "grad_norm": 1.2843596935272217, "learning_rate": 1.9823125770310204e-07, "loss": 0.4293, "step": 42190 }, { "epoch": 0.9366155758537641, "grad_norm": 1.1910959482192993, "learning_rate": 1.9754103635030385e-07, "loss": 0.5196, "step": 42195 }, { "epoch": 0.9367265624132918, "grad_norm": 1.0260785818099976, "learning_rate": 1.968520067475921e-07, "loss": 0.3814, "step": 42200 }, { "epoch": 0.9368375489728193, "grad_norm": 1.587637186050415, "learning_rate": 1.9616416897873525e-07, "loss": 0.3861, "step": 42205 }, { "epoch": 0.936948535532347, "grad_norm": 1.1606448888778687, "learning_rate": 1.9547752312735978e-07, "loss": 0.4401, "step": 42210 }, { "epoch": 0.9370595220918747, "grad_norm": 1.0315425395965576, "learning_rate": 1.947920692769456e-07, "loss": 0.4936, "step": 42215 }, { "epoch": 0.9371705086514023, "grad_norm": 1.2617237567901611, "learning_rate": 1.9410780751082714e-07, "loss": 0.3557, "step": 42220 }, { "epoch": 0.93728149521093, "grad_norm": 1.15226411819458, "learning_rate": 1.9342473791219561e-07, "loss": 0.3832, "step": 42225 }, { "epoch": 0.9373924817704576, "grad_norm": 0.784106433391571, "learning_rate": 1.9274286056409574e-07, "loss": 0.5419, "step": 42230 }, { "epoch": 0.9375034683299852, "grad_norm": 0.9966132044792175, "learning_rate": 1.9206217554942675e-07, "loss": 0.431, "step": 42235 }, { "epoch": 0.9376144548895129, "grad_norm": 1.2382640838623047, "learning_rate": 1.9138268295094687e-07, "loss": 0.3532, "step": 42240 }, { "epoch": 0.9377254414490406, "grad_norm": 1.321143627166748, "learning_rate": 1.9070438285126337e-07, "loss": 0.3812, "step": 42245 }, { "epoch": 0.9378364280085681, "grad_norm": 2.318057060241699, "learning_rate": 1.9002727533284358e-07, "loss": 0.1845, "step": 42250 }, { "epoch": 0.9379474145680958, "grad_norm": 1.220383882522583, "learning_rate": 1.893513604780073e-07, "loss": 0.314, "step": 42255 }, { "epoch": 0.9380584011276234, "grad_norm": 1.221579909324646, "learning_rate": 1.8867663836893092e-07, "loss": 0.3582, "step": 42260 }, { "epoch": 0.9381693876871511, "grad_norm": 1.1214572191238403, "learning_rate": 1.880031090876444e-07, "loss": 0.4177, "step": 42265 }, { "epoch": 0.9382803742466788, "grad_norm": 1.2276601791381836, "learning_rate": 1.8733077271603117e-07, "loss": 0.3543, "step": 42270 }, { "epoch": 0.9383913608062063, "grad_norm": 1.359606385231018, "learning_rate": 1.8665962933583581e-07, "loss": 0.5302, "step": 42275 }, { "epoch": 0.938502347365734, "grad_norm": 1.5073870420455933, "learning_rate": 1.8598967902864974e-07, "loss": 0.4673, "step": 42280 }, { "epoch": 0.9386133339252617, "grad_norm": 1.1110531091690063, "learning_rate": 1.8532092187592553e-07, "loss": 0.4733, "step": 42285 }, { "epoch": 0.9387243204847893, "grad_norm": 1.0862751007080078, "learning_rate": 1.8465335795896822e-07, "loss": 0.4716, "step": 42290 }, { "epoch": 0.938835307044317, "grad_norm": 1.0596460103988647, "learning_rate": 1.8398698735893616e-07, "loss": 0.3851, "step": 42295 }, { "epoch": 0.9389462936038446, "grad_norm": 1.0239298343658447, "learning_rate": 1.833218101568468e-07, "loss": 0.3476, "step": 42300 }, { "epoch": 0.9390572801633722, "grad_norm": 0.8641639947891235, "learning_rate": 1.8265782643356877e-07, "loss": 0.3246, "step": 42305 }, { "epoch": 0.9391682667228999, "grad_norm": 1.0871738195419312, "learning_rate": 1.8199503626982751e-07, "loss": 0.3363, "step": 42310 }, { "epoch": 0.9392792532824275, "grad_norm": 1.2380642890930176, "learning_rate": 1.81333439746203e-07, "loss": 0.4041, "step": 42315 }, { "epoch": 0.9393902398419551, "grad_norm": 1.2620341777801514, "learning_rate": 1.8067303694312975e-07, "loss": 0.4553, "step": 42320 }, { "epoch": 0.9395012264014828, "grad_norm": 2.0406928062438965, "learning_rate": 1.8001382794089695e-07, "loss": 0.3012, "step": 42325 }, { "epoch": 0.9396122129610104, "grad_norm": 1.631622076034546, "learning_rate": 1.793558128196493e-07, "loss": 0.4641, "step": 42330 }, { "epoch": 0.9397231995205381, "grad_norm": 1.1621760129928589, "learning_rate": 1.786989916593862e-07, "loss": 0.3177, "step": 42335 }, { "epoch": 0.9398341860800657, "grad_norm": 1.408253788948059, "learning_rate": 1.780433645399615e-07, "loss": 0.4708, "step": 42340 }, { "epoch": 0.9399451726395933, "grad_norm": 0.9971133470535278, "learning_rate": 1.773889315410837e-07, "loss": 0.3845, "step": 42345 }, { "epoch": 0.940056159199121, "grad_norm": 0.8766626715660095, "learning_rate": 1.7673569274231805e-07, "loss": 0.3939, "step": 42350 }, { "epoch": 0.9401671457586487, "grad_norm": 1.2505395412445068, "learning_rate": 1.76083648223081e-07, "loss": 0.5388, "step": 42355 }, { "epoch": 0.9402781323181763, "grad_norm": 1.0482748746871948, "learning_rate": 1.754327980626491e-07, "loss": 0.4699, "step": 42360 }, { "epoch": 0.9403891188777039, "grad_norm": 0.8574467301368713, "learning_rate": 1.7478314234014693e-07, "loss": 0.4789, "step": 42365 }, { "epoch": 0.9405001054372315, "grad_norm": 1.6569310426712036, "learning_rate": 1.74134681134559e-07, "loss": 0.3876, "step": 42370 }, { "epoch": 0.9406110919967592, "grad_norm": 0.793308675289154, "learning_rate": 1.734874145247245e-07, "loss": 0.2943, "step": 42375 }, { "epoch": 0.9407220785562869, "grad_norm": 1.8770109415054321, "learning_rate": 1.728413425893327e-07, "loss": 0.313, "step": 42380 }, { "epoch": 0.9408330651158144, "grad_norm": 0.7877984642982483, "learning_rate": 1.7219646540693414e-07, "loss": 0.3232, "step": 42385 }, { "epoch": 0.9409440516753421, "grad_norm": 1.603267788887024, "learning_rate": 1.7155278305592938e-07, "loss": 0.3759, "step": 42390 }, { "epoch": 0.9410550382348698, "grad_norm": 0.926700234413147, "learning_rate": 1.7091029561457473e-07, "loss": 0.252, "step": 42395 }, { "epoch": 0.9411660247943974, "grad_norm": 1.335512638092041, "learning_rate": 1.7026900316098217e-07, "loss": 0.3411, "step": 42400 }, { "epoch": 0.941277011353925, "grad_norm": 1.0387749671936035, "learning_rate": 1.696289057731182e-07, "loss": 0.2486, "step": 42405 }, { "epoch": 0.9413879979134527, "grad_norm": 1.5329840183258057, "learning_rate": 1.689900035288028e-07, "loss": 0.5884, "step": 42410 }, { "epoch": 0.9414989844729803, "grad_norm": 1.8901313543319702, "learning_rate": 1.6835229650571272e-07, "loss": 0.3843, "step": 42415 }, { "epoch": 0.941609971032508, "grad_norm": 1.305861473083496, "learning_rate": 1.6771578478137818e-07, "loss": 0.4051, "step": 42420 }, { "epoch": 0.9417209575920356, "grad_norm": 1.112451434135437, "learning_rate": 1.6708046843318172e-07, "loss": 0.2721, "step": 42425 }, { "epoch": 0.9418319441515632, "grad_norm": 0.6873188018798828, "learning_rate": 1.6644634753836598e-07, "loss": 0.2981, "step": 42430 }, { "epoch": 0.9419429307110909, "grad_norm": 1.1935917139053345, "learning_rate": 1.6581342217402485e-07, "loss": 0.2242, "step": 42435 }, { "epoch": 0.9420539172706185, "grad_norm": 1.4765454530715942, "learning_rate": 1.6518169241710568e-07, "loss": 0.4643, "step": 42440 }, { "epoch": 0.9421649038301462, "grad_norm": 1.2573107481002808, "learning_rate": 1.645511583444126e-07, "loss": 0.5275, "step": 42445 }, { "epoch": 0.9422758903896739, "grad_norm": 2.344226837158203, "learning_rate": 1.6392182003260427e-07, "loss": 0.395, "step": 42450 }, { "epoch": 0.9423868769492014, "grad_norm": 2.1406664848327637, "learning_rate": 1.6329367755819503e-07, "loss": 0.48, "step": 42455 }, { "epoch": 0.9424978635087291, "grad_norm": 2.3771812915802, "learning_rate": 1.6266673099754825e-07, "loss": 0.2588, "step": 42460 }, { "epoch": 0.9426088500682568, "grad_norm": 1.4632244110107422, "learning_rate": 1.6204098042689076e-07, "loss": 0.4524, "step": 42465 }, { "epoch": 0.9427198366277844, "grad_norm": 1.293175458908081, "learning_rate": 1.6141642592229611e-07, "loss": 0.4124, "step": 42470 }, { "epoch": 0.942830823187312, "grad_norm": 1.3106547594070435, "learning_rate": 1.6079306755969582e-07, "loss": 0.4071, "step": 42475 }, { "epoch": 0.9429418097468396, "grad_norm": 0.9079194068908691, "learning_rate": 1.601709054148759e-07, "loss": 0.2636, "step": 42480 }, { "epoch": 0.9430527963063673, "grad_norm": 0.9949849247932434, "learning_rate": 1.595499395634781e-07, "loss": 0.4341, "step": 42485 }, { "epoch": 0.943163782865895, "grad_norm": 1.311824917793274, "learning_rate": 1.5893017008099532e-07, "loss": 0.325, "step": 42490 }, { "epoch": 0.9432747694254225, "grad_norm": 1.0263524055480957, "learning_rate": 1.5831159704277955e-07, "loss": 0.3093, "step": 42495 }, { "epoch": 0.9433857559849502, "grad_norm": 1.7075735330581665, "learning_rate": 1.5769422052403172e-07, "loss": 0.3251, "step": 42500 }, { "epoch": 0.9434967425444779, "grad_norm": 1.4891036748886108, "learning_rate": 1.5707804059981403e-07, "loss": 0.3352, "step": 42505 }, { "epoch": 0.9436077291040055, "grad_norm": 0.8496419191360474, "learning_rate": 1.5646305734503541e-07, "loss": 0.3912, "step": 42510 }, { "epoch": 0.9437187156635332, "grad_norm": 0.9281029105186462, "learning_rate": 1.558492708344661e-07, "loss": 0.299, "step": 42515 }, { "epoch": 0.9438297022230608, "grad_norm": 0.8617515563964844, "learning_rate": 1.552366811427286e-07, "loss": 0.4132, "step": 42520 }, { "epoch": 0.9439406887825884, "grad_norm": 1.8958934545516968, "learning_rate": 1.546252883442978e-07, "loss": 0.4692, "step": 42525 }, { "epoch": 0.9440516753421161, "grad_norm": 1.008283257484436, "learning_rate": 1.5401509251350643e-07, "loss": 0.3475, "step": 42530 }, { "epoch": 0.9441626619016437, "grad_norm": 1.0553314685821533, "learning_rate": 1.5340609372453852e-07, "loss": 0.2833, "step": 42535 }, { "epoch": 0.9442736484611713, "grad_norm": 0.5733352899551392, "learning_rate": 1.5279829205143482e-07, "loss": 0.3802, "step": 42540 }, { "epoch": 0.944384635020699, "grad_norm": 1.4484621286392212, "learning_rate": 1.5219168756809067e-07, "loss": 0.4502, "step": 42545 }, { "epoch": 0.9444956215802266, "grad_norm": 2.533099412918091, "learning_rate": 1.5158628034825373e-07, "loss": 0.3167, "step": 42550 }, { "epoch": 0.9446066081397543, "grad_norm": 1.320468783378601, "learning_rate": 1.509820704655285e-07, "loss": 0.4806, "step": 42555 }, { "epoch": 0.944717594699282, "grad_norm": 1.3541382551193237, "learning_rate": 1.5037905799337062e-07, "loss": 0.4546, "step": 42560 }, { "epoch": 0.9448285812588095, "grad_norm": 0.7527419924736023, "learning_rate": 1.4977724300509367e-07, "loss": 0.3497, "step": 42565 }, { "epoch": 0.9449395678183372, "grad_norm": 1.0765899419784546, "learning_rate": 1.491766255738658e-07, "loss": 0.4331, "step": 42570 }, { "epoch": 0.9450505543778649, "grad_norm": 0.9651219248771667, "learning_rate": 1.485772057727064e-07, "loss": 0.2916, "step": 42575 }, { "epoch": 0.9451615409373925, "grad_norm": 0.8204355239868164, "learning_rate": 1.479789836744916e-07, "loss": 0.4547, "step": 42580 }, { "epoch": 0.9452725274969201, "grad_norm": 0.870439350605011, "learning_rate": 1.4738195935194987e-07, "loss": 0.3729, "step": 42585 }, { "epoch": 0.9453835140564477, "grad_norm": 0.8403009176254272, "learning_rate": 1.4678613287766653e-07, "loss": 0.2963, "step": 42590 }, { "epoch": 0.9454945006159754, "grad_norm": 0.7220903635025024, "learning_rate": 1.4619150432408025e-07, "loss": 0.3095, "step": 42595 }, { "epoch": 0.9456054871755031, "grad_norm": 1.0451745986938477, "learning_rate": 1.4559807376348324e-07, "loss": 0.4031, "step": 42600 }, { "epoch": 0.9457164737350306, "grad_norm": 1.8504866361618042, "learning_rate": 1.450058412680244e-07, "loss": 0.4052, "step": 42605 }, { "epoch": 0.9458274602945583, "grad_norm": 1.066005825996399, "learning_rate": 1.4441480690970399e-07, "loss": 0.3125, "step": 42610 }, { "epoch": 0.945938446854086, "grad_norm": 1.2941941022872925, "learning_rate": 1.4382497076037783e-07, "loss": 0.4924, "step": 42615 }, { "epoch": 0.9460494334136136, "grad_norm": 1.210545539855957, "learning_rate": 1.432363328917574e-07, "loss": 0.4056, "step": 42620 }, { "epoch": 0.9461604199731413, "grad_norm": 1.2813447713851929, "learning_rate": 1.4264889337540556e-07, "loss": 0.4493, "step": 42625 }, { "epoch": 0.946271406532669, "grad_norm": 1.060917854309082, "learning_rate": 1.4206265228274285e-07, "loss": 0.2476, "step": 42630 }, { "epoch": 0.9463823930921965, "grad_norm": 1.3904987573623657, "learning_rate": 1.4147760968504232e-07, "loss": 0.3694, "step": 42635 }, { "epoch": 0.9464933796517242, "grad_norm": 1.623928427696228, "learning_rate": 1.4089376565343037e-07, "loss": 0.4551, "step": 42640 }, { "epoch": 0.9466043662112518, "grad_norm": 1.5032771825790405, "learning_rate": 1.4031112025889028e-07, "loss": 0.2393, "step": 42645 }, { "epoch": 0.9467153527707795, "grad_norm": 1.5255825519561768, "learning_rate": 1.397296735722564e-07, "loss": 0.4987, "step": 42650 }, { "epoch": 0.9468263393303071, "grad_norm": 1.5206665992736816, "learning_rate": 1.391494256642212e-07, "loss": 0.4196, "step": 42655 }, { "epoch": 0.9469373258898347, "grad_norm": 1.3917713165283203, "learning_rate": 1.3857037660532703e-07, "loss": 0.3997, "step": 42660 }, { "epoch": 0.9470483124493624, "grad_norm": 1.3763508796691895, "learning_rate": 1.3799252646597428e-07, "loss": 0.2943, "step": 42665 }, { "epoch": 0.9471592990088901, "grad_norm": 0.9568051695823669, "learning_rate": 1.3741587531641566e-07, "loss": 0.2781, "step": 42670 }, { "epoch": 0.9472702855684176, "grad_norm": 1.4659420251846313, "learning_rate": 1.368404232267584e-07, "loss": 0.3767, "step": 42675 }, { "epoch": 0.9473812721279453, "grad_norm": 0.5646659135818481, "learning_rate": 1.362661702669632e-07, "loss": 0.3299, "step": 42680 }, { "epoch": 0.947492258687473, "grad_norm": 1.633173942565918, "learning_rate": 1.3569311650684757e-07, "loss": 0.5553, "step": 42685 }, { "epoch": 0.9476032452470006, "grad_norm": 1.2178746461868286, "learning_rate": 1.351212620160802e-07, "loss": 0.6447, "step": 42690 }, { "epoch": 0.9477142318065283, "grad_norm": 0.9479925036430359, "learning_rate": 1.3455060686418663e-07, "loss": 0.3073, "step": 42695 }, { "epoch": 0.9478252183660558, "grad_norm": 0.6817415356636047, "learning_rate": 1.3398115112054243e-07, "loss": 0.3668, "step": 42700 }, { "epoch": 0.9479362049255835, "grad_norm": 1.6585414409637451, "learning_rate": 1.3341289485438336e-07, "loss": 0.3634, "step": 42705 }, { "epoch": 0.9480471914851112, "grad_norm": 1.1411597728729248, "learning_rate": 1.32845838134793e-07, "loss": 0.3535, "step": 42710 }, { "epoch": 0.9481581780446388, "grad_norm": 1.1587713956832886, "learning_rate": 1.3227998103071516e-07, "loss": 0.4131, "step": 42715 }, { "epoch": 0.9482691646041664, "grad_norm": 3.019202709197998, "learning_rate": 1.3171532361094252e-07, "loss": 0.4121, "step": 42720 }, { "epoch": 0.9483801511636941, "grad_norm": 1.5659027099609375, "learning_rate": 1.3115186594412576e-07, "loss": 0.4875, "step": 42725 }, { "epoch": 0.9484911377232217, "grad_norm": 1.1905382871627808, "learning_rate": 1.3058960809876676e-07, "loss": 0.3339, "step": 42730 }, { "epoch": 0.9486021242827494, "grad_norm": 1.0344101190567017, "learning_rate": 1.3002855014322413e-07, "loss": 0.2769, "step": 42735 }, { "epoch": 0.948713110842277, "grad_norm": 0.7989633083343506, "learning_rate": 1.2946869214570888e-07, "loss": 0.3921, "step": 42740 }, { "epoch": 0.9488240974018046, "grad_norm": 1.0567229986190796, "learning_rate": 1.2891003417428772e-07, "loss": 0.4605, "step": 42745 }, { "epoch": 0.9489350839613323, "grad_norm": 0.9074198007583618, "learning_rate": 1.2835257629687847e-07, "loss": 0.3401, "step": 42750 }, { "epoch": 0.9490460705208599, "grad_norm": 1.9332501888275146, "learning_rate": 1.2779631858125696e-07, "loss": 0.2172, "step": 42755 }, { "epoch": 0.9491570570803876, "grad_norm": 0.7035365700721741, "learning_rate": 1.272412610950502e-07, "loss": 0.4515, "step": 42760 }, { "epoch": 0.9492680436399152, "grad_norm": 1.057877540588379, "learning_rate": 1.2668740390573975e-07, "loss": 0.4552, "step": 42765 }, { "epoch": 0.9493790301994428, "grad_norm": 0.7197465896606445, "learning_rate": 1.2613474708066175e-07, "loss": 0.3718, "step": 42770 }, { "epoch": 0.9494900167589705, "grad_norm": 0.8760359883308411, "learning_rate": 1.2558329068700803e-07, "loss": 0.2546, "step": 42775 }, { "epoch": 0.9496010033184982, "grad_norm": 1.2623904943466187, "learning_rate": 1.250330347918205e-07, "loss": 0.4187, "step": 42780 }, { "epoch": 0.9497119898780257, "grad_norm": 1.0229982137680054, "learning_rate": 1.244839794620001e-07, "loss": 0.4495, "step": 42785 }, { "epoch": 0.9498229764375534, "grad_norm": 0.8693443536758423, "learning_rate": 1.2393612476429562e-07, "loss": 0.2962, "step": 42790 }, { "epoch": 0.9499339629970811, "grad_norm": 1.0552219152450562, "learning_rate": 1.2338947076531603e-07, "loss": 0.4832, "step": 42795 }, { "epoch": 0.9500449495566087, "grad_norm": 1.2997872829437256, "learning_rate": 1.2284401753152153e-07, "loss": 0.4576, "step": 42800 }, { "epoch": 0.9501559361161364, "grad_norm": 1.9434880018234253, "learning_rate": 1.222997651292257e-07, "loss": 0.4407, "step": 42805 }, { "epoch": 0.9502669226756639, "grad_norm": 1.5652992725372314, "learning_rate": 1.2175671362459785e-07, "loss": 0.3417, "step": 42810 }, { "epoch": 0.9503779092351916, "grad_norm": 1.203678011894226, "learning_rate": 1.2121486308365848e-07, "loss": 0.3064, "step": 42815 }, { "epoch": 0.9504888957947193, "grad_norm": 0.8892753720283508, "learning_rate": 1.2067421357228605e-07, "loss": 0.3789, "step": 42820 }, { "epoch": 0.9505998823542469, "grad_norm": 1.4720929861068726, "learning_rate": 1.2013476515621014e-07, "loss": 0.3041, "step": 42825 }, { "epoch": 0.9507108689137745, "grad_norm": 0.9997137188911438, "learning_rate": 1.195965179010139e-07, "loss": 0.2007, "step": 42830 }, { "epoch": 0.9508218554733022, "grad_norm": 1.6503864526748657, "learning_rate": 1.1905947187213717e-07, "loss": 0.412, "step": 42835 }, { "epoch": 0.9509328420328298, "grad_norm": 1.0847293138504028, "learning_rate": 1.185236271348722e-07, "loss": 0.5052, "step": 42840 }, { "epoch": 0.9510438285923575, "grad_norm": 1.8282418251037598, "learning_rate": 1.1798898375436463e-07, "loss": 0.438, "step": 42845 }, { "epoch": 0.9511548151518852, "grad_norm": 1.196332573890686, "learning_rate": 1.1745554179561469e-07, "loss": 0.4065, "step": 42850 }, { "epoch": 0.9512658017114127, "grad_norm": 0.569940447807312, "learning_rate": 1.1692330132347607e-07, "loss": 0.3161, "step": 42855 }, { "epoch": 0.9513767882709404, "grad_norm": 1.002369999885559, "learning_rate": 1.1639226240265811e-07, "loss": 0.4974, "step": 42860 }, { "epoch": 0.951487774830468, "grad_norm": 1.0855084657669067, "learning_rate": 1.1586242509772139e-07, "loss": 0.2977, "step": 42865 }, { "epoch": 0.9515987613899957, "grad_norm": 1.2213571071624756, "learning_rate": 1.1533378947308216e-07, "loss": 0.4363, "step": 42870 }, { "epoch": 0.9517097479495233, "grad_norm": 1.488828420639038, "learning_rate": 1.1480635559301012e-07, "loss": 0.4341, "step": 42875 }, { "epoch": 0.9518207345090509, "grad_norm": 0.6738657355308533, "learning_rate": 1.1428012352162843e-07, "loss": 0.3108, "step": 42880 }, { "epoch": 0.9519317210685786, "grad_norm": 1.1900697946548462, "learning_rate": 1.137550933229159e-07, "loss": 0.4509, "step": 42885 }, { "epoch": 0.9520427076281063, "grad_norm": 1.0502153635025024, "learning_rate": 1.132312650607037e-07, "loss": 0.32, "step": 42890 }, { "epoch": 0.9521536941876338, "grad_norm": 0.9347466826438904, "learning_rate": 1.1270863879867644e-07, "loss": 0.3837, "step": 42895 }, { "epoch": 0.9522646807471615, "grad_norm": 1.7153974771499634, "learning_rate": 1.1218721460037218e-07, "loss": 0.5206, "step": 42900 }, { "epoch": 0.9523756673066892, "grad_norm": 1.7464630603790283, "learning_rate": 1.1166699252918578e-07, "loss": 0.1663, "step": 42905 }, { "epoch": 0.9524866538662168, "grad_norm": 1.6750175952911377, "learning_rate": 1.1114797264836441e-07, "loss": 0.4082, "step": 42910 }, { "epoch": 0.9525976404257445, "grad_norm": 1.6528713703155518, "learning_rate": 1.1063015502100761e-07, "loss": 0.4874, "step": 42915 }, { "epoch": 0.952708626985272, "grad_norm": 2.113939046859741, "learning_rate": 1.1011353971007055e-07, "loss": 0.4054, "step": 42920 }, { "epoch": 0.9528196135447997, "grad_norm": 1.6014636754989624, "learning_rate": 1.0959812677835968e-07, "loss": 0.3204, "step": 42925 }, { "epoch": 0.9529306001043274, "grad_norm": 1.902674913406372, "learning_rate": 1.0908391628854042e-07, "loss": 0.5774, "step": 42930 }, { "epoch": 0.953041586663855, "grad_norm": 1.0486432313919067, "learning_rate": 1.0857090830312612e-07, "loss": 0.3855, "step": 42935 }, { "epoch": 0.9531525732233826, "grad_norm": 1.0042614936828613, "learning_rate": 1.0805910288448795e-07, "loss": 0.2789, "step": 42940 }, { "epoch": 0.9532635597829103, "grad_norm": 0.6804906129837036, "learning_rate": 1.0754850009484841e-07, "loss": 0.3862, "step": 42945 }, { "epoch": 0.9533745463424379, "grad_norm": 1.6858739852905273, "learning_rate": 1.070390999962867e-07, "loss": 0.3043, "step": 42950 }, { "epoch": 0.9534855329019656, "grad_norm": 1.2860480546951294, "learning_rate": 1.0653090265073218e-07, "loss": 0.3668, "step": 42955 }, { "epoch": 0.9535965194614933, "grad_norm": 1.269942283630371, "learning_rate": 1.0602390811996988e-07, "loss": 0.44, "step": 42960 }, { "epoch": 0.9537075060210208, "grad_norm": 1.5455029010772705, "learning_rate": 1.0551811646563936e-07, "loss": 0.4378, "step": 42965 }, { "epoch": 0.9538184925805485, "grad_norm": 0.7402146458625793, "learning_rate": 1.0501352774923368e-07, "loss": 0.2378, "step": 42970 }, { "epoch": 0.9539294791400761, "grad_norm": 1.076019287109375, "learning_rate": 1.0451014203209708e-07, "loss": 0.3095, "step": 42975 }, { "epoch": 0.9540404656996038, "grad_norm": 1.0999020338058472, "learning_rate": 1.0400795937543062e-07, "loss": 0.398, "step": 42980 }, { "epoch": 0.9541514522591314, "grad_norm": 1.187849521636963, "learning_rate": 1.0350697984028768e-07, "loss": 0.4443, "step": 42985 }, { "epoch": 0.954262438818659, "grad_norm": 1.3116439580917358, "learning_rate": 1.030072034875762e-07, "loss": 0.327, "step": 42990 }, { "epoch": 0.9543734253781867, "grad_norm": 1.2499772310256958, "learning_rate": 1.0250863037805647e-07, "loss": 0.3766, "step": 42995 }, { "epoch": 0.9544844119377144, "grad_norm": 2.4240808486938477, "learning_rate": 1.0201126057234445e-07, "loss": 0.3844, "step": 43000 }, { "epoch": 0.954595398497242, "grad_norm": 0.8385963439941406, "learning_rate": 1.0151509413090843e-07, "loss": 0.2594, "step": 43005 }, { "epoch": 0.9547063850567696, "grad_norm": 0.9707930684089661, "learning_rate": 1.0102013111406905e-07, "loss": 0.2967, "step": 43010 }, { "epoch": 0.9548173716162973, "grad_norm": 0.9702897071838379, "learning_rate": 1.0052637158200484e-07, "loss": 0.2644, "step": 43015 }, { "epoch": 0.9549283581758249, "grad_norm": 1.0392128229141235, "learning_rate": 1.0003381559474335e-07, "loss": 0.3998, "step": 43020 }, { "epoch": 0.9550393447353526, "grad_norm": 2.132612705230713, "learning_rate": 9.954246321216887e-08, "loss": 0.5521, "step": 43025 }, { "epoch": 0.9551503312948801, "grad_norm": 1.4161665439605713, "learning_rate": 9.905231449401809e-08, "loss": 0.4327, "step": 43030 }, { "epoch": 0.9552613178544078, "grad_norm": 1.1672700643539429, "learning_rate": 9.856336949988221e-08, "loss": 0.4846, "step": 43035 }, { "epoch": 0.9553723044139355, "grad_norm": 1.3782694339752197, "learning_rate": 9.80756282892048e-08, "loss": 0.4533, "step": 43040 }, { "epoch": 0.9554832909734631, "grad_norm": 1.3480278253555298, "learning_rate": 9.758909092128289e-08, "loss": 0.4974, "step": 43045 }, { "epoch": 0.9555942775329908, "grad_norm": 0.8139400482177734, "learning_rate": 9.710375745527023e-08, "loss": 0.3991, "step": 43050 }, { "epoch": 0.9557052640925184, "grad_norm": 1.030752182006836, "learning_rate": 9.661962795017189e-08, "loss": 0.4059, "step": 43055 }, { "epoch": 0.955816250652046, "grad_norm": 1.4513559341430664, "learning_rate": 9.613670246484408e-08, "loss": 0.4464, "step": 43060 }, { "epoch": 0.9559272372115737, "grad_norm": 2.3847713470458984, "learning_rate": 9.565498105800208e-08, "loss": 0.2873, "step": 43065 }, { "epoch": 0.9560382237711014, "grad_norm": 1.3109573125839233, "learning_rate": 9.517446378821016e-08, "loss": 0.4918, "step": 43070 }, { "epoch": 0.9561492103306289, "grad_norm": 1.3552931547164917, "learning_rate": 9.469515071388935e-08, "loss": 0.352, "step": 43075 }, { "epoch": 0.9562601968901566, "grad_norm": 1.0919387340545654, "learning_rate": 9.421704189331193e-08, "loss": 0.404, "step": 43080 }, { "epoch": 0.9563711834496842, "grad_norm": 2.247760534286499, "learning_rate": 9.374013738460586e-08, "loss": 0.3161, "step": 43085 }, { "epoch": 0.9564821700092119, "grad_norm": 2.0436818599700928, "learning_rate": 9.32644372457503e-08, "loss": 0.3053, "step": 43090 }, { "epoch": 0.9565931565687396, "grad_norm": 2.8228936195373535, "learning_rate": 9.278994153457898e-08, "loss": 0.3766, "step": 43095 }, { "epoch": 0.9567041431282671, "grad_norm": 1.2718102931976318, "learning_rate": 9.231665030878134e-08, "loss": 0.4939, "step": 43100 }, { "epoch": 0.9568151296877948, "grad_norm": 1.6868155002593994, "learning_rate": 9.184456362589799e-08, "loss": 0.3679, "step": 43105 }, { "epoch": 0.9569261162473225, "grad_norm": 0.9345059990882874, "learning_rate": 9.137368154332304e-08, "loss": 0.3236, "step": 43110 }, { "epoch": 0.9570371028068501, "grad_norm": 2.1842598915100098, "learning_rate": 9.090400411830403e-08, "loss": 0.5163, "step": 43115 }, { "epoch": 0.9571480893663777, "grad_norm": 0.8142204880714417, "learning_rate": 9.043553140794414e-08, "loss": 0.3814, "step": 43120 }, { "epoch": 0.9572590759259054, "grad_norm": 0.9440129995346069, "learning_rate": 8.996826346919896e-08, "loss": 0.3837, "step": 43125 }, { "epoch": 0.957370062485433, "grad_norm": 1.2841126918792725, "learning_rate": 8.950220035887636e-08, "loss": 0.3815, "step": 43130 }, { "epoch": 0.9574810490449607, "grad_norm": 0.7791603207588196, "learning_rate": 8.903734213363769e-08, "loss": 0.2432, "step": 43135 }, { "epoch": 0.9575920356044882, "grad_norm": 1.2277582883834839, "learning_rate": 8.857368885000217e-08, "loss": 0.4522, "step": 43140 }, { "epoch": 0.9577030221640159, "grad_norm": 1.1917132139205933, "learning_rate": 8.811124056433584e-08, "loss": 0.4469, "step": 43145 }, { "epoch": 0.9578140087235436, "grad_norm": 0.8144562244415283, "learning_rate": 8.764999733286372e-08, "loss": 0.3648, "step": 43150 }, { "epoch": 0.9579249952830712, "grad_norm": 0.9421244263648987, "learning_rate": 8.718995921166096e-08, "loss": 0.3401, "step": 43155 }, { "epoch": 0.9580359818425989, "grad_norm": 0.9399101138114929, "learning_rate": 8.673112625665725e-08, "loss": 0.2573, "step": 43160 }, { "epoch": 0.9581469684021265, "grad_norm": 0.9864373803138733, "learning_rate": 8.627349852363798e-08, "loss": 0.3559, "step": 43165 }, { "epoch": 0.9582579549616541, "grad_norm": 0.36230021715164185, "learning_rate": 8.581707606823863e-08, "loss": 0.3169, "step": 43170 }, { "epoch": 0.9583689415211818, "grad_norm": 1.521687388420105, "learning_rate": 8.536185894594928e-08, "loss": 0.4052, "step": 43175 }, { "epoch": 0.9584799280807095, "grad_norm": 1.3936021327972412, "learning_rate": 8.490784721211454e-08, "loss": 0.3974, "step": 43180 }, { "epoch": 0.958590914640237, "grad_norm": 1.47688889503479, "learning_rate": 8.445504092193024e-08, "loss": 0.1766, "step": 43185 }, { "epoch": 0.9587019011997647, "grad_norm": 1.2802175283432007, "learning_rate": 8.400344013044793e-08, "loss": 0.3253, "step": 43190 }, { "epoch": 0.9588128877592923, "grad_norm": 1.3018136024475098, "learning_rate": 8.355304489257254e-08, "loss": 0.3899, "step": 43195 }, { "epoch": 0.95892387431882, "grad_norm": 1.7593441009521484, "learning_rate": 8.31038552630603e-08, "loss": 0.4278, "step": 43200 }, { "epoch": 0.9590348608783477, "grad_norm": 0.9603082537651062, "learning_rate": 8.265587129652308e-08, "loss": 0.4065, "step": 43205 }, { "epoch": 0.9591458474378752, "grad_norm": 1.1551685333251953, "learning_rate": 8.220909304742397e-08, "loss": 0.4648, "step": 43210 }, { "epoch": 0.9592568339974029, "grad_norm": 1.3292489051818848, "learning_rate": 8.176352057008174e-08, "loss": 0.3998, "step": 43215 }, { "epoch": 0.9593678205569306, "grad_norm": 0.9885017275810242, "learning_rate": 8.131915391866752e-08, "loss": 0.2783, "step": 43220 }, { "epoch": 0.9594788071164582, "grad_norm": 0.9839476346969604, "learning_rate": 8.087599314720696e-08, "loss": 0.2947, "step": 43225 }, { "epoch": 0.9595897936759858, "grad_norm": 2.1047043800354004, "learning_rate": 8.043403830957586e-08, "loss": 0.4337, "step": 43230 }, { "epoch": 0.9597007802355135, "grad_norm": 1.219032645225525, "learning_rate": 7.99932894595079e-08, "loss": 0.3295, "step": 43235 }, { "epoch": 0.9598117667950411, "grad_norm": 1.1567282676696777, "learning_rate": 7.95537466505858e-08, "loss": 0.2751, "step": 43240 }, { "epoch": 0.9599227533545688, "grad_norm": 0.9363585114479065, "learning_rate": 7.911540993624789e-08, "loss": 0.3384, "step": 43245 }, { "epoch": 0.9600337399140964, "grad_norm": 1.3244959115982056, "learning_rate": 7.867827936978711e-08, "loss": 0.324, "step": 43250 }, { "epoch": 0.960144726473624, "grad_norm": 1.3957321643829346, "learning_rate": 7.824235500434762e-08, "loss": 0.5184, "step": 43255 }, { "epoch": 0.9602557130331517, "grad_norm": 1.3671963214874268, "learning_rate": 7.780763689292814e-08, "loss": 0.4634, "step": 43260 }, { "epoch": 0.9603666995926793, "grad_norm": 0.523896336555481, "learning_rate": 7.73741250883786e-08, "loss": 0.2899, "step": 43265 }, { "epoch": 0.960477686152207, "grad_norm": 0.655168354511261, "learning_rate": 7.694181964340574e-08, "loss": 0.289, "step": 43270 }, { "epoch": 0.9605886727117346, "grad_norm": 2.0806264877319336, "learning_rate": 7.651072061056752e-08, "loss": 0.3962, "step": 43275 }, { "epoch": 0.9606996592712622, "grad_norm": 0.921379566192627, "learning_rate": 7.608082804227424e-08, "loss": 0.3671, "step": 43280 }, { "epoch": 0.9608106458307899, "grad_norm": 1.1921287775039673, "learning_rate": 7.565214199079185e-08, "loss": 0.382, "step": 43285 }, { "epoch": 0.9609216323903176, "grad_norm": 0.6474278569221497, "learning_rate": 7.522466250823867e-08, "loss": 0.2865, "step": 43290 }, { "epoch": 0.9610326189498452, "grad_norm": 1.0548124313354492, "learning_rate": 7.479838964658648e-08, "loss": 0.4609, "step": 43295 }, { "epoch": 0.9611436055093728, "grad_norm": 1.597723126411438, "learning_rate": 7.437332345765825e-08, "loss": 0.4392, "step": 43300 }, { "epoch": 0.9612545920689004, "grad_norm": 0.8780395984649658, "learning_rate": 7.394946399313374e-08, "loss": 0.2615, "step": 43305 }, { "epoch": 0.9613655786284281, "grad_norm": 1.7681487798690796, "learning_rate": 7.352681130454398e-08, "loss": 0.4348, "step": 43310 }, { "epoch": 0.9614765651879558, "grad_norm": 1.4514552354812622, "learning_rate": 7.310536544327452e-08, "loss": 0.3698, "step": 43315 }, { "epoch": 0.9615875517474833, "grad_norm": 1.1815019845962524, "learning_rate": 7.268512646056213e-08, "loss": 0.3915, "step": 43320 }, { "epoch": 0.961698538307011, "grad_norm": 1.2830376625061035, "learning_rate": 7.226609440749821e-08, "loss": 0.3866, "step": 43325 }, { "epoch": 0.9618095248665387, "grad_norm": 1.001721978187561, "learning_rate": 7.184826933502642e-08, "loss": 0.2697, "step": 43330 }, { "epoch": 0.9619205114260663, "grad_norm": 1.2729138135910034, "learning_rate": 7.143165129394725e-08, "loss": 0.6466, "step": 43335 }, { "epoch": 0.962031497985594, "grad_norm": 0.32785171270370483, "learning_rate": 7.101624033490906e-08, "loss": 0.3248, "step": 43340 }, { "epoch": 0.9621424845451216, "grad_norm": 0.7534942030906677, "learning_rate": 7.060203650841813e-08, "loss": 0.3856, "step": 43345 }, { "epoch": 0.9622534711046492, "grad_norm": 1.3082144260406494, "learning_rate": 7.018903986483083e-08, "loss": 0.3323, "step": 43350 }, { "epoch": 0.9623644576641769, "grad_norm": 1.194825530052185, "learning_rate": 6.977725045435702e-08, "loss": 0.3502, "step": 43355 }, { "epoch": 0.9624754442237045, "grad_norm": 1.8124589920043945, "learning_rate": 6.936666832706329e-08, "loss": 0.1988, "step": 43360 }, { "epoch": 0.9625864307832321, "grad_norm": 1.005189061164856, "learning_rate": 6.895729353286418e-08, "loss": 0.4766, "step": 43365 }, { "epoch": 0.9626974173427598, "grad_norm": 1.0299992561340332, "learning_rate": 6.854912612153097e-08, "loss": 0.4319, "step": 43370 }, { "epoch": 0.9628084039022874, "grad_norm": 1.143924355506897, "learning_rate": 6.814216614268843e-08, "loss": 0.4616, "step": 43375 }, { "epoch": 0.9629193904618151, "grad_norm": 1.1192833185195923, "learning_rate": 6.773641364581141e-08, "loss": 0.4629, "step": 43380 }, { "epoch": 0.9630303770213428, "grad_norm": 1.9731518030166626, "learning_rate": 6.733186868023156e-08, "loss": 0.3022, "step": 43385 }, { "epoch": 0.9631413635808703, "grad_norm": 1.9478931427001953, "learning_rate": 6.692853129513177e-08, "loss": 0.503, "step": 43390 }, { "epoch": 0.963252350140398, "grad_norm": 1.0108568668365479, "learning_rate": 6.652640153954836e-08, "loss": 0.4192, "step": 43395 }, { "epoch": 0.9633633366999257, "grad_norm": 1.6158348321914673, "learning_rate": 6.612547946237003e-08, "loss": 0.4582, "step": 43400 }, { "epoch": 0.9634743232594533, "grad_norm": 1.3156096935272217, "learning_rate": 6.572576511234108e-08, "loss": 0.3058, "step": 43405 }, { "epoch": 0.9635853098189809, "grad_norm": 0.9295489192008972, "learning_rate": 6.5327258538056e-08, "loss": 0.3073, "step": 43410 }, { "epoch": 0.9636962963785085, "grad_norm": 1.3966041803359985, "learning_rate": 6.492995978796379e-08, "loss": 0.4236, "step": 43415 }, { "epoch": 0.9638072829380362, "grad_norm": 1.1565799713134766, "learning_rate": 6.453386891036917e-08, "loss": 0.266, "step": 43420 }, { "epoch": 0.9639182694975639, "grad_norm": 1.27523934841156, "learning_rate": 6.413898595342472e-08, "loss": 0.3808, "step": 43425 }, { "epoch": 0.9640292560570914, "grad_norm": 1.3923174142837524, "learning_rate": 6.374531096514091e-08, "loss": 0.4183, "step": 43430 }, { "epoch": 0.9641402426166191, "grad_norm": 1.050148367881775, "learning_rate": 6.335284399337726e-08, "loss": 0.4404, "step": 43435 }, { "epoch": 0.9642512291761468, "grad_norm": 0.7960019111633301, "learning_rate": 6.296158508585115e-08, "loss": 0.353, "step": 43440 }, { "epoch": 0.9643622157356744, "grad_norm": 0.8833103179931641, "learning_rate": 6.257153429012897e-08, "loss": 0.424, "step": 43445 }, { "epoch": 0.9644732022952021, "grad_norm": 1.2397501468658447, "learning_rate": 6.218269165363166e-08, "loss": 0.4319, "step": 43450 }, { "epoch": 0.9645841888547297, "grad_norm": 1.011025309562683, "learning_rate": 6.179505722363367e-08, "loss": 0.336, "step": 43455 }, { "epoch": 0.9646951754142573, "grad_norm": 0.9096837043762207, "learning_rate": 6.140863104726391e-08, "loss": 0.3744, "step": 43460 }, { "epoch": 0.964806161973785, "grad_norm": 1.0249688625335693, "learning_rate": 6.10234131715004e-08, "loss": 0.4071, "step": 43465 }, { "epoch": 0.9649171485333126, "grad_norm": 1.0655795335769653, "learning_rate": 6.063940364317677e-08, "loss": 0.5957, "step": 43470 }, { "epoch": 0.9650281350928402, "grad_norm": 1.1619106531143188, "learning_rate": 6.025660250898124e-08, "loss": 0.4282, "step": 43475 }, { "epoch": 0.9651391216523679, "grad_norm": 0.9888255000114441, "learning_rate": 5.987500981545325e-08, "loss": 0.3985, "step": 43480 }, { "epoch": 0.9652501082118955, "grad_norm": 1.5950113534927368, "learning_rate": 5.9494625608984555e-08, "loss": 0.3052, "step": 43485 }, { "epoch": 0.9653610947714232, "grad_norm": 1.6013449430465698, "learning_rate": 5.9115449935821526e-08, "loss": 0.2657, "step": 43490 }, { "epoch": 0.9654720813309509, "grad_norm": 1.5253368616104126, "learning_rate": 5.873748284206171e-08, "loss": 0.4415, "step": 43495 }, { "epoch": 0.9655830678904784, "grad_norm": 1.13545823097229, "learning_rate": 5.836072437365947e-08, "loss": 0.4705, "step": 43500 }, { "epoch": 0.9656940544500061, "grad_norm": 1.5528640747070312, "learning_rate": 5.798517457641817e-08, "loss": 0.3194, "step": 43505 }, { "epoch": 0.9658050410095338, "grad_norm": 1.22503662109375, "learning_rate": 5.7610833495996833e-08, "loss": 0.3924, "step": 43510 }, { "epoch": 0.9659160275690614, "grad_norm": 1.2438831329345703, "learning_rate": 5.7237701177906836e-08, "loss": 0.3593, "step": 43515 }, { "epoch": 0.966027014128589, "grad_norm": 1.5987977981567383, "learning_rate": 5.686577766751078e-08, "loss": 0.2376, "step": 43520 }, { "epoch": 0.9661380006881166, "grad_norm": 1.195892333984375, "learning_rate": 5.649506301002583e-08, "loss": 0.4138, "step": 43525 }, { "epoch": 0.9662489872476443, "grad_norm": 1.0989891290664673, "learning_rate": 5.612555725052482e-08, "loss": 0.3425, "step": 43530 }, { "epoch": 0.966359973807172, "grad_norm": 1.1684006452560425, "learning_rate": 5.5757260433928485e-08, "loss": 0.5131, "step": 43535 }, { "epoch": 0.9664709603666995, "grad_norm": 1.0840319395065308, "learning_rate": 5.539017260501545e-08, "loss": 0.4428, "step": 43540 }, { "epoch": 0.9665819469262272, "grad_norm": 1.027111530303955, "learning_rate": 5.502429380841223e-08, "loss": 0.4958, "step": 43545 }, { "epoch": 0.9666929334857549, "grad_norm": 1.1387192010879517, "learning_rate": 5.465962408860326e-08, "loss": 0.4502, "step": 43550 }, { "epoch": 0.9668039200452825, "grad_norm": 1.6442123651504517, "learning_rate": 5.429616348992195e-08, "loss": 0.3364, "step": 43555 }, { "epoch": 0.9669149066048102, "grad_norm": 1.353895902633667, "learning_rate": 5.393391205655851e-08, "loss": 0.3276, "step": 43560 }, { "epoch": 0.9670258931643378, "grad_norm": 1.4506542682647705, "learning_rate": 5.357286983255439e-08, "loss": 0.4487, "step": 43565 }, { "epoch": 0.9671368797238654, "grad_norm": 0.8998032808303833, "learning_rate": 5.3213036861801125e-08, "loss": 0.3369, "step": 43570 }, { "epoch": 0.9672478662833931, "grad_norm": 2.011275291442871, "learning_rate": 5.285441318804929e-08, "loss": 0.3461, "step": 43575 }, { "epoch": 0.9673588528429207, "grad_norm": 0.7465802431106567, "learning_rate": 5.249699885489734e-08, "loss": 0.3433, "step": 43580 }, { "epoch": 0.9674698394024484, "grad_norm": 0.9246371388435364, "learning_rate": 5.2140793905799405e-08, "loss": 0.4466, "step": 43585 }, { "epoch": 0.967580825961976, "grad_norm": 0.8375202417373657, "learning_rate": 5.178579838406084e-08, "loss": 0.3347, "step": 43590 }, { "epoch": 0.9676918125215036, "grad_norm": 0.8115552663803101, "learning_rate": 5.143201233284156e-08, "loss": 0.3079, "step": 43595 }, { "epoch": 0.9678027990810313, "grad_norm": 0.6515522599220276, "learning_rate": 5.1079435795152735e-08, "loss": 0.3216, "step": 43600 }, { "epoch": 0.967913785640559, "grad_norm": 0.7174926400184631, "learning_rate": 5.072806881386005e-08, "loss": 0.2816, "step": 43605 }, { "epoch": 0.9680247722000865, "grad_norm": 1.0490044355392456, "learning_rate": 5.037791143168158e-08, "loss": 0.2679, "step": 43610 }, { "epoch": 0.9681357587596142, "grad_norm": 1.0273045301437378, "learning_rate": 5.0028963691188813e-08, "loss": 0.3042, "step": 43615 }, { "epoch": 0.9682467453191419, "grad_norm": 1.9209235906600952, "learning_rate": 4.9681225634804484e-08, "loss": 0.5211, "step": 43620 }, { "epoch": 0.9683577318786695, "grad_norm": 1.4809397459030151, "learning_rate": 4.933469730480589e-08, "loss": 0.4624, "step": 43625 }, { "epoch": 0.9684687184381972, "grad_norm": 1.1139112710952759, "learning_rate": 4.8989378743322654e-08, "loss": 0.289, "step": 43630 }, { "epoch": 0.9685797049977247, "grad_norm": 1.4791672229766846, "learning_rate": 4.8645269992337875e-08, "loss": 0.4627, "step": 43635 }, { "epoch": 0.9686906915572524, "grad_norm": 1.950037956237793, "learning_rate": 4.830237109368696e-08, "loss": 0.4771, "step": 43640 }, { "epoch": 0.9688016781167801, "grad_norm": 1.644313097000122, "learning_rate": 4.79606820890588e-08, "loss": 0.3903, "step": 43645 }, { "epoch": 0.9689126646763077, "grad_norm": 1.6992888450622559, "learning_rate": 4.762020301999459e-08, "loss": 0.3884, "step": 43650 }, { "epoch": 0.9690236512358353, "grad_norm": 1.1619939804077148, "learning_rate": 4.7280933927886795e-08, "loss": 0.3372, "step": 43655 }, { "epoch": 0.969134637795363, "grad_norm": 1.012537956237793, "learning_rate": 4.694287485398574e-08, "loss": 0.4419, "step": 43660 }, { "epoch": 0.9692456243548906, "grad_norm": 1.0167009830474854, "learning_rate": 4.660602583938967e-08, "loss": 0.6154, "step": 43665 }, { "epoch": 0.9693566109144183, "grad_norm": 2.161961317062378, "learning_rate": 4.6270386925051366e-08, "loss": 0.3254, "step": 43670 }, { "epoch": 0.969467597473946, "grad_norm": 0.96853107213974, "learning_rate": 4.59359581517782e-08, "loss": 0.1922, "step": 43675 }, { "epoch": 0.9695785840334735, "grad_norm": 0.6753823161125183, "learning_rate": 4.560273956022654e-08, "loss": 0.5552, "step": 43680 }, { "epoch": 0.9696895705930012, "grad_norm": 1.753062129020691, "learning_rate": 4.527073119091063e-08, "loss": 0.3512, "step": 43685 }, { "epoch": 0.9698005571525288, "grad_norm": 1.2516409158706665, "learning_rate": 4.4939933084192646e-08, "loss": 0.392, "step": 43690 }, { "epoch": 0.9699115437120565, "grad_norm": 1.4465858936309814, "learning_rate": 4.461034528029151e-08, "loss": 0.4693, "step": 43695 }, { "epoch": 0.9700225302715841, "grad_norm": 0.8663288950920105, "learning_rate": 4.42819678192774e-08, "loss": 0.4261, "step": 43700 }, { "epoch": 0.9701335168311117, "grad_norm": 0.7888398766517639, "learning_rate": 4.395480074107172e-08, "loss": 0.4786, "step": 43705 }, { "epoch": 0.9702445033906394, "grad_norm": 0.7504032850265503, "learning_rate": 4.362884408545154e-08, "loss": 0.2153, "step": 43710 }, { "epoch": 0.9703554899501671, "grad_norm": 1.000174641609192, "learning_rate": 4.330409789204515e-08, "loss": 0.3833, "step": 43715 }, { "epoch": 0.9704664765096946, "grad_norm": 1.6008414030075073, "learning_rate": 4.298056220033542e-08, "loss": 0.1503, "step": 43720 }, { "epoch": 0.9705774630692223, "grad_norm": 1.3198015689849854, "learning_rate": 4.2658237049655325e-08, "loss": 0.3803, "step": 43725 }, { "epoch": 0.97068844962875, "grad_norm": 0.7263050079345703, "learning_rate": 4.2337122479191304e-08, "loss": 0.2082, "step": 43730 }, { "epoch": 0.9707994361882776, "grad_norm": 1.561052918434143, "learning_rate": 4.201721852798657e-08, "loss": 0.4365, "step": 43735 }, { "epoch": 0.9709104227478053, "grad_norm": 0.8215240836143494, "learning_rate": 4.169852523493001e-08, "loss": 0.2649, "step": 43740 }, { "epoch": 0.9710214093073328, "grad_norm": 0.9629057049751282, "learning_rate": 4.138104263877063e-08, "loss": 0.4287, "step": 43745 }, { "epoch": 0.9711323958668605, "grad_norm": 1.085941195487976, "learning_rate": 4.106477077810422e-08, "loss": 0.3626, "step": 43750 }, { "epoch": 0.9712433824263882, "grad_norm": 1.2793713808059692, "learning_rate": 4.0749709691383365e-08, "loss": 0.232, "step": 43755 }, { "epoch": 0.9713543689859158, "grad_norm": 1.0775913000106812, "learning_rate": 4.043585941691297e-08, "loss": 0.3176, "step": 43760 }, { "epoch": 0.9714653555454434, "grad_norm": 1.6739836931228638, "learning_rate": 4.012321999284918e-08, "loss": 0.3226, "step": 43765 }, { "epoch": 0.9715763421049711, "grad_norm": 0.8687372207641602, "learning_rate": 3.981179145720049e-08, "loss": 0.4759, "step": 43770 }, { "epoch": 0.9716873286644987, "grad_norm": 0.6673811078071594, "learning_rate": 3.950157384783104e-08, "loss": 0.2595, "step": 43775 }, { "epoch": 0.9717983152240264, "grad_norm": 0.8460555672645569, "learning_rate": 3.9192567202455125e-08, "loss": 0.3479, "step": 43780 }, { "epoch": 0.9719093017835541, "grad_norm": 1.246845006942749, "learning_rate": 3.8884771558640454e-08, "loss": 0.3967, "step": 43785 }, { "epoch": 0.9720202883430816, "grad_norm": 3.1427626609802246, "learning_rate": 3.8578186953808216e-08, "loss": 0.3284, "step": 43790 }, { "epoch": 0.9721312749026093, "grad_norm": 1.5429573059082031, "learning_rate": 3.827281342523304e-08, "loss": 0.2478, "step": 43795 }, { "epoch": 0.9722422614621369, "grad_norm": 1.5764998197555542, "learning_rate": 3.796865101003966e-08, "loss": 0.4668, "step": 43800 }, { "epoch": 0.9723532480216646, "grad_norm": 1.108802318572998, "learning_rate": 3.76656997452074e-08, "loss": 0.4174, "step": 43805 }, { "epoch": 0.9724642345811922, "grad_norm": 1.0935370922088623, "learning_rate": 3.7363959667569006e-08, "loss": 0.403, "step": 43810 }, { "epoch": 0.9725752211407198, "grad_norm": 1.443190336227417, "learning_rate": 3.706343081380737e-08, "loss": 0.3971, "step": 43815 }, { "epoch": 0.9726862077002475, "grad_norm": 2.337955951690674, "learning_rate": 3.676411322046103e-08, "loss": 0.3461, "step": 43820 }, { "epoch": 0.9727971942597752, "grad_norm": 2.3962912559509277, "learning_rate": 3.6466006923919775e-08, "loss": 0.446, "step": 43825 }, { "epoch": 0.9729081808193027, "grad_norm": 1.3454636335372925, "learning_rate": 3.6169111960426826e-08, "loss": 0.3366, "step": 43830 }, { "epoch": 0.9730191673788304, "grad_norm": 1.3569358587265015, "learning_rate": 3.587342836607666e-08, "loss": 0.3145, "step": 43835 }, { "epoch": 0.9731301539383581, "grad_norm": 0.7849341034889221, "learning_rate": 3.557895617681717e-08, "loss": 0.3135, "step": 43840 }, { "epoch": 0.9732411404978857, "grad_norm": 1.1286523342132568, "learning_rate": 3.528569542845084e-08, "loss": 0.3887, "step": 43845 }, { "epoch": 0.9733521270574134, "grad_norm": 1.1259015798568726, "learning_rate": 3.499364615663137e-08, "loss": 0.344, "step": 43850 }, { "epoch": 0.9734631136169409, "grad_norm": 1.3279014825820923, "learning_rate": 3.4702808396863683e-08, "loss": 0.3563, "step": 43855 }, { "epoch": 0.9735741001764686, "grad_norm": 2.132629871368408, "learning_rate": 3.4413182184507285e-08, "loss": 0.4919, "step": 43860 }, { "epoch": 0.9736850867359963, "grad_norm": 1.5616865158081055, "learning_rate": 3.412476755477401e-08, "loss": 0.4435, "step": 43865 }, { "epoch": 0.9737960732955239, "grad_norm": 0.9031458497047424, "learning_rate": 3.383756454272913e-08, "loss": 0.462, "step": 43870 }, { "epoch": 0.9739070598550515, "grad_norm": 1.6827720403671265, "learning_rate": 3.355157318328916e-08, "loss": 0.2801, "step": 43875 }, { "epoch": 0.9740180464145792, "grad_norm": 1.1727105379104614, "learning_rate": 3.326679351122408e-08, "loss": 0.4307, "step": 43880 }, { "epoch": 0.9741290329741068, "grad_norm": 1.3294233083724976, "learning_rate": 3.2983225561156185e-08, "loss": 0.4035, "step": 43885 }, { "epoch": 0.9742400195336345, "grad_norm": 1.5523672103881836, "learning_rate": 3.270086936756123e-08, "loss": 0.3738, "step": 43890 }, { "epoch": 0.9743510060931622, "grad_norm": 1.3038649559020996, "learning_rate": 3.24197249647662e-08, "loss": 0.2539, "step": 43895 }, { "epoch": 0.9744619926526897, "grad_norm": 1.1056405305862427, "learning_rate": 3.213979238695375e-08, "loss": 0.3263, "step": 43900 }, { "epoch": 0.9745729792122174, "grad_norm": 1.8236504793167114, "learning_rate": 3.186107166815444e-08, "loss": 0.4064, "step": 43905 }, { "epoch": 0.974683965771745, "grad_norm": 1.126415729522705, "learning_rate": 3.158356284225561e-08, "loss": 0.2778, "step": 43910 }, { "epoch": 0.9747949523312727, "grad_norm": 1.2793995141983032, "learning_rate": 3.1307265942996935e-08, "loss": 0.4504, "step": 43915 }, { "epoch": 0.9749059388908003, "grad_norm": 1.346092939376831, "learning_rate": 3.1032181003967096e-08, "loss": 0.3949, "step": 43920 }, { "epoch": 0.9750169254503279, "grad_norm": 0.5175896286964417, "learning_rate": 3.075830805861157e-08, "loss": 0.3089, "step": 43925 }, { "epoch": 0.9751279120098556, "grad_norm": 1.5366896390914917, "learning_rate": 3.048564714022706e-08, "loss": 0.4121, "step": 43930 }, { "epoch": 0.9752388985693833, "grad_norm": 0.8148269057273865, "learning_rate": 3.021419828196259e-08, "loss": 0.39, "step": 43935 }, { "epoch": 0.9753498851289109, "grad_norm": 1.4114397764205933, "learning_rate": 2.994396151681955e-08, "loss": 0.4164, "step": 43940 }, { "epoch": 0.9754608716884385, "grad_norm": 1.029274344444275, "learning_rate": 2.967493687765277e-08, "loss": 0.292, "step": 43945 }, { "epoch": 0.9755718582479662, "grad_norm": 1.5301095247268677, "learning_rate": 2.9407124397169418e-08, "loss": 0.4802, "step": 43950 }, { "epoch": 0.9756828448074938, "grad_norm": 1.4264928102493286, "learning_rate": 2.9140524107929e-08, "loss": 0.3826, "step": 43955 }, { "epoch": 0.9757938313670215, "grad_norm": 0.8320451974868774, "learning_rate": 2.8875136042343378e-08, "loss": 0.4036, "step": 43960 }, { "epoch": 0.975904817926549, "grad_norm": 1.3311158418655396, "learning_rate": 2.8610960232678952e-08, "loss": 0.5306, "step": 43965 }, { "epoch": 0.9760158044860767, "grad_norm": 1.6904776096343994, "learning_rate": 2.8347996711052262e-08, "loss": 0.5003, "step": 43970 }, { "epoch": 0.9761267910456044, "grad_norm": 0.4445568919181824, "learning_rate": 2.8086245509434397e-08, "loss": 0.332, "step": 43975 }, { "epoch": 0.976237777605132, "grad_norm": 1.2849273681640625, "learning_rate": 2.7825706659646568e-08, "loss": 0.504, "step": 43980 }, { "epoch": 0.9763487641646597, "grad_norm": 1.234660267829895, "learning_rate": 2.756638019336566e-08, "loss": 0.3669, "step": 43985 }, { "epoch": 0.9764597507241873, "grad_norm": 1.010250210762024, "learning_rate": 2.7308266142119788e-08, "loss": 0.2806, "step": 43990 }, { "epoch": 0.9765707372837149, "grad_norm": 1.0096099376678467, "learning_rate": 2.7051364537288293e-08, "loss": 0.3195, "step": 43995 }, { "epoch": 0.9766817238432426, "grad_norm": 1.6283340454101562, "learning_rate": 2.679567541010619e-08, "loss": 0.374, "step": 44000 }, { "epoch": 0.9767927104027703, "grad_norm": 1.0632973909378052, "learning_rate": 2.6541198791657506e-08, "loss": 0.2042, "step": 44005 }, { "epoch": 0.9769036969622978, "grad_norm": 1.6411769390106201, "learning_rate": 2.6287934712881936e-08, "loss": 0.3868, "step": 44010 }, { "epoch": 0.9770146835218255, "grad_norm": 0.9020516872406006, "learning_rate": 2.603588320456929e-08, "loss": 0.3887, "step": 44015 }, { "epoch": 0.9771256700813531, "grad_norm": 1.113605260848999, "learning_rate": 2.5785044297365057e-08, "loss": 0.4919, "step": 44020 }, { "epoch": 0.9772366566408808, "grad_norm": 1.0201387405395508, "learning_rate": 2.5535418021763735e-08, "loss": 0.3106, "step": 44025 }, { "epoch": 0.9773476432004085, "grad_norm": 1.0621434450149536, "learning_rate": 2.528700440811438e-08, "loss": 0.4039, "step": 44030 }, { "epoch": 0.977458629759936, "grad_norm": 1.047764539718628, "learning_rate": 2.5039803486618385e-08, "loss": 0.3628, "step": 44035 }, { "epoch": 0.9775696163194637, "grad_norm": 1.3255995512008667, "learning_rate": 2.479381528732949e-08, "loss": 0.2393, "step": 44040 }, { "epoch": 0.9776806028789914, "grad_norm": 1.75392484664917, "learning_rate": 2.4549039840154887e-08, "loss": 0.4483, "step": 44045 }, { "epoch": 0.977791589438519, "grad_norm": 0.8042294383049011, "learning_rate": 2.4305477174852986e-08, "loss": 0.3084, "step": 44050 }, { "epoch": 0.9779025759980466, "grad_norm": 0.8410343527793884, "learning_rate": 2.4063127321034552e-08, "loss": 0.2697, "step": 44055 }, { "epoch": 0.9780135625575743, "grad_norm": 1.6293293237686157, "learning_rate": 2.38219903081649e-08, "loss": 0.3805, "step": 44060 }, { "epoch": 0.9781245491171019, "grad_norm": 0.9448833465576172, "learning_rate": 2.358206616555947e-08, "loss": 0.4176, "step": 44065 }, { "epoch": 0.9782355356766296, "grad_norm": 0.8377218842506409, "learning_rate": 2.3343354922389372e-08, "loss": 0.4437, "step": 44070 }, { "epoch": 0.9783465222361573, "grad_norm": 1.2574573755264282, "learning_rate": 2.3105856607674727e-08, "loss": 0.441, "step": 44075 }, { "epoch": 0.9784575087956848, "grad_norm": 1.2281770706176758, "learning_rate": 2.2869571250289102e-08, "loss": 0.3629, "step": 44080 }, { "epoch": 0.9785684953552125, "grad_norm": 1.4200760126113892, "learning_rate": 2.263449887896174e-08, "loss": 0.4407, "step": 44085 }, { "epoch": 0.9786794819147401, "grad_norm": 1.2259193658828735, "learning_rate": 2.2400639522269786e-08, "loss": 0.4167, "step": 44090 }, { "epoch": 0.9787904684742678, "grad_norm": 0.6913291215896606, "learning_rate": 2.2167993208644932e-08, "loss": 0.2671, "step": 44095 }, { "epoch": 0.9789014550337954, "grad_norm": 1.290332555770874, "learning_rate": 2.193655996637345e-08, "loss": 0.3919, "step": 44100 }, { "epoch": 0.979012441593323, "grad_norm": 0.9615741968154907, "learning_rate": 2.1706339823591716e-08, "loss": 0.5159, "step": 44105 }, { "epoch": 0.9791234281528507, "grad_norm": 1.1563535928726196, "learning_rate": 2.1477332808287342e-08, "loss": 0.3505, "step": 44110 }, { "epoch": 0.9792344147123784, "grad_norm": 1.0211172103881836, "learning_rate": 2.1249538948304726e-08, "loss": 0.4348, "step": 44115 }, { "epoch": 0.979345401271906, "grad_norm": 0.7108829617500305, "learning_rate": 2.1022958271336158e-08, "loss": 0.4062, "step": 44120 }, { "epoch": 0.9794563878314336, "grad_norm": 0.9567662477493286, "learning_rate": 2.07975908049296e-08, "loss": 0.5, "step": 44125 }, { "epoch": 0.9795673743909613, "grad_norm": 1.2178698778152466, "learning_rate": 2.0573436576484253e-08, "loss": 0.3449, "step": 44130 }, { "epoch": 0.9796783609504889, "grad_norm": 1.179904580116272, "learning_rate": 2.0350495613252753e-08, "loss": 0.2882, "step": 44135 }, { "epoch": 0.9797893475100166, "grad_norm": 1.3387186527252197, "learning_rate": 2.0128767942337868e-08, "loss": 0.2806, "step": 44140 }, { "epoch": 0.9799003340695441, "grad_norm": 0.8245904445648193, "learning_rate": 1.9908253590698033e-08, "loss": 0.3059, "step": 44145 }, { "epoch": 0.9800113206290718, "grad_norm": 1.3168443441390991, "learning_rate": 1.9688952585141808e-08, "loss": 0.3215, "step": 44150 }, { "epoch": 0.9801223071885995, "grad_norm": 1.4170719385147095, "learning_rate": 1.9470864952331192e-08, "loss": 0.4385, "step": 44155 }, { "epoch": 0.9802332937481271, "grad_norm": 1.0151528120040894, "learning_rate": 1.9253990718781646e-08, "loss": 0.4276, "step": 44160 }, { "epoch": 0.9803442803076547, "grad_norm": 2.1504173278808594, "learning_rate": 1.9038329910858743e-08, "loss": 0.4582, "step": 44165 }, { "epoch": 0.9804552668671824, "grad_norm": 2.0559544563293457, "learning_rate": 1.8823882554781513e-08, "loss": 0.3243, "step": 44170 }, { "epoch": 0.98056625342671, "grad_norm": 2.0051698684692383, "learning_rate": 1.8610648676622432e-08, "loss": 0.5498, "step": 44175 }, { "epoch": 0.9806772399862377, "grad_norm": 0.8001129627227783, "learning_rate": 1.839862830230632e-08, "loss": 0.4211, "step": 44180 }, { "epoch": 0.9807882265457654, "grad_norm": 1.5729435682296753, "learning_rate": 1.8187821457609222e-08, "loss": 0.3743, "step": 44185 }, { "epoch": 0.9808992131052929, "grad_norm": 1.2498583793640137, "learning_rate": 1.7978228168160638e-08, "loss": 0.3745, "step": 44190 }, { "epoch": 0.9810101996648206, "grad_norm": 0.9071432948112488, "learning_rate": 1.7769848459441296e-08, "loss": 0.3781, "step": 44195 }, { "epoch": 0.9811211862243482, "grad_norm": 0.6762136220932007, "learning_rate": 1.7562682356786488e-08, "loss": 0.4706, "step": 44200 }, { "epoch": 0.9812321727838759, "grad_norm": 1.1027727127075195, "learning_rate": 1.7356729885381617e-08, "loss": 0.3397, "step": 44205 }, { "epoch": 0.9813431593434035, "grad_norm": 1.1394597291946411, "learning_rate": 1.715199107026666e-08, "loss": 0.3597, "step": 44210 }, { "epoch": 0.9814541459029311, "grad_norm": 1.1961270570755005, "learning_rate": 1.6948465936332815e-08, "loss": 0.3961, "step": 44215 }, { "epoch": 0.9815651324624588, "grad_norm": 0.7435837388038635, "learning_rate": 1.674615450832362e-08, "loss": 0.3602, "step": 44220 }, { "epoch": 0.9816761190219865, "grad_norm": 1.2395622730255127, "learning_rate": 1.6545056810836068e-08, "loss": 0.3737, "step": 44225 }, { "epoch": 0.981787105581514, "grad_norm": 1.4732521772384644, "learning_rate": 1.634517286831949e-08, "loss": 0.3207, "step": 44230 }, { "epoch": 0.9818980921410417, "grad_norm": 1.865546464920044, "learning_rate": 1.6146502705072233e-08, "loss": 0.3945, "step": 44235 }, { "epoch": 0.9820090787005694, "grad_norm": 0.9982540607452393, "learning_rate": 1.594904634525163e-08, "loss": 0.2663, "step": 44240 }, { "epoch": 0.982120065260097, "grad_norm": 1.148383617401123, "learning_rate": 1.5752803812860706e-08, "loss": 0.2313, "step": 44245 }, { "epoch": 0.9822310518196247, "grad_norm": 1.4810702800750732, "learning_rate": 1.5557775131760376e-08, "loss": 0.3491, "step": 44250 }, { "epoch": 0.9823420383791522, "grad_norm": 1.1810661554336548, "learning_rate": 1.5363960325660565e-08, "loss": 0.3289, "step": 44255 }, { "epoch": 0.9824530249386799, "grad_norm": 1.225584864616394, "learning_rate": 1.5171359418123533e-08, "loss": 0.5736, "step": 44260 }, { "epoch": 0.9825640114982076, "grad_norm": 1.2184268236160278, "learning_rate": 1.4979972432567213e-08, "loss": 0.4315, "step": 44265 }, { "epoch": 0.9826749980577352, "grad_norm": 2.8620071411132812, "learning_rate": 1.4789799392258553e-08, "loss": 0.3292, "step": 44270 }, { "epoch": 0.9827859846172629, "grad_norm": 1.2940585613250732, "learning_rate": 1.4600840320317945e-08, "loss": 0.5029, "step": 44275 }, { "epoch": 0.9828969711767905, "grad_norm": 0.979594349861145, "learning_rate": 1.4413095239719233e-08, "loss": 0.2596, "step": 44280 }, { "epoch": 0.9830079577363181, "grad_norm": 0.9143130779266357, "learning_rate": 1.4226564173286383e-08, "loss": 0.3334, "step": 44285 }, { "epoch": 0.9831189442958458, "grad_norm": 1.4164605140686035, "learning_rate": 1.4041247143699033e-08, "loss": 0.3318, "step": 44290 }, { "epoch": 0.9832299308553735, "grad_norm": 1.3115427494049072, "learning_rate": 1.3857144173485827e-08, "loss": 0.447, "step": 44295 }, { "epoch": 0.983340917414901, "grad_norm": 1.3506934642791748, "learning_rate": 1.3674255285031079e-08, "loss": 0.4044, "step": 44300 }, { "epoch": 0.9834519039744287, "grad_norm": 1.1205554008483887, "learning_rate": 1.349258050056812e-08, "loss": 0.363, "step": 44305 }, { "epoch": 0.9835628905339563, "grad_norm": 0.9037481546401978, "learning_rate": 1.3312119842184834e-08, "loss": 0.3079, "step": 44310 }, { "epoch": 0.983673877093484, "grad_norm": 1.1315191984176636, "learning_rate": 1.313287333182256e-08, "loss": 0.3683, "step": 44315 }, { "epoch": 0.9837848636530117, "grad_norm": 1.7846201658248901, "learning_rate": 1.2954840991270535e-08, "loss": 0.2396, "step": 44320 }, { "epoch": 0.9838958502125392, "grad_norm": 1.5754671096801758, "learning_rate": 1.2778022842175886e-08, "loss": 0.2761, "step": 44325 }, { "epoch": 0.9840068367720669, "grad_norm": 1.4894391298294067, "learning_rate": 1.2602418906034753e-08, "loss": 0.3827, "step": 44330 }, { "epoch": 0.9841178233315946, "grad_norm": 2.369821786880493, "learning_rate": 1.2428029204195612e-08, "loss": 0.4618, "step": 44335 }, { "epoch": 0.9842288098911222, "grad_norm": 1.513770341873169, "learning_rate": 1.2254853757862617e-08, "loss": 0.336, "step": 44340 }, { "epoch": 0.9843397964506498, "grad_norm": 0.6345555186271667, "learning_rate": 1.2082892588086704e-08, "loss": 0.3276, "step": 44345 }, { "epoch": 0.9844507830101775, "grad_norm": 0.9394330978393555, "learning_rate": 1.1912145715775591e-08, "loss": 0.4819, "step": 44350 }, { "epoch": 0.9845617695697051, "grad_norm": 1.2701035737991333, "learning_rate": 1.1742613161689342e-08, "loss": 0.3594, "step": 44355 }, { "epoch": 0.9846727561292328, "grad_norm": 1.3909372091293335, "learning_rate": 1.1574294946438136e-08, "loss": 0.3869, "step": 44360 }, { "epoch": 0.9847837426887603, "grad_norm": 1.4137762784957886, "learning_rate": 1.1407191090485604e-08, "loss": 0.4051, "step": 44365 }, { "epoch": 0.984894729248288, "grad_norm": 0.9500555396080017, "learning_rate": 1.1241301614147715e-08, "loss": 0.3502, "step": 44370 }, { "epoch": 0.9850057158078157, "grad_norm": 0.7241771221160889, "learning_rate": 1.1076626537591672e-08, "loss": 0.3031, "step": 44375 }, { "epoch": 0.9851167023673433, "grad_norm": 0.9816907048225403, "learning_rate": 1.0913165880840348e-08, "loss": 0.2453, "step": 44380 }, { "epoch": 0.985227688926871, "grad_norm": 1.0117193460464478, "learning_rate": 1.0750919663764514e-08, "loss": 0.3752, "step": 44385 }, { "epoch": 0.9853386754863986, "grad_norm": 0.9182025194168091, "learning_rate": 1.0589887906090612e-08, "loss": 0.4162, "step": 44390 }, { "epoch": 0.9854496620459262, "grad_norm": 1.2671328783035278, "learning_rate": 1.0430070627397425e-08, "loss": 0.4278, "step": 44395 }, { "epoch": 0.9855606486054539, "grad_norm": 1.280246376991272, "learning_rate": 1.0271467847112748e-08, "loss": 0.3933, "step": 44400 }, { "epoch": 0.9856716351649816, "grad_norm": 0.9924276471138, "learning_rate": 1.0114079584520042e-08, "loss": 0.2724, "step": 44405 }, { "epoch": 0.9857826217245091, "grad_norm": 0.8611973524093628, "learning_rate": 9.957905858755112e-09, "loss": 0.4114, "step": 44410 }, { "epoch": 0.9858936082840368, "grad_norm": 1.7133750915527344, "learning_rate": 9.802946688802772e-09, "loss": 0.3201, "step": 44415 }, { "epoch": 0.9860045948435644, "grad_norm": 1.1161009073257446, "learning_rate": 9.649202093504616e-09, "loss": 0.4083, "step": 44420 }, { "epoch": 0.9861155814030921, "grad_norm": 1.7523982524871826, "learning_rate": 9.49667209155014e-09, "loss": 0.3819, "step": 44425 }, { "epoch": 0.9862265679626198, "grad_norm": 1.6116431951522827, "learning_rate": 9.34535670148562e-09, "loss": 0.3923, "step": 44430 }, { "epoch": 0.9863375545221473, "grad_norm": 1.6228337287902832, "learning_rate": 9.195255941707448e-09, "loss": 0.3135, "step": 44435 }, { "epoch": 0.986448541081675, "grad_norm": 1.1930763721466064, "learning_rate": 9.046369830462143e-09, "loss": 0.226, "step": 44440 }, { "epoch": 0.9865595276412027, "grad_norm": 2.828937292098999, "learning_rate": 8.898698385853e-09, "loss": 0.4682, "step": 44445 }, { "epoch": 0.9866705142007303, "grad_norm": 0.6722959280014038, "learning_rate": 8.752241625831215e-09, "loss": 0.2786, "step": 44450 }, { "epoch": 0.986781500760258, "grad_norm": 2.1818439960479736, "learning_rate": 8.606999568204766e-09, "loss": 0.4206, "step": 44455 }, { "epoch": 0.9868924873197856, "grad_norm": 0.8703639507293701, "learning_rate": 8.46297223063064e-09, "loss": 0.4662, "step": 44460 }, { "epoch": 0.9870034738793132, "grad_norm": 1.0020828247070312, "learning_rate": 8.320159630620384e-09, "loss": 0.3988, "step": 44465 }, { "epoch": 0.9871144604388409, "grad_norm": 1.252456784248352, "learning_rate": 8.178561785534556e-09, "loss": 0.3913, "step": 44470 }, { "epoch": 0.9872254469983684, "grad_norm": 1.3738957643508911, "learning_rate": 8.038178712589384e-09, "loss": 0.5101, "step": 44475 }, { "epoch": 0.9873364335578961, "grad_norm": 1.1012595891952515, "learning_rate": 7.899010428852328e-09, "loss": 0.3748, "step": 44480 }, { "epoch": 0.9874474201174238, "grad_norm": 1.94729745388031, "learning_rate": 7.761056951242074e-09, "loss": 0.2961, "step": 44485 }, { "epoch": 0.9875584066769514, "grad_norm": 0.995019793510437, "learning_rate": 7.624318296530763e-09, "loss": 0.271, "step": 44490 }, { "epoch": 0.9876693932364791, "grad_norm": 1.0689935684204102, "learning_rate": 7.488794481343986e-09, "loss": 0.518, "step": 44495 }, { "epoch": 0.9877803797960067, "grad_norm": 1.081696629524231, "learning_rate": 7.354485522157451e-09, "loss": 0.3497, "step": 44500 }, { "epoch": 0.9878913663555343, "grad_norm": 1.0555349588394165, "learning_rate": 7.221391435299208e-09, "loss": 0.487, "step": 44505 }, { "epoch": 0.988002352915062, "grad_norm": 1.3464195728302002, "learning_rate": 7.089512236950758e-09, "loss": 0.5904, "step": 44510 }, { "epoch": 0.9881133394745897, "grad_norm": 1.7165731191635132, "learning_rate": 6.958847943147051e-09, "loss": 0.4417, "step": 44515 }, { "epoch": 0.9882243260341173, "grad_norm": 0.8013442754745483, "learning_rate": 6.82939856977094e-09, "loss": 0.3522, "step": 44520 }, { "epoch": 0.9883353125936449, "grad_norm": 1.3391999006271362, "learning_rate": 6.701164132563165e-09, "loss": 0.2826, "step": 44525 }, { "epoch": 0.9884462991531725, "grad_norm": 1.4739177227020264, "learning_rate": 6.574144647112368e-09, "loss": 0.3699, "step": 44530 }, { "epoch": 0.9885572857127002, "grad_norm": 0.9099801182746887, "learning_rate": 6.448340128861752e-09, "loss": 0.3221, "step": 44535 }, { "epoch": 0.9886682722722279, "grad_norm": 2.0303006172180176, "learning_rate": 6.323750593106859e-09, "loss": 0.4449, "step": 44540 }, { "epoch": 0.9887792588317554, "grad_norm": 1.530608892440796, "learning_rate": 6.200376054993351e-09, "loss": 0.3188, "step": 44545 }, { "epoch": 0.9888902453912831, "grad_norm": 2.018537998199463, "learning_rate": 6.078216529522563e-09, "loss": 0.4305, "step": 44550 }, { "epoch": 0.9890012319508108, "grad_norm": 0.8686156272888184, "learning_rate": 5.957272031543726e-09, "loss": 0.2856, "step": 44555 }, { "epoch": 0.9891122185103384, "grad_norm": 0.7620587348937988, "learning_rate": 5.837542575763966e-09, "loss": 0.4426, "step": 44560 }, { "epoch": 0.989223205069866, "grad_norm": 4.526071071624756, "learning_rate": 5.719028176737196e-09, "loss": 0.4328, "step": 44565 }, { "epoch": 0.9893341916293937, "grad_norm": 0.9952146410942078, "learning_rate": 5.60172884887189e-09, "loss": 0.2975, "step": 44570 }, { "epoch": 0.9894451781889213, "grad_norm": 1.9656871557235718, "learning_rate": 5.485644606431084e-09, "loss": 0.286, "step": 44575 }, { "epoch": 0.989556164748449, "grad_norm": 1.245468258857727, "learning_rate": 5.3707754635257126e-09, "loss": 0.4315, "step": 44580 }, { "epoch": 0.9896671513079766, "grad_norm": 1.3038861751556396, "learning_rate": 5.257121434122381e-09, "loss": 0.31, "step": 44585 }, { "epoch": 0.9897781378675042, "grad_norm": 1.346030831336975, "learning_rate": 5.144682532038925e-09, "loss": 0.3789, "step": 44590 }, { "epoch": 0.9898891244270319, "grad_norm": 1.2573784589767456, "learning_rate": 5.03345877094441e-09, "loss": 0.3522, "step": 44595 }, { "epoch": 0.9900001109865595, "grad_norm": 1.692915678024292, "learning_rate": 4.923450164361354e-09, "loss": 0.4208, "step": 44600 }, { "epoch": 0.9901110975460872, "grad_norm": 1.0562539100646973, "learning_rate": 4.814656725664613e-09, "loss": 0.358, "step": 44605 }, { "epoch": 0.9902220841056149, "grad_norm": 1.2630434036254883, "learning_rate": 4.707078468080273e-09, "loss": 0.4486, "step": 44610 }, { "epoch": 0.9903330706651424, "grad_norm": 1.063307762145996, "learning_rate": 4.600715404687872e-09, "loss": 0.387, "step": 44615 }, { "epoch": 0.9904440572246701, "grad_norm": 0.9800068736076355, "learning_rate": 4.495567548419288e-09, "loss": 0.371, "step": 44620 }, { "epoch": 0.9905550437841978, "grad_norm": 1.1773452758789062, "learning_rate": 4.39163491205652e-09, "loss": 0.3439, "step": 44625 }, { "epoch": 0.9906660303437254, "grad_norm": 0.8336577415466309, "learning_rate": 4.288917508236124e-09, "loss": 0.5058, "step": 44630 }, { "epoch": 0.990777016903253, "grad_norm": 1.85921049118042, "learning_rate": 4.187415349445889e-09, "loss": 0.4798, "step": 44635 }, { "epoch": 0.9908880034627806, "grad_norm": 2.1528615951538086, "learning_rate": 4.087128448027056e-09, "loss": 0.3445, "step": 44640 }, { "epoch": 0.9909989900223083, "grad_norm": 1.1345372200012207, "learning_rate": 3.988056816170982e-09, "loss": 0.318, "step": 44645 }, { "epoch": 0.991109976581836, "grad_norm": 1.681210994720459, "learning_rate": 3.890200465923588e-09, "loss": 0.5825, "step": 44650 }, { "epoch": 0.9912209631413635, "grad_norm": 0.6857728362083435, "learning_rate": 3.793559409180913e-09, "loss": 0.3354, "step": 44655 }, { "epoch": 0.9913319497008912, "grad_norm": 1.3109638690948486, "learning_rate": 3.6981336576924487e-09, "loss": 0.362, "step": 44660 }, { "epoch": 0.9914429362604189, "grad_norm": 1.4599089622497559, "learning_rate": 3.603923223060024e-09, "loss": 0.3748, "step": 44665 }, { "epoch": 0.9915539228199465, "grad_norm": 2.070918560028076, "learning_rate": 3.5109281167367004e-09, "loss": 0.436, "step": 44670 }, { "epoch": 0.9916649093794742, "grad_norm": 1.6659787893295288, "learning_rate": 3.4191483500300995e-09, "loss": 0.3908, "step": 44675 }, { "epoch": 0.9917758959390018, "grad_norm": 0.8800778985023499, "learning_rate": 3.328583934096852e-09, "loss": 0.4847, "step": 44680 }, { "epoch": 0.9918868824985294, "grad_norm": 2.3744845390319824, "learning_rate": 3.23923487994815e-09, "loss": 0.3263, "step": 44685 }, { "epoch": 0.9919978690580571, "grad_norm": 1.5989969968795776, "learning_rate": 3.151101198446416e-09, "loss": 0.3522, "step": 44690 }, { "epoch": 0.9921088556175847, "grad_norm": 1.0988764762878418, "learning_rate": 3.064182900307522e-09, "loss": 0.3976, "step": 44695 }, { "epoch": 0.9922198421771123, "grad_norm": 0.8881332874298096, "learning_rate": 2.978479996098571e-09, "loss": 0.2979, "step": 44700 }, { "epoch": 0.99233082873664, "grad_norm": 1.5848913192749023, "learning_rate": 2.8939924962378964e-09, "loss": 0.362, "step": 44705 }, { "epoch": 0.9924418152961676, "grad_norm": 2.511681079864502, "learning_rate": 2.810720410998391e-09, "loss": 0.434, "step": 44710 }, { "epoch": 0.9925528018556953, "grad_norm": 1.503347635269165, "learning_rate": 2.728663750503069e-09, "loss": 0.3451, "step": 44715 }, { "epoch": 0.992663788415223, "grad_norm": 0.9345759749412537, "learning_rate": 2.647822524729504e-09, "loss": 0.2029, "step": 44720 }, { "epoch": 0.9927747749747505, "grad_norm": 1.4155058860778809, "learning_rate": 2.568196743504281e-09, "loss": 0.3551, "step": 44725 }, { "epoch": 0.9928857615342782, "grad_norm": 1.0107218027114868, "learning_rate": 2.489786416508544e-09, "loss": 0.228, "step": 44730 }, { "epoch": 0.9929967480938059, "grad_norm": 1.2654509544372559, "learning_rate": 2.4125915532757782e-09, "loss": 0.5571, "step": 44735 }, { "epoch": 0.9931077346533335, "grad_norm": 1.0148260593414307, "learning_rate": 2.336612163191809e-09, "loss": 0.2945, "step": 44740 }, { "epoch": 0.9932187212128611, "grad_norm": 1.9776355028152466, "learning_rate": 2.2618482554925825e-09, "loss": 0.2915, "step": 44745 }, { "epoch": 0.9933297077723887, "grad_norm": 0.6568176746368408, "learning_rate": 2.1882998392674936e-09, "loss": 0.3109, "step": 44750 }, { "epoch": 0.9934406943319164, "grad_norm": 1.109466314315796, "learning_rate": 2.1159669234593893e-09, "loss": 0.5572, "step": 44755 }, { "epoch": 0.9935516808914441, "grad_norm": 0.8876851201057434, "learning_rate": 2.044849516861236e-09, "loss": 0.3924, "step": 44760 }, { "epoch": 0.9936626674509716, "grad_norm": 0.8593242168426514, "learning_rate": 1.97494762811945e-09, "loss": 0.54, "step": 44765 }, { "epoch": 0.9937736540104993, "grad_norm": 1.1869981288909912, "learning_rate": 1.9062612657338996e-09, "loss": 0.3661, "step": 44770 }, { "epoch": 0.993884640570027, "grad_norm": 2.151790142059326, "learning_rate": 1.8387904380534615e-09, "loss": 0.4096, "step": 44775 }, { "epoch": 0.9939956271295546, "grad_norm": 0.9031875133514404, "learning_rate": 1.7725351532815738e-09, "loss": 0.2741, "step": 44780 }, { "epoch": 0.9941066136890823, "grad_norm": 1.4749810695648193, "learning_rate": 1.7074954194729044e-09, "loss": 0.4858, "step": 44785 }, { "epoch": 0.99421760024861, "grad_norm": 0.9121118187904358, "learning_rate": 1.6436712445366821e-09, "loss": 0.3698, "step": 44790 }, { "epoch": 0.9943285868081375, "grad_norm": 1.0880600214004517, "learning_rate": 1.5810626362300351e-09, "loss": 0.3834, "step": 44795 }, { "epoch": 0.9944395733676652, "grad_norm": 1.0860681533813477, "learning_rate": 1.519669602165763e-09, "loss": 0.3456, "step": 44800 }, { "epoch": 0.9945505599271928, "grad_norm": 1.1477147340774536, "learning_rate": 1.4594921498078951e-09, "loss": 0.3126, "step": 44805 }, { "epoch": 0.9946615464867204, "grad_norm": 1.5204180479049683, "learning_rate": 1.4005302864716908e-09, "loss": 0.2647, "step": 44810 }, { "epoch": 0.9947725330462481, "grad_norm": 1.7552012205123901, "learning_rate": 1.3427840193280805e-09, "loss": 0.4337, "step": 44815 }, { "epoch": 0.9948835196057757, "grad_norm": 0.8465442061424255, "learning_rate": 1.2862533553947843e-09, "loss": 0.4007, "step": 44820 }, { "epoch": 0.9949945061653034, "grad_norm": 0.7114987373352051, "learning_rate": 1.2309383015451926e-09, "loss": 0.3397, "step": 44825 }, { "epoch": 0.9951054927248311, "grad_norm": 0.8507906794548035, "learning_rate": 1.1768388645061468e-09, "loss": 0.3564, "step": 44830 }, { "epoch": 0.9952164792843586, "grad_norm": 1.5379858016967773, "learning_rate": 1.1239550508523877e-09, "loss": 0.5228, "step": 44835 }, { "epoch": 0.9953274658438863, "grad_norm": 0.9246373176574707, "learning_rate": 1.0722868670154374e-09, "loss": 0.3035, "step": 44840 }, { "epoch": 0.995438452403414, "grad_norm": 1.0635490417480469, "learning_rate": 1.0218343192758273e-09, "loss": 0.3683, "step": 44845 }, { "epoch": 0.9955494389629416, "grad_norm": 1.466962218284607, "learning_rate": 9.725974137675399e-10, "loss": 0.3935, "step": 44850 }, { "epoch": 0.9956604255224693, "grad_norm": 1.0246407985687256, "learning_rate": 9.245761564768973e-10, "loss": 0.3812, "step": 44855 }, { "epoch": 0.9957714120819968, "grad_norm": 1.632717490196228, "learning_rate": 8.777705532414526e-10, "loss": 0.3514, "step": 44860 }, { "epoch": 0.9958823986415245, "grad_norm": 0.7847806215286255, "learning_rate": 8.321806097522089e-10, "loss": 0.4794, "step": 44865 }, { "epoch": 0.9959933852010522, "grad_norm": 0.9914886951446533, "learning_rate": 7.878063315525097e-10, "loss": 0.4997, "step": 44870 }, { "epoch": 0.9961043717605798, "grad_norm": 0.9847691655158997, "learning_rate": 7.446477240358185e-10, "loss": 0.2787, "step": 44875 }, { "epoch": 0.9962153583201074, "grad_norm": 1.0501551628112793, "learning_rate": 7.027047924512698e-10, "loss": 0.2494, "step": 44880 }, { "epoch": 0.9963263448796351, "grad_norm": 0.8864679336547852, "learning_rate": 6.619775418958974e-10, "loss": 0.3905, "step": 44885 }, { "epoch": 0.9964373314391627, "grad_norm": 0.9408095479011536, "learning_rate": 6.224659773212959e-10, "loss": 0.4379, "step": 44890 }, { "epoch": 0.9965483179986904, "grad_norm": 2.961080551147461, "learning_rate": 5.841701035336212e-10, "loss": 0.4349, "step": 44895 }, { "epoch": 0.996659304558218, "grad_norm": 1.2695547342300415, "learning_rate": 5.470899251858175e-10, "loss": 0.364, "step": 44900 }, { "epoch": 0.9967702911177456, "grad_norm": 0.9183524250984192, "learning_rate": 5.11225446787611e-10, "loss": 0.3824, "step": 44905 }, { "epoch": 0.9968812776772733, "grad_norm": 1.1266758441925049, "learning_rate": 4.765766726999577e-10, "loss": 0.2879, "step": 44910 }, { "epoch": 0.9969922642368009, "grad_norm": 0.5046502947807312, "learning_rate": 4.4314360713282324e-10, "loss": 0.3201, "step": 44915 }, { "epoch": 0.9971032507963286, "grad_norm": 1.2100203037261963, "learning_rate": 4.109262541529546e-10, "loss": 0.4993, "step": 44920 }, { "epoch": 0.9972142373558562, "grad_norm": 0.8196183443069458, "learning_rate": 3.7992461767721865e-10, "loss": 0.535, "step": 44925 }, { "epoch": 0.9973252239153838, "grad_norm": 1.5104304552078247, "learning_rate": 3.501387014737123e-10, "loss": 0.3876, "step": 44930 }, { "epoch": 0.9974362104749115, "grad_norm": 2.42695689201355, "learning_rate": 3.2156850916398307e-10, "loss": 0.4088, "step": 44935 }, { "epoch": 0.9975471970344392, "grad_norm": 1.9051953554153442, "learning_rate": 2.942140442219188e-10, "loss": 0.3739, "step": 44940 }, { "epoch": 0.9976581835939667, "grad_norm": 1.0544376373291016, "learning_rate": 2.680753099726374e-10, "loss": 0.4976, "step": 44945 }, { "epoch": 0.9977691701534944, "grad_norm": 1.1928341388702393, "learning_rate": 2.4315230959359726e-10, "loss": 0.3354, "step": 44950 }, { "epoch": 0.9978801567130221, "grad_norm": 1.284271001815796, "learning_rate": 2.194450461168174e-10, "loss": 0.4687, "step": 44955 }, { "epoch": 0.9979911432725497, "grad_norm": 0.9410303235054016, "learning_rate": 1.9695352242221633e-10, "loss": 0.2556, "step": 44960 }, { "epoch": 0.9981021298320774, "grad_norm": 1.4326661825180054, "learning_rate": 1.7567774124649384e-10, "loss": 0.3306, "step": 44965 }, { "epoch": 0.9982131163916049, "grad_norm": 0.7842845916748047, "learning_rate": 1.5561770517424913e-10, "loss": 0.4147, "step": 44970 }, { "epoch": 0.9983241029511326, "grad_norm": 1.3309615850448608, "learning_rate": 1.3677341664464216e-10, "loss": 0.4497, "step": 44975 }, { "epoch": 0.9984350895106603, "grad_norm": 0.7155956625938416, "learning_rate": 1.191448779502835e-10, "loss": 0.3195, "step": 44980 }, { "epoch": 0.9985460760701879, "grad_norm": 1.2971090078353882, "learning_rate": 1.0273209123279338e-10, "loss": 0.3304, "step": 44985 }, { "epoch": 0.9986570626297155, "grad_norm": 0.8447107076644897, "learning_rate": 8.753505848724253e-11, "loss": 0.3095, "step": 44990 }, { "epoch": 0.9987680491892432, "grad_norm": 0.9911041855812073, "learning_rate": 7.355378156326254e-11, "loss": 0.4211, "step": 44995 }, { "epoch": 0.9988790357487708, "grad_norm": 0.7930455207824707, "learning_rate": 6.078826215949462e-11, "loss": 0.3887, "step": 45000 }, { "epoch": 0.9989900223082985, "grad_norm": 1.4207168817520142, "learning_rate": 4.923850182692036e-11, "loss": 0.4148, "step": 45005 }, { "epoch": 0.9991010088678262, "grad_norm": 0.814357340335846, "learning_rate": 3.8904501971082086e-11, "loss": 0.3109, "step": 45010 }, { "epoch": 0.9992119954273537, "grad_norm": 1.4594573974609375, "learning_rate": 2.978626384875227e-11, "loss": 0.3686, "step": 45015 }, { "epoch": 0.9993229819868814, "grad_norm": 2.3122453689575195, "learning_rate": 2.188378856682327e-11, "loss": 0.333, "step": 45020 }, { "epoch": 0.999433968546409, "grad_norm": 1.6867766380310059, "learning_rate": 1.5197077087858448e-11, "loss": 0.4699, "step": 45025 }, { "epoch": 0.9995449551059367, "grad_norm": 1.980859637260437, "learning_rate": 9.726130222320607e-12, "loss": 0.4905, "step": 45030 }, { "epoch": 0.9996559416654643, "grad_norm": 1.486547827720642, "learning_rate": 5.4709486385640106e-12, "loss": 0.4427, "step": 45035 }, { "epoch": 0.9997669282249919, "grad_norm": 1.1338590383529663, "learning_rate": 2.4315328517321435e-12, "loss": 0.4851, "step": 45040 }, { "epoch": 0.9998779147845196, "grad_norm": 1.1397794485092163, "learning_rate": 6.078832315292716e-13, "loss": 0.4461, "step": 45045 }, { "epoch": 0.9999889013440473, "grad_norm": 0.7539506554603577, "learning_rate": 0.0, "loss": 0.4099, "step": 45050 }, { "epoch": 0.9999889013440473, "step": 45050, "total_flos": 2.0927971544599167e+19, "train_loss": 0.45276333124470897, "train_runtime": 95784.0198, "train_samples_per_second": 11.288, "train_steps_per_second": 0.47 } ], "logging_steps": 5, "max_steps": 45050, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 15000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.0927971544599167e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }