{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.428289573531644, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00030353619669145547, "grad_norm": 9.52797794342041, "learning_rate": 1e-05, "loss": 5.0165, "step": 1 }, { "epoch": 0.0006070723933829109, "grad_norm": 10.161993026733398, "learning_rate": 2e-05, "loss": 4.7408, "step": 2 }, { "epoch": 0.0009106085900743664, "grad_norm": 7.550526142120361, "learning_rate": 3e-05, "loss": 5.0209, "step": 3 }, { "epoch": 0.0012141447867658219, "grad_norm": 5.454105377197266, "learning_rate": 4e-05, "loss": 4.4045, "step": 4 }, { "epoch": 0.0015176809834572772, "grad_norm": 3.715569257736206, "learning_rate": 5e-05, "loss": 4.0617, "step": 5 }, { "epoch": 0.0018212171801487327, "grad_norm": 3.9456210136413574, "learning_rate": 6e-05, "loss": 3.813, "step": 6 }, { "epoch": 0.002124753376840188, "grad_norm": 3.6386630535125732, "learning_rate": 7e-05, "loss": 3.5396, "step": 7 }, { "epoch": 0.0024282895735316438, "grad_norm": 2.5329768657684326, "learning_rate": 8e-05, "loss": 3.1611, "step": 8 }, { "epoch": 0.002731825770223099, "grad_norm": 2.09954571723938, "learning_rate": 9e-05, "loss": 2.8787, "step": 9 }, { "epoch": 0.0030353619669145544, "grad_norm": 2.0083999633789062, "learning_rate": 0.0001, "loss": 2.6942, "step": 10 }, { "epoch": 0.00333889816360601, "grad_norm": 1.419735074043274, "learning_rate": 9.999392466585664e-05, "loss": 2.5674, "step": 11 }, { "epoch": 0.0036424343602974654, "grad_norm": 3.0809664726257324, "learning_rate": 9.998784933171324e-05, "loss": 2.2474, "step": 12 }, { "epoch": 0.003945970556988921, "grad_norm": 1.4494595527648926, "learning_rate": 9.998177399756987e-05, "loss": 2.1931, "step": 13 }, { "epoch": 0.004249506753680376, "grad_norm": 1.4052276611328125, "learning_rate": 9.99756986634265e-05, "loss": 2.2179, "step": 14 }, { "epoch": 0.004553042950371832, "grad_norm": 1.0900732278823853, "learning_rate": 9.996962332928312e-05, "loss": 2.3455, "step": 15 }, { "epoch": 0.0048565791470632875, "grad_norm": 1.078604817390442, "learning_rate": 9.996354799513974e-05, "loss": 2.1906, "step": 16 }, { "epoch": 0.005160115343754742, "grad_norm": 1.0777554512023926, "learning_rate": 9.995747266099635e-05, "loss": 2.4069, "step": 17 }, { "epoch": 0.005463651540446198, "grad_norm": 1.2703579664230347, "learning_rate": 9.995139732685298e-05, "loss": 2.1901, "step": 18 }, { "epoch": 0.005767187737137654, "grad_norm": 2.06676983833313, "learning_rate": 9.99453219927096e-05, "loss": 2.4616, "step": 19 }, { "epoch": 0.006070723933829109, "grad_norm": 1.0544441938400269, "learning_rate": 9.993924665856622e-05, "loss": 1.9529, "step": 20 }, { "epoch": 0.0063742601305205645, "grad_norm": 1.1237947940826416, "learning_rate": 9.993317132442285e-05, "loss": 2.8619, "step": 21 }, { "epoch": 0.00667779632721202, "grad_norm": 0.8750623464584351, "learning_rate": 9.992709599027947e-05, "loss": 2.0467, "step": 22 }, { "epoch": 0.006981332523903475, "grad_norm": 0.8135535717010498, "learning_rate": 9.992102065613608e-05, "loss": 2.3566, "step": 23 }, { "epoch": 0.007284868720594931, "grad_norm": 0.8838183879852295, "learning_rate": 9.991494532199271e-05, "loss": 2.2145, "step": 24 }, { "epoch": 0.007588404917286387, "grad_norm": 0.7460266351699829, "learning_rate": 9.990886998784935e-05, "loss": 1.7996, "step": 25 }, { "epoch": 0.007891941113977842, "grad_norm": 0.7469210028648376, "learning_rate": 9.990279465370595e-05, "loss": 1.9147, "step": 26 }, { "epoch": 0.008195477310669297, "grad_norm": 0.796752393245697, "learning_rate": 9.989671931956258e-05, "loss": 1.6982, "step": 27 }, { "epoch": 0.008499013507360752, "grad_norm": 0.9568108916282654, "learning_rate": 9.989064398541921e-05, "loss": 2.294, "step": 28 }, { "epoch": 0.008802549704052209, "grad_norm": 0.7790305018424988, "learning_rate": 9.988456865127583e-05, "loss": 1.7283, "step": 29 }, { "epoch": 0.009106085900743664, "grad_norm": 0.5705334544181824, "learning_rate": 9.987849331713245e-05, "loss": 2.0917, "step": 30 }, { "epoch": 0.009409622097435118, "grad_norm": 0.8099403381347656, "learning_rate": 9.987241798298906e-05, "loss": 2.1187, "step": 31 }, { "epoch": 0.009713158294126575, "grad_norm": 0.728687584400177, "learning_rate": 9.98663426488457e-05, "loss": 2.0019, "step": 32 }, { "epoch": 0.01001669449081803, "grad_norm": 0.7341739535331726, "learning_rate": 9.986026731470231e-05, "loss": 2.2379, "step": 33 }, { "epoch": 0.010320230687509485, "grad_norm": 0.6100563406944275, "learning_rate": 9.985419198055893e-05, "loss": 2.2385, "step": 34 }, { "epoch": 0.010623766884200941, "grad_norm": 0.57859206199646, "learning_rate": 9.984811664641556e-05, "loss": 1.9331, "step": 35 }, { "epoch": 0.010927303080892396, "grad_norm": 0.5878285765647888, "learning_rate": 9.984204131227218e-05, "loss": 1.8691, "step": 36 }, { "epoch": 0.011230839277583851, "grad_norm": 0.5095940232276917, "learning_rate": 9.98359659781288e-05, "loss": 2.1699, "step": 37 }, { "epoch": 0.011534375474275308, "grad_norm": 0.5028595924377441, "learning_rate": 9.982989064398542e-05, "loss": 1.6226, "step": 38 }, { "epoch": 0.011837911670966763, "grad_norm": 0.6969617009162903, "learning_rate": 9.982381530984206e-05, "loss": 1.7049, "step": 39 }, { "epoch": 0.012141447867658217, "grad_norm": 0.6432283520698547, "learning_rate": 9.981773997569866e-05, "loss": 1.645, "step": 40 }, { "epoch": 0.012444984064349674, "grad_norm": 0.5575637221336365, "learning_rate": 9.981166464155529e-05, "loss": 2.122, "step": 41 }, { "epoch": 0.012748520261041129, "grad_norm": 0.8630117177963257, "learning_rate": 9.980558930741192e-05, "loss": 2.4391, "step": 42 }, { "epoch": 0.013052056457732584, "grad_norm": 0.7215672135353088, "learning_rate": 9.979951397326854e-05, "loss": 1.6903, "step": 43 }, { "epoch": 0.01335559265442404, "grad_norm": 0.6649103164672852, "learning_rate": 9.979343863912516e-05, "loss": 2.0192, "step": 44 }, { "epoch": 0.013659128851115495, "grad_norm": 0.7561375498771667, "learning_rate": 9.978736330498177e-05, "loss": 2.1745, "step": 45 }, { "epoch": 0.01396266504780695, "grad_norm": 1.5740697383880615, "learning_rate": 9.97812879708384e-05, "loss": 2.4883, "step": 46 }, { "epoch": 0.014266201244498407, "grad_norm": 0.49843546748161316, "learning_rate": 9.977521263669502e-05, "loss": 2.3359, "step": 47 }, { "epoch": 0.014569737441189862, "grad_norm": 0.6524083018302917, "learning_rate": 9.976913730255164e-05, "loss": 2.0283, "step": 48 }, { "epoch": 0.014873273637881317, "grad_norm": 0.5995165705680847, "learning_rate": 9.976306196840827e-05, "loss": 2.2854, "step": 49 }, { "epoch": 0.015176809834572773, "grad_norm": 0.533091127872467, "learning_rate": 9.975698663426489e-05, "loss": 1.7316, "step": 50 }, { "epoch": 0.015480346031264228, "grad_norm": 0.4611203372478485, "learning_rate": 9.97509113001215e-05, "loss": 1.9873, "step": 51 }, { "epoch": 0.015783882227955685, "grad_norm": 0.5517066121101379, "learning_rate": 9.974483596597813e-05, "loss": 2.3221, "step": 52 }, { "epoch": 0.01608741842464714, "grad_norm": 1.1481316089630127, "learning_rate": 9.973876063183477e-05, "loss": 2.1987, "step": 53 }, { "epoch": 0.016390954621338594, "grad_norm": 0.5169709324836731, "learning_rate": 9.973268529769137e-05, "loss": 2.0714, "step": 54 }, { "epoch": 0.01669449081803005, "grad_norm": 0.5325965881347656, "learning_rate": 9.9726609963548e-05, "loss": 2.0495, "step": 55 }, { "epoch": 0.016998027014721504, "grad_norm": 0.5272805690765381, "learning_rate": 9.972053462940463e-05, "loss": 1.6467, "step": 56 }, { "epoch": 0.017301563211412962, "grad_norm": 0.5756974816322327, "learning_rate": 9.971445929526125e-05, "loss": 1.8153, "step": 57 }, { "epoch": 0.017605099408104417, "grad_norm": 0.49965259432792664, "learning_rate": 9.970838396111787e-05, "loss": 1.7549, "step": 58 }, { "epoch": 0.017908635604795872, "grad_norm": 0.4551718235015869, "learning_rate": 9.970230862697448e-05, "loss": 2.0268, "step": 59 }, { "epoch": 0.018212171801487327, "grad_norm": 0.4995061159133911, "learning_rate": 9.969623329283111e-05, "loss": 2.1194, "step": 60 }, { "epoch": 0.018515707998178782, "grad_norm": 0.6005909442901611, "learning_rate": 9.969015795868773e-05, "loss": 2.0995, "step": 61 }, { "epoch": 0.018819244194870237, "grad_norm": 0.5313609838485718, "learning_rate": 9.968408262454435e-05, "loss": 2.2653, "step": 62 }, { "epoch": 0.019122780391561695, "grad_norm": 0.4645906388759613, "learning_rate": 9.967800729040098e-05, "loss": 2.0501, "step": 63 }, { "epoch": 0.01942631658825315, "grad_norm": 0.4981083869934082, "learning_rate": 9.96719319562576e-05, "loss": 1.2802, "step": 64 }, { "epoch": 0.019729852784944605, "grad_norm": 0.7034462094306946, "learning_rate": 9.966585662211421e-05, "loss": 1.8468, "step": 65 }, { "epoch": 0.02003338898163606, "grad_norm": 0.5249907374382019, "learning_rate": 9.965978128797084e-05, "loss": 1.7569, "step": 66 }, { "epoch": 0.020336925178327515, "grad_norm": 0.7569686770439148, "learning_rate": 9.965370595382748e-05, "loss": 2.2157, "step": 67 }, { "epoch": 0.02064046137501897, "grad_norm": 0.7423145174980164, "learning_rate": 9.964763061968408e-05, "loss": 1.961, "step": 68 }, { "epoch": 0.020943997571710428, "grad_norm": 0.6891425251960754, "learning_rate": 9.964155528554071e-05, "loss": 2.0419, "step": 69 }, { "epoch": 0.021247533768401883, "grad_norm": 0.5633382201194763, "learning_rate": 9.963547995139734e-05, "loss": 1.7981, "step": 70 }, { "epoch": 0.021551069965093338, "grad_norm": 0.4792400598526001, "learning_rate": 9.962940461725396e-05, "loss": 1.7946, "step": 71 }, { "epoch": 0.021854606161784793, "grad_norm": 0.43436333537101746, "learning_rate": 9.962332928311058e-05, "loss": 1.9947, "step": 72 }, { "epoch": 0.022158142358476247, "grad_norm": 0.511132001876831, "learning_rate": 9.96172539489672e-05, "loss": 1.8512, "step": 73 }, { "epoch": 0.022461678555167702, "grad_norm": 0.5628978610038757, "learning_rate": 9.961117861482382e-05, "loss": 2.3258, "step": 74 }, { "epoch": 0.02276521475185916, "grad_norm": 0.5179631114006042, "learning_rate": 9.960510328068044e-05, "loss": 2.1093, "step": 75 }, { "epoch": 0.023068750948550616, "grad_norm": 0.45745086669921875, "learning_rate": 9.959902794653706e-05, "loss": 2.0091, "step": 76 }, { "epoch": 0.02337228714524207, "grad_norm": 0.49223676323890686, "learning_rate": 9.959295261239369e-05, "loss": 1.8603, "step": 77 }, { "epoch": 0.023675823341933525, "grad_norm": 0.44269105792045593, "learning_rate": 9.958687727825031e-05, "loss": 2.0431, "step": 78 }, { "epoch": 0.02397935953862498, "grad_norm": 0.45361781120300293, "learning_rate": 9.958080194410692e-05, "loss": 2.1202, "step": 79 }, { "epoch": 0.024282895735316435, "grad_norm": 0.4460793137550354, "learning_rate": 9.957472660996356e-05, "loss": 2.0395, "step": 80 }, { "epoch": 0.024586431932007893, "grad_norm": 0.3576311767101288, "learning_rate": 9.956865127582019e-05, "loss": 1.6929, "step": 81 }, { "epoch": 0.024889968128699348, "grad_norm": 0.47067755460739136, "learning_rate": 9.956257594167679e-05, "loss": 1.7033, "step": 82 }, { "epoch": 0.025193504325390803, "grad_norm": 0.40777909755706787, "learning_rate": 9.955650060753342e-05, "loss": 1.8594, "step": 83 }, { "epoch": 0.025497040522082258, "grad_norm": 0.4231606721878052, "learning_rate": 9.955042527339005e-05, "loss": 1.9413, "step": 84 }, { "epoch": 0.025800576718773713, "grad_norm": 0.4901526868343353, "learning_rate": 9.954434993924666e-05, "loss": 1.5754, "step": 85 }, { "epoch": 0.026104112915465168, "grad_norm": 0.4473549723625183, "learning_rate": 9.953827460510329e-05, "loss": 2.0099, "step": 86 }, { "epoch": 0.026407649112156626, "grad_norm": 0.4234200119972229, "learning_rate": 9.95321992709599e-05, "loss": 1.9134, "step": 87 }, { "epoch": 0.02671118530884808, "grad_norm": 0.497842937707901, "learning_rate": 9.952612393681653e-05, "loss": 1.5014, "step": 88 }, { "epoch": 0.027014721505539536, "grad_norm": 0.4480627775192261, "learning_rate": 9.952004860267315e-05, "loss": 1.8608, "step": 89 }, { "epoch": 0.02731825770223099, "grad_norm": 0.4578416049480438, "learning_rate": 9.951397326852977e-05, "loss": 1.9455, "step": 90 }, { "epoch": 0.027621793898922446, "grad_norm": 0.4651184380054474, "learning_rate": 9.95078979343864e-05, "loss": 1.7394, "step": 91 }, { "epoch": 0.0279253300956139, "grad_norm": 0.48281168937683105, "learning_rate": 9.950182260024302e-05, "loss": 2.219, "step": 92 }, { "epoch": 0.02822886629230536, "grad_norm": 0.3925339877605438, "learning_rate": 9.949574726609963e-05, "loss": 1.9998, "step": 93 }, { "epoch": 0.028532402488996814, "grad_norm": 0.5093829035758972, "learning_rate": 9.948967193195627e-05, "loss": 1.5958, "step": 94 }, { "epoch": 0.02883593868568827, "grad_norm": 0.4480256736278534, "learning_rate": 9.94835965978129e-05, "loss": 1.7606, "step": 95 }, { "epoch": 0.029139474882379723, "grad_norm": 0.41442152857780457, "learning_rate": 9.94775212636695e-05, "loss": 1.7481, "step": 96 }, { "epoch": 0.029443011079071178, "grad_norm": 0.373604953289032, "learning_rate": 9.947144592952613e-05, "loss": 1.91, "step": 97 }, { "epoch": 0.029746547275762633, "grad_norm": 0.4274522662162781, "learning_rate": 9.946537059538275e-05, "loss": 1.9125, "step": 98 }, { "epoch": 0.03005008347245409, "grad_norm": 0.47791674733161926, "learning_rate": 9.945929526123937e-05, "loss": 2.0647, "step": 99 }, { "epoch": 0.030353619669145546, "grad_norm": 0.456820547580719, "learning_rate": 9.9453219927096e-05, "loss": 2.1057, "step": 100 }, { "epoch": 0.030657155865837, "grad_norm": 0.41789788007736206, "learning_rate": 9.944714459295261e-05, "loss": 2.0159, "step": 101 }, { "epoch": 0.030960692062528456, "grad_norm": 0.4459668695926666, "learning_rate": 9.944106925880924e-05, "loss": 1.927, "step": 102 }, { "epoch": 0.031264228259219914, "grad_norm": 0.372925341129303, "learning_rate": 9.943499392466586e-05, "loss": 1.6992, "step": 103 }, { "epoch": 0.03156776445591137, "grad_norm": 0.4778668284416199, "learning_rate": 9.942891859052248e-05, "loss": 2.1148, "step": 104 }, { "epoch": 0.031871300652602824, "grad_norm": 0.4480198323726654, "learning_rate": 9.942284325637911e-05, "loss": 1.9734, "step": 105 }, { "epoch": 0.03217483684929428, "grad_norm": 0.40198591351509094, "learning_rate": 9.941676792223573e-05, "loss": 1.5448, "step": 106 }, { "epoch": 0.032478373045985734, "grad_norm": 0.40328651666641235, "learning_rate": 9.941069258809234e-05, "loss": 2.1084, "step": 107 }, { "epoch": 0.03278190924267719, "grad_norm": 0.43856972455978394, "learning_rate": 9.940461725394898e-05, "loss": 2.1748, "step": 108 }, { "epoch": 0.033085445439368644, "grad_norm": 0.46910691261291504, "learning_rate": 9.939854191980559e-05, "loss": 1.9526, "step": 109 }, { "epoch": 0.0333889816360601, "grad_norm": 0.4143713116645813, "learning_rate": 9.939246658566221e-05, "loss": 1.9133, "step": 110 }, { "epoch": 0.03369251783275155, "grad_norm": 0.45832857489585876, "learning_rate": 9.938639125151884e-05, "loss": 1.7964, "step": 111 }, { "epoch": 0.03399605402944301, "grad_norm": 0.4263196587562561, "learning_rate": 9.938031591737546e-05, "loss": 1.9231, "step": 112 }, { "epoch": 0.03429959022613446, "grad_norm": 0.38841062784194946, "learning_rate": 9.937424058323208e-05, "loss": 1.9941, "step": 113 }, { "epoch": 0.034603126422825925, "grad_norm": 0.39627939462661743, "learning_rate": 9.93681652490887e-05, "loss": 1.591, "step": 114 }, { "epoch": 0.03490666261951738, "grad_norm": 0.4354992210865021, "learning_rate": 9.936208991494532e-05, "loss": 1.8843, "step": 115 }, { "epoch": 0.035210198816208835, "grad_norm": 0.4674322009086609, "learning_rate": 9.935601458080195e-05, "loss": 1.9687, "step": 116 }, { "epoch": 0.03551373501290029, "grad_norm": 0.4263432025909424, "learning_rate": 9.934993924665857e-05, "loss": 2.0061, "step": 117 }, { "epoch": 0.035817271209591744, "grad_norm": 0.4172697067260742, "learning_rate": 9.934386391251519e-05, "loss": 2.0079, "step": 118 }, { "epoch": 0.0361208074062832, "grad_norm": 0.35841792821884155, "learning_rate": 9.933778857837182e-05, "loss": 2.0048, "step": 119 }, { "epoch": 0.036424343602974654, "grad_norm": 0.4118800759315491, "learning_rate": 9.933171324422844e-05, "loss": 1.8136, "step": 120 }, { "epoch": 0.03672787979966611, "grad_norm": 0.4894438087940216, "learning_rate": 9.932563791008505e-05, "loss": 1.9214, "step": 121 }, { "epoch": 0.037031415996357564, "grad_norm": 0.4079352915287018, "learning_rate": 9.931956257594169e-05, "loss": 2.2445, "step": 122 }, { "epoch": 0.03733495219304902, "grad_norm": 0.41293051838874817, "learning_rate": 9.93134872417983e-05, "loss": 2.0845, "step": 123 }, { "epoch": 0.037638488389740474, "grad_norm": 0.4413944482803345, "learning_rate": 9.930741190765492e-05, "loss": 1.8782, "step": 124 }, { "epoch": 0.03794202458643193, "grad_norm": 0.4036192297935486, "learning_rate": 9.930133657351155e-05, "loss": 1.9393, "step": 125 }, { "epoch": 0.03824556078312339, "grad_norm": 0.7759333848953247, "learning_rate": 9.929526123936817e-05, "loss": 1.9035, "step": 126 }, { "epoch": 0.038549096979814845, "grad_norm": 0.4737033247947693, "learning_rate": 9.928918590522479e-05, "loss": 2.0535, "step": 127 }, { "epoch": 0.0388526331765063, "grad_norm": 0.5254648923873901, "learning_rate": 9.928311057108142e-05, "loss": 1.9854, "step": 128 }, { "epoch": 0.039156169373197755, "grad_norm": 0.46957090497016907, "learning_rate": 9.927703523693803e-05, "loss": 2.1865, "step": 129 }, { "epoch": 0.03945970556988921, "grad_norm": 0.4427931010723114, "learning_rate": 9.927095990279466e-05, "loss": 1.9696, "step": 130 }, { "epoch": 0.039763241766580665, "grad_norm": 0.42948615550994873, "learning_rate": 9.926488456865128e-05, "loss": 1.5367, "step": 131 }, { "epoch": 0.04006677796327212, "grad_norm": 0.3952697515487671, "learning_rate": 9.92588092345079e-05, "loss": 1.9648, "step": 132 }, { "epoch": 0.040370314159963575, "grad_norm": 0.41384372115135193, "learning_rate": 9.925273390036453e-05, "loss": 1.9115, "step": 133 }, { "epoch": 0.04067385035665503, "grad_norm": 0.44592148065567017, "learning_rate": 9.924665856622115e-05, "loss": 2.2336, "step": 134 }, { "epoch": 0.040977386553346484, "grad_norm": 0.43720191717147827, "learning_rate": 9.924058323207776e-05, "loss": 2.1014, "step": 135 }, { "epoch": 0.04128092275003794, "grad_norm": 0.6224471926689148, "learning_rate": 9.92345078979344e-05, "loss": 1.9093, "step": 136 }, { "epoch": 0.041584458946729394, "grad_norm": 0.40913721919059753, "learning_rate": 9.922843256379101e-05, "loss": 2.036, "step": 137 }, { "epoch": 0.041887995143420856, "grad_norm": 0.5675486922264099, "learning_rate": 9.922235722964763e-05, "loss": 1.7925, "step": 138 }, { "epoch": 0.04219153134011231, "grad_norm": 0.4174894690513611, "learning_rate": 9.921628189550426e-05, "loss": 1.742, "step": 139 }, { "epoch": 0.042495067536803766, "grad_norm": 0.5149232745170593, "learning_rate": 9.921020656136088e-05, "loss": 2.0117, "step": 140 }, { "epoch": 0.04279860373349522, "grad_norm": 0.4599703252315521, "learning_rate": 9.92041312272175e-05, "loss": 1.9401, "step": 141 }, { "epoch": 0.043102139930186675, "grad_norm": 0.39801791310310364, "learning_rate": 9.919805589307413e-05, "loss": 1.74, "step": 142 }, { "epoch": 0.04340567612687813, "grad_norm": 0.4469515085220337, "learning_rate": 9.919198055893074e-05, "loss": 1.9919, "step": 143 }, { "epoch": 0.043709212323569585, "grad_norm": 0.4179072678089142, "learning_rate": 9.918590522478737e-05, "loss": 1.9618, "step": 144 }, { "epoch": 0.04401274852026104, "grad_norm": 0.3512915372848511, "learning_rate": 9.917982989064399e-05, "loss": 2.0603, "step": 145 }, { "epoch": 0.044316284716952495, "grad_norm": 0.6461288928985596, "learning_rate": 9.917375455650061e-05, "loss": 1.9461, "step": 146 }, { "epoch": 0.04461982091364395, "grad_norm": 0.4113643169403076, "learning_rate": 9.916767922235724e-05, "loss": 1.5332, "step": 147 }, { "epoch": 0.044923357110335405, "grad_norm": 0.5560798645019531, "learning_rate": 9.916160388821386e-05, "loss": 1.678, "step": 148 }, { "epoch": 0.045226893307026866, "grad_norm": 0.5448784828186035, "learning_rate": 9.915552855407047e-05, "loss": 1.8036, "step": 149 }, { "epoch": 0.04553042950371832, "grad_norm": 0.4570043087005615, "learning_rate": 9.91494532199271e-05, "loss": 2.0126, "step": 150 }, { "epoch": 0.045833965700409776, "grad_norm": 0.4167179465293884, "learning_rate": 9.914337788578372e-05, "loss": 1.7567, "step": 151 }, { "epoch": 0.04613750189710123, "grad_norm": 1.3264193534851074, "learning_rate": 9.913730255164034e-05, "loss": 2.012, "step": 152 }, { "epoch": 0.046441038093792686, "grad_norm": 0.45362886786460876, "learning_rate": 9.913122721749697e-05, "loss": 1.8789, "step": 153 }, { "epoch": 0.04674457429048414, "grad_norm": 2.0713539123535156, "learning_rate": 9.912515188335359e-05, "loss": 2.0798, "step": 154 }, { "epoch": 0.047048110487175596, "grad_norm": 1.430906891822815, "learning_rate": 9.91190765492102e-05, "loss": 1.9401, "step": 155 }, { "epoch": 0.04735164668386705, "grad_norm": 0.846182107925415, "learning_rate": 9.911300121506684e-05, "loss": 1.9073, "step": 156 }, { "epoch": 0.047655182880558505, "grad_norm": 0.5027226805686951, "learning_rate": 9.910692588092345e-05, "loss": 2.1521, "step": 157 }, { "epoch": 0.04795871907724996, "grad_norm": 0.32647275924682617, "learning_rate": 9.910085054678007e-05, "loss": 1.705, "step": 158 }, { "epoch": 0.048262255273941415, "grad_norm": 0.4337715804576874, "learning_rate": 9.90947752126367e-05, "loss": 1.9844, "step": 159 }, { "epoch": 0.04856579147063287, "grad_norm": 0.4408979117870331, "learning_rate": 9.908869987849332e-05, "loss": 1.81, "step": 160 }, { "epoch": 0.04886932766732433, "grad_norm": 3.5793535709381104, "learning_rate": 9.908262454434995e-05, "loss": 1.8569, "step": 161 }, { "epoch": 0.04917286386401579, "grad_norm": 0.47893545031547546, "learning_rate": 9.907654921020657e-05, "loss": 1.9397, "step": 162 }, { "epoch": 0.04947640006070724, "grad_norm": 0.36375802755355835, "learning_rate": 9.907047387606318e-05, "loss": 1.8064, "step": 163 }, { "epoch": 0.049779936257398696, "grad_norm": 0.3935683071613312, "learning_rate": 9.906439854191982e-05, "loss": 2.0968, "step": 164 }, { "epoch": 0.05008347245409015, "grad_norm": 2.1048178672790527, "learning_rate": 9.905832320777643e-05, "loss": 1.8488, "step": 165 }, { "epoch": 0.050387008650781606, "grad_norm": 0.35579410195350647, "learning_rate": 9.905224787363305e-05, "loss": 1.837, "step": 166 }, { "epoch": 0.05069054484747306, "grad_norm": 0.37291133403778076, "learning_rate": 9.904617253948968e-05, "loss": 1.5921, "step": 167 }, { "epoch": 0.050994081044164516, "grad_norm": 0.37633177638053894, "learning_rate": 9.90400972053463e-05, "loss": 1.9613, "step": 168 }, { "epoch": 0.05129761724085597, "grad_norm": 0.49120867252349854, "learning_rate": 9.903402187120292e-05, "loss": 2.1737, "step": 169 }, { "epoch": 0.051601153437547426, "grad_norm": 0.41102972626686096, "learning_rate": 9.902794653705955e-05, "loss": 1.8833, "step": 170 }, { "epoch": 0.05190468963423888, "grad_norm": 0.3835681676864624, "learning_rate": 9.902187120291616e-05, "loss": 2.0283, "step": 171 }, { "epoch": 0.052208225830930335, "grad_norm": 0.4194372296333313, "learning_rate": 9.901579586877278e-05, "loss": 1.6146, "step": 172 }, { "epoch": 0.0525117620276218, "grad_norm": 0.411516934633255, "learning_rate": 9.900972053462941e-05, "loss": 2.0012, "step": 173 }, { "epoch": 0.05281529822431325, "grad_norm": 0.4565434455871582, "learning_rate": 9.900364520048603e-05, "loss": 2.2415, "step": 174 }, { "epoch": 0.05311883442100471, "grad_norm": 0.4352016746997833, "learning_rate": 9.899756986634266e-05, "loss": 1.6505, "step": 175 }, { "epoch": 0.05342237061769616, "grad_norm": 0.5442507266998291, "learning_rate": 9.899149453219928e-05, "loss": 1.9692, "step": 176 }, { "epoch": 0.05372590681438762, "grad_norm": 0.39451470971107483, "learning_rate": 9.89854191980559e-05, "loss": 1.9682, "step": 177 }, { "epoch": 0.05402944301107907, "grad_norm": 0.34474217891693115, "learning_rate": 9.897934386391253e-05, "loss": 1.6806, "step": 178 }, { "epoch": 0.054332979207770526, "grad_norm": 0.45165541768074036, "learning_rate": 9.897326852976914e-05, "loss": 1.9329, "step": 179 }, { "epoch": 0.05463651540446198, "grad_norm": 0.5402116775512695, "learning_rate": 9.896719319562576e-05, "loss": 1.6939, "step": 180 }, { "epoch": 0.054940051601153436, "grad_norm": 0.40272051095962524, "learning_rate": 9.896111786148239e-05, "loss": 1.985, "step": 181 }, { "epoch": 0.05524358779784489, "grad_norm": 0.3833436667919159, "learning_rate": 9.895504252733901e-05, "loss": 1.7804, "step": 182 }, { "epoch": 0.055547123994536346, "grad_norm": 0.34711307287216187, "learning_rate": 9.894896719319563e-05, "loss": 1.865, "step": 183 }, { "epoch": 0.0558506601912278, "grad_norm": 0.43081262707710266, "learning_rate": 9.894289185905226e-05, "loss": 1.8066, "step": 184 }, { "epoch": 0.05615419638791926, "grad_norm": 0.38740819692611694, "learning_rate": 9.893681652490887e-05, "loss": 1.738, "step": 185 }, { "epoch": 0.05645773258461072, "grad_norm": 0.46878042817115784, "learning_rate": 9.893074119076549e-05, "loss": 1.4563, "step": 186 }, { "epoch": 0.05676126878130217, "grad_norm": 0.4415140151977539, "learning_rate": 9.892466585662212e-05, "loss": 1.565, "step": 187 }, { "epoch": 0.05706480497799363, "grad_norm": 0.43196091055870056, "learning_rate": 9.891859052247874e-05, "loss": 2.1562, "step": 188 }, { "epoch": 0.05736834117468508, "grad_norm": 0.4677179455757141, "learning_rate": 9.891251518833537e-05, "loss": 2.0413, "step": 189 }, { "epoch": 0.05767187737137654, "grad_norm": 0.4087100327014923, "learning_rate": 9.890643985419199e-05, "loss": 1.5434, "step": 190 }, { "epoch": 0.05797541356806799, "grad_norm": 0.37906375527381897, "learning_rate": 9.89003645200486e-05, "loss": 1.5561, "step": 191 }, { "epoch": 0.05827894976475945, "grad_norm": 0.5014649033546448, "learning_rate": 9.889428918590524e-05, "loss": 1.6243, "step": 192 }, { "epoch": 0.0585824859614509, "grad_norm": 0.6972336769104004, "learning_rate": 9.888821385176185e-05, "loss": 2.1616, "step": 193 }, { "epoch": 0.058886022158142357, "grad_norm": 0.46012699604034424, "learning_rate": 9.888213851761847e-05, "loss": 2.1195, "step": 194 }, { "epoch": 0.05918955835483381, "grad_norm": 0.36921924352645874, "learning_rate": 9.88760631834751e-05, "loss": 2.1071, "step": 195 }, { "epoch": 0.059493094551525266, "grad_norm": 0.36246025562286377, "learning_rate": 9.886998784933172e-05, "loss": 1.9948, "step": 196 }, { "epoch": 0.05979663074821673, "grad_norm": 0.3935892581939697, "learning_rate": 9.886391251518834e-05, "loss": 2.3065, "step": 197 }, { "epoch": 0.06010016694490818, "grad_norm": 0.36333411931991577, "learning_rate": 9.885783718104497e-05, "loss": 1.8746, "step": 198 }, { "epoch": 0.06040370314159964, "grad_norm": 0.4027535617351532, "learning_rate": 9.885176184690158e-05, "loss": 2.156, "step": 199 }, { "epoch": 0.06070723933829109, "grad_norm": 0.42472004890441895, "learning_rate": 9.88456865127582e-05, "loss": 1.8647, "step": 200 }, { "epoch": 0.06101077553498255, "grad_norm": 0.38055720925331116, "learning_rate": 9.883961117861483e-05, "loss": 1.9185, "step": 201 }, { "epoch": 0.061314311731674, "grad_norm": 0.3831098973751068, "learning_rate": 9.883353584447145e-05, "loss": 2.2488, "step": 202 }, { "epoch": 0.06161784792836546, "grad_norm": 0.35769203305244446, "learning_rate": 9.882746051032808e-05, "loss": 1.9281, "step": 203 }, { "epoch": 0.06192138412505691, "grad_norm": 0.3576291799545288, "learning_rate": 9.88213851761847e-05, "loss": 1.7082, "step": 204 }, { "epoch": 0.06222492032174837, "grad_norm": 0.3641425669193268, "learning_rate": 9.881530984204131e-05, "loss": 1.9374, "step": 205 }, { "epoch": 0.06252845651843983, "grad_norm": 0.4281562268733978, "learning_rate": 9.880923450789795e-05, "loss": 2.0797, "step": 206 }, { "epoch": 0.06283199271513128, "grad_norm": 0.39947500824928284, "learning_rate": 9.880315917375455e-05, "loss": 2.1399, "step": 207 }, { "epoch": 0.06313552891182274, "grad_norm": 0.4200506806373596, "learning_rate": 9.879708383961118e-05, "loss": 2.0443, "step": 208 }, { "epoch": 0.06343906510851419, "grad_norm": 0.35776716470718384, "learning_rate": 9.879100850546781e-05, "loss": 1.9637, "step": 209 }, { "epoch": 0.06374260130520565, "grad_norm": 0.3676275610923767, "learning_rate": 9.878493317132443e-05, "loss": 2.2018, "step": 210 }, { "epoch": 0.0640461375018971, "grad_norm": 0.48199740052223206, "learning_rate": 9.877885783718105e-05, "loss": 1.8948, "step": 211 }, { "epoch": 0.06434967369858856, "grad_norm": 0.40157443284988403, "learning_rate": 9.877278250303768e-05, "loss": 1.9011, "step": 212 }, { "epoch": 0.06465320989528, "grad_norm": 0.3959876596927643, "learning_rate": 9.87667071688943e-05, "loss": 1.8019, "step": 213 }, { "epoch": 0.06495674609197147, "grad_norm": 0.4266337752342224, "learning_rate": 9.876063183475091e-05, "loss": 1.6282, "step": 214 }, { "epoch": 0.06526028228866293, "grad_norm": 0.5142415165901184, "learning_rate": 9.875455650060754e-05, "loss": 2.014, "step": 215 }, { "epoch": 0.06556381848535438, "grad_norm": 0.3834533095359802, "learning_rate": 9.874848116646416e-05, "loss": 2.2733, "step": 216 }, { "epoch": 0.06586735468204584, "grad_norm": 0.4485650062561035, "learning_rate": 9.874240583232079e-05, "loss": 2.0707, "step": 217 }, { "epoch": 0.06617089087873729, "grad_norm": 0.37866663932800293, "learning_rate": 9.873633049817741e-05, "loss": 2.2419, "step": 218 }, { "epoch": 0.06647442707542875, "grad_norm": 0.5389169454574585, "learning_rate": 9.873025516403402e-05, "loss": 1.9736, "step": 219 }, { "epoch": 0.0667779632721202, "grad_norm": 0.3923830986022949, "learning_rate": 9.872417982989066e-05, "loss": 2.0164, "step": 220 }, { "epoch": 0.06708149946881166, "grad_norm": 0.431417316198349, "learning_rate": 9.871810449574726e-05, "loss": 1.7535, "step": 221 }, { "epoch": 0.0673850356655031, "grad_norm": 0.4980961084365845, "learning_rate": 9.871202916160389e-05, "loss": 2.0751, "step": 222 }, { "epoch": 0.06768857186219457, "grad_norm": 0.38455912470817566, "learning_rate": 9.870595382746052e-05, "loss": 1.9394, "step": 223 }, { "epoch": 0.06799210805888602, "grad_norm": 0.3911600708961487, "learning_rate": 9.869987849331714e-05, "loss": 1.6384, "step": 224 }, { "epoch": 0.06829564425557748, "grad_norm": 0.39567652344703674, "learning_rate": 9.869380315917376e-05, "loss": 2.0082, "step": 225 }, { "epoch": 0.06859918045226893, "grad_norm": 0.3773573935031891, "learning_rate": 9.868772782503039e-05, "loss": 2.0852, "step": 226 }, { "epoch": 0.06890271664896039, "grad_norm": 0.4387274384498596, "learning_rate": 9.8681652490887e-05, "loss": 2.2043, "step": 227 }, { "epoch": 0.06920625284565185, "grad_norm": 0.4070594012737274, "learning_rate": 9.867557715674362e-05, "loss": 1.8638, "step": 228 }, { "epoch": 0.0695097890423433, "grad_norm": 0.5250163674354553, "learning_rate": 9.866950182260025e-05, "loss": 2.0826, "step": 229 }, { "epoch": 0.06981332523903476, "grad_norm": 0.47242820262908936, "learning_rate": 9.866342648845687e-05, "loss": 1.5517, "step": 230 }, { "epoch": 0.07011686143572621, "grad_norm": 0.41242364048957825, "learning_rate": 9.865735115431349e-05, "loss": 1.3791, "step": 231 }, { "epoch": 0.07042039763241767, "grad_norm": 0.4049898087978363, "learning_rate": 9.865127582017012e-05, "loss": 2.0933, "step": 232 }, { "epoch": 0.07072393382910912, "grad_norm": 0.5341041684150696, "learning_rate": 9.864520048602673e-05, "loss": 1.8293, "step": 233 }, { "epoch": 0.07102747002580058, "grad_norm": 0.4930991530418396, "learning_rate": 9.863912515188337e-05, "loss": 2.3447, "step": 234 }, { "epoch": 0.07133100622249203, "grad_norm": 0.752202033996582, "learning_rate": 9.863304981773997e-05, "loss": 1.9733, "step": 235 }, { "epoch": 0.07163454241918349, "grad_norm": 0.3552611768245697, "learning_rate": 9.86269744835966e-05, "loss": 2.0324, "step": 236 }, { "epoch": 0.07193807861587494, "grad_norm": 0.3639819622039795, "learning_rate": 9.862089914945323e-05, "loss": 1.9325, "step": 237 }, { "epoch": 0.0722416148125664, "grad_norm": 0.4028383195400238, "learning_rate": 9.861482381530985e-05, "loss": 1.9652, "step": 238 }, { "epoch": 0.07254515100925786, "grad_norm": 0.3904295563697815, "learning_rate": 9.860874848116647e-05, "loss": 1.8133, "step": 239 }, { "epoch": 0.07284868720594931, "grad_norm": 0.46043211221694946, "learning_rate": 9.86026731470231e-05, "loss": 1.6711, "step": 240 }, { "epoch": 0.07315222340264077, "grad_norm": 0.41137024760246277, "learning_rate": 9.859659781287971e-05, "loss": 2.1129, "step": 241 }, { "epoch": 0.07345575959933222, "grad_norm": 0.3776731491088867, "learning_rate": 9.859052247873633e-05, "loss": 1.5451, "step": 242 }, { "epoch": 0.07375929579602368, "grad_norm": 0.8163847923278809, "learning_rate": 9.858444714459296e-05, "loss": 1.8133, "step": 243 }, { "epoch": 0.07406283199271513, "grad_norm": 0.7984678149223328, "learning_rate": 9.857837181044958e-05, "loss": 1.6879, "step": 244 }, { "epoch": 0.07436636818940659, "grad_norm": 0.3759590983390808, "learning_rate": 9.85722964763062e-05, "loss": 2.0183, "step": 245 }, { "epoch": 0.07466990438609804, "grad_norm": 0.4622940421104431, "learning_rate": 9.856622114216283e-05, "loss": 1.9958, "step": 246 }, { "epoch": 0.0749734405827895, "grad_norm": 0.4710557758808136, "learning_rate": 9.856014580801944e-05, "loss": 1.5483, "step": 247 }, { "epoch": 0.07527697677948095, "grad_norm": 0.3766056001186371, "learning_rate": 9.855407047387608e-05, "loss": 1.8697, "step": 248 }, { "epoch": 0.07558051297617241, "grad_norm": 0.6338986158370972, "learning_rate": 9.854799513973268e-05, "loss": 2.2657, "step": 249 }, { "epoch": 0.07588404917286386, "grad_norm": 0.4152657687664032, "learning_rate": 9.854191980558931e-05, "loss": 1.5967, "step": 250 }, { "epoch": 0.07618758536955532, "grad_norm": 0.37085869908332825, "learning_rate": 9.853584447144594e-05, "loss": 1.915, "step": 251 }, { "epoch": 0.07649112156624678, "grad_norm": 0.40199750661849976, "learning_rate": 9.852976913730256e-05, "loss": 2.0662, "step": 252 }, { "epoch": 0.07679465776293823, "grad_norm": 0.39193621277809143, "learning_rate": 9.852369380315918e-05, "loss": 1.911, "step": 253 }, { "epoch": 0.07709819395962969, "grad_norm": 0.39082249999046326, "learning_rate": 9.85176184690158e-05, "loss": 1.6207, "step": 254 }, { "epoch": 0.07740173015632114, "grad_norm": 0.3943793773651123, "learning_rate": 9.851154313487242e-05, "loss": 2.1254, "step": 255 }, { "epoch": 0.0777052663530126, "grad_norm": 0.34571030735969543, "learning_rate": 9.850546780072904e-05, "loss": 1.9696, "step": 256 }, { "epoch": 0.07800880254970405, "grad_norm": 0.4847205579280853, "learning_rate": 9.849939246658567e-05, "loss": 2.1382, "step": 257 }, { "epoch": 0.07831233874639551, "grad_norm": 0.47491976618766785, "learning_rate": 9.849331713244229e-05, "loss": 2.1109, "step": 258 }, { "epoch": 0.07861587494308696, "grad_norm": 0.3984815776348114, "learning_rate": 9.84872417982989e-05, "loss": 2.0019, "step": 259 }, { "epoch": 0.07891941113977842, "grad_norm": 0.578295886516571, "learning_rate": 9.848116646415554e-05, "loss": 1.6984, "step": 260 }, { "epoch": 0.07922294733646987, "grad_norm": 0.4641231894493103, "learning_rate": 9.847509113001215e-05, "loss": 2.0974, "step": 261 }, { "epoch": 0.07952648353316133, "grad_norm": 0.3704085052013397, "learning_rate": 9.846901579586879e-05, "loss": 1.8907, "step": 262 }, { "epoch": 0.07983001972985279, "grad_norm": 0.40248993039131165, "learning_rate": 9.846294046172539e-05, "loss": 1.9194, "step": 263 }, { "epoch": 0.08013355592654424, "grad_norm": 0.40396660566329956, "learning_rate": 9.845686512758202e-05, "loss": 1.7075, "step": 264 }, { "epoch": 0.0804370921232357, "grad_norm": 0.44500696659088135, "learning_rate": 9.845078979343865e-05, "loss": 1.7463, "step": 265 }, { "epoch": 0.08074062831992715, "grad_norm": 0.3681708574295044, "learning_rate": 9.844471445929527e-05, "loss": 1.7162, "step": 266 }, { "epoch": 0.08104416451661861, "grad_norm": 0.47645455598831177, "learning_rate": 9.843863912515189e-05, "loss": 1.7759, "step": 267 }, { "epoch": 0.08134770071331006, "grad_norm": 0.3663488030433655, "learning_rate": 9.843256379100852e-05, "loss": 1.8687, "step": 268 }, { "epoch": 0.08165123691000152, "grad_norm": 0.33710968494415283, "learning_rate": 9.842648845686513e-05, "loss": 1.9777, "step": 269 }, { "epoch": 0.08195477310669297, "grad_norm": 0.4824034571647644, "learning_rate": 9.842041312272175e-05, "loss": 1.4345, "step": 270 }, { "epoch": 0.08225830930338443, "grad_norm": 0.3703802824020386, "learning_rate": 9.841433778857838e-05, "loss": 1.7835, "step": 271 }, { "epoch": 0.08256184550007588, "grad_norm": 0.4279334545135498, "learning_rate": 9.8408262454435e-05, "loss": 2.1811, "step": 272 }, { "epoch": 0.08286538169676734, "grad_norm": 0.3500446379184723, "learning_rate": 9.840218712029162e-05, "loss": 2.0992, "step": 273 }, { "epoch": 0.08316891789345879, "grad_norm": 0.4278954863548279, "learning_rate": 9.839611178614823e-05, "loss": 1.4691, "step": 274 }, { "epoch": 0.08347245409015025, "grad_norm": 0.6769374012947083, "learning_rate": 9.839003645200486e-05, "loss": 1.8223, "step": 275 }, { "epoch": 0.08377599028684171, "grad_norm": 0.34110525250434875, "learning_rate": 9.83839611178615e-05, "loss": 1.9469, "step": 276 }, { "epoch": 0.08407952648353316, "grad_norm": 0.37355175614356995, "learning_rate": 9.83778857837181e-05, "loss": 1.9281, "step": 277 }, { "epoch": 0.08438306268022462, "grad_norm": 0.3968208134174347, "learning_rate": 9.837181044957473e-05, "loss": 2.0537, "step": 278 }, { "epoch": 0.08468659887691607, "grad_norm": 0.3811870515346527, "learning_rate": 9.836573511543136e-05, "loss": 1.9715, "step": 279 }, { "epoch": 0.08499013507360753, "grad_norm": 0.3258214294910431, "learning_rate": 9.835965978128796e-05, "loss": 1.979, "step": 280 }, { "epoch": 0.08529367127029898, "grad_norm": 0.38593369722366333, "learning_rate": 9.83535844471446e-05, "loss": 2.135, "step": 281 }, { "epoch": 0.08559720746699044, "grad_norm": 0.3811703026294708, "learning_rate": 9.834750911300123e-05, "loss": 2.1786, "step": 282 }, { "epoch": 0.08590074366368189, "grad_norm": 0.3784421980381012, "learning_rate": 9.834143377885784e-05, "loss": 2.0193, "step": 283 }, { "epoch": 0.08620427986037335, "grad_norm": 0.3660358190536499, "learning_rate": 9.833535844471446e-05, "loss": 1.9975, "step": 284 }, { "epoch": 0.0865078160570648, "grad_norm": 0.3747190237045288, "learning_rate": 9.832928311057109e-05, "loss": 1.7897, "step": 285 }, { "epoch": 0.08681135225375626, "grad_norm": 0.37042975425720215, "learning_rate": 9.832320777642771e-05, "loss": 1.9026, "step": 286 }, { "epoch": 0.08711488845044772, "grad_norm": 0.3642013669013977, "learning_rate": 9.831713244228433e-05, "loss": 1.9611, "step": 287 }, { "epoch": 0.08741842464713917, "grad_norm": 0.38183900713920593, "learning_rate": 9.831105710814094e-05, "loss": 1.8648, "step": 288 }, { "epoch": 0.08772196084383063, "grad_norm": 0.4243112802505493, "learning_rate": 9.830498177399757e-05, "loss": 1.734, "step": 289 }, { "epoch": 0.08802549704052208, "grad_norm": 0.3763525187969208, "learning_rate": 9.82989064398542e-05, "loss": 1.8955, "step": 290 }, { "epoch": 0.08832903323721354, "grad_norm": 0.37548086047172546, "learning_rate": 9.829283110571081e-05, "loss": 1.9246, "step": 291 }, { "epoch": 0.08863256943390499, "grad_norm": 0.5070151090621948, "learning_rate": 9.828675577156744e-05, "loss": 1.6474, "step": 292 }, { "epoch": 0.08893610563059645, "grad_norm": 0.3903336822986603, "learning_rate": 9.828068043742407e-05, "loss": 1.5546, "step": 293 }, { "epoch": 0.0892396418272879, "grad_norm": 0.42705482244491577, "learning_rate": 9.827460510328068e-05, "loss": 1.5506, "step": 294 }, { "epoch": 0.08954317802397936, "grad_norm": 0.4342738687992096, "learning_rate": 9.82685297691373e-05, "loss": 1.6173, "step": 295 }, { "epoch": 0.08984671422067081, "grad_norm": 0.3975971043109894, "learning_rate": 9.826245443499394e-05, "loss": 1.9652, "step": 296 }, { "epoch": 0.09015025041736227, "grad_norm": 0.42342832684516907, "learning_rate": 9.825637910085055e-05, "loss": 1.9464, "step": 297 }, { "epoch": 0.09045378661405373, "grad_norm": 0.381565660238266, "learning_rate": 9.825030376670717e-05, "loss": 2.0949, "step": 298 }, { "epoch": 0.09075732281074518, "grad_norm": 0.4632069170475006, "learning_rate": 9.82442284325638e-05, "loss": 1.4451, "step": 299 }, { "epoch": 0.09106085900743664, "grad_norm": 0.36039817333221436, "learning_rate": 9.823815309842042e-05, "loss": 1.2199, "step": 300 }, { "epoch": 0.09136439520412809, "grad_norm": 0.37576648592948914, "learning_rate": 9.823207776427704e-05, "loss": 1.9884, "step": 301 }, { "epoch": 0.09166793140081955, "grad_norm": 0.3673763573169708, "learning_rate": 9.822600243013365e-05, "loss": 1.7103, "step": 302 }, { "epoch": 0.091971467597511, "grad_norm": 0.3729887008666992, "learning_rate": 9.821992709599029e-05, "loss": 1.9215, "step": 303 }, { "epoch": 0.09227500379420246, "grad_norm": 0.3857046365737915, "learning_rate": 9.82138517618469e-05, "loss": 2.1883, "step": 304 }, { "epoch": 0.09257853999089391, "grad_norm": 0.4226963520050049, "learning_rate": 9.820777642770352e-05, "loss": 1.6413, "step": 305 }, { "epoch": 0.09288207618758537, "grad_norm": 0.40093332529067993, "learning_rate": 9.820170109356015e-05, "loss": 1.9897, "step": 306 }, { "epoch": 0.09318561238427682, "grad_norm": 0.4287321865558624, "learning_rate": 9.819562575941678e-05, "loss": 1.8708, "step": 307 }, { "epoch": 0.09348914858096828, "grad_norm": 0.3933330178260803, "learning_rate": 9.818955042527339e-05, "loss": 2.003, "step": 308 }, { "epoch": 0.09379268477765973, "grad_norm": 0.3991425633430481, "learning_rate": 9.818347509113002e-05, "loss": 1.7305, "step": 309 }, { "epoch": 0.09409622097435119, "grad_norm": 0.37534525990486145, "learning_rate": 9.817739975698665e-05, "loss": 1.9767, "step": 310 }, { "epoch": 0.09439975717104265, "grad_norm": 0.4293142557144165, "learning_rate": 9.817132442284326e-05, "loss": 2.1563, "step": 311 }, { "epoch": 0.0947032933677341, "grad_norm": 0.5783388614654541, "learning_rate": 9.816524908869988e-05, "loss": 1.4839, "step": 312 }, { "epoch": 0.09500682956442556, "grad_norm": 0.3414449989795685, "learning_rate": 9.815917375455651e-05, "loss": 1.9974, "step": 313 }, { "epoch": 0.09531036576111701, "grad_norm": 0.3927890956401825, "learning_rate": 9.815309842041313e-05, "loss": 1.9683, "step": 314 }, { "epoch": 0.09561390195780847, "grad_norm": 0.42801201343536377, "learning_rate": 9.814702308626975e-05, "loss": 2.1414, "step": 315 }, { "epoch": 0.09591743815449992, "grad_norm": 0.4715151786804199, "learning_rate": 9.814094775212636e-05, "loss": 2.0055, "step": 316 }, { "epoch": 0.09622097435119138, "grad_norm": 0.42110496759414673, "learning_rate": 9.8134872417983e-05, "loss": 2.0693, "step": 317 }, { "epoch": 0.09652451054788283, "grad_norm": 0.39333397150039673, "learning_rate": 9.812879708383961e-05, "loss": 2.1362, "step": 318 }, { "epoch": 0.09682804674457429, "grad_norm": 0.42686814069747925, "learning_rate": 9.812272174969623e-05, "loss": 1.9485, "step": 319 }, { "epoch": 0.09713158294126574, "grad_norm": 0.38239583373069763, "learning_rate": 9.811664641555286e-05, "loss": 1.3584, "step": 320 }, { "epoch": 0.0974351191379572, "grad_norm": 0.3651975393295288, "learning_rate": 9.811057108140949e-05, "loss": 2.2135, "step": 321 }, { "epoch": 0.09773865533464866, "grad_norm": 0.34531673789024353, "learning_rate": 9.81044957472661e-05, "loss": 1.7106, "step": 322 }, { "epoch": 0.09804219153134011, "grad_norm": 0.38727474212646484, "learning_rate": 9.809842041312273e-05, "loss": 1.8647, "step": 323 }, { "epoch": 0.09834572772803157, "grad_norm": 0.4127596318721771, "learning_rate": 9.809234507897936e-05, "loss": 1.5775, "step": 324 }, { "epoch": 0.09864926392472302, "grad_norm": 0.337333083152771, "learning_rate": 9.808626974483597e-05, "loss": 1.8619, "step": 325 }, { "epoch": 0.09895280012141448, "grad_norm": 0.43906038999557495, "learning_rate": 9.808019441069259e-05, "loss": 2.1753, "step": 326 }, { "epoch": 0.09925633631810593, "grad_norm": 0.4216412901878357, "learning_rate": 9.807411907654922e-05, "loss": 1.8322, "step": 327 }, { "epoch": 0.09955987251479739, "grad_norm": 0.3964472711086273, "learning_rate": 9.806804374240584e-05, "loss": 1.6452, "step": 328 }, { "epoch": 0.09986340871148884, "grad_norm": 0.3590555489063263, "learning_rate": 9.806196840826246e-05, "loss": 1.6471, "step": 329 }, { "epoch": 0.1001669449081803, "grad_norm": 0.3878382742404938, "learning_rate": 9.805589307411907e-05, "loss": 1.7192, "step": 330 }, { "epoch": 0.10047048110487175, "grad_norm": 0.37866318225860596, "learning_rate": 9.80498177399757e-05, "loss": 2.0156, "step": 331 }, { "epoch": 0.10077401730156321, "grad_norm": 0.3977656364440918, "learning_rate": 9.804374240583232e-05, "loss": 1.3686, "step": 332 }, { "epoch": 0.10107755349825466, "grad_norm": 0.39724108576774597, "learning_rate": 9.803766707168894e-05, "loss": 1.6206, "step": 333 }, { "epoch": 0.10138108969494612, "grad_norm": 0.7311023473739624, "learning_rate": 9.803159173754557e-05, "loss": 1.7186, "step": 334 }, { "epoch": 0.10168462589163758, "grad_norm": 0.3953106701374054, "learning_rate": 9.80255164034022e-05, "loss": 1.9674, "step": 335 }, { "epoch": 0.10198816208832903, "grad_norm": 0.4133211076259613, "learning_rate": 9.80194410692588e-05, "loss": 1.9536, "step": 336 }, { "epoch": 0.1022916982850205, "grad_norm": 0.4300665855407715, "learning_rate": 9.801336573511544e-05, "loss": 2.0676, "step": 337 }, { "epoch": 0.10259523448171194, "grad_norm": 0.3569762706756592, "learning_rate": 9.800729040097207e-05, "loss": 2.138, "step": 338 }, { "epoch": 0.1028987706784034, "grad_norm": 0.37851640582084656, "learning_rate": 9.800121506682868e-05, "loss": 1.7479, "step": 339 }, { "epoch": 0.10320230687509485, "grad_norm": 0.3435342013835907, "learning_rate": 9.79951397326853e-05, "loss": 2.0182, "step": 340 }, { "epoch": 0.10350584307178631, "grad_norm": 0.487394779920578, "learning_rate": 9.798906439854193e-05, "loss": 1.8017, "step": 341 }, { "epoch": 0.10380937926847776, "grad_norm": 0.3741822838783264, "learning_rate": 9.798298906439855e-05, "loss": 1.9261, "step": 342 }, { "epoch": 0.10411291546516922, "grad_norm": 0.4044167101383209, "learning_rate": 9.797691373025517e-05, "loss": 1.962, "step": 343 }, { "epoch": 0.10441645166186067, "grad_norm": 0.4507991373538971, "learning_rate": 9.797083839611178e-05, "loss": 1.6664, "step": 344 }, { "epoch": 0.10471998785855213, "grad_norm": 0.41394731402397156, "learning_rate": 9.796476306196842e-05, "loss": 1.7994, "step": 345 }, { "epoch": 0.1050235240552436, "grad_norm": 0.4100608825683594, "learning_rate": 9.795868772782503e-05, "loss": 1.8795, "step": 346 }, { "epoch": 0.10532706025193504, "grad_norm": 0.5010010600090027, "learning_rate": 9.795261239368165e-05, "loss": 1.7712, "step": 347 }, { "epoch": 0.1056305964486265, "grad_norm": 0.3657280206680298, "learning_rate": 9.794653705953828e-05, "loss": 2.0675, "step": 348 }, { "epoch": 0.10593413264531795, "grad_norm": 0.43551188707351685, "learning_rate": 9.794046172539491e-05, "loss": 2.3229, "step": 349 }, { "epoch": 0.10623766884200941, "grad_norm": 0.4035640060901642, "learning_rate": 9.793438639125152e-05, "loss": 1.5348, "step": 350 }, { "epoch": 0.10654120503870086, "grad_norm": 0.38934487104415894, "learning_rate": 9.792831105710815e-05, "loss": 2.0634, "step": 351 }, { "epoch": 0.10684474123539232, "grad_norm": 0.3808942437171936, "learning_rate": 9.792223572296478e-05, "loss": 1.5801, "step": 352 }, { "epoch": 0.10714827743208377, "grad_norm": 0.4263344407081604, "learning_rate": 9.791616038882138e-05, "loss": 2.1149, "step": 353 }, { "epoch": 0.10745181362877523, "grad_norm": 0.40345048904418945, "learning_rate": 9.791008505467801e-05, "loss": 1.6522, "step": 354 }, { "epoch": 0.10775534982546668, "grad_norm": 0.39628833532333374, "learning_rate": 9.790400972053464e-05, "loss": 1.8936, "step": 355 }, { "epoch": 0.10805888602215814, "grad_norm": 0.3981876075267792, "learning_rate": 9.789793438639126e-05, "loss": 1.8532, "step": 356 }, { "epoch": 0.10836242221884959, "grad_norm": 0.41689878702163696, "learning_rate": 9.789185905224788e-05, "loss": 1.811, "step": 357 }, { "epoch": 0.10866595841554105, "grad_norm": 0.3519899845123291, "learning_rate": 9.78857837181045e-05, "loss": 2.1879, "step": 358 }, { "epoch": 0.10896949461223251, "grad_norm": 0.4501926004886627, "learning_rate": 9.787970838396113e-05, "loss": 1.7545, "step": 359 }, { "epoch": 0.10927303080892396, "grad_norm": 0.3503700792789459, "learning_rate": 9.787363304981774e-05, "loss": 1.8818, "step": 360 }, { "epoch": 0.10957656700561542, "grad_norm": 0.3641771674156189, "learning_rate": 9.786755771567436e-05, "loss": 1.9666, "step": 361 }, { "epoch": 0.10988010320230687, "grad_norm": 0.41548213362693787, "learning_rate": 9.786148238153099e-05, "loss": 1.8781, "step": 362 }, { "epoch": 0.11018363939899833, "grad_norm": 0.40850362181663513, "learning_rate": 9.785540704738762e-05, "loss": 1.8369, "step": 363 }, { "epoch": 0.11048717559568978, "grad_norm": 0.3874415159225464, "learning_rate": 9.784933171324423e-05, "loss": 1.6867, "step": 364 }, { "epoch": 0.11079071179238124, "grad_norm": 0.40616413950920105, "learning_rate": 9.784325637910086e-05, "loss": 1.7234, "step": 365 }, { "epoch": 0.11109424798907269, "grad_norm": 0.7947202920913696, "learning_rate": 9.783718104495749e-05, "loss": 1.5327, "step": 366 }, { "epoch": 0.11139778418576415, "grad_norm": 0.3792203664779663, "learning_rate": 9.783110571081409e-05, "loss": 1.5403, "step": 367 }, { "epoch": 0.1117013203824556, "grad_norm": 0.4576598107814789, "learning_rate": 9.782503037667072e-05, "loss": 2.2472, "step": 368 }, { "epoch": 0.11200485657914706, "grad_norm": 0.37935471534729004, "learning_rate": 9.781895504252734e-05, "loss": 1.8421, "step": 369 }, { "epoch": 0.11230839277583853, "grad_norm": 0.3658997416496277, "learning_rate": 9.781287970838397e-05, "loss": 1.6879, "step": 370 }, { "epoch": 0.11261192897252997, "grad_norm": 0.3936321437358856, "learning_rate": 9.780680437424059e-05, "loss": 2.0365, "step": 371 }, { "epoch": 0.11291546516922144, "grad_norm": 0.3935524821281433, "learning_rate": 9.78007290400972e-05, "loss": 1.8648, "step": 372 }, { "epoch": 0.11321900136591288, "grad_norm": 0.3798617422580719, "learning_rate": 9.779465370595384e-05, "loss": 2.0528, "step": 373 }, { "epoch": 0.11352253756260434, "grad_norm": 0.38386639952659607, "learning_rate": 9.778857837181045e-05, "loss": 1.5629, "step": 374 }, { "epoch": 0.11382607375929579, "grad_norm": 0.4665718674659729, "learning_rate": 9.778250303766707e-05, "loss": 1.6207, "step": 375 }, { "epoch": 0.11412960995598725, "grad_norm": 0.35728296637535095, "learning_rate": 9.77764277035237e-05, "loss": 1.9548, "step": 376 }, { "epoch": 0.1144331461526787, "grad_norm": 0.3415043354034424, "learning_rate": 9.777035236938032e-05, "loss": 2.0376, "step": 377 }, { "epoch": 0.11473668234937016, "grad_norm": 0.38225334882736206, "learning_rate": 9.776427703523694e-05, "loss": 1.7175, "step": 378 }, { "epoch": 0.11504021854606161, "grad_norm": 0.3931468427181244, "learning_rate": 9.775820170109357e-05, "loss": 2.1832, "step": 379 }, { "epoch": 0.11534375474275307, "grad_norm": 0.3954283595085144, "learning_rate": 9.77521263669502e-05, "loss": 2.1863, "step": 380 }, { "epoch": 0.11564729093944452, "grad_norm": 0.31073784828186035, "learning_rate": 9.77460510328068e-05, "loss": 1.8583, "step": 381 }, { "epoch": 0.11595082713613598, "grad_norm": 0.37894561886787415, "learning_rate": 9.773997569866343e-05, "loss": 2.0385, "step": 382 }, { "epoch": 0.11625436333282745, "grad_norm": 0.3493829369544983, "learning_rate": 9.773390036452005e-05, "loss": 1.8854, "step": 383 }, { "epoch": 0.1165578995295189, "grad_norm": 0.5518279075622559, "learning_rate": 9.772782503037668e-05, "loss": 1.7403, "step": 384 }, { "epoch": 0.11686143572621036, "grad_norm": 0.3724190294742584, "learning_rate": 9.77217496962333e-05, "loss": 1.7759, "step": 385 }, { "epoch": 0.1171649719229018, "grad_norm": 0.4635847508907318, "learning_rate": 9.771567436208991e-05, "loss": 1.8441, "step": 386 }, { "epoch": 0.11746850811959327, "grad_norm": 0.38281580805778503, "learning_rate": 9.770959902794655e-05, "loss": 2.0332, "step": 387 }, { "epoch": 0.11777204431628471, "grad_norm": 0.4179950952529907, "learning_rate": 9.770352369380316e-05, "loss": 2.3451, "step": 388 }, { "epoch": 0.11807558051297617, "grad_norm": 0.34729671478271484, "learning_rate": 9.769744835965978e-05, "loss": 1.9186, "step": 389 }, { "epoch": 0.11837911670966762, "grad_norm": 0.40492531657218933, "learning_rate": 9.769137302551641e-05, "loss": 2.1711, "step": 390 }, { "epoch": 0.11868265290635908, "grad_norm": 0.38143807649612427, "learning_rate": 9.768529769137303e-05, "loss": 1.7601, "step": 391 }, { "epoch": 0.11898618910305053, "grad_norm": 0.35463643074035645, "learning_rate": 9.767922235722965e-05, "loss": 1.8068, "step": 392 }, { "epoch": 0.119289725299742, "grad_norm": 0.3719339370727539, "learning_rate": 9.767314702308628e-05, "loss": 1.6296, "step": 393 }, { "epoch": 0.11959326149643346, "grad_norm": 0.8585293292999268, "learning_rate": 9.766707168894291e-05, "loss": 1.8841, "step": 394 }, { "epoch": 0.1198967976931249, "grad_norm": 0.5286839604377747, "learning_rate": 9.766099635479951e-05, "loss": 1.3645, "step": 395 }, { "epoch": 0.12020033388981637, "grad_norm": 0.44176310300827026, "learning_rate": 9.765492102065614e-05, "loss": 2.1759, "step": 396 }, { "epoch": 0.12050387008650781, "grad_norm": 0.39778321981430054, "learning_rate": 9.764884568651276e-05, "loss": 1.8344, "step": 397 }, { "epoch": 0.12080740628319928, "grad_norm": 0.4364762604236603, "learning_rate": 9.764277035236939e-05, "loss": 1.7834, "step": 398 }, { "epoch": 0.12111094247989072, "grad_norm": 0.37305301427841187, "learning_rate": 9.763669501822601e-05, "loss": 1.7474, "step": 399 }, { "epoch": 0.12141447867658219, "grad_norm": 0.38804179430007935, "learning_rate": 9.763061968408262e-05, "loss": 1.6241, "step": 400 }, { "epoch": 0.12171801487327363, "grad_norm": 0.9124923944473267, "learning_rate": 9.762454434993926e-05, "loss": 2.1456, "step": 401 }, { "epoch": 0.1220215510699651, "grad_norm": 0.38728946447372437, "learning_rate": 9.761846901579587e-05, "loss": 1.9724, "step": 402 }, { "epoch": 0.12232508726665654, "grad_norm": 0.4121726155281067, "learning_rate": 9.761239368165249e-05, "loss": 2.1119, "step": 403 }, { "epoch": 0.122628623463348, "grad_norm": 0.46508121490478516, "learning_rate": 9.760631834750912e-05, "loss": 1.4402, "step": 404 }, { "epoch": 0.12293215966003945, "grad_norm": 0.4460875391960144, "learning_rate": 9.760024301336574e-05, "loss": 2.0572, "step": 405 }, { "epoch": 0.12323569585673091, "grad_norm": 0.38444089889526367, "learning_rate": 9.759416767922236e-05, "loss": 1.9943, "step": 406 }, { "epoch": 0.12353923205342238, "grad_norm": 0.3515356779098511, "learning_rate": 9.758809234507899e-05, "loss": 1.5699, "step": 407 }, { "epoch": 0.12384276825011382, "grad_norm": 0.4010019302368164, "learning_rate": 9.758201701093562e-05, "loss": 1.8674, "step": 408 }, { "epoch": 0.12414630444680529, "grad_norm": 0.4250737428665161, "learning_rate": 9.757594167679222e-05, "loss": 1.0738, "step": 409 }, { "epoch": 0.12444984064349673, "grad_norm": 0.3719541132450104, "learning_rate": 9.756986634264885e-05, "loss": 1.7292, "step": 410 }, { "epoch": 0.1247533768401882, "grad_norm": 0.385420024394989, "learning_rate": 9.756379100850547e-05, "loss": 1.9503, "step": 411 }, { "epoch": 0.12505691303687966, "grad_norm": 0.480056494474411, "learning_rate": 9.75577156743621e-05, "loss": 2.0093, "step": 412 }, { "epoch": 0.1253604492335711, "grad_norm": 0.38757050037384033, "learning_rate": 9.755164034021872e-05, "loss": 2.2636, "step": 413 }, { "epoch": 0.12566398543026255, "grad_norm": 0.3712436854839325, "learning_rate": 9.754556500607533e-05, "loss": 2.0479, "step": 414 }, { "epoch": 0.12596752162695402, "grad_norm": 2.4313645362854004, "learning_rate": 9.753948967193197e-05, "loss": 2.376, "step": 415 }, { "epoch": 0.12627105782364548, "grad_norm": 0.8028842210769653, "learning_rate": 9.753341433778858e-05, "loss": 1.7702, "step": 416 }, { "epoch": 0.1265745940203369, "grad_norm": 0.48586025834083557, "learning_rate": 9.75273390036452e-05, "loss": 1.7623, "step": 417 }, { "epoch": 0.12687813021702837, "grad_norm": 0.4017583131790161, "learning_rate": 9.752126366950183e-05, "loss": 1.8756, "step": 418 }, { "epoch": 0.12718166641371983, "grad_norm": 0.3845275044441223, "learning_rate": 9.751518833535845e-05, "loss": 1.95, "step": 419 }, { "epoch": 0.1274852026104113, "grad_norm": 0.41064974665641785, "learning_rate": 9.750911300121507e-05, "loss": 1.9622, "step": 420 }, { "epoch": 0.12778873880710276, "grad_norm": 0.33571726083755493, "learning_rate": 9.75030376670717e-05, "loss": 1.7291, "step": 421 }, { "epoch": 0.1280922750037942, "grad_norm": 0.3988417387008667, "learning_rate": 9.749696233292833e-05, "loss": 1.9349, "step": 422 }, { "epoch": 0.12839581120048565, "grad_norm": 0.37586870789527893, "learning_rate": 9.749088699878493e-05, "loss": 1.5649, "step": 423 }, { "epoch": 0.12869934739717712, "grad_norm": 0.37013643980026245, "learning_rate": 9.748481166464156e-05, "loss": 1.9448, "step": 424 }, { "epoch": 0.12900288359386858, "grad_norm": 0.31406837701797485, "learning_rate": 9.747873633049818e-05, "loss": 1.8496, "step": 425 }, { "epoch": 0.12930641979056, "grad_norm": 0.3691607415676117, "learning_rate": 9.74726609963548e-05, "loss": 2.0649, "step": 426 }, { "epoch": 0.12960995598725147, "grad_norm": 0.39633169770240784, "learning_rate": 9.746658566221143e-05, "loss": 1.727, "step": 427 }, { "epoch": 0.12991349218394294, "grad_norm": 0.47319236397743225, "learning_rate": 9.746051032806804e-05, "loss": 2.4487, "step": 428 }, { "epoch": 0.1302170283806344, "grad_norm": 0.35506609082221985, "learning_rate": 9.745443499392468e-05, "loss": 2.0857, "step": 429 }, { "epoch": 0.13052056457732586, "grad_norm": 0.38134855031967163, "learning_rate": 9.744835965978129e-05, "loss": 1.5862, "step": 430 }, { "epoch": 0.1308241007740173, "grad_norm": 0.6288440227508545, "learning_rate": 9.744228432563791e-05, "loss": 2.2402, "step": 431 }, { "epoch": 0.13112763697070876, "grad_norm": 0.42172396183013916, "learning_rate": 9.743620899149454e-05, "loss": 1.7245, "step": 432 }, { "epoch": 0.13143117316740022, "grad_norm": 0.38452383875846863, "learning_rate": 9.743013365735116e-05, "loss": 1.6666, "step": 433 }, { "epoch": 0.13173470936409168, "grad_norm": 2.0956268310546875, "learning_rate": 9.742405832320778e-05, "loss": 1.8268, "step": 434 }, { "epoch": 0.1320382455607831, "grad_norm": 0.4363501965999603, "learning_rate": 9.74179829890644e-05, "loss": 2.3149, "step": 435 }, { "epoch": 0.13234178175747457, "grad_norm": 0.38243743777275085, "learning_rate": 9.741190765492104e-05, "loss": 1.9706, "step": 436 }, { "epoch": 0.13264531795416604, "grad_norm": 0.38724249601364136, "learning_rate": 9.740583232077764e-05, "loss": 1.779, "step": 437 }, { "epoch": 0.1329488541508575, "grad_norm": 0.43606194853782654, "learning_rate": 9.739975698663427e-05, "loss": 2.0371, "step": 438 }, { "epoch": 0.13325239034754893, "grad_norm": 0.3511301279067993, "learning_rate": 9.739368165249089e-05, "loss": 1.8771, "step": 439 }, { "epoch": 0.1335559265442404, "grad_norm": 0.3883466124534607, "learning_rate": 9.73876063183475e-05, "loss": 2.1165, "step": 440 }, { "epoch": 0.13385946274093186, "grad_norm": 0.41711342334747314, "learning_rate": 9.738153098420414e-05, "loss": 1.8367, "step": 441 }, { "epoch": 0.13416299893762332, "grad_norm": 0.6146459579467773, "learning_rate": 9.737545565006075e-05, "loss": 1.9233, "step": 442 }, { "epoch": 0.13446653513431478, "grad_norm": 0.37016820907592773, "learning_rate": 9.736938031591739e-05, "loss": 1.8804, "step": 443 }, { "epoch": 0.1347700713310062, "grad_norm": 0.3620823323726654, "learning_rate": 9.7363304981774e-05, "loss": 1.5837, "step": 444 }, { "epoch": 0.13507360752769768, "grad_norm": 0.37375590205192566, "learning_rate": 9.735722964763062e-05, "loss": 1.8095, "step": 445 }, { "epoch": 0.13537714372438914, "grad_norm": 0.6026252508163452, "learning_rate": 9.735115431348725e-05, "loss": 2.041, "step": 446 }, { "epoch": 0.1356806799210806, "grad_norm": 0.3753829002380371, "learning_rate": 9.734507897934387e-05, "loss": 2.0627, "step": 447 }, { "epoch": 0.13598421611777203, "grad_norm": 0.3974304795265198, "learning_rate": 9.733900364520049e-05, "loss": 2.1671, "step": 448 }, { "epoch": 0.1362877523144635, "grad_norm": 0.34336552023887634, "learning_rate": 9.733292831105712e-05, "loss": 1.9118, "step": 449 }, { "epoch": 0.13659128851115496, "grad_norm": 0.3588969111442566, "learning_rate": 9.732685297691373e-05, "loss": 1.9768, "step": 450 }, { "epoch": 0.13689482470784642, "grad_norm": 0.38693130016326904, "learning_rate": 9.732077764277035e-05, "loss": 2.1524, "step": 451 }, { "epoch": 0.13719836090453785, "grad_norm": 0.370612233877182, "learning_rate": 9.731470230862698e-05, "loss": 1.8753, "step": 452 }, { "epoch": 0.13750189710122931, "grad_norm": 0.4038615822792053, "learning_rate": 9.73086269744836e-05, "loss": 2.1024, "step": 453 }, { "epoch": 0.13780543329792078, "grad_norm": 0.3728694021701813, "learning_rate": 9.730255164034022e-05, "loss": 1.8864, "step": 454 }, { "epoch": 0.13810896949461224, "grad_norm": 0.37269484996795654, "learning_rate": 9.729647630619685e-05, "loss": 1.8244, "step": 455 }, { "epoch": 0.1384125056913037, "grad_norm": 0.39840301871299744, "learning_rate": 9.729040097205346e-05, "loss": 1.8215, "step": 456 }, { "epoch": 0.13871604188799513, "grad_norm": 0.39856579899787903, "learning_rate": 9.72843256379101e-05, "loss": 1.9475, "step": 457 }, { "epoch": 0.1390195780846866, "grad_norm": 0.43041157722473145, "learning_rate": 9.727825030376671e-05, "loss": 1.87, "step": 458 }, { "epoch": 0.13932311428137806, "grad_norm": 0.4047417640686035, "learning_rate": 9.727217496962333e-05, "loss": 2.1425, "step": 459 }, { "epoch": 0.13962665047806952, "grad_norm": 0.3901901841163635, "learning_rate": 9.726609963547996e-05, "loss": 1.917, "step": 460 }, { "epoch": 0.13993018667476095, "grad_norm": 0.40706855058670044, "learning_rate": 9.726002430133658e-05, "loss": 2.1278, "step": 461 }, { "epoch": 0.14023372287145242, "grad_norm": 0.47956356406211853, "learning_rate": 9.72539489671932e-05, "loss": 1.2435, "step": 462 }, { "epoch": 0.14053725906814388, "grad_norm": 0.35697320103645325, "learning_rate": 9.724787363304983e-05, "loss": 1.7965, "step": 463 }, { "epoch": 0.14084079526483534, "grad_norm": 0.4102901518344879, "learning_rate": 9.724179829890644e-05, "loss": 1.7132, "step": 464 }, { "epoch": 0.1411443314615268, "grad_norm": 0.3353058099746704, "learning_rate": 9.723572296476306e-05, "loss": 1.674, "step": 465 }, { "epoch": 0.14144786765821823, "grad_norm": 0.3946186900138855, "learning_rate": 9.722964763061969e-05, "loss": 2.0067, "step": 466 }, { "epoch": 0.1417514038549097, "grad_norm": 0.3974400460720062, "learning_rate": 9.722357229647631e-05, "loss": 1.7242, "step": 467 }, { "epoch": 0.14205494005160116, "grad_norm": 1.3334546089172363, "learning_rate": 9.721749696233293e-05, "loss": 2.0141, "step": 468 }, { "epoch": 0.14235847624829262, "grad_norm": 0.36386749148368835, "learning_rate": 9.721142162818956e-05, "loss": 1.8221, "step": 469 }, { "epoch": 0.14266201244498405, "grad_norm": 0.35332655906677246, "learning_rate": 9.720534629404617e-05, "loss": 1.4251, "step": 470 }, { "epoch": 0.14296554864167552, "grad_norm": 0.563428521156311, "learning_rate": 9.71992709599028e-05, "loss": 1.8633, "step": 471 }, { "epoch": 0.14326908483836698, "grad_norm": 0.39971691370010376, "learning_rate": 9.719319562575942e-05, "loss": 2.0397, "step": 472 }, { "epoch": 0.14357262103505844, "grad_norm": 0.39253416657447815, "learning_rate": 9.718712029161604e-05, "loss": 1.3104, "step": 473 }, { "epoch": 0.14387615723174987, "grad_norm": 0.41245678067207336, "learning_rate": 9.718104495747267e-05, "loss": 1.8821, "step": 474 }, { "epoch": 0.14417969342844134, "grad_norm": 0.5424125790596008, "learning_rate": 9.717496962332929e-05, "loss": 1.7739, "step": 475 }, { "epoch": 0.1444832296251328, "grad_norm": 0.425329327583313, "learning_rate": 9.71688942891859e-05, "loss": 1.8047, "step": 476 }, { "epoch": 0.14478676582182426, "grad_norm": 0.43624332547187805, "learning_rate": 9.716281895504254e-05, "loss": 1.5254, "step": 477 }, { "epoch": 0.14509030201851572, "grad_norm": 0.4078616201877594, "learning_rate": 9.715674362089915e-05, "loss": 1.8286, "step": 478 }, { "epoch": 0.14539383821520716, "grad_norm": 0.4144497513771057, "learning_rate": 9.715066828675577e-05, "loss": 2.0737, "step": 479 }, { "epoch": 0.14569737441189862, "grad_norm": 0.37600383162498474, "learning_rate": 9.71445929526124e-05, "loss": 2.2512, "step": 480 }, { "epoch": 0.14600091060859008, "grad_norm": 0.376644492149353, "learning_rate": 9.713851761846902e-05, "loss": 2.0374, "step": 481 }, { "epoch": 0.14630444680528154, "grad_norm": 1.3389711380004883, "learning_rate": 9.713244228432564e-05, "loss": 1.7461, "step": 482 }, { "epoch": 0.14660798300197297, "grad_norm": 0.32981812953948975, "learning_rate": 9.712636695018227e-05, "loss": 1.8383, "step": 483 }, { "epoch": 0.14691151919866444, "grad_norm": 0.3440997004508972, "learning_rate": 9.712029161603888e-05, "loss": 1.6228, "step": 484 }, { "epoch": 0.1472150553953559, "grad_norm": 0.3392031788825989, "learning_rate": 9.711421628189552e-05, "loss": 1.9423, "step": 485 }, { "epoch": 0.14751859159204736, "grad_norm": 0.37523385882377625, "learning_rate": 9.710814094775213e-05, "loss": 2.1037, "step": 486 }, { "epoch": 0.1478221277887388, "grad_norm": 0.36372002959251404, "learning_rate": 9.710206561360875e-05, "loss": 1.863, "step": 487 }, { "epoch": 0.14812566398543026, "grad_norm": 0.3782525360584259, "learning_rate": 9.709599027946538e-05, "loss": 2.1418, "step": 488 }, { "epoch": 0.14842920018212172, "grad_norm": 0.7462687492370605, "learning_rate": 9.7089914945322e-05, "loss": 1.8172, "step": 489 }, { "epoch": 0.14873273637881318, "grad_norm": 0.3471963107585907, "learning_rate": 9.708383961117862e-05, "loss": 2.1015, "step": 490 }, { "epoch": 0.14903627257550464, "grad_norm": 0.5325261950492859, "learning_rate": 9.707776427703525e-05, "loss": 2.05, "step": 491 }, { "epoch": 0.14933980877219608, "grad_norm": 0.5748963952064514, "learning_rate": 9.707168894289186e-05, "loss": 2.1217, "step": 492 }, { "epoch": 0.14964334496888754, "grad_norm": 0.3891385495662689, "learning_rate": 9.706561360874848e-05, "loss": 1.5301, "step": 493 }, { "epoch": 0.149946881165579, "grad_norm": 0.48258477449417114, "learning_rate": 9.705953827460511e-05, "loss": 1.839, "step": 494 }, { "epoch": 0.15025041736227046, "grad_norm": 0.5301745533943176, "learning_rate": 9.705346294046173e-05, "loss": 1.7934, "step": 495 }, { "epoch": 0.1505539535589619, "grad_norm": 0.3614468276500702, "learning_rate": 9.704738760631835e-05, "loss": 1.7176, "step": 496 }, { "epoch": 0.15085748975565336, "grad_norm": 0.31026577949523926, "learning_rate": 9.704131227217498e-05, "loss": 1.925, "step": 497 }, { "epoch": 0.15116102595234482, "grad_norm": 0.37441205978393555, "learning_rate": 9.70352369380316e-05, "loss": 2.1532, "step": 498 }, { "epoch": 0.15146456214903628, "grad_norm": 0.37447264790534973, "learning_rate": 9.702916160388823e-05, "loss": 2.0351, "step": 499 }, { "epoch": 0.15176809834572771, "grad_norm": 0.7793715000152588, "learning_rate": 9.702308626974484e-05, "loss": 2.1011, "step": 500 }, { "epoch": 0.15207163454241918, "grad_norm": 0.3725285232067108, "learning_rate": 9.701701093560146e-05, "loss": 2.1641, "step": 501 }, { "epoch": 0.15237517073911064, "grad_norm": 0.35334041714668274, "learning_rate": 9.701093560145809e-05, "loss": 2.0773, "step": 502 }, { "epoch": 0.1526787069358021, "grad_norm": 0.3819803297519684, "learning_rate": 9.700486026731471e-05, "loss": 1.836, "step": 503 }, { "epoch": 0.15298224313249356, "grad_norm": 0.403060644865036, "learning_rate": 9.699878493317133e-05, "loss": 2.1178, "step": 504 }, { "epoch": 0.153285779329185, "grad_norm": 0.2956171929836273, "learning_rate": 9.699270959902796e-05, "loss": 1.6397, "step": 505 }, { "epoch": 0.15358931552587646, "grad_norm": 0.30349212884902954, "learning_rate": 9.698663426488457e-05, "loss": 1.6435, "step": 506 }, { "epoch": 0.15389285172256792, "grad_norm": 0.38176605105400085, "learning_rate": 9.698055893074119e-05, "loss": 1.8004, "step": 507 }, { "epoch": 0.15419638791925938, "grad_norm": 0.5072764158248901, "learning_rate": 9.697448359659782e-05, "loss": 1.6926, "step": 508 }, { "epoch": 0.15449992411595082, "grad_norm": 0.5380321145057678, "learning_rate": 9.696840826245444e-05, "loss": 2.2396, "step": 509 }, { "epoch": 0.15480346031264228, "grad_norm": 0.40872499346733093, "learning_rate": 9.696233292831106e-05, "loss": 1.8645, "step": 510 }, { "epoch": 0.15510699650933374, "grad_norm": 0.9282563924789429, "learning_rate": 9.695625759416769e-05, "loss": 1.7143, "step": 511 }, { "epoch": 0.1554105327060252, "grad_norm": 0.7355011105537415, "learning_rate": 9.69501822600243e-05, "loss": 1.7461, "step": 512 }, { "epoch": 0.15571406890271666, "grad_norm": 0.9056992530822754, "learning_rate": 9.694410692588092e-05, "loss": 2.1454, "step": 513 }, { "epoch": 0.1560176050994081, "grad_norm": 0.38970059156417847, "learning_rate": 9.693803159173755e-05, "loss": 2.3313, "step": 514 }, { "epoch": 0.15632114129609956, "grad_norm": 0.3651840090751648, "learning_rate": 9.693195625759417e-05, "loss": 1.84, "step": 515 }, { "epoch": 0.15662467749279102, "grad_norm": 0.38748839497566223, "learning_rate": 9.69258809234508e-05, "loss": 1.9666, "step": 516 }, { "epoch": 0.15692821368948248, "grad_norm": 0.407427042722702, "learning_rate": 9.691980558930742e-05, "loss": 1.9351, "step": 517 }, { "epoch": 0.15723174988617392, "grad_norm": 0.31920358538627625, "learning_rate": 9.691373025516404e-05, "loss": 1.7928, "step": 518 }, { "epoch": 0.15753528608286538, "grad_norm": 0.4002731442451477, "learning_rate": 9.690765492102067e-05, "loss": 1.4087, "step": 519 }, { "epoch": 0.15783882227955684, "grad_norm": 0.42125266790390015, "learning_rate": 9.690157958687728e-05, "loss": 1.6185, "step": 520 }, { "epoch": 0.1581423584762483, "grad_norm": 0.3706381022930145, "learning_rate": 9.68955042527339e-05, "loss": 2.0546, "step": 521 }, { "epoch": 0.15844589467293974, "grad_norm": 0.41669219732284546, "learning_rate": 9.688942891859053e-05, "loss": 2.0399, "step": 522 }, { "epoch": 0.1587494308696312, "grad_norm": 0.36784589290618896, "learning_rate": 9.688335358444715e-05, "loss": 1.8182, "step": 523 }, { "epoch": 0.15905296706632266, "grad_norm": 0.3830098807811737, "learning_rate": 9.687727825030377e-05, "loss": 2.0728, "step": 524 }, { "epoch": 0.15935650326301412, "grad_norm": 0.37658411264419556, "learning_rate": 9.68712029161604e-05, "loss": 1.9915, "step": 525 }, { "epoch": 0.15966003945970558, "grad_norm": 0.375053733587265, "learning_rate": 9.686512758201701e-05, "loss": 1.7589, "step": 526 }, { "epoch": 0.15996357565639702, "grad_norm": 0.3810443580150604, "learning_rate": 9.685905224787363e-05, "loss": 1.6921, "step": 527 }, { "epoch": 0.16026711185308848, "grad_norm": 0.41676437854766846, "learning_rate": 9.685297691373026e-05, "loss": 1.1885, "step": 528 }, { "epoch": 0.16057064804977994, "grad_norm": 0.40823522210121155, "learning_rate": 9.684690157958688e-05, "loss": 1.8335, "step": 529 }, { "epoch": 0.1608741842464714, "grad_norm": 0.3795296251773834, "learning_rate": 9.684082624544351e-05, "loss": 1.9209, "step": 530 }, { "epoch": 0.16117772044316284, "grad_norm": 0.41227850317955017, "learning_rate": 9.683475091130013e-05, "loss": 1.7791, "step": 531 }, { "epoch": 0.1614812566398543, "grad_norm": 0.436483234167099, "learning_rate": 9.682867557715675e-05, "loss": 1.8648, "step": 532 }, { "epoch": 0.16178479283654576, "grad_norm": 0.43618106842041016, "learning_rate": 9.682260024301338e-05, "loss": 1.7593, "step": 533 }, { "epoch": 0.16208832903323722, "grad_norm": 0.37166401743888855, "learning_rate": 9.681652490887e-05, "loss": 1.9301, "step": 534 }, { "epoch": 0.16239186522992866, "grad_norm": 0.5716313123703003, "learning_rate": 9.681044957472661e-05, "loss": 2.1892, "step": 535 }, { "epoch": 0.16269540142662012, "grad_norm": 0.4008532166481018, "learning_rate": 9.680437424058324e-05, "loss": 1.8818, "step": 536 }, { "epoch": 0.16299893762331158, "grad_norm": 0.42276448011398315, "learning_rate": 9.679829890643986e-05, "loss": 1.4488, "step": 537 }, { "epoch": 0.16330247382000304, "grad_norm": 0.343649685382843, "learning_rate": 9.679222357229648e-05, "loss": 1.9549, "step": 538 }, { "epoch": 0.1636060100166945, "grad_norm": 0.3381790518760681, "learning_rate": 9.678614823815311e-05, "loss": 1.9949, "step": 539 }, { "epoch": 0.16390954621338594, "grad_norm": 0.3788328468799591, "learning_rate": 9.678007290400973e-05, "loss": 2.0377, "step": 540 }, { "epoch": 0.1642130824100774, "grad_norm": 0.39532333612442017, "learning_rate": 9.677399756986634e-05, "loss": 1.8837, "step": 541 }, { "epoch": 0.16451661860676886, "grad_norm": 0.36701199412345886, "learning_rate": 9.676792223572297e-05, "loss": 1.9046, "step": 542 }, { "epoch": 0.16482015480346032, "grad_norm": 0.4146950840950012, "learning_rate": 9.676184690157959e-05, "loss": 1.8369, "step": 543 }, { "epoch": 0.16512369100015176, "grad_norm": 0.34827515482902527, "learning_rate": 9.675577156743622e-05, "loss": 2.0196, "step": 544 }, { "epoch": 0.16542722719684322, "grad_norm": 0.36529168486595154, "learning_rate": 9.674969623329283e-05, "loss": 1.9189, "step": 545 }, { "epoch": 0.16573076339353468, "grad_norm": 0.3718273639678955, "learning_rate": 9.674362089914946e-05, "loss": 1.933, "step": 546 }, { "epoch": 0.16603429959022614, "grad_norm": 0.3853289484977722, "learning_rate": 9.673754556500609e-05, "loss": 1.9796, "step": 547 }, { "epoch": 0.16633783578691758, "grad_norm": 0.38597023487091064, "learning_rate": 9.67314702308627e-05, "loss": 1.5167, "step": 548 }, { "epoch": 0.16664137198360904, "grad_norm": 0.3792777955532074, "learning_rate": 9.672539489671932e-05, "loss": 1.9285, "step": 549 }, { "epoch": 0.1669449081803005, "grad_norm": 0.3506297171115875, "learning_rate": 9.671931956257595e-05, "loss": 1.8449, "step": 550 }, { "epoch": 0.16724844437699196, "grad_norm": 0.3851594030857086, "learning_rate": 9.671324422843257e-05, "loss": 1.9774, "step": 551 }, { "epoch": 0.16755198057368342, "grad_norm": 0.3438011705875397, "learning_rate": 9.670716889428919e-05, "loss": 2.0849, "step": 552 }, { "epoch": 0.16785551677037486, "grad_norm": 0.3682856559753418, "learning_rate": 9.670109356014582e-05, "loss": 2.09, "step": 553 }, { "epoch": 0.16815905296706632, "grad_norm": 0.5098361372947693, "learning_rate": 9.669501822600244e-05, "loss": 1.7843, "step": 554 }, { "epoch": 0.16846258916375778, "grad_norm": 0.3482840359210968, "learning_rate": 9.668894289185905e-05, "loss": 1.5129, "step": 555 }, { "epoch": 0.16876612536044924, "grad_norm": 0.3557680547237396, "learning_rate": 9.668286755771568e-05, "loss": 2.1033, "step": 556 }, { "epoch": 0.16906966155714068, "grad_norm": 0.3622763752937317, "learning_rate": 9.66767922235723e-05, "loss": 1.7729, "step": 557 }, { "epoch": 0.16937319775383214, "grad_norm": 0.3461545407772064, "learning_rate": 9.667071688942893e-05, "loss": 1.7999, "step": 558 }, { "epoch": 0.1696767339505236, "grad_norm": 0.39597707986831665, "learning_rate": 9.666464155528554e-05, "loss": 1.8805, "step": 559 }, { "epoch": 0.16998027014721506, "grad_norm": 0.39985769987106323, "learning_rate": 9.665856622114217e-05, "loss": 1.8858, "step": 560 }, { "epoch": 0.17028380634390652, "grad_norm": 0.352029025554657, "learning_rate": 9.66524908869988e-05, "loss": 1.8077, "step": 561 }, { "epoch": 0.17058734254059796, "grad_norm": 0.5778902173042297, "learning_rate": 9.66464155528554e-05, "loss": 1.7001, "step": 562 }, { "epoch": 0.17089087873728942, "grad_norm": 0.49807438254356384, "learning_rate": 9.664034021871203e-05, "loss": 1.7536, "step": 563 }, { "epoch": 0.17119441493398088, "grad_norm": 0.6479670405387878, "learning_rate": 9.663426488456866e-05, "loss": 1.7827, "step": 564 }, { "epoch": 0.17149795113067234, "grad_norm": 0.4150646924972534, "learning_rate": 9.662818955042528e-05, "loss": 2.093, "step": 565 }, { "epoch": 0.17180148732736378, "grad_norm": 0.4018631875514984, "learning_rate": 9.66221142162819e-05, "loss": 1.9505, "step": 566 }, { "epoch": 0.17210502352405524, "grad_norm": 0.34705424308776855, "learning_rate": 9.661603888213853e-05, "loss": 2.0355, "step": 567 }, { "epoch": 0.1724085597207467, "grad_norm": 0.40585950016975403, "learning_rate": 9.660996354799515e-05, "loss": 1.5372, "step": 568 }, { "epoch": 0.17271209591743816, "grad_norm": 0.31652507185935974, "learning_rate": 9.660388821385176e-05, "loss": 1.6156, "step": 569 }, { "epoch": 0.1730156321141296, "grad_norm": 0.40016746520996094, "learning_rate": 9.65978128797084e-05, "loss": 1.6054, "step": 570 }, { "epoch": 0.17331916831082106, "grad_norm": 0.3570103943347931, "learning_rate": 9.659173754556501e-05, "loss": 1.863, "step": 571 }, { "epoch": 0.17362270450751252, "grad_norm": 0.3687574565410614, "learning_rate": 9.658566221142164e-05, "loss": 1.6009, "step": 572 }, { "epoch": 0.17392624070420398, "grad_norm": 0.35581347346305847, "learning_rate": 9.657958687727825e-05, "loss": 2.2078, "step": 573 }, { "epoch": 0.17422977690089544, "grad_norm": 0.44036948680877686, "learning_rate": 9.657351154313488e-05, "loss": 1.7993, "step": 574 }, { "epoch": 0.17453331309758688, "grad_norm": 0.3790392577648163, "learning_rate": 9.656743620899151e-05, "loss": 1.9854, "step": 575 }, { "epoch": 0.17483684929427834, "grad_norm": 0.37755638360977173, "learning_rate": 9.656136087484811e-05, "loss": 1.8991, "step": 576 }, { "epoch": 0.1751403854909698, "grad_norm": 0.37551677227020264, "learning_rate": 9.655528554070474e-05, "loss": 1.5446, "step": 577 }, { "epoch": 0.17544392168766126, "grad_norm": 0.36646074056625366, "learning_rate": 9.654921020656137e-05, "loss": 1.6491, "step": 578 }, { "epoch": 0.1757474578843527, "grad_norm": 0.40674564242362976, "learning_rate": 9.654313487241799e-05, "loss": 2.0545, "step": 579 }, { "epoch": 0.17605099408104416, "grad_norm": 0.40863969922065735, "learning_rate": 9.653705953827461e-05, "loss": 1.6848, "step": 580 }, { "epoch": 0.17635453027773562, "grad_norm": 0.3962380886077881, "learning_rate": 9.653098420413124e-05, "loss": 1.8451, "step": 581 }, { "epoch": 0.17665806647442708, "grad_norm": 0.3665854334831238, "learning_rate": 9.652490886998786e-05, "loss": 2.0664, "step": 582 }, { "epoch": 0.17696160267111852, "grad_norm": 0.3678790032863617, "learning_rate": 9.651883353584447e-05, "loss": 1.9612, "step": 583 }, { "epoch": 0.17726513886780998, "grad_norm": 0.37832558155059814, "learning_rate": 9.65127582017011e-05, "loss": 2.3693, "step": 584 }, { "epoch": 0.17756867506450144, "grad_norm": 0.37538209557533264, "learning_rate": 9.650668286755772e-05, "loss": 1.9305, "step": 585 }, { "epoch": 0.1778722112611929, "grad_norm": 0.4227273166179657, "learning_rate": 9.650060753341434e-05, "loss": 2.1853, "step": 586 }, { "epoch": 0.17817574745788436, "grad_norm": 0.35160574316978455, "learning_rate": 9.649453219927096e-05, "loss": 1.6491, "step": 587 }, { "epoch": 0.1784792836545758, "grad_norm": 0.3960542380809784, "learning_rate": 9.648845686512759e-05, "loss": 2.052, "step": 588 }, { "epoch": 0.17878281985126726, "grad_norm": 0.4215950667858124, "learning_rate": 9.648238153098422e-05, "loss": 1.4572, "step": 589 }, { "epoch": 0.17908635604795872, "grad_norm": 0.35461676120758057, "learning_rate": 9.647630619684082e-05, "loss": 1.8047, "step": 590 }, { "epoch": 0.17938989224465018, "grad_norm": 0.3570484220981598, "learning_rate": 9.647023086269745e-05, "loss": 1.5481, "step": 591 }, { "epoch": 0.17969342844134162, "grad_norm": 0.3583620488643646, "learning_rate": 9.646415552855408e-05, "loss": 1.6005, "step": 592 }, { "epoch": 0.17999696463803308, "grad_norm": 0.3991422653198242, "learning_rate": 9.64580801944107e-05, "loss": 1.5039, "step": 593 }, { "epoch": 0.18030050083472454, "grad_norm": 0.4470183551311493, "learning_rate": 9.645200486026732e-05, "loss": 1.9181, "step": 594 }, { "epoch": 0.180604037031416, "grad_norm": 0.6622103452682495, "learning_rate": 9.644592952612395e-05, "loss": 1.9772, "step": 595 }, { "epoch": 0.18090757322810747, "grad_norm": 0.33143168687820435, "learning_rate": 9.643985419198057e-05, "loss": 1.9468, "step": 596 }, { "epoch": 0.1812111094247989, "grad_norm": 0.3072865903377533, "learning_rate": 9.643377885783718e-05, "loss": 1.8147, "step": 597 }, { "epoch": 0.18151464562149036, "grad_norm": 0.5602253079414368, "learning_rate": 9.642770352369381e-05, "loss": 2.069, "step": 598 }, { "epoch": 0.18181818181818182, "grad_norm": 0.29752591252326965, "learning_rate": 9.642162818955043e-05, "loss": 2.039, "step": 599 }, { "epoch": 0.18212171801487329, "grad_norm": 0.37780001759529114, "learning_rate": 9.641555285540705e-05, "loss": 1.8751, "step": 600 }, { "epoch": 0.18242525421156472, "grad_norm": 0.40171170234680176, "learning_rate": 9.640947752126367e-05, "loss": 1.8439, "step": 601 }, { "epoch": 0.18272879040825618, "grad_norm": 0.3567606210708618, "learning_rate": 9.64034021871203e-05, "loss": 1.7814, "step": 602 }, { "epoch": 0.18303232660494764, "grad_norm": 0.3742719888687134, "learning_rate": 9.639732685297693e-05, "loss": 2.3184, "step": 603 }, { "epoch": 0.1833358628016391, "grad_norm": 0.3715921938419342, "learning_rate": 9.639125151883353e-05, "loss": 1.9357, "step": 604 }, { "epoch": 0.18363939899833054, "grad_norm": 0.4021666347980499, "learning_rate": 9.638517618469016e-05, "loss": 2.0251, "step": 605 }, { "epoch": 0.183942935195022, "grad_norm": 0.3618490695953369, "learning_rate": 9.637910085054679e-05, "loss": 1.8889, "step": 606 }, { "epoch": 0.18424647139171346, "grad_norm": 0.3828302025794983, "learning_rate": 9.637302551640341e-05, "loss": 1.8222, "step": 607 }, { "epoch": 0.18455000758840492, "grad_norm": 0.3974449336528778, "learning_rate": 9.636695018226003e-05, "loss": 2.0484, "step": 608 }, { "epoch": 0.18485354378509639, "grad_norm": 0.4195742607116699, "learning_rate": 9.636087484811666e-05, "loss": 1.8497, "step": 609 }, { "epoch": 0.18515707998178782, "grad_norm": 0.3607097864151001, "learning_rate": 9.635479951397328e-05, "loss": 1.7574, "step": 610 }, { "epoch": 0.18546061617847928, "grad_norm": 0.39163729548454285, "learning_rate": 9.634872417982989e-05, "loss": 1.8299, "step": 611 }, { "epoch": 0.18576415237517074, "grad_norm": 0.4060773551464081, "learning_rate": 9.634264884568652e-05, "loss": 1.8041, "step": 612 }, { "epoch": 0.1860676885718622, "grad_norm": 0.34089556336402893, "learning_rate": 9.633657351154314e-05, "loss": 1.7692, "step": 613 }, { "epoch": 0.18637122476855364, "grad_norm": 0.33763736486434937, "learning_rate": 9.633049817739976e-05, "loss": 2.0368, "step": 614 }, { "epoch": 0.1866747609652451, "grad_norm": 0.3397420644760132, "learning_rate": 9.632442284325638e-05, "loss": 1.9108, "step": 615 }, { "epoch": 0.18697829716193656, "grad_norm": 0.387208491563797, "learning_rate": 9.6318347509113e-05, "loss": 1.7698, "step": 616 }, { "epoch": 0.18728183335862802, "grad_norm": 0.4464956820011139, "learning_rate": 9.631227217496964e-05, "loss": 1.7602, "step": 617 }, { "epoch": 0.18758536955531946, "grad_norm": 0.34682369232177734, "learning_rate": 9.630619684082624e-05, "loss": 1.6602, "step": 618 }, { "epoch": 0.18788890575201092, "grad_norm": 0.8865132331848145, "learning_rate": 9.630012150668287e-05, "loss": 2.1785, "step": 619 }, { "epoch": 0.18819244194870238, "grad_norm": 0.32631704211235046, "learning_rate": 9.62940461725395e-05, "loss": 1.5203, "step": 620 }, { "epoch": 0.18849597814539384, "grad_norm": 0.3332744836807251, "learning_rate": 9.628797083839612e-05, "loss": 1.8768, "step": 621 }, { "epoch": 0.1887995143420853, "grad_norm": 0.43369799852371216, "learning_rate": 9.628189550425274e-05, "loss": 2.088, "step": 622 }, { "epoch": 0.18910305053877674, "grad_norm": 0.34848517179489136, "learning_rate": 9.627582017010937e-05, "loss": 1.7685, "step": 623 }, { "epoch": 0.1894065867354682, "grad_norm": 0.40195101499557495, "learning_rate": 9.626974483596599e-05, "loss": 2.0937, "step": 624 }, { "epoch": 0.18971012293215966, "grad_norm": 0.3992537558078766, "learning_rate": 9.62636695018226e-05, "loss": 1.7799, "step": 625 }, { "epoch": 0.19001365912885113, "grad_norm": 0.41797420382499695, "learning_rate": 9.625759416767922e-05, "loss": 1.92, "step": 626 }, { "epoch": 0.19031719532554256, "grad_norm": 0.37648969888687134, "learning_rate": 9.625151883353585e-05, "loss": 2.0243, "step": 627 }, { "epoch": 0.19062073152223402, "grad_norm": 0.3513944447040558, "learning_rate": 9.624544349939247e-05, "loss": 2.0676, "step": 628 }, { "epoch": 0.19092426771892548, "grad_norm": 0.3975341022014618, "learning_rate": 9.623936816524909e-05, "loss": 1.8837, "step": 629 }, { "epoch": 0.19122780391561695, "grad_norm": 0.3954656422138214, "learning_rate": 9.623329283110572e-05, "loss": 1.9648, "step": 630 }, { "epoch": 0.19153134011230838, "grad_norm": 0.32590335607528687, "learning_rate": 9.622721749696235e-05, "loss": 2.0704, "step": 631 }, { "epoch": 0.19183487630899984, "grad_norm": 0.7592522501945496, "learning_rate": 9.622114216281895e-05, "loss": 1.8721, "step": 632 }, { "epoch": 0.1921384125056913, "grad_norm": 0.4411126375198364, "learning_rate": 9.621506682867558e-05, "loss": 1.963, "step": 633 }, { "epoch": 0.19244194870238276, "grad_norm": 0.37471216917037964, "learning_rate": 9.620899149453221e-05, "loss": 2.0679, "step": 634 }, { "epoch": 0.19274548489907423, "grad_norm": 0.39219167828559875, "learning_rate": 9.620291616038882e-05, "loss": 2.0597, "step": 635 }, { "epoch": 0.19304902109576566, "grad_norm": 0.3059561550617218, "learning_rate": 9.619684082624545e-05, "loss": 1.7527, "step": 636 }, { "epoch": 0.19335255729245712, "grad_norm": 0.3843368589878082, "learning_rate": 9.619076549210208e-05, "loss": 1.9737, "step": 637 }, { "epoch": 0.19365609348914858, "grad_norm": 0.3923681378364563, "learning_rate": 9.61846901579587e-05, "loss": 1.7179, "step": 638 }, { "epoch": 0.19395962968584005, "grad_norm": 0.4614477753639221, "learning_rate": 9.617861482381531e-05, "loss": 1.2844, "step": 639 }, { "epoch": 0.19426316588253148, "grad_norm": 0.3571571409702301, "learning_rate": 9.617253948967193e-05, "loss": 2.1014, "step": 640 }, { "epoch": 0.19456670207922294, "grad_norm": 0.40552857518196106, "learning_rate": 9.616646415552856e-05, "loss": 1.8588, "step": 641 }, { "epoch": 0.1948702382759144, "grad_norm": 0.3739052414894104, "learning_rate": 9.616038882138518e-05, "loss": 2.2262, "step": 642 }, { "epoch": 0.19517377447260587, "grad_norm": 0.33607810735702515, "learning_rate": 9.61543134872418e-05, "loss": 1.8196, "step": 643 }, { "epoch": 0.19547731066929733, "grad_norm": 0.3751862645149231, "learning_rate": 9.614823815309843e-05, "loss": 1.7183, "step": 644 }, { "epoch": 0.19578084686598876, "grad_norm": 0.4978049397468567, "learning_rate": 9.614216281895506e-05, "loss": 1.6918, "step": 645 }, { "epoch": 0.19608438306268022, "grad_norm": 0.3755020499229431, "learning_rate": 9.613608748481166e-05, "loss": 2.0704, "step": 646 }, { "epoch": 0.19638791925937168, "grad_norm": 0.3641931414604187, "learning_rate": 9.613001215066829e-05, "loss": 1.8087, "step": 647 }, { "epoch": 0.19669145545606315, "grad_norm": 0.32229694724082947, "learning_rate": 9.612393681652492e-05, "loss": 1.9157, "step": 648 }, { "epoch": 0.19699499165275458, "grad_norm": 0.4132642149925232, "learning_rate": 9.611786148238153e-05, "loss": 1.6715, "step": 649 }, { "epoch": 0.19729852784944604, "grad_norm": 0.38652992248535156, "learning_rate": 9.611178614823816e-05, "loss": 1.814, "step": 650 }, { "epoch": 0.1976020640461375, "grad_norm": 0.432373970746994, "learning_rate": 9.610571081409479e-05, "loss": 1.8719, "step": 651 }, { "epoch": 0.19790560024282897, "grad_norm": 0.4002588987350464, "learning_rate": 9.60996354799514e-05, "loss": 1.9697, "step": 652 }, { "epoch": 0.1982091364395204, "grad_norm": 0.3377281427383423, "learning_rate": 9.609356014580802e-05, "loss": 2.0114, "step": 653 }, { "epoch": 0.19851267263621186, "grad_norm": 0.40434688329696655, "learning_rate": 9.608748481166464e-05, "loss": 2.2406, "step": 654 }, { "epoch": 0.19881620883290332, "grad_norm": 0.33377552032470703, "learning_rate": 9.608140947752127e-05, "loss": 1.8749, "step": 655 }, { "epoch": 0.19911974502959479, "grad_norm": 0.36146265268325806, "learning_rate": 9.607533414337789e-05, "loss": 1.5821, "step": 656 }, { "epoch": 0.19942328122628625, "grad_norm": 1.8864728212356567, "learning_rate": 9.60692588092345e-05, "loss": 1.9895, "step": 657 }, { "epoch": 0.19972681742297768, "grad_norm": 0.35898399353027344, "learning_rate": 9.606318347509114e-05, "loss": 1.6315, "step": 658 }, { "epoch": 0.20003035361966914, "grad_norm": 0.44391825795173645, "learning_rate": 9.605710814094775e-05, "loss": 1.5046, "step": 659 }, { "epoch": 0.2003338898163606, "grad_norm": 0.37957173585891724, "learning_rate": 9.605103280680437e-05, "loss": 1.8532, "step": 660 }, { "epoch": 0.20063742601305207, "grad_norm": 1.4045872688293457, "learning_rate": 9.6044957472661e-05, "loss": 2.1214, "step": 661 }, { "epoch": 0.2009409622097435, "grad_norm": 0.4327601194381714, "learning_rate": 9.603888213851763e-05, "loss": 2.1803, "step": 662 }, { "epoch": 0.20124449840643496, "grad_norm": 3.2122411727905273, "learning_rate": 9.603280680437424e-05, "loss": 1.989, "step": 663 }, { "epoch": 0.20154803460312642, "grad_norm": 0.4538092017173767, "learning_rate": 9.602673147023087e-05, "loss": 1.7016, "step": 664 }, { "epoch": 0.2018515707998179, "grad_norm": 0.5968027710914612, "learning_rate": 9.60206561360875e-05, "loss": 1.8538, "step": 665 }, { "epoch": 0.20215510699650932, "grad_norm": 0.3749493956565857, "learning_rate": 9.601458080194412e-05, "loss": 1.8747, "step": 666 }, { "epoch": 0.20245864319320078, "grad_norm": 0.3741036355495453, "learning_rate": 9.600850546780073e-05, "loss": 1.9896, "step": 667 }, { "epoch": 0.20276217938989224, "grad_norm": 0.3708043098449707, "learning_rate": 9.600243013365735e-05, "loss": 1.7925, "step": 668 }, { "epoch": 0.2030657155865837, "grad_norm": 0.32813695073127747, "learning_rate": 9.599635479951398e-05, "loss": 1.8272, "step": 669 }, { "epoch": 0.20336925178327517, "grad_norm": 0.6151819229125977, "learning_rate": 9.59902794653706e-05, "loss": 1.927, "step": 670 }, { "epoch": 0.2036727879799666, "grad_norm": 0.42905279994010925, "learning_rate": 9.598420413122722e-05, "loss": 1.5442, "step": 671 }, { "epoch": 0.20397632417665806, "grad_norm": 0.4102342426776886, "learning_rate": 9.597812879708385e-05, "loss": 1.2097, "step": 672 }, { "epoch": 0.20427986037334953, "grad_norm": 0.392560750246048, "learning_rate": 9.597205346294046e-05, "loss": 1.8166, "step": 673 }, { "epoch": 0.204583396570041, "grad_norm": 0.4056089222431183, "learning_rate": 9.596597812879708e-05, "loss": 1.5171, "step": 674 }, { "epoch": 0.20488693276673242, "grad_norm": 0.4734075963497162, "learning_rate": 9.595990279465371e-05, "loss": 1.4529, "step": 675 }, { "epoch": 0.20519046896342388, "grad_norm": 0.41490182280540466, "learning_rate": 9.595382746051034e-05, "loss": 1.8316, "step": 676 }, { "epoch": 0.20549400516011535, "grad_norm": 0.3590947091579437, "learning_rate": 9.594775212636695e-05, "loss": 1.9073, "step": 677 }, { "epoch": 0.2057975413568068, "grad_norm": 0.3779642581939697, "learning_rate": 9.594167679222358e-05, "loss": 1.9669, "step": 678 }, { "epoch": 0.20610107755349824, "grad_norm": 0.39710256457328796, "learning_rate": 9.593560145808021e-05, "loss": 2.0278, "step": 679 }, { "epoch": 0.2064046137501897, "grad_norm": 0.4168045222759247, "learning_rate": 9.592952612393683e-05, "loss": 1.5158, "step": 680 }, { "epoch": 0.20670814994688116, "grad_norm": 0.3751262426376343, "learning_rate": 9.592345078979344e-05, "loss": 2.1459, "step": 681 }, { "epoch": 0.20701168614357263, "grad_norm": 0.49441012740135193, "learning_rate": 9.591737545565006e-05, "loss": 1.8968, "step": 682 }, { "epoch": 0.2073152223402641, "grad_norm": 0.4807801842689514, "learning_rate": 9.591130012150669e-05, "loss": 2.2741, "step": 683 }, { "epoch": 0.20761875853695552, "grad_norm": 0.3886473774909973, "learning_rate": 9.590522478736331e-05, "loss": 1.4511, "step": 684 }, { "epoch": 0.20792229473364698, "grad_norm": 0.6425371170043945, "learning_rate": 9.589914945321993e-05, "loss": 2.2643, "step": 685 }, { "epoch": 0.20822583093033845, "grad_norm": 0.37720414996147156, "learning_rate": 9.589307411907656e-05, "loss": 1.8431, "step": 686 }, { "epoch": 0.2085293671270299, "grad_norm": 0.35544151067733765, "learning_rate": 9.588699878493317e-05, "loss": 1.9822, "step": 687 }, { "epoch": 0.20883290332372134, "grad_norm": 0.561444878578186, "learning_rate": 9.588092345078979e-05, "loss": 1.9507, "step": 688 }, { "epoch": 0.2091364395204128, "grad_norm": 0.37386366724967957, "learning_rate": 9.587484811664642e-05, "loss": 2.0673, "step": 689 }, { "epoch": 0.20943997571710427, "grad_norm": 0.3882986307144165, "learning_rate": 9.586877278250305e-05, "loss": 2.0166, "step": 690 }, { "epoch": 0.20974351191379573, "grad_norm": 0.41471484303474426, "learning_rate": 9.586269744835966e-05, "loss": 1.7111, "step": 691 }, { "epoch": 0.2100470481104872, "grad_norm": 0.467939555644989, "learning_rate": 9.585662211421629e-05, "loss": 1.6116, "step": 692 }, { "epoch": 0.21035058430717862, "grad_norm": 0.905303955078125, "learning_rate": 9.585054678007292e-05, "loss": 2.0287, "step": 693 }, { "epoch": 0.21065412050387008, "grad_norm": 0.3820960819721222, "learning_rate": 9.584447144592954e-05, "loss": 1.7834, "step": 694 }, { "epoch": 0.21095765670056155, "grad_norm": 0.4265238642692566, "learning_rate": 9.583839611178615e-05, "loss": 1.5176, "step": 695 }, { "epoch": 0.211261192897253, "grad_norm": 0.30739274621009827, "learning_rate": 9.583232077764277e-05, "loss": 1.3758, "step": 696 }, { "epoch": 0.21156472909394444, "grad_norm": 0.3890193998813629, "learning_rate": 9.58262454434994e-05, "loss": 1.8188, "step": 697 }, { "epoch": 0.2118682652906359, "grad_norm": 0.3726442754268646, "learning_rate": 9.582017010935602e-05, "loss": 1.8957, "step": 698 }, { "epoch": 0.21217180148732737, "grad_norm": 0.43913599848747253, "learning_rate": 9.581409477521264e-05, "loss": 1.8882, "step": 699 }, { "epoch": 0.21247533768401883, "grad_norm": 0.4009544253349304, "learning_rate": 9.580801944106927e-05, "loss": 1.8844, "step": 700 }, { "epoch": 0.21277887388071026, "grad_norm": 0.6625222563743591, "learning_rate": 9.580194410692588e-05, "loss": 1.8564, "step": 701 }, { "epoch": 0.21308241007740172, "grad_norm": 0.3108811676502228, "learning_rate": 9.57958687727825e-05, "loss": 1.5883, "step": 702 }, { "epoch": 0.21338594627409319, "grad_norm": 0.35348960757255554, "learning_rate": 9.578979343863913e-05, "loss": 1.6062, "step": 703 }, { "epoch": 0.21368948247078465, "grad_norm": 0.3460123538970947, "learning_rate": 9.578371810449576e-05, "loss": 1.3375, "step": 704 }, { "epoch": 0.2139930186674761, "grad_norm": 0.3396036922931671, "learning_rate": 9.577764277035237e-05, "loss": 2.007, "step": 705 }, { "epoch": 0.21429655486416754, "grad_norm": 0.38622626662254333, "learning_rate": 9.5771567436209e-05, "loss": 1.7612, "step": 706 }, { "epoch": 0.214600091060859, "grad_norm": 0.39317429065704346, "learning_rate": 9.576549210206563e-05, "loss": 1.7884, "step": 707 }, { "epoch": 0.21490362725755047, "grad_norm": 0.3730657994747162, "learning_rate": 9.575941676792223e-05, "loss": 1.9813, "step": 708 }, { "epoch": 0.21520716345424193, "grad_norm": 0.3781750202178955, "learning_rate": 9.575334143377886e-05, "loss": 1.8843, "step": 709 }, { "epoch": 0.21551069965093336, "grad_norm": 0.4760946035385132, "learning_rate": 9.574726609963548e-05, "loss": 2.0092, "step": 710 }, { "epoch": 0.21581423584762482, "grad_norm": 0.4052627980709076, "learning_rate": 9.574119076549211e-05, "loss": 1.9742, "step": 711 }, { "epoch": 0.2161177720443163, "grad_norm": 0.34289002418518066, "learning_rate": 9.573511543134873e-05, "loss": 2.0408, "step": 712 }, { "epoch": 0.21642130824100775, "grad_norm": 0.3720855116844177, "learning_rate": 9.572904009720535e-05, "loss": 1.6685, "step": 713 }, { "epoch": 0.21672484443769918, "grad_norm": 0.4015984535217285, "learning_rate": 9.572296476306198e-05, "loss": 1.9651, "step": 714 }, { "epoch": 0.21702838063439064, "grad_norm": 0.45196712017059326, "learning_rate": 9.57168894289186e-05, "loss": 2.0588, "step": 715 }, { "epoch": 0.2173319168310821, "grad_norm": 0.3434293866157532, "learning_rate": 9.571081409477521e-05, "loss": 1.748, "step": 716 }, { "epoch": 0.21763545302777357, "grad_norm": 0.29288217425346375, "learning_rate": 9.570473876063184e-05, "loss": 2.0274, "step": 717 }, { "epoch": 0.21793898922446503, "grad_norm": 0.4192684590816498, "learning_rate": 9.569866342648847e-05, "loss": 1.3597, "step": 718 }, { "epoch": 0.21824252542115646, "grad_norm": 0.4450276494026184, "learning_rate": 9.569258809234508e-05, "loss": 1.4675, "step": 719 }, { "epoch": 0.21854606161784793, "grad_norm": 0.33388352394104004, "learning_rate": 9.568651275820171e-05, "loss": 1.9747, "step": 720 }, { "epoch": 0.2188495978145394, "grad_norm": 0.34411269426345825, "learning_rate": 9.568043742405832e-05, "loss": 1.9119, "step": 721 }, { "epoch": 0.21915313401123085, "grad_norm": 0.38926756381988525, "learning_rate": 9.567436208991494e-05, "loss": 1.8859, "step": 722 }, { "epoch": 0.21945667020792228, "grad_norm": 0.3513714671134949, "learning_rate": 9.566828675577157e-05, "loss": 1.9125, "step": 723 }, { "epoch": 0.21976020640461374, "grad_norm": 0.9200549721717834, "learning_rate": 9.566221142162819e-05, "loss": 2.1854, "step": 724 }, { "epoch": 0.2200637426013052, "grad_norm": 0.444815456867218, "learning_rate": 9.565613608748482e-05, "loss": 1.8998, "step": 725 }, { "epoch": 0.22036727879799667, "grad_norm": 0.37483492493629456, "learning_rate": 9.565006075334144e-05, "loss": 1.7089, "step": 726 }, { "epoch": 0.22067081499468813, "grad_norm": 0.32369089126586914, "learning_rate": 9.564398541919806e-05, "loss": 2.1223, "step": 727 }, { "epoch": 0.22097435119137956, "grad_norm": 0.3537048399448395, "learning_rate": 9.563791008505469e-05, "loss": 1.3256, "step": 728 }, { "epoch": 0.22127788738807103, "grad_norm": 0.408723384141922, "learning_rate": 9.56318347509113e-05, "loss": 1.8056, "step": 729 }, { "epoch": 0.2215814235847625, "grad_norm": 0.37529709935188293, "learning_rate": 9.562575941676792e-05, "loss": 1.9281, "step": 730 }, { "epoch": 0.22188495978145395, "grad_norm": 0.3406868278980255, "learning_rate": 9.561968408262455e-05, "loss": 1.9673, "step": 731 }, { "epoch": 0.22218849597814538, "grad_norm": 0.34361201524734497, "learning_rate": 9.561360874848117e-05, "loss": 1.7777, "step": 732 }, { "epoch": 0.22249203217483685, "grad_norm": 0.3995072543621063, "learning_rate": 9.560753341433779e-05, "loss": 1.9959, "step": 733 }, { "epoch": 0.2227955683715283, "grad_norm": 0.4618263244628906, "learning_rate": 9.560145808019442e-05, "loss": 1.7524, "step": 734 }, { "epoch": 0.22309910456821977, "grad_norm": 0.36778688430786133, "learning_rate": 9.559538274605103e-05, "loss": 1.6332, "step": 735 }, { "epoch": 0.2234026407649112, "grad_norm": 0.47031348943710327, "learning_rate": 9.558930741190765e-05, "loss": 2.0364, "step": 736 }, { "epoch": 0.22370617696160267, "grad_norm": 0.6130351424217224, "learning_rate": 9.558323207776428e-05, "loss": 1.7814, "step": 737 }, { "epoch": 0.22400971315829413, "grad_norm": 0.3733448088169098, "learning_rate": 9.55771567436209e-05, "loss": 1.8888, "step": 738 }, { "epoch": 0.2243132493549856, "grad_norm": 0.368182510137558, "learning_rate": 9.557108140947753e-05, "loss": 2.3162, "step": 739 }, { "epoch": 0.22461678555167705, "grad_norm": 0.4311901926994324, "learning_rate": 9.556500607533415e-05, "loss": 2.1294, "step": 740 }, { "epoch": 0.22492032174836848, "grad_norm": 0.38696759939193726, "learning_rate": 9.555893074119077e-05, "loss": 1.9524, "step": 741 }, { "epoch": 0.22522385794505995, "grad_norm": 0.37136873602867126, "learning_rate": 9.55528554070474e-05, "loss": 1.8677, "step": 742 }, { "epoch": 0.2255273941417514, "grad_norm": 0.4084314703941345, "learning_rate": 9.554678007290401e-05, "loss": 1.8621, "step": 743 }, { "epoch": 0.22583093033844287, "grad_norm": 0.4216344952583313, "learning_rate": 9.554070473876063e-05, "loss": 2.0455, "step": 744 }, { "epoch": 0.2261344665351343, "grad_norm": 0.36579129099845886, "learning_rate": 9.553462940461726e-05, "loss": 1.8993, "step": 745 }, { "epoch": 0.22643800273182577, "grad_norm": 0.4048181474208832, "learning_rate": 9.552855407047388e-05, "loss": 1.2016, "step": 746 }, { "epoch": 0.22674153892851723, "grad_norm": 0.4071241617202759, "learning_rate": 9.55224787363305e-05, "loss": 1.9272, "step": 747 }, { "epoch": 0.2270450751252087, "grad_norm": 0.3970381021499634, "learning_rate": 9.551640340218713e-05, "loss": 2.0819, "step": 748 }, { "epoch": 0.22734861132190012, "grad_norm": 0.3891443610191345, "learning_rate": 9.551032806804374e-05, "loss": 2.2604, "step": 749 }, { "epoch": 0.22765214751859159, "grad_norm": 0.40169456601142883, "learning_rate": 9.550425273390036e-05, "loss": 1.9514, "step": 750 }, { "epoch": 0.22795568371528305, "grad_norm": 0.7191595435142517, "learning_rate": 9.549817739975699e-05, "loss": 1.1686, "step": 751 }, { "epoch": 0.2282592199119745, "grad_norm": 0.3602886497974396, "learning_rate": 9.549210206561361e-05, "loss": 1.8877, "step": 752 }, { "epoch": 0.22856275610866597, "grad_norm": 0.34270042181015015, "learning_rate": 9.548602673147024e-05, "loss": 1.4296, "step": 753 }, { "epoch": 0.2288662923053574, "grad_norm": 0.37293288111686707, "learning_rate": 9.547995139732686e-05, "loss": 1.6695, "step": 754 }, { "epoch": 0.22916982850204887, "grad_norm": 0.31505951285362244, "learning_rate": 9.547387606318348e-05, "loss": 1.9869, "step": 755 }, { "epoch": 0.22947336469874033, "grad_norm": 0.38319501280784607, "learning_rate": 9.54678007290401e-05, "loss": 1.9769, "step": 756 }, { "epoch": 0.2297769008954318, "grad_norm": 0.37378913164138794, "learning_rate": 9.546172539489672e-05, "loss": 1.9998, "step": 757 }, { "epoch": 0.23008043709212322, "grad_norm": 0.34337082505226135, "learning_rate": 9.545565006075334e-05, "loss": 2.048, "step": 758 }, { "epoch": 0.2303839732888147, "grad_norm": 0.35315895080566406, "learning_rate": 9.544957472660997e-05, "loss": 1.8742, "step": 759 }, { "epoch": 0.23068750948550615, "grad_norm": 0.3854929506778717, "learning_rate": 9.544349939246659e-05, "loss": 1.4978, "step": 760 }, { "epoch": 0.2309910456821976, "grad_norm": 0.35743293166160583, "learning_rate": 9.543742405832321e-05, "loss": 2.159, "step": 761 }, { "epoch": 0.23129458187888904, "grad_norm": 0.39035484194755554, "learning_rate": 9.543134872417984e-05, "loss": 1.9588, "step": 762 }, { "epoch": 0.2315981180755805, "grad_norm": 0.35890859365463257, "learning_rate": 9.542527339003645e-05, "loss": 1.7466, "step": 763 }, { "epoch": 0.23190165427227197, "grad_norm": 0.46986308693885803, "learning_rate": 9.541919805589307e-05, "loss": 1.8819, "step": 764 }, { "epoch": 0.23220519046896343, "grad_norm": 0.4226287305355072, "learning_rate": 9.54131227217497e-05, "loss": 1.9421, "step": 765 }, { "epoch": 0.2325087266656549, "grad_norm": 0.4461078345775604, "learning_rate": 9.540704738760632e-05, "loss": 1.6888, "step": 766 }, { "epoch": 0.23281226286234633, "grad_norm": 0.363406240940094, "learning_rate": 9.540097205346295e-05, "loss": 1.9234, "step": 767 }, { "epoch": 0.2331157990590378, "grad_norm": 0.3713390529155731, "learning_rate": 9.539489671931957e-05, "loss": 1.9741, "step": 768 }, { "epoch": 0.23341933525572925, "grad_norm": 0.3300642967224121, "learning_rate": 9.538882138517619e-05, "loss": 1.5029, "step": 769 }, { "epoch": 0.2337228714524207, "grad_norm": 0.30819302797317505, "learning_rate": 9.538274605103282e-05, "loss": 1.6739, "step": 770 }, { "epoch": 0.23402640764911214, "grad_norm": 0.3575786054134369, "learning_rate": 9.537667071688943e-05, "loss": 1.5768, "step": 771 }, { "epoch": 0.2343299438458036, "grad_norm": 0.32882705330848694, "learning_rate": 9.537059538274605e-05, "loss": 1.9512, "step": 772 }, { "epoch": 0.23463348004249507, "grad_norm": 0.3340393900871277, "learning_rate": 9.536452004860268e-05, "loss": 1.927, "step": 773 }, { "epoch": 0.23493701623918653, "grad_norm": 0.33640411496162415, "learning_rate": 9.53584447144593e-05, "loss": 1.9496, "step": 774 }, { "epoch": 0.235240552435878, "grad_norm": 0.3581593930721283, "learning_rate": 9.535236938031592e-05, "loss": 1.8766, "step": 775 }, { "epoch": 0.23554408863256943, "grad_norm": 0.44084489345550537, "learning_rate": 9.534629404617255e-05, "loss": 2.0274, "step": 776 }, { "epoch": 0.2358476248292609, "grad_norm": 0.363518089056015, "learning_rate": 9.534021871202917e-05, "loss": 1.9956, "step": 777 }, { "epoch": 0.23615116102595235, "grad_norm": 0.35967034101486206, "learning_rate": 9.533414337788578e-05, "loss": 1.7312, "step": 778 }, { "epoch": 0.2364546972226438, "grad_norm": 0.3683255910873413, "learning_rate": 9.532806804374241e-05, "loss": 1.9594, "step": 779 }, { "epoch": 0.23675823341933525, "grad_norm": 0.3063610792160034, "learning_rate": 9.532199270959903e-05, "loss": 1.3972, "step": 780 }, { "epoch": 0.2370617696160267, "grad_norm": 0.4217472970485687, "learning_rate": 9.531591737545565e-05, "loss": 1.9495, "step": 781 }, { "epoch": 0.23736530581271817, "grad_norm": 0.46779391169548035, "learning_rate": 9.530984204131228e-05, "loss": 1.9965, "step": 782 }, { "epoch": 0.23766884200940963, "grad_norm": 0.351810485124588, "learning_rate": 9.53037667071689e-05, "loss": 1.8034, "step": 783 }, { "epoch": 0.23797237820610107, "grad_norm": 0.3302007019519806, "learning_rate": 9.529769137302553e-05, "loss": 2.0156, "step": 784 }, { "epoch": 0.23827591440279253, "grad_norm": 0.3699585497379303, "learning_rate": 9.529161603888214e-05, "loss": 1.7217, "step": 785 }, { "epoch": 0.238579450599484, "grad_norm": 0.34256428480148315, "learning_rate": 9.528554070473876e-05, "loss": 1.924, "step": 786 }, { "epoch": 0.23888298679617545, "grad_norm": 0.4008747339248657, "learning_rate": 9.527946537059539e-05, "loss": 1.8903, "step": 787 }, { "epoch": 0.2391865229928669, "grad_norm": 0.4281119108200073, "learning_rate": 9.527339003645201e-05, "loss": 2.1402, "step": 788 }, { "epoch": 0.23949005918955835, "grad_norm": 0.4065872132778168, "learning_rate": 9.526731470230863e-05, "loss": 1.8399, "step": 789 }, { "epoch": 0.2397935953862498, "grad_norm": 0.35334911942481995, "learning_rate": 9.526123936816526e-05, "loss": 1.8206, "step": 790 }, { "epoch": 0.24009713158294127, "grad_norm": 0.35420283675193787, "learning_rate": 9.525516403402188e-05, "loss": 1.956, "step": 791 }, { "epoch": 0.24040066777963273, "grad_norm": 0.5720547437667847, "learning_rate": 9.524908869987849e-05, "loss": 2.1932, "step": 792 }, { "epoch": 0.24070420397632417, "grad_norm": 0.3512174189090729, "learning_rate": 9.524301336573512e-05, "loss": 2.1031, "step": 793 }, { "epoch": 0.24100774017301563, "grad_norm": 0.3975936770439148, "learning_rate": 9.523693803159174e-05, "loss": 1.5962, "step": 794 }, { "epoch": 0.2413112763697071, "grad_norm": 0.3723268210887909, "learning_rate": 9.523086269744836e-05, "loss": 1.492, "step": 795 }, { "epoch": 0.24161481256639855, "grad_norm": 0.5287608504295349, "learning_rate": 9.522478736330499e-05, "loss": 1.5488, "step": 796 }, { "epoch": 0.24191834876308999, "grad_norm": 0.37749987840652466, "learning_rate": 9.52187120291616e-05, "loss": 1.5724, "step": 797 }, { "epoch": 0.24222188495978145, "grad_norm": 0.38260164856910706, "learning_rate": 9.521263669501824e-05, "loss": 2.1049, "step": 798 }, { "epoch": 0.2425254211564729, "grad_norm": 0.3552962839603424, "learning_rate": 9.520656136087485e-05, "loss": 1.8853, "step": 799 }, { "epoch": 0.24282895735316437, "grad_norm": 0.5752935409545898, "learning_rate": 9.520048602673147e-05, "loss": 1.6359, "step": 800 }, { "epoch": 0.24313249354985583, "grad_norm": 0.41982319951057434, "learning_rate": 9.51944106925881e-05, "loss": 1.485, "step": 801 }, { "epoch": 0.24343602974654727, "grad_norm": 0.3913584351539612, "learning_rate": 9.518833535844472e-05, "loss": 1.9818, "step": 802 }, { "epoch": 0.24373956594323873, "grad_norm": 0.3771272897720337, "learning_rate": 9.518226002430134e-05, "loss": 1.7237, "step": 803 }, { "epoch": 0.2440431021399302, "grad_norm": 0.3625226318836212, "learning_rate": 9.517618469015797e-05, "loss": 1.6776, "step": 804 }, { "epoch": 0.24434663833662165, "grad_norm": 0.3253527283668518, "learning_rate": 9.517010935601459e-05, "loss": 2.0659, "step": 805 }, { "epoch": 0.2446501745333131, "grad_norm": 0.3705154359340668, "learning_rate": 9.51640340218712e-05, "loss": 2.116, "step": 806 }, { "epoch": 0.24495371073000455, "grad_norm": 0.3321172595024109, "learning_rate": 9.515795868772783e-05, "loss": 2.1026, "step": 807 }, { "epoch": 0.245257246926696, "grad_norm": 0.41880494356155396, "learning_rate": 9.515188335358445e-05, "loss": 1.7541, "step": 808 }, { "epoch": 0.24556078312338747, "grad_norm": 0.38695165514945984, "learning_rate": 9.514580801944107e-05, "loss": 1.9328, "step": 809 }, { "epoch": 0.2458643193200789, "grad_norm": 0.37348538637161255, "learning_rate": 9.51397326852977e-05, "loss": 1.9065, "step": 810 }, { "epoch": 0.24616785551677037, "grad_norm": 1.5822879076004028, "learning_rate": 9.513365735115432e-05, "loss": 1.8297, "step": 811 }, { "epoch": 0.24647139171346183, "grad_norm": 0.3697100579738617, "learning_rate": 9.512758201701095e-05, "loss": 1.9091, "step": 812 }, { "epoch": 0.2467749279101533, "grad_norm": 0.40801766514778137, "learning_rate": 9.512150668286756e-05, "loss": 1.8284, "step": 813 }, { "epoch": 0.24707846410684475, "grad_norm": 0.4060746133327484, "learning_rate": 9.511543134872418e-05, "loss": 2.0244, "step": 814 }, { "epoch": 0.2473820003035362, "grad_norm": 0.38555091619491577, "learning_rate": 9.510935601458081e-05, "loss": 1.6098, "step": 815 }, { "epoch": 0.24768553650022765, "grad_norm": 0.39763063192367554, "learning_rate": 9.510328068043743e-05, "loss": 1.836, "step": 816 }, { "epoch": 0.2479890726969191, "grad_norm": 0.6933274269104004, "learning_rate": 9.509720534629405e-05, "loss": 2.1968, "step": 817 }, { "epoch": 0.24829260889361057, "grad_norm": 1.307569146156311, "learning_rate": 9.509113001215068e-05, "loss": 2.3396, "step": 818 }, { "epoch": 0.248596145090302, "grad_norm": 0.3513609766960144, "learning_rate": 9.50850546780073e-05, "loss": 1.7302, "step": 819 }, { "epoch": 0.24889968128699347, "grad_norm": 0.36949577927589417, "learning_rate": 9.507897934386391e-05, "loss": 1.7796, "step": 820 }, { "epoch": 0.24920321748368493, "grad_norm": 0.38934049010276794, "learning_rate": 9.507290400972054e-05, "loss": 1.7644, "step": 821 }, { "epoch": 0.2495067536803764, "grad_norm": 0.5927665829658508, "learning_rate": 9.506682867557716e-05, "loss": 1.9029, "step": 822 }, { "epoch": 0.24981028987706785, "grad_norm": 0.5131897926330566, "learning_rate": 9.506075334143378e-05, "loss": 2.1282, "step": 823 }, { "epoch": 0.2501138260737593, "grad_norm": 0.36232516169548035, "learning_rate": 9.505467800729041e-05, "loss": 2.1073, "step": 824 }, { "epoch": 0.25041736227045075, "grad_norm": 0.43212029337882996, "learning_rate": 9.504860267314703e-05, "loss": 2.0364, "step": 825 }, { "epoch": 0.2507208984671422, "grad_norm": 0.36575961112976074, "learning_rate": 9.504252733900366e-05, "loss": 1.8609, "step": 826 }, { "epoch": 0.2510244346638337, "grad_norm": 0.32013362646102905, "learning_rate": 9.503645200486027e-05, "loss": 1.4097, "step": 827 }, { "epoch": 0.2513279708605251, "grad_norm": 0.4062201976776123, "learning_rate": 9.503037667071689e-05, "loss": 1.9872, "step": 828 }, { "epoch": 0.2516315070572166, "grad_norm": 0.3433174192905426, "learning_rate": 9.502430133657352e-05, "loss": 2.0662, "step": 829 }, { "epoch": 0.25193504325390803, "grad_norm": 0.3925630748271942, "learning_rate": 9.501822600243013e-05, "loss": 1.5876, "step": 830 }, { "epoch": 0.25223857945059946, "grad_norm": 0.32962149381637573, "learning_rate": 9.501215066828676e-05, "loss": 1.7445, "step": 831 }, { "epoch": 0.25254211564729095, "grad_norm": 0.35508283972740173, "learning_rate": 9.500607533414339e-05, "loss": 1.8836, "step": 832 }, { "epoch": 0.2528456518439824, "grad_norm": 0.34893691539764404, "learning_rate": 9.5e-05, "loss": 1.6312, "step": 833 }, { "epoch": 0.2531491880406738, "grad_norm": 0.4068532884120941, "learning_rate": 9.499392466585662e-05, "loss": 1.5567, "step": 834 }, { "epoch": 0.2534527242373653, "grad_norm": 0.37818485498428345, "learning_rate": 9.498784933171325e-05, "loss": 2.0791, "step": 835 }, { "epoch": 0.25375626043405675, "grad_norm": 0.884172797203064, "learning_rate": 9.498177399756987e-05, "loss": 1.8642, "step": 836 }, { "epoch": 0.25405979663074824, "grad_norm": 0.4108290374279022, "learning_rate": 9.497569866342649e-05, "loss": 1.9545, "step": 837 }, { "epoch": 0.25436333282743967, "grad_norm": 0.37885358929634094, "learning_rate": 9.496962332928312e-05, "loss": 1.3709, "step": 838 }, { "epoch": 0.2546668690241311, "grad_norm": 0.3919561505317688, "learning_rate": 9.496354799513974e-05, "loss": 1.9548, "step": 839 }, { "epoch": 0.2549704052208226, "grad_norm": 0.3945852518081665, "learning_rate": 9.495747266099637e-05, "loss": 1.8139, "step": 840 }, { "epoch": 0.255273941417514, "grad_norm": 0.3272388279438019, "learning_rate": 9.495139732685298e-05, "loss": 2.1711, "step": 841 }, { "epoch": 0.2555774776142055, "grad_norm": 0.3214159905910492, "learning_rate": 9.49453219927096e-05, "loss": 1.6202, "step": 842 }, { "epoch": 0.25588101381089695, "grad_norm": 0.6175217628479004, "learning_rate": 9.493924665856623e-05, "loss": 1.9976, "step": 843 }, { "epoch": 0.2561845500075884, "grad_norm": 0.36993956565856934, "learning_rate": 9.493317132442284e-05, "loss": 1.9981, "step": 844 }, { "epoch": 0.2564880862042799, "grad_norm": 0.4294464588165283, "learning_rate": 9.492709599027947e-05, "loss": 2.017, "step": 845 }, { "epoch": 0.2567916224009713, "grad_norm": 0.4055061638355255, "learning_rate": 9.49210206561361e-05, "loss": 1.9383, "step": 846 }, { "epoch": 0.2570951585976628, "grad_norm": 0.3574405014514923, "learning_rate": 9.491494532199272e-05, "loss": 1.8058, "step": 847 }, { "epoch": 0.25739869479435423, "grad_norm": 0.35684704780578613, "learning_rate": 9.490886998784933e-05, "loss": 1.5661, "step": 848 }, { "epoch": 0.25770223099104567, "grad_norm": 0.35031405091285706, "learning_rate": 9.490279465370596e-05, "loss": 1.6422, "step": 849 }, { "epoch": 0.25800576718773716, "grad_norm": 0.390667200088501, "learning_rate": 9.489671931956258e-05, "loss": 1.9743, "step": 850 }, { "epoch": 0.2583093033844286, "grad_norm": 0.33744457364082336, "learning_rate": 9.48906439854192e-05, "loss": 1.4922, "step": 851 }, { "epoch": 0.25861283958112, "grad_norm": 0.3162226676940918, "learning_rate": 9.488456865127583e-05, "loss": 1.7791, "step": 852 }, { "epoch": 0.2589163757778115, "grad_norm": 0.49357378482818604, "learning_rate": 9.487849331713245e-05, "loss": 1.4353, "step": 853 }, { "epoch": 0.25921991197450295, "grad_norm": 0.4280342757701874, "learning_rate": 9.487241798298906e-05, "loss": 1.7277, "step": 854 }, { "epoch": 0.25952344817119444, "grad_norm": 0.4271382987499237, "learning_rate": 9.48663426488457e-05, "loss": 1.3241, "step": 855 }, { "epoch": 0.25982698436788587, "grad_norm": 0.3773948550224304, "learning_rate": 9.486026731470231e-05, "loss": 2.0892, "step": 856 }, { "epoch": 0.2601305205645773, "grad_norm": 0.3343275785446167, "learning_rate": 9.485419198055894e-05, "loss": 1.7968, "step": 857 }, { "epoch": 0.2604340567612688, "grad_norm": 0.3711187243461609, "learning_rate": 9.484811664641555e-05, "loss": 1.8599, "step": 858 }, { "epoch": 0.26073759295796023, "grad_norm": 0.6738047003746033, "learning_rate": 9.484204131227218e-05, "loss": 2.0192, "step": 859 }, { "epoch": 0.2610411291546517, "grad_norm": 0.3094058334827423, "learning_rate": 9.483596597812881e-05, "loss": 1.8827, "step": 860 }, { "epoch": 0.26134466535134315, "grad_norm": 0.4207117259502411, "learning_rate": 9.482989064398543e-05, "loss": 1.8029, "step": 861 }, { "epoch": 0.2616482015480346, "grad_norm": 0.3958408832550049, "learning_rate": 9.482381530984204e-05, "loss": 1.9911, "step": 862 }, { "epoch": 0.2619517377447261, "grad_norm": 0.6868960857391357, "learning_rate": 9.481773997569867e-05, "loss": 1.9394, "step": 863 }, { "epoch": 0.2622552739414175, "grad_norm": 1.131034016609192, "learning_rate": 9.481166464155529e-05, "loss": 1.486, "step": 864 }, { "epoch": 0.26255881013810894, "grad_norm": 0.42944055795669556, "learning_rate": 9.480558930741191e-05, "loss": 1.9993, "step": 865 }, { "epoch": 0.26286234633480043, "grad_norm": 0.3888295292854309, "learning_rate": 9.479951397326854e-05, "loss": 2.0604, "step": 866 }, { "epoch": 0.26316588253149187, "grad_norm": 0.41875898838043213, "learning_rate": 9.479343863912516e-05, "loss": 1.8242, "step": 867 }, { "epoch": 0.26346941872818336, "grad_norm": 0.44148901104927063, "learning_rate": 9.478736330498177e-05, "loss": 1.7827, "step": 868 }, { "epoch": 0.2637729549248748, "grad_norm": 0.41976141929626465, "learning_rate": 9.47812879708384e-05, "loss": 2.127, "step": 869 }, { "epoch": 0.2640764911215662, "grad_norm": 0.5538145899772644, "learning_rate": 9.477521263669502e-05, "loss": 1.7636, "step": 870 }, { "epoch": 0.2643800273182577, "grad_norm": 0.36378878355026245, "learning_rate": 9.476913730255165e-05, "loss": 1.8159, "step": 871 }, { "epoch": 0.26468356351494915, "grad_norm": 0.3874679505825043, "learning_rate": 9.476306196840826e-05, "loss": 1.4785, "step": 872 }, { "epoch": 0.26498709971164064, "grad_norm": 0.4508163332939148, "learning_rate": 9.475698663426489e-05, "loss": 1.9393, "step": 873 }, { "epoch": 0.2652906359083321, "grad_norm": 0.38843271136283875, "learning_rate": 9.475091130012152e-05, "loss": 1.4532, "step": 874 }, { "epoch": 0.2655941721050235, "grad_norm": 0.4603917598724365, "learning_rate": 9.474483596597814e-05, "loss": 2.1182, "step": 875 }, { "epoch": 0.265897708301715, "grad_norm": 0.39668476581573486, "learning_rate": 9.473876063183475e-05, "loss": 1.9447, "step": 876 }, { "epoch": 0.26620124449840643, "grad_norm": 4.796502113342285, "learning_rate": 9.473268529769138e-05, "loss": 1.9395, "step": 877 }, { "epoch": 0.26650478069509786, "grad_norm": 0.38534435629844666, "learning_rate": 9.4726609963548e-05, "loss": 2.0862, "step": 878 }, { "epoch": 0.26680831689178935, "grad_norm": 0.9022141695022583, "learning_rate": 9.472053462940462e-05, "loss": 1.8303, "step": 879 }, { "epoch": 0.2671118530884808, "grad_norm": 0.4020310342311859, "learning_rate": 9.471445929526125e-05, "loss": 1.5084, "step": 880 }, { "epoch": 0.2674153892851723, "grad_norm": 0.30202022194862366, "learning_rate": 9.470838396111787e-05, "loss": 1.8474, "step": 881 }, { "epoch": 0.2677189254818637, "grad_norm": 0.35603514313697815, "learning_rate": 9.470230862697448e-05, "loss": 1.8973, "step": 882 }, { "epoch": 0.26802246167855515, "grad_norm": 0.3749227523803711, "learning_rate": 9.469623329283111e-05, "loss": 1.9763, "step": 883 }, { "epoch": 0.26832599787524664, "grad_norm": 0.45645421743392944, "learning_rate": 9.469015795868773e-05, "loss": 1.5944, "step": 884 }, { "epoch": 0.26862953407193807, "grad_norm": 0.5855579972267151, "learning_rate": 9.468408262454436e-05, "loss": 1.8724, "step": 885 }, { "epoch": 0.26893307026862956, "grad_norm": 0.3752727210521698, "learning_rate": 9.467800729040097e-05, "loss": 2.1792, "step": 886 }, { "epoch": 0.269236606465321, "grad_norm": 0.8951378464698792, "learning_rate": 9.46719319562576e-05, "loss": 1.4745, "step": 887 }, { "epoch": 0.2695401426620124, "grad_norm": 0.5524512529373169, "learning_rate": 9.466585662211423e-05, "loss": 1.9872, "step": 888 }, { "epoch": 0.2698436788587039, "grad_norm": 0.3917500078678131, "learning_rate": 9.465978128797085e-05, "loss": 1.579, "step": 889 }, { "epoch": 0.27014721505539535, "grad_norm": 0.41635704040527344, "learning_rate": 9.465370595382746e-05, "loss": 1.5984, "step": 890 }, { "epoch": 0.2704507512520868, "grad_norm": 0.3544903099536896, "learning_rate": 9.46476306196841e-05, "loss": 1.9112, "step": 891 }, { "epoch": 0.2707542874487783, "grad_norm": 0.4568898379802704, "learning_rate": 9.464155528554071e-05, "loss": 1.9857, "step": 892 }, { "epoch": 0.2710578236454697, "grad_norm": 0.4155702590942383, "learning_rate": 9.463547995139733e-05, "loss": 1.7986, "step": 893 }, { "epoch": 0.2713613598421612, "grad_norm": 0.37953928112983704, "learning_rate": 9.462940461725396e-05, "loss": 1.8383, "step": 894 }, { "epoch": 0.27166489603885263, "grad_norm": 0.37993937730789185, "learning_rate": 9.462332928311058e-05, "loss": 2.0555, "step": 895 }, { "epoch": 0.27196843223554407, "grad_norm": 0.4355872571468353, "learning_rate": 9.46172539489672e-05, "loss": 1.9307, "step": 896 }, { "epoch": 0.27227196843223556, "grad_norm": 0.38673707842826843, "learning_rate": 9.461117861482381e-05, "loss": 1.7155, "step": 897 }, { "epoch": 0.272575504628927, "grad_norm": 0.38927558064460754, "learning_rate": 9.460510328068044e-05, "loss": 2.1022, "step": 898 }, { "epoch": 0.2728790408256185, "grad_norm": 0.40219199657440186, "learning_rate": 9.459902794653707e-05, "loss": 1.3915, "step": 899 }, { "epoch": 0.2731825770223099, "grad_norm": 0.3896184265613556, "learning_rate": 9.459295261239368e-05, "loss": 1.9976, "step": 900 }, { "epoch": 0.27348611321900135, "grad_norm": 0.37489351630210876, "learning_rate": 9.458687727825031e-05, "loss": 1.8479, "step": 901 }, { "epoch": 0.27378964941569284, "grad_norm": 0.39215734601020813, "learning_rate": 9.458080194410694e-05, "loss": 1.9005, "step": 902 }, { "epoch": 0.27409318561238427, "grad_norm": 0.5054829716682434, "learning_rate": 9.457472660996356e-05, "loss": 1.8805, "step": 903 }, { "epoch": 0.2743967218090757, "grad_norm": 0.38437893986701965, "learning_rate": 9.456865127582017e-05, "loss": 1.5506, "step": 904 }, { "epoch": 0.2747002580057672, "grad_norm": 0.38727036118507385, "learning_rate": 9.45625759416768e-05, "loss": 1.7189, "step": 905 }, { "epoch": 0.27500379420245863, "grad_norm": 0.4260677993297577, "learning_rate": 9.455650060753342e-05, "loss": 2.1553, "step": 906 }, { "epoch": 0.2753073303991501, "grad_norm": 0.3969596326351166, "learning_rate": 9.455042527339004e-05, "loss": 1.73, "step": 907 }, { "epoch": 0.27561086659584155, "grad_norm": 0.371412456035614, "learning_rate": 9.454434993924667e-05, "loss": 1.618, "step": 908 }, { "epoch": 0.275914402792533, "grad_norm": 0.32723626494407654, "learning_rate": 9.453827460510329e-05, "loss": 1.897, "step": 909 }, { "epoch": 0.2762179389892245, "grad_norm": 0.37436455488204956, "learning_rate": 9.45321992709599e-05, "loss": 1.4027, "step": 910 }, { "epoch": 0.2765214751859159, "grad_norm": 0.3615550398826599, "learning_rate": 9.452612393681652e-05, "loss": 1.8835, "step": 911 }, { "epoch": 0.2768250113826074, "grad_norm": 0.37427717447280884, "learning_rate": 9.452004860267315e-05, "loss": 1.5918, "step": 912 }, { "epoch": 0.27712854757929883, "grad_norm": 0.4030051827430725, "learning_rate": 9.451397326852978e-05, "loss": 1.5694, "step": 913 }, { "epoch": 0.27743208377599027, "grad_norm": 0.3948831260204315, "learning_rate": 9.450789793438639e-05, "loss": 1.7315, "step": 914 }, { "epoch": 0.27773561997268176, "grad_norm": 0.4105396866798401, "learning_rate": 9.450182260024302e-05, "loss": 2.0528, "step": 915 }, { "epoch": 0.2780391561693732, "grad_norm": 0.400312215089798, "learning_rate": 9.449574726609965e-05, "loss": 1.6631, "step": 916 }, { "epoch": 0.2783426923660646, "grad_norm": 0.40099987387657166, "learning_rate": 9.448967193195625e-05, "loss": 1.9922, "step": 917 }, { "epoch": 0.2786462285627561, "grad_norm": 0.39861205220222473, "learning_rate": 9.448359659781288e-05, "loss": 1.8319, "step": 918 }, { "epoch": 0.27894976475944755, "grad_norm": 0.33672603964805603, "learning_rate": 9.447752126366951e-05, "loss": 1.8562, "step": 919 }, { "epoch": 0.27925330095613904, "grad_norm": 0.3398993909358978, "learning_rate": 9.447144592952613e-05, "loss": 1.8801, "step": 920 }, { "epoch": 0.2795568371528305, "grad_norm": 0.6748337149620056, "learning_rate": 9.446537059538275e-05, "loss": 2.0353, "step": 921 }, { "epoch": 0.2798603733495219, "grad_norm": 0.33281663060188293, "learning_rate": 9.445929526123938e-05, "loss": 2.0932, "step": 922 }, { "epoch": 0.2801639095462134, "grad_norm": 0.37020498514175415, "learning_rate": 9.4453219927096e-05, "loss": 1.8438, "step": 923 }, { "epoch": 0.28046744574290483, "grad_norm": 0.40763506293296814, "learning_rate": 9.444714459295261e-05, "loss": 1.6915, "step": 924 }, { "epoch": 0.2807709819395963, "grad_norm": 0.36651310324668884, "learning_rate": 9.444106925880923e-05, "loss": 2.0502, "step": 925 }, { "epoch": 0.28107451813628775, "grad_norm": 0.6006852388381958, "learning_rate": 9.443499392466586e-05, "loss": 1.9601, "step": 926 }, { "epoch": 0.2813780543329792, "grad_norm": 0.45634040236473083, "learning_rate": 9.442891859052248e-05, "loss": 1.5068, "step": 927 }, { "epoch": 0.2816815905296707, "grad_norm": 0.3380034565925598, "learning_rate": 9.44228432563791e-05, "loss": 1.8657, "step": 928 }, { "epoch": 0.2819851267263621, "grad_norm": 0.39120668172836304, "learning_rate": 9.441676792223573e-05, "loss": 1.9683, "step": 929 }, { "epoch": 0.2822886629230536, "grad_norm": 0.41591060161590576, "learning_rate": 9.441069258809236e-05, "loss": 1.9994, "step": 930 }, { "epoch": 0.28259219911974504, "grad_norm": 0.3863435983657837, "learning_rate": 9.440461725394896e-05, "loss": 1.8127, "step": 931 }, { "epoch": 0.28289573531643647, "grad_norm": 0.3713644742965698, "learning_rate": 9.439854191980559e-05, "loss": 1.7514, "step": 932 }, { "epoch": 0.28319927151312796, "grad_norm": 0.36419039964675903, "learning_rate": 9.439246658566222e-05, "loss": 1.9044, "step": 933 }, { "epoch": 0.2835028077098194, "grad_norm": 0.4059010148048401, "learning_rate": 9.438639125151884e-05, "loss": 1.783, "step": 934 }, { "epoch": 0.2838063439065108, "grad_norm": 0.5016249418258667, "learning_rate": 9.438031591737546e-05, "loss": 1.9958, "step": 935 }, { "epoch": 0.2841098801032023, "grad_norm": 0.4264843463897705, "learning_rate": 9.437424058323209e-05, "loss": 1.5526, "step": 936 }, { "epoch": 0.28441341629989375, "grad_norm": 0.5768559575080872, "learning_rate": 9.43681652490887e-05, "loss": 1.719, "step": 937 }, { "epoch": 0.28471695249658524, "grad_norm": 0.42008429765701294, "learning_rate": 9.436208991494532e-05, "loss": 2.057, "step": 938 }, { "epoch": 0.2850204886932767, "grad_norm": 0.3530850112438202, "learning_rate": 9.435601458080194e-05, "loss": 1.7118, "step": 939 }, { "epoch": 0.2853240248899681, "grad_norm": 0.44346508383750916, "learning_rate": 9.434993924665857e-05, "loss": 2.1416, "step": 940 }, { "epoch": 0.2856275610866596, "grad_norm": 0.645882785320282, "learning_rate": 9.434386391251519e-05, "loss": 1.8368, "step": 941 }, { "epoch": 0.28593109728335103, "grad_norm": 0.784821093082428, "learning_rate": 9.43377885783718e-05, "loss": 1.9541, "step": 942 }, { "epoch": 0.2862346334800425, "grad_norm": 0.43880385160446167, "learning_rate": 9.433171324422844e-05, "loss": 2.0319, "step": 943 }, { "epoch": 0.28653816967673396, "grad_norm": 0.6283034682273865, "learning_rate": 9.432563791008507e-05, "loss": 1.5762, "step": 944 }, { "epoch": 0.2868417058734254, "grad_norm": 0.3591736853122711, "learning_rate": 9.431956257594167e-05, "loss": 2.1589, "step": 945 }, { "epoch": 0.2871452420701169, "grad_norm": 0.3970873951911926, "learning_rate": 9.43134872417983e-05, "loss": 1.4798, "step": 946 }, { "epoch": 0.2874487782668083, "grad_norm": 0.42486631870269775, "learning_rate": 9.430741190765493e-05, "loss": 1.8345, "step": 947 }, { "epoch": 0.28775231446349975, "grad_norm": 0.37290090322494507, "learning_rate": 9.430133657351155e-05, "loss": 1.9313, "step": 948 }, { "epoch": 0.28805585066019124, "grad_norm": 0.47855010628700256, "learning_rate": 9.429526123936817e-05, "loss": 1.4405, "step": 949 }, { "epoch": 0.28835938685688267, "grad_norm": 0.4648813009262085, "learning_rate": 9.42891859052248e-05, "loss": 1.6818, "step": 950 }, { "epoch": 0.28866292305357416, "grad_norm": 0.40000760555267334, "learning_rate": 9.428311057108142e-05, "loss": 1.9014, "step": 951 }, { "epoch": 0.2889664592502656, "grad_norm": 0.3846280872821808, "learning_rate": 9.427703523693803e-05, "loss": 1.6268, "step": 952 }, { "epoch": 0.28926999544695703, "grad_norm": 0.43172597885131836, "learning_rate": 9.427095990279465e-05, "loss": 1.6287, "step": 953 }, { "epoch": 0.2895735316436485, "grad_norm": 0.42565402388572693, "learning_rate": 9.426488456865128e-05, "loss": 1.8678, "step": 954 }, { "epoch": 0.28987706784033995, "grad_norm": 1.070906400680542, "learning_rate": 9.42588092345079e-05, "loss": 1.7688, "step": 955 }, { "epoch": 0.29018060403703144, "grad_norm": 0.4792560935020447, "learning_rate": 9.425273390036452e-05, "loss": 1.09, "step": 956 }, { "epoch": 0.2904841402337229, "grad_norm": 0.37043797969818115, "learning_rate": 9.424665856622115e-05, "loss": 1.6931, "step": 957 }, { "epoch": 0.2907876764304143, "grad_norm": 0.37764909863471985, "learning_rate": 9.424058323207778e-05, "loss": 1.8621, "step": 958 }, { "epoch": 0.2910912126271058, "grad_norm": 0.40328919887542725, "learning_rate": 9.423450789793438e-05, "loss": 1.9484, "step": 959 }, { "epoch": 0.29139474882379723, "grad_norm": 0.4451077878475189, "learning_rate": 9.422843256379101e-05, "loss": 1.7273, "step": 960 }, { "epoch": 0.29169828502048867, "grad_norm": 0.5410102009773254, "learning_rate": 9.422235722964764e-05, "loss": 2.0116, "step": 961 }, { "epoch": 0.29200182121718016, "grad_norm": 0.42526179552078247, "learning_rate": 9.421628189550426e-05, "loss": 1.6596, "step": 962 }, { "epoch": 0.2923053574138716, "grad_norm": 0.3813883066177368, "learning_rate": 9.421020656136088e-05, "loss": 2.0083, "step": 963 }, { "epoch": 0.2926088936105631, "grad_norm": 0.3967495858669281, "learning_rate": 9.420413122721751e-05, "loss": 1.8665, "step": 964 }, { "epoch": 0.2929124298072545, "grad_norm": 0.4672113060951233, "learning_rate": 9.419805589307413e-05, "loss": 2.1132, "step": 965 }, { "epoch": 0.29321596600394595, "grad_norm": 0.4068308472633362, "learning_rate": 9.419198055893074e-05, "loss": 2.0042, "step": 966 }, { "epoch": 0.29351950220063744, "grad_norm": 0.8895217180252075, "learning_rate": 9.418590522478736e-05, "loss": 1.9725, "step": 967 }, { "epoch": 0.2938230383973289, "grad_norm": 0.6839628219604492, "learning_rate": 9.417982989064399e-05, "loss": 1.5712, "step": 968 }, { "epoch": 0.29412657459402036, "grad_norm": 0.5890039801597595, "learning_rate": 9.417375455650061e-05, "loss": 2.1177, "step": 969 }, { "epoch": 0.2944301107907118, "grad_norm": 0.33217447996139526, "learning_rate": 9.416767922235723e-05, "loss": 1.9683, "step": 970 }, { "epoch": 0.29473364698740323, "grad_norm": 0.44125109910964966, "learning_rate": 9.416160388821386e-05, "loss": 1.6272, "step": 971 }, { "epoch": 0.2950371831840947, "grad_norm": 0.367145836353302, "learning_rate": 9.415552855407049e-05, "loss": 2.0624, "step": 972 }, { "epoch": 0.29534071938078615, "grad_norm": 0.3220556080341339, "learning_rate": 9.414945321992709e-05, "loss": 1.4513, "step": 973 }, { "epoch": 0.2956442555774776, "grad_norm": 0.37686339020729065, "learning_rate": 9.414337788578372e-05, "loss": 1.6418, "step": 974 }, { "epoch": 0.2959477917741691, "grad_norm": 0.46043211221694946, "learning_rate": 9.413730255164035e-05, "loss": 2.0433, "step": 975 }, { "epoch": 0.2962513279708605, "grad_norm": 0.40463754534721375, "learning_rate": 9.413122721749697e-05, "loss": 1.8214, "step": 976 }, { "epoch": 0.296554864167552, "grad_norm": 0.406583309173584, "learning_rate": 9.412515188335359e-05, "loss": 1.5704, "step": 977 }, { "epoch": 0.29685840036424344, "grad_norm": 0.4335365295410156, "learning_rate": 9.41190765492102e-05, "loss": 1.8772, "step": 978 }, { "epoch": 0.29716193656093487, "grad_norm": 0.43915802240371704, "learning_rate": 9.411300121506684e-05, "loss": 1.5376, "step": 979 }, { "epoch": 0.29746547275762636, "grad_norm": 0.36118191480636597, "learning_rate": 9.410692588092345e-05, "loss": 1.8994, "step": 980 }, { "epoch": 0.2977690089543178, "grad_norm": 0.4184354841709137, "learning_rate": 9.410085054678007e-05, "loss": 2.0041, "step": 981 }, { "epoch": 0.2980725451510093, "grad_norm": 0.3743583559989929, "learning_rate": 9.40947752126367e-05, "loss": 1.8986, "step": 982 }, { "epoch": 0.2983760813477007, "grad_norm": 0.4110506474971771, "learning_rate": 9.408869987849332e-05, "loss": 2.0617, "step": 983 }, { "epoch": 0.29867961754439215, "grad_norm": 0.33404871821403503, "learning_rate": 9.408262454434994e-05, "loss": 2.0014, "step": 984 }, { "epoch": 0.29898315374108364, "grad_norm": 0.3586455285549164, "learning_rate": 9.407654921020657e-05, "loss": 1.4912, "step": 985 }, { "epoch": 0.2992866899377751, "grad_norm": 0.3859756886959076, "learning_rate": 9.40704738760632e-05, "loss": 1.7919, "step": 986 }, { "epoch": 0.2995902261344665, "grad_norm": 0.4533100724220276, "learning_rate": 9.40643985419198e-05, "loss": 2.1134, "step": 987 }, { "epoch": 0.299893762331158, "grad_norm": 0.399854838848114, "learning_rate": 9.405832320777643e-05, "loss": 1.8198, "step": 988 }, { "epoch": 0.30019729852784943, "grad_norm": 0.3582475781440735, "learning_rate": 9.405224787363306e-05, "loss": 1.6337, "step": 989 }, { "epoch": 0.3005008347245409, "grad_norm": 0.39537113904953003, "learning_rate": 9.404617253948967e-05, "loss": 1.9856, "step": 990 }, { "epoch": 0.30080437092123236, "grad_norm": 0.3662082254886627, "learning_rate": 9.40400972053463e-05, "loss": 2.0424, "step": 991 }, { "epoch": 0.3011079071179238, "grad_norm": 0.38339659571647644, "learning_rate": 9.403402187120292e-05, "loss": 2.0293, "step": 992 }, { "epoch": 0.3014114433146153, "grad_norm": 0.3134559988975525, "learning_rate": 9.402794653705955e-05, "loss": 1.8086, "step": 993 }, { "epoch": 0.3017149795113067, "grad_norm": 0.4155486226081848, "learning_rate": 9.402187120291616e-05, "loss": 1.8748, "step": 994 }, { "epoch": 0.3020185157079982, "grad_norm": 0.41562893986701965, "learning_rate": 9.401579586877278e-05, "loss": 1.6104, "step": 995 }, { "epoch": 0.30232205190468964, "grad_norm": 0.36112940311431885, "learning_rate": 9.400972053462941e-05, "loss": 1.335, "step": 996 }, { "epoch": 0.30262558810138107, "grad_norm": 0.4332577586174011, "learning_rate": 9.400364520048603e-05, "loss": 1.6098, "step": 997 }, { "epoch": 0.30292912429807256, "grad_norm": 0.3520275950431824, "learning_rate": 9.399756986634265e-05, "loss": 1.9924, "step": 998 }, { "epoch": 0.303232660494764, "grad_norm": 0.44331827759742737, "learning_rate": 9.399149453219928e-05, "loss": 1.8021, "step": 999 }, { "epoch": 0.30353619669145543, "grad_norm": 0.38627490401268005, "learning_rate": 9.398541919805591e-05, "loss": 1.8952, "step": 1000 }, { "epoch": 0.3038397328881469, "grad_norm": 0.42670029401779175, "learning_rate": 9.397934386391251e-05, "loss": 1.7, "step": 1001 }, { "epoch": 0.30414326908483835, "grad_norm": 0.31961289048194885, "learning_rate": 9.397326852976914e-05, "loss": 1.4874, "step": 1002 }, { "epoch": 0.30444680528152984, "grad_norm": 0.46648967266082764, "learning_rate": 9.396719319562577e-05, "loss": 1.962, "step": 1003 }, { "epoch": 0.3047503414782213, "grad_norm": 0.39356762170791626, "learning_rate": 9.396111786148238e-05, "loss": 2.0206, "step": 1004 }, { "epoch": 0.3050538776749127, "grad_norm": 0.37756818532943726, "learning_rate": 9.395504252733901e-05, "loss": 1.9863, "step": 1005 }, { "epoch": 0.3053574138716042, "grad_norm": 0.3291250765323639, "learning_rate": 9.394896719319563e-05, "loss": 1.9422, "step": 1006 }, { "epoch": 0.30566095006829563, "grad_norm": 0.397297739982605, "learning_rate": 9.394289185905226e-05, "loss": 1.7533, "step": 1007 }, { "epoch": 0.3059644862649871, "grad_norm": 0.33320048451423645, "learning_rate": 9.393681652490887e-05, "loss": 1.6411, "step": 1008 }, { "epoch": 0.30626802246167856, "grad_norm": 0.38921716809272766, "learning_rate": 9.393074119076549e-05, "loss": 1.9216, "step": 1009 }, { "epoch": 0.30657155865837, "grad_norm": 0.40245047211647034, "learning_rate": 9.392466585662212e-05, "loss": 1.9853, "step": 1010 }, { "epoch": 0.3068750948550615, "grad_norm": 0.5569208264350891, "learning_rate": 9.391859052247874e-05, "loss": 2.157, "step": 1011 }, { "epoch": 0.3071786310517529, "grad_norm": 0.4204193949699402, "learning_rate": 9.391251518833536e-05, "loss": 1.666, "step": 1012 }, { "epoch": 0.3074821672484444, "grad_norm": 0.3458712100982666, "learning_rate": 9.390643985419199e-05, "loss": 1.9564, "step": 1013 }, { "epoch": 0.30778570344513584, "grad_norm": 0.42556729912757874, "learning_rate": 9.39003645200486e-05, "loss": 2.0565, "step": 1014 }, { "epoch": 0.3080892396418273, "grad_norm": 0.3334849774837494, "learning_rate": 9.389428918590522e-05, "loss": 1.9534, "step": 1015 }, { "epoch": 0.30839277583851876, "grad_norm": 0.3297790288925171, "learning_rate": 9.388821385176185e-05, "loss": 2.0032, "step": 1016 }, { "epoch": 0.3086963120352102, "grad_norm": 0.4108186662197113, "learning_rate": 9.388213851761848e-05, "loss": 1.6698, "step": 1017 }, { "epoch": 0.30899984823190163, "grad_norm": 0.4515385925769806, "learning_rate": 9.387606318347509e-05, "loss": 1.9709, "step": 1018 }, { "epoch": 0.3093033844285931, "grad_norm": 0.38401028513908386, "learning_rate": 9.386998784933172e-05, "loss": 1.9928, "step": 1019 }, { "epoch": 0.30960692062528455, "grad_norm": 0.32774823904037476, "learning_rate": 9.386391251518834e-05, "loss": 2.1256, "step": 1020 }, { "epoch": 0.30991045682197604, "grad_norm": 0.45378655195236206, "learning_rate": 9.385783718104497e-05, "loss": 1.8078, "step": 1021 }, { "epoch": 0.3102139930186675, "grad_norm": 0.3340519368648529, "learning_rate": 9.385176184690158e-05, "loss": 1.6102, "step": 1022 }, { "epoch": 0.3105175292153589, "grad_norm": 0.5457311868667603, "learning_rate": 9.38456865127582e-05, "loss": 1.9735, "step": 1023 }, { "epoch": 0.3108210654120504, "grad_norm": 0.3604097068309784, "learning_rate": 9.383961117861483e-05, "loss": 1.8048, "step": 1024 }, { "epoch": 0.31112460160874184, "grad_norm": 0.3677893579006195, "learning_rate": 9.383353584447145e-05, "loss": 1.5346, "step": 1025 }, { "epoch": 0.3114281378054333, "grad_norm": 0.49554312229156494, "learning_rate": 9.382746051032807e-05, "loss": 2.0472, "step": 1026 }, { "epoch": 0.31173167400212476, "grad_norm": 0.37693944573402405, "learning_rate": 9.38213851761847e-05, "loss": 1.8848, "step": 1027 }, { "epoch": 0.3120352101988162, "grad_norm": 0.6364639401435852, "learning_rate": 9.381530984204132e-05, "loss": 1.7289, "step": 1028 }, { "epoch": 0.3123387463955077, "grad_norm": 0.36025428771972656, "learning_rate": 9.380923450789793e-05, "loss": 1.8544, "step": 1029 }, { "epoch": 0.3126422825921991, "grad_norm": 0.4033251106739044, "learning_rate": 9.380315917375456e-05, "loss": 1.8661, "step": 1030 }, { "epoch": 0.31294581878889055, "grad_norm": 0.41504162549972534, "learning_rate": 9.37970838396112e-05, "loss": 1.9619, "step": 1031 }, { "epoch": 0.31324935498558204, "grad_norm": 0.38639551401138306, "learning_rate": 9.37910085054678e-05, "loss": 2.1785, "step": 1032 }, { "epoch": 0.3135528911822735, "grad_norm": 0.3487949073314667, "learning_rate": 9.378493317132443e-05, "loss": 1.5571, "step": 1033 }, { "epoch": 0.31385642737896496, "grad_norm": 0.3317317068576813, "learning_rate": 9.377885783718105e-05, "loss": 2.1086, "step": 1034 }, { "epoch": 0.3141599635756564, "grad_norm": 0.35874056816101074, "learning_rate": 9.377278250303768e-05, "loss": 1.9453, "step": 1035 }, { "epoch": 0.31446349977234783, "grad_norm": 0.3823045790195465, "learning_rate": 9.37667071688943e-05, "loss": 1.6444, "step": 1036 }, { "epoch": 0.3147670359690393, "grad_norm": 0.39954647421836853, "learning_rate": 9.376063183475091e-05, "loss": 2.0497, "step": 1037 }, { "epoch": 0.31507057216573076, "grad_norm": 0.34357962012290955, "learning_rate": 9.375455650060754e-05, "loss": 1.8391, "step": 1038 }, { "epoch": 0.31537410836242225, "grad_norm": 0.35260939598083496, "learning_rate": 9.374848116646416e-05, "loss": 1.9691, "step": 1039 }, { "epoch": 0.3156776445591137, "grad_norm": 0.33483296632766724, "learning_rate": 9.374240583232078e-05, "loss": 1.8933, "step": 1040 }, { "epoch": 0.3159811807558051, "grad_norm": 0.4771517515182495, "learning_rate": 9.373633049817741e-05, "loss": 1.8574, "step": 1041 }, { "epoch": 0.3162847169524966, "grad_norm": 0.3025968372821808, "learning_rate": 9.373025516403403e-05, "loss": 1.7995, "step": 1042 }, { "epoch": 0.31658825314918804, "grad_norm": 0.39535394310951233, "learning_rate": 9.372417982989064e-05, "loss": 1.6662, "step": 1043 }, { "epoch": 0.31689178934587947, "grad_norm": 0.35718834400177, "learning_rate": 9.371810449574727e-05, "loss": 2.3543, "step": 1044 }, { "epoch": 0.31719532554257096, "grad_norm": 0.40815529227256775, "learning_rate": 9.37120291616039e-05, "loss": 1.8978, "step": 1045 }, { "epoch": 0.3174988617392624, "grad_norm": 0.38799992203712463, "learning_rate": 9.370595382746051e-05, "loss": 1.9145, "step": 1046 }, { "epoch": 0.3178023979359539, "grad_norm": 0.3711848556995392, "learning_rate": 9.369987849331714e-05, "loss": 1.9129, "step": 1047 }, { "epoch": 0.3181059341326453, "grad_norm": 1.041429877281189, "learning_rate": 9.369380315917376e-05, "loss": 1.3535, "step": 1048 }, { "epoch": 0.31840947032933675, "grad_norm": 0.4107154309749603, "learning_rate": 9.368772782503039e-05, "loss": 1.6886, "step": 1049 }, { "epoch": 0.31871300652602824, "grad_norm": 0.35202670097351074, "learning_rate": 9.3681652490887e-05, "loss": 1.4685, "step": 1050 }, { "epoch": 0.3190165427227197, "grad_norm": 0.39248141646385193, "learning_rate": 9.367557715674362e-05, "loss": 1.6177, "step": 1051 }, { "epoch": 0.31932007891941117, "grad_norm": 0.3911724388599396, "learning_rate": 9.366950182260025e-05, "loss": 1.5015, "step": 1052 }, { "epoch": 0.3196236151161026, "grad_norm": 0.8974817991256714, "learning_rate": 9.366342648845687e-05, "loss": 1.5422, "step": 1053 }, { "epoch": 0.31992715131279403, "grad_norm": 0.4150513708591461, "learning_rate": 9.365735115431349e-05, "loss": 2.0201, "step": 1054 }, { "epoch": 0.3202306875094855, "grad_norm": 0.4205161929130554, "learning_rate": 9.365127582017012e-05, "loss": 1.881, "step": 1055 }, { "epoch": 0.32053422370617696, "grad_norm": 0.36916840076446533, "learning_rate": 9.364520048602674e-05, "loss": 1.9912, "step": 1056 }, { "epoch": 0.3208377599028684, "grad_norm": 0.45616719126701355, "learning_rate": 9.363912515188335e-05, "loss": 1.825, "step": 1057 }, { "epoch": 0.3211412960995599, "grad_norm": 0.3602239787578583, "learning_rate": 9.363304981773998e-05, "loss": 1.5264, "step": 1058 }, { "epoch": 0.3214448322962513, "grad_norm": 0.39383935928344727, "learning_rate": 9.362697448359661e-05, "loss": 1.9719, "step": 1059 }, { "epoch": 0.3217483684929428, "grad_norm": 0.321859210729599, "learning_rate": 9.362089914945322e-05, "loss": 1.7703, "step": 1060 }, { "epoch": 0.32205190468963424, "grad_norm": 0.40060603618621826, "learning_rate": 9.361482381530985e-05, "loss": 1.7199, "step": 1061 }, { "epoch": 0.3223554408863257, "grad_norm": 0.4096384644508362, "learning_rate": 9.360874848116647e-05, "loss": 1.5069, "step": 1062 }, { "epoch": 0.32265897708301716, "grad_norm": 0.35391515493392944, "learning_rate": 9.360267314702308e-05, "loss": 1.8928, "step": 1063 }, { "epoch": 0.3229625132797086, "grad_norm": 0.3309794068336487, "learning_rate": 9.359659781287971e-05, "loss": 1.6238, "step": 1064 }, { "epoch": 0.3232660494764001, "grad_norm": 0.37579798698425293, "learning_rate": 9.359052247873633e-05, "loss": 2.329, "step": 1065 }, { "epoch": 0.3235695856730915, "grad_norm": 0.41262614727020264, "learning_rate": 9.358444714459296e-05, "loss": 1.9192, "step": 1066 }, { "epoch": 0.32387312186978295, "grad_norm": 0.3737616539001465, "learning_rate": 9.357837181044958e-05, "loss": 1.9614, "step": 1067 }, { "epoch": 0.32417665806647444, "grad_norm": 0.35716524720191956, "learning_rate": 9.35722964763062e-05, "loss": 1.9046, "step": 1068 }, { "epoch": 0.3244801942631659, "grad_norm": 1.6110327243804932, "learning_rate": 9.356622114216283e-05, "loss": 1.5437, "step": 1069 }, { "epoch": 0.3247837304598573, "grad_norm": 0.3114778399467468, "learning_rate": 9.356014580801945e-05, "loss": 1.8962, "step": 1070 }, { "epoch": 0.3250872666565488, "grad_norm": 0.35084468126296997, "learning_rate": 9.355407047387606e-05, "loss": 2.0253, "step": 1071 }, { "epoch": 0.32539080285324024, "grad_norm": 0.38513630628585815, "learning_rate": 9.35479951397327e-05, "loss": 1.7556, "step": 1072 }, { "epoch": 0.3256943390499317, "grad_norm": 0.41520386934280396, "learning_rate": 9.354191980558931e-05, "loss": 1.274, "step": 1073 }, { "epoch": 0.32599787524662316, "grad_norm": 0.3998602032661438, "learning_rate": 9.353584447144593e-05, "loss": 1.9963, "step": 1074 }, { "epoch": 0.3263014114433146, "grad_norm": 0.3973468244075775, "learning_rate": 9.352976913730256e-05, "loss": 2.2281, "step": 1075 }, { "epoch": 0.3266049476400061, "grad_norm": 0.37020763754844666, "learning_rate": 9.352369380315918e-05, "loss": 1.6891, "step": 1076 }, { "epoch": 0.3269084838366975, "grad_norm": 0.43367013335227966, "learning_rate": 9.35176184690158e-05, "loss": 1.5859, "step": 1077 }, { "epoch": 0.327212020033389, "grad_norm": 0.3882901072502136, "learning_rate": 9.351154313487242e-05, "loss": 1.5294, "step": 1078 }, { "epoch": 0.32751555623008044, "grad_norm": 0.38236895203590393, "learning_rate": 9.350546780072904e-05, "loss": 2.0035, "step": 1079 }, { "epoch": 0.3278190924267719, "grad_norm": 0.42090603709220886, "learning_rate": 9.349939246658567e-05, "loss": 1.325, "step": 1080 }, { "epoch": 0.32812262862346336, "grad_norm": 0.4210514724254608, "learning_rate": 9.349331713244229e-05, "loss": 1.9018, "step": 1081 }, { "epoch": 0.3284261648201548, "grad_norm": 0.3695550858974457, "learning_rate": 9.348724179829891e-05, "loss": 2.0823, "step": 1082 }, { "epoch": 0.32872970101684623, "grad_norm": 0.44178470969200134, "learning_rate": 9.348116646415554e-05, "loss": 1.9396, "step": 1083 }, { "epoch": 0.3290332372135377, "grad_norm": 2.9311540126800537, "learning_rate": 9.347509113001216e-05, "loss": 2.0483, "step": 1084 }, { "epoch": 0.32933677341022916, "grad_norm": 0.38238954544067383, "learning_rate": 9.346901579586877e-05, "loss": 2.0408, "step": 1085 }, { "epoch": 0.32964030960692065, "grad_norm": 0.420622318983078, "learning_rate": 9.34629404617254e-05, "loss": 1.9811, "step": 1086 }, { "epoch": 0.3299438458036121, "grad_norm": 0.47827744483947754, "learning_rate": 9.345686512758202e-05, "loss": 1.7816, "step": 1087 }, { "epoch": 0.3302473820003035, "grad_norm": 0.3673538565635681, "learning_rate": 9.345078979343864e-05, "loss": 1.9916, "step": 1088 }, { "epoch": 0.330550918196995, "grad_norm": 1.2525584697723389, "learning_rate": 9.344471445929527e-05, "loss": 2.0323, "step": 1089 }, { "epoch": 0.33085445439368644, "grad_norm": 0.3575446605682373, "learning_rate": 9.343863912515189e-05, "loss": 2.0254, "step": 1090 }, { "epoch": 0.3311579905903779, "grad_norm": 0.4579968750476837, "learning_rate": 9.34325637910085e-05, "loss": 1.4365, "step": 1091 }, { "epoch": 0.33146152678706936, "grad_norm": 0.5363442301750183, "learning_rate": 9.342648845686513e-05, "loss": 2.0635, "step": 1092 }, { "epoch": 0.3317650629837608, "grad_norm": 0.4065784215927124, "learning_rate": 9.342041312272175e-05, "loss": 1.6133, "step": 1093 }, { "epoch": 0.3320685991804523, "grad_norm": 0.4256560504436493, "learning_rate": 9.341433778857838e-05, "loss": 1.7574, "step": 1094 }, { "epoch": 0.3323721353771437, "grad_norm": 0.3566704988479614, "learning_rate": 9.3408262454435e-05, "loss": 1.4446, "step": 1095 }, { "epoch": 0.33267567157383515, "grad_norm": 0.39680102467536926, "learning_rate": 9.340218712029162e-05, "loss": 2.1008, "step": 1096 }, { "epoch": 0.33297920777052664, "grad_norm": 0.39213013648986816, "learning_rate": 9.339611178614825e-05, "loss": 1.5924, "step": 1097 }, { "epoch": 0.3332827439672181, "grad_norm": 0.39503929018974304, "learning_rate": 9.339003645200487e-05, "loss": 1.9126, "step": 1098 }, { "epoch": 0.33358628016390957, "grad_norm": 0.34226784110069275, "learning_rate": 9.338396111786148e-05, "loss": 1.3475, "step": 1099 }, { "epoch": 0.333889816360601, "grad_norm": 0.3511487543582916, "learning_rate": 9.337788578371811e-05, "loss": 1.6679, "step": 1100 }, { "epoch": 0.33419335255729243, "grad_norm": 0.6215702295303345, "learning_rate": 9.337181044957473e-05, "loss": 1.7599, "step": 1101 }, { "epoch": 0.3344968887539839, "grad_norm": 0.34477895498275757, "learning_rate": 9.336573511543135e-05, "loss": 1.4905, "step": 1102 }, { "epoch": 0.33480042495067536, "grad_norm": 0.37696805596351624, "learning_rate": 9.335965978128798e-05, "loss": 1.2661, "step": 1103 }, { "epoch": 0.33510396114736685, "grad_norm": 0.3722575306892395, "learning_rate": 9.33535844471446e-05, "loss": 1.8483, "step": 1104 }, { "epoch": 0.3354074973440583, "grad_norm": 0.5641891360282898, "learning_rate": 9.334750911300121e-05, "loss": 2.0121, "step": 1105 }, { "epoch": 0.3357110335407497, "grad_norm": 0.37221911549568176, "learning_rate": 9.334143377885784e-05, "loss": 1.9564, "step": 1106 }, { "epoch": 0.3360145697374412, "grad_norm": 0.3777831494808197, "learning_rate": 9.333535844471446e-05, "loss": 1.8625, "step": 1107 }, { "epoch": 0.33631810593413264, "grad_norm": 0.4311445951461792, "learning_rate": 9.332928311057109e-05, "loss": 2.038, "step": 1108 }, { "epoch": 0.33662164213082413, "grad_norm": 0.4373043179512024, "learning_rate": 9.332320777642771e-05, "loss": 1.6282, "step": 1109 }, { "epoch": 0.33692517832751556, "grad_norm": 0.3800273835659027, "learning_rate": 9.331713244228433e-05, "loss": 1.5552, "step": 1110 }, { "epoch": 0.337228714524207, "grad_norm": 0.6762371063232422, "learning_rate": 9.331105710814096e-05, "loss": 1.7299, "step": 1111 }, { "epoch": 0.3375322507208985, "grad_norm": 0.3713971972465515, "learning_rate": 9.330498177399758e-05, "loss": 1.9919, "step": 1112 }, { "epoch": 0.3378357869175899, "grad_norm": 0.40268123149871826, "learning_rate": 9.329890643985419e-05, "loss": 1.9952, "step": 1113 }, { "epoch": 0.33813932311428135, "grad_norm": 0.44786572456359863, "learning_rate": 9.329283110571082e-05, "loss": 2.1622, "step": 1114 }, { "epoch": 0.33844285931097284, "grad_norm": 0.39639097452163696, "learning_rate": 9.328675577156744e-05, "loss": 1.7995, "step": 1115 }, { "epoch": 0.3387463955076643, "grad_norm": 0.5204857587814331, "learning_rate": 9.328068043742406e-05, "loss": 1.9925, "step": 1116 }, { "epoch": 0.33904993170435577, "grad_norm": 0.4180005192756653, "learning_rate": 9.327460510328069e-05, "loss": 1.8489, "step": 1117 }, { "epoch": 0.3393534679010472, "grad_norm": 0.42055949568748474, "learning_rate": 9.32685297691373e-05, "loss": 1.7729, "step": 1118 }, { "epoch": 0.33965700409773864, "grad_norm": 0.4213305115699768, "learning_rate": 9.326245443499392e-05, "loss": 1.9299, "step": 1119 }, { "epoch": 0.3399605402944301, "grad_norm": 0.7117316126823425, "learning_rate": 9.325637910085055e-05, "loss": 1.7163, "step": 1120 }, { "epoch": 0.34026407649112156, "grad_norm": 0.3836345374584198, "learning_rate": 9.325030376670717e-05, "loss": 1.8703, "step": 1121 }, { "epoch": 0.34056761268781305, "grad_norm": 0.5493946075439453, "learning_rate": 9.32442284325638e-05, "loss": 2.1161, "step": 1122 }, { "epoch": 0.3408711488845045, "grad_norm": 0.4323013722896576, "learning_rate": 9.323815309842042e-05, "loss": 1.9867, "step": 1123 }, { "epoch": 0.3411746850811959, "grad_norm": 0.37991368770599365, "learning_rate": 9.323207776427704e-05, "loss": 1.935, "step": 1124 }, { "epoch": 0.3414782212778874, "grad_norm": 0.40191560983657837, "learning_rate": 9.322600243013367e-05, "loss": 1.7617, "step": 1125 }, { "epoch": 0.34178175747457884, "grad_norm": 0.42309120297431946, "learning_rate": 9.321992709599029e-05, "loss": 2.1438, "step": 1126 }, { "epoch": 0.3420852936712703, "grad_norm": 0.3918818235397339, "learning_rate": 9.32138517618469e-05, "loss": 1.9434, "step": 1127 }, { "epoch": 0.34238882986796176, "grad_norm": 0.9797879457473755, "learning_rate": 9.320777642770353e-05, "loss": 1.8893, "step": 1128 }, { "epoch": 0.3426923660646532, "grad_norm": 0.38459375500679016, "learning_rate": 9.320170109356015e-05, "loss": 1.9626, "step": 1129 }, { "epoch": 0.3429959022613447, "grad_norm": 0.35924455523490906, "learning_rate": 9.319562575941677e-05, "loss": 1.7108, "step": 1130 }, { "epoch": 0.3432994384580361, "grad_norm": 0.3576562702655792, "learning_rate": 9.31895504252734e-05, "loss": 1.9426, "step": 1131 }, { "epoch": 0.34360297465472756, "grad_norm": 0.3931269943714142, "learning_rate": 9.318347509113002e-05, "loss": 1.9598, "step": 1132 }, { "epoch": 0.34390651085141904, "grad_norm": 0.41744112968444824, "learning_rate": 9.317739975698663e-05, "loss": 1.9522, "step": 1133 }, { "epoch": 0.3442100470481105, "grad_norm": 0.3721160590648651, "learning_rate": 9.317132442284326e-05, "loss": 1.366, "step": 1134 }, { "epoch": 0.34451358324480197, "grad_norm": 0.4886751174926758, "learning_rate": 9.316524908869988e-05, "loss": 1.8528, "step": 1135 }, { "epoch": 0.3448171194414934, "grad_norm": 0.3837689161300659, "learning_rate": 9.31591737545565e-05, "loss": 2.0877, "step": 1136 }, { "epoch": 0.34512065563818484, "grad_norm": 0.3721841871738434, "learning_rate": 9.315309842041313e-05, "loss": 2.0144, "step": 1137 }, { "epoch": 0.3454241918348763, "grad_norm": 0.4325003921985626, "learning_rate": 9.314702308626975e-05, "loss": 2.2377, "step": 1138 }, { "epoch": 0.34572772803156776, "grad_norm": 0.39936354756355286, "learning_rate": 9.314094775212638e-05, "loss": 1.9469, "step": 1139 }, { "epoch": 0.3460312642282592, "grad_norm": 0.38498643040657043, "learning_rate": 9.3134872417983e-05, "loss": 2.0822, "step": 1140 }, { "epoch": 0.3463348004249507, "grad_norm": 0.3657349944114685, "learning_rate": 9.312879708383961e-05, "loss": 1.8089, "step": 1141 }, { "epoch": 0.3466383366216421, "grad_norm": 2.0269200801849365, "learning_rate": 9.312272174969624e-05, "loss": 1.8882, "step": 1142 }, { "epoch": 0.3469418728183336, "grad_norm": 0.3976801931858063, "learning_rate": 9.311664641555286e-05, "loss": 1.8956, "step": 1143 }, { "epoch": 0.34724540901502504, "grad_norm": 0.47736337780952454, "learning_rate": 9.311057108140948e-05, "loss": 1.4612, "step": 1144 }, { "epoch": 0.3475489452117165, "grad_norm": 0.4764254093170166, "learning_rate": 9.310449574726611e-05, "loss": 2.119, "step": 1145 }, { "epoch": 0.34785248140840797, "grad_norm": 0.49367082118988037, "learning_rate": 9.309842041312273e-05, "loss": 1.1816, "step": 1146 }, { "epoch": 0.3481560176050994, "grad_norm": 0.48990949988365173, "learning_rate": 9.309234507897934e-05, "loss": 1.6951, "step": 1147 }, { "epoch": 0.3484595538017909, "grad_norm": 0.6447961330413818, "learning_rate": 9.308626974483597e-05, "loss": 2.0145, "step": 1148 }, { "epoch": 0.3487630899984823, "grad_norm": 0.8322371244430542, "learning_rate": 9.308019441069259e-05, "loss": 1.8679, "step": 1149 }, { "epoch": 0.34906662619517376, "grad_norm": 0.3726497292518616, "learning_rate": 9.307411907654921e-05, "loss": 1.8455, "step": 1150 }, { "epoch": 0.34937016239186525, "grad_norm": 0.3494066298007965, "learning_rate": 9.306804374240584e-05, "loss": 1.7699, "step": 1151 }, { "epoch": 0.3496736985885567, "grad_norm": 0.44510725140571594, "learning_rate": 9.306196840826246e-05, "loss": 1.6315, "step": 1152 }, { "epoch": 0.3499772347852481, "grad_norm": 0.4738346338272095, "learning_rate": 9.305589307411909e-05, "loss": 1.9982, "step": 1153 }, { "epoch": 0.3502807709819396, "grad_norm": 0.6915324330329895, "learning_rate": 9.304981773997569e-05, "loss": 1.6566, "step": 1154 }, { "epoch": 0.35058430717863104, "grad_norm": 0.35767850279808044, "learning_rate": 9.304374240583232e-05, "loss": 1.6553, "step": 1155 }, { "epoch": 0.35088784337532253, "grad_norm": 0.4144536852836609, "learning_rate": 9.303766707168895e-05, "loss": 1.4838, "step": 1156 }, { "epoch": 0.35119137957201396, "grad_norm": 0.42863425612449646, "learning_rate": 9.303159173754557e-05, "loss": 2.0101, "step": 1157 }, { "epoch": 0.3514949157687054, "grad_norm": 0.38044658303260803, "learning_rate": 9.302551640340219e-05, "loss": 2.0358, "step": 1158 }, { "epoch": 0.3517984519653969, "grad_norm": 0.3667512536048889, "learning_rate": 9.301944106925882e-05, "loss": 2.0601, "step": 1159 }, { "epoch": 0.3521019881620883, "grad_norm": 0.4198186993598938, "learning_rate": 9.301336573511544e-05, "loss": 1.8418, "step": 1160 }, { "epoch": 0.3524055243587798, "grad_norm": 0.40647754073143005, "learning_rate": 9.300729040097205e-05, "loss": 1.945, "step": 1161 }, { "epoch": 0.35270906055547124, "grad_norm": 0.4339864253997803, "learning_rate": 9.300121506682868e-05, "loss": 1.8991, "step": 1162 }, { "epoch": 0.3530125967521627, "grad_norm": 0.43949249386787415, "learning_rate": 9.29951397326853e-05, "loss": 1.8012, "step": 1163 }, { "epoch": 0.35331613294885417, "grad_norm": 0.3767072558403015, "learning_rate": 9.298906439854192e-05, "loss": 1.4843, "step": 1164 }, { "epoch": 0.3536196691455456, "grad_norm": 0.34623175859451294, "learning_rate": 9.298298906439855e-05, "loss": 1.7296, "step": 1165 }, { "epoch": 0.35392320534223703, "grad_norm": 0.4682632088661194, "learning_rate": 9.297691373025517e-05, "loss": 1.6166, "step": 1166 }, { "epoch": 0.3542267415389285, "grad_norm": 0.43289923667907715, "learning_rate": 9.29708383961118e-05, "loss": 1.8117, "step": 1167 }, { "epoch": 0.35453027773561996, "grad_norm": 0.40620309114456177, "learning_rate": 9.29647630619684e-05, "loss": 2.2582, "step": 1168 }, { "epoch": 0.35483381393231145, "grad_norm": 0.4079282879829407, "learning_rate": 9.295868772782503e-05, "loss": 1.8266, "step": 1169 }, { "epoch": 0.3551373501290029, "grad_norm": 0.4398365020751953, "learning_rate": 9.295261239368166e-05, "loss": 1.8592, "step": 1170 }, { "epoch": 0.3554408863256943, "grad_norm": 0.41404253244400024, "learning_rate": 9.294653705953828e-05, "loss": 1.4286, "step": 1171 }, { "epoch": 0.3557444225223858, "grad_norm": 0.3746820390224457, "learning_rate": 9.29404617253949e-05, "loss": 1.751, "step": 1172 }, { "epoch": 0.35604795871907724, "grad_norm": 0.3549497723579407, "learning_rate": 9.293438639125153e-05, "loss": 1.1857, "step": 1173 }, { "epoch": 0.35635149491576873, "grad_norm": 0.3803435266017914, "learning_rate": 9.292831105710815e-05, "loss": 1.9532, "step": 1174 }, { "epoch": 0.35665503111246016, "grad_norm": 0.46608418226242065, "learning_rate": 9.292223572296476e-05, "loss": 1.7284, "step": 1175 }, { "epoch": 0.3569585673091516, "grad_norm": 0.3718934953212738, "learning_rate": 9.29161603888214e-05, "loss": 1.8234, "step": 1176 }, { "epoch": 0.3572621035058431, "grad_norm": 0.43626031279563904, "learning_rate": 9.291008505467801e-05, "loss": 1.6322, "step": 1177 }, { "epoch": 0.3575656397025345, "grad_norm": 0.3897557258605957, "learning_rate": 9.290400972053463e-05, "loss": 1.7562, "step": 1178 }, { "epoch": 0.35786917589922596, "grad_norm": 0.4063243269920349, "learning_rate": 9.289793438639126e-05, "loss": 1.7012, "step": 1179 }, { "epoch": 0.35817271209591744, "grad_norm": 0.33185258507728577, "learning_rate": 9.289185905224788e-05, "loss": 1.8933, "step": 1180 }, { "epoch": 0.3584762482926089, "grad_norm": 0.35498881340026855, "learning_rate": 9.288578371810451e-05, "loss": 1.84, "step": 1181 }, { "epoch": 0.35877978448930037, "grad_norm": 0.37165796756744385, "learning_rate": 9.287970838396111e-05, "loss": 2.0435, "step": 1182 }, { "epoch": 0.3590833206859918, "grad_norm": 0.4139983355998993, "learning_rate": 9.287363304981774e-05, "loss": 1.5223, "step": 1183 }, { "epoch": 0.35938685688268324, "grad_norm": 0.4164494276046753, "learning_rate": 9.286755771567437e-05, "loss": 2.125, "step": 1184 }, { "epoch": 0.3596903930793747, "grad_norm": 0.35237401723861694, "learning_rate": 9.286148238153098e-05, "loss": 1.9644, "step": 1185 }, { "epoch": 0.35999392927606616, "grad_norm": 0.4741188883781433, "learning_rate": 9.285540704738761e-05, "loss": 1.7027, "step": 1186 }, { "epoch": 0.36029746547275765, "grad_norm": 0.6668043732643127, "learning_rate": 9.284933171324424e-05, "loss": 1.7109, "step": 1187 }, { "epoch": 0.3606010016694491, "grad_norm": 0.39687463641166687, "learning_rate": 9.284325637910086e-05, "loss": 1.9012, "step": 1188 }, { "epoch": 0.3609045378661405, "grad_norm": 0.39605942368507385, "learning_rate": 9.283718104495747e-05, "loss": 2.0095, "step": 1189 }, { "epoch": 0.361208074062832, "grad_norm": 0.6824695467948914, "learning_rate": 9.28311057108141e-05, "loss": 1.6799, "step": 1190 }, { "epoch": 0.36151161025952344, "grad_norm": 0.31606560945510864, "learning_rate": 9.282503037667072e-05, "loss": 1.4793, "step": 1191 }, { "epoch": 0.36181514645621493, "grad_norm": 0.3778662085533142, "learning_rate": 9.281895504252734e-05, "loss": 1.7399, "step": 1192 }, { "epoch": 0.36211868265290637, "grad_norm": 0.39530149102211, "learning_rate": 9.281287970838397e-05, "loss": 1.7939, "step": 1193 }, { "epoch": 0.3624222188495978, "grad_norm": 0.4434921145439148, "learning_rate": 9.280680437424059e-05, "loss": 1.5982, "step": 1194 }, { "epoch": 0.3627257550462893, "grad_norm": 0.40200692415237427, "learning_rate": 9.280072904009722e-05, "loss": 1.6496, "step": 1195 }, { "epoch": 0.3630292912429807, "grad_norm": 0.3617413341999054, "learning_rate": 9.279465370595382e-05, "loss": 1.9734, "step": 1196 }, { "epoch": 0.36333282743967216, "grad_norm": 0.3840635120868683, "learning_rate": 9.278857837181045e-05, "loss": 2.0827, "step": 1197 }, { "epoch": 0.36363636363636365, "grad_norm": 0.36481353640556335, "learning_rate": 9.278250303766708e-05, "loss": 1.781, "step": 1198 }, { "epoch": 0.3639398998330551, "grad_norm": 0.3300980031490326, "learning_rate": 9.277642770352369e-05, "loss": 1.8124, "step": 1199 }, { "epoch": 0.36424343602974657, "grad_norm": 0.3816182315349579, "learning_rate": 9.277035236938032e-05, "loss": 2.0853, "step": 1200 }, { "epoch": 0.364546972226438, "grad_norm": 0.40531983971595764, "learning_rate": 9.276427703523695e-05, "loss": 1.8582, "step": 1201 }, { "epoch": 0.36485050842312944, "grad_norm": 0.5847654938697815, "learning_rate": 9.275820170109357e-05, "loss": 1.9022, "step": 1202 }, { "epoch": 0.36515404461982093, "grad_norm": 0.37587395310401917, "learning_rate": 9.275212636695018e-05, "loss": 1.5774, "step": 1203 }, { "epoch": 0.36545758081651236, "grad_norm": 0.4057527780532837, "learning_rate": 9.274605103280681e-05, "loss": 1.965, "step": 1204 }, { "epoch": 0.36576111701320385, "grad_norm": 0.7079107761383057, "learning_rate": 9.273997569866343e-05, "loss": 1.721, "step": 1205 }, { "epoch": 0.3660646532098953, "grad_norm": 0.828880786895752, "learning_rate": 9.273390036452005e-05, "loss": 1.8865, "step": 1206 }, { "epoch": 0.3663681894065867, "grad_norm": 0.3704030215740204, "learning_rate": 9.272782503037668e-05, "loss": 2.1905, "step": 1207 }, { "epoch": 0.3666717256032782, "grad_norm": 0.3877900540828705, "learning_rate": 9.27217496962333e-05, "loss": 1.9098, "step": 1208 }, { "epoch": 0.36697526179996964, "grad_norm": 0.39982378482818604, "learning_rate": 9.271567436208991e-05, "loss": 1.948, "step": 1209 }, { "epoch": 0.3672787979966611, "grad_norm": 0.4450254440307617, "learning_rate": 9.270959902794653e-05, "loss": 2.0398, "step": 1210 }, { "epoch": 0.36758233419335257, "grad_norm": 0.4938177168369293, "learning_rate": 9.270352369380316e-05, "loss": 1.9824, "step": 1211 }, { "epoch": 0.367885870390044, "grad_norm": 0.44947531819343567, "learning_rate": 9.26974483596598e-05, "loss": 2.0937, "step": 1212 }, { "epoch": 0.3681894065867355, "grad_norm": 0.5059708952903748, "learning_rate": 9.26913730255164e-05, "loss": 1.5887, "step": 1213 }, { "epoch": 0.3684929427834269, "grad_norm": 0.38562676310539246, "learning_rate": 9.268529769137303e-05, "loss": 1.898, "step": 1214 }, { "epoch": 0.36879647898011836, "grad_norm": 0.8131862282752991, "learning_rate": 9.267922235722966e-05, "loss": 1.9549, "step": 1215 }, { "epoch": 0.36910001517680985, "grad_norm": 0.3856705129146576, "learning_rate": 9.267314702308628e-05, "loss": 1.9469, "step": 1216 }, { "epoch": 0.3694035513735013, "grad_norm": 0.38688114285469055, "learning_rate": 9.26670716889429e-05, "loss": 2.0386, "step": 1217 }, { "epoch": 0.36970708757019277, "grad_norm": 0.4043256938457489, "learning_rate": 9.266099635479952e-05, "loss": 2.0083, "step": 1218 }, { "epoch": 0.3700106237668842, "grad_norm": 2.2425715923309326, "learning_rate": 9.265492102065614e-05, "loss": 1.9006, "step": 1219 }, { "epoch": 0.37031415996357564, "grad_norm": 0.35413646697998047, "learning_rate": 9.264884568651276e-05, "loss": 1.597, "step": 1220 }, { "epoch": 0.37061769616026713, "grad_norm": 0.4035986363887787, "learning_rate": 9.264277035236939e-05, "loss": 1.9021, "step": 1221 }, { "epoch": 0.37092123235695856, "grad_norm": 0.38641074299812317, "learning_rate": 9.263669501822601e-05, "loss": 2.1007, "step": 1222 }, { "epoch": 0.37122476855365, "grad_norm": 0.3715684711933136, "learning_rate": 9.263061968408262e-05, "loss": 1.8929, "step": 1223 }, { "epoch": 0.3715283047503415, "grad_norm": 0.3876987397670746, "learning_rate": 9.262454434993924e-05, "loss": 1.8983, "step": 1224 }, { "epoch": 0.3718318409470329, "grad_norm": 0.4943421483039856, "learning_rate": 9.261846901579587e-05, "loss": 1.7334, "step": 1225 }, { "epoch": 0.3721353771437244, "grad_norm": 0.41828441619873047, "learning_rate": 9.26123936816525e-05, "loss": 2.1366, "step": 1226 }, { "epoch": 0.37243891334041584, "grad_norm": 0.4057375490665436, "learning_rate": 9.260631834750911e-05, "loss": 1.8695, "step": 1227 }, { "epoch": 0.3727424495371073, "grad_norm": 0.37381577491760254, "learning_rate": 9.260024301336574e-05, "loss": 1.8757, "step": 1228 }, { "epoch": 0.37304598573379877, "grad_norm": 0.31567415595054626, "learning_rate": 9.259416767922237e-05, "loss": 1.579, "step": 1229 }, { "epoch": 0.3733495219304902, "grad_norm": 0.3704005181789398, "learning_rate": 9.258809234507899e-05, "loss": 2.0297, "step": 1230 }, { "epoch": 0.3736530581271817, "grad_norm": 0.37612470984458923, "learning_rate": 9.25820170109356e-05, "loss": 1.8501, "step": 1231 }, { "epoch": 0.3739565943238731, "grad_norm": 0.37165501713752747, "learning_rate": 9.257594167679223e-05, "loss": 1.5672, "step": 1232 }, { "epoch": 0.37426013052056456, "grad_norm": 0.9847288131713867, "learning_rate": 9.256986634264885e-05, "loss": 1.4694, "step": 1233 }, { "epoch": 0.37456366671725605, "grad_norm": 0.35515448451042175, "learning_rate": 9.256379100850547e-05, "loss": 1.8861, "step": 1234 }, { "epoch": 0.3748672029139475, "grad_norm": 0.46874669194221497, "learning_rate": 9.25577156743621e-05, "loss": 2.002, "step": 1235 }, { "epoch": 0.3751707391106389, "grad_norm": 0.4635021686553955, "learning_rate": 9.255164034021872e-05, "loss": 1.3803, "step": 1236 }, { "epoch": 0.3754742753073304, "grad_norm": 0.3871179521083832, "learning_rate": 9.254556500607533e-05, "loss": 1.9017, "step": 1237 }, { "epoch": 0.37577781150402184, "grad_norm": 0.3958319127559662, "learning_rate": 9.253948967193195e-05, "loss": 2.0054, "step": 1238 }, { "epoch": 0.37608134770071333, "grad_norm": 0.38364940881729126, "learning_rate": 9.253341433778858e-05, "loss": 1.7178, "step": 1239 }, { "epoch": 0.37638488389740477, "grad_norm": 0.4198092222213745, "learning_rate": 9.252733900364521e-05, "loss": 1.9953, "step": 1240 }, { "epoch": 0.3766884200940962, "grad_norm": 0.46621835231781006, "learning_rate": 9.252126366950182e-05, "loss": 1.7053, "step": 1241 }, { "epoch": 0.3769919562907877, "grad_norm": 0.3871505558490753, "learning_rate": 9.251518833535845e-05, "loss": 1.7543, "step": 1242 }, { "epoch": 0.3772954924874791, "grad_norm": 0.33642569184303284, "learning_rate": 9.250911300121508e-05, "loss": 1.875, "step": 1243 }, { "epoch": 0.3775990286841706, "grad_norm": 0.37663060426712036, "learning_rate": 9.25030376670717e-05, "loss": 2.0942, "step": 1244 }, { "epoch": 0.37790256488086205, "grad_norm": 0.5118516087532043, "learning_rate": 9.249696233292831e-05, "loss": 1.6102, "step": 1245 }, { "epoch": 0.3782061010775535, "grad_norm": 0.37116679549217224, "learning_rate": 9.249088699878494e-05, "loss": 1.8115, "step": 1246 }, { "epoch": 0.37850963727424497, "grad_norm": 0.3737630248069763, "learning_rate": 9.248481166464156e-05, "loss": 1.8607, "step": 1247 }, { "epoch": 0.3788131734709364, "grad_norm": 0.4388040006160736, "learning_rate": 9.247873633049818e-05, "loss": 1.8458, "step": 1248 }, { "epoch": 0.37911670966762784, "grad_norm": 0.401643842458725, "learning_rate": 9.24726609963548e-05, "loss": 2.022, "step": 1249 }, { "epoch": 0.3794202458643193, "grad_norm": 0.4450658857822418, "learning_rate": 9.246658566221143e-05, "loss": 2.082, "step": 1250 }, { "epoch": 0.37972378206101076, "grad_norm": 0.4192996025085449, "learning_rate": 9.246051032806805e-05, "loss": 1.8848, "step": 1251 }, { "epoch": 0.38002731825770225, "grad_norm": 0.4925002157688141, "learning_rate": 9.245443499392466e-05, "loss": 1.9252, "step": 1252 }, { "epoch": 0.3803308544543937, "grad_norm": 0.43910741806030273, "learning_rate": 9.244835965978129e-05, "loss": 1.0955, "step": 1253 }, { "epoch": 0.3806343906510851, "grad_norm": 0.3679327070713043, "learning_rate": 9.244228432563792e-05, "loss": 1.9442, "step": 1254 }, { "epoch": 0.3809379268477766, "grad_norm": 0.353431761264801, "learning_rate": 9.243620899149453e-05, "loss": 2.1047, "step": 1255 }, { "epoch": 0.38124146304446804, "grad_norm": 0.4353777766227722, "learning_rate": 9.243013365735116e-05, "loss": 1.9008, "step": 1256 }, { "epoch": 0.38154499924115953, "grad_norm": 0.5220703482627869, "learning_rate": 9.242405832320779e-05, "loss": 1.9991, "step": 1257 }, { "epoch": 0.38184853543785097, "grad_norm": 0.4233221709728241, "learning_rate": 9.24179829890644e-05, "loss": 1.946, "step": 1258 }, { "epoch": 0.3821520716345424, "grad_norm": 0.4323975145816803, "learning_rate": 9.241190765492102e-05, "loss": 1.2619, "step": 1259 }, { "epoch": 0.3824556078312339, "grad_norm": 0.41842687129974365, "learning_rate": 9.240583232077766e-05, "loss": 1.7097, "step": 1260 }, { "epoch": 0.3827591440279253, "grad_norm": 0.37142685055732727, "learning_rate": 9.239975698663427e-05, "loss": 1.771, "step": 1261 }, { "epoch": 0.38306268022461676, "grad_norm": 0.3784460127353668, "learning_rate": 9.239368165249089e-05, "loss": 1.7196, "step": 1262 }, { "epoch": 0.38336621642130825, "grad_norm": 0.4241008460521698, "learning_rate": 9.238760631834751e-05, "loss": 1.5656, "step": 1263 }, { "epoch": 0.3836697526179997, "grad_norm": 0.4829429090023041, "learning_rate": 9.238153098420414e-05, "loss": 2.0334, "step": 1264 }, { "epoch": 0.38397328881469117, "grad_norm": 0.3593828082084656, "learning_rate": 9.237545565006076e-05, "loss": 1.8076, "step": 1265 }, { "epoch": 0.3842768250113826, "grad_norm": 0.4482446014881134, "learning_rate": 9.236938031591737e-05, "loss": 1.7199, "step": 1266 }, { "epoch": 0.38458036120807404, "grad_norm": 0.3537690043449402, "learning_rate": 9.2363304981774e-05, "loss": 2.027, "step": 1267 }, { "epoch": 0.38488389740476553, "grad_norm": 0.3928816616535187, "learning_rate": 9.235722964763063e-05, "loss": 1.811, "step": 1268 }, { "epoch": 0.38518743360145696, "grad_norm": 0.4176971912384033, "learning_rate": 9.235115431348724e-05, "loss": 1.9443, "step": 1269 }, { "epoch": 0.38549096979814845, "grad_norm": 0.3628327548503876, "learning_rate": 9.234507897934387e-05, "loss": 2.1905, "step": 1270 }, { "epoch": 0.3857945059948399, "grad_norm": 0.40045323967933655, "learning_rate": 9.23390036452005e-05, "loss": 1.8231, "step": 1271 }, { "epoch": 0.3860980421915313, "grad_norm": 0.36478134989738464, "learning_rate": 9.23329283110571e-05, "loss": 1.9453, "step": 1272 }, { "epoch": 0.3864015783882228, "grad_norm": 0.36314669251441956, "learning_rate": 9.232685297691373e-05, "loss": 1.7708, "step": 1273 }, { "epoch": 0.38670511458491424, "grad_norm": 0.44175347685813904, "learning_rate": 9.232077764277037e-05, "loss": 2.1961, "step": 1274 }, { "epoch": 0.38700865078160573, "grad_norm": 0.411424845457077, "learning_rate": 9.231470230862698e-05, "loss": 1.7878, "step": 1275 }, { "epoch": 0.38731218697829717, "grad_norm": 0.4166533648967743, "learning_rate": 9.23086269744836e-05, "loss": 1.9688, "step": 1276 }, { "epoch": 0.3876157231749886, "grad_norm": 0.3575800061225891, "learning_rate": 9.230255164034022e-05, "loss": 1.94, "step": 1277 }, { "epoch": 0.3879192593716801, "grad_norm": 0.37767383456230164, "learning_rate": 9.229647630619685e-05, "loss": 2.1039, "step": 1278 }, { "epoch": 0.3882227955683715, "grad_norm": 0.5323564410209656, "learning_rate": 9.229040097205347e-05, "loss": 2.1214, "step": 1279 }, { "epoch": 0.38852633176506296, "grad_norm": 0.37731266021728516, "learning_rate": 9.228432563791008e-05, "loss": 1.5483, "step": 1280 }, { "epoch": 0.38882986796175445, "grad_norm": 0.3776138126850128, "learning_rate": 9.227825030376671e-05, "loss": 1.8353, "step": 1281 }, { "epoch": 0.3891334041584459, "grad_norm": 0.40437617897987366, "learning_rate": 9.227217496962333e-05, "loss": 1.7512, "step": 1282 }, { "epoch": 0.3894369403551374, "grad_norm": 0.4422746002674103, "learning_rate": 9.226609963547995e-05, "loss": 1.8497, "step": 1283 }, { "epoch": 0.3897404765518288, "grad_norm": 0.3358697295188904, "learning_rate": 9.226002430133658e-05, "loss": 1.8597, "step": 1284 }, { "epoch": 0.39004401274852024, "grad_norm": 0.8504922986030579, "learning_rate": 9.225394896719321e-05, "loss": 1.4645, "step": 1285 }, { "epoch": 0.39034754894521173, "grad_norm": 0.35627248883247375, "learning_rate": 9.224787363304981e-05, "loss": 2.0882, "step": 1286 }, { "epoch": 0.39065108514190316, "grad_norm": 0.3549906015396118, "learning_rate": 9.224179829890644e-05, "loss": 1.1979, "step": 1287 }, { "epoch": 0.39095462133859465, "grad_norm": 0.41105836629867554, "learning_rate": 9.223572296476308e-05, "loss": 1.5483, "step": 1288 }, { "epoch": 0.3912581575352861, "grad_norm": 0.38563552498817444, "learning_rate": 9.222964763061969e-05, "loss": 1.6972, "step": 1289 }, { "epoch": 0.3915616937319775, "grad_norm": 0.6308382749557495, "learning_rate": 9.222357229647631e-05, "loss": 1.7006, "step": 1290 }, { "epoch": 0.391865229928669, "grad_norm": 0.4371561110019684, "learning_rate": 9.221749696233293e-05, "loss": 1.838, "step": 1291 }, { "epoch": 0.39216876612536045, "grad_norm": 0.421274334192276, "learning_rate": 9.221142162818956e-05, "loss": 2.0326, "step": 1292 }, { "epoch": 0.3924723023220519, "grad_norm": 0.39431074261665344, "learning_rate": 9.220534629404618e-05, "loss": 1.9654, "step": 1293 }, { "epoch": 0.39277583851874337, "grad_norm": 0.3802948594093323, "learning_rate": 9.219927095990279e-05, "loss": 1.8311, "step": 1294 }, { "epoch": 0.3930793747154348, "grad_norm": 0.44941607117652893, "learning_rate": 9.219319562575942e-05, "loss": 1.879, "step": 1295 }, { "epoch": 0.3933829109121263, "grad_norm": 0.395014226436615, "learning_rate": 9.218712029161604e-05, "loss": 1.5158, "step": 1296 }, { "epoch": 0.3936864471088177, "grad_norm": 0.3692936599254608, "learning_rate": 9.218104495747266e-05, "loss": 2.1428, "step": 1297 }, { "epoch": 0.39398998330550916, "grad_norm": 0.41931676864624023, "learning_rate": 9.217496962332929e-05, "loss": 1.5854, "step": 1298 }, { "epoch": 0.39429351950220065, "grad_norm": 0.4195273816585541, "learning_rate": 9.216889428918592e-05, "loss": 1.4504, "step": 1299 }, { "epoch": 0.3945970556988921, "grad_norm": 0.4246782958507538, "learning_rate": 9.216281895504252e-05, "loss": 1.5243, "step": 1300 }, { "epoch": 0.3949005918955836, "grad_norm": 0.3366101086139679, "learning_rate": 9.215674362089915e-05, "loss": 1.5563, "step": 1301 }, { "epoch": 0.395204128092275, "grad_norm": 0.5027258992195129, "learning_rate": 9.215066828675579e-05, "loss": 1.8705, "step": 1302 }, { "epoch": 0.39550766428896644, "grad_norm": 0.33939701318740845, "learning_rate": 9.21445929526124e-05, "loss": 1.5192, "step": 1303 }, { "epoch": 0.39581120048565793, "grad_norm": 0.5187803506851196, "learning_rate": 9.213851761846902e-05, "loss": 1.5153, "step": 1304 }, { "epoch": 0.39611473668234937, "grad_norm": 0.43332159519195557, "learning_rate": 9.213244228432564e-05, "loss": 1.9878, "step": 1305 }, { "epoch": 0.3964182728790408, "grad_norm": 0.371183842420578, "learning_rate": 9.212636695018227e-05, "loss": 1.9458, "step": 1306 }, { "epoch": 0.3967218090757323, "grad_norm": 0.40977227687835693, "learning_rate": 9.212029161603889e-05, "loss": 2.17, "step": 1307 }, { "epoch": 0.3970253452724237, "grad_norm": 0.37145286798477173, "learning_rate": 9.21142162818955e-05, "loss": 1.9565, "step": 1308 }, { "epoch": 0.3973288814691152, "grad_norm": 0.45751968026161194, "learning_rate": 9.210814094775213e-05, "loss": 1.7469, "step": 1309 }, { "epoch": 0.39763241766580665, "grad_norm": 0.39320966601371765, "learning_rate": 9.210206561360875e-05, "loss": 2.2532, "step": 1310 }, { "epoch": 0.3979359538624981, "grad_norm": 0.44945451617240906, "learning_rate": 9.209599027946537e-05, "loss": 1.896, "step": 1311 }, { "epoch": 0.39823949005918957, "grad_norm": 0.4179849326610565, "learning_rate": 9.2089914945322e-05, "loss": 2.0536, "step": 1312 }, { "epoch": 0.398543026255881, "grad_norm": 0.3893973231315613, "learning_rate": 9.208383961117863e-05, "loss": 1.5888, "step": 1313 }, { "epoch": 0.3988465624525725, "grad_norm": 0.4161340892314911, "learning_rate": 9.207776427703523e-05, "loss": 2.071, "step": 1314 }, { "epoch": 0.39915009864926393, "grad_norm": 0.37969034910202026, "learning_rate": 9.207168894289186e-05, "loss": 1.7566, "step": 1315 }, { "epoch": 0.39945363484595536, "grad_norm": 0.4157601296901703, "learning_rate": 9.20656136087485e-05, "loss": 2.1174, "step": 1316 }, { "epoch": 0.39975717104264685, "grad_norm": 0.3726348876953125, "learning_rate": 9.205953827460511e-05, "loss": 1.9701, "step": 1317 }, { "epoch": 0.4000607072393383, "grad_norm": 0.39407408237457275, "learning_rate": 9.205346294046173e-05, "loss": 1.6288, "step": 1318 }, { "epoch": 0.4003642434360297, "grad_norm": 0.37205028533935547, "learning_rate": 9.204738760631835e-05, "loss": 1.801, "step": 1319 }, { "epoch": 0.4006677796327212, "grad_norm": 0.39794477820396423, "learning_rate": 9.204131227217498e-05, "loss": 2.1164, "step": 1320 }, { "epoch": 0.40097131582941264, "grad_norm": 0.4078124761581421, "learning_rate": 9.20352369380316e-05, "loss": 1.8339, "step": 1321 }, { "epoch": 0.40127485202610413, "grad_norm": 0.4183814227581024, "learning_rate": 9.202916160388821e-05, "loss": 1.7706, "step": 1322 }, { "epoch": 0.40157838822279557, "grad_norm": 0.6123658418655396, "learning_rate": 9.202308626974484e-05, "loss": 1.7745, "step": 1323 }, { "epoch": 0.401881924419487, "grad_norm": 0.36878085136413574, "learning_rate": 9.201701093560146e-05, "loss": 1.2733, "step": 1324 }, { "epoch": 0.4021854606161785, "grad_norm": 0.3583606779575348, "learning_rate": 9.201093560145808e-05, "loss": 1.3474, "step": 1325 }, { "epoch": 0.4024889968128699, "grad_norm": 0.4098053574562073, "learning_rate": 9.200486026731471e-05, "loss": 1.845, "step": 1326 }, { "epoch": 0.4027925330095614, "grad_norm": 0.5891076326370239, "learning_rate": 9.199878493317134e-05, "loss": 2.0312, "step": 1327 }, { "epoch": 0.40309606920625285, "grad_norm": 0.5270339250564575, "learning_rate": 9.199270959902794e-05, "loss": 1.6679, "step": 1328 }, { "epoch": 0.4033996054029443, "grad_norm": 0.4184766411781311, "learning_rate": 9.198663426488457e-05, "loss": 2.1587, "step": 1329 }, { "epoch": 0.4037031415996358, "grad_norm": 0.3945539593696594, "learning_rate": 9.198055893074119e-05, "loss": 1.9374, "step": 1330 }, { "epoch": 0.4040066777963272, "grad_norm": 0.3906068205833435, "learning_rate": 9.197448359659781e-05, "loss": 1.4602, "step": 1331 }, { "epoch": 0.40431021399301864, "grad_norm": 0.4073745906352997, "learning_rate": 9.196840826245444e-05, "loss": 2.0568, "step": 1332 }, { "epoch": 0.40461375018971013, "grad_norm": 0.35083553194999695, "learning_rate": 9.196233292831106e-05, "loss": 1.5915, "step": 1333 }, { "epoch": 0.40491728638640156, "grad_norm": 0.38344746828079224, "learning_rate": 9.195625759416769e-05, "loss": 1.8988, "step": 1334 }, { "epoch": 0.40522082258309305, "grad_norm": 0.8442848920822144, "learning_rate": 9.19501822600243e-05, "loss": 1.6229, "step": 1335 }, { "epoch": 0.4055243587797845, "grad_norm": 0.41683101654052734, "learning_rate": 9.194410692588092e-05, "loss": 1.6984, "step": 1336 }, { "epoch": 0.4058278949764759, "grad_norm": 0.43008947372436523, "learning_rate": 9.193803159173755e-05, "loss": 1.8312, "step": 1337 }, { "epoch": 0.4061314311731674, "grad_norm": 0.43498390913009644, "learning_rate": 9.193195625759417e-05, "loss": 1.735, "step": 1338 }, { "epoch": 0.40643496736985885, "grad_norm": 0.361969530582428, "learning_rate": 9.192588092345079e-05, "loss": 2.015, "step": 1339 }, { "epoch": 0.40673850356655034, "grad_norm": 0.4028913378715515, "learning_rate": 9.191980558930742e-05, "loss": 1.9139, "step": 1340 }, { "epoch": 0.40704203976324177, "grad_norm": 0.46840906143188477, "learning_rate": 9.191373025516405e-05, "loss": 1.913, "step": 1341 }, { "epoch": 0.4073455759599332, "grad_norm": 0.39075982570648193, "learning_rate": 9.190765492102065e-05, "loss": 1.8908, "step": 1342 }, { "epoch": 0.4076491121566247, "grad_norm": 0.3519285023212433, "learning_rate": 9.190157958687728e-05, "loss": 1.8016, "step": 1343 }, { "epoch": 0.4079526483533161, "grad_norm": 0.43734681606292725, "learning_rate": 9.18955042527339e-05, "loss": 1.5545, "step": 1344 }, { "epoch": 0.40825618455000756, "grad_norm": 0.4044792950153351, "learning_rate": 9.188942891859052e-05, "loss": 1.836, "step": 1345 }, { "epoch": 0.40855972074669905, "grad_norm": 0.3661639988422394, "learning_rate": 9.188335358444715e-05, "loss": 2.0347, "step": 1346 }, { "epoch": 0.4088632569433905, "grad_norm": 0.46894827485084534, "learning_rate": 9.187727825030377e-05, "loss": 1.5674, "step": 1347 }, { "epoch": 0.409166793140082, "grad_norm": 0.4389861226081848, "learning_rate": 9.18712029161604e-05, "loss": 2.0472, "step": 1348 }, { "epoch": 0.4094703293367734, "grad_norm": 0.3514555096626282, "learning_rate": 9.186512758201702e-05, "loss": 1.7453, "step": 1349 }, { "epoch": 0.40977386553346484, "grad_norm": 0.35691046714782715, "learning_rate": 9.185905224787363e-05, "loss": 2.2705, "step": 1350 }, { "epoch": 0.41007740173015633, "grad_norm": 0.3770231604576111, "learning_rate": 9.185297691373026e-05, "loss": 1.9172, "step": 1351 }, { "epoch": 0.41038093792684777, "grad_norm": 0.40932852029800415, "learning_rate": 9.184690157958688e-05, "loss": 1.8489, "step": 1352 }, { "epoch": 0.41068447412353926, "grad_norm": 0.39512181282043457, "learning_rate": 9.18408262454435e-05, "loss": 1.7082, "step": 1353 }, { "epoch": 0.4109880103202307, "grad_norm": 0.33877119421958923, "learning_rate": 9.183475091130013e-05, "loss": 1.7629, "step": 1354 }, { "epoch": 0.4112915465169221, "grad_norm": 0.4188339412212372, "learning_rate": 9.182867557715675e-05, "loss": 1.4726, "step": 1355 }, { "epoch": 0.4115950827136136, "grad_norm": 0.3661527931690216, "learning_rate": 9.182260024301336e-05, "loss": 1.7752, "step": 1356 }, { "epoch": 0.41189861891030505, "grad_norm": 0.3717115819454193, "learning_rate": 9.181652490887e-05, "loss": 1.6128, "step": 1357 }, { "epoch": 0.4122021551069965, "grad_norm": 0.783671498298645, "learning_rate": 9.181044957472661e-05, "loss": 1.3576, "step": 1358 }, { "epoch": 0.41250569130368797, "grad_norm": 0.5294111967086792, "learning_rate": 9.180437424058323e-05, "loss": 1.6084, "step": 1359 }, { "epoch": 0.4128092275003794, "grad_norm": 0.42108646035194397, "learning_rate": 9.179829890643986e-05, "loss": 2.0071, "step": 1360 }, { "epoch": 0.4131127636970709, "grad_norm": 0.3648010492324829, "learning_rate": 9.179222357229648e-05, "loss": 1.9791, "step": 1361 }, { "epoch": 0.41341629989376233, "grad_norm": 0.31227168440818787, "learning_rate": 9.178614823815311e-05, "loss": 1.811, "step": 1362 }, { "epoch": 0.41371983609045376, "grad_norm": 0.34013819694519043, "learning_rate": 9.178007290400973e-05, "loss": 1.6035, "step": 1363 }, { "epoch": 0.41402337228714525, "grad_norm": 0.3848358392715454, "learning_rate": 9.177399756986634e-05, "loss": 1.7878, "step": 1364 }, { "epoch": 0.4143269084838367, "grad_norm": 0.33737045526504517, "learning_rate": 9.176792223572297e-05, "loss": 1.5855, "step": 1365 }, { "epoch": 0.4146304446805282, "grad_norm": 0.3722662329673767, "learning_rate": 9.176184690157959e-05, "loss": 1.4857, "step": 1366 }, { "epoch": 0.4149339808772196, "grad_norm": 0.38730594515800476, "learning_rate": 9.175577156743621e-05, "loss": 1.858, "step": 1367 }, { "epoch": 0.41523751707391104, "grad_norm": 0.34036555886268616, "learning_rate": 9.174969623329284e-05, "loss": 1.4445, "step": 1368 }, { "epoch": 0.41554105327060253, "grad_norm": 0.395327091217041, "learning_rate": 9.174362089914946e-05, "loss": 1.5194, "step": 1369 }, { "epoch": 0.41584458946729397, "grad_norm": 0.4212843179702759, "learning_rate": 9.173754556500607e-05, "loss": 2.3923, "step": 1370 }, { "epoch": 0.41614812566398546, "grad_norm": 0.33540114760398865, "learning_rate": 9.17314702308627e-05, "loss": 1.9129, "step": 1371 }, { "epoch": 0.4164516618606769, "grad_norm": 0.43007227778434753, "learning_rate": 9.172539489671932e-05, "loss": 1.7658, "step": 1372 }, { "epoch": 0.4167551980573683, "grad_norm": 0.3466784358024597, "learning_rate": 9.171931956257594e-05, "loss": 1.8647, "step": 1373 }, { "epoch": 0.4170587342540598, "grad_norm": 0.35446929931640625, "learning_rate": 9.171324422843257e-05, "loss": 1.5265, "step": 1374 }, { "epoch": 0.41736227045075125, "grad_norm": 0.3868924081325531, "learning_rate": 9.170716889428919e-05, "loss": 1.6355, "step": 1375 }, { "epoch": 0.4176658066474427, "grad_norm": 0.39501097798347473, "learning_rate": 9.170109356014582e-05, "loss": 1.7426, "step": 1376 }, { "epoch": 0.4179693428441342, "grad_norm": 0.44299614429473877, "learning_rate": 9.169501822600244e-05, "loss": 1.8582, "step": 1377 }, { "epoch": 0.4182728790408256, "grad_norm": 0.438927561044693, "learning_rate": 9.168894289185905e-05, "loss": 1.9043, "step": 1378 }, { "epoch": 0.4185764152375171, "grad_norm": 0.3874059319496155, "learning_rate": 9.168286755771568e-05, "loss": 1.2591, "step": 1379 }, { "epoch": 0.41887995143420853, "grad_norm": 0.40715524554252625, "learning_rate": 9.16767922235723e-05, "loss": 1.9779, "step": 1380 }, { "epoch": 0.41918348763089996, "grad_norm": 0.3737177848815918, "learning_rate": 9.167071688942892e-05, "loss": 1.97, "step": 1381 }, { "epoch": 0.41948702382759145, "grad_norm": 0.3690639138221741, "learning_rate": 9.166464155528555e-05, "loss": 1.9618, "step": 1382 }, { "epoch": 0.4197905600242829, "grad_norm": 1.3496792316436768, "learning_rate": 9.165856622114217e-05, "loss": 2.1141, "step": 1383 }, { "epoch": 0.4200940962209744, "grad_norm": 0.39503785967826843, "learning_rate": 9.165249088699878e-05, "loss": 1.8984, "step": 1384 }, { "epoch": 0.4203976324176658, "grad_norm": 0.33576783537864685, "learning_rate": 9.164641555285541e-05, "loss": 1.8536, "step": 1385 }, { "epoch": 0.42070116861435725, "grad_norm": 0.8600859642028809, "learning_rate": 9.164034021871203e-05, "loss": 1.3809, "step": 1386 }, { "epoch": 0.42100470481104874, "grad_norm": 0.3842533230781555, "learning_rate": 9.163426488456865e-05, "loss": 1.6408, "step": 1387 }, { "epoch": 0.42130824100774017, "grad_norm": 0.4019504487514496, "learning_rate": 9.162818955042528e-05, "loss": 1.9738, "step": 1388 }, { "epoch": 0.4216117772044316, "grad_norm": 0.399406760931015, "learning_rate": 9.16221142162819e-05, "loss": 1.9897, "step": 1389 }, { "epoch": 0.4219153134011231, "grad_norm": 0.35225290060043335, "learning_rate": 9.161603888213853e-05, "loss": 1.8217, "step": 1390 }, { "epoch": 0.4222188495978145, "grad_norm": 0.3673458695411682, "learning_rate": 9.160996354799515e-05, "loss": 1.9175, "step": 1391 }, { "epoch": 0.422522385794506, "grad_norm": 0.37856656312942505, "learning_rate": 9.160388821385176e-05, "loss": 1.8, "step": 1392 }, { "epoch": 0.42282592199119745, "grad_norm": 0.3543725907802582, "learning_rate": 9.15978128797084e-05, "loss": 2.0975, "step": 1393 }, { "epoch": 0.4231294581878889, "grad_norm": 0.34620916843414307, "learning_rate": 9.159173754556501e-05, "loss": 1.803, "step": 1394 }, { "epoch": 0.4234329943845804, "grad_norm": 0.428543359041214, "learning_rate": 9.158566221142163e-05, "loss": 1.8852, "step": 1395 }, { "epoch": 0.4237365305812718, "grad_norm": 0.41286444664001465, "learning_rate": 9.157958687727826e-05, "loss": 1.7661, "step": 1396 }, { "epoch": 0.4240400667779633, "grad_norm": 0.42155444622039795, "learning_rate": 9.157351154313488e-05, "loss": 1.9728, "step": 1397 }, { "epoch": 0.42434360297465473, "grad_norm": 0.4446348547935486, "learning_rate": 9.15674362089915e-05, "loss": 1.6411, "step": 1398 }, { "epoch": 0.42464713917134617, "grad_norm": 0.38683468103408813, "learning_rate": 9.156136087484812e-05, "loss": 1.9311, "step": 1399 }, { "epoch": 0.42495067536803766, "grad_norm": 0.398798406124115, "learning_rate": 9.155528554070474e-05, "loss": 1.867, "step": 1400 }, { "epoch": 0.4252542115647291, "grad_norm": 0.3723427951335907, "learning_rate": 9.154921020656136e-05, "loss": 2.1345, "step": 1401 }, { "epoch": 0.4255577477614205, "grad_norm": 0.3853520452976227, "learning_rate": 9.154313487241799e-05, "loss": 1.4843, "step": 1402 }, { "epoch": 0.425861283958112, "grad_norm": 0.4148903489112854, "learning_rate": 9.153705953827461e-05, "loss": 1.9043, "step": 1403 }, { "epoch": 0.42616482015480345, "grad_norm": 0.4134661555290222, "learning_rate": 9.153098420413124e-05, "loss": 2.0662, "step": 1404 }, { "epoch": 0.42646835635149494, "grad_norm": 0.4663408100605011, "learning_rate": 9.152490886998786e-05, "loss": 1.8455, "step": 1405 }, { "epoch": 0.42677189254818637, "grad_norm": 0.3829919695854187, "learning_rate": 9.151883353584447e-05, "loss": 1.7822, "step": 1406 }, { "epoch": 0.4270754287448778, "grad_norm": 0.3487464487552643, "learning_rate": 9.15127582017011e-05, "loss": 1.9024, "step": 1407 }, { "epoch": 0.4273789649415693, "grad_norm": 0.4045817255973816, "learning_rate": 9.150668286755772e-05, "loss": 1.6833, "step": 1408 }, { "epoch": 0.42768250113826073, "grad_norm": 0.4237898588180542, "learning_rate": 9.150060753341434e-05, "loss": 2.0711, "step": 1409 }, { "epoch": 0.4279860373349522, "grad_norm": 0.3947038948535919, "learning_rate": 9.149453219927097e-05, "loss": 1.6692, "step": 1410 }, { "epoch": 0.42828957353164365, "grad_norm": 0.373927503824234, "learning_rate": 9.148845686512759e-05, "loss": 2.1039, "step": 1411 }, { "epoch": 0.4285931097283351, "grad_norm": 0.45322802662849426, "learning_rate": 9.14823815309842e-05, "loss": 1.4528, "step": 1412 }, { "epoch": 0.4288966459250266, "grad_norm": 0.4237847328186035, "learning_rate": 9.147630619684083e-05, "loss": 1.8462, "step": 1413 }, { "epoch": 0.429200182121718, "grad_norm": 0.39840593934059143, "learning_rate": 9.147023086269745e-05, "loss": 1.8354, "step": 1414 }, { "epoch": 0.42950371831840944, "grad_norm": 0.3490378260612488, "learning_rate": 9.146415552855407e-05, "loss": 1.9182, "step": 1415 }, { "epoch": 0.42980725451510093, "grad_norm": 0.37050196528434753, "learning_rate": 9.14580801944107e-05, "loss": 2.1893, "step": 1416 }, { "epoch": 0.43011079071179237, "grad_norm": 0.7810099720954895, "learning_rate": 9.145200486026732e-05, "loss": 1.7489, "step": 1417 }, { "epoch": 0.43041432690848386, "grad_norm": 0.35921812057495117, "learning_rate": 9.144592952612393e-05, "loss": 1.9051, "step": 1418 }, { "epoch": 0.4307178631051753, "grad_norm": 0.42429161071777344, "learning_rate": 9.143985419198057e-05, "loss": 1.4487, "step": 1419 }, { "epoch": 0.4310213993018667, "grad_norm": 0.37482765316963196, "learning_rate": 9.143377885783718e-05, "loss": 1.7627, "step": 1420 }, { "epoch": 0.4313249354985582, "grad_norm": 0.39142462611198425, "learning_rate": 9.142770352369381e-05, "loss": 1.7077, "step": 1421 }, { "epoch": 0.43162847169524965, "grad_norm": 0.33967357873916626, "learning_rate": 9.142162818955043e-05, "loss": 1.7666, "step": 1422 }, { "epoch": 0.43193200789194114, "grad_norm": 0.3520660400390625, "learning_rate": 9.141555285540705e-05, "loss": 2.1716, "step": 1423 }, { "epoch": 0.4322355440886326, "grad_norm": 0.3724939823150635, "learning_rate": 9.140947752126368e-05, "loss": 1.8, "step": 1424 }, { "epoch": 0.432539080285324, "grad_norm": 0.37572380900382996, "learning_rate": 9.14034021871203e-05, "loss": 1.8956, "step": 1425 }, { "epoch": 0.4328426164820155, "grad_norm": 0.38770124316215515, "learning_rate": 9.139732685297691e-05, "loss": 1.6381, "step": 1426 }, { "epoch": 0.43314615267870693, "grad_norm": 0.5836375951766968, "learning_rate": 9.139125151883354e-05, "loss": 1.9264, "step": 1427 }, { "epoch": 0.43344968887539836, "grad_norm": 0.44695645570755005, "learning_rate": 9.138517618469016e-05, "loss": 1.7427, "step": 1428 }, { "epoch": 0.43375322507208985, "grad_norm": 0.3857296407222748, "learning_rate": 9.137910085054678e-05, "loss": 1.5448, "step": 1429 }, { "epoch": 0.4340567612687813, "grad_norm": 0.417868971824646, "learning_rate": 9.137302551640341e-05, "loss": 1.9924, "step": 1430 }, { "epoch": 0.4343602974654728, "grad_norm": 0.42111891508102417, "learning_rate": 9.136695018226003e-05, "loss": 1.9506, "step": 1431 }, { "epoch": 0.4346638336621642, "grad_norm": 0.4096441864967346, "learning_rate": 9.136087484811664e-05, "loss": 1.6159, "step": 1432 }, { "epoch": 0.43496736985885565, "grad_norm": 0.4601602852344513, "learning_rate": 9.135479951397328e-05, "loss": 1.5702, "step": 1433 }, { "epoch": 0.43527090605554714, "grad_norm": 0.4030752182006836, "learning_rate": 9.134872417982989e-05, "loss": 1.9785, "step": 1434 }, { "epoch": 0.43557444225223857, "grad_norm": 0.4357512593269348, "learning_rate": 9.134264884568652e-05, "loss": 1.8718, "step": 1435 }, { "epoch": 0.43587797844893006, "grad_norm": 0.3511190712451935, "learning_rate": 9.133657351154314e-05, "loss": 1.9243, "step": 1436 }, { "epoch": 0.4361815146456215, "grad_norm": 0.4554003179073334, "learning_rate": 9.133049817739976e-05, "loss": 1.5576, "step": 1437 }, { "epoch": 0.4364850508423129, "grad_norm": 0.37637701630592346, "learning_rate": 9.132442284325639e-05, "loss": 2.3059, "step": 1438 }, { "epoch": 0.4367885870390044, "grad_norm": 0.39330780506134033, "learning_rate": 9.1318347509113e-05, "loss": 1.4548, "step": 1439 }, { "epoch": 0.43709212323569585, "grad_norm": 0.44056230783462524, "learning_rate": 9.131227217496962e-05, "loss": 1.7839, "step": 1440 }, { "epoch": 0.4373956594323873, "grad_norm": 1.5862314701080322, "learning_rate": 9.130619684082625e-05, "loss": 1.6374, "step": 1441 }, { "epoch": 0.4376991956290788, "grad_norm": 0.44076550006866455, "learning_rate": 9.130012150668287e-05, "loss": 2.0375, "step": 1442 }, { "epoch": 0.4380027318257702, "grad_norm": 0.46712005138397217, "learning_rate": 9.129404617253949e-05, "loss": 2.0498, "step": 1443 }, { "epoch": 0.4383062680224617, "grad_norm": 0.4472239315509796, "learning_rate": 9.128797083839612e-05, "loss": 2.1283, "step": 1444 }, { "epoch": 0.43860980421915313, "grad_norm": 0.46304264664649963, "learning_rate": 9.128189550425274e-05, "loss": 1.9628, "step": 1445 }, { "epoch": 0.43891334041584457, "grad_norm": 0.4066753387451172, "learning_rate": 9.127582017010935e-05, "loss": 1.4578, "step": 1446 }, { "epoch": 0.43921687661253606, "grad_norm": 0.4386885166168213, "learning_rate": 9.126974483596599e-05, "loss": 1.8655, "step": 1447 }, { "epoch": 0.4395204128092275, "grad_norm": 0.5175670981407166, "learning_rate": 9.12636695018226e-05, "loss": 1.9672, "step": 1448 }, { "epoch": 0.439823949005919, "grad_norm": 0.39056891202926636, "learning_rate": 9.125759416767923e-05, "loss": 2.2645, "step": 1449 }, { "epoch": 0.4401274852026104, "grad_norm": 0.3297121822834015, "learning_rate": 9.125151883353585e-05, "loss": 1.651, "step": 1450 }, { "epoch": 0.44043102139930185, "grad_norm": 0.37801650166511536, "learning_rate": 9.124544349939247e-05, "loss": 1.938, "step": 1451 }, { "epoch": 0.44073455759599334, "grad_norm": 0.45800700783729553, "learning_rate": 9.12393681652491e-05, "loss": 1.8465, "step": 1452 }, { "epoch": 0.44103809379268477, "grad_norm": 0.42198505997657776, "learning_rate": 9.123329283110572e-05, "loss": 1.9878, "step": 1453 }, { "epoch": 0.44134162998937626, "grad_norm": 0.9476953744888306, "learning_rate": 9.122721749696233e-05, "loss": 2.127, "step": 1454 }, { "epoch": 0.4416451661860677, "grad_norm": 0.6569995880126953, "learning_rate": 9.122114216281896e-05, "loss": 2.1351, "step": 1455 }, { "epoch": 0.44194870238275913, "grad_norm": 0.7246467471122742, "learning_rate": 9.121506682867558e-05, "loss": 1.9843, "step": 1456 }, { "epoch": 0.4422522385794506, "grad_norm": 0.3717383146286011, "learning_rate": 9.12089914945322e-05, "loss": 1.7456, "step": 1457 }, { "epoch": 0.44255577477614205, "grad_norm": 0.39930397272109985, "learning_rate": 9.120291616038883e-05, "loss": 2.1122, "step": 1458 }, { "epoch": 0.4428593109728335, "grad_norm": 0.4484943747520447, "learning_rate": 9.119684082624545e-05, "loss": 1.8622, "step": 1459 }, { "epoch": 0.443162847169525, "grad_norm": 0.45804062485694885, "learning_rate": 9.119076549210206e-05, "loss": 2.0503, "step": 1460 }, { "epoch": 0.4434663833662164, "grad_norm": 0.381073921918869, "learning_rate": 9.11846901579587e-05, "loss": 1.8256, "step": 1461 }, { "epoch": 0.4437699195629079, "grad_norm": 0.4491977095603943, "learning_rate": 9.117861482381531e-05, "loss": 1.6401, "step": 1462 }, { "epoch": 0.44407345575959933, "grad_norm": 0.3925999104976654, "learning_rate": 9.117253948967194e-05, "loss": 1.9513, "step": 1463 }, { "epoch": 0.44437699195629077, "grad_norm": 0.45975080132484436, "learning_rate": 9.116646415552856e-05, "loss": 1.7791, "step": 1464 }, { "epoch": 0.44468052815298226, "grad_norm": 0.44993898272514343, "learning_rate": 9.116038882138518e-05, "loss": 0.6258, "step": 1465 }, { "epoch": 0.4449840643496737, "grad_norm": 0.4088694453239441, "learning_rate": 9.115431348724181e-05, "loss": 1.768, "step": 1466 }, { "epoch": 0.4452876005463652, "grad_norm": 0.5844696760177612, "learning_rate": 9.114823815309841e-05, "loss": 1.7229, "step": 1467 }, { "epoch": 0.4455911367430566, "grad_norm": 0.5378713607788086, "learning_rate": 9.114216281895504e-05, "loss": 1.8761, "step": 1468 }, { "epoch": 0.44589467293974805, "grad_norm": 1.5058445930480957, "learning_rate": 9.113608748481167e-05, "loss": 2.0215, "step": 1469 }, { "epoch": 0.44619820913643954, "grad_norm": 0.44474056363105774, "learning_rate": 9.113001215066829e-05, "loss": 1.9797, "step": 1470 }, { "epoch": 0.446501745333131, "grad_norm": 0.4373909831047058, "learning_rate": 9.112393681652491e-05, "loss": 2.1042, "step": 1471 }, { "epoch": 0.4468052815298224, "grad_norm": 0.5322824716567993, "learning_rate": 9.111786148238154e-05, "loss": 1.6328, "step": 1472 }, { "epoch": 0.4471088177265139, "grad_norm": 0.4264838993549347, "learning_rate": 9.111178614823816e-05, "loss": 1.9764, "step": 1473 }, { "epoch": 0.44741235392320533, "grad_norm": 0.39688101410865784, "learning_rate": 9.110571081409478e-05, "loss": 1.8385, "step": 1474 }, { "epoch": 0.4477158901198968, "grad_norm": 0.3781752586364746, "learning_rate": 9.10996354799514e-05, "loss": 2.1128, "step": 1475 }, { "epoch": 0.44801942631658825, "grad_norm": 0.40686413645744324, "learning_rate": 9.109356014580802e-05, "loss": 2.033, "step": 1476 }, { "epoch": 0.4483229625132797, "grad_norm": 0.42852646112442017, "learning_rate": 9.108748481166465e-05, "loss": 1.714, "step": 1477 }, { "epoch": 0.4486264987099712, "grad_norm": 0.3613603413105011, "learning_rate": 9.108140947752127e-05, "loss": 1.2022, "step": 1478 }, { "epoch": 0.4489300349066626, "grad_norm": 0.4005518853664398, "learning_rate": 9.107533414337789e-05, "loss": 1.9914, "step": 1479 }, { "epoch": 0.4492335711033541, "grad_norm": 0.3479957580566406, "learning_rate": 9.106925880923452e-05, "loss": 1.7423, "step": 1480 }, { "epoch": 0.44953710730004554, "grad_norm": 0.43999946117401123, "learning_rate": 9.106318347509112e-05, "loss": 2.0009, "step": 1481 }, { "epoch": 0.44984064349673697, "grad_norm": 0.36132052540779114, "learning_rate": 9.105710814094775e-05, "loss": 1.916, "step": 1482 }, { "epoch": 0.45014417969342846, "grad_norm": 0.33822518587112427, "learning_rate": 9.105103280680438e-05, "loss": 1.6778, "step": 1483 }, { "epoch": 0.4504477158901199, "grad_norm": 0.35278624296188354, "learning_rate": 9.1044957472661e-05, "loss": 1.8943, "step": 1484 }, { "epoch": 0.4507512520868113, "grad_norm": 0.47397172451019287, "learning_rate": 9.103888213851762e-05, "loss": 1.7541, "step": 1485 }, { "epoch": 0.4510547882835028, "grad_norm": 0.3714633882045746, "learning_rate": 9.103280680437425e-05, "loss": 2.026, "step": 1486 }, { "epoch": 0.45135832448019425, "grad_norm": 1.6088794469833374, "learning_rate": 9.102673147023087e-05, "loss": 1.8904, "step": 1487 }, { "epoch": 0.45166186067688574, "grad_norm": 0.39234501123428345, "learning_rate": 9.102065613608749e-05, "loss": 1.9149, "step": 1488 }, { "epoch": 0.4519653968735772, "grad_norm": 0.4205072224140167, "learning_rate": 9.101458080194412e-05, "loss": 2.0117, "step": 1489 }, { "epoch": 0.4522689330702686, "grad_norm": 0.45428764820098877, "learning_rate": 9.100850546780073e-05, "loss": 1.9742, "step": 1490 }, { "epoch": 0.4525724692669601, "grad_norm": 0.35566025972366333, "learning_rate": 9.100243013365735e-05, "loss": 1.8445, "step": 1491 }, { "epoch": 0.45287600546365153, "grad_norm": 0.4020955562591553, "learning_rate": 9.099635479951398e-05, "loss": 1.9665, "step": 1492 }, { "epoch": 0.453179541660343, "grad_norm": 0.6123180985450745, "learning_rate": 9.09902794653706e-05, "loss": 1.6361, "step": 1493 }, { "epoch": 0.45348307785703446, "grad_norm": 0.44139203429222107, "learning_rate": 9.098420413122723e-05, "loss": 1.8156, "step": 1494 }, { "epoch": 0.4537866140537259, "grad_norm": 0.4224632680416107, "learning_rate": 9.097812879708383e-05, "loss": 1.8907, "step": 1495 }, { "epoch": 0.4540901502504174, "grad_norm": 0.40932169556617737, "learning_rate": 9.097205346294046e-05, "loss": 1.9179, "step": 1496 }, { "epoch": 0.4543936864471088, "grad_norm": 0.41995543241500854, "learning_rate": 9.09659781287971e-05, "loss": 1.7627, "step": 1497 }, { "epoch": 0.45469722264380025, "grad_norm": 0.33541586995124817, "learning_rate": 9.095990279465371e-05, "loss": 2.0334, "step": 1498 }, { "epoch": 0.45500075884049174, "grad_norm": 0.426469624042511, "learning_rate": 9.095382746051033e-05, "loss": 2.0636, "step": 1499 }, { "epoch": 0.45530429503718317, "grad_norm": 0.4037235379219055, "learning_rate": 9.094775212636696e-05, "loss": 1.9431, "step": 1500 }, { "epoch": 0.45560783123387466, "grad_norm": 0.35326942801475525, "learning_rate": 9.094167679222358e-05, "loss": 1.9306, "step": 1501 }, { "epoch": 0.4559113674305661, "grad_norm": 0.3722929358482361, "learning_rate": 9.09356014580802e-05, "loss": 1.2554, "step": 1502 }, { "epoch": 0.45621490362725753, "grad_norm": 0.5637504458427429, "learning_rate": 9.092952612393683e-05, "loss": 2.2883, "step": 1503 }, { "epoch": 0.456518439823949, "grad_norm": 0.4601937532424927, "learning_rate": 9.092345078979344e-05, "loss": 1.8051, "step": 1504 }, { "epoch": 0.45682197602064045, "grad_norm": 0.4153023660182953, "learning_rate": 9.091737545565006e-05, "loss": 2.045, "step": 1505 }, { "epoch": 0.45712551221733194, "grad_norm": 0.34770330786705017, "learning_rate": 9.091130012150668e-05, "loss": 1.8672, "step": 1506 }, { "epoch": 0.4574290484140234, "grad_norm": 0.3669261932373047, "learning_rate": 9.090522478736331e-05, "loss": 1.7184, "step": 1507 }, { "epoch": 0.4577325846107148, "grad_norm": 0.4862425923347473, "learning_rate": 9.089914945321994e-05, "loss": 1.7152, "step": 1508 }, { "epoch": 0.4580361208074063, "grad_norm": 0.39143872261047363, "learning_rate": 9.089307411907654e-05, "loss": 1.6634, "step": 1509 }, { "epoch": 0.45833965700409773, "grad_norm": 0.48413509130477905, "learning_rate": 9.088699878493317e-05, "loss": 1.8655, "step": 1510 }, { "epoch": 0.45864319320078917, "grad_norm": 0.438029944896698, "learning_rate": 9.08809234507898e-05, "loss": 1.8924, "step": 1511 }, { "epoch": 0.45894672939748066, "grad_norm": 0.39872634410858154, "learning_rate": 9.087484811664642e-05, "loss": 2.0113, "step": 1512 }, { "epoch": 0.4592502655941721, "grad_norm": 0.6361043453216553, "learning_rate": 9.086877278250304e-05, "loss": 2.004, "step": 1513 }, { "epoch": 0.4595538017908636, "grad_norm": 0.35867545008659363, "learning_rate": 9.086269744835967e-05, "loss": 1.6438, "step": 1514 }, { "epoch": 0.459857337987555, "grad_norm": 0.375430166721344, "learning_rate": 9.085662211421629e-05, "loss": 1.6383, "step": 1515 }, { "epoch": 0.46016087418424645, "grad_norm": 1.4054492712020874, "learning_rate": 9.08505467800729e-05, "loss": 1.6688, "step": 1516 }, { "epoch": 0.46046441038093794, "grad_norm": 0.35748517513275146, "learning_rate": 9.084447144592954e-05, "loss": 1.8517, "step": 1517 }, { "epoch": 0.4607679465776294, "grad_norm": 0.3136993944644928, "learning_rate": 9.083839611178615e-05, "loss": 2.0279, "step": 1518 }, { "epoch": 0.46107148277432086, "grad_norm": 0.39442840218544006, "learning_rate": 9.083232077764277e-05, "loss": 2.0558, "step": 1519 }, { "epoch": 0.4613750189710123, "grad_norm": 0.3278721272945404, "learning_rate": 9.082624544349939e-05, "loss": 1.8702, "step": 1520 }, { "epoch": 0.46167855516770373, "grad_norm": 0.6478224992752075, "learning_rate": 9.082017010935602e-05, "loss": 1.9689, "step": 1521 }, { "epoch": 0.4619820913643952, "grad_norm": 0.39185309410095215, "learning_rate": 9.081409477521265e-05, "loss": 1.871, "step": 1522 }, { "epoch": 0.46228562756108665, "grad_norm": 0.4506731927394867, "learning_rate": 9.080801944106925e-05, "loss": 1.3312, "step": 1523 }, { "epoch": 0.4625891637577781, "grad_norm": 0.36045706272125244, "learning_rate": 9.080194410692588e-05, "loss": 1.4391, "step": 1524 }, { "epoch": 0.4628926999544696, "grad_norm": 0.40836915373802185, "learning_rate": 9.079586877278252e-05, "loss": 2.0619, "step": 1525 }, { "epoch": 0.463196236151161, "grad_norm": 0.39617446064949036, "learning_rate": 9.078979343863913e-05, "loss": 1.3619, "step": 1526 }, { "epoch": 0.4634997723478525, "grad_norm": 0.41966769099235535, "learning_rate": 9.078371810449575e-05, "loss": 1.9004, "step": 1527 }, { "epoch": 0.46380330854454394, "grad_norm": 0.39979109168052673, "learning_rate": 9.077764277035238e-05, "loss": 1.8401, "step": 1528 }, { "epoch": 0.46410684474123537, "grad_norm": 0.3719238042831421, "learning_rate": 9.0771567436209e-05, "loss": 1.8935, "step": 1529 }, { "epoch": 0.46441038093792686, "grad_norm": 0.4243968427181244, "learning_rate": 9.076549210206562e-05, "loss": 1.9741, "step": 1530 }, { "epoch": 0.4647139171346183, "grad_norm": 0.46206673979759216, "learning_rate": 9.075941676792225e-05, "loss": 1.929, "step": 1531 }, { "epoch": 0.4650174533313098, "grad_norm": 0.49223679304122925, "learning_rate": 9.075334143377886e-05, "loss": 1.5711, "step": 1532 }, { "epoch": 0.4653209895280012, "grad_norm": 0.40891674160957336, "learning_rate": 9.074726609963548e-05, "loss": 1.3376, "step": 1533 }, { "epoch": 0.46562452572469265, "grad_norm": 0.4146333336830139, "learning_rate": 9.07411907654921e-05, "loss": 1.9774, "step": 1534 }, { "epoch": 0.46592806192138414, "grad_norm": 0.39834362268447876, "learning_rate": 9.073511543134873e-05, "loss": 1.8253, "step": 1535 }, { "epoch": 0.4662315981180756, "grad_norm": 0.4415489137172699, "learning_rate": 9.072904009720536e-05, "loss": 2.0604, "step": 1536 }, { "epoch": 0.466535134314767, "grad_norm": 0.40179288387298584, "learning_rate": 9.072296476306196e-05, "loss": 2.0014, "step": 1537 }, { "epoch": 0.4668386705114585, "grad_norm": 0.3849865794181824, "learning_rate": 9.07168894289186e-05, "loss": 2.0134, "step": 1538 }, { "epoch": 0.46714220670814993, "grad_norm": 0.4075673520565033, "learning_rate": 9.071081409477523e-05, "loss": 1.7784, "step": 1539 }, { "epoch": 0.4674457429048414, "grad_norm": 0.3913114368915558, "learning_rate": 9.070473876063183e-05, "loss": 2.0035, "step": 1540 }, { "epoch": 0.46774927910153286, "grad_norm": 1.1502317190170288, "learning_rate": 9.069866342648846e-05, "loss": 1.9697, "step": 1541 }, { "epoch": 0.4680528152982243, "grad_norm": 0.3618208169937134, "learning_rate": 9.069258809234509e-05, "loss": 1.8629, "step": 1542 }, { "epoch": 0.4683563514949158, "grad_norm": 0.5658997297286987, "learning_rate": 9.068651275820171e-05, "loss": 1.9923, "step": 1543 }, { "epoch": 0.4686598876916072, "grad_norm": 0.6084100008010864, "learning_rate": 9.068043742405833e-05, "loss": 2.2139, "step": 1544 }, { "epoch": 0.4689634238882987, "grad_norm": 0.45577460527420044, "learning_rate": 9.067436208991496e-05, "loss": 1.602, "step": 1545 }, { "epoch": 0.46926696008499014, "grad_norm": 0.38301292061805725, "learning_rate": 9.066828675577157e-05, "loss": 1.7644, "step": 1546 }, { "epoch": 0.46957049628168157, "grad_norm": 0.42755424976348877, "learning_rate": 9.066221142162819e-05, "loss": 1.7429, "step": 1547 }, { "epoch": 0.46987403247837306, "grad_norm": 0.3980792760848999, "learning_rate": 9.065613608748481e-05, "loss": 1.8362, "step": 1548 }, { "epoch": 0.4701775686750645, "grad_norm": 0.41398894786834717, "learning_rate": 9.065006075334144e-05, "loss": 1.7865, "step": 1549 }, { "epoch": 0.470481104871756, "grad_norm": 0.416704922914505, "learning_rate": 9.064398541919807e-05, "loss": 2.1474, "step": 1550 }, { "epoch": 0.4707846410684474, "grad_norm": 0.3613940477371216, "learning_rate": 9.063791008505467e-05, "loss": 1.9861, "step": 1551 }, { "epoch": 0.47108817726513885, "grad_norm": 0.3752197027206421, "learning_rate": 9.06318347509113e-05, "loss": 1.5374, "step": 1552 }, { "epoch": 0.47139171346183034, "grad_norm": 0.3436816930770874, "learning_rate": 9.062575941676794e-05, "loss": 2.0983, "step": 1553 }, { "epoch": 0.4716952496585218, "grad_norm": 0.40420001745224, "learning_rate": 9.061968408262454e-05, "loss": 1.8447, "step": 1554 }, { "epoch": 0.4719987858552132, "grad_norm": 0.5580700039863586, "learning_rate": 9.061360874848117e-05, "loss": 1.4499, "step": 1555 }, { "epoch": 0.4723023220519047, "grad_norm": 0.42122432589530945, "learning_rate": 9.06075334143378e-05, "loss": 1.9475, "step": 1556 }, { "epoch": 0.47260585824859613, "grad_norm": 0.37578698992729187, "learning_rate": 9.060145808019442e-05, "loss": 2.2064, "step": 1557 }, { "epoch": 0.4729093944452876, "grad_norm": 0.3756474554538727, "learning_rate": 9.059538274605104e-05, "loss": 1.6932, "step": 1558 }, { "epoch": 0.47321293064197906, "grad_norm": 0.45632341504096985, "learning_rate": 9.058930741190767e-05, "loss": 2.0437, "step": 1559 }, { "epoch": 0.4735164668386705, "grad_norm": 0.41071614623069763, "learning_rate": 9.058323207776428e-05, "loss": 1.9358, "step": 1560 }, { "epoch": 0.473820003035362, "grad_norm": 0.5713014006614685, "learning_rate": 9.05771567436209e-05, "loss": 1.9766, "step": 1561 }, { "epoch": 0.4741235392320534, "grad_norm": 0.3685849606990814, "learning_rate": 9.057108140947752e-05, "loss": 1.9811, "step": 1562 }, { "epoch": 0.4744270754287449, "grad_norm": 0.4106161594390869, "learning_rate": 9.056500607533415e-05, "loss": 1.6636, "step": 1563 }, { "epoch": 0.47473061162543634, "grad_norm": 0.4227912724018097, "learning_rate": 9.055893074119077e-05, "loss": 2.1302, "step": 1564 }, { "epoch": 0.4750341478221278, "grad_norm": 0.6117652058601379, "learning_rate": 9.055285540704738e-05, "loss": 1.9783, "step": 1565 }, { "epoch": 0.47533768401881926, "grad_norm": 0.34352535009384155, "learning_rate": 9.054678007290401e-05, "loss": 1.5251, "step": 1566 }, { "epoch": 0.4756412202155107, "grad_norm": 0.4252207577228546, "learning_rate": 9.054070473876065e-05, "loss": 2.1315, "step": 1567 }, { "epoch": 0.47594475641220213, "grad_norm": 0.4429045617580414, "learning_rate": 9.053462940461725e-05, "loss": 2.2023, "step": 1568 }, { "epoch": 0.4762482926088936, "grad_norm": 0.36126387119293213, "learning_rate": 9.052855407047388e-05, "loss": 2.0909, "step": 1569 }, { "epoch": 0.47655182880558505, "grad_norm": 0.40380343794822693, "learning_rate": 9.052247873633051e-05, "loss": 1.614, "step": 1570 }, { "epoch": 0.47685536500227654, "grad_norm": 0.37091997265815735, "learning_rate": 9.051640340218713e-05, "loss": 2.0191, "step": 1571 }, { "epoch": 0.477158901198968, "grad_norm": 0.3446311354637146, "learning_rate": 9.051032806804375e-05, "loss": 1.9633, "step": 1572 }, { "epoch": 0.4774624373956594, "grad_norm": 0.37436115741729736, "learning_rate": 9.050425273390038e-05, "loss": 1.7945, "step": 1573 }, { "epoch": 0.4777659735923509, "grad_norm": 0.36618462204933167, "learning_rate": 9.0498177399757e-05, "loss": 1.838, "step": 1574 }, { "epoch": 0.47806950978904234, "grad_norm": 0.4327848255634308, "learning_rate": 9.049210206561361e-05, "loss": 1.8218, "step": 1575 }, { "epoch": 0.4783730459857338, "grad_norm": 0.33957040309906006, "learning_rate": 9.048602673147023e-05, "loss": 1.3948, "step": 1576 }, { "epoch": 0.47867658218242526, "grad_norm": 0.34288668632507324, "learning_rate": 9.047995139732686e-05, "loss": 1.856, "step": 1577 }, { "epoch": 0.4789801183791167, "grad_norm": 0.42410871386528015, "learning_rate": 9.047387606318348e-05, "loss": 1.6138, "step": 1578 }, { "epoch": 0.4792836545758082, "grad_norm": 0.325130432844162, "learning_rate": 9.04678007290401e-05, "loss": 1.5631, "step": 1579 }, { "epoch": 0.4795871907724996, "grad_norm": 0.46126997470855713, "learning_rate": 9.046172539489672e-05, "loss": 1.8045, "step": 1580 }, { "epoch": 0.47989072696919105, "grad_norm": 0.4125445783138275, "learning_rate": 9.045565006075336e-05, "loss": 1.9054, "step": 1581 }, { "epoch": 0.48019426316588254, "grad_norm": 0.3341776430606842, "learning_rate": 9.044957472660996e-05, "loss": 1.9269, "step": 1582 }, { "epoch": 0.480497799362574, "grad_norm": 0.37623131275177, "learning_rate": 9.044349939246659e-05, "loss": 1.8621, "step": 1583 }, { "epoch": 0.48080133555926546, "grad_norm": 0.42698100209236145, "learning_rate": 9.043742405832322e-05, "loss": 1.9414, "step": 1584 }, { "epoch": 0.4811048717559569, "grad_norm": 0.39322131872177124, "learning_rate": 9.043134872417984e-05, "loss": 1.6427, "step": 1585 }, { "epoch": 0.48140840795264833, "grad_norm": 0.4348810315132141, "learning_rate": 9.042527339003646e-05, "loss": 2.0169, "step": 1586 }, { "epoch": 0.4817119441493398, "grad_norm": 0.42536425590515137, "learning_rate": 9.041919805589307e-05, "loss": 1.79, "step": 1587 }, { "epoch": 0.48201548034603126, "grad_norm": 0.35837772488594055, "learning_rate": 9.04131227217497e-05, "loss": 2.0152, "step": 1588 }, { "epoch": 0.48231901654272274, "grad_norm": 0.4053284525871277, "learning_rate": 9.040704738760632e-05, "loss": 1.9912, "step": 1589 }, { "epoch": 0.4826225527394142, "grad_norm": 1.0780633687973022, "learning_rate": 9.040097205346294e-05, "loss": 2.3151, "step": 1590 }, { "epoch": 0.4829260889361056, "grad_norm": 0.3571546673774719, "learning_rate": 9.039489671931957e-05, "loss": 2.007, "step": 1591 }, { "epoch": 0.4832296251327971, "grad_norm": 1.1343384981155396, "learning_rate": 9.038882138517619e-05, "loss": 2.2369, "step": 1592 }, { "epoch": 0.48353316132948854, "grad_norm": 0.43974751234054565, "learning_rate": 9.03827460510328e-05, "loss": 2.1774, "step": 1593 }, { "epoch": 0.48383669752617997, "grad_norm": 0.5721186995506287, "learning_rate": 9.037667071688943e-05, "loss": 1.9005, "step": 1594 }, { "epoch": 0.48414023372287146, "grad_norm": 0.43478089570999146, "learning_rate": 9.037059538274607e-05, "loss": 1.8163, "step": 1595 }, { "epoch": 0.4844437699195629, "grad_norm": 0.4186250865459442, "learning_rate": 9.036452004860267e-05, "loss": 1.5557, "step": 1596 }, { "epoch": 0.4847473061162544, "grad_norm": 0.363033264875412, "learning_rate": 9.03584447144593e-05, "loss": 1.9102, "step": 1597 }, { "epoch": 0.4850508423129458, "grad_norm": 0.39680740237236023, "learning_rate": 9.035236938031593e-05, "loss": 1.8232, "step": 1598 }, { "epoch": 0.48535437850963725, "grad_norm": 0.3754984736442566, "learning_rate": 9.034629404617255e-05, "loss": 1.7743, "step": 1599 }, { "epoch": 0.48565791470632874, "grad_norm": 0.4426131546497345, "learning_rate": 9.034021871202917e-05, "loss": 1.8806, "step": 1600 }, { "epoch": 0.4859614509030202, "grad_norm": 0.37828418612480164, "learning_rate": 9.033414337788578e-05, "loss": 1.7686, "step": 1601 }, { "epoch": 0.48626498709971167, "grad_norm": 0.44848862290382385, "learning_rate": 9.032806804374241e-05, "loss": 1.6695, "step": 1602 }, { "epoch": 0.4865685232964031, "grad_norm": 0.357838898897171, "learning_rate": 9.032199270959903e-05, "loss": 1.8404, "step": 1603 }, { "epoch": 0.48687205949309453, "grad_norm": 0.6578190326690674, "learning_rate": 9.031591737545565e-05, "loss": 1.3599, "step": 1604 }, { "epoch": 0.487175595689786, "grad_norm": 0.4240557849407196, "learning_rate": 9.030984204131228e-05, "loss": 1.3183, "step": 1605 }, { "epoch": 0.48747913188647746, "grad_norm": 0.4170602262020111, "learning_rate": 9.03037667071689e-05, "loss": 2.193, "step": 1606 }, { "epoch": 0.4877826680831689, "grad_norm": 0.39807751774787903, "learning_rate": 9.029769137302551e-05, "loss": 1.5821, "step": 1607 }, { "epoch": 0.4880862042798604, "grad_norm": 0.54439777135849, "learning_rate": 9.029161603888214e-05, "loss": 1.6293, "step": 1608 }, { "epoch": 0.4883897404765518, "grad_norm": 0.39446118474006653, "learning_rate": 9.028554070473878e-05, "loss": 1.6712, "step": 1609 }, { "epoch": 0.4886932766732433, "grad_norm": 0.42656177282333374, "learning_rate": 9.027946537059538e-05, "loss": 1.7296, "step": 1610 }, { "epoch": 0.48899681286993474, "grad_norm": 0.4832558333873749, "learning_rate": 9.027339003645201e-05, "loss": 1.8888, "step": 1611 }, { "epoch": 0.4893003490666262, "grad_norm": 0.44794905185699463, "learning_rate": 9.026731470230864e-05, "loss": 1.6074, "step": 1612 }, { "epoch": 0.48960388526331766, "grad_norm": 0.344939649105072, "learning_rate": 9.026123936816524e-05, "loss": 1.9943, "step": 1613 }, { "epoch": 0.4899074214600091, "grad_norm": 0.42949387431144714, "learning_rate": 9.025516403402188e-05, "loss": 1.9922, "step": 1614 }, { "epoch": 0.4902109576567006, "grad_norm": 0.39325597882270813, "learning_rate": 9.024908869987849e-05, "loss": 1.8447, "step": 1615 }, { "epoch": 0.490514493853392, "grad_norm": 0.3923071622848511, "learning_rate": 9.024301336573512e-05, "loss": 1.7969, "step": 1616 }, { "epoch": 0.49081803005008345, "grad_norm": 0.3386680483818054, "learning_rate": 9.023693803159174e-05, "loss": 1.9222, "step": 1617 }, { "epoch": 0.49112156624677494, "grad_norm": 0.40206924080848694, "learning_rate": 9.023086269744836e-05, "loss": 2.1749, "step": 1618 }, { "epoch": 0.4914251024434664, "grad_norm": 0.36428967118263245, "learning_rate": 9.022478736330499e-05, "loss": 1.4163, "step": 1619 }, { "epoch": 0.4917286386401578, "grad_norm": 0.4516347050666809, "learning_rate": 9.02187120291616e-05, "loss": 1.8412, "step": 1620 }, { "epoch": 0.4920321748368493, "grad_norm": 0.40233004093170166, "learning_rate": 9.021263669501822e-05, "loss": 1.6124, "step": 1621 }, { "epoch": 0.49233571103354073, "grad_norm": 0.4065000116825104, "learning_rate": 9.020656136087485e-05, "loss": 1.7479, "step": 1622 }, { "epoch": 0.4926392472302322, "grad_norm": 0.42242977023124695, "learning_rate": 9.020048602673149e-05, "loss": 2.3126, "step": 1623 }, { "epoch": 0.49294278342692366, "grad_norm": 0.3774438500404358, "learning_rate": 9.019441069258809e-05, "loss": 2.0075, "step": 1624 }, { "epoch": 0.4932463196236151, "grad_norm": 0.3382234275341034, "learning_rate": 9.018833535844472e-05, "loss": 1.8028, "step": 1625 }, { "epoch": 0.4935498558203066, "grad_norm": 0.443689227104187, "learning_rate": 9.018226002430135e-05, "loss": 1.9506, "step": 1626 }, { "epoch": 0.493853392016998, "grad_norm": 0.32814332842826843, "learning_rate": 9.017618469015795e-05, "loss": 1.85, "step": 1627 }, { "epoch": 0.4941569282136895, "grad_norm": 0.691228449344635, "learning_rate": 9.017010935601459e-05, "loss": 1.3521, "step": 1628 }, { "epoch": 0.49446046441038094, "grad_norm": 0.43137383460998535, "learning_rate": 9.01640340218712e-05, "loss": 1.9107, "step": 1629 }, { "epoch": 0.4947640006070724, "grad_norm": 0.3534761965274811, "learning_rate": 9.015795868772783e-05, "loss": 1.3167, "step": 1630 }, { "epoch": 0.49506753680376386, "grad_norm": 0.5987849831581116, "learning_rate": 9.015188335358445e-05, "loss": 2.0258, "step": 1631 }, { "epoch": 0.4953710730004553, "grad_norm": 0.38956066966056824, "learning_rate": 9.014580801944107e-05, "loss": 1.5787, "step": 1632 }, { "epoch": 0.4956746091971468, "grad_norm": 0.43218016624450684, "learning_rate": 9.01397326852977e-05, "loss": 1.4582, "step": 1633 }, { "epoch": 0.4959781453938382, "grad_norm": 0.8035671710968018, "learning_rate": 9.013365735115432e-05, "loss": 2.2415, "step": 1634 }, { "epoch": 0.49628168159052966, "grad_norm": 0.41837078332901, "learning_rate": 9.012758201701093e-05, "loss": 1.9213, "step": 1635 }, { "epoch": 0.49658521778722114, "grad_norm": 0.48308447003364563, "learning_rate": 9.012150668286756e-05, "loss": 1.5812, "step": 1636 }, { "epoch": 0.4968887539839126, "grad_norm": 0.4080790877342224, "learning_rate": 9.011543134872418e-05, "loss": 1.8055, "step": 1637 }, { "epoch": 0.497192290180604, "grad_norm": 0.409053772687912, "learning_rate": 9.01093560145808e-05, "loss": 2.0126, "step": 1638 }, { "epoch": 0.4974958263772955, "grad_norm": 0.41290226578712463, "learning_rate": 9.010328068043743e-05, "loss": 1.8663, "step": 1639 }, { "epoch": 0.49779936257398694, "grad_norm": 0.36996471881866455, "learning_rate": 9.009720534629406e-05, "loss": 1.9887, "step": 1640 }, { "epoch": 0.4981028987706784, "grad_norm": 0.4474611282348633, "learning_rate": 9.009113001215066e-05, "loss": 1.6636, "step": 1641 }, { "epoch": 0.49840643496736986, "grad_norm": 0.3717537224292755, "learning_rate": 9.00850546780073e-05, "loss": 1.7292, "step": 1642 }, { "epoch": 0.4987099711640613, "grad_norm": 0.6839573979377747, "learning_rate": 9.007897934386391e-05, "loss": 2.1037, "step": 1643 }, { "epoch": 0.4990135073607528, "grad_norm": 0.3877841532230377, "learning_rate": 9.007290400972054e-05, "loss": 2.1324, "step": 1644 }, { "epoch": 0.4993170435574442, "grad_norm": 0.42409414052963257, "learning_rate": 9.006682867557716e-05, "loss": 2.0867, "step": 1645 }, { "epoch": 0.4996205797541357, "grad_norm": 0.38519206643104553, "learning_rate": 9.006075334143378e-05, "loss": 2.0589, "step": 1646 }, { "epoch": 0.49992411595082714, "grad_norm": 0.3910469710826874, "learning_rate": 9.005467800729041e-05, "loss": 1.7952, "step": 1647 }, { "epoch": 0.5002276521475186, "grad_norm": 0.3802652359008789, "learning_rate": 9.004860267314703e-05, "loss": 1.8394, "step": 1648 }, { "epoch": 0.5005311883442101, "grad_norm": 1.9837124347686768, "learning_rate": 9.004252733900364e-05, "loss": 1.9703, "step": 1649 }, { "epoch": 0.5008347245409015, "grad_norm": 0.40731772780418396, "learning_rate": 9.003645200486027e-05, "loss": 1.665, "step": 1650 }, { "epoch": 0.5011382607375929, "grad_norm": 0.4358116686344147, "learning_rate": 9.003037667071689e-05, "loss": 1.369, "step": 1651 }, { "epoch": 0.5014417969342844, "grad_norm": 0.49716782569885254, "learning_rate": 9.002430133657351e-05, "loss": 2.2221, "step": 1652 }, { "epoch": 0.5017453331309759, "grad_norm": 0.41779419779777527, "learning_rate": 9.001822600243014e-05, "loss": 2.0265, "step": 1653 }, { "epoch": 0.5020488693276673, "grad_norm": 0.40375036001205444, "learning_rate": 9.001215066828677e-05, "loss": 1.9761, "step": 1654 }, { "epoch": 0.5023524055243588, "grad_norm": 0.3802977204322815, "learning_rate": 9.000607533414337e-05, "loss": 1.76, "step": 1655 }, { "epoch": 0.5026559417210502, "grad_norm": 0.33772045373916626, "learning_rate": 9e-05, "loss": 1.4531, "step": 1656 }, { "epoch": 0.5029594779177416, "grad_norm": 0.4556722640991211, "learning_rate": 8.999392466585662e-05, "loss": 1.5091, "step": 1657 }, { "epoch": 0.5032630141144332, "grad_norm": 0.37798872590065, "learning_rate": 8.998784933171325e-05, "loss": 1.378, "step": 1658 }, { "epoch": 0.5035665503111246, "grad_norm": 0.3921298086643219, "learning_rate": 8.998177399756987e-05, "loss": 1.901, "step": 1659 }, { "epoch": 0.5038700865078161, "grad_norm": 0.39993181824684143, "learning_rate": 8.997569866342649e-05, "loss": 1.9796, "step": 1660 }, { "epoch": 0.5041736227045075, "grad_norm": 0.41690680384635925, "learning_rate": 8.996962332928312e-05, "loss": 1.6252, "step": 1661 }, { "epoch": 0.5044771589011989, "grad_norm": 0.4252752363681793, "learning_rate": 8.996354799513974e-05, "loss": 1.9233, "step": 1662 }, { "epoch": 0.5047806950978905, "grad_norm": 0.43236085772514343, "learning_rate": 8.995747266099635e-05, "loss": 1.5527, "step": 1663 }, { "epoch": 0.5050842312945819, "grad_norm": 0.32605788111686707, "learning_rate": 8.995139732685298e-05, "loss": 1.8349, "step": 1664 }, { "epoch": 0.5053877674912733, "grad_norm": 0.8619269728660583, "learning_rate": 8.99453219927096e-05, "loss": 1.3305, "step": 1665 }, { "epoch": 0.5056913036879648, "grad_norm": 0.429949551820755, "learning_rate": 8.993924665856622e-05, "loss": 1.531, "step": 1666 }, { "epoch": 0.5059948398846562, "grad_norm": 0.38018864393234253, "learning_rate": 8.993317132442285e-05, "loss": 1.4132, "step": 1667 }, { "epoch": 0.5062983760813476, "grad_norm": 0.411668986082077, "learning_rate": 8.992709599027948e-05, "loss": 2.0736, "step": 1668 }, { "epoch": 0.5066019122780392, "grad_norm": 0.41500651836395264, "learning_rate": 8.992102065613608e-05, "loss": 2.0055, "step": 1669 }, { "epoch": 0.5069054484747306, "grad_norm": 0.3659593164920807, "learning_rate": 8.991494532199272e-05, "loss": 1.8854, "step": 1670 }, { "epoch": 0.5072089846714221, "grad_norm": 0.4081539809703827, "learning_rate": 8.990886998784933e-05, "loss": 1.9027, "step": 1671 }, { "epoch": 0.5075125208681135, "grad_norm": 0.4111250340938568, "learning_rate": 8.990279465370596e-05, "loss": 1.7838, "step": 1672 }, { "epoch": 0.5078160570648049, "grad_norm": 0.37269532680511475, "learning_rate": 8.989671931956258e-05, "loss": 1.8353, "step": 1673 }, { "epoch": 0.5081195932614965, "grad_norm": 0.4204343259334564, "learning_rate": 8.98906439854192e-05, "loss": 1.5011, "step": 1674 }, { "epoch": 0.5084231294581879, "grad_norm": 0.4515773355960846, "learning_rate": 8.988456865127583e-05, "loss": 1.5303, "step": 1675 }, { "epoch": 0.5087266656548793, "grad_norm": 0.44019004702568054, "learning_rate": 8.987849331713245e-05, "loss": 2.0108, "step": 1676 }, { "epoch": 0.5090302018515708, "grad_norm": 0.47351813316345215, "learning_rate": 8.987241798298906e-05, "loss": 2.0518, "step": 1677 }, { "epoch": 0.5093337380482622, "grad_norm": 0.40282347798347473, "learning_rate": 8.98663426488457e-05, "loss": 1.99, "step": 1678 }, { "epoch": 0.5096372742449538, "grad_norm": 0.49869832396507263, "learning_rate": 8.986026731470231e-05, "loss": 1.9592, "step": 1679 }, { "epoch": 0.5099408104416452, "grad_norm": 0.36178889870643616, "learning_rate": 8.985419198055893e-05, "loss": 1.927, "step": 1680 }, { "epoch": 0.5102443466383366, "grad_norm": 0.3670339584350586, "learning_rate": 8.984811664641556e-05, "loss": 1.9516, "step": 1681 }, { "epoch": 0.510547882835028, "grad_norm": 0.3458341658115387, "learning_rate": 8.984204131227218e-05, "loss": 1.8203, "step": 1682 }, { "epoch": 0.5108514190317195, "grad_norm": 0.4636301100254059, "learning_rate": 8.98359659781288e-05, "loss": 1.7146, "step": 1683 }, { "epoch": 0.511154955228411, "grad_norm": 0.45436516404151917, "learning_rate": 8.982989064398543e-05, "loss": 1.8514, "step": 1684 }, { "epoch": 0.5114584914251025, "grad_norm": 0.46940287947654724, "learning_rate": 8.982381530984204e-05, "loss": 2.0162, "step": 1685 }, { "epoch": 0.5117620276217939, "grad_norm": 0.4405171573162079, "learning_rate": 8.981773997569866e-05, "loss": 2.0003, "step": 1686 }, { "epoch": 0.5120655638184853, "grad_norm": 0.4306286871433258, "learning_rate": 8.981166464155529e-05, "loss": 1.8338, "step": 1687 }, { "epoch": 0.5123691000151768, "grad_norm": 0.43476733565330505, "learning_rate": 8.980558930741191e-05, "loss": 1.623, "step": 1688 }, { "epoch": 0.5126726362118683, "grad_norm": 0.3655628561973572, "learning_rate": 8.979951397326854e-05, "loss": 2.098, "step": 1689 }, { "epoch": 0.5129761724085597, "grad_norm": 0.36685287952423096, "learning_rate": 8.979343863912516e-05, "loss": 1.9533, "step": 1690 }, { "epoch": 0.5132797086052512, "grad_norm": 0.4131629765033722, "learning_rate": 8.978736330498177e-05, "loss": 1.8093, "step": 1691 }, { "epoch": 0.5135832448019426, "grad_norm": 0.36607033014297485, "learning_rate": 8.97812879708384e-05, "loss": 1.4293, "step": 1692 }, { "epoch": 0.513886780998634, "grad_norm": 0.4478306174278259, "learning_rate": 8.977521263669502e-05, "loss": 1.916, "step": 1693 }, { "epoch": 0.5141903171953256, "grad_norm": 0.4570290446281433, "learning_rate": 8.976913730255164e-05, "loss": 1.7859, "step": 1694 }, { "epoch": 0.514493853392017, "grad_norm": 0.46024757623672485, "learning_rate": 8.976306196840827e-05, "loss": 1.6032, "step": 1695 }, { "epoch": 0.5147973895887085, "grad_norm": 0.40080446004867554, "learning_rate": 8.975698663426489e-05, "loss": 1.7693, "step": 1696 }, { "epoch": 0.5151009257853999, "grad_norm": 0.3736198842525482, "learning_rate": 8.97509113001215e-05, "loss": 1.8185, "step": 1697 }, { "epoch": 0.5154044619820913, "grad_norm": 0.7444111704826355, "learning_rate": 8.974483596597814e-05, "loss": 1.9927, "step": 1698 }, { "epoch": 0.5157079981787828, "grad_norm": 0.42862579226493835, "learning_rate": 8.973876063183475e-05, "loss": 1.9946, "step": 1699 }, { "epoch": 0.5160115343754743, "grad_norm": 0.5150566101074219, "learning_rate": 8.973268529769137e-05, "loss": 1.6675, "step": 1700 }, { "epoch": 0.5163150705721657, "grad_norm": 0.4260749816894531, "learning_rate": 8.9726609963548e-05, "loss": 2.0212, "step": 1701 }, { "epoch": 0.5166186067688572, "grad_norm": 0.3930248022079468, "learning_rate": 8.972053462940462e-05, "loss": 1.8982, "step": 1702 }, { "epoch": 0.5169221429655486, "grad_norm": 0.40357765555381775, "learning_rate": 8.971445929526125e-05, "loss": 1.8368, "step": 1703 }, { "epoch": 0.51722567916224, "grad_norm": 0.3957735300064087, "learning_rate": 8.970838396111787e-05, "loss": 1.7569, "step": 1704 }, { "epoch": 0.5175292153589316, "grad_norm": 0.3867725431919098, "learning_rate": 8.970230862697448e-05, "loss": 2.0536, "step": 1705 }, { "epoch": 0.517832751555623, "grad_norm": 0.38773855566978455, "learning_rate": 8.969623329283111e-05, "loss": 1.9507, "step": 1706 }, { "epoch": 0.5181362877523145, "grad_norm": 0.4161403775215149, "learning_rate": 8.969015795868773e-05, "loss": 1.1656, "step": 1707 }, { "epoch": 0.5184398239490059, "grad_norm": 0.40050750970840454, "learning_rate": 8.968408262454435e-05, "loss": 1.9384, "step": 1708 }, { "epoch": 0.5187433601456973, "grad_norm": 0.43072274327278137, "learning_rate": 8.967800729040098e-05, "loss": 1.9838, "step": 1709 }, { "epoch": 0.5190468963423889, "grad_norm": 0.4291669428348541, "learning_rate": 8.96719319562576e-05, "loss": 2.0588, "step": 1710 }, { "epoch": 0.5193504325390803, "grad_norm": 0.3524603545665741, "learning_rate": 8.966585662211422e-05, "loss": 1.9433, "step": 1711 }, { "epoch": 0.5196539687357717, "grad_norm": 0.42883431911468506, "learning_rate": 8.965978128797085e-05, "loss": 2.0879, "step": 1712 }, { "epoch": 0.5199575049324632, "grad_norm": 0.3711095452308655, "learning_rate": 8.965370595382746e-05, "loss": 1.8024, "step": 1713 }, { "epoch": 0.5202610411291546, "grad_norm": 0.3979575037956238, "learning_rate": 8.964763061968408e-05, "loss": 1.889, "step": 1714 }, { "epoch": 0.5205645773258462, "grad_norm": 0.3781624436378479, "learning_rate": 8.964155528554071e-05, "loss": 1.4032, "step": 1715 }, { "epoch": 0.5208681135225376, "grad_norm": 0.4285725951194763, "learning_rate": 8.963547995139733e-05, "loss": 1.5933, "step": 1716 }, { "epoch": 0.521171649719229, "grad_norm": 0.40880918502807617, "learning_rate": 8.962940461725396e-05, "loss": 1.4162, "step": 1717 }, { "epoch": 0.5214751859159205, "grad_norm": 0.4186420440673828, "learning_rate": 8.962332928311058e-05, "loss": 1.6866, "step": 1718 }, { "epoch": 0.5217787221126119, "grad_norm": 0.3772728443145752, "learning_rate": 8.96172539489672e-05, "loss": 1.8005, "step": 1719 }, { "epoch": 0.5220822583093034, "grad_norm": 0.4102610945701599, "learning_rate": 8.961117861482382e-05, "loss": 2.0691, "step": 1720 }, { "epoch": 0.5223857945059949, "grad_norm": 0.463878870010376, "learning_rate": 8.960510328068044e-05, "loss": 2.2497, "step": 1721 }, { "epoch": 0.5226893307026863, "grad_norm": 0.3314138948917389, "learning_rate": 8.959902794653706e-05, "loss": 1.6946, "step": 1722 }, { "epoch": 0.5229928668993777, "grad_norm": 0.7187567949295044, "learning_rate": 8.959295261239369e-05, "loss": 1.7443, "step": 1723 }, { "epoch": 0.5232964030960692, "grad_norm": 0.42266663908958435, "learning_rate": 8.958687727825031e-05, "loss": 1.9827, "step": 1724 }, { "epoch": 0.5235999392927606, "grad_norm": 0.39689430594444275, "learning_rate": 8.958080194410693e-05, "loss": 1.8162, "step": 1725 }, { "epoch": 0.5239034754894522, "grad_norm": 0.36018458008766174, "learning_rate": 8.957472660996356e-05, "loss": 1.9901, "step": 1726 }, { "epoch": 0.5242070116861436, "grad_norm": 0.29599374532699585, "learning_rate": 8.956865127582017e-05, "loss": 1.5581, "step": 1727 }, { "epoch": 0.524510547882835, "grad_norm": 0.3953525424003601, "learning_rate": 8.956257594167679e-05, "loss": 1.8398, "step": 1728 }, { "epoch": 0.5248140840795265, "grad_norm": 0.5847448110580444, "learning_rate": 8.955650060753342e-05, "loss": 1.6539, "step": 1729 }, { "epoch": 0.5251176202762179, "grad_norm": 0.37169334292411804, "learning_rate": 8.955042527339004e-05, "loss": 1.0603, "step": 1730 }, { "epoch": 0.5254211564729094, "grad_norm": 0.3689024746417999, "learning_rate": 8.954434993924667e-05, "loss": 1.4661, "step": 1731 }, { "epoch": 0.5257246926696009, "grad_norm": 0.39325040578842163, "learning_rate": 8.953827460510329e-05, "loss": 1.9562, "step": 1732 }, { "epoch": 0.5260282288662923, "grad_norm": 0.5037636756896973, "learning_rate": 8.95321992709599e-05, "loss": 1.5998, "step": 1733 }, { "epoch": 0.5263317650629837, "grad_norm": 0.38126620650291443, "learning_rate": 8.952612393681654e-05, "loss": 1.8444, "step": 1734 }, { "epoch": 0.5266353012596752, "grad_norm": 0.4108048379421234, "learning_rate": 8.952004860267315e-05, "loss": 1.5128, "step": 1735 }, { "epoch": 0.5269388374563667, "grad_norm": 0.3624730408191681, "learning_rate": 8.951397326852977e-05, "loss": 1.9051, "step": 1736 }, { "epoch": 0.5272423736530581, "grad_norm": 0.374348908662796, "learning_rate": 8.95078979343864e-05, "loss": 1.5501, "step": 1737 }, { "epoch": 0.5275459098497496, "grad_norm": 0.504650890827179, "learning_rate": 8.950182260024302e-05, "loss": 1.9115, "step": 1738 }, { "epoch": 0.527849446046441, "grad_norm": 0.31486794352531433, "learning_rate": 8.949574726609964e-05, "loss": 1.7507, "step": 1739 }, { "epoch": 0.5281529822431325, "grad_norm": 0.38089415431022644, "learning_rate": 8.948967193195627e-05, "loss": 1.9424, "step": 1740 }, { "epoch": 0.528456518439824, "grad_norm": 0.5939797163009644, "learning_rate": 8.948359659781288e-05, "loss": 2.0123, "step": 1741 }, { "epoch": 0.5287600546365154, "grad_norm": 0.4175383746623993, "learning_rate": 8.94775212636695e-05, "loss": 1.9405, "step": 1742 }, { "epoch": 0.5290635908332069, "grad_norm": 0.3071494996547699, "learning_rate": 8.947144592952613e-05, "loss": 0.9484, "step": 1743 }, { "epoch": 0.5293671270298983, "grad_norm": 0.4822414219379425, "learning_rate": 8.946537059538275e-05, "loss": 1.7093, "step": 1744 }, { "epoch": 0.5296706632265897, "grad_norm": 0.8036310076713562, "learning_rate": 8.945929526123938e-05, "loss": 2.1441, "step": 1745 }, { "epoch": 0.5299741994232813, "grad_norm": 0.42779991030693054, "learning_rate": 8.9453219927096e-05, "loss": 1.9736, "step": 1746 }, { "epoch": 0.5302777356199727, "grad_norm": 0.37124693393707275, "learning_rate": 8.944714459295261e-05, "loss": 2.0578, "step": 1747 }, { "epoch": 0.5305812718166641, "grad_norm": 0.4504419267177582, "learning_rate": 8.944106925880925e-05, "loss": 2.0148, "step": 1748 }, { "epoch": 0.5308848080133556, "grad_norm": 0.370437353849411, "learning_rate": 8.943499392466586e-05, "loss": 1.7045, "step": 1749 }, { "epoch": 0.531188344210047, "grad_norm": 0.4089522063732147, "learning_rate": 8.942891859052248e-05, "loss": 1.7649, "step": 1750 }, { "epoch": 0.5314918804067384, "grad_norm": 0.3770054280757904, "learning_rate": 8.942284325637911e-05, "loss": 1.8252, "step": 1751 }, { "epoch": 0.53179541660343, "grad_norm": 0.45180705189704895, "learning_rate": 8.941676792223573e-05, "loss": 0.9027, "step": 1752 }, { "epoch": 0.5320989528001214, "grad_norm": 0.415444016456604, "learning_rate": 8.941069258809235e-05, "loss": 1.8366, "step": 1753 }, { "epoch": 0.5324024889968129, "grad_norm": 0.4421723783016205, "learning_rate": 8.940461725394898e-05, "loss": 1.6414, "step": 1754 }, { "epoch": 0.5327060251935043, "grad_norm": 0.3791792392730713, "learning_rate": 8.93985419198056e-05, "loss": 2.0668, "step": 1755 }, { "epoch": 0.5330095613901957, "grad_norm": 0.40155166387557983, "learning_rate": 8.939246658566221e-05, "loss": 1.8144, "step": 1756 }, { "epoch": 0.5333130975868873, "grad_norm": 0.38897809386253357, "learning_rate": 8.938639125151884e-05, "loss": 1.4815, "step": 1757 }, { "epoch": 0.5336166337835787, "grad_norm": 0.35486680269241333, "learning_rate": 8.938031591737546e-05, "loss": 1.8673, "step": 1758 }, { "epoch": 0.5339201699802701, "grad_norm": 0.33397093415260315, "learning_rate": 8.937424058323208e-05, "loss": 1.7756, "step": 1759 }, { "epoch": 0.5342237061769616, "grad_norm": 0.43346378207206726, "learning_rate": 8.936816524908871e-05, "loss": 1.8367, "step": 1760 }, { "epoch": 0.534527242373653, "grad_norm": 0.37739312648773193, "learning_rate": 8.936208991494532e-05, "loss": 2.1916, "step": 1761 }, { "epoch": 0.5348307785703446, "grad_norm": 0.32218697667121887, "learning_rate": 8.935601458080196e-05, "loss": 1.9885, "step": 1762 }, { "epoch": 0.535134314767036, "grad_norm": 0.37920355796813965, "learning_rate": 8.934993924665856e-05, "loss": 1.7163, "step": 1763 }, { "epoch": 0.5354378509637274, "grad_norm": 0.3895961344242096, "learning_rate": 8.934386391251519e-05, "loss": 1.8757, "step": 1764 }, { "epoch": 0.5357413871604189, "grad_norm": 0.4898541271686554, "learning_rate": 8.933778857837182e-05, "loss": 1.7097, "step": 1765 }, { "epoch": 0.5360449233571103, "grad_norm": 0.3851979672908783, "learning_rate": 8.933171324422844e-05, "loss": 1.8913, "step": 1766 }, { "epoch": 0.5363484595538018, "grad_norm": 0.3567551076412201, "learning_rate": 8.932563791008506e-05, "loss": 1.8789, "step": 1767 }, { "epoch": 0.5366519957504933, "grad_norm": 0.4687878489494324, "learning_rate": 8.931956257594169e-05, "loss": 1.936, "step": 1768 }, { "epoch": 0.5369555319471847, "grad_norm": 0.36735373735427856, "learning_rate": 8.93134872417983e-05, "loss": 2.0743, "step": 1769 }, { "epoch": 0.5372590681438761, "grad_norm": 0.508160412311554, "learning_rate": 8.930741190765492e-05, "loss": 1.9822, "step": 1770 }, { "epoch": 0.5375626043405676, "grad_norm": 0.40640148520469666, "learning_rate": 8.930133657351155e-05, "loss": 1.5255, "step": 1771 }, { "epoch": 0.5378661405372591, "grad_norm": 0.7253953218460083, "learning_rate": 8.929526123936817e-05, "loss": 1.8898, "step": 1772 }, { "epoch": 0.5381696767339506, "grad_norm": 0.4226602017879486, "learning_rate": 8.928918590522479e-05, "loss": 1.7197, "step": 1773 }, { "epoch": 0.538473212930642, "grad_norm": 0.42332541942596436, "learning_rate": 8.928311057108142e-05, "loss": 1.9117, "step": 1774 }, { "epoch": 0.5387767491273334, "grad_norm": 0.8125683665275574, "learning_rate": 8.927703523693803e-05, "loss": 1.7325, "step": 1775 }, { "epoch": 0.5390802853240249, "grad_norm": 0.44765642285346985, "learning_rate": 8.927095990279467e-05, "loss": 2.0353, "step": 1776 }, { "epoch": 0.5393838215207164, "grad_norm": 0.45518067479133606, "learning_rate": 8.926488456865127e-05, "loss": 1.9536, "step": 1777 }, { "epoch": 0.5396873577174078, "grad_norm": 0.3856181800365448, "learning_rate": 8.92588092345079e-05, "loss": 1.456, "step": 1778 }, { "epoch": 0.5399908939140993, "grad_norm": 0.41640815138816833, "learning_rate": 8.925273390036453e-05, "loss": 1.7481, "step": 1779 }, { "epoch": 0.5402944301107907, "grad_norm": 0.3643503189086914, "learning_rate": 8.924665856622115e-05, "loss": 1.8541, "step": 1780 }, { "epoch": 0.5405979663074821, "grad_norm": 0.40610817074775696, "learning_rate": 8.924058323207777e-05, "loss": 1.8021, "step": 1781 }, { "epoch": 0.5409015025041736, "grad_norm": 1.8827602863311768, "learning_rate": 8.92345078979344e-05, "loss": 1.4657, "step": 1782 }, { "epoch": 0.5412050387008651, "grad_norm": 0.4862421154975891, "learning_rate": 8.922843256379101e-05, "loss": 1.6917, "step": 1783 }, { "epoch": 0.5415085748975565, "grad_norm": 0.4079034626483917, "learning_rate": 8.922235722964763e-05, "loss": 1.4408, "step": 1784 }, { "epoch": 0.541812111094248, "grad_norm": 0.37174421548843384, "learning_rate": 8.921628189550426e-05, "loss": 1.8314, "step": 1785 }, { "epoch": 0.5421156472909394, "grad_norm": 0.4223754107952118, "learning_rate": 8.921020656136088e-05, "loss": 1.7007, "step": 1786 }, { "epoch": 0.5424191834876309, "grad_norm": 0.371114581823349, "learning_rate": 8.92041312272175e-05, "loss": 1.8876, "step": 1787 }, { "epoch": 0.5427227196843224, "grad_norm": 0.4263741672039032, "learning_rate": 8.919805589307413e-05, "loss": 2.1338, "step": 1788 }, { "epoch": 0.5430262558810138, "grad_norm": 0.4573124349117279, "learning_rate": 8.919198055893074e-05, "loss": 1.9776, "step": 1789 }, { "epoch": 0.5433297920777053, "grad_norm": 0.44550567865371704, "learning_rate": 8.918590522478738e-05, "loss": 1.7205, "step": 1790 }, { "epoch": 0.5436333282743967, "grad_norm": 0.42521047592163086, "learning_rate": 8.917982989064398e-05, "loss": 1.9548, "step": 1791 }, { "epoch": 0.5439368644710881, "grad_norm": 0.39518535137176514, "learning_rate": 8.917375455650061e-05, "loss": 2.0192, "step": 1792 }, { "epoch": 0.5442404006677797, "grad_norm": 0.42280903458595276, "learning_rate": 8.916767922235724e-05, "loss": 1.5445, "step": 1793 }, { "epoch": 0.5445439368644711, "grad_norm": 0.40115422010421753, "learning_rate": 8.916160388821386e-05, "loss": 1.9529, "step": 1794 }, { "epoch": 0.5448474730611625, "grad_norm": 0.3923608958721161, "learning_rate": 8.915552855407048e-05, "loss": 2.0184, "step": 1795 }, { "epoch": 0.545151009257854, "grad_norm": 0.3982231020927429, "learning_rate": 8.91494532199271e-05, "loss": 1.9162, "step": 1796 }, { "epoch": 0.5454545454545454, "grad_norm": 0.4375683665275574, "learning_rate": 8.914337788578372e-05, "loss": 1.8728, "step": 1797 }, { "epoch": 0.545758081651237, "grad_norm": 0.4353227913379669, "learning_rate": 8.913730255164034e-05, "loss": 1.4043, "step": 1798 }, { "epoch": 0.5460616178479284, "grad_norm": 0.4171392619609833, "learning_rate": 8.913122721749697e-05, "loss": 1.9417, "step": 1799 }, { "epoch": 0.5463651540446198, "grad_norm": 0.33565661311149597, "learning_rate": 8.912515188335359e-05, "loss": 1.888, "step": 1800 }, { "epoch": 0.5466686902413113, "grad_norm": 0.3857763707637787, "learning_rate": 8.91190765492102e-05, "loss": 1.7882, "step": 1801 }, { "epoch": 0.5469722264380027, "grad_norm": 0.3976082503795624, "learning_rate": 8.911300121506684e-05, "loss": 1.6312, "step": 1802 }, { "epoch": 0.5472757626346942, "grad_norm": 0.43773913383483887, "learning_rate": 8.910692588092345e-05, "loss": 1.7299, "step": 1803 }, { "epoch": 0.5475792988313857, "grad_norm": 0.39484649896621704, "learning_rate": 8.910085054678009e-05, "loss": 1.8826, "step": 1804 }, { "epoch": 0.5478828350280771, "grad_norm": 0.42913469672203064, "learning_rate": 8.909477521263669e-05, "loss": 1.1713, "step": 1805 }, { "epoch": 0.5481863712247685, "grad_norm": 0.43996962904930115, "learning_rate": 8.908869987849332e-05, "loss": 2.162, "step": 1806 }, { "epoch": 0.54848990742146, "grad_norm": 0.7948350310325623, "learning_rate": 8.908262454434995e-05, "loss": 1.8808, "step": 1807 }, { "epoch": 0.5487934436181514, "grad_norm": 0.43142643570899963, "learning_rate": 8.907654921020657e-05, "loss": 2.0966, "step": 1808 }, { "epoch": 0.549096979814843, "grad_norm": 0.36545732617378235, "learning_rate": 8.907047387606319e-05, "loss": 1.7421, "step": 1809 }, { "epoch": 0.5494005160115344, "grad_norm": 0.3977827727794647, "learning_rate": 8.906439854191982e-05, "loss": 1.9356, "step": 1810 }, { "epoch": 0.5497040522082258, "grad_norm": 0.4487985670566559, "learning_rate": 8.905832320777643e-05, "loss": 1.8294, "step": 1811 }, { "epoch": 0.5500075884049173, "grad_norm": 0.4151144027709961, "learning_rate": 8.905224787363305e-05, "loss": 1.7, "step": 1812 }, { "epoch": 0.5503111246016087, "grad_norm": 0.5114679336547852, "learning_rate": 8.904617253948968e-05, "loss": 2.2179, "step": 1813 }, { "epoch": 0.5506146607983002, "grad_norm": 0.4134223163127899, "learning_rate": 8.90400972053463e-05, "loss": 1.9573, "step": 1814 }, { "epoch": 0.5509181969949917, "grad_norm": 0.5172004699707031, "learning_rate": 8.903402187120292e-05, "loss": 1.7614, "step": 1815 }, { "epoch": 0.5512217331916831, "grad_norm": 0.4552132189273834, "learning_rate": 8.902794653705955e-05, "loss": 1.6595, "step": 1816 }, { "epoch": 0.5515252693883745, "grad_norm": 0.4171915054321289, "learning_rate": 8.902187120291616e-05, "loss": 1.7968, "step": 1817 }, { "epoch": 0.551828805585066, "grad_norm": 0.4485832452774048, "learning_rate": 8.90157958687728e-05, "loss": 2.0529, "step": 1818 }, { "epoch": 0.5521323417817575, "grad_norm": 0.3997848331928253, "learning_rate": 8.90097205346294e-05, "loss": 1.7053, "step": 1819 }, { "epoch": 0.552435877978449, "grad_norm": 0.47565630078315735, "learning_rate": 8.900364520048603e-05, "loss": 1.7842, "step": 1820 }, { "epoch": 0.5527394141751404, "grad_norm": 0.42128419876098633, "learning_rate": 8.899756986634266e-05, "loss": 1.8866, "step": 1821 }, { "epoch": 0.5530429503718318, "grad_norm": 0.4098486602306366, "learning_rate": 8.899149453219926e-05, "loss": 1.9268, "step": 1822 }, { "epoch": 0.5533464865685233, "grad_norm": 0.3754071295261383, "learning_rate": 8.89854191980559e-05, "loss": 1.9191, "step": 1823 }, { "epoch": 0.5536500227652148, "grad_norm": 0.4278963804244995, "learning_rate": 8.897934386391253e-05, "loss": 2.0914, "step": 1824 }, { "epoch": 0.5539535589619062, "grad_norm": 0.41121765971183777, "learning_rate": 8.897326852976914e-05, "loss": 1.9135, "step": 1825 }, { "epoch": 0.5542570951585977, "grad_norm": 0.7463552355766296, "learning_rate": 8.896719319562576e-05, "loss": 1.6663, "step": 1826 }, { "epoch": 0.5545606313552891, "grad_norm": 0.3886711299419403, "learning_rate": 8.896111786148239e-05, "loss": 1.9131, "step": 1827 }, { "epoch": 0.5548641675519805, "grad_norm": 0.3520048260688782, "learning_rate": 8.895504252733901e-05, "loss": 1.9166, "step": 1828 }, { "epoch": 0.5551677037486721, "grad_norm": 0.3484227955341339, "learning_rate": 8.894896719319563e-05, "loss": 1.9053, "step": 1829 }, { "epoch": 0.5554712399453635, "grad_norm": 0.7534793615341187, "learning_rate": 8.894289185905226e-05, "loss": 1.6738, "step": 1830 }, { "epoch": 0.555774776142055, "grad_norm": 0.4037635326385498, "learning_rate": 8.893681652490887e-05, "loss": 2.1951, "step": 1831 }, { "epoch": 0.5560783123387464, "grad_norm": 0.39184069633483887, "learning_rate": 8.893074119076549e-05, "loss": 2.0811, "step": 1832 }, { "epoch": 0.5563818485354378, "grad_norm": 0.35053008794784546, "learning_rate": 8.892466585662211e-05, "loss": 1.8178, "step": 1833 }, { "epoch": 0.5566853847321293, "grad_norm": 0.43768683075904846, "learning_rate": 8.891859052247874e-05, "loss": 2.0811, "step": 1834 }, { "epoch": 0.5569889209288208, "grad_norm": 0.38592809438705444, "learning_rate": 8.891251518833537e-05, "loss": 1.8156, "step": 1835 }, { "epoch": 0.5572924571255122, "grad_norm": 0.351408988237381, "learning_rate": 8.890643985419197e-05, "loss": 1.5505, "step": 1836 }, { "epoch": 0.5575959933222037, "grad_norm": 0.4032740592956543, "learning_rate": 8.89003645200486e-05, "loss": 1.622, "step": 1837 }, { "epoch": 0.5578995295188951, "grad_norm": 0.3902193307876587, "learning_rate": 8.889428918590524e-05, "loss": 2.0796, "step": 1838 }, { "epoch": 0.5582030657155865, "grad_norm": 2.2613284587860107, "learning_rate": 8.888821385176185e-05, "loss": 1.6464, "step": 1839 }, { "epoch": 0.5585066019122781, "grad_norm": 0.3818334937095642, "learning_rate": 8.888213851761847e-05, "loss": 1.0967, "step": 1840 }, { "epoch": 0.5588101381089695, "grad_norm": 0.534939169883728, "learning_rate": 8.88760631834751e-05, "loss": 1.9834, "step": 1841 }, { "epoch": 0.559113674305661, "grad_norm": 0.41335856914520264, "learning_rate": 8.886998784933172e-05, "loss": 1.9458, "step": 1842 }, { "epoch": 0.5594172105023524, "grad_norm": 0.4256092309951782, "learning_rate": 8.886391251518834e-05, "loss": 1.9967, "step": 1843 }, { "epoch": 0.5597207466990438, "grad_norm": 0.40793219208717346, "learning_rate": 8.885783718104497e-05, "loss": 1.6972, "step": 1844 }, { "epoch": 0.5600242828957354, "grad_norm": 0.4092423915863037, "learning_rate": 8.885176184690158e-05, "loss": 1.9676, "step": 1845 }, { "epoch": 0.5603278190924268, "grad_norm": 0.35754647850990295, "learning_rate": 8.88456865127582e-05, "loss": 1.1498, "step": 1846 }, { "epoch": 0.5606313552891182, "grad_norm": 0.41491416096687317, "learning_rate": 8.883961117861482e-05, "loss": 2.2078, "step": 1847 }, { "epoch": 0.5609348914858097, "grad_norm": 1.1699934005737305, "learning_rate": 8.883353584447145e-05, "loss": 2.1936, "step": 1848 }, { "epoch": 0.5612384276825011, "grad_norm": 1.9053874015808105, "learning_rate": 8.882746051032808e-05, "loss": 1.7189, "step": 1849 }, { "epoch": 0.5615419638791926, "grad_norm": 0.41807985305786133, "learning_rate": 8.882138517618468e-05, "loss": 1.7771, "step": 1850 }, { "epoch": 0.5618455000758841, "grad_norm": 0.41903504729270935, "learning_rate": 8.881530984204132e-05, "loss": 2.1023, "step": 1851 }, { "epoch": 0.5621490362725755, "grad_norm": 0.3394705653190613, "learning_rate": 8.880923450789795e-05, "loss": 0.9969, "step": 1852 }, { "epoch": 0.5624525724692669, "grad_norm": 0.347989022731781, "learning_rate": 8.880315917375456e-05, "loss": 1.974, "step": 1853 }, { "epoch": 0.5627561086659584, "grad_norm": 0.49732285737991333, "learning_rate": 8.879708383961118e-05, "loss": 1.7575, "step": 1854 }, { "epoch": 0.5630596448626499, "grad_norm": 0.44572606682777405, "learning_rate": 8.879100850546781e-05, "loss": 1.8167, "step": 1855 }, { "epoch": 0.5633631810593414, "grad_norm": 0.8100895881652832, "learning_rate": 8.878493317132443e-05, "loss": 1.9788, "step": 1856 }, { "epoch": 0.5636667172560328, "grad_norm": 0.4205772578716278, "learning_rate": 8.877885783718105e-05, "loss": 1.9986, "step": 1857 }, { "epoch": 0.5639702534527242, "grad_norm": 0.3976004719734192, "learning_rate": 8.877278250303766e-05, "loss": 1.9735, "step": 1858 }, { "epoch": 0.5642737896494157, "grad_norm": 0.41813865303993225, "learning_rate": 8.87667071688943e-05, "loss": 1.8479, "step": 1859 }, { "epoch": 0.5645773258461072, "grad_norm": 0.4901811182498932, "learning_rate": 8.876063183475091e-05, "loss": 2.3504, "step": 1860 }, { "epoch": 0.5648808620427986, "grad_norm": 0.4103149473667145, "learning_rate": 8.875455650060753e-05, "loss": 2.0658, "step": 1861 }, { "epoch": 0.5651843982394901, "grad_norm": 0.37885773181915283, "learning_rate": 8.874848116646416e-05, "loss": 1.8143, "step": 1862 }, { "epoch": 0.5654879344361815, "grad_norm": 0.35186877846717834, "learning_rate": 8.874240583232079e-05, "loss": 1.9395, "step": 1863 }, { "epoch": 0.5657914706328729, "grad_norm": 0.4435397982597351, "learning_rate": 8.87363304981774e-05, "loss": 1.7693, "step": 1864 }, { "epoch": 0.5660950068295644, "grad_norm": 1.451499342918396, "learning_rate": 8.873025516403403e-05, "loss": 1.9007, "step": 1865 }, { "epoch": 0.5663985430262559, "grad_norm": 0.41606009006500244, "learning_rate": 8.872417982989066e-05, "loss": 2.0327, "step": 1866 }, { "epoch": 0.5667020792229474, "grad_norm": 0.38989219069480896, "learning_rate": 8.871810449574727e-05, "loss": 1.6791, "step": 1867 }, { "epoch": 0.5670056154196388, "grad_norm": 0.3850671052932739, "learning_rate": 8.871202916160389e-05, "loss": 1.7889, "step": 1868 }, { "epoch": 0.5673091516163302, "grad_norm": 0.43616947531700134, "learning_rate": 8.870595382746052e-05, "loss": 1.7769, "step": 1869 }, { "epoch": 0.5676126878130217, "grad_norm": 0.39661890268325806, "learning_rate": 8.869987849331714e-05, "loss": 1.9286, "step": 1870 }, { "epoch": 0.5679162240097132, "grad_norm": 0.43553540110588074, "learning_rate": 8.869380315917376e-05, "loss": 1.5863, "step": 1871 }, { "epoch": 0.5682197602064046, "grad_norm": 0.3950207829475403, "learning_rate": 8.868772782503037e-05, "loss": 1.948, "step": 1872 }, { "epoch": 0.5685232964030961, "grad_norm": 0.5240088701248169, "learning_rate": 8.8681652490887e-05, "loss": 1.8758, "step": 1873 }, { "epoch": 0.5688268325997875, "grad_norm": 0.3744898736476898, "learning_rate": 8.867557715674362e-05, "loss": 2.2962, "step": 1874 }, { "epoch": 0.5691303687964789, "grad_norm": 0.3877609074115753, "learning_rate": 8.866950182260024e-05, "loss": 1.675, "step": 1875 }, { "epoch": 0.5694339049931705, "grad_norm": 0.3350330591201782, "learning_rate": 8.866342648845687e-05, "loss": 1.8538, "step": 1876 }, { "epoch": 0.5697374411898619, "grad_norm": 0.38145220279693604, "learning_rate": 8.86573511543135e-05, "loss": 1.6781, "step": 1877 }, { "epoch": 0.5700409773865533, "grad_norm": 0.43861034512519836, "learning_rate": 8.86512758201701e-05, "loss": 1.8, "step": 1878 }, { "epoch": 0.5703445135832448, "grad_norm": 0.4304041266441345, "learning_rate": 8.864520048602674e-05, "loss": 1.6788, "step": 1879 }, { "epoch": 0.5706480497799362, "grad_norm": 0.4199315309524536, "learning_rate": 8.863912515188337e-05, "loss": 1.8819, "step": 1880 }, { "epoch": 0.5709515859766278, "grad_norm": 0.4044843912124634, "learning_rate": 8.863304981773998e-05, "loss": 1.8221, "step": 1881 }, { "epoch": 0.5712551221733192, "grad_norm": 0.5554643273353577, "learning_rate": 8.86269744835966e-05, "loss": 1.6682, "step": 1882 }, { "epoch": 0.5715586583700106, "grad_norm": 0.45517250895500183, "learning_rate": 8.862089914945323e-05, "loss": 1.8345, "step": 1883 }, { "epoch": 0.5718621945667021, "grad_norm": 0.4475466310977936, "learning_rate": 8.861482381530985e-05, "loss": 1.9378, "step": 1884 }, { "epoch": 0.5721657307633935, "grad_norm": 0.5567486882209778, "learning_rate": 8.860874848116647e-05, "loss": 1.6444, "step": 1885 }, { "epoch": 0.572469266960085, "grad_norm": 0.3710486590862274, "learning_rate": 8.860267314702308e-05, "loss": 1.6935, "step": 1886 }, { "epoch": 0.5727728031567765, "grad_norm": 0.4086054861545563, "learning_rate": 8.859659781287971e-05, "loss": 1.7854, "step": 1887 }, { "epoch": 0.5730763393534679, "grad_norm": 0.46489015221595764, "learning_rate": 8.859052247873633e-05, "loss": 1.8197, "step": 1888 }, { "epoch": 0.5733798755501593, "grad_norm": 0.7444620132446289, "learning_rate": 8.858444714459295e-05, "loss": 1.53, "step": 1889 }, { "epoch": 0.5736834117468508, "grad_norm": 0.4494125545024872, "learning_rate": 8.857837181044958e-05, "loss": 1.6418, "step": 1890 }, { "epoch": 0.5739869479435422, "grad_norm": 0.6012828946113586, "learning_rate": 8.857229647630621e-05, "loss": 2.3039, "step": 1891 }, { "epoch": 0.5742904841402338, "grad_norm": 0.44922634959220886, "learning_rate": 8.856622114216281e-05, "loss": 1.8881, "step": 1892 }, { "epoch": 0.5745940203369252, "grad_norm": 0.34000277519226074, "learning_rate": 8.856014580801945e-05, "loss": 1.5964, "step": 1893 }, { "epoch": 0.5748975565336166, "grad_norm": 0.4107670485973358, "learning_rate": 8.855407047387608e-05, "loss": 1.9057, "step": 1894 }, { "epoch": 0.5752010927303081, "grad_norm": 0.3938602805137634, "learning_rate": 8.854799513973268e-05, "loss": 2.0193, "step": 1895 }, { "epoch": 0.5755046289269995, "grad_norm": 0.3723643720149994, "learning_rate": 8.854191980558931e-05, "loss": 2.0371, "step": 1896 }, { "epoch": 0.575808165123691, "grad_norm": 0.7747316956520081, "learning_rate": 8.853584447144594e-05, "loss": 1.5783, "step": 1897 }, { "epoch": 0.5761117013203825, "grad_norm": 0.40745773911476135, "learning_rate": 8.852976913730256e-05, "loss": 1.6217, "step": 1898 }, { "epoch": 0.5764152375170739, "grad_norm": 0.363471120595932, "learning_rate": 8.852369380315918e-05, "loss": 1.948, "step": 1899 }, { "epoch": 0.5767187737137653, "grad_norm": 0.3844568133354187, "learning_rate": 8.85176184690158e-05, "loss": 2.0447, "step": 1900 }, { "epoch": 0.5770223099104568, "grad_norm": 0.42804035544395447, "learning_rate": 8.851154313487242e-05, "loss": 1.9196, "step": 1901 }, { "epoch": 0.5773258461071483, "grad_norm": 0.36453336477279663, "learning_rate": 8.850546780072904e-05, "loss": 2.2236, "step": 1902 }, { "epoch": 0.5776293823038398, "grad_norm": 0.41334068775177, "learning_rate": 8.849939246658566e-05, "loss": 1.8657, "step": 1903 }, { "epoch": 0.5779329185005312, "grad_norm": 0.3925778567790985, "learning_rate": 8.849331713244229e-05, "loss": 1.9695, "step": 1904 }, { "epoch": 0.5782364546972226, "grad_norm": 0.39274585247039795, "learning_rate": 8.848724179829892e-05, "loss": 1.664, "step": 1905 }, { "epoch": 0.5785399908939141, "grad_norm": 0.37139561772346497, "learning_rate": 8.848116646415552e-05, "loss": 1.0956, "step": 1906 }, { "epoch": 0.5788435270906056, "grad_norm": 0.4112982451915741, "learning_rate": 8.847509113001216e-05, "loss": 2.0189, "step": 1907 }, { "epoch": 0.579147063287297, "grad_norm": 0.34007617831230164, "learning_rate": 8.846901579586879e-05, "loss": 1.537, "step": 1908 }, { "epoch": 0.5794505994839885, "grad_norm": 0.43591251969337463, "learning_rate": 8.846294046172539e-05, "loss": 1.8668, "step": 1909 }, { "epoch": 0.5797541356806799, "grad_norm": 0.4715147316455841, "learning_rate": 8.845686512758202e-05, "loss": 1.9474, "step": 1910 }, { "epoch": 0.5800576718773713, "grad_norm": 0.5986727476119995, "learning_rate": 8.845078979343865e-05, "loss": 1.9555, "step": 1911 }, { "epoch": 0.5803612080740629, "grad_norm": 0.43499329686164856, "learning_rate": 8.844471445929527e-05, "loss": 1.8719, "step": 1912 }, { "epoch": 0.5806647442707543, "grad_norm": 0.4152344763278961, "learning_rate": 8.843863912515189e-05, "loss": 2.0322, "step": 1913 }, { "epoch": 0.5809682804674458, "grad_norm": 0.4037158787250519, "learning_rate": 8.84325637910085e-05, "loss": 1.9687, "step": 1914 }, { "epoch": 0.5812718166641372, "grad_norm": 0.4261537492275238, "learning_rate": 8.842648845686513e-05, "loss": 1.9674, "step": 1915 }, { "epoch": 0.5815753528608286, "grad_norm": 0.3880082070827484, "learning_rate": 8.842041312272175e-05, "loss": 1.7925, "step": 1916 }, { "epoch": 0.58187888905752, "grad_norm": 0.7090932130813599, "learning_rate": 8.841433778857837e-05, "loss": 1.8392, "step": 1917 }, { "epoch": 0.5821824252542116, "grad_norm": 0.4407334625720978, "learning_rate": 8.8408262454435e-05, "loss": 1.9389, "step": 1918 }, { "epoch": 0.582485961450903, "grad_norm": 0.40139150619506836, "learning_rate": 8.840218712029162e-05, "loss": 2.0225, "step": 1919 }, { "epoch": 0.5827894976475945, "grad_norm": 0.7051631212234497, "learning_rate": 8.839611178614823e-05, "loss": 1.9287, "step": 1920 }, { "epoch": 0.5830930338442859, "grad_norm": 0.4037090241909027, "learning_rate": 8.839003645200487e-05, "loss": 1.7706, "step": 1921 }, { "epoch": 0.5833965700409773, "grad_norm": 0.4044518768787384, "learning_rate": 8.83839611178615e-05, "loss": 1.9106, "step": 1922 }, { "epoch": 0.5837001062376689, "grad_norm": 0.5114139914512634, "learning_rate": 8.83778857837181e-05, "loss": 2.0017, "step": 1923 }, { "epoch": 0.5840036424343603, "grad_norm": 0.39643585681915283, "learning_rate": 8.837181044957473e-05, "loss": 2.0096, "step": 1924 }, { "epoch": 0.5843071786310517, "grad_norm": 0.4566240608692169, "learning_rate": 8.836573511543136e-05, "loss": 2.0962, "step": 1925 }, { "epoch": 0.5846107148277432, "grad_norm": 0.37759748101234436, "learning_rate": 8.835965978128798e-05, "loss": 1.6786, "step": 1926 }, { "epoch": 0.5849142510244346, "grad_norm": 0.37798550724983215, "learning_rate": 8.83535844471446e-05, "loss": 2.1075, "step": 1927 }, { "epoch": 0.5852177872211262, "grad_norm": 0.40494439005851746, "learning_rate": 8.834750911300121e-05, "loss": 1.7167, "step": 1928 }, { "epoch": 0.5855213234178176, "grad_norm": 0.3333325684070587, "learning_rate": 8.834143377885784e-05, "loss": 1.9133, "step": 1929 }, { "epoch": 0.585824859614509, "grad_norm": 0.3827350437641144, "learning_rate": 8.833535844471446e-05, "loss": 1.8537, "step": 1930 }, { "epoch": 0.5861283958112005, "grad_norm": 0.4088849127292633, "learning_rate": 8.832928311057108e-05, "loss": 1.9625, "step": 1931 }, { "epoch": 0.5864319320078919, "grad_norm": 0.3575502932071686, "learning_rate": 8.832320777642771e-05, "loss": 1.851, "step": 1932 }, { "epoch": 0.5867354682045834, "grad_norm": 0.38579368591308594, "learning_rate": 8.831713244228433e-05, "loss": 1.8121, "step": 1933 }, { "epoch": 0.5870390044012749, "grad_norm": 0.37787890434265137, "learning_rate": 8.831105710814094e-05, "loss": 1.9509, "step": 1934 }, { "epoch": 0.5873425405979663, "grad_norm": 0.4074660837650299, "learning_rate": 8.830498177399758e-05, "loss": 1.9987, "step": 1935 }, { "epoch": 0.5876460767946577, "grad_norm": 0.7902248501777649, "learning_rate": 8.82989064398542e-05, "loss": 1.6704, "step": 1936 }, { "epoch": 0.5879496129913492, "grad_norm": 0.3240687847137451, "learning_rate": 8.829283110571081e-05, "loss": 1.1956, "step": 1937 }, { "epoch": 0.5882531491880407, "grad_norm": 0.410543829202652, "learning_rate": 8.828675577156744e-05, "loss": 1.9533, "step": 1938 }, { "epoch": 0.5885566853847322, "grad_norm": 0.4559386670589447, "learning_rate": 8.828068043742406e-05, "loss": 1.566, "step": 1939 }, { "epoch": 0.5888602215814236, "grad_norm": 0.44418251514434814, "learning_rate": 8.827460510328069e-05, "loss": 1.814, "step": 1940 }, { "epoch": 0.589163757778115, "grad_norm": 0.42374011874198914, "learning_rate": 8.82685297691373e-05, "loss": 1.8946, "step": 1941 }, { "epoch": 0.5894672939748065, "grad_norm": 0.44734686613082886, "learning_rate": 8.826245443499392e-05, "loss": 1.9893, "step": 1942 }, { "epoch": 0.589770830171498, "grad_norm": 0.42960959672927856, "learning_rate": 8.825637910085055e-05, "loss": 1.5659, "step": 1943 }, { "epoch": 0.5900743663681894, "grad_norm": 0.44513779878616333, "learning_rate": 8.825030376670717e-05, "loss": 1.969, "step": 1944 }, { "epoch": 0.5903779025648809, "grad_norm": 0.39732202887535095, "learning_rate": 8.824422843256379e-05, "loss": 1.9469, "step": 1945 }, { "epoch": 0.5906814387615723, "grad_norm": 0.490384042263031, "learning_rate": 8.823815309842042e-05, "loss": 1.7434, "step": 1946 }, { "epoch": 0.5909849749582637, "grad_norm": 0.5644544959068298, "learning_rate": 8.823207776427704e-05, "loss": 2.1488, "step": 1947 }, { "epoch": 0.5912885111549552, "grad_norm": 0.43499046564102173, "learning_rate": 8.822600243013366e-05, "loss": 1.6023, "step": 1948 }, { "epoch": 0.5915920473516467, "grad_norm": 0.3970509469509125, "learning_rate": 8.821992709599029e-05, "loss": 1.5955, "step": 1949 }, { "epoch": 0.5918955835483382, "grad_norm": 0.39471563696861267, "learning_rate": 8.821385176184692e-05, "loss": 1.9294, "step": 1950 }, { "epoch": 0.5921991197450296, "grad_norm": 0.42955949902534485, "learning_rate": 8.820777642770352e-05, "loss": 1.9628, "step": 1951 }, { "epoch": 0.592502655941721, "grad_norm": 0.3734053373336792, "learning_rate": 8.820170109356015e-05, "loss": 1.9927, "step": 1952 }, { "epoch": 0.5928061921384125, "grad_norm": 0.40868285298347473, "learning_rate": 8.819562575941677e-05, "loss": 2.0704, "step": 1953 }, { "epoch": 0.593109728335104, "grad_norm": 0.4374091625213623, "learning_rate": 8.81895504252734e-05, "loss": 1.7447, "step": 1954 }, { "epoch": 0.5934132645317954, "grad_norm": 0.408299058675766, "learning_rate": 8.818347509113002e-05, "loss": 2.076, "step": 1955 }, { "epoch": 0.5937168007284869, "grad_norm": 0.4676043391227722, "learning_rate": 8.817739975698663e-05, "loss": 1.9319, "step": 1956 }, { "epoch": 0.5940203369251783, "grad_norm": 1.3173327445983887, "learning_rate": 8.817132442284326e-05, "loss": 1.6364, "step": 1957 }, { "epoch": 0.5943238731218697, "grad_norm": 0.39462506771087646, "learning_rate": 8.816524908869988e-05, "loss": 2.1088, "step": 1958 }, { "epoch": 0.5946274093185613, "grad_norm": 0.37660276889801025, "learning_rate": 8.81591737545565e-05, "loss": 1.641, "step": 1959 }, { "epoch": 0.5949309455152527, "grad_norm": 0.3797924518585205, "learning_rate": 8.815309842041313e-05, "loss": 1.4328, "step": 1960 }, { "epoch": 0.5952344817119442, "grad_norm": 0.33881229162216187, "learning_rate": 8.814702308626975e-05, "loss": 1.6713, "step": 1961 }, { "epoch": 0.5955380179086356, "grad_norm": 0.43969300389289856, "learning_rate": 8.814094775212637e-05, "loss": 1.8251, "step": 1962 }, { "epoch": 0.595841554105327, "grad_norm": 0.39608824253082275, "learning_rate": 8.8134872417983e-05, "loss": 2.0762, "step": 1963 }, { "epoch": 0.5961450903020186, "grad_norm": 0.3688305914402008, "learning_rate": 8.812879708383963e-05, "loss": 1.886, "step": 1964 }, { "epoch": 0.59644862649871, "grad_norm": 0.3397257328033447, "learning_rate": 8.812272174969623e-05, "loss": 1.8668, "step": 1965 }, { "epoch": 0.5967521626954014, "grad_norm": 0.39257940649986267, "learning_rate": 8.811664641555286e-05, "loss": 1.879, "step": 1966 }, { "epoch": 0.5970556988920929, "grad_norm": 0.41007375717163086, "learning_rate": 8.811057108140948e-05, "loss": 1.7411, "step": 1967 }, { "epoch": 0.5973592350887843, "grad_norm": 0.3694823682308197, "learning_rate": 8.81044957472661e-05, "loss": 1.7675, "step": 1968 }, { "epoch": 0.5976627712854758, "grad_norm": 0.9148819446563721, "learning_rate": 8.809842041312273e-05, "loss": 2.0984, "step": 1969 }, { "epoch": 0.5979663074821673, "grad_norm": 0.379384309053421, "learning_rate": 8.809234507897934e-05, "loss": 2.1933, "step": 1970 }, { "epoch": 0.5982698436788587, "grad_norm": 0.5637233257293701, "learning_rate": 8.808626974483598e-05, "loss": 1.2066, "step": 1971 }, { "epoch": 0.5985733798755501, "grad_norm": 0.42961108684539795, "learning_rate": 8.808019441069259e-05, "loss": 1.6595, "step": 1972 }, { "epoch": 0.5988769160722416, "grad_norm": 0.41248828172683716, "learning_rate": 8.807411907654921e-05, "loss": 2.0266, "step": 1973 }, { "epoch": 0.599180452268933, "grad_norm": 0.41730985045433044, "learning_rate": 8.806804374240584e-05, "loss": 2.0968, "step": 1974 }, { "epoch": 0.5994839884656246, "grad_norm": 0.4452510178089142, "learning_rate": 8.806196840826246e-05, "loss": 1.6038, "step": 1975 }, { "epoch": 0.599787524662316, "grad_norm": 0.457256942987442, "learning_rate": 8.805589307411908e-05, "loss": 2.0862, "step": 1976 }, { "epoch": 0.6000910608590074, "grad_norm": 0.38506418466567993, "learning_rate": 8.80498177399757e-05, "loss": 1.6764, "step": 1977 }, { "epoch": 0.6003945970556989, "grad_norm": 0.4200589060783386, "learning_rate": 8.804374240583234e-05, "loss": 2.0962, "step": 1978 }, { "epoch": 0.6006981332523903, "grad_norm": 0.41140785813331604, "learning_rate": 8.803766707168894e-05, "loss": 1.7345, "step": 1979 }, { "epoch": 0.6010016694490818, "grad_norm": 0.3584011495113373, "learning_rate": 8.803159173754557e-05, "loss": 2.1839, "step": 1980 }, { "epoch": 0.6013052056457733, "grad_norm": 0.40637287497520447, "learning_rate": 8.802551640340219e-05, "loss": 2.0997, "step": 1981 }, { "epoch": 0.6016087418424647, "grad_norm": 0.42887794971466064, "learning_rate": 8.80194410692588e-05, "loss": 1.967, "step": 1982 }, { "epoch": 0.6019122780391561, "grad_norm": 0.42879635095596313, "learning_rate": 8.801336573511544e-05, "loss": 2.0618, "step": 1983 }, { "epoch": 0.6022158142358476, "grad_norm": 0.5477713346481323, "learning_rate": 8.800729040097205e-05, "loss": 1.3743, "step": 1984 }, { "epoch": 0.6025193504325391, "grad_norm": 0.3772994875907898, "learning_rate": 8.800121506682869e-05, "loss": 1.5254, "step": 1985 }, { "epoch": 0.6028228866292306, "grad_norm": 0.4140057861804962, "learning_rate": 8.79951397326853e-05, "loss": 2.164, "step": 1986 }, { "epoch": 0.603126422825922, "grad_norm": 0.44529426097869873, "learning_rate": 8.798906439854192e-05, "loss": 1.8042, "step": 1987 }, { "epoch": 0.6034299590226134, "grad_norm": 0.39523541927337646, "learning_rate": 8.798298906439855e-05, "loss": 1.8134, "step": 1988 }, { "epoch": 0.6037334952193049, "grad_norm": 0.38513484597206116, "learning_rate": 8.797691373025517e-05, "loss": 1.8259, "step": 1989 }, { "epoch": 0.6040370314159964, "grad_norm": 0.4686470329761505, "learning_rate": 8.797083839611179e-05, "loss": 2.2113, "step": 1990 }, { "epoch": 0.6043405676126878, "grad_norm": 0.4119713008403778, "learning_rate": 8.796476306196842e-05, "loss": 1.9614, "step": 1991 }, { "epoch": 0.6046441038093793, "grad_norm": 0.40786707401275635, "learning_rate": 8.795868772782503e-05, "loss": 1.9369, "step": 1992 }, { "epoch": 0.6049476400060707, "grad_norm": 0.3869648873806, "learning_rate": 8.795261239368165e-05, "loss": 2.1204, "step": 1993 }, { "epoch": 0.6052511762027621, "grad_norm": 0.3826451301574707, "learning_rate": 8.794653705953828e-05, "loss": 2.0715, "step": 1994 }, { "epoch": 0.6055547123994537, "grad_norm": 0.38412514328956604, "learning_rate": 8.79404617253949e-05, "loss": 1.9482, "step": 1995 }, { "epoch": 0.6058582485961451, "grad_norm": 0.4388350248336792, "learning_rate": 8.793438639125152e-05, "loss": 1.9746, "step": 1996 }, { "epoch": 0.6061617847928366, "grad_norm": 0.3750387132167816, "learning_rate": 8.792831105710815e-05, "loss": 1.7831, "step": 1997 }, { "epoch": 0.606465320989528, "grad_norm": 0.42686113715171814, "learning_rate": 8.792223572296476e-05, "loss": 1.6419, "step": 1998 }, { "epoch": 0.6067688571862194, "grad_norm": 0.39653515815734863, "learning_rate": 8.79161603888214e-05, "loss": 1.7617, "step": 1999 }, { "epoch": 0.6070723933829109, "grad_norm": 0.4662545621395111, "learning_rate": 8.791008505467801e-05, "loss": 1.878, "step": 2000 }, { "epoch": 0.6073759295796024, "grad_norm": 0.4733245074748993, "learning_rate": 8.790400972053463e-05, "loss": 1.874, "step": 2001 }, { "epoch": 0.6076794657762938, "grad_norm": 0.4228340983390808, "learning_rate": 8.789793438639126e-05, "loss": 1.9415, "step": 2002 }, { "epoch": 0.6079830019729853, "grad_norm": 0.4229651391506195, "learning_rate": 8.789185905224788e-05, "loss": 1.5762, "step": 2003 }, { "epoch": 0.6082865381696767, "grad_norm": 0.4287284016609192, "learning_rate": 8.78857837181045e-05, "loss": 2.1414, "step": 2004 }, { "epoch": 0.6085900743663681, "grad_norm": 0.555934488773346, "learning_rate": 8.787970838396113e-05, "loss": 1.9591, "step": 2005 }, { "epoch": 0.6088936105630597, "grad_norm": 0.42303675413131714, "learning_rate": 8.787363304981774e-05, "loss": 1.5513, "step": 2006 }, { "epoch": 0.6091971467597511, "grad_norm": 0.37572699785232544, "learning_rate": 8.786755771567436e-05, "loss": 1.9508, "step": 2007 }, { "epoch": 0.6095006829564426, "grad_norm": 0.3933078944683075, "learning_rate": 8.786148238153099e-05, "loss": 1.9507, "step": 2008 }, { "epoch": 0.609804219153134, "grad_norm": 0.46419456601142883, "learning_rate": 8.785540704738761e-05, "loss": 2.0019, "step": 2009 }, { "epoch": 0.6101077553498254, "grad_norm": 0.38383206725120544, "learning_rate": 8.784933171324423e-05, "loss": 1.9533, "step": 2010 }, { "epoch": 0.610411291546517, "grad_norm": 0.37486881017684937, "learning_rate": 8.784325637910086e-05, "loss": 1.9508, "step": 2011 }, { "epoch": 0.6107148277432084, "grad_norm": 0.34909558296203613, "learning_rate": 8.783718104495747e-05, "loss": 1.8833, "step": 2012 }, { "epoch": 0.6110183639398998, "grad_norm": 0.6226269006729126, "learning_rate": 8.78311057108141e-05, "loss": 2.1135, "step": 2013 }, { "epoch": 0.6113219001365913, "grad_norm": 0.45638999342918396, "learning_rate": 8.782503037667072e-05, "loss": 1.8989, "step": 2014 }, { "epoch": 0.6116254363332827, "grad_norm": 0.41857293248176575, "learning_rate": 8.781895504252734e-05, "loss": 1.7609, "step": 2015 }, { "epoch": 0.6119289725299742, "grad_norm": 0.4325519800186157, "learning_rate": 8.781287970838397e-05, "loss": 1.6411, "step": 2016 }, { "epoch": 0.6122325087266657, "grad_norm": 0.3558877110481262, "learning_rate": 8.780680437424059e-05, "loss": 1.7634, "step": 2017 }, { "epoch": 0.6125360449233571, "grad_norm": 0.42849549651145935, "learning_rate": 8.78007290400972e-05, "loss": 1.8625, "step": 2018 }, { "epoch": 0.6128395811200485, "grad_norm": 0.7057125568389893, "learning_rate": 8.779465370595384e-05, "loss": 2.0542, "step": 2019 }, { "epoch": 0.61314311731674, "grad_norm": 0.3607623279094696, "learning_rate": 8.778857837181045e-05, "loss": 2.0613, "step": 2020 }, { "epoch": 0.6134466535134315, "grad_norm": 0.35904109477996826, "learning_rate": 8.778250303766707e-05, "loss": 2.1633, "step": 2021 }, { "epoch": 0.613750189710123, "grad_norm": 0.38341954350471497, "learning_rate": 8.77764277035237e-05, "loss": 1.925, "step": 2022 }, { "epoch": 0.6140537259068144, "grad_norm": 0.8183413743972778, "learning_rate": 8.777035236938032e-05, "loss": 1.5858, "step": 2023 }, { "epoch": 0.6143572621035058, "grad_norm": 0.4051649272441864, "learning_rate": 8.776427703523694e-05, "loss": 1.9788, "step": 2024 }, { "epoch": 0.6146607983001973, "grad_norm": 0.40388303995132446, "learning_rate": 8.775820170109357e-05, "loss": 1.9113, "step": 2025 }, { "epoch": 0.6149643344968888, "grad_norm": 0.38880276679992676, "learning_rate": 8.775212636695018e-05, "loss": 1.7535, "step": 2026 }, { "epoch": 0.6152678706935802, "grad_norm": 0.41596999764442444, "learning_rate": 8.774605103280682e-05, "loss": 1.9532, "step": 2027 }, { "epoch": 0.6155714068902717, "grad_norm": 0.3971737325191498, "learning_rate": 8.773997569866343e-05, "loss": 1.6123, "step": 2028 }, { "epoch": 0.6158749430869631, "grad_norm": 0.610409140586853, "learning_rate": 8.773390036452005e-05, "loss": 1.969, "step": 2029 }, { "epoch": 0.6161784792836545, "grad_norm": 0.4366918206214905, "learning_rate": 8.772782503037668e-05, "loss": 1.7944, "step": 2030 }, { "epoch": 0.616482015480346, "grad_norm": 0.3931274712085724, "learning_rate": 8.77217496962333e-05, "loss": 2.0559, "step": 2031 }, { "epoch": 0.6167855516770375, "grad_norm": 0.556197464466095, "learning_rate": 8.771567436208992e-05, "loss": 1.6191, "step": 2032 }, { "epoch": 0.617089087873729, "grad_norm": 0.4099692404270172, "learning_rate": 8.770959902794655e-05, "loss": 1.6357, "step": 2033 }, { "epoch": 0.6173926240704204, "grad_norm": 0.6582362055778503, "learning_rate": 8.770352369380316e-05, "loss": 2.1112, "step": 2034 }, { "epoch": 0.6176961602671118, "grad_norm": 0.43522998690605164, "learning_rate": 8.769744835965978e-05, "loss": 2.078, "step": 2035 }, { "epoch": 0.6179996964638033, "grad_norm": 0.3984440565109253, "learning_rate": 8.769137302551641e-05, "loss": 1.9548, "step": 2036 }, { "epoch": 0.6183032326604948, "grad_norm": 0.4203691780567169, "learning_rate": 8.768529769137303e-05, "loss": 2.0214, "step": 2037 }, { "epoch": 0.6186067688571862, "grad_norm": 0.4662054181098938, "learning_rate": 8.767922235722965e-05, "loss": 1.8141, "step": 2038 }, { "epoch": 0.6189103050538777, "grad_norm": 0.440121591091156, "learning_rate": 8.767314702308628e-05, "loss": 1.8577, "step": 2039 }, { "epoch": 0.6192138412505691, "grad_norm": 0.4438299536705017, "learning_rate": 8.76670716889429e-05, "loss": 1.5552, "step": 2040 }, { "epoch": 0.6195173774472605, "grad_norm": 0.3925747871398926, "learning_rate": 8.766099635479951e-05, "loss": 1.9399, "step": 2041 }, { "epoch": 0.6198209136439521, "grad_norm": 0.4043785333633423, "learning_rate": 8.765492102065614e-05, "loss": 1.9208, "step": 2042 }, { "epoch": 0.6201244498406435, "grad_norm": 0.4448244273662567, "learning_rate": 8.764884568651276e-05, "loss": 1.6187, "step": 2043 }, { "epoch": 0.620427986037335, "grad_norm": 0.5388829112052917, "learning_rate": 8.764277035236939e-05, "loss": 1.4629, "step": 2044 }, { "epoch": 0.6207315222340264, "grad_norm": 0.3737129867076874, "learning_rate": 8.763669501822601e-05, "loss": 1.9238, "step": 2045 }, { "epoch": 0.6210350584307178, "grad_norm": 0.4435792863368988, "learning_rate": 8.763061968408263e-05, "loss": 1.746, "step": 2046 }, { "epoch": 0.6213385946274094, "grad_norm": 0.3660859167575836, "learning_rate": 8.762454434993926e-05, "loss": 1.9025, "step": 2047 }, { "epoch": 0.6216421308241008, "grad_norm": 0.6945536136627197, "learning_rate": 8.761846901579587e-05, "loss": 2.0719, "step": 2048 }, { "epoch": 0.6219456670207922, "grad_norm": 0.5578482151031494, "learning_rate": 8.761239368165249e-05, "loss": 2.0642, "step": 2049 }, { "epoch": 0.6222492032174837, "grad_norm": 0.38080549240112305, "learning_rate": 8.760631834750912e-05, "loss": 1.9905, "step": 2050 }, { "epoch": 0.6225527394141751, "grad_norm": 0.39509710669517517, "learning_rate": 8.760024301336574e-05, "loss": 1.8086, "step": 2051 }, { "epoch": 0.6228562756108666, "grad_norm": 0.39778873324394226, "learning_rate": 8.759416767922236e-05, "loss": 1.8397, "step": 2052 }, { "epoch": 0.6231598118075581, "grad_norm": 0.4001278877258301, "learning_rate": 8.758809234507899e-05, "loss": 2.1541, "step": 2053 }, { "epoch": 0.6234633480042495, "grad_norm": 0.41478973627090454, "learning_rate": 8.75820170109356e-05, "loss": 1.8661, "step": 2054 }, { "epoch": 0.623766884200941, "grad_norm": 0.44780445098876953, "learning_rate": 8.757594167679222e-05, "loss": 2.06, "step": 2055 }, { "epoch": 0.6240704203976324, "grad_norm": 0.4024375081062317, "learning_rate": 8.756986634264885e-05, "loss": 1.8951, "step": 2056 }, { "epoch": 0.6243739565943238, "grad_norm": 0.48133009672164917, "learning_rate": 8.756379100850547e-05, "loss": 1.8264, "step": 2057 }, { "epoch": 0.6246774927910154, "grad_norm": 0.4362419843673706, "learning_rate": 8.75577156743621e-05, "loss": 1.584, "step": 2058 }, { "epoch": 0.6249810289877068, "grad_norm": 0.39468279480934143, "learning_rate": 8.755164034021872e-05, "loss": 2.1541, "step": 2059 }, { "epoch": 0.6252845651843982, "grad_norm": 0.3956018388271332, "learning_rate": 8.754556500607534e-05, "loss": 2.0688, "step": 2060 }, { "epoch": 0.6255881013810897, "grad_norm": 0.3778972327709198, "learning_rate": 8.753948967193197e-05, "loss": 1.2158, "step": 2061 }, { "epoch": 0.6258916375777811, "grad_norm": 0.6592405438423157, "learning_rate": 8.753341433778858e-05, "loss": 2.1388, "step": 2062 }, { "epoch": 0.6261951737744726, "grad_norm": 0.44248607754707336, "learning_rate": 8.75273390036452e-05, "loss": 1.7007, "step": 2063 }, { "epoch": 0.6264987099711641, "grad_norm": 0.40454086661338806, "learning_rate": 8.752126366950183e-05, "loss": 1.9904, "step": 2064 }, { "epoch": 0.6268022461678555, "grad_norm": 0.4150254428386688, "learning_rate": 8.751518833535845e-05, "loss": 1.825, "step": 2065 }, { "epoch": 0.627105782364547, "grad_norm": 0.39456769824028015, "learning_rate": 8.750911300121507e-05, "loss": 1.6171, "step": 2066 }, { "epoch": 0.6274093185612384, "grad_norm": 0.42913463711738586, "learning_rate": 8.75030376670717e-05, "loss": 1.9738, "step": 2067 }, { "epoch": 0.6277128547579299, "grad_norm": 0.6062834858894348, "learning_rate": 8.749696233292831e-05, "loss": 2.3641, "step": 2068 }, { "epoch": 0.6280163909546214, "grad_norm": 0.4486273229122162, "learning_rate": 8.749088699878493e-05, "loss": 1.895, "step": 2069 }, { "epoch": 0.6283199271513128, "grad_norm": 0.6650506854057312, "learning_rate": 8.748481166464156e-05, "loss": 1.9425, "step": 2070 }, { "epoch": 0.6286234633480042, "grad_norm": 0.4337095618247986, "learning_rate": 8.747873633049818e-05, "loss": 1.6244, "step": 2071 }, { "epoch": 0.6289269995446957, "grad_norm": 0.39554956555366516, "learning_rate": 8.747266099635481e-05, "loss": 1.9773, "step": 2072 }, { "epoch": 0.6292305357413872, "grad_norm": 0.6905329823493958, "learning_rate": 8.746658566221143e-05, "loss": 1.4572, "step": 2073 }, { "epoch": 0.6295340719380786, "grad_norm": 0.4814346730709076, "learning_rate": 8.746051032806805e-05, "loss": 1.5925, "step": 2074 }, { "epoch": 0.6298376081347701, "grad_norm": 0.5194016695022583, "learning_rate": 8.745443499392468e-05, "loss": 1.9103, "step": 2075 }, { "epoch": 0.6301411443314615, "grad_norm": 0.38328269124031067, "learning_rate": 8.74483596597813e-05, "loss": 2.0036, "step": 2076 }, { "epoch": 0.630444680528153, "grad_norm": 0.3967950642108917, "learning_rate": 8.744228432563791e-05, "loss": 2.0632, "step": 2077 }, { "epoch": 0.6307482167248445, "grad_norm": 0.41844338178634644, "learning_rate": 8.743620899149454e-05, "loss": 1.7965, "step": 2078 }, { "epoch": 0.6310517529215359, "grad_norm": 0.4322264790534973, "learning_rate": 8.743013365735116e-05, "loss": 1.6439, "step": 2079 }, { "epoch": 0.6313552891182274, "grad_norm": 1.2367935180664062, "learning_rate": 8.742405832320778e-05, "loss": 1.4027, "step": 2080 }, { "epoch": 0.6316588253149188, "grad_norm": 0.40163764357566833, "learning_rate": 8.741798298906441e-05, "loss": 1.6465, "step": 2081 }, { "epoch": 0.6319623615116102, "grad_norm": 0.4429662823677063, "learning_rate": 8.741190765492102e-05, "loss": 2.0381, "step": 2082 }, { "epoch": 0.6322658977083017, "grad_norm": 0.4150178134441376, "learning_rate": 8.740583232077764e-05, "loss": 1.9406, "step": 2083 }, { "epoch": 0.6325694339049932, "grad_norm": 1.1689107418060303, "learning_rate": 8.739975698663427e-05, "loss": 2.1465, "step": 2084 }, { "epoch": 0.6328729701016846, "grad_norm": 0.39959049224853516, "learning_rate": 8.739368165249089e-05, "loss": 1.7467, "step": 2085 }, { "epoch": 0.6331765062983761, "grad_norm": 0.4441443979740143, "learning_rate": 8.738760631834752e-05, "loss": 1.3684, "step": 2086 }, { "epoch": 0.6334800424950675, "grad_norm": 0.42959195375442505, "learning_rate": 8.738153098420414e-05, "loss": 2.0832, "step": 2087 }, { "epoch": 0.6337835786917589, "grad_norm": 0.5253334045410156, "learning_rate": 8.737545565006076e-05, "loss": 1.83, "step": 2088 }, { "epoch": 0.6340871148884505, "grad_norm": 0.4475717842578888, "learning_rate": 8.736938031591739e-05, "loss": 1.9088, "step": 2089 }, { "epoch": 0.6343906510851419, "grad_norm": 0.4162061810493469, "learning_rate": 8.736330498177399e-05, "loss": 1.6869, "step": 2090 }, { "epoch": 0.6346941872818334, "grad_norm": 0.41907912492752075, "learning_rate": 8.735722964763062e-05, "loss": 1.6695, "step": 2091 }, { "epoch": 0.6349977234785248, "grad_norm": 0.4472843110561371, "learning_rate": 8.735115431348725e-05, "loss": 2.213, "step": 2092 }, { "epoch": 0.6353012596752162, "grad_norm": 0.4260854125022888, "learning_rate": 8.734507897934387e-05, "loss": 2.2187, "step": 2093 }, { "epoch": 0.6356047958719078, "grad_norm": 0.5154047608375549, "learning_rate": 8.733900364520049e-05, "loss": 1.669, "step": 2094 }, { "epoch": 0.6359083320685992, "grad_norm": 0.42840951681137085, "learning_rate": 8.733292831105712e-05, "loss": 1.5714, "step": 2095 }, { "epoch": 0.6362118682652906, "grad_norm": 0.3721560537815094, "learning_rate": 8.732685297691373e-05, "loss": 1.8571, "step": 2096 }, { "epoch": 0.6365154044619821, "grad_norm": 0.38668882846832275, "learning_rate": 8.732077764277035e-05, "loss": 1.9735, "step": 2097 }, { "epoch": 0.6368189406586735, "grad_norm": 0.44400742650032043, "learning_rate": 8.731470230862698e-05, "loss": 1.8012, "step": 2098 }, { "epoch": 0.637122476855365, "grad_norm": 0.4170168936252594, "learning_rate": 8.73086269744836e-05, "loss": 2.0064, "step": 2099 }, { "epoch": 0.6374260130520565, "grad_norm": 0.4339911937713623, "learning_rate": 8.730255164034023e-05, "loss": 1.8095, "step": 2100 }, { "epoch": 0.6377295492487479, "grad_norm": 0.4953417479991913, "learning_rate": 8.729647630619685e-05, "loss": 1.6443, "step": 2101 }, { "epoch": 0.6380330854454394, "grad_norm": 0.43667685985565186, "learning_rate": 8.729040097205347e-05, "loss": 1.5005, "step": 2102 }, { "epoch": 0.6383366216421308, "grad_norm": 0.42101868987083435, "learning_rate": 8.72843256379101e-05, "loss": 1.9, "step": 2103 }, { "epoch": 0.6386401578388223, "grad_norm": 0.4094242751598358, "learning_rate": 8.72782503037667e-05, "loss": 1.4354, "step": 2104 }, { "epoch": 0.6389436940355138, "grad_norm": 0.36078140139579773, "learning_rate": 8.727217496962333e-05, "loss": 1.9761, "step": 2105 }, { "epoch": 0.6392472302322052, "grad_norm": 0.40915626287460327, "learning_rate": 8.726609963547996e-05, "loss": 1.7398, "step": 2106 }, { "epoch": 0.6395507664288966, "grad_norm": 0.4518681466579437, "learning_rate": 8.726002430133658e-05, "loss": 1.6962, "step": 2107 }, { "epoch": 0.6398543026255881, "grad_norm": 0.41864755749702454, "learning_rate": 8.72539489671932e-05, "loss": 1.7477, "step": 2108 }, { "epoch": 0.6401578388222796, "grad_norm": 0.37776780128479004, "learning_rate": 8.724787363304983e-05, "loss": 1.4601, "step": 2109 }, { "epoch": 0.640461375018971, "grad_norm": 0.4602903723716736, "learning_rate": 8.724179829890644e-05, "loss": 1.7791, "step": 2110 }, { "epoch": 0.6407649112156625, "grad_norm": 0.3697658181190491, "learning_rate": 8.723572296476306e-05, "loss": 1.8286, "step": 2111 }, { "epoch": 0.6410684474123539, "grad_norm": 0.3810010254383087, "learning_rate": 8.722964763061969e-05, "loss": 1.6823, "step": 2112 }, { "epoch": 0.6413719836090453, "grad_norm": 0.3020067811012268, "learning_rate": 8.722357229647631e-05, "loss": 1.4566, "step": 2113 }, { "epoch": 0.6416755198057368, "grad_norm": 0.35782814025878906, "learning_rate": 8.721749696233293e-05, "loss": 1.986, "step": 2114 }, { "epoch": 0.6419790560024283, "grad_norm": 0.4075436294078827, "learning_rate": 8.721142162818954e-05, "loss": 2.0318, "step": 2115 }, { "epoch": 0.6422825921991198, "grad_norm": 0.3835841715335846, "learning_rate": 8.720534629404618e-05, "loss": 1.7135, "step": 2116 }, { "epoch": 0.6425861283958112, "grad_norm": 0.45285987854003906, "learning_rate": 8.71992709599028e-05, "loss": 1.9568, "step": 2117 }, { "epoch": 0.6428896645925026, "grad_norm": 0.36824312806129456, "learning_rate": 8.719319562575941e-05, "loss": 1.6443, "step": 2118 }, { "epoch": 0.6431932007891941, "grad_norm": 0.4950961172580719, "learning_rate": 8.718712029161604e-05, "loss": 2.1261, "step": 2119 }, { "epoch": 0.6434967369858856, "grad_norm": 0.36859118938446045, "learning_rate": 8.718104495747267e-05, "loss": 2.0044, "step": 2120 }, { "epoch": 0.643800273182577, "grad_norm": 0.43870800733566284, "learning_rate": 8.717496962332929e-05, "loss": 1.9407, "step": 2121 }, { "epoch": 0.6441038093792685, "grad_norm": 0.37381303310394287, "learning_rate": 8.71688942891859e-05, "loss": 2.1765, "step": 2122 }, { "epoch": 0.6444073455759599, "grad_norm": 0.39354661107063293, "learning_rate": 8.716281895504254e-05, "loss": 1.7037, "step": 2123 }, { "epoch": 0.6447108817726513, "grad_norm": 0.3997972011566162, "learning_rate": 8.715674362089915e-05, "loss": 1.9634, "step": 2124 }, { "epoch": 0.6450144179693429, "grad_norm": 0.4059608280658722, "learning_rate": 8.715066828675577e-05, "loss": 1.6695, "step": 2125 }, { "epoch": 0.6453179541660343, "grad_norm": 0.5082445740699768, "learning_rate": 8.71445929526124e-05, "loss": 1.8471, "step": 2126 }, { "epoch": 0.6456214903627258, "grad_norm": 0.3610053062438965, "learning_rate": 8.713851761846902e-05, "loss": 1.1613, "step": 2127 }, { "epoch": 0.6459250265594172, "grad_norm": 0.3617028295993805, "learning_rate": 8.713244228432564e-05, "loss": 2.1392, "step": 2128 }, { "epoch": 0.6462285627561086, "grad_norm": 0.366720587015152, "learning_rate": 8.712636695018225e-05, "loss": 2.0723, "step": 2129 }, { "epoch": 0.6465320989528002, "grad_norm": 0.6331523656845093, "learning_rate": 8.712029161603889e-05, "loss": 2.167, "step": 2130 }, { "epoch": 0.6468356351494916, "grad_norm": 0.3837411403656006, "learning_rate": 8.711421628189552e-05, "loss": 1.9423, "step": 2131 }, { "epoch": 0.647139171346183, "grad_norm": 0.49465852975845337, "learning_rate": 8.710814094775212e-05, "loss": 1.1904, "step": 2132 }, { "epoch": 0.6474427075428745, "grad_norm": 0.37504327297210693, "learning_rate": 8.710206561360875e-05, "loss": 1.3686, "step": 2133 }, { "epoch": 0.6477462437395659, "grad_norm": 0.7189307808876038, "learning_rate": 8.709599027946538e-05, "loss": 2.2359, "step": 2134 }, { "epoch": 0.6480497799362575, "grad_norm": 0.40414321422576904, "learning_rate": 8.7089914945322e-05, "loss": 1.9962, "step": 2135 }, { "epoch": 0.6483533161329489, "grad_norm": 1.6091177463531494, "learning_rate": 8.708383961117862e-05, "loss": 2.1011, "step": 2136 }, { "epoch": 0.6486568523296403, "grad_norm": 0.38812699913978577, "learning_rate": 8.707776427703525e-05, "loss": 1.8092, "step": 2137 }, { "epoch": 0.6489603885263318, "grad_norm": 0.42820391058921814, "learning_rate": 8.707168894289186e-05, "loss": 1.6027, "step": 2138 }, { "epoch": 0.6492639247230232, "grad_norm": 0.9884753823280334, "learning_rate": 8.706561360874848e-05, "loss": 1.4781, "step": 2139 }, { "epoch": 0.6495674609197146, "grad_norm": 0.477003276348114, "learning_rate": 8.705953827460511e-05, "loss": 1.5069, "step": 2140 }, { "epoch": 0.6498709971164062, "grad_norm": 0.4502262473106384, "learning_rate": 8.705346294046173e-05, "loss": 2.0444, "step": 2141 }, { "epoch": 0.6501745333130976, "grad_norm": 0.36842817068099976, "learning_rate": 8.704738760631835e-05, "loss": 1.8169, "step": 2142 }, { "epoch": 0.650478069509789, "grad_norm": 0.4413151741027832, "learning_rate": 8.704131227217496e-05, "loss": 2.0494, "step": 2143 }, { "epoch": 0.6507816057064805, "grad_norm": 0.35122597217559814, "learning_rate": 8.70352369380316e-05, "loss": 1.6942, "step": 2144 }, { "epoch": 0.6510851419031719, "grad_norm": 0.48351892828941345, "learning_rate": 8.702916160388823e-05, "loss": 2.3677, "step": 2145 }, { "epoch": 0.6513886780998634, "grad_norm": 0.43341419100761414, "learning_rate": 8.702308626974483e-05, "loss": 1.9239, "step": 2146 }, { "epoch": 0.6516922142965549, "grad_norm": 0.36051031947135925, "learning_rate": 8.701701093560146e-05, "loss": 1.7721, "step": 2147 }, { "epoch": 0.6519957504932463, "grad_norm": 0.37466931343078613, "learning_rate": 8.701093560145809e-05, "loss": 2.0199, "step": 2148 }, { "epoch": 0.6522992866899378, "grad_norm": 0.4176545739173889, "learning_rate": 8.700486026731471e-05, "loss": 1.8806, "step": 2149 }, { "epoch": 0.6526028228866292, "grad_norm": 0.4158160984516144, "learning_rate": 8.699878493317133e-05, "loss": 1.4032, "step": 2150 }, { "epoch": 0.6529063590833207, "grad_norm": 0.3781472444534302, "learning_rate": 8.699270959902796e-05, "loss": 1.6158, "step": 2151 }, { "epoch": 0.6532098952800122, "grad_norm": 0.4139382243156433, "learning_rate": 8.698663426488457e-05, "loss": 1.6677, "step": 2152 }, { "epoch": 0.6535134314767036, "grad_norm": 0.5988966226577759, "learning_rate": 8.698055893074119e-05, "loss": 2.0821, "step": 2153 }, { "epoch": 0.653816967673395, "grad_norm": 0.3822804391384125, "learning_rate": 8.697448359659782e-05, "loss": 2.2819, "step": 2154 }, { "epoch": 0.6541205038700865, "grad_norm": 0.42142486572265625, "learning_rate": 8.696840826245444e-05, "loss": 2.1515, "step": 2155 }, { "epoch": 0.654424040066778, "grad_norm": 0.3964162766933441, "learning_rate": 8.696233292831106e-05, "loss": 1.6801, "step": 2156 }, { "epoch": 0.6547275762634694, "grad_norm": 0.6642559170722961, "learning_rate": 8.695625759416767e-05, "loss": 1.9085, "step": 2157 }, { "epoch": 0.6550311124601609, "grad_norm": 0.4267200827598572, "learning_rate": 8.69501822600243e-05, "loss": 1.8194, "step": 2158 }, { "epoch": 0.6553346486568523, "grad_norm": 0.4862426221370697, "learning_rate": 8.694410692588094e-05, "loss": 2.0361, "step": 2159 }, { "epoch": 0.6556381848535437, "grad_norm": 0.45392298698425293, "learning_rate": 8.693803159173754e-05, "loss": 2.034, "step": 2160 }, { "epoch": 0.6559417210502353, "grad_norm": 0.4699818193912506, "learning_rate": 8.693195625759417e-05, "loss": 1.0096, "step": 2161 }, { "epoch": 0.6562452572469267, "grad_norm": 0.4601641595363617, "learning_rate": 8.69258809234508e-05, "loss": 1.9042, "step": 2162 }, { "epoch": 0.6565487934436182, "grad_norm": 0.3832731544971466, "learning_rate": 8.69198055893074e-05, "loss": 1.7728, "step": 2163 }, { "epoch": 0.6568523296403096, "grad_norm": 0.41405048966407776, "learning_rate": 8.691373025516404e-05, "loss": 1.8591, "step": 2164 }, { "epoch": 0.657155865837001, "grad_norm": 0.4332970380783081, "learning_rate": 8.690765492102067e-05, "loss": 1.4433, "step": 2165 }, { "epoch": 0.6574594020336925, "grad_norm": 0.38901615142822266, "learning_rate": 8.690157958687728e-05, "loss": 1.9933, "step": 2166 }, { "epoch": 0.657762938230384, "grad_norm": 0.5068726539611816, "learning_rate": 8.68955042527339e-05, "loss": 1.954, "step": 2167 }, { "epoch": 0.6580664744270754, "grad_norm": 0.4076615571975708, "learning_rate": 8.688942891859053e-05, "loss": 1.699, "step": 2168 }, { "epoch": 0.6583700106237669, "grad_norm": 0.38633993268013, "learning_rate": 8.688335358444715e-05, "loss": 1.8453, "step": 2169 }, { "epoch": 0.6586735468204583, "grad_norm": 0.3873181641101837, "learning_rate": 8.687727825030377e-05, "loss": 1.8966, "step": 2170 }, { "epoch": 0.6589770830171497, "grad_norm": 0.4472099840641022, "learning_rate": 8.687120291616038e-05, "loss": 2.1017, "step": 2171 }, { "epoch": 0.6592806192138413, "grad_norm": 0.34563758969306946, "learning_rate": 8.686512758201702e-05, "loss": 1.6238, "step": 2172 }, { "epoch": 0.6595841554105327, "grad_norm": 0.4515549838542938, "learning_rate": 8.685905224787365e-05, "loss": 1.7878, "step": 2173 }, { "epoch": 0.6598876916072242, "grad_norm": 0.6528467535972595, "learning_rate": 8.685297691373025e-05, "loss": 1.7581, "step": 2174 }, { "epoch": 0.6601912278039156, "grad_norm": 0.345264732837677, "learning_rate": 8.684690157958688e-05, "loss": 1.2984, "step": 2175 }, { "epoch": 0.660494764000607, "grad_norm": 0.3934096395969391, "learning_rate": 8.684082624544351e-05, "loss": 2.084, "step": 2176 }, { "epoch": 0.6607983001972986, "grad_norm": 0.3595477044582367, "learning_rate": 8.683475091130012e-05, "loss": 1.9587, "step": 2177 }, { "epoch": 0.66110183639399, "grad_norm": 0.4324481189250946, "learning_rate": 8.682867557715675e-05, "loss": 1.8877, "step": 2178 }, { "epoch": 0.6614053725906814, "grad_norm": 0.4493394196033478, "learning_rate": 8.682260024301338e-05, "loss": 1.8291, "step": 2179 }, { "epoch": 0.6617089087873729, "grad_norm": 0.4085356891155243, "learning_rate": 8.681652490887e-05, "loss": 2.1646, "step": 2180 }, { "epoch": 0.6620124449840643, "grad_norm": 0.4380393624305725, "learning_rate": 8.681044957472661e-05, "loss": 1.3779, "step": 2181 }, { "epoch": 0.6623159811807559, "grad_norm": 0.3621211349964142, "learning_rate": 8.680437424058324e-05, "loss": 1.7786, "step": 2182 }, { "epoch": 0.6626195173774473, "grad_norm": 0.49654054641723633, "learning_rate": 8.679829890643986e-05, "loss": 1.2754, "step": 2183 }, { "epoch": 0.6629230535741387, "grad_norm": 0.49035829305648804, "learning_rate": 8.679222357229648e-05, "loss": 1.4453, "step": 2184 }, { "epoch": 0.6632265897708302, "grad_norm": 0.5359811782836914, "learning_rate": 8.67861482381531e-05, "loss": 1.4067, "step": 2185 }, { "epoch": 0.6635301259675216, "grad_norm": 0.4120253622531891, "learning_rate": 8.678007290400973e-05, "loss": 2.235, "step": 2186 }, { "epoch": 0.6638336621642131, "grad_norm": 0.3773285448551178, "learning_rate": 8.677399756986634e-05, "loss": 2.1406, "step": 2187 }, { "epoch": 0.6641371983609046, "grad_norm": 0.3956649899482727, "learning_rate": 8.676792223572296e-05, "loss": 1.5831, "step": 2188 }, { "epoch": 0.664440734557596, "grad_norm": 0.3894088864326477, "learning_rate": 8.676184690157959e-05, "loss": 1.9276, "step": 2189 }, { "epoch": 0.6647442707542874, "grad_norm": 0.4932451546192169, "learning_rate": 8.675577156743622e-05, "loss": 2.1134, "step": 2190 }, { "epoch": 0.6650478069509789, "grad_norm": 0.41271400451660156, "learning_rate": 8.674969623329283e-05, "loss": 2.1299, "step": 2191 }, { "epoch": 0.6653513431476703, "grad_norm": 0.596319317817688, "learning_rate": 8.674362089914946e-05, "loss": 1.4789, "step": 2192 }, { "epoch": 0.6656548793443618, "grad_norm": 0.4255685806274414, "learning_rate": 8.673754556500609e-05, "loss": 1.7751, "step": 2193 }, { "epoch": 0.6659584155410533, "grad_norm": 0.371003657579422, "learning_rate": 8.67314702308627e-05, "loss": 1.9355, "step": 2194 }, { "epoch": 0.6662619517377447, "grad_norm": 0.43426600098609924, "learning_rate": 8.672539489671932e-05, "loss": 2.123, "step": 2195 }, { "epoch": 0.6665654879344362, "grad_norm": 0.40644875168800354, "learning_rate": 8.671931956257595e-05, "loss": 2.1907, "step": 2196 }, { "epoch": 0.6668690241311276, "grad_norm": 0.4468652904033661, "learning_rate": 8.671324422843257e-05, "loss": 1.8754, "step": 2197 }, { "epoch": 0.6671725603278191, "grad_norm": 0.34468400478363037, "learning_rate": 8.670716889428919e-05, "loss": 1.989, "step": 2198 }, { "epoch": 0.6674760965245106, "grad_norm": 0.430462121963501, "learning_rate": 8.67010935601458e-05, "loss": 1.6021, "step": 2199 }, { "epoch": 0.667779632721202, "grad_norm": 0.42845168709754944, "learning_rate": 8.669501822600244e-05, "loss": 2.1036, "step": 2200 }, { "epoch": 0.6680831689178934, "grad_norm": 0.4242333769798279, "learning_rate": 8.668894289185905e-05, "loss": 1.8518, "step": 2201 }, { "epoch": 0.6683867051145849, "grad_norm": 0.3754311800003052, "learning_rate": 8.668286755771567e-05, "loss": 1.8616, "step": 2202 }, { "epoch": 0.6686902413112764, "grad_norm": 0.39913347363471985, "learning_rate": 8.66767922235723e-05, "loss": 2.2196, "step": 2203 }, { "epoch": 0.6689937775079678, "grad_norm": 0.3791050910949707, "learning_rate": 8.667071688942893e-05, "loss": 1.8651, "step": 2204 }, { "epoch": 0.6692973137046593, "grad_norm": 0.46585163474082947, "learning_rate": 8.666464155528554e-05, "loss": 2.0577, "step": 2205 }, { "epoch": 0.6696008499013507, "grad_norm": 0.4098934233188629, "learning_rate": 8.665856622114217e-05, "loss": 2.0225, "step": 2206 }, { "epoch": 0.6699043860980421, "grad_norm": 0.3545132577419281, "learning_rate": 8.66524908869988e-05, "loss": 1.8955, "step": 2207 }, { "epoch": 0.6702079222947337, "grad_norm": 0.4183339774608612, "learning_rate": 8.664641555285542e-05, "loss": 2.1093, "step": 2208 }, { "epoch": 0.6705114584914251, "grad_norm": 0.378439724445343, "learning_rate": 8.664034021871203e-05, "loss": 1.8865, "step": 2209 }, { "epoch": 0.6708149946881166, "grad_norm": 0.45795947313308716, "learning_rate": 8.663426488456865e-05, "loss": 1.8891, "step": 2210 }, { "epoch": 0.671118530884808, "grad_norm": 0.3554634153842926, "learning_rate": 8.662818955042528e-05, "loss": 2.1196, "step": 2211 }, { "epoch": 0.6714220670814994, "grad_norm": 0.4456568956375122, "learning_rate": 8.66221142162819e-05, "loss": 1.6737, "step": 2212 }, { "epoch": 0.671725603278191, "grad_norm": 0.39127472043037415, "learning_rate": 8.661603888213852e-05, "loss": 1.989, "step": 2213 }, { "epoch": 0.6720291394748824, "grad_norm": 0.4240843653678894, "learning_rate": 8.660996354799515e-05, "loss": 2.0285, "step": 2214 }, { "epoch": 0.6723326756715738, "grad_norm": 0.40605032444000244, "learning_rate": 8.660388821385176e-05, "loss": 1.6815, "step": 2215 }, { "epoch": 0.6726362118682653, "grad_norm": 0.4075249433517456, "learning_rate": 8.659781287970838e-05, "loss": 1.8847, "step": 2216 }, { "epoch": 0.6729397480649567, "grad_norm": 0.38832414150238037, "learning_rate": 8.659173754556501e-05, "loss": 1.7957, "step": 2217 }, { "epoch": 0.6732432842616483, "grad_norm": 0.40097537636756897, "learning_rate": 8.658566221142164e-05, "loss": 1.7386, "step": 2218 }, { "epoch": 0.6735468204583397, "grad_norm": 0.41220805048942566, "learning_rate": 8.657958687727825e-05, "loss": 2.1473, "step": 2219 }, { "epoch": 0.6738503566550311, "grad_norm": 0.4157550036907196, "learning_rate": 8.657351154313488e-05, "loss": 1.7435, "step": 2220 }, { "epoch": 0.6741538928517226, "grad_norm": 0.9074850082397461, "learning_rate": 8.656743620899151e-05, "loss": 1.7336, "step": 2221 }, { "epoch": 0.674457429048414, "grad_norm": 0.4011635482311249, "learning_rate": 8.656136087484813e-05, "loss": 1.9814, "step": 2222 }, { "epoch": 0.6747609652451054, "grad_norm": 0.4295683801174164, "learning_rate": 8.655528554070474e-05, "loss": 1.6332, "step": 2223 }, { "epoch": 0.675064501441797, "grad_norm": 0.424452006816864, "learning_rate": 8.654921020656136e-05, "loss": 1.6975, "step": 2224 }, { "epoch": 0.6753680376384884, "grad_norm": 0.3975834846496582, "learning_rate": 8.654313487241799e-05, "loss": 1.6783, "step": 2225 }, { "epoch": 0.6756715738351798, "grad_norm": 0.49879249930381775, "learning_rate": 8.653705953827461e-05, "loss": 2.0694, "step": 2226 }, { "epoch": 0.6759751100318713, "grad_norm": 0.424622505903244, "learning_rate": 8.653098420413123e-05, "loss": 1.7089, "step": 2227 }, { "epoch": 0.6762786462285627, "grad_norm": 0.497159868478775, "learning_rate": 8.652490886998786e-05, "loss": 1.8934, "step": 2228 }, { "epoch": 0.6765821824252543, "grad_norm": 0.40200483798980713, "learning_rate": 8.651883353584447e-05, "loss": 2.0158, "step": 2229 }, { "epoch": 0.6768857186219457, "grad_norm": 0.4294535219669342, "learning_rate": 8.651275820170109e-05, "loss": 1.815, "step": 2230 }, { "epoch": 0.6771892548186371, "grad_norm": 0.5176182389259338, "learning_rate": 8.650668286755772e-05, "loss": 2.0518, "step": 2231 }, { "epoch": 0.6774927910153286, "grad_norm": 0.44558650255203247, "learning_rate": 8.650060753341435e-05, "loss": 1.7201, "step": 2232 }, { "epoch": 0.67779632721202, "grad_norm": 0.38811054825782776, "learning_rate": 8.649453219927096e-05, "loss": 1.5197, "step": 2233 }, { "epoch": 0.6780998634087115, "grad_norm": 0.3874174952507019, "learning_rate": 8.648845686512759e-05, "loss": 1.8105, "step": 2234 }, { "epoch": 0.678403399605403, "grad_norm": 0.6453530788421631, "learning_rate": 8.648238153098422e-05, "loss": 1.585, "step": 2235 }, { "epoch": 0.6787069358020944, "grad_norm": 0.4314938485622406, "learning_rate": 8.647630619684082e-05, "loss": 1.7869, "step": 2236 }, { "epoch": 0.6790104719987858, "grad_norm": 0.37230706214904785, "learning_rate": 8.647023086269745e-05, "loss": 1.5571, "step": 2237 }, { "epoch": 0.6793140081954773, "grad_norm": 0.47215935587882996, "learning_rate": 8.646415552855407e-05, "loss": 2.0845, "step": 2238 }, { "epoch": 0.6796175443921688, "grad_norm": 0.4179088771343231, "learning_rate": 8.64580801944107e-05, "loss": 2.0181, "step": 2239 }, { "epoch": 0.6799210805888602, "grad_norm": 0.7629103660583496, "learning_rate": 8.645200486026732e-05, "loss": 2.0156, "step": 2240 }, { "epoch": 0.6802246167855517, "grad_norm": 0.3792973756790161, "learning_rate": 8.644592952612394e-05, "loss": 1.9137, "step": 2241 }, { "epoch": 0.6805281529822431, "grad_norm": 0.38583695888519287, "learning_rate": 8.643985419198057e-05, "loss": 1.8321, "step": 2242 }, { "epoch": 0.6808316891789346, "grad_norm": 0.4620136320590973, "learning_rate": 8.643377885783718e-05, "loss": 1.4017, "step": 2243 }, { "epoch": 0.6811352253756261, "grad_norm": 0.47091394662857056, "learning_rate": 8.64277035236938e-05, "loss": 2.2232, "step": 2244 }, { "epoch": 0.6814387615723175, "grad_norm": 0.3809249699115753, "learning_rate": 8.642162818955043e-05, "loss": 1.7188, "step": 2245 }, { "epoch": 0.681742297769009, "grad_norm": 0.4558849334716797, "learning_rate": 8.641555285540706e-05, "loss": 1.7074, "step": 2246 }, { "epoch": 0.6820458339657004, "grad_norm": 0.39358267188072205, "learning_rate": 8.640947752126367e-05, "loss": 1.6826, "step": 2247 }, { "epoch": 0.6823493701623918, "grad_norm": 0.5007576942443848, "learning_rate": 8.64034021871203e-05, "loss": 1.683, "step": 2248 }, { "epoch": 0.6826529063590833, "grad_norm": 0.43831315636634827, "learning_rate": 8.639732685297693e-05, "loss": 1.4773, "step": 2249 }, { "epoch": 0.6829564425557748, "grad_norm": 0.41979843378067017, "learning_rate": 8.639125151883353e-05, "loss": 1.8137, "step": 2250 }, { "epoch": 0.6832599787524662, "grad_norm": 0.4662984311580658, "learning_rate": 8.638517618469016e-05, "loss": 1.6978, "step": 2251 }, { "epoch": 0.6835635149491577, "grad_norm": 0.4381478428840637, "learning_rate": 8.637910085054678e-05, "loss": 2.0157, "step": 2252 }, { "epoch": 0.6838670511458491, "grad_norm": 0.7363194823265076, "learning_rate": 8.637302551640341e-05, "loss": 1.5466, "step": 2253 }, { "epoch": 0.6841705873425405, "grad_norm": 0.5327618718147278, "learning_rate": 8.636695018226003e-05, "loss": 1.8425, "step": 2254 }, { "epoch": 0.6844741235392321, "grad_norm": 0.4380737245082855, "learning_rate": 8.636087484811665e-05, "loss": 1.7879, "step": 2255 }, { "epoch": 0.6847776597359235, "grad_norm": 0.8782259821891785, "learning_rate": 8.635479951397328e-05, "loss": 1.8806, "step": 2256 }, { "epoch": 0.685081195932615, "grad_norm": 0.3841392397880554, "learning_rate": 8.63487241798299e-05, "loss": 2.014, "step": 2257 }, { "epoch": 0.6853847321293064, "grad_norm": 0.39896446466445923, "learning_rate": 8.634264884568651e-05, "loss": 1.9356, "step": 2258 }, { "epoch": 0.6856882683259978, "grad_norm": 0.41541773080825806, "learning_rate": 8.633657351154314e-05, "loss": 2.1365, "step": 2259 }, { "epoch": 0.6859918045226894, "grad_norm": 0.453948438167572, "learning_rate": 8.633049817739976e-05, "loss": 1.9459, "step": 2260 }, { "epoch": 0.6862953407193808, "grad_norm": 0.6398829221725464, "learning_rate": 8.632442284325638e-05, "loss": 1.3556, "step": 2261 }, { "epoch": 0.6865988769160722, "grad_norm": 0.43574538826942444, "learning_rate": 8.631834750911301e-05, "loss": 1.9142, "step": 2262 }, { "epoch": 0.6869024131127637, "grad_norm": 0.39180728793144226, "learning_rate": 8.631227217496964e-05, "loss": 1.8198, "step": 2263 }, { "epoch": 0.6872059493094551, "grad_norm": 0.4146488904953003, "learning_rate": 8.630619684082624e-05, "loss": 1.7776, "step": 2264 }, { "epoch": 0.6875094855061467, "grad_norm": 0.3681737184524536, "learning_rate": 8.630012150668287e-05, "loss": 1.4926, "step": 2265 }, { "epoch": 0.6878130217028381, "grad_norm": 0.44278883934020996, "learning_rate": 8.629404617253949e-05, "loss": 1.8021, "step": 2266 }, { "epoch": 0.6881165578995295, "grad_norm": 0.4687512218952179, "learning_rate": 8.628797083839612e-05, "loss": 1.75, "step": 2267 }, { "epoch": 0.688420094096221, "grad_norm": 0.4102340042591095, "learning_rate": 8.628189550425274e-05, "loss": 1.9249, "step": 2268 }, { "epoch": 0.6887236302929124, "grad_norm": 0.44898685812950134, "learning_rate": 8.627582017010936e-05, "loss": 1.9914, "step": 2269 }, { "epoch": 0.6890271664896039, "grad_norm": 0.451225221157074, "learning_rate": 8.626974483596599e-05, "loss": 1.71, "step": 2270 }, { "epoch": 0.6893307026862954, "grad_norm": 0.7062796950340271, "learning_rate": 8.62636695018226e-05, "loss": 1.2907, "step": 2271 }, { "epoch": 0.6896342388829868, "grad_norm": 0.39842337369918823, "learning_rate": 8.625759416767922e-05, "loss": 1.6578, "step": 2272 }, { "epoch": 0.6899377750796782, "grad_norm": 0.33577829599380493, "learning_rate": 8.625151883353585e-05, "loss": 1.7332, "step": 2273 }, { "epoch": 0.6902413112763697, "grad_norm": 0.43298929929733276, "learning_rate": 8.624544349939247e-05, "loss": 2.0028, "step": 2274 }, { "epoch": 0.6905448474730611, "grad_norm": 0.4451911449432373, "learning_rate": 8.623936816524909e-05, "loss": 1.4601, "step": 2275 }, { "epoch": 0.6908483836697527, "grad_norm": 0.4683527946472168, "learning_rate": 8.623329283110572e-05, "loss": 1.5981, "step": 2276 }, { "epoch": 0.6911519198664441, "grad_norm": 0.4420105814933777, "learning_rate": 8.622721749696235e-05, "loss": 1.6975, "step": 2277 }, { "epoch": 0.6914554560631355, "grad_norm": 0.3732719421386719, "learning_rate": 8.622114216281895e-05, "loss": 2.1493, "step": 2278 }, { "epoch": 0.691758992259827, "grad_norm": 0.4039726257324219, "learning_rate": 8.621506682867558e-05, "loss": 2.0208, "step": 2279 }, { "epoch": 0.6920625284565184, "grad_norm": 0.35387054085731506, "learning_rate": 8.62089914945322e-05, "loss": 2.0324, "step": 2280 }, { "epoch": 0.6923660646532099, "grad_norm": 0.4533388912677765, "learning_rate": 8.620291616038883e-05, "loss": 1.7281, "step": 2281 }, { "epoch": 0.6926696008499014, "grad_norm": 0.37299293279647827, "learning_rate": 8.619684082624545e-05, "loss": 1.9662, "step": 2282 }, { "epoch": 0.6929731370465928, "grad_norm": 0.41872239112854004, "learning_rate": 8.619076549210207e-05, "loss": 1.9887, "step": 2283 }, { "epoch": 0.6932766732432842, "grad_norm": 0.8140760064125061, "learning_rate": 8.61846901579587e-05, "loss": 2.1649, "step": 2284 }, { "epoch": 0.6935802094399757, "grad_norm": 0.3966423571109772, "learning_rate": 8.617861482381531e-05, "loss": 1.421, "step": 2285 }, { "epoch": 0.6938837456366672, "grad_norm": 0.36617428064346313, "learning_rate": 8.617253948967193e-05, "loss": 1.8174, "step": 2286 }, { "epoch": 0.6941872818333586, "grad_norm": 0.41297128796577454, "learning_rate": 8.616646415552856e-05, "loss": 1.9215, "step": 2287 }, { "epoch": 0.6944908180300501, "grad_norm": 0.48277321457862854, "learning_rate": 8.616038882138518e-05, "loss": 1.3551, "step": 2288 }, { "epoch": 0.6947943542267415, "grad_norm": 0.41190510988235474, "learning_rate": 8.61543134872418e-05, "loss": 1.8701, "step": 2289 }, { "epoch": 0.695097890423433, "grad_norm": 0.34471115469932556, "learning_rate": 8.614823815309843e-05, "loss": 1.5073, "step": 2290 }, { "epoch": 0.6954014266201245, "grad_norm": 0.4469250738620758, "learning_rate": 8.614216281895504e-05, "loss": 1.8257, "step": 2291 }, { "epoch": 0.6957049628168159, "grad_norm": 0.38356101512908936, "learning_rate": 8.613608748481166e-05, "loss": 1.63, "step": 2292 }, { "epoch": 0.6960084990135074, "grad_norm": 0.3836432099342346, "learning_rate": 8.613001215066829e-05, "loss": 1.6294, "step": 2293 }, { "epoch": 0.6963120352101988, "grad_norm": 1.1250473260879517, "learning_rate": 8.612393681652491e-05, "loss": 1.0634, "step": 2294 }, { "epoch": 0.6966155714068902, "grad_norm": 0.39849042892456055, "learning_rate": 8.611786148238154e-05, "loss": 1.4908, "step": 2295 }, { "epoch": 0.6969191076035818, "grad_norm": 1.0617260932922363, "learning_rate": 8.611178614823816e-05, "loss": 2.0693, "step": 2296 }, { "epoch": 0.6972226438002732, "grad_norm": 0.44789618253707886, "learning_rate": 8.610571081409478e-05, "loss": 1.7397, "step": 2297 }, { "epoch": 0.6975261799969646, "grad_norm": 0.7480859756469727, "learning_rate": 8.60996354799514e-05, "loss": 1.8385, "step": 2298 }, { "epoch": 0.6978297161936561, "grad_norm": 0.3201582133769989, "learning_rate": 8.609356014580802e-05, "loss": 1.4323, "step": 2299 }, { "epoch": 0.6981332523903475, "grad_norm": 0.4212173521518707, "learning_rate": 8.608748481166464e-05, "loss": 1.6153, "step": 2300 }, { "epoch": 0.6984367885870391, "grad_norm": 0.39297157526016235, "learning_rate": 8.608140947752127e-05, "loss": 1.9207, "step": 2301 }, { "epoch": 0.6987403247837305, "grad_norm": 0.4868420660495758, "learning_rate": 8.607533414337789e-05, "loss": 1.8056, "step": 2302 }, { "epoch": 0.6990438609804219, "grad_norm": 0.518147885799408, "learning_rate": 8.60692588092345e-05, "loss": 1.4521, "step": 2303 }, { "epoch": 0.6993473971771134, "grad_norm": 0.4484739899635315, "learning_rate": 8.606318347509114e-05, "loss": 1.8651, "step": 2304 }, { "epoch": 0.6996509333738048, "grad_norm": 0.4859076738357544, "learning_rate": 8.605710814094775e-05, "loss": 1.6752, "step": 2305 }, { "epoch": 0.6999544695704962, "grad_norm": 0.4186297655105591, "learning_rate": 8.605103280680437e-05, "loss": 2.0165, "step": 2306 }, { "epoch": 0.7002580057671878, "grad_norm": 0.34496191143989563, "learning_rate": 8.6044957472661e-05, "loss": 2.1395, "step": 2307 }, { "epoch": 0.7005615419638792, "grad_norm": 0.3636651933193207, "learning_rate": 8.603888213851762e-05, "loss": 1.2115, "step": 2308 }, { "epoch": 0.7008650781605706, "grad_norm": 0.38789573311805725, "learning_rate": 8.603280680437425e-05, "loss": 1.7992, "step": 2309 }, { "epoch": 0.7011686143572621, "grad_norm": 0.41874828934669495, "learning_rate": 8.602673147023087e-05, "loss": 1.9813, "step": 2310 }, { "epoch": 0.7014721505539535, "grad_norm": 0.6681198477745056, "learning_rate": 8.602065613608749e-05, "loss": 2.0765, "step": 2311 }, { "epoch": 0.7017756867506451, "grad_norm": 0.4358363151550293, "learning_rate": 8.601458080194412e-05, "loss": 2.0836, "step": 2312 }, { "epoch": 0.7020792229473365, "grad_norm": 0.4268842339515686, "learning_rate": 8.600850546780073e-05, "loss": 1.963, "step": 2313 }, { "epoch": 0.7023827591440279, "grad_norm": 0.43456903100013733, "learning_rate": 8.600243013365735e-05, "loss": 2.031, "step": 2314 }, { "epoch": 0.7026862953407194, "grad_norm": 0.9157736301422119, "learning_rate": 8.599635479951398e-05, "loss": 1.6625, "step": 2315 }, { "epoch": 0.7029898315374108, "grad_norm": 0.41116464138031006, "learning_rate": 8.59902794653706e-05, "loss": 1.3184, "step": 2316 }, { "epoch": 0.7032933677341023, "grad_norm": 0.38889098167419434, "learning_rate": 8.598420413122722e-05, "loss": 1.5656, "step": 2317 }, { "epoch": 0.7035969039307938, "grad_norm": 0.4620545208454132, "learning_rate": 8.597812879708385e-05, "loss": 1.9967, "step": 2318 }, { "epoch": 0.7039004401274852, "grad_norm": 0.44721749424934387, "learning_rate": 8.597205346294046e-05, "loss": 1.9404, "step": 2319 }, { "epoch": 0.7042039763241766, "grad_norm": 0.46273544430732727, "learning_rate": 8.596597812879708e-05, "loss": 1.9765, "step": 2320 }, { "epoch": 0.7045075125208681, "grad_norm": 0.3636545240879059, "learning_rate": 8.595990279465371e-05, "loss": 1.8925, "step": 2321 }, { "epoch": 0.7048110487175596, "grad_norm": 0.49978089332580566, "learning_rate": 8.595382746051033e-05, "loss": 1.9087, "step": 2322 }, { "epoch": 0.705114584914251, "grad_norm": 0.3676183819770813, "learning_rate": 8.594775212636695e-05, "loss": 1.9449, "step": 2323 }, { "epoch": 0.7054181211109425, "grad_norm": 0.3930191397666931, "learning_rate": 8.594167679222358e-05, "loss": 2.1377, "step": 2324 }, { "epoch": 0.7057216573076339, "grad_norm": 0.4476909935474396, "learning_rate": 8.59356014580802e-05, "loss": 1.886, "step": 2325 }, { "epoch": 0.7060251935043254, "grad_norm": 0.4343526363372803, "learning_rate": 8.592952612393683e-05, "loss": 1.5936, "step": 2326 }, { "epoch": 0.7063287297010169, "grad_norm": 0.42617321014404297, "learning_rate": 8.592345078979344e-05, "loss": 1.8253, "step": 2327 }, { "epoch": 0.7066322658977083, "grad_norm": 0.4090782105922699, "learning_rate": 8.591737545565006e-05, "loss": 1.8596, "step": 2328 }, { "epoch": 0.7069358020943998, "grad_norm": 0.4233112633228302, "learning_rate": 8.591130012150669e-05, "loss": 1.8529, "step": 2329 }, { "epoch": 0.7072393382910912, "grad_norm": 0.4159391224384308, "learning_rate": 8.590522478736331e-05, "loss": 2.202, "step": 2330 }, { "epoch": 0.7075428744877826, "grad_norm": 0.4303951859474182, "learning_rate": 8.589914945321993e-05, "loss": 2.2339, "step": 2331 }, { "epoch": 0.7078464106844741, "grad_norm": 0.431086927652359, "learning_rate": 8.589307411907656e-05, "loss": 1.7753, "step": 2332 }, { "epoch": 0.7081499468811656, "grad_norm": 0.4268263280391693, "learning_rate": 8.588699878493317e-05, "loss": 2.1529, "step": 2333 }, { "epoch": 0.708453483077857, "grad_norm": 0.35274428129196167, "learning_rate": 8.588092345078979e-05, "loss": 1.4355, "step": 2334 }, { "epoch": 0.7087570192745485, "grad_norm": 0.3985956311225891, "learning_rate": 8.587484811664642e-05, "loss": 1.7141, "step": 2335 }, { "epoch": 0.7090605554712399, "grad_norm": 0.44768375158309937, "learning_rate": 8.586877278250304e-05, "loss": 1.6654, "step": 2336 }, { "epoch": 0.7093640916679314, "grad_norm": 0.38372135162353516, "learning_rate": 8.586269744835966e-05, "loss": 1.8577, "step": 2337 }, { "epoch": 0.7096676278646229, "grad_norm": 0.459806889295578, "learning_rate": 8.585662211421629e-05, "loss": 1.8502, "step": 2338 }, { "epoch": 0.7099711640613143, "grad_norm": 0.36689698696136475, "learning_rate": 8.58505467800729e-05, "loss": 1.9931, "step": 2339 }, { "epoch": 0.7102747002580058, "grad_norm": 0.5424461960792542, "learning_rate": 8.584447144592954e-05, "loss": 1.6979, "step": 2340 }, { "epoch": 0.7105782364546972, "grad_norm": 0.663773238658905, "learning_rate": 8.583839611178615e-05, "loss": 1.9633, "step": 2341 }, { "epoch": 0.7108817726513886, "grad_norm": 2.337242603302002, "learning_rate": 8.583232077764277e-05, "loss": 1.2587, "step": 2342 }, { "epoch": 0.7111853088480802, "grad_norm": 0.4255028963088989, "learning_rate": 8.58262454434994e-05, "loss": 1.935, "step": 2343 }, { "epoch": 0.7114888450447716, "grad_norm": 0.796564519405365, "learning_rate": 8.582017010935602e-05, "loss": 2.1544, "step": 2344 }, { "epoch": 0.711792381241463, "grad_norm": 0.42163416743278503, "learning_rate": 8.581409477521264e-05, "loss": 1.9419, "step": 2345 }, { "epoch": 0.7120959174381545, "grad_norm": 0.49495795369148254, "learning_rate": 8.580801944106927e-05, "loss": 1.6143, "step": 2346 }, { "epoch": 0.7123994536348459, "grad_norm": 0.5532099008560181, "learning_rate": 8.580194410692588e-05, "loss": 1.9407, "step": 2347 }, { "epoch": 0.7127029898315375, "grad_norm": 0.434341162443161, "learning_rate": 8.57958687727825e-05, "loss": 1.9626, "step": 2348 }, { "epoch": 0.7130065260282289, "grad_norm": 0.5338404774665833, "learning_rate": 8.578979343863913e-05, "loss": 1.801, "step": 2349 }, { "epoch": 0.7133100622249203, "grad_norm": 0.48087722063064575, "learning_rate": 8.578371810449575e-05, "loss": 2.2286, "step": 2350 }, { "epoch": 0.7136135984216118, "grad_norm": 0.43688857555389404, "learning_rate": 8.577764277035237e-05, "loss": 1.7534, "step": 2351 }, { "epoch": 0.7139171346183032, "grad_norm": 1.3858163356781006, "learning_rate": 8.5771567436209e-05, "loss": 1.4466, "step": 2352 }, { "epoch": 0.7142206708149947, "grad_norm": 0.6149253249168396, "learning_rate": 8.576549210206562e-05, "loss": 2.0683, "step": 2353 }, { "epoch": 0.7145242070116862, "grad_norm": 0.49920403957366943, "learning_rate": 8.575941676792225e-05, "loss": 1.9266, "step": 2354 }, { "epoch": 0.7148277432083776, "grad_norm": 0.41959667205810547, "learning_rate": 8.575334143377886e-05, "loss": 1.7429, "step": 2355 }, { "epoch": 0.715131279405069, "grad_norm": 0.5163973569869995, "learning_rate": 8.574726609963548e-05, "loss": 1.2735, "step": 2356 }, { "epoch": 0.7154348156017605, "grad_norm": 0.37799614667892456, "learning_rate": 8.574119076549211e-05, "loss": 2.2448, "step": 2357 }, { "epoch": 0.7157383517984519, "grad_norm": 0.43541470170021057, "learning_rate": 8.573511543134873e-05, "loss": 2.2739, "step": 2358 }, { "epoch": 0.7160418879951435, "grad_norm": 1.3038394451141357, "learning_rate": 8.572904009720535e-05, "loss": 1.9907, "step": 2359 }, { "epoch": 0.7163454241918349, "grad_norm": 0.6111695766448975, "learning_rate": 8.572296476306198e-05, "loss": 1.5575, "step": 2360 }, { "epoch": 0.7166489603885263, "grad_norm": 1.2944895029067993, "learning_rate": 8.57168894289186e-05, "loss": 1.8409, "step": 2361 }, { "epoch": 0.7169524965852178, "grad_norm": 0.42008545994758606, "learning_rate": 8.571081409477521e-05, "loss": 1.8825, "step": 2362 }, { "epoch": 0.7172560327819092, "grad_norm": 0.48183196783065796, "learning_rate": 8.570473876063184e-05, "loss": 1.9233, "step": 2363 }, { "epoch": 0.7175595689786007, "grad_norm": 0.41434016823768616, "learning_rate": 8.569866342648846e-05, "loss": 2.0636, "step": 2364 }, { "epoch": 0.7178631051752922, "grad_norm": 0.3774077296257019, "learning_rate": 8.569258809234508e-05, "loss": 1.9155, "step": 2365 }, { "epoch": 0.7181666413719836, "grad_norm": 0.350824236869812, "learning_rate": 8.568651275820171e-05, "loss": 1.4894, "step": 2366 }, { "epoch": 0.718470177568675, "grad_norm": 1.5183087587356567, "learning_rate": 8.568043742405833e-05, "loss": 2.0812, "step": 2367 }, { "epoch": 0.7187737137653665, "grad_norm": 0.3757447600364685, "learning_rate": 8.567436208991496e-05, "loss": 1.4652, "step": 2368 }, { "epoch": 0.719077249962058, "grad_norm": 0.4151865839958191, "learning_rate": 8.566828675577157e-05, "loss": 1.677, "step": 2369 }, { "epoch": 0.7193807861587495, "grad_norm": 0.4992164075374603, "learning_rate": 8.566221142162819e-05, "loss": 2.1459, "step": 2370 }, { "epoch": 0.7196843223554409, "grad_norm": 0.3945586085319519, "learning_rate": 8.565613608748482e-05, "loss": 1.9362, "step": 2371 }, { "epoch": 0.7199878585521323, "grad_norm": 0.4325678050518036, "learning_rate": 8.565006075334144e-05, "loss": 2.0223, "step": 2372 }, { "epoch": 0.7202913947488238, "grad_norm": 0.39915481209754944, "learning_rate": 8.564398541919806e-05, "loss": 1.9572, "step": 2373 }, { "epoch": 0.7205949309455153, "grad_norm": 0.45898914337158203, "learning_rate": 8.563791008505469e-05, "loss": 1.5554, "step": 2374 }, { "epoch": 0.7208984671422067, "grad_norm": 0.4385409951210022, "learning_rate": 8.56318347509113e-05, "loss": 1.7864, "step": 2375 }, { "epoch": 0.7212020033388982, "grad_norm": 0.40655046701431274, "learning_rate": 8.562575941676792e-05, "loss": 1.9718, "step": 2376 }, { "epoch": 0.7215055395355896, "grad_norm": 0.42865580320358276, "learning_rate": 8.561968408262455e-05, "loss": 1.6585, "step": 2377 }, { "epoch": 0.721809075732281, "grad_norm": 1.0274362564086914, "learning_rate": 8.561360874848117e-05, "loss": 1.9557, "step": 2378 }, { "epoch": 0.7221126119289726, "grad_norm": 0.8454954028129578, "learning_rate": 8.560753341433779e-05, "loss": 2.0079, "step": 2379 }, { "epoch": 0.722416148125664, "grad_norm": 0.3799399733543396, "learning_rate": 8.560145808019442e-05, "loss": 1.9851, "step": 2380 }, { "epoch": 0.7227196843223554, "grad_norm": 0.5621289610862732, "learning_rate": 8.559538274605104e-05, "loss": 1.6631, "step": 2381 }, { "epoch": 0.7230232205190469, "grad_norm": 0.42442479729652405, "learning_rate": 8.558930741190767e-05, "loss": 1.9328, "step": 2382 }, { "epoch": 0.7233267567157383, "grad_norm": 0.4831121265888214, "learning_rate": 8.558323207776428e-05, "loss": 1.6994, "step": 2383 }, { "epoch": 0.7236302929124299, "grad_norm": 0.4605385363101959, "learning_rate": 8.55771567436209e-05, "loss": 2.0735, "step": 2384 }, { "epoch": 0.7239338291091213, "grad_norm": 0.4393116235733032, "learning_rate": 8.557108140947753e-05, "loss": 1.6179, "step": 2385 }, { "epoch": 0.7242373653058127, "grad_norm": 0.3323841392993927, "learning_rate": 8.556500607533414e-05, "loss": 1.7223, "step": 2386 }, { "epoch": 0.7245409015025042, "grad_norm": 1.153462290763855, "learning_rate": 8.555893074119077e-05, "loss": 2.2449, "step": 2387 }, { "epoch": 0.7248444376991956, "grad_norm": 0.4617941677570343, "learning_rate": 8.55528554070474e-05, "loss": 1.4432, "step": 2388 }, { "epoch": 0.725147973895887, "grad_norm": 0.38924935460090637, "learning_rate": 8.554678007290401e-05, "loss": 2.1933, "step": 2389 }, { "epoch": 0.7254515100925786, "grad_norm": 0.37328121066093445, "learning_rate": 8.554070473876063e-05, "loss": 1.5654, "step": 2390 }, { "epoch": 0.72575504628927, "grad_norm": 0.46307137608528137, "learning_rate": 8.553462940461726e-05, "loss": 1.4233, "step": 2391 }, { "epoch": 0.7260585824859614, "grad_norm": 0.39463040232658386, "learning_rate": 8.552855407047388e-05, "loss": 1.8745, "step": 2392 }, { "epoch": 0.7263621186826529, "grad_norm": 0.6351356506347656, "learning_rate": 8.55224787363305e-05, "loss": 2.0999, "step": 2393 }, { "epoch": 0.7266656548793443, "grad_norm": 0.446508526802063, "learning_rate": 8.551640340218713e-05, "loss": 2.0818, "step": 2394 }, { "epoch": 0.7269691910760359, "grad_norm": 0.3539383113384247, "learning_rate": 8.551032806804375e-05, "loss": 1.3604, "step": 2395 }, { "epoch": 0.7272727272727273, "grad_norm": 0.4133947789669037, "learning_rate": 8.550425273390036e-05, "loss": 1.5174, "step": 2396 }, { "epoch": 0.7275762634694187, "grad_norm": 0.3807066082954407, "learning_rate": 8.5498177399757e-05, "loss": 1.8238, "step": 2397 }, { "epoch": 0.7278797996661102, "grad_norm": 0.41087058186531067, "learning_rate": 8.549210206561361e-05, "loss": 1.9404, "step": 2398 }, { "epoch": 0.7281833358628016, "grad_norm": 0.36707812547683716, "learning_rate": 8.548602673147024e-05, "loss": 1.8164, "step": 2399 }, { "epoch": 0.7284868720594931, "grad_norm": 0.38733971118927, "learning_rate": 8.547995139732685e-05, "loss": 1.3954, "step": 2400 }, { "epoch": 0.7287904082561846, "grad_norm": 0.41041603684425354, "learning_rate": 8.547387606318348e-05, "loss": 1.5569, "step": 2401 }, { "epoch": 0.729093944452876, "grad_norm": 0.42836543917655945, "learning_rate": 8.546780072904011e-05, "loss": 1.7968, "step": 2402 }, { "epoch": 0.7293974806495674, "grad_norm": 0.4246993660926819, "learning_rate": 8.546172539489672e-05, "loss": 2.1727, "step": 2403 }, { "epoch": 0.7297010168462589, "grad_norm": 0.43355593085289, "learning_rate": 8.545565006075334e-05, "loss": 1.5563, "step": 2404 }, { "epoch": 0.7300045530429504, "grad_norm": 0.39305025339126587, "learning_rate": 8.544957472660997e-05, "loss": 1.8375, "step": 2405 }, { "epoch": 0.7303080892396419, "grad_norm": 0.44923126697540283, "learning_rate": 8.544349939246659e-05, "loss": 1.6066, "step": 2406 }, { "epoch": 0.7306116254363333, "grad_norm": 0.41019386053085327, "learning_rate": 8.543742405832321e-05, "loss": 2.0204, "step": 2407 }, { "epoch": 0.7309151616330247, "grad_norm": 0.4895036220550537, "learning_rate": 8.543134872417984e-05, "loss": 2.1129, "step": 2408 }, { "epoch": 0.7312186978297162, "grad_norm": 0.4031083583831787, "learning_rate": 8.542527339003646e-05, "loss": 1.9971, "step": 2409 }, { "epoch": 0.7315222340264077, "grad_norm": 0.40298768877983093, "learning_rate": 8.541919805589307e-05, "loss": 1.9922, "step": 2410 }, { "epoch": 0.7318257702230991, "grad_norm": 0.41940340399742126, "learning_rate": 8.54131227217497e-05, "loss": 1.8683, "step": 2411 }, { "epoch": 0.7321293064197906, "grad_norm": 0.4068038761615753, "learning_rate": 8.540704738760632e-05, "loss": 1.8514, "step": 2412 }, { "epoch": 0.732432842616482, "grad_norm": 0.3992190361022949, "learning_rate": 8.540097205346295e-05, "loss": 1.734, "step": 2413 }, { "epoch": 0.7327363788131734, "grad_norm": 0.35920289158821106, "learning_rate": 8.539489671931956e-05, "loss": 2.0869, "step": 2414 }, { "epoch": 0.7330399150098649, "grad_norm": 0.41339996457099915, "learning_rate": 8.538882138517619e-05, "loss": 1.862, "step": 2415 }, { "epoch": 0.7333434512065564, "grad_norm": 0.35875940322875977, "learning_rate": 8.538274605103282e-05, "loss": 1.9953, "step": 2416 }, { "epoch": 0.7336469874032479, "grad_norm": 0.39455875754356384, "learning_rate": 8.537667071688943e-05, "loss": 1.4772, "step": 2417 }, { "epoch": 0.7339505235999393, "grad_norm": 0.4024868905544281, "learning_rate": 8.537059538274605e-05, "loss": 1.9192, "step": 2418 }, { "epoch": 0.7342540597966307, "grad_norm": 0.43624451756477356, "learning_rate": 8.536452004860268e-05, "loss": 1.7586, "step": 2419 }, { "epoch": 0.7345575959933222, "grad_norm": 0.4356803596019745, "learning_rate": 8.53584447144593e-05, "loss": 1.3589, "step": 2420 }, { "epoch": 0.7348611321900137, "grad_norm": 0.3844490051269531, "learning_rate": 8.535236938031592e-05, "loss": 1.7586, "step": 2421 }, { "epoch": 0.7351646683867051, "grad_norm": 0.36453956365585327, "learning_rate": 8.534629404617255e-05, "loss": 1.9643, "step": 2422 }, { "epoch": 0.7354682045833966, "grad_norm": 0.42973411083221436, "learning_rate": 8.534021871202917e-05, "loss": 2.0022, "step": 2423 }, { "epoch": 0.735771740780088, "grad_norm": 0.4491013288497925, "learning_rate": 8.533414337788578e-05, "loss": 1.9658, "step": 2424 }, { "epoch": 0.7360752769767794, "grad_norm": 0.3806130886077881, "learning_rate": 8.532806804374241e-05, "loss": 1.7092, "step": 2425 }, { "epoch": 0.736378813173471, "grad_norm": 0.37530237436294556, "learning_rate": 8.532199270959903e-05, "loss": 2.1067, "step": 2426 }, { "epoch": 0.7366823493701624, "grad_norm": 0.36951944231987, "learning_rate": 8.531591737545566e-05, "loss": 2.1429, "step": 2427 }, { "epoch": 0.7369858855668538, "grad_norm": 0.8292801380157471, "learning_rate": 8.530984204131227e-05, "loss": 1.9387, "step": 2428 }, { "epoch": 0.7372894217635453, "grad_norm": 0.3690939247608185, "learning_rate": 8.53037667071689e-05, "loss": 1.96, "step": 2429 }, { "epoch": 0.7375929579602367, "grad_norm": 0.3507663905620575, "learning_rate": 8.529769137302553e-05, "loss": 1.8147, "step": 2430 }, { "epoch": 0.7378964941569283, "grad_norm": 0.4241466820240021, "learning_rate": 8.529161603888215e-05, "loss": 1.1116, "step": 2431 }, { "epoch": 0.7382000303536197, "grad_norm": 0.40038058161735535, "learning_rate": 8.528554070473876e-05, "loss": 1.9528, "step": 2432 }, { "epoch": 0.7385035665503111, "grad_norm": 0.41025862097740173, "learning_rate": 8.527946537059539e-05, "loss": 1.8288, "step": 2433 }, { "epoch": 0.7388071027470026, "grad_norm": 0.43207821249961853, "learning_rate": 8.527339003645201e-05, "loss": 1.986, "step": 2434 }, { "epoch": 0.739110638943694, "grad_norm": 0.4291042983531952, "learning_rate": 8.526731470230863e-05, "loss": 2.0695, "step": 2435 }, { "epoch": 0.7394141751403855, "grad_norm": 0.39197900891304016, "learning_rate": 8.526123936816526e-05, "loss": 1.5059, "step": 2436 }, { "epoch": 0.739717711337077, "grad_norm": 0.5944773554801941, "learning_rate": 8.525516403402188e-05, "loss": 1.6354, "step": 2437 }, { "epoch": 0.7400212475337684, "grad_norm": 0.42565345764160156, "learning_rate": 8.52490886998785e-05, "loss": 1.5772, "step": 2438 }, { "epoch": 0.7403247837304598, "grad_norm": 0.4184707999229431, "learning_rate": 8.524301336573512e-05, "loss": 1.8781, "step": 2439 }, { "epoch": 0.7406283199271513, "grad_norm": 0.36030882596969604, "learning_rate": 8.523693803159174e-05, "loss": 1.8788, "step": 2440 }, { "epoch": 0.7409318561238427, "grad_norm": 0.4323141872882843, "learning_rate": 8.523086269744837e-05, "loss": 2.0321, "step": 2441 }, { "epoch": 0.7412353923205343, "grad_norm": 0.4332966208457947, "learning_rate": 8.522478736330498e-05, "loss": 1.7093, "step": 2442 }, { "epoch": 0.7415389285172257, "grad_norm": 0.4085337221622467, "learning_rate": 8.521871202916161e-05, "loss": 1.7086, "step": 2443 }, { "epoch": 0.7418424647139171, "grad_norm": 0.4357088506221771, "learning_rate": 8.521263669501824e-05, "loss": 1.5345, "step": 2444 }, { "epoch": 0.7421460009106086, "grad_norm": 0.40508776903152466, "learning_rate": 8.520656136087484e-05, "loss": 2.0369, "step": 2445 }, { "epoch": 0.7424495371073, "grad_norm": 0.36506882309913635, "learning_rate": 8.520048602673147e-05, "loss": 1.4258, "step": 2446 }, { "epoch": 0.7427530733039915, "grad_norm": 0.3771931827068329, "learning_rate": 8.51944106925881e-05, "loss": 1.1239, "step": 2447 }, { "epoch": 0.743056609500683, "grad_norm": 0.4152052700519562, "learning_rate": 8.518833535844472e-05, "loss": 2.093, "step": 2448 }, { "epoch": 0.7433601456973744, "grad_norm": 0.4168509244918823, "learning_rate": 8.518226002430134e-05, "loss": 1.9488, "step": 2449 }, { "epoch": 0.7436636818940658, "grad_norm": 0.44399914145469666, "learning_rate": 8.517618469015797e-05, "loss": 1.8483, "step": 2450 }, { "epoch": 0.7439672180907573, "grad_norm": 0.3898546099662781, "learning_rate": 8.517010935601459e-05, "loss": 1.7384, "step": 2451 }, { "epoch": 0.7442707542874488, "grad_norm": 0.3657229542732239, "learning_rate": 8.51640340218712e-05, "loss": 1.9651, "step": 2452 }, { "epoch": 0.7445742904841403, "grad_norm": 0.5163128972053528, "learning_rate": 8.515795868772783e-05, "loss": 1.679, "step": 2453 }, { "epoch": 0.7448778266808317, "grad_norm": 0.8351776599884033, "learning_rate": 8.515188335358445e-05, "loss": 1.4447, "step": 2454 }, { "epoch": 0.7451813628775231, "grad_norm": 0.4343299865722656, "learning_rate": 8.514580801944108e-05, "loss": 1.8703, "step": 2455 }, { "epoch": 0.7454848990742146, "grad_norm": 0.3905276954174042, "learning_rate": 8.513973268529769e-05, "loss": 1.8941, "step": 2456 }, { "epoch": 0.7457884352709061, "grad_norm": 0.475789338350296, "learning_rate": 8.513365735115432e-05, "loss": 1.629, "step": 2457 }, { "epoch": 0.7460919714675975, "grad_norm": 0.3969273567199707, "learning_rate": 8.512758201701095e-05, "loss": 1.8827, "step": 2458 }, { "epoch": 0.746395507664289, "grad_norm": 0.4705328643321991, "learning_rate": 8.512150668286755e-05, "loss": 1.4177, "step": 2459 }, { "epoch": 0.7466990438609804, "grad_norm": 0.4193515181541443, "learning_rate": 8.511543134872418e-05, "loss": 2.0139, "step": 2460 }, { "epoch": 0.7470025800576718, "grad_norm": 0.38317739963531494, "learning_rate": 8.510935601458081e-05, "loss": 2.119, "step": 2461 }, { "epoch": 0.7473061162543634, "grad_norm": 0.39867040514945984, "learning_rate": 8.510328068043743e-05, "loss": 1.9218, "step": 2462 }, { "epoch": 0.7476096524510548, "grad_norm": 0.5308038592338562, "learning_rate": 8.509720534629405e-05, "loss": 1.5898, "step": 2463 }, { "epoch": 0.7479131886477463, "grad_norm": 0.45667675137519836, "learning_rate": 8.509113001215068e-05, "loss": 1.7705, "step": 2464 }, { "epoch": 0.7482167248444377, "grad_norm": 1.480726718902588, "learning_rate": 8.50850546780073e-05, "loss": 2.1438, "step": 2465 }, { "epoch": 0.7485202610411291, "grad_norm": 0.46620655059814453, "learning_rate": 8.507897934386391e-05, "loss": 1.8973, "step": 2466 }, { "epoch": 0.7488237972378207, "grad_norm": 0.34710168838500977, "learning_rate": 8.507290400972053e-05, "loss": 1.3338, "step": 2467 }, { "epoch": 0.7491273334345121, "grad_norm": 0.43097588419914246, "learning_rate": 8.506682867557716e-05, "loss": 1.7958, "step": 2468 }, { "epoch": 0.7494308696312035, "grad_norm": 0.3998434245586395, "learning_rate": 8.506075334143378e-05, "loss": 1.7615, "step": 2469 }, { "epoch": 0.749734405827895, "grad_norm": 0.39192789793014526, "learning_rate": 8.50546780072904e-05, "loss": 1.7179, "step": 2470 }, { "epoch": 0.7500379420245864, "grad_norm": 0.4148361086845398, "learning_rate": 8.504860267314703e-05, "loss": 1.6477, "step": 2471 }, { "epoch": 0.7503414782212778, "grad_norm": 0.5068510174751282, "learning_rate": 8.504252733900366e-05, "loss": 2.045, "step": 2472 }, { "epoch": 0.7506450144179694, "grad_norm": 0.4798752963542938, "learning_rate": 8.503645200486026e-05, "loss": 1.7506, "step": 2473 }, { "epoch": 0.7509485506146608, "grad_norm": 0.4444788992404938, "learning_rate": 8.503037667071689e-05, "loss": 1.894, "step": 2474 }, { "epoch": 0.7512520868113522, "grad_norm": 0.39380598068237305, "learning_rate": 8.502430133657352e-05, "loss": 2.0365, "step": 2475 }, { "epoch": 0.7515556230080437, "grad_norm": 0.38357478380203247, "learning_rate": 8.501822600243014e-05, "loss": 1.8282, "step": 2476 }, { "epoch": 0.7518591592047351, "grad_norm": 0.47529253363609314, "learning_rate": 8.501215066828676e-05, "loss": 1.7783, "step": 2477 }, { "epoch": 0.7521626954014267, "grad_norm": 0.3402441740036011, "learning_rate": 8.500607533414339e-05, "loss": 1.8927, "step": 2478 }, { "epoch": 0.7524662315981181, "grad_norm": 0.36784660816192627, "learning_rate": 8.5e-05, "loss": 1.4411, "step": 2479 }, { "epoch": 0.7527697677948095, "grad_norm": 0.42149028182029724, "learning_rate": 8.499392466585662e-05, "loss": 2.0137, "step": 2480 }, { "epoch": 0.753073303991501, "grad_norm": 0.40183788537979126, "learning_rate": 8.498784933171324e-05, "loss": 1.6207, "step": 2481 }, { "epoch": 0.7533768401881924, "grad_norm": 0.45237985253334045, "learning_rate": 8.498177399756987e-05, "loss": 2.2596, "step": 2482 }, { "epoch": 0.7536803763848839, "grad_norm": 0.4847509562969208, "learning_rate": 8.497569866342649e-05, "loss": 1.6941, "step": 2483 }, { "epoch": 0.7539839125815754, "grad_norm": 0.4311809837818146, "learning_rate": 8.49696233292831e-05, "loss": 2.0248, "step": 2484 }, { "epoch": 0.7542874487782668, "grad_norm": 0.6543784141540527, "learning_rate": 8.496354799513974e-05, "loss": 1.9065, "step": 2485 }, { "epoch": 0.7545909849749582, "grad_norm": 0.3486241102218628, "learning_rate": 8.495747266099637e-05, "loss": 1.5518, "step": 2486 }, { "epoch": 0.7548945211716497, "grad_norm": 0.44317248463630676, "learning_rate": 8.495139732685297e-05, "loss": 1.9064, "step": 2487 }, { "epoch": 0.7551980573683412, "grad_norm": 0.44157078862190247, "learning_rate": 8.49453219927096e-05, "loss": 1.7866, "step": 2488 }, { "epoch": 0.7555015935650327, "grad_norm": 0.4338137209415436, "learning_rate": 8.493924665856623e-05, "loss": 1.9897, "step": 2489 }, { "epoch": 0.7558051297617241, "grad_norm": 0.45171141624450684, "learning_rate": 8.493317132442285e-05, "loss": 1.9706, "step": 2490 }, { "epoch": 0.7561086659584155, "grad_norm": 0.9964777231216431, "learning_rate": 8.492709599027947e-05, "loss": 2.1163, "step": 2491 }, { "epoch": 0.756412202155107, "grad_norm": 0.39545395970344543, "learning_rate": 8.49210206561361e-05, "loss": 1.6514, "step": 2492 }, { "epoch": 0.7567157383517985, "grad_norm": 0.4575003683567047, "learning_rate": 8.491494532199272e-05, "loss": 1.9118, "step": 2493 }, { "epoch": 0.7570192745484899, "grad_norm": 0.4249429702758789, "learning_rate": 8.490886998784933e-05, "loss": 1.9342, "step": 2494 }, { "epoch": 0.7573228107451814, "grad_norm": 0.4887460768222809, "learning_rate": 8.490279465370595e-05, "loss": 1.9421, "step": 2495 }, { "epoch": 0.7576263469418728, "grad_norm": 0.41777387261390686, "learning_rate": 8.489671931956258e-05, "loss": 1.5985, "step": 2496 }, { "epoch": 0.7579298831385642, "grad_norm": 1.7083243131637573, "learning_rate": 8.48906439854192e-05, "loss": 1.8387, "step": 2497 }, { "epoch": 0.7582334193352557, "grad_norm": 0.39955195784568787, "learning_rate": 8.488456865127582e-05, "loss": 2.0833, "step": 2498 }, { "epoch": 0.7585369555319472, "grad_norm": 1.4131972789764404, "learning_rate": 8.487849331713245e-05, "loss": 1.5318, "step": 2499 }, { "epoch": 0.7588404917286387, "grad_norm": 0.7458001375198364, "learning_rate": 8.487241798298908e-05, "loss": 1.9582, "step": 2500 }, { "epoch": 0.7591440279253301, "grad_norm": 0.4677983820438385, "learning_rate": 8.486634264884568e-05, "loss": 1.8781, "step": 2501 }, { "epoch": 0.7594475641220215, "grad_norm": 0.4976421594619751, "learning_rate": 8.486026731470231e-05, "loss": 2.1458, "step": 2502 }, { "epoch": 0.759751100318713, "grad_norm": 0.3829711675643921, "learning_rate": 8.485419198055894e-05, "loss": 1.9114, "step": 2503 }, { "epoch": 0.7600546365154045, "grad_norm": 0.5295559167861938, "learning_rate": 8.484811664641556e-05, "loss": 2.0679, "step": 2504 }, { "epoch": 0.7603581727120959, "grad_norm": 0.7929876446723938, "learning_rate": 8.484204131227218e-05, "loss": 2.2658, "step": 2505 }, { "epoch": 0.7606617089087874, "grad_norm": 0.35055285692214966, "learning_rate": 8.483596597812881e-05, "loss": 1.9443, "step": 2506 }, { "epoch": 0.7609652451054788, "grad_norm": 0.39741021394729614, "learning_rate": 8.482989064398543e-05, "loss": 1.6983, "step": 2507 }, { "epoch": 0.7612687813021702, "grad_norm": 0.4206577241420746, "learning_rate": 8.482381530984204e-05, "loss": 1.8886, "step": 2508 }, { "epoch": 0.7615723174988618, "grad_norm": 0.7343670129776001, "learning_rate": 8.481773997569866e-05, "loss": 1.9581, "step": 2509 }, { "epoch": 0.7618758536955532, "grad_norm": 0.3836110532283783, "learning_rate": 8.481166464155529e-05, "loss": 1.8658, "step": 2510 }, { "epoch": 0.7621793898922447, "grad_norm": 0.44783517718315125, "learning_rate": 8.480558930741191e-05, "loss": 1.8772, "step": 2511 }, { "epoch": 0.7624829260889361, "grad_norm": 0.44204702973365784, "learning_rate": 8.479951397326853e-05, "loss": 1.8684, "step": 2512 }, { "epoch": 0.7627864622856275, "grad_norm": 0.45162737369537354, "learning_rate": 8.479343863912516e-05, "loss": 1.817, "step": 2513 }, { "epoch": 0.7630899984823191, "grad_norm": 0.36719024181365967, "learning_rate": 8.478736330498179e-05, "loss": 2.0087, "step": 2514 }, { "epoch": 0.7633935346790105, "grad_norm": 0.3979268968105316, "learning_rate": 8.478128797083839e-05, "loss": 2.091, "step": 2515 }, { "epoch": 0.7636970708757019, "grad_norm": 0.45267635583877563, "learning_rate": 8.477521263669502e-05, "loss": 1.2638, "step": 2516 }, { "epoch": 0.7640006070723934, "grad_norm": 0.43147364258766174, "learning_rate": 8.476913730255165e-05, "loss": 2.1202, "step": 2517 }, { "epoch": 0.7643041432690848, "grad_norm": 0.46071958541870117, "learning_rate": 8.476306196840826e-05, "loss": 1.9875, "step": 2518 }, { "epoch": 0.7646076794657763, "grad_norm": 0.3662787973880768, "learning_rate": 8.475698663426489e-05, "loss": 2.0632, "step": 2519 }, { "epoch": 0.7649112156624678, "grad_norm": 1.479319453239441, "learning_rate": 8.475091130012152e-05, "loss": 2.1759, "step": 2520 }, { "epoch": 0.7652147518591592, "grad_norm": 0.38541749119758606, "learning_rate": 8.474483596597814e-05, "loss": 1.6204, "step": 2521 }, { "epoch": 0.7655182880558506, "grad_norm": 0.39057788252830505, "learning_rate": 8.473876063183475e-05, "loss": 1.5414, "step": 2522 }, { "epoch": 0.7658218242525421, "grad_norm": 0.37674304842948914, "learning_rate": 8.473268529769137e-05, "loss": 1.8496, "step": 2523 }, { "epoch": 0.7661253604492335, "grad_norm": 0.4432341158390045, "learning_rate": 8.4726609963548e-05, "loss": 1.8541, "step": 2524 }, { "epoch": 0.7664288966459251, "grad_norm": 0.3844713866710663, "learning_rate": 8.472053462940462e-05, "loss": 1.9793, "step": 2525 }, { "epoch": 0.7667324328426165, "grad_norm": 0.37515100836753845, "learning_rate": 8.471445929526124e-05, "loss": 1.9751, "step": 2526 }, { "epoch": 0.7670359690393079, "grad_norm": 0.36097854375839233, "learning_rate": 8.470838396111787e-05, "loss": 1.2523, "step": 2527 }, { "epoch": 0.7673395052359994, "grad_norm": 0.3747158646583557, "learning_rate": 8.47023086269745e-05, "loss": 1.3921, "step": 2528 }, { "epoch": 0.7676430414326908, "grad_norm": 0.4365270435810089, "learning_rate": 8.46962332928311e-05, "loss": 2.1177, "step": 2529 }, { "epoch": 0.7679465776293823, "grad_norm": 0.39251551032066345, "learning_rate": 8.469015795868773e-05, "loss": 1.2146, "step": 2530 }, { "epoch": 0.7682501138260738, "grad_norm": 0.4519220292568207, "learning_rate": 8.468408262454436e-05, "loss": 1.7527, "step": 2531 }, { "epoch": 0.7685536500227652, "grad_norm": 0.436301589012146, "learning_rate": 8.467800729040097e-05, "loss": 1.8437, "step": 2532 }, { "epoch": 0.7688571862194566, "grad_norm": 0.4134485125541687, "learning_rate": 8.46719319562576e-05, "loss": 1.9497, "step": 2533 }, { "epoch": 0.7691607224161481, "grad_norm": 1.2619364261627197, "learning_rate": 8.466585662211423e-05, "loss": 2.0033, "step": 2534 }, { "epoch": 0.7694642586128396, "grad_norm": 0.3872610926628113, "learning_rate": 8.465978128797085e-05, "loss": 1.5046, "step": 2535 }, { "epoch": 0.7697677948095311, "grad_norm": 0.4039923846721649, "learning_rate": 8.465370595382746e-05, "loss": 1.8782, "step": 2536 }, { "epoch": 0.7700713310062225, "grad_norm": 0.3826749622821808, "learning_rate": 8.464763061968408e-05, "loss": 1.9288, "step": 2537 }, { "epoch": 0.7703748672029139, "grad_norm": 0.4400002062320709, "learning_rate": 8.464155528554071e-05, "loss": 2.2257, "step": 2538 }, { "epoch": 0.7706784033996054, "grad_norm": 0.6801483631134033, "learning_rate": 8.463547995139733e-05, "loss": 1.5758, "step": 2539 }, { "epoch": 0.7709819395962969, "grad_norm": 0.4618719816207886, "learning_rate": 8.462940461725395e-05, "loss": 1.5398, "step": 2540 }, { "epoch": 0.7712854757929883, "grad_norm": 0.3891013562679291, "learning_rate": 8.462332928311058e-05, "loss": 1.7876, "step": 2541 }, { "epoch": 0.7715890119896798, "grad_norm": 0.36486607789993286, "learning_rate": 8.46172539489672e-05, "loss": 1.4358, "step": 2542 }, { "epoch": 0.7718925481863712, "grad_norm": 0.4325253665447235, "learning_rate": 8.461117861482381e-05, "loss": 1.8423, "step": 2543 }, { "epoch": 0.7721960843830626, "grad_norm": 0.3679717481136322, "learning_rate": 8.460510328068044e-05, "loss": 1.2354, "step": 2544 }, { "epoch": 0.7724996205797542, "grad_norm": 0.3954870402812958, "learning_rate": 8.459902794653707e-05, "loss": 1.915, "step": 2545 }, { "epoch": 0.7728031567764456, "grad_norm": 0.3950810432434082, "learning_rate": 8.459295261239368e-05, "loss": 1.9933, "step": 2546 }, { "epoch": 0.773106692973137, "grad_norm": 0.5841623544692993, "learning_rate": 8.458687727825031e-05, "loss": 1.5726, "step": 2547 }, { "epoch": 0.7734102291698285, "grad_norm": 0.39942488074302673, "learning_rate": 8.458080194410694e-05, "loss": 1.8425, "step": 2548 }, { "epoch": 0.7737137653665199, "grad_norm": 0.4704907238483429, "learning_rate": 8.457472660996356e-05, "loss": 2.2747, "step": 2549 }, { "epoch": 0.7740173015632115, "grad_norm": 0.46205440163612366, "learning_rate": 8.456865127582017e-05, "loss": 1.8099, "step": 2550 }, { "epoch": 0.7743208377599029, "grad_norm": 0.4484197497367859, "learning_rate": 8.456257594167679e-05, "loss": 1.3406, "step": 2551 }, { "epoch": 0.7746243739565943, "grad_norm": 0.412507027387619, "learning_rate": 8.455650060753342e-05, "loss": 2.0005, "step": 2552 }, { "epoch": 0.7749279101532858, "grad_norm": 0.4087753891944885, "learning_rate": 8.455042527339004e-05, "loss": 2.0239, "step": 2553 }, { "epoch": 0.7752314463499772, "grad_norm": 0.5045974850654602, "learning_rate": 8.454434993924666e-05, "loss": 1.6854, "step": 2554 }, { "epoch": 0.7755349825466686, "grad_norm": 0.40595903992652893, "learning_rate": 8.453827460510329e-05, "loss": 1.8671, "step": 2555 }, { "epoch": 0.7758385187433602, "grad_norm": 0.3777897357940674, "learning_rate": 8.45321992709599e-05, "loss": 1.5473, "step": 2556 }, { "epoch": 0.7761420549400516, "grad_norm": 0.8116908073425293, "learning_rate": 8.452612393681652e-05, "loss": 2.0338, "step": 2557 }, { "epoch": 0.776445591136743, "grad_norm": 0.4156283736228943, "learning_rate": 8.452004860267315e-05, "loss": 1.9501, "step": 2558 }, { "epoch": 0.7767491273334345, "grad_norm": 0.3934132158756256, "learning_rate": 8.451397326852978e-05, "loss": 1.8579, "step": 2559 }, { "epoch": 0.7770526635301259, "grad_norm": 0.39838075637817383, "learning_rate": 8.450789793438639e-05, "loss": 1.5156, "step": 2560 }, { "epoch": 0.7773561997268175, "grad_norm": 0.38777777552604675, "learning_rate": 8.450182260024302e-05, "loss": 1.7479, "step": 2561 }, { "epoch": 0.7776597359235089, "grad_norm": 0.43510836362838745, "learning_rate": 8.449574726609964e-05, "loss": 1.6293, "step": 2562 }, { "epoch": 0.7779632721202003, "grad_norm": 0.42916715145111084, "learning_rate": 8.448967193195627e-05, "loss": 1.8737, "step": 2563 }, { "epoch": 0.7782668083168918, "grad_norm": 0.4157852232456207, "learning_rate": 8.448359659781288e-05, "loss": 1.5549, "step": 2564 }, { "epoch": 0.7785703445135832, "grad_norm": 0.39083394408226013, "learning_rate": 8.44775212636695e-05, "loss": 1.7937, "step": 2565 }, { "epoch": 0.7788738807102747, "grad_norm": 0.4398769438266754, "learning_rate": 8.447144592952613e-05, "loss": 1.8285, "step": 2566 }, { "epoch": 0.7791774169069662, "grad_norm": 0.4085114598274231, "learning_rate": 8.446537059538275e-05, "loss": 1.835, "step": 2567 }, { "epoch": 0.7794809531036576, "grad_norm": 0.44986245036125183, "learning_rate": 8.445929526123937e-05, "loss": 1.6762, "step": 2568 }, { "epoch": 0.779784489300349, "grad_norm": 0.3613260090351105, "learning_rate": 8.4453219927096e-05, "loss": 1.5119, "step": 2569 }, { "epoch": 0.7800880254970405, "grad_norm": 1.3417142629623413, "learning_rate": 8.444714459295261e-05, "loss": 1.7255, "step": 2570 }, { "epoch": 0.780391561693732, "grad_norm": 0.3687633275985718, "learning_rate": 8.444106925880923e-05, "loss": 1.846, "step": 2571 }, { "epoch": 0.7806950978904235, "grad_norm": 0.5285593867301941, "learning_rate": 8.443499392466586e-05, "loss": 1.8364, "step": 2572 }, { "epoch": 0.7809986340871149, "grad_norm": 0.6644722819328308, "learning_rate": 8.44289185905225e-05, "loss": 2.0563, "step": 2573 }, { "epoch": 0.7813021702838063, "grad_norm": 0.4382190704345703, "learning_rate": 8.44228432563791e-05, "loss": 1.8737, "step": 2574 }, { "epoch": 0.7816057064804978, "grad_norm": 0.43860745429992676, "learning_rate": 8.441676792223573e-05, "loss": 1.7924, "step": 2575 }, { "epoch": 0.7819092426771893, "grad_norm": 0.39125654101371765, "learning_rate": 8.441069258809235e-05, "loss": 1.159, "step": 2576 }, { "epoch": 0.7822127788738807, "grad_norm": 0.4389365017414093, "learning_rate": 8.440461725394898e-05, "loss": 1.9478, "step": 2577 }, { "epoch": 0.7825163150705722, "grad_norm": 0.46001267433166504, "learning_rate": 8.43985419198056e-05, "loss": 1.3252, "step": 2578 }, { "epoch": 0.7828198512672636, "grad_norm": 0.36286357045173645, "learning_rate": 8.439246658566221e-05, "loss": 1.9519, "step": 2579 }, { "epoch": 0.783123387463955, "grad_norm": 0.6641744375228882, "learning_rate": 8.438639125151884e-05, "loss": 1.9772, "step": 2580 }, { "epoch": 0.7834269236606465, "grad_norm": 0.43126794695854187, "learning_rate": 8.438031591737546e-05, "loss": 1.7583, "step": 2581 }, { "epoch": 0.783730459857338, "grad_norm": 0.4094223380088806, "learning_rate": 8.437424058323208e-05, "loss": 1.7251, "step": 2582 }, { "epoch": 0.7840339960540295, "grad_norm": 0.34994634985923767, "learning_rate": 8.436816524908871e-05, "loss": 1.8317, "step": 2583 }, { "epoch": 0.7843375322507209, "grad_norm": 0.3431306481361389, "learning_rate": 8.436208991494532e-05, "loss": 1.9223, "step": 2584 }, { "epoch": 0.7846410684474123, "grad_norm": 0.43418046832084656, "learning_rate": 8.435601458080194e-05, "loss": 2.0232, "step": 2585 }, { "epoch": 0.7849446046441038, "grad_norm": 0.3941698670387268, "learning_rate": 8.434993924665857e-05, "loss": 2.0736, "step": 2586 }, { "epoch": 0.7852481408407953, "grad_norm": 0.9890521764755249, "learning_rate": 8.43438639125152e-05, "loss": 1.6363, "step": 2587 }, { "epoch": 0.7855516770374867, "grad_norm": 0.3400576114654541, "learning_rate": 8.433778857837181e-05, "loss": 1.8934, "step": 2588 }, { "epoch": 0.7858552132341782, "grad_norm": 0.4412512183189392, "learning_rate": 8.433171324422844e-05, "loss": 1.6729, "step": 2589 }, { "epoch": 0.7861587494308696, "grad_norm": 0.7804542183876038, "learning_rate": 8.432563791008506e-05, "loss": 2.0693, "step": 2590 }, { "epoch": 0.786462285627561, "grad_norm": 0.48216378688812256, "learning_rate": 8.431956257594167e-05, "loss": 1.622, "step": 2591 }, { "epoch": 0.7867658218242526, "grad_norm": 0.4186027944087982, "learning_rate": 8.43134872417983e-05, "loss": 2.0402, "step": 2592 }, { "epoch": 0.787069358020944, "grad_norm": 0.35439014434814453, "learning_rate": 8.430741190765492e-05, "loss": 2.0655, "step": 2593 }, { "epoch": 0.7873728942176355, "grad_norm": 0.4577588140964508, "learning_rate": 8.430133657351155e-05, "loss": 1.203, "step": 2594 }, { "epoch": 0.7876764304143269, "grad_norm": 0.42012783885002136, "learning_rate": 8.429526123936817e-05, "loss": 1.8616, "step": 2595 }, { "epoch": 0.7879799666110183, "grad_norm": 0.3673686683177948, "learning_rate": 8.428918590522479e-05, "loss": 2.0879, "step": 2596 }, { "epoch": 0.7882835028077099, "grad_norm": 0.4281460642814636, "learning_rate": 8.428311057108142e-05, "loss": 1.8722, "step": 2597 }, { "epoch": 0.7885870390044013, "grad_norm": 0.432608962059021, "learning_rate": 8.427703523693803e-05, "loss": 1.7152, "step": 2598 }, { "epoch": 0.7888905752010927, "grad_norm": 0.3567800521850586, "learning_rate": 8.427095990279465e-05, "loss": 1.534, "step": 2599 }, { "epoch": 0.7891941113977842, "grad_norm": 0.383045494556427, "learning_rate": 8.426488456865128e-05, "loss": 1.7063, "step": 2600 }, { "epoch": 0.7894976475944756, "grad_norm": 0.3540615141391754, "learning_rate": 8.425880923450791e-05, "loss": 1.6996, "step": 2601 }, { "epoch": 0.7898011837911671, "grad_norm": 0.44690582156181335, "learning_rate": 8.425273390036452e-05, "loss": 1.9774, "step": 2602 }, { "epoch": 0.7901047199878586, "grad_norm": 0.49040475487709045, "learning_rate": 8.424665856622115e-05, "loss": 2.1205, "step": 2603 }, { "epoch": 0.79040825618455, "grad_norm": 0.4510320723056793, "learning_rate": 8.424058323207777e-05, "loss": 2.0409, "step": 2604 }, { "epoch": 0.7907117923812415, "grad_norm": 0.41996338963508606, "learning_rate": 8.423450789793438e-05, "loss": 1.7738, "step": 2605 }, { "epoch": 0.7910153285779329, "grad_norm": 0.4076237976551056, "learning_rate": 8.422843256379101e-05, "loss": 2.1186, "step": 2606 }, { "epoch": 0.7913188647746243, "grad_norm": 0.4231566786766052, "learning_rate": 8.422235722964763e-05, "loss": 1.8912, "step": 2607 }, { "epoch": 0.7916224009713159, "grad_norm": 0.6864861249923706, "learning_rate": 8.421628189550426e-05, "loss": 1.9234, "step": 2608 }, { "epoch": 0.7919259371680073, "grad_norm": 0.3856845796108246, "learning_rate": 8.421020656136088e-05, "loss": 2.0214, "step": 2609 }, { "epoch": 0.7922294733646987, "grad_norm": 0.39030376076698303, "learning_rate": 8.42041312272175e-05, "loss": 1.9272, "step": 2610 }, { "epoch": 0.7925330095613902, "grad_norm": 0.407306432723999, "learning_rate": 8.419805589307413e-05, "loss": 1.9412, "step": 2611 }, { "epoch": 0.7928365457580816, "grad_norm": 0.41559773683547974, "learning_rate": 8.419198055893074e-05, "loss": 1.7524, "step": 2612 }, { "epoch": 0.7931400819547731, "grad_norm": 0.382524311542511, "learning_rate": 8.418590522478736e-05, "loss": 2.0343, "step": 2613 }, { "epoch": 0.7934436181514646, "grad_norm": 0.42608487606048584, "learning_rate": 8.417982989064399e-05, "loss": 1.9188, "step": 2614 }, { "epoch": 0.793747154348156, "grad_norm": 0.3967500329017639, "learning_rate": 8.417375455650061e-05, "loss": 1.8737, "step": 2615 }, { "epoch": 0.7940506905448474, "grad_norm": 0.493898868560791, "learning_rate": 8.416767922235723e-05, "loss": 1.8199, "step": 2616 }, { "epoch": 0.7943542267415389, "grad_norm": 0.5007172226905823, "learning_rate": 8.416160388821386e-05, "loss": 2.0033, "step": 2617 }, { "epoch": 0.7946577629382304, "grad_norm": 0.4337385594844818, "learning_rate": 8.415552855407048e-05, "loss": 2.1693, "step": 2618 }, { "epoch": 0.7949612991349219, "grad_norm": 0.40338581800460815, "learning_rate": 8.414945321992709e-05, "loss": 1.9985, "step": 2619 }, { "epoch": 0.7952648353316133, "grad_norm": 0.3842269778251648, "learning_rate": 8.414337788578372e-05, "loss": 1.5293, "step": 2620 }, { "epoch": 0.7955683715283047, "grad_norm": 0.35648632049560547, "learning_rate": 8.413730255164034e-05, "loss": 1.9728, "step": 2621 }, { "epoch": 0.7958719077249962, "grad_norm": 0.4350222945213318, "learning_rate": 8.413122721749697e-05, "loss": 1.3873, "step": 2622 }, { "epoch": 0.7961754439216877, "grad_norm": 0.605980634689331, "learning_rate": 8.412515188335359e-05, "loss": 2.052, "step": 2623 }, { "epoch": 0.7964789801183791, "grad_norm": 0.6555821299552917, "learning_rate": 8.41190765492102e-05, "loss": 2.146, "step": 2624 }, { "epoch": 0.7967825163150706, "grad_norm": 0.42681270837783813, "learning_rate": 8.411300121506684e-05, "loss": 1.0012, "step": 2625 }, { "epoch": 0.797086052511762, "grad_norm": 0.43222132325172424, "learning_rate": 8.410692588092345e-05, "loss": 2.1722, "step": 2626 }, { "epoch": 0.7973895887084534, "grad_norm": 0.40917056798934937, "learning_rate": 8.410085054678007e-05, "loss": 2.1489, "step": 2627 }, { "epoch": 0.797693124905145, "grad_norm": 0.4139658212661743, "learning_rate": 8.40947752126367e-05, "loss": 2.0852, "step": 2628 }, { "epoch": 0.7979966611018364, "grad_norm": 1.7534079551696777, "learning_rate": 8.408869987849332e-05, "loss": 1.7122, "step": 2629 }, { "epoch": 0.7983001972985279, "grad_norm": 0.37330594658851624, "learning_rate": 8.408262454434994e-05, "loss": 1.6402, "step": 2630 }, { "epoch": 0.7986037334952193, "grad_norm": 0.7637292742729187, "learning_rate": 8.407654921020657e-05, "loss": 1.6067, "step": 2631 }, { "epoch": 0.7989072696919107, "grad_norm": 0.48156240582466125, "learning_rate": 8.407047387606319e-05, "loss": 1.8114, "step": 2632 }, { "epoch": 0.7992108058886022, "grad_norm": 0.3753802180290222, "learning_rate": 8.40643985419198e-05, "loss": 1.6239, "step": 2633 }, { "epoch": 0.7995143420852937, "grad_norm": 0.4283507764339447, "learning_rate": 8.405832320777643e-05, "loss": 2.0778, "step": 2634 }, { "epoch": 0.7998178782819851, "grad_norm": 0.3911525309085846, "learning_rate": 8.405224787363305e-05, "loss": 1.9279, "step": 2635 }, { "epoch": 0.8001214144786766, "grad_norm": 0.4033350646495819, "learning_rate": 8.404617253948968e-05, "loss": 1.3199, "step": 2636 }, { "epoch": 0.800424950675368, "grad_norm": 0.398269921541214, "learning_rate": 8.40400972053463e-05, "loss": 2.0073, "step": 2637 }, { "epoch": 0.8007284868720594, "grad_norm": 0.5763013362884521, "learning_rate": 8.403402187120292e-05, "loss": 1.517, "step": 2638 }, { "epoch": 0.801032023068751, "grad_norm": 0.4008599817752838, "learning_rate": 8.402794653705955e-05, "loss": 1.6136, "step": 2639 }, { "epoch": 0.8013355592654424, "grad_norm": 0.39726853370666504, "learning_rate": 8.402187120291616e-05, "loss": 1.8454, "step": 2640 }, { "epoch": 0.8016390954621339, "grad_norm": 0.4768484830856323, "learning_rate": 8.401579586877278e-05, "loss": 1.4961, "step": 2641 }, { "epoch": 0.8019426316588253, "grad_norm": 0.4185827374458313, "learning_rate": 8.400972053462941e-05, "loss": 1.8617, "step": 2642 }, { "epoch": 0.8022461678555167, "grad_norm": 0.43734210729599, "learning_rate": 8.400364520048603e-05, "loss": 1.4771, "step": 2643 }, { "epoch": 0.8025497040522083, "grad_norm": 0.4270019233226776, "learning_rate": 8.399756986634265e-05, "loss": 1.8692, "step": 2644 }, { "epoch": 0.8028532402488997, "grad_norm": 0.35486480593681335, "learning_rate": 8.399149453219928e-05, "loss": 1.8821, "step": 2645 }, { "epoch": 0.8031567764455911, "grad_norm": 0.9134595394134521, "learning_rate": 8.39854191980559e-05, "loss": 1.8291, "step": 2646 }, { "epoch": 0.8034603126422826, "grad_norm": 0.43372470140457153, "learning_rate": 8.397934386391251e-05, "loss": 2.0605, "step": 2647 }, { "epoch": 0.803763848838974, "grad_norm": 0.39876699447631836, "learning_rate": 8.397326852976914e-05, "loss": 2.2645, "step": 2648 }, { "epoch": 0.8040673850356655, "grad_norm": 0.39235416054725647, "learning_rate": 8.396719319562576e-05, "loss": 1.9287, "step": 2649 }, { "epoch": 0.804370921232357, "grad_norm": 0.3264532685279846, "learning_rate": 8.396111786148239e-05, "loss": 1.856, "step": 2650 }, { "epoch": 0.8046744574290484, "grad_norm": 0.4189594089984894, "learning_rate": 8.395504252733901e-05, "loss": 2.0628, "step": 2651 }, { "epoch": 0.8049779936257399, "grad_norm": 0.3941250145435333, "learning_rate": 8.394896719319563e-05, "loss": 1.7251, "step": 2652 }, { "epoch": 0.8052815298224313, "grad_norm": 0.42837756872177124, "learning_rate": 8.394289185905226e-05, "loss": 1.2304, "step": 2653 }, { "epoch": 0.8055850660191228, "grad_norm": 0.8526172637939453, "learning_rate": 8.393681652490887e-05, "loss": 2.0248, "step": 2654 }, { "epoch": 0.8058886022158143, "grad_norm": 0.36125120520591736, "learning_rate": 8.393074119076549e-05, "loss": 1.522, "step": 2655 }, { "epoch": 0.8061921384125057, "grad_norm": 0.34955886006355286, "learning_rate": 8.392466585662212e-05, "loss": 1.5399, "step": 2656 }, { "epoch": 0.8064956746091971, "grad_norm": 0.42194581031799316, "learning_rate": 8.391859052247874e-05, "loss": 1.5088, "step": 2657 }, { "epoch": 0.8067992108058886, "grad_norm": 0.41130530834198, "learning_rate": 8.391251518833536e-05, "loss": 1.9864, "step": 2658 }, { "epoch": 0.8071027470025801, "grad_norm": 0.3659766614437103, "learning_rate": 8.390643985419199e-05, "loss": 1.3164, "step": 2659 }, { "epoch": 0.8074062831992715, "grad_norm": 0.4178526997566223, "learning_rate": 8.39003645200486e-05, "loss": 2.096, "step": 2660 }, { "epoch": 0.807709819395963, "grad_norm": 0.44985684752464294, "learning_rate": 8.389428918590522e-05, "loss": 1.5053, "step": 2661 }, { "epoch": 0.8080133555926544, "grad_norm": 0.5702995657920837, "learning_rate": 8.388821385176185e-05, "loss": 1.745, "step": 2662 }, { "epoch": 0.8083168917893458, "grad_norm": 0.5479261875152588, "learning_rate": 8.388213851761847e-05, "loss": 1.6453, "step": 2663 }, { "epoch": 0.8086204279860373, "grad_norm": 0.5145617723464966, "learning_rate": 8.387606318347509e-05, "loss": 1.3749, "step": 2664 }, { "epoch": 0.8089239641827288, "grad_norm": 0.3433884084224701, "learning_rate": 8.386998784933172e-05, "loss": 1.8217, "step": 2665 }, { "epoch": 0.8092275003794203, "grad_norm": 0.4309893548488617, "learning_rate": 8.386391251518834e-05, "loss": 1.8368, "step": 2666 }, { "epoch": 0.8095310365761117, "grad_norm": 0.42593100666999817, "learning_rate": 8.385783718104497e-05, "loss": 1.897, "step": 2667 }, { "epoch": 0.8098345727728031, "grad_norm": 0.3921912908554077, "learning_rate": 8.385176184690159e-05, "loss": 1.8504, "step": 2668 }, { "epoch": 0.8101381089694946, "grad_norm": 0.4481246769428253, "learning_rate": 8.38456865127582e-05, "loss": 2.2592, "step": 2669 }, { "epoch": 0.8104416451661861, "grad_norm": 0.3490237891674042, "learning_rate": 8.383961117861483e-05, "loss": 1.792, "step": 2670 }, { "epoch": 0.8107451813628775, "grad_norm": 0.4693361818790436, "learning_rate": 8.383353584447145e-05, "loss": 2.0438, "step": 2671 }, { "epoch": 0.811048717559569, "grad_norm": 0.3441024720668793, "learning_rate": 8.382746051032807e-05, "loss": 1.7983, "step": 2672 }, { "epoch": 0.8113522537562604, "grad_norm": 0.4398588240146637, "learning_rate": 8.38213851761847e-05, "loss": 1.9503, "step": 2673 }, { "epoch": 0.8116557899529518, "grad_norm": 0.36766424775123596, "learning_rate": 8.381530984204132e-05, "loss": 1.9745, "step": 2674 }, { "epoch": 0.8119593261496434, "grad_norm": 0.4529463052749634, "learning_rate": 8.380923450789793e-05, "loss": 1.9025, "step": 2675 }, { "epoch": 0.8122628623463348, "grad_norm": 0.4247633218765259, "learning_rate": 8.380315917375456e-05, "loss": 1.9669, "step": 2676 }, { "epoch": 0.8125663985430263, "grad_norm": 0.39208900928497314, "learning_rate": 8.379708383961118e-05, "loss": 1.8593, "step": 2677 }, { "epoch": 0.8128699347397177, "grad_norm": 0.46601489186286926, "learning_rate": 8.37910085054678e-05, "loss": 1.8102, "step": 2678 }, { "epoch": 0.8131734709364091, "grad_norm": 0.4215412139892578, "learning_rate": 8.378493317132443e-05, "loss": 2.1686, "step": 2679 }, { "epoch": 0.8134770071331007, "grad_norm": 0.4089846909046173, "learning_rate": 8.377885783718105e-05, "loss": 1.8722, "step": 2680 }, { "epoch": 0.8137805433297921, "grad_norm": 0.43888887763023376, "learning_rate": 8.377278250303768e-05, "loss": 1.7578, "step": 2681 }, { "epoch": 0.8140840795264835, "grad_norm": 0.6995130777359009, "learning_rate": 8.37667071688943e-05, "loss": 1.7296, "step": 2682 }, { "epoch": 0.814387615723175, "grad_norm": 0.4700230360031128, "learning_rate": 8.376063183475091e-05, "loss": 1.3055, "step": 2683 }, { "epoch": 0.8146911519198664, "grad_norm": 0.4099135398864746, "learning_rate": 8.375455650060754e-05, "loss": 1.9858, "step": 2684 }, { "epoch": 0.814994688116558, "grad_norm": 0.44735094904899597, "learning_rate": 8.374848116646416e-05, "loss": 1.9615, "step": 2685 }, { "epoch": 0.8152982243132494, "grad_norm": 0.6808655858039856, "learning_rate": 8.374240583232078e-05, "loss": 1.7893, "step": 2686 }, { "epoch": 0.8156017605099408, "grad_norm": 0.4560304880142212, "learning_rate": 8.373633049817741e-05, "loss": 1.7048, "step": 2687 }, { "epoch": 0.8159052967066323, "grad_norm": 0.4043562114238739, "learning_rate": 8.373025516403403e-05, "loss": 2.0911, "step": 2688 }, { "epoch": 0.8162088329033237, "grad_norm": 0.40530329942703247, "learning_rate": 8.372417982989064e-05, "loss": 1.8597, "step": 2689 }, { "epoch": 0.8165123691000151, "grad_norm": 0.42696669697761536, "learning_rate": 8.371810449574727e-05, "loss": 1.354, "step": 2690 }, { "epoch": 0.8168159052967067, "grad_norm": 0.411856472492218, "learning_rate": 8.371202916160389e-05, "loss": 1.8913, "step": 2691 }, { "epoch": 0.8171194414933981, "grad_norm": 1.3200637102127075, "learning_rate": 8.370595382746051e-05, "loss": 1.9903, "step": 2692 }, { "epoch": 0.8174229776900895, "grad_norm": 0.4155752956867218, "learning_rate": 8.369987849331714e-05, "loss": 2.036, "step": 2693 }, { "epoch": 0.817726513886781, "grad_norm": 0.4618370234966278, "learning_rate": 8.369380315917376e-05, "loss": 1.8101, "step": 2694 }, { "epoch": 0.8180300500834724, "grad_norm": 0.338123619556427, "learning_rate": 8.368772782503039e-05, "loss": 1.7934, "step": 2695 }, { "epoch": 0.818333586280164, "grad_norm": 0.4331413507461548, "learning_rate": 8.3681652490887e-05, "loss": 1.7394, "step": 2696 }, { "epoch": 0.8186371224768554, "grad_norm": 0.3667849004268646, "learning_rate": 8.367557715674362e-05, "loss": 1.9162, "step": 2697 }, { "epoch": 0.8189406586735468, "grad_norm": 0.4584942162036896, "learning_rate": 8.366950182260025e-05, "loss": 1.801, "step": 2698 }, { "epoch": 0.8192441948702383, "grad_norm": 0.4310884475708008, "learning_rate": 8.366342648845687e-05, "loss": 1.952, "step": 2699 }, { "epoch": 0.8195477310669297, "grad_norm": 0.35577401518821716, "learning_rate": 8.365735115431349e-05, "loss": 2.0338, "step": 2700 }, { "epoch": 0.8198512672636212, "grad_norm": 0.4453931152820587, "learning_rate": 8.365127582017012e-05, "loss": 1.777, "step": 2701 }, { "epoch": 0.8201548034603127, "grad_norm": 0.4156850576400757, "learning_rate": 8.364520048602674e-05, "loss": 1.8725, "step": 2702 }, { "epoch": 0.8204583396570041, "grad_norm": 0.3999830186367035, "learning_rate": 8.363912515188335e-05, "loss": 2.0136, "step": 2703 }, { "epoch": 0.8207618758536955, "grad_norm": 0.4082907736301422, "learning_rate": 8.363304981773998e-05, "loss": 1.9336, "step": 2704 }, { "epoch": 0.821065412050387, "grad_norm": 0.41365379095077515, "learning_rate": 8.36269744835966e-05, "loss": 1.8598, "step": 2705 }, { "epoch": 0.8213689482470785, "grad_norm": 0.4504840075969696, "learning_rate": 8.362089914945322e-05, "loss": 2.0346, "step": 2706 }, { "epoch": 0.82167248444377, "grad_norm": 0.5199129581451416, "learning_rate": 8.361482381530985e-05, "loss": 1.3809, "step": 2707 }, { "epoch": 0.8219760206404614, "grad_norm": 0.316165030002594, "learning_rate": 8.360874848116647e-05, "loss": 1.6682, "step": 2708 }, { "epoch": 0.8222795568371528, "grad_norm": 0.434994637966156, "learning_rate": 8.36026731470231e-05, "loss": 1.9569, "step": 2709 }, { "epoch": 0.8225830930338442, "grad_norm": 0.4767877757549286, "learning_rate": 8.359659781287972e-05, "loss": 2.0574, "step": 2710 }, { "epoch": 0.8228866292305358, "grad_norm": 0.36715012788772583, "learning_rate": 8.359052247873633e-05, "loss": 1.5894, "step": 2711 }, { "epoch": 0.8231901654272272, "grad_norm": 0.47204360365867615, "learning_rate": 8.358444714459296e-05, "loss": 2.0371, "step": 2712 }, { "epoch": 0.8234937016239187, "grad_norm": 0.4339917302131653, "learning_rate": 8.357837181044958e-05, "loss": 1.6804, "step": 2713 }, { "epoch": 0.8237972378206101, "grad_norm": 0.40211397409439087, "learning_rate": 8.35722964763062e-05, "loss": 1.8478, "step": 2714 }, { "epoch": 0.8241007740173015, "grad_norm": 0.4626907408237457, "learning_rate": 8.356622114216283e-05, "loss": 1.0077, "step": 2715 }, { "epoch": 0.824404310213993, "grad_norm": 0.5032857060432434, "learning_rate": 8.356014580801945e-05, "loss": 1.4585, "step": 2716 }, { "epoch": 0.8247078464106845, "grad_norm": 0.7893280982971191, "learning_rate": 8.355407047387606e-05, "loss": 1.7855, "step": 2717 }, { "epoch": 0.8250113826073759, "grad_norm": 0.4199140965938568, "learning_rate": 8.35479951397327e-05, "loss": 2.036, "step": 2718 }, { "epoch": 0.8253149188040674, "grad_norm": 0.4148293137550354, "learning_rate": 8.354191980558931e-05, "loss": 1.6445, "step": 2719 }, { "epoch": 0.8256184550007588, "grad_norm": 0.40081986784935, "learning_rate": 8.353584447144593e-05, "loss": 1.8132, "step": 2720 }, { "epoch": 0.8259219911974502, "grad_norm": 0.38736727833747864, "learning_rate": 8.352976913730256e-05, "loss": 2.1591, "step": 2721 }, { "epoch": 0.8262255273941418, "grad_norm": 0.39787808060646057, "learning_rate": 8.352369380315918e-05, "loss": 1.986, "step": 2722 }, { "epoch": 0.8265290635908332, "grad_norm": 0.4428958594799042, "learning_rate": 8.351761846901581e-05, "loss": 2.0022, "step": 2723 }, { "epoch": 0.8268325997875247, "grad_norm": 0.4137169420719147, "learning_rate": 8.351154313487243e-05, "loss": 1.647, "step": 2724 }, { "epoch": 0.8271361359842161, "grad_norm": 0.3762650489807129, "learning_rate": 8.350546780072904e-05, "loss": 1.6598, "step": 2725 }, { "epoch": 0.8274396721809075, "grad_norm": 0.4346376955509186, "learning_rate": 8.349939246658567e-05, "loss": 1.9286, "step": 2726 }, { "epoch": 0.8277432083775991, "grad_norm": 0.4809822142124176, "learning_rate": 8.349331713244228e-05, "loss": 1.7861, "step": 2727 }, { "epoch": 0.8280467445742905, "grad_norm": 0.4233179986476898, "learning_rate": 8.348724179829891e-05, "loss": 1.4572, "step": 2728 }, { "epoch": 0.8283502807709819, "grad_norm": 0.4098990261554718, "learning_rate": 8.348116646415554e-05, "loss": 1.5889, "step": 2729 }, { "epoch": 0.8286538169676734, "grad_norm": 0.5033220052719116, "learning_rate": 8.347509113001216e-05, "loss": 2.0879, "step": 2730 }, { "epoch": 0.8289573531643648, "grad_norm": 0.4248674809932709, "learning_rate": 8.346901579586877e-05, "loss": 1.113, "step": 2731 }, { "epoch": 0.8292608893610564, "grad_norm": 0.4501001834869385, "learning_rate": 8.34629404617254e-05, "loss": 1.984, "step": 2732 }, { "epoch": 0.8295644255577478, "grad_norm": 0.4608478844165802, "learning_rate": 8.345686512758202e-05, "loss": 1.692, "step": 2733 }, { "epoch": 0.8298679617544392, "grad_norm": 0.4299629330635071, "learning_rate": 8.345078979343864e-05, "loss": 2.0634, "step": 2734 }, { "epoch": 0.8301714979511307, "grad_norm": 0.4118325412273407, "learning_rate": 8.344471445929527e-05, "loss": 1.9081, "step": 2735 }, { "epoch": 0.8304750341478221, "grad_norm": 0.5083432793617249, "learning_rate": 8.343863912515189e-05, "loss": 2.1141, "step": 2736 }, { "epoch": 0.8307785703445136, "grad_norm": 0.7300907373428345, "learning_rate": 8.34325637910085e-05, "loss": 1.3025, "step": 2737 }, { "epoch": 0.8310821065412051, "grad_norm": 0.4016704261302948, "learning_rate": 8.342648845686512e-05, "loss": 1.8973, "step": 2738 }, { "epoch": 0.8313856427378965, "grad_norm": 0.4292236268520355, "learning_rate": 8.342041312272175e-05, "loss": 1.9909, "step": 2739 }, { "epoch": 0.8316891789345879, "grad_norm": 0.4190838634967804, "learning_rate": 8.341433778857838e-05, "loss": 1.8034, "step": 2740 }, { "epoch": 0.8319927151312794, "grad_norm": 0.4143367111682892, "learning_rate": 8.340826245443499e-05, "loss": 2.0459, "step": 2741 }, { "epoch": 0.8322962513279709, "grad_norm": 0.46704939007759094, "learning_rate": 8.340218712029162e-05, "loss": 1.6579, "step": 2742 }, { "epoch": 0.8325997875246623, "grad_norm": 0.48142144083976746, "learning_rate": 8.339611178614825e-05, "loss": 1.8507, "step": 2743 }, { "epoch": 0.8329033237213538, "grad_norm": 0.42653772234916687, "learning_rate": 8.339003645200487e-05, "loss": 1.9661, "step": 2744 }, { "epoch": 0.8332068599180452, "grad_norm": 0.42195385694503784, "learning_rate": 8.338396111786148e-05, "loss": 1.583, "step": 2745 }, { "epoch": 0.8335103961147367, "grad_norm": 0.5214222073554993, "learning_rate": 8.337788578371811e-05, "loss": 0.8356, "step": 2746 }, { "epoch": 0.8338139323114281, "grad_norm": 0.4736870229244232, "learning_rate": 8.337181044957473e-05, "loss": 2.1115, "step": 2747 }, { "epoch": 0.8341174685081196, "grad_norm": 0.4879785180091858, "learning_rate": 8.336573511543135e-05, "loss": 2.1468, "step": 2748 }, { "epoch": 0.8344210047048111, "grad_norm": 0.33596518635749817, "learning_rate": 8.335965978128798e-05, "loss": 1.9451, "step": 2749 }, { "epoch": 0.8347245409015025, "grad_norm": 0.3724137246608734, "learning_rate": 8.33535844471446e-05, "loss": 1.4332, "step": 2750 }, { "epoch": 0.8350280770981939, "grad_norm": 0.41488635540008545, "learning_rate": 8.334750911300121e-05, "loss": 2.245, "step": 2751 }, { "epoch": 0.8353316132948854, "grad_norm": 0.41388005018234253, "learning_rate": 8.334143377885783e-05, "loss": 2.0323, "step": 2752 }, { "epoch": 0.8356351494915769, "grad_norm": 0.8086270093917847, "learning_rate": 8.333535844471446e-05, "loss": 1.9422, "step": 2753 }, { "epoch": 0.8359386856882683, "grad_norm": 0.3645714223384857, "learning_rate": 8.33292831105711e-05, "loss": 1.7161, "step": 2754 }, { "epoch": 0.8362422218849598, "grad_norm": 0.36916327476501465, "learning_rate": 8.33232077764277e-05, "loss": 1.7339, "step": 2755 }, { "epoch": 0.8365457580816512, "grad_norm": 0.3351556956768036, "learning_rate": 8.331713244228433e-05, "loss": 1.5955, "step": 2756 }, { "epoch": 0.8368492942783426, "grad_norm": 0.4345923364162445, "learning_rate": 8.331105710814096e-05, "loss": 1.871, "step": 2757 }, { "epoch": 0.8371528304750342, "grad_norm": 0.4099547266960144, "learning_rate": 8.330498177399758e-05, "loss": 1.8057, "step": 2758 }, { "epoch": 0.8374563666717256, "grad_norm": 0.45009273290634155, "learning_rate": 8.32989064398542e-05, "loss": 1.3956, "step": 2759 }, { "epoch": 0.8377599028684171, "grad_norm": 0.3890456557273865, "learning_rate": 8.329283110571082e-05, "loss": 1.837, "step": 2760 }, { "epoch": 0.8380634390651085, "grad_norm": 0.4065060615539551, "learning_rate": 8.328675577156744e-05, "loss": 2.1527, "step": 2761 }, { "epoch": 0.8383669752617999, "grad_norm": 0.4432562589645386, "learning_rate": 8.328068043742406e-05, "loss": 1.5033, "step": 2762 }, { "epoch": 0.8386705114584915, "grad_norm": 0.4977710247039795, "learning_rate": 8.327460510328069e-05, "loss": 2.1406, "step": 2763 }, { "epoch": 0.8389740476551829, "grad_norm": 1.0339199304580688, "learning_rate": 8.326852976913731e-05, "loss": 1.9732, "step": 2764 }, { "epoch": 0.8392775838518743, "grad_norm": 1.5824745893478394, "learning_rate": 8.326245443499392e-05, "loss": 1.7956, "step": 2765 }, { "epoch": 0.8395811200485658, "grad_norm": 0.4485887587070465, "learning_rate": 8.325637910085054e-05, "loss": 1.71, "step": 2766 }, { "epoch": 0.8398846562452572, "grad_norm": 0.3982546329498291, "learning_rate": 8.325030376670717e-05, "loss": 2.0978, "step": 2767 }, { "epoch": 0.8401881924419488, "grad_norm": 0.5837999582290649, "learning_rate": 8.32442284325638e-05, "loss": 2.113, "step": 2768 }, { "epoch": 0.8404917286386402, "grad_norm": 0.5739153623580933, "learning_rate": 8.323815309842041e-05, "loss": 2.1892, "step": 2769 }, { "epoch": 0.8407952648353316, "grad_norm": 0.3813978135585785, "learning_rate": 8.323207776427704e-05, "loss": 1.2944, "step": 2770 }, { "epoch": 0.8410988010320231, "grad_norm": 0.4146029055118561, "learning_rate": 8.322600243013367e-05, "loss": 2.0011, "step": 2771 }, { "epoch": 0.8414023372287145, "grad_norm": 0.38315144181251526, "learning_rate": 8.321992709599029e-05, "loss": 2.1513, "step": 2772 }, { "epoch": 0.8417058734254059, "grad_norm": 0.4339327812194824, "learning_rate": 8.32138517618469e-05, "loss": 1.882, "step": 2773 }, { "epoch": 0.8420094096220975, "grad_norm": 0.40696778893470764, "learning_rate": 8.320777642770353e-05, "loss": 1.3785, "step": 2774 }, { "epoch": 0.8423129458187889, "grad_norm": 0.401257187128067, "learning_rate": 8.320170109356015e-05, "loss": 1.8048, "step": 2775 }, { "epoch": 0.8426164820154803, "grad_norm": 0.419649213552475, "learning_rate": 8.319562575941677e-05, "loss": 1.869, "step": 2776 }, { "epoch": 0.8429200182121718, "grad_norm": 0.45188263058662415, "learning_rate": 8.31895504252734e-05, "loss": 1.9754, "step": 2777 }, { "epoch": 0.8432235544088632, "grad_norm": 0.42580482363700867, "learning_rate": 8.318347509113002e-05, "loss": 2.0827, "step": 2778 }, { "epoch": 0.8435270906055548, "grad_norm": 0.3485068678855896, "learning_rate": 8.317739975698663e-05, "loss": 1.1861, "step": 2779 }, { "epoch": 0.8438306268022462, "grad_norm": 0.38991910219192505, "learning_rate": 8.317132442284325e-05, "loss": 1.9857, "step": 2780 }, { "epoch": 0.8441341629989376, "grad_norm": 0.4066307842731476, "learning_rate": 8.316524908869988e-05, "loss": 1.6987, "step": 2781 }, { "epoch": 0.844437699195629, "grad_norm": 0.40589094161987305, "learning_rate": 8.315917375455651e-05, "loss": 2.099, "step": 2782 }, { "epoch": 0.8447412353923205, "grad_norm": 0.42218223214149475, "learning_rate": 8.315309842041312e-05, "loss": 1.7493, "step": 2783 }, { "epoch": 0.845044771589012, "grad_norm": 0.33325353264808655, "learning_rate": 8.314702308626975e-05, "loss": 1.6937, "step": 2784 }, { "epoch": 0.8453483077857035, "grad_norm": 0.4162006676197052, "learning_rate": 8.314094775212638e-05, "loss": 1.6099, "step": 2785 }, { "epoch": 0.8456518439823949, "grad_norm": 1.9040342569351196, "learning_rate": 8.3134872417983e-05, "loss": 1.9213, "step": 2786 }, { "epoch": 0.8459553801790863, "grad_norm": 0.4040900468826294, "learning_rate": 8.312879708383961e-05, "loss": 1.9817, "step": 2787 }, { "epoch": 0.8462589163757778, "grad_norm": 0.4395250082015991, "learning_rate": 8.312272174969624e-05, "loss": 1.8821, "step": 2788 }, { "epoch": 0.8465624525724693, "grad_norm": 0.40407246351242065, "learning_rate": 8.311664641555286e-05, "loss": 1.7777, "step": 2789 }, { "epoch": 0.8468659887691607, "grad_norm": 0.39172056317329407, "learning_rate": 8.311057108140948e-05, "loss": 1.9446, "step": 2790 }, { "epoch": 0.8471695249658522, "grad_norm": 0.4654727876186371, "learning_rate": 8.310449574726611e-05, "loss": 1.5926, "step": 2791 }, { "epoch": 0.8474730611625436, "grad_norm": 0.41954633593559265, "learning_rate": 8.309842041312273e-05, "loss": 2.0347, "step": 2792 }, { "epoch": 0.847776597359235, "grad_norm": 0.39012208580970764, "learning_rate": 8.309234507897934e-05, "loss": 1.8302, "step": 2793 }, { "epoch": 0.8480801335559266, "grad_norm": 0.3932954967021942, "learning_rate": 8.308626974483596e-05, "loss": 2.0534, "step": 2794 }, { "epoch": 0.848383669752618, "grad_norm": 0.40115275979042053, "learning_rate": 8.308019441069259e-05, "loss": 2.1026, "step": 2795 }, { "epoch": 0.8486872059493095, "grad_norm": 0.6058691143989563, "learning_rate": 8.307411907654922e-05, "loss": 1.6423, "step": 2796 }, { "epoch": 0.8489907421460009, "grad_norm": 0.3684822916984558, "learning_rate": 8.306804374240583e-05, "loss": 1.9901, "step": 2797 }, { "epoch": 0.8492942783426923, "grad_norm": 0.3942423164844513, "learning_rate": 8.306196840826246e-05, "loss": 1.9238, "step": 2798 }, { "epoch": 0.8495978145393838, "grad_norm": 0.3520863354206085, "learning_rate": 8.305589307411909e-05, "loss": 1.7857, "step": 2799 }, { "epoch": 0.8499013507360753, "grad_norm": 0.7609321475028992, "learning_rate": 8.304981773997569e-05, "loss": 1.9176, "step": 2800 }, { "epoch": 0.8502048869327667, "grad_norm": 0.45220932364463806, "learning_rate": 8.304374240583232e-05, "loss": 1.8932, "step": 2801 }, { "epoch": 0.8505084231294582, "grad_norm": 0.33773747086524963, "learning_rate": 8.303766707168895e-05, "loss": 1.8779, "step": 2802 }, { "epoch": 0.8508119593261496, "grad_norm": 0.4092886745929718, "learning_rate": 8.303159173754557e-05, "loss": 2.3972, "step": 2803 }, { "epoch": 0.851115495522841, "grad_norm": 0.4083962142467499, "learning_rate": 8.302551640340219e-05, "loss": 1.5651, "step": 2804 }, { "epoch": 0.8514190317195326, "grad_norm": 0.41298726201057434, "learning_rate": 8.301944106925882e-05, "loss": 1.9808, "step": 2805 }, { "epoch": 0.851722567916224, "grad_norm": 0.3522525131702423, "learning_rate": 8.301336573511544e-05, "loss": 2.0116, "step": 2806 }, { "epoch": 0.8520261041129155, "grad_norm": 0.3948490619659424, "learning_rate": 8.300729040097205e-05, "loss": 1.9998, "step": 2807 }, { "epoch": 0.8523296403096069, "grad_norm": 0.40480837225914, "learning_rate": 8.300121506682867e-05, "loss": 1.9787, "step": 2808 }, { "epoch": 0.8526331765062983, "grad_norm": 0.3458811044692993, "learning_rate": 8.29951397326853e-05, "loss": 1.9506, "step": 2809 }, { "epoch": 0.8529367127029899, "grad_norm": 0.4472740888595581, "learning_rate": 8.298906439854193e-05, "loss": 1.7003, "step": 2810 }, { "epoch": 0.8532402488996813, "grad_norm": 0.3910341262817383, "learning_rate": 8.298298906439854e-05, "loss": 1.6672, "step": 2811 }, { "epoch": 0.8535437850963727, "grad_norm": 0.4467204213142395, "learning_rate": 8.297691373025517e-05, "loss": 1.8857, "step": 2812 }, { "epoch": 0.8538473212930642, "grad_norm": 0.42083072662353516, "learning_rate": 8.29708383961118e-05, "loss": 2.0453, "step": 2813 }, { "epoch": 0.8541508574897556, "grad_norm": 0.4398275315761566, "learning_rate": 8.29647630619684e-05, "loss": 1.8129, "step": 2814 }, { "epoch": 0.8544543936864472, "grad_norm": 0.8038653135299683, "learning_rate": 8.295868772782503e-05, "loss": 1.9836, "step": 2815 }, { "epoch": 0.8547579298831386, "grad_norm": 0.41887366771698, "learning_rate": 8.295261239368166e-05, "loss": 2.0524, "step": 2816 }, { "epoch": 0.85506146607983, "grad_norm": 0.5513349175453186, "learning_rate": 8.294653705953828e-05, "loss": 1.681, "step": 2817 }, { "epoch": 0.8553650022765215, "grad_norm": 0.4004881680011749, "learning_rate": 8.29404617253949e-05, "loss": 1.5912, "step": 2818 }, { "epoch": 0.8556685384732129, "grad_norm": 0.3472290635108948, "learning_rate": 8.293438639125152e-05, "loss": 1.7615, "step": 2819 }, { "epoch": 0.8559720746699044, "grad_norm": 0.4187697470188141, "learning_rate": 8.292831105710815e-05, "loss": 1.6436, "step": 2820 }, { "epoch": 0.8562756108665959, "grad_norm": 0.418883353471756, "learning_rate": 8.292223572296476e-05, "loss": 1.8053, "step": 2821 }, { "epoch": 0.8565791470632873, "grad_norm": 0.41798603534698486, "learning_rate": 8.291616038882138e-05, "loss": 1.568, "step": 2822 }, { "epoch": 0.8568826832599787, "grad_norm": 0.3748184144496918, "learning_rate": 8.291008505467801e-05, "loss": 1.4218, "step": 2823 }, { "epoch": 0.8571862194566702, "grad_norm": 0.42556214332580566, "learning_rate": 8.290400972053463e-05, "loss": 1.5612, "step": 2824 }, { "epoch": 0.8574897556533617, "grad_norm": 0.46294355392456055, "learning_rate": 8.289793438639125e-05, "loss": 1.6574, "step": 2825 }, { "epoch": 0.8577932918500532, "grad_norm": 0.40295061469078064, "learning_rate": 8.289185905224788e-05, "loss": 1.3524, "step": 2826 }, { "epoch": 0.8580968280467446, "grad_norm": 0.465472549200058, "learning_rate": 8.288578371810451e-05, "loss": 2.017, "step": 2827 }, { "epoch": 0.858400364243436, "grad_norm": 0.4338732957839966, "learning_rate": 8.287970838396111e-05, "loss": 1.7856, "step": 2828 }, { "epoch": 0.8587039004401275, "grad_norm": 0.4338977336883545, "learning_rate": 8.287363304981774e-05, "loss": 1.8401, "step": 2829 }, { "epoch": 0.8590074366368189, "grad_norm": 0.43514832854270935, "learning_rate": 8.286755771567437e-05, "loss": 1.7652, "step": 2830 }, { "epoch": 0.8593109728335104, "grad_norm": 0.3935963213443756, "learning_rate": 8.286148238153099e-05, "loss": 1.9616, "step": 2831 }, { "epoch": 0.8596145090302019, "grad_norm": 0.4481986463069916, "learning_rate": 8.285540704738761e-05, "loss": 1.8241, "step": 2832 }, { "epoch": 0.8599180452268933, "grad_norm": 0.3898305594921112, "learning_rate": 8.284933171324423e-05, "loss": 2.0083, "step": 2833 }, { "epoch": 0.8602215814235847, "grad_norm": 0.40316537022590637, "learning_rate": 8.284325637910086e-05, "loss": 1.5316, "step": 2834 }, { "epoch": 0.8605251176202762, "grad_norm": 0.407939612865448, "learning_rate": 8.283718104495747e-05, "loss": 2.0299, "step": 2835 }, { "epoch": 0.8608286538169677, "grad_norm": 1.6520899534225464, "learning_rate": 8.283110571081409e-05, "loss": 1.8243, "step": 2836 }, { "epoch": 0.8611321900136591, "grad_norm": 0.4098230004310608, "learning_rate": 8.282503037667072e-05, "loss": 1.9032, "step": 2837 }, { "epoch": 0.8614357262103506, "grad_norm": 0.3847675025463104, "learning_rate": 8.281895504252734e-05, "loss": 1.7605, "step": 2838 }, { "epoch": 0.861739262407042, "grad_norm": 0.42115259170532227, "learning_rate": 8.281287970838396e-05, "loss": 1.4382, "step": 2839 }, { "epoch": 0.8620427986037335, "grad_norm": 0.4232335090637207, "learning_rate": 8.280680437424059e-05, "loss": 1.9637, "step": 2840 }, { "epoch": 0.862346334800425, "grad_norm": 0.3830999732017517, "learning_rate": 8.280072904009722e-05, "loss": 2.0951, "step": 2841 }, { "epoch": 0.8626498709971164, "grad_norm": 0.4446307122707367, "learning_rate": 8.279465370595382e-05, "loss": 1.8081, "step": 2842 }, { "epoch": 0.8629534071938079, "grad_norm": 0.40466341376304626, "learning_rate": 8.278857837181045e-05, "loss": 1.9391, "step": 2843 }, { "epoch": 0.8632569433904993, "grad_norm": 0.4302142560482025, "learning_rate": 8.278250303766708e-05, "loss": 2.0331, "step": 2844 }, { "epoch": 0.8635604795871907, "grad_norm": 0.40295708179473877, "learning_rate": 8.27764277035237e-05, "loss": 1.7359, "step": 2845 }, { "epoch": 0.8638640157838823, "grad_norm": 0.5045837163925171, "learning_rate": 8.277035236938032e-05, "loss": 1.9034, "step": 2846 }, { "epoch": 0.8641675519805737, "grad_norm": 0.4259899854660034, "learning_rate": 8.276427703523694e-05, "loss": 2.0042, "step": 2847 }, { "epoch": 0.8644710881772651, "grad_norm": 0.4341868460178375, "learning_rate": 8.275820170109357e-05, "loss": 1.8425, "step": 2848 }, { "epoch": 0.8647746243739566, "grad_norm": 0.46809810400009155, "learning_rate": 8.275212636695018e-05, "loss": 1.9288, "step": 2849 }, { "epoch": 0.865078160570648, "grad_norm": 0.4060373306274414, "learning_rate": 8.27460510328068e-05, "loss": 1.7835, "step": 2850 }, { "epoch": 0.8653816967673396, "grad_norm": 0.39879024028778076, "learning_rate": 8.273997569866343e-05, "loss": 1.7374, "step": 2851 }, { "epoch": 0.865685232964031, "grad_norm": 0.4948522746562958, "learning_rate": 8.273390036452005e-05, "loss": 1.9127, "step": 2852 }, { "epoch": 0.8659887691607224, "grad_norm": 0.40187695622444153, "learning_rate": 8.272782503037667e-05, "loss": 1.7527, "step": 2853 }, { "epoch": 0.8662923053574139, "grad_norm": 0.4162091910839081, "learning_rate": 8.27217496962333e-05, "loss": 2.0477, "step": 2854 }, { "epoch": 0.8665958415541053, "grad_norm": 0.4181444048881531, "learning_rate": 8.271567436208993e-05, "loss": 1.9956, "step": 2855 }, { "epoch": 0.8668993777507967, "grad_norm": 0.44338878989219666, "learning_rate": 8.270959902794653e-05, "loss": 1.7806, "step": 2856 }, { "epoch": 0.8672029139474883, "grad_norm": 0.4224783778190613, "learning_rate": 8.270352369380316e-05, "loss": 1.8572, "step": 2857 }, { "epoch": 0.8675064501441797, "grad_norm": 0.4111135005950928, "learning_rate": 8.26974483596598e-05, "loss": 1.8744, "step": 2858 }, { "epoch": 0.8678099863408711, "grad_norm": 0.40660667419433594, "learning_rate": 8.269137302551641e-05, "loss": 1.7401, "step": 2859 }, { "epoch": 0.8681135225375626, "grad_norm": 0.430890291929245, "learning_rate": 8.268529769137303e-05, "loss": 1.5967, "step": 2860 }, { "epoch": 0.868417058734254, "grad_norm": 0.45299017429351807, "learning_rate": 8.267922235722965e-05, "loss": 1.8742, "step": 2861 }, { "epoch": 0.8687205949309456, "grad_norm": 0.3461768329143524, "learning_rate": 8.267314702308628e-05, "loss": 1.6326, "step": 2862 }, { "epoch": 0.869024131127637, "grad_norm": 0.386844664812088, "learning_rate": 8.26670716889429e-05, "loss": 1.7126, "step": 2863 }, { "epoch": 0.8693276673243284, "grad_norm": 0.6148979067802429, "learning_rate": 8.266099635479951e-05, "loss": 1.3573, "step": 2864 }, { "epoch": 0.8696312035210199, "grad_norm": 0.4048292934894562, "learning_rate": 8.265492102065614e-05, "loss": 1.1731, "step": 2865 }, { "epoch": 0.8699347397177113, "grad_norm": 0.3976982831954956, "learning_rate": 8.264884568651276e-05, "loss": 1.8089, "step": 2866 }, { "epoch": 0.8702382759144028, "grad_norm": 0.39783963561058044, "learning_rate": 8.264277035236938e-05, "loss": 1.5432, "step": 2867 }, { "epoch": 0.8705418121110943, "grad_norm": 0.3972279131412506, "learning_rate": 8.263669501822601e-05, "loss": 1.7548, "step": 2868 }, { "epoch": 0.8708453483077857, "grad_norm": 0.43422597646713257, "learning_rate": 8.263061968408264e-05, "loss": 1.9792, "step": 2869 }, { "epoch": 0.8711488845044771, "grad_norm": 0.3682768940925598, "learning_rate": 8.262454434993924e-05, "loss": 2.1472, "step": 2870 }, { "epoch": 0.8714524207011686, "grad_norm": 0.37669479846954346, "learning_rate": 8.261846901579587e-05, "loss": 1.9962, "step": 2871 }, { "epoch": 0.8717559568978601, "grad_norm": 0.36915603280067444, "learning_rate": 8.26123936816525e-05, "loss": 1.5438, "step": 2872 }, { "epoch": 0.8720594930945516, "grad_norm": 0.4083096981048584, "learning_rate": 8.260631834750911e-05, "loss": 1.7711, "step": 2873 }, { "epoch": 0.872363029291243, "grad_norm": 0.3865950107574463, "learning_rate": 8.260024301336574e-05, "loss": 1.8624, "step": 2874 }, { "epoch": 0.8726665654879344, "grad_norm": 0.5207681655883789, "learning_rate": 8.259416767922236e-05, "loss": 2.0506, "step": 2875 }, { "epoch": 0.8729701016846259, "grad_norm": 0.4441354274749756, "learning_rate": 8.258809234507899e-05, "loss": 1.822, "step": 2876 }, { "epoch": 0.8732736378813174, "grad_norm": 0.3258417844772339, "learning_rate": 8.25820170109356e-05, "loss": 1.2733, "step": 2877 }, { "epoch": 0.8735771740780088, "grad_norm": 0.37115880846977234, "learning_rate": 8.257594167679222e-05, "loss": 1.9161, "step": 2878 }, { "epoch": 0.8738807102747003, "grad_norm": 0.47799551486968994, "learning_rate": 8.256986634264885e-05, "loss": 2.0963, "step": 2879 }, { "epoch": 0.8741842464713917, "grad_norm": 0.4438342750072479, "learning_rate": 8.256379100850547e-05, "loss": 1.3525, "step": 2880 }, { "epoch": 0.8744877826680831, "grad_norm": 0.3878926932811737, "learning_rate": 8.255771567436209e-05, "loss": 1.4205, "step": 2881 }, { "epoch": 0.8747913188647746, "grad_norm": 0.4843897819519043, "learning_rate": 8.255164034021872e-05, "loss": 2.0388, "step": 2882 }, { "epoch": 0.8750948550614661, "grad_norm": 0.5297700762748718, "learning_rate": 8.254556500607535e-05, "loss": 1.3358, "step": 2883 }, { "epoch": 0.8753983912581575, "grad_norm": 0.34962332248687744, "learning_rate": 8.253948967193195e-05, "loss": 1.6778, "step": 2884 }, { "epoch": 0.875701927454849, "grad_norm": 0.4917025864124298, "learning_rate": 8.253341433778858e-05, "loss": 1.7919, "step": 2885 }, { "epoch": 0.8760054636515404, "grad_norm": 0.43004027009010315, "learning_rate": 8.252733900364521e-05, "loss": 1.8666, "step": 2886 }, { "epoch": 0.8763089998482319, "grad_norm": 0.5672779679298401, "learning_rate": 8.252126366950182e-05, "loss": 1.5285, "step": 2887 }, { "epoch": 0.8766125360449234, "grad_norm": 0.45307332277297974, "learning_rate": 8.251518833535845e-05, "loss": 1.9865, "step": 2888 }, { "epoch": 0.8769160722416148, "grad_norm": 0.4099940359592438, "learning_rate": 8.250911300121507e-05, "loss": 1.8511, "step": 2889 }, { "epoch": 0.8772196084383063, "grad_norm": 0.4223155081272125, "learning_rate": 8.25030376670717e-05, "loss": 1.5077, "step": 2890 }, { "epoch": 0.8775231446349977, "grad_norm": 0.4101323187351227, "learning_rate": 8.249696233292831e-05, "loss": 1.7534, "step": 2891 }, { "epoch": 0.8778266808316891, "grad_norm": 0.4333887994289398, "learning_rate": 8.249088699878493e-05, "loss": 1.9611, "step": 2892 }, { "epoch": 0.8781302170283807, "grad_norm": 0.44225746393203735, "learning_rate": 8.248481166464156e-05, "loss": 1.9739, "step": 2893 }, { "epoch": 0.8784337532250721, "grad_norm": 0.419316828250885, "learning_rate": 8.247873633049818e-05, "loss": 1.9278, "step": 2894 }, { "epoch": 0.8787372894217635, "grad_norm": 0.39314213395118713, "learning_rate": 8.24726609963548e-05, "loss": 1.8768, "step": 2895 }, { "epoch": 0.879040825618455, "grad_norm": 0.45280733704566956, "learning_rate": 8.246658566221143e-05, "loss": 1.4584, "step": 2896 }, { "epoch": 0.8793443618151464, "grad_norm": 0.4470527768135071, "learning_rate": 8.246051032806805e-05, "loss": 1.992, "step": 2897 }, { "epoch": 0.879647898011838, "grad_norm": 0.4180387854576111, "learning_rate": 8.245443499392466e-05, "loss": 1.7643, "step": 2898 }, { "epoch": 0.8799514342085294, "grad_norm": 0.42866212129592896, "learning_rate": 8.24483596597813e-05, "loss": 1.4524, "step": 2899 }, { "epoch": 0.8802549704052208, "grad_norm": 0.3700104057788849, "learning_rate": 8.244228432563791e-05, "loss": 1.7607, "step": 2900 }, { "epoch": 0.8805585066019123, "grad_norm": 0.4378833770751953, "learning_rate": 8.243620899149453e-05, "loss": 1.7648, "step": 2901 }, { "epoch": 0.8808620427986037, "grad_norm": 0.4206582307815552, "learning_rate": 8.243013365735116e-05, "loss": 1.159, "step": 2902 }, { "epoch": 0.8811655789952952, "grad_norm": 0.4247249960899353, "learning_rate": 8.242405832320778e-05, "loss": 2.0676, "step": 2903 }, { "epoch": 0.8814691151919867, "grad_norm": 0.43796390295028687, "learning_rate": 8.241798298906441e-05, "loss": 1.9104, "step": 2904 }, { "epoch": 0.8817726513886781, "grad_norm": 0.4268593192100525, "learning_rate": 8.241190765492103e-05, "loss": 1.8041, "step": 2905 }, { "epoch": 0.8820761875853695, "grad_norm": 0.5760425925254822, "learning_rate": 8.240583232077764e-05, "loss": 1.7562, "step": 2906 }, { "epoch": 0.882379723782061, "grad_norm": 0.328421950340271, "learning_rate": 8.239975698663427e-05, "loss": 2.0184, "step": 2907 }, { "epoch": 0.8826832599787525, "grad_norm": 0.4264001250267029, "learning_rate": 8.239368165249089e-05, "loss": 1.8521, "step": 2908 }, { "epoch": 0.882986796175444, "grad_norm": 0.7516580820083618, "learning_rate": 8.238760631834751e-05, "loss": 1.9573, "step": 2909 }, { "epoch": 0.8832903323721354, "grad_norm": 0.43976011872291565, "learning_rate": 8.238153098420414e-05, "loss": 1.9756, "step": 2910 }, { "epoch": 0.8835938685688268, "grad_norm": 0.420858234167099, "learning_rate": 8.237545565006076e-05, "loss": 1.8513, "step": 2911 }, { "epoch": 0.8838974047655183, "grad_norm": 0.45598578453063965, "learning_rate": 8.236938031591737e-05, "loss": 1.8986, "step": 2912 }, { "epoch": 0.8842009409622097, "grad_norm": 0.3829743266105652, "learning_rate": 8.2363304981774e-05, "loss": 1.7241, "step": 2913 }, { "epoch": 0.8845044771589012, "grad_norm": 1.6669212579727173, "learning_rate": 8.235722964763062e-05, "loss": 1.875, "step": 2914 }, { "epoch": 0.8848080133555927, "grad_norm": 0.711898684501648, "learning_rate": 8.235115431348724e-05, "loss": 1.9559, "step": 2915 }, { "epoch": 0.8851115495522841, "grad_norm": 0.46978119015693665, "learning_rate": 8.234507897934387e-05, "loss": 1.4868, "step": 2916 }, { "epoch": 0.8854150857489755, "grad_norm": 0.4142061173915863, "learning_rate": 8.233900364520049e-05, "loss": 1.9654, "step": 2917 }, { "epoch": 0.885718621945667, "grad_norm": 0.4385989010334015, "learning_rate": 8.233292831105712e-05, "loss": 1.7259, "step": 2918 }, { "epoch": 0.8860221581423585, "grad_norm": 0.4307645261287689, "learning_rate": 8.232685297691374e-05, "loss": 2.033, "step": 2919 }, { "epoch": 0.88632569433905, "grad_norm": 0.5880458950996399, "learning_rate": 8.232077764277035e-05, "loss": 1.5008, "step": 2920 }, { "epoch": 0.8866292305357414, "grad_norm": 0.4887501299381256, "learning_rate": 8.231470230862698e-05, "loss": 1.9574, "step": 2921 }, { "epoch": 0.8869327667324328, "grad_norm": 0.42289820313453674, "learning_rate": 8.23086269744836e-05, "loss": 1.8607, "step": 2922 }, { "epoch": 0.8872363029291243, "grad_norm": 0.4192774295806885, "learning_rate": 8.230255164034022e-05, "loss": 2.0718, "step": 2923 }, { "epoch": 0.8875398391258158, "grad_norm": 0.5114601850509644, "learning_rate": 8.229647630619685e-05, "loss": 1.9832, "step": 2924 }, { "epoch": 0.8878433753225072, "grad_norm": 0.4116429388523102, "learning_rate": 8.229040097205347e-05, "loss": 1.7623, "step": 2925 }, { "epoch": 0.8881469115191987, "grad_norm": 0.44943469762802124, "learning_rate": 8.228432563791008e-05, "loss": 1.8241, "step": 2926 }, { "epoch": 0.8884504477158901, "grad_norm": 1.1577938795089722, "learning_rate": 8.227825030376671e-05, "loss": 1.9125, "step": 2927 }, { "epoch": 0.8887539839125815, "grad_norm": 1.1404715776443481, "learning_rate": 8.227217496962333e-05, "loss": 1.4977, "step": 2928 }, { "epoch": 0.8890575201092731, "grad_norm": 0.7202188968658447, "learning_rate": 8.226609963547995e-05, "loss": 2.0293, "step": 2929 }, { "epoch": 0.8893610563059645, "grad_norm": 0.8101162910461426, "learning_rate": 8.226002430133658e-05, "loss": 1.888, "step": 2930 }, { "epoch": 0.889664592502656, "grad_norm": 0.41163596510887146, "learning_rate": 8.22539489671932e-05, "loss": 1.849, "step": 2931 }, { "epoch": 0.8899681286993474, "grad_norm": 0.42284974455833435, "learning_rate": 8.224787363304983e-05, "loss": 1.9611, "step": 2932 }, { "epoch": 0.8902716648960388, "grad_norm": 0.6039950847625732, "learning_rate": 8.224179829890645e-05, "loss": 1.9822, "step": 2933 }, { "epoch": 0.8905752010927304, "grad_norm": 0.3433489203453064, "learning_rate": 8.223572296476306e-05, "loss": 1.7947, "step": 2934 }, { "epoch": 0.8908787372894218, "grad_norm": 0.3537866473197937, "learning_rate": 8.22296476306197e-05, "loss": 1.8749, "step": 2935 }, { "epoch": 0.8911822734861132, "grad_norm": 0.3994251787662506, "learning_rate": 8.222357229647631e-05, "loss": 1.7805, "step": 2936 }, { "epoch": 0.8914858096828047, "grad_norm": 0.3776698708534241, "learning_rate": 8.221749696233293e-05, "loss": 1.8582, "step": 2937 }, { "epoch": 0.8917893458794961, "grad_norm": 0.42231059074401855, "learning_rate": 8.221142162818956e-05, "loss": 1.9542, "step": 2938 }, { "epoch": 0.8920928820761875, "grad_norm": 0.470005065202713, "learning_rate": 8.220534629404618e-05, "loss": 1.1926, "step": 2939 }, { "epoch": 0.8923964182728791, "grad_norm": 0.43730974197387695, "learning_rate": 8.21992709599028e-05, "loss": 1.549, "step": 2940 }, { "epoch": 0.8926999544695705, "grad_norm": 0.4016040563583374, "learning_rate": 8.219319562575942e-05, "loss": 1.8797, "step": 2941 }, { "epoch": 0.893003490666262, "grad_norm": 0.4425860345363617, "learning_rate": 8.218712029161604e-05, "loss": 1.4267, "step": 2942 }, { "epoch": 0.8933070268629534, "grad_norm": 0.8383780717849731, "learning_rate": 8.218104495747266e-05, "loss": 1.8884, "step": 2943 }, { "epoch": 0.8936105630596448, "grad_norm": 0.4015752077102661, "learning_rate": 8.217496962332929e-05, "loss": 2.0479, "step": 2944 }, { "epoch": 0.8939140992563364, "grad_norm": 0.39999493956565857, "learning_rate": 8.216889428918591e-05, "loss": 1.8845, "step": 2945 }, { "epoch": 0.8942176354530278, "grad_norm": 0.800762414932251, "learning_rate": 8.216281895504252e-05, "loss": 2.1039, "step": 2946 }, { "epoch": 0.8945211716497192, "grad_norm": 0.38609185814857483, "learning_rate": 8.215674362089916e-05, "loss": 1.8951, "step": 2947 }, { "epoch": 0.8948247078464107, "grad_norm": 0.37557461857795715, "learning_rate": 8.215066828675577e-05, "loss": 1.8418, "step": 2948 }, { "epoch": 0.8951282440431021, "grad_norm": 0.4221288561820984, "learning_rate": 8.21445929526124e-05, "loss": 2.0154, "step": 2949 }, { "epoch": 0.8954317802397936, "grad_norm": 0.3798159658908844, "learning_rate": 8.213851761846902e-05, "loss": 1.852, "step": 2950 }, { "epoch": 0.8957353164364851, "grad_norm": 0.4777775704860687, "learning_rate": 8.213244228432564e-05, "loss": 1.9272, "step": 2951 }, { "epoch": 0.8960388526331765, "grad_norm": 0.45156142115592957, "learning_rate": 8.212636695018227e-05, "loss": 1.7512, "step": 2952 }, { "epoch": 0.8963423888298679, "grad_norm": 0.43190255761146545, "learning_rate": 8.212029161603889e-05, "loss": 2.0517, "step": 2953 }, { "epoch": 0.8966459250265594, "grad_norm": 0.40969786047935486, "learning_rate": 8.21142162818955e-05, "loss": 1.843, "step": 2954 }, { "epoch": 0.8969494612232509, "grad_norm": 0.3868393003940582, "learning_rate": 8.210814094775213e-05, "loss": 1.9854, "step": 2955 }, { "epoch": 0.8972529974199424, "grad_norm": 0.39843276143074036, "learning_rate": 8.210206561360875e-05, "loss": 1.9419, "step": 2956 }, { "epoch": 0.8975565336166338, "grad_norm": 0.3709312379360199, "learning_rate": 8.209599027946537e-05, "loss": 1.9075, "step": 2957 }, { "epoch": 0.8978600698133252, "grad_norm": 0.3753807246685028, "learning_rate": 8.2089914945322e-05, "loss": 1.8474, "step": 2958 }, { "epoch": 0.8981636060100167, "grad_norm": 0.47521868348121643, "learning_rate": 8.208383961117862e-05, "loss": 2.0679, "step": 2959 }, { "epoch": 0.8984671422067082, "grad_norm": 0.3866266906261444, "learning_rate": 8.207776427703523e-05, "loss": 1.6691, "step": 2960 }, { "epoch": 0.8987706784033996, "grad_norm": 0.417644739151001, "learning_rate": 8.207168894289187e-05, "loss": 1.7893, "step": 2961 }, { "epoch": 0.8990742146000911, "grad_norm": 0.427492618560791, "learning_rate": 8.206561360874848e-05, "loss": 1.9072, "step": 2962 }, { "epoch": 0.8993777507967825, "grad_norm": 0.4407294988632202, "learning_rate": 8.205953827460511e-05, "loss": 1.9718, "step": 2963 }, { "epoch": 0.8996812869934739, "grad_norm": 0.4453076720237732, "learning_rate": 8.205346294046173e-05, "loss": 1.6475, "step": 2964 }, { "epoch": 0.8999848231901654, "grad_norm": 0.43250027298927307, "learning_rate": 8.204738760631835e-05, "loss": 1.6695, "step": 2965 }, { "epoch": 0.9002883593868569, "grad_norm": 0.4513264298439026, "learning_rate": 8.204131227217498e-05, "loss": 1.7753, "step": 2966 }, { "epoch": 0.9005918955835484, "grad_norm": 0.3830716609954834, "learning_rate": 8.20352369380316e-05, "loss": 2.0716, "step": 2967 }, { "epoch": 0.9008954317802398, "grad_norm": 0.4067733585834503, "learning_rate": 8.202916160388821e-05, "loss": 1.9266, "step": 2968 }, { "epoch": 0.9011989679769312, "grad_norm": 0.39445656538009644, "learning_rate": 8.202308626974484e-05, "loss": 1.9995, "step": 2969 }, { "epoch": 0.9015025041736227, "grad_norm": 0.7493338584899902, "learning_rate": 8.201701093560146e-05, "loss": 2.0499, "step": 2970 }, { "epoch": 0.9018060403703142, "grad_norm": 0.4843970537185669, "learning_rate": 8.201093560145808e-05, "loss": 1.7133, "step": 2971 }, { "epoch": 0.9021095765670056, "grad_norm": 0.4203130602836609, "learning_rate": 8.200486026731471e-05, "loss": 2.057, "step": 2972 }, { "epoch": 0.9024131127636971, "grad_norm": 0.47080641984939575, "learning_rate": 8.199878493317133e-05, "loss": 1.9157, "step": 2973 }, { "epoch": 0.9027166489603885, "grad_norm": 0.3420778512954712, "learning_rate": 8.199270959902794e-05, "loss": 1.7641, "step": 2974 }, { "epoch": 0.9030201851570799, "grad_norm": 0.4011532962322235, "learning_rate": 8.198663426488458e-05, "loss": 2.0489, "step": 2975 }, { "epoch": 0.9033237213537715, "grad_norm": 0.457653284072876, "learning_rate": 8.198055893074119e-05, "loss": 1.5258, "step": 2976 }, { "epoch": 0.9036272575504629, "grad_norm": 0.45125746726989746, "learning_rate": 8.197448359659782e-05, "loss": 1.8996, "step": 2977 }, { "epoch": 0.9039307937471543, "grad_norm": 0.44737517833709717, "learning_rate": 8.196840826245444e-05, "loss": 1.6851, "step": 2978 }, { "epoch": 0.9042343299438458, "grad_norm": 0.4220506250858307, "learning_rate": 8.196233292831106e-05, "loss": 1.0195, "step": 2979 }, { "epoch": 0.9045378661405372, "grad_norm": 0.40028899908065796, "learning_rate": 8.195625759416769e-05, "loss": 1.9251, "step": 2980 }, { "epoch": 0.9048414023372288, "grad_norm": 0.3769090175628662, "learning_rate": 8.19501822600243e-05, "loss": 1.6839, "step": 2981 }, { "epoch": 0.9051449385339202, "grad_norm": 0.41733232140541077, "learning_rate": 8.194410692588092e-05, "loss": 1.1995, "step": 2982 }, { "epoch": 0.9054484747306116, "grad_norm": 0.4010336697101593, "learning_rate": 8.193803159173755e-05, "loss": 1.5531, "step": 2983 }, { "epoch": 0.9057520109273031, "grad_norm": 0.38843366503715515, "learning_rate": 8.193195625759417e-05, "loss": 1.8149, "step": 2984 }, { "epoch": 0.9060555471239945, "grad_norm": 0.3807307481765747, "learning_rate": 8.192588092345079e-05, "loss": 1.8379, "step": 2985 }, { "epoch": 0.906359083320686, "grad_norm": 0.44730183482170105, "learning_rate": 8.191980558930742e-05, "loss": 1.6764, "step": 2986 }, { "epoch": 0.9066626195173775, "grad_norm": 0.4236774146556854, "learning_rate": 8.191373025516404e-05, "loss": 1.926, "step": 2987 }, { "epoch": 0.9069661557140689, "grad_norm": 0.35578781366348267, "learning_rate": 8.190765492102065e-05, "loss": 2.1609, "step": 2988 }, { "epoch": 0.9072696919107603, "grad_norm": 0.41288191080093384, "learning_rate": 8.190157958687729e-05, "loss": 2.1007, "step": 2989 }, { "epoch": 0.9075732281074518, "grad_norm": 0.43154072761535645, "learning_rate": 8.18955042527339e-05, "loss": 1.7687, "step": 2990 }, { "epoch": 0.9078767643041433, "grad_norm": 0.41048216819763184, "learning_rate": 8.188942891859053e-05, "loss": 2.2021, "step": 2991 }, { "epoch": 0.9081803005008348, "grad_norm": 0.4213089942932129, "learning_rate": 8.188335358444715e-05, "loss": 1.7208, "step": 2992 }, { "epoch": 0.9084838366975262, "grad_norm": 0.9679743647575378, "learning_rate": 8.187727825030377e-05, "loss": 1.731, "step": 2993 }, { "epoch": 0.9087873728942176, "grad_norm": 0.4650149643421173, "learning_rate": 8.18712029161604e-05, "loss": 1.9327, "step": 2994 }, { "epoch": 0.9090909090909091, "grad_norm": 0.3545879125595093, "learning_rate": 8.1865127582017e-05, "loss": 1.3728, "step": 2995 }, { "epoch": 0.9093944452876005, "grad_norm": 0.4107753336429596, "learning_rate": 8.185905224787363e-05, "loss": 2.0919, "step": 2996 }, { "epoch": 0.909697981484292, "grad_norm": 0.44587281346321106, "learning_rate": 8.185297691373026e-05, "loss": 1.5915, "step": 2997 }, { "epoch": 0.9100015176809835, "grad_norm": 0.4520403742790222, "learning_rate": 8.184690157958688e-05, "loss": 1.9165, "step": 2998 }, { "epoch": 0.9103050538776749, "grad_norm": 0.39504629373550415, "learning_rate": 8.18408262454435e-05, "loss": 1.7551, "step": 2999 }, { "epoch": 0.9106085900743663, "grad_norm": 0.3621729612350464, "learning_rate": 8.183475091130013e-05, "loss": 1.4211, "step": 3000 }, { "epoch": 0.9109121262710578, "grad_norm": 0.6058910489082336, "learning_rate": 8.182867557715675e-05, "loss": 1.7533, "step": 3001 }, { "epoch": 0.9112156624677493, "grad_norm": 0.49985721707344055, "learning_rate": 8.182260024301336e-05, "loss": 2.0437, "step": 3002 }, { "epoch": 0.9115191986644408, "grad_norm": 14.125785827636719, "learning_rate": 8.181652490887e-05, "loss": 1.3568, "step": 3003 }, { "epoch": 0.9118227348611322, "grad_norm": 0.42591822147369385, "learning_rate": 8.181044957472661e-05, "loss": 1.9824, "step": 3004 }, { "epoch": 0.9121262710578236, "grad_norm": 0.4781965911388397, "learning_rate": 8.180437424058324e-05, "loss": 2.0458, "step": 3005 }, { "epoch": 0.9124298072545151, "grad_norm": 0.40637922286987305, "learning_rate": 8.179829890643986e-05, "loss": 1.9987, "step": 3006 }, { "epoch": 0.9127333434512066, "grad_norm": 0.43722665309906006, "learning_rate": 8.179222357229648e-05, "loss": 2.0207, "step": 3007 }, { "epoch": 0.913036879647898, "grad_norm": 2.785123348236084, "learning_rate": 8.178614823815311e-05, "loss": 2.0649, "step": 3008 }, { "epoch": 0.9133404158445895, "grad_norm": 0.4118681848049164, "learning_rate": 8.178007290400971e-05, "loss": 1.8433, "step": 3009 }, { "epoch": 0.9136439520412809, "grad_norm": 0.38384199142456055, "learning_rate": 8.177399756986634e-05, "loss": 1.686, "step": 3010 }, { "epoch": 0.9139474882379723, "grad_norm": 0.41295409202575684, "learning_rate": 8.176792223572297e-05, "loss": 1.8854, "step": 3011 }, { "epoch": 0.9142510244346639, "grad_norm": 0.40270209312438965, "learning_rate": 8.176184690157959e-05, "loss": 1.8403, "step": 3012 }, { "epoch": 0.9145545606313553, "grad_norm": 0.4634084701538086, "learning_rate": 8.175577156743621e-05, "loss": 1.2805, "step": 3013 }, { "epoch": 0.9148580968280468, "grad_norm": 0.37608620524406433, "learning_rate": 8.174969623329284e-05, "loss": 1.3985, "step": 3014 }, { "epoch": 0.9151616330247382, "grad_norm": 0.47492894530296326, "learning_rate": 8.174362089914946e-05, "loss": 1.6559, "step": 3015 }, { "epoch": 0.9154651692214296, "grad_norm": 0.3841186463832855, "learning_rate": 8.173754556500607e-05, "loss": 2.0221, "step": 3016 }, { "epoch": 0.9157687054181212, "grad_norm": 0.40183159708976746, "learning_rate": 8.17314702308627e-05, "loss": 1.7669, "step": 3017 }, { "epoch": 0.9160722416148126, "grad_norm": 0.4649689197540283, "learning_rate": 8.172539489671932e-05, "loss": 1.9634, "step": 3018 }, { "epoch": 0.916375777811504, "grad_norm": 0.5210034847259521, "learning_rate": 8.171931956257594e-05, "loss": 1.5208, "step": 3019 }, { "epoch": 0.9166793140081955, "grad_norm": 0.41098159551620483, "learning_rate": 8.171324422843257e-05, "loss": 2.1456, "step": 3020 }, { "epoch": 0.9169828502048869, "grad_norm": 0.4477085769176483, "learning_rate": 8.170716889428919e-05, "loss": 2.1252, "step": 3021 }, { "epoch": 0.9172863864015783, "grad_norm": 0.6705775856971741, "learning_rate": 8.170109356014582e-05, "loss": 1.7007, "step": 3022 }, { "epoch": 0.9175899225982699, "grad_norm": 0.3919045925140381, "learning_rate": 8.169501822600242e-05, "loss": 1.9774, "step": 3023 }, { "epoch": 0.9178934587949613, "grad_norm": 0.41216278076171875, "learning_rate": 8.168894289185905e-05, "loss": 1.845, "step": 3024 }, { "epoch": 0.9181969949916527, "grad_norm": 0.4093484580516815, "learning_rate": 8.168286755771568e-05, "loss": 1.7517, "step": 3025 }, { "epoch": 0.9185005311883442, "grad_norm": 0.4002762734889984, "learning_rate": 8.16767922235723e-05, "loss": 1.9234, "step": 3026 }, { "epoch": 0.9188040673850356, "grad_norm": 0.3966367542743683, "learning_rate": 8.167071688942892e-05, "loss": 1.9905, "step": 3027 }, { "epoch": 0.9191076035817272, "grad_norm": 0.4566415250301361, "learning_rate": 8.166464155528555e-05, "loss": 1.6606, "step": 3028 }, { "epoch": 0.9194111397784186, "grad_norm": 0.5808939933776855, "learning_rate": 8.165856622114217e-05, "loss": 1.6156, "step": 3029 }, { "epoch": 0.91971467597511, "grad_norm": 0.4700441062450409, "learning_rate": 8.165249088699878e-05, "loss": 1.9213, "step": 3030 }, { "epoch": 0.9200182121718015, "grad_norm": 0.4296051263809204, "learning_rate": 8.164641555285542e-05, "loss": 2.0518, "step": 3031 }, { "epoch": 0.9203217483684929, "grad_norm": 0.469310462474823, "learning_rate": 8.164034021871203e-05, "loss": 2.1185, "step": 3032 }, { "epoch": 0.9206252845651844, "grad_norm": 0.45901113748550415, "learning_rate": 8.163426488456865e-05, "loss": 1.8144, "step": 3033 }, { "epoch": 0.9209288207618759, "grad_norm": 0.40197721123695374, "learning_rate": 8.162818955042528e-05, "loss": 1.7781, "step": 3034 }, { "epoch": 0.9212323569585673, "grad_norm": 0.41534188389778137, "learning_rate": 8.16221142162819e-05, "loss": 1.9972, "step": 3035 }, { "epoch": 0.9215358931552587, "grad_norm": 0.4121875762939453, "learning_rate": 8.161603888213853e-05, "loss": 1.4284, "step": 3036 }, { "epoch": 0.9218394293519502, "grad_norm": 0.4393114149570465, "learning_rate": 8.160996354799513e-05, "loss": 1.785, "step": 3037 }, { "epoch": 0.9221429655486417, "grad_norm": 0.3844849467277527, "learning_rate": 8.160388821385176e-05, "loss": 1.8865, "step": 3038 }, { "epoch": 0.9224465017453332, "grad_norm": 0.4221876859664917, "learning_rate": 8.15978128797084e-05, "loss": 1.8166, "step": 3039 }, { "epoch": 0.9227500379420246, "grad_norm": 0.4294770359992981, "learning_rate": 8.159173754556501e-05, "loss": 1.8273, "step": 3040 }, { "epoch": 0.923053574138716, "grad_norm": 0.41192346811294556, "learning_rate": 8.158566221142163e-05, "loss": 1.9982, "step": 3041 }, { "epoch": 0.9233571103354075, "grad_norm": 0.4439050555229187, "learning_rate": 8.157958687727826e-05, "loss": 2.0525, "step": 3042 }, { "epoch": 0.923660646532099, "grad_norm": 0.4909377694129944, "learning_rate": 8.157351154313488e-05, "loss": 1.7385, "step": 3043 }, { "epoch": 0.9239641827287904, "grad_norm": 0.3646416664123535, "learning_rate": 8.15674362089915e-05, "loss": 1.7543, "step": 3044 }, { "epoch": 0.9242677189254819, "grad_norm": 0.46845096349716187, "learning_rate": 8.156136087484813e-05, "loss": 1.5907, "step": 3045 }, { "epoch": 0.9245712551221733, "grad_norm": 0.3998015224933624, "learning_rate": 8.155528554070474e-05, "loss": 1.9652, "step": 3046 }, { "epoch": 0.9248747913188647, "grad_norm": 0.43011385202407837, "learning_rate": 8.154921020656136e-05, "loss": 1.9294, "step": 3047 }, { "epoch": 0.9251783275155562, "grad_norm": 1.033368706703186, "learning_rate": 8.154313487241799e-05, "loss": 1.9883, "step": 3048 }, { "epoch": 0.9254818637122477, "grad_norm": 0.6372964382171631, "learning_rate": 8.153705953827461e-05, "loss": 1.6381, "step": 3049 }, { "epoch": 0.9257853999089392, "grad_norm": 0.4168377220630646, "learning_rate": 8.153098420413124e-05, "loss": 1.5776, "step": 3050 }, { "epoch": 0.9260889361056306, "grad_norm": 0.4470007121562958, "learning_rate": 8.152490886998784e-05, "loss": 1.7431, "step": 3051 }, { "epoch": 0.926392472302322, "grad_norm": 0.4876750111579895, "learning_rate": 8.151883353584447e-05, "loss": 2.0082, "step": 3052 }, { "epoch": 0.9266960084990135, "grad_norm": 0.4005252718925476, "learning_rate": 8.15127582017011e-05, "loss": 1.8192, "step": 3053 }, { "epoch": 0.926999544695705, "grad_norm": 0.4852685332298279, "learning_rate": 8.150668286755772e-05, "loss": 1.5553, "step": 3054 }, { "epoch": 0.9273030808923964, "grad_norm": 0.4594980776309967, "learning_rate": 8.150060753341434e-05, "loss": 1.784, "step": 3055 }, { "epoch": 0.9276066170890879, "grad_norm": 0.34720897674560547, "learning_rate": 8.149453219927097e-05, "loss": 1.8604, "step": 3056 }, { "epoch": 0.9279101532857793, "grad_norm": 0.423211932182312, "learning_rate": 8.148845686512759e-05, "loss": 1.9792, "step": 3057 }, { "epoch": 0.9282136894824707, "grad_norm": 0.42972126603126526, "learning_rate": 8.14823815309842e-05, "loss": 1.4225, "step": 3058 }, { "epoch": 0.9285172256791623, "grad_norm": 0.38373371958732605, "learning_rate": 8.147630619684084e-05, "loss": 1.6397, "step": 3059 }, { "epoch": 0.9288207618758537, "grad_norm": 0.4351721405982971, "learning_rate": 8.147023086269745e-05, "loss": 1.6237, "step": 3060 }, { "epoch": 0.9291242980725452, "grad_norm": 0.41888755559921265, "learning_rate": 8.146415552855407e-05, "loss": 1.7037, "step": 3061 }, { "epoch": 0.9294278342692366, "grad_norm": 0.43660473823547363, "learning_rate": 8.14580801944107e-05, "loss": 1.9307, "step": 3062 }, { "epoch": 0.929731370465928, "grad_norm": 0.4016878008842468, "learning_rate": 8.145200486026732e-05, "loss": 1.7994, "step": 3063 }, { "epoch": 0.9300349066626196, "grad_norm": 0.5155421495437622, "learning_rate": 8.144592952612395e-05, "loss": 1.7505, "step": 3064 }, { "epoch": 0.930338442859311, "grad_norm": 0.4258996844291687, "learning_rate": 8.143985419198055e-05, "loss": 1.1935, "step": 3065 }, { "epoch": 0.9306419790560024, "grad_norm": 0.5270261168479919, "learning_rate": 8.143377885783718e-05, "loss": 2.095, "step": 3066 }, { "epoch": 0.9309455152526939, "grad_norm": 0.382199764251709, "learning_rate": 8.142770352369381e-05, "loss": 1.9493, "step": 3067 }, { "epoch": 0.9312490514493853, "grad_norm": 0.6669699549674988, "learning_rate": 8.142162818955042e-05, "loss": 2.07, "step": 3068 }, { "epoch": 0.9315525876460768, "grad_norm": 0.3749605417251587, "learning_rate": 8.141555285540705e-05, "loss": 1.5008, "step": 3069 }, { "epoch": 0.9318561238427683, "grad_norm": 0.4507908523082733, "learning_rate": 8.140947752126368e-05, "loss": 2.2879, "step": 3070 }, { "epoch": 0.9321596600394597, "grad_norm": 0.42423611879348755, "learning_rate": 8.14034021871203e-05, "loss": 2.0068, "step": 3071 }, { "epoch": 0.9324631962361511, "grad_norm": 0.4780293405056, "learning_rate": 8.139732685297691e-05, "loss": 1.9496, "step": 3072 }, { "epoch": 0.9327667324328426, "grad_norm": 0.4152267873287201, "learning_rate": 8.139125151883355e-05, "loss": 1.7815, "step": 3073 }, { "epoch": 0.933070268629534, "grad_norm": 0.40453848242759705, "learning_rate": 8.138517618469016e-05, "loss": 1.825, "step": 3074 }, { "epoch": 0.9333738048262256, "grad_norm": 0.48477646708488464, "learning_rate": 8.137910085054678e-05, "loss": 1.7566, "step": 3075 }, { "epoch": 0.933677341022917, "grad_norm": 0.49090731143951416, "learning_rate": 8.13730255164034e-05, "loss": 1.5743, "step": 3076 }, { "epoch": 0.9339808772196084, "grad_norm": 0.44307780265808105, "learning_rate": 8.136695018226003e-05, "loss": 1.9686, "step": 3077 }, { "epoch": 0.9342844134162999, "grad_norm": 0.6790413856506348, "learning_rate": 8.136087484811666e-05, "loss": 2.0449, "step": 3078 }, { "epoch": 0.9345879496129913, "grad_norm": 0.46484366059303284, "learning_rate": 8.135479951397326e-05, "loss": 1.6846, "step": 3079 }, { "epoch": 0.9348914858096828, "grad_norm": 0.5237354636192322, "learning_rate": 8.13487241798299e-05, "loss": 1.9181, "step": 3080 }, { "epoch": 0.9351950220063743, "grad_norm": 0.3992574214935303, "learning_rate": 8.134264884568652e-05, "loss": 1.8549, "step": 3081 }, { "epoch": 0.9354985582030657, "grad_norm": 0.37925541400909424, "learning_rate": 8.133657351154313e-05, "loss": 1.5961, "step": 3082 }, { "epoch": 0.9358020943997571, "grad_norm": 2.249074935913086, "learning_rate": 8.133049817739976e-05, "loss": 1.7813, "step": 3083 }, { "epoch": 0.9361056305964486, "grad_norm": 0.42430388927459717, "learning_rate": 8.132442284325639e-05, "loss": 1.5935, "step": 3084 }, { "epoch": 0.9364091667931401, "grad_norm": 0.42997804284095764, "learning_rate": 8.131834750911301e-05, "loss": 1.7546, "step": 3085 }, { "epoch": 0.9367127029898316, "grad_norm": 0.3883001506328583, "learning_rate": 8.131227217496962e-05, "loss": 1.9123, "step": 3086 }, { "epoch": 0.937016239186523, "grad_norm": 0.39124423265457153, "learning_rate": 8.130619684082626e-05, "loss": 1.4895, "step": 3087 }, { "epoch": 0.9373197753832144, "grad_norm": 0.41227900981903076, "learning_rate": 8.130012150668287e-05, "loss": 1.8467, "step": 3088 }, { "epoch": 0.9376233115799059, "grad_norm": 0.40440481901168823, "learning_rate": 8.129404617253949e-05, "loss": 1.678, "step": 3089 }, { "epoch": 0.9379268477765974, "grad_norm": 0.406684011220932, "learning_rate": 8.128797083839611e-05, "loss": 1.7328, "step": 3090 }, { "epoch": 0.9382303839732888, "grad_norm": 0.409196138381958, "learning_rate": 8.128189550425274e-05, "loss": 1.645, "step": 3091 }, { "epoch": 0.9385339201699803, "grad_norm": 0.46844586730003357, "learning_rate": 8.127582017010936e-05, "loss": 1.9826, "step": 3092 }, { "epoch": 0.9388374563666717, "grad_norm": 0.4813016355037689, "learning_rate": 8.126974483596597e-05, "loss": 1.5903, "step": 3093 }, { "epoch": 0.9391409925633631, "grad_norm": 0.4739225208759308, "learning_rate": 8.12636695018226e-05, "loss": 1.7212, "step": 3094 }, { "epoch": 0.9394445287600547, "grad_norm": 0.39909741282463074, "learning_rate": 8.125759416767923e-05, "loss": 2.0123, "step": 3095 }, { "epoch": 0.9397480649567461, "grad_norm": 0.4105834662914276, "learning_rate": 8.125151883353584e-05, "loss": 1.9729, "step": 3096 }, { "epoch": 0.9400516011534376, "grad_norm": 0.41497233510017395, "learning_rate": 8.124544349939247e-05, "loss": 2.0673, "step": 3097 }, { "epoch": 0.940355137350129, "grad_norm": 0.443263441324234, "learning_rate": 8.12393681652491e-05, "loss": 1.6216, "step": 3098 }, { "epoch": 0.9406586735468204, "grad_norm": 0.47175195813179016, "learning_rate": 8.123329283110572e-05, "loss": 1.594, "step": 3099 }, { "epoch": 0.940962209743512, "grad_norm": 0.434952974319458, "learning_rate": 8.122721749696233e-05, "loss": 1.8733, "step": 3100 }, { "epoch": 0.9412657459402034, "grad_norm": 0.5037057399749756, "learning_rate": 8.122114216281897e-05, "loss": 1.5384, "step": 3101 }, { "epoch": 0.9415692821368948, "grad_norm": 0.39780277013778687, "learning_rate": 8.121506682867558e-05, "loss": 2.0412, "step": 3102 }, { "epoch": 0.9418728183335863, "grad_norm": 0.4376054108142853, "learning_rate": 8.12089914945322e-05, "loss": 1.9531, "step": 3103 }, { "epoch": 0.9421763545302777, "grad_norm": 0.40796467661857605, "learning_rate": 8.120291616038882e-05, "loss": 2.062, "step": 3104 }, { "epoch": 0.9424798907269691, "grad_norm": 0.47094616293907166, "learning_rate": 8.119684082624545e-05, "loss": 2.16, "step": 3105 }, { "epoch": 0.9427834269236607, "grad_norm": 0.4758855104446411, "learning_rate": 8.119076549210207e-05, "loss": 1.9761, "step": 3106 }, { "epoch": 0.9430869631203521, "grad_norm": 0.3994719088077545, "learning_rate": 8.118469015795868e-05, "loss": 1.8639, "step": 3107 }, { "epoch": 0.9433904993170436, "grad_norm": 0.39443784952163696, "learning_rate": 8.117861482381531e-05, "loss": 1.8374, "step": 3108 }, { "epoch": 0.943694035513735, "grad_norm": 0.3997192978858948, "learning_rate": 8.117253948967194e-05, "loss": 1.8491, "step": 3109 }, { "epoch": 0.9439975717104264, "grad_norm": 0.4563603401184082, "learning_rate": 8.116646415552855e-05, "loss": 1.4991, "step": 3110 }, { "epoch": 0.944301107907118, "grad_norm": 0.4601759612560272, "learning_rate": 8.116038882138518e-05, "loss": 1.6056, "step": 3111 }, { "epoch": 0.9446046441038094, "grad_norm": 0.39985764026641846, "learning_rate": 8.115431348724181e-05, "loss": 1.391, "step": 3112 }, { "epoch": 0.9449081803005008, "grad_norm": 0.5546020269393921, "learning_rate": 8.114823815309843e-05, "loss": 1.9887, "step": 3113 }, { "epoch": 0.9452117164971923, "grad_norm": 0.4334501624107361, "learning_rate": 8.114216281895504e-05, "loss": 2.1764, "step": 3114 }, { "epoch": 0.9455152526938837, "grad_norm": 0.47174403071403503, "learning_rate": 8.113608748481168e-05, "loss": 1.8397, "step": 3115 }, { "epoch": 0.9458187888905752, "grad_norm": 0.4174114465713501, "learning_rate": 8.113001215066829e-05, "loss": 2.2144, "step": 3116 }, { "epoch": 0.9461223250872667, "grad_norm": 0.7976917028427124, "learning_rate": 8.112393681652491e-05, "loss": 1.3549, "step": 3117 }, { "epoch": 0.9464258612839581, "grad_norm": 0.3866395950317383, "learning_rate": 8.111786148238153e-05, "loss": 2.0, "step": 3118 }, { "epoch": 0.9467293974806495, "grad_norm": 0.43397247791290283, "learning_rate": 8.111178614823816e-05, "loss": 1.7408, "step": 3119 }, { "epoch": 0.947032933677341, "grad_norm": 0.4277322292327881, "learning_rate": 8.110571081409478e-05, "loss": 1.8638, "step": 3120 }, { "epoch": 0.9473364698740325, "grad_norm": 0.38876983523368835, "learning_rate": 8.109963547995139e-05, "loss": 1.7645, "step": 3121 }, { "epoch": 0.947640006070724, "grad_norm": 0.37743645906448364, "learning_rate": 8.109356014580802e-05, "loss": 1.9963, "step": 3122 }, { "epoch": 0.9479435422674154, "grad_norm": 0.43921002745628357, "learning_rate": 8.108748481166465e-05, "loss": 2.0021, "step": 3123 }, { "epoch": 0.9482470784641068, "grad_norm": 0.4989663362503052, "learning_rate": 8.108140947752126e-05, "loss": 1.6826, "step": 3124 }, { "epoch": 0.9485506146607983, "grad_norm": 0.40931862592697144, "learning_rate": 8.107533414337789e-05, "loss": 1.681, "step": 3125 }, { "epoch": 0.9488541508574898, "grad_norm": 0.44620081782341003, "learning_rate": 8.106925880923452e-05, "loss": 2.0738, "step": 3126 }, { "epoch": 0.9491576870541812, "grad_norm": 0.42712563276290894, "learning_rate": 8.106318347509114e-05, "loss": 2.078, "step": 3127 }, { "epoch": 0.9494612232508727, "grad_norm": 2.7110748291015625, "learning_rate": 8.105710814094775e-05, "loss": 1.8328, "step": 3128 }, { "epoch": 0.9497647594475641, "grad_norm": 0.40240269899368286, "learning_rate": 8.105103280680439e-05, "loss": 1.8519, "step": 3129 }, { "epoch": 0.9500682956442555, "grad_norm": 0.43927666544914246, "learning_rate": 8.1044957472661e-05, "loss": 1.7309, "step": 3130 }, { "epoch": 0.950371831840947, "grad_norm": 0.4225032925605774, "learning_rate": 8.103888213851762e-05, "loss": 1.9518, "step": 3131 }, { "epoch": 0.9506753680376385, "grad_norm": 0.4135547876358032, "learning_rate": 8.103280680437424e-05, "loss": 1.791, "step": 3132 }, { "epoch": 0.95097890423433, "grad_norm": 0.5137977004051208, "learning_rate": 8.102673147023087e-05, "loss": 1.8914, "step": 3133 }, { "epoch": 0.9512824404310214, "grad_norm": 0.44080087542533875, "learning_rate": 8.102065613608749e-05, "loss": 1.6005, "step": 3134 }, { "epoch": 0.9515859766277128, "grad_norm": 0.4912469983100891, "learning_rate": 8.10145808019441e-05, "loss": 1.7609, "step": 3135 }, { "epoch": 0.9518895128244043, "grad_norm": 0.6660062074661255, "learning_rate": 8.100850546780073e-05, "loss": 2.0835, "step": 3136 }, { "epoch": 0.9521930490210958, "grad_norm": 0.39112183451652527, "learning_rate": 8.100243013365736e-05, "loss": 1.9975, "step": 3137 }, { "epoch": 0.9524965852177872, "grad_norm": 0.41470736265182495, "learning_rate": 8.099635479951397e-05, "loss": 1.8165, "step": 3138 }, { "epoch": 0.9528001214144787, "grad_norm": 0.6125030517578125, "learning_rate": 8.09902794653706e-05, "loss": 1.4878, "step": 3139 }, { "epoch": 0.9531036576111701, "grad_norm": 0.3625620901584625, "learning_rate": 8.098420413122723e-05, "loss": 1.7098, "step": 3140 }, { "epoch": 0.9534071938078615, "grad_norm": 0.3737241327762604, "learning_rate": 8.097812879708383e-05, "loss": 1.8225, "step": 3141 }, { "epoch": 0.9537107300045531, "grad_norm": 0.4835364520549774, "learning_rate": 8.097205346294047e-05, "loss": 2.2708, "step": 3142 }, { "epoch": 0.9540142662012445, "grad_norm": 0.3605796694755554, "learning_rate": 8.09659781287971e-05, "loss": 2.1616, "step": 3143 }, { "epoch": 0.954317802397936, "grad_norm": 0.42037534713745117, "learning_rate": 8.095990279465371e-05, "loss": 1.8847, "step": 3144 }, { "epoch": 0.9546213385946274, "grad_norm": 0.4341660141944885, "learning_rate": 8.095382746051033e-05, "loss": 1.579, "step": 3145 }, { "epoch": 0.9549248747913188, "grad_norm": 0.39239785075187683, "learning_rate": 8.094775212636695e-05, "loss": 1.9024, "step": 3146 }, { "epoch": 0.9552284109880104, "grad_norm": 0.4219903349876404, "learning_rate": 8.094167679222358e-05, "loss": 1.7518, "step": 3147 }, { "epoch": 0.9555319471847018, "grad_norm": 0.36863937973976135, "learning_rate": 8.09356014580802e-05, "loss": 2.0326, "step": 3148 }, { "epoch": 0.9558354833813932, "grad_norm": 0.4089399576187134, "learning_rate": 8.092952612393681e-05, "loss": 1.9478, "step": 3149 }, { "epoch": 0.9561390195780847, "grad_norm": 0.3865533769130707, "learning_rate": 8.092345078979344e-05, "loss": 1.883, "step": 3150 }, { "epoch": 0.9564425557747761, "grad_norm": 0.3673511743545532, "learning_rate": 8.091737545565008e-05, "loss": 1.8116, "step": 3151 }, { "epoch": 0.9567460919714676, "grad_norm": 0.4296679198741913, "learning_rate": 8.091130012150668e-05, "loss": 1.891, "step": 3152 }, { "epoch": 0.9570496281681591, "grad_norm": 0.3618902266025543, "learning_rate": 8.090522478736331e-05, "loss": 1.8951, "step": 3153 }, { "epoch": 0.9573531643648505, "grad_norm": 0.3620889484882355, "learning_rate": 8.089914945321994e-05, "loss": 1.5316, "step": 3154 }, { "epoch": 0.957656700561542, "grad_norm": 0.4978037178516388, "learning_rate": 8.089307411907654e-05, "loss": 1.6175, "step": 3155 }, { "epoch": 0.9579602367582334, "grad_norm": 0.4385554790496826, "learning_rate": 8.088699878493318e-05, "loss": 1.8811, "step": 3156 }, { "epoch": 0.9582637729549248, "grad_norm": 0.42445600032806396, "learning_rate": 8.08809234507898e-05, "loss": 1.9388, "step": 3157 }, { "epoch": 0.9585673091516164, "grad_norm": 0.4952315092086792, "learning_rate": 8.087484811664642e-05, "loss": 2.0404, "step": 3158 }, { "epoch": 0.9588708453483078, "grad_norm": 0.3969573676586151, "learning_rate": 8.086877278250304e-05, "loss": 1.818, "step": 3159 }, { "epoch": 0.9591743815449992, "grad_norm": 0.41406628489494324, "learning_rate": 8.086269744835966e-05, "loss": 2.0014, "step": 3160 }, { "epoch": 0.9594779177416907, "grad_norm": 0.40631070733070374, "learning_rate": 8.085662211421629e-05, "loss": 1.7627, "step": 3161 }, { "epoch": 0.9597814539383821, "grad_norm": 0.41568198800086975, "learning_rate": 8.08505467800729e-05, "loss": 2.0605, "step": 3162 }, { "epoch": 0.9600849901350736, "grad_norm": 0.39019855856895447, "learning_rate": 8.084447144592952e-05, "loss": 2.3052, "step": 3163 }, { "epoch": 0.9603885263317651, "grad_norm": 0.42019182443618774, "learning_rate": 8.083839611178615e-05, "loss": 1.1618, "step": 3164 }, { "epoch": 0.9606920625284565, "grad_norm": 0.3448597192764282, "learning_rate": 8.083232077764277e-05, "loss": 1.1122, "step": 3165 }, { "epoch": 0.960995598725148, "grad_norm": 0.3484005928039551, "learning_rate": 8.082624544349939e-05, "loss": 1.3973, "step": 3166 }, { "epoch": 0.9612991349218394, "grad_norm": 0.3877616822719574, "learning_rate": 8.082017010935602e-05, "loss": 1.7281, "step": 3167 }, { "epoch": 0.9616026711185309, "grad_norm": 0.7124067544937134, "learning_rate": 8.081409477521265e-05, "loss": 1.7848, "step": 3168 }, { "epoch": 0.9619062073152224, "grad_norm": 0.4344068467617035, "learning_rate": 8.080801944106925e-05, "loss": 1.1598, "step": 3169 }, { "epoch": 0.9622097435119138, "grad_norm": 0.8230828046798706, "learning_rate": 8.080194410692589e-05, "loss": 1.7979, "step": 3170 }, { "epoch": 0.9625132797086052, "grad_norm": 0.45202380418777466, "learning_rate": 8.07958687727825e-05, "loss": 1.9827, "step": 3171 }, { "epoch": 0.9628168159052967, "grad_norm": 0.37519025802612305, "learning_rate": 8.078979343863913e-05, "loss": 1.9966, "step": 3172 }, { "epoch": 0.9631203521019882, "grad_norm": 0.42726776003837585, "learning_rate": 8.078371810449575e-05, "loss": 1.9923, "step": 3173 }, { "epoch": 0.9634238882986796, "grad_norm": 0.5753629207611084, "learning_rate": 8.077764277035237e-05, "loss": 1.695, "step": 3174 }, { "epoch": 0.9637274244953711, "grad_norm": 0.44009268283843994, "learning_rate": 8.0771567436209e-05, "loss": 1.5039, "step": 3175 }, { "epoch": 0.9640309606920625, "grad_norm": 0.42067059874534607, "learning_rate": 8.076549210206562e-05, "loss": 1.8273, "step": 3176 }, { "epoch": 0.964334496888754, "grad_norm": 0.44108089804649353, "learning_rate": 8.075941676792223e-05, "loss": 1.6372, "step": 3177 }, { "epoch": 0.9646380330854455, "grad_norm": 0.39648228883743286, "learning_rate": 8.075334143377886e-05, "loss": 2.1179, "step": 3178 }, { "epoch": 0.9649415692821369, "grad_norm": 0.45205631852149963, "learning_rate": 8.074726609963548e-05, "loss": 1.9005, "step": 3179 }, { "epoch": 0.9652451054788284, "grad_norm": 0.45935380458831787, "learning_rate": 8.07411907654921e-05, "loss": 1.7634, "step": 3180 }, { "epoch": 0.9655486416755198, "grad_norm": 0.3873693645000458, "learning_rate": 8.073511543134873e-05, "loss": 1.9629, "step": 3181 }, { "epoch": 0.9658521778722112, "grad_norm": 0.3731973469257355, "learning_rate": 8.072904009720536e-05, "loss": 2.0085, "step": 3182 }, { "epoch": 0.9661557140689028, "grad_norm": 0.45661619305610657, "learning_rate": 8.072296476306196e-05, "loss": 1.3693, "step": 3183 }, { "epoch": 0.9664592502655942, "grad_norm": 0.47569990158081055, "learning_rate": 8.07168894289186e-05, "loss": 1.6855, "step": 3184 }, { "epoch": 0.9667627864622856, "grad_norm": 0.4035504460334778, "learning_rate": 8.071081409477521e-05, "loss": 2.1902, "step": 3185 }, { "epoch": 0.9670663226589771, "grad_norm": 0.6134029030799866, "learning_rate": 8.070473876063184e-05, "loss": 2.0059, "step": 3186 }, { "epoch": 0.9673698588556685, "grad_norm": 0.5165479779243469, "learning_rate": 8.069866342648846e-05, "loss": 1.6179, "step": 3187 }, { "epoch": 0.9676733950523599, "grad_norm": 0.48403364419937134, "learning_rate": 8.069258809234508e-05, "loss": 2.1823, "step": 3188 }, { "epoch": 0.9679769312490515, "grad_norm": 0.4141898453235626, "learning_rate": 8.068651275820171e-05, "loss": 1.7823, "step": 3189 }, { "epoch": 0.9682804674457429, "grad_norm": 0.4937414228916168, "learning_rate": 8.068043742405833e-05, "loss": 1.5423, "step": 3190 }, { "epoch": 0.9685840036424344, "grad_norm": 0.39996278285980225, "learning_rate": 8.067436208991494e-05, "loss": 1.8201, "step": 3191 }, { "epoch": 0.9688875398391258, "grad_norm": 0.4205084443092346, "learning_rate": 8.066828675577157e-05, "loss": 2.1392, "step": 3192 }, { "epoch": 0.9691910760358172, "grad_norm": 0.43701469898223877, "learning_rate": 8.066221142162819e-05, "loss": 1.8178, "step": 3193 }, { "epoch": 0.9694946122325088, "grad_norm": 0.45265719294548035, "learning_rate": 8.065613608748481e-05, "loss": 1.6506, "step": 3194 }, { "epoch": 0.9697981484292002, "grad_norm": 0.43316105008125305, "learning_rate": 8.065006075334144e-05, "loss": 1.6709, "step": 3195 }, { "epoch": 0.9701016846258916, "grad_norm": 0.37833353877067566, "learning_rate": 8.064398541919807e-05, "loss": 1.4646, "step": 3196 }, { "epoch": 0.9704052208225831, "grad_norm": 0.444698691368103, "learning_rate": 8.063791008505467e-05, "loss": 1.6426, "step": 3197 }, { "epoch": 0.9707087570192745, "grad_norm": 0.43853360414505005, "learning_rate": 8.06318347509113e-05, "loss": 2.1562, "step": 3198 }, { "epoch": 0.971012293215966, "grad_norm": 0.6573916673660278, "learning_rate": 8.062575941676792e-05, "loss": 2.1327, "step": 3199 }, { "epoch": 0.9713158294126575, "grad_norm": 0.41661205887794495, "learning_rate": 8.061968408262455e-05, "loss": 1.68, "step": 3200 }, { "epoch": 0.9716193656093489, "grad_norm": 0.7264708876609802, "learning_rate": 8.061360874848117e-05, "loss": 1.8907, "step": 3201 }, { "epoch": 0.9719229018060404, "grad_norm": 0.3977676331996918, "learning_rate": 8.060753341433779e-05, "loss": 1.9205, "step": 3202 }, { "epoch": 0.9722264380027318, "grad_norm": 0.7518191337585449, "learning_rate": 8.060145808019442e-05, "loss": 1.4833, "step": 3203 }, { "epoch": 0.9725299741994233, "grad_norm": 0.4262489080429077, "learning_rate": 8.059538274605104e-05, "loss": 1.8307, "step": 3204 }, { "epoch": 0.9728335103961148, "grad_norm": 0.40081748366355896, "learning_rate": 8.058930741190765e-05, "loss": 0.9998, "step": 3205 }, { "epoch": 0.9731370465928062, "grad_norm": 0.46323978900909424, "learning_rate": 8.058323207776428e-05, "loss": 1.9063, "step": 3206 }, { "epoch": 0.9734405827894976, "grad_norm": 0.40302255749702454, "learning_rate": 8.05771567436209e-05, "loss": 1.8153, "step": 3207 }, { "epoch": 0.9737441189861891, "grad_norm": 0.4447222948074341, "learning_rate": 8.057108140947752e-05, "loss": 2.0744, "step": 3208 }, { "epoch": 0.9740476551828806, "grad_norm": 0.33706068992614746, "learning_rate": 8.056500607533415e-05, "loss": 1.7981, "step": 3209 }, { "epoch": 0.974351191379572, "grad_norm": 0.44239774346351624, "learning_rate": 8.055893074119078e-05, "loss": 1.7168, "step": 3210 }, { "epoch": 0.9746547275762635, "grad_norm": 0.41518473625183105, "learning_rate": 8.055285540704738e-05, "loss": 1.9278, "step": 3211 }, { "epoch": 0.9749582637729549, "grad_norm": 0.4727713167667389, "learning_rate": 8.054678007290402e-05, "loss": 1.6719, "step": 3212 }, { "epoch": 0.9752617999696463, "grad_norm": 0.4056665599346161, "learning_rate": 8.054070473876063e-05, "loss": 1.7211, "step": 3213 }, { "epoch": 0.9755653361663378, "grad_norm": 0.47795236110687256, "learning_rate": 8.053462940461726e-05, "loss": 1.5074, "step": 3214 }, { "epoch": 0.9758688723630293, "grad_norm": 0.5859802961349487, "learning_rate": 8.052855407047388e-05, "loss": 1.7038, "step": 3215 }, { "epoch": 0.9761724085597208, "grad_norm": 0.398113876581192, "learning_rate": 8.05224787363305e-05, "loss": 1.7802, "step": 3216 }, { "epoch": 0.9764759447564122, "grad_norm": 0.3661412000656128, "learning_rate": 8.051640340218713e-05, "loss": 1.4388, "step": 3217 }, { "epoch": 0.9767794809531036, "grad_norm": 0.40348801016807556, "learning_rate": 8.051032806804375e-05, "loss": 1.8187, "step": 3218 }, { "epoch": 0.9770830171497951, "grad_norm": 0.3885161876678467, "learning_rate": 8.050425273390036e-05, "loss": 1.7975, "step": 3219 }, { "epoch": 0.9773865533464866, "grad_norm": 0.3739737868309021, "learning_rate": 8.0498177399757e-05, "loss": 1.7525, "step": 3220 }, { "epoch": 0.977690089543178, "grad_norm": 0.48323333263397217, "learning_rate": 8.049210206561361e-05, "loss": 1.3203, "step": 3221 }, { "epoch": 0.9779936257398695, "grad_norm": 0.3983123004436493, "learning_rate": 8.048602673147023e-05, "loss": 1.994, "step": 3222 }, { "epoch": 0.9782971619365609, "grad_norm": 0.4193548560142517, "learning_rate": 8.047995139732686e-05, "loss": 1.811, "step": 3223 }, { "epoch": 0.9786006981332523, "grad_norm": 0.353444367647171, "learning_rate": 8.047387606318349e-05, "loss": 1.7789, "step": 3224 }, { "epoch": 0.9789042343299439, "grad_norm": 0.5068827867507935, "learning_rate": 8.04678007290401e-05, "loss": 1.9425, "step": 3225 }, { "epoch": 0.9792077705266353, "grad_norm": 0.42118749022483826, "learning_rate": 8.046172539489673e-05, "loss": 1.8055, "step": 3226 }, { "epoch": 0.9795113067233268, "grad_norm": 0.4077788293361664, "learning_rate": 8.045565006075334e-05, "loss": 1.3257, "step": 3227 }, { "epoch": 0.9798148429200182, "grad_norm": 0.45757341384887695, "learning_rate": 8.044957472660996e-05, "loss": 1.998, "step": 3228 }, { "epoch": 0.9801183791167096, "grad_norm": 0.4154861271381378, "learning_rate": 8.044349939246659e-05, "loss": 1.8151, "step": 3229 }, { "epoch": 0.9804219153134012, "grad_norm": 0.48109593987464905, "learning_rate": 8.043742405832321e-05, "loss": 1.7802, "step": 3230 }, { "epoch": 0.9807254515100926, "grad_norm": 0.4576222002506256, "learning_rate": 8.043134872417984e-05, "loss": 1.7928, "step": 3231 }, { "epoch": 0.981028987706784, "grad_norm": 0.38525086641311646, "learning_rate": 8.042527339003646e-05, "loss": 1.4135, "step": 3232 }, { "epoch": 0.9813325239034755, "grad_norm": 0.3866974115371704, "learning_rate": 8.041919805589307e-05, "loss": 1.9068, "step": 3233 }, { "epoch": 0.9816360601001669, "grad_norm": 0.421790212392807, "learning_rate": 8.04131227217497e-05, "loss": 1.8243, "step": 3234 }, { "epoch": 0.9819395962968585, "grad_norm": 0.5579865574836731, "learning_rate": 8.040704738760632e-05, "loss": 1.8662, "step": 3235 }, { "epoch": 0.9822431324935499, "grad_norm": 0.5178837180137634, "learning_rate": 8.040097205346294e-05, "loss": 1.815, "step": 3236 }, { "epoch": 0.9825466686902413, "grad_norm": 0.3817935883998871, "learning_rate": 8.039489671931957e-05, "loss": 2.2422, "step": 3237 }, { "epoch": 0.9828502048869328, "grad_norm": 0.4080420136451721, "learning_rate": 8.038882138517619e-05, "loss": 2.0504, "step": 3238 }, { "epoch": 0.9831537410836242, "grad_norm": 0.3719751536846161, "learning_rate": 8.03827460510328e-05, "loss": 1.9778, "step": 3239 }, { "epoch": 0.9834572772803156, "grad_norm": 0.36413270235061646, "learning_rate": 8.037667071688944e-05, "loss": 1.3326, "step": 3240 }, { "epoch": 0.9837608134770072, "grad_norm": 0.3482026755809784, "learning_rate": 8.037059538274605e-05, "loss": 1.7243, "step": 3241 }, { "epoch": 0.9840643496736986, "grad_norm": 0.3891375958919525, "learning_rate": 8.036452004860267e-05, "loss": 1.8408, "step": 3242 }, { "epoch": 0.98436788587039, "grad_norm": 0.4400385618209839, "learning_rate": 8.03584447144593e-05, "loss": 1.967, "step": 3243 }, { "epoch": 0.9846714220670815, "grad_norm": 0.3769470751285553, "learning_rate": 8.035236938031592e-05, "loss": 1.3612, "step": 3244 }, { "epoch": 0.9849749582637729, "grad_norm": 0.39424487948417664, "learning_rate": 8.034629404617255e-05, "loss": 1.8445, "step": 3245 }, { "epoch": 0.9852784944604644, "grad_norm": 0.4074876308441162, "learning_rate": 8.034021871202917e-05, "loss": 1.9461, "step": 3246 }, { "epoch": 0.9855820306571559, "grad_norm": 0.4052838683128357, "learning_rate": 8.033414337788578e-05, "loss": 1.6049, "step": 3247 }, { "epoch": 0.9858855668538473, "grad_norm": 0.4411472678184509, "learning_rate": 8.032806804374241e-05, "loss": 2.0511, "step": 3248 }, { "epoch": 0.9861891030505388, "grad_norm": 0.37311851978302, "learning_rate": 8.032199270959903e-05, "loss": 1.9176, "step": 3249 }, { "epoch": 0.9864926392472302, "grad_norm": 0.3146267235279083, "learning_rate": 8.031591737545565e-05, "loss": 1.813, "step": 3250 }, { "epoch": 0.9867961754439217, "grad_norm": 0.5194718241691589, "learning_rate": 8.030984204131228e-05, "loss": 2.0425, "step": 3251 }, { "epoch": 0.9870997116406132, "grad_norm": 0.37070557475090027, "learning_rate": 8.03037667071689e-05, "loss": 1.8255, "step": 3252 }, { "epoch": 0.9874032478373046, "grad_norm": 0.37021851539611816, "learning_rate": 8.029769137302551e-05, "loss": 2.2163, "step": 3253 }, { "epoch": 0.987706784033996, "grad_norm": 0.5118260979652405, "learning_rate": 8.029161603888215e-05, "loss": 1.4771, "step": 3254 }, { "epoch": 0.9880103202306875, "grad_norm": 0.4722789227962494, "learning_rate": 8.028554070473876e-05, "loss": 1.9019, "step": 3255 }, { "epoch": 0.988313856427379, "grad_norm": 0.3468252420425415, "learning_rate": 8.027946537059538e-05, "loss": 1.7159, "step": 3256 }, { "epoch": 0.9886173926240704, "grad_norm": 0.4422720968723297, "learning_rate": 8.027339003645201e-05, "loss": 1.8671, "step": 3257 }, { "epoch": 0.9889209288207619, "grad_norm": 0.46859246492385864, "learning_rate": 8.026731470230863e-05, "loss": 2.0586, "step": 3258 }, { "epoch": 0.9892244650174533, "grad_norm": 0.46339279413223267, "learning_rate": 8.026123936816526e-05, "loss": 1.7544, "step": 3259 }, { "epoch": 0.9895280012141447, "grad_norm": 0.3819115161895752, "learning_rate": 8.025516403402188e-05, "loss": 1.9807, "step": 3260 }, { "epoch": 0.9898315374108363, "grad_norm": 0.8981953263282776, "learning_rate": 8.02490886998785e-05, "loss": 1.5445, "step": 3261 }, { "epoch": 0.9901350736075277, "grad_norm": 0.4964045584201813, "learning_rate": 8.024301336573512e-05, "loss": 1.6069, "step": 3262 }, { "epoch": 0.9904386098042192, "grad_norm": 0.4120222330093384, "learning_rate": 8.023693803159174e-05, "loss": 1.8865, "step": 3263 }, { "epoch": 0.9907421460009106, "grad_norm": 0.44508838653564453, "learning_rate": 8.023086269744836e-05, "loss": 1.4333, "step": 3264 }, { "epoch": 0.991045682197602, "grad_norm": 0.3859883248806, "learning_rate": 8.022478736330499e-05, "loss": 1.9533, "step": 3265 }, { "epoch": 0.9913492183942936, "grad_norm": 0.4727214276790619, "learning_rate": 8.021871202916161e-05, "loss": 1.4972, "step": 3266 }, { "epoch": 0.991652754590985, "grad_norm": 0.8692718148231506, "learning_rate": 8.021263669501822e-05, "loss": 1.5174, "step": 3267 }, { "epoch": 0.9919562907876764, "grad_norm": 0.4142051637172699, "learning_rate": 8.020656136087486e-05, "loss": 1.8747, "step": 3268 }, { "epoch": 0.9922598269843679, "grad_norm": 0.4075202941894531, "learning_rate": 8.020048602673147e-05, "loss": 1.9666, "step": 3269 }, { "epoch": 0.9925633631810593, "grad_norm": 0.7073702216148376, "learning_rate": 8.019441069258809e-05, "loss": 1.9629, "step": 3270 }, { "epoch": 0.9928668993777507, "grad_norm": 0.4240557849407196, "learning_rate": 8.018833535844472e-05, "loss": 1.6251, "step": 3271 }, { "epoch": 0.9931704355744423, "grad_norm": 0.4226653277873993, "learning_rate": 8.018226002430134e-05, "loss": 1.9254, "step": 3272 }, { "epoch": 0.9934739717711337, "grad_norm": 0.40740150213241577, "learning_rate": 8.017618469015797e-05, "loss": 1.8494, "step": 3273 }, { "epoch": 0.9937775079678252, "grad_norm": 0.4575270712375641, "learning_rate": 8.017010935601459e-05, "loss": 1.5237, "step": 3274 }, { "epoch": 0.9940810441645166, "grad_norm": 0.48337459564208984, "learning_rate": 8.01640340218712e-05, "loss": 1.5319, "step": 3275 }, { "epoch": 0.994384580361208, "grad_norm": 0.3628256916999817, "learning_rate": 8.015795868772783e-05, "loss": 2.0454, "step": 3276 }, { "epoch": 0.9946881165578996, "grad_norm": 0.3945721387863159, "learning_rate": 8.015188335358445e-05, "loss": 2.0646, "step": 3277 }, { "epoch": 0.994991652754591, "grad_norm": 0.38345763087272644, "learning_rate": 8.014580801944107e-05, "loss": 1.4601, "step": 3278 }, { "epoch": 0.9952951889512824, "grad_norm": 0.47283461689949036, "learning_rate": 8.01397326852977e-05, "loss": 1.6951, "step": 3279 }, { "epoch": 0.9955987251479739, "grad_norm": 0.47353407740592957, "learning_rate": 8.013365735115432e-05, "loss": 2.0192, "step": 3280 }, { "epoch": 0.9959022613446653, "grad_norm": 0.41565829515457153, "learning_rate": 8.012758201701093e-05, "loss": 1.8168, "step": 3281 }, { "epoch": 0.9962057975413569, "grad_norm": 0.43817979097366333, "learning_rate": 8.012150668286757e-05, "loss": 2.1453, "step": 3282 }, { "epoch": 0.9965093337380483, "grad_norm": 0.5480432510375977, "learning_rate": 8.011543134872418e-05, "loss": 1.6872, "step": 3283 }, { "epoch": 0.9968128699347397, "grad_norm": 0.344694584608078, "learning_rate": 8.01093560145808e-05, "loss": 1.0506, "step": 3284 }, { "epoch": 0.9971164061314312, "grad_norm": 0.39683830738067627, "learning_rate": 8.010328068043743e-05, "loss": 1.9114, "step": 3285 }, { "epoch": 0.9974199423281226, "grad_norm": 0.865807294845581, "learning_rate": 8.009720534629405e-05, "loss": 2.0548, "step": 3286 }, { "epoch": 0.9977234785248141, "grad_norm": 0.4596058130264282, "learning_rate": 8.009113001215068e-05, "loss": 1.5162, "step": 3287 }, { "epoch": 0.9980270147215056, "grad_norm": 0.45966169238090515, "learning_rate": 8.00850546780073e-05, "loss": 1.9112, "step": 3288 }, { "epoch": 0.998330550918197, "grad_norm": 0.44408029317855835, "learning_rate": 8.007897934386391e-05, "loss": 1.7549, "step": 3289 }, { "epoch": 0.9986340871148884, "grad_norm": 0.4286332428455353, "learning_rate": 8.007290400972054e-05, "loss": 1.9614, "step": 3290 }, { "epoch": 0.9989376233115799, "grad_norm": 0.40551066398620605, "learning_rate": 8.006682867557716e-05, "loss": 1.9918, "step": 3291 }, { "epoch": 0.9992411595082714, "grad_norm": 0.41468697786331177, "learning_rate": 8.006075334143378e-05, "loss": 1.9249, "step": 3292 }, { "epoch": 0.9995446957049628, "grad_norm": 0.506384551525116, "learning_rate": 8.005467800729041e-05, "loss": 1.9334, "step": 3293 }, { "epoch": 0.9998482319016543, "grad_norm": 0.4209151268005371, "learning_rate": 8.004860267314703e-05, "loss": 1.728, "step": 3294 }, { "epoch": 1.0001517680983458, "grad_norm": 17.907875061035156, "learning_rate": 8.004252733900364e-05, "loss": 2.4794, "step": 3295 }, { "epoch": 1.0004553042950373, "grad_norm": 0.46547284722328186, "learning_rate": 8.003645200486028e-05, "loss": 1.4745, "step": 3296 }, { "epoch": 1.0007588404917287, "grad_norm": 0.3899800777435303, "learning_rate": 8.003037667071689e-05, "loss": 1.5406, "step": 3297 }, { "epoch": 1.0010623766884201, "grad_norm": 0.48273205757141113, "learning_rate": 8.002430133657351e-05, "loss": 1.4638, "step": 3298 }, { "epoch": 1.0013659128851116, "grad_norm": 0.36288753151893616, "learning_rate": 8.001822600243014e-05, "loss": 1.8582, "step": 3299 }, { "epoch": 1.001669449081803, "grad_norm": 0.4598756432533264, "learning_rate": 8.001215066828676e-05, "loss": 1.1343, "step": 3300 }, { "epoch": 1.0019729852784944, "grad_norm": 0.4313514530658722, "learning_rate": 8.000607533414338e-05, "loss": 1.6411, "step": 3301 }, { "epoch": 1.0022765214751859, "grad_norm": 0.5020793676376343, "learning_rate": 8e-05, "loss": 1.8333, "step": 3302 }, { "epoch": 1.0025800576718773, "grad_norm": 0.7939902544021606, "learning_rate": 7.999392466585662e-05, "loss": 1.0223, "step": 3303 }, { "epoch": 1.0028835938685687, "grad_norm": 0.42683956027030945, "learning_rate": 7.998784933171325e-05, "loss": 1.6555, "step": 3304 }, { "epoch": 1.0031871300652604, "grad_norm": 1.1804200410842896, "learning_rate": 7.998177399756987e-05, "loss": 1.2333, "step": 3305 }, { "epoch": 1.0034906662619518, "grad_norm": 0.4585864543914795, "learning_rate": 7.997569866342649e-05, "loss": 1.7396, "step": 3306 }, { "epoch": 1.0037942024586433, "grad_norm": 0.6374893188476562, "learning_rate": 7.996962332928312e-05, "loss": 1.4289, "step": 3307 }, { "epoch": 1.0040977386553347, "grad_norm": 0.7758880853652954, "learning_rate": 7.996354799513974e-05, "loss": 1.2918, "step": 3308 }, { "epoch": 1.0044012748520261, "grad_norm": 0.7899906039237976, "learning_rate": 7.995747266099635e-05, "loss": 0.9088, "step": 3309 }, { "epoch": 1.0047048110487176, "grad_norm": 0.5590714812278748, "learning_rate": 7.995139732685299e-05, "loss": 1.2513, "step": 3310 }, { "epoch": 1.005008347245409, "grad_norm": 0.49430859088897705, "learning_rate": 7.99453219927096e-05, "loss": 1.5882, "step": 3311 }, { "epoch": 1.0053118834421004, "grad_norm": 0.4428652822971344, "learning_rate": 7.993924665856622e-05, "loss": 1.6225, "step": 3312 }, { "epoch": 1.0056154196387919, "grad_norm": 1.8553460836410522, "learning_rate": 7.993317132442285e-05, "loss": 1.3601, "step": 3313 }, { "epoch": 1.0059189558354833, "grad_norm": 0.5211709141731262, "learning_rate": 7.992709599027947e-05, "loss": 1.8398, "step": 3314 }, { "epoch": 1.0062224920321747, "grad_norm": 0.7685166001319885, "learning_rate": 7.992102065613609e-05, "loss": 1.5013, "step": 3315 }, { "epoch": 1.0065260282288664, "grad_norm": 0.4375928044319153, "learning_rate": 7.991494532199272e-05, "loss": 1.8564, "step": 3316 }, { "epoch": 1.0068295644255578, "grad_norm": 0.44753187894821167, "learning_rate": 7.990886998784933e-05, "loss": 1.3722, "step": 3317 }, { "epoch": 1.0071331006222493, "grad_norm": 0.48083680868148804, "learning_rate": 7.990279465370596e-05, "loss": 1.4704, "step": 3318 }, { "epoch": 1.0074366368189407, "grad_norm": 0.3680810332298279, "learning_rate": 7.989671931956258e-05, "loss": 1.2053, "step": 3319 }, { "epoch": 1.0077401730156321, "grad_norm": 0.37688201665878296, "learning_rate": 7.98906439854192e-05, "loss": 1.9585, "step": 3320 }, { "epoch": 1.0080437092123236, "grad_norm": 0.4439717233181, "learning_rate": 7.988456865127583e-05, "loss": 1.6347, "step": 3321 }, { "epoch": 1.008347245409015, "grad_norm": 0.44323423504829407, "learning_rate": 7.987849331713245e-05, "loss": 1.7071, "step": 3322 }, { "epoch": 1.0086507816057064, "grad_norm": 0.44141215085983276, "learning_rate": 7.987241798298906e-05, "loss": 1.5686, "step": 3323 }, { "epoch": 1.0089543178023979, "grad_norm": 0.3377261459827423, "learning_rate": 7.98663426488457e-05, "loss": 1.0637, "step": 3324 }, { "epoch": 1.0092578539990893, "grad_norm": 0.679061770439148, "learning_rate": 7.986026731470231e-05, "loss": 1.6332, "step": 3325 }, { "epoch": 1.009561390195781, "grad_norm": 0.40934574604034424, "learning_rate": 7.985419198055893e-05, "loss": 1.591, "step": 3326 }, { "epoch": 1.0098649263924724, "grad_norm": 0.4708541929721832, "learning_rate": 7.984811664641556e-05, "loss": 1.4567, "step": 3327 }, { "epoch": 1.0101684625891638, "grad_norm": 0.4251214861869812, "learning_rate": 7.984204131227218e-05, "loss": 1.7905, "step": 3328 }, { "epoch": 1.0104719987858553, "grad_norm": 0.48691290616989136, "learning_rate": 7.98359659781288e-05, "loss": 1.8187, "step": 3329 }, { "epoch": 1.0107755349825467, "grad_norm": 0.4369681179523468, "learning_rate": 7.982989064398543e-05, "loss": 1.5784, "step": 3330 }, { "epoch": 1.0110790711792381, "grad_norm": 0.47362881898880005, "learning_rate": 7.982381530984204e-05, "loss": 1.2642, "step": 3331 }, { "epoch": 1.0113826073759296, "grad_norm": 0.4974597096443176, "learning_rate": 7.981773997569867e-05, "loss": 1.6925, "step": 3332 }, { "epoch": 1.011686143572621, "grad_norm": 0.46564406156539917, "learning_rate": 7.981166464155529e-05, "loss": 1.6209, "step": 3333 }, { "epoch": 1.0119896797693124, "grad_norm": 0.4477474093437195, "learning_rate": 7.980558930741191e-05, "loss": 1.8006, "step": 3334 }, { "epoch": 1.0122932159660039, "grad_norm": 0.4635123312473297, "learning_rate": 7.979951397326854e-05, "loss": 1.818, "step": 3335 }, { "epoch": 1.0125967521626955, "grad_norm": 0.43166083097457886, "learning_rate": 7.979343863912516e-05, "loss": 1.3599, "step": 3336 }, { "epoch": 1.012900288359387, "grad_norm": 0.39611899852752686, "learning_rate": 7.978736330498177e-05, "loss": 1.4963, "step": 3337 }, { "epoch": 1.0132038245560784, "grad_norm": 0.917677104473114, "learning_rate": 7.97812879708384e-05, "loss": 1.3816, "step": 3338 }, { "epoch": 1.0135073607527698, "grad_norm": 0.4772632420063019, "learning_rate": 7.977521263669502e-05, "loss": 1.699, "step": 3339 }, { "epoch": 1.0138108969494612, "grad_norm": 0.5998721122741699, "learning_rate": 7.976913730255164e-05, "loss": 1.5147, "step": 3340 }, { "epoch": 1.0141144331461527, "grad_norm": 0.47984611988067627, "learning_rate": 7.976306196840827e-05, "loss": 1.4805, "step": 3341 }, { "epoch": 1.0144179693428441, "grad_norm": 0.4247418940067291, "learning_rate": 7.975698663426489e-05, "loss": 1.3944, "step": 3342 }, { "epoch": 1.0147215055395356, "grad_norm": 0.5164505839347839, "learning_rate": 7.97509113001215e-05, "loss": 1.4708, "step": 3343 }, { "epoch": 1.015025041736227, "grad_norm": 0.43840450048446655, "learning_rate": 7.974483596597814e-05, "loss": 1.5382, "step": 3344 }, { "epoch": 1.0153285779329184, "grad_norm": 0.49245715141296387, "learning_rate": 7.973876063183475e-05, "loss": 1.6582, "step": 3345 }, { "epoch": 1.0156321141296099, "grad_norm": 0.6701889634132385, "learning_rate": 7.973268529769138e-05, "loss": 1.8984, "step": 3346 }, { "epoch": 1.0159356503263015, "grad_norm": 0.4831668734550476, "learning_rate": 7.972660996354799e-05, "loss": 1.4743, "step": 3347 }, { "epoch": 1.016239186522993, "grad_norm": 0.4388216733932495, "learning_rate": 7.972053462940462e-05, "loss": 1.5717, "step": 3348 }, { "epoch": 1.0165427227196844, "grad_norm": 0.3998357951641083, "learning_rate": 7.971445929526125e-05, "loss": 1.6379, "step": 3349 }, { "epoch": 1.0168462589163758, "grad_norm": 0.4894062876701355, "learning_rate": 7.970838396111785e-05, "loss": 1.3522, "step": 3350 }, { "epoch": 1.0171497951130672, "grad_norm": 0.6286391019821167, "learning_rate": 7.970230862697448e-05, "loss": 1.5174, "step": 3351 }, { "epoch": 1.0174533313097587, "grad_norm": 0.688150942325592, "learning_rate": 7.969623329283112e-05, "loss": 1.8916, "step": 3352 }, { "epoch": 1.0177568675064501, "grad_norm": 0.39143821597099304, "learning_rate": 7.969015795868773e-05, "loss": 1.7765, "step": 3353 }, { "epoch": 1.0180604037031415, "grad_norm": 0.5299899578094482, "learning_rate": 7.968408262454435e-05, "loss": 1.5828, "step": 3354 }, { "epoch": 1.018363939899833, "grad_norm": 0.43358203768730164, "learning_rate": 7.967800729040098e-05, "loss": 1.4084, "step": 3355 }, { "epoch": 1.0186674760965244, "grad_norm": 0.43103456497192383, "learning_rate": 7.96719319562576e-05, "loss": 1.5633, "step": 3356 }, { "epoch": 1.018971012293216, "grad_norm": 0.4097878634929657, "learning_rate": 7.966585662211422e-05, "loss": 1.3976, "step": 3357 }, { "epoch": 1.0192745484899075, "grad_norm": 0.47395214438438416, "learning_rate": 7.965978128797085e-05, "loss": 1.6707, "step": 3358 }, { "epoch": 1.019578084686599, "grad_norm": 0.6641651391983032, "learning_rate": 7.965370595382746e-05, "loss": 1.8728, "step": 3359 }, { "epoch": 1.0198816208832904, "grad_norm": 0.48009195923805237, "learning_rate": 7.96476306196841e-05, "loss": 1.675, "step": 3360 }, { "epoch": 1.0201851570799818, "grad_norm": 0.430106520652771, "learning_rate": 7.96415552855407e-05, "loss": 1.7039, "step": 3361 }, { "epoch": 1.0204886932766732, "grad_norm": 0.42592278122901917, "learning_rate": 7.963547995139733e-05, "loss": 1.5168, "step": 3362 }, { "epoch": 1.0207922294733647, "grad_norm": 0.5778846144676208, "learning_rate": 7.962940461725396e-05, "loss": 1.0481, "step": 3363 }, { "epoch": 1.021095765670056, "grad_norm": 0.4378105103969574, "learning_rate": 7.962332928311056e-05, "loss": 1.9381, "step": 3364 }, { "epoch": 1.0213993018667475, "grad_norm": 0.4664958715438843, "learning_rate": 7.96172539489672e-05, "loss": 1.6863, "step": 3365 }, { "epoch": 1.021702838063439, "grad_norm": 0.455496609210968, "learning_rate": 7.961117861482383e-05, "loss": 1.7554, "step": 3366 }, { "epoch": 1.0220063742601304, "grad_norm": 0.5868107676506042, "learning_rate": 7.960510328068044e-05, "loss": 1.9657, "step": 3367 }, { "epoch": 1.022309910456822, "grad_norm": 0.5736465454101562, "learning_rate": 7.959902794653706e-05, "loss": 1.2157, "step": 3368 }, { "epoch": 1.0226134466535135, "grad_norm": 0.3856525421142578, "learning_rate": 7.959295261239369e-05, "loss": 1.7684, "step": 3369 }, { "epoch": 1.022916982850205, "grad_norm": 0.5012997388839722, "learning_rate": 7.958687727825031e-05, "loss": 1.5705, "step": 3370 }, { "epoch": 1.0232205190468964, "grad_norm": 0.4648292362689972, "learning_rate": 7.958080194410693e-05, "loss": 1.9879, "step": 3371 }, { "epoch": 1.0235240552435878, "grad_norm": 0.39332127571105957, "learning_rate": 7.957472660996356e-05, "loss": 1.741, "step": 3372 }, { "epoch": 1.0238275914402792, "grad_norm": 0.4548643231391907, "learning_rate": 7.956865127582017e-05, "loss": 1.6544, "step": 3373 }, { "epoch": 1.0241311276369707, "grad_norm": 0.36641523241996765, "learning_rate": 7.956257594167679e-05, "loss": 1.2098, "step": 3374 }, { "epoch": 1.024434663833662, "grad_norm": 0.46462637186050415, "learning_rate": 7.955650060753341e-05, "loss": 1.3876, "step": 3375 }, { "epoch": 1.0247382000303535, "grad_norm": 0.9742159247398376, "learning_rate": 7.955042527339004e-05, "loss": 1.8654, "step": 3376 }, { "epoch": 1.025041736227045, "grad_norm": 0.5226752758026123, "learning_rate": 7.954434993924667e-05, "loss": 1.1396, "step": 3377 }, { "epoch": 1.0253452724237366, "grad_norm": 0.43976494669914246, "learning_rate": 7.953827460510327e-05, "loss": 1.7537, "step": 3378 }, { "epoch": 1.025648808620428, "grad_norm": 0.4897270202636719, "learning_rate": 7.95321992709599e-05, "loss": 1.382, "step": 3379 }, { "epoch": 1.0259523448171195, "grad_norm": 0.42977437376976013, "learning_rate": 7.952612393681654e-05, "loss": 1.69, "step": 3380 }, { "epoch": 1.026255881013811, "grad_norm": 0.4650570750236511, "learning_rate": 7.952004860267315e-05, "loss": 1.5666, "step": 3381 }, { "epoch": 1.0265594172105024, "grad_norm": 0.5345761179924011, "learning_rate": 7.951397326852977e-05, "loss": 1.5281, "step": 3382 }, { "epoch": 1.0268629534071938, "grad_norm": 0.43827125430107117, "learning_rate": 7.95078979343864e-05, "loss": 1.8579, "step": 3383 }, { "epoch": 1.0271664896038852, "grad_norm": 0.4599241614341736, "learning_rate": 7.950182260024302e-05, "loss": 1.5928, "step": 3384 }, { "epoch": 1.0274700258005767, "grad_norm": 1.1530771255493164, "learning_rate": 7.949574726609964e-05, "loss": 1.5092, "step": 3385 }, { "epoch": 1.027773561997268, "grad_norm": 0.48699623346328735, "learning_rate": 7.948967193195627e-05, "loss": 1.7638, "step": 3386 }, { "epoch": 1.0280770981939595, "grad_norm": 0.5288783311843872, "learning_rate": 7.948359659781288e-05, "loss": 1.7314, "step": 3387 }, { "epoch": 1.0283806343906512, "grad_norm": 0.4574908912181854, "learning_rate": 7.94775212636695e-05, "loss": 1.9344, "step": 3388 }, { "epoch": 1.0286841705873426, "grad_norm": 0.47413721680641174, "learning_rate": 7.947144592952612e-05, "loss": 1.3177, "step": 3389 }, { "epoch": 1.028987706784034, "grad_norm": 0.46366703510284424, "learning_rate": 7.946537059538275e-05, "loss": 1.8695, "step": 3390 }, { "epoch": 1.0292912429807255, "grad_norm": 0.4010477662086487, "learning_rate": 7.945929526123938e-05, "loss": 1.623, "step": 3391 }, { "epoch": 1.029594779177417, "grad_norm": 0.501057505607605, "learning_rate": 7.945321992709598e-05, "loss": 1.6748, "step": 3392 }, { "epoch": 1.0298983153741084, "grad_norm": 0.4147251546382904, "learning_rate": 7.944714459295262e-05, "loss": 1.76, "step": 3393 }, { "epoch": 1.0302018515707998, "grad_norm": 0.5023919939994812, "learning_rate": 7.944106925880925e-05, "loss": 1.7666, "step": 3394 }, { "epoch": 1.0305053877674912, "grad_norm": 0.4336966574192047, "learning_rate": 7.943499392466586e-05, "loss": 1.8498, "step": 3395 }, { "epoch": 1.0308089239641827, "grad_norm": 0.50406813621521, "learning_rate": 7.942891859052248e-05, "loss": 1.0032, "step": 3396 }, { "epoch": 1.031112460160874, "grad_norm": 0.5218415856361389, "learning_rate": 7.942284325637911e-05, "loss": 1.7252, "step": 3397 }, { "epoch": 1.0314159963575655, "grad_norm": 0.5142799019813538, "learning_rate": 7.941676792223573e-05, "loss": 1.9791, "step": 3398 }, { "epoch": 1.0317195325542572, "grad_norm": 0.5369110107421875, "learning_rate": 7.941069258809235e-05, "loss": 1.4356, "step": 3399 }, { "epoch": 1.0320230687509486, "grad_norm": 0.4954996109008789, "learning_rate": 7.940461725394898e-05, "loss": 1.665, "step": 3400 }, { "epoch": 1.03232660494764, "grad_norm": 0.5331052541732788, "learning_rate": 7.93985419198056e-05, "loss": 1.5129, "step": 3401 }, { "epoch": 1.0326301411443315, "grad_norm": 0.4011031985282898, "learning_rate": 7.939246658566221e-05, "loss": 1.5203, "step": 3402 }, { "epoch": 1.032933677341023, "grad_norm": 0.8139665722846985, "learning_rate": 7.938639125151883e-05, "loss": 1.5036, "step": 3403 }, { "epoch": 1.0332372135377144, "grad_norm": 0.4838857650756836, "learning_rate": 7.938031591737546e-05, "loss": 1.7915, "step": 3404 }, { "epoch": 1.0335407497344058, "grad_norm": 0.5446197390556335, "learning_rate": 7.937424058323209e-05, "loss": 1.7374, "step": 3405 }, { "epoch": 1.0338442859310972, "grad_norm": 0.7249342799186707, "learning_rate": 7.93681652490887e-05, "loss": 1.5549, "step": 3406 }, { "epoch": 1.0341478221277887, "grad_norm": 0.4857841730117798, "learning_rate": 7.936208991494533e-05, "loss": 1.6947, "step": 3407 }, { "epoch": 1.03445135832448, "grad_norm": 0.4289863705635071, "learning_rate": 7.935601458080196e-05, "loss": 1.8783, "step": 3408 }, { "epoch": 1.0347548945211718, "grad_norm": 0.49779224395751953, "learning_rate": 7.934993924665857e-05, "loss": 1.7971, "step": 3409 }, { "epoch": 1.0350584307178632, "grad_norm": 0.5169624090194702, "learning_rate": 7.934386391251519e-05, "loss": 1.4507, "step": 3410 }, { "epoch": 1.0353619669145546, "grad_norm": 0.4716205894947052, "learning_rate": 7.933778857837182e-05, "loss": 1.8483, "step": 3411 }, { "epoch": 1.035665503111246, "grad_norm": 0.5545279383659363, "learning_rate": 7.933171324422844e-05, "loss": 1.5672, "step": 3412 }, { "epoch": 1.0359690393079375, "grad_norm": 0.4328896105289459, "learning_rate": 7.932563791008506e-05, "loss": 1.1695, "step": 3413 }, { "epoch": 1.036272575504629, "grad_norm": 0.4805368185043335, "learning_rate": 7.931956257594169e-05, "loss": 1.7504, "step": 3414 }, { "epoch": 1.0365761117013204, "grad_norm": 0.5162798166275024, "learning_rate": 7.93134872417983e-05, "loss": 1.4748, "step": 3415 }, { "epoch": 1.0368796478980118, "grad_norm": 0.5200609564781189, "learning_rate": 7.930741190765492e-05, "loss": 2.0036, "step": 3416 }, { "epoch": 1.0371831840947032, "grad_norm": 0.4653424620628357, "learning_rate": 7.930133657351154e-05, "loss": 1.9499, "step": 3417 }, { "epoch": 1.0374867202913947, "grad_norm": 0.4723150432109833, "learning_rate": 7.929526123936817e-05, "loss": 1.4042, "step": 3418 }, { "epoch": 1.037790256488086, "grad_norm": 0.5302563905715942, "learning_rate": 7.92891859052248e-05, "loss": 1.4871, "step": 3419 }, { "epoch": 1.0380937926847777, "grad_norm": 0.47659730911254883, "learning_rate": 7.92831105710814e-05, "loss": 1.4755, "step": 3420 }, { "epoch": 1.0383973288814692, "grad_norm": 0.5367438197135925, "learning_rate": 7.927703523693804e-05, "loss": 1.8863, "step": 3421 }, { "epoch": 1.0387008650781606, "grad_norm": 0.5086414217948914, "learning_rate": 7.927095990279467e-05, "loss": 1.5784, "step": 3422 }, { "epoch": 1.039004401274852, "grad_norm": 0.4822576642036438, "learning_rate": 7.926488456865127e-05, "loss": 1.8435, "step": 3423 }, { "epoch": 1.0393079374715435, "grad_norm": 0.5086636543273926, "learning_rate": 7.92588092345079e-05, "loss": 1.732, "step": 3424 }, { "epoch": 1.039611473668235, "grad_norm": 0.49060937762260437, "learning_rate": 7.925273390036453e-05, "loss": 1.7369, "step": 3425 }, { "epoch": 1.0399150098649264, "grad_norm": 0.4944159984588623, "learning_rate": 7.924665856622115e-05, "loss": 1.2844, "step": 3426 }, { "epoch": 1.0402185460616178, "grad_norm": 0.4141417443752289, "learning_rate": 7.924058323207777e-05, "loss": 1.3346, "step": 3427 }, { "epoch": 1.0405220822583092, "grad_norm": 0.4598718285560608, "learning_rate": 7.923450789793438e-05, "loss": 1.5991, "step": 3428 }, { "epoch": 1.0408256184550007, "grad_norm": 0.5402548313140869, "learning_rate": 7.922843256379101e-05, "loss": 1.78, "step": 3429 }, { "epoch": 1.0411291546516923, "grad_norm": 0.4793176054954529, "learning_rate": 7.922235722964763e-05, "loss": 1.5652, "step": 3430 }, { "epoch": 1.0414326908483837, "grad_norm": 0.4527183175086975, "learning_rate": 7.921628189550425e-05, "loss": 1.82, "step": 3431 }, { "epoch": 1.0417362270450752, "grad_norm": 0.4884622395038605, "learning_rate": 7.921020656136088e-05, "loss": 1.6954, "step": 3432 }, { "epoch": 1.0420397632417666, "grad_norm": 0.46866077184677124, "learning_rate": 7.920413122721751e-05, "loss": 1.712, "step": 3433 }, { "epoch": 1.042343299438458, "grad_norm": 0.40482431650161743, "learning_rate": 7.919805589307411e-05, "loss": 1.8968, "step": 3434 }, { "epoch": 1.0426468356351495, "grad_norm": 0.5239852666854858, "learning_rate": 7.919198055893075e-05, "loss": 1.7953, "step": 3435 }, { "epoch": 1.042950371831841, "grad_norm": 0.484953373670578, "learning_rate": 7.918590522478738e-05, "loss": 1.8354, "step": 3436 }, { "epoch": 1.0432539080285324, "grad_norm": 0.47952115535736084, "learning_rate": 7.917982989064398e-05, "loss": 1.2171, "step": 3437 }, { "epoch": 1.0435574442252238, "grad_norm": 0.47822096943855286, "learning_rate": 7.917375455650061e-05, "loss": 1.7159, "step": 3438 }, { "epoch": 1.0438609804219152, "grad_norm": 0.44206422567367554, "learning_rate": 7.916767922235724e-05, "loss": 1.8257, "step": 3439 }, { "epoch": 1.0441645166186069, "grad_norm": 0.5453143119812012, "learning_rate": 7.916160388821386e-05, "loss": 1.7791, "step": 3440 }, { "epoch": 1.0444680528152983, "grad_norm": 0.4759043753147125, "learning_rate": 7.915552855407048e-05, "loss": 1.692, "step": 3441 }, { "epoch": 1.0447715890119897, "grad_norm": 0.484531044960022, "learning_rate": 7.91494532199271e-05, "loss": 1.806, "step": 3442 }, { "epoch": 1.0450751252086812, "grad_norm": 0.5373866558074951, "learning_rate": 7.914337788578372e-05, "loss": 1.5913, "step": 3443 }, { "epoch": 1.0453786614053726, "grad_norm": 0.5190970301628113, "learning_rate": 7.913730255164034e-05, "loss": 1.5233, "step": 3444 }, { "epoch": 1.045682197602064, "grad_norm": 0.500152051448822, "learning_rate": 7.913122721749696e-05, "loss": 1.7589, "step": 3445 }, { "epoch": 1.0459857337987555, "grad_norm": 0.3860965073108673, "learning_rate": 7.912515188335359e-05, "loss": 1.2231, "step": 3446 }, { "epoch": 1.046289269995447, "grad_norm": 0.44290807843208313, "learning_rate": 7.911907654921021e-05, "loss": 1.5446, "step": 3447 }, { "epoch": 1.0465928061921383, "grad_norm": 0.46361368894577026, "learning_rate": 7.911300121506682e-05, "loss": 1.4829, "step": 3448 }, { "epoch": 1.0468963423888298, "grad_norm": 0.40358835458755493, "learning_rate": 7.910692588092346e-05, "loss": 1.6495, "step": 3449 }, { "epoch": 1.0471998785855212, "grad_norm": 0.5722264051437378, "learning_rate": 7.910085054678009e-05, "loss": 1.7335, "step": 3450 }, { "epoch": 1.0475034147822129, "grad_norm": 0.49722689390182495, "learning_rate": 7.909477521263669e-05, "loss": 1.6493, "step": 3451 }, { "epoch": 1.0478069509789043, "grad_norm": 0.5183900594711304, "learning_rate": 7.908869987849332e-05, "loss": 1.7254, "step": 3452 }, { "epoch": 1.0481104871755957, "grad_norm": 0.5188613533973694, "learning_rate": 7.908262454434995e-05, "loss": 2.0111, "step": 3453 }, { "epoch": 1.0484140233722872, "grad_norm": 0.5030909180641174, "learning_rate": 7.907654921020657e-05, "loss": 1.395, "step": 3454 }, { "epoch": 1.0487175595689786, "grad_norm": 0.4069419205188751, "learning_rate": 7.907047387606319e-05, "loss": 1.9025, "step": 3455 }, { "epoch": 1.04902109576567, "grad_norm": 0.5355219841003418, "learning_rate": 7.90643985419198e-05, "loss": 1.732, "step": 3456 }, { "epoch": 1.0493246319623615, "grad_norm": 0.43117785453796387, "learning_rate": 7.905832320777643e-05, "loss": 1.4534, "step": 3457 }, { "epoch": 1.049628168159053, "grad_norm": 0.4561751186847687, "learning_rate": 7.905224787363305e-05, "loss": 1.5682, "step": 3458 }, { "epoch": 1.0499317043557443, "grad_norm": 0.4510141611099243, "learning_rate": 7.904617253948967e-05, "loss": 1.6789, "step": 3459 }, { "epoch": 1.0502352405524358, "grad_norm": 0.5011105537414551, "learning_rate": 7.90400972053463e-05, "loss": 1.4711, "step": 3460 }, { "epoch": 1.0505387767491274, "grad_norm": 0.5226435661315918, "learning_rate": 7.903402187120292e-05, "loss": 1.4425, "step": 3461 }, { "epoch": 1.0508423129458189, "grad_norm": 0.46023955941200256, "learning_rate": 7.902794653705953e-05, "loss": 1.0283, "step": 3462 }, { "epoch": 1.0511458491425103, "grad_norm": 0.5048952698707581, "learning_rate": 7.902187120291617e-05, "loss": 1.6029, "step": 3463 }, { "epoch": 1.0514493853392017, "grad_norm": 0.6409230828285217, "learning_rate": 7.90157958687728e-05, "loss": 1.1697, "step": 3464 }, { "epoch": 1.0517529215358932, "grad_norm": 0.5188806653022766, "learning_rate": 7.90097205346294e-05, "loss": 1.8136, "step": 3465 }, { "epoch": 1.0520564577325846, "grad_norm": 0.5835402011871338, "learning_rate": 7.900364520048603e-05, "loss": 1.511, "step": 3466 }, { "epoch": 1.052359993929276, "grad_norm": 0.6449020504951477, "learning_rate": 7.899756986634266e-05, "loss": 1.98, "step": 3467 }, { "epoch": 1.0526635301259675, "grad_norm": 0.47613629698753357, "learning_rate": 7.899149453219928e-05, "loss": 1.6631, "step": 3468 }, { "epoch": 1.052967066322659, "grad_norm": 0.4372462332248688, "learning_rate": 7.89854191980559e-05, "loss": 1.7773, "step": 3469 }, { "epoch": 1.0532706025193503, "grad_norm": 0.48001718521118164, "learning_rate": 7.897934386391251e-05, "loss": 1.703, "step": 3470 }, { "epoch": 1.053574138716042, "grad_norm": 0.5756060481071472, "learning_rate": 7.897326852976914e-05, "loss": 1.4202, "step": 3471 }, { "epoch": 1.0538776749127334, "grad_norm": 0.48645758628845215, "learning_rate": 7.896719319562576e-05, "loss": 1.7291, "step": 3472 }, { "epoch": 1.0541812111094249, "grad_norm": 0.4413807988166809, "learning_rate": 7.896111786148238e-05, "loss": 1.505, "step": 3473 }, { "epoch": 1.0544847473061163, "grad_norm": 0.43039625883102417, "learning_rate": 7.895504252733901e-05, "loss": 1.5668, "step": 3474 }, { "epoch": 1.0547882835028077, "grad_norm": 0.5196880102157593, "learning_rate": 7.894896719319563e-05, "loss": 1.9353, "step": 3475 }, { "epoch": 1.0550918196994992, "grad_norm": 0.6965150833129883, "learning_rate": 7.894289185905224e-05, "loss": 1.1513, "step": 3476 }, { "epoch": 1.0553953558961906, "grad_norm": 0.4723784625530243, "learning_rate": 7.893681652490888e-05, "loss": 1.5391, "step": 3477 }, { "epoch": 1.055698892092882, "grad_norm": 0.47085341811180115, "learning_rate": 7.89307411907655e-05, "loss": 1.452, "step": 3478 }, { "epoch": 1.0560024282895735, "grad_norm": 0.515957772731781, "learning_rate": 7.892466585662211e-05, "loss": 1.7636, "step": 3479 }, { "epoch": 1.056305964486265, "grad_norm": 0.6064741611480713, "learning_rate": 7.891859052247874e-05, "loss": 1.7575, "step": 3480 }, { "epoch": 1.0566095006829563, "grad_norm": 0.567486047744751, "learning_rate": 7.891251518833537e-05, "loss": 1.0875, "step": 3481 }, { "epoch": 1.056913036879648, "grad_norm": 0.4897995591163635, "learning_rate": 7.890643985419199e-05, "loss": 1.5105, "step": 3482 }, { "epoch": 1.0572165730763394, "grad_norm": 0.47024548053741455, "learning_rate": 7.89003645200486e-05, "loss": 1.77, "step": 3483 }, { "epoch": 1.0575201092730309, "grad_norm": 0.7996636033058167, "learning_rate": 7.889428918590522e-05, "loss": 1.3641, "step": 3484 }, { "epoch": 1.0578236454697223, "grad_norm": 0.813572347164154, "learning_rate": 7.888821385176185e-05, "loss": 1.2667, "step": 3485 }, { "epoch": 1.0581271816664137, "grad_norm": 0.38006821274757385, "learning_rate": 7.888213851761847e-05, "loss": 2.011, "step": 3486 }, { "epoch": 1.0584307178631052, "grad_norm": 0.5023001432418823, "learning_rate": 7.887606318347509e-05, "loss": 1.4452, "step": 3487 }, { "epoch": 1.0587342540597966, "grad_norm": 0.6475557088851929, "learning_rate": 7.886998784933172e-05, "loss": 1.1677, "step": 3488 }, { "epoch": 1.059037790256488, "grad_norm": 0.5049715042114258, "learning_rate": 7.886391251518834e-05, "loss": 1.0539, "step": 3489 }, { "epoch": 1.0593413264531795, "grad_norm": 0.5918720960617065, "learning_rate": 7.885783718104495e-05, "loss": 1.1472, "step": 3490 }, { "epoch": 1.059644862649871, "grad_norm": 0.4469449520111084, "learning_rate": 7.885176184690159e-05, "loss": 1.6658, "step": 3491 }, { "epoch": 1.0599483988465626, "grad_norm": 0.4816749393939972, "learning_rate": 7.884568651275822e-05, "loss": 1.9424, "step": 3492 }, { "epoch": 1.060251935043254, "grad_norm": 0.44042688608169556, "learning_rate": 7.883961117861482e-05, "loss": 1.559, "step": 3493 }, { "epoch": 1.0605554712399454, "grad_norm": 0.4422488212585449, "learning_rate": 7.883353584447145e-05, "loss": 0.8213, "step": 3494 }, { "epoch": 1.0608590074366369, "grad_norm": 0.44115856289863586, "learning_rate": 7.882746051032808e-05, "loss": 1.3014, "step": 3495 }, { "epoch": 1.0611625436333283, "grad_norm": 0.5100114345550537, "learning_rate": 7.882138517618469e-05, "loss": 1.7285, "step": 3496 }, { "epoch": 1.0614660798300197, "grad_norm": 0.4293980002403259, "learning_rate": 7.881530984204132e-05, "loss": 1.6291, "step": 3497 }, { "epoch": 1.0617696160267112, "grad_norm": 0.4303349256515503, "learning_rate": 7.880923450789793e-05, "loss": 1.677, "step": 3498 }, { "epoch": 1.0620731522234026, "grad_norm": 1.325836420059204, "learning_rate": 7.880315917375456e-05, "loss": 1.4604, "step": 3499 }, { "epoch": 1.062376688420094, "grad_norm": 0.5173283815383911, "learning_rate": 7.879708383961118e-05, "loss": 1.8935, "step": 3500 }, { "epoch": 1.0626802246167855, "grad_norm": 0.5339661240577698, "learning_rate": 7.87910085054678e-05, "loss": 1.4497, "step": 3501 }, { "epoch": 1.0629837608134771, "grad_norm": 0.5330355763435364, "learning_rate": 7.878493317132443e-05, "loss": 1.7404, "step": 3502 }, { "epoch": 1.0632872970101686, "grad_norm": 0.5396672487258911, "learning_rate": 7.877885783718105e-05, "loss": 1.3268, "step": 3503 }, { "epoch": 1.06359083320686, "grad_norm": 0.44838812947273254, "learning_rate": 7.877278250303766e-05, "loss": 1.6525, "step": 3504 }, { "epoch": 1.0638943694035514, "grad_norm": 0.50009685754776, "learning_rate": 7.87667071688943e-05, "loss": 1.6385, "step": 3505 }, { "epoch": 1.0641979056002429, "grad_norm": 0.6882514953613281, "learning_rate": 7.876063183475093e-05, "loss": 1.3527, "step": 3506 }, { "epoch": 1.0645014417969343, "grad_norm": 0.5103173851966858, "learning_rate": 7.875455650060753e-05, "loss": 1.3589, "step": 3507 }, { "epoch": 1.0648049779936257, "grad_norm": 0.5229162573814392, "learning_rate": 7.874848116646416e-05, "loss": 1.5474, "step": 3508 }, { "epoch": 1.0651085141903172, "grad_norm": 0.5207902193069458, "learning_rate": 7.874240583232079e-05, "loss": 1.807, "step": 3509 }, { "epoch": 1.0654120503870086, "grad_norm": 0.46750408411026, "learning_rate": 7.87363304981774e-05, "loss": 1.4907, "step": 3510 }, { "epoch": 1.0657155865837, "grad_norm": 1.095977544784546, "learning_rate": 7.873025516403403e-05, "loss": 1.6401, "step": 3511 }, { "epoch": 1.0660191227803915, "grad_norm": 0.4337494373321533, "learning_rate": 7.872417982989064e-05, "loss": 1.8245, "step": 3512 }, { "epoch": 1.0663226589770831, "grad_norm": 0.3852023184299469, "learning_rate": 7.871810449574727e-05, "loss": 1.506, "step": 3513 }, { "epoch": 1.0666261951737745, "grad_norm": 0.45099326968193054, "learning_rate": 7.871202916160389e-05, "loss": 1.8121, "step": 3514 }, { "epoch": 1.066929731370466, "grad_norm": 0.6037120223045349, "learning_rate": 7.870595382746051e-05, "loss": 1.7321, "step": 3515 }, { "epoch": 1.0672332675671574, "grad_norm": 0.4074588716030121, "learning_rate": 7.869987849331714e-05, "loss": 1.6336, "step": 3516 }, { "epoch": 1.0675368037638489, "grad_norm": 0.40023350715637207, "learning_rate": 7.869380315917376e-05, "loss": 0.9946, "step": 3517 }, { "epoch": 1.0678403399605403, "grad_norm": 0.8330900073051453, "learning_rate": 7.868772782503037e-05, "loss": 1.4609, "step": 3518 }, { "epoch": 1.0681438761572317, "grad_norm": 0.5196427702903748, "learning_rate": 7.8681652490887e-05, "loss": 1.7803, "step": 3519 }, { "epoch": 1.0684474123539232, "grad_norm": 0.575749933719635, "learning_rate": 7.867557715674362e-05, "loss": 1.6784, "step": 3520 }, { "epoch": 1.0687509485506146, "grad_norm": 0.5069593787193298, "learning_rate": 7.866950182260024e-05, "loss": 1.9097, "step": 3521 }, { "epoch": 1.069054484747306, "grad_norm": 0.5487728714942932, "learning_rate": 7.866342648845687e-05, "loss": 1.8385, "step": 3522 }, { "epoch": 1.0693580209439975, "grad_norm": 0.5695396065711975, "learning_rate": 7.865735115431349e-05, "loss": 1.8143, "step": 3523 }, { "epoch": 1.0696615571406891, "grad_norm": 0.47702378034591675, "learning_rate": 7.86512758201701e-05, "loss": 1.2498, "step": 3524 }, { "epoch": 1.0699650933373805, "grad_norm": 0.4768955707550049, "learning_rate": 7.864520048602674e-05, "loss": 1.7542, "step": 3525 }, { "epoch": 1.070268629534072, "grad_norm": 0.4759134352207184, "learning_rate": 7.863912515188335e-05, "loss": 0.9774, "step": 3526 }, { "epoch": 1.0705721657307634, "grad_norm": 0.6089837551116943, "learning_rate": 7.863304981773998e-05, "loss": 0.7761, "step": 3527 }, { "epoch": 1.0708757019274548, "grad_norm": 0.5931398272514343, "learning_rate": 7.86269744835966e-05, "loss": 1.244, "step": 3528 }, { "epoch": 1.0711792381241463, "grad_norm": 0.4667022228240967, "learning_rate": 7.862089914945322e-05, "loss": 1.3669, "step": 3529 }, { "epoch": 1.0714827743208377, "grad_norm": 0.4586002230644226, "learning_rate": 7.861482381530985e-05, "loss": 1.6041, "step": 3530 }, { "epoch": 1.0717863105175292, "grad_norm": 0.5036244988441467, "learning_rate": 7.860874848116647e-05, "loss": 1.3989, "step": 3531 }, { "epoch": 1.0720898467142206, "grad_norm": 0.40763425827026367, "learning_rate": 7.860267314702308e-05, "loss": 1.1835, "step": 3532 }, { "epoch": 1.0723933829109122, "grad_norm": 0.44515642523765564, "learning_rate": 7.859659781287972e-05, "loss": 1.4935, "step": 3533 }, { "epoch": 1.0726969191076037, "grad_norm": 0.5427178740501404, "learning_rate": 7.859052247873633e-05, "loss": 1.9076, "step": 3534 }, { "epoch": 1.073000455304295, "grad_norm": 0.4585944712162018, "learning_rate": 7.858444714459295e-05, "loss": 1.7363, "step": 3535 }, { "epoch": 1.0733039915009865, "grad_norm": 0.46946725249290466, "learning_rate": 7.857837181044958e-05, "loss": 1.6894, "step": 3536 }, { "epoch": 1.073607527697678, "grad_norm": 0.5090848803520203, "learning_rate": 7.85722964763062e-05, "loss": 1.8616, "step": 3537 }, { "epoch": 1.0739110638943694, "grad_norm": 0.5192902684211731, "learning_rate": 7.856622114216282e-05, "loss": 1.5899, "step": 3538 }, { "epoch": 1.0742146000910608, "grad_norm": 0.4348808228969574, "learning_rate": 7.856014580801945e-05, "loss": 1.7192, "step": 3539 }, { "epoch": 1.0745181362877523, "grad_norm": 0.5693963170051575, "learning_rate": 7.855407047387606e-05, "loss": 1.3645, "step": 3540 }, { "epoch": 1.0748216724844437, "grad_norm": 0.4064824879169464, "learning_rate": 7.85479951397327e-05, "loss": 1.5659, "step": 3541 }, { "epoch": 1.0751252086811351, "grad_norm": 0.4797777235507965, "learning_rate": 7.854191980558931e-05, "loss": 1.8501, "step": 3542 }, { "epoch": 1.0754287448778266, "grad_norm": 0.5156259536743164, "learning_rate": 7.853584447144593e-05, "loss": 1.5016, "step": 3543 }, { "epoch": 1.0757322810745182, "grad_norm": 0.9919567108154297, "learning_rate": 7.852976913730256e-05, "loss": 1.3734, "step": 3544 }, { "epoch": 1.0760358172712097, "grad_norm": 0.5478760600090027, "learning_rate": 7.852369380315918e-05, "loss": 1.8446, "step": 3545 }, { "epoch": 1.076339353467901, "grad_norm": 0.6765535473823547, "learning_rate": 7.85176184690158e-05, "loss": 1.5215, "step": 3546 }, { "epoch": 1.0766428896645925, "grad_norm": 0.4867497384548187, "learning_rate": 7.851154313487243e-05, "loss": 1.3427, "step": 3547 }, { "epoch": 1.076946425861284, "grad_norm": 0.4023679792881012, "learning_rate": 7.850546780072904e-05, "loss": 1.7548, "step": 3548 }, { "epoch": 1.0772499620579754, "grad_norm": 0.42770206928253174, "learning_rate": 7.849939246658566e-05, "loss": 1.4375, "step": 3549 }, { "epoch": 1.0775534982546668, "grad_norm": 0.5628126859664917, "learning_rate": 7.849331713244229e-05, "loss": 1.6336, "step": 3550 }, { "epoch": 1.0778570344513583, "grad_norm": 0.5270586013793945, "learning_rate": 7.848724179829891e-05, "loss": 1.8782, "step": 3551 }, { "epoch": 1.0781605706480497, "grad_norm": 0.7205768823623657, "learning_rate": 7.848116646415553e-05, "loss": 1.7201, "step": 3552 }, { "epoch": 1.0784641068447411, "grad_norm": 0.5051723122596741, "learning_rate": 7.847509113001216e-05, "loss": 1.6346, "step": 3553 }, { "epoch": 1.0787676430414326, "grad_norm": 0.41711702942848206, "learning_rate": 7.846901579586877e-05, "loss": 1.765, "step": 3554 }, { "epoch": 1.0790711792381242, "grad_norm": 0.4348052442073822, "learning_rate": 7.84629404617254e-05, "loss": 1.6667, "step": 3555 }, { "epoch": 1.0793747154348157, "grad_norm": 0.5323374271392822, "learning_rate": 7.845686512758202e-05, "loss": 1.8005, "step": 3556 }, { "epoch": 1.079678251631507, "grad_norm": 0.47705498337745667, "learning_rate": 7.845078979343864e-05, "loss": 1.8168, "step": 3557 }, { "epoch": 1.0799817878281985, "grad_norm": 0.535015344619751, "learning_rate": 7.844471445929527e-05, "loss": 1.7441, "step": 3558 }, { "epoch": 1.08028532402489, "grad_norm": 0.4847927391529083, "learning_rate": 7.843863912515189e-05, "loss": 1.5963, "step": 3559 }, { "epoch": 1.0805888602215814, "grad_norm": 0.5845076441764832, "learning_rate": 7.84325637910085e-05, "loss": 1.116, "step": 3560 }, { "epoch": 1.0808923964182728, "grad_norm": 0.5248334407806396, "learning_rate": 7.842648845686514e-05, "loss": 2.0422, "step": 3561 }, { "epoch": 1.0811959326149643, "grad_norm": 0.5417022705078125, "learning_rate": 7.842041312272175e-05, "loss": 1.5701, "step": 3562 }, { "epoch": 1.0814994688116557, "grad_norm": 0.4764825701713562, "learning_rate": 7.841433778857837e-05, "loss": 1.4946, "step": 3563 }, { "epoch": 1.0818030050083474, "grad_norm": 0.4735731780529022, "learning_rate": 7.8408262454435e-05, "loss": 2.0424, "step": 3564 }, { "epoch": 1.0821065412050388, "grad_norm": 0.4083727300167084, "learning_rate": 7.840218712029162e-05, "loss": 1.2251, "step": 3565 }, { "epoch": 1.0824100774017302, "grad_norm": 0.5175759792327881, "learning_rate": 7.839611178614824e-05, "loss": 1.3758, "step": 3566 }, { "epoch": 1.0827136135984217, "grad_norm": 0.4588059186935425, "learning_rate": 7.839003645200487e-05, "loss": 1.4034, "step": 3567 }, { "epoch": 1.083017149795113, "grad_norm": 0.5879805088043213, "learning_rate": 7.838396111786148e-05, "loss": 1.6349, "step": 3568 }, { "epoch": 1.0833206859918045, "grad_norm": 0.48351842164993286, "learning_rate": 7.83778857837181e-05, "loss": 1.8329, "step": 3569 }, { "epoch": 1.083624222188496, "grad_norm": 0.5158828496932983, "learning_rate": 7.837181044957473e-05, "loss": 1.6281, "step": 3570 }, { "epoch": 1.0839277583851874, "grad_norm": 0.7203484177589417, "learning_rate": 7.836573511543135e-05, "loss": 0.9363, "step": 3571 }, { "epoch": 1.0842312945818788, "grad_norm": 0.5488767623901367, "learning_rate": 7.835965978128798e-05, "loss": 1.7487, "step": 3572 }, { "epoch": 1.0845348307785703, "grad_norm": 0.5658820867538452, "learning_rate": 7.83535844471446e-05, "loss": 1.8557, "step": 3573 }, { "epoch": 1.0848383669752617, "grad_norm": 0.5227528214454651, "learning_rate": 7.834750911300121e-05, "loss": 1.4973, "step": 3574 }, { "epoch": 1.0851419031719534, "grad_norm": 0.5170645713806152, "learning_rate": 7.834143377885785e-05, "loss": 1.7767, "step": 3575 }, { "epoch": 1.0854454393686448, "grad_norm": 0.48934417963027954, "learning_rate": 7.833535844471446e-05, "loss": 1.7613, "step": 3576 }, { "epoch": 1.0857489755653362, "grad_norm": 0.4472818374633789, "learning_rate": 7.832928311057108e-05, "loss": 1.6749, "step": 3577 }, { "epoch": 1.0860525117620277, "grad_norm": 0.4309948682785034, "learning_rate": 7.832320777642771e-05, "loss": 1.8213, "step": 3578 }, { "epoch": 1.086356047958719, "grad_norm": 0.5259717702865601, "learning_rate": 7.831713244228433e-05, "loss": 1.5028, "step": 3579 }, { "epoch": 1.0866595841554105, "grad_norm": 0.4831100106239319, "learning_rate": 7.831105710814095e-05, "loss": 1.5417, "step": 3580 }, { "epoch": 1.086963120352102, "grad_norm": 0.5474818348884583, "learning_rate": 7.830498177399758e-05, "loss": 1.3114, "step": 3581 }, { "epoch": 1.0872666565487934, "grad_norm": 0.6029711961746216, "learning_rate": 7.82989064398542e-05, "loss": 1.6251, "step": 3582 }, { "epoch": 1.0875701927454848, "grad_norm": 0.6199969053268433, "learning_rate": 7.829283110571081e-05, "loss": 1.7712, "step": 3583 }, { "epoch": 1.0878737289421763, "grad_norm": 0.571630597114563, "learning_rate": 7.828675577156744e-05, "loss": 1.8169, "step": 3584 }, { "epoch": 1.0881772651388677, "grad_norm": 0.476755827665329, "learning_rate": 7.828068043742406e-05, "loss": 1.4152, "step": 3585 }, { "epoch": 1.0884808013355594, "grad_norm": 0.5712706446647644, "learning_rate": 7.827460510328069e-05, "loss": 1.624, "step": 3586 }, { "epoch": 1.0887843375322508, "grad_norm": 0.4737652540206909, "learning_rate": 7.826852976913731e-05, "loss": 1.812, "step": 3587 }, { "epoch": 1.0890878737289422, "grad_norm": 0.7318893671035767, "learning_rate": 7.826245443499392e-05, "loss": 1.7989, "step": 3588 }, { "epoch": 1.0893914099256337, "grad_norm": 0.5808560848236084, "learning_rate": 7.825637910085056e-05, "loss": 1.7167, "step": 3589 }, { "epoch": 1.089694946122325, "grad_norm": 0.49355220794677734, "learning_rate": 7.825030376670717e-05, "loss": 1.5009, "step": 3590 }, { "epoch": 1.0899984823190165, "grad_norm": 0.4661107361316681, "learning_rate": 7.824422843256379e-05, "loss": 1.2807, "step": 3591 }, { "epoch": 1.090302018515708, "grad_norm": 0.3863863945007324, "learning_rate": 7.823815309842042e-05, "loss": 1.6173, "step": 3592 }, { "epoch": 1.0906055547123994, "grad_norm": 0.48039016127586365, "learning_rate": 7.823207776427704e-05, "loss": 1.7246, "step": 3593 }, { "epoch": 1.0909090909090908, "grad_norm": 0.4487806558609009, "learning_rate": 7.822600243013366e-05, "loss": 1.408, "step": 3594 }, { "epoch": 1.0912126271057823, "grad_norm": 0.6070311069488525, "learning_rate": 7.821992709599029e-05, "loss": 1.5847, "step": 3595 }, { "epoch": 1.091516163302474, "grad_norm": 0.5059540271759033, "learning_rate": 7.82138517618469e-05, "loss": 1.9817, "step": 3596 }, { "epoch": 1.0918196994991654, "grad_norm": 0.5185919404029846, "learning_rate": 7.820777642770352e-05, "loss": 1.761, "step": 3597 }, { "epoch": 1.0921232356958568, "grad_norm": 0.5528532266616821, "learning_rate": 7.820170109356015e-05, "loss": 1.5324, "step": 3598 }, { "epoch": 1.0924267718925482, "grad_norm": 0.46091577410697937, "learning_rate": 7.819562575941677e-05, "loss": 1.5934, "step": 3599 }, { "epoch": 1.0927303080892397, "grad_norm": 0.5525655746459961, "learning_rate": 7.81895504252734e-05, "loss": 1.742, "step": 3600 }, { "epoch": 1.093033844285931, "grad_norm": 0.8037269115447998, "learning_rate": 7.818347509113002e-05, "loss": 1.2865, "step": 3601 }, { "epoch": 1.0933373804826225, "grad_norm": 0.500320315361023, "learning_rate": 7.817739975698664e-05, "loss": 1.7335, "step": 3602 }, { "epoch": 1.093640916679314, "grad_norm": 0.4381422698497772, "learning_rate": 7.817132442284327e-05, "loss": 1.7203, "step": 3603 }, { "epoch": 1.0939444528760054, "grad_norm": 0.5011032819747925, "learning_rate": 7.816524908869988e-05, "loss": 1.903, "step": 3604 }, { "epoch": 1.0942479890726968, "grad_norm": 0.4082486033439636, "learning_rate": 7.81591737545565e-05, "loss": 1.7394, "step": 3605 }, { "epoch": 1.0945515252693885, "grad_norm": 0.3757207691669464, "learning_rate": 7.815309842041313e-05, "loss": 1.9376, "step": 3606 }, { "epoch": 1.09485506146608, "grad_norm": 0.5873184204101562, "learning_rate": 7.814702308626975e-05, "loss": 1.8047, "step": 3607 }, { "epoch": 1.0951585976627713, "grad_norm": 0.4013366401195526, "learning_rate": 7.814094775212637e-05, "loss": 1.7548, "step": 3608 }, { "epoch": 1.0954621338594628, "grad_norm": 0.4884313642978668, "learning_rate": 7.8134872417983e-05, "loss": 1.7476, "step": 3609 }, { "epoch": 1.0957656700561542, "grad_norm": 0.5261145234107971, "learning_rate": 7.812879708383961e-05, "loss": 1.3334, "step": 3610 }, { "epoch": 1.0960692062528457, "grad_norm": 0.4099176824092865, "learning_rate": 7.812272174969623e-05, "loss": 1.1143, "step": 3611 }, { "epoch": 1.096372742449537, "grad_norm": 0.571192741394043, "learning_rate": 7.811664641555286e-05, "loss": 1.7579, "step": 3612 }, { "epoch": 1.0966762786462285, "grad_norm": 0.48647162318229675, "learning_rate": 7.811057108140948e-05, "loss": 1.8592, "step": 3613 }, { "epoch": 1.09697981484292, "grad_norm": 0.4793272316455841, "learning_rate": 7.810449574726611e-05, "loss": 1.7882, "step": 3614 }, { "epoch": 1.0972833510396114, "grad_norm": 0.49005764722824097, "learning_rate": 7.809842041312273e-05, "loss": 1.52, "step": 3615 }, { "epoch": 1.0975868872363028, "grad_norm": 0.5489885210990906, "learning_rate": 7.809234507897935e-05, "loss": 1.7768, "step": 3616 }, { "epoch": 1.0978904234329945, "grad_norm": 0.5832868814468384, "learning_rate": 7.808626974483598e-05, "loss": 1.2381, "step": 3617 }, { "epoch": 1.098193959629686, "grad_norm": 1.2282688617706299, "learning_rate": 7.80801944106926e-05, "loss": 1.6286, "step": 3618 }, { "epoch": 1.0984974958263773, "grad_norm": 0.5094382166862488, "learning_rate": 7.807411907654921e-05, "loss": 1.9736, "step": 3619 }, { "epoch": 1.0988010320230688, "grad_norm": 0.4855671525001526, "learning_rate": 7.806804374240584e-05, "loss": 1.9063, "step": 3620 }, { "epoch": 1.0991045682197602, "grad_norm": 0.5706669092178345, "learning_rate": 7.806196840826246e-05, "loss": 1.9509, "step": 3621 }, { "epoch": 1.0994081044164516, "grad_norm": 0.5625984072685242, "learning_rate": 7.805589307411908e-05, "loss": 1.5796, "step": 3622 }, { "epoch": 1.099711640613143, "grad_norm": 0.5737254023551941, "learning_rate": 7.804981773997571e-05, "loss": 1.6272, "step": 3623 }, { "epoch": 1.1000151768098345, "grad_norm": 0.3637593388557434, "learning_rate": 7.804374240583232e-05, "loss": 1.2409, "step": 3624 }, { "epoch": 1.100318713006526, "grad_norm": 0.4899303615093231, "learning_rate": 7.803766707168894e-05, "loss": 1.3884, "step": 3625 }, { "epoch": 1.1006222492032174, "grad_norm": 0.524005115032196, "learning_rate": 7.803159173754557e-05, "loss": 1.9453, "step": 3626 }, { "epoch": 1.100925785399909, "grad_norm": 0.5083621740341187, "learning_rate": 7.802551640340219e-05, "loss": 1.5158, "step": 3627 }, { "epoch": 1.1012293215966005, "grad_norm": 0.46930131316185, "learning_rate": 7.801944106925882e-05, "loss": 1.7324, "step": 3628 }, { "epoch": 1.101532857793292, "grad_norm": 0.5122260451316833, "learning_rate": 7.801336573511544e-05, "loss": 1.4132, "step": 3629 }, { "epoch": 1.1018363939899833, "grad_norm": 0.5043088793754578, "learning_rate": 7.800729040097206e-05, "loss": 1.6932, "step": 3630 }, { "epoch": 1.1021399301866748, "grad_norm": 0.5585395097732544, "learning_rate": 7.800121506682869e-05, "loss": 1.6254, "step": 3631 }, { "epoch": 1.1024434663833662, "grad_norm": 0.4394286572933197, "learning_rate": 7.799513973268529e-05, "loss": 1.6988, "step": 3632 }, { "epoch": 1.1027470025800576, "grad_norm": 0.5732413530349731, "learning_rate": 7.798906439854192e-05, "loss": 1.0782, "step": 3633 }, { "epoch": 1.103050538776749, "grad_norm": 0.49380823969841003, "learning_rate": 7.798298906439855e-05, "loss": 1.3272, "step": 3634 }, { "epoch": 1.1033540749734405, "grad_norm": 0.5082643628120422, "learning_rate": 7.797691373025517e-05, "loss": 1.6587, "step": 3635 }, { "epoch": 1.103657611170132, "grad_norm": 0.6942585706710815, "learning_rate": 7.797083839611179e-05, "loss": 1.565, "step": 3636 }, { "epoch": 1.1039611473668236, "grad_norm": 0.5077084302902222, "learning_rate": 7.796476306196842e-05, "loss": 1.8823, "step": 3637 }, { "epoch": 1.104264683563515, "grad_norm": 0.5313974022865295, "learning_rate": 7.795868772782503e-05, "loss": 1.5792, "step": 3638 }, { "epoch": 1.1045682197602065, "grad_norm": 0.5107327699661255, "learning_rate": 7.795261239368165e-05, "loss": 1.4091, "step": 3639 }, { "epoch": 1.104871755956898, "grad_norm": 0.530952513217926, "learning_rate": 7.794653705953828e-05, "loss": 1.918, "step": 3640 }, { "epoch": 1.1051752921535893, "grad_norm": 0.6229440569877625, "learning_rate": 7.79404617253949e-05, "loss": 1.7242, "step": 3641 }, { "epoch": 1.1054788283502808, "grad_norm": 0.5772741436958313, "learning_rate": 7.793438639125152e-05, "loss": 1.4634, "step": 3642 }, { "epoch": 1.1057823645469722, "grad_norm": 0.6212802529335022, "learning_rate": 7.792831105710815e-05, "loss": 1.0477, "step": 3643 }, { "epoch": 1.1060859007436636, "grad_norm": 0.51209557056427, "learning_rate": 7.792223572296477e-05, "loss": 1.6945, "step": 3644 }, { "epoch": 1.106389436940355, "grad_norm": 0.45700934529304504, "learning_rate": 7.79161603888214e-05, "loss": 1.6441, "step": 3645 }, { "epoch": 1.1066929731370465, "grad_norm": 0.5479162931442261, "learning_rate": 7.7910085054678e-05, "loss": 1.2612, "step": 3646 }, { "epoch": 1.106996509333738, "grad_norm": 0.5235689878463745, "learning_rate": 7.790400972053463e-05, "loss": 1.5583, "step": 3647 }, { "epoch": 1.1073000455304296, "grad_norm": 0.40267738699913025, "learning_rate": 7.789793438639126e-05, "loss": 1.5987, "step": 3648 }, { "epoch": 1.107603581727121, "grad_norm": 0.4579909145832062, "learning_rate": 7.789185905224788e-05, "loss": 2.0193, "step": 3649 }, { "epoch": 1.1079071179238125, "grad_norm": 0.7406178712844849, "learning_rate": 7.78857837181045e-05, "loss": 1.3012, "step": 3650 }, { "epoch": 1.108210654120504, "grad_norm": 0.5075519680976868, "learning_rate": 7.787970838396113e-05, "loss": 1.8573, "step": 3651 }, { "epoch": 1.1085141903171953, "grad_norm": 0.5122193098068237, "learning_rate": 7.787363304981774e-05, "loss": 1.4937, "step": 3652 }, { "epoch": 1.1088177265138868, "grad_norm": 0.5174267292022705, "learning_rate": 7.786755771567436e-05, "loss": 1.3752, "step": 3653 }, { "epoch": 1.1091212627105782, "grad_norm": 0.47906357049942017, "learning_rate": 7.786148238153099e-05, "loss": 1.4443, "step": 3654 }, { "epoch": 1.1094247989072696, "grad_norm": 0.5059614777565002, "learning_rate": 7.785540704738761e-05, "loss": 1.5518, "step": 3655 }, { "epoch": 1.109728335103961, "grad_norm": 0.49383166432380676, "learning_rate": 7.784933171324423e-05, "loss": 1.6696, "step": 3656 }, { "epoch": 1.1100318713006525, "grad_norm": 0.45457521080970764, "learning_rate": 7.784325637910086e-05, "loss": 1.4528, "step": 3657 }, { "epoch": 1.1103354074973442, "grad_norm": 0.4575364291667938, "learning_rate": 7.783718104495748e-05, "loss": 1.7467, "step": 3658 }, { "epoch": 1.1106389436940356, "grad_norm": 0.4990423619747162, "learning_rate": 7.78311057108141e-05, "loss": 1.6548, "step": 3659 }, { "epoch": 1.110942479890727, "grad_norm": 0.5598446726799011, "learning_rate": 7.782503037667071e-05, "loss": 1.4941, "step": 3660 }, { "epoch": 1.1112460160874185, "grad_norm": 0.466371089220047, "learning_rate": 7.781895504252734e-05, "loss": 1.7623, "step": 3661 }, { "epoch": 1.11154955228411, "grad_norm": 0.4354589879512787, "learning_rate": 7.781287970838397e-05, "loss": 1.9063, "step": 3662 }, { "epoch": 1.1118530884808013, "grad_norm": 0.7934980988502502, "learning_rate": 7.780680437424059e-05, "loss": 1.8391, "step": 3663 }, { "epoch": 1.1121566246774928, "grad_norm": 0.4731541872024536, "learning_rate": 7.78007290400972e-05, "loss": 1.9535, "step": 3664 }, { "epoch": 1.1124601608741842, "grad_norm": 0.47908467054367065, "learning_rate": 7.779465370595384e-05, "loss": 1.8113, "step": 3665 }, { "epoch": 1.1127636970708756, "grad_norm": 0.4935145676136017, "learning_rate": 7.778857837181045e-05, "loss": 1.7136, "step": 3666 }, { "epoch": 1.113067233267567, "grad_norm": 0.4779261350631714, "learning_rate": 7.778250303766707e-05, "loss": 1.5593, "step": 3667 }, { "epoch": 1.1133707694642587, "grad_norm": 0.3891371786594391, "learning_rate": 7.77764277035237e-05, "loss": 1.2089, "step": 3668 }, { "epoch": 1.1136743056609502, "grad_norm": 0.5638146996498108, "learning_rate": 7.777035236938032e-05, "loss": 1.345, "step": 3669 }, { "epoch": 1.1139778418576416, "grad_norm": 0.46427562832832336, "learning_rate": 7.776427703523694e-05, "loss": 1.8294, "step": 3670 }, { "epoch": 1.114281378054333, "grad_norm": 1.02618408203125, "learning_rate": 7.775820170109357e-05, "loss": 1.608, "step": 3671 }, { "epoch": 1.1145849142510245, "grad_norm": 0.5669841766357422, "learning_rate": 7.775212636695019e-05, "loss": 1.5572, "step": 3672 }, { "epoch": 1.114888450447716, "grad_norm": 0.5150823593139648, "learning_rate": 7.774605103280682e-05, "loss": 1.6798, "step": 3673 }, { "epoch": 1.1151919866444073, "grad_norm": 0.6217275857925415, "learning_rate": 7.773997569866342e-05, "loss": 1.4402, "step": 3674 }, { "epoch": 1.1154955228410988, "grad_norm": 0.508321225643158, "learning_rate": 7.773390036452005e-05, "loss": 1.6684, "step": 3675 }, { "epoch": 1.1157990590377902, "grad_norm": 0.44217655062675476, "learning_rate": 7.772782503037668e-05, "loss": 1.5984, "step": 3676 }, { "epoch": 1.1161025952344816, "grad_norm": 0.4717262089252472, "learning_rate": 7.77217496962333e-05, "loss": 1.4775, "step": 3677 }, { "epoch": 1.116406131431173, "grad_norm": 0.4989759922027588, "learning_rate": 7.771567436208992e-05, "loss": 1.4132, "step": 3678 }, { "epoch": 1.1167096676278647, "grad_norm": 0.44810184836387634, "learning_rate": 7.770959902794655e-05, "loss": 1.7462, "step": 3679 }, { "epoch": 1.1170132038245562, "grad_norm": 0.4343874156475067, "learning_rate": 7.770352369380316e-05, "loss": 1.6128, "step": 3680 }, { "epoch": 1.1173167400212476, "grad_norm": 0.4640476107597351, "learning_rate": 7.769744835965978e-05, "loss": 1.6007, "step": 3681 }, { "epoch": 1.117620276217939, "grad_norm": 0.4636215567588806, "learning_rate": 7.769137302551641e-05, "loss": 1.4001, "step": 3682 }, { "epoch": 1.1179238124146305, "grad_norm": 0.5073500871658325, "learning_rate": 7.768529769137303e-05, "loss": 1.8682, "step": 3683 }, { "epoch": 1.118227348611322, "grad_norm": 0.5101370811462402, "learning_rate": 7.767922235722965e-05, "loss": 1.8096, "step": 3684 }, { "epoch": 1.1185308848080133, "grad_norm": 0.42578715085983276, "learning_rate": 7.767314702308628e-05, "loss": 1.393, "step": 3685 }, { "epoch": 1.1188344210047048, "grad_norm": 0.4419322907924652, "learning_rate": 7.76670716889429e-05, "loss": 1.7337, "step": 3686 }, { "epoch": 1.1191379572013962, "grad_norm": 0.48602306842803955, "learning_rate": 7.766099635479953e-05, "loss": 1.6385, "step": 3687 }, { "epoch": 1.1194414933980876, "grad_norm": 0.46349820494651794, "learning_rate": 7.765492102065613e-05, "loss": 1.8216, "step": 3688 }, { "epoch": 1.119745029594779, "grad_norm": 0.5057324767112732, "learning_rate": 7.764884568651276e-05, "loss": 1.4216, "step": 3689 }, { "epoch": 1.1200485657914707, "grad_norm": 0.45940324664115906, "learning_rate": 7.764277035236939e-05, "loss": 1.7301, "step": 3690 }, { "epoch": 1.1203521019881622, "grad_norm": 0.47218936681747437, "learning_rate": 7.763669501822601e-05, "loss": 1.6976, "step": 3691 }, { "epoch": 1.1206556381848536, "grad_norm": 0.4720531105995178, "learning_rate": 7.763061968408263e-05, "loss": 1.4658, "step": 3692 }, { "epoch": 1.120959174381545, "grad_norm": 0.48740169405937195, "learning_rate": 7.762454434993926e-05, "loss": 1.325, "step": 3693 }, { "epoch": 1.1212627105782365, "grad_norm": 0.4727463722229004, "learning_rate": 7.761846901579587e-05, "loss": 1.7131, "step": 3694 }, { "epoch": 1.1215662467749279, "grad_norm": 0.563522458076477, "learning_rate": 7.761239368165249e-05, "loss": 1.7699, "step": 3695 }, { "epoch": 1.1218697829716193, "grad_norm": 0.492064893245697, "learning_rate": 7.760631834750912e-05, "loss": 1.9043, "step": 3696 }, { "epoch": 1.1221733191683108, "grad_norm": 1.3425596952438354, "learning_rate": 7.760024301336574e-05, "loss": 1.6455, "step": 3697 }, { "epoch": 1.1224768553650022, "grad_norm": 0.4847075641155243, "learning_rate": 7.759416767922236e-05, "loss": 1.7395, "step": 3698 }, { "epoch": 1.1227803915616938, "grad_norm": 0.7759005427360535, "learning_rate": 7.758809234507897e-05, "loss": 1.6917, "step": 3699 }, { "epoch": 1.1230839277583853, "grad_norm": 0.6782048344612122, "learning_rate": 7.75820170109356e-05, "loss": 1.443, "step": 3700 }, { "epoch": 1.1233874639550767, "grad_norm": 0.4265955686569214, "learning_rate": 7.757594167679224e-05, "loss": 1.2195, "step": 3701 }, { "epoch": 1.1236910001517681, "grad_norm": 0.8154575824737549, "learning_rate": 7.756986634264884e-05, "loss": 1.4988, "step": 3702 }, { "epoch": 1.1239945363484596, "grad_norm": 0.4791830778121948, "learning_rate": 7.756379100850547e-05, "loss": 1.2148, "step": 3703 }, { "epoch": 1.124298072545151, "grad_norm": 0.4666757583618164, "learning_rate": 7.75577156743621e-05, "loss": 1.5228, "step": 3704 }, { "epoch": 1.1246016087418425, "grad_norm": 0.4709447920322418, "learning_rate": 7.75516403402187e-05, "loss": 1.6371, "step": 3705 }, { "epoch": 1.1249051449385339, "grad_norm": 0.47154557704925537, "learning_rate": 7.754556500607534e-05, "loss": 1.1386, "step": 3706 }, { "epoch": 1.1252086811352253, "grad_norm": 0.384400337934494, "learning_rate": 7.753948967193197e-05, "loss": 1.6998, "step": 3707 }, { "epoch": 1.1255122173319168, "grad_norm": 0.5026933550834656, "learning_rate": 7.753341433778858e-05, "loss": 1.8225, "step": 3708 }, { "epoch": 1.1258157535286082, "grad_norm": 0.5157676339149475, "learning_rate": 7.75273390036452e-05, "loss": 1.4864, "step": 3709 }, { "epoch": 1.1261192897252998, "grad_norm": 1.0850920677185059, "learning_rate": 7.752126366950183e-05, "loss": 1.8376, "step": 3710 }, { "epoch": 1.1264228259219913, "grad_norm": 0.481871634721756, "learning_rate": 7.751518833535845e-05, "loss": 1.6568, "step": 3711 }, { "epoch": 1.1267263621186827, "grad_norm": 0.5413371920585632, "learning_rate": 7.750911300121507e-05, "loss": 1.3793, "step": 3712 }, { "epoch": 1.1270298983153741, "grad_norm": 0.44527462124824524, "learning_rate": 7.750303766707168e-05, "loss": 1.773, "step": 3713 }, { "epoch": 1.1273334345120656, "grad_norm": 0.49938610196113586, "learning_rate": 7.749696233292832e-05, "loss": 1.5552, "step": 3714 }, { "epoch": 1.127636970708757, "grad_norm": 0.5346314311027527, "learning_rate": 7.749088699878495e-05, "loss": 1.5582, "step": 3715 }, { "epoch": 1.1279405069054484, "grad_norm": 0.9484091401100159, "learning_rate": 7.748481166464155e-05, "loss": 1.5542, "step": 3716 }, { "epoch": 1.1282440431021399, "grad_norm": 0.47302237153053284, "learning_rate": 7.747873633049818e-05, "loss": 1.9569, "step": 3717 }, { "epoch": 1.1285475792988313, "grad_norm": 0.5025098323822021, "learning_rate": 7.747266099635481e-05, "loss": 1.4258, "step": 3718 }, { "epoch": 1.1288511154955228, "grad_norm": 0.42215389013290405, "learning_rate": 7.746658566221142e-05, "loss": 1.6635, "step": 3719 }, { "epoch": 1.1291546516922142, "grad_norm": 0.49948540329933167, "learning_rate": 7.746051032806805e-05, "loss": 1.6155, "step": 3720 }, { "epoch": 1.1294581878889058, "grad_norm": 0.6525446176528931, "learning_rate": 7.745443499392468e-05, "loss": 1.6404, "step": 3721 }, { "epoch": 1.1297617240855973, "grad_norm": 0.5007261633872986, "learning_rate": 7.74483596597813e-05, "loss": 1.2159, "step": 3722 }, { "epoch": 1.1300652602822887, "grad_norm": 0.6284709572792053, "learning_rate": 7.744228432563791e-05, "loss": 1.8035, "step": 3723 }, { "epoch": 1.1303687964789801, "grad_norm": 0.705549418926239, "learning_rate": 7.743620899149454e-05, "loss": 1.2551, "step": 3724 }, { "epoch": 1.1306723326756716, "grad_norm": 1.1247000694274902, "learning_rate": 7.743013365735116e-05, "loss": 1.652, "step": 3725 }, { "epoch": 1.130975868872363, "grad_norm": 0.4521750211715698, "learning_rate": 7.742405832320778e-05, "loss": 1.6, "step": 3726 }, { "epoch": 1.1312794050690544, "grad_norm": 0.4077288806438446, "learning_rate": 7.74179829890644e-05, "loss": 1.8314, "step": 3727 }, { "epoch": 1.1315829412657459, "grad_norm": 0.4165663421154022, "learning_rate": 7.741190765492103e-05, "loss": 1.9458, "step": 3728 }, { "epoch": 1.1318864774624373, "grad_norm": 0.55204176902771, "learning_rate": 7.740583232077764e-05, "loss": 1.661, "step": 3729 }, { "epoch": 1.132190013659129, "grad_norm": 0.5003572106361389, "learning_rate": 7.739975698663426e-05, "loss": 1.6445, "step": 3730 }, { "epoch": 1.1324935498558204, "grad_norm": 0.5279999375343323, "learning_rate": 7.739368165249089e-05, "loss": 1.5472, "step": 3731 }, { "epoch": 1.1327970860525118, "grad_norm": 0.5142978429794312, "learning_rate": 7.738760631834752e-05, "loss": 1.6682, "step": 3732 }, { "epoch": 1.1331006222492033, "grad_norm": 0.5571588277816772, "learning_rate": 7.738153098420413e-05, "loss": 1.5633, "step": 3733 }, { "epoch": 1.1334041584458947, "grad_norm": 0.6555919647216797, "learning_rate": 7.737545565006076e-05, "loss": 1.2002, "step": 3734 }, { "epoch": 1.1337076946425861, "grad_norm": 0.482624888420105, "learning_rate": 7.736938031591739e-05, "loss": 1.7551, "step": 3735 }, { "epoch": 1.1340112308392776, "grad_norm": 0.4221251904964447, "learning_rate": 7.7363304981774e-05, "loss": 1.1514, "step": 3736 }, { "epoch": 1.134314767035969, "grad_norm": 0.482462614774704, "learning_rate": 7.735722964763062e-05, "loss": 1.2174, "step": 3737 }, { "epoch": 1.1346183032326604, "grad_norm": 0.5528337955474854, "learning_rate": 7.735115431348725e-05, "loss": 1.7362, "step": 3738 }, { "epoch": 1.1349218394293519, "grad_norm": 0.4969632625579834, "learning_rate": 7.734507897934387e-05, "loss": 1.991, "step": 3739 }, { "epoch": 1.1352253756260433, "grad_norm": 0.5240209698677063, "learning_rate": 7.733900364520049e-05, "loss": 1.5519, "step": 3740 }, { "epoch": 1.135528911822735, "grad_norm": 0.41266825795173645, "learning_rate": 7.73329283110571e-05, "loss": 1.3405, "step": 3741 }, { "epoch": 1.1358324480194264, "grad_norm": 0.5517387986183167, "learning_rate": 7.732685297691374e-05, "loss": 1.1253, "step": 3742 }, { "epoch": 1.1361359842161178, "grad_norm": 0.49983713030815125, "learning_rate": 7.732077764277035e-05, "loss": 1.3818, "step": 3743 }, { "epoch": 1.1364395204128093, "grad_norm": 0.5212677717208862, "learning_rate": 7.731470230862697e-05, "loss": 1.8162, "step": 3744 }, { "epoch": 1.1367430566095007, "grad_norm": 0.5420991778373718, "learning_rate": 7.73086269744836e-05, "loss": 1.5643, "step": 3745 }, { "epoch": 1.1370465928061921, "grad_norm": 0.5572590827941895, "learning_rate": 7.730255164034023e-05, "loss": 1.6639, "step": 3746 }, { "epoch": 1.1373501290028836, "grad_norm": 0.5790618658065796, "learning_rate": 7.729647630619684e-05, "loss": 1.7084, "step": 3747 }, { "epoch": 1.137653665199575, "grad_norm": 0.6989924311637878, "learning_rate": 7.729040097205347e-05, "loss": 1.7443, "step": 3748 }, { "epoch": 1.1379572013962664, "grad_norm": 0.4986421763896942, "learning_rate": 7.72843256379101e-05, "loss": 1.5655, "step": 3749 }, { "epoch": 1.1382607375929579, "grad_norm": 0.6421902179718018, "learning_rate": 7.727825030376671e-05, "loss": 1.4474, "step": 3750 }, { "epoch": 1.1385642737896493, "grad_norm": 0.4390571117401123, "learning_rate": 7.727217496962333e-05, "loss": 1.9309, "step": 3751 }, { "epoch": 1.138867809986341, "grad_norm": 0.6357335448265076, "learning_rate": 7.726609963547996e-05, "loss": 1.4736, "step": 3752 }, { "epoch": 1.1391713461830324, "grad_norm": 0.5134897828102112, "learning_rate": 7.726002430133658e-05, "loss": 1.623, "step": 3753 }, { "epoch": 1.1394748823797238, "grad_norm": 0.5518725514411926, "learning_rate": 7.72539489671932e-05, "loss": 1.7415, "step": 3754 }, { "epoch": 1.1397784185764153, "grad_norm": 0.49003735184669495, "learning_rate": 7.724787363304981e-05, "loss": 1.8879, "step": 3755 }, { "epoch": 1.1400819547731067, "grad_norm": 0.5795713067054749, "learning_rate": 7.724179829890645e-05, "loss": 1.4763, "step": 3756 }, { "epoch": 1.1403854909697981, "grad_norm": 0.4583165645599365, "learning_rate": 7.723572296476306e-05, "loss": 1.7021, "step": 3757 }, { "epoch": 1.1406890271664896, "grad_norm": 0.5582238435745239, "learning_rate": 7.722964763061968e-05, "loss": 1.2102, "step": 3758 }, { "epoch": 1.140992563363181, "grad_norm": 0.8933469653129578, "learning_rate": 7.722357229647631e-05, "loss": 1.5831, "step": 3759 }, { "epoch": 1.1412960995598724, "grad_norm": 0.5090795755386353, "learning_rate": 7.721749696233294e-05, "loss": 1.7801, "step": 3760 }, { "epoch": 1.141599635756564, "grad_norm": 0.4822721481323242, "learning_rate": 7.721142162818955e-05, "loss": 1.5706, "step": 3761 }, { "epoch": 1.1419031719532553, "grad_norm": 0.5089300870895386, "learning_rate": 7.720534629404618e-05, "loss": 1.7128, "step": 3762 }, { "epoch": 1.142206708149947, "grad_norm": 0.5474359393119812, "learning_rate": 7.719927095990281e-05, "loss": 1.5872, "step": 3763 }, { "epoch": 1.1425102443466384, "grad_norm": 0.582497775554657, "learning_rate": 7.719319562575942e-05, "loss": 1.5936, "step": 3764 }, { "epoch": 1.1428137805433298, "grad_norm": 0.49585655331611633, "learning_rate": 7.718712029161604e-05, "loss": 1.1567, "step": 3765 }, { "epoch": 1.1431173167400213, "grad_norm": 0.5703508257865906, "learning_rate": 7.718104495747267e-05, "loss": 1.5919, "step": 3766 }, { "epoch": 1.1434208529367127, "grad_norm": 0.5075818300247192, "learning_rate": 7.717496962332929e-05, "loss": 1.683, "step": 3767 }, { "epoch": 1.1437243891334041, "grad_norm": 0.5167961120605469, "learning_rate": 7.716889428918591e-05, "loss": 1.425, "step": 3768 }, { "epoch": 1.1440279253300956, "grad_norm": 0.5668395161628723, "learning_rate": 7.716281895504252e-05, "loss": 1.4884, "step": 3769 }, { "epoch": 1.144331461526787, "grad_norm": 0.4944659173488617, "learning_rate": 7.715674362089916e-05, "loss": 1.1739, "step": 3770 }, { "epoch": 1.1446349977234784, "grad_norm": 0.792289137840271, "learning_rate": 7.715066828675577e-05, "loss": 1.7746, "step": 3771 }, { "epoch": 1.14493853392017, "grad_norm": 0.48457542061805725, "learning_rate": 7.714459295261239e-05, "loss": 1.7788, "step": 3772 }, { "epoch": 1.1452420701168615, "grad_norm": 0.5210672616958618, "learning_rate": 7.713851761846902e-05, "loss": 1.7434, "step": 3773 }, { "epoch": 1.145545606313553, "grad_norm": 0.41982871294021606, "learning_rate": 7.713244228432565e-05, "loss": 1.6847, "step": 3774 }, { "epoch": 1.1458491425102444, "grad_norm": 0.5397769212722778, "learning_rate": 7.712636695018226e-05, "loss": 1.7824, "step": 3775 }, { "epoch": 1.1461526787069358, "grad_norm": 0.9942769408226013, "learning_rate": 7.712029161603889e-05, "loss": 1.2639, "step": 3776 }, { "epoch": 1.1464562149036273, "grad_norm": 0.5531973838806152, "learning_rate": 7.711421628189552e-05, "loss": 1.6249, "step": 3777 }, { "epoch": 1.1467597511003187, "grad_norm": 0.4524674415588379, "learning_rate": 7.710814094775212e-05, "loss": 1.1845, "step": 3778 }, { "epoch": 1.1470632872970101, "grad_norm": 0.4991176426410675, "learning_rate": 7.710206561360875e-05, "loss": 1.7489, "step": 3779 }, { "epoch": 1.1473668234937016, "grad_norm": 0.46409863233566284, "learning_rate": 7.709599027946537e-05, "loss": 1.9125, "step": 3780 }, { "epoch": 1.147670359690393, "grad_norm": 0.5320178866386414, "learning_rate": 7.7089914945322e-05, "loss": 1.5643, "step": 3781 }, { "epoch": 1.1479738958870844, "grad_norm": 0.5475717782974243, "learning_rate": 7.708383961117862e-05, "loss": 1.6597, "step": 3782 }, { "epoch": 1.148277432083776, "grad_norm": 0.6391961574554443, "learning_rate": 7.707776427703523e-05, "loss": 1.7717, "step": 3783 }, { "epoch": 1.1485809682804675, "grad_norm": 0.583355724811554, "learning_rate": 7.707168894289187e-05, "loss": 1.8934, "step": 3784 }, { "epoch": 1.148884504477159, "grad_norm": 0.891815721988678, "learning_rate": 7.706561360874848e-05, "loss": 0.836, "step": 3785 }, { "epoch": 1.1491880406738504, "grad_norm": 0.5519468188285828, "learning_rate": 7.70595382746051e-05, "loss": 1.785, "step": 3786 }, { "epoch": 1.1494915768705418, "grad_norm": 0.5709235668182373, "learning_rate": 7.705346294046173e-05, "loss": 1.3448, "step": 3787 }, { "epoch": 1.1497951130672333, "grad_norm": 0.453735888004303, "learning_rate": 7.704738760631836e-05, "loss": 1.7276, "step": 3788 }, { "epoch": 1.1500986492639247, "grad_norm": 0.4751914441585541, "learning_rate": 7.704131227217497e-05, "loss": 1.7094, "step": 3789 }, { "epoch": 1.1504021854606161, "grad_norm": 0.4283442199230194, "learning_rate": 7.70352369380316e-05, "loss": 1.8033, "step": 3790 }, { "epoch": 1.1507057216573076, "grad_norm": 0.4466361701488495, "learning_rate": 7.702916160388823e-05, "loss": 0.9553, "step": 3791 }, { "epoch": 1.1510092578539992, "grad_norm": 0.4145885407924652, "learning_rate": 7.702308626974483e-05, "loss": 1.3865, "step": 3792 }, { "epoch": 1.1513127940506904, "grad_norm": 0.5452487468719482, "learning_rate": 7.701701093560146e-05, "loss": 1.7645, "step": 3793 }, { "epoch": 1.151616330247382, "grad_norm": 0.5216187834739685, "learning_rate": 7.701093560145808e-05, "loss": 1.7205, "step": 3794 }, { "epoch": 1.1519198664440735, "grad_norm": 0.4803890287876129, "learning_rate": 7.700486026731471e-05, "loss": 1.8845, "step": 3795 }, { "epoch": 1.152223402640765, "grad_norm": 0.42299672961235046, "learning_rate": 7.699878493317133e-05, "loss": 1.5909, "step": 3796 }, { "epoch": 1.1525269388374564, "grad_norm": 0.6640902161598206, "learning_rate": 7.699270959902794e-05, "loss": 1.7704, "step": 3797 }, { "epoch": 1.1528304750341478, "grad_norm": 0.4829888939857483, "learning_rate": 7.698663426488458e-05, "loss": 1.4564, "step": 3798 }, { "epoch": 1.1531340112308393, "grad_norm": 0.49383172392845154, "learning_rate": 7.698055893074119e-05, "loss": 1.5888, "step": 3799 }, { "epoch": 1.1534375474275307, "grad_norm": 0.4922170042991638, "learning_rate": 7.697448359659781e-05, "loss": 1.6228, "step": 3800 }, { "epoch": 1.1537410836242221, "grad_norm": 0.5983831286430359, "learning_rate": 7.696840826245444e-05, "loss": 1.3906, "step": 3801 }, { "epoch": 1.1540446198209136, "grad_norm": 0.4924396574497223, "learning_rate": 7.696233292831106e-05, "loss": 1.3455, "step": 3802 }, { "epoch": 1.1543481560176052, "grad_norm": 0.39909911155700684, "learning_rate": 7.695625759416768e-05, "loss": 1.1296, "step": 3803 }, { "epoch": 1.1546516922142966, "grad_norm": 0.5369203686714172, "learning_rate": 7.69501822600243e-05, "loss": 1.7645, "step": 3804 }, { "epoch": 1.154955228410988, "grad_norm": 0.5576856732368469, "learning_rate": 7.694410692588094e-05, "loss": 1.6252, "step": 3805 }, { "epoch": 1.1552587646076795, "grad_norm": 0.6163928508758545, "learning_rate": 7.693803159173754e-05, "loss": 1.1073, "step": 3806 }, { "epoch": 1.155562300804371, "grad_norm": 0.47761568427085876, "learning_rate": 7.693195625759417e-05, "loss": 1.725, "step": 3807 }, { "epoch": 1.1558658370010624, "grad_norm": 0.4886780083179474, "learning_rate": 7.692588092345079e-05, "loss": 2.0903, "step": 3808 }, { "epoch": 1.1561693731977538, "grad_norm": 0.5503537058830261, "learning_rate": 7.691980558930742e-05, "loss": 1.6063, "step": 3809 }, { "epoch": 1.1564729093944452, "grad_norm": 0.647091805934906, "learning_rate": 7.691373025516404e-05, "loss": 1.2219, "step": 3810 }, { "epoch": 1.1567764455911367, "grad_norm": 0.7522347569465637, "learning_rate": 7.690765492102065e-05, "loss": 1.5603, "step": 3811 }, { "epoch": 1.1570799817878281, "grad_norm": 0.5273557305335999, "learning_rate": 7.690157958687729e-05, "loss": 1.6698, "step": 3812 }, { "epoch": 1.1573835179845195, "grad_norm": 0.5218062996864319, "learning_rate": 7.68955042527339e-05, "loss": 1.1702, "step": 3813 }, { "epoch": 1.1576870541812112, "grad_norm": 0.4625975489616394, "learning_rate": 7.688942891859052e-05, "loss": 1.8227, "step": 3814 }, { "epoch": 1.1579905903779026, "grad_norm": 0.49970725178718567, "learning_rate": 7.688335358444715e-05, "loss": 1.6382, "step": 3815 }, { "epoch": 1.158294126574594, "grad_norm": 0.6002604365348816, "learning_rate": 7.687727825030377e-05, "loss": 1.7239, "step": 3816 }, { "epoch": 1.1585976627712855, "grad_norm": 0.6793041825294495, "learning_rate": 7.687120291616039e-05, "loss": 2.0427, "step": 3817 }, { "epoch": 1.158901198967977, "grad_norm": 0.5442394018173218, "learning_rate": 7.686512758201702e-05, "loss": 1.6008, "step": 3818 }, { "epoch": 1.1592047351646684, "grad_norm": 0.4671969413757324, "learning_rate": 7.685905224787365e-05, "loss": 1.401, "step": 3819 }, { "epoch": 1.1595082713613598, "grad_norm": 0.4723747968673706, "learning_rate": 7.685297691373025e-05, "loss": 1.5624, "step": 3820 }, { "epoch": 1.1598118075580512, "grad_norm": 0.5985869765281677, "learning_rate": 7.684690157958688e-05, "loss": 1.5069, "step": 3821 }, { "epoch": 1.1601153437547427, "grad_norm": 0.47640082240104675, "learning_rate": 7.68408262454435e-05, "loss": 1.0615, "step": 3822 }, { "epoch": 1.1604188799514341, "grad_norm": 0.4906187057495117, "learning_rate": 7.683475091130013e-05, "loss": 1.4341, "step": 3823 }, { "epoch": 1.1607224161481255, "grad_norm": 0.6372618675231934, "learning_rate": 7.682867557715675e-05, "loss": 1.2915, "step": 3824 }, { "epoch": 1.1610259523448172, "grad_norm": 0.42580631375312805, "learning_rate": 7.682260024301336e-05, "loss": 1.2624, "step": 3825 }, { "epoch": 1.1613294885415086, "grad_norm": 0.41982603073120117, "learning_rate": 7.681652490887e-05, "loss": 1.7227, "step": 3826 }, { "epoch": 1.1616330247382, "grad_norm": 0.44015559554100037, "learning_rate": 7.681044957472661e-05, "loss": 1.4198, "step": 3827 }, { "epoch": 1.1619365609348915, "grad_norm": 0.5147770047187805, "learning_rate": 7.680437424058323e-05, "loss": 1.3839, "step": 3828 }, { "epoch": 1.162240097131583, "grad_norm": 0.5992082953453064, "learning_rate": 7.679829890643986e-05, "loss": 1.6994, "step": 3829 }, { "epoch": 1.1625436333282744, "grad_norm": 0.5068255662918091, "learning_rate": 7.679222357229648e-05, "loss": 1.6389, "step": 3830 }, { "epoch": 1.1628471695249658, "grad_norm": 0.5025370717048645, "learning_rate": 7.67861482381531e-05, "loss": 1.4422, "step": 3831 }, { "epoch": 1.1631507057216572, "grad_norm": 0.5961645841598511, "learning_rate": 7.678007290400973e-05, "loss": 1.3941, "step": 3832 }, { "epoch": 1.1634542419183487, "grad_norm": 0.5184653997421265, "learning_rate": 7.677399756986636e-05, "loss": 1.7886, "step": 3833 }, { "epoch": 1.1637577781150403, "grad_norm": 0.4389922022819519, "learning_rate": 7.676792223572296e-05, "loss": 1.7861, "step": 3834 }, { "epoch": 1.1640613143117318, "grad_norm": 0.5118327140808105, "learning_rate": 7.676184690157959e-05, "loss": 1.7161, "step": 3835 }, { "epoch": 1.1643648505084232, "grad_norm": 0.5762491226196289, "learning_rate": 7.675577156743621e-05, "loss": 1.7012, "step": 3836 }, { "epoch": 1.1646683867051146, "grad_norm": 0.480589359998703, "learning_rate": 7.674969623329284e-05, "loss": 1.6764, "step": 3837 }, { "epoch": 1.164971922901806, "grad_norm": 0.45624813437461853, "learning_rate": 7.674362089914946e-05, "loss": 1.5741, "step": 3838 }, { "epoch": 1.1652754590984975, "grad_norm": 0.5073031783103943, "learning_rate": 7.673754556500608e-05, "loss": 1.263, "step": 3839 }, { "epoch": 1.165578995295189, "grad_norm": 0.44414857029914856, "learning_rate": 7.67314702308627e-05, "loss": 1.0236, "step": 3840 }, { "epoch": 1.1658825314918804, "grad_norm": 0.5479186177253723, "learning_rate": 7.672539489671932e-05, "loss": 1.9612, "step": 3841 }, { "epoch": 1.1661860676885718, "grad_norm": 0.4903987944126129, "learning_rate": 7.671931956257594e-05, "loss": 1.6913, "step": 3842 }, { "epoch": 1.1664896038852632, "grad_norm": 0.5010794401168823, "learning_rate": 7.671324422843257e-05, "loss": 1.8156, "step": 3843 }, { "epoch": 1.1667931400819547, "grad_norm": 0.4327058792114258, "learning_rate": 7.670716889428919e-05, "loss": 1.557, "step": 3844 }, { "epoch": 1.1670966762786463, "grad_norm": 0.6099236011505127, "learning_rate": 7.67010935601458e-05, "loss": 1.7191, "step": 3845 }, { "epoch": 1.1674002124753378, "grad_norm": 0.7435611486434937, "learning_rate": 7.669501822600244e-05, "loss": 1.8566, "step": 3846 }, { "epoch": 1.1677037486720292, "grad_norm": 0.6030800938606262, "learning_rate": 7.668894289185907e-05, "loss": 1.3435, "step": 3847 }, { "epoch": 1.1680072848687206, "grad_norm": 0.4840324819087982, "learning_rate": 7.668286755771567e-05, "loss": 1.5476, "step": 3848 }, { "epoch": 1.168310821065412, "grad_norm": 0.686964213848114, "learning_rate": 7.66767922235723e-05, "loss": 1.5845, "step": 3849 }, { "epoch": 1.1686143572621035, "grad_norm": 0.4797843396663666, "learning_rate": 7.667071688942892e-05, "loss": 1.7808, "step": 3850 }, { "epoch": 1.168917893458795, "grad_norm": 0.5187574028968811, "learning_rate": 7.666464155528554e-05, "loss": 1.4564, "step": 3851 }, { "epoch": 1.1692214296554864, "grad_norm": 0.42190396785736084, "learning_rate": 7.665856622114217e-05, "loss": 1.4857, "step": 3852 }, { "epoch": 1.1695249658521778, "grad_norm": 0.4939133822917938, "learning_rate": 7.665249088699879e-05, "loss": 1.4132, "step": 3853 }, { "epoch": 1.1698285020488692, "grad_norm": 0.4695587456226349, "learning_rate": 7.664641555285542e-05, "loss": 1.577, "step": 3854 }, { "epoch": 1.1701320382455607, "grad_norm": 0.5055351257324219, "learning_rate": 7.664034021871203e-05, "loss": 1.6084, "step": 3855 }, { "epoch": 1.1704355744422523, "grad_norm": 0.4340987503528595, "learning_rate": 7.663426488456865e-05, "loss": 1.8373, "step": 3856 }, { "epoch": 1.1707391106389438, "grad_norm": 0.5082830190658569, "learning_rate": 7.662818955042528e-05, "loss": 1.9309, "step": 3857 }, { "epoch": 1.1710426468356352, "grad_norm": 0.5326313972473145, "learning_rate": 7.66221142162819e-05, "loss": 1.5347, "step": 3858 }, { "epoch": 1.1713461830323266, "grad_norm": 0.6817587018013, "learning_rate": 7.661603888213852e-05, "loss": 1.9486, "step": 3859 }, { "epoch": 1.171649719229018, "grad_norm": 0.5530791282653809, "learning_rate": 7.660996354799515e-05, "loss": 1.5328, "step": 3860 }, { "epoch": 1.1719532554257095, "grad_norm": 0.4731312692165375, "learning_rate": 7.660388821385178e-05, "loss": 1.7807, "step": 3861 }, { "epoch": 1.172256791622401, "grad_norm": 0.46114182472229004, "learning_rate": 7.659781287970838e-05, "loss": 1.5321, "step": 3862 }, { "epoch": 1.1725603278190924, "grad_norm": 0.4836636185646057, "learning_rate": 7.659173754556501e-05, "loss": 1.8332, "step": 3863 }, { "epoch": 1.1728638640157838, "grad_norm": 0.44714653491973877, "learning_rate": 7.658566221142163e-05, "loss": 1.6486, "step": 3864 }, { "epoch": 1.1731674002124755, "grad_norm": 0.5285139083862305, "learning_rate": 7.657958687727825e-05, "loss": 1.6531, "step": 3865 }, { "epoch": 1.1734709364091669, "grad_norm": 0.44782644510269165, "learning_rate": 7.657351154313488e-05, "loss": 1.8439, "step": 3866 }, { "epoch": 1.1737744726058583, "grad_norm": 0.4893675148487091, "learning_rate": 7.65674362089915e-05, "loss": 1.1381, "step": 3867 }, { "epoch": 1.1740780088025498, "grad_norm": 0.619848370552063, "learning_rate": 7.656136087484813e-05, "loss": 1.4307, "step": 3868 }, { "epoch": 1.1743815449992412, "grad_norm": 0.5027971267700195, "learning_rate": 7.655528554070474e-05, "loss": 1.5816, "step": 3869 }, { "epoch": 1.1746850811959326, "grad_norm": 0.5814145803451538, "learning_rate": 7.654921020656136e-05, "loss": 2.0414, "step": 3870 }, { "epoch": 1.174988617392624, "grad_norm": 0.5027217268943787, "learning_rate": 7.654313487241799e-05, "loss": 1.6058, "step": 3871 }, { "epoch": 1.1752921535893155, "grad_norm": 0.5492193102836609, "learning_rate": 7.653705953827461e-05, "loss": 1.6025, "step": 3872 }, { "epoch": 1.175595689786007, "grad_norm": 0.5875594615936279, "learning_rate": 7.653098420413123e-05, "loss": 1.6729, "step": 3873 }, { "epoch": 1.1758992259826984, "grad_norm": 0.46128249168395996, "learning_rate": 7.652490886998786e-05, "loss": 1.7184, "step": 3874 }, { "epoch": 1.1762027621793898, "grad_norm": 0.4899282157421112, "learning_rate": 7.651883353584447e-05, "loss": 1.7772, "step": 3875 }, { "epoch": 1.1765062983760814, "grad_norm": 0.4934176206588745, "learning_rate": 7.651275820170109e-05, "loss": 1.5329, "step": 3876 }, { "epoch": 1.1768098345727729, "grad_norm": 0.41739147901535034, "learning_rate": 7.650668286755772e-05, "loss": 1.0405, "step": 3877 }, { "epoch": 1.1771133707694643, "grad_norm": 0.5608689785003662, "learning_rate": 7.650060753341434e-05, "loss": 1.459, "step": 3878 }, { "epoch": 1.1774169069661558, "grad_norm": 5.719343185424805, "learning_rate": 7.649453219927096e-05, "loss": 1.4504, "step": 3879 }, { "epoch": 1.1777204431628472, "grad_norm": 0.5679183006286621, "learning_rate": 7.648845686512759e-05, "loss": 1.699, "step": 3880 }, { "epoch": 1.1780239793595386, "grad_norm": 0.5237777233123779, "learning_rate": 7.64823815309842e-05, "loss": 1.7255, "step": 3881 }, { "epoch": 1.17832751555623, "grad_norm": 0.5510279536247253, "learning_rate": 7.647630619684084e-05, "loss": 1.8984, "step": 3882 }, { "epoch": 1.1786310517529215, "grad_norm": 0.4513683021068573, "learning_rate": 7.647023086269745e-05, "loss": 1.8629, "step": 3883 }, { "epoch": 1.178934587949613, "grad_norm": 0.5571762919425964, "learning_rate": 7.646415552855407e-05, "loss": 1.4258, "step": 3884 }, { "epoch": 1.1792381241463044, "grad_norm": 0.5506730675697327, "learning_rate": 7.64580801944107e-05, "loss": 1.5378, "step": 3885 }, { "epoch": 1.1795416603429958, "grad_norm": 0.4214894771575928, "learning_rate": 7.645200486026732e-05, "loss": 1.4376, "step": 3886 }, { "epoch": 1.1798451965396874, "grad_norm": 0.5280786752700806, "learning_rate": 7.644592952612394e-05, "loss": 1.6226, "step": 3887 }, { "epoch": 1.1801487327363789, "grad_norm": 2.3207452297210693, "learning_rate": 7.643985419198057e-05, "loss": 1.3407, "step": 3888 }, { "epoch": 1.1804522689330703, "grad_norm": 0.5287031531333923, "learning_rate": 7.643377885783718e-05, "loss": 1.7205, "step": 3889 }, { "epoch": 1.1807558051297617, "grad_norm": 0.5691362619400024, "learning_rate": 7.64277035236938e-05, "loss": 1.707, "step": 3890 }, { "epoch": 1.1810593413264532, "grad_norm": 0.5688780546188354, "learning_rate": 7.642162818955043e-05, "loss": 1.6914, "step": 3891 }, { "epoch": 1.1813628775231446, "grad_norm": 0.6007869839668274, "learning_rate": 7.641555285540705e-05, "loss": 1.6556, "step": 3892 }, { "epoch": 1.181666413719836, "grad_norm": 0.8336607217788696, "learning_rate": 7.640947752126367e-05, "loss": 1.6392, "step": 3893 }, { "epoch": 1.1819699499165275, "grad_norm": 0.5636674761772156, "learning_rate": 7.64034021871203e-05, "loss": 1.602, "step": 3894 }, { "epoch": 1.182273486113219, "grad_norm": 0.47849076986312866, "learning_rate": 7.639732685297692e-05, "loss": 1.6095, "step": 3895 }, { "epoch": 1.1825770223099106, "grad_norm": 0.4776079058647156, "learning_rate": 7.639125151883355e-05, "loss": 1.5303, "step": 3896 }, { "epoch": 1.182880558506602, "grad_norm": 0.5701802968978882, "learning_rate": 7.638517618469016e-05, "loss": 1.4568, "step": 3897 }, { "epoch": 1.1831840947032934, "grad_norm": 0.4271094799041748, "learning_rate": 7.637910085054678e-05, "loss": 1.7324, "step": 3898 }, { "epoch": 1.1834876308999849, "grad_norm": 0.5306187272071838, "learning_rate": 7.637302551640341e-05, "loss": 1.9042, "step": 3899 }, { "epoch": 1.1837911670966763, "grad_norm": 0.5607674717903137, "learning_rate": 7.636695018226003e-05, "loss": 1.5753, "step": 3900 }, { "epoch": 1.1840947032933677, "grad_norm": 0.520979106426239, "learning_rate": 7.636087484811665e-05, "loss": 1.773, "step": 3901 }, { "epoch": 1.1843982394900592, "grad_norm": 0.5560202598571777, "learning_rate": 7.635479951397328e-05, "loss": 1.685, "step": 3902 }, { "epoch": 1.1847017756867506, "grad_norm": 0.4960952401161194, "learning_rate": 7.63487241798299e-05, "loss": 1.4656, "step": 3903 }, { "epoch": 1.185005311883442, "grad_norm": 0.5220305323600769, "learning_rate": 7.634264884568651e-05, "loss": 1.7571, "step": 3904 }, { "epoch": 1.1853088480801335, "grad_norm": 0.5441679954528809, "learning_rate": 7.633657351154314e-05, "loss": 1.7369, "step": 3905 }, { "epoch": 1.185612384276825, "grad_norm": 0.524355411529541, "learning_rate": 7.633049817739976e-05, "loss": 1.554, "step": 3906 }, { "epoch": 1.1859159204735166, "grad_norm": 0.580812931060791, "learning_rate": 7.632442284325638e-05, "loss": 1.1878, "step": 3907 }, { "epoch": 1.186219456670208, "grad_norm": 0.4383397400379181, "learning_rate": 7.631834750911301e-05, "loss": 1.3957, "step": 3908 }, { "epoch": 1.1865229928668994, "grad_norm": 0.5575391054153442, "learning_rate": 7.631227217496963e-05, "loss": 1.7958, "step": 3909 }, { "epoch": 1.1868265290635909, "grad_norm": 0.6178303956985474, "learning_rate": 7.630619684082626e-05, "loss": 1.8002, "step": 3910 }, { "epoch": 1.1871300652602823, "grad_norm": 0.7053147554397583, "learning_rate": 7.630012150668287e-05, "loss": 1.0536, "step": 3911 }, { "epoch": 1.1874336014569737, "grad_norm": 1.381752848625183, "learning_rate": 7.629404617253949e-05, "loss": 1.3821, "step": 3912 }, { "epoch": 1.1877371376536652, "grad_norm": 1.020168662071228, "learning_rate": 7.628797083839612e-05, "loss": 1.3616, "step": 3913 }, { "epoch": 1.1880406738503566, "grad_norm": 0.5046608448028564, "learning_rate": 7.628189550425274e-05, "loss": 1.7283, "step": 3914 }, { "epoch": 1.188344210047048, "grad_norm": 0.4386448562145233, "learning_rate": 7.627582017010936e-05, "loss": 1.7318, "step": 3915 }, { "epoch": 1.1886477462437395, "grad_norm": 0.46474358439445496, "learning_rate": 7.626974483596599e-05, "loss": 1.8927, "step": 3916 }, { "epoch": 1.188951282440431, "grad_norm": 0.5807692408561707, "learning_rate": 7.62636695018226e-05, "loss": 1.6145, "step": 3917 }, { "epoch": 1.1892548186371226, "grad_norm": 0.4750295579433441, "learning_rate": 7.625759416767922e-05, "loss": 1.4997, "step": 3918 }, { "epoch": 1.189558354833814, "grad_norm": 0.5042990446090698, "learning_rate": 7.625151883353585e-05, "loss": 1.8107, "step": 3919 }, { "epoch": 1.1898618910305054, "grad_norm": 0.48407718539237976, "learning_rate": 7.624544349939247e-05, "loss": 1.4194, "step": 3920 }, { "epoch": 1.1901654272271969, "grad_norm": 0.5130017399787903, "learning_rate": 7.623936816524909e-05, "loss": 1.4494, "step": 3921 }, { "epoch": 1.1904689634238883, "grad_norm": 0.46143487095832825, "learning_rate": 7.623329283110572e-05, "loss": 1.5981, "step": 3922 }, { "epoch": 1.1907724996205797, "grad_norm": 0.8400600552558899, "learning_rate": 7.622721749696234e-05, "loss": 1.2068, "step": 3923 }, { "epoch": 1.1910760358172712, "grad_norm": 0.5218877792358398, "learning_rate": 7.622114216281895e-05, "loss": 1.6137, "step": 3924 }, { "epoch": 1.1913795720139626, "grad_norm": 0.4495093822479248, "learning_rate": 7.621506682867558e-05, "loss": 1.6691, "step": 3925 }, { "epoch": 1.191683108210654, "grad_norm": 0.49023687839508057, "learning_rate": 7.62089914945322e-05, "loss": 1.6207, "step": 3926 }, { "epoch": 1.1919866444073457, "grad_norm": 0.5561721324920654, "learning_rate": 7.620291616038883e-05, "loss": 1.4456, "step": 3927 }, { "epoch": 1.192290180604037, "grad_norm": 0.5107851028442383, "learning_rate": 7.619684082624545e-05, "loss": 1.78, "step": 3928 }, { "epoch": 1.1925937168007286, "grad_norm": 0.5281449556350708, "learning_rate": 7.619076549210207e-05, "loss": 1.9445, "step": 3929 }, { "epoch": 1.19289725299742, "grad_norm": 0.5192548036575317, "learning_rate": 7.61846901579587e-05, "loss": 1.6706, "step": 3930 }, { "epoch": 1.1932007891941114, "grad_norm": 0.5205463171005249, "learning_rate": 7.617861482381531e-05, "loss": 1.6415, "step": 3931 }, { "epoch": 1.1935043253908029, "grad_norm": 0.4953666925430298, "learning_rate": 7.617253948967193e-05, "loss": 1.6263, "step": 3932 }, { "epoch": 1.1938078615874943, "grad_norm": 0.49807044863700867, "learning_rate": 7.616646415552856e-05, "loss": 1.6871, "step": 3933 }, { "epoch": 1.1941113977841857, "grad_norm": 0.8351933360099792, "learning_rate": 7.616038882138518e-05, "loss": 1.0512, "step": 3934 }, { "epoch": 1.1944149339808772, "grad_norm": 0.4444892406463623, "learning_rate": 7.61543134872418e-05, "loss": 1.6981, "step": 3935 }, { "epoch": 1.1947184701775686, "grad_norm": 0.46068355441093445, "learning_rate": 7.614823815309843e-05, "loss": 1.8101, "step": 3936 }, { "epoch": 1.19502200637426, "grad_norm": 0.6468572616577148, "learning_rate": 7.614216281895505e-05, "loss": 1.411, "step": 3937 }, { "epoch": 1.1953255425709517, "grad_norm": 0.5605432391166687, "learning_rate": 7.613608748481166e-05, "loss": 1.1574, "step": 3938 }, { "epoch": 1.1956290787676431, "grad_norm": 0.4770459532737732, "learning_rate": 7.61300121506683e-05, "loss": 1.7571, "step": 3939 }, { "epoch": 1.1959326149643346, "grad_norm": 0.5230698585510254, "learning_rate": 7.612393681652491e-05, "loss": 1.6551, "step": 3940 }, { "epoch": 1.196236151161026, "grad_norm": 0.7350290417671204, "learning_rate": 7.611786148238154e-05, "loss": 1.6698, "step": 3941 }, { "epoch": 1.1965396873577174, "grad_norm": 0.5905072689056396, "learning_rate": 7.611178614823816e-05, "loss": 1.7354, "step": 3942 }, { "epoch": 1.1968432235544089, "grad_norm": 0.5296047329902649, "learning_rate": 7.610571081409478e-05, "loss": 1.7228, "step": 3943 }, { "epoch": 1.1971467597511003, "grad_norm": 0.49780750274658203, "learning_rate": 7.609963547995141e-05, "loss": 1.2224, "step": 3944 }, { "epoch": 1.1974502959477917, "grad_norm": 0.4543820917606354, "learning_rate": 7.609356014580802e-05, "loss": 1.6028, "step": 3945 }, { "epoch": 1.1977538321444832, "grad_norm": 0.4909208118915558, "learning_rate": 7.608748481166464e-05, "loss": 1.9409, "step": 3946 }, { "epoch": 1.1980573683411746, "grad_norm": 0.47982802987098694, "learning_rate": 7.608140947752127e-05, "loss": 1.8759, "step": 3947 }, { "epoch": 1.198360904537866, "grad_norm": 1.0359922647476196, "learning_rate": 7.607533414337789e-05, "loss": 1.2115, "step": 3948 }, { "epoch": 1.1986644407345577, "grad_norm": 0.5492017865180969, "learning_rate": 7.606925880923451e-05, "loss": 1.5465, "step": 3949 }, { "epoch": 1.1989679769312491, "grad_norm": 0.3987594544887543, "learning_rate": 7.606318347509114e-05, "loss": 1.6226, "step": 3950 }, { "epoch": 1.1992715131279406, "grad_norm": 0.4642569422721863, "learning_rate": 7.605710814094776e-05, "loss": 1.9385, "step": 3951 }, { "epoch": 1.199575049324632, "grad_norm": 0.4473128914833069, "learning_rate": 7.605103280680437e-05, "loss": 1.7057, "step": 3952 }, { "epoch": 1.1998785855213234, "grad_norm": 0.4777715504169464, "learning_rate": 7.6044957472661e-05, "loss": 1.3958, "step": 3953 }, { "epoch": 1.2001821217180149, "grad_norm": 0.553566575050354, "learning_rate": 7.603888213851762e-05, "loss": 1.649, "step": 3954 }, { "epoch": 1.2004856579147063, "grad_norm": 0.4801444411277771, "learning_rate": 7.603280680437425e-05, "loss": 1.7801, "step": 3955 }, { "epoch": 1.2007891941113977, "grad_norm": 0.4610240161418915, "learning_rate": 7.602673147023086e-05, "loss": 1.9021, "step": 3956 }, { "epoch": 1.2010927303080892, "grad_norm": 0.48058998584747314, "learning_rate": 7.602065613608749e-05, "loss": 1.0347, "step": 3957 }, { "epoch": 1.2013962665047808, "grad_norm": 0.5109126567840576, "learning_rate": 7.601458080194412e-05, "loss": 1.6481, "step": 3958 }, { "epoch": 1.201699802701472, "grad_norm": 0.5463404655456543, "learning_rate": 7.600850546780073e-05, "loss": 1.4141, "step": 3959 }, { "epoch": 1.2020033388981637, "grad_norm": 0.46814149618148804, "learning_rate": 7.600243013365735e-05, "loss": 1.7893, "step": 3960 }, { "epoch": 1.2023068750948551, "grad_norm": 0.5103051066398621, "learning_rate": 7.599635479951398e-05, "loss": 1.533, "step": 3961 }, { "epoch": 1.2026104112915466, "grad_norm": 0.4707978665828705, "learning_rate": 7.59902794653706e-05, "loss": 1.3515, "step": 3962 }, { "epoch": 1.202913947488238, "grad_norm": 1.0241955518722534, "learning_rate": 7.598420413122722e-05, "loss": 1.4342, "step": 3963 }, { "epoch": 1.2032174836849294, "grad_norm": 0.4893020689487457, "learning_rate": 7.597812879708385e-05, "loss": 1.8932, "step": 3964 }, { "epoch": 1.2035210198816209, "grad_norm": 0.47995486855506897, "learning_rate": 7.597205346294047e-05, "loss": 1.5502, "step": 3965 }, { "epoch": 1.2038245560783123, "grad_norm": 0.5193171501159668, "learning_rate": 7.596597812879708e-05, "loss": 1.6352, "step": 3966 }, { "epoch": 1.2041280922750037, "grad_norm": 0.5245213508605957, "learning_rate": 7.595990279465371e-05, "loss": 1.4066, "step": 3967 }, { "epoch": 1.2044316284716952, "grad_norm": 0.578769326210022, "learning_rate": 7.595382746051033e-05, "loss": 1.7556, "step": 3968 }, { "epoch": 1.2047351646683868, "grad_norm": 0.4418366253376007, "learning_rate": 7.594775212636696e-05, "loss": 0.9755, "step": 3969 }, { "epoch": 1.2050387008650782, "grad_norm": 0.5069161653518677, "learning_rate": 7.594167679222357e-05, "loss": 1.7738, "step": 3970 }, { "epoch": 1.2053422370617697, "grad_norm": 0.6108199954032898, "learning_rate": 7.59356014580802e-05, "loss": 1.6229, "step": 3971 }, { "epoch": 1.2056457732584611, "grad_norm": 0.4657975137233734, "learning_rate": 7.592952612393683e-05, "loss": 1.7213, "step": 3972 }, { "epoch": 1.2059493094551526, "grad_norm": 0.4727039337158203, "learning_rate": 7.592345078979343e-05, "loss": 1.6633, "step": 3973 }, { "epoch": 1.206252845651844, "grad_norm": 0.48204702138900757, "learning_rate": 7.591737545565006e-05, "loss": 1.7443, "step": 3974 }, { "epoch": 1.2065563818485354, "grad_norm": 0.4971252977848053, "learning_rate": 7.591130012150669e-05, "loss": 1.8952, "step": 3975 }, { "epoch": 1.2068599180452269, "grad_norm": 0.5344823002815247, "learning_rate": 7.590522478736331e-05, "loss": 1.402, "step": 3976 }, { "epoch": 1.2071634542419183, "grad_norm": 0.5859917402267456, "learning_rate": 7.589914945321993e-05, "loss": 1.8069, "step": 3977 }, { "epoch": 1.2074669904386097, "grad_norm": 0.5768531560897827, "learning_rate": 7.589307411907656e-05, "loss": 1.4101, "step": 3978 }, { "epoch": 1.2077705266353012, "grad_norm": 0.4760257303714752, "learning_rate": 7.588699878493318e-05, "loss": 1.6148, "step": 3979 }, { "epoch": 1.2080740628319928, "grad_norm": 0.5474233031272888, "learning_rate": 7.588092345078979e-05, "loss": 1.4744, "step": 3980 }, { "epoch": 1.2083775990286842, "grad_norm": 1.7555142641067505, "learning_rate": 7.587484811664642e-05, "loss": 1.6162, "step": 3981 }, { "epoch": 1.2086811352253757, "grad_norm": 0.4019928574562073, "learning_rate": 7.586877278250304e-05, "loss": 1.1217, "step": 3982 }, { "epoch": 1.2089846714220671, "grad_norm": 0.5030451416969299, "learning_rate": 7.586269744835967e-05, "loss": 1.7434, "step": 3983 }, { "epoch": 1.2092882076187585, "grad_norm": 0.4046245813369751, "learning_rate": 7.585662211421628e-05, "loss": 1.3424, "step": 3984 }, { "epoch": 1.20959174381545, "grad_norm": 0.5747511386871338, "learning_rate": 7.58505467800729e-05, "loss": 1.7236, "step": 3985 }, { "epoch": 1.2098952800121414, "grad_norm": 0.505330502986908, "learning_rate": 7.584447144592954e-05, "loss": 1.1954, "step": 3986 }, { "epoch": 1.2101988162088329, "grad_norm": 0.4753364026546478, "learning_rate": 7.583839611178614e-05, "loss": 1.6601, "step": 3987 }, { "epoch": 1.2105023524055243, "grad_norm": 0.45532703399658203, "learning_rate": 7.583232077764277e-05, "loss": 1.6707, "step": 3988 }, { "epoch": 1.2108058886022157, "grad_norm": 0.5530490875244141, "learning_rate": 7.58262454434994e-05, "loss": 1.5897, "step": 3989 }, { "epoch": 1.2111094247989072, "grad_norm": 0.47323623299598694, "learning_rate": 7.582017010935602e-05, "loss": 1.7492, "step": 3990 }, { "epoch": 1.2114129609955988, "grad_norm": 0.5553590655326843, "learning_rate": 7.581409477521264e-05, "loss": 1.4958, "step": 3991 }, { "epoch": 1.2117164971922902, "grad_norm": 0.4471113979816437, "learning_rate": 7.580801944106927e-05, "loss": 1.8111, "step": 3992 }, { "epoch": 1.2120200333889817, "grad_norm": 0.5062560439109802, "learning_rate": 7.580194410692589e-05, "loss": 1.6767, "step": 3993 }, { "epoch": 1.212323569585673, "grad_norm": 0.5048001408576965, "learning_rate": 7.57958687727825e-05, "loss": 1.9827, "step": 3994 }, { "epoch": 1.2126271057823645, "grad_norm": 0.42375367879867554, "learning_rate": 7.578979343863913e-05, "loss": 1.2515, "step": 3995 }, { "epoch": 1.212930641979056, "grad_norm": 0.5349414348602295, "learning_rate": 7.578371810449575e-05, "loss": 1.7492, "step": 3996 }, { "epoch": 1.2132341781757474, "grad_norm": 0.45927226543426514, "learning_rate": 7.577764277035237e-05, "loss": 1.5423, "step": 3997 }, { "epoch": 1.2135377143724388, "grad_norm": 0.6156039237976074, "learning_rate": 7.577156743620899e-05, "loss": 1.3404, "step": 3998 }, { "epoch": 1.2138412505691303, "grad_norm": 1.3208363056182861, "learning_rate": 7.576549210206562e-05, "loss": 1.2658, "step": 3999 }, { "epoch": 1.214144786765822, "grad_norm": 0.5266988277435303, "learning_rate": 7.575941676792225e-05, "loss": 1.6878, "step": 4000 }, { "epoch": 1.2144483229625134, "grad_norm": 0.463829904794693, "learning_rate": 7.575334143377885e-05, "loss": 1.6529, "step": 4001 }, { "epoch": 1.2147518591592048, "grad_norm": 0.4354858994483948, "learning_rate": 7.574726609963548e-05, "loss": 1.9436, "step": 4002 }, { "epoch": 1.2150553953558962, "grad_norm": 0.889329731464386, "learning_rate": 7.574119076549211e-05, "loss": 0.7387, "step": 4003 }, { "epoch": 1.2153589315525877, "grad_norm": 0.9312804341316223, "learning_rate": 7.573511543134873e-05, "loss": 1.9534, "step": 4004 }, { "epoch": 1.215662467749279, "grad_norm": 0.5896217823028564, "learning_rate": 7.572904009720535e-05, "loss": 1.8036, "step": 4005 }, { "epoch": 1.2159660039459705, "grad_norm": 0.5803027153015137, "learning_rate": 7.572296476306198e-05, "loss": 1.7561, "step": 4006 }, { "epoch": 1.216269540142662, "grad_norm": 0.4633532166481018, "learning_rate": 7.57168894289186e-05, "loss": 2.0534, "step": 4007 }, { "epoch": 1.2165730763393534, "grad_norm": 0.4791993200778961, "learning_rate": 7.571081409477521e-05, "loss": 1.8331, "step": 4008 }, { "epoch": 1.2168766125360448, "grad_norm": 0.48696058988571167, "learning_rate": 7.570473876063184e-05, "loss": 1.6295, "step": 4009 }, { "epoch": 1.2171801487327363, "grad_norm": 0.433896005153656, "learning_rate": 7.569866342648846e-05, "loss": 1.2841, "step": 4010 }, { "epoch": 1.217483684929428, "grad_norm": 0.5070579648017883, "learning_rate": 7.569258809234508e-05, "loss": 1.7859, "step": 4011 }, { "epoch": 1.2177872211261194, "grad_norm": 0.5580217838287354, "learning_rate": 7.56865127582017e-05, "loss": 1.5646, "step": 4012 }, { "epoch": 1.2180907573228108, "grad_norm": 0.594814121723175, "learning_rate": 7.568043742405833e-05, "loss": 1.8118, "step": 4013 }, { "epoch": 1.2183942935195022, "grad_norm": 0.5270577669143677, "learning_rate": 7.567436208991496e-05, "loss": 1.8065, "step": 4014 }, { "epoch": 1.2186978297161937, "grad_norm": 0.6173904538154602, "learning_rate": 7.566828675577156e-05, "loss": 1.7159, "step": 4015 }, { "epoch": 1.219001365912885, "grad_norm": 0.6258283257484436, "learning_rate": 7.566221142162819e-05, "loss": 1.1892, "step": 4016 }, { "epoch": 1.2193049021095765, "grad_norm": 0.42736998200416565, "learning_rate": 7.565613608748482e-05, "loss": 1.3273, "step": 4017 }, { "epoch": 1.219608438306268, "grad_norm": 0.5543321371078491, "learning_rate": 7.565006075334144e-05, "loss": 1.4035, "step": 4018 }, { "epoch": 1.2199119745029594, "grad_norm": 0.5516905188560486, "learning_rate": 7.564398541919806e-05, "loss": 1.5628, "step": 4019 }, { "epoch": 1.2202155106996508, "grad_norm": 0.4710666537284851, "learning_rate": 7.563791008505469e-05, "loss": 1.664, "step": 4020 }, { "epoch": 1.2205190468963423, "grad_norm": 0.5181185007095337, "learning_rate": 7.56318347509113e-05, "loss": 1.7942, "step": 4021 }, { "epoch": 1.220822583093034, "grad_norm": 0.4957810044288635, "learning_rate": 7.562575941676792e-05, "loss": 1.899, "step": 4022 }, { "epoch": 1.2211261192897254, "grad_norm": 0.5114620923995972, "learning_rate": 7.561968408262455e-05, "loss": 1.5602, "step": 4023 }, { "epoch": 1.2214296554864168, "grad_norm": 0.4717017710208893, "learning_rate": 7.561360874848117e-05, "loss": 1.4065, "step": 4024 }, { "epoch": 1.2217331916831082, "grad_norm": 0.5395920872688293, "learning_rate": 7.560753341433779e-05, "loss": 1.8311, "step": 4025 }, { "epoch": 1.2220367278797997, "grad_norm": 0.47249385714530945, "learning_rate": 7.56014580801944e-05, "loss": 1.9296, "step": 4026 }, { "epoch": 1.222340264076491, "grad_norm": 0.4514206051826477, "learning_rate": 7.559538274605104e-05, "loss": 1.2713, "step": 4027 }, { "epoch": 1.2226438002731825, "grad_norm": 0.9952641129493713, "learning_rate": 7.558930741190767e-05, "loss": 1.5778, "step": 4028 }, { "epoch": 1.222947336469874, "grad_norm": 0.6609991192817688, "learning_rate": 7.558323207776427e-05, "loss": 1.9552, "step": 4029 }, { "epoch": 1.2232508726665654, "grad_norm": 0.5197901129722595, "learning_rate": 7.55771567436209e-05, "loss": 1.7676, "step": 4030 }, { "epoch": 1.223554408863257, "grad_norm": 0.5448186993598938, "learning_rate": 7.557108140947753e-05, "loss": 1.4093, "step": 4031 }, { "epoch": 1.2238579450599485, "grad_norm": 0.4948391318321228, "learning_rate": 7.556500607533415e-05, "loss": 1.8091, "step": 4032 }, { "epoch": 1.22416148125664, "grad_norm": 0.6797294020652771, "learning_rate": 7.555893074119077e-05, "loss": 1.8645, "step": 4033 }, { "epoch": 1.2244650174533314, "grad_norm": 0.3642887771129608, "learning_rate": 7.55528554070474e-05, "loss": 1.4542, "step": 4034 }, { "epoch": 1.2247685536500228, "grad_norm": 0.5181020498275757, "learning_rate": 7.554678007290402e-05, "loss": 1.791, "step": 4035 }, { "epoch": 1.2250720898467142, "grad_norm": 0.5388804078102112, "learning_rate": 7.554070473876063e-05, "loss": 1.9918, "step": 4036 }, { "epoch": 1.2253756260434057, "grad_norm": 0.4962565302848816, "learning_rate": 7.553462940461726e-05, "loss": 1.6094, "step": 4037 }, { "epoch": 1.225679162240097, "grad_norm": 0.5955974459648132, "learning_rate": 7.552855407047388e-05, "loss": 1.401, "step": 4038 }, { "epoch": 1.2259826984367885, "grad_norm": 0.5586566925048828, "learning_rate": 7.55224787363305e-05, "loss": 1.7028, "step": 4039 }, { "epoch": 1.22628623463348, "grad_norm": 0.5239779949188232, "learning_rate": 7.551640340218712e-05, "loss": 2.0994, "step": 4040 }, { "epoch": 1.2265897708301714, "grad_norm": 0.5144902467727661, "learning_rate": 7.551032806804375e-05, "loss": 1.5493, "step": 4041 }, { "epoch": 1.226893307026863, "grad_norm": 0.4962595999240875, "learning_rate": 7.550425273390038e-05, "loss": 1.7629, "step": 4042 }, { "epoch": 1.2271968432235545, "grad_norm": 0.5348252058029175, "learning_rate": 7.549817739975698e-05, "loss": 1.7287, "step": 4043 }, { "epoch": 1.227500379420246, "grad_norm": 0.485503613948822, "learning_rate": 7.549210206561361e-05, "loss": 1.7518, "step": 4044 }, { "epoch": 1.2278039156169374, "grad_norm": 0.46359485387802124, "learning_rate": 7.548602673147024e-05, "loss": 1.7105, "step": 4045 }, { "epoch": 1.2281074518136288, "grad_norm": 0.5181253552436829, "learning_rate": 7.547995139732685e-05, "loss": 1.4408, "step": 4046 }, { "epoch": 1.2284109880103202, "grad_norm": 0.5859808921813965, "learning_rate": 7.547387606318348e-05, "loss": 1.8802, "step": 4047 }, { "epoch": 1.2287145242070117, "grad_norm": 0.7627731561660767, "learning_rate": 7.546780072904011e-05, "loss": 1.6918, "step": 4048 }, { "epoch": 1.229018060403703, "grad_norm": 0.4201076626777649, "learning_rate": 7.546172539489673e-05, "loss": 1.6491, "step": 4049 }, { "epoch": 1.2293215966003945, "grad_norm": 0.6305765509605408, "learning_rate": 7.545565006075334e-05, "loss": 0.5473, "step": 4050 }, { "epoch": 1.229625132797086, "grad_norm": 0.519670307636261, "learning_rate": 7.544957472660996e-05, "loss": 1.6919, "step": 4051 }, { "epoch": 1.2299286689937774, "grad_norm": 0.5410406589508057, "learning_rate": 7.544349939246659e-05, "loss": 0.9771, "step": 4052 }, { "epoch": 1.230232205190469, "grad_norm": 0.489894837141037, "learning_rate": 7.543742405832321e-05, "loss": 1.5552, "step": 4053 }, { "epoch": 1.2305357413871605, "grad_norm": 0.8147485256195068, "learning_rate": 7.543134872417983e-05, "loss": 1.1342, "step": 4054 }, { "epoch": 1.230839277583852, "grad_norm": 0.5854855179786682, "learning_rate": 7.542527339003646e-05, "loss": 1.2235, "step": 4055 }, { "epoch": 1.2311428137805434, "grad_norm": 0.597017228603363, "learning_rate": 7.541919805589309e-05, "loss": 1.7028, "step": 4056 }, { "epoch": 1.2314463499772348, "grad_norm": 0.5282664895057678, "learning_rate": 7.541312272174969e-05, "loss": 1.1727, "step": 4057 }, { "epoch": 1.2317498861739262, "grad_norm": 0.5463590621948242, "learning_rate": 7.540704738760632e-05, "loss": 1.7269, "step": 4058 }, { "epoch": 1.2320534223706177, "grad_norm": 0.5592811107635498, "learning_rate": 7.540097205346295e-05, "loss": 1.2984, "step": 4059 }, { "epoch": 1.232356958567309, "grad_norm": 0.58669513463974, "learning_rate": 7.539489671931956e-05, "loss": 1.585, "step": 4060 }, { "epoch": 1.2326604947640005, "grad_norm": 0.5241060853004456, "learning_rate": 7.538882138517619e-05, "loss": 1.2848, "step": 4061 }, { "epoch": 1.2329640309606922, "grad_norm": 0.45810186862945557, "learning_rate": 7.538274605103282e-05, "loss": 1.4243, "step": 4062 }, { "epoch": 1.2332675671573834, "grad_norm": 0.468780517578125, "learning_rate": 7.537667071688944e-05, "loss": 1.3984, "step": 4063 }, { "epoch": 1.233571103354075, "grad_norm": 0.46137523651123047, "learning_rate": 7.537059538274605e-05, "loss": 1.9646, "step": 4064 }, { "epoch": 1.2338746395507665, "grad_norm": 0.6160327196121216, "learning_rate": 7.536452004860267e-05, "loss": 1.7623, "step": 4065 }, { "epoch": 1.234178175747458, "grad_norm": 0.5266595482826233, "learning_rate": 7.53584447144593e-05, "loss": 1.7096, "step": 4066 }, { "epoch": 1.2344817119441494, "grad_norm": 0.5507217645645142, "learning_rate": 7.535236938031592e-05, "loss": 1.1104, "step": 4067 }, { "epoch": 1.2347852481408408, "grad_norm": 0.5340211987495422, "learning_rate": 7.534629404617254e-05, "loss": 1.6101, "step": 4068 }, { "epoch": 1.2350887843375322, "grad_norm": 0.6032447218894958, "learning_rate": 7.534021871202917e-05, "loss": 1.4905, "step": 4069 }, { "epoch": 1.2353923205342237, "grad_norm": 0.5784475207328796, "learning_rate": 7.533414337788578e-05, "loss": 1.2617, "step": 4070 }, { "epoch": 1.235695856730915, "grad_norm": 0.6673290133476257, "learning_rate": 7.53280680437424e-05, "loss": 1.3954, "step": 4071 }, { "epoch": 1.2359993929276065, "grad_norm": 0.5767869353294373, "learning_rate": 7.532199270959903e-05, "loss": 1.6808, "step": 4072 }, { "epoch": 1.2363029291242982, "grad_norm": 0.5009193420410156, "learning_rate": 7.531591737545566e-05, "loss": 1.7008, "step": 4073 }, { "epoch": 1.2366064653209896, "grad_norm": 0.4754045903682709, "learning_rate": 7.530984204131227e-05, "loss": 1.7596, "step": 4074 }, { "epoch": 1.236910001517681, "grad_norm": 0.620161235332489, "learning_rate": 7.53037667071689e-05, "loss": 1.4089, "step": 4075 }, { "epoch": 1.2372135377143725, "grad_norm": 0.34108883142471313, "learning_rate": 7.529769137302553e-05, "loss": 1.2953, "step": 4076 }, { "epoch": 1.237517073911064, "grad_norm": 0.9694557189941406, "learning_rate": 7.529161603888215e-05, "loss": 1.3095, "step": 4077 }, { "epoch": 1.2378206101077553, "grad_norm": 0.5211403965950012, "learning_rate": 7.528554070473876e-05, "loss": 1.7275, "step": 4078 }, { "epoch": 1.2381241463044468, "grad_norm": 0.4836254119873047, "learning_rate": 7.527946537059538e-05, "loss": 1.8843, "step": 4079 }, { "epoch": 1.2384276825011382, "grad_norm": 0.4383199214935303, "learning_rate": 7.527339003645201e-05, "loss": 1.7991, "step": 4080 }, { "epoch": 1.2387312186978297, "grad_norm": 0.5982313752174377, "learning_rate": 7.526731470230863e-05, "loss": 1.4946, "step": 4081 }, { "epoch": 1.239034754894521, "grad_norm": 0.5452768802642822, "learning_rate": 7.526123936816525e-05, "loss": 1.5868, "step": 4082 }, { "epoch": 1.2393382910912125, "grad_norm": 0.5064995288848877, "learning_rate": 7.525516403402188e-05, "loss": 1.7856, "step": 4083 }, { "epoch": 1.2396418272879042, "grad_norm": 0.5409367084503174, "learning_rate": 7.52490886998785e-05, "loss": 1.7328, "step": 4084 }, { "epoch": 1.2399453634845956, "grad_norm": 0.5907915830612183, "learning_rate": 7.524301336573511e-05, "loss": 1.2613, "step": 4085 }, { "epoch": 1.240248899681287, "grad_norm": 0.42374351620674133, "learning_rate": 7.523693803159174e-05, "loss": 1.952, "step": 4086 }, { "epoch": 1.2405524358779785, "grad_norm": 0.5498590469360352, "learning_rate": 7.523086269744837e-05, "loss": 1.7781, "step": 4087 }, { "epoch": 1.24085597207467, "grad_norm": 0.5030960440635681, "learning_rate": 7.522478736330498e-05, "loss": 1.3881, "step": 4088 }, { "epoch": 1.2411595082713613, "grad_norm": 0.5195364356040955, "learning_rate": 7.521871202916161e-05, "loss": 1.7551, "step": 4089 }, { "epoch": 1.2414630444680528, "grad_norm": 0.5500257611274719, "learning_rate": 7.521263669501824e-05, "loss": 1.316, "step": 4090 }, { "epoch": 1.2417665806647442, "grad_norm": 0.5093240737915039, "learning_rate": 7.520656136087486e-05, "loss": 1.3301, "step": 4091 }, { "epoch": 1.2420701168614356, "grad_norm": 0.6616035103797913, "learning_rate": 7.520048602673147e-05, "loss": 1.9355, "step": 4092 }, { "epoch": 1.2423736530581273, "grad_norm": 0.5518209338188171, "learning_rate": 7.519441069258809e-05, "loss": 1.7574, "step": 4093 }, { "epoch": 1.2426771892548185, "grad_norm": 0.4014967978000641, "learning_rate": 7.518833535844472e-05, "loss": 1.8058, "step": 4094 }, { "epoch": 1.2429807254515102, "grad_norm": 0.4472224712371826, "learning_rate": 7.518226002430134e-05, "loss": 1.6429, "step": 4095 }, { "epoch": 1.2432842616482016, "grad_norm": 0.5114421248435974, "learning_rate": 7.517618469015796e-05, "loss": 1.6209, "step": 4096 }, { "epoch": 1.243587797844893, "grad_norm": 0.5454509854316711, "learning_rate": 7.517010935601459e-05, "loss": 1.7061, "step": 4097 }, { "epoch": 1.2438913340415845, "grad_norm": 0.41268596053123474, "learning_rate": 7.51640340218712e-05, "loss": 1.9439, "step": 4098 }, { "epoch": 1.244194870238276, "grad_norm": 0.5628203749656677, "learning_rate": 7.515795868772782e-05, "loss": 1.8348, "step": 4099 }, { "epoch": 1.2444984064349673, "grad_norm": 0.7100119590759277, "learning_rate": 7.515188335358445e-05, "loss": 1.764, "step": 4100 }, { "epoch": 1.2448019426316588, "grad_norm": 0.4779648184776306, "learning_rate": 7.514580801944108e-05, "loss": 1.7384, "step": 4101 }, { "epoch": 1.2451054788283502, "grad_norm": 0.42757317423820496, "learning_rate": 7.513973268529769e-05, "loss": 1.3117, "step": 4102 }, { "epoch": 1.2454090150250416, "grad_norm": 0.6299741268157959, "learning_rate": 7.513365735115432e-05, "loss": 1.5541, "step": 4103 }, { "epoch": 1.2457125512217333, "grad_norm": 0.5417861938476562, "learning_rate": 7.512758201701095e-05, "loss": 1.7433, "step": 4104 }, { "epoch": 1.2460160874184247, "grad_norm": 0.504732072353363, "learning_rate": 7.512150668286757e-05, "loss": 1.6971, "step": 4105 }, { "epoch": 1.2463196236151162, "grad_norm": 1.007019281387329, "learning_rate": 7.511543134872418e-05, "loss": 1.4254, "step": 4106 }, { "epoch": 1.2466231598118076, "grad_norm": 0.7208684086799622, "learning_rate": 7.51093560145808e-05, "loss": 1.8213, "step": 4107 }, { "epoch": 1.246926696008499, "grad_norm": 0.5569952726364136, "learning_rate": 7.510328068043743e-05, "loss": 1.8765, "step": 4108 }, { "epoch": 1.2472302322051905, "grad_norm": 0.5467456579208374, "learning_rate": 7.509720534629405e-05, "loss": 1.6964, "step": 4109 }, { "epoch": 1.247533768401882, "grad_norm": 0.6151578426361084, "learning_rate": 7.509113001215067e-05, "loss": 1.8341, "step": 4110 }, { "epoch": 1.2478373045985733, "grad_norm": 0.5277178883552551, "learning_rate": 7.50850546780073e-05, "loss": 1.6909, "step": 4111 }, { "epoch": 1.2481408407952648, "grad_norm": 0.5101326107978821, "learning_rate": 7.507897934386391e-05, "loss": 1.6674, "step": 4112 }, { "epoch": 1.2484443769919562, "grad_norm": 0.49837616086006165, "learning_rate": 7.507290400972053e-05, "loss": 1.6809, "step": 4113 }, { "epoch": 1.2487479131886476, "grad_norm": 0.46518057584762573, "learning_rate": 7.506682867557716e-05, "loss": 1.3971, "step": 4114 }, { "epoch": 1.2490514493853393, "grad_norm": 0.6974937915802002, "learning_rate": 7.50607533414338e-05, "loss": 1.4269, "step": 4115 }, { "epoch": 1.2493549855820307, "grad_norm": 0.5411034822463989, "learning_rate": 7.50546780072904e-05, "loss": 1.5861, "step": 4116 }, { "epoch": 1.2496585217787222, "grad_norm": 0.5407208204269409, "learning_rate": 7.504860267314703e-05, "loss": 1.6395, "step": 4117 }, { "epoch": 1.2499620579754136, "grad_norm": 0.4818359911441803, "learning_rate": 7.504252733900366e-05, "loss": 1.2755, "step": 4118 }, { "epoch": 1.250265594172105, "grad_norm": 0.5112556219100952, "learning_rate": 7.503645200486028e-05, "loss": 1.4617, "step": 4119 }, { "epoch": 1.2505691303687965, "grad_norm": 0.48120614886283875, "learning_rate": 7.50303766707169e-05, "loss": 1.8576, "step": 4120 }, { "epoch": 1.250872666565488, "grad_norm": 0.5144551992416382, "learning_rate": 7.502430133657351e-05, "loss": 1.4099, "step": 4121 }, { "epoch": 1.2511762027621793, "grad_norm": 0.5238993763923645, "learning_rate": 7.501822600243014e-05, "loss": 0.8086, "step": 4122 }, { "epoch": 1.2514797389588708, "grad_norm": 0.5506192445755005, "learning_rate": 7.501215066828676e-05, "loss": 1.7507, "step": 4123 }, { "epoch": 1.2517832751555624, "grad_norm": 0.5206360816955566, "learning_rate": 7.500607533414338e-05, "loss": 1.3207, "step": 4124 }, { "epoch": 1.2520868113522536, "grad_norm": 0.5082906484603882, "learning_rate": 7.500000000000001e-05, "loss": 1.6108, "step": 4125 }, { "epoch": 1.2523903475489453, "grad_norm": 0.8926411271095276, "learning_rate": 7.499392466585662e-05, "loss": 1.0148, "step": 4126 }, { "epoch": 1.2526938837456367, "grad_norm": 0.4711840748786926, "learning_rate": 7.498784933171324e-05, "loss": 1.6824, "step": 4127 }, { "epoch": 1.2529974199423282, "grad_norm": 0.5053786039352417, "learning_rate": 7.498177399756987e-05, "loss": 1.7507, "step": 4128 }, { "epoch": 1.2533009561390196, "grad_norm": 0.4618197977542877, "learning_rate": 7.49756986634265e-05, "loss": 1.1271, "step": 4129 }, { "epoch": 1.253604492335711, "grad_norm": 0.42752858996391296, "learning_rate": 7.496962332928311e-05, "loss": 1.8872, "step": 4130 }, { "epoch": 1.2539080285324025, "grad_norm": 0.4735817313194275, "learning_rate": 7.496354799513974e-05, "loss": 1.6949, "step": 4131 }, { "epoch": 1.254211564729094, "grad_norm": 0.5096668004989624, "learning_rate": 7.495747266099636e-05, "loss": 0.8228, "step": 4132 }, { "epoch": 1.2545151009257853, "grad_norm": 0.6059778332710266, "learning_rate": 7.495139732685297e-05, "loss": 1.4917, "step": 4133 }, { "epoch": 1.2548186371224768, "grad_norm": 0.6292750835418701, "learning_rate": 7.49453219927096e-05, "loss": 1.2934, "step": 4134 }, { "epoch": 1.2551221733191684, "grad_norm": 0.5451275706291199, "learning_rate": 7.493924665856622e-05, "loss": 2.0059, "step": 4135 }, { "epoch": 1.2554257095158596, "grad_norm": 0.5799239277839661, "learning_rate": 7.493317132442285e-05, "loss": 1.4994, "step": 4136 }, { "epoch": 1.2557292457125513, "grad_norm": 0.4744884967803955, "learning_rate": 7.492709599027947e-05, "loss": 1.5334, "step": 4137 }, { "epoch": 1.2560327819092427, "grad_norm": 0.4778893291950226, "learning_rate": 7.492102065613609e-05, "loss": 1.7536, "step": 4138 }, { "epoch": 1.2563363181059342, "grad_norm": 0.5556294918060303, "learning_rate": 7.491494532199272e-05, "loss": 1.9507, "step": 4139 }, { "epoch": 1.2566398543026256, "grad_norm": 0.8392012119293213, "learning_rate": 7.490886998784933e-05, "loss": 1.1061, "step": 4140 }, { "epoch": 1.256943390499317, "grad_norm": 0.42154213786125183, "learning_rate": 7.490279465370595e-05, "loss": 1.1891, "step": 4141 }, { "epoch": 1.2572469266960085, "grad_norm": 0.46638983488082886, "learning_rate": 7.489671931956258e-05, "loss": 1.7741, "step": 4142 }, { "epoch": 1.2575504628927, "grad_norm": 0.5562369227409363, "learning_rate": 7.48906439854192e-05, "loss": 1.6542, "step": 4143 }, { "epoch": 1.2578539990893913, "grad_norm": 0.5079572796821594, "learning_rate": 7.488456865127582e-05, "loss": 1.9253, "step": 4144 }, { "epoch": 1.2581575352860828, "grad_norm": 0.8773594498634338, "learning_rate": 7.487849331713245e-05, "loss": 1.0602, "step": 4145 }, { "epoch": 1.2584610714827744, "grad_norm": 0.6297957301139832, "learning_rate": 7.487241798298907e-05, "loss": 1.4078, "step": 4146 }, { "epoch": 1.2587646076794659, "grad_norm": 0.6245468854904175, "learning_rate": 7.486634264884568e-05, "loss": 1.5408, "step": 4147 }, { "epoch": 1.2590681438761573, "grad_norm": 0.6770250201225281, "learning_rate": 7.486026731470231e-05, "loss": 1.5883, "step": 4148 }, { "epoch": 1.2593716800728487, "grad_norm": 0.571240246295929, "learning_rate": 7.485419198055893e-05, "loss": 1.851, "step": 4149 }, { "epoch": 1.2596752162695402, "grad_norm": 0.6159947514533997, "learning_rate": 7.484811664641556e-05, "loss": 1.7828, "step": 4150 }, { "epoch": 1.2599787524662316, "grad_norm": 0.4492059051990509, "learning_rate": 7.484204131227218e-05, "loss": 1.7235, "step": 4151 }, { "epoch": 1.260282288662923, "grad_norm": 1.0111632347106934, "learning_rate": 7.48359659781288e-05, "loss": 1.5976, "step": 4152 }, { "epoch": 1.2605858248596145, "grad_norm": 0.5812898278236389, "learning_rate": 7.482989064398543e-05, "loss": 1.9861, "step": 4153 }, { "epoch": 1.260889361056306, "grad_norm": 0.4427884519100189, "learning_rate": 7.482381530984204e-05, "loss": 1.7063, "step": 4154 }, { "epoch": 1.2611928972529975, "grad_norm": 0.4709545373916626, "learning_rate": 7.481773997569866e-05, "loss": 1.8049, "step": 4155 }, { "epoch": 1.2614964334496888, "grad_norm": 0.5175103545188904, "learning_rate": 7.481166464155529e-05, "loss": 1.8579, "step": 4156 }, { "epoch": 1.2617999696463804, "grad_norm": 0.6060042977333069, "learning_rate": 7.480558930741191e-05, "loss": 1.1567, "step": 4157 }, { "epoch": 1.2621035058430718, "grad_norm": 0.6093948483467102, "learning_rate": 7.479951397326853e-05, "loss": 1.2373, "step": 4158 }, { "epoch": 1.2624070420397633, "grad_norm": 0.5557296872138977, "learning_rate": 7.479343863912516e-05, "loss": 1.7976, "step": 4159 }, { "epoch": 1.2627105782364547, "grad_norm": 0.5482549667358398, "learning_rate": 7.478736330498178e-05, "loss": 1.4302, "step": 4160 }, { "epoch": 1.2630141144331462, "grad_norm": 0.5282002091407776, "learning_rate": 7.478128797083839e-05, "loss": 1.7744, "step": 4161 }, { "epoch": 1.2633176506298376, "grad_norm": 0.49145814776420593, "learning_rate": 7.477521263669502e-05, "loss": 1.2955, "step": 4162 }, { "epoch": 1.263621186826529, "grad_norm": 0.838631272315979, "learning_rate": 7.476913730255164e-05, "loss": 1.8272, "step": 4163 }, { "epoch": 1.2639247230232205, "grad_norm": 0.5460755825042725, "learning_rate": 7.476306196840827e-05, "loss": 1.6878, "step": 4164 }, { "epoch": 1.2642282592199119, "grad_norm": 0.5344128608703613, "learning_rate": 7.475698663426489e-05, "loss": 1.9606, "step": 4165 }, { "epoch": 1.2645317954166035, "grad_norm": 0.5265620350837708, "learning_rate": 7.47509113001215e-05, "loss": 1.8173, "step": 4166 }, { "epoch": 1.2648353316132948, "grad_norm": 0.5582383275032043, "learning_rate": 7.474483596597814e-05, "loss": 1.4313, "step": 4167 }, { "epoch": 1.2651388678099864, "grad_norm": 0.5039302110671997, "learning_rate": 7.473876063183475e-05, "loss": 1.6151, "step": 4168 }, { "epoch": 1.2654424040066778, "grad_norm": 0.48106664419174194, "learning_rate": 7.473268529769137e-05, "loss": 1.376, "step": 4169 }, { "epoch": 1.2657459402033693, "grad_norm": 0.7529036998748779, "learning_rate": 7.4726609963548e-05, "loss": 1.2728, "step": 4170 }, { "epoch": 1.2660494764000607, "grad_norm": 0.4341298043727875, "learning_rate": 7.472053462940462e-05, "loss": 1.1662, "step": 4171 }, { "epoch": 1.2663530125967521, "grad_norm": 0.5080009698867798, "learning_rate": 7.471445929526124e-05, "loss": 1.8927, "step": 4172 }, { "epoch": 1.2666565487934436, "grad_norm": 0.5409614443778992, "learning_rate": 7.470838396111787e-05, "loss": 1.548, "step": 4173 }, { "epoch": 1.266960084990135, "grad_norm": 0.6036258339881897, "learning_rate": 7.470230862697449e-05, "loss": 1.5646, "step": 4174 }, { "epoch": 1.2672636211868264, "grad_norm": 0.496652752161026, "learning_rate": 7.46962332928311e-05, "loss": 1.7042, "step": 4175 }, { "epoch": 1.2675671573835179, "grad_norm": 0.5276821255683899, "learning_rate": 7.469015795868773e-05, "loss": 1.4722, "step": 4176 }, { "epoch": 1.2678706935802095, "grad_norm": 0.4797695577144623, "learning_rate": 7.468408262454435e-05, "loss": 1.6569, "step": 4177 }, { "epoch": 1.268174229776901, "grad_norm": 0.5196139812469482, "learning_rate": 7.467800729040098e-05, "loss": 1.7454, "step": 4178 }, { "epoch": 1.2684777659735924, "grad_norm": 0.5565782785415649, "learning_rate": 7.46719319562576e-05, "loss": 1.6089, "step": 4179 }, { "epoch": 1.2687813021702838, "grad_norm": 0.49715206027030945, "learning_rate": 7.466585662211422e-05, "loss": 1.6336, "step": 4180 }, { "epoch": 1.2690848383669753, "grad_norm": 0.5397220849990845, "learning_rate": 7.465978128797085e-05, "loss": 1.452, "step": 4181 }, { "epoch": 1.2693883745636667, "grad_norm": 0.7954735159873962, "learning_rate": 7.465370595382746e-05, "loss": 0.973, "step": 4182 }, { "epoch": 1.2696919107603581, "grad_norm": 0.6067697405815125, "learning_rate": 7.464763061968408e-05, "loss": 1.5954, "step": 4183 }, { "epoch": 1.2699954469570496, "grad_norm": 0.5883306860923767, "learning_rate": 7.464155528554071e-05, "loss": 2.072, "step": 4184 }, { "epoch": 1.270298983153741, "grad_norm": 0.503072202205658, "learning_rate": 7.463547995139733e-05, "loss": 1.6075, "step": 4185 }, { "epoch": 1.2706025193504327, "grad_norm": 0.4737991392612457, "learning_rate": 7.462940461725395e-05, "loss": 1.8853, "step": 4186 }, { "epoch": 1.2709060555471239, "grad_norm": 0.6354210376739502, "learning_rate": 7.462332928311058e-05, "loss": 1.901, "step": 4187 }, { "epoch": 1.2712095917438155, "grad_norm": 0.5082765817642212, "learning_rate": 7.46172539489672e-05, "loss": 1.3334, "step": 4188 }, { "epoch": 1.271513127940507, "grad_norm": 0.5836446285247803, "learning_rate": 7.461117861482381e-05, "loss": 1.4225, "step": 4189 }, { "epoch": 1.2718166641371984, "grad_norm": 0.6026635766029358, "learning_rate": 7.460510328068044e-05, "loss": 1.693, "step": 4190 }, { "epoch": 1.2721202003338898, "grad_norm": 0.4391961395740509, "learning_rate": 7.459902794653706e-05, "loss": 1.5203, "step": 4191 }, { "epoch": 1.2724237365305813, "grad_norm": 0.4942375123500824, "learning_rate": 7.459295261239369e-05, "loss": 1.8957, "step": 4192 }, { "epoch": 1.2727272727272727, "grad_norm": 0.5597366094589233, "learning_rate": 7.458687727825031e-05, "loss": 1.6267, "step": 4193 }, { "epoch": 1.2730308089239641, "grad_norm": 0.49395179748535156, "learning_rate": 7.458080194410693e-05, "loss": 1.6232, "step": 4194 }, { "epoch": 1.2733343451206556, "grad_norm": 0.6147474646568298, "learning_rate": 7.457472660996356e-05, "loss": 1.1225, "step": 4195 }, { "epoch": 1.273637881317347, "grad_norm": 0.5345576405525208, "learning_rate": 7.456865127582017e-05, "loss": 1.8809, "step": 4196 }, { "epoch": 1.2739414175140387, "grad_norm": 0.5789499878883362, "learning_rate": 7.456257594167679e-05, "loss": 1.67, "step": 4197 }, { "epoch": 1.2742449537107299, "grad_norm": 0.49217602610588074, "learning_rate": 7.455650060753342e-05, "loss": 1.8027, "step": 4198 }, { "epoch": 1.2745484899074215, "grad_norm": 0.4023227095603943, "learning_rate": 7.455042527339004e-05, "loss": 1.1221, "step": 4199 }, { "epoch": 1.274852026104113, "grad_norm": 0.5066853761672974, "learning_rate": 7.454434993924666e-05, "loss": 1.3252, "step": 4200 }, { "epoch": 1.2751555623008044, "grad_norm": 0.46731558442115784, "learning_rate": 7.453827460510329e-05, "loss": 1.3056, "step": 4201 }, { "epoch": 1.2754590984974958, "grad_norm": 0.594733715057373, "learning_rate": 7.45321992709599e-05, "loss": 1.8543, "step": 4202 }, { "epoch": 1.2757626346941873, "grad_norm": 0.4806402027606964, "learning_rate": 7.452612393681652e-05, "loss": 1.8651, "step": 4203 }, { "epoch": 1.2760661708908787, "grad_norm": 0.476089745759964, "learning_rate": 7.452004860267315e-05, "loss": 1.8084, "step": 4204 }, { "epoch": 1.2763697070875701, "grad_norm": 0.4468570947647095, "learning_rate": 7.451397326852977e-05, "loss": 1.8509, "step": 4205 }, { "epoch": 1.2766732432842616, "grad_norm": 0.608421802520752, "learning_rate": 7.450789793438639e-05, "loss": 1.3824, "step": 4206 }, { "epoch": 1.276976779480953, "grad_norm": 0.517336905002594, "learning_rate": 7.450182260024302e-05, "loss": 1.7146, "step": 4207 }, { "epoch": 1.2772803156776447, "grad_norm": 0.4810909032821655, "learning_rate": 7.449574726609964e-05, "loss": 1.6262, "step": 4208 }, { "epoch": 1.277583851874336, "grad_norm": 0.5203115344047546, "learning_rate": 7.448967193195627e-05, "loss": 1.5095, "step": 4209 }, { "epoch": 1.2778873880710275, "grad_norm": 0.48836538195610046, "learning_rate": 7.448359659781288e-05, "loss": 1.076, "step": 4210 }, { "epoch": 1.278190924267719, "grad_norm": 0.49540603160858154, "learning_rate": 7.44775212636695e-05, "loss": 1.3751, "step": 4211 }, { "epoch": 1.2784944604644104, "grad_norm": 0.4693083167076111, "learning_rate": 7.447144592952613e-05, "loss": 2.007, "step": 4212 }, { "epoch": 1.2787979966611018, "grad_norm": 0.5337846875190735, "learning_rate": 7.446537059538275e-05, "loss": 1.7861, "step": 4213 }, { "epoch": 1.2791015328577933, "grad_norm": 0.5780406594276428, "learning_rate": 7.445929526123937e-05, "loss": 1.6548, "step": 4214 }, { "epoch": 1.2794050690544847, "grad_norm": 0.4388402998447418, "learning_rate": 7.4453219927096e-05, "loss": 1.8911, "step": 4215 }, { "epoch": 1.2797086052511761, "grad_norm": 0.5437458753585815, "learning_rate": 7.444714459295262e-05, "loss": 2.0187, "step": 4216 }, { "epoch": 1.2800121414478678, "grad_norm": 2.0740556716918945, "learning_rate": 7.444106925880923e-05, "loss": 1.926, "step": 4217 }, { "epoch": 1.280315677644559, "grad_norm": 0.5022751688957214, "learning_rate": 7.443499392466586e-05, "loss": 1.6786, "step": 4218 }, { "epoch": 1.2806192138412507, "grad_norm": 0.5324148535728455, "learning_rate": 7.442891859052248e-05, "loss": 1.6156, "step": 4219 }, { "epoch": 1.280922750037942, "grad_norm": 0.5735637545585632, "learning_rate": 7.44228432563791e-05, "loss": 1.4541, "step": 4220 }, { "epoch": 1.2812262862346335, "grad_norm": 0.4589940905570984, "learning_rate": 7.441676792223573e-05, "loss": 1.8461, "step": 4221 }, { "epoch": 1.281529822431325, "grad_norm": 0.44274652004241943, "learning_rate": 7.441069258809235e-05, "loss": 1.992, "step": 4222 }, { "epoch": 1.2818333586280164, "grad_norm": 0.5244073867797852, "learning_rate": 7.440461725394898e-05, "loss": 1.5408, "step": 4223 }, { "epoch": 1.2821368948247078, "grad_norm": 0.513891339302063, "learning_rate": 7.43985419198056e-05, "loss": 1.6297, "step": 4224 }, { "epoch": 1.2824404310213993, "grad_norm": 0.416079580783844, "learning_rate": 7.439246658566221e-05, "loss": 1.6186, "step": 4225 }, { "epoch": 1.2827439672180907, "grad_norm": 0.4873940944671631, "learning_rate": 7.438639125151884e-05, "loss": 1.83, "step": 4226 }, { "epoch": 1.2830475034147821, "grad_norm": 0.5091072916984558, "learning_rate": 7.438031591737546e-05, "loss": 1.4658, "step": 4227 }, { "epoch": 1.2833510396114738, "grad_norm": 0.6976317167282104, "learning_rate": 7.437424058323208e-05, "loss": 1.8133, "step": 4228 }, { "epoch": 1.283654575808165, "grad_norm": 0.5269485116004944, "learning_rate": 7.436816524908871e-05, "loss": 1.8103, "step": 4229 }, { "epoch": 1.2839581120048567, "grad_norm": 0.44276031851768494, "learning_rate": 7.436208991494533e-05, "loss": 1.8184, "step": 4230 }, { "epoch": 1.284261648201548, "grad_norm": 0.4946112632751465, "learning_rate": 7.435601458080194e-05, "loss": 1.7158, "step": 4231 }, { "epoch": 1.2845651843982395, "grad_norm": 1.0636193752288818, "learning_rate": 7.434993924665857e-05, "loss": 1.621, "step": 4232 }, { "epoch": 1.284868720594931, "grad_norm": 0.42557114362716675, "learning_rate": 7.434386391251519e-05, "loss": 1.4983, "step": 4233 }, { "epoch": 1.2851722567916224, "grad_norm": 0.6611326932907104, "learning_rate": 7.433778857837181e-05, "loss": 1.8904, "step": 4234 }, { "epoch": 1.2854757929883138, "grad_norm": 0.47779926657676697, "learning_rate": 7.433171324422844e-05, "loss": 1.8208, "step": 4235 }, { "epoch": 1.2857793291850053, "grad_norm": 0.5227665901184082, "learning_rate": 7.432563791008506e-05, "loss": 1.713, "step": 4236 }, { "epoch": 1.2860828653816967, "grad_norm": 0.5510228872299194, "learning_rate": 7.431956257594169e-05, "loss": 1.632, "step": 4237 }, { "epoch": 1.2863864015783881, "grad_norm": 0.44311997294425964, "learning_rate": 7.43134872417983e-05, "loss": 1.5965, "step": 4238 }, { "epoch": 1.2866899377750798, "grad_norm": 0.4647054672241211, "learning_rate": 7.430741190765492e-05, "loss": 1.7261, "step": 4239 }, { "epoch": 1.286993473971771, "grad_norm": 0.564996063709259, "learning_rate": 7.430133657351155e-05, "loss": 1.1348, "step": 4240 }, { "epoch": 1.2872970101684627, "grad_norm": 0.4639973044395447, "learning_rate": 7.429526123936817e-05, "loss": 1.4364, "step": 4241 }, { "epoch": 1.287600546365154, "grad_norm": 0.5808007121086121, "learning_rate": 7.428918590522479e-05, "loss": 1.5214, "step": 4242 }, { "epoch": 1.2879040825618455, "grad_norm": 0.5553866624832153, "learning_rate": 7.428311057108142e-05, "loss": 1.5734, "step": 4243 }, { "epoch": 1.288207618758537, "grad_norm": 0.4662241041660309, "learning_rate": 7.427703523693804e-05, "loss": 2.1107, "step": 4244 }, { "epoch": 1.2885111549552284, "grad_norm": 0.542239785194397, "learning_rate": 7.427095990279465e-05, "loss": 1.1648, "step": 4245 }, { "epoch": 1.2888146911519198, "grad_norm": 0.531491756439209, "learning_rate": 7.426488456865128e-05, "loss": 1.8248, "step": 4246 }, { "epoch": 1.2891182273486113, "grad_norm": 0.595971941947937, "learning_rate": 7.42588092345079e-05, "loss": 1.4951, "step": 4247 }, { "epoch": 1.289421763545303, "grad_norm": 1.127319574356079, "learning_rate": 7.425273390036452e-05, "loss": 1.4612, "step": 4248 }, { "epoch": 1.2897252997419941, "grad_norm": 0.5719674825668335, "learning_rate": 7.424665856622115e-05, "loss": 1.4476, "step": 4249 }, { "epoch": 1.2900288359386858, "grad_norm": 0.552703320980072, "learning_rate": 7.424058323207777e-05, "loss": 2.0541, "step": 4250 }, { "epoch": 1.2903323721353772, "grad_norm": 0.48743313550949097, "learning_rate": 7.42345078979344e-05, "loss": 1.3482, "step": 4251 }, { "epoch": 1.2906359083320686, "grad_norm": 0.49920594692230225, "learning_rate": 7.422843256379101e-05, "loss": 1.2493, "step": 4252 }, { "epoch": 1.29093944452876, "grad_norm": 2.3087148666381836, "learning_rate": 7.422235722964763e-05, "loss": 1.861, "step": 4253 }, { "epoch": 1.2912429807254515, "grad_norm": 0.5297853946685791, "learning_rate": 7.421628189550426e-05, "loss": 1.4438, "step": 4254 }, { "epoch": 1.291546516922143, "grad_norm": 0.5467548966407776, "learning_rate": 7.421020656136087e-05, "loss": 1.4626, "step": 4255 }, { "epoch": 1.2918500531188344, "grad_norm": 0.5381457805633545, "learning_rate": 7.42041312272175e-05, "loss": 1.6073, "step": 4256 }, { "epoch": 1.2921535893155258, "grad_norm": 0.5748366713523865, "learning_rate": 7.419805589307413e-05, "loss": 1.5323, "step": 4257 }, { "epoch": 1.2924571255122173, "grad_norm": 0.4963832497596741, "learning_rate": 7.419198055893075e-05, "loss": 1.7466, "step": 4258 }, { "epoch": 1.292760661708909, "grad_norm": 0.5308859348297119, "learning_rate": 7.418590522478736e-05, "loss": 1.613, "step": 4259 }, { "epoch": 1.2930641979056001, "grad_norm": 0.5278881192207336, "learning_rate": 7.4179829890644e-05, "loss": 1.5005, "step": 4260 }, { "epoch": 1.2933677341022918, "grad_norm": 0.5616053938865662, "learning_rate": 7.417375455650061e-05, "loss": 1.4673, "step": 4261 }, { "epoch": 1.2936712702989832, "grad_norm": 0.4754112660884857, "learning_rate": 7.416767922235723e-05, "loss": 1.1533, "step": 4262 }, { "epoch": 1.2939748064956746, "grad_norm": 0.4585922062397003, "learning_rate": 7.416160388821386e-05, "loss": 1.3095, "step": 4263 }, { "epoch": 1.294278342692366, "grad_norm": 0.5392476916313171, "learning_rate": 7.415552855407048e-05, "loss": 1.8251, "step": 4264 }, { "epoch": 1.2945818788890575, "grad_norm": 0.5653027296066284, "learning_rate": 7.414945321992711e-05, "loss": 1.4677, "step": 4265 }, { "epoch": 1.294885415085749, "grad_norm": 0.4403519034385681, "learning_rate": 7.414337788578372e-05, "loss": 1.2953, "step": 4266 }, { "epoch": 1.2951889512824404, "grad_norm": 0.5099658370018005, "learning_rate": 7.413730255164034e-05, "loss": 1.6105, "step": 4267 }, { "epoch": 1.2954924874791318, "grad_norm": 0.48418527841567993, "learning_rate": 7.413122721749697e-05, "loss": 1.7371, "step": 4268 }, { "epoch": 1.2957960236758232, "grad_norm": 0.4962136447429657, "learning_rate": 7.412515188335358e-05, "loss": 1.8332, "step": 4269 }, { "epoch": 1.296099559872515, "grad_norm": 0.5248817801475525, "learning_rate": 7.411907654921021e-05, "loss": 1.0489, "step": 4270 }, { "epoch": 1.2964030960692061, "grad_norm": 0.5496529936790466, "learning_rate": 7.411300121506684e-05, "loss": 1.5079, "step": 4271 }, { "epoch": 1.2967066322658978, "grad_norm": 0.49035531282424927, "learning_rate": 7.410692588092346e-05, "loss": 1.7401, "step": 4272 }, { "epoch": 1.2970101684625892, "grad_norm": 0.5801602602005005, "learning_rate": 7.410085054678007e-05, "loss": 2.0675, "step": 4273 }, { "epoch": 1.2973137046592806, "grad_norm": 0.5056676268577576, "learning_rate": 7.40947752126367e-05, "loss": 1.7762, "step": 4274 }, { "epoch": 1.297617240855972, "grad_norm": 0.7785037159919739, "learning_rate": 7.408869987849332e-05, "loss": 1.4058, "step": 4275 }, { "epoch": 1.2979207770526635, "grad_norm": 1.1016004085540771, "learning_rate": 7.408262454434994e-05, "loss": 1.099, "step": 4276 }, { "epoch": 1.298224313249355, "grad_norm": 0.4835539162158966, "learning_rate": 7.407654921020657e-05, "loss": 1.5021, "step": 4277 }, { "epoch": 1.2985278494460464, "grad_norm": 0.4456567168235779, "learning_rate": 7.407047387606319e-05, "loss": 1.3587, "step": 4278 }, { "epoch": 1.298831385642738, "grad_norm": 0.5003551840782166, "learning_rate": 7.40643985419198e-05, "loss": 1.6672, "step": 4279 }, { "epoch": 1.2991349218394292, "grad_norm": 0.5069398880004883, "learning_rate": 7.405832320777643e-05, "loss": 1.4526, "step": 4280 }, { "epoch": 1.299438458036121, "grad_norm": 0.5351117253303528, "learning_rate": 7.405224787363305e-05, "loss": 1.7542, "step": 4281 }, { "epoch": 1.2997419942328123, "grad_norm": 0.5914597511291504, "learning_rate": 7.404617253948968e-05, "loss": 1.4507, "step": 4282 }, { "epoch": 1.3000455304295038, "grad_norm": 0.6194307804107666, "learning_rate": 7.404009720534629e-05, "loss": 1.7291, "step": 4283 }, { "epoch": 1.3003490666261952, "grad_norm": 0.37127256393432617, "learning_rate": 7.403402187120292e-05, "loss": 1.2599, "step": 4284 }, { "epoch": 1.3006526028228866, "grad_norm": 0.49058303236961365, "learning_rate": 7.402794653705955e-05, "loss": 1.8608, "step": 4285 }, { "epoch": 1.300956139019578, "grad_norm": 0.5458847880363464, "learning_rate": 7.402187120291617e-05, "loss": 1.7041, "step": 4286 }, { "epoch": 1.3012596752162695, "grad_norm": 0.5570408701896667, "learning_rate": 7.401579586877278e-05, "loss": 1.8484, "step": 4287 }, { "epoch": 1.301563211412961, "grad_norm": 0.5224193930625916, "learning_rate": 7.400972053462941e-05, "loss": 1.5517, "step": 4288 }, { "epoch": 1.3018667476096524, "grad_norm": 0.6026464700698853, "learning_rate": 7.400364520048603e-05, "loss": 1.0592, "step": 4289 }, { "epoch": 1.302170283806344, "grad_norm": 0.5078088641166687, "learning_rate": 7.399756986634265e-05, "loss": 1.559, "step": 4290 }, { "epoch": 1.3024738200030352, "grad_norm": 0.5993932485580444, "learning_rate": 7.399149453219928e-05, "loss": 1.608, "step": 4291 }, { "epoch": 1.302777356199727, "grad_norm": 0.5589376091957092, "learning_rate": 7.39854191980559e-05, "loss": 1.5105, "step": 4292 }, { "epoch": 1.3030808923964183, "grad_norm": 0.5910342931747437, "learning_rate": 7.397934386391251e-05, "loss": 1.6214, "step": 4293 }, { "epoch": 1.3033844285931098, "grad_norm": 0.5900040864944458, "learning_rate": 7.397326852976914e-05, "loss": 1.6379, "step": 4294 }, { "epoch": 1.3036879647898012, "grad_norm": 0.561262845993042, "learning_rate": 7.396719319562576e-05, "loss": 1.7905, "step": 4295 }, { "epoch": 1.3039915009864926, "grad_norm": 0.4261028468608856, "learning_rate": 7.396111786148239e-05, "loss": 1.5822, "step": 4296 }, { "epoch": 1.304295037183184, "grad_norm": 0.5175686478614807, "learning_rate": 7.3955042527339e-05, "loss": 1.6183, "step": 4297 }, { "epoch": 1.3045985733798755, "grad_norm": 0.46133172512054443, "learning_rate": 7.394896719319563e-05, "loss": 1.7778, "step": 4298 }, { "epoch": 1.304902109576567, "grad_norm": 1.0579636096954346, "learning_rate": 7.394289185905226e-05, "loss": 1.7789, "step": 4299 }, { "epoch": 1.3052056457732584, "grad_norm": 0.5155093669891357, "learning_rate": 7.393681652490888e-05, "loss": 1.6568, "step": 4300 }, { "epoch": 1.30550918196995, "grad_norm": 0.4386572241783142, "learning_rate": 7.393074119076549e-05, "loss": 1.8987, "step": 4301 }, { "epoch": 1.3058127181666412, "grad_norm": 0.5592470169067383, "learning_rate": 7.392466585662212e-05, "loss": 1.764, "step": 4302 }, { "epoch": 1.306116254363333, "grad_norm": 0.5633784532546997, "learning_rate": 7.391859052247874e-05, "loss": 1.6752, "step": 4303 }, { "epoch": 1.3064197905600243, "grad_norm": 0.860517680644989, "learning_rate": 7.391251518833536e-05, "loss": 0.976, "step": 4304 }, { "epoch": 1.3067233267567158, "grad_norm": 0.44970962405204773, "learning_rate": 7.390643985419199e-05, "loss": 1.7791, "step": 4305 }, { "epoch": 1.3070268629534072, "grad_norm": 0.5651503205299377, "learning_rate": 7.390036452004861e-05, "loss": 1.2142, "step": 4306 }, { "epoch": 1.3073303991500986, "grad_norm": 0.5687608122825623, "learning_rate": 7.389428918590522e-05, "loss": 1.91, "step": 4307 }, { "epoch": 1.30763393534679, "grad_norm": 0.5673285126686096, "learning_rate": 7.388821385176184e-05, "loss": 1.6851, "step": 4308 }, { "epoch": 1.3079374715434815, "grad_norm": 0.5170315504074097, "learning_rate": 7.388213851761847e-05, "loss": 1.8565, "step": 4309 }, { "epoch": 1.308241007740173, "grad_norm": 0.45069336891174316, "learning_rate": 7.38760631834751e-05, "loss": 1.9078, "step": 4310 }, { "epoch": 1.3085445439368644, "grad_norm": 0.5088868141174316, "learning_rate": 7.386998784933171e-05, "loss": 1.5292, "step": 4311 }, { "epoch": 1.308848080133556, "grad_norm": 0.5309886932373047, "learning_rate": 7.386391251518834e-05, "loss": 1.8614, "step": 4312 }, { "epoch": 1.3091516163302475, "grad_norm": 0.5747066140174866, "learning_rate": 7.385783718104497e-05, "loss": 1.4692, "step": 4313 }, { "epoch": 1.309455152526939, "grad_norm": 0.5247424840927124, "learning_rate": 7.385176184690159e-05, "loss": 1.9508, "step": 4314 }, { "epoch": 1.3097586887236303, "grad_norm": 0.7920752167701721, "learning_rate": 7.38456865127582e-05, "loss": 1.3849, "step": 4315 }, { "epoch": 1.3100622249203218, "grad_norm": 0.5336897373199463, "learning_rate": 7.383961117861483e-05, "loss": 1.8924, "step": 4316 }, { "epoch": 1.3103657611170132, "grad_norm": 0.5634369850158691, "learning_rate": 7.383353584447145e-05, "loss": 1.6056, "step": 4317 }, { "epoch": 1.3106692973137046, "grad_norm": 0.5356997847557068, "learning_rate": 7.382746051032807e-05, "loss": 1.7636, "step": 4318 }, { "epoch": 1.310972833510396, "grad_norm": 0.552720308303833, "learning_rate": 7.38213851761847e-05, "loss": 1.5682, "step": 4319 }, { "epoch": 1.3112763697070875, "grad_norm": 0.5768778324127197, "learning_rate": 7.381530984204132e-05, "loss": 1.2844, "step": 4320 }, { "epoch": 1.3115799059037792, "grad_norm": 0.45900124311447144, "learning_rate": 7.380923450789793e-05, "loss": 1.6181, "step": 4321 }, { "epoch": 1.3118834421004704, "grad_norm": 0.5029597878456116, "learning_rate": 7.380315917375455e-05, "loss": 1.4201, "step": 4322 }, { "epoch": 1.312186978297162, "grad_norm": 0.55179363489151, "learning_rate": 7.379708383961118e-05, "loss": 1.4521, "step": 4323 }, { "epoch": 1.3124905144938535, "grad_norm": 0.49221259355545044, "learning_rate": 7.379100850546781e-05, "loss": 1.5294, "step": 4324 }, { "epoch": 1.312794050690545, "grad_norm": 0.5700541734695435, "learning_rate": 7.378493317132442e-05, "loss": 1.5504, "step": 4325 }, { "epoch": 1.3130975868872363, "grad_norm": 0.3949977159500122, "learning_rate": 7.377885783718105e-05, "loss": 1.4024, "step": 4326 }, { "epoch": 1.3134011230839278, "grad_norm": 0.4766468107700348, "learning_rate": 7.377278250303768e-05, "loss": 1.9027, "step": 4327 }, { "epoch": 1.3137046592806192, "grad_norm": 0.6489190459251404, "learning_rate": 7.376670716889428e-05, "loss": 1.6306, "step": 4328 }, { "epoch": 1.3140081954773106, "grad_norm": 0.5466142892837524, "learning_rate": 7.376063183475091e-05, "loss": 1.619, "step": 4329 }, { "epoch": 1.314311731674002, "grad_norm": 0.5554240345954895, "learning_rate": 7.375455650060754e-05, "loss": 1.8692, "step": 4330 }, { "epoch": 1.3146152678706935, "grad_norm": 0.473034143447876, "learning_rate": 7.374848116646416e-05, "loss": 1.8351, "step": 4331 }, { "epoch": 1.3149188040673851, "grad_norm": 0.5100724697113037, "learning_rate": 7.374240583232078e-05, "loss": 1.6781, "step": 4332 }, { "epoch": 1.3152223402640764, "grad_norm": 0.48680275678634644, "learning_rate": 7.373633049817741e-05, "loss": 1.6358, "step": 4333 }, { "epoch": 1.315525876460768, "grad_norm": 0.5869154334068298, "learning_rate": 7.373025516403403e-05, "loss": 1.5926, "step": 4334 }, { "epoch": 1.3158294126574595, "grad_norm": 0.4726203382015228, "learning_rate": 7.372417982989064e-05, "loss": 1.6651, "step": 4335 }, { "epoch": 1.3161329488541509, "grad_norm": 0.9809671640396118, "learning_rate": 7.371810449574726e-05, "loss": 1.1371, "step": 4336 }, { "epoch": 1.3164364850508423, "grad_norm": 0.5169830322265625, "learning_rate": 7.371202916160389e-05, "loss": 1.4736, "step": 4337 }, { "epoch": 1.3167400212475338, "grad_norm": 0.48734250664711, "learning_rate": 7.370595382746052e-05, "loss": 1.6371, "step": 4338 }, { "epoch": 1.3170435574442252, "grad_norm": 0.5264248847961426, "learning_rate": 7.369987849331713e-05, "loss": 1.7498, "step": 4339 }, { "epoch": 1.3173470936409166, "grad_norm": 0.5730972290039062, "learning_rate": 7.369380315917376e-05, "loss": 1.3057, "step": 4340 }, { "epoch": 1.317650629837608, "grad_norm": 0.776110827922821, "learning_rate": 7.368772782503039e-05, "loss": 1.4484, "step": 4341 }, { "epoch": 1.3179541660342995, "grad_norm": 0.5012614130973816, "learning_rate": 7.368165249088699e-05, "loss": 1.041, "step": 4342 }, { "epoch": 1.3182577022309911, "grad_norm": 0.5454205870628357, "learning_rate": 7.367557715674362e-05, "loss": 1.3911, "step": 4343 }, { "epoch": 1.3185612384276826, "grad_norm": 0.4966050386428833, "learning_rate": 7.366950182260025e-05, "loss": 1.8287, "step": 4344 }, { "epoch": 1.318864774624374, "grad_norm": 0.5605126619338989, "learning_rate": 7.366342648845687e-05, "loss": 2.0233, "step": 4345 }, { "epoch": 1.3191683108210654, "grad_norm": 0.5626348853111267, "learning_rate": 7.365735115431349e-05, "loss": 1.4652, "step": 4346 }, { "epoch": 1.3194718470177569, "grad_norm": 0.5361192226409912, "learning_rate": 7.365127582017012e-05, "loss": 1.8023, "step": 4347 }, { "epoch": 1.3197753832144483, "grad_norm": 0.6468534469604492, "learning_rate": 7.364520048602674e-05, "loss": 1.7418, "step": 4348 }, { "epoch": 1.3200789194111398, "grad_norm": 0.5218302607536316, "learning_rate": 7.363912515188335e-05, "loss": 1.7832, "step": 4349 }, { "epoch": 1.3203824556078312, "grad_norm": 0.4624869227409363, "learning_rate": 7.363304981773997e-05, "loss": 1.8654, "step": 4350 }, { "epoch": 1.3206859918045226, "grad_norm": 1.073112964630127, "learning_rate": 7.36269744835966e-05, "loss": 1.6806, "step": 4351 }, { "epoch": 1.3209895280012143, "grad_norm": 0.5314664244651794, "learning_rate": 7.362089914945322e-05, "loss": 1.7686, "step": 4352 }, { "epoch": 1.3212930641979055, "grad_norm": 0.49280011653900146, "learning_rate": 7.361482381530984e-05, "loss": 1.8869, "step": 4353 }, { "epoch": 1.3215966003945971, "grad_norm": 0.5610837340354919, "learning_rate": 7.360874848116647e-05, "loss": 1.6387, "step": 4354 }, { "epoch": 1.3219001365912886, "grad_norm": 0.47585153579711914, "learning_rate": 7.36026731470231e-05, "loss": 1.5936, "step": 4355 }, { "epoch": 1.32220367278798, "grad_norm": 0.6694095134735107, "learning_rate": 7.35965978128797e-05, "loss": 1.3855, "step": 4356 }, { "epoch": 1.3225072089846714, "grad_norm": 0.5073167085647583, "learning_rate": 7.359052247873633e-05, "loss": 1.609, "step": 4357 }, { "epoch": 1.3228107451813629, "grad_norm": 0.567101240158081, "learning_rate": 7.358444714459296e-05, "loss": 1.7975, "step": 4358 }, { "epoch": 1.3231142813780543, "grad_norm": 0.5210965275764465, "learning_rate": 7.357837181044958e-05, "loss": 1.6289, "step": 4359 }, { "epoch": 1.3234178175747457, "grad_norm": 0.463466078042984, "learning_rate": 7.35722964763062e-05, "loss": 1.4126, "step": 4360 }, { "epoch": 1.3237213537714372, "grad_norm": 0.523644208908081, "learning_rate": 7.356622114216283e-05, "loss": 1.6897, "step": 4361 }, { "epoch": 1.3240248899681286, "grad_norm": 0.5138276815414429, "learning_rate": 7.356014580801945e-05, "loss": 1.6961, "step": 4362 }, { "epoch": 1.3243284261648203, "grad_norm": 0.4663618505001068, "learning_rate": 7.355407047387606e-05, "loss": 1.2274, "step": 4363 }, { "epoch": 1.3246319623615115, "grad_norm": 0.5608078837394714, "learning_rate": 7.354799513973268e-05, "loss": 1.3882, "step": 4364 }, { "epoch": 1.3249354985582031, "grad_norm": 0.4431968629360199, "learning_rate": 7.354191980558931e-05, "loss": 1.8665, "step": 4365 }, { "epoch": 1.3252390347548946, "grad_norm": 0.5137575268745422, "learning_rate": 7.353584447144593e-05, "loss": 1.8504, "step": 4366 }, { "epoch": 1.325542570951586, "grad_norm": 0.5060153603553772, "learning_rate": 7.352976913730255e-05, "loss": 1.981, "step": 4367 }, { "epoch": 1.3258461071482774, "grad_norm": 0.4878283441066742, "learning_rate": 7.352369380315918e-05, "loss": 0.8917, "step": 4368 }, { "epoch": 1.3261496433449689, "grad_norm": 0.5126661658287048, "learning_rate": 7.351761846901581e-05, "loss": 1.7912, "step": 4369 }, { "epoch": 1.3264531795416603, "grad_norm": 0.742200493812561, "learning_rate": 7.351154313487241e-05, "loss": 1.3088, "step": 4370 }, { "epoch": 1.3267567157383517, "grad_norm": 0.5806966423988342, "learning_rate": 7.350546780072904e-05, "loss": 1.1248, "step": 4371 }, { "epoch": 1.3270602519350432, "grad_norm": 0.46205493807792664, "learning_rate": 7.349939246658567e-05, "loss": 1.6596, "step": 4372 }, { "epoch": 1.3273637881317346, "grad_norm": 0.5538312196731567, "learning_rate": 7.349331713244229e-05, "loss": 1.3168, "step": 4373 }, { "epoch": 1.3276673243284263, "grad_norm": 0.49692967534065247, "learning_rate": 7.348724179829891e-05, "loss": 1.9917, "step": 4374 }, { "epoch": 1.3279708605251177, "grad_norm": 0.5603296160697937, "learning_rate": 7.348116646415554e-05, "loss": 1.5504, "step": 4375 }, { "epoch": 1.3282743967218091, "grad_norm": 0.5459002256393433, "learning_rate": 7.347509113001216e-05, "loss": 1.5883, "step": 4376 }, { "epoch": 1.3285779329185006, "grad_norm": 0.5703235864639282, "learning_rate": 7.346901579586877e-05, "loss": 1.5153, "step": 4377 }, { "epoch": 1.328881469115192, "grad_norm": 0.5803839564323425, "learning_rate": 7.346294046172539e-05, "loss": 1.6111, "step": 4378 }, { "epoch": 1.3291850053118834, "grad_norm": 0.514336347579956, "learning_rate": 7.345686512758202e-05, "loss": 1.4001, "step": 4379 }, { "epoch": 1.3294885415085749, "grad_norm": 0.5741514563560486, "learning_rate": 7.345078979343864e-05, "loss": 1.8245, "step": 4380 }, { "epoch": 1.3297920777052663, "grad_norm": 0.5243557691574097, "learning_rate": 7.344471445929526e-05, "loss": 1.6894, "step": 4381 }, { "epoch": 1.3300956139019577, "grad_norm": 0.529828667640686, "learning_rate": 7.343863912515189e-05, "loss": 1.9486, "step": 4382 }, { "epoch": 1.3303991500986494, "grad_norm": 0.5640398859977722, "learning_rate": 7.343256379100852e-05, "loss": 1.3146, "step": 4383 }, { "epoch": 1.3307026862953406, "grad_norm": 0.537376344203949, "learning_rate": 7.342648845686512e-05, "loss": 1.6864, "step": 4384 }, { "epoch": 1.3310062224920323, "grad_norm": 0.5403789281845093, "learning_rate": 7.342041312272175e-05, "loss": 1.2394, "step": 4385 }, { "epoch": 1.3313097586887237, "grad_norm": 0.5978288054466248, "learning_rate": 7.341433778857838e-05, "loss": 1.6157, "step": 4386 }, { "epoch": 1.3316132948854151, "grad_norm": 0.5005367398262024, "learning_rate": 7.3408262454435e-05, "loss": 1.8174, "step": 4387 }, { "epoch": 1.3319168310821066, "grad_norm": 0.4958413541316986, "learning_rate": 7.340218712029162e-05, "loss": 1.0845, "step": 4388 }, { "epoch": 1.332220367278798, "grad_norm": 0.48872581124305725, "learning_rate": 7.339611178614824e-05, "loss": 1.7209, "step": 4389 }, { "epoch": 1.3325239034754894, "grad_norm": 0.4161425232887268, "learning_rate": 7.339003645200487e-05, "loss": 1.4084, "step": 4390 }, { "epoch": 1.3328274396721809, "grad_norm": 0.5561462044715881, "learning_rate": 7.338396111786148e-05, "loss": 1.7643, "step": 4391 }, { "epoch": 1.3331309758688723, "grad_norm": 0.5434063673019409, "learning_rate": 7.33778857837181e-05, "loss": 1.4755, "step": 4392 }, { "epoch": 1.3334345120655637, "grad_norm": 0.516948938369751, "learning_rate": 7.337181044957473e-05, "loss": 1.6645, "step": 4393 }, { "epoch": 1.3337380482622554, "grad_norm": 0.420340359210968, "learning_rate": 7.336573511543135e-05, "loss": 1.1486, "step": 4394 }, { "epoch": 1.3340415844589466, "grad_norm": 0.49778157472610474, "learning_rate": 7.335965978128797e-05, "loss": 1.8017, "step": 4395 }, { "epoch": 1.3343451206556383, "grad_norm": 0.5771467685699463, "learning_rate": 7.33535844471446e-05, "loss": 1.7241, "step": 4396 }, { "epoch": 1.3346486568523297, "grad_norm": 0.5317919254302979, "learning_rate": 7.334750911300123e-05, "loss": 1.7381, "step": 4397 }, { "epoch": 1.3349521930490211, "grad_norm": 0.37729716300964355, "learning_rate": 7.334143377885783e-05, "loss": 1.4022, "step": 4398 }, { "epoch": 1.3352557292457126, "grad_norm": 0.4068670868873596, "learning_rate": 7.333535844471446e-05, "loss": 1.8955, "step": 4399 }, { "epoch": 1.335559265442404, "grad_norm": 0.5250211358070374, "learning_rate": 7.33292831105711e-05, "loss": 1.9555, "step": 4400 }, { "epoch": 1.3358628016390954, "grad_norm": 0.45118898153305054, "learning_rate": 7.33232077764277e-05, "loss": 1.9174, "step": 4401 }, { "epoch": 1.3361663378357869, "grad_norm": 0.5139362812042236, "learning_rate": 7.331713244228433e-05, "loss": 1.4453, "step": 4402 }, { "epoch": 1.3364698740324783, "grad_norm": 0.5674066543579102, "learning_rate": 7.331105710814095e-05, "loss": 1.7853, "step": 4403 }, { "epoch": 1.3367734102291697, "grad_norm": 0.5428693294525146, "learning_rate": 7.330498177399758e-05, "loss": 1.7516, "step": 4404 }, { "epoch": 1.3370769464258614, "grad_norm": 0.4749910533428192, "learning_rate": 7.32989064398542e-05, "loss": 1.1845, "step": 4405 }, { "epoch": 1.3373804826225526, "grad_norm": 0.5248123407363892, "learning_rate": 7.329283110571081e-05, "loss": 1.7238, "step": 4406 }, { "epoch": 1.3376840188192443, "grad_norm": 0.5120264887809753, "learning_rate": 7.328675577156744e-05, "loss": 1.5947, "step": 4407 }, { "epoch": 1.3379875550159357, "grad_norm": 0.4882364869117737, "learning_rate": 7.328068043742406e-05, "loss": 1.7606, "step": 4408 }, { "epoch": 1.3382910912126271, "grad_norm": 0.5949615240097046, "learning_rate": 7.327460510328068e-05, "loss": 1.6298, "step": 4409 }, { "epoch": 1.3385946274093186, "grad_norm": 0.5893341898918152, "learning_rate": 7.326852976913731e-05, "loss": 1.6613, "step": 4410 }, { "epoch": 1.33889816360601, "grad_norm": 0.6306172609329224, "learning_rate": 7.326245443499394e-05, "loss": 1.3808, "step": 4411 }, { "epoch": 1.3392016998027014, "grad_norm": 0.5589039325714111, "learning_rate": 7.325637910085054e-05, "loss": 1.5282, "step": 4412 }, { "epoch": 1.3395052359993929, "grad_norm": 0.4946483373641968, "learning_rate": 7.325030376670717e-05, "loss": 1.5924, "step": 4413 }, { "epoch": 1.3398087721960845, "grad_norm": 0.5384162068367004, "learning_rate": 7.32442284325638e-05, "loss": 1.7231, "step": 4414 }, { "epoch": 1.3401123083927757, "grad_norm": 0.5065357685089111, "learning_rate": 7.323815309842041e-05, "loss": 1.795, "step": 4415 }, { "epoch": 1.3404158445894674, "grad_norm": 0.5467350482940674, "learning_rate": 7.323207776427704e-05, "loss": 2.1203, "step": 4416 }, { "epoch": 1.3407193807861588, "grad_norm": 0.9943485260009766, "learning_rate": 7.322600243013366e-05, "loss": 1.6615, "step": 4417 }, { "epoch": 1.3410229169828503, "grad_norm": 0.48699676990509033, "learning_rate": 7.321992709599029e-05, "loss": 1.6614, "step": 4418 }, { "epoch": 1.3413264531795417, "grad_norm": 0.5375555753707886, "learning_rate": 7.32138517618469e-05, "loss": 1.8225, "step": 4419 }, { "epoch": 1.3416299893762331, "grad_norm": 0.43219107389450073, "learning_rate": 7.320777642770352e-05, "loss": 1.8322, "step": 4420 }, { "epoch": 1.3419335255729246, "grad_norm": 0.8610438108444214, "learning_rate": 7.320170109356015e-05, "loss": 1.5714, "step": 4421 }, { "epoch": 1.342237061769616, "grad_norm": 0.4116555154323578, "learning_rate": 7.319562575941677e-05, "loss": 1.527, "step": 4422 }, { "epoch": 1.3425405979663074, "grad_norm": 0.7473874688148499, "learning_rate": 7.318955042527339e-05, "loss": 1.7131, "step": 4423 }, { "epoch": 1.3428441341629989, "grad_norm": 0.6852518320083618, "learning_rate": 7.318347509113002e-05, "loss": 1.9008, "step": 4424 }, { "epoch": 1.3431476703596905, "grad_norm": 0.4978015124797821, "learning_rate": 7.317739975698664e-05, "loss": 1.6112, "step": 4425 }, { "epoch": 1.3434512065563817, "grad_norm": 0.4848748445510864, "learning_rate": 7.317132442284325e-05, "loss": 2.0183, "step": 4426 }, { "epoch": 1.3437547427530734, "grad_norm": 0.5683912634849548, "learning_rate": 7.316524908869988e-05, "loss": 1.4524, "step": 4427 }, { "epoch": 1.3440582789497648, "grad_norm": 0.5863691568374634, "learning_rate": 7.315917375455651e-05, "loss": 1.5636, "step": 4428 }, { "epoch": 1.3443618151464563, "grad_norm": 0.5731346011161804, "learning_rate": 7.315309842041312e-05, "loss": 1.7794, "step": 4429 }, { "epoch": 1.3446653513431477, "grad_norm": 0.46514976024627686, "learning_rate": 7.314702308626975e-05, "loss": 1.6708, "step": 4430 }, { "epoch": 1.3449688875398391, "grad_norm": 0.47035089135169983, "learning_rate": 7.314094775212637e-05, "loss": 1.4858, "step": 4431 }, { "epoch": 1.3452724237365306, "grad_norm": 0.6432058215141296, "learning_rate": 7.3134872417983e-05, "loss": 1.9622, "step": 4432 }, { "epoch": 1.345575959933222, "grad_norm": 0.6775307655334473, "learning_rate": 7.312879708383961e-05, "loss": 1.874, "step": 4433 }, { "epoch": 1.3458794961299134, "grad_norm": 0.4946788251399994, "learning_rate": 7.312272174969623e-05, "loss": 1.6998, "step": 4434 }, { "epoch": 1.3461830323266049, "grad_norm": 0.5256187319755554, "learning_rate": 7.311664641555286e-05, "loss": 1.6884, "step": 4435 }, { "epoch": 1.3464865685232965, "grad_norm": 0.526914119720459, "learning_rate": 7.311057108140948e-05, "loss": 1.5151, "step": 4436 }, { "epoch": 1.3467901047199877, "grad_norm": 0.5384836196899414, "learning_rate": 7.31044957472661e-05, "loss": 1.5689, "step": 4437 }, { "epoch": 1.3470936409166794, "grad_norm": 0.47421467304229736, "learning_rate": 7.309842041312273e-05, "loss": 1.6315, "step": 4438 }, { "epoch": 1.3473971771133708, "grad_norm": 0.5070328712463379, "learning_rate": 7.309234507897935e-05, "loss": 1.7336, "step": 4439 }, { "epoch": 1.3477007133100622, "grad_norm": 0.5571221113204956, "learning_rate": 7.308626974483596e-05, "loss": 1.4672, "step": 4440 }, { "epoch": 1.3480042495067537, "grad_norm": 0.5508000254631042, "learning_rate": 7.30801944106926e-05, "loss": 1.7045, "step": 4441 }, { "epoch": 1.3483077857034451, "grad_norm": 0.46867290139198303, "learning_rate": 7.307411907654922e-05, "loss": 1.7144, "step": 4442 }, { "epoch": 1.3486113219001366, "grad_norm": 0.486806720495224, "learning_rate": 7.306804374240583e-05, "loss": 1.2906, "step": 4443 }, { "epoch": 1.348914858096828, "grad_norm": 0.5570629835128784, "learning_rate": 7.306196840826246e-05, "loss": 1.2759, "step": 4444 }, { "epoch": 1.3492183942935196, "grad_norm": 0.4644160270690918, "learning_rate": 7.305589307411908e-05, "loss": 1.7632, "step": 4445 }, { "epoch": 1.3495219304902109, "grad_norm": 0.6232413053512573, "learning_rate": 7.304981773997571e-05, "loss": 1.6626, "step": 4446 }, { "epoch": 1.3498254666869025, "grad_norm": 0.61170893907547, "learning_rate": 7.304374240583232e-05, "loss": 1.5876, "step": 4447 }, { "epoch": 1.350129002883594, "grad_norm": 0.5944005250930786, "learning_rate": 7.303766707168894e-05, "loss": 1.8504, "step": 4448 }, { "epoch": 1.3504325390802854, "grad_norm": 0.39353135228157043, "learning_rate": 7.303159173754557e-05, "loss": 0.5195, "step": 4449 }, { "epoch": 1.3507360752769768, "grad_norm": 0.5182314515113831, "learning_rate": 7.302551640340219e-05, "loss": 1.7379, "step": 4450 }, { "epoch": 1.3510396114736682, "grad_norm": 0.43427690863609314, "learning_rate": 7.301944106925881e-05, "loss": 1.8747, "step": 4451 }, { "epoch": 1.3513431476703597, "grad_norm": 0.5645577907562256, "learning_rate": 7.301336573511544e-05, "loss": 1.6844, "step": 4452 }, { "epoch": 1.3516466838670511, "grad_norm": 0.4782490134239197, "learning_rate": 7.300729040097206e-05, "loss": 1.242, "step": 4453 }, { "epoch": 1.3519502200637425, "grad_norm": 0.3485776484012604, "learning_rate": 7.300121506682867e-05, "loss": 1.4581, "step": 4454 }, { "epoch": 1.352253756260434, "grad_norm": 0.5047571659088135, "learning_rate": 7.29951397326853e-05, "loss": 1.7181, "step": 4455 }, { "epoch": 1.3525572924571256, "grad_norm": 0.48931753635406494, "learning_rate": 7.298906439854193e-05, "loss": 1.8102, "step": 4456 }, { "epoch": 1.3528608286538168, "grad_norm": 0.585381031036377, "learning_rate": 7.298298906439854e-05, "loss": 1.4796, "step": 4457 }, { "epoch": 1.3531643648505085, "grad_norm": 0.5762146711349487, "learning_rate": 7.297691373025517e-05, "loss": 1.7404, "step": 4458 }, { "epoch": 1.3534679010472, "grad_norm": 0.37271368503570557, "learning_rate": 7.297083839611179e-05, "loss": 1.2543, "step": 4459 }, { "epoch": 1.3537714372438914, "grad_norm": 0.4939133822917938, "learning_rate": 7.296476306196842e-05, "loss": 1.6854, "step": 4460 }, { "epoch": 1.3540749734405828, "grad_norm": 0.5658159255981445, "learning_rate": 7.295868772782503e-05, "loss": 1.9223, "step": 4461 }, { "epoch": 1.3543785096372742, "grad_norm": 0.5128167271614075, "learning_rate": 7.295261239368165e-05, "loss": 1.8189, "step": 4462 }, { "epoch": 1.3546820458339657, "grad_norm": 0.6183301210403442, "learning_rate": 7.294653705953828e-05, "loss": 1.3022, "step": 4463 }, { "epoch": 1.354985582030657, "grad_norm": 0.5234330892562866, "learning_rate": 7.29404617253949e-05, "loss": 1.7716, "step": 4464 }, { "epoch": 1.3552891182273485, "grad_norm": 0.5098745226860046, "learning_rate": 7.293438639125152e-05, "loss": 1.8543, "step": 4465 }, { "epoch": 1.35559265442404, "grad_norm": 0.5108742117881775, "learning_rate": 7.292831105710815e-05, "loss": 1.3576, "step": 4466 }, { "epoch": 1.3558961906207316, "grad_norm": 0.4484347701072693, "learning_rate": 7.292223572296477e-05, "loss": 1.7337, "step": 4467 }, { "epoch": 1.3561997268174228, "grad_norm": 0.6011006236076355, "learning_rate": 7.291616038882138e-05, "loss": 1.1075, "step": 4468 }, { "epoch": 1.3565032630141145, "grad_norm": 0.530351459980011, "learning_rate": 7.291008505467801e-05, "loss": 1.6846, "step": 4469 }, { "epoch": 1.356806799210806, "grad_norm": 0.5644029974937439, "learning_rate": 7.290400972053464e-05, "loss": 1.6254, "step": 4470 }, { "epoch": 1.3571103354074974, "grad_norm": 0.5434947609901428, "learning_rate": 7.289793438639125e-05, "loss": 1.7514, "step": 4471 }, { "epoch": 1.3574138716041888, "grad_norm": 0.4561974108219147, "learning_rate": 7.289185905224788e-05, "loss": 1.7515, "step": 4472 }, { "epoch": 1.3577174078008802, "grad_norm": 0.5263285636901855, "learning_rate": 7.28857837181045e-05, "loss": 1.549, "step": 4473 }, { "epoch": 1.3580209439975717, "grad_norm": 0.4440256953239441, "learning_rate": 7.287970838396111e-05, "loss": 1.4366, "step": 4474 }, { "epoch": 1.358324480194263, "grad_norm": 0.4798101782798767, "learning_rate": 7.287363304981774e-05, "loss": 1.9394, "step": 4475 }, { "epoch": 1.3586280163909545, "grad_norm": 0.5924159288406372, "learning_rate": 7.286755771567436e-05, "loss": 1.1942, "step": 4476 }, { "epoch": 1.358931552587646, "grad_norm": 0.5440402030944824, "learning_rate": 7.286148238153099e-05, "loss": 1.889, "step": 4477 }, { "epoch": 1.3592350887843376, "grad_norm": 0.5075298547744751, "learning_rate": 7.285540704738761e-05, "loss": 1.8719, "step": 4478 }, { "epoch": 1.359538624981029, "grad_norm": 0.5377494692802429, "learning_rate": 7.284933171324423e-05, "loss": 1.67, "step": 4479 }, { "epoch": 1.3598421611777205, "grad_norm": 0.5329782366752625, "learning_rate": 7.284325637910086e-05, "loss": 1.7687, "step": 4480 }, { "epoch": 1.360145697374412, "grad_norm": 0.5144550204277039, "learning_rate": 7.283718104495748e-05, "loss": 1.6271, "step": 4481 }, { "epoch": 1.3604492335711034, "grad_norm": 0.4970364570617676, "learning_rate": 7.283110571081409e-05, "loss": 1.7589, "step": 4482 }, { "epoch": 1.3607527697677948, "grad_norm": 1.1922450065612793, "learning_rate": 7.282503037667072e-05, "loss": 1.6477, "step": 4483 }, { "epoch": 1.3610563059644862, "grad_norm": 0.5640878677368164, "learning_rate": 7.281895504252734e-05, "loss": 1.6746, "step": 4484 }, { "epoch": 1.3613598421611777, "grad_norm": 0.496259868144989, "learning_rate": 7.281287970838396e-05, "loss": 1.7005, "step": 4485 }, { "epoch": 1.361663378357869, "grad_norm": 0.5209731459617615, "learning_rate": 7.280680437424059e-05, "loss": 1.7817, "step": 4486 }, { "epoch": 1.3619669145545608, "grad_norm": 0.5042216777801514, "learning_rate": 7.28007290400972e-05, "loss": 1.8656, "step": 4487 }, { "epoch": 1.362270450751252, "grad_norm": 0.693298876285553, "learning_rate": 7.279465370595382e-05, "loss": 1.9507, "step": 4488 }, { "epoch": 1.3625739869479436, "grad_norm": 0.5207253694534302, "learning_rate": 7.278857837181045e-05, "loss": 1.886, "step": 4489 }, { "epoch": 1.362877523144635, "grad_norm": 0.5372057557106018, "learning_rate": 7.278250303766707e-05, "loss": 1.1538, "step": 4490 }, { "epoch": 1.3631810593413265, "grad_norm": 0.43779316544532776, "learning_rate": 7.27764277035237e-05, "loss": 1.0436, "step": 4491 }, { "epoch": 1.363484595538018, "grad_norm": 0.5032690763473511, "learning_rate": 7.277035236938032e-05, "loss": 1.5441, "step": 4492 }, { "epoch": 1.3637881317347094, "grad_norm": 0.43449172377586365, "learning_rate": 7.276427703523694e-05, "loss": 1.787, "step": 4493 }, { "epoch": 1.3640916679314008, "grad_norm": 0.5264309644699097, "learning_rate": 7.275820170109357e-05, "loss": 1.8857, "step": 4494 }, { "epoch": 1.3643952041280922, "grad_norm": 0.5467169284820557, "learning_rate": 7.275212636695019e-05, "loss": 1.4043, "step": 4495 }, { "epoch": 1.3646987403247837, "grad_norm": 0.4802314043045044, "learning_rate": 7.27460510328068e-05, "loss": 1.6738, "step": 4496 }, { "epoch": 1.365002276521475, "grad_norm": 0.50968998670578, "learning_rate": 7.273997569866343e-05, "loss": 1.7424, "step": 4497 }, { "epoch": 1.3653058127181668, "grad_norm": 0.5726447701454163, "learning_rate": 7.273390036452005e-05, "loss": 1.8105, "step": 4498 }, { "epoch": 1.365609348914858, "grad_norm": 0.6234380006790161, "learning_rate": 7.272782503037667e-05, "loss": 1.688, "step": 4499 }, { "epoch": 1.3659128851115496, "grad_norm": 0.4061424136161804, "learning_rate": 7.27217496962333e-05, "loss": 1.8207, "step": 4500 }, { "epoch": 1.366216421308241, "grad_norm": 0.5762432813644409, "learning_rate": 7.271567436208992e-05, "loss": 1.6701, "step": 4501 }, { "epoch": 1.3665199575049325, "grad_norm": 0.63739013671875, "learning_rate": 7.270959902794653e-05, "loss": 1.0446, "step": 4502 }, { "epoch": 1.366823493701624, "grad_norm": 0.47659537196159363, "learning_rate": 7.270352369380316e-05, "loss": 1.6165, "step": 4503 }, { "epoch": 1.3671270298983154, "grad_norm": 0.4327382445335388, "learning_rate": 7.269744835965978e-05, "loss": 1.7651, "step": 4504 }, { "epoch": 1.3674305660950068, "grad_norm": 0.4395967423915863, "learning_rate": 7.269137302551641e-05, "loss": 1.7153, "step": 4505 }, { "epoch": 1.3677341022916982, "grad_norm": 0.4870195686817169, "learning_rate": 7.268529769137303e-05, "loss": 1.6897, "step": 4506 }, { "epoch": 1.3680376384883897, "grad_norm": 0.46602505445480347, "learning_rate": 7.267922235722965e-05, "loss": 1.8504, "step": 4507 }, { "epoch": 1.368341174685081, "grad_norm": 0.7441166639328003, "learning_rate": 7.267314702308628e-05, "loss": 1.4581, "step": 4508 }, { "epoch": 1.3686447108817728, "grad_norm": 0.6213736534118652, "learning_rate": 7.26670716889429e-05, "loss": 1.4608, "step": 4509 }, { "epoch": 1.3689482470784642, "grad_norm": 0.6059181690216064, "learning_rate": 7.266099635479951e-05, "loss": 1.8415, "step": 4510 }, { "epoch": 1.3692517832751556, "grad_norm": 0.9757100343704224, "learning_rate": 7.265492102065614e-05, "loss": 1.2277, "step": 4511 }, { "epoch": 1.369555319471847, "grad_norm": 0.46525654196739197, "learning_rate": 7.264884568651276e-05, "loss": 1.5944, "step": 4512 }, { "epoch": 1.3698588556685385, "grad_norm": 0.5008564591407776, "learning_rate": 7.264277035236938e-05, "loss": 1.8998, "step": 4513 }, { "epoch": 1.37016239186523, "grad_norm": 0.44502413272857666, "learning_rate": 7.263669501822601e-05, "loss": 1.6404, "step": 4514 }, { "epoch": 1.3704659280619214, "grad_norm": 0.5428724884986877, "learning_rate": 7.263061968408263e-05, "loss": 1.6454, "step": 4515 }, { "epoch": 1.3707694642586128, "grad_norm": 0.47314050793647766, "learning_rate": 7.262454434993924e-05, "loss": 1.6914, "step": 4516 }, { "epoch": 1.3710730004553042, "grad_norm": 0.574695885181427, "learning_rate": 7.261846901579587e-05, "loss": 1.7187, "step": 4517 }, { "epoch": 1.3713765366519959, "grad_norm": 0.5672833919525146, "learning_rate": 7.261239368165249e-05, "loss": 1.7231, "step": 4518 }, { "epoch": 1.371680072848687, "grad_norm": 0.46130290627479553, "learning_rate": 7.260631834750912e-05, "loss": 1.2445, "step": 4519 }, { "epoch": 1.3719836090453787, "grad_norm": 0.5272838473320007, "learning_rate": 7.260024301336574e-05, "loss": 1.8076, "step": 4520 }, { "epoch": 1.3722871452420702, "grad_norm": 0.47636985778808594, "learning_rate": 7.259416767922236e-05, "loss": 1.8179, "step": 4521 }, { "epoch": 1.3725906814387616, "grad_norm": 0.4305800199508667, "learning_rate": 7.258809234507899e-05, "loss": 1.6511, "step": 4522 }, { "epoch": 1.372894217635453, "grad_norm": 1.3684027194976807, "learning_rate": 7.25820170109356e-05, "loss": 1.4146, "step": 4523 }, { "epoch": 1.3731977538321445, "grad_norm": 0.5367915630340576, "learning_rate": 7.257594167679222e-05, "loss": 1.2383, "step": 4524 }, { "epoch": 1.373501290028836, "grad_norm": 1.0694987773895264, "learning_rate": 7.256986634264885e-05, "loss": 1.1652, "step": 4525 }, { "epoch": 1.3738048262255274, "grad_norm": 0.5531676411628723, "learning_rate": 7.256379100850547e-05, "loss": 1.3935, "step": 4526 }, { "epoch": 1.3741083624222188, "grad_norm": 0.5630438327789307, "learning_rate": 7.255771567436209e-05, "loss": 1.4998, "step": 4527 }, { "epoch": 1.3744118986189102, "grad_norm": 0.5596137642860413, "learning_rate": 7.255164034021872e-05, "loss": 1.3709, "step": 4528 }, { "epoch": 1.3747154348156019, "grad_norm": 0.6031253337860107, "learning_rate": 7.254556500607534e-05, "loss": 1.2487, "step": 4529 }, { "epoch": 1.375018971012293, "grad_norm": 0.662558913230896, "learning_rate": 7.253948967193195e-05, "loss": 1.7042, "step": 4530 }, { "epoch": 1.3753225072089847, "grad_norm": 0.5421009659767151, "learning_rate": 7.253341433778858e-05, "loss": 1.7412, "step": 4531 }, { "epoch": 1.3756260434056762, "grad_norm": 0.5114768743515015, "learning_rate": 7.25273390036452e-05, "loss": 1.837, "step": 4532 }, { "epoch": 1.3759295796023676, "grad_norm": 0.5638769268989563, "learning_rate": 7.252126366950183e-05, "loss": 1.8487, "step": 4533 }, { "epoch": 1.376233115799059, "grad_norm": 0.5670020580291748, "learning_rate": 7.251518833535845e-05, "loss": 1.5702, "step": 4534 }, { "epoch": 1.3765366519957505, "grad_norm": 0.5094712972640991, "learning_rate": 7.250911300121507e-05, "loss": 1.8152, "step": 4535 }, { "epoch": 1.376840188192442, "grad_norm": 0.5454041361808777, "learning_rate": 7.25030376670717e-05, "loss": 1.3741, "step": 4536 }, { "epoch": 1.3771437243891334, "grad_norm": 0.5326266884803772, "learning_rate": 7.249696233292832e-05, "loss": 1.6344, "step": 4537 }, { "epoch": 1.3774472605858248, "grad_norm": 0.4124714732170105, "learning_rate": 7.249088699878493e-05, "loss": 1.4858, "step": 4538 }, { "epoch": 1.3777507967825162, "grad_norm": 0.5569986701011658, "learning_rate": 7.248481166464156e-05, "loss": 1.8529, "step": 4539 }, { "epoch": 1.3780543329792079, "grad_norm": 0.5609976053237915, "learning_rate": 7.247873633049818e-05, "loss": 1.8264, "step": 4540 }, { "epoch": 1.3783578691758993, "grad_norm": 0.4581679403781891, "learning_rate": 7.24726609963548e-05, "loss": 1.7181, "step": 4541 }, { "epoch": 1.3786614053725907, "grad_norm": 0.551991879940033, "learning_rate": 7.246658566221143e-05, "loss": 0.8978, "step": 4542 }, { "epoch": 1.3789649415692822, "grad_norm": 0.5136808156967163, "learning_rate": 7.246051032806805e-05, "loss": 1.3164, "step": 4543 }, { "epoch": 1.3792684777659736, "grad_norm": 0.5180895328521729, "learning_rate": 7.245443499392466e-05, "loss": 1.7857, "step": 4544 }, { "epoch": 1.379572013962665, "grad_norm": 0.6461337208747864, "learning_rate": 7.24483596597813e-05, "loss": 1.1507, "step": 4545 }, { "epoch": 1.3798755501593565, "grad_norm": 0.5065274834632874, "learning_rate": 7.244228432563791e-05, "loss": 2.0678, "step": 4546 }, { "epoch": 1.380179086356048, "grad_norm": 0.5826600790023804, "learning_rate": 7.243620899149453e-05, "loss": 1.8561, "step": 4547 }, { "epoch": 1.3804826225527393, "grad_norm": 0.8745676279067993, "learning_rate": 7.243013365735116e-05, "loss": 1.3648, "step": 4548 }, { "epoch": 1.380786158749431, "grad_norm": 0.5953565239906311, "learning_rate": 7.242405832320778e-05, "loss": 1.7645, "step": 4549 }, { "epoch": 1.3810896949461222, "grad_norm": 0.5085800290107727, "learning_rate": 7.241798298906441e-05, "loss": 1.5934, "step": 4550 }, { "epoch": 1.3813932311428139, "grad_norm": 0.5552278161048889, "learning_rate": 7.241190765492103e-05, "loss": 1.8317, "step": 4551 }, { "epoch": 1.3816967673395053, "grad_norm": 0.7417198419570923, "learning_rate": 7.240583232077764e-05, "loss": 1.1408, "step": 4552 }, { "epoch": 1.3820003035361967, "grad_norm": 0.5697821378707886, "learning_rate": 7.239975698663427e-05, "loss": 1.7124, "step": 4553 }, { "epoch": 1.3823038397328882, "grad_norm": 0.49888259172439575, "learning_rate": 7.239368165249089e-05, "loss": 1.3308, "step": 4554 }, { "epoch": 1.3826073759295796, "grad_norm": 0.7289189100265503, "learning_rate": 7.238760631834751e-05, "loss": 1.4048, "step": 4555 }, { "epoch": 1.382910912126271, "grad_norm": 0.49783867597579956, "learning_rate": 7.238153098420414e-05, "loss": 1.7252, "step": 4556 }, { "epoch": 1.3832144483229625, "grad_norm": 0.5540681481361389, "learning_rate": 7.237545565006076e-05, "loss": 1.456, "step": 4557 }, { "epoch": 1.383517984519654, "grad_norm": 0.5310572385787964, "learning_rate": 7.236938031591737e-05, "loss": 2.0569, "step": 4558 }, { "epoch": 1.3838215207163453, "grad_norm": 0.6051456332206726, "learning_rate": 7.2363304981774e-05, "loss": 1.6324, "step": 4559 }, { "epoch": 1.384125056913037, "grad_norm": 0.39043620228767395, "learning_rate": 7.235722964763062e-05, "loss": 0.9041, "step": 4560 }, { "epoch": 1.3844285931097282, "grad_norm": 0.518295168876648, "learning_rate": 7.235115431348724e-05, "loss": 1.509, "step": 4561 }, { "epoch": 1.3847321293064199, "grad_norm": 0.4797629415988922, "learning_rate": 7.234507897934387e-05, "loss": 1.8273, "step": 4562 }, { "epoch": 1.3850356655031113, "grad_norm": 0.5168799757957458, "learning_rate": 7.233900364520049e-05, "loss": 1.7992, "step": 4563 }, { "epoch": 1.3853392016998027, "grad_norm": 0.4868592619895935, "learning_rate": 7.233292831105712e-05, "loss": 2.075, "step": 4564 }, { "epoch": 1.3856427378964942, "grad_norm": 0.52605140209198, "learning_rate": 7.232685297691372e-05, "loss": 1.7701, "step": 4565 }, { "epoch": 1.3859462740931856, "grad_norm": 0.5345576405525208, "learning_rate": 7.232077764277035e-05, "loss": 1.3641, "step": 4566 }, { "epoch": 1.386249810289877, "grad_norm": 0.5572211742401123, "learning_rate": 7.231470230862698e-05, "loss": 1.5284, "step": 4567 }, { "epoch": 1.3865533464865685, "grad_norm": 0.5272506475448608, "learning_rate": 7.23086269744836e-05, "loss": 1.4096, "step": 4568 }, { "epoch": 1.38685688268326, "grad_norm": 17.82926368713379, "learning_rate": 7.230255164034022e-05, "loss": 1.8241, "step": 4569 }, { "epoch": 1.3871604188799513, "grad_norm": 2.107409715652466, "learning_rate": 7.229647630619685e-05, "loss": 2.2869, "step": 4570 }, { "epoch": 1.387463955076643, "grad_norm": 0.4298652708530426, "learning_rate": 7.229040097205347e-05, "loss": 1.2312, "step": 4571 }, { "epoch": 1.3877674912733342, "grad_norm": 0.5333446860313416, "learning_rate": 7.228432563791008e-05, "loss": 1.3574, "step": 4572 }, { "epoch": 1.3880710274700259, "grad_norm": 0.465961217880249, "learning_rate": 7.227825030376672e-05, "loss": 1.8196, "step": 4573 }, { "epoch": 1.3883745636667173, "grad_norm": 0.5217798948287964, "learning_rate": 7.227217496962333e-05, "loss": 1.8073, "step": 4574 }, { "epoch": 1.3886780998634087, "grad_norm": 1.037131428718567, "learning_rate": 7.226609963547995e-05, "loss": 1.6744, "step": 4575 }, { "epoch": 1.3889816360601002, "grad_norm": 0.8267009854316711, "learning_rate": 7.226002430133658e-05, "loss": 1.8694, "step": 4576 }, { "epoch": 1.3892851722567916, "grad_norm": 0.6034876108169556, "learning_rate": 7.22539489671932e-05, "loss": 1.6975, "step": 4577 }, { "epoch": 1.389588708453483, "grad_norm": 0.4525824189186096, "learning_rate": 7.224787363304983e-05, "loss": 1.994, "step": 4578 }, { "epoch": 1.3898922446501745, "grad_norm": 0.6136592626571655, "learning_rate": 7.224179829890643e-05, "loss": 1.7241, "step": 4579 }, { "epoch": 1.3901957808468661, "grad_norm": 0.6351958513259888, "learning_rate": 7.223572296476306e-05, "loss": 1.722, "step": 4580 }, { "epoch": 1.3904993170435573, "grad_norm": 0.4822506010532379, "learning_rate": 7.22296476306197e-05, "loss": 1.5643, "step": 4581 }, { "epoch": 1.390802853240249, "grad_norm": 0.5029870271682739, "learning_rate": 7.222357229647631e-05, "loss": 1.6093, "step": 4582 }, { "epoch": 1.3911063894369404, "grad_norm": 0.45101577043533325, "learning_rate": 7.221749696233293e-05, "loss": 1.6051, "step": 4583 }, { "epoch": 1.3914099256336319, "grad_norm": 0.8177331686019897, "learning_rate": 7.221142162818956e-05, "loss": 1.5745, "step": 4584 }, { "epoch": 1.3917134618303233, "grad_norm": 0.3580580949783325, "learning_rate": 7.220534629404618e-05, "loss": 1.3952, "step": 4585 }, { "epoch": 1.3920169980270147, "grad_norm": 0.48784148693084717, "learning_rate": 7.21992709599028e-05, "loss": 1.3858, "step": 4586 }, { "epoch": 1.3923205342237062, "grad_norm": 0.4869060516357422, "learning_rate": 7.219319562575943e-05, "loss": 1.477, "step": 4587 }, { "epoch": 1.3926240704203976, "grad_norm": 0.5329310297966003, "learning_rate": 7.218712029161604e-05, "loss": 1.8107, "step": 4588 }, { "epoch": 1.392927606617089, "grad_norm": 0.5233326554298401, "learning_rate": 7.218104495747266e-05, "loss": 1.6642, "step": 4589 }, { "epoch": 1.3932311428137805, "grad_norm": 0.6266956925392151, "learning_rate": 7.217496962332929e-05, "loss": 1.7805, "step": 4590 }, { "epoch": 1.3935346790104721, "grad_norm": 0.5174371004104614, "learning_rate": 7.216889428918591e-05, "loss": 1.4675, "step": 4591 }, { "epoch": 1.3938382152071633, "grad_norm": 0.5754081606864929, "learning_rate": 7.216281895504254e-05, "loss": 2.0404, "step": 4592 }, { "epoch": 1.394141751403855, "grad_norm": 0.4270972013473511, "learning_rate": 7.215674362089914e-05, "loss": 1.4554, "step": 4593 }, { "epoch": 1.3944452876005464, "grad_norm": 0.604895830154419, "learning_rate": 7.215066828675577e-05, "loss": 1.4743, "step": 4594 }, { "epoch": 1.3947488237972379, "grad_norm": 0.5406295657157898, "learning_rate": 7.21445929526124e-05, "loss": 1.5569, "step": 4595 }, { "epoch": 1.3950523599939293, "grad_norm": 0.5292085409164429, "learning_rate": 7.213851761846902e-05, "loss": 1.4687, "step": 4596 }, { "epoch": 1.3953558961906207, "grad_norm": 0.5510256886482239, "learning_rate": 7.213244228432564e-05, "loss": 1.4113, "step": 4597 }, { "epoch": 1.3956594323873122, "grad_norm": 0.5336708426475525, "learning_rate": 7.212636695018227e-05, "loss": 1.6209, "step": 4598 }, { "epoch": 1.3959629685840036, "grad_norm": 0.5480448603630066, "learning_rate": 7.212029161603889e-05, "loss": 1.613, "step": 4599 }, { "epoch": 1.396266504780695, "grad_norm": 0.49696651101112366, "learning_rate": 7.21142162818955e-05, "loss": 1.5235, "step": 4600 }, { "epoch": 1.3965700409773865, "grad_norm": 0.5844339728355408, "learning_rate": 7.210814094775214e-05, "loss": 1.6704, "step": 4601 }, { "epoch": 1.3968735771740781, "grad_norm": 0.5771584510803223, "learning_rate": 7.210206561360875e-05, "loss": 1.3323, "step": 4602 }, { "epoch": 1.3971771133707693, "grad_norm": 0.5130957365036011, "learning_rate": 7.209599027946537e-05, "loss": 1.4892, "step": 4603 }, { "epoch": 1.397480649567461, "grad_norm": 0.47314324975013733, "learning_rate": 7.2089914945322e-05, "loss": 1.7793, "step": 4604 }, { "epoch": 1.3977841857641524, "grad_norm": 0.4471718966960907, "learning_rate": 7.208383961117862e-05, "loss": 1.7329, "step": 4605 }, { "epoch": 1.3980877219608439, "grad_norm": 0.5555965900421143, "learning_rate": 7.207776427703525e-05, "loss": 1.574, "step": 4606 }, { "epoch": 1.3983912581575353, "grad_norm": 0.5500729084014893, "learning_rate": 7.207168894289185e-05, "loss": 1.5674, "step": 4607 }, { "epoch": 1.3986947943542267, "grad_norm": 0.5009059906005859, "learning_rate": 7.206561360874848e-05, "loss": 1.7574, "step": 4608 }, { "epoch": 1.3989983305509182, "grad_norm": 0.5441694259643555, "learning_rate": 7.205953827460511e-05, "loss": 1.4509, "step": 4609 }, { "epoch": 1.3993018667476096, "grad_norm": 0.4883042573928833, "learning_rate": 7.205346294046172e-05, "loss": 1.4991, "step": 4610 }, { "epoch": 1.399605402944301, "grad_norm": 0.5141351222991943, "learning_rate": 7.204738760631835e-05, "loss": 1.4975, "step": 4611 }, { "epoch": 1.3999089391409925, "grad_norm": 0.5896480083465576, "learning_rate": 7.204131227217498e-05, "loss": 1.4992, "step": 4612 }, { "epoch": 1.4002124753376841, "grad_norm": 0.7314833402633667, "learning_rate": 7.20352369380316e-05, "loss": 0.8182, "step": 4613 }, { "epoch": 1.4005160115343755, "grad_norm": 0.5311540961265564, "learning_rate": 7.202916160388821e-05, "loss": 1.5784, "step": 4614 }, { "epoch": 1.400819547731067, "grad_norm": 0.5259482264518738, "learning_rate": 7.202308626974485e-05, "loss": 1.6322, "step": 4615 }, { "epoch": 1.4011230839277584, "grad_norm": 0.5433558821678162, "learning_rate": 7.201701093560146e-05, "loss": 1.3337, "step": 4616 }, { "epoch": 1.4014266201244499, "grad_norm": 0.6455905437469482, "learning_rate": 7.201093560145808e-05, "loss": 1.9787, "step": 4617 }, { "epoch": 1.4017301563211413, "grad_norm": 0.5605013966560364, "learning_rate": 7.200486026731471e-05, "loss": 1.4333, "step": 4618 }, { "epoch": 1.4020336925178327, "grad_norm": 0.5594799518585205, "learning_rate": 7.199878493317133e-05, "loss": 1.7032, "step": 4619 }, { "epoch": 1.4023372287145242, "grad_norm": 0.49050381779670715, "learning_rate": 7.199270959902796e-05, "loss": 1.8006, "step": 4620 }, { "epoch": 1.4026407649112156, "grad_norm": 0.601580798625946, "learning_rate": 7.198663426488456e-05, "loss": 1.5474, "step": 4621 }, { "epoch": 1.4029443011079072, "grad_norm": 0.6719093918800354, "learning_rate": 7.19805589307412e-05, "loss": 1.6041, "step": 4622 }, { "epoch": 1.4032478373045985, "grad_norm": 0.5384756922721863, "learning_rate": 7.197448359659782e-05, "loss": 1.5229, "step": 4623 }, { "epoch": 1.4035513735012901, "grad_norm": 0.52190101146698, "learning_rate": 7.196840826245443e-05, "loss": 1.7249, "step": 4624 }, { "epoch": 1.4038549096979815, "grad_norm": 0.47463634610176086, "learning_rate": 7.196233292831106e-05, "loss": 1.6262, "step": 4625 }, { "epoch": 1.404158445894673, "grad_norm": 0.48097196221351624, "learning_rate": 7.195625759416769e-05, "loss": 1.7884, "step": 4626 }, { "epoch": 1.4044619820913644, "grad_norm": 0.42945027351379395, "learning_rate": 7.195018226002431e-05, "loss": 1.2333, "step": 4627 }, { "epoch": 1.4047655182880558, "grad_norm": 0.49355918169021606, "learning_rate": 7.194410692588092e-05, "loss": 1.4478, "step": 4628 }, { "epoch": 1.4050690544847473, "grad_norm": 0.5966702699661255, "learning_rate": 7.193803159173756e-05, "loss": 1.0146, "step": 4629 }, { "epoch": 1.4053725906814387, "grad_norm": 0.524314820766449, "learning_rate": 7.193195625759417e-05, "loss": 1.6238, "step": 4630 }, { "epoch": 1.4056761268781301, "grad_norm": 0.4699951708316803, "learning_rate": 7.192588092345079e-05, "loss": 1.3799, "step": 4631 }, { "epoch": 1.4059796630748216, "grad_norm": 0.531152069568634, "learning_rate": 7.191980558930742e-05, "loss": 2.0555, "step": 4632 }, { "epoch": 1.4062831992715132, "grad_norm": 0.5721887946128845, "learning_rate": 7.191373025516404e-05, "loss": 1.339, "step": 4633 }, { "epoch": 1.4065867354682045, "grad_norm": 0.39430439472198486, "learning_rate": 7.190765492102066e-05, "loss": 1.4839, "step": 4634 }, { "epoch": 1.406890271664896, "grad_norm": 0.6990336775779724, "learning_rate": 7.190157958687727e-05, "loss": 1.6197, "step": 4635 }, { "epoch": 1.4071938078615875, "grad_norm": 0.4952123463153839, "learning_rate": 7.18955042527339e-05, "loss": 1.6727, "step": 4636 }, { "epoch": 1.407497344058279, "grad_norm": 0.5764554738998413, "learning_rate": 7.188942891859053e-05, "loss": 2.0462, "step": 4637 }, { "epoch": 1.4078008802549704, "grad_norm": 0.6098769903182983, "learning_rate": 7.188335358444714e-05, "loss": 1.8601, "step": 4638 }, { "epoch": 1.4081044164516618, "grad_norm": 0.6008402109146118, "learning_rate": 7.187727825030377e-05, "loss": 1.6561, "step": 4639 }, { "epoch": 1.4084079526483533, "grad_norm": 0.5050938129425049, "learning_rate": 7.18712029161604e-05, "loss": 1.8548, "step": 4640 }, { "epoch": 1.4087114888450447, "grad_norm": 0.5040962100028992, "learning_rate": 7.186512758201702e-05, "loss": 1.8083, "step": 4641 }, { "epoch": 1.4090150250417361, "grad_norm": 0.5517044067382812, "learning_rate": 7.185905224787363e-05, "loss": 1.1551, "step": 4642 }, { "epoch": 1.4093185612384276, "grad_norm": 0.562538743019104, "learning_rate": 7.185297691373027e-05, "loss": 1.4542, "step": 4643 }, { "epoch": 1.4096220974351192, "grad_norm": 0.9561034440994263, "learning_rate": 7.184690157958688e-05, "loss": 1.39, "step": 4644 }, { "epoch": 1.4099256336318107, "grad_norm": 0.5557636618614197, "learning_rate": 7.18408262454435e-05, "loss": 1.9666, "step": 4645 }, { "epoch": 1.410229169828502, "grad_norm": 0.5418885350227356, "learning_rate": 7.183475091130013e-05, "loss": 1.4215, "step": 4646 }, { "epoch": 1.4105327060251935, "grad_norm": 0.4758126139640808, "learning_rate": 7.182867557715675e-05, "loss": 1.6912, "step": 4647 }, { "epoch": 1.410836242221885, "grad_norm": 0.5334361791610718, "learning_rate": 7.182260024301337e-05, "loss": 1.645, "step": 4648 }, { "epoch": 1.4111397784185764, "grad_norm": 0.5273361802101135, "learning_rate": 7.181652490886998e-05, "loss": 1.5353, "step": 4649 }, { "epoch": 1.4114433146152678, "grad_norm": 0.5110300183296204, "learning_rate": 7.181044957472661e-05, "loss": 1.952, "step": 4650 }, { "epoch": 1.4117468508119593, "grad_norm": 0.49933740496635437, "learning_rate": 7.180437424058324e-05, "loss": 1.8396, "step": 4651 }, { "epoch": 1.4120503870086507, "grad_norm": 0.6788985133171082, "learning_rate": 7.179829890643985e-05, "loss": 1.8449, "step": 4652 }, { "epoch": 1.4123539232053424, "grad_norm": 0.5057569146156311, "learning_rate": 7.179222357229648e-05, "loss": 1.832, "step": 4653 }, { "epoch": 1.4126574594020336, "grad_norm": 0.44517782330513, "learning_rate": 7.178614823815311e-05, "loss": 1.6484, "step": 4654 }, { "epoch": 1.4129609955987252, "grad_norm": 0.557269275188446, "learning_rate": 7.178007290400973e-05, "loss": 1.3732, "step": 4655 }, { "epoch": 1.4132645317954167, "grad_norm": 0.47099003195762634, "learning_rate": 7.177399756986634e-05, "loss": 1.8153, "step": 4656 }, { "epoch": 1.413568067992108, "grad_norm": 0.5688049793243408, "learning_rate": 7.176792223572298e-05, "loss": 1.5837, "step": 4657 }, { "epoch": 1.4138716041887995, "grad_norm": 0.4637562930583954, "learning_rate": 7.176184690157959e-05, "loss": 1.0759, "step": 4658 }, { "epoch": 1.414175140385491, "grad_norm": 0.5533589720726013, "learning_rate": 7.175577156743621e-05, "loss": 1.8164, "step": 4659 }, { "epoch": 1.4144786765821824, "grad_norm": 0.5231189727783203, "learning_rate": 7.174969623329283e-05, "loss": 1.6957, "step": 4660 }, { "epoch": 1.4147822127788738, "grad_norm": 0.49182331562042236, "learning_rate": 7.174362089914946e-05, "loss": 1.776, "step": 4661 }, { "epoch": 1.4150857489755653, "grad_norm": 0.5737461447715759, "learning_rate": 7.173754556500608e-05, "loss": 1.2501, "step": 4662 }, { "epoch": 1.4153892851722567, "grad_norm": 0.4491937756538391, "learning_rate": 7.173147023086269e-05, "loss": 1.1878, "step": 4663 }, { "epoch": 1.4156928213689484, "grad_norm": 0.6204633712768555, "learning_rate": 7.172539489671932e-05, "loss": 1.2013, "step": 4664 }, { "epoch": 1.4159963575656396, "grad_norm": 0.4740472435951233, "learning_rate": 7.171931956257595e-05, "loss": 2.0967, "step": 4665 }, { "epoch": 1.4162998937623312, "grad_norm": 0.42531633377075195, "learning_rate": 7.171324422843256e-05, "loss": 1.247, "step": 4666 }, { "epoch": 1.4166034299590227, "grad_norm": 0.7472957968711853, "learning_rate": 7.170716889428919e-05, "loss": 1.5081, "step": 4667 }, { "epoch": 1.416906966155714, "grad_norm": 0.5398672223091125, "learning_rate": 7.170109356014582e-05, "loss": 2.0357, "step": 4668 }, { "epoch": 1.4172105023524055, "grad_norm": 0.4912889301776886, "learning_rate": 7.169501822600244e-05, "loss": 1.4911, "step": 4669 }, { "epoch": 1.417514038549097, "grad_norm": 0.5321101546287537, "learning_rate": 7.168894289185905e-05, "loss": 1.8128, "step": 4670 }, { "epoch": 1.4178175747457884, "grad_norm": 0.5869212746620178, "learning_rate": 7.168286755771569e-05, "loss": 1.5963, "step": 4671 }, { "epoch": 1.4181211109424798, "grad_norm": 0.5631369948387146, "learning_rate": 7.16767922235723e-05, "loss": 1.6133, "step": 4672 }, { "epoch": 1.4184246471391713, "grad_norm": 0.6509801149368286, "learning_rate": 7.167071688942892e-05, "loss": 1.5701, "step": 4673 }, { "epoch": 1.4187281833358627, "grad_norm": 0.5732050538063049, "learning_rate": 7.166464155528554e-05, "loss": 1.4822, "step": 4674 }, { "epoch": 1.4190317195325544, "grad_norm": 0.5432000756263733, "learning_rate": 7.165856622114217e-05, "loss": 1.6423, "step": 4675 }, { "epoch": 1.4193352557292458, "grad_norm": 0.4711626172065735, "learning_rate": 7.165249088699879e-05, "loss": 1.8018, "step": 4676 }, { "epoch": 1.4196387919259372, "grad_norm": 0.6901485323905945, "learning_rate": 7.16464155528554e-05, "loss": 1.3306, "step": 4677 }, { "epoch": 1.4199423281226287, "grad_norm": 0.4604353606700897, "learning_rate": 7.164034021871203e-05, "loss": 1.6195, "step": 4678 }, { "epoch": 1.42024586431932, "grad_norm": 0.5561215281486511, "learning_rate": 7.163426488456866e-05, "loss": 1.4401, "step": 4679 }, { "epoch": 1.4205494005160115, "grad_norm": 0.566388726234436, "learning_rate": 7.162818955042527e-05, "loss": 1.5088, "step": 4680 }, { "epoch": 1.420852936712703, "grad_norm": 0.4941454827785492, "learning_rate": 7.16221142162819e-05, "loss": 1.7022, "step": 4681 }, { "epoch": 1.4211564729093944, "grad_norm": 0.5353823900222778, "learning_rate": 7.161603888213853e-05, "loss": 1.4053, "step": 4682 }, { "epoch": 1.4214600091060858, "grad_norm": 0.5283215045928955, "learning_rate": 7.160996354799513e-05, "loss": 1.9388, "step": 4683 }, { "epoch": 1.4217635453027775, "grad_norm": 0.46900656819343567, "learning_rate": 7.160388821385176e-05, "loss": 1.5299, "step": 4684 }, { "epoch": 1.4220670814994687, "grad_norm": 0.5333936810493469, "learning_rate": 7.15978128797084e-05, "loss": 1.5468, "step": 4685 }, { "epoch": 1.4223706176961604, "grad_norm": 0.4555998742580414, "learning_rate": 7.159173754556501e-05, "loss": 1.677, "step": 4686 }, { "epoch": 1.4226741538928518, "grad_norm": 0.5287268161773682, "learning_rate": 7.158566221142163e-05, "loss": 1.5348, "step": 4687 }, { "epoch": 1.4229776900895432, "grad_norm": 0.489156037569046, "learning_rate": 7.157958687727825e-05, "loss": 1.8835, "step": 4688 }, { "epoch": 1.4232812262862347, "grad_norm": 0.5219280123710632, "learning_rate": 7.157351154313488e-05, "loss": 1.8132, "step": 4689 }, { "epoch": 1.423584762482926, "grad_norm": 0.5453715920448303, "learning_rate": 7.15674362089915e-05, "loss": 1.4375, "step": 4690 }, { "epoch": 1.4238882986796175, "grad_norm": 0.5377395749092102, "learning_rate": 7.156136087484811e-05, "loss": 1.4851, "step": 4691 }, { "epoch": 1.424191834876309, "grad_norm": 0.5290038585662842, "learning_rate": 7.155528554070474e-05, "loss": 1.9343, "step": 4692 }, { "epoch": 1.4244953710730004, "grad_norm": 0.5442641973495483, "learning_rate": 7.154921020656137e-05, "loss": 1.3488, "step": 4693 }, { "epoch": 1.4247989072696918, "grad_norm": 0.6024147272109985, "learning_rate": 7.154313487241798e-05, "loss": 1.6288, "step": 4694 }, { "epoch": 1.4251024434663835, "grad_norm": 0.5711460113525391, "learning_rate": 7.153705953827461e-05, "loss": 1.3639, "step": 4695 }, { "epoch": 1.4254059796630747, "grad_norm": 0.5421019196510315, "learning_rate": 7.153098420413124e-05, "loss": 1.923, "step": 4696 }, { "epoch": 1.4257095158597664, "grad_norm": 0.5579157471656799, "learning_rate": 7.152490886998784e-05, "loss": 1.5599, "step": 4697 }, { "epoch": 1.4260130520564578, "grad_norm": 0.515901505947113, "learning_rate": 7.151883353584447e-05, "loss": 1.6635, "step": 4698 }, { "epoch": 1.4263165882531492, "grad_norm": 0.5151125192642212, "learning_rate": 7.15127582017011e-05, "loss": 1.7712, "step": 4699 }, { "epoch": 1.4266201244498407, "grad_norm": 0.4949184060096741, "learning_rate": 7.150668286755772e-05, "loss": 1.9386, "step": 4700 }, { "epoch": 1.426923660646532, "grad_norm": 0.5237774848937988, "learning_rate": 7.150060753341434e-05, "loss": 1.6551, "step": 4701 }, { "epoch": 1.4272271968432235, "grad_norm": 0.45499083399772644, "learning_rate": 7.149453219927096e-05, "loss": 1.5892, "step": 4702 }, { "epoch": 1.427530733039915, "grad_norm": 0.5620893239974976, "learning_rate": 7.148845686512759e-05, "loss": 1.5941, "step": 4703 }, { "epoch": 1.4278342692366064, "grad_norm": 0.5575366616249084, "learning_rate": 7.14823815309842e-05, "loss": 1.7663, "step": 4704 }, { "epoch": 1.4281378054332978, "grad_norm": 0.5817383527755737, "learning_rate": 7.147630619684082e-05, "loss": 1.0529, "step": 4705 }, { "epoch": 1.4284413416299895, "grad_norm": 0.4917854964733124, "learning_rate": 7.147023086269745e-05, "loss": 1.8866, "step": 4706 }, { "epoch": 1.428744877826681, "grad_norm": 0.5297862887382507, "learning_rate": 7.146415552855407e-05, "loss": 1.9065, "step": 4707 }, { "epoch": 1.4290484140233723, "grad_norm": 0.4822104275226593, "learning_rate": 7.145808019441069e-05, "loss": 2.1447, "step": 4708 }, { "epoch": 1.4293519502200638, "grad_norm": 0.5464652180671692, "learning_rate": 7.145200486026732e-05, "loss": 1.5848, "step": 4709 }, { "epoch": 1.4296554864167552, "grad_norm": 0.541857123374939, "learning_rate": 7.144592952612395e-05, "loss": 1.8056, "step": 4710 }, { "epoch": 1.4299590226134467, "grad_norm": 0.5261391401290894, "learning_rate": 7.143985419198055e-05, "loss": 1.6894, "step": 4711 }, { "epoch": 1.430262558810138, "grad_norm": 0.6953042149543762, "learning_rate": 7.143377885783718e-05, "loss": 1.5562, "step": 4712 }, { "epoch": 1.4305660950068295, "grad_norm": 1.253004789352417, "learning_rate": 7.142770352369382e-05, "loss": 1.6115, "step": 4713 }, { "epoch": 1.430869631203521, "grad_norm": 0.46701014041900635, "learning_rate": 7.142162818955043e-05, "loss": 1.8057, "step": 4714 }, { "epoch": 1.4311731674002126, "grad_norm": 0.4235196113586426, "learning_rate": 7.141555285540705e-05, "loss": 1.6542, "step": 4715 }, { "epoch": 1.4314767035969038, "grad_norm": 0.6919435858726501, "learning_rate": 7.140947752126367e-05, "loss": 1.8055, "step": 4716 }, { "epoch": 1.4317802397935955, "grad_norm": 0.5037835836410522, "learning_rate": 7.14034021871203e-05, "loss": 1.7444, "step": 4717 }, { "epoch": 1.432083775990287, "grad_norm": 0.5179030299186707, "learning_rate": 7.139732685297692e-05, "loss": 0.9057, "step": 4718 }, { "epoch": 1.4323873121869783, "grad_norm": 0.4623833894729614, "learning_rate": 7.139125151883353e-05, "loss": 1.8318, "step": 4719 }, { "epoch": 1.4326908483836698, "grad_norm": 0.6244356632232666, "learning_rate": 7.138517618469016e-05, "loss": 1.7593, "step": 4720 }, { "epoch": 1.4329943845803612, "grad_norm": 0.41617628931999207, "learning_rate": 7.137910085054678e-05, "loss": 1.3169, "step": 4721 }, { "epoch": 1.4332979207770526, "grad_norm": 0.5794664025306702, "learning_rate": 7.13730255164034e-05, "loss": 1.4539, "step": 4722 }, { "epoch": 1.433601456973744, "grad_norm": 0.5115159749984741, "learning_rate": 7.136695018226003e-05, "loss": 1.5652, "step": 4723 }, { "epoch": 1.4339049931704355, "grad_norm": 0.5601288080215454, "learning_rate": 7.136087484811666e-05, "loss": 1.3252, "step": 4724 }, { "epoch": 1.434208529367127, "grad_norm": 0.5465983152389526, "learning_rate": 7.135479951397326e-05, "loss": 1.8357, "step": 4725 }, { "epoch": 1.4345120655638186, "grad_norm": 0.5939611792564392, "learning_rate": 7.13487241798299e-05, "loss": 1.1053, "step": 4726 }, { "epoch": 1.4348156017605098, "grad_norm": 0.563441276550293, "learning_rate": 7.134264884568653e-05, "loss": 1.9001, "step": 4727 }, { "epoch": 1.4351191379572015, "grad_norm": 0.5361672043800354, "learning_rate": 7.133657351154314e-05, "loss": 1.4157, "step": 4728 }, { "epoch": 1.435422674153893, "grad_norm": 0.4924670159816742, "learning_rate": 7.133049817739976e-05, "loss": 1.7192, "step": 4729 }, { "epoch": 1.4357262103505843, "grad_norm": 0.5592833757400513, "learning_rate": 7.132442284325638e-05, "loss": 1.4881, "step": 4730 }, { "epoch": 1.4360297465472758, "grad_norm": 0.47412756085395813, "learning_rate": 7.131834750911301e-05, "loss": 2.1238, "step": 4731 }, { "epoch": 1.4363332827439672, "grad_norm": 0.5320108532905579, "learning_rate": 7.131227217496963e-05, "loss": 1.4834, "step": 4732 }, { "epoch": 1.4366368189406586, "grad_norm": 0.4866894483566284, "learning_rate": 7.130619684082624e-05, "loss": 1.6534, "step": 4733 }, { "epoch": 1.43694035513735, "grad_norm": 0.4984651207923889, "learning_rate": 7.130012150668287e-05, "loss": 1.576, "step": 4734 }, { "epoch": 1.4372438913340415, "grad_norm": 0.4538702368736267, "learning_rate": 7.129404617253949e-05, "loss": 1.8083, "step": 4735 }, { "epoch": 1.437547427530733, "grad_norm": 0.5471993684768677, "learning_rate": 7.128797083839611e-05, "loss": 1.5235, "step": 4736 }, { "epoch": 1.4378509637274246, "grad_norm": 0.4857175350189209, "learning_rate": 7.128189550425274e-05, "loss": 1.8666, "step": 4737 }, { "epoch": 1.4381544999241158, "grad_norm": 0.4341141879558563, "learning_rate": 7.127582017010937e-05, "loss": 0.8605, "step": 4738 }, { "epoch": 1.4384580361208075, "grad_norm": 0.5297701954841614, "learning_rate": 7.126974483596597e-05, "loss": 2.0106, "step": 4739 }, { "epoch": 1.438761572317499, "grad_norm": 0.44098925590515137, "learning_rate": 7.12636695018226e-05, "loss": 1.3884, "step": 4740 }, { "epoch": 1.4390651085141903, "grad_norm": 0.6269996762275696, "learning_rate": 7.125759416767922e-05, "loss": 1.7329, "step": 4741 }, { "epoch": 1.4393686447108818, "grad_norm": 0.4201589524745941, "learning_rate": 7.125151883353585e-05, "loss": 1.908, "step": 4742 }, { "epoch": 1.4396721809075732, "grad_norm": 0.49259108304977417, "learning_rate": 7.124544349939247e-05, "loss": 1.688, "step": 4743 }, { "epoch": 1.4399757171042646, "grad_norm": 0.511873722076416, "learning_rate": 7.123936816524909e-05, "loss": 1.7732, "step": 4744 }, { "epoch": 1.440279253300956, "grad_norm": 0.4922768771648407, "learning_rate": 7.123329283110572e-05, "loss": 1.721, "step": 4745 }, { "epoch": 1.4405827894976477, "grad_norm": 0.4309976398944855, "learning_rate": 7.122721749696234e-05, "loss": 1.722, "step": 4746 }, { "epoch": 1.440886325694339, "grad_norm": 0.5664550065994263, "learning_rate": 7.122114216281895e-05, "loss": 1.7154, "step": 4747 }, { "epoch": 1.4411898618910306, "grad_norm": 0.5660502910614014, "learning_rate": 7.121506682867558e-05, "loss": 1.6217, "step": 4748 }, { "epoch": 1.441493398087722, "grad_norm": 0.48245811462402344, "learning_rate": 7.12089914945322e-05, "loss": 1.7066, "step": 4749 }, { "epoch": 1.4417969342844135, "grad_norm": 0.5408527255058289, "learning_rate": 7.120291616038882e-05, "loss": 1.4672, "step": 4750 }, { "epoch": 1.442100470481105, "grad_norm": 0.542116105556488, "learning_rate": 7.119684082624545e-05, "loss": 1.842, "step": 4751 }, { "epoch": 1.4424040066777963, "grad_norm": 0.6647868752479553, "learning_rate": 7.119076549210208e-05, "loss": 1.6458, "step": 4752 }, { "epoch": 1.4427075428744878, "grad_norm": 0.5036889910697937, "learning_rate": 7.118469015795868e-05, "loss": 1.3188, "step": 4753 }, { "epoch": 1.4430110790711792, "grad_norm": 0.5360887050628662, "learning_rate": 7.117861482381531e-05, "loss": 1.5876, "step": 4754 }, { "epoch": 1.4433146152678706, "grad_norm": 0.45022231340408325, "learning_rate": 7.117253948967193e-05, "loss": 1.5266, "step": 4755 }, { "epoch": 1.443618151464562, "grad_norm": 0.5475982427597046, "learning_rate": 7.116646415552855e-05, "loss": 1.476, "step": 4756 }, { "epoch": 1.4439216876612537, "grad_norm": 0.615437924861908, "learning_rate": 7.116038882138518e-05, "loss": 1.6777, "step": 4757 }, { "epoch": 1.444225223857945, "grad_norm": 0.5551777482032776, "learning_rate": 7.11543134872418e-05, "loss": 1.4589, "step": 4758 }, { "epoch": 1.4445287600546366, "grad_norm": 0.636407196521759, "learning_rate": 7.114823815309843e-05, "loss": 2.1606, "step": 4759 }, { "epoch": 1.444832296251328, "grad_norm": 0.5781602263450623, "learning_rate": 7.114216281895505e-05, "loss": 1.3962, "step": 4760 }, { "epoch": 1.4451358324480195, "grad_norm": 0.612797737121582, "learning_rate": 7.113608748481166e-05, "loss": 1.5954, "step": 4761 }, { "epoch": 1.445439368644711, "grad_norm": 0.5090053677558899, "learning_rate": 7.11300121506683e-05, "loss": 1.8134, "step": 4762 }, { "epoch": 1.4457429048414023, "grad_norm": 0.49501726031303406, "learning_rate": 7.112393681652491e-05, "loss": 1.8646, "step": 4763 }, { "epoch": 1.4460464410380938, "grad_norm": 0.44436565041542053, "learning_rate": 7.111786148238153e-05, "loss": 1.431, "step": 4764 }, { "epoch": 1.4463499772347852, "grad_norm": 0.5765573978424072, "learning_rate": 7.111178614823816e-05, "loss": 1.6304, "step": 4765 }, { "epoch": 1.4466535134314766, "grad_norm": 0.5061953663825989, "learning_rate": 7.110571081409479e-05, "loss": 1.905, "step": 4766 }, { "epoch": 1.446957049628168, "grad_norm": 0.5362197756767273, "learning_rate": 7.10996354799514e-05, "loss": 1.8781, "step": 4767 }, { "epoch": 1.4472605858248597, "grad_norm": 0.5062169432640076, "learning_rate": 7.109356014580802e-05, "loss": 1.7436, "step": 4768 }, { "epoch": 1.447564122021551, "grad_norm": 0.4389554560184479, "learning_rate": 7.108748481166464e-05, "loss": 1.1959, "step": 4769 }, { "epoch": 1.4478676582182426, "grad_norm": 0.5079648494720459, "learning_rate": 7.108140947752126e-05, "loss": 1.8988, "step": 4770 }, { "epoch": 1.448171194414934, "grad_norm": 0.5056853294372559, "learning_rate": 7.107533414337789e-05, "loss": 1.3921, "step": 4771 }, { "epoch": 1.4484747306116255, "grad_norm": 0.4805181622505188, "learning_rate": 7.106925880923451e-05, "loss": 1.6137, "step": 4772 }, { "epoch": 1.448778266808317, "grad_norm": 0.43535923957824707, "learning_rate": 7.106318347509114e-05, "loss": 1.619, "step": 4773 }, { "epoch": 1.4490818030050083, "grad_norm": 0.5391502976417542, "learning_rate": 7.105710814094776e-05, "loss": 1.637, "step": 4774 }, { "epoch": 1.4493853392016998, "grad_norm": 0.569017767906189, "learning_rate": 7.105103280680437e-05, "loss": 1.6042, "step": 4775 }, { "epoch": 1.4496888753983912, "grad_norm": 0.5475939512252808, "learning_rate": 7.1044957472661e-05, "loss": 1.9324, "step": 4776 }, { "epoch": 1.4499924115950826, "grad_norm": 0.5549785494804382, "learning_rate": 7.103888213851762e-05, "loss": 1.7943, "step": 4777 }, { "epoch": 1.450295947791774, "grad_norm": 0.5545724034309387, "learning_rate": 7.103280680437424e-05, "loss": 1.4165, "step": 4778 }, { "epoch": 1.4505994839884657, "grad_norm": 0.4446735978126526, "learning_rate": 7.102673147023087e-05, "loss": 0.9686, "step": 4779 }, { "epoch": 1.4509030201851572, "grad_norm": 0.5210545063018799, "learning_rate": 7.102065613608749e-05, "loss": 1.802, "step": 4780 }, { "epoch": 1.4512065563818486, "grad_norm": 0.5372775793075562, "learning_rate": 7.10145808019441e-05, "loss": 1.8198, "step": 4781 }, { "epoch": 1.45151009257854, "grad_norm": 0.513157069683075, "learning_rate": 7.100850546780073e-05, "loss": 1.8384, "step": 4782 }, { "epoch": 1.4518136287752315, "grad_norm": 0.5085527896881104, "learning_rate": 7.100243013365735e-05, "loss": 1.4529, "step": 4783 }, { "epoch": 1.452117164971923, "grad_norm": 0.5514604449272156, "learning_rate": 7.099635479951397e-05, "loss": 1.4275, "step": 4784 }, { "epoch": 1.4524207011686143, "grad_norm": 0.6645312905311584, "learning_rate": 7.09902794653706e-05, "loss": 1.3341, "step": 4785 }, { "epoch": 1.4527242373653058, "grad_norm": 0.4445338249206543, "learning_rate": 7.098420413122722e-05, "loss": 1.8186, "step": 4786 }, { "epoch": 1.4530277735619972, "grad_norm": 0.528165340423584, "learning_rate": 7.097812879708385e-05, "loss": 1.4959, "step": 4787 }, { "epoch": 1.4533313097586888, "grad_norm": 0.5748735666275024, "learning_rate": 7.097205346294047e-05, "loss": 1.8661, "step": 4788 }, { "epoch": 1.45363484595538, "grad_norm": 0.4958101212978363, "learning_rate": 7.096597812879708e-05, "loss": 1.7738, "step": 4789 }, { "epoch": 1.4539383821520717, "grad_norm": 0.622242271900177, "learning_rate": 7.095990279465371e-05, "loss": 1.8896, "step": 4790 }, { "epoch": 1.4542419183487632, "grad_norm": 0.861436665058136, "learning_rate": 7.095382746051033e-05, "loss": 1.5506, "step": 4791 }, { "epoch": 1.4545454545454546, "grad_norm": 0.5317303538322449, "learning_rate": 7.094775212636695e-05, "loss": 1.9661, "step": 4792 }, { "epoch": 1.454848990742146, "grad_norm": 0.42405256628990173, "learning_rate": 7.094167679222358e-05, "loss": 1.8856, "step": 4793 }, { "epoch": 1.4551525269388375, "grad_norm": 0.5652716755867004, "learning_rate": 7.09356014580802e-05, "loss": 1.5874, "step": 4794 }, { "epoch": 1.4554560631355289, "grad_norm": 0.46071988344192505, "learning_rate": 7.092952612393681e-05, "loss": 1.7498, "step": 4795 }, { "epoch": 1.4557595993322203, "grad_norm": 0.5258306264877319, "learning_rate": 7.092345078979345e-05, "loss": 1.5423, "step": 4796 }, { "epoch": 1.4560631355289118, "grad_norm": 0.5660673975944519, "learning_rate": 7.091737545565006e-05, "loss": 1.4623, "step": 4797 }, { "epoch": 1.4563666717256032, "grad_norm": 0.5659142732620239, "learning_rate": 7.091130012150668e-05, "loss": 1.9051, "step": 4798 }, { "epoch": 1.4566702079222948, "grad_norm": 0.537135899066925, "learning_rate": 7.090522478736331e-05, "loss": 1.4598, "step": 4799 }, { "epoch": 1.456973744118986, "grad_norm": 0.5714205503463745, "learning_rate": 7.089914945321993e-05, "loss": 1.9371, "step": 4800 }, { "epoch": 1.4572772803156777, "grad_norm": 0.47099271416664124, "learning_rate": 7.089307411907656e-05, "loss": 2.0266, "step": 4801 }, { "epoch": 1.4575808165123691, "grad_norm": 0.46086302399635315, "learning_rate": 7.088699878493318e-05, "loss": 1.0404, "step": 4802 }, { "epoch": 1.4578843527090606, "grad_norm": 0.5183219313621521, "learning_rate": 7.08809234507898e-05, "loss": 1.4641, "step": 4803 }, { "epoch": 1.458187888905752, "grad_norm": 0.3814326226711273, "learning_rate": 7.087484811664642e-05, "loss": 1.1975, "step": 4804 }, { "epoch": 1.4584914251024435, "grad_norm": 0.5242198705673218, "learning_rate": 7.086877278250304e-05, "loss": 1.5483, "step": 4805 }, { "epoch": 1.4587949612991349, "grad_norm": 0.4313776195049286, "learning_rate": 7.086269744835966e-05, "loss": 1.7125, "step": 4806 }, { "epoch": 1.4590984974958263, "grad_norm": 0.5048007965087891, "learning_rate": 7.085662211421629e-05, "loss": 1.7111, "step": 4807 }, { "epoch": 1.4594020336925178, "grad_norm": 0.5413311123847961, "learning_rate": 7.085054678007291e-05, "loss": 1.3843, "step": 4808 }, { "epoch": 1.4597055698892092, "grad_norm": 0.4879262447357178, "learning_rate": 7.084447144592952e-05, "loss": 1.4675, "step": 4809 }, { "epoch": 1.4600091060859008, "grad_norm": 0.4626248776912689, "learning_rate": 7.083839611178616e-05, "loss": 1.8313, "step": 4810 }, { "epoch": 1.4603126422825923, "grad_norm": 0.4955083727836609, "learning_rate": 7.083232077764277e-05, "loss": 1.5718, "step": 4811 }, { "epoch": 1.4606161784792837, "grad_norm": 0.5106918215751648, "learning_rate": 7.082624544349939e-05, "loss": 1.8617, "step": 4812 }, { "epoch": 1.4609197146759751, "grad_norm": 0.5505479574203491, "learning_rate": 7.082017010935602e-05, "loss": 1.2744, "step": 4813 }, { "epoch": 1.4612232508726666, "grad_norm": 0.5480737686157227, "learning_rate": 7.081409477521264e-05, "loss": 1.6127, "step": 4814 }, { "epoch": 1.461526787069358, "grad_norm": 0.5669567584991455, "learning_rate": 7.080801944106927e-05, "loss": 1.4748, "step": 4815 }, { "epoch": 1.4618303232660494, "grad_norm": 0.47782737016677856, "learning_rate": 7.080194410692589e-05, "loss": 1.9775, "step": 4816 }, { "epoch": 1.4621338594627409, "grad_norm": 0.5649453401565552, "learning_rate": 7.07958687727825e-05, "loss": 1.7222, "step": 4817 }, { "epoch": 1.4624373956594323, "grad_norm": 0.5078287720680237, "learning_rate": 7.078979343863913e-05, "loss": 1.5271, "step": 4818 }, { "epoch": 1.462740931856124, "grad_norm": 0.5010936856269836, "learning_rate": 7.078371810449575e-05, "loss": 1.6845, "step": 4819 }, { "epoch": 1.4630444680528152, "grad_norm": 0.4189418852329254, "learning_rate": 7.077764277035237e-05, "loss": 1.6715, "step": 4820 }, { "epoch": 1.4633480042495068, "grad_norm": 0.46101582050323486, "learning_rate": 7.0771567436209e-05, "loss": 1.9044, "step": 4821 }, { "epoch": 1.4636515404461983, "grad_norm": 0.45041772723197937, "learning_rate": 7.076549210206562e-05, "loss": 1.666, "step": 4822 }, { "epoch": 1.4639550766428897, "grad_norm": 0.49406835436820984, "learning_rate": 7.075941676792223e-05, "loss": 1.8148, "step": 4823 }, { "epoch": 1.4642586128395811, "grad_norm": 0.5963855981826782, "learning_rate": 7.075334143377887e-05, "loss": 1.389, "step": 4824 }, { "epoch": 1.4645621490362726, "grad_norm": 0.6109672784805298, "learning_rate": 7.074726609963548e-05, "loss": 1.5109, "step": 4825 }, { "epoch": 1.464865685232964, "grad_norm": 0.627612829208374, "learning_rate": 7.07411907654921e-05, "loss": 1.721, "step": 4826 }, { "epoch": 1.4651692214296554, "grad_norm": 0.5766866207122803, "learning_rate": 7.073511543134873e-05, "loss": 1.6085, "step": 4827 }, { "epoch": 1.4654727576263469, "grad_norm": 0.5267140865325928, "learning_rate": 7.072904009720535e-05, "loss": 1.4195, "step": 4828 }, { "epoch": 1.4657762938230383, "grad_norm": 0.7349816560745239, "learning_rate": 7.072296476306197e-05, "loss": 1.2837, "step": 4829 }, { "epoch": 1.46607983001973, "grad_norm": 0.47294366359710693, "learning_rate": 7.07168894289186e-05, "loss": 1.6431, "step": 4830 }, { "epoch": 1.4663833662164212, "grad_norm": 0.7959756255149841, "learning_rate": 7.071081409477521e-05, "loss": 1.7113, "step": 4831 }, { "epoch": 1.4666869024131128, "grad_norm": 0.5271220803260803, "learning_rate": 7.070473876063184e-05, "loss": 1.7415, "step": 4832 }, { "epoch": 1.4669904386098043, "grad_norm": 0.5845800042152405, "learning_rate": 7.069866342648846e-05, "loss": 1.6222, "step": 4833 }, { "epoch": 1.4672939748064957, "grad_norm": 0.5617759227752686, "learning_rate": 7.069258809234508e-05, "loss": 1.3666, "step": 4834 }, { "epoch": 1.4675975110031871, "grad_norm": 0.489835262298584, "learning_rate": 7.068651275820171e-05, "loss": 1.6019, "step": 4835 }, { "epoch": 1.4679010471998786, "grad_norm": 0.6882174611091614, "learning_rate": 7.068043742405833e-05, "loss": 1.8398, "step": 4836 }, { "epoch": 1.46820458339657, "grad_norm": 0.5395167469978333, "learning_rate": 7.067436208991494e-05, "loss": 1.7218, "step": 4837 }, { "epoch": 1.4685081195932614, "grad_norm": 0.5546643137931824, "learning_rate": 7.066828675577158e-05, "loss": 1.6591, "step": 4838 }, { "epoch": 1.4688116557899529, "grad_norm": 0.4737721085548401, "learning_rate": 7.066221142162819e-05, "loss": 1.8764, "step": 4839 }, { "epoch": 1.4691151919866443, "grad_norm": 0.5098763108253479, "learning_rate": 7.065613608748481e-05, "loss": 1.4366, "step": 4840 }, { "epoch": 1.469418728183336, "grad_norm": 0.49989405274391174, "learning_rate": 7.065006075334144e-05, "loss": 1.5866, "step": 4841 }, { "epoch": 1.4697222643800274, "grad_norm": 0.4978291690349579, "learning_rate": 7.064398541919806e-05, "loss": 1.7242, "step": 4842 }, { "epoch": 1.4700258005767188, "grad_norm": 0.5850281715393066, "learning_rate": 7.063791008505468e-05, "loss": 1.5802, "step": 4843 }, { "epoch": 1.4703293367734103, "grad_norm": 0.806866466999054, "learning_rate": 7.06318347509113e-05, "loss": 1.3167, "step": 4844 }, { "epoch": 1.4706328729701017, "grad_norm": 0.7724255323410034, "learning_rate": 7.062575941676792e-05, "loss": 1.7092, "step": 4845 }, { "epoch": 1.4709364091667931, "grad_norm": 0.5280294418334961, "learning_rate": 7.061968408262455e-05, "loss": 1.4041, "step": 4846 }, { "epoch": 1.4712399453634846, "grad_norm": 0.5118871927261353, "learning_rate": 7.061360874848117e-05, "loss": 1.6343, "step": 4847 }, { "epoch": 1.471543481560176, "grad_norm": 0.6602824330329895, "learning_rate": 7.060753341433779e-05, "loss": 1.4313, "step": 4848 }, { "epoch": 1.4718470177568674, "grad_norm": 0.5540229082107544, "learning_rate": 7.060145808019442e-05, "loss": 1.2642, "step": 4849 }, { "epoch": 1.472150553953559, "grad_norm": 0.5670307278633118, "learning_rate": 7.059538274605104e-05, "loss": 1.7549, "step": 4850 }, { "epoch": 1.4724540901502503, "grad_norm": 0.5450428128242493, "learning_rate": 7.058930741190765e-05, "loss": 1.5367, "step": 4851 }, { "epoch": 1.472757626346942, "grad_norm": 0.5904386043548584, "learning_rate": 7.058323207776429e-05, "loss": 1.815, "step": 4852 }, { "epoch": 1.4730611625436334, "grad_norm": 0.5121384263038635, "learning_rate": 7.05771567436209e-05, "loss": 1.443, "step": 4853 }, { "epoch": 1.4733646987403248, "grad_norm": 0.49516379833221436, "learning_rate": 7.057108140947752e-05, "loss": 1.2342, "step": 4854 }, { "epoch": 1.4736682349370163, "grad_norm": 0.5796108245849609, "learning_rate": 7.056500607533415e-05, "loss": 1.4407, "step": 4855 }, { "epoch": 1.4739717711337077, "grad_norm": 0.5850707292556763, "learning_rate": 7.055893074119077e-05, "loss": 1.9238, "step": 4856 }, { "epoch": 1.4742753073303991, "grad_norm": 0.6012457013130188, "learning_rate": 7.055285540704739e-05, "loss": 1.9955, "step": 4857 }, { "epoch": 1.4745788435270906, "grad_norm": 0.5519123077392578, "learning_rate": 7.054678007290402e-05, "loss": 1.6369, "step": 4858 }, { "epoch": 1.474882379723782, "grad_norm": 0.7996104955673218, "learning_rate": 7.054070473876063e-05, "loss": 1.6808, "step": 4859 }, { "epoch": 1.4751859159204734, "grad_norm": 0.6222035884857178, "learning_rate": 7.053462940461726e-05, "loss": 1.5358, "step": 4860 }, { "epoch": 1.475489452117165, "grad_norm": 0.6133623719215393, "learning_rate": 7.052855407047388e-05, "loss": 1.1753, "step": 4861 }, { "epoch": 1.4757929883138563, "grad_norm": 0.5412503480911255, "learning_rate": 7.05224787363305e-05, "loss": 1.344, "step": 4862 }, { "epoch": 1.476096524510548, "grad_norm": 0.490725576877594, "learning_rate": 7.051640340218713e-05, "loss": 1.3589, "step": 4863 }, { "epoch": 1.4764000607072394, "grad_norm": 0.5937108397483826, "learning_rate": 7.051032806804375e-05, "loss": 1.7992, "step": 4864 }, { "epoch": 1.4767035969039308, "grad_norm": 0.4251902401447296, "learning_rate": 7.050425273390036e-05, "loss": 1.221, "step": 4865 }, { "epoch": 1.4770071331006223, "grad_norm": 0.5891363620758057, "learning_rate": 7.0498177399757e-05, "loss": 1.5272, "step": 4866 }, { "epoch": 1.4773106692973137, "grad_norm": 0.47068464756011963, "learning_rate": 7.049210206561361e-05, "loss": 1.2203, "step": 4867 }, { "epoch": 1.4776142054940051, "grad_norm": 0.5927923917770386, "learning_rate": 7.048602673147023e-05, "loss": 1.8061, "step": 4868 }, { "epoch": 1.4779177416906966, "grad_norm": 0.5405677556991577, "learning_rate": 7.047995139732686e-05, "loss": 1.0712, "step": 4869 }, { "epoch": 1.478221277887388, "grad_norm": 0.5127203464508057, "learning_rate": 7.047387606318348e-05, "loss": 1.9416, "step": 4870 }, { "epoch": 1.4785248140840794, "grad_norm": 0.7339115738868713, "learning_rate": 7.04678007290401e-05, "loss": 1.679, "step": 4871 }, { "epoch": 1.478828350280771, "grad_norm": 0.3953307271003723, "learning_rate": 7.046172539489673e-05, "loss": 1.4667, "step": 4872 }, { "epoch": 1.4791318864774623, "grad_norm": 0.5734744668006897, "learning_rate": 7.045565006075334e-05, "loss": 1.3896, "step": 4873 }, { "epoch": 1.479435422674154, "grad_norm": 0.47078248858451843, "learning_rate": 7.044957472660997e-05, "loss": 1.3958, "step": 4874 }, { "epoch": 1.4797389588708454, "grad_norm": 0.698595404624939, "learning_rate": 7.044349939246659e-05, "loss": 1.5145, "step": 4875 }, { "epoch": 1.4800424950675368, "grad_norm": 0.45018211007118225, "learning_rate": 7.043742405832321e-05, "loss": 1.8364, "step": 4876 }, { "epoch": 1.4803460312642283, "grad_norm": 0.5878861546516418, "learning_rate": 7.043134872417984e-05, "loss": 2.0177, "step": 4877 }, { "epoch": 1.4806495674609197, "grad_norm": 0.5218092799186707, "learning_rate": 7.042527339003644e-05, "loss": 1.6696, "step": 4878 }, { "epoch": 1.4809531036576111, "grad_norm": 0.5571547150611877, "learning_rate": 7.041919805589307e-05, "loss": 1.9252, "step": 4879 }, { "epoch": 1.4812566398543026, "grad_norm": 0.9365406036376953, "learning_rate": 7.04131227217497e-05, "loss": 1.6427, "step": 4880 }, { "epoch": 1.4815601760509942, "grad_norm": 0.43765008449554443, "learning_rate": 7.040704738760632e-05, "loss": 1.7834, "step": 4881 }, { "epoch": 1.4818637122476854, "grad_norm": 0.5410556197166443, "learning_rate": 7.040097205346294e-05, "loss": 1.4718, "step": 4882 }, { "epoch": 1.482167248444377, "grad_norm": 0.5025068521499634, "learning_rate": 7.039489671931957e-05, "loss": 2.0082, "step": 4883 }, { "epoch": 1.4824707846410685, "grad_norm": 0.46235191822052, "learning_rate": 7.038882138517619e-05, "loss": 1.4398, "step": 4884 }, { "epoch": 1.48277432083776, "grad_norm": 0.45842286944389343, "learning_rate": 7.03827460510328e-05, "loss": 1.4679, "step": 4885 }, { "epoch": 1.4830778570344514, "grad_norm": 0.5397442579269409, "learning_rate": 7.037667071688944e-05, "loss": 1.7636, "step": 4886 }, { "epoch": 1.4833813932311428, "grad_norm": 0.8042169213294983, "learning_rate": 7.037059538274605e-05, "loss": 1.5971, "step": 4887 }, { "epoch": 1.4836849294278343, "grad_norm": 0.4883686602115631, "learning_rate": 7.036452004860268e-05, "loss": 1.7809, "step": 4888 }, { "epoch": 1.4839884656245257, "grad_norm": 0.5571742653846741, "learning_rate": 7.03584447144593e-05, "loss": 1.652, "step": 4889 }, { "epoch": 1.4842920018212171, "grad_norm": 0.5409842729568481, "learning_rate": 7.035236938031592e-05, "loss": 1.7244, "step": 4890 }, { "epoch": 1.4845955380179086, "grad_norm": 0.6065815091133118, "learning_rate": 7.034629404617255e-05, "loss": 1.2745, "step": 4891 }, { "epoch": 1.4848990742146002, "grad_norm": 0.499057412147522, "learning_rate": 7.034021871202915e-05, "loss": 1.6754, "step": 4892 }, { "epoch": 1.4852026104112914, "grad_norm": 0.5934293866157532, "learning_rate": 7.033414337788578e-05, "loss": 1.6668, "step": 4893 }, { "epoch": 1.485506146607983, "grad_norm": 0.5210697650909424, "learning_rate": 7.032806804374242e-05, "loss": 1.768, "step": 4894 }, { "epoch": 1.4858096828046745, "grad_norm": 0.5933223962783813, "learning_rate": 7.032199270959903e-05, "loss": 0.8766, "step": 4895 }, { "epoch": 1.486113219001366, "grad_norm": 0.5411461591720581, "learning_rate": 7.031591737545565e-05, "loss": 1.7448, "step": 4896 }, { "epoch": 1.4864167551980574, "grad_norm": 0.6732122302055359, "learning_rate": 7.030984204131228e-05, "loss": 1.9058, "step": 4897 }, { "epoch": 1.4867202913947488, "grad_norm": 0.4998563528060913, "learning_rate": 7.03037667071689e-05, "loss": 1.6999, "step": 4898 }, { "epoch": 1.4870238275914403, "grad_norm": 0.9698503613471985, "learning_rate": 7.029769137302552e-05, "loss": 1.5221, "step": 4899 }, { "epoch": 1.4873273637881317, "grad_norm": 0.4289691746234894, "learning_rate": 7.029161603888215e-05, "loss": 1.7837, "step": 4900 }, { "epoch": 1.4876308999848231, "grad_norm": 0.5373130440711975, "learning_rate": 7.028554070473876e-05, "loss": 1.2654, "step": 4901 }, { "epoch": 1.4879344361815146, "grad_norm": 0.4715062975883484, "learning_rate": 7.027946537059538e-05, "loss": 1.7417, "step": 4902 }, { "epoch": 1.4882379723782062, "grad_norm": 0.494567334651947, "learning_rate": 7.027339003645201e-05, "loss": 1.6927, "step": 4903 }, { "epoch": 1.4885415085748974, "grad_norm": 0.5440135598182678, "learning_rate": 7.026731470230863e-05, "loss": 1.7141, "step": 4904 }, { "epoch": 1.488845044771589, "grad_norm": 0.5883985161781311, "learning_rate": 7.026123936816526e-05, "loss": 1.3748, "step": 4905 }, { "epoch": 1.4891485809682805, "grad_norm": 0.5393871068954468, "learning_rate": 7.025516403402186e-05, "loss": 1.6513, "step": 4906 }, { "epoch": 1.489452117164972, "grad_norm": 0.49532851576805115, "learning_rate": 7.02490886998785e-05, "loss": 1.6941, "step": 4907 }, { "epoch": 1.4897556533616634, "grad_norm": 0.5608948469161987, "learning_rate": 7.024301336573513e-05, "loss": 1.698, "step": 4908 }, { "epoch": 1.4900591895583548, "grad_norm": 0.45833712816238403, "learning_rate": 7.023693803159174e-05, "loss": 0.855, "step": 4909 }, { "epoch": 1.4903627257550462, "grad_norm": 0.7820297479629517, "learning_rate": 7.023086269744836e-05, "loss": 0.8055, "step": 4910 }, { "epoch": 1.4906662619517377, "grad_norm": 0.531315803527832, "learning_rate": 7.022478736330499e-05, "loss": 1.7807, "step": 4911 }, { "epoch": 1.4909697981484293, "grad_norm": 0.5644878149032593, "learning_rate": 7.021871202916161e-05, "loss": 1.6072, "step": 4912 }, { "epoch": 1.4912733343451205, "grad_norm": 0.5558152198791504, "learning_rate": 7.021263669501823e-05, "loss": 1.7253, "step": 4913 }, { "epoch": 1.4915768705418122, "grad_norm": 0.6361897587776184, "learning_rate": 7.020656136087486e-05, "loss": 2.008, "step": 4914 }, { "epoch": 1.4918804067385036, "grad_norm": 0.5509002208709717, "learning_rate": 7.020048602673147e-05, "loss": 1.5245, "step": 4915 }, { "epoch": 1.492183942935195, "grad_norm": 0.6185054183006287, "learning_rate": 7.019441069258809e-05, "loss": 1.5209, "step": 4916 }, { "epoch": 1.4924874791318865, "grad_norm": 0.6301572918891907, "learning_rate": 7.018833535844471e-05, "loss": 1.6199, "step": 4917 }, { "epoch": 1.492791015328578, "grad_norm": 0.6556615233421326, "learning_rate": 7.018226002430134e-05, "loss": 1.1805, "step": 4918 }, { "epoch": 1.4930945515252694, "grad_norm": 0.6239224672317505, "learning_rate": 7.017618469015797e-05, "loss": 1.7463, "step": 4919 }, { "epoch": 1.4933980877219608, "grad_norm": 0.5668457746505737, "learning_rate": 7.017010935601457e-05, "loss": 1.5714, "step": 4920 }, { "epoch": 1.4937016239186522, "grad_norm": 0.4906651973724365, "learning_rate": 7.01640340218712e-05, "loss": 1.7403, "step": 4921 }, { "epoch": 1.4940051601153437, "grad_norm": 0.35869914293289185, "learning_rate": 7.015795868772784e-05, "loss": 0.96, "step": 4922 }, { "epoch": 1.4943086963120353, "grad_norm": 0.5741409063339233, "learning_rate": 7.015188335358445e-05, "loss": 1.4338, "step": 4923 }, { "epoch": 1.4946122325087265, "grad_norm": 0.4738176763057709, "learning_rate": 7.014580801944107e-05, "loss": 1.511, "step": 4924 }, { "epoch": 1.4949157687054182, "grad_norm": 0.5779879093170166, "learning_rate": 7.01397326852977e-05, "loss": 1.8035, "step": 4925 }, { "epoch": 1.4952193049021096, "grad_norm": 11.4434232711792, "learning_rate": 7.013365735115432e-05, "loss": 1.404, "step": 4926 }, { "epoch": 1.495522841098801, "grad_norm": 0.5170081257820129, "learning_rate": 7.012758201701094e-05, "loss": 1.6944, "step": 4927 }, { "epoch": 1.4958263772954925, "grad_norm": 0.6129285097122192, "learning_rate": 7.012150668286757e-05, "loss": 1.411, "step": 4928 }, { "epoch": 1.496129913492184, "grad_norm": 0.4695473313331604, "learning_rate": 7.011543134872418e-05, "loss": 1.9634, "step": 4929 }, { "epoch": 1.4964334496888754, "grad_norm": 0.4601220190525055, "learning_rate": 7.01093560145808e-05, "loss": 1.9807, "step": 4930 }, { "epoch": 1.4967369858855668, "grad_norm": 0.6048194766044617, "learning_rate": 7.010328068043742e-05, "loss": 1.666, "step": 4931 }, { "epoch": 1.4970405220822582, "grad_norm": 0.5324653387069702, "learning_rate": 7.009720534629405e-05, "loss": 1.7026, "step": 4932 }, { "epoch": 1.4973440582789497, "grad_norm": 0.5313439965248108, "learning_rate": 7.009113001215068e-05, "loss": 1.7723, "step": 4933 }, { "epoch": 1.4976475944756413, "grad_norm": 0.5671830177307129, "learning_rate": 7.008505467800728e-05, "loss": 1.3399, "step": 4934 }, { "epoch": 1.4979511306723325, "grad_norm": 0.5517529249191284, "learning_rate": 7.007897934386391e-05, "loss": 1.3371, "step": 4935 }, { "epoch": 1.4982546668690242, "grad_norm": 0.47462624311447144, "learning_rate": 7.007290400972055e-05, "loss": 1.77, "step": 4936 }, { "epoch": 1.4985582030657156, "grad_norm": 0.5272954702377319, "learning_rate": 7.006682867557716e-05, "loss": 1.738, "step": 4937 }, { "epoch": 1.498861739262407, "grad_norm": 0.48636749386787415, "learning_rate": 7.006075334143378e-05, "loss": 1.807, "step": 4938 }, { "epoch": 1.4991652754590985, "grad_norm": 0.4686090350151062, "learning_rate": 7.005467800729041e-05, "loss": 1.9367, "step": 4939 }, { "epoch": 1.49946881165579, "grad_norm": 0.48979878425598145, "learning_rate": 7.004860267314703e-05, "loss": 1.3131, "step": 4940 }, { "epoch": 1.4997723478524814, "grad_norm": 0.571522057056427, "learning_rate": 7.004252733900365e-05, "loss": 1.7552, "step": 4941 }, { "epoch": 1.5000758840491728, "grad_norm": 0.4702144265174866, "learning_rate": 7.003645200486028e-05, "loss": 1.6307, "step": 4942 }, { "epoch": 1.5003794202458645, "grad_norm": 0.5681264400482178, "learning_rate": 7.00303766707169e-05, "loss": 1.8803, "step": 4943 }, { "epoch": 1.5006829564425557, "grad_norm": 0.5513084530830383, "learning_rate": 7.002430133657351e-05, "loss": 1.8452, "step": 4944 }, { "epoch": 1.5009864926392473, "grad_norm": 0.5764619708061218, "learning_rate": 7.001822600243013e-05, "loss": 1.0705, "step": 4945 }, { "epoch": 1.5012900288359385, "grad_norm": 0.5168572664260864, "learning_rate": 7.001215066828676e-05, "loss": 1.8198, "step": 4946 }, { "epoch": 1.5015935650326302, "grad_norm": 1.6397596597671509, "learning_rate": 7.000607533414339e-05, "loss": 1.6889, "step": 4947 }, { "epoch": 1.5018971012293216, "grad_norm": 1.1885254383087158, "learning_rate": 7e-05, "loss": 1.4736, "step": 4948 }, { "epoch": 1.502200637426013, "grad_norm": 0.6035342216491699, "learning_rate": 6.999392466585662e-05, "loss": 1.2236, "step": 4949 }, { "epoch": 1.5025041736227045, "grad_norm": 0.5453857183456421, "learning_rate": 6.998784933171326e-05, "loss": 1.4884, "step": 4950 }, { "epoch": 1.502807709819396, "grad_norm": 0.6001697778701782, "learning_rate": 6.998177399756986e-05, "loss": 1.6897, "step": 4951 }, { "epoch": 1.5031112460160874, "grad_norm": 0.5285768508911133, "learning_rate": 6.997569866342649e-05, "loss": 1.5918, "step": 4952 }, { "epoch": 1.5034147822127788, "grad_norm": 0.5310909748077393, "learning_rate": 6.996962332928312e-05, "loss": 0.9908, "step": 4953 }, { "epoch": 1.5037183184094705, "grad_norm": 0.5659394264221191, "learning_rate": 6.996354799513974e-05, "loss": 1.6639, "step": 4954 }, { "epoch": 1.5040218546061617, "grad_norm": 0.49565961956977844, "learning_rate": 6.995747266099636e-05, "loss": 1.8041, "step": 4955 }, { "epoch": 1.5043253908028533, "grad_norm": 0.6244103312492371, "learning_rate": 6.995139732685299e-05, "loss": 1.7278, "step": 4956 }, { "epoch": 1.5046289269995445, "grad_norm": 0.5681789517402649, "learning_rate": 6.99453219927096e-05, "loss": 1.7712, "step": 4957 }, { "epoch": 1.5049324631962362, "grad_norm": 0.6059290170669556, "learning_rate": 6.993924665856622e-05, "loss": 1.6451, "step": 4958 }, { "epoch": 1.5052359993929276, "grad_norm": 0.5958153605461121, "learning_rate": 6.993317132442284e-05, "loss": 1.5677, "step": 4959 }, { "epoch": 1.505539535589619, "grad_norm": 0.4673093855381012, "learning_rate": 6.992709599027947e-05, "loss": 0.9809, "step": 4960 }, { "epoch": 1.5058430717863105, "grad_norm": 0.5468495488166809, "learning_rate": 6.99210206561361e-05, "loss": 1.5763, "step": 4961 }, { "epoch": 1.506146607983002, "grad_norm": 0.48700201511383057, "learning_rate": 6.99149453219927e-05, "loss": 1.5329, "step": 4962 }, { "epoch": 1.5064501441796936, "grad_norm": 0.5750671625137329, "learning_rate": 6.990886998784933e-05, "loss": 1.904, "step": 4963 }, { "epoch": 1.5067536803763848, "grad_norm": 0.6705580353736877, "learning_rate": 6.990279465370597e-05, "loss": 1.6202, "step": 4964 }, { "epoch": 1.5070572165730765, "grad_norm": 0.7310038208961487, "learning_rate": 6.989671931956257e-05, "loss": 1.9462, "step": 4965 }, { "epoch": 1.5073607527697677, "grad_norm": 0.5241039991378784, "learning_rate": 6.98906439854192e-05, "loss": 0.926, "step": 4966 }, { "epoch": 1.5076642889664593, "grad_norm": 0.5601332187652588, "learning_rate": 6.988456865127583e-05, "loss": 1.6858, "step": 4967 }, { "epoch": 1.5079678251631508, "grad_norm": 0.5411149859428406, "learning_rate": 6.987849331713245e-05, "loss": 1.4975, "step": 4968 }, { "epoch": 1.5082713613598422, "grad_norm": 0.512823224067688, "learning_rate": 6.987241798298907e-05, "loss": 1.9705, "step": 4969 }, { "epoch": 1.5085748975565336, "grad_norm": 0.41236433386802673, "learning_rate": 6.98663426488457e-05, "loss": 1.7138, "step": 4970 }, { "epoch": 1.508878433753225, "grad_norm": 1.0118792057037354, "learning_rate": 6.986026731470231e-05, "loss": 1.6026, "step": 4971 }, { "epoch": 1.5091819699499165, "grad_norm": 0.4692041575908661, "learning_rate": 6.985419198055893e-05, "loss": 1.6974, "step": 4972 }, { "epoch": 1.509485506146608, "grad_norm": 0.5381711721420288, "learning_rate": 6.984811664641555e-05, "loss": 1.5082, "step": 4973 }, { "epoch": 1.5097890423432996, "grad_norm": 0.5390987992286682, "learning_rate": 6.984204131227218e-05, "loss": 1.852, "step": 4974 }, { "epoch": 1.5100925785399908, "grad_norm": 0.5014956593513489, "learning_rate": 6.98359659781288e-05, "loss": 1.7055, "step": 4975 }, { "epoch": 1.5103961147366824, "grad_norm": 0.6018857359886169, "learning_rate": 6.982989064398541e-05, "loss": 1.4449, "step": 4976 }, { "epoch": 1.5106996509333737, "grad_norm": 1.7235846519470215, "learning_rate": 6.982381530984204e-05, "loss": 1.6223, "step": 4977 }, { "epoch": 1.5110031871300653, "grad_norm": 0.4839053153991699, "learning_rate": 6.981773997569868e-05, "loss": 1.7088, "step": 4978 }, { "epoch": 1.5113067233267568, "grad_norm": 0.5103007555007935, "learning_rate": 6.981166464155528e-05, "loss": 1.743, "step": 4979 }, { "epoch": 1.5116102595234482, "grad_norm": 0.5718008279800415, "learning_rate": 6.980558930741191e-05, "loss": 1.5478, "step": 4980 }, { "epoch": 1.5119137957201396, "grad_norm": 0.48067647218704224, "learning_rate": 6.979951397326854e-05, "loss": 1.6347, "step": 4981 }, { "epoch": 1.512217331916831, "grad_norm": 0.5288820266723633, "learning_rate": 6.979343863912516e-05, "loss": 1.5188, "step": 4982 }, { "epoch": 1.5125208681135225, "grad_norm": 0.5474231839179993, "learning_rate": 6.978736330498178e-05, "loss": 1.8939, "step": 4983 }, { "epoch": 1.512824404310214, "grad_norm": 0.4533451795578003, "learning_rate": 6.97812879708384e-05, "loss": 1.8278, "step": 4984 }, { "epoch": 1.5131279405069056, "grad_norm": 0.5542921423912048, "learning_rate": 6.977521263669502e-05, "loss": 1.8469, "step": 4985 }, { "epoch": 1.5134314767035968, "grad_norm": 0.48920702934265137, "learning_rate": 6.976913730255164e-05, "loss": 1.747, "step": 4986 }, { "epoch": 1.5137350129002884, "grad_norm": 0.5489048361778259, "learning_rate": 6.976306196840826e-05, "loss": 1.882, "step": 4987 }, { "epoch": 1.5140385490969797, "grad_norm": 0.5060453414916992, "learning_rate": 6.975698663426489e-05, "loss": 1.9188, "step": 4988 }, { "epoch": 1.5143420852936713, "grad_norm": 0.48653027415275574, "learning_rate": 6.97509113001215e-05, "loss": 1.8193, "step": 4989 }, { "epoch": 1.5146456214903627, "grad_norm": 0.5047556757926941, "learning_rate": 6.974483596597812e-05, "loss": 1.7924, "step": 4990 }, { "epoch": 1.5149491576870542, "grad_norm": 0.5091820955276489, "learning_rate": 6.973876063183475e-05, "loss": 1.7446, "step": 4991 }, { "epoch": 1.5152526938837456, "grad_norm": 0.5132384300231934, "learning_rate": 6.973268529769139e-05, "loss": 1.6975, "step": 4992 }, { "epoch": 1.515556230080437, "grad_norm": 0.5605453848838806, "learning_rate": 6.972660996354799e-05, "loss": 1.747, "step": 4993 }, { "epoch": 1.5158597662771287, "grad_norm": 0.607867419719696, "learning_rate": 6.972053462940462e-05, "loss": 1.7074, "step": 4994 }, { "epoch": 1.51616330247382, "grad_norm": 0.8664141297340393, "learning_rate": 6.971445929526125e-05, "loss": 1.4887, "step": 4995 }, { "epoch": 1.5164668386705116, "grad_norm": 0.5505083799362183, "learning_rate": 6.970838396111787e-05, "loss": 2.0955, "step": 4996 }, { "epoch": 1.5167703748672028, "grad_norm": 0.49877801537513733, "learning_rate": 6.970230862697449e-05, "loss": 1.8985, "step": 4997 }, { "epoch": 1.5170739110638944, "grad_norm": 0.5322781801223755, "learning_rate": 6.969623329283112e-05, "loss": 1.7619, "step": 4998 }, { "epoch": 1.5173774472605859, "grad_norm": 0.5772939920425415, "learning_rate": 6.969015795868773e-05, "loss": 1.5791, "step": 4999 }, { "epoch": 1.5176809834572773, "grad_norm": 0.5946705341339111, "learning_rate": 6.968408262454435e-05, "loss": 1.4594, "step": 5000 }, { "epoch": 1.5179845196539687, "grad_norm": 0.4682813584804535, "learning_rate": 6.967800729040097e-05, "loss": 1.3301, "step": 5001 }, { "epoch": 1.5182880558506602, "grad_norm": 0.542500376701355, "learning_rate": 6.96719319562576e-05, "loss": 1.7164, "step": 5002 }, { "epoch": 1.5185915920473516, "grad_norm": 0.5071382522583008, "learning_rate": 6.966585662211422e-05, "loss": 1.3703, "step": 5003 }, { "epoch": 1.518895128244043, "grad_norm": 0.5770359039306641, "learning_rate": 6.965978128797083e-05, "loss": 1.4195, "step": 5004 }, { "epoch": 1.5191986644407347, "grad_norm": 0.5445078015327454, "learning_rate": 6.965370595382746e-05, "loss": 1.6649, "step": 5005 }, { "epoch": 1.519502200637426, "grad_norm": 0.5967651009559631, "learning_rate": 6.96476306196841e-05, "loss": 1.3306, "step": 5006 }, { "epoch": 1.5198057368341176, "grad_norm": 0.7904636859893799, "learning_rate": 6.96415552855407e-05, "loss": 1.6451, "step": 5007 }, { "epoch": 1.5201092730308088, "grad_norm": 0.5439865589141846, "learning_rate": 6.963547995139733e-05, "loss": 1.9616, "step": 5008 }, { "epoch": 1.5204128092275004, "grad_norm": 0.5701196789741516, "learning_rate": 6.962940461725396e-05, "loss": 1.2662, "step": 5009 }, { "epoch": 1.5207163454241919, "grad_norm": 0.5003536343574524, "learning_rate": 6.962332928311058e-05, "loss": 1.7916, "step": 5010 }, { "epoch": 1.5210198816208833, "grad_norm": 0.6876890659332275, "learning_rate": 6.96172539489672e-05, "loss": 1.8057, "step": 5011 }, { "epoch": 1.5213234178175747, "grad_norm": 0.48604485392570496, "learning_rate": 6.961117861482381e-05, "loss": 1.8073, "step": 5012 }, { "epoch": 1.5216269540142662, "grad_norm": 0.4319641888141632, "learning_rate": 6.960510328068044e-05, "loss": 1.7092, "step": 5013 }, { "epoch": 1.5219304902109576, "grad_norm": 0.49503380060195923, "learning_rate": 6.959902794653706e-05, "loss": 1.7313, "step": 5014 }, { "epoch": 1.522234026407649, "grad_norm": 0.46390098333358765, "learning_rate": 6.959295261239368e-05, "loss": 1.7932, "step": 5015 }, { "epoch": 1.5225375626043407, "grad_norm": 0.5236671566963196, "learning_rate": 6.958687727825031e-05, "loss": 1.527, "step": 5016 }, { "epoch": 1.522841098801032, "grad_norm": 0.5421310663223267, "learning_rate": 6.958080194410693e-05, "loss": 1.7505, "step": 5017 }, { "epoch": 1.5231446349977236, "grad_norm": 0.43941354751586914, "learning_rate": 6.957472660996354e-05, "loss": 1.1991, "step": 5018 }, { "epoch": 1.5234481711944148, "grad_norm": 0.8650984764099121, "learning_rate": 6.956865127582017e-05, "loss": 1.349, "step": 5019 }, { "epoch": 1.5237517073911064, "grad_norm": 0.6118148565292358, "learning_rate": 6.95625759416768e-05, "loss": 1.7518, "step": 5020 }, { "epoch": 1.5240552435877979, "grad_norm": 0.5155607461929321, "learning_rate": 6.955650060753341e-05, "loss": 1.743, "step": 5021 }, { "epoch": 1.5243587797844893, "grad_norm": 0.5593464374542236, "learning_rate": 6.955042527339004e-05, "loss": 2.0366, "step": 5022 }, { "epoch": 1.5246623159811807, "grad_norm": 0.45686206221580505, "learning_rate": 6.954434993924667e-05, "loss": 1.3583, "step": 5023 }, { "epoch": 1.5249658521778722, "grad_norm": 0.5127416253089905, "learning_rate": 6.953827460510329e-05, "loss": 1.8798, "step": 5024 }, { "epoch": 1.5252693883745638, "grad_norm": 0.49719300866127014, "learning_rate": 6.95321992709599e-05, "loss": 1.8293, "step": 5025 }, { "epoch": 1.525572924571255, "grad_norm": 0.5015152096748352, "learning_rate": 6.952612393681652e-05, "loss": 1.5741, "step": 5026 }, { "epoch": 1.5258764607679467, "grad_norm": 0.7065471410751343, "learning_rate": 6.952004860267315e-05, "loss": 1.8697, "step": 5027 }, { "epoch": 1.526179996964638, "grad_norm": 0.5936737656593323, "learning_rate": 6.951397326852977e-05, "loss": 1.6771, "step": 5028 }, { "epoch": 1.5264835331613296, "grad_norm": 0.40442827343940735, "learning_rate": 6.950789793438639e-05, "loss": 1.876, "step": 5029 }, { "epoch": 1.526787069358021, "grad_norm": 0.3835110366344452, "learning_rate": 6.950182260024302e-05, "loss": 1.5589, "step": 5030 }, { "epoch": 1.5270906055547124, "grad_norm": 0.49415749311447144, "learning_rate": 6.949574726609964e-05, "loss": 1.2888, "step": 5031 }, { "epoch": 1.5273941417514039, "grad_norm": 0.677581787109375, "learning_rate": 6.948967193195625e-05, "loss": 1.8734, "step": 5032 }, { "epoch": 1.5276976779480953, "grad_norm": 0.573188066482544, "learning_rate": 6.948359659781289e-05, "loss": 1.4829, "step": 5033 }, { "epoch": 1.5280012141447867, "grad_norm": 0.5042519569396973, "learning_rate": 6.947752126366952e-05, "loss": 1.7926, "step": 5034 }, { "epoch": 1.5283047503414782, "grad_norm": 0.4527590572834015, "learning_rate": 6.947144592952612e-05, "loss": 1.8739, "step": 5035 }, { "epoch": 1.5286082865381698, "grad_norm": 0.6127368211746216, "learning_rate": 6.946537059538275e-05, "loss": 1.6489, "step": 5036 }, { "epoch": 1.528911822734861, "grad_norm": 0.6344648599624634, "learning_rate": 6.945929526123938e-05, "loss": 1.303, "step": 5037 }, { "epoch": 1.5292153589315527, "grad_norm": 0.4736098349094391, "learning_rate": 6.945321992709599e-05, "loss": 1.8006, "step": 5038 }, { "epoch": 1.529518895128244, "grad_norm": 0.5820353031158447, "learning_rate": 6.944714459295262e-05, "loss": 1.1874, "step": 5039 }, { "epoch": 1.5298224313249356, "grad_norm": 0.7533172369003296, "learning_rate": 6.944106925880923e-05, "loss": 1.8722, "step": 5040 }, { "epoch": 1.530125967521627, "grad_norm": 0.5187054872512817, "learning_rate": 6.943499392466586e-05, "loss": 1.4976, "step": 5041 }, { "epoch": 1.5304295037183184, "grad_norm": 1.9647060632705688, "learning_rate": 6.942891859052248e-05, "loss": 1.6822, "step": 5042 }, { "epoch": 1.5307330399150099, "grad_norm": 0.49814414978027344, "learning_rate": 6.94228432563791e-05, "loss": 1.7004, "step": 5043 }, { "epoch": 1.5310365761117013, "grad_norm": 0.47123029828071594, "learning_rate": 6.941676792223573e-05, "loss": 1.4828, "step": 5044 }, { "epoch": 1.5313401123083927, "grad_norm": 0.5213639140129089, "learning_rate": 6.941069258809235e-05, "loss": 1.2126, "step": 5045 }, { "epoch": 1.5316436485050842, "grad_norm": 1.4494082927703857, "learning_rate": 6.940461725394896e-05, "loss": 1.6, "step": 5046 }, { "epoch": 1.5319471847017758, "grad_norm": 0.497048556804657, "learning_rate": 6.93985419198056e-05, "loss": 1.7893, "step": 5047 }, { "epoch": 1.532250720898467, "grad_norm": 0.6306423544883728, "learning_rate": 6.939246658566221e-05, "loss": 1.8145, "step": 5048 }, { "epoch": 1.5325542570951587, "grad_norm": 0.5947224497795105, "learning_rate": 6.938639125151883e-05, "loss": 2.114, "step": 5049 }, { "epoch": 1.53285779329185, "grad_norm": 0.5670956969261169, "learning_rate": 6.938031591737546e-05, "loss": 1.8438, "step": 5050 }, { "epoch": 1.5331613294885416, "grad_norm": 0.5403228402137756, "learning_rate": 6.937424058323209e-05, "loss": 1.4862, "step": 5051 }, { "epoch": 1.533464865685233, "grad_norm": 0.5355117321014404, "learning_rate": 6.93681652490887e-05, "loss": 1.6828, "step": 5052 }, { "epoch": 1.5337684018819244, "grad_norm": 0.5913071036338806, "learning_rate": 6.936208991494533e-05, "loss": 1.0808, "step": 5053 }, { "epoch": 1.5340719380786159, "grad_norm": 0.590332567691803, "learning_rate": 6.935601458080194e-05, "loss": 1.4234, "step": 5054 }, { "epoch": 1.5343754742753073, "grad_norm": 0.5567223429679871, "learning_rate": 6.934993924665857e-05, "loss": 1.8105, "step": 5055 }, { "epoch": 1.534679010471999, "grad_norm": 0.5712899565696716, "learning_rate": 6.934386391251519e-05, "loss": 1.8365, "step": 5056 }, { "epoch": 1.5349825466686902, "grad_norm": 0.46898317337036133, "learning_rate": 6.933778857837181e-05, "loss": 1.7624, "step": 5057 }, { "epoch": 1.5352860828653818, "grad_norm": 0.5434770584106445, "learning_rate": 6.933171324422844e-05, "loss": 1.8745, "step": 5058 }, { "epoch": 1.535589619062073, "grad_norm": 0.5103024840354919, "learning_rate": 6.932563791008506e-05, "loss": 1.314, "step": 5059 }, { "epoch": 1.5358931552587647, "grad_norm": 0.5385938882827759, "learning_rate": 6.931956257594167e-05, "loss": 1.4106, "step": 5060 }, { "epoch": 1.536196691455456, "grad_norm": 0.52946537733078, "learning_rate": 6.93134872417983e-05, "loss": 1.7167, "step": 5061 }, { "epoch": 1.5365002276521476, "grad_norm": 0.499805212020874, "learning_rate": 6.930741190765492e-05, "loss": 1.8297, "step": 5062 }, { "epoch": 1.536803763848839, "grad_norm": 0.48632124066352844, "learning_rate": 6.930133657351154e-05, "loss": 1.6382, "step": 5063 }, { "epoch": 1.5371073000455304, "grad_norm": 0.5471576452255249, "learning_rate": 6.929526123936817e-05, "loss": 1.9328, "step": 5064 }, { "epoch": 1.5374108362422219, "grad_norm": 0.5234617590904236, "learning_rate": 6.92891859052248e-05, "loss": 1.7133, "step": 5065 }, { "epoch": 1.5377143724389133, "grad_norm": 0.7335671186447144, "learning_rate": 6.92831105710814e-05, "loss": 1.5148, "step": 5066 }, { "epoch": 1.538017908635605, "grad_norm": 0.5422836542129517, "learning_rate": 6.927703523693804e-05, "loss": 1.5883, "step": 5067 }, { "epoch": 1.5383214448322962, "grad_norm": 0.5048597455024719, "learning_rate": 6.927095990279465e-05, "loss": 1.7012, "step": 5068 }, { "epoch": 1.5386249810289878, "grad_norm": 0.49904757738113403, "learning_rate": 6.926488456865128e-05, "loss": 1.8099, "step": 5069 }, { "epoch": 1.538928517225679, "grad_norm": 0.527485191822052, "learning_rate": 6.92588092345079e-05, "loss": 1.7986, "step": 5070 }, { "epoch": 1.5392320534223707, "grad_norm": 0.5265811681747437, "learning_rate": 6.925273390036452e-05, "loss": 1.4898, "step": 5071 }, { "epoch": 1.5395355896190621, "grad_norm": 0.5025148391723633, "learning_rate": 6.924665856622115e-05, "loss": 1.201, "step": 5072 }, { "epoch": 1.5398391258157536, "grad_norm": 0.561477780342102, "learning_rate": 6.924058323207777e-05, "loss": 1.7444, "step": 5073 }, { "epoch": 1.540142662012445, "grad_norm": 0.4805125892162323, "learning_rate": 6.923450789793438e-05, "loss": 1.749, "step": 5074 }, { "epoch": 1.5404461982091364, "grad_norm": 0.5457605123519897, "learning_rate": 6.922843256379102e-05, "loss": 1.5691, "step": 5075 }, { "epoch": 1.5407497344058279, "grad_norm": 0.4902309775352478, "learning_rate": 6.922235722964763e-05, "loss": 1.7727, "step": 5076 }, { "epoch": 1.5410532706025193, "grad_norm": 0.505024790763855, "learning_rate": 6.921628189550425e-05, "loss": 1.809, "step": 5077 }, { "epoch": 1.541356806799211, "grad_norm": 0.6486569046974182, "learning_rate": 6.921020656136088e-05, "loss": 1.544, "step": 5078 }, { "epoch": 1.5416603429959022, "grad_norm": 0.6943787336349487, "learning_rate": 6.920413122721751e-05, "loss": 1.5817, "step": 5079 }, { "epoch": 1.5419638791925938, "grad_norm": 0.49823305010795593, "learning_rate": 6.919805589307412e-05, "loss": 1.5363, "step": 5080 }, { "epoch": 1.542267415389285, "grad_norm": 0.49246180057525635, "learning_rate": 6.919198055893075e-05, "loss": 1.5483, "step": 5081 }, { "epoch": 1.5425709515859767, "grad_norm": 0.5904031991958618, "learning_rate": 6.918590522478736e-05, "loss": 1.3076, "step": 5082 }, { "epoch": 1.5428744877826681, "grad_norm": 0.5842667818069458, "learning_rate": 6.9179829890644e-05, "loss": 1.7947, "step": 5083 }, { "epoch": 1.5431780239793595, "grad_norm": 0.5764684677124023, "learning_rate": 6.917375455650061e-05, "loss": 1.6732, "step": 5084 }, { "epoch": 1.543481560176051, "grad_norm": 0.4869197607040405, "learning_rate": 6.916767922235723e-05, "loss": 1.2825, "step": 5085 }, { "epoch": 1.5437850963727424, "grad_norm": 0.4678569436073303, "learning_rate": 6.916160388821386e-05, "loss": 1.8523, "step": 5086 }, { "epoch": 1.5440886325694338, "grad_norm": 0.6023167967796326, "learning_rate": 6.915552855407048e-05, "loss": 1.1368, "step": 5087 }, { "epoch": 1.5443921687661253, "grad_norm": 0.6071855425834656, "learning_rate": 6.91494532199271e-05, "loss": 1.6182, "step": 5088 }, { "epoch": 1.544695704962817, "grad_norm": 0.8810903429985046, "learning_rate": 6.914337788578373e-05, "loss": 1.9165, "step": 5089 }, { "epoch": 1.5449992411595082, "grad_norm": 0.6049922108650208, "learning_rate": 6.913730255164034e-05, "loss": 1.7061, "step": 5090 }, { "epoch": 1.5453027773561998, "grad_norm": 0.5413761138916016, "learning_rate": 6.913122721749696e-05, "loss": 1.8952, "step": 5091 }, { "epoch": 1.545606313552891, "grad_norm": 0.519356906414032, "learning_rate": 6.912515188335359e-05, "loss": 1.7272, "step": 5092 }, { "epoch": 1.5459098497495827, "grad_norm": 0.4805813729763031, "learning_rate": 6.911907654921021e-05, "loss": 1.7761, "step": 5093 }, { "epoch": 1.546213385946274, "grad_norm": 0.5507004857063293, "learning_rate": 6.911300121506683e-05, "loss": 1.7849, "step": 5094 }, { "epoch": 1.5465169221429655, "grad_norm": 0.48082712292671204, "learning_rate": 6.910692588092346e-05, "loss": 1.7475, "step": 5095 }, { "epoch": 1.546820458339657, "grad_norm": 0.5184032320976257, "learning_rate": 6.910085054678007e-05, "loss": 1.4425, "step": 5096 }, { "epoch": 1.5471239945363484, "grad_norm": 0.6835402250289917, "learning_rate": 6.90947752126367e-05, "loss": 1.7552, "step": 5097 }, { "epoch": 1.54742753073304, "grad_norm": 0.5267919898033142, "learning_rate": 6.908869987849332e-05, "loss": 1.2211, "step": 5098 }, { "epoch": 1.5477310669297313, "grad_norm": 0.7247775197029114, "learning_rate": 6.908262454434994e-05, "loss": 1.4436, "step": 5099 }, { "epoch": 1.548034603126423, "grad_norm": 0.5839969515800476, "learning_rate": 6.907654921020657e-05, "loss": 1.7545, "step": 5100 }, { "epoch": 1.5483381393231141, "grad_norm": 0.5333119034767151, "learning_rate": 6.907047387606319e-05, "loss": 1.6516, "step": 5101 }, { "epoch": 1.5486416755198058, "grad_norm": 0.5837914943695068, "learning_rate": 6.90643985419198e-05, "loss": 1.7908, "step": 5102 }, { "epoch": 1.5489452117164972, "grad_norm": 0.5100420713424683, "learning_rate": 6.905832320777644e-05, "loss": 1.4083, "step": 5103 }, { "epoch": 1.5492487479131887, "grad_norm": 0.5830096006393433, "learning_rate": 6.905224787363305e-05, "loss": 1.7333, "step": 5104 }, { "epoch": 1.54955228410988, "grad_norm": 1.1144078969955444, "learning_rate": 6.904617253948967e-05, "loss": 1.5282, "step": 5105 }, { "epoch": 1.5498558203065715, "grad_norm": 0.41218632459640503, "learning_rate": 6.90400972053463e-05, "loss": 0.9582, "step": 5106 }, { "epoch": 1.550159356503263, "grad_norm": 0.5433921813964844, "learning_rate": 6.903402187120292e-05, "loss": 1.8383, "step": 5107 }, { "epoch": 1.5504628926999544, "grad_norm": 0.5715109705924988, "learning_rate": 6.902794653705954e-05, "loss": 1.7578, "step": 5108 }, { "epoch": 1.550766428896646, "grad_norm": 0.5353966355323792, "learning_rate": 6.902187120291617e-05, "loss": 1.4237, "step": 5109 }, { "epoch": 1.5510699650933373, "grad_norm": 0.6180744171142578, "learning_rate": 6.901579586877278e-05, "loss": 1.5549, "step": 5110 }, { "epoch": 1.551373501290029, "grad_norm": 0.48517996072769165, "learning_rate": 6.90097205346294e-05, "loss": 1.7625, "step": 5111 }, { "epoch": 1.5516770374867201, "grad_norm": 0.5804780721664429, "learning_rate": 6.900364520048603e-05, "loss": 1.7948, "step": 5112 }, { "epoch": 1.5519805736834118, "grad_norm": 0.47300025820732117, "learning_rate": 6.899756986634265e-05, "loss": 1.6219, "step": 5113 }, { "epoch": 1.5522841098801032, "grad_norm": 0.5687447190284729, "learning_rate": 6.899149453219928e-05, "loss": 1.7505, "step": 5114 }, { "epoch": 1.5525876460767947, "grad_norm": 0.5722203254699707, "learning_rate": 6.89854191980559e-05, "loss": 1.7819, "step": 5115 }, { "epoch": 1.552891182273486, "grad_norm": 0.5327666401863098, "learning_rate": 6.897934386391251e-05, "loss": 1.7752, "step": 5116 }, { "epoch": 1.5531947184701775, "grad_norm": 0.5604074001312256, "learning_rate": 6.897326852976915e-05, "loss": 1.5922, "step": 5117 }, { "epoch": 1.553498254666869, "grad_norm": 0.4844719469547272, "learning_rate": 6.896719319562576e-05, "loss": 1.3195, "step": 5118 }, { "epoch": 1.5538017908635604, "grad_norm": 0.6321808099746704, "learning_rate": 6.896111786148238e-05, "loss": 1.8087, "step": 5119 }, { "epoch": 1.554105327060252, "grad_norm": 0.49531543254852295, "learning_rate": 6.895504252733901e-05, "loss": 1.7925, "step": 5120 }, { "epoch": 1.5544088632569433, "grad_norm": 0.5053551197052002, "learning_rate": 6.894896719319563e-05, "loss": 1.6427, "step": 5121 }, { "epoch": 1.554712399453635, "grad_norm": 0.5420728325843811, "learning_rate": 6.894289185905225e-05, "loss": 1.8029, "step": 5122 }, { "epoch": 1.5550159356503261, "grad_norm": 0.6182068586349487, "learning_rate": 6.893681652490888e-05, "loss": 2.2098, "step": 5123 }, { "epoch": 1.5553194718470178, "grad_norm": 0.5669702887535095, "learning_rate": 6.89307411907655e-05, "loss": 1.5848, "step": 5124 }, { "epoch": 1.5556230080437092, "grad_norm": 0.5447514653205872, "learning_rate": 6.892466585662211e-05, "loss": 1.3853, "step": 5125 }, { "epoch": 1.5559265442404007, "grad_norm": 0.38322126865386963, "learning_rate": 6.891859052247874e-05, "loss": 1.2577, "step": 5126 }, { "epoch": 1.556230080437092, "grad_norm": 0.5988068580627441, "learning_rate": 6.891251518833536e-05, "loss": 1.4318, "step": 5127 }, { "epoch": 1.5565336166337835, "grad_norm": 0.43476438522338867, "learning_rate": 6.890643985419199e-05, "loss": 1.6554, "step": 5128 }, { "epoch": 1.5568371528304752, "grad_norm": 0.5268298983573914, "learning_rate": 6.890036452004861e-05, "loss": 1.562, "step": 5129 }, { "epoch": 1.5571406890271664, "grad_norm": 0.5895389914512634, "learning_rate": 6.889428918590522e-05, "loss": 1.5002, "step": 5130 }, { "epoch": 1.557444225223858, "grad_norm": 0.5553779006004333, "learning_rate": 6.888821385176186e-05, "loss": 1.7396, "step": 5131 }, { "epoch": 1.5577477614205493, "grad_norm": 0.5850470066070557, "learning_rate": 6.888213851761847e-05, "loss": 1.578, "step": 5132 }, { "epoch": 1.558051297617241, "grad_norm": 0.514907717704773, "learning_rate": 6.887606318347509e-05, "loss": 1.7704, "step": 5133 }, { "epoch": 1.5583548338139324, "grad_norm": 1.6753712892532349, "learning_rate": 6.886998784933172e-05, "loss": 1.4806, "step": 5134 }, { "epoch": 1.5586583700106238, "grad_norm": 0.5311963558197021, "learning_rate": 6.886391251518834e-05, "loss": 1.5369, "step": 5135 }, { "epoch": 1.5589619062073152, "grad_norm": 0.547295093536377, "learning_rate": 6.885783718104496e-05, "loss": 1.8403, "step": 5136 }, { "epoch": 1.5592654424040067, "grad_norm": 0.5358843207359314, "learning_rate": 6.885176184690159e-05, "loss": 2.058, "step": 5137 }, { "epoch": 1.559568978600698, "grad_norm": 0.5402520895004272, "learning_rate": 6.88456865127582e-05, "loss": 1.8126, "step": 5138 }, { "epoch": 1.5598725147973895, "grad_norm": 0.4622855484485626, "learning_rate": 6.883961117861482e-05, "loss": 1.2372, "step": 5139 }, { "epoch": 1.5601760509940812, "grad_norm": 0.5184701681137085, "learning_rate": 6.883353584447145e-05, "loss": 1.9226, "step": 5140 }, { "epoch": 1.5604795871907724, "grad_norm": 0.4912964701652527, "learning_rate": 6.882746051032807e-05, "loss": 1.7784, "step": 5141 }, { "epoch": 1.560783123387464, "grad_norm": 0.5692001581192017, "learning_rate": 6.88213851761847e-05, "loss": 1.7847, "step": 5142 }, { "epoch": 1.5610866595841553, "grad_norm": 1.0396461486816406, "learning_rate": 6.881530984204132e-05, "loss": 1.7171, "step": 5143 }, { "epoch": 1.561390195780847, "grad_norm": 0.5134807825088501, "learning_rate": 6.880923450789793e-05, "loss": 1.5056, "step": 5144 }, { "epoch": 1.5616937319775384, "grad_norm": 0.539484977722168, "learning_rate": 6.880315917375457e-05, "loss": 1.686, "step": 5145 }, { "epoch": 1.5619972681742298, "grad_norm": 0.5286623239517212, "learning_rate": 6.879708383961118e-05, "loss": 1.827, "step": 5146 }, { "epoch": 1.5623008043709212, "grad_norm": 0.5663209557533264, "learning_rate": 6.87910085054678e-05, "loss": 1.7342, "step": 5147 }, { "epoch": 1.5626043405676127, "grad_norm": 0.5498020052909851, "learning_rate": 6.878493317132443e-05, "loss": 1.8626, "step": 5148 }, { "epoch": 1.562907876764304, "grad_norm": 0.5602872371673584, "learning_rate": 6.877885783718105e-05, "loss": 1.7361, "step": 5149 }, { "epoch": 1.5632114129609955, "grad_norm": 0.4635816514492035, "learning_rate": 6.877278250303767e-05, "loss": 1.6465, "step": 5150 }, { "epoch": 1.5635149491576872, "grad_norm": 0.600685715675354, "learning_rate": 6.87667071688943e-05, "loss": 1.6324, "step": 5151 }, { "epoch": 1.5638184853543784, "grad_norm": 0.525507926940918, "learning_rate": 6.876063183475091e-05, "loss": 1.7158, "step": 5152 }, { "epoch": 1.56412202155107, "grad_norm": 1.154478669166565, "learning_rate": 6.875455650060753e-05, "loss": 2.0856, "step": 5153 }, { "epoch": 1.5644255577477613, "grad_norm": 0.43475109338760376, "learning_rate": 6.874848116646416e-05, "loss": 1.3634, "step": 5154 }, { "epoch": 1.564729093944453, "grad_norm": 1.1901203393936157, "learning_rate": 6.874240583232078e-05, "loss": 1.1507, "step": 5155 }, { "epoch": 1.5650326301411444, "grad_norm": 0.48481816053390503, "learning_rate": 6.873633049817741e-05, "loss": 1.6388, "step": 5156 }, { "epoch": 1.5653361663378358, "grad_norm": 0.5131592154502869, "learning_rate": 6.873025516403403e-05, "loss": 1.4084, "step": 5157 }, { "epoch": 1.5656397025345272, "grad_norm": 0.5295942425727844, "learning_rate": 6.872417982989064e-05, "loss": 1.2543, "step": 5158 }, { "epoch": 1.5659432387312187, "grad_norm": 0.6020081043243408, "learning_rate": 6.871810449574728e-05, "loss": 1.6094, "step": 5159 }, { "epoch": 1.5662467749279103, "grad_norm": 0.5166780352592468, "learning_rate": 6.871202916160389e-05, "loss": 1.7124, "step": 5160 }, { "epoch": 1.5665503111246015, "grad_norm": 0.5535523891448975, "learning_rate": 6.870595382746051e-05, "loss": 1.7073, "step": 5161 }, { "epoch": 1.5668538473212932, "grad_norm": 0.6225024461746216, "learning_rate": 6.869987849331714e-05, "loss": 1.2864, "step": 5162 }, { "epoch": 1.5671573835179844, "grad_norm": 1.0813506841659546, "learning_rate": 6.869380315917376e-05, "loss": 1.8424, "step": 5163 }, { "epoch": 1.567460919714676, "grad_norm": 0.5852144360542297, "learning_rate": 6.868772782503038e-05, "loss": 1.5746, "step": 5164 }, { "epoch": 1.5677644559113675, "grad_norm": 0.6405759453773499, "learning_rate": 6.8681652490887e-05, "loss": 1.8351, "step": 5165 }, { "epoch": 1.568067992108059, "grad_norm": 0.5141618847846985, "learning_rate": 6.867557715674362e-05, "loss": 1.7889, "step": 5166 }, { "epoch": 1.5683715283047504, "grad_norm": 1.1576392650604248, "learning_rate": 6.866950182260024e-05, "loss": 1.2938, "step": 5167 }, { "epoch": 1.5686750645014418, "grad_norm": 0.5479152202606201, "learning_rate": 6.866342648845687e-05, "loss": 1.808, "step": 5168 }, { "epoch": 1.5689786006981332, "grad_norm": 0.5595178604125977, "learning_rate": 6.865735115431349e-05, "loss": 1.6656, "step": 5169 }, { "epoch": 1.5692821368948247, "grad_norm": 0.4428215026855469, "learning_rate": 6.865127582017012e-05, "loss": 1.7519, "step": 5170 }, { "epoch": 1.5695856730915163, "grad_norm": 0.5423892736434937, "learning_rate": 6.864520048602674e-05, "loss": 1.703, "step": 5171 }, { "epoch": 1.5698892092882075, "grad_norm": 0.4570651650428772, "learning_rate": 6.863912515188335e-05, "loss": 1.7215, "step": 5172 }, { "epoch": 1.5701927454848992, "grad_norm": 0.5852006673812866, "learning_rate": 6.863304981773999e-05, "loss": 1.7473, "step": 5173 }, { "epoch": 1.5704962816815904, "grad_norm": 0.6939977407455444, "learning_rate": 6.86269744835966e-05, "loss": 1.4824, "step": 5174 }, { "epoch": 1.570799817878282, "grad_norm": 0.4711341857910156, "learning_rate": 6.862089914945322e-05, "loss": 1.706, "step": 5175 }, { "epoch": 1.5711033540749735, "grad_norm": 0.5366457104682922, "learning_rate": 6.861482381530985e-05, "loss": 1.9728, "step": 5176 }, { "epoch": 1.571406890271665, "grad_norm": 0.5411386489868164, "learning_rate": 6.860874848116647e-05, "loss": 1.7552, "step": 5177 }, { "epoch": 1.5717104264683563, "grad_norm": 0.5751674771308899, "learning_rate": 6.860267314702309e-05, "loss": 1.9909, "step": 5178 }, { "epoch": 1.5720139626650478, "grad_norm": 0.4560869038105011, "learning_rate": 6.859659781287972e-05, "loss": 1.4371, "step": 5179 }, { "epoch": 1.5723174988617392, "grad_norm": 0.53570955991745, "learning_rate": 6.859052247873633e-05, "loss": 1.6533, "step": 5180 }, { "epoch": 1.5726210350584306, "grad_norm": 0.4982752799987793, "learning_rate": 6.858444714459295e-05, "loss": 1.2318, "step": 5181 }, { "epoch": 1.5729245712551223, "grad_norm": 0.5627778768539429, "learning_rate": 6.857837181044958e-05, "loss": 1.7169, "step": 5182 }, { "epoch": 1.5732281074518135, "grad_norm": 0.4794982373714447, "learning_rate": 6.85722964763062e-05, "loss": 1.2168, "step": 5183 }, { "epoch": 1.5735316436485052, "grad_norm": 0.5762937664985657, "learning_rate": 6.856622114216282e-05, "loss": 1.4961, "step": 5184 }, { "epoch": 1.5738351798451964, "grad_norm": 0.5434368252754211, "learning_rate": 6.856014580801945e-05, "loss": 1.6214, "step": 5185 }, { "epoch": 1.574138716041888, "grad_norm": 0.7843817472457886, "learning_rate": 6.855407047387606e-05, "loss": 1.7494, "step": 5186 }, { "epoch": 1.5744422522385795, "grad_norm": 0.5866186618804932, "learning_rate": 6.85479951397327e-05, "loss": 1.575, "step": 5187 }, { "epoch": 1.574745788435271, "grad_norm": 0.5419967174530029, "learning_rate": 6.85419198055893e-05, "loss": 1.7683, "step": 5188 }, { "epoch": 1.5750493246319623, "grad_norm": 0.4712877571582794, "learning_rate": 6.853584447144593e-05, "loss": 1.6409, "step": 5189 }, { "epoch": 1.5753528608286538, "grad_norm": 0.5708344578742981, "learning_rate": 6.852976913730256e-05, "loss": 1.6154, "step": 5190 }, { "epoch": 1.5756563970253454, "grad_norm": 0.6477283835411072, "learning_rate": 6.852369380315918e-05, "loss": 1.3777, "step": 5191 }, { "epoch": 1.5759599332220366, "grad_norm": 0.5167595744132996, "learning_rate": 6.85176184690158e-05, "loss": 1.7067, "step": 5192 }, { "epoch": 1.5762634694187283, "grad_norm": 0.9224995374679565, "learning_rate": 6.851154313487243e-05, "loss": 1.8738, "step": 5193 }, { "epoch": 1.5765670056154195, "grad_norm": 0.528824508190155, "learning_rate": 6.850546780072904e-05, "loss": 1.703, "step": 5194 }, { "epoch": 1.5768705418121112, "grad_norm": 0.5642966032028198, "learning_rate": 6.849939246658566e-05, "loss": 1.7035, "step": 5195 }, { "epoch": 1.5771740780088026, "grad_norm": 0.5331950783729553, "learning_rate": 6.849331713244229e-05, "loss": 1.7863, "step": 5196 }, { "epoch": 1.577477614205494, "grad_norm": 0.6543232798576355, "learning_rate": 6.848724179829891e-05, "loss": 1.6948, "step": 5197 }, { "epoch": 1.5777811504021855, "grad_norm": 0.5717130899429321, "learning_rate": 6.848116646415553e-05, "loss": 1.79, "step": 5198 }, { "epoch": 1.578084686598877, "grad_norm": 0.6440120935440063, "learning_rate": 6.847509113001216e-05, "loss": 1.4557, "step": 5199 }, { "epoch": 1.5783882227955683, "grad_norm": 0.5330291390419006, "learning_rate": 6.846901579586877e-05, "loss": 1.6472, "step": 5200 }, { "epoch": 1.5786917589922598, "grad_norm": 0.5164377689361572, "learning_rate": 6.84629404617254e-05, "loss": 1.8938, "step": 5201 }, { "epoch": 1.5789952951889514, "grad_norm": 0.5387392044067383, "learning_rate": 6.845686512758201e-05, "loss": 1.8797, "step": 5202 }, { "epoch": 1.5792988313856426, "grad_norm": 0.6447203159332275, "learning_rate": 6.845078979343864e-05, "loss": 1.3384, "step": 5203 }, { "epoch": 1.5796023675823343, "grad_norm": 0.5613716244697571, "learning_rate": 6.844471445929527e-05, "loss": 1.2776, "step": 5204 }, { "epoch": 1.5799059037790255, "grad_norm": 0.711937665939331, "learning_rate": 6.843863912515189e-05, "loss": 1.4993, "step": 5205 }, { "epoch": 1.5802094399757172, "grad_norm": 0.5429368019104004, "learning_rate": 6.84325637910085e-05, "loss": 1.8176, "step": 5206 }, { "epoch": 1.5805129761724086, "grad_norm": 0.48608335852622986, "learning_rate": 6.842648845686514e-05, "loss": 1.1093, "step": 5207 }, { "epoch": 1.5808165123691, "grad_norm": 0.5331500172615051, "learning_rate": 6.842041312272175e-05, "loss": 1.5347, "step": 5208 }, { "epoch": 1.5811200485657915, "grad_norm": 0.4486270844936371, "learning_rate": 6.841433778857837e-05, "loss": 1.8155, "step": 5209 }, { "epoch": 1.581423584762483, "grad_norm": 0.5292797088623047, "learning_rate": 6.8408262454435e-05, "loss": 1.778, "step": 5210 }, { "epoch": 1.5817271209591743, "grad_norm": 0.5913912057876587, "learning_rate": 6.840218712029162e-05, "loss": 1.6109, "step": 5211 }, { "epoch": 1.5820306571558658, "grad_norm": 0.5955752730369568, "learning_rate": 6.839611178614824e-05, "loss": 1.3188, "step": 5212 }, { "epoch": 1.5823341933525574, "grad_norm": 0.9389486908912659, "learning_rate": 6.839003645200487e-05, "loss": 1.757, "step": 5213 }, { "epoch": 1.5826377295492486, "grad_norm": 0.5492468476295471, "learning_rate": 6.838396111786148e-05, "loss": 1.9314, "step": 5214 }, { "epoch": 1.5829412657459403, "grad_norm": 0.5454115271568298, "learning_rate": 6.837788578371812e-05, "loss": 1.7117, "step": 5215 }, { "epoch": 1.5832448019426315, "grad_norm": 0.5753993391990662, "learning_rate": 6.837181044957472e-05, "loss": 1.2838, "step": 5216 }, { "epoch": 1.5835483381393232, "grad_norm": 0.494477778673172, "learning_rate": 6.836573511543135e-05, "loss": 1.3306, "step": 5217 }, { "epoch": 1.5838518743360146, "grad_norm": 0.5156183242797852, "learning_rate": 6.835965978128798e-05, "loss": 1.2108, "step": 5218 }, { "epoch": 1.584155410532706, "grad_norm": 0.46987971663475037, "learning_rate": 6.83535844471446e-05, "loss": 1.8227, "step": 5219 }, { "epoch": 1.5844589467293975, "grad_norm": 0.720950186252594, "learning_rate": 6.834750911300122e-05, "loss": 1.7258, "step": 5220 }, { "epoch": 1.584762482926089, "grad_norm": 0.5426019430160522, "learning_rate": 6.834143377885785e-05, "loss": 1.6847, "step": 5221 }, { "epoch": 1.5850660191227806, "grad_norm": 0.5420331954956055, "learning_rate": 6.833535844471446e-05, "loss": 1.4088, "step": 5222 }, { "epoch": 1.5853695553194718, "grad_norm": 0.4799429476261139, "learning_rate": 6.832928311057108e-05, "loss": 1.3208, "step": 5223 }, { "epoch": 1.5856730915161634, "grad_norm": 0.3376842737197876, "learning_rate": 6.832320777642771e-05, "loss": 0.8923, "step": 5224 }, { "epoch": 1.5859766277128546, "grad_norm": 0.5960623621940613, "learning_rate": 6.831713244228433e-05, "loss": 1.213, "step": 5225 }, { "epoch": 1.5862801639095463, "grad_norm": 0.415962815284729, "learning_rate": 6.831105710814095e-05, "loss": 1.8835, "step": 5226 }, { "epoch": 1.5865837001062375, "grad_norm": 0.59657222032547, "learning_rate": 6.830498177399758e-05, "loss": 1.3524, "step": 5227 }, { "epoch": 1.5868872363029292, "grad_norm": 0.508263111114502, "learning_rate": 6.82989064398542e-05, "loss": 1.5945, "step": 5228 }, { "epoch": 1.5871907724996206, "grad_norm": 0.44551774859428406, "learning_rate": 6.829283110571083e-05, "loss": 1.1683, "step": 5229 }, { "epoch": 1.587494308696312, "grad_norm": 0.5371022820472717, "learning_rate": 6.828675577156743e-05, "loss": 1.6683, "step": 5230 }, { "epoch": 1.5877978448930035, "grad_norm": 0.5673996806144714, "learning_rate": 6.828068043742406e-05, "loss": 1.6156, "step": 5231 }, { "epoch": 1.588101381089695, "grad_norm": 0.5424557328224182, "learning_rate": 6.827460510328069e-05, "loss": 2.1004, "step": 5232 }, { "epoch": 1.5884049172863866, "grad_norm": 0.6087379455566406, "learning_rate": 6.82685297691373e-05, "loss": 1.685, "step": 5233 }, { "epoch": 1.5887084534830778, "grad_norm": 0.48891234397888184, "learning_rate": 6.826245443499393e-05, "loss": 1.8414, "step": 5234 }, { "epoch": 1.5890119896797694, "grad_norm": 0.5249536633491516, "learning_rate": 6.825637910085056e-05, "loss": 1.8926, "step": 5235 }, { "epoch": 1.5893155258764606, "grad_norm": 0.5451249480247498, "learning_rate": 6.825030376670717e-05, "loss": 1.3554, "step": 5236 }, { "epoch": 1.5896190620731523, "grad_norm": 0.5022341012954712, "learning_rate": 6.824422843256379e-05, "loss": 2.001, "step": 5237 }, { "epoch": 1.5899225982698437, "grad_norm": 0.5455743074417114, "learning_rate": 6.823815309842042e-05, "loss": 1.6482, "step": 5238 }, { "epoch": 1.5902261344665352, "grad_norm": 0.48343515396118164, "learning_rate": 6.823207776427704e-05, "loss": 1.7445, "step": 5239 }, { "epoch": 1.5905296706632266, "grad_norm": 0.5498737096786499, "learning_rate": 6.822600243013366e-05, "loss": 1.4105, "step": 5240 }, { "epoch": 1.590833206859918, "grad_norm": 0.6236885190010071, "learning_rate": 6.821992709599029e-05, "loss": 1.5102, "step": 5241 }, { "epoch": 1.5911367430566095, "grad_norm": 0.5544707179069519, "learning_rate": 6.82138517618469e-05, "loss": 1.5343, "step": 5242 }, { "epoch": 1.591440279253301, "grad_norm": 0.6272419095039368, "learning_rate": 6.820777642770354e-05, "loss": 1.9041, "step": 5243 }, { "epoch": 1.5917438154499925, "grad_norm": 0.5442690849304199, "learning_rate": 6.820170109356014e-05, "loss": 1.7401, "step": 5244 }, { "epoch": 1.5920473516466838, "grad_norm": 0.5792570114135742, "learning_rate": 6.819562575941677e-05, "loss": 1.3998, "step": 5245 }, { "epoch": 1.5923508878433754, "grad_norm": 0.5700094103813171, "learning_rate": 6.81895504252734e-05, "loss": 1.9555, "step": 5246 }, { "epoch": 1.5926544240400666, "grad_norm": 0.6132831573486328, "learning_rate": 6.818347509113e-05, "loss": 1.5922, "step": 5247 }, { "epoch": 1.5929579602367583, "grad_norm": 0.46173346042633057, "learning_rate": 6.817739975698664e-05, "loss": 1.3685, "step": 5248 }, { "epoch": 1.5932614964334497, "grad_norm": 0.4786145091056824, "learning_rate": 6.817132442284327e-05, "loss": 1.9428, "step": 5249 }, { "epoch": 1.5935650326301412, "grad_norm": 1.006852388381958, "learning_rate": 6.816524908869988e-05, "loss": 1.1743, "step": 5250 }, { "epoch": 1.5938685688268326, "grad_norm": 0.6216186881065369, "learning_rate": 6.81591737545565e-05, "loss": 1.7946, "step": 5251 }, { "epoch": 1.594172105023524, "grad_norm": 0.561336874961853, "learning_rate": 6.815309842041313e-05, "loss": 1.7092, "step": 5252 }, { "epoch": 1.5944756412202155, "grad_norm": 0.6000168919563293, "learning_rate": 6.814702308626975e-05, "loss": 1.5225, "step": 5253 }, { "epoch": 1.594779177416907, "grad_norm": 0.5488656163215637, "learning_rate": 6.814094775212637e-05, "loss": 1.7632, "step": 5254 }, { "epoch": 1.5950827136135985, "grad_norm": 0.52173912525177, "learning_rate": 6.8134872417983e-05, "loss": 1.7053, "step": 5255 }, { "epoch": 1.5953862498102898, "grad_norm": 0.927346408367157, "learning_rate": 6.812879708383961e-05, "loss": 1.5436, "step": 5256 }, { "epoch": 1.5956897860069814, "grad_norm": 0.42421141266822815, "learning_rate": 6.812272174969623e-05, "loss": 1.7197, "step": 5257 }, { "epoch": 1.5959933222036726, "grad_norm": 0.5720416903495789, "learning_rate": 6.811664641555285e-05, "loss": 1.8079, "step": 5258 }, { "epoch": 1.5962968584003643, "grad_norm": 0.6596317291259766, "learning_rate": 6.811057108140948e-05, "loss": 1.7341, "step": 5259 }, { "epoch": 1.5966003945970557, "grad_norm": 0.5089192390441895, "learning_rate": 6.810449574726611e-05, "loss": 1.5992, "step": 5260 }, { "epoch": 1.5969039307937472, "grad_norm": 0.5197166204452515, "learning_rate": 6.809842041312272e-05, "loss": 1.6093, "step": 5261 }, { "epoch": 1.5972074669904386, "grad_norm": 0.552408754825592, "learning_rate": 6.809234507897935e-05, "loss": 1.5114, "step": 5262 }, { "epoch": 1.59751100318713, "grad_norm": 0.5977384448051453, "learning_rate": 6.808626974483598e-05, "loss": 1.7183, "step": 5263 }, { "epoch": 1.5978145393838217, "grad_norm": 0.547103762626648, "learning_rate": 6.80801944106926e-05, "loss": 1.7443, "step": 5264 }, { "epoch": 1.5981180755805129, "grad_norm": 0.5751621723175049, "learning_rate": 6.807411907654921e-05, "loss": 1.7588, "step": 5265 }, { "epoch": 1.5984216117772045, "grad_norm": 0.47714513540267944, "learning_rate": 6.806804374240584e-05, "loss": 1.7564, "step": 5266 }, { "epoch": 1.5987251479738958, "grad_norm": 0.5768815279006958, "learning_rate": 6.806196840826246e-05, "loss": 1.7652, "step": 5267 }, { "epoch": 1.5990286841705874, "grad_norm": 0.5746716260910034, "learning_rate": 6.805589307411908e-05, "loss": 1.5079, "step": 5268 }, { "epoch": 1.5993322203672788, "grad_norm": 0.5528024435043335, "learning_rate": 6.80498177399757e-05, "loss": 1.5186, "step": 5269 }, { "epoch": 1.5996357565639703, "grad_norm": 0.5926470756530762, "learning_rate": 6.804374240583233e-05, "loss": 1.5568, "step": 5270 }, { "epoch": 1.5999392927606617, "grad_norm": 0.5206483006477356, "learning_rate": 6.803766707168894e-05, "loss": 1.0035, "step": 5271 }, { "epoch": 1.6002428289573531, "grad_norm": 0.4643096923828125, "learning_rate": 6.803159173754556e-05, "loss": 0.8317, "step": 5272 }, { "epoch": 1.6005463651540446, "grad_norm": 0.48165419697761536, "learning_rate": 6.802551640340219e-05, "loss": 1.8613, "step": 5273 }, { "epoch": 1.600849901350736, "grad_norm": 0.5018272995948792, "learning_rate": 6.801944106925882e-05, "loss": 1.7777, "step": 5274 }, { "epoch": 1.6011534375474277, "grad_norm": 0.7083479762077332, "learning_rate": 6.801336573511543e-05, "loss": 1.8053, "step": 5275 }, { "epoch": 1.6014569737441189, "grad_norm": 0.5450612902641296, "learning_rate": 6.800729040097206e-05, "loss": 1.5861, "step": 5276 }, { "epoch": 1.6017605099408105, "grad_norm": 0.599209189414978, "learning_rate": 6.800121506682869e-05, "loss": 1.2736, "step": 5277 }, { "epoch": 1.6020640461375018, "grad_norm": 0.5083154439926147, "learning_rate": 6.79951397326853e-05, "loss": 1.7954, "step": 5278 }, { "epoch": 1.6023675823341934, "grad_norm": 0.5951920747756958, "learning_rate": 6.798906439854192e-05, "loss": 1.322, "step": 5279 }, { "epoch": 1.6026711185308848, "grad_norm": 0.5329943299293518, "learning_rate": 6.798298906439855e-05, "loss": 1.8546, "step": 5280 }, { "epoch": 1.6029746547275763, "grad_norm": 0.45665401220321655, "learning_rate": 6.797691373025517e-05, "loss": 1.9274, "step": 5281 }, { "epoch": 1.6032781909242677, "grad_norm": 0.5830223560333252, "learning_rate": 6.797083839611179e-05, "loss": 1.7661, "step": 5282 }, { "epoch": 1.6035817271209591, "grad_norm": 0.5497011542320251, "learning_rate": 6.79647630619684e-05, "loss": 1.29, "step": 5283 }, { "epoch": 1.6038852633176506, "grad_norm": 0.47509273886680603, "learning_rate": 6.795868772782504e-05, "loss": 1.8748, "step": 5284 }, { "epoch": 1.604188799514342, "grad_norm": 0.5527020692825317, "learning_rate": 6.795261239368165e-05, "loss": 1.186, "step": 5285 }, { "epoch": 1.6044923357110337, "grad_norm": 0.5587241053581238, "learning_rate": 6.794653705953827e-05, "loss": 1.3485, "step": 5286 }, { "epoch": 1.6047958719077249, "grad_norm": 0.7357602119445801, "learning_rate": 6.79404617253949e-05, "loss": 1.3425, "step": 5287 }, { "epoch": 1.6050994081044165, "grad_norm": 0.600986659526825, "learning_rate": 6.793438639125153e-05, "loss": 1.3589, "step": 5288 }, { "epoch": 1.6054029443011077, "grad_norm": 0.8718162178993225, "learning_rate": 6.792831105710814e-05, "loss": 1.4548, "step": 5289 }, { "epoch": 1.6057064804977994, "grad_norm": 0.5700815320014954, "learning_rate": 6.792223572296477e-05, "loss": 1.5316, "step": 5290 }, { "epoch": 1.6060100166944908, "grad_norm": 0.4497663080692291, "learning_rate": 6.79161603888214e-05, "loss": 1.6142, "step": 5291 }, { "epoch": 1.6063135528911823, "grad_norm": 0.4942954182624817, "learning_rate": 6.791008505467801e-05, "loss": 2.073, "step": 5292 }, { "epoch": 1.6066170890878737, "grad_norm": 0.578714907169342, "learning_rate": 6.790400972053463e-05, "loss": 1.6847, "step": 5293 }, { "epoch": 1.6069206252845651, "grad_norm": 0.5462824106216431, "learning_rate": 6.789793438639126e-05, "loss": 1.8571, "step": 5294 }, { "epoch": 1.6072241614812568, "grad_norm": 0.47509217262268066, "learning_rate": 6.789185905224788e-05, "loss": 1.6175, "step": 5295 }, { "epoch": 1.607527697677948, "grad_norm": 0.5601421594619751, "learning_rate": 6.78857837181045e-05, "loss": 1.1761, "step": 5296 }, { "epoch": 1.6078312338746397, "grad_norm": 0.5942484140396118, "learning_rate": 6.787970838396111e-05, "loss": 1.2908, "step": 5297 }, { "epoch": 1.6081347700713309, "grad_norm": 0.5373112559318542, "learning_rate": 6.787363304981775e-05, "loss": 1.7585, "step": 5298 }, { "epoch": 1.6084383062680225, "grad_norm": 0.5723139643669128, "learning_rate": 6.786755771567436e-05, "loss": 1.7596, "step": 5299 }, { "epoch": 1.608741842464714, "grad_norm": 0.5496042370796204, "learning_rate": 6.786148238153098e-05, "loss": 1.7498, "step": 5300 }, { "epoch": 1.6090453786614054, "grad_norm": 0.5220587253570557, "learning_rate": 6.785540704738761e-05, "loss": 1.6976, "step": 5301 }, { "epoch": 1.6093489148580968, "grad_norm": 0.5628178715705872, "learning_rate": 6.784933171324424e-05, "loss": 1.6136, "step": 5302 }, { "epoch": 1.6096524510547883, "grad_norm": 0.48423969745635986, "learning_rate": 6.784325637910085e-05, "loss": 1.1443, "step": 5303 }, { "epoch": 1.6099559872514797, "grad_norm": 0.8351702094078064, "learning_rate": 6.783718104495748e-05, "loss": 1.7297, "step": 5304 }, { "epoch": 1.6102595234481711, "grad_norm": 0.5108897089958191, "learning_rate": 6.783110571081411e-05, "loss": 1.706, "step": 5305 }, { "epoch": 1.6105630596448628, "grad_norm": 0.5052831768989563, "learning_rate": 6.782503037667071e-05, "loss": 1.801, "step": 5306 }, { "epoch": 1.610866595841554, "grad_norm": 0.8400011658668518, "learning_rate": 6.781895504252734e-05, "loss": 1.423, "step": 5307 }, { "epoch": 1.6111701320382457, "grad_norm": 0.5768548250198364, "learning_rate": 6.781287970838397e-05, "loss": 1.8448, "step": 5308 }, { "epoch": 1.6114736682349369, "grad_norm": 0.4723953306674957, "learning_rate": 6.780680437424059e-05, "loss": 1.564, "step": 5309 }, { "epoch": 1.6117772044316285, "grad_norm": 0.5078704357147217, "learning_rate": 6.780072904009721e-05, "loss": 1.7979, "step": 5310 }, { "epoch": 1.61208074062832, "grad_norm": 0.5367867350578308, "learning_rate": 6.779465370595382e-05, "loss": 1.3874, "step": 5311 }, { "epoch": 1.6123842768250114, "grad_norm": 0.5529655814170837, "learning_rate": 6.778857837181046e-05, "loss": 1.8505, "step": 5312 }, { "epoch": 1.6126878130217028, "grad_norm": 0.5230738520622253, "learning_rate": 6.778250303766707e-05, "loss": 1.5063, "step": 5313 }, { "epoch": 1.6129913492183943, "grad_norm": 0.5180802345275879, "learning_rate": 6.777642770352369e-05, "loss": 1.9768, "step": 5314 }, { "epoch": 1.6132948854150857, "grad_norm": 0.47264987230300903, "learning_rate": 6.777035236938032e-05, "loss": 1.58, "step": 5315 }, { "epoch": 1.6135984216117771, "grad_norm": 0.5119348764419556, "learning_rate": 6.776427703523695e-05, "loss": 1.5589, "step": 5316 }, { "epoch": 1.6139019578084688, "grad_norm": 0.4975685477256775, "learning_rate": 6.775820170109356e-05, "loss": 1.6955, "step": 5317 }, { "epoch": 1.61420549400516, "grad_norm": 0.4829816520214081, "learning_rate": 6.775212636695019e-05, "loss": 1.7274, "step": 5318 }, { "epoch": 1.6145090302018517, "grad_norm": 0.5632861256599426, "learning_rate": 6.774605103280682e-05, "loss": 1.9359, "step": 5319 }, { "epoch": 1.6148125663985429, "grad_norm": 0.46568477153778076, "learning_rate": 6.773997569866342e-05, "loss": 1.9913, "step": 5320 }, { "epoch": 1.6151161025952345, "grad_norm": 0.581303060054779, "learning_rate": 6.773390036452005e-05, "loss": 1.7004, "step": 5321 }, { "epoch": 1.615419638791926, "grad_norm": 0.5229167342185974, "learning_rate": 6.772782503037668e-05, "loss": 1.7712, "step": 5322 }, { "epoch": 1.6157231749886174, "grad_norm": 0.5725805759429932, "learning_rate": 6.77217496962333e-05, "loss": 1.8174, "step": 5323 }, { "epoch": 1.6160267111853088, "grad_norm": 0.8762467503547668, "learning_rate": 6.771567436208992e-05, "loss": 1.6576, "step": 5324 }, { "epoch": 1.6163302473820003, "grad_norm": 0.598650336265564, "learning_rate": 6.770959902794653e-05, "loss": 1.7338, "step": 5325 }, { "epoch": 1.616633783578692, "grad_norm": 0.6000232696533203, "learning_rate": 6.770352369380317e-05, "loss": 1.1433, "step": 5326 }, { "epoch": 1.6169373197753831, "grad_norm": 0.5699084401130676, "learning_rate": 6.769744835965978e-05, "loss": 1.4685, "step": 5327 }, { "epoch": 1.6172408559720748, "grad_norm": 0.5472105741500854, "learning_rate": 6.76913730255164e-05, "loss": 1.6136, "step": 5328 }, { "epoch": 1.617544392168766, "grad_norm": 0.5260597467422485, "learning_rate": 6.768529769137303e-05, "loss": 1.8754, "step": 5329 }, { "epoch": 1.6178479283654577, "grad_norm": 0.5261357426643372, "learning_rate": 6.767922235722965e-05, "loss": 1.641, "step": 5330 }, { "epoch": 1.618151464562149, "grad_norm": 0.4434361755847931, "learning_rate": 6.767314702308627e-05, "loss": 1.3906, "step": 5331 }, { "epoch": 1.6184550007588405, "grad_norm": 0.4359007477760315, "learning_rate": 6.76670716889429e-05, "loss": 1.3848, "step": 5332 }, { "epoch": 1.618758536955532, "grad_norm": 0.4851773679256439, "learning_rate": 6.766099635479953e-05, "loss": 1.6975, "step": 5333 }, { "epoch": 1.6190620731522234, "grad_norm": 0.505078136920929, "learning_rate": 6.765492102065613e-05, "loss": 1.593, "step": 5334 }, { "epoch": 1.6193656093489148, "grad_norm": 0.470007985830307, "learning_rate": 6.764884568651276e-05, "loss": 1.7861, "step": 5335 }, { "epoch": 1.6196691455456063, "grad_norm": 0.480383962392807, "learning_rate": 6.764277035236939e-05, "loss": 1.6374, "step": 5336 }, { "epoch": 1.619972681742298, "grad_norm": 0.8806627988815308, "learning_rate": 6.763669501822601e-05, "loss": 1.5483, "step": 5337 }, { "epoch": 1.6202762179389891, "grad_norm": 0.4468989968299866, "learning_rate": 6.763061968408263e-05, "loss": 1.7802, "step": 5338 }, { "epoch": 1.6205797541356808, "grad_norm": 0.4716281294822693, "learning_rate": 6.762454434993924e-05, "loss": 2.0222, "step": 5339 }, { "epoch": 1.620883290332372, "grad_norm": 0.5289998054504395, "learning_rate": 6.761846901579588e-05, "loss": 1.7585, "step": 5340 }, { "epoch": 1.6211868265290637, "grad_norm": 0.5218448042869568, "learning_rate": 6.761239368165249e-05, "loss": 1.7652, "step": 5341 }, { "epoch": 1.621490362725755, "grad_norm": 0.5116521120071411, "learning_rate": 6.760631834750911e-05, "loss": 1.8309, "step": 5342 }, { "epoch": 1.6217938989224465, "grad_norm": 0.5118533372879028, "learning_rate": 6.760024301336574e-05, "loss": 1.1779, "step": 5343 }, { "epoch": 1.622097435119138, "grad_norm": 0.46334773302078247, "learning_rate": 6.759416767922236e-05, "loss": 1.3687, "step": 5344 }, { "epoch": 1.6224009713158294, "grad_norm": 0.39834198355674744, "learning_rate": 6.758809234507898e-05, "loss": 0.8092, "step": 5345 }, { "epoch": 1.6227045075125208, "grad_norm": 0.6149663925170898, "learning_rate": 6.75820170109356e-05, "loss": 1.6454, "step": 5346 }, { "epoch": 1.6230080437092123, "grad_norm": 0.47009339928627014, "learning_rate": 6.757594167679224e-05, "loss": 1.7656, "step": 5347 }, { "epoch": 1.623311579905904, "grad_norm": 0.5676798820495605, "learning_rate": 6.756986634264884e-05, "loss": 1.7765, "step": 5348 }, { "epoch": 1.6236151161025951, "grad_norm": 0.6189036965370178, "learning_rate": 6.756379100850547e-05, "loss": 1.5765, "step": 5349 }, { "epoch": 1.6239186522992868, "grad_norm": 0.5358760356903076, "learning_rate": 6.75577156743621e-05, "loss": 1.528, "step": 5350 }, { "epoch": 1.624222188495978, "grad_norm": 0.701900064945221, "learning_rate": 6.755164034021872e-05, "loss": 1.4507, "step": 5351 }, { "epoch": 1.6245257246926696, "grad_norm": 0.552905797958374, "learning_rate": 6.754556500607534e-05, "loss": 1.304, "step": 5352 }, { "epoch": 1.624829260889361, "grad_norm": 0.6372193098068237, "learning_rate": 6.753948967193195e-05, "loss": 1.9799, "step": 5353 }, { "epoch": 1.6251327970860525, "grad_norm": 0.5515216588973999, "learning_rate": 6.753341433778859e-05, "loss": 1.8413, "step": 5354 }, { "epoch": 1.625436333282744, "grad_norm": 0.42681434750556946, "learning_rate": 6.75273390036452e-05, "loss": 0.8476, "step": 5355 }, { "epoch": 1.6257398694794354, "grad_norm": 0.520078182220459, "learning_rate": 6.752126366950182e-05, "loss": 1.3612, "step": 5356 }, { "epoch": 1.626043405676127, "grad_norm": 0.5389737486839294, "learning_rate": 6.751518833535845e-05, "loss": 1.7407, "step": 5357 }, { "epoch": 1.6263469418728183, "grad_norm": 0.5108140110969543, "learning_rate": 6.750911300121507e-05, "loss": 1.6941, "step": 5358 }, { "epoch": 1.62665047806951, "grad_norm": 0.8992660641670227, "learning_rate": 6.750303766707169e-05, "loss": 1.7602, "step": 5359 }, { "epoch": 1.6269540142662011, "grad_norm": 0.5920006632804871, "learning_rate": 6.749696233292832e-05, "loss": 1.6317, "step": 5360 }, { "epoch": 1.6272575504628928, "grad_norm": 0.5391355752944946, "learning_rate": 6.749088699878495e-05, "loss": 0.8721, "step": 5361 }, { "epoch": 1.6275610866595842, "grad_norm": 0.5665925741195679, "learning_rate": 6.748481166464155e-05, "loss": 1.3489, "step": 5362 }, { "epoch": 1.6278646228562756, "grad_norm": 0.7543338537216187, "learning_rate": 6.747873633049818e-05, "loss": 1.6003, "step": 5363 }, { "epoch": 1.628168159052967, "grad_norm": 0.5854896903038025, "learning_rate": 6.74726609963548e-05, "loss": 1.7037, "step": 5364 }, { "epoch": 1.6284716952496585, "grad_norm": 0.541285514831543, "learning_rate": 6.746658566221143e-05, "loss": 1.5401, "step": 5365 }, { "epoch": 1.62877523144635, "grad_norm": 0.5873636603355408, "learning_rate": 6.746051032806805e-05, "loss": 1.6896, "step": 5366 }, { "epoch": 1.6290787676430414, "grad_norm": 0.6596795320510864, "learning_rate": 6.745443499392466e-05, "loss": 1.783, "step": 5367 }, { "epoch": 1.629382303839733, "grad_norm": 0.6165364384651184, "learning_rate": 6.74483596597813e-05, "loss": 1.8629, "step": 5368 }, { "epoch": 1.6296858400364242, "grad_norm": 0.5157227516174316, "learning_rate": 6.744228432563791e-05, "loss": 1.743, "step": 5369 }, { "epoch": 1.629989376233116, "grad_norm": 0.5414877533912659, "learning_rate": 6.743620899149453e-05, "loss": 1.1882, "step": 5370 }, { "epoch": 1.6302929124298071, "grad_norm": 0.5076656341552734, "learning_rate": 6.743013365735116e-05, "loss": 1.8108, "step": 5371 }, { "epoch": 1.6305964486264988, "grad_norm": 0.48527640104293823, "learning_rate": 6.742405832320778e-05, "loss": 1.8499, "step": 5372 }, { "epoch": 1.6308999848231902, "grad_norm": 0.5806058049201965, "learning_rate": 6.74179829890644e-05, "loss": 1.919, "step": 5373 }, { "epoch": 1.6312035210198816, "grad_norm": 0.5198022127151489, "learning_rate": 6.741190765492103e-05, "loss": 1.7945, "step": 5374 }, { "epoch": 1.631507057216573, "grad_norm": 0.5610677599906921, "learning_rate": 6.740583232077766e-05, "loss": 1.7286, "step": 5375 }, { "epoch": 1.6318105934132645, "grad_norm": 0.5591566562652588, "learning_rate": 6.739975698663426e-05, "loss": 1.8059, "step": 5376 }, { "epoch": 1.632114129609956, "grad_norm": 0.5589379072189331, "learning_rate": 6.739368165249089e-05, "loss": 1.5548, "step": 5377 }, { "epoch": 1.6324176658066474, "grad_norm": 0.5973590016365051, "learning_rate": 6.738760631834751e-05, "loss": 1.6746, "step": 5378 }, { "epoch": 1.632721202003339, "grad_norm": 0.5872961282730103, "learning_rate": 6.738153098420413e-05, "loss": 1.6873, "step": 5379 }, { "epoch": 1.6330247382000302, "grad_norm": 1.088761329650879, "learning_rate": 6.737545565006076e-05, "loss": 1.2821, "step": 5380 }, { "epoch": 1.633328274396722, "grad_norm": 0.497652143239975, "learning_rate": 6.736938031591737e-05, "loss": 1.7009, "step": 5381 }, { "epoch": 1.6336318105934131, "grad_norm": 0.4582177698612213, "learning_rate": 6.7363304981774e-05, "loss": 1.4134, "step": 5382 }, { "epoch": 1.6339353467901048, "grad_norm": 0.5756871104240417, "learning_rate": 6.735722964763062e-05, "loss": 1.5523, "step": 5383 }, { "epoch": 1.6342388829867962, "grad_norm": 0.4630436301231384, "learning_rate": 6.735115431348724e-05, "loss": 1.6713, "step": 5384 }, { "epoch": 1.6345424191834876, "grad_norm": 0.5446672439575195, "learning_rate": 6.734507897934387e-05, "loss": 1.7208, "step": 5385 }, { "epoch": 1.634845955380179, "grad_norm": 0.5166425108909607, "learning_rate": 6.733900364520049e-05, "loss": 1.6424, "step": 5386 }, { "epoch": 1.6351494915768705, "grad_norm": 0.6008763313293457, "learning_rate": 6.73329283110571e-05, "loss": 1.5234, "step": 5387 }, { "epoch": 1.6354530277735622, "grad_norm": 0.5188408493995667, "learning_rate": 6.732685297691374e-05, "loss": 1.868, "step": 5388 }, { "epoch": 1.6357565639702534, "grad_norm": 1.0882227420806885, "learning_rate": 6.732077764277037e-05, "loss": 1.6903, "step": 5389 }, { "epoch": 1.636060100166945, "grad_norm": 0.5503698587417603, "learning_rate": 6.731470230862697e-05, "loss": 1.8615, "step": 5390 }, { "epoch": 1.6363636363636362, "grad_norm": 0.5375723838806152, "learning_rate": 6.73086269744836e-05, "loss": 1.6473, "step": 5391 }, { "epoch": 1.636667172560328, "grad_norm": 0.5821309685707092, "learning_rate": 6.730255164034022e-05, "loss": 1.8633, "step": 5392 }, { "epoch": 1.636970708757019, "grad_norm": 0.6550545692443848, "learning_rate": 6.729647630619684e-05, "loss": 1.6438, "step": 5393 }, { "epoch": 1.6372742449537108, "grad_norm": 0.5280491709709167, "learning_rate": 6.729040097205347e-05, "loss": 1.4762, "step": 5394 }, { "epoch": 1.6375777811504022, "grad_norm": 0.5491107106208801, "learning_rate": 6.728432563791008e-05, "loss": 1.7162, "step": 5395 }, { "epoch": 1.6378813173470936, "grad_norm": 0.4663378596305847, "learning_rate": 6.727825030376672e-05, "loss": 1.7681, "step": 5396 }, { "epoch": 1.638184853543785, "grad_norm": 0.8106181025505066, "learning_rate": 6.727217496962333e-05, "loss": 1.2499, "step": 5397 }, { "epoch": 1.6384883897404765, "grad_norm": 0.52627032995224, "learning_rate": 6.726609963547995e-05, "loss": 2.023, "step": 5398 }, { "epoch": 1.6387919259371682, "grad_norm": 0.48420122265815735, "learning_rate": 6.726002430133658e-05, "loss": 1.7587, "step": 5399 }, { "epoch": 1.6390954621338594, "grad_norm": 0.513408362865448, "learning_rate": 6.72539489671932e-05, "loss": 1.4867, "step": 5400 }, { "epoch": 1.639398998330551, "grad_norm": 0.48651421070098877, "learning_rate": 6.724787363304982e-05, "loss": 1.217, "step": 5401 }, { "epoch": 1.6397025345272422, "grad_norm": 0.5510370135307312, "learning_rate": 6.724179829890645e-05, "loss": 1.6771, "step": 5402 }, { "epoch": 1.640006070723934, "grad_norm": 0.6256886124610901, "learning_rate": 6.723572296476306e-05, "loss": 1.7332, "step": 5403 }, { "epoch": 1.6403096069206253, "grad_norm": 0.5471596717834473, "learning_rate": 6.722964763061968e-05, "loss": 1.8147, "step": 5404 }, { "epoch": 1.6406131431173168, "grad_norm": 0.5959319472312927, "learning_rate": 6.722357229647631e-05, "loss": 1.6621, "step": 5405 }, { "epoch": 1.6409166793140082, "grad_norm": 0.5674776434898376, "learning_rate": 6.721749696233293e-05, "loss": 1.6725, "step": 5406 }, { "epoch": 1.6412202155106996, "grad_norm": 0.6195299625396729, "learning_rate": 6.721142162818955e-05, "loss": 1.75, "step": 5407 }, { "epoch": 1.641523751707391, "grad_norm": 0.5325772762298584, "learning_rate": 6.720534629404618e-05, "loss": 1.4533, "step": 5408 }, { "epoch": 1.6418272879040825, "grad_norm": 0.5618863105773926, "learning_rate": 6.71992709599028e-05, "loss": 1.8481, "step": 5409 }, { "epoch": 1.6421308241007742, "grad_norm": 0.5095944404602051, "learning_rate": 6.719319562575943e-05, "loss": 1.7641, "step": 5410 }, { "epoch": 1.6424343602974654, "grad_norm": 0.508712649345398, "learning_rate": 6.718712029161604e-05, "loss": 1.825, "step": 5411 }, { "epoch": 1.642737896494157, "grad_norm": 0.5344487428665161, "learning_rate": 6.718104495747266e-05, "loss": 1.7304, "step": 5412 }, { "epoch": 1.6430414326908482, "grad_norm": 0.7639570832252502, "learning_rate": 6.717496962332929e-05, "loss": 1.4032, "step": 5413 }, { "epoch": 1.64334496888754, "grad_norm": 0.5500749349594116, "learning_rate": 6.716889428918591e-05, "loss": 1.719, "step": 5414 }, { "epoch": 1.6436485050842313, "grad_norm": 0.5417535901069641, "learning_rate": 6.716281895504253e-05, "loss": 1.8949, "step": 5415 }, { "epoch": 1.6439520412809228, "grad_norm": 0.8672186136245728, "learning_rate": 6.715674362089916e-05, "loss": 1.5031, "step": 5416 }, { "epoch": 1.6442555774776142, "grad_norm": 0.6225829124450684, "learning_rate": 6.715066828675577e-05, "loss": 1.6316, "step": 5417 }, { "epoch": 1.6445591136743056, "grad_norm": 0.6430771350860596, "learning_rate": 6.714459295261239e-05, "loss": 1.9351, "step": 5418 }, { "epoch": 1.644862649870997, "grad_norm": 0.5645813941955566, "learning_rate": 6.713851761846902e-05, "loss": 1.6483, "step": 5419 }, { "epoch": 1.6451661860676885, "grad_norm": 0.5713335871696472, "learning_rate": 6.713244228432564e-05, "loss": 1.7766, "step": 5420 }, { "epoch": 1.6454697222643802, "grad_norm": 0.4520507752895355, "learning_rate": 6.712636695018226e-05, "loss": 1.2049, "step": 5421 }, { "epoch": 1.6457732584610714, "grad_norm": 0.5464449524879456, "learning_rate": 6.712029161603889e-05, "loss": 1.6615, "step": 5422 }, { "epoch": 1.646076794657763, "grad_norm": 0.5757522583007812, "learning_rate": 6.71142162818955e-05, "loss": 1.9467, "step": 5423 }, { "epoch": 1.6463803308544542, "grad_norm": 0.43012985587120056, "learning_rate": 6.710814094775214e-05, "loss": 1.713, "step": 5424 }, { "epoch": 1.6466838670511459, "grad_norm": 0.5255352854728699, "learning_rate": 6.710206561360875e-05, "loss": 1.3849, "step": 5425 }, { "epoch": 1.6469874032478373, "grad_norm": 0.5202666521072388, "learning_rate": 6.709599027946537e-05, "loss": 1.7943, "step": 5426 }, { "epoch": 1.6472909394445288, "grad_norm": 0.4663451313972473, "learning_rate": 6.7089914945322e-05, "loss": 1.6036, "step": 5427 }, { "epoch": 1.6475944756412202, "grad_norm": 0.4620378315448761, "learning_rate": 6.708383961117862e-05, "loss": 1.2386, "step": 5428 }, { "epoch": 1.6478980118379116, "grad_norm": 0.4991806447505951, "learning_rate": 6.707776427703524e-05, "loss": 1.5412, "step": 5429 }, { "epoch": 1.6482015480346033, "grad_norm": 0.5099241733551025, "learning_rate": 6.707168894289187e-05, "loss": 1.2018, "step": 5430 }, { "epoch": 1.6485050842312945, "grad_norm": 0.48727279901504517, "learning_rate": 6.706561360874848e-05, "loss": 1.7994, "step": 5431 }, { "epoch": 1.6488086204279861, "grad_norm": 0.5382767915725708, "learning_rate": 6.70595382746051e-05, "loss": 1.4265, "step": 5432 }, { "epoch": 1.6491121566246774, "grad_norm": 0.611785888671875, "learning_rate": 6.705346294046173e-05, "loss": 1.1372, "step": 5433 }, { "epoch": 1.649415692821369, "grad_norm": 0.47946327924728394, "learning_rate": 6.704738760631835e-05, "loss": 1.6548, "step": 5434 }, { "epoch": 1.6497192290180605, "grad_norm": 0.5408942699432373, "learning_rate": 6.704131227217497e-05, "loss": 1.7279, "step": 5435 }, { "epoch": 1.6500227652147519, "grad_norm": 0.5369447469711304, "learning_rate": 6.70352369380316e-05, "loss": 1.7837, "step": 5436 }, { "epoch": 1.6503263014114433, "grad_norm": 0.5782811641693115, "learning_rate": 6.702916160388821e-05, "loss": 1.4272, "step": 5437 }, { "epoch": 1.6506298376081348, "grad_norm": 0.6250472068786621, "learning_rate": 6.702308626974485e-05, "loss": 1.6532, "step": 5438 }, { "epoch": 1.6509333738048262, "grad_norm": 0.5835246443748474, "learning_rate": 6.701701093560146e-05, "loss": 1.6724, "step": 5439 }, { "epoch": 1.6512369100015176, "grad_norm": 0.7966321110725403, "learning_rate": 6.701093560145808e-05, "loss": 1.4718, "step": 5440 }, { "epoch": 1.6515404461982093, "grad_norm": 0.5973135232925415, "learning_rate": 6.700486026731471e-05, "loss": 1.7702, "step": 5441 }, { "epoch": 1.6518439823949005, "grad_norm": 0.5492159724235535, "learning_rate": 6.699878493317133e-05, "loss": 1.775, "step": 5442 }, { "epoch": 1.6521475185915921, "grad_norm": 0.5329028367996216, "learning_rate": 6.699270959902795e-05, "loss": 1.4301, "step": 5443 }, { "epoch": 1.6524510547882834, "grad_norm": 0.5012845993041992, "learning_rate": 6.698663426488458e-05, "loss": 1.9806, "step": 5444 }, { "epoch": 1.652754590984975, "grad_norm": 0.5947259068489075, "learning_rate": 6.69805589307412e-05, "loss": 1.844, "step": 5445 }, { "epoch": 1.6530581271816664, "grad_norm": 0.7111178040504456, "learning_rate": 6.697448359659781e-05, "loss": 1.3966, "step": 5446 }, { "epoch": 1.6533616633783579, "grad_norm": 0.5320685505867004, "learning_rate": 6.696840826245444e-05, "loss": 2.0219, "step": 5447 }, { "epoch": 1.6536651995750493, "grad_norm": 0.5629007816314697, "learning_rate": 6.696233292831106e-05, "loss": 1.4143, "step": 5448 }, { "epoch": 1.6539687357717408, "grad_norm": 0.616475522518158, "learning_rate": 6.695625759416768e-05, "loss": 1.4303, "step": 5449 }, { "epoch": 1.6542722719684322, "grad_norm": 0.61955326795578, "learning_rate": 6.695018226002431e-05, "loss": 1.4353, "step": 5450 }, { "epoch": 1.6545758081651236, "grad_norm": 0.5788480639457703, "learning_rate": 6.694410692588092e-05, "loss": 1.1104, "step": 5451 }, { "epoch": 1.6548793443618153, "grad_norm": 0.6140144467353821, "learning_rate": 6.693803159173754e-05, "loss": 1.245, "step": 5452 }, { "epoch": 1.6551828805585065, "grad_norm": 0.5252561569213867, "learning_rate": 6.693195625759417e-05, "loss": 1.6649, "step": 5453 }, { "epoch": 1.6554864167551981, "grad_norm": 0.6164594888687134, "learning_rate": 6.692588092345079e-05, "loss": 1.8658, "step": 5454 }, { "epoch": 1.6557899529518894, "grad_norm": 0.6214210987091064, "learning_rate": 6.691980558930742e-05, "loss": 1.7975, "step": 5455 }, { "epoch": 1.656093489148581, "grad_norm": 0.6027306914329529, "learning_rate": 6.691373025516404e-05, "loss": 1.733, "step": 5456 }, { "epoch": 1.6563970253452724, "grad_norm": 0.5661727786064148, "learning_rate": 6.690765492102066e-05, "loss": 1.1875, "step": 5457 }, { "epoch": 1.6567005615419639, "grad_norm": 0.506899356842041, "learning_rate": 6.690157958687729e-05, "loss": 1.8503, "step": 5458 }, { "epoch": 1.6570040977386553, "grad_norm": 0.3860659897327423, "learning_rate": 6.68955042527339e-05, "loss": 1.5618, "step": 5459 }, { "epoch": 1.6573076339353467, "grad_norm": 0.36981797218322754, "learning_rate": 6.688942891859052e-05, "loss": 1.8145, "step": 5460 }, { "epoch": 1.6576111701320384, "grad_norm": 0.5027870535850525, "learning_rate": 6.688335358444715e-05, "loss": 1.7278, "step": 5461 }, { "epoch": 1.6579147063287296, "grad_norm": 0.4753192663192749, "learning_rate": 6.687727825030377e-05, "loss": 1.9114, "step": 5462 }, { "epoch": 1.6582182425254213, "grad_norm": 0.481952041387558, "learning_rate": 6.687120291616039e-05, "loss": 1.8791, "step": 5463 }, { "epoch": 1.6585217787221125, "grad_norm": 0.549480140209198, "learning_rate": 6.686512758201702e-05, "loss": 1.2981, "step": 5464 }, { "epoch": 1.6588253149188041, "grad_norm": 0.49252748489379883, "learning_rate": 6.685905224787363e-05, "loss": 1.9547, "step": 5465 }, { "epoch": 1.6591288511154956, "grad_norm": 0.5749268531799316, "learning_rate": 6.685297691373025e-05, "loss": 2.0041, "step": 5466 }, { "epoch": 1.659432387312187, "grad_norm": 0.6280375123023987, "learning_rate": 6.684690157958688e-05, "loss": 1.4371, "step": 5467 }, { "epoch": 1.6597359235088784, "grad_norm": 0.6023044586181641, "learning_rate": 6.68408262454435e-05, "loss": 1.5359, "step": 5468 }, { "epoch": 1.6600394597055699, "grad_norm": 0.5130484700202942, "learning_rate": 6.683475091130013e-05, "loss": 1.7051, "step": 5469 }, { "epoch": 1.6603429959022613, "grad_norm": 0.9089249968528748, "learning_rate": 6.682867557715675e-05, "loss": 1.5448, "step": 5470 }, { "epoch": 1.6606465320989527, "grad_norm": 0.5765674114227295, "learning_rate": 6.682260024301337e-05, "loss": 1.5053, "step": 5471 }, { "epoch": 1.6609500682956444, "grad_norm": 0.9535808563232422, "learning_rate": 6.681652490887e-05, "loss": 1.4057, "step": 5472 }, { "epoch": 1.6612536044923356, "grad_norm": 0.6133729219436646, "learning_rate": 6.681044957472661e-05, "loss": 1.1964, "step": 5473 }, { "epoch": 1.6615571406890273, "grad_norm": 0.5631382465362549, "learning_rate": 6.680437424058323e-05, "loss": 1.6014, "step": 5474 }, { "epoch": 1.6618606768857185, "grad_norm": 0.5070339441299438, "learning_rate": 6.679829890643986e-05, "loss": 1.2667, "step": 5475 }, { "epoch": 1.6621642130824101, "grad_norm": 0.564780592918396, "learning_rate": 6.679222357229648e-05, "loss": 1.6802, "step": 5476 }, { "epoch": 1.6624677492791016, "grad_norm": 0.5945841073989868, "learning_rate": 6.67861482381531e-05, "loss": 1.5995, "step": 5477 }, { "epoch": 1.662771285475793, "grad_norm": 0.5960447788238525, "learning_rate": 6.678007290400973e-05, "loss": 1.4147, "step": 5478 }, { "epoch": 1.6630748216724844, "grad_norm": 0.9950317144393921, "learning_rate": 6.677399756986634e-05, "loss": 1.2725, "step": 5479 }, { "epoch": 1.6633783578691759, "grad_norm": 0.5889421105384827, "learning_rate": 6.676792223572296e-05, "loss": 1.5387, "step": 5480 }, { "epoch": 1.6636818940658673, "grad_norm": 0.5153008103370667, "learning_rate": 6.676184690157959e-05, "loss": 1.7412, "step": 5481 }, { "epoch": 1.6639854302625587, "grad_norm": 0.4582356810569763, "learning_rate": 6.675577156743621e-05, "loss": 1.8757, "step": 5482 }, { "epoch": 1.6642889664592504, "grad_norm": 0.4646925926208496, "learning_rate": 6.674969623329284e-05, "loss": 1.8008, "step": 5483 }, { "epoch": 1.6645925026559416, "grad_norm": 0.5988579988479614, "learning_rate": 6.674362089914946e-05, "loss": 1.2828, "step": 5484 }, { "epoch": 1.6648960388526333, "grad_norm": 0.49637970328330994, "learning_rate": 6.673754556500608e-05, "loss": 1.6402, "step": 5485 }, { "epoch": 1.6651995750493245, "grad_norm": 0.5080837607383728, "learning_rate": 6.67314702308627e-05, "loss": 1.7121, "step": 5486 }, { "epoch": 1.6655031112460161, "grad_norm": 0.40366098284721375, "learning_rate": 6.672539489671932e-05, "loss": 1.6499, "step": 5487 }, { "epoch": 1.6658066474427076, "grad_norm": 0.4660029411315918, "learning_rate": 6.671931956257594e-05, "loss": 1.7143, "step": 5488 }, { "epoch": 1.666110183639399, "grad_norm": 0.34281787276268005, "learning_rate": 6.671324422843257e-05, "loss": 1.2508, "step": 5489 }, { "epoch": 1.6664137198360904, "grad_norm": 0.4135943055152893, "learning_rate": 6.670716889428919e-05, "loss": 1.0782, "step": 5490 }, { "epoch": 1.6667172560327819, "grad_norm": 0.5196151733398438, "learning_rate": 6.670109356014581e-05, "loss": 1.3375, "step": 5491 }, { "epoch": 1.6670207922294735, "grad_norm": 0.4847051799297333, "learning_rate": 6.669501822600244e-05, "loss": 1.6875, "step": 5492 }, { "epoch": 1.6673243284261647, "grad_norm": 0.46132588386535645, "learning_rate": 6.668894289185906e-05, "loss": 1.6544, "step": 5493 }, { "epoch": 1.6676278646228564, "grad_norm": 0.5738229751586914, "learning_rate": 6.668286755771567e-05, "loss": 2.0053, "step": 5494 }, { "epoch": 1.6679314008195476, "grad_norm": 0.5141200423240662, "learning_rate": 6.66767922235723e-05, "loss": 1.4922, "step": 5495 }, { "epoch": 1.6682349370162393, "grad_norm": 0.48200786113739014, "learning_rate": 6.667071688942892e-05, "loss": 1.9125, "step": 5496 }, { "epoch": 1.6685384732129307, "grad_norm": 0.5733198523521423, "learning_rate": 6.666464155528555e-05, "loss": 2.0049, "step": 5497 }, { "epoch": 1.6688420094096221, "grad_norm": 0.512097954750061, "learning_rate": 6.665856622114217e-05, "loss": 1.7827, "step": 5498 }, { "epoch": 1.6691455456063136, "grad_norm": 0.5178970098495483, "learning_rate": 6.665249088699879e-05, "loss": 1.8091, "step": 5499 }, { "epoch": 1.669449081803005, "grad_norm": 2.328645944595337, "learning_rate": 6.664641555285542e-05, "loss": 1.8017, "step": 5500 }, { "epoch": 1.6697526179996964, "grad_norm": 0.5665689706802368, "learning_rate": 6.664034021871203e-05, "loss": 1.843, "step": 5501 }, { "epoch": 1.6700561541963879, "grad_norm": 0.5300931930541992, "learning_rate": 6.663426488456865e-05, "loss": 1.3377, "step": 5502 }, { "epoch": 1.6703596903930795, "grad_norm": 0.6273548603057861, "learning_rate": 6.662818955042528e-05, "loss": 1.9491, "step": 5503 }, { "epoch": 1.6706632265897707, "grad_norm": 0.5122102499008179, "learning_rate": 6.66221142162819e-05, "loss": 1.8161, "step": 5504 }, { "epoch": 1.6709667627864624, "grad_norm": 0.45519566535949707, "learning_rate": 6.661603888213852e-05, "loss": 1.7292, "step": 5505 }, { "epoch": 1.6712702989831536, "grad_norm": 0.6773088574409485, "learning_rate": 6.660996354799515e-05, "loss": 1.2953, "step": 5506 }, { "epoch": 1.6715738351798453, "grad_norm": 1.0610431432724, "learning_rate": 6.660388821385177e-05, "loss": 1.3204, "step": 5507 }, { "epoch": 1.6718773713765367, "grad_norm": 0.6220400929450989, "learning_rate": 6.659781287970838e-05, "loss": 1.7026, "step": 5508 }, { "epoch": 1.6721809075732281, "grad_norm": 0.5171409249305725, "learning_rate": 6.659173754556501e-05, "loss": 1.5276, "step": 5509 }, { "epoch": 1.6724844437699196, "grad_norm": 1.0584473609924316, "learning_rate": 6.658566221142163e-05, "loss": 1.2664, "step": 5510 }, { "epoch": 1.672787979966611, "grad_norm": 0.5404136776924133, "learning_rate": 6.657958687727826e-05, "loss": 1.3878, "step": 5511 }, { "epoch": 1.6730915161633024, "grad_norm": 0.5437831282615662, "learning_rate": 6.657351154313488e-05, "loss": 1.6651, "step": 5512 }, { "epoch": 1.6733950523599939, "grad_norm": 0.5370603799819946, "learning_rate": 6.65674362089915e-05, "loss": 1.7805, "step": 5513 }, { "epoch": 1.6736985885566855, "grad_norm": 0.5555904507637024, "learning_rate": 6.656136087484813e-05, "loss": 1.6038, "step": 5514 }, { "epoch": 1.6740021247533767, "grad_norm": 0.5612810850143433, "learning_rate": 6.655528554070473e-05, "loss": 1.3638, "step": 5515 }, { "epoch": 1.6743056609500684, "grad_norm": 0.4679469168186188, "learning_rate": 6.654921020656136e-05, "loss": 1.201, "step": 5516 }, { "epoch": 1.6746091971467596, "grad_norm": 0.5470007061958313, "learning_rate": 6.654313487241799e-05, "loss": 1.8458, "step": 5517 }, { "epoch": 1.6749127333434513, "grad_norm": 0.5731515288352966, "learning_rate": 6.653705953827461e-05, "loss": 1.5961, "step": 5518 }, { "epoch": 1.6752162695401427, "grad_norm": 0.44922706484794617, "learning_rate": 6.653098420413123e-05, "loss": 2.1234, "step": 5519 }, { "epoch": 1.6755198057368341, "grad_norm": 0.4658230245113373, "learning_rate": 6.652490886998786e-05, "loss": 1.5829, "step": 5520 }, { "epoch": 1.6758233419335256, "grad_norm": 0.4976028501987457, "learning_rate": 6.651883353584448e-05, "loss": 1.5513, "step": 5521 }, { "epoch": 1.676126878130217, "grad_norm": 0.5739427804946899, "learning_rate": 6.651275820170109e-05, "loss": 1.514, "step": 5522 }, { "epoch": 1.6764304143269086, "grad_norm": 0.5371562242507935, "learning_rate": 6.650668286755772e-05, "loss": 1.6313, "step": 5523 }, { "epoch": 1.6767339505235999, "grad_norm": 0.8080946207046509, "learning_rate": 6.650060753341434e-05, "loss": 1.4731, "step": 5524 }, { "epoch": 1.6770374867202915, "grad_norm": 0.5355061888694763, "learning_rate": 6.649453219927097e-05, "loss": 1.7795, "step": 5525 }, { "epoch": 1.6773410229169827, "grad_norm": 0.529988706111908, "learning_rate": 6.648845686512758e-05, "loss": 1.7496, "step": 5526 }, { "epoch": 1.6776445591136744, "grad_norm": 0.46063679456710815, "learning_rate": 6.64823815309842e-05, "loss": 1.9183, "step": 5527 }, { "epoch": 1.6779480953103656, "grad_norm": 0.4897077977657318, "learning_rate": 6.647630619684084e-05, "loss": 1.4162, "step": 5528 }, { "epoch": 1.6782516315070573, "grad_norm": 0.47567248344421387, "learning_rate": 6.647023086269744e-05, "loss": 1.2476, "step": 5529 }, { "epoch": 1.6785551677037487, "grad_norm": 0.5894106030464172, "learning_rate": 6.646415552855407e-05, "loss": 1.8962, "step": 5530 }, { "epoch": 1.6788587039004401, "grad_norm": 0.5598317980766296, "learning_rate": 6.64580801944107e-05, "loss": 1.9378, "step": 5531 }, { "epoch": 1.6791622400971316, "grad_norm": 0.46344462037086487, "learning_rate": 6.645200486026732e-05, "loss": 1.3396, "step": 5532 }, { "epoch": 1.679465776293823, "grad_norm": 0.5595990419387817, "learning_rate": 6.644592952612394e-05, "loss": 1.435, "step": 5533 }, { "epoch": 1.6797693124905146, "grad_norm": 0.570959746837616, "learning_rate": 6.643985419198057e-05, "loss": 1.853, "step": 5534 }, { "epoch": 1.6800728486872059, "grad_norm": 0.5187576413154602, "learning_rate": 6.643377885783719e-05, "loss": 1.7441, "step": 5535 }, { "epoch": 1.6803763848838975, "grad_norm": 0.5761917233467102, "learning_rate": 6.64277035236938e-05, "loss": 1.7951, "step": 5536 }, { "epoch": 1.6806799210805887, "grad_norm": 0.7706677913665771, "learning_rate": 6.642162818955043e-05, "loss": 1.5578, "step": 5537 }, { "epoch": 1.6809834572772804, "grad_norm": 0.49452951550483704, "learning_rate": 6.641555285540705e-05, "loss": 1.8111, "step": 5538 }, { "epoch": 1.6812869934739718, "grad_norm": 0.8533628582954407, "learning_rate": 6.640947752126367e-05, "loss": 1.9594, "step": 5539 }, { "epoch": 1.6815905296706632, "grad_norm": 0.5128864049911499, "learning_rate": 6.640340218712029e-05, "loss": 1.8131, "step": 5540 }, { "epoch": 1.6818940658673547, "grad_norm": 0.5614345669746399, "learning_rate": 6.639732685297692e-05, "loss": 1.6136, "step": 5541 }, { "epoch": 1.6821976020640461, "grad_norm": 0.5893770456314087, "learning_rate": 6.639125151883355e-05, "loss": 1.8654, "step": 5542 }, { "epoch": 1.6825011382607375, "grad_norm": 0.49633264541625977, "learning_rate": 6.638517618469015e-05, "loss": 1.6421, "step": 5543 }, { "epoch": 1.682804674457429, "grad_norm": 0.5814844965934753, "learning_rate": 6.637910085054678e-05, "loss": 1.5601, "step": 5544 }, { "epoch": 1.6831082106541206, "grad_norm": 0.544435441493988, "learning_rate": 6.637302551640341e-05, "loss": 1.7607, "step": 5545 }, { "epoch": 1.6834117468508119, "grad_norm": 0.5191029906272888, "learning_rate": 6.636695018226003e-05, "loss": 1.7743, "step": 5546 }, { "epoch": 1.6837152830475035, "grad_norm": 0.588700532913208, "learning_rate": 6.636087484811665e-05, "loss": 1.4228, "step": 5547 }, { "epoch": 1.6840188192441947, "grad_norm": 0.5407133102416992, "learning_rate": 6.635479951397328e-05, "loss": 1.6116, "step": 5548 }, { "epoch": 1.6843223554408864, "grad_norm": 0.5809290409088135, "learning_rate": 6.63487241798299e-05, "loss": 1.4475, "step": 5549 }, { "epoch": 1.6846258916375778, "grad_norm": 0.5454392433166504, "learning_rate": 6.634264884568651e-05, "loss": 1.6486, "step": 5550 }, { "epoch": 1.6849294278342692, "grad_norm": 0.5466883182525635, "learning_rate": 6.633657351154314e-05, "loss": 1.7363, "step": 5551 }, { "epoch": 1.6852329640309607, "grad_norm": 0.6080778241157532, "learning_rate": 6.633049817739976e-05, "loss": 1.9441, "step": 5552 }, { "epoch": 1.6855365002276521, "grad_norm": 0.5893242955207825, "learning_rate": 6.632442284325638e-05, "loss": 1.7365, "step": 5553 }, { "epoch": 1.6858400364243438, "grad_norm": 0.5920213460922241, "learning_rate": 6.6318347509113e-05, "loss": 1.0138, "step": 5554 }, { "epoch": 1.686143572621035, "grad_norm": 0.5826712846755981, "learning_rate": 6.631227217496963e-05, "loss": 1.2456, "step": 5555 }, { "epoch": 1.6864471088177266, "grad_norm": 0.6475664377212524, "learning_rate": 6.630619684082626e-05, "loss": 1.8795, "step": 5556 }, { "epoch": 1.6867506450144178, "grad_norm": 0.4567904770374298, "learning_rate": 6.630012150668286e-05, "loss": 1.8851, "step": 5557 }, { "epoch": 1.6870541812111095, "grad_norm": 0.5504480004310608, "learning_rate": 6.629404617253949e-05, "loss": 1.093, "step": 5558 }, { "epoch": 1.6873577174078007, "grad_norm": 0.44067057967185974, "learning_rate": 6.628797083839612e-05, "loss": 0.9901, "step": 5559 }, { "epoch": 1.6876612536044924, "grad_norm": 0.5783705711364746, "learning_rate": 6.628189550425274e-05, "loss": 1.417, "step": 5560 }, { "epoch": 1.6879647898011838, "grad_norm": 0.5694515109062195, "learning_rate": 6.627582017010936e-05, "loss": 1.7743, "step": 5561 }, { "epoch": 1.6882683259978752, "grad_norm": 0.7065865993499756, "learning_rate": 6.626974483596599e-05, "loss": 1.779, "step": 5562 }, { "epoch": 1.6885718621945667, "grad_norm": 1.2169636487960815, "learning_rate": 6.62636695018226e-05, "loss": 1.4069, "step": 5563 }, { "epoch": 1.688875398391258, "grad_norm": 0.565172553062439, "learning_rate": 6.625759416767922e-05, "loss": 2.1258, "step": 5564 }, { "epoch": 1.6891789345879498, "grad_norm": 0.479299396276474, "learning_rate": 6.625151883353585e-05, "loss": 1.4836, "step": 5565 }, { "epoch": 1.689482470784641, "grad_norm": 0.4096144735813141, "learning_rate": 6.624544349939247e-05, "loss": 1.3714, "step": 5566 }, { "epoch": 1.6897860069813326, "grad_norm": 0.4696303606033325, "learning_rate": 6.623936816524909e-05, "loss": 2.2721, "step": 5567 }, { "epoch": 1.6900895431780238, "grad_norm": 0.6039106249809265, "learning_rate": 6.62332928311057e-05, "loss": 1.7076, "step": 5568 }, { "epoch": 1.6903930793747155, "grad_norm": 0.5275202393531799, "learning_rate": 6.622721749696234e-05, "loss": 1.3444, "step": 5569 }, { "epoch": 1.690696615571407, "grad_norm": 0.49647027254104614, "learning_rate": 6.622114216281897e-05, "loss": 1.1222, "step": 5570 }, { "epoch": 1.6910001517680984, "grad_norm": 0.5454918742179871, "learning_rate": 6.621506682867557e-05, "loss": 1.3901, "step": 5571 }, { "epoch": 1.6913036879647898, "grad_norm": 0.5135229825973511, "learning_rate": 6.62089914945322e-05, "loss": 1.561, "step": 5572 }, { "epoch": 1.6916072241614812, "grad_norm": 0.4712585210800171, "learning_rate": 6.620291616038883e-05, "loss": 1.7875, "step": 5573 }, { "epoch": 1.6919107603581727, "grad_norm": 0.5299578905105591, "learning_rate": 6.619684082624545e-05, "loss": 1.6143, "step": 5574 }, { "epoch": 1.692214296554864, "grad_norm": 0.4971330463886261, "learning_rate": 6.619076549210207e-05, "loss": 1.8084, "step": 5575 }, { "epoch": 1.6925178327515558, "grad_norm": 0.5195701122283936, "learning_rate": 6.61846901579587e-05, "loss": 1.8311, "step": 5576 }, { "epoch": 1.692821368948247, "grad_norm": 0.434212327003479, "learning_rate": 6.617861482381532e-05, "loss": 1.0112, "step": 5577 }, { "epoch": 1.6931249051449386, "grad_norm": 0.5618522763252258, "learning_rate": 6.617253948967193e-05, "loss": 1.6371, "step": 5578 }, { "epoch": 1.6934284413416298, "grad_norm": 0.5451750159263611, "learning_rate": 6.616646415552856e-05, "loss": 1.8356, "step": 5579 }, { "epoch": 1.6937319775383215, "grad_norm": 0.5345383286476135, "learning_rate": 6.616038882138518e-05, "loss": 1.5302, "step": 5580 }, { "epoch": 1.694035513735013, "grad_norm": 0.5338907241821289, "learning_rate": 6.61543134872418e-05, "loss": 1.2822, "step": 5581 }, { "epoch": 1.6943390499317044, "grad_norm": 0.47024282813072205, "learning_rate": 6.614823815309842e-05, "loss": 1.6541, "step": 5582 }, { "epoch": 1.6946425861283958, "grad_norm": 0.5072962641716003, "learning_rate": 6.614216281895505e-05, "loss": 1.7995, "step": 5583 }, { "epoch": 1.6949461223250872, "grad_norm": 0.5675525665283203, "learning_rate": 6.613608748481168e-05, "loss": 1.6175, "step": 5584 }, { "epoch": 1.6952496585217787, "grad_norm": 0.5576388239860535, "learning_rate": 6.613001215066828e-05, "loss": 1.6772, "step": 5585 }, { "epoch": 1.69555319471847, "grad_norm": 0.5666404366493225, "learning_rate": 6.612393681652491e-05, "loss": 1.4908, "step": 5586 }, { "epoch": 1.6958567309151618, "grad_norm": 0.6059871315956116, "learning_rate": 6.611786148238154e-05, "loss": 1.5443, "step": 5587 }, { "epoch": 1.696160267111853, "grad_norm": 0.4992029368877411, "learning_rate": 6.611178614823815e-05, "loss": 1.7427, "step": 5588 }, { "epoch": 1.6964638033085446, "grad_norm": 0.4931040406227112, "learning_rate": 6.610571081409478e-05, "loss": 1.5386, "step": 5589 }, { "epoch": 1.6967673395052358, "grad_norm": 0.6507347226142883, "learning_rate": 6.609963547995141e-05, "loss": 1.3519, "step": 5590 }, { "epoch": 1.6970708757019275, "grad_norm": 0.5607397556304932, "learning_rate": 6.609356014580803e-05, "loss": 1.6292, "step": 5591 }, { "epoch": 1.697374411898619, "grad_norm": 0.5708796977996826, "learning_rate": 6.608748481166464e-05, "loss": 1.671, "step": 5592 }, { "epoch": 1.6976779480953104, "grad_norm": 0.4833160936832428, "learning_rate": 6.608140947752127e-05, "loss": 1.8766, "step": 5593 }, { "epoch": 1.6979814842920018, "grad_norm": 0.6059253811836243, "learning_rate": 6.607533414337789e-05, "loss": 1.4497, "step": 5594 }, { "epoch": 1.6982850204886932, "grad_norm": 0.5514014959335327, "learning_rate": 6.606925880923451e-05, "loss": 1.268, "step": 5595 }, { "epoch": 1.6985885566853849, "grad_norm": 0.7988373041152954, "learning_rate": 6.606318347509113e-05, "loss": 1.4431, "step": 5596 }, { "epoch": 1.698892092882076, "grad_norm": 0.5660038590431213, "learning_rate": 6.605710814094776e-05, "loss": 1.8063, "step": 5597 }, { "epoch": 1.6991956290787678, "grad_norm": 0.5148012042045593, "learning_rate": 6.605103280680439e-05, "loss": 1.969, "step": 5598 }, { "epoch": 1.699499165275459, "grad_norm": 0.5059932470321655, "learning_rate": 6.604495747266099e-05, "loss": 2.0285, "step": 5599 }, { "epoch": 1.6998027014721506, "grad_norm": 0.6167613863945007, "learning_rate": 6.603888213851762e-05, "loss": 1.5008, "step": 5600 }, { "epoch": 1.700106237668842, "grad_norm": 0.6057916283607483, "learning_rate": 6.603280680437425e-05, "loss": 1.8194, "step": 5601 }, { "epoch": 1.7004097738655335, "grad_norm": 0.590444803237915, "learning_rate": 6.602673147023086e-05, "loss": 1.5952, "step": 5602 }, { "epoch": 1.700713310062225, "grad_norm": 0.5104463696479797, "learning_rate": 6.602065613608749e-05, "loss": 1.9437, "step": 5603 }, { "epoch": 1.7010168462589164, "grad_norm": 0.6614755392074585, "learning_rate": 6.601458080194412e-05, "loss": 1.7985, "step": 5604 }, { "epoch": 1.7013203824556078, "grad_norm": 0.5348273515701294, "learning_rate": 6.600850546780074e-05, "loss": 1.81, "step": 5605 }, { "epoch": 1.7016239186522992, "grad_norm": 0.7011120915412903, "learning_rate": 6.600243013365735e-05, "loss": 1.3921, "step": 5606 }, { "epoch": 1.7019274548489909, "grad_norm": 0.5143483281135559, "learning_rate": 6.599635479951398e-05, "loss": 1.3965, "step": 5607 }, { "epoch": 1.702230991045682, "grad_norm": 0.6444684267044067, "learning_rate": 6.59902794653706e-05, "loss": 1.6044, "step": 5608 }, { "epoch": 1.7025345272423738, "grad_norm": 0.5477455854415894, "learning_rate": 6.598420413122722e-05, "loss": 1.4496, "step": 5609 }, { "epoch": 1.702838063439065, "grad_norm": 0.4528508484363556, "learning_rate": 6.597812879708384e-05, "loss": 1.5698, "step": 5610 }, { "epoch": 1.7031415996357566, "grad_norm": 0.8523666262626648, "learning_rate": 6.597205346294047e-05, "loss": 1.7668, "step": 5611 }, { "epoch": 1.703445135832448, "grad_norm": 0.45186835527420044, "learning_rate": 6.596597812879708e-05, "loss": 1.3897, "step": 5612 }, { "epoch": 1.7037486720291395, "grad_norm": 0.47669535875320435, "learning_rate": 6.59599027946537e-05, "loss": 1.8281, "step": 5613 }, { "epoch": 1.704052208225831, "grad_norm": 0.4351363778114319, "learning_rate": 6.595382746051033e-05, "loss": 1.3241, "step": 5614 }, { "epoch": 1.7043557444225224, "grad_norm": 0.508640468120575, "learning_rate": 6.594775212636696e-05, "loss": 2.0058, "step": 5615 }, { "epoch": 1.7046592806192138, "grad_norm": 0.615515947341919, "learning_rate": 6.594167679222357e-05, "loss": 1.352, "step": 5616 }, { "epoch": 1.7049628168159052, "grad_norm": 0.6008703708648682, "learning_rate": 6.59356014580802e-05, "loss": 1.5652, "step": 5617 }, { "epoch": 1.7052663530125969, "grad_norm": 0.5095522403717041, "learning_rate": 6.592952612393683e-05, "loss": 1.7274, "step": 5618 }, { "epoch": 1.705569889209288, "grad_norm": 0.5482503771781921, "learning_rate": 6.592345078979345e-05, "loss": 1.8572, "step": 5619 }, { "epoch": 1.7058734254059797, "grad_norm": 0.601202666759491, "learning_rate": 6.591737545565006e-05, "loss": 1.5425, "step": 5620 }, { "epoch": 1.706176961602671, "grad_norm": 0.5597031116485596, "learning_rate": 6.591130012150668e-05, "loss": 1.7216, "step": 5621 }, { "epoch": 1.7064804977993626, "grad_norm": 0.6926930546760559, "learning_rate": 6.590522478736331e-05, "loss": 1.7835, "step": 5622 }, { "epoch": 1.706784033996054, "grad_norm": 0.5762909054756165, "learning_rate": 6.589914945321993e-05, "loss": 1.7096, "step": 5623 }, { "epoch": 1.7070875701927455, "grad_norm": 0.43438559770584106, "learning_rate": 6.589307411907655e-05, "loss": 1.6576, "step": 5624 }, { "epoch": 1.707391106389437, "grad_norm": 0.577350378036499, "learning_rate": 6.588699878493318e-05, "loss": 1.3354, "step": 5625 }, { "epoch": 1.7076946425861284, "grad_norm": 0.7164928913116455, "learning_rate": 6.58809234507898e-05, "loss": 1.7567, "step": 5626 }, { "epoch": 1.70799817878282, "grad_norm": 0.9256330132484436, "learning_rate": 6.587484811664641e-05, "loss": 1.5759, "step": 5627 }, { "epoch": 1.7083017149795112, "grad_norm": 0.45018166303634644, "learning_rate": 6.586877278250304e-05, "loss": 1.7099, "step": 5628 }, { "epoch": 1.7086052511762029, "grad_norm": 0.5842235088348389, "learning_rate": 6.586269744835967e-05, "loss": 1.927, "step": 5629 }, { "epoch": 1.708908787372894, "grad_norm": 0.5370432138442993, "learning_rate": 6.585662211421628e-05, "loss": 1.4116, "step": 5630 }, { "epoch": 1.7092123235695857, "grad_norm": 0.5643729567527771, "learning_rate": 6.585054678007291e-05, "loss": 1.5234, "step": 5631 }, { "epoch": 1.7095158597662772, "grad_norm": 0.5104454755783081, "learning_rate": 6.584447144592954e-05, "loss": 1.6687, "step": 5632 }, { "epoch": 1.7098193959629686, "grad_norm": 0.5528333783149719, "learning_rate": 6.583839611178616e-05, "loss": 1.7628, "step": 5633 }, { "epoch": 1.71012293215966, "grad_norm": 0.4312419891357422, "learning_rate": 6.583232077764277e-05, "loss": 1.3351, "step": 5634 }, { "epoch": 1.7104264683563515, "grad_norm": 0.5308565497398376, "learning_rate": 6.582624544349939e-05, "loss": 1.3432, "step": 5635 }, { "epoch": 1.710730004553043, "grad_norm": 0.5155957937240601, "learning_rate": 6.582017010935602e-05, "loss": 1.7007, "step": 5636 }, { "epoch": 1.7110335407497343, "grad_norm": 0.48766398429870605, "learning_rate": 6.581409477521264e-05, "loss": 1.8807, "step": 5637 }, { "epoch": 1.711337076946426, "grad_norm": 0.6066737771034241, "learning_rate": 6.580801944106926e-05, "loss": 1.705, "step": 5638 }, { "epoch": 1.7116406131431172, "grad_norm": 0.38555800914764404, "learning_rate": 6.580194410692589e-05, "loss": 1.6603, "step": 5639 }, { "epoch": 1.7119441493398089, "grad_norm": 0.5054239630699158, "learning_rate": 6.57958687727825e-05, "loss": 1.8099, "step": 5640 }, { "epoch": 1.7122476855365, "grad_norm": 2.3474161624908447, "learning_rate": 6.578979343863912e-05, "loss": 1.8, "step": 5641 }, { "epoch": 1.7125512217331917, "grad_norm": 0.5971972942352295, "learning_rate": 6.578371810449575e-05, "loss": 1.7147, "step": 5642 }, { "epoch": 1.7128547579298832, "grad_norm": 0.5372626781463623, "learning_rate": 6.577764277035238e-05, "loss": 1.7786, "step": 5643 }, { "epoch": 1.7131582941265746, "grad_norm": 0.4543991684913635, "learning_rate": 6.577156743620899e-05, "loss": 1.4787, "step": 5644 }, { "epoch": 1.713461830323266, "grad_norm": 0.5099576115608215, "learning_rate": 6.576549210206562e-05, "loss": 1.7248, "step": 5645 }, { "epoch": 1.7137653665199575, "grad_norm": 0.5634260177612305, "learning_rate": 6.575941676792225e-05, "loss": 1.6313, "step": 5646 }, { "epoch": 1.714068902716649, "grad_norm": 0.6225250363349915, "learning_rate": 6.575334143377887e-05, "loss": 1.4485, "step": 5647 }, { "epoch": 1.7143724389133403, "grad_norm": 0.6028598546981812, "learning_rate": 6.574726609963548e-05, "loss": 1.4313, "step": 5648 }, { "epoch": 1.714675975110032, "grad_norm": 0.5556081533432007, "learning_rate": 6.57411907654921e-05, "loss": 1.7669, "step": 5649 }, { "epoch": 1.7149795113067232, "grad_norm": 0.4711391031742096, "learning_rate": 6.573511543134873e-05, "loss": 1.1927, "step": 5650 }, { "epoch": 1.7152830475034149, "grad_norm": 0.62098228931427, "learning_rate": 6.572904009720535e-05, "loss": 1.9925, "step": 5651 }, { "epoch": 1.715586583700106, "grad_norm": 0.402541846036911, "learning_rate": 6.572296476306197e-05, "loss": 0.7512, "step": 5652 }, { "epoch": 1.7158901198967977, "grad_norm": 0.45836901664733887, "learning_rate": 6.57168894289186e-05, "loss": 1.435, "step": 5653 }, { "epoch": 1.7161936560934892, "grad_norm": 0.48346778750419617, "learning_rate": 6.571081409477521e-05, "loss": 1.4181, "step": 5654 }, { "epoch": 1.7164971922901806, "grad_norm": 0.6208150386810303, "learning_rate": 6.570473876063183e-05, "loss": 1.6864, "step": 5655 }, { "epoch": 1.716800728486872, "grad_norm": 0.5359233617782593, "learning_rate": 6.569866342648846e-05, "loss": 1.5866, "step": 5656 }, { "epoch": 1.7171042646835635, "grad_norm": 0.4424448311328888, "learning_rate": 6.569258809234509e-05, "loss": 1.2408, "step": 5657 }, { "epoch": 1.7174078008802551, "grad_norm": 0.5332330465316772, "learning_rate": 6.56865127582017e-05, "loss": 1.4862, "step": 5658 }, { "epoch": 1.7177113370769463, "grad_norm": 0.4810725748538971, "learning_rate": 6.568043742405833e-05, "loss": 1.714, "step": 5659 }, { "epoch": 1.718014873273638, "grad_norm": 0.5168288946151733, "learning_rate": 6.567436208991496e-05, "loss": 1.5932, "step": 5660 }, { "epoch": 1.7183184094703292, "grad_norm": 0.49804630875587463, "learning_rate": 6.566828675577156e-05, "loss": 1.6901, "step": 5661 }, { "epoch": 1.7186219456670209, "grad_norm": 0.5672121047973633, "learning_rate": 6.566221142162819e-05, "loss": 1.993, "step": 5662 }, { "epoch": 1.7189254818637123, "grad_norm": 0.5449413061141968, "learning_rate": 6.565613608748481e-05, "loss": 1.7013, "step": 5663 }, { "epoch": 1.7192290180604037, "grad_norm": 0.5823309421539307, "learning_rate": 6.565006075334144e-05, "loss": 1.5827, "step": 5664 }, { "epoch": 1.7195325542570952, "grad_norm": 0.4752315580844879, "learning_rate": 6.564398541919806e-05, "loss": 1.3413, "step": 5665 }, { "epoch": 1.7198360904537866, "grad_norm": 0.6719446182250977, "learning_rate": 6.563791008505468e-05, "loss": 1.3361, "step": 5666 }, { "epoch": 1.720139626650478, "grad_norm": 0.650968074798584, "learning_rate": 6.56318347509113e-05, "loss": 1.3424, "step": 5667 }, { "epoch": 1.7204431628471695, "grad_norm": 0.668044924736023, "learning_rate": 6.562575941676792e-05, "loss": 1.1552, "step": 5668 }, { "epoch": 1.7207466990438611, "grad_norm": 0.7418035864830017, "learning_rate": 6.561968408262454e-05, "loss": 1.7164, "step": 5669 }, { "epoch": 1.7210502352405523, "grad_norm": 0.5805455446243286, "learning_rate": 6.561360874848117e-05, "loss": 1.8215, "step": 5670 }, { "epoch": 1.721353771437244, "grad_norm": 0.5062075853347778, "learning_rate": 6.56075334143378e-05, "loss": 1.818, "step": 5671 }, { "epoch": 1.7216573076339352, "grad_norm": 0.5593624114990234, "learning_rate": 6.56014580801944e-05, "loss": 1.492, "step": 5672 }, { "epoch": 1.7219608438306269, "grad_norm": 0.6025186777114868, "learning_rate": 6.559538274605104e-05, "loss": 1.6984, "step": 5673 }, { "epoch": 1.7222643800273183, "grad_norm": 0.5048421025276184, "learning_rate": 6.558930741190767e-05, "loss": 0.9581, "step": 5674 }, { "epoch": 1.7225679162240097, "grad_norm": 0.5279005765914917, "learning_rate": 6.558323207776427e-05, "loss": 1.7759, "step": 5675 }, { "epoch": 1.7228714524207012, "grad_norm": 0.39026376605033875, "learning_rate": 6.55771567436209e-05, "loss": 1.631, "step": 5676 }, { "epoch": 1.7231749886173926, "grad_norm": 0.5611656308174133, "learning_rate": 6.557108140947752e-05, "loss": 1.7361, "step": 5677 }, { "epoch": 1.723478524814084, "grad_norm": 0.5910829305648804, "learning_rate": 6.556500607533415e-05, "loss": 1.5679, "step": 5678 }, { "epoch": 1.7237820610107755, "grad_norm": 0.6744527220726013, "learning_rate": 6.555893074119077e-05, "loss": 1.6144, "step": 5679 }, { "epoch": 1.7240855972074671, "grad_norm": 0.5710015892982483, "learning_rate": 6.555285540704739e-05, "loss": 1.6817, "step": 5680 }, { "epoch": 1.7243891334041583, "grad_norm": 0.5634138584136963, "learning_rate": 6.554678007290402e-05, "loss": 1.5614, "step": 5681 }, { "epoch": 1.72469266960085, "grad_norm": 0.538596510887146, "learning_rate": 6.554070473876063e-05, "loss": 1.6871, "step": 5682 }, { "epoch": 1.7249962057975412, "grad_norm": 0.456910103559494, "learning_rate": 6.553462940461725e-05, "loss": 1.9021, "step": 5683 }, { "epoch": 1.7252997419942329, "grad_norm": 0.5184640288352966, "learning_rate": 6.552855407047388e-05, "loss": 1.4493, "step": 5684 }, { "epoch": 1.7256032781909243, "grad_norm": 0.4227922558784485, "learning_rate": 6.55224787363305e-05, "loss": 1.1346, "step": 5685 }, { "epoch": 1.7259068143876157, "grad_norm": 0.5384974479675293, "learning_rate": 6.551640340218712e-05, "loss": 1.4499, "step": 5686 }, { "epoch": 1.7262103505843072, "grad_norm": 0.5459827184677124, "learning_rate": 6.551032806804375e-05, "loss": 1.6721, "step": 5687 }, { "epoch": 1.7265138867809986, "grad_norm": 0.48895028233528137, "learning_rate": 6.550425273390038e-05, "loss": 1.6447, "step": 5688 }, { "epoch": 1.7268174229776903, "grad_norm": 0.4322478175163269, "learning_rate": 6.549817739975698e-05, "loss": 1.2844, "step": 5689 }, { "epoch": 1.7271209591743815, "grad_norm": 0.6133584380149841, "learning_rate": 6.549210206561361e-05, "loss": 1.0293, "step": 5690 }, { "epoch": 1.7274244953710731, "grad_norm": 0.4673958420753479, "learning_rate": 6.548602673147023e-05, "loss": 2.098, "step": 5691 }, { "epoch": 1.7277280315677643, "grad_norm": 0.8196238875389099, "learning_rate": 6.547995139732686e-05, "loss": 1.1086, "step": 5692 }, { "epoch": 1.728031567764456, "grad_norm": 0.6799973249435425, "learning_rate": 6.547387606318348e-05, "loss": 1.8953, "step": 5693 }, { "epoch": 1.7283351039611472, "grad_norm": 0.5416783690452576, "learning_rate": 6.54678007290401e-05, "loss": 1.633, "step": 5694 }, { "epoch": 1.7286386401578389, "grad_norm": 0.574783444404602, "learning_rate": 6.546172539489673e-05, "loss": 1.7365, "step": 5695 }, { "epoch": 1.7289421763545303, "grad_norm": 0.7082532644271851, "learning_rate": 6.545565006075334e-05, "loss": 1.5609, "step": 5696 }, { "epoch": 1.7292457125512217, "grad_norm": 0.4839562177658081, "learning_rate": 6.544957472660996e-05, "loss": 1.6787, "step": 5697 }, { "epoch": 1.7295492487479132, "grad_norm": 0.5359554290771484, "learning_rate": 6.544349939246659e-05, "loss": 1.3859, "step": 5698 }, { "epoch": 1.7298527849446046, "grad_norm": 0.4709901511669159, "learning_rate": 6.543742405832321e-05, "loss": 1.3647, "step": 5699 }, { "epoch": 1.7301563211412962, "grad_norm": 0.5647301077842712, "learning_rate": 6.543134872417983e-05, "loss": 1.7588, "step": 5700 }, { "epoch": 1.7304598573379875, "grad_norm": 0.5082557201385498, "learning_rate": 6.542527339003646e-05, "loss": 1.6857, "step": 5701 }, { "epoch": 1.7307633935346791, "grad_norm": 0.5972119569778442, "learning_rate": 6.541919805589307e-05, "loss": 1.5892, "step": 5702 }, { "epoch": 1.7310669297313703, "grad_norm": 0.5038626790046692, "learning_rate": 6.541312272174969e-05, "loss": 1.7169, "step": 5703 }, { "epoch": 1.731370465928062, "grad_norm": 0.5989472270011902, "learning_rate": 6.540704738760632e-05, "loss": 1.8422, "step": 5704 }, { "epoch": 1.7316740021247534, "grad_norm": 0.6729888916015625, "learning_rate": 6.540097205346294e-05, "loss": 1.3468, "step": 5705 }, { "epoch": 1.7319775383214449, "grad_norm": 0.4645465910434723, "learning_rate": 6.539489671931957e-05, "loss": 2.0354, "step": 5706 }, { "epoch": 1.7322810745181363, "grad_norm": 0.49751991033554077, "learning_rate": 6.538882138517619e-05, "loss": 1.4206, "step": 5707 }, { "epoch": 1.7325846107148277, "grad_norm": 0.49642395973205566, "learning_rate": 6.53827460510328e-05, "loss": 1.5807, "step": 5708 }, { "epoch": 1.7328881469115192, "grad_norm": 0.6195915937423706, "learning_rate": 6.537667071688944e-05, "loss": 1.0846, "step": 5709 }, { "epoch": 1.7331916831082106, "grad_norm": 0.5105364322662354, "learning_rate": 6.537059538274605e-05, "loss": 1.3858, "step": 5710 }, { "epoch": 1.7334952193049022, "grad_norm": 0.5473313331604004, "learning_rate": 6.536452004860267e-05, "loss": 1.6553, "step": 5711 }, { "epoch": 1.7337987555015935, "grad_norm": 0.5856526494026184, "learning_rate": 6.53584447144593e-05, "loss": 1.3455, "step": 5712 }, { "epoch": 1.7341022916982851, "grad_norm": 0.5909231305122375, "learning_rate": 6.535236938031592e-05, "loss": 1.9817, "step": 5713 }, { "epoch": 1.7344058278949763, "grad_norm": 0.5128015279769897, "learning_rate": 6.534629404617254e-05, "loss": 1.7289, "step": 5714 }, { "epoch": 1.734709364091668, "grad_norm": 0.55885249376297, "learning_rate": 6.534021871202917e-05, "loss": 1.7433, "step": 5715 }, { "epoch": 1.7350129002883594, "grad_norm": 1.039367437362671, "learning_rate": 6.533414337788578e-05, "loss": 1.3287, "step": 5716 }, { "epoch": 1.7353164364850509, "grad_norm": 0.6337462067604065, "learning_rate": 6.53280680437424e-05, "loss": 1.5387, "step": 5717 }, { "epoch": 1.7356199726817423, "grad_norm": 0.5924265384674072, "learning_rate": 6.532199270959903e-05, "loss": 1.5792, "step": 5718 }, { "epoch": 1.7359235088784337, "grad_norm": 0.48296046257019043, "learning_rate": 6.531591737545565e-05, "loss": 1.328, "step": 5719 }, { "epoch": 1.7362270450751254, "grad_norm": 0.5941514372825623, "learning_rate": 6.530984204131228e-05, "loss": 1.6456, "step": 5720 }, { "epoch": 1.7365305812718166, "grad_norm": 0.5522921085357666, "learning_rate": 6.53037667071689e-05, "loss": 1.4514, "step": 5721 }, { "epoch": 1.7368341174685082, "grad_norm": 0.5471270680427551, "learning_rate": 6.529769137302552e-05, "loss": 1.5356, "step": 5722 }, { "epoch": 1.7371376536651995, "grad_norm": 1.03130304813385, "learning_rate": 6.529161603888215e-05, "loss": 1.6965, "step": 5723 }, { "epoch": 1.737441189861891, "grad_norm": 0.6405633687973022, "learning_rate": 6.528554070473876e-05, "loss": 1.2192, "step": 5724 }, { "epoch": 1.7377447260585823, "grad_norm": 0.5106350779533386, "learning_rate": 6.527946537059538e-05, "loss": 1.4618, "step": 5725 }, { "epoch": 1.738048262255274, "grad_norm": 0.5871725082397461, "learning_rate": 6.527339003645201e-05, "loss": 1.8213, "step": 5726 }, { "epoch": 1.7383517984519654, "grad_norm": 0.5859651565551758, "learning_rate": 6.526731470230863e-05, "loss": 1.3484, "step": 5727 }, { "epoch": 1.7386553346486568, "grad_norm": 0.5065982937812805, "learning_rate": 6.526123936816525e-05, "loss": 1.6662, "step": 5728 }, { "epoch": 1.7389588708453483, "grad_norm": 0.5518446564674377, "learning_rate": 6.525516403402188e-05, "loss": 1.5213, "step": 5729 }, { "epoch": 1.7392624070420397, "grad_norm": 0.5022513270378113, "learning_rate": 6.52490886998785e-05, "loss": 1.2575, "step": 5730 }, { "epoch": 1.7395659432387314, "grad_norm": 0.3669544756412506, "learning_rate": 6.524301336573511e-05, "loss": 1.8608, "step": 5731 }, { "epoch": 1.7398694794354226, "grad_norm": 0.9789682626724243, "learning_rate": 6.523693803159174e-05, "loss": 1.2424, "step": 5732 }, { "epoch": 1.7401730156321142, "grad_norm": 0.5330730080604553, "learning_rate": 6.523086269744836e-05, "loss": 1.641, "step": 5733 }, { "epoch": 1.7404765518288055, "grad_norm": 0.4809507727622986, "learning_rate": 6.522478736330498e-05, "loss": 1.1522, "step": 5734 }, { "epoch": 1.740780088025497, "grad_norm": 0.6297109723091125, "learning_rate": 6.521871202916161e-05, "loss": 1.8506, "step": 5735 }, { "epoch": 1.7410836242221885, "grad_norm": 0.5746430158615112, "learning_rate": 6.521263669501823e-05, "loss": 1.3589, "step": 5736 }, { "epoch": 1.74138716041888, "grad_norm": 0.5811137557029724, "learning_rate": 6.520656136087486e-05, "loss": 1.8811, "step": 5737 }, { "epoch": 1.7416906966155714, "grad_norm": 0.5107501149177551, "learning_rate": 6.520048602673147e-05, "loss": 1.6527, "step": 5738 }, { "epoch": 1.7419942328122628, "grad_norm": 0.6415001749992371, "learning_rate": 6.519441069258809e-05, "loss": 1.0182, "step": 5739 }, { "epoch": 1.7422977690089543, "grad_norm": 0.6572467684745789, "learning_rate": 6.518833535844472e-05, "loss": 1.5543, "step": 5740 }, { "epoch": 1.7426013052056457, "grad_norm": 0.5816811323165894, "learning_rate": 6.518226002430134e-05, "loss": 2.0466, "step": 5741 }, { "epoch": 1.7429048414023374, "grad_norm": 0.5232481360435486, "learning_rate": 6.517618469015796e-05, "loss": 1.4519, "step": 5742 }, { "epoch": 1.7432083775990286, "grad_norm": 0.720382809638977, "learning_rate": 6.517010935601459e-05, "loss": 1.2823, "step": 5743 }, { "epoch": 1.7435119137957202, "grad_norm": 0.6193926930427551, "learning_rate": 6.51640340218712e-05, "loss": 1.4534, "step": 5744 }, { "epoch": 1.7438154499924114, "grad_norm": 0.629203736782074, "learning_rate": 6.515795868772782e-05, "loss": 1.606, "step": 5745 }, { "epoch": 1.744118986189103, "grad_norm": 0.7067033648490906, "learning_rate": 6.515188335358445e-05, "loss": 1.8257, "step": 5746 }, { "epoch": 1.7444225223857945, "grad_norm": 0.596479594707489, "learning_rate": 6.514580801944107e-05, "loss": 1.5257, "step": 5747 }, { "epoch": 1.744726058582486, "grad_norm": 0.5363408923149109, "learning_rate": 6.513973268529769e-05, "loss": 1.7282, "step": 5748 }, { "epoch": 1.7450295947791774, "grad_norm": 0.5793718695640564, "learning_rate": 6.513365735115432e-05, "loss": 2.0703, "step": 5749 }, { "epoch": 1.7453331309758688, "grad_norm": 0.5103743672370911, "learning_rate": 6.512758201701094e-05, "loss": 1.6265, "step": 5750 }, { "epoch": 1.7456366671725603, "grad_norm": 0.5116788148880005, "learning_rate": 6.512150668286757e-05, "loss": 1.8109, "step": 5751 }, { "epoch": 1.7459402033692517, "grad_norm": 0.6101179718971252, "learning_rate": 6.511543134872418e-05, "loss": 1.5663, "step": 5752 }, { "epoch": 1.7462437395659434, "grad_norm": 0.766834557056427, "learning_rate": 6.51093560145808e-05, "loss": 1.9858, "step": 5753 }, { "epoch": 1.7465472757626346, "grad_norm": 0.5078772306442261, "learning_rate": 6.510328068043743e-05, "loss": 1.3673, "step": 5754 }, { "epoch": 1.7468508119593262, "grad_norm": 0.47746485471725464, "learning_rate": 6.509720534629405e-05, "loss": 1.316, "step": 5755 }, { "epoch": 1.7471543481560174, "grad_norm": 0.5668113827705383, "learning_rate": 6.509113001215067e-05, "loss": 1.6966, "step": 5756 }, { "epoch": 1.747457884352709, "grad_norm": 0.5345942974090576, "learning_rate": 6.50850546780073e-05, "loss": 1.195, "step": 5757 }, { "epoch": 1.7477614205494005, "grad_norm": 0.5363343358039856, "learning_rate": 6.507897934386392e-05, "loss": 1.709, "step": 5758 }, { "epoch": 1.748064956746092, "grad_norm": 0.8389655351638794, "learning_rate": 6.507290400972053e-05, "loss": 1.7834, "step": 5759 }, { "epoch": 1.7483684929427834, "grad_norm": 0.4319862723350525, "learning_rate": 6.506682867557716e-05, "loss": 1.9358, "step": 5760 }, { "epoch": 1.7486720291394748, "grad_norm": 0.5649601221084595, "learning_rate": 6.506075334143378e-05, "loss": 1.2926, "step": 5761 }, { "epoch": 1.7489755653361665, "grad_norm": 0.5124643445014954, "learning_rate": 6.50546780072904e-05, "loss": 2.1127, "step": 5762 }, { "epoch": 1.7492791015328577, "grad_norm": 0.5134212374687195, "learning_rate": 6.504860267314703e-05, "loss": 1.4703, "step": 5763 }, { "epoch": 1.7495826377295494, "grad_norm": 0.7799593210220337, "learning_rate": 6.504252733900365e-05, "loss": 1.1072, "step": 5764 }, { "epoch": 1.7498861739262406, "grad_norm": 0.6452533602714539, "learning_rate": 6.503645200486028e-05, "loss": 1.6831, "step": 5765 }, { "epoch": 1.7501897101229322, "grad_norm": 0.5632435083389282, "learning_rate": 6.50303766707169e-05, "loss": 1.7022, "step": 5766 }, { "epoch": 1.7504932463196237, "grad_norm": 0.49832579493522644, "learning_rate": 6.502430133657351e-05, "loss": 1.3726, "step": 5767 }, { "epoch": 1.750796782516315, "grad_norm": 0.5050408840179443, "learning_rate": 6.501822600243014e-05, "loss": 1.2312, "step": 5768 }, { "epoch": 1.7511003187130065, "grad_norm": 0.5031700134277344, "learning_rate": 6.501215066828676e-05, "loss": 1.1839, "step": 5769 }, { "epoch": 1.751403854909698, "grad_norm": 0.5715921521186829, "learning_rate": 6.500607533414338e-05, "loss": 1.7294, "step": 5770 }, { "epoch": 1.7517073911063894, "grad_norm": 0.6526091694831848, "learning_rate": 6.500000000000001e-05, "loss": 1.3739, "step": 5771 }, { "epoch": 1.7520109273030808, "grad_norm": 0.5826200246810913, "learning_rate": 6.499392466585663e-05, "loss": 1.5172, "step": 5772 }, { "epoch": 1.7523144634997725, "grad_norm": 0.6188724040985107, "learning_rate": 6.498784933171324e-05, "loss": 1.7267, "step": 5773 }, { "epoch": 1.7526179996964637, "grad_norm": 0.5869378447532654, "learning_rate": 6.498177399756987e-05, "loss": 1.6295, "step": 5774 }, { "epoch": 1.7529215358931554, "grad_norm": 0.5730337500572205, "learning_rate": 6.497569866342649e-05, "loss": 1.4468, "step": 5775 }, { "epoch": 1.7532250720898466, "grad_norm": 0.47720491886138916, "learning_rate": 6.496962332928311e-05, "loss": 1.4877, "step": 5776 }, { "epoch": 1.7535286082865382, "grad_norm": 0.5709559917449951, "learning_rate": 6.496354799513974e-05, "loss": 1.9345, "step": 5777 }, { "epoch": 1.7538321444832297, "grad_norm": 0.5799897909164429, "learning_rate": 6.495747266099636e-05, "loss": 0.7032, "step": 5778 }, { "epoch": 1.754135680679921, "grad_norm": 0.6293965578079224, "learning_rate": 6.495139732685299e-05, "loss": 1.7294, "step": 5779 }, { "epoch": 1.7544392168766125, "grad_norm": 0.567211925983429, "learning_rate": 6.49453219927096e-05, "loss": 1.8237, "step": 5780 }, { "epoch": 1.754742753073304, "grad_norm": 0.5054707527160645, "learning_rate": 6.493924665856622e-05, "loss": 1.2977, "step": 5781 }, { "epoch": 1.7550462892699954, "grad_norm": 0.6522750854492188, "learning_rate": 6.493317132442285e-05, "loss": 1.278, "step": 5782 }, { "epoch": 1.7553498254666868, "grad_norm": 0.5055107474327087, "learning_rate": 6.492709599027947e-05, "loss": 1.6936, "step": 5783 }, { "epoch": 1.7556533616633785, "grad_norm": 0.7780368328094482, "learning_rate": 6.492102065613609e-05, "loss": 1.8676, "step": 5784 }, { "epoch": 1.7559568978600697, "grad_norm": 0.5879009366035461, "learning_rate": 6.491494532199272e-05, "loss": 1.8683, "step": 5785 }, { "epoch": 1.7562604340567614, "grad_norm": 0.5584444999694824, "learning_rate": 6.490886998784934e-05, "loss": 1.9057, "step": 5786 }, { "epoch": 1.7565639702534526, "grad_norm": 0.6271816492080688, "learning_rate": 6.490279465370595e-05, "loss": 1.7766, "step": 5787 }, { "epoch": 1.7568675064501442, "grad_norm": 0.6667237281799316, "learning_rate": 6.489671931956258e-05, "loss": 1.6506, "step": 5788 }, { "epoch": 1.7571710426468357, "grad_norm": 0.5379130840301514, "learning_rate": 6.48906439854192e-05, "loss": 1.6144, "step": 5789 }, { "epoch": 1.757474578843527, "grad_norm": 0.8868510723114014, "learning_rate": 6.488456865127582e-05, "loss": 1.7302, "step": 5790 }, { "epoch": 1.7577781150402185, "grad_norm": 0.5535308718681335, "learning_rate": 6.487849331713245e-05, "loss": 1.7013, "step": 5791 }, { "epoch": 1.75808165123691, "grad_norm": 0.5302169919013977, "learning_rate": 6.487241798298907e-05, "loss": 1.9597, "step": 5792 }, { "epoch": 1.7583851874336016, "grad_norm": 0.5474647879600525, "learning_rate": 6.48663426488457e-05, "loss": 2.1173, "step": 5793 }, { "epoch": 1.7586887236302928, "grad_norm": 0.8962429165840149, "learning_rate": 6.486026731470231e-05, "loss": 1.2716, "step": 5794 }, { "epoch": 1.7589922598269845, "grad_norm": 0.6951600909233093, "learning_rate": 6.485419198055893e-05, "loss": 1.4244, "step": 5795 }, { "epoch": 1.7592957960236757, "grad_norm": 0.5327600836753845, "learning_rate": 6.484811664641556e-05, "loss": 1.5641, "step": 5796 }, { "epoch": 1.7595993322203674, "grad_norm": 0.471091091632843, "learning_rate": 6.484204131227217e-05, "loss": 1.7891, "step": 5797 }, { "epoch": 1.7599028684170588, "grad_norm": 0.5820968747138977, "learning_rate": 6.48359659781288e-05, "loss": 1.59, "step": 5798 }, { "epoch": 1.7602064046137502, "grad_norm": 1.5941643714904785, "learning_rate": 6.482989064398543e-05, "loss": 1.131, "step": 5799 }, { "epoch": 1.7605099408104417, "grad_norm": 0.6009232997894287, "learning_rate": 6.482381530984205e-05, "loss": 1.8783, "step": 5800 }, { "epoch": 1.760813477007133, "grad_norm": 0.5255837440490723, "learning_rate": 6.481773997569866e-05, "loss": 1.9547, "step": 5801 }, { "epoch": 1.7611170132038245, "grad_norm": 0.6272095441818237, "learning_rate": 6.48116646415553e-05, "loss": 1.7901, "step": 5802 }, { "epoch": 1.761420549400516, "grad_norm": 0.47905588150024414, "learning_rate": 6.480558930741191e-05, "loss": 1.9139, "step": 5803 }, { "epoch": 1.7617240855972076, "grad_norm": 0.5940137505531311, "learning_rate": 6.479951397326853e-05, "loss": 1.7599, "step": 5804 }, { "epoch": 1.7620276217938988, "grad_norm": 0.52225661277771, "learning_rate": 6.479343863912516e-05, "loss": 1.8058, "step": 5805 }, { "epoch": 1.7623311579905905, "grad_norm": 0.529253363609314, "learning_rate": 6.478736330498178e-05, "loss": 1.7519, "step": 5806 }, { "epoch": 1.7626346941872817, "grad_norm": 0.5793299078941345, "learning_rate": 6.47812879708384e-05, "loss": 1.848, "step": 5807 }, { "epoch": 1.7629382303839733, "grad_norm": 0.5690224170684814, "learning_rate": 6.477521263669502e-05, "loss": 1.5886, "step": 5808 }, { "epoch": 1.7632417665806648, "grad_norm": 0.6605258584022522, "learning_rate": 6.476913730255164e-05, "loss": 1.642, "step": 5809 }, { "epoch": 1.7635453027773562, "grad_norm": 0.5015376210212708, "learning_rate": 6.476306196840827e-05, "loss": 1.5451, "step": 5810 }, { "epoch": 1.7638488389740477, "grad_norm": 0.491745263338089, "learning_rate": 6.475698663426488e-05, "loss": 1.7558, "step": 5811 }, { "epoch": 1.764152375170739, "grad_norm": 0.5883635878562927, "learning_rate": 6.475091130012151e-05, "loss": 1.4678, "step": 5812 }, { "epoch": 1.7644559113674305, "grad_norm": 0.5973695516586304, "learning_rate": 6.474483596597814e-05, "loss": 1.5873, "step": 5813 }, { "epoch": 1.764759447564122, "grad_norm": 0.4223356246948242, "learning_rate": 6.473876063183476e-05, "loss": 1.9833, "step": 5814 }, { "epoch": 1.7650629837608136, "grad_norm": 0.5074962973594666, "learning_rate": 6.473268529769137e-05, "loss": 1.8624, "step": 5815 }, { "epoch": 1.7653665199575048, "grad_norm": 0.5878717303276062, "learning_rate": 6.4726609963548e-05, "loss": 1.1902, "step": 5816 }, { "epoch": 1.7656700561541965, "grad_norm": 0.4983219504356384, "learning_rate": 6.472053462940462e-05, "loss": 2.2586, "step": 5817 }, { "epoch": 1.7659735923508877, "grad_norm": 0.5959672927856445, "learning_rate": 6.471445929526124e-05, "loss": 1.6374, "step": 5818 }, { "epoch": 1.7662771285475793, "grad_norm": 0.5485727190971375, "learning_rate": 6.470838396111787e-05, "loss": 1.3097, "step": 5819 }, { "epoch": 1.7665806647442708, "grad_norm": 0.578179121017456, "learning_rate": 6.470230862697449e-05, "loss": 1.5444, "step": 5820 }, { "epoch": 1.7668842009409622, "grad_norm": 0.5944360494613647, "learning_rate": 6.46962332928311e-05, "loss": 1.1695, "step": 5821 }, { "epoch": 1.7671877371376536, "grad_norm": 0.747759997844696, "learning_rate": 6.469015795868773e-05, "loss": 1.5559, "step": 5822 }, { "epoch": 1.767491273334345, "grad_norm": 0.40747299790382385, "learning_rate": 6.468408262454435e-05, "loss": 1.3606, "step": 5823 }, { "epoch": 1.7677948095310367, "grad_norm": 0.5746089220046997, "learning_rate": 6.467800729040098e-05, "loss": 1.5747, "step": 5824 }, { "epoch": 1.768098345727728, "grad_norm": 0.5214455127716064, "learning_rate": 6.467193195625759e-05, "loss": 1.875, "step": 5825 }, { "epoch": 1.7684018819244196, "grad_norm": 0.5907041430473328, "learning_rate": 6.466585662211422e-05, "loss": 1.8091, "step": 5826 }, { "epoch": 1.7687054181211108, "grad_norm": 0.5252591967582703, "learning_rate": 6.465978128797085e-05, "loss": 1.3878, "step": 5827 }, { "epoch": 1.7690089543178025, "grad_norm": 0.4580781161785126, "learning_rate": 6.465370595382747e-05, "loss": 2.0493, "step": 5828 }, { "epoch": 1.769312490514494, "grad_norm": 1.1108380556106567, "learning_rate": 6.464763061968408e-05, "loss": 1.4833, "step": 5829 }, { "epoch": 1.7696160267111853, "grad_norm": 0.5863052010536194, "learning_rate": 6.464155528554071e-05, "loss": 1.4405, "step": 5830 }, { "epoch": 1.7699195629078768, "grad_norm": 0.6235532760620117, "learning_rate": 6.463547995139733e-05, "loss": 1.1955, "step": 5831 }, { "epoch": 1.7702230991045682, "grad_norm": 0.8234938383102417, "learning_rate": 6.462940461725395e-05, "loss": 1.6902, "step": 5832 }, { "epoch": 1.7705266353012596, "grad_norm": 0.4940117597579956, "learning_rate": 6.462332928311058e-05, "loss": 1.7612, "step": 5833 }, { "epoch": 1.770830171497951, "grad_norm": 0.5862531661987305, "learning_rate": 6.46172539489672e-05, "loss": 1.7272, "step": 5834 }, { "epoch": 1.7711337076946427, "grad_norm": 0.5498414635658264, "learning_rate": 6.461117861482381e-05, "loss": 1.9877, "step": 5835 }, { "epoch": 1.771437243891334, "grad_norm": 0.6384761333465576, "learning_rate": 6.460510328068044e-05, "loss": 1.5546, "step": 5836 }, { "epoch": 1.7717407800880256, "grad_norm": 0.5216341018676758, "learning_rate": 6.459902794653706e-05, "loss": 1.7439, "step": 5837 }, { "epoch": 1.7720443162847168, "grad_norm": 0.749945342540741, "learning_rate": 6.459295261239369e-05, "loss": 1.5544, "step": 5838 }, { "epoch": 1.7723478524814085, "grad_norm": 0.47345829010009766, "learning_rate": 6.45868772782503e-05, "loss": 1.3169, "step": 5839 }, { "epoch": 1.7726513886781, "grad_norm": 0.5755912661552429, "learning_rate": 6.458080194410693e-05, "loss": 1.6923, "step": 5840 }, { "epoch": 1.7729549248747913, "grad_norm": 0.5522906184196472, "learning_rate": 6.457472660996356e-05, "loss": 1.5496, "step": 5841 }, { "epoch": 1.7732584610714828, "grad_norm": 0.6490026116371155, "learning_rate": 6.456865127582018e-05, "loss": 1.6102, "step": 5842 }, { "epoch": 1.7735619972681742, "grad_norm": 0.5617067813873291, "learning_rate": 6.456257594167679e-05, "loss": 1.6246, "step": 5843 }, { "epoch": 1.7738655334648656, "grad_norm": 0.6315189599990845, "learning_rate": 6.455650060753342e-05, "loss": 1.6435, "step": 5844 }, { "epoch": 1.774169069661557, "grad_norm": 0.5512092709541321, "learning_rate": 6.455042527339004e-05, "loss": 1.5024, "step": 5845 }, { "epoch": 1.7744726058582487, "grad_norm": 0.4029645025730133, "learning_rate": 6.454434993924666e-05, "loss": 1.0551, "step": 5846 }, { "epoch": 1.77477614205494, "grad_norm": 0.5837946534156799, "learning_rate": 6.453827460510329e-05, "loss": 1.546, "step": 5847 }, { "epoch": 1.7750796782516316, "grad_norm": 0.5171747207641602, "learning_rate": 6.45321992709599e-05, "loss": 1.7922, "step": 5848 }, { "epoch": 1.7753832144483228, "grad_norm": 0.8446613550186157, "learning_rate": 6.452612393681652e-05, "loss": 1.7363, "step": 5849 }, { "epoch": 1.7756867506450145, "grad_norm": 0.5637487769126892, "learning_rate": 6.452004860267315e-05, "loss": 1.5976, "step": 5850 }, { "epoch": 1.775990286841706, "grad_norm": 0.5242686867713928, "learning_rate": 6.451397326852977e-05, "loss": 1.692, "step": 5851 }, { "epoch": 1.7762938230383973, "grad_norm": 0.6187634468078613, "learning_rate": 6.45078979343864e-05, "loss": 1.5291, "step": 5852 }, { "epoch": 1.7765973592350888, "grad_norm": 1.0247151851654053, "learning_rate": 6.4501822600243e-05, "loss": 1.5718, "step": 5853 }, { "epoch": 1.7769008954317802, "grad_norm": 0.6299808025360107, "learning_rate": 6.449574726609964e-05, "loss": 1.5077, "step": 5854 }, { "epoch": 1.7772044316284719, "grad_norm": 0.6078516840934753, "learning_rate": 6.448967193195627e-05, "loss": 1.0061, "step": 5855 }, { "epoch": 1.777507967825163, "grad_norm": 0.4225638210773468, "learning_rate": 6.448359659781287e-05, "loss": 1.7302, "step": 5856 }, { "epoch": 1.7778115040218547, "grad_norm": 0.5707997679710388, "learning_rate": 6.44775212636695e-05, "loss": 1.3869, "step": 5857 }, { "epoch": 1.778115040218546, "grad_norm": 0.5926483273506165, "learning_rate": 6.447144592952613e-05, "loss": 1.6026, "step": 5858 }, { "epoch": 1.7784185764152376, "grad_norm": 0.5582571625709534, "learning_rate": 6.446537059538275e-05, "loss": 1.6688, "step": 5859 }, { "epoch": 1.7787221126119288, "grad_norm": 0.48093557357788086, "learning_rate": 6.445929526123937e-05, "loss": 1.9678, "step": 5860 }, { "epoch": 1.7790256488086205, "grad_norm": 0.5301848649978638, "learning_rate": 6.4453219927096e-05, "loss": 1.4983, "step": 5861 }, { "epoch": 1.779329185005312, "grad_norm": 0.600426435470581, "learning_rate": 6.444714459295262e-05, "loss": 2.1841, "step": 5862 }, { "epoch": 1.7796327212020033, "grad_norm": 0.5542836785316467, "learning_rate": 6.444106925880923e-05, "loss": 1.9038, "step": 5863 }, { "epoch": 1.7799362573986948, "grad_norm": 0.6525148749351501, "learning_rate": 6.443499392466586e-05, "loss": 1.094, "step": 5864 }, { "epoch": 1.7802397935953862, "grad_norm": 0.5191807150840759, "learning_rate": 6.442891859052248e-05, "loss": 2.0945, "step": 5865 }, { "epoch": 1.7805433297920779, "grad_norm": 0.6895755529403687, "learning_rate": 6.442284325637911e-05, "loss": 1.2236, "step": 5866 }, { "epoch": 1.780846865988769, "grad_norm": 0.5294615626335144, "learning_rate": 6.441676792223572e-05, "loss": 1.5897, "step": 5867 }, { "epoch": 1.7811504021854607, "grad_norm": 0.46759140491485596, "learning_rate": 6.441069258809235e-05, "loss": 1.7159, "step": 5868 }, { "epoch": 1.781453938382152, "grad_norm": 0.5720352530479431, "learning_rate": 6.440461725394898e-05, "loss": 1.4336, "step": 5869 }, { "epoch": 1.7817574745788436, "grad_norm": 0.5489372611045837, "learning_rate": 6.439854191980558e-05, "loss": 1.7358, "step": 5870 }, { "epoch": 1.782061010775535, "grad_norm": 0.4542980492115021, "learning_rate": 6.439246658566221e-05, "loss": 1.5082, "step": 5871 }, { "epoch": 1.7823645469722265, "grad_norm": 0.4929881691932678, "learning_rate": 6.438639125151884e-05, "loss": 1.5841, "step": 5872 }, { "epoch": 1.782668083168918, "grad_norm": 0.5201451182365417, "learning_rate": 6.438031591737546e-05, "loss": 1.9786, "step": 5873 }, { "epoch": 1.7829716193656093, "grad_norm": 0.5401360988616943, "learning_rate": 6.437424058323208e-05, "loss": 1.6557, "step": 5874 }, { "epoch": 1.7832751555623008, "grad_norm": 0.5674952864646912, "learning_rate": 6.436816524908871e-05, "loss": 1.2626, "step": 5875 }, { "epoch": 1.7835786917589922, "grad_norm": 0.561320960521698, "learning_rate": 6.436208991494533e-05, "loss": 1.8853, "step": 5876 }, { "epoch": 1.7838822279556839, "grad_norm": 0.5434008240699768, "learning_rate": 6.435601458080194e-05, "loss": 1.1288, "step": 5877 }, { "epoch": 1.784185764152375, "grad_norm": 0.4528067409992218, "learning_rate": 6.434993924665856e-05, "loss": 1.8259, "step": 5878 }, { "epoch": 1.7844893003490667, "grad_norm": 0.48274731636047363, "learning_rate": 6.434386391251519e-05, "loss": 1.248, "step": 5879 }, { "epoch": 1.784792836545758, "grad_norm": 0.844624936580658, "learning_rate": 6.433778857837181e-05, "loss": 1.2987, "step": 5880 }, { "epoch": 1.7850963727424496, "grad_norm": 0.8764456510543823, "learning_rate": 6.433171324422843e-05, "loss": 1.0901, "step": 5881 }, { "epoch": 1.785399908939141, "grad_norm": 0.5158057808876038, "learning_rate": 6.432563791008506e-05, "loss": 1.7408, "step": 5882 }, { "epoch": 1.7857034451358325, "grad_norm": 0.616571843624115, "learning_rate": 6.431956257594169e-05, "loss": 1.4418, "step": 5883 }, { "epoch": 1.786006981332524, "grad_norm": 0.5972772240638733, "learning_rate": 6.431348724179829e-05, "loss": 1.3225, "step": 5884 }, { "epoch": 1.7863105175292153, "grad_norm": 0.547287106513977, "learning_rate": 6.430741190765492e-05, "loss": 1.6017, "step": 5885 }, { "epoch": 1.7866140537259068, "grad_norm": 0.8032040596008301, "learning_rate": 6.430133657351155e-05, "loss": 1.4679, "step": 5886 }, { "epoch": 1.7869175899225982, "grad_norm": 0.47403547167778015, "learning_rate": 6.429526123936817e-05, "loss": 1.6739, "step": 5887 }, { "epoch": 1.7872211261192898, "grad_norm": 0.5629516243934631, "learning_rate": 6.428918590522479e-05, "loss": 1.752, "step": 5888 }, { "epoch": 1.787524662315981, "grad_norm": 0.47188514471054077, "learning_rate": 6.428311057108142e-05, "loss": 1.3455, "step": 5889 }, { "epoch": 1.7878281985126727, "grad_norm": 0.6849742531776428, "learning_rate": 6.427703523693804e-05, "loss": 1.7675, "step": 5890 }, { "epoch": 1.788131734709364, "grad_norm": 0.5418858528137207, "learning_rate": 6.427095990279465e-05, "loss": 1.4488, "step": 5891 }, { "epoch": 1.7884352709060556, "grad_norm": 0.5874664187431335, "learning_rate": 6.426488456865127e-05, "loss": 1.8787, "step": 5892 }, { "epoch": 1.788738807102747, "grad_norm": 0.4815152585506439, "learning_rate": 6.42588092345079e-05, "loss": 1.2008, "step": 5893 }, { "epoch": 1.7890423432994385, "grad_norm": 0.5272401571273804, "learning_rate": 6.425273390036452e-05, "loss": 1.9368, "step": 5894 }, { "epoch": 1.7893458794961299, "grad_norm": 0.5799587368965149, "learning_rate": 6.424665856622114e-05, "loss": 1.8509, "step": 5895 }, { "epoch": 1.7896494156928213, "grad_norm": 1.0671026706695557, "learning_rate": 6.424058323207777e-05, "loss": 1.6619, "step": 5896 }, { "epoch": 1.789952951889513, "grad_norm": 0.7798335552215576, "learning_rate": 6.42345078979344e-05, "loss": 1.7029, "step": 5897 }, { "epoch": 1.7902564880862042, "grad_norm": 0.5832023024559021, "learning_rate": 6.4228432563791e-05, "loss": 1.9654, "step": 5898 }, { "epoch": 1.7905600242828958, "grad_norm": 0.4774154722690582, "learning_rate": 6.422235722964763e-05, "loss": 1.7518, "step": 5899 }, { "epoch": 1.790863560479587, "grad_norm": 0.4843001663684845, "learning_rate": 6.421628189550426e-05, "loss": 1.6774, "step": 5900 }, { "epoch": 1.7911670966762787, "grad_norm": 0.5146987438201904, "learning_rate": 6.421020656136088e-05, "loss": 1.7348, "step": 5901 }, { "epoch": 1.7914706328729701, "grad_norm": 0.566594123840332, "learning_rate": 6.42041312272175e-05, "loss": 1.5495, "step": 5902 }, { "epoch": 1.7917741690696616, "grad_norm": 0.7687141299247742, "learning_rate": 6.419805589307413e-05, "loss": 1.9389, "step": 5903 }, { "epoch": 1.792077705266353, "grad_norm": 0.6663510799407959, "learning_rate": 6.419198055893075e-05, "loss": 1.3843, "step": 5904 }, { "epoch": 1.7923812414630445, "grad_norm": 0.5539908409118652, "learning_rate": 6.418590522478736e-05, "loss": 1.8642, "step": 5905 }, { "epoch": 1.7926847776597359, "grad_norm": 0.4932482838630676, "learning_rate": 6.417982989064398e-05, "loss": 1.6206, "step": 5906 }, { "epoch": 1.7929883138564273, "grad_norm": 0.617225170135498, "learning_rate": 6.417375455650061e-05, "loss": 1.674, "step": 5907 }, { "epoch": 1.793291850053119, "grad_norm": 0.5194925665855408, "learning_rate": 6.416767922235723e-05, "loss": 1.5878, "step": 5908 }, { "epoch": 1.7935953862498102, "grad_norm": 0.8317811489105225, "learning_rate": 6.416160388821385e-05, "loss": 1.4024, "step": 5909 }, { "epoch": 1.7938989224465018, "grad_norm": 0.6334354281425476, "learning_rate": 6.415552855407048e-05, "loss": 1.4149, "step": 5910 }, { "epoch": 1.794202458643193, "grad_norm": 0.5162734389305115, "learning_rate": 6.414945321992711e-05, "loss": 1.6833, "step": 5911 }, { "epoch": 1.7945059948398847, "grad_norm": 0.6123242378234863, "learning_rate": 6.414337788578371e-05, "loss": 1.3959, "step": 5912 }, { "epoch": 1.7948095310365761, "grad_norm": 0.5130218863487244, "learning_rate": 6.413730255164034e-05, "loss": 1.9731, "step": 5913 }, { "epoch": 1.7951130672332676, "grad_norm": 0.6168215274810791, "learning_rate": 6.413122721749697e-05, "loss": 1.7271, "step": 5914 }, { "epoch": 1.795416603429959, "grad_norm": 0.6758812665939331, "learning_rate": 6.412515188335359e-05, "loss": 2.0077, "step": 5915 }, { "epoch": 1.7957201396266504, "grad_norm": 1.066888451576233, "learning_rate": 6.411907654921021e-05, "loss": 1.0796, "step": 5916 }, { "epoch": 1.7960236758233419, "grad_norm": 0.6181840896606445, "learning_rate": 6.411300121506684e-05, "loss": 1.0032, "step": 5917 }, { "epoch": 1.7963272120200333, "grad_norm": 0.49878695607185364, "learning_rate": 6.410692588092346e-05, "loss": 1.7083, "step": 5918 }, { "epoch": 1.796630748216725, "grad_norm": 0.5682203769683838, "learning_rate": 6.410085054678007e-05, "loss": 1.7901, "step": 5919 }, { "epoch": 1.7969342844134162, "grad_norm": 0.46962597966194153, "learning_rate": 6.409477521263669e-05, "loss": 1.8081, "step": 5920 }, { "epoch": 1.7972378206101078, "grad_norm": 0.4845025837421417, "learning_rate": 6.408869987849332e-05, "loss": 1.1512, "step": 5921 }, { "epoch": 1.797541356806799, "grad_norm": 0.552946150302887, "learning_rate": 6.408262454434994e-05, "loss": 1.8408, "step": 5922 }, { "epoch": 1.7978448930034907, "grad_norm": 0.6529552936553955, "learning_rate": 6.407654921020656e-05, "loss": 1.2169, "step": 5923 }, { "epoch": 1.7981484292001821, "grad_norm": 0.5143837332725525, "learning_rate": 6.407047387606319e-05, "loss": 1.2939, "step": 5924 }, { "epoch": 1.7984519653968736, "grad_norm": 0.5021446347236633, "learning_rate": 6.406439854191982e-05, "loss": 1.771, "step": 5925 }, { "epoch": 1.798755501593565, "grad_norm": 0.5703146457672119, "learning_rate": 6.405832320777642e-05, "loss": 1.0796, "step": 5926 }, { "epoch": 1.7990590377902564, "grad_norm": 0.5527166724205017, "learning_rate": 6.405224787363305e-05, "loss": 1.7345, "step": 5927 }, { "epoch": 1.799362573986948, "grad_norm": 0.5868936777114868, "learning_rate": 6.404617253948968e-05, "loss": 1.9107, "step": 5928 }, { "epoch": 1.7996661101836393, "grad_norm": 0.5727707147598267, "learning_rate": 6.40400972053463e-05, "loss": 1.6983, "step": 5929 }, { "epoch": 1.799969646380331, "grad_norm": 0.4878747761249542, "learning_rate": 6.403402187120292e-05, "loss": 1.6873, "step": 5930 }, { "epoch": 1.8002731825770222, "grad_norm": 0.46012574434280396, "learning_rate": 6.402794653705955e-05, "loss": 1.6663, "step": 5931 }, { "epoch": 1.8005767187737138, "grad_norm": 0.5912777781486511, "learning_rate": 6.402187120291617e-05, "loss": 1.8697, "step": 5932 }, { "epoch": 1.8008802549704053, "grad_norm": 0.5376688241958618, "learning_rate": 6.401579586877278e-05, "loss": 1.7366, "step": 5933 }, { "epoch": 1.8011837911670967, "grad_norm": 0.5066462755203247, "learning_rate": 6.40097205346294e-05, "loss": 1.6613, "step": 5934 }, { "epoch": 1.8014873273637881, "grad_norm": 0.551946759223938, "learning_rate": 6.400364520048603e-05, "loss": 1.663, "step": 5935 }, { "epoch": 1.8017908635604796, "grad_norm": 0.5621793866157532, "learning_rate": 6.399756986634265e-05, "loss": 1.4712, "step": 5936 }, { "epoch": 1.802094399757171, "grad_norm": 0.5620403289794922, "learning_rate": 6.399149453219927e-05, "loss": 1.3263, "step": 5937 }, { "epoch": 1.8023979359538624, "grad_norm": 0.5411643385887146, "learning_rate": 6.39854191980559e-05, "loss": 1.5606, "step": 5938 }, { "epoch": 1.802701472150554, "grad_norm": 0.5093483924865723, "learning_rate": 6.397934386391253e-05, "loss": 1.8361, "step": 5939 }, { "epoch": 1.8030050083472453, "grad_norm": 0.9590352773666382, "learning_rate": 6.397326852976913e-05, "loss": 1.8535, "step": 5940 }, { "epoch": 1.803308544543937, "grad_norm": 0.5259732604026794, "learning_rate": 6.396719319562576e-05, "loss": 1.9654, "step": 5941 }, { "epoch": 1.8036120807406282, "grad_norm": 0.427685022354126, "learning_rate": 6.39611178614824e-05, "loss": 1.5456, "step": 5942 }, { "epoch": 1.8039156169373198, "grad_norm": 0.554628849029541, "learning_rate": 6.3955042527339e-05, "loss": 1.5662, "step": 5943 }, { "epoch": 1.8042191531340113, "grad_norm": 0.5747518539428711, "learning_rate": 6.394896719319563e-05, "loss": 1.7766, "step": 5944 }, { "epoch": 1.8045226893307027, "grad_norm": 0.5522488951683044, "learning_rate": 6.394289185905226e-05, "loss": 1.7876, "step": 5945 }, { "epoch": 1.8048262255273941, "grad_norm": 0.6148546934127808, "learning_rate": 6.393681652490888e-05, "loss": 2.2045, "step": 5946 }, { "epoch": 1.8051297617240856, "grad_norm": 0.5216572284698486, "learning_rate": 6.39307411907655e-05, "loss": 1.5652, "step": 5947 }, { "epoch": 1.805433297920777, "grad_norm": 0.4756337106227875, "learning_rate": 6.392466585662211e-05, "loss": 1.7144, "step": 5948 }, { "epoch": 1.8057368341174684, "grad_norm": 0.552494466304779, "learning_rate": 6.391859052247874e-05, "loss": 1.4098, "step": 5949 }, { "epoch": 1.80604037031416, "grad_norm": 0.4858109951019287, "learning_rate": 6.391251518833536e-05, "loss": 1.8139, "step": 5950 }, { "epoch": 1.8063439065108513, "grad_norm": 0.43136751651763916, "learning_rate": 6.390643985419198e-05, "loss": 1.3983, "step": 5951 }, { "epoch": 1.806647442707543, "grad_norm": 0.5166333317756653, "learning_rate": 6.390036452004861e-05, "loss": 1.7784, "step": 5952 }, { "epoch": 1.8069509789042342, "grad_norm": 0.5090475678443909, "learning_rate": 6.389428918590522e-05, "loss": 1.6639, "step": 5953 }, { "epoch": 1.8072545151009258, "grad_norm": 0.492970734834671, "learning_rate": 6.388821385176184e-05, "loss": 1.6899, "step": 5954 }, { "epoch": 1.8075580512976173, "grad_norm": 0.5264026522636414, "learning_rate": 6.388213851761847e-05, "loss": 1.5677, "step": 5955 }, { "epoch": 1.8078615874943087, "grad_norm": 0.5379698872566223, "learning_rate": 6.38760631834751e-05, "loss": 1.8217, "step": 5956 }, { "epoch": 1.8081651236910001, "grad_norm": 0.5782027244567871, "learning_rate": 6.386998784933171e-05, "loss": 1.7714, "step": 5957 }, { "epoch": 1.8084686598876916, "grad_norm": 0.45545729994773865, "learning_rate": 6.386391251518834e-05, "loss": 1.712, "step": 5958 }, { "epoch": 1.8087721960843832, "grad_norm": 0.6243247985839844, "learning_rate": 6.385783718104497e-05, "loss": 1.749, "step": 5959 }, { "epoch": 1.8090757322810744, "grad_norm": 0.5548247694969177, "learning_rate": 6.385176184690159e-05, "loss": 1.6275, "step": 5960 }, { "epoch": 1.809379268477766, "grad_norm": 0.5740573406219482, "learning_rate": 6.38456865127582e-05, "loss": 1.3607, "step": 5961 }, { "epoch": 1.8096828046744573, "grad_norm": 0.4793718159198761, "learning_rate": 6.383961117861482e-05, "loss": 1.6703, "step": 5962 }, { "epoch": 1.809986340871149, "grad_norm": 0.5316084623336792, "learning_rate": 6.383353584447145e-05, "loss": 1.7506, "step": 5963 }, { "epoch": 1.8102898770678404, "grad_norm": 0.4940855801105499, "learning_rate": 6.382746051032807e-05, "loss": 1.8435, "step": 5964 }, { "epoch": 1.8105934132645318, "grad_norm": 0.583676815032959, "learning_rate": 6.382138517618469e-05, "loss": 1.8004, "step": 5965 }, { "epoch": 1.8108969494612233, "grad_norm": 0.4545220136642456, "learning_rate": 6.381530984204132e-05, "loss": 1.6069, "step": 5966 }, { "epoch": 1.8112004856579147, "grad_norm": 0.49649152159690857, "learning_rate": 6.380923450789794e-05, "loss": 1.5472, "step": 5967 }, { "epoch": 1.8115040218546061, "grad_norm": 0.5210832357406616, "learning_rate": 6.380315917375455e-05, "loss": 1.4535, "step": 5968 }, { "epoch": 1.8118075580512976, "grad_norm": 0.5498248934745789, "learning_rate": 6.379708383961118e-05, "loss": 2.0708, "step": 5969 }, { "epoch": 1.8121110942479892, "grad_norm": 0.46531158685684204, "learning_rate": 6.379100850546781e-05, "loss": 1.5227, "step": 5970 }, { "epoch": 1.8124146304446804, "grad_norm": 0.6665170192718506, "learning_rate": 6.378493317132442e-05, "loss": 1.6048, "step": 5971 }, { "epoch": 1.812718166641372, "grad_norm": 0.46771401166915894, "learning_rate": 6.377885783718105e-05, "loss": 1.9367, "step": 5972 }, { "epoch": 1.8130217028380633, "grad_norm": 0.4593519866466522, "learning_rate": 6.377278250303767e-05, "loss": 1.8305, "step": 5973 }, { "epoch": 1.813325239034755, "grad_norm": 0.5220834016799927, "learning_rate": 6.37667071688943e-05, "loss": 1.8261, "step": 5974 }, { "epoch": 1.8136287752314464, "grad_norm": 0.5303083062171936, "learning_rate": 6.376063183475091e-05, "loss": 1.7589, "step": 5975 }, { "epoch": 1.8139323114281378, "grad_norm": 0.6920210123062134, "learning_rate": 6.375455650060753e-05, "loss": 1.0832, "step": 5976 }, { "epoch": 1.8142358476248293, "grad_norm": 0.49981755018234253, "learning_rate": 6.374848116646416e-05, "loss": 1.9543, "step": 5977 }, { "epoch": 1.8145393838215207, "grad_norm": 0.49285998940467834, "learning_rate": 6.374240583232078e-05, "loss": 1.9847, "step": 5978 }, { "epoch": 1.8148429200182121, "grad_norm": 0.571113109588623, "learning_rate": 6.37363304981774e-05, "loss": 0.9056, "step": 5979 }, { "epoch": 1.8151464562149036, "grad_norm": 0.5736732482910156, "learning_rate": 6.373025516403403e-05, "loss": 1.554, "step": 5980 }, { "epoch": 1.8154499924115952, "grad_norm": 0.5702198147773743, "learning_rate": 6.372417982989065e-05, "loss": 1.4019, "step": 5981 }, { "epoch": 1.8157535286082864, "grad_norm": 0.5917282104492188, "learning_rate": 6.371810449574726e-05, "loss": 1.7169, "step": 5982 }, { "epoch": 1.816057064804978, "grad_norm": 0.4668388366699219, "learning_rate": 6.37120291616039e-05, "loss": 1.3697, "step": 5983 }, { "epoch": 1.8163606010016693, "grad_norm": 0.48866280913352966, "learning_rate": 6.370595382746052e-05, "loss": 1.8745, "step": 5984 }, { "epoch": 1.816664137198361, "grad_norm": 0.5371363759040833, "learning_rate": 6.369987849331713e-05, "loss": 1.3244, "step": 5985 }, { "epoch": 1.8169676733950524, "grad_norm": 0.7522103190422058, "learning_rate": 6.369380315917376e-05, "loss": 1.1633, "step": 5986 }, { "epoch": 1.8172712095917438, "grad_norm": 0.5259113311767578, "learning_rate": 6.368772782503038e-05, "loss": 1.5496, "step": 5987 }, { "epoch": 1.8175747457884353, "grad_norm": 0.5885976552963257, "learning_rate": 6.368165249088701e-05, "loss": 1.4305, "step": 5988 }, { "epoch": 1.8178782819851267, "grad_norm": 0.5653246641159058, "learning_rate": 6.367557715674362e-05, "loss": 1.8765, "step": 5989 }, { "epoch": 1.8181818181818183, "grad_norm": 0.5662322044372559, "learning_rate": 6.366950182260024e-05, "loss": 1.8685, "step": 5990 }, { "epoch": 1.8184853543785096, "grad_norm": 0.4957374632358551, "learning_rate": 6.366342648845687e-05, "loss": 1.7966, "step": 5991 }, { "epoch": 1.8187888905752012, "grad_norm": 0.575343906879425, "learning_rate": 6.365735115431349e-05, "loss": 1.6969, "step": 5992 }, { "epoch": 1.8190924267718924, "grad_norm": 0.5825738906860352, "learning_rate": 6.365127582017011e-05, "loss": 1.6724, "step": 5993 }, { "epoch": 1.819395962968584, "grad_norm": 0.5546246767044067, "learning_rate": 6.364520048602674e-05, "loss": 1.769, "step": 5994 }, { "epoch": 1.8196994991652755, "grad_norm": 0.6090353727340698, "learning_rate": 6.363912515188336e-05, "loss": 1.5532, "step": 5995 }, { "epoch": 1.820003035361967, "grad_norm": 0.6108836531639099, "learning_rate": 6.363304981773997e-05, "loss": 1.7435, "step": 5996 }, { "epoch": 1.8203065715586584, "grad_norm": 0.5270663499832153, "learning_rate": 6.36269744835966e-05, "loss": 1.4083, "step": 5997 }, { "epoch": 1.8206101077553498, "grad_norm": 0.5462440848350525, "learning_rate": 6.362089914945323e-05, "loss": 1.3642, "step": 5998 }, { "epoch": 1.8209136439520412, "grad_norm": 0.6120818257331848, "learning_rate": 6.361482381530984e-05, "loss": 1.3654, "step": 5999 }, { "epoch": 1.8212171801487327, "grad_norm": 0.5536101460456848, "learning_rate": 6.360874848116647e-05, "loss": 1.6613, "step": 6000 }, { "epoch": 1.8215207163454243, "grad_norm": 0.5645522475242615, "learning_rate": 6.360267314702309e-05, "loss": 1.585, "step": 6001 }, { "epoch": 1.8218242525421156, "grad_norm": 0.7577182054519653, "learning_rate": 6.359659781287972e-05, "loss": 1.596, "step": 6002 }, { "epoch": 1.8221277887388072, "grad_norm": 0.5090943574905396, "learning_rate": 6.359052247873633e-05, "loss": 1.9588, "step": 6003 }, { "epoch": 1.8224313249354984, "grad_norm": 0.6380836963653564, "learning_rate": 6.358444714459295e-05, "loss": 1.8904, "step": 6004 }, { "epoch": 1.82273486113219, "grad_norm": 0.5438816547393799, "learning_rate": 6.357837181044958e-05, "loss": 1.4335, "step": 6005 }, { "epoch": 1.8230383973288815, "grad_norm": 0.5515280961990356, "learning_rate": 6.35722964763062e-05, "loss": 1.7108, "step": 6006 }, { "epoch": 1.823341933525573, "grad_norm": 0.5127303600311279, "learning_rate": 6.356622114216282e-05, "loss": 1.8656, "step": 6007 }, { "epoch": 1.8236454697222644, "grad_norm": 0.574600338935852, "learning_rate": 6.356014580801945e-05, "loss": 1.8687, "step": 6008 }, { "epoch": 1.8239490059189558, "grad_norm": 0.5011196136474609, "learning_rate": 6.355407047387607e-05, "loss": 1.0319, "step": 6009 }, { "epoch": 1.8242525421156472, "grad_norm": 0.48129895329475403, "learning_rate": 6.354799513973268e-05, "loss": 1.9097, "step": 6010 }, { "epoch": 1.8245560783123387, "grad_norm": 0.5280462503433228, "learning_rate": 6.354191980558931e-05, "loss": 1.7439, "step": 6011 }, { "epoch": 1.8248596145090303, "grad_norm": 0.558509886264801, "learning_rate": 6.353584447144594e-05, "loss": 1.6233, "step": 6012 }, { "epoch": 1.8251631507057215, "grad_norm": 0.5026416182518005, "learning_rate": 6.352976913730255e-05, "loss": 1.9798, "step": 6013 }, { "epoch": 1.8254666869024132, "grad_norm": 0.7570869326591492, "learning_rate": 6.352369380315918e-05, "loss": 1.509, "step": 6014 }, { "epoch": 1.8257702230991044, "grad_norm": 0.5660725235939026, "learning_rate": 6.35176184690158e-05, "loss": 1.6066, "step": 6015 }, { "epoch": 1.826073759295796, "grad_norm": 0.5537878274917603, "learning_rate": 6.351154313487241e-05, "loss": 1.5985, "step": 6016 }, { "epoch": 1.8263772954924875, "grad_norm": 0.8709487318992615, "learning_rate": 6.350546780072904e-05, "loss": 1.1894, "step": 6017 }, { "epoch": 1.826680831689179, "grad_norm": 0.5389176607131958, "learning_rate": 6.349939246658566e-05, "loss": 1.7235, "step": 6018 }, { "epoch": 1.8269843678858704, "grad_norm": 0.5328723788261414, "learning_rate": 6.349331713244229e-05, "loss": 1.6992, "step": 6019 }, { "epoch": 1.8272879040825618, "grad_norm": 0.5136182308197021, "learning_rate": 6.348724179829891e-05, "loss": 1.5087, "step": 6020 }, { "epoch": 1.8275914402792535, "grad_norm": 0.5750499367713928, "learning_rate": 6.348116646415553e-05, "loss": 1.7912, "step": 6021 }, { "epoch": 1.8278949764759447, "grad_norm": 0.5460924506187439, "learning_rate": 6.347509113001216e-05, "loss": 1.7777, "step": 6022 }, { "epoch": 1.8281985126726363, "grad_norm": 0.5502283573150635, "learning_rate": 6.346901579586878e-05, "loss": 1.5873, "step": 6023 }, { "epoch": 1.8285020488693275, "grad_norm": 0.551810622215271, "learning_rate": 6.346294046172539e-05, "loss": 1.9483, "step": 6024 }, { "epoch": 1.8288055850660192, "grad_norm": 0.4859268367290497, "learning_rate": 6.345686512758202e-05, "loss": 2.0799, "step": 6025 }, { "epoch": 1.8291091212627104, "grad_norm": 0.48601651191711426, "learning_rate": 6.345078979343865e-05, "loss": 1.5712, "step": 6026 }, { "epoch": 1.829412657459402, "grad_norm": 0.5264910459518433, "learning_rate": 6.344471445929526e-05, "loss": 1.8402, "step": 6027 }, { "epoch": 1.8297161936560935, "grad_norm": 0.5978955030441284, "learning_rate": 6.343863912515189e-05, "loss": 1.7215, "step": 6028 }, { "epoch": 1.830019729852785, "grad_norm": 0.5713289976119995, "learning_rate": 6.34325637910085e-05, "loss": 1.9287, "step": 6029 }, { "epoch": 1.8303232660494764, "grad_norm": 0.5466620922088623, "learning_rate": 6.342648845686512e-05, "loss": 1.5087, "step": 6030 }, { "epoch": 1.8306268022461678, "grad_norm": 0.5094414949417114, "learning_rate": 6.342041312272175e-05, "loss": 1.8889, "step": 6031 }, { "epoch": 1.8309303384428595, "grad_norm": 0.536065936088562, "learning_rate": 6.341433778857837e-05, "loss": 1.7133, "step": 6032 }, { "epoch": 1.8312338746395507, "grad_norm": 0.5807048678398132, "learning_rate": 6.3408262454435e-05, "loss": 1.4843, "step": 6033 }, { "epoch": 1.8315374108362423, "grad_norm": 0.4928985834121704, "learning_rate": 6.340218712029162e-05, "loss": 0.9791, "step": 6034 }, { "epoch": 1.8318409470329335, "grad_norm": 0.5296457409858704, "learning_rate": 6.339611178614824e-05, "loss": 1.8372, "step": 6035 }, { "epoch": 1.8321444832296252, "grad_norm": 0.6519824862480164, "learning_rate": 6.339003645200487e-05, "loss": 1.1811, "step": 6036 }, { "epoch": 1.8324480194263166, "grad_norm": 0.5059677362442017, "learning_rate": 6.338396111786149e-05, "loss": 2.0749, "step": 6037 }, { "epoch": 1.832751555623008, "grad_norm": 0.5943655371665955, "learning_rate": 6.33778857837181e-05, "loss": 1.2195, "step": 6038 }, { "epoch": 1.8330550918196995, "grad_norm": 0.5527958273887634, "learning_rate": 6.337181044957473e-05, "loss": 1.4819, "step": 6039 }, { "epoch": 1.833358628016391, "grad_norm": 0.5767375826835632, "learning_rate": 6.336573511543135e-05, "loss": 1.77, "step": 6040 }, { "epoch": 1.8336621642130824, "grad_norm": 0.46123191714286804, "learning_rate": 6.335965978128797e-05, "loss": 1.2523, "step": 6041 }, { "epoch": 1.8339657004097738, "grad_norm": 0.6473827958106995, "learning_rate": 6.33535844471446e-05, "loss": 1.3337, "step": 6042 }, { "epoch": 1.8342692366064655, "grad_norm": 0.5438799858093262, "learning_rate": 6.334750911300122e-05, "loss": 1.3156, "step": 6043 }, { "epoch": 1.8345727728031567, "grad_norm": 0.658911943435669, "learning_rate": 6.334143377885783e-05, "loss": 1.7473, "step": 6044 }, { "epoch": 1.8348763089998483, "grad_norm": 0.5106703042984009, "learning_rate": 6.333535844471446e-05, "loss": 1.7683, "step": 6045 }, { "epoch": 1.8351798451965395, "grad_norm": 0.5918740630149841, "learning_rate": 6.332928311057108e-05, "loss": 1.2451, "step": 6046 }, { "epoch": 1.8354833813932312, "grad_norm": 0.49266964197158813, "learning_rate": 6.332320777642771e-05, "loss": 1.255, "step": 6047 }, { "epoch": 1.8357869175899226, "grad_norm": 0.48280391097068787, "learning_rate": 6.331713244228433e-05, "loss": 1.7687, "step": 6048 }, { "epoch": 1.836090453786614, "grad_norm": 0.51821368932724, "learning_rate": 6.331105710814095e-05, "loss": 1.8662, "step": 6049 }, { "epoch": 1.8363939899833055, "grad_norm": 0.5473127365112305, "learning_rate": 6.330498177399758e-05, "loss": 1.3156, "step": 6050 }, { "epoch": 1.836697526179997, "grad_norm": 0.5145398378372192, "learning_rate": 6.32989064398542e-05, "loss": 1.748, "step": 6051 }, { "epoch": 1.8370010623766884, "grad_norm": 0.4762469530105591, "learning_rate": 6.329283110571081e-05, "loss": 1.9218, "step": 6052 }, { "epoch": 1.8373045985733798, "grad_norm": 0.4372353255748749, "learning_rate": 6.328675577156744e-05, "loss": 1.958, "step": 6053 }, { "epoch": 1.8376081347700715, "grad_norm": 0.47850942611694336, "learning_rate": 6.328068043742406e-05, "loss": 1.9153, "step": 6054 }, { "epoch": 1.8379116709667627, "grad_norm": 0.5003155469894409, "learning_rate": 6.327460510328068e-05, "loss": 1.8781, "step": 6055 }, { "epoch": 1.8382152071634543, "grad_norm": 0.5914559960365295, "learning_rate": 6.326852976913731e-05, "loss": 1.6648, "step": 6056 }, { "epoch": 1.8385187433601455, "grad_norm": 0.5313575267791748, "learning_rate": 6.326245443499393e-05, "loss": 1.1451, "step": 6057 }, { "epoch": 1.8388222795568372, "grad_norm": 0.4557872414588928, "learning_rate": 6.325637910085054e-05, "loss": 1.6781, "step": 6058 }, { "epoch": 1.8391258157535286, "grad_norm": 0.537188708782196, "learning_rate": 6.325030376670717e-05, "loss": 1.4251, "step": 6059 }, { "epoch": 1.83942935195022, "grad_norm": 0.5582257509231567, "learning_rate": 6.324422843256379e-05, "loss": 2.0075, "step": 6060 }, { "epoch": 1.8397328881469115, "grad_norm": 0.5774347186088562, "learning_rate": 6.323815309842042e-05, "loss": 2.0587, "step": 6061 }, { "epoch": 1.840036424343603, "grad_norm": 0.5662617683410645, "learning_rate": 6.323207776427704e-05, "loss": 1.7198, "step": 6062 }, { "epoch": 1.8403399605402946, "grad_norm": 0.5646880269050598, "learning_rate": 6.322600243013366e-05, "loss": 1.8586, "step": 6063 }, { "epoch": 1.8406434967369858, "grad_norm": 0.53641676902771, "learning_rate": 6.321992709599029e-05, "loss": 0.9103, "step": 6064 }, { "epoch": 1.8409470329336775, "grad_norm": 0.5239225625991821, "learning_rate": 6.32138517618469e-05, "loss": 1.5655, "step": 6065 }, { "epoch": 1.8412505691303687, "grad_norm": 0.4748789668083191, "learning_rate": 6.320777642770352e-05, "loss": 1.2857, "step": 6066 }, { "epoch": 1.8415541053270603, "grad_norm": 0.5578672289848328, "learning_rate": 6.320170109356015e-05, "loss": 1.6388, "step": 6067 }, { "epoch": 1.8418576415237518, "grad_norm": 0.48566946387290955, "learning_rate": 6.319562575941677e-05, "loss": 1.7807, "step": 6068 }, { "epoch": 1.8421611777204432, "grad_norm": 0.49624478816986084, "learning_rate": 6.318955042527339e-05, "loss": 1.5199, "step": 6069 }, { "epoch": 1.8424647139171346, "grad_norm": 0.5602688789367676, "learning_rate": 6.318347509113002e-05, "loss": 1.1253, "step": 6070 }, { "epoch": 1.842768250113826, "grad_norm": 0.5979968309402466, "learning_rate": 6.317739975698664e-05, "loss": 1.4047, "step": 6071 }, { "epoch": 1.8430717863105175, "grad_norm": 0.5504110455513, "learning_rate": 6.317132442284325e-05, "loss": 1.8578, "step": 6072 }, { "epoch": 1.843375322507209, "grad_norm": 0.9628989696502686, "learning_rate": 6.316524908869988e-05, "loss": 1.363, "step": 6073 }, { "epoch": 1.8436788587039006, "grad_norm": 0.5489274859428406, "learning_rate": 6.31591737545565e-05, "loss": 1.351, "step": 6074 }, { "epoch": 1.8439823949005918, "grad_norm": 0.6439406275749207, "learning_rate": 6.315309842041313e-05, "loss": 1.7498, "step": 6075 }, { "epoch": 1.8442859310972834, "grad_norm": 0.5306099653244019, "learning_rate": 6.314702308626975e-05, "loss": 1.6709, "step": 6076 }, { "epoch": 1.8445894672939747, "grad_norm": 0.5945127606391907, "learning_rate": 6.314094775212637e-05, "loss": 1.4394, "step": 6077 }, { "epoch": 1.8448930034906663, "grad_norm": 0.600626528263092, "learning_rate": 6.3134872417983e-05, "loss": 1.9918, "step": 6078 }, { "epoch": 1.8451965396873578, "grad_norm": 0.5113967061042786, "learning_rate": 6.312879708383962e-05, "loss": 1.9253, "step": 6079 }, { "epoch": 1.8455000758840492, "grad_norm": 0.5761411786079407, "learning_rate": 6.312272174969623e-05, "loss": 1.512, "step": 6080 }, { "epoch": 1.8458036120807406, "grad_norm": 0.5874478816986084, "learning_rate": 6.311664641555286e-05, "loss": 1.5656, "step": 6081 }, { "epoch": 1.846107148277432, "grad_norm": 0.611596941947937, "learning_rate": 6.311057108140948e-05, "loss": 1.5055, "step": 6082 }, { "epoch": 1.8464106844741235, "grad_norm": 0.4646596908569336, "learning_rate": 6.31044957472661e-05, "loss": 1.8092, "step": 6083 }, { "epoch": 1.846714220670815, "grad_norm": 0.6680156588554382, "learning_rate": 6.309842041312273e-05, "loss": 1.3079, "step": 6084 }, { "epoch": 1.8470177568675066, "grad_norm": 0.5887166261672974, "learning_rate": 6.309234507897935e-05, "loss": 1.6493, "step": 6085 }, { "epoch": 1.8473212930641978, "grad_norm": 0.6366755962371826, "learning_rate": 6.308626974483596e-05, "loss": 1.4721, "step": 6086 }, { "epoch": 1.8476248292608894, "grad_norm": 0.5246219635009766, "learning_rate": 6.30801944106926e-05, "loss": 1.877, "step": 6087 }, { "epoch": 1.8479283654575807, "grad_norm": 0.6656885743141174, "learning_rate": 6.307411907654921e-05, "loss": 1.9393, "step": 6088 }, { "epoch": 1.8482319016542723, "grad_norm": 0.5601686835289001, "learning_rate": 6.306804374240583e-05, "loss": 1.0694, "step": 6089 }, { "epoch": 1.8485354378509637, "grad_norm": 0.8006256222724915, "learning_rate": 6.306196840826246e-05, "loss": 1.4974, "step": 6090 }, { "epoch": 1.8488389740476552, "grad_norm": 0.49230626225471497, "learning_rate": 6.305589307411908e-05, "loss": 1.6028, "step": 6091 }, { "epoch": 1.8491425102443466, "grad_norm": 0.6775741577148438, "learning_rate": 6.304981773997571e-05, "loss": 1.7014, "step": 6092 }, { "epoch": 1.849446046441038, "grad_norm": 0.55632483959198, "learning_rate": 6.304374240583233e-05, "loss": 1.2937, "step": 6093 }, { "epoch": 1.8497495826377297, "grad_norm": 0.494138240814209, "learning_rate": 6.303766707168894e-05, "loss": 1.4426, "step": 6094 }, { "epoch": 1.850053118834421, "grad_norm": 0.5821829438209534, "learning_rate": 6.303159173754557e-05, "loss": 1.4578, "step": 6095 }, { "epoch": 1.8503566550311126, "grad_norm": 0.4682660400867462, "learning_rate": 6.302551640340219e-05, "loss": 1.7926, "step": 6096 }, { "epoch": 1.8506601912278038, "grad_norm": 0.5352194309234619, "learning_rate": 6.301944106925881e-05, "loss": 1.7727, "step": 6097 }, { "epoch": 1.8509637274244954, "grad_norm": 0.6106446385383606, "learning_rate": 6.301336573511544e-05, "loss": 1.3591, "step": 6098 }, { "epoch": 1.8512672636211869, "grad_norm": 0.575634777545929, "learning_rate": 6.300729040097206e-05, "loss": 1.1851, "step": 6099 }, { "epoch": 1.8515707998178783, "grad_norm": 0.976506233215332, "learning_rate": 6.300121506682867e-05, "loss": 1.481, "step": 6100 }, { "epoch": 1.8518743360145697, "grad_norm": 1.6843774318695068, "learning_rate": 6.29951397326853e-05, "loss": 1.8745, "step": 6101 }, { "epoch": 1.8521778722112612, "grad_norm": 0.5201796293258667, "learning_rate": 6.298906439854192e-05, "loss": 1.5802, "step": 6102 }, { "epoch": 1.8524814084079526, "grad_norm": 0.4609794616699219, "learning_rate": 6.298298906439854e-05, "loss": 1.57, "step": 6103 }, { "epoch": 1.852784944604644, "grad_norm": 0.42774495482444763, "learning_rate": 6.297691373025517e-05, "loss": 1.5364, "step": 6104 }, { "epoch": 1.8530884808013357, "grad_norm": 0.5276924967765808, "learning_rate": 6.297083839611179e-05, "loss": 1.5617, "step": 6105 }, { "epoch": 1.853392016998027, "grad_norm": 0.5043931007385254, "learning_rate": 6.296476306196842e-05, "loss": 1.8358, "step": 6106 }, { "epoch": 1.8536955531947186, "grad_norm": 0.5033897161483765, "learning_rate": 6.295868772782504e-05, "loss": 1.7926, "step": 6107 }, { "epoch": 1.8539990893914098, "grad_norm": 0.5580025315284729, "learning_rate": 6.295261239368165e-05, "loss": 1.4676, "step": 6108 }, { "epoch": 1.8543026255881014, "grad_norm": 1.387598991394043, "learning_rate": 6.294653705953828e-05, "loss": 1.3955, "step": 6109 }, { "epoch": 1.8546061617847929, "grad_norm": 0.576951265335083, "learning_rate": 6.29404617253949e-05, "loss": 1.638, "step": 6110 }, { "epoch": 1.8549096979814843, "grad_norm": 0.5881240963935852, "learning_rate": 6.293438639125152e-05, "loss": 1.5124, "step": 6111 }, { "epoch": 1.8552132341781757, "grad_norm": 0.4950685501098633, "learning_rate": 6.292831105710815e-05, "loss": 1.3096, "step": 6112 }, { "epoch": 1.8555167703748672, "grad_norm": 0.6170344948768616, "learning_rate": 6.292223572296477e-05, "loss": 1.4772, "step": 6113 }, { "epoch": 1.8558203065715586, "grad_norm": 0.6243839263916016, "learning_rate": 6.291616038882138e-05, "loss": 1.681, "step": 6114 }, { "epoch": 1.85612384276825, "grad_norm": 0.5996612906455994, "learning_rate": 6.291008505467801e-05, "loss": 1.5997, "step": 6115 }, { "epoch": 1.8564273789649417, "grad_norm": 0.5222072005271912, "learning_rate": 6.290400972053463e-05, "loss": 1.435, "step": 6116 }, { "epoch": 1.856730915161633, "grad_norm": 0.5111011266708374, "learning_rate": 6.289793438639125e-05, "loss": 1.8543, "step": 6117 }, { "epoch": 1.8570344513583246, "grad_norm": 0.5086125135421753, "learning_rate": 6.289185905224788e-05, "loss": 1.8475, "step": 6118 }, { "epoch": 1.8573379875550158, "grad_norm": 0.445705771446228, "learning_rate": 6.28857837181045e-05, "loss": 1.7086, "step": 6119 }, { "epoch": 1.8576415237517074, "grad_norm": 0.5179358720779419, "learning_rate": 6.287970838396113e-05, "loss": 1.6929, "step": 6120 }, { "epoch": 1.8579450599483989, "grad_norm": 0.5571700930595398, "learning_rate": 6.287363304981775e-05, "loss": 1.6619, "step": 6121 }, { "epoch": 1.8582485961450903, "grad_norm": 0.6373270153999329, "learning_rate": 6.286755771567436e-05, "loss": 1.7158, "step": 6122 }, { "epoch": 1.8585521323417817, "grad_norm": 0.7662277817726135, "learning_rate": 6.2861482381531e-05, "loss": 1.506, "step": 6123 }, { "epoch": 1.8588556685384732, "grad_norm": 0.6126819252967834, "learning_rate": 6.285540704738761e-05, "loss": 1.4747, "step": 6124 }, { "epoch": 1.8591592047351648, "grad_norm": 0.48531535267829895, "learning_rate": 6.284933171324423e-05, "loss": 1.3161, "step": 6125 }, { "epoch": 1.859462740931856, "grad_norm": 0.6532206535339355, "learning_rate": 6.284325637910086e-05, "loss": 1.5314, "step": 6126 }, { "epoch": 1.8597662771285477, "grad_norm": 0.5442935228347778, "learning_rate": 6.283718104495748e-05, "loss": 1.8921, "step": 6127 }, { "epoch": 1.860069813325239, "grad_norm": 0.890833854675293, "learning_rate": 6.28311057108141e-05, "loss": 1.509, "step": 6128 }, { "epoch": 1.8603733495219306, "grad_norm": 0.5734865069389343, "learning_rate": 6.282503037667072e-05, "loss": 1.5688, "step": 6129 }, { "epoch": 1.860676885718622, "grad_norm": 0.48190489411354065, "learning_rate": 6.281895504252734e-05, "loss": 1.1954, "step": 6130 }, { "epoch": 1.8609804219153134, "grad_norm": 0.6809987425804138, "learning_rate": 6.281287970838396e-05, "loss": 1.897, "step": 6131 }, { "epoch": 1.8612839581120049, "grad_norm": 0.6227823495864868, "learning_rate": 6.280680437424059e-05, "loss": 1.9167, "step": 6132 }, { "epoch": 1.8615874943086963, "grad_norm": 0.5331142544746399, "learning_rate": 6.280072904009721e-05, "loss": 1.6832, "step": 6133 }, { "epoch": 1.8618910305053877, "grad_norm": 0.4640684425830841, "learning_rate": 6.279465370595384e-05, "loss": 1.5393, "step": 6134 }, { "epoch": 1.8621945667020792, "grad_norm": 0.4620974659919739, "learning_rate": 6.278857837181046e-05, "loss": 1.9326, "step": 6135 }, { "epoch": 1.8624981028987708, "grad_norm": 0.6997519135475159, "learning_rate": 6.278250303766707e-05, "loss": 1.3935, "step": 6136 }, { "epoch": 1.862801639095462, "grad_norm": 0.5011730790138245, "learning_rate": 6.27764277035237e-05, "loss": 1.7596, "step": 6137 }, { "epoch": 1.8631051752921537, "grad_norm": 0.4842537045478821, "learning_rate": 6.277035236938031e-05, "loss": 0.8998, "step": 6138 }, { "epoch": 1.863408711488845, "grad_norm": 0.5254935622215271, "learning_rate": 6.276427703523694e-05, "loss": 1.775, "step": 6139 }, { "epoch": 1.8637122476855366, "grad_norm": 0.5326409339904785, "learning_rate": 6.275820170109357e-05, "loss": 1.7003, "step": 6140 }, { "epoch": 1.864015783882228, "grad_norm": 0.5449077486991882, "learning_rate": 6.275212636695019e-05, "loss": 1.6728, "step": 6141 }, { "epoch": 1.8643193200789194, "grad_norm": 0.4609127938747406, "learning_rate": 6.27460510328068e-05, "loss": 1.7144, "step": 6142 }, { "epoch": 1.8646228562756109, "grad_norm": 0.891974151134491, "learning_rate": 6.273997569866343e-05, "loss": 1.693, "step": 6143 }, { "epoch": 1.8649263924723023, "grad_norm": 0.5309075713157654, "learning_rate": 6.273390036452005e-05, "loss": 1.3689, "step": 6144 }, { "epoch": 1.8652299286689937, "grad_norm": 0.5689653754234314, "learning_rate": 6.272782503037667e-05, "loss": 1.7364, "step": 6145 }, { "epoch": 1.8655334648656852, "grad_norm": 0.49552464485168457, "learning_rate": 6.27217496962333e-05, "loss": 1.9449, "step": 6146 }, { "epoch": 1.8658370010623768, "grad_norm": 0.5504531264305115, "learning_rate": 6.271567436208992e-05, "loss": 1.7631, "step": 6147 }, { "epoch": 1.866140537259068, "grad_norm": 1.0592516660690308, "learning_rate": 6.270959902794655e-05, "loss": 1.5151, "step": 6148 }, { "epoch": 1.8664440734557597, "grad_norm": 0.5770890712738037, "learning_rate": 6.270352369380315e-05, "loss": 1.5414, "step": 6149 }, { "epoch": 1.866747609652451, "grad_norm": 0.611977756023407, "learning_rate": 6.269744835965978e-05, "loss": 1.3377, "step": 6150 }, { "epoch": 1.8670511458491426, "grad_norm": 0.5175086855888367, "learning_rate": 6.269137302551641e-05, "loss": 1.8129, "step": 6151 }, { "epoch": 1.867354682045834, "grad_norm": 0.4967375099658966, "learning_rate": 6.268529769137302e-05, "loss": 1.6175, "step": 6152 }, { "epoch": 1.8676582182425254, "grad_norm": 0.5550147891044617, "learning_rate": 6.267922235722965e-05, "loss": 1.9646, "step": 6153 }, { "epoch": 1.8679617544392169, "grad_norm": 0.5940298438072205, "learning_rate": 6.267314702308628e-05, "loss": 1.8358, "step": 6154 }, { "epoch": 1.8682652906359083, "grad_norm": 0.4860624074935913, "learning_rate": 6.26670716889429e-05, "loss": 1.7674, "step": 6155 }, { "epoch": 1.8685688268326, "grad_norm": 0.5743589401245117, "learning_rate": 6.266099635479951e-05, "loss": 1.6537, "step": 6156 }, { "epoch": 1.8688723630292912, "grad_norm": 0.5607566237449646, "learning_rate": 6.265492102065614e-05, "loss": 1.4747, "step": 6157 }, { "epoch": 1.8691758992259828, "grad_norm": 0.5863677263259888, "learning_rate": 6.264884568651276e-05, "loss": 1.6027, "step": 6158 }, { "epoch": 1.869479435422674, "grad_norm": 0.437791645526886, "learning_rate": 6.264277035236938e-05, "loss": 1.1876, "step": 6159 }, { "epoch": 1.8697829716193657, "grad_norm": 0.6220367550849915, "learning_rate": 6.263669501822601e-05, "loss": 1.5344, "step": 6160 }, { "epoch": 1.8700865078160571, "grad_norm": 0.5647712349891663, "learning_rate": 6.263061968408263e-05, "loss": 1.598, "step": 6161 }, { "epoch": 1.8703900440127486, "grad_norm": 0.46893787384033203, "learning_rate": 6.262454434993924e-05, "loss": 1.3591, "step": 6162 }, { "epoch": 1.87069358020944, "grad_norm": 0.6015875935554504, "learning_rate": 6.261846901579586e-05, "loss": 1.895, "step": 6163 }, { "epoch": 1.8709971164061314, "grad_norm": 0.5706945061683655, "learning_rate": 6.261239368165249e-05, "loss": 1.8152, "step": 6164 }, { "epoch": 1.8713006526028229, "grad_norm": 0.6036109328269958, "learning_rate": 6.260631834750912e-05, "loss": 1.7984, "step": 6165 }, { "epoch": 1.8716041887995143, "grad_norm": 0.521976888179779, "learning_rate": 6.260024301336573e-05, "loss": 1.7626, "step": 6166 }, { "epoch": 1.871907724996206, "grad_norm": 0.5366547107696533, "learning_rate": 6.259416767922236e-05, "loss": 1.8297, "step": 6167 }, { "epoch": 1.8722112611928972, "grad_norm": 0.4634724259376526, "learning_rate": 6.258809234507899e-05, "loss": 1.7023, "step": 6168 }, { "epoch": 1.8725147973895888, "grad_norm": 0.5139271020889282, "learning_rate": 6.25820170109356e-05, "loss": 1.2409, "step": 6169 }, { "epoch": 1.87281833358628, "grad_norm": 0.42354339361190796, "learning_rate": 6.257594167679222e-05, "loss": 1.735, "step": 6170 }, { "epoch": 1.8731218697829717, "grad_norm": 0.48973771929740906, "learning_rate": 6.256986634264885e-05, "loss": 1.8408, "step": 6171 }, { "epoch": 1.8734254059796631, "grad_norm": 0.4284050166606903, "learning_rate": 6.256379100850547e-05, "loss": 0.5575, "step": 6172 }, { "epoch": 1.8737289421763546, "grad_norm": 0.6064922213554382, "learning_rate": 6.255771567436209e-05, "loss": 1.293, "step": 6173 }, { "epoch": 1.874032478373046, "grad_norm": 0.6467086672782898, "learning_rate": 6.255164034021872e-05, "loss": 1.1203, "step": 6174 }, { "epoch": 1.8743360145697374, "grad_norm": 0.5496531128883362, "learning_rate": 6.254556500607534e-05, "loss": 1.5393, "step": 6175 }, { "epoch": 1.8746395507664289, "grad_norm": 0.4759608507156372, "learning_rate": 6.253948967193195e-05, "loss": 1.7542, "step": 6176 }, { "epoch": 1.8749430869631203, "grad_norm": 0.5617760419845581, "learning_rate": 6.253341433778857e-05, "loss": 1.7047, "step": 6177 }, { "epoch": 1.875246623159812, "grad_norm": 0.5658890604972839, "learning_rate": 6.25273390036452e-05, "loss": 1.4287, "step": 6178 }, { "epoch": 1.8755501593565032, "grad_norm": 0.357480525970459, "learning_rate": 6.252126366950183e-05, "loss": 1.1164, "step": 6179 }, { "epoch": 1.8758536955531948, "grad_norm": 0.64265376329422, "learning_rate": 6.251518833535844e-05, "loss": 1.0389, "step": 6180 }, { "epoch": 1.876157231749886, "grad_norm": 0.5272260904312134, "learning_rate": 6.250911300121507e-05, "loss": 1.7431, "step": 6181 }, { "epoch": 1.8764607679465777, "grad_norm": 0.6049331426620483, "learning_rate": 6.25030376670717e-05, "loss": 1.43, "step": 6182 }, { "epoch": 1.8767643041432691, "grad_norm": 0.5652697682380676, "learning_rate": 6.249696233292832e-05, "loss": 1.091, "step": 6183 }, { "epoch": 1.8770678403399605, "grad_norm": 0.6228081583976746, "learning_rate": 6.249088699878493e-05, "loss": 1.5427, "step": 6184 }, { "epoch": 1.877371376536652, "grad_norm": 0.6637395620346069, "learning_rate": 6.248481166464156e-05, "loss": 1.3756, "step": 6185 }, { "epoch": 1.8776749127333434, "grad_norm": 0.5850158333778381, "learning_rate": 6.247873633049818e-05, "loss": 1.7537, "step": 6186 }, { "epoch": 1.877978448930035, "grad_norm": 0.5947254300117493, "learning_rate": 6.24726609963548e-05, "loss": 1.4273, "step": 6187 }, { "epoch": 1.8782819851267263, "grad_norm": 0.6103019714355469, "learning_rate": 6.246658566221143e-05, "loss": 1.9379, "step": 6188 }, { "epoch": 1.878585521323418, "grad_norm": 0.6417216062545776, "learning_rate": 6.246051032806805e-05, "loss": 1.4602, "step": 6189 }, { "epoch": 1.8788890575201092, "grad_norm": 0.5326799154281616, "learning_rate": 6.245443499392466e-05, "loss": 1.6638, "step": 6190 }, { "epoch": 1.8791925937168008, "grad_norm": 0.6213211417198181, "learning_rate": 6.244835965978128e-05, "loss": 1.5516, "step": 6191 }, { "epoch": 1.879496129913492, "grad_norm": 0.48354288935661316, "learning_rate": 6.244228432563791e-05, "loss": 1.3408, "step": 6192 }, { "epoch": 1.8797996661101837, "grad_norm": 0.43827447295188904, "learning_rate": 6.243620899149454e-05, "loss": 1.7035, "step": 6193 }, { "epoch": 1.880103202306875, "grad_norm": 0.5599086880683899, "learning_rate": 6.243013365735115e-05, "loss": 1.7855, "step": 6194 }, { "epoch": 1.8804067385035665, "grad_norm": 0.6431657671928406, "learning_rate": 6.242405832320778e-05, "loss": 1.4125, "step": 6195 }, { "epoch": 1.880710274700258, "grad_norm": 0.5782099366188049, "learning_rate": 6.241798298906441e-05, "loss": 1.9184, "step": 6196 }, { "epoch": 1.8810138108969494, "grad_norm": 0.46235474944114685, "learning_rate": 6.241190765492103e-05, "loss": 1.7373, "step": 6197 }, { "epoch": 1.881317347093641, "grad_norm": 0.5792590975761414, "learning_rate": 6.240583232077764e-05, "loss": 1.6566, "step": 6198 }, { "epoch": 1.8816208832903323, "grad_norm": 0.48585742712020874, "learning_rate": 6.239975698663427e-05, "loss": 1.5372, "step": 6199 }, { "epoch": 1.881924419487024, "grad_norm": 0.595456063747406, "learning_rate": 6.239368165249089e-05, "loss": 1.7805, "step": 6200 }, { "epoch": 1.8822279556837151, "grad_norm": 0.5959881544113159, "learning_rate": 6.238760631834751e-05, "loss": 1.4049, "step": 6201 }, { "epoch": 1.8825314918804068, "grad_norm": 0.6109514236450195, "learning_rate": 6.238153098420414e-05, "loss": 1.9479, "step": 6202 }, { "epoch": 1.8828350280770982, "grad_norm": 0.4679960608482361, "learning_rate": 6.237545565006076e-05, "loss": 1.801, "step": 6203 }, { "epoch": 1.8831385642737897, "grad_norm": 0.5735065937042236, "learning_rate": 6.236938031591738e-05, "loss": 1.6866, "step": 6204 }, { "epoch": 1.883442100470481, "grad_norm": 0.8023161292076111, "learning_rate": 6.236330498177399e-05, "loss": 1.833, "step": 6205 }, { "epoch": 1.8837456366671725, "grad_norm": 0.5725283026695251, "learning_rate": 6.235722964763062e-05, "loss": 1.3659, "step": 6206 }, { "epoch": 1.884049172863864, "grad_norm": 0.6874731779098511, "learning_rate": 6.235115431348725e-05, "loss": 1.8193, "step": 6207 }, { "epoch": 1.8843527090605554, "grad_norm": 0.5876416563987732, "learning_rate": 6.234507897934386e-05, "loss": 2.0175, "step": 6208 }, { "epoch": 1.884656245257247, "grad_norm": 0.5721455812454224, "learning_rate": 6.233900364520049e-05, "loss": 1.4163, "step": 6209 }, { "epoch": 1.8849597814539383, "grad_norm": 0.44347506761550903, "learning_rate": 6.233292831105712e-05, "loss": 1.3832, "step": 6210 }, { "epoch": 1.88526331765063, "grad_norm": 0.5441204905509949, "learning_rate": 6.232685297691372e-05, "loss": 1.7932, "step": 6211 }, { "epoch": 1.8855668538473211, "grad_norm": 0.49128007888793945, "learning_rate": 6.232077764277035e-05, "loss": 1.5375, "step": 6212 }, { "epoch": 1.8858703900440128, "grad_norm": 0.42426589131355286, "learning_rate": 6.231470230862698e-05, "loss": 1.5113, "step": 6213 }, { "epoch": 1.8861739262407042, "grad_norm": 0.6463568806648254, "learning_rate": 6.23086269744836e-05, "loss": 1.7184, "step": 6214 }, { "epoch": 1.8864774624373957, "grad_norm": 0.5162055492401123, "learning_rate": 6.230255164034022e-05, "loss": 1.3341, "step": 6215 }, { "epoch": 1.886780998634087, "grad_norm": 0.5229998826980591, "learning_rate": 6.229647630619685e-05, "loss": 1.8356, "step": 6216 }, { "epoch": 1.8870845348307785, "grad_norm": 0.5528349876403809, "learning_rate": 6.229040097205347e-05, "loss": 1.8502, "step": 6217 }, { "epoch": 1.88738807102747, "grad_norm": 0.6068422794342041, "learning_rate": 6.228432563791009e-05, "loss": 1.7407, "step": 6218 }, { "epoch": 1.8876916072241614, "grad_norm": 0.5407156348228455, "learning_rate": 6.22782503037667e-05, "loss": 1.4199, "step": 6219 }, { "epoch": 1.887995143420853, "grad_norm": 0.8403943777084351, "learning_rate": 6.227217496962333e-05, "loss": 1.5019, "step": 6220 }, { "epoch": 1.8882986796175443, "grad_norm": 0.5002139210700989, "learning_rate": 6.226609963547996e-05, "loss": 1.7308, "step": 6221 }, { "epoch": 1.888602215814236, "grad_norm": 0.5648574829101562, "learning_rate": 6.226002430133657e-05, "loss": 1.6151, "step": 6222 }, { "epoch": 1.8889057520109271, "grad_norm": 0.59864342212677, "learning_rate": 6.22539489671932e-05, "loss": 1.6052, "step": 6223 }, { "epoch": 1.8892092882076188, "grad_norm": 0.531084418296814, "learning_rate": 6.224787363304983e-05, "loss": 1.6574, "step": 6224 }, { "epoch": 1.8895128244043102, "grad_norm": 0.554513692855835, "learning_rate": 6.224179829890643e-05, "loss": 1.4749, "step": 6225 }, { "epoch": 1.8898163606010017, "grad_norm": 0.5515928864479065, "learning_rate": 6.223572296476306e-05, "loss": 1.8412, "step": 6226 }, { "epoch": 1.890119896797693, "grad_norm": 0.4584737718105316, "learning_rate": 6.22296476306197e-05, "loss": 1.8693, "step": 6227 }, { "epoch": 1.8904234329943845, "grad_norm": 0.5091758370399475, "learning_rate": 6.222357229647631e-05, "loss": 1.9317, "step": 6228 }, { "epoch": 1.8907269691910762, "grad_norm": 0.5648382902145386, "learning_rate": 6.221749696233293e-05, "loss": 1.662, "step": 6229 }, { "epoch": 1.8910305053877674, "grad_norm": 0.5485778450965881, "learning_rate": 6.221142162818955e-05, "loss": 1.6501, "step": 6230 }, { "epoch": 1.891334041584459, "grad_norm": 0.5410951375961304, "learning_rate": 6.220534629404618e-05, "loss": 1.7698, "step": 6231 }, { "epoch": 1.8916375777811503, "grad_norm": 0.6109384298324585, "learning_rate": 6.21992709599028e-05, "loss": 1.6953, "step": 6232 }, { "epoch": 1.891941113977842, "grad_norm": 0.5590034127235413, "learning_rate": 6.219319562575941e-05, "loss": 1.4273, "step": 6233 }, { "epoch": 1.8922446501745334, "grad_norm": 0.5122209191322327, "learning_rate": 6.218712029161604e-05, "loss": 1.7359, "step": 6234 }, { "epoch": 1.8925481863712248, "grad_norm": 0.6979067325592041, "learning_rate": 6.218104495747266e-05, "loss": 1.4347, "step": 6235 }, { "epoch": 1.8928517225679162, "grad_norm": 0.6123654246330261, "learning_rate": 6.217496962332928e-05, "loss": 1.6434, "step": 6236 }, { "epoch": 1.8931552587646077, "grad_norm": 0.5874941945075989, "learning_rate": 6.216889428918591e-05, "loss": 1.7587, "step": 6237 }, { "epoch": 1.893458794961299, "grad_norm": 0.6103301048278809, "learning_rate": 6.216281895504254e-05, "loss": 1.725, "step": 6238 }, { "epoch": 1.8937623311579905, "grad_norm": 0.5363330841064453, "learning_rate": 6.215674362089914e-05, "loss": 1.7498, "step": 6239 }, { "epoch": 1.8940658673546822, "grad_norm": 0.5891059041023254, "learning_rate": 6.215066828675577e-05, "loss": 1.8509, "step": 6240 }, { "epoch": 1.8943694035513734, "grad_norm": 0.694438099861145, "learning_rate": 6.21445929526124e-05, "loss": 1.7223, "step": 6241 }, { "epoch": 1.894672939748065, "grad_norm": 0.49839189648628235, "learning_rate": 6.213851761846902e-05, "loss": 1.8137, "step": 6242 }, { "epoch": 1.8949764759447563, "grad_norm": 0.4575786590576172, "learning_rate": 6.213244228432564e-05, "loss": 1.8167, "step": 6243 }, { "epoch": 1.895280012141448, "grad_norm": 0.9389039278030396, "learning_rate": 6.212636695018226e-05, "loss": 1.7694, "step": 6244 }, { "epoch": 1.8955835483381394, "grad_norm": 0.5133308172225952, "learning_rate": 6.212029161603889e-05, "loss": 1.5743, "step": 6245 }, { "epoch": 1.8958870845348308, "grad_norm": 0.5339624881744385, "learning_rate": 6.21142162818955e-05, "loss": 1.4944, "step": 6246 }, { "epoch": 1.8961906207315222, "grad_norm": 0.5333174467086792, "learning_rate": 6.210814094775212e-05, "loss": 1.7742, "step": 6247 }, { "epoch": 1.8964941569282137, "grad_norm": 0.45883241295814514, "learning_rate": 6.210206561360875e-05, "loss": 1.2235, "step": 6248 }, { "epoch": 1.896797693124905, "grad_norm": 0.5622841715812683, "learning_rate": 6.209599027946537e-05, "loss": 1.7732, "step": 6249 }, { "epoch": 1.8971012293215965, "grad_norm": 0.5983763337135315, "learning_rate": 6.208991494532199e-05, "loss": 1.7278, "step": 6250 }, { "epoch": 1.8974047655182882, "grad_norm": 0.47994494438171387, "learning_rate": 6.208383961117862e-05, "loss": 1.6245, "step": 6251 }, { "epoch": 1.8977083017149794, "grad_norm": 0.4889879822731018, "learning_rate": 6.207776427703525e-05, "loss": 1.7647, "step": 6252 }, { "epoch": 1.898011837911671, "grad_norm": 0.40788859128952026, "learning_rate": 6.207168894289185e-05, "loss": 1.2151, "step": 6253 }, { "epoch": 1.8983153741083623, "grad_norm": 0.5352895855903625, "learning_rate": 6.206561360874848e-05, "loss": 1.3245, "step": 6254 }, { "epoch": 1.898618910305054, "grad_norm": 0.42484623193740845, "learning_rate": 6.205953827460512e-05, "loss": 2.3155, "step": 6255 }, { "epoch": 1.8989224465017454, "grad_norm": 0.38781338930130005, "learning_rate": 6.205346294046173e-05, "loss": 1.1976, "step": 6256 }, { "epoch": 1.8992259826984368, "grad_norm": 0.4916774034500122, "learning_rate": 6.204738760631835e-05, "loss": 1.407, "step": 6257 }, { "epoch": 1.8995295188951282, "grad_norm": 0.5175955891609192, "learning_rate": 6.204131227217497e-05, "loss": 1.8932, "step": 6258 }, { "epoch": 1.8998330550918197, "grad_norm": 0.5470436811447144, "learning_rate": 6.20352369380316e-05, "loss": 1.671, "step": 6259 }, { "epoch": 1.9001365912885113, "grad_norm": 0.44192200899124146, "learning_rate": 6.202916160388822e-05, "loss": 1.3732, "step": 6260 }, { "epoch": 1.9004401274852025, "grad_norm": 0.46629634499549866, "learning_rate": 6.202308626974483e-05, "loss": 1.8605, "step": 6261 }, { "epoch": 1.9007436636818942, "grad_norm": 0.5211533904075623, "learning_rate": 6.201701093560146e-05, "loss": 1.6723, "step": 6262 }, { "epoch": 1.9010471998785854, "grad_norm": 0.5735886096954346, "learning_rate": 6.201093560145808e-05, "loss": 1.5299, "step": 6263 }, { "epoch": 1.901350736075277, "grad_norm": 0.5859761238098145, "learning_rate": 6.20048602673147e-05, "loss": 1.5531, "step": 6264 }, { "epoch": 1.9016542722719685, "grad_norm": 0.4476751685142517, "learning_rate": 6.199878493317133e-05, "loss": 1.6964, "step": 6265 }, { "epoch": 1.90195780846866, "grad_norm": 0.7125301957130432, "learning_rate": 6.199270959902796e-05, "loss": 1.3845, "step": 6266 }, { "epoch": 1.9022613446653514, "grad_norm": 0.6609548330307007, "learning_rate": 6.198663426488456e-05, "loss": 2.045, "step": 6267 }, { "epoch": 1.9025648808620428, "grad_norm": 0.4831355810165405, "learning_rate": 6.19805589307412e-05, "loss": 1.2296, "step": 6268 }, { "epoch": 1.9028684170587342, "grad_norm": 0.501849353313446, "learning_rate": 6.197448359659783e-05, "loss": 1.7507, "step": 6269 }, { "epoch": 1.9031719532554257, "grad_norm": 0.6798990964889526, "learning_rate": 6.196840826245444e-05, "loss": 1.8987, "step": 6270 }, { "epoch": 1.9034754894521173, "grad_norm": 0.5525609850883484, "learning_rate": 6.196233292831106e-05, "loss": 1.6601, "step": 6271 }, { "epoch": 1.9037790256488085, "grad_norm": 0.533176839351654, "learning_rate": 6.195625759416768e-05, "loss": 1.7864, "step": 6272 }, { "epoch": 1.9040825618455002, "grad_norm": 0.49768903851509094, "learning_rate": 6.195018226002431e-05, "loss": 1.1226, "step": 6273 }, { "epoch": 1.9043860980421914, "grad_norm": 0.5778190493583679, "learning_rate": 6.194410692588093e-05, "loss": 1.782, "step": 6274 }, { "epoch": 1.904689634238883, "grad_norm": 0.5707374811172485, "learning_rate": 6.193803159173754e-05, "loss": 1.4747, "step": 6275 }, { "epoch": 1.9049931704355745, "grad_norm": 0.5454164147377014, "learning_rate": 6.193195625759417e-05, "loss": 1.2567, "step": 6276 }, { "epoch": 1.905296706632266, "grad_norm": 0.5996286869049072, "learning_rate": 6.192588092345079e-05, "loss": 1.8435, "step": 6277 }, { "epoch": 1.9056002428289573, "grad_norm": 0.5817993879318237, "learning_rate": 6.191980558930741e-05, "loss": 1.6641, "step": 6278 }, { "epoch": 1.9059037790256488, "grad_norm": 0.5574187636375427, "learning_rate": 6.191373025516404e-05, "loss": 1.6168, "step": 6279 }, { "epoch": 1.9062073152223402, "grad_norm": 0.7683195471763611, "learning_rate": 6.190765492102067e-05, "loss": 1.8639, "step": 6280 }, { "epoch": 1.9065108514190316, "grad_norm": 0.42818158864974976, "learning_rate": 6.190157958687727e-05, "loss": 1.2528, "step": 6281 }, { "epoch": 1.9068143876157233, "grad_norm": 0.5653691291809082, "learning_rate": 6.18955042527339e-05, "loss": 1.7523, "step": 6282 }, { "epoch": 1.9071179238124145, "grad_norm": 0.5621758699417114, "learning_rate": 6.188942891859054e-05, "loss": 1.825, "step": 6283 }, { "epoch": 1.9074214600091062, "grad_norm": 0.6503968238830566, "learning_rate": 6.188335358444714e-05, "loss": 1.6188, "step": 6284 }, { "epoch": 1.9077249962057974, "grad_norm": 0.5360357761383057, "learning_rate": 6.187727825030377e-05, "loss": 1.9039, "step": 6285 }, { "epoch": 1.908028532402489, "grad_norm": 0.4974033534526825, "learning_rate": 6.187120291616039e-05, "loss": 1.8735, "step": 6286 }, { "epoch": 1.9083320685991805, "grad_norm": 0.3803102970123291, "learning_rate": 6.186512758201702e-05, "loss": 1.6851, "step": 6287 }, { "epoch": 1.908635604795872, "grad_norm": 0.5085203051567078, "learning_rate": 6.185905224787364e-05, "loss": 1.7257, "step": 6288 }, { "epoch": 1.9089391409925633, "grad_norm": 0.49796509742736816, "learning_rate": 6.185297691373025e-05, "loss": 1.5958, "step": 6289 }, { "epoch": 1.9092426771892548, "grad_norm": 0.5202645063400269, "learning_rate": 6.184690157958688e-05, "loss": 1.4409, "step": 6290 }, { "epoch": 1.9095462133859464, "grad_norm": 0.5298627018928528, "learning_rate": 6.18408262454435e-05, "loss": 1.7299, "step": 6291 }, { "epoch": 1.9098497495826376, "grad_norm": 0.5481365919113159, "learning_rate": 6.183475091130012e-05, "loss": 1.5826, "step": 6292 }, { "epoch": 1.9101532857793293, "grad_norm": 0.5484110116958618, "learning_rate": 6.182867557715675e-05, "loss": 1.2558, "step": 6293 }, { "epoch": 1.9104568219760205, "grad_norm": 0.5717966556549072, "learning_rate": 6.182260024301338e-05, "loss": 1.7264, "step": 6294 }, { "epoch": 1.9107603581727122, "grad_norm": 0.5692905187606812, "learning_rate": 6.181652490886998e-05, "loss": 1.6922, "step": 6295 }, { "epoch": 1.9110638943694036, "grad_norm": 0.45173266530036926, "learning_rate": 6.181044957472661e-05, "loss": 1.0979, "step": 6296 }, { "epoch": 1.911367430566095, "grad_norm": 0.5674440860748291, "learning_rate": 6.180437424058325e-05, "loss": 1.7522, "step": 6297 }, { "epoch": 1.9116709667627865, "grad_norm": 0.634099543094635, "learning_rate": 6.179829890643985e-05, "loss": 1.5519, "step": 6298 }, { "epoch": 1.911974502959478, "grad_norm": 0.6131418943405151, "learning_rate": 6.179222357229648e-05, "loss": 1.5224, "step": 6299 }, { "epoch": 1.9122780391561693, "grad_norm": 0.4736331105232239, "learning_rate": 6.17861482381531e-05, "loss": 1.2786, "step": 6300 }, { "epoch": 1.9125815753528608, "grad_norm": 0.827942967414856, "learning_rate": 6.178007290400973e-05, "loss": 1.7119, "step": 6301 }, { "epoch": 1.9128851115495524, "grad_norm": 0.572033166885376, "learning_rate": 6.177399756986635e-05, "loss": 1.6908, "step": 6302 }, { "epoch": 1.9131886477462436, "grad_norm": 0.6228052377700806, "learning_rate": 6.176792223572296e-05, "loss": 1.7296, "step": 6303 }, { "epoch": 1.9134921839429353, "grad_norm": 0.48696666955947876, "learning_rate": 6.17618469015796e-05, "loss": 1.5238, "step": 6304 }, { "epoch": 1.9137957201396265, "grad_norm": 0.5116902589797974, "learning_rate": 6.175577156743621e-05, "loss": 1.8871, "step": 6305 }, { "epoch": 1.9140992563363182, "grad_norm": 0.5537254810333252, "learning_rate": 6.174969623329283e-05, "loss": 1.712, "step": 6306 }, { "epoch": 1.9144027925330096, "grad_norm": 0.43108007311820984, "learning_rate": 6.174362089914946e-05, "loss": 1.5564, "step": 6307 }, { "epoch": 1.914706328729701, "grad_norm": 0.4873630404472351, "learning_rate": 6.173754556500608e-05, "loss": 1.8391, "step": 6308 }, { "epoch": 1.9150098649263925, "grad_norm": 0.4794020354747772, "learning_rate": 6.17314702308627e-05, "loss": 1.9983, "step": 6309 }, { "epoch": 1.915313401123084, "grad_norm": 0.4727613031864166, "learning_rate": 6.172539489671932e-05, "loss": 1.8201, "step": 6310 }, { "epoch": 1.9156169373197753, "grad_norm": 0.5152159929275513, "learning_rate": 6.171931956257596e-05, "loss": 1.2213, "step": 6311 }, { "epoch": 1.9159204735164668, "grad_norm": 0.5150118470191956, "learning_rate": 6.171324422843256e-05, "loss": 1.7555, "step": 6312 }, { "epoch": 1.9162240097131584, "grad_norm": 0.5367141366004944, "learning_rate": 6.170716889428919e-05, "loss": 1.6911, "step": 6313 }, { "epoch": 1.9165275459098496, "grad_norm": 0.6431503891944885, "learning_rate": 6.170109356014581e-05, "loss": 1.6756, "step": 6314 }, { "epoch": 1.9168310821065413, "grad_norm": 1.273424506187439, "learning_rate": 6.169501822600244e-05, "loss": 1.3612, "step": 6315 }, { "epoch": 1.9171346183032325, "grad_norm": 0.5230395793914795, "learning_rate": 6.168894289185906e-05, "loss": 1.7962, "step": 6316 }, { "epoch": 1.9174381544999242, "grad_norm": 0.5366686582565308, "learning_rate": 6.168286755771567e-05, "loss": 1.8589, "step": 6317 }, { "epoch": 1.9177416906966156, "grad_norm": 0.5949418544769287, "learning_rate": 6.16767922235723e-05, "loss": 1.7689, "step": 6318 }, { "epoch": 1.918045226893307, "grad_norm": 0.800364077091217, "learning_rate": 6.167071688942892e-05, "loss": 1.7323, "step": 6319 }, { "epoch": 1.9183487630899985, "grad_norm": 0.5248730778694153, "learning_rate": 6.166464155528554e-05, "loss": 1.6069, "step": 6320 }, { "epoch": 1.91865229928669, "grad_norm": 0.5831948518753052, "learning_rate": 6.165856622114217e-05, "loss": 1.6719, "step": 6321 }, { "epoch": 1.9189558354833816, "grad_norm": 0.6087794303894043, "learning_rate": 6.165249088699879e-05, "loss": 1.3223, "step": 6322 }, { "epoch": 1.9192593716800728, "grad_norm": 0.5439948439598083, "learning_rate": 6.16464155528554e-05, "loss": 1.7126, "step": 6323 }, { "epoch": 1.9195629078767644, "grad_norm": 0.7024933099746704, "learning_rate": 6.164034021871203e-05, "loss": 1.324, "step": 6324 }, { "epoch": 1.9198664440734556, "grad_norm": 0.5878713130950928, "learning_rate": 6.163426488456865e-05, "loss": 1.7572, "step": 6325 }, { "epoch": 1.9201699802701473, "grad_norm": 0.3653714656829834, "learning_rate": 6.162818955042527e-05, "loss": 1.2048, "step": 6326 }, { "epoch": 1.9204735164668387, "grad_norm": 0.5881572365760803, "learning_rate": 6.16221142162819e-05, "loss": 1.4599, "step": 6327 }, { "epoch": 1.9207770526635302, "grad_norm": 0.5380381941795349, "learning_rate": 6.161603888213852e-05, "loss": 1.618, "step": 6328 }, { "epoch": 1.9210805888602216, "grad_norm": 0.6120547652244568, "learning_rate": 6.160996354799515e-05, "loss": 1.338, "step": 6329 }, { "epoch": 1.921384125056913, "grad_norm": 0.6269816160202026, "learning_rate": 6.160388821385177e-05, "loss": 1.8559, "step": 6330 }, { "epoch": 1.9216876612536045, "grad_norm": 0.6310059428215027, "learning_rate": 6.159781287970838e-05, "loss": 1.642, "step": 6331 }, { "epoch": 1.921991197450296, "grad_norm": 0.6771363615989685, "learning_rate": 6.159173754556501e-05, "loss": 1.4403, "step": 6332 }, { "epoch": 1.9222947336469876, "grad_norm": 1.0170625448226929, "learning_rate": 6.158566221142163e-05, "loss": 1.5708, "step": 6333 }, { "epoch": 1.9225982698436788, "grad_norm": 0.7576006054878235, "learning_rate": 6.157958687727825e-05, "loss": 2.0975, "step": 6334 }, { "epoch": 1.9229018060403704, "grad_norm": 0.5351456999778748, "learning_rate": 6.157351154313488e-05, "loss": 1.9641, "step": 6335 }, { "epoch": 1.9232053422370616, "grad_norm": 0.4267328679561615, "learning_rate": 6.15674362089915e-05, "loss": 1.2032, "step": 6336 }, { "epoch": 1.9235088784337533, "grad_norm": 0.5409083366394043, "learning_rate": 6.156136087484811e-05, "loss": 1.8177, "step": 6337 }, { "epoch": 1.9238124146304447, "grad_norm": 0.5315467119216919, "learning_rate": 6.155528554070474e-05, "loss": 1.8095, "step": 6338 }, { "epoch": 1.9241159508271362, "grad_norm": 0.5460966229438782, "learning_rate": 6.154921020656136e-05, "loss": 1.4244, "step": 6339 }, { "epoch": 1.9244194870238276, "grad_norm": 0.4572533369064331, "learning_rate": 6.154313487241798e-05, "loss": 1.5232, "step": 6340 }, { "epoch": 1.924723023220519, "grad_norm": 0.6189396381378174, "learning_rate": 6.153705953827461e-05, "loss": 1.6011, "step": 6341 }, { "epoch": 1.9250265594172105, "grad_norm": 0.513096272945404, "learning_rate": 6.153098420413123e-05, "loss": 1.4146, "step": 6342 }, { "epoch": 1.925330095613902, "grad_norm": 0.5706098675727844, "learning_rate": 6.152490886998786e-05, "loss": 1.2415, "step": 6343 }, { "epoch": 1.9256336318105935, "grad_norm": 0.6112621426582336, "learning_rate": 6.151883353584448e-05, "loss": 1.3726, "step": 6344 }, { "epoch": 1.9259371680072848, "grad_norm": 0.5262756943702698, "learning_rate": 6.151275820170109e-05, "loss": 1.9781, "step": 6345 }, { "epoch": 1.9262407042039764, "grad_norm": 0.5784090161323547, "learning_rate": 6.150668286755772e-05, "loss": 1.714, "step": 6346 }, { "epoch": 1.9265442404006676, "grad_norm": 0.6644015908241272, "learning_rate": 6.150060753341434e-05, "loss": 1.6659, "step": 6347 }, { "epoch": 1.9268477765973593, "grad_norm": 0.5297151207923889, "learning_rate": 6.149453219927096e-05, "loss": 1.5757, "step": 6348 }, { "epoch": 1.9271513127940507, "grad_norm": 0.6419957876205444, "learning_rate": 6.148845686512759e-05, "loss": 1.7383, "step": 6349 }, { "epoch": 1.9274548489907422, "grad_norm": 0.5073803663253784, "learning_rate": 6.14823815309842e-05, "loss": 1.9598, "step": 6350 }, { "epoch": 1.9277583851874336, "grad_norm": 0.6124430298805237, "learning_rate": 6.147630619684082e-05, "loss": 1.6829, "step": 6351 }, { "epoch": 1.928061921384125, "grad_norm": 0.5520927309989929, "learning_rate": 6.147023086269745e-05, "loss": 1.7079, "step": 6352 }, { "epoch": 1.9283654575808167, "grad_norm": 0.5771626830101013, "learning_rate": 6.146415552855407e-05, "loss": 1.3217, "step": 6353 }, { "epoch": 1.928668993777508, "grad_norm": 0.6157678961753845, "learning_rate": 6.145808019441069e-05, "loss": 1.6344, "step": 6354 }, { "epoch": 1.9289725299741995, "grad_norm": 0.5151667594909668, "learning_rate": 6.145200486026732e-05, "loss": 1.8214, "step": 6355 }, { "epoch": 1.9292760661708908, "grad_norm": 0.48279091715812683, "learning_rate": 6.144592952612394e-05, "loss": 1.6539, "step": 6356 }, { "epoch": 1.9295796023675824, "grad_norm": 0.8940995335578918, "learning_rate": 6.143985419198055e-05, "loss": 1.7098, "step": 6357 }, { "epoch": 1.9298831385642736, "grad_norm": 0.510097861289978, "learning_rate": 6.143377885783719e-05, "loss": 1.2187, "step": 6358 }, { "epoch": 1.9301866747609653, "grad_norm": 0.6991041898727417, "learning_rate": 6.14277035236938e-05, "loss": 1.6297, "step": 6359 }, { "epoch": 1.9304902109576567, "grad_norm": 0.7418796420097351, "learning_rate": 6.142162818955043e-05, "loss": 1.8175, "step": 6360 }, { "epoch": 1.9307937471543482, "grad_norm": 0.46536850929260254, "learning_rate": 6.141555285540705e-05, "loss": 1.3162, "step": 6361 }, { "epoch": 1.9310972833510396, "grad_norm": 0.5243115425109863, "learning_rate": 6.140947752126367e-05, "loss": 2.014, "step": 6362 }, { "epoch": 1.931400819547731, "grad_norm": 0.6091117262840271, "learning_rate": 6.14034021871203e-05, "loss": 1.3685, "step": 6363 }, { "epoch": 1.9317043557444227, "grad_norm": 0.5803942680358887, "learning_rate": 6.139732685297692e-05, "loss": 1.3536, "step": 6364 }, { "epoch": 1.9320078919411139, "grad_norm": 0.5577396750450134, "learning_rate": 6.139125151883353e-05, "loss": 2.0119, "step": 6365 }, { "epoch": 1.9323114281378055, "grad_norm": 0.6743834018707275, "learning_rate": 6.138517618469016e-05, "loss": 1.8706, "step": 6366 }, { "epoch": 1.9326149643344968, "grad_norm": 0.5312842130661011, "learning_rate": 6.137910085054678e-05, "loss": 1.9281, "step": 6367 }, { "epoch": 1.9329185005311884, "grad_norm": 0.569107711315155, "learning_rate": 6.13730255164034e-05, "loss": 1.532, "step": 6368 }, { "epoch": 1.9332220367278798, "grad_norm": 0.4576084315776825, "learning_rate": 6.136695018226003e-05, "loss": 1.1511, "step": 6369 }, { "epoch": 1.9335255729245713, "grad_norm": 0.5539278984069824, "learning_rate": 6.136087484811665e-05, "loss": 1.4286, "step": 6370 }, { "epoch": 1.9338291091212627, "grad_norm": 0.544119119644165, "learning_rate": 6.135479951397326e-05, "loss": 1.2395, "step": 6371 }, { "epoch": 1.9341326453179541, "grad_norm": 0.4071330428123474, "learning_rate": 6.13487241798299e-05, "loss": 1.6382, "step": 6372 }, { "epoch": 1.9344361815146456, "grad_norm": 0.5704235434532166, "learning_rate": 6.134264884568651e-05, "loss": 1.4867, "step": 6373 }, { "epoch": 1.934739717711337, "grad_norm": 0.5418073534965515, "learning_rate": 6.133657351154314e-05, "loss": 1.6909, "step": 6374 }, { "epoch": 1.9350432539080287, "grad_norm": 0.579681932926178, "learning_rate": 6.133049817739976e-05, "loss": 1.5281, "step": 6375 }, { "epoch": 1.9353467901047199, "grad_norm": 0.6796471476554871, "learning_rate": 6.132442284325638e-05, "loss": 1.2198, "step": 6376 }, { "epoch": 1.9356503263014115, "grad_norm": 0.535188615322113, "learning_rate": 6.131834750911301e-05, "loss": 1.4344, "step": 6377 }, { "epoch": 1.9359538624981028, "grad_norm": 0.5495712757110596, "learning_rate": 6.131227217496963e-05, "loss": 1.3002, "step": 6378 }, { "epoch": 1.9362573986947944, "grad_norm": 0.5547313690185547, "learning_rate": 6.130619684082624e-05, "loss": 1.9909, "step": 6379 }, { "epoch": 1.9365609348914858, "grad_norm": 0.5010794997215271, "learning_rate": 6.130012150668287e-05, "loss": 1.778, "step": 6380 }, { "epoch": 1.9368644710881773, "grad_norm": 0.47357454895973206, "learning_rate": 6.129404617253949e-05, "loss": 2.0388, "step": 6381 }, { "epoch": 1.9371680072848687, "grad_norm": 0.6383755207061768, "learning_rate": 6.128797083839611e-05, "loss": 1.2668, "step": 6382 }, { "epoch": 1.9374715434815601, "grad_norm": 0.49293264746665955, "learning_rate": 6.128189550425274e-05, "loss": 1.759, "step": 6383 }, { "epoch": 1.9377750796782516, "grad_norm": 0.5476403832435608, "learning_rate": 6.127582017010936e-05, "loss": 1.63, "step": 6384 }, { "epoch": 1.938078615874943, "grad_norm": 0.5941591858863831, "learning_rate": 6.126974483596597e-05, "loss": 1.8002, "step": 6385 }, { "epoch": 1.9383821520716347, "grad_norm": 0.5813649892807007, "learning_rate": 6.12636695018226e-05, "loss": 1.5548, "step": 6386 }, { "epoch": 1.9386856882683259, "grad_norm": 0.5048520565032959, "learning_rate": 6.125759416767922e-05, "loss": 1.8952, "step": 6387 }, { "epoch": 1.9389892244650175, "grad_norm": 0.5147222876548767, "learning_rate": 6.125151883353585e-05, "loss": 1.7712, "step": 6388 }, { "epoch": 1.9392927606617087, "grad_norm": 0.5488929748535156, "learning_rate": 6.124544349939247e-05, "loss": 1.5482, "step": 6389 }, { "epoch": 1.9395962968584004, "grad_norm": 0.5807020664215088, "learning_rate": 6.123936816524909e-05, "loss": 1.8429, "step": 6390 }, { "epoch": 1.9398998330550918, "grad_norm": 0.4687190651893616, "learning_rate": 6.123329283110572e-05, "loss": 1.4001, "step": 6391 }, { "epoch": 1.9402033692517833, "grad_norm": 0.5817119479179382, "learning_rate": 6.122721749696234e-05, "loss": 1.6638, "step": 6392 }, { "epoch": 1.9405069054484747, "grad_norm": 0.9106517434120178, "learning_rate": 6.122114216281895e-05, "loss": 0.6738, "step": 6393 }, { "epoch": 1.9408104416451661, "grad_norm": 0.5468938946723938, "learning_rate": 6.121506682867558e-05, "loss": 1.8881, "step": 6394 }, { "epoch": 1.9411139778418578, "grad_norm": 0.6180863380432129, "learning_rate": 6.12089914945322e-05, "loss": 1.8285, "step": 6395 }, { "epoch": 1.941417514038549, "grad_norm": 0.5428357124328613, "learning_rate": 6.120291616038882e-05, "loss": 1.613, "step": 6396 }, { "epoch": 1.9417210502352407, "grad_norm": 0.5250274538993835, "learning_rate": 6.119684082624545e-05, "loss": 1.3086, "step": 6397 }, { "epoch": 1.9420245864319319, "grad_norm": 0.5188660621643066, "learning_rate": 6.119076549210207e-05, "loss": 1.8626, "step": 6398 }, { "epoch": 1.9423281226286235, "grad_norm": 0.5318378806114197, "learning_rate": 6.118469015795868e-05, "loss": 1.6041, "step": 6399 }, { "epoch": 1.942631658825315, "grad_norm": 0.5830715894699097, "learning_rate": 6.117861482381532e-05, "loss": 1.7264, "step": 6400 }, { "epoch": 1.9429351950220064, "grad_norm": 0.5456033945083618, "learning_rate": 6.117253948967193e-05, "loss": 1.8035, "step": 6401 }, { "epoch": 1.9432387312186978, "grad_norm": 0.4758380949497223, "learning_rate": 6.116646415552856e-05, "loss": 1.6618, "step": 6402 }, { "epoch": 1.9435422674153893, "grad_norm": 0.6281861662864685, "learning_rate": 6.116038882138518e-05, "loss": 1.4869, "step": 6403 }, { "epoch": 1.9438458036120807, "grad_norm": 0.6155133247375488, "learning_rate": 6.11543134872418e-05, "loss": 1.6041, "step": 6404 }, { "epoch": 1.9441493398087721, "grad_norm": 0.549446702003479, "learning_rate": 6.114823815309843e-05, "loss": 1.6261, "step": 6405 }, { "epoch": 1.9444528760054638, "grad_norm": 0.5333273410797119, "learning_rate": 6.114216281895505e-05, "loss": 1.7439, "step": 6406 }, { "epoch": 1.944756412202155, "grad_norm": 0.5138979554176331, "learning_rate": 6.113608748481166e-05, "loss": 1.3236, "step": 6407 }, { "epoch": 1.9450599483988467, "grad_norm": 0.5613072514533997, "learning_rate": 6.11300121506683e-05, "loss": 1.6681, "step": 6408 }, { "epoch": 1.9453634845955379, "grad_norm": 0.611084520816803, "learning_rate": 6.112393681652491e-05, "loss": 1.5973, "step": 6409 }, { "epoch": 1.9456670207922295, "grad_norm": 0.6191888451576233, "learning_rate": 6.111786148238153e-05, "loss": 1.3854, "step": 6410 }, { "epoch": 1.945970556988921, "grad_norm": 0.6448600888252258, "learning_rate": 6.111178614823816e-05, "loss": 1.8407, "step": 6411 }, { "epoch": 1.9462740931856124, "grad_norm": 0.5122054815292358, "learning_rate": 6.110571081409478e-05, "loss": 1.7577, "step": 6412 }, { "epoch": 1.9465776293823038, "grad_norm": 0.5150049328804016, "learning_rate": 6.10996354799514e-05, "loss": 1.3837, "step": 6413 }, { "epoch": 1.9468811655789953, "grad_norm": 0.5011094808578491, "learning_rate": 6.109356014580803e-05, "loss": 1.7184, "step": 6414 }, { "epoch": 1.9471847017756867, "grad_norm": 0.44695425033569336, "learning_rate": 6.108748481166464e-05, "loss": 1.7555, "step": 6415 }, { "epoch": 1.9474882379723781, "grad_norm": 0.5184510946273804, "learning_rate": 6.108140947752127e-05, "loss": 1.7578, "step": 6416 }, { "epoch": 1.9477917741690698, "grad_norm": 0.6175386309623718, "learning_rate": 6.107533414337789e-05, "loss": 1.4583, "step": 6417 }, { "epoch": 1.948095310365761, "grad_norm": 0.5669700503349304, "learning_rate": 6.106925880923451e-05, "loss": 1.6413, "step": 6418 }, { "epoch": 1.9483988465624527, "grad_norm": 0.5198760032653809, "learning_rate": 6.106318347509114e-05, "loss": 1.8265, "step": 6419 }, { "epoch": 1.9487023827591439, "grad_norm": 0.5493960976600647, "learning_rate": 6.105710814094774e-05, "loss": 1.0121, "step": 6420 }, { "epoch": 1.9490059189558355, "grad_norm": 0.8704328536987305, "learning_rate": 6.105103280680437e-05, "loss": 1.4603, "step": 6421 }, { "epoch": 1.949309455152527, "grad_norm": 0.5389131307601929, "learning_rate": 6.1044957472661e-05, "loss": 1.6166, "step": 6422 }, { "epoch": 1.9496129913492184, "grad_norm": 0.5700472593307495, "learning_rate": 6.103888213851762e-05, "loss": 1.7217, "step": 6423 }, { "epoch": 1.9499165275459098, "grad_norm": 0.6041691899299622, "learning_rate": 6.1032806804374246e-05, "loss": 1.5693, "step": 6424 }, { "epoch": 1.9502200637426013, "grad_norm": 0.5294330716133118, "learning_rate": 6.102673147023087e-05, "loss": 1.644, "step": 6425 }, { "epoch": 1.950523599939293, "grad_norm": 0.5551626682281494, "learning_rate": 6.102065613608748e-05, "loss": 1.6099, "step": 6426 }, { "epoch": 1.9508271361359841, "grad_norm": 0.5445873737335205, "learning_rate": 6.101458080194411e-05, "loss": 1.4334, "step": 6427 }, { "epoch": 1.9511306723326758, "grad_norm": 0.5853805541992188, "learning_rate": 6.1008505467800736e-05, "loss": 1.6584, "step": 6428 }, { "epoch": 1.951434208529367, "grad_norm": 0.5774348974227905, "learning_rate": 6.100243013365735e-05, "loss": 1.6662, "step": 6429 }, { "epoch": 1.9517377447260587, "grad_norm": 0.5022454261779785, "learning_rate": 6.099635479951398e-05, "loss": 1.9131, "step": 6430 }, { "epoch": 1.95204128092275, "grad_norm": 0.44924312829971313, "learning_rate": 6.09902794653706e-05, "loss": 1.4716, "step": 6431 }, { "epoch": 1.9523448171194415, "grad_norm": 0.5021010041236877, "learning_rate": 6.098420413122722e-05, "loss": 0.8655, "step": 6432 }, { "epoch": 1.952648353316133, "grad_norm": 0.621820867061615, "learning_rate": 6.097812879708384e-05, "loss": 1.396, "step": 6433 }, { "epoch": 1.9529518895128244, "grad_norm": 0.6849603056907654, "learning_rate": 6.097205346294046e-05, "loss": 1.2204, "step": 6434 }, { "epoch": 1.9532554257095158, "grad_norm": 0.9602012038230896, "learning_rate": 6.0965978128797084e-05, "loss": 1.5787, "step": 6435 }, { "epoch": 1.9535589619062073, "grad_norm": 0.5582414269447327, "learning_rate": 6.095990279465371e-05, "loss": 1.8998, "step": 6436 }, { "epoch": 1.953862498102899, "grad_norm": 0.4775597155094147, "learning_rate": 6.0953827460510325e-05, "loss": 2.094, "step": 6437 }, { "epoch": 1.9541660342995901, "grad_norm": 0.5407741665840149, "learning_rate": 6.094775212636695e-05, "loss": 1.6444, "step": 6438 }, { "epoch": 1.9544695704962818, "grad_norm": 0.5293698906898499, "learning_rate": 6.094167679222358e-05, "loss": 1.7003, "step": 6439 }, { "epoch": 1.954773106692973, "grad_norm": 0.5839650630950928, "learning_rate": 6.093560145808019e-05, "loss": 1.489, "step": 6440 }, { "epoch": 1.9550766428896647, "grad_norm": 0.6273818612098694, "learning_rate": 6.092952612393682e-05, "loss": 1.5088, "step": 6441 }, { "epoch": 1.955380179086356, "grad_norm": 0.5513050556182861, "learning_rate": 6.0923450789793446e-05, "loss": 1.652, "step": 6442 }, { "epoch": 1.9556837152830475, "grad_norm": 0.48457103967666626, "learning_rate": 6.091737545565006e-05, "loss": 1.7693, "step": 6443 }, { "epoch": 1.955987251479739, "grad_norm": 0.5889832973480225, "learning_rate": 6.091130012150669e-05, "loss": 1.9562, "step": 6444 }, { "epoch": 1.9562907876764304, "grad_norm": 0.5198668241500854, "learning_rate": 6.090522478736331e-05, "loss": 1.592, "step": 6445 }, { "epoch": 1.9565943238731218, "grad_norm": 0.462070107460022, "learning_rate": 6.089914945321993e-05, "loss": 2.0129, "step": 6446 }, { "epoch": 1.9568978600698133, "grad_norm": 0.6464818120002747, "learning_rate": 6.089307411907655e-05, "loss": 1.4838, "step": 6447 }, { "epoch": 1.957201396266505, "grad_norm": 0.510562539100647, "learning_rate": 6.088699878493317e-05, "loss": 1.6365, "step": 6448 }, { "epoch": 1.9575049324631961, "grad_norm": 0.4521179497241974, "learning_rate": 6.0880923450789794e-05, "loss": 1.6811, "step": 6449 }, { "epoch": 1.9578084686598878, "grad_norm": 0.45304521918296814, "learning_rate": 6.087484811664642e-05, "loss": 1.2221, "step": 6450 }, { "epoch": 1.958112004856579, "grad_norm": 0.5869529843330383, "learning_rate": 6.0868772782503036e-05, "loss": 1.6444, "step": 6451 }, { "epoch": 1.9584155410532706, "grad_norm": 0.6218820214271545, "learning_rate": 6.086269744835966e-05, "loss": 1.6352, "step": 6452 }, { "epoch": 1.958719077249962, "grad_norm": 0.6100924015045166, "learning_rate": 6.085662211421629e-05, "loss": 1.4695, "step": 6453 }, { "epoch": 1.9590226134466535, "grad_norm": 0.5317431688308716, "learning_rate": 6.08505467800729e-05, "loss": 1.6922, "step": 6454 }, { "epoch": 1.959326149643345, "grad_norm": 0.4049581289291382, "learning_rate": 6.084447144592953e-05, "loss": 1.8279, "step": 6455 }, { "epoch": 1.9596296858400364, "grad_norm": 0.6045603156089783, "learning_rate": 6.0838396111786156e-05, "loss": 1.86, "step": 6456 }, { "epoch": 1.959933222036728, "grad_norm": 0.5300851464271545, "learning_rate": 6.083232077764277e-05, "loss": 1.5076, "step": 6457 }, { "epoch": 1.9602367582334193, "grad_norm": 0.6136688590049744, "learning_rate": 6.08262454434994e-05, "loss": 1.1669, "step": 6458 }, { "epoch": 1.960540294430111, "grad_norm": 0.5564177632331848, "learning_rate": 6.082017010935602e-05, "loss": 1.4164, "step": 6459 }, { "epoch": 1.9608438306268021, "grad_norm": 0.5613592267036438, "learning_rate": 6.081409477521264e-05, "loss": 1.7866, "step": 6460 }, { "epoch": 1.9611473668234938, "grad_norm": 0.6643790602684021, "learning_rate": 6.080801944106926e-05, "loss": 1.6314, "step": 6461 }, { "epoch": 1.9614509030201852, "grad_norm": 0.9621322751045227, "learning_rate": 6.080194410692588e-05, "loss": 1.6243, "step": 6462 }, { "epoch": 1.9617544392168766, "grad_norm": 0.5654889345169067, "learning_rate": 6.0795868772782504e-05, "loss": 1.6922, "step": 6463 }, { "epoch": 1.962057975413568, "grad_norm": 0.5692543387413025, "learning_rate": 6.078979343863913e-05, "loss": 1.6057, "step": 6464 }, { "epoch": 1.9623615116102595, "grad_norm": 0.6160483360290527, "learning_rate": 6.0783718104495746e-05, "loss": 1.6588, "step": 6465 }, { "epoch": 1.962665047806951, "grad_norm": 0.5041912794113159, "learning_rate": 6.077764277035237e-05, "loss": 1.5149, "step": 6466 }, { "epoch": 1.9629685840036424, "grad_norm": 0.6104092597961426, "learning_rate": 6.0771567436209e-05, "loss": 1.1343, "step": 6467 }, { "epoch": 1.963272120200334, "grad_norm": 0.5036048293113708, "learning_rate": 6.076549210206561e-05, "loss": 1.5201, "step": 6468 }, { "epoch": 1.9635756563970252, "grad_norm": 0.5256472229957581, "learning_rate": 6.075941676792224e-05, "loss": 1.7575, "step": 6469 }, { "epoch": 1.963879192593717, "grad_norm": 0.5224828720092773, "learning_rate": 6.0753341433778866e-05, "loss": 1.7467, "step": 6470 }, { "epoch": 1.9641827287904081, "grad_norm": 0.5759482979774475, "learning_rate": 6.074726609963548e-05, "loss": 1.7598, "step": 6471 }, { "epoch": 1.9644862649870998, "grad_norm": 0.40608924627304077, "learning_rate": 6.074119076549211e-05, "loss": 1.4643, "step": 6472 }, { "epoch": 1.9647898011837912, "grad_norm": 0.5966246724128723, "learning_rate": 6.073511543134873e-05, "loss": 1.8583, "step": 6473 }, { "epoch": 1.9650933373804826, "grad_norm": 0.5416772961616516, "learning_rate": 6.072904009720535e-05, "loss": 1.6128, "step": 6474 }, { "epoch": 1.965396873577174, "grad_norm": 0.5953087210655212, "learning_rate": 6.072296476306197e-05, "loss": 1.885, "step": 6475 }, { "epoch": 1.9657004097738655, "grad_norm": 0.5860414505004883, "learning_rate": 6.071688942891859e-05, "loss": 1.8107, "step": 6476 }, { "epoch": 1.966003945970557, "grad_norm": 1.0410816669464111, "learning_rate": 6.0710814094775214e-05, "loss": 1.4357, "step": 6477 }, { "epoch": 1.9663074821672484, "grad_norm": 0.5218018293380737, "learning_rate": 6.070473876063184e-05, "loss": 1.5651, "step": 6478 }, { "epoch": 1.96661101836394, "grad_norm": 0.5561720132827759, "learning_rate": 6.0698663426488456e-05, "loss": 1.7292, "step": 6479 }, { "epoch": 1.9669145545606312, "grad_norm": 0.5160397887229919, "learning_rate": 6.069258809234508e-05, "loss": 1.7071, "step": 6480 }, { "epoch": 1.967218090757323, "grad_norm": 0.673804521560669, "learning_rate": 6.068651275820171e-05, "loss": 1.7731, "step": 6481 }, { "epoch": 1.9675216269540141, "grad_norm": 0.46362537145614624, "learning_rate": 6.068043742405832e-05, "loss": 1.3744, "step": 6482 }, { "epoch": 1.9678251631507058, "grad_norm": 0.5752343535423279, "learning_rate": 6.067436208991495e-05, "loss": 1.6567, "step": 6483 }, { "epoch": 1.9681286993473972, "grad_norm": 0.6016415357589722, "learning_rate": 6.0668286755771576e-05, "loss": 1.4306, "step": 6484 }, { "epoch": 1.9684322355440886, "grad_norm": 0.5972555875778198, "learning_rate": 6.066221142162819e-05, "loss": 1.7145, "step": 6485 }, { "epoch": 1.96873577174078, "grad_norm": 0.6853018403053284, "learning_rate": 6.065613608748482e-05, "loss": 1.8169, "step": 6486 }, { "epoch": 1.9690393079374715, "grad_norm": 0.5290831923484802, "learning_rate": 6.065006075334144e-05, "loss": 1.914, "step": 6487 }, { "epoch": 1.9693428441341632, "grad_norm": 0.5616101622581482, "learning_rate": 6.064398541919806e-05, "loss": 1.5203, "step": 6488 }, { "epoch": 1.9696463803308544, "grad_norm": 0.5446542501449585, "learning_rate": 6.063791008505468e-05, "loss": 1.6184, "step": 6489 }, { "epoch": 1.969949916527546, "grad_norm": 0.46483322978019714, "learning_rate": 6.06318347509113e-05, "loss": 1.0398, "step": 6490 }, { "epoch": 1.9702534527242372, "grad_norm": 0.5556966066360474, "learning_rate": 6.0625759416767924e-05, "loss": 1.6125, "step": 6491 }, { "epoch": 1.970556988920929, "grad_norm": 0.526470422744751, "learning_rate": 6.061968408262455e-05, "loss": 1.7479, "step": 6492 }, { "epoch": 1.97086052511762, "grad_norm": 0.4931873679161072, "learning_rate": 6.0613608748481166e-05, "loss": 1.9352, "step": 6493 }, { "epoch": 1.9711640613143118, "grad_norm": 0.665084958076477, "learning_rate": 6.060753341433779e-05, "loss": 1.7474, "step": 6494 }, { "epoch": 1.9714675975110032, "grad_norm": 0.5243136286735535, "learning_rate": 6.060145808019442e-05, "loss": 1.7402, "step": 6495 }, { "epoch": 1.9717711337076946, "grad_norm": 0.651799201965332, "learning_rate": 6.059538274605103e-05, "loss": 1.0746, "step": 6496 }, { "epoch": 1.972074669904386, "grad_norm": 0.5092620849609375, "learning_rate": 6.058930741190766e-05, "loss": 1.7713, "step": 6497 }, { "epoch": 1.9723782061010775, "grad_norm": 0.5405744314193726, "learning_rate": 6.0583232077764286e-05, "loss": 1.1775, "step": 6498 }, { "epoch": 1.9726817422977692, "grad_norm": 0.4236612617969513, "learning_rate": 6.05771567436209e-05, "loss": 0.8284, "step": 6499 }, { "epoch": 1.9729852784944604, "grad_norm": 0.5803433656692505, "learning_rate": 6.057108140947753e-05, "loss": 1.8339, "step": 6500 }, { "epoch": 1.973288814691152, "grad_norm": 0.5176873803138733, "learning_rate": 6.056500607533414e-05, "loss": 1.4704, "step": 6501 }, { "epoch": 1.9735923508878432, "grad_norm": 0.5557324886322021, "learning_rate": 6.055893074119077e-05, "loss": 1.8978, "step": 6502 }, { "epoch": 1.973895887084535, "grad_norm": 0.5535895228385925, "learning_rate": 6.055285540704739e-05, "loss": 1.2969, "step": 6503 }, { "epoch": 1.9741994232812263, "grad_norm": 0.5652197003364563, "learning_rate": 6.054678007290401e-05, "loss": 1.541, "step": 6504 }, { "epoch": 1.9745029594779178, "grad_norm": 0.639728307723999, "learning_rate": 6.0540704738760634e-05, "loss": 1.4226, "step": 6505 }, { "epoch": 1.9748064956746092, "grad_norm": 0.5786595344543457, "learning_rate": 6.053462940461726e-05, "loss": 1.3508, "step": 6506 }, { "epoch": 1.9751100318713006, "grad_norm": 0.5643638968467712, "learning_rate": 6.0528554070473876e-05, "loss": 1.802, "step": 6507 }, { "epoch": 1.975413568067992, "grad_norm": 0.5532044768333435, "learning_rate": 6.05224787363305e-05, "loss": 1.4272, "step": 6508 }, { "epoch": 1.9757171042646835, "grad_norm": 0.44745269417762756, "learning_rate": 6.051640340218713e-05, "loss": 1.7448, "step": 6509 }, { "epoch": 1.9760206404613752, "grad_norm": 0.5335531234741211, "learning_rate": 6.051032806804374e-05, "loss": 1.791, "step": 6510 }, { "epoch": 1.9763241766580664, "grad_norm": 0.6150341629981995, "learning_rate": 6.0504252733900365e-05, "loss": 1.7764, "step": 6511 }, { "epoch": 1.976627712854758, "grad_norm": 0.48714327812194824, "learning_rate": 6.0498177399756996e-05, "loss": 1.9457, "step": 6512 }, { "epoch": 1.9769312490514492, "grad_norm": 0.4661531448364258, "learning_rate": 6.049210206561361e-05, "loss": 1.6888, "step": 6513 }, { "epoch": 1.977234785248141, "grad_norm": 0.5356633067131042, "learning_rate": 6.048602673147024e-05, "loss": 1.4209, "step": 6514 }, { "epoch": 1.9775383214448323, "grad_norm": 0.5116220116615295, "learning_rate": 6.047995139732685e-05, "loss": 1.2706, "step": 6515 }, { "epoch": 1.9778418576415238, "grad_norm": 0.42522934079170227, "learning_rate": 6.047387606318348e-05, "loss": 1.1104, "step": 6516 }, { "epoch": 1.9781453938382152, "grad_norm": 0.5198089480400085, "learning_rate": 6.04678007290401e-05, "loss": 1.9558, "step": 6517 }, { "epoch": 1.9784489300349066, "grad_norm": 0.533348798751831, "learning_rate": 6.046172539489672e-05, "loss": 1.5748, "step": 6518 }, { "epoch": 1.9787524662315983, "grad_norm": 0.5725319981575012, "learning_rate": 6.0455650060753344e-05, "loss": 1.849, "step": 6519 }, { "epoch": 1.9790560024282895, "grad_norm": 0.5064899325370789, "learning_rate": 6.044957472660997e-05, "loss": 1.9204, "step": 6520 }, { "epoch": 1.9793595386249812, "grad_norm": 0.6601528525352478, "learning_rate": 6.0443499392466586e-05, "loss": 1.841, "step": 6521 }, { "epoch": 1.9796630748216724, "grad_norm": 0.5768593549728394, "learning_rate": 6.043742405832321e-05, "loss": 1.633, "step": 6522 }, { "epoch": 1.979966611018364, "grad_norm": 0.6356444358825684, "learning_rate": 6.0431348724179834e-05, "loss": 1.9165, "step": 6523 }, { "epoch": 1.9802701472150552, "grad_norm": 0.5365557670593262, "learning_rate": 6.042527339003645e-05, "loss": 1.4321, "step": 6524 }, { "epoch": 1.9805736834117469, "grad_norm": 0.47943785786628723, "learning_rate": 6.0419198055893075e-05, "loss": 1.668, "step": 6525 }, { "epoch": 1.9808772196084383, "grad_norm": 0.604640781879425, "learning_rate": 6.0413122721749706e-05, "loss": 1.4776, "step": 6526 }, { "epoch": 1.9811807558051298, "grad_norm": 0.6262120604515076, "learning_rate": 6.040704738760632e-05, "loss": 1.5482, "step": 6527 }, { "epoch": 1.9814842920018212, "grad_norm": 0.5773532390594482, "learning_rate": 6.040097205346295e-05, "loss": 1.7658, "step": 6528 }, { "epoch": 1.9817878281985126, "grad_norm": 0.5646322965621948, "learning_rate": 6.039489671931956e-05, "loss": 1.3962, "step": 6529 }, { "epoch": 1.9820913643952043, "grad_norm": 0.6088689565658569, "learning_rate": 6.038882138517619e-05, "loss": 1.7198, "step": 6530 }, { "epoch": 1.9823949005918955, "grad_norm": 0.6004126071929932, "learning_rate": 6.038274605103281e-05, "loss": 1.9443, "step": 6531 }, { "epoch": 1.9826984367885871, "grad_norm": 0.48227787017822266, "learning_rate": 6.037667071688943e-05, "loss": 1.631, "step": 6532 }, { "epoch": 1.9830019729852784, "grad_norm": 0.546604335308075, "learning_rate": 6.0370595382746054e-05, "loss": 1.2204, "step": 6533 }, { "epoch": 1.98330550918197, "grad_norm": 0.6554203629493713, "learning_rate": 6.036452004860268e-05, "loss": 1.6814, "step": 6534 }, { "epoch": 1.9836090453786615, "grad_norm": 0.4436679482460022, "learning_rate": 6.0358444714459296e-05, "loss": 1.7958, "step": 6535 }, { "epoch": 1.9839125815753529, "grad_norm": 0.48438987135887146, "learning_rate": 6.035236938031592e-05, "loss": 1.8244, "step": 6536 }, { "epoch": 1.9842161177720443, "grad_norm": 0.4136127233505249, "learning_rate": 6.0346294046172544e-05, "loss": 1.7304, "step": 6537 }, { "epoch": 1.9845196539687358, "grad_norm": 0.4987366795539856, "learning_rate": 6.034021871202916e-05, "loss": 1.8897, "step": 6538 }, { "epoch": 1.9848231901654272, "grad_norm": 0.5552710294723511, "learning_rate": 6.0334143377885785e-05, "loss": 1.5099, "step": 6539 }, { "epoch": 1.9851267263621186, "grad_norm": 0.6219823956489563, "learning_rate": 6.0328068043742416e-05, "loss": 1.4882, "step": 6540 }, { "epoch": 1.9854302625588103, "grad_norm": 0.5261727571487427, "learning_rate": 6.032199270959903e-05, "loss": 1.2227, "step": 6541 }, { "epoch": 1.9857337987555015, "grad_norm": 0.5668848156929016, "learning_rate": 6.031591737545566e-05, "loss": 1.7845, "step": 6542 }, { "epoch": 1.9860373349521931, "grad_norm": 0.47545093297958374, "learning_rate": 6.030984204131227e-05, "loss": 1.2132, "step": 6543 }, { "epoch": 1.9863408711488844, "grad_norm": 0.5103720426559448, "learning_rate": 6.03037667071689e-05, "loss": 1.6584, "step": 6544 }, { "epoch": 1.986644407345576, "grad_norm": 0.5664292573928833, "learning_rate": 6.029769137302552e-05, "loss": 1.8483, "step": 6545 }, { "epoch": 1.9869479435422674, "grad_norm": 0.4930339753627777, "learning_rate": 6.029161603888214e-05, "loss": 1.071, "step": 6546 }, { "epoch": 1.9872514797389589, "grad_norm": 0.4483380615711212, "learning_rate": 6.0285540704738765e-05, "loss": 1.5424, "step": 6547 }, { "epoch": 1.9875550159356503, "grad_norm": 0.5853394269943237, "learning_rate": 6.027946537059539e-05, "loss": 1.151, "step": 6548 }, { "epoch": 1.9878585521323417, "grad_norm": 0.46078211069107056, "learning_rate": 6.0273390036452006e-05, "loss": 1.7595, "step": 6549 }, { "epoch": 1.9881620883290332, "grad_norm": 0.4655674397945404, "learning_rate": 6.026731470230863e-05, "loss": 1.2309, "step": 6550 }, { "epoch": 1.9884656245257246, "grad_norm": 0.5861518383026123, "learning_rate": 6.0261239368165254e-05, "loss": 1.813, "step": 6551 }, { "epoch": 1.9887691607224163, "grad_norm": 0.7233760356903076, "learning_rate": 6.025516403402187e-05, "loss": 1.6074, "step": 6552 }, { "epoch": 1.9890726969191075, "grad_norm": 0.8948001861572266, "learning_rate": 6.0249088699878495e-05, "loss": 1.2804, "step": 6553 }, { "epoch": 1.9893762331157991, "grad_norm": 0.6039700508117676, "learning_rate": 6.0243013365735126e-05, "loss": 1.3581, "step": 6554 }, { "epoch": 1.9896797693124904, "grad_norm": 0.6029567122459412, "learning_rate": 6.023693803159174e-05, "loss": 1.9989, "step": 6555 }, { "epoch": 1.989983305509182, "grad_norm": 0.4397352635860443, "learning_rate": 6.023086269744837e-05, "loss": 1.2332, "step": 6556 }, { "epoch": 1.9902868417058734, "grad_norm": 0.6175510883331299, "learning_rate": 6.022478736330498e-05, "loss": 1.8423, "step": 6557 }, { "epoch": 1.9905903779025649, "grad_norm": 0.5127015113830566, "learning_rate": 6.021871202916161e-05, "loss": 0.9714, "step": 6558 }, { "epoch": 1.9908939140992563, "grad_norm": 0.5095683336257935, "learning_rate": 6.021263669501823e-05, "loss": 1.8097, "step": 6559 }, { "epoch": 1.9911974502959477, "grad_norm": 0.5457046031951904, "learning_rate": 6.0206561360874844e-05, "loss": 1.4398, "step": 6560 }, { "epoch": 1.9915009864926394, "grad_norm": 0.9285824298858643, "learning_rate": 6.0200486026731475e-05, "loss": 1.4319, "step": 6561 }, { "epoch": 1.9918045226893306, "grad_norm": 0.5472911596298218, "learning_rate": 6.01944106925881e-05, "loss": 1.9349, "step": 6562 }, { "epoch": 1.9921080588860223, "grad_norm": 0.4925740957260132, "learning_rate": 6.0188335358444716e-05, "loss": 1.5178, "step": 6563 }, { "epoch": 1.9924115950827135, "grad_norm": 0.5921374559402466, "learning_rate": 6.018226002430134e-05, "loss": 1.5178, "step": 6564 }, { "epoch": 1.9927151312794051, "grad_norm": 0.555499792098999, "learning_rate": 6.0176184690157964e-05, "loss": 1.6957, "step": 6565 }, { "epoch": 1.9930186674760966, "grad_norm": 0.550757110118866, "learning_rate": 6.017010935601458e-05, "loss": 1.6667, "step": 6566 }, { "epoch": 1.993322203672788, "grad_norm": 0.544740617275238, "learning_rate": 6.0164034021871206e-05, "loss": 1.9381, "step": 6567 }, { "epoch": 1.9936257398694794, "grad_norm": 0.4217390716075897, "learning_rate": 6.0157958687727836e-05, "loss": 1.3707, "step": 6568 }, { "epoch": 1.9939292760661709, "grad_norm": 0.5475983619689941, "learning_rate": 6.015188335358445e-05, "loss": 1.7717, "step": 6569 }, { "epoch": 1.9942328122628623, "grad_norm": 0.5119839906692505, "learning_rate": 6.014580801944108e-05, "loss": 1.4352, "step": 6570 }, { "epoch": 1.9945363484595537, "grad_norm": 0.5068908333778381, "learning_rate": 6.013973268529769e-05, "loss": 1.3449, "step": 6571 }, { "epoch": 1.9948398846562454, "grad_norm": 0.8712594509124756, "learning_rate": 6.013365735115431e-05, "loss": 1.2025, "step": 6572 }, { "epoch": 1.9951434208529366, "grad_norm": 0.5083956122398376, "learning_rate": 6.012758201701094e-05, "loss": 1.8382, "step": 6573 }, { "epoch": 1.9954469570496283, "grad_norm": 0.558754563331604, "learning_rate": 6.0121506682867554e-05, "loss": 1.5454, "step": 6574 }, { "epoch": 1.9957504932463195, "grad_norm": 0.56935054063797, "learning_rate": 6.0115431348724185e-05, "loss": 1.5451, "step": 6575 }, { "epoch": 1.9960540294430111, "grad_norm": 0.4553762674331665, "learning_rate": 6.010935601458081e-05, "loss": 2.3248, "step": 6576 }, { "epoch": 1.9963575656397026, "grad_norm": 0.5736023187637329, "learning_rate": 6.0103280680437426e-05, "loss": 1.5156, "step": 6577 }, { "epoch": 1.996661101836394, "grad_norm": 0.4840937554836273, "learning_rate": 6.009720534629405e-05, "loss": 1.3645, "step": 6578 }, { "epoch": 1.9969646380330854, "grad_norm": 0.6681798100471497, "learning_rate": 6.0091130012150674e-05, "loss": 1.2697, "step": 6579 }, { "epoch": 1.9972681742297769, "grad_norm": 0.6583466529846191, "learning_rate": 6.008505467800729e-05, "loss": 1.589, "step": 6580 }, { "epoch": 1.9975717104264683, "grad_norm": 0.3614431619644165, "learning_rate": 6.0078979343863916e-05, "loss": 1.3172, "step": 6581 }, { "epoch": 1.9978752466231597, "grad_norm": 0.4758198857307434, "learning_rate": 6.007290400972053e-05, "loss": 1.8047, "step": 6582 }, { "epoch": 1.9981787828198514, "grad_norm": 0.5516407489776611, "learning_rate": 6.006682867557716e-05, "loss": 2.0063, "step": 6583 }, { "epoch": 1.9984823190165426, "grad_norm": 0.5885918736457825, "learning_rate": 6.006075334143378e-05, "loss": 1.8758, "step": 6584 }, { "epoch": 1.9987858552132343, "grad_norm": 0.559417724609375, "learning_rate": 6.00546780072904e-05, "loss": 1.8631, "step": 6585 }, { "epoch": 1.9990893914099255, "grad_norm": 0.5297870635986328, "learning_rate": 6.004860267314702e-05, "loss": 1.2478, "step": 6586 }, { "epoch": 1.9993929276066171, "grad_norm": 0.583615243434906, "learning_rate": 6.004252733900365e-05, "loss": 1.4074, "step": 6587 }, { "epoch": 1.9996964638033086, "grad_norm": 0.6096095442771912, "learning_rate": 6.0036452004860264e-05, "loss": 1.8452, "step": 6588 }, { "epoch": 2.0, "grad_norm": 0.48540517687797546, "learning_rate": 6.0030376670716895e-05, "loss": 1.4051, "step": 6589 }, { "epoch": 2.0003035361966917, "grad_norm": 0.4750955104827881, "learning_rate": 6.002430133657352e-05, "loss": 1.5649, "step": 6590 }, { "epoch": 2.000607072393383, "grad_norm": 0.559846818447113, "learning_rate": 6.0018226002430136e-05, "loss": 1.4351, "step": 6591 }, { "epoch": 2.0009106085900745, "grad_norm": 0.6046615242958069, "learning_rate": 6.001215066828676e-05, "loss": 1.3853, "step": 6592 }, { "epoch": 2.0012141447867657, "grad_norm": 0.5519492626190186, "learning_rate": 6.0006075334143384e-05, "loss": 1.3244, "step": 6593 }, { "epoch": 2.0015176809834574, "grad_norm": 0.6226881742477417, "learning_rate": 6e-05, "loss": 1.1149, "step": 6594 }, { "epoch": 2.0018212171801486, "grad_norm": 0.6426854729652405, "learning_rate": 5.9993924665856626e-05, "loss": 1.4761, "step": 6595 }, { "epoch": 2.0021247533768403, "grad_norm": 0.7619990110397339, "learning_rate": 5.998784933171324e-05, "loss": 1.1087, "step": 6596 }, { "epoch": 2.0024282895735315, "grad_norm": 1.1839655637741089, "learning_rate": 5.998177399756987e-05, "loss": 1.4132, "step": 6597 }, { "epoch": 2.002731825770223, "grad_norm": 0.736304759979248, "learning_rate": 5.997569866342649e-05, "loss": 1.0338, "step": 6598 }, { "epoch": 2.0030353619669143, "grad_norm": 0.7201933264732361, "learning_rate": 5.996962332928311e-05, "loss": 1.3339, "step": 6599 }, { "epoch": 2.003338898163606, "grad_norm": 0.5403030514717102, "learning_rate": 5.996354799513973e-05, "loss": 1.288, "step": 6600 }, { "epoch": 2.0036424343602977, "grad_norm": 0.5611597895622253, "learning_rate": 5.995747266099636e-05, "loss": 0.7736, "step": 6601 }, { "epoch": 2.003945970556989, "grad_norm": 0.6462942361831665, "learning_rate": 5.9951397326852974e-05, "loss": 1.2936, "step": 6602 }, { "epoch": 2.0042495067536805, "grad_norm": 0.6441487073898315, "learning_rate": 5.9945321992709605e-05, "loss": 1.3673, "step": 6603 }, { "epoch": 2.0045530429503717, "grad_norm": 0.6502240300178528, "learning_rate": 5.993924665856623e-05, "loss": 1.4637, "step": 6604 }, { "epoch": 2.0048565791470634, "grad_norm": 0.8739331364631653, "learning_rate": 5.9933171324422846e-05, "loss": 0.9957, "step": 6605 }, { "epoch": 2.0051601153437546, "grad_norm": 0.7587090134620667, "learning_rate": 5.992709599027947e-05, "loss": 1.4321, "step": 6606 }, { "epoch": 2.0054636515404463, "grad_norm": 0.7050350904464722, "learning_rate": 5.9921020656136094e-05, "loss": 0.8794, "step": 6607 }, { "epoch": 2.0057671877371375, "grad_norm": 0.6558219790458679, "learning_rate": 5.991494532199271e-05, "loss": 1.1112, "step": 6608 }, { "epoch": 2.006070723933829, "grad_norm": 0.8796700835227966, "learning_rate": 5.9908869987849336e-05, "loss": 1.2927, "step": 6609 }, { "epoch": 2.006374260130521, "grad_norm": 0.5149579644203186, "learning_rate": 5.990279465370595e-05, "loss": 0.8447, "step": 6610 }, { "epoch": 2.006677796327212, "grad_norm": 0.8468472361564636, "learning_rate": 5.989671931956258e-05, "loss": 1.3351, "step": 6611 }, { "epoch": 2.0069813325239036, "grad_norm": 0.7754473686218262, "learning_rate": 5.98906439854192e-05, "loss": 1.6105, "step": 6612 }, { "epoch": 2.007284868720595, "grad_norm": 0.7056512832641602, "learning_rate": 5.988456865127582e-05, "loss": 1.0233, "step": 6613 }, { "epoch": 2.0075884049172865, "grad_norm": 0.7129377126693726, "learning_rate": 5.987849331713244e-05, "loss": 1.2205, "step": 6614 }, { "epoch": 2.0078919411139777, "grad_norm": 0.6188176274299622, "learning_rate": 5.987241798298907e-05, "loss": 1.501, "step": 6615 }, { "epoch": 2.0081954773106694, "grad_norm": 0.6133326888084412, "learning_rate": 5.9866342648845684e-05, "loss": 0.9485, "step": 6616 }, { "epoch": 2.0084990135073606, "grad_norm": 0.8083095550537109, "learning_rate": 5.9860267314702315e-05, "loss": 1.0196, "step": 6617 }, { "epoch": 2.0088025497040523, "grad_norm": 0.7114616632461548, "learning_rate": 5.985419198055894e-05, "loss": 1.4478, "step": 6618 }, { "epoch": 2.0091060859007435, "grad_norm": 0.681473970413208, "learning_rate": 5.9848116646415556e-05, "loss": 1.2194, "step": 6619 }, { "epoch": 2.009409622097435, "grad_norm": 0.6493435502052307, "learning_rate": 5.984204131227218e-05, "loss": 1.0063, "step": 6620 }, { "epoch": 2.009713158294127, "grad_norm": 0.5193372368812561, "learning_rate": 5.9835965978128804e-05, "loss": 0.99, "step": 6621 }, { "epoch": 2.010016694490818, "grad_norm": 0.8599382638931274, "learning_rate": 5.982989064398542e-05, "loss": 1.4596, "step": 6622 }, { "epoch": 2.0103202306875096, "grad_norm": 0.7509252429008484, "learning_rate": 5.9823815309842046e-05, "loss": 1.1321, "step": 6623 }, { "epoch": 2.010623766884201, "grad_norm": 0.8264543414115906, "learning_rate": 5.981773997569866e-05, "loss": 1.1802, "step": 6624 }, { "epoch": 2.0109273030808925, "grad_norm": 0.7668397426605225, "learning_rate": 5.981166464155529e-05, "loss": 1.2522, "step": 6625 }, { "epoch": 2.0112308392775837, "grad_norm": 0.5355592370033264, "learning_rate": 5.980558930741191e-05, "loss": 1.2359, "step": 6626 }, { "epoch": 2.0115343754742754, "grad_norm": 0.6542816758155823, "learning_rate": 5.979951397326853e-05, "loss": 1.4511, "step": 6627 }, { "epoch": 2.0118379116709666, "grad_norm": 0.6245687007904053, "learning_rate": 5.979343863912515e-05, "loss": 0.9108, "step": 6628 }, { "epoch": 2.0121414478676583, "grad_norm": 0.702171802520752, "learning_rate": 5.9787363304981783e-05, "loss": 1.4561, "step": 6629 }, { "epoch": 2.0124449840643495, "grad_norm": 0.7270470261573792, "learning_rate": 5.9781287970838394e-05, "loss": 1.2363, "step": 6630 }, { "epoch": 2.012748520261041, "grad_norm": 0.7082234025001526, "learning_rate": 5.9775212636695025e-05, "loss": 1.5224, "step": 6631 }, { "epoch": 2.0130520564577328, "grad_norm": 0.7704906463623047, "learning_rate": 5.976913730255165e-05, "loss": 1.0233, "step": 6632 }, { "epoch": 2.013355592654424, "grad_norm": 0.8580783605575562, "learning_rate": 5.976306196840826e-05, "loss": 1.0413, "step": 6633 }, { "epoch": 2.0136591288511156, "grad_norm": 0.7564939260482788, "learning_rate": 5.975698663426489e-05, "loss": 1.2357, "step": 6634 }, { "epoch": 2.013962665047807, "grad_norm": 0.8341938853263855, "learning_rate": 5.9750911300121514e-05, "loss": 1.5273, "step": 6635 }, { "epoch": 2.0142662012444985, "grad_norm": 0.7217328548431396, "learning_rate": 5.974483596597813e-05, "loss": 1.5105, "step": 6636 }, { "epoch": 2.0145697374411897, "grad_norm": 0.6179178357124329, "learning_rate": 5.9738760631834756e-05, "loss": 1.3321, "step": 6637 }, { "epoch": 2.0148732736378814, "grad_norm": 0.8074550032615662, "learning_rate": 5.973268529769137e-05, "loss": 1.4196, "step": 6638 }, { "epoch": 2.0151768098345726, "grad_norm": 0.7848823666572571, "learning_rate": 5.9726609963548e-05, "loss": 1.2375, "step": 6639 }, { "epoch": 2.0154803460312642, "grad_norm": 0.597193717956543, "learning_rate": 5.972053462940462e-05, "loss": 0.9173, "step": 6640 }, { "epoch": 2.015783882227956, "grad_norm": 0.7859654426574707, "learning_rate": 5.971445929526124e-05, "loss": 1.3904, "step": 6641 }, { "epoch": 2.016087418424647, "grad_norm": 0.8810588121414185, "learning_rate": 5.970838396111786e-05, "loss": 1.2244, "step": 6642 }, { "epoch": 2.0163909546213388, "grad_norm": 0.6478848457336426, "learning_rate": 5.9702308626974493e-05, "loss": 1.2265, "step": 6643 }, { "epoch": 2.01669449081803, "grad_norm": 0.7811263203620911, "learning_rate": 5.9696233292831104e-05, "loss": 1.283, "step": 6644 }, { "epoch": 2.0169980270147216, "grad_norm": 0.8496779203414917, "learning_rate": 5.969015795868773e-05, "loss": 1.208, "step": 6645 }, { "epoch": 2.017301563211413, "grad_norm": 0.6808563470840454, "learning_rate": 5.968408262454436e-05, "loss": 1.5266, "step": 6646 }, { "epoch": 2.0176050994081045, "grad_norm": 0.6841781139373779, "learning_rate": 5.967800729040097e-05, "loss": 1.3965, "step": 6647 }, { "epoch": 2.0179086356047957, "grad_norm": 0.7297331094741821, "learning_rate": 5.96719319562576e-05, "loss": 0.6081, "step": 6648 }, { "epoch": 2.0182121718014874, "grad_norm": 0.6482033133506775, "learning_rate": 5.9665856622114224e-05, "loss": 1.6838, "step": 6649 }, { "epoch": 2.0185157079981786, "grad_norm": 0.7368562817573547, "learning_rate": 5.965978128797084e-05, "loss": 1.193, "step": 6650 }, { "epoch": 2.0188192441948702, "grad_norm": 0.804199755191803, "learning_rate": 5.9653705953827466e-05, "loss": 1.2387, "step": 6651 }, { "epoch": 2.019122780391562, "grad_norm": 0.7707446217536926, "learning_rate": 5.964763061968408e-05, "loss": 0.9575, "step": 6652 }, { "epoch": 2.019426316588253, "grad_norm": 0.704628586769104, "learning_rate": 5.964155528554071e-05, "loss": 1.1401, "step": 6653 }, { "epoch": 2.0197298527849448, "grad_norm": 0.6447728276252747, "learning_rate": 5.963547995139733e-05, "loss": 0.6612, "step": 6654 }, { "epoch": 2.020033388981636, "grad_norm": 0.6423813104629517, "learning_rate": 5.962940461725395e-05, "loss": 0.9907, "step": 6655 }, { "epoch": 2.0203369251783276, "grad_norm": 0.6763865947723389, "learning_rate": 5.962332928311057e-05, "loss": 1.2592, "step": 6656 }, { "epoch": 2.020640461375019, "grad_norm": 0.6126373410224915, "learning_rate": 5.96172539489672e-05, "loss": 1.3896, "step": 6657 }, { "epoch": 2.0209439975717105, "grad_norm": 0.6113005876541138, "learning_rate": 5.9611178614823814e-05, "loss": 1.0545, "step": 6658 }, { "epoch": 2.0212475337684017, "grad_norm": 0.5360875725746155, "learning_rate": 5.960510328068044e-05, "loss": 1.5621, "step": 6659 }, { "epoch": 2.0215510699650934, "grad_norm": 0.7377156615257263, "learning_rate": 5.959902794653707e-05, "loss": 0.9596, "step": 6660 }, { "epoch": 2.0218546061617846, "grad_norm": 0.7450338006019592, "learning_rate": 5.959295261239368e-05, "loss": 0.7481, "step": 6661 }, { "epoch": 2.0221581423584762, "grad_norm": 0.8769157528877258, "learning_rate": 5.958687727825031e-05, "loss": 1.3477, "step": 6662 }, { "epoch": 2.022461678555168, "grad_norm": 0.45230183005332947, "learning_rate": 5.9580801944106934e-05, "loss": 1.3026, "step": 6663 }, { "epoch": 2.022765214751859, "grad_norm": 0.7798328995704651, "learning_rate": 5.957472660996355e-05, "loss": 1.3114, "step": 6664 }, { "epoch": 2.0230687509485508, "grad_norm": 0.7356457114219666, "learning_rate": 5.9568651275820176e-05, "loss": 1.195, "step": 6665 }, { "epoch": 2.023372287145242, "grad_norm": 0.587321937084198, "learning_rate": 5.956257594167679e-05, "loss": 1.7201, "step": 6666 }, { "epoch": 2.0236758233419336, "grad_norm": 0.6452397704124451, "learning_rate": 5.955650060753342e-05, "loss": 1.3267, "step": 6667 }, { "epoch": 2.023979359538625, "grad_norm": 0.7600719332695007, "learning_rate": 5.955042527339004e-05, "loss": 1.3694, "step": 6668 }, { "epoch": 2.0242828957353165, "grad_norm": 0.8960398435592651, "learning_rate": 5.954434993924666e-05, "loss": 1.202, "step": 6669 }, { "epoch": 2.0245864319320077, "grad_norm": 0.7933389544487, "learning_rate": 5.953827460510328e-05, "loss": 0.9678, "step": 6670 }, { "epoch": 2.0248899681286994, "grad_norm": 0.773642897605896, "learning_rate": 5.953219927095991e-05, "loss": 1.3841, "step": 6671 }, { "epoch": 2.025193504325391, "grad_norm": 0.6029089689254761, "learning_rate": 5.9526123936816524e-05, "loss": 1.1724, "step": 6672 }, { "epoch": 2.0254970405220822, "grad_norm": 0.7390233278274536, "learning_rate": 5.952004860267315e-05, "loss": 0.7247, "step": 6673 }, { "epoch": 2.025800576718774, "grad_norm": 0.7552881836891174, "learning_rate": 5.951397326852978e-05, "loss": 1.234, "step": 6674 }, { "epoch": 2.026104112915465, "grad_norm": 1.0866544246673584, "learning_rate": 5.950789793438639e-05, "loss": 0.5423, "step": 6675 }, { "epoch": 2.0264076491121568, "grad_norm": 0.6920125484466553, "learning_rate": 5.950182260024302e-05, "loss": 1.6804, "step": 6676 }, { "epoch": 2.026711185308848, "grad_norm": 0.6557866334915161, "learning_rate": 5.949574726609963e-05, "loss": 1.4507, "step": 6677 }, { "epoch": 2.0270147215055396, "grad_norm": 0.7183884382247925, "learning_rate": 5.948967193195626e-05, "loss": 0.7779, "step": 6678 }, { "epoch": 2.027318257702231, "grad_norm": 0.6660280823707581, "learning_rate": 5.9483596597812886e-05, "loss": 1.5683, "step": 6679 }, { "epoch": 2.0276217938989225, "grad_norm": 0.7593160271644592, "learning_rate": 5.94775212636695e-05, "loss": 1.4431, "step": 6680 }, { "epoch": 2.0279253300956137, "grad_norm": 0.6688575744628906, "learning_rate": 5.947144592952613e-05, "loss": 1.4504, "step": 6681 }, { "epoch": 2.0282288662923054, "grad_norm": 0.662132740020752, "learning_rate": 5.946537059538275e-05, "loss": 1.001, "step": 6682 }, { "epoch": 2.028532402488997, "grad_norm": 0.6534841060638428, "learning_rate": 5.945929526123937e-05, "loss": 1.6814, "step": 6683 }, { "epoch": 2.0288359386856882, "grad_norm": 0.7396661043167114, "learning_rate": 5.945321992709599e-05, "loss": 1.5876, "step": 6684 }, { "epoch": 2.02913947488238, "grad_norm": 0.657581090927124, "learning_rate": 5.944714459295262e-05, "loss": 1.365, "step": 6685 }, { "epoch": 2.029443011079071, "grad_norm": 0.6300247311592102, "learning_rate": 5.9441069258809234e-05, "loss": 1.3679, "step": 6686 }, { "epoch": 2.0297465472757628, "grad_norm": 0.7843654751777649, "learning_rate": 5.943499392466586e-05, "loss": 1.4725, "step": 6687 }, { "epoch": 2.030050083472454, "grad_norm": 0.7142737507820129, "learning_rate": 5.942891859052249e-05, "loss": 1.2956, "step": 6688 }, { "epoch": 2.0303536196691456, "grad_norm": 0.5903081893920898, "learning_rate": 5.94228432563791e-05, "loss": 0.841, "step": 6689 }, { "epoch": 2.030657155865837, "grad_norm": 0.6715419888496399, "learning_rate": 5.941676792223573e-05, "loss": 0.9159, "step": 6690 }, { "epoch": 2.0309606920625285, "grad_norm": 0.5854306817054749, "learning_rate": 5.941069258809234e-05, "loss": 0.7889, "step": 6691 }, { "epoch": 2.0312642282592197, "grad_norm": 0.8587661981582642, "learning_rate": 5.940461725394897e-05, "loss": 1.3361, "step": 6692 }, { "epoch": 2.0315677644559114, "grad_norm": 0.7016955018043518, "learning_rate": 5.9398541919805596e-05, "loss": 0.9073, "step": 6693 }, { "epoch": 2.031871300652603, "grad_norm": 0.6640695333480835, "learning_rate": 5.9392466585662206e-05, "loss": 1.3431, "step": 6694 }, { "epoch": 2.0321748368492942, "grad_norm": 0.6428939700126648, "learning_rate": 5.938639125151884e-05, "loss": 1.5478, "step": 6695 }, { "epoch": 2.032478373045986, "grad_norm": 0.7507612109184265, "learning_rate": 5.938031591737546e-05, "loss": 0.4712, "step": 6696 }, { "epoch": 2.032781909242677, "grad_norm": 0.7252958416938782, "learning_rate": 5.937424058323208e-05, "loss": 1.3709, "step": 6697 }, { "epoch": 2.0330854454393688, "grad_norm": 0.6997389793395996, "learning_rate": 5.93681652490887e-05, "loss": 1.1456, "step": 6698 }, { "epoch": 2.03338898163606, "grad_norm": 0.7252766489982605, "learning_rate": 5.936208991494533e-05, "loss": 1.3329, "step": 6699 }, { "epoch": 2.0336925178327516, "grad_norm": 1.0754157304763794, "learning_rate": 5.9356014580801944e-05, "loss": 0.7818, "step": 6700 }, { "epoch": 2.033996054029443, "grad_norm": 0.7663254141807556, "learning_rate": 5.934993924665857e-05, "loss": 1.3483, "step": 6701 }, { "epoch": 2.0342995902261345, "grad_norm": 0.7525759339332581, "learning_rate": 5.93438639125152e-05, "loss": 1.3253, "step": 6702 }, { "epoch": 2.0346031264228257, "grad_norm": 1.0139453411102295, "learning_rate": 5.933778857837181e-05, "loss": 1.2358, "step": 6703 }, { "epoch": 2.0349066626195174, "grad_norm": 0.7129602432250977, "learning_rate": 5.933171324422844e-05, "loss": 1.2781, "step": 6704 }, { "epoch": 2.035210198816209, "grad_norm": 0.6935693621635437, "learning_rate": 5.932563791008505e-05, "loss": 0.946, "step": 6705 }, { "epoch": 2.0355137350129002, "grad_norm": 0.5569899678230286, "learning_rate": 5.9319562575941675e-05, "loss": 1.6153, "step": 6706 }, { "epoch": 2.035817271209592, "grad_norm": 0.6837217211723328, "learning_rate": 5.9313487241798306e-05, "loss": 1.6131, "step": 6707 }, { "epoch": 2.036120807406283, "grad_norm": 0.6533646583557129, "learning_rate": 5.9307411907654917e-05, "loss": 1.2929, "step": 6708 }, { "epoch": 2.0364243436029748, "grad_norm": 0.7194601893424988, "learning_rate": 5.930133657351155e-05, "loss": 1.0747, "step": 6709 }, { "epoch": 2.036727879799666, "grad_norm": 0.7047435641288757, "learning_rate": 5.929526123936817e-05, "loss": 1.1719, "step": 6710 }, { "epoch": 2.0370314159963576, "grad_norm": 0.650740921497345, "learning_rate": 5.928918590522479e-05, "loss": 1.4469, "step": 6711 }, { "epoch": 2.037334952193049, "grad_norm": 0.7121087908744812, "learning_rate": 5.928311057108141e-05, "loss": 1.1483, "step": 6712 }, { "epoch": 2.0376384883897405, "grad_norm": 0.6568427681922913, "learning_rate": 5.927703523693804e-05, "loss": 0.7621, "step": 6713 }, { "epoch": 2.037942024586432, "grad_norm": 0.7922488451004028, "learning_rate": 5.9270959902794654e-05, "loss": 1.528, "step": 6714 }, { "epoch": 2.0382455607831234, "grad_norm": 0.5935818552970886, "learning_rate": 5.926488456865128e-05, "loss": 1.0031, "step": 6715 }, { "epoch": 2.038549096979815, "grad_norm": 0.9066329002380371, "learning_rate": 5.925880923450791e-05, "loss": 0.8471, "step": 6716 }, { "epoch": 2.0388526331765062, "grad_norm": 0.7111946940422058, "learning_rate": 5.925273390036452e-05, "loss": 1.2482, "step": 6717 }, { "epoch": 2.039156169373198, "grad_norm": 0.7186754941940308, "learning_rate": 5.9246658566221144e-05, "loss": 1.1287, "step": 6718 }, { "epoch": 2.039459705569889, "grad_norm": 0.7100489139556885, "learning_rate": 5.924058323207776e-05, "loss": 1.4158, "step": 6719 }, { "epoch": 2.0397632417665807, "grad_norm": 1.1844197511672974, "learning_rate": 5.9234507897934385e-05, "loss": 0.5527, "step": 6720 }, { "epoch": 2.040066777963272, "grad_norm": 0.45704275369644165, "learning_rate": 5.9228432563791016e-05, "loss": 0.8737, "step": 6721 }, { "epoch": 2.0403703141599636, "grad_norm": 0.5917233228683472, "learning_rate": 5.9222357229647627e-05, "loss": 0.9702, "step": 6722 }, { "epoch": 2.040673850356655, "grad_norm": 0.7638018131256104, "learning_rate": 5.921628189550426e-05, "loss": 1.378, "step": 6723 }, { "epoch": 2.0409773865533465, "grad_norm": 0.8409146666526794, "learning_rate": 5.921020656136088e-05, "loss": 1.0934, "step": 6724 }, { "epoch": 2.041280922750038, "grad_norm": 0.7116609811782837, "learning_rate": 5.92041312272175e-05, "loss": 1.0107, "step": 6725 }, { "epoch": 2.0415844589467294, "grad_norm": 0.7379602789878845, "learning_rate": 5.919805589307412e-05, "loss": 1.4649, "step": 6726 }, { "epoch": 2.041887995143421, "grad_norm": 0.7317615747451782, "learning_rate": 5.919198055893075e-05, "loss": 1.0289, "step": 6727 }, { "epoch": 2.042191531340112, "grad_norm": 0.7722091674804688, "learning_rate": 5.9185905224787364e-05, "loss": 1.352, "step": 6728 }, { "epoch": 2.042495067536804, "grad_norm": 0.8695550560951233, "learning_rate": 5.917982989064399e-05, "loss": 1.2503, "step": 6729 }, { "epoch": 2.042798603733495, "grad_norm": 0.7947261333465576, "learning_rate": 5.917375455650061e-05, "loss": 1.4089, "step": 6730 }, { "epoch": 2.0431021399301867, "grad_norm": 0.6829918622970581, "learning_rate": 5.916767922235723e-05, "loss": 1.3916, "step": 6731 }, { "epoch": 2.043405676126878, "grad_norm": 0.8099695444107056, "learning_rate": 5.9161603888213854e-05, "loss": 1.0505, "step": 6732 }, { "epoch": 2.0437092123235696, "grad_norm": 0.5343392491340637, "learning_rate": 5.915552855407047e-05, "loss": 1.172, "step": 6733 }, { "epoch": 2.044012748520261, "grad_norm": 0.740508496761322, "learning_rate": 5.9149453219927095e-05, "loss": 1.3653, "step": 6734 }, { "epoch": 2.0443162847169525, "grad_norm": 0.565724790096283, "learning_rate": 5.9143377885783726e-05, "loss": 1.5483, "step": 6735 }, { "epoch": 2.044619820913644, "grad_norm": 0.8615663647651672, "learning_rate": 5.913730255164034e-05, "loss": 1.203, "step": 6736 }, { "epoch": 2.0449233571103353, "grad_norm": 0.7551422119140625, "learning_rate": 5.913122721749697e-05, "loss": 1.4751, "step": 6737 }, { "epoch": 2.045226893307027, "grad_norm": 0.6796860694885254, "learning_rate": 5.912515188335359e-05, "loss": 1.6234, "step": 6738 }, { "epoch": 2.045530429503718, "grad_norm": 0.8114526271820068, "learning_rate": 5.911907654921021e-05, "loss": 1.1534, "step": 6739 }, { "epoch": 2.04583396570041, "grad_norm": 0.7070174217224121, "learning_rate": 5.911300121506683e-05, "loss": 1.5523, "step": 6740 }, { "epoch": 2.046137501897101, "grad_norm": 0.8905296325683594, "learning_rate": 5.910692588092346e-05, "loss": 0.5081, "step": 6741 }, { "epoch": 2.0464410380937927, "grad_norm": 0.9360664486885071, "learning_rate": 5.9100850546780074e-05, "loss": 0.6751, "step": 6742 }, { "epoch": 2.046744574290484, "grad_norm": 0.7047433853149414, "learning_rate": 5.90947752126367e-05, "loss": 1.3511, "step": 6743 }, { "epoch": 2.0470481104871756, "grad_norm": 0.655782163143158, "learning_rate": 5.908869987849332e-05, "loss": 1.3505, "step": 6744 }, { "epoch": 2.0473516466838673, "grad_norm": 0.5383403301239014, "learning_rate": 5.908262454434994e-05, "loss": 1.0058, "step": 6745 }, { "epoch": 2.0476551828805585, "grad_norm": 0.8472936153411865, "learning_rate": 5.9076549210206564e-05, "loss": 1.4217, "step": 6746 }, { "epoch": 2.04795871907725, "grad_norm": 0.71051424741745, "learning_rate": 5.907047387606318e-05, "loss": 1.2775, "step": 6747 }, { "epoch": 2.0482622552739413, "grad_norm": 0.8670709133148193, "learning_rate": 5.9064398541919805e-05, "loss": 1.0466, "step": 6748 }, { "epoch": 2.048565791470633, "grad_norm": 0.7335761785507202, "learning_rate": 5.9058323207776436e-05, "loss": 1.176, "step": 6749 }, { "epoch": 2.048869327667324, "grad_norm": 0.6880519390106201, "learning_rate": 5.905224787363305e-05, "loss": 1.5351, "step": 6750 }, { "epoch": 2.049172863864016, "grad_norm": 0.662702202796936, "learning_rate": 5.904617253948968e-05, "loss": 0.9803, "step": 6751 }, { "epoch": 2.049476400060707, "grad_norm": 0.5580474734306335, "learning_rate": 5.90400972053463e-05, "loss": 1.8734, "step": 6752 }, { "epoch": 2.0497799362573987, "grad_norm": 0.6297309398651123, "learning_rate": 5.903402187120292e-05, "loss": 1.5143, "step": 6753 }, { "epoch": 2.05008347245409, "grad_norm": 0.744547426700592, "learning_rate": 5.902794653705954e-05, "loss": 1.1973, "step": 6754 }, { "epoch": 2.0503870086507816, "grad_norm": 0.8597540855407715, "learning_rate": 5.902187120291617e-05, "loss": 1.3367, "step": 6755 }, { "epoch": 2.0506905448474733, "grad_norm": 0.6745612621307373, "learning_rate": 5.9015795868772784e-05, "loss": 1.0339, "step": 6756 }, { "epoch": 2.0509940810441645, "grad_norm": 0.6249368786811829, "learning_rate": 5.900972053462941e-05, "loss": 1.5042, "step": 6757 }, { "epoch": 2.051297617240856, "grad_norm": 0.6534069776535034, "learning_rate": 5.9003645200486026e-05, "loss": 1.486, "step": 6758 }, { "epoch": 2.0516011534375473, "grad_norm": 0.6959986686706543, "learning_rate": 5.899756986634265e-05, "loss": 1.5417, "step": 6759 }, { "epoch": 2.051904689634239, "grad_norm": 0.8996198773384094, "learning_rate": 5.8991494532199274e-05, "loss": 0.9192, "step": 6760 }, { "epoch": 2.05220822583093, "grad_norm": 0.7029753923416138, "learning_rate": 5.898541919805589e-05, "loss": 1.5122, "step": 6761 }, { "epoch": 2.052511762027622, "grad_norm": 0.8004100322723389, "learning_rate": 5.8979343863912515e-05, "loss": 1.1594, "step": 6762 }, { "epoch": 2.052815298224313, "grad_norm": 0.9627721905708313, "learning_rate": 5.8973268529769146e-05, "loss": 0.8332, "step": 6763 }, { "epoch": 2.0531188344210047, "grad_norm": 1.0539624691009521, "learning_rate": 5.896719319562576e-05, "loss": 1.4044, "step": 6764 }, { "epoch": 2.053422370617696, "grad_norm": 0.48121675848960876, "learning_rate": 5.896111786148239e-05, "loss": 1.3278, "step": 6765 }, { "epoch": 2.0537259068143876, "grad_norm": 0.6761457920074463, "learning_rate": 5.895504252733901e-05, "loss": 0.9294, "step": 6766 }, { "epoch": 2.0540294430110793, "grad_norm": 0.6362090110778809, "learning_rate": 5.894896719319562e-05, "loss": 1.4175, "step": 6767 }, { "epoch": 2.0543329792077705, "grad_norm": 0.7415809631347656, "learning_rate": 5.894289185905225e-05, "loss": 1.2596, "step": 6768 }, { "epoch": 2.054636515404462, "grad_norm": 0.5656198263168335, "learning_rate": 5.893681652490888e-05, "loss": 1.1221, "step": 6769 }, { "epoch": 2.0549400516011533, "grad_norm": 0.7808176875114441, "learning_rate": 5.8930741190765494e-05, "loss": 1.3354, "step": 6770 }, { "epoch": 2.055243587797845, "grad_norm": 0.7137062549591064, "learning_rate": 5.892466585662212e-05, "loss": 1.291, "step": 6771 }, { "epoch": 2.055547123994536, "grad_norm": 0.6661720871925354, "learning_rate": 5.8918590522478736e-05, "loss": 1.3515, "step": 6772 }, { "epoch": 2.055850660191228, "grad_norm": 0.8269581198692322, "learning_rate": 5.891251518833536e-05, "loss": 1.2311, "step": 6773 }, { "epoch": 2.056154196387919, "grad_norm": 0.629157543182373, "learning_rate": 5.8906439854191984e-05, "loss": 1.1386, "step": 6774 }, { "epoch": 2.0564577325846107, "grad_norm": 0.7245308756828308, "learning_rate": 5.89003645200486e-05, "loss": 1.1915, "step": 6775 }, { "epoch": 2.0567612687813024, "grad_norm": 0.8003824353218079, "learning_rate": 5.8894289185905225e-05, "loss": 1.1763, "step": 6776 }, { "epoch": 2.0570648049779936, "grad_norm": 0.8073933720588684, "learning_rate": 5.8888213851761856e-05, "loss": 0.9701, "step": 6777 }, { "epoch": 2.0573683411746853, "grad_norm": 0.7716240286827087, "learning_rate": 5.888213851761847e-05, "loss": 1.2758, "step": 6778 }, { "epoch": 2.0576718773713765, "grad_norm": 0.7790003418922424, "learning_rate": 5.887606318347509e-05, "loss": 1.5667, "step": 6779 }, { "epoch": 2.057975413568068, "grad_norm": 0.6642122268676758, "learning_rate": 5.886998784933172e-05, "loss": 1.4832, "step": 6780 }, { "epoch": 2.0582789497647593, "grad_norm": 0.8268905878067017, "learning_rate": 5.886391251518833e-05, "loss": 1.0759, "step": 6781 }, { "epoch": 2.058582485961451, "grad_norm": 0.629315972328186, "learning_rate": 5.885783718104496e-05, "loss": 0.765, "step": 6782 }, { "epoch": 2.058886022158142, "grad_norm": 0.7830377221107483, "learning_rate": 5.885176184690159e-05, "loss": 1.4531, "step": 6783 }, { "epoch": 2.059189558354834, "grad_norm": 0.6775173544883728, "learning_rate": 5.8845686512758205e-05, "loss": 1.3346, "step": 6784 }, { "epoch": 2.059493094551525, "grad_norm": 0.7450172901153564, "learning_rate": 5.883961117861483e-05, "loss": 1.2739, "step": 6785 }, { "epoch": 2.0597966307482167, "grad_norm": 0.6644347310066223, "learning_rate": 5.8833535844471446e-05, "loss": 1.6158, "step": 6786 }, { "epoch": 2.0601001669449084, "grad_norm": 0.9597615003585815, "learning_rate": 5.882746051032807e-05, "loss": 1.3137, "step": 6787 }, { "epoch": 2.0604037031415996, "grad_norm": 0.7239344120025635, "learning_rate": 5.8821385176184694e-05, "loss": 1.5057, "step": 6788 }, { "epoch": 2.0607072393382913, "grad_norm": 0.7747911810874939, "learning_rate": 5.881530984204131e-05, "loss": 1.1718, "step": 6789 }, { "epoch": 2.0610107755349825, "grad_norm": 0.5871680974960327, "learning_rate": 5.8809234507897935e-05, "loss": 1.3308, "step": 6790 }, { "epoch": 2.061314311731674, "grad_norm": 0.824733316898346, "learning_rate": 5.880315917375456e-05, "loss": 1.2952, "step": 6791 }, { "epoch": 2.0616178479283653, "grad_norm": 0.7985734939575195, "learning_rate": 5.879708383961118e-05, "loss": 1.5404, "step": 6792 }, { "epoch": 2.061921384125057, "grad_norm": 0.6020705103874207, "learning_rate": 5.87910085054678e-05, "loss": 1.07, "step": 6793 }, { "epoch": 2.062224920321748, "grad_norm": 0.8818385601043701, "learning_rate": 5.878493317132443e-05, "loss": 1.3636, "step": 6794 }, { "epoch": 2.06252845651844, "grad_norm": 0.5820996165275574, "learning_rate": 5.877885783718104e-05, "loss": 1.5162, "step": 6795 }, { "epoch": 2.062831992715131, "grad_norm": 0.7149410247802734, "learning_rate": 5.877278250303767e-05, "loss": 1.3473, "step": 6796 }, { "epoch": 2.0631355289118227, "grad_norm": 0.8226627707481384, "learning_rate": 5.87667071688943e-05, "loss": 1.0854, "step": 6797 }, { "epoch": 2.0634390651085144, "grad_norm": 0.5526022911071777, "learning_rate": 5.8760631834750915e-05, "loss": 1.4572, "step": 6798 }, { "epoch": 2.0637426013052056, "grad_norm": 0.8518609404563904, "learning_rate": 5.875455650060754e-05, "loss": 1.0517, "step": 6799 }, { "epoch": 2.0640461375018972, "grad_norm": 0.6736918091773987, "learning_rate": 5.8748481166464156e-05, "loss": 1.3883, "step": 6800 }, { "epoch": 2.0643496736985885, "grad_norm": 0.6669840812683105, "learning_rate": 5.874240583232078e-05, "loss": 1.3735, "step": 6801 }, { "epoch": 2.06465320989528, "grad_norm": 0.6384963989257812, "learning_rate": 5.8736330498177404e-05, "loss": 1.6629, "step": 6802 }, { "epoch": 2.0649567460919713, "grad_norm": 0.7732337713241577, "learning_rate": 5.873025516403402e-05, "loss": 1.3029, "step": 6803 }, { "epoch": 2.065260282288663, "grad_norm": 1.0315752029418945, "learning_rate": 5.8724179829890646e-05, "loss": 1.0835, "step": 6804 }, { "epoch": 2.065563818485354, "grad_norm": 0.8240241408348083, "learning_rate": 5.871810449574727e-05, "loss": 1.1011, "step": 6805 }, { "epoch": 2.065867354682046, "grad_norm": 0.7314655184745789, "learning_rate": 5.871202916160389e-05, "loss": 1.4013, "step": 6806 }, { "epoch": 2.0661708908787375, "grad_norm": 0.8280476331710815, "learning_rate": 5.870595382746051e-05, "loss": 1.2687, "step": 6807 }, { "epoch": 2.0664744270754287, "grad_norm": 0.6227006316184998, "learning_rate": 5.869987849331714e-05, "loss": 1.2817, "step": 6808 }, { "epoch": 2.0667779632721204, "grad_norm": 0.6892315745353699, "learning_rate": 5.869380315917375e-05, "loss": 1.4893, "step": 6809 }, { "epoch": 2.0670814994688116, "grad_norm": 0.6203703880310059, "learning_rate": 5.868772782503038e-05, "loss": 1.2614, "step": 6810 }, { "epoch": 2.0673850356655032, "grad_norm": 0.814782977104187, "learning_rate": 5.868165249088701e-05, "loss": 1.0062, "step": 6811 }, { "epoch": 2.0676885718621945, "grad_norm": 0.4778033494949341, "learning_rate": 5.8675577156743625e-05, "loss": 1.5858, "step": 6812 }, { "epoch": 2.067992108058886, "grad_norm": 0.3774012625217438, "learning_rate": 5.866950182260025e-05, "loss": 1.3399, "step": 6813 }, { "epoch": 2.0682956442555773, "grad_norm": 0.7372041344642639, "learning_rate": 5.8663426488456866e-05, "loss": 1.4809, "step": 6814 }, { "epoch": 2.068599180452269, "grad_norm": 0.5884072780609131, "learning_rate": 5.865735115431349e-05, "loss": 1.2738, "step": 6815 }, { "epoch": 2.06890271664896, "grad_norm": 0.7832264304161072, "learning_rate": 5.8651275820170114e-05, "loss": 1.5305, "step": 6816 }, { "epoch": 2.069206252845652, "grad_norm": 0.6761386394500732, "learning_rate": 5.864520048602673e-05, "loss": 1.4384, "step": 6817 }, { "epoch": 2.0695097890423435, "grad_norm": 0.9826549291610718, "learning_rate": 5.8639125151883356e-05, "loss": 1.1518, "step": 6818 }, { "epoch": 2.0698133252390347, "grad_norm": 0.7240217924118042, "learning_rate": 5.863304981773998e-05, "loss": 1.4914, "step": 6819 }, { "epoch": 2.0701168614357264, "grad_norm": 0.6312853693962097, "learning_rate": 5.86269744835966e-05, "loss": 1.3774, "step": 6820 }, { "epoch": 2.0704203976324176, "grad_norm": 0.8521531224250793, "learning_rate": 5.862089914945322e-05, "loss": 1.3638, "step": 6821 }, { "epoch": 2.0707239338291092, "grad_norm": 0.7164038419723511, "learning_rate": 5.861482381530985e-05, "loss": 1.3947, "step": 6822 }, { "epoch": 2.0710274700258005, "grad_norm": 0.7422941327095032, "learning_rate": 5.860874848116646e-05, "loss": 1.3239, "step": 6823 }, { "epoch": 2.071331006222492, "grad_norm": 0.5846385955810547, "learning_rate": 5.860267314702309e-05, "loss": 1.539, "step": 6824 }, { "epoch": 2.0716345424191833, "grad_norm": 0.7266324758529663, "learning_rate": 5.859659781287972e-05, "loss": 1.2199, "step": 6825 }, { "epoch": 2.071938078615875, "grad_norm": 0.6877673268318176, "learning_rate": 5.8590522478736335e-05, "loss": 1.3777, "step": 6826 }, { "epoch": 2.072241614812566, "grad_norm": 0.6746101379394531, "learning_rate": 5.858444714459296e-05, "loss": 1.7111, "step": 6827 }, { "epoch": 2.072545151009258, "grad_norm": 0.6098113059997559, "learning_rate": 5.8578371810449576e-05, "loss": 1.1953, "step": 6828 }, { "epoch": 2.0728486872059495, "grad_norm": 0.8218063116073608, "learning_rate": 5.85722964763062e-05, "loss": 1.067, "step": 6829 }, { "epoch": 2.0731522234026407, "grad_norm": 0.6264614462852478, "learning_rate": 5.8566221142162824e-05, "loss": 0.8346, "step": 6830 }, { "epoch": 2.0734557595993324, "grad_norm": 0.6531704068183899, "learning_rate": 5.856014580801944e-05, "loss": 1.1214, "step": 6831 }, { "epoch": 2.0737592957960236, "grad_norm": 0.8438115119934082, "learning_rate": 5.8554070473876066e-05, "loss": 1.1834, "step": 6832 }, { "epoch": 2.0740628319927152, "grad_norm": 0.7190865874290466, "learning_rate": 5.854799513973269e-05, "loss": 1.4565, "step": 6833 }, { "epoch": 2.0743663681894065, "grad_norm": 0.7674055099487305, "learning_rate": 5.854191980558931e-05, "loss": 1.3615, "step": 6834 }, { "epoch": 2.074669904386098, "grad_norm": 1.1060407161712646, "learning_rate": 5.853584447144593e-05, "loss": 1.2854, "step": 6835 }, { "epoch": 2.0749734405827893, "grad_norm": 0.5805683732032776, "learning_rate": 5.852976913730256e-05, "loss": 1.1622, "step": 6836 }, { "epoch": 2.075276976779481, "grad_norm": 0.53741455078125, "learning_rate": 5.852369380315917e-05, "loss": 1.4795, "step": 6837 }, { "epoch": 2.075580512976172, "grad_norm": 0.8981469869613647, "learning_rate": 5.85176184690158e-05, "loss": 1.3667, "step": 6838 }, { "epoch": 2.075884049172864, "grad_norm": 0.734659731388092, "learning_rate": 5.851154313487243e-05, "loss": 1.1876, "step": 6839 }, { "epoch": 2.0761875853695555, "grad_norm": 0.8583666086196899, "learning_rate": 5.850546780072904e-05, "loss": 1.0072, "step": 6840 }, { "epoch": 2.0764911215662467, "grad_norm": 0.7365713119506836, "learning_rate": 5.849939246658567e-05, "loss": 0.9634, "step": 6841 }, { "epoch": 2.0767946577629384, "grad_norm": 0.7284424901008606, "learning_rate": 5.849331713244228e-05, "loss": 1.28, "step": 6842 }, { "epoch": 2.0770981939596296, "grad_norm": 0.8206003308296204, "learning_rate": 5.848724179829891e-05, "loss": 1.178, "step": 6843 }, { "epoch": 2.0774017301563212, "grad_norm": 0.6586450338363647, "learning_rate": 5.8481166464155534e-05, "loss": 1.6239, "step": 6844 }, { "epoch": 2.0777052663530124, "grad_norm": 0.8370587229728699, "learning_rate": 5.847509113001215e-05, "loss": 1.5696, "step": 6845 }, { "epoch": 2.078008802549704, "grad_norm": 0.715146005153656, "learning_rate": 5.8469015795868776e-05, "loss": 0.9131, "step": 6846 }, { "epoch": 2.0783123387463953, "grad_norm": 0.7461423277854919, "learning_rate": 5.84629404617254e-05, "loss": 1.6913, "step": 6847 }, { "epoch": 2.078615874943087, "grad_norm": 0.613091230392456, "learning_rate": 5.845686512758202e-05, "loss": 1.2838, "step": 6848 }, { "epoch": 2.0789194111397786, "grad_norm": 0.8387308716773987, "learning_rate": 5.845078979343864e-05, "loss": 1.502, "step": 6849 }, { "epoch": 2.07922294733647, "grad_norm": 0.804394006729126, "learning_rate": 5.844471445929527e-05, "loss": 1.114, "step": 6850 }, { "epoch": 2.0795264835331615, "grad_norm": 0.5923547148704529, "learning_rate": 5.843863912515188e-05, "loss": 1.2775, "step": 6851 }, { "epoch": 2.0798300197298527, "grad_norm": 0.7429361939430237, "learning_rate": 5.8432563791008507e-05, "loss": 1.2353, "step": 6852 }, { "epoch": 2.0801335559265444, "grad_norm": 0.7161668539047241, "learning_rate": 5.8426488456865124e-05, "loss": 1.4675, "step": 6853 }, { "epoch": 2.0804370921232356, "grad_norm": 0.7612913250923157, "learning_rate": 5.842041312272175e-05, "loss": 1.1781, "step": 6854 }, { "epoch": 2.0807406283199272, "grad_norm": 0.4716074466705322, "learning_rate": 5.841433778857838e-05, "loss": 1.1833, "step": 6855 }, { "epoch": 2.0810441645166184, "grad_norm": 0.673354983329773, "learning_rate": 5.840826245443499e-05, "loss": 1.416, "step": 6856 }, { "epoch": 2.08134770071331, "grad_norm": 0.7451295852661133, "learning_rate": 5.840218712029162e-05, "loss": 1.6398, "step": 6857 }, { "epoch": 2.0816512369100013, "grad_norm": 0.8739036321640015, "learning_rate": 5.8396111786148244e-05, "loss": 1.4303, "step": 6858 }, { "epoch": 2.081954773106693, "grad_norm": 0.761298418045044, "learning_rate": 5.839003645200486e-05, "loss": 1.3656, "step": 6859 }, { "epoch": 2.0822583093033846, "grad_norm": 0.822521984577179, "learning_rate": 5.8383961117861486e-05, "loss": 1.42, "step": 6860 }, { "epoch": 2.082561845500076, "grad_norm": 0.8375853300094604, "learning_rate": 5.837788578371811e-05, "loss": 1.0434, "step": 6861 }, { "epoch": 2.0828653816967675, "grad_norm": 0.8537359833717346, "learning_rate": 5.837181044957473e-05, "loss": 1.2341, "step": 6862 }, { "epoch": 2.0831689178934587, "grad_norm": 0.7931867241859436, "learning_rate": 5.836573511543135e-05, "loss": 0.8769, "step": 6863 }, { "epoch": 2.0834724540901504, "grad_norm": 0.7443602681159973, "learning_rate": 5.8359659781287975e-05, "loss": 1.3125, "step": 6864 }, { "epoch": 2.0837759902868416, "grad_norm": 0.7556854486465454, "learning_rate": 5.835358444714459e-05, "loss": 0.9453, "step": 6865 }, { "epoch": 2.0840795264835332, "grad_norm": 0.5914473533630371, "learning_rate": 5.834750911300122e-05, "loss": 1.4683, "step": 6866 }, { "epoch": 2.0843830626802244, "grad_norm": 0.5810937285423279, "learning_rate": 5.8341433778857834e-05, "loss": 0.9183, "step": 6867 }, { "epoch": 2.084686598876916, "grad_norm": 0.7326523661613464, "learning_rate": 5.833535844471446e-05, "loss": 1.4119, "step": 6868 }, { "epoch": 2.0849901350736078, "grad_norm": 0.7578046321868896, "learning_rate": 5.832928311057109e-05, "loss": 1.354, "step": 6869 }, { "epoch": 2.085293671270299, "grad_norm": 0.6388635635375977, "learning_rate": 5.83232077764277e-05, "loss": 1.2435, "step": 6870 }, { "epoch": 2.0855972074669906, "grad_norm": 0.8143454790115356, "learning_rate": 5.831713244228433e-05, "loss": 1.1209, "step": 6871 }, { "epoch": 2.085900743663682, "grad_norm": 0.7743310332298279, "learning_rate": 5.8311057108140954e-05, "loss": 1.0791, "step": 6872 }, { "epoch": 2.0862042798603735, "grad_norm": 0.6745989322662354, "learning_rate": 5.830498177399757e-05, "loss": 1.3102, "step": 6873 }, { "epoch": 2.0865078160570647, "grad_norm": 0.8118981122970581, "learning_rate": 5.8298906439854196e-05, "loss": 0.9206, "step": 6874 }, { "epoch": 2.0868113522537564, "grad_norm": 0.815087080001831, "learning_rate": 5.829283110571082e-05, "loss": 1.0726, "step": 6875 }, { "epoch": 2.0871148884504476, "grad_norm": 0.7686581611633301, "learning_rate": 5.828675577156744e-05, "loss": 1.3252, "step": 6876 }, { "epoch": 2.0874184246471392, "grad_norm": 0.8357821106910706, "learning_rate": 5.828068043742406e-05, "loss": 1.3634, "step": 6877 }, { "epoch": 2.0877219608438304, "grad_norm": 0.6431812047958374, "learning_rate": 5.8274605103280685e-05, "loss": 1.6529, "step": 6878 }, { "epoch": 2.088025497040522, "grad_norm": 0.7796066403388977, "learning_rate": 5.82685297691373e-05, "loss": 1.3433, "step": 6879 }, { "epoch": 2.0883290332372137, "grad_norm": 0.7999757528305054, "learning_rate": 5.826245443499393e-05, "loss": 1.4674, "step": 6880 }, { "epoch": 2.088632569433905, "grad_norm": 0.7284780740737915, "learning_rate": 5.8256379100850544e-05, "loss": 1.6366, "step": 6881 }, { "epoch": 2.0889361056305966, "grad_norm": 0.6166238188743591, "learning_rate": 5.825030376670717e-05, "loss": 1.3335, "step": 6882 }, { "epoch": 2.089239641827288, "grad_norm": 0.6601129174232483, "learning_rate": 5.82442284325638e-05, "loss": 1.0488, "step": 6883 }, { "epoch": 2.0895431780239795, "grad_norm": 0.6007164120674133, "learning_rate": 5.823815309842041e-05, "loss": 1.3009, "step": 6884 }, { "epoch": 2.0898467142206707, "grad_norm": 0.6937150359153748, "learning_rate": 5.823207776427704e-05, "loss": 1.3778, "step": 6885 }, { "epoch": 2.0901502504173624, "grad_norm": 0.7025666236877441, "learning_rate": 5.8226002430133664e-05, "loss": 1.5254, "step": 6886 }, { "epoch": 2.0904537866140536, "grad_norm": 0.6330369114875793, "learning_rate": 5.821992709599028e-05, "loss": 1.0681, "step": 6887 }, { "epoch": 2.0907573228107452, "grad_norm": 0.9064787030220032, "learning_rate": 5.8213851761846906e-05, "loss": 1.2772, "step": 6888 }, { "epoch": 2.0910608590074364, "grad_norm": 0.7519260048866272, "learning_rate": 5.820777642770353e-05, "loss": 1.2378, "step": 6889 }, { "epoch": 2.091364395204128, "grad_norm": 0.8459030985832214, "learning_rate": 5.820170109356015e-05, "loss": 1.338, "step": 6890 }, { "epoch": 2.0916679314008197, "grad_norm": 0.6403706073760986, "learning_rate": 5.819562575941677e-05, "loss": 1.5272, "step": 6891 }, { "epoch": 2.091971467597511, "grad_norm": 1.1030857563018799, "learning_rate": 5.8189550425273395e-05, "loss": 1.1563, "step": 6892 }, { "epoch": 2.0922750037942026, "grad_norm": 0.5904721021652222, "learning_rate": 5.818347509113001e-05, "loss": 0.9227, "step": 6893 }, { "epoch": 2.092578539990894, "grad_norm": 0.5721134543418884, "learning_rate": 5.817739975698664e-05, "loss": 1.4074, "step": 6894 }, { "epoch": 2.0928820761875855, "grad_norm": 0.6387042999267578, "learning_rate": 5.8171324422843254e-05, "loss": 1.3688, "step": 6895 }, { "epoch": 2.0931856123842767, "grad_norm": 0.5705280900001526, "learning_rate": 5.816524908869988e-05, "loss": 1.3471, "step": 6896 }, { "epoch": 2.0934891485809684, "grad_norm": 1.1113790273666382, "learning_rate": 5.815917375455651e-05, "loss": 1.1356, "step": 6897 }, { "epoch": 2.0937926847776596, "grad_norm": 0.49781277775764465, "learning_rate": 5.815309842041312e-05, "loss": 1.6787, "step": 6898 }, { "epoch": 2.094096220974351, "grad_norm": 0.6792327165603638, "learning_rate": 5.814702308626975e-05, "loss": 1.4144, "step": 6899 }, { "epoch": 2.0943997571710424, "grad_norm": 0.7339219450950623, "learning_rate": 5.8140947752126374e-05, "loss": 1.087, "step": 6900 }, { "epoch": 2.094703293367734, "grad_norm": 0.8383113145828247, "learning_rate": 5.813487241798299e-05, "loss": 1.585, "step": 6901 }, { "epoch": 2.0950068295644257, "grad_norm": 0.6781391501426697, "learning_rate": 5.8128797083839616e-05, "loss": 1.4833, "step": 6902 }, { "epoch": 2.095310365761117, "grad_norm": 0.8215876817703247, "learning_rate": 5.812272174969624e-05, "loss": 1.2299, "step": 6903 }, { "epoch": 2.0956139019578086, "grad_norm": 0.6117851138114929, "learning_rate": 5.811664641555286e-05, "loss": 0.7209, "step": 6904 }, { "epoch": 2.0959174381545, "grad_norm": 0.7788382172584534, "learning_rate": 5.811057108140948e-05, "loss": 1.4464, "step": 6905 }, { "epoch": 2.0962209743511915, "grad_norm": 0.4706348180770874, "learning_rate": 5.8104495747266105e-05, "loss": 0.6949, "step": 6906 }, { "epoch": 2.0965245105478827, "grad_norm": 0.9112883806228638, "learning_rate": 5.809842041312272e-05, "loss": 0.9623, "step": 6907 }, { "epoch": 2.0968280467445743, "grad_norm": 0.7183716893196106, "learning_rate": 5.809234507897935e-05, "loss": 1.297, "step": 6908 }, { "epoch": 2.0971315829412656, "grad_norm": 1.03567373752594, "learning_rate": 5.8086269744835964e-05, "loss": 1.3522, "step": 6909 }, { "epoch": 2.097435119137957, "grad_norm": 0.8804583549499512, "learning_rate": 5.808019441069259e-05, "loss": 1.5869, "step": 6910 }, { "epoch": 2.097738655334649, "grad_norm": 0.7099947333335876, "learning_rate": 5.807411907654922e-05, "loss": 1.1776, "step": 6911 }, { "epoch": 2.09804219153134, "grad_norm": 0.7495343685150146, "learning_rate": 5.806804374240583e-05, "loss": 1.5691, "step": 6912 }, { "epoch": 2.0983457277280317, "grad_norm": 0.7520137429237366, "learning_rate": 5.806196840826246e-05, "loss": 1.1412, "step": 6913 }, { "epoch": 2.098649263924723, "grad_norm": 0.6386212706565857, "learning_rate": 5.8055893074119085e-05, "loss": 1.0, "step": 6914 }, { "epoch": 2.0989528001214146, "grad_norm": 0.5278308987617493, "learning_rate": 5.8049817739975695e-05, "loss": 1.3963, "step": 6915 }, { "epoch": 2.099256336318106, "grad_norm": 0.8359299898147583, "learning_rate": 5.8043742405832326e-05, "loss": 1.1387, "step": 6916 }, { "epoch": 2.0995598725147975, "grad_norm": 0.6538137197494507, "learning_rate": 5.803766707168895e-05, "loss": 1.0053, "step": 6917 }, { "epoch": 2.0998634087114887, "grad_norm": 0.7379575967788696, "learning_rate": 5.803159173754557e-05, "loss": 1.3158, "step": 6918 }, { "epoch": 2.1001669449081803, "grad_norm": 0.5702930092811584, "learning_rate": 5.802551640340219e-05, "loss": 1.3555, "step": 6919 }, { "epoch": 2.1004704811048716, "grad_norm": 0.8485485911369324, "learning_rate": 5.8019441069258815e-05, "loss": 1.2317, "step": 6920 }, { "epoch": 2.100774017301563, "grad_norm": 0.7139195203781128, "learning_rate": 5.801336573511543e-05, "loss": 1.2626, "step": 6921 }, { "epoch": 2.101077553498255, "grad_norm": 0.7576490044593811, "learning_rate": 5.800729040097206e-05, "loss": 0.9976, "step": 6922 }, { "epoch": 2.101381089694946, "grad_norm": 0.7416251301765442, "learning_rate": 5.8001215066828674e-05, "loss": 1.2989, "step": 6923 }, { "epoch": 2.1016846258916377, "grad_norm": 0.7933403849601746, "learning_rate": 5.79951397326853e-05, "loss": 1.6501, "step": 6924 }, { "epoch": 2.101988162088329, "grad_norm": 0.6887480020523071, "learning_rate": 5.798906439854193e-05, "loss": 1.3108, "step": 6925 }, { "epoch": 2.1022916982850206, "grad_norm": 0.6056838631629944, "learning_rate": 5.798298906439854e-05, "loss": 1.5473, "step": 6926 }, { "epoch": 2.102595234481712, "grad_norm": 0.6874396800994873, "learning_rate": 5.7976913730255164e-05, "loss": 1.4108, "step": 6927 }, { "epoch": 2.1028987706784035, "grad_norm": 0.7933046817779541, "learning_rate": 5.7970838396111795e-05, "loss": 1.4558, "step": 6928 }, { "epoch": 2.1032023068750947, "grad_norm": 0.6857606172561646, "learning_rate": 5.7964763061968405e-05, "loss": 1.0801, "step": 6929 }, { "epoch": 2.1035058430717863, "grad_norm": 0.8258737325668335, "learning_rate": 5.7958687727825036e-05, "loss": 0.9648, "step": 6930 }, { "epoch": 2.1038093792684776, "grad_norm": 0.9222277998924255, "learning_rate": 5.795261239368166e-05, "loss": 1.2347, "step": 6931 }, { "epoch": 2.104112915465169, "grad_norm": 0.5885825157165527, "learning_rate": 5.794653705953828e-05, "loss": 1.5702, "step": 6932 }, { "epoch": 2.104416451661861, "grad_norm": 0.6979884505271912, "learning_rate": 5.79404617253949e-05, "loss": 0.6671, "step": 6933 }, { "epoch": 2.104719987858552, "grad_norm": 0.7170132994651794, "learning_rate": 5.793438639125152e-05, "loss": 1.4453, "step": 6934 }, { "epoch": 2.1050235240552437, "grad_norm": 0.6434389352798462, "learning_rate": 5.792831105710814e-05, "loss": 0.9343, "step": 6935 }, { "epoch": 2.105327060251935, "grad_norm": 0.8252448439598083, "learning_rate": 5.792223572296477e-05, "loss": 1.335, "step": 6936 }, { "epoch": 2.1056305964486266, "grad_norm": 0.7473122477531433, "learning_rate": 5.7916160388821384e-05, "loss": 1.5318, "step": 6937 }, { "epoch": 2.105934132645318, "grad_norm": 0.7320314049720764, "learning_rate": 5.791008505467801e-05, "loss": 1.2928, "step": 6938 }, { "epoch": 2.1062376688420095, "grad_norm": 0.5336587429046631, "learning_rate": 5.790400972053463e-05, "loss": 0.9455, "step": 6939 }, { "epoch": 2.1065412050387007, "grad_norm": 0.7179783582687378, "learning_rate": 5.789793438639125e-05, "loss": 1.2575, "step": 6940 }, { "epoch": 2.1068447412353923, "grad_norm": 0.8911033868789673, "learning_rate": 5.7891859052247874e-05, "loss": 1.3474, "step": 6941 }, { "epoch": 2.107148277432084, "grad_norm": 0.8055987358093262, "learning_rate": 5.7885783718104505e-05, "loss": 1.1506, "step": 6942 }, { "epoch": 2.107451813628775, "grad_norm": 0.8984194397926331, "learning_rate": 5.7879708383961115e-05, "loss": 1.0726, "step": 6943 }, { "epoch": 2.107755349825467, "grad_norm": 0.6842384338378906, "learning_rate": 5.7873633049817746e-05, "loss": 1.002, "step": 6944 }, { "epoch": 2.108058886022158, "grad_norm": 0.5936232805252075, "learning_rate": 5.786755771567437e-05, "loss": 1.3044, "step": 6945 }, { "epoch": 2.1083624222188497, "grad_norm": 0.6418696641921997, "learning_rate": 5.786148238153099e-05, "loss": 1.3182, "step": 6946 }, { "epoch": 2.108665958415541, "grad_norm": 0.5525402426719666, "learning_rate": 5.785540704738761e-05, "loss": 0.6605, "step": 6947 }, { "epoch": 2.1089694946122326, "grad_norm": 0.9133402705192566, "learning_rate": 5.784933171324423e-05, "loss": 1.1272, "step": 6948 }, { "epoch": 2.109273030808924, "grad_norm": 0.8316843509674072, "learning_rate": 5.784325637910085e-05, "loss": 1.0813, "step": 6949 }, { "epoch": 2.1095765670056155, "grad_norm": 0.6806902289390564, "learning_rate": 5.783718104495748e-05, "loss": 1.1012, "step": 6950 }, { "epoch": 2.1098801032023067, "grad_norm": 0.6396098136901855, "learning_rate": 5.7831105710814094e-05, "loss": 1.5173, "step": 6951 }, { "epoch": 2.1101836393989983, "grad_norm": 0.5806583762168884, "learning_rate": 5.782503037667072e-05, "loss": 1.0212, "step": 6952 }, { "epoch": 2.11048717559569, "grad_norm": 0.6874980330467224, "learning_rate": 5.781895504252734e-05, "loss": 1.5176, "step": 6953 }, { "epoch": 2.110790711792381, "grad_norm": 0.7226924300193787, "learning_rate": 5.781287970838396e-05, "loss": 1.476, "step": 6954 }, { "epoch": 2.111094247989073, "grad_norm": 0.7012338638305664, "learning_rate": 5.7806804374240584e-05, "loss": 1.2147, "step": 6955 }, { "epoch": 2.111397784185764, "grad_norm": 1.2091950178146362, "learning_rate": 5.7800729040097215e-05, "loss": 0.9889, "step": 6956 }, { "epoch": 2.1117013203824557, "grad_norm": 0.8179687261581421, "learning_rate": 5.7794653705953825e-05, "loss": 1.3606, "step": 6957 }, { "epoch": 2.112004856579147, "grad_norm": 0.7715298533439636, "learning_rate": 5.7788578371810456e-05, "loss": 1.1802, "step": 6958 }, { "epoch": 2.1123083927758386, "grad_norm": 0.7295283675193787, "learning_rate": 5.778250303766708e-05, "loss": 1.2638, "step": 6959 }, { "epoch": 2.11261192897253, "grad_norm": 0.7757214307785034, "learning_rate": 5.77764277035237e-05, "loss": 1.3599, "step": 6960 }, { "epoch": 2.1129154651692215, "grad_norm": 0.6359571814537048, "learning_rate": 5.777035236938032e-05, "loss": 1.2531, "step": 6961 }, { "epoch": 2.1132190013659127, "grad_norm": 0.8131042122840881, "learning_rate": 5.776427703523694e-05, "loss": 1.5738, "step": 6962 }, { "epoch": 2.1135225375626043, "grad_norm": 0.6301864981651306, "learning_rate": 5.775820170109356e-05, "loss": 1.4534, "step": 6963 }, { "epoch": 2.113826073759296, "grad_norm": 0.569684624671936, "learning_rate": 5.775212636695019e-05, "loss": 1.0992, "step": 6964 }, { "epoch": 2.114129609955987, "grad_norm": 0.5382822155952454, "learning_rate": 5.7746051032806804e-05, "loss": 1.0529, "step": 6965 }, { "epoch": 2.114433146152679, "grad_norm": 0.6871811747550964, "learning_rate": 5.773997569866343e-05, "loss": 1.4066, "step": 6966 }, { "epoch": 2.11473668234937, "grad_norm": 0.7224509119987488, "learning_rate": 5.773390036452005e-05, "loss": 1.3342, "step": 6967 }, { "epoch": 2.1150402185460617, "grad_norm": 0.5536501407623291, "learning_rate": 5.772782503037667e-05, "loss": 1.6142, "step": 6968 }, { "epoch": 2.115343754742753, "grad_norm": 0.7293581962585449, "learning_rate": 5.7721749696233294e-05, "loss": 1.2867, "step": 6969 }, { "epoch": 2.1156472909394446, "grad_norm": 0.7771735787391663, "learning_rate": 5.7715674362089925e-05, "loss": 1.3612, "step": 6970 }, { "epoch": 2.115950827136136, "grad_norm": 0.7306496500968933, "learning_rate": 5.7709599027946535e-05, "loss": 1.4965, "step": 6971 }, { "epoch": 2.1162543633328275, "grad_norm": 0.7789996266365051, "learning_rate": 5.7703523693803166e-05, "loss": 0.8977, "step": 6972 }, { "epoch": 2.1165578995295187, "grad_norm": 0.7751117944717407, "learning_rate": 5.769744835965979e-05, "loss": 1.3932, "step": 6973 }, { "epoch": 2.1168614357262103, "grad_norm": 0.7575637102127075, "learning_rate": 5.769137302551641e-05, "loss": 1.5727, "step": 6974 }, { "epoch": 2.117164971922902, "grad_norm": 0.7688522338867188, "learning_rate": 5.768529769137303e-05, "loss": 1.458, "step": 6975 }, { "epoch": 2.117468508119593, "grad_norm": 0.6894252896308899, "learning_rate": 5.767922235722964e-05, "loss": 1.4373, "step": 6976 }, { "epoch": 2.117772044316285, "grad_norm": 0.8942421674728394, "learning_rate": 5.767314702308627e-05, "loss": 1.3635, "step": 6977 }, { "epoch": 2.118075580512976, "grad_norm": 0.8553876280784607, "learning_rate": 5.76670716889429e-05, "loss": 1.121, "step": 6978 }, { "epoch": 2.1183791167096677, "grad_norm": 0.7508211135864258, "learning_rate": 5.7660996354799514e-05, "loss": 1.0187, "step": 6979 }, { "epoch": 2.118682652906359, "grad_norm": 1.0324530601501465, "learning_rate": 5.765492102065614e-05, "loss": 1.1299, "step": 6980 }, { "epoch": 2.1189861891030506, "grad_norm": 0.5394060611724854, "learning_rate": 5.764884568651276e-05, "loss": 0.9301, "step": 6981 }, { "epoch": 2.119289725299742, "grad_norm": 0.6748804450035095, "learning_rate": 5.764277035236938e-05, "loss": 1.3764, "step": 6982 }, { "epoch": 2.1195932614964335, "grad_norm": 0.90252286195755, "learning_rate": 5.7636695018226004e-05, "loss": 1.3735, "step": 6983 }, { "epoch": 2.119896797693125, "grad_norm": 0.8094146847724915, "learning_rate": 5.7630619684082635e-05, "loss": 0.8956, "step": 6984 }, { "epoch": 2.1202003338898163, "grad_norm": 0.6555180549621582, "learning_rate": 5.7624544349939245e-05, "loss": 0.8515, "step": 6985 }, { "epoch": 2.120503870086508, "grad_norm": 0.7240637540817261, "learning_rate": 5.7618469015795876e-05, "loss": 1.4879, "step": 6986 }, { "epoch": 2.120807406283199, "grad_norm": 0.7705114483833313, "learning_rate": 5.76123936816525e-05, "loss": 1.2672, "step": 6987 }, { "epoch": 2.121110942479891, "grad_norm": 0.8182518482208252, "learning_rate": 5.760631834750911e-05, "loss": 1.4521, "step": 6988 }, { "epoch": 2.121414478676582, "grad_norm": 0.6841187477111816, "learning_rate": 5.760024301336574e-05, "loss": 0.9986, "step": 6989 }, { "epoch": 2.1217180148732737, "grad_norm": 0.7378225326538086, "learning_rate": 5.759416767922235e-05, "loss": 0.316, "step": 6990 }, { "epoch": 2.122021551069965, "grad_norm": 0.5745599865913391, "learning_rate": 5.758809234507898e-05, "loss": 1.6092, "step": 6991 }, { "epoch": 2.1223250872666566, "grad_norm": 0.8312602043151855, "learning_rate": 5.758201701093561e-05, "loss": 1.1245, "step": 6992 }, { "epoch": 2.122628623463348, "grad_norm": 0.7721714377403259, "learning_rate": 5.7575941676792224e-05, "loss": 1.0455, "step": 6993 }, { "epoch": 2.1229321596600395, "grad_norm": 0.7318625450134277, "learning_rate": 5.756986634264885e-05, "loss": 1.5849, "step": 6994 }, { "epoch": 2.123235695856731, "grad_norm": 1.0568405389785767, "learning_rate": 5.756379100850547e-05, "loss": 1.1597, "step": 6995 }, { "epoch": 2.1235392320534223, "grad_norm": 0.5453389883041382, "learning_rate": 5.755771567436209e-05, "loss": 1.7677, "step": 6996 }, { "epoch": 2.123842768250114, "grad_norm": 0.8192586302757263, "learning_rate": 5.7551640340218714e-05, "loss": 1.3186, "step": 6997 }, { "epoch": 2.124146304446805, "grad_norm": 0.836601197719574, "learning_rate": 5.7545565006075345e-05, "loss": 1.0741, "step": 6998 }, { "epoch": 2.124449840643497, "grad_norm": 0.6476053595542908, "learning_rate": 5.7539489671931955e-05, "loss": 1.2964, "step": 6999 }, { "epoch": 2.124753376840188, "grad_norm": 1.121575117111206, "learning_rate": 5.753341433778858e-05, "loss": 0.9233, "step": 7000 }, { "epoch": 2.1250569130368797, "grad_norm": 0.8215871453285217, "learning_rate": 5.752733900364521e-05, "loss": 1.1041, "step": 7001 }, { "epoch": 2.125360449233571, "grad_norm": 0.8727826476097107, "learning_rate": 5.752126366950182e-05, "loss": 1.1298, "step": 7002 }, { "epoch": 2.1256639854302626, "grad_norm": 0.8024986386299133, "learning_rate": 5.751518833535845e-05, "loss": 1.078, "step": 7003 }, { "epoch": 2.1259675216269542, "grad_norm": 0.6803773641586304, "learning_rate": 5.750911300121506e-05, "loss": 1.2013, "step": 7004 }, { "epoch": 2.1262710578236454, "grad_norm": 0.7451168298721313, "learning_rate": 5.750303766707169e-05, "loss": 1.1364, "step": 7005 }, { "epoch": 2.126574594020337, "grad_norm": 0.5670786499977112, "learning_rate": 5.749696233292832e-05, "loss": 1.0867, "step": 7006 }, { "epoch": 2.1268781302170283, "grad_norm": 0.7828416228294373, "learning_rate": 5.7490886998784934e-05, "loss": 1.0044, "step": 7007 }, { "epoch": 2.12718166641372, "grad_norm": 0.6469855308532715, "learning_rate": 5.748481166464156e-05, "loss": 1.5497, "step": 7008 }, { "epoch": 2.127485202610411, "grad_norm": 0.6535457372665405, "learning_rate": 5.747873633049818e-05, "loss": 1.5059, "step": 7009 }, { "epoch": 2.127788738807103, "grad_norm": 0.5588821768760681, "learning_rate": 5.74726609963548e-05, "loss": 1.2033, "step": 7010 }, { "epoch": 2.128092275003794, "grad_norm": 0.8629704713821411, "learning_rate": 5.7466585662211424e-05, "loss": 1.2453, "step": 7011 }, { "epoch": 2.1283958112004857, "grad_norm": 0.6546429991722107, "learning_rate": 5.746051032806805e-05, "loss": 1.6456, "step": 7012 }, { "epoch": 2.128699347397177, "grad_norm": 0.7082663178443909, "learning_rate": 5.7454434993924665e-05, "loss": 1.4852, "step": 7013 }, { "epoch": 2.1290028835938686, "grad_norm": 0.8340616226196289, "learning_rate": 5.744835965978129e-05, "loss": 1.3837, "step": 7014 }, { "epoch": 2.1293064197905602, "grad_norm": 0.6598572731018066, "learning_rate": 5.744228432563791e-05, "loss": 1.2429, "step": 7015 }, { "epoch": 2.1296099559872514, "grad_norm": 0.91293865442276, "learning_rate": 5.743620899149453e-05, "loss": 1.1037, "step": 7016 }, { "epoch": 2.129913492183943, "grad_norm": 0.83259117603302, "learning_rate": 5.743013365735116e-05, "loss": 1.2325, "step": 7017 }, { "epoch": 2.1302170283806343, "grad_norm": 0.8073476552963257, "learning_rate": 5.742405832320777e-05, "loss": 1.3386, "step": 7018 }, { "epoch": 2.130520564577326, "grad_norm": 0.6302822232246399, "learning_rate": 5.74179829890644e-05, "loss": 0.7663, "step": 7019 }, { "epoch": 2.130824100774017, "grad_norm": 0.5466523766517639, "learning_rate": 5.741190765492103e-05, "loss": 1.1305, "step": 7020 }, { "epoch": 2.131127636970709, "grad_norm": 0.6615848541259766, "learning_rate": 5.7405832320777645e-05, "loss": 1.6637, "step": 7021 }, { "epoch": 2.1314311731674, "grad_norm": 0.7762163877487183, "learning_rate": 5.739975698663427e-05, "loss": 1.4986, "step": 7022 }, { "epoch": 2.1317347093640917, "grad_norm": 0.7263661026954651, "learning_rate": 5.739368165249089e-05, "loss": 1.1411, "step": 7023 }, { "epoch": 2.132038245560783, "grad_norm": 0.6416277885437012, "learning_rate": 5.738760631834751e-05, "loss": 0.8715, "step": 7024 }, { "epoch": 2.1323417817574746, "grad_norm": 0.5931896567344666, "learning_rate": 5.7381530984204134e-05, "loss": 1.0077, "step": 7025 }, { "epoch": 2.1326453179541662, "grad_norm": 0.715061604976654, "learning_rate": 5.737545565006076e-05, "loss": 1.2576, "step": 7026 }, { "epoch": 2.1329488541508574, "grad_norm": 0.7923848628997803, "learning_rate": 5.7369380315917375e-05, "loss": 1.4421, "step": 7027 }, { "epoch": 2.133252390347549, "grad_norm": 0.7267158627510071, "learning_rate": 5.7363304981774e-05, "loss": 1.1591, "step": 7028 }, { "epoch": 2.1335559265442403, "grad_norm": 0.7481387257575989, "learning_rate": 5.735722964763062e-05, "loss": 1.2607, "step": 7029 }, { "epoch": 2.133859462740932, "grad_norm": 0.7214675545692444, "learning_rate": 5.735115431348724e-05, "loss": 0.7526, "step": 7030 }, { "epoch": 2.134162998937623, "grad_norm": 0.6436885595321655, "learning_rate": 5.734507897934387e-05, "loss": 1.3025, "step": 7031 }, { "epoch": 2.134466535134315, "grad_norm": 0.7761930227279663, "learning_rate": 5.733900364520048e-05, "loss": 1.4619, "step": 7032 }, { "epoch": 2.134770071331006, "grad_norm": 0.6738321781158447, "learning_rate": 5.733292831105711e-05, "loss": 1.5857, "step": 7033 }, { "epoch": 2.1350736075276977, "grad_norm": 0.8512619137763977, "learning_rate": 5.732685297691374e-05, "loss": 0.805, "step": 7034 }, { "epoch": 2.135377143724389, "grad_norm": 0.6589305996894836, "learning_rate": 5.7320777642770355e-05, "loss": 1.4291, "step": 7035 }, { "epoch": 2.1356806799210806, "grad_norm": 0.8158634901046753, "learning_rate": 5.731470230862698e-05, "loss": 1.1064, "step": 7036 }, { "epoch": 2.1359842161177722, "grad_norm": 0.6389902830123901, "learning_rate": 5.73086269744836e-05, "loss": 1.0643, "step": 7037 }, { "epoch": 2.1362877523144634, "grad_norm": 0.6268038749694824, "learning_rate": 5.730255164034022e-05, "loss": 1.4805, "step": 7038 }, { "epoch": 2.136591288511155, "grad_norm": 0.8576473593711853, "learning_rate": 5.7296476306196844e-05, "loss": 1.165, "step": 7039 }, { "epoch": 2.1368948247078463, "grad_norm": 0.7211112976074219, "learning_rate": 5.729040097205347e-05, "loss": 1.1561, "step": 7040 }, { "epoch": 2.137198360904538, "grad_norm": 0.6866903901100159, "learning_rate": 5.7284325637910086e-05, "loss": 1.4048, "step": 7041 }, { "epoch": 2.137501897101229, "grad_norm": 0.6264458894729614, "learning_rate": 5.727825030376671e-05, "loss": 0.8988, "step": 7042 }, { "epoch": 2.137805433297921, "grad_norm": 0.7150564193725586, "learning_rate": 5.727217496962333e-05, "loss": 1.4186, "step": 7043 }, { "epoch": 2.138108969494612, "grad_norm": 0.82100510597229, "learning_rate": 5.726609963547995e-05, "loss": 1.1084, "step": 7044 }, { "epoch": 2.1384125056913037, "grad_norm": 0.5969480276107788, "learning_rate": 5.726002430133658e-05, "loss": 1.1065, "step": 7045 }, { "epoch": 2.138716041887995, "grad_norm": 0.7427048087120056, "learning_rate": 5.725394896719319e-05, "loss": 1.0831, "step": 7046 }, { "epoch": 2.1390195780846866, "grad_norm": 0.6148872375488281, "learning_rate": 5.724787363304982e-05, "loss": 1.2953, "step": 7047 }, { "epoch": 2.1393231142813782, "grad_norm": 0.7812492251396179, "learning_rate": 5.724179829890645e-05, "loss": 1.3676, "step": 7048 }, { "epoch": 2.1396266504780694, "grad_norm": 0.7223173379898071, "learning_rate": 5.723572296476306e-05, "loss": 0.7977, "step": 7049 }, { "epoch": 2.139930186674761, "grad_norm": 0.6859977841377258, "learning_rate": 5.722964763061969e-05, "loss": 1.053, "step": 7050 }, { "epoch": 2.1402337228714523, "grad_norm": 0.7460084557533264, "learning_rate": 5.722357229647631e-05, "loss": 1.2215, "step": 7051 }, { "epoch": 2.140537259068144, "grad_norm": 0.7167086601257324, "learning_rate": 5.721749696233293e-05, "loss": 1.622, "step": 7052 }, { "epoch": 2.140840795264835, "grad_norm": 0.7320702075958252, "learning_rate": 5.7211421628189554e-05, "loss": 1.0462, "step": 7053 }, { "epoch": 2.141144331461527, "grad_norm": 0.6613295078277588, "learning_rate": 5.720534629404618e-05, "loss": 1.0691, "step": 7054 }, { "epoch": 2.141447867658218, "grad_norm": 0.843561589717865, "learning_rate": 5.7199270959902796e-05, "loss": 1.3478, "step": 7055 }, { "epoch": 2.1417514038549097, "grad_norm": 0.5995355248451233, "learning_rate": 5.719319562575942e-05, "loss": 1.2636, "step": 7056 }, { "epoch": 2.1420549400516014, "grad_norm": 0.8956936597824097, "learning_rate": 5.718712029161604e-05, "loss": 0.8077, "step": 7057 }, { "epoch": 2.1423584762482926, "grad_norm": 0.8197022676467896, "learning_rate": 5.718104495747266e-05, "loss": 1.258, "step": 7058 }, { "epoch": 2.142662012444984, "grad_norm": 0.7216955423355103, "learning_rate": 5.717496962332929e-05, "loss": 1.5475, "step": 7059 }, { "epoch": 2.1429655486416754, "grad_norm": 0.7288816571235657, "learning_rate": 5.71688942891859e-05, "loss": 1.1879, "step": 7060 }, { "epoch": 2.143269084838367, "grad_norm": 0.7779216766357422, "learning_rate": 5.7162818955042527e-05, "loss": 1.3337, "step": 7061 }, { "epoch": 2.1435726210350583, "grad_norm": 0.5709745287895203, "learning_rate": 5.715674362089916e-05, "loss": 1.1431, "step": 7062 }, { "epoch": 2.14387615723175, "grad_norm": 0.7075303792953491, "learning_rate": 5.715066828675577e-05, "loss": 1.4308, "step": 7063 }, { "epoch": 2.144179693428441, "grad_norm": 0.7079590559005737, "learning_rate": 5.71445929526124e-05, "loss": 1.4299, "step": 7064 }, { "epoch": 2.144483229625133, "grad_norm": 0.9902227520942688, "learning_rate": 5.713851761846902e-05, "loss": 1.2042, "step": 7065 }, { "epoch": 2.1447867658218245, "grad_norm": 1.1577403545379639, "learning_rate": 5.713244228432564e-05, "loss": 1.3816, "step": 7066 }, { "epoch": 2.1450903020185157, "grad_norm": 0.8484241962432861, "learning_rate": 5.7126366950182264e-05, "loss": 1.3148, "step": 7067 }, { "epoch": 2.1453938382152073, "grad_norm": 0.8071272969245911, "learning_rate": 5.712029161603889e-05, "loss": 1.4303, "step": 7068 }, { "epoch": 2.1456973744118986, "grad_norm": 0.7060108184814453, "learning_rate": 5.7114216281895506e-05, "loss": 1.4284, "step": 7069 }, { "epoch": 2.14600091060859, "grad_norm": 0.6524072289466858, "learning_rate": 5.710814094775213e-05, "loss": 1.6973, "step": 7070 }, { "epoch": 2.1463044468052814, "grad_norm": 0.8758344650268555, "learning_rate": 5.710206561360875e-05, "loss": 1.2572, "step": 7071 }, { "epoch": 2.146607983001973, "grad_norm": 1.1003342866897583, "learning_rate": 5.709599027946537e-05, "loss": 0.5954, "step": 7072 }, { "epoch": 2.1469115191986643, "grad_norm": 0.6540477871894836, "learning_rate": 5.7089914945321995e-05, "loss": 1.3184, "step": 7073 }, { "epoch": 2.147215055395356, "grad_norm": 0.5550004243850708, "learning_rate": 5.708383961117861e-05, "loss": 1.5281, "step": 7074 }, { "epoch": 2.147518591592047, "grad_norm": 0.6814255714416504, "learning_rate": 5.7077764277035237e-05, "loss": 1.3497, "step": 7075 }, { "epoch": 2.147822127788739, "grad_norm": 0.6634551882743835, "learning_rate": 5.707168894289187e-05, "loss": 0.8038, "step": 7076 }, { "epoch": 2.1481256639854305, "grad_norm": 0.6326273083686829, "learning_rate": 5.706561360874848e-05, "loss": 1.3639, "step": 7077 }, { "epoch": 2.1484292001821217, "grad_norm": 0.4899923503398895, "learning_rate": 5.705953827460511e-05, "loss": 1.2843, "step": 7078 }, { "epoch": 2.1487327363788133, "grad_norm": 0.6824162602424622, "learning_rate": 5.705346294046173e-05, "loss": 1.3331, "step": 7079 }, { "epoch": 2.1490362725755046, "grad_norm": 0.6425466537475586, "learning_rate": 5.704738760631835e-05, "loss": 1.1552, "step": 7080 }, { "epoch": 2.149339808772196, "grad_norm": 1.1080219745635986, "learning_rate": 5.7041312272174974e-05, "loss": 1.1281, "step": 7081 }, { "epoch": 2.1496433449688874, "grad_norm": 0.61561518907547, "learning_rate": 5.70352369380316e-05, "loss": 1.5421, "step": 7082 }, { "epoch": 2.149946881165579, "grad_norm": 0.6175718903541565, "learning_rate": 5.7029161603888216e-05, "loss": 1.0581, "step": 7083 }, { "epoch": 2.1502504173622703, "grad_norm": 0.5754169821739197, "learning_rate": 5.702308626974484e-05, "loss": 1.3451, "step": 7084 }, { "epoch": 2.150553953558962, "grad_norm": 0.6784567832946777, "learning_rate": 5.701701093560146e-05, "loss": 1.6735, "step": 7085 }, { "epoch": 2.150857489755653, "grad_norm": 0.7900217771530151, "learning_rate": 5.701093560145808e-05, "loss": 1.2737, "step": 7086 }, { "epoch": 2.151161025952345, "grad_norm": 0.64167720079422, "learning_rate": 5.7004860267314705e-05, "loss": 1.2182, "step": 7087 }, { "epoch": 2.1514645621490365, "grad_norm": 0.7034931778907776, "learning_rate": 5.699878493317132e-05, "loss": 1.2093, "step": 7088 }, { "epoch": 2.1517680983457277, "grad_norm": 0.6175836324691772, "learning_rate": 5.699270959902795e-05, "loss": 1.5437, "step": 7089 }, { "epoch": 2.1520716345424193, "grad_norm": 0.6727813482284546, "learning_rate": 5.698663426488458e-05, "loss": 1.3068, "step": 7090 }, { "epoch": 2.1523751707391106, "grad_norm": 0.754546582698822, "learning_rate": 5.698055893074119e-05, "loss": 1.4687, "step": 7091 }, { "epoch": 2.152678706935802, "grad_norm": 0.7307665348052979, "learning_rate": 5.697448359659782e-05, "loss": 1.0387, "step": 7092 }, { "epoch": 2.1529822431324934, "grad_norm": 0.7365608215332031, "learning_rate": 5.696840826245444e-05, "loss": 0.857, "step": 7093 }, { "epoch": 2.153285779329185, "grad_norm": 0.6369282007217407, "learning_rate": 5.696233292831106e-05, "loss": 1.3908, "step": 7094 }, { "epoch": 2.1535893155258763, "grad_norm": 1.0504459142684937, "learning_rate": 5.6956257594167684e-05, "loss": 1.503, "step": 7095 }, { "epoch": 2.153892851722568, "grad_norm": 0.5821953415870667, "learning_rate": 5.695018226002431e-05, "loss": 1.1134, "step": 7096 }, { "epoch": 2.154196387919259, "grad_norm": 0.7673921585083008, "learning_rate": 5.6944106925880926e-05, "loss": 1.1303, "step": 7097 }, { "epoch": 2.154499924115951, "grad_norm": 0.7406718730926514, "learning_rate": 5.693803159173755e-05, "loss": 1.6383, "step": 7098 }, { "epoch": 2.1548034603126425, "grad_norm": 0.6577976942062378, "learning_rate": 5.693195625759417e-05, "loss": 1.2918, "step": 7099 }, { "epoch": 2.1551069965093337, "grad_norm": 0.7887044548988342, "learning_rate": 5.692588092345079e-05, "loss": 1.0772, "step": 7100 }, { "epoch": 2.1554105327060253, "grad_norm": 0.7450350522994995, "learning_rate": 5.6919805589307415e-05, "loss": 1.4461, "step": 7101 }, { "epoch": 2.1557140689027166, "grad_norm": 0.5293591022491455, "learning_rate": 5.691373025516403e-05, "loss": 1.7682, "step": 7102 }, { "epoch": 2.156017605099408, "grad_norm": 0.9171348214149475, "learning_rate": 5.690765492102066e-05, "loss": 1.2295, "step": 7103 }, { "epoch": 2.1563211412960994, "grad_norm": 0.6998059153556824, "learning_rate": 5.690157958687729e-05, "loss": 1.2531, "step": 7104 }, { "epoch": 2.156624677492791, "grad_norm": 0.7437159419059753, "learning_rate": 5.68955042527339e-05, "loss": 1.3937, "step": 7105 }, { "epoch": 2.1569282136894823, "grad_norm": 0.8054535388946533, "learning_rate": 5.688942891859053e-05, "loss": 1.0773, "step": 7106 }, { "epoch": 2.157231749886174, "grad_norm": 0.7918900847434998, "learning_rate": 5.688335358444715e-05, "loss": 1.3796, "step": 7107 }, { "epoch": 2.157535286082865, "grad_norm": 0.64754319190979, "learning_rate": 5.687727825030377e-05, "loss": 1.0733, "step": 7108 }, { "epoch": 2.157838822279557, "grad_norm": 0.6509382128715515, "learning_rate": 5.6871202916160394e-05, "loss": 0.5026, "step": 7109 }, { "epoch": 2.1581423584762485, "grad_norm": 0.5929359793663025, "learning_rate": 5.6865127582017005e-05, "loss": 0.8078, "step": 7110 }, { "epoch": 2.1584458946729397, "grad_norm": 0.9011654853820801, "learning_rate": 5.6859052247873636e-05, "loss": 1.1668, "step": 7111 }, { "epoch": 2.1587494308696313, "grad_norm": 1.113480567932129, "learning_rate": 5.685297691373026e-05, "loss": 0.9573, "step": 7112 }, { "epoch": 2.1590529670663225, "grad_norm": 0.7953112125396729, "learning_rate": 5.684690157958688e-05, "loss": 1.0673, "step": 7113 }, { "epoch": 2.159356503263014, "grad_norm": 0.8165501952171326, "learning_rate": 5.68408262454435e-05, "loss": 1.4202, "step": 7114 }, { "epoch": 2.1596600394597054, "grad_norm": 0.8039828538894653, "learning_rate": 5.6834750911300125e-05, "loss": 1.5351, "step": 7115 }, { "epoch": 2.159963575656397, "grad_norm": 0.6484586596488953, "learning_rate": 5.682867557715674e-05, "loss": 1.495, "step": 7116 }, { "epoch": 2.1602671118530883, "grad_norm": 0.859879195690155, "learning_rate": 5.682260024301337e-05, "loss": 1.3554, "step": 7117 }, { "epoch": 2.16057064804978, "grad_norm": 0.6827611923217773, "learning_rate": 5.681652490887e-05, "loss": 1.3924, "step": 7118 }, { "epoch": 2.1608741842464716, "grad_norm": 0.6194184422492981, "learning_rate": 5.681044957472661e-05, "loss": 1.3563, "step": 7119 }, { "epoch": 2.161177720443163, "grad_norm": 0.884806215763092, "learning_rate": 5.680437424058324e-05, "loss": 1.2661, "step": 7120 }, { "epoch": 2.1614812566398545, "grad_norm": 0.8571776151657104, "learning_rate": 5.679829890643986e-05, "loss": 1.3268, "step": 7121 }, { "epoch": 2.1617847928365457, "grad_norm": 0.8481334447860718, "learning_rate": 5.6792223572296474e-05, "loss": 1.5147, "step": 7122 }, { "epoch": 2.1620883290332373, "grad_norm": 0.6749445796012878, "learning_rate": 5.6786148238153104e-05, "loss": 0.5943, "step": 7123 }, { "epoch": 2.1623918652299285, "grad_norm": 0.7724722623825073, "learning_rate": 5.6780072904009715e-05, "loss": 1.6409, "step": 7124 }, { "epoch": 2.16269540142662, "grad_norm": 0.6369667053222656, "learning_rate": 5.6773997569866346e-05, "loss": 1.72, "step": 7125 }, { "epoch": 2.1629989376233114, "grad_norm": 0.7980279326438904, "learning_rate": 5.676792223572297e-05, "loss": 1.3303, "step": 7126 }, { "epoch": 2.163302473820003, "grad_norm": 0.6585915684700012, "learning_rate": 5.676184690157959e-05, "loss": 1.5053, "step": 7127 }, { "epoch": 2.1636060100166947, "grad_norm": 0.7717660069465637, "learning_rate": 5.675577156743621e-05, "loss": 1.1307, "step": 7128 }, { "epoch": 2.163909546213386, "grad_norm": 0.6564520001411438, "learning_rate": 5.6749696233292835e-05, "loss": 1.2847, "step": 7129 }, { "epoch": 2.1642130824100776, "grad_norm": 0.7321675419807434, "learning_rate": 5.674362089914945e-05, "loss": 1.3272, "step": 7130 }, { "epoch": 2.164516618606769, "grad_norm": 0.7516568303108215, "learning_rate": 5.673754556500608e-05, "loss": 1.3297, "step": 7131 }, { "epoch": 2.1648201548034605, "grad_norm": 0.7043347954750061, "learning_rate": 5.673147023086271e-05, "loss": 1.6839, "step": 7132 }, { "epoch": 2.1651236910001517, "grad_norm": 0.6395680904388428, "learning_rate": 5.672539489671932e-05, "loss": 1.0484, "step": 7133 }, { "epoch": 2.1654272271968433, "grad_norm": 0.6057548522949219, "learning_rate": 5.671931956257594e-05, "loss": 1.2702, "step": 7134 }, { "epoch": 2.1657307633935345, "grad_norm": 0.8291522860527039, "learning_rate": 5.671324422843257e-05, "loss": 1.0313, "step": 7135 }, { "epoch": 2.166034299590226, "grad_norm": 0.7034058570861816, "learning_rate": 5.6707168894289184e-05, "loss": 1.4155, "step": 7136 }, { "epoch": 2.1663378357869174, "grad_norm": 0.6599259972572327, "learning_rate": 5.6701093560145814e-05, "loss": 1.3979, "step": 7137 }, { "epoch": 2.166641371983609, "grad_norm": 0.7508943676948547, "learning_rate": 5.6695018226002425e-05, "loss": 1.3171, "step": 7138 }, { "epoch": 2.1669449081803007, "grad_norm": 0.544033408164978, "learning_rate": 5.6688942891859056e-05, "loss": 0.8611, "step": 7139 }, { "epoch": 2.167248444376992, "grad_norm": 0.7824786305427551, "learning_rate": 5.668286755771568e-05, "loss": 1.5292, "step": 7140 }, { "epoch": 2.1675519805736836, "grad_norm": 0.8762481212615967, "learning_rate": 5.66767922235723e-05, "loss": 1.2404, "step": 7141 }, { "epoch": 2.167855516770375, "grad_norm": 0.7257769703865051, "learning_rate": 5.667071688942892e-05, "loss": 0.7228, "step": 7142 }, { "epoch": 2.1681590529670665, "grad_norm": 0.9814961552619934, "learning_rate": 5.6664641555285545e-05, "loss": 0.9963, "step": 7143 }, { "epoch": 2.1684625891637577, "grad_norm": 0.851203441619873, "learning_rate": 5.665856622114216e-05, "loss": 1.1862, "step": 7144 }, { "epoch": 2.1687661253604493, "grad_norm": 0.6610409021377563, "learning_rate": 5.665249088699879e-05, "loss": 1.2907, "step": 7145 }, { "epoch": 2.1690696615571405, "grad_norm": 0.6841842532157898, "learning_rate": 5.664641555285541e-05, "loss": 1.7986, "step": 7146 }, { "epoch": 2.169373197753832, "grad_norm": 0.7544435262680054, "learning_rate": 5.664034021871203e-05, "loss": 1.4018, "step": 7147 }, { "epoch": 2.1696767339505234, "grad_norm": 0.7136738300323486, "learning_rate": 5.663426488456865e-05, "loss": 1.3775, "step": 7148 }, { "epoch": 2.169980270147215, "grad_norm": 1.1160085201263428, "learning_rate": 5.662818955042528e-05, "loss": 0.9446, "step": 7149 }, { "epoch": 2.1702838063439067, "grad_norm": 0.7214122414588928, "learning_rate": 5.6622114216281894e-05, "loss": 1.4815, "step": 7150 }, { "epoch": 2.170587342540598, "grad_norm": 0.6770926117897034, "learning_rate": 5.6616038882138525e-05, "loss": 1.6, "step": 7151 }, { "epoch": 2.1708908787372896, "grad_norm": 0.6858698129653931, "learning_rate": 5.6609963547995135e-05, "loss": 1.3258, "step": 7152 }, { "epoch": 2.171194414933981, "grad_norm": 0.755990743637085, "learning_rate": 5.6603888213851766e-05, "loss": 1.0352, "step": 7153 }, { "epoch": 2.1714979511306725, "grad_norm": 0.628933310508728, "learning_rate": 5.659781287970839e-05, "loss": 0.8605, "step": 7154 }, { "epoch": 2.1718014873273637, "grad_norm": 0.631551206111908, "learning_rate": 5.659173754556501e-05, "loss": 1.3337, "step": 7155 }, { "epoch": 2.1721050235240553, "grad_norm": 0.8183661103248596, "learning_rate": 5.658566221142163e-05, "loss": 1.3968, "step": 7156 }, { "epoch": 2.1724085597207465, "grad_norm": 0.5968457460403442, "learning_rate": 5.6579586877278255e-05, "loss": 1.587, "step": 7157 }, { "epoch": 2.172712095917438, "grad_norm": 0.7125000953674316, "learning_rate": 5.657351154313487e-05, "loss": 1.5769, "step": 7158 }, { "epoch": 2.1730156321141294, "grad_norm": 0.8173585534095764, "learning_rate": 5.65674362089915e-05, "loss": 1.4033, "step": 7159 }, { "epoch": 2.173319168310821, "grad_norm": 0.7674162983894348, "learning_rate": 5.656136087484812e-05, "loss": 0.765, "step": 7160 }, { "epoch": 2.1736227045075127, "grad_norm": 0.6227114200592041, "learning_rate": 5.655528554070474e-05, "loss": 1.4455, "step": 7161 }, { "epoch": 2.173926240704204, "grad_norm": 0.7266408205032349, "learning_rate": 5.654921020656136e-05, "loss": 1.3391, "step": 7162 }, { "epoch": 2.1742297769008956, "grad_norm": 0.6072399020195007, "learning_rate": 5.654313487241799e-05, "loss": 1.7996, "step": 7163 }, { "epoch": 2.174533313097587, "grad_norm": 0.7643356323242188, "learning_rate": 5.6537059538274604e-05, "loss": 1.3845, "step": 7164 }, { "epoch": 2.1748368492942785, "grad_norm": 0.8032098412513733, "learning_rate": 5.6530984204131235e-05, "loss": 1.3607, "step": 7165 }, { "epoch": 2.1751403854909697, "grad_norm": 0.6203994750976562, "learning_rate": 5.6524908869987845e-05, "loss": 1.1317, "step": 7166 }, { "epoch": 2.1754439216876613, "grad_norm": 0.7892910838127136, "learning_rate": 5.6518833535844476e-05, "loss": 1.2879, "step": 7167 }, { "epoch": 2.1757474578843525, "grad_norm": 0.5828709006309509, "learning_rate": 5.65127582017011e-05, "loss": 1.5039, "step": 7168 }, { "epoch": 2.176050994081044, "grad_norm": 0.6579250693321228, "learning_rate": 5.650668286755772e-05, "loss": 1.483, "step": 7169 }, { "epoch": 2.1763545302777354, "grad_norm": 0.7929391860961914, "learning_rate": 5.650060753341434e-05, "loss": 1.4478, "step": 7170 }, { "epoch": 2.176658066474427, "grad_norm": 0.7163347601890564, "learning_rate": 5.6494532199270966e-05, "loss": 0.7903, "step": 7171 }, { "epoch": 2.1769616026711187, "grad_norm": 0.6493278741836548, "learning_rate": 5.648845686512758e-05, "loss": 1.1142, "step": 7172 }, { "epoch": 2.17726513886781, "grad_norm": 0.659180760383606, "learning_rate": 5.648238153098421e-05, "loss": 1.2759, "step": 7173 }, { "epoch": 2.1775686750645016, "grad_norm": 0.6536158323287964, "learning_rate": 5.647630619684083e-05, "loss": 1.51, "step": 7174 }, { "epoch": 2.177872211261193, "grad_norm": 0.8379760980606079, "learning_rate": 5.647023086269745e-05, "loss": 0.8592, "step": 7175 }, { "epoch": 2.1781757474578844, "grad_norm": 0.6624657511711121, "learning_rate": 5.646415552855407e-05, "loss": 1.5387, "step": 7176 }, { "epoch": 2.1784792836545757, "grad_norm": 0.8176611065864563, "learning_rate": 5.64580801944107e-05, "loss": 0.8141, "step": 7177 }, { "epoch": 2.1787828198512673, "grad_norm": 0.8894045948982239, "learning_rate": 5.6452004860267314e-05, "loss": 1.3116, "step": 7178 }, { "epoch": 2.1790863560479585, "grad_norm": 0.8644475936889648, "learning_rate": 5.6445929526123945e-05, "loss": 0.9116, "step": 7179 }, { "epoch": 2.17938989224465, "grad_norm": 0.8027042746543884, "learning_rate": 5.6439854191980555e-05, "loss": 1.3788, "step": 7180 }, { "epoch": 2.1796934284413414, "grad_norm": 0.6205067038536072, "learning_rate": 5.6433778857837186e-05, "loss": 0.9615, "step": 7181 }, { "epoch": 2.179996964638033, "grad_norm": 0.9785541296005249, "learning_rate": 5.642770352369381e-05, "loss": 1.2723, "step": 7182 }, { "epoch": 2.1803005008347247, "grad_norm": 0.541207492351532, "learning_rate": 5.642162818955042e-05, "loss": 0.9423, "step": 7183 }, { "epoch": 2.180604037031416, "grad_norm": 0.7655600905418396, "learning_rate": 5.641555285540705e-05, "loss": 1.088, "step": 7184 }, { "epoch": 2.1809075732281076, "grad_norm": 0.575629711151123, "learning_rate": 5.6409477521263676e-05, "loss": 1.0602, "step": 7185 }, { "epoch": 2.181211109424799, "grad_norm": 0.6803514957427979, "learning_rate": 5.640340218712029e-05, "loss": 1.141, "step": 7186 }, { "epoch": 2.1815146456214904, "grad_norm": 0.681253969669342, "learning_rate": 5.639732685297692e-05, "loss": 1.3516, "step": 7187 }, { "epoch": 2.1818181818181817, "grad_norm": 0.6234375238418579, "learning_rate": 5.639125151883354e-05, "loss": 1.3573, "step": 7188 }, { "epoch": 2.1821217180148733, "grad_norm": 0.809392511844635, "learning_rate": 5.638517618469016e-05, "loss": 1.3348, "step": 7189 }, { "epoch": 2.1824252542115645, "grad_norm": 0.6876418590545654, "learning_rate": 5.637910085054678e-05, "loss": 0.8274, "step": 7190 }, { "epoch": 2.182728790408256, "grad_norm": 0.7089405059814453, "learning_rate": 5.63730255164034e-05, "loss": 1.1393, "step": 7191 }, { "epoch": 2.183032326604948, "grad_norm": 0.6622164249420166, "learning_rate": 5.6366950182260024e-05, "loss": 1.2418, "step": 7192 }, { "epoch": 2.183335862801639, "grad_norm": 0.7589064836502075, "learning_rate": 5.6360874848116655e-05, "loss": 1.2326, "step": 7193 }, { "epoch": 2.1836393989983307, "grad_norm": 0.8332014679908752, "learning_rate": 5.6354799513973265e-05, "loss": 1.2957, "step": 7194 }, { "epoch": 2.183942935195022, "grad_norm": 0.7106983661651611, "learning_rate": 5.634872417982989e-05, "loss": 1.3794, "step": 7195 }, { "epoch": 2.1842464713917136, "grad_norm": 0.7226594090461731, "learning_rate": 5.634264884568652e-05, "loss": 1.674, "step": 7196 }, { "epoch": 2.184550007588405, "grad_norm": 0.7651084065437317, "learning_rate": 5.633657351154313e-05, "loss": 1.101, "step": 7197 }, { "epoch": 2.1848535437850964, "grad_norm": 0.9110549688339233, "learning_rate": 5.633049817739976e-05, "loss": 0.7792, "step": 7198 }, { "epoch": 2.1851570799817877, "grad_norm": 0.7132554650306702, "learning_rate": 5.6324422843256386e-05, "loss": 1.5932, "step": 7199 }, { "epoch": 2.1854606161784793, "grad_norm": 0.7886115908622742, "learning_rate": 5.6318347509113e-05, "loss": 1.1547, "step": 7200 }, { "epoch": 2.185764152375171, "grad_norm": 0.6051173806190491, "learning_rate": 5.631227217496963e-05, "loss": 1.2831, "step": 7201 }, { "epoch": 2.186067688571862, "grad_norm": 0.815495491027832, "learning_rate": 5.630619684082625e-05, "loss": 0.4056, "step": 7202 }, { "epoch": 2.186371224768554, "grad_norm": 1.083448886871338, "learning_rate": 5.630012150668287e-05, "loss": 1.3255, "step": 7203 }, { "epoch": 2.186674760965245, "grad_norm": 0.8830431699752808, "learning_rate": 5.629404617253949e-05, "loss": 0.4751, "step": 7204 }, { "epoch": 2.1869782971619367, "grad_norm": 0.7775143384933472, "learning_rate": 5.628797083839611e-05, "loss": 1.4157, "step": 7205 }, { "epoch": 2.187281833358628, "grad_norm": 0.6572606563568115, "learning_rate": 5.6281895504252734e-05, "loss": 1.7001, "step": 7206 }, { "epoch": 2.1875853695553196, "grad_norm": 0.7444910407066345, "learning_rate": 5.627582017010936e-05, "loss": 1.4273, "step": 7207 }, { "epoch": 2.187888905752011, "grad_norm": 0.8146635293960571, "learning_rate": 5.6269744835965975e-05, "loss": 1.5225, "step": 7208 }, { "epoch": 2.1881924419487024, "grad_norm": 0.7989526391029358, "learning_rate": 5.62636695018226e-05, "loss": 1.3059, "step": 7209 }, { "epoch": 2.1884959781453937, "grad_norm": 0.7003793716430664, "learning_rate": 5.625759416767923e-05, "loss": 1.2017, "step": 7210 }, { "epoch": 2.1887995143420853, "grad_norm": 0.7632039785385132, "learning_rate": 5.625151883353584e-05, "loss": 1.5865, "step": 7211 }, { "epoch": 2.189103050538777, "grad_norm": 0.8134432435035706, "learning_rate": 5.624544349939247e-05, "loss": 1.3355, "step": 7212 }, { "epoch": 2.189406586735468, "grad_norm": 0.7677670121192932, "learning_rate": 5.6239368165249096e-05, "loss": 1.4682, "step": 7213 }, { "epoch": 2.18971012293216, "grad_norm": 0.6580960750579834, "learning_rate": 5.623329283110571e-05, "loss": 1.4944, "step": 7214 }, { "epoch": 2.190013659128851, "grad_norm": 0.5932350158691406, "learning_rate": 5.622721749696234e-05, "loss": 1.3135, "step": 7215 }, { "epoch": 2.1903171953255427, "grad_norm": 0.6935281157493591, "learning_rate": 5.622114216281896e-05, "loss": 1.4697, "step": 7216 }, { "epoch": 2.190620731522234, "grad_norm": 0.6523483395576477, "learning_rate": 5.621506682867558e-05, "loss": 1.4928, "step": 7217 }, { "epoch": 2.1909242677189256, "grad_norm": 0.6458442211151123, "learning_rate": 5.62089914945322e-05, "loss": 1.5989, "step": 7218 }, { "epoch": 2.191227803915617, "grad_norm": 0.8362550139427185, "learning_rate": 5.620291616038882e-05, "loss": 1.3478, "step": 7219 }, { "epoch": 2.1915313401123084, "grad_norm": 0.6641613245010376, "learning_rate": 5.6196840826245444e-05, "loss": 1.3929, "step": 7220 }, { "epoch": 2.1918348763089996, "grad_norm": 0.8658133149147034, "learning_rate": 5.619076549210207e-05, "loss": 1.3856, "step": 7221 }, { "epoch": 2.1921384125056913, "grad_norm": 0.8276031017303467, "learning_rate": 5.6184690157958685e-05, "loss": 1.1622, "step": 7222 }, { "epoch": 2.192441948702383, "grad_norm": 0.7149022221565247, "learning_rate": 5.617861482381531e-05, "loss": 1.1412, "step": 7223 }, { "epoch": 2.192745484899074, "grad_norm": 0.6524426937103271, "learning_rate": 5.617253948967194e-05, "loss": 1.482, "step": 7224 }, { "epoch": 2.193049021095766, "grad_norm": 0.6739472150802612, "learning_rate": 5.616646415552855e-05, "loss": 1.585, "step": 7225 }, { "epoch": 2.193352557292457, "grad_norm": 0.7816546559333801, "learning_rate": 5.616038882138518e-05, "loss": 0.8333, "step": 7226 }, { "epoch": 2.1936560934891487, "grad_norm": 0.639371395111084, "learning_rate": 5.6154313487241806e-05, "loss": 1.5572, "step": 7227 }, { "epoch": 2.19395962968584, "grad_norm": 0.860651969909668, "learning_rate": 5.614823815309842e-05, "loss": 1.3584, "step": 7228 }, { "epoch": 2.1942631658825316, "grad_norm": 0.8631687760353088, "learning_rate": 5.614216281895505e-05, "loss": 1.2089, "step": 7229 }, { "epoch": 2.1945667020792228, "grad_norm": 0.7528846263885498, "learning_rate": 5.613608748481167e-05, "loss": 1.1223, "step": 7230 }, { "epoch": 2.1948702382759144, "grad_norm": 0.6682392954826355, "learning_rate": 5.613001215066829e-05, "loss": 0.9837, "step": 7231 }, { "epoch": 2.1951737744726056, "grad_norm": 0.8789331912994385, "learning_rate": 5.612393681652491e-05, "loss": 0.9464, "step": 7232 }, { "epoch": 2.1954773106692973, "grad_norm": 0.8156276941299438, "learning_rate": 5.611786148238153e-05, "loss": 1.3571, "step": 7233 }, { "epoch": 2.195780846865989, "grad_norm": 0.9034867882728577, "learning_rate": 5.6111786148238154e-05, "loss": 1.4531, "step": 7234 }, { "epoch": 2.19608438306268, "grad_norm": 0.773789644241333, "learning_rate": 5.610571081409478e-05, "loss": 1.2396, "step": 7235 }, { "epoch": 2.196387919259372, "grad_norm": 0.753548800945282, "learning_rate": 5.6099635479951395e-05, "loss": 1.3218, "step": 7236 }, { "epoch": 2.196691455456063, "grad_norm": 0.8552380800247192, "learning_rate": 5.609356014580802e-05, "loss": 1.1841, "step": 7237 }, { "epoch": 2.1969949916527547, "grad_norm": 0.7362204790115356, "learning_rate": 5.608748481166465e-05, "loss": 1.7553, "step": 7238 }, { "epoch": 2.197298527849446, "grad_norm": 0.7145305275917053, "learning_rate": 5.608140947752126e-05, "loss": 0.7013, "step": 7239 }, { "epoch": 2.1976020640461376, "grad_norm": 0.8223626613616943, "learning_rate": 5.607533414337789e-05, "loss": 0.9573, "step": 7240 }, { "epoch": 2.1979056002428288, "grad_norm": 0.6958496570587158, "learning_rate": 5.6069258809234516e-05, "loss": 1.1011, "step": 7241 }, { "epoch": 2.1982091364395204, "grad_norm": 0.7131595611572266, "learning_rate": 5.606318347509113e-05, "loss": 0.9728, "step": 7242 }, { "epoch": 2.1985126726362116, "grad_norm": 0.8090917468070984, "learning_rate": 5.605710814094776e-05, "loss": 1.2979, "step": 7243 }, { "epoch": 2.1988162088329033, "grad_norm": 0.7696943879127502, "learning_rate": 5.605103280680438e-05, "loss": 1.4695, "step": 7244 }, { "epoch": 2.199119745029595, "grad_norm": 0.759185791015625, "learning_rate": 5.6044957472661e-05, "loss": 1.2634, "step": 7245 }, { "epoch": 2.199423281226286, "grad_norm": 0.7122607827186584, "learning_rate": 5.603888213851762e-05, "loss": 1.5165, "step": 7246 }, { "epoch": 2.199726817422978, "grad_norm": 0.8475228548049927, "learning_rate": 5.603280680437424e-05, "loss": 0.8859, "step": 7247 }, { "epoch": 2.200030353619669, "grad_norm": 0.8220751285552979, "learning_rate": 5.6026731470230864e-05, "loss": 1.274, "step": 7248 }, { "epoch": 2.2003338898163607, "grad_norm": 0.5795495510101318, "learning_rate": 5.602065613608749e-05, "loss": 1.6139, "step": 7249 }, { "epoch": 2.200637426013052, "grad_norm": 0.7077553272247314, "learning_rate": 5.6014580801944105e-05, "loss": 1.5815, "step": 7250 }, { "epoch": 2.2009409622097436, "grad_norm": 0.7636668682098389, "learning_rate": 5.600850546780073e-05, "loss": 1.2543, "step": 7251 }, { "epoch": 2.2012444984064348, "grad_norm": 0.8725208640098572, "learning_rate": 5.600243013365736e-05, "loss": 0.8778, "step": 7252 }, { "epoch": 2.2015480346031264, "grad_norm": 0.8233357071876526, "learning_rate": 5.599635479951397e-05, "loss": 1.4274, "step": 7253 }, { "epoch": 2.201851570799818, "grad_norm": 0.737484872341156, "learning_rate": 5.59902794653706e-05, "loss": 0.9487, "step": 7254 }, { "epoch": 2.2021551069965093, "grad_norm": 0.7442474961280823, "learning_rate": 5.5984204131227226e-05, "loss": 1.5195, "step": 7255 }, { "epoch": 2.202458643193201, "grad_norm": 0.6351808309555054, "learning_rate": 5.5978128797083836e-05, "loss": 1.7236, "step": 7256 }, { "epoch": 2.202762179389892, "grad_norm": 1.272067666053772, "learning_rate": 5.597205346294047e-05, "loss": 1.1462, "step": 7257 }, { "epoch": 2.203065715586584, "grad_norm": 0.8174318075180054, "learning_rate": 5.596597812879709e-05, "loss": 1.4007, "step": 7258 }, { "epoch": 2.203369251783275, "grad_norm": 0.7911064028739929, "learning_rate": 5.595990279465371e-05, "loss": 1.4344, "step": 7259 }, { "epoch": 2.2036727879799667, "grad_norm": 0.5833910703659058, "learning_rate": 5.595382746051033e-05, "loss": 1.2447, "step": 7260 }, { "epoch": 2.203976324176658, "grad_norm": 0.793631374835968, "learning_rate": 5.594775212636695e-05, "loss": 0.8962, "step": 7261 }, { "epoch": 2.2042798603733496, "grad_norm": 0.8431137204170227, "learning_rate": 5.5941676792223574e-05, "loss": 1.0257, "step": 7262 }, { "epoch": 2.204583396570041, "grad_norm": 0.9787790775299072, "learning_rate": 5.59356014580802e-05, "loss": 1.5493, "step": 7263 }, { "epoch": 2.2048869327667324, "grad_norm": 0.7261353135108948, "learning_rate": 5.5929526123936815e-05, "loss": 0.7474, "step": 7264 }, { "epoch": 2.205190468963424, "grad_norm": 0.9692999124526978, "learning_rate": 5.592345078979344e-05, "loss": 1.408, "step": 7265 }, { "epoch": 2.2054940051601153, "grad_norm": 0.5326679348945618, "learning_rate": 5.591737545565007e-05, "loss": 1.5479, "step": 7266 }, { "epoch": 2.205797541356807, "grad_norm": 0.8038820028305054, "learning_rate": 5.591130012150668e-05, "loss": 1.2581, "step": 7267 }, { "epoch": 2.206101077553498, "grad_norm": 0.6147705912590027, "learning_rate": 5.5905224787363305e-05, "loss": 1.0921, "step": 7268 }, { "epoch": 2.20640461375019, "grad_norm": 0.8090271949768066, "learning_rate": 5.5899149453219936e-05, "loss": 0.9203, "step": 7269 }, { "epoch": 2.206708149946881, "grad_norm": 0.6505485773086548, "learning_rate": 5.5893074119076546e-05, "loss": 1.2222, "step": 7270 }, { "epoch": 2.2070116861435727, "grad_norm": 0.6681842803955078, "learning_rate": 5.588699878493318e-05, "loss": 1.3745, "step": 7271 }, { "epoch": 2.207315222340264, "grad_norm": 0.6756628751754761, "learning_rate": 5.58809234507898e-05, "loss": 1.2509, "step": 7272 }, { "epoch": 2.2076187585369555, "grad_norm": 1.3730113506317139, "learning_rate": 5.587484811664642e-05, "loss": 0.9545, "step": 7273 }, { "epoch": 2.207922294733647, "grad_norm": 0.7486765384674072, "learning_rate": 5.586877278250304e-05, "loss": 1.2173, "step": 7274 }, { "epoch": 2.2082258309303384, "grad_norm": 0.7601925730705261, "learning_rate": 5.586269744835966e-05, "loss": 0.8298, "step": 7275 }, { "epoch": 2.20852936712703, "grad_norm": 1.0399149656295776, "learning_rate": 5.5856622114216284e-05, "loss": 1.4147, "step": 7276 }, { "epoch": 2.2088329033237213, "grad_norm": 0.5739341974258423, "learning_rate": 5.585054678007291e-05, "loss": 1.2772, "step": 7277 }, { "epoch": 2.209136439520413, "grad_norm": 0.6940932273864746, "learning_rate": 5.5844471445929526e-05, "loss": 1.1425, "step": 7278 }, { "epoch": 2.209439975717104, "grad_norm": 0.7629711627960205, "learning_rate": 5.583839611178615e-05, "loss": 1.2434, "step": 7279 }, { "epoch": 2.209743511913796, "grad_norm": 0.7137802839279175, "learning_rate": 5.5832320777642774e-05, "loss": 1.4219, "step": 7280 }, { "epoch": 2.210047048110487, "grad_norm": 0.7725697159767151, "learning_rate": 5.582624544349939e-05, "loss": 1.6238, "step": 7281 }, { "epoch": 2.2103505843071787, "grad_norm": 0.7074224352836609, "learning_rate": 5.5820170109356015e-05, "loss": 1.2055, "step": 7282 }, { "epoch": 2.21065412050387, "grad_norm": 0.7771949172019958, "learning_rate": 5.5814094775212646e-05, "loss": 1.2759, "step": 7283 }, { "epoch": 2.2109576567005615, "grad_norm": 0.9702246189117432, "learning_rate": 5.5808019441069256e-05, "loss": 1.16, "step": 7284 }, { "epoch": 2.211261192897253, "grad_norm": 1.0390164852142334, "learning_rate": 5.580194410692589e-05, "loss": 1.6189, "step": 7285 }, { "epoch": 2.2115647290939444, "grad_norm": 0.7810035943984985, "learning_rate": 5.57958687727825e-05, "loss": 1.3802, "step": 7286 }, { "epoch": 2.211868265290636, "grad_norm": 0.6858797669410706, "learning_rate": 5.578979343863913e-05, "loss": 1.0308, "step": 7287 }, { "epoch": 2.2121718014873273, "grad_norm": 0.8140575289726257, "learning_rate": 5.578371810449575e-05, "loss": 1.175, "step": 7288 }, { "epoch": 2.212475337684019, "grad_norm": 0.7919504642486572, "learning_rate": 5.577764277035237e-05, "loss": 1.1394, "step": 7289 }, { "epoch": 2.21277887388071, "grad_norm": 0.6557730436325073, "learning_rate": 5.5771567436208994e-05, "loss": 1.4748, "step": 7290 }, { "epoch": 2.213082410077402, "grad_norm": 0.8063220977783203, "learning_rate": 5.576549210206562e-05, "loss": 1.4965, "step": 7291 }, { "epoch": 2.213385946274093, "grad_norm": 0.8114713430404663, "learning_rate": 5.5759416767922236e-05, "loss": 1.2698, "step": 7292 }, { "epoch": 2.2136894824707847, "grad_norm": 0.7607464790344238, "learning_rate": 5.575334143377886e-05, "loss": 1.6277, "step": 7293 }, { "epoch": 2.213993018667476, "grad_norm": 0.8086090683937073, "learning_rate": 5.5747266099635484e-05, "loss": 1.1822, "step": 7294 }, { "epoch": 2.2142965548641675, "grad_norm": 0.6083495616912842, "learning_rate": 5.57411907654921e-05, "loss": 1.6789, "step": 7295 }, { "epoch": 2.214600091060859, "grad_norm": 0.6319407820701599, "learning_rate": 5.5735115431348725e-05, "loss": 1.0977, "step": 7296 }, { "epoch": 2.2149036272575504, "grad_norm": 0.7058820724487305, "learning_rate": 5.5729040097205356e-05, "loss": 1.5291, "step": 7297 }, { "epoch": 2.215207163454242, "grad_norm": 0.7968228459358215, "learning_rate": 5.5722964763061967e-05, "loss": 1.2518, "step": 7298 }, { "epoch": 2.2155106996509333, "grad_norm": 0.8061215281486511, "learning_rate": 5.57168894289186e-05, "loss": 1.2453, "step": 7299 }, { "epoch": 2.215814235847625, "grad_norm": 0.8020003437995911, "learning_rate": 5.571081409477521e-05, "loss": 1.4674, "step": 7300 }, { "epoch": 2.216117772044316, "grad_norm": 0.8562387824058533, "learning_rate": 5.570473876063184e-05, "loss": 0.918, "step": 7301 }, { "epoch": 2.216421308241008, "grad_norm": 0.5621716976165771, "learning_rate": 5.569866342648846e-05, "loss": 1.2054, "step": 7302 }, { "epoch": 2.216724844437699, "grad_norm": 0.5586440563201904, "learning_rate": 5.569258809234508e-05, "loss": 1.2159, "step": 7303 }, { "epoch": 2.2170283806343907, "grad_norm": 0.6557493805885315, "learning_rate": 5.5686512758201704e-05, "loss": 1.1882, "step": 7304 }, { "epoch": 2.217331916831082, "grad_norm": 0.71869957447052, "learning_rate": 5.568043742405833e-05, "loss": 1.2973, "step": 7305 }, { "epoch": 2.2176354530277735, "grad_norm": 0.5254454016685486, "learning_rate": 5.5674362089914946e-05, "loss": 1.6473, "step": 7306 }, { "epoch": 2.217938989224465, "grad_norm": 0.626685380935669, "learning_rate": 5.566828675577157e-05, "loss": 1.5708, "step": 7307 }, { "epoch": 2.2182425254211564, "grad_norm": 0.7227777242660522, "learning_rate": 5.5662211421628194e-05, "loss": 1.3069, "step": 7308 }, { "epoch": 2.218546061617848, "grad_norm": 0.7912486791610718, "learning_rate": 5.565613608748481e-05, "loss": 1.3601, "step": 7309 }, { "epoch": 2.2188495978145393, "grad_norm": 0.9218525886535645, "learning_rate": 5.5650060753341435e-05, "loss": 0.9959, "step": 7310 }, { "epoch": 2.219153134011231, "grad_norm": 0.5446337461471558, "learning_rate": 5.5643985419198066e-05, "loss": 1.0407, "step": 7311 }, { "epoch": 2.219456670207922, "grad_norm": 0.7623136639595032, "learning_rate": 5.5637910085054677e-05, "loss": 1.6247, "step": 7312 }, { "epoch": 2.219760206404614, "grad_norm": 0.4917345643043518, "learning_rate": 5.563183475091131e-05, "loss": 1.0657, "step": 7313 }, { "epoch": 2.220063742601305, "grad_norm": 0.7134093046188354, "learning_rate": 5.562575941676792e-05, "loss": 1.1832, "step": 7314 }, { "epoch": 2.2203672787979967, "grad_norm": 0.9278779029846191, "learning_rate": 5.561968408262455e-05, "loss": 1.0206, "step": 7315 }, { "epoch": 2.2206708149946883, "grad_norm": 0.6090062260627747, "learning_rate": 5.561360874848117e-05, "loss": 1.5232, "step": 7316 }, { "epoch": 2.2209743511913795, "grad_norm": 0.7449427247047424, "learning_rate": 5.560753341433779e-05, "loss": 1.4115, "step": 7317 }, { "epoch": 2.221277887388071, "grad_norm": 0.735029399394989, "learning_rate": 5.5601458080194414e-05, "loss": 1.7686, "step": 7318 }, { "epoch": 2.2215814235847624, "grad_norm": 0.7010538578033447, "learning_rate": 5.559538274605104e-05, "loss": 1.6045, "step": 7319 }, { "epoch": 2.221884959781454, "grad_norm": 0.6299903988838196, "learning_rate": 5.5589307411907656e-05, "loss": 1.4633, "step": 7320 }, { "epoch": 2.2221884959781453, "grad_norm": 0.81271892786026, "learning_rate": 5.558323207776428e-05, "loss": 1.116, "step": 7321 }, { "epoch": 2.222492032174837, "grad_norm": 0.6342976689338684, "learning_rate": 5.5577156743620904e-05, "loss": 1.1627, "step": 7322 }, { "epoch": 2.222795568371528, "grad_norm": 0.6368468999862671, "learning_rate": 5.557108140947752e-05, "loss": 1.6084, "step": 7323 }, { "epoch": 2.22309910456822, "grad_norm": 0.9048125743865967, "learning_rate": 5.5565006075334145e-05, "loss": 1.2591, "step": 7324 }, { "epoch": 2.223402640764911, "grad_norm": 0.684788167476654, "learning_rate": 5.5558930741190776e-05, "loss": 0.9835, "step": 7325 }, { "epoch": 2.2237061769616027, "grad_norm": 0.7801753878593445, "learning_rate": 5.555285540704739e-05, "loss": 1.4924, "step": 7326 }, { "epoch": 2.2240097131582943, "grad_norm": 0.7492843866348267, "learning_rate": 5.554678007290402e-05, "loss": 1.2899, "step": 7327 }, { "epoch": 2.2243132493549855, "grad_norm": 0.587412416934967, "learning_rate": 5.554070473876063e-05, "loss": 1.386, "step": 7328 }, { "epoch": 2.224616785551677, "grad_norm": 0.8866457939147949, "learning_rate": 5.553462940461726e-05, "loss": 1.4909, "step": 7329 }, { "epoch": 2.2249203217483684, "grad_norm": 0.7808331847190857, "learning_rate": 5.552855407047388e-05, "loss": 1.3854, "step": 7330 }, { "epoch": 2.22522385794506, "grad_norm": 0.635968029499054, "learning_rate": 5.5522478736330493e-05, "loss": 1.3924, "step": 7331 }, { "epoch": 2.2255273941417513, "grad_norm": 0.7001415491104126, "learning_rate": 5.5516403402187124e-05, "loss": 0.9052, "step": 7332 }, { "epoch": 2.225830930338443, "grad_norm": 0.7402334213256836, "learning_rate": 5.551032806804375e-05, "loss": 1.4398, "step": 7333 }, { "epoch": 2.226134466535134, "grad_norm": 0.5318384766578674, "learning_rate": 5.5504252733900366e-05, "loss": 1.1877, "step": 7334 }, { "epoch": 2.226438002731826, "grad_norm": 0.7070531845092773, "learning_rate": 5.549817739975699e-05, "loss": 1.4545, "step": 7335 }, { "epoch": 2.2267415389285174, "grad_norm": 0.7260397672653198, "learning_rate": 5.5492102065613614e-05, "loss": 1.4061, "step": 7336 }, { "epoch": 2.2270450751252087, "grad_norm": 0.6360136270523071, "learning_rate": 5.548602673147023e-05, "loss": 1.7614, "step": 7337 }, { "epoch": 2.2273486113219003, "grad_norm": 0.6618418097496033, "learning_rate": 5.5479951397326855e-05, "loss": 1.0696, "step": 7338 }, { "epoch": 2.2276521475185915, "grad_norm": 0.8529426455497742, "learning_rate": 5.5473876063183486e-05, "loss": 1.4114, "step": 7339 }, { "epoch": 2.227955683715283, "grad_norm": 0.7641217112541199, "learning_rate": 5.54678007290401e-05, "loss": 1.3159, "step": 7340 }, { "epoch": 2.2282592199119744, "grad_norm": 0.7829504609107971, "learning_rate": 5.546172539489672e-05, "loss": 1.352, "step": 7341 }, { "epoch": 2.228562756108666, "grad_norm": 0.7978317737579346, "learning_rate": 5.545565006075334e-05, "loss": 1.3526, "step": 7342 }, { "epoch": 2.2288662923053573, "grad_norm": 0.8331364393234253, "learning_rate": 5.544957472660996e-05, "loss": 1.2954, "step": 7343 }, { "epoch": 2.229169828502049, "grad_norm": 0.7829442620277405, "learning_rate": 5.544349939246659e-05, "loss": 1.3813, "step": 7344 }, { "epoch": 2.22947336469874, "grad_norm": 0.6786047220230103, "learning_rate": 5.5437424058323204e-05, "loss": 1.159, "step": 7345 }, { "epoch": 2.229776900895432, "grad_norm": 0.8328139781951904, "learning_rate": 5.5431348724179834e-05, "loss": 1.5405, "step": 7346 }, { "epoch": 2.2300804370921234, "grad_norm": 0.49961405992507935, "learning_rate": 5.542527339003646e-05, "loss": 1.5272, "step": 7347 }, { "epoch": 2.2303839732888147, "grad_norm": 0.8572553992271423, "learning_rate": 5.5419198055893076e-05, "loss": 1.3987, "step": 7348 }, { "epoch": 2.2306875094855063, "grad_norm": 0.6319640278816223, "learning_rate": 5.54131227217497e-05, "loss": 1.242, "step": 7349 }, { "epoch": 2.2309910456821975, "grad_norm": 0.617652416229248, "learning_rate": 5.5407047387606324e-05, "loss": 1.6851, "step": 7350 }, { "epoch": 2.231294581878889, "grad_norm": 0.6495313048362732, "learning_rate": 5.540097205346294e-05, "loss": 1.487, "step": 7351 }, { "epoch": 2.2315981180755804, "grad_norm": 0.9778041839599609, "learning_rate": 5.5394896719319565e-05, "loss": 1.4783, "step": 7352 }, { "epoch": 2.231901654272272, "grad_norm": 0.9432831406593323, "learning_rate": 5.538882138517619e-05, "loss": 0.8277, "step": 7353 }, { "epoch": 2.2322051904689633, "grad_norm": 0.8032881021499634, "learning_rate": 5.538274605103281e-05, "loss": 1.3029, "step": 7354 }, { "epoch": 2.232508726665655, "grad_norm": 0.5441564917564392, "learning_rate": 5.537667071688943e-05, "loss": 1.4384, "step": 7355 }, { "epoch": 2.232812262862346, "grad_norm": 0.8036965727806091, "learning_rate": 5.537059538274605e-05, "loss": 1.4117, "step": 7356 }, { "epoch": 2.233115799059038, "grad_norm": 1.3503347635269165, "learning_rate": 5.536452004860267e-05, "loss": 1.5275, "step": 7357 }, { "epoch": 2.2334193352557294, "grad_norm": 0.7045248746871948, "learning_rate": 5.53584447144593e-05, "loss": 1.3456, "step": 7358 }, { "epoch": 2.2337228714524207, "grad_norm": 0.708449125289917, "learning_rate": 5.5352369380315914e-05, "loss": 1.4649, "step": 7359 }, { "epoch": 2.2340264076491123, "grad_norm": 0.5780391693115234, "learning_rate": 5.5346294046172544e-05, "loss": 1.6705, "step": 7360 }, { "epoch": 2.2343299438458035, "grad_norm": 0.5943533778190613, "learning_rate": 5.534021871202917e-05, "loss": 1.2699, "step": 7361 }, { "epoch": 2.234633480042495, "grad_norm": 0.6117348074913025, "learning_rate": 5.5334143377885786e-05, "loss": 1.1232, "step": 7362 }, { "epoch": 2.2349370162391864, "grad_norm": 0.6989806890487671, "learning_rate": 5.532806804374241e-05, "loss": 1.1067, "step": 7363 }, { "epoch": 2.235240552435878, "grad_norm": 0.6868540644645691, "learning_rate": 5.5321992709599034e-05, "loss": 1.7178, "step": 7364 }, { "epoch": 2.2355440886325693, "grad_norm": 0.6501347422599792, "learning_rate": 5.531591737545565e-05, "loss": 0.9142, "step": 7365 }, { "epoch": 2.235847624829261, "grad_norm": 0.7098049521446228, "learning_rate": 5.5309842041312275e-05, "loss": 1.5727, "step": 7366 }, { "epoch": 2.236151161025952, "grad_norm": 0.8211485743522644, "learning_rate": 5.530376670716889e-05, "loss": 1.2265, "step": 7367 }, { "epoch": 2.236454697222644, "grad_norm": 0.636452853679657, "learning_rate": 5.529769137302552e-05, "loss": 1.0186, "step": 7368 }, { "epoch": 2.2367582334193354, "grad_norm": 0.8176175355911255, "learning_rate": 5.529161603888214e-05, "loss": 1.3212, "step": 7369 }, { "epoch": 2.2370617696160267, "grad_norm": 0.5893675088882446, "learning_rate": 5.528554070473876e-05, "loss": 1.6353, "step": 7370 }, { "epoch": 2.2373653058127183, "grad_norm": 0.7504252791404724, "learning_rate": 5.527946537059538e-05, "loss": 1.2427, "step": 7371 }, { "epoch": 2.2376688420094095, "grad_norm": 0.8744253516197205, "learning_rate": 5.527339003645201e-05, "loss": 1.1929, "step": 7372 }, { "epoch": 2.237972378206101, "grad_norm": 0.6894361972808838, "learning_rate": 5.5267314702308624e-05, "loss": 1.34, "step": 7373 }, { "epoch": 2.2382759144027924, "grad_norm": 0.6582505106925964, "learning_rate": 5.5261239368165254e-05, "loss": 1.4561, "step": 7374 }, { "epoch": 2.238579450599484, "grad_norm": 0.7623777985572815, "learning_rate": 5.525516403402188e-05, "loss": 1.295, "step": 7375 }, { "epoch": 2.2388829867961753, "grad_norm": 0.9451694488525391, "learning_rate": 5.5249088699878496e-05, "loss": 1.1092, "step": 7376 }, { "epoch": 2.239186522992867, "grad_norm": 0.8134379386901855, "learning_rate": 5.524301336573512e-05, "loss": 1.1195, "step": 7377 }, { "epoch": 2.239490059189558, "grad_norm": 0.6125006079673767, "learning_rate": 5.5236938031591744e-05, "loss": 1.724, "step": 7378 }, { "epoch": 2.23979359538625, "grad_norm": 0.8471882939338684, "learning_rate": 5.523086269744836e-05, "loss": 1.4037, "step": 7379 }, { "epoch": 2.2400971315829414, "grad_norm": 0.7992955446243286, "learning_rate": 5.5224787363304985e-05, "loss": 1.3698, "step": 7380 }, { "epoch": 2.2404006677796326, "grad_norm": 0.6339590549468994, "learning_rate": 5.52187120291616e-05, "loss": 1.6158, "step": 7381 }, { "epoch": 2.2407042039763243, "grad_norm": 0.7954568862915039, "learning_rate": 5.521263669501823e-05, "loss": 1.2678, "step": 7382 }, { "epoch": 2.2410077401730155, "grad_norm": 0.7957431674003601, "learning_rate": 5.520656136087485e-05, "loss": 0.9818, "step": 7383 }, { "epoch": 2.241311276369707, "grad_norm": 0.6429740786552429, "learning_rate": 5.520048602673147e-05, "loss": 1.192, "step": 7384 }, { "epoch": 2.2416148125663984, "grad_norm": 0.7835260629653931, "learning_rate": 5.519441069258809e-05, "loss": 1.344, "step": 7385 }, { "epoch": 2.24191834876309, "grad_norm": 0.8486263751983643, "learning_rate": 5.518833535844472e-05, "loss": 1.1573, "step": 7386 }, { "epoch": 2.2422218849597813, "grad_norm": 0.809238612651825, "learning_rate": 5.5182260024301334e-05, "loss": 0.7624, "step": 7387 }, { "epoch": 2.242525421156473, "grad_norm": 0.8039452433586121, "learning_rate": 5.5176184690157965e-05, "loss": 0.9601, "step": 7388 }, { "epoch": 2.2428289573531646, "grad_norm": 0.9128976464271545, "learning_rate": 5.517010935601459e-05, "loss": 0.9799, "step": 7389 }, { "epoch": 2.2431324935498558, "grad_norm": 0.6718438863754272, "learning_rate": 5.5164034021871206e-05, "loss": 1.7084, "step": 7390 }, { "epoch": 2.2434360297465474, "grad_norm": 0.7414968609809875, "learning_rate": 5.515795868772783e-05, "loss": 1.3195, "step": 7391 }, { "epoch": 2.2437395659432386, "grad_norm": 0.7844395041465759, "learning_rate": 5.5151883353584454e-05, "loss": 0.8307, "step": 7392 }, { "epoch": 2.2440431021399303, "grad_norm": 0.6547634601593018, "learning_rate": 5.514580801944107e-05, "loss": 1.5043, "step": 7393 }, { "epoch": 2.2443466383366215, "grad_norm": 1.0427731275558472, "learning_rate": 5.5139732685297695e-05, "loss": 1.2751, "step": 7394 }, { "epoch": 2.244650174533313, "grad_norm": 0.7984392642974854, "learning_rate": 5.513365735115431e-05, "loss": 0.9469, "step": 7395 }, { "epoch": 2.2449537107300044, "grad_norm": 0.6773079037666321, "learning_rate": 5.512758201701094e-05, "loss": 1.1815, "step": 7396 }, { "epoch": 2.245257246926696, "grad_norm": 0.7443834543228149, "learning_rate": 5.512150668286756e-05, "loss": 1.3827, "step": 7397 }, { "epoch": 2.2455607831233877, "grad_norm": 0.6663649678230286, "learning_rate": 5.511543134872418e-05, "loss": 1.4079, "step": 7398 }, { "epoch": 2.245864319320079, "grad_norm": 0.6260837912559509, "learning_rate": 5.51093560145808e-05, "loss": 1.5346, "step": 7399 }, { "epoch": 2.2461678555167706, "grad_norm": 0.542671263217926, "learning_rate": 5.510328068043743e-05, "loss": 1.1236, "step": 7400 }, { "epoch": 2.2464713917134618, "grad_norm": 0.7045307755470276, "learning_rate": 5.5097205346294044e-05, "loss": 1.5626, "step": 7401 }, { "epoch": 2.2467749279101534, "grad_norm": 0.6996301412582397, "learning_rate": 5.5091130012150675e-05, "loss": 0.8182, "step": 7402 }, { "epoch": 2.2470784641068446, "grad_norm": 0.7701748013496399, "learning_rate": 5.50850546780073e-05, "loss": 1.3315, "step": 7403 }, { "epoch": 2.2473820003035363, "grad_norm": 0.5709070563316345, "learning_rate": 5.507897934386391e-05, "loss": 1.5871, "step": 7404 }, { "epoch": 2.2476855365002275, "grad_norm": 0.7984616160392761, "learning_rate": 5.507290400972054e-05, "loss": 1.217, "step": 7405 }, { "epoch": 2.247989072696919, "grad_norm": 0.7854733467102051, "learning_rate": 5.5066828675577164e-05, "loss": 1.2765, "step": 7406 }, { "epoch": 2.2482926088936104, "grad_norm": 0.7947596907615662, "learning_rate": 5.506075334143378e-05, "loss": 1.3284, "step": 7407 }, { "epoch": 2.248596145090302, "grad_norm": 0.5904085636138916, "learning_rate": 5.5054678007290406e-05, "loss": 1.8131, "step": 7408 }, { "epoch": 2.2488996812869937, "grad_norm": 0.6968871355056763, "learning_rate": 5.504860267314702e-05, "loss": 1.4713, "step": 7409 }, { "epoch": 2.249203217483685, "grad_norm": 0.8094422817230225, "learning_rate": 5.504252733900365e-05, "loss": 1.2643, "step": 7410 }, { "epoch": 2.2495067536803766, "grad_norm": 0.675478458404541, "learning_rate": 5.503645200486027e-05, "loss": 1.2157, "step": 7411 }, { "epoch": 2.2498102898770678, "grad_norm": 0.6240454912185669, "learning_rate": 5.503037667071689e-05, "loss": 1.5961, "step": 7412 }, { "epoch": 2.2501138260737594, "grad_norm": 0.8300707936286926, "learning_rate": 5.502430133657351e-05, "loss": 1.1565, "step": 7413 }, { "epoch": 2.2504173622704506, "grad_norm": 0.803261935710907, "learning_rate": 5.501822600243014e-05, "loss": 1.3636, "step": 7414 }, { "epoch": 2.2507208984671423, "grad_norm": 0.5902475714683533, "learning_rate": 5.5012150668286754e-05, "loss": 1.3799, "step": 7415 }, { "epoch": 2.2510244346638335, "grad_norm": 0.7087440490722656, "learning_rate": 5.500607533414338e-05, "loss": 1.2938, "step": 7416 }, { "epoch": 2.251327970860525, "grad_norm": 0.6584330201148987, "learning_rate": 5.500000000000001e-05, "loss": 1.5664, "step": 7417 }, { "epoch": 2.2516315070572164, "grad_norm": 0.7798967957496643, "learning_rate": 5.499392466585662e-05, "loss": 1.4939, "step": 7418 }, { "epoch": 2.251935043253908, "grad_norm": 0.8013840913772583, "learning_rate": 5.498784933171325e-05, "loss": 1.4353, "step": 7419 }, { "epoch": 2.2522385794505997, "grad_norm": 0.8025866150856018, "learning_rate": 5.4981773997569874e-05, "loss": 1.3093, "step": 7420 }, { "epoch": 2.252542115647291, "grad_norm": 0.826526939868927, "learning_rate": 5.497569866342649e-05, "loss": 1.3398, "step": 7421 }, { "epoch": 2.2528456518439826, "grad_norm": 1.1023015975952148, "learning_rate": 5.4969623329283116e-05, "loss": 1.4203, "step": 7422 }, { "epoch": 2.2531491880406738, "grad_norm": 0.8900132775306702, "learning_rate": 5.496354799513973e-05, "loss": 1.551, "step": 7423 }, { "epoch": 2.2534527242373654, "grad_norm": 0.7980222105979919, "learning_rate": 5.495747266099636e-05, "loss": 0.9455, "step": 7424 }, { "epoch": 2.2537562604340566, "grad_norm": 0.88653165102005, "learning_rate": 5.495139732685298e-05, "loss": 1.3634, "step": 7425 }, { "epoch": 2.2540597966307483, "grad_norm": 0.7236202955245972, "learning_rate": 5.49453219927096e-05, "loss": 0.6963, "step": 7426 }, { "epoch": 2.2543633328274395, "grad_norm": 0.6392729878425598, "learning_rate": 5.493924665856622e-05, "loss": 1.1514, "step": 7427 }, { "epoch": 2.254666869024131, "grad_norm": 0.738734781742096, "learning_rate": 5.4933171324422847e-05, "loss": 1.4334, "step": 7428 }, { "epoch": 2.2549704052208224, "grad_norm": 0.7367070317268372, "learning_rate": 5.4927095990279464e-05, "loss": 1.343, "step": 7429 }, { "epoch": 2.255273941417514, "grad_norm": 0.7800174355506897, "learning_rate": 5.492102065613609e-05, "loss": 1.2698, "step": 7430 }, { "epoch": 2.2555774776142057, "grad_norm": 0.6558547616004944, "learning_rate": 5.491494532199272e-05, "loss": 1.4547, "step": 7431 }, { "epoch": 2.255881013810897, "grad_norm": 0.6702367067337036, "learning_rate": 5.490886998784933e-05, "loss": 1.3955, "step": 7432 }, { "epoch": 2.2561845500075886, "grad_norm": 0.6517998576164246, "learning_rate": 5.490279465370596e-05, "loss": 1.3565, "step": 7433 }, { "epoch": 2.2564880862042798, "grad_norm": 0.7218438386917114, "learning_rate": 5.4896719319562584e-05, "loss": 0.7599, "step": 7434 }, { "epoch": 2.2567916224009714, "grad_norm": 0.7125388979911804, "learning_rate": 5.48906439854192e-05, "loss": 1.5283, "step": 7435 }, { "epoch": 2.2570951585976626, "grad_norm": 0.5857595801353455, "learning_rate": 5.4884568651275826e-05, "loss": 0.6817, "step": 7436 }, { "epoch": 2.2573986947943543, "grad_norm": 0.7023595571517944, "learning_rate": 5.487849331713244e-05, "loss": 1.5239, "step": 7437 }, { "epoch": 2.2577022309910455, "grad_norm": 0.8885743618011475, "learning_rate": 5.487241798298907e-05, "loss": 1.4502, "step": 7438 }, { "epoch": 2.258005767187737, "grad_norm": 0.8196373581886292, "learning_rate": 5.486634264884569e-05, "loss": 1.2389, "step": 7439 }, { "epoch": 2.2583093033844284, "grad_norm": 1.0717699527740479, "learning_rate": 5.486026731470231e-05, "loss": 1.2988, "step": 7440 }, { "epoch": 2.25861283958112, "grad_norm": 0.5756183862686157, "learning_rate": 5.485419198055893e-05, "loss": 1.4591, "step": 7441 }, { "epoch": 2.2589163757778117, "grad_norm": 0.8124812245368958, "learning_rate": 5.4848116646415557e-05, "loss": 1.1953, "step": 7442 }, { "epoch": 2.259219911974503, "grad_norm": 0.7616799473762512, "learning_rate": 5.4842041312272174e-05, "loss": 1.0254, "step": 7443 }, { "epoch": 2.2595234481711945, "grad_norm": 0.6586336493492126, "learning_rate": 5.48359659781288e-05, "loss": 0.9613, "step": 7444 }, { "epoch": 2.2598269843678858, "grad_norm": 0.7181385159492493, "learning_rate": 5.482989064398543e-05, "loss": 1.4333, "step": 7445 }, { "epoch": 2.2601305205645774, "grad_norm": 0.5932475328445435, "learning_rate": 5.482381530984204e-05, "loss": 1.1878, "step": 7446 }, { "epoch": 2.2604340567612686, "grad_norm": 0.6791607737541199, "learning_rate": 5.481773997569867e-05, "loss": 1.1296, "step": 7447 }, { "epoch": 2.2607375929579603, "grad_norm": 0.4580678641796112, "learning_rate": 5.4811664641555294e-05, "loss": 0.6515, "step": 7448 }, { "epoch": 2.261041129154652, "grad_norm": 0.8818070292472839, "learning_rate": 5.480558930741191e-05, "loss": 1.2604, "step": 7449 }, { "epoch": 2.261344665351343, "grad_norm": 0.8981488347053528, "learning_rate": 5.4799513973268536e-05, "loss": 1.2392, "step": 7450 }, { "epoch": 2.2616482015480344, "grad_norm": 0.7887245416641235, "learning_rate": 5.479343863912515e-05, "loss": 0.9956, "step": 7451 }, { "epoch": 2.261951737744726, "grad_norm": 0.6639705896377563, "learning_rate": 5.478736330498178e-05, "loss": 1.3412, "step": 7452 }, { "epoch": 2.2622552739414177, "grad_norm": 0.7820801138877869, "learning_rate": 5.47812879708384e-05, "loss": 1.6015, "step": 7453 }, { "epoch": 2.262558810138109, "grad_norm": 0.567541778087616, "learning_rate": 5.477521263669502e-05, "loss": 1.3132, "step": 7454 }, { "epoch": 2.2628623463348005, "grad_norm": 0.7936879396438599, "learning_rate": 5.476913730255164e-05, "loss": 0.9349, "step": 7455 }, { "epoch": 2.2631658825314918, "grad_norm": 0.6322331428527832, "learning_rate": 5.476306196840827e-05, "loss": 1.3739, "step": 7456 }, { "epoch": 2.2634694187281834, "grad_norm": 0.7208223938941956, "learning_rate": 5.4756986634264884e-05, "loss": 1.504, "step": 7457 }, { "epoch": 2.2637729549248746, "grad_norm": 0.6773326396942139, "learning_rate": 5.475091130012151e-05, "loss": 1.22, "step": 7458 }, { "epoch": 2.2640764911215663, "grad_norm": 0.6207496523857117, "learning_rate": 5.474483596597814e-05, "loss": 1.4626, "step": 7459 }, { "epoch": 2.264380027318258, "grad_norm": 0.7778047323226929, "learning_rate": 5.473876063183475e-05, "loss": 0.9966, "step": 7460 }, { "epoch": 2.264683563514949, "grad_norm": 0.762438952922821, "learning_rate": 5.473268529769138e-05, "loss": 1.2628, "step": 7461 }, { "epoch": 2.264987099711641, "grad_norm": 0.8586466908454895, "learning_rate": 5.472660996354799e-05, "loss": 0.9212, "step": 7462 }, { "epoch": 2.265290635908332, "grad_norm": 0.6765137314796448, "learning_rate": 5.472053462940462e-05, "loss": 1.1541, "step": 7463 }, { "epoch": 2.2655941721050237, "grad_norm": 0.6344521641731262, "learning_rate": 5.4714459295261246e-05, "loss": 1.8874, "step": 7464 }, { "epoch": 2.265897708301715, "grad_norm": 0.6776248812675476, "learning_rate": 5.4708383961117856e-05, "loss": 1.6253, "step": 7465 }, { "epoch": 2.2662012444984065, "grad_norm": 0.8977546691894531, "learning_rate": 5.470230862697449e-05, "loss": 0.8644, "step": 7466 }, { "epoch": 2.2665047806950978, "grad_norm": 0.5835941433906555, "learning_rate": 5.469623329283111e-05, "loss": 1.3991, "step": 7467 }, { "epoch": 2.2668083168917894, "grad_norm": 0.6627216935157776, "learning_rate": 5.469015795868773e-05, "loss": 1.4281, "step": 7468 }, { "epoch": 2.2671118530884806, "grad_norm": 0.7152976989746094, "learning_rate": 5.468408262454435e-05, "loss": 1.295, "step": 7469 }, { "epoch": 2.2674153892851723, "grad_norm": 0.7702943086624146, "learning_rate": 5.467800729040098e-05, "loss": 1.4717, "step": 7470 }, { "epoch": 2.267718925481864, "grad_norm": 0.5449094772338867, "learning_rate": 5.4671931956257594e-05, "loss": 1.11, "step": 7471 }, { "epoch": 2.268022461678555, "grad_norm": 0.6027811765670776, "learning_rate": 5.466585662211422e-05, "loss": 1.0541, "step": 7472 }, { "epoch": 2.268325997875247, "grad_norm": 0.7328689098358154, "learning_rate": 5.465978128797085e-05, "loss": 1.4477, "step": 7473 }, { "epoch": 2.268629534071938, "grad_norm": 0.6849613189697266, "learning_rate": 5.465370595382746e-05, "loss": 0.9747, "step": 7474 }, { "epoch": 2.2689330702686297, "grad_norm": 0.8007455468177795, "learning_rate": 5.464763061968409e-05, "loss": 1.4307, "step": 7475 }, { "epoch": 2.269236606465321, "grad_norm": 0.7696539759635925, "learning_rate": 5.46415552855407e-05, "loss": 1.3663, "step": 7476 }, { "epoch": 2.2695401426620125, "grad_norm": 0.9261662364006042, "learning_rate": 5.4635479951397325e-05, "loss": 1.4261, "step": 7477 }, { "epoch": 2.2698436788587038, "grad_norm": 0.6188323497772217, "learning_rate": 5.4629404617253956e-05, "loss": 1.0909, "step": 7478 }, { "epoch": 2.2701472150553954, "grad_norm": 0.8149519562721252, "learning_rate": 5.4623329283110566e-05, "loss": 0.647, "step": 7479 }, { "epoch": 2.2704507512520866, "grad_norm": 0.8626388907432556, "learning_rate": 5.46172539489672e-05, "loss": 1.2524, "step": 7480 }, { "epoch": 2.2707542874487783, "grad_norm": 0.7279552817344666, "learning_rate": 5.461117861482382e-05, "loss": 1.2493, "step": 7481 }, { "epoch": 2.27105782364547, "grad_norm": 0.7417263984680176, "learning_rate": 5.460510328068044e-05, "loss": 1.0718, "step": 7482 }, { "epoch": 2.271361359842161, "grad_norm": 0.7569875121116638, "learning_rate": 5.459902794653706e-05, "loss": 1.4112, "step": 7483 }, { "epoch": 2.271664896038853, "grad_norm": 0.6595586538314819, "learning_rate": 5.459295261239369e-05, "loss": 1.4326, "step": 7484 }, { "epoch": 2.271968432235544, "grad_norm": 0.5473843216896057, "learning_rate": 5.4586877278250304e-05, "loss": 1.6897, "step": 7485 }, { "epoch": 2.2722719684322357, "grad_norm": 0.7562367916107178, "learning_rate": 5.458080194410693e-05, "loss": 1.5851, "step": 7486 }, { "epoch": 2.272575504628927, "grad_norm": 0.6894574761390686, "learning_rate": 5.457472660996356e-05, "loss": 1.1538, "step": 7487 }, { "epoch": 2.2728790408256185, "grad_norm": 0.9435144662857056, "learning_rate": 5.456865127582017e-05, "loss": 1.3727, "step": 7488 }, { "epoch": 2.2731825770223097, "grad_norm": 0.766242504119873, "learning_rate": 5.4562575941676794e-05, "loss": 1.064, "step": 7489 }, { "epoch": 2.2734861132190014, "grad_norm": 0.6115533113479614, "learning_rate": 5.455650060753341e-05, "loss": 0.8242, "step": 7490 }, { "epoch": 2.2737896494156926, "grad_norm": 0.5504389405250549, "learning_rate": 5.4550425273390035e-05, "loss": 0.9371, "step": 7491 }, { "epoch": 2.2740931856123843, "grad_norm": 1.0381739139556885, "learning_rate": 5.4544349939246666e-05, "loss": 1.1618, "step": 7492 }, { "epoch": 2.274396721809076, "grad_norm": 0.9171277284622192, "learning_rate": 5.4538274605103276e-05, "loss": 1.2575, "step": 7493 }, { "epoch": 2.274700258005767, "grad_norm": 0.9959700107574463, "learning_rate": 5.453219927095991e-05, "loss": 1.3317, "step": 7494 }, { "epoch": 2.275003794202459, "grad_norm": 0.6824067831039429, "learning_rate": 5.452612393681653e-05, "loss": 1.2123, "step": 7495 }, { "epoch": 2.27530733039915, "grad_norm": 0.6326625347137451, "learning_rate": 5.452004860267315e-05, "loss": 1.6067, "step": 7496 }, { "epoch": 2.2756108665958417, "grad_norm": 0.7540733814239502, "learning_rate": 5.451397326852977e-05, "loss": 1.3346, "step": 7497 }, { "epoch": 2.275914402792533, "grad_norm": 0.643101692199707, "learning_rate": 5.45078979343864e-05, "loss": 0.8042, "step": 7498 }, { "epoch": 2.2762179389892245, "grad_norm": 0.892805278301239, "learning_rate": 5.4501822600243014e-05, "loss": 0.8651, "step": 7499 }, { "epoch": 2.2765214751859157, "grad_norm": 0.8604877591133118, "learning_rate": 5.449574726609964e-05, "loss": 1.4444, "step": 7500 }, { "epoch": 2.2768250113826074, "grad_norm": 0.6573825478553772, "learning_rate": 5.448967193195626e-05, "loss": 1.2915, "step": 7501 }, { "epoch": 2.2771285475792986, "grad_norm": 0.7212510108947754, "learning_rate": 5.448359659781288e-05, "loss": 0.9891, "step": 7502 }, { "epoch": 2.2774320837759903, "grad_norm": 0.7919306755065918, "learning_rate": 5.4477521263669504e-05, "loss": 1.2099, "step": 7503 }, { "epoch": 2.277735619972682, "grad_norm": 0.6194910407066345, "learning_rate": 5.447144592952612e-05, "loss": 1.5992, "step": 7504 }, { "epoch": 2.278039156169373, "grad_norm": 0.8497462868690491, "learning_rate": 5.4465370595382745e-05, "loss": 1.2713, "step": 7505 }, { "epoch": 2.278342692366065, "grad_norm": 0.747949481010437, "learning_rate": 5.4459295261239376e-05, "loss": 1.0268, "step": 7506 }, { "epoch": 2.278646228562756, "grad_norm": 0.705798864364624, "learning_rate": 5.4453219927095986e-05, "loss": 1.3496, "step": 7507 }, { "epoch": 2.2789497647594477, "grad_norm": 0.7338413000106812, "learning_rate": 5.444714459295262e-05, "loss": 1.2911, "step": 7508 }, { "epoch": 2.279253300956139, "grad_norm": 0.8935163021087646, "learning_rate": 5.444106925880924e-05, "loss": 1.2852, "step": 7509 }, { "epoch": 2.2795568371528305, "grad_norm": 0.7204432487487793, "learning_rate": 5.443499392466586e-05, "loss": 1.2556, "step": 7510 }, { "epoch": 2.2798603733495217, "grad_norm": 0.8553929924964905, "learning_rate": 5.442891859052248e-05, "loss": 1.056, "step": 7511 }, { "epoch": 2.2801639095462134, "grad_norm": 0.9489281177520752, "learning_rate": 5.442284325637911e-05, "loss": 1.4451, "step": 7512 }, { "epoch": 2.2804674457429046, "grad_norm": 0.8464581966400146, "learning_rate": 5.4416767922235724e-05, "loss": 1.0983, "step": 7513 }, { "epoch": 2.2807709819395963, "grad_norm": 0.8345524668693542, "learning_rate": 5.441069258809235e-05, "loss": 1.5898, "step": 7514 }, { "epoch": 2.281074518136288, "grad_norm": 0.9463378190994263, "learning_rate": 5.440461725394897e-05, "loss": 1.1437, "step": 7515 }, { "epoch": 2.281378054332979, "grad_norm": 0.799299955368042, "learning_rate": 5.439854191980559e-05, "loss": 1.3918, "step": 7516 }, { "epoch": 2.281681590529671, "grad_norm": 0.8265707492828369, "learning_rate": 5.4392466585662214e-05, "loss": 1.3374, "step": 7517 }, { "epoch": 2.281985126726362, "grad_norm": 0.6352429986000061, "learning_rate": 5.438639125151883e-05, "loss": 0.7019, "step": 7518 }, { "epoch": 2.2822886629230537, "grad_norm": 0.8135592937469482, "learning_rate": 5.4380315917375455e-05, "loss": 1.278, "step": 7519 }, { "epoch": 2.282592199119745, "grad_norm": 0.7900860905647278, "learning_rate": 5.4374240583232086e-05, "loss": 1.128, "step": 7520 }, { "epoch": 2.2828957353164365, "grad_norm": 0.8089602589607239, "learning_rate": 5.4368165249088696e-05, "loss": 1.3828, "step": 7521 }, { "epoch": 2.283199271513128, "grad_norm": 0.8169485330581665, "learning_rate": 5.436208991494533e-05, "loss": 1.5373, "step": 7522 }, { "epoch": 2.2835028077098194, "grad_norm": 0.7280807495117188, "learning_rate": 5.435601458080195e-05, "loss": 1.485, "step": 7523 }, { "epoch": 2.2838063439065106, "grad_norm": 0.7815548181533813, "learning_rate": 5.434993924665857e-05, "loss": 1.4671, "step": 7524 }, { "epoch": 2.2841098801032023, "grad_norm": 0.6063195466995239, "learning_rate": 5.434386391251519e-05, "loss": 1.779, "step": 7525 }, { "epoch": 2.284413416299894, "grad_norm": 0.8827876448631287, "learning_rate": 5.433778857837182e-05, "loss": 1.4556, "step": 7526 }, { "epoch": 2.284716952496585, "grad_norm": 0.7507184743881226, "learning_rate": 5.4331713244228434e-05, "loss": 1.0536, "step": 7527 }, { "epoch": 2.285020488693277, "grad_norm": 0.820326566696167, "learning_rate": 5.432563791008506e-05, "loss": 1.5834, "step": 7528 }, { "epoch": 2.285324024889968, "grad_norm": 0.7697584629058838, "learning_rate": 5.431956257594168e-05, "loss": 0.9109, "step": 7529 }, { "epoch": 2.2856275610866597, "grad_norm": 0.5971083045005798, "learning_rate": 5.43134872417983e-05, "loss": 1.093, "step": 7530 }, { "epoch": 2.285931097283351, "grad_norm": 0.7807541489601135, "learning_rate": 5.4307411907654924e-05, "loss": 1.1812, "step": 7531 }, { "epoch": 2.2862346334800425, "grad_norm": 0.6648563146591187, "learning_rate": 5.430133657351154e-05, "loss": 1.3252, "step": 7532 }, { "epoch": 2.286538169676734, "grad_norm": 0.842267632484436, "learning_rate": 5.4295261239368165e-05, "loss": 1.0992, "step": 7533 }, { "epoch": 2.2868417058734254, "grad_norm": 0.7376732230186462, "learning_rate": 5.4289185905224796e-05, "loss": 1.4368, "step": 7534 }, { "epoch": 2.287145242070117, "grad_norm": 0.6069490313529968, "learning_rate": 5.4283110571081407e-05, "loss": 1.8016, "step": 7535 }, { "epoch": 2.2874487782668083, "grad_norm": 0.6976711750030518, "learning_rate": 5.427703523693804e-05, "loss": 1.0021, "step": 7536 }, { "epoch": 2.2877523144635, "grad_norm": 0.6691964864730835, "learning_rate": 5.427095990279466e-05, "loss": 1.3759, "step": 7537 }, { "epoch": 2.288055850660191, "grad_norm": 0.7515538930892944, "learning_rate": 5.426488456865127e-05, "loss": 1.3081, "step": 7538 }, { "epoch": 2.288359386856883, "grad_norm": 0.8624359965324402, "learning_rate": 5.42588092345079e-05, "loss": 1.0082, "step": 7539 }, { "epoch": 2.288662923053574, "grad_norm": 0.7907547950744629, "learning_rate": 5.425273390036453e-05, "loss": 0.8, "step": 7540 }, { "epoch": 2.2889664592502657, "grad_norm": 0.7774684429168701, "learning_rate": 5.4246658566221144e-05, "loss": 1.2111, "step": 7541 }, { "epoch": 2.289269995446957, "grad_norm": 0.8284247517585754, "learning_rate": 5.424058323207777e-05, "loss": 1.5555, "step": 7542 }, { "epoch": 2.2895735316436485, "grad_norm": 0.7162729501724243, "learning_rate": 5.4234507897934386e-05, "loss": 1.1438, "step": 7543 }, { "epoch": 2.28987706784034, "grad_norm": 0.7665915489196777, "learning_rate": 5.422843256379101e-05, "loss": 1.4582, "step": 7544 }, { "epoch": 2.2901806040370314, "grad_norm": 0.734765350818634, "learning_rate": 5.4222357229647634e-05, "loss": 1.2054, "step": 7545 }, { "epoch": 2.290484140233723, "grad_norm": 0.8265764117240906, "learning_rate": 5.421628189550425e-05, "loss": 1.712, "step": 7546 }, { "epoch": 2.2907876764304143, "grad_norm": 0.7417080998420715, "learning_rate": 5.4210206561360875e-05, "loss": 1.2468, "step": 7547 }, { "epoch": 2.291091212627106, "grad_norm": 0.7113915681838989, "learning_rate": 5.4204131227217506e-05, "loss": 1.3884, "step": 7548 }, { "epoch": 2.291394748823797, "grad_norm": 0.7582618594169617, "learning_rate": 5.4198055893074117e-05, "loss": 1.454, "step": 7549 }, { "epoch": 2.291698285020489, "grad_norm": 0.8136988282203674, "learning_rate": 5.419198055893074e-05, "loss": 1.3694, "step": 7550 }, { "epoch": 2.29200182121718, "grad_norm": 0.7433492541313171, "learning_rate": 5.418590522478737e-05, "loss": 1.5307, "step": 7551 }, { "epoch": 2.2923053574138716, "grad_norm": 0.7587953209877014, "learning_rate": 5.417982989064398e-05, "loss": 1.3316, "step": 7552 }, { "epoch": 2.292608893610563, "grad_norm": 0.7869864106178284, "learning_rate": 5.417375455650061e-05, "loss": 1.2841, "step": 7553 }, { "epoch": 2.2929124298072545, "grad_norm": 1.0088813304901123, "learning_rate": 5.416767922235724e-05, "loss": 1.1967, "step": 7554 }, { "epoch": 2.293215966003946, "grad_norm": 0.7056594491004944, "learning_rate": 5.4161603888213854e-05, "loss": 1.372, "step": 7555 }, { "epoch": 2.2935195022006374, "grad_norm": 0.7692909836769104, "learning_rate": 5.415552855407048e-05, "loss": 0.7488, "step": 7556 }, { "epoch": 2.293823038397329, "grad_norm": 0.8228776454925537, "learning_rate": 5.4149453219927096e-05, "loss": 1.4965, "step": 7557 }, { "epoch": 2.2941265745940203, "grad_norm": 0.869111955165863, "learning_rate": 5.414337788578372e-05, "loss": 1.4706, "step": 7558 }, { "epoch": 2.294430110790712, "grad_norm": 0.806982696056366, "learning_rate": 5.4137302551640344e-05, "loss": 1.4263, "step": 7559 }, { "epoch": 2.294733646987403, "grad_norm": 0.6825501918792725, "learning_rate": 5.413122721749696e-05, "loss": 1.2769, "step": 7560 }, { "epoch": 2.2950371831840948, "grad_norm": 0.7721306085586548, "learning_rate": 5.4125151883353585e-05, "loss": 1.2624, "step": 7561 }, { "epoch": 2.295340719380786, "grad_norm": 0.654353678226471, "learning_rate": 5.411907654921021e-05, "loss": 0.9266, "step": 7562 }, { "epoch": 2.2956442555774776, "grad_norm": 0.7125826478004456, "learning_rate": 5.411300121506683e-05, "loss": 1.1077, "step": 7563 }, { "epoch": 2.295947791774169, "grad_norm": 0.655035138130188, "learning_rate": 5.410692588092345e-05, "loss": 1.1465, "step": 7564 }, { "epoch": 2.2962513279708605, "grad_norm": 0.4681672751903534, "learning_rate": 5.410085054678008e-05, "loss": 1.6199, "step": 7565 }, { "epoch": 2.296554864167552, "grad_norm": 0.6791568994522095, "learning_rate": 5.409477521263669e-05, "loss": 1.1105, "step": 7566 }, { "epoch": 2.2968584003642434, "grad_norm": 0.7070233821868896, "learning_rate": 5.408869987849332e-05, "loss": 1.3733, "step": 7567 }, { "epoch": 2.297161936560935, "grad_norm": 0.6516934633255005, "learning_rate": 5.408262454434995e-05, "loss": 1.4664, "step": 7568 }, { "epoch": 2.2974654727576262, "grad_norm": 0.8514277935028076, "learning_rate": 5.4076549210206564e-05, "loss": 1.3357, "step": 7569 }, { "epoch": 2.297769008954318, "grad_norm": 0.6793623566627502, "learning_rate": 5.407047387606319e-05, "loss": 1.6255, "step": 7570 }, { "epoch": 2.298072545151009, "grad_norm": 0.5945488810539246, "learning_rate": 5.4064398541919806e-05, "loss": 1.3195, "step": 7571 }, { "epoch": 2.2983760813477008, "grad_norm": 0.7190368175506592, "learning_rate": 5.405832320777643e-05, "loss": 1.3829, "step": 7572 }, { "epoch": 2.298679617544392, "grad_norm": 0.8405774831771851, "learning_rate": 5.4052247873633054e-05, "loss": 1.5689, "step": 7573 }, { "epoch": 2.2989831537410836, "grad_norm": 0.725680410861969, "learning_rate": 5.404617253948967e-05, "loss": 0.8484, "step": 7574 }, { "epoch": 2.299286689937775, "grad_norm": 0.8415581583976746, "learning_rate": 5.4040097205346295e-05, "loss": 1.2999, "step": 7575 }, { "epoch": 2.2995902261344665, "grad_norm": 0.580029308795929, "learning_rate": 5.403402187120292e-05, "loss": 1.7616, "step": 7576 }, { "epoch": 2.299893762331158, "grad_norm": 0.7359707355499268, "learning_rate": 5.402794653705954e-05, "loss": 1.1516, "step": 7577 }, { "epoch": 2.3001972985278494, "grad_norm": 0.7400234937667847, "learning_rate": 5.402187120291616e-05, "loss": 1.1951, "step": 7578 }, { "epoch": 2.300500834724541, "grad_norm": 0.7638850212097168, "learning_rate": 5.401579586877279e-05, "loss": 1.6116, "step": 7579 }, { "epoch": 2.3008043709212322, "grad_norm": 0.8924289345741272, "learning_rate": 5.40097205346294e-05, "loss": 1.4147, "step": 7580 }, { "epoch": 2.301107907117924, "grad_norm": 0.8381494283676147, "learning_rate": 5.400364520048603e-05, "loss": 1.1766, "step": 7581 }, { "epoch": 2.301411443314615, "grad_norm": 0.8965625762939453, "learning_rate": 5.399756986634266e-05, "loss": 1.4292, "step": 7582 }, { "epoch": 2.3017149795113068, "grad_norm": 0.8396190404891968, "learning_rate": 5.3991494532199274e-05, "loss": 1.2678, "step": 7583 }, { "epoch": 2.3020185157079984, "grad_norm": 0.6765458583831787, "learning_rate": 5.39854191980559e-05, "loss": 1.4473, "step": 7584 }, { "epoch": 2.3023220519046896, "grad_norm": 0.8363358974456787, "learning_rate": 5.3979343863912516e-05, "loss": 1.3189, "step": 7585 }, { "epoch": 2.302625588101381, "grad_norm": 0.6315335631370544, "learning_rate": 5.397326852976914e-05, "loss": 1.3433, "step": 7586 }, { "epoch": 2.3029291242980725, "grad_norm": 1.3354105949401855, "learning_rate": 5.3967193195625764e-05, "loss": 0.9348, "step": 7587 }, { "epoch": 2.303232660494764, "grad_norm": 0.7214506268501282, "learning_rate": 5.396111786148238e-05, "loss": 1.1078, "step": 7588 }, { "epoch": 2.3035361966914554, "grad_norm": 0.9364904761314392, "learning_rate": 5.3955042527339005e-05, "loss": 1.117, "step": 7589 }, { "epoch": 2.303839732888147, "grad_norm": 0.8593325018882751, "learning_rate": 5.394896719319563e-05, "loss": 1.1249, "step": 7590 }, { "epoch": 2.3041432690848382, "grad_norm": 0.7999909520149231, "learning_rate": 5.394289185905225e-05, "loss": 1.3141, "step": 7591 }, { "epoch": 2.30444680528153, "grad_norm": 0.6782107949256897, "learning_rate": 5.393681652490887e-05, "loss": 1.2862, "step": 7592 }, { "epoch": 2.304750341478221, "grad_norm": 0.703163743019104, "learning_rate": 5.39307411907655e-05, "loss": 1.4451, "step": 7593 }, { "epoch": 2.3050538776749128, "grad_norm": 0.7271427512168884, "learning_rate": 5.392466585662211e-05, "loss": 1.2476, "step": 7594 }, { "epoch": 2.3053574138716044, "grad_norm": 0.7937582731246948, "learning_rate": 5.391859052247874e-05, "loss": 1.4786, "step": 7595 }, { "epoch": 2.3056609500682956, "grad_norm": 0.6775838136672974, "learning_rate": 5.391251518833537e-05, "loss": 1.7379, "step": 7596 }, { "epoch": 2.3059644862649873, "grad_norm": 0.7014533281326294, "learning_rate": 5.3906439854191984e-05, "loss": 1.5345, "step": 7597 }, { "epoch": 2.3062680224616785, "grad_norm": 0.6490854620933533, "learning_rate": 5.390036452004861e-05, "loss": 1.338, "step": 7598 }, { "epoch": 2.30657155865837, "grad_norm": 1.0077564716339111, "learning_rate": 5.389428918590522e-05, "loss": 1.2281, "step": 7599 }, { "epoch": 2.3068750948550614, "grad_norm": 0.850725531578064, "learning_rate": 5.388821385176185e-05, "loss": 1.3029, "step": 7600 }, { "epoch": 2.307178631051753, "grad_norm": 0.7772053480148315, "learning_rate": 5.3882138517618474e-05, "loss": 1.4725, "step": 7601 }, { "epoch": 2.3074821672484442, "grad_norm": 0.681745707988739, "learning_rate": 5.387606318347509e-05, "loss": 0.8412, "step": 7602 }, { "epoch": 2.307785703445136, "grad_norm": 0.8142476081848145, "learning_rate": 5.3869987849331715e-05, "loss": 1.5885, "step": 7603 }, { "epoch": 2.308089239641827, "grad_norm": 0.8766453862190247, "learning_rate": 5.386391251518834e-05, "loss": 1.2459, "step": 7604 }, { "epoch": 2.3083927758385188, "grad_norm": 0.9796348810195923, "learning_rate": 5.385783718104496e-05, "loss": 1.5479, "step": 7605 }, { "epoch": 2.3086963120352104, "grad_norm": 0.6790328025817871, "learning_rate": 5.385176184690158e-05, "loss": 1.4504, "step": 7606 }, { "epoch": 2.3089998482319016, "grad_norm": 0.7625194191932678, "learning_rate": 5.384568651275821e-05, "loss": 1.3028, "step": 7607 }, { "epoch": 2.3093033844285933, "grad_norm": 0.7844623923301697, "learning_rate": 5.383961117861482e-05, "loss": 1.0466, "step": 7608 }, { "epoch": 2.3096069206252845, "grad_norm": 0.8220598697662354, "learning_rate": 5.383353584447145e-05, "loss": 1.3591, "step": 7609 }, { "epoch": 2.309910456821976, "grad_norm": 0.7971135377883911, "learning_rate": 5.382746051032808e-05, "loss": 1.3741, "step": 7610 }, { "epoch": 2.3102139930186674, "grad_norm": 0.743306040763855, "learning_rate": 5.382138517618469e-05, "loss": 1.0722, "step": 7611 }, { "epoch": 2.310517529215359, "grad_norm": 0.7106120586395264, "learning_rate": 5.381530984204132e-05, "loss": 1.632, "step": 7612 }, { "epoch": 2.3108210654120502, "grad_norm": 0.7360802888870239, "learning_rate": 5.380923450789793e-05, "loss": 1.3646, "step": 7613 }, { "epoch": 2.311124601608742, "grad_norm": 0.7011423707008362, "learning_rate": 5.380315917375456e-05, "loss": 1.4146, "step": 7614 }, { "epoch": 2.311428137805433, "grad_norm": 0.7464067339897156, "learning_rate": 5.3797083839611184e-05, "loss": 1.4509, "step": 7615 }, { "epoch": 2.3117316740021248, "grad_norm": 0.7687321901321411, "learning_rate": 5.37910085054678e-05, "loss": 1.3009, "step": 7616 }, { "epoch": 2.3120352101988164, "grad_norm": 0.7935065627098083, "learning_rate": 5.3784933171324425e-05, "loss": 0.9285, "step": 7617 }, { "epoch": 2.3123387463955076, "grad_norm": 0.7177073955535889, "learning_rate": 5.377885783718105e-05, "loss": 1.4441, "step": 7618 }, { "epoch": 2.3126422825921993, "grad_norm": 0.7047598361968994, "learning_rate": 5.377278250303767e-05, "loss": 1.5208, "step": 7619 }, { "epoch": 2.3129458187888905, "grad_norm": 0.6508432030677795, "learning_rate": 5.376670716889429e-05, "loss": 1.5971, "step": 7620 }, { "epoch": 2.313249354985582, "grad_norm": 0.5820059776306152, "learning_rate": 5.376063183475092e-05, "loss": 1.0914, "step": 7621 }, { "epoch": 2.3135528911822734, "grad_norm": 0.7413522005081177, "learning_rate": 5.375455650060753e-05, "loss": 1.324, "step": 7622 }, { "epoch": 2.313856427378965, "grad_norm": 0.8028507828712463, "learning_rate": 5.3748481166464156e-05, "loss": 1.3074, "step": 7623 }, { "epoch": 2.3141599635756562, "grad_norm": 0.839253842830658, "learning_rate": 5.374240583232079e-05, "loss": 1.3944, "step": 7624 }, { "epoch": 2.314463499772348, "grad_norm": 0.9501205682754517, "learning_rate": 5.37363304981774e-05, "loss": 1.4148, "step": 7625 }, { "epoch": 2.314767035969039, "grad_norm": 0.5154752135276794, "learning_rate": 5.373025516403403e-05, "loss": 1.1781, "step": 7626 }, { "epoch": 2.3150705721657308, "grad_norm": 0.7667773365974426, "learning_rate": 5.372417982989064e-05, "loss": 1.3034, "step": 7627 }, { "epoch": 2.3153741083624224, "grad_norm": 0.676240086555481, "learning_rate": 5.371810449574727e-05, "loss": 1.3928, "step": 7628 }, { "epoch": 2.3156776445591136, "grad_norm": 0.8117524981498718, "learning_rate": 5.3712029161603894e-05, "loss": 1.2232, "step": 7629 }, { "epoch": 2.3159811807558053, "grad_norm": 0.8300802111625671, "learning_rate": 5.370595382746051e-05, "loss": 1.0777, "step": 7630 }, { "epoch": 2.3162847169524965, "grad_norm": 0.6476275324821472, "learning_rate": 5.3699878493317136e-05, "loss": 1.7488, "step": 7631 }, { "epoch": 2.316588253149188, "grad_norm": 0.7915083765983582, "learning_rate": 5.369380315917376e-05, "loss": 1.3514, "step": 7632 }, { "epoch": 2.3168917893458794, "grad_norm": 0.6225711703300476, "learning_rate": 5.368772782503038e-05, "loss": 1.6291, "step": 7633 }, { "epoch": 2.317195325542571, "grad_norm": 0.7601152062416077, "learning_rate": 5.3681652490887e-05, "loss": 1.6199, "step": 7634 }, { "epoch": 2.3174988617392622, "grad_norm": 0.7888222336769104, "learning_rate": 5.3675577156743625e-05, "loss": 1.566, "step": 7635 }, { "epoch": 2.317802397935954, "grad_norm": 0.7114104628562927, "learning_rate": 5.366950182260024e-05, "loss": 0.8866, "step": 7636 }, { "epoch": 2.318105934132645, "grad_norm": 0.7306579947471619, "learning_rate": 5.3663426488456866e-05, "loss": 1.5352, "step": 7637 }, { "epoch": 2.3184094703293368, "grad_norm": 0.8664054274559021, "learning_rate": 5.3657351154313484e-05, "loss": 1.3496, "step": 7638 }, { "epoch": 2.3187130065260284, "grad_norm": 0.6200141906738281, "learning_rate": 5.365127582017011e-05, "loss": 1.0732, "step": 7639 }, { "epoch": 2.3190165427227196, "grad_norm": 0.7229865789413452, "learning_rate": 5.364520048602674e-05, "loss": 1.1097, "step": 7640 }, { "epoch": 2.3193200789194113, "grad_norm": 0.6948032975196838, "learning_rate": 5.363912515188335e-05, "loss": 1.6897, "step": 7641 }, { "epoch": 2.3196236151161025, "grad_norm": 0.606377363204956, "learning_rate": 5.363304981773998e-05, "loss": 0.9409, "step": 7642 }, { "epoch": 2.319927151312794, "grad_norm": 0.7344121932983398, "learning_rate": 5.3626974483596604e-05, "loss": 1.0816, "step": 7643 }, { "epoch": 2.3202306875094854, "grad_norm": 0.7421247363090515, "learning_rate": 5.362089914945322e-05, "loss": 1.3433, "step": 7644 }, { "epoch": 2.320534223706177, "grad_norm": 0.7186826467514038, "learning_rate": 5.3614823815309846e-05, "loss": 1.5924, "step": 7645 }, { "epoch": 2.3208377599028682, "grad_norm": 0.7481942176818848, "learning_rate": 5.360874848116647e-05, "loss": 1.2185, "step": 7646 }, { "epoch": 2.32114129609956, "grad_norm": 0.7925760746002197, "learning_rate": 5.360267314702309e-05, "loss": 1.5318, "step": 7647 }, { "epoch": 2.321444832296251, "grad_norm": 0.8228962421417236, "learning_rate": 5.359659781287971e-05, "loss": 0.8963, "step": 7648 }, { "epoch": 2.3217483684929427, "grad_norm": 0.8172785043716431, "learning_rate": 5.3590522478736335e-05, "loss": 1.1493, "step": 7649 }, { "epoch": 2.3220519046896344, "grad_norm": 0.8325759172439575, "learning_rate": 5.358444714459295e-05, "loss": 1.4674, "step": 7650 }, { "epoch": 2.3223554408863256, "grad_norm": 0.8055946826934814, "learning_rate": 5.3578371810449576e-05, "loss": 1.3843, "step": 7651 }, { "epoch": 2.3226589770830173, "grad_norm": 0.7123836874961853, "learning_rate": 5.3572296476306194e-05, "loss": 1.1121, "step": 7652 }, { "epoch": 2.3229625132797085, "grad_norm": 0.7075245380401611, "learning_rate": 5.356622114216282e-05, "loss": 1.6399, "step": 7653 }, { "epoch": 2.3232660494764, "grad_norm": 0.8346471190452576, "learning_rate": 5.356014580801945e-05, "loss": 0.8726, "step": 7654 }, { "epoch": 2.3235695856730914, "grad_norm": 0.8881227374076843, "learning_rate": 5.355407047387606e-05, "loss": 0.9226, "step": 7655 }, { "epoch": 2.323873121869783, "grad_norm": 0.8708653450012207, "learning_rate": 5.354799513973269e-05, "loss": 1.2796, "step": 7656 }, { "epoch": 2.3241766580664747, "grad_norm": 0.7582796216011047, "learning_rate": 5.3541919805589314e-05, "loss": 1.3877, "step": 7657 }, { "epoch": 2.324480194263166, "grad_norm": 0.6816633343696594, "learning_rate": 5.353584447144593e-05, "loss": 0.6834, "step": 7658 }, { "epoch": 2.324783730459857, "grad_norm": 0.4883376359939575, "learning_rate": 5.3529769137302556e-05, "loss": 1.2223, "step": 7659 }, { "epoch": 2.3250872666565487, "grad_norm": 0.6481773853302002, "learning_rate": 5.352369380315918e-05, "loss": 1.0884, "step": 7660 }, { "epoch": 2.3253908028532404, "grad_norm": 0.7207589149475098, "learning_rate": 5.35176184690158e-05, "loss": 1.7161, "step": 7661 }, { "epoch": 2.3256943390499316, "grad_norm": 0.8019577860832214, "learning_rate": 5.351154313487242e-05, "loss": 1.5573, "step": 7662 }, { "epoch": 2.3259978752466233, "grad_norm": 0.7258016467094421, "learning_rate": 5.3505467800729045e-05, "loss": 0.9274, "step": 7663 }, { "epoch": 2.3263014114433145, "grad_norm": 0.7334061861038208, "learning_rate": 5.349939246658566e-05, "loss": 1.0776, "step": 7664 }, { "epoch": 2.326604947640006, "grad_norm": 0.78205806016922, "learning_rate": 5.3493317132442287e-05, "loss": 1.2147, "step": 7665 }, { "epoch": 2.3269084838366974, "grad_norm": 0.8690736889839172, "learning_rate": 5.3487241798298904e-05, "loss": 1.0358, "step": 7666 }, { "epoch": 2.327212020033389, "grad_norm": 0.5215451121330261, "learning_rate": 5.348116646415553e-05, "loss": 0.5091, "step": 7667 }, { "epoch": 2.3275155562300807, "grad_norm": 0.7433094382286072, "learning_rate": 5.347509113001216e-05, "loss": 1.1312, "step": 7668 }, { "epoch": 2.327819092426772, "grad_norm": 0.8542289137840271, "learning_rate": 5.346901579586877e-05, "loss": 1.4203, "step": 7669 }, { "epoch": 2.3281226286234635, "grad_norm": 0.7717257738113403, "learning_rate": 5.34629404617254e-05, "loss": 0.7082, "step": 7670 }, { "epoch": 2.3284261648201547, "grad_norm": 0.8508163690567017, "learning_rate": 5.3456865127582024e-05, "loss": 1.1675, "step": 7671 }, { "epoch": 2.3287297010168464, "grad_norm": 0.7925937175750732, "learning_rate": 5.3450789793438635e-05, "loss": 1.2884, "step": 7672 }, { "epoch": 2.3290332372135376, "grad_norm": 0.8074536919593811, "learning_rate": 5.3444714459295266e-05, "loss": 1.417, "step": 7673 }, { "epoch": 2.3293367734102293, "grad_norm": 0.718932569026947, "learning_rate": 5.343863912515189e-05, "loss": 1.4604, "step": 7674 }, { "epoch": 2.3296403096069205, "grad_norm": 0.8349537253379822, "learning_rate": 5.343256379100851e-05, "loss": 1.5235, "step": 7675 }, { "epoch": 2.329943845803612, "grad_norm": 0.8295914530754089, "learning_rate": 5.342648845686513e-05, "loss": 1.4084, "step": 7676 }, { "epoch": 2.3302473820003033, "grad_norm": 0.6685676574707031, "learning_rate": 5.3420413122721755e-05, "loss": 1.0359, "step": 7677 }, { "epoch": 2.330550918196995, "grad_norm": 0.8455519080162048, "learning_rate": 5.341433778857837e-05, "loss": 1.1332, "step": 7678 }, { "epoch": 2.3308544543936867, "grad_norm": 0.8389970064163208, "learning_rate": 5.3408262454434997e-05, "loss": 1.6788, "step": 7679 }, { "epoch": 2.331157990590378, "grad_norm": 0.7237008213996887, "learning_rate": 5.3402187120291614e-05, "loss": 0.9352, "step": 7680 }, { "epoch": 2.3314615267870695, "grad_norm": 0.7687880992889404, "learning_rate": 5.339611178614824e-05, "loss": 1.2931, "step": 7681 }, { "epoch": 2.3317650629837607, "grad_norm": 0.7523514628410339, "learning_rate": 5.339003645200487e-05, "loss": 1.5411, "step": 7682 }, { "epoch": 2.3320685991804524, "grad_norm": 0.7142686247825623, "learning_rate": 5.338396111786148e-05, "loss": 0.8408, "step": 7683 }, { "epoch": 2.3323721353771436, "grad_norm": 0.6777017712593079, "learning_rate": 5.3377885783718103e-05, "loss": 0.9626, "step": 7684 }, { "epoch": 2.3326756715738353, "grad_norm": 0.5809240937232971, "learning_rate": 5.3371810449574734e-05, "loss": 1.1718, "step": 7685 }, { "epoch": 2.3329792077705265, "grad_norm": 0.5568855404853821, "learning_rate": 5.3365735115431345e-05, "loss": 1.4329, "step": 7686 }, { "epoch": 2.333282743967218, "grad_norm": 0.7619733214378357, "learning_rate": 5.3359659781287976e-05, "loss": 1.5341, "step": 7687 }, { "epoch": 2.3335862801639093, "grad_norm": 0.8271357417106628, "learning_rate": 5.33535844471446e-05, "loss": 1.1607, "step": 7688 }, { "epoch": 2.333889816360601, "grad_norm": 0.7103882431983948, "learning_rate": 5.334750911300122e-05, "loss": 1.465, "step": 7689 }, { "epoch": 2.3341933525572927, "grad_norm": 0.9172919988632202, "learning_rate": 5.334143377885784e-05, "loss": 1.228, "step": 7690 }, { "epoch": 2.334496888753984, "grad_norm": 0.8360595107078552, "learning_rate": 5.3335358444714465e-05, "loss": 1.4144, "step": 7691 }, { "epoch": 2.3348004249506755, "grad_norm": 0.9780839085578918, "learning_rate": 5.332928311057108e-05, "loss": 1.2262, "step": 7692 }, { "epoch": 2.3351039611473667, "grad_norm": 0.6523328423500061, "learning_rate": 5.332320777642771e-05, "loss": 1.3723, "step": 7693 }, { "epoch": 2.3354074973440584, "grad_norm": 0.7507632374763489, "learning_rate": 5.3317132442284324e-05, "loss": 1.7642, "step": 7694 }, { "epoch": 2.3357110335407496, "grad_norm": 0.689064085483551, "learning_rate": 5.331105710814095e-05, "loss": 1.052, "step": 7695 }, { "epoch": 2.3360145697374413, "grad_norm": 0.7555691003799438, "learning_rate": 5.330498177399757e-05, "loss": 1.2013, "step": 7696 }, { "epoch": 2.3363181059341325, "grad_norm": 0.7709565758705139, "learning_rate": 5.329890643985419e-05, "loss": 1.2231, "step": 7697 }, { "epoch": 2.336621642130824, "grad_norm": 0.6575965285301208, "learning_rate": 5.3292831105710814e-05, "loss": 1.5299, "step": 7698 }, { "epoch": 2.3369251783275153, "grad_norm": 0.8596627116203308, "learning_rate": 5.3286755771567444e-05, "loss": 1.3291, "step": 7699 }, { "epoch": 2.337228714524207, "grad_norm": 0.9520817399024963, "learning_rate": 5.3280680437424055e-05, "loss": 0.8895, "step": 7700 }, { "epoch": 2.3375322507208987, "grad_norm": 0.7376377582550049, "learning_rate": 5.3274605103280686e-05, "loss": 0.7775, "step": 7701 }, { "epoch": 2.33783578691759, "grad_norm": 0.8260226845741272, "learning_rate": 5.326852976913731e-05, "loss": 1.1357, "step": 7702 }, { "epoch": 2.3381393231142815, "grad_norm": 0.9278427958488464, "learning_rate": 5.326245443499393e-05, "loss": 1.65, "step": 7703 }, { "epoch": 2.3384428593109727, "grad_norm": 0.8064718246459961, "learning_rate": 5.325637910085055e-05, "loss": 0.7711, "step": 7704 }, { "epoch": 2.3387463955076644, "grad_norm": 0.7880445122718811, "learning_rate": 5.3250303766707175e-05, "loss": 1.4331, "step": 7705 }, { "epoch": 2.3390499317043556, "grad_norm": 0.6582452058792114, "learning_rate": 5.324422843256379e-05, "loss": 1.7935, "step": 7706 }, { "epoch": 2.3393534679010473, "grad_norm": 1.155536413192749, "learning_rate": 5.323815309842042e-05, "loss": 0.5777, "step": 7707 }, { "epoch": 2.3396570040977385, "grad_norm": 0.7595350742340088, "learning_rate": 5.3232077764277034e-05, "loss": 1.1607, "step": 7708 }, { "epoch": 2.33996054029443, "grad_norm": 0.6159242391586304, "learning_rate": 5.322600243013366e-05, "loss": 1.3568, "step": 7709 }, { "epoch": 2.3402640764911213, "grad_norm": 0.7928234934806824, "learning_rate": 5.321992709599028e-05, "loss": 1.3948, "step": 7710 }, { "epoch": 2.340567612687813, "grad_norm": 0.691699206829071, "learning_rate": 5.32138517618469e-05, "loss": 1.4697, "step": 7711 }, { "epoch": 2.3408711488845046, "grad_norm": 0.5798041224479675, "learning_rate": 5.3207776427703524e-05, "loss": 1.0516, "step": 7712 }, { "epoch": 2.341174685081196, "grad_norm": 0.7047245502471924, "learning_rate": 5.3201701093560154e-05, "loss": 1.2814, "step": 7713 }, { "epoch": 2.3414782212778875, "grad_norm": 0.7146697044372559, "learning_rate": 5.3195625759416765e-05, "loss": 1.3333, "step": 7714 }, { "epoch": 2.3417817574745787, "grad_norm": 0.8074240684509277, "learning_rate": 5.3189550425273396e-05, "loss": 1.0494, "step": 7715 }, { "epoch": 2.3420852936712704, "grad_norm": 0.8097215294837952, "learning_rate": 5.318347509113002e-05, "loss": 1.032, "step": 7716 }, { "epoch": 2.3423888298679616, "grad_norm": 0.6857717633247375, "learning_rate": 5.317739975698664e-05, "loss": 1.7386, "step": 7717 }, { "epoch": 2.3426923660646533, "grad_norm": 0.9278475642204285, "learning_rate": 5.317132442284326e-05, "loss": 1.0966, "step": 7718 }, { "epoch": 2.342995902261345, "grad_norm": 0.6738936305046082, "learning_rate": 5.316524908869988e-05, "loss": 1.1826, "step": 7719 }, { "epoch": 2.343299438458036, "grad_norm": 0.839474618434906, "learning_rate": 5.31591737545565e-05, "loss": 1.6056, "step": 7720 }, { "epoch": 2.3436029746547273, "grad_norm": 1.0106525421142578, "learning_rate": 5.315309842041313e-05, "loss": 1.1876, "step": 7721 }, { "epoch": 2.343906510851419, "grad_norm": 1.049715518951416, "learning_rate": 5.3147023086269744e-05, "loss": 1.3087, "step": 7722 }, { "epoch": 2.3442100470481106, "grad_norm": 0.8846147060394287, "learning_rate": 5.314094775212637e-05, "loss": 1.2788, "step": 7723 }, { "epoch": 2.344513583244802, "grad_norm": 0.6935809850692749, "learning_rate": 5.313487241798299e-05, "loss": 1.0473, "step": 7724 }, { "epoch": 2.3448171194414935, "grad_norm": 0.5639526844024658, "learning_rate": 5.312879708383961e-05, "loss": 1.1472, "step": 7725 }, { "epoch": 2.3451206556381847, "grad_norm": 0.64291912317276, "learning_rate": 5.3122721749696234e-05, "loss": 1.5788, "step": 7726 }, { "epoch": 2.3454241918348764, "grad_norm": 0.7888056039810181, "learning_rate": 5.3116646415552864e-05, "loss": 0.9166, "step": 7727 }, { "epoch": 2.3457277280315676, "grad_norm": 0.731212317943573, "learning_rate": 5.3110571081409475e-05, "loss": 1.5907, "step": 7728 }, { "epoch": 2.3460312642282592, "grad_norm": 0.662165105342865, "learning_rate": 5.3104495747266106e-05, "loss": 1.39, "step": 7729 }, { "epoch": 2.346334800424951, "grad_norm": 0.999442458152771, "learning_rate": 5.309842041312273e-05, "loss": 1.0353, "step": 7730 }, { "epoch": 2.346638336621642, "grad_norm": 0.5597583651542664, "learning_rate": 5.309234507897935e-05, "loss": 1.1838, "step": 7731 }, { "epoch": 2.3469418728183338, "grad_norm": 0.6151300668716431, "learning_rate": 5.308626974483597e-05, "loss": 1.5594, "step": 7732 }, { "epoch": 2.347245409015025, "grad_norm": 0.6515387892723083, "learning_rate": 5.308019441069259e-05, "loss": 1.1982, "step": 7733 }, { "epoch": 2.3475489452117166, "grad_norm": 0.7352016568183899, "learning_rate": 5.307411907654921e-05, "loss": 1.3948, "step": 7734 }, { "epoch": 2.347852481408408, "grad_norm": 0.7883623838424683, "learning_rate": 5.306804374240584e-05, "loss": 1.3335, "step": 7735 }, { "epoch": 2.3481560176050995, "grad_norm": 0.5472824573516846, "learning_rate": 5.3061968408262454e-05, "loss": 0.9117, "step": 7736 }, { "epoch": 2.3484595538017907, "grad_norm": 0.8789010047912598, "learning_rate": 5.305589307411908e-05, "loss": 0.6223, "step": 7737 }, { "epoch": 2.3487630899984824, "grad_norm": 0.8505709171295166, "learning_rate": 5.30498177399757e-05, "loss": 1.0333, "step": 7738 }, { "epoch": 2.3490666261951736, "grad_norm": 0.7751732468605042, "learning_rate": 5.304374240583232e-05, "loss": 1.3989, "step": 7739 }, { "epoch": 2.3493701623918652, "grad_norm": 0.8960305452346802, "learning_rate": 5.3037667071688944e-05, "loss": 1.1284, "step": 7740 }, { "epoch": 2.349673698588557, "grad_norm": 0.7729844450950623, "learning_rate": 5.3031591737545575e-05, "loss": 1.4161, "step": 7741 }, { "epoch": 2.349977234785248, "grad_norm": 0.7679091095924377, "learning_rate": 5.3025516403402185e-05, "loss": 1.2617, "step": 7742 }, { "epoch": 2.3502807709819398, "grad_norm": 0.7796841263771057, "learning_rate": 5.3019441069258816e-05, "loss": 1.5505, "step": 7743 }, { "epoch": 2.350584307178631, "grad_norm": 0.8468260765075684, "learning_rate": 5.301336573511544e-05, "loss": 1.483, "step": 7744 }, { "epoch": 2.3508878433753226, "grad_norm": 0.6661270260810852, "learning_rate": 5.300729040097205e-05, "loss": 1.6524, "step": 7745 }, { "epoch": 2.351191379572014, "grad_norm": 0.9717702865600586, "learning_rate": 5.300121506682868e-05, "loss": 0.8439, "step": 7746 }, { "epoch": 2.3514949157687055, "grad_norm": 0.7468115091323853, "learning_rate": 5.299513973268529e-05, "loss": 1.3385, "step": 7747 }, { "epoch": 2.3517984519653967, "grad_norm": 0.8282458186149597, "learning_rate": 5.298906439854192e-05, "loss": 1.4013, "step": 7748 }, { "epoch": 2.3521019881620884, "grad_norm": 0.7290198802947998, "learning_rate": 5.298298906439855e-05, "loss": 0.9782, "step": 7749 }, { "epoch": 2.3524055243587796, "grad_norm": 0.834172785282135, "learning_rate": 5.2976913730255164e-05, "loss": 1.2091, "step": 7750 }, { "epoch": 2.3527090605554712, "grad_norm": 0.5838407278060913, "learning_rate": 5.297083839611179e-05, "loss": 1.0893, "step": 7751 }, { "epoch": 2.353012596752163, "grad_norm": 0.6311367750167847, "learning_rate": 5.296476306196841e-05, "loss": 1.4858, "step": 7752 }, { "epoch": 2.353316132948854, "grad_norm": 0.7519064545631409, "learning_rate": 5.295868772782503e-05, "loss": 1.3179, "step": 7753 }, { "epoch": 2.3536196691455458, "grad_norm": 0.6211289763450623, "learning_rate": 5.2952612393681654e-05, "loss": 1.4659, "step": 7754 }, { "epoch": 2.353923205342237, "grad_norm": 0.6141878962516785, "learning_rate": 5.2946537059538285e-05, "loss": 1.5953, "step": 7755 }, { "epoch": 2.3542267415389286, "grad_norm": 0.6442515850067139, "learning_rate": 5.2940461725394895e-05, "loss": 1.5598, "step": 7756 }, { "epoch": 2.35453027773562, "grad_norm": 0.7627711296081543, "learning_rate": 5.293438639125152e-05, "loss": 1.2289, "step": 7757 }, { "epoch": 2.3548338139323115, "grad_norm": 0.7751161456108093, "learning_rate": 5.292831105710815e-05, "loss": 1.0708, "step": 7758 }, { "epoch": 2.3551373501290027, "grad_norm": 0.8048432469367981, "learning_rate": 5.292223572296476e-05, "loss": 0.9133, "step": 7759 }, { "epoch": 2.3554408863256944, "grad_norm": 0.6209495067596436, "learning_rate": 5.291616038882139e-05, "loss": 1.5231, "step": 7760 }, { "epoch": 2.3557444225223856, "grad_norm": 0.7496862411499023, "learning_rate": 5.2910085054678e-05, "loss": 1.0688, "step": 7761 }, { "epoch": 2.3560479587190772, "grad_norm": 0.8569952845573425, "learning_rate": 5.290400972053463e-05, "loss": 1.1458, "step": 7762 }, { "epoch": 2.356351494915769, "grad_norm": 0.7132030725479126, "learning_rate": 5.289793438639126e-05, "loss": 1.3457, "step": 7763 }, { "epoch": 2.35665503111246, "grad_norm": 0.690146267414093, "learning_rate": 5.2891859052247874e-05, "loss": 1.5885, "step": 7764 }, { "epoch": 2.3569585673091518, "grad_norm": 0.8199977278709412, "learning_rate": 5.28857837181045e-05, "loss": 1.5518, "step": 7765 }, { "epoch": 2.357262103505843, "grad_norm": 0.9800877571105957, "learning_rate": 5.287970838396112e-05, "loss": 1.1599, "step": 7766 }, { "epoch": 2.3575656397025346, "grad_norm": 0.6656652688980103, "learning_rate": 5.287363304981774e-05, "loss": 1.2197, "step": 7767 }, { "epoch": 2.357869175899226, "grad_norm": 0.7522599101066589, "learning_rate": 5.2867557715674364e-05, "loss": 1.3696, "step": 7768 }, { "epoch": 2.3581727120959175, "grad_norm": 0.8206160068511963, "learning_rate": 5.286148238153099e-05, "loss": 1.4364, "step": 7769 }, { "epoch": 2.3584762482926087, "grad_norm": 0.713617742061615, "learning_rate": 5.2855407047387605e-05, "loss": 1.08, "step": 7770 }, { "epoch": 2.3587797844893004, "grad_norm": 0.9411084055900574, "learning_rate": 5.284933171324423e-05, "loss": 1.2305, "step": 7771 }, { "epoch": 2.3590833206859916, "grad_norm": 0.5938416123390198, "learning_rate": 5.284325637910086e-05, "loss": 0.7511, "step": 7772 }, { "epoch": 2.3593868568826832, "grad_norm": 0.6943349242210388, "learning_rate": 5.283718104495747e-05, "loss": 1.6483, "step": 7773 }, { "epoch": 2.359690393079375, "grad_norm": 0.7166835069656372, "learning_rate": 5.28311057108141e-05, "loss": 1.456, "step": 7774 }, { "epoch": 2.359993929276066, "grad_norm": 1.1558526754379272, "learning_rate": 5.282503037667071e-05, "loss": 0.8777, "step": 7775 }, { "epoch": 2.3602974654727578, "grad_norm": 0.7373621463775635, "learning_rate": 5.281895504252734e-05, "loss": 1.5, "step": 7776 }, { "epoch": 2.360601001669449, "grad_norm": 0.5992624163627625, "learning_rate": 5.281287970838397e-05, "loss": 1.6233, "step": 7777 }, { "epoch": 2.3609045378661406, "grad_norm": 0.8751627802848816, "learning_rate": 5.2806804374240584e-05, "loss": 1.3434, "step": 7778 }, { "epoch": 2.361208074062832, "grad_norm": 0.6332023739814758, "learning_rate": 5.280072904009721e-05, "loss": 1.2038, "step": 7779 }, { "epoch": 2.3615116102595235, "grad_norm": 0.6409838199615479, "learning_rate": 5.279465370595383e-05, "loss": 1.7465, "step": 7780 }, { "epoch": 2.361815146456215, "grad_norm": 0.821772575378418, "learning_rate": 5.278857837181045e-05, "loss": 1.5604, "step": 7781 }, { "epoch": 2.3621186826529064, "grad_norm": 0.9229840040206909, "learning_rate": 5.2782503037667074e-05, "loss": 1.2961, "step": 7782 }, { "epoch": 2.3624222188495976, "grad_norm": 0.6327805519104004, "learning_rate": 5.27764277035237e-05, "loss": 1.3647, "step": 7783 }, { "epoch": 2.3627257550462892, "grad_norm": 0.6412098407745361, "learning_rate": 5.2770352369380315e-05, "loss": 1.5738, "step": 7784 }, { "epoch": 2.363029291242981, "grad_norm": 0.6698938012123108, "learning_rate": 5.276427703523694e-05, "loss": 0.809, "step": 7785 }, { "epoch": 2.363332827439672, "grad_norm": 0.6202579140663147, "learning_rate": 5.275820170109357e-05, "loss": 1.4661, "step": 7786 }, { "epoch": 2.3636363636363638, "grad_norm": 0.7623347043991089, "learning_rate": 5.275212636695018e-05, "loss": 1.4255, "step": 7787 }, { "epoch": 2.363939899833055, "grad_norm": 0.7024716138839722, "learning_rate": 5.274605103280681e-05, "loss": 1.6767, "step": 7788 }, { "epoch": 2.3642434360297466, "grad_norm": 0.7999787330627441, "learning_rate": 5.273997569866342e-05, "loss": 1.2606, "step": 7789 }, { "epoch": 2.364546972226438, "grad_norm": 0.7593993544578552, "learning_rate": 5.273390036452005e-05, "loss": 1.3622, "step": 7790 }, { "epoch": 2.3648505084231295, "grad_norm": 0.7108789682388306, "learning_rate": 5.272782503037668e-05, "loss": 1.351, "step": 7791 }, { "epoch": 2.365154044619821, "grad_norm": 0.8540698885917664, "learning_rate": 5.2721749696233294e-05, "loss": 1.7725, "step": 7792 }, { "epoch": 2.3654575808165124, "grad_norm": 0.8625686168670654, "learning_rate": 5.271567436208992e-05, "loss": 1.1768, "step": 7793 }, { "epoch": 2.365761117013204, "grad_norm": 0.891380250453949, "learning_rate": 5.270959902794654e-05, "loss": 1.2084, "step": 7794 }, { "epoch": 2.3660646532098952, "grad_norm": 0.6972417831420898, "learning_rate": 5.270352369380316e-05, "loss": 1.1735, "step": 7795 }, { "epoch": 2.366368189406587, "grad_norm": 0.9224116802215576, "learning_rate": 5.2697448359659784e-05, "loss": 1.3848, "step": 7796 }, { "epoch": 2.366671725603278, "grad_norm": 0.7985848188400269, "learning_rate": 5.269137302551641e-05, "loss": 0.8748, "step": 7797 }, { "epoch": 2.3669752617999698, "grad_norm": 0.7604532837867737, "learning_rate": 5.2685297691373025e-05, "loss": 1.2969, "step": 7798 }, { "epoch": 2.367278797996661, "grad_norm": 0.8171728253364563, "learning_rate": 5.267922235722965e-05, "loss": 1.3355, "step": 7799 }, { "epoch": 2.3675823341933526, "grad_norm": 0.6600670218467712, "learning_rate": 5.267314702308628e-05, "loss": 1.115, "step": 7800 }, { "epoch": 2.367885870390044, "grad_norm": 0.7643181085586548, "learning_rate": 5.266707168894289e-05, "loss": 1.0573, "step": 7801 }, { "epoch": 2.3681894065867355, "grad_norm": 0.6425821781158447, "learning_rate": 5.266099635479952e-05, "loss": 1.649, "step": 7802 }, { "epoch": 2.368492942783427, "grad_norm": 0.7698783278465271, "learning_rate": 5.265492102065613e-05, "loss": 1.1364, "step": 7803 }, { "epoch": 2.3687964789801184, "grad_norm": 0.8478207588195801, "learning_rate": 5.264884568651276e-05, "loss": 1.272, "step": 7804 }, { "epoch": 2.36910001517681, "grad_norm": 0.6805600523948669, "learning_rate": 5.264277035236939e-05, "loss": 0.9952, "step": 7805 }, { "epoch": 2.3694035513735012, "grad_norm": 0.6795867681503296, "learning_rate": 5.2636695018226004e-05, "loss": 1.4623, "step": 7806 }, { "epoch": 2.369707087570193, "grad_norm": 0.6063035726547241, "learning_rate": 5.263061968408263e-05, "loss": 1.2589, "step": 7807 }, { "epoch": 2.370010623766884, "grad_norm": 0.7077288627624512, "learning_rate": 5.262454434993925e-05, "loss": 1.4941, "step": 7808 }, { "epoch": 2.3703141599635758, "grad_norm": 0.7622582912445068, "learning_rate": 5.261846901579587e-05, "loss": 1.2967, "step": 7809 }, { "epoch": 2.370617696160267, "grad_norm": 0.8287667036056519, "learning_rate": 5.2612393681652494e-05, "loss": 1.537, "step": 7810 }, { "epoch": 2.3709212323569586, "grad_norm": 0.6999541521072388, "learning_rate": 5.260631834750912e-05, "loss": 1.3958, "step": 7811 }, { "epoch": 2.37122476855365, "grad_norm": 1.1540329456329346, "learning_rate": 5.2600243013365735e-05, "loss": 0.9358, "step": 7812 }, { "epoch": 2.3715283047503415, "grad_norm": 0.9087563157081604, "learning_rate": 5.259416767922236e-05, "loss": 1.2583, "step": 7813 }, { "epoch": 2.371831840947033, "grad_norm": 0.825188159942627, "learning_rate": 5.258809234507898e-05, "loss": 1.351, "step": 7814 }, { "epoch": 2.3721353771437244, "grad_norm": 0.7714551687240601, "learning_rate": 5.25820170109356e-05, "loss": 1.3726, "step": 7815 }, { "epoch": 2.372438913340416, "grad_norm": 0.782400906085968, "learning_rate": 5.257594167679223e-05, "loss": 0.9385, "step": 7816 }, { "epoch": 2.3727424495371072, "grad_norm": 0.6738986968994141, "learning_rate": 5.256986634264884e-05, "loss": 1.0588, "step": 7817 }, { "epoch": 2.373045985733799, "grad_norm": 0.7600053548812866, "learning_rate": 5.256379100850547e-05, "loss": 1.3494, "step": 7818 }, { "epoch": 2.37334952193049, "grad_norm": 0.6999025344848633, "learning_rate": 5.25577156743621e-05, "loss": 1.6085, "step": 7819 }, { "epoch": 2.3736530581271817, "grad_norm": 0.8892236351966858, "learning_rate": 5.255164034021871e-05, "loss": 1.4086, "step": 7820 }, { "epoch": 2.373956594323873, "grad_norm": 0.794494092464447, "learning_rate": 5.254556500607534e-05, "loss": 1.6947, "step": 7821 }, { "epoch": 2.3742601305205646, "grad_norm": 0.630824625492096, "learning_rate": 5.253948967193196e-05, "loss": 1.7344, "step": 7822 }, { "epoch": 2.374563666717256, "grad_norm": 0.7141704559326172, "learning_rate": 5.253341433778858e-05, "loss": 1.5292, "step": 7823 }, { "epoch": 2.3748672029139475, "grad_norm": 0.7559216022491455, "learning_rate": 5.2527339003645204e-05, "loss": 1.3356, "step": 7824 }, { "epoch": 2.375170739110639, "grad_norm": 0.6533464789390564, "learning_rate": 5.252126366950183e-05, "loss": 1.5769, "step": 7825 }, { "epoch": 2.3754742753073304, "grad_norm": 0.7361800670623779, "learning_rate": 5.2515188335358445e-05, "loss": 0.7596, "step": 7826 }, { "epoch": 2.375777811504022, "grad_norm": 0.5638136863708496, "learning_rate": 5.250911300121507e-05, "loss": 1.6617, "step": 7827 }, { "epoch": 2.376081347700713, "grad_norm": 0.5785853266716003, "learning_rate": 5.250303766707169e-05, "loss": 0.6196, "step": 7828 }, { "epoch": 2.376384883897405, "grad_norm": 0.8371119499206543, "learning_rate": 5.249696233292831e-05, "loss": 1.2451, "step": 7829 }, { "epoch": 2.376688420094096, "grad_norm": 0.8846685886383057, "learning_rate": 5.249088699878494e-05, "loss": 1.3992, "step": 7830 }, { "epoch": 2.3769919562907877, "grad_norm": 0.8627526164054871, "learning_rate": 5.248481166464155e-05, "loss": 0.8391, "step": 7831 }, { "epoch": 2.377295492487479, "grad_norm": 0.7415776252746582, "learning_rate": 5.2478736330498176e-05, "loss": 1.3816, "step": 7832 }, { "epoch": 2.3775990286841706, "grad_norm": 0.6754639148712158, "learning_rate": 5.247266099635481e-05, "loss": 1.4647, "step": 7833 }, { "epoch": 2.377902564880862, "grad_norm": 1.0893198251724243, "learning_rate": 5.246658566221142e-05, "loss": 1.351, "step": 7834 }, { "epoch": 2.3782061010775535, "grad_norm": 0.6966081857681274, "learning_rate": 5.246051032806805e-05, "loss": 1.4548, "step": 7835 }, { "epoch": 2.378509637274245, "grad_norm": 0.7975082993507385, "learning_rate": 5.245443499392467e-05, "loss": 1.7406, "step": 7836 }, { "epoch": 2.3788131734709363, "grad_norm": 0.8312361240386963, "learning_rate": 5.244835965978129e-05, "loss": 1.3375, "step": 7837 }, { "epoch": 2.379116709667628, "grad_norm": 0.6477303504943848, "learning_rate": 5.2442284325637914e-05, "loss": 1.2427, "step": 7838 }, { "epoch": 2.379420245864319, "grad_norm": 0.7435750961303711, "learning_rate": 5.243620899149454e-05, "loss": 1.0545, "step": 7839 }, { "epoch": 2.379723782061011, "grad_norm": 0.6959526538848877, "learning_rate": 5.2430133657351155e-05, "loss": 0.8377, "step": 7840 }, { "epoch": 2.380027318257702, "grad_norm": 0.7352555990219116, "learning_rate": 5.242405832320778e-05, "loss": 1.2703, "step": 7841 }, { "epoch": 2.3803308544543937, "grad_norm": 0.8414437174797058, "learning_rate": 5.24179829890644e-05, "loss": 1.1084, "step": 7842 }, { "epoch": 2.380634390651085, "grad_norm": 0.7620357275009155, "learning_rate": 5.241190765492102e-05, "loss": 1.5271, "step": 7843 }, { "epoch": 2.3809379268477766, "grad_norm": 0.6760739088058472, "learning_rate": 5.2405832320777645e-05, "loss": 1.5965, "step": 7844 }, { "epoch": 2.381241463044468, "grad_norm": 0.7874226570129395, "learning_rate": 5.239975698663426e-05, "loss": 1.0306, "step": 7845 }, { "epoch": 2.3815449992411595, "grad_norm": 0.8035131096839905, "learning_rate": 5.2393681652490886e-05, "loss": 1.1358, "step": 7846 }, { "epoch": 2.381848535437851, "grad_norm": 1.040859341621399, "learning_rate": 5.238760631834752e-05, "loss": 1.0935, "step": 7847 }, { "epoch": 2.3821520716345423, "grad_norm": 0.6593133807182312, "learning_rate": 5.238153098420413e-05, "loss": 1.3713, "step": 7848 }, { "epoch": 2.382455607831234, "grad_norm": 0.8799865245819092, "learning_rate": 5.237545565006076e-05, "loss": 1.4511, "step": 7849 }, { "epoch": 2.382759144027925, "grad_norm": 0.8076903820037842, "learning_rate": 5.236938031591738e-05, "loss": 1.5262, "step": 7850 }, { "epoch": 2.383062680224617, "grad_norm": 0.7340336441993713, "learning_rate": 5.2363304981774e-05, "loss": 1.147, "step": 7851 }, { "epoch": 2.383366216421308, "grad_norm": 0.4335605800151825, "learning_rate": 5.2357229647630624e-05, "loss": 0.7803, "step": 7852 }, { "epoch": 2.3836697526179997, "grad_norm": 0.7532500624656677, "learning_rate": 5.235115431348725e-05, "loss": 1.2353, "step": 7853 }, { "epoch": 2.3839732888146914, "grad_norm": 0.8696469068527222, "learning_rate": 5.2345078979343865e-05, "loss": 1.3929, "step": 7854 }, { "epoch": 2.3842768250113826, "grad_norm": 0.8726606369018555, "learning_rate": 5.233900364520049e-05, "loss": 0.9976, "step": 7855 }, { "epoch": 2.384580361208074, "grad_norm": 0.7265575528144836, "learning_rate": 5.233292831105711e-05, "loss": 1.5227, "step": 7856 }, { "epoch": 2.3848838974047655, "grad_norm": 0.7864022850990295, "learning_rate": 5.232685297691373e-05, "loss": 1.3465, "step": 7857 }, { "epoch": 2.385187433601457, "grad_norm": 0.8680126070976257, "learning_rate": 5.2320777642770355e-05, "loss": 1.1205, "step": 7858 }, { "epoch": 2.3854909697981483, "grad_norm": 0.743444561958313, "learning_rate": 5.231470230862697e-05, "loss": 1.3234, "step": 7859 }, { "epoch": 2.38579450599484, "grad_norm": 0.6110712289810181, "learning_rate": 5.2308626974483596e-05, "loss": 1.425, "step": 7860 }, { "epoch": 2.386098042191531, "grad_norm": 0.5643225908279419, "learning_rate": 5.230255164034023e-05, "loss": 1.4562, "step": 7861 }, { "epoch": 2.386401578388223, "grad_norm": 0.8390517830848694, "learning_rate": 5.229647630619684e-05, "loss": 1.0968, "step": 7862 }, { "epoch": 2.386705114584914, "grad_norm": 0.7999104261398315, "learning_rate": 5.229040097205347e-05, "loss": 1.3739, "step": 7863 }, { "epoch": 2.3870086507816057, "grad_norm": 1.0680652856826782, "learning_rate": 5.228432563791009e-05, "loss": 1.6146, "step": 7864 }, { "epoch": 2.3873121869782974, "grad_norm": 0.8049265742301941, "learning_rate": 5.227825030376671e-05, "loss": 1.0325, "step": 7865 }, { "epoch": 2.3876157231749886, "grad_norm": 0.7554400563240051, "learning_rate": 5.2272174969623334e-05, "loss": 1.4346, "step": 7866 }, { "epoch": 2.3879192593716803, "grad_norm": 0.7211205959320068, "learning_rate": 5.226609963547996e-05, "loss": 1.6123, "step": 7867 }, { "epoch": 2.3882227955683715, "grad_norm": 0.7623486518859863, "learning_rate": 5.2260024301336576e-05, "loss": 1.4401, "step": 7868 }, { "epoch": 2.388526331765063, "grad_norm": 0.8081662654876709, "learning_rate": 5.22539489671932e-05, "loss": 1.1524, "step": 7869 }, { "epoch": 2.3888298679617543, "grad_norm": 0.8566677570343018, "learning_rate": 5.224787363304982e-05, "loss": 1.2491, "step": 7870 }, { "epoch": 2.389133404158446, "grad_norm": 0.7269451022148132, "learning_rate": 5.224179829890644e-05, "loss": 1.4801, "step": 7871 }, { "epoch": 2.389436940355137, "grad_norm": 0.7573784589767456, "learning_rate": 5.2235722964763065e-05, "loss": 1.3765, "step": 7872 }, { "epoch": 2.389740476551829, "grad_norm": 1.0910553932189941, "learning_rate": 5.222964763061968e-05, "loss": 1.007, "step": 7873 }, { "epoch": 2.39004401274852, "grad_norm": 0.8029559254646301, "learning_rate": 5.2223572296476306e-05, "loss": 1.6888, "step": 7874 }, { "epoch": 2.3903475489452117, "grad_norm": 0.7115080952644348, "learning_rate": 5.221749696233294e-05, "loss": 1.0651, "step": 7875 }, { "epoch": 2.3906510851419034, "grad_norm": 0.7667282223701477, "learning_rate": 5.221142162818955e-05, "loss": 1.5417, "step": 7876 }, { "epoch": 2.3909546213385946, "grad_norm": 0.7072556018829346, "learning_rate": 5.220534629404618e-05, "loss": 1.6521, "step": 7877 }, { "epoch": 2.3912581575352863, "grad_norm": 0.7661739587783813, "learning_rate": 5.21992709599028e-05, "loss": 1.4645, "step": 7878 }, { "epoch": 2.3915616937319775, "grad_norm": 0.8855286836624146, "learning_rate": 5.219319562575942e-05, "loss": 1.3801, "step": 7879 }, { "epoch": 2.391865229928669, "grad_norm": 0.6786907315254211, "learning_rate": 5.2187120291616044e-05, "loss": 1.5246, "step": 7880 }, { "epoch": 2.3921687661253603, "grad_norm": 0.771236002445221, "learning_rate": 5.218104495747267e-05, "loss": 1.3782, "step": 7881 }, { "epoch": 2.392472302322052, "grad_norm": 0.6905536651611328, "learning_rate": 5.2174969623329286e-05, "loss": 1.4765, "step": 7882 }, { "epoch": 2.392775838518743, "grad_norm": 0.8602134585380554, "learning_rate": 5.216889428918591e-05, "loss": 1.3996, "step": 7883 }, { "epoch": 2.393079374715435, "grad_norm": 0.6106351613998413, "learning_rate": 5.216281895504253e-05, "loss": 1.6691, "step": 7884 }, { "epoch": 2.393382910912126, "grad_norm": 0.6688586473464966, "learning_rate": 5.215674362089915e-05, "loss": 1.1784, "step": 7885 }, { "epoch": 2.3936864471088177, "grad_norm": 0.8014105558395386, "learning_rate": 5.2150668286755775e-05, "loss": 1.3896, "step": 7886 }, { "epoch": 2.3939899833055094, "grad_norm": 0.720625638961792, "learning_rate": 5.214459295261239e-05, "loss": 1.2415, "step": 7887 }, { "epoch": 2.3942935195022006, "grad_norm": 0.7473063468933105, "learning_rate": 5.2138517618469017e-05, "loss": 0.6578, "step": 7888 }, { "epoch": 2.3945970556988923, "grad_norm": 0.6142640709877014, "learning_rate": 5.213244228432565e-05, "loss": 1.1646, "step": 7889 }, { "epoch": 2.3949005918955835, "grad_norm": 0.9030197858810425, "learning_rate": 5.212636695018226e-05, "loss": 1.3181, "step": 7890 }, { "epoch": 2.395204128092275, "grad_norm": 0.715584397315979, "learning_rate": 5.212029161603889e-05, "loss": 1.5966, "step": 7891 }, { "epoch": 2.3955076642889663, "grad_norm": 0.7543222308158875, "learning_rate": 5.211421628189551e-05, "loss": 1.6381, "step": 7892 }, { "epoch": 2.395811200485658, "grad_norm": 0.8002287149429321, "learning_rate": 5.210814094775212e-05, "loss": 1.4268, "step": 7893 }, { "epoch": 2.396114736682349, "grad_norm": 0.5649712085723877, "learning_rate": 5.2102065613608754e-05, "loss": 1.626, "step": 7894 }, { "epoch": 2.396418272879041, "grad_norm": 0.7349185943603516, "learning_rate": 5.2095990279465365e-05, "loss": 1.4225, "step": 7895 }, { "epoch": 2.396721809075732, "grad_norm": 0.8495616912841797, "learning_rate": 5.2089914945321996e-05, "loss": 1.5571, "step": 7896 }, { "epoch": 2.3970253452724237, "grad_norm": 0.7431391477584839, "learning_rate": 5.208383961117862e-05, "loss": 1.2231, "step": 7897 }, { "epoch": 2.3973288814691154, "grad_norm": 0.6172650456428528, "learning_rate": 5.207776427703524e-05, "loss": 1.5317, "step": 7898 }, { "epoch": 2.3976324176658066, "grad_norm": 0.7999909520149231, "learning_rate": 5.207168894289186e-05, "loss": 1.0488, "step": 7899 }, { "epoch": 2.3979359538624982, "grad_norm": 0.6296140551567078, "learning_rate": 5.2065613608748485e-05, "loss": 0.961, "step": 7900 }, { "epoch": 2.3982394900591895, "grad_norm": 0.7614960074424744, "learning_rate": 5.20595382746051e-05, "loss": 1.4641, "step": 7901 }, { "epoch": 2.398543026255881, "grad_norm": 0.6893866658210754, "learning_rate": 5.2053462940461727e-05, "loss": 1.625, "step": 7902 }, { "epoch": 2.3988465624525723, "grad_norm": 0.9098206162452698, "learning_rate": 5.204738760631836e-05, "loss": 1.4526, "step": 7903 }, { "epoch": 2.399150098649264, "grad_norm": 0.8129755258560181, "learning_rate": 5.204131227217497e-05, "loss": 1.4076, "step": 7904 }, { "epoch": 2.399453634845955, "grad_norm": 0.8761286735534668, "learning_rate": 5.203523693803159e-05, "loss": 1.2927, "step": 7905 }, { "epoch": 2.399757171042647, "grad_norm": 0.8439755439758301, "learning_rate": 5.202916160388822e-05, "loss": 1.3755, "step": 7906 }, { "epoch": 2.400060707239338, "grad_norm": 0.7161287665367126, "learning_rate": 5.2023086269744833e-05, "loss": 1.2992, "step": 7907 }, { "epoch": 2.4003642434360297, "grad_norm": 0.7279097437858582, "learning_rate": 5.2017010935601464e-05, "loss": 1.3541, "step": 7908 }, { "epoch": 2.4006677796327214, "grad_norm": 0.8380422592163086, "learning_rate": 5.2010935601458075e-05, "loss": 1.4317, "step": 7909 }, { "epoch": 2.4009713158294126, "grad_norm": 0.680642306804657, "learning_rate": 5.2004860267314706e-05, "loss": 0.9812, "step": 7910 }, { "epoch": 2.4012748520261042, "grad_norm": 0.6640725135803223, "learning_rate": 5.199878493317133e-05, "loss": 0.8245, "step": 7911 }, { "epoch": 2.4015783882227955, "grad_norm": 0.5655060410499573, "learning_rate": 5.199270959902795e-05, "loss": 2.0355, "step": 7912 }, { "epoch": 2.401881924419487, "grad_norm": 0.8419114947319031, "learning_rate": 5.198663426488457e-05, "loss": 0.9828, "step": 7913 }, { "epoch": 2.4021854606161783, "grad_norm": 0.6423788070678711, "learning_rate": 5.1980558930741195e-05, "loss": 1.6292, "step": 7914 }, { "epoch": 2.40248899681287, "grad_norm": 0.8200101256370544, "learning_rate": 5.197448359659781e-05, "loss": 1.3373, "step": 7915 }, { "epoch": 2.4027925330095616, "grad_norm": 0.595747709274292, "learning_rate": 5.1968408262454437e-05, "loss": 1.0328, "step": 7916 }, { "epoch": 2.403096069206253, "grad_norm": 0.5122062563896179, "learning_rate": 5.196233292831106e-05, "loss": 1.1858, "step": 7917 }, { "epoch": 2.403399605402944, "grad_norm": 0.7064041495323181, "learning_rate": 5.195625759416768e-05, "loss": 1.6892, "step": 7918 }, { "epoch": 2.4037031415996357, "grad_norm": 0.7920200228691101, "learning_rate": 5.19501822600243e-05, "loss": 1.367, "step": 7919 }, { "epoch": 2.4040066777963274, "grad_norm": 0.7359050512313843, "learning_rate": 5.194410692588093e-05, "loss": 1.0948, "step": 7920 }, { "epoch": 2.4043102139930186, "grad_norm": 0.9729425311088562, "learning_rate": 5.1938031591737543e-05, "loss": 0.5859, "step": 7921 }, { "epoch": 2.4046137501897102, "grad_norm": 0.8038437962532043, "learning_rate": 5.1931956257594174e-05, "loss": 1.3406, "step": 7922 }, { "epoch": 2.4049172863864015, "grad_norm": 0.8591334819793701, "learning_rate": 5.1925880923450785e-05, "loss": 1.4519, "step": 7923 }, { "epoch": 2.405220822583093, "grad_norm": 0.9573879837989807, "learning_rate": 5.1919805589307416e-05, "loss": 0.9611, "step": 7924 }, { "epoch": 2.4055243587797843, "grad_norm": 0.7993837594985962, "learning_rate": 5.191373025516404e-05, "loss": 1.3623, "step": 7925 }, { "epoch": 2.405827894976476, "grad_norm": 0.8858467936515808, "learning_rate": 5.190765492102066e-05, "loss": 1.5171, "step": 7926 }, { "epoch": 2.4061314311731676, "grad_norm": 0.9811488389968872, "learning_rate": 5.190157958687728e-05, "loss": 1.2935, "step": 7927 }, { "epoch": 2.406434967369859, "grad_norm": 0.6521837115287781, "learning_rate": 5.1895504252733905e-05, "loss": 1.4562, "step": 7928 }, { "epoch": 2.4067385035665505, "grad_norm": 0.9483540058135986, "learning_rate": 5.188942891859052e-05, "loss": 0.8764, "step": 7929 }, { "epoch": 2.4070420397632417, "grad_norm": 0.5991268754005432, "learning_rate": 5.188335358444715e-05, "loss": 1.5877, "step": 7930 }, { "epoch": 2.4073455759599334, "grad_norm": 0.9723407626152039, "learning_rate": 5.187727825030377e-05, "loss": 1.1474, "step": 7931 }, { "epoch": 2.4076491121566246, "grad_norm": 0.8698570132255554, "learning_rate": 5.187120291616039e-05, "loss": 1.5058, "step": 7932 }, { "epoch": 2.4079526483533162, "grad_norm": 0.8418928980827332, "learning_rate": 5.186512758201701e-05, "loss": 1.1602, "step": 7933 }, { "epoch": 2.4082561845500075, "grad_norm": 0.7727174758911133, "learning_rate": 5.185905224787364e-05, "loss": 1.5246, "step": 7934 }, { "epoch": 2.408559720746699, "grad_norm": 0.6789799928665161, "learning_rate": 5.1852976913730254e-05, "loss": 1.3529, "step": 7935 }, { "epoch": 2.4088632569433903, "grad_norm": 0.7298761010169983, "learning_rate": 5.1846901579586884e-05, "loss": 0.882, "step": 7936 }, { "epoch": 2.409166793140082, "grad_norm": 0.7938309907913208, "learning_rate": 5.1840826245443495e-05, "loss": 1.2523, "step": 7937 }, { "epoch": 2.4094703293367736, "grad_norm": 0.8047483563423157, "learning_rate": 5.1834750911300126e-05, "loss": 1.6818, "step": 7938 }, { "epoch": 2.409773865533465, "grad_norm": 0.8456572890281677, "learning_rate": 5.182867557715675e-05, "loss": 1.2316, "step": 7939 }, { "epoch": 2.4100774017301565, "grad_norm": 0.776390552520752, "learning_rate": 5.182260024301337e-05, "loss": 1.1516, "step": 7940 }, { "epoch": 2.4103809379268477, "grad_norm": 0.775544285774231, "learning_rate": 5.181652490886999e-05, "loss": 0.9377, "step": 7941 }, { "epoch": 2.4106844741235394, "grad_norm": 0.7695592641830444, "learning_rate": 5.1810449574726615e-05, "loss": 1.4437, "step": 7942 }, { "epoch": 2.4109880103202306, "grad_norm": 0.928779125213623, "learning_rate": 5.180437424058323e-05, "loss": 0.8951, "step": 7943 }, { "epoch": 2.4112915465169222, "grad_norm": 0.757379949092865, "learning_rate": 5.179829890643986e-05, "loss": 1.3946, "step": 7944 }, { "epoch": 2.4115950827136134, "grad_norm": 0.6092455983161926, "learning_rate": 5.179222357229648e-05, "loss": 1.8418, "step": 7945 }, { "epoch": 2.411898618910305, "grad_norm": 0.7635046243667603, "learning_rate": 5.17861482381531e-05, "loss": 1.5335, "step": 7946 }, { "epoch": 2.4122021551069963, "grad_norm": 0.6753832101821899, "learning_rate": 5.178007290400972e-05, "loss": 0.8429, "step": 7947 }, { "epoch": 2.412505691303688, "grad_norm": 0.737553596496582, "learning_rate": 5.177399756986635e-05, "loss": 1.4511, "step": 7948 }, { "epoch": 2.4128092275003796, "grad_norm": 0.9160948991775513, "learning_rate": 5.1767922235722964e-05, "loss": 1.3461, "step": 7949 }, { "epoch": 2.413112763697071, "grad_norm": 0.7199264168739319, "learning_rate": 5.1761846901579594e-05, "loss": 1.3522, "step": 7950 }, { "epoch": 2.4134162998937625, "grad_norm": 1.0194474458694458, "learning_rate": 5.1755771567436205e-05, "loss": 1.0272, "step": 7951 }, { "epoch": 2.4137198360904537, "grad_norm": 0.6661416888237, "learning_rate": 5.1749696233292836e-05, "loss": 1.5689, "step": 7952 }, { "epoch": 2.4140233722871454, "grad_norm": 0.67854905128479, "learning_rate": 5.174362089914946e-05, "loss": 1.2744, "step": 7953 }, { "epoch": 2.4143269084838366, "grad_norm": 0.7923321723937988, "learning_rate": 5.173754556500607e-05, "loss": 1.1285, "step": 7954 }, { "epoch": 2.4146304446805282, "grad_norm": 0.5897708535194397, "learning_rate": 5.17314702308627e-05, "loss": 1.3658, "step": 7955 }, { "epoch": 2.4149339808772194, "grad_norm": 0.7650224566459656, "learning_rate": 5.1725394896719325e-05, "loss": 1.4415, "step": 7956 }, { "epoch": 2.415237517073911, "grad_norm": 1.342687726020813, "learning_rate": 5.171931956257594e-05, "loss": 1.0132, "step": 7957 }, { "epoch": 2.4155410532706023, "grad_norm": 0.8072525858879089, "learning_rate": 5.171324422843257e-05, "loss": 1.5729, "step": 7958 }, { "epoch": 2.415844589467294, "grad_norm": 0.640731930732727, "learning_rate": 5.170716889428919e-05, "loss": 1.5518, "step": 7959 }, { "epoch": 2.4161481256639856, "grad_norm": 0.718439519405365, "learning_rate": 5.170109356014581e-05, "loss": 1.4649, "step": 7960 }, { "epoch": 2.416451661860677, "grad_norm": 0.5635302662849426, "learning_rate": 5.169501822600243e-05, "loss": 1.2203, "step": 7961 }, { "epoch": 2.4167551980573685, "grad_norm": 0.7853577136993408, "learning_rate": 5.168894289185906e-05, "loss": 0.93, "step": 7962 }, { "epoch": 2.4170587342540597, "grad_norm": 0.9242436289787292, "learning_rate": 5.1682867557715674e-05, "loss": 1.4183, "step": 7963 }, { "epoch": 2.4173622704507514, "grad_norm": 0.679762065410614, "learning_rate": 5.1676792223572304e-05, "loss": 0.8421, "step": 7964 }, { "epoch": 2.4176658066474426, "grad_norm": 0.7319661974906921, "learning_rate": 5.1670716889428915e-05, "loss": 1.4754, "step": 7965 }, { "epoch": 2.4179693428441342, "grad_norm": 0.8363327980041504, "learning_rate": 5.166464155528554e-05, "loss": 1.3688, "step": 7966 }, { "epoch": 2.4182728790408254, "grad_norm": 0.7800347805023193, "learning_rate": 5.165856622114217e-05, "loss": 1.5161, "step": 7967 }, { "epoch": 2.418576415237517, "grad_norm": 0.6953141093254089, "learning_rate": 5.165249088699878e-05, "loss": 1.4556, "step": 7968 }, { "epoch": 2.4188799514342083, "grad_norm": 0.7389452457427979, "learning_rate": 5.164641555285541e-05, "loss": 0.9218, "step": 7969 }, { "epoch": 2.4191834876309, "grad_norm": 0.583696722984314, "learning_rate": 5.1640340218712035e-05, "loss": 0.7905, "step": 7970 }, { "epoch": 2.4194870238275916, "grad_norm": 0.7632086873054504, "learning_rate": 5.163426488456865e-05, "loss": 1.0743, "step": 7971 }, { "epoch": 2.419790560024283, "grad_norm": 0.6496031284332275, "learning_rate": 5.162818955042528e-05, "loss": 1.4744, "step": 7972 }, { "epoch": 2.4200940962209745, "grad_norm": 0.9461754560470581, "learning_rate": 5.16221142162819e-05, "loss": 1.1325, "step": 7973 }, { "epoch": 2.4203976324176657, "grad_norm": 0.7598881721496582, "learning_rate": 5.161603888213852e-05, "loss": 1.4004, "step": 7974 }, { "epoch": 2.4207011686143574, "grad_norm": 0.7698111534118652, "learning_rate": 5.160996354799514e-05, "loss": 1.2941, "step": 7975 }, { "epoch": 2.4210047048110486, "grad_norm": 0.8465235829353333, "learning_rate": 5.160388821385177e-05, "loss": 1.0867, "step": 7976 }, { "epoch": 2.4213082410077402, "grad_norm": 0.8211492896080017, "learning_rate": 5.1597812879708384e-05, "loss": 1.1229, "step": 7977 }, { "epoch": 2.4216117772044314, "grad_norm": 0.7429980039596558, "learning_rate": 5.159173754556501e-05, "loss": 1.2347, "step": 7978 }, { "epoch": 2.421915313401123, "grad_norm": 0.7130765318870544, "learning_rate": 5.1585662211421625e-05, "loss": 1.5815, "step": 7979 }, { "epoch": 2.4222188495978143, "grad_norm": 0.8220276236534119, "learning_rate": 5.157958687727825e-05, "loss": 0.9547, "step": 7980 }, { "epoch": 2.422522385794506, "grad_norm": 0.690679132938385, "learning_rate": 5.157351154313488e-05, "loss": 1.6903, "step": 7981 }, { "epoch": 2.4228259219911976, "grad_norm": 0.5744288563728333, "learning_rate": 5.156743620899149e-05, "loss": 1.3488, "step": 7982 }, { "epoch": 2.423129458187889, "grad_norm": 0.7903429269790649, "learning_rate": 5.156136087484812e-05, "loss": 1.1118, "step": 7983 }, { "epoch": 2.4234329943845805, "grad_norm": 0.7011483311653137, "learning_rate": 5.1555285540704745e-05, "loss": 0.7834, "step": 7984 }, { "epoch": 2.4237365305812717, "grad_norm": 0.7828824520111084, "learning_rate": 5.154921020656136e-05, "loss": 1.1493, "step": 7985 }, { "epoch": 2.4240400667779634, "grad_norm": 0.7918079495429993, "learning_rate": 5.154313487241799e-05, "loss": 1.1872, "step": 7986 }, { "epoch": 2.4243436029746546, "grad_norm": 0.7867520451545715, "learning_rate": 5.153705953827461e-05, "loss": 1.6563, "step": 7987 }, { "epoch": 2.424647139171346, "grad_norm": 0.8857144117355347, "learning_rate": 5.153098420413123e-05, "loss": 0.9088, "step": 7988 }, { "epoch": 2.424950675368038, "grad_norm": 0.9001855254173279, "learning_rate": 5.152490886998785e-05, "loss": 1.4135, "step": 7989 }, { "epoch": 2.425254211564729, "grad_norm": 0.5790506601333618, "learning_rate": 5.151883353584447e-05, "loss": 0.921, "step": 7990 }, { "epoch": 2.4255577477614203, "grad_norm": 0.6665115356445312, "learning_rate": 5.1512758201701094e-05, "loss": 1.0874, "step": 7991 }, { "epoch": 2.425861283958112, "grad_norm": 0.8199989795684814, "learning_rate": 5.150668286755772e-05, "loss": 1.6912, "step": 7992 }, { "epoch": 2.4261648201548036, "grad_norm": 1.0102654695510864, "learning_rate": 5.1500607533414335e-05, "loss": 1.1574, "step": 7993 }, { "epoch": 2.426468356351495, "grad_norm": 1.0226120948791504, "learning_rate": 5.149453219927096e-05, "loss": 1.4441, "step": 7994 }, { "epoch": 2.4267718925481865, "grad_norm": 0.9459152221679688, "learning_rate": 5.148845686512759e-05, "loss": 1.4011, "step": 7995 }, { "epoch": 2.4270754287448777, "grad_norm": 0.6363176107406616, "learning_rate": 5.14823815309842e-05, "loss": 1.1087, "step": 7996 }, { "epoch": 2.4273789649415694, "grad_norm": 0.7099559307098389, "learning_rate": 5.147630619684083e-05, "loss": 1.0363, "step": 7997 }, { "epoch": 2.4276825011382606, "grad_norm": 0.7245994806289673, "learning_rate": 5.1470230862697456e-05, "loss": 1.5554, "step": 7998 }, { "epoch": 2.427986037334952, "grad_norm": 0.6965836882591248, "learning_rate": 5.146415552855407e-05, "loss": 1.094, "step": 7999 }, { "epoch": 2.428289573531644, "grad_norm": 0.6804750561714172, "learning_rate": 5.14580801944107e-05, "loss": 1.1956, "step": 8000 } ], "logging_steps": 1, "max_steps": 16470, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.193666367442141e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }