{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.214144786765822, "eval_steps": 500, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00030353619669145547, "grad_norm": 9.52797794342041, "learning_rate": 1e-05, "loss": 5.0165, "step": 1 }, { "epoch": 0.0006070723933829109, "grad_norm": 10.161993026733398, "learning_rate": 2e-05, "loss": 4.7408, "step": 2 }, { "epoch": 0.0009106085900743664, "grad_norm": 7.550526142120361, "learning_rate": 3e-05, "loss": 5.0209, "step": 3 }, { "epoch": 0.0012141447867658219, "grad_norm": 5.454105377197266, "learning_rate": 4e-05, "loss": 4.4045, "step": 4 }, { "epoch": 0.0015176809834572772, "grad_norm": 3.715569257736206, "learning_rate": 5e-05, "loss": 4.0617, "step": 5 }, { "epoch": 0.0018212171801487327, "grad_norm": 3.9456210136413574, "learning_rate": 6e-05, "loss": 3.813, "step": 6 }, { "epoch": 0.002124753376840188, "grad_norm": 3.6386630535125732, "learning_rate": 7e-05, "loss": 3.5396, "step": 7 }, { "epoch": 0.0024282895735316438, "grad_norm": 2.5329768657684326, "learning_rate": 8e-05, "loss": 3.1611, "step": 8 }, { "epoch": 0.002731825770223099, "grad_norm": 2.09954571723938, "learning_rate": 9e-05, "loss": 2.8787, "step": 9 }, { "epoch": 0.0030353619669145544, "grad_norm": 2.0083999633789062, "learning_rate": 0.0001, "loss": 2.6942, "step": 10 }, { "epoch": 0.00333889816360601, "grad_norm": 1.419735074043274, "learning_rate": 9.999392466585664e-05, "loss": 2.5674, "step": 11 }, { "epoch": 0.0036424343602974654, "grad_norm": 3.0809664726257324, "learning_rate": 9.998784933171324e-05, "loss": 2.2474, "step": 12 }, { "epoch": 0.003945970556988921, "grad_norm": 1.4494595527648926, "learning_rate": 9.998177399756987e-05, "loss": 2.1931, "step": 13 }, { "epoch": 0.004249506753680376, "grad_norm": 1.4052276611328125, "learning_rate": 9.99756986634265e-05, "loss": 2.2179, "step": 14 }, { "epoch": 0.004553042950371832, "grad_norm": 1.0900732278823853, "learning_rate": 9.996962332928312e-05, "loss": 2.3455, "step": 15 }, { "epoch": 0.0048565791470632875, "grad_norm": 1.078604817390442, "learning_rate": 9.996354799513974e-05, "loss": 2.1906, "step": 16 }, { "epoch": 0.005160115343754742, "grad_norm": 1.0777554512023926, "learning_rate": 9.995747266099635e-05, "loss": 2.4069, "step": 17 }, { "epoch": 0.005463651540446198, "grad_norm": 1.2703579664230347, "learning_rate": 9.995139732685298e-05, "loss": 2.1901, "step": 18 }, { "epoch": 0.005767187737137654, "grad_norm": 2.06676983833313, "learning_rate": 9.99453219927096e-05, "loss": 2.4616, "step": 19 }, { "epoch": 0.006070723933829109, "grad_norm": 1.0544441938400269, "learning_rate": 9.993924665856622e-05, "loss": 1.9529, "step": 20 }, { "epoch": 0.0063742601305205645, "grad_norm": 1.1237947940826416, "learning_rate": 9.993317132442285e-05, "loss": 2.8619, "step": 21 }, { "epoch": 0.00667779632721202, "grad_norm": 0.8750623464584351, "learning_rate": 9.992709599027947e-05, "loss": 2.0467, "step": 22 }, { "epoch": 0.006981332523903475, "grad_norm": 0.8135535717010498, "learning_rate": 9.992102065613608e-05, "loss": 2.3566, "step": 23 }, { "epoch": 0.007284868720594931, "grad_norm": 0.8838183879852295, "learning_rate": 9.991494532199271e-05, "loss": 2.2145, "step": 24 }, { "epoch": 0.007588404917286387, "grad_norm": 0.7460266351699829, "learning_rate": 9.990886998784935e-05, "loss": 1.7996, "step": 25 }, { "epoch": 0.007891941113977842, "grad_norm": 0.7469210028648376, "learning_rate": 9.990279465370595e-05, "loss": 1.9147, "step": 26 }, { "epoch": 0.008195477310669297, "grad_norm": 0.796752393245697, "learning_rate": 9.989671931956258e-05, "loss": 1.6982, "step": 27 }, { "epoch": 0.008499013507360752, "grad_norm": 0.9568108916282654, "learning_rate": 9.989064398541921e-05, "loss": 2.294, "step": 28 }, { "epoch": 0.008802549704052209, "grad_norm": 0.7790305018424988, "learning_rate": 9.988456865127583e-05, "loss": 1.7283, "step": 29 }, { "epoch": 0.009106085900743664, "grad_norm": 0.5705334544181824, "learning_rate": 9.987849331713245e-05, "loss": 2.0917, "step": 30 }, { "epoch": 0.009409622097435118, "grad_norm": 0.8099403381347656, "learning_rate": 9.987241798298906e-05, "loss": 2.1187, "step": 31 }, { "epoch": 0.009713158294126575, "grad_norm": 0.728687584400177, "learning_rate": 9.98663426488457e-05, "loss": 2.0019, "step": 32 }, { "epoch": 0.01001669449081803, "grad_norm": 0.7341739535331726, "learning_rate": 9.986026731470231e-05, "loss": 2.2379, "step": 33 }, { "epoch": 0.010320230687509485, "grad_norm": 0.6100563406944275, "learning_rate": 9.985419198055893e-05, "loss": 2.2385, "step": 34 }, { "epoch": 0.010623766884200941, "grad_norm": 0.57859206199646, "learning_rate": 9.984811664641556e-05, "loss": 1.9331, "step": 35 }, { "epoch": 0.010927303080892396, "grad_norm": 0.5878285765647888, "learning_rate": 9.984204131227218e-05, "loss": 1.8691, "step": 36 }, { "epoch": 0.011230839277583851, "grad_norm": 0.5095940232276917, "learning_rate": 9.98359659781288e-05, "loss": 2.1699, "step": 37 }, { "epoch": 0.011534375474275308, "grad_norm": 0.5028595924377441, "learning_rate": 9.982989064398542e-05, "loss": 1.6226, "step": 38 }, { "epoch": 0.011837911670966763, "grad_norm": 0.6969617009162903, "learning_rate": 9.982381530984206e-05, "loss": 1.7049, "step": 39 }, { "epoch": 0.012141447867658217, "grad_norm": 0.6432283520698547, "learning_rate": 9.981773997569866e-05, "loss": 1.645, "step": 40 }, { "epoch": 0.012444984064349674, "grad_norm": 0.5575637221336365, "learning_rate": 9.981166464155529e-05, "loss": 2.122, "step": 41 }, { "epoch": 0.012748520261041129, "grad_norm": 0.8630117177963257, "learning_rate": 9.980558930741192e-05, "loss": 2.4391, "step": 42 }, { "epoch": 0.013052056457732584, "grad_norm": 0.7215672135353088, "learning_rate": 9.979951397326854e-05, "loss": 1.6903, "step": 43 }, { "epoch": 0.01335559265442404, "grad_norm": 0.6649103164672852, "learning_rate": 9.979343863912516e-05, "loss": 2.0192, "step": 44 }, { "epoch": 0.013659128851115495, "grad_norm": 0.7561375498771667, "learning_rate": 9.978736330498177e-05, "loss": 2.1745, "step": 45 }, { "epoch": 0.01396266504780695, "grad_norm": 1.5740697383880615, "learning_rate": 9.97812879708384e-05, "loss": 2.4883, "step": 46 }, { "epoch": 0.014266201244498407, "grad_norm": 0.49843546748161316, "learning_rate": 9.977521263669502e-05, "loss": 2.3359, "step": 47 }, { "epoch": 0.014569737441189862, "grad_norm": 0.6524083018302917, "learning_rate": 9.976913730255164e-05, "loss": 2.0283, "step": 48 }, { "epoch": 0.014873273637881317, "grad_norm": 0.5995165705680847, "learning_rate": 9.976306196840827e-05, "loss": 2.2854, "step": 49 }, { "epoch": 0.015176809834572773, "grad_norm": 0.533091127872467, "learning_rate": 9.975698663426489e-05, "loss": 1.7316, "step": 50 }, { "epoch": 0.015480346031264228, "grad_norm": 0.4611203372478485, "learning_rate": 9.97509113001215e-05, "loss": 1.9873, "step": 51 }, { "epoch": 0.015783882227955685, "grad_norm": 0.5517066121101379, "learning_rate": 9.974483596597813e-05, "loss": 2.3221, "step": 52 }, { "epoch": 0.01608741842464714, "grad_norm": 1.1481316089630127, "learning_rate": 9.973876063183477e-05, "loss": 2.1987, "step": 53 }, { "epoch": 0.016390954621338594, "grad_norm": 0.5169709324836731, "learning_rate": 9.973268529769137e-05, "loss": 2.0714, "step": 54 }, { "epoch": 0.01669449081803005, "grad_norm": 0.5325965881347656, "learning_rate": 9.9726609963548e-05, "loss": 2.0495, "step": 55 }, { "epoch": 0.016998027014721504, "grad_norm": 0.5272805690765381, "learning_rate": 9.972053462940463e-05, "loss": 1.6467, "step": 56 }, { "epoch": 0.017301563211412962, "grad_norm": 0.5756974816322327, "learning_rate": 9.971445929526125e-05, "loss": 1.8153, "step": 57 }, { "epoch": 0.017605099408104417, "grad_norm": 0.49965259432792664, "learning_rate": 9.970838396111787e-05, "loss": 1.7549, "step": 58 }, { "epoch": 0.017908635604795872, "grad_norm": 0.4551718235015869, "learning_rate": 9.970230862697448e-05, "loss": 2.0268, "step": 59 }, { "epoch": 0.018212171801487327, "grad_norm": 0.4995061159133911, "learning_rate": 9.969623329283111e-05, "loss": 2.1194, "step": 60 }, { "epoch": 0.018515707998178782, "grad_norm": 0.6005909442901611, "learning_rate": 9.969015795868773e-05, "loss": 2.0995, "step": 61 }, { "epoch": 0.018819244194870237, "grad_norm": 0.5313609838485718, "learning_rate": 9.968408262454435e-05, "loss": 2.2653, "step": 62 }, { "epoch": 0.019122780391561695, "grad_norm": 0.4645906388759613, "learning_rate": 9.967800729040098e-05, "loss": 2.0501, "step": 63 }, { "epoch": 0.01942631658825315, "grad_norm": 0.4981083869934082, "learning_rate": 9.96719319562576e-05, "loss": 1.2802, "step": 64 }, { "epoch": 0.019729852784944605, "grad_norm": 0.7034462094306946, "learning_rate": 9.966585662211421e-05, "loss": 1.8468, "step": 65 }, { "epoch": 0.02003338898163606, "grad_norm": 0.5249907374382019, "learning_rate": 9.965978128797084e-05, "loss": 1.7569, "step": 66 }, { "epoch": 0.020336925178327515, "grad_norm": 0.7569686770439148, "learning_rate": 9.965370595382748e-05, "loss": 2.2157, "step": 67 }, { "epoch": 0.02064046137501897, "grad_norm": 0.7423145174980164, "learning_rate": 9.964763061968408e-05, "loss": 1.961, "step": 68 }, { "epoch": 0.020943997571710428, "grad_norm": 0.6891425251960754, "learning_rate": 9.964155528554071e-05, "loss": 2.0419, "step": 69 }, { "epoch": 0.021247533768401883, "grad_norm": 0.5633382201194763, "learning_rate": 9.963547995139734e-05, "loss": 1.7981, "step": 70 }, { "epoch": 0.021551069965093338, "grad_norm": 0.4792400598526001, "learning_rate": 9.962940461725396e-05, "loss": 1.7946, "step": 71 }, { "epoch": 0.021854606161784793, "grad_norm": 0.43436333537101746, "learning_rate": 9.962332928311058e-05, "loss": 1.9947, "step": 72 }, { "epoch": 0.022158142358476247, "grad_norm": 0.511132001876831, "learning_rate": 9.96172539489672e-05, "loss": 1.8512, "step": 73 }, { "epoch": 0.022461678555167702, "grad_norm": 0.5628978610038757, "learning_rate": 9.961117861482382e-05, "loss": 2.3258, "step": 74 }, { "epoch": 0.02276521475185916, "grad_norm": 0.5179631114006042, "learning_rate": 9.960510328068044e-05, "loss": 2.1093, "step": 75 }, { "epoch": 0.023068750948550616, "grad_norm": 0.45745086669921875, "learning_rate": 9.959902794653706e-05, "loss": 2.0091, "step": 76 }, { "epoch": 0.02337228714524207, "grad_norm": 0.49223676323890686, "learning_rate": 9.959295261239369e-05, "loss": 1.8603, "step": 77 }, { "epoch": 0.023675823341933525, "grad_norm": 0.44269105792045593, "learning_rate": 9.958687727825031e-05, "loss": 2.0431, "step": 78 }, { "epoch": 0.02397935953862498, "grad_norm": 0.45361781120300293, "learning_rate": 9.958080194410692e-05, "loss": 2.1202, "step": 79 }, { "epoch": 0.024282895735316435, "grad_norm": 0.4460793137550354, "learning_rate": 9.957472660996356e-05, "loss": 2.0395, "step": 80 }, { "epoch": 0.024586431932007893, "grad_norm": 0.3576311767101288, "learning_rate": 9.956865127582019e-05, "loss": 1.6929, "step": 81 }, { "epoch": 0.024889968128699348, "grad_norm": 0.47067755460739136, "learning_rate": 9.956257594167679e-05, "loss": 1.7033, "step": 82 }, { "epoch": 0.025193504325390803, "grad_norm": 0.40777909755706787, "learning_rate": 9.955650060753342e-05, "loss": 1.8594, "step": 83 }, { "epoch": 0.025497040522082258, "grad_norm": 0.4231606721878052, "learning_rate": 9.955042527339005e-05, "loss": 1.9413, "step": 84 }, { "epoch": 0.025800576718773713, "grad_norm": 0.4901526868343353, "learning_rate": 9.954434993924666e-05, "loss": 1.5754, "step": 85 }, { "epoch": 0.026104112915465168, "grad_norm": 0.4473549723625183, "learning_rate": 9.953827460510329e-05, "loss": 2.0099, "step": 86 }, { "epoch": 0.026407649112156626, "grad_norm": 0.4234200119972229, "learning_rate": 9.95321992709599e-05, "loss": 1.9134, "step": 87 }, { "epoch": 0.02671118530884808, "grad_norm": 0.497842937707901, "learning_rate": 9.952612393681653e-05, "loss": 1.5014, "step": 88 }, { "epoch": 0.027014721505539536, "grad_norm": 0.4480627775192261, "learning_rate": 9.952004860267315e-05, "loss": 1.8608, "step": 89 }, { "epoch": 0.02731825770223099, "grad_norm": 0.4578416049480438, "learning_rate": 9.951397326852977e-05, "loss": 1.9455, "step": 90 }, { "epoch": 0.027621793898922446, "grad_norm": 0.4651184380054474, "learning_rate": 9.95078979343864e-05, "loss": 1.7394, "step": 91 }, { "epoch": 0.0279253300956139, "grad_norm": 0.48281168937683105, "learning_rate": 9.950182260024302e-05, "loss": 2.219, "step": 92 }, { "epoch": 0.02822886629230536, "grad_norm": 0.3925339877605438, "learning_rate": 9.949574726609963e-05, "loss": 1.9998, "step": 93 }, { "epoch": 0.028532402488996814, "grad_norm": 0.5093829035758972, "learning_rate": 9.948967193195627e-05, "loss": 1.5958, "step": 94 }, { "epoch": 0.02883593868568827, "grad_norm": 0.4480256736278534, "learning_rate": 9.94835965978129e-05, "loss": 1.7606, "step": 95 }, { "epoch": 0.029139474882379723, "grad_norm": 0.41442152857780457, "learning_rate": 9.94775212636695e-05, "loss": 1.7481, "step": 96 }, { "epoch": 0.029443011079071178, "grad_norm": 0.373604953289032, "learning_rate": 9.947144592952613e-05, "loss": 1.91, "step": 97 }, { "epoch": 0.029746547275762633, "grad_norm": 0.4274522662162781, "learning_rate": 9.946537059538275e-05, "loss": 1.9125, "step": 98 }, { "epoch": 0.03005008347245409, "grad_norm": 0.47791674733161926, "learning_rate": 9.945929526123937e-05, "loss": 2.0647, "step": 99 }, { "epoch": 0.030353619669145546, "grad_norm": 0.456820547580719, "learning_rate": 9.9453219927096e-05, "loss": 2.1057, "step": 100 }, { "epoch": 0.030657155865837, "grad_norm": 0.41789788007736206, "learning_rate": 9.944714459295261e-05, "loss": 2.0159, "step": 101 }, { "epoch": 0.030960692062528456, "grad_norm": 0.4459668695926666, "learning_rate": 9.944106925880924e-05, "loss": 1.927, "step": 102 }, { "epoch": 0.031264228259219914, "grad_norm": 0.372925341129303, "learning_rate": 9.943499392466586e-05, "loss": 1.6992, "step": 103 }, { "epoch": 0.03156776445591137, "grad_norm": 0.4778668284416199, "learning_rate": 9.942891859052248e-05, "loss": 2.1148, "step": 104 }, { "epoch": 0.031871300652602824, "grad_norm": 0.4480198323726654, "learning_rate": 9.942284325637911e-05, "loss": 1.9734, "step": 105 }, { "epoch": 0.03217483684929428, "grad_norm": 0.40198591351509094, "learning_rate": 9.941676792223573e-05, "loss": 1.5448, "step": 106 }, { "epoch": 0.032478373045985734, "grad_norm": 0.40328651666641235, "learning_rate": 9.941069258809234e-05, "loss": 2.1084, "step": 107 }, { "epoch": 0.03278190924267719, "grad_norm": 0.43856972455978394, "learning_rate": 9.940461725394898e-05, "loss": 2.1748, "step": 108 }, { "epoch": 0.033085445439368644, "grad_norm": 0.46910691261291504, "learning_rate": 9.939854191980559e-05, "loss": 1.9526, "step": 109 }, { "epoch": 0.0333889816360601, "grad_norm": 0.4143713116645813, "learning_rate": 9.939246658566221e-05, "loss": 1.9133, "step": 110 }, { "epoch": 0.03369251783275155, "grad_norm": 0.45832857489585876, "learning_rate": 9.938639125151884e-05, "loss": 1.7964, "step": 111 }, { "epoch": 0.03399605402944301, "grad_norm": 0.4263196587562561, "learning_rate": 9.938031591737546e-05, "loss": 1.9231, "step": 112 }, { "epoch": 0.03429959022613446, "grad_norm": 0.38841062784194946, "learning_rate": 9.937424058323208e-05, "loss": 1.9941, "step": 113 }, { "epoch": 0.034603126422825925, "grad_norm": 0.39627939462661743, "learning_rate": 9.93681652490887e-05, "loss": 1.591, "step": 114 }, { "epoch": 0.03490666261951738, "grad_norm": 0.4354992210865021, "learning_rate": 9.936208991494532e-05, "loss": 1.8843, "step": 115 }, { "epoch": 0.035210198816208835, "grad_norm": 0.4674322009086609, "learning_rate": 9.935601458080195e-05, "loss": 1.9687, "step": 116 }, { "epoch": 0.03551373501290029, "grad_norm": 0.4263432025909424, "learning_rate": 9.934993924665857e-05, "loss": 2.0061, "step": 117 }, { "epoch": 0.035817271209591744, "grad_norm": 0.4172697067260742, "learning_rate": 9.934386391251519e-05, "loss": 2.0079, "step": 118 }, { "epoch": 0.0361208074062832, "grad_norm": 0.35841792821884155, "learning_rate": 9.933778857837182e-05, "loss": 2.0048, "step": 119 }, { "epoch": 0.036424343602974654, "grad_norm": 0.4118800759315491, "learning_rate": 9.933171324422844e-05, "loss": 1.8136, "step": 120 }, { "epoch": 0.03672787979966611, "grad_norm": 0.4894438087940216, "learning_rate": 9.932563791008505e-05, "loss": 1.9214, "step": 121 }, { "epoch": 0.037031415996357564, "grad_norm": 0.4079352915287018, "learning_rate": 9.931956257594169e-05, "loss": 2.2445, "step": 122 }, { "epoch": 0.03733495219304902, "grad_norm": 0.41293051838874817, "learning_rate": 9.93134872417983e-05, "loss": 2.0845, "step": 123 }, { "epoch": 0.037638488389740474, "grad_norm": 0.4413944482803345, "learning_rate": 9.930741190765492e-05, "loss": 1.8782, "step": 124 }, { "epoch": 0.03794202458643193, "grad_norm": 0.4036192297935486, "learning_rate": 9.930133657351155e-05, "loss": 1.9393, "step": 125 }, { "epoch": 0.03824556078312339, "grad_norm": 0.7759333848953247, "learning_rate": 9.929526123936817e-05, "loss": 1.9035, "step": 126 }, { "epoch": 0.038549096979814845, "grad_norm": 0.4737033247947693, "learning_rate": 9.928918590522479e-05, "loss": 2.0535, "step": 127 }, { "epoch": 0.0388526331765063, "grad_norm": 0.5254648923873901, "learning_rate": 9.928311057108142e-05, "loss": 1.9854, "step": 128 }, { "epoch": 0.039156169373197755, "grad_norm": 0.46957090497016907, "learning_rate": 9.927703523693803e-05, "loss": 2.1865, "step": 129 }, { "epoch": 0.03945970556988921, "grad_norm": 0.4427931010723114, "learning_rate": 9.927095990279466e-05, "loss": 1.9696, "step": 130 }, { "epoch": 0.039763241766580665, "grad_norm": 0.42948615550994873, "learning_rate": 9.926488456865128e-05, "loss": 1.5367, "step": 131 }, { "epoch": 0.04006677796327212, "grad_norm": 0.3952697515487671, "learning_rate": 9.92588092345079e-05, "loss": 1.9648, "step": 132 }, { "epoch": 0.040370314159963575, "grad_norm": 0.41384372115135193, "learning_rate": 9.925273390036453e-05, "loss": 1.9115, "step": 133 }, { "epoch": 0.04067385035665503, "grad_norm": 0.44592148065567017, "learning_rate": 9.924665856622115e-05, "loss": 2.2336, "step": 134 }, { "epoch": 0.040977386553346484, "grad_norm": 0.43720191717147827, "learning_rate": 9.924058323207776e-05, "loss": 2.1014, "step": 135 }, { "epoch": 0.04128092275003794, "grad_norm": 0.6224471926689148, "learning_rate": 9.92345078979344e-05, "loss": 1.9093, "step": 136 }, { "epoch": 0.041584458946729394, "grad_norm": 0.40913721919059753, "learning_rate": 9.922843256379101e-05, "loss": 2.036, "step": 137 }, { "epoch": 0.041887995143420856, "grad_norm": 0.5675486922264099, "learning_rate": 9.922235722964763e-05, "loss": 1.7925, "step": 138 }, { "epoch": 0.04219153134011231, "grad_norm": 0.4174894690513611, "learning_rate": 9.921628189550426e-05, "loss": 1.742, "step": 139 }, { "epoch": 0.042495067536803766, "grad_norm": 0.5149232745170593, "learning_rate": 9.921020656136088e-05, "loss": 2.0117, "step": 140 }, { "epoch": 0.04279860373349522, "grad_norm": 0.4599703252315521, "learning_rate": 9.92041312272175e-05, "loss": 1.9401, "step": 141 }, { "epoch": 0.043102139930186675, "grad_norm": 0.39801791310310364, "learning_rate": 9.919805589307413e-05, "loss": 1.74, "step": 142 }, { "epoch": 0.04340567612687813, "grad_norm": 0.4469515085220337, "learning_rate": 9.919198055893074e-05, "loss": 1.9919, "step": 143 }, { "epoch": 0.043709212323569585, "grad_norm": 0.4179072678089142, "learning_rate": 9.918590522478737e-05, "loss": 1.9618, "step": 144 }, { "epoch": 0.04401274852026104, "grad_norm": 0.3512915372848511, "learning_rate": 9.917982989064399e-05, "loss": 2.0603, "step": 145 }, { "epoch": 0.044316284716952495, "grad_norm": 0.6461288928985596, "learning_rate": 9.917375455650061e-05, "loss": 1.9461, "step": 146 }, { "epoch": 0.04461982091364395, "grad_norm": 0.4113643169403076, "learning_rate": 9.916767922235724e-05, "loss": 1.5332, "step": 147 }, { "epoch": 0.044923357110335405, "grad_norm": 0.5560798645019531, "learning_rate": 9.916160388821386e-05, "loss": 1.678, "step": 148 }, { "epoch": 0.045226893307026866, "grad_norm": 0.5448784828186035, "learning_rate": 9.915552855407047e-05, "loss": 1.8036, "step": 149 }, { "epoch": 0.04553042950371832, "grad_norm": 0.4570043087005615, "learning_rate": 9.91494532199271e-05, "loss": 2.0126, "step": 150 }, { "epoch": 0.045833965700409776, "grad_norm": 0.4167179465293884, "learning_rate": 9.914337788578372e-05, "loss": 1.7567, "step": 151 }, { "epoch": 0.04613750189710123, "grad_norm": 1.3264193534851074, "learning_rate": 9.913730255164034e-05, "loss": 2.012, "step": 152 }, { "epoch": 0.046441038093792686, "grad_norm": 0.45362886786460876, "learning_rate": 9.913122721749697e-05, "loss": 1.8789, "step": 153 }, { "epoch": 0.04674457429048414, "grad_norm": 2.0713539123535156, "learning_rate": 9.912515188335359e-05, "loss": 2.0798, "step": 154 }, { "epoch": 0.047048110487175596, "grad_norm": 1.430906891822815, "learning_rate": 9.91190765492102e-05, "loss": 1.9401, "step": 155 }, { "epoch": 0.04735164668386705, "grad_norm": 0.846182107925415, "learning_rate": 9.911300121506684e-05, "loss": 1.9073, "step": 156 }, { "epoch": 0.047655182880558505, "grad_norm": 0.5027226805686951, "learning_rate": 9.910692588092345e-05, "loss": 2.1521, "step": 157 }, { "epoch": 0.04795871907724996, "grad_norm": 0.32647275924682617, "learning_rate": 9.910085054678007e-05, "loss": 1.705, "step": 158 }, { "epoch": 0.048262255273941415, "grad_norm": 0.4337715804576874, "learning_rate": 9.90947752126367e-05, "loss": 1.9844, "step": 159 }, { "epoch": 0.04856579147063287, "grad_norm": 0.4408979117870331, "learning_rate": 9.908869987849332e-05, "loss": 1.81, "step": 160 }, { "epoch": 0.04886932766732433, "grad_norm": 3.5793535709381104, "learning_rate": 9.908262454434995e-05, "loss": 1.8569, "step": 161 }, { "epoch": 0.04917286386401579, "grad_norm": 0.47893545031547546, "learning_rate": 9.907654921020657e-05, "loss": 1.9397, "step": 162 }, { "epoch": 0.04947640006070724, "grad_norm": 0.36375802755355835, "learning_rate": 9.907047387606318e-05, "loss": 1.8064, "step": 163 }, { "epoch": 0.049779936257398696, "grad_norm": 0.3935683071613312, "learning_rate": 9.906439854191982e-05, "loss": 2.0968, "step": 164 }, { "epoch": 0.05008347245409015, "grad_norm": 2.1048178672790527, "learning_rate": 9.905832320777643e-05, "loss": 1.8488, "step": 165 }, { "epoch": 0.050387008650781606, "grad_norm": 0.35579410195350647, "learning_rate": 9.905224787363305e-05, "loss": 1.837, "step": 166 }, { "epoch": 0.05069054484747306, "grad_norm": 0.37291133403778076, "learning_rate": 9.904617253948968e-05, "loss": 1.5921, "step": 167 }, { "epoch": 0.050994081044164516, "grad_norm": 0.37633177638053894, "learning_rate": 9.90400972053463e-05, "loss": 1.9613, "step": 168 }, { "epoch": 0.05129761724085597, "grad_norm": 0.49120867252349854, "learning_rate": 9.903402187120292e-05, "loss": 2.1737, "step": 169 }, { "epoch": 0.051601153437547426, "grad_norm": 0.41102972626686096, "learning_rate": 9.902794653705955e-05, "loss": 1.8833, "step": 170 }, { "epoch": 0.05190468963423888, "grad_norm": 0.3835681676864624, "learning_rate": 9.902187120291616e-05, "loss": 2.0283, "step": 171 }, { "epoch": 0.052208225830930335, "grad_norm": 0.4194372296333313, "learning_rate": 9.901579586877278e-05, "loss": 1.6146, "step": 172 }, { "epoch": 0.0525117620276218, "grad_norm": 0.411516934633255, "learning_rate": 9.900972053462941e-05, "loss": 2.0012, "step": 173 }, { "epoch": 0.05281529822431325, "grad_norm": 0.4565434455871582, "learning_rate": 9.900364520048603e-05, "loss": 2.2415, "step": 174 }, { "epoch": 0.05311883442100471, "grad_norm": 0.4352016746997833, "learning_rate": 9.899756986634266e-05, "loss": 1.6505, "step": 175 }, { "epoch": 0.05342237061769616, "grad_norm": 0.5442507266998291, "learning_rate": 9.899149453219928e-05, "loss": 1.9692, "step": 176 }, { "epoch": 0.05372590681438762, "grad_norm": 0.39451470971107483, "learning_rate": 9.89854191980559e-05, "loss": 1.9682, "step": 177 }, { "epoch": 0.05402944301107907, "grad_norm": 0.34474217891693115, "learning_rate": 9.897934386391253e-05, "loss": 1.6806, "step": 178 }, { "epoch": 0.054332979207770526, "grad_norm": 0.45165541768074036, "learning_rate": 9.897326852976914e-05, "loss": 1.9329, "step": 179 }, { "epoch": 0.05463651540446198, "grad_norm": 0.5402116775512695, "learning_rate": 9.896719319562576e-05, "loss": 1.6939, "step": 180 }, { "epoch": 0.054940051601153436, "grad_norm": 0.40272051095962524, "learning_rate": 9.896111786148239e-05, "loss": 1.985, "step": 181 }, { "epoch": 0.05524358779784489, "grad_norm": 0.3833436667919159, "learning_rate": 9.895504252733901e-05, "loss": 1.7804, "step": 182 }, { "epoch": 0.055547123994536346, "grad_norm": 0.34711307287216187, "learning_rate": 9.894896719319563e-05, "loss": 1.865, "step": 183 }, { "epoch": 0.0558506601912278, "grad_norm": 0.43081262707710266, "learning_rate": 9.894289185905226e-05, "loss": 1.8066, "step": 184 }, { "epoch": 0.05615419638791926, "grad_norm": 0.38740819692611694, "learning_rate": 9.893681652490887e-05, "loss": 1.738, "step": 185 }, { "epoch": 0.05645773258461072, "grad_norm": 0.46878042817115784, "learning_rate": 9.893074119076549e-05, "loss": 1.4563, "step": 186 }, { "epoch": 0.05676126878130217, "grad_norm": 0.4415140151977539, "learning_rate": 9.892466585662212e-05, "loss": 1.565, "step": 187 }, { "epoch": 0.05706480497799363, "grad_norm": 0.43196091055870056, "learning_rate": 9.891859052247874e-05, "loss": 2.1562, "step": 188 }, { "epoch": 0.05736834117468508, "grad_norm": 0.4677179455757141, "learning_rate": 9.891251518833537e-05, "loss": 2.0413, "step": 189 }, { "epoch": 0.05767187737137654, "grad_norm": 0.4087100327014923, "learning_rate": 9.890643985419199e-05, "loss": 1.5434, "step": 190 }, { "epoch": 0.05797541356806799, "grad_norm": 0.37906375527381897, "learning_rate": 9.89003645200486e-05, "loss": 1.5561, "step": 191 }, { "epoch": 0.05827894976475945, "grad_norm": 0.5014649033546448, "learning_rate": 9.889428918590524e-05, "loss": 1.6243, "step": 192 }, { "epoch": 0.0585824859614509, "grad_norm": 0.6972336769104004, "learning_rate": 9.888821385176185e-05, "loss": 2.1616, "step": 193 }, { "epoch": 0.058886022158142357, "grad_norm": 0.46012699604034424, "learning_rate": 9.888213851761847e-05, "loss": 2.1195, "step": 194 }, { "epoch": 0.05918955835483381, "grad_norm": 0.36921924352645874, "learning_rate": 9.88760631834751e-05, "loss": 2.1071, "step": 195 }, { "epoch": 0.059493094551525266, "grad_norm": 0.36246025562286377, "learning_rate": 9.886998784933172e-05, "loss": 1.9948, "step": 196 }, { "epoch": 0.05979663074821673, "grad_norm": 0.3935892581939697, "learning_rate": 9.886391251518834e-05, "loss": 2.3065, "step": 197 }, { "epoch": 0.06010016694490818, "grad_norm": 0.36333411931991577, "learning_rate": 9.885783718104497e-05, "loss": 1.8746, "step": 198 }, { "epoch": 0.06040370314159964, "grad_norm": 0.4027535617351532, "learning_rate": 9.885176184690158e-05, "loss": 2.156, "step": 199 }, { "epoch": 0.06070723933829109, "grad_norm": 0.42472004890441895, "learning_rate": 9.88456865127582e-05, "loss": 1.8647, "step": 200 }, { "epoch": 0.06101077553498255, "grad_norm": 0.38055720925331116, "learning_rate": 9.883961117861483e-05, "loss": 1.9185, "step": 201 }, { "epoch": 0.061314311731674, "grad_norm": 0.3831098973751068, "learning_rate": 9.883353584447145e-05, "loss": 2.2488, "step": 202 }, { "epoch": 0.06161784792836546, "grad_norm": 0.35769203305244446, "learning_rate": 9.882746051032808e-05, "loss": 1.9281, "step": 203 }, { "epoch": 0.06192138412505691, "grad_norm": 0.3576291799545288, "learning_rate": 9.88213851761847e-05, "loss": 1.7082, "step": 204 }, { "epoch": 0.06222492032174837, "grad_norm": 0.3641425669193268, "learning_rate": 9.881530984204131e-05, "loss": 1.9374, "step": 205 }, { "epoch": 0.06252845651843983, "grad_norm": 0.4281562268733978, "learning_rate": 9.880923450789795e-05, "loss": 2.0797, "step": 206 }, { "epoch": 0.06283199271513128, "grad_norm": 0.39947500824928284, "learning_rate": 9.880315917375455e-05, "loss": 2.1399, "step": 207 }, { "epoch": 0.06313552891182274, "grad_norm": 0.4200506806373596, "learning_rate": 9.879708383961118e-05, "loss": 2.0443, "step": 208 }, { "epoch": 0.06343906510851419, "grad_norm": 0.35776716470718384, "learning_rate": 9.879100850546781e-05, "loss": 1.9637, "step": 209 }, { "epoch": 0.06374260130520565, "grad_norm": 0.3676275610923767, "learning_rate": 9.878493317132443e-05, "loss": 2.2018, "step": 210 }, { "epoch": 0.0640461375018971, "grad_norm": 0.48199740052223206, "learning_rate": 9.877885783718105e-05, "loss": 1.8948, "step": 211 }, { "epoch": 0.06434967369858856, "grad_norm": 0.40157443284988403, "learning_rate": 9.877278250303768e-05, "loss": 1.9011, "step": 212 }, { "epoch": 0.06465320989528, "grad_norm": 0.3959876596927643, "learning_rate": 9.87667071688943e-05, "loss": 1.8019, "step": 213 }, { "epoch": 0.06495674609197147, "grad_norm": 0.4266337752342224, "learning_rate": 9.876063183475091e-05, "loss": 1.6282, "step": 214 }, { "epoch": 0.06526028228866293, "grad_norm": 0.5142415165901184, "learning_rate": 9.875455650060754e-05, "loss": 2.014, "step": 215 }, { "epoch": 0.06556381848535438, "grad_norm": 0.3834533095359802, "learning_rate": 9.874848116646416e-05, "loss": 2.2733, "step": 216 }, { "epoch": 0.06586735468204584, "grad_norm": 0.4485650062561035, "learning_rate": 9.874240583232079e-05, "loss": 2.0707, "step": 217 }, { "epoch": 0.06617089087873729, "grad_norm": 0.37866663932800293, "learning_rate": 9.873633049817741e-05, "loss": 2.2419, "step": 218 }, { "epoch": 0.06647442707542875, "grad_norm": 0.5389169454574585, "learning_rate": 9.873025516403402e-05, "loss": 1.9736, "step": 219 }, { "epoch": 0.0667779632721202, "grad_norm": 0.3923830986022949, "learning_rate": 9.872417982989066e-05, "loss": 2.0164, "step": 220 }, { "epoch": 0.06708149946881166, "grad_norm": 0.431417316198349, "learning_rate": 9.871810449574726e-05, "loss": 1.7535, "step": 221 }, { "epoch": 0.0673850356655031, "grad_norm": 0.4980961084365845, "learning_rate": 9.871202916160389e-05, "loss": 2.0751, "step": 222 }, { "epoch": 0.06768857186219457, "grad_norm": 0.38455912470817566, "learning_rate": 9.870595382746052e-05, "loss": 1.9394, "step": 223 }, { "epoch": 0.06799210805888602, "grad_norm": 0.3911600708961487, "learning_rate": 9.869987849331714e-05, "loss": 1.6384, "step": 224 }, { "epoch": 0.06829564425557748, "grad_norm": 0.39567652344703674, "learning_rate": 9.869380315917376e-05, "loss": 2.0082, "step": 225 }, { "epoch": 0.06859918045226893, "grad_norm": 0.3773573935031891, "learning_rate": 9.868772782503039e-05, "loss": 2.0852, "step": 226 }, { "epoch": 0.06890271664896039, "grad_norm": 0.4387274384498596, "learning_rate": 9.8681652490887e-05, "loss": 2.2043, "step": 227 }, { "epoch": 0.06920625284565185, "grad_norm": 0.4070594012737274, "learning_rate": 9.867557715674362e-05, "loss": 1.8638, "step": 228 }, { "epoch": 0.0695097890423433, "grad_norm": 0.5250163674354553, "learning_rate": 9.866950182260025e-05, "loss": 2.0826, "step": 229 }, { "epoch": 0.06981332523903476, "grad_norm": 0.47242820262908936, "learning_rate": 9.866342648845687e-05, "loss": 1.5517, "step": 230 }, { "epoch": 0.07011686143572621, "grad_norm": 0.41242364048957825, "learning_rate": 9.865735115431349e-05, "loss": 1.3791, "step": 231 }, { "epoch": 0.07042039763241767, "grad_norm": 0.4049898087978363, "learning_rate": 9.865127582017012e-05, "loss": 2.0933, "step": 232 }, { "epoch": 0.07072393382910912, "grad_norm": 0.5341041684150696, "learning_rate": 9.864520048602673e-05, "loss": 1.8293, "step": 233 }, { "epoch": 0.07102747002580058, "grad_norm": 0.4930991530418396, "learning_rate": 9.863912515188337e-05, "loss": 2.3447, "step": 234 }, { "epoch": 0.07133100622249203, "grad_norm": 0.752202033996582, "learning_rate": 9.863304981773997e-05, "loss": 1.9733, "step": 235 }, { "epoch": 0.07163454241918349, "grad_norm": 0.3552611768245697, "learning_rate": 9.86269744835966e-05, "loss": 2.0324, "step": 236 }, { "epoch": 0.07193807861587494, "grad_norm": 0.3639819622039795, "learning_rate": 9.862089914945323e-05, "loss": 1.9325, "step": 237 }, { "epoch": 0.0722416148125664, "grad_norm": 0.4028383195400238, "learning_rate": 9.861482381530985e-05, "loss": 1.9652, "step": 238 }, { "epoch": 0.07254515100925786, "grad_norm": 0.3904295563697815, "learning_rate": 9.860874848116647e-05, "loss": 1.8133, "step": 239 }, { "epoch": 0.07284868720594931, "grad_norm": 0.46043211221694946, "learning_rate": 9.86026731470231e-05, "loss": 1.6711, "step": 240 }, { "epoch": 0.07315222340264077, "grad_norm": 0.41137024760246277, "learning_rate": 9.859659781287971e-05, "loss": 2.1129, "step": 241 }, { "epoch": 0.07345575959933222, "grad_norm": 0.3776731491088867, "learning_rate": 9.859052247873633e-05, "loss": 1.5451, "step": 242 }, { "epoch": 0.07375929579602368, "grad_norm": 0.8163847923278809, "learning_rate": 9.858444714459296e-05, "loss": 1.8133, "step": 243 }, { "epoch": 0.07406283199271513, "grad_norm": 0.7984678149223328, "learning_rate": 9.857837181044958e-05, "loss": 1.6879, "step": 244 }, { "epoch": 0.07436636818940659, "grad_norm": 0.3759590983390808, "learning_rate": 9.85722964763062e-05, "loss": 2.0183, "step": 245 }, { "epoch": 0.07466990438609804, "grad_norm": 0.4622940421104431, "learning_rate": 9.856622114216283e-05, "loss": 1.9958, "step": 246 }, { "epoch": 0.0749734405827895, "grad_norm": 0.4710557758808136, "learning_rate": 9.856014580801944e-05, "loss": 1.5483, "step": 247 }, { "epoch": 0.07527697677948095, "grad_norm": 0.3766056001186371, "learning_rate": 9.855407047387608e-05, "loss": 1.8697, "step": 248 }, { "epoch": 0.07558051297617241, "grad_norm": 0.6338986158370972, "learning_rate": 9.854799513973268e-05, "loss": 2.2657, "step": 249 }, { "epoch": 0.07588404917286386, "grad_norm": 0.4152657687664032, "learning_rate": 9.854191980558931e-05, "loss": 1.5967, "step": 250 }, { "epoch": 0.07618758536955532, "grad_norm": 0.37085869908332825, "learning_rate": 9.853584447144594e-05, "loss": 1.915, "step": 251 }, { "epoch": 0.07649112156624678, "grad_norm": 0.40199750661849976, "learning_rate": 9.852976913730256e-05, "loss": 2.0662, "step": 252 }, { "epoch": 0.07679465776293823, "grad_norm": 0.39193621277809143, "learning_rate": 9.852369380315918e-05, "loss": 1.911, "step": 253 }, { "epoch": 0.07709819395962969, "grad_norm": 0.39082249999046326, "learning_rate": 9.85176184690158e-05, "loss": 1.6207, "step": 254 }, { "epoch": 0.07740173015632114, "grad_norm": 0.3943793773651123, "learning_rate": 9.851154313487242e-05, "loss": 2.1254, "step": 255 }, { "epoch": 0.0777052663530126, "grad_norm": 0.34571030735969543, "learning_rate": 9.850546780072904e-05, "loss": 1.9696, "step": 256 }, { "epoch": 0.07800880254970405, "grad_norm": 0.4847205579280853, "learning_rate": 9.849939246658567e-05, "loss": 2.1382, "step": 257 }, { "epoch": 0.07831233874639551, "grad_norm": 0.47491976618766785, "learning_rate": 9.849331713244229e-05, "loss": 2.1109, "step": 258 }, { "epoch": 0.07861587494308696, "grad_norm": 0.3984815776348114, "learning_rate": 9.84872417982989e-05, "loss": 2.0019, "step": 259 }, { "epoch": 0.07891941113977842, "grad_norm": 0.578295886516571, "learning_rate": 9.848116646415554e-05, "loss": 1.6984, "step": 260 }, { "epoch": 0.07922294733646987, "grad_norm": 0.4641231894493103, "learning_rate": 9.847509113001215e-05, "loss": 2.0974, "step": 261 }, { "epoch": 0.07952648353316133, "grad_norm": 0.3704085052013397, "learning_rate": 9.846901579586879e-05, "loss": 1.8907, "step": 262 }, { "epoch": 0.07983001972985279, "grad_norm": 0.40248993039131165, "learning_rate": 9.846294046172539e-05, "loss": 1.9194, "step": 263 }, { "epoch": 0.08013355592654424, "grad_norm": 0.40396660566329956, "learning_rate": 9.845686512758202e-05, "loss": 1.7075, "step": 264 }, { "epoch": 0.0804370921232357, "grad_norm": 0.44500696659088135, "learning_rate": 9.845078979343865e-05, "loss": 1.7463, "step": 265 }, { "epoch": 0.08074062831992715, "grad_norm": 0.3681708574295044, "learning_rate": 9.844471445929527e-05, "loss": 1.7162, "step": 266 }, { "epoch": 0.08104416451661861, "grad_norm": 0.47645455598831177, "learning_rate": 9.843863912515189e-05, "loss": 1.7759, "step": 267 }, { "epoch": 0.08134770071331006, "grad_norm": 0.3663488030433655, "learning_rate": 9.843256379100852e-05, "loss": 1.8687, "step": 268 }, { "epoch": 0.08165123691000152, "grad_norm": 0.33710968494415283, "learning_rate": 9.842648845686513e-05, "loss": 1.9777, "step": 269 }, { "epoch": 0.08195477310669297, "grad_norm": 0.4824034571647644, "learning_rate": 9.842041312272175e-05, "loss": 1.4345, "step": 270 }, { "epoch": 0.08225830930338443, "grad_norm": 0.3703802824020386, "learning_rate": 9.841433778857838e-05, "loss": 1.7835, "step": 271 }, { "epoch": 0.08256184550007588, "grad_norm": 0.4279334545135498, "learning_rate": 9.8408262454435e-05, "loss": 2.1811, "step": 272 }, { "epoch": 0.08286538169676734, "grad_norm": 0.3500446379184723, "learning_rate": 9.840218712029162e-05, "loss": 2.0992, "step": 273 }, { "epoch": 0.08316891789345879, "grad_norm": 0.4278954863548279, "learning_rate": 9.839611178614823e-05, "loss": 1.4691, "step": 274 }, { "epoch": 0.08347245409015025, "grad_norm": 0.6769374012947083, "learning_rate": 9.839003645200486e-05, "loss": 1.8223, "step": 275 }, { "epoch": 0.08377599028684171, "grad_norm": 0.34110525250434875, "learning_rate": 9.83839611178615e-05, "loss": 1.9469, "step": 276 }, { "epoch": 0.08407952648353316, "grad_norm": 0.37355175614356995, "learning_rate": 9.83778857837181e-05, "loss": 1.9281, "step": 277 }, { "epoch": 0.08438306268022462, "grad_norm": 0.3968208134174347, "learning_rate": 9.837181044957473e-05, "loss": 2.0537, "step": 278 }, { "epoch": 0.08468659887691607, "grad_norm": 0.3811870515346527, "learning_rate": 9.836573511543136e-05, "loss": 1.9715, "step": 279 }, { "epoch": 0.08499013507360753, "grad_norm": 0.3258214294910431, "learning_rate": 9.835965978128796e-05, "loss": 1.979, "step": 280 }, { "epoch": 0.08529367127029898, "grad_norm": 0.38593369722366333, "learning_rate": 9.83535844471446e-05, "loss": 2.135, "step": 281 }, { "epoch": 0.08559720746699044, "grad_norm": 0.3811703026294708, "learning_rate": 9.834750911300123e-05, "loss": 2.1786, "step": 282 }, { "epoch": 0.08590074366368189, "grad_norm": 0.3784421980381012, "learning_rate": 9.834143377885784e-05, "loss": 2.0193, "step": 283 }, { "epoch": 0.08620427986037335, "grad_norm": 0.3660358190536499, "learning_rate": 9.833535844471446e-05, "loss": 1.9975, "step": 284 }, { "epoch": 0.0865078160570648, "grad_norm": 0.3747190237045288, "learning_rate": 9.832928311057109e-05, "loss": 1.7897, "step": 285 }, { "epoch": 0.08681135225375626, "grad_norm": 0.37042975425720215, "learning_rate": 9.832320777642771e-05, "loss": 1.9026, "step": 286 }, { "epoch": 0.08711488845044772, "grad_norm": 0.3642013669013977, "learning_rate": 9.831713244228433e-05, "loss": 1.9611, "step": 287 }, { "epoch": 0.08741842464713917, "grad_norm": 0.38183900713920593, "learning_rate": 9.831105710814094e-05, "loss": 1.8648, "step": 288 }, { "epoch": 0.08772196084383063, "grad_norm": 0.4243112802505493, "learning_rate": 9.830498177399757e-05, "loss": 1.734, "step": 289 }, { "epoch": 0.08802549704052208, "grad_norm": 0.3763525187969208, "learning_rate": 9.82989064398542e-05, "loss": 1.8955, "step": 290 }, { "epoch": 0.08832903323721354, "grad_norm": 0.37548086047172546, "learning_rate": 9.829283110571081e-05, "loss": 1.9246, "step": 291 }, { "epoch": 0.08863256943390499, "grad_norm": 0.5070151090621948, "learning_rate": 9.828675577156744e-05, "loss": 1.6474, "step": 292 }, { "epoch": 0.08893610563059645, "grad_norm": 0.3903336822986603, "learning_rate": 9.828068043742407e-05, "loss": 1.5546, "step": 293 }, { "epoch": 0.0892396418272879, "grad_norm": 0.42705482244491577, "learning_rate": 9.827460510328068e-05, "loss": 1.5506, "step": 294 }, { "epoch": 0.08954317802397936, "grad_norm": 0.4342738687992096, "learning_rate": 9.82685297691373e-05, "loss": 1.6173, "step": 295 }, { "epoch": 0.08984671422067081, "grad_norm": 0.3975971043109894, "learning_rate": 9.826245443499394e-05, "loss": 1.9652, "step": 296 }, { "epoch": 0.09015025041736227, "grad_norm": 0.42342832684516907, "learning_rate": 9.825637910085055e-05, "loss": 1.9464, "step": 297 }, { "epoch": 0.09045378661405373, "grad_norm": 0.381565660238266, "learning_rate": 9.825030376670717e-05, "loss": 2.0949, "step": 298 }, { "epoch": 0.09075732281074518, "grad_norm": 0.4632069170475006, "learning_rate": 9.82442284325638e-05, "loss": 1.4451, "step": 299 }, { "epoch": 0.09106085900743664, "grad_norm": 0.36039817333221436, "learning_rate": 9.823815309842042e-05, "loss": 1.2199, "step": 300 }, { "epoch": 0.09136439520412809, "grad_norm": 0.37576648592948914, "learning_rate": 9.823207776427704e-05, "loss": 1.9884, "step": 301 }, { "epoch": 0.09166793140081955, "grad_norm": 0.3673763573169708, "learning_rate": 9.822600243013365e-05, "loss": 1.7103, "step": 302 }, { "epoch": 0.091971467597511, "grad_norm": 0.3729887008666992, "learning_rate": 9.821992709599029e-05, "loss": 1.9215, "step": 303 }, { "epoch": 0.09227500379420246, "grad_norm": 0.3857046365737915, "learning_rate": 9.82138517618469e-05, "loss": 2.1883, "step": 304 }, { "epoch": 0.09257853999089391, "grad_norm": 0.4226963520050049, "learning_rate": 9.820777642770352e-05, "loss": 1.6413, "step": 305 }, { "epoch": 0.09288207618758537, "grad_norm": 0.40093332529067993, "learning_rate": 9.820170109356015e-05, "loss": 1.9897, "step": 306 }, { "epoch": 0.09318561238427682, "grad_norm": 0.4287321865558624, "learning_rate": 9.819562575941678e-05, "loss": 1.8708, "step": 307 }, { "epoch": 0.09348914858096828, "grad_norm": 0.3933330178260803, "learning_rate": 9.818955042527339e-05, "loss": 2.003, "step": 308 }, { "epoch": 0.09379268477765973, "grad_norm": 0.3991425633430481, "learning_rate": 9.818347509113002e-05, "loss": 1.7305, "step": 309 }, { "epoch": 0.09409622097435119, "grad_norm": 0.37534525990486145, "learning_rate": 9.817739975698665e-05, "loss": 1.9767, "step": 310 }, { "epoch": 0.09439975717104265, "grad_norm": 0.4293142557144165, "learning_rate": 9.817132442284326e-05, "loss": 2.1563, "step": 311 }, { "epoch": 0.0947032933677341, "grad_norm": 0.5783388614654541, "learning_rate": 9.816524908869988e-05, "loss": 1.4839, "step": 312 }, { "epoch": 0.09500682956442556, "grad_norm": 0.3414449989795685, "learning_rate": 9.815917375455651e-05, "loss": 1.9974, "step": 313 }, { "epoch": 0.09531036576111701, "grad_norm": 0.3927890956401825, "learning_rate": 9.815309842041313e-05, "loss": 1.9683, "step": 314 }, { "epoch": 0.09561390195780847, "grad_norm": 0.42801201343536377, "learning_rate": 9.814702308626975e-05, "loss": 2.1414, "step": 315 }, { "epoch": 0.09591743815449992, "grad_norm": 0.4715151786804199, "learning_rate": 9.814094775212636e-05, "loss": 2.0055, "step": 316 }, { "epoch": 0.09622097435119138, "grad_norm": 0.42110496759414673, "learning_rate": 9.8134872417983e-05, "loss": 2.0693, "step": 317 }, { "epoch": 0.09652451054788283, "grad_norm": 0.39333397150039673, "learning_rate": 9.812879708383961e-05, "loss": 2.1362, "step": 318 }, { "epoch": 0.09682804674457429, "grad_norm": 0.42686814069747925, "learning_rate": 9.812272174969623e-05, "loss": 1.9485, "step": 319 }, { "epoch": 0.09713158294126574, "grad_norm": 0.38239583373069763, "learning_rate": 9.811664641555286e-05, "loss": 1.3584, "step": 320 }, { "epoch": 0.0974351191379572, "grad_norm": 0.3651975393295288, "learning_rate": 9.811057108140949e-05, "loss": 2.2135, "step": 321 }, { "epoch": 0.09773865533464866, "grad_norm": 0.34531673789024353, "learning_rate": 9.81044957472661e-05, "loss": 1.7106, "step": 322 }, { "epoch": 0.09804219153134011, "grad_norm": 0.38727474212646484, "learning_rate": 9.809842041312273e-05, "loss": 1.8647, "step": 323 }, { "epoch": 0.09834572772803157, "grad_norm": 0.4127596318721771, "learning_rate": 9.809234507897936e-05, "loss": 1.5775, "step": 324 }, { "epoch": 0.09864926392472302, "grad_norm": 0.337333083152771, "learning_rate": 9.808626974483597e-05, "loss": 1.8619, "step": 325 }, { "epoch": 0.09895280012141448, "grad_norm": 0.43906038999557495, "learning_rate": 9.808019441069259e-05, "loss": 2.1753, "step": 326 }, { "epoch": 0.09925633631810593, "grad_norm": 0.4216412901878357, "learning_rate": 9.807411907654922e-05, "loss": 1.8322, "step": 327 }, { "epoch": 0.09955987251479739, "grad_norm": 0.3964472711086273, "learning_rate": 9.806804374240584e-05, "loss": 1.6452, "step": 328 }, { "epoch": 0.09986340871148884, "grad_norm": 0.3590555489063263, "learning_rate": 9.806196840826246e-05, "loss": 1.6471, "step": 329 }, { "epoch": 0.1001669449081803, "grad_norm": 0.3878382742404938, "learning_rate": 9.805589307411907e-05, "loss": 1.7192, "step": 330 }, { "epoch": 0.10047048110487175, "grad_norm": 0.37866318225860596, "learning_rate": 9.80498177399757e-05, "loss": 2.0156, "step": 331 }, { "epoch": 0.10077401730156321, "grad_norm": 0.3977656364440918, "learning_rate": 9.804374240583232e-05, "loss": 1.3686, "step": 332 }, { "epoch": 0.10107755349825466, "grad_norm": 0.39724108576774597, "learning_rate": 9.803766707168894e-05, "loss": 1.6206, "step": 333 }, { "epoch": 0.10138108969494612, "grad_norm": 0.7311023473739624, "learning_rate": 9.803159173754557e-05, "loss": 1.7186, "step": 334 }, { "epoch": 0.10168462589163758, "grad_norm": 0.3953106701374054, "learning_rate": 9.80255164034022e-05, "loss": 1.9674, "step": 335 }, { "epoch": 0.10198816208832903, "grad_norm": 0.4133211076259613, "learning_rate": 9.80194410692588e-05, "loss": 1.9536, "step": 336 }, { "epoch": 0.1022916982850205, "grad_norm": 0.4300665855407715, "learning_rate": 9.801336573511544e-05, "loss": 2.0676, "step": 337 }, { "epoch": 0.10259523448171194, "grad_norm": 0.3569762706756592, "learning_rate": 9.800729040097207e-05, "loss": 2.138, "step": 338 }, { "epoch": 0.1028987706784034, "grad_norm": 0.37851640582084656, "learning_rate": 9.800121506682868e-05, "loss": 1.7479, "step": 339 }, { "epoch": 0.10320230687509485, "grad_norm": 0.3435342013835907, "learning_rate": 9.79951397326853e-05, "loss": 2.0182, "step": 340 }, { "epoch": 0.10350584307178631, "grad_norm": 0.487394779920578, "learning_rate": 9.798906439854193e-05, "loss": 1.8017, "step": 341 }, { "epoch": 0.10380937926847776, "grad_norm": 0.3741822838783264, "learning_rate": 9.798298906439855e-05, "loss": 1.9261, "step": 342 }, { "epoch": 0.10411291546516922, "grad_norm": 0.4044167101383209, "learning_rate": 9.797691373025517e-05, "loss": 1.962, "step": 343 }, { "epoch": 0.10441645166186067, "grad_norm": 0.4507991373538971, "learning_rate": 9.797083839611178e-05, "loss": 1.6664, "step": 344 }, { "epoch": 0.10471998785855213, "grad_norm": 0.41394731402397156, "learning_rate": 9.796476306196842e-05, "loss": 1.7994, "step": 345 }, { "epoch": 0.1050235240552436, "grad_norm": 0.4100608825683594, "learning_rate": 9.795868772782503e-05, "loss": 1.8795, "step": 346 }, { "epoch": 0.10532706025193504, "grad_norm": 0.5010010600090027, "learning_rate": 9.795261239368165e-05, "loss": 1.7712, "step": 347 }, { "epoch": 0.1056305964486265, "grad_norm": 0.3657280206680298, "learning_rate": 9.794653705953828e-05, "loss": 2.0675, "step": 348 }, { "epoch": 0.10593413264531795, "grad_norm": 0.43551188707351685, "learning_rate": 9.794046172539491e-05, "loss": 2.3229, "step": 349 }, { "epoch": 0.10623766884200941, "grad_norm": 0.4035640060901642, "learning_rate": 9.793438639125152e-05, "loss": 1.5348, "step": 350 }, { "epoch": 0.10654120503870086, "grad_norm": 0.38934487104415894, "learning_rate": 9.792831105710815e-05, "loss": 2.0634, "step": 351 }, { "epoch": 0.10684474123539232, "grad_norm": 0.3808942437171936, "learning_rate": 9.792223572296478e-05, "loss": 1.5801, "step": 352 }, { "epoch": 0.10714827743208377, "grad_norm": 0.4263344407081604, "learning_rate": 9.791616038882138e-05, "loss": 2.1149, "step": 353 }, { "epoch": 0.10745181362877523, "grad_norm": 0.40345048904418945, "learning_rate": 9.791008505467801e-05, "loss": 1.6522, "step": 354 }, { "epoch": 0.10775534982546668, "grad_norm": 0.39628833532333374, "learning_rate": 9.790400972053464e-05, "loss": 1.8936, "step": 355 }, { "epoch": 0.10805888602215814, "grad_norm": 0.3981876075267792, "learning_rate": 9.789793438639126e-05, "loss": 1.8532, "step": 356 }, { "epoch": 0.10836242221884959, "grad_norm": 0.41689878702163696, "learning_rate": 9.789185905224788e-05, "loss": 1.811, "step": 357 }, { "epoch": 0.10866595841554105, "grad_norm": 0.3519899845123291, "learning_rate": 9.78857837181045e-05, "loss": 2.1879, "step": 358 }, { "epoch": 0.10896949461223251, "grad_norm": 0.4501926004886627, "learning_rate": 9.787970838396113e-05, "loss": 1.7545, "step": 359 }, { "epoch": 0.10927303080892396, "grad_norm": 0.3503700792789459, "learning_rate": 9.787363304981774e-05, "loss": 1.8818, "step": 360 }, { "epoch": 0.10957656700561542, "grad_norm": 0.3641771674156189, "learning_rate": 9.786755771567436e-05, "loss": 1.9666, "step": 361 }, { "epoch": 0.10988010320230687, "grad_norm": 0.41548213362693787, "learning_rate": 9.786148238153099e-05, "loss": 1.8781, "step": 362 }, { "epoch": 0.11018363939899833, "grad_norm": 0.40850362181663513, "learning_rate": 9.785540704738762e-05, "loss": 1.8369, "step": 363 }, { "epoch": 0.11048717559568978, "grad_norm": 0.3874415159225464, "learning_rate": 9.784933171324423e-05, "loss": 1.6867, "step": 364 }, { "epoch": 0.11079071179238124, "grad_norm": 0.40616413950920105, "learning_rate": 9.784325637910086e-05, "loss": 1.7234, "step": 365 }, { "epoch": 0.11109424798907269, "grad_norm": 0.7947202920913696, "learning_rate": 9.783718104495749e-05, "loss": 1.5327, "step": 366 }, { "epoch": 0.11139778418576415, "grad_norm": 0.3792203664779663, "learning_rate": 9.783110571081409e-05, "loss": 1.5403, "step": 367 }, { "epoch": 0.1117013203824556, "grad_norm": 0.4576598107814789, "learning_rate": 9.782503037667072e-05, "loss": 2.2472, "step": 368 }, { "epoch": 0.11200485657914706, "grad_norm": 0.37935471534729004, "learning_rate": 9.781895504252734e-05, "loss": 1.8421, "step": 369 }, { "epoch": 0.11230839277583853, "grad_norm": 0.3658997416496277, "learning_rate": 9.781287970838397e-05, "loss": 1.6879, "step": 370 }, { "epoch": 0.11261192897252997, "grad_norm": 0.3936321437358856, "learning_rate": 9.780680437424059e-05, "loss": 2.0365, "step": 371 }, { "epoch": 0.11291546516922144, "grad_norm": 0.3935524821281433, "learning_rate": 9.78007290400972e-05, "loss": 1.8648, "step": 372 }, { "epoch": 0.11321900136591288, "grad_norm": 0.3798617422580719, "learning_rate": 9.779465370595384e-05, "loss": 2.0528, "step": 373 }, { "epoch": 0.11352253756260434, "grad_norm": 0.38386639952659607, "learning_rate": 9.778857837181045e-05, "loss": 1.5629, "step": 374 }, { "epoch": 0.11382607375929579, "grad_norm": 0.4665718674659729, "learning_rate": 9.778250303766707e-05, "loss": 1.6207, "step": 375 }, { "epoch": 0.11412960995598725, "grad_norm": 0.35728296637535095, "learning_rate": 9.77764277035237e-05, "loss": 1.9548, "step": 376 }, { "epoch": 0.1144331461526787, "grad_norm": 0.3415043354034424, "learning_rate": 9.777035236938032e-05, "loss": 2.0376, "step": 377 }, { "epoch": 0.11473668234937016, "grad_norm": 0.38225334882736206, "learning_rate": 9.776427703523694e-05, "loss": 1.7175, "step": 378 }, { "epoch": 0.11504021854606161, "grad_norm": 0.3931468427181244, "learning_rate": 9.775820170109357e-05, "loss": 2.1832, "step": 379 }, { "epoch": 0.11534375474275307, "grad_norm": 0.3954283595085144, "learning_rate": 9.77521263669502e-05, "loss": 2.1863, "step": 380 }, { "epoch": 0.11564729093944452, "grad_norm": 0.31073784828186035, "learning_rate": 9.77460510328068e-05, "loss": 1.8583, "step": 381 }, { "epoch": 0.11595082713613598, "grad_norm": 0.37894561886787415, "learning_rate": 9.773997569866343e-05, "loss": 2.0385, "step": 382 }, { "epoch": 0.11625436333282745, "grad_norm": 0.3493829369544983, "learning_rate": 9.773390036452005e-05, "loss": 1.8854, "step": 383 }, { "epoch": 0.1165578995295189, "grad_norm": 0.5518279075622559, "learning_rate": 9.772782503037668e-05, "loss": 1.7403, "step": 384 }, { "epoch": 0.11686143572621036, "grad_norm": 0.3724190294742584, "learning_rate": 9.77217496962333e-05, "loss": 1.7759, "step": 385 }, { "epoch": 0.1171649719229018, "grad_norm": 0.4635847508907318, "learning_rate": 9.771567436208991e-05, "loss": 1.8441, "step": 386 }, { "epoch": 0.11746850811959327, "grad_norm": 0.38281580805778503, "learning_rate": 9.770959902794655e-05, "loss": 2.0332, "step": 387 }, { "epoch": 0.11777204431628471, "grad_norm": 0.4179950952529907, "learning_rate": 9.770352369380316e-05, "loss": 2.3451, "step": 388 }, { "epoch": 0.11807558051297617, "grad_norm": 0.34729671478271484, "learning_rate": 9.769744835965978e-05, "loss": 1.9186, "step": 389 }, { "epoch": 0.11837911670966762, "grad_norm": 0.40492531657218933, "learning_rate": 9.769137302551641e-05, "loss": 2.1711, "step": 390 }, { "epoch": 0.11868265290635908, "grad_norm": 0.38143807649612427, "learning_rate": 9.768529769137303e-05, "loss": 1.7601, "step": 391 }, { "epoch": 0.11898618910305053, "grad_norm": 0.35463643074035645, "learning_rate": 9.767922235722965e-05, "loss": 1.8068, "step": 392 }, { "epoch": 0.119289725299742, "grad_norm": 0.3719339370727539, "learning_rate": 9.767314702308628e-05, "loss": 1.6296, "step": 393 }, { "epoch": 0.11959326149643346, "grad_norm": 0.8585293292999268, "learning_rate": 9.766707168894291e-05, "loss": 1.8841, "step": 394 }, { "epoch": 0.1198967976931249, "grad_norm": 0.5286839604377747, "learning_rate": 9.766099635479951e-05, "loss": 1.3645, "step": 395 }, { "epoch": 0.12020033388981637, "grad_norm": 0.44176310300827026, "learning_rate": 9.765492102065614e-05, "loss": 2.1759, "step": 396 }, { "epoch": 0.12050387008650781, "grad_norm": 0.39778321981430054, "learning_rate": 9.764884568651276e-05, "loss": 1.8344, "step": 397 }, { "epoch": 0.12080740628319928, "grad_norm": 0.4364762604236603, "learning_rate": 9.764277035236939e-05, "loss": 1.7834, "step": 398 }, { "epoch": 0.12111094247989072, "grad_norm": 0.37305301427841187, "learning_rate": 9.763669501822601e-05, "loss": 1.7474, "step": 399 }, { "epoch": 0.12141447867658219, "grad_norm": 0.38804179430007935, "learning_rate": 9.763061968408262e-05, "loss": 1.6241, "step": 400 }, { "epoch": 0.12171801487327363, "grad_norm": 0.9124923944473267, "learning_rate": 9.762454434993926e-05, "loss": 2.1456, "step": 401 }, { "epoch": 0.1220215510699651, "grad_norm": 0.38728946447372437, "learning_rate": 9.761846901579587e-05, "loss": 1.9724, "step": 402 }, { "epoch": 0.12232508726665654, "grad_norm": 0.4121726155281067, "learning_rate": 9.761239368165249e-05, "loss": 2.1119, "step": 403 }, { "epoch": 0.122628623463348, "grad_norm": 0.46508121490478516, "learning_rate": 9.760631834750912e-05, "loss": 1.4402, "step": 404 }, { "epoch": 0.12293215966003945, "grad_norm": 0.4460875391960144, "learning_rate": 9.760024301336574e-05, "loss": 2.0572, "step": 405 }, { "epoch": 0.12323569585673091, "grad_norm": 0.38444089889526367, "learning_rate": 9.759416767922236e-05, "loss": 1.9943, "step": 406 }, { "epoch": 0.12353923205342238, "grad_norm": 0.3515356779098511, "learning_rate": 9.758809234507899e-05, "loss": 1.5699, "step": 407 }, { "epoch": 0.12384276825011382, "grad_norm": 0.4010019302368164, "learning_rate": 9.758201701093562e-05, "loss": 1.8674, "step": 408 }, { "epoch": 0.12414630444680529, "grad_norm": 0.4250737428665161, "learning_rate": 9.757594167679222e-05, "loss": 1.0738, "step": 409 }, { "epoch": 0.12444984064349673, "grad_norm": 0.3719541132450104, "learning_rate": 9.756986634264885e-05, "loss": 1.7292, "step": 410 }, { "epoch": 0.1247533768401882, "grad_norm": 0.385420024394989, "learning_rate": 9.756379100850547e-05, "loss": 1.9503, "step": 411 }, { "epoch": 0.12505691303687966, "grad_norm": 0.480056494474411, "learning_rate": 9.75577156743621e-05, "loss": 2.0093, "step": 412 }, { "epoch": 0.1253604492335711, "grad_norm": 0.38757050037384033, "learning_rate": 9.755164034021872e-05, "loss": 2.2636, "step": 413 }, { "epoch": 0.12566398543026255, "grad_norm": 0.3712436854839325, "learning_rate": 9.754556500607533e-05, "loss": 2.0479, "step": 414 }, { "epoch": 0.12596752162695402, "grad_norm": 2.4313645362854004, "learning_rate": 9.753948967193197e-05, "loss": 2.376, "step": 415 }, { "epoch": 0.12627105782364548, "grad_norm": 0.8028842210769653, "learning_rate": 9.753341433778858e-05, "loss": 1.7702, "step": 416 }, { "epoch": 0.1265745940203369, "grad_norm": 0.48586025834083557, "learning_rate": 9.75273390036452e-05, "loss": 1.7623, "step": 417 }, { "epoch": 0.12687813021702837, "grad_norm": 0.4017583131790161, "learning_rate": 9.752126366950183e-05, "loss": 1.8756, "step": 418 }, { "epoch": 0.12718166641371983, "grad_norm": 0.3845275044441223, "learning_rate": 9.751518833535845e-05, "loss": 1.95, "step": 419 }, { "epoch": 0.1274852026104113, "grad_norm": 0.41064974665641785, "learning_rate": 9.750911300121507e-05, "loss": 1.9622, "step": 420 }, { "epoch": 0.12778873880710276, "grad_norm": 0.33571726083755493, "learning_rate": 9.75030376670717e-05, "loss": 1.7291, "step": 421 }, { "epoch": 0.1280922750037942, "grad_norm": 0.3988417387008667, "learning_rate": 9.749696233292833e-05, "loss": 1.9349, "step": 422 }, { "epoch": 0.12839581120048565, "grad_norm": 0.37586870789527893, "learning_rate": 9.749088699878493e-05, "loss": 1.5649, "step": 423 }, { "epoch": 0.12869934739717712, "grad_norm": 0.37013643980026245, "learning_rate": 9.748481166464156e-05, "loss": 1.9448, "step": 424 }, { "epoch": 0.12900288359386858, "grad_norm": 0.31406837701797485, "learning_rate": 9.747873633049818e-05, "loss": 1.8496, "step": 425 }, { "epoch": 0.12930641979056, "grad_norm": 0.3691607415676117, "learning_rate": 9.74726609963548e-05, "loss": 2.0649, "step": 426 }, { "epoch": 0.12960995598725147, "grad_norm": 0.39633169770240784, "learning_rate": 9.746658566221143e-05, "loss": 1.727, "step": 427 }, { "epoch": 0.12991349218394294, "grad_norm": 0.47319236397743225, "learning_rate": 9.746051032806804e-05, "loss": 2.4487, "step": 428 }, { "epoch": 0.1302170283806344, "grad_norm": 0.35506609082221985, "learning_rate": 9.745443499392468e-05, "loss": 2.0857, "step": 429 }, { "epoch": 0.13052056457732586, "grad_norm": 0.38134855031967163, "learning_rate": 9.744835965978129e-05, "loss": 1.5862, "step": 430 }, { "epoch": 0.1308241007740173, "grad_norm": 0.6288440227508545, "learning_rate": 9.744228432563791e-05, "loss": 2.2402, "step": 431 }, { "epoch": 0.13112763697070876, "grad_norm": 0.42172396183013916, "learning_rate": 9.743620899149454e-05, "loss": 1.7245, "step": 432 }, { "epoch": 0.13143117316740022, "grad_norm": 0.38452383875846863, "learning_rate": 9.743013365735116e-05, "loss": 1.6666, "step": 433 }, { "epoch": 0.13173470936409168, "grad_norm": 2.0956268310546875, "learning_rate": 9.742405832320778e-05, "loss": 1.8268, "step": 434 }, { "epoch": 0.1320382455607831, "grad_norm": 0.4363501965999603, "learning_rate": 9.74179829890644e-05, "loss": 2.3149, "step": 435 }, { "epoch": 0.13234178175747457, "grad_norm": 0.38243743777275085, "learning_rate": 9.741190765492104e-05, "loss": 1.9706, "step": 436 }, { "epoch": 0.13264531795416604, "grad_norm": 0.38724249601364136, "learning_rate": 9.740583232077764e-05, "loss": 1.779, "step": 437 }, { "epoch": 0.1329488541508575, "grad_norm": 0.43606194853782654, "learning_rate": 9.739975698663427e-05, "loss": 2.0371, "step": 438 }, { "epoch": 0.13325239034754893, "grad_norm": 0.3511301279067993, "learning_rate": 9.739368165249089e-05, "loss": 1.8771, "step": 439 }, { "epoch": 0.1335559265442404, "grad_norm": 0.3883466124534607, "learning_rate": 9.73876063183475e-05, "loss": 2.1165, "step": 440 }, { "epoch": 0.13385946274093186, "grad_norm": 0.41711342334747314, "learning_rate": 9.738153098420414e-05, "loss": 1.8367, "step": 441 }, { "epoch": 0.13416299893762332, "grad_norm": 0.6146459579467773, "learning_rate": 9.737545565006075e-05, "loss": 1.9233, "step": 442 }, { "epoch": 0.13446653513431478, "grad_norm": 0.37016820907592773, "learning_rate": 9.736938031591739e-05, "loss": 1.8804, "step": 443 }, { "epoch": 0.1347700713310062, "grad_norm": 0.3620823323726654, "learning_rate": 9.7363304981774e-05, "loss": 1.5837, "step": 444 }, { "epoch": 0.13507360752769768, "grad_norm": 0.37375590205192566, "learning_rate": 9.735722964763062e-05, "loss": 1.8095, "step": 445 }, { "epoch": 0.13537714372438914, "grad_norm": 0.6026252508163452, "learning_rate": 9.735115431348725e-05, "loss": 2.041, "step": 446 }, { "epoch": 0.1356806799210806, "grad_norm": 0.3753829002380371, "learning_rate": 9.734507897934387e-05, "loss": 2.0627, "step": 447 }, { "epoch": 0.13598421611777203, "grad_norm": 0.3974304795265198, "learning_rate": 9.733900364520049e-05, "loss": 2.1671, "step": 448 }, { "epoch": 0.1362877523144635, "grad_norm": 0.34336552023887634, "learning_rate": 9.733292831105712e-05, "loss": 1.9118, "step": 449 }, { "epoch": 0.13659128851115496, "grad_norm": 0.3588969111442566, "learning_rate": 9.732685297691373e-05, "loss": 1.9768, "step": 450 }, { "epoch": 0.13689482470784642, "grad_norm": 0.38693130016326904, "learning_rate": 9.732077764277035e-05, "loss": 2.1524, "step": 451 }, { "epoch": 0.13719836090453785, "grad_norm": 0.370612233877182, "learning_rate": 9.731470230862698e-05, "loss": 1.8753, "step": 452 }, { "epoch": 0.13750189710122931, "grad_norm": 0.4038615822792053, "learning_rate": 9.73086269744836e-05, "loss": 2.1024, "step": 453 }, { "epoch": 0.13780543329792078, "grad_norm": 0.3728694021701813, "learning_rate": 9.730255164034022e-05, "loss": 1.8864, "step": 454 }, { "epoch": 0.13810896949461224, "grad_norm": 0.37269484996795654, "learning_rate": 9.729647630619685e-05, "loss": 1.8244, "step": 455 }, { "epoch": 0.1384125056913037, "grad_norm": 0.39840301871299744, "learning_rate": 9.729040097205346e-05, "loss": 1.8215, "step": 456 }, { "epoch": 0.13871604188799513, "grad_norm": 0.39856579899787903, "learning_rate": 9.72843256379101e-05, "loss": 1.9475, "step": 457 }, { "epoch": 0.1390195780846866, "grad_norm": 0.43041157722473145, "learning_rate": 9.727825030376671e-05, "loss": 1.87, "step": 458 }, { "epoch": 0.13932311428137806, "grad_norm": 0.4047417640686035, "learning_rate": 9.727217496962333e-05, "loss": 2.1425, "step": 459 }, { "epoch": 0.13962665047806952, "grad_norm": 0.3901901841163635, "learning_rate": 9.726609963547996e-05, "loss": 1.917, "step": 460 }, { "epoch": 0.13993018667476095, "grad_norm": 0.40706855058670044, "learning_rate": 9.726002430133658e-05, "loss": 2.1278, "step": 461 }, { "epoch": 0.14023372287145242, "grad_norm": 0.47956356406211853, "learning_rate": 9.72539489671932e-05, "loss": 1.2435, "step": 462 }, { "epoch": 0.14053725906814388, "grad_norm": 0.35697320103645325, "learning_rate": 9.724787363304983e-05, "loss": 1.7965, "step": 463 }, { "epoch": 0.14084079526483534, "grad_norm": 0.4102901518344879, "learning_rate": 9.724179829890644e-05, "loss": 1.7132, "step": 464 }, { "epoch": 0.1411443314615268, "grad_norm": 0.3353058099746704, "learning_rate": 9.723572296476306e-05, "loss": 1.674, "step": 465 }, { "epoch": 0.14144786765821823, "grad_norm": 0.3946186900138855, "learning_rate": 9.722964763061969e-05, "loss": 2.0067, "step": 466 }, { "epoch": 0.1417514038549097, "grad_norm": 0.3974400460720062, "learning_rate": 9.722357229647631e-05, "loss": 1.7242, "step": 467 }, { "epoch": 0.14205494005160116, "grad_norm": 1.3334546089172363, "learning_rate": 9.721749696233293e-05, "loss": 2.0141, "step": 468 }, { "epoch": 0.14235847624829262, "grad_norm": 0.36386749148368835, "learning_rate": 9.721142162818956e-05, "loss": 1.8221, "step": 469 }, { "epoch": 0.14266201244498405, "grad_norm": 0.35332655906677246, "learning_rate": 9.720534629404617e-05, "loss": 1.4251, "step": 470 }, { "epoch": 0.14296554864167552, "grad_norm": 0.563428521156311, "learning_rate": 9.71992709599028e-05, "loss": 1.8633, "step": 471 }, { "epoch": 0.14326908483836698, "grad_norm": 0.39971691370010376, "learning_rate": 9.719319562575942e-05, "loss": 2.0397, "step": 472 }, { "epoch": 0.14357262103505844, "grad_norm": 0.39253416657447815, "learning_rate": 9.718712029161604e-05, "loss": 1.3104, "step": 473 }, { "epoch": 0.14387615723174987, "grad_norm": 0.41245678067207336, "learning_rate": 9.718104495747267e-05, "loss": 1.8821, "step": 474 }, { "epoch": 0.14417969342844134, "grad_norm": 0.5424125790596008, "learning_rate": 9.717496962332929e-05, "loss": 1.7739, "step": 475 }, { "epoch": 0.1444832296251328, "grad_norm": 0.425329327583313, "learning_rate": 9.71688942891859e-05, "loss": 1.8047, "step": 476 }, { "epoch": 0.14478676582182426, "grad_norm": 0.43624332547187805, "learning_rate": 9.716281895504254e-05, "loss": 1.5254, "step": 477 }, { "epoch": 0.14509030201851572, "grad_norm": 0.4078616201877594, "learning_rate": 9.715674362089915e-05, "loss": 1.8286, "step": 478 }, { "epoch": 0.14539383821520716, "grad_norm": 0.4144497513771057, "learning_rate": 9.715066828675577e-05, "loss": 2.0737, "step": 479 }, { "epoch": 0.14569737441189862, "grad_norm": 0.37600383162498474, "learning_rate": 9.71445929526124e-05, "loss": 2.2512, "step": 480 }, { "epoch": 0.14600091060859008, "grad_norm": 0.376644492149353, "learning_rate": 9.713851761846902e-05, "loss": 2.0374, "step": 481 }, { "epoch": 0.14630444680528154, "grad_norm": 1.3389711380004883, "learning_rate": 9.713244228432564e-05, "loss": 1.7461, "step": 482 }, { "epoch": 0.14660798300197297, "grad_norm": 0.32981812953948975, "learning_rate": 9.712636695018227e-05, "loss": 1.8383, "step": 483 }, { "epoch": 0.14691151919866444, "grad_norm": 0.3440997004508972, "learning_rate": 9.712029161603888e-05, "loss": 1.6228, "step": 484 }, { "epoch": 0.1472150553953559, "grad_norm": 0.3392031788825989, "learning_rate": 9.711421628189552e-05, "loss": 1.9423, "step": 485 }, { "epoch": 0.14751859159204736, "grad_norm": 0.37523385882377625, "learning_rate": 9.710814094775213e-05, "loss": 2.1037, "step": 486 }, { "epoch": 0.1478221277887388, "grad_norm": 0.36372002959251404, "learning_rate": 9.710206561360875e-05, "loss": 1.863, "step": 487 }, { "epoch": 0.14812566398543026, "grad_norm": 0.3782525360584259, "learning_rate": 9.709599027946538e-05, "loss": 2.1418, "step": 488 }, { "epoch": 0.14842920018212172, "grad_norm": 0.7462687492370605, "learning_rate": 9.7089914945322e-05, "loss": 1.8172, "step": 489 }, { "epoch": 0.14873273637881318, "grad_norm": 0.3471963107585907, "learning_rate": 9.708383961117862e-05, "loss": 2.1015, "step": 490 }, { "epoch": 0.14903627257550464, "grad_norm": 0.5325261950492859, "learning_rate": 9.707776427703525e-05, "loss": 2.05, "step": 491 }, { "epoch": 0.14933980877219608, "grad_norm": 0.5748963952064514, "learning_rate": 9.707168894289186e-05, "loss": 2.1217, "step": 492 }, { "epoch": 0.14964334496888754, "grad_norm": 0.3891385495662689, "learning_rate": 9.706561360874848e-05, "loss": 1.5301, "step": 493 }, { "epoch": 0.149946881165579, "grad_norm": 0.48258477449417114, "learning_rate": 9.705953827460511e-05, "loss": 1.839, "step": 494 }, { "epoch": 0.15025041736227046, "grad_norm": 0.5301745533943176, "learning_rate": 9.705346294046173e-05, "loss": 1.7934, "step": 495 }, { "epoch": 0.1505539535589619, "grad_norm": 0.3614468276500702, "learning_rate": 9.704738760631835e-05, "loss": 1.7176, "step": 496 }, { "epoch": 0.15085748975565336, "grad_norm": 0.31026577949523926, "learning_rate": 9.704131227217498e-05, "loss": 1.925, "step": 497 }, { "epoch": 0.15116102595234482, "grad_norm": 0.37441205978393555, "learning_rate": 9.70352369380316e-05, "loss": 2.1532, "step": 498 }, { "epoch": 0.15146456214903628, "grad_norm": 0.37447264790534973, "learning_rate": 9.702916160388823e-05, "loss": 2.0351, "step": 499 }, { "epoch": 0.15176809834572771, "grad_norm": 0.7793715000152588, "learning_rate": 9.702308626974484e-05, "loss": 2.1011, "step": 500 }, { "epoch": 0.15207163454241918, "grad_norm": 0.3725285232067108, "learning_rate": 9.701701093560146e-05, "loss": 2.1641, "step": 501 }, { "epoch": 0.15237517073911064, "grad_norm": 0.35334041714668274, "learning_rate": 9.701093560145809e-05, "loss": 2.0773, "step": 502 }, { "epoch": 0.1526787069358021, "grad_norm": 0.3819803297519684, "learning_rate": 9.700486026731471e-05, "loss": 1.836, "step": 503 }, { "epoch": 0.15298224313249356, "grad_norm": 0.403060644865036, "learning_rate": 9.699878493317133e-05, "loss": 2.1178, "step": 504 }, { "epoch": 0.153285779329185, "grad_norm": 0.2956171929836273, "learning_rate": 9.699270959902796e-05, "loss": 1.6397, "step": 505 }, { "epoch": 0.15358931552587646, "grad_norm": 0.30349212884902954, "learning_rate": 9.698663426488457e-05, "loss": 1.6435, "step": 506 }, { "epoch": 0.15389285172256792, "grad_norm": 0.38176605105400085, "learning_rate": 9.698055893074119e-05, "loss": 1.8004, "step": 507 }, { "epoch": 0.15419638791925938, "grad_norm": 0.5072764158248901, "learning_rate": 9.697448359659782e-05, "loss": 1.6926, "step": 508 }, { "epoch": 0.15449992411595082, "grad_norm": 0.5380321145057678, "learning_rate": 9.696840826245444e-05, "loss": 2.2396, "step": 509 }, { "epoch": 0.15480346031264228, "grad_norm": 0.40872499346733093, "learning_rate": 9.696233292831106e-05, "loss": 1.8645, "step": 510 }, { "epoch": 0.15510699650933374, "grad_norm": 0.9282563924789429, "learning_rate": 9.695625759416769e-05, "loss": 1.7143, "step": 511 }, { "epoch": 0.1554105327060252, "grad_norm": 0.7355011105537415, "learning_rate": 9.69501822600243e-05, "loss": 1.7461, "step": 512 }, { "epoch": 0.15571406890271666, "grad_norm": 0.9056992530822754, "learning_rate": 9.694410692588092e-05, "loss": 2.1454, "step": 513 }, { "epoch": 0.1560176050994081, "grad_norm": 0.38970059156417847, "learning_rate": 9.693803159173755e-05, "loss": 2.3313, "step": 514 }, { "epoch": 0.15632114129609956, "grad_norm": 0.3651840090751648, "learning_rate": 9.693195625759417e-05, "loss": 1.84, "step": 515 }, { "epoch": 0.15662467749279102, "grad_norm": 0.38748839497566223, "learning_rate": 9.69258809234508e-05, "loss": 1.9666, "step": 516 }, { "epoch": 0.15692821368948248, "grad_norm": 0.407427042722702, "learning_rate": 9.691980558930742e-05, "loss": 1.9351, "step": 517 }, { "epoch": 0.15723174988617392, "grad_norm": 0.31920358538627625, "learning_rate": 9.691373025516404e-05, "loss": 1.7928, "step": 518 }, { "epoch": 0.15753528608286538, "grad_norm": 0.4002731442451477, "learning_rate": 9.690765492102067e-05, "loss": 1.4087, "step": 519 }, { "epoch": 0.15783882227955684, "grad_norm": 0.42125266790390015, "learning_rate": 9.690157958687728e-05, "loss": 1.6185, "step": 520 }, { "epoch": 0.1581423584762483, "grad_norm": 0.3706381022930145, "learning_rate": 9.68955042527339e-05, "loss": 2.0546, "step": 521 }, { "epoch": 0.15844589467293974, "grad_norm": 0.41669219732284546, "learning_rate": 9.688942891859053e-05, "loss": 2.0399, "step": 522 }, { "epoch": 0.1587494308696312, "grad_norm": 0.36784589290618896, "learning_rate": 9.688335358444715e-05, "loss": 1.8182, "step": 523 }, { "epoch": 0.15905296706632266, "grad_norm": 0.3830098807811737, "learning_rate": 9.687727825030377e-05, "loss": 2.0728, "step": 524 }, { "epoch": 0.15935650326301412, "grad_norm": 0.37658411264419556, "learning_rate": 9.68712029161604e-05, "loss": 1.9915, "step": 525 }, { "epoch": 0.15966003945970558, "grad_norm": 0.375053733587265, "learning_rate": 9.686512758201701e-05, "loss": 1.7589, "step": 526 }, { "epoch": 0.15996357565639702, "grad_norm": 0.3810443580150604, "learning_rate": 9.685905224787363e-05, "loss": 1.6921, "step": 527 }, { "epoch": 0.16026711185308848, "grad_norm": 0.41676437854766846, "learning_rate": 9.685297691373026e-05, "loss": 1.1885, "step": 528 }, { "epoch": 0.16057064804977994, "grad_norm": 0.40823522210121155, "learning_rate": 9.684690157958688e-05, "loss": 1.8335, "step": 529 }, { "epoch": 0.1608741842464714, "grad_norm": 0.3795296251773834, "learning_rate": 9.684082624544351e-05, "loss": 1.9209, "step": 530 }, { "epoch": 0.16117772044316284, "grad_norm": 0.41227850317955017, "learning_rate": 9.683475091130013e-05, "loss": 1.7791, "step": 531 }, { "epoch": 0.1614812566398543, "grad_norm": 0.436483234167099, "learning_rate": 9.682867557715675e-05, "loss": 1.8648, "step": 532 }, { "epoch": 0.16178479283654576, "grad_norm": 0.43618106842041016, "learning_rate": 9.682260024301338e-05, "loss": 1.7593, "step": 533 }, { "epoch": 0.16208832903323722, "grad_norm": 0.37166401743888855, "learning_rate": 9.681652490887e-05, "loss": 1.9301, "step": 534 }, { "epoch": 0.16239186522992866, "grad_norm": 0.5716313123703003, "learning_rate": 9.681044957472661e-05, "loss": 2.1892, "step": 535 }, { "epoch": 0.16269540142662012, "grad_norm": 0.4008532166481018, "learning_rate": 9.680437424058324e-05, "loss": 1.8818, "step": 536 }, { "epoch": 0.16299893762331158, "grad_norm": 0.42276448011398315, "learning_rate": 9.679829890643986e-05, "loss": 1.4488, "step": 537 }, { "epoch": 0.16330247382000304, "grad_norm": 0.343649685382843, "learning_rate": 9.679222357229648e-05, "loss": 1.9549, "step": 538 }, { "epoch": 0.1636060100166945, "grad_norm": 0.3381790518760681, "learning_rate": 9.678614823815311e-05, "loss": 1.9949, "step": 539 }, { "epoch": 0.16390954621338594, "grad_norm": 0.3788328468799591, "learning_rate": 9.678007290400973e-05, "loss": 2.0377, "step": 540 }, { "epoch": 0.1642130824100774, "grad_norm": 0.39532333612442017, "learning_rate": 9.677399756986634e-05, "loss": 1.8837, "step": 541 }, { "epoch": 0.16451661860676886, "grad_norm": 0.36701199412345886, "learning_rate": 9.676792223572297e-05, "loss": 1.9046, "step": 542 }, { "epoch": 0.16482015480346032, "grad_norm": 0.4146950840950012, "learning_rate": 9.676184690157959e-05, "loss": 1.8369, "step": 543 }, { "epoch": 0.16512369100015176, "grad_norm": 0.34827515482902527, "learning_rate": 9.675577156743622e-05, "loss": 2.0196, "step": 544 }, { "epoch": 0.16542722719684322, "grad_norm": 0.36529168486595154, "learning_rate": 9.674969623329283e-05, "loss": 1.9189, "step": 545 }, { "epoch": 0.16573076339353468, "grad_norm": 0.3718273639678955, "learning_rate": 9.674362089914946e-05, "loss": 1.933, "step": 546 }, { "epoch": 0.16603429959022614, "grad_norm": 0.3853289484977722, "learning_rate": 9.673754556500609e-05, "loss": 1.9796, "step": 547 }, { "epoch": 0.16633783578691758, "grad_norm": 0.38597023487091064, "learning_rate": 9.67314702308627e-05, "loss": 1.5167, "step": 548 }, { "epoch": 0.16664137198360904, "grad_norm": 0.3792777955532074, "learning_rate": 9.672539489671932e-05, "loss": 1.9285, "step": 549 }, { "epoch": 0.1669449081803005, "grad_norm": 0.3506297171115875, "learning_rate": 9.671931956257595e-05, "loss": 1.8449, "step": 550 }, { "epoch": 0.16724844437699196, "grad_norm": 0.3851594030857086, "learning_rate": 9.671324422843257e-05, "loss": 1.9774, "step": 551 }, { "epoch": 0.16755198057368342, "grad_norm": 0.3438011705875397, "learning_rate": 9.670716889428919e-05, "loss": 2.0849, "step": 552 }, { "epoch": 0.16785551677037486, "grad_norm": 0.3682856559753418, "learning_rate": 9.670109356014582e-05, "loss": 2.09, "step": 553 }, { "epoch": 0.16815905296706632, "grad_norm": 0.5098361372947693, "learning_rate": 9.669501822600244e-05, "loss": 1.7843, "step": 554 }, { "epoch": 0.16846258916375778, "grad_norm": 0.3482840359210968, "learning_rate": 9.668894289185905e-05, "loss": 1.5129, "step": 555 }, { "epoch": 0.16876612536044924, "grad_norm": 0.3557680547237396, "learning_rate": 9.668286755771568e-05, "loss": 2.1033, "step": 556 }, { "epoch": 0.16906966155714068, "grad_norm": 0.3622763752937317, "learning_rate": 9.66767922235723e-05, "loss": 1.7729, "step": 557 }, { "epoch": 0.16937319775383214, "grad_norm": 0.3461545407772064, "learning_rate": 9.667071688942893e-05, "loss": 1.7999, "step": 558 }, { "epoch": 0.1696767339505236, "grad_norm": 0.39597707986831665, "learning_rate": 9.666464155528554e-05, "loss": 1.8805, "step": 559 }, { "epoch": 0.16998027014721506, "grad_norm": 0.39985769987106323, "learning_rate": 9.665856622114217e-05, "loss": 1.8858, "step": 560 }, { "epoch": 0.17028380634390652, "grad_norm": 0.352029025554657, "learning_rate": 9.66524908869988e-05, "loss": 1.8077, "step": 561 }, { "epoch": 0.17058734254059796, "grad_norm": 0.5778902173042297, "learning_rate": 9.66464155528554e-05, "loss": 1.7001, "step": 562 }, { "epoch": 0.17089087873728942, "grad_norm": 0.49807438254356384, "learning_rate": 9.664034021871203e-05, "loss": 1.7536, "step": 563 }, { "epoch": 0.17119441493398088, "grad_norm": 0.6479670405387878, "learning_rate": 9.663426488456866e-05, "loss": 1.7827, "step": 564 }, { "epoch": 0.17149795113067234, "grad_norm": 0.4150646924972534, "learning_rate": 9.662818955042528e-05, "loss": 2.093, "step": 565 }, { "epoch": 0.17180148732736378, "grad_norm": 0.4018631875514984, "learning_rate": 9.66221142162819e-05, "loss": 1.9505, "step": 566 }, { "epoch": 0.17210502352405524, "grad_norm": 0.34705424308776855, "learning_rate": 9.661603888213853e-05, "loss": 2.0355, "step": 567 }, { "epoch": 0.1724085597207467, "grad_norm": 0.40585950016975403, "learning_rate": 9.660996354799515e-05, "loss": 1.5372, "step": 568 }, { "epoch": 0.17271209591743816, "grad_norm": 0.31652507185935974, "learning_rate": 9.660388821385176e-05, "loss": 1.6156, "step": 569 }, { "epoch": 0.1730156321141296, "grad_norm": 0.40016746520996094, "learning_rate": 9.65978128797084e-05, "loss": 1.6054, "step": 570 }, { "epoch": 0.17331916831082106, "grad_norm": 0.3570103943347931, "learning_rate": 9.659173754556501e-05, "loss": 1.863, "step": 571 }, { "epoch": 0.17362270450751252, "grad_norm": 0.3687574565410614, "learning_rate": 9.658566221142164e-05, "loss": 1.6009, "step": 572 }, { "epoch": 0.17392624070420398, "grad_norm": 0.35581347346305847, "learning_rate": 9.657958687727825e-05, "loss": 2.2078, "step": 573 }, { "epoch": 0.17422977690089544, "grad_norm": 0.44036948680877686, "learning_rate": 9.657351154313488e-05, "loss": 1.7993, "step": 574 }, { "epoch": 0.17453331309758688, "grad_norm": 0.3790392577648163, "learning_rate": 9.656743620899151e-05, "loss": 1.9854, "step": 575 }, { "epoch": 0.17483684929427834, "grad_norm": 0.37755638360977173, "learning_rate": 9.656136087484811e-05, "loss": 1.8991, "step": 576 }, { "epoch": 0.1751403854909698, "grad_norm": 0.37551677227020264, "learning_rate": 9.655528554070474e-05, "loss": 1.5446, "step": 577 }, { "epoch": 0.17544392168766126, "grad_norm": 0.36646074056625366, "learning_rate": 9.654921020656137e-05, "loss": 1.6491, "step": 578 }, { "epoch": 0.1757474578843527, "grad_norm": 0.40674564242362976, "learning_rate": 9.654313487241799e-05, "loss": 2.0545, "step": 579 }, { "epoch": 0.17605099408104416, "grad_norm": 0.40863969922065735, "learning_rate": 9.653705953827461e-05, "loss": 1.6848, "step": 580 }, { "epoch": 0.17635453027773562, "grad_norm": 0.3962380886077881, "learning_rate": 9.653098420413124e-05, "loss": 1.8451, "step": 581 }, { "epoch": 0.17665806647442708, "grad_norm": 0.3665854334831238, "learning_rate": 9.652490886998786e-05, "loss": 2.0664, "step": 582 }, { "epoch": 0.17696160267111852, "grad_norm": 0.3678790032863617, "learning_rate": 9.651883353584447e-05, "loss": 1.9612, "step": 583 }, { "epoch": 0.17726513886780998, "grad_norm": 0.37832558155059814, "learning_rate": 9.65127582017011e-05, "loss": 2.3693, "step": 584 }, { "epoch": 0.17756867506450144, "grad_norm": 0.37538209557533264, "learning_rate": 9.650668286755772e-05, "loss": 1.9305, "step": 585 }, { "epoch": 0.1778722112611929, "grad_norm": 0.4227273166179657, "learning_rate": 9.650060753341434e-05, "loss": 2.1853, "step": 586 }, { "epoch": 0.17817574745788436, "grad_norm": 0.35160574316978455, "learning_rate": 9.649453219927096e-05, "loss": 1.6491, "step": 587 }, { "epoch": 0.1784792836545758, "grad_norm": 0.3960542380809784, "learning_rate": 9.648845686512759e-05, "loss": 2.052, "step": 588 }, { "epoch": 0.17878281985126726, "grad_norm": 0.4215950667858124, "learning_rate": 9.648238153098422e-05, "loss": 1.4572, "step": 589 }, { "epoch": 0.17908635604795872, "grad_norm": 0.35461676120758057, "learning_rate": 9.647630619684082e-05, "loss": 1.8047, "step": 590 }, { "epoch": 0.17938989224465018, "grad_norm": 0.3570484220981598, "learning_rate": 9.647023086269745e-05, "loss": 1.5481, "step": 591 }, { "epoch": 0.17969342844134162, "grad_norm": 0.3583620488643646, "learning_rate": 9.646415552855408e-05, "loss": 1.6005, "step": 592 }, { "epoch": 0.17999696463803308, "grad_norm": 0.3991422653198242, "learning_rate": 9.64580801944107e-05, "loss": 1.5039, "step": 593 }, { "epoch": 0.18030050083472454, "grad_norm": 0.4470183551311493, "learning_rate": 9.645200486026732e-05, "loss": 1.9181, "step": 594 }, { "epoch": 0.180604037031416, "grad_norm": 0.6622103452682495, "learning_rate": 9.644592952612395e-05, "loss": 1.9772, "step": 595 }, { "epoch": 0.18090757322810747, "grad_norm": 0.33143168687820435, "learning_rate": 9.643985419198057e-05, "loss": 1.9468, "step": 596 }, { "epoch": 0.1812111094247989, "grad_norm": 0.3072865903377533, "learning_rate": 9.643377885783718e-05, "loss": 1.8147, "step": 597 }, { "epoch": 0.18151464562149036, "grad_norm": 0.5602253079414368, "learning_rate": 9.642770352369381e-05, "loss": 2.069, "step": 598 }, { "epoch": 0.18181818181818182, "grad_norm": 0.29752591252326965, "learning_rate": 9.642162818955043e-05, "loss": 2.039, "step": 599 }, { "epoch": 0.18212171801487329, "grad_norm": 0.37780001759529114, "learning_rate": 9.641555285540705e-05, "loss": 1.8751, "step": 600 }, { "epoch": 0.18242525421156472, "grad_norm": 0.40171170234680176, "learning_rate": 9.640947752126367e-05, "loss": 1.8439, "step": 601 }, { "epoch": 0.18272879040825618, "grad_norm": 0.3567606210708618, "learning_rate": 9.64034021871203e-05, "loss": 1.7814, "step": 602 }, { "epoch": 0.18303232660494764, "grad_norm": 0.3742719888687134, "learning_rate": 9.639732685297693e-05, "loss": 2.3184, "step": 603 }, { "epoch": 0.1833358628016391, "grad_norm": 0.3715921938419342, "learning_rate": 9.639125151883353e-05, "loss": 1.9357, "step": 604 }, { "epoch": 0.18363939899833054, "grad_norm": 0.4021666347980499, "learning_rate": 9.638517618469016e-05, "loss": 2.0251, "step": 605 }, { "epoch": 0.183942935195022, "grad_norm": 0.3618490695953369, "learning_rate": 9.637910085054679e-05, "loss": 1.8889, "step": 606 }, { "epoch": 0.18424647139171346, "grad_norm": 0.3828302025794983, "learning_rate": 9.637302551640341e-05, "loss": 1.8222, "step": 607 }, { "epoch": 0.18455000758840492, "grad_norm": 0.3974449336528778, "learning_rate": 9.636695018226003e-05, "loss": 2.0484, "step": 608 }, { "epoch": 0.18485354378509639, "grad_norm": 0.4195742607116699, "learning_rate": 9.636087484811666e-05, "loss": 1.8497, "step": 609 }, { "epoch": 0.18515707998178782, "grad_norm": 0.3607097864151001, "learning_rate": 9.635479951397328e-05, "loss": 1.7574, "step": 610 }, { "epoch": 0.18546061617847928, "grad_norm": 0.39163729548454285, "learning_rate": 9.634872417982989e-05, "loss": 1.8299, "step": 611 }, { "epoch": 0.18576415237517074, "grad_norm": 0.4060773551464081, "learning_rate": 9.634264884568652e-05, "loss": 1.8041, "step": 612 }, { "epoch": 0.1860676885718622, "grad_norm": 0.34089556336402893, "learning_rate": 9.633657351154314e-05, "loss": 1.7692, "step": 613 }, { "epoch": 0.18637122476855364, "grad_norm": 0.33763736486434937, "learning_rate": 9.633049817739976e-05, "loss": 2.0368, "step": 614 }, { "epoch": 0.1866747609652451, "grad_norm": 0.3397420644760132, "learning_rate": 9.632442284325638e-05, "loss": 1.9108, "step": 615 }, { "epoch": 0.18697829716193656, "grad_norm": 0.387208491563797, "learning_rate": 9.6318347509113e-05, "loss": 1.7698, "step": 616 }, { "epoch": 0.18728183335862802, "grad_norm": 0.4464956820011139, "learning_rate": 9.631227217496964e-05, "loss": 1.7602, "step": 617 }, { "epoch": 0.18758536955531946, "grad_norm": 0.34682369232177734, "learning_rate": 9.630619684082624e-05, "loss": 1.6602, "step": 618 }, { "epoch": 0.18788890575201092, "grad_norm": 0.8865132331848145, "learning_rate": 9.630012150668287e-05, "loss": 2.1785, "step": 619 }, { "epoch": 0.18819244194870238, "grad_norm": 0.32631704211235046, "learning_rate": 9.62940461725395e-05, "loss": 1.5203, "step": 620 }, { "epoch": 0.18849597814539384, "grad_norm": 0.3332744836807251, "learning_rate": 9.628797083839612e-05, "loss": 1.8768, "step": 621 }, { "epoch": 0.1887995143420853, "grad_norm": 0.43369799852371216, "learning_rate": 9.628189550425274e-05, "loss": 2.088, "step": 622 }, { "epoch": 0.18910305053877674, "grad_norm": 0.34848517179489136, "learning_rate": 9.627582017010937e-05, "loss": 1.7685, "step": 623 }, { "epoch": 0.1894065867354682, "grad_norm": 0.40195101499557495, "learning_rate": 9.626974483596599e-05, "loss": 2.0937, "step": 624 }, { "epoch": 0.18971012293215966, "grad_norm": 0.3992537558078766, "learning_rate": 9.62636695018226e-05, "loss": 1.7799, "step": 625 }, { "epoch": 0.19001365912885113, "grad_norm": 0.41797420382499695, "learning_rate": 9.625759416767922e-05, "loss": 1.92, "step": 626 }, { "epoch": 0.19031719532554256, "grad_norm": 0.37648969888687134, "learning_rate": 9.625151883353585e-05, "loss": 2.0243, "step": 627 }, { "epoch": 0.19062073152223402, "grad_norm": 0.3513944447040558, "learning_rate": 9.624544349939247e-05, "loss": 2.0676, "step": 628 }, { "epoch": 0.19092426771892548, "grad_norm": 0.3975341022014618, "learning_rate": 9.623936816524909e-05, "loss": 1.8837, "step": 629 }, { "epoch": 0.19122780391561695, "grad_norm": 0.3954656422138214, "learning_rate": 9.623329283110572e-05, "loss": 1.9648, "step": 630 }, { "epoch": 0.19153134011230838, "grad_norm": 0.32590335607528687, "learning_rate": 9.622721749696235e-05, "loss": 2.0704, "step": 631 }, { "epoch": 0.19183487630899984, "grad_norm": 0.7592522501945496, "learning_rate": 9.622114216281895e-05, "loss": 1.8721, "step": 632 }, { "epoch": 0.1921384125056913, "grad_norm": 0.4411126375198364, "learning_rate": 9.621506682867558e-05, "loss": 1.963, "step": 633 }, { "epoch": 0.19244194870238276, "grad_norm": 0.37471216917037964, "learning_rate": 9.620899149453221e-05, "loss": 2.0679, "step": 634 }, { "epoch": 0.19274548489907423, "grad_norm": 0.39219167828559875, "learning_rate": 9.620291616038882e-05, "loss": 2.0597, "step": 635 }, { "epoch": 0.19304902109576566, "grad_norm": 0.3059561550617218, "learning_rate": 9.619684082624545e-05, "loss": 1.7527, "step": 636 }, { "epoch": 0.19335255729245712, "grad_norm": 0.3843368589878082, "learning_rate": 9.619076549210208e-05, "loss": 1.9737, "step": 637 }, { "epoch": 0.19365609348914858, "grad_norm": 0.3923681378364563, "learning_rate": 9.61846901579587e-05, "loss": 1.7179, "step": 638 }, { "epoch": 0.19395962968584005, "grad_norm": 0.4614477753639221, "learning_rate": 9.617861482381531e-05, "loss": 1.2844, "step": 639 }, { "epoch": 0.19426316588253148, "grad_norm": 0.3571571409702301, "learning_rate": 9.617253948967193e-05, "loss": 2.1014, "step": 640 }, { "epoch": 0.19456670207922294, "grad_norm": 0.40552857518196106, "learning_rate": 9.616646415552856e-05, "loss": 1.8588, "step": 641 }, { "epoch": 0.1948702382759144, "grad_norm": 0.3739052414894104, "learning_rate": 9.616038882138518e-05, "loss": 2.2262, "step": 642 }, { "epoch": 0.19517377447260587, "grad_norm": 0.33607810735702515, "learning_rate": 9.61543134872418e-05, "loss": 1.8196, "step": 643 }, { "epoch": 0.19547731066929733, "grad_norm": 0.3751862645149231, "learning_rate": 9.614823815309843e-05, "loss": 1.7183, "step": 644 }, { "epoch": 0.19578084686598876, "grad_norm": 0.4978049397468567, "learning_rate": 9.614216281895506e-05, "loss": 1.6918, "step": 645 }, { "epoch": 0.19608438306268022, "grad_norm": 0.3755020499229431, "learning_rate": 9.613608748481166e-05, "loss": 2.0704, "step": 646 }, { "epoch": 0.19638791925937168, "grad_norm": 0.3641931414604187, "learning_rate": 9.613001215066829e-05, "loss": 1.8087, "step": 647 }, { "epoch": 0.19669145545606315, "grad_norm": 0.32229694724082947, "learning_rate": 9.612393681652492e-05, "loss": 1.9157, "step": 648 }, { "epoch": 0.19699499165275458, "grad_norm": 0.4132642149925232, "learning_rate": 9.611786148238153e-05, "loss": 1.6715, "step": 649 }, { "epoch": 0.19729852784944604, "grad_norm": 0.38652992248535156, "learning_rate": 9.611178614823816e-05, "loss": 1.814, "step": 650 }, { "epoch": 0.1976020640461375, "grad_norm": 0.432373970746994, "learning_rate": 9.610571081409479e-05, "loss": 1.8719, "step": 651 }, { "epoch": 0.19790560024282897, "grad_norm": 0.4002588987350464, "learning_rate": 9.60996354799514e-05, "loss": 1.9697, "step": 652 }, { "epoch": 0.1982091364395204, "grad_norm": 0.3377281427383423, "learning_rate": 9.609356014580802e-05, "loss": 2.0114, "step": 653 }, { "epoch": 0.19851267263621186, "grad_norm": 0.40434688329696655, "learning_rate": 9.608748481166464e-05, "loss": 2.2406, "step": 654 }, { "epoch": 0.19881620883290332, "grad_norm": 0.33377552032470703, "learning_rate": 9.608140947752127e-05, "loss": 1.8749, "step": 655 }, { "epoch": 0.19911974502959479, "grad_norm": 0.36146265268325806, "learning_rate": 9.607533414337789e-05, "loss": 1.5821, "step": 656 }, { "epoch": 0.19942328122628625, "grad_norm": 1.8864728212356567, "learning_rate": 9.60692588092345e-05, "loss": 1.9895, "step": 657 }, { "epoch": 0.19972681742297768, "grad_norm": 0.35898399353027344, "learning_rate": 9.606318347509114e-05, "loss": 1.6315, "step": 658 }, { "epoch": 0.20003035361966914, "grad_norm": 0.44391825795173645, "learning_rate": 9.605710814094775e-05, "loss": 1.5046, "step": 659 }, { "epoch": 0.2003338898163606, "grad_norm": 0.37957173585891724, "learning_rate": 9.605103280680437e-05, "loss": 1.8532, "step": 660 }, { "epoch": 0.20063742601305207, "grad_norm": 1.4045872688293457, "learning_rate": 9.6044957472661e-05, "loss": 2.1214, "step": 661 }, { "epoch": 0.2009409622097435, "grad_norm": 0.4327601194381714, "learning_rate": 9.603888213851763e-05, "loss": 2.1803, "step": 662 }, { "epoch": 0.20124449840643496, "grad_norm": 3.2122411727905273, "learning_rate": 9.603280680437424e-05, "loss": 1.989, "step": 663 }, { "epoch": 0.20154803460312642, "grad_norm": 0.4538092017173767, "learning_rate": 9.602673147023087e-05, "loss": 1.7016, "step": 664 }, { "epoch": 0.2018515707998179, "grad_norm": 0.5968027710914612, "learning_rate": 9.60206561360875e-05, "loss": 1.8538, "step": 665 }, { "epoch": 0.20215510699650932, "grad_norm": 0.3749493956565857, "learning_rate": 9.601458080194412e-05, "loss": 1.8747, "step": 666 }, { "epoch": 0.20245864319320078, "grad_norm": 0.3741036355495453, "learning_rate": 9.600850546780073e-05, "loss": 1.9896, "step": 667 }, { "epoch": 0.20276217938989224, "grad_norm": 0.3708043098449707, "learning_rate": 9.600243013365735e-05, "loss": 1.7925, "step": 668 }, { "epoch": 0.2030657155865837, "grad_norm": 0.32813695073127747, "learning_rate": 9.599635479951398e-05, "loss": 1.8272, "step": 669 }, { "epoch": 0.20336925178327517, "grad_norm": 0.6151819229125977, "learning_rate": 9.59902794653706e-05, "loss": 1.927, "step": 670 }, { "epoch": 0.2036727879799666, "grad_norm": 0.42905279994010925, "learning_rate": 9.598420413122722e-05, "loss": 1.5442, "step": 671 }, { "epoch": 0.20397632417665806, "grad_norm": 0.4102342426776886, "learning_rate": 9.597812879708385e-05, "loss": 1.2097, "step": 672 }, { "epoch": 0.20427986037334953, "grad_norm": 0.392560750246048, "learning_rate": 9.597205346294046e-05, "loss": 1.8166, "step": 673 }, { "epoch": 0.204583396570041, "grad_norm": 0.4056089222431183, "learning_rate": 9.596597812879708e-05, "loss": 1.5171, "step": 674 }, { "epoch": 0.20488693276673242, "grad_norm": 0.4734075963497162, "learning_rate": 9.595990279465371e-05, "loss": 1.4529, "step": 675 }, { "epoch": 0.20519046896342388, "grad_norm": 0.41490182280540466, "learning_rate": 9.595382746051034e-05, "loss": 1.8316, "step": 676 }, { "epoch": 0.20549400516011535, "grad_norm": 0.3590947091579437, "learning_rate": 9.594775212636695e-05, "loss": 1.9073, "step": 677 }, { "epoch": 0.2057975413568068, "grad_norm": 0.3779642581939697, "learning_rate": 9.594167679222358e-05, "loss": 1.9669, "step": 678 }, { "epoch": 0.20610107755349824, "grad_norm": 0.39710256457328796, "learning_rate": 9.593560145808021e-05, "loss": 2.0278, "step": 679 }, { "epoch": 0.2064046137501897, "grad_norm": 0.4168045222759247, "learning_rate": 9.592952612393683e-05, "loss": 1.5158, "step": 680 }, { "epoch": 0.20670814994688116, "grad_norm": 0.3751262426376343, "learning_rate": 9.592345078979344e-05, "loss": 2.1459, "step": 681 }, { "epoch": 0.20701168614357263, "grad_norm": 0.49441012740135193, "learning_rate": 9.591737545565006e-05, "loss": 1.8968, "step": 682 }, { "epoch": 0.2073152223402641, "grad_norm": 0.4807801842689514, "learning_rate": 9.591130012150669e-05, "loss": 2.2741, "step": 683 }, { "epoch": 0.20761875853695552, "grad_norm": 0.3886473774909973, "learning_rate": 9.590522478736331e-05, "loss": 1.4511, "step": 684 }, { "epoch": 0.20792229473364698, "grad_norm": 0.6425371170043945, "learning_rate": 9.589914945321993e-05, "loss": 2.2643, "step": 685 }, { "epoch": 0.20822583093033845, "grad_norm": 0.37720414996147156, "learning_rate": 9.589307411907656e-05, "loss": 1.8431, "step": 686 }, { "epoch": 0.2085293671270299, "grad_norm": 0.35544151067733765, "learning_rate": 9.588699878493317e-05, "loss": 1.9822, "step": 687 }, { "epoch": 0.20883290332372134, "grad_norm": 0.561444878578186, "learning_rate": 9.588092345078979e-05, "loss": 1.9507, "step": 688 }, { "epoch": 0.2091364395204128, "grad_norm": 0.37386366724967957, "learning_rate": 9.587484811664642e-05, "loss": 2.0673, "step": 689 }, { "epoch": 0.20943997571710427, "grad_norm": 0.3882986307144165, "learning_rate": 9.586877278250305e-05, "loss": 2.0166, "step": 690 }, { "epoch": 0.20974351191379573, "grad_norm": 0.41471484303474426, "learning_rate": 9.586269744835966e-05, "loss": 1.7111, "step": 691 }, { "epoch": 0.2100470481104872, "grad_norm": 0.467939555644989, "learning_rate": 9.585662211421629e-05, "loss": 1.6116, "step": 692 }, { "epoch": 0.21035058430717862, "grad_norm": 0.905303955078125, "learning_rate": 9.585054678007292e-05, "loss": 2.0287, "step": 693 }, { "epoch": 0.21065412050387008, "grad_norm": 0.3820960819721222, "learning_rate": 9.584447144592954e-05, "loss": 1.7834, "step": 694 }, { "epoch": 0.21095765670056155, "grad_norm": 0.4265238642692566, "learning_rate": 9.583839611178615e-05, "loss": 1.5176, "step": 695 }, { "epoch": 0.211261192897253, "grad_norm": 0.30739274621009827, "learning_rate": 9.583232077764277e-05, "loss": 1.3758, "step": 696 }, { "epoch": 0.21156472909394444, "grad_norm": 0.3890193998813629, "learning_rate": 9.58262454434994e-05, "loss": 1.8188, "step": 697 }, { "epoch": 0.2118682652906359, "grad_norm": 0.3726442754268646, "learning_rate": 9.582017010935602e-05, "loss": 1.8957, "step": 698 }, { "epoch": 0.21217180148732737, "grad_norm": 0.43913599848747253, "learning_rate": 9.581409477521264e-05, "loss": 1.8882, "step": 699 }, { "epoch": 0.21247533768401883, "grad_norm": 0.4009544253349304, "learning_rate": 9.580801944106927e-05, "loss": 1.8844, "step": 700 }, { "epoch": 0.21277887388071026, "grad_norm": 0.6625222563743591, "learning_rate": 9.580194410692588e-05, "loss": 1.8564, "step": 701 }, { "epoch": 0.21308241007740172, "grad_norm": 0.3108811676502228, "learning_rate": 9.57958687727825e-05, "loss": 1.5883, "step": 702 }, { "epoch": 0.21338594627409319, "grad_norm": 0.35348960757255554, "learning_rate": 9.578979343863913e-05, "loss": 1.6062, "step": 703 }, { "epoch": 0.21368948247078465, "grad_norm": 0.3460123538970947, "learning_rate": 9.578371810449576e-05, "loss": 1.3375, "step": 704 }, { "epoch": 0.2139930186674761, "grad_norm": 0.3396036922931671, "learning_rate": 9.577764277035237e-05, "loss": 2.007, "step": 705 }, { "epoch": 0.21429655486416754, "grad_norm": 0.38622626662254333, "learning_rate": 9.5771567436209e-05, "loss": 1.7612, "step": 706 }, { "epoch": 0.214600091060859, "grad_norm": 0.39317429065704346, "learning_rate": 9.576549210206563e-05, "loss": 1.7884, "step": 707 }, { "epoch": 0.21490362725755047, "grad_norm": 0.3730657994747162, "learning_rate": 9.575941676792223e-05, "loss": 1.9813, "step": 708 }, { "epoch": 0.21520716345424193, "grad_norm": 0.3781750202178955, "learning_rate": 9.575334143377886e-05, "loss": 1.8843, "step": 709 }, { "epoch": 0.21551069965093336, "grad_norm": 0.4760946035385132, "learning_rate": 9.574726609963548e-05, "loss": 2.0092, "step": 710 }, { "epoch": 0.21581423584762482, "grad_norm": 0.4052627980709076, "learning_rate": 9.574119076549211e-05, "loss": 1.9742, "step": 711 }, { "epoch": 0.2161177720443163, "grad_norm": 0.34289002418518066, "learning_rate": 9.573511543134873e-05, "loss": 2.0408, "step": 712 }, { "epoch": 0.21642130824100775, "grad_norm": 0.3720855116844177, "learning_rate": 9.572904009720535e-05, "loss": 1.6685, "step": 713 }, { "epoch": 0.21672484443769918, "grad_norm": 0.4015984535217285, "learning_rate": 9.572296476306198e-05, "loss": 1.9651, "step": 714 }, { "epoch": 0.21702838063439064, "grad_norm": 0.45196712017059326, "learning_rate": 9.57168894289186e-05, "loss": 2.0588, "step": 715 }, { "epoch": 0.2173319168310821, "grad_norm": 0.3434293866157532, "learning_rate": 9.571081409477521e-05, "loss": 1.748, "step": 716 }, { "epoch": 0.21763545302777357, "grad_norm": 0.29288217425346375, "learning_rate": 9.570473876063184e-05, "loss": 2.0274, "step": 717 }, { "epoch": 0.21793898922446503, "grad_norm": 0.4192684590816498, "learning_rate": 9.569866342648847e-05, "loss": 1.3597, "step": 718 }, { "epoch": 0.21824252542115646, "grad_norm": 0.4450276494026184, "learning_rate": 9.569258809234508e-05, "loss": 1.4675, "step": 719 }, { "epoch": 0.21854606161784793, "grad_norm": 0.33388352394104004, "learning_rate": 9.568651275820171e-05, "loss": 1.9747, "step": 720 }, { "epoch": 0.2188495978145394, "grad_norm": 0.34411269426345825, "learning_rate": 9.568043742405832e-05, "loss": 1.9119, "step": 721 }, { "epoch": 0.21915313401123085, "grad_norm": 0.38926756381988525, "learning_rate": 9.567436208991494e-05, "loss": 1.8859, "step": 722 }, { "epoch": 0.21945667020792228, "grad_norm": 0.3513714671134949, "learning_rate": 9.566828675577157e-05, "loss": 1.9125, "step": 723 }, { "epoch": 0.21976020640461374, "grad_norm": 0.9200549721717834, "learning_rate": 9.566221142162819e-05, "loss": 2.1854, "step": 724 }, { "epoch": 0.2200637426013052, "grad_norm": 0.444815456867218, "learning_rate": 9.565613608748482e-05, "loss": 1.8998, "step": 725 }, { "epoch": 0.22036727879799667, "grad_norm": 0.37483492493629456, "learning_rate": 9.565006075334144e-05, "loss": 1.7089, "step": 726 }, { "epoch": 0.22067081499468813, "grad_norm": 0.32369089126586914, "learning_rate": 9.564398541919806e-05, "loss": 2.1223, "step": 727 }, { "epoch": 0.22097435119137956, "grad_norm": 0.3537048399448395, "learning_rate": 9.563791008505469e-05, "loss": 1.3256, "step": 728 }, { "epoch": 0.22127788738807103, "grad_norm": 0.408723384141922, "learning_rate": 9.56318347509113e-05, "loss": 1.8056, "step": 729 }, { "epoch": 0.2215814235847625, "grad_norm": 0.37529709935188293, "learning_rate": 9.562575941676792e-05, "loss": 1.9281, "step": 730 }, { "epoch": 0.22188495978145395, "grad_norm": 0.3406868278980255, "learning_rate": 9.561968408262455e-05, "loss": 1.9673, "step": 731 }, { "epoch": 0.22218849597814538, "grad_norm": 0.34361201524734497, "learning_rate": 9.561360874848117e-05, "loss": 1.7777, "step": 732 }, { "epoch": 0.22249203217483685, "grad_norm": 0.3995072543621063, "learning_rate": 9.560753341433779e-05, "loss": 1.9959, "step": 733 }, { "epoch": 0.2227955683715283, "grad_norm": 0.4618263244628906, "learning_rate": 9.560145808019442e-05, "loss": 1.7524, "step": 734 }, { "epoch": 0.22309910456821977, "grad_norm": 0.36778688430786133, "learning_rate": 9.559538274605103e-05, "loss": 1.6332, "step": 735 }, { "epoch": 0.2234026407649112, "grad_norm": 0.47031348943710327, "learning_rate": 9.558930741190765e-05, "loss": 2.0364, "step": 736 }, { "epoch": 0.22370617696160267, "grad_norm": 0.6130351424217224, "learning_rate": 9.558323207776428e-05, "loss": 1.7814, "step": 737 }, { "epoch": 0.22400971315829413, "grad_norm": 0.3733448088169098, "learning_rate": 9.55771567436209e-05, "loss": 1.8888, "step": 738 }, { "epoch": 0.2243132493549856, "grad_norm": 0.368182510137558, "learning_rate": 9.557108140947753e-05, "loss": 2.3162, "step": 739 }, { "epoch": 0.22461678555167705, "grad_norm": 0.4311901926994324, "learning_rate": 9.556500607533415e-05, "loss": 2.1294, "step": 740 }, { "epoch": 0.22492032174836848, "grad_norm": 0.38696759939193726, "learning_rate": 9.555893074119077e-05, "loss": 1.9524, "step": 741 }, { "epoch": 0.22522385794505995, "grad_norm": 0.37136873602867126, "learning_rate": 9.55528554070474e-05, "loss": 1.8677, "step": 742 }, { "epoch": 0.2255273941417514, "grad_norm": 0.4084314703941345, "learning_rate": 9.554678007290401e-05, "loss": 1.8621, "step": 743 }, { "epoch": 0.22583093033844287, "grad_norm": 0.4216344952583313, "learning_rate": 9.554070473876063e-05, "loss": 2.0455, "step": 744 }, { "epoch": 0.2261344665351343, "grad_norm": 0.36579129099845886, "learning_rate": 9.553462940461726e-05, "loss": 1.8993, "step": 745 }, { "epoch": 0.22643800273182577, "grad_norm": 0.4048181474208832, "learning_rate": 9.552855407047388e-05, "loss": 1.2016, "step": 746 }, { "epoch": 0.22674153892851723, "grad_norm": 0.4071241617202759, "learning_rate": 9.55224787363305e-05, "loss": 1.9272, "step": 747 }, { "epoch": 0.2270450751252087, "grad_norm": 0.3970381021499634, "learning_rate": 9.551640340218713e-05, "loss": 2.0819, "step": 748 }, { "epoch": 0.22734861132190012, "grad_norm": 0.3891443610191345, "learning_rate": 9.551032806804374e-05, "loss": 2.2604, "step": 749 }, { "epoch": 0.22765214751859159, "grad_norm": 0.40169456601142883, "learning_rate": 9.550425273390036e-05, "loss": 1.9514, "step": 750 }, { "epoch": 0.22795568371528305, "grad_norm": 0.7191595435142517, "learning_rate": 9.549817739975699e-05, "loss": 1.1686, "step": 751 }, { "epoch": 0.2282592199119745, "grad_norm": 0.3602886497974396, "learning_rate": 9.549210206561361e-05, "loss": 1.8877, "step": 752 }, { "epoch": 0.22856275610866597, "grad_norm": 0.34270042181015015, "learning_rate": 9.548602673147024e-05, "loss": 1.4296, "step": 753 }, { "epoch": 0.2288662923053574, "grad_norm": 0.37293288111686707, "learning_rate": 9.547995139732686e-05, "loss": 1.6695, "step": 754 }, { "epoch": 0.22916982850204887, "grad_norm": 0.31505951285362244, "learning_rate": 9.547387606318348e-05, "loss": 1.9869, "step": 755 }, { "epoch": 0.22947336469874033, "grad_norm": 0.38319501280784607, "learning_rate": 9.54678007290401e-05, "loss": 1.9769, "step": 756 }, { "epoch": 0.2297769008954318, "grad_norm": 0.37378913164138794, "learning_rate": 9.546172539489672e-05, "loss": 1.9998, "step": 757 }, { "epoch": 0.23008043709212322, "grad_norm": 0.34337082505226135, "learning_rate": 9.545565006075334e-05, "loss": 2.048, "step": 758 }, { "epoch": 0.2303839732888147, "grad_norm": 0.35315895080566406, "learning_rate": 9.544957472660997e-05, "loss": 1.8742, "step": 759 }, { "epoch": 0.23068750948550615, "grad_norm": 0.3854929506778717, "learning_rate": 9.544349939246659e-05, "loss": 1.4978, "step": 760 }, { "epoch": 0.2309910456821976, "grad_norm": 0.35743293166160583, "learning_rate": 9.543742405832321e-05, "loss": 2.159, "step": 761 }, { "epoch": 0.23129458187888904, "grad_norm": 0.39035484194755554, "learning_rate": 9.543134872417984e-05, "loss": 1.9588, "step": 762 }, { "epoch": 0.2315981180755805, "grad_norm": 0.35890859365463257, "learning_rate": 9.542527339003645e-05, "loss": 1.7466, "step": 763 }, { "epoch": 0.23190165427227197, "grad_norm": 0.46986308693885803, "learning_rate": 9.541919805589307e-05, "loss": 1.8819, "step": 764 }, { "epoch": 0.23220519046896343, "grad_norm": 0.4226287305355072, "learning_rate": 9.54131227217497e-05, "loss": 1.9421, "step": 765 }, { "epoch": 0.2325087266656549, "grad_norm": 0.4461078345775604, "learning_rate": 9.540704738760632e-05, "loss": 1.6888, "step": 766 }, { "epoch": 0.23281226286234633, "grad_norm": 0.363406240940094, "learning_rate": 9.540097205346295e-05, "loss": 1.9234, "step": 767 }, { "epoch": 0.2331157990590378, "grad_norm": 0.3713390529155731, "learning_rate": 9.539489671931957e-05, "loss": 1.9741, "step": 768 }, { "epoch": 0.23341933525572925, "grad_norm": 0.3300642967224121, "learning_rate": 9.538882138517619e-05, "loss": 1.5029, "step": 769 }, { "epoch": 0.2337228714524207, "grad_norm": 0.30819302797317505, "learning_rate": 9.538274605103282e-05, "loss": 1.6739, "step": 770 }, { "epoch": 0.23402640764911214, "grad_norm": 0.3575786054134369, "learning_rate": 9.537667071688943e-05, "loss": 1.5768, "step": 771 }, { "epoch": 0.2343299438458036, "grad_norm": 0.32882705330848694, "learning_rate": 9.537059538274605e-05, "loss": 1.9512, "step": 772 }, { "epoch": 0.23463348004249507, "grad_norm": 0.3340393900871277, "learning_rate": 9.536452004860268e-05, "loss": 1.927, "step": 773 }, { "epoch": 0.23493701623918653, "grad_norm": 0.33640411496162415, "learning_rate": 9.53584447144593e-05, "loss": 1.9496, "step": 774 }, { "epoch": 0.235240552435878, "grad_norm": 0.3581593930721283, "learning_rate": 9.535236938031592e-05, "loss": 1.8766, "step": 775 }, { "epoch": 0.23554408863256943, "grad_norm": 0.44084489345550537, "learning_rate": 9.534629404617255e-05, "loss": 2.0274, "step": 776 }, { "epoch": 0.2358476248292609, "grad_norm": 0.363518089056015, "learning_rate": 9.534021871202917e-05, "loss": 1.9956, "step": 777 }, { "epoch": 0.23615116102595235, "grad_norm": 0.35967034101486206, "learning_rate": 9.533414337788578e-05, "loss": 1.7312, "step": 778 }, { "epoch": 0.2364546972226438, "grad_norm": 0.3683255910873413, "learning_rate": 9.532806804374241e-05, "loss": 1.9594, "step": 779 }, { "epoch": 0.23675823341933525, "grad_norm": 0.3063610792160034, "learning_rate": 9.532199270959903e-05, "loss": 1.3972, "step": 780 }, { "epoch": 0.2370617696160267, "grad_norm": 0.4217472970485687, "learning_rate": 9.531591737545565e-05, "loss": 1.9495, "step": 781 }, { "epoch": 0.23736530581271817, "grad_norm": 0.46779391169548035, "learning_rate": 9.530984204131228e-05, "loss": 1.9965, "step": 782 }, { "epoch": 0.23766884200940963, "grad_norm": 0.351810485124588, "learning_rate": 9.53037667071689e-05, "loss": 1.8034, "step": 783 }, { "epoch": 0.23797237820610107, "grad_norm": 0.3302007019519806, "learning_rate": 9.529769137302553e-05, "loss": 2.0156, "step": 784 }, { "epoch": 0.23827591440279253, "grad_norm": 0.3699585497379303, "learning_rate": 9.529161603888214e-05, "loss": 1.7217, "step": 785 }, { "epoch": 0.238579450599484, "grad_norm": 0.34256428480148315, "learning_rate": 9.528554070473876e-05, "loss": 1.924, "step": 786 }, { "epoch": 0.23888298679617545, "grad_norm": 0.4008747339248657, "learning_rate": 9.527946537059539e-05, "loss": 1.8903, "step": 787 }, { "epoch": 0.2391865229928669, "grad_norm": 0.4281119108200073, "learning_rate": 9.527339003645201e-05, "loss": 2.1402, "step": 788 }, { "epoch": 0.23949005918955835, "grad_norm": 0.4065872132778168, "learning_rate": 9.526731470230863e-05, "loss": 1.8399, "step": 789 }, { "epoch": 0.2397935953862498, "grad_norm": 0.35334911942481995, "learning_rate": 9.526123936816526e-05, "loss": 1.8206, "step": 790 }, { "epoch": 0.24009713158294127, "grad_norm": 0.35420283675193787, "learning_rate": 9.525516403402188e-05, "loss": 1.956, "step": 791 }, { "epoch": 0.24040066777963273, "grad_norm": 0.5720547437667847, "learning_rate": 9.524908869987849e-05, "loss": 2.1932, "step": 792 }, { "epoch": 0.24070420397632417, "grad_norm": 0.3512174189090729, "learning_rate": 9.524301336573512e-05, "loss": 2.1031, "step": 793 }, { "epoch": 0.24100774017301563, "grad_norm": 0.3975936770439148, "learning_rate": 9.523693803159174e-05, "loss": 1.5962, "step": 794 }, { "epoch": 0.2413112763697071, "grad_norm": 0.3723268210887909, "learning_rate": 9.523086269744836e-05, "loss": 1.492, "step": 795 }, { "epoch": 0.24161481256639855, "grad_norm": 0.5287608504295349, "learning_rate": 9.522478736330499e-05, "loss": 1.5488, "step": 796 }, { "epoch": 0.24191834876308999, "grad_norm": 0.37749987840652466, "learning_rate": 9.52187120291616e-05, "loss": 1.5724, "step": 797 }, { "epoch": 0.24222188495978145, "grad_norm": 0.38260164856910706, "learning_rate": 9.521263669501824e-05, "loss": 2.1049, "step": 798 }, { "epoch": 0.2425254211564729, "grad_norm": 0.3552962839603424, "learning_rate": 9.520656136087485e-05, "loss": 1.8853, "step": 799 }, { "epoch": 0.24282895735316437, "grad_norm": 0.5752935409545898, "learning_rate": 9.520048602673147e-05, "loss": 1.6359, "step": 800 }, { "epoch": 0.24313249354985583, "grad_norm": 0.41982319951057434, "learning_rate": 9.51944106925881e-05, "loss": 1.485, "step": 801 }, { "epoch": 0.24343602974654727, "grad_norm": 0.3913584351539612, "learning_rate": 9.518833535844472e-05, "loss": 1.9818, "step": 802 }, { "epoch": 0.24373956594323873, "grad_norm": 0.3771272897720337, "learning_rate": 9.518226002430134e-05, "loss": 1.7237, "step": 803 }, { "epoch": 0.2440431021399302, "grad_norm": 0.3625226318836212, "learning_rate": 9.517618469015797e-05, "loss": 1.6776, "step": 804 }, { "epoch": 0.24434663833662165, "grad_norm": 0.3253527283668518, "learning_rate": 9.517010935601459e-05, "loss": 2.0659, "step": 805 }, { "epoch": 0.2446501745333131, "grad_norm": 0.3705154359340668, "learning_rate": 9.51640340218712e-05, "loss": 2.116, "step": 806 }, { "epoch": 0.24495371073000455, "grad_norm": 0.3321172595024109, "learning_rate": 9.515795868772783e-05, "loss": 2.1026, "step": 807 }, { "epoch": 0.245257246926696, "grad_norm": 0.41880494356155396, "learning_rate": 9.515188335358445e-05, "loss": 1.7541, "step": 808 }, { "epoch": 0.24556078312338747, "grad_norm": 0.38695165514945984, "learning_rate": 9.514580801944107e-05, "loss": 1.9328, "step": 809 }, { "epoch": 0.2458643193200789, "grad_norm": 0.37348538637161255, "learning_rate": 9.51397326852977e-05, "loss": 1.9065, "step": 810 }, { "epoch": 0.24616785551677037, "grad_norm": 1.5822879076004028, "learning_rate": 9.513365735115432e-05, "loss": 1.8297, "step": 811 }, { "epoch": 0.24647139171346183, "grad_norm": 0.3697100579738617, "learning_rate": 9.512758201701095e-05, "loss": 1.9091, "step": 812 }, { "epoch": 0.2467749279101533, "grad_norm": 0.40801766514778137, "learning_rate": 9.512150668286756e-05, "loss": 1.8284, "step": 813 }, { "epoch": 0.24707846410684475, "grad_norm": 0.4060746133327484, "learning_rate": 9.511543134872418e-05, "loss": 2.0244, "step": 814 }, { "epoch": 0.2473820003035362, "grad_norm": 0.38555091619491577, "learning_rate": 9.510935601458081e-05, "loss": 1.6098, "step": 815 }, { "epoch": 0.24768553650022765, "grad_norm": 0.39763063192367554, "learning_rate": 9.510328068043743e-05, "loss": 1.836, "step": 816 }, { "epoch": 0.2479890726969191, "grad_norm": 0.6933274269104004, "learning_rate": 9.509720534629405e-05, "loss": 2.1968, "step": 817 }, { "epoch": 0.24829260889361057, "grad_norm": 1.307569146156311, "learning_rate": 9.509113001215068e-05, "loss": 2.3396, "step": 818 }, { "epoch": 0.248596145090302, "grad_norm": 0.3513609766960144, "learning_rate": 9.50850546780073e-05, "loss": 1.7302, "step": 819 }, { "epoch": 0.24889968128699347, "grad_norm": 0.36949577927589417, "learning_rate": 9.507897934386391e-05, "loss": 1.7796, "step": 820 }, { "epoch": 0.24920321748368493, "grad_norm": 0.38934049010276794, "learning_rate": 9.507290400972054e-05, "loss": 1.7644, "step": 821 }, { "epoch": 0.2495067536803764, "grad_norm": 0.5927665829658508, "learning_rate": 9.506682867557716e-05, "loss": 1.9029, "step": 822 }, { "epoch": 0.24981028987706785, "grad_norm": 0.5131897926330566, "learning_rate": 9.506075334143378e-05, "loss": 2.1282, "step": 823 }, { "epoch": 0.2501138260737593, "grad_norm": 0.36232516169548035, "learning_rate": 9.505467800729041e-05, "loss": 2.1073, "step": 824 }, { "epoch": 0.25041736227045075, "grad_norm": 0.43212029337882996, "learning_rate": 9.504860267314703e-05, "loss": 2.0364, "step": 825 }, { "epoch": 0.2507208984671422, "grad_norm": 0.36575961112976074, "learning_rate": 9.504252733900366e-05, "loss": 1.8609, "step": 826 }, { "epoch": 0.2510244346638337, "grad_norm": 0.32013362646102905, "learning_rate": 9.503645200486027e-05, "loss": 1.4097, "step": 827 }, { "epoch": 0.2513279708605251, "grad_norm": 0.4062201976776123, "learning_rate": 9.503037667071689e-05, "loss": 1.9872, "step": 828 }, { "epoch": 0.2516315070572166, "grad_norm": 0.3433174192905426, "learning_rate": 9.502430133657352e-05, "loss": 2.0662, "step": 829 }, { "epoch": 0.25193504325390803, "grad_norm": 0.3925630748271942, "learning_rate": 9.501822600243013e-05, "loss": 1.5876, "step": 830 }, { "epoch": 0.25223857945059946, "grad_norm": 0.32962149381637573, "learning_rate": 9.501215066828676e-05, "loss": 1.7445, "step": 831 }, { "epoch": 0.25254211564729095, "grad_norm": 0.35508283972740173, "learning_rate": 9.500607533414339e-05, "loss": 1.8836, "step": 832 }, { "epoch": 0.2528456518439824, "grad_norm": 0.34893691539764404, "learning_rate": 9.5e-05, "loss": 1.6312, "step": 833 }, { "epoch": 0.2531491880406738, "grad_norm": 0.4068532884120941, "learning_rate": 9.499392466585662e-05, "loss": 1.5567, "step": 834 }, { "epoch": 0.2534527242373653, "grad_norm": 0.37818485498428345, "learning_rate": 9.498784933171325e-05, "loss": 2.0791, "step": 835 }, { "epoch": 0.25375626043405675, "grad_norm": 0.884172797203064, "learning_rate": 9.498177399756987e-05, "loss": 1.8642, "step": 836 }, { "epoch": 0.25405979663074824, "grad_norm": 0.4108290374279022, "learning_rate": 9.497569866342649e-05, "loss": 1.9545, "step": 837 }, { "epoch": 0.25436333282743967, "grad_norm": 0.37885358929634094, "learning_rate": 9.496962332928312e-05, "loss": 1.3709, "step": 838 }, { "epoch": 0.2546668690241311, "grad_norm": 0.3919561505317688, "learning_rate": 9.496354799513974e-05, "loss": 1.9548, "step": 839 }, { "epoch": 0.2549704052208226, "grad_norm": 0.3945852518081665, "learning_rate": 9.495747266099637e-05, "loss": 1.8139, "step": 840 }, { "epoch": 0.255273941417514, "grad_norm": 0.3272388279438019, "learning_rate": 9.495139732685298e-05, "loss": 2.1711, "step": 841 }, { "epoch": 0.2555774776142055, "grad_norm": 0.3214159905910492, "learning_rate": 9.49453219927096e-05, "loss": 1.6202, "step": 842 }, { "epoch": 0.25588101381089695, "grad_norm": 0.6175217628479004, "learning_rate": 9.493924665856623e-05, "loss": 1.9976, "step": 843 }, { "epoch": 0.2561845500075884, "grad_norm": 0.36993956565856934, "learning_rate": 9.493317132442284e-05, "loss": 1.9981, "step": 844 }, { "epoch": 0.2564880862042799, "grad_norm": 0.4294464588165283, "learning_rate": 9.492709599027947e-05, "loss": 2.017, "step": 845 }, { "epoch": 0.2567916224009713, "grad_norm": 0.4055061638355255, "learning_rate": 9.49210206561361e-05, "loss": 1.9383, "step": 846 }, { "epoch": 0.2570951585976628, "grad_norm": 0.3574405014514923, "learning_rate": 9.491494532199272e-05, "loss": 1.8058, "step": 847 }, { "epoch": 0.25739869479435423, "grad_norm": 0.35684704780578613, "learning_rate": 9.490886998784933e-05, "loss": 1.5661, "step": 848 }, { "epoch": 0.25770223099104567, "grad_norm": 0.35031405091285706, "learning_rate": 9.490279465370596e-05, "loss": 1.6422, "step": 849 }, { "epoch": 0.25800576718773716, "grad_norm": 0.390667200088501, "learning_rate": 9.489671931956258e-05, "loss": 1.9743, "step": 850 }, { "epoch": 0.2583093033844286, "grad_norm": 0.33744457364082336, "learning_rate": 9.48906439854192e-05, "loss": 1.4922, "step": 851 }, { "epoch": 0.25861283958112, "grad_norm": 0.3162226676940918, "learning_rate": 9.488456865127583e-05, "loss": 1.7791, "step": 852 }, { "epoch": 0.2589163757778115, "grad_norm": 0.49357378482818604, "learning_rate": 9.487849331713245e-05, "loss": 1.4353, "step": 853 }, { "epoch": 0.25921991197450295, "grad_norm": 0.4280342757701874, "learning_rate": 9.487241798298906e-05, "loss": 1.7277, "step": 854 }, { "epoch": 0.25952344817119444, "grad_norm": 0.4271382987499237, "learning_rate": 9.48663426488457e-05, "loss": 1.3241, "step": 855 }, { "epoch": 0.25982698436788587, "grad_norm": 0.3773948550224304, "learning_rate": 9.486026731470231e-05, "loss": 2.0892, "step": 856 }, { "epoch": 0.2601305205645773, "grad_norm": 0.3343275785446167, "learning_rate": 9.485419198055894e-05, "loss": 1.7968, "step": 857 }, { "epoch": 0.2604340567612688, "grad_norm": 0.3711187243461609, "learning_rate": 9.484811664641555e-05, "loss": 1.8599, "step": 858 }, { "epoch": 0.26073759295796023, "grad_norm": 0.6738047003746033, "learning_rate": 9.484204131227218e-05, "loss": 2.0192, "step": 859 }, { "epoch": 0.2610411291546517, "grad_norm": 0.3094058334827423, "learning_rate": 9.483596597812881e-05, "loss": 1.8827, "step": 860 }, { "epoch": 0.26134466535134315, "grad_norm": 0.4207117259502411, "learning_rate": 9.482989064398543e-05, "loss": 1.8029, "step": 861 }, { "epoch": 0.2616482015480346, "grad_norm": 0.3958408832550049, "learning_rate": 9.482381530984204e-05, "loss": 1.9911, "step": 862 }, { "epoch": 0.2619517377447261, "grad_norm": 0.6868960857391357, "learning_rate": 9.481773997569867e-05, "loss": 1.9394, "step": 863 }, { "epoch": 0.2622552739414175, "grad_norm": 1.131034016609192, "learning_rate": 9.481166464155529e-05, "loss": 1.486, "step": 864 }, { "epoch": 0.26255881013810894, "grad_norm": 0.42944055795669556, "learning_rate": 9.480558930741191e-05, "loss": 1.9993, "step": 865 }, { "epoch": 0.26286234633480043, "grad_norm": 0.3888295292854309, "learning_rate": 9.479951397326854e-05, "loss": 2.0604, "step": 866 }, { "epoch": 0.26316588253149187, "grad_norm": 0.41875898838043213, "learning_rate": 9.479343863912516e-05, "loss": 1.8242, "step": 867 }, { "epoch": 0.26346941872818336, "grad_norm": 0.44148901104927063, "learning_rate": 9.478736330498177e-05, "loss": 1.7827, "step": 868 }, { "epoch": 0.2637729549248748, "grad_norm": 0.41976141929626465, "learning_rate": 9.47812879708384e-05, "loss": 2.127, "step": 869 }, { "epoch": 0.2640764911215662, "grad_norm": 0.5538145899772644, "learning_rate": 9.477521263669502e-05, "loss": 1.7636, "step": 870 }, { "epoch": 0.2643800273182577, "grad_norm": 0.36378878355026245, "learning_rate": 9.476913730255165e-05, "loss": 1.8159, "step": 871 }, { "epoch": 0.26468356351494915, "grad_norm": 0.3874679505825043, "learning_rate": 9.476306196840826e-05, "loss": 1.4785, "step": 872 }, { "epoch": 0.26498709971164064, "grad_norm": 0.4508163332939148, "learning_rate": 9.475698663426489e-05, "loss": 1.9393, "step": 873 }, { "epoch": 0.2652906359083321, "grad_norm": 0.38843271136283875, "learning_rate": 9.475091130012152e-05, "loss": 1.4532, "step": 874 }, { "epoch": 0.2655941721050235, "grad_norm": 0.4603917598724365, "learning_rate": 9.474483596597814e-05, "loss": 2.1182, "step": 875 }, { "epoch": 0.265897708301715, "grad_norm": 0.39668476581573486, "learning_rate": 9.473876063183475e-05, "loss": 1.9447, "step": 876 }, { "epoch": 0.26620124449840643, "grad_norm": 4.796502113342285, "learning_rate": 9.473268529769138e-05, "loss": 1.9395, "step": 877 }, { "epoch": 0.26650478069509786, "grad_norm": 0.38534435629844666, "learning_rate": 9.4726609963548e-05, "loss": 2.0862, "step": 878 }, { "epoch": 0.26680831689178935, "grad_norm": 0.9022141695022583, "learning_rate": 9.472053462940462e-05, "loss": 1.8303, "step": 879 }, { "epoch": 0.2671118530884808, "grad_norm": 0.4020310342311859, "learning_rate": 9.471445929526125e-05, "loss": 1.5084, "step": 880 }, { "epoch": 0.2674153892851723, "grad_norm": 0.30202022194862366, "learning_rate": 9.470838396111787e-05, "loss": 1.8474, "step": 881 }, { "epoch": 0.2677189254818637, "grad_norm": 0.35603514313697815, "learning_rate": 9.470230862697448e-05, "loss": 1.8973, "step": 882 }, { "epoch": 0.26802246167855515, "grad_norm": 0.3749227523803711, "learning_rate": 9.469623329283111e-05, "loss": 1.9763, "step": 883 }, { "epoch": 0.26832599787524664, "grad_norm": 0.45645421743392944, "learning_rate": 9.469015795868773e-05, "loss": 1.5944, "step": 884 }, { "epoch": 0.26862953407193807, "grad_norm": 0.5855579972267151, "learning_rate": 9.468408262454436e-05, "loss": 1.8724, "step": 885 }, { "epoch": 0.26893307026862956, "grad_norm": 0.3752727210521698, "learning_rate": 9.467800729040097e-05, "loss": 2.1792, "step": 886 }, { "epoch": 0.269236606465321, "grad_norm": 0.8951378464698792, "learning_rate": 9.46719319562576e-05, "loss": 1.4745, "step": 887 }, { "epoch": 0.2695401426620124, "grad_norm": 0.5524512529373169, "learning_rate": 9.466585662211423e-05, "loss": 1.9872, "step": 888 }, { "epoch": 0.2698436788587039, "grad_norm": 0.3917500078678131, "learning_rate": 9.465978128797085e-05, "loss": 1.579, "step": 889 }, { "epoch": 0.27014721505539535, "grad_norm": 0.41635704040527344, "learning_rate": 9.465370595382746e-05, "loss": 1.5984, "step": 890 }, { "epoch": 0.2704507512520868, "grad_norm": 0.3544903099536896, "learning_rate": 9.46476306196841e-05, "loss": 1.9112, "step": 891 }, { "epoch": 0.2707542874487783, "grad_norm": 0.4568898379802704, "learning_rate": 9.464155528554071e-05, "loss": 1.9857, "step": 892 }, { "epoch": 0.2710578236454697, "grad_norm": 0.4155702590942383, "learning_rate": 9.463547995139733e-05, "loss": 1.7986, "step": 893 }, { "epoch": 0.2713613598421612, "grad_norm": 0.37953928112983704, "learning_rate": 9.462940461725396e-05, "loss": 1.8383, "step": 894 }, { "epoch": 0.27166489603885263, "grad_norm": 0.37993937730789185, "learning_rate": 9.462332928311058e-05, "loss": 2.0555, "step": 895 }, { "epoch": 0.27196843223554407, "grad_norm": 0.4355872571468353, "learning_rate": 9.46172539489672e-05, "loss": 1.9307, "step": 896 }, { "epoch": 0.27227196843223556, "grad_norm": 0.38673707842826843, "learning_rate": 9.461117861482381e-05, "loss": 1.7155, "step": 897 }, { "epoch": 0.272575504628927, "grad_norm": 0.38927558064460754, "learning_rate": 9.460510328068044e-05, "loss": 2.1022, "step": 898 }, { "epoch": 0.2728790408256185, "grad_norm": 0.40219199657440186, "learning_rate": 9.459902794653707e-05, "loss": 1.3915, "step": 899 }, { "epoch": 0.2731825770223099, "grad_norm": 0.3896184265613556, "learning_rate": 9.459295261239368e-05, "loss": 1.9976, "step": 900 }, { "epoch": 0.27348611321900135, "grad_norm": 0.37489351630210876, "learning_rate": 9.458687727825031e-05, "loss": 1.8479, "step": 901 }, { "epoch": 0.27378964941569284, "grad_norm": 0.39215734601020813, "learning_rate": 9.458080194410694e-05, "loss": 1.9005, "step": 902 }, { "epoch": 0.27409318561238427, "grad_norm": 0.5054829716682434, "learning_rate": 9.457472660996356e-05, "loss": 1.8805, "step": 903 }, { "epoch": 0.2743967218090757, "grad_norm": 0.38437893986701965, "learning_rate": 9.456865127582017e-05, "loss": 1.5506, "step": 904 }, { "epoch": 0.2747002580057672, "grad_norm": 0.38727036118507385, "learning_rate": 9.45625759416768e-05, "loss": 1.7189, "step": 905 }, { "epoch": 0.27500379420245863, "grad_norm": 0.4260677993297577, "learning_rate": 9.455650060753342e-05, "loss": 2.1553, "step": 906 }, { "epoch": 0.2753073303991501, "grad_norm": 0.3969596326351166, "learning_rate": 9.455042527339004e-05, "loss": 1.73, "step": 907 }, { "epoch": 0.27561086659584155, "grad_norm": 0.371412456035614, "learning_rate": 9.454434993924667e-05, "loss": 1.618, "step": 908 }, { "epoch": 0.275914402792533, "grad_norm": 0.32723626494407654, "learning_rate": 9.453827460510329e-05, "loss": 1.897, "step": 909 }, { "epoch": 0.2762179389892245, "grad_norm": 0.37436455488204956, "learning_rate": 9.45321992709599e-05, "loss": 1.4027, "step": 910 }, { "epoch": 0.2765214751859159, "grad_norm": 0.3615550398826599, "learning_rate": 9.452612393681652e-05, "loss": 1.8835, "step": 911 }, { "epoch": 0.2768250113826074, "grad_norm": 0.37427717447280884, "learning_rate": 9.452004860267315e-05, "loss": 1.5918, "step": 912 }, { "epoch": 0.27712854757929883, "grad_norm": 0.4030051827430725, "learning_rate": 9.451397326852978e-05, "loss": 1.5694, "step": 913 }, { "epoch": 0.27743208377599027, "grad_norm": 0.3948831260204315, "learning_rate": 9.450789793438639e-05, "loss": 1.7315, "step": 914 }, { "epoch": 0.27773561997268176, "grad_norm": 0.4105396866798401, "learning_rate": 9.450182260024302e-05, "loss": 2.0528, "step": 915 }, { "epoch": 0.2780391561693732, "grad_norm": 0.400312215089798, "learning_rate": 9.449574726609965e-05, "loss": 1.6631, "step": 916 }, { "epoch": 0.2783426923660646, "grad_norm": 0.40099987387657166, "learning_rate": 9.448967193195625e-05, "loss": 1.9922, "step": 917 }, { "epoch": 0.2786462285627561, "grad_norm": 0.39861205220222473, "learning_rate": 9.448359659781288e-05, "loss": 1.8319, "step": 918 }, { "epoch": 0.27894976475944755, "grad_norm": 0.33672603964805603, "learning_rate": 9.447752126366951e-05, "loss": 1.8562, "step": 919 }, { "epoch": 0.27925330095613904, "grad_norm": 0.3398993909358978, "learning_rate": 9.447144592952613e-05, "loss": 1.8801, "step": 920 }, { "epoch": 0.2795568371528305, "grad_norm": 0.6748337149620056, "learning_rate": 9.446537059538275e-05, "loss": 2.0353, "step": 921 }, { "epoch": 0.2798603733495219, "grad_norm": 0.33281663060188293, "learning_rate": 9.445929526123938e-05, "loss": 2.0932, "step": 922 }, { "epoch": 0.2801639095462134, "grad_norm": 0.37020498514175415, "learning_rate": 9.4453219927096e-05, "loss": 1.8438, "step": 923 }, { "epoch": 0.28046744574290483, "grad_norm": 0.40763506293296814, "learning_rate": 9.444714459295261e-05, "loss": 1.6915, "step": 924 }, { "epoch": 0.2807709819395963, "grad_norm": 0.36651310324668884, "learning_rate": 9.444106925880923e-05, "loss": 2.0502, "step": 925 }, { "epoch": 0.28107451813628775, "grad_norm": 0.6006852388381958, "learning_rate": 9.443499392466586e-05, "loss": 1.9601, "step": 926 }, { "epoch": 0.2813780543329792, "grad_norm": 0.45634040236473083, "learning_rate": 9.442891859052248e-05, "loss": 1.5068, "step": 927 }, { "epoch": 0.2816815905296707, "grad_norm": 0.3380034565925598, "learning_rate": 9.44228432563791e-05, "loss": 1.8657, "step": 928 }, { "epoch": 0.2819851267263621, "grad_norm": 0.39120668172836304, "learning_rate": 9.441676792223573e-05, "loss": 1.9683, "step": 929 }, { "epoch": 0.2822886629230536, "grad_norm": 0.41591060161590576, "learning_rate": 9.441069258809236e-05, "loss": 1.9994, "step": 930 }, { "epoch": 0.28259219911974504, "grad_norm": 0.3863435983657837, "learning_rate": 9.440461725394896e-05, "loss": 1.8127, "step": 931 }, { "epoch": 0.28289573531643647, "grad_norm": 0.3713644742965698, "learning_rate": 9.439854191980559e-05, "loss": 1.7514, "step": 932 }, { "epoch": 0.28319927151312796, "grad_norm": 0.36419039964675903, "learning_rate": 9.439246658566222e-05, "loss": 1.9044, "step": 933 }, { "epoch": 0.2835028077098194, "grad_norm": 0.4059010148048401, "learning_rate": 9.438639125151884e-05, "loss": 1.783, "step": 934 }, { "epoch": 0.2838063439065108, "grad_norm": 0.5016249418258667, "learning_rate": 9.438031591737546e-05, "loss": 1.9958, "step": 935 }, { "epoch": 0.2841098801032023, "grad_norm": 0.4264843463897705, "learning_rate": 9.437424058323209e-05, "loss": 1.5526, "step": 936 }, { "epoch": 0.28441341629989375, "grad_norm": 0.5768559575080872, "learning_rate": 9.43681652490887e-05, "loss": 1.719, "step": 937 }, { "epoch": 0.28471695249658524, "grad_norm": 0.42008429765701294, "learning_rate": 9.436208991494532e-05, "loss": 2.057, "step": 938 }, { "epoch": 0.2850204886932767, "grad_norm": 0.3530850112438202, "learning_rate": 9.435601458080194e-05, "loss": 1.7118, "step": 939 }, { "epoch": 0.2853240248899681, "grad_norm": 0.44346508383750916, "learning_rate": 9.434993924665857e-05, "loss": 2.1416, "step": 940 }, { "epoch": 0.2856275610866596, "grad_norm": 0.645882785320282, "learning_rate": 9.434386391251519e-05, "loss": 1.8368, "step": 941 }, { "epoch": 0.28593109728335103, "grad_norm": 0.784821093082428, "learning_rate": 9.43377885783718e-05, "loss": 1.9541, "step": 942 }, { "epoch": 0.2862346334800425, "grad_norm": 0.43880385160446167, "learning_rate": 9.433171324422844e-05, "loss": 2.0319, "step": 943 }, { "epoch": 0.28653816967673396, "grad_norm": 0.6283034682273865, "learning_rate": 9.432563791008507e-05, "loss": 1.5762, "step": 944 }, { "epoch": 0.2868417058734254, "grad_norm": 0.3591736853122711, "learning_rate": 9.431956257594167e-05, "loss": 2.1589, "step": 945 }, { "epoch": 0.2871452420701169, "grad_norm": 0.3970873951911926, "learning_rate": 9.43134872417983e-05, "loss": 1.4798, "step": 946 }, { "epoch": 0.2874487782668083, "grad_norm": 0.42486631870269775, "learning_rate": 9.430741190765493e-05, "loss": 1.8345, "step": 947 }, { "epoch": 0.28775231446349975, "grad_norm": 0.37290090322494507, "learning_rate": 9.430133657351155e-05, "loss": 1.9313, "step": 948 }, { "epoch": 0.28805585066019124, "grad_norm": 0.47855010628700256, "learning_rate": 9.429526123936817e-05, "loss": 1.4405, "step": 949 }, { "epoch": 0.28835938685688267, "grad_norm": 0.4648813009262085, "learning_rate": 9.42891859052248e-05, "loss": 1.6818, "step": 950 }, { "epoch": 0.28866292305357416, "grad_norm": 0.40000760555267334, "learning_rate": 9.428311057108142e-05, "loss": 1.9014, "step": 951 }, { "epoch": 0.2889664592502656, "grad_norm": 0.3846280872821808, "learning_rate": 9.427703523693803e-05, "loss": 1.6268, "step": 952 }, { "epoch": 0.28926999544695703, "grad_norm": 0.43172597885131836, "learning_rate": 9.427095990279465e-05, "loss": 1.6287, "step": 953 }, { "epoch": 0.2895735316436485, "grad_norm": 0.42565402388572693, "learning_rate": 9.426488456865128e-05, "loss": 1.8678, "step": 954 }, { "epoch": 0.28987706784033995, "grad_norm": 1.070906400680542, "learning_rate": 9.42588092345079e-05, "loss": 1.7688, "step": 955 }, { "epoch": 0.29018060403703144, "grad_norm": 0.4792560935020447, "learning_rate": 9.425273390036452e-05, "loss": 1.09, "step": 956 }, { "epoch": 0.2904841402337229, "grad_norm": 0.37043797969818115, "learning_rate": 9.424665856622115e-05, "loss": 1.6931, "step": 957 }, { "epoch": 0.2907876764304143, "grad_norm": 0.37764909863471985, "learning_rate": 9.424058323207778e-05, "loss": 1.8621, "step": 958 }, { "epoch": 0.2910912126271058, "grad_norm": 0.40328919887542725, "learning_rate": 9.423450789793438e-05, "loss": 1.9484, "step": 959 }, { "epoch": 0.29139474882379723, "grad_norm": 0.4451077878475189, "learning_rate": 9.422843256379101e-05, "loss": 1.7273, "step": 960 }, { "epoch": 0.29169828502048867, "grad_norm": 0.5410102009773254, "learning_rate": 9.422235722964764e-05, "loss": 2.0116, "step": 961 }, { "epoch": 0.29200182121718016, "grad_norm": 0.42526179552078247, "learning_rate": 9.421628189550426e-05, "loss": 1.6596, "step": 962 }, { "epoch": 0.2923053574138716, "grad_norm": 0.3813883066177368, "learning_rate": 9.421020656136088e-05, "loss": 2.0083, "step": 963 }, { "epoch": 0.2926088936105631, "grad_norm": 0.3967495858669281, "learning_rate": 9.420413122721751e-05, "loss": 1.8665, "step": 964 }, { "epoch": 0.2929124298072545, "grad_norm": 0.4672113060951233, "learning_rate": 9.419805589307413e-05, "loss": 2.1132, "step": 965 }, { "epoch": 0.29321596600394595, "grad_norm": 0.4068308472633362, "learning_rate": 9.419198055893074e-05, "loss": 2.0042, "step": 966 }, { "epoch": 0.29351950220063744, "grad_norm": 0.8895217180252075, "learning_rate": 9.418590522478736e-05, "loss": 1.9725, "step": 967 }, { "epoch": 0.2938230383973289, "grad_norm": 0.6839628219604492, "learning_rate": 9.417982989064399e-05, "loss": 1.5712, "step": 968 }, { "epoch": 0.29412657459402036, "grad_norm": 0.5890039801597595, "learning_rate": 9.417375455650061e-05, "loss": 2.1177, "step": 969 }, { "epoch": 0.2944301107907118, "grad_norm": 0.33217447996139526, "learning_rate": 9.416767922235723e-05, "loss": 1.9683, "step": 970 }, { "epoch": 0.29473364698740323, "grad_norm": 0.44125109910964966, "learning_rate": 9.416160388821386e-05, "loss": 1.6272, "step": 971 }, { "epoch": 0.2950371831840947, "grad_norm": 0.367145836353302, "learning_rate": 9.415552855407049e-05, "loss": 2.0624, "step": 972 }, { "epoch": 0.29534071938078615, "grad_norm": 0.3220556080341339, "learning_rate": 9.414945321992709e-05, "loss": 1.4513, "step": 973 }, { "epoch": 0.2956442555774776, "grad_norm": 0.37686339020729065, "learning_rate": 9.414337788578372e-05, "loss": 1.6418, "step": 974 }, { "epoch": 0.2959477917741691, "grad_norm": 0.46043211221694946, "learning_rate": 9.413730255164035e-05, "loss": 2.0433, "step": 975 }, { "epoch": 0.2962513279708605, "grad_norm": 0.40463754534721375, "learning_rate": 9.413122721749697e-05, "loss": 1.8214, "step": 976 }, { "epoch": 0.296554864167552, "grad_norm": 0.406583309173584, "learning_rate": 9.412515188335359e-05, "loss": 1.5704, "step": 977 }, { "epoch": 0.29685840036424344, "grad_norm": 0.4335365295410156, "learning_rate": 9.41190765492102e-05, "loss": 1.8772, "step": 978 }, { "epoch": 0.29716193656093487, "grad_norm": 0.43915802240371704, "learning_rate": 9.411300121506684e-05, "loss": 1.5376, "step": 979 }, { "epoch": 0.29746547275762636, "grad_norm": 0.36118191480636597, "learning_rate": 9.410692588092345e-05, "loss": 1.8994, "step": 980 }, { "epoch": 0.2977690089543178, "grad_norm": 0.4184354841709137, "learning_rate": 9.410085054678007e-05, "loss": 2.0041, "step": 981 }, { "epoch": 0.2980725451510093, "grad_norm": 0.3743583559989929, "learning_rate": 9.40947752126367e-05, "loss": 1.8986, "step": 982 }, { "epoch": 0.2983760813477007, "grad_norm": 0.4110506474971771, "learning_rate": 9.408869987849332e-05, "loss": 2.0617, "step": 983 }, { "epoch": 0.29867961754439215, "grad_norm": 0.33404871821403503, "learning_rate": 9.408262454434994e-05, "loss": 2.0014, "step": 984 }, { "epoch": 0.29898315374108364, "grad_norm": 0.3586455285549164, "learning_rate": 9.407654921020657e-05, "loss": 1.4912, "step": 985 }, { "epoch": 0.2992866899377751, "grad_norm": 0.3859756886959076, "learning_rate": 9.40704738760632e-05, "loss": 1.7919, "step": 986 }, { "epoch": 0.2995902261344665, "grad_norm": 0.4533100724220276, "learning_rate": 9.40643985419198e-05, "loss": 2.1134, "step": 987 }, { "epoch": 0.299893762331158, "grad_norm": 0.399854838848114, "learning_rate": 9.405832320777643e-05, "loss": 1.8198, "step": 988 }, { "epoch": 0.30019729852784943, "grad_norm": 0.3582475781440735, "learning_rate": 9.405224787363306e-05, "loss": 1.6337, "step": 989 }, { "epoch": 0.3005008347245409, "grad_norm": 0.39537113904953003, "learning_rate": 9.404617253948967e-05, "loss": 1.9856, "step": 990 }, { "epoch": 0.30080437092123236, "grad_norm": 0.3662082254886627, "learning_rate": 9.40400972053463e-05, "loss": 2.0424, "step": 991 }, { "epoch": 0.3011079071179238, "grad_norm": 0.38339659571647644, "learning_rate": 9.403402187120292e-05, "loss": 2.0293, "step": 992 }, { "epoch": 0.3014114433146153, "grad_norm": 0.3134559988975525, "learning_rate": 9.402794653705955e-05, "loss": 1.8086, "step": 993 }, { "epoch": 0.3017149795113067, "grad_norm": 0.4155486226081848, "learning_rate": 9.402187120291616e-05, "loss": 1.8748, "step": 994 }, { "epoch": 0.3020185157079982, "grad_norm": 0.41562893986701965, "learning_rate": 9.401579586877278e-05, "loss": 1.6104, "step": 995 }, { "epoch": 0.30232205190468964, "grad_norm": 0.36112940311431885, "learning_rate": 9.400972053462941e-05, "loss": 1.335, "step": 996 }, { "epoch": 0.30262558810138107, "grad_norm": 0.4332577586174011, "learning_rate": 9.400364520048603e-05, "loss": 1.6098, "step": 997 }, { "epoch": 0.30292912429807256, "grad_norm": 0.3520275950431824, "learning_rate": 9.399756986634265e-05, "loss": 1.9924, "step": 998 }, { "epoch": 0.303232660494764, "grad_norm": 0.44331827759742737, "learning_rate": 9.399149453219928e-05, "loss": 1.8021, "step": 999 }, { "epoch": 0.30353619669145543, "grad_norm": 0.38627490401268005, "learning_rate": 9.398541919805591e-05, "loss": 1.8952, "step": 1000 }, { "epoch": 0.3038397328881469, "grad_norm": 0.42670029401779175, "learning_rate": 9.397934386391251e-05, "loss": 1.7, "step": 1001 }, { "epoch": 0.30414326908483835, "grad_norm": 0.31961289048194885, "learning_rate": 9.397326852976914e-05, "loss": 1.4874, "step": 1002 }, { "epoch": 0.30444680528152984, "grad_norm": 0.46648967266082764, "learning_rate": 9.396719319562577e-05, "loss": 1.962, "step": 1003 }, { "epoch": 0.3047503414782213, "grad_norm": 0.39356762170791626, "learning_rate": 9.396111786148238e-05, "loss": 2.0206, "step": 1004 }, { "epoch": 0.3050538776749127, "grad_norm": 0.37756818532943726, "learning_rate": 9.395504252733901e-05, "loss": 1.9863, "step": 1005 }, { "epoch": 0.3053574138716042, "grad_norm": 0.3291250765323639, "learning_rate": 9.394896719319563e-05, "loss": 1.9422, "step": 1006 }, { "epoch": 0.30566095006829563, "grad_norm": 0.397297739982605, "learning_rate": 9.394289185905226e-05, "loss": 1.7533, "step": 1007 }, { "epoch": 0.3059644862649871, "grad_norm": 0.33320048451423645, "learning_rate": 9.393681652490887e-05, "loss": 1.6411, "step": 1008 }, { "epoch": 0.30626802246167856, "grad_norm": 0.38921716809272766, "learning_rate": 9.393074119076549e-05, "loss": 1.9216, "step": 1009 }, { "epoch": 0.30657155865837, "grad_norm": 0.40245047211647034, "learning_rate": 9.392466585662212e-05, "loss": 1.9853, "step": 1010 }, { "epoch": 0.3068750948550615, "grad_norm": 0.5569208264350891, "learning_rate": 9.391859052247874e-05, "loss": 2.157, "step": 1011 }, { "epoch": 0.3071786310517529, "grad_norm": 0.4204193949699402, "learning_rate": 9.391251518833536e-05, "loss": 1.666, "step": 1012 }, { "epoch": 0.3074821672484444, "grad_norm": 0.3458712100982666, "learning_rate": 9.390643985419199e-05, "loss": 1.9564, "step": 1013 }, { "epoch": 0.30778570344513584, "grad_norm": 0.42556729912757874, "learning_rate": 9.39003645200486e-05, "loss": 2.0565, "step": 1014 }, { "epoch": 0.3080892396418273, "grad_norm": 0.3334849774837494, "learning_rate": 9.389428918590522e-05, "loss": 1.9534, "step": 1015 }, { "epoch": 0.30839277583851876, "grad_norm": 0.3297790288925171, "learning_rate": 9.388821385176185e-05, "loss": 2.0032, "step": 1016 }, { "epoch": 0.3086963120352102, "grad_norm": 0.4108186662197113, "learning_rate": 9.388213851761848e-05, "loss": 1.6698, "step": 1017 }, { "epoch": 0.30899984823190163, "grad_norm": 0.4515385925769806, "learning_rate": 9.387606318347509e-05, "loss": 1.9709, "step": 1018 }, { "epoch": 0.3093033844285931, "grad_norm": 0.38401028513908386, "learning_rate": 9.386998784933172e-05, "loss": 1.9928, "step": 1019 }, { "epoch": 0.30960692062528455, "grad_norm": 0.32774823904037476, "learning_rate": 9.386391251518834e-05, "loss": 2.1256, "step": 1020 }, { "epoch": 0.30991045682197604, "grad_norm": 0.45378655195236206, "learning_rate": 9.385783718104497e-05, "loss": 1.8078, "step": 1021 }, { "epoch": 0.3102139930186675, "grad_norm": 0.3340519368648529, "learning_rate": 9.385176184690158e-05, "loss": 1.6102, "step": 1022 }, { "epoch": 0.3105175292153589, "grad_norm": 0.5457311868667603, "learning_rate": 9.38456865127582e-05, "loss": 1.9735, "step": 1023 }, { "epoch": 0.3108210654120504, "grad_norm": 0.3604097068309784, "learning_rate": 9.383961117861483e-05, "loss": 1.8048, "step": 1024 }, { "epoch": 0.31112460160874184, "grad_norm": 0.3677893579006195, "learning_rate": 9.383353584447145e-05, "loss": 1.5346, "step": 1025 }, { "epoch": 0.3114281378054333, "grad_norm": 0.49554312229156494, "learning_rate": 9.382746051032807e-05, "loss": 2.0472, "step": 1026 }, { "epoch": 0.31173167400212476, "grad_norm": 0.37693944573402405, "learning_rate": 9.38213851761847e-05, "loss": 1.8848, "step": 1027 }, { "epoch": 0.3120352101988162, "grad_norm": 0.6364639401435852, "learning_rate": 9.381530984204132e-05, "loss": 1.7289, "step": 1028 }, { "epoch": 0.3123387463955077, "grad_norm": 0.36025428771972656, "learning_rate": 9.380923450789793e-05, "loss": 1.8544, "step": 1029 }, { "epoch": 0.3126422825921991, "grad_norm": 0.4033251106739044, "learning_rate": 9.380315917375456e-05, "loss": 1.8661, "step": 1030 }, { "epoch": 0.31294581878889055, "grad_norm": 0.41504162549972534, "learning_rate": 9.37970838396112e-05, "loss": 1.9619, "step": 1031 }, { "epoch": 0.31324935498558204, "grad_norm": 0.38639551401138306, "learning_rate": 9.37910085054678e-05, "loss": 2.1785, "step": 1032 }, { "epoch": 0.3135528911822735, "grad_norm": 0.3487949073314667, "learning_rate": 9.378493317132443e-05, "loss": 1.5571, "step": 1033 }, { "epoch": 0.31385642737896496, "grad_norm": 0.3317317068576813, "learning_rate": 9.377885783718105e-05, "loss": 2.1086, "step": 1034 }, { "epoch": 0.3141599635756564, "grad_norm": 0.35874056816101074, "learning_rate": 9.377278250303768e-05, "loss": 1.9453, "step": 1035 }, { "epoch": 0.31446349977234783, "grad_norm": 0.3823045790195465, "learning_rate": 9.37667071688943e-05, "loss": 1.6444, "step": 1036 }, { "epoch": 0.3147670359690393, "grad_norm": 0.39954647421836853, "learning_rate": 9.376063183475091e-05, "loss": 2.0497, "step": 1037 }, { "epoch": 0.31507057216573076, "grad_norm": 0.34357962012290955, "learning_rate": 9.375455650060754e-05, "loss": 1.8391, "step": 1038 }, { "epoch": 0.31537410836242225, "grad_norm": 0.35260939598083496, "learning_rate": 9.374848116646416e-05, "loss": 1.9691, "step": 1039 }, { "epoch": 0.3156776445591137, "grad_norm": 0.33483296632766724, "learning_rate": 9.374240583232078e-05, "loss": 1.8933, "step": 1040 }, { "epoch": 0.3159811807558051, "grad_norm": 0.4771517515182495, "learning_rate": 9.373633049817741e-05, "loss": 1.8574, "step": 1041 }, { "epoch": 0.3162847169524966, "grad_norm": 0.3025968372821808, "learning_rate": 9.373025516403403e-05, "loss": 1.7995, "step": 1042 }, { "epoch": 0.31658825314918804, "grad_norm": 0.39535394310951233, "learning_rate": 9.372417982989064e-05, "loss": 1.6662, "step": 1043 }, { "epoch": 0.31689178934587947, "grad_norm": 0.35718834400177, "learning_rate": 9.371810449574727e-05, "loss": 2.3543, "step": 1044 }, { "epoch": 0.31719532554257096, "grad_norm": 0.40815529227256775, "learning_rate": 9.37120291616039e-05, "loss": 1.8978, "step": 1045 }, { "epoch": 0.3174988617392624, "grad_norm": 0.38799992203712463, "learning_rate": 9.370595382746051e-05, "loss": 1.9145, "step": 1046 }, { "epoch": 0.3178023979359539, "grad_norm": 0.3711848556995392, "learning_rate": 9.369987849331714e-05, "loss": 1.9129, "step": 1047 }, { "epoch": 0.3181059341326453, "grad_norm": 1.041429877281189, "learning_rate": 9.369380315917376e-05, "loss": 1.3535, "step": 1048 }, { "epoch": 0.31840947032933675, "grad_norm": 0.4107154309749603, "learning_rate": 9.368772782503039e-05, "loss": 1.6886, "step": 1049 }, { "epoch": 0.31871300652602824, "grad_norm": 0.35202670097351074, "learning_rate": 9.3681652490887e-05, "loss": 1.4685, "step": 1050 }, { "epoch": 0.3190165427227197, "grad_norm": 0.39248141646385193, "learning_rate": 9.367557715674362e-05, "loss": 1.6177, "step": 1051 }, { "epoch": 0.31932007891941117, "grad_norm": 0.3911724388599396, "learning_rate": 9.366950182260025e-05, "loss": 1.5015, "step": 1052 }, { "epoch": 0.3196236151161026, "grad_norm": 0.8974817991256714, "learning_rate": 9.366342648845687e-05, "loss": 1.5422, "step": 1053 }, { "epoch": 0.31992715131279403, "grad_norm": 0.4150513708591461, "learning_rate": 9.365735115431349e-05, "loss": 2.0201, "step": 1054 }, { "epoch": 0.3202306875094855, "grad_norm": 0.4205161929130554, "learning_rate": 9.365127582017012e-05, "loss": 1.881, "step": 1055 }, { "epoch": 0.32053422370617696, "grad_norm": 0.36916840076446533, "learning_rate": 9.364520048602674e-05, "loss": 1.9912, "step": 1056 }, { "epoch": 0.3208377599028684, "grad_norm": 0.45616719126701355, "learning_rate": 9.363912515188335e-05, "loss": 1.825, "step": 1057 }, { "epoch": 0.3211412960995599, "grad_norm": 0.3602239787578583, "learning_rate": 9.363304981773998e-05, "loss": 1.5264, "step": 1058 }, { "epoch": 0.3214448322962513, "grad_norm": 0.39383935928344727, "learning_rate": 9.362697448359661e-05, "loss": 1.9719, "step": 1059 }, { "epoch": 0.3217483684929428, "grad_norm": 0.321859210729599, "learning_rate": 9.362089914945322e-05, "loss": 1.7703, "step": 1060 }, { "epoch": 0.32205190468963424, "grad_norm": 0.40060603618621826, "learning_rate": 9.361482381530985e-05, "loss": 1.7199, "step": 1061 }, { "epoch": 0.3223554408863257, "grad_norm": 0.4096384644508362, "learning_rate": 9.360874848116647e-05, "loss": 1.5069, "step": 1062 }, { "epoch": 0.32265897708301716, "grad_norm": 0.35391515493392944, "learning_rate": 9.360267314702308e-05, "loss": 1.8928, "step": 1063 }, { "epoch": 0.3229625132797086, "grad_norm": 0.3309794068336487, "learning_rate": 9.359659781287971e-05, "loss": 1.6238, "step": 1064 }, { "epoch": 0.3232660494764001, "grad_norm": 0.37579798698425293, "learning_rate": 9.359052247873633e-05, "loss": 2.329, "step": 1065 }, { "epoch": 0.3235695856730915, "grad_norm": 0.41262614727020264, "learning_rate": 9.358444714459296e-05, "loss": 1.9192, "step": 1066 }, { "epoch": 0.32387312186978295, "grad_norm": 0.3737616539001465, "learning_rate": 9.357837181044958e-05, "loss": 1.9614, "step": 1067 }, { "epoch": 0.32417665806647444, "grad_norm": 0.35716524720191956, "learning_rate": 9.35722964763062e-05, "loss": 1.9046, "step": 1068 }, { "epoch": 0.3244801942631659, "grad_norm": 1.6110327243804932, "learning_rate": 9.356622114216283e-05, "loss": 1.5437, "step": 1069 }, { "epoch": 0.3247837304598573, "grad_norm": 0.3114778399467468, "learning_rate": 9.356014580801945e-05, "loss": 1.8962, "step": 1070 }, { "epoch": 0.3250872666565488, "grad_norm": 0.35084468126296997, "learning_rate": 9.355407047387606e-05, "loss": 2.0253, "step": 1071 }, { "epoch": 0.32539080285324024, "grad_norm": 0.38513630628585815, "learning_rate": 9.35479951397327e-05, "loss": 1.7556, "step": 1072 }, { "epoch": 0.3256943390499317, "grad_norm": 0.41520386934280396, "learning_rate": 9.354191980558931e-05, "loss": 1.274, "step": 1073 }, { "epoch": 0.32599787524662316, "grad_norm": 0.3998602032661438, "learning_rate": 9.353584447144593e-05, "loss": 1.9963, "step": 1074 }, { "epoch": 0.3263014114433146, "grad_norm": 0.3973468244075775, "learning_rate": 9.352976913730256e-05, "loss": 2.2281, "step": 1075 }, { "epoch": 0.3266049476400061, "grad_norm": 0.37020763754844666, "learning_rate": 9.352369380315918e-05, "loss": 1.6891, "step": 1076 }, { "epoch": 0.3269084838366975, "grad_norm": 0.43367013335227966, "learning_rate": 9.35176184690158e-05, "loss": 1.5859, "step": 1077 }, { "epoch": 0.327212020033389, "grad_norm": 0.3882901072502136, "learning_rate": 9.351154313487242e-05, "loss": 1.5294, "step": 1078 }, { "epoch": 0.32751555623008044, "grad_norm": 0.38236895203590393, "learning_rate": 9.350546780072904e-05, "loss": 2.0035, "step": 1079 }, { "epoch": 0.3278190924267719, "grad_norm": 0.42090603709220886, "learning_rate": 9.349939246658567e-05, "loss": 1.325, "step": 1080 }, { "epoch": 0.32812262862346336, "grad_norm": 0.4210514724254608, "learning_rate": 9.349331713244229e-05, "loss": 1.9018, "step": 1081 }, { "epoch": 0.3284261648201548, "grad_norm": 0.3695550858974457, "learning_rate": 9.348724179829891e-05, "loss": 2.0823, "step": 1082 }, { "epoch": 0.32872970101684623, "grad_norm": 0.44178470969200134, "learning_rate": 9.348116646415554e-05, "loss": 1.9396, "step": 1083 }, { "epoch": 0.3290332372135377, "grad_norm": 2.9311540126800537, "learning_rate": 9.347509113001216e-05, "loss": 2.0483, "step": 1084 }, { "epoch": 0.32933677341022916, "grad_norm": 0.38238954544067383, "learning_rate": 9.346901579586877e-05, "loss": 2.0408, "step": 1085 }, { "epoch": 0.32964030960692065, "grad_norm": 0.420622318983078, "learning_rate": 9.34629404617254e-05, "loss": 1.9811, "step": 1086 }, { "epoch": 0.3299438458036121, "grad_norm": 0.47827744483947754, "learning_rate": 9.345686512758202e-05, "loss": 1.7816, "step": 1087 }, { "epoch": 0.3302473820003035, "grad_norm": 0.3673538565635681, "learning_rate": 9.345078979343864e-05, "loss": 1.9916, "step": 1088 }, { "epoch": 0.330550918196995, "grad_norm": 1.2525584697723389, "learning_rate": 9.344471445929527e-05, "loss": 2.0323, "step": 1089 }, { "epoch": 0.33085445439368644, "grad_norm": 0.3575446605682373, "learning_rate": 9.343863912515189e-05, "loss": 2.0254, "step": 1090 }, { "epoch": 0.3311579905903779, "grad_norm": 0.4579968750476837, "learning_rate": 9.34325637910085e-05, "loss": 1.4365, "step": 1091 }, { "epoch": 0.33146152678706936, "grad_norm": 0.5363442301750183, "learning_rate": 9.342648845686513e-05, "loss": 2.0635, "step": 1092 }, { "epoch": 0.3317650629837608, "grad_norm": 0.4065784215927124, "learning_rate": 9.342041312272175e-05, "loss": 1.6133, "step": 1093 }, { "epoch": 0.3320685991804523, "grad_norm": 0.4256560504436493, "learning_rate": 9.341433778857838e-05, "loss": 1.7574, "step": 1094 }, { "epoch": 0.3323721353771437, "grad_norm": 0.3566704988479614, "learning_rate": 9.3408262454435e-05, "loss": 1.4446, "step": 1095 }, { "epoch": 0.33267567157383515, "grad_norm": 0.39680102467536926, "learning_rate": 9.340218712029162e-05, "loss": 2.1008, "step": 1096 }, { "epoch": 0.33297920777052664, "grad_norm": 0.39213013648986816, "learning_rate": 9.339611178614825e-05, "loss": 1.5924, "step": 1097 }, { "epoch": 0.3332827439672181, "grad_norm": 0.39503929018974304, "learning_rate": 9.339003645200487e-05, "loss": 1.9126, "step": 1098 }, { "epoch": 0.33358628016390957, "grad_norm": 0.34226784110069275, "learning_rate": 9.338396111786148e-05, "loss": 1.3475, "step": 1099 }, { "epoch": 0.333889816360601, "grad_norm": 0.3511487543582916, "learning_rate": 9.337788578371811e-05, "loss": 1.6679, "step": 1100 }, { "epoch": 0.33419335255729243, "grad_norm": 0.6215702295303345, "learning_rate": 9.337181044957473e-05, "loss": 1.7599, "step": 1101 }, { "epoch": 0.3344968887539839, "grad_norm": 0.34477895498275757, "learning_rate": 9.336573511543135e-05, "loss": 1.4905, "step": 1102 }, { "epoch": 0.33480042495067536, "grad_norm": 0.37696805596351624, "learning_rate": 9.335965978128798e-05, "loss": 1.2661, "step": 1103 }, { "epoch": 0.33510396114736685, "grad_norm": 0.3722575306892395, "learning_rate": 9.33535844471446e-05, "loss": 1.8483, "step": 1104 }, { "epoch": 0.3354074973440583, "grad_norm": 0.5641891360282898, "learning_rate": 9.334750911300121e-05, "loss": 2.0121, "step": 1105 }, { "epoch": 0.3357110335407497, "grad_norm": 0.37221911549568176, "learning_rate": 9.334143377885784e-05, "loss": 1.9564, "step": 1106 }, { "epoch": 0.3360145697374412, "grad_norm": 0.3777831494808197, "learning_rate": 9.333535844471446e-05, "loss": 1.8625, "step": 1107 }, { "epoch": 0.33631810593413264, "grad_norm": 0.4311445951461792, "learning_rate": 9.332928311057109e-05, "loss": 2.038, "step": 1108 }, { "epoch": 0.33662164213082413, "grad_norm": 0.4373043179512024, "learning_rate": 9.332320777642771e-05, "loss": 1.6282, "step": 1109 }, { "epoch": 0.33692517832751556, "grad_norm": 0.3800273835659027, "learning_rate": 9.331713244228433e-05, "loss": 1.5552, "step": 1110 }, { "epoch": 0.337228714524207, "grad_norm": 0.6762371063232422, "learning_rate": 9.331105710814096e-05, "loss": 1.7299, "step": 1111 }, { "epoch": 0.3375322507208985, "grad_norm": 0.3713971972465515, "learning_rate": 9.330498177399758e-05, "loss": 1.9919, "step": 1112 }, { "epoch": 0.3378357869175899, "grad_norm": 0.40268123149871826, "learning_rate": 9.329890643985419e-05, "loss": 1.9952, "step": 1113 }, { "epoch": 0.33813932311428135, "grad_norm": 0.44786572456359863, "learning_rate": 9.329283110571082e-05, "loss": 2.1622, "step": 1114 }, { "epoch": 0.33844285931097284, "grad_norm": 0.39639097452163696, "learning_rate": 9.328675577156744e-05, "loss": 1.7995, "step": 1115 }, { "epoch": 0.3387463955076643, "grad_norm": 0.5204857587814331, "learning_rate": 9.328068043742406e-05, "loss": 1.9925, "step": 1116 }, { "epoch": 0.33904993170435577, "grad_norm": 0.4180005192756653, "learning_rate": 9.327460510328069e-05, "loss": 1.8489, "step": 1117 }, { "epoch": 0.3393534679010472, "grad_norm": 0.42055949568748474, "learning_rate": 9.32685297691373e-05, "loss": 1.7729, "step": 1118 }, { "epoch": 0.33965700409773864, "grad_norm": 0.4213305115699768, "learning_rate": 9.326245443499392e-05, "loss": 1.9299, "step": 1119 }, { "epoch": 0.3399605402944301, "grad_norm": 0.7117316126823425, "learning_rate": 9.325637910085055e-05, "loss": 1.7163, "step": 1120 }, { "epoch": 0.34026407649112156, "grad_norm": 0.3836345374584198, "learning_rate": 9.325030376670717e-05, "loss": 1.8703, "step": 1121 }, { "epoch": 0.34056761268781305, "grad_norm": 0.5493946075439453, "learning_rate": 9.32442284325638e-05, "loss": 2.1161, "step": 1122 }, { "epoch": 0.3408711488845045, "grad_norm": 0.4323013722896576, "learning_rate": 9.323815309842042e-05, "loss": 1.9867, "step": 1123 }, { "epoch": 0.3411746850811959, "grad_norm": 0.37991368770599365, "learning_rate": 9.323207776427704e-05, "loss": 1.935, "step": 1124 }, { "epoch": 0.3414782212778874, "grad_norm": 0.40191560983657837, "learning_rate": 9.322600243013367e-05, "loss": 1.7617, "step": 1125 }, { "epoch": 0.34178175747457884, "grad_norm": 0.42309120297431946, "learning_rate": 9.321992709599029e-05, "loss": 2.1438, "step": 1126 }, { "epoch": 0.3420852936712703, "grad_norm": 0.3918818235397339, "learning_rate": 9.32138517618469e-05, "loss": 1.9434, "step": 1127 }, { "epoch": 0.34238882986796176, "grad_norm": 0.9797879457473755, "learning_rate": 9.320777642770353e-05, "loss": 1.8893, "step": 1128 }, { "epoch": 0.3426923660646532, "grad_norm": 0.38459375500679016, "learning_rate": 9.320170109356015e-05, "loss": 1.9626, "step": 1129 }, { "epoch": 0.3429959022613447, "grad_norm": 0.35924455523490906, "learning_rate": 9.319562575941677e-05, "loss": 1.7108, "step": 1130 }, { "epoch": 0.3432994384580361, "grad_norm": 0.3576562702655792, "learning_rate": 9.31895504252734e-05, "loss": 1.9426, "step": 1131 }, { "epoch": 0.34360297465472756, "grad_norm": 0.3931269943714142, "learning_rate": 9.318347509113002e-05, "loss": 1.9598, "step": 1132 }, { "epoch": 0.34390651085141904, "grad_norm": 0.41744112968444824, "learning_rate": 9.317739975698663e-05, "loss": 1.9522, "step": 1133 }, { "epoch": 0.3442100470481105, "grad_norm": 0.3721160590648651, "learning_rate": 9.317132442284326e-05, "loss": 1.366, "step": 1134 }, { "epoch": 0.34451358324480197, "grad_norm": 0.4886751174926758, "learning_rate": 9.316524908869988e-05, "loss": 1.8528, "step": 1135 }, { "epoch": 0.3448171194414934, "grad_norm": 0.3837689161300659, "learning_rate": 9.31591737545565e-05, "loss": 2.0877, "step": 1136 }, { "epoch": 0.34512065563818484, "grad_norm": 0.3721841871738434, "learning_rate": 9.315309842041313e-05, "loss": 2.0144, "step": 1137 }, { "epoch": 0.3454241918348763, "grad_norm": 0.4325003921985626, "learning_rate": 9.314702308626975e-05, "loss": 2.2377, "step": 1138 }, { "epoch": 0.34572772803156776, "grad_norm": 0.39936354756355286, "learning_rate": 9.314094775212638e-05, "loss": 1.9469, "step": 1139 }, { "epoch": 0.3460312642282592, "grad_norm": 0.38498643040657043, "learning_rate": 9.3134872417983e-05, "loss": 2.0822, "step": 1140 }, { "epoch": 0.3463348004249507, "grad_norm": 0.3657349944114685, "learning_rate": 9.312879708383961e-05, "loss": 1.8089, "step": 1141 }, { "epoch": 0.3466383366216421, "grad_norm": 2.0269200801849365, "learning_rate": 9.312272174969624e-05, "loss": 1.8882, "step": 1142 }, { "epoch": 0.3469418728183336, "grad_norm": 0.3976801931858063, "learning_rate": 9.311664641555286e-05, "loss": 1.8956, "step": 1143 }, { "epoch": 0.34724540901502504, "grad_norm": 0.47736337780952454, "learning_rate": 9.311057108140948e-05, "loss": 1.4612, "step": 1144 }, { "epoch": 0.3475489452117165, "grad_norm": 0.4764254093170166, "learning_rate": 9.310449574726611e-05, "loss": 2.119, "step": 1145 }, { "epoch": 0.34785248140840797, "grad_norm": 0.49367082118988037, "learning_rate": 9.309842041312273e-05, "loss": 1.1816, "step": 1146 }, { "epoch": 0.3481560176050994, "grad_norm": 0.48990949988365173, "learning_rate": 9.309234507897934e-05, "loss": 1.6951, "step": 1147 }, { "epoch": 0.3484595538017909, "grad_norm": 0.6447961330413818, "learning_rate": 9.308626974483597e-05, "loss": 2.0145, "step": 1148 }, { "epoch": 0.3487630899984823, "grad_norm": 0.8322371244430542, "learning_rate": 9.308019441069259e-05, "loss": 1.8679, "step": 1149 }, { "epoch": 0.34906662619517376, "grad_norm": 0.3726497292518616, "learning_rate": 9.307411907654921e-05, "loss": 1.8455, "step": 1150 }, { "epoch": 0.34937016239186525, "grad_norm": 0.3494066298007965, "learning_rate": 9.306804374240584e-05, "loss": 1.7699, "step": 1151 }, { "epoch": 0.3496736985885567, "grad_norm": 0.44510725140571594, "learning_rate": 9.306196840826246e-05, "loss": 1.6315, "step": 1152 }, { "epoch": 0.3499772347852481, "grad_norm": 0.4738346338272095, "learning_rate": 9.305589307411909e-05, "loss": 1.9982, "step": 1153 }, { "epoch": 0.3502807709819396, "grad_norm": 0.6915324330329895, "learning_rate": 9.304981773997569e-05, "loss": 1.6566, "step": 1154 }, { "epoch": 0.35058430717863104, "grad_norm": 0.35767850279808044, "learning_rate": 9.304374240583232e-05, "loss": 1.6553, "step": 1155 }, { "epoch": 0.35088784337532253, "grad_norm": 0.4144536852836609, "learning_rate": 9.303766707168895e-05, "loss": 1.4838, "step": 1156 }, { "epoch": 0.35119137957201396, "grad_norm": 0.42863425612449646, "learning_rate": 9.303159173754557e-05, "loss": 2.0101, "step": 1157 }, { "epoch": 0.3514949157687054, "grad_norm": 0.38044658303260803, "learning_rate": 9.302551640340219e-05, "loss": 2.0358, "step": 1158 }, { "epoch": 0.3517984519653969, "grad_norm": 0.3667512536048889, "learning_rate": 9.301944106925882e-05, "loss": 2.0601, "step": 1159 }, { "epoch": 0.3521019881620883, "grad_norm": 0.4198186993598938, "learning_rate": 9.301336573511544e-05, "loss": 1.8418, "step": 1160 }, { "epoch": 0.3524055243587798, "grad_norm": 0.40647754073143005, "learning_rate": 9.300729040097205e-05, "loss": 1.945, "step": 1161 }, { "epoch": 0.35270906055547124, "grad_norm": 0.4339864253997803, "learning_rate": 9.300121506682868e-05, "loss": 1.8991, "step": 1162 }, { "epoch": 0.3530125967521627, "grad_norm": 0.43949249386787415, "learning_rate": 9.29951397326853e-05, "loss": 1.8012, "step": 1163 }, { "epoch": 0.35331613294885417, "grad_norm": 0.3767072558403015, "learning_rate": 9.298906439854192e-05, "loss": 1.4843, "step": 1164 }, { "epoch": 0.3536196691455456, "grad_norm": 0.34623175859451294, "learning_rate": 9.298298906439855e-05, "loss": 1.7296, "step": 1165 }, { "epoch": 0.35392320534223703, "grad_norm": 0.4682632088661194, "learning_rate": 9.297691373025517e-05, "loss": 1.6166, "step": 1166 }, { "epoch": 0.3542267415389285, "grad_norm": 0.43289923667907715, "learning_rate": 9.29708383961118e-05, "loss": 1.8117, "step": 1167 }, { "epoch": 0.35453027773561996, "grad_norm": 0.40620309114456177, "learning_rate": 9.29647630619684e-05, "loss": 2.2582, "step": 1168 }, { "epoch": 0.35483381393231145, "grad_norm": 0.4079282879829407, "learning_rate": 9.295868772782503e-05, "loss": 1.8266, "step": 1169 }, { "epoch": 0.3551373501290029, "grad_norm": 0.4398365020751953, "learning_rate": 9.295261239368166e-05, "loss": 1.8592, "step": 1170 }, { "epoch": 0.3554408863256943, "grad_norm": 0.41404253244400024, "learning_rate": 9.294653705953828e-05, "loss": 1.4286, "step": 1171 }, { "epoch": 0.3557444225223858, "grad_norm": 0.3746820390224457, "learning_rate": 9.29404617253949e-05, "loss": 1.751, "step": 1172 }, { "epoch": 0.35604795871907724, "grad_norm": 0.3549497723579407, "learning_rate": 9.293438639125153e-05, "loss": 1.1857, "step": 1173 }, { "epoch": 0.35635149491576873, "grad_norm": 0.3803435266017914, "learning_rate": 9.292831105710815e-05, "loss": 1.9532, "step": 1174 }, { "epoch": 0.35665503111246016, "grad_norm": 0.46608418226242065, "learning_rate": 9.292223572296476e-05, "loss": 1.7284, "step": 1175 }, { "epoch": 0.3569585673091516, "grad_norm": 0.3718934953212738, "learning_rate": 9.29161603888214e-05, "loss": 1.8234, "step": 1176 }, { "epoch": 0.3572621035058431, "grad_norm": 0.43626031279563904, "learning_rate": 9.291008505467801e-05, "loss": 1.6322, "step": 1177 }, { "epoch": 0.3575656397025345, "grad_norm": 0.3897557258605957, "learning_rate": 9.290400972053463e-05, "loss": 1.7562, "step": 1178 }, { "epoch": 0.35786917589922596, "grad_norm": 0.4063243269920349, "learning_rate": 9.289793438639126e-05, "loss": 1.7012, "step": 1179 }, { "epoch": 0.35817271209591744, "grad_norm": 0.33185258507728577, "learning_rate": 9.289185905224788e-05, "loss": 1.8933, "step": 1180 }, { "epoch": 0.3584762482926089, "grad_norm": 0.35498881340026855, "learning_rate": 9.288578371810451e-05, "loss": 1.84, "step": 1181 }, { "epoch": 0.35877978448930037, "grad_norm": 0.37165796756744385, "learning_rate": 9.287970838396111e-05, "loss": 2.0435, "step": 1182 }, { "epoch": 0.3590833206859918, "grad_norm": 0.4139983355998993, "learning_rate": 9.287363304981774e-05, "loss": 1.5223, "step": 1183 }, { "epoch": 0.35938685688268324, "grad_norm": 0.4164494276046753, "learning_rate": 9.286755771567437e-05, "loss": 2.125, "step": 1184 }, { "epoch": 0.3596903930793747, "grad_norm": 0.35237401723861694, "learning_rate": 9.286148238153098e-05, "loss": 1.9644, "step": 1185 }, { "epoch": 0.35999392927606616, "grad_norm": 0.4741188883781433, "learning_rate": 9.285540704738761e-05, "loss": 1.7027, "step": 1186 }, { "epoch": 0.36029746547275765, "grad_norm": 0.6668043732643127, "learning_rate": 9.284933171324424e-05, "loss": 1.7109, "step": 1187 }, { "epoch": 0.3606010016694491, "grad_norm": 0.39687463641166687, "learning_rate": 9.284325637910086e-05, "loss": 1.9012, "step": 1188 }, { "epoch": 0.3609045378661405, "grad_norm": 0.39605942368507385, "learning_rate": 9.283718104495747e-05, "loss": 2.0095, "step": 1189 }, { "epoch": 0.361208074062832, "grad_norm": 0.6824695467948914, "learning_rate": 9.28311057108141e-05, "loss": 1.6799, "step": 1190 }, { "epoch": 0.36151161025952344, "grad_norm": 0.31606560945510864, "learning_rate": 9.282503037667072e-05, "loss": 1.4793, "step": 1191 }, { "epoch": 0.36181514645621493, "grad_norm": 0.3778662085533142, "learning_rate": 9.281895504252734e-05, "loss": 1.7399, "step": 1192 }, { "epoch": 0.36211868265290637, "grad_norm": 0.39530149102211, "learning_rate": 9.281287970838397e-05, "loss": 1.7939, "step": 1193 }, { "epoch": 0.3624222188495978, "grad_norm": 0.4434921145439148, "learning_rate": 9.280680437424059e-05, "loss": 1.5982, "step": 1194 }, { "epoch": 0.3627257550462893, "grad_norm": 0.40200692415237427, "learning_rate": 9.280072904009722e-05, "loss": 1.6496, "step": 1195 }, { "epoch": 0.3630292912429807, "grad_norm": 0.3617413341999054, "learning_rate": 9.279465370595382e-05, "loss": 1.9734, "step": 1196 }, { "epoch": 0.36333282743967216, "grad_norm": 0.3840635120868683, "learning_rate": 9.278857837181045e-05, "loss": 2.0827, "step": 1197 }, { "epoch": 0.36363636363636365, "grad_norm": 0.36481353640556335, "learning_rate": 9.278250303766708e-05, "loss": 1.781, "step": 1198 }, { "epoch": 0.3639398998330551, "grad_norm": 0.3300980031490326, "learning_rate": 9.277642770352369e-05, "loss": 1.8124, "step": 1199 }, { "epoch": 0.36424343602974657, "grad_norm": 0.3816182315349579, "learning_rate": 9.277035236938032e-05, "loss": 2.0853, "step": 1200 }, { "epoch": 0.364546972226438, "grad_norm": 0.40531983971595764, "learning_rate": 9.276427703523695e-05, "loss": 1.8582, "step": 1201 }, { "epoch": 0.36485050842312944, "grad_norm": 0.5847654938697815, "learning_rate": 9.275820170109357e-05, "loss": 1.9022, "step": 1202 }, { "epoch": 0.36515404461982093, "grad_norm": 0.37587395310401917, "learning_rate": 9.275212636695018e-05, "loss": 1.5774, "step": 1203 }, { "epoch": 0.36545758081651236, "grad_norm": 0.4057527780532837, "learning_rate": 9.274605103280681e-05, "loss": 1.965, "step": 1204 }, { "epoch": 0.36576111701320385, "grad_norm": 0.7079107761383057, "learning_rate": 9.273997569866343e-05, "loss": 1.721, "step": 1205 }, { "epoch": 0.3660646532098953, "grad_norm": 0.828880786895752, "learning_rate": 9.273390036452005e-05, "loss": 1.8865, "step": 1206 }, { "epoch": 0.3663681894065867, "grad_norm": 0.3704030215740204, "learning_rate": 9.272782503037668e-05, "loss": 2.1905, "step": 1207 }, { "epoch": 0.3666717256032782, "grad_norm": 0.3877900540828705, "learning_rate": 9.27217496962333e-05, "loss": 1.9098, "step": 1208 }, { "epoch": 0.36697526179996964, "grad_norm": 0.39982378482818604, "learning_rate": 9.271567436208991e-05, "loss": 1.948, "step": 1209 }, { "epoch": 0.3672787979966611, "grad_norm": 0.4450254440307617, "learning_rate": 9.270959902794653e-05, "loss": 2.0398, "step": 1210 }, { "epoch": 0.36758233419335257, "grad_norm": 0.4938177168369293, "learning_rate": 9.270352369380316e-05, "loss": 1.9824, "step": 1211 }, { "epoch": 0.367885870390044, "grad_norm": 0.44947531819343567, "learning_rate": 9.26974483596598e-05, "loss": 2.0937, "step": 1212 }, { "epoch": 0.3681894065867355, "grad_norm": 0.5059708952903748, "learning_rate": 9.26913730255164e-05, "loss": 1.5887, "step": 1213 }, { "epoch": 0.3684929427834269, "grad_norm": 0.38562676310539246, "learning_rate": 9.268529769137303e-05, "loss": 1.898, "step": 1214 }, { "epoch": 0.36879647898011836, "grad_norm": 0.8131862282752991, "learning_rate": 9.267922235722966e-05, "loss": 1.9549, "step": 1215 }, { "epoch": 0.36910001517680985, "grad_norm": 0.3856705129146576, "learning_rate": 9.267314702308628e-05, "loss": 1.9469, "step": 1216 }, { "epoch": 0.3694035513735013, "grad_norm": 0.38688114285469055, "learning_rate": 9.26670716889429e-05, "loss": 2.0386, "step": 1217 }, { "epoch": 0.36970708757019277, "grad_norm": 0.4043256938457489, "learning_rate": 9.266099635479952e-05, "loss": 2.0083, "step": 1218 }, { "epoch": 0.3700106237668842, "grad_norm": 2.2425715923309326, "learning_rate": 9.265492102065614e-05, "loss": 1.9006, "step": 1219 }, { "epoch": 0.37031415996357564, "grad_norm": 0.35413646697998047, "learning_rate": 9.264884568651276e-05, "loss": 1.597, "step": 1220 }, { "epoch": 0.37061769616026713, "grad_norm": 0.4035986363887787, "learning_rate": 9.264277035236939e-05, "loss": 1.9021, "step": 1221 }, { "epoch": 0.37092123235695856, "grad_norm": 0.38641074299812317, "learning_rate": 9.263669501822601e-05, "loss": 2.1007, "step": 1222 }, { "epoch": 0.37122476855365, "grad_norm": 0.3715684711933136, "learning_rate": 9.263061968408262e-05, "loss": 1.8929, "step": 1223 }, { "epoch": 0.3715283047503415, "grad_norm": 0.3876987397670746, "learning_rate": 9.262454434993924e-05, "loss": 1.8983, "step": 1224 }, { "epoch": 0.3718318409470329, "grad_norm": 0.4943421483039856, "learning_rate": 9.261846901579587e-05, "loss": 1.7334, "step": 1225 }, { "epoch": 0.3721353771437244, "grad_norm": 0.41828441619873047, "learning_rate": 9.26123936816525e-05, "loss": 2.1366, "step": 1226 }, { "epoch": 0.37243891334041584, "grad_norm": 0.4057375490665436, "learning_rate": 9.260631834750911e-05, "loss": 1.8695, "step": 1227 }, { "epoch": 0.3727424495371073, "grad_norm": 0.37381577491760254, "learning_rate": 9.260024301336574e-05, "loss": 1.8757, "step": 1228 }, { "epoch": 0.37304598573379877, "grad_norm": 0.31567415595054626, "learning_rate": 9.259416767922237e-05, "loss": 1.579, "step": 1229 }, { "epoch": 0.3733495219304902, "grad_norm": 0.3704005181789398, "learning_rate": 9.258809234507899e-05, "loss": 2.0297, "step": 1230 }, { "epoch": 0.3736530581271817, "grad_norm": 0.37612470984458923, "learning_rate": 9.25820170109356e-05, "loss": 1.8501, "step": 1231 }, { "epoch": 0.3739565943238731, "grad_norm": 0.37165501713752747, "learning_rate": 9.257594167679223e-05, "loss": 1.5672, "step": 1232 }, { "epoch": 0.37426013052056456, "grad_norm": 0.9847288131713867, "learning_rate": 9.256986634264885e-05, "loss": 1.4694, "step": 1233 }, { "epoch": 0.37456366671725605, "grad_norm": 0.35515448451042175, "learning_rate": 9.256379100850547e-05, "loss": 1.8861, "step": 1234 }, { "epoch": 0.3748672029139475, "grad_norm": 0.46874669194221497, "learning_rate": 9.25577156743621e-05, "loss": 2.002, "step": 1235 }, { "epoch": 0.3751707391106389, "grad_norm": 0.4635021686553955, "learning_rate": 9.255164034021872e-05, "loss": 1.3803, "step": 1236 }, { "epoch": 0.3754742753073304, "grad_norm": 0.3871179521083832, "learning_rate": 9.254556500607533e-05, "loss": 1.9017, "step": 1237 }, { "epoch": 0.37577781150402184, "grad_norm": 0.3958319127559662, "learning_rate": 9.253948967193195e-05, "loss": 2.0054, "step": 1238 }, { "epoch": 0.37608134770071333, "grad_norm": 0.38364940881729126, "learning_rate": 9.253341433778858e-05, "loss": 1.7178, "step": 1239 }, { "epoch": 0.37638488389740477, "grad_norm": 0.4198092222213745, "learning_rate": 9.252733900364521e-05, "loss": 1.9953, "step": 1240 }, { "epoch": 0.3766884200940962, "grad_norm": 0.46621835231781006, "learning_rate": 9.252126366950182e-05, "loss": 1.7053, "step": 1241 }, { "epoch": 0.3769919562907877, "grad_norm": 0.3871505558490753, "learning_rate": 9.251518833535845e-05, "loss": 1.7543, "step": 1242 }, { "epoch": 0.3772954924874791, "grad_norm": 0.33642569184303284, "learning_rate": 9.250911300121508e-05, "loss": 1.875, "step": 1243 }, { "epoch": 0.3775990286841706, "grad_norm": 0.37663060426712036, "learning_rate": 9.25030376670717e-05, "loss": 2.0942, "step": 1244 }, { "epoch": 0.37790256488086205, "grad_norm": 0.5118516087532043, "learning_rate": 9.249696233292831e-05, "loss": 1.6102, "step": 1245 }, { "epoch": 0.3782061010775535, "grad_norm": 0.37116679549217224, "learning_rate": 9.249088699878494e-05, "loss": 1.8115, "step": 1246 }, { "epoch": 0.37850963727424497, "grad_norm": 0.3737630248069763, "learning_rate": 9.248481166464156e-05, "loss": 1.8607, "step": 1247 }, { "epoch": 0.3788131734709364, "grad_norm": 0.4388040006160736, "learning_rate": 9.247873633049818e-05, "loss": 1.8458, "step": 1248 }, { "epoch": 0.37911670966762784, "grad_norm": 0.401643842458725, "learning_rate": 9.24726609963548e-05, "loss": 2.022, "step": 1249 }, { "epoch": 0.3794202458643193, "grad_norm": 0.4450658857822418, "learning_rate": 9.246658566221143e-05, "loss": 2.082, "step": 1250 }, { "epoch": 0.37972378206101076, "grad_norm": 0.4192996025085449, "learning_rate": 9.246051032806805e-05, "loss": 1.8848, "step": 1251 }, { "epoch": 0.38002731825770225, "grad_norm": 0.4925002157688141, "learning_rate": 9.245443499392466e-05, "loss": 1.9252, "step": 1252 }, { "epoch": 0.3803308544543937, "grad_norm": 0.43910741806030273, "learning_rate": 9.244835965978129e-05, "loss": 1.0955, "step": 1253 }, { "epoch": 0.3806343906510851, "grad_norm": 0.3679327070713043, "learning_rate": 9.244228432563792e-05, "loss": 1.9442, "step": 1254 }, { "epoch": 0.3809379268477766, "grad_norm": 0.353431761264801, "learning_rate": 9.243620899149453e-05, "loss": 2.1047, "step": 1255 }, { "epoch": 0.38124146304446804, "grad_norm": 0.4353777766227722, "learning_rate": 9.243013365735116e-05, "loss": 1.9008, "step": 1256 }, { "epoch": 0.38154499924115953, "grad_norm": 0.5220703482627869, "learning_rate": 9.242405832320779e-05, "loss": 1.9991, "step": 1257 }, { "epoch": 0.38184853543785097, "grad_norm": 0.4233221709728241, "learning_rate": 9.24179829890644e-05, "loss": 1.946, "step": 1258 }, { "epoch": 0.3821520716345424, "grad_norm": 0.4323975145816803, "learning_rate": 9.241190765492102e-05, "loss": 1.2619, "step": 1259 }, { "epoch": 0.3824556078312339, "grad_norm": 0.41842687129974365, "learning_rate": 9.240583232077766e-05, "loss": 1.7097, "step": 1260 }, { "epoch": 0.3827591440279253, "grad_norm": 0.37142685055732727, "learning_rate": 9.239975698663427e-05, "loss": 1.771, "step": 1261 }, { "epoch": 0.38306268022461676, "grad_norm": 0.3784460127353668, "learning_rate": 9.239368165249089e-05, "loss": 1.7196, "step": 1262 }, { "epoch": 0.38336621642130825, "grad_norm": 0.4241008460521698, "learning_rate": 9.238760631834751e-05, "loss": 1.5656, "step": 1263 }, { "epoch": 0.3836697526179997, "grad_norm": 0.4829429090023041, "learning_rate": 9.238153098420414e-05, "loss": 2.0334, "step": 1264 }, { "epoch": 0.38397328881469117, "grad_norm": 0.3593828082084656, "learning_rate": 9.237545565006076e-05, "loss": 1.8076, "step": 1265 }, { "epoch": 0.3842768250113826, "grad_norm": 0.4482446014881134, "learning_rate": 9.236938031591737e-05, "loss": 1.7199, "step": 1266 }, { "epoch": 0.38458036120807404, "grad_norm": 0.3537690043449402, "learning_rate": 9.2363304981774e-05, "loss": 2.027, "step": 1267 }, { "epoch": 0.38488389740476553, "grad_norm": 0.3928816616535187, "learning_rate": 9.235722964763063e-05, "loss": 1.811, "step": 1268 }, { "epoch": 0.38518743360145696, "grad_norm": 0.4176971912384033, "learning_rate": 9.235115431348724e-05, "loss": 1.9443, "step": 1269 }, { "epoch": 0.38549096979814845, "grad_norm": 0.3628327548503876, "learning_rate": 9.234507897934387e-05, "loss": 2.1905, "step": 1270 }, { "epoch": 0.3857945059948399, "grad_norm": 0.40045323967933655, "learning_rate": 9.23390036452005e-05, "loss": 1.8231, "step": 1271 }, { "epoch": 0.3860980421915313, "grad_norm": 0.36478134989738464, "learning_rate": 9.23329283110571e-05, "loss": 1.9453, "step": 1272 }, { "epoch": 0.3864015783882228, "grad_norm": 0.36314669251441956, "learning_rate": 9.232685297691373e-05, "loss": 1.7708, "step": 1273 }, { "epoch": 0.38670511458491424, "grad_norm": 0.44175347685813904, "learning_rate": 9.232077764277037e-05, "loss": 2.1961, "step": 1274 }, { "epoch": 0.38700865078160573, "grad_norm": 0.411424845457077, "learning_rate": 9.231470230862698e-05, "loss": 1.7878, "step": 1275 }, { "epoch": 0.38731218697829717, "grad_norm": 0.4166533648967743, "learning_rate": 9.23086269744836e-05, "loss": 1.9688, "step": 1276 }, { "epoch": 0.3876157231749886, "grad_norm": 0.3575800061225891, "learning_rate": 9.230255164034022e-05, "loss": 1.94, "step": 1277 }, { "epoch": 0.3879192593716801, "grad_norm": 0.37767383456230164, "learning_rate": 9.229647630619685e-05, "loss": 2.1039, "step": 1278 }, { "epoch": 0.3882227955683715, "grad_norm": 0.5323564410209656, "learning_rate": 9.229040097205347e-05, "loss": 2.1214, "step": 1279 }, { "epoch": 0.38852633176506296, "grad_norm": 0.37731266021728516, "learning_rate": 9.228432563791008e-05, "loss": 1.5483, "step": 1280 }, { "epoch": 0.38882986796175445, "grad_norm": 0.3776138126850128, "learning_rate": 9.227825030376671e-05, "loss": 1.8353, "step": 1281 }, { "epoch": 0.3891334041584459, "grad_norm": 0.40437617897987366, "learning_rate": 9.227217496962333e-05, "loss": 1.7512, "step": 1282 }, { "epoch": 0.3894369403551374, "grad_norm": 0.4422746002674103, "learning_rate": 9.226609963547995e-05, "loss": 1.8497, "step": 1283 }, { "epoch": 0.3897404765518288, "grad_norm": 0.3358697295188904, "learning_rate": 9.226002430133658e-05, "loss": 1.8597, "step": 1284 }, { "epoch": 0.39004401274852024, "grad_norm": 0.8504922986030579, "learning_rate": 9.225394896719321e-05, "loss": 1.4645, "step": 1285 }, { "epoch": 0.39034754894521173, "grad_norm": 0.35627248883247375, "learning_rate": 9.224787363304981e-05, "loss": 2.0882, "step": 1286 }, { "epoch": 0.39065108514190316, "grad_norm": 0.3549906015396118, "learning_rate": 9.224179829890644e-05, "loss": 1.1979, "step": 1287 }, { "epoch": 0.39095462133859465, "grad_norm": 0.41105836629867554, "learning_rate": 9.223572296476308e-05, "loss": 1.5483, "step": 1288 }, { "epoch": 0.3912581575352861, "grad_norm": 0.38563552498817444, "learning_rate": 9.222964763061969e-05, "loss": 1.6972, "step": 1289 }, { "epoch": 0.3915616937319775, "grad_norm": 0.6308382749557495, "learning_rate": 9.222357229647631e-05, "loss": 1.7006, "step": 1290 }, { "epoch": 0.391865229928669, "grad_norm": 0.4371561110019684, "learning_rate": 9.221749696233293e-05, "loss": 1.838, "step": 1291 }, { "epoch": 0.39216876612536045, "grad_norm": 0.421274334192276, "learning_rate": 9.221142162818956e-05, "loss": 2.0326, "step": 1292 }, { "epoch": 0.3924723023220519, "grad_norm": 0.39431074261665344, "learning_rate": 9.220534629404618e-05, "loss": 1.9654, "step": 1293 }, { "epoch": 0.39277583851874337, "grad_norm": 0.3802948594093323, "learning_rate": 9.219927095990279e-05, "loss": 1.8311, "step": 1294 }, { "epoch": 0.3930793747154348, "grad_norm": 0.44941607117652893, "learning_rate": 9.219319562575942e-05, "loss": 1.879, "step": 1295 }, { "epoch": 0.3933829109121263, "grad_norm": 0.395014226436615, "learning_rate": 9.218712029161604e-05, "loss": 1.5158, "step": 1296 }, { "epoch": 0.3936864471088177, "grad_norm": 0.3692936599254608, "learning_rate": 9.218104495747266e-05, "loss": 2.1428, "step": 1297 }, { "epoch": 0.39398998330550916, "grad_norm": 0.41931676864624023, "learning_rate": 9.217496962332929e-05, "loss": 1.5854, "step": 1298 }, { "epoch": 0.39429351950220065, "grad_norm": 0.4195273816585541, "learning_rate": 9.216889428918592e-05, "loss": 1.4504, "step": 1299 }, { "epoch": 0.3945970556988921, "grad_norm": 0.4246782958507538, "learning_rate": 9.216281895504252e-05, "loss": 1.5243, "step": 1300 }, { "epoch": 0.3949005918955836, "grad_norm": 0.3366101086139679, "learning_rate": 9.215674362089915e-05, "loss": 1.5563, "step": 1301 }, { "epoch": 0.395204128092275, "grad_norm": 0.5027258992195129, "learning_rate": 9.215066828675579e-05, "loss": 1.8705, "step": 1302 }, { "epoch": 0.39550766428896644, "grad_norm": 0.33939701318740845, "learning_rate": 9.21445929526124e-05, "loss": 1.5192, "step": 1303 }, { "epoch": 0.39581120048565793, "grad_norm": 0.5187803506851196, "learning_rate": 9.213851761846902e-05, "loss": 1.5153, "step": 1304 }, { "epoch": 0.39611473668234937, "grad_norm": 0.43332159519195557, "learning_rate": 9.213244228432564e-05, "loss": 1.9878, "step": 1305 }, { "epoch": 0.3964182728790408, "grad_norm": 0.371183842420578, "learning_rate": 9.212636695018227e-05, "loss": 1.9458, "step": 1306 }, { "epoch": 0.3967218090757323, "grad_norm": 0.40977227687835693, "learning_rate": 9.212029161603889e-05, "loss": 2.17, "step": 1307 }, { "epoch": 0.3970253452724237, "grad_norm": 0.37145286798477173, "learning_rate": 9.21142162818955e-05, "loss": 1.9565, "step": 1308 }, { "epoch": 0.3973288814691152, "grad_norm": 0.45751968026161194, "learning_rate": 9.210814094775213e-05, "loss": 1.7469, "step": 1309 }, { "epoch": 0.39763241766580665, "grad_norm": 0.39320966601371765, "learning_rate": 9.210206561360875e-05, "loss": 2.2532, "step": 1310 }, { "epoch": 0.3979359538624981, "grad_norm": 0.44945451617240906, "learning_rate": 9.209599027946537e-05, "loss": 1.896, "step": 1311 }, { "epoch": 0.39823949005918957, "grad_norm": 0.4179849326610565, "learning_rate": 9.2089914945322e-05, "loss": 2.0536, "step": 1312 }, { "epoch": 0.398543026255881, "grad_norm": 0.3893973231315613, "learning_rate": 9.208383961117863e-05, "loss": 1.5888, "step": 1313 }, { "epoch": 0.3988465624525725, "grad_norm": 0.4161340892314911, "learning_rate": 9.207776427703523e-05, "loss": 2.071, "step": 1314 }, { "epoch": 0.39915009864926393, "grad_norm": 0.37969034910202026, "learning_rate": 9.207168894289186e-05, "loss": 1.7566, "step": 1315 }, { "epoch": 0.39945363484595536, "grad_norm": 0.4157601296901703, "learning_rate": 9.20656136087485e-05, "loss": 2.1174, "step": 1316 }, { "epoch": 0.39975717104264685, "grad_norm": 0.3726348876953125, "learning_rate": 9.205953827460511e-05, "loss": 1.9701, "step": 1317 }, { "epoch": 0.4000607072393383, "grad_norm": 0.39407408237457275, "learning_rate": 9.205346294046173e-05, "loss": 1.6288, "step": 1318 }, { "epoch": 0.4003642434360297, "grad_norm": 0.37205028533935547, "learning_rate": 9.204738760631835e-05, "loss": 1.801, "step": 1319 }, { "epoch": 0.4006677796327212, "grad_norm": 0.39794477820396423, "learning_rate": 9.204131227217498e-05, "loss": 2.1164, "step": 1320 }, { "epoch": 0.40097131582941264, "grad_norm": 0.4078124761581421, "learning_rate": 9.20352369380316e-05, "loss": 1.8339, "step": 1321 }, { "epoch": 0.40127485202610413, "grad_norm": 0.4183814227581024, "learning_rate": 9.202916160388821e-05, "loss": 1.7706, "step": 1322 }, { "epoch": 0.40157838822279557, "grad_norm": 0.6123658418655396, "learning_rate": 9.202308626974484e-05, "loss": 1.7745, "step": 1323 }, { "epoch": 0.401881924419487, "grad_norm": 0.36878085136413574, "learning_rate": 9.201701093560146e-05, "loss": 1.2733, "step": 1324 }, { "epoch": 0.4021854606161785, "grad_norm": 0.3583606779575348, "learning_rate": 9.201093560145808e-05, "loss": 1.3474, "step": 1325 }, { "epoch": 0.4024889968128699, "grad_norm": 0.4098053574562073, "learning_rate": 9.200486026731471e-05, "loss": 1.845, "step": 1326 }, { "epoch": 0.4027925330095614, "grad_norm": 0.5891076326370239, "learning_rate": 9.199878493317134e-05, "loss": 2.0312, "step": 1327 }, { "epoch": 0.40309606920625285, "grad_norm": 0.5270339250564575, "learning_rate": 9.199270959902794e-05, "loss": 1.6679, "step": 1328 }, { "epoch": 0.4033996054029443, "grad_norm": 0.4184766411781311, "learning_rate": 9.198663426488457e-05, "loss": 2.1587, "step": 1329 }, { "epoch": 0.4037031415996358, "grad_norm": 0.3945539593696594, "learning_rate": 9.198055893074119e-05, "loss": 1.9374, "step": 1330 }, { "epoch": 0.4040066777963272, "grad_norm": 0.3906068205833435, "learning_rate": 9.197448359659781e-05, "loss": 1.4602, "step": 1331 }, { "epoch": 0.40431021399301864, "grad_norm": 0.4073745906352997, "learning_rate": 9.196840826245444e-05, "loss": 2.0568, "step": 1332 }, { "epoch": 0.40461375018971013, "grad_norm": 0.35083553194999695, "learning_rate": 9.196233292831106e-05, "loss": 1.5915, "step": 1333 }, { "epoch": 0.40491728638640156, "grad_norm": 0.38344746828079224, "learning_rate": 9.195625759416769e-05, "loss": 1.8988, "step": 1334 }, { "epoch": 0.40522082258309305, "grad_norm": 0.8442848920822144, "learning_rate": 9.19501822600243e-05, "loss": 1.6229, "step": 1335 }, { "epoch": 0.4055243587797845, "grad_norm": 0.41683101654052734, "learning_rate": 9.194410692588092e-05, "loss": 1.6984, "step": 1336 }, { "epoch": 0.4058278949764759, "grad_norm": 0.43008947372436523, "learning_rate": 9.193803159173755e-05, "loss": 1.8312, "step": 1337 }, { "epoch": 0.4061314311731674, "grad_norm": 0.43498390913009644, "learning_rate": 9.193195625759417e-05, "loss": 1.735, "step": 1338 }, { "epoch": 0.40643496736985885, "grad_norm": 0.361969530582428, "learning_rate": 9.192588092345079e-05, "loss": 2.015, "step": 1339 }, { "epoch": 0.40673850356655034, "grad_norm": 0.4028913378715515, "learning_rate": 9.191980558930742e-05, "loss": 1.9139, "step": 1340 }, { "epoch": 0.40704203976324177, "grad_norm": 0.46840906143188477, "learning_rate": 9.191373025516405e-05, "loss": 1.913, "step": 1341 }, { "epoch": 0.4073455759599332, "grad_norm": 0.39075982570648193, "learning_rate": 9.190765492102065e-05, "loss": 1.8908, "step": 1342 }, { "epoch": 0.4076491121566247, "grad_norm": 0.3519285023212433, "learning_rate": 9.190157958687728e-05, "loss": 1.8016, "step": 1343 }, { "epoch": 0.4079526483533161, "grad_norm": 0.43734681606292725, "learning_rate": 9.18955042527339e-05, "loss": 1.5545, "step": 1344 }, { "epoch": 0.40825618455000756, "grad_norm": 0.4044792950153351, "learning_rate": 9.188942891859052e-05, "loss": 1.836, "step": 1345 }, { "epoch": 0.40855972074669905, "grad_norm": 0.3661639988422394, "learning_rate": 9.188335358444715e-05, "loss": 2.0347, "step": 1346 }, { "epoch": 0.4088632569433905, "grad_norm": 0.46894827485084534, "learning_rate": 9.187727825030377e-05, "loss": 1.5674, "step": 1347 }, { "epoch": 0.409166793140082, "grad_norm": 0.4389861226081848, "learning_rate": 9.18712029161604e-05, "loss": 2.0472, "step": 1348 }, { "epoch": 0.4094703293367734, "grad_norm": 0.3514555096626282, "learning_rate": 9.186512758201702e-05, "loss": 1.7453, "step": 1349 }, { "epoch": 0.40977386553346484, "grad_norm": 0.35691046714782715, "learning_rate": 9.185905224787363e-05, "loss": 2.2705, "step": 1350 }, { "epoch": 0.41007740173015633, "grad_norm": 0.3770231604576111, "learning_rate": 9.185297691373026e-05, "loss": 1.9172, "step": 1351 }, { "epoch": 0.41038093792684777, "grad_norm": 0.40932852029800415, "learning_rate": 9.184690157958688e-05, "loss": 1.8489, "step": 1352 }, { "epoch": 0.41068447412353926, "grad_norm": 0.39512181282043457, "learning_rate": 9.18408262454435e-05, "loss": 1.7082, "step": 1353 }, { "epoch": 0.4109880103202307, "grad_norm": 0.33877119421958923, "learning_rate": 9.183475091130013e-05, "loss": 1.7629, "step": 1354 }, { "epoch": 0.4112915465169221, "grad_norm": 0.4188339412212372, "learning_rate": 9.182867557715675e-05, "loss": 1.4726, "step": 1355 }, { "epoch": 0.4115950827136136, "grad_norm": 0.3661527931690216, "learning_rate": 9.182260024301336e-05, "loss": 1.7752, "step": 1356 }, { "epoch": 0.41189861891030505, "grad_norm": 0.3717115819454193, "learning_rate": 9.181652490887e-05, "loss": 1.6128, "step": 1357 }, { "epoch": 0.4122021551069965, "grad_norm": 0.783671498298645, "learning_rate": 9.181044957472661e-05, "loss": 1.3576, "step": 1358 }, { "epoch": 0.41250569130368797, "grad_norm": 0.5294111967086792, "learning_rate": 9.180437424058323e-05, "loss": 1.6084, "step": 1359 }, { "epoch": 0.4128092275003794, "grad_norm": 0.42108646035194397, "learning_rate": 9.179829890643986e-05, "loss": 2.0071, "step": 1360 }, { "epoch": 0.4131127636970709, "grad_norm": 0.3648010492324829, "learning_rate": 9.179222357229648e-05, "loss": 1.9791, "step": 1361 }, { "epoch": 0.41341629989376233, "grad_norm": 0.31227168440818787, "learning_rate": 9.178614823815311e-05, "loss": 1.811, "step": 1362 }, { "epoch": 0.41371983609045376, "grad_norm": 0.34013819694519043, "learning_rate": 9.178007290400973e-05, "loss": 1.6035, "step": 1363 }, { "epoch": 0.41402337228714525, "grad_norm": 0.3848358392715454, "learning_rate": 9.177399756986634e-05, "loss": 1.7878, "step": 1364 }, { "epoch": 0.4143269084838367, "grad_norm": 0.33737045526504517, "learning_rate": 9.176792223572297e-05, "loss": 1.5855, "step": 1365 }, { "epoch": 0.4146304446805282, "grad_norm": 0.3722662329673767, "learning_rate": 9.176184690157959e-05, "loss": 1.4857, "step": 1366 }, { "epoch": 0.4149339808772196, "grad_norm": 0.38730594515800476, "learning_rate": 9.175577156743621e-05, "loss": 1.858, "step": 1367 }, { "epoch": 0.41523751707391104, "grad_norm": 0.34036555886268616, "learning_rate": 9.174969623329284e-05, "loss": 1.4445, "step": 1368 }, { "epoch": 0.41554105327060253, "grad_norm": 0.395327091217041, "learning_rate": 9.174362089914946e-05, "loss": 1.5194, "step": 1369 }, { "epoch": 0.41584458946729397, "grad_norm": 0.4212843179702759, "learning_rate": 9.173754556500607e-05, "loss": 2.3923, "step": 1370 }, { "epoch": 0.41614812566398546, "grad_norm": 0.33540114760398865, "learning_rate": 9.17314702308627e-05, "loss": 1.9129, "step": 1371 }, { "epoch": 0.4164516618606769, "grad_norm": 0.43007227778434753, "learning_rate": 9.172539489671932e-05, "loss": 1.7658, "step": 1372 }, { "epoch": 0.4167551980573683, "grad_norm": 0.3466784358024597, "learning_rate": 9.171931956257594e-05, "loss": 1.8647, "step": 1373 }, { "epoch": 0.4170587342540598, "grad_norm": 0.35446929931640625, "learning_rate": 9.171324422843257e-05, "loss": 1.5265, "step": 1374 }, { "epoch": 0.41736227045075125, "grad_norm": 0.3868924081325531, "learning_rate": 9.170716889428919e-05, "loss": 1.6355, "step": 1375 }, { "epoch": 0.4176658066474427, "grad_norm": 0.39501097798347473, "learning_rate": 9.170109356014582e-05, "loss": 1.7426, "step": 1376 }, { "epoch": 0.4179693428441342, "grad_norm": 0.44299614429473877, "learning_rate": 9.169501822600244e-05, "loss": 1.8582, "step": 1377 }, { "epoch": 0.4182728790408256, "grad_norm": 0.438927561044693, "learning_rate": 9.168894289185905e-05, "loss": 1.9043, "step": 1378 }, { "epoch": 0.4185764152375171, "grad_norm": 0.3874059319496155, "learning_rate": 9.168286755771568e-05, "loss": 1.2591, "step": 1379 }, { "epoch": 0.41887995143420853, "grad_norm": 0.40715524554252625, "learning_rate": 9.16767922235723e-05, "loss": 1.9779, "step": 1380 }, { "epoch": 0.41918348763089996, "grad_norm": 0.3737177848815918, "learning_rate": 9.167071688942892e-05, "loss": 1.97, "step": 1381 }, { "epoch": 0.41948702382759145, "grad_norm": 0.3690639138221741, "learning_rate": 9.166464155528555e-05, "loss": 1.9618, "step": 1382 }, { "epoch": 0.4197905600242829, "grad_norm": 1.3496792316436768, "learning_rate": 9.165856622114217e-05, "loss": 2.1141, "step": 1383 }, { "epoch": 0.4200940962209744, "grad_norm": 0.39503785967826843, "learning_rate": 9.165249088699878e-05, "loss": 1.8984, "step": 1384 }, { "epoch": 0.4203976324176658, "grad_norm": 0.33576783537864685, "learning_rate": 9.164641555285541e-05, "loss": 1.8536, "step": 1385 }, { "epoch": 0.42070116861435725, "grad_norm": 0.8600859642028809, "learning_rate": 9.164034021871203e-05, "loss": 1.3809, "step": 1386 }, { "epoch": 0.42100470481104874, "grad_norm": 0.3842533230781555, "learning_rate": 9.163426488456865e-05, "loss": 1.6408, "step": 1387 }, { "epoch": 0.42130824100774017, "grad_norm": 0.4019504487514496, "learning_rate": 9.162818955042528e-05, "loss": 1.9738, "step": 1388 }, { "epoch": 0.4216117772044316, "grad_norm": 0.399406760931015, "learning_rate": 9.16221142162819e-05, "loss": 1.9897, "step": 1389 }, { "epoch": 0.4219153134011231, "grad_norm": 0.35225290060043335, "learning_rate": 9.161603888213853e-05, "loss": 1.8217, "step": 1390 }, { "epoch": 0.4222188495978145, "grad_norm": 0.3673458695411682, "learning_rate": 9.160996354799515e-05, "loss": 1.9175, "step": 1391 }, { "epoch": 0.422522385794506, "grad_norm": 0.37856656312942505, "learning_rate": 9.160388821385176e-05, "loss": 1.8, "step": 1392 }, { "epoch": 0.42282592199119745, "grad_norm": 0.3543725907802582, "learning_rate": 9.15978128797084e-05, "loss": 2.0975, "step": 1393 }, { "epoch": 0.4231294581878889, "grad_norm": 0.34620916843414307, "learning_rate": 9.159173754556501e-05, "loss": 1.803, "step": 1394 }, { "epoch": 0.4234329943845804, "grad_norm": 0.428543359041214, "learning_rate": 9.158566221142163e-05, "loss": 1.8852, "step": 1395 }, { "epoch": 0.4237365305812718, "grad_norm": 0.41286444664001465, "learning_rate": 9.157958687727826e-05, "loss": 1.7661, "step": 1396 }, { "epoch": 0.4240400667779633, "grad_norm": 0.42155444622039795, "learning_rate": 9.157351154313488e-05, "loss": 1.9728, "step": 1397 }, { "epoch": 0.42434360297465473, "grad_norm": 0.4446348547935486, "learning_rate": 9.15674362089915e-05, "loss": 1.6411, "step": 1398 }, { "epoch": 0.42464713917134617, "grad_norm": 0.38683468103408813, "learning_rate": 9.156136087484812e-05, "loss": 1.9311, "step": 1399 }, { "epoch": 0.42495067536803766, "grad_norm": 0.398798406124115, "learning_rate": 9.155528554070474e-05, "loss": 1.867, "step": 1400 }, { "epoch": 0.4252542115647291, "grad_norm": 0.3723427951335907, "learning_rate": 9.154921020656136e-05, "loss": 2.1345, "step": 1401 }, { "epoch": 0.4255577477614205, "grad_norm": 0.3853520452976227, "learning_rate": 9.154313487241799e-05, "loss": 1.4843, "step": 1402 }, { "epoch": 0.425861283958112, "grad_norm": 0.4148903489112854, "learning_rate": 9.153705953827461e-05, "loss": 1.9043, "step": 1403 }, { "epoch": 0.42616482015480345, "grad_norm": 0.4134661555290222, "learning_rate": 9.153098420413124e-05, "loss": 2.0662, "step": 1404 }, { "epoch": 0.42646835635149494, "grad_norm": 0.4663408100605011, "learning_rate": 9.152490886998786e-05, "loss": 1.8455, "step": 1405 }, { "epoch": 0.42677189254818637, "grad_norm": 0.3829919695854187, "learning_rate": 9.151883353584447e-05, "loss": 1.7822, "step": 1406 }, { "epoch": 0.4270754287448778, "grad_norm": 0.3487464487552643, "learning_rate": 9.15127582017011e-05, "loss": 1.9024, "step": 1407 }, { "epoch": 0.4273789649415693, "grad_norm": 0.4045817255973816, "learning_rate": 9.150668286755772e-05, "loss": 1.6833, "step": 1408 }, { "epoch": 0.42768250113826073, "grad_norm": 0.4237898588180542, "learning_rate": 9.150060753341434e-05, "loss": 2.0711, "step": 1409 }, { "epoch": 0.4279860373349522, "grad_norm": 0.3947038948535919, "learning_rate": 9.149453219927097e-05, "loss": 1.6692, "step": 1410 }, { "epoch": 0.42828957353164365, "grad_norm": 0.373927503824234, "learning_rate": 9.148845686512759e-05, "loss": 2.1039, "step": 1411 }, { "epoch": 0.4285931097283351, "grad_norm": 0.45322802662849426, "learning_rate": 9.14823815309842e-05, "loss": 1.4528, "step": 1412 }, { "epoch": 0.4288966459250266, "grad_norm": 0.4237847328186035, "learning_rate": 9.147630619684083e-05, "loss": 1.8462, "step": 1413 }, { "epoch": 0.429200182121718, "grad_norm": 0.39840593934059143, "learning_rate": 9.147023086269745e-05, "loss": 1.8354, "step": 1414 }, { "epoch": 0.42950371831840944, "grad_norm": 0.3490378260612488, "learning_rate": 9.146415552855407e-05, "loss": 1.9182, "step": 1415 }, { "epoch": 0.42980725451510093, "grad_norm": 0.37050196528434753, "learning_rate": 9.14580801944107e-05, "loss": 2.1893, "step": 1416 }, { "epoch": 0.43011079071179237, "grad_norm": 0.7810099720954895, "learning_rate": 9.145200486026732e-05, "loss": 1.7489, "step": 1417 }, { "epoch": 0.43041432690848386, "grad_norm": 0.35921812057495117, "learning_rate": 9.144592952612393e-05, "loss": 1.9051, "step": 1418 }, { "epoch": 0.4307178631051753, "grad_norm": 0.42429161071777344, "learning_rate": 9.143985419198057e-05, "loss": 1.4487, "step": 1419 }, { "epoch": 0.4310213993018667, "grad_norm": 0.37482765316963196, "learning_rate": 9.143377885783718e-05, "loss": 1.7627, "step": 1420 }, { "epoch": 0.4313249354985582, "grad_norm": 0.39142462611198425, "learning_rate": 9.142770352369381e-05, "loss": 1.7077, "step": 1421 }, { "epoch": 0.43162847169524965, "grad_norm": 0.33967357873916626, "learning_rate": 9.142162818955043e-05, "loss": 1.7666, "step": 1422 }, { "epoch": 0.43193200789194114, "grad_norm": 0.3520660400390625, "learning_rate": 9.141555285540705e-05, "loss": 2.1716, "step": 1423 }, { "epoch": 0.4322355440886326, "grad_norm": 0.3724939823150635, "learning_rate": 9.140947752126368e-05, "loss": 1.8, "step": 1424 }, { "epoch": 0.432539080285324, "grad_norm": 0.37572380900382996, "learning_rate": 9.14034021871203e-05, "loss": 1.8956, "step": 1425 }, { "epoch": 0.4328426164820155, "grad_norm": 0.38770124316215515, "learning_rate": 9.139732685297691e-05, "loss": 1.6381, "step": 1426 }, { "epoch": 0.43314615267870693, "grad_norm": 0.5836375951766968, "learning_rate": 9.139125151883354e-05, "loss": 1.9264, "step": 1427 }, { "epoch": 0.43344968887539836, "grad_norm": 0.44695645570755005, "learning_rate": 9.138517618469016e-05, "loss": 1.7427, "step": 1428 }, { "epoch": 0.43375322507208985, "grad_norm": 0.3857296407222748, "learning_rate": 9.137910085054678e-05, "loss": 1.5448, "step": 1429 }, { "epoch": 0.4340567612687813, "grad_norm": 0.417868971824646, "learning_rate": 9.137302551640341e-05, "loss": 1.9924, "step": 1430 }, { "epoch": 0.4343602974654728, "grad_norm": 0.42111891508102417, "learning_rate": 9.136695018226003e-05, "loss": 1.9506, "step": 1431 }, { "epoch": 0.4346638336621642, "grad_norm": 0.4096441864967346, "learning_rate": 9.136087484811664e-05, "loss": 1.6159, "step": 1432 }, { "epoch": 0.43496736985885565, "grad_norm": 0.4601602852344513, "learning_rate": 9.135479951397328e-05, "loss": 1.5702, "step": 1433 }, { "epoch": 0.43527090605554714, "grad_norm": 0.4030752182006836, "learning_rate": 9.134872417982989e-05, "loss": 1.9785, "step": 1434 }, { "epoch": 0.43557444225223857, "grad_norm": 0.4357512593269348, "learning_rate": 9.134264884568652e-05, "loss": 1.8718, "step": 1435 }, { "epoch": 0.43587797844893006, "grad_norm": 0.3511190712451935, "learning_rate": 9.133657351154314e-05, "loss": 1.9243, "step": 1436 }, { "epoch": 0.4361815146456215, "grad_norm": 0.4554003179073334, "learning_rate": 9.133049817739976e-05, "loss": 1.5576, "step": 1437 }, { "epoch": 0.4364850508423129, "grad_norm": 0.37637701630592346, "learning_rate": 9.132442284325639e-05, "loss": 2.3059, "step": 1438 }, { "epoch": 0.4367885870390044, "grad_norm": 0.39330780506134033, "learning_rate": 9.1318347509113e-05, "loss": 1.4548, "step": 1439 }, { "epoch": 0.43709212323569585, "grad_norm": 0.44056230783462524, "learning_rate": 9.131227217496962e-05, "loss": 1.7839, "step": 1440 }, { "epoch": 0.4373956594323873, "grad_norm": 1.5862314701080322, "learning_rate": 9.130619684082625e-05, "loss": 1.6374, "step": 1441 }, { "epoch": 0.4376991956290788, "grad_norm": 0.44076550006866455, "learning_rate": 9.130012150668287e-05, "loss": 2.0375, "step": 1442 }, { "epoch": 0.4380027318257702, "grad_norm": 0.46712005138397217, "learning_rate": 9.129404617253949e-05, "loss": 2.0498, "step": 1443 }, { "epoch": 0.4383062680224617, "grad_norm": 0.4472239315509796, "learning_rate": 9.128797083839612e-05, "loss": 2.1283, "step": 1444 }, { "epoch": 0.43860980421915313, "grad_norm": 0.46304264664649963, "learning_rate": 9.128189550425274e-05, "loss": 1.9628, "step": 1445 }, { "epoch": 0.43891334041584457, "grad_norm": 0.4066753387451172, "learning_rate": 9.127582017010935e-05, "loss": 1.4578, "step": 1446 }, { "epoch": 0.43921687661253606, "grad_norm": 0.4386885166168213, "learning_rate": 9.126974483596599e-05, "loss": 1.8655, "step": 1447 }, { "epoch": 0.4395204128092275, "grad_norm": 0.5175670981407166, "learning_rate": 9.12636695018226e-05, "loss": 1.9672, "step": 1448 }, { "epoch": 0.439823949005919, "grad_norm": 0.39056891202926636, "learning_rate": 9.125759416767923e-05, "loss": 2.2645, "step": 1449 }, { "epoch": 0.4401274852026104, "grad_norm": 0.3297121822834015, "learning_rate": 9.125151883353585e-05, "loss": 1.651, "step": 1450 }, { "epoch": 0.44043102139930185, "grad_norm": 0.37801650166511536, "learning_rate": 9.124544349939247e-05, "loss": 1.938, "step": 1451 }, { "epoch": 0.44073455759599334, "grad_norm": 0.45800700783729553, "learning_rate": 9.12393681652491e-05, "loss": 1.8465, "step": 1452 }, { "epoch": 0.44103809379268477, "grad_norm": 0.42198505997657776, "learning_rate": 9.123329283110572e-05, "loss": 1.9878, "step": 1453 }, { "epoch": 0.44134162998937626, "grad_norm": 0.9476953744888306, "learning_rate": 9.122721749696233e-05, "loss": 2.127, "step": 1454 }, { "epoch": 0.4416451661860677, "grad_norm": 0.6569995880126953, "learning_rate": 9.122114216281896e-05, "loss": 2.1351, "step": 1455 }, { "epoch": 0.44194870238275913, "grad_norm": 0.7246467471122742, "learning_rate": 9.121506682867558e-05, "loss": 1.9843, "step": 1456 }, { "epoch": 0.4422522385794506, "grad_norm": 0.3717383146286011, "learning_rate": 9.12089914945322e-05, "loss": 1.7456, "step": 1457 }, { "epoch": 0.44255577477614205, "grad_norm": 0.39930397272109985, "learning_rate": 9.120291616038883e-05, "loss": 2.1122, "step": 1458 }, { "epoch": 0.4428593109728335, "grad_norm": 0.4484943747520447, "learning_rate": 9.119684082624545e-05, "loss": 1.8622, "step": 1459 }, { "epoch": 0.443162847169525, "grad_norm": 0.45804062485694885, "learning_rate": 9.119076549210206e-05, "loss": 2.0503, "step": 1460 }, { "epoch": 0.4434663833662164, "grad_norm": 0.381073921918869, "learning_rate": 9.11846901579587e-05, "loss": 1.8256, "step": 1461 }, { "epoch": 0.4437699195629079, "grad_norm": 0.4491977095603943, "learning_rate": 9.117861482381531e-05, "loss": 1.6401, "step": 1462 }, { "epoch": 0.44407345575959933, "grad_norm": 0.3925999104976654, "learning_rate": 9.117253948967194e-05, "loss": 1.9513, "step": 1463 }, { "epoch": 0.44437699195629077, "grad_norm": 0.45975080132484436, "learning_rate": 9.116646415552856e-05, "loss": 1.7791, "step": 1464 }, { "epoch": 0.44468052815298226, "grad_norm": 0.44993898272514343, "learning_rate": 9.116038882138518e-05, "loss": 0.6258, "step": 1465 }, { "epoch": 0.4449840643496737, "grad_norm": 0.4088694453239441, "learning_rate": 9.115431348724181e-05, "loss": 1.768, "step": 1466 }, { "epoch": 0.4452876005463652, "grad_norm": 0.5844696760177612, "learning_rate": 9.114823815309841e-05, "loss": 1.7229, "step": 1467 }, { "epoch": 0.4455911367430566, "grad_norm": 0.5378713607788086, "learning_rate": 9.114216281895504e-05, "loss": 1.8761, "step": 1468 }, { "epoch": 0.44589467293974805, "grad_norm": 1.5058445930480957, "learning_rate": 9.113608748481167e-05, "loss": 2.0215, "step": 1469 }, { "epoch": 0.44619820913643954, "grad_norm": 0.44474056363105774, "learning_rate": 9.113001215066829e-05, "loss": 1.9797, "step": 1470 }, { "epoch": 0.446501745333131, "grad_norm": 0.4373909831047058, "learning_rate": 9.112393681652491e-05, "loss": 2.1042, "step": 1471 }, { "epoch": 0.4468052815298224, "grad_norm": 0.5322824716567993, "learning_rate": 9.111786148238154e-05, "loss": 1.6328, "step": 1472 }, { "epoch": 0.4471088177265139, "grad_norm": 0.4264838993549347, "learning_rate": 9.111178614823816e-05, "loss": 1.9764, "step": 1473 }, { "epoch": 0.44741235392320533, "grad_norm": 0.39688101410865784, "learning_rate": 9.110571081409478e-05, "loss": 1.8385, "step": 1474 }, { "epoch": 0.4477158901198968, "grad_norm": 0.3781752586364746, "learning_rate": 9.10996354799514e-05, "loss": 2.1128, "step": 1475 }, { "epoch": 0.44801942631658825, "grad_norm": 0.40686413645744324, "learning_rate": 9.109356014580802e-05, "loss": 2.033, "step": 1476 }, { "epoch": 0.4483229625132797, "grad_norm": 0.42852646112442017, "learning_rate": 9.108748481166465e-05, "loss": 1.714, "step": 1477 }, { "epoch": 0.4486264987099712, "grad_norm": 0.3613603413105011, "learning_rate": 9.108140947752127e-05, "loss": 1.2022, "step": 1478 }, { "epoch": 0.4489300349066626, "grad_norm": 0.4005518853664398, "learning_rate": 9.107533414337789e-05, "loss": 1.9914, "step": 1479 }, { "epoch": 0.4492335711033541, "grad_norm": 0.3479957580566406, "learning_rate": 9.106925880923452e-05, "loss": 1.7423, "step": 1480 }, { "epoch": 0.44953710730004554, "grad_norm": 0.43999946117401123, "learning_rate": 9.106318347509112e-05, "loss": 2.0009, "step": 1481 }, { "epoch": 0.44984064349673697, "grad_norm": 0.36132052540779114, "learning_rate": 9.105710814094775e-05, "loss": 1.916, "step": 1482 }, { "epoch": 0.45014417969342846, "grad_norm": 0.33822518587112427, "learning_rate": 9.105103280680438e-05, "loss": 1.6778, "step": 1483 }, { "epoch": 0.4504477158901199, "grad_norm": 0.35278624296188354, "learning_rate": 9.1044957472661e-05, "loss": 1.8943, "step": 1484 }, { "epoch": 0.4507512520868113, "grad_norm": 0.47397172451019287, "learning_rate": 9.103888213851762e-05, "loss": 1.7541, "step": 1485 }, { "epoch": 0.4510547882835028, "grad_norm": 0.3714633882045746, "learning_rate": 9.103280680437425e-05, "loss": 2.026, "step": 1486 }, { "epoch": 0.45135832448019425, "grad_norm": 1.6088794469833374, "learning_rate": 9.102673147023087e-05, "loss": 1.8904, "step": 1487 }, { "epoch": 0.45166186067688574, "grad_norm": 0.39234501123428345, "learning_rate": 9.102065613608749e-05, "loss": 1.9149, "step": 1488 }, { "epoch": 0.4519653968735772, "grad_norm": 0.4205072224140167, "learning_rate": 9.101458080194412e-05, "loss": 2.0117, "step": 1489 }, { "epoch": 0.4522689330702686, "grad_norm": 0.45428764820098877, "learning_rate": 9.100850546780073e-05, "loss": 1.9742, "step": 1490 }, { "epoch": 0.4525724692669601, "grad_norm": 0.35566025972366333, "learning_rate": 9.100243013365735e-05, "loss": 1.8445, "step": 1491 }, { "epoch": 0.45287600546365153, "grad_norm": 0.4020955562591553, "learning_rate": 9.099635479951398e-05, "loss": 1.9665, "step": 1492 }, { "epoch": 0.453179541660343, "grad_norm": 0.6123180985450745, "learning_rate": 9.09902794653706e-05, "loss": 1.6361, "step": 1493 }, { "epoch": 0.45348307785703446, "grad_norm": 0.44139203429222107, "learning_rate": 9.098420413122723e-05, "loss": 1.8156, "step": 1494 }, { "epoch": 0.4537866140537259, "grad_norm": 0.4224632680416107, "learning_rate": 9.097812879708383e-05, "loss": 1.8907, "step": 1495 }, { "epoch": 0.4540901502504174, "grad_norm": 0.40932169556617737, "learning_rate": 9.097205346294046e-05, "loss": 1.9179, "step": 1496 }, { "epoch": 0.4543936864471088, "grad_norm": 0.41995543241500854, "learning_rate": 9.09659781287971e-05, "loss": 1.7627, "step": 1497 }, { "epoch": 0.45469722264380025, "grad_norm": 0.33541586995124817, "learning_rate": 9.095990279465371e-05, "loss": 2.0334, "step": 1498 }, { "epoch": 0.45500075884049174, "grad_norm": 0.426469624042511, "learning_rate": 9.095382746051033e-05, "loss": 2.0636, "step": 1499 }, { "epoch": 0.45530429503718317, "grad_norm": 0.4037235379219055, "learning_rate": 9.094775212636696e-05, "loss": 1.9431, "step": 1500 }, { "epoch": 0.45560783123387466, "grad_norm": 0.35326942801475525, "learning_rate": 9.094167679222358e-05, "loss": 1.9306, "step": 1501 }, { "epoch": 0.4559113674305661, "grad_norm": 0.3722929358482361, "learning_rate": 9.09356014580802e-05, "loss": 1.2554, "step": 1502 }, { "epoch": 0.45621490362725753, "grad_norm": 0.5637504458427429, "learning_rate": 9.092952612393683e-05, "loss": 2.2883, "step": 1503 }, { "epoch": 0.456518439823949, "grad_norm": 0.4601937532424927, "learning_rate": 9.092345078979344e-05, "loss": 1.8051, "step": 1504 }, { "epoch": 0.45682197602064045, "grad_norm": 0.4153023660182953, "learning_rate": 9.091737545565006e-05, "loss": 2.045, "step": 1505 }, { "epoch": 0.45712551221733194, "grad_norm": 0.34770330786705017, "learning_rate": 9.091130012150668e-05, "loss": 1.8672, "step": 1506 }, { "epoch": 0.4574290484140234, "grad_norm": 0.3669261932373047, "learning_rate": 9.090522478736331e-05, "loss": 1.7184, "step": 1507 }, { "epoch": 0.4577325846107148, "grad_norm": 0.4862425923347473, "learning_rate": 9.089914945321994e-05, "loss": 1.7152, "step": 1508 }, { "epoch": 0.4580361208074063, "grad_norm": 0.39143872261047363, "learning_rate": 9.089307411907654e-05, "loss": 1.6634, "step": 1509 }, { "epoch": 0.45833965700409773, "grad_norm": 0.48413509130477905, "learning_rate": 9.088699878493317e-05, "loss": 1.8655, "step": 1510 }, { "epoch": 0.45864319320078917, "grad_norm": 0.438029944896698, "learning_rate": 9.08809234507898e-05, "loss": 1.8924, "step": 1511 }, { "epoch": 0.45894672939748066, "grad_norm": 0.39872634410858154, "learning_rate": 9.087484811664642e-05, "loss": 2.0113, "step": 1512 }, { "epoch": 0.4592502655941721, "grad_norm": 0.6361043453216553, "learning_rate": 9.086877278250304e-05, "loss": 2.004, "step": 1513 }, { "epoch": 0.4595538017908636, "grad_norm": 0.35867545008659363, "learning_rate": 9.086269744835967e-05, "loss": 1.6438, "step": 1514 }, { "epoch": 0.459857337987555, "grad_norm": 0.375430166721344, "learning_rate": 9.085662211421629e-05, "loss": 1.6383, "step": 1515 }, { "epoch": 0.46016087418424645, "grad_norm": 1.4054492712020874, "learning_rate": 9.08505467800729e-05, "loss": 1.6688, "step": 1516 }, { "epoch": 0.46046441038093794, "grad_norm": 0.35748517513275146, "learning_rate": 9.084447144592954e-05, "loss": 1.8517, "step": 1517 }, { "epoch": 0.4607679465776294, "grad_norm": 0.3136993944644928, "learning_rate": 9.083839611178615e-05, "loss": 2.0279, "step": 1518 }, { "epoch": 0.46107148277432086, "grad_norm": 0.39442840218544006, "learning_rate": 9.083232077764277e-05, "loss": 2.0558, "step": 1519 }, { "epoch": 0.4613750189710123, "grad_norm": 0.3278721272945404, "learning_rate": 9.082624544349939e-05, "loss": 1.8702, "step": 1520 }, { "epoch": 0.46167855516770373, "grad_norm": 0.6478224992752075, "learning_rate": 9.082017010935602e-05, "loss": 1.9689, "step": 1521 }, { "epoch": 0.4619820913643952, "grad_norm": 0.39185309410095215, "learning_rate": 9.081409477521265e-05, "loss": 1.871, "step": 1522 }, { "epoch": 0.46228562756108665, "grad_norm": 0.4506731927394867, "learning_rate": 9.080801944106925e-05, "loss": 1.3312, "step": 1523 }, { "epoch": 0.4625891637577781, "grad_norm": 0.36045706272125244, "learning_rate": 9.080194410692588e-05, "loss": 1.4391, "step": 1524 }, { "epoch": 0.4628926999544696, "grad_norm": 0.40836915373802185, "learning_rate": 9.079586877278252e-05, "loss": 2.0619, "step": 1525 }, { "epoch": 0.463196236151161, "grad_norm": 0.39617446064949036, "learning_rate": 9.078979343863913e-05, "loss": 1.3619, "step": 1526 }, { "epoch": 0.4634997723478525, "grad_norm": 0.41966769099235535, "learning_rate": 9.078371810449575e-05, "loss": 1.9004, "step": 1527 }, { "epoch": 0.46380330854454394, "grad_norm": 0.39979109168052673, "learning_rate": 9.077764277035238e-05, "loss": 1.8401, "step": 1528 }, { "epoch": 0.46410684474123537, "grad_norm": 0.3719238042831421, "learning_rate": 9.0771567436209e-05, "loss": 1.8935, "step": 1529 }, { "epoch": 0.46441038093792686, "grad_norm": 0.4243968427181244, "learning_rate": 9.076549210206562e-05, "loss": 1.9741, "step": 1530 }, { "epoch": 0.4647139171346183, "grad_norm": 0.46206673979759216, "learning_rate": 9.075941676792225e-05, "loss": 1.929, "step": 1531 }, { "epoch": 0.4650174533313098, "grad_norm": 0.49223679304122925, "learning_rate": 9.075334143377886e-05, "loss": 1.5711, "step": 1532 }, { "epoch": 0.4653209895280012, "grad_norm": 0.40891674160957336, "learning_rate": 9.074726609963548e-05, "loss": 1.3376, "step": 1533 }, { "epoch": 0.46562452572469265, "grad_norm": 0.4146333336830139, "learning_rate": 9.07411907654921e-05, "loss": 1.9774, "step": 1534 }, { "epoch": 0.46592806192138414, "grad_norm": 0.39834362268447876, "learning_rate": 9.073511543134873e-05, "loss": 1.8253, "step": 1535 }, { "epoch": 0.4662315981180756, "grad_norm": 0.4415489137172699, "learning_rate": 9.072904009720536e-05, "loss": 2.0604, "step": 1536 }, { "epoch": 0.466535134314767, "grad_norm": 0.40179288387298584, "learning_rate": 9.072296476306196e-05, "loss": 2.0014, "step": 1537 }, { "epoch": 0.4668386705114585, "grad_norm": 0.3849865794181824, "learning_rate": 9.07168894289186e-05, "loss": 2.0134, "step": 1538 }, { "epoch": 0.46714220670814993, "grad_norm": 0.4075673520565033, "learning_rate": 9.071081409477523e-05, "loss": 1.7784, "step": 1539 }, { "epoch": 0.4674457429048414, "grad_norm": 0.3913114368915558, "learning_rate": 9.070473876063183e-05, "loss": 2.0035, "step": 1540 }, { "epoch": 0.46774927910153286, "grad_norm": 1.1502317190170288, "learning_rate": 9.069866342648846e-05, "loss": 1.9697, "step": 1541 }, { "epoch": 0.4680528152982243, "grad_norm": 0.3618208169937134, "learning_rate": 9.069258809234509e-05, "loss": 1.8629, "step": 1542 }, { "epoch": 0.4683563514949158, "grad_norm": 0.5658997297286987, "learning_rate": 9.068651275820171e-05, "loss": 1.9923, "step": 1543 }, { "epoch": 0.4686598876916072, "grad_norm": 0.6084100008010864, "learning_rate": 9.068043742405833e-05, "loss": 2.2139, "step": 1544 }, { "epoch": 0.4689634238882987, "grad_norm": 0.45577460527420044, "learning_rate": 9.067436208991496e-05, "loss": 1.602, "step": 1545 }, { "epoch": 0.46926696008499014, "grad_norm": 0.38301292061805725, "learning_rate": 9.066828675577157e-05, "loss": 1.7644, "step": 1546 }, { "epoch": 0.46957049628168157, "grad_norm": 0.42755424976348877, "learning_rate": 9.066221142162819e-05, "loss": 1.7429, "step": 1547 }, { "epoch": 0.46987403247837306, "grad_norm": 0.3980792760848999, "learning_rate": 9.065613608748481e-05, "loss": 1.8362, "step": 1548 }, { "epoch": 0.4701775686750645, "grad_norm": 0.41398894786834717, "learning_rate": 9.065006075334144e-05, "loss": 1.7865, "step": 1549 }, { "epoch": 0.470481104871756, "grad_norm": 0.416704922914505, "learning_rate": 9.064398541919807e-05, "loss": 2.1474, "step": 1550 }, { "epoch": 0.4707846410684474, "grad_norm": 0.3613940477371216, "learning_rate": 9.063791008505467e-05, "loss": 1.9861, "step": 1551 }, { "epoch": 0.47108817726513885, "grad_norm": 0.3752197027206421, "learning_rate": 9.06318347509113e-05, "loss": 1.5374, "step": 1552 }, { "epoch": 0.47139171346183034, "grad_norm": 0.3436816930770874, "learning_rate": 9.062575941676794e-05, "loss": 2.0983, "step": 1553 }, { "epoch": 0.4716952496585218, "grad_norm": 0.40420001745224, "learning_rate": 9.061968408262454e-05, "loss": 1.8447, "step": 1554 }, { "epoch": 0.4719987858552132, "grad_norm": 0.5580700039863586, "learning_rate": 9.061360874848117e-05, "loss": 1.4499, "step": 1555 }, { "epoch": 0.4723023220519047, "grad_norm": 0.42122432589530945, "learning_rate": 9.06075334143378e-05, "loss": 1.9475, "step": 1556 }, { "epoch": 0.47260585824859613, "grad_norm": 0.37578698992729187, "learning_rate": 9.060145808019442e-05, "loss": 2.2064, "step": 1557 }, { "epoch": 0.4729093944452876, "grad_norm": 0.3756474554538727, "learning_rate": 9.059538274605104e-05, "loss": 1.6932, "step": 1558 }, { "epoch": 0.47321293064197906, "grad_norm": 0.45632341504096985, "learning_rate": 9.058930741190767e-05, "loss": 2.0437, "step": 1559 }, { "epoch": 0.4735164668386705, "grad_norm": 0.41071614623069763, "learning_rate": 9.058323207776428e-05, "loss": 1.9358, "step": 1560 }, { "epoch": 0.473820003035362, "grad_norm": 0.5713014006614685, "learning_rate": 9.05771567436209e-05, "loss": 1.9766, "step": 1561 }, { "epoch": 0.4741235392320534, "grad_norm": 0.3685849606990814, "learning_rate": 9.057108140947752e-05, "loss": 1.9811, "step": 1562 }, { "epoch": 0.4744270754287449, "grad_norm": 0.4106161594390869, "learning_rate": 9.056500607533415e-05, "loss": 1.6636, "step": 1563 }, { "epoch": 0.47473061162543634, "grad_norm": 0.4227912724018097, "learning_rate": 9.055893074119077e-05, "loss": 2.1302, "step": 1564 }, { "epoch": 0.4750341478221278, "grad_norm": 0.6117652058601379, "learning_rate": 9.055285540704738e-05, "loss": 1.9783, "step": 1565 }, { "epoch": 0.47533768401881926, "grad_norm": 0.34352535009384155, "learning_rate": 9.054678007290401e-05, "loss": 1.5251, "step": 1566 }, { "epoch": 0.4756412202155107, "grad_norm": 0.4252207577228546, "learning_rate": 9.054070473876065e-05, "loss": 2.1315, "step": 1567 }, { "epoch": 0.47594475641220213, "grad_norm": 0.4429045617580414, "learning_rate": 9.053462940461725e-05, "loss": 2.2023, "step": 1568 }, { "epoch": 0.4762482926088936, "grad_norm": 0.36126387119293213, "learning_rate": 9.052855407047388e-05, "loss": 2.0909, "step": 1569 }, { "epoch": 0.47655182880558505, "grad_norm": 0.40380343794822693, "learning_rate": 9.052247873633051e-05, "loss": 1.614, "step": 1570 }, { "epoch": 0.47685536500227654, "grad_norm": 0.37091997265815735, "learning_rate": 9.051640340218713e-05, "loss": 2.0191, "step": 1571 }, { "epoch": 0.477158901198968, "grad_norm": 0.3446311354637146, "learning_rate": 9.051032806804375e-05, "loss": 1.9633, "step": 1572 }, { "epoch": 0.4774624373956594, "grad_norm": 0.37436115741729736, "learning_rate": 9.050425273390038e-05, "loss": 1.7945, "step": 1573 }, { "epoch": 0.4777659735923509, "grad_norm": 0.36618462204933167, "learning_rate": 9.0498177399757e-05, "loss": 1.838, "step": 1574 }, { "epoch": 0.47806950978904234, "grad_norm": 0.4327848255634308, "learning_rate": 9.049210206561361e-05, "loss": 1.8218, "step": 1575 }, { "epoch": 0.4783730459857338, "grad_norm": 0.33957040309906006, "learning_rate": 9.048602673147023e-05, "loss": 1.3948, "step": 1576 }, { "epoch": 0.47867658218242526, "grad_norm": 0.34288668632507324, "learning_rate": 9.047995139732686e-05, "loss": 1.856, "step": 1577 }, { "epoch": 0.4789801183791167, "grad_norm": 0.42410871386528015, "learning_rate": 9.047387606318348e-05, "loss": 1.6138, "step": 1578 }, { "epoch": 0.4792836545758082, "grad_norm": 0.325130432844162, "learning_rate": 9.04678007290401e-05, "loss": 1.5631, "step": 1579 }, { "epoch": 0.4795871907724996, "grad_norm": 0.46126997470855713, "learning_rate": 9.046172539489672e-05, "loss": 1.8045, "step": 1580 }, { "epoch": 0.47989072696919105, "grad_norm": 0.4125445783138275, "learning_rate": 9.045565006075336e-05, "loss": 1.9054, "step": 1581 }, { "epoch": 0.48019426316588254, "grad_norm": 0.3341776430606842, "learning_rate": 9.044957472660996e-05, "loss": 1.9269, "step": 1582 }, { "epoch": 0.480497799362574, "grad_norm": 0.37623131275177, "learning_rate": 9.044349939246659e-05, "loss": 1.8621, "step": 1583 }, { "epoch": 0.48080133555926546, "grad_norm": 0.42698100209236145, "learning_rate": 9.043742405832322e-05, "loss": 1.9414, "step": 1584 }, { "epoch": 0.4811048717559569, "grad_norm": 0.39322131872177124, "learning_rate": 9.043134872417984e-05, "loss": 1.6427, "step": 1585 }, { "epoch": 0.48140840795264833, "grad_norm": 0.4348810315132141, "learning_rate": 9.042527339003646e-05, "loss": 2.0169, "step": 1586 }, { "epoch": 0.4817119441493398, "grad_norm": 0.42536425590515137, "learning_rate": 9.041919805589307e-05, "loss": 1.79, "step": 1587 }, { "epoch": 0.48201548034603126, "grad_norm": 0.35837772488594055, "learning_rate": 9.04131227217497e-05, "loss": 2.0152, "step": 1588 }, { "epoch": 0.48231901654272274, "grad_norm": 0.4053284525871277, "learning_rate": 9.040704738760632e-05, "loss": 1.9912, "step": 1589 }, { "epoch": 0.4826225527394142, "grad_norm": 1.0780633687973022, "learning_rate": 9.040097205346294e-05, "loss": 2.3151, "step": 1590 }, { "epoch": 0.4829260889361056, "grad_norm": 0.3571546673774719, "learning_rate": 9.039489671931957e-05, "loss": 2.007, "step": 1591 }, { "epoch": 0.4832296251327971, "grad_norm": 1.1343384981155396, "learning_rate": 9.038882138517619e-05, "loss": 2.2369, "step": 1592 }, { "epoch": 0.48353316132948854, "grad_norm": 0.43974751234054565, "learning_rate": 9.03827460510328e-05, "loss": 2.1774, "step": 1593 }, { "epoch": 0.48383669752617997, "grad_norm": 0.5721186995506287, "learning_rate": 9.037667071688943e-05, "loss": 1.9005, "step": 1594 }, { "epoch": 0.48414023372287146, "grad_norm": 0.43478089570999146, "learning_rate": 9.037059538274607e-05, "loss": 1.8163, "step": 1595 }, { "epoch": 0.4844437699195629, "grad_norm": 0.4186250865459442, "learning_rate": 9.036452004860267e-05, "loss": 1.5557, "step": 1596 }, { "epoch": 0.4847473061162544, "grad_norm": 0.363033264875412, "learning_rate": 9.03584447144593e-05, "loss": 1.9102, "step": 1597 }, { "epoch": 0.4850508423129458, "grad_norm": 0.39680740237236023, "learning_rate": 9.035236938031593e-05, "loss": 1.8232, "step": 1598 }, { "epoch": 0.48535437850963725, "grad_norm": 0.3754984736442566, "learning_rate": 9.034629404617255e-05, "loss": 1.7743, "step": 1599 }, { "epoch": 0.48565791470632874, "grad_norm": 0.4426131546497345, "learning_rate": 9.034021871202917e-05, "loss": 1.8806, "step": 1600 }, { "epoch": 0.4859614509030202, "grad_norm": 0.37828418612480164, "learning_rate": 9.033414337788578e-05, "loss": 1.7686, "step": 1601 }, { "epoch": 0.48626498709971167, "grad_norm": 0.44848862290382385, "learning_rate": 9.032806804374241e-05, "loss": 1.6695, "step": 1602 }, { "epoch": 0.4865685232964031, "grad_norm": 0.357838898897171, "learning_rate": 9.032199270959903e-05, "loss": 1.8404, "step": 1603 }, { "epoch": 0.48687205949309453, "grad_norm": 0.6578190326690674, "learning_rate": 9.031591737545565e-05, "loss": 1.3599, "step": 1604 }, { "epoch": 0.487175595689786, "grad_norm": 0.4240557849407196, "learning_rate": 9.030984204131228e-05, "loss": 1.3183, "step": 1605 }, { "epoch": 0.48747913188647746, "grad_norm": 0.4170602262020111, "learning_rate": 9.03037667071689e-05, "loss": 2.193, "step": 1606 }, { "epoch": 0.4877826680831689, "grad_norm": 0.39807751774787903, "learning_rate": 9.029769137302551e-05, "loss": 1.5821, "step": 1607 }, { "epoch": 0.4880862042798604, "grad_norm": 0.54439777135849, "learning_rate": 9.029161603888214e-05, "loss": 1.6293, "step": 1608 }, { "epoch": 0.4883897404765518, "grad_norm": 0.39446118474006653, "learning_rate": 9.028554070473878e-05, "loss": 1.6712, "step": 1609 }, { "epoch": 0.4886932766732433, "grad_norm": 0.42656177282333374, "learning_rate": 9.027946537059538e-05, "loss": 1.7296, "step": 1610 }, { "epoch": 0.48899681286993474, "grad_norm": 0.4832558333873749, "learning_rate": 9.027339003645201e-05, "loss": 1.8888, "step": 1611 }, { "epoch": 0.4893003490666262, "grad_norm": 0.44794905185699463, "learning_rate": 9.026731470230864e-05, "loss": 1.6074, "step": 1612 }, { "epoch": 0.48960388526331766, "grad_norm": 0.344939649105072, "learning_rate": 9.026123936816524e-05, "loss": 1.9943, "step": 1613 }, { "epoch": 0.4899074214600091, "grad_norm": 0.42949387431144714, "learning_rate": 9.025516403402188e-05, "loss": 1.9922, "step": 1614 }, { "epoch": 0.4902109576567006, "grad_norm": 0.39325597882270813, "learning_rate": 9.024908869987849e-05, "loss": 1.8447, "step": 1615 }, { "epoch": 0.490514493853392, "grad_norm": 0.3923071622848511, "learning_rate": 9.024301336573512e-05, "loss": 1.7969, "step": 1616 }, { "epoch": 0.49081803005008345, "grad_norm": 0.3386680483818054, "learning_rate": 9.023693803159174e-05, "loss": 1.9222, "step": 1617 }, { "epoch": 0.49112156624677494, "grad_norm": 0.40206924080848694, "learning_rate": 9.023086269744836e-05, "loss": 2.1749, "step": 1618 }, { "epoch": 0.4914251024434664, "grad_norm": 0.36428967118263245, "learning_rate": 9.022478736330499e-05, "loss": 1.4163, "step": 1619 }, { "epoch": 0.4917286386401578, "grad_norm": 0.4516347050666809, "learning_rate": 9.02187120291616e-05, "loss": 1.8412, "step": 1620 }, { "epoch": 0.4920321748368493, "grad_norm": 0.40233004093170166, "learning_rate": 9.021263669501822e-05, "loss": 1.6124, "step": 1621 }, { "epoch": 0.49233571103354073, "grad_norm": 0.4065000116825104, "learning_rate": 9.020656136087485e-05, "loss": 1.7479, "step": 1622 }, { "epoch": 0.4926392472302322, "grad_norm": 0.42242977023124695, "learning_rate": 9.020048602673149e-05, "loss": 2.3126, "step": 1623 }, { "epoch": 0.49294278342692366, "grad_norm": 0.3774438500404358, "learning_rate": 9.019441069258809e-05, "loss": 2.0075, "step": 1624 }, { "epoch": 0.4932463196236151, "grad_norm": 0.3382234275341034, "learning_rate": 9.018833535844472e-05, "loss": 1.8028, "step": 1625 }, { "epoch": 0.4935498558203066, "grad_norm": 0.443689227104187, "learning_rate": 9.018226002430135e-05, "loss": 1.9506, "step": 1626 }, { "epoch": 0.493853392016998, "grad_norm": 0.32814332842826843, "learning_rate": 9.017618469015795e-05, "loss": 1.85, "step": 1627 }, { "epoch": 0.4941569282136895, "grad_norm": 0.691228449344635, "learning_rate": 9.017010935601459e-05, "loss": 1.3521, "step": 1628 }, { "epoch": 0.49446046441038094, "grad_norm": 0.43137383460998535, "learning_rate": 9.01640340218712e-05, "loss": 1.9107, "step": 1629 }, { "epoch": 0.4947640006070724, "grad_norm": 0.3534761965274811, "learning_rate": 9.015795868772783e-05, "loss": 1.3167, "step": 1630 }, { "epoch": 0.49506753680376386, "grad_norm": 0.5987849831581116, "learning_rate": 9.015188335358445e-05, "loss": 2.0258, "step": 1631 }, { "epoch": 0.4953710730004553, "grad_norm": 0.38956066966056824, "learning_rate": 9.014580801944107e-05, "loss": 1.5787, "step": 1632 }, { "epoch": 0.4956746091971468, "grad_norm": 0.43218016624450684, "learning_rate": 9.01397326852977e-05, "loss": 1.4582, "step": 1633 }, { "epoch": 0.4959781453938382, "grad_norm": 0.8035671710968018, "learning_rate": 9.013365735115432e-05, "loss": 2.2415, "step": 1634 }, { "epoch": 0.49628168159052966, "grad_norm": 0.41837078332901, "learning_rate": 9.012758201701093e-05, "loss": 1.9213, "step": 1635 }, { "epoch": 0.49658521778722114, "grad_norm": 0.48308447003364563, "learning_rate": 9.012150668286756e-05, "loss": 1.5812, "step": 1636 }, { "epoch": 0.4968887539839126, "grad_norm": 0.4080790877342224, "learning_rate": 9.011543134872418e-05, "loss": 1.8055, "step": 1637 }, { "epoch": 0.497192290180604, "grad_norm": 0.409053772687912, "learning_rate": 9.01093560145808e-05, "loss": 2.0126, "step": 1638 }, { "epoch": 0.4974958263772955, "grad_norm": 0.41290226578712463, "learning_rate": 9.010328068043743e-05, "loss": 1.8663, "step": 1639 }, { "epoch": 0.49779936257398694, "grad_norm": 0.36996471881866455, "learning_rate": 9.009720534629406e-05, "loss": 1.9887, "step": 1640 }, { "epoch": 0.4981028987706784, "grad_norm": 0.4474611282348633, "learning_rate": 9.009113001215066e-05, "loss": 1.6636, "step": 1641 }, { "epoch": 0.49840643496736986, "grad_norm": 0.3717537224292755, "learning_rate": 9.00850546780073e-05, "loss": 1.7292, "step": 1642 }, { "epoch": 0.4987099711640613, "grad_norm": 0.6839573979377747, "learning_rate": 9.007897934386391e-05, "loss": 2.1037, "step": 1643 }, { "epoch": 0.4990135073607528, "grad_norm": 0.3877841532230377, "learning_rate": 9.007290400972054e-05, "loss": 2.1324, "step": 1644 }, { "epoch": 0.4993170435574442, "grad_norm": 0.42409414052963257, "learning_rate": 9.006682867557716e-05, "loss": 2.0867, "step": 1645 }, { "epoch": 0.4996205797541357, "grad_norm": 0.38519206643104553, "learning_rate": 9.006075334143378e-05, "loss": 2.0589, "step": 1646 }, { "epoch": 0.49992411595082714, "grad_norm": 0.3910469710826874, "learning_rate": 9.005467800729041e-05, "loss": 1.7952, "step": 1647 }, { "epoch": 0.5002276521475186, "grad_norm": 0.3802652359008789, "learning_rate": 9.004860267314703e-05, "loss": 1.8394, "step": 1648 }, { "epoch": 0.5005311883442101, "grad_norm": 1.9837124347686768, "learning_rate": 9.004252733900364e-05, "loss": 1.9703, "step": 1649 }, { "epoch": 0.5008347245409015, "grad_norm": 0.40731772780418396, "learning_rate": 9.003645200486027e-05, "loss": 1.665, "step": 1650 }, { "epoch": 0.5011382607375929, "grad_norm": 0.4358116686344147, "learning_rate": 9.003037667071689e-05, "loss": 1.369, "step": 1651 }, { "epoch": 0.5014417969342844, "grad_norm": 0.49716782569885254, "learning_rate": 9.002430133657351e-05, "loss": 2.2221, "step": 1652 }, { "epoch": 0.5017453331309759, "grad_norm": 0.41779419779777527, "learning_rate": 9.001822600243014e-05, "loss": 2.0265, "step": 1653 }, { "epoch": 0.5020488693276673, "grad_norm": 0.40375036001205444, "learning_rate": 9.001215066828677e-05, "loss": 1.9761, "step": 1654 }, { "epoch": 0.5023524055243588, "grad_norm": 0.3802977204322815, "learning_rate": 9.000607533414337e-05, "loss": 1.76, "step": 1655 }, { "epoch": 0.5026559417210502, "grad_norm": 0.33772045373916626, "learning_rate": 9e-05, "loss": 1.4531, "step": 1656 }, { "epoch": 0.5029594779177416, "grad_norm": 0.4556722640991211, "learning_rate": 8.999392466585662e-05, "loss": 1.5091, "step": 1657 }, { "epoch": 0.5032630141144332, "grad_norm": 0.37798872590065, "learning_rate": 8.998784933171325e-05, "loss": 1.378, "step": 1658 }, { "epoch": 0.5035665503111246, "grad_norm": 0.3921298086643219, "learning_rate": 8.998177399756987e-05, "loss": 1.901, "step": 1659 }, { "epoch": 0.5038700865078161, "grad_norm": 0.39993181824684143, "learning_rate": 8.997569866342649e-05, "loss": 1.9796, "step": 1660 }, { "epoch": 0.5041736227045075, "grad_norm": 0.41690680384635925, "learning_rate": 8.996962332928312e-05, "loss": 1.6252, "step": 1661 }, { "epoch": 0.5044771589011989, "grad_norm": 0.4252752363681793, "learning_rate": 8.996354799513974e-05, "loss": 1.9233, "step": 1662 }, { "epoch": 0.5047806950978905, "grad_norm": 0.43236085772514343, "learning_rate": 8.995747266099635e-05, "loss": 1.5527, "step": 1663 }, { "epoch": 0.5050842312945819, "grad_norm": 0.32605788111686707, "learning_rate": 8.995139732685298e-05, "loss": 1.8349, "step": 1664 }, { "epoch": 0.5053877674912733, "grad_norm": 0.8619269728660583, "learning_rate": 8.99453219927096e-05, "loss": 1.3305, "step": 1665 }, { "epoch": 0.5056913036879648, "grad_norm": 0.429949551820755, "learning_rate": 8.993924665856622e-05, "loss": 1.531, "step": 1666 }, { "epoch": 0.5059948398846562, "grad_norm": 0.38018864393234253, "learning_rate": 8.993317132442285e-05, "loss": 1.4132, "step": 1667 }, { "epoch": 0.5062983760813476, "grad_norm": 0.411668986082077, "learning_rate": 8.992709599027948e-05, "loss": 2.0736, "step": 1668 }, { "epoch": 0.5066019122780392, "grad_norm": 0.41500651836395264, "learning_rate": 8.992102065613608e-05, "loss": 2.0055, "step": 1669 }, { "epoch": 0.5069054484747306, "grad_norm": 0.3659593164920807, "learning_rate": 8.991494532199272e-05, "loss": 1.8854, "step": 1670 }, { "epoch": 0.5072089846714221, "grad_norm": 0.4081539809703827, "learning_rate": 8.990886998784933e-05, "loss": 1.9027, "step": 1671 }, { "epoch": 0.5075125208681135, "grad_norm": 0.4111250340938568, "learning_rate": 8.990279465370596e-05, "loss": 1.7838, "step": 1672 }, { "epoch": 0.5078160570648049, "grad_norm": 0.37269532680511475, "learning_rate": 8.989671931956258e-05, "loss": 1.8353, "step": 1673 }, { "epoch": 0.5081195932614965, "grad_norm": 0.4204343259334564, "learning_rate": 8.98906439854192e-05, "loss": 1.5011, "step": 1674 }, { "epoch": 0.5084231294581879, "grad_norm": 0.4515773355960846, "learning_rate": 8.988456865127583e-05, "loss": 1.5303, "step": 1675 }, { "epoch": 0.5087266656548793, "grad_norm": 0.44019004702568054, "learning_rate": 8.987849331713245e-05, "loss": 2.0108, "step": 1676 }, { "epoch": 0.5090302018515708, "grad_norm": 0.47351813316345215, "learning_rate": 8.987241798298906e-05, "loss": 2.0518, "step": 1677 }, { "epoch": 0.5093337380482622, "grad_norm": 0.40282347798347473, "learning_rate": 8.98663426488457e-05, "loss": 1.99, "step": 1678 }, { "epoch": 0.5096372742449538, "grad_norm": 0.49869832396507263, "learning_rate": 8.986026731470231e-05, "loss": 1.9592, "step": 1679 }, { "epoch": 0.5099408104416452, "grad_norm": 0.36178889870643616, "learning_rate": 8.985419198055893e-05, "loss": 1.927, "step": 1680 }, { "epoch": 0.5102443466383366, "grad_norm": 0.3670339584350586, "learning_rate": 8.984811664641556e-05, "loss": 1.9516, "step": 1681 }, { "epoch": 0.510547882835028, "grad_norm": 0.3458341658115387, "learning_rate": 8.984204131227218e-05, "loss": 1.8203, "step": 1682 }, { "epoch": 0.5108514190317195, "grad_norm": 0.4636301100254059, "learning_rate": 8.98359659781288e-05, "loss": 1.7146, "step": 1683 }, { "epoch": 0.511154955228411, "grad_norm": 0.45436516404151917, "learning_rate": 8.982989064398543e-05, "loss": 1.8514, "step": 1684 }, { "epoch": 0.5114584914251025, "grad_norm": 0.46940287947654724, "learning_rate": 8.982381530984204e-05, "loss": 2.0162, "step": 1685 }, { "epoch": 0.5117620276217939, "grad_norm": 0.4405171573162079, "learning_rate": 8.981773997569866e-05, "loss": 2.0003, "step": 1686 }, { "epoch": 0.5120655638184853, "grad_norm": 0.4306286871433258, "learning_rate": 8.981166464155529e-05, "loss": 1.8338, "step": 1687 }, { "epoch": 0.5123691000151768, "grad_norm": 0.43476733565330505, "learning_rate": 8.980558930741191e-05, "loss": 1.623, "step": 1688 }, { "epoch": 0.5126726362118683, "grad_norm": 0.3655628561973572, "learning_rate": 8.979951397326854e-05, "loss": 2.098, "step": 1689 }, { "epoch": 0.5129761724085597, "grad_norm": 0.36685287952423096, "learning_rate": 8.979343863912516e-05, "loss": 1.9533, "step": 1690 }, { "epoch": 0.5132797086052512, "grad_norm": 0.4131629765033722, "learning_rate": 8.978736330498177e-05, "loss": 1.8093, "step": 1691 }, { "epoch": 0.5135832448019426, "grad_norm": 0.36607033014297485, "learning_rate": 8.97812879708384e-05, "loss": 1.4293, "step": 1692 }, { "epoch": 0.513886780998634, "grad_norm": 0.4478306174278259, "learning_rate": 8.977521263669502e-05, "loss": 1.916, "step": 1693 }, { "epoch": 0.5141903171953256, "grad_norm": 0.4570290446281433, "learning_rate": 8.976913730255164e-05, "loss": 1.7859, "step": 1694 }, { "epoch": 0.514493853392017, "grad_norm": 0.46024757623672485, "learning_rate": 8.976306196840827e-05, "loss": 1.6032, "step": 1695 }, { "epoch": 0.5147973895887085, "grad_norm": 0.40080446004867554, "learning_rate": 8.975698663426489e-05, "loss": 1.7693, "step": 1696 }, { "epoch": 0.5151009257853999, "grad_norm": 0.3736198842525482, "learning_rate": 8.97509113001215e-05, "loss": 1.8185, "step": 1697 }, { "epoch": 0.5154044619820913, "grad_norm": 0.7444111704826355, "learning_rate": 8.974483596597814e-05, "loss": 1.9927, "step": 1698 }, { "epoch": 0.5157079981787828, "grad_norm": 0.42862579226493835, "learning_rate": 8.973876063183475e-05, "loss": 1.9946, "step": 1699 }, { "epoch": 0.5160115343754743, "grad_norm": 0.5150566101074219, "learning_rate": 8.973268529769137e-05, "loss": 1.6675, "step": 1700 }, { "epoch": 0.5163150705721657, "grad_norm": 0.4260749816894531, "learning_rate": 8.9726609963548e-05, "loss": 2.0212, "step": 1701 }, { "epoch": 0.5166186067688572, "grad_norm": 0.3930248022079468, "learning_rate": 8.972053462940462e-05, "loss": 1.8982, "step": 1702 }, { "epoch": 0.5169221429655486, "grad_norm": 0.40357765555381775, "learning_rate": 8.971445929526125e-05, "loss": 1.8368, "step": 1703 }, { "epoch": 0.51722567916224, "grad_norm": 0.3957735300064087, "learning_rate": 8.970838396111787e-05, "loss": 1.7569, "step": 1704 }, { "epoch": 0.5175292153589316, "grad_norm": 0.3867725431919098, "learning_rate": 8.970230862697448e-05, "loss": 2.0536, "step": 1705 }, { "epoch": 0.517832751555623, "grad_norm": 0.38773855566978455, "learning_rate": 8.969623329283111e-05, "loss": 1.9507, "step": 1706 }, { "epoch": 0.5181362877523145, "grad_norm": 0.4161403775215149, "learning_rate": 8.969015795868773e-05, "loss": 1.1656, "step": 1707 }, { "epoch": 0.5184398239490059, "grad_norm": 0.40050750970840454, "learning_rate": 8.968408262454435e-05, "loss": 1.9384, "step": 1708 }, { "epoch": 0.5187433601456973, "grad_norm": 0.43072274327278137, "learning_rate": 8.967800729040098e-05, "loss": 1.9838, "step": 1709 }, { "epoch": 0.5190468963423889, "grad_norm": 0.4291669428348541, "learning_rate": 8.96719319562576e-05, "loss": 2.0588, "step": 1710 }, { "epoch": 0.5193504325390803, "grad_norm": 0.3524603545665741, "learning_rate": 8.966585662211422e-05, "loss": 1.9433, "step": 1711 }, { "epoch": 0.5196539687357717, "grad_norm": 0.42883431911468506, "learning_rate": 8.965978128797085e-05, "loss": 2.0879, "step": 1712 }, { "epoch": 0.5199575049324632, "grad_norm": 0.3711095452308655, "learning_rate": 8.965370595382746e-05, "loss": 1.8024, "step": 1713 }, { "epoch": 0.5202610411291546, "grad_norm": 0.3979575037956238, "learning_rate": 8.964763061968408e-05, "loss": 1.889, "step": 1714 }, { "epoch": 0.5205645773258462, "grad_norm": 0.3781624436378479, "learning_rate": 8.964155528554071e-05, "loss": 1.4032, "step": 1715 }, { "epoch": 0.5208681135225376, "grad_norm": 0.4285725951194763, "learning_rate": 8.963547995139733e-05, "loss": 1.5933, "step": 1716 }, { "epoch": 0.521171649719229, "grad_norm": 0.40880918502807617, "learning_rate": 8.962940461725396e-05, "loss": 1.4162, "step": 1717 }, { "epoch": 0.5214751859159205, "grad_norm": 0.4186420440673828, "learning_rate": 8.962332928311058e-05, "loss": 1.6866, "step": 1718 }, { "epoch": 0.5217787221126119, "grad_norm": 0.3772728443145752, "learning_rate": 8.96172539489672e-05, "loss": 1.8005, "step": 1719 }, { "epoch": 0.5220822583093034, "grad_norm": 0.4102610945701599, "learning_rate": 8.961117861482382e-05, "loss": 2.0691, "step": 1720 }, { "epoch": 0.5223857945059949, "grad_norm": 0.463878870010376, "learning_rate": 8.960510328068044e-05, "loss": 2.2497, "step": 1721 }, { "epoch": 0.5226893307026863, "grad_norm": 0.3314138948917389, "learning_rate": 8.959902794653706e-05, "loss": 1.6946, "step": 1722 }, { "epoch": 0.5229928668993777, "grad_norm": 0.7187567949295044, "learning_rate": 8.959295261239369e-05, "loss": 1.7443, "step": 1723 }, { "epoch": 0.5232964030960692, "grad_norm": 0.42266663908958435, "learning_rate": 8.958687727825031e-05, "loss": 1.9827, "step": 1724 }, { "epoch": 0.5235999392927606, "grad_norm": 0.39689430594444275, "learning_rate": 8.958080194410693e-05, "loss": 1.8162, "step": 1725 }, { "epoch": 0.5239034754894522, "grad_norm": 0.36018458008766174, "learning_rate": 8.957472660996356e-05, "loss": 1.9901, "step": 1726 }, { "epoch": 0.5242070116861436, "grad_norm": 0.29599374532699585, "learning_rate": 8.956865127582017e-05, "loss": 1.5581, "step": 1727 }, { "epoch": 0.524510547882835, "grad_norm": 0.3953525424003601, "learning_rate": 8.956257594167679e-05, "loss": 1.8398, "step": 1728 }, { "epoch": 0.5248140840795265, "grad_norm": 0.5847448110580444, "learning_rate": 8.955650060753342e-05, "loss": 1.6539, "step": 1729 }, { "epoch": 0.5251176202762179, "grad_norm": 0.37169334292411804, "learning_rate": 8.955042527339004e-05, "loss": 1.0603, "step": 1730 }, { "epoch": 0.5254211564729094, "grad_norm": 0.3689024746417999, "learning_rate": 8.954434993924667e-05, "loss": 1.4661, "step": 1731 }, { "epoch": 0.5257246926696009, "grad_norm": 0.39325040578842163, "learning_rate": 8.953827460510329e-05, "loss": 1.9562, "step": 1732 }, { "epoch": 0.5260282288662923, "grad_norm": 0.5037636756896973, "learning_rate": 8.95321992709599e-05, "loss": 1.5998, "step": 1733 }, { "epoch": 0.5263317650629837, "grad_norm": 0.38126620650291443, "learning_rate": 8.952612393681654e-05, "loss": 1.8444, "step": 1734 }, { "epoch": 0.5266353012596752, "grad_norm": 0.4108048379421234, "learning_rate": 8.952004860267315e-05, "loss": 1.5128, "step": 1735 }, { "epoch": 0.5269388374563667, "grad_norm": 0.3624730408191681, "learning_rate": 8.951397326852977e-05, "loss": 1.9051, "step": 1736 }, { "epoch": 0.5272423736530581, "grad_norm": 0.374348908662796, "learning_rate": 8.95078979343864e-05, "loss": 1.5501, "step": 1737 }, { "epoch": 0.5275459098497496, "grad_norm": 0.504650890827179, "learning_rate": 8.950182260024302e-05, "loss": 1.9115, "step": 1738 }, { "epoch": 0.527849446046441, "grad_norm": 0.31486794352531433, "learning_rate": 8.949574726609964e-05, "loss": 1.7507, "step": 1739 }, { "epoch": 0.5281529822431325, "grad_norm": 0.38089415431022644, "learning_rate": 8.948967193195627e-05, "loss": 1.9424, "step": 1740 }, { "epoch": 0.528456518439824, "grad_norm": 0.5939797163009644, "learning_rate": 8.948359659781288e-05, "loss": 2.0123, "step": 1741 }, { "epoch": 0.5287600546365154, "grad_norm": 0.4175383746623993, "learning_rate": 8.94775212636695e-05, "loss": 1.9405, "step": 1742 }, { "epoch": 0.5290635908332069, "grad_norm": 0.3071494996547699, "learning_rate": 8.947144592952613e-05, "loss": 0.9484, "step": 1743 }, { "epoch": 0.5293671270298983, "grad_norm": 0.4822414219379425, "learning_rate": 8.946537059538275e-05, "loss": 1.7093, "step": 1744 }, { "epoch": 0.5296706632265897, "grad_norm": 0.8036310076713562, "learning_rate": 8.945929526123938e-05, "loss": 2.1441, "step": 1745 }, { "epoch": 0.5299741994232813, "grad_norm": 0.42779991030693054, "learning_rate": 8.9453219927096e-05, "loss": 1.9736, "step": 1746 }, { "epoch": 0.5302777356199727, "grad_norm": 0.37124693393707275, "learning_rate": 8.944714459295261e-05, "loss": 2.0578, "step": 1747 }, { "epoch": 0.5305812718166641, "grad_norm": 0.4504419267177582, "learning_rate": 8.944106925880925e-05, "loss": 2.0148, "step": 1748 }, { "epoch": 0.5308848080133556, "grad_norm": 0.370437353849411, "learning_rate": 8.943499392466586e-05, "loss": 1.7045, "step": 1749 }, { "epoch": 0.531188344210047, "grad_norm": 0.4089522063732147, "learning_rate": 8.942891859052248e-05, "loss": 1.7649, "step": 1750 }, { "epoch": 0.5314918804067384, "grad_norm": 0.3770054280757904, "learning_rate": 8.942284325637911e-05, "loss": 1.8252, "step": 1751 }, { "epoch": 0.53179541660343, "grad_norm": 0.45180705189704895, "learning_rate": 8.941676792223573e-05, "loss": 0.9027, "step": 1752 }, { "epoch": 0.5320989528001214, "grad_norm": 0.415444016456604, "learning_rate": 8.941069258809235e-05, "loss": 1.8366, "step": 1753 }, { "epoch": 0.5324024889968129, "grad_norm": 0.4421723783016205, "learning_rate": 8.940461725394898e-05, "loss": 1.6414, "step": 1754 }, { "epoch": 0.5327060251935043, "grad_norm": 0.3791792392730713, "learning_rate": 8.93985419198056e-05, "loss": 2.0668, "step": 1755 }, { "epoch": 0.5330095613901957, "grad_norm": 0.40155166387557983, "learning_rate": 8.939246658566221e-05, "loss": 1.8144, "step": 1756 }, { "epoch": 0.5333130975868873, "grad_norm": 0.38897809386253357, "learning_rate": 8.938639125151884e-05, "loss": 1.4815, "step": 1757 }, { "epoch": 0.5336166337835787, "grad_norm": 0.35486680269241333, "learning_rate": 8.938031591737546e-05, "loss": 1.8673, "step": 1758 }, { "epoch": 0.5339201699802701, "grad_norm": 0.33397093415260315, "learning_rate": 8.937424058323208e-05, "loss": 1.7756, "step": 1759 }, { "epoch": 0.5342237061769616, "grad_norm": 0.43346378207206726, "learning_rate": 8.936816524908871e-05, "loss": 1.8367, "step": 1760 }, { "epoch": 0.534527242373653, "grad_norm": 0.37739312648773193, "learning_rate": 8.936208991494532e-05, "loss": 2.1916, "step": 1761 }, { "epoch": 0.5348307785703446, "grad_norm": 0.32218697667121887, "learning_rate": 8.935601458080196e-05, "loss": 1.9885, "step": 1762 }, { "epoch": 0.535134314767036, "grad_norm": 0.37920355796813965, "learning_rate": 8.934993924665856e-05, "loss": 1.7163, "step": 1763 }, { "epoch": 0.5354378509637274, "grad_norm": 0.3895961344242096, "learning_rate": 8.934386391251519e-05, "loss": 1.8757, "step": 1764 }, { "epoch": 0.5357413871604189, "grad_norm": 0.4898541271686554, "learning_rate": 8.933778857837182e-05, "loss": 1.7097, "step": 1765 }, { "epoch": 0.5360449233571103, "grad_norm": 0.3851979672908783, "learning_rate": 8.933171324422844e-05, "loss": 1.8913, "step": 1766 }, { "epoch": 0.5363484595538018, "grad_norm": 0.3567551076412201, "learning_rate": 8.932563791008506e-05, "loss": 1.8789, "step": 1767 }, { "epoch": 0.5366519957504933, "grad_norm": 0.4687878489494324, "learning_rate": 8.931956257594169e-05, "loss": 1.936, "step": 1768 }, { "epoch": 0.5369555319471847, "grad_norm": 0.36735373735427856, "learning_rate": 8.93134872417983e-05, "loss": 2.0743, "step": 1769 }, { "epoch": 0.5372590681438761, "grad_norm": 0.508160412311554, "learning_rate": 8.930741190765492e-05, "loss": 1.9822, "step": 1770 }, { "epoch": 0.5375626043405676, "grad_norm": 0.40640148520469666, "learning_rate": 8.930133657351155e-05, "loss": 1.5255, "step": 1771 }, { "epoch": 0.5378661405372591, "grad_norm": 0.7253953218460083, "learning_rate": 8.929526123936817e-05, "loss": 1.8898, "step": 1772 }, { "epoch": 0.5381696767339506, "grad_norm": 0.4226602017879486, "learning_rate": 8.928918590522479e-05, "loss": 1.7197, "step": 1773 }, { "epoch": 0.538473212930642, "grad_norm": 0.42332541942596436, "learning_rate": 8.928311057108142e-05, "loss": 1.9117, "step": 1774 }, { "epoch": 0.5387767491273334, "grad_norm": 0.8125683665275574, "learning_rate": 8.927703523693803e-05, "loss": 1.7325, "step": 1775 }, { "epoch": 0.5390802853240249, "grad_norm": 0.44765642285346985, "learning_rate": 8.927095990279467e-05, "loss": 2.0353, "step": 1776 }, { "epoch": 0.5393838215207164, "grad_norm": 0.45518067479133606, "learning_rate": 8.926488456865127e-05, "loss": 1.9536, "step": 1777 }, { "epoch": 0.5396873577174078, "grad_norm": 0.3856181800365448, "learning_rate": 8.92588092345079e-05, "loss": 1.456, "step": 1778 }, { "epoch": 0.5399908939140993, "grad_norm": 0.41640815138816833, "learning_rate": 8.925273390036453e-05, "loss": 1.7481, "step": 1779 }, { "epoch": 0.5402944301107907, "grad_norm": 0.3643503189086914, "learning_rate": 8.924665856622115e-05, "loss": 1.8541, "step": 1780 }, { "epoch": 0.5405979663074821, "grad_norm": 0.40610817074775696, "learning_rate": 8.924058323207777e-05, "loss": 1.8021, "step": 1781 }, { "epoch": 0.5409015025041736, "grad_norm": 1.8827602863311768, "learning_rate": 8.92345078979344e-05, "loss": 1.4657, "step": 1782 }, { "epoch": 0.5412050387008651, "grad_norm": 0.4862421154975891, "learning_rate": 8.922843256379101e-05, "loss": 1.6917, "step": 1783 }, { "epoch": 0.5415085748975565, "grad_norm": 0.4079034626483917, "learning_rate": 8.922235722964763e-05, "loss": 1.4408, "step": 1784 }, { "epoch": 0.541812111094248, "grad_norm": 0.37174421548843384, "learning_rate": 8.921628189550426e-05, "loss": 1.8314, "step": 1785 }, { "epoch": 0.5421156472909394, "grad_norm": 0.4223754107952118, "learning_rate": 8.921020656136088e-05, "loss": 1.7007, "step": 1786 }, { "epoch": 0.5424191834876309, "grad_norm": 0.371114581823349, "learning_rate": 8.92041312272175e-05, "loss": 1.8876, "step": 1787 }, { "epoch": 0.5427227196843224, "grad_norm": 0.4263741672039032, "learning_rate": 8.919805589307413e-05, "loss": 2.1338, "step": 1788 }, { "epoch": 0.5430262558810138, "grad_norm": 0.4573124349117279, "learning_rate": 8.919198055893074e-05, "loss": 1.9776, "step": 1789 }, { "epoch": 0.5433297920777053, "grad_norm": 0.44550567865371704, "learning_rate": 8.918590522478738e-05, "loss": 1.7205, "step": 1790 }, { "epoch": 0.5436333282743967, "grad_norm": 0.42521047592163086, "learning_rate": 8.917982989064398e-05, "loss": 1.9548, "step": 1791 }, { "epoch": 0.5439368644710881, "grad_norm": 0.39518535137176514, "learning_rate": 8.917375455650061e-05, "loss": 2.0192, "step": 1792 }, { "epoch": 0.5442404006677797, "grad_norm": 0.42280903458595276, "learning_rate": 8.916767922235724e-05, "loss": 1.5445, "step": 1793 }, { "epoch": 0.5445439368644711, "grad_norm": 0.40115422010421753, "learning_rate": 8.916160388821386e-05, "loss": 1.9529, "step": 1794 }, { "epoch": 0.5448474730611625, "grad_norm": 0.3923608958721161, "learning_rate": 8.915552855407048e-05, "loss": 2.0184, "step": 1795 }, { "epoch": 0.545151009257854, "grad_norm": 0.3982231020927429, "learning_rate": 8.91494532199271e-05, "loss": 1.9162, "step": 1796 }, { "epoch": 0.5454545454545454, "grad_norm": 0.4375683665275574, "learning_rate": 8.914337788578372e-05, "loss": 1.8728, "step": 1797 }, { "epoch": 0.545758081651237, "grad_norm": 0.4353227913379669, "learning_rate": 8.913730255164034e-05, "loss": 1.4043, "step": 1798 }, { "epoch": 0.5460616178479284, "grad_norm": 0.4171392619609833, "learning_rate": 8.913122721749697e-05, "loss": 1.9417, "step": 1799 }, { "epoch": 0.5463651540446198, "grad_norm": 0.33565661311149597, "learning_rate": 8.912515188335359e-05, "loss": 1.888, "step": 1800 }, { "epoch": 0.5466686902413113, "grad_norm": 0.3857763707637787, "learning_rate": 8.91190765492102e-05, "loss": 1.7882, "step": 1801 }, { "epoch": 0.5469722264380027, "grad_norm": 0.3976082503795624, "learning_rate": 8.911300121506684e-05, "loss": 1.6312, "step": 1802 }, { "epoch": 0.5472757626346942, "grad_norm": 0.43773913383483887, "learning_rate": 8.910692588092345e-05, "loss": 1.7299, "step": 1803 }, { "epoch": 0.5475792988313857, "grad_norm": 0.39484649896621704, "learning_rate": 8.910085054678009e-05, "loss": 1.8826, "step": 1804 }, { "epoch": 0.5478828350280771, "grad_norm": 0.42913469672203064, "learning_rate": 8.909477521263669e-05, "loss": 1.1713, "step": 1805 }, { "epoch": 0.5481863712247685, "grad_norm": 0.43996962904930115, "learning_rate": 8.908869987849332e-05, "loss": 2.162, "step": 1806 }, { "epoch": 0.54848990742146, "grad_norm": 0.7948350310325623, "learning_rate": 8.908262454434995e-05, "loss": 1.8808, "step": 1807 }, { "epoch": 0.5487934436181514, "grad_norm": 0.43142643570899963, "learning_rate": 8.907654921020657e-05, "loss": 2.0966, "step": 1808 }, { "epoch": 0.549096979814843, "grad_norm": 0.36545732617378235, "learning_rate": 8.907047387606319e-05, "loss": 1.7421, "step": 1809 }, { "epoch": 0.5494005160115344, "grad_norm": 0.3977827727794647, "learning_rate": 8.906439854191982e-05, "loss": 1.9356, "step": 1810 }, { "epoch": 0.5497040522082258, "grad_norm": 0.4487985670566559, "learning_rate": 8.905832320777643e-05, "loss": 1.8294, "step": 1811 }, { "epoch": 0.5500075884049173, "grad_norm": 0.4151144027709961, "learning_rate": 8.905224787363305e-05, "loss": 1.7, "step": 1812 }, { "epoch": 0.5503111246016087, "grad_norm": 0.5114679336547852, "learning_rate": 8.904617253948968e-05, "loss": 2.2179, "step": 1813 }, { "epoch": 0.5506146607983002, "grad_norm": 0.4134223163127899, "learning_rate": 8.90400972053463e-05, "loss": 1.9573, "step": 1814 }, { "epoch": 0.5509181969949917, "grad_norm": 0.5172004699707031, "learning_rate": 8.903402187120292e-05, "loss": 1.7614, "step": 1815 }, { "epoch": 0.5512217331916831, "grad_norm": 0.4552132189273834, "learning_rate": 8.902794653705955e-05, "loss": 1.6595, "step": 1816 }, { "epoch": 0.5515252693883745, "grad_norm": 0.4171915054321289, "learning_rate": 8.902187120291616e-05, "loss": 1.7968, "step": 1817 }, { "epoch": 0.551828805585066, "grad_norm": 0.4485832452774048, "learning_rate": 8.90157958687728e-05, "loss": 2.0529, "step": 1818 }, { "epoch": 0.5521323417817575, "grad_norm": 0.3997848331928253, "learning_rate": 8.90097205346294e-05, "loss": 1.7053, "step": 1819 }, { "epoch": 0.552435877978449, "grad_norm": 0.47565630078315735, "learning_rate": 8.900364520048603e-05, "loss": 1.7842, "step": 1820 }, { "epoch": 0.5527394141751404, "grad_norm": 0.42128419876098633, "learning_rate": 8.899756986634266e-05, "loss": 1.8866, "step": 1821 }, { "epoch": 0.5530429503718318, "grad_norm": 0.4098486602306366, "learning_rate": 8.899149453219926e-05, "loss": 1.9268, "step": 1822 }, { "epoch": 0.5533464865685233, "grad_norm": 0.3754071295261383, "learning_rate": 8.89854191980559e-05, "loss": 1.9191, "step": 1823 }, { "epoch": 0.5536500227652148, "grad_norm": 0.4278963804244995, "learning_rate": 8.897934386391253e-05, "loss": 2.0914, "step": 1824 }, { "epoch": 0.5539535589619062, "grad_norm": 0.41121765971183777, "learning_rate": 8.897326852976914e-05, "loss": 1.9135, "step": 1825 }, { "epoch": 0.5542570951585977, "grad_norm": 0.7463552355766296, "learning_rate": 8.896719319562576e-05, "loss": 1.6663, "step": 1826 }, { "epoch": 0.5545606313552891, "grad_norm": 0.3886711299419403, "learning_rate": 8.896111786148239e-05, "loss": 1.9131, "step": 1827 }, { "epoch": 0.5548641675519805, "grad_norm": 0.3520048260688782, "learning_rate": 8.895504252733901e-05, "loss": 1.9166, "step": 1828 }, { "epoch": 0.5551677037486721, "grad_norm": 0.3484227955341339, "learning_rate": 8.894896719319563e-05, "loss": 1.9053, "step": 1829 }, { "epoch": 0.5554712399453635, "grad_norm": 0.7534793615341187, "learning_rate": 8.894289185905226e-05, "loss": 1.6738, "step": 1830 }, { "epoch": 0.555774776142055, "grad_norm": 0.4037635326385498, "learning_rate": 8.893681652490887e-05, "loss": 2.1951, "step": 1831 }, { "epoch": 0.5560783123387464, "grad_norm": 0.39184069633483887, "learning_rate": 8.893074119076549e-05, "loss": 2.0811, "step": 1832 }, { "epoch": 0.5563818485354378, "grad_norm": 0.35053008794784546, "learning_rate": 8.892466585662211e-05, "loss": 1.8178, "step": 1833 }, { "epoch": 0.5566853847321293, "grad_norm": 0.43768683075904846, "learning_rate": 8.891859052247874e-05, "loss": 2.0811, "step": 1834 }, { "epoch": 0.5569889209288208, "grad_norm": 0.38592809438705444, "learning_rate": 8.891251518833537e-05, "loss": 1.8156, "step": 1835 }, { "epoch": 0.5572924571255122, "grad_norm": 0.351408988237381, "learning_rate": 8.890643985419197e-05, "loss": 1.5505, "step": 1836 }, { "epoch": 0.5575959933222037, "grad_norm": 0.4032740592956543, "learning_rate": 8.89003645200486e-05, "loss": 1.622, "step": 1837 }, { "epoch": 0.5578995295188951, "grad_norm": 0.3902193307876587, "learning_rate": 8.889428918590524e-05, "loss": 2.0796, "step": 1838 }, { "epoch": 0.5582030657155865, "grad_norm": 2.2613284587860107, "learning_rate": 8.888821385176185e-05, "loss": 1.6464, "step": 1839 }, { "epoch": 0.5585066019122781, "grad_norm": 0.3818334937095642, "learning_rate": 8.888213851761847e-05, "loss": 1.0967, "step": 1840 }, { "epoch": 0.5588101381089695, "grad_norm": 0.534939169883728, "learning_rate": 8.88760631834751e-05, "loss": 1.9834, "step": 1841 }, { "epoch": 0.559113674305661, "grad_norm": 0.41335856914520264, "learning_rate": 8.886998784933172e-05, "loss": 1.9458, "step": 1842 }, { "epoch": 0.5594172105023524, "grad_norm": 0.4256092309951782, "learning_rate": 8.886391251518834e-05, "loss": 1.9967, "step": 1843 }, { "epoch": 0.5597207466990438, "grad_norm": 0.40793219208717346, "learning_rate": 8.885783718104497e-05, "loss": 1.6972, "step": 1844 }, { "epoch": 0.5600242828957354, "grad_norm": 0.4092423915863037, "learning_rate": 8.885176184690158e-05, "loss": 1.9676, "step": 1845 }, { "epoch": 0.5603278190924268, "grad_norm": 0.35754647850990295, "learning_rate": 8.88456865127582e-05, "loss": 1.1498, "step": 1846 }, { "epoch": 0.5606313552891182, "grad_norm": 0.41491416096687317, "learning_rate": 8.883961117861482e-05, "loss": 2.2078, "step": 1847 }, { "epoch": 0.5609348914858097, "grad_norm": 1.1699934005737305, "learning_rate": 8.883353584447145e-05, "loss": 2.1936, "step": 1848 }, { "epoch": 0.5612384276825011, "grad_norm": 1.9053874015808105, "learning_rate": 8.882746051032808e-05, "loss": 1.7189, "step": 1849 }, { "epoch": 0.5615419638791926, "grad_norm": 0.41807985305786133, "learning_rate": 8.882138517618468e-05, "loss": 1.7771, "step": 1850 }, { "epoch": 0.5618455000758841, "grad_norm": 0.41903504729270935, "learning_rate": 8.881530984204132e-05, "loss": 2.1023, "step": 1851 }, { "epoch": 0.5621490362725755, "grad_norm": 0.3394705653190613, "learning_rate": 8.880923450789795e-05, "loss": 0.9969, "step": 1852 }, { "epoch": 0.5624525724692669, "grad_norm": 0.347989022731781, "learning_rate": 8.880315917375456e-05, "loss": 1.974, "step": 1853 }, { "epoch": 0.5627561086659584, "grad_norm": 0.49732285737991333, "learning_rate": 8.879708383961118e-05, "loss": 1.7575, "step": 1854 }, { "epoch": 0.5630596448626499, "grad_norm": 0.44572606682777405, "learning_rate": 8.879100850546781e-05, "loss": 1.8167, "step": 1855 }, { "epoch": 0.5633631810593414, "grad_norm": 0.8100895881652832, "learning_rate": 8.878493317132443e-05, "loss": 1.9788, "step": 1856 }, { "epoch": 0.5636667172560328, "grad_norm": 0.4205772578716278, "learning_rate": 8.877885783718105e-05, "loss": 1.9986, "step": 1857 }, { "epoch": 0.5639702534527242, "grad_norm": 0.3976004719734192, "learning_rate": 8.877278250303766e-05, "loss": 1.9735, "step": 1858 }, { "epoch": 0.5642737896494157, "grad_norm": 0.41813865303993225, "learning_rate": 8.87667071688943e-05, "loss": 1.8479, "step": 1859 }, { "epoch": 0.5645773258461072, "grad_norm": 0.4901811182498932, "learning_rate": 8.876063183475091e-05, "loss": 2.3504, "step": 1860 }, { "epoch": 0.5648808620427986, "grad_norm": 0.4103149473667145, "learning_rate": 8.875455650060753e-05, "loss": 2.0658, "step": 1861 }, { "epoch": 0.5651843982394901, "grad_norm": 0.37885773181915283, "learning_rate": 8.874848116646416e-05, "loss": 1.8143, "step": 1862 }, { "epoch": 0.5654879344361815, "grad_norm": 0.35186877846717834, "learning_rate": 8.874240583232079e-05, "loss": 1.9395, "step": 1863 }, { "epoch": 0.5657914706328729, "grad_norm": 0.4435397982597351, "learning_rate": 8.87363304981774e-05, "loss": 1.7693, "step": 1864 }, { "epoch": 0.5660950068295644, "grad_norm": 1.451499342918396, "learning_rate": 8.873025516403403e-05, "loss": 1.9007, "step": 1865 }, { "epoch": 0.5663985430262559, "grad_norm": 0.41606009006500244, "learning_rate": 8.872417982989066e-05, "loss": 2.0327, "step": 1866 }, { "epoch": 0.5667020792229474, "grad_norm": 0.38989219069480896, "learning_rate": 8.871810449574727e-05, "loss": 1.6791, "step": 1867 }, { "epoch": 0.5670056154196388, "grad_norm": 0.3850671052932739, "learning_rate": 8.871202916160389e-05, "loss": 1.7889, "step": 1868 }, { "epoch": 0.5673091516163302, "grad_norm": 0.43616947531700134, "learning_rate": 8.870595382746052e-05, "loss": 1.7769, "step": 1869 }, { "epoch": 0.5676126878130217, "grad_norm": 0.39661890268325806, "learning_rate": 8.869987849331714e-05, "loss": 1.9286, "step": 1870 }, { "epoch": 0.5679162240097132, "grad_norm": 0.43553540110588074, "learning_rate": 8.869380315917376e-05, "loss": 1.5863, "step": 1871 }, { "epoch": 0.5682197602064046, "grad_norm": 0.3950207829475403, "learning_rate": 8.868772782503037e-05, "loss": 1.948, "step": 1872 }, { "epoch": 0.5685232964030961, "grad_norm": 0.5240088701248169, "learning_rate": 8.8681652490887e-05, "loss": 1.8758, "step": 1873 }, { "epoch": 0.5688268325997875, "grad_norm": 0.3744898736476898, "learning_rate": 8.867557715674362e-05, "loss": 2.2962, "step": 1874 }, { "epoch": 0.5691303687964789, "grad_norm": 0.3877609074115753, "learning_rate": 8.866950182260024e-05, "loss": 1.675, "step": 1875 }, { "epoch": 0.5694339049931705, "grad_norm": 0.3350330591201782, "learning_rate": 8.866342648845687e-05, "loss": 1.8538, "step": 1876 }, { "epoch": 0.5697374411898619, "grad_norm": 0.38145220279693604, "learning_rate": 8.86573511543135e-05, "loss": 1.6781, "step": 1877 }, { "epoch": 0.5700409773865533, "grad_norm": 0.43861034512519836, "learning_rate": 8.86512758201701e-05, "loss": 1.8, "step": 1878 }, { "epoch": 0.5703445135832448, "grad_norm": 0.4304041266441345, "learning_rate": 8.864520048602674e-05, "loss": 1.6788, "step": 1879 }, { "epoch": 0.5706480497799362, "grad_norm": 0.4199315309524536, "learning_rate": 8.863912515188337e-05, "loss": 1.8819, "step": 1880 }, { "epoch": 0.5709515859766278, "grad_norm": 0.4044843912124634, "learning_rate": 8.863304981773998e-05, "loss": 1.8221, "step": 1881 }, { "epoch": 0.5712551221733192, "grad_norm": 0.5554643273353577, "learning_rate": 8.86269744835966e-05, "loss": 1.6682, "step": 1882 }, { "epoch": 0.5715586583700106, "grad_norm": 0.45517250895500183, "learning_rate": 8.862089914945323e-05, "loss": 1.8345, "step": 1883 }, { "epoch": 0.5718621945667021, "grad_norm": 0.4475466310977936, "learning_rate": 8.861482381530985e-05, "loss": 1.9378, "step": 1884 }, { "epoch": 0.5721657307633935, "grad_norm": 0.5567486882209778, "learning_rate": 8.860874848116647e-05, "loss": 1.6444, "step": 1885 }, { "epoch": 0.572469266960085, "grad_norm": 0.3710486590862274, "learning_rate": 8.860267314702308e-05, "loss": 1.6935, "step": 1886 }, { "epoch": 0.5727728031567765, "grad_norm": 0.4086054861545563, "learning_rate": 8.859659781287971e-05, "loss": 1.7854, "step": 1887 }, { "epoch": 0.5730763393534679, "grad_norm": 0.46489015221595764, "learning_rate": 8.859052247873633e-05, "loss": 1.8197, "step": 1888 }, { "epoch": 0.5733798755501593, "grad_norm": 0.7444620132446289, "learning_rate": 8.858444714459295e-05, "loss": 1.53, "step": 1889 }, { "epoch": 0.5736834117468508, "grad_norm": 0.4494125545024872, "learning_rate": 8.857837181044958e-05, "loss": 1.6418, "step": 1890 }, { "epoch": 0.5739869479435422, "grad_norm": 0.6012828946113586, "learning_rate": 8.857229647630621e-05, "loss": 2.3039, "step": 1891 }, { "epoch": 0.5742904841402338, "grad_norm": 0.44922634959220886, "learning_rate": 8.856622114216281e-05, "loss": 1.8881, "step": 1892 }, { "epoch": 0.5745940203369252, "grad_norm": 0.34000277519226074, "learning_rate": 8.856014580801945e-05, "loss": 1.5964, "step": 1893 }, { "epoch": 0.5748975565336166, "grad_norm": 0.4107670485973358, "learning_rate": 8.855407047387608e-05, "loss": 1.9057, "step": 1894 }, { "epoch": 0.5752010927303081, "grad_norm": 0.3938602805137634, "learning_rate": 8.854799513973268e-05, "loss": 2.0193, "step": 1895 }, { "epoch": 0.5755046289269995, "grad_norm": 0.3723643720149994, "learning_rate": 8.854191980558931e-05, "loss": 2.0371, "step": 1896 }, { "epoch": 0.575808165123691, "grad_norm": 0.7747316956520081, "learning_rate": 8.853584447144594e-05, "loss": 1.5783, "step": 1897 }, { "epoch": 0.5761117013203825, "grad_norm": 0.40745773911476135, "learning_rate": 8.852976913730256e-05, "loss": 1.6217, "step": 1898 }, { "epoch": 0.5764152375170739, "grad_norm": 0.363471120595932, "learning_rate": 8.852369380315918e-05, "loss": 1.948, "step": 1899 }, { "epoch": 0.5767187737137653, "grad_norm": 0.3844568133354187, "learning_rate": 8.85176184690158e-05, "loss": 2.0447, "step": 1900 }, { "epoch": 0.5770223099104568, "grad_norm": 0.42804035544395447, "learning_rate": 8.851154313487242e-05, "loss": 1.9196, "step": 1901 }, { "epoch": 0.5773258461071483, "grad_norm": 0.36453336477279663, "learning_rate": 8.850546780072904e-05, "loss": 2.2236, "step": 1902 }, { "epoch": 0.5776293823038398, "grad_norm": 0.41334068775177, "learning_rate": 8.849939246658566e-05, "loss": 1.8657, "step": 1903 }, { "epoch": 0.5779329185005312, "grad_norm": 0.3925778567790985, "learning_rate": 8.849331713244229e-05, "loss": 1.9695, "step": 1904 }, { "epoch": 0.5782364546972226, "grad_norm": 0.39274585247039795, "learning_rate": 8.848724179829892e-05, "loss": 1.664, "step": 1905 }, { "epoch": 0.5785399908939141, "grad_norm": 0.37139561772346497, "learning_rate": 8.848116646415552e-05, "loss": 1.0956, "step": 1906 }, { "epoch": 0.5788435270906056, "grad_norm": 0.4112982451915741, "learning_rate": 8.847509113001216e-05, "loss": 2.0189, "step": 1907 }, { "epoch": 0.579147063287297, "grad_norm": 0.34007617831230164, "learning_rate": 8.846901579586879e-05, "loss": 1.537, "step": 1908 }, { "epoch": 0.5794505994839885, "grad_norm": 0.43591251969337463, "learning_rate": 8.846294046172539e-05, "loss": 1.8668, "step": 1909 }, { "epoch": 0.5797541356806799, "grad_norm": 0.4715147316455841, "learning_rate": 8.845686512758202e-05, "loss": 1.9474, "step": 1910 }, { "epoch": 0.5800576718773713, "grad_norm": 0.5986727476119995, "learning_rate": 8.845078979343865e-05, "loss": 1.9555, "step": 1911 }, { "epoch": 0.5803612080740629, "grad_norm": 0.43499329686164856, "learning_rate": 8.844471445929527e-05, "loss": 1.8719, "step": 1912 }, { "epoch": 0.5806647442707543, "grad_norm": 0.4152344763278961, "learning_rate": 8.843863912515189e-05, "loss": 2.0322, "step": 1913 }, { "epoch": 0.5809682804674458, "grad_norm": 0.4037158787250519, "learning_rate": 8.84325637910085e-05, "loss": 1.9687, "step": 1914 }, { "epoch": 0.5812718166641372, "grad_norm": 0.4261537492275238, "learning_rate": 8.842648845686513e-05, "loss": 1.9674, "step": 1915 }, { "epoch": 0.5815753528608286, "grad_norm": 0.3880082070827484, "learning_rate": 8.842041312272175e-05, "loss": 1.7925, "step": 1916 }, { "epoch": 0.58187888905752, "grad_norm": 0.7090932130813599, "learning_rate": 8.841433778857837e-05, "loss": 1.8392, "step": 1917 }, { "epoch": 0.5821824252542116, "grad_norm": 0.4407334625720978, "learning_rate": 8.8408262454435e-05, "loss": 1.9389, "step": 1918 }, { "epoch": 0.582485961450903, "grad_norm": 0.40139150619506836, "learning_rate": 8.840218712029162e-05, "loss": 2.0225, "step": 1919 }, { "epoch": 0.5827894976475945, "grad_norm": 0.7051631212234497, "learning_rate": 8.839611178614823e-05, "loss": 1.9287, "step": 1920 }, { "epoch": 0.5830930338442859, "grad_norm": 0.4037090241909027, "learning_rate": 8.839003645200487e-05, "loss": 1.7706, "step": 1921 }, { "epoch": 0.5833965700409773, "grad_norm": 0.4044518768787384, "learning_rate": 8.83839611178615e-05, "loss": 1.9106, "step": 1922 }, { "epoch": 0.5837001062376689, "grad_norm": 0.5114139914512634, "learning_rate": 8.83778857837181e-05, "loss": 2.0017, "step": 1923 }, { "epoch": 0.5840036424343603, "grad_norm": 0.39643585681915283, "learning_rate": 8.837181044957473e-05, "loss": 2.0096, "step": 1924 }, { "epoch": 0.5843071786310517, "grad_norm": 0.4566240608692169, "learning_rate": 8.836573511543136e-05, "loss": 2.0962, "step": 1925 }, { "epoch": 0.5846107148277432, "grad_norm": 0.37759748101234436, "learning_rate": 8.835965978128798e-05, "loss": 1.6786, "step": 1926 }, { "epoch": 0.5849142510244346, "grad_norm": 0.37798550724983215, "learning_rate": 8.83535844471446e-05, "loss": 2.1075, "step": 1927 }, { "epoch": 0.5852177872211262, "grad_norm": 0.40494439005851746, "learning_rate": 8.834750911300121e-05, "loss": 1.7167, "step": 1928 }, { "epoch": 0.5855213234178176, "grad_norm": 0.3333325684070587, "learning_rate": 8.834143377885784e-05, "loss": 1.9133, "step": 1929 }, { "epoch": 0.585824859614509, "grad_norm": 0.3827350437641144, "learning_rate": 8.833535844471446e-05, "loss": 1.8537, "step": 1930 }, { "epoch": 0.5861283958112005, "grad_norm": 0.4088849127292633, "learning_rate": 8.832928311057108e-05, "loss": 1.9625, "step": 1931 }, { "epoch": 0.5864319320078919, "grad_norm": 0.3575502932071686, "learning_rate": 8.832320777642771e-05, "loss": 1.851, "step": 1932 }, { "epoch": 0.5867354682045834, "grad_norm": 0.38579368591308594, "learning_rate": 8.831713244228433e-05, "loss": 1.8121, "step": 1933 }, { "epoch": 0.5870390044012749, "grad_norm": 0.37787890434265137, "learning_rate": 8.831105710814094e-05, "loss": 1.9509, "step": 1934 }, { "epoch": 0.5873425405979663, "grad_norm": 0.4074660837650299, "learning_rate": 8.830498177399758e-05, "loss": 1.9987, "step": 1935 }, { "epoch": 0.5876460767946577, "grad_norm": 0.7902248501777649, "learning_rate": 8.82989064398542e-05, "loss": 1.6704, "step": 1936 }, { "epoch": 0.5879496129913492, "grad_norm": 0.3240687847137451, "learning_rate": 8.829283110571081e-05, "loss": 1.1956, "step": 1937 }, { "epoch": 0.5882531491880407, "grad_norm": 0.410543829202652, "learning_rate": 8.828675577156744e-05, "loss": 1.9533, "step": 1938 }, { "epoch": 0.5885566853847322, "grad_norm": 0.4559386670589447, "learning_rate": 8.828068043742406e-05, "loss": 1.566, "step": 1939 }, { "epoch": 0.5888602215814236, "grad_norm": 0.44418251514434814, "learning_rate": 8.827460510328069e-05, "loss": 1.814, "step": 1940 }, { "epoch": 0.589163757778115, "grad_norm": 0.42374011874198914, "learning_rate": 8.82685297691373e-05, "loss": 1.8946, "step": 1941 }, { "epoch": 0.5894672939748065, "grad_norm": 0.44734686613082886, "learning_rate": 8.826245443499392e-05, "loss": 1.9893, "step": 1942 }, { "epoch": 0.589770830171498, "grad_norm": 0.42960959672927856, "learning_rate": 8.825637910085055e-05, "loss": 1.5659, "step": 1943 }, { "epoch": 0.5900743663681894, "grad_norm": 0.44513779878616333, "learning_rate": 8.825030376670717e-05, "loss": 1.969, "step": 1944 }, { "epoch": 0.5903779025648809, "grad_norm": 0.39732202887535095, "learning_rate": 8.824422843256379e-05, "loss": 1.9469, "step": 1945 }, { "epoch": 0.5906814387615723, "grad_norm": 0.490384042263031, "learning_rate": 8.823815309842042e-05, "loss": 1.7434, "step": 1946 }, { "epoch": 0.5909849749582637, "grad_norm": 0.5644544959068298, "learning_rate": 8.823207776427704e-05, "loss": 2.1488, "step": 1947 }, { "epoch": 0.5912885111549552, "grad_norm": 0.43499046564102173, "learning_rate": 8.822600243013366e-05, "loss": 1.6023, "step": 1948 }, { "epoch": 0.5915920473516467, "grad_norm": 0.3970509469509125, "learning_rate": 8.821992709599029e-05, "loss": 1.5955, "step": 1949 }, { "epoch": 0.5918955835483382, "grad_norm": 0.39471563696861267, "learning_rate": 8.821385176184692e-05, "loss": 1.9294, "step": 1950 }, { "epoch": 0.5921991197450296, "grad_norm": 0.42955949902534485, "learning_rate": 8.820777642770352e-05, "loss": 1.9628, "step": 1951 }, { "epoch": 0.592502655941721, "grad_norm": 0.3734053373336792, "learning_rate": 8.820170109356015e-05, "loss": 1.9927, "step": 1952 }, { "epoch": 0.5928061921384125, "grad_norm": 0.40868285298347473, "learning_rate": 8.819562575941677e-05, "loss": 2.0704, "step": 1953 }, { "epoch": 0.593109728335104, "grad_norm": 0.4374091625213623, "learning_rate": 8.81895504252734e-05, "loss": 1.7447, "step": 1954 }, { "epoch": 0.5934132645317954, "grad_norm": 0.408299058675766, "learning_rate": 8.818347509113002e-05, "loss": 2.076, "step": 1955 }, { "epoch": 0.5937168007284869, "grad_norm": 0.4676043391227722, "learning_rate": 8.817739975698663e-05, "loss": 1.9319, "step": 1956 }, { "epoch": 0.5940203369251783, "grad_norm": 1.3173327445983887, "learning_rate": 8.817132442284326e-05, "loss": 1.6364, "step": 1957 }, { "epoch": 0.5943238731218697, "grad_norm": 0.39462506771087646, "learning_rate": 8.816524908869988e-05, "loss": 2.1088, "step": 1958 }, { "epoch": 0.5946274093185613, "grad_norm": 0.37660276889801025, "learning_rate": 8.81591737545565e-05, "loss": 1.641, "step": 1959 }, { "epoch": 0.5949309455152527, "grad_norm": 0.3797924518585205, "learning_rate": 8.815309842041313e-05, "loss": 1.4328, "step": 1960 }, { "epoch": 0.5952344817119442, "grad_norm": 0.33881229162216187, "learning_rate": 8.814702308626975e-05, "loss": 1.6713, "step": 1961 }, { "epoch": 0.5955380179086356, "grad_norm": 0.43969300389289856, "learning_rate": 8.814094775212637e-05, "loss": 1.8251, "step": 1962 }, { "epoch": 0.595841554105327, "grad_norm": 0.39608824253082275, "learning_rate": 8.8134872417983e-05, "loss": 2.0762, "step": 1963 }, { "epoch": 0.5961450903020186, "grad_norm": 0.3688305914402008, "learning_rate": 8.812879708383963e-05, "loss": 1.886, "step": 1964 }, { "epoch": 0.59644862649871, "grad_norm": 0.3397257328033447, "learning_rate": 8.812272174969623e-05, "loss": 1.8668, "step": 1965 }, { "epoch": 0.5967521626954014, "grad_norm": 0.39257940649986267, "learning_rate": 8.811664641555286e-05, "loss": 1.879, "step": 1966 }, { "epoch": 0.5970556988920929, "grad_norm": 0.41007375717163086, "learning_rate": 8.811057108140948e-05, "loss": 1.7411, "step": 1967 }, { "epoch": 0.5973592350887843, "grad_norm": 0.3694823682308197, "learning_rate": 8.81044957472661e-05, "loss": 1.7675, "step": 1968 }, { "epoch": 0.5976627712854758, "grad_norm": 0.9148819446563721, "learning_rate": 8.809842041312273e-05, "loss": 2.0984, "step": 1969 }, { "epoch": 0.5979663074821673, "grad_norm": 0.379384309053421, "learning_rate": 8.809234507897934e-05, "loss": 2.1933, "step": 1970 }, { "epoch": 0.5982698436788587, "grad_norm": 0.5637233257293701, "learning_rate": 8.808626974483598e-05, "loss": 1.2066, "step": 1971 }, { "epoch": 0.5985733798755501, "grad_norm": 0.42961108684539795, "learning_rate": 8.808019441069259e-05, "loss": 1.6595, "step": 1972 }, { "epoch": 0.5988769160722416, "grad_norm": 0.41248828172683716, "learning_rate": 8.807411907654921e-05, "loss": 2.0266, "step": 1973 }, { "epoch": 0.599180452268933, "grad_norm": 0.41730985045433044, "learning_rate": 8.806804374240584e-05, "loss": 2.0968, "step": 1974 }, { "epoch": 0.5994839884656246, "grad_norm": 0.4452510178089142, "learning_rate": 8.806196840826246e-05, "loss": 1.6038, "step": 1975 }, { "epoch": 0.599787524662316, "grad_norm": 0.457256942987442, "learning_rate": 8.805589307411908e-05, "loss": 2.0862, "step": 1976 }, { "epoch": 0.6000910608590074, "grad_norm": 0.38506418466567993, "learning_rate": 8.80498177399757e-05, "loss": 1.6764, "step": 1977 }, { "epoch": 0.6003945970556989, "grad_norm": 0.4200589060783386, "learning_rate": 8.804374240583234e-05, "loss": 2.0962, "step": 1978 }, { "epoch": 0.6006981332523903, "grad_norm": 0.41140785813331604, "learning_rate": 8.803766707168894e-05, "loss": 1.7345, "step": 1979 }, { "epoch": 0.6010016694490818, "grad_norm": 0.3584011495113373, "learning_rate": 8.803159173754557e-05, "loss": 2.1839, "step": 1980 }, { "epoch": 0.6013052056457733, "grad_norm": 0.40637287497520447, "learning_rate": 8.802551640340219e-05, "loss": 2.0997, "step": 1981 }, { "epoch": 0.6016087418424647, "grad_norm": 0.42887794971466064, "learning_rate": 8.80194410692588e-05, "loss": 1.967, "step": 1982 }, { "epoch": 0.6019122780391561, "grad_norm": 0.42879635095596313, "learning_rate": 8.801336573511544e-05, "loss": 2.0618, "step": 1983 }, { "epoch": 0.6022158142358476, "grad_norm": 0.5477713346481323, "learning_rate": 8.800729040097205e-05, "loss": 1.3743, "step": 1984 }, { "epoch": 0.6025193504325391, "grad_norm": 0.3772994875907898, "learning_rate": 8.800121506682869e-05, "loss": 1.5254, "step": 1985 }, { "epoch": 0.6028228866292306, "grad_norm": 0.4140057861804962, "learning_rate": 8.79951397326853e-05, "loss": 2.164, "step": 1986 }, { "epoch": 0.603126422825922, "grad_norm": 0.44529426097869873, "learning_rate": 8.798906439854192e-05, "loss": 1.8042, "step": 1987 }, { "epoch": 0.6034299590226134, "grad_norm": 0.39523541927337646, "learning_rate": 8.798298906439855e-05, "loss": 1.8134, "step": 1988 }, { "epoch": 0.6037334952193049, "grad_norm": 0.38513484597206116, "learning_rate": 8.797691373025517e-05, "loss": 1.8259, "step": 1989 }, { "epoch": 0.6040370314159964, "grad_norm": 0.4686470329761505, "learning_rate": 8.797083839611179e-05, "loss": 2.2113, "step": 1990 }, { "epoch": 0.6043405676126878, "grad_norm": 0.4119713008403778, "learning_rate": 8.796476306196842e-05, "loss": 1.9614, "step": 1991 }, { "epoch": 0.6046441038093793, "grad_norm": 0.40786707401275635, "learning_rate": 8.795868772782503e-05, "loss": 1.9369, "step": 1992 }, { "epoch": 0.6049476400060707, "grad_norm": 0.3869648873806, "learning_rate": 8.795261239368165e-05, "loss": 2.1204, "step": 1993 }, { "epoch": 0.6052511762027621, "grad_norm": 0.3826451301574707, "learning_rate": 8.794653705953828e-05, "loss": 2.0715, "step": 1994 }, { "epoch": 0.6055547123994537, "grad_norm": 0.38412514328956604, "learning_rate": 8.79404617253949e-05, "loss": 1.9482, "step": 1995 }, { "epoch": 0.6058582485961451, "grad_norm": 0.4388350248336792, "learning_rate": 8.793438639125152e-05, "loss": 1.9746, "step": 1996 }, { "epoch": 0.6061617847928366, "grad_norm": 0.3750387132167816, "learning_rate": 8.792831105710815e-05, "loss": 1.7831, "step": 1997 }, { "epoch": 0.606465320989528, "grad_norm": 0.42686113715171814, "learning_rate": 8.792223572296476e-05, "loss": 1.6419, "step": 1998 }, { "epoch": 0.6067688571862194, "grad_norm": 0.39653515815734863, "learning_rate": 8.79161603888214e-05, "loss": 1.7617, "step": 1999 }, { "epoch": 0.6070723933829109, "grad_norm": 0.4662545621395111, "learning_rate": 8.791008505467801e-05, "loss": 1.878, "step": 2000 }, { "epoch": 0.6073759295796024, "grad_norm": 0.4733245074748993, "learning_rate": 8.790400972053463e-05, "loss": 1.874, "step": 2001 }, { "epoch": 0.6076794657762938, "grad_norm": 0.4228340983390808, "learning_rate": 8.789793438639126e-05, "loss": 1.9415, "step": 2002 }, { "epoch": 0.6079830019729853, "grad_norm": 0.4229651391506195, "learning_rate": 8.789185905224788e-05, "loss": 1.5762, "step": 2003 }, { "epoch": 0.6082865381696767, "grad_norm": 0.4287284016609192, "learning_rate": 8.78857837181045e-05, "loss": 2.1414, "step": 2004 }, { "epoch": 0.6085900743663681, "grad_norm": 0.555934488773346, "learning_rate": 8.787970838396113e-05, "loss": 1.9591, "step": 2005 }, { "epoch": 0.6088936105630597, "grad_norm": 0.42303675413131714, "learning_rate": 8.787363304981774e-05, "loss": 1.5513, "step": 2006 }, { "epoch": 0.6091971467597511, "grad_norm": 0.37572699785232544, "learning_rate": 8.786755771567436e-05, "loss": 1.9508, "step": 2007 }, { "epoch": 0.6095006829564426, "grad_norm": 0.3933078944683075, "learning_rate": 8.786148238153099e-05, "loss": 1.9507, "step": 2008 }, { "epoch": 0.609804219153134, "grad_norm": 0.46419456601142883, "learning_rate": 8.785540704738761e-05, "loss": 2.0019, "step": 2009 }, { "epoch": 0.6101077553498254, "grad_norm": 0.38383206725120544, "learning_rate": 8.784933171324423e-05, "loss": 1.9533, "step": 2010 }, { "epoch": 0.610411291546517, "grad_norm": 0.37486881017684937, "learning_rate": 8.784325637910086e-05, "loss": 1.9508, "step": 2011 }, { "epoch": 0.6107148277432084, "grad_norm": 0.34909558296203613, "learning_rate": 8.783718104495747e-05, "loss": 1.8833, "step": 2012 }, { "epoch": 0.6110183639398998, "grad_norm": 0.6226269006729126, "learning_rate": 8.78311057108141e-05, "loss": 2.1135, "step": 2013 }, { "epoch": 0.6113219001365913, "grad_norm": 0.45638999342918396, "learning_rate": 8.782503037667072e-05, "loss": 1.8989, "step": 2014 }, { "epoch": 0.6116254363332827, "grad_norm": 0.41857293248176575, "learning_rate": 8.781895504252734e-05, "loss": 1.7609, "step": 2015 }, { "epoch": 0.6119289725299742, "grad_norm": 0.4325519800186157, "learning_rate": 8.781287970838397e-05, "loss": 1.6411, "step": 2016 }, { "epoch": 0.6122325087266657, "grad_norm": 0.3558877110481262, "learning_rate": 8.780680437424059e-05, "loss": 1.7634, "step": 2017 }, { "epoch": 0.6125360449233571, "grad_norm": 0.42849549651145935, "learning_rate": 8.78007290400972e-05, "loss": 1.8625, "step": 2018 }, { "epoch": 0.6128395811200485, "grad_norm": 0.7057125568389893, "learning_rate": 8.779465370595384e-05, "loss": 2.0542, "step": 2019 }, { "epoch": 0.61314311731674, "grad_norm": 0.3607623279094696, "learning_rate": 8.778857837181045e-05, "loss": 2.0613, "step": 2020 }, { "epoch": 0.6134466535134315, "grad_norm": 0.35904109477996826, "learning_rate": 8.778250303766707e-05, "loss": 2.1633, "step": 2021 }, { "epoch": 0.613750189710123, "grad_norm": 0.38341954350471497, "learning_rate": 8.77764277035237e-05, "loss": 1.925, "step": 2022 }, { "epoch": 0.6140537259068144, "grad_norm": 0.8183413743972778, "learning_rate": 8.777035236938032e-05, "loss": 1.5858, "step": 2023 }, { "epoch": 0.6143572621035058, "grad_norm": 0.4051649272441864, "learning_rate": 8.776427703523694e-05, "loss": 1.9788, "step": 2024 }, { "epoch": 0.6146607983001973, "grad_norm": 0.40388303995132446, "learning_rate": 8.775820170109357e-05, "loss": 1.9113, "step": 2025 }, { "epoch": 0.6149643344968888, "grad_norm": 0.38880276679992676, "learning_rate": 8.775212636695018e-05, "loss": 1.7535, "step": 2026 }, { "epoch": 0.6152678706935802, "grad_norm": 0.41596999764442444, "learning_rate": 8.774605103280682e-05, "loss": 1.9532, "step": 2027 }, { "epoch": 0.6155714068902717, "grad_norm": 0.3971737325191498, "learning_rate": 8.773997569866343e-05, "loss": 1.6123, "step": 2028 }, { "epoch": 0.6158749430869631, "grad_norm": 0.610409140586853, "learning_rate": 8.773390036452005e-05, "loss": 1.969, "step": 2029 }, { "epoch": 0.6161784792836545, "grad_norm": 0.4366918206214905, "learning_rate": 8.772782503037668e-05, "loss": 1.7944, "step": 2030 }, { "epoch": 0.616482015480346, "grad_norm": 0.3931274712085724, "learning_rate": 8.77217496962333e-05, "loss": 2.0559, "step": 2031 }, { "epoch": 0.6167855516770375, "grad_norm": 0.556197464466095, "learning_rate": 8.771567436208992e-05, "loss": 1.6191, "step": 2032 }, { "epoch": 0.617089087873729, "grad_norm": 0.4099692404270172, "learning_rate": 8.770959902794655e-05, "loss": 1.6357, "step": 2033 }, { "epoch": 0.6173926240704204, "grad_norm": 0.6582362055778503, "learning_rate": 8.770352369380316e-05, "loss": 2.1112, "step": 2034 }, { "epoch": 0.6176961602671118, "grad_norm": 0.43522998690605164, "learning_rate": 8.769744835965978e-05, "loss": 2.078, "step": 2035 }, { "epoch": 0.6179996964638033, "grad_norm": 0.3984440565109253, "learning_rate": 8.769137302551641e-05, "loss": 1.9548, "step": 2036 }, { "epoch": 0.6183032326604948, "grad_norm": 0.4203691780567169, "learning_rate": 8.768529769137303e-05, "loss": 2.0214, "step": 2037 }, { "epoch": 0.6186067688571862, "grad_norm": 0.4662054181098938, "learning_rate": 8.767922235722965e-05, "loss": 1.8141, "step": 2038 }, { "epoch": 0.6189103050538777, "grad_norm": 0.440121591091156, "learning_rate": 8.767314702308628e-05, "loss": 1.8577, "step": 2039 }, { "epoch": 0.6192138412505691, "grad_norm": 0.4438299536705017, "learning_rate": 8.76670716889429e-05, "loss": 1.5552, "step": 2040 }, { "epoch": 0.6195173774472605, "grad_norm": 0.3925747871398926, "learning_rate": 8.766099635479951e-05, "loss": 1.9399, "step": 2041 }, { "epoch": 0.6198209136439521, "grad_norm": 0.4043785333633423, "learning_rate": 8.765492102065614e-05, "loss": 1.9208, "step": 2042 }, { "epoch": 0.6201244498406435, "grad_norm": 0.4448244273662567, "learning_rate": 8.764884568651276e-05, "loss": 1.6187, "step": 2043 }, { "epoch": 0.620427986037335, "grad_norm": 0.5388829112052917, "learning_rate": 8.764277035236939e-05, "loss": 1.4629, "step": 2044 }, { "epoch": 0.6207315222340264, "grad_norm": 0.3737129867076874, "learning_rate": 8.763669501822601e-05, "loss": 1.9238, "step": 2045 }, { "epoch": 0.6210350584307178, "grad_norm": 0.4435792863368988, "learning_rate": 8.763061968408263e-05, "loss": 1.746, "step": 2046 }, { "epoch": 0.6213385946274094, "grad_norm": 0.3660859167575836, "learning_rate": 8.762454434993926e-05, "loss": 1.9025, "step": 2047 }, { "epoch": 0.6216421308241008, "grad_norm": 0.6945536136627197, "learning_rate": 8.761846901579587e-05, "loss": 2.0719, "step": 2048 }, { "epoch": 0.6219456670207922, "grad_norm": 0.5578482151031494, "learning_rate": 8.761239368165249e-05, "loss": 2.0642, "step": 2049 }, { "epoch": 0.6222492032174837, "grad_norm": 0.38080549240112305, "learning_rate": 8.760631834750912e-05, "loss": 1.9905, "step": 2050 }, { "epoch": 0.6225527394141751, "grad_norm": 0.39509710669517517, "learning_rate": 8.760024301336574e-05, "loss": 1.8086, "step": 2051 }, { "epoch": 0.6228562756108666, "grad_norm": 0.39778873324394226, "learning_rate": 8.759416767922236e-05, "loss": 1.8397, "step": 2052 }, { "epoch": 0.6231598118075581, "grad_norm": 0.4001278877258301, "learning_rate": 8.758809234507899e-05, "loss": 2.1541, "step": 2053 }, { "epoch": 0.6234633480042495, "grad_norm": 0.41478973627090454, "learning_rate": 8.75820170109356e-05, "loss": 1.8661, "step": 2054 }, { "epoch": 0.623766884200941, "grad_norm": 0.44780445098876953, "learning_rate": 8.757594167679222e-05, "loss": 2.06, "step": 2055 }, { "epoch": 0.6240704203976324, "grad_norm": 0.4024375081062317, "learning_rate": 8.756986634264885e-05, "loss": 1.8951, "step": 2056 }, { "epoch": 0.6243739565943238, "grad_norm": 0.48133009672164917, "learning_rate": 8.756379100850547e-05, "loss": 1.8264, "step": 2057 }, { "epoch": 0.6246774927910154, "grad_norm": 0.4362419843673706, "learning_rate": 8.75577156743621e-05, "loss": 1.584, "step": 2058 }, { "epoch": 0.6249810289877068, "grad_norm": 0.39468279480934143, "learning_rate": 8.755164034021872e-05, "loss": 2.1541, "step": 2059 }, { "epoch": 0.6252845651843982, "grad_norm": 0.3956018388271332, "learning_rate": 8.754556500607534e-05, "loss": 2.0688, "step": 2060 }, { "epoch": 0.6255881013810897, "grad_norm": 0.3778972327709198, "learning_rate": 8.753948967193197e-05, "loss": 1.2158, "step": 2061 }, { "epoch": 0.6258916375777811, "grad_norm": 0.6592405438423157, "learning_rate": 8.753341433778858e-05, "loss": 2.1388, "step": 2062 }, { "epoch": 0.6261951737744726, "grad_norm": 0.44248607754707336, "learning_rate": 8.75273390036452e-05, "loss": 1.7007, "step": 2063 }, { "epoch": 0.6264987099711641, "grad_norm": 0.40454086661338806, "learning_rate": 8.752126366950183e-05, "loss": 1.9904, "step": 2064 }, { "epoch": 0.6268022461678555, "grad_norm": 0.4150254428386688, "learning_rate": 8.751518833535845e-05, "loss": 1.825, "step": 2065 }, { "epoch": 0.627105782364547, "grad_norm": 0.39456769824028015, "learning_rate": 8.750911300121507e-05, "loss": 1.6171, "step": 2066 }, { "epoch": 0.6274093185612384, "grad_norm": 0.42913463711738586, "learning_rate": 8.75030376670717e-05, "loss": 1.9738, "step": 2067 }, { "epoch": 0.6277128547579299, "grad_norm": 0.6062834858894348, "learning_rate": 8.749696233292831e-05, "loss": 2.3641, "step": 2068 }, { "epoch": 0.6280163909546214, "grad_norm": 0.4486273229122162, "learning_rate": 8.749088699878493e-05, "loss": 1.895, "step": 2069 }, { "epoch": 0.6283199271513128, "grad_norm": 0.6650506854057312, "learning_rate": 8.748481166464156e-05, "loss": 1.9425, "step": 2070 }, { "epoch": 0.6286234633480042, "grad_norm": 0.4337095618247986, "learning_rate": 8.747873633049818e-05, "loss": 1.6244, "step": 2071 }, { "epoch": 0.6289269995446957, "grad_norm": 0.39554956555366516, "learning_rate": 8.747266099635481e-05, "loss": 1.9773, "step": 2072 }, { "epoch": 0.6292305357413872, "grad_norm": 0.6905329823493958, "learning_rate": 8.746658566221143e-05, "loss": 1.4572, "step": 2073 }, { "epoch": 0.6295340719380786, "grad_norm": 0.4814346730709076, "learning_rate": 8.746051032806805e-05, "loss": 1.5925, "step": 2074 }, { "epoch": 0.6298376081347701, "grad_norm": 0.5194016695022583, "learning_rate": 8.745443499392468e-05, "loss": 1.9103, "step": 2075 }, { "epoch": 0.6301411443314615, "grad_norm": 0.38328269124031067, "learning_rate": 8.74483596597813e-05, "loss": 2.0036, "step": 2076 }, { "epoch": 0.630444680528153, "grad_norm": 0.3967950642108917, "learning_rate": 8.744228432563791e-05, "loss": 2.0632, "step": 2077 }, { "epoch": 0.6307482167248445, "grad_norm": 0.41844338178634644, "learning_rate": 8.743620899149454e-05, "loss": 1.7965, "step": 2078 }, { "epoch": 0.6310517529215359, "grad_norm": 0.4322264790534973, "learning_rate": 8.743013365735116e-05, "loss": 1.6439, "step": 2079 }, { "epoch": 0.6313552891182274, "grad_norm": 1.2367935180664062, "learning_rate": 8.742405832320778e-05, "loss": 1.4027, "step": 2080 }, { "epoch": 0.6316588253149188, "grad_norm": 0.40163764357566833, "learning_rate": 8.741798298906441e-05, "loss": 1.6465, "step": 2081 }, { "epoch": 0.6319623615116102, "grad_norm": 0.4429662823677063, "learning_rate": 8.741190765492102e-05, "loss": 2.0381, "step": 2082 }, { "epoch": 0.6322658977083017, "grad_norm": 0.4150178134441376, "learning_rate": 8.740583232077764e-05, "loss": 1.9406, "step": 2083 }, { "epoch": 0.6325694339049932, "grad_norm": 1.1689107418060303, "learning_rate": 8.739975698663427e-05, "loss": 2.1465, "step": 2084 }, { "epoch": 0.6328729701016846, "grad_norm": 0.39959049224853516, "learning_rate": 8.739368165249089e-05, "loss": 1.7467, "step": 2085 }, { "epoch": 0.6331765062983761, "grad_norm": 0.4441443979740143, "learning_rate": 8.738760631834752e-05, "loss": 1.3684, "step": 2086 }, { "epoch": 0.6334800424950675, "grad_norm": 0.42959195375442505, "learning_rate": 8.738153098420414e-05, "loss": 2.0832, "step": 2087 }, { "epoch": 0.6337835786917589, "grad_norm": 0.5253334045410156, "learning_rate": 8.737545565006076e-05, "loss": 1.83, "step": 2088 }, { "epoch": 0.6340871148884505, "grad_norm": 0.4475717842578888, "learning_rate": 8.736938031591739e-05, "loss": 1.9088, "step": 2089 }, { "epoch": 0.6343906510851419, "grad_norm": 0.4162061810493469, "learning_rate": 8.736330498177399e-05, "loss": 1.6869, "step": 2090 }, { "epoch": 0.6346941872818334, "grad_norm": 0.41907912492752075, "learning_rate": 8.735722964763062e-05, "loss": 1.6695, "step": 2091 }, { "epoch": 0.6349977234785248, "grad_norm": 0.4472843110561371, "learning_rate": 8.735115431348725e-05, "loss": 2.213, "step": 2092 }, { "epoch": 0.6353012596752162, "grad_norm": 0.4260854125022888, "learning_rate": 8.734507897934387e-05, "loss": 2.2187, "step": 2093 }, { "epoch": 0.6356047958719078, "grad_norm": 0.5154047608375549, "learning_rate": 8.733900364520049e-05, "loss": 1.669, "step": 2094 }, { "epoch": 0.6359083320685992, "grad_norm": 0.42840951681137085, "learning_rate": 8.733292831105712e-05, "loss": 1.5714, "step": 2095 }, { "epoch": 0.6362118682652906, "grad_norm": 0.3721560537815094, "learning_rate": 8.732685297691373e-05, "loss": 1.8571, "step": 2096 }, { "epoch": 0.6365154044619821, "grad_norm": 0.38668882846832275, "learning_rate": 8.732077764277035e-05, "loss": 1.9735, "step": 2097 }, { "epoch": 0.6368189406586735, "grad_norm": 0.44400742650032043, "learning_rate": 8.731470230862698e-05, "loss": 1.8012, "step": 2098 }, { "epoch": 0.637122476855365, "grad_norm": 0.4170168936252594, "learning_rate": 8.73086269744836e-05, "loss": 2.0064, "step": 2099 }, { "epoch": 0.6374260130520565, "grad_norm": 0.4339911937713623, "learning_rate": 8.730255164034023e-05, "loss": 1.8095, "step": 2100 }, { "epoch": 0.6377295492487479, "grad_norm": 0.4953417479991913, "learning_rate": 8.729647630619685e-05, "loss": 1.6443, "step": 2101 }, { "epoch": 0.6380330854454394, "grad_norm": 0.43667685985565186, "learning_rate": 8.729040097205347e-05, "loss": 1.5005, "step": 2102 }, { "epoch": 0.6383366216421308, "grad_norm": 0.42101868987083435, "learning_rate": 8.72843256379101e-05, "loss": 1.9, "step": 2103 }, { "epoch": 0.6386401578388223, "grad_norm": 0.4094242751598358, "learning_rate": 8.72782503037667e-05, "loss": 1.4354, "step": 2104 }, { "epoch": 0.6389436940355138, "grad_norm": 0.36078140139579773, "learning_rate": 8.727217496962333e-05, "loss": 1.9761, "step": 2105 }, { "epoch": 0.6392472302322052, "grad_norm": 0.40915626287460327, "learning_rate": 8.726609963547996e-05, "loss": 1.7398, "step": 2106 }, { "epoch": 0.6395507664288966, "grad_norm": 0.4518681466579437, "learning_rate": 8.726002430133658e-05, "loss": 1.6962, "step": 2107 }, { "epoch": 0.6398543026255881, "grad_norm": 0.41864755749702454, "learning_rate": 8.72539489671932e-05, "loss": 1.7477, "step": 2108 }, { "epoch": 0.6401578388222796, "grad_norm": 0.37776780128479004, "learning_rate": 8.724787363304983e-05, "loss": 1.4601, "step": 2109 }, { "epoch": 0.640461375018971, "grad_norm": 0.4602903723716736, "learning_rate": 8.724179829890644e-05, "loss": 1.7791, "step": 2110 }, { "epoch": 0.6407649112156625, "grad_norm": 0.3697658181190491, "learning_rate": 8.723572296476306e-05, "loss": 1.8286, "step": 2111 }, { "epoch": 0.6410684474123539, "grad_norm": 0.3810010254383087, "learning_rate": 8.722964763061969e-05, "loss": 1.6823, "step": 2112 }, { "epoch": 0.6413719836090453, "grad_norm": 0.3020067811012268, "learning_rate": 8.722357229647631e-05, "loss": 1.4566, "step": 2113 }, { "epoch": 0.6416755198057368, "grad_norm": 0.35782814025878906, "learning_rate": 8.721749696233293e-05, "loss": 1.986, "step": 2114 }, { "epoch": 0.6419790560024283, "grad_norm": 0.4075436294078827, "learning_rate": 8.721142162818954e-05, "loss": 2.0318, "step": 2115 }, { "epoch": 0.6422825921991198, "grad_norm": 0.3835841715335846, "learning_rate": 8.720534629404618e-05, "loss": 1.7135, "step": 2116 }, { "epoch": 0.6425861283958112, "grad_norm": 0.45285987854003906, "learning_rate": 8.71992709599028e-05, "loss": 1.9568, "step": 2117 }, { "epoch": 0.6428896645925026, "grad_norm": 0.36824312806129456, "learning_rate": 8.719319562575941e-05, "loss": 1.6443, "step": 2118 }, { "epoch": 0.6431932007891941, "grad_norm": 0.4950961172580719, "learning_rate": 8.718712029161604e-05, "loss": 2.1261, "step": 2119 }, { "epoch": 0.6434967369858856, "grad_norm": 0.36859118938446045, "learning_rate": 8.718104495747267e-05, "loss": 2.0044, "step": 2120 }, { "epoch": 0.643800273182577, "grad_norm": 0.43870800733566284, "learning_rate": 8.717496962332929e-05, "loss": 1.9407, "step": 2121 }, { "epoch": 0.6441038093792685, "grad_norm": 0.37381303310394287, "learning_rate": 8.71688942891859e-05, "loss": 2.1765, "step": 2122 }, { "epoch": 0.6444073455759599, "grad_norm": 0.39354661107063293, "learning_rate": 8.716281895504254e-05, "loss": 1.7037, "step": 2123 }, { "epoch": 0.6447108817726513, "grad_norm": 0.3997972011566162, "learning_rate": 8.715674362089915e-05, "loss": 1.9634, "step": 2124 }, { "epoch": 0.6450144179693429, "grad_norm": 0.4059608280658722, "learning_rate": 8.715066828675577e-05, "loss": 1.6695, "step": 2125 }, { "epoch": 0.6453179541660343, "grad_norm": 0.5082445740699768, "learning_rate": 8.71445929526124e-05, "loss": 1.8471, "step": 2126 }, { "epoch": 0.6456214903627258, "grad_norm": 0.3610053062438965, "learning_rate": 8.713851761846902e-05, "loss": 1.1613, "step": 2127 }, { "epoch": 0.6459250265594172, "grad_norm": 0.3617028295993805, "learning_rate": 8.713244228432564e-05, "loss": 2.1392, "step": 2128 }, { "epoch": 0.6462285627561086, "grad_norm": 0.366720587015152, "learning_rate": 8.712636695018225e-05, "loss": 2.0723, "step": 2129 }, { "epoch": 0.6465320989528002, "grad_norm": 0.6331523656845093, "learning_rate": 8.712029161603889e-05, "loss": 2.167, "step": 2130 }, { "epoch": 0.6468356351494916, "grad_norm": 0.3837411403656006, "learning_rate": 8.711421628189552e-05, "loss": 1.9423, "step": 2131 }, { "epoch": 0.647139171346183, "grad_norm": 0.49465852975845337, "learning_rate": 8.710814094775212e-05, "loss": 1.1904, "step": 2132 }, { "epoch": 0.6474427075428745, "grad_norm": 0.37504327297210693, "learning_rate": 8.710206561360875e-05, "loss": 1.3686, "step": 2133 }, { "epoch": 0.6477462437395659, "grad_norm": 0.7189307808876038, "learning_rate": 8.709599027946538e-05, "loss": 2.2359, "step": 2134 }, { "epoch": 0.6480497799362575, "grad_norm": 0.40414321422576904, "learning_rate": 8.7089914945322e-05, "loss": 1.9962, "step": 2135 }, { "epoch": 0.6483533161329489, "grad_norm": 1.6091177463531494, "learning_rate": 8.708383961117862e-05, "loss": 2.1011, "step": 2136 }, { "epoch": 0.6486568523296403, "grad_norm": 0.38812699913978577, "learning_rate": 8.707776427703525e-05, "loss": 1.8092, "step": 2137 }, { "epoch": 0.6489603885263318, "grad_norm": 0.42820391058921814, "learning_rate": 8.707168894289186e-05, "loss": 1.6027, "step": 2138 }, { "epoch": 0.6492639247230232, "grad_norm": 0.9884753823280334, "learning_rate": 8.706561360874848e-05, "loss": 1.4781, "step": 2139 }, { "epoch": 0.6495674609197146, "grad_norm": 0.477003276348114, "learning_rate": 8.705953827460511e-05, "loss": 1.5069, "step": 2140 }, { "epoch": 0.6498709971164062, "grad_norm": 0.4502262473106384, "learning_rate": 8.705346294046173e-05, "loss": 2.0444, "step": 2141 }, { "epoch": 0.6501745333130976, "grad_norm": 0.36842817068099976, "learning_rate": 8.704738760631835e-05, "loss": 1.8169, "step": 2142 }, { "epoch": 0.650478069509789, "grad_norm": 0.4413151741027832, "learning_rate": 8.704131227217496e-05, "loss": 2.0494, "step": 2143 }, { "epoch": 0.6507816057064805, "grad_norm": 0.35122597217559814, "learning_rate": 8.70352369380316e-05, "loss": 1.6942, "step": 2144 }, { "epoch": 0.6510851419031719, "grad_norm": 0.48351892828941345, "learning_rate": 8.702916160388823e-05, "loss": 2.3677, "step": 2145 }, { "epoch": 0.6513886780998634, "grad_norm": 0.43341419100761414, "learning_rate": 8.702308626974483e-05, "loss": 1.9239, "step": 2146 }, { "epoch": 0.6516922142965549, "grad_norm": 0.36051031947135925, "learning_rate": 8.701701093560146e-05, "loss": 1.7721, "step": 2147 }, { "epoch": 0.6519957504932463, "grad_norm": 0.37466931343078613, "learning_rate": 8.701093560145809e-05, "loss": 2.0199, "step": 2148 }, { "epoch": 0.6522992866899378, "grad_norm": 0.4176545739173889, "learning_rate": 8.700486026731471e-05, "loss": 1.8806, "step": 2149 }, { "epoch": 0.6526028228866292, "grad_norm": 0.4158160984516144, "learning_rate": 8.699878493317133e-05, "loss": 1.4032, "step": 2150 }, { "epoch": 0.6529063590833207, "grad_norm": 0.3781472444534302, "learning_rate": 8.699270959902796e-05, "loss": 1.6158, "step": 2151 }, { "epoch": 0.6532098952800122, "grad_norm": 0.4139382243156433, "learning_rate": 8.698663426488457e-05, "loss": 1.6677, "step": 2152 }, { "epoch": 0.6535134314767036, "grad_norm": 0.5988966226577759, "learning_rate": 8.698055893074119e-05, "loss": 2.0821, "step": 2153 }, { "epoch": 0.653816967673395, "grad_norm": 0.3822804391384125, "learning_rate": 8.697448359659782e-05, "loss": 2.2819, "step": 2154 }, { "epoch": 0.6541205038700865, "grad_norm": 0.42142486572265625, "learning_rate": 8.696840826245444e-05, "loss": 2.1515, "step": 2155 }, { "epoch": 0.654424040066778, "grad_norm": 0.3964162766933441, "learning_rate": 8.696233292831106e-05, "loss": 1.6801, "step": 2156 }, { "epoch": 0.6547275762634694, "grad_norm": 0.6642559170722961, "learning_rate": 8.695625759416767e-05, "loss": 1.9085, "step": 2157 }, { "epoch": 0.6550311124601609, "grad_norm": 0.4267200827598572, "learning_rate": 8.69501822600243e-05, "loss": 1.8194, "step": 2158 }, { "epoch": 0.6553346486568523, "grad_norm": 0.4862426221370697, "learning_rate": 8.694410692588094e-05, "loss": 2.0361, "step": 2159 }, { "epoch": 0.6556381848535437, "grad_norm": 0.45392298698425293, "learning_rate": 8.693803159173754e-05, "loss": 2.034, "step": 2160 }, { "epoch": 0.6559417210502353, "grad_norm": 0.4699818193912506, "learning_rate": 8.693195625759417e-05, "loss": 1.0096, "step": 2161 }, { "epoch": 0.6562452572469267, "grad_norm": 0.4601641595363617, "learning_rate": 8.69258809234508e-05, "loss": 1.9042, "step": 2162 }, { "epoch": 0.6565487934436182, "grad_norm": 0.3832731544971466, "learning_rate": 8.69198055893074e-05, "loss": 1.7728, "step": 2163 }, { "epoch": 0.6568523296403096, "grad_norm": 0.41405048966407776, "learning_rate": 8.691373025516404e-05, "loss": 1.8591, "step": 2164 }, { "epoch": 0.657155865837001, "grad_norm": 0.4332970380783081, "learning_rate": 8.690765492102067e-05, "loss": 1.4433, "step": 2165 }, { "epoch": 0.6574594020336925, "grad_norm": 0.38901615142822266, "learning_rate": 8.690157958687728e-05, "loss": 1.9933, "step": 2166 }, { "epoch": 0.657762938230384, "grad_norm": 0.5068726539611816, "learning_rate": 8.68955042527339e-05, "loss": 1.954, "step": 2167 }, { "epoch": 0.6580664744270754, "grad_norm": 0.4076615571975708, "learning_rate": 8.688942891859053e-05, "loss": 1.699, "step": 2168 }, { "epoch": 0.6583700106237669, "grad_norm": 0.38633993268013, "learning_rate": 8.688335358444715e-05, "loss": 1.8453, "step": 2169 }, { "epoch": 0.6586735468204583, "grad_norm": 0.3873181641101837, "learning_rate": 8.687727825030377e-05, "loss": 1.8966, "step": 2170 }, { "epoch": 0.6589770830171497, "grad_norm": 0.4472099840641022, "learning_rate": 8.687120291616038e-05, "loss": 2.1017, "step": 2171 }, { "epoch": 0.6592806192138413, "grad_norm": 0.34563758969306946, "learning_rate": 8.686512758201702e-05, "loss": 1.6238, "step": 2172 }, { "epoch": 0.6595841554105327, "grad_norm": 0.4515549838542938, "learning_rate": 8.685905224787365e-05, "loss": 1.7878, "step": 2173 }, { "epoch": 0.6598876916072242, "grad_norm": 0.6528467535972595, "learning_rate": 8.685297691373025e-05, "loss": 1.7581, "step": 2174 }, { "epoch": 0.6601912278039156, "grad_norm": 0.345264732837677, "learning_rate": 8.684690157958688e-05, "loss": 1.2984, "step": 2175 }, { "epoch": 0.660494764000607, "grad_norm": 0.3934096395969391, "learning_rate": 8.684082624544351e-05, "loss": 2.084, "step": 2176 }, { "epoch": 0.6607983001972986, "grad_norm": 0.3595477044582367, "learning_rate": 8.683475091130012e-05, "loss": 1.9587, "step": 2177 }, { "epoch": 0.66110183639399, "grad_norm": 0.4324481189250946, "learning_rate": 8.682867557715675e-05, "loss": 1.8877, "step": 2178 }, { "epoch": 0.6614053725906814, "grad_norm": 0.4493394196033478, "learning_rate": 8.682260024301338e-05, "loss": 1.8291, "step": 2179 }, { "epoch": 0.6617089087873729, "grad_norm": 0.4085356891155243, "learning_rate": 8.681652490887e-05, "loss": 2.1646, "step": 2180 }, { "epoch": 0.6620124449840643, "grad_norm": 0.4380393624305725, "learning_rate": 8.681044957472661e-05, "loss": 1.3779, "step": 2181 }, { "epoch": 0.6623159811807559, "grad_norm": 0.3621211349964142, "learning_rate": 8.680437424058324e-05, "loss": 1.7786, "step": 2182 }, { "epoch": 0.6626195173774473, "grad_norm": 0.49654054641723633, "learning_rate": 8.679829890643986e-05, "loss": 1.2754, "step": 2183 }, { "epoch": 0.6629230535741387, "grad_norm": 0.49035829305648804, "learning_rate": 8.679222357229648e-05, "loss": 1.4453, "step": 2184 }, { "epoch": 0.6632265897708302, "grad_norm": 0.5359811782836914, "learning_rate": 8.67861482381531e-05, "loss": 1.4067, "step": 2185 }, { "epoch": 0.6635301259675216, "grad_norm": 0.4120253622531891, "learning_rate": 8.678007290400973e-05, "loss": 2.235, "step": 2186 }, { "epoch": 0.6638336621642131, "grad_norm": 0.3773285448551178, "learning_rate": 8.677399756986634e-05, "loss": 2.1406, "step": 2187 }, { "epoch": 0.6641371983609046, "grad_norm": 0.3956649899482727, "learning_rate": 8.676792223572296e-05, "loss": 1.5831, "step": 2188 }, { "epoch": 0.664440734557596, "grad_norm": 0.3894088864326477, "learning_rate": 8.676184690157959e-05, "loss": 1.9276, "step": 2189 }, { "epoch": 0.6647442707542874, "grad_norm": 0.4932451546192169, "learning_rate": 8.675577156743622e-05, "loss": 2.1134, "step": 2190 }, { "epoch": 0.6650478069509789, "grad_norm": 0.41271400451660156, "learning_rate": 8.674969623329283e-05, "loss": 2.1299, "step": 2191 }, { "epoch": 0.6653513431476703, "grad_norm": 0.596319317817688, "learning_rate": 8.674362089914946e-05, "loss": 1.4789, "step": 2192 }, { "epoch": 0.6656548793443618, "grad_norm": 0.4255685806274414, "learning_rate": 8.673754556500609e-05, "loss": 1.7751, "step": 2193 }, { "epoch": 0.6659584155410533, "grad_norm": 0.371003657579422, "learning_rate": 8.67314702308627e-05, "loss": 1.9355, "step": 2194 }, { "epoch": 0.6662619517377447, "grad_norm": 0.43426600098609924, "learning_rate": 8.672539489671932e-05, "loss": 2.123, "step": 2195 }, { "epoch": 0.6665654879344362, "grad_norm": 0.40644875168800354, "learning_rate": 8.671931956257595e-05, "loss": 2.1907, "step": 2196 }, { "epoch": 0.6668690241311276, "grad_norm": 0.4468652904033661, "learning_rate": 8.671324422843257e-05, "loss": 1.8754, "step": 2197 }, { "epoch": 0.6671725603278191, "grad_norm": 0.34468400478363037, "learning_rate": 8.670716889428919e-05, "loss": 1.989, "step": 2198 }, { "epoch": 0.6674760965245106, "grad_norm": 0.430462121963501, "learning_rate": 8.67010935601458e-05, "loss": 1.6021, "step": 2199 }, { "epoch": 0.667779632721202, "grad_norm": 0.42845168709754944, "learning_rate": 8.669501822600244e-05, "loss": 2.1036, "step": 2200 }, { "epoch": 0.6680831689178934, "grad_norm": 0.4242333769798279, "learning_rate": 8.668894289185905e-05, "loss": 1.8518, "step": 2201 }, { "epoch": 0.6683867051145849, "grad_norm": 0.3754311800003052, "learning_rate": 8.668286755771567e-05, "loss": 1.8616, "step": 2202 }, { "epoch": 0.6686902413112764, "grad_norm": 0.39913347363471985, "learning_rate": 8.66767922235723e-05, "loss": 2.2196, "step": 2203 }, { "epoch": 0.6689937775079678, "grad_norm": 0.3791050910949707, "learning_rate": 8.667071688942893e-05, "loss": 1.8651, "step": 2204 }, { "epoch": 0.6692973137046593, "grad_norm": 0.46585163474082947, "learning_rate": 8.666464155528554e-05, "loss": 2.0577, "step": 2205 }, { "epoch": 0.6696008499013507, "grad_norm": 0.4098934233188629, "learning_rate": 8.665856622114217e-05, "loss": 2.0225, "step": 2206 }, { "epoch": 0.6699043860980421, "grad_norm": 0.3545132577419281, "learning_rate": 8.66524908869988e-05, "loss": 1.8955, "step": 2207 }, { "epoch": 0.6702079222947337, "grad_norm": 0.4183339774608612, "learning_rate": 8.664641555285542e-05, "loss": 2.1093, "step": 2208 }, { "epoch": 0.6705114584914251, "grad_norm": 0.378439724445343, "learning_rate": 8.664034021871203e-05, "loss": 1.8865, "step": 2209 }, { "epoch": 0.6708149946881166, "grad_norm": 0.45795947313308716, "learning_rate": 8.663426488456865e-05, "loss": 1.8891, "step": 2210 }, { "epoch": 0.671118530884808, "grad_norm": 0.3554634153842926, "learning_rate": 8.662818955042528e-05, "loss": 2.1196, "step": 2211 }, { "epoch": 0.6714220670814994, "grad_norm": 0.4456568956375122, "learning_rate": 8.66221142162819e-05, "loss": 1.6737, "step": 2212 }, { "epoch": 0.671725603278191, "grad_norm": 0.39127472043037415, "learning_rate": 8.661603888213852e-05, "loss": 1.989, "step": 2213 }, { "epoch": 0.6720291394748824, "grad_norm": 0.4240843653678894, "learning_rate": 8.660996354799515e-05, "loss": 2.0285, "step": 2214 }, { "epoch": 0.6723326756715738, "grad_norm": 0.40605032444000244, "learning_rate": 8.660388821385176e-05, "loss": 1.6815, "step": 2215 }, { "epoch": 0.6726362118682653, "grad_norm": 0.4075249433517456, "learning_rate": 8.659781287970838e-05, "loss": 1.8847, "step": 2216 }, { "epoch": 0.6729397480649567, "grad_norm": 0.38832414150238037, "learning_rate": 8.659173754556501e-05, "loss": 1.7957, "step": 2217 }, { "epoch": 0.6732432842616483, "grad_norm": 0.40097537636756897, "learning_rate": 8.658566221142164e-05, "loss": 1.7386, "step": 2218 }, { "epoch": 0.6735468204583397, "grad_norm": 0.41220805048942566, "learning_rate": 8.657958687727825e-05, "loss": 2.1473, "step": 2219 }, { "epoch": 0.6738503566550311, "grad_norm": 0.4157550036907196, "learning_rate": 8.657351154313488e-05, "loss": 1.7435, "step": 2220 }, { "epoch": 0.6741538928517226, "grad_norm": 0.9074850082397461, "learning_rate": 8.656743620899151e-05, "loss": 1.7336, "step": 2221 }, { "epoch": 0.674457429048414, "grad_norm": 0.4011635482311249, "learning_rate": 8.656136087484813e-05, "loss": 1.9814, "step": 2222 }, { "epoch": 0.6747609652451054, "grad_norm": 0.4295683801174164, "learning_rate": 8.655528554070474e-05, "loss": 1.6332, "step": 2223 }, { "epoch": 0.675064501441797, "grad_norm": 0.424452006816864, "learning_rate": 8.654921020656136e-05, "loss": 1.6975, "step": 2224 }, { "epoch": 0.6753680376384884, "grad_norm": 0.3975834846496582, "learning_rate": 8.654313487241799e-05, "loss": 1.6783, "step": 2225 }, { "epoch": 0.6756715738351798, "grad_norm": 0.49879249930381775, "learning_rate": 8.653705953827461e-05, "loss": 2.0694, "step": 2226 }, { "epoch": 0.6759751100318713, "grad_norm": 0.424622505903244, "learning_rate": 8.653098420413123e-05, "loss": 1.7089, "step": 2227 }, { "epoch": 0.6762786462285627, "grad_norm": 0.497159868478775, "learning_rate": 8.652490886998786e-05, "loss": 1.8934, "step": 2228 }, { "epoch": 0.6765821824252543, "grad_norm": 0.40200483798980713, "learning_rate": 8.651883353584447e-05, "loss": 2.0158, "step": 2229 }, { "epoch": 0.6768857186219457, "grad_norm": 0.4294535219669342, "learning_rate": 8.651275820170109e-05, "loss": 1.815, "step": 2230 }, { "epoch": 0.6771892548186371, "grad_norm": 0.5176182389259338, "learning_rate": 8.650668286755772e-05, "loss": 2.0518, "step": 2231 }, { "epoch": 0.6774927910153286, "grad_norm": 0.44558650255203247, "learning_rate": 8.650060753341435e-05, "loss": 1.7201, "step": 2232 }, { "epoch": 0.67779632721202, "grad_norm": 0.38811054825782776, "learning_rate": 8.649453219927096e-05, "loss": 1.5197, "step": 2233 }, { "epoch": 0.6780998634087115, "grad_norm": 0.3874174952507019, "learning_rate": 8.648845686512759e-05, "loss": 1.8105, "step": 2234 }, { "epoch": 0.678403399605403, "grad_norm": 0.6453530788421631, "learning_rate": 8.648238153098422e-05, "loss": 1.585, "step": 2235 }, { "epoch": 0.6787069358020944, "grad_norm": 0.4314938485622406, "learning_rate": 8.647630619684082e-05, "loss": 1.7869, "step": 2236 }, { "epoch": 0.6790104719987858, "grad_norm": 0.37230706214904785, "learning_rate": 8.647023086269745e-05, "loss": 1.5571, "step": 2237 }, { "epoch": 0.6793140081954773, "grad_norm": 0.47215935587882996, "learning_rate": 8.646415552855407e-05, "loss": 2.0845, "step": 2238 }, { "epoch": 0.6796175443921688, "grad_norm": 0.4179088771343231, "learning_rate": 8.64580801944107e-05, "loss": 2.0181, "step": 2239 }, { "epoch": 0.6799210805888602, "grad_norm": 0.7629103660583496, "learning_rate": 8.645200486026732e-05, "loss": 2.0156, "step": 2240 }, { "epoch": 0.6802246167855517, "grad_norm": 0.3792973756790161, "learning_rate": 8.644592952612394e-05, "loss": 1.9137, "step": 2241 }, { "epoch": 0.6805281529822431, "grad_norm": 0.38583695888519287, "learning_rate": 8.643985419198057e-05, "loss": 1.8321, "step": 2242 }, { "epoch": 0.6808316891789346, "grad_norm": 0.4620136320590973, "learning_rate": 8.643377885783718e-05, "loss": 1.4017, "step": 2243 }, { "epoch": 0.6811352253756261, "grad_norm": 0.47091394662857056, "learning_rate": 8.64277035236938e-05, "loss": 2.2232, "step": 2244 }, { "epoch": 0.6814387615723175, "grad_norm": 0.3809249699115753, "learning_rate": 8.642162818955043e-05, "loss": 1.7188, "step": 2245 }, { "epoch": 0.681742297769009, "grad_norm": 0.4558849334716797, "learning_rate": 8.641555285540706e-05, "loss": 1.7074, "step": 2246 }, { "epoch": 0.6820458339657004, "grad_norm": 0.39358267188072205, "learning_rate": 8.640947752126367e-05, "loss": 1.6826, "step": 2247 }, { "epoch": 0.6823493701623918, "grad_norm": 0.5007576942443848, "learning_rate": 8.64034021871203e-05, "loss": 1.683, "step": 2248 }, { "epoch": 0.6826529063590833, "grad_norm": 0.43831315636634827, "learning_rate": 8.639732685297693e-05, "loss": 1.4773, "step": 2249 }, { "epoch": 0.6829564425557748, "grad_norm": 0.41979843378067017, "learning_rate": 8.639125151883353e-05, "loss": 1.8137, "step": 2250 }, { "epoch": 0.6832599787524662, "grad_norm": 0.4662984311580658, "learning_rate": 8.638517618469016e-05, "loss": 1.6978, "step": 2251 }, { "epoch": 0.6835635149491577, "grad_norm": 0.4381478428840637, "learning_rate": 8.637910085054678e-05, "loss": 2.0157, "step": 2252 }, { "epoch": 0.6838670511458491, "grad_norm": 0.7363194823265076, "learning_rate": 8.637302551640341e-05, "loss": 1.5466, "step": 2253 }, { "epoch": 0.6841705873425405, "grad_norm": 0.5327618718147278, "learning_rate": 8.636695018226003e-05, "loss": 1.8425, "step": 2254 }, { "epoch": 0.6844741235392321, "grad_norm": 0.4380737245082855, "learning_rate": 8.636087484811665e-05, "loss": 1.7879, "step": 2255 }, { "epoch": 0.6847776597359235, "grad_norm": 0.8782259821891785, "learning_rate": 8.635479951397328e-05, "loss": 1.8806, "step": 2256 }, { "epoch": 0.685081195932615, "grad_norm": 0.3841392397880554, "learning_rate": 8.63487241798299e-05, "loss": 2.014, "step": 2257 }, { "epoch": 0.6853847321293064, "grad_norm": 0.39896446466445923, "learning_rate": 8.634264884568651e-05, "loss": 1.9356, "step": 2258 }, { "epoch": 0.6856882683259978, "grad_norm": 0.41541773080825806, "learning_rate": 8.633657351154314e-05, "loss": 2.1365, "step": 2259 }, { "epoch": 0.6859918045226894, "grad_norm": 0.453948438167572, "learning_rate": 8.633049817739976e-05, "loss": 1.9459, "step": 2260 }, { "epoch": 0.6862953407193808, "grad_norm": 0.6398829221725464, "learning_rate": 8.632442284325638e-05, "loss": 1.3556, "step": 2261 }, { "epoch": 0.6865988769160722, "grad_norm": 0.43574538826942444, "learning_rate": 8.631834750911301e-05, "loss": 1.9142, "step": 2262 }, { "epoch": 0.6869024131127637, "grad_norm": 0.39180728793144226, "learning_rate": 8.631227217496964e-05, "loss": 1.8198, "step": 2263 }, { "epoch": 0.6872059493094551, "grad_norm": 0.4146488904953003, "learning_rate": 8.630619684082624e-05, "loss": 1.7776, "step": 2264 }, { "epoch": 0.6875094855061467, "grad_norm": 0.3681737184524536, "learning_rate": 8.630012150668287e-05, "loss": 1.4926, "step": 2265 }, { "epoch": 0.6878130217028381, "grad_norm": 0.44278883934020996, "learning_rate": 8.629404617253949e-05, "loss": 1.8021, "step": 2266 }, { "epoch": 0.6881165578995295, "grad_norm": 0.4687512218952179, "learning_rate": 8.628797083839612e-05, "loss": 1.75, "step": 2267 }, { "epoch": 0.688420094096221, "grad_norm": 0.4102340042591095, "learning_rate": 8.628189550425274e-05, "loss": 1.9249, "step": 2268 }, { "epoch": 0.6887236302929124, "grad_norm": 0.44898685812950134, "learning_rate": 8.627582017010936e-05, "loss": 1.9914, "step": 2269 }, { "epoch": 0.6890271664896039, "grad_norm": 0.451225221157074, "learning_rate": 8.626974483596599e-05, "loss": 1.71, "step": 2270 }, { "epoch": 0.6893307026862954, "grad_norm": 0.7062796950340271, "learning_rate": 8.62636695018226e-05, "loss": 1.2907, "step": 2271 }, { "epoch": 0.6896342388829868, "grad_norm": 0.39842337369918823, "learning_rate": 8.625759416767922e-05, "loss": 1.6578, "step": 2272 }, { "epoch": 0.6899377750796782, "grad_norm": 0.33577829599380493, "learning_rate": 8.625151883353585e-05, "loss": 1.7332, "step": 2273 }, { "epoch": 0.6902413112763697, "grad_norm": 0.43298929929733276, "learning_rate": 8.624544349939247e-05, "loss": 2.0028, "step": 2274 }, { "epoch": 0.6905448474730611, "grad_norm": 0.4451911449432373, "learning_rate": 8.623936816524909e-05, "loss": 1.4601, "step": 2275 }, { "epoch": 0.6908483836697527, "grad_norm": 0.4683527946472168, "learning_rate": 8.623329283110572e-05, "loss": 1.5981, "step": 2276 }, { "epoch": 0.6911519198664441, "grad_norm": 0.4420105814933777, "learning_rate": 8.622721749696235e-05, "loss": 1.6975, "step": 2277 }, { "epoch": 0.6914554560631355, "grad_norm": 0.3732719421386719, "learning_rate": 8.622114216281895e-05, "loss": 2.1493, "step": 2278 }, { "epoch": 0.691758992259827, "grad_norm": 0.4039726257324219, "learning_rate": 8.621506682867558e-05, "loss": 2.0208, "step": 2279 }, { "epoch": 0.6920625284565184, "grad_norm": 0.35387054085731506, "learning_rate": 8.62089914945322e-05, "loss": 2.0324, "step": 2280 }, { "epoch": 0.6923660646532099, "grad_norm": 0.4533388912677765, "learning_rate": 8.620291616038883e-05, "loss": 1.7281, "step": 2281 }, { "epoch": 0.6926696008499014, "grad_norm": 0.37299293279647827, "learning_rate": 8.619684082624545e-05, "loss": 1.9662, "step": 2282 }, { "epoch": 0.6929731370465928, "grad_norm": 0.41872239112854004, "learning_rate": 8.619076549210207e-05, "loss": 1.9887, "step": 2283 }, { "epoch": 0.6932766732432842, "grad_norm": 0.8140760064125061, "learning_rate": 8.61846901579587e-05, "loss": 2.1649, "step": 2284 }, { "epoch": 0.6935802094399757, "grad_norm": 0.3966423571109772, "learning_rate": 8.617861482381531e-05, "loss": 1.421, "step": 2285 }, { "epoch": 0.6938837456366672, "grad_norm": 0.36617428064346313, "learning_rate": 8.617253948967193e-05, "loss": 1.8174, "step": 2286 }, { "epoch": 0.6941872818333586, "grad_norm": 0.41297128796577454, "learning_rate": 8.616646415552856e-05, "loss": 1.9215, "step": 2287 }, { "epoch": 0.6944908180300501, "grad_norm": 0.48277321457862854, "learning_rate": 8.616038882138518e-05, "loss": 1.3551, "step": 2288 }, { "epoch": 0.6947943542267415, "grad_norm": 0.41190510988235474, "learning_rate": 8.61543134872418e-05, "loss": 1.8701, "step": 2289 }, { "epoch": 0.695097890423433, "grad_norm": 0.34471115469932556, "learning_rate": 8.614823815309843e-05, "loss": 1.5073, "step": 2290 }, { "epoch": 0.6954014266201245, "grad_norm": 0.4469250738620758, "learning_rate": 8.614216281895504e-05, "loss": 1.8257, "step": 2291 }, { "epoch": 0.6957049628168159, "grad_norm": 0.38356101512908936, "learning_rate": 8.613608748481166e-05, "loss": 1.63, "step": 2292 }, { "epoch": 0.6960084990135074, "grad_norm": 0.3836432099342346, "learning_rate": 8.613001215066829e-05, "loss": 1.6294, "step": 2293 }, { "epoch": 0.6963120352101988, "grad_norm": 1.1250473260879517, "learning_rate": 8.612393681652491e-05, "loss": 1.0634, "step": 2294 }, { "epoch": 0.6966155714068902, "grad_norm": 0.39849042892456055, "learning_rate": 8.611786148238154e-05, "loss": 1.4908, "step": 2295 }, { "epoch": 0.6969191076035818, "grad_norm": 1.0617260932922363, "learning_rate": 8.611178614823816e-05, "loss": 2.0693, "step": 2296 }, { "epoch": 0.6972226438002732, "grad_norm": 0.44789618253707886, "learning_rate": 8.610571081409478e-05, "loss": 1.7397, "step": 2297 }, { "epoch": 0.6975261799969646, "grad_norm": 0.7480859756469727, "learning_rate": 8.60996354799514e-05, "loss": 1.8385, "step": 2298 }, { "epoch": 0.6978297161936561, "grad_norm": 0.3201582133769989, "learning_rate": 8.609356014580802e-05, "loss": 1.4323, "step": 2299 }, { "epoch": 0.6981332523903475, "grad_norm": 0.4212173521518707, "learning_rate": 8.608748481166464e-05, "loss": 1.6153, "step": 2300 }, { "epoch": 0.6984367885870391, "grad_norm": 0.39297157526016235, "learning_rate": 8.608140947752127e-05, "loss": 1.9207, "step": 2301 }, { "epoch": 0.6987403247837305, "grad_norm": 0.4868420660495758, "learning_rate": 8.607533414337789e-05, "loss": 1.8056, "step": 2302 }, { "epoch": 0.6990438609804219, "grad_norm": 0.518147885799408, "learning_rate": 8.60692588092345e-05, "loss": 1.4521, "step": 2303 }, { "epoch": 0.6993473971771134, "grad_norm": 0.4484739899635315, "learning_rate": 8.606318347509114e-05, "loss": 1.8651, "step": 2304 }, { "epoch": 0.6996509333738048, "grad_norm": 0.4859076738357544, "learning_rate": 8.605710814094775e-05, "loss": 1.6752, "step": 2305 }, { "epoch": 0.6999544695704962, "grad_norm": 0.4186297655105591, "learning_rate": 8.605103280680437e-05, "loss": 2.0165, "step": 2306 }, { "epoch": 0.7002580057671878, "grad_norm": 0.34496191143989563, "learning_rate": 8.6044957472661e-05, "loss": 2.1395, "step": 2307 }, { "epoch": 0.7005615419638792, "grad_norm": 0.3636651933193207, "learning_rate": 8.603888213851762e-05, "loss": 1.2115, "step": 2308 }, { "epoch": 0.7008650781605706, "grad_norm": 0.38789573311805725, "learning_rate": 8.603280680437425e-05, "loss": 1.7992, "step": 2309 }, { "epoch": 0.7011686143572621, "grad_norm": 0.41874828934669495, "learning_rate": 8.602673147023087e-05, "loss": 1.9813, "step": 2310 }, { "epoch": 0.7014721505539535, "grad_norm": 0.6681198477745056, "learning_rate": 8.602065613608749e-05, "loss": 2.0765, "step": 2311 }, { "epoch": 0.7017756867506451, "grad_norm": 0.4358363151550293, "learning_rate": 8.601458080194412e-05, "loss": 2.0836, "step": 2312 }, { "epoch": 0.7020792229473365, "grad_norm": 0.4268842339515686, "learning_rate": 8.600850546780073e-05, "loss": 1.963, "step": 2313 }, { "epoch": 0.7023827591440279, "grad_norm": 0.43456903100013733, "learning_rate": 8.600243013365735e-05, "loss": 2.031, "step": 2314 }, { "epoch": 0.7026862953407194, "grad_norm": 0.9157736301422119, "learning_rate": 8.599635479951398e-05, "loss": 1.6625, "step": 2315 }, { "epoch": 0.7029898315374108, "grad_norm": 0.41116464138031006, "learning_rate": 8.59902794653706e-05, "loss": 1.3184, "step": 2316 }, { "epoch": 0.7032933677341023, "grad_norm": 0.38889098167419434, "learning_rate": 8.598420413122722e-05, "loss": 1.5656, "step": 2317 }, { "epoch": 0.7035969039307938, "grad_norm": 0.4620545208454132, "learning_rate": 8.597812879708385e-05, "loss": 1.9967, "step": 2318 }, { "epoch": 0.7039004401274852, "grad_norm": 0.44721749424934387, "learning_rate": 8.597205346294046e-05, "loss": 1.9404, "step": 2319 }, { "epoch": 0.7042039763241766, "grad_norm": 0.46273544430732727, "learning_rate": 8.596597812879708e-05, "loss": 1.9765, "step": 2320 }, { "epoch": 0.7045075125208681, "grad_norm": 0.3636545240879059, "learning_rate": 8.595990279465371e-05, "loss": 1.8925, "step": 2321 }, { "epoch": 0.7048110487175596, "grad_norm": 0.49978089332580566, "learning_rate": 8.595382746051033e-05, "loss": 1.9087, "step": 2322 }, { "epoch": 0.705114584914251, "grad_norm": 0.3676183819770813, "learning_rate": 8.594775212636695e-05, "loss": 1.9449, "step": 2323 }, { "epoch": 0.7054181211109425, "grad_norm": 0.3930191397666931, "learning_rate": 8.594167679222358e-05, "loss": 2.1377, "step": 2324 }, { "epoch": 0.7057216573076339, "grad_norm": 0.4476909935474396, "learning_rate": 8.59356014580802e-05, "loss": 1.886, "step": 2325 }, { "epoch": 0.7060251935043254, "grad_norm": 0.4343526363372803, "learning_rate": 8.592952612393683e-05, "loss": 1.5936, "step": 2326 }, { "epoch": 0.7063287297010169, "grad_norm": 0.42617321014404297, "learning_rate": 8.592345078979344e-05, "loss": 1.8253, "step": 2327 }, { "epoch": 0.7066322658977083, "grad_norm": 0.4090782105922699, "learning_rate": 8.591737545565006e-05, "loss": 1.8596, "step": 2328 }, { "epoch": 0.7069358020943998, "grad_norm": 0.4233112633228302, "learning_rate": 8.591130012150669e-05, "loss": 1.8529, "step": 2329 }, { "epoch": 0.7072393382910912, "grad_norm": 0.4159391224384308, "learning_rate": 8.590522478736331e-05, "loss": 2.202, "step": 2330 }, { "epoch": 0.7075428744877826, "grad_norm": 0.4303951859474182, "learning_rate": 8.589914945321993e-05, "loss": 2.2339, "step": 2331 }, { "epoch": 0.7078464106844741, "grad_norm": 0.431086927652359, "learning_rate": 8.589307411907656e-05, "loss": 1.7753, "step": 2332 }, { "epoch": 0.7081499468811656, "grad_norm": 0.4268263280391693, "learning_rate": 8.588699878493317e-05, "loss": 2.1529, "step": 2333 }, { "epoch": 0.708453483077857, "grad_norm": 0.35274428129196167, "learning_rate": 8.588092345078979e-05, "loss": 1.4355, "step": 2334 }, { "epoch": 0.7087570192745485, "grad_norm": 0.3985956311225891, "learning_rate": 8.587484811664642e-05, "loss": 1.7141, "step": 2335 }, { "epoch": 0.7090605554712399, "grad_norm": 0.44768375158309937, "learning_rate": 8.586877278250304e-05, "loss": 1.6654, "step": 2336 }, { "epoch": 0.7093640916679314, "grad_norm": 0.38372135162353516, "learning_rate": 8.586269744835966e-05, "loss": 1.8577, "step": 2337 }, { "epoch": 0.7096676278646229, "grad_norm": 0.459806889295578, "learning_rate": 8.585662211421629e-05, "loss": 1.8502, "step": 2338 }, { "epoch": 0.7099711640613143, "grad_norm": 0.36689698696136475, "learning_rate": 8.58505467800729e-05, "loss": 1.9931, "step": 2339 }, { "epoch": 0.7102747002580058, "grad_norm": 0.5424461960792542, "learning_rate": 8.584447144592954e-05, "loss": 1.6979, "step": 2340 }, { "epoch": 0.7105782364546972, "grad_norm": 0.663773238658905, "learning_rate": 8.583839611178615e-05, "loss": 1.9633, "step": 2341 }, { "epoch": 0.7108817726513886, "grad_norm": 2.337242603302002, "learning_rate": 8.583232077764277e-05, "loss": 1.2587, "step": 2342 }, { "epoch": 0.7111853088480802, "grad_norm": 0.4255028963088989, "learning_rate": 8.58262454434994e-05, "loss": 1.935, "step": 2343 }, { "epoch": 0.7114888450447716, "grad_norm": 0.796564519405365, "learning_rate": 8.582017010935602e-05, "loss": 2.1544, "step": 2344 }, { "epoch": 0.711792381241463, "grad_norm": 0.42163416743278503, "learning_rate": 8.581409477521264e-05, "loss": 1.9419, "step": 2345 }, { "epoch": 0.7120959174381545, "grad_norm": 0.49495795369148254, "learning_rate": 8.580801944106927e-05, "loss": 1.6143, "step": 2346 }, { "epoch": 0.7123994536348459, "grad_norm": 0.5532099008560181, "learning_rate": 8.580194410692588e-05, "loss": 1.9407, "step": 2347 }, { "epoch": 0.7127029898315375, "grad_norm": 0.434341162443161, "learning_rate": 8.57958687727825e-05, "loss": 1.9626, "step": 2348 }, { "epoch": 0.7130065260282289, "grad_norm": 0.5338404774665833, "learning_rate": 8.578979343863913e-05, "loss": 1.801, "step": 2349 }, { "epoch": 0.7133100622249203, "grad_norm": 0.48087722063064575, "learning_rate": 8.578371810449575e-05, "loss": 2.2286, "step": 2350 }, { "epoch": 0.7136135984216118, "grad_norm": 0.43688857555389404, "learning_rate": 8.577764277035237e-05, "loss": 1.7534, "step": 2351 }, { "epoch": 0.7139171346183032, "grad_norm": 1.3858163356781006, "learning_rate": 8.5771567436209e-05, "loss": 1.4466, "step": 2352 }, { "epoch": 0.7142206708149947, "grad_norm": 0.6149253249168396, "learning_rate": 8.576549210206562e-05, "loss": 2.0683, "step": 2353 }, { "epoch": 0.7145242070116862, "grad_norm": 0.49920403957366943, "learning_rate": 8.575941676792225e-05, "loss": 1.9266, "step": 2354 }, { "epoch": 0.7148277432083776, "grad_norm": 0.41959667205810547, "learning_rate": 8.575334143377886e-05, "loss": 1.7429, "step": 2355 }, { "epoch": 0.715131279405069, "grad_norm": 0.5163973569869995, "learning_rate": 8.574726609963548e-05, "loss": 1.2735, "step": 2356 }, { "epoch": 0.7154348156017605, "grad_norm": 0.37799614667892456, "learning_rate": 8.574119076549211e-05, "loss": 2.2448, "step": 2357 }, { "epoch": 0.7157383517984519, "grad_norm": 0.43541470170021057, "learning_rate": 8.573511543134873e-05, "loss": 2.2739, "step": 2358 }, { "epoch": 0.7160418879951435, "grad_norm": 1.3038394451141357, "learning_rate": 8.572904009720535e-05, "loss": 1.9907, "step": 2359 }, { "epoch": 0.7163454241918349, "grad_norm": 0.6111695766448975, "learning_rate": 8.572296476306198e-05, "loss": 1.5575, "step": 2360 }, { "epoch": 0.7166489603885263, "grad_norm": 1.2944895029067993, "learning_rate": 8.57168894289186e-05, "loss": 1.8409, "step": 2361 }, { "epoch": 0.7169524965852178, "grad_norm": 0.42008545994758606, "learning_rate": 8.571081409477521e-05, "loss": 1.8825, "step": 2362 }, { "epoch": 0.7172560327819092, "grad_norm": 0.48183196783065796, "learning_rate": 8.570473876063184e-05, "loss": 1.9233, "step": 2363 }, { "epoch": 0.7175595689786007, "grad_norm": 0.41434016823768616, "learning_rate": 8.569866342648846e-05, "loss": 2.0636, "step": 2364 }, { "epoch": 0.7178631051752922, "grad_norm": 0.3774077296257019, "learning_rate": 8.569258809234508e-05, "loss": 1.9155, "step": 2365 }, { "epoch": 0.7181666413719836, "grad_norm": 0.350824236869812, "learning_rate": 8.568651275820171e-05, "loss": 1.4894, "step": 2366 }, { "epoch": 0.718470177568675, "grad_norm": 1.5183087587356567, "learning_rate": 8.568043742405833e-05, "loss": 2.0812, "step": 2367 }, { "epoch": 0.7187737137653665, "grad_norm": 0.3757447600364685, "learning_rate": 8.567436208991496e-05, "loss": 1.4652, "step": 2368 }, { "epoch": 0.719077249962058, "grad_norm": 0.4151865839958191, "learning_rate": 8.566828675577157e-05, "loss": 1.677, "step": 2369 }, { "epoch": 0.7193807861587495, "grad_norm": 0.4992164075374603, "learning_rate": 8.566221142162819e-05, "loss": 2.1459, "step": 2370 }, { "epoch": 0.7196843223554409, "grad_norm": 0.3945586085319519, "learning_rate": 8.565613608748482e-05, "loss": 1.9362, "step": 2371 }, { "epoch": 0.7199878585521323, "grad_norm": 0.4325678050518036, "learning_rate": 8.565006075334144e-05, "loss": 2.0223, "step": 2372 }, { "epoch": 0.7202913947488238, "grad_norm": 0.39915481209754944, "learning_rate": 8.564398541919806e-05, "loss": 1.9572, "step": 2373 }, { "epoch": 0.7205949309455153, "grad_norm": 0.45898914337158203, "learning_rate": 8.563791008505469e-05, "loss": 1.5554, "step": 2374 }, { "epoch": 0.7208984671422067, "grad_norm": 0.4385409951210022, "learning_rate": 8.56318347509113e-05, "loss": 1.7864, "step": 2375 }, { "epoch": 0.7212020033388982, "grad_norm": 0.40655046701431274, "learning_rate": 8.562575941676792e-05, "loss": 1.9718, "step": 2376 }, { "epoch": 0.7215055395355896, "grad_norm": 0.42865580320358276, "learning_rate": 8.561968408262455e-05, "loss": 1.6585, "step": 2377 }, { "epoch": 0.721809075732281, "grad_norm": 1.0274362564086914, "learning_rate": 8.561360874848117e-05, "loss": 1.9557, "step": 2378 }, { "epoch": 0.7221126119289726, "grad_norm": 0.8454954028129578, "learning_rate": 8.560753341433779e-05, "loss": 2.0079, "step": 2379 }, { "epoch": 0.722416148125664, "grad_norm": 0.3799399733543396, "learning_rate": 8.560145808019442e-05, "loss": 1.9851, "step": 2380 }, { "epoch": 0.7227196843223554, "grad_norm": 0.5621289610862732, "learning_rate": 8.559538274605104e-05, "loss": 1.6631, "step": 2381 }, { "epoch": 0.7230232205190469, "grad_norm": 0.42442479729652405, "learning_rate": 8.558930741190767e-05, "loss": 1.9328, "step": 2382 }, { "epoch": 0.7233267567157383, "grad_norm": 0.4831121265888214, "learning_rate": 8.558323207776428e-05, "loss": 1.6994, "step": 2383 }, { "epoch": 0.7236302929124299, "grad_norm": 0.4605385363101959, "learning_rate": 8.55771567436209e-05, "loss": 2.0735, "step": 2384 }, { "epoch": 0.7239338291091213, "grad_norm": 0.4393116235733032, "learning_rate": 8.557108140947753e-05, "loss": 1.6179, "step": 2385 }, { "epoch": 0.7242373653058127, "grad_norm": 0.3323841392993927, "learning_rate": 8.556500607533414e-05, "loss": 1.7223, "step": 2386 }, { "epoch": 0.7245409015025042, "grad_norm": 1.153462290763855, "learning_rate": 8.555893074119077e-05, "loss": 2.2449, "step": 2387 }, { "epoch": 0.7248444376991956, "grad_norm": 0.4617941677570343, "learning_rate": 8.55528554070474e-05, "loss": 1.4432, "step": 2388 }, { "epoch": 0.725147973895887, "grad_norm": 0.38924935460090637, "learning_rate": 8.554678007290401e-05, "loss": 2.1933, "step": 2389 }, { "epoch": 0.7254515100925786, "grad_norm": 0.37328121066093445, "learning_rate": 8.554070473876063e-05, "loss": 1.5654, "step": 2390 }, { "epoch": 0.72575504628927, "grad_norm": 0.46307137608528137, "learning_rate": 8.553462940461726e-05, "loss": 1.4233, "step": 2391 }, { "epoch": 0.7260585824859614, "grad_norm": 0.39463040232658386, "learning_rate": 8.552855407047388e-05, "loss": 1.8745, "step": 2392 }, { "epoch": 0.7263621186826529, "grad_norm": 0.6351356506347656, "learning_rate": 8.55224787363305e-05, "loss": 2.0999, "step": 2393 }, { "epoch": 0.7266656548793443, "grad_norm": 0.446508526802063, "learning_rate": 8.551640340218713e-05, "loss": 2.0818, "step": 2394 }, { "epoch": 0.7269691910760359, "grad_norm": 0.3539383113384247, "learning_rate": 8.551032806804375e-05, "loss": 1.3604, "step": 2395 }, { "epoch": 0.7272727272727273, "grad_norm": 0.4133947789669037, "learning_rate": 8.550425273390036e-05, "loss": 1.5174, "step": 2396 }, { "epoch": 0.7275762634694187, "grad_norm": 0.3807066082954407, "learning_rate": 8.5498177399757e-05, "loss": 1.8238, "step": 2397 }, { "epoch": 0.7278797996661102, "grad_norm": 0.41087058186531067, "learning_rate": 8.549210206561361e-05, "loss": 1.9404, "step": 2398 }, { "epoch": 0.7281833358628016, "grad_norm": 0.36707812547683716, "learning_rate": 8.548602673147024e-05, "loss": 1.8164, "step": 2399 }, { "epoch": 0.7284868720594931, "grad_norm": 0.38733971118927, "learning_rate": 8.547995139732685e-05, "loss": 1.3954, "step": 2400 }, { "epoch": 0.7287904082561846, "grad_norm": 0.41041603684425354, "learning_rate": 8.547387606318348e-05, "loss": 1.5569, "step": 2401 }, { "epoch": 0.729093944452876, "grad_norm": 0.42836543917655945, "learning_rate": 8.546780072904011e-05, "loss": 1.7968, "step": 2402 }, { "epoch": 0.7293974806495674, "grad_norm": 0.4246993660926819, "learning_rate": 8.546172539489672e-05, "loss": 2.1727, "step": 2403 }, { "epoch": 0.7297010168462589, "grad_norm": 0.43355593085289, "learning_rate": 8.545565006075334e-05, "loss": 1.5563, "step": 2404 }, { "epoch": 0.7300045530429504, "grad_norm": 0.39305025339126587, "learning_rate": 8.544957472660997e-05, "loss": 1.8375, "step": 2405 }, { "epoch": 0.7303080892396419, "grad_norm": 0.44923126697540283, "learning_rate": 8.544349939246659e-05, "loss": 1.6066, "step": 2406 }, { "epoch": 0.7306116254363333, "grad_norm": 0.41019386053085327, "learning_rate": 8.543742405832321e-05, "loss": 2.0204, "step": 2407 }, { "epoch": 0.7309151616330247, "grad_norm": 0.4895036220550537, "learning_rate": 8.543134872417984e-05, "loss": 2.1129, "step": 2408 }, { "epoch": 0.7312186978297162, "grad_norm": 0.4031083583831787, "learning_rate": 8.542527339003646e-05, "loss": 1.9971, "step": 2409 }, { "epoch": 0.7315222340264077, "grad_norm": 0.40298768877983093, "learning_rate": 8.541919805589307e-05, "loss": 1.9922, "step": 2410 }, { "epoch": 0.7318257702230991, "grad_norm": 0.41940340399742126, "learning_rate": 8.54131227217497e-05, "loss": 1.8683, "step": 2411 }, { "epoch": 0.7321293064197906, "grad_norm": 0.4068038761615753, "learning_rate": 8.540704738760632e-05, "loss": 1.8514, "step": 2412 }, { "epoch": 0.732432842616482, "grad_norm": 0.3992190361022949, "learning_rate": 8.540097205346295e-05, "loss": 1.734, "step": 2413 }, { "epoch": 0.7327363788131734, "grad_norm": 0.35920289158821106, "learning_rate": 8.539489671931956e-05, "loss": 2.0869, "step": 2414 }, { "epoch": 0.7330399150098649, "grad_norm": 0.41339996457099915, "learning_rate": 8.538882138517619e-05, "loss": 1.862, "step": 2415 }, { "epoch": 0.7333434512065564, "grad_norm": 0.35875940322875977, "learning_rate": 8.538274605103282e-05, "loss": 1.9953, "step": 2416 }, { "epoch": 0.7336469874032479, "grad_norm": 0.39455875754356384, "learning_rate": 8.537667071688943e-05, "loss": 1.4772, "step": 2417 }, { "epoch": 0.7339505235999393, "grad_norm": 0.4024868905544281, "learning_rate": 8.537059538274605e-05, "loss": 1.9192, "step": 2418 }, { "epoch": 0.7342540597966307, "grad_norm": 0.43624451756477356, "learning_rate": 8.536452004860268e-05, "loss": 1.7586, "step": 2419 }, { "epoch": 0.7345575959933222, "grad_norm": 0.4356803596019745, "learning_rate": 8.53584447144593e-05, "loss": 1.3589, "step": 2420 }, { "epoch": 0.7348611321900137, "grad_norm": 0.3844490051269531, "learning_rate": 8.535236938031592e-05, "loss": 1.7586, "step": 2421 }, { "epoch": 0.7351646683867051, "grad_norm": 0.36453956365585327, "learning_rate": 8.534629404617255e-05, "loss": 1.9643, "step": 2422 }, { "epoch": 0.7354682045833966, "grad_norm": 0.42973411083221436, "learning_rate": 8.534021871202917e-05, "loss": 2.0022, "step": 2423 }, { "epoch": 0.735771740780088, "grad_norm": 0.4491013288497925, "learning_rate": 8.533414337788578e-05, "loss": 1.9658, "step": 2424 }, { "epoch": 0.7360752769767794, "grad_norm": 0.3806130886077881, "learning_rate": 8.532806804374241e-05, "loss": 1.7092, "step": 2425 }, { "epoch": 0.736378813173471, "grad_norm": 0.37530237436294556, "learning_rate": 8.532199270959903e-05, "loss": 2.1067, "step": 2426 }, { "epoch": 0.7366823493701624, "grad_norm": 0.36951944231987, "learning_rate": 8.531591737545566e-05, "loss": 2.1429, "step": 2427 }, { "epoch": 0.7369858855668538, "grad_norm": 0.8292801380157471, "learning_rate": 8.530984204131227e-05, "loss": 1.9387, "step": 2428 }, { "epoch": 0.7372894217635453, "grad_norm": 0.3690939247608185, "learning_rate": 8.53037667071689e-05, "loss": 1.96, "step": 2429 }, { "epoch": 0.7375929579602367, "grad_norm": 0.3507663905620575, "learning_rate": 8.529769137302553e-05, "loss": 1.8147, "step": 2430 }, { "epoch": 0.7378964941569283, "grad_norm": 0.4241466820240021, "learning_rate": 8.529161603888215e-05, "loss": 1.1116, "step": 2431 }, { "epoch": 0.7382000303536197, "grad_norm": 0.40038058161735535, "learning_rate": 8.528554070473876e-05, "loss": 1.9528, "step": 2432 }, { "epoch": 0.7385035665503111, "grad_norm": 0.41025862097740173, "learning_rate": 8.527946537059539e-05, "loss": 1.8288, "step": 2433 }, { "epoch": 0.7388071027470026, "grad_norm": 0.43207821249961853, "learning_rate": 8.527339003645201e-05, "loss": 1.986, "step": 2434 }, { "epoch": 0.739110638943694, "grad_norm": 0.4291042983531952, "learning_rate": 8.526731470230863e-05, "loss": 2.0695, "step": 2435 }, { "epoch": 0.7394141751403855, "grad_norm": 0.39197900891304016, "learning_rate": 8.526123936816526e-05, "loss": 1.5059, "step": 2436 }, { "epoch": 0.739717711337077, "grad_norm": 0.5944773554801941, "learning_rate": 8.525516403402188e-05, "loss": 1.6354, "step": 2437 }, { "epoch": 0.7400212475337684, "grad_norm": 0.42565345764160156, "learning_rate": 8.52490886998785e-05, "loss": 1.5772, "step": 2438 }, { "epoch": 0.7403247837304598, "grad_norm": 0.4184707999229431, "learning_rate": 8.524301336573512e-05, "loss": 1.8781, "step": 2439 }, { "epoch": 0.7406283199271513, "grad_norm": 0.36030882596969604, "learning_rate": 8.523693803159174e-05, "loss": 1.8788, "step": 2440 }, { "epoch": 0.7409318561238427, "grad_norm": 0.4323141872882843, "learning_rate": 8.523086269744837e-05, "loss": 2.0321, "step": 2441 }, { "epoch": 0.7412353923205343, "grad_norm": 0.4332966208457947, "learning_rate": 8.522478736330498e-05, "loss": 1.7093, "step": 2442 }, { "epoch": 0.7415389285172257, "grad_norm": 0.4085337221622467, "learning_rate": 8.521871202916161e-05, "loss": 1.7086, "step": 2443 }, { "epoch": 0.7418424647139171, "grad_norm": 0.4357088506221771, "learning_rate": 8.521263669501824e-05, "loss": 1.5345, "step": 2444 }, { "epoch": 0.7421460009106086, "grad_norm": 0.40508776903152466, "learning_rate": 8.520656136087484e-05, "loss": 2.0369, "step": 2445 }, { "epoch": 0.7424495371073, "grad_norm": 0.36506882309913635, "learning_rate": 8.520048602673147e-05, "loss": 1.4258, "step": 2446 }, { "epoch": 0.7427530733039915, "grad_norm": 0.3771931827068329, "learning_rate": 8.51944106925881e-05, "loss": 1.1239, "step": 2447 }, { "epoch": 0.743056609500683, "grad_norm": 0.4152052700519562, "learning_rate": 8.518833535844472e-05, "loss": 2.093, "step": 2448 }, { "epoch": 0.7433601456973744, "grad_norm": 0.4168509244918823, "learning_rate": 8.518226002430134e-05, "loss": 1.9488, "step": 2449 }, { "epoch": 0.7436636818940658, "grad_norm": 0.44399914145469666, "learning_rate": 8.517618469015797e-05, "loss": 1.8483, "step": 2450 }, { "epoch": 0.7439672180907573, "grad_norm": 0.3898546099662781, "learning_rate": 8.517010935601459e-05, "loss": 1.7384, "step": 2451 }, { "epoch": 0.7442707542874488, "grad_norm": 0.3657229542732239, "learning_rate": 8.51640340218712e-05, "loss": 1.9651, "step": 2452 }, { "epoch": 0.7445742904841403, "grad_norm": 0.5163128972053528, "learning_rate": 8.515795868772783e-05, "loss": 1.679, "step": 2453 }, { "epoch": 0.7448778266808317, "grad_norm": 0.8351776599884033, "learning_rate": 8.515188335358445e-05, "loss": 1.4447, "step": 2454 }, { "epoch": 0.7451813628775231, "grad_norm": 0.4343299865722656, "learning_rate": 8.514580801944108e-05, "loss": 1.8703, "step": 2455 }, { "epoch": 0.7454848990742146, "grad_norm": 0.3905276954174042, "learning_rate": 8.513973268529769e-05, "loss": 1.8941, "step": 2456 }, { "epoch": 0.7457884352709061, "grad_norm": 0.475789338350296, "learning_rate": 8.513365735115432e-05, "loss": 1.629, "step": 2457 }, { "epoch": 0.7460919714675975, "grad_norm": 0.3969273567199707, "learning_rate": 8.512758201701095e-05, "loss": 1.8827, "step": 2458 }, { "epoch": 0.746395507664289, "grad_norm": 0.4705328643321991, "learning_rate": 8.512150668286755e-05, "loss": 1.4177, "step": 2459 }, { "epoch": 0.7466990438609804, "grad_norm": 0.4193515181541443, "learning_rate": 8.511543134872418e-05, "loss": 2.0139, "step": 2460 }, { "epoch": 0.7470025800576718, "grad_norm": 0.38317739963531494, "learning_rate": 8.510935601458081e-05, "loss": 2.119, "step": 2461 }, { "epoch": 0.7473061162543634, "grad_norm": 0.39867040514945984, "learning_rate": 8.510328068043743e-05, "loss": 1.9218, "step": 2462 }, { "epoch": 0.7476096524510548, "grad_norm": 0.5308038592338562, "learning_rate": 8.509720534629405e-05, "loss": 1.5898, "step": 2463 }, { "epoch": 0.7479131886477463, "grad_norm": 0.45667675137519836, "learning_rate": 8.509113001215068e-05, "loss": 1.7705, "step": 2464 }, { "epoch": 0.7482167248444377, "grad_norm": 1.480726718902588, "learning_rate": 8.50850546780073e-05, "loss": 2.1438, "step": 2465 }, { "epoch": 0.7485202610411291, "grad_norm": 0.46620655059814453, "learning_rate": 8.507897934386391e-05, "loss": 1.8973, "step": 2466 }, { "epoch": 0.7488237972378207, "grad_norm": 0.34710168838500977, "learning_rate": 8.507290400972053e-05, "loss": 1.3338, "step": 2467 }, { "epoch": 0.7491273334345121, "grad_norm": 0.43097588419914246, "learning_rate": 8.506682867557716e-05, "loss": 1.7958, "step": 2468 }, { "epoch": 0.7494308696312035, "grad_norm": 0.3998434245586395, "learning_rate": 8.506075334143378e-05, "loss": 1.7615, "step": 2469 }, { "epoch": 0.749734405827895, "grad_norm": 0.39192789793014526, "learning_rate": 8.50546780072904e-05, "loss": 1.7179, "step": 2470 }, { "epoch": 0.7500379420245864, "grad_norm": 0.4148361086845398, "learning_rate": 8.504860267314703e-05, "loss": 1.6477, "step": 2471 }, { "epoch": 0.7503414782212778, "grad_norm": 0.5068510174751282, "learning_rate": 8.504252733900366e-05, "loss": 2.045, "step": 2472 }, { "epoch": 0.7506450144179694, "grad_norm": 0.4798752963542938, "learning_rate": 8.503645200486026e-05, "loss": 1.7506, "step": 2473 }, { "epoch": 0.7509485506146608, "grad_norm": 0.4444788992404938, "learning_rate": 8.503037667071689e-05, "loss": 1.894, "step": 2474 }, { "epoch": 0.7512520868113522, "grad_norm": 0.39380598068237305, "learning_rate": 8.502430133657352e-05, "loss": 2.0365, "step": 2475 }, { "epoch": 0.7515556230080437, "grad_norm": 0.38357478380203247, "learning_rate": 8.501822600243014e-05, "loss": 1.8282, "step": 2476 }, { "epoch": 0.7518591592047351, "grad_norm": 0.47529253363609314, "learning_rate": 8.501215066828676e-05, "loss": 1.7783, "step": 2477 }, { "epoch": 0.7521626954014267, "grad_norm": 0.3402441740036011, "learning_rate": 8.500607533414339e-05, "loss": 1.8927, "step": 2478 }, { "epoch": 0.7524662315981181, "grad_norm": 0.36784660816192627, "learning_rate": 8.5e-05, "loss": 1.4411, "step": 2479 }, { "epoch": 0.7527697677948095, "grad_norm": 0.42149028182029724, "learning_rate": 8.499392466585662e-05, "loss": 2.0137, "step": 2480 }, { "epoch": 0.753073303991501, "grad_norm": 0.40183788537979126, "learning_rate": 8.498784933171324e-05, "loss": 1.6207, "step": 2481 }, { "epoch": 0.7533768401881924, "grad_norm": 0.45237985253334045, "learning_rate": 8.498177399756987e-05, "loss": 2.2596, "step": 2482 }, { "epoch": 0.7536803763848839, "grad_norm": 0.4847509562969208, "learning_rate": 8.497569866342649e-05, "loss": 1.6941, "step": 2483 }, { "epoch": 0.7539839125815754, "grad_norm": 0.4311809837818146, "learning_rate": 8.49696233292831e-05, "loss": 2.0248, "step": 2484 }, { "epoch": 0.7542874487782668, "grad_norm": 0.6543784141540527, "learning_rate": 8.496354799513974e-05, "loss": 1.9065, "step": 2485 }, { "epoch": 0.7545909849749582, "grad_norm": 0.3486241102218628, "learning_rate": 8.495747266099637e-05, "loss": 1.5518, "step": 2486 }, { "epoch": 0.7548945211716497, "grad_norm": 0.44317248463630676, "learning_rate": 8.495139732685297e-05, "loss": 1.9064, "step": 2487 }, { "epoch": 0.7551980573683412, "grad_norm": 0.44157078862190247, "learning_rate": 8.49453219927096e-05, "loss": 1.7866, "step": 2488 }, { "epoch": 0.7555015935650327, "grad_norm": 0.4338137209415436, "learning_rate": 8.493924665856623e-05, "loss": 1.9897, "step": 2489 }, { "epoch": 0.7558051297617241, "grad_norm": 0.45171141624450684, "learning_rate": 8.493317132442285e-05, "loss": 1.9706, "step": 2490 }, { "epoch": 0.7561086659584155, "grad_norm": 0.9964777231216431, "learning_rate": 8.492709599027947e-05, "loss": 2.1163, "step": 2491 }, { "epoch": 0.756412202155107, "grad_norm": 0.39545395970344543, "learning_rate": 8.49210206561361e-05, "loss": 1.6514, "step": 2492 }, { "epoch": 0.7567157383517985, "grad_norm": 0.4575003683567047, "learning_rate": 8.491494532199272e-05, "loss": 1.9118, "step": 2493 }, { "epoch": 0.7570192745484899, "grad_norm": 0.4249429702758789, "learning_rate": 8.490886998784933e-05, "loss": 1.9342, "step": 2494 }, { "epoch": 0.7573228107451814, "grad_norm": 0.4887460768222809, "learning_rate": 8.490279465370595e-05, "loss": 1.9421, "step": 2495 }, { "epoch": 0.7576263469418728, "grad_norm": 0.41777387261390686, "learning_rate": 8.489671931956258e-05, "loss": 1.5985, "step": 2496 }, { "epoch": 0.7579298831385642, "grad_norm": 1.7083243131637573, "learning_rate": 8.48906439854192e-05, "loss": 1.8387, "step": 2497 }, { "epoch": 0.7582334193352557, "grad_norm": 0.39955195784568787, "learning_rate": 8.488456865127582e-05, "loss": 2.0833, "step": 2498 }, { "epoch": 0.7585369555319472, "grad_norm": 1.4131972789764404, "learning_rate": 8.487849331713245e-05, "loss": 1.5318, "step": 2499 }, { "epoch": 0.7588404917286387, "grad_norm": 0.7458001375198364, "learning_rate": 8.487241798298908e-05, "loss": 1.9582, "step": 2500 }, { "epoch": 0.7591440279253301, "grad_norm": 0.4677983820438385, "learning_rate": 8.486634264884568e-05, "loss": 1.8781, "step": 2501 }, { "epoch": 0.7594475641220215, "grad_norm": 0.4976421594619751, "learning_rate": 8.486026731470231e-05, "loss": 2.1458, "step": 2502 }, { "epoch": 0.759751100318713, "grad_norm": 0.3829711675643921, "learning_rate": 8.485419198055894e-05, "loss": 1.9114, "step": 2503 }, { "epoch": 0.7600546365154045, "grad_norm": 0.5295559167861938, "learning_rate": 8.484811664641556e-05, "loss": 2.0679, "step": 2504 }, { "epoch": 0.7603581727120959, "grad_norm": 0.7929876446723938, "learning_rate": 8.484204131227218e-05, "loss": 2.2658, "step": 2505 }, { "epoch": 0.7606617089087874, "grad_norm": 0.35055285692214966, "learning_rate": 8.483596597812881e-05, "loss": 1.9443, "step": 2506 }, { "epoch": 0.7609652451054788, "grad_norm": 0.39741021394729614, "learning_rate": 8.482989064398543e-05, "loss": 1.6983, "step": 2507 }, { "epoch": 0.7612687813021702, "grad_norm": 0.4206577241420746, "learning_rate": 8.482381530984204e-05, "loss": 1.8886, "step": 2508 }, { "epoch": 0.7615723174988618, "grad_norm": 0.7343670129776001, "learning_rate": 8.481773997569866e-05, "loss": 1.9581, "step": 2509 }, { "epoch": 0.7618758536955532, "grad_norm": 0.3836110532283783, "learning_rate": 8.481166464155529e-05, "loss": 1.8658, "step": 2510 }, { "epoch": 0.7621793898922447, "grad_norm": 0.44783517718315125, "learning_rate": 8.480558930741191e-05, "loss": 1.8772, "step": 2511 }, { "epoch": 0.7624829260889361, "grad_norm": 0.44204702973365784, "learning_rate": 8.479951397326853e-05, "loss": 1.8684, "step": 2512 }, { "epoch": 0.7627864622856275, "grad_norm": 0.45162737369537354, "learning_rate": 8.479343863912516e-05, "loss": 1.817, "step": 2513 }, { "epoch": 0.7630899984823191, "grad_norm": 0.36719024181365967, "learning_rate": 8.478736330498179e-05, "loss": 2.0087, "step": 2514 }, { "epoch": 0.7633935346790105, "grad_norm": 0.3979268968105316, "learning_rate": 8.478128797083839e-05, "loss": 2.091, "step": 2515 }, { "epoch": 0.7636970708757019, "grad_norm": 0.45267635583877563, "learning_rate": 8.477521263669502e-05, "loss": 1.2638, "step": 2516 }, { "epoch": 0.7640006070723934, "grad_norm": 0.43147364258766174, "learning_rate": 8.476913730255165e-05, "loss": 2.1202, "step": 2517 }, { "epoch": 0.7643041432690848, "grad_norm": 0.46071958541870117, "learning_rate": 8.476306196840826e-05, "loss": 1.9875, "step": 2518 }, { "epoch": 0.7646076794657763, "grad_norm": 0.3662787973880768, "learning_rate": 8.475698663426489e-05, "loss": 2.0632, "step": 2519 }, { "epoch": 0.7649112156624678, "grad_norm": 1.479319453239441, "learning_rate": 8.475091130012152e-05, "loss": 2.1759, "step": 2520 }, { "epoch": 0.7652147518591592, "grad_norm": 0.38541749119758606, "learning_rate": 8.474483596597814e-05, "loss": 1.6204, "step": 2521 }, { "epoch": 0.7655182880558506, "grad_norm": 0.39057788252830505, "learning_rate": 8.473876063183475e-05, "loss": 1.5414, "step": 2522 }, { "epoch": 0.7658218242525421, "grad_norm": 0.37674304842948914, "learning_rate": 8.473268529769137e-05, "loss": 1.8496, "step": 2523 }, { "epoch": 0.7661253604492335, "grad_norm": 0.4432341158390045, "learning_rate": 8.4726609963548e-05, "loss": 1.8541, "step": 2524 }, { "epoch": 0.7664288966459251, "grad_norm": 0.3844713866710663, "learning_rate": 8.472053462940462e-05, "loss": 1.9793, "step": 2525 }, { "epoch": 0.7667324328426165, "grad_norm": 0.37515100836753845, "learning_rate": 8.471445929526124e-05, "loss": 1.9751, "step": 2526 }, { "epoch": 0.7670359690393079, "grad_norm": 0.36097854375839233, "learning_rate": 8.470838396111787e-05, "loss": 1.2523, "step": 2527 }, { "epoch": 0.7673395052359994, "grad_norm": 0.3747158646583557, "learning_rate": 8.47023086269745e-05, "loss": 1.3921, "step": 2528 }, { "epoch": 0.7676430414326908, "grad_norm": 0.4365270435810089, "learning_rate": 8.46962332928311e-05, "loss": 2.1177, "step": 2529 }, { "epoch": 0.7679465776293823, "grad_norm": 0.39251551032066345, "learning_rate": 8.469015795868773e-05, "loss": 1.2146, "step": 2530 }, { "epoch": 0.7682501138260738, "grad_norm": 0.4519220292568207, "learning_rate": 8.468408262454436e-05, "loss": 1.7527, "step": 2531 }, { "epoch": 0.7685536500227652, "grad_norm": 0.436301589012146, "learning_rate": 8.467800729040097e-05, "loss": 1.8437, "step": 2532 }, { "epoch": 0.7688571862194566, "grad_norm": 0.4134485125541687, "learning_rate": 8.46719319562576e-05, "loss": 1.9497, "step": 2533 }, { "epoch": 0.7691607224161481, "grad_norm": 1.2619364261627197, "learning_rate": 8.466585662211423e-05, "loss": 2.0033, "step": 2534 }, { "epoch": 0.7694642586128396, "grad_norm": 0.3872610926628113, "learning_rate": 8.465978128797085e-05, "loss": 1.5046, "step": 2535 }, { "epoch": 0.7697677948095311, "grad_norm": 0.4039923846721649, "learning_rate": 8.465370595382746e-05, "loss": 1.8782, "step": 2536 }, { "epoch": 0.7700713310062225, "grad_norm": 0.3826749622821808, "learning_rate": 8.464763061968408e-05, "loss": 1.9288, "step": 2537 }, { "epoch": 0.7703748672029139, "grad_norm": 0.4400002062320709, "learning_rate": 8.464155528554071e-05, "loss": 2.2257, "step": 2538 }, { "epoch": 0.7706784033996054, "grad_norm": 0.6801483631134033, "learning_rate": 8.463547995139733e-05, "loss": 1.5758, "step": 2539 }, { "epoch": 0.7709819395962969, "grad_norm": 0.4618719816207886, "learning_rate": 8.462940461725395e-05, "loss": 1.5398, "step": 2540 }, { "epoch": 0.7712854757929883, "grad_norm": 0.3891013562679291, "learning_rate": 8.462332928311058e-05, "loss": 1.7876, "step": 2541 }, { "epoch": 0.7715890119896798, "grad_norm": 0.36486607789993286, "learning_rate": 8.46172539489672e-05, "loss": 1.4358, "step": 2542 }, { "epoch": 0.7718925481863712, "grad_norm": 0.4325253665447235, "learning_rate": 8.461117861482381e-05, "loss": 1.8423, "step": 2543 }, { "epoch": 0.7721960843830626, "grad_norm": 0.3679717481136322, "learning_rate": 8.460510328068044e-05, "loss": 1.2354, "step": 2544 }, { "epoch": 0.7724996205797542, "grad_norm": 0.3954870402812958, "learning_rate": 8.459902794653707e-05, "loss": 1.915, "step": 2545 }, { "epoch": 0.7728031567764456, "grad_norm": 0.3950810432434082, "learning_rate": 8.459295261239368e-05, "loss": 1.9933, "step": 2546 }, { "epoch": 0.773106692973137, "grad_norm": 0.5841623544692993, "learning_rate": 8.458687727825031e-05, "loss": 1.5726, "step": 2547 }, { "epoch": 0.7734102291698285, "grad_norm": 0.39942488074302673, "learning_rate": 8.458080194410694e-05, "loss": 1.8425, "step": 2548 }, { "epoch": 0.7737137653665199, "grad_norm": 0.4704907238483429, "learning_rate": 8.457472660996356e-05, "loss": 2.2747, "step": 2549 }, { "epoch": 0.7740173015632115, "grad_norm": 0.46205440163612366, "learning_rate": 8.456865127582017e-05, "loss": 1.8099, "step": 2550 }, { "epoch": 0.7743208377599029, "grad_norm": 0.4484197497367859, "learning_rate": 8.456257594167679e-05, "loss": 1.3406, "step": 2551 }, { "epoch": 0.7746243739565943, "grad_norm": 0.412507027387619, "learning_rate": 8.455650060753342e-05, "loss": 2.0005, "step": 2552 }, { "epoch": 0.7749279101532858, "grad_norm": 0.4087753891944885, "learning_rate": 8.455042527339004e-05, "loss": 2.0239, "step": 2553 }, { "epoch": 0.7752314463499772, "grad_norm": 0.5045974850654602, "learning_rate": 8.454434993924666e-05, "loss": 1.6854, "step": 2554 }, { "epoch": 0.7755349825466686, "grad_norm": 0.40595903992652893, "learning_rate": 8.453827460510329e-05, "loss": 1.8671, "step": 2555 }, { "epoch": 0.7758385187433602, "grad_norm": 0.3777897357940674, "learning_rate": 8.45321992709599e-05, "loss": 1.5473, "step": 2556 }, { "epoch": 0.7761420549400516, "grad_norm": 0.8116908073425293, "learning_rate": 8.452612393681652e-05, "loss": 2.0338, "step": 2557 }, { "epoch": 0.776445591136743, "grad_norm": 0.4156283736228943, "learning_rate": 8.452004860267315e-05, "loss": 1.9501, "step": 2558 }, { "epoch": 0.7767491273334345, "grad_norm": 0.3934132158756256, "learning_rate": 8.451397326852978e-05, "loss": 1.8579, "step": 2559 }, { "epoch": 0.7770526635301259, "grad_norm": 0.39838075637817383, "learning_rate": 8.450789793438639e-05, "loss": 1.5156, "step": 2560 }, { "epoch": 0.7773561997268175, "grad_norm": 0.38777777552604675, "learning_rate": 8.450182260024302e-05, "loss": 1.7479, "step": 2561 }, { "epoch": 0.7776597359235089, "grad_norm": 0.43510836362838745, "learning_rate": 8.449574726609964e-05, "loss": 1.6293, "step": 2562 }, { "epoch": 0.7779632721202003, "grad_norm": 0.42916715145111084, "learning_rate": 8.448967193195627e-05, "loss": 1.8737, "step": 2563 }, { "epoch": 0.7782668083168918, "grad_norm": 0.4157852232456207, "learning_rate": 8.448359659781288e-05, "loss": 1.5549, "step": 2564 }, { "epoch": 0.7785703445135832, "grad_norm": 0.39083394408226013, "learning_rate": 8.44775212636695e-05, "loss": 1.7937, "step": 2565 }, { "epoch": 0.7788738807102747, "grad_norm": 0.4398769438266754, "learning_rate": 8.447144592952613e-05, "loss": 1.8285, "step": 2566 }, { "epoch": 0.7791774169069662, "grad_norm": 0.4085114598274231, "learning_rate": 8.446537059538275e-05, "loss": 1.835, "step": 2567 }, { "epoch": 0.7794809531036576, "grad_norm": 0.44986245036125183, "learning_rate": 8.445929526123937e-05, "loss": 1.6762, "step": 2568 }, { "epoch": 0.779784489300349, "grad_norm": 0.3613260090351105, "learning_rate": 8.4453219927096e-05, "loss": 1.5119, "step": 2569 }, { "epoch": 0.7800880254970405, "grad_norm": 1.3417142629623413, "learning_rate": 8.444714459295261e-05, "loss": 1.7255, "step": 2570 }, { "epoch": 0.780391561693732, "grad_norm": 0.3687633275985718, "learning_rate": 8.444106925880923e-05, "loss": 1.846, "step": 2571 }, { "epoch": 0.7806950978904235, "grad_norm": 0.5285593867301941, "learning_rate": 8.443499392466586e-05, "loss": 1.8364, "step": 2572 }, { "epoch": 0.7809986340871149, "grad_norm": 0.6644722819328308, "learning_rate": 8.44289185905225e-05, "loss": 2.0563, "step": 2573 }, { "epoch": 0.7813021702838063, "grad_norm": 0.4382190704345703, "learning_rate": 8.44228432563791e-05, "loss": 1.8737, "step": 2574 }, { "epoch": 0.7816057064804978, "grad_norm": 0.43860745429992676, "learning_rate": 8.441676792223573e-05, "loss": 1.7924, "step": 2575 }, { "epoch": 0.7819092426771893, "grad_norm": 0.39125654101371765, "learning_rate": 8.441069258809235e-05, "loss": 1.159, "step": 2576 }, { "epoch": 0.7822127788738807, "grad_norm": 0.4389365017414093, "learning_rate": 8.440461725394898e-05, "loss": 1.9478, "step": 2577 }, { "epoch": 0.7825163150705722, "grad_norm": 0.46001267433166504, "learning_rate": 8.43985419198056e-05, "loss": 1.3252, "step": 2578 }, { "epoch": 0.7828198512672636, "grad_norm": 0.36286357045173645, "learning_rate": 8.439246658566221e-05, "loss": 1.9519, "step": 2579 }, { "epoch": 0.783123387463955, "grad_norm": 0.6641744375228882, "learning_rate": 8.438639125151884e-05, "loss": 1.9772, "step": 2580 }, { "epoch": 0.7834269236606465, "grad_norm": 0.43126794695854187, "learning_rate": 8.438031591737546e-05, "loss": 1.7583, "step": 2581 }, { "epoch": 0.783730459857338, "grad_norm": 0.4094223380088806, "learning_rate": 8.437424058323208e-05, "loss": 1.7251, "step": 2582 }, { "epoch": 0.7840339960540295, "grad_norm": 0.34994634985923767, "learning_rate": 8.436816524908871e-05, "loss": 1.8317, "step": 2583 }, { "epoch": 0.7843375322507209, "grad_norm": 0.3431306481361389, "learning_rate": 8.436208991494532e-05, "loss": 1.9223, "step": 2584 }, { "epoch": 0.7846410684474123, "grad_norm": 0.43418046832084656, "learning_rate": 8.435601458080194e-05, "loss": 2.0232, "step": 2585 }, { "epoch": 0.7849446046441038, "grad_norm": 0.3941698670387268, "learning_rate": 8.434993924665857e-05, "loss": 2.0736, "step": 2586 }, { "epoch": 0.7852481408407953, "grad_norm": 0.9890521764755249, "learning_rate": 8.43438639125152e-05, "loss": 1.6363, "step": 2587 }, { "epoch": 0.7855516770374867, "grad_norm": 0.3400576114654541, "learning_rate": 8.433778857837181e-05, "loss": 1.8934, "step": 2588 }, { "epoch": 0.7858552132341782, "grad_norm": 0.4412512183189392, "learning_rate": 8.433171324422844e-05, "loss": 1.6729, "step": 2589 }, { "epoch": 0.7861587494308696, "grad_norm": 0.7804542183876038, "learning_rate": 8.432563791008506e-05, "loss": 2.0693, "step": 2590 }, { "epoch": 0.786462285627561, "grad_norm": 0.48216378688812256, "learning_rate": 8.431956257594167e-05, "loss": 1.622, "step": 2591 }, { "epoch": 0.7867658218242526, "grad_norm": 0.4186027944087982, "learning_rate": 8.43134872417983e-05, "loss": 2.0402, "step": 2592 }, { "epoch": 0.787069358020944, "grad_norm": 0.35439014434814453, "learning_rate": 8.430741190765492e-05, "loss": 2.0655, "step": 2593 }, { "epoch": 0.7873728942176355, "grad_norm": 0.4577588140964508, "learning_rate": 8.430133657351155e-05, "loss": 1.203, "step": 2594 }, { "epoch": 0.7876764304143269, "grad_norm": 0.42012783885002136, "learning_rate": 8.429526123936817e-05, "loss": 1.8616, "step": 2595 }, { "epoch": 0.7879799666110183, "grad_norm": 0.3673686683177948, "learning_rate": 8.428918590522479e-05, "loss": 2.0879, "step": 2596 }, { "epoch": 0.7882835028077099, "grad_norm": 0.4281460642814636, "learning_rate": 8.428311057108142e-05, "loss": 1.8722, "step": 2597 }, { "epoch": 0.7885870390044013, "grad_norm": 0.432608962059021, "learning_rate": 8.427703523693803e-05, "loss": 1.7152, "step": 2598 }, { "epoch": 0.7888905752010927, "grad_norm": 0.3567800521850586, "learning_rate": 8.427095990279465e-05, "loss": 1.534, "step": 2599 }, { "epoch": 0.7891941113977842, "grad_norm": 0.383045494556427, "learning_rate": 8.426488456865128e-05, "loss": 1.7063, "step": 2600 }, { "epoch": 0.7894976475944756, "grad_norm": 0.3540615141391754, "learning_rate": 8.425880923450791e-05, "loss": 1.6996, "step": 2601 }, { "epoch": 0.7898011837911671, "grad_norm": 0.44690582156181335, "learning_rate": 8.425273390036452e-05, "loss": 1.9774, "step": 2602 }, { "epoch": 0.7901047199878586, "grad_norm": 0.49040475487709045, "learning_rate": 8.424665856622115e-05, "loss": 2.1205, "step": 2603 }, { "epoch": 0.79040825618455, "grad_norm": 0.4510320723056793, "learning_rate": 8.424058323207777e-05, "loss": 2.0409, "step": 2604 }, { "epoch": 0.7907117923812415, "grad_norm": 0.41996338963508606, "learning_rate": 8.423450789793438e-05, "loss": 1.7738, "step": 2605 }, { "epoch": 0.7910153285779329, "grad_norm": 0.4076237976551056, "learning_rate": 8.422843256379101e-05, "loss": 2.1186, "step": 2606 }, { "epoch": 0.7913188647746243, "grad_norm": 0.4231566786766052, "learning_rate": 8.422235722964763e-05, "loss": 1.8912, "step": 2607 }, { "epoch": 0.7916224009713159, "grad_norm": 0.6864861249923706, "learning_rate": 8.421628189550426e-05, "loss": 1.9234, "step": 2608 }, { "epoch": 0.7919259371680073, "grad_norm": 0.3856845796108246, "learning_rate": 8.421020656136088e-05, "loss": 2.0214, "step": 2609 }, { "epoch": 0.7922294733646987, "grad_norm": 0.39030376076698303, "learning_rate": 8.42041312272175e-05, "loss": 1.9272, "step": 2610 }, { "epoch": 0.7925330095613902, "grad_norm": 0.407306432723999, "learning_rate": 8.419805589307413e-05, "loss": 1.9412, "step": 2611 }, { "epoch": 0.7928365457580816, "grad_norm": 0.41559773683547974, "learning_rate": 8.419198055893074e-05, "loss": 1.7524, "step": 2612 }, { "epoch": 0.7931400819547731, "grad_norm": 0.382524311542511, "learning_rate": 8.418590522478736e-05, "loss": 2.0343, "step": 2613 }, { "epoch": 0.7934436181514646, "grad_norm": 0.42608487606048584, "learning_rate": 8.417982989064399e-05, "loss": 1.9188, "step": 2614 }, { "epoch": 0.793747154348156, "grad_norm": 0.3967500329017639, "learning_rate": 8.417375455650061e-05, "loss": 1.8737, "step": 2615 }, { "epoch": 0.7940506905448474, "grad_norm": 0.493898868560791, "learning_rate": 8.416767922235723e-05, "loss": 1.8199, "step": 2616 }, { "epoch": 0.7943542267415389, "grad_norm": 0.5007172226905823, "learning_rate": 8.416160388821386e-05, "loss": 2.0033, "step": 2617 }, { "epoch": 0.7946577629382304, "grad_norm": 0.4337385594844818, "learning_rate": 8.415552855407048e-05, "loss": 2.1693, "step": 2618 }, { "epoch": 0.7949612991349219, "grad_norm": 0.40338581800460815, "learning_rate": 8.414945321992709e-05, "loss": 1.9985, "step": 2619 }, { "epoch": 0.7952648353316133, "grad_norm": 0.3842269778251648, "learning_rate": 8.414337788578372e-05, "loss": 1.5293, "step": 2620 }, { "epoch": 0.7955683715283047, "grad_norm": 0.35648632049560547, "learning_rate": 8.413730255164034e-05, "loss": 1.9728, "step": 2621 }, { "epoch": 0.7958719077249962, "grad_norm": 0.4350222945213318, "learning_rate": 8.413122721749697e-05, "loss": 1.3873, "step": 2622 }, { "epoch": 0.7961754439216877, "grad_norm": 0.605980634689331, "learning_rate": 8.412515188335359e-05, "loss": 2.052, "step": 2623 }, { "epoch": 0.7964789801183791, "grad_norm": 0.6555821299552917, "learning_rate": 8.41190765492102e-05, "loss": 2.146, "step": 2624 }, { "epoch": 0.7967825163150706, "grad_norm": 0.42681270837783813, "learning_rate": 8.411300121506684e-05, "loss": 1.0012, "step": 2625 }, { "epoch": 0.797086052511762, "grad_norm": 0.43222132325172424, "learning_rate": 8.410692588092345e-05, "loss": 2.1722, "step": 2626 }, { "epoch": 0.7973895887084534, "grad_norm": 0.40917056798934937, "learning_rate": 8.410085054678007e-05, "loss": 2.1489, "step": 2627 }, { "epoch": 0.797693124905145, "grad_norm": 0.4139658212661743, "learning_rate": 8.40947752126367e-05, "loss": 2.0852, "step": 2628 }, { "epoch": 0.7979966611018364, "grad_norm": 1.7534079551696777, "learning_rate": 8.408869987849332e-05, "loss": 1.7122, "step": 2629 }, { "epoch": 0.7983001972985279, "grad_norm": 0.37330594658851624, "learning_rate": 8.408262454434994e-05, "loss": 1.6402, "step": 2630 }, { "epoch": 0.7986037334952193, "grad_norm": 0.7637292742729187, "learning_rate": 8.407654921020657e-05, "loss": 1.6067, "step": 2631 }, { "epoch": 0.7989072696919107, "grad_norm": 0.48156240582466125, "learning_rate": 8.407047387606319e-05, "loss": 1.8114, "step": 2632 }, { "epoch": 0.7992108058886022, "grad_norm": 0.3753802180290222, "learning_rate": 8.40643985419198e-05, "loss": 1.6239, "step": 2633 }, { "epoch": 0.7995143420852937, "grad_norm": 0.4283507764339447, "learning_rate": 8.405832320777643e-05, "loss": 2.0778, "step": 2634 }, { "epoch": 0.7998178782819851, "grad_norm": 0.3911525309085846, "learning_rate": 8.405224787363305e-05, "loss": 1.9279, "step": 2635 }, { "epoch": 0.8001214144786766, "grad_norm": 0.4033350646495819, "learning_rate": 8.404617253948968e-05, "loss": 1.3199, "step": 2636 }, { "epoch": 0.800424950675368, "grad_norm": 0.398269921541214, "learning_rate": 8.40400972053463e-05, "loss": 2.0073, "step": 2637 }, { "epoch": 0.8007284868720594, "grad_norm": 0.5763013362884521, "learning_rate": 8.403402187120292e-05, "loss": 1.517, "step": 2638 }, { "epoch": 0.801032023068751, "grad_norm": 0.4008599817752838, "learning_rate": 8.402794653705955e-05, "loss": 1.6136, "step": 2639 }, { "epoch": 0.8013355592654424, "grad_norm": 0.39726853370666504, "learning_rate": 8.402187120291616e-05, "loss": 1.8454, "step": 2640 }, { "epoch": 0.8016390954621339, "grad_norm": 0.4768484830856323, "learning_rate": 8.401579586877278e-05, "loss": 1.4961, "step": 2641 }, { "epoch": 0.8019426316588253, "grad_norm": 0.4185827374458313, "learning_rate": 8.400972053462941e-05, "loss": 1.8617, "step": 2642 }, { "epoch": 0.8022461678555167, "grad_norm": 0.43734210729599, "learning_rate": 8.400364520048603e-05, "loss": 1.4771, "step": 2643 }, { "epoch": 0.8025497040522083, "grad_norm": 0.4270019233226776, "learning_rate": 8.399756986634265e-05, "loss": 1.8692, "step": 2644 }, { "epoch": 0.8028532402488997, "grad_norm": 0.35486480593681335, "learning_rate": 8.399149453219928e-05, "loss": 1.8821, "step": 2645 }, { "epoch": 0.8031567764455911, "grad_norm": 0.9134595394134521, "learning_rate": 8.39854191980559e-05, "loss": 1.8291, "step": 2646 }, { "epoch": 0.8034603126422826, "grad_norm": 0.43372470140457153, "learning_rate": 8.397934386391251e-05, "loss": 2.0605, "step": 2647 }, { "epoch": 0.803763848838974, "grad_norm": 0.39876699447631836, "learning_rate": 8.397326852976914e-05, "loss": 2.2645, "step": 2648 }, { "epoch": 0.8040673850356655, "grad_norm": 0.39235416054725647, "learning_rate": 8.396719319562576e-05, "loss": 1.9287, "step": 2649 }, { "epoch": 0.804370921232357, "grad_norm": 0.3264532685279846, "learning_rate": 8.396111786148239e-05, "loss": 1.856, "step": 2650 }, { "epoch": 0.8046744574290484, "grad_norm": 0.4189594089984894, "learning_rate": 8.395504252733901e-05, "loss": 2.0628, "step": 2651 }, { "epoch": 0.8049779936257399, "grad_norm": 0.3941250145435333, "learning_rate": 8.394896719319563e-05, "loss": 1.7251, "step": 2652 }, { "epoch": 0.8052815298224313, "grad_norm": 0.42837756872177124, "learning_rate": 8.394289185905226e-05, "loss": 1.2304, "step": 2653 }, { "epoch": 0.8055850660191228, "grad_norm": 0.8526172637939453, "learning_rate": 8.393681652490887e-05, "loss": 2.0248, "step": 2654 }, { "epoch": 0.8058886022158143, "grad_norm": 0.36125120520591736, "learning_rate": 8.393074119076549e-05, "loss": 1.522, "step": 2655 }, { "epoch": 0.8061921384125057, "grad_norm": 0.34955886006355286, "learning_rate": 8.392466585662212e-05, "loss": 1.5399, "step": 2656 }, { "epoch": 0.8064956746091971, "grad_norm": 0.42194581031799316, "learning_rate": 8.391859052247874e-05, "loss": 1.5088, "step": 2657 }, { "epoch": 0.8067992108058886, "grad_norm": 0.41130530834198, "learning_rate": 8.391251518833536e-05, "loss": 1.9864, "step": 2658 }, { "epoch": 0.8071027470025801, "grad_norm": 0.3659766614437103, "learning_rate": 8.390643985419199e-05, "loss": 1.3164, "step": 2659 }, { "epoch": 0.8074062831992715, "grad_norm": 0.4178526997566223, "learning_rate": 8.39003645200486e-05, "loss": 2.096, "step": 2660 }, { "epoch": 0.807709819395963, "grad_norm": 0.44985684752464294, "learning_rate": 8.389428918590522e-05, "loss": 1.5053, "step": 2661 }, { "epoch": 0.8080133555926544, "grad_norm": 0.5702995657920837, "learning_rate": 8.388821385176185e-05, "loss": 1.745, "step": 2662 }, { "epoch": 0.8083168917893458, "grad_norm": 0.5479261875152588, "learning_rate": 8.388213851761847e-05, "loss": 1.6453, "step": 2663 }, { "epoch": 0.8086204279860373, "grad_norm": 0.5145617723464966, "learning_rate": 8.387606318347509e-05, "loss": 1.3749, "step": 2664 }, { "epoch": 0.8089239641827288, "grad_norm": 0.3433884084224701, "learning_rate": 8.386998784933172e-05, "loss": 1.8217, "step": 2665 }, { "epoch": 0.8092275003794203, "grad_norm": 0.4309893548488617, "learning_rate": 8.386391251518834e-05, "loss": 1.8368, "step": 2666 }, { "epoch": 0.8095310365761117, "grad_norm": 0.42593100666999817, "learning_rate": 8.385783718104497e-05, "loss": 1.897, "step": 2667 }, { "epoch": 0.8098345727728031, "grad_norm": 0.3921912908554077, "learning_rate": 8.385176184690159e-05, "loss": 1.8504, "step": 2668 }, { "epoch": 0.8101381089694946, "grad_norm": 0.4481246769428253, "learning_rate": 8.38456865127582e-05, "loss": 2.2592, "step": 2669 }, { "epoch": 0.8104416451661861, "grad_norm": 0.3490237891674042, "learning_rate": 8.383961117861483e-05, "loss": 1.792, "step": 2670 }, { "epoch": 0.8107451813628775, "grad_norm": 0.4693361818790436, "learning_rate": 8.383353584447145e-05, "loss": 2.0438, "step": 2671 }, { "epoch": 0.811048717559569, "grad_norm": 0.3441024720668793, "learning_rate": 8.382746051032807e-05, "loss": 1.7983, "step": 2672 }, { "epoch": 0.8113522537562604, "grad_norm": 0.4398588240146637, "learning_rate": 8.38213851761847e-05, "loss": 1.9503, "step": 2673 }, { "epoch": 0.8116557899529518, "grad_norm": 0.36766424775123596, "learning_rate": 8.381530984204132e-05, "loss": 1.9745, "step": 2674 }, { "epoch": 0.8119593261496434, "grad_norm": 0.4529463052749634, "learning_rate": 8.380923450789793e-05, "loss": 1.9025, "step": 2675 }, { "epoch": 0.8122628623463348, "grad_norm": 0.4247633218765259, "learning_rate": 8.380315917375456e-05, "loss": 1.9669, "step": 2676 }, { "epoch": 0.8125663985430263, "grad_norm": 0.39208900928497314, "learning_rate": 8.379708383961118e-05, "loss": 1.8593, "step": 2677 }, { "epoch": 0.8128699347397177, "grad_norm": 0.46601489186286926, "learning_rate": 8.37910085054678e-05, "loss": 1.8102, "step": 2678 }, { "epoch": 0.8131734709364091, "grad_norm": 0.4215412139892578, "learning_rate": 8.378493317132443e-05, "loss": 2.1686, "step": 2679 }, { "epoch": 0.8134770071331007, "grad_norm": 0.4089846909046173, "learning_rate": 8.377885783718105e-05, "loss": 1.8722, "step": 2680 }, { "epoch": 0.8137805433297921, "grad_norm": 0.43888887763023376, "learning_rate": 8.377278250303768e-05, "loss": 1.7578, "step": 2681 }, { "epoch": 0.8140840795264835, "grad_norm": 0.6995130777359009, "learning_rate": 8.37667071688943e-05, "loss": 1.7296, "step": 2682 }, { "epoch": 0.814387615723175, "grad_norm": 0.4700230360031128, "learning_rate": 8.376063183475091e-05, "loss": 1.3055, "step": 2683 }, { "epoch": 0.8146911519198664, "grad_norm": 0.4099135398864746, "learning_rate": 8.375455650060754e-05, "loss": 1.9858, "step": 2684 }, { "epoch": 0.814994688116558, "grad_norm": 0.44735094904899597, "learning_rate": 8.374848116646416e-05, "loss": 1.9615, "step": 2685 }, { "epoch": 0.8152982243132494, "grad_norm": 0.6808655858039856, "learning_rate": 8.374240583232078e-05, "loss": 1.7893, "step": 2686 }, { "epoch": 0.8156017605099408, "grad_norm": 0.4560304880142212, "learning_rate": 8.373633049817741e-05, "loss": 1.7048, "step": 2687 }, { "epoch": 0.8159052967066323, "grad_norm": 0.4043562114238739, "learning_rate": 8.373025516403403e-05, "loss": 2.0911, "step": 2688 }, { "epoch": 0.8162088329033237, "grad_norm": 0.40530329942703247, "learning_rate": 8.372417982989064e-05, "loss": 1.8597, "step": 2689 }, { "epoch": 0.8165123691000151, "grad_norm": 0.42696669697761536, "learning_rate": 8.371810449574727e-05, "loss": 1.354, "step": 2690 }, { "epoch": 0.8168159052967067, "grad_norm": 0.411856472492218, "learning_rate": 8.371202916160389e-05, "loss": 1.8913, "step": 2691 }, { "epoch": 0.8171194414933981, "grad_norm": 1.3200637102127075, "learning_rate": 8.370595382746051e-05, "loss": 1.9903, "step": 2692 }, { "epoch": 0.8174229776900895, "grad_norm": 0.4155752956867218, "learning_rate": 8.369987849331714e-05, "loss": 2.036, "step": 2693 }, { "epoch": 0.817726513886781, "grad_norm": 0.4618370234966278, "learning_rate": 8.369380315917376e-05, "loss": 1.8101, "step": 2694 }, { "epoch": 0.8180300500834724, "grad_norm": 0.338123619556427, "learning_rate": 8.368772782503039e-05, "loss": 1.7934, "step": 2695 }, { "epoch": 0.818333586280164, "grad_norm": 0.4331413507461548, "learning_rate": 8.3681652490887e-05, "loss": 1.7394, "step": 2696 }, { "epoch": 0.8186371224768554, "grad_norm": 0.3667849004268646, "learning_rate": 8.367557715674362e-05, "loss": 1.9162, "step": 2697 }, { "epoch": 0.8189406586735468, "grad_norm": 0.4584942162036896, "learning_rate": 8.366950182260025e-05, "loss": 1.801, "step": 2698 }, { "epoch": 0.8192441948702383, "grad_norm": 0.4310884475708008, "learning_rate": 8.366342648845687e-05, "loss": 1.952, "step": 2699 }, { "epoch": 0.8195477310669297, "grad_norm": 0.35577401518821716, "learning_rate": 8.365735115431349e-05, "loss": 2.0338, "step": 2700 }, { "epoch": 0.8198512672636212, "grad_norm": 0.4453931152820587, "learning_rate": 8.365127582017012e-05, "loss": 1.777, "step": 2701 }, { "epoch": 0.8201548034603127, "grad_norm": 0.4156850576400757, "learning_rate": 8.364520048602674e-05, "loss": 1.8725, "step": 2702 }, { "epoch": 0.8204583396570041, "grad_norm": 0.3999830186367035, "learning_rate": 8.363912515188335e-05, "loss": 2.0136, "step": 2703 }, { "epoch": 0.8207618758536955, "grad_norm": 0.4082907736301422, "learning_rate": 8.363304981773998e-05, "loss": 1.9336, "step": 2704 }, { "epoch": 0.821065412050387, "grad_norm": 0.41365379095077515, "learning_rate": 8.36269744835966e-05, "loss": 1.8598, "step": 2705 }, { "epoch": 0.8213689482470785, "grad_norm": 0.4504840075969696, "learning_rate": 8.362089914945322e-05, "loss": 2.0346, "step": 2706 }, { "epoch": 0.82167248444377, "grad_norm": 0.5199129581451416, "learning_rate": 8.361482381530985e-05, "loss": 1.3809, "step": 2707 }, { "epoch": 0.8219760206404614, "grad_norm": 0.316165030002594, "learning_rate": 8.360874848116647e-05, "loss": 1.6682, "step": 2708 }, { "epoch": 0.8222795568371528, "grad_norm": 0.434994637966156, "learning_rate": 8.36026731470231e-05, "loss": 1.9569, "step": 2709 }, { "epoch": 0.8225830930338442, "grad_norm": 0.4767877757549286, "learning_rate": 8.359659781287972e-05, "loss": 2.0574, "step": 2710 }, { "epoch": 0.8228866292305358, "grad_norm": 0.36715012788772583, "learning_rate": 8.359052247873633e-05, "loss": 1.5894, "step": 2711 }, { "epoch": 0.8231901654272272, "grad_norm": 0.47204360365867615, "learning_rate": 8.358444714459296e-05, "loss": 2.0371, "step": 2712 }, { "epoch": 0.8234937016239187, "grad_norm": 0.4339917302131653, "learning_rate": 8.357837181044958e-05, "loss": 1.6804, "step": 2713 }, { "epoch": 0.8237972378206101, "grad_norm": 0.40211397409439087, "learning_rate": 8.35722964763062e-05, "loss": 1.8478, "step": 2714 }, { "epoch": 0.8241007740173015, "grad_norm": 0.4626907408237457, "learning_rate": 8.356622114216283e-05, "loss": 1.0077, "step": 2715 }, { "epoch": 0.824404310213993, "grad_norm": 0.5032857060432434, "learning_rate": 8.356014580801945e-05, "loss": 1.4585, "step": 2716 }, { "epoch": 0.8247078464106845, "grad_norm": 0.7893280982971191, "learning_rate": 8.355407047387606e-05, "loss": 1.7855, "step": 2717 }, { "epoch": 0.8250113826073759, "grad_norm": 0.4199140965938568, "learning_rate": 8.35479951397327e-05, "loss": 2.036, "step": 2718 }, { "epoch": 0.8253149188040674, "grad_norm": 0.4148293137550354, "learning_rate": 8.354191980558931e-05, "loss": 1.6445, "step": 2719 }, { "epoch": 0.8256184550007588, "grad_norm": 0.40081986784935, "learning_rate": 8.353584447144593e-05, "loss": 1.8132, "step": 2720 }, { "epoch": 0.8259219911974502, "grad_norm": 0.38736727833747864, "learning_rate": 8.352976913730256e-05, "loss": 2.1591, "step": 2721 }, { "epoch": 0.8262255273941418, "grad_norm": 0.39787808060646057, "learning_rate": 8.352369380315918e-05, "loss": 1.986, "step": 2722 }, { "epoch": 0.8265290635908332, "grad_norm": 0.4428958594799042, "learning_rate": 8.351761846901581e-05, "loss": 2.0022, "step": 2723 }, { "epoch": 0.8268325997875247, "grad_norm": 0.4137169420719147, "learning_rate": 8.351154313487243e-05, "loss": 1.647, "step": 2724 }, { "epoch": 0.8271361359842161, "grad_norm": 0.3762650489807129, "learning_rate": 8.350546780072904e-05, "loss": 1.6598, "step": 2725 }, { "epoch": 0.8274396721809075, "grad_norm": 0.4346376955509186, "learning_rate": 8.349939246658567e-05, "loss": 1.9286, "step": 2726 }, { "epoch": 0.8277432083775991, "grad_norm": 0.4809822142124176, "learning_rate": 8.349331713244228e-05, "loss": 1.7861, "step": 2727 }, { "epoch": 0.8280467445742905, "grad_norm": 0.4233179986476898, "learning_rate": 8.348724179829891e-05, "loss": 1.4572, "step": 2728 }, { "epoch": 0.8283502807709819, "grad_norm": 0.4098990261554718, "learning_rate": 8.348116646415554e-05, "loss": 1.5889, "step": 2729 }, { "epoch": 0.8286538169676734, "grad_norm": 0.5033220052719116, "learning_rate": 8.347509113001216e-05, "loss": 2.0879, "step": 2730 }, { "epoch": 0.8289573531643648, "grad_norm": 0.4248674809932709, "learning_rate": 8.346901579586877e-05, "loss": 1.113, "step": 2731 }, { "epoch": 0.8292608893610564, "grad_norm": 0.4501001834869385, "learning_rate": 8.34629404617254e-05, "loss": 1.984, "step": 2732 }, { "epoch": 0.8295644255577478, "grad_norm": 0.4608478844165802, "learning_rate": 8.345686512758202e-05, "loss": 1.692, "step": 2733 }, { "epoch": 0.8298679617544392, "grad_norm": 0.4299629330635071, "learning_rate": 8.345078979343864e-05, "loss": 2.0634, "step": 2734 }, { "epoch": 0.8301714979511307, "grad_norm": 0.4118325412273407, "learning_rate": 8.344471445929527e-05, "loss": 1.9081, "step": 2735 }, { "epoch": 0.8304750341478221, "grad_norm": 0.5083432793617249, "learning_rate": 8.343863912515189e-05, "loss": 2.1141, "step": 2736 }, { "epoch": 0.8307785703445136, "grad_norm": 0.7300907373428345, "learning_rate": 8.34325637910085e-05, "loss": 1.3025, "step": 2737 }, { "epoch": 0.8310821065412051, "grad_norm": 0.4016704261302948, "learning_rate": 8.342648845686512e-05, "loss": 1.8973, "step": 2738 }, { "epoch": 0.8313856427378965, "grad_norm": 0.4292236268520355, "learning_rate": 8.342041312272175e-05, "loss": 1.9909, "step": 2739 }, { "epoch": 0.8316891789345879, "grad_norm": 0.4190838634967804, "learning_rate": 8.341433778857838e-05, "loss": 1.8034, "step": 2740 }, { "epoch": 0.8319927151312794, "grad_norm": 0.4143367111682892, "learning_rate": 8.340826245443499e-05, "loss": 2.0459, "step": 2741 }, { "epoch": 0.8322962513279709, "grad_norm": 0.46704939007759094, "learning_rate": 8.340218712029162e-05, "loss": 1.6579, "step": 2742 }, { "epoch": 0.8325997875246623, "grad_norm": 0.48142144083976746, "learning_rate": 8.339611178614825e-05, "loss": 1.8507, "step": 2743 }, { "epoch": 0.8329033237213538, "grad_norm": 0.42653772234916687, "learning_rate": 8.339003645200487e-05, "loss": 1.9661, "step": 2744 }, { "epoch": 0.8332068599180452, "grad_norm": 0.42195385694503784, "learning_rate": 8.338396111786148e-05, "loss": 1.583, "step": 2745 }, { "epoch": 0.8335103961147367, "grad_norm": 0.5214222073554993, "learning_rate": 8.337788578371811e-05, "loss": 0.8356, "step": 2746 }, { "epoch": 0.8338139323114281, "grad_norm": 0.4736870229244232, "learning_rate": 8.337181044957473e-05, "loss": 2.1115, "step": 2747 }, { "epoch": 0.8341174685081196, "grad_norm": 0.4879785180091858, "learning_rate": 8.336573511543135e-05, "loss": 2.1468, "step": 2748 }, { "epoch": 0.8344210047048111, "grad_norm": 0.33596518635749817, "learning_rate": 8.335965978128798e-05, "loss": 1.9451, "step": 2749 }, { "epoch": 0.8347245409015025, "grad_norm": 0.3724137246608734, "learning_rate": 8.33535844471446e-05, "loss": 1.4332, "step": 2750 }, { "epoch": 0.8350280770981939, "grad_norm": 0.41488635540008545, "learning_rate": 8.334750911300121e-05, "loss": 2.245, "step": 2751 }, { "epoch": 0.8353316132948854, "grad_norm": 0.41388005018234253, "learning_rate": 8.334143377885783e-05, "loss": 2.0323, "step": 2752 }, { "epoch": 0.8356351494915769, "grad_norm": 0.8086270093917847, "learning_rate": 8.333535844471446e-05, "loss": 1.9422, "step": 2753 }, { "epoch": 0.8359386856882683, "grad_norm": 0.3645714223384857, "learning_rate": 8.33292831105711e-05, "loss": 1.7161, "step": 2754 }, { "epoch": 0.8362422218849598, "grad_norm": 0.36916327476501465, "learning_rate": 8.33232077764277e-05, "loss": 1.7339, "step": 2755 }, { "epoch": 0.8365457580816512, "grad_norm": 0.3351556956768036, "learning_rate": 8.331713244228433e-05, "loss": 1.5955, "step": 2756 }, { "epoch": 0.8368492942783426, "grad_norm": 0.4345923364162445, "learning_rate": 8.331105710814096e-05, "loss": 1.871, "step": 2757 }, { "epoch": 0.8371528304750342, "grad_norm": 0.4099547266960144, "learning_rate": 8.330498177399758e-05, "loss": 1.8057, "step": 2758 }, { "epoch": 0.8374563666717256, "grad_norm": 0.45009273290634155, "learning_rate": 8.32989064398542e-05, "loss": 1.3956, "step": 2759 }, { "epoch": 0.8377599028684171, "grad_norm": 0.3890456557273865, "learning_rate": 8.329283110571082e-05, "loss": 1.837, "step": 2760 }, { "epoch": 0.8380634390651085, "grad_norm": 0.4065060615539551, "learning_rate": 8.328675577156744e-05, "loss": 2.1527, "step": 2761 }, { "epoch": 0.8383669752617999, "grad_norm": 0.4432562589645386, "learning_rate": 8.328068043742406e-05, "loss": 1.5033, "step": 2762 }, { "epoch": 0.8386705114584915, "grad_norm": 0.4977710247039795, "learning_rate": 8.327460510328069e-05, "loss": 2.1406, "step": 2763 }, { "epoch": 0.8389740476551829, "grad_norm": 1.0339199304580688, "learning_rate": 8.326852976913731e-05, "loss": 1.9732, "step": 2764 }, { "epoch": 0.8392775838518743, "grad_norm": 1.5824745893478394, "learning_rate": 8.326245443499392e-05, "loss": 1.7956, "step": 2765 }, { "epoch": 0.8395811200485658, "grad_norm": 0.4485887587070465, "learning_rate": 8.325637910085054e-05, "loss": 1.71, "step": 2766 }, { "epoch": 0.8398846562452572, "grad_norm": 0.3982546329498291, "learning_rate": 8.325030376670717e-05, "loss": 2.0978, "step": 2767 }, { "epoch": 0.8401881924419488, "grad_norm": 0.5837999582290649, "learning_rate": 8.32442284325638e-05, "loss": 2.113, "step": 2768 }, { "epoch": 0.8404917286386402, "grad_norm": 0.5739153623580933, "learning_rate": 8.323815309842041e-05, "loss": 2.1892, "step": 2769 }, { "epoch": 0.8407952648353316, "grad_norm": 0.3813978135585785, "learning_rate": 8.323207776427704e-05, "loss": 1.2944, "step": 2770 }, { "epoch": 0.8410988010320231, "grad_norm": 0.4146029055118561, "learning_rate": 8.322600243013367e-05, "loss": 2.0011, "step": 2771 }, { "epoch": 0.8414023372287145, "grad_norm": 0.38315144181251526, "learning_rate": 8.321992709599029e-05, "loss": 2.1513, "step": 2772 }, { "epoch": 0.8417058734254059, "grad_norm": 0.4339327812194824, "learning_rate": 8.32138517618469e-05, "loss": 1.882, "step": 2773 }, { "epoch": 0.8420094096220975, "grad_norm": 0.40696778893470764, "learning_rate": 8.320777642770353e-05, "loss": 1.3785, "step": 2774 }, { "epoch": 0.8423129458187889, "grad_norm": 0.401257187128067, "learning_rate": 8.320170109356015e-05, "loss": 1.8048, "step": 2775 }, { "epoch": 0.8426164820154803, "grad_norm": 0.419649213552475, "learning_rate": 8.319562575941677e-05, "loss": 1.869, "step": 2776 }, { "epoch": 0.8429200182121718, "grad_norm": 0.45188263058662415, "learning_rate": 8.31895504252734e-05, "loss": 1.9754, "step": 2777 }, { "epoch": 0.8432235544088632, "grad_norm": 0.42580482363700867, "learning_rate": 8.318347509113002e-05, "loss": 2.0827, "step": 2778 }, { "epoch": 0.8435270906055548, "grad_norm": 0.3485068678855896, "learning_rate": 8.317739975698663e-05, "loss": 1.1861, "step": 2779 }, { "epoch": 0.8438306268022462, "grad_norm": 0.38991910219192505, "learning_rate": 8.317132442284325e-05, "loss": 1.9857, "step": 2780 }, { "epoch": 0.8441341629989376, "grad_norm": 0.4066307842731476, "learning_rate": 8.316524908869988e-05, "loss": 1.6987, "step": 2781 }, { "epoch": 0.844437699195629, "grad_norm": 0.40589094161987305, "learning_rate": 8.315917375455651e-05, "loss": 2.099, "step": 2782 }, { "epoch": 0.8447412353923205, "grad_norm": 0.42218223214149475, "learning_rate": 8.315309842041312e-05, "loss": 1.7493, "step": 2783 }, { "epoch": 0.845044771589012, "grad_norm": 0.33325353264808655, "learning_rate": 8.314702308626975e-05, "loss": 1.6937, "step": 2784 }, { "epoch": 0.8453483077857035, "grad_norm": 0.4162006676197052, "learning_rate": 8.314094775212638e-05, "loss": 1.6099, "step": 2785 }, { "epoch": 0.8456518439823949, "grad_norm": 1.9040342569351196, "learning_rate": 8.3134872417983e-05, "loss": 1.9213, "step": 2786 }, { "epoch": 0.8459553801790863, "grad_norm": 0.4040900468826294, "learning_rate": 8.312879708383961e-05, "loss": 1.9817, "step": 2787 }, { "epoch": 0.8462589163757778, "grad_norm": 0.4395250082015991, "learning_rate": 8.312272174969624e-05, "loss": 1.8821, "step": 2788 }, { "epoch": 0.8465624525724693, "grad_norm": 0.40407246351242065, "learning_rate": 8.311664641555286e-05, "loss": 1.7777, "step": 2789 }, { "epoch": 0.8468659887691607, "grad_norm": 0.39172056317329407, "learning_rate": 8.311057108140948e-05, "loss": 1.9446, "step": 2790 }, { "epoch": 0.8471695249658522, "grad_norm": 0.4654727876186371, "learning_rate": 8.310449574726611e-05, "loss": 1.5926, "step": 2791 }, { "epoch": 0.8474730611625436, "grad_norm": 0.41954633593559265, "learning_rate": 8.309842041312273e-05, "loss": 2.0347, "step": 2792 }, { "epoch": 0.847776597359235, "grad_norm": 0.39012208580970764, "learning_rate": 8.309234507897934e-05, "loss": 1.8302, "step": 2793 }, { "epoch": 0.8480801335559266, "grad_norm": 0.3932954967021942, "learning_rate": 8.308626974483596e-05, "loss": 2.0534, "step": 2794 }, { "epoch": 0.848383669752618, "grad_norm": 0.40115275979042053, "learning_rate": 8.308019441069259e-05, "loss": 2.1026, "step": 2795 }, { "epoch": 0.8486872059493095, "grad_norm": 0.6058691143989563, "learning_rate": 8.307411907654922e-05, "loss": 1.6423, "step": 2796 }, { "epoch": 0.8489907421460009, "grad_norm": 0.3684822916984558, "learning_rate": 8.306804374240583e-05, "loss": 1.9901, "step": 2797 }, { "epoch": 0.8492942783426923, "grad_norm": 0.3942423164844513, "learning_rate": 8.306196840826246e-05, "loss": 1.9238, "step": 2798 }, { "epoch": 0.8495978145393838, "grad_norm": 0.3520863354206085, "learning_rate": 8.305589307411909e-05, "loss": 1.7857, "step": 2799 }, { "epoch": 0.8499013507360753, "grad_norm": 0.7609321475028992, "learning_rate": 8.304981773997569e-05, "loss": 1.9176, "step": 2800 }, { "epoch": 0.8502048869327667, "grad_norm": 0.45220932364463806, "learning_rate": 8.304374240583232e-05, "loss": 1.8932, "step": 2801 }, { "epoch": 0.8505084231294582, "grad_norm": 0.33773747086524963, "learning_rate": 8.303766707168895e-05, "loss": 1.8779, "step": 2802 }, { "epoch": 0.8508119593261496, "grad_norm": 0.4092886745929718, "learning_rate": 8.303159173754557e-05, "loss": 2.3972, "step": 2803 }, { "epoch": 0.851115495522841, "grad_norm": 0.4083962142467499, "learning_rate": 8.302551640340219e-05, "loss": 1.5651, "step": 2804 }, { "epoch": 0.8514190317195326, "grad_norm": 0.41298726201057434, "learning_rate": 8.301944106925882e-05, "loss": 1.9808, "step": 2805 }, { "epoch": 0.851722567916224, "grad_norm": 0.3522525131702423, "learning_rate": 8.301336573511544e-05, "loss": 2.0116, "step": 2806 }, { "epoch": 0.8520261041129155, "grad_norm": 0.3948490619659424, "learning_rate": 8.300729040097205e-05, "loss": 1.9998, "step": 2807 }, { "epoch": 0.8523296403096069, "grad_norm": 0.40480837225914, "learning_rate": 8.300121506682867e-05, "loss": 1.9787, "step": 2808 }, { "epoch": 0.8526331765062983, "grad_norm": 0.3458811044692993, "learning_rate": 8.29951397326853e-05, "loss": 1.9506, "step": 2809 }, { "epoch": 0.8529367127029899, "grad_norm": 0.4472740888595581, "learning_rate": 8.298906439854193e-05, "loss": 1.7003, "step": 2810 }, { "epoch": 0.8532402488996813, "grad_norm": 0.3910341262817383, "learning_rate": 8.298298906439854e-05, "loss": 1.6672, "step": 2811 }, { "epoch": 0.8535437850963727, "grad_norm": 0.4467204213142395, "learning_rate": 8.297691373025517e-05, "loss": 1.8857, "step": 2812 }, { "epoch": 0.8538473212930642, "grad_norm": 0.42083072662353516, "learning_rate": 8.29708383961118e-05, "loss": 2.0453, "step": 2813 }, { "epoch": 0.8541508574897556, "grad_norm": 0.4398275315761566, "learning_rate": 8.29647630619684e-05, "loss": 1.8129, "step": 2814 }, { "epoch": 0.8544543936864472, "grad_norm": 0.8038653135299683, "learning_rate": 8.295868772782503e-05, "loss": 1.9836, "step": 2815 }, { "epoch": 0.8547579298831386, "grad_norm": 0.41887366771698, "learning_rate": 8.295261239368166e-05, "loss": 2.0524, "step": 2816 }, { "epoch": 0.85506146607983, "grad_norm": 0.5513349175453186, "learning_rate": 8.294653705953828e-05, "loss": 1.681, "step": 2817 }, { "epoch": 0.8553650022765215, "grad_norm": 0.4004881680011749, "learning_rate": 8.29404617253949e-05, "loss": 1.5912, "step": 2818 }, { "epoch": 0.8556685384732129, "grad_norm": 0.3472290635108948, "learning_rate": 8.293438639125152e-05, "loss": 1.7615, "step": 2819 }, { "epoch": 0.8559720746699044, "grad_norm": 0.4187697470188141, "learning_rate": 8.292831105710815e-05, "loss": 1.6436, "step": 2820 }, { "epoch": 0.8562756108665959, "grad_norm": 0.418883353471756, "learning_rate": 8.292223572296476e-05, "loss": 1.8053, "step": 2821 }, { "epoch": 0.8565791470632873, "grad_norm": 0.41798603534698486, "learning_rate": 8.291616038882138e-05, "loss": 1.568, "step": 2822 }, { "epoch": 0.8568826832599787, "grad_norm": 0.3748184144496918, "learning_rate": 8.291008505467801e-05, "loss": 1.4218, "step": 2823 }, { "epoch": 0.8571862194566702, "grad_norm": 0.42556214332580566, "learning_rate": 8.290400972053463e-05, "loss": 1.5612, "step": 2824 }, { "epoch": 0.8574897556533617, "grad_norm": 0.46294355392456055, "learning_rate": 8.289793438639125e-05, "loss": 1.6574, "step": 2825 }, { "epoch": 0.8577932918500532, "grad_norm": 0.40295061469078064, "learning_rate": 8.289185905224788e-05, "loss": 1.3524, "step": 2826 }, { "epoch": 0.8580968280467446, "grad_norm": 0.465472549200058, "learning_rate": 8.288578371810451e-05, "loss": 2.017, "step": 2827 }, { "epoch": 0.858400364243436, "grad_norm": 0.4338732957839966, "learning_rate": 8.287970838396111e-05, "loss": 1.7856, "step": 2828 }, { "epoch": 0.8587039004401275, "grad_norm": 0.4338977336883545, "learning_rate": 8.287363304981774e-05, "loss": 1.8401, "step": 2829 }, { "epoch": 0.8590074366368189, "grad_norm": 0.43514832854270935, "learning_rate": 8.286755771567437e-05, "loss": 1.7652, "step": 2830 }, { "epoch": 0.8593109728335104, "grad_norm": 0.3935963213443756, "learning_rate": 8.286148238153099e-05, "loss": 1.9616, "step": 2831 }, { "epoch": 0.8596145090302019, "grad_norm": 0.4481986463069916, "learning_rate": 8.285540704738761e-05, "loss": 1.8241, "step": 2832 }, { "epoch": 0.8599180452268933, "grad_norm": 0.3898305594921112, "learning_rate": 8.284933171324423e-05, "loss": 2.0083, "step": 2833 }, { "epoch": 0.8602215814235847, "grad_norm": 0.40316537022590637, "learning_rate": 8.284325637910086e-05, "loss": 1.5316, "step": 2834 }, { "epoch": 0.8605251176202762, "grad_norm": 0.407939612865448, "learning_rate": 8.283718104495747e-05, "loss": 2.0299, "step": 2835 }, { "epoch": 0.8608286538169677, "grad_norm": 1.6520899534225464, "learning_rate": 8.283110571081409e-05, "loss": 1.8243, "step": 2836 }, { "epoch": 0.8611321900136591, "grad_norm": 0.4098230004310608, "learning_rate": 8.282503037667072e-05, "loss": 1.9032, "step": 2837 }, { "epoch": 0.8614357262103506, "grad_norm": 0.3847675025463104, "learning_rate": 8.281895504252734e-05, "loss": 1.7605, "step": 2838 }, { "epoch": 0.861739262407042, "grad_norm": 0.42115259170532227, "learning_rate": 8.281287970838396e-05, "loss": 1.4382, "step": 2839 }, { "epoch": 0.8620427986037335, "grad_norm": 0.4232335090637207, "learning_rate": 8.280680437424059e-05, "loss": 1.9637, "step": 2840 }, { "epoch": 0.862346334800425, "grad_norm": 0.3830999732017517, "learning_rate": 8.280072904009722e-05, "loss": 2.0951, "step": 2841 }, { "epoch": 0.8626498709971164, "grad_norm": 0.4446307122707367, "learning_rate": 8.279465370595382e-05, "loss": 1.8081, "step": 2842 }, { "epoch": 0.8629534071938079, "grad_norm": 0.40466341376304626, "learning_rate": 8.278857837181045e-05, "loss": 1.9391, "step": 2843 }, { "epoch": 0.8632569433904993, "grad_norm": 0.4302142560482025, "learning_rate": 8.278250303766708e-05, "loss": 2.0331, "step": 2844 }, { "epoch": 0.8635604795871907, "grad_norm": 0.40295708179473877, "learning_rate": 8.27764277035237e-05, "loss": 1.7359, "step": 2845 }, { "epoch": 0.8638640157838823, "grad_norm": 0.5045837163925171, "learning_rate": 8.277035236938032e-05, "loss": 1.9034, "step": 2846 }, { "epoch": 0.8641675519805737, "grad_norm": 0.4259899854660034, "learning_rate": 8.276427703523694e-05, "loss": 2.0042, "step": 2847 }, { "epoch": 0.8644710881772651, "grad_norm": 0.4341868460178375, "learning_rate": 8.275820170109357e-05, "loss": 1.8425, "step": 2848 }, { "epoch": 0.8647746243739566, "grad_norm": 0.46809810400009155, "learning_rate": 8.275212636695018e-05, "loss": 1.9288, "step": 2849 }, { "epoch": 0.865078160570648, "grad_norm": 0.4060373306274414, "learning_rate": 8.27460510328068e-05, "loss": 1.7835, "step": 2850 }, { "epoch": 0.8653816967673396, "grad_norm": 0.39879024028778076, "learning_rate": 8.273997569866343e-05, "loss": 1.7374, "step": 2851 }, { "epoch": 0.865685232964031, "grad_norm": 0.4948522746562958, "learning_rate": 8.273390036452005e-05, "loss": 1.9127, "step": 2852 }, { "epoch": 0.8659887691607224, "grad_norm": 0.40187695622444153, "learning_rate": 8.272782503037667e-05, "loss": 1.7527, "step": 2853 }, { "epoch": 0.8662923053574139, "grad_norm": 0.4162091910839081, "learning_rate": 8.27217496962333e-05, "loss": 2.0477, "step": 2854 }, { "epoch": 0.8665958415541053, "grad_norm": 0.4181444048881531, "learning_rate": 8.271567436208993e-05, "loss": 1.9956, "step": 2855 }, { "epoch": 0.8668993777507967, "grad_norm": 0.44338878989219666, "learning_rate": 8.270959902794653e-05, "loss": 1.7806, "step": 2856 }, { "epoch": 0.8672029139474883, "grad_norm": 0.4224783778190613, "learning_rate": 8.270352369380316e-05, "loss": 1.8572, "step": 2857 }, { "epoch": 0.8675064501441797, "grad_norm": 0.4111135005950928, "learning_rate": 8.26974483596598e-05, "loss": 1.8744, "step": 2858 }, { "epoch": 0.8678099863408711, "grad_norm": 0.40660667419433594, "learning_rate": 8.269137302551641e-05, "loss": 1.7401, "step": 2859 }, { "epoch": 0.8681135225375626, "grad_norm": 0.430890291929245, "learning_rate": 8.268529769137303e-05, "loss": 1.5967, "step": 2860 }, { "epoch": 0.868417058734254, "grad_norm": 0.45299017429351807, "learning_rate": 8.267922235722965e-05, "loss": 1.8742, "step": 2861 }, { "epoch": 0.8687205949309456, "grad_norm": 0.3461768329143524, "learning_rate": 8.267314702308628e-05, "loss": 1.6326, "step": 2862 }, { "epoch": 0.869024131127637, "grad_norm": 0.386844664812088, "learning_rate": 8.26670716889429e-05, "loss": 1.7126, "step": 2863 }, { "epoch": 0.8693276673243284, "grad_norm": 0.6148979067802429, "learning_rate": 8.266099635479951e-05, "loss": 1.3573, "step": 2864 }, { "epoch": 0.8696312035210199, "grad_norm": 0.4048292934894562, "learning_rate": 8.265492102065614e-05, "loss": 1.1731, "step": 2865 }, { "epoch": 0.8699347397177113, "grad_norm": 0.3976982831954956, "learning_rate": 8.264884568651276e-05, "loss": 1.8089, "step": 2866 }, { "epoch": 0.8702382759144028, "grad_norm": 0.39783963561058044, "learning_rate": 8.264277035236938e-05, "loss": 1.5432, "step": 2867 }, { "epoch": 0.8705418121110943, "grad_norm": 0.3972279131412506, "learning_rate": 8.263669501822601e-05, "loss": 1.7548, "step": 2868 }, { "epoch": 0.8708453483077857, "grad_norm": 0.43422597646713257, "learning_rate": 8.263061968408264e-05, "loss": 1.9792, "step": 2869 }, { "epoch": 0.8711488845044771, "grad_norm": 0.3682768940925598, "learning_rate": 8.262454434993924e-05, "loss": 2.1472, "step": 2870 }, { "epoch": 0.8714524207011686, "grad_norm": 0.37669479846954346, "learning_rate": 8.261846901579587e-05, "loss": 1.9962, "step": 2871 }, { "epoch": 0.8717559568978601, "grad_norm": 0.36915603280067444, "learning_rate": 8.26123936816525e-05, "loss": 1.5438, "step": 2872 }, { "epoch": 0.8720594930945516, "grad_norm": 0.4083096981048584, "learning_rate": 8.260631834750911e-05, "loss": 1.7711, "step": 2873 }, { "epoch": 0.872363029291243, "grad_norm": 0.3865950107574463, "learning_rate": 8.260024301336574e-05, "loss": 1.8624, "step": 2874 }, { "epoch": 0.8726665654879344, "grad_norm": 0.5207681655883789, "learning_rate": 8.259416767922236e-05, "loss": 2.0506, "step": 2875 }, { "epoch": 0.8729701016846259, "grad_norm": 0.4441354274749756, "learning_rate": 8.258809234507899e-05, "loss": 1.822, "step": 2876 }, { "epoch": 0.8732736378813174, "grad_norm": 0.3258417844772339, "learning_rate": 8.25820170109356e-05, "loss": 1.2733, "step": 2877 }, { "epoch": 0.8735771740780088, "grad_norm": 0.37115880846977234, "learning_rate": 8.257594167679222e-05, "loss": 1.9161, "step": 2878 }, { "epoch": 0.8738807102747003, "grad_norm": 0.47799551486968994, "learning_rate": 8.256986634264885e-05, "loss": 2.0963, "step": 2879 }, { "epoch": 0.8741842464713917, "grad_norm": 0.4438342750072479, "learning_rate": 8.256379100850547e-05, "loss": 1.3525, "step": 2880 }, { "epoch": 0.8744877826680831, "grad_norm": 0.3878926932811737, "learning_rate": 8.255771567436209e-05, "loss": 1.4205, "step": 2881 }, { "epoch": 0.8747913188647746, "grad_norm": 0.4843897819519043, "learning_rate": 8.255164034021872e-05, "loss": 2.0388, "step": 2882 }, { "epoch": 0.8750948550614661, "grad_norm": 0.5297700762748718, "learning_rate": 8.254556500607535e-05, "loss": 1.3358, "step": 2883 }, { "epoch": 0.8753983912581575, "grad_norm": 0.34962332248687744, "learning_rate": 8.253948967193195e-05, "loss": 1.6778, "step": 2884 }, { "epoch": 0.875701927454849, "grad_norm": 0.4917025864124298, "learning_rate": 8.253341433778858e-05, "loss": 1.7919, "step": 2885 }, { "epoch": 0.8760054636515404, "grad_norm": 0.43004027009010315, "learning_rate": 8.252733900364521e-05, "loss": 1.8666, "step": 2886 }, { "epoch": 0.8763089998482319, "grad_norm": 0.5672779679298401, "learning_rate": 8.252126366950182e-05, "loss": 1.5285, "step": 2887 }, { "epoch": 0.8766125360449234, "grad_norm": 0.45307332277297974, "learning_rate": 8.251518833535845e-05, "loss": 1.9865, "step": 2888 }, { "epoch": 0.8769160722416148, "grad_norm": 0.4099940359592438, "learning_rate": 8.250911300121507e-05, "loss": 1.8511, "step": 2889 }, { "epoch": 0.8772196084383063, "grad_norm": 0.4223155081272125, "learning_rate": 8.25030376670717e-05, "loss": 1.5077, "step": 2890 }, { "epoch": 0.8775231446349977, "grad_norm": 0.4101323187351227, "learning_rate": 8.249696233292831e-05, "loss": 1.7534, "step": 2891 }, { "epoch": 0.8778266808316891, "grad_norm": 0.4333887994289398, "learning_rate": 8.249088699878493e-05, "loss": 1.9611, "step": 2892 }, { "epoch": 0.8781302170283807, "grad_norm": 0.44225746393203735, "learning_rate": 8.248481166464156e-05, "loss": 1.9739, "step": 2893 }, { "epoch": 0.8784337532250721, "grad_norm": 0.419316828250885, "learning_rate": 8.247873633049818e-05, "loss": 1.9278, "step": 2894 }, { "epoch": 0.8787372894217635, "grad_norm": 0.39314213395118713, "learning_rate": 8.24726609963548e-05, "loss": 1.8768, "step": 2895 }, { "epoch": 0.879040825618455, "grad_norm": 0.45280733704566956, "learning_rate": 8.246658566221143e-05, "loss": 1.4584, "step": 2896 }, { "epoch": 0.8793443618151464, "grad_norm": 0.4470527768135071, "learning_rate": 8.246051032806805e-05, "loss": 1.992, "step": 2897 }, { "epoch": 0.879647898011838, "grad_norm": 0.4180387854576111, "learning_rate": 8.245443499392466e-05, "loss": 1.7643, "step": 2898 }, { "epoch": 0.8799514342085294, "grad_norm": 0.42866212129592896, "learning_rate": 8.24483596597813e-05, "loss": 1.4524, "step": 2899 }, { "epoch": 0.8802549704052208, "grad_norm": 0.3700104057788849, "learning_rate": 8.244228432563791e-05, "loss": 1.7607, "step": 2900 }, { "epoch": 0.8805585066019123, "grad_norm": 0.4378833770751953, "learning_rate": 8.243620899149453e-05, "loss": 1.7648, "step": 2901 }, { "epoch": 0.8808620427986037, "grad_norm": 0.4206582307815552, "learning_rate": 8.243013365735116e-05, "loss": 1.159, "step": 2902 }, { "epoch": 0.8811655789952952, "grad_norm": 0.4247249960899353, "learning_rate": 8.242405832320778e-05, "loss": 2.0676, "step": 2903 }, { "epoch": 0.8814691151919867, "grad_norm": 0.43796390295028687, "learning_rate": 8.241798298906441e-05, "loss": 1.9104, "step": 2904 }, { "epoch": 0.8817726513886781, "grad_norm": 0.4268593192100525, "learning_rate": 8.241190765492103e-05, "loss": 1.8041, "step": 2905 }, { "epoch": 0.8820761875853695, "grad_norm": 0.5760425925254822, "learning_rate": 8.240583232077764e-05, "loss": 1.7562, "step": 2906 }, { "epoch": 0.882379723782061, "grad_norm": 0.328421950340271, "learning_rate": 8.239975698663427e-05, "loss": 2.0184, "step": 2907 }, { "epoch": 0.8826832599787525, "grad_norm": 0.4264001250267029, "learning_rate": 8.239368165249089e-05, "loss": 1.8521, "step": 2908 }, { "epoch": 0.882986796175444, "grad_norm": 0.7516580820083618, "learning_rate": 8.238760631834751e-05, "loss": 1.9573, "step": 2909 }, { "epoch": 0.8832903323721354, "grad_norm": 0.43976011872291565, "learning_rate": 8.238153098420414e-05, "loss": 1.9756, "step": 2910 }, { "epoch": 0.8835938685688268, "grad_norm": 0.420858234167099, "learning_rate": 8.237545565006076e-05, "loss": 1.8513, "step": 2911 }, { "epoch": 0.8838974047655183, "grad_norm": 0.45598578453063965, "learning_rate": 8.236938031591737e-05, "loss": 1.8986, "step": 2912 }, { "epoch": 0.8842009409622097, "grad_norm": 0.3829743266105652, "learning_rate": 8.2363304981774e-05, "loss": 1.7241, "step": 2913 }, { "epoch": 0.8845044771589012, "grad_norm": 1.6669212579727173, "learning_rate": 8.235722964763062e-05, "loss": 1.875, "step": 2914 }, { "epoch": 0.8848080133555927, "grad_norm": 0.711898684501648, "learning_rate": 8.235115431348724e-05, "loss": 1.9559, "step": 2915 }, { "epoch": 0.8851115495522841, "grad_norm": 0.46978119015693665, "learning_rate": 8.234507897934387e-05, "loss": 1.4868, "step": 2916 }, { "epoch": 0.8854150857489755, "grad_norm": 0.4142061173915863, "learning_rate": 8.233900364520049e-05, "loss": 1.9654, "step": 2917 }, { "epoch": 0.885718621945667, "grad_norm": 0.4385989010334015, "learning_rate": 8.233292831105712e-05, "loss": 1.7259, "step": 2918 }, { "epoch": 0.8860221581423585, "grad_norm": 0.4307645261287689, "learning_rate": 8.232685297691374e-05, "loss": 2.033, "step": 2919 }, { "epoch": 0.88632569433905, "grad_norm": 0.5880458950996399, "learning_rate": 8.232077764277035e-05, "loss": 1.5008, "step": 2920 }, { "epoch": 0.8866292305357414, "grad_norm": 0.4887501299381256, "learning_rate": 8.231470230862698e-05, "loss": 1.9574, "step": 2921 }, { "epoch": 0.8869327667324328, "grad_norm": 0.42289820313453674, "learning_rate": 8.23086269744836e-05, "loss": 1.8607, "step": 2922 }, { "epoch": 0.8872363029291243, "grad_norm": 0.4192774295806885, "learning_rate": 8.230255164034022e-05, "loss": 2.0718, "step": 2923 }, { "epoch": 0.8875398391258158, "grad_norm": 0.5114601850509644, "learning_rate": 8.229647630619685e-05, "loss": 1.9832, "step": 2924 }, { "epoch": 0.8878433753225072, "grad_norm": 0.4116429388523102, "learning_rate": 8.229040097205347e-05, "loss": 1.7623, "step": 2925 }, { "epoch": 0.8881469115191987, "grad_norm": 0.44943469762802124, "learning_rate": 8.228432563791008e-05, "loss": 1.8241, "step": 2926 }, { "epoch": 0.8884504477158901, "grad_norm": 1.1577938795089722, "learning_rate": 8.227825030376671e-05, "loss": 1.9125, "step": 2927 }, { "epoch": 0.8887539839125815, "grad_norm": 1.1404715776443481, "learning_rate": 8.227217496962333e-05, "loss": 1.4977, "step": 2928 }, { "epoch": 0.8890575201092731, "grad_norm": 0.7202188968658447, "learning_rate": 8.226609963547995e-05, "loss": 2.0293, "step": 2929 }, { "epoch": 0.8893610563059645, "grad_norm": 0.8101162910461426, "learning_rate": 8.226002430133658e-05, "loss": 1.888, "step": 2930 }, { "epoch": 0.889664592502656, "grad_norm": 0.41163596510887146, "learning_rate": 8.22539489671932e-05, "loss": 1.849, "step": 2931 }, { "epoch": 0.8899681286993474, "grad_norm": 0.42284974455833435, "learning_rate": 8.224787363304983e-05, "loss": 1.9611, "step": 2932 }, { "epoch": 0.8902716648960388, "grad_norm": 0.6039950847625732, "learning_rate": 8.224179829890645e-05, "loss": 1.9822, "step": 2933 }, { "epoch": 0.8905752010927304, "grad_norm": 0.3433489203453064, "learning_rate": 8.223572296476306e-05, "loss": 1.7947, "step": 2934 }, { "epoch": 0.8908787372894218, "grad_norm": 0.3537866473197937, "learning_rate": 8.22296476306197e-05, "loss": 1.8749, "step": 2935 }, { "epoch": 0.8911822734861132, "grad_norm": 0.3994251787662506, "learning_rate": 8.222357229647631e-05, "loss": 1.7805, "step": 2936 }, { "epoch": 0.8914858096828047, "grad_norm": 0.3776698708534241, "learning_rate": 8.221749696233293e-05, "loss": 1.8582, "step": 2937 }, { "epoch": 0.8917893458794961, "grad_norm": 0.42231059074401855, "learning_rate": 8.221142162818956e-05, "loss": 1.9542, "step": 2938 }, { "epoch": 0.8920928820761875, "grad_norm": 0.470005065202713, "learning_rate": 8.220534629404618e-05, "loss": 1.1926, "step": 2939 }, { "epoch": 0.8923964182728791, "grad_norm": 0.43730974197387695, "learning_rate": 8.21992709599028e-05, "loss": 1.549, "step": 2940 }, { "epoch": 0.8926999544695705, "grad_norm": 0.4016040563583374, "learning_rate": 8.219319562575942e-05, "loss": 1.8797, "step": 2941 }, { "epoch": 0.893003490666262, "grad_norm": 0.4425860345363617, "learning_rate": 8.218712029161604e-05, "loss": 1.4267, "step": 2942 }, { "epoch": 0.8933070268629534, "grad_norm": 0.8383780717849731, "learning_rate": 8.218104495747266e-05, "loss": 1.8884, "step": 2943 }, { "epoch": 0.8936105630596448, "grad_norm": 0.4015752077102661, "learning_rate": 8.217496962332929e-05, "loss": 2.0479, "step": 2944 }, { "epoch": 0.8939140992563364, "grad_norm": 0.39999493956565857, "learning_rate": 8.216889428918591e-05, "loss": 1.8845, "step": 2945 }, { "epoch": 0.8942176354530278, "grad_norm": 0.800762414932251, "learning_rate": 8.216281895504252e-05, "loss": 2.1039, "step": 2946 }, { "epoch": 0.8945211716497192, "grad_norm": 0.38609185814857483, "learning_rate": 8.215674362089916e-05, "loss": 1.8951, "step": 2947 }, { "epoch": 0.8948247078464107, "grad_norm": 0.37557461857795715, "learning_rate": 8.215066828675577e-05, "loss": 1.8418, "step": 2948 }, { "epoch": 0.8951282440431021, "grad_norm": 0.4221288561820984, "learning_rate": 8.21445929526124e-05, "loss": 2.0154, "step": 2949 }, { "epoch": 0.8954317802397936, "grad_norm": 0.3798159658908844, "learning_rate": 8.213851761846902e-05, "loss": 1.852, "step": 2950 }, { "epoch": 0.8957353164364851, "grad_norm": 0.4777775704860687, "learning_rate": 8.213244228432564e-05, "loss": 1.9272, "step": 2951 }, { "epoch": 0.8960388526331765, "grad_norm": 0.45156142115592957, "learning_rate": 8.212636695018227e-05, "loss": 1.7512, "step": 2952 }, { "epoch": 0.8963423888298679, "grad_norm": 0.43190255761146545, "learning_rate": 8.212029161603889e-05, "loss": 2.0517, "step": 2953 }, { "epoch": 0.8966459250265594, "grad_norm": 0.40969786047935486, "learning_rate": 8.21142162818955e-05, "loss": 1.843, "step": 2954 }, { "epoch": 0.8969494612232509, "grad_norm": 0.3868393003940582, "learning_rate": 8.210814094775213e-05, "loss": 1.9854, "step": 2955 }, { "epoch": 0.8972529974199424, "grad_norm": 0.39843276143074036, "learning_rate": 8.210206561360875e-05, "loss": 1.9419, "step": 2956 }, { "epoch": 0.8975565336166338, "grad_norm": 0.3709312379360199, "learning_rate": 8.209599027946537e-05, "loss": 1.9075, "step": 2957 }, { "epoch": 0.8978600698133252, "grad_norm": 0.3753807246685028, "learning_rate": 8.2089914945322e-05, "loss": 1.8474, "step": 2958 }, { "epoch": 0.8981636060100167, "grad_norm": 0.47521868348121643, "learning_rate": 8.208383961117862e-05, "loss": 2.0679, "step": 2959 }, { "epoch": 0.8984671422067082, "grad_norm": 0.3866266906261444, "learning_rate": 8.207776427703523e-05, "loss": 1.6691, "step": 2960 }, { "epoch": 0.8987706784033996, "grad_norm": 0.417644739151001, "learning_rate": 8.207168894289187e-05, "loss": 1.7893, "step": 2961 }, { "epoch": 0.8990742146000911, "grad_norm": 0.427492618560791, "learning_rate": 8.206561360874848e-05, "loss": 1.9072, "step": 2962 }, { "epoch": 0.8993777507967825, "grad_norm": 0.4407294988632202, "learning_rate": 8.205953827460511e-05, "loss": 1.9718, "step": 2963 }, { "epoch": 0.8996812869934739, "grad_norm": 0.4453076720237732, "learning_rate": 8.205346294046173e-05, "loss": 1.6475, "step": 2964 }, { "epoch": 0.8999848231901654, "grad_norm": 0.43250027298927307, "learning_rate": 8.204738760631835e-05, "loss": 1.6695, "step": 2965 }, { "epoch": 0.9002883593868569, "grad_norm": 0.4513264298439026, "learning_rate": 8.204131227217498e-05, "loss": 1.7753, "step": 2966 }, { "epoch": 0.9005918955835484, "grad_norm": 0.3830716609954834, "learning_rate": 8.20352369380316e-05, "loss": 2.0716, "step": 2967 }, { "epoch": 0.9008954317802398, "grad_norm": 0.4067733585834503, "learning_rate": 8.202916160388821e-05, "loss": 1.9266, "step": 2968 }, { "epoch": 0.9011989679769312, "grad_norm": 0.39445656538009644, "learning_rate": 8.202308626974484e-05, "loss": 1.9995, "step": 2969 }, { "epoch": 0.9015025041736227, "grad_norm": 0.7493338584899902, "learning_rate": 8.201701093560146e-05, "loss": 2.0499, "step": 2970 }, { "epoch": 0.9018060403703142, "grad_norm": 0.4843970537185669, "learning_rate": 8.201093560145808e-05, "loss": 1.7133, "step": 2971 }, { "epoch": 0.9021095765670056, "grad_norm": 0.4203130602836609, "learning_rate": 8.200486026731471e-05, "loss": 2.057, "step": 2972 }, { "epoch": 0.9024131127636971, "grad_norm": 0.47080641984939575, "learning_rate": 8.199878493317133e-05, "loss": 1.9157, "step": 2973 }, { "epoch": 0.9027166489603885, "grad_norm": 0.3420778512954712, "learning_rate": 8.199270959902794e-05, "loss": 1.7641, "step": 2974 }, { "epoch": 0.9030201851570799, "grad_norm": 0.4011532962322235, "learning_rate": 8.198663426488458e-05, "loss": 2.0489, "step": 2975 }, { "epoch": 0.9033237213537715, "grad_norm": 0.457653284072876, "learning_rate": 8.198055893074119e-05, "loss": 1.5258, "step": 2976 }, { "epoch": 0.9036272575504629, "grad_norm": 0.45125746726989746, "learning_rate": 8.197448359659782e-05, "loss": 1.8996, "step": 2977 }, { "epoch": 0.9039307937471543, "grad_norm": 0.44737517833709717, "learning_rate": 8.196840826245444e-05, "loss": 1.6851, "step": 2978 }, { "epoch": 0.9042343299438458, "grad_norm": 0.4220506250858307, "learning_rate": 8.196233292831106e-05, "loss": 1.0195, "step": 2979 }, { "epoch": 0.9045378661405372, "grad_norm": 0.40028899908065796, "learning_rate": 8.195625759416769e-05, "loss": 1.9251, "step": 2980 }, { "epoch": 0.9048414023372288, "grad_norm": 0.3769090175628662, "learning_rate": 8.19501822600243e-05, "loss": 1.6839, "step": 2981 }, { "epoch": 0.9051449385339202, "grad_norm": 0.41733232140541077, "learning_rate": 8.194410692588092e-05, "loss": 1.1995, "step": 2982 }, { "epoch": 0.9054484747306116, "grad_norm": 0.4010336697101593, "learning_rate": 8.193803159173755e-05, "loss": 1.5531, "step": 2983 }, { "epoch": 0.9057520109273031, "grad_norm": 0.38843366503715515, "learning_rate": 8.193195625759417e-05, "loss": 1.8149, "step": 2984 }, { "epoch": 0.9060555471239945, "grad_norm": 0.3807307481765747, "learning_rate": 8.192588092345079e-05, "loss": 1.8379, "step": 2985 }, { "epoch": 0.906359083320686, "grad_norm": 0.44730183482170105, "learning_rate": 8.191980558930742e-05, "loss": 1.6764, "step": 2986 }, { "epoch": 0.9066626195173775, "grad_norm": 0.4236774146556854, "learning_rate": 8.191373025516404e-05, "loss": 1.926, "step": 2987 }, { "epoch": 0.9069661557140689, "grad_norm": 0.35578781366348267, "learning_rate": 8.190765492102065e-05, "loss": 2.1609, "step": 2988 }, { "epoch": 0.9072696919107603, "grad_norm": 0.41288191080093384, "learning_rate": 8.190157958687729e-05, "loss": 2.1007, "step": 2989 }, { "epoch": 0.9075732281074518, "grad_norm": 0.43154072761535645, "learning_rate": 8.18955042527339e-05, "loss": 1.7687, "step": 2990 }, { "epoch": 0.9078767643041433, "grad_norm": 0.41048216819763184, "learning_rate": 8.188942891859053e-05, "loss": 2.2021, "step": 2991 }, { "epoch": 0.9081803005008348, "grad_norm": 0.4213089942932129, "learning_rate": 8.188335358444715e-05, "loss": 1.7208, "step": 2992 }, { "epoch": 0.9084838366975262, "grad_norm": 0.9679743647575378, "learning_rate": 8.187727825030377e-05, "loss": 1.731, "step": 2993 }, { "epoch": 0.9087873728942176, "grad_norm": 0.4650149643421173, "learning_rate": 8.18712029161604e-05, "loss": 1.9327, "step": 2994 }, { "epoch": 0.9090909090909091, "grad_norm": 0.3545879125595093, "learning_rate": 8.1865127582017e-05, "loss": 1.3728, "step": 2995 }, { "epoch": 0.9093944452876005, "grad_norm": 0.4107753336429596, "learning_rate": 8.185905224787363e-05, "loss": 2.0919, "step": 2996 }, { "epoch": 0.909697981484292, "grad_norm": 0.44587281346321106, "learning_rate": 8.185297691373026e-05, "loss": 1.5915, "step": 2997 }, { "epoch": 0.9100015176809835, "grad_norm": 0.4520403742790222, "learning_rate": 8.184690157958688e-05, "loss": 1.9165, "step": 2998 }, { "epoch": 0.9103050538776749, "grad_norm": 0.39504629373550415, "learning_rate": 8.18408262454435e-05, "loss": 1.7551, "step": 2999 }, { "epoch": 0.9106085900743663, "grad_norm": 0.3621729612350464, "learning_rate": 8.183475091130013e-05, "loss": 1.4211, "step": 3000 }, { "epoch": 0.9109121262710578, "grad_norm": 0.6058910489082336, "learning_rate": 8.182867557715675e-05, "loss": 1.7533, "step": 3001 }, { "epoch": 0.9112156624677493, "grad_norm": 0.49985721707344055, "learning_rate": 8.182260024301336e-05, "loss": 2.0437, "step": 3002 }, { "epoch": 0.9115191986644408, "grad_norm": 14.125785827636719, "learning_rate": 8.181652490887e-05, "loss": 1.3568, "step": 3003 }, { "epoch": 0.9118227348611322, "grad_norm": 0.42591822147369385, "learning_rate": 8.181044957472661e-05, "loss": 1.9824, "step": 3004 }, { "epoch": 0.9121262710578236, "grad_norm": 0.4781965911388397, "learning_rate": 8.180437424058324e-05, "loss": 2.0458, "step": 3005 }, { "epoch": 0.9124298072545151, "grad_norm": 0.40637922286987305, "learning_rate": 8.179829890643986e-05, "loss": 1.9987, "step": 3006 }, { "epoch": 0.9127333434512066, "grad_norm": 0.43722665309906006, "learning_rate": 8.179222357229648e-05, "loss": 2.0207, "step": 3007 }, { "epoch": 0.913036879647898, "grad_norm": 2.785123348236084, "learning_rate": 8.178614823815311e-05, "loss": 2.0649, "step": 3008 }, { "epoch": 0.9133404158445895, "grad_norm": 0.4118681848049164, "learning_rate": 8.178007290400971e-05, "loss": 1.8433, "step": 3009 }, { "epoch": 0.9136439520412809, "grad_norm": 0.38384199142456055, "learning_rate": 8.177399756986634e-05, "loss": 1.686, "step": 3010 }, { "epoch": 0.9139474882379723, "grad_norm": 0.41295409202575684, "learning_rate": 8.176792223572297e-05, "loss": 1.8854, "step": 3011 }, { "epoch": 0.9142510244346639, "grad_norm": 0.40270209312438965, "learning_rate": 8.176184690157959e-05, "loss": 1.8403, "step": 3012 }, { "epoch": 0.9145545606313553, "grad_norm": 0.4634084701538086, "learning_rate": 8.175577156743621e-05, "loss": 1.2805, "step": 3013 }, { "epoch": 0.9148580968280468, "grad_norm": 0.37608620524406433, "learning_rate": 8.174969623329284e-05, "loss": 1.3985, "step": 3014 }, { "epoch": 0.9151616330247382, "grad_norm": 0.47492894530296326, "learning_rate": 8.174362089914946e-05, "loss": 1.6559, "step": 3015 }, { "epoch": 0.9154651692214296, "grad_norm": 0.3841186463832855, "learning_rate": 8.173754556500607e-05, "loss": 2.0221, "step": 3016 }, { "epoch": 0.9157687054181212, "grad_norm": 0.40183159708976746, "learning_rate": 8.17314702308627e-05, "loss": 1.7669, "step": 3017 }, { "epoch": 0.9160722416148126, "grad_norm": 0.4649689197540283, "learning_rate": 8.172539489671932e-05, "loss": 1.9634, "step": 3018 }, { "epoch": 0.916375777811504, "grad_norm": 0.5210034847259521, "learning_rate": 8.171931956257594e-05, "loss": 1.5208, "step": 3019 }, { "epoch": 0.9166793140081955, "grad_norm": 0.41098159551620483, "learning_rate": 8.171324422843257e-05, "loss": 2.1456, "step": 3020 }, { "epoch": 0.9169828502048869, "grad_norm": 0.4477085769176483, "learning_rate": 8.170716889428919e-05, "loss": 2.1252, "step": 3021 }, { "epoch": 0.9172863864015783, "grad_norm": 0.6705775856971741, "learning_rate": 8.170109356014582e-05, "loss": 1.7007, "step": 3022 }, { "epoch": 0.9175899225982699, "grad_norm": 0.3919045925140381, "learning_rate": 8.169501822600242e-05, "loss": 1.9774, "step": 3023 }, { "epoch": 0.9178934587949613, "grad_norm": 0.41216278076171875, "learning_rate": 8.168894289185905e-05, "loss": 1.845, "step": 3024 }, { "epoch": 0.9181969949916527, "grad_norm": 0.4093484580516815, "learning_rate": 8.168286755771568e-05, "loss": 1.7517, "step": 3025 }, { "epoch": 0.9185005311883442, "grad_norm": 0.4002762734889984, "learning_rate": 8.16767922235723e-05, "loss": 1.9234, "step": 3026 }, { "epoch": 0.9188040673850356, "grad_norm": 0.3966367542743683, "learning_rate": 8.167071688942892e-05, "loss": 1.9905, "step": 3027 }, { "epoch": 0.9191076035817272, "grad_norm": 0.4566415250301361, "learning_rate": 8.166464155528555e-05, "loss": 1.6606, "step": 3028 }, { "epoch": 0.9194111397784186, "grad_norm": 0.5808939933776855, "learning_rate": 8.165856622114217e-05, "loss": 1.6156, "step": 3029 }, { "epoch": 0.91971467597511, "grad_norm": 0.4700441062450409, "learning_rate": 8.165249088699878e-05, "loss": 1.9213, "step": 3030 }, { "epoch": 0.9200182121718015, "grad_norm": 0.4296051263809204, "learning_rate": 8.164641555285542e-05, "loss": 2.0518, "step": 3031 }, { "epoch": 0.9203217483684929, "grad_norm": 0.469310462474823, "learning_rate": 8.164034021871203e-05, "loss": 2.1185, "step": 3032 }, { "epoch": 0.9206252845651844, "grad_norm": 0.45901113748550415, "learning_rate": 8.163426488456865e-05, "loss": 1.8144, "step": 3033 }, { "epoch": 0.9209288207618759, "grad_norm": 0.40197721123695374, "learning_rate": 8.162818955042528e-05, "loss": 1.7781, "step": 3034 }, { "epoch": 0.9212323569585673, "grad_norm": 0.41534188389778137, "learning_rate": 8.16221142162819e-05, "loss": 1.9972, "step": 3035 }, { "epoch": 0.9215358931552587, "grad_norm": 0.4121875762939453, "learning_rate": 8.161603888213853e-05, "loss": 1.4284, "step": 3036 }, { "epoch": 0.9218394293519502, "grad_norm": 0.4393114149570465, "learning_rate": 8.160996354799513e-05, "loss": 1.785, "step": 3037 }, { "epoch": 0.9221429655486417, "grad_norm": 0.3844849467277527, "learning_rate": 8.160388821385176e-05, "loss": 1.8865, "step": 3038 }, { "epoch": 0.9224465017453332, "grad_norm": 0.4221876859664917, "learning_rate": 8.15978128797084e-05, "loss": 1.8166, "step": 3039 }, { "epoch": 0.9227500379420246, "grad_norm": 0.4294770359992981, "learning_rate": 8.159173754556501e-05, "loss": 1.8273, "step": 3040 }, { "epoch": 0.923053574138716, "grad_norm": 0.41192346811294556, "learning_rate": 8.158566221142163e-05, "loss": 1.9982, "step": 3041 }, { "epoch": 0.9233571103354075, "grad_norm": 0.4439050555229187, "learning_rate": 8.157958687727826e-05, "loss": 2.0525, "step": 3042 }, { "epoch": 0.923660646532099, "grad_norm": 0.4909377694129944, "learning_rate": 8.157351154313488e-05, "loss": 1.7385, "step": 3043 }, { "epoch": 0.9239641827287904, "grad_norm": 0.3646416664123535, "learning_rate": 8.15674362089915e-05, "loss": 1.7543, "step": 3044 }, { "epoch": 0.9242677189254819, "grad_norm": 0.46845096349716187, "learning_rate": 8.156136087484813e-05, "loss": 1.5907, "step": 3045 }, { "epoch": 0.9245712551221733, "grad_norm": 0.3998015224933624, "learning_rate": 8.155528554070474e-05, "loss": 1.9652, "step": 3046 }, { "epoch": 0.9248747913188647, "grad_norm": 0.43011385202407837, "learning_rate": 8.154921020656136e-05, "loss": 1.9294, "step": 3047 }, { "epoch": 0.9251783275155562, "grad_norm": 1.033368706703186, "learning_rate": 8.154313487241799e-05, "loss": 1.9883, "step": 3048 }, { "epoch": 0.9254818637122477, "grad_norm": 0.6372964382171631, "learning_rate": 8.153705953827461e-05, "loss": 1.6381, "step": 3049 }, { "epoch": 0.9257853999089392, "grad_norm": 0.4168377220630646, "learning_rate": 8.153098420413124e-05, "loss": 1.5776, "step": 3050 }, { "epoch": 0.9260889361056306, "grad_norm": 0.4470007121562958, "learning_rate": 8.152490886998784e-05, "loss": 1.7431, "step": 3051 }, { "epoch": 0.926392472302322, "grad_norm": 0.4876750111579895, "learning_rate": 8.151883353584447e-05, "loss": 2.0082, "step": 3052 }, { "epoch": 0.9266960084990135, "grad_norm": 0.4005252718925476, "learning_rate": 8.15127582017011e-05, "loss": 1.8192, "step": 3053 }, { "epoch": 0.926999544695705, "grad_norm": 0.4852685332298279, "learning_rate": 8.150668286755772e-05, "loss": 1.5553, "step": 3054 }, { "epoch": 0.9273030808923964, "grad_norm": 0.4594980776309967, "learning_rate": 8.150060753341434e-05, "loss": 1.784, "step": 3055 }, { "epoch": 0.9276066170890879, "grad_norm": 0.34720897674560547, "learning_rate": 8.149453219927097e-05, "loss": 1.8604, "step": 3056 }, { "epoch": 0.9279101532857793, "grad_norm": 0.423211932182312, "learning_rate": 8.148845686512759e-05, "loss": 1.9792, "step": 3057 }, { "epoch": 0.9282136894824707, "grad_norm": 0.42972126603126526, "learning_rate": 8.14823815309842e-05, "loss": 1.4225, "step": 3058 }, { "epoch": 0.9285172256791623, "grad_norm": 0.38373371958732605, "learning_rate": 8.147630619684084e-05, "loss": 1.6397, "step": 3059 }, { "epoch": 0.9288207618758537, "grad_norm": 0.4351721405982971, "learning_rate": 8.147023086269745e-05, "loss": 1.6237, "step": 3060 }, { "epoch": 0.9291242980725452, "grad_norm": 0.41888755559921265, "learning_rate": 8.146415552855407e-05, "loss": 1.7037, "step": 3061 }, { "epoch": 0.9294278342692366, "grad_norm": 0.43660473823547363, "learning_rate": 8.14580801944107e-05, "loss": 1.9307, "step": 3062 }, { "epoch": 0.929731370465928, "grad_norm": 0.4016878008842468, "learning_rate": 8.145200486026732e-05, "loss": 1.7994, "step": 3063 }, { "epoch": 0.9300349066626196, "grad_norm": 0.5155421495437622, "learning_rate": 8.144592952612395e-05, "loss": 1.7505, "step": 3064 }, { "epoch": 0.930338442859311, "grad_norm": 0.4258996844291687, "learning_rate": 8.143985419198055e-05, "loss": 1.1935, "step": 3065 }, { "epoch": 0.9306419790560024, "grad_norm": 0.5270261168479919, "learning_rate": 8.143377885783718e-05, "loss": 2.095, "step": 3066 }, { "epoch": 0.9309455152526939, "grad_norm": 0.382199764251709, "learning_rate": 8.142770352369381e-05, "loss": 1.9493, "step": 3067 }, { "epoch": 0.9312490514493853, "grad_norm": 0.6669699549674988, "learning_rate": 8.142162818955042e-05, "loss": 2.07, "step": 3068 }, { "epoch": 0.9315525876460768, "grad_norm": 0.3749605417251587, "learning_rate": 8.141555285540705e-05, "loss": 1.5008, "step": 3069 }, { "epoch": 0.9318561238427683, "grad_norm": 0.4507908523082733, "learning_rate": 8.140947752126368e-05, "loss": 2.2879, "step": 3070 }, { "epoch": 0.9321596600394597, "grad_norm": 0.42423611879348755, "learning_rate": 8.14034021871203e-05, "loss": 2.0068, "step": 3071 }, { "epoch": 0.9324631962361511, "grad_norm": 0.4780293405056, "learning_rate": 8.139732685297691e-05, "loss": 1.9496, "step": 3072 }, { "epoch": 0.9327667324328426, "grad_norm": 0.4152267873287201, "learning_rate": 8.139125151883355e-05, "loss": 1.7815, "step": 3073 }, { "epoch": 0.933070268629534, "grad_norm": 0.40453848242759705, "learning_rate": 8.138517618469016e-05, "loss": 1.825, "step": 3074 }, { "epoch": 0.9333738048262256, "grad_norm": 0.48477646708488464, "learning_rate": 8.137910085054678e-05, "loss": 1.7566, "step": 3075 }, { "epoch": 0.933677341022917, "grad_norm": 0.49090731143951416, "learning_rate": 8.13730255164034e-05, "loss": 1.5743, "step": 3076 }, { "epoch": 0.9339808772196084, "grad_norm": 0.44307780265808105, "learning_rate": 8.136695018226003e-05, "loss": 1.9686, "step": 3077 }, { "epoch": 0.9342844134162999, "grad_norm": 0.6790413856506348, "learning_rate": 8.136087484811666e-05, "loss": 2.0449, "step": 3078 }, { "epoch": 0.9345879496129913, "grad_norm": 0.46484366059303284, "learning_rate": 8.135479951397326e-05, "loss": 1.6846, "step": 3079 }, { "epoch": 0.9348914858096828, "grad_norm": 0.5237354636192322, "learning_rate": 8.13487241798299e-05, "loss": 1.9181, "step": 3080 }, { "epoch": 0.9351950220063743, "grad_norm": 0.3992574214935303, "learning_rate": 8.134264884568652e-05, "loss": 1.8549, "step": 3081 }, { "epoch": 0.9354985582030657, "grad_norm": 0.37925541400909424, "learning_rate": 8.133657351154313e-05, "loss": 1.5961, "step": 3082 }, { "epoch": 0.9358020943997571, "grad_norm": 2.249074935913086, "learning_rate": 8.133049817739976e-05, "loss": 1.7813, "step": 3083 }, { "epoch": 0.9361056305964486, "grad_norm": 0.42430388927459717, "learning_rate": 8.132442284325639e-05, "loss": 1.5935, "step": 3084 }, { "epoch": 0.9364091667931401, "grad_norm": 0.42997804284095764, "learning_rate": 8.131834750911301e-05, "loss": 1.7546, "step": 3085 }, { "epoch": 0.9367127029898316, "grad_norm": 0.3883001506328583, "learning_rate": 8.131227217496962e-05, "loss": 1.9123, "step": 3086 }, { "epoch": 0.937016239186523, "grad_norm": 0.39124423265457153, "learning_rate": 8.130619684082626e-05, "loss": 1.4895, "step": 3087 }, { "epoch": 0.9373197753832144, "grad_norm": 0.41227900981903076, "learning_rate": 8.130012150668287e-05, "loss": 1.8467, "step": 3088 }, { "epoch": 0.9376233115799059, "grad_norm": 0.40440481901168823, "learning_rate": 8.129404617253949e-05, "loss": 1.678, "step": 3089 }, { "epoch": 0.9379268477765974, "grad_norm": 0.406684011220932, "learning_rate": 8.128797083839611e-05, "loss": 1.7328, "step": 3090 }, { "epoch": 0.9382303839732888, "grad_norm": 0.409196138381958, "learning_rate": 8.128189550425274e-05, "loss": 1.645, "step": 3091 }, { "epoch": 0.9385339201699803, "grad_norm": 0.46844586730003357, "learning_rate": 8.127582017010936e-05, "loss": 1.9826, "step": 3092 }, { "epoch": 0.9388374563666717, "grad_norm": 0.4813016355037689, "learning_rate": 8.126974483596597e-05, "loss": 1.5903, "step": 3093 }, { "epoch": 0.9391409925633631, "grad_norm": 0.4739225208759308, "learning_rate": 8.12636695018226e-05, "loss": 1.7212, "step": 3094 }, { "epoch": 0.9394445287600547, "grad_norm": 0.39909741282463074, "learning_rate": 8.125759416767923e-05, "loss": 2.0123, "step": 3095 }, { "epoch": 0.9397480649567461, "grad_norm": 0.4105834662914276, "learning_rate": 8.125151883353584e-05, "loss": 1.9729, "step": 3096 }, { "epoch": 0.9400516011534376, "grad_norm": 0.41497233510017395, "learning_rate": 8.124544349939247e-05, "loss": 2.0673, "step": 3097 }, { "epoch": 0.940355137350129, "grad_norm": 0.443263441324234, "learning_rate": 8.12393681652491e-05, "loss": 1.6216, "step": 3098 }, { "epoch": 0.9406586735468204, "grad_norm": 0.47175195813179016, "learning_rate": 8.123329283110572e-05, "loss": 1.594, "step": 3099 }, { "epoch": 0.940962209743512, "grad_norm": 0.434952974319458, "learning_rate": 8.122721749696233e-05, "loss": 1.8733, "step": 3100 }, { "epoch": 0.9412657459402034, "grad_norm": 0.5037057399749756, "learning_rate": 8.122114216281897e-05, "loss": 1.5384, "step": 3101 }, { "epoch": 0.9415692821368948, "grad_norm": 0.39780277013778687, "learning_rate": 8.121506682867558e-05, "loss": 2.0412, "step": 3102 }, { "epoch": 0.9418728183335863, "grad_norm": 0.4376054108142853, "learning_rate": 8.12089914945322e-05, "loss": 1.9531, "step": 3103 }, { "epoch": 0.9421763545302777, "grad_norm": 0.40796467661857605, "learning_rate": 8.120291616038882e-05, "loss": 2.062, "step": 3104 }, { "epoch": 0.9424798907269691, "grad_norm": 0.47094616293907166, "learning_rate": 8.119684082624545e-05, "loss": 2.16, "step": 3105 }, { "epoch": 0.9427834269236607, "grad_norm": 0.4758855104446411, "learning_rate": 8.119076549210207e-05, "loss": 1.9761, "step": 3106 }, { "epoch": 0.9430869631203521, "grad_norm": 0.3994719088077545, "learning_rate": 8.118469015795868e-05, "loss": 1.8639, "step": 3107 }, { "epoch": 0.9433904993170436, "grad_norm": 0.39443784952163696, "learning_rate": 8.117861482381531e-05, "loss": 1.8374, "step": 3108 }, { "epoch": 0.943694035513735, "grad_norm": 0.3997192978858948, "learning_rate": 8.117253948967194e-05, "loss": 1.8491, "step": 3109 }, { "epoch": 0.9439975717104264, "grad_norm": 0.4563603401184082, "learning_rate": 8.116646415552855e-05, "loss": 1.4991, "step": 3110 }, { "epoch": 0.944301107907118, "grad_norm": 0.4601759612560272, "learning_rate": 8.116038882138518e-05, "loss": 1.6056, "step": 3111 }, { "epoch": 0.9446046441038094, "grad_norm": 0.39985764026641846, "learning_rate": 8.115431348724181e-05, "loss": 1.391, "step": 3112 }, { "epoch": 0.9449081803005008, "grad_norm": 0.5546020269393921, "learning_rate": 8.114823815309843e-05, "loss": 1.9887, "step": 3113 }, { "epoch": 0.9452117164971923, "grad_norm": 0.4334501624107361, "learning_rate": 8.114216281895504e-05, "loss": 2.1764, "step": 3114 }, { "epoch": 0.9455152526938837, "grad_norm": 0.47174403071403503, "learning_rate": 8.113608748481168e-05, "loss": 1.8397, "step": 3115 }, { "epoch": 0.9458187888905752, "grad_norm": 0.4174114465713501, "learning_rate": 8.113001215066829e-05, "loss": 2.2144, "step": 3116 }, { "epoch": 0.9461223250872667, "grad_norm": 0.7976917028427124, "learning_rate": 8.112393681652491e-05, "loss": 1.3549, "step": 3117 }, { "epoch": 0.9464258612839581, "grad_norm": 0.3866395950317383, "learning_rate": 8.111786148238153e-05, "loss": 2.0, "step": 3118 }, { "epoch": 0.9467293974806495, "grad_norm": 0.43397247791290283, "learning_rate": 8.111178614823816e-05, "loss": 1.7408, "step": 3119 }, { "epoch": 0.947032933677341, "grad_norm": 0.4277322292327881, "learning_rate": 8.110571081409478e-05, "loss": 1.8638, "step": 3120 }, { "epoch": 0.9473364698740325, "grad_norm": 0.38876983523368835, "learning_rate": 8.109963547995139e-05, "loss": 1.7645, "step": 3121 }, { "epoch": 0.947640006070724, "grad_norm": 0.37743645906448364, "learning_rate": 8.109356014580802e-05, "loss": 1.9963, "step": 3122 }, { "epoch": 0.9479435422674154, "grad_norm": 0.43921002745628357, "learning_rate": 8.108748481166465e-05, "loss": 2.0021, "step": 3123 }, { "epoch": 0.9482470784641068, "grad_norm": 0.4989663362503052, "learning_rate": 8.108140947752126e-05, "loss": 1.6826, "step": 3124 }, { "epoch": 0.9485506146607983, "grad_norm": 0.40931862592697144, "learning_rate": 8.107533414337789e-05, "loss": 1.681, "step": 3125 }, { "epoch": 0.9488541508574898, "grad_norm": 0.44620081782341003, "learning_rate": 8.106925880923452e-05, "loss": 2.0738, "step": 3126 }, { "epoch": 0.9491576870541812, "grad_norm": 0.42712563276290894, "learning_rate": 8.106318347509114e-05, "loss": 2.078, "step": 3127 }, { "epoch": 0.9494612232508727, "grad_norm": 2.7110748291015625, "learning_rate": 8.105710814094775e-05, "loss": 1.8328, "step": 3128 }, { "epoch": 0.9497647594475641, "grad_norm": 0.40240269899368286, "learning_rate": 8.105103280680439e-05, "loss": 1.8519, "step": 3129 }, { "epoch": 0.9500682956442555, "grad_norm": 0.43927666544914246, "learning_rate": 8.1044957472661e-05, "loss": 1.7309, "step": 3130 }, { "epoch": 0.950371831840947, "grad_norm": 0.4225032925605774, "learning_rate": 8.103888213851762e-05, "loss": 1.9518, "step": 3131 }, { "epoch": 0.9506753680376385, "grad_norm": 0.4135547876358032, "learning_rate": 8.103280680437424e-05, "loss": 1.791, "step": 3132 }, { "epoch": 0.95097890423433, "grad_norm": 0.5137977004051208, "learning_rate": 8.102673147023087e-05, "loss": 1.8914, "step": 3133 }, { "epoch": 0.9512824404310214, "grad_norm": 0.44080087542533875, "learning_rate": 8.102065613608749e-05, "loss": 1.6005, "step": 3134 }, { "epoch": 0.9515859766277128, "grad_norm": 0.4912469983100891, "learning_rate": 8.10145808019441e-05, "loss": 1.7609, "step": 3135 }, { "epoch": 0.9518895128244043, "grad_norm": 0.6660062074661255, "learning_rate": 8.100850546780073e-05, "loss": 2.0835, "step": 3136 }, { "epoch": 0.9521930490210958, "grad_norm": 0.39112183451652527, "learning_rate": 8.100243013365736e-05, "loss": 1.9975, "step": 3137 }, { "epoch": 0.9524965852177872, "grad_norm": 0.41470736265182495, "learning_rate": 8.099635479951397e-05, "loss": 1.8165, "step": 3138 }, { "epoch": 0.9528001214144787, "grad_norm": 0.6125030517578125, "learning_rate": 8.09902794653706e-05, "loss": 1.4878, "step": 3139 }, { "epoch": 0.9531036576111701, "grad_norm": 0.3625620901584625, "learning_rate": 8.098420413122723e-05, "loss": 1.7098, "step": 3140 }, { "epoch": 0.9534071938078615, "grad_norm": 0.3737241327762604, "learning_rate": 8.097812879708383e-05, "loss": 1.8225, "step": 3141 }, { "epoch": 0.9537107300045531, "grad_norm": 0.4835364520549774, "learning_rate": 8.097205346294047e-05, "loss": 2.2708, "step": 3142 }, { "epoch": 0.9540142662012445, "grad_norm": 0.3605796694755554, "learning_rate": 8.09659781287971e-05, "loss": 2.1616, "step": 3143 }, { "epoch": 0.954317802397936, "grad_norm": 0.42037534713745117, "learning_rate": 8.095990279465371e-05, "loss": 1.8847, "step": 3144 }, { "epoch": 0.9546213385946274, "grad_norm": 0.4341660141944885, "learning_rate": 8.095382746051033e-05, "loss": 1.579, "step": 3145 }, { "epoch": 0.9549248747913188, "grad_norm": 0.39239785075187683, "learning_rate": 8.094775212636695e-05, "loss": 1.9024, "step": 3146 }, { "epoch": 0.9552284109880104, "grad_norm": 0.4219903349876404, "learning_rate": 8.094167679222358e-05, "loss": 1.7518, "step": 3147 }, { "epoch": 0.9555319471847018, "grad_norm": 0.36863937973976135, "learning_rate": 8.09356014580802e-05, "loss": 2.0326, "step": 3148 }, { "epoch": 0.9558354833813932, "grad_norm": 0.4089399576187134, "learning_rate": 8.092952612393681e-05, "loss": 1.9478, "step": 3149 }, { "epoch": 0.9561390195780847, "grad_norm": 0.3865533769130707, "learning_rate": 8.092345078979344e-05, "loss": 1.883, "step": 3150 }, { "epoch": 0.9564425557747761, "grad_norm": 0.3673511743545532, "learning_rate": 8.091737545565008e-05, "loss": 1.8116, "step": 3151 }, { "epoch": 0.9567460919714676, "grad_norm": 0.4296679198741913, "learning_rate": 8.091130012150668e-05, "loss": 1.891, "step": 3152 }, { "epoch": 0.9570496281681591, "grad_norm": 0.3618902266025543, "learning_rate": 8.090522478736331e-05, "loss": 1.8951, "step": 3153 }, { "epoch": 0.9573531643648505, "grad_norm": 0.3620889484882355, "learning_rate": 8.089914945321994e-05, "loss": 1.5316, "step": 3154 }, { "epoch": 0.957656700561542, "grad_norm": 0.4978037178516388, "learning_rate": 8.089307411907654e-05, "loss": 1.6175, "step": 3155 }, { "epoch": 0.9579602367582334, "grad_norm": 0.4385554790496826, "learning_rate": 8.088699878493318e-05, "loss": 1.8811, "step": 3156 }, { "epoch": 0.9582637729549248, "grad_norm": 0.42445600032806396, "learning_rate": 8.08809234507898e-05, "loss": 1.9388, "step": 3157 }, { "epoch": 0.9585673091516164, "grad_norm": 0.4952315092086792, "learning_rate": 8.087484811664642e-05, "loss": 2.0404, "step": 3158 }, { "epoch": 0.9588708453483078, "grad_norm": 0.3969573676586151, "learning_rate": 8.086877278250304e-05, "loss": 1.818, "step": 3159 }, { "epoch": 0.9591743815449992, "grad_norm": 0.41406628489494324, "learning_rate": 8.086269744835966e-05, "loss": 2.0014, "step": 3160 }, { "epoch": 0.9594779177416907, "grad_norm": 0.40631070733070374, "learning_rate": 8.085662211421629e-05, "loss": 1.7627, "step": 3161 }, { "epoch": 0.9597814539383821, "grad_norm": 0.41568198800086975, "learning_rate": 8.08505467800729e-05, "loss": 2.0605, "step": 3162 }, { "epoch": 0.9600849901350736, "grad_norm": 0.39019855856895447, "learning_rate": 8.084447144592952e-05, "loss": 2.3052, "step": 3163 }, { "epoch": 0.9603885263317651, "grad_norm": 0.42019182443618774, "learning_rate": 8.083839611178615e-05, "loss": 1.1618, "step": 3164 }, { "epoch": 0.9606920625284565, "grad_norm": 0.3448597192764282, "learning_rate": 8.083232077764277e-05, "loss": 1.1122, "step": 3165 }, { "epoch": 0.960995598725148, "grad_norm": 0.3484005928039551, "learning_rate": 8.082624544349939e-05, "loss": 1.3973, "step": 3166 }, { "epoch": 0.9612991349218394, "grad_norm": 0.3877616822719574, "learning_rate": 8.082017010935602e-05, "loss": 1.7281, "step": 3167 }, { "epoch": 0.9616026711185309, "grad_norm": 0.7124067544937134, "learning_rate": 8.081409477521265e-05, "loss": 1.7848, "step": 3168 }, { "epoch": 0.9619062073152224, "grad_norm": 0.4344068467617035, "learning_rate": 8.080801944106925e-05, "loss": 1.1598, "step": 3169 }, { "epoch": 0.9622097435119138, "grad_norm": 0.8230828046798706, "learning_rate": 8.080194410692589e-05, "loss": 1.7979, "step": 3170 }, { "epoch": 0.9625132797086052, "grad_norm": 0.45202380418777466, "learning_rate": 8.07958687727825e-05, "loss": 1.9827, "step": 3171 }, { "epoch": 0.9628168159052967, "grad_norm": 0.37519025802612305, "learning_rate": 8.078979343863913e-05, "loss": 1.9966, "step": 3172 }, { "epoch": 0.9631203521019882, "grad_norm": 0.42726776003837585, "learning_rate": 8.078371810449575e-05, "loss": 1.9923, "step": 3173 }, { "epoch": 0.9634238882986796, "grad_norm": 0.5753629207611084, "learning_rate": 8.077764277035237e-05, "loss": 1.695, "step": 3174 }, { "epoch": 0.9637274244953711, "grad_norm": 0.44009268283843994, "learning_rate": 8.0771567436209e-05, "loss": 1.5039, "step": 3175 }, { "epoch": 0.9640309606920625, "grad_norm": 0.42067059874534607, "learning_rate": 8.076549210206562e-05, "loss": 1.8273, "step": 3176 }, { "epoch": 0.964334496888754, "grad_norm": 0.44108089804649353, "learning_rate": 8.075941676792223e-05, "loss": 1.6372, "step": 3177 }, { "epoch": 0.9646380330854455, "grad_norm": 0.39648228883743286, "learning_rate": 8.075334143377886e-05, "loss": 2.1179, "step": 3178 }, { "epoch": 0.9649415692821369, "grad_norm": 0.45205631852149963, "learning_rate": 8.074726609963548e-05, "loss": 1.9005, "step": 3179 }, { "epoch": 0.9652451054788284, "grad_norm": 0.45935380458831787, "learning_rate": 8.07411907654921e-05, "loss": 1.7634, "step": 3180 }, { "epoch": 0.9655486416755198, "grad_norm": 0.3873693645000458, "learning_rate": 8.073511543134873e-05, "loss": 1.9629, "step": 3181 }, { "epoch": 0.9658521778722112, "grad_norm": 0.3731973469257355, "learning_rate": 8.072904009720536e-05, "loss": 2.0085, "step": 3182 }, { "epoch": 0.9661557140689028, "grad_norm": 0.45661619305610657, "learning_rate": 8.072296476306196e-05, "loss": 1.3693, "step": 3183 }, { "epoch": 0.9664592502655942, "grad_norm": 0.47569990158081055, "learning_rate": 8.07168894289186e-05, "loss": 1.6855, "step": 3184 }, { "epoch": 0.9667627864622856, "grad_norm": 0.4035504460334778, "learning_rate": 8.071081409477521e-05, "loss": 2.1902, "step": 3185 }, { "epoch": 0.9670663226589771, "grad_norm": 0.6134029030799866, "learning_rate": 8.070473876063184e-05, "loss": 2.0059, "step": 3186 }, { "epoch": 0.9673698588556685, "grad_norm": 0.5165479779243469, "learning_rate": 8.069866342648846e-05, "loss": 1.6179, "step": 3187 }, { "epoch": 0.9676733950523599, "grad_norm": 0.48403364419937134, "learning_rate": 8.069258809234508e-05, "loss": 2.1823, "step": 3188 }, { "epoch": 0.9679769312490515, "grad_norm": 0.4141898453235626, "learning_rate": 8.068651275820171e-05, "loss": 1.7823, "step": 3189 }, { "epoch": 0.9682804674457429, "grad_norm": 0.4937414228916168, "learning_rate": 8.068043742405833e-05, "loss": 1.5423, "step": 3190 }, { "epoch": 0.9685840036424344, "grad_norm": 0.39996278285980225, "learning_rate": 8.067436208991494e-05, "loss": 1.8201, "step": 3191 }, { "epoch": 0.9688875398391258, "grad_norm": 0.4205084443092346, "learning_rate": 8.066828675577157e-05, "loss": 2.1392, "step": 3192 }, { "epoch": 0.9691910760358172, "grad_norm": 0.43701469898223877, "learning_rate": 8.066221142162819e-05, "loss": 1.8178, "step": 3193 }, { "epoch": 0.9694946122325088, "grad_norm": 0.45265719294548035, "learning_rate": 8.065613608748481e-05, "loss": 1.6506, "step": 3194 }, { "epoch": 0.9697981484292002, "grad_norm": 0.43316105008125305, "learning_rate": 8.065006075334144e-05, "loss": 1.6709, "step": 3195 }, { "epoch": 0.9701016846258916, "grad_norm": 0.37833353877067566, "learning_rate": 8.064398541919807e-05, "loss": 1.4646, "step": 3196 }, { "epoch": 0.9704052208225831, "grad_norm": 0.444698691368103, "learning_rate": 8.063791008505467e-05, "loss": 1.6426, "step": 3197 }, { "epoch": 0.9707087570192745, "grad_norm": 0.43853360414505005, "learning_rate": 8.06318347509113e-05, "loss": 2.1562, "step": 3198 }, { "epoch": 0.971012293215966, "grad_norm": 0.6573916673660278, "learning_rate": 8.062575941676792e-05, "loss": 2.1327, "step": 3199 }, { "epoch": 0.9713158294126575, "grad_norm": 0.41661205887794495, "learning_rate": 8.061968408262455e-05, "loss": 1.68, "step": 3200 }, { "epoch": 0.9716193656093489, "grad_norm": 0.7264708876609802, "learning_rate": 8.061360874848117e-05, "loss": 1.8907, "step": 3201 }, { "epoch": 0.9719229018060404, "grad_norm": 0.3977676331996918, "learning_rate": 8.060753341433779e-05, "loss": 1.9205, "step": 3202 }, { "epoch": 0.9722264380027318, "grad_norm": 0.7518191337585449, "learning_rate": 8.060145808019442e-05, "loss": 1.4833, "step": 3203 }, { "epoch": 0.9725299741994233, "grad_norm": 0.4262489080429077, "learning_rate": 8.059538274605104e-05, "loss": 1.8307, "step": 3204 }, { "epoch": 0.9728335103961148, "grad_norm": 0.40081748366355896, "learning_rate": 8.058930741190765e-05, "loss": 0.9998, "step": 3205 }, { "epoch": 0.9731370465928062, "grad_norm": 0.46323978900909424, "learning_rate": 8.058323207776428e-05, "loss": 1.9063, "step": 3206 }, { "epoch": 0.9734405827894976, "grad_norm": 0.40302255749702454, "learning_rate": 8.05771567436209e-05, "loss": 1.8153, "step": 3207 }, { "epoch": 0.9737441189861891, "grad_norm": 0.4447222948074341, "learning_rate": 8.057108140947752e-05, "loss": 2.0744, "step": 3208 }, { "epoch": 0.9740476551828806, "grad_norm": 0.33706068992614746, "learning_rate": 8.056500607533415e-05, "loss": 1.7981, "step": 3209 }, { "epoch": 0.974351191379572, "grad_norm": 0.44239774346351624, "learning_rate": 8.055893074119078e-05, "loss": 1.7168, "step": 3210 }, { "epoch": 0.9746547275762635, "grad_norm": 0.41518473625183105, "learning_rate": 8.055285540704738e-05, "loss": 1.9278, "step": 3211 }, { "epoch": 0.9749582637729549, "grad_norm": 0.4727713167667389, "learning_rate": 8.054678007290402e-05, "loss": 1.6719, "step": 3212 }, { "epoch": 0.9752617999696463, "grad_norm": 0.4056665599346161, "learning_rate": 8.054070473876063e-05, "loss": 1.7211, "step": 3213 }, { "epoch": 0.9755653361663378, "grad_norm": 0.47795236110687256, "learning_rate": 8.053462940461726e-05, "loss": 1.5074, "step": 3214 }, { "epoch": 0.9758688723630293, "grad_norm": 0.5859802961349487, "learning_rate": 8.052855407047388e-05, "loss": 1.7038, "step": 3215 }, { "epoch": 0.9761724085597208, "grad_norm": 0.398113876581192, "learning_rate": 8.05224787363305e-05, "loss": 1.7802, "step": 3216 }, { "epoch": 0.9764759447564122, "grad_norm": 0.3661412000656128, "learning_rate": 8.051640340218713e-05, "loss": 1.4388, "step": 3217 }, { "epoch": 0.9767794809531036, "grad_norm": 0.40348801016807556, "learning_rate": 8.051032806804375e-05, "loss": 1.8187, "step": 3218 }, { "epoch": 0.9770830171497951, "grad_norm": 0.3885161876678467, "learning_rate": 8.050425273390036e-05, "loss": 1.7975, "step": 3219 }, { "epoch": 0.9773865533464866, "grad_norm": 0.3739737868309021, "learning_rate": 8.0498177399757e-05, "loss": 1.7525, "step": 3220 }, { "epoch": 0.977690089543178, "grad_norm": 0.48323333263397217, "learning_rate": 8.049210206561361e-05, "loss": 1.3203, "step": 3221 }, { "epoch": 0.9779936257398695, "grad_norm": 0.3983123004436493, "learning_rate": 8.048602673147023e-05, "loss": 1.994, "step": 3222 }, { "epoch": 0.9782971619365609, "grad_norm": 0.4193548560142517, "learning_rate": 8.047995139732686e-05, "loss": 1.811, "step": 3223 }, { "epoch": 0.9786006981332523, "grad_norm": 0.353444367647171, "learning_rate": 8.047387606318349e-05, "loss": 1.7789, "step": 3224 }, { "epoch": 0.9789042343299439, "grad_norm": 0.5068827867507935, "learning_rate": 8.04678007290401e-05, "loss": 1.9425, "step": 3225 }, { "epoch": 0.9792077705266353, "grad_norm": 0.42118749022483826, "learning_rate": 8.046172539489673e-05, "loss": 1.8055, "step": 3226 }, { "epoch": 0.9795113067233268, "grad_norm": 0.4077788293361664, "learning_rate": 8.045565006075334e-05, "loss": 1.3257, "step": 3227 }, { "epoch": 0.9798148429200182, "grad_norm": 0.45757341384887695, "learning_rate": 8.044957472660996e-05, "loss": 1.998, "step": 3228 }, { "epoch": 0.9801183791167096, "grad_norm": 0.4154861271381378, "learning_rate": 8.044349939246659e-05, "loss": 1.8151, "step": 3229 }, { "epoch": 0.9804219153134012, "grad_norm": 0.48109593987464905, "learning_rate": 8.043742405832321e-05, "loss": 1.7802, "step": 3230 }, { "epoch": 0.9807254515100926, "grad_norm": 0.4576222002506256, "learning_rate": 8.043134872417984e-05, "loss": 1.7928, "step": 3231 }, { "epoch": 0.981028987706784, "grad_norm": 0.38525086641311646, "learning_rate": 8.042527339003646e-05, "loss": 1.4135, "step": 3232 }, { "epoch": 0.9813325239034755, "grad_norm": 0.3866974115371704, "learning_rate": 8.041919805589307e-05, "loss": 1.9068, "step": 3233 }, { "epoch": 0.9816360601001669, "grad_norm": 0.421790212392807, "learning_rate": 8.04131227217497e-05, "loss": 1.8243, "step": 3234 }, { "epoch": 0.9819395962968585, "grad_norm": 0.5579865574836731, "learning_rate": 8.040704738760632e-05, "loss": 1.8662, "step": 3235 }, { "epoch": 0.9822431324935499, "grad_norm": 0.5178837180137634, "learning_rate": 8.040097205346294e-05, "loss": 1.815, "step": 3236 }, { "epoch": 0.9825466686902413, "grad_norm": 0.3817935883998871, "learning_rate": 8.039489671931957e-05, "loss": 2.2422, "step": 3237 }, { "epoch": 0.9828502048869328, "grad_norm": 0.4080420136451721, "learning_rate": 8.038882138517619e-05, "loss": 2.0504, "step": 3238 }, { "epoch": 0.9831537410836242, "grad_norm": 0.3719751536846161, "learning_rate": 8.03827460510328e-05, "loss": 1.9778, "step": 3239 }, { "epoch": 0.9834572772803156, "grad_norm": 0.36413270235061646, "learning_rate": 8.037667071688944e-05, "loss": 1.3326, "step": 3240 }, { "epoch": 0.9837608134770072, "grad_norm": 0.3482026755809784, "learning_rate": 8.037059538274605e-05, "loss": 1.7243, "step": 3241 }, { "epoch": 0.9840643496736986, "grad_norm": 0.3891375958919525, "learning_rate": 8.036452004860267e-05, "loss": 1.8408, "step": 3242 }, { "epoch": 0.98436788587039, "grad_norm": 0.4400385618209839, "learning_rate": 8.03584447144593e-05, "loss": 1.967, "step": 3243 }, { "epoch": 0.9846714220670815, "grad_norm": 0.3769470751285553, "learning_rate": 8.035236938031592e-05, "loss": 1.3612, "step": 3244 }, { "epoch": 0.9849749582637729, "grad_norm": 0.39424487948417664, "learning_rate": 8.034629404617255e-05, "loss": 1.8445, "step": 3245 }, { "epoch": 0.9852784944604644, "grad_norm": 0.4074876308441162, "learning_rate": 8.034021871202917e-05, "loss": 1.9461, "step": 3246 }, { "epoch": 0.9855820306571559, "grad_norm": 0.4052838683128357, "learning_rate": 8.033414337788578e-05, "loss": 1.6049, "step": 3247 }, { "epoch": 0.9858855668538473, "grad_norm": 0.4411472678184509, "learning_rate": 8.032806804374241e-05, "loss": 2.0511, "step": 3248 }, { "epoch": 0.9861891030505388, "grad_norm": 0.37311851978302, "learning_rate": 8.032199270959903e-05, "loss": 1.9176, "step": 3249 }, { "epoch": 0.9864926392472302, "grad_norm": 0.3146267235279083, "learning_rate": 8.031591737545565e-05, "loss": 1.813, "step": 3250 }, { "epoch": 0.9867961754439217, "grad_norm": 0.5194718241691589, "learning_rate": 8.030984204131228e-05, "loss": 2.0425, "step": 3251 }, { "epoch": 0.9870997116406132, "grad_norm": 0.37070557475090027, "learning_rate": 8.03037667071689e-05, "loss": 1.8255, "step": 3252 }, { "epoch": 0.9874032478373046, "grad_norm": 0.37021851539611816, "learning_rate": 8.029769137302551e-05, "loss": 2.2163, "step": 3253 }, { "epoch": 0.987706784033996, "grad_norm": 0.5118260979652405, "learning_rate": 8.029161603888215e-05, "loss": 1.4771, "step": 3254 }, { "epoch": 0.9880103202306875, "grad_norm": 0.4722789227962494, "learning_rate": 8.028554070473876e-05, "loss": 1.9019, "step": 3255 }, { "epoch": 0.988313856427379, "grad_norm": 0.3468252420425415, "learning_rate": 8.027946537059538e-05, "loss": 1.7159, "step": 3256 }, { "epoch": 0.9886173926240704, "grad_norm": 0.4422720968723297, "learning_rate": 8.027339003645201e-05, "loss": 1.8671, "step": 3257 }, { "epoch": 0.9889209288207619, "grad_norm": 0.46859246492385864, "learning_rate": 8.026731470230863e-05, "loss": 2.0586, "step": 3258 }, { "epoch": 0.9892244650174533, "grad_norm": 0.46339279413223267, "learning_rate": 8.026123936816526e-05, "loss": 1.7544, "step": 3259 }, { "epoch": 0.9895280012141447, "grad_norm": 0.3819115161895752, "learning_rate": 8.025516403402188e-05, "loss": 1.9807, "step": 3260 }, { "epoch": 0.9898315374108363, "grad_norm": 0.8981953263282776, "learning_rate": 8.02490886998785e-05, "loss": 1.5445, "step": 3261 }, { "epoch": 0.9901350736075277, "grad_norm": 0.4964045584201813, "learning_rate": 8.024301336573512e-05, "loss": 1.6069, "step": 3262 }, { "epoch": 0.9904386098042192, "grad_norm": 0.4120222330093384, "learning_rate": 8.023693803159174e-05, "loss": 1.8865, "step": 3263 }, { "epoch": 0.9907421460009106, "grad_norm": 0.44508838653564453, "learning_rate": 8.023086269744836e-05, "loss": 1.4333, "step": 3264 }, { "epoch": 0.991045682197602, "grad_norm": 0.3859883248806, "learning_rate": 8.022478736330499e-05, "loss": 1.9533, "step": 3265 }, { "epoch": 0.9913492183942936, "grad_norm": 0.4727214276790619, "learning_rate": 8.021871202916161e-05, "loss": 1.4972, "step": 3266 }, { "epoch": 0.991652754590985, "grad_norm": 0.8692718148231506, "learning_rate": 8.021263669501822e-05, "loss": 1.5174, "step": 3267 }, { "epoch": 0.9919562907876764, "grad_norm": 0.4142051637172699, "learning_rate": 8.020656136087486e-05, "loss": 1.8747, "step": 3268 }, { "epoch": 0.9922598269843679, "grad_norm": 0.4075202941894531, "learning_rate": 8.020048602673147e-05, "loss": 1.9666, "step": 3269 }, { "epoch": 0.9925633631810593, "grad_norm": 0.7073702216148376, "learning_rate": 8.019441069258809e-05, "loss": 1.9629, "step": 3270 }, { "epoch": 0.9928668993777507, "grad_norm": 0.4240557849407196, "learning_rate": 8.018833535844472e-05, "loss": 1.6251, "step": 3271 }, { "epoch": 0.9931704355744423, "grad_norm": 0.4226653277873993, "learning_rate": 8.018226002430134e-05, "loss": 1.9254, "step": 3272 }, { "epoch": 0.9934739717711337, "grad_norm": 0.40740150213241577, "learning_rate": 8.017618469015797e-05, "loss": 1.8494, "step": 3273 }, { "epoch": 0.9937775079678252, "grad_norm": 0.4575270712375641, "learning_rate": 8.017010935601459e-05, "loss": 1.5237, "step": 3274 }, { "epoch": 0.9940810441645166, "grad_norm": 0.48337459564208984, "learning_rate": 8.01640340218712e-05, "loss": 1.5319, "step": 3275 }, { "epoch": 0.994384580361208, "grad_norm": 0.3628256916999817, "learning_rate": 8.015795868772783e-05, "loss": 2.0454, "step": 3276 }, { "epoch": 0.9946881165578996, "grad_norm": 0.3945721387863159, "learning_rate": 8.015188335358445e-05, "loss": 2.0646, "step": 3277 }, { "epoch": 0.994991652754591, "grad_norm": 0.38345763087272644, "learning_rate": 8.014580801944107e-05, "loss": 1.4601, "step": 3278 }, { "epoch": 0.9952951889512824, "grad_norm": 0.47283461689949036, "learning_rate": 8.01397326852977e-05, "loss": 1.6951, "step": 3279 }, { "epoch": 0.9955987251479739, "grad_norm": 0.47353407740592957, "learning_rate": 8.013365735115432e-05, "loss": 2.0192, "step": 3280 }, { "epoch": 0.9959022613446653, "grad_norm": 0.41565829515457153, "learning_rate": 8.012758201701093e-05, "loss": 1.8168, "step": 3281 }, { "epoch": 0.9962057975413569, "grad_norm": 0.43817979097366333, "learning_rate": 8.012150668286757e-05, "loss": 2.1453, "step": 3282 }, { "epoch": 0.9965093337380483, "grad_norm": 0.5480432510375977, "learning_rate": 8.011543134872418e-05, "loss": 1.6872, "step": 3283 }, { "epoch": 0.9968128699347397, "grad_norm": 0.344694584608078, "learning_rate": 8.01093560145808e-05, "loss": 1.0506, "step": 3284 }, { "epoch": 0.9971164061314312, "grad_norm": 0.39683830738067627, "learning_rate": 8.010328068043743e-05, "loss": 1.9114, "step": 3285 }, { "epoch": 0.9974199423281226, "grad_norm": 0.865807294845581, "learning_rate": 8.009720534629405e-05, "loss": 2.0548, "step": 3286 }, { "epoch": 0.9977234785248141, "grad_norm": 0.4596058130264282, "learning_rate": 8.009113001215068e-05, "loss": 1.5162, "step": 3287 }, { "epoch": 0.9980270147215056, "grad_norm": 0.45966169238090515, "learning_rate": 8.00850546780073e-05, "loss": 1.9112, "step": 3288 }, { "epoch": 0.998330550918197, "grad_norm": 0.44408029317855835, "learning_rate": 8.007897934386391e-05, "loss": 1.7549, "step": 3289 }, { "epoch": 0.9986340871148884, "grad_norm": 0.4286332428455353, "learning_rate": 8.007290400972054e-05, "loss": 1.9614, "step": 3290 }, { "epoch": 0.9989376233115799, "grad_norm": 0.40551066398620605, "learning_rate": 8.006682867557716e-05, "loss": 1.9918, "step": 3291 }, { "epoch": 0.9992411595082714, "grad_norm": 0.41468697786331177, "learning_rate": 8.006075334143378e-05, "loss": 1.9249, "step": 3292 }, { "epoch": 0.9995446957049628, "grad_norm": 0.506384551525116, "learning_rate": 8.005467800729041e-05, "loss": 1.9334, "step": 3293 }, { "epoch": 0.9998482319016543, "grad_norm": 0.4209151268005371, "learning_rate": 8.004860267314703e-05, "loss": 1.728, "step": 3294 }, { "epoch": 1.0001517680983458, "grad_norm": 17.907875061035156, "learning_rate": 8.004252733900364e-05, "loss": 2.4794, "step": 3295 }, { "epoch": 1.0004553042950373, "grad_norm": 0.46547284722328186, "learning_rate": 8.003645200486028e-05, "loss": 1.4745, "step": 3296 }, { "epoch": 1.0007588404917287, "grad_norm": 0.3899800777435303, "learning_rate": 8.003037667071689e-05, "loss": 1.5406, "step": 3297 }, { "epoch": 1.0010623766884201, "grad_norm": 0.48273205757141113, "learning_rate": 8.002430133657351e-05, "loss": 1.4638, "step": 3298 }, { "epoch": 1.0013659128851116, "grad_norm": 0.36288753151893616, "learning_rate": 8.001822600243014e-05, "loss": 1.8582, "step": 3299 }, { "epoch": 1.001669449081803, "grad_norm": 0.4598756432533264, "learning_rate": 8.001215066828676e-05, "loss": 1.1343, "step": 3300 }, { "epoch": 1.0019729852784944, "grad_norm": 0.4313514530658722, "learning_rate": 8.000607533414338e-05, "loss": 1.6411, "step": 3301 }, { "epoch": 1.0022765214751859, "grad_norm": 0.5020793676376343, "learning_rate": 8e-05, "loss": 1.8333, "step": 3302 }, { "epoch": 1.0025800576718773, "grad_norm": 0.7939902544021606, "learning_rate": 7.999392466585662e-05, "loss": 1.0223, "step": 3303 }, { "epoch": 1.0028835938685687, "grad_norm": 0.42683956027030945, "learning_rate": 7.998784933171325e-05, "loss": 1.6555, "step": 3304 }, { "epoch": 1.0031871300652604, "grad_norm": 1.1804200410842896, "learning_rate": 7.998177399756987e-05, "loss": 1.2333, "step": 3305 }, { "epoch": 1.0034906662619518, "grad_norm": 0.4585864543914795, "learning_rate": 7.997569866342649e-05, "loss": 1.7396, "step": 3306 }, { "epoch": 1.0037942024586433, "grad_norm": 0.6374893188476562, "learning_rate": 7.996962332928312e-05, "loss": 1.4289, "step": 3307 }, { "epoch": 1.0040977386553347, "grad_norm": 0.7758880853652954, "learning_rate": 7.996354799513974e-05, "loss": 1.2918, "step": 3308 }, { "epoch": 1.0044012748520261, "grad_norm": 0.7899906039237976, "learning_rate": 7.995747266099635e-05, "loss": 0.9088, "step": 3309 }, { "epoch": 1.0047048110487176, "grad_norm": 0.5590714812278748, "learning_rate": 7.995139732685299e-05, "loss": 1.2513, "step": 3310 }, { "epoch": 1.005008347245409, "grad_norm": 0.49430859088897705, "learning_rate": 7.99453219927096e-05, "loss": 1.5882, "step": 3311 }, { "epoch": 1.0053118834421004, "grad_norm": 0.4428652822971344, "learning_rate": 7.993924665856622e-05, "loss": 1.6225, "step": 3312 }, { "epoch": 1.0056154196387919, "grad_norm": 1.8553460836410522, "learning_rate": 7.993317132442285e-05, "loss": 1.3601, "step": 3313 }, { "epoch": 1.0059189558354833, "grad_norm": 0.5211709141731262, "learning_rate": 7.992709599027947e-05, "loss": 1.8398, "step": 3314 }, { "epoch": 1.0062224920321747, "grad_norm": 0.7685166001319885, "learning_rate": 7.992102065613609e-05, "loss": 1.5013, "step": 3315 }, { "epoch": 1.0065260282288664, "grad_norm": 0.4375928044319153, "learning_rate": 7.991494532199272e-05, "loss": 1.8564, "step": 3316 }, { "epoch": 1.0068295644255578, "grad_norm": 0.44753187894821167, "learning_rate": 7.990886998784933e-05, "loss": 1.3722, "step": 3317 }, { "epoch": 1.0071331006222493, "grad_norm": 0.48083680868148804, "learning_rate": 7.990279465370596e-05, "loss": 1.4704, "step": 3318 }, { "epoch": 1.0074366368189407, "grad_norm": 0.3680810332298279, "learning_rate": 7.989671931956258e-05, "loss": 1.2053, "step": 3319 }, { "epoch": 1.0077401730156321, "grad_norm": 0.37688201665878296, "learning_rate": 7.98906439854192e-05, "loss": 1.9585, "step": 3320 }, { "epoch": 1.0080437092123236, "grad_norm": 0.4439717233181, "learning_rate": 7.988456865127583e-05, "loss": 1.6347, "step": 3321 }, { "epoch": 1.008347245409015, "grad_norm": 0.44323423504829407, "learning_rate": 7.987849331713245e-05, "loss": 1.7071, "step": 3322 }, { "epoch": 1.0086507816057064, "grad_norm": 0.44141215085983276, "learning_rate": 7.987241798298906e-05, "loss": 1.5686, "step": 3323 }, { "epoch": 1.0089543178023979, "grad_norm": 0.3377261459827423, "learning_rate": 7.98663426488457e-05, "loss": 1.0637, "step": 3324 }, { "epoch": 1.0092578539990893, "grad_norm": 0.679061770439148, "learning_rate": 7.986026731470231e-05, "loss": 1.6332, "step": 3325 }, { "epoch": 1.009561390195781, "grad_norm": 0.40934574604034424, "learning_rate": 7.985419198055893e-05, "loss": 1.591, "step": 3326 }, { "epoch": 1.0098649263924724, "grad_norm": 0.4708541929721832, "learning_rate": 7.984811664641556e-05, "loss": 1.4567, "step": 3327 }, { "epoch": 1.0101684625891638, "grad_norm": 0.4251214861869812, "learning_rate": 7.984204131227218e-05, "loss": 1.7905, "step": 3328 }, { "epoch": 1.0104719987858553, "grad_norm": 0.48691290616989136, "learning_rate": 7.98359659781288e-05, "loss": 1.8187, "step": 3329 }, { "epoch": 1.0107755349825467, "grad_norm": 0.4369681179523468, "learning_rate": 7.982989064398543e-05, "loss": 1.5784, "step": 3330 }, { "epoch": 1.0110790711792381, "grad_norm": 0.47362881898880005, "learning_rate": 7.982381530984204e-05, "loss": 1.2642, "step": 3331 }, { "epoch": 1.0113826073759296, "grad_norm": 0.4974597096443176, "learning_rate": 7.981773997569867e-05, "loss": 1.6925, "step": 3332 }, { "epoch": 1.011686143572621, "grad_norm": 0.46564406156539917, "learning_rate": 7.981166464155529e-05, "loss": 1.6209, "step": 3333 }, { "epoch": 1.0119896797693124, "grad_norm": 0.4477474093437195, "learning_rate": 7.980558930741191e-05, "loss": 1.8006, "step": 3334 }, { "epoch": 1.0122932159660039, "grad_norm": 0.4635123312473297, "learning_rate": 7.979951397326854e-05, "loss": 1.818, "step": 3335 }, { "epoch": 1.0125967521626955, "grad_norm": 0.43166083097457886, "learning_rate": 7.979343863912516e-05, "loss": 1.3599, "step": 3336 }, { "epoch": 1.012900288359387, "grad_norm": 0.39611899852752686, "learning_rate": 7.978736330498177e-05, "loss": 1.4963, "step": 3337 }, { "epoch": 1.0132038245560784, "grad_norm": 0.917677104473114, "learning_rate": 7.97812879708384e-05, "loss": 1.3816, "step": 3338 }, { "epoch": 1.0135073607527698, "grad_norm": 0.4772632420063019, "learning_rate": 7.977521263669502e-05, "loss": 1.699, "step": 3339 }, { "epoch": 1.0138108969494612, "grad_norm": 0.5998721122741699, "learning_rate": 7.976913730255164e-05, "loss": 1.5147, "step": 3340 }, { "epoch": 1.0141144331461527, "grad_norm": 0.47984611988067627, "learning_rate": 7.976306196840827e-05, "loss": 1.4805, "step": 3341 }, { "epoch": 1.0144179693428441, "grad_norm": 0.4247418940067291, "learning_rate": 7.975698663426489e-05, "loss": 1.3944, "step": 3342 }, { "epoch": 1.0147215055395356, "grad_norm": 0.5164505839347839, "learning_rate": 7.97509113001215e-05, "loss": 1.4708, "step": 3343 }, { "epoch": 1.015025041736227, "grad_norm": 0.43840450048446655, "learning_rate": 7.974483596597814e-05, "loss": 1.5382, "step": 3344 }, { "epoch": 1.0153285779329184, "grad_norm": 0.49245715141296387, "learning_rate": 7.973876063183475e-05, "loss": 1.6582, "step": 3345 }, { "epoch": 1.0156321141296099, "grad_norm": 0.6701889634132385, "learning_rate": 7.973268529769138e-05, "loss": 1.8984, "step": 3346 }, { "epoch": 1.0159356503263015, "grad_norm": 0.4831668734550476, "learning_rate": 7.972660996354799e-05, "loss": 1.4743, "step": 3347 }, { "epoch": 1.016239186522993, "grad_norm": 0.4388216733932495, "learning_rate": 7.972053462940462e-05, "loss": 1.5717, "step": 3348 }, { "epoch": 1.0165427227196844, "grad_norm": 0.3998357951641083, "learning_rate": 7.971445929526125e-05, "loss": 1.6379, "step": 3349 }, { "epoch": 1.0168462589163758, "grad_norm": 0.4894062876701355, "learning_rate": 7.970838396111785e-05, "loss": 1.3522, "step": 3350 }, { "epoch": 1.0171497951130672, "grad_norm": 0.6286391019821167, "learning_rate": 7.970230862697448e-05, "loss": 1.5174, "step": 3351 }, { "epoch": 1.0174533313097587, "grad_norm": 0.688150942325592, "learning_rate": 7.969623329283112e-05, "loss": 1.8916, "step": 3352 }, { "epoch": 1.0177568675064501, "grad_norm": 0.39143821597099304, "learning_rate": 7.969015795868773e-05, "loss": 1.7765, "step": 3353 }, { "epoch": 1.0180604037031415, "grad_norm": 0.5299899578094482, "learning_rate": 7.968408262454435e-05, "loss": 1.5828, "step": 3354 }, { "epoch": 1.018363939899833, "grad_norm": 0.43358203768730164, "learning_rate": 7.967800729040098e-05, "loss": 1.4084, "step": 3355 }, { "epoch": 1.0186674760965244, "grad_norm": 0.43103456497192383, "learning_rate": 7.96719319562576e-05, "loss": 1.5633, "step": 3356 }, { "epoch": 1.018971012293216, "grad_norm": 0.4097878634929657, "learning_rate": 7.966585662211422e-05, "loss": 1.3976, "step": 3357 }, { "epoch": 1.0192745484899075, "grad_norm": 0.47395214438438416, "learning_rate": 7.965978128797085e-05, "loss": 1.6707, "step": 3358 }, { "epoch": 1.019578084686599, "grad_norm": 0.6641651391983032, "learning_rate": 7.965370595382746e-05, "loss": 1.8728, "step": 3359 }, { "epoch": 1.0198816208832904, "grad_norm": 0.48009195923805237, "learning_rate": 7.96476306196841e-05, "loss": 1.675, "step": 3360 }, { "epoch": 1.0201851570799818, "grad_norm": 0.430106520652771, "learning_rate": 7.96415552855407e-05, "loss": 1.7039, "step": 3361 }, { "epoch": 1.0204886932766732, "grad_norm": 0.42592278122901917, "learning_rate": 7.963547995139733e-05, "loss": 1.5168, "step": 3362 }, { "epoch": 1.0207922294733647, "grad_norm": 0.5778846144676208, "learning_rate": 7.962940461725396e-05, "loss": 1.0481, "step": 3363 }, { "epoch": 1.021095765670056, "grad_norm": 0.4378105103969574, "learning_rate": 7.962332928311056e-05, "loss": 1.9381, "step": 3364 }, { "epoch": 1.0213993018667475, "grad_norm": 0.4664958715438843, "learning_rate": 7.96172539489672e-05, "loss": 1.6863, "step": 3365 }, { "epoch": 1.021702838063439, "grad_norm": 0.455496609210968, "learning_rate": 7.961117861482383e-05, "loss": 1.7554, "step": 3366 }, { "epoch": 1.0220063742601304, "grad_norm": 0.5868107676506042, "learning_rate": 7.960510328068044e-05, "loss": 1.9657, "step": 3367 }, { "epoch": 1.022309910456822, "grad_norm": 0.5736465454101562, "learning_rate": 7.959902794653706e-05, "loss": 1.2157, "step": 3368 }, { "epoch": 1.0226134466535135, "grad_norm": 0.3856525421142578, "learning_rate": 7.959295261239369e-05, "loss": 1.7684, "step": 3369 }, { "epoch": 1.022916982850205, "grad_norm": 0.5012997388839722, "learning_rate": 7.958687727825031e-05, "loss": 1.5705, "step": 3370 }, { "epoch": 1.0232205190468964, "grad_norm": 0.4648292362689972, "learning_rate": 7.958080194410693e-05, "loss": 1.9879, "step": 3371 }, { "epoch": 1.0235240552435878, "grad_norm": 0.39332127571105957, "learning_rate": 7.957472660996356e-05, "loss": 1.741, "step": 3372 }, { "epoch": 1.0238275914402792, "grad_norm": 0.4548643231391907, "learning_rate": 7.956865127582017e-05, "loss": 1.6544, "step": 3373 }, { "epoch": 1.0241311276369707, "grad_norm": 0.36641523241996765, "learning_rate": 7.956257594167679e-05, "loss": 1.2098, "step": 3374 }, { "epoch": 1.024434663833662, "grad_norm": 0.46462637186050415, "learning_rate": 7.955650060753341e-05, "loss": 1.3876, "step": 3375 }, { "epoch": 1.0247382000303535, "grad_norm": 0.9742159247398376, "learning_rate": 7.955042527339004e-05, "loss": 1.8654, "step": 3376 }, { "epoch": 1.025041736227045, "grad_norm": 0.5226752758026123, "learning_rate": 7.954434993924667e-05, "loss": 1.1396, "step": 3377 }, { "epoch": 1.0253452724237366, "grad_norm": 0.43976494669914246, "learning_rate": 7.953827460510327e-05, "loss": 1.7537, "step": 3378 }, { "epoch": 1.025648808620428, "grad_norm": 0.4897270202636719, "learning_rate": 7.95321992709599e-05, "loss": 1.382, "step": 3379 }, { "epoch": 1.0259523448171195, "grad_norm": 0.42977437376976013, "learning_rate": 7.952612393681654e-05, "loss": 1.69, "step": 3380 }, { "epoch": 1.026255881013811, "grad_norm": 0.4650570750236511, "learning_rate": 7.952004860267315e-05, "loss": 1.5666, "step": 3381 }, { "epoch": 1.0265594172105024, "grad_norm": 0.5345761179924011, "learning_rate": 7.951397326852977e-05, "loss": 1.5281, "step": 3382 }, { "epoch": 1.0268629534071938, "grad_norm": 0.43827125430107117, "learning_rate": 7.95078979343864e-05, "loss": 1.8579, "step": 3383 }, { "epoch": 1.0271664896038852, "grad_norm": 0.4599241614341736, "learning_rate": 7.950182260024302e-05, "loss": 1.5928, "step": 3384 }, { "epoch": 1.0274700258005767, "grad_norm": 1.1530771255493164, "learning_rate": 7.949574726609964e-05, "loss": 1.5092, "step": 3385 }, { "epoch": 1.027773561997268, "grad_norm": 0.48699623346328735, "learning_rate": 7.948967193195627e-05, "loss": 1.7638, "step": 3386 }, { "epoch": 1.0280770981939595, "grad_norm": 0.5288783311843872, "learning_rate": 7.948359659781288e-05, "loss": 1.7314, "step": 3387 }, { "epoch": 1.0283806343906512, "grad_norm": 0.4574908912181854, "learning_rate": 7.94775212636695e-05, "loss": 1.9344, "step": 3388 }, { "epoch": 1.0286841705873426, "grad_norm": 0.47413721680641174, "learning_rate": 7.947144592952612e-05, "loss": 1.3177, "step": 3389 }, { "epoch": 1.028987706784034, "grad_norm": 0.46366703510284424, "learning_rate": 7.946537059538275e-05, "loss": 1.8695, "step": 3390 }, { "epoch": 1.0292912429807255, "grad_norm": 0.4010477662086487, "learning_rate": 7.945929526123938e-05, "loss": 1.623, "step": 3391 }, { "epoch": 1.029594779177417, "grad_norm": 0.501057505607605, "learning_rate": 7.945321992709598e-05, "loss": 1.6748, "step": 3392 }, { "epoch": 1.0298983153741084, "grad_norm": 0.4147251546382904, "learning_rate": 7.944714459295262e-05, "loss": 1.76, "step": 3393 }, { "epoch": 1.0302018515707998, "grad_norm": 0.5023919939994812, "learning_rate": 7.944106925880925e-05, "loss": 1.7666, "step": 3394 }, { "epoch": 1.0305053877674912, "grad_norm": 0.4336966574192047, "learning_rate": 7.943499392466586e-05, "loss": 1.8498, "step": 3395 }, { "epoch": 1.0308089239641827, "grad_norm": 0.50406813621521, "learning_rate": 7.942891859052248e-05, "loss": 1.0032, "step": 3396 }, { "epoch": 1.031112460160874, "grad_norm": 0.5218415856361389, "learning_rate": 7.942284325637911e-05, "loss": 1.7252, "step": 3397 }, { "epoch": 1.0314159963575655, "grad_norm": 0.5142799019813538, "learning_rate": 7.941676792223573e-05, "loss": 1.9791, "step": 3398 }, { "epoch": 1.0317195325542572, "grad_norm": 0.5369110107421875, "learning_rate": 7.941069258809235e-05, "loss": 1.4356, "step": 3399 }, { "epoch": 1.0320230687509486, "grad_norm": 0.4954996109008789, "learning_rate": 7.940461725394898e-05, "loss": 1.665, "step": 3400 }, { "epoch": 1.03232660494764, "grad_norm": 0.5331052541732788, "learning_rate": 7.93985419198056e-05, "loss": 1.5129, "step": 3401 }, { "epoch": 1.0326301411443315, "grad_norm": 0.4011031985282898, "learning_rate": 7.939246658566221e-05, "loss": 1.5203, "step": 3402 }, { "epoch": 1.032933677341023, "grad_norm": 0.8139665722846985, "learning_rate": 7.938639125151883e-05, "loss": 1.5036, "step": 3403 }, { "epoch": 1.0332372135377144, "grad_norm": 0.4838857650756836, "learning_rate": 7.938031591737546e-05, "loss": 1.7915, "step": 3404 }, { "epoch": 1.0335407497344058, "grad_norm": 0.5446197390556335, "learning_rate": 7.937424058323209e-05, "loss": 1.7374, "step": 3405 }, { "epoch": 1.0338442859310972, "grad_norm": 0.7249342799186707, "learning_rate": 7.93681652490887e-05, "loss": 1.5549, "step": 3406 }, { "epoch": 1.0341478221277887, "grad_norm": 0.4857841730117798, "learning_rate": 7.936208991494533e-05, "loss": 1.6947, "step": 3407 }, { "epoch": 1.03445135832448, "grad_norm": 0.4289863705635071, "learning_rate": 7.935601458080196e-05, "loss": 1.8783, "step": 3408 }, { "epoch": 1.0347548945211718, "grad_norm": 0.49779224395751953, "learning_rate": 7.934993924665857e-05, "loss": 1.7971, "step": 3409 }, { "epoch": 1.0350584307178632, "grad_norm": 0.5169624090194702, "learning_rate": 7.934386391251519e-05, "loss": 1.4507, "step": 3410 }, { "epoch": 1.0353619669145546, "grad_norm": 0.4716205894947052, "learning_rate": 7.933778857837182e-05, "loss": 1.8483, "step": 3411 }, { "epoch": 1.035665503111246, "grad_norm": 0.5545279383659363, "learning_rate": 7.933171324422844e-05, "loss": 1.5672, "step": 3412 }, { "epoch": 1.0359690393079375, "grad_norm": 0.4328896105289459, "learning_rate": 7.932563791008506e-05, "loss": 1.1695, "step": 3413 }, { "epoch": 1.036272575504629, "grad_norm": 0.4805368185043335, "learning_rate": 7.931956257594169e-05, "loss": 1.7504, "step": 3414 }, { "epoch": 1.0365761117013204, "grad_norm": 0.5162798166275024, "learning_rate": 7.93134872417983e-05, "loss": 1.4748, "step": 3415 }, { "epoch": 1.0368796478980118, "grad_norm": 0.5200609564781189, "learning_rate": 7.930741190765492e-05, "loss": 2.0036, "step": 3416 }, { "epoch": 1.0371831840947032, "grad_norm": 0.4653424620628357, "learning_rate": 7.930133657351154e-05, "loss": 1.9499, "step": 3417 }, { "epoch": 1.0374867202913947, "grad_norm": 0.4723150432109833, "learning_rate": 7.929526123936817e-05, "loss": 1.4042, "step": 3418 }, { "epoch": 1.037790256488086, "grad_norm": 0.5302563905715942, "learning_rate": 7.92891859052248e-05, "loss": 1.4871, "step": 3419 }, { "epoch": 1.0380937926847777, "grad_norm": 0.47659730911254883, "learning_rate": 7.92831105710814e-05, "loss": 1.4755, "step": 3420 }, { "epoch": 1.0383973288814692, "grad_norm": 0.5367438197135925, "learning_rate": 7.927703523693804e-05, "loss": 1.8863, "step": 3421 }, { "epoch": 1.0387008650781606, "grad_norm": 0.5086414217948914, "learning_rate": 7.927095990279467e-05, "loss": 1.5784, "step": 3422 }, { "epoch": 1.039004401274852, "grad_norm": 0.4822576642036438, "learning_rate": 7.926488456865127e-05, "loss": 1.8435, "step": 3423 }, { "epoch": 1.0393079374715435, "grad_norm": 0.5086636543273926, "learning_rate": 7.92588092345079e-05, "loss": 1.732, "step": 3424 }, { "epoch": 1.039611473668235, "grad_norm": 0.49060937762260437, "learning_rate": 7.925273390036453e-05, "loss": 1.7369, "step": 3425 }, { "epoch": 1.0399150098649264, "grad_norm": 0.4944159984588623, "learning_rate": 7.924665856622115e-05, "loss": 1.2844, "step": 3426 }, { "epoch": 1.0402185460616178, "grad_norm": 0.4141417443752289, "learning_rate": 7.924058323207777e-05, "loss": 1.3346, "step": 3427 }, { "epoch": 1.0405220822583092, "grad_norm": 0.4598718285560608, "learning_rate": 7.923450789793438e-05, "loss": 1.5991, "step": 3428 }, { "epoch": 1.0408256184550007, "grad_norm": 0.5402548313140869, "learning_rate": 7.922843256379101e-05, "loss": 1.78, "step": 3429 }, { "epoch": 1.0411291546516923, "grad_norm": 0.4793176054954529, "learning_rate": 7.922235722964763e-05, "loss": 1.5652, "step": 3430 }, { "epoch": 1.0414326908483837, "grad_norm": 0.4527183175086975, "learning_rate": 7.921628189550425e-05, "loss": 1.82, "step": 3431 }, { "epoch": 1.0417362270450752, "grad_norm": 0.4884622395038605, "learning_rate": 7.921020656136088e-05, "loss": 1.6954, "step": 3432 }, { "epoch": 1.0420397632417666, "grad_norm": 0.46866077184677124, "learning_rate": 7.920413122721751e-05, "loss": 1.712, "step": 3433 }, { "epoch": 1.042343299438458, "grad_norm": 0.40482431650161743, "learning_rate": 7.919805589307411e-05, "loss": 1.8968, "step": 3434 }, { "epoch": 1.0426468356351495, "grad_norm": 0.5239852666854858, "learning_rate": 7.919198055893075e-05, "loss": 1.7953, "step": 3435 }, { "epoch": 1.042950371831841, "grad_norm": 0.484953373670578, "learning_rate": 7.918590522478738e-05, "loss": 1.8354, "step": 3436 }, { "epoch": 1.0432539080285324, "grad_norm": 0.47952115535736084, "learning_rate": 7.917982989064398e-05, "loss": 1.2171, "step": 3437 }, { "epoch": 1.0435574442252238, "grad_norm": 0.47822096943855286, "learning_rate": 7.917375455650061e-05, "loss": 1.7159, "step": 3438 }, { "epoch": 1.0438609804219152, "grad_norm": 0.44206422567367554, "learning_rate": 7.916767922235724e-05, "loss": 1.8257, "step": 3439 }, { "epoch": 1.0441645166186069, "grad_norm": 0.5453143119812012, "learning_rate": 7.916160388821386e-05, "loss": 1.7791, "step": 3440 }, { "epoch": 1.0444680528152983, "grad_norm": 0.4759043753147125, "learning_rate": 7.915552855407048e-05, "loss": 1.692, "step": 3441 }, { "epoch": 1.0447715890119897, "grad_norm": 0.484531044960022, "learning_rate": 7.91494532199271e-05, "loss": 1.806, "step": 3442 }, { "epoch": 1.0450751252086812, "grad_norm": 0.5373866558074951, "learning_rate": 7.914337788578372e-05, "loss": 1.5913, "step": 3443 }, { "epoch": 1.0453786614053726, "grad_norm": 0.5190970301628113, "learning_rate": 7.913730255164034e-05, "loss": 1.5233, "step": 3444 }, { "epoch": 1.045682197602064, "grad_norm": 0.500152051448822, "learning_rate": 7.913122721749696e-05, "loss": 1.7589, "step": 3445 }, { "epoch": 1.0459857337987555, "grad_norm": 0.3860965073108673, "learning_rate": 7.912515188335359e-05, "loss": 1.2231, "step": 3446 }, { "epoch": 1.046289269995447, "grad_norm": 0.44290807843208313, "learning_rate": 7.911907654921021e-05, "loss": 1.5446, "step": 3447 }, { "epoch": 1.0465928061921383, "grad_norm": 0.46361368894577026, "learning_rate": 7.911300121506682e-05, "loss": 1.4829, "step": 3448 }, { "epoch": 1.0468963423888298, "grad_norm": 0.40358835458755493, "learning_rate": 7.910692588092346e-05, "loss": 1.6495, "step": 3449 }, { "epoch": 1.0471998785855212, "grad_norm": 0.5722264051437378, "learning_rate": 7.910085054678009e-05, "loss": 1.7335, "step": 3450 }, { "epoch": 1.0475034147822129, "grad_norm": 0.49722689390182495, "learning_rate": 7.909477521263669e-05, "loss": 1.6493, "step": 3451 }, { "epoch": 1.0478069509789043, "grad_norm": 0.5183900594711304, "learning_rate": 7.908869987849332e-05, "loss": 1.7254, "step": 3452 }, { "epoch": 1.0481104871755957, "grad_norm": 0.5188613533973694, "learning_rate": 7.908262454434995e-05, "loss": 2.0111, "step": 3453 }, { "epoch": 1.0484140233722872, "grad_norm": 0.5030909180641174, "learning_rate": 7.907654921020657e-05, "loss": 1.395, "step": 3454 }, { "epoch": 1.0487175595689786, "grad_norm": 0.4069419205188751, "learning_rate": 7.907047387606319e-05, "loss": 1.9025, "step": 3455 }, { "epoch": 1.04902109576567, "grad_norm": 0.5355219841003418, "learning_rate": 7.90643985419198e-05, "loss": 1.732, "step": 3456 }, { "epoch": 1.0493246319623615, "grad_norm": 0.43117785453796387, "learning_rate": 7.905832320777643e-05, "loss": 1.4534, "step": 3457 }, { "epoch": 1.049628168159053, "grad_norm": 0.4561751186847687, "learning_rate": 7.905224787363305e-05, "loss": 1.5682, "step": 3458 }, { "epoch": 1.0499317043557443, "grad_norm": 0.4510141611099243, "learning_rate": 7.904617253948967e-05, "loss": 1.6789, "step": 3459 }, { "epoch": 1.0502352405524358, "grad_norm": 0.5011105537414551, "learning_rate": 7.90400972053463e-05, "loss": 1.4711, "step": 3460 }, { "epoch": 1.0505387767491274, "grad_norm": 0.5226435661315918, "learning_rate": 7.903402187120292e-05, "loss": 1.4425, "step": 3461 }, { "epoch": 1.0508423129458189, "grad_norm": 0.46023955941200256, "learning_rate": 7.902794653705953e-05, "loss": 1.0283, "step": 3462 }, { "epoch": 1.0511458491425103, "grad_norm": 0.5048952698707581, "learning_rate": 7.902187120291617e-05, "loss": 1.6029, "step": 3463 }, { "epoch": 1.0514493853392017, "grad_norm": 0.6409230828285217, "learning_rate": 7.90157958687728e-05, "loss": 1.1697, "step": 3464 }, { "epoch": 1.0517529215358932, "grad_norm": 0.5188806653022766, "learning_rate": 7.90097205346294e-05, "loss": 1.8136, "step": 3465 }, { "epoch": 1.0520564577325846, "grad_norm": 0.5835402011871338, "learning_rate": 7.900364520048603e-05, "loss": 1.511, "step": 3466 }, { "epoch": 1.052359993929276, "grad_norm": 0.6449020504951477, "learning_rate": 7.899756986634266e-05, "loss": 1.98, "step": 3467 }, { "epoch": 1.0526635301259675, "grad_norm": 0.47613629698753357, "learning_rate": 7.899149453219928e-05, "loss": 1.6631, "step": 3468 }, { "epoch": 1.052967066322659, "grad_norm": 0.4372462332248688, "learning_rate": 7.89854191980559e-05, "loss": 1.7773, "step": 3469 }, { "epoch": 1.0532706025193503, "grad_norm": 0.48001718521118164, "learning_rate": 7.897934386391251e-05, "loss": 1.703, "step": 3470 }, { "epoch": 1.053574138716042, "grad_norm": 0.5756060481071472, "learning_rate": 7.897326852976914e-05, "loss": 1.4202, "step": 3471 }, { "epoch": 1.0538776749127334, "grad_norm": 0.48645758628845215, "learning_rate": 7.896719319562576e-05, "loss": 1.7291, "step": 3472 }, { "epoch": 1.0541812111094249, "grad_norm": 0.4413807988166809, "learning_rate": 7.896111786148238e-05, "loss": 1.505, "step": 3473 }, { "epoch": 1.0544847473061163, "grad_norm": 0.43039625883102417, "learning_rate": 7.895504252733901e-05, "loss": 1.5668, "step": 3474 }, { "epoch": 1.0547882835028077, "grad_norm": 0.5196880102157593, "learning_rate": 7.894896719319563e-05, "loss": 1.9353, "step": 3475 }, { "epoch": 1.0550918196994992, "grad_norm": 0.6965150833129883, "learning_rate": 7.894289185905224e-05, "loss": 1.1513, "step": 3476 }, { "epoch": 1.0553953558961906, "grad_norm": 0.4723784625530243, "learning_rate": 7.893681652490888e-05, "loss": 1.5391, "step": 3477 }, { "epoch": 1.055698892092882, "grad_norm": 0.47085341811180115, "learning_rate": 7.89307411907655e-05, "loss": 1.452, "step": 3478 }, { "epoch": 1.0560024282895735, "grad_norm": 0.515957772731781, "learning_rate": 7.892466585662211e-05, "loss": 1.7636, "step": 3479 }, { "epoch": 1.056305964486265, "grad_norm": 0.6064741611480713, "learning_rate": 7.891859052247874e-05, "loss": 1.7575, "step": 3480 }, { "epoch": 1.0566095006829563, "grad_norm": 0.567486047744751, "learning_rate": 7.891251518833537e-05, "loss": 1.0875, "step": 3481 }, { "epoch": 1.056913036879648, "grad_norm": 0.4897995591163635, "learning_rate": 7.890643985419199e-05, "loss": 1.5105, "step": 3482 }, { "epoch": 1.0572165730763394, "grad_norm": 0.47024548053741455, "learning_rate": 7.89003645200486e-05, "loss": 1.77, "step": 3483 }, { "epoch": 1.0575201092730309, "grad_norm": 0.7996636033058167, "learning_rate": 7.889428918590522e-05, "loss": 1.3641, "step": 3484 }, { "epoch": 1.0578236454697223, "grad_norm": 0.813572347164154, "learning_rate": 7.888821385176185e-05, "loss": 1.2667, "step": 3485 }, { "epoch": 1.0581271816664137, "grad_norm": 0.38006821274757385, "learning_rate": 7.888213851761847e-05, "loss": 2.011, "step": 3486 }, { "epoch": 1.0584307178631052, "grad_norm": 0.5023001432418823, "learning_rate": 7.887606318347509e-05, "loss": 1.4452, "step": 3487 }, { "epoch": 1.0587342540597966, "grad_norm": 0.6475557088851929, "learning_rate": 7.886998784933172e-05, "loss": 1.1677, "step": 3488 }, { "epoch": 1.059037790256488, "grad_norm": 0.5049715042114258, "learning_rate": 7.886391251518834e-05, "loss": 1.0539, "step": 3489 }, { "epoch": 1.0593413264531795, "grad_norm": 0.5918720960617065, "learning_rate": 7.885783718104495e-05, "loss": 1.1472, "step": 3490 }, { "epoch": 1.059644862649871, "grad_norm": 0.4469449520111084, "learning_rate": 7.885176184690159e-05, "loss": 1.6658, "step": 3491 }, { "epoch": 1.0599483988465626, "grad_norm": 0.4816749393939972, "learning_rate": 7.884568651275822e-05, "loss": 1.9424, "step": 3492 }, { "epoch": 1.060251935043254, "grad_norm": 0.44042688608169556, "learning_rate": 7.883961117861482e-05, "loss": 1.559, "step": 3493 }, { "epoch": 1.0605554712399454, "grad_norm": 0.4422488212585449, "learning_rate": 7.883353584447145e-05, "loss": 0.8213, "step": 3494 }, { "epoch": 1.0608590074366369, "grad_norm": 0.44115856289863586, "learning_rate": 7.882746051032808e-05, "loss": 1.3014, "step": 3495 }, { "epoch": 1.0611625436333283, "grad_norm": 0.5100114345550537, "learning_rate": 7.882138517618469e-05, "loss": 1.7285, "step": 3496 }, { "epoch": 1.0614660798300197, "grad_norm": 0.4293980002403259, "learning_rate": 7.881530984204132e-05, "loss": 1.6291, "step": 3497 }, { "epoch": 1.0617696160267112, "grad_norm": 0.4303349256515503, "learning_rate": 7.880923450789793e-05, "loss": 1.677, "step": 3498 }, { "epoch": 1.0620731522234026, "grad_norm": 1.325836420059204, "learning_rate": 7.880315917375456e-05, "loss": 1.4604, "step": 3499 }, { "epoch": 1.062376688420094, "grad_norm": 0.5173283815383911, "learning_rate": 7.879708383961118e-05, "loss": 1.8935, "step": 3500 }, { "epoch": 1.0626802246167855, "grad_norm": 0.5339661240577698, "learning_rate": 7.87910085054678e-05, "loss": 1.4497, "step": 3501 }, { "epoch": 1.0629837608134771, "grad_norm": 0.5330355763435364, "learning_rate": 7.878493317132443e-05, "loss": 1.7404, "step": 3502 }, { "epoch": 1.0632872970101686, "grad_norm": 0.5396672487258911, "learning_rate": 7.877885783718105e-05, "loss": 1.3268, "step": 3503 }, { "epoch": 1.06359083320686, "grad_norm": 0.44838812947273254, "learning_rate": 7.877278250303766e-05, "loss": 1.6525, "step": 3504 }, { "epoch": 1.0638943694035514, "grad_norm": 0.50009685754776, "learning_rate": 7.87667071688943e-05, "loss": 1.6385, "step": 3505 }, { "epoch": 1.0641979056002429, "grad_norm": 0.6882514953613281, "learning_rate": 7.876063183475093e-05, "loss": 1.3527, "step": 3506 }, { "epoch": 1.0645014417969343, "grad_norm": 0.5103173851966858, "learning_rate": 7.875455650060753e-05, "loss": 1.3589, "step": 3507 }, { "epoch": 1.0648049779936257, "grad_norm": 0.5229162573814392, "learning_rate": 7.874848116646416e-05, "loss": 1.5474, "step": 3508 }, { "epoch": 1.0651085141903172, "grad_norm": 0.5207902193069458, "learning_rate": 7.874240583232079e-05, "loss": 1.807, "step": 3509 }, { "epoch": 1.0654120503870086, "grad_norm": 0.46750408411026, "learning_rate": 7.87363304981774e-05, "loss": 1.4907, "step": 3510 }, { "epoch": 1.0657155865837, "grad_norm": 1.095977544784546, "learning_rate": 7.873025516403403e-05, "loss": 1.6401, "step": 3511 }, { "epoch": 1.0660191227803915, "grad_norm": 0.4337494373321533, "learning_rate": 7.872417982989064e-05, "loss": 1.8245, "step": 3512 }, { "epoch": 1.0663226589770831, "grad_norm": 0.3852023184299469, "learning_rate": 7.871810449574727e-05, "loss": 1.506, "step": 3513 }, { "epoch": 1.0666261951737745, "grad_norm": 0.45099326968193054, "learning_rate": 7.871202916160389e-05, "loss": 1.8121, "step": 3514 }, { "epoch": 1.066929731370466, "grad_norm": 0.6037120223045349, "learning_rate": 7.870595382746051e-05, "loss": 1.7321, "step": 3515 }, { "epoch": 1.0672332675671574, "grad_norm": 0.4074588716030121, "learning_rate": 7.869987849331714e-05, "loss": 1.6336, "step": 3516 }, { "epoch": 1.0675368037638489, "grad_norm": 0.40023350715637207, "learning_rate": 7.869380315917376e-05, "loss": 0.9946, "step": 3517 }, { "epoch": 1.0678403399605403, "grad_norm": 0.8330900073051453, "learning_rate": 7.868772782503037e-05, "loss": 1.4609, "step": 3518 }, { "epoch": 1.0681438761572317, "grad_norm": 0.5196427702903748, "learning_rate": 7.8681652490887e-05, "loss": 1.7803, "step": 3519 }, { "epoch": 1.0684474123539232, "grad_norm": 0.575749933719635, "learning_rate": 7.867557715674362e-05, "loss": 1.6784, "step": 3520 }, { "epoch": 1.0687509485506146, "grad_norm": 0.5069593787193298, "learning_rate": 7.866950182260024e-05, "loss": 1.9097, "step": 3521 }, { "epoch": 1.069054484747306, "grad_norm": 0.5487728714942932, "learning_rate": 7.866342648845687e-05, "loss": 1.8385, "step": 3522 }, { "epoch": 1.0693580209439975, "grad_norm": 0.5695396065711975, "learning_rate": 7.865735115431349e-05, "loss": 1.8143, "step": 3523 }, { "epoch": 1.0696615571406891, "grad_norm": 0.47702378034591675, "learning_rate": 7.86512758201701e-05, "loss": 1.2498, "step": 3524 }, { "epoch": 1.0699650933373805, "grad_norm": 0.4768955707550049, "learning_rate": 7.864520048602674e-05, "loss": 1.7542, "step": 3525 }, { "epoch": 1.070268629534072, "grad_norm": 0.4759134352207184, "learning_rate": 7.863912515188335e-05, "loss": 0.9774, "step": 3526 }, { "epoch": 1.0705721657307634, "grad_norm": 0.6089837551116943, "learning_rate": 7.863304981773998e-05, "loss": 0.7761, "step": 3527 }, { "epoch": 1.0708757019274548, "grad_norm": 0.5931398272514343, "learning_rate": 7.86269744835966e-05, "loss": 1.244, "step": 3528 }, { "epoch": 1.0711792381241463, "grad_norm": 0.4667022228240967, "learning_rate": 7.862089914945322e-05, "loss": 1.3669, "step": 3529 }, { "epoch": 1.0714827743208377, "grad_norm": 0.4586002230644226, "learning_rate": 7.861482381530985e-05, "loss": 1.6041, "step": 3530 }, { "epoch": 1.0717863105175292, "grad_norm": 0.5036244988441467, "learning_rate": 7.860874848116647e-05, "loss": 1.3989, "step": 3531 }, { "epoch": 1.0720898467142206, "grad_norm": 0.40763425827026367, "learning_rate": 7.860267314702308e-05, "loss": 1.1835, "step": 3532 }, { "epoch": 1.0723933829109122, "grad_norm": 0.44515642523765564, "learning_rate": 7.859659781287972e-05, "loss": 1.4935, "step": 3533 }, { "epoch": 1.0726969191076037, "grad_norm": 0.5427178740501404, "learning_rate": 7.859052247873633e-05, "loss": 1.9076, "step": 3534 }, { "epoch": 1.073000455304295, "grad_norm": 0.4585944712162018, "learning_rate": 7.858444714459295e-05, "loss": 1.7363, "step": 3535 }, { "epoch": 1.0733039915009865, "grad_norm": 0.46946725249290466, "learning_rate": 7.857837181044958e-05, "loss": 1.6894, "step": 3536 }, { "epoch": 1.073607527697678, "grad_norm": 0.5090848803520203, "learning_rate": 7.85722964763062e-05, "loss": 1.8616, "step": 3537 }, { "epoch": 1.0739110638943694, "grad_norm": 0.5192902684211731, "learning_rate": 7.856622114216282e-05, "loss": 1.5899, "step": 3538 }, { "epoch": 1.0742146000910608, "grad_norm": 0.4348808228969574, "learning_rate": 7.856014580801945e-05, "loss": 1.7192, "step": 3539 }, { "epoch": 1.0745181362877523, "grad_norm": 0.5693963170051575, "learning_rate": 7.855407047387606e-05, "loss": 1.3645, "step": 3540 }, { "epoch": 1.0748216724844437, "grad_norm": 0.4064824879169464, "learning_rate": 7.85479951397327e-05, "loss": 1.5659, "step": 3541 }, { "epoch": 1.0751252086811351, "grad_norm": 0.4797777235507965, "learning_rate": 7.854191980558931e-05, "loss": 1.8501, "step": 3542 }, { "epoch": 1.0754287448778266, "grad_norm": 0.5156259536743164, "learning_rate": 7.853584447144593e-05, "loss": 1.5016, "step": 3543 }, { "epoch": 1.0757322810745182, "grad_norm": 0.9919567108154297, "learning_rate": 7.852976913730256e-05, "loss": 1.3734, "step": 3544 }, { "epoch": 1.0760358172712097, "grad_norm": 0.5478760600090027, "learning_rate": 7.852369380315918e-05, "loss": 1.8446, "step": 3545 }, { "epoch": 1.076339353467901, "grad_norm": 0.6765535473823547, "learning_rate": 7.85176184690158e-05, "loss": 1.5215, "step": 3546 }, { "epoch": 1.0766428896645925, "grad_norm": 0.4867497384548187, "learning_rate": 7.851154313487243e-05, "loss": 1.3427, "step": 3547 }, { "epoch": 1.076946425861284, "grad_norm": 0.4023679792881012, "learning_rate": 7.850546780072904e-05, "loss": 1.7548, "step": 3548 }, { "epoch": 1.0772499620579754, "grad_norm": 0.42770206928253174, "learning_rate": 7.849939246658566e-05, "loss": 1.4375, "step": 3549 }, { "epoch": 1.0775534982546668, "grad_norm": 0.5628126859664917, "learning_rate": 7.849331713244229e-05, "loss": 1.6336, "step": 3550 }, { "epoch": 1.0778570344513583, "grad_norm": 0.5270586013793945, "learning_rate": 7.848724179829891e-05, "loss": 1.8782, "step": 3551 }, { "epoch": 1.0781605706480497, "grad_norm": 0.7205768823623657, "learning_rate": 7.848116646415553e-05, "loss": 1.7201, "step": 3552 }, { "epoch": 1.0784641068447411, "grad_norm": 0.5051723122596741, "learning_rate": 7.847509113001216e-05, "loss": 1.6346, "step": 3553 }, { "epoch": 1.0787676430414326, "grad_norm": 0.41711702942848206, "learning_rate": 7.846901579586877e-05, "loss": 1.765, "step": 3554 }, { "epoch": 1.0790711792381242, "grad_norm": 0.4348052442073822, "learning_rate": 7.84629404617254e-05, "loss": 1.6667, "step": 3555 }, { "epoch": 1.0793747154348157, "grad_norm": 0.5323374271392822, "learning_rate": 7.845686512758202e-05, "loss": 1.8005, "step": 3556 }, { "epoch": 1.079678251631507, "grad_norm": 0.47705498337745667, "learning_rate": 7.845078979343864e-05, "loss": 1.8168, "step": 3557 }, { "epoch": 1.0799817878281985, "grad_norm": 0.535015344619751, "learning_rate": 7.844471445929527e-05, "loss": 1.7441, "step": 3558 }, { "epoch": 1.08028532402489, "grad_norm": 0.4847927391529083, "learning_rate": 7.843863912515189e-05, "loss": 1.5963, "step": 3559 }, { "epoch": 1.0805888602215814, "grad_norm": 0.5845076441764832, "learning_rate": 7.84325637910085e-05, "loss": 1.116, "step": 3560 }, { "epoch": 1.0808923964182728, "grad_norm": 0.5248334407806396, "learning_rate": 7.842648845686514e-05, "loss": 2.0422, "step": 3561 }, { "epoch": 1.0811959326149643, "grad_norm": 0.5417022705078125, "learning_rate": 7.842041312272175e-05, "loss": 1.5701, "step": 3562 }, { "epoch": 1.0814994688116557, "grad_norm": 0.4764825701713562, "learning_rate": 7.841433778857837e-05, "loss": 1.4946, "step": 3563 }, { "epoch": 1.0818030050083474, "grad_norm": 0.4735731780529022, "learning_rate": 7.8408262454435e-05, "loss": 2.0424, "step": 3564 }, { "epoch": 1.0821065412050388, "grad_norm": 0.4083727300167084, "learning_rate": 7.840218712029162e-05, "loss": 1.2251, "step": 3565 }, { "epoch": 1.0824100774017302, "grad_norm": 0.5175759792327881, "learning_rate": 7.839611178614824e-05, "loss": 1.3758, "step": 3566 }, { "epoch": 1.0827136135984217, "grad_norm": 0.4588059186935425, "learning_rate": 7.839003645200487e-05, "loss": 1.4034, "step": 3567 }, { "epoch": 1.083017149795113, "grad_norm": 0.5879805088043213, "learning_rate": 7.838396111786148e-05, "loss": 1.6349, "step": 3568 }, { "epoch": 1.0833206859918045, "grad_norm": 0.48351842164993286, "learning_rate": 7.83778857837181e-05, "loss": 1.8329, "step": 3569 }, { "epoch": 1.083624222188496, "grad_norm": 0.5158828496932983, "learning_rate": 7.837181044957473e-05, "loss": 1.6281, "step": 3570 }, { "epoch": 1.0839277583851874, "grad_norm": 0.7203484177589417, "learning_rate": 7.836573511543135e-05, "loss": 0.9363, "step": 3571 }, { "epoch": 1.0842312945818788, "grad_norm": 0.5488767623901367, "learning_rate": 7.835965978128798e-05, "loss": 1.7487, "step": 3572 }, { "epoch": 1.0845348307785703, "grad_norm": 0.5658820867538452, "learning_rate": 7.83535844471446e-05, "loss": 1.8557, "step": 3573 }, { "epoch": 1.0848383669752617, "grad_norm": 0.5227528214454651, "learning_rate": 7.834750911300121e-05, "loss": 1.4973, "step": 3574 }, { "epoch": 1.0851419031719534, "grad_norm": 0.5170645713806152, "learning_rate": 7.834143377885785e-05, "loss": 1.7767, "step": 3575 }, { "epoch": 1.0854454393686448, "grad_norm": 0.48934417963027954, "learning_rate": 7.833535844471446e-05, "loss": 1.7613, "step": 3576 }, { "epoch": 1.0857489755653362, "grad_norm": 0.4472818374633789, "learning_rate": 7.832928311057108e-05, "loss": 1.6749, "step": 3577 }, { "epoch": 1.0860525117620277, "grad_norm": 0.4309948682785034, "learning_rate": 7.832320777642771e-05, "loss": 1.8213, "step": 3578 }, { "epoch": 1.086356047958719, "grad_norm": 0.5259717702865601, "learning_rate": 7.831713244228433e-05, "loss": 1.5028, "step": 3579 }, { "epoch": 1.0866595841554105, "grad_norm": 0.4831100106239319, "learning_rate": 7.831105710814095e-05, "loss": 1.5417, "step": 3580 }, { "epoch": 1.086963120352102, "grad_norm": 0.5474818348884583, "learning_rate": 7.830498177399758e-05, "loss": 1.3114, "step": 3581 }, { "epoch": 1.0872666565487934, "grad_norm": 0.6029711961746216, "learning_rate": 7.82989064398542e-05, "loss": 1.6251, "step": 3582 }, { "epoch": 1.0875701927454848, "grad_norm": 0.6199969053268433, "learning_rate": 7.829283110571081e-05, "loss": 1.7712, "step": 3583 }, { "epoch": 1.0878737289421763, "grad_norm": 0.571630597114563, "learning_rate": 7.828675577156744e-05, "loss": 1.8169, "step": 3584 }, { "epoch": 1.0881772651388677, "grad_norm": 0.476755827665329, "learning_rate": 7.828068043742406e-05, "loss": 1.4152, "step": 3585 }, { "epoch": 1.0884808013355594, "grad_norm": 0.5712706446647644, "learning_rate": 7.827460510328069e-05, "loss": 1.624, "step": 3586 }, { "epoch": 1.0887843375322508, "grad_norm": 0.4737652540206909, "learning_rate": 7.826852976913731e-05, "loss": 1.812, "step": 3587 }, { "epoch": 1.0890878737289422, "grad_norm": 0.7318893671035767, "learning_rate": 7.826245443499392e-05, "loss": 1.7989, "step": 3588 }, { "epoch": 1.0893914099256337, "grad_norm": 0.5808560848236084, "learning_rate": 7.825637910085056e-05, "loss": 1.7167, "step": 3589 }, { "epoch": 1.089694946122325, "grad_norm": 0.49355220794677734, "learning_rate": 7.825030376670717e-05, "loss": 1.5009, "step": 3590 }, { "epoch": 1.0899984823190165, "grad_norm": 0.4661107361316681, "learning_rate": 7.824422843256379e-05, "loss": 1.2807, "step": 3591 }, { "epoch": 1.090302018515708, "grad_norm": 0.3863863945007324, "learning_rate": 7.823815309842042e-05, "loss": 1.6173, "step": 3592 }, { "epoch": 1.0906055547123994, "grad_norm": 0.48039016127586365, "learning_rate": 7.823207776427704e-05, "loss": 1.7246, "step": 3593 }, { "epoch": 1.0909090909090908, "grad_norm": 0.4487806558609009, "learning_rate": 7.822600243013366e-05, "loss": 1.408, "step": 3594 }, { "epoch": 1.0912126271057823, "grad_norm": 0.6070311069488525, "learning_rate": 7.821992709599029e-05, "loss": 1.5847, "step": 3595 }, { "epoch": 1.091516163302474, "grad_norm": 0.5059540271759033, "learning_rate": 7.82138517618469e-05, "loss": 1.9817, "step": 3596 }, { "epoch": 1.0918196994991654, "grad_norm": 0.5185919404029846, "learning_rate": 7.820777642770352e-05, "loss": 1.761, "step": 3597 }, { "epoch": 1.0921232356958568, "grad_norm": 0.5528532266616821, "learning_rate": 7.820170109356015e-05, "loss": 1.5324, "step": 3598 }, { "epoch": 1.0924267718925482, "grad_norm": 0.46091577410697937, "learning_rate": 7.819562575941677e-05, "loss": 1.5934, "step": 3599 }, { "epoch": 1.0927303080892397, "grad_norm": 0.5525655746459961, "learning_rate": 7.81895504252734e-05, "loss": 1.742, "step": 3600 }, { "epoch": 1.093033844285931, "grad_norm": 0.8037269115447998, "learning_rate": 7.818347509113002e-05, "loss": 1.2865, "step": 3601 }, { "epoch": 1.0933373804826225, "grad_norm": 0.500320315361023, "learning_rate": 7.817739975698664e-05, "loss": 1.7335, "step": 3602 }, { "epoch": 1.093640916679314, "grad_norm": 0.4381422698497772, "learning_rate": 7.817132442284327e-05, "loss": 1.7203, "step": 3603 }, { "epoch": 1.0939444528760054, "grad_norm": 0.5011032819747925, "learning_rate": 7.816524908869988e-05, "loss": 1.903, "step": 3604 }, { "epoch": 1.0942479890726968, "grad_norm": 0.4082486033439636, "learning_rate": 7.81591737545565e-05, "loss": 1.7394, "step": 3605 }, { "epoch": 1.0945515252693885, "grad_norm": 0.3757207691669464, "learning_rate": 7.815309842041313e-05, "loss": 1.9376, "step": 3606 }, { "epoch": 1.09485506146608, "grad_norm": 0.5873184204101562, "learning_rate": 7.814702308626975e-05, "loss": 1.8047, "step": 3607 }, { "epoch": 1.0951585976627713, "grad_norm": 0.4013366401195526, "learning_rate": 7.814094775212637e-05, "loss": 1.7548, "step": 3608 }, { "epoch": 1.0954621338594628, "grad_norm": 0.4884313642978668, "learning_rate": 7.8134872417983e-05, "loss": 1.7476, "step": 3609 }, { "epoch": 1.0957656700561542, "grad_norm": 0.5261145234107971, "learning_rate": 7.812879708383961e-05, "loss": 1.3334, "step": 3610 }, { "epoch": 1.0960692062528457, "grad_norm": 0.4099176824092865, "learning_rate": 7.812272174969623e-05, "loss": 1.1143, "step": 3611 }, { "epoch": 1.096372742449537, "grad_norm": 0.571192741394043, "learning_rate": 7.811664641555286e-05, "loss": 1.7579, "step": 3612 }, { "epoch": 1.0966762786462285, "grad_norm": 0.48647162318229675, "learning_rate": 7.811057108140948e-05, "loss": 1.8592, "step": 3613 }, { "epoch": 1.09697981484292, "grad_norm": 0.4793272316455841, "learning_rate": 7.810449574726611e-05, "loss": 1.7882, "step": 3614 }, { "epoch": 1.0972833510396114, "grad_norm": 0.49005764722824097, "learning_rate": 7.809842041312273e-05, "loss": 1.52, "step": 3615 }, { "epoch": 1.0975868872363028, "grad_norm": 0.5489885210990906, "learning_rate": 7.809234507897935e-05, "loss": 1.7768, "step": 3616 }, { "epoch": 1.0978904234329945, "grad_norm": 0.5832868814468384, "learning_rate": 7.808626974483598e-05, "loss": 1.2381, "step": 3617 }, { "epoch": 1.098193959629686, "grad_norm": 1.2282688617706299, "learning_rate": 7.80801944106926e-05, "loss": 1.6286, "step": 3618 }, { "epoch": 1.0984974958263773, "grad_norm": 0.5094382166862488, "learning_rate": 7.807411907654921e-05, "loss": 1.9736, "step": 3619 }, { "epoch": 1.0988010320230688, "grad_norm": 0.4855671525001526, "learning_rate": 7.806804374240584e-05, "loss": 1.9063, "step": 3620 }, { "epoch": 1.0991045682197602, "grad_norm": 0.5706669092178345, "learning_rate": 7.806196840826246e-05, "loss": 1.9509, "step": 3621 }, { "epoch": 1.0994081044164516, "grad_norm": 0.5625984072685242, "learning_rate": 7.805589307411908e-05, "loss": 1.5796, "step": 3622 }, { "epoch": 1.099711640613143, "grad_norm": 0.5737254023551941, "learning_rate": 7.804981773997571e-05, "loss": 1.6272, "step": 3623 }, { "epoch": 1.1000151768098345, "grad_norm": 0.3637593388557434, "learning_rate": 7.804374240583232e-05, "loss": 1.2409, "step": 3624 }, { "epoch": 1.100318713006526, "grad_norm": 0.4899303615093231, "learning_rate": 7.803766707168894e-05, "loss": 1.3884, "step": 3625 }, { "epoch": 1.1006222492032174, "grad_norm": 0.524005115032196, "learning_rate": 7.803159173754557e-05, "loss": 1.9453, "step": 3626 }, { "epoch": 1.100925785399909, "grad_norm": 0.5083621740341187, "learning_rate": 7.802551640340219e-05, "loss": 1.5158, "step": 3627 }, { "epoch": 1.1012293215966005, "grad_norm": 0.46930131316185, "learning_rate": 7.801944106925882e-05, "loss": 1.7324, "step": 3628 }, { "epoch": 1.101532857793292, "grad_norm": 0.5122260451316833, "learning_rate": 7.801336573511544e-05, "loss": 1.4132, "step": 3629 }, { "epoch": 1.1018363939899833, "grad_norm": 0.5043088793754578, "learning_rate": 7.800729040097206e-05, "loss": 1.6932, "step": 3630 }, { "epoch": 1.1021399301866748, "grad_norm": 0.5585395097732544, "learning_rate": 7.800121506682869e-05, "loss": 1.6254, "step": 3631 }, { "epoch": 1.1024434663833662, "grad_norm": 0.4394286572933197, "learning_rate": 7.799513973268529e-05, "loss": 1.6988, "step": 3632 }, { "epoch": 1.1027470025800576, "grad_norm": 0.5732413530349731, "learning_rate": 7.798906439854192e-05, "loss": 1.0782, "step": 3633 }, { "epoch": 1.103050538776749, "grad_norm": 0.49380823969841003, "learning_rate": 7.798298906439855e-05, "loss": 1.3272, "step": 3634 }, { "epoch": 1.1033540749734405, "grad_norm": 0.5082643628120422, "learning_rate": 7.797691373025517e-05, "loss": 1.6587, "step": 3635 }, { "epoch": 1.103657611170132, "grad_norm": 0.6942585706710815, "learning_rate": 7.797083839611179e-05, "loss": 1.565, "step": 3636 }, { "epoch": 1.1039611473668236, "grad_norm": 0.5077084302902222, "learning_rate": 7.796476306196842e-05, "loss": 1.8823, "step": 3637 }, { "epoch": 1.104264683563515, "grad_norm": 0.5313974022865295, "learning_rate": 7.795868772782503e-05, "loss": 1.5792, "step": 3638 }, { "epoch": 1.1045682197602065, "grad_norm": 0.5107327699661255, "learning_rate": 7.795261239368165e-05, "loss": 1.4091, "step": 3639 }, { "epoch": 1.104871755956898, "grad_norm": 0.530952513217926, "learning_rate": 7.794653705953828e-05, "loss": 1.918, "step": 3640 }, { "epoch": 1.1051752921535893, "grad_norm": 0.6229440569877625, "learning_rate": 7.79404617253949e-05, "loss": 1.7242, "step": 3641 }, { "epoch": 1.1054788283502808, "grad_norm": 0.5772741436958313, "learning_rate": 7.793438639125152e-05, "loss": 1.4634, "step": 3642 }, { "epoch": 1.1057823645469722, "grad_norm": 0.6212802529335022, "learning_rate": 7.792831105710815e-05, "loss": 1.0477, "step": 3643 }, { "epoch": 1.1060859007436636, "grad_norm": 0.51209557056427, "learning_rate": 7.792223572296477e-05, "loss": 1.6945, "step": 3644 }, { "epoch": 1.106389436940355, "grad_norm": 0.45700934529304504, "learning_rate": 7.79161603888214e-05, "loss": 1.6441, "step": 3645 }, { "epoch": 1.1066929731370465, "grad_norm": 0.5479162931442261, "learning_rate": 7.7910085054678e-05, "loss": 1.2612, "step": 3646 }, { "epoch": 1.106996509333738, "grad_norm": 0.5235689878463745, "learning_rate": 7.790400972053463e-05, "loss": 1.5583, "step": 3647 }, { "epoch": 1.1073000455304296, "grad_norm": 0.40267738699913025, "learning_rate": 7.789793438639126e-05, "loss": 1.5987, "step": 3648 }, { "epoch": 1.107603581727121, "grad_norm": 0.4579909145832062, "learning_rate": 7.789185905224788e-05, "loss": 2.0193, "step": 3649 }, { "epoch": 1.1079071179238125, "grad_norm": 0.7406178712844849, "learning_rate": 7.78857837181045e-05, "loss": 1.3012, "step": 3650 }, { "epoch": 1.108210654120504, "grad_norm": 0.5075519680976868, "learning_rate": 7.787970838396113e-05, "loss": 1.8573, "step": 3651 }, { "epoch": 1.1085141903171953, "grad_norm": 0.5122193098068237, "learning_rate": 7.787363304981774e-05, "loss": 1.4937, "step": 3652 }, { "epoch": 1.1088177265138868, "grad_norm": 0.5174267292022705, "learning_rate": 7.786755771567436e-05, "loss": 1.3752, "step": 3653 }, { "epoch": 1.1091212627105782, "grad_norm": 0.47906357049942017, "learning_rate": 7.786148238153099e-05, "loss": 1.4443, "step": 3654 }, { "epoch": 1.1094247989072696, "grad_norm": 0.5059614777565002, "learning_rate": 7.785540704738761e-05, "loss": 1.5518, "step": 3655 }, { "epoch": 1.109728335103961, "grad_norm": 0.49383166432380676, "learning_rate": 7.784933171324423e-05, "loss": 1.6696, "step": 3656 }, { "epoch": 1.1100318713006525, "grad_norm": 0.45457521080970764, "learning_rate": 7.784325637910086e-05, "loss": 1.4528, "step": 3657 }, { "epoch": 1.1103354074973442, "grad_norm": 0.4575364291667938, "learning_rate": 7.783718104495748e-05, "loss": 1.7467, "step": 3658 }, { "epoch": 1.1106389436940356, "grad_norm": 0.4990423619747162, "learning_rate": 7.78311057108141e-05, "loss": 1.6548, "step": 3659 }, { "epoch": 1.110942479890727, "grad_norm": 0.5598446726799011, "learning_rate": 7.782503037667071e-05, "loss": 1.4941, "step": 3660 }, { "epoch": 1.1112460160874185, "grad_norm": 0.466371089220047, "learning_rate": 7.781895504252734e-05, "loss": 1.7623, "step": 3661 }, { "epoch": 1.11154955228411, "grad_norm": 0.4354589879512787, "learning_rate": 7.781287970838397e-05, "loss": 1.9063, "step": 3662 }, { "epoch": 1.1118530884808013, "grad_norm": 0.7934980988502502, "learning_rate": 7.780680437424059e-05, "loss": 1.8391, "step": 3663 }, { "epoch": 1.1121566246774928, "grad_norm": 0.4731541872024536, "learning_rate": 7.78007290400972e-05, "loss": 1.9535, "step": 3664 }, { "epoch": 1.1124601608741842, "grad_norm": 0.47908467054367065, "learning_rate": 7.779465370595384e-05, "loss": 1.8113, "step": 3665 }, { "epoch": 1.1127636970708756, "grad_norm": 0.4935145676136017, "learning_rate": 7.778857837181045e-05, "loss": 1.7136, "step": 3666 }, { "epoch": 1.113067233267567, "grad_norm": 0.4779261350631714, "learning_rate": 7.778250303766707e-05, "loss": 1.5593, "step": 3667 }, { "epoch": 1.1133707694642587, "grad_norm": 0.3891371786594391, "learning_rate": 7.77764277035237e-05, "loss": 1.2089, "step": 3668 }, { "epoch": 1.1136743056609502, "grad_norm": 0.5638146996498108, "learning_rate": 7.777035236938032e-05, "loss": 1.345, "step": 3669 }, { "epoch": 1.1139778418576416, "grad_norm": 0.46427562832832336, "learning_rate": 7.776427703523694e-05, "loss": 1.8294, "step": 3670 }, { "epoch": 1.114281378054333, "grad_norm": 1.02618408203125, "learning_rate": 7.775820170109357e-05, "loss": 1.608, "step": 3671 }, { "epoch": 1.1145849142510245, "grad_norm": 0.5669841766357422, "learning_rate": 7.775212636695019e-05, "loss": 1.5572, "step": 3672 }, { "epoch": 1.114888450447716, "grad_norm": 0.5150823593139648, "learning_rate": 7.774605103280682e-05, "loss": 1.6798, "step": 3673 }, { "epoch": 1.1151919866444073, "grad_norm": 0.6217275857925415, "learning_rate": 7.773997569866342e-05, "loss": 1.4402, "step": 3674 }, { "epoch": 1.1154955228410988, "grad_norm": 0.508321225643158, "learning_rate": 7.773390036452005e-05, "loss": 1.6684, "step": 3675 }, { "epoch": 1.1157990590377902, "grad_norm": 0.44217655062675476, "learning_rate": 7.772782503037668e-05, "loss": 1.5984, "step": 3676 }, { "epoch": 1.1161025952344816, "grad_norm": 0.4717262089252472, "learning_rate": 7.77217496962333e-05, "loss": 1.4775, "step": 3677 }, { "epoch": 1.116406131431173, "grad_norm": 0.4989759922027588, "learning_rate": 7.771567436208992e-05, "loss": 1.4132, "step": 3678 }, { "epoch": 1.1167096676278647, "grad_norm": 0.44810184836387634, "learning_rate": 7.770959902794655e-05, "loss": 1.7462, "step": 3679 }, { "epoch": 1.1170132038245562, "grad_norm": 0.4343874156475067, "learning_rate": 7.770352369380316e-05, "loss": 1.6128, "step": 3680 }, { "epoch": 1.1173167400212476, "grad_norm": 0.4640476107597351, "learning_rate": 7.769744835965978e-05, "loss": 1.6007, "step": 3681 }, { "epoch": 1.117620276217939, "grad_norm": 0.4636215567588806, "learning_rate": 7.769137302551641e-05, "loss": 1.4001, "step": 3682 }, { "epoch": 1.1179238124146305, "grad_norm": 0.5073500871658325, "learning_rate": 7.768529769137303e-05, "loss": 1.8682, "step": 3683 }, { "epoch": 1.118227348611322, "grad_norm": 0.5101370811462402, "learning_rate": 7.767922235722965e-05, "loss": 1.8096, "step": 3684 }, { "epoch": 1.1185308848080133, "grad_norm": 0.42578715085983276, "learning_rate": 7.767314702308628e-05, "loss": 1.393, "step": 3685 }, { "epoch": 1.1188344210047048, "grad_norm": 0.4419322907924652, "learning_rate": 7.76670716889429e-05, "loss": 1.7337, "step": 3686 }, { "epoch": 1.1191379572013962, "grad_norm": 0.48602306842803955, "learning_rate": 7.766099635479953e-05, "loss": 1.6385, "step": 3687 }, { "epoch": 1.1194414933980876, "grad_norm": 0.46349820494651794, "learning_rate": 7.765492102065613e-05, "loss": 1.8216, "step": 3688 }, { "epoch": 1.119745029594779, "grad_norm": 0.5057324767112732, "learning_rate": 7.764884568651276e-05, "loss": 1.4216, "step": 3689 }, { "epoch": 1.1200485657914707, "grad_norm": 0.45940324664115906, "learning_rate": 7.764277035236939e-05, "loss": 1.7301, "step": 3690 }, { "epoch": 1.1203521019881622, "grad_norm": 0.47218936681747437, "learning_rate": 7.763669501822601e-05, "loss": 1.6976, "step": 3691 }, { "epoch": 1.1206556381848536, "grad_norm": 0.4720531105995178, "learning_rate": 7.763061968408263e-05, "loss": 1.4658, "step": 3692 }, { "epoch": 1.120959174381545, "grad_norm": 0.48740169405937195, "learning_rate": 7.762454434993926e-05, "loss": 1.325, "step": 3693 }, { "epoch": 1.1212627105782365, "grad_norm": 0.4727463722229004, "learning_rate": 7.761846901579587e-05, "loss": 1.7131, "step": 3694 }, { "epoch": 1.1215662467749279, "grad_norm": 0.563522458076477, "learning_rate": 7.761239368165249e-05, "loss": 1.7699, "step": 3695 }, { "epoch": 1.1218697829716193, "grad_norm": 0.492064893245697, "learning_rate": 7.760631834750912e-05, "loss": 1.9043, "step": 3696 }, { "epoch": 1.1221733191683108, "grad_norm": 1.3425596952438354, "learning_rate": 7.760024301336574e-05, "loss": 1.6455, "step": 3697 }, { "epoch": 1.1224768553650022, "grad_norm": 0.4847075641155243, "learning_rate": 7.759416767922236e-05, "loss": 1.7395, "step": 3698 }, { "epoch": 1.1227803915616938, "grad_norm": 0.7759005427360535, "learning_rate": 7.758809234507897e-05, "loss": 1.6917, "step": 3699 }, { "epoch": 1.1230839277583853, "grad_norm": 0.6782048344612122, "learning_rate": 7.75820170109356e-05, "loss": 1.443, "step": 3700 }, { "epoch": 1.1233874639550767, "grad_norm": 0.4265955686569214, "learning_rate": 7.757594167679224e-05, "loss": 1.2195, "step": 3701 }, { "epoch": 1.1236910001517681, "grad_norm": 0.8154575824737549, "learning_rate": 7.756986634264884e-05, "loss": 1.4988, "step": 3702 }, { "epoch": 1.1239945363484596, "grad_norm": 0.4791830778121948, "learning_rate": 7.756379100850547e-05, "loss": 1.2148, "step": 3703 }, { "epoch": 1.124298072545151, "grad_norm": 0.4666757583618164, "learning_rate": 7.75577156743621e-05, "loss": 1.5228, "step": 3704 }, { "epoch": 1.1246016087418425, "grad_norm": 0.4709447920322418, "learning_rate": 7.75516403402187e-05, "loss": 1.6371, "step": 3705 }, { "epoch": 1.1249051449385339, "grad_norm": 0.47154557704925537, "learning_rate": 7.754556500607534e-05, "loss": 1.1386, "step": 3706 }, { "epoch": 1.1252086811352253, "grad_norm": 0.384400337934494, "learning_rate": 7.753948967193197e-05, "loss": 1.6998, "step": 3707 }, { "epoch": 1.1255122173319168, "grad_norm": 0.5026933550834656, "learning_rate": 7.753341433778858e-05, "loss": 1.8225, "step": 3708 }, { "epoch": 1.1258157535286082, "grad_norm": 0.5157676339149475, "learning_rate": 7.75273390036452e-05, "loss": 1.4864, "step": 3709 }, { "epoch": 1.1261192897252998, "grad_norm": 1.0850920677185059, "learning_rate": 7.752126366950183e-05, "loss": 1.8376, "step": 3710 }, { "epoch": 1.1264228259219913, "grad_norm": 0.481871634721756, "learning_rate": 7.751518833535845e-05, "loss": 1.6568, "step": 3711 }, { "epoch": 1.1267263621186827, "grad_norm": 0.5413371920585632, "learning_rate": 7.750911300121507e-05, "loss": 1.3793, "step": 3712 }, { "epoch": 1.1270298983153741, "grad_norm": 0.44527462124824524, "learning_rate": 7.750303766707168e-05, "loss": 1.773, "step": 3713 }, { "epoch": 1.1273334345120656, "grad_norm": 0.49938610196113586, "learning_rate": 7.749696233292832e-05, "loss": 1.5552, "step": 3714 }, { "epoch": 1.127636970708757, "grad_norm": 0.5346314311027527, "learning_rate": 7.749088699878495e-05, "loss": 1.5582, "step": 3715 }, { "epoch": 1.1279405069054484, "grad_norm": 0.9484091401100159, "learning_rate": 7.748481166464155e-05, "loss": 1.5542, "step": 3716 }, { "epoch": 1.1282440431021399, "grad_norm": 0.47302237153053284, "learning_rate": 7.747873633049818e-05, "loss": 1.9569, "step": 3717 }, { "epoch": 1.1285475792988313, "grad_norm": 0.5025098323822021, "learning_rate": 7.747266099635481e-05, "loss": 1.4258, "step": 3718 }, { "epoch": 1.1288511154955228, "grad_norm": 0.42215389013290405, "learning_rate": 7.746658566221142e-05, "loss": 1.6635, "step": 3719 }, { "epoch": 1.1291546516922142, "grad_norm": 0.49948540329933167, "learning_rate": 7.746051032806805e-05, "loss": 1.6155, "step": 3720 }, { "epoch": 1.1294581878889058, "grad_norm": 0.6525446176528931, "learning_rate": 7.745443499392468e-05, "loss": 1.6404, "step": 3721 }, { "epoch": 1.1297617240855973, "grad_norm": 0.5007261633872986, "learning_rate": 7.74483596597813e-05, "loss": 1.2159, "step": 3722 }, { "epoch": 1.1300652602822887, "grad_norm": 0.6284709572792053, "learning_rate": 7.744228432563791e-05, "loss": 1.8035, "step": 3723 }, { "epoch": 1.1303687964789801, "grad_norm": 0.705549418926239, "learning_rate": 7.743620899149454e-05, "loss": 1.2551, "step": 3724 }, { "epoch": 1.1306723326756716, "grad_norm": 1.1247000694274902, "learning_rate": 7.743013365735116e-05, "loss": 1.652, "step": 3725 }, { "epoch": 1.130975868872363, "grad_norm": 0.4521750211715698, "learning_rate": 7.742405832320778e-05, "loss": 1.6, "step": 3726 }, { "epoch": 1.1312794050690544, "grad_norm": 0.4077288806438446, "learning_rate": 7.74179829890644e-05, "loss": 1.8314, "step": 3727 }, { "epoch": 1.1315829412657459, "grad_norm": 0.4165663421154022, "learning_rate": 7.741190765492103e-05, "loss": 1.9458, "step": 3728 }, { "epoch": 1.1318864774624373, "grad_norm": 0.55204176902771, "learning_rate": 7.740583232077764e-05, "loss": 1.661, "step": 3729 }, { "epoch": 1.132190013659129, "grad_norm": 0.5003572106361389, "learning_rate": 7.739975698663426e-05, "loss": 1.6445, "step": 3730 }, { "epoch": 1.1324935498558204, "grad_norm": 0.5279999375343323, "learning_rate": 7.739368165249089e-05, "loss": 1.5472, "step": 3731 }, { "epoch": 1.1327970860525118, "grad_norm": 0.5142978429794312, "learning_rate": 7.738760631834752e-05, "loss": 1.6682, "step": 3732 }, { "epoch": 1.1331006222492033, "grad_norm": 0.5571588277816772, "learning_rate": 7.738153098420413e-05, "loss": 1.5633, "step": 3733 }, { "epoch": 1.1334041584458947, "grad_norm": 0.6555919647216797, "learning_rate": 7.737545565006076e-05, "loss": 1.2002, "step": 3734 }, { "epoch": 1.1337076946425861, "grad_norm": 0.482624888420105, "learning_rate": 7.736938031591739e-05, "loss": 1.7551, "step": 3735 }, { "epoch": 1.1340112308392776, "grad_norm": 0.4221251904964447, "learning_rate": 7.7363304981774e-05, "loss": 1.1514, "step": 3736 }, { "epoch": 1.134314767035969, "grad_norm": 0.482462614774704, "learning_rate": 7.735722964763062e-05, "loss": 1.2174, "step": 3737 }, { "epoch": 1.1346183032326604, "grad_norm": 0.5528337955474854, "learning_rate": 7.735115431348725e-05, "loss": 1.7362, "step": 3738 }, { "epoch": 1.1349218394293519, "grad_norm": 0.4969632625579834, "learning_rate": 7.734507897934387e-05, "loss": 1.991, "step": 3739 }, { "epoch": 1.1352253756260433, "grad_norm": 0.5240209698677063, "learning_rate": 7.733900364520049e-05, "loss": 1.5519, "step": 3740 }, { "epoch": 1.135528911822735, "grad_norm": 0.41266825795173645, "learning_rate": 7.73329283110571e-05, "loss": 1.3405, "step": 3741 }, { "epoch": 1.1358324480194264, "grad_norm": 0.5517387986183167, "learning_rate": 7.732685297691374e-05, "loss": 1.1253, "step": 3742 }, { "epoch": 1.1361359842161178, "grad_norm": 0.49983713030815125, "learning_rate": 7.732077764277035e-05, "loss": 1.3818, "step": 3743 }, { "epoch": 1.1364395204128093, "grad_norm": 0.5212677717208862, "learning_rate": 7.731470230862697e-05, "loss": 1.8162, "step": 3744 }, { "epoch": 1.1367430566095007, "grad_norm": 0.5420991778373718, "learning_rate": 7.73086269744836e-05, "loss": 1.5643, "step": 3745 }, { "epoch": 1.1370465928061921, "grad_norm": 0.5572590827941895, "learning_rate": 7.730255164034023e-05, "loss": 1.6639, "step": 3746 }, { "epoch": 1.1373501290028836, "grad_norm": 0.5790618658065796, "learning_rate": 7.729647630619684e-05, "loss": 1.7084, "step": 3747 }, { "epoch": 1.137653665199575, "grad_norm": 0.6989924311637878, "learning_rate": 7.729040097205347e-05, "loss": 1.7443, "step": 3748 }, { "epoch": 1.1379572013962664, "grad_norm": 0.4986421763896942, "learning_rate": 7.72843256379101e-05, "loss": 1.5655, "step": 3749 }, { "epoch": 1.1382607375929579, "grad_norm": 0.6421902179718018, "learning_rate": 7.727825030376671e-05, "loss": 1.4474, "step": 3750 }, { "epoch": 1.1385642737896493, "grad_norm": 0.4390571117401123, "learning_rate": 7.727217496962333e-05, "loss": 1.9309, "step": 3751 }, { "epoch": 1.138867809986341, "grad_norm": 0.6357335448265076, "learning_rate": 7.726609963547996e-05, "loss": 1.4736, "step": 3752 }, { "epoch": 1.1391713461830324, "grad_norm": 0.5134897828102112, "learning_rate": 7.726002430133658e-05, "loss": 1.623, "step": 3753 }, { "epoch": 1.1394748823797238, "grad_norm": 0.5518725514411926, "learning_rate": 7.72539489671932e-05, "loss": 1.7415, "step": 3754 }, { "epoch": 1.1397784185764153, "grad_norm": 0.49003735184669495, "learning_rate": 7.724787363304981e-05, "loss": 1.8879, "step": 3755 }, { "epoch": 1.1400819547731067, "grad_norm": 0.5795713067054749, "learning_rate": 7.724179829890645e-05, "loss": 1.4763, "step": 3756 }, { "epoch": 1.1403854909697981, "grad_norm": 0.4583165645599365, "learning_rate": 7.723572296476306e-05, "loss": 1.7021, "step": 3757 }, { "epoch": 1.1406890271664896, "grad_norm": 0.5582238435745239, "learning_rate": 7.722964763061968e-05, "loss": 1.2102, "step": 3758 }, { "epoch": 1.140992563363181, "grad_norm": 0.8933469653129578, "learning_rate": 7.722357229647631e-05, "loss": 1.5831, "step": 3759 }, { "epoch": 1.1412960995598724, "grad_norm": 0.5090795755386353, "learning_rate": 7.721749696233294e-05, "loss": 1.7801, "step": 3760 }, { "epoch": 1.141599635756564, "grad_norm": 0.4822721481323242, "learning_rate": 7.721142162818955e-05, "loss": 1.5706, "step": 3761 }, { "epoch": 1.1419031719532553, "grad_norm": 0.5089300870895386, "learning_rate": 7.720534629404618e-05, "loss": 1.7128, "step": 3762 }, { "epoch": 1.142206708149947, "grad_norm": 0.5474359393119812, "learning_rate": 7.719927095990281e-05, "loss": 1.5872, "step": 3763 }, { "epoch": 1.1425102443466384, "grad_norm": 0.582497775554657, "learning_rate": 7.719319562575942e-05, "loss": 1.5936, "step": 3764 }, { "epoch": 1.1428137805433298, "grad_norm": 0.49585655331611633, "learning_rate": 7.718712029161604e-05, "loss": 1.1567, "step": 3765 }, { "epoch": 1.1431173167400213, "grad_norm": 0.5703508257865906, "learning_rate": 7.718104495747267e-05, "loss": 1.5919, "step": 3766 }, { "epoch": 1.1434208529367127, "grad_norm": 0.5075818300247192, "learning_rate": 7.717496962332929e-05, "loss": 1.683, "step": 3767 }, { "epoch": 1.1437243891334041, "grad_norm": 0.5167961120605469, "learning_rate": 7.716889428918591e-05, "loss": 1.425, "step": 3768 }, { "epoch": 1.1440279253300956, "grad_norm": 0.5668395161628723, "learning_rate": 7.716281895504252e-05, "loss": 1.4884, "step": 3769 }, { "epoch": 1.144331461526787, "grad_norm": 0.4944659173488617, "learning_rate": 7.715674362089916e-05, "loss": 1.1739, "step": 3770 }, { "epoch": 1.1446349977234784, "grad_norm": 0.792289137840271, "learning_rate": 7.715066828675577e-05, "loss": 1.7746, "step": 3771 }, { "epoch": 1.14493853392017, "grad_norm": 0.48457542061805725, "learning_rate": 7.714459295261239e-05, "loss": 1.7788, "step": 3772 }, { "epoch": 1.1452420701168615, "grad_norm": 0.5210672616958618, "learning_rate": 7.713851761846902e-05, "loss": 1.7434, "step": 3773 }, { "epoch": 1.145545606313553, "grad_norm": 0.41982871294021606, "learning_rate": 7.713244228432565e-05, "loss": 1.6847, "step": 3774 }, { "epoch": 1.1458491425102444, "grad_norm": 0.5397769212722778, "learning_rate": 7.712636695018226e-05, "loss": 1.7824, "step": 3775 }, { "epoch": 1.1461526787069358, "grad_norm": 0.9942769408226013, "learning_rate": 7.712029161603889e-05, "loss": 1.2639, "step": 3776 }, { "epoch": 1.1464562149036273, "grad_norm": 0.5531973838806152, "learning_rate": 7.711421628189552e-05, "loss": 1.6249, "step": 3777 }, { "epoch": 1.1467597511003187, "grad_norm": 0.4524674415588379, "learning_rate": 7.710814094775212e-05, "loss": 1.1845, "step": 3778 }, { "epoch": 1.1470632872970101, "grad_norm": 0.4991176426410675, "learning_rate": 7.710206561360875e-05, "loss": 1.7489, "step": 3779 }, { "epoch": 1.1473668234937016, "grad_norm": 0.46409863233566284, "learning_rate": 7.709599027946537e-05, "loss": 1.9125, "step": 3780 }, { "epoch": 1.147670359690393, "grad_norm": 0.5320178866386414, "learning_rate": 7.7089914945322e-05, "loss": 1.5643, "step": 3781 }, { "epoch": 1.1479738958870844, "grad_norm": 0.5475717782974243, "learning_rate": 7.708383961117862e-05, "loss": 1.6597, "step": 3782 }, { "epoch": 1.148277432083776, "grad_norm": 0.6391961574554443, "learning_rate": 7.707776427703523e-05, "loss": 1.7717, "step": 3783 }, { "epoch": 1.1485809682804675, "grad_norm": 0.583355724811554, "learning_rate": 7.707168894289187e-05, "loss": 1.8934, "step": 3784 }, { "epoch": 1.148884504477159, "grad_norm": 0.891815721988678, "learning_rate": 7.706561360874848e-05, "loss": 0.836, "step": 3785 }, { "epoch": 1.1491880406738504, "grad_norm": 0.5519468188285828, "learning_rate": 7.70595382746051e-05, "loss": 1.785, "step": 3786 }, { "epoch": 1.1494915768705418, "grad_norm": 0.5709235668182373, "learning_rate": 7.705346294046173e-05, "loss": 1.3448, "step": 3787 }, { "epoch": 1.1497951130672333, "grad_norm": 0.453735888004303, "learning_rate": 7.704738760631836e-05, "loss": 1.7276, "step": 3788 }, { "epoch": 1.1500986492639247, "grad_norm": 0.4751914441585541, "learning_rate": 7.704131227217497e-05, "loss": 1.7094, "step": 3789 }, { "epoch": 1.1504021854606161, "grad_norm": 0.4283442199230194, "learning_rate": 7.70352369380316e-05, "loss": 1.8033, "step": 3790 }, { "epoch": 1.1507057216573076, "grad_norm": 0.4466361701488495, "learning_rate": 7.702916160388823e-05, "loss": 0.9553, "step": 3791 }, { "epoch": 1.1510092578539992, "grad_norm": 0.4145885407924652, "learning_rate": 7.702308626974483e-05, "loss": 1.3865, "step": 3792 }, { "epoch": 1.1513127940506904, "grad_norm": 0.5452487468719482, "learning_rate": 7.701701093560146e-05, "loss": 1.7645, "step": 3793 }, { "epoch": 1.151616330247382, "grad_norm": 0.5216187834739685, "learning_rate": 7.701093560145808e-05, "loss": 1.7205, "step": 3794 }, { "epoch": 1.1519198664440735, "grad_norm": 0.4803890287876129, "learning_rate": 7.700486026731471e-05, "loss": 1.8845, "step": 3795 }, { "epoch": 1.152223402640765, "grad_norm": 0.42299672961235046, "learning_rate": 7.699878493317133e-05, "loss": 1.5909, "step": 3796 }, { "epoch": 1.1525269388374564, "grad_norm": 0.6640902161598206, "learning_rate": 7.699270959902794e-05, "loss": 1.7704, "step": 3797 }, { "epoch": 1.1528304750341478, "grad_norm": 0.4829888939857483, "learning_rate": 7.698663426488458e-05, "loss": 1.4564, "step": 3798 }, { "epoch": 1.1531340112308393, "grad_norm": 0.49383172392845154, "learning_rate": 7.698055893074119e-05, "loss": 1.5888, "step": 3799 }, { "epoch": 1.1534375474275307, "grad_norm": 0.4922170042991638, "learning_rate": 7.697448359659781e-05, "loss": 1.6228, "step": 3800 }, { "epoch": 1.1537410836242221, "grad_norm": 0.5983831286430359, "learning_rate": 7.696840826245444e-05, "loss": 1.3906, "step": 3801 }, { "epoch": 1.1540446198209136, "grad_norm": 0.4924396574497223, "learning_rate": 7.696233292831106e-05, "loss": 1.3455, "step": 3802 }, { "epoch": 1.1543481560176052, "grad_norm": 0.39909911155700684, "learning_rate": 7.695625759416768e-05, "loss": 1.1296, "step": 3803 }, { "epoch": 1.1546516922142966, "grad_norm": 0.5369203686714172, "learning_rate": 7.69501822600243e-05, "loss": 1.7645, "step": 3804 }, { "epoch": 1.154955228410988, "grad_norm": 0.5576856732368469, "learning_rate": 7.694410692588094e-05, "loss": 1.6252, "step": 3805 }, { "epoch": 1.1552587646076795, "grad_norm": 0.6163928508758545, "learning_rate": 7.693803159173754e-05, "loss": 1.1073, "step": 3806 }, { "epoch": 1.155562300804371, "grad_norm": 0.47761568427085876, "learning_rate": 7.693195625759417e-05, "loss": 1.725, "step": 3807 }, { "epoch": 1.1558658370010624, "grad_norm": 0.4886780083179474, "learning_rate": 7.692588092345079e-05, "loss": 2.0903, "step": 3808 }, { "epoch": 1.1561693731977538, "grad_norm": 0.5503537058830261, "learning_rate": 7.691980558930742e-05, "loss": 1.6063, "step": 3809 }, { "epoch": 1.1564729093944452, "grad_norm": 0.647091805934906, "learning_rate": 7.691373025516404e-05, "loss": 1.2219, "step": 3810 }, { "epoch": 1.1567764455911367, "grad_norm": 0.7522347569465637, "learning_rate": 7.690765492102065e-05, "loss": 1.5603, "step": 3811 }, { "epoch": 1.1570799817878281, "grad_norm": 0.5273557305335999, "learning_rate": 7.690157958687729e-05, "loss": 1.6698, "step": 3812 }, { "epoch": 1.1573835179845195, "grad_norm": 0.5218062996864319, "learning_rate": 7.68955042527339e-05, "loss": 1.1702, "step": 3813 }, { "epoch": 1.1576870541812112, "grad_norm": 0.4625975489616394, "learning_rate": 7.688942891859052e-05, "loss": 1.8227, "step": 3814 }, { "epoch": 1.1579905903779026, "grad_norm": 0.49970725178718567, "learning_rate": 7.688335358444715e-05, "loss": 1.6382, "step": 3815 }, { "epoch": 1.158294126574594, "grad_norm": 0.6002604365348816, "learning_rate": 7.687727825030377e-05, "loss": 1.7239, "step": 3816 }, { "epoch": 1.1585976627712855, "grad_norm": 0.6793041825294495, "learning_rate": 7.687120291616039e-05, "loss": 2.0427, "step": 3817 }, { "epoch": 1.158901198967977, "grad_norm": 0.5442394018173218, "learning_rate": 7.686512758201702e-05, "loss": 1.6008, "step": 3818 }, { "epoch": 1.1592047351646684, "grad_norm": 0.4671969413757324, "learning_rate": 7.685905224787365e-05, "loss": 1.401, "step": 3819 }, { "epoch": 1.1595082713613598, "grad_norm": 0.4723747968673706, "learning_rate": 7.685297691373025e-05, "loss": 1.5624, "step": 3820 }, { "epoch": 1.1598118075580512, "grad_norm": 0.5985869765281677, "learning_rate": 7.684690157958688e-05, "loss": 1.5069, "step": 3821 }, { "epoch": 1.1601153437547427, "grad_norm": 0.47640082240104675, "learning_rate": 7.68408262454435e-05, "loss": 1.0615, "step": 3822 }, { "epoch": 1.1604188799514341, "grad_norm": 0.4906187057495117, "learning_rate": 7.683475091130013e-05, "loss": 1.4341, "step": 3823 }, { "epoch": 1.1607224161481255, "grad_norm": 0.6372618675231934, "learning_rate": 7.682867557715675e-05, "loss": 1.2915, "step": 3824 }, { "epoch": 1.1610259523448172, "grad_norm": 0.42580631375312805, "learning_rate": 7.682260024301336e-05, "loss": 1.2624, "step": 3825 }, { "epoch": 1.1613294885415086, "grad_norm": 0.41982603073120117, "learning_rate": 7.681652490887e-05, "loss": 1.7227, "step": 3826 }, { "epoch": 1.1616330247382, "grad_norm": 0.44015559554100037, "learning_rate": 7.681044957472661e-05, "loss": 1.4198, "step": 3827 }, { "epoch": 1.1619365609348915, "grad_norm": 0.5147770047187805, "learning_rate": 7.680437424058323e-05, "loss": 1.3839, "step": 3828 }, { "epoch": 1.162240097131583, "grad_norm": 0.5992082953453064, "learning_rate": 7.679829890643986e-05, "loss": 1.6994, "step": 3829 }, { "epoch": 1.1625436333282744, "grad_norm": 0.5068255662918091, "learning_rate": 7.679222357229648e-05, "loss": 1.6389, "step": 3830 }, { "epoch": 1.1628471695249658, "grad_norm": 0.5025370717048645, "learning_rate": 7.67861482381531e-05, "loss": 1.4422, "step": 3831 }, { "epoch": 1.1631507057216572, "grad_norm": 0.5961645841598511, "learning_rate": 7.678007290400973e-05, "loss": 1.3941, "step": 3832 }, { "epoch": 1.1634542419183487, "grad_norm": 0.5184653997421265, "learning_rate": 7.677399756986636e-05, "loss": 1.7886, "step": 3833 }, { "epoch": 1.1637577781150403, "grad_norm": 0.4389922022819519, "learning_rate": 7.676792223572296e-05, "loss": 1.7861, "step": 3834 }, { "epoch": 1.1640613143117318, "grad_norm": 0.5118327140808105, "learning_rate": 7.676184690157959e-05, "loss": 1.7161, "step": 3835 }, { "epoch": 1.1643648505084232, "grad_norm": 0.5762491226196289, "learning_rate": 7.675577156743621e-05, "loss": 1.7012, "step": 3836 }, { "epoch": 1.1646683867051146, "grad_norm": 0.480589359998703, "learning_rate": 7.674969623329284e-05, "loss": 1.6764, "step": 3837 }, { "epoch": 1.164971922901806, "grad_norm": 0.45624813437461853, "learning_rate": 7.674362089914946e-05, "loss": 1.5741, "step": 3838 }, { "epoch": 1.1652754590984975, "grad_norm": 0.5073031783103943, "learning_rate": 7.673754556500608e-05, "loss": 1.263, "step": 3839 }, { "epoch": 1.165578995295189, "grad_norm": 0.44414857029914856, "learning_rate": 7.67314702308627e-05, "loss": 1.0236, "step": 3840 }, { "epoch": 1.1658825314918804, "grad_norm": 0.5479186177253723, "learning_rate": 7.672539489671932e-05, "loss": 1.9612, "step": 3841 }, { "epoch": 1.1661860676885718, "grad_norm": 0.4903987944126129, "learning_rate": 7.671931956257594e-05, "loss": 1.6913, "step": 3842 }, { "epoch": 1.1664896038852632, "grad_norm": 0.5010794401168823, "learning_rate": 7.671324422843257e-05, "loss": 1.8156, "step": 3843 }, { "epoch": 1.1667931400819547, "grad_norm": 0.4327058792114258, "learning_rate": 7.670716889428919e-05, "loss": 1.557, "step": 3844 }, { "epoch": 1.1670966762786463, "grad_norm": 0.6099236011505127, "learning_rate": 7.67010935601458e-05, "loss": 1.7191, "step": 3845 }, { "epoch": 1.1674002124753378, "grad_norm": 0.7435611486434937, "learning_rate": 7.669501822600244e-05, "loss": 1.8566, "step": 3846 }, { "epoch": 1.1677037486720292, "grad_norm": 0.6030800938606262, "learning_rate": 7.668894289185907e-05, "loss": 1.3435, "step": 3847 }, { "epoch": 1.1680072848687206, "grad_norm": 0.4840324819087982, "learning_rate": 7.668286755771567e-05, "loss": 1.5476, "step": 3848 }, { "epoch": 1.168310821065412, "grad_norm": 0.686964213848114, "learning_rate": 7.66767922235723e-05, "loss": 1.5845, "step": 3849 }, { "epoch": 1.1686143572621035, "grad_norm": 0.4797843396663666, "learning_rate": 7.667071688942892e-05, "loss": 1.7808, "step": 3850 }, { "epoch": 1.168917893458795, "grad_norm": 0.5187574028968811, "learning_rate": 7.666464155528554e-05, "loss": 1.4564, "step": 3851 }, { "epoch": 1.1692214296554864, "grad_norm": 0.42190396785736084, "learning_rate": 7.665856622114217e-05, "loss": 1.4857, "step": 3852 }, { "epoch": 1.1695249658521778, "grad_norm": 0.4939133822917938, "learning_rate": 7.665249088699879e-05, "loss": 1.4132, "step": 3853 }, { "epoch": 1.1698285020488692, "grad_norm": 0.4695587456226349, "learning_rate": 7.664641555285542e-05, "loss": 1.577, "step": 3854 }, { "epoch": 1.1701320382455607, "grad_norm": 0.5055351257324219, "learning_rate": 7.664034021871203e-05, "loss": 1.6084, "step": 3855 }, { "epoch": 1.1704355744422523, "grad_norm": 0.4340987503528595, "learning_rate": 7.663426488456865e-05, "loss": 1.8373, "step": 3856 }, { "epoch": 1.1707391106389438, "grad_norm": 0.5082830190658569, "learning_rate": 7.662818955042528e-05, "loss": 1.9309, "step": 3857 }, { "epoch": 1.1710426468356352, "grad_norm": 0.5326313972473145, "learning_rate": 7.66221142162819e-05, "loss": 1.5347, "step": 3858 }, { "epoch": 1.1713461830323266, "grad_norm": 0.6817587018013, "learning_rate": 7.661603888213852e-05, "loss": 1.9486, "step": 3859 }, { "epoch": 1.171649719229018, "grad_norm": 0.5530791282653809, "learning_rate": 7.660996354799515e-05, "loss": 1.5328, "step": 3860 }, { "epoch": 1.1719532554257095, "grad_norm": 0.4731312692165375, "learning_rate": 7.660388821385178e-05, "loss": 1.7807, "step": 3861 }, { "epoch": 1.172256791622401, "grad_norm": 0.46114182472229004, "learning_rate": 7.659781287970838e-05, "loss": 1.5321, "step": 3862 }, { "epoch": 1.1725603278190924, "grad_norm": 0.4836636185646057, "learning_rate": 7.659173754556501e-05, "loss": 1.8332, "step": 3863 }, { "epoch": 1.1728638640157838, "grad_norm": 0.44714653491973877, "learning_rate": 7.658566221142163e-05, "loss": 1.6486, "step": 3864 }, { "epoch": 1.1731674002124755, "grad_norm": 0.5285139083862305, "learning_rate": 7.657958687727825e-05, "loss": 1.6531, "step": 3865 }, { "epoch": 1.1734709364091669, "grad_norm": 0.44782644510269165, "learning_rate": 7.657351154313488e-05, "loss": 1.8439, "step": 3866 }, { "epoch": 1.1737744726058583, "grad_norm": 0.4893675148487091, "learning_rate": 7.65674362089915e-05, "loss": 1.1381, "step": 3867 }, { "epoch": 1.1740780088025498, "grad_norm": 0.619848370552063, "learning_rate": 7.656136087484813e-05, "loss": 1.4307, "step": 3868 }, { "epoch": 1.1743815449992412, "grad_norm": 0.5027971267700195, "learning_rate": 7.655528554070474e-05, "loss": 1.5816, "step": 3869 }, { "epoch": 1.1746850811959326, "grad_norm": 0.5814145803451538, "learning_rate": 7.654921020656136e-05, "loss": 2.0414, "step": 3870 }, { "epoch": 1.174988617392624, "grad_norm": 0.5027217268943787, "learning_rate": 7.654313487241799e-05, "loss": 1.6058, "step": 3871 }, { "epoch": 1.1752921535893155, "grad_norm": 0.5492193102836609, "learning_rate": 7.653705953827461e-05, "loss": 1.6025, "step": 3872 }, { "epoch": 1.175595689786007, "grad_norm": 0.5875594615936279, "learning_rate": 7.653098420413123e-05, "loss": 1.6729, "step": 3873 }, { "epoch": 1.1758992259826984, "grad_norm": 0.46128249168395996, "learning_rate": 7.652490886998786e-05, "loss": 1.7184, "step": 3874 }, { "epoch": 1.1762027621793898, "grad_norm": 0.4899282157421112, "learning_rate": 7.651883353584447e-05, "loss": 1.7772, "step": 3875 }, { "epoch": 1.1765062983760814, "grad_norm": 0.4934176206588745, "learning_rate": 7.651275820170109e-05, "loss": 1.5329, "step": 3876 }, { "epoch": 1.1768098345727729, "grad_norm": 0.41739147901535034, "learning_rate": 7.650668286755772e-05, "loss": 1.0405, "step": 3877 }, { "epoch": 1.1771133707694643, "grad_norm": 0.5608689785003662, "learning_rate": 7.650060753341434e-05, "loss": 1.459, "step": 3878 }, { "epoch": 1.1774169069661558, "grad_norm": 5.719343185424805, "learning_rate": 7.649453219927096e-05, "loss": 1.4504, "step": 3879 }, { "epoch": 1.1777204431628472, "grad_norm": 0.5679183006286621, "learning_rate": 7.648845686512759e-05, "loss": 1.699, "step": 3880 }, { "epoch": 1.1780239793595386, "grad_norm": 0.5237777233123779, "learning_rate": 7.64823815309842e-05, "loss": 1.7255, "step": 3881 }, { "epoch": 1.17832751555623, "grad_norm": 0.5510279536247253, "learning_rate": 7.647630619684084e-05, "loss": 1.8984, "step": 3882 }, { "epoch": 1.1786310517529215, "grad_norm": 0.4513683021068573, "learning_rate": 7.647023086269745e-05, "loss": 1.8629, "step": 3883 }, { "epoch": 1.178934587949613, "grad_norm": 0.5571762919425964, "learning_rate": 7.646415552855407e-05, "loss": 1.4258, "step": 3884 }, { "epoch": 1.1792381241463044, "grad_norm": 0.5506730675697327, "learning_rate": 7.64580801944107e-05, "loss": 1.5378, "step": 3885 }, { "epoch": 1.1795416603429958, "grad_norm": 0.4214894771575928, "learning_rate": 7.645200486026732e-05, "loss": 1.4376, "step": 3886 }, { "epoch": 1.1798451965396874, "grad_norm": 0.5280786752700806, "learning_rate": 7.644592952612394e-05, "loss": 1.6226, "step": 3887 }, { "epoch": 1.1801487327363789, "grad_norm": 2.3207452297210693, "learning_rate": 7.643985419198057e-05, "loss": 1.3407, "step": 3888 }, { "epoch": 1.1804522689330703, "grad_norm": 0.5287031531333923, "learning_rate": 7.643377885783718e-05, "loss": 1.7205, "step": 3889 }, { "epoch": 1.1807558051297617, "grad_norm": 0.5691362619400024, "learning_rate": 7.64277035236938e-05, "loss": 1.707, "step": 3890 }, { "epoch": 1.1810593413264532, "grad_norm": 0.5688780546188354, "learning_rate": 7.642162818955043e-05, "loss": 1.6914, "step": 3891 }, { "epoch": 1.1813628775231446, "grad_norm": 0.6007869839668274, "learning_rate": 7.641555285540705e-05, "loss": 1.6556, "step": 3892 }, { "epoch": 1.181666413719836, "grad_norm": 0.8336607217788696, "learning_rate": 7.640947752126367e-05, "loss": 1.6392, "step": 3893 }, { "epoch": 1.1819699499165275, "grad_norm": 0.5636674761772156, "learning_rate": 7.64034021871203e-05, "loss": 1.602, "step": 3894 }, { "epoch": 1.182273486113219, "grad_norm": 0.47849076986312866, "learning_rate": 7.639732685297692e-05, "loss": 1.6095, "step": 3895 }, { "epoch": 1.1825770223099106, "grad_norm": 0.4776079058647156, "learning_rate": 7.639125151883355e-05, "loss": 1.5303, "step": 3896 }, { "epoch": 1.182880558506602, "grad_norm": 0.5701802968978882, "learning_rate": 7.638517618469016e-05, "loss": 1.4568, "step": 3897 }, { "epoch": 1.1831840947032934, "grad_norm": 0.4271094799041748, "learning_rate": 7.637910085054678e-05, "loss": 1.7324, "step": 3898 }, { "epoch": 1.1834876308999849, "grad_norm": 0.5306187272071838, "learning_rate": 7.637302551640341e-05, "loss": 1.9042, "step": 3899 }, { "epoch": 1.1837911670966763, "grad_norm": 0.5607674717903137, "learning_rate": 7.636695018226003e-05, "loss": 1.5753, "step": 3900 }, { "epoch": 1.1840947032933677, "grad_norm": 0.520979106426239, "learning_rate": 7.636087484811665e-05, "loss": 1.773, "step": 3901 }, { "epoch": 1.1843982394900592, "grad_norm": 0.5560202598571777, "learning_rate": 7.635479951397328e-05, "loss": 1.685, "step": 3902 }, { "epoch": 1.1847017756867506, "grad_norm": 0.4960952401161194, "learning_rate": 7.63487241798299e-05, "loss": 1.4656, "step": 3903 }, { "epoch": 1.185005311883442, "grad_norm": 0.5220305323600769, "learning_rate": 7.634264884568651e-05, "loss": 1.7571, "step": 3904 }, { "epoch": 1.1853088480801335, "grad_norm": 0.5441679954528809, "learning_rate": 7.633657351154314e-05, "loss": 1.7369, "step": 3905 }, { "epoch": 1.185612384276825, "grad_norm": 0.524355411529541, "learning_rate": 7.633049817739976e-05, "loss": 1.554, "step": 3906 }, { "epoch": 1.1859159204735166, "grad_norm": 0.580812931060791, "learning_rate": 7.632442284325638e-05, "loss": 1.1878, "step": 3907 }, { "epoch": 1.186219456670208, "grad_norm": 0.4383397400379181, "learning_rate": 7.631834750911301e-05, "loss": 1.3957, "step": 3908 }, { "epoch": 1.1865229928668994, "grad_norm": 0.5575391054153442, "learning_rate": 7.631227217496963e-05, "loss": 1.7958, "step": 3909 }, { "epoch": 1.1868265290635909, "grad_norm": 0.6178303956985474, "learning_rate": 7.630619684082626e-05, "loss": 1.8002, "step": 3910 }, { "epoch": 1.1871300652602823, "grad_norm": 0.7053147554397583, "learning_rate": 7.630012150668287e-05, "loss": 1.0536, "step": 3911 }, { "epoch": 1.1874336014569737, "grad_norm": 1.381752848625183, "learning_rate": 7.629404617253949e-05, "loss": 1.3821, "step": 3912 }, { "epoch": 1.1877371376536652, "grad_norm": 1.020168662071228, "learning_rate": 7.628797083839612e-05, "loss": 1.3616, "step": 3913 }, { "epoch": 1.1880406738503566, "grad_norm": 0.5046608448028564, "learning_rate": 7.628189550425274e-05, "loss": 1.7283, "step": 3914 }, { "epoch": 1.188344210047048, "grad_norm": 0.4386448562145233, "learning_rate": 7.627582017010936e-05, "loss": 1.7318, "step": 3915 }, { "epoch": 1.1886477462437395, "grad_norm": 0.46474358439445496, "learning_rate": 7.626974483596599e-05, "loss": 1.8927, "step": 3916 }, { "epoch": 1.188951282440431, "grad_norm": 0.5807692408561707, "learning_rate": 7.62636695018226e-05, "loss": 1.6145, "step": 3917 }, { "epoch": 1.1892548186371226, "grad_norm": 0.4750295579433441, "learning_rate": 7.625759416767922e-05, "loss": 1.4997, "step": 3918 }, { "epoch": 1.189558354833814, "grad_norm": 0.5042990446090698, "learning_rate": 7.625151883353585e-05, "loss": 1.8107, "step": 3919 }, { "epoch": 1.1898618910305054, "grad_norm": 0.48407718539237976, "learning_rate": 7.624544349939247e-05, "loss": 1.4194, "step": 3920 }, { "epoch": 1.1901654272271969, "grad_norm": 0.5130017399787903, "learning_rate": 7.623936816524909e-05, "loss": 1.4494, "step": 3921 }, { "epoch": 1.1904689634238883, "grad_norm": 0.46143487095832825, "learning_rate": 7.623329283110572e-05, "loss": 1.5981, "step": 3922 }, { "epoch": 1.1907724996205797, "grad_norm": 0.8400600552558899, "learning_rate": 7.622721749696234e-05, "loss": 1.2068, "step": 3923 }, { "epoch": 1.1910760358172712, "grad_norm": 0.5218877792358398, "learning_rate": 7.622114216281895e-05, "loss": 1.6137, "step": 3924 }, { "epoch": 1.1913795720139626, "grad_norm": 0.4495093822479248, "learning_rate": 7.621506682867558e-05, "loss": 1.6691, "step": 3925 }, { "epoch": 1.191683108210654, "grad_norm": 0.49023687839508057, "learning_rate": 7.62089914945322e-05, "loss": 1.6207, "step": 3926 }, { "epoch": 1.1919866444073457, "grad_norm": 0.5561721324920654, "learning_rate": 7.620291616038883e-05, "loss": 1.4456, "step": 3927 }, { "epoch": 1.192290180604037, "grad_norm": 0.5107851028442383, "learning_rate": 7.619684082624545e-05, "loss": 1.78, "step": 3928 }, { "epoch": 1.1925937168007286, "grad_norm": 0.5281449556350708, "learning_rate": 7.619076549210207e-05, "loss": 1.9445, "step": 3929 }, { "epoch": 1.19289725299742, "grad_norm": 0.5192548036575317, "learning_rate": 7.61846901579587e-05, "loss": 1.6706, "step": 3930 }, { "epoch": 1.1932007891941114, "grad_norm": 0.5205463171005249, "learning_rate": 7.617861482381531e-05, "loss": 1.6415, "step": 3931 }, { "epoch": 1.1935043253908029, "grad_norm": 0.4953666925430298, "learning_rate": 7.617253948967193e-05, "loss": 1.6263, "step": 3932 }, { "epoch": 1.1938078615874943, "grad_norm": 0.49807044863700867, "learning_rate": 7.616646415552856e-05, "loss": 1.6871, "step": 3933 }, { "epoch": 1.1941113977841857, "grad_norm": 0.8351933360099792, "learning_rate": 7.616038882138518e-05, "loss": 1.0512, "step": 3934 }, { "epoch": 1.1944149339808772, "grad_norm": 0.4444892406463623, "learning_rate": 7.61543134872418e-05, "loss": 1.6981, "step": 3935 }, { "epoch": 1.1947184701775686, "grad_norm": 0.46068355441093445, "learning_rate": 7.614823815309843e-05, "loss": 1.8101, "step": 3936 }, { "epoch": 1.19502200637426, "grad_norm": 0.6468572616577148, "learning_rate": 7.614216281895505e-05, "loss": 1.411, "step": 3937 }, { "epoch": 1.1953255425709517, "grad_norm": 0.5605432391166687, "learning_rate": 7.613608748481166e-05, "loss": 1.1574, "step": 3938 }, { "epoch": 1.1956290787676431, "grad_norm": 0.4770459532737732, "learning_rate": 7.61300121506683e-05, "loss": 1.7571, "step": 3939 }, { "epoch": 1.1959326149643346, "grad_norm": 0.5230698585510254, "learning_rate": 7.612393681652491e-05, "loss": 1.6551, "step": 3940 }, { "epoch": 1.196236151161026, "grad_norm": 0.7350290417671204, "learning_rate": 7.611786148238154e-05, "loss": 1.6698, "step": 3941 }, { "epoch": 1.1965396873577174, "grad_norm": 0.5905072689056396, "learning_rate": 7.611178614823816e-05, "loss": 1.7354, "step": 3942 }, { "epoch": 1.1968432235544089, "grad_norm": 0.5296047329902649, "learning_rate": 7.610571081409478e-05, "loss": 1.7228, "step": 3943 }, { "epoch": 1.1971467597511003, "grad_norm": 0.49780750274658203, "learning_rate": 7.609963547995141e-05, "loss": 1.2224, "step": 3944 }, { "epoch": 1.1974502959477917, "grad_norm": 0.4543820917606354, "learning_rate": 7.609356014580802e-05, "loss": 1.6028, "step": 3945 }, { "epoch": 1.1977538321444832, "grad_norm": 0.4909208118915558, "learning_rate": 7.608748481166464e-05, "loss": 1.9409, "step": 3946 }, { "epoch": 1.1980573683411746, "grad_norm": 0.47982802987098694, "learning_rate": 7.608140947752127e-05, "loss": 1.8759, "step": 3947 }, { "epoch": 1.198360904537866, "grad_norm": 1.0359922647476196, "learning_rate": 7.607533414337789e-05, "loss": 1.2115, "step": 3948 }, { "epoch": 1.1986644407345577, "grad_norm": 0.5492017865180969, "learning_rate": 7.606925880923451e-05, "loss": 1.5465, "step": 3949 }, { "epoch": 1.1989679769312491, "grad_norm": 0.3987594544887543, "learning_rate": 7.606318347509114e-05, "loss": 1.6226, "step": 3950 }, { "epoch": 1.1992715131279406, "grad_norm": 0.4642569422721863, "learning_rate": 7.605710814094776e-05, "loss": 1.9385, "step": 3951 }, { "epoch": 1.199575049324632, "grad_norm": 0.4473128914833069, "learning_rate": 7.605103280680437e-05, "loss": 1.7057, "step": 3952 }, { "epoch": 1.1998785855213234, "grad_norm": 0.4777715504169464, "learning_rate": 7.6044957472661e-05, "loss": 1.3958, "step": 3953 }, { "epoch": 1.2001821217180149, "grad_norm": 0.553566575050354, "learning_rate": 7.603888213851762e-05, "loss": 1.649, "step": 3954 }, { "epoch": 1.2004856579147063, "grad_norm": 0.4801444411277771, "learning_rate": 7.603280680437425e-05, "loss": 1.7801, "step": 3955 }, { "epoch": 1.2007891941113977, "grad_norm": 0.4610240161418915, "learning_rate": 7.602673147023086e-05, "loss": 1.9021, "step": 3956 }, { "epoch": 1.2010927303080892, "grad_norm": 0.48058998584747314, "learning_rate": 7.602065613608749e-05, "loss": 1.0347, "step": 3957 }, { "epoch": 1.2013962665047808, "grad_norm": 0.5109126567840576, "learning_rate": 7.601458080194412e-05, "loss": 1.6481, "step": 3958 }, { "epoch": 1.201699802701472, "grad_norm": 0.5463404655456543, "learning_rate": 7.600850546780073e-05, "loss": 1.4141, "step": 3959 }, { "epoch": 1.2020033388981637, "grad_norm": 0.46814149618148804, "learning_rate": 7.600243013365735e-05, "loss": 1.7893, "step": 3960 }, { "epoch": 1.2023068750948551, "grad_norm": 0.5103051066398621, "learning_rate": 7.599635479951398e-05, "loss": 1.533, "step": 3961 }, { "epoch": 1.2026104112915466, "grad_norm": 0.4707978665828705, "learning_rate": 7.59902794653706e-05, "loss": 1.3515, "step": 3962 }, { "epoch": 1.202913947488238, "grad_norm": 1.0241955518722534, "learning_rate": 7.598420413122722e-05, "loss": 1.4342, "step": 3963 }, { "epoch": 1.2032174836849294, "grad_norm": 0.4893020689487457, "learning_rate": 7.597812879708385e-05, "loss": 1.8932, "step": 3964 }, { "epoch": 1.2035210198816209, "grad_norm": 0.47995486855506897, "learning_rate": 7.597205346294047e-05, "loss": 1.5502, "step": 3965 }, { "epoch": 1.2038245560783123, "grad_norm": 0.5193171501159668, "learning_rate": 7.596597812879708e-05, "loss": 1.6352, "step": 3966 }, { "epoch": 1.2041280922750037, "grad_norm": 0.5245213508605957, "learning_rate": 7.595990279465371e-05, "loss": 1.4066, "step": 3967 }, { "epoch": 1.2044316284716952, "grad_norm": 0.578769326210022, "learning_rate": 7.595382746051033e-05, "loss": 1.7556, "step": 3968 }, { "epoch": 1.2047351646683868, "grad_norm": 0.4418366253376007, "learning_rate": 7.594775212636696e-05, "loss": 0.9755, "step": 3969 }, { "epoch": 1.2050387008650782, "grad_norm": 0.5069161653518677, "learning_rate": 7.594167679222357e-05, "loss": 1.7738, "step": 3970 }, { "epoch": 1.2053422370617697, "grad_norm": 0.6108199954032898, "learning_rate": 7.59356014580802e-05, "loss": 1.6229, "step": 3971 }, { "epoch": 1.2056457732584611, "grad_norm": 0.4657975137233734, "learning_rate": 7.592952612393683e-05, "loss": 1.7213, "step": 3972 }, { "epoch": 1.2059493094551526, "grad_norm": 0.4727039337158203, "learning_rate": 7.592345078979343e-05, "loss": 1.6633, "step": 3973 }, { "epoch": 1.206252845651844, "grad_norm": 0.48204702138900757, "learning_rate": 7.591737545565006e-05, "loss": 1.7443, "step": 3974 }, { "epoch": 1.2065563818485354, "grad_norm": 0.4971252977848053, "learning_rate": 7.591130012150669e-05, "loss": 1.8952, "step": 3975 }, { "epoch": 1.2068599180452269, "grad_norm": 0.5344823002815247, "learning_rate": 7.590522478736331e-05, "loss": 1.402, "step": 3976 }, { "epoch": 1.2071634542419183, "grad_norm": 0.5859917402267456, "learning_rate": 7.589914945321993e-05, "loss": 1.8069, "step": 3977 }, { "epoch": 1.2074669904386097, "grad_norm": 0.5768531560897827, "learning_rate": 7.589307411907656e-05, "loss": 1.4101, "step": 3978 }, { "epoch": 1.2077705266353012, "grad_norm": 0.4760257303714752, "learning_rate": 7.588699878493318e-05, "loss": 1.6148, "step": 3979 }, { "epoch": 1.2080740628319928, "grad_norm": 0.5474233031272888, "learning_rate": 7.588092345078979e-05, "loss": 1.4744, "step": 3980 }, { "epoch": 1.2083775990286842, "grad_norm": 1.7555142641067505, "learning_rate": 7.587484811664642e-05, "loss": 1.6162, "step": 3981 }, { "epoch": 1.2086811352253757, "grad_norm": 0.4019928574562073, "learning_rate": 7.586877278250304e-05, "loss": 1.1217, "step": 3982 }, { "epoch": 1.2089846714220671, "grad_norm": 0.5030451416969299, "learning_rate": 7.586269744835967e-05, "loss": 1.7434, "step": 3983 }, { "epoch": 1.2092882076187585, "grad_norm": 0.4046245813369751, "learning_rate": 7.585662211421628e-05, "loss": 1.3424, "step": 3984 }, { "epoch": 1.20959174381545, "grad_norm": 0.5747511386871338, "learning_rate": 7.58505467800729e-05, "loss": 1.7236, "step": 3985 }, { "epoch": 1.2098952800121414, "grad_norm": 0.505330502986908, "learning_rate": 7.584447144592954e-05, "loss": 1.1954, "step": 3986 }, { "epoch": 1.2101988162088329, "grad_norm": 0.4753364026546478, "learning_rate": 7.583839611178614e-05, "loss": 1.6601, "step": 3987 }, { "epoch": 1.2105023524055243, "grad_norm": 0.45532703399658203, "learning_rate": 7.583232077764277e-05, "loss": 1.6707, "step": 3988 }, { "epoch": 1.2108058886022157, "grad_norm": 0.5530490875244141, "learning_rate": 7.58262454434994e-05, "loss": 1.5897, "step": 3989 }, { "epoch": 1.2111094247989072, "grad_norm": 0.47323623299598694, "learning_rate": 7.582017010935602e-05, "loss": 1.7492, "step": 3990 }, { "epoch": 1.2114129609955988, "grad_norm": 0.5553590655326843, "learning_rate": 7.581409477521264e-05, "loss": 1.4958, "step": 3991 }, { "epoch": 1.2117164971922902, "grad_norm": 0.4471113979816437, "learning_rate": 7.580801944106927e-05, "loss": 1.8111, "step": 3992 }, { "epoch": 1.2120200333889817, "grad_norm": 0.5062560439109802, "learning_rate": 7.580194410692589e-05, "loss": 1.6767, "step": 3993 }, { "epoch": 1.212323569585673, "grad_norm": 0.5048001408576965, "learning_rate": 7.57958687727825e-05, "loss": 1.9827, "step": 3994 }, { "epoch": 1.2126271057823645, "grad_norm": 0.42375367879867554, "learning_rate": 7.578979343863913e-05, "loss": 1.2515, "step": 3995 }, { "epoch": 1.212930641979056, "grad_norm": 0.5349414348602295, "learning_rate": 7.578371810449575e-05, "loss": 1.7492, "step": 3996 }, { "epoch": 1.2132341781757474, "grad_norm": 0.45927226543426514, "learning_rate": 7.577764277035237e-05, "loss": 1.5423, "step": 3997 }, { "epoch": 1.2135377143724388, "grad_norm": 0.6156039237976074, "learning_rate": 7.577156743620899e-05, "loss": 1.3404, "step": 3998 }, { "epoch": 1.2138412505691303, "grad_norm": 1.3208363056182861, "learning_rate": 7.576549210206562e-05, "loss": 1.2658, "step": 3999 }, { "epoch": 1.214144786765822, "grad_norm": 0.5266988277435303, "learning_rate": 7.575941676792225e-05, "loss": 1.6878, "step": 4000 } ], "logging_steps": 1, "max_steps": 16470, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.601599129048875e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }