{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.33224431031618584, "eval_steps": 500, "global_step": 102000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003257297159962606, "grad_norm": 2.2308592796325684, "learning_rate": 4.99853416853153e-05, "loss": 1.4483, "step": 100 }, { "epoch": 0.0006514594319925212, "grad_norm": 2.3997225761413574, "learning_rate": 4.996905466899897e-05, "loss": 1.3276, "step": 200 }, { "epoch": 0.0009771891479887819, "grad_norm": 1.4687339067459106, "learning_rate": 4.995276765268264e-05, "loss": 1.3394, "step": 300 }, { "epoch": 0.0013029188639850425, "grad_norm": 0.6583470702171326, "learning_rate": 4.993648063636631e-05, "loss": 1.3245, "step": 400 }, { "epoch": 0.0016286485799813031, "grad_norm": 1.6252340078353882, "learning_rate": 4.992019362004997e-05, "loss": 1.3249, "step": 500 }, { "epoch": 0.0019543782959775637, "grad_norm": 2.0806777477264404, "learning_rate": 4.9903906603733634e-05, "loss": 1.32, "step": 600 }, { "epoch": 0.002280108011973824, "grad_norm": 1.376539707183838, "learning_rate": 4.988761958741731e-05, "loss": 1.3133, "step": 700 }, { "epoch": 0.002605837727970085, "grad_norm": 2.234644889831543, "learning_rate": 4.987133257110097e-05, "loss": 1.3179, "step": 800 }, { "epoch": 0.0029315674439663454, "grad_norm": 1.4599684476852417, "learning_rate": 4.985504555478464e-05, "loss": 1.3097, "step": 900 }, { "epoch": 0.0032572971599626062, "grad_norm": 1.7078094482421875, "learning_rate": 4.9838758538468304e-05, "loss": 1.3083, "step": 1000 }, { "epoch": 0.0035830268759588666, "grad_norm": 0.6953567266464233, "learning_rate": 4.9822471522151976e-05, "loss": 1.3075, "step": 1100 }, { "epoch": 0.0039087565919551275, "grad_norm": 1.225602626800537, "learning_rate": 4.980618450583564e-05, "loss": 1.3054, "step": 1200 }, { "epoch": 0.004234486307951388, "grad_norm": 1.3010519742965698, "learning_rate": 4.978989748951931e-05, "loss": 1.3066, "step": 1300 }, { "epoch": 0.004560216023947648, "grad_norm": 0.6475724577903748, "learning_rate": 4.9773610473202974e-05, "loss": 1.3109, "step": 1400 }, { "epoch": 0.004885945739943909, "grad_norm": 1.046614646911621, "learning_rate": 4.975732345688664e-05, "loss": 1.3074, "step": 1500 }, { "epoch": 0.00521167545594017, "grad_norm": 1.113573670387268, "learning_rate": 4.974103644057031e-05, "loss": 1.3083, "step": 1600 }, { "epoch": 0.005537405171936431, "grad_norm": 1.4273550510406494, "learning_rate": 4.972474942425398e-05, "loss": 1.3018, "step": 1700 }, { "epoch": 0.005863134887932691, "grad_norm": 0.5519908666610718, "learning_rate": 4.970846240793764e-05, "loss": 1.2945, "step": 1800 }, { "epoch": 0.006188864603928952, "grad_norm": 0.6653416156768799, "learning_rate": 4.969217539162131e-05, "loss": 1.3004, "step": 1900 }, { "epoch": 0.0065145943199252125, "grad_norm": 0.732170581817627, "learning_rate": 4.9675888375304975e-05, "loss": 1.3014, "step": 2000 }, { "epoch": 0.006840324035921473, "grad_norm": 0.405608594417572, "learning_rate": 4.965960135898865e-05, "loss": 1.2939, "step": 2100 }, { "epoch": 0.007166053751917733, "grad_norm": 0.9849847555160522, "learning_rate": 4.9643314342672306e-05, "loss": 1.2922, "step": 2200 }, { "epoch": 0.007491783467913994, "grad_norm": 0.7152832746505737, "learning_rate": 4.962702732635598e-05, "loss": 1.2905, "step": 2300 }, { "epoch": 0.007817513183910255, "grad_norm": 1.1164734363555908, "learning_rate": 4.9610740310039644e-05, "loss": 1.3024, "step": 2400 }, { "epoch": 0.008143242899906516, "grad_norm": 0.574243426322937, "learning_rate": 4.959445329372332e-05, "loss": 1.2944, "step": 2500 }, { "epoch": 0.008468972615902777, "grad_norm": 0.6976324319839478, "learning_rate": 4.9578166277406976e-05, "loss": 1.2939, "step": 2600 }, { "epoch": 0.008794702331899037, "grad_norm": 0.4648737609386444, "learning_rate": 4.956187926109064e-05, "loss": 1.2841, "step": 2700 }, { "epoch": 0.009120432047895297, "grad_norm": 1.189271092414856, "learning_rate": 4.9545592244774314e-05, "loss": 1.294, "step": 2800 }, { "epoch": 0.009446161763891557, "grad_norm": 0.6437670588493347, "learning_rate": 4.952930522845798e-05, "loss": 1.2882, "step": 2900 }, { "epoch": 0.009771891479887818, "grad_norm": 1.591304898262024, "learning_rate": 4.9513018212141646e-05, "loss": 1.2805, "step": 3000 }, { "epoch": 0.010097621195884079, "grad_norm": 0.2836475670337677, "learning_rate": 4.949673119582531e-05, "loss": 1.2802, "step": 3100 }, { "epoch": 0.01042335091188034, "grad_norm": 1.304417610168457, "learning_rate": 4.9480444179508984e-05, "loss": 1.2833, "step": 3200 }, { "epoch": 0.0107490806278766, "grad_norm": 0.27579864859580994, "learning_rate": 4.946415716319265e-05, "loss": 1.2852, "step": 3300 }, { "epoch": 0.011074810343872862, "grad_norm": 1.1080585718154907, "learning_rate": 4.9447870146876315e-05, "loss": 1.289, "step": 3400 }, { "epoch": 0.011400540059869122, "grad_norm": 0.2783690392971039, "learning_rate": 4.943158313055998e-05, "loss": 1.2885, "step": 3500 }, { "epoch": 0.011726269775865382, "grad_norm": 0.6603112816810608, "learning_rate": 4.941529611424365e-05, "loss": 1.2882, "step": 3600 }, { "epoch": 0.012051999491861642, "grad_norm": 0.9498095512390137, "learning_rate": 4.939900909792732e-05, "loss": 1.2835, "step": 3700 }, { "epoch": 0.012377729207857903, "grad_norm": 0.5274548530578613, "learning_rate": 4.9382722081610985e-05, "loss": 1.279, "step": 3800 }, { "epoch": 0.012703458923854164, "grad_norm": 0.5299821496009827, "learning_rate": 4.936643506529465e-05, "loss": 1.2879, "step": 3900 }, { "epoch": 0.013029188639850425, "grad_norm": 1.0898863077163696, "learning_rate": 4.9350148048978316e-05, "loss": 1.2913, "step": 4000 }, { "epoch": 0.013354918355846686, "grad_norm": 0.6892501711845398, "learning_rate": 4.933386103266198e-05, "loss": 1.2835, "step": 4100 }, { "epoch": 0.013680648071842947, "grad_norm": 0.9103847146034241, "learning_rate": 4.9317574016345655e-05, "loss": 1.2876, "step": 4200 }, { "epoch": 0.014006377787839207, "grad_norm": 0.8750960826873779, "learning_rate": 4.9301287000029314e-05, "loss": 1.2761, "step": 4300 }, { "epoch": 0.014332107503835467, "grad_norm": 1.7296843528747559, "learning_rate": 4.9284999983712986e-05, "loss": 1.2825, "step": 4400 }, { "epoch": 0.014657837219831727, "grad_norm": 0.7019387483596802, "learning_rate": 4.926871296739665e-05, "loss": 1.2774, "step": 4500 }, { "epoch": 0.014983566935827988, "grad_norm": 0.9353660345077515, "learning_rate": 4.9252425951080324e-05, "loss": 1.2701, "step": 4600 }, { "epoch": 0.015309296651824249, "grad_norm": 0.7081932425498962, "learning_rate": 4.923613893476399e-05, "loss": 1.276, "step": 4700 }, { "epoch": 0.01563502636782051, "grad_norm": 0.8366962671279907, "learning_rate": 4.9219851918447656e-05, "loss": 1.2767, "step": 4800 }, { "epoch": 0.01596075608381677, "grad_norm": 1.765871286392212, "learning_rate": 4.920356490213132e-05, "loss": 1.2617, "step": 4900 }, { "epoch": 0.01628648579981303, "grad_norm": 0.2926379442214966, "learning_rate": 4.918727788581499e-05, "loss": 1.2762, "step": 5000 }, { "epoch": 0.01661221551580929, "grad_norm": 1.1176525354385376, "learning_rate": 4.917099086949866e-05, "loss": 1.2647, "step": 5100 }, { "epoch": 0.016937945231805553, "grad_norm": 0.384264200925827, "learning_rate": 4.915470385318232e-05, "loss": 1.2628, "step": 5200 }, { "epoch": 0.017263674947801812, "grad_norm": 1.5339140892028809, "learning_rate": 4.913841683686599e-05, "loss": 1.2692, "step": 5300 }, { "epoch": 0.017589404663798075, "grad_norm": 1.2026703357696533, "learning_rate": 4.912212982054966e-05, "loss": 1.2618, "step": 5400 }, { "epoch": 0.017915134379794334, "grad_norm": 0.6754997968673706, "learning_rate": 4.910584280423333e-05, "loss": 1.2495, "step": 5500 }, { "epoch": 0.018240864095790593, "grad_norm": 0.8240428566932678, "learning_rate": 4.908955578791699e-05, "loss": 1.2498, "step": 5600 }, { "epoch": 0.018566593811786856, "grad_norm": 0.6363087892532349, "learning_rate": 4.9073268771600654e-05, "loss": 1.2514, "step": 5700 }, { "epoch": 0.018892323527783115, "grad_norm": 1.393833875656128, "learning_rate": 4.905698175528433e-05, "loss": 1.2509, "step": 5800 }, { "epoch": 0.019218053243779377, "grad_norm": 0.6422170996665955, "learning_rate": 4.904069473896799e-05, "loss": 1.2405, "step": 5900 }, { "epoch": 0.019543782959775637, "grad_norm": 0.7575420141220093, "learning_rate": 4.902440772265166e-05, "loss": 1.2241, "step": 6000 }, { "epoch": 0.0198695126757719, "grad_norm": 0.7148196697235107, "learning_rate": 4.9008120706335324e-05, "loss": 1.2372, "step": 6100 }, { "epoch": 0.020195242391768158, "grad_norm": 1.1207329034805298, "learning_rate": 4.8991833690018996e-05, "loss": 1.2372, "step": 6200 }, { "epoch": 0.02052097210776442, "grad_norm": 1.3915568590164185, "learning_rate": 4.897554667370266e-05, "loss": 1.2129, "step": 6300 }, { "epoch": 0.02084670182376068, "grad_norm": 0.8674553036689758, "learning_rate": 4.895925965738633e-05, "loss": 1.2262, "step": 6400 }, { "epoch": 0.02117243153975694, "grad_norm": 0.7640644311904907, "learning_rate": 4.8942972641069994e-05, "loss": 1.1998, "step": 6500 }, { "epoch": 0.0214981612557532, "grad_norm": 0.7928606271743774, "learning_rate": 4.892668562475366e-05, "loss": 1.1776, "step": 6600 }, { "epoch": 0.02182389097174946, "grad_norm": 1.1644946336746216, "learning_rate": 4.891039860843733e-05, "loss": 1.1916, "step": 6700 }, { "epoch": 0.022149620687745723, "grad_norm": 1.1310213804244995, "learning_rate": 4.8894111592121e-05, "loss": 1.1786, "step": 6800 }, { "epoch": 0.022475350403741982, "grad_norm": 1.3858141899108887, "learning_rate": 4.887782457580466e-05, "loss": 1.1728, "step": 6900 }, { "epoch": 0.022801080119738245, "grad_norm": 3.814767360687256, "learning_rate": 4.886153755948833e-05, "loss": 1.1384, "step": 7000 }, { "epoch": 0.023126809835734504, "grad_norm": 1.2411885261535645, "learning_rate": 4.8845250543171995e-05, "loss": 1.1588, "step": 7100 }, { "epoch": 0.023452539551730763, "grad_norm": 1.4492881298065186, "learning_rate": 4.882896352685567e-05, "loss": 1.1266, "step": 7200 }, { "epoch": 0.023778269267727026, "grad_norm": 0.8389878869056702, "learning_rate": 4.8812676510539326e-05, "loss": 1.1446, "step": 7300 }, { "epoch": 0.024103998983723285, "grad_norm": 0.33955487608909607, "learning_rate": 4.8796389494223e-05, "loss": 1.1111, "step": 7400 }, { "epoch": 0.024429728699719547, "grad_norm": 0.7004753351211548, "learning_rate": 4.8780102477906664e-05, "loss": 1.0954, "step": 7500 }, { "epoch": 0.024755458415715807, "grad_norm": 0.7213209271430969, "learning_rate": 4.876381546159034e-05, "loss": 1.1123, "step": 7600 }, { "epoch": 0.02508118813171207, "grad_norm": 0.960991382598877, "learning_rate": 4.8747528445273996e-05, "loss": 1.0982, "step": 7700 }, { "epoch": 0.025406917847708328, "grad_norm": 0.6955804228782654, "learning_rate": 4.873124142895766e-05, "loss": 1.0827, "step": 7800 }, { "epoch": 0.02573264756370459, "grad_norm": 0.47498619556427, "learning_rate": 4.8714954412641334e-05, "loss": 1.1043, "step": 7900 }, { "epoch": 0.02605837727970085, "grad_norm": 0.304063618183136, "learning_rate": 4.8698667396325e-05, "loss": 1.0699, "step": 8000 }, { "epoch": 0.02638410699569711, "grad_norm": 0.9996088743209839, "learning_rate": 4.8682380380008666e-05, "loss": 1.0697, "step": 8100 }, { "epoch": 0.02670983671169337, "grad_norm": 0.5986392498016357, "learning_rate": 4.866609336369233e-05, "loss": 1.0733, "step": 8200 }, { "epoch": 0.02703556642768963, "grad_norm": 0.41347017884254456, "learning_rate": 4.8649806347376004e-05, "loss": 1.0643, "step": 8300 }, { "epoch": 0.027361296143685893, "grad_norm": 0.3976612687110901, "learning_rate": 4.863351933105967e-05, "loss": 1.0401, "step": 8400 }, { "epoch": 0.027687025859682152, "grad_norm": 1.1716387271881104, "learning_rate": 4.8617232314743335e-05, "loss": 1.0298, "step": 8500 }, { "epoch": 0.028012755575678415, "grad_norm": 0.7384105324745178, "learning_rate": 4.8600945298427e-05, "loss": 1.0223, "step": 8600 }, { "epoch": 0.028338485291674674, "grad_norm": 0.517280638217926, "learning_rate": 4.858465828211067e-05, "loss": 1.0445, "step": 8700 }, { "epoch": 0.028664215007670933, "grad_norm": 0.7129126787185669, "learning_rate": 4.856837126579434e-05, "loss": 1.0508, "step": 8800 }, { "epoch": 0.028989944723667196, "grad_norm": 0.35596320033073425, "learning_rate": 4.8552084249478005e-05, "loss": 1.0296, "step": 8900 }, { "epoch": 0.029315674439663455, "grad_norm": 0.9362590909004211, "learning_rate": 4.853579723316167e-05, "loss": 1.0785, "step": 9000 }, { "epoch": 0.029641404155659717, "grad_norm": 0.8223775625228882, "learning_rate": 4.8519510216845336e-05, "loss": 1.043, "step": 9100 }, { "epoch": 0.029967133871655977, "grad_norm": 0.7149192690849304, "learning_rate": 4.8503223200529e-05, "loss": 1.0036, "step": 9200 }, { "epoch": 0.03029286358765224, "grad_norm": 0.5907948017120361, "learning_rate": 4.8486936184212675e-05, "loss": 1.0408, "step": 9300 }, { "epoch": 0.030618593303648498, "grad_norm": 0.6083859801292419, "learning_rate": 4.847064916789634e-05, "loss": 1.0313, "step": 9400 }, { "epoch": 0.03094432301964476, "grad_norm": 0.5470224618911743, "learning_rate": 4.8454362151580006e-05, "loss": 1.0395, "step": 9500 }, { "epoch": 0.03127005273564102, "grad_norm": 0.9455150961875916, "learning_rate": 4.843807513526367e-05, "loss": 1.0132, "step": 9600 }, { "epoch": 0.03159578245163728, "grad_norm": 0.9068177938461304, "learning_rate": 4.8421788118947344e-05, "loss": 1.0219, "step": 9700 }, { "epoch": 0.03192151216763354, "grad_norm": 0.6018943190574646, "learning_rate": 4.840550110263101e-05, "loss": 0.9966, "step": 9800 }, { "epoch": 0.032247241883629804, "grad_norm": 1.1521615982055664, "learning_rate": 4.838921408631467e-05, "loss": 0.9782, "step": 9900 }, { "epoch": 0.03257297159962606, "grad_norm": 0.33281368017196655, "learning_rate": 4.837292706999834e-05, "loss": 1.0325, "step": 10000 }, { "epoch": 0.03289870131562232, "grad_norm": 0.8903327584266663, "learning_rate": 4.835664005368201e-05, "loss": 0.9889, "step": 10100 }, { "epoch": 0.03322443103161858, "grad_norm": 0.5526803731918335, "learning_rate": 4.834035303736568e-05, "loss": 1.0018, "step": 10200 }, { "epoch": 0.03355016074761485, "grad_norm": 0.8086706399917603, "learning_rate": 4.832406602104934e-05, "loss": 1.0189, "step": 10300 }, { "epoch": 0.03387589046361111, "grad_norm": 0.6990864276885986, "learning_rate": 4.830777900473301e-05, "loss": 0.996, "step": 10400 }, { "epoch": 0.034201620179607366, "grad_norm": 0.4859602451324463, "learning_rate": 4.829149198841668e-05, "loss": 0.992, "step": 10500 }, { "epoch": 0.034527349895603625, "grad_norm": 1.2284592390060425, "learning_rate": 4.827520497210034e-05, "loss": 1.0139, "step": 10600 }, { "epoch": 0.034853079611599884, "grad_norm": 0.6529733538627625, "learning_rate": 4.825891795578401e-05, "loss": 1.025, "step": 10700 }, { "epoch": 0.03517880932759615, "grad_norm": 0.6755232810974121, "learning_rate": 4.8242630939467674e-05, "loss": 1.0123, "step": 10800 }, { "epoch": 0.03550453904359241, "grad_norm": 0.9006055593490601, "learning_rate": 4.8226343923151347e-05, "loss": 0.9936, "step": 10900 }, { "epoch": 0.03583026875958867, "grad_norm": 0.7058572769165039, "learning_rate": 4.821005690683501e-05, "loss": 0.934, "step": 11000 }, { "epoch": 0.03615599847558493, "grad_norm": 0.4535008668899536, "learning_rate": 4.819376989051868e-05, "loss": 1.0269, "step": 11100 }, { "epoch": 0.036481728191581186, "grad_norm": 0.39823395013809204, "learning_rate": 4.8177482874202344e-05, "loss": 0.9866, "step": 11200 }, { "epoch": 0.03680745790757745, "grad_norm": 0.8109054565429688, "learning_rate": 4.816119585788601e-05, "loss": 1.0209, "step": 11300 }, { "epoch": 0.03713318762357371, "grad_norm": 0.760396420955658, "learning_rate": 4.814490884156968e-05, "loss": 0.9711, "step": 11400 }, { "epoch": 0.03745891733956997, "grad_norm": 0.8584955334663391, "learning_rate": 4.812862182525335e-05, "loss": 1.0151, "step": 11500 }, { "epoch": 0.03778464705556623, "grad_norm": 1.104041576385498, "learning_rate": 4.8112334808937013e-05, "loss": 0.9826, "step": 11600 }, { "epoch": 0.038110376771562496, "grad_norm": 0.6111257672309875, "learning_rate": 4.809604779262068e-05, "loss": 0.9524, "step": 11700 }, { "epoch": 0.038436106487558755, "grad_norm": 0.6601366996765137, "learning_rate": 4.807976077630435e-05, "loss": 0.9527, "step": 11800 }, { "epoch": 0.038761836203555014, "grad_norm": 0.4624398350715637, "learning_rate": 4.806347375998802e-05, "loss": 1.0077, "step": 11900 }, { "epoch": 0.03908756591955127, "grad_norm": 0.2786065638065338, "learning_rate": 4.8047186743671676e-05, "loss": 0.956, "step": 12000 }, { "epoch": 0.03941329563554753, "grad_norm": 1.0275955200195312, "learning_rate": 4.803089972735535e-05, "loss": 0.9484, "step": 12100 }, { "epoch": 0.0397390253515438, "grad_norm": 0.6198407411575317, "learning_rate": 4.8014612711039015e-05, "loss": 0.9847, "step": 12200 }, { "epoch": 0.04006475506754006, "grad_norm": 0.5880489945411682, "learning_rate": 4.799832569472269e-05, "loss": 0.9559, "step": 12300 }, { "epoch": 0.040390484783536316, "grad_norm": 0.39753594994544983, "learning_rate": 4.7982038678406346e-05, "loss": 0.9489, "step": 12400 }, { "epoch": 0.040716214499532576, "grad_norm": 0.5815085768699646, "learning_rate": 4.796575166209002e-05, "loss": 0.9567, "step": 12500 }, { "epoch": 0.04104194421552884, "grad_norm": 0.8463611602783203, "learning_rate": 4.7949464645773684e-05, "loss": 0.9706, "step": 12600 }, { "epoch": 0.0413676739315251, "grad_norm": 0.7260481715202332, "learning_rate": 4.793317762945736e-05, "loss": 1.0032, "step": 12700 }, { "epoch": 0.04169340364752136, "grad_norm": 0.6970434188842773, "learning_rate": 4.7916890613141016e-05, "loss": 0.9559, "step": 12800 }, { "epoch": 0.04201913336351762, "grad_norm": 0.6083927750587463, "learning_rate": 4.790060359682468e-05, "loss": 0.9558, "step": 12900 }, { "epoch": 0.04234486307951388, "grad_norm": 0.4736403524875641, "learning_rate": 4.7884316580508354e-05, "loss": 0.9444, "step": 13000 }, { "epoch": 0.042670592795510144, "grad_norm": 0.34586021304130554, "learning_rate": 4.786802956419202e-05, "loss": 0.9186, "step": 13100 }, { "epoch": 0.0429963225115064, "grad_norm": 0.5979019403457642, "learning_rate": 4.7851742547875685e-05, "loss": 0.9367, "step": 13200 }, { "epoch": 0.04332205222750266, "grad_norm": 1.0827624797821045, "learning_rate": 4.783545553155935e-05, "loss": 0.9324, "step": 13300 }, { "epoch": 0.04364778194349892, "grad_norm": 1.1920030117034912, "learning_rate": 4.7819168515243024e-05, "loss": 0.9367, "step": 13400 }, { "epoch": 0.04397351165949519, "grad_norm": 0.6469812989234924, "learning_rate": 4.780288149892669e-05, "loss": 0.9815, "step": 13500 }, { "epoch": 0.04429924137549145, "grad_norm": 0.8156530857086182, "learning_rate": 4.7786594482610355e-05, "loss": 0.9679, "step": 13600 }, { "epoch": 0.044624971091487706, "grad_norm": 1.2997325658798218, "learning_rate": 4.777030746629402e-05, "loss": 0.9358, "step": 13700 }, { "epoch": 0.044950700807483965, "grad_norm": 0.42360150814056396, "learning_rate": 4.7754020449977687e-05, "loss": 0.9326, "step": 13800 }, { "epoch": 0.045276430523480224, "grad_norm": 0.7316247820854187, "learning_rate": 4.773773343366136e-05, "loss": 0.9283, "step": 13900 }, { "epoch": 0.04560216023947649, "grad_norm": 0.5978175401687622, "learning_rate": 4.7721446417345025e-05, "loss": 0.9699, "step": 14000 }, { "epoch": 0.04592788995547275, "grad_norm": 0.5278334617614746, "learning_rate": 4.770515940102869e-05, "loss": 0.99, "step": 14100 }, { "epoch": 0.04625361967146901, "grad_norm": 0.7452822327613831, "learning_rate": 4.7688872384712356e-05, "loss": 0.8824, "step": 14200 }, { "epoch": 0.04657934938746527, "grad_norm": 0.4158065617084503, "learning_rate": 4.767258536839602e-05, "loss": 0.9076, "step": 14300 }, { "epoch": 0.046905079103461526, "grad_norm": 0.6929590106010437, "learning_rate": 4.7656298352079694e-05, "loss": 0.926, "step": 14400 }, { "epoch": 0.04723080881945779, "grad_norm": 0.8249752521514893, "learning_rate": 4.764001133576336e-05, "loss": 0.9342, "step": 14500 }, { "epoch": 0.04755653853545405, "grad_norm": 0.6523115038871765, "learning_rate": 4.7623724319447026e-05, "loss": 0.9312, "step": 14600 }, { "epoch": 0.04788226825145031, "grad_norm": 0.7809571027755737, "learning_rate": 4.760743730313069e-05, "loss": 0.927, "step": 14700 }, { "epoch": 0.04820799796744657, "grad_norm": 0.4370424747467041, "learning_rate": 4.7591150286814364e-05, "loss": 0.9275, "step": 14800 }, { "epoch": 0.048533727683442836, "grad_norm": 0.8082228302955627, "learning_rate": 4.757486327049803e-05, "loss": 0.9524, "step": 14900 }, { "epoch": 0.048859457399439095, "grad_norm": 0.7073273658752441, "learning_rate": 4.755857625418169e-05, "loss": 0.9069, "step": 15000 }, { "epoch": 0.049185187115435354, "grad_norm": 0.9150802493095398, "learning_rate": 4.754228923786536e-05, "loss": 0.9669, "step": 15100 }, { "epoch": 0.04951091683143161, "grad_norm": 0.6621295809745789, "learning_rate": 4.752600222154903e-05, "loss": 0.9117, "step": 15200 }, { "epoch": 0.04983664654742787, "grad_norm": 1.1658425331115723, "learning_rate": 4.75097152052327e-05, "loss": 0.9061, "step": 15300 }, { "epoch": 0.05016237626342414, "grad_norm": 1.1669522523880005, "learning_rate": 4.749342818891636e-05, "loss": 0.9625, "step": 15400 }, { "epoch": 0.0504881059794204, "grad_norm": 0.6995384693145752, "learning_rate": 4.747714117260003e-05, "loss": 0.9098, "step": 15500 }, { "epoch": 0.050813835695416656, "grad_norm": 0.5169076919555664, "learning_rate": 4.74608541562837e-05, "loss": 0.9243, "step": 15600 }, { "epoch": 0.051139565411412916, "grad_norm": 0.33565372228622437, "learning_rate": 4.744456713996736e-05, "loss": 0.9375, "step": 15700 }, { "epoch": 0.05146529512740918, "grad_norm": 0.4140024781227112, "learning_rate": 4.742828012365103e-05, "loss": 0.919, "step": 15800 }, { "epoch": 0.05179102484340544, "grad_norm": 0.9499224424362183, "learning_rate": 4.7411993107334694e-05, "loss": 0.9034, "step": 15900 }, { "epoch": 0.0521167545594017, "grad_norm": 0.8801336288452148, "learning_rate": 4.7395706091018366e-05, "loss": 0.881, "step": 16000 }, { "epoch": 0.05244248427539796, "grad_norm": 0.7208696007728577, "learning_rate": 4.737941907470203e-05, "loss": 0.8518, "step": 16100 }, { "epoch": 0.05276821399139422, "grad_norm": 0.5132054686546326, "learning_rate": 4.73631320583857e-05, "loss": 0.8933, "step": 16200 }, { "epoch": 0.053093943707390484, "grad_norm": 0.6521860957145691, "learning_rate": 4.7346845042069364e-05, "loss": 0.9332, "step": 16300 }, { "epoch": 0.05341967342338674, "grad_norm": 0.7121620178222656, "learning_rate": 4.733055802575303e-05, "loss": 0.9067, "step": 16400 }, { "epoch": 0.053745403139383, "grad_norm": 0.5065134763717651, "learning_rate": 4.73142710094367e-05, "loss": 0.9062, "step": 16500 }, { "epoch": 0.05407113285537926, "grad_norm": 0.5855521559715271, "learning_rate": 4.729798399312037e-05, "loss": 0.915, "step": 16600 }, { "epoch": 0.05439686257137553, "grad_norm": 0.5392531156539917, "learning_rate": 4.728169697680403e-05, "loss": 0.9124, "step": 16700 }, { "epoch": 0.05472259228737179, "grad_norm": 0.6617989540100098, "learning_rate": 4.72654099604877e-05, "loss": 0.8594, "step": 16800 }, { "epoch": 0.055048322003368046, "grad_norm": 0.6459785103797913, "learning_rate": 4.724912294417137e-05, "loss": 0.9262, "step": 16900 }, { "epoch": 0.055374051719364305, "grad_norm": 0.34565970301628113, "learning_rate": 4.723283592785504e-05, "loss": 0.8747, "step": 17000 }, { "epoch": 0.055699781435360564, "grad_norm": 0.9510948061943054, "learning_rate": 4.7216548911538696e-05, "loss": 0.9027, "step": 17100 }, { "epoch": 0.05602551115135683, "grad_norm": 0.577192485332489, "learning_rate": 4.720026189522237e-05, "loss": 0.9192, "step": 17200 }, { "epoch": 0.05635124086735309, "grad_norm": 0.38653406500816345, "learning_rate": 4.7183974878906034e-05, "loss": 0.8759, "step": 17300 }, { "epoch": 0.05667697058334935, "grad_norm": 0.6405381560325623, "learning_rate": 4.716768786258971e-05, "loss": 0.8486, "step": 17400 }, { "epoch": 0.05700270029934561, "grad_norm": 0.6968704462051392, "learning_rate": 4.7151400846273366e-05, "loss": 0.903, "step": 17500 }, { "epoch": 0.057328430015341866, "grad_norm": 0.8094695210456848, "learning_rate": 4.713511382995704e-05, "loss": 0.864, "step": 17600 }, { "epoch": 0.05765415973133813, "grad_norm": 0.8325287103652954, "learning_rate": 4.7118826813640704e-05, "loss": 0.8886, "step": 17700 }, { "epoch": 0.05797988944733439, "grad_norm": 0.5068339705467224, "learning_rate": 4.710253979732437e-05, "loss": 0.8767, "step": 17800 }, { "epoch": 0.05830561916333065, "grad_norm": 0.7535611391067505, "learning_rate": 4.7086252781008036e-05, "loss": 0.8661, "step": 17900 }, { "epoch": 0.05863134887932691, "grad_norm": 0.9104974865913391, "learning_rate": 4.70699657646917e-05, "loss": 0.8612, "step": 18000 }, { "epoch": 0.058957078595323176, "grad_norm": 0.9106101989746094, "learning_rate": 4.7053678748375374e-05, "loss": 0.8885, "step": 18100 }, { "epoch": 0.059282808311319435, "grad_norm": 0.9990994334220886, "learning_rate": 4.703739173205904e-05, "loss": 0.9097, "step": 18200 }, { "epoch": 0.059608538027315694, "grad_norm": 0.6219133138656616, "learning_rate": 4.7021104715742705e-05, "loss": 0.8349, "step": 18300 }, { "epoch": 0.05993426774331195, "grad_norm": 0.28884798288345337, "learning_rate": 4.700481769942637e-05, "loss": 0.8359, "step": 18400 }, { "epoch": 0.06025999745930821, "grad_norm": 0.6142743229866028, "learning_rate": 4.698853068311004e-05, "loss": 0.8686, "step": 18500 }, { "epoch": 0.06058572717530448, "grad_norm": 0.7121238708496094, "learning_rate": 4.697224366679371e-05, "loss": 0.8318, "step": 18600 }, { "epoch": 0.06091145689130074, "grad_norm": 0.3502013683319092, "learning_rate": 4.6955956650477375e-05, "loss": 0.8353, "step": 18700 }, { "epoch": 0.061237186607296996, "grad_norm": 0.869159460067749, "learning_rate": 4.693966963416104e-05, "loss": 0.8811, "step": 18800 }, { "epoch": 0.061562916323293256, "grad_norm": 0.4008027911186218, "learning_rate": 4.6923382617844706e-05, "loss": 0.8595, "step": 18900 }, { "epoch": 0.06188864603928952, "grad_norm": 0.6609760522842407, "learning_rate": 4.690709560152838e-05, "loss": 0.8591, "step": 19000 }, { "epoch": 0.06221437575528578, "grad_norm": 0.41599878668785095, "learning_rate": 4.6890808585212045e-05, "loss": 0.8792, "step": 19100 }, { "epoch": 0.06254010547128204, "grad_norm": 0.8219528794288635, "learning_rate": 4.687452156889571e-05, "loss": 0.8469, "step": 19200 }, { "epoch": 0.0628658351872783, "grad_norm": 0.5383628010749817, "learning_rate": 4.6858234552579376e-05, "loss": 0.8619, "step": 19300 }, { "epoch": 0.06319156490327456, "grad_norm": 1.0892442464828491, "learning_rate": 4.684194753626304e-05, "loss": 0.8219, "step": 19400 }, { "epoch": 0.06351729461927082, "grad_norm": 0.7258702516555786, "learning_rate": 4.6825660519946714e-05, "loss": 0.8243, "step": 19500 }, { "epoch": 0.06384302433526708, "grad_norm": 1.2622634172439575, "learning_rate": 4.680937350363038e-05, "loss": 0.8619, "step": 19600 }, { "epoch": 0.06416875405126335, "grad_norm": 0.3901592195034027, "learning_rate": 4.6793086487314046e-05, "loss": 0.8315, "step": 19700 }, { "epoch": 0.06449448376725961, "grad_norm": 0.5976518392562866, "learning_rate": 4.677679947099771e-05, "loss": 0.8193, "step": 19800 }, { "epoch": 0.06482021348325587, "grad_norm": 1.0668984651565552, "learning_rate": 4.676051245468138e-05, "loss": 0.8381, "step": 19900 }, { "epoch": 0.06514594319925213, "grad_norm": 0.6844903826713562, "learning_rate": 4.674422543836505e-05, "loss": 0.8202, "step": 20000 }, { "epoch": 0.06547167291524839, "grad_norm": 0.6987929344177246, "learning_rate": 4.672793842204871e-05, "loss": 0.844, "step": 20100 }, { "epoch": 0.06579740263124464, "grad_norm": 1.0227413177490234, "learning_rate": 4.671165140573238e-05, "loss": 0.8093, "step": 20200 }, { "epoch": 0.0661231323472409, "grad_norm": 0.5901645421981812, "learning_rate": 4.669536438941605e-05, "loss": 0.8068, "step": 20300 }, { "epoch": 0.06644886206323716, "grad_norm": 0.7951213717460632, "learning_rate": 4.667907737309972e-05, "loss": 0.8581, "step": 20400 }, { "epoch": 0.06677459177923342, "grad_norm": 0.617341160774231, "learning_rate": 4.666279035678338e-05, "loss": 0.8427, "step": 20500 }, { "epoch": 0.0671003214952297, "grad_norm": 0.694558322429657, "learning_rate": 4.6646503340467044e-05, "loss": 0.8619, "step": 20600 }, { "epoch": 0.06742605121122595, "grad_norm": 0.6441329717636108, "learning_rate": 4.663021632415072e-05, "loss": 0.8866, "step": 20700 }, { "epoch": 0.06775178092722221, "grad_norm": 0.46440285444259644, "learning_rate": 4.661392930783438e-05, "loss": 0.8435, "step": 20800 }, { "epoch": 0.06807751064321847, "grad_norm": 0.42911046743392944, "learning_rate": 4.659764229151805e-05, "loss": 0.8145, "step": 20900 }, { "epoch": 0.06840324035921473, "grad_norm": 0.7508918046951294, "learning_rate": 4.6581355275201714e-05, "loss": 0.8576, "step": 21000 }, { "epoch": 0.06872897007521099, "grad_norm": 0.6361901164054871, "learning_rate": 4.6565068258885386e-05, "loss": 0.7982, "step": 21100 }, { "epoch": 0.06905469979120725, "grad_norm": 0.804426372051239, "learning_rate": 4.654878124256905e-05, "loss": 0.8386, "step": 21200 }, { "epoch": 0.06938042950720351, "grad_norm": 0.5336636304855347, "learning_rate": 4.653249422625272e-05, "loss": 0.8296, "step": 21300 }, { "epoch": 0.06970615922319977, "grad_norm": 0.5880811810493469, "learning_rate": 4.6516207209936384e-05, "loss": 0.8065, "step": 21400 }, { "epoch": 0.07003188893919603, "grad_norm": 0.4607875347137451, "learning_rate": 4.649992019362005e-05, "loss": 0.8601, "step": 21500 }, { "epoch": 0.0703576186551923, "grad_norm": 0.6503331065177917, "learning_rate": 4.648363317730372e-05, "loss": 0.7925, "step": 21600 }, { "epoch": 0.07068334837118856, "grad_norm": 0.7841913104057312, "learning_rate": 4.646734616098739e-05, "loss": 0.8218, "step": 21700 }, { "epoch": 0.07100907808718482, "grad_norm": 0.45437848567962646, "learning_rate": 4.645105914467105e-05, "loss": 0.8663, "step": 21800 }, { "epoch": 0.07133480780318108, "grad_norm": 0.6052650213241577, "learning_rate": 4.643477212835472e-05, "loss": 0.8634, "step": 21900 }, { "epoch": 0.07166053751917734, "grad_norm": 0.5301306247711182, "learning_rate": 4.641848511203839e-05, "loss": 0.8215, "step": 22000 }, { "epoch": 0.0719862672351736, "grad_norm": 0.8724095821380615, "learning_rate": 4.640219809572206e-05, "loss": 0.8304, "step": 22100 }, { "epoch": 0.07231199695116985, "grad_norm": 0.8219661116600037, "learning_rate": 4.6385911079405716e-05, "loss": 0.8515, "step": 22200 }, { "epoch": 0.07263772666716611, "grad_norm": 0.6308414936065674, "learning_rate": 4.636962406308939e-05, "loss": 0.7233, "step": 22300 }, { "epoch": 0.07296345638316237, "grad_norm": 0.35772112011909485, "learning_rate": 4.6353337046773054e-05, "loss": 0.7792, "step": 22400 }, { "epoch": 0.07328918609915865, "grad_norm": 0.519975483417511, "learning_rate": 4.633705003045673e-05, "loss": 0.8265, "step": 22500 }, { "epoch": 0.0736149158151549, "grad_norm": 0.8935458660125732, "learning_rate": 4.6320763014140386e-05, "loss": 0.8276, "step": 22600 }, { "epoch": 0.07394064553115116, "grad_norm": 0.4765929877758026, "learning_rate": 4.630447599782406e-05, "loss": 0.8088, "step": 22700 }, { "epoch": 0.07426637524714742, "grad_norm": 0.5910876989364624, "learning_rate": 4.6288188981507724e-05, "loss": 0.8003, "step": 22800 }, { "epoch": 0.07459210496314368, "grad_norm": 0.6108260154724121, "learning_rate": 4.627190196519139e-05, "loss": 0.7949, "step": 22900 }, { "epoch": 0.07491783467913994, "grad_norm": 0.9665610194206238, "learning_rate": 4.625561494887506e-05, "loss": 0.7989, "step": 23000 }, { "epoch": 0.0752435643951362, "grad_norm": 0.43020346760749817, "learning_rate": 4.623932793255872e-05, "loss": 0.8052, "step": 23100 }, { "epoch": 0.07556929411113246, "grad_norm": 0.3901965022087097, "learning_rate": 4.6223040916242394e-05, "loss": 0.7756, "step": 23200 }, { "epoch": 0.07589502382712872, "grad_norm": 0.8132317066192627, "learning_rate": 4.620675389992606e-05, "loss": 0.797, "step": 23300 }, { "epoch": 0.07622075354312499, "grad_norm": 0.6211370825767517, "learning_rate": 4.619046688360973e-05, "loss": 0.7698, "step": 23400 }, { "epoch": 0.07654648325912125, "grad_norm": 0.8378313779830933, "learning_rate": 4.617417986729339e-05, "loss": 0.805, "step": 23500 }, { "epoch": 0.07687221297511751, "grad_norm": 0.9225132465362549, "learning_rate": 4.615789285097706e-05, "loss": 0.7999, "step": 23600 }, { "epoch": 0.07719794269111377, "grad_norm": 0.46878713369369507, "learning_rate": 4.614160583466073e-05, "loss": 0.75, "step": 23700 }, { "epoch": 0.07752367240711003, "grad_norm": 0.409138560295105, "learning_rate": 4.6125318818344395e-05, "loss": 0.7944, "step": 23800 }, { "epoch": 0.07784940212310629, "grad_norm": 0.4791303277015686, "learning_rate": 4.610903180202806e-05, "loss": 0.7912, "step": 23900 }, { "epoch": 0.07817513183910255, "grad_norm": 0.8759014010429382, "learning_rate": 4.6092744785711726e-05, "loss": 0.8198, "step": 24000 }, { "epoch": 0.0785008615550988, "grad_norm": 0.47595012187957764, "learning_rate": 4.60764577693954e-05, "loss": 0.7984, "step": 24100 }, { "epoch": 0.07882659127109506, "grad_norm": 0.7923133373260498, "learning_rate": 4.6060170753079065e-05, "loss": 0.7436, "step": 24200 }, { "epoch": 0.07915232098709134, "grad_norm": 0.39254361391067505, "learning_rate": 4.604388373676273e-05, "loss": 0.7771, "step": 24300 }, { "epoch": 0.0794780507030876, "grad_norm": 0.6828033924102783, "learning_rate": 4.6027596720446396e-05, "loss": 0.8083, "step": 24400 }, { "epoch": 0.07980378041908386, "grad_norm": 0.6189585328102112, "learning_rate": 4.601130970413006e-05, "loss": 0.7885, "step": 24500 }, { "epoch": 0.08012951013508011, "grad_norm": 0.6750975847244263, "learning_rate": 4.5995022687813734e-05, "loss": 0.759, "step": 24600 }, { "epoch": 0.08045523985107637, "grad_norm": 0.6616020798683167, "learning_rate": 4.59787356714974e-05, "loss": 0.8226, "step": 24700 }, { "epoch": 0.08078096956707263, "grad_norm": 0.7598117589950562, "learning_rate": 4.5962448655181066e-05, "loss": 0.7806, "step": 24800 }, { "epoch": 0.08110669928306889, "grad_norm": 0.41183263063430786, "learning_rate": 4.594616163886473e-05, "loss": 0.7939, "step": 24900 }, { "epoch": 0.08143242899906515, "grad_norm": 0.40911582112312317, "learning_rate": 4.59298746225484e-05, "loss": 0.7635, "step": 25000 }, { "epoch": 0.08175815871506141, "grad_norm": 0.8820083737373352, "learning_rate": 4.591358760623207e-05, "loss": 0.7886, "step": 25100 }, { "epoch": 0.08208388843105768, "grad_norm": 0.9055482745170593, "learning_rate": 4.589730058991573e-05, "loss": 0.7487, "step": 25200 }, { "epoch": 0.08240961814705394, "grad_norm": 0.5680561065673828, "learning_rate": 4.58810135735994e-05, "loss": 0.7505, "step": 25300 }, { "epoch": 0.0827353478630502, "grad_norm": 0.5064377188682556, "learning_rate": 4.586472655728307e-05, "loss": 0.768, "step": 25400 }, { "epoch": 0.08306107757904646, "grad_norm": 0.462200403213501, "learning_rate": 4.584843954096674e-05, "loss": 0.7399, "step": 25500 }, { "epoch": 0.08338680729504272, "grad_norm": 0.7820500731468201, "learning_rate": 4.58321525246504e-05, "loss": 0.8109, "step": 25600 }, { "epoch": 0.08371253701103898, "grad_norm": 0.4833464026451111, "learning_rate": 4.5815865508334064e-05, "loss": 0.764, "step": 25700 }, { "epoch": 0.08403826672703524, "grad_norm": 0.3821680247783661, "learning_rate": 4.5799578492017737e-05, "loss": 0.7397, "step": 25800 }, { "epoch": 0.0843639964430315, "grad_norm": 0.5084909200668335, "learning_rate": 4.57832914757014e-05, "loss": 0.7428, "step": 25900 }, { "epoch": 0.08468972615902776, "grad_norm": 0.925619900226593, "learning_rate": 4.576700445938507e-05, "loss": 0.7386, "step": 26000 }, { "epoch": 0.08501545587502403, "grad_norm": 0.8126088380813599, "learning_rate": 4.5750717443068734e-05, "loss": 0.7798, "step": 26100 }, { "epoch": 0.08534118559102029, "grad_norm": 1.0178046226501465, "learning_rate": 4.5734430426752406e-05, "loss": 0.7796, "step": 26200 }, { "epoch": 0.08566691530701655, "grad_norm": 0.4879295229911804, "learning_rate": 4.571814341043607e-05, "loss": 0.7762, "step": 26300 }, { "epoch": 0.0859926450230128, "grad_norm": 0.6722548604011536, "learning_rate": 4.570185639411974e-05, "loss": 0.7234, "step": 26400 }, { "epoch": 0.08631837473900907, "grad_norm": 0.6326486468315125, "learning_rate": 4.5685569377803403e-05, "loss": 0.72, "step": 26500 }, { "epoch": 0.08664410445500532, "grad_norm": 0.4354076087474823, "learning_rate": 4.566928236148707e-05, "loss": 0.7704, "step": 26600 }, { "epoch": 0.08696983417100158, "grad_norm": 0.7113054394721985, "learning_rate": 4.565299534517074e-05, "loss": 0.7623, "step": 26700 }, { "epoch": 0.08729556388699784, "grad_norm": 0.595664381980896, "learning_rate": 4.563670832885441e-05, "loss": 0.765, "step": 26800 }, { "epoch": 0.0876212936029941, "grad_norm": 0.5344740152359009, "learning_rate": 4.562042131253807e-05, "loss": 0.7201, "step": 26900 }, { "epoch": 0.08794702331899037, "grad_norm": 0.5330939292907715, "learning_rate": 4.560413429622174e-05, "loss": 0.7617, "step": 27000 }, { "epoch": 0.08827275303498663, "grad_norm": 0.45265939831733704, "learning_rate": 4.5587847279905405e-05, "loss": 0.7806, "step": 27100 }, { "epoch": 0.0885984827509829, "grad_norm": 0.5947338342666626, "learning_rate": 4.557156026358908e-05, "loss": 0.7524, "step": 27200 }, { "epoch": 0.08892421246697915, "grad_norm": 0.8656592965126038, "learning_rate": 4.555527324727274e-05, "loss": 0.7599, "step": 27300 }, { "epoch": 0.08924994218297541, "grad_norm": 0.645728349685669, "learning_rate": 4.553898623095641e-05, "loss": 0.7629, "step": 27400 }, { "epoch": 0.08957567189897167, "grad_norm": 0.8474392890930176, "learning_rate": 4.5522699214640074e-05, "loss": 0.7641, "step": 27500 }, { "epoch": 0.08990140161496793, "grad_norm": 0.7386724948883057, "learning_rate": 4.550641219832375e-05, "loss": 0.7523, "step": 27600 }, { "epoch": 0.09022713133096419, "grad_norm": 0.9216130971908569, "learning_rate": 4.549012518200741e-05, "loss": 0.7562, "step": 27700 }, { "epoch": 0.09055286104696045, "grad_norm": 0.8789349794387817, "learning_rate": 4.547383816569107e-05, "loss": 0.7229, "step": 27800 }, { "epoch": 0.0908785907629567, "grad_norm": 0.582091748714447, "learning_rate": 4.5457551149374744e-05, "loss": 0.7274, "step": 27900 }, { "epoch": 0.09120432047895298, "grad_norm": 0.6011328101158142, "learning_rate": 4.544126413305841e-05, "loss": 0.7297, "step": 28000 }, { "epoch": 0.09153005019494924, "grad_norm": 0.6041598916053772, "learning_rate": 4.542497711674208e-05, "loss": 0.7409, "step": 28100 }, { "epoch": 0.0918557799109455, "grad_norm": 0.7190874814987183, "learning_rate": 4.540869010042574e-05, "loss": 0.7149, "step": 28200 }, { "epoch": 0.09218150962694176, "grad_norm": 0.5705780982971191, "learning_rate": 4.5392403084109414e-05, "loss": 0.76, "step": 28300 }, { "epoch": 0.09250723934293802, "grad_norm": 0.7988401651382446, "learning_rate": 4.537611606779308e-05, "loss": 0.7594, "step": 28400 }, { "epoch": 0.09283296905893428, "grad_norm": 0.48971208930015564, "learning_rate": 4.5359829051476745e-05, "loss": 0.7505, "step": 28500 }, { "epoch": 0.09315869877493053, "grad_norm": 0.6600379347801208, "learning_rate": 4.534354203516041e-05, "loss": 0.7902, "step": 28600 }, { "epoch": 0.0934844284909268, "grad_norm": 0.6095920205116272, "learning_rate": 4.5327255018844077e-05, "loss": 0.7166, "step": 28700 }, { "epoch": 0.09381015820692305, "grad_norm": 0.6808424592018127, "learning_rate": 4.531096800252775e-05, "loss": 0.7148, "step": 28800 }, { "epoch": 0.09413588792291933, "grad_norm": 0.9923068284988403, "learning_rate": 4.5294680986211415e-05, "loss": 0.7226, "step": 28900 }, { "epoch": 0.09446161763891558, "grad_norm": 0.8952274918556213, "learning_rate": 4.527839396989508e-05, "loss": 0.7645, "step": 29000 }, { "epoch": 0.09478734735491184, "grad_norm": 0.7416999936103821, "learning_rate": 4.5262106953578746e-05, "loss": 0.7503, "step": 29100 }, { "epoch": 0.0951130770709081, "grad_norm": 0.7862002849578857, "learning_rate": 4.524581993726242e-05, "loss": 0.7469, "step": 29200 }, { "epoch": 0.09543880678690436, "grad_norm": 0.6296769380569458, "learning_rate": 4.5229532920946085e-05, "loss": 0.6873, "step": 29300 }, { "epoch": 0.09576453650290062, "grad_norm": 0.9056894779205322, "learning_rate": 4.521324590462975e-05, "loss": 0.7126, "step": 29400 }, { "epoch": 0.09609026621889688, "grad_norm": 0.624724268913269, "learning_rate": 4.5196958888313416e-05, "loss": 0.7668, "step": 29500 }, { "epoch": 0.09641599593489314, "grad_norm": 0.680957555770874, "learning_rate": 4.518067187199708e-05, "loss": 0.7783, "step": 29600 }, { "epoch": 0.0967417256508894, "grad_norm": 0.5778472423553467, "learning_rate": 4.5164384855680754e-05, "loss": 0.7355, "step": 29700 }, { "epoch": 0.09706745536688567, "grad_norm": 0.6346442699432373, "learning_rate": 4.514809783936442e-05, "loss": 0.7276, "step": 29800 }, { "epoch": 0.09739318508288193, "grad_norm": 0.9289300441741943, "learning_rate": 4.5131810823048086e-05, "loss": 0.7179, "step": 29900 }, { "epoch": 0.09771891479887819, "grad_norm": 0.7473464012145996, "learning_rate": 4.511552380673175e-05, "loss": 0.7172, "step": 30000 }, { "epoch": 0.09804464451487445, "grad_norm": 0.6801792979240417, "learning_rate": 4.509923679041542e-05, "loss": 0.7074, "step": 30100 }, { "epoch": 0.09837037423087071, "grad_norm": 0.6129624247550964, "learning_rate": 4.508294977409909e-05, "loss": 0.7166, "step": 30200 }, { "epoch": 0.09869610394686697, "grad_norm": 0.8195613026618958, "learning_rate": 4.506666275778275e-05, "loss": 0.7709, "step": 30300 }, { "epoch": 0.09902183366286323, "grad_norm": 0.4703550934791565, "learning_rate": 4.505037574146642e-05, "loss": 0.7037, "step": 30400 }, { "epoch": 0.09934756337885949, "grad_norm": 0.7674877047538757, "learning_rate": 4.503408872515009e-05, "loss": 0.7202, "step": 30500 }, { "epoch": 0.09967329309485574, "grad_norm": 0.8670388460159302, "learning_rate": 4.501780170883376e-05, "loss": 0.7183, "step": 30600 }, { "epoch": 0.09999902281085202, "grad_norm": 0.280652791261673, "learning_rate": 4.500151469251742e-05, "loss": 0.6998, "step": 30700 }, { "epoch": 0.10032475252684828, "grad_norm": 0.7346746325492859, "learning_rate": 4.4985227676201084e-05, "loss": 0.7358, "step": 30800 }, { "epoch": 0.10065048224284454, "grad_norm": 0.978670060634613, "learning_rate": 4.4968940659884756e-05, "loss": 0.7259, "step": 30900 }, { "epoch": 0.1009762119588408, "grad_norm": 0.5910704135894775, "learning_rate": 4.495265364356842e-05, "loss": 0.7074, "step": 31000 }, { "epoch": 0.10130194167483705, "grad_norm": 0.7966532707214355, "learning_rate": 4.493636662725209e-05, "loss": 0.7117, "step": 31100 }, { "epoch": 0.10162767139083331, "grad_norm": 0.9344640374183655, "learning_rate": 4.4920079610935754e-05, "loss": 0.7349, "step": 31200 }, { "epoch": 0.10195340110682957, "grad_norm": 0.8043787479400635, "learning_rate": 4.4903792594619426e-05, "loss": 0.7361, "step": 31300 }, { "epoch": 0.10227913082282583, "grad_norm": 0.6786687970161438, "learning_rate": 4.488750557830309e-05, "loss": 0.6969, "step": 31400 }, { "epoch": 0.10260486053882209, "grad_norm": 0.4679253399372101, "learning_rate": 4.487121856198676e-05, "loss": 0.7157, "step": 31500 }, { "epoch": 0.10293059025481836, "grad_norm": 0.5903817415237427, "learning_rate": 4.485493154567042e-05, "loss": 0.7352, "step": 31600 }, { "epoch": 0.10325631997081462, "grad_norm": 0.715834379196167, "learning_rate": 4.483864452935409e-05, "loss": 0.7532, "step": 31700 }, { "epoch": 0.10358204968681088, "grad_norm": 0.6664106249809265, "learning_rate": 4.482235751303776e-05, "loss": 0.6853, "step": 31800 }, { "epoch": 0.10390777940280714, "grad_norm": 0.700243353843689, "learning_rate": 4.480607049672143e-05, "loss": 0.6835, "step": 31900 }, { "epoch": 0.1042335091188034, "grad_norm": 0.7481942772865295, "learning_rate": 4.478978348040509e-05, "loss": 0.7343, "step": 32000 }, { "epoch": 0.10455923883479966, "grad_norm": 0.5347774028778076, "learning_rate": 4.477349646408876e-05, "loss": 0.6688, "step": 32100 }, { "epoch": 0.10488496855079592, "grad_norm": 0.541346549987793, "learning_rate": 4.4757209447772425e-05, "loss": 0.7088, "step": 32200 }, { "epoch": 0.10521069826679218, "grad_norm": 0.6126936674118042, "learning_rate": 4.47409224314561e-05, "loss": 0.7333, "step": 32300 }, { "epoch": 0.10553642798278844, "grad_norm": 0.952684760093689, "learning_rate": 4.472463541513976e-05, "loss": 0.7242, "step": 32400 }, { "epoch": 0.10586215769878471, "grad_norm": 0.72658771276474, "learning_rate": 4.470834839882343e-05, "loss": 0.7422, "step": 32500 }, { "epoch": 0.10618788741478097, "grad_norm": 0.5741873383522034, "learning_rate": 4.4692061382507094e-05, "loss": 0.7307, "step": 32600 }, { "epoch": 0.10651361713077723, "grad_norm": 0.646496057510376, "learning_rate": 4.467577436619077e-05, "loss": 0.7138, "step": 32700 }, { "epoch": 0.10683934684677349, "grad_norm": 0.40007448196411133, "learning_rate": 4.465948734987443e-05, "loss": 0.7045, "step": 32800 }, { "epoch": 0.10716507656276975, "grad_norm": 0.6594932675361633, "learning_rate": 4.464320033355809e-05, "loss": 0.6874, "step": 32900 }, { "epoch": 0.107490806278766, "grad_norm": 0.7663995623588562, "learning_rate": 4.4626913317241764e-05, "loss": 0.7303, "step": 33000 }, { "epoch": 0.10781653599476226, "grad_norm": 0.5867152810096741, "learning_rate": 4.461062630092543e-05, "loss": 0.7072, "step": 33100 }, { "epoch": 0.10814226571075852, "grad_norm": 0.5017038583755493, "learning_rate": 4.45943392846091e-05, "loss": 0.6879, "step": 33200 }, { "epoch": 0.10846799542675478, "grad_norm": 0.6196131110191345, "learning_rate": 4.457805226829276e-05, "loss": 0.7094, "step": 33300 }, { "epoch": 0.10879372514275105, "grad_norm": 0.643118679523468, "learning_rate": 4.4561765251976434e-05, "loss": 0.6763, "step": 33400 }, { "epoch": 0.10911945485874731, "grad_norm": 0.516583263874054, "learning_rate": 4.45454782356601e-05, "loss": 0.6744, "step": 33500 }, { "epoch": 0.10944518457474357, "grad_norm": 0.6565887928009033, "learning_rate": 4.4529191219343765e-05, "loss": 0.6818, "step": 33600 }, { "epoch": 0.10977091429073983, "grad_norm": 0.644209623336792, "learning_rate": 4.451290420302743e-05, "loss": 0.6795, "step": 33700 }, { "epoch": 0.11009664400673609, "grad_norm": 0.5720322132110596, "learning_rate": 4.4496617186711096e-05, "loss": 0.6444, "step": 33800 }, { "epoch": 0.11042237372273235, "grad_norm": 0.7580476999282837, "learning_rate": 4.448033017039477e-05, "loss": 0.7067, "step": 33900 }, { "epoch": 0.11074810343872861, "grad_norm": 0.3334468603134155, "learning_rate": 4.4464043154078435e-05, "loss": 0.7245, "step": 34000 }, { "epoch": 0.11107383315472487, "grad_norm": 0.7232679724693298, "learning_rate": 4.44477561377621e-05, "loss": 0.6476, "step": 34100 }, { "epoch": 0.11139956287072113, "grad_norm": 0.49447712302207947, "learning_rate": 4.4431469121445766e-05, "loss": 0.6813, "step": 34200 }, { "epoch": 0.11172529258671739, "grad_norm": 0.9112755656242371, "learning_rate": 4.441518210512943e-05, "loss": 0.7039, "step": 34300 }, { "epoch": 0.11205102230271366, "grad_norm": 0.9391865134239197, "learning_rate": 4.4398895088813104e-05, "loss": 0.7154, "step": 34400 }, { "epoch": 0.11237675201870992, "grad_norm": 0.6869890689849854, "learning_rate": 4.438260807249677e-05, "loss": 0.7462, "step": 34500 }, { "epoch": 0.11270248173470618, "grad_norm": 0.6954273581504822, "learning_rate": 4.4366321056180436e-05, "loss": 0.7151, "step": 34600 }, { "epoch": 0.11302821145070244, "grad_norm": 0.8512132167816162, "learning_rate": 4.43500340398641e-05, "loss": 0.7157, "step": 34700 }, { "epoch": 0.1133539411666987, "grad_norm": 0.7044045329093933, "learning_rate": 4.4333747023547774e-05, "loss": 0.6649, "step": 34800 }, { "epoch": 0.11367967088269496, "grad_norm": 0.6773298978805542, "learning_rate": 4.431746000723144e-05, "loss": 0.6137, "step": 34900 }, { "epoch": 0.11400540059869121, "grad_norm": 0.544491171836853, "learning_rate": 4.43011729909151e-05, "loss": 0.6577, "step": 35000 }, { "epoch": 0.11433113031468747, "grad_norm": 0.543596625328064, "learning_rate": 4.428488597459877e-05, "loss": 0.6699, "step": 35100 }, { "epoch": 0.11465686003068373, "grad_norm": 0.7878594398498535, "learning_rate": 4.426859895828244e-05, "loss": 0.709, "step": 35200 }, { "epoch": 0.11498258974668, "grad_norm": 0.8226998448371887, "learning_rate": 4.425231194196611e-05, "loss": 0.6954, "step": 35300 }, { "epoch": 0.11530831946267626, "grad_norm": 0.48608875274658203, "learning_rate": 4.423602492564977e-05, "loss": 0.7502, "step": 35400 }, { "epoch": 0.11563404917867252, "grad_norm": 0.6490182280540466, "learning_rate": 4.421973790933344e-05, "loss": 0.7085, "step": 35500 }, { "epoch": 0.11595977889466878, "grad_norm": 0.3032003343105316, "learning_rate": 4.420345089301711e-05, "loss": 0.6778, "step": 35600 }, { "epoch": 0.11628550861066504, "grad_norm": 0.7003344297409058, "learning_rate": 4.418716387670077e-05, "loss": 0.71, "step": 35700 }, { "epoch": 0.1166112383266613, "grad_norm": 0.6569785475730896, "learning_rate": 4.417087686038444e-05, "loss": 0.653, "step": 35800 }, { "epoch": 0.11693696804265756, "grad_norm": 0.5428867936134338, "learning_rate": 4.4154589844068104e-05, "loss": 0.6733, "step": 35900 }, { "epoch": 0.11726269775865382, "grad_norm": 0.6179760098457336, "learning_rate": 4.4138302827751776e-05, "loss": 0.7081, "step": 36000 }, { "epoch": 0.11758842747465008, "grad_norm": 0.7397803068161011, "learning_rate": 4.412201581143544e-05, "loss": 0.6894, "step": 36100 }, { "epoch": 0.11791415719064635, "grad_norm": 0.725395679473877, "learning_rate": 4.410572879511911e-05, "loss": 0.6874, "step": 36200 }, { "epoch": 0.11823988690664261, "grad_norm": 0.45658519864082336, "learning_rate": 4.4089441778802774e-05, "loss": 0.6821, "step": 36300 }, { "epoch": 0.11856561662263887, "grad_norm": 0.9002487063407898, "learning_rate": 4.407315476248644e-05, "loss": 0.641, "step": 36400 }, { "epoch": 0.11889134633863513, "grad_norm": 0.8738647103309631, "learning_rate": 4.405686774617011e-05, "loss": 0.6763, "step": 36500 }, { "epoch": 0.11921707605463139, "grad_norm": 1.0051002502441406, "learning_rate": 4.404058072985378e-05, "loss": 0.6775, "step": 36600 }, { "epoch": 0.11954280577062765, "grad_norm": 0.8074469566345215, "learning_rate": 4.402429371353744e-05, "loss": 0.7408, "step": 36700 }, { "epoch": 0.1198685354866239, "grad_norm": 0.485388845205307, "learning_rate": 4.400800669722111e-05, "loss": 0.6729, "step": 36800 }, { "epoch": 0.12019426520262017, "grad_norm": 0.7123886942863464, "learning_rate": 4.399171968090478e-05, "loss": 0.661, "step": 36900 }, { "epoch": 0.12051999491861642, "grad_norm": 0.4587586522102356, "learning_rate": 4.397543266458845e-05, "loss": 0.6662, "step": 37000 }, { "epoch": 0.1208457246346127, "grad_norm": 0.7726449966430664, "learning_rate": 4.395914564827211e-05, "loss": 0.7469, "step": 37100 }, { "epoch": 0.12117145435060896, "grad_norm": 0.8636273741722107, "learning_rate": 4.394285863195578e-05, "loss": 0.6669, "step": 37200 }, { "epoch": 0.12149718406660522, "grad_norm": 0.6817033886909485, "learning_rate": 4.3926571615639444e-05, "loss": 0.6874, "step": 37300 }, { "epoch": 0.12182291378260147, "grad_norm": 0.5549355149269104, "learning_rate": 4.391028459932312e-05, "loss": 0.6939, "step": 37400 }, { "epoch": 0.12214864349859773, "grad_norm": 0.6180316805839539, "learning_rate": 4.389399758300678e-05, "loss": 0.6299, "step": 37500 }, { "epoch": 0.12247437321459399, "grad_norm": 0.7779985070228577, "learning_rate": 4.387771056669045e-05, "loss": 0.7181, "step": 37600 }, { "epoch": 0.12280010293059025, "grad_norm": 0.7182669043540955, "learning_rate": 4.3861423550374114e-05, "loss": 0.6703, "step": 37700 }, { "epoch": 0.12312583264658651, "grad_norm": 0.7191387414932251, "learning_rate": 4.3845136534057787e-05, "loss": 0.6802, "step": 37800 }, { "epoch": 0.12345156236258277, "grad_norm": 0.6137369275093079, "learning_rate": 4.382884951774145e-05, "loss": 0.7028, "step": 37900 }, { "epoch": 0.12377729207857904, "grad_norm": 0.7508791089057922, "learning_rate": 4.381256250142511e-05, "loss": 0.642, "step": 38000 }, { "epoch": 0.1241030217945753, "grad_norm": 0.6414891481399536, "learning_rate": 4.3796275485108784e-05, "loss": 0.6255, "step": 38100 }, { "epoch": 0.12442875151057156, "grad_norm": 0.6669697165489197, "learning_rate": 4.377998846879245e-05, "loss": 0.6691, "step": 38200 }, { "epoch": 0.12475448122656782, "grad_norm": 0.8991898894309998, "learning_rate": 4.376370145247612e-05, "loss": 0.6727, "step": 38300 }, { "epoch": 0.12508021094256408, "grad_norm": 0.4924679398536682, "learning_rate": 4.374741443615978e-05, "loss": 0.6661, "step": 38400 }, { "epoch": 0.12540594065856034, "grad_norm": 0.3712103068828583, "learning_rate": 4.3731127419843453e-05, "loss": 0.7306, "step": 38500 }, { "epoch": 0.1257316703745566, "grad_norm": 0.9136518836021423, "learning_rate": 4.371484040352712e-05, "loss": 0.6453, "step": 38600 }, { "epoch": 0.12605740009055286, "grad_norm": 0.6828204393386841, "learning_rate": 4.3698553387210785e-05, "loss": 0.6587, "step": 38700 }, { "epoch": 0.12638312980654912, "grad_norm": 0.6366333961486816, "learning_rate": 4.368226637089445e-05, "loss": 0.6606, "step": 38800 }, { "epoch": 0.12670885952254538, "grad_norm": 0.39375558495521545, "learning_rate": 4.3665979354578116e-05, "loss": 0.6937, "step": 38900 }, { "epoch": 0.12703458923854163, "grad_norm": 0.46293410658836365, "learning_rate": 4.364969233826179e-05, "loss": 0.6504, "step": 39000 }, { "epoch": 0.1273603189545379, "grad_norm": 0.9897958040237427, "learning_rate": 4.3633405321945455e-05, "loss": 0.7126, "step": 39100 }, { "epoch": 0.12768604867053415, "grad_norm": 0.5616987347602844, "learning_rate": 4.361711830562912e-05, "loss": 0.5956, "step": 39200 }, { "epoch": 0.1280117783865304, "grad_norm": 0.4081191122531891, "learning_rate": 4.3600831289312786e-05, "loss": 0.6648, "step": 39300 }, { "epoch": 0.1283375081025267, "grad_norm": 0.485188364982605, "learning_rate": 4.358454427299645e-05, "loss": 0.6694, "step": 39400 }, { "epoch": 0.12866323781852296, "grad_norm": 0.7212422490119934, "learning_rate": 4.3568257256680124e-05, "loss": 0.6767, "step": 39500 }, { "epoch": 0.12898896753451922, "grad_norm": 0.5502139925956726, "learning_rate": 4.355197024036379e-05, "loss": 0.6721, "step": 39600 }, { "epoch": 0.12931469725051548, "grad_norm": 0.49975594878196716, "learning_rate": 4.3535683224047456e-05, "loss": 0.6669, "step": 39700 }, { "epoch": 0.12964042696651173, "grad_norm": 0.4203544557094574, "learning_rate": 4.351939620773112e-05, "loss": 0.6716, "step": 39800 }, { "epoch": 0.129966156682508, "grad_norm": 0.5464275479316711, "learning_rate": 4.3503109191414794e-05, "loss": 0.6544, "step": 39900 }, { "epoch": 0.13029188639850425, "grad_norm": 0.6473097801208496, "learning_rate": 4.348682217509846e-05, "loss": 0.6977, "step": 40000 }, { "epoch": 0.1306176161145005, "grad_norm": 0.39890334010124207, "learning_rate": 4.347053515878212e-05, "loss": 0.6704, "step": 40100 }, { "epoch": 0.13094334583049677, "grad_norm": 1.0785876512527466, "learning_rate": 4.345424814246579e-05, "loss": 0.6196, "step": 40200 }, { "epoch": 0.13126907554649303, "grad_norm": 0.6607077121734619, "learning_rate": 4.343796112614946e-05, "loss": 0.6608, "step": 40300 }, { "epoch": 0.1315948052624893, "grad_norm": 0.5987501740455627, "learning_rate": 4.342167410983313e-05, "loss": 0.6334, "step": 40400 }, { "epoch": 0.13192053497848555, "grad_norm": 0.3443163335323334, "learning_rate": 4.340538709351679e-05, "loss": 0.6621, "step": 40500 }, { "epoch": 0.1322462646944818, "grad_norm": 0.9362694025039673, "learning_rate": 4.338910007720046e-05, "loss": 0.6404, "step": 40600 }, { "epoch": 0.13257199441047807, "grad_norm": 0.5049243569374084, "learning_rate": 4.3372813060884127e-05, "loss": 0.6426, "step": 40700 }, { "epoch": 0.13289772412647433, "grad_norm": 0.787389874458313, "learning_rate": 4.335652604456779e-05, "loss": 0.6432, "step": 40800 }, { "epoch": 0.13322345384247058, "grad_norm": 0.8065658211708069, "learning_rate": 4.334023902825146e-05, "loss": 0.6477, "step": 40900 }, { "epoch": 0.13354918355846684, "grad_norm": 0.5166397094726562, "learning_rate": 4.3323952011935124e-05, "loss": 0.6384, "step": 41000 }, { "epoch": 0.1338749132744631, "grad_norm": 0.9597229957580566, "learning_rate": 4.3307664995618796e-05, "loss": 0.6832, "step": 41100 }, { "epoch": 0.1342006429904594, "grad_norm": 0.5936517715454102, "learning_rate": 4.329137797930246e-05, "loss": 0.6767, "step": 41200 }, { "epoch": 0.13452637270645565, "grad_norm": 0.8391766548156738, "learning_rate": 4.3275090962986135e-05, "loss": 0.6215, "step": 41300 }, { "epoch": 0.1348521024224519, "grad_norm": 0.977497398853302, "learning_rate": 4.3258803946669793e-05, "loss": 0.6307, "step": 41400 }, { "epoch": 0.13517783213844817, "grad_norm": 0.6750873923301697, "learning_rate": 4.324251693035346e-05, "loss": 0.631, "step": 41500 }, { "epoch": 0.13550356185444443, "grad_norm": 0.4655423164367676, "learning_rate": 4.322622991403713e-05, "loss": 0.7025, "step": 41600 }, { "epoch": 0.13582929157044069, "grad_norm": 0.43544334173202515, "learning_rate": 4.32099428977208e-05, "loss": 0.6555, "step": 41700 }, { "epoch": 0.13615502128643694, "grad_norm": 0.7595189213752747, "learning_rate": 4.319365588140446e-05, "loss": 0.6197, "step": 41800 }, { "epoch": 0.1364807510024332, "grad_norm": 0.4422534108161926, "learning_rate": 4.317736886508813e-05, "loss": 0.5798, "step": 41900 }, { "epoch": 0.13680648071842946, "grad_norm": 0.4622032344341278, "learning_rate": 4.31610818487718e-05, "loss": 0.6493, "step": 42000 }, { "epoch": 0.13713221043442572, "grad_norm": 0.7267939448356628, "learning_rate": 4.314479483245547e-05, "loss": 0.6228, "step": 42100 }, { "epoch": 0.13745794015042198, "grad_norm": 0.66838139295578, "learning_rate": 4.312850781613913e-05, "loss": 0.6507, "step": 42200 }, { "epoch": 0.13778366986641824, "grad_norm": 0.40865644812583923, "learning_rate": 4.31122207998228e-05, "loss": 0.6388, "step": 42300 }, { "epoch": 0.1381093995824145, "grad_norm": 0.7203364968299866, "learning_rate": 4.3095933783506464e-05, "loss": 0.589, "step": 42400 }, { "epoch": 0.13843512929841076, "grad_norm": 0.7719990015029907, "learning_rate": 4.307964676719014e-05, "loss": 0.6446, "step": 42500 }, { "epoch": 0.13876085901440702, "grad_norm": 0.35780540108680725, "learning_rate": 4.30633597508738e-05, "loss": 0.683, "step": 42600 }, { "epoch": 0.13908658873040328, "grad_norm": 0.5952534675598145, "learning_rate": 4.304707273455747e-05, "loss": 0.6697, "step": 42700 }, { "epoch": 0.13941231844639954, "grad_norm": 0.539117157459259, "learning_rate": 4.3030785718241134e-05, "loss": 0.6582, "step": 42800 }, { "epoch": 0.1397380481623958, "grad_norm": 0.8181525468826294, "learning_rate": 4.30144987019248e-05, "loss": 0.6695, "step": 42900 }, { "epoch": 0.14006377787839205, "grad_norm": 0.8720047473907471, "learning_rate": 4.299821168560847e-05, "loss": 0.5931, "step": 43000 }, { "epoch": 0.14038950759438834, "grad_norm": 0.9138098955154419, "learning_rate": 4.298192466929213e-05, "loss": 0.6874, "step": 43100 }, { "epoch": 0.1407152373103846, "grad_norm": 0.8015493750572205, "learning_rate": 4.2965637652975804e-05, "loss": 0.6574, "step": 43200 }, { "epoch": 0.14104096702638086, "grad_norm": 0.8426867723464966, "learning_rate": 4.294935063665947e-05, "loss": 0.6662, "step": 43300 }, { "epoch": 0.14136669674237712, "grad_norm": 0.3480939567089081, "learning_rate": 4.293306362034314e-05, "loss": 0.6351, "step": 43400 }, { "epoch": 0.14169242645837338, "grad_norm": 0.5666735172271729, "learning_rate": 4.29167766040268e-05, "loss": 0.641, "step": 43500 }, { "epoch": 0.14201815617436964, "grad_norm": 0.9445961117744446, "learning_rate": 4.2900489587710467e-05, "loss": 0.6608, "step": 43600 }, { "epoch": 0.1423438858903659, "grad_norm": 0.7916907072067261, "learning_rate": 4.288420257139414e-05, "loss": 0.6615, "step": 43700 }, { "epoch": 0.14266961560636215, "grad_norm": 0.9159532785415649, "learning_rate": 4.2867915555077805e-05, "loss": 0.5919, "step": 43800 }, { "epoch": 0.1429953453223584, "grad_norm": 0.5766249895095825, "learning_rate": 4.285162853876147e-05, "loss": 0.6724, "step": 43900 }, { "epoch": 0.14332107503835467, "grad_norm": 0.753519594669342, "learning_rate": 4.2835341522445136e-05, "loss": 0.6995, "step": 44000 }, { "epoch": 0.14364680475435093, "grad_norm": 1.1004271507263184, "learning_rate": 4.281905450612881e-05, "loss": 0.6636, "step": 44100 }, { "epoch": 0.1439725344703472, "grad_norm": 0.7064334154129028, "learning_rate": 4.2802767489812475e-05, "loss": 0.6793, "step": 44200 }, { "epoch": 0.14429826418634345, "grad_norm": 0.5158839225769043, "learning_rate": 4.278648047349614e-05, "loss": 0.6336, "step": 44300 }, { "epoch": 0.1446239939023397, "grad_norm": 1.0451433658599854, "learning_rate": 4.2770193457179806e-05, "loss": 0.6227, "step": 44400 }, { "epoch": 0.14494972361833597, "grad_norm": 0.5956864356994629, "learning_rate": 4.275390644086347e-05, "loss": 0.6517, "step": 44500 }, { "epoch": 0.14527545333433223, "grad_norm": 0.9525729417800903, "learning_rate": 4.2737619424547144e-05, "loss": 0.6245, "step": 44600 }, { "epoch": 0.1456011830503285, "grad_norm": 0.7456961274147034, "learning_rate": 4.272133240823081e-05, "loss": 0.6577, "step": 44700 }, { "epoch": 0.14592691276632475, "grad_norm": 0.5686585307121277, "learning_rate": 4.2705045391914476e-05, "loss": 0.6675, "step": 44800 }, { "epoch": 0.14625264248232103, "grad_norm": 0.5127500295639038, "learning_rate": 4.268875837559814e-05, "loss": 0.5966, "step": 44900 }, { "epoch": 0.1465783721983173, "grad_norm": 0.6099263429641724, "learning_rate": 4.267247135928181e-05, "loss": 0.6259, "step": 45000 }, { "epoch": 0.14690410191431355, "grad_norm": 0.5734119415283203, "learning_rate": 4.265618434296548e-05, "loss": 0.6251, "step": 45100 }, { "epoch": 0.1472298316303098, "grad_norm": 0.40758875012397766, "learning_rate": 4.263989732664914e-05, "loss": 0.5856, "step": 45200 }, { "epoch": 0.14755556134630607, "grad_norm": 0.5974459052085876, "learning_rate": 4.262361031033281e-05, "loss": 0.6443, "step": 45300 }, { "epoch": 0.14788129106230233, "grad_norm": 0.48085859417915344, "learning_rate": 4.260732329401648e-05, "loss": 0.6612, "step": 45400 }, { "epoch": 0.1482070207782986, "grad_norm": 0.5771530270576477, "learning_rate": 4.259103627770015e-05, "loss": 0.6272, "step": 45500 }, { "epoch": 0.14853275049429485, "grad_norm": 0.8463455438613892, "learning_rate": 4.2574749261383815e-05, "loss": 0.6008, "step": 45600 }, { "epoch": 0.1488584802102911, "grad_norm": 0.7014292478561401, "learning_rate": 4.255846224506748e-05, "loss": 0.5353, "step": 45700 }, { "epoch": 0.14918420992628736, "grad_norm": 0.6181588768959045, "learning_rate": 4.2542175228751146e-05, "loss": 0.6139, "step": 45800 }, { "epoch": 0.14950993964228362, "grad_norm": 0.6540141701698303, "learning_rate": 4.252588821243481e-05, "loss": 0.5997, "step": 45900 }, { "epoch": 0.14983566935827988, "grad_norm": 0.47981733083724976, "learning_rate": 4.2509601196118485e-05, "loss": 0.6511, "step": 46000 }, { "epoch": 0.15016139907427614, "grad_norm": 0.964857816696167, "learning_rate": 4.2493314179802144e-05, "loss": 0.6365, "step": 46100 }, { "epoch": 0.1504871287902724, "grad_norm": 0.6706714034080505, "learning_rate": 4.2477027163485816e-05, "loss": 0.664, "step": 46200 }, { "epoch": 0.15081285850626866, "grad_norm": 0.5073367953300476, "learning_rate": 4.246074014716948e-05, "loss": 0.5633, "step": 46300 }, { "epoch": 0.15113858822226492, "grad_norm": 0.37114378809928894, "learning_rate": 4.2444453130853154e-05, "loss": 0.6498, "step": 46400 }, { "epoch": 0.15146431793826118, "grad_norm": 1.153325080871582, "learning_rate": 4.242816611453681e-05, "loss": 0.6254, "step": 46500 }, { "epoch": 0.15179004765425744, "grad_norm": 0.7353873252868652, "learning_rate": 4.241187909822048e-05, "loss": 0.6573, "step": 46600 }, { "epoch": 0.15211577737025372, "grad_norm": 0.5379579067230225, "learning_rate": 4.239559208190415e-05, "loss": 0.6642, "step": 46700 }, { "epoch": 0.15244150708624998, "grad_norm": 0.341907799243927, "learning_rate": 4.237930506558782e-05, "loss": 0.6294, "step": 46800 }, { "epoch": 0.15276723680224624, "grad_norm": 0.3866462707519531, "learning_rate": 4.236301804927148e-05, "loss": 0.6212, "step": 46900 }, { "epoch": 0.1530929665182425, "grad_norm": 0.6686252951622009, "learning_rate": 4.234673103295515e-05, "loss": 0.64, "step": 47000 }, { "epoch": 0.15341869623423876, "grad_norm": 0.6398385167121887, "learning_rate": 4.233044401663882e-05, "loss": 0.6156, "step": 47100 }, { "epoch": 0.15374442595023502, "grad_norm": 0.8679475784301758, "learning_rate": 4.231415700032249e-05, "loss": 0.6492, "step": 47200 }, { "epoch": 0.15407015566623128, "grad_norm": 0.6425623297691345, "learning_rate": 4.229786998400615e-05, "loss": 0.6661, "step": 47300 }, { "epoch": 0.15439588538222754, "grad_norm": 0.7811526656150818, "learning_rate": 4.228158296768982e-05, "loss": 0.6416, "step": 47400 }, { "epoch": 0.1547216150982238, "grad_norm": 0.6820793747901917, "learning_rate": 4.2265295951373484e-05, "loss": 0.6426, "step": 47500 }, { "epoch": 0.15504734481422006, "grad_norm": 0.8748511672019958, "learning_rate": 4.224900893505716e-05, "loss": 0.6038, "step": 47600 }, { "epoch": 0.15537307453021632, "grad_norm": 0.6828723549842834, "learning_rate": 4.223272191874082e-05, "loss": 0.6408, "step": 47700 }, { "epoch": 0.15569880424621257, "grad_norm": 1.01051926612854, "learning_rate": 4.221643490242449e-05, "loss": 0.6218, "step": 47800 }, { "epoch": 0.15602453396220883, "grad_norm": 0.6920143961906433, "learning_rate": 4.2200147886108154e-05, "loss": 0.63, "step": 47900 }, { "epoch": 0.1563502636782051, "grad_norm": 0.6410394310951233, "learning_rate": 4.218386086979182e-05, "loss": 0.6176, "step": 48000 }, { "epoch": 0.15667599339420135, "grad_norm": 0.5157743692398071, "learning_rate": 4.216757385347549e-05, "loss": 0.5947, "step": 48100 }, { "epoch": 0.1570017231101976, "grad_norm": 0.6770983934402466, "learning_rate": 4.215128683715915e-05, "loss": 0.6192, "step": 48200 }, { "epoch": 0.15732745282619387, "grad_norm": 0.49714550375938416, "learning_rate": 4.2134999820842824e-05, "loss": 0.6121, "step": 48300 }, { "epoch": 0.15765318254219013, "grad_norm": 0.3486001789569855, "learning_rate": 4.211871280452649e-05, "loss": 0.5821, "step": 48400 }, { "epoch": 0.15797891225818642, "grad_norm": 0.4202999770641327, "learning_rate": 4.210242578821016e-05, "loss": 0.5909, "step": 48500 }, { "epoch": 0.15830464197418267, "grad_norm": 0.44769522547721863, "learning_rate": 4.208613877189382e-05, "loss": 0.6369, "step": 48600 }, { "epoch": 0.15863037169017893, "grad_norm": 0.6501901745796204, "learning_rate": 4.2069851755577486e-05, "loss": 0.6187, "step": 48700 }, { "epoch": 0.1589561014061752, "grad_norm": 0.8261470794677734, "learning_rate": 4.205356473926116e-05, "loss": 0.6136, "step": 48800 }, { "epoch": 0.15928183112217145, "grad_norm": 0.9979439973831177, "learning_rate": 4.2037277722944825e-05, "loss": 0.623, "step": 48900 }, { "epoch": 0.1596075608381677, "grad_norm": 0.5651659369468689, "learning_rate": 4.202099070662849e-05, "loss": 0.6742, "step": 49000 }, { "epoch": 0.15993329055416397, "grad_norm": 0.7412470579147339, "learning_rate": 4.2004703690312156e-05, "loss": 0.6272, "step": 49100 }, { "epoch": 0.16025902027016023, "grad_norm": 0.43271690607070923, "learning_rate": 4.198841667399583e-05, "loss": 0.5729, "step": 49200 }, { "epoch": 0.1605847499861565, "grad_norm": 0.5117851495742798, "learning_rate": 4.1972129657679494e-05, "loss": 0.6156, "step": 49300 }, { "epoch": 0.16091047970215275, "grad_norm": 0.7106539011001587, "learning_rate": 4.195584264136316e-05, "loss": 0.6052, "step": 49400 }, { "epoch": 0.161236209418149, "grad_norm": 0.6146919131278992, "learning_rate": 4.1939555625046826e-05, "loss": 0.5932, "step": 49500 }, { "epoch": 0.16156193913414527, "grad_norm": 0.49088531732559204, "learning_rate": 4.192326860873049e-05, "loss": 0.568, "step": 49600 }, { "epoch": 0.16188766885014153, "grad_norm": 0.9923317432403564, "learning_rate": 4.1906981592414164e-05, "loss": 0.596, "step": 49700 }, { "epoch": 0.16221339856613778, "grad_norm": 0.3995937705039978, "learning_rate": 4.189069457609783e-05, "loss": 0.6442, "step": 49800 }, { "epoch": 0.16253912828213404, "grad_norm": 0.5258984565734863, "learning_rate": 4.1874407559781496e-05, "loss": 0.5601, "step": 49900 }, { "epoch": 0.1628648579981303, "grad_norm": 0.19585928320884705, "learning_rate": 4.185812054346516e-05, "loss": 0.6509, "step": 50000 }, { "epoch": 0.16319058771412656, "grad_norm": 0.625548243522644, "learning_rate": 4.184183352714883e-05, "loss": 0.6411, "step": 50100 }, { "epoch": 0.16351631743012282, "grad_norm": 0.7014303207397461, "learning_rate": 4.18255465108325e-05, "loss": 0.6125, "step": 50200 }, { "epoch": 0.16384204714611908, "grad_norm": 0.5523779988288879, "learning_rate": 4.1809259494516165e-05, "loss": 0.5811, "step": 50300 }, { "epoch": 0.16416777686211537, "grad_norm": 0.5742841958999634, "learning_rate": 4.179297247819983e-05, "loss": 0.6282, "step": 50400 }, { "epoch": 0.16449350657811163, "grad_norm": 0.5776492357254028, "learning_rate": 4.17766854618835e-05, "loss": 0.6622, "step": 50500 }, { "epoch": 0.16481923629410788, "grad_norm": 0.7464694380760193, "learning_rate": 4.176039844556717e-05, "loss": 0.6309, "step": 50600 }, { "epoch": 0.16514496601010414, "grad_norm": 0.5271546244621277, "learning_rate": 4.1744111429250835e-05, "loss": 0.645, "step": 50700 }, { "epoch": 0.1654706957261004, "grad_norm": 0.6904231905937195, "learning_rate": 4.1727824412934494e-05, "loss": 0.5927, "step": 50800 }, { "epoch": 0.16579642544209666, "grad_norm": 0.578195333480835, "learning_rate": 4.1711537396618166e-05, "loss": 0.5812, "step": 50900 }, { "epoch": 0.16612215515809292, "grad_norm": 0.8716936707496643, "learning_rate": 4.169525038030183e-05, "loss": 0.6261, "step": 51000 }, { "epoch": 0.16644788487408918, "grad_norm": 0.6577697992324829, "learning_rate": 4.1678963363985505e-05, "loss": 0.6101, "step": 51100 }, { "epoch": 0.16677361459008544, "grad_norm": 0.7431929111480713, "learning_rate": 4.1662676347669164e-05, "loss": 0.6227, "step": 51200 }, { "epoch": 0.1670993443060817, "grad_norm": 0.9198315739631653, "learning_rate": 4.1646389331352836e-05, "loss": 0.6399, "step": 51300 }, { "epoch": 0.16742507402207796, "grad_norm": 0.5159572958946228, "learning_rate": 4.16301023150365e-05, "loss": 0.6329, "step": 51400 }, { "epoch": 0.16775080373807422, "grad_norm": 0.7744697332382202, "learning_rate": 4.161381529872017e-05, "loss": 0.5579, "step": 51500 }, { "epoch": 0.16807653345407048, "grad_norm": 0.4429173767566681, "learning_rate": 4.159752828240383e-05, "loss": 0.5786, "step": 51600 }, { "epoch": 0.16840226317006673, "grad_norm": 0.7796801924705505, "learning_rate": 4.15812412660875e-05, "loss": 0.6353, "step": 51700 }, { "epoch": 0.168727992886063, "grad_norm": 0.43117523193359375, "learning_rate": 4.156495424977117e-05, "loss": 0.5807, "step": 51800 }, { "epoch": 0.16905372260205925, "grad_norm": 0.44315412640571594, "learning_rate": 4.154866723345484e-05, "loss": 0.5979, "step": 51900 }, { "epoch": 0.1693794523180555, "grad_norm": 0.4306319057941437, "learning_rate": 4.15323802171385e-05, "loss": 0.6498, "step": 52000 }, { "epoch": 0.16970518203405177, "grad_norm": 0.283033549785614, "learning_rate": 4.151609320082217e-05, "loss": 0.6329, "step": 52100 }, { "epoch": 0.17003091175004806, "grad_norm": 0.4118421673774719, "learning_rate": 4.1499806184505834e-05, "loss": 0.5933, "step": 52200 }, { "epoch": 0.17035664146604432, "grad_norm": 0.9130700826644897, "learning_rate": 4.148351916818951e-05, "loss": 0.5349, "step": 52300 }, { "epoch": 0.17068237118204058, "grad_norm": 0.33348548412323, "learning_rate": 4.146723215187317e-05, "loss": 0.6182, "step": 52400 }, { "epoch": 0.17100810089803684, "grad_norm": 0.6642253398895264, "learning_rate": 4.145094513555684e-05, "loss": 0.5989, "step": 52500 }, { "epoch": 0.1713338306140331, "grad_norm": 0.7113855481147766, "learning_rate": 4.1434658119240504e-05, "loss": 0.6063, "step": 52600 }, { "epoch": 0.17165956033002935, "grad_norm": 1.0840643644332886, "learning_rate": 4.1418371102924177e-05, "loss": 0.615, "step": 52700 }, { "epoch": 0.1719852900460256, "grad_norm": 0.5277838706970215, "learning_rate": 4.140208408660784e-05, "loss": 0.6234, "step": 52800 }, { "epoch": 0.17231101976202187, "grad_norm": 0.5993104577064514, "learning_rate": 4.13857970702915e-05, "loss": 0.5905, "step": 52900 }, { "epoch": 0.17263674947801813, "grad_norm": 0.7363581657409668, "learning_rate": 4.1369510053975174e-05, "loss": 0.6032, "step": 53000 }, { "epoch": 0.1729624791940144, "grad_norm": 0.6299027800559998, "learning_rate": 4.135322303765884e-05, "loss": 0.5717, "step": 53100 }, { "epoch": 0.17328820891001065, "grad_norm": 0.49232372641563416, "learning_rate": 4.133693602134251e-05, "loss": 0.6031, "step": 53200 }, { "epoch": 0.1736139386260069, "grad_norm": 0.7371428608894348, "learning_rate": 4.132064900502617e-05, "loss": 0.5608, "step": 53300 }, { "epoch": 0.17393966834200317, "grad_norm": 1.0730559825897217, "learning_rate": 4.1304361988709843e-05, "loss": 0.6026, "step": 53400 }, { "epoch": 0.17426539805799943, "grad_norm": 0.674548327922821, "learning_rate": 4.128807497239351e-05, "loss": 0.5721, "step": 53500 }, { "epoch": 0.17459112777399569, "grad_norm": 0.5990965962409973, "learning_rate": 4.1271787956077175e-05, "loss": 0.6185, "step": 53600 }, { "epoch": 0.17491685748999194, "grad_norm": 0.61868816614151, "learning_rate": 4.125550093976084e-05, "loss": 0.6089, "step": 53700 }, { "epoch": 0.1752425872059882, "grad_norm": 0.4897661507129669, "learning_rate": 4.1239213923444506e-05, "loss": 0.6025, "step": 53800 }, { "epoch": 0.17556831692198446, "grad_norm": 0.2856525480747223, "learning_rate": 4.122292690712818e-05, "loss": 0.5609, "step": 53900 }, { "epoch": 0.17589404663798075, "grad_norm": 0.5488519668579102, "learning_rate": 4.1206639890811845e-05, "loss": 0.5781, "step": 54000 }, { "epoch": 0.176219776353977, "grad_norm": 0.7812597155570984, "learning_rate": 4.119035287449551e-05, "loss": 0.665, "step": 54100 }, { "epoch": 0.17654550606997327, "grad_norm": 0.5567785501480103, "learning_rate": 4.1174065858179176e-05, "loss": 0.6178, "step": 54200 }, { "epoch": 0.17687123578596953, "grad_norm": 0.7302952408790588, "learning_rate": 4.115777884186285e-05, "loss": 0.5912, "step": 54300 }, { "epoch": 0.1771969655019658, "grad_norm": 0.6872962713241577, "learning_rate": 4.1141491825546514e-05, "loss": 0.5698, "step": 54400 }, { "epoch": 0.17752269521796205, "grad_norm": 0.6139744520187378, "learning_rate": 4.112520480923018e-05, "loss": 0.6148, "step": 54500 }, { "epoch": 0.1778484249339583, "grad_norm": 0.6646268367767334, "learning_rate": 4.1108917792913846e-05, "loss": 0.5222, "step": 54600 }, { "epoch": 0.17817415464995456, "grad_norm": 0.4842844009399414, "learning_rate": 4.109263077659751e-05, "loss": 0.6225, "step": 54700 }, { "epoch": 0.17849988436595082, "grad_norm": 0.6158716082572937, "learning_rate": 4.1076343760281184e-05, "loss": 0.634, "step": 54800 }, { "epoch": 0.17882561408194708, "grad_norm": 0.5122677683830261, "learning_rate": 4.106005674396485e-05, "loss": 0.6355, "step": 54900 }, { "epoch": 0.17915134379794334, "grad_norm": 0.6086121201515198, "learning_rate": 4.1043769727648515e-05, "loss": 0.5787, "step": 55000 }, { "epoch": 0.1794770735139396, "grad_norm": 0.5853461623191833, "learning_rate": 4.102748271133218e-05, "loss": 0.5935, "step": 55100 }, { "epoch": 0.17980280322993586, "grad_norm": 0.9216148853302002, "learning_rate": 4.101119569501585e-05, "loss": 0.575, "step": 55200 }, { "epoch": 0.18012853294593212, "grad_norm": 0.6602348685264587, "learning_rate": 4.099490867869952e-05, "loss": 0.6324, "step": 55300 }, { "epoch": 0.18045426266192838, "grad_norm": 0.7494210004806519, "learning_rate": 4.0978621662383185e-05, "loss": 0.5859, "step": 55400 }, { "epoch": 0.18077999237792464, "grad_norm": 0.6391832232475281, "learning_rate": 4.096233464606685e-05, "loss": 0.6172, "step": 55500 }, { "epoch": 0.1811057220939209, "grad_norm": 0.5824201107025146, "learning_rate": 4.0946047629750517e-05, "loss": 0.6298, "step": 55600 }, { "epoch": 0.18143145180991715, "grad_norm": 0.6924212574958801, "learning_rate": 4.092976061343419e-05, "loss": 0.6105, "step": 55700 }, { "epoch": 0.1817571815259134, "grad_norm": 0.4423877000808716, "learning_rate": 4.0913473597117855e-05, "loss": 0.5613, "step": 55800 }, { "epoch": 0.1820829112419097, "grad_norm": 0.6090314984321594, "learning_rate": 4.0897186580801514e-05, "loss": 0.6643, "step": 55900 }, { "epoch": 0.18240864095790596, "grad_norm": 0.7554407119750977, "learning_rate": 4.0880899564485186e-05, "loss": 0.6017, "step": 56000 }, { "epoch": 0.18273437067390222, "grad_norm": 0.8148972988128662, "learning_rate": 4.086461254816885e-05, "loss": 0.6539, "step": 56100 }, { "epoch": 0.18306010038989848, "grad_norm": 0.5610066652297974, "learning_rate": 4.0848325531852525e-05, "loss": 0.5872, "step": 56200 }, { "epoch": 0.18338583010589474, "grad_norm": 0.6361645460128784, "learning_rate": 4.0832038515536183e-05, "loss": 0.5815, "step": 56300 }, { "epoch": 0.183711559821891, "grad_norm": 0.4567771553993225, "learning_rate": 4.0815751499219856e-05, "loss": 0.5799, "step": 56400 }, { "epoch": 0.18403728953788726, "grad_norm": 0.8705578446388245, "learning_rate": 4.079946448290352e-05, "loss": 0.6088, "step": 56500 }, { "epoch": 0.18436301925388351, "grad_norm": 0.8278294801712036, "learning_rate": 4.078317746658719e-05, "loss": 0.6064, "step": 56600 }, { "epoch": 0.18468874896987977, "grad_norm": 0.38864201307296753, "learning_rate": 4.076689045027085e-05, "loss": 0.5705, "step": 56700 }, { "epoch": 0.18501447868587603, "grad_norm": 0.6986147165298462, "learning_rate": 4.075060343395452e-05, "loss": 0.6071, "step": 56800 }, { "epoch": 0.1853402084018723, "grad_norm": 0.9127377867698669, "learning_rate": 4.073431641763819e-05, "loss": 0.608, "step": 56900 }, { "epoch": 0.18566593811786855, "grad_norm": 0.5072229504585266, "learning_rate": 4.071802940132186e-05, "loss": 0.583, "step": 57000 }, { "epoch": 0.1859916678338648, "grad_norm": 0.47545337677001953, "learning_rate": 4.070174238500552e-05, "loss": 0.5826, "step": 57100 }, { "epoch": 0.18631739754986107, "grad_norm": 0.5175743103027344, "learning_rate": 4.068545536868919e-05, "loss": 0.6184, "step": 57200 }, { "epoch": 0.18664312726585733, "grad_norm": 0.7252177596092224, "learning_rate": 4.0669168352372854e-05, "loss": 0.6042, "step": 57300 }, { "epoch": 0.1869688569818536, "grad_norm": 0.21297673881053925, "learning_rate": 4.065288133605653e-05, "loss": 0.5874, "step": 57400 }, { "epoch": 0.18729458669784985, "grad_norm": 0.6985592246055603, "learning_rate": 4.063659431974019e-05, "loss": 0.5641, "step": 57500 }, { "epoch": 0.1876203164138461, "grad_norm": 0.35783612728118896, "learning_rate": 4.062030730342386e-05, "loss": 0.5743, "step": 57600 }, { "epoch": 0.1879460461298424, "grad_norm": 0.40871796011924744, "learning_rate": 4.0604020287107524e-05, "loss": 0.6418, "step": 57700 }, { "epoch": 0.18827177584583865, "grad_norm": 0.6412025094032288, "learning_rate": 4.0587733270791197e-05, "loss": 0.6048, "step": 57800 }, { "epoch": 0.1885975055618349, "grad_norm": 0.6944416165351868, "learning_rate": 4.057144625447486e-05, "loss": 0.5647, "step": 57900 }, { "epoch": 0.18892323527783117, "grad_norm": 0.8592963218688965, "learning_rate": 4.055515923815852e-05, "loss": 0.5703, "step": 58000 }, { "epoch": 0.18924896499382743, "grad_norm": 0.7240419983863831, "learning_rate": 4.0538872221842194e-05, "loss": 0.6025, "step": 58100 }, { "epoch": 0.1895746947098237, "grad_norm": 0.3861270546913147, "learning_rate": 4.052258520552586e-05, "loss": 0.5864, "step": 58200 }, { "epoch": 0.18990042442581995, "grad_norm": 0.6718447208404541, "learning_rate": 4.050629818920953e-05, "loss": 0.6139, "step": 58300 }, { "epoch": 0.1902261541418162, "grad_norm": 0.7049744129180908, "learning_rate": 4.049001117289319e-05, "loss": 0.5697, "step": 58400 }, { "epoch": 0.19055188385781247, "grad_norm": 0.39576876163482666, "learning_rate": 4.047372415657686e-05, "loss": 0.5987, "step": 58500 }, { "epoch": 0.19087761357380872, "grad_norm": 0.7814981341362, "learning_rate": 4.045743714026053e-05, "loss": 0.5715, "step": 58600 }, { "epoch": 0.19120334328980498, "grad_norm": 1.0083011388778687, "learning_rate": 4.0441150123944195e-05, "loss": 0.6355, "step": 58700 }, { "epoch": 0.19152907300580124, "grad_norm": 0.7083866596221924, "learning_rate": 4.042486310762786e-05, "loss": 0.6666, "step": 58800 }, { "epoch": 0.1918548027217975, "grad_norm": 0.4740765690803528, "learning_rate": 4.0408576091311526e-05, "loss": 0.5773, "step": 58900 }, { "epoch": 0.19218053243779376, "grad_norm": 0.3599790632724762, "learning_rate": 4.03922890749952e-05, "loss": 0.5916, "step": 59000 }, { "epoch": 0.19250626215379002, "grad_norm": 0.6107310652732849, "learning_rate": 4.0376002058678865e-05, "loss": 0.63, "step": 59100 }, { "epoch": 0.19283199186978628, "grad_norm": 0.6388813257217407, "learning_rate": 4.035971504236253e-05, "loss": 0.6197, "step": 59200 }, { "epoch": 0.19315772158578254, "grad_norm": 0.4137844145298004, "learning_rate": 4.0343428026046196e-05, "loss": 0.6185, "step": 59300 }, { "epoch": 0.1934834513017788, "grad_norm": 0.6289616823196411, "learning_rate": 4.032714100972986e-05, "loss": 0.6367, "step": 59400 }, { "epoch": 0.19380918101777508, "grad_norm": 0.7528841495513916, "learning_rate": 4.0310853993413534e-05, "loss": 0.5783, "step": 59500 }, { "epoch": 0.19413491073377134, "grad_norm": 0.7345238924026489, "learning_rate": 4.02945669770972e-05, "loss": 0.6378, "step": 59600 }, { "epoch": 0.1944606404497676, "grad_norm": 0.7652753591537476, "learning_rate": 4.0278279960780866e-05, "loss": 0.5419, "step": 59700 }, { "epoch": 0.19478637016576386, "grad_norm": 0.3726235032081604, "learning_rate": 4.026199294446453e-05, "loss": 0.5933, "step": 59800 }, { "epoch": 0.19511209988176012, "grad_norm": 0.475990355014801, "learning_rate": 4.0245705928148204e-05, "loss": 0.5421, "step": 59900 }, { "epoch": 0.19543782959775638, "grad_norm": 0.8618846535682678, "learning_rate": 4.022941891183187e-05, "loss": 0.6149, "step": 60000 }, { "epoch": 0.19576355931375264, "grad_norm": 0.3643835484981537, "learning_rate": 4.0213131895515535e-05, "loss": 0.5898, "step": 60100 }, { "epoch": 0.1960892890297489, "grad_norm": 0.6492701172828674, "learning_rate": 4.01968448791992e-05, "loss": 0.6115, "step": 60200 }, { "epoch": 0.19641501874574516, "grad_norm": 0.46400219202041626, "learning_rate": 4.018055786288287e-05, "loss": 0.6093, "step": 60300 }, { "epoch": 0.19674074846174142, "grad_norm": 0.6529611349105835, "learning_rate": 4.016427084656654e-05, "loss": 0.5663, "step": 60400 }, { "epoch": 0.19706647817773768, "grad_norm": 0.8332497477531433, "learning_rate": 4.0147983830250205e-05, "loss": 0.557, "step": 60500 }, { "epoch": 0.19739220789373393, "grad_norm": 0.43394774198532104, "learning_rate": 4.013169681393387e-05, "loss": 0.5864, "step": 60600 }, { "epoch": 0.1977179376097302, "grad_norm": 0.3713783919811249, "learning_rate": 4.0115409797617537e-05, "loss": 0.597, "step": 60700 }, { "epoch": 0.19804366732572645, "grad_norm": 0.5605040788650513, "learning_rate": 4.00991227813012e-05, "loss": 0.5965, "step": 60800 }, { "epoch": 0.1983693970417227, "grad_norm": 0.4591531455516815, "learning_rate": 4.0082835764984875e-05, "loss": 0.5718, "step": 60900 }, { "epoch": 0.19869512675771897, "grad_norm": 0.7599985003471375, "learning_rate": 4.0066548748668534e-05, "loss": 0.6088, "step": 61000 }, { "epoch": 0.19902085647371523, "grad_norm": 0.7234918475151062, "learning_rate": 4.0050261732352206e-05, "loss": 0.6022, "step": 61100 }, { "epoch": 0.1993465861897115, "grad_norm": 0.8344034552574158, "learning_rate": 4.003397471603587e-05, "loss": 0.5978, "step": 61200 }, { "epoch": 0.19967231590570778, "grad_norm": 0.7539324164390564, "learning_rate": 4.0017687699719544e-05, "loss": 0.5979, "step": 61300 }, { "epoch": 0.19999804562170403, "grad_norm": 0.7535436153411865, "learning_rate": 4.00014006834032e-05, "loss": 0.5632, "step": 61400 }, { "epoch": 0.2003237753377003, "grad_norm": 1.0253859758377075, "learning_rate": 3.998511366708687e-05, "loss": 0.6245, "step": 61500 }, { "epoch": 0.20064950505369655, "grad_norm": 0.8442240357398987, "learning_rate": 3.996882665077054e-05, "loss": 0.56, "step": 61600 }, { "epoch": 0.2009752347696928, "grad_norm": 0.7696794867515564, "learning_rate": 3.995253963445421e-05, "loss": 0.5525, "step": 61700 }, { "epoch": 0.20130096448568907, "grad_norm": 1.0839108228683472, "learning_rate": 3.993625261813787e-05, "loss": 0.576, "step": 61800 }, { "epoch": 0.20162669420168533, "grad_norm": 0.4837821125984192, "learning_rate": 3.991996560182154e-05, "loss": 0.6654, "step": 61900 }, { "epoch": 0.2019524239176816, "grad_norm": 0.8696286082267761, "learning_rate": 3.990367858550521e-05, "loss": 0.5237, "step": 62000 }, { "epoch": 0.20227815363367785, "grad_norm": 0.5389662384986877, "learning_rate": 3.988739156918888e-05, "loss": 0.5765, "step": 62100 }, { "epoch": 0.2026038833496741, "grad_norm": 0.39996546506881714, "learning_rate": 3.987110455287254e-05, "loss": 0.5666, "step": 62200 }, { "epoch": 0.20292961306567037, "grad_norm": 0.5612654685974121, "learning_rate": 3.985481753655621e-05, "loss": 0.5975, "step": 62300 }, { "epoch": 0.20325534278166663, "grad_norm": 0.4764688014984131, "learning_rate": 3.9838530520239874e-05, "loss": 0.5973, "step": 62400 }, { "epoch": 0.20358107249766288, "grad_norm": 0.538745105266571, "learning_rate": 3.982224350392355e-05, "loss": 0.6108, "step": 62500 }, { "epoch": 0.20390680221365914, "grad_norm": 0.6589317321777344, "learning_rate": 3.980595648760721e-05, "loss": 0.5482, "step": 62600 }, { "epoch": 0.2042325319296554, "grad_norm": 0.8373557925224304, "learning_rate": 3.978966947129088e-05, "loss": 0.5671, "step": 62700 }, { "epoch": 0.20455826164565166, "grad_norm": 0.6305526494979858, "learning_rate": 3.9773382454974544e-05, "loss": 0.6205, "step": 62800 }, { "epoch": 0.20488399136164792, "grad_norm": 0.6550065875053406, "learning_rate": 3.9757095438658216e-05, "loss": 0.5805, "step": 62900 }, { "epoch": 0.20520972107764418, "grad_norm": 0.6951280236244202, "learning_rate": 3.974080842234188e-05, "loss": 0.6103, "step": 63000 }, { "epoch": 0.20553545079364044, "grad_norm": 0.5202652215957642, "learning_rate": 3.972452140602554e-05, "loss": 0.5623, "step": 63100 }, { "epoch": 0.20586118050963673, "grad_norm": 1.0889042615890503, "learning_rate": 3.9708234389709214e-05, "loss": 0.5879, "step": 63200 }, { "epoch": 0.20618691022563299, "grad_norm": 0.4142896234989166, "learning_rate": 3.969194737339288e-05, "loss": 0.6148, "step": 63300 }, { "epoch": 0.20651263994162924, "grad_norm": 0.6650342345237732, "learning_rate": 3.967566035707655e-05, "loss": 0.5902, "step": 63400 }, { "epoch": 0.2068383696576255, "grad_norm": 0.42452552914619446, "learning_rate": 3.965937334076021e-05, "loss": 0.4877, "step": 63500 }, { "epoch": 0.20716409937362176, "grad_norm": 0.6702756881713867, "learning_rate": 3.964308632444388e-05, "loss": 0.5943, "step": 63600 }, { "epoch": 0.20748982908961802, "grad_norm": 0.9007012248039246, "learning_rate": 3.962679930812755e-05, "loss": 0.5652, "step": 63700 }, { "epoch": 0.20781555880561428, "grad_norm": 0.8962705135345459, "learning_rate": 3.9610512291811215e-05, "loss": 0.5731, "step": 63800 }, { "epoch": 0.20814128852161054, "grad_norm": 0.8256299495697021, "learning_rate": 3.959422527549489e-05, "loss": 0.5596, "step": 63900 }, { "epoch": 0.2084670182376068, "grad_norm": 0.5674106478691101, "learning_rate": 3.9577938259178546e-05, "loss": 0.557, "step": 64000 }, { "epoch": 0.20879274795360306, "grad_norm": 0.564755916595459, "learning_rate": 3.956165124286222e-05, "loss": 0.5735, "step": 64100 }, { "epoch": 0.20911847766959932, "grad_norm": 1.0437874794006348, "learning_rate": 3.9545364226545884e-05, "loss": 0.5371, "step": 64200 }, { "epoch": 0.20944420738559558, "grad_norm": 0.877699077129364, "learning_rate": 3.952907721022956e-05, "loss": 0.538, "step": 64300 }, { "epoch": 0.20976993710159184, "grad_norm": 0.6481153964996338, "learning_rate": 3.9512790193913216e-05, "loss": 0.5763, "step": 64400 }, { "epoch": 0.2100956668175881, "grad_norm": 0.7963904142379761, "learning_rate": 3.949650317759688e-05, "loss": 0.5617, "step": 64500 }, { "epoch": 0.21042139653358435, "grad_norm": 1.1034698486328125, "learning_rate": 3.9480216161280554e-05, "loss": 0.5876, "step": 64600 }, { "epoch": 0.2107471262495806, "grad_norm": 0.7540128827095032, "learning_rate": 3.946392914496422e-05, "loss": 0.574, "step": 64700 }, { "epoch": 0.21107285596557687, "grad_norm": 0.7184910178184509, "learning_rate": 3.9447642128647886e-05, "loss": 0.5328, "step": 64800 }, { "epoch": 0.21139858568157313, "grad_norm": 0.7150009274482727, "learning_rate": 3.943135511233155e-05, "loss": 0.6049, "step": 64900 }, { "epoch": 0.21172431539756942, "grad_norm": 0.4451941251754761, "learning_rate": 3.9415068096015224e-05, "loss": 0.5958, "step": 65000 }, { "epoch": 0.21205004511356568, "grad_norm": 1.00858736038208, "learning_rate": 3.939878107969889e-05, "loss": 0.5752, "step": 65100 }, { "epoch": 0.21237577482956194, "grad_norm": 0.7953845858573914, "learning_rate": 3.9382494063382555e-05, "loss": 0.5555, "step": 65200 }, { "epoch": 0.2127015045455582, "grad_norm": 0.5992127060890198, "learning_rate": 3.936620704706622e-05, "loss": 0.59, "step": 65300 }, { "epoch": 0.21302723426155445, "grad_norm": 0.5878809690475464, "learning_rate": 3.934992003074989e-05, "loss": 0.5881, "step": 65400 }, { "epoch": 0.2133529639775507, "grad_norm": 0.9159529805183411, "learning_rate": 3.933363301443356e-05, "loss": 0.5951, "step": 65500 }, { "epoch": 0.21367869369354697, "grad_norm": 0.6340069770812988, "learning_rate": 3.9317345998117225e-05, "loss": 0.5799, "step": 65600 }, { "epoch": 0.21400442340954323, "grad_norm": 0.8940368890762329, "learning_rate": 3.930105898180089e-05, "loss": 0.5273, "step": 65700 }, { "epoch": 0.2143301531255395, "grad_norm": 0.7908622622489929, "learning_rate": 3.9284771965484556e-05, "loss": 0.5472, "step": 65800 }, { "epoch": 0.21465588284153575, "grad_norm": 0.9964277744293213, "learning_rate": 3.926848494916822e-05, "loss": 0.5719, "step": 65900 }, { "epoch": 0.214981612557532, "grad_norm": 0.6497515439987183, "learning_rate": 3.9252197932851895e-05, "loss": 0.5338, "step": 66000 }, { "epoch": 0.21530734227352827, "grad_norm": 0.8303185105323792, "learning_rate": 3.9235910916535554e-05, "loss": 0.5237, "step": 66100 }, { "epoch": 0.21563307198952453, "grad_norm": 0.8530830144882202, "learning_rate": 3.9219623900219226e-05, "loss": 0.5328, "step": 66200 }, { "epoch": 0.2159588017055208, "grad_norm": 0.9482616782188416, "learning_rate": 3.920333688390289e-05, "loss": 0.5548, "step": 66300 }, { "epoch": 0.21628453142151705, "grad_norm": 0.430633008480072, "learning_rate": 3.9187049867586564e-05, "loss": 0.551, "step": 66400 }, { "epoch": 0.2166102611375133, "grad_norm": 0.5612674355506897, "learning_rate": 3.917076285127022e-05, "loss": 0.5571, "step": 66500 }, { "epoch": 0.21693599085350956, "grad_norm": 0.7157821655273438, "learning_rate": 3.915447583495389e-05, "loss": 0.555, "step": 66600 }, { "epoch": 0.21726172056950582, "grad_norm": 0.6013966202735901, "learning_rate": 3.913818881863756e-05, "loss": 0.585, "step": 66700 }, { "epoch": 0.2175874502855021, "grad_norm": 0.4616648554801941, "learning_rate": 3.912190180232123e-05, "loss": 0.5832, "step": 66800 }, { "epoch": 0.21791318000149837, "grad_norm": 0.6870980858802795, "learning_rate": 3.910561478600489e-05, "loss": 0.5944, "step": 66900 }, { "epoch": 0.21823890971749463, "grad_norm": 0.629490315914154, "learning_rate": 3.908932776968856e-05, "loss": 0.5279, "step": 67000 }, { "epoch": 0.2185646394334909, "grad_norm": 0.5478650331497192, "learning_rate": 3.907304075337223e-05, "loss": 0.5815, "step": 67100 }, { "epoch": 0.21889036914948715, "grad_norm": 0.6581255793571472, "learning_rate": 3.90567537370559e-05, "loss": 0.5661, "step": 67200 }, { "epoch": 0.2192160988654834, "grad_norm": 0.7738802433013916, "learning_rate": 3.904046672073956e-05, "loss": 0.5901, "step": 67300 }, { "epoch": 0.21954182858147966, "grad_norm": 0.5748447179794312, "learning_rate": 3.902417970442323e-05, "loss": 0.5813, "step": 67400 }, { "epoch": 0.21986755829747592, "grad_norm": 0.7152987718582153, "learning_rate": 3.9007892688106894e-05, "loss": 0.5359, "step": 67500 }, { "epoch": 0.22019328801347218, "grad_norm": 0.867574155330658, "learning_rate": 3.899160567179057e-05, "loss": 0.5419, "step": 67600 }, { "epoch": 0.22051901772946844, "grad_norm": 0.8477634787559509, "learning_rate": 3.897531865547423e-05, "loss": 0.5788, "step": 67700 }, { "epoch": 0.2208447474454647, "grad_norm": 0.7993571758270264, "learning_rate": 3.89590316391579e-05, "loss": 0.528, "step": 67800 }, { "epoch": 0.22117047716146096, "grad_norm": 0.6607359647750854, "learning_rate": 3.8942744622841564e-05, "loss": 0.5647, "step": 67900 }, { "epoch": 0.22149620687745722, "grad_norm": 0.6910780072212219, "learning_rate": 3.892645760652523e-05, "loss": 0.5418, "step": 68000 }, { "epoch": 0.22182193659345348, "grad_norm": 0.4793308675289154, "learning_rate": 3.89101705902089e-05, "loss": 0.5913, "step": 68100 }, { "epoch": 0.22214766630944974, "grad_norm": 0.7222141027450562, "learning_rate": 3.889388357389257e-05, "loss": 0.6128, "step": 68200 }, { "epoch": 0.222473396025446, "grad_norm": 0.43712884187698364, "learning_rate": 3.8877596557576233e-05, "loss": 0.583, "step": 68300 }, { "epoch": 0.22279912574144226, "grad_norm": 0.5187420845031738, "learning_rate": 3.88613095412599e-05, "loss": 0.5758, "step": 68400 }, { "epoch": 0.22312485545743851, "grad_norm": 0.5550572872161865, "learning_rate": 3.884502252494357e-05, "loss": 0.5269, "step": 68500 }, { "epoch": 0.22345058517343477, "grad_norm": 0.7551735639572144, "learning_rate": 3.882873550862724e-05, "loss": 0.6005, "step": 68600 }, { "epoch": 0.22377631488943106, "grad_norm": 0.7213869690895081, "learning_rate": 3.8812448492310896e-05, "loss": 0.5174, "step": 68700 }, { "epoch": 0.22410204460542732, "grad_norm": 0.6445099115371704, "learning_rate": 3.879616147599457e-05, "loss": 0.5501, "step": 68800 }, { "epoch": 0.22442777432142358, "grad_norm": 0.7937589883804321, "learning_rate": 3.8779874459678235e-05, "loss": 0.5598, "step": 68900 }, { "epoch": 0.22475350403741984, "grad_norm": 0.5327324271202087, "learning_rate": 3.876358744336191e-05, "loss": 0.531, "step": 69000 }, { "epoch": 0.2250792337534161, "grad_norm": 0.7627710103988647, "learning_rate": 3.8747300427045566e-05, "loss": 0.578, "step": 69100 }, { "epoch": 0.22540496346941236, "grad_norm": 0.5054932832717896, "learning_rate": 3.873101341072924e-05, "loss": 0.5905, "step": 69200 }, { "epoch": 0.22573069318540862, "grad_norm": 0.6468352675437927, "learning_rate": 3.8714726394412904e-05, "loss": 0.5931, "step": 69300 }, { "epoch": 0.22605642290140487, "grad_norm": 0.37974539399147034, "learning_rate": 3.869843937809657e-05, "loss": 0.5777, "step": 69400 }, { "epoch": 0.22638215261740113, "grad_norm": 0.8011950850486755, "learning_rate": 3.8682152361780236e-05, "loss": 0.5187, "step": 69500 }, { "epoch": 0.2267078823333974, "grad_norm": 0.40006023645401, "learning_rate": 3.86658653454639e-05, "loss": 0.5292, "step": 69600 }, { "epoch": 0.22703361204939365, "grad_norm": 0.42605412006378174, "learning_rate": 3.8649578329147574e-05, "loss": 0.5704, "step": 69700 }, { "epoch": 0.2273593417653899, "grad_norm": 0.820277988910675, "learning_rate": 3.863329131283124e-05, "loss": 0.5641, "step": 69800 }, { "epoch": 0.22768507148138617, "grad_norm": 0.6671209931373596, "learning_rate": 3.8617004296514905e-05, "loss": 0.5942, "step": 69900 }, { "epoch": 0.22801080119738243, "grad_norm": 0.7214267253875732, "learning_rate": 3.860071728019857e-05, "loss": 0.6078, "step": 70000 }, { "epoch": 0.2283365309133787, "grad_norm": 0.5705024003982544, "learning_rate": 3.858443026388224e-05, "loss": 0.5111, "step": 70100 }, { "epoch": 0.22866226062937495, "grad_norm": 0.7017680406570435, "learning_rate": 3.856814324756591e-05, "loss": 0.5386, "step": 70200 }, { "epoch": 0.2289879903453712, "grad_norm": 0.36700716614723206, "learning_rate": 3.8551856231249575e-05, "loss": 0.5947, "step": 70300 }, { "epoch": 0.22931372006136747, "grad_norm": 1.018539309501648, "learning_rate": 3.853556921493324e-05, "loss": 0.5739, "step": 70400 }, { "epoch": 0.22963944977736375, "grad_norm": 0.8273037672042847, "learning_rate": 3.851928219861691e-05, "loss": 0.5247, "step": 70500 }, { "epoch": 0.22996517949336, "grad_norm": 1.0655425786972046, "learning_rate": 3.850299518230058e-05, "loss": 0.5397, "step": 70600 }, { "epoch": 0.23029090920935627, "grad_norm": 0.38495421409606934, "learning_rate": 3.8486708165984245e-05, "loss": 0.5844, "step": 70700 }, { "epoch": 0.23061663892535253, "grad_norm": 0.9659711122512817, "learning_rate": 3.847042114966791e-05, "loss": 0.5873, "step": 70800 }, { "epoch": 0.2309423686413488, "grad_norm": 0.7230137586593628, "learning_rate": 3.8454134133351576e-05, "loss": 0.593, "step": 70900 }, { "epoch": 0.23126809835734505, "grad_norm": 0.9325969219207764, "learning_rate": 3.843784711703524e-05, "loss": 0.5965, "step": 71000 }, { "epoch": 0.2315938280733413, "grad_norm": 0.6791651248931885, "learning_rate": 3.8421560100718915e-05, "loss": 0.6223, "step": 71100 }, { "epoch": 0.23191955778933757, "grad_norm": 0.8241651058197021, "learning_rate": 3.8405273084402573e-05, "loss": 0.5257, "step": 71200 }, { "epoch": 0.23224528750533383, "grad_norm": 0.8813059329986572, "learning_rate": 3.8388986068086246e-05, "loss": 0.5965, "step": 71300 }, { "epoch": 0.23257101722133008, "grad_norm": 0.7717010378837585, "learning_rate": 3.837269905176991e-05, "loss": 0.5502, "step": 71400 }, { "epoch": 0.23289674693732634, "grad_norm": 0.39482927322387695, "learning_rate": 3.8356412035453584e-05, "loss": 0.5618, "step": 71500 }, { "epoch": 0.2332224766533226, "grad_norm": 0.8985998630523682, "learning_rate": 3.834012501913724e-05, "loss": 0.5247, "step": 71600 }, { "epoch": 0.23354820636931886, "grad_norm": 0.4451032876968384, "learning_rate": 3.832383800282091e-05, "loss": 0.565, "step": 71700 }, { "epoch": 0.23387393608531512, "grad_norm": 0.46427956223487854, "learning_rate": 3.830755098650458e-05, "loss": 0.5511, "step": 71800 }, { "epoch": 0.23419966580131138, "grad_norm": 1.1371232271194458, "learning_rate": 3.829126397018825e-05, "loss": 0.5867, "step": 71900 }, { "epoch": 0.23452539551730764, "grad_norm": 0.5856015086174011, "learning_rate": 3.827497695387191e-05, "loss": 0.5425, "step": 72000 }, { "epoch": 0.2348511252333039, "grad_norm": 0.5723338723182678, "learning_rate": 3.825868993755558e-05, "loss": 0.5828, "step": 72100 }, { "epoch": 0.23517685494930016, "grad_norm": 0.6274189352989197, "learning_rate": 3.824240292123925e-05, "loss": 0.4961, "step": 72200 }, { "epoch": 0.23550258466529644, "grad_norm": 0.5841485857963562, "learning_rate": 3.822611590492292e-05, "loss": 0.5639, "step": 72300 }, { "epoch": 0.2358283143812927, "grad_norm": 0.9061130285263062, "learning_rate": 3.820982888860658e-05, "loss": 0.5126, "step": 72400 }, { "epoch": 0.23615404409728896, "grad_norm": 0.9499684572219849, "learning_rate": 3.819354187229025e-05, "loss": 0.5684, "step": 72500 }, { "epoch": 0.23647977381328522, "grad_norm": 0.7132393717765808, "learning_rate": 3.8177254855973914e-05, "loss": 0.5287, "step": 72600 }, { "epoch": 0.23680550352928148, "grad_norm": 0.8645475506782532, "learning_rate": 3.8160967839657587e-05, "loss": 0.564, "step": 72700 }, { "epoch": 0.23713123324527774, "grad_norm": 0.8675580024719238, "learning_rate": 3.814468082334125e-05, "loss": 0.5435, "step": 72800 }, { "epoch": 0.237456962961274, "grad_norm": 0.7194923162460327, "learning_rate": 3.812839380702492e-05, "loss": 0.5843, "step": 72900 }, { "epoch": 0.23778269267727026, "grad_norm": 0.782618522644043, "learning_rate": 3.8112106790708584e-05, "loss": 0.5609, "step": 73000 }, { "epoch": 0.23810842239326652, "grad_norm": 0.6671516299247742, "learning_rate": 3.809581977439225e-05, "loss": 0.4925, "step": 73100 }, { "epoch": 0.23843415210926278, "grad_norm": 0.8488081097602844, "learning_rate": 3.807953275807592e-05, "loss": 0.5536, "step": 73200 }, { "epoch": 0.23875988182525903, "grad_norm": 0.7259848117828369, "learning_rate": 3.806324574175959e-05, "loss": 0.5372, "step": 73300 }, { "epoch": 0.2390856115412553, "grad_norm": 0.5849174857139587, "learning_rate": 3.8046958725443253e-05, "loss": 0.5602, "step": 73400 }, { "epoch": 0.23941134125725155, "grad_norm": 0.36567142605781555, "learning_rate": 3.803067170912692e-05, "loss": 0.5976, "step": 73500 }, { "epoch": 0.2397370709732478, "grad_norm": 0.8540560007095337, "learning_rate": 3.801438469281059e-05, "loss": 0.576, "step": 73600 }, { "epoch": 0.24006280068924407, "grad_norm": 0.7733421921730042, "learning_rate": 3.799809767649426e-05, "loss": 0.5446, "step": 73700 }, { "epoch": 0.24038853040524033, "grad_norm": 0.6541240811347961, "learning_rate": 3.7981810660177916e-05, "loss": 0.5302, "step": 73800 }, { "epoch": 0.2407142601212366, "grad_norm": 0.6777580976486206, "learning_rate": 3.796552364386159e-05, "loss": 0.5742, "step": 73900 }, { "epoch": 0.24103998983723285, "grad_norm": 1.1045103073120117, "learning_rate": 3.7949236627545255e-05, "loss": 0.5391, "step": 74000 }, { "epoch": 0.2413657195532291, "grad_norm": 1.223781943321228, "learning_rate": 3.793294961122893e-05, "loss": 0.5754, "step": 74100 }, { "epoch": 0.2416914492692254, "grad_norm": 0.7645404934883118, "learning_rate": 3.7916662594912586e-05, "loss": 0.5424, "step": 74200 }, { "epoch": 0.24201717898522165, "grad_norm": 0.8637171983718872, "learning_rate": 3.790037557859626e-05, "loss": 0.5577, "step": 74300 }, { "epoch": 0.2423429087012179, "grad_norm": 0.633642315864563, "learning_rate": 3.7884088562279924e-05, "loss": 0.5513, "step": 74400 }, { "epoch": 0.24266863841721417, "grad_norm": 0.48609936237335205, "learning_rate": 3.786780154596359e-05, "loss": 0.6002, "step": 74500 }, { "epoch": 0.24299436813321043, "grad_norm": 0.3668748140335083, "learning_rate": 3.7851514529647256e-05, "loss": 0.5947, "step": 74600 }, { "epoch": 0.2433200978492067, "grad_norm": 0.735894501209259, "learning_rate": 3.783522751333092e-05, "loss": 0.5862, "step": 74700 }, { "epoch": 0.24364582756520295, "grad_norm": 0.8264063000679016, "learning_rate": 3.7818940497014594e-05, "loss": 0.5749, "step": 74800 }, { "epoch": 0.2439715572811992, "grad_norm": 0.482183575630188, "learning_rate": 3.780265348069826e-05, "loss": 0.5553, "step": 74900 }, { "epoch": 0.24429728699719547, "grad_norm": 0.6649850606918335, "learning_rate": 3.7786366464381925e-05, "loss": 0.6042, "step": 75000 }, { "epoch": 0.24462301671319173, "grad_norm": 0.5215208530426025, "learning_rate": 3.777007944806559e-05, "loss": 0.5134, "step": 75100 }, { "epoch": 0.24494874642918799, "grad_norm": 0.6028915643692017, "learning_rate": 3.775379243174926e-05, "loss": 0.5, "step": 75200 }, { "epoch": 0.24527447614518424, "grad_norm": 0.5038050413131714, "learning_rate": 3.773750541543293e-05, "loss": 0.6081, "step": 75300 }, { "epoch": 0.2456002058611805, "grad_norm": 0.568586528301239, "learning_rate": 3.7721218399116595e-05, "loss": 0.5484, "step": 75400 }, { "epoch": 0.24592593557717676, "grad_norm": 0.4442402720451355, "learning_rate": 3.770493138280026e-05, "loss": 0.5983, "step": 75500 }, { "epoch": 0.24625166529317302, "grad_norm": 0.775284469127655, "learning_rate": 3.7688644366483927e-05, "loss": 0.549, "step": 75600 }, { "epoch": 0.24657739500916928, "grad_norm": 0.7132833003997803, "learning_rate": 3.76723573501676e-05, "loss": 0.5317, "step": 75700 }, { "epoch": 0.24690312472516554, "grad_norm": 0.7935360074043274, "learning_rate": 3.7656070333851265e-05, "loss": 0.5389, "step": 75800 }, { "epoch": 0.2472288544411618, "grad_norm": 0.5749487280845642, "learning_rate": 3.7639783317534924e-05, "loss": 0.5918, "step": 75900 }, { "epoch": 0.2475545841571581, "grad_norm": 0.6536827087402344, "learning_rate": 3.7623496301218596e-05, "loss": 0.5245, "step": 76000 }, { "epoch": 0.24788031387315435, "grad_norm": 0.7014347314834595, "learning_rate": 3.760720928490226e-05, "loss": 0.5661, "step": 76100 }, { "epoch": 0.2482060435891506, "grad_norm": 0.8436623811721802, "learning_rate": 3.7590922268585934e-05, "loss": 0.5714, "step": 76200 }, { "epoch": 0.24853177330514686, "grad_norm": 0.6371897459030151, "learning_rate": 3.7574635252269593e-05, "loss": 0.5767, "step": 76300 }, { "epoch": 0.24885750302114312, "grad_norm": 0.7796430587768555, "learning_rate": 3.7558348235953266e-05, "loss": 0.5308, "step": 76400 }, { "epoch": 0.24918323273713938, "grad_norm": 0.6565324664115906, "learning_rate": 3.754206121963693e-05, "loss": 0.5377, "step": 76500 }, { "epoch": 0.24950896245313564, "grad_norm": 0.6670543551445007, "learning_rate": 3.75257742033206e-05, "loss": 0.6095, "step": 76600 }, { "epoch": 0.2498346921691319, "grad_norm": 0.8650514483451843, "learning_rate": 3.750948718700426e-05, "loss": 0.5586, "step": 76700 }, { "epoch": 0.25016042188512816, "grad_norm": 0.42015933990478516, "learning_rate": 3.749320017068793e-05, "loss": 0.5274, "step": 76800 }, { "epoch": 0.2504861516011244, "grad_norm": 0.5667533278465271, "learning_rate": 3.74769131543716e-05, "loss": 0.5628, "step": 76900 }, { "epoch": 0.2508118813171207, "grad_norm": 0.6887187361717224, "learning_rate": 3.746062613805527e-05, "loss": 0.5663, "step": 77000 }, { "epoch": 0.25113761103311694, "grad_norm": 0.4367005527019501, "learning_rate": 3.744433912173893e-05, "loss": 0.5368, "step": 77100 }, { "epoch": 0.2514633407491132, "grad_norm": 0.3392166197299957, "learning_rate": 3.74280521054226e-05, "loss": 0.5353, "step": 77200 }, { "epoch": 0.25178907046510945, "grad_norm": 0.5449352860450745, "learning_rate": 3.7411765089106264e-05, "loss": 0.5611, "step": 77300 }, { "epoch": 0.2521148001811057, "grad_norm": 0.6924061179161072, "learning_rate": 3.739547807278994e-05, "loss": 0.5918, "step": 77400 }, { "epoch": 0.252440529897102, "grad_norm": 0.8356592655181885, "learning_rate": 3.73791910564736e-05, "loss": 0.5713, "step": 77500 }, { "epoch": 0.25276625961309823, "grad_norm": 0.9207838177680969, "learning_rate": 3.736290404015727e-05, "loss": 0.5078, "step": 77600 }, { "epoch": 0.2530919893290945, "grad_norm": 0.6466575860977173, "learning_rate": 3.7346617023840934e-05, "loss": 0.5274, "step": 77700 }, { "epoch": 0.25341771904509075, "grad_norm": 0.5351524353027344, "learning_rate": 3.7330330007524606e-05, "loss": 0.5411, "step": 77800 }, { "epoch": 0.253743448761087, "grad_norm": 0.7786761522293091, "learning_rate": 3.731404299120827e-05, "loss": 0.4859, "step": 77900 }, { "epoch": 0.25406917847708327, "grad_norm": 0.6750699281692505, "learning_rate": 3.729775597489194e-05, "loss": 0.5689, "step": 78000 }, { "epoch": 0.2543949081930795, "grad_norm": 0.7088775038719177, "learning_rate": 3.7281468958575604e-05, "loss": 0.5325, "step": 78100 }, { "epoch": 0.2547206379090758, "grad_norm": 0.8920672535896301, "learning_rate": 3.726518194225927e-05, "loss": 0.5284, "step": 78200 }, { "epoch": 0.25504636762507205, "grad_norm": 0.6582838296890259, "learning_rate": 3.724889492594294e-05, "loss": 0.511, "step": 78300 }, { "epoch": 0.2553720973410683, "grad_norm": 0.6662094593048096, "learning_rate": 3.723260790962661e-05, "loss": 0.5618, "step": 78400 }, { "epoch": 0.25569782705706456, "grad_norm": 0.4346591830253601, "learning_rate": 3.721632089331027e-05, "loss": 0.54, "step": 78500 }, { "epoch": 0.2560235567730608, "grad_norm": 0.7967207431793213, "learning_rate": 3.720003387699394e-05, "loss": 0.5884, "step": 78600 }, { "epoch": 0.25634928648905714, "grad_norm": 0.4879821538925171, "learning_rate": 3.7183746860677605e-05, "loss": 0.5557, "step": 78700 }, { "epoch": 0.2566750162050534, "grad_norm": 0.5626016855239868, "learning_rate": 3.716745984436128e-05, "loss": 0.498, "step": 78800 }, { "epoch": 0.25700074592104966, "grad_norm": 0.5859974026679993, "learning_rate": 3.7151172828044936e-05, "loss": 0.5218, "step": 78900 }, { "epoch": 0.2573264756370459, "grad_norm": 0.7462596893310547, "learning_rate": 3.713488581172861e-05, "loss": 0.5093, "step": 79000 }, { "epoch": 0.2576522053530422, "grad_norm": 0.9555974006652832, "learning_rate": 3.7118598795412274e-05, "loss": 0.5348, "step": 79100 }, { "epoch": 0.25797793506903843, "grad_norm": 0.7466504573822021, "learning_rate": 3.710231177909595e-05, "loss": 0.5383, "step": 79200 }, { "epoch": 0.2583036647850347, "grad_norm": 0.8801865577697754, "learning_rate": 3.7086024762779606e-05, "loss": 0.4767, "step": 79300 }, { "epoch": 0.25862939450103095, "grad_norm": 0.48174184560775757, "learning_rate": 3.706973774646328e-05, "loss": 0.5528, "step": 79400 }, { "epoch": 0.2589551242170272, "grad_norm": 0.7198649048805237, "learning_rate": 3.7053450730146944e-05, "loss": 0.5953, "step": 79500 }, { "epoch": 0.25928085393302347, "grad_norm": 0.4515075385570526, "learning_rate": 3.703716371383061e-05, "loss": 0.5505, "step": 79600 }, { "epoch": 0.25960658364901973, "grad_norm": 0.706524670124054, "learning_rate": 3.7020876697514276e-05, "loss": 0.6011, "step": 79700 }, { "epoch": 0.259932313365016, "grad_norm": 0.6895307302474976, "learning_rate": 3.700458968119794e-05, "loss": 0.5188, "step": 79800 }, { "epoch": 0.26025804308101225, "grad_norm": 0.7927341461181641, "learning_rate": 3.6988302664881614e-05, "loss": 0.5739, "step": 79900 }, { "epoch": 0.2605837727970085, "grad_norm": 0.8496550917625427, "learning_rate": 3.697201564856528e-05, "loss": 0.5152, "step": 80000 }, { "epoch": 0.26090950251300477, "grad_norm": 0.47138693928718567, "learning_rate": 3.6955728632248945e-05, "loss": 0.5475, "step": 80100 }, { "epoch": 0.261235232229001, "grad_norm": 0.8020485639572144, "learning_rate": 3.693944161593261e-05, "loss": 0.5489, "step": 80200 }, { "epoch": 0.2615609619449973, "grad_norm": 0.6385429501533508, "learning_rate": 3.692315459961628e-05, "loss": 0.5457, "step": 80300 }, { "epoch": 0.26188669166099354, "grad_norm": 0.6027743220329285, "learning_rate": 3.690686758329995e-05, "loss": 0.5412, "step": 80400 }, { "epoch": 0.2622124213769898, "grad_norm": 0.6040454506874084, "learning_rate": 3.6890580566983615e-05, "loss": 0.5348, "step": 80500 }, { "epoch": 0.26253815109298606, "grad_norm": 0.6697177290916443, "learning_rate": 3.687429355066728e-05, "loss": 0.509, "step": 80600 }, { "epoch": 0.2628638808089823, "grad_norm": 0.8428653478622437, "learning_rate": 3.6858006534350946e-05, "loss": 0.5505, "step": 80700 }, { "epoch": 0.2631896105249786, "grad_norm": 0.9421257972717285, "learning_rate": 3.684171951803462e-05, "loss": 0.5587, "step": 80800 }, { "epoch": 0.26351534024097484, "grad_norm": 0.7752894759178162, "learning_rate": 3.6825432501718285e-05, "loss": 0.5308, "step": 80900 }, { "epoch": 0.2638410699569711, "grad_norm": 0.9658520817756653, "learning_rate": 3.6809145485401944e-05, "loss": 0.5394, "step": 81000 }, { "epoch": 0.26416679967296736, "grad_norm": 0.3100132644176483, "learning_rate": 3.6792858469085616e-05, "loss": 0.5616, "step": 81100 }, { "epoch": 0.2644925293889636, "grad_norm": 1.0838834047317505, "learning_rate": 3.677657145276928e-05, "loss": 0.5374, "step": 81200 }, { "epoch": 0.2648182591049599, "grad_norm": 0.9311345219612122, "learning_rate": 3.6760284436452954e-05, "loss": 0.5353, "step": 81300 }, { "epoch": 0.26514398882095613, "grad_norm": 0.32365360856056213, "learning_rate": 3.674399742013661e-05, "loss": 0.5493, "step": 81400 }, { "epoch": 0.2654697185369524, "grad_norm": 0.6390203833580017, "learning_rate": 3.6727710403820286e-05, "loss": 0.5205, "step": 81500 }, { "epoch": 0.26579544825294865, "grad_norm": 0.6106113195419312, "learning_rate": 3.671142338750395e-05, "loss": 0.5161, "step": 81600 }, { "epoch": 0.2661211779689449, "grad_norm": 0.4415883421897888, "learning_rate": 3.669513637118762e-05, "loss": 0.5235, "step": 81700 }, { "epoch": 0.26644690768494117, "grad_norm": 0.8828484416007996, "learning_rate": 3.667884935487128e-05, "loss": 0.5214, "step": 81800 }, { "epoch": 0.26677263740093743, "grad_norm": 0.8186760544776917, "learning_rate": 3.666256233855495e-05, "loss": 0.5435, "step": 81900 }, { "epoch": 0.2670983671169337, "grad_norm": 0.43989554047584534, "learning_rate": 3.664627532223862e-05, "loss": 0.5653, "step": 82000 }, { "epoch": 0.26742409683292995, "grad_norm": 1.083422303199768, "learning_rate": 3.662998830592229e-05, "loss": 0.5338, "step": 82100 }, { "epoch": 0.2677498265489262, "grad_norm": 0.40522611141204834, "learning_rate": 3.661370128960596e-05, "loss": 0.4892, "step": 82200 }, { "epoch": 0.26807555626492247, "grad_norm": 0.7010061740875244, "learning_rate": 3.659741427328962e-05, "loss": 0.5372, "step": 82300 }, { "epoch": 0.2684012859809188, "grad_norm": 0.9971382021903992, "learning_rate": 3.6581127256973284e-05, "loss": 0.501, "step": 82400 }, { "epoch": 0.26872701569691504, "grad_norm": 0.5222276449203491, "learning_rate": 3.656484024065696e-05, "loss": 0.5194, "step": 82500 }, { "epoch": 0.2690527454129113, "grad_norm": 0.724824845790863, "learning_rate": 3.654855322434062e-05, "loss": 0.499, "step": 82600 }, { "epoch": 0.26937847512890756, "grad_norm": 0.48272421956062317, "learning_rate": 3.653226620802429e-05, "loss": 0.486, "step": 82700 }, { "epoch": 0.2697042048449038, "grad_norm": 0.8187432885169983, "learning_rate": 3.6515979191707954e-05, "loss": 0.5634, "step": 82800 }, { "epoch": 0.2700299345609001, "grad_norm": 0.46917855739593506, "learning_rate": 3.6499692175391626e-05, "loss": 0.5468, "step": 82900 }, { "epoch": 0.27035566427689633, "grad_norm": 0.5338607430458069, "learning_rate": 3.648340515907529e-05, "loss": 0.481, "step": 83000 }, { "epoch": 0.2706813939928926, "grad_norm": 0.5420836806297302, "learning_rate": 3.646711814275896e-05, "loss": 0.5391, "step": 83100 }, { "epoch": 0.27100712370888885, "grad_norm": 0.5124307870864868, "learning_rate": 3.6450831126442624e-05, "loss": 0.5446, "step": 83200 }, { "epoch": 0.2713328534248851, "grad_norm": 0.5944223403930664, "learning_rate": 3.643454411012629e-05, "loss": 0.5759, "step": 83300 }, { "epoch": 0.27165858314088137, "grad_norm": 1.1431384086608887, "learning_rate": 3.641825709380996e-05, "loss": 0.5416, "step": 83400 }, { "epoch": 0.27198431285687763, "grad_norm": 0.9613766670227051, "learning_rate": 3.640197007749363e-05, "loss": 0.521, "step": 83500 }, { "epoch": 0.2723100425728739, "grad_norm": 0.7477935552597046, "learning_rate": 3.638568306117729e-05, "loss": 0.558, "step": 83600 }, { "epoch": 0.27263577228887015, "grad_norm": 0.47112804651260376, "learning_rate": 3.636939604486096e-05, "loss": 0.5083, "step": 83700 }, { "epoch": 0.2729615020048664, "grad_norm": 0.5914379954338074, "learning_rate": 3.6353109028544625e-05, "loss": 0.5776, "step": 83800 }, { "epoch": 0.27328723172086267, "grad_norm": 0.5500662326812744, "learning_rate": 3.63368220122283e-05, "loss": 0.5194, "step": 83900 }, { "epoch": 0.2736129614368589, "grad_norm": 0.41591793298721313, "learning_rate": 3.6320534995911956e-05, "loss": 0.5266, "step": 84000 }, { "epoch": 0.2739386911528552, "grad_norm": 1.080356478691101, "learning_rate": 3.630424797959563e-05, "loss": 0.4964, "step": 84100 }, { "epoch": 0.27426442086885144, "grad_norm": 0.40892690420150757, "learning_rate": 3.6287960963279294e-05, "loss": 0.5163, "step": 84200 }, { "epoch": 0.2745901505848477, "grad_norm": 0.7729841470718384, "learning_rate": 3.627167394696297e-05, "loss": 0.5336, "step": 84300 }, { "epoch": 0.27491588030084396, "grad_norm": 0.6264617443084717, "learning_rate": 3.6255386930646626e-05, "loss": 0.5762, "step": 84400 }, { "epoch": 0.2752416100168402, "grad_norm": 0.8050372004508972, "learning_rate": 3.623909991433029e-05, "loss": 0.4509, "step": 84500 }, { "epoch": 0.2755673397328365, "grad_norm": 0.621804416179657, "learning_rate": 3.6222812898013964e-05, "loss": 0.5174, "step": 84600 }, { "epoch": 0.27589306944883274, "grad_norm": 0.5717790126800537, "learning_rate": 3.620652588169763e-05, "loss": 0.5431, "step": 84700 }, { "epoch": 0.276218799164829, "grad_norm": 0.394345223903656, "learning_rate": 3.6190238865381295e-05, "loss": 0.5294, "step": 84800 }, { "epoch": 0.27654452888082526, "grad_norm": 0.8917814493179321, "learning_rate": 3.617395184906496e-05, "loss": 0.4955, "step": 84900 }, { "epoch": 0.2768702585968215, "grad_norm": 0.721481442451477, "learning_rate": 3.6157664832748634e-05, "loss": 0.5433, "step": 85000 }, { "epoch": 0.2771959883128178, "grad_norm": 0.6476948857307434, "learning_rate": 3.61413778164323e-05, "loss": 0.563, "step": 85100 }, { "epoch": 0.27752171802881404, "grad_norm": 0.38036003708839417, "learning_rate": 3.6125090800115965e-05, "loss": 0.516, "step": 85200 }, { "epoch": 0.2778474477448103, "grad_norm": 0.6185033917427063, "learning_rate": 3.610880378379963e-05, "loss": 0.5178, "step": 85300 }, { "epoch": 0.27817317746080655, "grad_norm": 0.8313725590705872, "learning_rate": 3.60925167674833e-05, "loss": 0.5296, "step": 85400 }, { "epoch": 0.2784989071768028, "grad_norm": 0.5369439721107483, "learning_rate": 3.607622975116697e-05, "loss": 0.5803, "step": 85500 }, { "epoch": 0.27882463689279907, "grad_norm": 0.7777513265609741, "learning_rate": 3.6059942734850635e-05, "loss": 0.4875, "step": 85600 }, { "epoch": 0.27915036660879533, "grad_norm": 0.5527925491333008, "learning_rate": 3.60436557185343e-05, "loss": 0.5141, "step": 85700 }, { "epoch": 0.2794760963247916, "grad_norm": 0.8335199356079102, "learning_rate": 3.6027368702217966e-05, "loss": 0.4851, "step": 85800 }, { "epoch": 0.27980182604078785, "grad_norm": 0.7015230059623718, "learning_rate": 3.601108168590163e-05, "loss": 0.5395, "step": 85900 }, { "epoch": 0.2801275557567841, "grad_norm": 0.7245033979415894, "learning_rate": 3.5994794669585305e-05, "loss": 0.5204, "step": 86000 }, { "epoch": 0.2804532854727804, "grad_norm": 0.8472508192062378, "learning_rate": 3.5978507653268964e-05, "loss": 0.5087, "step": 86100 }, { "epoch": 0.2807790151887767, "grad_norm": 0.7517431974411011, "learning_rate": 3.5962220636952636e-05, "loss": 0.5176, "step": 86200 }, { "epoch": 0.28110474490477294, "grad_norm": 0.5864343643188477, "learning_rate": 3.59459336206363e-05, "loss": 0.5828, "step": 86300 }, { "epoch": 0.2814304746207692, "grad_norm": 0.8981267809867859, "learning_rate": 3.5929646604319974e-05, "loss": 0.5309, "step": 86400 }, { "epoch": 0.28175620433676546, "grad_norm": 0.8167164325714111, "learning_rate": 3.591335958800364e-05, "loss": 0.5513, "step": 86500 }, { "epoch": 0.2820819340527617, "grad_norm": 0.7764830589294434, "learning_rate": 3.58970725716873e-05, "loss": 0.5249, "step": 86600 }, { "epoch": 0.282407663768758, "grad_norm": 0.7545201182365417, "learning_rate": 3.588078555537097e-05, "loss": 0.5293, "step": 86700 }, { "epoch": 0.28273339348475424, "grad_norm": 0.6954336166381836, "learning_rate": 3.586449853905464e-05, "loss": 0.5532, "step": 86800 }, { "epoch": 0.2830591232007505, "grad_norm": 0.6742025017738342, "learning_rate": 3.584821152273831e-05, "loss": 0.5356, "step": 86900 }, { "epoch": 0.28338485291674675, "grad_norm": 0.731679379940033, "learning_rate": 3.583192450642197e-05, "loss": 0.5128, "step": 87000 }, { "epoch": 0.283710582632743, "grad_norm": 0.7906468510627747, "learning_rate": 3.581563749010564e-05, "loss": 0.5359, "step": 87100 }, { "epoch": 0.2840363123487393, "grad_norm": 0.36753523349761963, "learning_rate": 3.579935047378931e-05, "loss": 0.5366, "step": 87200 }, { "epoch": 0.28436204206473553, "grad_norm": 0.6043976545333862, "learning_rate": 3.578306345747298e-05, "loss": 0.4995, "step": 87300 }, { "epoch": 0.2846877717807318, "grad_norm": 0.7573038339614868, "learning_rate": 3.576677644115664e-05, "loss": 0.5093, "step": 87400 }, { "epoch": 0.28501350149672805, "grad_norm": 0.25290992856025696, "learning_rate": 3.5750489424840304e-05, "loss": 0.4948, "step": 87500 }, { "epoch": 0.2853392312127243, "grad_norm": 0.6551434397697449, "learning_rate": 3.5734202408523977e-05, "loss": 0.5116, "step": 87600 }, { "epoch": 0.28566496092872057, "grad_norm": 0.6715214252471924, "learning_rate": 3.571791539220764e-05, "loss": 0.6104, "step": 87700 }, { "epoch": 0.2859906906447168, "grad_norm": 0.7275449633598328, "learning_rate": 3.570162837589131e-05, "loss": 0.506, "step": 87800 }, { "epoch": 0.2863164203607131, "grad_norm": 0.2885235846042633, "learning_rate": 3.5685341359574974e-05, "loss": 0.4684, "step": 87900 }, { "epoch": 0.28664215007670935, "grad_norm": 0.9342713356018066, "learning_rate": 3.5669054343258646e-05, "loss": 0.5293, "step": 88000 }, { "epoch": 0.2869678797927056, "grad_norm": 1.0423755645751953, "learning_rate": 3.565276732694231e-05, "loss": 0.5466, "step": 88100 }, { "epoch": 0.28729360950870186, "grad_norm": 1.0259456634521484, "learning_rate": 3.563648031062598e-05, "loss": 0.4885, "step": 88200 }, { "epoch": 0.2876193392246981, "grad_norm": 0.8733958601951599, "learning_rate": 3.5620193294309643e-05, "loss": 0.5353, "step": 88300 }, { "epoch": 0.2879450689406944, "grad_norm": 0.33869871497154236, "learning_rate": 3.560390627799331e-05, "loss": 0.5465, "step": 88400 }, { "epoch": 0.28827079865669064, "grad_norm": 0.5838894844055176, "learning_rate": 3.558761926167698e-05, "loss": 0.555, "step": 88500 }, { "epoch": 0.2885965283726869, "grad_norm": 0.8616543412208557, "learning_rate": 3.557133224536065e-05, "loss": 0.5173, "step": 88600 }, { "epoch": 0.28892225808868316, "grad_norm": 0.8486323356628418, "learning_rate": 3.555504522904431e-05, "loss": 0.5258, "step": 88700 }, { "epoch": 0.2892479878046794, "grad_norm": 0.6569567918777466, "learning_rate": 3.553875821272798e-05, "loss": 0.5097, "step": 88800 }, { "epoch": 0.2895737175206757, "grad_norm": 0.6821163296699524, "learning_rate": 3.5522471196411645e-05, "loss": 0.5428, "step": 88900 }, { "epoch": 0.28989944723667194, "grad_norm": 0.6147534251213074, "learning_rate": 3.550618418009532e-05, "loss": 0.5544, "step": 89000 }, { "epoch": 0.2902251769526682, "grad_norm": 0.42478904128074646, "learning_rate": 3.5489897163778976e-05, "loss": 0.5376, "step": 89100 }, { "epoch": 0.29055090666866445, "grad_norm": 0.5254961252212524, "learning_rate": 3.547361014746265e-05, "loss": 0.4964, "step": 89200 }, { "epoch": 0.2908766363846607, "grad_norm": 0.6934669017791748, "learning_rate": 3.5457323131146314e-05, "loss": 0.4835, "step": 89300 }, { "epoch": 0.291202366100657, "grad_norm": 0.4250465929508209, "learning_rate": 3.544103611482999e-05, "loss": 0.4954, "step": 89400 }, { "epoch": 0.29152809581665323, "grad_norm": 0.6067728996276855, "learning_rate": 3.5424749098513646e-05, "loss": 0.4926, "step": 89500 }, { "epoch": 0.2918538255326495, "grad_norm": 0.5424463748931885, "learning_rate": 3.540846208219731e-05, "loss": 0.5627, "step": 89600 }, { "epoch": 0.2921795552486458, "grad_norm": 0.5810889005661011, "learning_rate": 3.5392175065880984e-05, "loss": 0.4316, "step": 89700 }, { "epoch": 0.29250528496464206, "grad_norm": 0.4583912491798401, "learning_rate": 3.537588804956465e-05, "loss": 0.4987, "step": 89800 }, { "epoch": 0.2928310146806383, "grad_norm": 0.4320780634880066, "learning_rate": 3.5359601033248315e-05, "loss": 0.5204, "step": 89900 }, { "epoch": 0.2931567443966346, "grad_norm": 0.6955101490020752, "learning_rate": 3.534331401693198e-05, "loss": 0.5179, "step": 90000 }, { "epoch": 0.29348247411263084, "grad_norm": 0.512250542640686, "learning_rate": 3.5327027000615654e-05, "loss": 0.4909, "step": 90100 }, { "epoch": 0.2938082038286271, "grad_norm": 0.7975231409072876, "learning_rate": 3.531073998429932e-05, "loss": 0.4845, "step": 90200 }, { "epoch": 0.29413393354462336, "grad_norm": 0.25338149070739746, "learning_rate": 3.5294452967982985e-05, "loss": 0.4963, "step": 90300 }, { "epoch": 0.2944596632606196, "grad_norm": 0.43115437030792236, "learning_rate": 3.527816595166665e-05, "loss": 0.5203, "step": 90400 }, { "epoch": 0.2947853929766159, "grad_norm": 0.830754280090332, "learning_rate": 3.5261878935350317e-05, "loss": 0.4916, "step": 90500 }, { "epoch": 0.29511112269261214, "grad_norm": 0.8370751738548279, "learning_rate": 3.524559191903399e-05, "loss": 0.547, "step": 90600 }, { "epoch": 0.2954368524086084, "grad_norm": 0.7122400403022766, "learning_rate": 3.5229304902717655e-05, "loss": 0.5126, "step": 90700 }, { "epoch": 0.29576258212460466, "grad_norm": 0.4084763824939728, "learning_rate": 3.521301788640132e-05, "loss": 0.4971, "step": 90800 }, { "epoch": 0.2960883118406009, "grad_norm": 0.8079352974891663, "learning_rate": 3.5196730870084986e-05, "loss": 0.4992, "step": 90900 }, { "epoch": 0.2964140415565972, "grad_norm": 0.25352516770362854, "learning_rate": 3.518044385376865e-05, "loss": 0.5333, "step": 91000 }, { "epoch": 0.29673977127259343, "grad_norm": 0.5390329957008362, "learning_rate": 3.5164156837452324e-05, "loss": 0.5007, "step": 91100 }, { "epoch": 0.2970655009885897, "grad_norm": 0.6617804765701294, "learning_rate": 3.514786982113599e-05, "loss": 0.548, "step": 91200 }, { "epoch": 0.29739123070458595, "grad_norm": 0.7202132940292358, "learning_rate": 3.5131582804819656e-05, "loss": 0.5417, "step": 91300 }, { "epoch": 0.2977169604205822, "grad_norm": 0.28012895584106445, "learning_rate": 3.511529578850332e-05, "loss": 0.4883, "step": 91400 }, { "epoch": 0.29804269013657847, "grad_norm": 0.3527827560901642, "learning_rate": 3.5099008772186994e-05, "loss": 0.523, "step": 91500 }, { "epoch": 0.29836841985257473, "grad_norm": 0.7193790078163147, "learning_rate": 3.508272175587066e-05, "loss": 0.5148, "step": 91600 }, { "epoch": 0.298694149568571, "grad_norm": 0.9702345728874207, "learning_rate": 3.506643473955432e-05, "loss": 0.4781, "step": 91700 }, { "epoch": 0.29901987928456725, "grad_norm": 0.7323670983314514, "learning_rate": 3.505014772323799e-05, "loss": 0.5394, "step": 91800 }, { "epoch": 0.2993456090005635, "grad_norm": 0.6757960915565491, "learning_rate": 3.503386070692166e-05, "loss": 0.4984, "step": 91900 }, { "epoch": 0.29967133871655977, "grad_norm": 0.7119109630584717, "learning_rate": 3.501757369060533e-05, "loss": 0.5502, "step": 92000 }, { "epoch": 0.299997068432556, "grad_norm": 0.6820542216300964, "learning_rate": 3.500128667428899e-05, "loss": 0.5498, "step": 92100 }, { "epoch": 0.3003227981485523, "grad_norm": 0.784050703048706, "learning_rate": 3.498499965797266e-05, "loss": 0.5445, "step": 92200 }, { "epoch": 0.30064852786454854, "grad_norm": 0.6549366116523743, "learning_rate": 3.496871264165633e-05, "loss": 0.5326, "step": 92300 }, { "epoch": 0.3009742575805448, "grad_norm": 0.4872061014175415, "learning_rate": 3.495242562533999e-05, "loss": 0.5093, "step": 92400 }, { "epoch": 0.30129998729654106, "grad_norm": 0.3646996319293976, "learning_rate": 3.493613860902366e-05, "loss": 0.5476, "step": 92500 }, { "epoch": 0.3016257170125373, "grad_norm": 0.5709706544876099, "learning_rate": 3.4919851592707324e-05, "loss": 0.4513, "step": 92600 }, { "epoch": 0.3019514467285336, "grad_norm": 0.6031984090805054, "learning_rate": 3.4903564576390996e-05, "loss": 0.5044, "step": 92700 }, { "epoch": 0.30227717644452984, "grad_norm": 0.8381587862968445, "learning_rate": 3.488727756007466e-05, "loss": 0.5128, "step": 92800 }, { "epoch": 0.3026029061605261, "grad_norm": 1.0859401226043701, "learning_rate": 3.487099054375833e-05, "loss": 0.5328, "step": 92900 }, { "epoch": 0.30292863587652236, "grad_norm": 0.34642109274864197, "learning_rate": 3.4854703527441994e-05, "loss": 0.4852, "step": 93000 }, { "epoch": 0.3032543655925186, "grad_norm": 0.6529460549354553, "learning_rate": 3.483841651112566e-05, "loss": 0.5032, "step": 93100 }, { "epoch": 0.3035800953085149, "grad_norm": 0.7026881575584412, "learning_rate": 3.482212949480933e-05, "loss": 0.6338, "step": 93200 }, { "epoch": 0.30390582502451113, "grad_norm": 0.49741417169570923, "learning_rate": 3.4805842478493e-05, "loss": 0.5231, "step": 93300 }, { "epoch": 0.30423155474050745, "grad_norm": 0.6611301898956299, "learning_rate": 3.478955546217666e-05, "loss": 0.5189, "step": 93400 }, { "epoch": 0.3045572844565037, "grad_norm": 0.6907228827476501, "learning_rate": 3.477326844586033e-05, "loss": 0.5256, "step": 93500 }, { "epoch": 0.30488301417249997, "grad_norm": 0.5975654721260071, "learning_rate": 3.4756981429544e-05, "loss": 0.522, "step": 93600 }, { "epoch": 0.3052087438884962, "grad_norm": 0.6043006777763367, "learning_rate": 3.474069441322767e-05, "loss": 0.5018, "step": 93700 }, { "epoch": 0.3055344736044925, "grad_norm": 0.5697898864746094, "learning_rate": 3.4724407396911326e-05, "loss": 0.5009, "step": 93800 }, { "epoch": 0.30586020332048874, "grad_norm": 0.40364518761634827, "learning_rate": 3.4708120380595e-05, "loss": 0.4642, "step": 93900 }, { "epoch": 0.306185933036485, "grad_norm": 0.940877377986908, "learning_rate": 3.4691833364278664e-05, "loss": 0.5136, "step": 94000 }, { "epoch": 0.30651166275248126, "grad_norm": 0.7497209310531616, "learning_rate": 3.467554634796234e-05, "loss": 0.5261, "step": 94100 }, { "epoch": 0.3068373924684775, "grad_norm": 0.8120318651199341, "learning_rate": 3.4659259331645996e-05, "loss": 0.4756, "step": 94200 }, { "epoch": 0.3071631221844738, "grad_norm": 0.6802115440368652, "learning_rate": 3.464297231532967e-05, "loss": 0.5257, "step": 94300 }, { "epoch": 0.30748885190047004, "grad_norm": 0.43083488941192627, "learning_rate": 3.4626685299013334e-05, "loss": 0.5365, "step": 94400 }, { "epoch": 0.3078145816164663, "grad_norm": 0.6194273233413696, "learning_rate": 3.4610398282697e-05, "loss": 0.5157, "step": 94500 }, { "epoch": 0.30814031133246256, "grad_norm": 0.5603410601615906, "learning_rate": 3.4594111266380666e-05, "loss": 0.51, "step": 94600 }, { "epoch": 0.3084660410484588, "grad_norm": 1.0651506185531616, "learning_rate": 3.457782425006433e-05, "loss": 0.4759, "step": 94700 }, { "epoch": 0.3087917707644551, "grad_norm": 0.7674971222877502, "learning_rate": 3.4561537233748004e-05, "loss": 0.467, "step": 94800 }, { "epoch": 0.30911750048045133, "grad_norm": 0.9666951298713684, "learning_rate": 3.454525021743167e-05, "loss": 0.5524, "step": 94900 }, { "epoch": 0.3094432301964476, "grad_norm": 0.6148163080215454, "learning_rate": 3.4528963201115335e-05, "loss": 0.5345, "step": 95000 }, { "epoch": 0.30976895991244385, "grad_norm": 0.7641096711158752, "learning_rate": 3.4512676184799e-05, "loss": 0.4872, "step": 95100 }, { "epoch": 0.3100946896284401, "grad_norm": 0.6152538657188416, "learning_rate": 3.449638916848267e-05, "loss": 0.4832, "step": 95200 }, { "epoch": 0.31042041934443637, "grad_norm": 0.7761083841323853, "learning_rate": 3.448010215216634e-05, "loss": 0.4761, "step": 95300 }, { "epoch": 0.31074614906043263, "grad_norm": 0.6005348563194275, "learning_rate": 3.4463815135850005e-05, "loss": 0.4585, "step": 95400 }, { "epoch": 0.3110718787764289, "grad_norm": 0.7649496793746948, "learning_rate": 3.444752811953367e-05, "loss": 0.5283, "step": 95500 }, { "epoch": 0.31139760849242515, "grad_norm": 0.9503573179244995, "learning_rate": 3.4431241103217336e-05, "loss": 0.5032, "step": 95600 }, { "epoch": 0.3117233382084214, "grad_norm": 0.8403215408325195, "learning_rate": 3.441495408690101e-05, "loss": 0.5172, "step": 95700 }, { "epoch": 0.31204906792441767, "grad_norm": 0.5137957334518433, "learning_rate": 3.4398667070584675e-05, "loss": 0.5551, "step": 95800 }, { "epoch": 0.3123747976404139, "grad_norm": 0.6618998646736145, "learning_rate": 3.438238005426834e-05, "loss": 0.5237, "step": 95900 }, { "epoch": 0.3127005273564102, "grad_norm": 0.3272695541381836, "learning_rate": 3.4366093037952006e-05, "loss": 0.4556, "step": 96000 }, { "epoch": 0.31302625707240644, "grad_norm": 0.7416215538978577, "learning_rate": 3.434980602163567e-05, "loss": 0.5039, "step": 96100 }, { "epoch": 0.3133519867884027, "grad_norm": 0.9183087944984436, "learning_rate": 3.4333519005319344e-05, "loss": 0.5408, "step": 96200 }, { "epoch": 0.31367771650439896, "grad_norm": 0.3782617151737213, "learning_rate": 3.431723198900301e-05, "loss": 0.5113, "step": 96300 }, { "epoch": 0.3140034462203952, "grad_norm": 0.6314922571182251, "learning_rate": 3.4300944972686676e-05, "loss": 0.4955, "step": 96400 }, { "epoch": 0.3143291759363915, "grad_norm": 0.3009500801563263, "learning_rate": 3.428465795637034e-05, "loss": 0.5114, "step": 96500 }, { "epoch": 0.31465490565238774, "grad_norm": 0.8378229737281799, "learning_rate": 3.4268370940054014e-05, "loss": 0.5287, "step": 96600 }, { "epoch": 0.314980635368384, "grad_norm": 0.7249593138694763, "learning_rate": 3.425208392373768e-05, "loss": 0.5209, "step": 96700 }, { "epoch": 0.31530636508438026, "grad_norm": 0.45489412546157837, "learning_rate": 3.423579690742134e-05, "loss": 0.5745, "step": 96800 }, { "epoch": 0.3156320948003765, "grad_norm": 0.6379255056381226, "learning_rate": 3.421950989110501e-05, "loss": 0.5199, "step": 96900 }, { "epoch": 0.31595782451637283, "grad_norm": 0.8550392389297485, "learning_rate": 3.420322287478868e-05, "loss": 0.5374, "step": 97000 }, { "epoch": 0.3162835542323691, "grad_norm": 0.5571677684783936, "learning_rate": 3.418693585847235e-05, "loss": 0.5057, "step": 97100 }, { "epoch": 0.31660928394836535, "grad_norm": 0.48302140831947327, "learning_rate": 3.417064884215601e-05, "loss": 0.5496, "step": 97200 }, { "epoch": 0.3169350136643616, "grad_norm": 0.7864711284637451, "learning_rate": 3.415436182583968e-05, "loss": 0.5132, "step": 97300 }, { "epoch": 0.31726074338035787, "grad_norm": 0.5517250299453735, "learning_rate": 3.413807480952335e-05, "loss": 0.4826, "step": 97400 }, { "epoch": 0.3175864730963541, "grad_norm": 0.7834230065345764, "learning_rate": 3.412178779320701e-05, "loss": 0.5186, "step": 97500 }, { "epoch": 0.3179122028123504, "grad_norm": 0.938097357749939, "learning_rate": 3.410550077689068e-05, "loss": 0.4817, "step": 97600 }, { "epoch": 0.31823793252834665, "grad_norm": 0.25078582763671875, "learning_rate": 3.4089213760574344e-05, "loss": 0.4996, "step": 97700 }, { "epoch": 0.3185636622443429, "grad_norm": 0.7896013259887695, "learning_rate": 3.4072926744258016e-05, "loss": 0.5163, "step": 97800 }, { "epoch": 0.31888939196033916, "grad_norm": 0.6857266426086426, "learning_rate": 3.405663972794168e-05, "loss": 0.4952, "step": 97900 }, { "epoch": 0.3192151216763354, "grad_norm": 0.5710707306861877, "learning_rate": 3.404035271162535e-05, "loss": 0.5273, "step": 98000 }, { "epoch": 0.3195408513923317, "grad_norm": 0.5274339914321899, "learning_rate": 3.4024065695309014e-05, "loss": 0.5385, "step": 98100 }, { "epoch": 0.31986658110832794, "grad_norm": 0.27135804295539856, "learning_rate": 3.400777867899268e-05, "loss": 0.5042, "step": 98200 }, { "epoch": 0.3201923108243242, "grad_norm": 0.6852828860282898, "learning_rate": 3.399149166267635e-05, "loss": 0.5214, "step": 98300 }, { "epoch": 0.32051804054032046, "grad_norm": 0.5614081621170044, "learning_rate": 3.397520464636002e-05, "loss": 0.5023, "step": 98400 }, { "epoch": 0.3208437702563167, "grad_norm": 0.7719017863273621, "learning_rate": 3.395891763004368e-05, "loss": 0.4919, "step": 98500 }, { "epoch": 0.321169499972313, "grad_norm": 0.8100476264953613, "learning_rate": 3.394263061372735e-05, "loss": 0.4607, "step": 98600 }, { "epoch": 0.32149522968830924, "grad_norm": 0.6814531087875366, "learning_rate": 3.392634359741102e-05, "loss": 0.5457, "step": 98700 }, { "epoch": 0.3218209594043055, "grad_norm": 1.0356829166412354, "learning_rate": 3.391005658109469e-05, "loss": 0.4844, "step": 98800 }, { "epoch": 0.32214668912030175, "grad_norm": 0.8719603419303894, "learning_rate": 3.3893769564778346e-05, "loss": 0.5182, "step": 98900 }, { "epoch": 0.322472418836298, "grad_norm": 0.6145396828651428, "learning_rate": 3.387748254846202e-05, "loss": 0.4732, "step": 99000 }, { "epoch": 0.3227981485522943, "grad_norm": 1.005679726600647, "learning_rate": 3.3861195532145684e-05, "loss": 0.5182, "step": 99100 }, { "epoch": 0.32312387826829053, "grad_norm": 0.29751360416412354, "learning_rate": 3.384490851582936e-05, "loss": 0.4823, "step": 99200 }, { "epoch": 0.3234496079842868, "grad_norm": 0.7968891263008118, "learning_rate": 3.3828621499513016e-05, "loss": 0.5235, "step": 99300 }, { "epoch": 0.32377533770028305, "grad_norm": 0.7049364447593689, "learning_rate": 3.381233448319669e-05, "loss": 0.5392, "step": 99400 }, { "epoch": 0.3241010674162793, "grad_norm": 0.6265050172805786, "learning_rate": 3.3796047466880354e-05, "loss": 0.5119, "step": 99500 }, { "epoch": 0.32442679713227557, "grad_norm": 0.6732152104377747, "learning_rate": 3.377976045056402e-05, "loss": 0.4837, "step": 99600 }, { "epoch": 0.3247525268482718, "grad_norm": 0.25657424330711365, "learning_rate": 3.3763473434247686e-05, "loss": 0.5199, "step": 99700 }, { "epoch": 0.3250782565642681, "grad_norm": 0.4994146227836609, "learning_rate": 3.374718641793135e-05, "loss": 0.4894, "step": 99800 }, { "epoch": 0.32540398628026435, "grad_norm": 0.7468940615653992, "learning_rate": 3.3730899401615024e-05, "loss": 0.5409, "step": 99900 }, { "epoch": 0.3257297159962606, "grad_norm": 0.17829063534736633, "learning_rate": 3.371461238529869e-05, "loss": 0.5111, "step": 100000 }, { "epoch": 0.32605544571225686, "grad_norm": 0.6492403745651245, "learning_rate": 3.369832536898236e-05, "loss": 0.5085, "step": 100100 }, { "epoch": 0.3263811754282531, "grad_norm": 0.41203296184539795, "learning_rate": 3.368203835266602e-05, "loss": 0.4674, "step": 100200 }, { "epoch": 0.3267069051442494, "grad_norm": 0.6258901953697205, "learning_rate": 3.366575133634969e-05, "loss": 0.4797, "step": 100300 }, { "epoch": 0.32703263486024564, "grad_norm": 0.5243533849716187, "learning_rate": 3.364946432003336e-05, "loss": 0.4851, "step": 100400 }, { "epoch": 0.3273583645762419, "grad_norm": 0.7344015836715698, "learning_rate": 3.3633177303717025e-05, "loss": 0.4964, "step": 100500 }, { "epoch": 0.32768409429223816, "grad_norm": 1.1914827823638916, "learning_rate": 3.361689028740069e-05, "loss": 0.4923, "step": 100600 }, { "epoch": 0.3280098240082345, "grad_norm": 0.7036446928977966, "learning_rate": 3.3600603271084356e-05, "loss": 0.5234, "step": 100700 }, { "epoch": 0.32833555372423073, "grad_norm": 0.8239650726318359, "learning_rate": 3.358431625476803e-05, "loss": 0.4715, "step": 100800 }, { "epoch": 0.328661283440227, "grad_norm": 0.6158246397972107, "learning_rate": 3.3568029238451695e-05, "loss": 0.488, "step": 100900 }, { "epoch": 0.32898701315622325, "grad_norm": 0.708604633808136, "learning_rate": 3.355174222213536e-05, "loss": 0.4674, "step": 101000 }, { "epoch": 0.3293127428722195, "grad_norm": 0.5420898199081421, "learning_rate": 3.3535455205819026e-05, "loss": 0.4741, "step": 101100 }, { "epoch": 0.32963847258821577, "grad_norm": 0.49769943952560425, "learning_rate": 3.351916818950269e-05, "loss": 0.4638, "step": 101200 }, { "epoch": 0.32996420230421203, "grad_norm": 0.7099531888961792, "learning_rate": 3.3502881173186364e-05, "loss": 0.5236, "step": 101300 }, { "epoch": 0.3302899320202083, "grad_norm": 0.712815523147583, "learning_rate": 3.348659415687003e-05, "loss": 0.5268, "step": 101400 }, { "epoch": 0.33061566173620455, "grad_norm": 0.8762120008468628, "learning_rate": 3.3470307140553696e-05, "loss": 0.5045, "step": 101500 }, { "epoch": 0.3309413914522008, "grad_norm": 0.7411269545555115, "learning_rate": 3.345402012423736e-05, "loss": 0.5017, "step": 101600 }, { "epoch": 0.33126712116819707, "grad_norm": 0.7993664145469666, "learning_rate": 3.343773310792103e-05, "loss": 0.4866, "step": 101700 }, { "epoch": 0.3315928508841933, "grad_norm": 0.9997897148132324, "learning_rate": 3.34214460916047e-05, "loss": 0.5033, "step": 101800 }, { "epoch": 0.3319185806001896, "grad_norm": 0.3995771110057831, "learning_rate": 3.340515907528836e-05, "loss": 0.5037, "step": 101900 }, { "epoch": 0.33224431031618584, "grad_norm": 0.4990951418876648, "learning_rate": 3.338887205897203e-05, "loss": 0.5353, "step": 102000 } ], "logging_steps": 100, "max_steps": 307003, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.515680604094464e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }