diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.8484265727202576, + "epoch": 1.010031634190783, "eval_steps": 500, - "global_step": 420000, + "global_step": 500000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -294007,6 +294007,56014 @@ "learning_rate": 7.597595192178702e-07, "loss": 7.1108, "step": 420000 + }, + { + "epoch": 0.8484467733529414, + "grad_norm": 172.4610137939453, + "learning_rate": 7.595745528326176e-07, + "loss": 30.5344, + "step": 420010 + }, + { + "epoch": 0.8484669739856252, + "grad_norm": 339.5697021484375, + "learning_rate": 7.593896071146828e-07, + "loss": 11.9902, + "step": 420020 + }, + { + "epoch": 0.848487174618309, + "grad_norm": 445.3851013183594, + "learning_rate": 7.592046820649706e-07, + "loss": 13.3798, + "step": 420030 + }, + { + "epoch": 0.8485073752509928, + "grad_norm": 216.46893310546875, + "learning_rate": 7.5901977768438e-07, + "loss": 17.0299, + "step": 420040 + }, + { + "epoch": 0.8485275758836767, + "grad_norm": 429.12762451171875, + "learning_rate": 7.588348939738116e-07, + "loss": 13.6675, + "step": 420050 + }, + { + "epoch": 0.8485477765163605, + "grad_norm": 9.526630401611328, + "learning_rate": 7.586500309341682e-07, + "loss": 14.2783, + "step": 420060 + }, + { + "epoch": 0.8485679771490443, + "grad_norm": 292.997314453125, + "learning_rate": 7.584651885663497e-07, + "loss": 9.1951, + "step": 420070 + }, + { + "epoch": 0.8485881777817281, + "grad_norm": 168.2415313720703, + "learning_rate": 7.582803668712579e-07, + "loss": 11.4824, + "step": 420080 + }, + { + "epoch": 0.848608378414412, + "grad_norm": 236.3516387939453, + "learning_rate": 7.580955658497924e-07, + "loss": 21.5602, + "step": 420090 + }, + { + "epoch": 0.8486285790470958, + "grad_norm": 239.54022216796875, + "learning_rate": 7.579107855028562e-07, + "loss": 21.8654, + "step": 420100 + }, + { + "epoch": 0.8486487796797796, + "grad_norm": 361.20947265625, + "learning_rate": 7.577260258313474e-07, + "loss": 25.1178, + "step": 420110 + }, + { + "epoch": 0.8486689803124634, + "grad_norm": 521.0519409179688, + "learning_rate": 7.57541286836167e-07, + "loss": 15.5121, + "step": 420120 + }, + { + "epoch": 0.8486891809451472, + "grad_norm": 309.9486389160156, + "learning_rate": 7.573565685182166e-07, + "loss": 16.8341, + "step": 420130 + }, + { + "epoch": 0.848709381577831, + "grad_norm": 99.5003433227539, + "learning_rate": 7.571718708783948e-07, + "loss": 11.3829, + "step": 420140 + }, + { + "epoch": 0.8487295822105149, + "grad_norm": 501.2671203613281, + "learning_rate": 7.569871939176037e-07, + "loss": 22.9475, + "step": 420150 + }, + { + "epoch": 0.8487497828431987, + "grad_norm": 145.5573272705078, + "learning_rate": 7.568025376367422e-07, + "loss": 12.3609, + "step": 420160 + }, + { + "epoch": 0.8487699834758825, + "grad_norm": 389.3744201660156, + "learning_rate": 7.566179020367098e-07, + "loss": 18.8499, + "step": 420170 + }, + { + "epoch": 0.8487901841085663, + "grad_norm": 376.95501708984375, + "learning_rate": 7.564332871184077e-07, + "loss": 23.0914, + "step": 420180 + }, + { + "epoch": 0.8488103847412501, + "grad_norm": 844.72705078125, + "learning_rate": 7.562486928827356e-07, + "loss": 25.2918, + "step": 420190 + }, + { + "epoch": 0.848830585373934, + "grad_norm": 556.1143188476562, + "learning_rate": 7.560641193305912e-07, + "loss": 30.2421, + "step": 420200 + }, + { + "epoch": 0.8488507860066177, + "grad_norm": 452.9190979003906, + "learning_rate": 7.55879566462876e-07, + "loss": 20.9866, + "step": 420210 + }, + { + "epoch": 0.8488709866393015, + "grad_norm": 344.0130920410156, + "learning_rate": 7.556950342804908e-07, + "loss": 12.773, + "step": 420220 + }, + { + "epoch": 0.8488911872719853, + "grad_norm": 559.789794921875, + "learning_rate": 7.555105227843312e-07, + "loss": 22.4848, + "step": 420230 + }, + { + "epoch": 0.8489113879046691, + "grad_norm": 236.39991760253906, + "learning_rate": 7.553260319752986e-07, + "loss": 11.7751, + "step": 420240 + }, + { + "epoch": 0.848931588537353, + "grad_norm": 178.82774353027344, + "learning_rate": 7.551415618542928e-07, + "loss": 10.9281, + "step": 420250 + }, + { + "epoch": 0.8489517891700368, + "grad_norm": 416.56451416015625, + "learning_rate": 7.549571124222127e-07, + "loss": 29.0814, + "step": 420260 + }, + { + "epoch": 0.8489719898027206, + "grad_norm": 125.81304931640625, + "learning_rate": 7.547726836799551e-07, + "loss": 13.1673, + "step": 420270 + }, + { + "epoch": 0.8489921904354044, + "grad_norm": 317.0643615722656, + "learning_rate": 7.545882756284212e-07, + "loss": 7.6171, + "step": 420280 + }, + { + "epoch": 0.8490123910680882, + "grad_norm": 523.3759765625, + "learning_rate": 7.544038882685112e-07, + "loss": 25.3222, + "step": 420290 + }, + { + "epoch": 0.849032591700772, + "grad_norm": 430.9664306640625, + "learning_rate": 7.542195216011188e-07, + "loss": 18.5014, + "step": 420300 + }, + { + "epoch": 0.8490527923334559, + "grad_norm": 462.48175048828125, + "learning_rate": 7.540351756271464e-07, + "loss": 18.2764, + "step": 420310 + }, + { + "epoch": 0.8490729929661397, + "grad_norm": 312.3878173828125, + "learning_rate": 7.538508503474923e-07, + "loss": 19.0678, + "step": 420320 + }, + { + "epoch": 0.8490931935988235, + "grad_norm": 158.47137451171875, + "learning_rate": 7.536665457630544e-07, + "loss": 21.0344, + "step": 420330 + }, + { + "epoch": 0.8491133942315073, + "grad_norm": 358.35675048828125, + "learning_rate": 7.534822618747289e-07, + "loss": 33.1354, + "step": 420340 + }, + { + "epoch": 0.8491335948641912, + "grad_norm": 539.3812255859375, + "learning_rate": 7.532979986834177e-07, + "loss": 29.5604, + "step": 420350 + }, + { + "epoch": 0.849153795496875, + "grad_norm": 234.95236206054688, + "learning_rate": 7.53113756190017e-07, + "loss": 10.4691, + "step": 420360 + }, + { + "epoch": 0.8491739961295588, + "grad_norm": 205.41302490234375, + "learning_rate": 7.529295343954229e-07, + "loss": 9.7857, + "step": 420370 + }, + { + "epoch": 0.8491941967622426, + "grad_norm": 466.1488037109375, + "learning_rate": 7.527453333005368e-07, + "loss": 16.7474, + "step": 420380 + }, + { + "epoch": 0.8492143973949264, + "grad_norm": 1036.1993408203125, + "learning_rate": 7.525611529062538e-07, + "loss": 24.6821, + "step": 420390 + }, + { + "epoch": 0.8492345980276103, + "grad_norm": 448.77362060546875, + "learning_rate": 7.523769932134739e-07, + "loss": 17.0026, + "step": 420400 + }, + { + "epoch": 0.8492547986602941, + "grad_norm": 88.42417907714844, + "learning_rate": 7.521928542230916e-07, + "loss": 21.489, + "step": 420410 + }, + { + "epoch": 0.8492749992929779, + "grad_norm": 304.2235412597656, + "learning_rate": 7.520087359360073e-07, + "loss": 6.9169, + "step": 420420 + }, + { + "epoch": 0.8492951999256617, + "grad_norm": 195.1634063720703, + "learning_rate": 7.51824638353118e-07, + "loss": 17.1502, + "step": 420430 + }, + { + "epoch": 0.8493154005583455, + "grad_norm": 17.28790855407715, + "learning_rate": 7.51640561475318e-07, + "loss": 16.0534, + "step": 420440 + }, + { + "epoch": 0.8493356011910294, + "grad_norm": 185.24879455566406, + "learning_rate": 7.514565053035083e-07, + "loss": 10.7844, + "step": 420450 + }, + { + "epoch": 0.8493558018237131, + "grad_norm": 467.2895202636719, + "learning_rate": 7.512724698385831e-07, + "loss": 12.9716, + "step": 420460 + }, + { + "epoch": 0.8493760024563969, + "grad_norm": 195.4284210205078, + "learning_rate": 7.510884550814418e-07, + "loss": 13.6663, + "step": 420470 + }, + { + "epoch": 0.8493962030890807, + "grad_norm": 298.0000915527344, + "learning_rate": 7.509044610329803e-07, + "loss": 29.8925, + "step": 420480 + }, + { + "epoch": 0.8494164037217645, + "grad_norm": 297.3729248046875, + "learning_rate": 7.507204876940938e-07, + "loss": 13.3852, + "step": 420490 + }, + { + "epoch": 0.8494366043544483, + "grad_norm": 187.8828887939453, + "learning_rate": 7.505365350656813e-07, + "loss": 11.998, + "step": 420500 + }, + { + "epoch": 0.8494568049871322, + "grad_norm": 617.66552734375, + "learning_rate": 7.50352603148638e-07, + "loss": 25.929, + "step": 420510 + }, + { + "epoch": 0.849477005619816, + "grad_norm": 349.9392395019531, + "learning_rate": 7.5016869194386e-07, + "loss": 14.9932, + "step": 420520 + }, + { + "epoch": 0.8494972062524998, + "grad_norm": 259.2439270019531, + "learning_rate": 7.499848014522443e-07, + "loss": 19.1879, + "step": 420530 + }, + { + "epoch": 0.8495174068851836, + "grad_norm": 257.3936767578125, + "learning_rate": 7.498009316746879e-07, + "loss": 30.295, + "step": 420540 + }, + { + "epoch": 0.8495376075178674, + "grad_norm": 373.472900390625, + "learning_rate": 7.496170826120869e-07, + "loss": 19.4068, + "step": 420550 + }, + { + "epoch": 0.8495578081505513, + "grad_norm": 835.5751953125, + "learning_rate": 7.494332542653349e-07, + "loss": 18.3841, + "step": 420560 + }, + { + "epoch": 0.8495780087832351, + "grad_norm": 344.0382080078125, + "learning_rate": 7.492494466353317e-07, + "loss": 25.2305, + "step": 420570 + }, + { + "epoch": 0.8495982094159189, + "grad_norm": 439.5791320800781, + "learning_rate": 7.490656597229707e-07, + "loss": 34.9259, + "step": 420580 + }, + { + "epoch": 0.8496184100486027, + "grad_norm": 294.4809875488281, + "learning_rate": 7.488818935291465e-07, + "loss": 26.3275, + "step": 420590 + }, + { + "epoch": 0.8496386106812865, + "grad_norm": 312.11370849609375, + "learning_rate": 7.486981480547567e-07, + "loss": 30.0317, + "step": 420600 + }, + { + "epoch": 0.8496588113139704, + "grad_norm": 177.1890106201172, + "learning_rate": 7.48514423300698e-07, + "loss": 21.0186, + "step": 420610 + }, + { + "epoch": 0.8496790119466542, + "grad_norm": 83.07833862304688, + "learning_rate": 7.48330719267864e-07, + "loss": 9.8696, + "step": 420620 + }, + { + "epoch": 0.849699212579338, + "grad_norm": 548.93310546875, + "learning_rate": 7.481470359571497e-07, + "loss": 14.6292, + "step": 420630 + }, + { + "epoch": 0.8497194132120218, + "grad_norm": 274.3866882324219, + "learning_rate": 7.479633733694519e-07, + "loss": 14.8856, + "step": 420640 + }, + { + "epoch": 0.8497396138447056, + "grad_norm": 734.2679443359375, + "learning_rate": 7.477797315056645e-07, + "loss": 22.1115, + "step": 420650 + }, + { + "epoch": 0.8497598144773895, + "grad_norm": 447.2192687988281, + "learning_rate": 7.475961103666824e-07, + "loss": 17.7805, + "step": 420660 + }, + { + "epoch": 0.8497800151100733, + "grad_norm": 216.69761657714844, + "learning_rate": 7.474125099534019e-07, + "loss": 18.5491, + "step": 420670 + }, + { + "epoch": 0.8498002157427571, + "grad_norm": 214.6820831298828, + "learning_rate": 7.472289302667163e-07, + "loss": 13.082, + "step": 420680 + }, + { + "epoch": 0.8498204163754409, + "grad_norm": 361.2976379394531, + "learning_rate": 7.470453713075215e-07, + "loss": 24.109, + "step": 420690 + }, + { + "epoch": 0.8498406170081247, + "grad_norm": 281.5863037109375, + "learning_rate": 7.468618330767114e-07, + "loss": 11.7758, + "step": 420700 + }, + { + "epoch": 0.8498608176408086, + "grad_norm": 71.94729614257812, + "learning_rate": 7.466783155751816e-07, + "loss": 14.2478, + "step": 420710 + }, + { + "epoch": 0.8498810182734923, + "grad_norm": 362.0234375, + "learning_rate": 7.464948188038262e-07, + "loss": 13.8452, + "step": 420720 + }, + { + "epoch": 0.8499012189061761, + "grad_norm": 562.4525146484375, + "learning_rate": 7.463113427635376e-07, + "loss": 23.803, + "step": 420730 + }, + { + "epoch": 0.8499214195388599, + "grad_norm": 171.87557983398438, + "learning_rate": 7.461278874552131e-07, + "loss": 7.2911, + "step": 420740 + }, + { + "epoch": 0.8499416201715437, + "grad_norm": 215.71307373046875, + "learning_rate": 7.459444528797438e-07, + "loss": 9.4946, + "step": 420750 + }, + { + "epoch": 0.8499618208042276, + "grad_norm": 240.4515838623047, + "learning_rate": 7.457610390380265e-07, + "loss": 15.6298, + "step": 420760 + }, + { + "epoch": 0.8499820214369114, + "grad_norm": 410.1695251464844, + "learning_rate": 7.455776459309538e-07, + "loss": 15.6956, + "step": 420770 + }, + { + "epoch": 0.8500022220695952, + "grad_norm": 235.95631408691406, + "learning_rate": 7.453942735594189e-07, + "loss": 31.0048, + "step": 420780 + }, + { + "epoch": 0.850022422702279, + "grad_norm": 133.65708923339844, + "learning_rate": 7.452109219243175e-07, + "loss": 14.8682, + "step": 420790 + }, + { + "epoch": 0.8500426233349628, + "grad_norm": 217.16836547851562, + "learning_rate": 7.450275910265415e-07, + "loss": 29.9921, + "step": 420800 + }, + { + "epoch": 0.8500628239676467, + "grad_norm": 527.6741943359375, + "learning_rate": 7.448442808669842e-07, + "loss": 15.8221, + "step": 420810 + }, + { + "epoch": 0.8500830246003305, + "grad_norm": 312.0057373046875, + "learning_rate": 7.446609914465397e-07, + "loss": 18.7422, + "step": 420820 + }, + { + "epoch": 0.8501032252330143, + "grad_norm": 212.4395751953125, + "learning_rate": 7.444777227661037e-07, + "loss": 19.5951, + "step": 420830 + }, + { + "epoch": 0.8501234258656981, + "grad_norm": 257.22882080078125, + "learning_rate": 7.442944748265651e-07, + "loss": 16.0408, + "step": 420840 + }, + { + "epoch": 0.8501436264983819, + "grad_norm": 224.76571655273438, + "learning_rate": 7.441112476288187e-07, + "loss": 22.7621, + "step": 420850 + }, + { + "epoch": 0.8501638271310658, + "grad_norm": 111.08911895751953, + "learning_rate": 7.439280411737592e-07, + "loss": 11.3516, + "step": 420860 + }, + { + "epoch": 0.8501840277637496, + "grad_norm": 237.11033630371094, + "learning_rate": 7.437448554622783e-07, + "loss": 6.7287, + "step": 420870 + }, + { + "epoch": 0.8502042283964334, + "grad_norm": 395.3372497558594, + "learning_rate": 7.435616904952675e-07, + "loss": 19.3152, + "step": 420880 + }, + { + "epoch": 0.8502244290291172, + "grad_norm": 480.9623107910156, + "learning_rate": 7.433785462736209e-07, + "loss": 20.8863, + "step": 420890 + }, + { + "epoch": 0.850244629661801, + "grad_norm": 382.73583984375, + "learning_rate": 7.43195422798233e-07, + "loss": 19.1049, + "step": 420900 + }, + { + "epoch": 0.8502648302944849, + "grad_norm": 1869.8851318359375, + "learning_rate": 7.430123200699924e-07, + "loss": 32.079, + "step": 420910 + }, + { + "epoch": 0.8502850309271687, + "grad_norm": 712.2645874023438, + "learning_rate": 7.428292380897933e-07, + "loss": 18.4824, + "step": 420920 + }, + { + "epoch": 0.8503052315598525, + "grad_norm": 398.8208312988281, + "learning_rate": 7.426461768585291e-07, + "loss": 23.8987, + "step": 420930 + }, + { + "epoch": 0.8503254321925363, + "grad_norm": 108.1100082397461, + "learning_rate": 7.424631363770912e-07, + "loss": 15.4419, + "step": 420940 + }, + { + "epoch": 0.8503456328252201, + "grad_norm": 335.90740966796875, + "learning_rate": 7.422801166463706e-07, + "loss": 20.4621, + "step": 420950 + }, + { + "epoch": 0.850365833457904, + "grad_norm": 288.9495544433594, + "learning_rate": 7.420971176672614e-07, + "loss": 17.5241, + "step": 420960 + }, + { + "epoch": 0.8503860340905878, + "grad_norm": 348.71038818359375, + "learning_rate": 7.419141394406543e-07, + "loss": 18.832, + "step": 420970 + }, + { + "epoch": 0.8504062347232715, + "grad_norm": 540.66650390625, + "learning_rate": 7.4173118196744e-07, + "loss": 33.4038, + "step": 420980 + }, + { + "epoch": 0.8504264353559553, + "grad_norm": 284.1331787109375, + "learning_rate": 7.415482452485129e-07, + "loss": 21.0569, + "step": 420990 + }, + { + "epoch": 0.8504466359886391, + "grad_norm": 199.09298706054688, + "learning_rate": 7.413653292847617e-07, + "loss": 22.0451, + "step": 421000 + }, + { + "epoch": 0.8504668366213229, + "grad_norm": 437.1142883300781, + "learning_rate": 7.411824340770813e-07, + "loss": 21.0575, + "step": 421010 + }, + { + "epoch": 0.8504870372540068, + "grad_norm": 7.600314617156982, + "learning_rate": 7.409995596263591e-07, + "loss": 10.5348, + "step": 421020 + }, + { + "epoch": 0.8505072378866906, + "grad_norm": 411.22314453125, + "learning_rate": 7.408167059334897e-07, + "loss": 35.8578, + "step": 421030 + }, + { + "epoch": 0.8505274385193744, + "grad_norm": 206.26797485351562, + "learning_rate": 7.40633872999364e-07, + "loss": 13.219, + "step": 421040 + }, + { + "epoch": 0.8505476391520582, + "grad_norm": 343.618896484375, + "learning_rate": 7.4045106082487e-07, + "loss": 27.9574, + "step": 421050 + }, + { + "epoch": 0.850567839784742, + "grad_norm": 161.28146362304688, + "learning_rate": 7.402682694109026e-07, + "loss": 18.7444, + "step": 421060 + }, + { + "epoch": 0.8505880404174259, + "grad_norm": 341.72161865234375, + "learning_rate": 7.4008549875835e-07, + "loss": 16.2181, + "step": 421070 + }, + { + "epoch": 0.8506082410501097, + "grad_norm": 453.3415222167969, + "learning_rate": 7.399027488681049e-07, + "loss": 28.2804, + "step": 421080 + }, + { + "epoch": 0.8506284416827935, + "grad_norm": 495.8848876953125, + "learning_rate": 7.39720019741057e-07, + "loss": 30.7922, + "step": 421090 + }, + { + "epoch": 0.8506486423154773, + "grad_norm": 277.91143798828125, + "learning_rate": 7.395373113780962e-07, + "loss": 18.172, + "step": 421100 + }, + { + "epoch": 0.8506688429481611, + "grad_norm": 302.0050964355469, + "learning_rate": 7.393546237801147e-07, + "loss": 20.2798, + "step": 421110 + }, + { + "epoch": 0.850689043580845, + "grad_norm": 3.2582924365997314, + "learning_rate": 7.391719569480021e-07, + "loss": 13.3446, + "step": 421120 + }, + { + "epoch": 0.8507092442135288, + "grad_norm": 554.4869384765625, + "learning_rate": 7.389893108826473e-07, + "loss": 26.9601, + "step": 421130 + }, + { + "epoch": 0.8507294448462126, + "grad_norm": 320.6636047363281, + "learning_rate": 7.388066855849418e-07, + "loss": 14.6543, + "step": 421140 + }, + { + "epoch": 0.8507496454788964, + "grad_norm": 301.8233642578125, + "learning_rate": 7.386240810557771e-07, + "loss": 25.684, + "step": 421150 + }, + { + "epoch": 0.8507698461115802, + "grad_norm": 1380.050048828125, + "learning_rate": 7.384414972960419e-07, + "loss": 27.1782, + "step": 421160 + }, + { + "epoch": 0.8507900467442641, + "grad_norm": 360.6093444824219, + "learning_rate": 7.382589343066243e-07, + "loss": 14.7563, + "step": 421170 + }, + { + "epoch": 0.8508102473769479, + "grad_norm": 667.0159912109375, + "learning_rate": 7.380763920884171e-07, + "loss": 20.4568, + "step": 421180 + }, + { + "epoch": 0.8508304480096317, + "grad_norm": 194.67372131347656, + "learning_rate": 7.378938706423089e-07, + "loss": 13.4816, + "step": 421190 + }, + { + "epoch": 0.8508506486423155, + "grad_norm": 118.02854919433594, + "learning_rate": 7.377113699691879e-07, + "loss": 15.1407, + "step": 421200 + }, + { + "epoch": 0.8508708492749993, + "grad_norm": 446.3256530761719, + "learning_rate": 7.375288900699445e-07, + "loss": 10.0944, + "step": 421210 + }, + { + "epoch": 0.8508910499076832, + "grad_norm": 54.94810485839844, + "learning_rate": 7.373464309454698e-07, + "loss": 22.6813, + "step": 421220 + }, + { + "epoch": 0.8509112505403669, + "grad_norm": 132.21023559570312, + "learning_rate": 7.371639925966512e-07, + "loss": 11.2265, + "step": 421230 + }, + { + "epoch": 0.8509314511730507, + "grad_norm": 949.7929077148438, + "learning_rate": 7.369815750243769e-07, + "loss": 17.2051, + "step": 421240 + }, + { + "epoch": 0.8509516518057345, + "grad_norm": 444.0530090332031, + "learning_rate": 7.367991782295392e-07, + "loss": 23.2181, + "step": 421250 + }, + { + "epoch": 0.8509718524384183, + "grad_norm": 639.97900390625, + "learning_rate": 7.366168022130249e-07, + "loss": 24.6395, + "step": 421260 + }, + { + "epoch": 0.8509920530711022, + "grad_norm": 575.516845703125, + "learning_rate": 7.364344469757223e-07, + "loss": 21.0052, + "step": 421270 + }, + { + "epoch": 0.851012253703786, + "grad_norm": 327.63720703125, + "learning_rate": 7.362521125185218e-07, + "loss": 17.7251, + "step": 421280 + }, + { + "epoch": 0.8510324543364698, + "grad_norm": 325.7966003417969, + "learning_rate": 7.360697988423105e-07, + "loss": 13.0203, + "step": 421290 + }, + { + "epoch": 0.8510526549691536, + "grad_norm": 282.32525634765625, + "learning_rate": 7.358875059479792e-07, + "loss": 14.0752, + "step": 421300 + }, + { + "epoch": 0.8510728556018374, + "grad_norm": 528.5604858398438, + "learning_rate": 7.357052338364134e-07, + "loss": 14.7047, + "step": 421310 + }, + { + "epoch": 0.8510930562345213, + "grad_norm": 2.1461079120635986, + "learning_rate": 7.355229825085047e-07, + "loss": 9.7225, + "step": 421320 + }, + { + "epoch": 0.8511132568672051, + "grad_norm": 432.22021484375, + "learning_rate": 7.353407519651395e-07, + "loss": 27.3364, + "step": 421330 + }, + { + "epoch": 0.8511334574998889, + "grad_norm": 551.1565551757812, + "learning_rate": 7.351585422072049e-07, + "loss": 15.5955, + "step": 421340 + }, + { + "epoch": 0.8511536581325727, + "grad_norm": 288.9549560546875, + "learning_rate": 7.349763532355919e-07, + "loss": 14.2593, + "step": 421350 + }, + { + "epoch": 0.8511738587652565, + "grad_norm": 47.11952209472656, + "learning_rate": 7.347941850511853e-07, + "loss": 20.5868, + "step": 421360 + }, + { + "epoch": 0.8511940593979404, + "grad_norm": 447.0443420410156, + "learning_rate": 7.34612037654876e-07, + "loss": 11.4741, + "step": 421370 + }, + { + "epoch": 0.8512142600306242, + "grad_norm": 382.4913024902344, + "learning_rate": 7.344299110475506e-07, + "loss": 21.1025, + "step": 421380 + }, + { + "epoch": 0.851234460663308, + "grad_norm": 304.9476318359375, + "learning_rate": 7.342478052300945e-07, + "loss": 21.573, + "step": 421390 + }, + { + "epoch": 0.8512546612959918, + "grad_norm": 327.41204833984375, + "learning_rate": 7.34065720203399e-07, + "loss": 16.9658, + "step": 421400 + }, + { + "epoch": 0.8512748619286756, + "grad_norm": 293.70294189453125, + "learning_rate": 7.338836559683493e-07, + "loss": 15.7225, + "step": 421410 + }, + { + "epoch": 0.8512950625613595, + "grad_norm": 349.07061767578125, + "learning_rate": 7.337016125258323e-07, + "loss": 17.4128, + "step": 421420 + }, + { + "epoch": 0.8513152631940433, + "grad_norm": 261.398193359375, + "learning_rate": 7.335195898767367e-07, + "loss": 25.8577, + "step": 421430 + }, + { + "epoch": 0.8513354638267271, + "grad_norm": 351.88543701171875, + "learning_rate": 7.333375880219507e-07, + "loss": 52.9905, + "step": 421440 + }, + { + "epoch": 0.8513556644594109, + "grad_norm": 0.34273359179496765, + "learning_rate": 7.33155606962358e-07, + "loss": 26.1271, + "step": 421450 + }, + { + "epoch": 0.8513758650920947, + "grad_norm": 578.9764404296875, + "learning_rate": 7.329736466988469e-07, + "loss": 11.8796, + "step": 421460 + }, + { + "epoch": 0.8513960657247786, + "grad_norm": 174.4795684814453, + "learning_rate": 7.327917072323065e-07, + "loss": 31.6347, + "step": 421470 + }, + { + "epoch": 0.8514162663574624, + "grad_norm": 111.70514678955078, + "learning_rate": 7.326097885636214e-07, + "loss": 11.7568, + "step": 421480 + }, + { + "epoch": 0.8514364669901461, + "grad_norm": 278.8871154785156, + "learning_rate": 7.324278906936771e-07, + "loss": 24.3871, + "step": 421490 + }, + { + "epoch": 0.8514566676228299, + "grad_norm": 619.1923828125, + "learning_rate": 7.322460136233622e-07, + "loss": 29.3718, + "step": 421500 + }, + { + "epoch": 0.8514768682555137, + "grad_norm": 228.79319763183594, + "learning_rate": 7.320641573535647e-07, + "loss": 19.2691, + "step": 421510 + }, + { + "epoch": 0.8514970688881975, + "grad_norm": 413.57049560546875, + "learning_rate": 7.318823218851668e-07, + "loss": 32.7849, + "step": 421520 + }, + { + "epoch": 0.8515172695208814, + "grad_norm": 396.7378845214844, + "learning_rate": 7.31700507219057e-07, + "loss": 20.0575, + "step": 421530 + }, + { + "epoch": 0.8515374701535652, + "grad_norm": 375.3517761230469, + "learning_rate": 7.315187133561219e-07, + "loss": 19.0317, + "step": 421540 + }, + { + "epoch": 0.851557670786249, + "grad_norm": 37.742340087890625, + "learning_rate": 7.31336940297247e-07, + "loss": 20.6924, + "step": 421550 + }, + { + "epoch": 0.8515778714189328, + "grad_norm": 427.2479248046875, + "learning_rate": 7.311551880433171e-07, + "loss": 13.0093, + "step": 421560 + }, + { + "epoch": 0.8515980720516166, + "grad_norm": 529.8531494140625, + "learning_rate": 7.309734565952198e-07, + "loss": 16.0609, + "step": 421570 + }, + { + "epoch": 0.8516182726843005, + "grad_norm": 294.2206726074219, + "learning_rate": 7.307917459538405e-07, + "loss": 22.9379, + "step": 421580 + }, + { + "epoch": 0.8516384733169843, + "grad_norm": 397.2265625, + "learning_rate": 7.30610056120063e-07, + "loss": 24.8546, + "step": 421590 + }, + { + "epoch": 0.8516586739496681, + "grad_norm": 20.17185401916504, + "learning_rate": 7.304283870947748e-07, + "loss": 17.733, + "step": 421600 + }, + { + "epoch": 0.8516788745823519, + "grad_norm": 473.6755065917969, + "learning_rate": 7.302467388788614e-07, + "loss": 23.9961, + "step": 421610 + }, + { + "epoch": 0.8516990752150357, + "grad_norm": 362.61431884765625, + "learning_rate": 7.300651114732077e-07, + "loss": 20.499, + "step": 421620 + }, + { + "epoch": 0.8517192758477196, + "grad_norm": 176.23777770996094, + "learning_rate": 7.298835048786979e-07, + "loss": 15.5241, + "step": 421630 + }, + { + "epoch": 0.8517394764804034, + "grad_norm": 443.8497009277344, + "learning_rate": 7.29701919096219e-07, + "loss": 17.5838, + "step": 421640 + }, + { + "epoch": 0.8517596771130872, + "grad_norm": 549.1595458984375, + "learning_rate": 7.295203541266549e-07, + "loss": 21.9836, + "step": 421650 + }, + { + "epoch": 0.851779877745771, + "grad_norm": 238.44998168945312, + "learning_rate": 7.293388099708892e-07, + "loss": 17.0428, + "step": 421660 + }, + { + "epoch": 0.8518000783784548, + "grad_norm": 224.4907684326172, + "learning_rate": 7.291572866298102e-07, + "loss": 20.522, + "step": 421670 + }, + { + "epoch": 0.8518202790111387, + "grad_norm": 39.98009490966797, + "learning_rate": 7.289757841042988e-07, + "loss": 22.7961, + "step": 421680 + }, + { + "epoch": 0.8518404796438225, + "grad_norm": 395.55438232421875, + "learning_rate": 7.287943023952426e-07, + "loss": 14.4075, + "step": 421690 + }, + { + "epoch": 0.8518606802765063, + "grad_norm": 462.35174560546875, + "learning_rate": 7.286128415035249e-07, + "loss": 20.4092, + "step": 421700 + }, + { + "epoch": 0.8518808809091901, + "grad_norm": 342.838623046875, + "learning_rate": 7.284314014300292e-07, + "loss": 14.6635, + "step": 421710 + }, + { + "epoch": 0.8519010815418739, + "grad_norm": 402.89068603515625, + "learning_rate": 7.282499821756417e-07, + "loss": 14.4933, + "step": 421720 + }, + { + "epoch": 0.8519212821745578, + "grad_norm": 195.26206970214844, + "learning_rate": 7.28068583741246e-07, + "loss": 25.5276, + "step": 421730 + }, + { + "epoch": 0.8519414828072415, + "grad_norm": 632.72265625, + "learning_rate": 7.278872061277248e-07, + "loss": 21.9685, + "step": 421740 + }, + { + "epoch": 0.8519616834399253, + "grad_norm": 480.2913818359375, + "learning_rate": 7.277058493359629e-07, + "loss": 26.8477, + "step": 421750 + }, + { + "epoch": 0.8519818840726091, + "grad_norm": 457.3885192871094, + "learning_rate": 7.275245133668457e-07, + "loss": 21.4772, + "step": 421760 + }, + { + "epoch": 0.8520020847052929, + "grad_norm": 304.02252197265625, + "learning_rate": 7.273431982212559e-07, + "loss": 14.3607, + "step": 421770 + }, + { + "epoch": 0.8520222853379767, + "grad_norm": 395.3974304199219, + "learning_rate": 7.27161903900076e-07, + "loss": 14.1178, + "step": 421780 + }, + { + "epoch": 0.8520424859706606, + "grad_norm": 679.8463134765625, + "learning_rate": 7.269806304041915e-07, + "loss": 18.9653, + "step": 421790 + }, + { + "epoch": 0.8520626866033444, + "grad_norm": 689.3289184570312, + "learning_rate": 7.267993777344856e-07, + "loss": 24.3499, + "step": 421800 + }, + { + "epoch": 0.8520828872360282, + "grad_norm": 340.630126953125, + "learning_rate": 7.266181458918403e-07, + "loss": 33.4605, + "step": 421810 + }, + { + "epoch": 0.852103087868712, + "grad_norm": 313.085205078125, + "learning_rate": 7.264369348771394e-07, + "loss": 17.1183, + "step": 421820 + }, + { + "epoch": 0.8521232885013958, + "grad_norm": 342.37451171875, + "learning_rate": 7.262557446912693e-07, + "loss": 15.2027, + "step": 421830 + }, + { + "epoch": 0.8521434891340797, + "grad_norm": 316.3703918457031, + "learning_rate": 7.260745753351078e-07, + "loss": 14.5585, + "step": 421840 + }, + { + "epoch": 0.8521636897667635, + "grad_norm": 439.4654235839844, + "learning_rate": 7.258934268095402e-07, + "loss": 14.8989, + "step": 421850 + }, + { + "epoch": 0.8521838903994473, + "grad_norm": 593.458740234375, + "learning_rate": 7.257122991154514e-07, + "loss": 26.9237, + "step": 421860 + }, + { + "epoch": 0.8522040910321311, + "grad_norm": 509.9592590332031, + "learning_rate": 7.255311922537217e-07, + "loss": 21.6094, + "step": 421870 + }, + { + "epoch": 0.852224291664815, + "grad_norm": 44.265228271484375, + "learning_rate": 7.253501062252338e-07, + "loss": 17.5555, + "step": 421880 + }, + { + "epoch": 0.8522444922974988, + "grad_norm": 115.82080078125, + "learning_rate": 7.251690410308726e-07, + "loss": 21.4566, + "step": 421890 + }, + { + "epoch": 0.8522646929301826, + "grad_norm": 234.79441833496094, + "learning_rate": 7.249879966715174e-07, + "loss": 13.3536, + "step": 421900 + }, + { + "epoch": 0.8522848935628664, + "grad_norm": 245.02662658691406, + "learning_rate": 7.248069731480533e-07, + "loss": 11.3074, + "step": 421910 + }, + { + "epoch": 0.8523050941955502, + "grad_norm": 447.1869812011719, + "learning_rate": 7.246259704613606e-07, + "loss": 19.349, + "step": 421920 + }, + { + "epoch": 0.852325294828234, + "grad_norm": 258.03607177734375, + "learning_rate": 7.244449886123233e-07, + "loss": 20.7277, + "step": 421930 + }, + { + "epoch": 0.8523454954609179, + "grad_norm": 416.8277282714844, + "learning_rate": 7.242640276018226e-07, + "loss": 15.8615, + "step": 421940 + }, + { + "epoch": 0.8523656960936017, + "grad_norm": 9.882548332214355, + "learning_rate": 7.240830874307392e-07, + "loss": 21.3556, + "step": 421950 + }, + { + "epoch": 0.8523858967262855, + "grad_norm": 509.8556213378906, + "learning_rate": 7.239021680999575e-07, + "loss": 18.4977, + "step": 421960 + }, + { + "epoch": 0.8524060973589693, + "grad_norm": 307.5932922363281, + "learning_rate": 7.237212696103568e-07, + "loss": 19.4454, + "step": 421970 + }, + { + "epoch": 0.8524262979916531, + "grad_norm": 57.08131408691406, + "learning_rate": 7.235403919628214e-07, + "loss": 9.8599, + "step": 421980 + }, + { + "epoch": 0.852446498624337, + "grad_norm": 234.61434936523438, + "learning_rate": 7.233595351582313e-07, + "loss": 37.4964, + "step": 421990 + }, + { + "epoch": 0.8524666992570207, + "grad_norm": 192.13845825195312, + "learning_rate": 7.23178699197467e-07, + "loss": 19.1351, + "step": 422000 + }, + { + "epoch": 0.8524868998897045, + "grad_norm": 12.566235542297363, + "learning_rate": 7.229978840814122e-07, + "loss": 23.8078, + "step": 422010 + }, + { + "epoch": 0.8525071005223883, + "grad_norm": 335.6422119140625, + "learning_rate": 7.228170898109465e-07, + "loss": 11.2894, + "step": 422020 + }, + { + "epoch": 0.8525273011550721, + "grad_norm": 59.3913688659668, + "learning_rate": 7.22636316386951e-07, + "loss": 17.7019, + "step": 422030 + }, + { + "epoch": 0.852547501787756, + "grad_norm": 292.2152099609375, + "learning_rate": 7.22455563810307e-07, + "loss": 18.1457, + "step": 422040 + }, + { + "epoch": 0.8525677024204398, + "grad_norm": 203.8167266845703, + "learning_rate": 7.222748320818984e-07, + "loss": 13.9486, + "step": 422050 + }, + { + "epoch": 0.8525879030531236, + "grad_norm": 192.4440155029297, + "learning_rate": 7.220941212026005e-07, + "loss": 12.7467, + "step": 422060 + }, + { + "epoch": 0.8526081036858074, + "grad_norm": 151.43333435058594, + "learning_rate": 7.219134311732978e-07, + "loss": 17.172, + "step": 422070 + }, + { + "epoch": 0.8526283043184912, + "grad_norm": 274.3697509765625, + "learning_rate": 7.217327619948705e-07, + "loss": 10.654, + "step": 422080 + }, + { + "epoch": 0.8526485049511751, + "grad_norm": 333.4923400878906, + "learning_rate": 7.215521136681997e-07, + "loss": 16.1211, + "step": 422090 + }, + { + "epoch": 0.8526687055838589, + "grad_norm": 509.5408020019531, + "learning_rate": 7.213714861941628e-07, + "loss": 21.663, + "step": 422100 + }, + { + "epoch": 0.8526889062165427, + "grad_norm": 252.9277801513672, + "learning_rate": 7.211908795736433e-07, + "loss": 15.0233, + "step": 422110 + }, + { + "epoch": 0.8527091068492265, + "grad_norm": 328.42047119140625, + "learning_rate": 7.210102938075225e-07, + "loss": 14.1027, + "step": 422120 + }, + { + "epoch": 0.8527293074819103, + "grad_norm": 867.2739868164062, + "learning_rate": 7.20829728896676e-07, + "loss": 30.1601, + "step": 422130 + }, + { + "epoch": 0.8527495081145942, + "grad_norm": 230.66310119628906, + "learning_rate": 7.206491848419867e-07, + "loss": 14.3198, + "step": 422140 + }, + { + "epoch": 0.852769708747278, + "grad_norm": 273.2495422363281, + "learning_rate": 7.204686616443352e-07, + "loss": 26.9474, + "step": 422150 + }, + { + "epoch": 0.8527899093799618, + "grad_norm": 440.524169921875, + "learning_rate": 7.202881593046002e-07, + "loss": 19.9583, + "step": 422160 + }, + { + "epoch": 0.8528101100126456, + "grad_norm": 270.63421630859375, + "learning_rate": 7.20107677823661e-07, + "loss": 26.173, + "step": 422170 + }, + { + "epoch": 0.8528303106453294, + "grad_norm": 816.7549438476562, + "learning_rate": 7.199272172023986e-07, + "loss": 30.6517, + "step": 422180 + }, + { + "epoch": 0.8528505112780133, + "grad_norm": 11.268199920654297, + "learning_rate": 7.197467774416921e-07, + "loss": 17.9889, + "step": 422190 + }, + { + "epoch": 0.8528707119106971, + "grad_norm": 447.5383605957031, + "learning_rate": 7.195663585424195e-07, + "loss": 13.2262, + "step": 422200 + }, + { + "epoch": 0.8528909125433809, + "grad_norm": 179.54664611816406, + "learning_rate": 7.193859605054615e-07, + "loss": 16.2462, + "step": 422210 + }, + { + "epoch": 0.8529111131760647, + "grad_norm": 292.2140197753906, + "learning_rate": 7.19205583331698e-07, + "loss": 10.8472, + "step": 422220 + }, + { + "epoch": 0.8529313138087485, + "grad_norm": 346.0629577636719, + "learning_rate": 7.190252270220071e-07, + "loss": 10.9012, + "step": 422230 + }, + { + "epoch": 0.8529515144414324, + "grad_norm": 255.02244567871094, + "learning_rate": 7.188448915772673e-07, + "loss": 11.8099, + "step": 422240 + }, + { + "epoch": 0.8529717150741161, + "grad_norm": 21.119182586669922, + "learning_rate": 7.186645769983591e-07, + "loss": 15.2602, + "step": 422250 + }, + { + "epoch": 0.8529919157067999, + "grad_norm": 140.7800750732422, + "learning_rate": 7.18484283286161e-07, + "loss": 34.9589, + "step": 422260 + }, + { + "epoch": 0.8530121163394837, + "grad_norm": 472.4410400390625, + "learning_rate": 7.183040104415495e-07, + "loss": 13.2531, + "step": 422270 + }, + { + "epoch": 0.8530323169721675, + "grad_norm": 12.594407081604004, + "learning_rate": 7.181237584654066e-07, + "loss": 10.1258, + "step": 422280 + }, + { + "epoch": 0.8530525176048513, + "grad_norm": 415.6765441894531, + "learning_rate": 7.179435273586078e-07, + "loss": 15.773, + "step": 422290 + }, + { + "epoch": 0.8530727182375352, + "grad_norm": 65.1563720703125, + "learning_rate": 7.177633171220339e-07, + "loss": 18.0508, + "step": 422300 + }, + { + "epoch": 0.853092918870219, + "grad_norm": 0.0, + "learning_rate": 7.17583127756562e-07, + "loss": 36.8546, + "step": 422310 + }, + { + "epoch": 0.8531131195029028, + "grad_norm": 316.9919128417969, + "learning_rate": 7.1740295926307e-07, + "loss": 17.3249, + "step": 422320 + }, + { + "epoch": 0.8531333201355866, + "grad_norm": 129.97496032714844, + "learning_rate": 7.172228116424374e-07, + "loss": 25.0414, + "step": 422330 + }, + { + "epoch": 0.8531535207682704, + "grad_norm": 284.4596252441406, + "learning_rate": 7.170426848955408e-07, + "loss": 16.1025, + "step": 422340 + }, + { + "epoch": 0.8531737214009543, + "grad_norm": 640.4921264648438, + "learning_rate": 7.168625790232586e-07, + "loss": 18.8267, + "step": 422350 + }, + { + "epoch": 0.8531939220336381, + "grad_norm": 1549.4013671875, + "learning_rate": 7.166824940264683e-07, + "loss": 28.6623, + "step": 422360 + }, + { + "epoch": 0.8532141226663219, + "grad_norm": 363.51800537109375, + "learning_rate": 7.165024299060486e-07, + "loss": 11.5337, + "step": 422370 + }, + { + "epoch": 0.8532343232990057, + "grad_norm": 534.2193603515625, + "learning_rate": 7.163223866628771e-07, + "loss": 14.7847, + "step": 422380 + }, + { + "epoch": 0.8532545239316895, + "grad_norm": 626.86865234375, + "learning_rate": 7.161423642978299e-07, + "loss": 22.6856, + "step": 422390 + }, + { + "epoch": 0.8532747245643734, + "grad_norm": 187.61221313476562, + "learning_rate": 7.159623628117856e-07, + "loss": 13.3871, + "step": 422400 + }, + { + "epoch": 0.8532949251970572, + "grad_norm": 282.8906555175781, + "learning_rate": 7.157823822056214e-07, + "loss": 12.2203, + "step": 422410 + }, + { + "epoch": 0.853315125829741, + "grad_norm": 233.69619750976562, + "learning_rate": 7.156024224802139e-07, + "loss": 14.9793, + "step": 422420 + }, + { + "epoch": 0.8533353264624248, + "grad_norm": 729.30517578125, + "learning_rate": 7.154224836364398e-07, + "loss": 23.6431, + "step": 422430 + }, + { + "epoch": 0.8533555270951086, + "grad_norm": 443.14154052734375, + "learning_rate": 7.152425656751794e-07, + "loss": 13.5563, + "step": 422440 + }, + { + "epoch": 0.8533757277277925, + "grad_norm": 488.0747985839844, + "learning_rate": 7.150626685973045e-07, + "loss": 16.0828, + "step": 422450 + }, + { + "epoch": 0.8533959283604763, + "grad_norm": 164.69842529296875, + "learning_rate": 7.148827924036944e-07, + "loss": 18.4377, + "step": 422460 + }, + { + "epoch": 0.8534161289931601, + "grad_norm": 345.8693542480469, + "learning_rate": 7.147029370952274e-07, + "loss": 24.7451, + "step": 422470 + }, + { + "epoch": 0.8534363296258439, + "grad_norm": 172.44485473632812, + "learning_rate": 7.145231026727783e-07, + "loss": 19.3036, + "step": 422480 + }, + { + "epoch": 0.8534565302585277, + "grad_norm": 492.9544372558594, + "learning_rate": 7.143432891372226e-07, + "loss": 16.763, + "step": 422490 + }, + { + "epoch": 0.8534767308912116, + "grad_norm": 627.2689208984375, + "learning_rate": 7.141634964894389e-07, + "loss": 18.8558, + "step": 422500 + }, + { + "epoch": 0.8534969315238953, + "grad_norm": 461.1217041015625, + "learning_rate": 7.139837247303027e-07, + "loss": 14.6763, + "step": 422510 + }, + { + "epoch": 0.8535171321565791, + "grad_norm": 225.9117889404297, + "learning_rate": 7.138039738606894e-07, + "loss": 33.9146, + "step": 422520 + }, + { + "epoch": 0.8535373327892629, + "grad_norm": 202.75625610351562, + "learning_rate": 7.13624243881475e-07, + "loss": 22.2583, + "step": 422530 + }, + { + "epoch": 0.8535575334219467, + "grad_norm": 566.3533935546875, + "learning_rate": 7.134445347935376e-07, + "loss": 16.8513, + "step": 422540 + }, + { + "epoch": 0.8535777340546306, + "grad_norm": 628.0380249023438, + "learning_rate": 7.132648465977515e-07, + "loss": 13.4277, + "step": 422550 + }, + { + "epoch": 0.8535979346873144, + "grad_norm": 36.08042526245117, + "learning_rate": 7.130851792949916e-07, + "loss": 12.6178, + "step": 422560 + }, + { + "epoch": 0.8536181353199982, + "grad_norm": 211.43484497070312, + "learning_rate": 7.129055328861356e-07, + "loss": 16.2987, + "step": 422570 + }, + { + "epoch": 0.853638335952682, + "grad_norm": 52.93839645385742, + "learning_rate": 7.127259073720571e-07, + "loss": 16.9325, + "step": 422580 + }, + { + "epoch": 0.8536585365853658, + "grad_norm": 228.2338409423828, + "learning_rate": 7.125463027536334e-07, + "loss": 24.4949, + "step": 422590 + }, + { + "epoch": 0.8536787372180497, + "grad_norm": 248.85826110839844, + "learning_rate": 7.123667190317396e-07, + "loss": 19.4014, + "step": 422600 + }, + { + "epoch": 0.8536989378507335, + "grad_norm": 218.89491271972656, + "learning_rate": 7.121871562072486e-07, + "loss": 28.9889, + "step": 422610 + }, + { + "epoch": 0.8537191384834173, + "grad_norm": 258.9183654785156, + "learning_rate": 7.12007614281039e-07, + "loss": 23.8125, + "step": 422620 + }, + { + "epoch": 0.8537393391161011, + "grad_norm": 481.2682800292969, + "learning_rate": 7.11828093253984e-07, + "loss": 16.0911, + "step": 422630 + }, + { + "epoch": 0.8537595397487849, + "grad_norm": 599.0542602539062, + "learning_rate": 7.116485931269573e-07, + "loss": 16.99, + "step": 422640 + }, + { + "epoch": 0.8537797403814688, + "grad_norm": 411.8247375488281, + "learning_rate": 7.114691139008356e-07, + "loss": 13.0052, + "step": 422650 + }, + { + "epoch": 0.8537999410141526, + "grad_norm": 409.2203674316406, + "learning_rate": 7.112896555764943e-07, + "loss": 13.5062, + "step": 422660 + }, + { + "epoch": 0.8538201416468364, + "grad_norm": 386.6644592285156, + "learning_rate": 7.111102181548074e-07, + "loss": 22.6078, + "step": 422670 + }, + { + "epoch": 0.8538403422795202, + "grad_norm": 340.1001281738281, + "learning_rate": 7.109308016366473e-07, + "loss": 10.6364, + "step": 422680 + }, + { + "epoch": 0.853860542912204, + "grad_norm": 299.4497375488281, + "learning_rate": 7.107514060228921e-07, + "loss": 18.9434, + "step": 422690 + }, + { + "epoch": 0.8538807435448879, + "grad_norm": 461.1625671386719, + "learning_rate": 7.105720313144143e-07, + "loss": 9.3554, + "step": 422700 + }, + { + "epoch": 0.8539009441775717, + "grad_norm": 300.0633544921875, + "learning_rate": 7.103926775120867e-07, + "loss": 33.4887, + "step": 422710 + }, + { + "epoch": 0.8539211448102555, + "grad_norm": 314.12481689453125, + "learning_rate": 7.102133446167847e-07, + "loss": 18.0671, + "step": 422720 + }, + { + "epoch": 0.8539413454429393, + "grad_norm": 172.38677978515625, + "learning_rate": 7.100340326293853e-07, + "loss": 9.6837, + "step": 422730 + }, + { + "epoch": 0.8539615460756231, + "grad_norm": 89.95143127441406, + "learning_rate": 7.098547415507572e-07, + "loss": 19.3794, + "step": 422740 + }, + { + "epoch": 0.853981746708307, + "grad_norm": 269.31787109375, + "learning_rate": 7.096754713817771e-07, + "loss": 21.4956, + "step": 422750 + }, + { + "epoch": 0.8540019473409908, + "grad_norm": 4.7827067375183105, + "learning_rate": 7.094962221233192e-07, + "loss": 12.2086, + "step": 422760 + }, + { + "epoch": 0.8540221479736745, + "grad_norm": 681.1311645507812, + "learning_rate": 7.093169937762562e-07, + "loss": 18.2227, + "step": 422770 + }, + { + "epoch": 0.8540423486063583, + "grad_norm": 109.95421600341797, + "learning_rate": 7.091377863414611e-07, + "loss": 14.1634, + "step": 422780 + }, + { + "epoch": 0.8540625492390421, + "grad_norm": 179.98135375976562, + "learning_rate": 7.08958599819809e-07, + "loss": 15.3122, + "step": 422790 + }, + { + "epoch": 0.8540827498717259, + "grad_norm": 210.8217315673828, + "learning_rate": 7.087794342121724e-07, + "loss": 10.1048, + "step": 422800 + }, + { + "epoch": 0.8541029505044098, + "grad_norm": 285.1670837402344, + "learning_rate": 7.086002895194227e-07, + "loss": 15.6775, + "step": 422810 + }, + { + "epoch": 0.8541231511370936, + "grad_norm": 824.2320556640625, + "learning_rate": 7.08421165742435e-07, + "loss": 26.6279, + "step": 422820 + }, + { + "epoch": 0.8541433517697774, + "grad_norm": 56.3100471496582, + "learning_rate": 7.08242062882083e-07, + "loss": 21.4873, + "step": 422830 + }, + { + "epoch": 0.8541635524024612, + "grad_norm": 541.4666137695312, + "learning_rate": 7.080629809392392e-07, + "loss": 15.3471, + "step": 422840 + }, + { + "epoch": 0.854183753035145, + "grad_norm": 333.91851806640625, + "learning_rate": 7.078839199147741e-07, + "loss": 22.5128, + "step": 422850 + }, + { + "epoch": 0.8542039536678289, + "grad_norm": 545.7657470703125, + "learning_rate": 7.077048798095637e-07, + "loss": 28.4918, + "step": 422860 + }, + { + "epoch": 0.8542241543005127, + "grad_norm": 0.6666960716247559, + "learning_rate": 7.07525860624479e-07, + "loss": 15.1818, + "step": 422870 + }, + { + "epoch": 0.8542443549331965, + "grad_norm": 512.3268432617188, + "learning_rate": 7.073468623603918e-07, + "loss": 18.1457, + "step": 422880 + }, + { + "epoch": 0.8542645555658803, + "grad_norm": 398.10882568359375, + "learning_rate": 7.071678850181762e-07, + "loss": 23.5213, + "step": 422890 + }, + { + "epoch": 0.8542847561985641, + "grad_norm": 26.361709594726562, + "learning_rate": 7.069889285987025e-07, + "loss": 24.1029, + "step": 422900 + }, + { + "epoch": 0.854304956831248, + "grad_norm": 99.04013061523438, + "learning_rate": 7.068099931028449e-07, + "loss": 14.5572, + "step": 422910 + }, + { + "epoch": 0.8543251574639318, + "grad_norm": 487.67059326171875, + "learning_rate": 7.066310785314756e-07, + "loss": 14.2184, + "step": 422920 + }, + { + "epoch": 0.8543453580966156, + "grad_norm": 779.5738525390625, + "learning_rate": 7.064521848854639e-07, + "loss": 41.2217, + "step": 422930 + }, + { + "epoch": 0.8543655587292994, + "grad_norm": 250.2709503173828, + "learning_rate": 7.062733121656845e-07, + "loss": 17.1406, + "step": 422940 + }, + { + "epoch": 0.8543857593619832, + "grad_norm": 452.2339782714844, + "learning_rate": 7.060944603730086e-07, + "loss": 11.8047, + "step": 422950 + }, + { + "epoch": 0.8544059599946671, + "grad_norm": 194.9429168701172, + "learning_rate": 7.059156295083064e-07, + "loss": 16.9779, + "step": 422960 + }, + { + "epoch": 0.8544261606273509, + "grad_norm": 0.0, + "learning_rate": 7.057368195724506e-07, + "loss": 25.6859, + "step": 422970 + }, + { + "epoch": 0.8544463612600347, + "grad_norm": 120.65647888183594, + "learning_rate": 7.055580305663135e-07, + "loss": 12.0228, + "step": 422980 + }, + { + "epoch": 0.8544665618927185, + "grad_norm": 79.81140899658203, + "learning_rate": 7.053792624907662e-07, + "loss": 14.6664, + "step": 422990 + }, + { + "epoch": 0.8544867625254023, + "grad_norm": 401.9481201171875, + "learning_rate": 7.052005153466779e-07, + "loss": 20.8853, + "step": 423000 + }, + { + "epoch": 0.8545069631580862, + "grad_norm": 73.91416931152344, + "learning_rate": 7.050217891349226e-07, + "loss": 15.3818, + "step": 423010 + }, + { + "epoch": 0.8545271637907699, + "grad_norm": 1922.633056640625, + "learning_rate": 7.048430838563708e-07, + "loss": 31.3682, + "step": 423020 + }, + { + "epoch": 0.8545473644234537, + "grad_norm": 451.913818359375, + "learning_rate": 7.046643995118913e-07, + "loss": 16.2877, + "step": 423030 + }, + { + "epoch": 0.8545675650561375, + "grad_norm": 74.22517395019531, + "learning_rate": 7.04485736102356e-07, + "loss": 27.027, + "step": 423040 + }, + { + "epoch": 0.8545877656888213, + "grad_norm": 294.8464050292969, + "learning_rate": 7.043070936286395e-07, + "loss": 11.8094, + "step": 423050 + }, + { + "epoch": 0.8546079663215052, + "grad_norm": 267.31085205078125, + "learning_rate": 7.041284720916064e-07, + "loss": 14.3946, + "step": 423060 + }, + { + "epoch": 0.854628166954189, + "grad_norm": 237.23765563964844, + "learning_rate": 7.0394987149213e-07, + "loss": 15.9205, + "step": 423070 + }, + { + "epoch": 0.8546483675868728, + "grad_norm": 169.7945098876953, + "learning_rate": 7.037712918310818e-07, + "loss": 21.465, + "step": 423080 + }, + { + "epoch": 0.8546685682195566, + "grad_norm": 1223.9046630859375, + "learning_rate": 7.035927331093318e-07, + "loss": 20.4954, + "step": 423090 + }, + { + "epoch": 0.8546887688522404, + "grad_norm": 288.6837463378906, + "learning_rate": 7.034141953277484e-07, + "loss": 14.3236, + "step": 423100 + }, + { + "epoch": 0.8547089694849243, + "grad_norm": 198.91371154785156, + "learning_rate": 7.032356784872035e-07, + "loss": 22.5588, + "step": 423110 + }, + { + "epoch": 0.8547291701176081, + "grad_norm": 362.96160888671875, + "learning_rate": 7.030571825885685e-07, + "loss": 12.3301, + "step": 423120 + }, + { + "epoch": 0.8547493707502919, + "grad_norm": 245.9905242919922, + "learning_rate": 7.028787076327093e-07, + "loss": 21.1567, + "step": 423130 + }, + { + "epoch": 0.8547695713829757, + "grad_norm": 548.1031494140625, + "learning_rate": 7.027002536204986e-07, + "loss": 17.0715, + "step": 423140 + }, + { + "epoch": 0.8547897720156595, + "grad_norm": 129.6185760498047, + "learning_rate": 7.025218205528061e-07, + "loss": 13.8482, + "step": 423150 + }, + { + "epoch": 0.8548099726483434, + "grad_norm": 187.77706909179688, + "learning_rate": 7.02343408430502e-07, + "loss": 12.9665, + "step": 423160 + }, + { + "epoch": 0.8548301732810272, + "grad_norm": 544.2252807617188, + "learning_rate": 7.021650172544531e-07, + "loss": 12.6351, + "step": 423170 + }, + { + "epoch": 0.854850373913711, + "grad_norm": 582.0858154296875, + "learning_rate": 7.019866470255315e-07, + "loss": 26.0964, + "step": 423180 + }, + { + "epoch": 0.8548705745463948, + "grad_norm": 290.2862548828125, + "learning_rate": 7.018082977446061e-07, + "loss": 12.2002, + "step": 423190 + }, + { + "epoch": 0.8548907751790786, + "grad_norm": 792.829833984375, + "learning_rate": 7.01629969412545e-07, + "loss": 17.0466, + "step": 423200 + }, + { + "epoch": 0.8549109758117625, + "grad_norm": 155.3640594482422, + "learning_rate": 7.014516620302186e-07, + "loss": 8.8565, + "step": 423210 + }, + { + "epoch": 0.8549311764444463, + "grad_norm": 230.0448760986328, + "learning_rate": 7.012733755984946e-07, + "loss": 10.1439, + "step": 423220 + }, + { + "epoch": 0.8549513770771301, + "grad_norm": 291.56732177734375, + "learning_rate": 7.010951101182439e-07, + "loss": 17.6992, + "step": 423230 + }, + { + "epoch": 0.8549715777098139, + "grad_norm": 23.559616088867188, + "learning_rate": 7.009168655903342e-07, + "loss": 16.6611, + "step": 423240 + }, + { + "epoch": 0.8549917783424977, + "grad_norm": 102.11821746826172, + "learning_rate": 7.007386420156332e-07, + "loss": 16.0564, + "step": 423250 + }, + { + "epoch": 0.8550119789751816, + "grad_norm": 426.19720458984375, + "learning_rate": 7.005604393950116e-07, + "loss": 8.914, + "step": 423260 + }, + { + "epoch": 0.8550321796078654, + "grad_norm": 197.2255859375, + "learning_rate": 7.003822577293362e-07, + "loss": 13.2246, + "step": 423270 + }, + { + "epoch": 0.8550523802405491, + "grad_norm": 291.4448547363281, + "learning_rate": 7.002040970194768e-07, + "loss": 13.1481, + "step": 423280 + }, + { + "epoch": 0.8550725808732329, + "grad_norm": 600.7762451171875, + "learning_rate": 7.000259572663004e-07, + "loss": 43.9559, + "step": 423290 + }, + { + "epoch": 0.8550927815059167, + "grad_norm": 267.8164367675781, + "learning_rate": 6.99847838470677e-07, + "loss": 9.8765, + "step": 423300 + }, + { + "epoch": 0.8551129821386005, + "grad_norm": 551.2529296875, + "learning_rate": 6.996697406334735e-07, + "loss": 8.6453, + "step": 423310 + }, + { + "epoch": 0.8551331827712844, + "grad_norm": 306.5521240234375, + "learning_rate": 6.994916637555571e-07, + "loss": 15.5206, + "step": 423320 + }, + { + "epoch": 0.8551533834039682, + "grad_norm": 409.07958984375, + "learning_rate": 6.993136078377965e-07, + "loss": 11.0522, + "step": 423330 + }, + { + "epoch": 0.855173584036652, + "grad_norm": 609.0219116210938, + "learning_rate": 6.991355728810623e-07, + "loss": 29.754, + "step": 423340 + }, + { + "epoch": 0.8551937846693358, + "grad_norm": 440.0611877441406, + "learning_rate": 6.989575588862174e-07, + "loss": 19.6599, + "step": 423350 + }, + { + "epoch": 0.8552139853020196, + "grad_norm": 289.8558044433594, + "learning_rate": 6.987795658541319e-07, + "loss": 13.4273, + "step": 423360 + }, + { + "epoch": 0.8552341859347035, + "grad_norm": 370.3302917480469, + "learning_rate": 6.986015937856743e-07, + "loss": 26.0949, + "step": 423370 + }, + { + "epoch": 0.8552543865673873, + "grad_norm": 423.3115234375, + "learning_rate": 6.984236426817104e-07, + "loss": 29.1275, + "step": 423380 + }, + { + "epoch": 0.8552745872000711, + "grad_norm": 849.1698608398438, + "learning_rate": 6.982457125431069e-07, + "loss": 33.0457, + "step": 423390 + }, + { + "epoch": 0.8552947878327549, + "grad_norm": 520.9661254882812, + "learning_rate": 6.980678033707333e-07, + "loss": 19.85, + "step": 423400 + }, + { + "epoch": 0.8553149884654387, + "grad_norm": 812.6036376953125, + "learning_rate": 6.978899151654556e-07, + "loss": 22.3343, + "step": 423410 + }, + { + "epoch": 0.8553351890981226, + "grad_norm": 95.53800201416016, + "learning_rate": 6.977120479281396e-07, + "loss": 20.1647, + "step": 423420 + }, + { + "epoch": 0.8553553897308064, + "grad_norm": 571.2792358398438, + "learning_rate": 6.975342016596531e-07, + "loss": 24.6136, + "step": 423430 + }, + { + "epoch": 0.8553755903634902, + "grad_norm": 265.1282653808594, + "learning_rate": 6.973563763608643e-07, + "loss": 17.2472, + "step": 423440 + }, + { + "epoch": 0.855395790996174, + "grad_norm": 319.90191650390625, + "learning_rate": 6.971785720326385e-07, + "loss": 13.9207, + "step": 423450 + }, + { + "epoch": 0.8554159916288578, + "grad_norm": 482.8645324707031, + "learning_rate": 6.970007886758412e-07, + "loss": 20.3257, + "step": 423460 + }, + { + "epoch": 0.8554361922615417, + "grad_norm": 471.61529541015625, + "learning_rate": 6.968230262913417e-07, + "loss": 21.8656, + "step": 423470 + }, + { + "epoch": 0.8554563928942255, + "grad_norm": 443.1946716308594, + "learning_rate": 6.966452848800043e-07, + "loss": 15.9943, + "step": 423480 + }, + { + "epoch": 0.8554765935269093, + "grad_norm": 578.9497680664062, + "learning_rate": 6.964675644426955e-07, + "loss": 18.8936, + "step": 423490 + }, + { + "epoch": 0.8554967941595931, + "grad_norm": 6.638950347900391, + "learning_rate": 6.962898649802824e-07, + "loss": 17.4078, + "step": 423500 + }, + { + "epoch": 0.8555169947922769, + "grad_norm": 1091.97998046875, + "learning_rate": 6.961121864936294e-07, + "loss": 22.4504, + "step": 423510 + }, + { + "epoch": 0.8555371954249608, + "grad_norm": 532.3468627929688, + "learning_rate": 6.95934528983605e-07, + "loss": 10.7549, + "step": 423520 + }, + { + "epoch": 0.8555573960576445, + "grad_norm": 306.2650146484375, + "learning_rate": 6.957568924510733e-07, + "loss": 11.9596, + "step": 423530 + }, + { + "epoch": 0.8555775966903283, + "grad_norm": 550.3804931640625, + "learning_rate": 6.955792768969e-07, + "loss": 18.5798, + "step": 423540 + }, + { + "epoch": 0.8555977973230121, + "grad_norm": 468.0177307128906, + "learning_rate": 6.954016823219517e-07, + "loss": 15.8299, + "step": 423550 + }, + { + "epoch": 0.8556179979556959, + "grad_norm": 265.7972717285156, + "learning_rate": 6.952241087270938e-07, + "loss": 12.0223, + "step": 423560 + }, + { + "epoch": 0.8556381985883798, + "grad_norm": 417.89630126953125, + "learning_rate": 6.950465561131903e-07, + "loss": 18.3307, + "step": 423570 + }, + { + "epoch": 0.8556583992210636, + "grad_norm": 390.70086669921875, + "learning_rate": 6.948690244811079e-07, + "loss": 22.9239, + "step": 423580 + }, + { + "epoch": 0.8556785998537474, + "grad_norm": 357.60009765625, + "learning_rate": 6.946915138317129e-07, + "loss": 13.8321, + "step": 423590 + }, + { + "epoch": 0.8556988004864312, + "grad_norm": 292.06292724609375, + "learning_rate": 6.945140241658688e-07, + "loss": 16.6138, + "step": 423600 + }, + { + "epoch": 0.855719001119115, + "grad_norm": 231.03797912597656, + "learning_rate": 6.943365554844406e-07, + "loss": 16.0941, + "step": 423610 + }, + { + "epoch": 0.8557392017517989, + "grad_norm": 178.90017700195312, + "learning_rate": 6.941591077882948e-07, + "loss": 26.9284, + "step": 423620 + }, + { + "epoch": 0.8557594023844827, + "grad_norm": 267.6416015625, + "learning_rate": 6.939816810782952e-07, + "loss": 22.531, + "step": 423630 + }, + { + "epoch": 0.8557796030171665, + "grad_norm": 483.8642883300781, + "learning_rate": 6.938042753553054e-07, + "loss": 33.9005, + "step": 423640 + }, + { + "epoch": 0.8557998036498503, + "grad_norm": 532.0093383789062, + "learning_rate": 6.936268906201915e-07, + "loss": 14.0307, + "step": 423650 + }, + { + "epoch": 0.8558200042825341, + "grad_norm": 672.6396484375, + "learning_rate": 6.934495268738195e-07, + "loss": 18.4205, + "step": 423660 + }, + { + "epoch": 0.855840204915218, + "grad_norm": 494.2569274902344, + "learning_rate": 6.932721841170503e-07, + "loss": 12.0525, + "step": 423670 + }, + { + "epoch": 0.8558604055479018, + "grad_norm": 488.8106689453125, + "learning_rate": 6.930948623507505e-07, + "loss": 16.517, + "step": 423680 + }, + { + "epoch": 0.8558806061805856, + "grad_norm": 716.0003662109375, + "learning_rate": 6.92917561575785e-07, + "loss": 24.3459, + "step": 423690 + }, + { + "epoch": 0.8559008068132694, + "grad_norm": 463.7527160644531, + "learning_rate": 6.927402817930168e-07, + "loss": 17.1598, + "step": 423700 + }, + { + "epoch": 0.8559210074459532, + "grad_norm": 303.1055908203125, + "learning_rate": 6.925630230033087e-07, + "loss": 24.3444, + "step": 423710 + }, + { + "epoch": 0.855941208078637, + "grad_norm": 637.7202758789062, + "learning_rate": 6.923857852075261e-07, + "loss": 17.3609, + "step": 423720 + }, + { + "epoch": 0.8559614087113209, + "grad_norm": 456.35491943359375, + "learning_rate": 6.922085684065349e-07, + "loss": 19.7045, + "step": 423730 + }, + { + "epoch": 0.8559816093440047, + "grad_norm": 126.94768524169922, + "learning_rate": 6.920313726011945e-07, + "loss": 6.9527, + "step": 423740 + }, + { + "epoch": 0.8560018099766885, + "grad_norm": 357.84619140625, + "learning_rate": 6.918541977923709e-07, + "loss": 23.9845, + "step": 423750 + }, + { + "epoch": 0.8560220106093723, + "grad_norm": 377.5257873535156, + "learning_rate": 6.916770439809283e-07, + "loss": 21.3561, + "step": 423760 + }, + { + "epoch": 0.8560422112420562, + "grad_norm": 135.76699829101562, + "learning_rate": 6.914999111677295e-07, + "loss": 16.4085, + "step": 423770 + }, + { + "epoch": 0.85606241187474, + "grad_norm": 181.76840209960938, + "learning_rate": 6.913227993536364e-07, + "loss": 23.1228, + "step": 423780 + }, + { + "epoch": 0.8560826125074237, + "grad_norm": 422.5714416503906, + "learning_rate": 6.911457085395146e-07, + "loss": 13.2301, + "step": 423790 + }, + { + "epoch": 0.8561028131401075, + "grad_norm": 458.74517822265625, + "learning_rate": 6.909686387262255e-07, + "loss": 13.2711, + "step": 423800 + }, + { + "epoch": 0.8561230137727913, + "grad_norm": 173.49359130859375, + "learning_rate": 6.907915899146322e-07, + "loss": 14.9355, + "step": 423810 + }, + { + "epoch": 0.8561432144054751, + "grad_norm": 420.94586181640625, + "learning_rate": 6.906145621055987e-07, + "loss": 15.7464, + "step": 423820 + }, + { + "epoch": 0.856163415038159, + "grad_norm": 325.2900695800781, + "learning_rate": 6.904375552999859e-07, + "loss": 14.5921, + "step": 423830 + }, + { + "epoch": 0.8561836156708428, + "grad_norm": 377.8846130371094, + "learning_rate": 6.902605694986592e-07, + "loss": 29.0642, + "step": 423840 + }, + { + "epoch": 0.8562038163035266, + "grad_norm": 424.69622802734375, + "learning_rate": 6.9008360470248e-07, + "loss": 10.3148, + "step": 423850 + }, + { + "epoch": 0.8562240169362104, + "grad_norm": 16.294780731201172, + "learning_rate": 6.89906660912309e-07, + "loss": 20.1662, + "step": 423860 + }, + { + "epoch": 0.8562442175688942, + "grad_norm": 467.6504821777344, + "learning_rate": 6.897297381290113e-07, + "loss": 12.0162, + "step": 423870 + }, + { + "epoch": 0.8562644182015781, + "grad_norm": 90.69451141357422, + "learning_rate": 6.895528363534476e-07, + "loss": 21.7523, + "step": 423880 + }, + { + "epoch": 0.8562846188342619, + "grad_norm": 572.353515625, + "learning_rate": 6.89375955586481e-07, + "loss": 9.6403, + "step": 423890 + }, + { + "epoch": 0.8563048194669457, + "grad_norm": 426.759765625, + "learning_rate": 6.891990958289724e-07, + "loss": 16.1095, + "step": 423900 + }, + { + "epoch": 0.8563250200996295, + "grad_norm": 261.89208984375, + "learning_rate": 6.890222570817856e-07, + "loss": 19.6324, + "step": 423910 + }, + { + "epoch": 0.8563452207323133, + "grad_norm": 768.3299560546875, + "learning_rate": 6.888454393457817e-07, + "loss": 23.239, + "step": 423920 + }, + { + "epoch": 0.8563654213649972, + "grad_norm": 167.47921752929688, + "learning_rate": 6.886686426218209e-07, + "loss": 16.6358, + "step": 423930 + }, + { + "epoch": 0.856385621997681, + "grad_norm": 671.823486328125, + "learning_rate": 6.884918669107671e-07, + "loss": 14.0608, + "step": 423940 + }, + { + "epoch": 0.8564058226303648, + "grad_norm": 208.45716857910156, + "learning_rate": 6.883151122134812e-07, + "loss": 13.198, + "step": 423950 + }, + { + "epoch": 0.8564260232630486, + "grad_norm": 64.32759857177734, + "learning_rate": 6.881383785308232e-07, + "loss": 15.9101, + "step": 423960 + }, + { + "epoch": 0.8564462238957324, + "grad_norm": 501.4256591796875, + "learning_rate": 6.879616658636562e-07, + "loss": 18.0617, + "step": 423970 + }, + { + "epoch": 0.8564664245284163, + "grad_norm": 332.1427917480469, + "learning_rate": 6.877849742128423e-07, + "loss": 15.8026, + "step": 423980 + }, + { + "epoch": 0.8564866251611001, + "grad_norm": 429.5103759765625, + "learning_rate": 6.876083035792408e-07, + "loss": 13.0902, + "step": 423990 + }, + { + "epoch": 0.8565068257937839, + "grad_norm": 616.64208984375, + "learning_rate": 6.874316539637127e-07, + "loss": 26.6816, + "step": 424000 + }, + { + "epoch": 0.8565270264264677, + "grad_norm": 420.34564208984375, + "learning_rate": 6.872550253671207e-07, + "loss": 23.2197, + "step": 424010 + }, + { + "epoch": 0.8565472270591515, + "grad_norm": 1443.5985107421875, + "learning_rate": 6.870784177903244e-07, + "loss": 31.7041, + "step": 424020 + }, + { + "epoch": 0.8565674276918354, + "grad_norm": 152.979248046875, + "learning_rate": 6.869018312341841e-07, + "loss": 12.8966, + "step": 424030 + }, + { + "epoch": 0.8565876283245192, + "grad_norm": 279.4204406738281, + "learning_rate": 6.86725265699561e-07, + "loss": 17.8656, + "step": 424040 + }, + { + "epoch": 0.8566078289572029, + "grad_norm": 646.5421142578125, + "learning_rate": 6.865487211873167e-07, + "loss": 19.8595, + "step": 424050 + }, + { + "epoch": 0.8566280295898867, + "grad_norm": 416.0545654296875, + "learning_rate": 6.863721976983112e-07, + "loss": 26.8425, + "step": 424060 + }, + { + "epoch": 0.8566482302225705, + "grad_norm": 922.7435302734375, + "learning_rate": 6.861956952334031e-07, + "loss": 17.239, + "step": 424070 + }, + { + "epoch": 0.8566684308552543, + "grad_norm": 240.82968139648438, + "learning_rate": 6.860192137934552e-07, + "loss": 19.2269, + "step": 424080 + }, + { + "epoch": 0.8566886314879382, + "grad_norm": 439.35784912109375, + "learning_rate": 6.858427533793261e-07, + "loss": 13.4926, + "step": 424090 + }, + { + "epoch": 0.856708832120622, + "grad_norm": 588.150146484375, + "learning_rate": 6.856663139918751e-07, + "loss": 10.181, + "step": 424100 + }, + { + "epoch": 0.8567290327533058, + "grad_norm": 288.3674011230469, + "learning_rate": 6.854898956319644e-07, + "loss": 25.9362, + "step": 424110 + }, + { + "epoch": 0.8567492333859896, + "grad_norm": 256.4996643066406, + "learning_rate": 6.853134983004517e-07, + "loss": 7.8227, + "step": 424120 + }, + { + "epoch": 0.8567694340186734, + "grad_norm": 452.1529846191406, + "learning_rate": 6.851371219981989e-07, + "loss": 16.0354, + "step": 424130 + }, + { + "epoch": 0.8567896346513573, + "grad_norm": 287.13458251953125, + "learning_rate": 6.849607667260643e-07, + "loss": 25.9335, + "step": 424140 + }, + { + "epoch": 0.8568098352840411, + "grad_norm": 425.8190002441406, + "learning_rate": 6.847844324849062e-07, + "loss": 40.5554, + "step": 424150 + }, + { + "epoch": 0.8568300359167249, + "grad_norm": 180.3892822265625, + "learning_rate": 6.846081192755871e-07, + "loss": 12.232, + "step": 424160 + }, + { + "epoch": 0.8568502365494087, + "grad_norm": 622.3618774414062, + "learning_rate": 6.844318270989631e-07, + "loss": 16.899, + "step": 424170 + }, + { + "epoch": 0.8568704371820925, + "grad_norm": 257.9329528808594, + "learning_rate": 6.842555559558961e-07, + "loss": 13.4867, + "step": 424180 + }, + { + "epoch": 0.8568906378147764, + "grad_norm": 330.57879638671875, + "learning_rate": 6.840793058472434e-07, + "loss": 11.9845, + "step": 424190 + }, + { + "epoch": 0.8569108384474602, + "grad_norm": 922.0054321289062, + "learning_rate": 6.839030767738653e-07, + "loss": 19.7079, + "step": 424200 + }, + { + "epoch": 0.856931039080144, + "grad_norm": 719.37109375, + "learning_rate": 6.837268687366199e-07, + "loss": 25.35, + "step": 424210 + }, + { + "epoch": 0.8569512397128278, + "grad_norm": 421.06842041015625, + "learning_rate": 6.835506817363657e-07, + "loss": 15.7544, + "step": 424220 + }, + { + "epoch": 0.8569714403455116, + "grad_norm": 64.48709869384766, + "learning_rate": 6.83374515773963e-07, + "loss": 25.4828, + "step": 424230 + }, + { + "epoch": 0.8569916409781955, + "grad_norm": 391.7761535644531, + "learning_rate": 6.831983708502693e-07, + "loss": 13.6241, + "step": 424240 + }, + { + "epoch": 0.8570118416108793, + "grad_norm": 389.82110595703125, + "learning_rate": 6.830222469661419e-07, + "loss": 10.5855, + "step": 424250 + }, + { + "epoch": 0.8570320422435631, + "grad_norm": 359.3149108886719, + "learning_rate": 6.828461441224405e-07, + "loss": 16.1136, + "step": 424260 + }, + { + "epoch": 0.8570522428762469, + "grad_norm": 162.98236083984375, + "learning_rate": 6.826700623200255e-07, + "loss": 12.2284, + "step": 424270 + }, + { + "epoch": 0.8570724435089307, + "grad_norm": 303.9280090332031, + "learning_rate": 6.824940015597514e-07, + "loss": 15.9212, + "step": 424280 + }, + { + "epoch": 0.8570926441416146, + "grad_norm": 93.1552963256836, + "learning_rate": 6.823179618424774e-07, + "loss": 6.9389, + "step": 424290 + }, + { + "epoch": 0.8571128447742983, + "grad_norm": 265.16131591796875, + "learning_rate": 6.821419431690629e-07, + "loss": 12.7053, + "step": 424300 + }, + { + "epoch": 0.8571330454069821, + "grad_norm": 131.3051300048828, + "learning_rate": 6.819659455403654e-07, + "loss": 12.4221, + "step": 424310 + }, + { + "epoch": 0.8571532460396659, + "grad_norm": 738.7040405273438, + "learning_rate": 6.817899689572405e-07, + "loss": 18.3596, + "step": 424320 + }, + { + "epoch": 0.8571734466723497, + "grad_norm": 505.9089660644531, + "learning_rate": 6.816140134205479e-07, + "loss": 17.632, + "step": 424330 + }, + { + "epoch": 0.8571936473050336, + "grad_norm": 440.5530700683594, + "learning_rate": 6.81438078931147e-07, + "loss": 14.6219, + "step": 424340 + }, + { + "epoch": 0.8572138479377174, + "grad_norm": 101.6639404296875, + "learning_rate": 6.81262165489891e-07, + "loss": 13.628, + "step": 424350 + }, + { + "epoch": 0.8572340485704012, + "grad_norm": 87.83068084716797, + "learning_rate": 6.810862730976392e-07, + "loss": 12.6355, + "step": 424360 + }, + { + "epoch": 0.857254249203085, + "grad_norm": 340.1312561035156, + "learning_rate": 6.809104017552503e-07, + "loss": 10.5935, + "step": 424370 + }, + { + "epoch": 0.8572744498357688, + "grad_norm": 155.0587158203125, + "learning_rate": 6.807345514635805e-07, + "loss": 17.6908, + "step": 424380 + }, + { + "epoch": 0.8572946504684527, + "grad_norm": 348.400146484375, + "learning_rate": 6.80558722223485e-07, + "loss": 13.1934, + "step": 424390 + }, + { + "epoch": 0.8573148511011365, + "grad_norm": 300.3902893066406, + "learning_rate": 6.803829140358237e-07, + "loss": 20.5829, + "step": 424400 + }, + { + "epoch": 0.8573350517338203, + "grad_norm": 574.2684326171875, + "learning_rate": 6.802071269014527e-07, + "loss": 20.713, + "step": 424410 + }, + { + "epoch": 0.8573552523665041, + "grad_norm": 401.4273376464844, + "learning_rate": 6.800313608212261e-07, + "loss": 17.8327, + "step": 424420 + }, + { + "epoch": 0.8573754529991879, + "grad_norm": 308.0999450683594, + "learning_rate": 6.798556157960046e-07, + "loss": 13.0263, + "step": 424430 + }, + { + "epoch": 0.8573956536318718, + "grad_norm": 250.98672485351562, + "learning_rate": 6.796798918266417e-07, + "loss": 21.3625, + "step": 424440 + }, + { + "epoch": 0.8574158542645556, + "grad_norm": 9.2577543258667, + "learning_rate": 6.795041889139958e-07, + "loss": 17.9683, + "step": 424450 + }, + { + "epoch": 0.8574360548972394, + "grad_norm": 317.8941345214844, + "learning_rate": 6.793285070589229e-07, + "loss": 18.9148, + "step": 424460 + }, + { + "epoch": 0.8574562555299232, + "grad_norm": 299.7467346191406, + "learning_rate": 6.79152846262277e-07, + "loss": 29.3304, + "step": 424470 + }, + { + "epoch": 0.857476456162607, + "grad_norm": 391.6691589355469, + "learning_rate": 6.789772065249178e-07, + "loss": 12.1778, + "step": 424480 + }, + { + "epoch": 0.8574966567952909, + "grad_norm": 327.8114929199219, + "learning_rate": 6.788015878476983e-07, + "loss": 12.3476, + "step": 424490 + }, + { + "epoch": 0.8575168574279747, + "grad_norm": 172.25738525390625, + "learning_rate": 6.786259902314768e-07, + "loss": 12.297, + "step": 424500 + }, + { + "epoch": 0.8575370580606585, + "grad_norm": 308.32952880859375, + "learning_rate": 6.784504136771075e-07, + "loss": 25.0714, + "step": 424510 + }, + { + "epoch": 0.8575572586933423, + "grad_norm": 362.46392822265625, + "learning_rate": 6.782748581854471e-07, + "loss": 14.4293, + "step": 424520 + }, + { + "epoch": 0.8575774593260261, + "grad_norm": 24.034151077270508, + "learning_rate": 6.780993237573513e-07, + "loss": 10.6712, + "step": 424530 + }, + { + "epoch": 0.85759765995871, + "grad_norm": 383.0001220703125, + "learning_rate": 6.779238103936742e-07, + "loss": 17.6843, + "step": 424540 + }, + { + "epoch": 0.8576178605913938, + "grad_norm": 189.0929412841797, + "learning_rate": 6.777483180952732e-07, + "loss": 13.5381, + "step": 424550 + }, + { + "epoch": 0.8576380612240775, + "grad_norm": 70.52850341796875, + "learning_rate": 6.775728468630027e-07, + "loss": 20.7291, + "step": 424560 + }, + { + "epoch": 0.8576582618567613, + "grad_norm": 336.09002685546875, + "learning_rate": 6.773973966977165e-07, + "loss": 26.8984, + "step": 424570 + }, + { + "epoch": 0.8576784624894451, + "grad_norm": 489.1164245605469, + "learning_rate": 6.772219676002717e-07, + "loss": 18.0368, + "step": 424580 + }, + { + "epoch": 0.857698663122129, + "grad_norm": 230.7914581298828, + "learning_rate": 6.770465595715231e-07, + "loss": 16.2012, + "step": 424590 + }, + { + "epoch": 0.8577188637548128, + "grad_norm": 262.0990905761719, + "learning_rate": 6.768711726123261e-07, + "loss": 12.7744, + "step": 424600 + }, + { + "epoch": 0.8577390643874966, + "grad_norm": 402.1889343261719, + "learning_rate": 6.76695806723533e-07, + "loss": 15.6399, + "step": 424610 + }, + { + "epoch": 0.8577592650201804, + "grad_norm": 267.36810302734375, + "learning_rate": 6.765204619060012e-07, + "loss": 33.8587, + "step": 424620 + }, + { + "epoch": 0.8577794656528642, + "grad_norm": 326.3402404785156, + "learning_rate": 6.763451381605846e-07, + "loss": 15.6711, + "step": 424630 + }, + { + "epoch": 0.857799666285548, + "grad_norm": 11.367655754089355, + "learning_rate": 6.761698354881363e-07, + "loss": 43.7316, + "step": 424640 + }, + { + "epoch": 0.8578198669182319, + "grad_norm": 635.5440673828125, + "learning_rate": 6.759945538895119e-07, + "loss": 23.3102, + "step": 424650 + }, + { + "epoch": 0.8578400675509157, + "grad_norm": 849.0969848632812, + "learning_rate": 6.758192933655667e-07, + "loss": 24.0309, + "step": 424660 + }, + { + "epoch": 0.8578602681835995, + "grad_norm": 350.1836242675781, + "learning_rate": 6.756440539171533e-07, + "loss": 17.4814, + "step": 424670 + }, + { + "epoch": 0.8578804688162833, + "grad_norm": 686.380615234375, + "learning_rate": 6.754688355451256e-07, + "loss": 16.0547, + "step": 424680 + }, + { + "epoch": 0.8579006694489671, + "grad_norm": 292.45770263671875, + "learning_rate": 6.752936382503394e-07, + "loss": 19.9808, + "step": 424690 + }, + { + "epoch": 0.857920870081651, + "grad_norm": 641.3974609375, + "learning_rate": 6.751184620336471e-07, + "loss": 21.2562, + "step": 424700 + }, + { + "epoch": 0.8579410707143348, + "grad_norm": 272.4770202636719, + "learning_rate": 6.749433068959022e-07, + "loss": 8.5196, + "step": 424710 + }, + { + "epoch": 0.8579612713470186, + "grad_norm": 308.81781005859375, + "learning_rate": 6.747681728379601e-07, + "loss": 16.7534, + "step": 424720 + }, + { + "epoch": 0.8579814719797024, + "grad_norm": 365.3183898925781, + "learning_rate": 6.745930598606721e-07, + "loss": 27.0763, + "step": 424730 + }, + { + "epoch": 0.8580016726123862, + "grad_norm": 418.2502746582031, + "learning_rate": 6.744179679648943e-07, + "loss": 14.3722, + "step": 424740 + }, + { + "epoch": 0.8580218732450701, + "grad_norm": 421.59576416015625, + "learning_rate": 6.742428971514786e-07, + "loss": 15.017, + "step": 424750 + }, + { + "epoch": 0.8580420738777539, + "grad_norm": 524.6824340820312, + "learning_rate": 6.74067847421277e-07, + "loss": 30.5549, + "step": 424760 + }, + { + "epoch": 0.8580622745104377, + "grad_norm": 423.94622802734375, + "learning_rate": 6.738928187751454e-07, + "loss": 17.7713, + "step": 424770 + }, + { + "epoch": 0.8580824751431215, + "grad_norm": 554.2315063476562, + "learning_rate": 6.737178112139342e-07, + "loss": 14.8536, + "step": 424780 + }, + { + "epoch": 0.8581026757758053, + "grad_norm": 244.727294921875, + "learning_rate": 6.735428247384989e-07, + "loss": 26.6349, + "step": 424790 + }, + { + "epoch": 0.8581228764084892, + "grad_norm": 278.85321044921875, + "learning_rate": 6.733678593496901e-07, + "loss": 13.8719, + "step": 424800 + }, + { + "epoch": 0.8581430770411729, + "grad_norm": 419.3354797363281, + "learning_rate": 6.731929150483624e-07, + "loss": 13.6345, + "step": 424810 + }, + { + "epoch": 0.8581632776738567, + "grad_norm": 365.9506530761719, + "learning_rate": 6.73017991835368e-07, + "loss": 15.2896, + "step": 424820 + }, + { + "epoch": 0.8581834783065405, + "grad_norm": 290.0882873535156, + "learning_rate": 6.728430897115578e-07, + "loss": 25.9369, + "step": 424830 + }, + { + "epoch": 0.8582036789392243, + "grad_norm": 205.87440490722656, + "learning_rate": 6.726682086777869e-07, + "loss": 14.2771, + "step": 424840 + }, + { + "epoch": 0.8582238795719082, + "grad_norm": 305.48828125, + "learning_rate": 6.724933487349061e-07, + "loss": 17.0883, + "step": 424850 + }, + { + "epoch": 0.858244080204592, + "grad_norm": 160.75222778320312, + "learning_rate": 6.723185098837665e-07, + "loss": 37.2009, + "step": 424860 + }, + { + "epoch": 0.8582642808372758, + "grad_norm": 272.7733154296875, + "learning_rate": 6.721436921252223e-07, + "loss": 31.2173, + "step": 424870 + }, + { + "epoch": 0.8582844814699596, + "grad_norm": 170.22232055664062, + "learning_rate": 6.719688954601266e-07, + "loss": 27.0385, + "step": 424880 + }, + { + "epoch": 0.8583046821026434, + "grad_norm": 419.1109924316406, + "learning_rate": 6.717941198893274e-07, + "loss": 14.9501, + "step": 424890 + }, + { + "epoch": 0.8583248827353273, + "grad_norm": 344.0302734375, + "learning_rate": 6.716193654136788e-07, + "loss": 25.7425, + "step": 424900 + }, + { + "epoch": 0.8583450833680111, + "grad_norm": 112.04228210449219, + "learning_rate": 6.714446320340334e-07, + "loss": 11.3657, + "step": 424910 + }, + { + "epoch": 0.8583652840006949, + "grad_norm": 580.894775390625, + "learning_rate": 6.712699197512418e-07, + "loss": 11.5984, + "step": 424920 + }, + { + "epoch": 0.8583854846333787, + "grad_norm": 150.63763427734375, + "learning_rate": 6.710952285661549e-07, + "loss": 32.7941, + "step": 424930 + }, + { + "epoch": 0.8584056852660625, + "grad_norm": 569.3262939453125, + "learning_rate": 6.709205584796241e-07, + "loss": 15.6715, + "step": 424940 + }, + { + "epoch": 0.8584258858987464, + "grad_norm": 216.56661987304688, + "learning_rate": 6.707459094925045e-07, + "loss": 10.321, + "step": 424950 + }, + { + "epoch": 0.8584460865314302, + "grad_norm": 393.1278381347656, + "learning_rate": 6.705712816056415e-07, + "loss": 16.0812, + "step": 424960 + }, + { + "epoch": 0.858466287164114, + "grad_norm": 350.8961181640625, + "learning_rate": 6.703966748198892e-07, + "loss": 12.8694, + "step": 424970 + }, + { + "epoch": 0.8584864877967978, + "grad_norm": 113.81829833984375, + "learning_rate": 6.702220891360994e-07, + "loss": 11.2908, + "step": 424980 + }, + { + "epoch": 0.8585066884294816, + "grad_norm": 11.053807258605957, + "learning_rate": 6.700475245551218e-07, + "loss": 9.902, + "step": 424990 + }, + { + "epoch": 0.8585268890621655, + "grad_norm": 425.9871520996094, + "learning_rate": 6.698729810778065e-07, + "loss": 21.1804, + "step": 425000 + }, + { + "epoch": 0.8585470896948493, + "grad_norm": 581.9754638671875, + "learning_rate": 6.696984587050065e-07, + "loss": 12.3986, + "step": 425010 + }, + { + "epoch": 0.8585672903275331, + "grad_norm": 212.0026092529297, + "learning_rate": 6.695239574375706e-07, + "loss": 8.5076, + "step": 425020 + }, + { + "epoch": 0.8585874909602169, + "grad_norm": 72.91810607910156, + "learning_rate": 6.693494772763487e-07, + "loss": 11.4933, + "step": 425030 + }, + { + "epoch": 0.8586076915929007, + "grad_norm": 143.1288299560547, + "learning_rate": 6.691750182221935e-07, + "loss": 20.9185, + "step": 425040 + }, + { + "epoch": 0.8586278922255846, + "grad_norm": 425.71942138671875, + "learning_rate": 6.69000580275953e-07, + "loss": 13.3819, + "step": 425050 + }, + { + "epoch": 0.8586480928582684, + "grad_norm": 21.489431381225586, + "learning_rate": 6.688261634384791e-07, + "loss": 17.3936, + "step": 425060 + }, + { + "epoch": 0.8586682934909521, + "grad_norm": 41.76399230957031, + "learning_rate": 6.686517677106214e-07, + "loss": 13.0918, + "step": 425070 + }, + { + "epoch": 0.8586884941236359, + "grad_norm": 32.80811309814453, + "learning_rate": 6.684773930932281e-07, + "loss": 15.1117, + "step": 425080 + }, + { + "epoch": 0.8587086947563197, + "grad_norm": 69.80337524414062, + "learning_rate": 6.683030395871526e-07, + "loss": 30.6622, + "step": 425090 + }, + { + "epoch": 0.8587288953890035, + "grad_norm": 185.21798706054688, + "learning_rate": 6.681287071932408e-07, + "loss": 22.5686, + "step": 425100 + }, + { + "epoch": 0.8587490960216874, + "grad_norm": 478.83819580078125, + "learning_rate": 6.679543959123458e-07, + "loss": 20.441, + "step": 425110 + }, + { + "epoch": 0.8587692966543712, + "grad_norm": 399.27178955078125, + "learning_rate": 6.677801057453143e-07, + "loss": 16.9462, + "step": 425120 + }, + { + "epoch": 0.858789497287055, + "grad_norm": 411.01025390625, + "learning_rate": 6.676058366929988e-07, + "loss": 11.9585, + "step": 425130 + }, + { + "epoch": 0.8588096979197388, + "grad_norm": 322.119384765625, + "learning_rate": 6.674315887562466e-07, + "loss": 16.2444, + "step": 425140 + }, + { + "epoch": 0.8588298985524226, + "grad_norm": 285.550048828125, + "learning_rate": 6.672573619359063e-07, + "loss": 21.3185, + "step": 425150 + }, + { + "epoch": 0.8588500991851065, + "grad_norm": 260.2703857421875, + "learning_rate": 6.67083156232829e-07, + "loss": 28.233, + "step": 425160 + }, + { + "epoch": 0.8588702998177903, + "grad_norm": 317.91375732421875, + "learning_rate": 6.669089716478627e-07, + "loss": 24.1974, + "step": 425170 + }, + { + "epoch": 0.8588905004504741, + "grad_norm": 188.8263702392578, + "learning_rate": 6.667348081818559e-07, + "loss": 9.7247, + "step": 425180 + }, + { + "epoch": 0.8589107010831579, + "grad_norm": 616.5370483398438, + "learning_rate": 6.665606658356583e-07, + "loss": 16.824, + "step": 425190 + }, + { + "epoch": 0.8589309017158417, + "grad_norm": 105.1166000366211, + "learning_rate": 6.663865446101192e-07, + "loss": 18.8522, + "step": 425200 + }, + { + "epoch": 0.8589511023485256, + "grad_norm": 127.76112365722656, + "learning_rate": 6.662124445060863e-07, + "loss": 16.4562, + "step": 425210 + }, + { + "epoch": 0.8589713029812094, + "grad_norm": 267.9668273925781, + "learning_rate": 6.660383655244074e-07, + "loss": 12.7942, + "step": 425220 + }, + { + "epoch": 0.8589915036138932, + "grad_norm": 100.9757308959961, + "learning_rate": 6.658643076659327e-07, + "loss": 19.0906, + "step": 425230 + }, + { + "epoch": 0.859011704246577, + "grad_norm": 521.11767578125, + "learning_rate": 6.6569027093151e-07, + "loss": 22.8369, + "step": 425240 + }, + { + "epoch": 0.8590319048792608, + "grad_norm": 638.5565795898438, + "learning_rate": 6.655162553219862e-07, + "loss": 32.4391, + "step": 425250 + }, + { + "epoch": 0.8590521055119447, + "grad_norm": 581.9984741210938, + "learning_rate": 6.653422608382105e-07, + "loss": 28.779, + "step": 425260 + }, + { + "epoch": 0.8590723061446285, + "grad_norm": 107.42449951171875, + "learning_rate": 6.651682874810317e-07, + "loss": 10.0435, + "step": 425270 + }, + { + "epoch": 0.8590925067773123, + "grad_norm": 673.4070434570312, + "learning_rate": 6.649943352512972e-07, + "loss": 20.3799, + "step": 425280 + }, + { + "epoch": 0.8591127074099961, + "grad_norm": 507.18798828125, + "learning_rate": 6.648204041498534e-07, + "loss": 20.1533, + "step": 425290 + }, + { + "epoch": 0.8591329080426799, + "grad_norm": 315.055908203125, + "learning_rate": 6.646464941775499e-07, + "loss": 10.991, + "step": 425300 + }, + { + "epoch": 0.8591531086753638, + "grad_norm": 265.2852783203125, + "learning_rate": 6.64472605335234e-07, + "loss": 13.6276, + "step": 425310 + }, + { + "epoch": 0.8591733093080475, + "grad_norm": 124.01360321044922, + "learning_rate": 6.642987376237514e-07, + "loss": 29.7952, + "step": 425320 + }, + { + "epoch": 0.8591935099407313, + "grad_norm": 366.8377685546875, + "learning_rate": 6.641248910439518e-07, + "loss": 12.5253, + "step": 425330 + }, + { + "epoch": 0.8592137105734151, + "grad_norm": 379.7523193359375, + "learning_rate": 6.639510655966813e-07, + "loss": 15.6356, + "step": 425340 + }, + { + "epoch": 0.8592339112060989, + "grad_norm": 288.5459289550781, + "learning_rate": 6.637772612827881e-07, + "loss": 13.3753, + "step": 425350 + }, + { + "epoch": 0.8592541118387828, + "grad_norm": 86.94245147705078, + "learning_rate": 6.636034781031181e-07, + "loss": 13.9261, + "step": 425360 + }, + { + "epoch": 0.8592743124714666, + "grad_norm": 391.8909912109375, + "learning_rate": 6.634297160585184e-07, + "loss": 22.205, + "step": 425370 + }, + { + "epoch": 0.8592945131041504, + "grad_norm": 401.6841125488281, + "learning_rate": 6.632559751498369e-07, + "loss": 16.0234, + "step": 425380 + }, + { + "epoch": 0.8593147137368342, + "grad_norm": 576.306640625, + "learning_rate": 6.630822553779193e-07, + "loss": 30.8965, + "step": 425390 + }, + { + "epoch": 0.859334914369518, + "grad_norm": 179.42271423339844, + "learning_rate": 6.629085567436133e-07, + "loss": 27.6414, + "step": 425400 + }, + { + "epoch": 0.8593551150022019, + "grad_norm": 441.8272705078125, + "learning_rate": 6.627348792477639e-07, + "loss": 20.9805, + "step": 425410 + }, + { + "epoch": 0.8593753156348857, + "grad_norm": 26.4918155670166, + "learning_rate": 6.625612228912199e-07, + "loss": 26.1555, + "step": 425420 + }, + { + "epoch": 0.8593955162675695, + "grad_norm": 261.5885009765625, + "learning_rate": 6.623875876748265e-07, + "loss": 23.7872, + "step": 425430 + }, + { + "epoch": 0.8594157169002533, + "grad_norm": 585.4600219726562, + "learning_rate": 6.622139735994288e-07, + "loss": 18.8284, + "step": 425440 + }, + { + "epoch": 0.8594359175329371, + "grad_norm": 181.7188262939453, + "learning_rate": 6.620403806658754e-07, + "loss": 8.9154, + "step": 425450 + }, + { + "epoch": 0.859456118165621, + "grad_norm": 448.3474426269531, + "learning_rate": 6.618668088750107e-07, + "loss": 23.9844, + "step": 425460 + }, + { + "epoch": 0.8594763187983048, + "grad_norm": 234.59646606445312, + "learning_rate": 6.616932582276798e-07, + "loss": 17.3415, + "step": 425470 + }, + { + "epoch": 0.8594965194309886, + "grad_norm": 122.73247528076172, + "learning_rate": 6.615197287247299e-07, + "loss": 15.1149, + "step": 425480 + }, + { + "epoch": 0.8595167200636724, + "grad_norm": 1209.4432373046875, + "learning_rate": 6.61346220367009e-07, + "loss": 10.0193, + "step": 425490 + }, + { + "epoch": 0.8595369206963562, + "grad_norm": 872.7515258789062, + "learning_rate": 6.611727331553585e-07, + "loss": 22.8392, + "step": 425500 + }, + { + "epoch": 0.85955712132904, + "grad_norm": 473.6070556640625, + "learning_rate": 6.609992670906251e-07, + "loss": 14.2966, + "step": 425510 + }, + { + "epoch": 0.8595773219617239, + "grad_norm": 349.6256103515625, + "learning_rate": 6.608258221736568e-07, + "loss": 18.4509, + "step": 425520 + }, + { + "epoch": 0.8595975225944077, + "grad_norm": 724.1307983398438, + "learning_rate": 6.60652398405297e-07, + "loss": 27.9618, + "step": 425530 + }, + { + "epoch": 0.8596177232270915, + "grad_norm": 521.7205810546875, + "learning_rate": 6.604789957863899e-07, + "loss": 18.731, + "step": 425540 + }, + { + "epoch": 0.8596379238597753, + "grad_norm": 652.4680786132812, + "learning_rate": 6.603056143177817e-07, + "loss": 16.1501, + "step": 425550 + }, + { + "epoch": 0.8596581244924592, + "grad_norm": 0.0, + "learning_rate": 6.601322540003202e-07, + "loss": 27.6205, + "step": 425560 + }, + { + "epoch": 0.859678325125143, + "grad_norm": 201.71774291992188, + "learning_rate": 6.599589148348451e-07, + "loss": 17.4737, + "step": 425570 + }, + { + "epoch": 0.8596985257578267, + "grad_norm": 604.082763671875, + "learning_rate": 6.597855968222038e-07, + "loss": 13.4022, + "step": 425580 + }, + { + "epoch": 0.8597187263905105, + "grad_norm": 367.8940734863281, + "learning_rate": 6.596122999632426e-07, + "loss": 21.1846, + "step": 425590 + }, + { + "epoch": 0.8597389270231943, + "grad_norm": 194.28854370117188, + "learning_rate": 6.594390242588044e-07, + "loss": 13.4595, + "step": 425600 + }, + { + "epoch": 0.8597591276558781, + "grad_norm": 5.756259441375732, + "learning_rate": 6.592657697097333e-07, + "loss": 18.238, + "step": 425610 + }, + { + "epoch": 0.859779328288562, + "grad_norm": 582.0173950195312, + "learning_rate": 6.590925363168749e-07, + "loss": 25.401, + "step": 425620 + }, + { + "epoch": 0.8597995289212458, + "grad_norm": 677.1604614257812, + "learning_rate": 6.589193240810732e-07, + "loss": 29.9261, + "step": 425630 + }, + { + "epoch": 0.8598197295539296, + "grad_norm": 0.6434399485588074, + "learning_rate": 6.587461330031714e-07, + "loss": 14.7036, + "step": 425640 + }, + { + "epoch": 0.8598399301866134, + "grad_norm": 413.88775634765625, + "learning_rate": 6.585729630840149e-07, + "loss": 17.6794, + "step": 425650 + }, + { + "epoch": 0.8598601308192972, + "grad_norm": 194.02024841308594, + "learning_rate": 6.583998143244463e-07, + "loss": 14.493, + "step": 425660 + }, + { + "epoch": 0.8598803314519811, + "grad_norm": 146.6978759765625, + "learning_rate": 6.582266867253118e-07, + "loss": 7.9778, + "step": 425670 + }, + { + "epoch": 0.8599005320846649, + "grad_norm": 225.89418029785156, + "learning_rate": 6.580535802874538e-07, + "loss": 15.6481, + "step": 425680 + }, + { + "epoch": 0.8599207327173487, + "grad_norm": 421.75567626953125, + "learning_rate": 6.578804950117146e-07, + "loss": 14.5297, + "step": 425690 + }, + { + "epoch": 0.8599409333500325, + "grad_norm": 206.58993530273438, + "learning_rate": 6.577074308989406e-07, + "loss": 22.0901, + "step": 425700 + }, + { + "epoch": 0.8599611339827163, + "grad_norm": 30.304372787475586, + "learning_rate": 6.575343879499729e-07, + "loss": 17.5016, + "step": 425710 + }, + { + "epoch": 0.8599813346154002, + "grad_norm": 396.6864318847656, + "learning_rate": 6.57361366165657e-07, + "loss": 18.5493, + "step": 425720 + }, + { + "epoch": 0.860001535248084, + "grad_norm": 469.9893493652344, + "learning_rate": 6.571883655468336e-07, + "loss": 22.4117, + "step": 425730 + }, + { + "epoch": 0.8600217358807678, + "grad_norm": 226.0044708251953, + "learning_rate": 6.57015386094349e-07, + "loss": 20.228, + "step": 425740 + }, + { + "epoch": 0.8600419365134516, + "grad_norm": 166.0838623046875, + "learning_rate": 6.568424278090446e-07, + "loss": 16.2481, + "step": 425750 + }, + { + "epoch": 0.8600621371461354, + "grad_norm": 412.2013854980469, + "learning_rate": 6.56669490691762e-07, + "loss": 17.67, + "step": 425760 + }, + { + "epoch": 0.8600823377788193, + "grad_norm": 566.22900390625, + "learning_rate": 6.564965747433472e-07, + "loss": 31.0132, + "step": 425770 + }, + { + "epoch": 0.8601025384115031, + "grad_norm": 529.25732421875, + "learning_rate": 6.563236799646405e-07, + "loss": 15.0801, + "step": 425780 + }, + { + "epoch": 0.8601227390441869, + "grad_norm": 350.4403381347656, + "learning_rate": 6.561508063564847e-07, + "loss": 13.9268, + "step": 425790 + }, + { + "epoch": 0.8601429396768707, + "grad_norm": 297.8817138671875, + "learning_rate": 6.559779539197231e-07, + "loss": 30.1499, + "step": 425800 + }, + { + "epoch": 0.8601631403095545, + "grad_norm": 724.6544189453125, + "learning_rate": 6.558051226551992e-07, + "loss": 19.1858, + "step": 425810 + }, + { + "epoch": 0.8601833409422384, + "grad_norm": 423.3988952636719, + "learning_rate": 6.556323125637542e-07, + "loss": 12.4289, + "step": 425820 + }, + { + "epoch": 0.8602035415749222, + "grad_norm": 3.7432050704956055, + "learning_rate": 6.554595236462291e-07, + "loss": 21.2461, + "step": 425830 + }, + { + "epoch": 0.8602237422076059, + "grad_norm": 308.4240417480469, + "learning_rate": 6.552867559034687e-07, + "loss": 39.6005, + "step": 425840 + }, + { + "epoch": 0.8602439428402897, + "grad_norm": 232.96771240234375, + "learning_rate": 6.551140093363135e-07, + "loss": 15.7731, + "step": 425850 + }, + { + "epoch": 0.8602641434729735, + "grad_norm": 339.595458984375, + "learning_rate": 6.549412839456048e-07, + "loss": 13.9972, + "step": 425860 + }, + { + "epoch": 0.8602843441056574, + "grad_norm": 749.7958984375, + "learning_rate": 6.547685797321851e-07, + "loss": 24.7508, + "step": 425870 + }, + { + "epoch": 0.8603045447383412, + "grad_norm": 102.43689727783203, + "learning_rate": 6.545958966968974e-07, + "loss": 8.3586, + "step": 425880 + }, + { + "epoch": 0.860324745371025, + "grad_norm": 429.5927429199219, + "learning_rate": 6.544232348405821e-07, + "loss": 18.1928, + "step": 425890 + }, + { + "epoch": 0.8603449460037088, + "grad_norm": 176.30628967285156, + "learning_rate": 6.542505941640803e-07, + "loss": 25.1116, + "step": 425900 + }, + { + "epoch": 0.8603651466363926, + "grad_norm": 614.5752563476562, + "learning_rate": 6.540779746682346e-07, + "loss": 30.6537, + "step": 425910 + }, + { + "epoch": 0.8603853472690765, + "grad_norm": 484.5924377441406, + "learning_rate": 6.53905376353886e-07, + "loss": 19.1837, + "step": 425920 + }, + { + "epoch": 0.8604055479017603, + "grad_norm": 508.8131408691406, + "learning_rate": 6.537327992218745e-07, + "loss": 10.3871, + "step": 425930 + }, + { + "epoch": 0.8604257485344441, + "grad_norm": 277.9728088378906, + "learning_rate": 6.535602432730432e-07, + "loss": 13.4423, + "step": 425940 + }, + { + "epoch": 0.8604459491671279, + "grad_norm": 355.0103759765625, + "learning_rate": 6.533877085082307e-07, + "loss": 12.4517, + "step": 425950 + }, + { + "epoch": 0.8604661497998117, + "grad_norm": 258.963623046875, + "learning_rate": 6.532151949282811e-07, + "loss": 17.3157, + "step": 425960 + }, + { + "epoch": 0.8604863504324956, + "grad_norm": 131.1427764892578, + "learning_rate": 6.53042702534033e-07, + "loss": 11.3277, + "step": 425970 + }, + { + "epoch": 0.8605065510651794, + "grad_norm": 262.5980529785156, + "learning_rate": 6.528702313263264e-07, + "loss": 15.6577, + "step": 425980 + }, + { + "epoch": 0.8605267516978632, + "grad_norm": 415.2807922363281, + "learning_rate": 6.526977813060042e-07, + "loss": 14.8311, + "step": 425990 + }, + { + "epoch": 0.860546952330547, + "grad_norm": 277.50701904296875, + "learning_rate": 6.52525352473905e-07, + "loss": 13.9653, + "step": 426000 + }, + { + "epoch": 0.8605671529632308, + "grad_norm": 771.4279174804688, + "learning_rate": 6.523529448308708e-07, + "loss": 18.6443, + "step": 426010 + }, + { + "epoch": 0.8605873535959147, + "grad_norm": 647.3812255859375, + "learning_rate": 6.521805583777396e-07, + "loss": 17.5931, + "step": 426020 + }, + { + "epoch": 0.8606075542285985, + "grad_norm": 317.23895263671875, + "learning_rate": 6.520081931153544e-07, + "loss": 17.4922, + "step": 426030 + }, + { + "epoch": 0.8606277548612823, + "grad_norm": 698.1004028320312, + "learning_rate": 6.518358490445542e-07, + "loss": 19.6825, + "step": 426040 + }, + { + "epoch": 0.8606479554939661, + "grad_norm": 270.4190979003906, + "learning_rate": 6.516635261661775e-07, + "loss": 14.7697, + "step": 426050 + }, + { + "epoch": 0.8606681561266499, + "grad_norm": 208.9061737060547, + "learning_rate": 6.514912244810662e-07, + "loss": 10.7142, + "step": 426060 + }, + { + "epoch": 0.8606883567593338, + "grad_norm": 288.39532470703125, + "learning_rate": 6.513189439900591e-07, + "loss": 16.5989, + "step": 426070 + }, + { + "epoch": 0.8607085573920176, + "grad_norm": 423.8705139160156, + "learning_rate": 6.511466846939956e-07, + "loss": 19.8654, + "step": 426080 + }, + { + "epoch": 0.8607287580247013, + "grad_norm": 144.19081115722656, + "learning_rate": 6.509744465937151e-07, + "loss": 17.9407, + "step": 426090 + }, + { + "epoch": 0.8607489586573851, + "grad_norm": 311.5626525878906, + "learning_rate": 6.508022296900601e-07, + "loss": 22.3135, + "step": 426100 + }, + { + "epoch": 0.8607691592900689, + "grad_norm": 19.088600158691406, + "learning_rate": 6.506300339838656e-07, + "loss": 6.1991, + "step": 426110 + }, + { + "epoch": 0.8607893599227527, + "grad_norm": 200.4326629638672, + "learning_rate": 6.504578594759725e-07, + "loss": 13.0629, + "step": 426120 + }, + { + "epoch": 0.8608095605554366, + "grad_norm": 175.73031616210938, + "learning_rate": 6.502857061672213e-07, + "loss": 24.9918, + "step": 426130 + }, + { + "epoch": 0.8608297611881204, + "grad_norm": 140.0410919189453, + "learning_rate": 6.501135740584502e-07, + "loss": 19.302, + "step": 426140 + }, + { + "epoch": 0.8608499618208042, + "grad_norm": 374.5769348144531, + "learning_rate": 6.499414631504969e-07, + "loss": 24.6695, + "step": 426150 + }, + { + "epoch": 0.860870162453488, + "grad_norm": 289.5231018066406, + "learning_rate": 6.497693734442007e-07, + "loss": 16.6754, + "step": 426160 + }, + { + "epoch": 0.8608903630861718, + "grad_norm": 633.6494750976562, + "learning_rate": 6.495973049404037e-07, + "loss": 17.9832, + "step": 426170 + }, + { + "epoch": 0.8609105637188557, + "grad_norm": 203.72691345214844, + "learning_rate": 6.494252576399395e-07, + "loss": 16.9029, + "step": 426180 + }, + { + "epoch": 0.8609307643515395, + "grad_norm": 121.83488464355469, + "learning_rate": 6.49253231543649e-07, + "loss": 20.0728, + "step": 426190 + }, + { + "epoch": 0.8609509649842233, + "grad_norm": 61.352237701416016, + "learning_rate": 6.490812266523716e-07, + "loss": 16.1185, + "step": 426200 + }, + { + "epoch": 0.8609711656169071, + "grad_norm": 234.11602783203125, + "learning_rate": 6.489092429669447e-07, + "loss": 14.0938, + "step": 426210 + }, + { + "epoch": 0.8609913662495909, + "grad_norm": 657.1200561523438, + "learning_rate": 6.487372804882053e-07, + "loss": 17.6869, + "step": 426220 + }, + { + "epoch": 0.8610115668822748, + "grad_norm": 588.2548828125, + "learning_rate": 6.485653392169938e-07, + "loss": 22.2285, + "step": 426230 + }, + { + "epoch": 0.8610317675149586, + "grad_norm": 148.45741271972656, + "learning_rate": 6.483934191541469e-07, + "loss": 15.8468, + "step": 426240 + }, + { + "epoch": 0.8610519681476424, + "grad_norm": 243.9552459716797, + "learning_rate": 6.482215203005016e-07, + "loss": 18.0027, + "step": 426250 + }, + { + "epoch": 0.8610721687803262, + "grad_norm": 399.02685546875, + "learning_rate": 6.480496426568983e-07, + "loss": 33.1014, + "step": 426260 + }, + { + "epoch": 0.86109236941301, + "grad_norm": 126.6474609375, + "learning_rate": 6.478777862241714e-07, + "loss": 12.1914, + "step": 426270 + }, + { + "epoch": 0.8611125700456939, + "grad_norm": 29.05548095703125, + "learning_rate": 6.477059510031619e-07, + "loss": 18.6576, + "step": 426280 + }, + { + "epoch": 0.8611327706783777, + "grad_norm": 391.8978271484375, + "learning_rate": 6.475341369947047e-07, + "loss": 18.8539, + "step": 426290 + }, + { + "epoch": 0.8611529713110615, + "grad_norm": 301.7601318359375, + "learning_rate": 6.47362344199639e-07, + "loss": 11.3853, + "step": 426300 + }, + { + "epoch": 0.8611731719437453, + "grad_norm": 172.89324951171875, + "learning_rate": 6.471905726188015e-07, + "loss": 15.71, + "step": 426310 + }, + { + "epoch": 0.8611933725764291, + "grad_norm": 511.9766540527344, + "learning_rate": 6.470188222530282e-07, + "loss": 17.3988, + "step": 426320 + }, + { + "epoch": 0.861213573209113, + "grad_norm": 495.0720520019531, + "learning_rate": 6.468470931031584e-07, + "loss": 26.0315, + "step": 426330 + }, + { + "epoch": 0.8612337738417968, + "grad_norm": 554.4364624023438, + "learning_rate": 6.466753851700264e-07, + "loss": 14.4683, + "step": 426340 + }, + { + "epoch": 0.8612539744744805, + "grad_norm": 499.3392639160156, + "learning_rate": 6.465036984544721e-07, + "loss": 24.2076, + "step": 426350 + }, + { + "epoch": 0.8612741751071643, + "grad_norm": 135.09153747558594, + "learning_rate": 6.463320329573303e-07, + "loss": 9.2683, + "step": 426360 + }, + { + "epoch": 0.8612943757398481, + "grad_norm": 318.33416748046875, + "learning_rate": 6.46160388679437e-07, + "loss": 11.1335, + "step": 426370 + }, + { + "epoch": 0.861314576372532, + "grad_norm": 261.01849365234375, + "learning_rate": 6.459887656216318e-07, + "loss": 19.9245, + "step": 426380 + }, + { + "epoch": 0.8613347770052158, + "grad_norm": 608.0852661132812, + "learning_rate": 6.458171637847488e-07, + "loss": 12.4543, + "step": 426390 + }, + { + "epoch": 0.8613549776378996, + "grad_norm": 101.21218872070312, + "learning_rate": 6.456455831696234e-07, + "loss": 18.111, + "step": 426400 + }, + { + "epoch": 0.8613751782705834, + "grad_norm": 22.630373001098633, + "learning_rate": 6.454740237770934e-07, + "loss": 19.3144, + "step": 426410 + }, + { + "epoch": 0.8613953789032672, + "grad_norm": 196.15382385253906, + "learning_rate": 6.453024856079976e-07, + "loss": 19.9625, + "step": 426420 + }, + { + "epoch": 0.861415579535951, + "grad_norm": 61.11758804321289, + "learning_rate": 6.451309686631668e-07, + "loss": 14.8085, + "step": 426430 + }, + { + "epoch": 0.8614357801686349, + "grad_norm": 243.17691040039062, + "learning_rate": 6.449594729434394e-07, + "loss": 25.2256, + "step": 426440 + }, + { + "epoch": 0.8614559808013187, + "grad_norm": 221.3637237548828, + "learning_rate": 6.447879984496525e-07, + "loss": 24.9462, + "step": 426450 + }, + { + "epoch": 0.8614761814340025, + "grad_norm": 64.93826293945312, + "learning_rate": 6.446165451826409e-07, + "loss": 17.2373, + "step": 426460 + }, + { + "epoch": 0.8614963820666863, + "grad_norm": 665.0004272460938, + "learning_rate": 6.444451131432383e-07, + "loss": 28.1167, + "step": 426470 + }, + { + "epoch": 0.8615165826993701, + "grad_norm": 742.8054809570312, + "learning_rate": 6.442737023322826e-07, + "loss": 17.7511, + "step": 426480 + }, + { + "epoch": 0.861536783332054, + "grad_norm": 982.496337890625, + "learning_rate": 6.441023127506096e-07, + "loss": 25.0334, + "step": 426490 + }, + { + "epoch": 0.8615569839647378, + "grad_norm": 602.4315185546875, + "learning_rate": 6.439309443990532e-07, + "loss": 20.5195, + "step": 426500 + }, + { + "epoch": 0.8615771845974216, + "grad_norm": 155.8933868408203, + "learning_rate": 6.437595972784483e-07, + "loss": 19.0511, + "step": 426510 + }, + { + "epoch": 0.8615973852301054, + "grad_norm": 141.42478942871094, + "learning_rate": 6.435882713896319e-07, + "loss": 21.8023, + "step": 426520 + }, + { + "epoch": 0.8616175858627892, + "grad_norm": 461.2997131347656, + "learning_rate": 6.434169667334378e-07, + "loss": 16.3485, + "step": 426530 + }, + { + "epoch": 0.8616377864954731, + "grad_norm": 254.6463623046875, + "learning_rate": 6.432456833106998e-07, + "loss": 31.786, + "step": 426540 + }, + { + "epoch": 0.8616579871281569, + "grad_norm": 549.6669921875, + "learning_rate": 6.43074421122255e-07, + "loss": 15.4421, + "step": 426550 + }, + { + "epoch": 0.8616781877608407, + "grad_norm": 163.59596252441406, + "learning_rate": 6.429031801689362e-07, + "loss": 22.5572, + "step": 426560 + }, + { + "epoch": 0.8616983883935245, + "grad_norm": 173.46153259277344, + "learning_rate": 6.427319604515797e-07, + "loss": 11.6241, + "step": 426570 + }, + { + "epoch": 0.8617185890262083, + "grad_norm": 86.23184204101562, + "learning_rate": 6.425607619710195e-07, + "loss": 14.788, + "step": 426580 + }, + { + "epoch": 0.8617387896588922, + "grad_norm": 461.0710144042969, + "learning_rate": 6.423895847280881e-07, + "loss": 14.6491, + "step": 426590 + }, + { + "epoch": 0.8617589902915759, + "grad_norm": 449.5500793457031, + "learning_rate": 6.422184287236227e-07, + "loss": 15.9077, + "step": 426600 + }, + { + "epoch": 0.8617791909242597, + "grad_norm": 248.44351196289062, + "learning_rate": 6.420472939584549e-07, + "loss": 16.0009, + "step": 426610 + }, + { + "epoch": 0.8617993915569435, + "grad_norm": 529.0774536132812, + "learning_rate": 6.418761804334212e-07, + "loss": 23.6131, + "step": 426620 + }, + { + "epoch": 0.8618195921896273, + "grad_norm": 219.2325897216797, + "learning_rate": 6.417050881493536e-07, + "loss": 23.5891, + "step": 426630 + }, + { + "epoch": 0.8618397928223112, + "grad_norm": 363.240234375, + "learning_rate": 6.415340171070877e-07, + "loss": 13.0453, + "step": 426640 + }, + { + "epoch": 0.861859993454995, + "grad_norm": 181.5204315185547, + "learning_rate": 6.413629673074562e-07, + "loss": 10.0992, + "step": 426650 + }, + { + "epoch": 0.8618801940876788, + "grad_norm": 576.6511840820312, + "learning_rate": 6.411919387512922e-07, + "loss": 18.3, + "step": 426660 + }, + { + "epoch": 0.8619003947203626, + "grad_norm": 520.0042724609375, + "learning_rate": 6.410209314394305e-07, + "loss": 38.815, + "step": 426670 + }, + { + "epoch": 0.8619205953530464, + "grad_norm": 368.30169677734375, + "learning_rate": 6.408499453727046e-07, + "loss": 34.8511, + "step": 426680 + }, + { + "epoch": 0.8619407959857303, + "grad_norm": 88.484619140625, + "learning_rate": 6.406789805519464e-07, + "loss": 12.4439, + "step": 426690 + }, + { + "epoch": 0.8619609966184141, + "grad_norm": 312.1562805175781, + "learning_rate": 6.405080369779898e-07, + "loss": 17.1228, + "step": 426700 + }, + { + "epoch": 0.8619811972510979, + "grad_norm": 394.21160888671875, + "learning_rate": 6.403371146516707e-07, + "loss": 21.2892, + "step": 426710 + }, + { + "epoch": 0.8620013978837817, + "grad_norm": 180.46238708496094, + "learning_rate": 6.401662135738174e-07, + "loss": 14.238, + "step": 426720 + }, + { + "epoch": 0.8620215985164655, + "grad_norm": 1012.4619140625, + "learning_rate": 6.399953337452652e-07, + "loss": 21.8464, + "step": 426730 + }, + { + "epoch": 0.8620417991491494, + "grad_norm": 408.7126159667969, + "learning_rate": 6.398244751668481e-07, + "loss": 12.9432, + "step": 426740 + }, + { + "epoch": 0.8620619997818332, + "grad_norm": 258.2486877441406, + "learning_rate": 6.396536378393975e-07, + "loss": 30.4977, + "step": 426750 + }, + { + "epoch": 0.862082200414517, + "grad_norm": 318.1420593261719, + "learning_rate": 6.394828217637455e-07, + "loss": 13.0637, + "step": 426760 + }, + { + "epoch": 0.8621024010472008, + "grad_norm": 563.1136474609375, + "learning_rate": 6.393120269407249e-07, + "loss": 30.0762, + "step": 426770 + }, + { + "epoch": 0.8621226016798846, + "grad_norm": 844.4476928710938, + "learning_rate": 6.391412533711711e-07, + "loss": 25.8394, + "step": 426780 + }, + { + "epoch": 0.8621428023125685, + "grad_norm": 198.59259033203125, + "learning_rate": 6.389705010559117e-07, + "loss": 15.6251, + "step": 426790 + }, + { + "epoch": 0.8621630029452523, + "grad_norm": 59.635990142822266, + "learning_rate": 6.387997699957815e-07, + "loss": 14.6566, + "step": 426800 + }, + { + "epoch": 0.8621832035779361, + "grad_norm": 131.53367614746094, + "learning_rate": 6.386290601916129e-07, + "loss": 12.1609, + "step": 426810 + }, + { + "epoch": 0.8622034042106199, + "grad_norm": 647.2199096679688, + "learning_rate": 6.384583716442371e-07, + "loss": 28.2555, + "step": 426820 + }, + { + "epoch": 0.8622236048433037, + "grad_norm": 488.17181396484375, + "learning_rate": 6.382877043544855e-07, + "loss": 8.1615, + "step": 426830 + }, + { + "epoch": 0.8622438054759876, + "grad_norm": 1174.1416015625, + "learning_rate": 6.381170583231916e-07, + "loss": 23.6752, + "step": 426840 + }, + { + "epoch": 0.8622640061086714, + "grad_norm": 1267.3780517578125, + "learning_rate": 6.379464335511859e-07, + "loss": 37.9311, + "step": 426850 + }, + { + "epoch": 0.8622842067413551, + "grad_norm": 1164.790771484375, + "learning_rate": 6.377758300392994e-07, + "loss": 21.5773, + "step": 426860 + }, + { + "epoch": 0.8623044073740389, + "grad_norm": 257.47064208984375, + "learning_rate": 6.376052477883655e-07, + "loss": 18.7969, + "step": 426870 + }, + { + "epoch": 0.8623246080067227, + "grad_norm": 348.642333984375, + "learning_rate": 6.374346867992138e-07, + "loss": 10.7251, + "step": 426880 + }, + { + "epoch": 0.8623448086394065, + "grad_norm": 360.3748779296875, + "learning_rate": 6.372641470726765e-07, + "loss": 20.3781, + "step": 426890 + }, + { + "epoch": 0.8623650092720904, + "grad_norm": 258.9471130371094, + "learning_rate": 6.370936286095842e-07, + "loss": 12.6942, + "step": 426900 + }, + { + "epoch": 0.8623852099047742, + "grad_norm": 261.1137390136719, + "learning_rate": 6.369231314107693e-07, + "loss": 19.6193, + "step": 426910 + }, + { + "epoch": 0.862405410537458, + "grad_norm": 265.93951416015625, + "learning_rate": 6.36752655477062e-07, + "loss": 24.8202, + "step": 426920 + }, + { + "epoch": 0.8624256111701418, + "grad_norm": 3038.77490234375, + "learning_rate": 6.36582200809292e-07, + "loss": 29.0701, + "step": 426930 + }, + { + "epoch": 0.8624458118028256, + "grad_norm": 328.9877624511719, + "learning_rate": 6.36411767408292e-07, + "loss": 17.7837, + "step": 426940 + }, + { + "epoch": 0.8624660124355095, + "grad_norm": 327.4744873046875, + "learning_rate": 6.362413552748908e-07, + "loss": 33.9574, + "step": 426950 + }, + { + "epoch": 0.8624862130681933, + "grad_norm": 380.6587219238281, + "learning_rate": 6.360709644099211e-07, + "loss": 20.3324, + "step": 426960 + }, + { + "epoch": 0.8625064137008771, + "grad_norm": 335.7801208496094, + "learning_rate": 6.359005948142122e-07, + "loss": 19.1392, + "step": 426970 + }, + { + "epoch": 0.8625266143335609, + "grad_norm": 442.7544860839844, + "learning_rate": 6.357302464885934e-07, + "loss": 15.0711, + "step": 426980 + }, + { + "epoch": 0.8625468149662447, + "grad_norm": 639.4885864257812, + "learning_rate": 6.355599194338974e-07, + "loss": 28.2678, + "step": 426990 + }, + { + "epoch": 0.8625670155989286, + "grad_norm": 170.79458618164062, + "learning_rate": 6.353896136509524e-07, + "loss": 11.3887, + "step": 427000 + }, + { + "epoch": 0.8625872162316124, + "grad_norm": 80.47013854980469, + "learning_rate": 6.352193291405884e-07, + "loss": 9.2106, + "step": 427010 + }, + { + "epoch": 0.8626074168642962, + "grad_norm": 607.91259765625, + "learning_rate": 6.350490659036362e-07, + "loss": 23.6392, + "step": 427020 + }, + { + "epoch": 0.86262761749698, + "grad_norm": 184.9613494873047, + "learning_rate": 6.348788239409271e-07, + "loss": 11.4782, + "step": 427030 + }, + { + "epoch": 0.8626478181296638, + "grad_norm": 74.06925201416016, + "learning_rate": 6.347086032532873e-07, + "loss": 13.4751, + "step": 427040 + }, + { + "epoch": 0.8626680187623477, + "grad_norm": 476.8874816894531, + "learning_rate": 6.345384038415486e-07, + "loss": 14.9326, + "step": 427050 + }, + { + "epoch": 0.8626882193950315, + "grad_norm": 196.14111328125, + "learning_rate": 6.343682257065408e-07, + "loss": 15.7272, + "step": 427060 + }, + { + "epoch": 0.8627084200277153, + "grad_norm": 679.0256958007812, + "learning_rate": 6.341980688490934e-07, + "loss": 38.8679, + "step": 427070 + }, + { + "epoch": 0.8627286206603991, + "grad_norm": 1150.7294921875, + "learning_rate": 6.340279332700333e-07, + "loss": 22.0961, + "step": 427080 + }, + { + "epoch": 0.862748821293083, + "grad_norm": 202.36204528808594, + "learning_rate": 6.338578189701921e-07, + "loss": 12.8232, + "step": 427090 + }, + { + "epoch": 0.8627690219257668, + "grad_norm": 753.1251831054688, + "learning_rate": 6.336877259504004e-07, + "loss": 15.3143, + "step": 427100 + }, + { + "epoch": 0.8627892225584506, + "grad_norm": 265.86480712890625, + "learning_rate": 6.335176542114829e-07, + "loss": 21.6381, + "step": 427110 + }, + { + "epoch": 0.8628094231911343, + "grad_norm": 627.78076171875, + "learning_rate": 6.333476037542707e-07, + "loss": 20.5898, + "step": 427120 + }, + { + "epoch": 0.8628296238238181, + "grad_norm": 273.6964416503906, + "learning_rate": 6.331775745795937e-07, + "loss": 28.021, + "step": 427130 + }, + { + "epoch": 0.8628498244565019, + "grad_norm": 639.4112548828125, + "learning_rate": 6.330075666882795e-07, + "loss": 22.1546, + "step": 427140 + }, + { + "epoch": 0.8628700250891858, + "grad_norm": 623.170166015625, + "learning_rate": 6.328375800811559e-07, + "loss": 19.3433, + "step": 427150 + }, + { + "epoch": 0.8628902257218696, + "grad_norm": 471.6022033691406, + "learning_rate": 6.326676147590533e-07, + "loss": 19.5129, + "step": 427160 + }, + { + "epoch": 0.8629104263545534, + "grad_norm": 424.69146728515625, + "learning_rate": 6.324976707227993e-07, + "loss": 6.4254, + "step": 427170 + }, + { + "epoch": 0.8629306269872372, + "grad_norm": 0.0, + "learning_rate": 6.323277479732203e-07, + "loss": 13.1277, + "step": 427180 + }, + { + "epoch": 0.862950827619921, + "grad_norm": 962.615234375, + "learning_rate": 6.321578465111478e-07, + "loss": 26.358, + "step": 427190 + }, + { + "epoch": 0.8629710282526049, + "grad_norm": 536.439453125, + "learning_rate": 6.319879663374068e-07, + "loss": 22.4644, + "step": 427200 + }, + { + "epoch": 0.8629912288852887, + "grad_norm": 307.97930908203125, + "learning_rate": 6.318181074528279e-07, + "loss": 26.8442, + "step": 427210 + }, + { + "epoch": 0.8630114295179725, + "grad_norm": 415.0699768066406, + "learning_rate": 6.316482698582365e-07, + "loss": 9.4954, + "step": 427220 + }, + { + "epoch": 0.8630316301506563, + "grad_norm": 51.516761779785156, + "learning_rate": 6.314784535544627e-07, + "loss": 21.2377, + "step": 427230 + }, + { + "epoch": 0.8630518307833401, + "grad_norm": 110.73899841308594, + "learning_rate": 6.313086585423316e-07, + "loss": 14.6737, + "step": 427240 + }, + { + "epoch": 0.863072031416024, + "grad_norm": 384.1722106933594, + "learning_rate": 6.311388848226741e-07, + "loss": 21.8881, + "step": 427250 + }, + { + "epoch": 0.8630922320487078, + "grad_norm": 262.3652648925781, + "learning_rate": 6.309691323963152e-07, + "loss": 23.4864, + "step": 427260 + }, + { + "epoch": 0.8631124326813916, + "grad_norm": 3200.091552734375, + "learning_rate": 6.307994012640822e-07, + "loss": 31.6897, + "step": 427270 + }, + { + "epoch": 0.8631326333140754, + "grad_norm": 670.214599609375, + "learning_rate": 6.30629691426804e-07, + "loss": 18.9, + "step": 427280 + }, + { + "epoch": 0.8631528339467592, + "grad_norm": 600.1300659179688, + "learning_rate": 6.304600028853065e-07, + "loss": 17.5757, + "step": 427290 + }, + { + "epoch": 0.863173034579443, + "grad_norm": 689.7635498046875, + "learning_rate": 6.302903356404161e-07, + "loss": 19.7232, + "step": 427300 + }, + { + "epoch": 0.8631932352121269, + "grad_norm": 251.3146514892578, + "learning_rate": 6.301206896929607e-07, + "loss": 18.1964, + "step": 427310 + }, + { + "epoch": 0.8632134358448107, + "grad_norm": 415.42327880859375, + "learning_rate": 6.29951065043769e-07, + "loss": 17.6401, + "step": 427320 + }, + { + "epoch": 0.8632336364774945, + "grad_norm": 302.36083984375, + "learning_rate": 6.297814616936637e-07, + "loss": 23.2883, + "step": 427330 + }, + { + "epoch": 0.8632538371101783, + "grad_norm": 387.613037109375, + "learning_rate": 6.296118796434735e-07, + "loss": 16.3554, + "step": 427340 + }, + { + "epoch": 0.8632740377428622, + "grad_norm": 283.42108154296875, + "learning_rate": 6.294423188940263e-07, + "loss": 10.3294, + "step": 427350 + }, + { + "epoch": 0.863294238375546, + "grad_norm": 270.95489501953125, + "learning_rate": 6.292727794461468e-07, + "loss": 21.8911, + "step": 427360 + }, + { + "epoch": 0.8633144390082297, + "grad_norm": 473.91461181640625, + "learning_rate": 6.291032613006604e-07, + "loss": 23.1282, + "step": 427370 + }, + { + "epoch": 0.8633346396409135, + "grad_norm": 404.8705749511719, + "learning_rate": 6.289337644583949e-07, + "loss": 11.5368, + "step": 427380 + }, + { + "epoch": 0.8633548402735973, + "grad_norm": 357.8258361816406, + "learning_rate": 6.287642889201783e-07, + "loss": 16.3562, + "step": 427390 + }, + { + "epoch": 0.8633750409062811, + "grad_norm": 366.5943298339844, + "learning_rate": 6.28594834686832e-07, + "loss": 16.7686, + "step": 427400 + }, + { + "epoch": 0.863395241538965, + "grad_norm": 202.032470703125, + "learning_rate": 6.284254017591845e-07, + "loss": 14.6638, + "step": 427410 + }, + { + "epoch": 0.8634154421716488, + "grad_norm": 290.3363342285156, + "learning_rate": 6.282559901380625e-07, + "loss": 15.5777, + "step": 427420 + }, + { + "epoch": 0.8634356428043326, + "grad_norm": 314.4226989746094, + "learning_rate": 6.280865998242908e-07, + "loss": 17.3926, + "step": 427430 + }, + { + "epoch": 0.8634558434370164, + "grad_norm": 367.91033935546875, + "learning_rate": 6.279172308186931e-07, + "loss": 18.545, + "step": 427440 + }, + { + "epoch": 0.8634760440697002, + "grad_norm": 329.7330322265625, + "learning_rate": 6.277478831220979e-07, + "loss": 29.4346, + "step": 427450 + }, + { + "epoch": 0.8634962447023841, + "grad_norm": 199.14669799804688, + "learning_rate": 6.275785567353293e-07, + "loss": 13.117, + "step": 427460 + }, + { + "epoch": 0.8635164453350679, + "grad_norm": 242.6150360107422, + "learning_rate": 6.274092516592111e-07, + "loss": 8.73, + "step": 427470 + }, + { + "epoch": 0.8635366459677517, + "grad_norm": 316.0828857421875, + "learning_rate": 6.272399678945712e-07, + "loss": 10.5379, + "step": 427480 + }, + { + "epoch": 0.8635568466004355, + "grad_norm": 629.6071166992188, + "learning_rate": 6.27070705442232e-07, + "loss": 18.8083, + "step": 427490 + }, + { + "epoch": 0.8635770472331193, + "grad_norm": 554.4690551757812, + "learning_rate": 6.269014643030214e-07, + "loss": 20.6939, + "step": 427500 + }, + { + "epoch": 0.8635972478658032, + "grad_norm": 251.9290313720703, + "learning_rate": 6.267322444777612e-07, + "loss": 13.3097, + "step": 427510 + }, + { + "epoch": 0.863617448498487, + "grad_norm": 288.9092712402344, + "learning_rate": 6.265630459672789e-07, + "loss": 9.0264, + "step": 427520 + }, + { + "epoch": 0.8636376491311708, + "grad_norm": 124.1415786743164, + "learning_rate": 6.263938687723981e-07, + "loss": 21.6229, + "step": 427530 + }, + { + "epoch": 0.8636578497638546, + "grad_norm": 357.16436767578125, + "learning_rate": 6.262247128939414e-07, + "loss": 10.0365, + "step": 427540 + }, + { + "epoch": 0.8636780503965384, + "grad_norm": 393.6729431152344, + "learning_rate": 6.260555783327366e-07, + "loss": 19.359, + "step": 427550 + }, + { + "epoch": 0.8636982510292223, + "grad_norm": 0.5614365339279175, + "learning_rate": 6.258864650896051e-07, + "loss": 10.1452, + "step": 427560 + }, + { + "epoch": 0.8637184516619061, + "grad_norm": 841.2181396484375, + "learning_rate": 6.257173731653738e-07, + "loss": 24.4382, + "step": 427570 + }, + { + "epoch": 0.8637386522945899, + "grad_norm": 343.8075256347656, + "learning_rate": 6.25548302560865e-07, + "loss": 24.539, + "step": 427580 + }, + { + "epoch": 0.8637588529272737, + "grad_norm": 555.2030029296875, + "learning_rate": 6.253792532769026e-07, + "loss": 12.5351, + "step": 427590 + }, + { + "epoch": 0.8637790535599575, + "grad_norm": 549.779052734375, + "learning_rate": 6.252102253143122e-07, + "loss": 26.9366, + "step": 427600 + }, + { + "epoch": 0.8637992541926414, + "grad_norm": 707.0358276367188, + "learning_rate": 6.250412186739163e-07, + "loss": 17.0704, + "step": 427610 + }, + { + "epoch": 0.8638194548253252, + "grad_norm": 679.9314575195312, + "learning_rate": 6.248722333565377e-07, + "loss": 19.7051, + "step": 427620 + }, + { + "epoch": 0.8638396554580089, + "grad_norm": 125.01164245605469, + "learning_rate": 6.247032693630012e-07, + "loss": 24.9688, + "step": 427630 + }, + { + "epoch": 0.8638598560906927, + "grad_norm": 502.79290771484375, + "learning_rate": 6.245343266941328e-07, + "loss": 18.026, + "step": 427640 + }, + { + "epoch": 0.8638800567233765, + "grad_norm": 7.125823020935059, + "learning_rate": 6.243654053507515e-07, + "loss": 10.2686, + "step": 427650 + }, + { + "epoch": 0.8639002573560604, + "grad_norm": 999.5502319335938, + "learning_rate": 6.241965053336818e-07, + "loss": 19.1553, + "step": 427660 + }, + { + "epoch": 0.8639204579887442, + "grad_norm": 560.1099853515625, + "learning_rate": 6.24027626643749e-07, + "loss": 19.8785, + "step": 427670 + }, + { + "epoch": 0.863940658621428, + "grad_norm": 584.9874877929688, + "learning_rate": 6.238587692817749e-07, + "loss": 12.1768, + "step": 427680 + }, + { + "epoch": 0.8639608592541118, + "grad_norm": 467.38824462890625, + "learning_rate": 6.236899332485813e-07, + "loss": 15.8237, + "step": 427690 + }, + { + "epoch": 0.8639810598867956, + "grad_norm": 1058.6409912109375, + "learning_rate": 6.235211185449919e-07, + "loss": 14.8201, + "step": 427700 + }, + { + "epoch": 0.8640012605194795, + "grad_norm": 332.8944091796875, + "learning_rate": 6.233523251718321e-07, + "loss": 15.9929, + "step": 427710 + }, + { + "epoch": 0.8640214611521633, + "grad_norm": 237.4763946533203, + "learning_rate": 6.231835531299202e-07, + "loss": 24.3467, + "step": 427720 + }, + { + "epoch": 0.8640416617848471, + "grad_norm": 355.6075744628906, + "learning_rate": 6.23014802420081e-07, + "loss": 19.6161, + "step": 427730 + }, + { + "epoch": 0.8640618624175309, + "grad_norm": 207.7703399658203, + "learning_rate": 6.228460730431374e-07, + "loss": 12.1483, + "step": 427740 + }, + { + "epoch": 0.8640820630502147, + "grad_norm": 235.3800811767578, + "learning_rate": 6.226773649999113e-07, + "loss": 11.0554, + "step": 427750 + }, + { + "epoch": 0.8641022636828986, + "grad_norm": 405.9524230957031, + "learning_rate": 6.225086782912237e-07, + "loss": 21.9891, + "step": 427760 + }, + { + "epoch": 0.8641224643155824, + "grad_norm": 145.1119842529297, + "learning_rate": 6.223400129178992e-07, + "loss": 9.3089, + "step": 427770 + }, + { + "epoch": 0.8641426649482662, + "grad_norm": 275.88665771484375, + "learning_rate": 6.221713688807585e-07, + "loss": 32.3242, + "step": 427780 + }, + { + "epoch": 0.86416286558095, + "grad_norm": 425.7492980957031, + "learning_rate": 6.220027461806222e-07, + "loss": 10.1099, + "step": 427790 + }, + { + "epoch": 0.8641830662136338, + "grad_norm": 280.409423828125, + "learning_rate": 6.218341448183141e-07, + "loss": 23.433, + "step": 427800 + }, + { + "epoch": 0.8642032668463177, + "grad_norm": 453.26947021484375, + "learning_rate": 6.216655647946556e-07, + "loss": 14.3934, + "step": 427810 + }, + { + "epoch": 0.8642234674790015, + "grad_norm": 234.53407287597656, + "learning_rate": 6.214970061104686e-07, + "loss": 29.2459, + "step": 427820 + }, + { + "epoch": 0.8642436681116853, + "grad_norm": 383.7027587890625, + "learning_rate": 6.213284687665733e-07, + "loss": 25.2639, + "step": 427830 + }, + { + "epoch": 0.8642638687443691, + "grad_norm": 490.578125, + "learning_rate": 6.21159952763793e-07, + "loss": 13.2446, + "step": 427840 + }, + { + "epoch": 0.8642840693770529, + "grad_norm": 291.77685546875, + "learning_rate": 6.209914581029474e-07, + "loss": 32.4509, + "step": 427850 + }, + { + "epoch": 0.8643042700097368, + "grad_norm": 213.53958129882812, + "learning_rate": 6.20822984784858e-07, + "loss": 17.6725, + "step": 427860 + }, + { + "epoch": 0.8643244706424206, + "grad_norm": 241.1728057861328, + "learning_rate": 6.20654532810347e-07, + "loss": 19.7326, + "step": 427870 + }, + { + "epoch": 0.8643446712751043, + "grad_norm": 178.4251251220703, + "learning_rate": 6.204861021802333e-07, + "loss": 9.0884, + "step": 427880 + }, + { + "epoch": 0.8643648719077881, + "grad_norm": 417.46343994140625, + "learning_rate": 6.203176928953403e-07, + "loss": 17.9481, + "step": 427890 + }, + { + "epoch": 0.8643850725404719, + "grad_norm": 1527.8167724609375, + "learning_rate": 6.201493049564883e-07, + "loss": 14.5531, + "step": 427900 + }, + { + "epoch": 0.8644052731731557, + "grad_norm": 728.8560180664062, + "learning_rate": 6.199809383644956e-07, + "loss": 12.0885, + "step": 427910 + }, + { + "epoch": 0.8644254738058396, + "grad_norm": 236.08883666992188, + "learning_rate": 6.198125931201848e-07, + "loss": 21.4588, + "step": 427920 + }, + { + "epoch": 0.8644456744385234, + "grad_norm": 260.3927917480469, + "learning_rate": 6.196442692243787e-07, + "loss": 20.7814, + "step": 427930 + }, + { + "epoch": 0.8644658750712072, + "grad_norm": 662.6884155273438, + "learning_rate": 6.194759666778927e-07, + "loss": 19.546, + "step": 427940 + }, + { + "epoch": 0.864486075703891, + "grad_norm": 499.678466796875, + "learning_rate": 6.193076854815494e-07, + "loss": 19.0645, + "step": 427950 + }, + { + "epoch": 0.8645062763365748, + "grad_norm": 537.9235229492188, + "learning_rate": 6.191394256361699e-07, + "loss": 24.221, + "step": 427960 + }, + { + "epoch": 0.8645264769692587, + "grad_norm": 395.36358642578125, + "learning_rate": 6.189711871425741e-07, + "loss": 14.9715, + "step": 427970 + }, + { + "epoch": 0.8645466776019425, + "grad_norm": 348.86285400390625, + "learning_rate": 6.188029700015802e-07, + "loss": 13.2189, + "step": 427980 + }, + { + "epoch": 0.8645668782346263, + "grad_norm": 371.9176330566406, + "learning_rate": 6.186347742140092e-07, + "loss": 29.0738, + "step": 427990 + }, + { + "epoch": 0.8645870788673101, + "grad_norm": 878.5059814453125, + "learning_rate": 6.184665997806832e-07, + "loss": 22.1013, + "step": 428000 + }, + { + "epoch": 0.8646072794999939, + "grad_norm": 426.17926025390625, + "learning_rate": 6.182984467024173e-07, + "loss": 19.0282, + "step": 428010 + }, + { + "epoch": 0.8646274801326778, + "grad_norm": 299.5845031738281, + "learning_rate": 6.181303149800333e-07, + "loss": 18.7745, + "step": 428020 + }, + { + "epoch": 0.8646476807653616, + "grad_norm": 98.38887023925781, + "learning_rate": 6.179622046143513e-07, + "loss": 19.949, + "step": 428030 + }, + { + "epoch": 0.8646678813980454, + "grad_norm": 340.8511657714844, + "learning_rate": 6.177941156061906e-07, + "loss": 8.3363, + "step": 428040 + }, + { + "epoch": 0.8646880820307292, + "grad_norm": 50.793827056884766, + "learning_rate": 6.17626047956369e-07, + "loss": 15.9434, + "step": 428050 + }, + { + "epoch": 0.864708282663413, + "grad_norm": 231.07606506347656, + "learning_rate": 6.174580016657073e-07, + "loss": 13.5634, + "step": 428060 + }, + { + "epoch": 0.8647284832960969, + "grad_norm": 1061.10302734375, + "learning_rate": 6.172899767350238e-07, + "loss": 16.6211, + "step": 428070 + }, + { + "epoch": 0.8647486839287807, + "grad_norm": 386.4588623046875, + "learning_rate": 6.171219731651362e-07, + "loss": 24.283, + "step": 428080 + }, + { + "epoch": 0.8647688845614645, + "grad_norm": 548.4069213867188, + "learning_rate": 6.169539909568656e-07, + "loss": 17.2704, + "step": 428090 + }, + { + "epoch": 0.8647890851941483, + "grad_norm": 411.5999450683594, + "learning_rate": 6.167860301110284e-07, + "loss": 20.6446, + "step": 428100 + }, + { + "epoch": 0.8648092858268321, + "grad_norm": 124.37580871582031, + "learning_rate": 6.166180906284458e-07, + "loss": 21.915, + "step": 428110 + }, + { + "epoch": 0.864829486459516, + "grad_norm": 51.3354377746582, + "learning_rate": 6.164501725099342e-07, + "loss": 20.3813, + "step": 428120 + }, + { + "epoch": 0.8648496870921998, + "grad_norm": 313.509521484375, + "learning_rate": 6.162822757563136e-07, + "loss": 16.0153, + "step": 428130 + }, + { + "epoch": 0.8648698877248835, + "grad_norm": 571.181884765625, + "learning_rate": 6.161144003684017e-07, + "loss": 21.6709, + "step": 428140 + }, + { + "epoch": 0.8648900883575673, + "grad_norm": 57.8526496887207, + "learning_rate": 6.159465463470149e-07, + "loss": 13.1453, + "step": 428150 + }, + { + "epoch": 0.8649102889902511, + "grad_norm": 282.853515625, + "learning_rate": 6.157787136929743e-07, + "loss": 13.9466, + "step": 428160 + }, + { + "epoch": 0.864930489622935, + "grad_norm": 47.31289291381836, + "learning_rate": 6.156109024070955e-07, + "loss": 19.1847, + "step": 428170 + }, + { + "epoch": 0.8649506902556188, + "grad_norm": 368.8774719238281, + "learning_rate": 6.154431124901983e-07, + "loss": 13.2376, + "step": 428180 + }, + { + "epoch": 0.8649708908883026, + "grad_norm": 938.6478271484375, + "learning_rate": 6.152753439430997e-07, + "loss": 18.7021, + "step": 428190 + }, + { + "epoch": 0.8649910915209864, + "grad_norm": 505.9247131347656, + "learning_rate": 6.151075967666165e-07, + "loss": 21.0578, + "step": 428200 + }, + { + "epoch": 0.8650112921536702, + "grad_norm": 360.1973571777344, + "learning_rate": 6.149398709615678e-07, + "loss": 14.0969, + "step": 428210 + }, + { + "epoch": 0.865031492786354, + "grad_norm": 419.23187255859375, + "learning_rate": 6.147721665287703e-07, + "loss": 9.2271, + "step": 428220 + }, + { + "epoch": 0.8650516934190379, + "grad_norm": 2.7760090827941895, + "learning_rate": 6.146044834690401e-07, + "loss": 10.1077, + "step": 428230 + }, + { + "epoch": 0.8650718940517217, + "grad_norm": 281.9516906738281, + "learning_rate": 6.144368217831965e-07, + "loss": 8.2609, + "step": 428240 + }, + { + "epoch": 0.8650920946844055, + "grad_norm": 337.73345947265625, + "learning_rate": 6.142691814720575e-07, + "loss": 20.3037, + "step": 428250 + }, + { + "epoch": 0.8651122953170893, + "grad_norm": 430.0217590332031, + "learning_rate": 6.141015625364366e-07, + "loss": 11.1121, + "step": 428260 + }, + { + "epoch": 0.8651324959497732, + "grad_norm": 234.089599609375, + "learning_rate": 6.139339649771525e-07, + "loss": 16.6557, + "step": 428270 + }, + { + "epoch": 0.865152696582457, + "grad_norm": 211.0261993408203, + "learning_rate": 6.137663887950235e-07, + "loss": 13.0847, + "step": 428280 + }, + { + "epoch": 0.8651728972151408, + "grad_norm": 1035.0299072265625, + "learning_rate": 6.135988339908655e-07, + "loss": 20.2821, + "step": 428290 + }, + { + "epoch": 0.8651930978478246, + "grad_norm": 463.2383728027344, + "learning_rate": 6.134313005654929e-07, + "loss": 17.1917, + "step": 428300 + }, + { + "epoch": 0.8652132984805084, + "grad_norm": 309.8400573730469, + "learning_rate": 6.132637885197251e-07, + "loss": 20.0468, + "step": 428310 + }, + { + "epoch": 0.8652334991131923, + "grad_norm": 226.93862915039062, + "learning_rate": 6.130962978543792e-07, + "loss": 12.4476, + "step": 428320 + }, + { + "epoch": 0.8652536997458761, + "grad_norm": 284.1596374511719, + "learning_rate": 6.129288285702672e-07, + "loss": 13.2608, + "step": 428330 + }, + { + "epoch": 0.8652739003785599, + "grad_norm": 140.4319610595703, + "learning_rate": 6.127613806682087e-07, + "loss": 22.1714, + "step": 428340 + }, + { + "epoch": 0.8652941010112437, + "grad_norm": 279.0960693359375, + "learning_rate": 6.1259395414902e-07, + "loss": 10.1577, + "step": 428350 + }, + { + "epoch": 0.8653143016439275, + "grad_norm": 213.5045166015625, + "learning_rate": 6.124265490135161e-07, + "loss": 18.4615, + "step": 428360 + }, + { + "epoch": 0.8653345022766114, + "grad_norm": 517.26416015625, + "learning_rate": 6.122591652625126e-07, + "loss": 14.2572, + "step": 428370 + }, + { + "epoch": 0.8653547029092952, + "grad_norm": 994.1997680664062, + "learning_rate": 6.120918028968265e-07, + "loss": 15.3615, + "step": 428380 + }, + { + "epoch": 0.8653749035419789, + "grad_norm": 991.473876953125, + "learning_rate": 6.119244619172727e-07, + "loss": 25.2808, + "step": 428390 + }, + { + "epoch": 0.8653951041746627, + "grad_norm": 304.0759582519531, + "learning_rate": 6.117571423246655e-07, + "loss": 18.7912, + "step": 428400 + }, + { + "epoch": 0.8654153048073465, + "grad_norm": 199.3155059814453, + "learning_rate": 6.11589844119822e-07, + "loss": 8.4047, + "step": 428410 + }, + { + "epoch": 0.8654355054400303, + "grad_norm": 1094.4544677734375, + "learning_rate": 6.114225673035584e-07, + "loss": 21.4444, + "step": 428420 + }, + { + "epoch": 0.8654557060727142, + "grad_norm": 207.91004943847656, + "learning_rate": 6.112553118766889e-07, + "loss": 25.2106, + "step": 428430 + }, + { + "epoch": 0.865475906705398, + "grad_norm": 201.0609588623047, + "learning_rate": 6.110880778400275e-07, + "loss": 17.329, + "step": 428440 + }, + { + "epoch": 0.8654961073380818, + "grad_norm": 327.4130859375, + "learning_rate": 6.109208651943921e-07, + "loss": 18.1264, + "step": 428450 + }, + { + "epoch": 0.8655163079707656, + "grad_norm": 22.62371253967285, + "learning_rate": 6.107536739405956e-07, + "loss": 17.9029, + "step": 428460 + }, + { + "epoch": 0.8655365086034494, + "grad_norm": 56.47309875488281, + "learning_rate": 6.105865040794523e-07, + "loss": 22.641, + "step": 428470 + }, + { + "epoch": 0.8655567092361333, + "grad_norm": 569.1929321289062, + "learning_rate": 6.104193556117793e-07, + "loss": 22.5186, + "step": 428480 + }, + { + "epoch": 0.8655769098688171, + "grad_norm": 389.8046875, + "learning_rate": 6.102522285383888e-07, + "loss": 10.6147, + "step": 428490 + }, + { + "epoch": 0.8655971105015009, + "grad_norm": 299.5357360839844, + "learning_rate": 6.100851228600974e-07, + "loss": 28.5346, + "step": 428500 + }, + { + "epoch": 0.8656173111341847, + "grad_norm": 633.14501953125, + "learning_rate": 6.099180385777192e-07, + "loss": 20.1365, + "step": 428510 + }, + { + "epoch": 0.8656375117668685, + "grad_norm": 677.9397583007812, + "learning_rate": 6.097509756920667e-07, + "loss": 25.9088, + "step": 428520 + }, + { + "epoch": 0.8656577123995524, + "grad_norm": 182.68701171875, + "learning_rate": 6.095839342039561e-07, + "loss": 13.7292, + "step": 428530 + }, + { + "epoch": 0.8656779130322362, + "grad_norm": 308.7190246582031, + "learning_rate": 6.094169141142014e-07, + "loss": 17.2095, + "step": 428540 + }, + { + "epoch": 0.86569811366492, + "grad_norm": 33.39704132080078, + "learning_rate": 6.092499154236148e-07, + "loss": 14.2919, + "step": 428550 + }, + { + "epoch": 0.8657183142976038, + "grad_norm": 427.1455383300781, + "learning_rate": 6.090829381330116e-07, + "loss": 21.8508, + "step": 428560 + }, + { + "epoch": 0.8657385149302876, + "grad_norm": 329.7239990234375, + "learning_rate": 6.089159822432073e-07, + "loss": 15.2621, + "step": 428570 + }, + { + "epoch": 0.8657587155629715, + "grad_norm": 314.5326843261719, + "learning_rate": 6.087490477550129e-07, + "loss": 20.3163, + "step": 428580 + }, + { + "epoch": 0.8657789161956553, + "grad_norm": 572.4465942382812, + "learning_rate": 6.085821346692427e-07, + "loss": 16.2445, + "step": 428590 + }, + { + "epoch": 0.8657991168283391, + "grad_norm": 400.1139831542969, + "learning_rate": 6.084152429867113e-07, + "loss": 13.0832, + "step": 428600 + }, + { + "epoch": 0.8658193174610229, + "grad_norm": 211.2682342529297, + "learning_rate": 6.082483727082317e-07, + "loss": 19.7007, + "step": 428610 + }, + { + "epoch": 0.8658395180937067, + "grad_norm": 22.129629135131836, + "learning_rate": 6.080815238346155e-07, + "loss": 12.4625, + "step": 428620 + }, + { + "epoch": 0.8658597187263906, + "grad_norm": 177.38450622558594, + "learning_rate": 6.079146963666777e-07, + "loss": 19.3025, + "step": 428630 + }, + { + "epoch": 0.8658799193590744, + "grad_norm": 248.06504821777344, + "learning_rate": 6.077478903052314e-07, + "loss": 10.8978, + "step": 428640 + }, + { + "epoch": 0.8659001199917581, + "grad_norm": 420.5555419921875, + "learning_rate": 6.075811056510894e-07, + "loss": 22.3603, + "step": 428650 + }, + { + "epoch": 0.8659203206244419, + "grad_norm": 462.0986633300781, + "learning_rate": 6.074143424050638e-07, + "loss": 12.2093, + "step": 428660 + }, + { + "epoch": 0.8659405212571257, + "grad_norm": 136.93148803710938, + "learning_rate": 6.072476005679684e-07, + "loss": 17.8271, + "step": 428670 + }, + { + "epoch": 0.8659607218898095, + "grad_norm": 218.40277099609375, + "learning_rate": 6.070808801406158e-07, + "loss": 14.5372, + "step": 428680 + }, + { + "epoch": 0.8659809225224934, + "grad_norm": 508.3802185058594, + "learning_rate": 6.069141811238166e-07, + "loss": 28.8907, + "step": 428690 + }, + { + "epoch": 0.8660011231551772, + "grad_norm": 515.6898803710938, + "learning_rate": 6.067475035183862e-07, + "loss": 19.3834, + "step": 428700 + }, + { + "epoch": 0.866021323787861, + "grad_norm": 187.1757354736328, + "learning_rate": 6.06580847325135e-07, + "loss": 23.0241, + "step": 428710 + }, + { + "epoch": 0.8660415244205448, + "grad_norm": 411.40753173828125, + "learning_rate": 6.064142125448763e-07, + "loss": 15.513, + "step": 428720 + }, + { + "epoch": 0.8660617250532286, + "grad_norm": 325.0508728027344, + "learning_rate": 6.062475991784211e-07, + "loss": 9.4605, + "step": 428730 + }, + { + "epoch": 0.8660819256859125, + "grad_norm": 573.1134643554688, + "learning_rate": 6.060810072265833e-07, + "loss": 16.3213, + "step": 428740 + }, + { + "epoch": 0.8661021263185963, + "grad_norm": 238.4241943359375, + "learning_rate": 6.059144366901737e-07, + "loss": 24.3547, + "step": 428750 + }, + { + "epoch": 0.8661223269512801, + "grad_norm": 197.23275756835938, + "learning_rate": 6.057478875700035e-07, + "loss": 17.9109, + "step": 428760 + }, + { + "epoch": 0.8661425275839639, + "grad_norm": 341.0602722167969, + "learning_rate": 6.055813598668853e-07, + "loss": 27.3504, + "step": 428770 + }, + { + "epoch": 0.8661627282166477, + "grad_norm": 847.5743408203125, + "learning_rate": 6.054148535816301e-07, + "loss": 22.3776, + "step": 428780 + }, + { + "epoch": 0.8661829288493316, + "grad_norm": 183.5742645263672, + "learning_rate": 6.052483687150512e-07, + "loss": 30.0743, + "step": 428790 + }, + { + "epoch": 0.8662031294820154, + "grad_norm": 780.5835571289062, + "learning_rate": 6.050819052679585e-07, + "loss": 19.2017, + "step": 428800 + }, + { + "epoch": 0.8662233301146992, + "grad_norm": 535.20263671875, + "learning_rate": 6.049154632411625e-07, + "loss": 16.9253, + "step": 428810 + }, + { + "epoch": 0.866243530747383, + "grad_norm": 549.9434814453125, + "learning_rate": 6.047490426354763e-07, + "loss": 39.4413, + "step": 428820 + }, + { + "epoch": 0.8662637313800668, + "grad_norm": 660.7290649414062, + "learning_rate": 6.045826434517104e-07, + "loss": 15.035, + "step": 428830 + }, + { + "epoch": 0.8662839320127507, + "grad_norm": 398.4607238769531, + "learning_rate": 6.044162656906744e-07, + "loss": 20.089, + "step": 428840 + }, + { + "epoch": 0.8663041326454345, + "grad_norm": 207.11167907714844, + "learning_rate": 6.042499093531806e-07, + "loss": 14.1729, + "step": 428850 + }, + { + "epoch": 0.8663243332781183, + "grad_norm": 146.52442932128906, + "learning_rate": 6.040835744400403e-07, + "loss": 14.1465, + "step": 428860 + }, + { + "epoch": 0.8663445339108021, + "grad_norm": 394.9657287597656, + "learning_rate": 6.039172609520639e-07, + "loss": 19.8002, + "step": 428870 + }, + { + "epoch": 0.866364734543486, + "grad_norm": 185.9178466796875, + "learning_rate": 6.037509688900606e-07, + "loss": 17.8917, + "step": 428880 + }, + { + "epoch": 0.8663849351761698, + "grad_norm": 221.9687042236328, + "learning_rate": 6.035846982548427e-07, + "loss": 12.455, + "step": 428890 + }, + { + "epoch": 0.8664051358088536, + "grad_norm": 630.0784912109375, + "learning_rate": 6.034184490472195e-07, + "loss": 20.9911, + "step": 428900 + }, + { + "epoch": 0.8664253364415373, + "grad_norm": 412.6866149902344, + "learning_rate": 6.032522212680009e-07, + "loss": 23.1872, + "step": 428910 + }, + { + "epoch": 0.8664455370742211, + "grad_norm": 362.35296630859375, + "learning_rate": 6.030860149179973e-07, + "loss": 31.9858, + "step": 428920 + }, + { + "epoch": 0.8664657377069049, + "grad_norm": 606.3483276367188, + "learning_rate": 6.029198299980216e-07, + "loss": 19.8697, + "step": 428930 + }, + { + "epoch": 0.8664859383395888, + "grad_norm": 331.26861572265625, + "learning_rate": 6.027536665088795e-07, + "loss": 16.6162, + "step": 428940 + }, + { + "epoch": 0.8665061389722726, + "grad_norm": 681.3436889648438, + "learning_rate": 6.025875244513824e-07, + "loss": 22.1219, + "step": 428950 + }, + { + "epoch": 0.8665263396049564, + "grad_norm": 241.8258514404297, + "learning_rate": 6.024214038263415e-07, + "loss": 17.739, + "step": 428960 + }, + { + "epoch": 0.8665465402376402, + "grad_norm": 483.76239013671875, + "learning_rate": 6.022553046345647e-07, + "loss": 14.6526, + "step": 428970 + }, + { + "epoch": 0.866566740870324, + "grad_norm": 263.0108947753906, + "learning_rate": 6.020892268768619e-07, + "loss": 15.652, + "step": 428980 + }, + { + "epoch": 0.8665869415030079, + "grad_norm": 369.8650817871094, + "learning_rate": 6.019231705540435e-07, + "loss": 28.1917, + "step": 428990 + }, + { + "epoch": 0.8666071421356917, + "grad_norm": 325.9433288574219, + "learning_rate": 6.017571356669183e-07, + "loss": 18.0282, + "step": 429000 + }, + { + "epoch": 0.8666273427683755, + "grad_norm": 964.6558227539062, + "learning_rate": 6.015911222162946e-07, + "loss": 30.9763, + "step": 429010 + }, + { + "epoch": 0.8666475434010593, + "grad_norm": 114.1938705444336, + "learning_rate": 6.014251302029817e-07, + "loss": 14.6377, + "step": 429020 + }, + { + "epoch": 0.8666677440337431, + "grad_norm": 836.9354248046875, + "learning_rate": 6.012591596277906e-07, + "loss": 24.4932, + "step": 429030 + }, + { + "epoch": 0.866687944666427, + "grad_norm": 426.4087219238281, + "learning_rate": 6.01093210491529e-07, + "loss": 12.1576, + "step": 429040 + }, + { + "epoch": 0.8667081452991108, + "grad_norm": 317.5310974121094, + "learning_rate": 6.009272827950042e-07, + "loss": 24.4424, + "step": 429050 + }, + { + "epoch": 0.8667283459317946, + "grad_norm": 448.1875305175781, + "learning_rate": 6.007613765390274e-07, + "loss": 19.8337, + "step": 429060 + }, + { + "epoch": 0.8667485465644784, + "grad_norm": 399.0731201171875, + "learning_rate": 6.005954917244062e-07, + "loss": 19.61, + "step": 429070 + }, + { + "epoch": 0.8667687471971622, + "grad_norm": 395.09912109375, + "learning_rate": 6.004296283519478e-07, + "loss": 12.0017, + "step": 429080 + }, + { + "epoch": 0.8667889478298461, + "grad_norm": 44.819297790527344, + "learning_rate": 6.002637864224631e-07, + "loss": 23.6252, + "step": 429090 + }, + { + "epoch": 0.8668091484625299, + "grad_norm": 1196.097412109375, + "learning_rate": 6.000979659367579e-07, + "loss": 29.9458, + "step": 429100 + }, + { + "epoch": 0.8668293490952137, + "grad_norm": 607.6129150390625, + "learning_rate": 5.999321668956425e-07, + "loss": 33.875, + "step": 429110 + }, + { + "epoch": 0.8668495497278975, + "grad_norm": 7.330821990966797, + "learning_rate": 5.997663892999239e-07, + "loss": 15.4908, + "step": 429120 + }, + { + "epoch": 0.8668697503605813, + "grad_norm": 38.274208068847656, + "learning_rate": 5.996006331504095e-07, + "loss": 20.6134, + "step": 429130 + }, + { + "epoch": 0.8668899509932652, + "grad_norm": 188.052734375, + "learning_rate": 5.994348984479092e-07, + "loss": 9.7485, + "step": 429140 + }, + { + "epoch": 0.866910151625949, + "grad_norm": 266.77996826171875, + "learning_rate": 5.992691851932292e-07, + "loss": 15.0421, + "step": 429150 + }, + { + "epoch": 0.8669303522586327, + "grad_norm": 796.673095703125, + "learning_rate": 5.991034933871764e-07, + "loss": 26.3724, + "step": 429160 + }, + { + "epoch": 0.8669505528913165, + "grad_norm": 565.3130493164062, + "learning_rate": 5.989378230305592e-07, + "loss": 16.5069, + "step": 429170 + }, + { + "epoch": 0.8669707535240003, + "grad_norm": 660.585205078125, + "learning_rate": 5.987721741241864e-07, + "loss": 23.7941, + "step": 429180 + }, + { + "epoch": 0.8669909541566841, + "grad_norm": 213.3594512939453, + "learning_rate": 5.986065466688645e-07, + "loss": 8.1154, + "step": 429190 + }, + { + "epoch": 0.867011154789368, + "grad_norm": 346.75775146484375, + "learning_rate": 5.98440940665399e-07, + "loss": 6.3781, + "step": 429200 + }, + { + "epoch": 0.8670313554220518, + "grad_norm": 371.4981994628906, + "learning_rate": 5.982753561145999e-07, + "loss": 12.4109, + "step": 429210 + }, + { + "epoch": 0.8670515560547356, + "grad_norm": 529.7020263671875, + "learning_rate": 5.981097930172725e-07, + "loss": 12.5239, + "step": 429220 + }, + { + "epoch": 0.8670717566874194, + "grad_norm": 96.09205627441406, + "learning_rate": 5.979442513742234e-07, + "loss": 16.9727, + "step": 429230 + }, + { + "epoch": 0.8670919573201032, + "grad_norm": 101.05036926269531, + "learning_rate": 5.977787311862598e-07, + "loss": 29.9755, + "step": 429240 + }, + { + "epoch": 0.8671121579527871, + "grad_norm": 340.5410461425781, + "learning_rate": 5.9761323245419e-07, + "loss": 13.352, + "step": 429250 + }, + { + "epoch": 0.8671323585854709, + "grad_norm": 692.7017211914062, + "learning_rate": 5.974477551788194e-07, + "loss": 17.2747, + "step": 429260 + }, + { + "epoch": 0.8671525592181547, + "grad_norm": 6.047051429748535, + "learning_rate": 5.972822993609534e-07, + "loss": 20.4808, + "step": 429270 + }, + { + "epoch": 0.8671727598508385, + "grad_norm": 296.6640319824219, + "learning_rate": 5.971168650014008e-07, + "loss": 16.523, + "step": 429280 + }, + { + "epoch": 0.8671929604835223, + "grad_norm": 690.8693237304688, + "learning_rate": 5.969514521009662e-07, + "loss": 17.6326, + "step": 429290 + }, + { + "epoch": 0.8672131611162062, + "grad_norm": 161.1558837890625, + "learning_rate": 5.967860606604553e-07, + "loss": 9.8164, + "step": 429300 + }, + { + "epoch": 0.86723336174889, + "grad_norm": 797.1126708984375, + "learning_rate": 5.966206906806748e-07, + "loss": 21.5301, + "step": 429310 + }, + { + "epoch": 0.8672535623815738, + "grad_norm": 326.7194519042969, + "learning_rate": 5.964553421624325e-07, + "loss": 11.2839, + "step": 429320 + }, + { + "epoch": 0.8672737630142576, + "grad_norm": 360.45458984375, + "learning_rate": 5.962900151065326e-07, + "loss": 13.3711, + "step": 429330 + }, + { + "epoch": 0.8672939636469414, + "grad_norm": 225.3975372314453, + "learning_rate": 5.961247095137795e-07, + "loss": 11.2614, + "step": 429340 + }, + { + "epoch": 0.8673141642796253, + "grad_norm": 301.3730163574219, + "learning_rate": 5.959594253849821e-07, + "loss": 21.4585, + "step": 429350 + }, + { + "epoch": 0.8673343649123091, + "grad_norm": 598.3665771484375, + "learning_rate": 5.95794162720944e-07, + "loss": 24.2816, + "step": 429360 + }, + { + "epoch": 0.8673545655449929, + "grad_norm": 180.07510375976562, + "learning_rate": 5.956289215224703e-07, + "loss": 7.2624, + "step": 429370 + }, + { + "epoch": 0.8673747661776767, + "grad_norm": 148.77638244628906, + "learning_rate": 5.95463701790368e-07, + "loss": 10.5682, + "step": 429380 + }, + { + "epoch": 0.8673949668103605, + "grad_norm": 296.96978759765625, + "learning_rate": 5.9529850352544e-07, + "loss": 5.4315, + "step": 429390 + }, + { + "epoch": 0.8674151674430444, + "grad_norm": 186.8556365966797, + "learning_rate": 5.951333267284942e-07, + "loss": 17.682, + "step": 429400 + }, + { + "epoch": 0.8674353680757282, + "grad_norm": 472.2164306640625, + "learning_rate": 5.949681714003347e-07, + "loss": 16.0475, + "step": 429410 + }, + { + "epoch": 0.8674555687084119, + "grad_norm": 502.9358215332031, + "learning_rate": 5.948030375417646e-07, + "loss": 18.0679, + "step": 429420 + }, + { + "epoch": 0.8674757693410957, + "grad_norm": 209.30079650878906, + "learning_rate": 5.946379251535911e-07, + "loss": 15.7121, + "step": 429430 + }, + { + "epoch": 0.8674959699737795, + "grad_norm": 0.22631804645061493, + "learning_rate": 5.944728342366179e-07, + "loss": 18.109, + "step": 429440 + }, + { + "epoch": 0.8675161706064634, + "grad_norm": 458.28662109375, + "learning_rate": 5.943077647916496e-07, + "loss": 13.4672, + "step": 429450 + }, + { + "epoch": 0.8675363712391472, + "grad_norm": 349.9730529785156, + "learning_rate": 5.941427168194902e-07, + "loss": 21.1921, + "step": 429460 + }, + { + "epoch": 0.867556571871831, + "grad_norm": 614.0407104492188, + "learning_rate": 5.93977690320946e-07, + "loss": 22.7234, + "step": 429470 + }, + { + "epoch": 0.8675767725045148, + "grad_norm": 400.67938232421875, + "learning_rate": 5.938126852968201e-07, + "loss": 11.0473, + "step": 429480 + }, + { + "epoch": 0.8675969731371986, + "grad_norm": 653.5919799804688, + "learning_rate": 5.936477017479158e-07, + "loss": 12.628, + "step": 429490 + }, + { + "epoch": 0.8676171737698825, + "grad_norm": 406.5254211425781, + "learning_rate": 5.934827396750392e-07, + "loss": 29.1455, + "step": 429500 + }, + { + "epoch": 0.8676373744025663, + "grad_norm": 265.0135498046875, + "learning_rate": 5.933177990789934e-07, + "loss": 12.9955, + "step": 429510 + }, + { + "epoch": 0.8676575750352501, + "grad_norm": 358.8462829589844, + "learning_rate": 5.931528799605813e-07, + "loss": 20.3956, + "step": 429520 + }, + { + "epoch": 0.8676777756679339, + "grad_norm": 660.5332641601562, + "learning_rate": 5.92987982320607e-07, + "loss": 28.7643, + "step": 429530 + }, + { + "epoch": 0.8676979763006177, + "grad_norm": 831.7608642578125, + "learning_rate": 5.928231061598772e-07, + "loss": 22.3911, + "step": 429540 + }, + { + "epoch": 0.8677181769333016, + "grad_norm": 287.30059814453125, + "learning_rate": 5.926582514791912e-07, + "loss": 15.9544, + "step": 429550 + }, + { + "epoch": 0.8677383775659854, + "grad_norm": 534.867431640625, + "learning_rate": 5.92493418279354e-07, + "loss": 17.9074, + "step": 429560 + }, + { + "epoch": 0.8677585781986692, + "grad_norm": 2.071526288986206, + "learning_rate": 5.923286065611705e-07, + "loss": 10.6438, + "step": 429570 + }, + { + "epoch": 0.867778778831353, + "grad_norm": 794.7091674804688, + "learning_rate": 5.921638163254423e-07, + "loss": 26.6991, + "step": 429580 + }, + { + "epoch": 0.8677989794640368, + "grad_norm": 532.3543701171875, + "learning_rate": 5.919990475729725e-07, + "loss": 20.9693, + "step": 429590 + }, + { + "epoch": 0.8678191800967207, + "grad_norm": 124.79682922363281, + "learning_rate": 5.918343003045656e-07, + "loss": 7.699, + "step": 429600 + }, + { + "epoch": 0.8678393807294045, + "grad_norm": 1302.444580078125, + "learning_rate": 5.916695745210238e-07, + "loss": 25.4062, + "step": 429610 + }, + { + "epoch": 0.8678595813620883, + "grad_norm": 697.5823364257812, + "learning_rate": 5.915048702231491e-07, + "loss": 18.373, + "step": 429620 + }, + { + "epoch": 0.8678797819947721, + "grad_norm": 262.98980712890625, + "learning_rate": 5.913401874117447e-07, + "loss": 13.0781, + "step": 429630 + }, + { + "epoch": 0.8678999826274559, + "grad_norm": 508.6786193847656, + "learning_rate": 5.911755260876145e-07, + "loss": 10.9307, + "step": 429640 + }, + { + "epoch": 0.8679201832601398, + "grad_norm": 2.2486164569854736, + "learning_rate": 5.910108862515596e-07, + "loss": 16.3299, + "step": 429650 + }, + { + "epoch": 0.8679403838928236, + "grad_norm": 539.4609375, + "learning_rate": 5.908462679043825e-07, + "loss": 18.2002, + "step": 429660 + }, + { + "epoch": 0.8679605845255073, + "grad_norm": 812.7120971679688, + "learning_rate": 5.906816710468866e-07, + "loss": 17.6814, + "step": 429670 + }, + { + "epoch": 0.8679807851581911, + "grad_norm": 134.23193359375, + "learning_rate": 5.905170956798739e-07, + "loss": 18.2646, + "step": 429680 + }, + { + "epoch": 0.8680009857908749, + "grad_norm": 302.33538818359375, + "learning_rate": 5.903525418041445e-07, + "loss": 19.6878, + "step": 429690 + }, + { + "epoch": 0.8680211864235587, + "grad_norm": 690.2728881835938, + "learning_rate": 5.901880094205037e-07, + "loss": 27.2643, + "step": 429700 + }, + { + "epoch": 0.8680413870562426, + "grad_norm": 243.7892303466797, + "learning_rate": 5.900234985297498e-07, + "loss": 16.3223, + "step": 429710 + }, + { + "epoch": 0.8680615876889264, + "grad_norm": 343.6305236816406, + "learning_rate": 5.898590091326884e-07, + "loss": 12.9504, + "step": 429720 + }, + { + "epoch": 0.8680817883216102, + "grad_norm": 401.50335693359375, + "learning_rate": 5.896945412301186e-07, + "loss": 18.5425, + "step": 429730 + }, + { + "epoch": 0.868101988954294, + "grad_norm": 463.09027099609375, + "learning_rate": 5.895300948228421e-07, + "loss": 18.9193, + "step": 429740 + }, + { + "epoch": 0.8681221895869778, + "grad_norm": 31.94213104248047, + "learning_rate": 5.893656699116618e-07, + "loss": 12.961, + "step": 429750 + }, + { + "epoch": 0.8681423902196617, + "grad_norm": 168.0640106201172, + "learning_rate": 5.892012664973784e-07, + "loss": 15.7568, + "step": 429760 + }, + { + "epoch": 0.8681625908523455, + "grad_norm": 18.695362091064453, + "learning_rate": 5.89036884580792e-07, + "loss": 10.4548, + "step": 429770 + }, + { + "epoch": 0.8681827914850293, + "grad_norm": 192.1884307861328, + "learning_rate": 5.888725241627047e-07, + "loss": 26.1267, + "step": 429780 + }, + { + "epoch": 0.8682029921177131, + "grad_norm": 971.8569946289062, + "learning_rate": 5.887081852439186e-07, + "loss": 15.6991, + "step": 429790 + }, + { + "epoch": 0.8682231927503969, + "grad_norm": 236.9096221923828, + "learning_rate": 5.885438678252342e-07, + "loss": 8.6657, + "step": 429800 + }, + { + "epoch": 0.8682433933830808, + "grad_norm": 144.9537811279297, + "learning_rate": 5.883795719074509e-07, + "loss": 14.095, + "step": 429810 + }, + { + "epoch": 0.8682635940157646, + "grad_norm": 165.6309814453125, + "learning_rate": 5.882152974913713e-07, + "loss": 11.5047, + "step": 429820 + }, + { + "epoch": 0.8682837946484484, + "grad_norm": 436.7717590332031, + "learning_rate": 5.880510445777954e-07, + "loss": 26.6042, + "step": 429830 + }, + { + "epoch": 0.8683039952811322, + "grad_norm": 427.2652893066406, + "learning_rate": 5.878868131675225e-07, + "loss": 11.8873, + "step": 429840 + }, + { + "epoch": 0.868324195913816, + "grad_norm": 63.67523956298828, + "learning_rate": 5.877226032613542e-07, + "loss": 13.2029, + "step": 429850 + }, + { + "epoch": 0.8683443965464999, + "grad_norm": 112.3509292602539, + "learning_rate": 5.875584148600916e-07, + "loss": 12.0538, + "step": 429860 + }, + { + "epoch": 0.8683645971791837, + "grad_norm": 80.30730438232422, + "learning_rate": 5.873942479645345e-07, + "loss": 17.1, + "step": 429870 + }, + { + "epoch": 0.8683847978118675, + "grad_norm": 356.8930358886719, + "learning_rate": 5.872301025754812e-07, + "loss": 11.842, + "step": 429880 + }, + { + "epoch": 0.8684049984445513, + "grad_norm": 548.4095458984375, + "learning_rate": 5.870659786937344e-07, + "loss": 18.5002, + "step": 429890 + }, + { + "epoch": 0.8684251990772351, + "grad_norm": 45.01439666748047, + "learning_rate": 5.869018763200929e-07, + "loss": 22.6678, + "step": 429900 + }, + { + "epoch": 0.868445399709919, + "grad_norm": 42.51400375366211, + "learning_rate": 5.867377954553555e-07, + "loss": 31.7126, + "step": 429910 + }, + { + "epoch": 0.8684656003426028, + "grad_norm": 472.7746276855469, + "learning_rate": 5.865737361003226e-07, + "loss": 17.5104, + "step": 429920 + }, + { + "epoch": 0.8684858009752865, + "grad_norm": 214.0835723876953, + "learning_rate": 5.864096982557949e-07, + "loss": 18.531, + "step": 429930 + }, + { + "epoch": 0.8685060016079703, + "grad_norm": 1023.9053344726562, + "learning_rate": 5.862456819225715e-07, + "loss": 21.1408, + "step": 429940 + }, + { + "epoch": 0.8685262022406541, + "grad_norm": 826.1692504882812, + "learning_rate": 5.860816871014496e-07, + "loss": 20.2905, + "step": 429950 + }, + { + "epoch": 0.868546402873338, + "grad_norm": 303.5501403808594, + "learning_rate": 5.859177137932315e-07, + "loss": 20.0418, + "step": 429960 + }, + { + "epoch": 0.8685666035060218, + "grad_norm": 949.9374389648438, + "learning_rate": 5.857537619987152e-07, + "loss": 16.031, + "step": 429970 + }, + { + "epoch": 0.8685868041387056, + "grad_norm": 184.94468688964844, + "learning_rate": 5.855898317186992e-07, + "loss": 13.5504, + "step": 429980 + }, + { + "epoch": 0.8686070047713894, + "grad_norm": 224.17349243164062, + "learning_rate": 5.854259229539833e-07, + "loss": 20.099, + "step": 429990 + }, + { + "epoch": 0.8686272054040732, + "grad_norm": 571.5230102539062, + "learning_rate": 5.852620357053651e-07, + "loss": 20.4905, + "step": 430000 + }, + { + "epoch": 0.868647406036757, + "grad_norm": 362.5823669433594, + "learning_rate": 5.850981699736453e-07, + "loss": 31.167, + "step": 430010 + }, + { + "epoch": 0.8686676066694409, + "grad_norm": 429.9981384277344, + "learning_rate": 5.849343257596218e-07, + "loss": 16.7991, + "step": 430020 + }, + { + "epoch": 0.8686878073021247, + "grad_norm": 447.98236083984375, + "learning_rate": 5.847705030640915e-07, + "loss": 23.4799, + "step": 430030 + }, + { + "epoch": 0.8687080079348085, + "grad_norm": 339.2086486816406, + "learning_rate": 5.84606701887856e-07, + "loss": 14.5216, + "step": 430040 + }, + { + "epoch": 0.8687282085674923, + "grad_norm": 9.948432922363281, + "learning_rate": 5.844429222317111e-07, + "loss": 4.6542, + "step": 430050 + }, + { + "epoch": 0.8687484092001762, + "grad_norm": 334.28228759765625, + "learning_rate": 5.842791640964551e-07, + "loss": 22.8723, + "step": 430060 + }, + { + "epoch": 0.86876860983286, + "grad_norm": 188.88925170898438, + "learning_rate": 5.841154274828869e-07, + "loss": 14.7107, + "step": 430070 + }, + { + "epoch": 0.8687888104655438, + "grad_norm": 496.1181335449219, + "learning_rate": 5.839517123918059e-07, + "loss": 18.6788, + "step": 430080 + }, + { + "epoch": 0.8688090110982276, + "grad_norm": 227.1697235107422, + "learning_rate": 5.83788018824008e-07, + "loss": 10.2072, + "step": 430090 + }, + { + "epoch": 0.8688292117309114, + "grad_norm": 239.81246948242188, + "learning_rate": 5.836243467802915e-07, + "loss": 15.5513, + "step": 430100 + }, + { + "epoch": 0.8688494123635953, + "grad_norm": 183.52682495117188, + "learning_rate": 5.834606962614548e-07, + "loss": 6.584, + "step": 430110 + }, + { + "epoch": 0.8688696129962791, + "grad_norm": 421.7367858886719, + "learning_rate": 5.832970672682948e-07, + "loss": 30.8429, + "step": 430120 + }, + { + "epoch": 0.8688898136289629, + "grad_norm": 890.8756103515625, + "learning_rate": 5.831334598016086e-07, + "loss": 21.5495, + "step": 430130 + }, + { + "epoch": 0.8689100142616467, + "grad_norm": 315.5503234863281, + "learning_rate": 5.829698738621941e-07, + "loss": 21.7944, + "step": 430140 + }, + { + "epoch": 0.8689302148943305, + "grad_norm": 303.3395080566406, + "learning_rate": 5.828063094508507e-07, + "loss": 22.6338, + "step": 430150 + }, + { + "epoch": 0.8689504155270144, + "grad_norm": 297.9224853515625, + "learning_rate": 5.826427665683715e-07, + "loss": 8.8286, + "step": 430160 + }, + { + "epoch": 0.8689706161596982, + "grad_norm": 394.8922119140625, + "learning_rate": 5.824792452155558e-07, + "loss": 15.724, + "step": 430170 + }, + { + "epoch": 0.868990816792382, + "grad_norm": 441.8057556152344, + "learning_rate": 5.823157453932015e-07, + "loss": 26.8445, + "step": 430180 + }, + { + "epoch": 0.8690110174250657, + "grad_norm": 26.113948822021484, + "learning_rate": 5.821522671021041e-07, + "loss": 20.7014, + "step": 430190 + }, + { + "epoch": 0.8690312180577495, + "grad_norm": 501.9127197265625, + "learning_rate": 5.819888103430598e-07, + "loss": 19.1681, + "step": 430200 + }, + { + "epoch": 0.8690514186904333, + "grad_norm": 878.7197875976562, + "learning_rate": 5.818253751168679e-07, + "loss": 17.2247, + "step": 430210 + }, + { + "epoch": 0.8690716193231172, + "grad_norm": 168.92588806152344, + "learning_rate": 5.816619614243224e-07, + "loss": 22.1551, + "step": 430220 + }, + { + "epoch": 0.869091819955801, + "grad_norm": 309.08953857421875, + "learning_rate": 5.814985692662201e-07, + "loss": 31.1003, + "step": 430230 + }, + { + "epoch": 0.8691120205884848, + "grad_norm": 260.6549072265625, + "learning_rate": 5.81335198643358e-07, + "loss": 12.7556, + "step": 430240 + }, + { + "epoch": 0.8691322212211686, + "grad_norm": 285.0781555175781, + "learning_rate": 5.811718495565327e-07, + "loss": 14.1524, + "step": 430250 + }, + { + "epoch": 0.8691524218538524, + "grad_norm": 851.3721923828125, + "learning_rate": 5.810085220065404e-07, + "loss": 42.9282, + "step": 430260 + }, + { + "epoch": 0.8691726224865363, + "grad_norm": 672.4266967773438, + "learning_rate": 5.808452159941752e-07, + "loss": 14.6863, + "step": 430270 + }, + { + "epoch": 0.8691928231192201, + "grad_norm": 127.9519271850586, + "learning_rate": 5.806819315202361e-07, + "loss": 15.2976, + "step": 430280 + }, + { + "epoch": 0.8692130237519039, + "grad_norm": 315.2915954589844, + "learning_rate": 5.805186685855163e-07, + "loss": 8.6507, + "step": 430290 + }, + { + "epoch": 0.8692332243845877, + "grad_norm": 301.9071960449219, + "learning_rate": 5.803554271908124e-07, + "loss": 15.8969, + "step": 430300 + }, + { + "epoch": 0.8692534250172715, + "grad_norm": 345.7572326660156, + "learning_rate": 5.801922073369203e-07, + "loss": 14.5178, + "step": 430310 + }, + { + "epoch": 0.8692736256499554, + "grad_norm": 429.4742431640625, + "learning_rate": 5.800290090246346e-07, + "loss": 12.8204, + "step": 430320 + }, + { + "epoch": 0.8692938262826392, + "grad_norm": 374.86956787109375, + "learning_rate": 5.798658322547529e-07, + "loss": 16.9373, + "step": 430330 + }, + { + "epoch": 0.869314026915323, + "grad_norm": 175.72686767578125, + "learning_rate": 5.797026770280683e-07, + "loss": 7.0061, + "step": 430340 + }, + { + "epoch": 0.8693342275480068, + "grad_norm": 4.458624362945557, + "learning_rate": 5.795395433453765e-07, + "loss": 14.7271, + "step": 430350 + }, + { + "epoch": 0.8693544281806906, + "grad_norm": 185.01858520507812, + "learning_rate": 5.793764312074735e-07, + "loss": 17.3475, + "step": 430360 + }, + { + "epoch": 0.8693746288133745, + "grad_norm": 254.61776733398438, + "learning_rate": 5.792133406151523e-07, + "loss": 12.7682, + "step": 430370 + }, + { + "epoch": 0.8693948294460583, + "grad_norm": 371.08416748046875, + "learning_rate": 5.790502715692104e-07, + "loss": 23.1745, + "step": 430380 + }, + { + "epoch": 0.8694150300787421, + "grad_norm": 311.84423828125, + "learning_rate": 5.788872240704402e-07, + "loss": 13.9985, + "step": 430390 + }, + { + "epoch": 0.8694352307114259, + "grad_norm": 510.8533935546875, + "learning_rate": 5.787241981196384e-07, + "loss": 25.0979, + "step": 430400 + }, + { + "epoch": 0.8694554313441097, + "grad_norm": 385.23028564453125, + "learning_rate": 5.785611937175989e-07, + "loss": 11.1401, + "step": 430410 + }, + { + "epoch": 0.8694756319767936, + "grad_norm": 78.8631591796875, + "learning_rate": 5.783982108651148e-07, + "loss": 16.0758, + "step": 430420 + }, + { + "epoch": 0.8694958326094774, + "grad_norm": 550.5646362304688, + "learning_rate": 5.782352495629822e-07, + "loss": 24.8675, + "step": 430430 + }, + { + "epoch": 0.8695160332421611, + "grad_norm": 36.11176681518555, + "learning_rate": 5.780723098119951e-07, + "loss": 24.2342, + "step": 430440 + }, + { + "epoch": 0.8695362338748449, + "grad_norm": 163.89772033691406, + "learning_rate": 5.779093916129464e-07, + "loss": 19.7238, + "step": 430450 + }, + { + "epoch": 0.8695564345075287, + "grad_norm": 196.0900421142578, + "learning_rate": 5.777464949666306e-07, + "loss": 36.1687, + "step": 430460 + }, + { + "epoch": 0.8695766351402126, + "grad_norm": 170.99618530273438, + "learning_rate": 5.775836198738427e-07, + "loss": 48.7488, + "step": 430470 + }, + { + "epoch": 0.8695968357728964, + "grad_norm": 68.98126220703125, + "learning_rate": 5.774207663353765e-07, + "loss": 17.3575, + "step": 430480 + }, + { + "epoch": 0.8696170364055802, + "grad_norm": 196.4060516357422, + "learning_rate": 5.772579343520241e-07, + "loss": 24.434, + "step": 430490 + }, + { + "epoch": 0.869637237038264, + "grad_norm": 221.60061645507812, + "learning_rate": 5.770951239245803e-07, + "loss": 21.6284, + "step": 430500 + }, + { + "epoch": 0.8696574376709478, + "grad_norm": 758.8854370117188, + "learning_rate": 5.769323350538391e-07, + "loss": 28.3884, + "step": 430510 + }, + { + "epoch": 0.8696776383036317, + "grad_norm": 156.29849243164062, + "learning_rate": 5.767695677405921e-07, + "loss": 21.5101, + "step": 430520 + }, + { + "epoch": 0.8696978389363155, + "grad_norm": 207.89292907714844, + "learning_rate": 5.766068219856341e-07, + "loss": 27.7899, + "step": 430530 + }, + { + "epoch": 0.8697180395689993, + "grad_norm": 300.2975158691406, + "learning_rate": 5.764440977897584e-07, + "loss": 21.9585, + "step": 430540 + }, + { + "epoch": 0.8697382402016831, + "grad_norm": 296.90606689453125, + "learning_rate": 5.762813951537582e-07, + "loss": 12.1937, + "step": 430550 + }, + { + "epoch": 0.8697584408343669, + "grad_norm": 179.23008728027344, + "learning_rate": 5.761187140784247e-07, + "loss": 13.7425, + "step": 430560 + }, + { + "epoch": 0.8697786414670508, + "grad_norm": 180.9647216796875, + "learning_rate": 5.759560545645527e-07, + "loss": 17.7472, + "step": 430570 + }, + { + "epoch": 0.8697988420997346, + "grad_norm": 295.97515869140625, + "learning_rate": 5.757934166129347e-07, + "loss": 11.9401, + "step": 430580 + }, + { + "epoch": 0.8698190427324184, + "grad_norm": 453.92840576171875, + "learning_rate": 5.756308002243622e-07, + "loss": 25.4231, + "step": 430590 + }, + { + "epoch": 0.8698392433651022, + "grad_norm": 279.5281066894531, + "learning_rate": 5.754682053996291e-07, + "loss": 13.0344, + "step": 430600 + }, + { + "epoch": 0.869859443997786, + "grad_norm": 364.5643005371094, + "learning_rate": 5.753056321395267e-07, + "loss": 13.5878, + "step": 430610 + }, + { + "epoch": 0.8698796446304699, + "grad_norm": 478.2242126464844, + "learning_rate": 5.751430804448488e-07, + "loss": 17.6104, + "step": 430620 + }, + { + "epoch": 0.8698998452631537, + "grad_norm": 193.85586547851562, + "learning_rate": 5.749805503163869e-07, + "loss": 10.5943, + "step": 430630 + }, + { + "epoch": 0.8699200458958375, + "grad_norm": 67.99600982666016, + "learning_rate": 5.748180417549321e-07, + "loss": 10.1801, + "step": 430640 + }, + { + "epoch": 0.8699402465285213, + "grad_norm": 151.18943786621094, + "learning_rate": 5.746555547612781e-07, + "loss": 24.1284, + "step": 430650 + }, + { + "epoch": 0.8699604471612051, + "grad_norm": 712.3665161132812, + "learning_rate": 5.744930893362166e-07, + "loss": 24.4889, + "step": 430660 + }, + { + "epoch": 0.869980647793889, + "grad_norm": 426.7364807128906, + "learning_rate": 5.74330645480538e-07, + "loss": 16.0659, + "step": 430670 + }, + { + "epoch": 0.8700008484265728, + "grad_norm": 322.21551513671875, + "learning_rate": 5.741682231950351e-07, + "loss": 21.5867, + "step": 430680 + }, + { + "epoch": 0.8700210490592566, + "grad_norm": 193.04405212402344, + "learning_rate": 5.740058224805001e-07, + "loss": 29.5392, + "step": 430690 + }, + { + "epoch": 0.8700412496919403, + "grad_norm": 57.63292694091797, + "learning_rate": 5.738434433377244e-07, + "loss": 18.641, + "step": 430700 + }, + { + "epoch": 0.8700614503246241, + "grad_norm": 256.9406433105469, + "learning_rate": 5.736810857674979e-07, + "loss": 30.6381, + "step": 430710 + }, + { + "epoch": 0.8700816509573079, + "grad_norm": 812.01513671875, + "learning_rate": 5.735187497706135e-07, + "loss": 25.1458, + "step": 430720 + }, + { + "epoch": 0.8701018515899918, + "grad_norm": 382.5735168457031, + "learning_rate": 5.733564353478622e-07, + "loss": 13.3199, + "step": 430730 + }, + { + "epoch": 0.8701220522226756, + "grad_norm": 0.0, + "learning_rate": 5.731941425000337e-07, + "loss": 25.8635, + "step": 430740 + }, + { + "epoch": 0.8701422528553594, + "grad_norm": 259.4105529785156, + "learning_rate": 5.730318712279203e-07, + "loss": 7.6119, + "step": 430750 + }, + { + "epoch": 0.8701624534880432, + "grad_norm": 455.6169738769531, + "learning_rate": 5.728696215323143e-07, + "loss": 13.8466, + "step": 430760 + }, + { + "epoch": 0.870182654120727, + "grad_norm": 10.431163787841797, + "learning_rate": 5.727073934140026e-07, + "loss": 17.4464, + "step": 430770 + }, + { + "epoch": 0.8702028547534109, + "grad_norm": 20.1141414642334, + "learning_rate": 5.725451868737786e-07, + "loss": 10.5243, + "step": 430780 + }, + { + "epoch": 0.8702230553860947, + "grad_norm": 79.7918930053711, + "learning_rate": 5.723830019124332e-07, + "loss": 17.1926, + "step": 430790 + }, + { + "epoch": 0.8702432560187785, + "grad_norm": 243.2442169189453, + "learning_rate": 5.722208385307559e-07, + "loss": 11.0174, + "step": 430800 + }, + { + "epoch": 0.8702634566514623, + "grad_norm": 311.8316650390625, + "learning_rate": 5.720586967295366e-07, + "loss": 10.5594, + "step": 430810 + }, + { + "epoch": 0.8702836572841461, + "grad_norm": 1566.53173828125, + "learning_rate": 5.718965765095669e-07, + "loss": 18.4191, + "step": 430820 + }, + { + "epoch": 0.87030385791683, + "grad_norm": 16.48854637145996, + "learning_rate": 5.717344778716361e-07, + "loss": 10.0563, + "step": 430830 + }, + { + "epoch": 0.8703240585495138, + "grad_norm": 369.173095703125, + "learning_rate": 5.715724008165335e-07, + "loss": 18.6129, + "step": 430840 + }, + { + "epoch": 0.8703442591821976, + "grad_norm": 287.1148376464844, + "learning_rate": 5.714103453450498e-07, + "loss": 17.074, + "step": 430850 + }, + { + "epoch": 0.8703644598148814, + "grad_norm": 375.2651062011719, + "learning_rate": 5.712483114579758e-07, + "loss": 22.1905, + "step": 430860 + }, + { + "epoch": 0.8703846604475652, + "grad_norm": 144.95962524414062, + "learning_rate": 5.710862991561006e-07, + "loss": 13.5704, + "step": 430870 + }, + { + "epoch": 0.8704048610802491, + "grad_norm": 589.4137573242188, + "learning_rate": 5.709243084402128e-07, + "loss": 14.086, + "step": 430880 + }, + { + "epoch": 0.8704250617129329, + "grad_norm": 4.6374192237854, + "learning_rate": 5.70762339311104e-07, + "loss": 17.3641, + "step": 430890 + }, + { + "epoch": 0.8704452623456167, + "grad_norm": 481.83892822265625, + "learning_rate": 5.706003917695619e-07, + "loss": 26.1069, + "step": 430900 + }, + { + "epoch": 0.8704654629783005, + "grad_norm": 828.807861328125, + "learning_rate": 5.704384658163748e-07, + "loss": 20.8426, + "step": 430910 + }, + { + "epoch": 0.8704856636109843, + "grad_norm": 578.0156860351562, + "learning_rate": 5.702765614523354e-07, + "loss": 27.7354, + "step": 430920 + }, + { + "epoch": 0.8705058642436682, + "grad_norm": 473.4757995605469, + "learning_rate": 5.701146786782291e-07, + "loss": 22.8459, + "step": 430930 + }, + { + "epoch": 0.870526064876352, + "grad_norm": 713.3866577148438, + "learning_rate": 5.699528174948477e-07, + "loss": 103.8826, + "step": 430940 + }, + { + "epoch": 0.8705462655090357, + "grad_norm": 761.142578125, + "learning_rate": 5.697909779029786e-07, + "loss": 14.9914, + "step": 430950 + }, + { + "epoch": 0.8705664661417195, + "grad_norm": 440.9145202636719, + "learning_rate": 5.696291599034104e-07, + "loss": 37.4724, + "step": 430960 + }, + { + "epoch": 0.8705866667744033, + "grad_norm": 239.36610412597656, + "learning_rate": 5.694673634969334e-07, + "loss": 9.299, + "step": 430970 + }, + { + "epoch": 0.8706068674070871, + "grad_norm": 616.1082763671875, + "learning_rate": 5.693055886843341e-07, + "loss": 17.9193, + "step": 430980 + }, + { + "epoch": 0.870627068039771, + "grad_norm": 220.93699645996094, + "learning_rate": 5.691438354664031e-07, + "loss": 25.5439, + "step": 430990 + }, + { + "epoch": 0.8706472686724548, + "grad_norm": 50.125919342041016, + "learning_rate": 5.689821038439264e-07, + "loss": 22.6964, + "step": 431000 + }, + { + "epoch": 0.8706674693051386, + "grad_norm": 86.68278503417969, + "learning_rate": 5.688203938176945e-07, + "loss": 18.2718, + "step": 431010 + }, + { + "epoch": 0.8706876699378224, + "grad_norm": 283.8215026855469, + "learning_rate": 5.686587053884946e-07, + "loss": 10.0713, + "step": 431020 + }, + { + "epoch": 0.8707078705705062, + "grad_norm": 289.13702392578125, + "learning_rate": 5.684970385571137e-07, + "loss": 12.529, + "step": 431030 + }, + { + "epoch": 0.8707280712031901, + "grad_norm": 564.9439697265625, + "learning_rate": 5.683353933243418e-07, + "loss": 27.3489, + "step": 431040 + }, + { + "epoch": 0.8707482718358739, + "grad_norm": 163.3947296142578, + "learning_rate": 5.681737696909656e-07, + "loss": 16.6905, + "step": 431050 + }, + { + "epoch": 0.8707684724685577, + "grad_norm": 573.1134643554688, + "learning_rate": 5.680121676577721e-07, + "loss": 13.4849, + "step": 431060 + }, + { + "epoch": 0.8707886731012415, + "grad_norm": 630.4998168945312, + "learning_rate": 5.678505872255496e-07, + "loss": 36.728, + "step": 431070 + }, + { + "epoch": 0.8708088737339253, + "grad_norm": 344.05316162109375, + "learning_rate": 5.676890283950881e-07, + "loss": 10.6528, + "step": 431080 + }, + { + "epoch": 0.8708290743666092, + "grad_norm": 374.6134948730469, + "learning_rate": 5.675274911671702e-07, + "loss": 14.498, + "step": 431090 + }, + { + "epoch": 0.870849274999293, + "grad_norm": 26.75878143310547, + "learning_rate": 5.673659755425859e-07, + "loss": 31.9518, + "step": 431100 + }, + { + "epoch": 0.8708694756319768, + "grad_norm": 45.3067512512207, + "learning_rate": 5.672044815221234e-07, + "loss": 17.2036, + "step": 431110 + }, + { + "epoch": 0.8708896762646606, + "grad_norm": 154.9571075439453, + "learning_rate": 5.670430091065682e-07, + "loss": 10.5864, + "step": 431120 + }, + { + "epoch": 0.8709098768973444, + "grad_norm": 662.7469482421875, + "learning_rate": 5.668815582967074e-07, + "loss": 17.816, + "step": 431130 + }, + { + "epoch": 0.8709300775300283, + "grad_norm": 295.2125244140625, + "learning_rate": 5.667201290933278e-07, + "loss": 9.9797, + "step": 431140 + }, + { + "epoch": 0.8709502781627121, + "grad_norm": 101.50379180908203, + "learning_rate": 5.665587214972173e-07, + "loss": 16.7628, + "step": 431150 + }, + { + "epoch": 0.8709704787953959, + "grad_norm": 238.20875549316406, + "learning_rate": 5.663973355091624e-07, + "loss": 22.1564, + "step": 431160 + }, + { + "epoch": 0.8709906794280797, + "grad_norm": 10.923376083374023, + "learning_rate": 5.662359711299481e-07, + "loss": 20.7295, + "step": 431170 + }, + { + "epoch": 0.8710108800607635, + "grad_norm": 430.8266906738281, + "learning_rate": 5.66074628360363e-07, + "loss": 13.5221, + "step": 431180 + }, + { + "epoch": 0.8710310806934474, + "grad_norm": 339.6048278808594, + "learning_rate": 5.659133072011919e-07, + "loss": 9.1787, + "step": 431190 + }, + { + "epoch": 0.8710512813261312, + "grad_norm": 364.2235412597656, + "learning_rate": 5.657520076532208e-07, + "loss": 23.0557, + "step": 431200 + }, + { + "epoch": 0.8710714819588149, + "grad_norm": 451.9517822265625, + "learning_rate": 5.655907297172375e-07, + "loss": 18.2305, + "step": 431210 + }, + { + "epoch": 0.8710916825914987, + "grad_norm": 389.908935546875, + "learning_rate": 5.654294733940263e-07, + "loss": 24.1222, + "step": 431220 + }, + { + "epoch": 0.8711118832241825, + "grad_norm": 146.44761657714844, + "learning_rate": 5.65268238684375e-07, + "loss": 11.2361, + "step": 431230 + }, + { + "epoch": 0.8711320838568664, + "grad_norm": 268.7914733886719, + "learning_rate": 5.651070255890689e-07, + "loss": 13.8313, + "step": 431240 + }, + { + "epoch": 0.8711522844895502, + "grad_norm": 70.17601013183594, + "learning_rate": 5.649458341088915e-07, + "loss": 12.3232, + "step": 431250 + }, + { + "epoch": 0.871172485122234, + "grad_norm": 253.52432250976562, + "learning_rate": 5.647846642446314e-07, + "loss": 23.8891, + "step": 431260 + }, + { + "epoch": 0.8711926857549178, + "grad_norm": 4.544342517852783, + "learning_rate": 5.646235159970731e-07, + "loss": 18.7502, + "step": 431270 + }, + { + "epoch": 0.8712128863876016, + "grad_norm": 505.71832275390625, + "learning_rate": 5.64462389367001e-07, + "loss": 11.3864, + "step": 431280 + }, + { + "epoch": 0.8712330870202855, + "grad_norm": 626.1432495117188, + "learning_rate": 5.64301284355201e-07, + "loss": 29.4871, + "step": 431290 + }, + { + "epoch": 0.8712532876529693, + "grad_norm": 345.0766296386719, + "learning_rate": 5.641402009624591e-07, + "loss": 15.6212, + "step": 431300 + }, + { + "epoch": 0.8712734882856531, + "grad_norm": 224.95736694335938, + "learning_rate": 5.639791391895605e-07, + "loss": 31.2785, + "step": 431310 + }, + { + "epoch": 0.8712936889183369, + "grad_norm": 168.00369262695312, + "learning_rate": 5.638180990372882e-07, + "loss": 14.0955, + "step": 431320 + }, + { + "epoch": 0.8713138895510207, + "grad_norm": 420.13336181640625, + "learning_rate": 5.6365708050643e-07, + "loss": 13.7055, + "step": 431330 + }, + { + "epoch": 0.8713340901837046, + "grad_norm": 414.8622131347656, + "learning_rate": 5.634960835977688e-07, + "loss": 13.434, + "step": 431340 + }, + { + "epoch": 0.8713542908163884, + "grad_norm": 77.1890869140625, + "learning_rate": 5.633351083120886e-07, + "loss": 15.1375, + "step": 431350 + }, + { + "epoch": 0.8713744914490722, + "grad_norm": 103.03490447998047, + "learning_rate": 5.631741546501746e-07, + "loss": 14.2117, + "step": 431360 + }, + { + "epoch": 0.871394692081756, + "grad_norm": 775.10791015625, + "learning_rate": 5.630132226128143e-07, + "loss": 14.5798, + "step": 431370 + }, + { + "epoch": 0.8714148927144398, + "grad_norm": 281.8716125488281, + "learning_rate": 5.628523122007867e-07, + "loss": 26.4327, + "step": 431380 + }, + { + "epoch": 0.8714350933471237, + "grad_norm": 1007.6710205078125, + "learning_rate": 5.626914234148794e-07, + "loss": 14.8941, + "step": 431390 + }, + { + "epoch": 0.8714552939798075, + "grad_norm": 223.94935607910156, + "learning_rate": 5.625305562558764e-07, + "loss": 19.0216, + "step": 431400 + }, + { + "epoch": 0.8714754946124913, + "grad_norm": 555.6204223632812, + "learning_rate": 5.623697107245619e-07, + "loss": 21.2055, + "step": 431410 + }, + { + "epoch": 0.8714956952451751, + "grad_norm": 259.8000183105469, + "learning_rate": 5.622088868217179e-07, + "loss": 10.3563, + "step": 431420 + }, + { + "epoch": 0.8715158958778589, + "grad_norm": 346.1632385253906, + "learning_rate": 5.620480845481291e-07, + "loss": 16.3341, + "step": 431430 + }, + { + "epoch": 0.8715360965105428, + "grad_norm": 343.7537536621094, + "learning_rate": 5.618873039045825e-07, + "loss": 17.5446, + "step": 431440 + }, + { + "epoch": 0.8715562971432266, + "grad_norm": 157.0694580078125, + "learning_rate": 5.617265448918563e-07, + "loss": 17.3536, + "step": 431450 + }, + { + "epoch": 0.8715764977759103, + "grad_norm": 279.52642822265625, + "learning_rate": 5.615658075107366e-07, + "loss": 23.2342, + "step": 431460 + }, + { + "epoch": 0.8715966984085941, + "grad_norm": 355.4528503417969, + "learning_rate": 5.614050917620084e-07, + "loss": 12.9763, + "step": 431470 + }, + { + "epoch": 0.8716168990412779, + "grad_norm": 355.9991760253906, + "learning_rate": 5.612443976464527e-07, + "loss": 16.9133, + "step": 431480 + }, + { + "epoch": 0.8716370996739617, + "grad_norm": 264.4765319824219, + "learning_rate": 5.610837251648532e-07, + "loss": 16.8319, + "step": 431490 + }, + { + "epoch": 0.8716573003066456, + "grad_norm": 707.2461547851562, + "learning_rate": 5.609230743179939e-07, + "loss": 14.6348, + "step": 431500 + }, + { + "epoch": 0.8716775009393294, + "grad_norm": 394.54473876953125, + "learning_rate": 5.607624451066568e-07, + "loss": 14.8244, + "step": 431510 + }, + { + "epoch": 0.8716977015720132, + "grad_norm": 259.0372314453125, + "learning_rate": 5.606018375316246e-07, + "loss": 12.0606, + "step": 431520 + }, + { + "epoch": 0.871717902204697, + "grad_norm": 168.06674194335938, + "learning_rate": 5.604412515936814e-07, + "loss": 19.1726, + "step": 431530 + }, + { + "epoch": 0.8717381028373808, + "grad_norm": 333.35650634765625, + "learning_rate": 5.602806872936079e-07, + "loss": 19.7599, + "step": 431540 + }, + { + "epoch": 0.8717583034700647, + "grad_norm": 696.573974609375, + "learning_rate": 5.601201446321891e-07, + "loss": 12.1974, + "step": 431550 + }, + { + "epoch": 0.8717785041027485, + "grad_norm": 419.63787841796875, + "learning_rate": 5.599596236102068e-07, + "loss": 19.8364, + "step": 431560 + }, + { + "epoch": 0.8717987047354323, + "grad_norm": 172.09666442871094, + "learning_rate": 5.597991242284407e-07, + "loss": 21.4207, + "step": 431570 + }, + { + "epoch": 0.8718189053681161, + "grad_norm": 339.2966613769531, + "learning_rate": 5.596386464876769e-07, + "loss": 22.9744, + "step": 431580 + }, + { + "epoch": 0.8718391060008, + "grad_norm": 554.4182739257812, + "learning_rate": 5.594781903886942e-07, + "loss": 17.6794, + "step": 431590 + }, + { + "epoch": 0.8718593066334838, + "grad_norm": 452.2076416015625, + "learning_rate": 5.593177559322776e-07, + "loss": 42.9131, + "step": 431600 + }, + { + "epoch": 0.8718795072661676, + "grad_norm": 175.66160583496094, + "learning_rate": 5.591573431192066e-07, + "loss": 16.0131, + "step": 431610 + }, + { + "epoch": 0.8718997078988514, + "grad_norm": 335.3212585449219, + "learning_rate": 5.589969519502652e-07, + "loss": 19.758, + "step": 431620 + }, + { + "epoch": 0.8719199085315352, + "grad_norm": 366.0373840332031, + "learning_rate": 5.588365824262343e-07, + "loss": 13.9156, + "step": 431630 + }, + { + "epoch": 0.871940109164219, + "grad_norm": 330.110107421875, + "learning_rate": 5.586762345478935e-07, + "loss": 12.1645, + "step": 431640 + }, + { + "epoch": 0.8719603097969029, + "grad_norm": 303.6749572753906, + "learning_rate": 5.585159083160274e-07, + "loss": 20.0114, + "step": 431650 + }, + { + "epoch": 0.8719805104295867, + "grad_norm": 408.7389221191406, + "learning_rate": 5.583556037314164e-07, + "loss": 9.4455, + "step": 431660 + }, + { + "epoch": 0.8720007110622705, + "grad_norm": 513.5108032226562, + "learning_rate": 5.581953207948404e-07, + "loss": 12.3123, + "step": 431670 + }, + { + "epoch": 0.8720209116949543, + "grad_norm": 308.0274658203125, + "learning_rate": 5.58035059507081e-07, + "loss": 11.2197, + "step": 431680 + }, + { + "epoch": 0.8720411123276381, + "grad_norm": 446.0270690917969, + "learning_rate": 5.578748198689226e-07, + "loss": 15.9516, + "step": 431690 + }, + { + "epoch": 0.872061312960322, + "grad_norm": 491.5663757324219, + "learning_rate": 5.577146018811419e-07, + "loss": 25.5518, + "step": 431700 + }, + { + "epoch": 0.8720815135930058, + "grad_norm": 377.6907958984375, + "learning_rate": 5.575544055445209e-07, + "loss": 18.1277, + "step": 431710 + }, + { + "epoch": 0.8721017142256895, + "grad_norm": 401.6126708984375, + "learning_rate": 5.573942308598418e-07, + "loss": 15.5451, + "step": 431720 + }, + { + "epoch": 0.8721219148583733, + "grad_norm": 740.29931640625, + "learning_rate": 5.572340778278845e-07, + "loss": 27.8595, + "step": 431730 + }, + { + "epoch": 0.8721421154910571, + "grad_norm": 167.87570190429688, + "learning_rate": 5.570739464494284e-07, + "loss": 24.6506, + "step": 431740 + }, + { + "epoch": 0.872162316123741, + "grad_norm": 236.99952697753906, + "learning_rate": 5.569138367252553e-07, + "loss": 7.629, + "step": 431750 + }, + { + "epoch": 0.8721825167564248, + "grad_norm": 101.96878051757812, + "learning_rate": 5.567537486561476e-07, + "loss": 10.9722, + "step": 431760 + }, + { + "epoch": 0.8722027173891086, + "grad_norm": 275.85247802734375, + "learning_rate": 5.565936822428808e-07, + "loss": 13.2331, + "step": 431770 + }, + { + "epoch": 0.8722229180217924, + "grad_norm": 294.6750183105469, + "learning_rate": 5.564336374862373e-07, + "loss": 13.1657, + "step": 431780 + }, + { + "epoch": 0.8722431186544762, + "grad_norm": 430.006103515625, + "learning_rate": 5.562736143869984e-07, + "loss": 15.3141, + "step": 431790 + }, + { + "epoch": 0.87226331928716, + "grad_norm": 288.45123291015625, + "learning_rate": 5.561136129459432e-07, + "loss": 11.9062, + "step": 431800 + }, + { + "epoch": 0.8722835199198439, + "grad_norm": 201.5088653564453, + "learning_rate": 5.559536331638498e-07, + "loss": 17.3725, + "step": 431810 + }, + { + "epoch": 0.8723037205525277, + "grad_norm": 431.68145751953125, + "learning_rate": 5.557936750415011e-07, + "loss": 11.8276, + "step": 431820 + }, + { + "epoch": 0.8723239211852115, + "grad_norm": 385.95745849609375, + "learning_rate": 5.556337385796734e-07, + "loss": 15.5291, + "step": 431830 + }, + { + "epoch": 0.8723441218178953, + "grad_norm": 78.91565704345703, + "learning_rate": 5.55473823779149e-07, + "loss": 17.6818, + "step": 431840 + }, + { + "epoch": 0.8723643224505792, + "grad_norm": 32.79534149169922, + "learning_rate": 5.553139306407062e-07, + "loss": 20.4146, + "step": 431850 + }, + { + "epoch": 0.872384523083263, + "grad_norm": 160.63734436035156, + "learning_rate": 5.551540591651234e-07, + "loss": 18.6493, + "step": 431860 + }, + { + "epoch": 0.8724047237159468, + "grad_norm": 673.2002563476562, + "learning_rate": 5.549942093531812e-07, + "loss": 27.3512, + "step": 431870 + }, + { + "epoch": 0.8724249243486306, + "grad_norm": 1010.4269409179688, + "learning_rate": 5.548343812056584e-07, + "loss": 24.5515, + "step": 431880 + }, + { + "epoch": 0.8724451249813144, + "grad_norm": 185.97637939453125, + "learning_rate": 5.546745747233323e-07, + "loss": 8.7487, + "step": 431890 + }, + { + "epoch": 0.8724653256139983, + "grad_norm": 88.39303588867188, + "learning_rate": 5.545147899069836e-07, + "loss": 12.3746, + "step": 431900 + }, + { + "epoch": 0.8724855262466821, + "grad_norm": 796.1704711914062, + "learning_rate": 5.543550267573916e-07, + "loss": 21.8673, + "step": 431910 + }, + { + "epoch": 0.8725057268793659, + "grad_norm": 188.41055297851562, + "learning_rate": 5.541952852753341e-07, + "loss": 11.4995, + "step": 431920 + }, + { + "epoch": 0.8725259275120497, + "grad_norm": 184.47239685058594, + "learning_rate": 5.540355654615881e-07, + "loss": 13.8518, + "step": 431930 + }, + { + "epoch": 0.8725461281447335, + "grad_norm": 144.6045379638672, + "learning_rate": 5.538758673169348e-07, + "loss": 25.1702, + "step": 431940 + }, + { + "epoch": 0.8725663287774174, + "grad_norm": 425.9952392578125, + "learning_rate": 5.537161908421512e-07, + "loss": 17.863, + "step": 431950 + }, + { + "epoch": 0.8725865294101012, + "grad_norm": 225.549072265625, + "learning_rate": 5.535565360380146e-07, + "loss": 36.3956, + "step": 431960 + }, + { + "epoch": 0.872606730042785, + "grad_norm": 264.4824523925781, + "learning_rate": 5.533969029053043e-07, + "loss": 9.2637, + "step": 431970 + }, + { + "epoch": 0.8726269306754687, + "grad_norm": 424.3130798339844, + "learning_rate": 5.532372914448003e-07, + "loss": 17.5098, + "step": 431980 + }, + { + "epoch": 0.8726471313081525, + "grad_norm": 24.801950454711914, + "learning_rate": 5.530777016572763e-07, + "loss": 18.5786, + "step": 431990 + }, + { + "epoch": 0.8726673319408363, + "grad_norm": 175.92909240722656, + "learning_rate": 5.529181335435124e-07, + "loss": 25.0721, + "step": 432000 + }, + { + "epoch": 0.8726875325735202, + "grad_norm": 133.3310546875, + "learning_rate": 5.527585871042867e-07, + "loss": 35.488, + "step": 432010 + }, + { + "epoch": 0.872707733206204, + "grad_norm": 51.423831939697266, + "learning_rate": 5.525990623403765e-07, + "loss": 21.8321, + "step": 432020 + }, + { + "epoch": 0.8727279338388878, + "grad_norm": 757.439453125, + "learning_rate": 5.524395592525584e-07, + "loss": 14.381, + "step": 432030 + }, + { + "epoch": 0.8727481344715716, + "grad_norm": 194.53475952148438, + "learning_rate": 5.522800778416099e-07, + "loss": 38.3131, + "step": 432040 + }, + { + "epoch": 0.8727683351042554, + "grad_norm": 474.09283447265625, + "learning_rate": 5.521206181083111e-07, + "loss": 22.2005, + "step": 432050 + }, + { + "epoch": 0.8727885357369393, + "grad_norm": 117.4988021850586, + "learning_rate": 5.519611800534347e-07, + "loss": 12.3979, + "step": 432060 + }, + { + "epoch": 0.8728087363696231, + "grad_norm": 265.8170166015625, + "learning_rate": 5.518017636777606e-07, + "loss": 14.9616, + "step": 432070 + }, + { + "epoch": 0.8728289370023069, + "grad_norm": 243.57675170898438, + "learning_rate": 5.516423689820655e-07, + "loss": 22.1666, + "step": 432080 + }, + { + "epoch": 0.8728491376349907, + "grad_norm": 331.3977355957031, + "learning_rate": 5.514829959671264e-07, + "loss": 15.4174, + "step": 432090 + }, + { + "epoch": 0.8728693382676745, + "grad_norm": 238.21192932128906, + "learning_rate": 5.51323644633719e-07, + "loss": 6.8709, + "step": 432100 + }, + { + "epoch": 0.8728895389003584, + "grad_norm": 210.3321990966797, + "learning_rate": 5.511643149826206e-07, + "loss": 24.2467, + "step": 432110 + }, + { + "epoch": 0.8729097395330422, + "grad_norm": 312.65826416015625, + "learning_rate": 5.510050070146083e-07, + "loss": 14.3068, + "step": 432120 + }, + { + "epoch": 0.872929940165726, + "grad_norm": 549.9442138671875, + "learning_rate": 5.508457207304574e-07, + "loss": 24.7624, + "step": 432130 + }, + { + "epoch": 0.8729501407984098, + "grad_norm": 259.71240234375, + "learning_rate": 5.506864561309455e-07, + "loss": 13.4766, + "step": 432140 + }, + { + "epoch": 0.8729703414310936, + "grad_norm": 175.85206604003906, + "learning_rate": 5.505272132168471e-07, + "loss": 12.3887, + "step": 432150 + }, + { + "epoch": 0.8729905420637775, + "grad_norm": 409.7877197265625, + "learning_rate": 5.503679919889404e-07, + "loss": 18.4446, + "step": 432160 + }, + { + "epoch": 0.8730107426964613, + "grad_norm": 258.2601623535156, + "learning_rate": 5.502087924480005e-07, + "loss": 26.1197, + "step": 432170 + }, + { + "epoch": 0.8730309433291451, + "grad_norm": 183.85809326171875, + "learning_rate": 5.50049614594802e-07, + "loss": 14.9734, + "step": 432180 + }, + { + "epoch": 0.8730511439618289, + "grad_norm": 342.5546569824219, + "learning_rate": 5.498904584301235e-07, + "loss": 22.1346, + "step": 432190 + }, + { + "epoch": 0.8730713445945127, + "grad_norm": 303.18170166015625, + "learning_rate": 5.497313239547374e-07, + "loss": 14.67, + "step": 432200 + }, + { + "epoch": 0.8730915452271966, + "grad_norm": 280.4842529296875, + "learning_rate": 5.49572211169423e-07, + "loss": 24.5363, + "step": 432210 + }, + { + "epoch": 0.8731117458598804, + "grad_norm": 332.1897277832031, + "learning_rate": 5.49413120074952e-07, + "loss": 21.0212, + "step": 432220 + }, + { + "epoch": 0.8731319464925641, + "grad_norm": 602.5610961914062, + "learning_rate": 5.492540506721033e-07, + "loss": 16.7147, + "step": 432230 + }, + { + "epoch": 0.8731521471252479, + "grad_norm": 0.8453227281570435, + "learning_rate": 5.490950029616504e-07, + "loss": 30.1204, + "step": 432240 + }, + { + "epoch": 0.8731723477579317, + "grad_norm": 269.9468994140625, + "learning_rate": 5.489359769443675e-07, + "loss": 21.4242, + "step": 432250 + }, + { + "epoch": 0.8731925483906156, + "grad_norm": 805.483154296875, + "learning_rate": 5.487769726210318e-07, + "loss": 16.6819, + "step": 432260 + }, + { + "epoch": 0.8732127490232994, + "grad_norm": 526.4840698242188, + "learning_rate": 5.486179899924171e-07, + "loss": 12.0968, + "step": 432270 + }, + { + "epoch": 0.8732329496559832, + "grad_norm": 212.0432891845703, + "learning_rate": 5.484590290592979e-07, + "loss": 21.728, + "step": 432280 + }, + { + "epoch": 0.873253150288667, + "grad_norm": 108.55610656738281, + "learning_rate": 5.483000898224494e-07, + "loss": 10.9841, + "step": 432290 + }, + { + "epoch": 0.8732733509213508, + "grad_norm": 398.4762878417969, + "learning_rate": 5.48141172282648e-07, + "loss": 55.5959, + "step": 432300 + }, + { + "epoch": 0.8732935515540347, + "grad_norm": 319.7445983886719, + "learning_rate": 5.479822764406645e-07, + "loss": 23.2687, + "step": 432310 + }, + { + "epoch": 0.8733137521867185, + "grad_norm": 391.5088195800781, + "learning_rate": 5.478234022972756e-07, + "loss": 14.2518, + "step": 432320 + }, + { + "epoch": 0.8733339528194023, + "grad_norm": 249.58383178710938, + "learning_rate": 5.476645498532567e-07, + "loss": 11.0005, + "step": 432330 + }, + { + "epoch": 0.8733541534520861, + "grad_norm": 648.7304077148438, + "learning_rate": 5.475057191093808e-07, + "loss": 16.0999, + "step": 432340 + }, + { + "epoch": 0.8733743540847699, + "grad_norm": 190.3310089111328, + "learning_rate": 5.473469100664208e-07, + "loss": 19.4721, + "step": 432350 + }, + { + "epoch": 0.8733945547174538, + "grad_norm": 461.1125183105469, + "learning_rate": 5.471881227251518e-07, + "loss": 14.6923, + "step": 432360 + }, + { + "epoch": 0.8734147553501376, + "grad_norm": 504.35113525390625, + "learning_rate": 5.470293570863499e-07, + "loss": 20.7561, + "step": 432370 + }, + { + "epoch": 0.8734349559828214, + "grad_norm": 109.82476043701172, + "learning_rate": 5.46870613150785e-07, + "loss": 5.3293, + "step": 432380 + }, + { + "epoch": 0.8734551566155052, + "grad_norm": 343.56341552734375, + "learning_rate": 5.467118909192326e-07, + "loss": 22.4527, + "step": 432390 + }, + { + "epoch": 0.873475357248189, + "grad_norm": 465.1142578125, + "learning_rate": 5.46553190392467e-07, + "loss": 16.6082, + "step": 432400 + }, + { + "epoch": 0.8734955578808729, + "grad_norm": 420.0114440917969, + "learning_rate": 5.46394511571261e-07, + "loss": 11.3463, + "step": 432410 + }, + { + "epoch": 0.8735157585135567, + "grad_norm": 172.47666931152344, + "learning_rate": 5.462358544563873e-07, + "loss": 14.4073, + "step": 432420 + }, + { + "epoch": 0.8735359591462405, + "grad_norm": 168.5302734375, + "learning_rate": 5.460772190486208e-07, + "loss": 13.0114, + "step": 432430 + }, + { + "epoch": 0.8735561597789243, + "grad_norm": 69.3399429321289, + "learning_rate": 5.459186053487336e-07, + "loss": 14.7425, + "step": 432440 + }, + { + "epoch": 0.8735763604116081, + "grad_norm": 761.65478515625, + "learning_rate": 5.457600133574987e-07, + "loss": 24.4876, + "step": 432450 + }, + { + "epoch": 0.873596561044292, + "grad_norm": 211.42430114746094, + "learning_rate": 5.456014430756895e-07, + "loss": 15.9545, + "step": 432460 + }, + { + "epoch": 0.8736167616769758, + "grad_norm": 216.61505126953125, + "learning_rate": 5.454428945040774e-07, + "loss": 12.5682, + "step": 432470 + }, + { + "epoch": 0.8736369623096596, + "grad_norm": 194.28504943847656, + "learning_rate": 5.452843676434377e-07, + "loss": 17.7277, + "step": 432480 + }, + { + "epoch": 0.8736571629423433, + "grad_norm": 80.29365539550781, + "learning_rate": 5.45125862494541e-07, + "loss": 20.0275, + "step": 432490 + }, + { + "epoch": 0.8736773635750271, + "grad_norm": 152.5752716064453, + "learning_rate": 5.449673790581611e-07, + "loss": 24.6487, + "step": 432500 + }, + { + "epoch": 0.8736975642077109, + "grad_norm": 554.6441650390625, + "learning_rate": 5.448089173350696e-07, + "loss": 9.1202, + "step": 432510 + }, + { + "epoch": 0.8737177648403948, + "grad_norm": 277.0372314453125, + "learning_rate": 5.446504773260386e-07, + "loss": 34.0796, + "step": 432520 + }, + { + "epoch": 0.8737379654730786, + "grad_norm": 0.0, + "learning_rate": 5.44492059031842e-07, + "loss": 14.672, + "step": 432530 + }, + { + "epoch": 0.8737581661057624, + "grad_norm": 554.496337890625, + "learning_rate": 5.443336624532492e-07, + "loss": 18.5186, + "step": 432540 + }, + { + "epoch": 0.8737783667384462, + "grad_norm": 546.7005615234375, + "learning_rate": 5.44175287591035e-07, + "loss": 16.7326, + "step": 432550 + }, + { + "epoch": 0.87379856737113, + "grad_norm": 198.7493896484375, + "learning_rate": 5.440169344459701e-07, + "loss": 31.7625, + "step": 432560 + }, + { + "epoch": 0.8738187680038139, + "grad_norm": 22.46010398864746, + "learning_rate": 5.438586030188247e-07, + "loss": 9.2043, + "step": 432570 + }, + { + "epoch": 0.8738389686364977, + "grad_norm": 207.79080200195312, + "learning_rate": 5.437002933103724e-07, + "loss": 18.9447, + "step": 432580 + }, + { + "epoch": 0.8738591692691815, + "grad_norm": 162.22372436523438, + "learning_rate": 5.435420053213863e-07, + "loss": 18.3355, + "step": 432590 + }, + { + "epoch": 0.8738793699018653, + "grad_norm": 187.2777099609375, + "learning_rate": 5.433837390526341e-07, + "loss": 14.0661, + "step": 432600 + }, + { + "epoch": 0.8738995705345491, + "grad_norm": 232.63430786132812, + "learning_rate": 5.432254945048887e-07, + "loss": 10.7602, + "step": 432610 + }, + { + "epoch": 0.873919771167233, + "grad_norm": 56.217803955078125, + "learning_rate": 5.430672716789232e-07, + "loss": 10.8976, + "step": 432620 + }, + { + "epoch": 0.8739399717999168, + "grad_norm": 368.2447509765625, + "learning_rate": 5.429090705755069e-07, + "loss": 11.1776, + "step": 432630 + }, + { + "epoch": 0.8739601724326006, + "grad_norm": 455.3416748046875, + "learning_rate": 5.427508911954105e-07, + "loss": 13.8417, + "step": 432640 + }, + { + "epoch": 0.8739803730652844, + "grad_norm": 212.73867797851562, + "learning_rate": 5.425927335394054e-07, + "loss": 17.5974, + "step": 432650 + }, + { + "epoch": 0.8740005736979682, + "grad_norm": 362.45147705078125, + "learning_rate": 5.424345976082645e-07, + "loss": 18.1649, + "step": 432660 + }, + { + "epoch": 0.8740207743306521, + "grad_norm": 493.18914794921875, + "learning_rate": 5.42276483402755e-07, + "loss": 39.1498, + "step": 432670 + }, + { + "epoch": 0.8740409749633359, + "grad_norm": 467.24139404296875, + "learning_rate": 5.421183909236494e-07, + "loss": 12.8275, + "step": 432680 + }, + { + "epoch": 0.8740611755960197, + "grad_norm": 296.6934814453125, + "learning_rate": 5.419603201717189e-07, + "loss": 22.2164, + "step": 432690 + }, + { + "epoch": 0.8740813762287035, + "grad_norm": 17.59931182861328, + "learning_rate": 5.418022711477333e-07, + "loss": 19.6711, + "step": 432700 + }, + { + "epoch": 0.8741015768613873, + "grad_norm": 393.2613220214844, + "learning_rate": 5.416442438524616e-07, + "loss": 38.0818, + "step": 432710 + }, + { + "epoch": 0.8741217774940712, + "grad_norm": 290.41900634765625, + "learning_rate": 5.414862382866759e-07, + "loss": 17.1718, + "step": 432720 + }, + { + "epoch": 0.874141978126755, + "grad_norm": 296.6372375488281, + "learning_rate": 5.413282544511455e-07, + "loss": 12.0898, + "step": 432730 + }, + { + "epoch": 0.8741621787594387, + "grad_norm": 202.93516540527344, + "learning_rate": 5.4117029234664e-07, + "loss": 14.88, + "step": 432740 + }, + { + "epoch": 0.8741823793921225, + "grad_norm": 21.219100952148438, + "learning_rate": 5.410123519739302e-07, + "loss": 12.9098, + "step": 432750 + }, + { + "epoch": 0.8742025800248063, + "grad_norm": 372.3573913574219, + "learning_rate": 5.408544333337845e-07, + "loss": 15.8375, + "step": 432760 + }, + { + "epoch": 0.8742227806574902, + "grad_norm": 146.60308837890625, + "learning_rate": 5.406965364269745e-07, + "loss": 13.5756, + "step": 432770 + }, + { + "epoch": 0.874242981290174, + "grad_norm": 507.54498291015625, + "learning_rate": 5.405386612542685e-07, + "loss": 8.8647, + "step": 432780 + }, + { + "epoch": 0.8742631819228578, + "grad_norm": 238.99053955078125, + "learning_rate": 5.403808078164358e-07, + "loss": 8.5932, + "step": 432790 + }, + { + "epoch": 0.8742833825555416, + "grad_norm": 383.38189697265625, + "learning_rate": 5.402229761142464e-07, + "loss": 16.0073, + "step": 432800 + }, + { + "epoch": 0.8743035831882254, + "grad_norm": 186.34523010253906, + "learning_rate": 5.400651661484684e-07, + "loss": 7.5963, + "step": 432810 + }, + { + "epoch": 0.8743237838209093, + "grad_norm": 380.567138671875, + "learning_rate": 5.399073779198732e-07, + "loss": 15.2616, + "step": 432820 + }, + { + "epoch": 0.8743439844535931, + "grad_norm": 8.406536102294922, + "learning_rate": 5.397496114292278e-07, + "loss": 28.221, + "step": 432830 + }, + { + "epoch": 0.8743641850862769, + "grad_norm": 315.7178955078125, + "learning_rate": 5.395918666773026e-07, + "loss": 17.5182, + "step": 432840 + }, + { + "epoch": 0.8743843857189607, + "grad_norm": 289.23663330078125, + "learning_rate": 5.394341436648653e-07, + "loss": 22.2257, + "step": 432850 + }, + { + "epoch": 0.8744045863516445, + "grad_norm": 133.96200561523438, + "learning_rate": 5.392764423926844e-07, + "loss": 19.679, + "step": 432860 + }, + { + "epoch": 0.8744247869843284, + "grad_norm": 226.0999755859375, + "learning_rate": 5.391187628615296e-07, + "loss": 15.2748, + "step": 432870 + }, + { + "epoch": 0.8744449876170122, + "grad_norm": 555.56201171875, + "learning_rate": 5.389611050721694e-07, + "loss": 21.7419, + "step": 432880 + }, + { + "epoch": 0.874465188249696, + "grad_norm": 433.8108825683594, + "learning_rate": 5.388034690253701e-07, + "loss": 9.8576, + "step": 432890 + }, + { + "epoch": 0.8744853888823798, + "grad_norm": 171.49124145507812, + "learning_rate": 5.386458547219026e-07, + "loss": 21.4585, + "step": 432900 + }, + { + "epoch": 0.8745055895150636, + "grad_norm": 192.7606658935547, + "learning_rate": 5.384882621625353e-07, + "loss": 14.1203, + "step": 432910 + }, + { + "epoch": 0.8745257901477475, + "grad_norm": 1570.7572021484375, + "learning_rate": 5.383306913480335e-07, + "loss": 32.1453, + "step": 432920 + }, + { + "epoch": 0.8745459907804313, + "grad_norm": 495.3536376953125, + "learning_rate": 5.381731422791664e-07, + "loss": 25.808, + "step": 432930 + }, + { + "epoch": 0.8745661914131151, + "grad_norm": 662.15869140625, + "learning_rate": 5.380156149567034e-07, + "loss": 17.0348, + "step": 432940 + }, + { + "epoch": 0.8745863920457989, + "grad_norm": 261.1503601074219, + "learning_rate": 5.378581093814112e-07, + "loss": 9.5443, + "step": 432950 + }, + { + "epoch": 0.8746065926784827, + "grad_norm": 349.7860412597656, + "learning_rate": 5.377006255540562e-07, + "loss": 20.0274, + "step": 432960 + }, + { + "epoch": 0.8746267933111666, + "grad_norm": 153.81024169921875, + "learning_rate": 5.375431634754074e-07, + "loss": 16.5702, + "step": 432970 + }, + { + "epoch": 0.8746469939438504, + "grad_norm": 734.0849609375, + "learning_rate": 5.373857231462337e-07, + "loss": 35.6784, + "step": 432980 + }, + { + "epoch": 0.8746671945765342, + "grad_norm": 1650.5621337890625, + "learning_rate": 5.372283045672994e-07, + "loss": 32.2598, + "step": 432990 + }, + { + "epoch": 0.8746873952092179, + "grad_norm": 35.7860107421875, + "learning_rate": 5.370709077393721e-07, + "loss": 24.6236, + "step": 433000 + }, + { + "epoch": 0.8747075958419017, + "grad_norm": 258.3363952636719, + "learning_rate": 5.369135326632219e-07, + "loss": 27.3438, + "step": 433010 + }, + { + "epoch": 0.8747277964745855, + "grad_norm": 46.31103515625, + "learning_rate": 5.367561793396132e-07, + "loss": 17.3969, + "step": 433020 + }, + { + "epoch": 0.8747479971072694, + "grad_norm": 448.46282958984375, + "learning_rate": 5.365988477693124e-07, + "loss": 19.1224, + "step": 433030 + }, + { + "epoch": 0.8747681977399532, + "grad_norm": 178.7939453125, + "learning_rate": 5.364415379530891e-07, + "loss": 25.5598, + "step": 433040 + }, + { + "epoch": 0.874788398372637, + "grad_norm": 209.88671875, + "learning_rate": 5.362842498917081e-07, + "loss": 17.2414, + "step": 433050 + }, + { + "epoch": 0.8748085990053208, + "grad_norm": 262.6157531738281, + "learning_rate": 5.36126983585935e-07, + "loss": 14.1692, + "step": 433060 + }, + { + "epoch": 0.8748287996380046, + "grad_norm": 541.3820190429688, + "learning_rate": 5.359697390365387e-07, + "loss": 17.0255, + "step": 433070 + }, + { + "epoch": 0.8748490002706885, + "grad_norm": 379.27484130859375, + "learning_rate": 5.35812516244284e-07, + "loss": 18.0919, + "step": 433080 + }, + { + "epoch": 0.8748692009033723, + "grad_norm": 590.208251953125, + "learning_rate": 5.356553152099381e-07, + "loss": 24.4596, + "step": 433090 + }, + { + "epoch": 0.8748894015360561, + "grad_norm": 436.2502746582031, + "learning_rate": 5.354981359342659e-07, + "loss": 19.3351, + "step": 433100 + }, + { + "epoch": 0.8749096021687399, + "grad_norm": 154.7361297607422, + "learning_rate": 5.353409784180352e-07, + "loss": 6.1398, + "step": 433110 + }, + { + "epoch": 0.8749298028014237, + "grad_norm": 5.425511360168457, + "learning_rate": 5.35183842662011e-07, + "loss": 16.9334, + "step": 433120 + }, + { + "epoch": 0.8749500034341076, + "grad_norm": 375.8233947753906, + "learning_rate": 5.350267286669585e-07, + "loss": 10.4431, + "step": 433130 + }, + { + "epoch": 0.8749702040667914, + "grad_norm": 1288.437744140625, + "learning_rate": 5.348696364336448e-07, + "loss": 17.4038, + "step": 433140 + }, + { + "epoch": 0.8749904046994752, + "grad_norm": 644.7527465820312, + "learning_rate": 5.347125659628344e-07, + "loss": 18.2653, + "step": 433150 + }, + { + "epoch": 0.875010605332159, + "grad_norm": 102.53956604003906, + "learning_rate": 5.345555172552941e-07, + "loss": 32.7191, + "step": 433160 + }, + { + "epoch": 0.8750308059648428, + "grad_norm": 643.939208984375, + "learning_rate": 5.343984903117889e-07, + "loss": 10.4285, + "step": 433170 + }, + { + "epoch": 0.8750510065975267, + "grad_norm": 478.85107421875, + "learning_rate": 5.342414851330824e-07, + "loss": 28.7579, + "step": 433180 + }, + { + "epoch": 0.8750712072302105, + "grad_norm": 139.83828735351562, + "learning_rate": 5.340845017199425e-07, + "loss": 22.7458, + "step": 433190 + }, + { + "epoch": 0.8750914078628943, + "grad_norm": 238.2321319580078, + "learning_rate": 5.339275400731331e-07, + "loss": 24.4583, + "step": 433200 + }, + { + "epoch": 0.8751116084955781, + "grad_norm": 358.0097961425781, + "learning_rate": 5.337706001934184e-07, + "loss": 12.0504, + "step": 433210 + }, + { + "epoch": 0.8751318091282619, + "grad_norm": 76.72681427001953, + "learning_rate": 5.33613682081564e-07, + "loss": 15.6278, + "step": 433220 + }, + { + "epoch": 0.8751520097609458, + "grad_norm": 533.8015747070312, + "learning_rate": 5.334567857383354e-07, + "loss": 29.6115, + "step": 433230 + }, + { + "epoch": 0.8751722103936296, + "grad_norm": 436.1002502441406, + "learning_rate": 5.332999111644971e-07, + "loss": 17.0068, + "step": 433240 + }, + { + "epoch": 0.8751924110263134, + "grad_norm": 372.8768310546875, + "learning_rate": 5.331430583608122e-07, + "loss": 17.8014, + "step": 433250 + }, + { + "epoch": 0.8752126116589971, + "grad_norm": 635.5821533203125, + "learning_rate": 5.329862273280462e-07, + "loss": 28.728, + "step": 433260 + }, + { + "epoch": 0.8752328122916809, + "grad_norm": 17.68754768371582, + "learning_rate": 5.328294180669658e-07, + "loss": 15.5665, + "step": 433270 + }, + { + "epoch": 0.8752530129243647, + "grad_norm": 138.2192840576172, + "learning_rate": 5.326726305783308e-07, + "loss": 25.2673, + "step": 433280 + }, + { + "epoch": 0.8752732135570486, + "grad_norm": 27.268882751464844, + "learning_rate": 5.325158648629075e-07, + "loss": 18.3581, + "step": 433290 + }, + { + "epoch": 0.8752934141897324, + "grad_norm": 136.46170043945312, + "learning_rate": 5.323591209214612e-07, + "loss": 21.0438, + "step": 433300 + }, + { + "epoch": 0.8753136148224162, + "grad_norm": 241.7820587158203, + "learning_rate": 5.322023987547547e-07, + "loss": 13.798, + "step": 433310 + }, + { + "epoch": 0.8753338154551, + "grad_norm": 367.8775329589844, + "learning_rate": 5.320456983635508e-07, + "loss": 16.049, + "step": 433320 + }, + { + "epoch": 0.8753540160877838, + "grad_norm": 531.8893432617188, + "learning_rate": 5.318890197486154e-07, + "loss": 20.8812, + "step": 433330 + }, + { + "epoch": 0.8753742167204677, + "grad_norm": 166.06248474121094, + "learning_rate": 5.317323629107108e-07, + "loss": 11.5503, + "step": 433340 + }, + { + "epoch": 0.8753944173531515, + "grad_norm": 269.6576232910156, + "learning_rate": 5.315757278505995e-07, + "loss": 18.7279, + "step": 433350 + }, + { + "epoch": 0.8754146179858353, + "grad_norm": 50.614158630371094, + "learning_rate": 5.314191145690473e-07, + "loss": 27.3247, + "step": 433360 + }, + { + "epoch": 0.8754348186185191, + "grad_norm": 360.9093017578125, + "learning_rate": 5.312625230668155e-07, + "loss": 22.016, + "step": 433370 + }, + { + "epoch": 0.875455019251203, + "grad_norm": 285.93548583984375, + "learning_rate": 5.311059533446694e-07, + "loss": 13.3252, + "step": 433380 + }, + { + "epoch": 0.8754752198838868, + "grad_norm": 201.05575561523438, + "learning_rate": 5.309494054033704e-07, + "loss": 11.7539, + "step": 433390 + }, + { + "epoch": 0.8754954205165706, + "grad_norm": 509.9866638183594, + "learning_rate": 5.307928792436812e-07, + "loss": 22.6127, + "step": 433400 + }, + { + "epoch": 0.8755156211492544, + "grad_norm": 513.7008056640625, + "learning_rate": 5.306363748663668e-07, + "loss": 26.1914, + "step": 433410 + }, + { + "epoch": 0.8755358217819382, + "grad_norm": 431.09210205078125, + "learning_rate": 5.304798922721871e-07, + "loss": 15.9364, + "step": 433420 + }, + { + "epoch": 0.875556022414622, + "grad_norm": 260.40850830078125, + "learning_rate": 5.303234314619071e-07, + "loss": 10.2867, + "step": 433430 + }, + { + "epoch": 0.8755762230473059, + "grad_norm": 502.4874267578125, + "learning_rate": 5.301669924362884e-07, + "loss": 23.3825, + "step": 433440 + }, + { + "epoch": 0.8755964236799897, + "grad_norm": 321.4576416015625, + "learning_rate": 5.300105751960943e-07, + "loss": 17.2701, + "step": 433450 + }, + { + "epoch": 0.8756166243126735, + "grad_norm": 139.70773315429688, + "learning_rate": 5.298541797420864e-07, + "loss": 12.7987, + "step": 433460 + }, + { + "epoch": 0.8756368249453573, + "grad_norm": 582.2942504882812, + "learning_rate": 5.296978060750257e-07, + "loss": 16.9464, + "step": 433470 + }, + { + "epoch": 0.8756570255780411, + "grad_norm": 487.0493469238281, + "learning_rate": 5.295414541956773e-07, + "loss": 24.6904, + "step": 433480 + }, + { + "epoch": 0.875677226210725, + "grad_norm": 231.55262756347656, + "learning_rate": 5.293851241048015e-07, + "loss": 11.211, + "step": 433490 + }, + { + "epoch": 0.8756974268434088, + "grad_norm": 110.7741928100586, + "learning_rate": 5.292288158031595e-07, + "loss": 15.8149, + "step": 433500 + }, + { + "epoch": 0.8757176274760925, + "grad_norm": 195.20785522460938, + "learning_rate": 5.290725292915138e-07, + "loss": 15.7708, + "step": 433510 + }, + { + "epoch": 0.8757378281087763, + "grad_norm": 195.65206909179688, + "learning_rate": 5.28916264570628e-07, + "loss": 20.5607, + "step": 433520 + }, + { + "epoch": 0.8757580287414601, + "grad_norm": 98.87440490722656, + "learning_rate": 5.287600216412609e-07, + "loss": 21.9518, + "step": 433530 + }, + { + "epoch": 0.875778229374144, + "grad_norm": 632.74267578125, + "learning_rate": 5.286038005041744e-07, + "loss": 27.4733, + "step": 433540 + }, + { + "epoch": 0.8757984300068278, + "grad_norm": 154.83694458007812, + "learning_rate": 5.28447601160132e-07, + "loss": 30.9031, + "step": 433550 + }, + { + "epoch": 0.8758186306395116, + "grad_norm": 567.2290649414062, + "learning_rate": 5.28291423609894e-07, + "loss": 20.3919, + "step": 433560 + }, + { + "epoch": 0.8758388312721954, + "grad_norm": 287.53558349609375, + "learning_rate": 5.281352678542195e-07, + "loss": 13.9235, + "step": 433570 + }, + { + "epoch": 0.8758590319048792, + "grad_norm": 43.357112884521484, + "learning_rate": 5.279791338938717e-07, + "loss": 22.6026, + "step": 433580 + }, + { + "epoch": 0.8758792325375631, + "grad_norm": 369.96173095703125, + "learning_rate": 5.278230217296132e-07, + "loss": 26.5123, + "step": 433590 + }, + { + "epoch": 0.8758994331702469, + "grad_norm": 405.46685791015625, + "learning_rate": 5.276669313622013e-07, + "loss": 18.1153, + "step": 433600 + }, + { + "epoch": 0.8759196338029307, + "grad_norm": 311.51251220703125, + "learning_rate": 5.275108627923975e-07, + "loss": 21.1751, + "step": 433610 + }, + { + "epoch": 0.8759398344356145, + "grad_norm": 322.4067077636719, + "learning_rate": 5.273548160209651e-07, + "loss": 20.5398, + "step": 433620 + }, + { + "epoch": 0.8759600350682983, + "grad_norm": 423.6556396484375, + "learning_rate": 5.271987910486625e-07, + "loss": 20.9919, + "step": 433630 + }, + { + "epoch": 0.8759802357009822, + "grad_norm": 623.9129638671875, + "learning_rate": 5.270427878762496e-07, + "loss": 16.2219, + "step": 433640 + }, + { + "epoch": 0.876000436333666, + "grad_norm": 353.4367370605469, + "learning_rate": 5.268868065044886e-07, + "loss": 18.939, + "step": 433650 + }, + { + "epoch": 0.8760206369663498, + "grad_norm": 227.28196716308594, + "learning_rate": 5.267308469341387e-07, + "loss": 20.1464, + "step": 433660 + }, + { + "epoch": 0.8760408375990336, + "grad_norm": 672.3381958007812, + "learning_rate": 5.265749091659589e-07, + "loss": 21.6426, + "step": 433670 + }, + { + "epoch": 0.8760610382317174, + "grad_norm": 361.6789855957031, + "learning_rate": 5.264189932007119e-07, + "loss": 13.6678, + "step": 433680 + }, + { + "epoch": 0.8760812388644013, + "grad_norm": 427.6011657714844, + "learning_rate": 5.262630990391549e-07, + "loss": 19.2339, + "step": 433690 + }, + { + "epoch": 0.8761014394970851, + "grad_norm": 503.2677307128906, + "learning_rate": 5.2610722668205e-07, + "loss": 19.9637, + "step": 433700 + }, + { + "epoch": 0.8761216401297689, + "grad_norm": 466.8319396972656, + "learning_rate": 5.259513761301549e-07, + "loss": 18.7197, + "step": 433710 + }, + { + "epoch": 0.8761418407624527, + "grad_norm": 389.1748046875, + "learning_rate": 5.257955473842314e-07, + "loss": 13.7403, + "step": 433720 + }, + { + "epoch": 0.8761620413951365, + "grad_norm": 258.1947937011719, + "learning_rate": 5.25639740445037e-07, + "loss": 17.3214, + "step": 433730 + }, + { + "epoch": 0.8761822420278204, + "grad_norm": 548.7359619140625, + "learning_rate": 5.254839553133312e-07, + "loss": 11.7706, + "step": 433740 + }, + { + "epoch": 0.8762024426605042, + "grad_norm": 1.1916102170944214, + "learning_rate": 5.253281919898751e-07, + "loss": 31.5134, + "step": 433750 + }, + { + "epoch": 0.876222643293188, + "grad_norm": 550.4560546875, + "learning_rate": 5.251724504754258e-07, + "loss": 10.0177, + "step": 433760 + }, + { + "epoch": 0.8762428439258717, + "grad_norm": 305.590576171875, + "learning_rate": 5.250167307707437e-07, + "loss": 17.4997, + "step": 433770 + }, + { + "epoch": 0.8762630445585555, + "grad_norm": 482.72845458984375, + "learning_rate": 5.24861032876588e-07, + "loss": 22.2059, + "step": 433780 + }, + { + "epoch": 0.8762832451912393, + "grad_norm": 302.8846435546875, + "learning_rate": 5.247053567937155e-07, + "loss": 21.9938, + "step": 433790 + }, + { + "epoch": 0.8763034458239232, + "grad_norm": 25.524728775024414, + "learning_rate": 5.245497025228874e-07, + "loss": 32.624, + "step": 433800 + }, + { + "epoch": 0.876323646456607, + "grad_norm": 563.2052612304688, + "learning_rate": 5.243940700648609e-07, + "loss": 14.6061, + "step": 433810 + }, + { + "epoch": 0.8763438470892908, + "grad_norm": 260.39739990234375, + "learning_rate": 5.242384594203942e-07, + "loss": 29.1842, + "step": 433820 + }, + { + "epoch": 0.8763640477219746, + "grad_norm": 326.4090270996094, + "learning_rate": 5.240828705902462e-07, + "loss": 6.721, + "step": 433830 + }, + { + "epoch": 0.8763842483546584, + "grad_norm": 285.9054870605469, + "learning_rate": 5.239273035751763e-07, + "loss": 28.3131, + "step": 433840 + }, + { + "epoch": 0.8764044489873423, + "grad_norm": 558.6888427734375, + "learning_rate": 5.237717583759421e-07, + "loss": 18.2302, + "step": 433850 + }, + { + "epoch": 0.8764246496200261, + "grad_norm": 381.3103332519531, + "learning_rate": 5.236162349933005e-07, + "loss": 16.1051, + "step": 433860 + }, + { + "epoch": 0.8764448502527099, + "grad_norm": 773.1160278320312, + "learning_rate": 5.234607334280117e-07, + "loss": 13.0957, + "step": 433870 + }, + { + "epoch": 0.8764650508853937, + "grad_norm": 564.5609130859375, + "learning_rate": 5.23305253680832e-07, + "loss": 19.1361, + "step": 433880 + }, + { + "epoch": 0.8764852515180775, + "grad_norm": 561.9713745117188, + "learning_rate": 5.231497957525184e-07, + "loss": 17.2799, + "step": 433890 + }, + { + "epoch": 0.8765054521507614, + "grad_norm": 131.5213165283203, + "learning_rate": 5.229943596438297e-07, + "loss": 18.6086, + "step": 433900 + }, + { + "epoch": 0.8765256527834452, + "grad_norm": 312.5356140136719, + "learning_rate": 5.22838945355525e-07, + "loss": 15.7515, + "step": 433910 + }, + { + "epoch": 0.876545853416129, + "grad_norm": 225.6712646484375, + "learning_rate": 5.2268355288836e-07, + "loss": 7.8059, + "step": 433920 + }, + { + "epoch": 0.8765660540488128, + "grad_norm": 158.82826232910156, + "learning_rate": 5.225281822430911e-07, + "loss": 17.0044, + "step": 433930 + }, + { + "epoch": 0.8765862546814966, + "grad_norm": 908.42578125, + "learning_rate": 5.22372833420478e-07, + "loss": 33.0427, + "step": 433940 + }, + { + "epoch": 0.8766064553141805, + "grad_norm": 103.19387817382812, + "learning_rate": 5.222175064212764e-07, + "loss": 9.7731, + "step": 433950 + }, + { + "epoch": 0.8766266559468643, + "grad_norm": 747.3312377929688, + "learning_rate": 5.220622012462429e-07, + "loss": 25.4204, + "step": 433960 + }, + { + "epoch": 0.8766468565795481, + "grad_norm": 188.887939453125, + "learning_rate": 5.219069178961361e-07, + "loss": 17.6252, + "step": 433970 + }, + { + "epoch": 0.8766670572122319, + "grad_norm": 572.9087524414062, + "learning_rate": 5.217516563717107e-07, + "loss": 26.657, + "step": 433980 + }, + { + "epoch": 0.8766872578449157, + "grad_norm": 1910.318603515625, + "learning_rate": 5.215964166737258e-07, + "loss": 22.2355, + "step": 433990 + }, + { + "epoch": 0.8767074584775996, + "grad_norm": 343.5889587402344, + "learning_rate": 5.214411988029355e-07, + "loss": 14.9556, + "step": 434000 + }, + { + "epoch": 0.8767276591102834, + "grad_norm": 452.5372619628906, + "learning_rate": 5.212860027600986e-07, + "loss": 11.9828, + "step": 434010 + }, + { + "epoch": 0.8767478597429671, + "grad_norm": 744.7176513671875, + "learning_rate": 5.21130828545971e-07, + "loss": 9.0645, + "step": 434020 + }, + { + "epoch": 0.8767680603756509, + "grad_norm": 317.92132568359375, + "learning_rate": 5.209756761613072e-07, + "loss": 21.652, + "step": 434030 + }, + { + "epoch": 0.8767882610083347, + "grad_norm": 102.97876739501953, + "learning_rate": 5.208205456068655e-07, + "loss": 20.3339, + "step": 434040 + }, + { + "epoch": 0.8768084616410186, + "grad_norm": 526.6455078125, + "learning_rate": 5.206654368834002e-07, + "loss": 29.8651, + "step": 434050 + }, + { + "epoch": 0.8768286622737024, + "grad_norm": 283.0181579589844, + "learning_rate": 5.205103499916697e-07, + "loss": 10.6281, + "step": 434060 + }, + { + "epoch": 0.8768488629063862, + "grad_norm": 1044.4290771484375, + "learning_rate": 5.203552849324284e-07, + "loss": 18.4944, + "step": 434070 + }, + { + "epoch": 0.87686906353907, + "grad_norm": 282.4901428222656, + "learning_rate": 5.202002417064306e-07, + "loss": 28.6363, + "step": 434080 + }, + { + "epoch": 0.8768892641717538, + "grad_norm": 400.8611755371094, + "learning_rate": 5.200452203144352e-07, + "loss": 22.2381, + "step": 434090 + }, + { + "epoch": 0.8769094648044377, + "grad_norm": 384.8634033203125, + "learning_rate": 5.198902207571955e-07, + "loss": 12.7948, + "step": 434100 + }, + { + "epoch": 0.8769296654371215, + "grad_norm": 218.68125915527344, + "learning_rate": 5.197352430354669e-07, + "loss": 19.107, + "step": 434110 + }, + { + "epoch": 0.8769498660698053, + "grad_norm": 254.05931091308594, + "learning_rate": 5.19580287150005e-07, + "loss": 33.2748, + "step": 434120 + }, + { + "epoch": 0.8769700667024891, + "grad_norm": 872.2964477539062, + "learning_rate": 5.194253531015675e-07, + "loss": 17.5088, + "step": 434130 + }, + { + "epoch": 0.8769902673351729, + "grad_norm": 439.51593017578125, + "learning_rate": 5.192704408909055e-07, + "loss": 17.5281, + "step": 434140 + }, + { + "epoch": 0.8770104679678568, + "grad_norm": 149.76513671875, + "learning_rate": 5.191155505187756e-07, + "loss": 17.1009, + "step": 434150 + }, + { + "epoch": 0.8770306686005406, + "grad_norm": 145.25714111328125, + "learning_rate": 5.189606819859344e-07, + "loss": 11.5807, + "step": 434160 + }, + { + "epoch": 0.8770508692332244, + "grad_norm": 513.3689575195312, + "learning_rate": 5.188058352931352e-07, + "loss": 18.9419, + "step": 434170 + }, + { + "epoch": 0.8770710698659082, + "grad_norm": 817.205322265625, + "learning_rate": 5.186510104411319e-07, + "loss": 21.3864, + "step": 434180 + }, + { + "epoch": 0.877091270498592, + "grad_norm": 311.3988952636719, + "learning_rate": 5.184962074306798e-07, + "loss": 16.6342, + "step": 434190 + }, + { + "epoch": 0.8771114711312759, + "grad_norm": 298.127197265625, + "learning_rate": 5.183414262625364e-07, + "loss": 12.2656, + "step": 434200 + }, + { + "epoch": 0.8771316717639597, + "grad_norm": 143.401611328125, + "learning_rate": 5.181866669374507e-07, + "loss": 12.541, + "step": 434210 + }, + { + "epoch": 0.8771518723966435, + "grad_norm": 945.0745239257812, + "learning_rate": 5.180319294561797e-07, + "loss": 20.0791, + "step": 434220 + }, + { + "epoch": 0.8771720730293273, + "grad_norm": 256.93524169921875, + "learning_rate": 5.178772138194782e-07, + "loss": 15.6674, + "step": 434230 + }, + { + "epoch": 0.8771922736620111, + "grad_norm": 766.1445922851562, + "learning_rate": 5.177225200281e-07, + "loss": 19.7, + "step": 434240 + }, + { + "epoch": 0.877212474294695, + "grad_norm": 280.7933044433594, + "learning_rate": 5.175678480827972e-07, + "loss": 15.9839, + "step": 434250 + }, + { + "epoch": 0.8772326749273788, + "grad_norm": 542.4453735351562, + "learning_rate": 5.174131979843266e-07, + "loss": 21.3026, + "step": 434260 + }, + { + "epoch": 0.8772528755600626, + "grad_norm": 401.15228271484375, + "learning_rate": 5.172585697334398e-07, + "loss": 22.7436, + "step": 434270 + }, + { + "epoch": 0.8772730761927463, + "grad_norm": 469.0462341308594, + "learning_rate": 5.171039633308905e-07, + "loss": 14.0386, + "step": 434280 + }, + { + "epoch": 0.8772932768254301, + "grad_norm": 233.45587158203125, + "learning_rate": 5.169493787774338e-07, + "loss": 13.6644, + "step": 434290 + }, + { + "epoch": 0.8773134774581139, + "grad_norm": 294.5813293457031, + "learning_rate": 5.167948160738206e-07, + "loss": 10.5622, + "step": 434300 + }, + { + "epoch": 0.8773336780907978, + "grad_norm": 11.028252601623535, + "learning_rate": 5.166402752208071e-07, + "loss": 8.3747, + "step": 434310 + }, + { + "epoch": 0.8773538787234816, + "grad_norm": 437.0497741699219, + "learning_rate": 5.164857562191439e-07, + "loss": 24.6234, + "step": 434320 + }, + { + "epoch": 0.8773740793561654, + "grad_norm": 663.6287231445312, + "learning_rate": 5.163312590695869e-07, + "loss": 21.4791, + "step": 434330 + }, + { + "epoch": 0.8773942799888492, + "grad_norm": 246.58670043945312, + "learning_rate": 5.161767837728871e-07, + "loss": 21.7751, + "step": 434340 + }, + { + "epoch": 0.877414480621533, + "grad_norm": 15.990674018859863, + "learning_rate": 5.160223303297967e-07, + "loss": 13.354, + "step": 434350 + }, + { + "epoch": 0.8774346812542169, + "grad_norm": 264.93389892578125, + "learning_rate": 5.15867898741071e-07, + "loss": 17.3039, + "step": 434360 + }, + { + "epoch": 0.8774548818869007, + "grad_norm": 337.698974609375, + "learning_rate": 5.1571348900746e-07, + "loss": 17.1138, + "step": 434370 + }, + { + "epoch": 0.8774750825195845, + "grad_norm": 445.1646423339844, + "learning_rate": 5.155591011297184e-07, + "loss": 33.2997, + "step": 434380 + }, + { + "epoch": 0.8774952831522683, + "grad_norm": 197.03204345703125, + "learning_rate": 5.154047351085983e-07, + "loss": 12.7977, + "step": 434390 + }, + { + "epoch": 0.8775154837849521, + "grad_norm": 262.495361328125, + "learning_rate": 5.152503909448503e-07, + "loss": 20.8684, + "step": 434400 + }, + { + "epoch": 0.877535684417636, + "grad_norm": 787.7115478515625, + "learning_rate": 5.150960686392293e-07, + "loss": 22.9129, + "step": 434410 + }, + { + "epoch": 0.8775558850503198, + "grad_norm": 477.249755859375, + "learning_rate": 5.149417681924856e-07, + "loss": 20.5048, + "step": 434420 + }, + { + "epoch": 0.8775760856830036, + "grad_norm": 42.68398666381836, + "learning_rate": 5.147874896053711e-07, + "loss": 14.379, + "step": 434430 + }, + { + "epoch": 0.8775962863156874, + "grad_norm": 145.18858337402344, + "learning_rate": 5.146332328786386e-07, + "loss": 21.0583, + "step": 434440 + }, + { + "epoch": 0.8776164869483712, + "grad_norm": 353.35400390625, + "learning_rate": 5.144789980130404e-07, + "loss": 19.2491, + "step": 434450 + }, + { + "epoch": 0.8776366875810551, + "grad_norm": 766.2012939453125, + "learning_rate": 5.143247850093274e-07, + "loss": 22.5219, + "step": 434460 + }, + { + "epoch": 0.8776568882137389, + "grad_norm": 329.0042419433594, + "learning_rate": 5.141705938682506e-07, + "loss": 20.0049, + "step": 434470 + }, + { + "epoch": 0.8776770888464227, + "grad_norm": 256.41632080078125, + "learning_rate": 5.140164245905633e-07, + "loss": 13.8661, + "step": 434480 + }, + { + "epoch": 0.8776972894791065, + "grad_norm": 472.8299255371094, + "learning_rate": 5.138622771770157e-07, + "loss": 19.9039, + "step": 434490 + }, + { + "epoch": 0.8777174901117903, + "grad_norm": 310.2403564453125, + "learning_rate": 5.137081516283582e-07, + "loss": 14.9185, + "step": 434500 + }, + { + "epoch": 0.8777376907444742, + "grad_norm": 534.4397583007812, + "learning_rate": 5.135540479453432e-07, + "loss": 17.3908, + "step": 434510 + }, + { + "epoch": 0.877757891377158, + "grad_norm": 316.0336608886719, + "learning_rate": 5.133999661287226e-07, + "loss": 26.7963, + "step": 434520 + }, + { + "epoch": 0.8777780920098417, + "grad_norm": 247.14096069335938, + "learning_rate": 5.13245906179246e-07, + "loss": 28.1687, + "step": 434530 + }, + { + "epoch": 0.8777982926425255, + "grad_norm": 113.46258544921875, + "learning_rate": 5.130918680976643e-07, + "loss": 13.7509, + "step": 434540 + }, + { + "epoch": 0.8778184932752093, + "grad_norm": 235.147705078125, + "learning_rate": 5.129378518847295e-07, + "loss": 23.4125, + "step": 434550 + }, + { + "epoch": 0.8778386939078932, + "grad_norm": 383.57684326171875, + "learning_rate": 5.127838575411908e-07, + "loss": 14.9999, + "step": 434560 + }, + { + "epoch": 0.877858894540577, + "grad_norm": 352.58856201171875, + "learning_rate": 5.126298850677991e-07, + "loss": 13.527, + "step": 434570 + }, + { + "epoch": 0.8778790951732608, + "grad_norm": 321.5787353515625, + "learning_rate": 5.124759344653057e-07, + "loss": 8.4193, + "step": 434580 + }, + { + "epoch": 0.8778992958059446, + "grad_norm": 500.7260437011719, + "learning_rate": 5.123220057344597e-07, + "loss": 22.2562, + "step": 434590 + }, + { + "epoch": 0.8779194964386284, + "grad_norm": 445.0096130371094, + "learning_rate": 5.121680988760125e-07, + "loss": 19.0204, + "step": 434600 + }, + { + "epoch": 0.8779396970713123, + "grad_norm": 222.2227325439453, + "learning_rate": 5.120142138907131e-07, + "loss": 37.4008, + "step": 434610 + }, + { + "epoch": 0.8779598977039961, + "grad_norm": 418.1203918457031, + "learning_rate": 5.11860350779313e-07, + "loss": 19.7539, + "step": 434620 + }, + { + "epoch": 0.8779800983366799, + "grad_norm": 516.3267822265625, + "learning_rate": 5.11706509542561e-07, + "loss": 14.7627, + "step": 434630 + }, + { + "epoch": 0.8780002989693637, + "grad_norm": 933.3505859375, + "learning_rate": 5.115526901812062e-07, + "loss": 25.2065, + "step": 434640 + }, + { + "epoch": 0.8780204996020475, + "grad_norm": 410.081787109375, + "learning_rate": 5.113988926960001e-07, + "loss": 15.1906, + "step": 434650 + }, + { + "epoch": 0.8780407002347314, + "grad_norm": 320.27349853515625, + "learning_rate": 5.112451170876903e-07, + "loss": 16.609, + "step": 434660 + }, + { + "epoch": 0.8780609008674152, + "grad_norm": 259.0826110839844, + "learning_rate": 5.110913633570286e-07, + "loss": 31.0852, + "step": 434670 + }, + { + "epoch": 0.878081101500099, + "grad_norm": 314.41046142578125, + "learning_rate": 5.109376315047632e-07, + "loss": 11.9574, + "step": 434680 + }, + { + "epoch": 0.8781013021327828, + "grad_norm": 643.1636352539062, + "learning_rate": 5.107839215316424e-07, + "loss": 27.3094, + "step": 434690 + }, + { + "epoch": 0.8781215027654666, + "grad_norm": 439.34320068359375, + "learning_rate": 5.106302334384172e-07, + "loss": 16.708, + "step": 434700 + }, + { + "epoch": 0.8781417033981505, + "grad_norm": 979.7433471679688, + "learning_rate": 5.104765672258355e-07, + "loss": 25.1756, + "step": 434710 + }, + { + "epoch": 0.8781619040308343, + "grad_norm": 422.1972961425781, + "learning_rate": 5.103229228946455e-07, + "loss": 11.8049, + "step": 434720 + }, + { + "epoch": 0.8781821046635181, + "grad_norm": 145.0203857421875, + "learning_rate": 5.101693004455977e-07, + "loss": 10.6392, + "step": 434730 + }, + { + "epoch": 0.8782023052962019, + "grad_norm": 377.1507568359375, + "learning_rate": 5.100156998794415e-07, + "loss": 18.1032, + "step": 434740 + }, + { + "epoch": 0.8782225059288857, + "grad_norm": 272.1239929199219, + "learning_rate": 5.098621211969224e-07, + "loss": 21.4669, + "step": 434750 + }, + { + "epoch": 0.8782427065615696, + "grad_norm": 46.512596130371094, + "learning_rate": 5.09708564398791e-07, + "loss": 13.3603, + "step": 434760 + }, + { + "epoch": 0.8782629071942534, + "grad_norm": 281.3079528808594, + "learning_rate": 5.095550294857959e-07, + "loss": 14.2644, + "step": 434770 + }, + { + "epoch": 0.8782831078269372, + "grad_norm": 374.24896240234375, + "learning_rate": 5.094015164586852e-07, + "loss": 14.84, + "step": 434780 + }, + { + "epoch": 0.8783033084596209, + "grad_norm": 364.0821533203125, + "learning_rate": 5.092480253182058e-07, + "loss": 23.2679, + "step": 434790 + }, + { + "epoch": 0.8783235090923047, + "grad_norm": 162.59280395507812, + "learning_rate": 5.090945560651073e-07, + "loss": 15.5259, + "step": 434800 + }, + { + "epoch": 0.8783437097249885, + "grad_norm": 276.9872741699219, + "learning_rate": 5.08941108700139e-07, + "loss": 8.1555, + "step": 434810 + }, + { + "epoch": 0.8783639103576724, + "grad_norm": 326.7541198730469, + "learning_rate": 5.087876832240446e-07, + "loss": 27.8575, + "step": 434820 + }, + { + "epoch": 0.8783841109903562, + "grad_norm": 33.11380386352539, + "learning_rate": 5.086342796375749e-07, + "loss": 7.0672, + "step": 434830 + }, + { + "epoch": 0.87840431162304, + "grad_norm": 153.7439727783203, + "learning_rate": 5.084808979414779e-07, + "loss": 14.9328, + "step": 434840 + }, + { + "epoch": 0.8784245122557238, + "grad_norm": 171.47793579101562, + "learning_rate": 5.083275381364999e-07, + "loss": 26.4409, + "step": 434850 + }, + { + "epoch": 0.8784447128884076, + "grad_norm": 350.4378662109375, + "learning_rate": 5.081742002233881e-07, + "loss": 16.2276, + "step": 434860 + }, + { + "epoch": 0.8784649135210915, + "grad_norm": 450.28155517578125, + "learning_rate": 5.080208842028911e-07, + "loss": 25.9278, + "step": 434870 + }, + { + "epoch": 0.8784851141537753, + "grad_norm": 448.4052734375, + "learning_rate": 5.078675900757557e-07, + "loss": 20.3733, + "step": 434880 + }, + { + "epoch": 0.8785053147864591, + "grad_norm": 350.4577941894531, + "learning_rate": 5.07714317842728e-07, + "loss": 12.8354, + "step": 434890 + }, + { + "epoch": 0.8785255154191429, + "grad_norm": 610.2189331054688, + "learning_rate": 5.075610675045567e-07, + "loss": 17.948, + "step": 434900 + }, + { + "epoch": 0.8785457160518267, + "grad_norm": 399.9798889160156, + "learning_rate": 5.074078390619869e-07, + "loss": 21.844, + "step": 434910 + }, + { + "epoch": 0.8785659166845106, + "grad_norm": 383.8321838378906, + "learning_rate": 5.072546325157673e-07, + "loss": 13.7429, + "step": 434920 + }, + { + "epoch": 0.8785861173171944, + "grad_norm": 216.4840850830078, + "learning_rate": 5.071014478666425e-07, + "loss": 14.4042, + "step": 434930 + }, + { + "epoch": 0.8786063179498782, + "grad_norm": 457.54327392578125, + "learning_rate": 5.069482851153618e-07, + "loss": 18.8779, + "step": 434940 + }, + { + "epoch": 0.878626518582562, + "grad_norm": 32.08904266357422, + "learning_rate": 5.0679514426267e-07, + "loss": 15.2914, + "step": 434950 + }, + { + "epoch": 0.8786467192152458, + "grad_norm": 35.61134719848633, + "learning_rate": 5.06642025309313e-07, + "loss": 11.8779, + "step": 434960 + }, + { + "epoch": 0.8786669198479297, + "grad_norm": 723.7139282226562, + "learning_rate": 5.064889282560382e-07, + "loss": 20.9908, + "step": 434970 + }, + { + "epoch": 0.8786871204806135, + "grad_norm": 102.7997817993164, + "learning_rate": 5.063358531035906e-07, + "loss": 13.4488, + "step": 434980 + }, + { + "epoch": 0.8787073211132973, + "grad_norm": 352.2797546386719, + "learning_rate": 5.06182799852718e-07, + "loss": 13.2957, + "step": 434990 + }, + { + "epoch": 0.8787275217459811, + "grad_norm": 886.9483032226562, + "learning_rate": 5.06029768504166e-07, + "loss": 22.359, + "step": 435000 + }, + { + "epoch": 0.8787477223786649, + "grad_norm": 218.36572265625, + "learning_rate": 5.058767590586783e-07, + "loss": 19.1126, + "step": 435010 + }, + { + "epoch": 0.8787679230113488, + "grad_norm": 287.1523742675781, + "learning_rate": 5.057237715170032e-07, + "loss": 14.2723, + "step": 435020 + }, + { + "epoch": 0.8787881236440326, + "grad_norm": 420.1017761230469, + "learning_rate": 5.055708058798853e-07, + "loss": 27.4834, + "step": 435030 + }, + { + "epoch": 0.8788083242767164, + "grad_norm": 525.8872680664062, + "learning_rate": 5.054178621480694e-07, + "loss": 16.9919, + "step": 435040 + }, + { + "epoch": 0.8788285249094001, + "grad_norm": 232.071533203125, + "learning_rate": 5.052649403223015e-07, + "loss": 18.4302, + "step": 435050 + }, + { + "epoch": 0.8788487255420839, + "grad_norm": 342.8013610839844, + "learning_rate": 5.051120404033283e-07, + "loss": 18.3008, + "step": 435060 + }, + { + "epoch": 0.8788689261747678, + "grad_norm": 6.585102081298828, + "learning_rate": 5.049591623918937e-07, + "loss": 26.1853, + "step": 435070 + }, + { + "epoch": 0.8788891268074516, + "grad_norm": 368.1114501953125, + "learning_rate": 5.04806306288742e-07, + "loss": 25.3293, + "step": 435080 + }, + { + "epoch": 0.8789093274401354, + "grad_norm": 515.1712036132812, + "learning_rate": 5.046534720946206e-07, + "loss": 16.9136, + "step": 435090 + }, + { + "epoch": 0.8789295280728192, + "grad_norm": 16.89857292175293, + "learning_rate": 5.045006598102725e-07, + "loss": 25.171, + "step": 435100 + }, + { + "epoch": 0.878949728705503, + "grad_norm": 346.8264465332031, + "learning_rate": 5.043478694364423e-07, + "loss": 16.4898, + "step": 435110 + }, + { + "epoch": 0.8789699293381869, + "grad_norm": 332.0619201660156, + "learning_rate": 5.04195100973875e-07, + "loss": 17.1752, + "step": 435120 + }, + { + "epoch": 0.8789901299708707, + "grad_norm": 107.52465057373047, + "learning_rate": 5.040423544233164e-07, + "loss": 17.1065, + "step": 435130 + }, + { + "epoch": 0.8790103306035545, + "grad_norm": 433.8957824707031, + "learning_rate": 5.0388962978551e-07, + "loss": 15.4823, + "step": 435140 + }, + { + "epoch": 0.8790305312362383, + "grad_norm": 242.65370178222656, + "learning_rate": 5.037369270611997e-07, + "loss": 14.8826, + "step": 435150 + }, + { + "epoch": 0.8790507318689221, + "grad_norm": 471.4053649902344, + "learning_rate": 5.035842462511309e-07, + "loss": 33.3922, + "step": 435160 + }, + { + "epoch": 0.879070932501606, + "grad_norm": 67.55804443359375, + "learning_rate": 5.034315873560475e-07, + "loss": 15.3738, + "step": 435170 + }, + { + "epoch": 0.8790911331342898, + "grad_norm": 534.8261108398438, + "learning_rate": 5.032789503766922e-07, + "loss": 19.2829, + "step": 435180 + }, + { + "epoch": 0.8791113337669736, + "grad_norm": 511.7774353027344, + "learning_rate": 5.031263353138105e-07, + "loss": 27.3329, + "step": 435190 + }, + { + "epoch": 0.8791315343996574, + "grad_norm": 279.4249267578125, + "learning_rate": 5.029737421681446e-07, + "loss": 28.538, + "step": 435200 + }, + { + "epoch": 0.8791517350323412, + "grad_norm": 345.3414306640625, + "learning_rate": 5.028211709404407e-07, + "loss": 24.7293, + "step": 435210 + }, + { + "epoch": 0.879171935665025, + "grad_norm": 673.9608764648438, + "learning_rate": 5.026686216314397e-07, + "loss": 16.0036, + "step": 435220 + }, + { + "epoch": 0.8791921362977089, + "grad_norm": 195.92652893066406, + "learning_rate": 5.025160942418872e-07, + "loss": 16.4735, + "step": 435230 + }, + { + "epoch": 0.8792123369303927, + "grad_norm": 606.4151611328125, + "learning_rate": 5.023635887725259e-07, + "loss": 17.4006, + "step": 435240 + }, + { + "epoch": 0.8792325375630765, + "grad_norm": 447.26043701171875, + "learning_rate": 5.022111052240985e-07, + "loss": 21.1823, + "step": 435250 + }, + { + "epoch": 0.8792527381957603, + "grad_norm": 122.67220306396484, + "learning_rate": 5.020586435973491e-07, + "loss": 25.98, + "step": 435260 + }, + { + "epoch": 0.8792729388284442, + "grad_norm": 458.9827575683594, + "learning_rate": 5.019062038930195e-07, + "loss": 21.6993, + "step": 435270 + }, + { + "epoch": 0.879293139461128, + "grad_norm": 315.67913818359375, + "learning_rate": 5.017537861118543e-07, + "loss": 27.674, + "step": 435280 + }, + { + "epoch": 0.8793133400938118, + "grad_norm": 158.3207244873047, + "learning_rate": 5.016013902545957e-07, + "loss": 13.9432, + "step": 435290 + }, + { + "epoch": 0.8793335407264955, + "grad_norm": 261.7013244628906, + "learning_rate": 5.014490163219854e-07, + "loss": 16.5056, + "step": 435300 + }, + { + "epoch": 0.8793537413591793, + "grad_norm": 59.75825500488281, + "learning_rate": 5.012966643147682e-07, + "loss": 10.7603, + "step": 435310 + }, + { + "epoch": 0.8793739419918631, + "grad_norm": 483.52099609375, + "learning_rate": 5.011443342336852e-07, + "loss": 8.7847, + "step": 435320 + }, + { + "epoch": 0.879394142624547, + "grad_norm": 192.5576934814453, + "learning_rate": 5.009920260794782e-07, + "loss": 9.5417, + "step": 435330 + }, + { + "epoch": 0.8794143432572308, + "grad_norm": 318.4565124511719, + "learning_rate": 5.008397398528903e-07, + "loss": 13.3563, + "step": 435340 + }, + { + "epoch": 0.8794345438899146, + "grad_norm": 380.82891845703125, + "learning_rate": 5.006874755546654e-07, + "loss": 17.2505, + "step": 435350 + }, + { + "epoch": 0.8794547445225984, + "grad_norm": 852.404296875, + "learning_rate": 5.005352331855423e-07, + "loss": 22.5348, + "step": 435360 + }, + { + "epoch": 0.8794749451552822, + "grad_norm": 502.7873229980469, + "learning_rate": 5.00383012746265e-07, + "loss": 34.0941, + "step": 435370 + }, + { + "epoch": 0.8794951457879661, + "grad_norm": 190.71434020996094, + "learning_rate": 5.002308142375762e-07, + "loss": 13.873, + "step": 435380 + }, + { + "epoch": 0.8795153464206499, + "grad_norm": 77.80549621582031, + "learning_rate": 5.000786376602162e-07, + "loss": 12.7138, + "step": 435390 + }, + { + "epoch": 0.8795355470533337, + "grad_norm": 485.7001037597656, + "learning_rate": 4.99926483014927e-07, + "loss": 13.9821, + "step": 435400 + }, + { + "epoch": 0.8795557476860175, + "grad_norm": 265.3295593261719, + "learning_rate": 4.997743503024494e-07, + "loss": 19.1495, + "step": 435410 + }, + { + "epoch": 0.8795759483187013, + "grad_norm": 141.0780792236328, + "learning_rate": 4.996222395235283e-07, + "loss": 22.0904, + "step": 435420 + }, + { + "epoch": 0.8795961489513852, + "grad_norm": 273.9278869628906, + "learning_rate": 4.994701506789007e-07, + "loss": 17.5997, + "step": 435430 + }, + { + "epoch": 0.879616349584069, + "grad_norm": 353.7311706542969, + "learning_rate": 4.99318083769309e-07, + "loss": 13.0439, + "step": 435440 + }, + { + "epoch": 0.8796365502167528, + "grad_norm": 721.9375610351562, + "learning_rate": 4.991660387954967e-07, + "loss": 22.3529, + "step": 435450 + }, + { + "epoch": 0.8796567508494366, + "grad_norm": 301.629638671875, + "learning_rate": 4.990140157582036e-07, + "loss": 14.4415, + "step": 435460 + }, + { + "epoch": 0.8796769514821204, + "grad_norm": 149.6297607421875, + "learning_rate": 4.988620146581685e-07, + "loss": 16.6529, + "step": 435470 + }, + { + "epoch": 0.8796971521148043, + "grad_norm": 219.41262817382812, + "learning_rate": 4.987100354961355e-07, + "loss": 14.8243, + "step": 435480 + }, + { + "epoch": 0.8797173527474881, + "grad_norm": 404.56951904296875, + "learning_rate": 4.985580782728433e-07, + "loss": 18.9657, + "step": 435490 + }, + { + "epoch": 0.8797375533801719, + "grad_norm": 428.6075744628906, + "learning_rate": 4.984061429890324e-07, + "loss": 13.3605, + "step": 435500 + }, + { + "epoch": 0.8797577540128557, + "grad_norm": 303.6961975097656, + "learning_rate": 4.98254229645444e-07, + "loss": 23.9782, + "step": 435510 + }, + { + "epoch": 0.8797779546455395, + "grad_norm": 307.2866516113281, + "learning_rate": 4.981023382428196e-07, + "loss": 22.7079, + "step": 435520 + }, + { + "epoch": 0.8797981552782234, + "grad_norm": 423.6874084472656, + "learning_rate": 4.979504687818987e-07, + "loss": 21.0905, + "step": 435530 + }, + { + "epoch": 0.8798183559109072, + "grad_norm": 481.9783020019531, + "learning_rate": 4.977986212634195e-07, + "loss": 21.2299, + "step": 435540 + }, + { + "epoch": 0.879838556543591, + "grad_norm": 610.5613403320312, + "learning_rate": 4.976467956881254e-07, + "loss": 20.8345, + "step": 435550 + }, + { + "epoch": 0.8798587571762747, + "grad_norm": 238.90846252441406, + "learning_rate": 4.97494992056754e-07, + "loss": 18.398, + "step": 435560 + }, + { + "epoch": 0.8798789578089585, + "grad_norm": 228.98049926757812, + "learning_rate": 4.973432103700454e-07, + "loss": 25.6139, + "step": 435570 + }, + { + "epoch": 0.8798991584416423, + "grad_norm": 269.6488037109375, + "learning_rate": 4.971914506287407e-07, + "loss": 21.9952, + "step": 435580 + }, + { + "epoch": 0.8799193590743262, + "grad_norm": 191.76803588867188, + "learning_rate": 4.97039712833578e-07, + "loss": 30.8179, + "step": 435590 + }, + { + "epoch": 0.87993955970701, + "grad_norm": 510.7918701171875, + "learning_rate": 4.968879969852985e-07, + "loss": 21.6079, + "step": 435600 + }, + { + "epoch": 0.8799597603396938, + "grad_norm": 22.066001892089844, + "learning_rate": 4.967363030846406e-07, + "loss": 22.5894, + "step": 435610 + }, + { + "epoch": 0.8799799609723776, + "grad_norm": 355.6514892578125, + "learning_rate": 4.965846311323431e-07, + "loss": 24.1495, + "step": 435620 + }, + { + "epoch": 0.8800001616050614, + "grad_norm": 226.7723388671875, + "learning_rate": 4.964329811291463e-07, + "loss": 11.3769, + "step": 435630 + }, + { + "epoch": 0.8800203622377453, + "grad_norm": 76.03665161132812, + "learning_rate": 4.962813530757893e-07, + "loss": 14.7804, + "step": 435640 + }, + { + "epoch": 0.8800405628704291, + "grad_norm": 199.2616729736328, + "learning_rate": 4.961297469730097e-07, + "loss": 13.2704, + "step": 435650 + }, + { + "epoch": 0.8800607635031129, + "grad_norm": 465.0576171875, + "learning_rate": 4.959781628215476e-07, + "loss": 19.5031, + "step": 435660 + }, + { + "epoch": 0.8800809641357967, + "grad_norm": 108.87104797363281, + "learning_rate": 4.95826600622143e-07, + "loss": 15.921, + "step": 435670 + }, + { + "epoch": 0.8801011647684805, + "grad_norm": 103.40303802490234, + "learning_rate": 4.956750603755328e-07, + "loss": 13.7844, + "step": 435680 + }, + { + "epoch": 0.8801213654011644, + "grad_norm": 320.8612365722656, + "learning_rate": 4.95523542082455e-07, + "loss": 15.81, + "step": 435690 + }, + { + "epoch": 0.8801415660338482, + "grad_norm": 696.59326171875, + "learning_rate": 4.9537204574365e-07, + "loss": 24.1962, + "step": 435700 + }, + { + "epoch": 0.880161766666532, + "grad_norm": 989.5188598632812, + "learning_rate": 4.952205713598557e-07, + "loss": 21.6382, + "step": 435710 + }, + { + "epoch": 0.8801819672992158, + "grad_norm": 198.38470458984375, + "learning_rate": 4.950691189318086e-07, + "loss": 12.5203, + "step": 435720 + }, + { + "epoch": 0.8802021679318996, + "grad_norm": 232.7144317626953, + "learning_rate": 4.949176884602486e-07, + "loss": 40.752, + "step": 435730 + }, + { + "epoch": 0.8802223685645835, + "grad_norm": 314.38751220703125, + "learning_rate": 4.947662799459152e-07, + "loss": 19.3201, + "step": 435740 + }, + { + "epoch": 0.8802425691972673, + "grad_norm": 347.6485595703125, + "learning_rate": 4.946148933895423e-07, + "loss": 22.7622, + "step": 435750 + }, + { + "epoch": 0.8802627698299511, + "grad_norm": 87.8727035522461, + "learning_rate": 4.944635287918703e-07, + "loss": 10.1187, + "step": 435760 + }, + { + "epoch": 0.8802829704626349, + "grad_norm": 192.29734802246094, + "learning_rate": 4.943121861536376e-07, + "loss": 20.3241, + "step": 435770 + }, + { + "epoch": 0.8803031710953187, + "grad_norm": 254.23399353027344, + "learning_rate": 4.941608654755808e-07, + "loss": 10.8488, + "step": 435780 + }, + { + "epoch": 0.8803233717280026, + "grad_norm": 515.1815185546875, + "learning_rate": 4.940095667584366e-07, + "loss": 21.5697, + "step": 435790 + }, + { + "epoch": 0.8803435723606864, + "grad_norm": 158.14749145507812, + "learning_rate": 4.938582900029437e-07, + "loss": 21.8464, + "step": 435800 + }, + { + "epoch": 0.8803637729933701, + "grad_norm": 957.723876953125, + "learning_rate": 4.937070352098384e-07, + "loss": 24.3145, + "step": 435810 + }, + { + "epoch": 0.8803839736260539, + "grad_norm": 341.580322265625, + "learning_rate": 4.935558023798592e-07, + "loss": 18.2231, + "step": 435820 + }, + { + "epoch": 0.8804041742587377, + "grad_norm": 276.53717041015625, + "learning_rate": 4.934045915137419e-07, + "loss": 9.082, + "step": 435830 + }, + { + "epoch": 0.8804243748914216, + "grad_norm": 673.6719970703125, + "learning_rate": 4.932534026122249e-07, + "loss": 12.0788, + "step": 435840 + }, + { + "epoch": 0.8804445755241054, + "grad_norm": 303.8342590332031, + "learning_rate": 4.931022356760439e-07, + "loss": 22.0684, + "step": 435850 + }, + { + "epoch": 0.8804647761567892, + "grad_norm": 380.82440185546875, + "learning_rate": 4.929510907059354e-07, + "loss": 20.524, + "step": 435860 + }, + { + "epoch": 0.880484976789473, + "grad_norm": 22.417442321777344, + "learning_rate": 4.927999677026374e-07, + "loss": 14.9668, + "step": 435870 + }, + { + "epoch": 0.8805051774221568, + "grad_norm": 0.30903351306915283, + "learning_rate": 4.926488666668844e-07, + "loss": 12.1088, + "step": 435880 + }, + { + "epoch": 0.8805253780548407, + "grad_norm": 529.1118774414062, + "learning_rate": 4.924977875994159e-07, + "loss": 19.5259, + "step": 435890 + }, + { + "epoch": 0.8805455786875245, + "grad_norm": 357.0335388183594, + "learning_rate": 4.92346730500966e-07, + "loss": 9.633, + "step": 435900 + }, + { + "epoch": 0.8805657793202083, + "grad_norm": 185.8363037109375, + "learning_rate": 4.921956953722701e-07, + "loss": 9.942, + "step": 435910 + }, + { + "epoch": 0.8805859799528921, + "grad_norm": 256.4708557128906, + "learning_rate": 4.920446822140673e-07, + "loss": 10.8701, + "step": 435920 + }, + { + "epoch": 0.8806061805855759, + "grad_norm": 474.8191833496094, + "learning_rate": 4.918936910270916e-07, + "loss": 12.3728, + "step": 435930 + }, + { + "epoch": 0.8806263812182598, + "grad_norm": 217.32952880859375, + "learning_rate": 4.917427218120785e-07, + "loss": 21.8445, + "step": 435940 + }, + { + "epoch": 0.8806465818509436, + "grad_norm": 248.072265625, + "learning_rate": 4.915917745697645e-07, + "loss": 20.431, + "step": 435950 + }, + { + "epoch": 0.8806667824836274, + "grad_norm": 288.0353698730469, + "learning_rate": 4.914408493008871e-07, + "loss": 25.1107, + "step": 435960 + }, + { + "epoch": 0.8806869831163112, + "grad_norm": 106.0359115600586, + "learning_rate": 4.912899460061787e-07, + "loss": 12.067, + "step": 435970 + }, + { + "epoch": 0.880707183748995, + "grad_norm": 354.09881591796875, + "learning_rate": 4.911390646863757e-07, + "loss": 17.9086, + "step": 435980 + }, + { + "epoch": 0.8807273843816789, + "grad_norm": 540.8861694335938, + "learning_rate": 4.909882053422154e-07, + "loss": 18.2216, + "step": 435990 + }, + { + "epoch": 0.8807475850143627, + "grad_norm": 184.07806396484375, + "learning_rate": 4.908373679744316e-07, + "loss": 17.5554, + "step": 436000 + }, + { + "epoch": 0.8807677856470465, + "grad_norm": 177.03599548339844, + "learning_rate": 4.90686552583759e-07, + "loss": 18.7642, + "step": 436010 + }, + { + "epoch": 0.8807879862797303, + "grad_norm": 166.44651794433594, + "learning_rate": 4.905357591709325e-07, + "loss": 43.1835, + "step": 436020 + }, + { + "epoch": 0.8808081869124141, + "grad_norm": 321.760986328125, + "learning_rate": 4.9038498773669e-07, + "loss": 16.6634, + "step": 436030 + }, + { + "epoch": 0.880828387545098, + "grad_norm": 341.0688171386719, + "learning_rate": 4.902342382817626e-07, + "loss": 32.7243, + "step": 436040 + }, + { + "epoch": 0.8808485881777818, + "grad_norm": 302.72125244140625, + "learning_rate": 4.900835108068863e-07, + "loss": 16.4093, + "step": 436050 + }, + { + "epoch": 0.8808687888104656, + "grad_norm": 336.9021301269531, + "learning_rate": 4.899328053127966e-07, + "loss": 21.199, + "step": 436060 + }, + { + "epoch": 0.8808889894431493, + "grad_norm": 434.4256896972656, + "learning_rate": 4.89782121800228e-07, + "loss": 14.7242, + "step": 436070 + }, + { + "epoch": 0.8809091900758331, + "grad_norm": 167.6675567626953, + "learning_rate": 4.896314602699126e-07, + "loss": 15.5623, + "step": 436080 + }, + { + "epoch": 0.880929390708517, + "grad_norm": 460.88494873046875, + "learning_rate": 4.894808207225882e-07, + "loss": 21.3126, + "step": 436090 + }, + { + "epoch": 0.8809495913412008, + "grad_norm": 345.532958984375, + "learning_rate": 4.893302031589864e-07, + "loss": 14.3713, + "step": 436100 + }, + { + "epoch": 0.8809697919738846, + "grad_norm": 349.746826171875, + "learning_rate": 4.891796075798416e-07, + "loss": 39.5377, + "step": 436110 + }, + { + "epoch": 0.8809899926065684, + "grad_norm": 411.3088684082031, + "learning_rate": 4.890290339858883e-07, + "loss": 13.9997, + "step": 436120 + }, + { + "epoch": 0.8810101932392522, + "grad_norm": 324.4735107421875, + "learning_rate": 4.888784823778614e-07, + "loss": 15.4449, + "step": 436130 + }, + { + "epoch": 0.881030393871936, + "grad_norm": 259.8567199707031, + "learning_rate": 4.887279527564936e-07, + "loss": 9.0276, + "step": 436140 + }, + { + "epoch": 0.8810505945046199, + "grad_norm": 444.4129943847656, + "learning_rate": 4.885774451225178e-07, + "loss": 15.4865, + "step": 436150 + }, + { + "epoch": 0.8810707951373037, + "grad_norm": 343.4646911621094, + "learning_rate": 4.884269594766689e-07, + "loss": 18.7254, + "step": 436160 + }, + { + "epoch": 0.8810909957699875, + "grad_norm": 232.9482421875, + "learning_rate": 4.8827649581968e-07, + "loss": 16.868, + "step": 436170 + }, + { + "epoch": 0.8811111964026713, + "grad_norm": 549.541015625, + "learning_rate": 4.881260541522831e-07, + "loss": 28.0799, + "step": 436180 + }, + { + "epoch": 0.8811313970353551, + "grad_norm": 612.6248168945312, + "learning_rate": 4.87975634475214e-07, + "loss": 20.9709, + "step": 436190 + }, + { + "epoch": 0.881151597668039, + "grad_norm": 501.504638671875, + "learning_rate": 4.878252367892033e-07, + "loss": 16.7092, + "step": 436200 + }, + { + "epoch": 0.8811717983007228, + "grad_norm": 297.49261474609375, + "learning_rate": 4.87674861094986e-07, + "loss": 19.5409, + "step": 436210 + }, + { + "epoch": 0.8811919989334066, + "grad_norm": 224.5021209716797, + "learning_rate": 4.875245073932944e-07, + "loss": 10.7612, + "step": 436220 + }, + { + "epoch": 0.8812121995660904, + "grad_norm": 361.38916015625, + "learning_rate": 4.873741756848594e-07, + "loss": 23.1716, + "step": 436230 + }, + { + "epoch": 0.8812324001987742, + "grad_norm": 224.0435333251953, + "learning_rate": 4.87223865970417e-07, + "loss": 12.7234, + "step": 436240 + }, + { + "epoch": 0.8812526008314581, + "grad_norm": 342.0216369628906, + "learning_rate": 4.87073578250698e-07, + "loss": 13.4458, + "step": 436250 + }, + { + "epoch": 0.8812728014641419, + "grad_norm": 313.01409912109375, + "learning_rate": 4.869233125264339e-07, + "loss": 8.6048, + "step": 436260 + }, + { + "epoch": 0.8812930020968257, + "grad_norm": 258.0538330078125, + "learning_rate": 4.867730687983585e-07, + "loss": 15.3131, + "step": 436270 + }, + { + "epoch": 0.8813132027295095, + "grad_norm": 425.1286926269531, + "learning_rate": 4.866228470672041e-07, + "loss": 11.7544, + "step": 436280 + }, + { + "epoch": 0.8813334033621933, + "grad_norm": 173.1059112548828, + "learning_rate": 4.864726473337034e-07, + "loss": 21.4789, + "step": 436290 + }, + { + "epoch": 0.8813536039948772, + "grad_norm": 401.54052734375, + "learning_rate": 4.863224695985858e-07, + "loss": 23.8768, + "step": 436300 + }, + { + "epoch": 0.881373804627561, + "grad_norm": 436.904052734375, + "learning_rate": 4.861723138625862e-07, + "loss": 18.8533, + "step": 436310 + }, + { + "epoch": 0.8813940052602448, + "grad_norm": 455.9186706542969, + "learning_rate": 4.860221801264358e-07, + "loss": 17.9445, + "step": 436320 + }, + { + "epoch": 0.8814142058929285, + "grad_norm": 319.8497314453125, + "learning_rate": 4.858720683908646e-07, + "loss": 21.2574, + "step": 436330 + }, + { + "epoch": 0.8814344065256123, + "grad_norm": 468.0010986328125, + "learning_rate": 4.857219786566053e-07, + "loss": 24.4979, + "step": 436340 + }, + { + "epoch": 0.8814546071582962, + "grad_norm": 550.1976928710938, + "learning_rate": 4.855719109243917e-07, + "loss": 23.3478, + "step": 436350 + }, + { + "epoch": 0.88147480779098, + "grad_norm": 190.78244018554688, + "learning_rate": 4.85421865194951e-07, + "loss": 25.4633, + "step": 436360 + }, + { + "epoch": 0.8814950084236638, + "grad_norm": 798.060302734375, + "learning_rate": 4.852718414690166e-07, + "loss": 11.6966, + "step": 436370 + }, + { + "epoch": 0.8815152090563476, + "grad_norm": 268.206298828125, + "learning_rate": 4.851218397473206e-07, + "loss": 20.4789, + "step": 436380 + }, + { + "epoch": 0.8815354096890314, + "grad_norm": 858.3386840820312, + "learning_rate": 4.84971860030593e-07, + "loss": 25.3801, + "step": 436390 + }, + { + "epoch": 0.8815556103217153, + "grad_norm": 158.20469665527344, + "learning_rate": 4.848219023195644e-07, + "loss": 9.441, + "step": 436400 + }, + { + "epoch": 0.8815758109543991, + "grad_norm": 214.66708374023438, + "learning_rate": 4.846719666149668e-07, + "loss": 14.5835, + "step": 436410 + }, + { + "epoch": 0.8815960115870829, + "grad_norm": 338.07586669921875, + "learning_rate": 4.845220529175304e-07, + "loss": 16.8058, + "step": 436420 + }, + { + "epoch": 0.8816162122197667, + "grad_norm": 421.33880615234375, + "learning_rate": 4.84372161227985e-07, + "loss": 13.6267, + "step": 436430 + }, + { + "epoch": 0.8816364128524505, + "grad_norm": 0.18723739683628082, + "learning_rate": 4.842222915470618e-07, + "loss": 11.3648, + "step": 436440 + }, + { + "epoch": 0.8816566134851344, + "grad_norm": 26.688398361206055, + "learning_rate": 4.840724438754929e-07, + "loss": 23.6005, + "step": 436450 + }, + { + "epoch": 0.8816768141178182, + "grad_norm": 433.8655700683594, + "learning_rate": 4.839226182140072e-07, + "loss": 14.6859, + "step": 436460 + }, + { + "epoch": 0.881697014750502, + "grad_norm": 489.1601867675781, + "learning_rate": 4.837728145633335e-07, + "loss": 19.8474, + "step": 436470 + }, + { + "epoch": 0.8817172153831858, + "grad_norm": 393.7753601074219, + "learning_rate": 4.836230329242042e-07, + "loss": 20.2308, + "step": 436480 + }, + { + "epoch": 0.8817374160158696, + "grad_norm": 130.21282958984375, + "learning_rate": 4.83473273297348e-07, + "loss": 17.8237, + "step": 436490 + }, + { + "epoch": 0.8817576166485535, + "grad_norm": 400.9236755371094, + "learning_rate": 4.833235356834959e-07, + "loss": 15.2621, + "step": 436500 + }, + { + "epoch": 0.8817778172812373, + "grad_norm": 395.96966552734375, + "learning_rate": 4.831738200833775e-07, + "loss": 17.6387, + "step": 436510 + }, + { + "epoch": 0.8817980179139211, + "grad_norm": 605.7783813476562, + "learning_rate": 4.830241264977209e-07, + "loss": 19.7262, + "step": 436520 + }, + { + "epoch": 0.8818182185466049, + "grad_norm": 163.8187713623047, + "learning_rate": 4.828744549272579e-07, + "loss": 21.9274, + "step": 436530 + }, + { + "epoch": 0.8818384191792887, + "grad_norm": 571.732421875, + "learning_rate": 4.827248053727168e-07, + "loss": 18.2143, + "step": 436540 + }, + { + "epoch": 0.8818586198119726, + "grad_norm": 313.611083984375, + "learning_rate": 4.825751778348259e-07, + "loss": 16.4337, + "step": 436550 + }, + { + "epoch": 0.8818788204446564, + "grad_norm": 463.8468933105469, + "learning_rate": 4.824255723143162e-07, + "loss": 17.2454, + "step": 436560 + }, + { + "epoch": 0.8818990210773402, + "grad_norm": 476.1033020019531, + "learning_rate": 4.822759888119171e-07, + "loss": 15.0617, + "step": 436570 + }, + { + "epoch": 0.8819192217100239, + "grad_norm": 248.8014678955078, + "learning_rate": 4.821264273283566e-07, + "loss": 10.7048, + "step": 436580 + }, + { + "epoch": 0.8819394223427077, + "grad_norm": 682.4763793945312, + "learning_rate": 4.819768878643633e-07, + "loss": 23.8681, + "step": 436590 + }, + { + "epoch": 0.8819596229753915, + "grad_norm": 253.24656677246094, + "learning_rate": 4.818273704206678e-07, + "loss": 11.7182, + "step": 436600 + }, + { + "epoch": 0.8819798236080754, + "grad_norm": 95.55328369140625, + "learning_rate": 4.816778749979973e-07, + "loss": 16.1974, + "step": 436610 + }, + { + "epoch": 0.8820000242407592, + "grad_norm": 72.05003356933594, + "learning_rate": 4.815284015970801e-07, + "loss": 10.8611, + "step": 436620 + }, + { + "epoch": 0.882020224873443, + "grad_norm": 675.322998046875, + "learning_rate": 4.813789502186456e-07, + "loss": 12.9505, + "step": 436630 + }, + { + "epoch": 0.8820404255061268, + "grad_norm": 151.66297912597656, + "learning_rate": 4.812295208634238e-07, + "loss": 21.0599, + "step": 436640 + }, + { + "epoch": 0.8820606261388106, + "grad_norm": 343.0258483886719, + "learning_rate": 4.810801135321391e-07, + "loss": 28.378, + "step": 436650 + }, + { + "epoch": 0.8820808267714945, + "grad_norm": 415.1305236816406, + "learning_rate": 4.809307282255221e-07, + "loss": 14.8969, + "step": 436660 + }, + { + "epoch": 0.8821010274041783, + "grad_norm": 659.5724487304688, + "learning_rate": 4.807813649443016e-07, + "loss": 24.2443, + "step": 436670 + }, + { + "epoch": 0.8821212280368621, + "grad_norm": 838.9188232421875, + "learning_rate": 4.806320236892048e-07, + "loss": 24.2559, + "step": 436680 + }, + { + "epoch": 0.8821414286695459, + "grad_norm": 328.0303039550781, + "learning_rate": 4.804827044609578e-07, + "loss": 18.7561, + "step": 436690 + }, + { + "epoch": 0.8821616293022297, + "grad_norm": 458.027099609375, + "learning_rate": 4.803334072602917e-07, + "loss": 16.5193, + "step": 436700 + }, + { + "epoch": 0.8821818299349136, + "grad_norm": 705.2664184570312, + "learning_rate": 4.801841320879319e-07, + "loss": 24.2991, + "step": 436710 + }, + { + "epoch": 0.8822020305675974, + "grad_norm": 440.40521240234375, + "learning_rate": 4.800348789446058e-07, + "loss": 17.4962, + "step": 436720 + }, + { + "epoch": 0.8822222312002812, + "grad_norm": 82.1038818359375, + "learning_rate": 4.798856478310409e-07, + "loss": 17.8321, + "step": 436730 + }, + { + "epoch": 0.882242431832965, + "grad_norm": 691.0404052734375, + "learning_rate": 4.797364387479664e-07, + "loss": 28.2128, + "step": 436740 + }, + { + "epoch": 0.8822626324656488, + "grad_norm": 657.4496459960938, + "learning_rate": 4.795872516961087e-07, + "loss": 15.4127, + "step": 436750 + }, + { + "epoch": 0.8822828330983327, + "grad_norm": 584.3892822265625, + "learning_rate": 4.794380866761928e-07, + "loss": 13.4703, + "step": 436760 + }, + { + "epoch": 0.8823030337310165, + "grad_norm": 325.6146545410156, + "learning_rate": 4.792889436889487e-07, + "loss": 7.1147, + "step": 436770 + }, + { + "epoch": 0.8823232343637003, + "grad_norm": 118.1163101196289, + "learning_rate": 4.791398227351024e-07, + "loss": 14.4561, + "step": 436780 + }, + { + "epoch": 0.8823434349963841, + "grad_norm": 243.56419372558594, + "learning_rate": 4.789907238153785e-07, + "loss": 21.6599, + "step": 436790 + }, + { + "epoch": 0.8823636356290679, + "grad_norm": 760.7359619140625, + "learning_rate": 4.788416469305068e-07, + "loss": 25.8602, + "step": 436800 + }, + { + "epoch": 0.8823838362617518, + "grad_norm": 113.09996032714844, + "learning_rate": 4.786925920812119e-07, + "loss": 12.4008, + "step": 436810 + }, + { + "epoch": 0.8824040368944356, + "grad_norm": 326.0145263671875, + "learning_rate": 4.78543559268222e-07, + "loss": 22.3032, + "step": 436820 + }, + { + "epoch": 0.8824242375271194, + "grad_norm": 238.98001098632812, + "learning_rate": 4.78394548492262e-07, + "loss": 20.334, + "step": 436830 + }, + { + "epoch": 0.8824444381598031, + "grad_norm": 637.3267822265625, + "learning_rate": 4.782455597540576e-07, + "loss": 30.5467, + "step": 436840 + }, + { + "epoch": 0.8824646387924869, + "grad_norm": 487.647216796875, + "learning_rate": 4.780965930543369e-07, + "loss": 12.0064, + "step": 436850 + }, + { + "epoch": 0.8824848394251708, + "grad_norm": 367.8666687011719, + "learning_rate": 4.779476483938251e-07, + "loss": 13.7615, + "step": 436860 + }, + { + "epoch": 0.8825050400578546, + "grad_norm": 679.5595092773438, + "learning_rate": 4.777987257732469e-07, + "loss": 20.4513, + "step": 436870 + }, + { + "epoch": 0.8825252406905384, + "grad_norm": 102.08183288574219, + "learning_rate": 4.776498251933292e-07, + "loss": 21.0092, + "step": 436880 + }, + { + "epoch": 0.8825454413232222, + "grad_norm": 578.4793701171875, + "learning_rate": 4.775009466547986e-07, + "loss": 20.5826, + "step": 436890 + }, + { + "epoch": 0.882565641955906, + "grad_norm": 64.65514373779297, + "learning_rate": 4.773520901583801e-07, + "loss": 19.3511, + "step": 436900 + }, + { + "epoch": 0.8825858425885899, + "grad_norm": 629.3908081054688, + "learning_rate": 4.772032557047984e-07, + "loss": 24.7698, + "step": 436910 + }, + { + "epoch": 0.8826060432212737, + "grad_norm": 752.3409423828125, + "learning_rate": 4.770544432947799e-07, + "loss": 17.0448, + "step": 436920 + }, + { + "epoch": 0.8826262438539575, + "grad_norm": 471.2159118652344, + "learning_rate": 4.769056529290495e-07, + "loss": 16.0433, + "step": 436930 + }, + { + "epoch": 0.8826464444866413, + "grad_norm": 449.1194763183594, + "learning_rate": 4.7675688460833145e-07, + "loss": 24.9503, + "step": 436940 + }, + { + "epoch": 0.8826666451193251, + "grad_norm": 366.4239807128906, + "learning_rate": 4.766081383333521e-07, + "loss": 29.5621, + "step": 436950 + }, + { + "epoch": 0.882686845752009, + "grad_norm": 390.2115173339844, + "learning_rate": 4.7645941410483733e-07, + "loss": 16.0112, + "step": 436960 + }, + { + "epoch": 0.8827070463846928, + "grad_norm": 189.2827606201172, + "learning_rate": 4.7631071192350943e-07, + "loss": 19.8507, + "step": 436970 + }, + { + "epoch": 0.8827272470173766, + "grad_norm": 124.29317474365234, + "learning_rate": 4.7616203179009445e-07, + "loss": 20.3787, + "step": 436980 + }, + { + "epoch": 0.8827474476500604, + "grad_norm": 133.61338806152344, + "learning_rate": 4.760133737053174e-07, + "loss": 7.5316, + "step": 436990 + }, + { + "epoch": 0.8827676482827442, + "grad_norm": 119.6464614868164, + "learning_rate": 4.758647376699033e-07, + "loss": 16.7015, + "step": 437000 + }, + { + "epoch": 0.882787848915428, + "grad_norm": 175.40997314453125, + "learning_rate": 4.757161236845742e-07, + "loss": 13.1048, + "step": 437010 + }, + { + "epoch": 0.8828080495481119, + "grad_norm": 1037.2186279296875, + "learning_rate": 4.755675317500569e-07, + "loss": 30.6978, + "step": 437020 + }, + { + "epoch": 0.8828282501807957, + "grad_norm": 235.47128295898438, + "learning_rate": 4.7541896186707517e-07, + "loss": 17.5572, + "step": 437030 + }, + { + "epoch": 0.8828484508134795, + "grad_norm": 591.5972290039062, + "learning_rate": 4.752704140363512e-07, + "loss": 25.8775, + "step": 437040 + }, + { + "epoch": 0.8828686514461633, + "grad_norm": 420.414794921875, + "learning_rate": 4.751218882586106e-07, + "loss": 30.0815, + "step": 437050 + }, + { + "epoch": 0.8828888520788472, + "grad_norm": 216.95462036132812, + "learning_rate": 4.749733845345783e-07, + "loss": 17.1518, + "step": 437060 + }, + { + "epoch": 0.882909052711531, + "grad_norm": 728.5482177734375, + "learning_rate": 4.748249028649765e-07, + "loss": 23.1488, + "step": 437070 + }, + { + "epoch": 0.8829292533442148, + "grad_norm": 530.2343139648438, + "learning_rate": 4.7467644325052855e-07, + "loss": 16.5789, + "step": 437080 + }, + { + "epoch": 0.8829494539768985, + "grad_norm": 219.0844268798828, + "learning_rate": 4.7452800569195987e-07, + "loss": 13.1149, + "step": 437090 + }, + { + "epoch": 0.8829696546095823, + "grad_norm": 440.1877746582031, + "learning_rate": 4.743795901899928e-07, + "loss": 21.1264, + "step": 437100 + }, + { + "epoch": 0.8829898552422661, + "grad_norm": 140.57644653320312, + "learning_rate": 4.742311967453495e-07, + "loss": 14.4177, + "step": 437110 + }, + { + "epoch": 0.88301005587495, + "grad_norm": 552.50537109375, + "learning_rate": 4.7408282535875593e-07, + "loss": 29.2932, + "step": 437120 + }, + { + "epoch": 0.8830302565076338, + "grad_norm": 141.6434326171875, + "learning_rate": 4.739344760309322e-07, + "loss": 14.1547, + "step": 437130 + }, + { + "epoch": 0.8830504571403176, + "grad_norm": 53.88914108276367, + "learning_rate": 4.737861487626039e-07, + "loss": 11.355, + "step": 437140 + }, + { + "epoch": 0.8830706577730014, + "grad_norm": 602.4818115234375, + "learning_rate": 4.7363784355449303e-07, + "loss": 21.5469, + "step": 437150 + }, + { + "epoch": 0.8830908584056852, + "grad_norm": 606.8235473632812, + "learning_rate": 4.734895604073214e-07, + "loss": 14.414, + "step": 437160 + }, + { + "epoch": 0.8831110590383691, + "grad_norm": 586.7005004882812, + "learning_rate": 4.7334129932181283e-07, + "loss": 34.3757, + "step": 437170 + }, + { + "epoch": 0.8831312596710529, + "grad_norm": 158.1629638671875, + "learning_rate": 4.731930602986906e-07, + "loss": 9.9822, + "step": 437180 + }, + { + "epoch": 0.8831514603037367, + "grad_norm": 417.0316162109375, + "learning_rate": 4.730448433386764e-07, + "loss": 14.5091, + "step": 437190 + }, + { + "epoch": 0.8831716609364205, + "grad_norm": 861.2413940429688, + "learning_rate": 4.728966484424913e-07, + "loss": 26.3821, + "step": 437200 + }, + { + "epoch": 0.8831918615691043, + "grad_norm": 502.9021301269531, + "learning_rate": 4.727484756108602e-07, + "loss": 14.3446, + "step": 437210 + }, + { + "epoch": 0.8832120622017882, + "grad_norm": 319.57952880859375, + "learning_rate": 4.726003248445038e-07, + "loss": 17.3094, + "step": 437220 + }, + { + "epoch": 0.883232262834472, + "grad_norm": 108.85161590576172, + "learning_rate": 4.724521961441436e-07, + "loss": 14.5947, + "step": 437230 + }, + { + "epoch": 0.8832524634671558, + "grad_norm": 199.23394775390625, + "learning_rate": 4.723040895105019e-07, + "loss": 23.1646, + "step": 437240 + }, + { + "epoch": 0.8832726640998396, + "grad_norm": 285.6567687988281, + "learning_rate": 4.72156004944303e-07, + "loss": 13.2874, + "step": 437250 + }, + { + "epoch": 0.8832928647325234, + "grad_norm": 457.18017578125, + "learning_rate": 4.720079424462648e-07, + "loss": 20.3675, + "step": 437260 + }, + { + "epoch": 0.8833130653652073, + "grad_norm": 428.0270080566406, + "learning_rate": 4.718599020171105e-07, + "loss": 18.1246, + "step": 437270 + }, + { + "epoch": 0.8833332659978911, + "grad_norm": 94.82398223876953, + "learning_rate": 4.7171188365756235e-07, + "loss": 21.3042, + "step": 437280 + }, + { + "epoch": 0.8833534666305749, + "grad_norm": 424.24517822265625, + "learning_rate": 4.71563887368342e-07, + "loss": 13.0863, + "step": 437290 + }, + { + "epoch": 0.8833736672632587, + "grad_norm": 0.6462593078613281, + "learning_rate": 4.714159131501689e-07, + "loss": 8.5118, + "step": 437300 + }, + { + "epoch": 0.8833938678959425, + "grad_norm": 181.89942932128906, + "learning_rate": 4.7126796100376625e-07, + "loss": 16.9697, + "step": 437310 + }, + { + "epoch": 0.8834140685286264, + "grad_norm": 22.54423713684082, + "learning_rate": 4.7112003092985414e-07, + "loss": 11.9234, + "step": 437320 + }, + { + "epoch": 0.8834342691613102, + "grad_norm": 212.46315002441406, + "learning_rate": 4.7097212292915307e-07, + "loss": 18.5752, + "step": 437330 + }, + { + "epoch": 0.883454469793994, + "grad_norm": 273.62518310546875, + "learning_rate": 4.7082423700238413e-07, + "loss": 12.7955, + "step": 437340 + }, + { + "epoch": 0.8834746704266777, + "grad_norm": 337.732421875, + "learning_rate": 4.7067637315027005e-07, + "loss": 11.8375, + "step": 437350 + }, + { + "epoch": 0.8834948710593615, + "grad_norm": 203.64193725585938, + "learning_rate": 4.705285313735297e-07, + "loss": 9.4601, + "step": 437360 + }, + { + "epoch": 0.8835150716920454, + "grad_norm": 468.43768310546875, + "learning_rate": 4.703807116728831e-07, + "loss": 14.4001, + "step": 437370 + }, + { + "epoch": 0.8835352723247292, + "grad_norm": 310.4579772949219, + "learning_rate": 4.7023291404905245e-07, + "loss": 13.6832, + "step": 437380 + }, + { + "epoch": 0.883555472957413, + "grad_norm": 489.82421875, + "learning_rate": 4.700851385027566e-07, + "loss": 18.8303, + "step": 437390 + }, + { + "epoch": 0.8835756735900968, + "grad_norm": 307.5848083496094, + "learning_rate": 4.699373850347161e-07, + "loss": 23.059, + "step": 437400 + }, + { + "epoch": 0.8835958742227806, + "grad_norm": 587.0242919921875, + "learning_rate": 4.69789653645652e-07, + "loss": 21.7008, + "step": 437410 + }, + { + "epoch": 0.8836160748554645, + "grad_norm": 445.4830322265625, + "learning_rate": 4.6964194433628317e-07, + "loss": 10.5014, + "step": 437420 + }, + { + "epoch": 0.8836362754881483, + "grad_norm": 395.11370849609375, + "learning_rate": 4.6949425710733076e-07, + "loss": 9.3218, + "step": 437430 + }, + { + "epoch": 0.8836564761208321, + "grad_norm": 300.8310852050781, + "learning_rate": 4.693465919595136e-07, + "loss": 10.4542, + "step": 437440 + }, + { + "epoch": 0.8836766767535159, + "grad_norm": 375.7260437011719, + "learning_rate": 4.691989488935511e-07, + "loss": 17.3353, + "step": 437450 + }, + { + "epoch": 0.8836968773861997, + "grad_norm": 503.7796325683594, + "learning_rate": 4.690513279101638e-07, + "loss": 17.4916, + "step": 437460 + }, + { + "epoch": 0.8837170780188836, + "grad_norm": 682.6212158203125, + "learning_rate": 4.689037290100712e-07, + "loss": 22.9069, + "step": 437470 + }, + { + "epoch": 0.8837372786515674, + "grad_norm": 276.8441162109375, + "learning_rate": 4.687561521939915e-07, + "loss": 14.4692, + "step": 437480 + }, + { + "epoch": 0.8837574792842512, + "grad_norm": 87.65137481689453, + "learning_rate": 4.686085974626442e-07, + "loss": 25.7139, + "step": 437490 + }, + { + "epoch": 0.883777679916935, + "grad_norm": 647.8373413085938, + "learning_rate": 4.6846106481675035e-07, + "loss": 35.8543, + "step": 437500 + }, + { + "epoch": 0.8837978805496188, + "grad_norm": 23.456647872924805, + "learning_rate": 4.683135542570277e-07, + "loss": 15.9017, + "step": 437510 + }, + { + "epoch": 0.8838180811823027, + "grad_norm": 539.7153930664062, + "learning_rate": 4.681660657841941e-07, + "loss": 12.9271, + "step": 437520 + }, + { + "epoch": 0.8838382818149865, + "grad_norm": 442.73876953125, + "learning_rate": 4.6801859939896997e-07, + "loss": 19.9689, + "step": 437530 + }, + { + "epoch": 0.8838584824476703, + "grad_norm": 325.82708740234375, + "learning_rate": 4.678711551020743e-07, + "loss": 19.9856, + "step": 437540 + }, + { + "epoch": 0.8838786830803541, + "grad_norm": 2447.803466796875, + "learning_rate": 4.677237328942236e-07, + "loss": 16.8712, + "step": 437550 + }, + { + "epoch": 0.8838988837130379, + "grad_norm": 508.7763671875, + "learning_rate": 4.6757633277613734e-07, + "loss": 15.6815, + "step": 437560 + }, + { + "epoch": 0.8839190843457218, + "grad_norm": 592.2590942382812, + "learning_rate": 4.674289547485367e-07, + "loss": 19.2942, + "step": 437570 + }, + { + "epoch": 0.8839392849784056, + "grad_norm": 809.1892700195312, + "learning_rate": 4.672815988121354e-07, + "loss": 19.2552, + "step": 437580 + }, + { + "epoch": 0.8839594856110894, + "grad_norm": 3.522529125213623, + "learning_rate": 4.6713426496765413e-07, + "loss": 19.2758, + "step": 437590 + }, + { + "epoch": 0.8839796862437731, + "grad_norm": 0.0, + "learning_rate": 4.6698695321581165e-07, + "loss": 19.1922, + "step": 437600 + }, + { + "epoch": 0.8839998868764569, + "grad_norm": 352.0887145996094, + "learning_rate": 4.6683966355732466e-07, + "loss": 12.766, + "step": 437610 + }, + { + "epoch": 0.8840200875091407, + "grad_norm": 241.43849182128906, + "learning_rate": 4.6669239599291093e-07, + "loss": 17.8853, + "step": 437620 + }, + { + "epoch": 0.8840402881418246, + "grad_norm": 608.0235595703125, + "learning_rate": 4.665451505232882e-07, + "loss": 14.7198, + "step": 437630 + }, + { + "epoch": 0.8840604887745084, + "grad_norm": 361.3467712402344, + "learning_rate": 4.663979271491764e-07, + "loss": 21.5801, + "step": 437640 + }, + { + "epoch": 0.8840806894071922, + "grad_norm": 461.13983154296875, + "learning_rate": 4.662507258712895e-07, + "loss": 12.0084, + "step": 437650 + }, + { + "epoch": 0.884100890039876, + "grad_norm": 745.8554077148438, + "learning_rate": 4.6610354669034686e-07, + "loss": 23.2169, + "step": 437660 + }, + { + "epoch": 0.8841210906725598, + "grad_norm": 785.3743896484375, + "learning_rate": 4.6595638960706624e-07, + "loss": 19.8191, + "step": 437670 + }, + { + "epoch": 0.8841412913052437, + "grad_norm": 748.1947631835938, + "learning_rate": 4.6580925462216487e-07, + "loss": 21.8141, + "step": 437680 + }, + { + "epoch": 0.8841614919379275, + "grad_norm": 268.165283203125, + "learning_rate": 4.656621417363577e-07, + "loss": 33.7911, + "step": 437690 + }, + { + "epoch": 0.8841816925706113, + "grad_norm": 100.24395751953125, + "learning_rate": 4.655150509503642e-07, + "loss": 7.0177, + "step": 437700 + }, + { + "epoch": 0.8842018932032951, + "grad_norm": 439.3194885253906, + "learning_rate": 4.65367982264901e-07, + "loss": 13.1335, + "step": 437710 + }, + { + "epoch": 0.8842220938359789, + "grad_norm": 580.4093627929688, + "learning_rate": 4.6522093568068307e-07, + "loss": 21.3427, + "step": 437720 + }, + { + "epoch": 0.8842422944686628, + "grad_norm": 221.25843811035156, + "learning_rate": 4.650739111984287e-07, + "loss": 17.7892, + "step": 437730 + }, + { + "epoch": 0.8842624951013466, + "grad_norm": 306.7304992675781, + "learning_rate": 4.649269088188535e-07, + "loss": 11.2845, + "step": 437740 + }, + { + "epoch": 0.8842826957340304, + "grad_norm": 288.1169738769531, + "learning_rate": 4.647799285426757e-07, + "loss": 19.4869, + "step": 437750 + }, + { + "epoch": 0.8843028963667142, + "grad_norm": 726.6117553710938, + "learning_rate": 4.646329703706104e-07, + "loss": 37.4337, + "step": 437760 + }, + { + "epoch": 0.884323096999398, + "grad_norm": 282.6123962402344, + "learning_rate": 4.644860343033725e-07, + "loss": 8.5612, + "step": 437770 + }, + { + "epoch": 0.8843432976320819, + "grad_norm": 184.43309020996094, + "learning_rate": 4.6433912034168083e-07, + "loss": 10.7296, + "step": 437780 + }, + { + "epoch": 0.8843634982647657, + "grad_norm": 313.0046691894531, + "learning_rate": 4.6419222848624933e-07, + "loss": 13.1905, + "step": 437790 + }, + { + "epoch": 0.8843836988974495, + "grad_norm": 424.1170349121094, + "learning_rate": 4.640453587377958e-07, + "loss": 17.5901, + "step": 437800 + }, + { + "epoch": 0.8844038995301333, + "grad_norm": 543.5725708007812, + "learning_rate": 4.63898511097034e-07, + "loss": 20.5456, + "step": 437810 + }, + { + "epoch": 0.8844241001628171, + "grad_norm": 267.98577880859375, + "learning_rate": 4.6375168556468175e-07, + "loss": 27.1274, + "step": 437820 + }, + { + "epoch": 0.884444300795501, + "grad_norm": 689.3540649414062, + "learning_rate": 4.636048821414535e-07, + "loss": 17.7252, + "step": 437830 + }, + { + "epoch": 0.8844645014281848, + "grad_norm": 445.6986999511719, + "learning_rate": 4.6345810082806363e-07, + "loss": 18.1701, + "step": 437840 + }, + { + "epoch": 0.8844847020608686, + "grad_norm": 373.1318359375, + "learning_rate": 4.6331134162522994e-07, + "loss": 15.1325, + "step": 437850 + }, + { + "epoch": 0.8845049026935523, + "grad_norm": 377.1880187988281, + "learning_rate": 4.631646045336663e-07, + "loss": 16.7919, + "step": 437860 + }, + { + "epoch": 0.8845251033262361, + "grad_norm": 185.0105438232422, + "learning_rate": 4.6301788955408765e-07, + "loss": 27.8121, + "step": 437870 + }, + { + "epoch": 0.88454530395892, + "grad_norm": 563.5680541992188, + "learning_rate": 4.62871196687209e-07, + "loss": 18.6658, + "step": 437880 + }, + { + "epoch": 0.8845655045916038, + "grad_norm": 231.3992919921875, + "learning_rate": 4.6272452593374763e-07, + "loss": 15.211, + "step": 437890 + }, + { + "epoch": 0.8845857052242876, + "grad_norm": 88.52085876464844, + "learning_rate": 4.625778772944156e-07, + "loss": 17.0757, + "step": 437900 + }, + { + "epoch": 0.8846059058569714, + "grad_norm": 181.04515075683594, + "learning_rate": 4.6243125076992857e-07, + "loss": 9.0486, + "step": 437910 + }, + { + "epoch": 0.8846261064896552, + "grad_norm": 112.04206085205078, + "learning_rate": 4.62284646361002e-07, + "loss": 14.2957, + "step": 437920 + }, + { + "epoch": 0.884646307122339, + "grad_norm": 370.3331604003906, + "learning_rate": 4.6213806406834926e-07, + "loss": 25.6922, + "step": 437930 + }, + { + "epoch": 0.8846665077550229, + "grad_norm": 485.3958740234375, + "learning_rate": 4.6199150389268476e-07, + "loss": 13.4734, + "step": 437940 + }, + { + "epoch": 0.8846867083877067, + "grad_norm": 288.86383056640625, + "learning_rate": 4.6184496583472293e-07, + "loss": 32.1188, + "step": 437950 + }, + { + "epoch": 0.8847069090203905, + "grad_norm": 38.7264518737793, + "learning_rate": 4.616984498951793e-07, + "loss": 12.0045, + "step": 437960 + }, + { + "epoch": 0.8847271096530743, + "grad_norm": 810.9624633789062, + "learning_rate": 4.6155195607476723e-07, + "loss": 14.6303, + "step": 437970 + }, + { + "epoch": 0.8847473102857581, + "grad_norm": 460.4407958984375, + "learning_rate": 4.614054843741994e-07, + "loss": 9.7327, + "step": 437980 + }, + { + "epoch": 0.884767510918442, + "grad_norm": 91.19581604003906, + "learning_rate": 4.61259034794192e-07, + "loss": 10.1597, + "step": 437990 + }, + { + "epoch": 0.8847877115511258, + "grad_norm": 616.4536743164062, + "learning_rate": 4.6111260733545714e-07, + "loss": 28.4931, + "step": 438000 + }, + { + "epoch": 0.8848079121838096, + "grad_norm": 162.45799255371094, + "learning_rate": 4.6096620199870824e-07, + "loss": 8.3742, + "step": 438010 + }, + { + "epoch": 0.8848281128164934, + "grad_norm": 215.92393493652344, + "learning_rate": 4.6081981878466077e-07, + "loss": 18.8802, + "step": 438020 + }, + { + "epoch": 0.8848483134491772, + "grad_norm": 255.54925537109375, + "learning_rate": 4.606734576940253e-07, + "loss": 16.6796, + "step": 438030 + }, + { + "epoch": 0.8848685140818611, + "grad_norm": 0.0, + "learning_rate": 4.6052711872751843e-07, + "loss": 10.8918, + "step": 438040 + }, + { + "epoch": 0.8848887147145449, + "grad_norm": 235.79869079589844, + "learning_rate": 4.6038080188585135e-07, + "loss": 21.5738, + "step": 438050 + }, + { + "epoch": 0.8849089153472287, + "grad_norm": 143.0699920654297, + "learning_rate": 4.602345071697373e-07, + "loss": 13.8974, + "step": 438060 + }, + { + "epoch": 0.8849291159799125, + "grad_norm": 160.64614868164062, + "learning_rate": 4.600882345798902e-07, + "loss": 24.4522, + "step": 438070 + }, + { + "epoch": 0.8849493166125963, + "grad_norm": 119.09025573730469, + "learning_rate": 4.599419841170216e-07, + "loss": 17.8335, + "step": 438080 + }, + { + "epoch": 0.8849695172452802, + "grad_norm": 638.122802734375, + "learning_rate": 4.5979575578184554e-07, + "loss": 18.7437, + "step": 438090 + }, + { + "epoch": 0.884989717877964, + "grad_norm": 433.4074401855469, + "learning_rate": 4.5964954957507414e-07, + "loss": 22.9923, + "step": 438100 + }, + { + "epoch": 0.8850099185106478, + "grad_norm": 442.2541809082031, + "learning_rate": 4.595033654974207e-07, + "loss": 11.311, + "step": 438110 + }, + { + "epoch": 0.8850301191433315, + "grad_norm": 338.8622131347656, + "learning_rate": 4.593572035495969e-07, + "loss": 16.6048, + "step": 438120 + }, + { + "epoch": 0.8850503197760153, + "grad_norm": 254.80043029785156, + "learning_rate": 4.592110637323149e-07, + "loss": 9.9581, + "step": 438130 + }, + { + "epoch": 0.8850705204086992, + "grad_norm": 468.2026062011719, + "learning_rate": 4.5906494604628816e-07, + "loss": 17.1238, + "step": 438140 + }, + { + "epoch": 0.885090721041383, + "grad_norm": 607.1072998046875, + "learning_rate": 4.5891885049222815e-07, + "loss": 10.1863, + "step": 438150 + }, + { + "epoch": 0.8851109216740668, + "grad_norm": 680.0186157226562, + "learning_rate": 4.587727770708461e-07, + "loss": 15.1077, + "step": 438160 + }, + { + "epoch": 0.8851311223067506, + "grad_norm": 596.2591552734375, + "learning_rate": 4.5862672578285475e-07, + "loss": 18.6161, + "step": 438170 + }, + { + "epoch": 0.8851513229394344, + "grad_norm": 493.0876159667969, + "learning_rate": 4.5848069662896786e-07, + "loss": 18.7473, + "step": 438180 + }, + { + "epoch": 0.8851715235721183, + "grad_norm": 283.8135070800781, + "learning_rate": 4.5833468960989333e-07, + "loss": 19.7269, + "step": 438190 + }, + { + "epoch": 0.8851917242048021, + "grad_norm": 155.3894805908203, + "learning_rate": 4.581887047263445e-07, + "loss": 36.0125, + "step": 438200 + }, + { + "epoch": 0.8852119248374859, + "grad_norm": 399.869384765625, + "learning_rate": 4.5804274197903396e-07, + "loss": 11.8754, + "step": 438210 + }, + { + "epoch": 0.8852321254701697, + "grad_norm": 407.58941650390625, + "learning_rate": 4.5789680136867245e-07, + "loss": 14.0652, + "step": 438220 + }, + { + "epoch": 0.8852523261028535, + "grad_norm": 232.7967071533203, + "learning_rate": 4.577508828959698e-07, + "loss": 12.1223, + "step": 438230 + }, + { + "epoch": 0.8852725267355374, + "grad_norm": 279.1619873046875, + "learning_rate": 4.5760498656163886e-07, + "loss": 30.3401, + "step": 438240 + }, + { + "epoch": 0.8852927273682212, + "grad_norm": 716.46875, + "learning_rate": 4.5745911236639186e-07, + "loss": 25.7322, + "step": 438250 + }, + { + "epoch": 0.885312928000905, + "grad_norm": 555.6453857421875, + "learning_rate": 4.5731326031093645e-07, + "loss": 20.943, + "step": 438260 + }, + { + "epoch": 0.8853331286335888, + "grad_norm": 371.71453857421875, + "learning_rate": 4.57167430395985e-07, + "loss": 17.1643, + "step": 438270 + }, + { + "epoch": 0.8853533292662726, + "grad_norm": 604.136962890625, + "learning_rate": 4.5702162262224957e-07, + "loss": 15.5282, + "step": 438280 + }, + { + "epoch": 0.8853735298989565, + "grad_norm": 481.5717468261719, + "learning_rate": 4.5687583699044027e-07, + "loss": 20.4883, + "step": 438290 + }, + { + "epoch": 0.8853937305316403, + "grad_norm": 913.0048828125, + "learning_rate": 4.567300735012653e-07, + "loss": 19.11, + "step": 438300 + }, + { + "epoch": 0.8854139311643241, + "grad_norm": 522.569580078125, + "learning_rate": 4.565843321554386e-07, + "loss": 7.5831, + "step": 438310 + }, + { + "epoch": 0.8854341317970079, + "grad_norm": 355.49493408203125, + "learning_rate": 4.5643861295366854e-07, + "loss": 22.0143, + "step": 438320 + }, + { + "epoch": 0.8854543324296917, + "grad_norm": 793.0252075195312, + "learning_rate": 4.562929158966645e-07, + "loss": 19.7702, + "step": 438330 + }, + { + "epoch": 0.8854745330623756, + "grad_norm": 668.8243408203125, + "learning_rate": 4.561472409851386e-07, + "loss": 29.0895, + "step": 438340 + }, + { + "epoch": 0.8854947336950594, + "grad_norm": 279.86346435546875, + "learning_rate": 4.5600158821979933e-07, + "loss": 23.1765, + "step": 438350 + }, + { + "epoch": 0.8855149343277432, + "grad_norm": 11.049134254455566, + "learning_rate": 4.5585595760135825e-07, + "loss": 12.0037, + "step": 438360 + }, + { + "epoch": 0.8855351349604269, + "grad_norm": 108.68246459960938, + "learning_rate": 4.557103491305237e-07, + "loss": 22.9215, + "step": 438370 + }, + { + "epoch": 0.8855553355931107, + "grad_norm": 414.8169250488281, + "learning_rate": 4.555647628080051e-07, + "loss": 8.2424, + "step": 438380 + }, + { + "epoch": 0.8855755362257945, + "grad_norm": 265.68408203125, + "learning_rate": 4.554191986345136e-07, + "loss": 17.2626, + "step": 438390 + }, + { + "epoch": 0.8855957368584784, + "grad_norm": 139.4207000732422, + "learning_rate": 4.552736566107563e-07, + "loss": 9.0719, + "step": 438400 + }, + { + "epoch": 0.8856159374911622, + "grad_norm": 15.755369186401367, + "learning_rate": 4.551281367374455e-07, + "loss": 8.6049, + "step": 438410 + }, + { + "epoch": 0.885636138123846, + "grad_norm": 264.4195556640625, + "learning_rate": 4.5498263901528784e-07, + "loss": 13.7838, + "step": 438420 + }, + { + "epoch": 0.8856563387565298, + "grad_norm": 201.75054931640625, + "learning_rate": 4.548371634449944e-07, + "loss": 10.8078, + "step": 438430 + }, + { + "epoch": 0.8856765393892136, + "grad_norm": 353.6030578613281, + "learning_rate": 4.546917100272735e-07, + "loss": 8.0673, + "step": 438440 + }, + { + "epoch": 0.8856967400218975, + "grad_norm": 319.56085205078125, + "learning_rate": 4.5454627876283295e-07, + "loss": 18.6831, + "step": 438450 + }, + { + "epoch": 0.8857169406545813, + "grad_norm": 398.4009094238281, + "learning_rate": 4.5440086965238326e-07, + "loss": 12.8228, + "step": 438460 + }, + { + "epoch": 0.8857371412872651, + "grad_norm": 1338.2640380859375, + "learning_rate": 4.542554826966328e-07, + "loss": 14.2016, + "step": 438470 + }, + { + "epoch": 0.8857573419199489, + "grad_norm": 801.7337646484375, + "learning_rate": 4.541101178962887e-07, + "loss": 23.2571, + "step": 438480 + }, + { + "epoch": 0.8857775425526327, + "grad_norm": 412.7310485839844, + "learning_rate": 4.539647752520604e-07, + "loss": 16.6522, + "step": 438490 + }, + { + "epoch": 0.8857977431853166, + "grad_norm": 84.2537841796875, + "learning_rate": 4.538194547646574e-07, + "loss": 29.8909, + "step": 438500 + }, + { + "epoch": 0.8858179438180004, + "grad_norm": 494.4392395019531, + "learning_rate": 4.5367415643478683e-07, + "loss": 27.2246, + "step": 438510 + }, + { + "epoch": 0.8858381444506842, + "grad_norm": 822.8221435546875, + "learning_rate": 4.5352888026315654e-07, + "loss": 27.4212, + "step": 438520 + }, + { + "epoch": 0.885858345083368, + "grad_norm": 83.42070007324219, + "learning_rate": 4.533836262504759e-07, + "loss": 15.1378, + "step": 438530 + }, + { + "epoch": 0.8858785457160518, + "grad_norm": 133.17446899414062, + "learning_rate": 4.5323839439745163e-07, + "loss": 42.554, + "step": 438540 + }, + { + "epoch": 0.8858987463487357, + "grad_norm": 579.7252807617188, + "learning_rate": 4.5309318470479144e-07, + "loss": 18.9857, + "step": 438550 + }, + { + "epoch": 0.8859189469814195, + "grad_norm": 142.4523162841797, + "learning_rate": 4.529479971732031e-07, + "loss": 15.7356, + "step": 438560 + }, + { + "epoch": 0.8859391476141033, + "grad_norm": 598.0130004882812, + "learning_rate": 4.528028318033961e-07, + "loss": 20.5556, + "step": 438570 + }, + { + "epoch": 0.8859593482467871, + "grad_norm": 274.994384765625, + "learning_rate": 4.526576885960765e-07, + "loss": 14.0988, + "step": 438580 + }, + { + "epoch": 0.885979548879471, + "grad_norm": 882.8165893554688, + "learning_rate": 4.5251256755195093e-07, + "loss": 17.3218, + "step": 438590 + }, + { + "epoch": 0.8859997495121548, + "grad_norm": 356.706787109375, + "learning_rate": 4.523674686717283e-07, + "loss": 13.0502, + "step": 438600 + }, + { + "epoch": 0.8860199501448386, + "grad_norm": 363.36566162109375, + "learning_rate": 4.522223919561153e-07, + "loss": 7.7626, + "step": 438610 + }, + { + "epoch": 0.8860401507775224, + "grad_norm": 416.0433654785156, + "learning_rate": 4.520773374058179e-07, + "loss": 14.5136, + "step": 438620 + }, + { + "epoch": 0.8860603514102061, + "grad_norm": 344.8145446777344, + "learning_rate": 4.519323050215446e-07, + "loss": 9.0433, + "step": 438630 + }, + { + "epoch": 0.8860805520428899, + "grad_norm": 2461.770263671875, + "learning_rate": 4.5178729480400084e-07, + "loss": 44.5676, + "step": 438640 + }, + { + "epoch": 0.8861007526755738, + "grad_norm": 302.51043701171875, + "learning_rate": 4.51642306753895e-07, + "loss": 11.8257, + "step": 438650 + }, + { + "epoch": 0.8861209533082576, + "grad_norm": 52.07389831542969, + "learning_rate": 4.514973408719331e-07, + "loss": 23.1165, + "step": 438660 + }, + { + "epoch": 0.8861411539409414, + "grad_norm": 351.7831115722656, + "learning_rate": 4.513523971588202e-07, + "loss": 15.1223, + "step": 438670 + }, + { + "epoch": 0.8861613545736252, + "grad_norm": 761.6973876953125, + "learning_rate": 4.512074756152651e-07, + "loss": 88.1393, + "step": 438680 + }, + { + "epoch": 0.886181555206309, + "grad_norm": 458.8019104003906, + "learning_rate": 4.5106257624197237e-07, + "loss": 20.716, + "step": 438690 + }, + { + "epoch": 0.8862017558389929, + "grad_norm": 499.322998046875, + "learning_rate": 4.5091769903964965e-07, + "loss": 18.8507, + "step": 438700 + }, + { + "epoch": 0.8862219564716767, + "grad_norm": 28.082361221313477, + "learning_rate": 4.5077284400900147e-07, + "loss": 32.0778, + "step": 438710 + }, + { + "epoch": 0.8862421571043605, + "grad_norm": 210.52737426757812, + "learning_rate": 4.5062801115073607e-07, + "loss": 17.0969, + "step": 438720 + }, + { + "epoch": 0.8862623577370443, + "grad_norm": 71.61561584472656, + "learning_rate": 4.504832004655574e-07, + "loss": 10.2539, + "step": 438730 + }, + { + "epoch": 0.8862825583697281, + "grad_norm": 459.04461669921875, + "learning_rate": 4.503384119541709e-07, + "loss": 13.2212, + "step": 438740 + }, + { + "epoch": 0.886302759002412, + "grad_norm": 210.68026733398438, + "learning_rate": 4.501936456172845e-07, + "loss": 28.052, + "step": 438750 + }, + { + "epoch": 0.8863229596350958, + "grad_norm": 465.93450927734375, + "learning_rate": 4.50048901455602e-07, + "loss": 21.7153, + "step": 438760 + }, + { + "epoch": 0.8863431602677796, + "grad_norm": 143.4450225830078, + "learning_rate": 4.4990417946982836e-07, + "loss": 10.5338, + "step": 438770 + }, + { + "epoch": 0.8863633609004634, + "grad_norm": 1098.5234375, + "learning_rate": 4.4975947966067023e-07, + "loss": 29.5691, + "step": 438780 + }, + { + "epoch": 0.8863835615331472, + "grad_norm": 390.75439453125, + "learning_rate": 4.4961480202883434e-07, + "loss": 20.115, + "step": 438790 + }, + { + "epoch": 0.886403762165831, + "grad_norm": 428.8589782714844, + "learning_rate": 4.494701465750217e-07, + "loss": 25.0542, + "step": 438800 + }, + { + "epoch": 0.8864239627985149, + "grad_norm": 109.70050811767578, + "learning_rate": 4.4932551329994023e-07, + "loss": 13.2468, + "step": 438810 + }, + { + "epoch": 0.8864441634311987, + "grad_norm": 313.089111328125, + "learning_rate": 4.4918090220429476e-07, + "loss": 11.3645, + "step": 438820 + }, + { + "epoch": 0.8864643640638825, + "grad_norm": 143.77685546875, + "learning_rate": 4.490363132887904e-07, + "loss": 11.8531, + "step": 438830 + }, + { + "epoch": 0.8864845646965663, + "grad_norm": 922.7874145507812, + "learning_rate": 4.4889174655412924e-07, + "loss": 20.0964, + "step": 438840 + }, + { + "epoch": 0.8865047653292502, + "grad_norm": 204.7127685546875, + "learning_rate": 4.487472020010181e-07, + "loss": 9.0205, + "step": 438850 + }, + { + "epoch": 0.886524965961934, + "grad_norm": 307.23052978515625, + "learning_rate": 4.4860267963016293e-07, + "loss": 19.5553, + "step": 438860 + }, + { + "epoch": 0.8865451665946178, + "grad_norm": 117.01416778564453, + "learning_rate": 4.484581794422643e-07, + "loss": 15.8075, + "step": 438870 + }, + { + "epoch": 0.8865653672273015, + "grad_norm": 903.5155029296875, + "learning_rate": 4.48313701438029e-07, + "loss": 22.7876, + "step": 438880 + }, + { + "epoch": 0.8865855678599853, + "grad_norm": 392.1251220703125, + "learning_rate": 4.4816924561816076e-07, + "loss": 15.337, + "step": 438890 + }, + { + "epoch": 0.8866057684926691, + "grad_norm": 690.133056640625, + "learning_rate": 4.480248119833641e-07, + "loss": 12.9416, + "step": 438900 + }, + { + "epoch": 0.886625969125353, + "grad_norm": 261.4015197753906, + "learning_rate": 4.4788040053434124e-07, + "loss": 16.3067, + "step": 438910 + }, + { + "epoch": 0.8866461697580368, + "grad_norm": 405.3455810546875, + "learning_rate": 4.477360112717982e-07, + "loss": 21.6496, + "step": 438920 + }, + { + "epoch": 0.8866663703907206, + "grad_norm": 555.2881469726562, + "learning_rate": 4.475916441964379e-07, + "loss": 18.7047, + "step": 438930 + }, + { + "epoch": 0.8866865710234044, + "grad_norm": 474.69940185546875, + "learning_rate": 4.474472993089629e-07, + "loss": 15.6627, + "step": 438940 + }, + { + "epoch": 0.8867067716560882, + "grad_norm": 689.9981689453125, + "learning_rate": 4.473029766100784e-07, + "loss": 37.1921, + "step": 438950 + }, + { + "epoch": 0.8867269722887721, + "grad_norm": 55.59153747558594, + "learning_rate": 4.471586761004859e-07, + "loss": 6.5716, + "step": 438960 + }, + { + "epoch": 0.8867471729214559, + "grad_norm": 115.98567199707031, + "learning_rate": 4.4701439778089105e-07, + "loss": 18.5614, + "step": 438970 + }, + { + "epoch": 0.8867673735541397, + "grad_norm": 6.016637802124023, + "learning_rate": 4.4687014165199547e-07, + "loss": 12.5303, + "step": 438980 + }, + { + "epoch": 0.8867875741868235, + "grad_norm": 264.12884521484375, + "learning_rate": 4.46725907714502e-07, + "loss": 36.1123, + "step": 438990 + }, + { + "epoch": 0.8868077748195073, + "grad_norm": 352.121826171875, + "learning_rate": 4.4658169596911493e-07, + "loss": 18.2095, + "step": 439000 + }, + { + "epoch": 0.8868279754521912, + "grad_norm": 496.9523620605469, + "learning_rate": 4.464375064165355e-07, + "loss": 17.7458, + "step": 439010 + }, + { + "epoch": 0.886848176084875, + "grad_norm": 726.5328979492188, + "learning_rate": 4.4629333905746864e-07, + "loss": 24.0773, + "step": 439020 + }, + { + "epoch": 0.8868683767175588, + "grad_norm": 915.4989624023438, + "learning_rate": 4.461491938926144e-07, + "loss": 25.1332, + "step": 439030 + }, + { + "epoch": 0.8868885773502426, + "grad_norm": 90.11317443847656, + "learning_rate": 4.4600507092267767e-07, + "loss": 15.5745, + "step": 439040 + }, + { + "epoch": 0.8869087779829264, + "grad_norm": 420.9406433105469, + "learning_rate": 4.4586097014836017e-07, + "loss": 11.2216, + "step": 439050 + }, + { + "epoch": 0.8869289786156103, + "grad_norm": 325.19110107421875, + "learning_rate": 4.4571689157036244e-07, + "loss": 9.7093, + "step": 439060 + }, + { + "epoch": 0.8869491792482941, + "grad_norm": 156.01129150390625, + "learning_rate": 4.455728351893895e-07, + "loss": 19.7621, + "step": 439070 + }, + { + "epoch": 0.8869693798809779, + "grad_norm": 1102.5689697265625, + "learning_rate": 4.454288010061425e-07, + "loss": 33.3687, + "step": 439080 + }, + { + "epoch": 0.8869895805136617, + "grad_norm": 310.2066955566406, + "learning_rate": 4.4528478902132187e-07, + "loss": 12.1877, + "step": 439090 + }, + { + "epoch": 0.8870097811463455, + "grad_norm": 66.99678039550781, + "learning_rate": 4.4514079923563103e-07, + "loss": 15.1627, + "step": 439100 + }, + { + "epoch": 0.8870299817790294, + "grad_norm": 365.5875244140625, + "learning_rate": 4.449968316497721e-07, + "loss": 15.9574, + "step": 439110 + }, + { + "epoch": 0.8870501824117132, + "grad_norm": 398.55133056640625, + "learning_rate": 4.448528862644458e-07, + "loss": 12.7694, + "step": 439120 + }, + { + "epoch": 0.887070383044397, + "grad_norm": 465.5559387207031, + "learning_rate": 4.447089630803536e-07, + "loss": 25.2559, + "step": 439130 + }, + { + "epoch": 0.8870905836770807, + "grad_norm": 1330.308837890625, + "learning_rate": 4.445650620981984e-07, + "loss": 17.1388, + "step": 439140 + }, + { + "epoch": 0.8871107843097645, + "grad_norm": 277.4283752441406, + "learning_rate": 4.444211833186807e-07, + "loss": 15.4694, + "step": 439150 + }, + { + "epoch": 0.8871309849424484, + "grad_norm": 719.4064331054688, + "learning_rate": 4.4427732674250045e-07, + "loss": 19.9625, + "step": 439160 + }, + { + "epoch": 0.8871511855751322, + "grad_norm": 396.2528076171875, + "learning_rate": 4.4413349237036e-07, + "loss": 15.4088, + "step": 439170 + }, + { + "epoch": 0.887171386207816, + "grad_norm": 250.67013549804688, + "learning_rate": 4.4398968020296143e-07, + "loss": 20.414, + "step": 439180 + }, + { + "epoch": 0.8871915868404998, + "grad_norm": 535.2349853515625, + "learning_rate": 4.4384589024100423e-07, + "loss": 28.5496, + "step": 439190 + }, + { + "epoch": 0.8872117874731836, + "grad_norm": 499.7408447265625, + "learning_rate": 4.4370212248518895e-07, + "loss": 18.146, + "step": 439200 + }, + { + "epoch": 0.8872319881058675, + "grad_norm": 579.1659545898438, + "learning_rate": 4.4355837693621786e-07, + "loss": 22.3271, + "step": 439210 + }, + { + "epoch": 0.8872521887385513, + "grad_norm": 432.2957458496094, + "learning_rate": 4.434146535947903e-07, + "loss": 20.5266, + "step": 439220 + }, + { + "epoch": 0.8872723893712351, + "grad_norm": 116.518310546875, + "learning_rate": 4.4327095246160636e-07, + "loss": 15.6317, + "step": 439230 + }, + { + "epoch": 0.8872925900039189, + "grad_norm": 329.53875732421875, + "learning_rate": 4.4312727353736816e-07, + "loss": 48.8665, + "step": 439240 + }, + { + "epoch": 0.8873127906366027, + "grad_norm": 527.8027954101562, + "learning_rate": 4.4298361682277355e-07, + "loss": 17.8723, + "step": 439250 + }, + { + "epoch": 0.8873329912692866, + "grad_norm": 389.74847412109375, + "learning_rate": 4.428399823185253e-07, + "loss": 16.563, + "step": 439260 + }, + { + "epoch": 0.8873531919019704, + "grad_norm": 277.07861328125, + "learning_rate": 4.426963700253223e-07, + "loss": 19.9283, + "step": 439270 + }, + { + "epoch": 0.8873733925346542, + "grad_norm": 542.4605712890625, + "learning_rate": 4.425527799438639e-07, + "loss": 12.7354, + "step": 439280 + }, + { + "epoch": 0.887393593167338, + "grad_norm": 369.82061767578125, + "learning_rate": 4.4240921207485077e-07, + "loss": 30.9794, + "step": 439290 + }, + { + "epoch": 0.8874137938000218, + "grad_norm": 419.03533935546875, + "learning_rate": 4.4226566641898173e-07, + "loss": 10.5437, + "step": 439300 + }, + { + "epoch": 0.8874339944327057, + "grad_norm": 9.967693328857422, + "learning_rate": 4.421221429769579e-07, + "loss": 13.8394, + "step": 439310 + }, + { + "epoch": 0.8874541950653895, + "grad_norm": 428.9529113769531, + "learning_rate": 4.4197864174947755e-07, + "loss": 11.1389, + "step": 439320 + }, + { + "epoch": 0.8874743956980733, + "grad_norm": 293.9654235839844, + "learning_rate": 4.418351627372408e-07, + "loss": 20.6097, + "step": 439330 + }, + { + "epoch": 0.8874945963307571, + "grad_norm": 1044.8182373046875, + "learning_rate": 4.416917059409465e-07, + "loss": 22.0248, + "step": 439340 + }, + { + "epoch": 0.8875147969634409, + "grad_norm": 149.45436096191406, + "learning_rate": 4.415482713612934e-07, + "loss": 15.2085, + "step": 439350 + }, + { + "epoch": 0.8875349975961248, + "grad_norm": 171.68978881835938, + "learning_rate": 4.414048589989822e-07, + "loss": 14.8334, + "step": 439360 + }, + { + "epoch": 0.8875551982288086, + "grad_norm": 443.9935607910156, + "learning_rate": 4.4126146885471067e-07, + "loss": 22.9411, + "step": 439370 + }, + { + "epoch": 0.8875753988614924, + "grad_norm": 236.66659545898438, + "learning_rate": 4.411181009291765e-07, + "loss": 28.307, + "step": 439380 + }, + { + "epoch": 0.8875955994941762, + "grad_norm": 205.0904541015625, + "learning_rate": 4.409747552230803e-07, + "loss": 13.7862, + "step": 439390 + }, + { + "epoch": 0.8876158001268599, + "grad_norm": 697.9765625, + "learning_rate": 4.4083143173712207e-07, + "loss": 15.5063, + "step": 439400 + }, + { + "epoch": 0.8876360007595437, + "grad_norm": 472.9891052246094, + "learning_rate": 4.406881304719962e-07, + "loss": 16.3077, + "step": 439410 + }, + { + "epoch": 0.8876562013922276, + "grad_norm": 50.7934684753418, + "learning_rate": 4.405448514284039e-07, + "loss": 32.6937, + "step": 439420 + }, + { + "epoch": 0.8876764020249114, + "grad_norm": 613.9843139648438, + "learning_rate": 4.404015946070439e-07, + "loss": 22.2959, + "step": 439430 + }, + { + "epoch": 0.8876966026575952, + "grad_norm": 435.0783996582031, + "learning_rate": 4.40258360008613e-07, + "loss": 27.7951, + "step": 439440 + }, + { + "epoch": 0.887716803290279, + "grad_norm": 777.6680908203125, + "learning_rate": 4.401151476338095e-07, + "loss": 42.459, + "step": 439450 + }, + { + "epoch": 0.8877370039229628, + "grad_norm": 434.35491943359375, + "learning_rate": 4.3997195748333113e-07, + "loss": 11.0759, + "step": 439460 + }, + { + "epoch": 0.8877572045556467, + "grad_norm": 22.04349136352539, + "learning_rate": 4.3982878955787844e-07, + "loss": 14.6108, + "step": 439470 + }, + { + "epoch": 0.8877774051883305, + "grad_norm": 79.72455596923828, + "learning_rate": 4.396856438581454e-07, + "loss": 32.3698, + "step": 439480 + }, + { + "epoch": 0.8877976058210143, + "grad_norm": 269.5208435058594, + "learning_rate": 4.395425203848314e-07, + "loss": 16.2407, + "step": 439490 + }, + { + "epoch": 0.8878178064536981, + "grad_norm": 467.6042785644531, + "learning_rate": 4.3939941913863525e-07, + "loss": 17.5712, + "step": 439500 + }, + { + "epoch": 0.8878380070863819, + "grad_norm": 170.37184143066406, + "learning_rate": 4.392563401202526e-07, + "loss": 27.6455, + "step": 439510 + }, + { + "epoch": 0.8878582077190658, + "grad_norm": 74.18937683105469, + "learning_rate": 4.391132833303807e-07, + "loss": 19.3106, + "step": 439520 + }, + { + "epoch": 0.8878784083517496, + "grad_norm": 343.9047546386719, + "learning_rate": 4.389702487697189e-07, + "loss": 12.2741, + "step": 439530 + }, + { + "epoch": 0.8878986089844334, + "grad_norm": 33.56159973144531, + "learning_rate": 4.388272364389623e-07, + "loss": 12.6946, + "step": 439540 + }, + { + "epoch": 0.8879188096171172, + "grad_norm": 324.51470947265625, + "learning_rate": 4.38684246338808e-07, + "loss": 22.468, + "step": 439550 + }, + { + "epoch": 0.887939010249801, + "grad_norm": 296.24420166015625, + "learning_rate": 4.385412784699544e-07, + "loss": 32.3633, + "step": 439560 + }, + { + "epoch": 0.8879592108824849, + "grad_norm": 791.6322021484375, + "learning_rate": 4.3839833283309597e-07, + "loss": 28.1926, + "step": 439570 + }, + { + "epoch": 0.8879794115151687, + "grad_norm": 354.2049255371094, + "learning_rate": 4.3825540942893206e-07, + "loss": 19.6999, + "step": 439580 + }, + { + "epoch": 0.8879996121478525, + "grad_norm": 276.34783935546875, + "learning_rate": 4.381125082581583e-07, + "loss": 20.3004, + "step": 439590 + }, + { + "epoch": 0.8880198127805363, + "grad_norm": 525.9163818359375, + "learning_rate": 4.379696293214697e-07, + "loss": 21.0685, + "step": 439600 + }, + { + "epoch": 0.8880400134132201, + "grad_norm": 405.0637512207031, + "learning_rate": 4.378267726195645e-07, + "loss": 16.6269, + "step": 439610 + }, + { + "epoch": 0.888060214045904, + "grad_norm": 875.94873046875, + "learning_rate": 4.3768393815313723e-07, + "loss": 20.4529, + "step": 439620 + }, + { + "epoch": 0.8880804146785878, + "grad_norm": 78.52611541748047, + "learning_rate": 4.375411259228868e-07, + "loss": 16.3166, + "step": 439630 + }, + { + "epoch": 0.8881006153112716, + "grad_norm": 154.6929931640625, + "learning_rate": 4.373983359295059e-07, + "loss": 18.9563, + "step": 439640 + }, + { + "epoch": 0.8881208159439553, + "grad_norm": 183.49374389648438, + "learning_rate": 4.372555681736934e-07, + "loss": 23.9325, + "step": 439650 + }, + { + "epoch": 0.8881410165766391, + "grad_norm": 517.3644409179688, + "learning_rate": 4.3711282265614385e-07, + "loss": 17.1077, + "step": 439660 + }, + { + "epoch": 0.888161217209323, + "grad_norm": 266.4991455078125, + "learning_rate": 4.369700993775522e-07, + "loss": 11.7718, + "step": 439670 + }, + { + "epoch": 0.8881814178420068, + "grad_norm": 103.00946807861328, + "learning_rate": 4.368273983386157e-07, + "loss": 28.2479, + "step": 439680 + }, + { + "epoch": 0.8882016184746906, + "grad_norm": 126.4054946899414, + "learning_rate": 4.3668471954002864e-07, + "loss": 15.4523, + "step": 439690 + }, + { + "epoch": 0.8882218191073744, + "grad_norm": 629.47265625, + "learning_rate": 4.3654206298248625e-07, + "loss": 17.5671, + "step": 439700 + }, + { + "epoch": 0.8882420197400582, + "grad_norm": 250.27529907226562, + "learning_rate": 4.363994286666845e-07, + "loss": 20.4415, + "step": 439710 + }, + { + "epoch": 0.888262220372742, + "grad_norm": 315.4422607421875, + "learning_rate": 4.3625681659331895e-07, + "loss": 18.5077, + "step": 439720 + }, + { + "epoch": 0.8882824210054259, + "grad_norm": 205.74395751953125, + "learning_rate": 4.3611422676308413e-07, + "loss": 19.0239, + "step": 439730 + }, + { + "epoch": 0.8883026216381097, + "grad_norm": 144.6514892578125, + "learning_rate": 4.359716591766744e-07, + "loss": 10.1121, + "step": 439740 + }, + { + "epoch": 0.8883228222707935, + "grad_norm": 94.45883178710938, + "learning_rate": 4.3582911383478646e-07, + "loss": 23.2051, + "step": 439750 + }, + { + "epoch": 0.8883430229034773, + "grad_norm": 108.36637878417969, + "learning_rate": 4.3568659073811306e-07, + "loss": 14.929, + "step": 439760 + }, + { + "epoch": 0.8883632235361612, + "grad_norm": 606.2430419921875, + "learning_rate": 4.355440898873492e-07, + "loss": 10.0484, + "step": 439770 + }, + { + "epoch": 0.888383424168845, + "grad_norm": 298.37127685546875, + "learning_rate": 4.354016112831899e-07, + "loss": 7.4637, + "step": 439780 + }, + { + "epoch": 0.8884036248015288, + "grad_norm": 760.9266357421875, + "learning_rate": 4.352591549263302e-07, + "loss": 12.9479, + "step": 439790 + }, + { + "epoch": 0.8884238254342126, + "grad_norm": 621.0365600585938, + "learning_rate": 4.3511672081746393e-07, + "loss": 14.3445, + "step": 439800 + }, + { + "epoch": 0.8884440260668964, + "grad_norm": 407.5010070800781, + "learning_rate": 4.3497430895728444e-07, + "loss": 15.4422, + "step": 439810 + }, + { + "epoch": 0.8884642266995803, + "grad_norm": 734.0892333984375, + "learning_rate": 4.348319193464867e-07, + "loss": 36.5453, + "step": 439820 + }, + { + "epoch": 0.8884844273322641, + "grad_norm": 274.6903991699219, + "learning_rate": 4.3468955198576524e-07, + "loss": 13.928, + "step": 439830 + }, + { + "epoch": 0.8885046279649479, + "grad_norm": 403.5230712890625, + "learning_rate": 4.3454720687581165e-07, + "loss": 23.6129, + "step": 439840 + }, + { + "epoch": 0.8885248285976317, + "grad_norm": 741.6392211914062, + "learning_rate": 4.344048840173226e-07, + "loss": 13.0167, + "step": 439850 + }, + { + "epoch": 0.8885450292303155, + "grad_norm": 252.17994689941406, + "learning_rate": 4.3426258341098925e-07, + "loss": 6.1833, + "step": 439860 + }, + { + "epoch": 0.8885652298629994, + "grad_norm": 793.151611328125, + "learning_rate": 4.341203050575077e-07, + "loss": 20.799, + "step": 439870 + }, + { + "epoch": 0.8885854304956832, + "grad_norm": 447.8883056640625, + "learning_rate": 4.3397804895756957e-07, + "loss": 25.9996, + "step": 439880 + }, + { + "epoch": 0.888605631128367, + "grad_norm": 0.031039610505104065, + "learning_rate": 4.338358151118677e-07, + "loss": 9.6498, + "step": 439890 + }, + { + "epoch": 0.8886258317610508, + "grad_norm": 219.12828063964844, + "learning_rate": 4.33693603521097e-07, + "loss": 6.6652, + "step": 439900 + }, + { + "epoch": 0.8886460323937345, + "grad_norm": 318.5906677246094, + "learning_rate": 4.3355141418594926e-07, + "loss": 21.3146, + "step": 439910 + }, + { + "epoch": 0.8886662330264183, + "grad_norm": 336.47918701171875, + "learning_rate": 4.334092471071194e-07, + "loss": 17.1244, + "step": 439920 + }, + { + "epoch": 0.8886864336591022, + "grad_norm": 483.5345764160156, + "learning_rate": 4.3326710228529746e-07, + "loss": 16.6284, + "step": 439930 + }, + { + "epoch": 0.888706634291786, + "grad_norm": 651.7122192382812, + "learning_rate": 4.3312497972117895e-07, + "loss": 16.1954, + "step": 439940 + }, + { + "epoch": 0.8887268349244698, + "grad_norm": 283.4456481933594, + "learning_rate": 4.32982879415455e-07, + "loss": 19.7755, + "step": 439950 + }, + { + "epoch": 0.8887470355571536, + "grad_norm": 185.06582641601562, + "learning_rate": 4.3284080136881847e-07, + "loss": 25.6656, + "step": 439960 + }, + { + "epoch": 0.8887672361898374, + "grad_norm": 255.2400360107422, + "learning_rate": 4.32698745581962e-07, + "loss": 14.7366, + "step": 439970 + }, + { + "epoch": 0.8887874368225213, + "grad_norm": 445.416015625, + "learning_rate": 4.325567120555785e-07, + "loss": 11.0359, + "step": 439980 + }, + { + "epoch": 0.8888076374552051, + "grad_norm": 338.63629150390625, + "learning_rate": 4.324147007903584e-07, + "loss": 17.1912, + "step": 439990 + }, + { + "epoch": 0.8888278380878889, + "grad_norm": 143.6701202392578, + "learning_rate": 4.322727117869951e-07, + "loss": 9.3195, + "step": 440000 + }, + { + "epoch": 0.8888480387205727, + "grad_norm": 117.09489440917969, + "learning_rate": 4.3213074504618256e-07, + "loss": 12.1255, + "step": 440010 + }, + { + "epoch": 0.8888682393532565, + "grad_norm": 437.0494689941406, + "learning_rate": 4.31988800568609e-07, + "loss": 21.9564, + "step": 440020 + }, + { + "epoch": 0.8888884399859404, + "grad_norm": 12.793342590332031, + "learning_rate": 4.3184687835496784e-07, + "loss": 21.4166, + "step": 440030 + }, + { + "epoch": 0.8889086406186242, + "grad_norm": 170.0322723388672, + "learning_rate": 4.317049784059518e-07, + "loss": 12.801, + "step": 440040 + }, + { + "epoch": 0.888928841251308, + "grad_norm": 349.74859619140625, + "learning_rate": 4.315631007222515e-07, + "loss": 14.7523, + "step": 440050 + }, + { + "epoch": 0.8889490418839918, + "grad_norm": 217.57176208496094, + "learning_rate": 4.31421245304558e-07, + "loss": 19.6759, + "step": 440060 + }, + { + "epoch": 0.8889692425166756, + "grad_norm": 264.9936828613281, + "learning_rate": 4.3127941215356296e-07, + "loss": 18.8373, + "step": 440070 + }, + { + "epoch": 0.8889894431493595, + "grad_norm": 885.3839721679688, + "learning_rate": 4.3113760126995974e-07, + "loss": 22.7676, + "step": 440080 + }, + { + "epoch": 0.8890096437820433, + "grad_norm": 178.55599975585938, + "learning_rate": 4.309958126544361e-07, + "loss": 19.6819, + "step": 440090 + }, + { + "epoch": 0.8890298444147271, + "grad_norm": 23.563514709472656, + "learning_rate": 4.308540463076849e-07, + "loss": 8.1549, + "step": 440100 + }, + { + "epoch": 0.8890500450474109, + "grad_norm": 475.083251953125, + "learning_rate": 4.3071230223039774e-07, + "loss": 22.2255, + "step": 440110 + }, + { + "epoch": 0.8890702456800947, + "grad_norm": 310.59912109375, + "learning_rate": 4.3057058042326407e-07, + "loss": 18.2259, + "step": 440120 + }, + { + "epoch": 0.8890904463127786, + "grad_norm": 249.81739807128906, + "learning_rate": 4.30428880886975e-07, + "loss": 9.7312, + "step": 440130 + }, + { + "epoch": 0.8891106469454624, + "grad_norm": 157.1270751953125, + "learning_rate": 4.3028720362222166e-07, + "loss": 22.6873, + "step": 440140 + }, + { + "epoch": 0.8891308475781462, + "grad_norm": 368.5738220214844, + "learning_rate": 4.301455486296946e-07, + "loss": 21.0228, + "step": 440150 + }, + { + "epoch": 0.8891510482108299, + "grad_norm": 474.0268249511719, + "learning_rate": 4.300039159100827e-07, + "loss": 18.5053, + "step": 440160 + }, + { + "epoch": 0.8891712488435137, + "grad_norm": 402.1085510253906, + "learning_rate": 4.298623054640788e-07, + "loss": 14.8153, + "step": 440170 + }, + { + "epoch": 0.8891914494761975, + "grad_norm": 357.016357421875, + "learning_rate": 4.2972071729237065e-07, + "loss": 11.0594, + "step": 440180 + }, + { + "epoch": 0.8892116501088814, + "grad_norm": 11.80178165435791, + "learning_rate": 4.295791513956504e-07, + "loss": 18.0563, + "step": 440190 + }, + { + "epoch": 0.8892318507415652, + "grad_norm": 288.9800109863281, + "learning_rate": 4.29437607774606e-07, + "loss": 17.9754, + "step": 440200 + }, + { + "epoch": 0.889252051374249, + "grad_norm": 327.2669677734375, + "learning_rate": 4.2929608642992894e-07, + "loss": 28.796, + "step": 440210 + }, + { + "epoch": 0.8892722520069328, + "grad_norm": 452.38348388671875, + "learning_rate": 4.291545873623087e-07, + "loss": 16.7511, + "step": 440220 + }, + { + "epoch": 0.8892924526396166, + "grad_norm": 530.0560302734375, + "learning_rate": 4.2901311057243377e-07, + "loss": 10.8369, + "step": 440230 + }, + { + "epoch": 0.8893126532723005, + "grad_norm": 489.7251892089844, + "learning_rate": 4.2887165606099513e-07, + "loss": 18.7304, + "step": 440240 + }, + { + "epoch": 0.8893328539049843, + "grad_norm": 356.1235656738281, + "learning_rate": 4.2873022382868115e-07, + "loss": 18.1181, + "step": 440250 + }, + { + "epoch": 0.8893530545376681, + "grad_norm": 407.93499755859375, + "learning_rate": 4.2858881387618235e-07, + "loss": 16.1849, + "step": 440260 + }, + { + "epoch": 0.8893732551703519, + "grad_norm": 684.4132080078125, + "learning_rate": 4.284474262041871e-07, + "loss": 24.9747, + "step": 440270 + }, + { + "epoch": 0.8893934558030357, + "grad_norm": 216.8534698486328, + "learning_rate": 4.283060608133843e-07, + "loss": 17.7716, + "step": 440280 + }, + { + "epoch": 0.8894136564357196, + "grad_norm": 476.00628662109375, + "learning_rate": 4.2816471770446343e-07, + "loss": 14.6045, + "step": 440290 + }, + { + "epoch": 0.8894338570684034, + "grad_norm": 329.6189880371094, + "learning_rate": 4.280233968781139e-07, + "loss": 14.645, + "step": 440300 + }, + { + "epoch": 0.8894540577010872, + "grad_norm": 216.16668701171875, + "learning_rate": 4.2788209833502237e-07, + "loss": 15.3206, + "step": 440310 + }, + { + "epoch": 0.889474258333771, + "grad_norm": 514.588134765625, + "learning_rate": 4.277408220758794e-07, + "loss": 20.1569, + "step": 440320 + }, + { + "epoch": 0.8894944589664548, + "grad_norm": 256.3499755859375, + "learning_rate": 4.275995681013745e-07, + "loss": 30.1647, + "step": 440330 + }, + { + "epoch": 0.8895146595991387, + "grad_norm": 737.529296875, + "learning_rate": 4.2745833641219317e-07, + "loss": 16.7913, + "step": 440340 + }, + { + "epoch": 0.8895348602318225, + "grad_norm": 97.90251159667969, + "learning_rate": 4.273171270090254e-07, + "loss": 6.7977, + "step": 440350 + }, + { + "epoch": 0.8895550608645063, + "grad_norm": 356.3243408203125, + "learning_rate": 4.271759398925601e-07, + "loss": 13.1676, + "step": 440360 + }, + { + "epoch": 0.8895752614971901, + "grad_norm": 435.6765441894531, + "learning_rate": 4.270347750634846e-07, + "loss": 18.4452, + "step": 440370 + }, + { + "epoch": 0.889595462129874, + "grad_norm": 663.2234497070312, + "learning_rate": 4.2689363252248595e-07, + "loss": 21.1937, + "step": 440380 + }, + { + "epoch": 0.8896156627625578, + "grad_norm": 155.16162109375, + "learning_rate": 4.2675251227025315e-07, + "loss": 19.0472, + "step": 440390 + }, + { + "epoch": 0.8896358633952416, + "grad_norm": 344.0484313964844, + "learning_rate": 4.266114143074751e-07, + "loss": 10.4041, + "step": 440400 + }, + { + "epoch": 0.8896560640279254, + "grad_norm": 165.12008666992188, + "learning_rate": 4.264703386348384e-07, + "loss": 10.6243, + "step": 440410 + }, + { + "epoch": 0.8896762646606091, + "grad_norm": 223.59613037109375, + "learning_rate": 4.263292852530293e-07, + "loss": 23.7532, + "step": 440420 + }, + { + "epoch": 0.8896964652932929, + "grad_norm": 111.74337768554688, + "learning_rate": 4.261882541627377e-07, + "loss": 7.5019, + "step": 440430 + }, + { + "epoch": 0.8897166659259768, + "grad_norm": 448.3424377441406, + "learning_rate": 4.260472453646497e-07, + "loss": 27.4821, + "step": 440440 + }, + { + "epoch": 0.8897368665586606, + "grad_norm": 318.8260192871094, + "learning_rate": 4.2590625885945205e-07, + "loss": 18.8889, + "step": 440450 + }, + { + "epoch": 0.8897570671913444, + "grad_norm": 331.6166687011719, + "learning_rate": 4.25765294647833e-07, + "loss": 29.4402, + "step": 440460 + }, + { + "epoch": 0.8897772678240282, + "grad_norm": 598.9876708984375, + "learning_rate": 4.256243527304782e-07, + "loss": 18.679, + "step": 440470 + }, + { + "epoch": 0.889797468456712, + "grad_norm": 435.41290283203125, + "learning_rate": 4.2548343310807704e-07, + "loss": 17.4909, + "step": 440480 + }, + { + "epoch": 0.8898176690893959, + "grad_norm": 481.29132080078125, + "learning_rate": 4.25342535781314e-07, + "loss": 17.0876, + "step": 440490 + }, + { + "epoch": 0.8898378697220797, + "grad_norm": 258.2563781738281, + "learning_rate": 4.2520166075087635e-07, + "loss": 14.378, + "step": 440500 + }, + { + "epoch": 0.8898580703547635, + "grad_norm": 452.7650451660156, + "learning_rate": 4.250608080174512e-07, + "loss": 19.2819, + "step": 440510 + }, + { + "epoch": 0.8898782709874473, + "grad_norm": 223.28135681152344, + "learning_rate": 4.249199775817242e-07, + "loss": 16.1098, + "step": 440520 + }, + { + "epoch": 0.8898984716201311, + "grad_norm": 80.29216003417969, + "learning_rate": 4.247791694443837e-07, + "loss": 23.5703, + "step": 440530 + }, + { + "epoch": 0.889918672252815, + "grad_norm": 555.3062744140625, + "learning_rate": 4.24638383606113e-07, + "loss": 22.0077, + "step": 440540 + }, + { + "epoch": 0.8899388728854988, + "grad_norm": 1100.7147216796875, + "learning_rate": 4.24497620067601e-07, + "loss": 39.7739, + "step": 440550 + }, + { + "epoch": 0.8899590735181826, + "grad_norm": 675.494873046875, + "learning_rate": 4.2435687882953327e-07, + "loss": 17.158, + "step": 440560 + }, + { + "epoch": 0.8899792741508664, + "grad_norm": 292.1600341796875, + "learning_rate": 4.242161598925937e-07, + "loss": 15.5541, + "step": 440570 + }, + { + "epoch": 0.8899994747835502, + "grad_norm": 274.9527282714844, + "learning_rate": 4.240754632574706e-07, + "loss": 27.1441, + "step": 440580 + }, + { + "epoch": 0.8900196754162341, + "grad_norm": 48.35358428955078, + "learning_rate": 4.239347889248485e-07, + "loss": 13.2044, + "step": 440590 + }, + { + "epoch": 0.8900398760489179, + "grad_norm": 235.7996368408203, + "learning_rate": 4.237941368954124e-07, + "loss": 22.3997, + "step": 440600 + }, + { + "epoch": 0.8900600766816017, + "grad_norm": 11.193168640136719, + "learning_rate": 4.236535071698489e-07, + "loss": 17.1348, + "step": 440610 + }, + { + "epoch": 0.8900802773142855, + "grad_norm": 379.49041748046875, + "learning_rate": 4.2351289974884467e-07, + "loss": 23.3766, + "step": 440620 + }, + { + "epoch": 0.8901004779469693, + "grad_norm": 255.96241760253906, + "learning_rate": 4.2337231463308147e-07, + "loss": 14.9062, + "step": 440630 + }, + { + "epoch": 0.8901206785796532, + "grad_norm": 757.0723876953125, + "learning_rate": 4.2323175182324706e-07, + "loss": 29.2287, + "step": 440640 + }, + { + "epoch": 0.890140879212337, + "grad_norm": 357.3234558105469, + "learning_rate": 4.2309121132002695e-07, + "loss": 11.862, + "step": 440650 + }, + { + "epoch": 0.8901610798450208, + "grad_norm": 25.004135131835938, + "learning_rate": 4.2295069312410455e-07, + "loss": 18.1373, + "step": 440660 + }, + { + "epoch": 0.8901812804777045, + "grad_norm": 36.95526123046875, + "learning_rate": 4.228101972361648e-07, + "loss": 7.3536, + "step": 440670 + }, + { + "epoch": 0.8902014811103883, + "grad_norm": 207.34742736816406, + "learning_rate": 4.226697236568933e-07, + "loss": 16.4848, + "step": 440680 + }, + { + "epoch": 0.8902216817430721, + "grad_norm": 526.7403564453125, + "learning_rate": 4.225292723869762e-07, + "loss": 20.7376, + "step": 440690 + }, + { + "epoch": 0.890241882375756, + "grad_norm": 572.5574951171875, + "learning_rate": 4.2238884342709397e-07, + "loss": 23.5109, + "step": 440700 + }, + { + "epoch": 0.8902620830084398, + "grad_norm": 486.1275329589844, + "learning_rate": 4.222484367779334e-07, + "loss": 37.0887, + "step": 440710 + }, + { + "epoch": 0.8902822836411236, + "grad_norm": 314.53375244140625, + "learning_rate": 4.2210805244017993e-07, + "loss": 24.3133, + "step": 440720 + }, + { + "epoch": 0.8903024842738074, + "grad_norm": 374.93328857421875, + "learning_rate": 4.219676904145165e-07, + "loss": 31.6426, + "step": 440730 + }, + { + "epoch": 0.8903226849064912, + "grad_norm": 113.07835388183594, + "learning_rate": 4.218273507016263e-07, + "loss": 32.312, + "step": 440740 + }, + { + "epoch": 0.8903428855391751, + "grad_norm": 478.1295471191406, + "learning_rate": 4.2168703330219494e-07, + "loss": 22.5987, + "step": 440750 + }, + { + "epoch": 0.8903630861718589, + "grad_norm": 360.760498046875, + "learning_rate": 4.2154673821690585e-07, + "loss": 22.2875, + "step": 440760 + }, + { + "epoch": 0.8903832868045427, + "grad_norm": 177.20535278320312, + "learning_rate": 4.2140646544644227e-07, + "loss": 16.4268, + "step": 440770 + }, + { + "epoch": 0.8904034874372265, + "grad_norm": 525.95361328125, + "learning_rate": 4.212662149914887e-07, + "loss": 13.4369, + "step": 440780 + }, + { + "epoch": 0.8904236880699103, + "grad_norm": 216.5105438232422, + "learning_rate": 4.211259868527273e-07, + "loss": 27.3728, + "step": 440790 + }, + { + "epoch": 0.8904438887025942, + "grad_norm": 634.1834716796875, + "learning_rate": 4.2098578103084376e-07, + "loss": 22.5864, + "step": 440800 + }, + { + "epoch": 0.890464089335278, + "grad_norm": 52.97663116455078, + "learning_rate": 4.208455975265191e-07, + "loss": 18.4716, + "step": 440810 + }, + { + "epoch": 0.8904842899679618, + "grad_norm": 56.63849639892578, + "learning_rate": 4.2070543634043834e-07, + "loss": 9.3955, + "step": 440820 + }, + { + "epoch": 0.8905044906006456, + "grad_norm": 550.6646118164062, + "learning_rate": 4.205652974732838e-07, + "loss": 18.9752, + "step": 440830 + }, + { + "epoch": 0.8905246912333294, + "grad_norm": 559.0628051757812, + "learning_rate": 4.2042518092573814e-07, + "loss": 27.9919, + "step": 440840 + }, + { + "epoch": 0.8905448918660133, + "grad_norm": 128.8303680419922, + "learning_rate": 4.202850866984853e-07, + "loss": 27.2158, + "step": 440850 + }, + { + "epoch": 0.8905650924986971, + "grad_norm": 343.2501220703125, + "learning_rate": 4.201450147922065e-07, + "loss": 16.7919, + "step": 440860 + }, + { + "epoch": 0.8905852931313809, + "grad_norm": 185.21302795410156, + "learning_rate": 4.200049652075866e-07, + "loss": 20.326, + "step": 440870 + }, + { + "epoch": 0.8906054937640647, + "grad_norm": 244.53485107421875, + "learning_rate": 4.198649379453068e-07, + "loss": 12.922, + "step": 440880 + }, + { + "epoch": 0.8906256943967485, + "grad_norm": 532.2025756835938, + "learning_rate": 4.1972493300604877e-07, + "loss": 22.2626, + "step": 440890 + }, + { + "epoch": 0.8906458950294324, + "grad_norm": 89.53042602539062, + "learning_rate": 4.195849503904975e-07, + "loss": 12.9347, + "step": 440900 + }, + { + "epoch": 0.8906660956621162, + "grad_norm": 171.7433624267578, + "learning_rate": 4.1944499009933303e-07, + "loss": 8.9002, + "step": 440910 + }, + { + "epoch": 0.8906862962948, + "grad_norm": 118.01786041259766, + "learning_rate": 4.19305052133237e-07, + "loss": 7.3701, + "step": 440920 + }, + { + "epoch": 0.8907064969274837, + "grad_norm": 339.9176940917969, + "learning_rate": 4.1916513649289334e-07, + "loss": 17.031, + "step": 440930 + }, + { + "epoch": 0.8907266975601675, + "grad_norm": 315.4590759277344, + "learning_rate": 4.1902524317898427e-07, + "loss": 20.6522, + "step": 440940 + }, + { + "epoch": 0.8907468981928514, + "grad_norm": 231.2949676513672, + "learning_rate": 4.188853721921893e-07, + "loss": 17.2781, + "step": 440950 + }, + { + "epoch": 0.8907670988255352, + "grad_norm": 404.14935302734375, + "learning_rate": 4.1874552353319107e-07, + "loss": 20.6696, + "step": 440960 + }, + { + "epoch": 0.890787299458219, + "grad_norm": 1402.7301025390625, + "learning_rate": 4.186056972026725e-07, + "loss": 37.9456, + "step": 440970 + }, + { + "epoch": 0.8908075000909028, + "grad_norm": 425.55401611328125, + "learning_rate": 4.1846589320131415e-07, + "loss": 15.6599, + "step": 440980 + }, + { + "epoch": 0.8908277007235866, + "grad_norm": 712.2496337890625, + "learning_rate": 4.1832611152979655e-07, + "loss": 21.0699, + "step": 440990 + }, + { + "epoch": 0.8908479013562705, + "grad_norm": 487.39239501953125, + "learning_rate": 4.1818635218880186e-07, + "loss": 17.5509, + "step": 441000 + }, + { + "epoch": 0.8908681019889543, + "grad_norm": 75.24030303955078, + "learning_rate": 4.1804661517901244e-07, + "loss": 41.9425, + "step": 441010 + }, + { + "epoch": 0.8908883026216381, + "grad_norm": 488.7297058105469, + "learning_rate": 4.179069005011066e-07, + "loss": 14.4235, + "step": 441020 + }, + { + "epoch": 0.8909085032543219, + "grad_norm": 662.9674072265625, + "learning_rate": 4.177672081557671e-07, + "loss": 12.6769, + "step": 441030 + }, + { + "epoch": 0.8909287038870057, + "grad_norm": 744.5567626953125, + "learning_rate": 4.176275381436751e-07, + "loss": 12.1562, + "step": 441040 + }, + { + "epoch": 0.8909489045196896, + "grad_norm": 169.5181427001953, + "learning_rate": 4.1748789046551055e-07, + "loss": 20.7962, + "step": 441050 + }, + { + "epoch": 0.8909691051523734, + "grad_norm": 230.7576141357422, + "learning_rate": 4.173482651219535e-07, + "loss": 12.6017, + "step": 441060 + }, + { + "epoch": 0.8909893057850572, + "grad_norm": 511.5341491699219, + "learning_rate": 4.1720866211368615e-07, + "loss": 23.1607, + "step": 441070 + }, + { + "epoch": 0.891009506417741, + "grad_norm": 373.091552734375, + "learning_rate": 4.1706908144138804e-07, + "loss": 14.3656, + "step": 441080 + }, + { + "epoch": 0.8910297070504248, + "grad_norm": 284.5761413574219, + "learning_rate": 4.1692952310573854e-07, + "loss": 21.3658, + "step": 441090 + }, + { + "epoch": 0.8910499076831087, + "grad_norm": 570.6357421875, + "learning_rate": 4.1678998710741936e-07, + "loss": 19.4725, + "step": 441100 + }, + { + "epoch": 0.8910701083157925, + "grad_norm": 967.0919189453125, + "learning_rate": 4.1665047344710887e-07, + "loss": 26.8212, + "step": 441110 + }, + { + "epoch": 0.8910903089484763, + "grad_norm": 259.86822509765625, + "learning_rate": 4.1651098212548923e-07, + "loss": 12.6998, + "step": 441120 + }, + { + "epoch": 0.8911105095811601, + "grad_norm": 206.50607299804688, + "learning_rate": 4.163715131432383e-07, + "loss": 10.5511, + "step": 441130 + }, + { + "epoch": 0.8911307102138439, + "grad_norm": 466.2878723144531, + "learning_rate": 4.162320665010372e-07, + "loss": 22.2893, + "step": 441140 + }, + { + "epoch": 0.8911509108465278, + "grad_norm": 11.808757781982422, + "learning_rate": 4.160926421995648e-07, + "loss": 19.4246, + "step": 441150 + }, + { + "epoch": 0.8911711114792116, + "grad_norm": 793.69775390625, + "learning_rate": 4.159532402395011e-07, + "loss": 27.9781, + "step": 441160 + }, + { + "epoch": 0.8911913121118954, + "grad_norm": 0.3489563763141632, + "learning_rate": 4.158138606215256e-07, + "loss": 9.1859, + "step": 441170 + }, + { + "epoch": 0.8912115127445792, + "grad_norm": 0.0, + "learning_rate": 4.1567450334631667e-07, + "loss": 21.4238, + "step": 441180 + }, + { + "epoch": 0.8912317133772629, + "grad_norm": 1309.4345703125, + "learning_rate": 4.155351684145548e-07, + "loss": 15.2316, + "step": 441190 + }, + { + "epoch": 0.8912519140099467, + "grad_norm": 204.02719116210938, + "learning_rate": 4.153958558269189e-07, + "loss": 14.4283, + "step": 441200 + }, + { + "epoch": 0.8912721146426306, + "grad_norm": 98.7807846069336, + "learning_rate": 4.1525656558408624e-07, + "loss": 11.4204, + "step": 441210 + }, + { + "epoch": 0.8912923152753144, + "grad_norm": 407.64825439453125, + "learning_rate": 4.151172976867374e-07, + "loss": 13.8096, + "step": 441220 + }, + { + "epoch": 0.8913125159079982, + "grad_norm": 458.1145324707031, + "learning_rate": 4.149780521355523e-07, + "loss": 14.3765, + "step": 441230 + }, + { + "epoch": 0.891332716540682, + "grad_norm": 275.7145690917969, + "learning_rate": 4.1483882893120606e-07, + "loss": 11.479, + "step": 441240 + }, + { + "epoch": 0.8913529171733658, + "grad_norm": 315.7733154296875, + "learning_rate": 4.146996280743798e-07, + "loss": 28.4609, + "step": 441250 + }, + { + "epoch": 0.8913731178060497, + "grad_norm": 152.9192657470703, + "learning_rate": 4.145604495657518e-07, + "loss": 4.4852, + "step": 441260 + }, + { + "epoch": 0.8913933184387335, + "grad_norm": 29.09087562561035, + "learning_rate": 4.144212934060005e-07, + "loss": 21.8849, + "step": 441270 + }, + { + "epoch": 0.8914135190714173, + "grad_norm": 118.08297729492188, + "learning_rate": 4.142821595958024e-07, + "loss": 24.1273, + "step": 441280 + }, + { + "epoch": 0.8914337197041011, + "grad_norm": 202.7879638671875, + "learning_rate": 4.1414304813583663e-07, + "loss": 25.2596, + "step": 441290 + }, + { + "epoch": 0.8914539203367849, + "grad_norm": 70.14551544189453, + "learning_rate": 4.140039590267836e-07, + "loss": 21.5711, + "step": 441300 + }, + { + "epoch": 0.8914741209694688, + "grad_norm": 55.28828048706055, + "learning_rate": 4.1386489226931723e-07, + "loss": 21.0769, + "step": 441310 + }, + { + "epoch": 0.8914943216021526, + "grad_norm": 189.6409149169922, + "learning_rate": 4.137258478641176e-07, + "loss": 21.3108, + "step": 441320 + }, + { + "epoch": 0.8915145222348364, + "grad_norm": 401.5644836425781, + "learning_rate": 4.135868258118625e-07, + "loss": 12.4659, + "step": 441330 + }, + { + "epoch": 0.8915347228675202, + "grad_norm": 46.09809112548828, + "learning_rate": 4.1344782611322855e-07, + "loss": 21.3478, + "step": 441340 + }, + { + "epoch": 0.891554923500204, + "grad_norm": 237.2362823486328, + "learning_rate": 4.13308848768893e-07, + "loss": 20.2728, + "step": 441350 + }, + { + "epoch": 0.8915751241328879, + "grad_norm": 277.4466857910156, + "learning_rate": 4.1316989377953477e-07, + "loss": 16.9617, + "step": 441360 + }, + { + "epoch": 0.8915953247655717, + "grad_norm": 219.892822265625, + "learning_rate": 4.1303096114583e-07, + "loss": 19.2814, + "step": 441370 + }, + { + "epoch": 0.8916155253982555, + "grad_norm": 448.60009765625, + "learning_rate": 4.128920508684553e-07, + "loss": 24.5058, + "step": 441380 + }, + { + "epoch": 0.8916357260309393, + "grad_norm": 351.42901611328125, + "learning_rate": 4.127531629480891e-07, + "loss": 19.3832, + "step": 441390 + }, + { + "epoch": 0.8916559266636231, + "grad_norm": 642.4378662109375, + "learning_rate": 4.1261429738540694e-07, + "loss": 31.0099, + "step": 441400 + }, + { + "epoch": 0.891676127296307, + "grad_norm": 496.5330810546875, + "learning_rate": 4.1247545418108715e-07, + "loss": 21.5189, + "step": 441410 + }, + { + "epoch": 0.8916963279289908, + "grad_norm": 206.7541046142578, + "learning_rate": 4.1233663333580474e-07, + "loss": 10.9033, + "step": 441420 + }, + { + "epoch": 0.8917165285616746, + "grad_norm": 428.3406677246094, + "learning_rate": 4.121978348502381e-07, + "loss": 27.4461, + "step": 441430 + }, + { + "epoch": 0.8917367291943583, + "grad_norm": 265.78924560546875, + "learning_rate": 4.1205905872506224e-07, + "loss": 17.8106, + "step": 441440 + }, + { + "epoch": 0.8917569298270421, + "grad_norm": 439.1598815917969, + "learning_rate": 4.119203049609538e-07, + "loss": 19.014, + "step": 441450 + }, + { + "epoch": 0.891777130459726, + "grad_norm": 4.566817760467529, + "learning_rate": 4.1178157355859005e-07, + "loss": 13.6683, + "step": 441460 + }, + { + "epoch": 0.8917973310924098, + "grad_norm": 406.9350891113281, + "learning_rate": 4.1164286451864543e-07, + "loss": 17.9122, + "step": 441470 + }, + { + "epoch": 0.8918175317250936, + "grad_norm": 272.4143371582031, + "learning_rate": 4.1150417784179776e-07, + "loss": 21.5863, + "step": 441480 + }, + { + "epoch": 0.8918377323577774, + "grad_norm": 664.770263671875, + "learning_rate": 4.1136551352872256e-07, + "loss": 16.9244, + "step": 441490 + }, + { + "epoch": 0.8918579329904612, + "grad_norm": 657.5719604492188, + "learning_rate": 4.112268715800943e-07, + "loss": 23.7679, + "step": 441500 + }, + { + "epoch": 0.891878133623145, + "grad_norm": 150.0564727783203, + "learning_rate": 4.1108825199659087e-07, + "loss": 16.1455, + "step": 441510 + }, + { + "epoch": 0.8918983342558289, + "grad_norm": 410.1813659667969, + "learning_rate": 4.1094965477888605e-07, + "loss": 17.7182, + "step": 441520 + }, + { + "epoch": 0.8919185348885127, + "grad_norm": 194.68238830566406, + "learning_rate": 4.1081107992765546e-07, + "loss": 17.0886, + "step": 441530 + }, + { + "epoch": 0.8919387355211965, + "grad_norm": 333.56903076171875, + "learning_rate": 4.1067252744357524e-07, + "loss": 17.374, + "step": 441540 + }, + { + "epoch": 0.8919589361538803, + "grad_norm": 348.90771484375, + "learning_rate": 4.10533997327322e-07, + "loss": 10.7891, + "step": 441550 + }, + { + "epoch": 0.8919791367865642, + "grad_norm": 272.50164794921875, + "learning_rate": 4.1039548957956807e-07, + "loss": 16.9159, + "step": 441560 + }, + { + "epoch": 0.891999337419248, + "grad_norm": 54.76359558105469, + "learning_rate": 4.102570042009896e-07, + "loss": 17.3863, + "step": 441570 + }, + { + "epoch": 0.8920195380519318, + "grad_norm": 255.55389404296875, + "learning_rate": 4.101185411922626e-07, + "loss": 15.8582, + "step": 441580 + }, + { + "epoch": 0.8920397386846156, + "grad_norm": 70.89956665039062, + "learning_rate": 4.099801005540616e-07, + "loss": 17.2675, + "step": 441590 + }, + { + "epoch": 0.8920599393172994, + "grad_norm": 311.2065124511719, + "learning_rate": 4.0984168228705934e-07, + "loss": 13.5224, + "step": 441600 + }, + { + "epoch": 0.8920801399499833, + "grad_norm": 296.6814880371094, + "learning_rate": 4.0970328639193255e-07, + "loss": 17.2601, + "step": 441610 + }, + { + "epoch": 0.8921003405826671, + "grad_norm": 348.70703125, + "learning_rate": 4.0956491286935687e-07, + "loss": 40.7721, + "step": 441620 + }, + { + "epoch": 0.8921205412153509, + "grad_norm": 760.1993408203125, + "learning_rate": 4.0942656172000273e-07, + "loss": 19.1502, + "step": 441630 + }, + { + "epoch": 0.8921407418480347, + "grad_norm": 331.21807861328125, + "learning_rate": 4.0928823294454743e-07, + "loss": 27.8179, + "step": 441640 + }, + { + "epoch": 0.8921609424807185, + "grad_norm": 431.77606201171875, + "learning_rate": 4.091499265436649e-07, + "loss": 14.2931, + "step": 441650 + }, + { + "epoch": 0.8921811431134024, + "grad_norm": 814.52001953125, + "learning_rate": 4.0901164251802905e-07, + "loss": 16.915, + "step": 441660 + }, + { + "epoch": 0.8922013437460862, + "grad_norm": 197.07962036132812, + "learning_rate": 4.088733808683132e-07, + "loss": 19.4393, + "step": 441670 + }, + { + "epoch": 0.89222154437877, + "grad_norm": 327.1533203125, + "learning_rate": 4.087351415951918e-07, + "loss": 25.7358, + "step": 441680 + }, + { + "epoch": 0.8922417450114538, + "grad_norm": 617.658935546875, + "learning_rate": 4.085969246993388e-07, + "loss": 21.5491, + "step": 441690 + }, + { + "epoch": 0.8922619456441375, + "grad_norm": 484.689208984375, + "learning_rate": 4.084587301814269e-07, + "loss": 17.0248, + "step": 441700 + }, + { + "epoch": 0.8922821462768213, + "grad_norm": 317.9181823730469, + "learning_rate": 4.0832055804212957e-07, + "loss": 12.5236, + "step": 441710 + }, + { + "epoch": 0.8923023469095052, + "grad_norm": 6.746350288391113, + "learning_rate": 4.081824082821223e-07, + "loss": 11.206, + "step": 441720 + }, + { + "epoch": 0.892322547542189, + "grad_norm": 467.7158203125, + "learning_rate": 4.080442809020774e-07, + "loss": 15.5601, + "step": 441730 + }, + { + "epoch": 0.8923427481748728, + "grad_norm": 175.54934692382812, + "learning_rate": 4.079061759026659e-07, + "loss": 19.0913, + "step": 441740 + }, + { + "epoch": 0.8923629488075566, + "grad_norm": 490.88568115234375, + "learning_rate": 4.0776809328456455e-07, + "loss": 14.066, + "step": 441750 + }, + { + "epoch": 0.8923831494402404, + "grad_norm": 570.3674926757812, + "learning_rate": 4.0763003304844395e-07, + "loss": 12.0989, + "step": 441760 + }, + { + "epoch": 0.8924033500729243, + "grad_norm": 483.3099670410156, + "learning_rate": 4.0749199519497686e-07, + "loss": 14.4682, + "step": 441770 + }, + { + "epoch": 0.8924235507056081, + "grad_norm": 312.3144836425781, + "learning_rate": 4.073539797248377e-07, + "loss": 21.0393, + "step": 441780 + }, + { + "epoch": 0.8924437513382919, + "grad_norm": 95.38319396972656, + "learning_rate": 4.0721598663869764e-07, + "loss": 18.0344, + "step": 441790 + }, + { + "epoch": 0.8924639519709757, + "grad_norm": 414.2052001953125, + "learning_rate": 4.0707801593723006e-07, + "loss": 33.5571, + "step": 441800 + }, + { + "epoch": 0.8924841526036595, + "grad_norm": 379.32366943359375, + "learning_rate": 4.069400676211077e-07, + "loss": 19.56, + "step": 441810 + }, + { + "epoch": 0.8925043532363434, + "grad_norm": 185.80258178710938, + "learning_rate": 4.0680214169100117e-07, + "loss": 14.8039, + "step": 441820 + }, + { + "epoch": 0.8925245538690272, + "grad_norm": 760.356689453125, + "learning_rate": 4.0666423814758436e-07, + "loss": 28.8158, + "step": 441830 + }, + { + "epoch": 0.892544754501711, + "grad_norm": 153.5394287109375, + "learning_rate": 4.065263569915301e-07, + "loss": 16.4005, + "step": 441840 + }, + { + "epoch": 0.8925649551343948, + "grad_norm": 312.73956298828125, + "learning_rate": 4.063884982235078e-07, + "loss": 21.2115, + "step": 441850 + }, + { + "epoch": 0.8925851557670786, + "grad_norm": 129.17160034179688, + "learning_rate": 4.062506618441908e-07, + "loss": 14.3664, + "step": 441860 + }, + { + "epoch": 0.8926053563997625, + "grad_norm": 255.9687042236328, + "learning_rate": 4.06112847854252e-07, + "loss": 28.4689, + "step": 441870 + }, + { + "epoch": 0.8926255570324463, + "grad_norm": 344.6517639160156, + "learning_rate": 4.059750562543618e-07, + "loss": 42.3723, + "step": 441880 + }, + { + "epoch": 0.8926457576651301, + "grad_norm": 280.7841796875, + "learning_rate": 4.05837287045191e-07, + "loss": 14.4088, + "step": 441890 + }, + { + "epoch": 0.8926659582978139, + "grad_norm": 324.40618896484375, + "learning_rate": 4.056995402274122e-07, + "loss": 22.3527, + "step": 441900 + }, + { + "epoch": 0.8926861589304977, + "grad_norm": 194.77801513671875, + "learning_rate": 4.0556181580169885e-07, + "loss": 15.2484, + "step": 441910 + }, + { + "epoch": 0.8927063595631816, + "grad_norm": 433.3874206542969, + "learning_rate": 4.054241137687176e-07, + "loss": 22.2178, + "step": 441920 + }, + { + "epoch": 0.8927265601958654, + "grad_norm": 102.00749969482422, + "learning_rate": 4.052864341291418e-07, + "loss": 11.2987, + "step": 441930 + }, + { + "epoch": 0.8927467608285492, + "grad_norm": 348.9513854980469, + "learning_rate": 4.051487768836443e-07, + "loss": 22.3919, + "step": 441940 + }, + { + "epoch": 0.8927669614612329, + "grad_norm": 229.98158264160156, + "learning_rate": 4.0501114203289395e-07, + "loss": 17.3857, + "step": 441950 + }, + { + "epoch": 0.8927871620939167, + "grad_norm": 416.3073425292969, + "learning_rate": 4.048735295775608e-07, + "loss": 16.0174, + "step": 441960 + }, + { + "epoch": 0.8928073627266006, + "grad_norm": 374.8637390136719, + "learning_rate": 4.0473593951831814e-07, + "loss": 12.0551, + "step": 441970 + }, + { + "epoch": 0.8928275633592844, + "grad_norm": 544.44580078125, + "learning_rate": 4.0459837185583497e-07, + "loss": 10.0, + "step": 441980 + }, + { + "epoch": 0.8928477639919682, + "grad_norm": 578.8648071289062, + "learning_rate": 4.044608265907807e-07, + "loss": 23.768, + "step": 441990 + }, + { + "epoch": 0.892867964624652, + "grad_norm": 93.16361236572266, + "learning_rate": 4.043233037238281e-07, + "loss": 22.4546, + "step": 442000 + }, + { + "epoch": 0.8928881652573358, + "grad_norm": 862.9176025390625, + "learning_rate": 4.041858032556456e-07, + "loss": 19.5332, + "step": 442010 + }, + { + "epoch": 0.8929083658900197, + "grad_norm": 319.9920654296875, + "learning_rate": 4.040483251869054e-07, + "loss": 12.6, + "step": 442020 + }, + { + "epoch": 0.8929285665227035, + "grad_norm": 28.0956974029541, + "learning_rate": 4.0391086951827474e-07, + "loss": 31.4243, + "step": 442030 + }, + { + "epoch": 0.8929487671553873, + "grad_norm": 325.6099548339844, + "learning_rate": 4.0377343625042587e-07, + "loss": 11.6039, + "step": 442040 + }, + { + "epoch": 0.8929689677880711, + "grad_norm": 81.76892852783203, + "learning_rate": 4.0363602538402823e-07, + "loss": 11.1132, + "step": 442050 + }, + { + "epoch": 0.8929891684207549, + "grad_norm": 668.8286743164062, + "learning_rate": 4.034986369197502e-07, + "loss": 26.1809, + "step": 442060 + }, + { + "epoch": 0.8930093690534388, + "grad_norm": 663.0156860351562, + "learning_rate": 4.0336127085826294e-07, + "loss": 22.1773, + "step": 442070 + }, + { + "epoch": 0.8930295696861226, + "grad_norm": 353.7117919921875, + "learning_rate": 4.032239272002347e-07, + "loss": 14.2357, + "step": 442080 + }, + { + "epoch": 0.8930497703188064, + "grad_norm": 571.19384765625, + "learning_rate": 4.030866059463362e-07, + "loss": 24.5401, + "step": 442090 + }, + { + "epoch": 0.8930699709514902, + "grad_norm": 277.9085388183594, + "learning_rate": 4.029493070972362e-07, + "loss": 15.2518, + "step": 442100 + }, + { + "epoch": 0.893090171584174, + "grad_norm": 212.6479949951172, + "learning_rate": 4.0281203065360265e-07, + "loss": 15.6186, + "step": 442110 + }, + { + "epoch": 0.8931103722168579, + "grad_norm": 3.895404100418091, + "learning_rate": 4.026747766161071e-07, + "loss": 14.8128, + "step": 442120 + }, + { + "epoch": 0.8931305728495417, + "grad_norm": 3.034308433532715, + "learning_rate": 4.025375449854163e-07, + "loss": 10.7396, + "step": 442130 + }, + { + "epoch": 0.8931507734822255, + "grad_norm": 376.6468200683594, + "learning_rate": 4.0240033576219974e-07, + "loss": 22.8036, + "step": 442140 + }, + { + "epoch": 0.8931709741149093, + "grad_norm": 591.7930297851562, + "learning_rate": 4.022631489471257e-07, + "loss": 21.005, + "step": 442150 + }, + { + "epoch": 0.8931911747475931, + "grad_norm": 67.9887924194336, + "learning_rate": 4.0212598454086596e-07, + "loss": 17.2758, + "step": 442160 + }, + { + "epoch": 0.893211375380277, + "grad_norm": 611.8180541992188, + "learning_rate": 4.019888425440838e-07, + "loss": 18.577, + "step": 442170 + }, + { + "epoch": 0.8932315760129608, + "grad_norm": 460.660400390625, + "learning_rate": 4.018517229574509e-07, + "loss": 21.4927, + "step": 442180 + }, + { + "epoch": 0.8932517766456446, + "grad_norm": 286.05133056640625, + "learning_rate": 4.0171462578163624e-07, + "loss": 17.3168, + "step": 442190 + }, + { + "epoch": 0.8932719772783284, + "grad_norm": 289.0408020019531, + "learning_rate": 4.0157755101730645e-07, + "loss": 13.6396, + "step": 442200 + }, + { + "epoch": 0.8932921779110121, + "grad_norm": 433.1505432128906, + "learning_rate": 4.014404986651288e-07, + "loss": 18.2587, + "step": 442210 + }, + { + "epoch": 0.8933123785436959, + "grad_norm": 505.6806945800781, + "learning_rate": 4.013034687257727e-07, + "loss": 21.5438, + "step": 442220 + }, + { + "epoch": 0.8933325791763798, + "grad_norm": 330.5233154296875, + "learning_rate": 4.011664611999072e-07, + "loss": 30.2277, + "step": 442230 + }, + { + "epoch": 0.8933527798090636, + "grad_norm": 633.8748779296875, + "learning_rate": 4.010294760881972e-07, + "loss": 25.41, + "step": 442240 + }, + { + "epoch": 0.8933729804417474, + "grad_norm": 301.0047607421875, + "learning_rate": 4.0089251339131164e-07, + "loss": 21.5253, + "step": 442250 + }, + { + "epoch": 0.8933931810744312, + "grad_norm": 2.294178009033203, + "learning_rate": 4.0075557310991886e-07, + "loss": 21.8428, + "step": 442260 + }, + { + "epoch": 0.893413381707115, + "grad_norm": 519.116455078125, + "learning_rate": 4.006186552446861e-07, + "loss": 18.2456, + "step": 442270 + }, + { + "epoch": 0.8934335823397989, + "grad_norm": 256.85467529296875, + "learning_rate": 4.00481759796279e-07, + "loss": 19.9572, + "step": 442280 + }, + { + "epoch": 0.8934537829724827, + "grad_norm": 501.7436828613281, + "learning_rate": 4.003448867653664e-07, + "loss": 18.7082, + "step": 442290 + }, + { + "epoch": 0.8934739836051665, + "grad_norm": 430.889404296875, + "learning_rate": 4.002080361526156e-07, + "loss": 16.348, + "step": 442300 + }, + { + "epoch": 0.8934941842378503, + "grad_norm": 591.6133422851562, + "learning_rate": 4.000712079586916e-07, + "loss": 16.6457, + "step": 442310 + }, + { + "epoch": 0.8935143848705341, + "grad_norm": 604.29736328125, + "learning_rate": 3.999344021842627e-07, + "loss": 23.2564, + "step": 442320 + }, + { + "epoch": 0.893534585503218, + "grad_norm": 216.2506103515625, + "learning_rate": 3.997976188299968e-07, + "loss": 16.2214, + "step": 442330 + }, + { + "epoch": 0.8935547861359018, + "grad_norm": 949.240966796875, + "learning_rate": 3.996608578965594e-07, + "loss": 22.458, + "step": 442340 + }, + { + "epoch": 0.8935749867685856, + "grad_norm": 247.2509307861328, + "learning_rate": 3.9952411938461557e-07, + "loss": 18.5836, + "step": 442350 + }, + { + "epoch": 0.8935951874012694, + "grad_norm": 260.24273681640625, + "learning_rate": 3.9938740329483473e-07, + "loss": 20.2889, + "step": 442360 + }, + { + "epoch": 0.8936153880339532, + "grad_norm": 475.4487609863281, + "learning_rate": 3.992507096278814e-07, + "loss": 23.5179, + "step": 442370 + }, + { + "epoch": 0.8936355886666371, + "grad_norm": 317.2352600097656, + "learning_rate": 3.991140383844211e-07, + "loss": 9.8048, + "step": 442380 + }, + { + "epoch": 0.8936557892993209, + "grad_norm": 841.1236572265625, + "learning_rate": 3.989773895651222e-07, + "loss": 18.5649, + "step": 442390 + }, + { + "epoch": 0.8936759899320047, + "grad_norm": 498.5187072753906, + "learning_rate": 3.9884076317064813e-07, + "loss": 20.668, + "step": 442400 + }, + { + "epoch": 0.8936961905646885, + "grad_norm": 311.369384765625, + "learning_rate": 3.9870415920166715e-07, + "loss": 16.4844, + "step": 442410 + }, + { + "epoch": 0.8937163911973723, + "grad_norm": 283.80987548828125, + "learning_rate": 3.9856757765884436e-07, + "loss": 10.47, + "step": 442420 + }, + { + "epoch": 0.8937365918300562, + "grad_norm": 37.27119445800781, + "learning_rate": 3.984310185428442e-07, + "loss": 33.6831, + "step": 442430 + }, + { + "epoch": 0.89375679246274, + "grad_norm": 382.70458984375, + "learning_rate": 3.9829448185433385e-07, + "loss": 25.1369, + "step": 442440 + }, + { + "epoch": 0.8937769930954238, + "grad_norm": 424.489501953125, + "learning_rate": 3.9815796759397783e-07, + "loss": 8.1771, + "step": 442450 + }, + { + "epoch": 0.8937971937281076, + "grad_norm": 0.0, + "learning_rate": 3.980214757624412e-07, + "loss": 20.0538, + "step": 442460 + }, + { + "epoch": 0.8938173943607913, + "grad_norm": 232.93911743164062, + "learning_rate": 3.978850063603895e-07, + "loss": 18.2064, + "step": 442470 + }, + { + "epoch": 0.8938375949934751, + "grad_norm": 588.0669555664062, + "learning_rate": 3.977485593884889e-07, + "loss": 27.8043, + "step": 442480 + }, + { + "epoch": 0.893857795626159, + "grad_norm": 289.8110656738281, + "learning_rate": 3.9761213484740435e-07, + "loss": 18.8844, + "step": 442490 + }, + { + "epoch": 0.8938779962588428, + "grad_norm": 455.50518798828125, + "learning_rate": 3.9747573273779816e-07, + "loss": 13.1274, + "step": 442500 + }, + { + "epoch": 0.8938981968915266, + "grad_norm": 263.20697021484375, + "learning_rate": 3.9733935306033756e-07, + "loss": 16.1889, + "step": 442510 + }, + { + "epoch": 0.8939183975242104, + "grad_norm": 463.04376220703125, + "learning_rate": 3.9720299581568865e-07, + "loss": 21.7175, + "step": 442520 + }, + { + "epoch": 0.8939385981568942, + "grad_norm": 302.4626770019531, + "learning_rate": 3.970666610045121e-07, + "loss": 19.4688, + "step": 442530 + }, + { + "epoch": 0.8939587987895781, + "grad_norm": 769.6148681640625, + "learning_rate": 3.969303486274745e-07, + "loss": 27.4903, + "step": 442540 + }, + { + "epoch": 0.8939789994222619, + "grad_norm": 316.3883361816406, + "learning_rate": 3.967940586852409e-07, + "loss": 12.6395, + "step": 442550 + }, + { + "epoch": 0.8939992000549457, + "grad_norm": 183.7150421142578, + "learning_rate": 3.966577911784747e-07, + "loss": 20.832, + "step": 442560 + }, + { + "epoch": 0.8940194006876295, + "grad_norm": 590.2752685546875, + "learning_rate": 3.965215461078392e-07, + "loss": 15.9476, + "step": 442570 + }, + { + "epoch": 0.8940396013203133, + "grad_norm": 534.8192138671875, + "learning_rate": 3.963853234740006e-07, + "loss": 19.1567, + "step": 442580 + }, + { + "epoch": 0.8940598019529972, + "grad_norm": 597.6304931640625, + "learning_rate": 3.962491232776211e-07, + "loss": 20.072, + "step": 442590 + }, + { + "epoch": 0.894080002585681, + "grad_norm": 381.5980529785156, + "learning_rate": 3.961129455193641e-07, + "loss": 32.0533, + "step": 442600 + }, + { + "epoch": 0.8941002032183648, + "grad_norm": 256.7167663574219, + "learning_rate": 3.959767901998957e-07, + "loss": 11.408, + "step": 442610 + }, + { + "epoch": 0.8941204038510486, + "grad_norm": 65.94268798828125, + "learning_rate": 3.958406573198764e-07, + "loss": 18.7583, + "step": 442620 + }, + { + "epoch": 0.8941406044837324, + "grad_norm": 288.7595520019531, + "learning_rate": 3.957045468799725e-07, + "loss": 22.4417, + "step": 442630 + }, + { + "epoch": 0.8941608051164163, + "grad_norm": 427.774169921875, + "learning_rate": 3.955684588808456e-07, + "loss": 29.2313, + "step": 442640 + }, + { + "epoch": 0.8941810057491001, + "grad_norm": 105.23033142089844, + "learning_rate": 3.954323933231602e-07, + "loss": 24.7779, + "step": 442650 + }, + { + "epoch": 0.8942012063817839, + "grad_norm": 648.6617431640625, + "learning_rate": 3.952963502075791e-07, + "loss": 23.5429, + "step": 442660 + }, + { + "epoch": 0.8942214070144677, + "grad_norm": 456.74700927734375, + "learning_rate": 3.951603295347639e-07, + "loss": 21.8411, + "step": 442670 + }, + { + "epoch": 0.8942416076471515, + "grad_norm": 0.0741969645023346, + "learning_rate": 3.9502433130537977e-07, + "loss": 10.3931, + "step": 442680 + }, + { + "epoch": 0.8942618082798354, + "grad_norm": 482.71612548828125, + "learning_rate": 3.9488835552008773e-07, + "loss": 20.5551, + "step": 442690 + }, + { + "epoch": 0.8942820089125192, + "grad_norm": 61.67448806762695, + "learning_rate": 3.947524021795518e-07, + "loss": 18.076, + "step": 442700 + }, + { + "epoch": 0.894302209545203, + "grad_norm": 291.5075988769531, + "learning_rate": 3.946164712844347e-07, + "loss": 10.6352, + "step": 442710 + }, + { + "epoch": 0.8943224101778867, + "grad_norm": 536.745361328125, + "learning_rate": 3.9448056283539704e-07, + "loss": 19.3627, + "step": 442720 + }, + { + "epoch": 0.8943426108105705, + "grad_norm": 359.7989196777344, + "learning_rate": 3.9434467683310327e-07, + "loss": 13.5057, + "step": 442730 + }, + { + "epoch": 0.8943628114432544, + "grad_norm": 391.85369873046875, + "learning_rate": 3.942088132782157e-07, + "loss": 16.9426, + "step": 442740 + }, + { + "epoch": 0.8943830120759382, + "grad_norm": 234.9928741455078, + "learning_rate": 3.9407297217139427e-07, + "loss": 12.7332, + "step": 442750 + }, + { + "epoch": 0.894403212708622, + "grad_norm": 301.238037109375, + "learning_rate": 3.9393715351330243e-07, + "loss": 11.6003, + "step": 442760 + }, + { + "epoch": 0.8944234133413058, + "grad_norm": 177.99615478515625, + "learning_rate": 3.9380135730460347e-07, + "loss": 9.4544, + "step": 442770 + }, + { + "epoch": 0.8944436139739896, + "grad_norm": 565.592041015625, + "learning_rate": 3.9366558354595797e-07, + "loss": 23.9906, + "step": 442780 + }, + { + "epoch": 0.8944638146066735, + "grad_norm": 177.5765838623047, + "learning_rate": 3.935298322380271e-07, + "loss": 10.1951, + "step": 442790 + }, + { + "epoch": 0.8944840152393573, + "grad_norm": 451.1468505859375, + "learning_rate": 3.9339410338147363e-07, + "loss": 17.273, + "step": 442800 + }, + { + "epoch": 0.8945042158720411, + "grad_norm": 234.44549560546875, + "learning_rate": 3.9325839697695877e-07, + "loss": 11.3077, + "step": 442810 + }, + { + "epoch": 0.8945244165047249, + "grad_norm": 601.57275390625, + "learning_rate": 3.931227130251425e-07, + "loss": 16.1628, + "step": 442820 + }, + { + "epoch": 0.8945446171374087, + "grad_norm": 50.89834976196289, + "learning_rate": 3.929870515266876e-07, + "loss": 21.8412, + "step": 442830 + }, + { + "epoch": 0.8945648177700926, + "grad_norm": 312.83880615234375, + "learning_rate": 3.928514124822569e-07, + "loss": 18.6505, + "step": 442840 + }, + { + "epoch": 0.8945850184027764, + "grad_norm": 22.50575065612793, + "learning_rate": 3.9271579589250817e-07, + "loss": 27.6935, + "step": 442850 + }, + { + "epoch": 0.8946052190354602, + "grad_norm": 371.71063232421875, + "learning_rate": 3.925802017581032e-07, + "loss": 14.2174, + "step": 442860 + }, + { + "epoch": 0.894625419668144, + "grad_norm": 322.22113037109375, + "learning_rate": 3.924446300797052e-07, + "loss": 11.8918, + "step": 442870 + }, + { + "epoch": 0.8946456203008278, + "grad_norm": 245.89785766601562, + "learning_rate": 3.923090808579727e-07, + "loss": 19.6123, + "step": 442880 + }, + { + "epoch": 0.8946658209335117, + "grad_norm": 521.1366577148438, + "learning_rate": 3.9217355409356614e-07, + "loss": 26.2525, + "step": 442890 + }, + { + "epoch": 0.8946860215661955, + "grad_norm": 397.6199645996094, + "learning_rate": 3.920380497871473e-07, + "loss": 15.6646, + "step": 442900 + }, + { + "epoch": 0.8947062221988793, + "grad_norm": 391.6208801269531, + "learning_rate": 3.9190256793937675e-07, + "loss": 20.3865, + "step": 442910 + }, + { + "epoch": 0.8947264228315631, + "grad_norm": 289.1727294921875, + "learning_rate": 3.9176710855091283e-07, + "loss": 9.2943, + "step": 442920 + }, + { + "epoch": 0.8947466234642469, + "grad_norm": 239.43304443359375, + "learning_rate": 3.916316716224172e-07, + "loss": 16.7488, + "step": 442930 + }, + { + "epoch": 0.8947668240969308, + "grad_norm": 349.6020202636719, + "learning_rate": 3.9149625715455107e-07, + "loss": 20.8104, + "step": 442940 + }, + { + "epoch": 0.8947870247296146, + "grad_norm": 96.1251449584961, + "learning_rate": 3.913608651479733e-07, + "loss": 26.9883, + "step": 442950 + }, + { + "epoch": 0.8948072253622984, + "grad_norm": 623.001953125, + "learning_rate": 3.912254956033423e-07, + "loss": 19.0216, + "step": 442960 + }, + { + "epoch": 0.8948274259949822, + "grad_norm": 338.7909851074219, + "learning_rate": 3.9109014852132035e-07, + "loss": 10.8443, + "step": 442970 + }, + { + "epoch": 0.8948476266276659, + "grad_norm": 490.4385070800781, + "learning_rate": 3.9095482390256624e-07, + "loss": 17.8714, + "step": 442980 + }, + { + "epoch": 0.8948678272603497, + "grad_norm": 648.5219116210938, + "learning_rate": 3.908195217477384e-07, + "loss": 19.2579, + "step": 442990 + }, + { + "epoch": 0.8948880278930336, + "grad_norm": 679.5826416015625, + "learning_rate": 3.90684242057498e-07, + "loss": 34.5945, + "step": 443000 + }, + { + "epoch": 0.8949082285257174, + "grad_norm": 291.563232421875, + "learning_rate": 3.9054898483250224e-07, + "loss": 20.5633, + "step": 443010 + }, + { + "epoch": 0.8949284291584012, + "grad_norm": 510.5856628417969, + "learning_rate": 3.904137500734129e-07, + "loss": 22.5774, + "step": 443020 + }, + { + "epoch": 0.894948629791085, + "grad_norm": 551.2908325195312, + "learning_rate": 3.902785377808882e-07, + "loss": 26.2614, + "step": 443030 + }, + { + "epoch": 0.8949688304237688, + "grad_norm": 251.3923797607422, + "learning_rate": 3.901433479555855e-07, + "loss": 18.5808, + "step": 443040 + }, + { + "epoch": 0.8949890310564527, + "grad_norm": 289.2093200683594, + "learning_rate": 3.9000818059816593e-07, + "loss": 24.0075, + "step": 443050 + }, + { + "epoch": 0.8950092316891365, + "grad_norm": 123.8757553100586, + "learning_rate": 3.898730357092878e-07, + "loss": 28.818, + "step": 443060 + }, + { + "epoch": 0.8950294323218203, + "grad_norm": 592.0748291015625, + "learning_rate": 3.8973791328960786e-07, + "loss": 18.4389, + "step": 443070 + }, + { + "epoch": 0.8950496329545041, + "grad_norm": 339.29248046875, + "learning_rate": 3.8960281333978667e-07, + "loss": 19.7948, + "step": 443080 + }, + { + "epoch": 0.895069833587188, + "grad_norm": 218.08489990234375, + "learning_rate": 3.894677358604826e-07, + "loss": 14.6797, + "step": 443090 + }, + { + "epoch": 0.8950900342198718, + "grad_norm": 459.9266662597656, + "learning_rate": 3.89332680852354e-07, + "loss": 17.3519, + "step": 443100 + }, + { + "epoch": 0.8951102348525556, + "grad_norm": 314.08953857421875, + "learning_rate": 3.8919764831605754e-07, + "loss": 9.2347, + "step": 443110 + }, + { + "epoch": 0.8951304354852394, + "grad_norm": 176.02169799804688, + "learning_rate": 3.890626382522539e-07, + "loss": 21.5893, + "step": 443120 + }, + { + "epoch": 0.8951506361179232, + "grad_norm": 822.2457275390625, + "learning_rate": 3.889276506615991e-07, + "loss": 14.4411, + "step": 443130 + }, + { + "epoch": 0.895170836750607, + "grad_norm": 414.7174987792969, + "learning_rate": 3.88792685544751e-07, + "loss": 23.4973, + "step": 443140 + }, + { + "epoch": 0.8951910373832909, + "grad_norm": 176.8641357421875, + "learning_rate": 3.88657742902368e-07, + "loss": 14.6913, + "step": 443150 + }, + { + "epoch": 0.8952112380159747, + "grad_norm": 465.1305847167969, + "learning_rate": 3.88522822735109e-07, + "loss": 25.9285, + "step": 443160 + }, + { + "epoch": 0.8952314386486585, + "grad_norm": 268.416748046875, + "learning_rate": 3.8838792504363066e-07, + "loss": 8.6215, + "step": 443170 + }, + { + "epoch": 0.8952516392813423, + "grad_norm": 1195.952392578125, + "learning_rate": 3.882530498285886e-07, + "loss": 16.573, + "step": 443180 + }, + { + "epoch": 0.8952718399140261, + "grad_norm": 293.72906494140625, + "learning_rate": 3.8811819709064336e-07, + "loss": 14.2375, + "step": 443190 + }, + { + "epoch": 0.89529204054671, + "grad_norm": 47.482784271240234, + "learning_rate": 3.879833668304506e-07, + "loss": 29.3211, + "step": 443200 + }, + { + "epoch": 0.8953122411793938, + "grad_norm": 433.0750427246094, + "learning_rate": 3.8784855904866637e-07, + "loss": 26.0019, + "step": 443210 + }, + { + "epoch": 0.8953324418120776, + "grad_norm": 113.45503234863281, + "learning_rate": 3.877137737459502e-07, + "loss": 11.3939, + "step": 443220 + }, + { + "epoch": 0.8953526424447613, + "grad_norm": 362.42205810546875, + "learning_rate": 3.875790109229566e-07, + "loss": 25.6576, + "step": 443230 + }, + { + "epoch": 0.8953728430774451, + "grad_norm": 107.12870788574219, + "learning_rate": 3.8744427058034384e-07, + "loss": 13.7933, + "step": 443240 + }, + { + "epoch": 0.895393043710129, + "grad_norm": 554.9472045898438, + "learning_rate": 3.8730955271876813e-07, + "loss": 10.5045, + "step": 443250 + }, + { + "epoch": 0.8954132443428128, + "grad_norm": 5.183191299438477, + "learning_rate": 3.871748573388867e-07, + "loss": 19.209, + "step": 443260 + }, + { + "epoch": 0.8954334449754966, + "grad_norm": 622.285400390625, + "learning_rate": 3.870401844413557e-07, + "loss": 20.5912, + "step": 443270 + }, + { + "epoch": 0.8954536456081804, + "grad_norm": 608.5679931640625, + "learning_rate": 3.8690553402683015e-07, + "loss": 21.6407, + "step": 443280 + }, + { + "epoch": 0.8954738462408642, + "grad_norm": 224.35610961914062, + "learning_rate": 3.86770906095969e-07, + "loss": 21.7842, + "step": 443290 + }, + { + "epoch": 0.895494046873548, + "grad_norm": 360.945556640625, + "learning_rate": 3.866363006494256e-07, + "loss": 17.557, + "step": 443300 + }, + { + "epoch": 0.8955142475062319, + "grad_norm": 458.71636962890625, + "learning_rate": 3.8650171768785826e-07, + "loss": 30.2733, + "step": 443310 + }, + { + "epoch": 0.8955344481389157, + "grad_norm": 509.4033508300781, + "learning_rate": 3.863671572119221e-07, + "loss": 13.7747, + "step": 443320 + }, + { + "epoch": 0.8955546487715995, + "grad_norm": 390.75848388671875, + "learning_rate": 3.8623261922227204e-07, + "loss": 26.8211, + "step": 443330 + }, + { + "epoch": 0.8955748494042833, + "grad_norm": 336.63623046875, + "learning_rate": 3.8609810371956544e-07, + "loss": 9.9372, + "step": 443340 + }, + { + "epoch": 0.8955950500369672, + "grad_norm": 26.759260177612305, + "learning_rate": 3.859636107044573e-07, + "loss": 24.4349, + "step": 443350 + }, + { + "epoch": 0.895615250669651, + "grad_norm": 317.5001220703125, + "learning_rate": 3.8582914017760154e-07, + "loss": 21.1117, + "step": 443360 + }, + { + "epoch": 0.8956354513023348, + "grad_norm": 460.7093811035156, + "learning_rate": 3.856946921396554e-07, + "loss": 15.5941, + "step": 443370 + }, + { + "epoch": 0.8956556519350186, + "grad_norm": 315.0159606933594, + "learning_rate": 3.8556026659127445e-07, + "loss": 7.0699, + "step": 443380 + }, + { + "epoch": 0.8956758525677024, + "grad_norm": 358.8883972167969, + "learning_rate": 3.8542586353311264e-07, + "loss": 14.2727, + "step": 443390 + }, + { + "epoch": 0.8956960532003863, + "grad_norm": 316.111572265625, + "learning_rate": 3.85291482965825e-07, + "loss": 15.5937, + "step": 443400 + }, + { + "epoch": 0.8957162538330701, + "grad_norm": 223.5146942138672, + "learning_rate": 3.851571248900676e-07, + "loss": 15.3269, + "step": 443410 + }, + { + "epoch": 0.8957364544657539, + "grad_norm": 420.58526611328125, + "learning_rate": 3.8502278930649506e-07, + "loss": 18.9996, + "step": 443420 + }, + { + "epoch": 0.8957566550984377, + "grad_norm": 426.0105285644531, + "learning_rate": 3.8488847621576066e-07, + "loss": 8.0604, + "step": 443430 + }, + { + "epoch": 0.8957768557311215, + "grad_norm": 114.0346450805664, + "learning_rate": 3.8475418561851996e-07, + "loss": 43.2814, + "step": 443440 + }, + { + "epoch": 0.8957970563638054, + "grad_norm": 466.63580322265625, + "learning_rate": 3.846199175154297e-07, + "loss": 20.6006, + "step": 443450 + }, + { + "epoch": 0.8958172569964892, + "grad_norm": 123.13581848144531, + "learning_rate": 3.8448567190713993e-07, + "loss": 22.305, + "step": 443460 + }, + { + "epoch": 0.895837457629173, + "grad_norm": 208.05831909179688, + "learning_rate": 3.843514487943079e-07, + "loss": 17.5501, + "step": 443470 + }, + { + "epoch": 0.8958576582618568, + "grad_norm": 190.33071899414062, + "learning_rate": 3.8421724817758745e-07, + "loss": 21.5826, + "step": 443480 + }, + { + "epoch": 0.8958778588945405, + "grad_norm": 356.87127685546875, + "learning_rate": 3.84083070057632e-07, + "loss": 23.349, + "step": 443490 + }, + { + "epoch": 0.8958980595272243, + "grad_norm": 371.1936340332031, + "learning_rate": 3.8394891443509554e-07, + "loss": 7.139, + "step": 443500 + }, + { + "epoch": 0.8959182601599082, + "grad_norm": 329.1784973144531, + "learning_rate": 3.83814781310633e-07, + "loss": 17.2144, + "step": 443510 + }, + { + "epoch": 0.895938460792592, + "grad_norm": 22.805368423461914, + "learning_rate": 3.8368067068489724e-07, + "loss": 18.3239, + "step": 443520 + }, + { + "epoch": 0.8959586614252758, + "grad_norm": 343.87689208984375, + "learning_rate": 3.8354658255854105e-07, + "loss": 16.9245, + "step": 443530 + }, + { + "epoch": 0.8959788620579596, + "grad_norm": 511.1064147949219, + "learning_rate": 3.8341251693221893e-07, + "loss": 19.1064, + "step": 443540 + }, + { + "epoch": 0.8959990626906434, + "grad_norm": 370.7943420410156, + "learning_rate": 3.832784738065853e-07, + "loss": 18.6469, + "step": 443550 + }, + { + "epoch": 0.8960192633233273, + "grad_norm": 338.9887390136719, + "learning_rate": 3.83144453182292e-07, + "loss": 9.248, + "step": 443560 + }, + { + "epoch": 0.8960394639560111, + "grad_norm": 596.9765625, + "learning_rate": 3.830104550599922e-07, + "loss": 15.9077, + "step": 443570 + }, + { + "epoch": 0.8960596645886949, + "grad_norm": 96.01427459716797, + "learning_rate": 3.8287647944034054e-07, + "loss": 12.1708, + "step": 443580 + }, + { + "epoch": 0.8960798652213787, + "grad_norm": 276.3226013183594, + "learning_rate": 3.827425263239887e-07, + "loss": 15.0657, + "step": 443590 + }, + { + "epoch": 0.8961000658540625, + "grad_norm": 450.3712158203125, + "learning_rate": 3.8260859571158883e-07, + "loss": 22.2363, + "step": 443600 + }, + { + "epoch": 0.8961202664867464, + "grad_norm": 348.3032531738281, + "learning_rate": 3.824746876037955e-07, + "loss": 14.9362, + "step": 443610 + }, + { + "epoch": 0.8961404671194302, + "grad_norm": 542.25146484375, + "learning_rate": 3.8234080200125977e-07, + "loss": 17.2952, + "step": 443620 + }, + { + "epoch": 0.896160667752114, + "grad_norm": 302.9388732910156, + "learning_rate": 3.822069389046357e-07, + "loss": 21.2011, + "step": 443630 + }, + { + "epoch": 0.8961808683847978, + "grad_norm": 261.0054016113281, + "learning_rate": 3.8207309831457485e-07, + "loss": 11.77, + "step": 443640 + }, + { + "epoch": 0.8962010690174816, + "grad_norm": 107.35267639160156, + "learning_rate": 3.8193928023172897e-07, + "loss": 29.1692, + "step": 443650 + }, + { + "epoch": 0.8962212696501655, + "grad_norm": 248.1295166015625, + "learning_rate": 3.818054846567515e-07, + "loss": 16.7975, + "step": 443660 + }, + { + "epoch": 0.8962414702828493, + "grad_norm": 457.2428894042969, + "learning_rate": 3.8167171159029405e-07, + "loss": 15.9782, + "step": 443670 + }, + { + "epoch": 0.8962616709155331, + "grad_norm": 429.57025146484375, + "learning_rate": 3.815379610330078e-07, + "loss": 14.5478, + "step": 443680 + }, + { + "epoch": 0.8962818715482169, + "grad_norm": 473.4178466796875, + "learning_rate": 3.814042329855455e-07, + "loss": 17.089, + "step": 443690 + }, + { + "epoch": 0.8963020721809007, + "grad_norm": 374.85650634765625, + "learning_rate": 3.812705274485595e-07, + "loss": 22.4911, + "step": 443700 + }, + { + "epoch": 0.8963222728135846, + "grad_norm": 525.5431518554688, + "learning_rate": 3.811368444227009e-07, + "loss": 21.8245, + "step": 443710 + }, + { + "epoch": 0.8963424734462684, + "grad_norm": 626.8782348632812, + "learning_rate": 3.8100318390862033e-07, + "loss": 11.766, + "step": 443720 + }, + { + "epoch": 0.8963626740789522, + "grad_norm": 276.6632080078125, + "learning_rate": 3.8086954590697057e-07, + "loss": 21.9943, + "step": 443730 + }, + { + "epoch": 0.8963828747116359, + "grad_norm": 272.9100646972656, + "learning_rate": 3.8073593041840274e-07, + "loss": 40.214, + "step": 443740 + }, + { + "epoch": 0.8964030753443197, + "grad_norm": 561.2411499023438, + "learning_rate": 3.8060233744356634e-07, + "loss": 18.5295, + "step": 443750 + }, + { + "epoch": 0.8964232759770036, + "grad_norm": 11.287782669067383, + "learning_rate": 3.804687669831142e-07, + "loss": 11.435, + "step": 443760 + }, + { + "epoch": 0.8964434766096874, + "grad_norm": 321.518798828125, + "learning_rate": 3.80335219037698e-07, + "loss": 15.9047, + "step": 443770 + }, + { + "epoch": 0.8964636772423712, + "grad_norm": 562.2626953125, + "learning_rate": 3.802016936079678e-07, + "loss": 21.6205, + "step": 443780 + }, + { + "epoch": 0.896483877875055, + "grad_norm": 276.7866516113281, + "learning_rate": 3.8006819069457304e-07, + "loss": 20.1359, + "step": 443790 + }, + { + "epoch": 0.8965040785077388, + "grad_norm": 219.55706787109375, + "learning_rate": 3.7993471029816653e-07, + "loss": 10.2999, + "step": 443800 + }, + { + "epoch": 0.8965242791404227, + "grad_norm": 369.7362060546875, + "learning_rate": 3.798012524193978e-07, + "loss": 31.9167, + "step": 443810 + }, + { + "epoch": 0.8965444797731065, + "grad_norm": 241.03692626953125, + "learning_rate": 3.7966781705891684e-07, + "loss": 24.1727, + "step": 443820 + }, + { + "epoch": 0.8965646804057903, + "grad_norm": 726.6835327148438, + "learning_rate": 3.7953440421737433e-07, + "loss": 16.6032, + "step": 443830 + }, + { + "epoch": 0.8965848810384741, + "grad_norm": 285.6444091796875, + "learning_rate": 3.794010138954213e-07, + "loss": 33.959, + "step": 443840 + }, + { + "epoch": 0.8966050816711579, + "grad_norm": 587.40283203125, + "learning_rate": 3.792676460937078e-07, + "loss": 23.2916, + "step": 443850 + }, + { + "epoch": 0.8966252823038418, + "grad_norm": 6.033504486083984, + "learning_rate": 3.791343008128823e-07, + "loss": 11.7179, + "step": 443860 + }, + { + "epoch": 0.8966454829365256, + "grad_norm": 12.601014137268066, + "learning_rate": 3.790009780535969e-07, + "loss": 19.0735, + "step": 443870 + }, + { + "epoch": 0.8966656835692094, + "grad_norm": 82.12911987304688, + "learning_rate": 3.7886767781650016e-07, + "loss": 22.1259, + "step": 443880 + }, + { + "epoch": 0.8966858842018932, + "grad_norm": 154.22366333007812, + "learning_rate": 3.787344001022408e-07, + "loss": 12.7269, + "step": 443890 + }, + { + "epoch": 0.896706084834577, + "grad_norm": 318.2230224609375, + "learning_rate": 3.7860114491147017e-07, + "loss": 15.6227, + "step": 443900 + }, + { + "epoch": 0.8967262854672609, + "grad_norm": 235.8320770263672, + "learning_rate": 3.784679122448365e-07, + "loss": 29.1175, + "step": 443910 + }, + { + "epoch": 0.8967464860999447, + "grad_norm": 15.09432601928711, + "learning_rate": 3.783347021029904e-07, + "loss": 10.9162, + "step": 443920 + }, + { + "epoch": 0.8967666867326285, + "grad_norm": 90.33223724365234, + "learning_rate": 3.782015144865808e-07, + "loss": 16.4666, + "step": 443930 + }, + { + "epoch": 0.8967868873653123, + "grad_norm": 423.8289489746094, + "learning_rate": 3.780683493962556e-07, + "loss": 13.6356, + "step": 443940 + }, + { + "epoch": 0.8968070879979961, + "grad_norm": 230.84378051757812, + "learning_rate": 3.779352068326653e-07, + "loss": 11.9636, + "step": 443950 + }, + { + "epoch": 0.89682728863068, + "grad_norm": 1077.435546875, + "learning_rate": 3.7780208679645826e-07, + "loss": 25.3632, + "step": 443960 + }, + { + "epoch": 0.8968474892633638, + "grad_norm": 17.35983657836914, + "learning_rate": 3.776689892882823e-07, + "loss": 20.2653, + "step": 443970 + }, + { + "epoch": 0.8968676898960476, + "grad_norm": 436.32354736328125, + "learning_rate": 3.77535914308787e-07, + "loss": 17.2695, + "step": 443980 + }, + { + "epoch": 0.8968878905287314, + "grad_norm": 282.3138122558594, + "learning_rate": 3.774028618586217e-07, + "loss": 25.9139, + "step": 443990 + }, + { + "epoch": 0.8969080911614151, + "grad_norm": 650.9126586914062, + "learning_rate": 3.772698319384349e-07, + "loss": 13.6121, + "step": 444000 + }, + { + "epoch": 0.8969282917940989, + "grad_norm": 65.5379409790039, + "learning_rate": 3.7713682454887266e-07, + "loss": 11.2433, + "step": 444010 + }, + { + "epoch": 0.8969484924267828, + "grad_norm": 320.1227722167969, + "learning_rate": 3.770038396905862e-07, + "loss": 14.2495, + "step": 444020 + }, + { + "epoch": 0.8969686930594666, + "grad_norm": 357.67852783203125, + "learning_rate": 3.768708773642221e-07, + "loss": 16.8157, + "step": 444030 + }, + { + "epoch": 0.8969888936921504, + "grad_norm": 276.79132080078125, + "learning_rate": 3.767379375704278e-07, + "loss": 16.1594, + "step": 444040 + }, + { + "epoch": 0.8970090943248342, + "grad_norm": 167.3631134033203, + "learning_rate": 3.7660502030985203e-07, + "loss": 14.0351, + "step": 444050 + }, + { + "epoch": 0.897029294957518, + "grad_norm": 344.3874206542969, + "learning_rate": 3.7647212558314493e-07, + "loss": 10.6284, + "step": 444060 + }, + { + "epoch": 0.8970494955902019, + "grad_norm": 355.57635498046875, + "learning_rate": 3.7633925339094936e-07, + "loss": 4.788, + "step": 444070 + }, + { + "epoch": 0.8970696962228857, + "grad_norm": 234.05596923828125, + "learning_rate": 3.762064037339158e-07, + "loss": 26.8288, + "step": 444080 + }, + { + "epoch": 0.8970898968555695, + "grad_norm": 573.5048217773438, + "learning_rate": 3.760735766126927e-07, + "loss": 18.9453, + "step": 444090 + }, + { + "epoch": 0.8971100974882533, + "grad_norm": 306.1864318847656, + "learning_rate": 3.759407720279257e-07, + "loss": 17.8384, + "step": 444100 + }, + { + "epoch": 0.8971302981209371, + "grad_norm": 763.8611450195312, + "learning_rate": 3.758079899802619e-07, + "loss": 17.9444, + "step": 444110 + }, + { + "epoch": 0.897150498753621, + "grad_norm": 241.2154998779297, + "learning_rate": 3.756752304703498e-07, + "loss": 20.6118, + "step": 444120 + }, + { + "epoch": 0.8971706993863048, + "grad_norm": 474.2724609375, + "learning_rate": 3.755424934988355e-07, + "loss": 9.7833, + "step": 444130 + }, + { + "epoch": 0.8971909000189886, + "grad_norm": 71.91261291503906, + "learning_rate": 3.7540977906636576e-07, + "loss": 13.4105, + "step": 444140 + }, + { + "epoch": 0.8972111006516724, + "grad_norm": 111.76239776611328, + "learning_rate": 3.752770871735878e-07, + "loss": 10.913, + "step": 444150 + }, + { + "epoch": 0.8972313012843562, + "grad_norm": 187.22528076171875, + "learning_rate": 3.751444178211494e-07, + "loss": 7.838, + "step": 444160 + }, + { + "epoch": 0.8972515019170401, + "grad_norm": 277.2666320800781, + "learning_rate": 3.7501177100969566e-07, + "loss": 8.9107, + "step": 444170 + }, + { + "epoch": 0.8972717025497239, + "grad_norm": 444.3712463378906, + "learning_rate": 3.748791467398732e-07, + "loss": 12.3827, + "step": 444180 + }, + { + "epoch": 0.8972919031824077, + "grad_norm": 361.9163818359375, + "learning_rate": 3.747465450123294e-07, + "loss": 19.0128, + "step": 444190 + }, + { + "epoch": 0.8973121038150915, + "grad_norm": 334.4684143066406, + "learning_rate": 3.7461396582771035e-07, + "loss": 20.5581, + "step": 444200 + }, + { + "epoch": 0.8973323044477753, + "grad_norm": 313.4148864746094, + "learning_rate": 3.744814091866605e-07, + "loss": 23.7055, + "step": 444210 + }, + { + "epoch": 0.8973525050804592, + "grad_norm": 403.1631164550781, + "learning_rate": 3.7434887508982886e-07, + "loss": 16.993, + "step": 444220 + }, + { + "epoch": 0.897372705713143, + "grad_norm": 43.625972747802734, + "learning_rate": 3.7421636353785815e-07, + "loss": 17.8234, + "step": 444230 + }, + { + "epoch": 0.8973929063458268, + "grad_norm": 342.0977478027344, + "learning_rate": 3.740838745313974e-07, + "loss": 12.4247, + "step": 444240 + }, + { + "epoch": 0.8974131069785106, + "grad_norm": 238.92608642578125, + "learning_rate": 3.739514080710899e-07, + "loss": 10.3439, + "step": 444250 + }, + { + "epoch": 0.8974333076111943, + "grad_norm": 943.6799926757812, + "learning_rate": 3.738189641575818e-07, + "loss": 27.6635, + "step": 444260 + }, + { + "epoch": 0.8974535082438782, + "grad_norm": 413.5370788574219, + "learning_rate": 3.7368654279151985e-07, + "loss": 41.3092, + "step": 444270 + }, + { + "epoch": 0.897473708876562, + "grad_norm": 118.44355773925781, + "learning_rate": 3.7355414397354796e-07, + "loss": 10.9965, + "step": 444280 + }, + { + "epoch": 0.8974939095092458, + "grad_norm": 392.2132263183594, + "learning_rate": 3.7342176770431284e-07, + "loss": 15.963, + "step": 444290 + }, + { + "epoch": 0.8975141101419296, + "grad_norm": 335.9568176269531, + "learning_rate": 3.732894139844578e-07, + "loss": 40.2798, + "step": 444300 + }, + { + "epoch": 0.8975343107746134, + "grad_norm": 109.2363510131836, + "learning_rate": 3.731570828146297e-07, + "loss": 17.8185, + "step": 444310 + }, + { + "epoch": 0.8975545114072973, + "grad_norm": 73.78199768066406, + "learning_rate": 3.730247741954729e-07, + "loss": 16.4353, + "step": 444320 + }, + { + "epoch": 0.8975747120399811, + "grad_norm": 389.6304626464844, + "learning_rate": 3.7289248812763137e-07, + "loss": 10.4385, + "step": 444330 + }, + { + "epoch": 0.8975949126726649, + "grad_norm": 545.3394165039062, + "learning_rate": 3.727602246117518e-07, + "loss": 23.9497, + "step": 444340 + }, + { + "epoch": 0.8976151133053487, + "grad_norm": 512.56640625, + "learning_rate": 3.7262798364847753e-07, + "loss": 23.9551, + "step": 444350 + }, + { + "epoch": 0.8976353139380325, + "grad_norm": 228.80422973632812, + "learning_rate": 3.72495765238452e-07, + "loss": 15.2482, + "step": 444360 + }, + { + "epoch": 0.8976555145707164, + "grad_norm": 306.053955078125, + "learning_rate": 3.723635693823213e-07, + "loss": 21.9044, + "step": 444370 + }, + { + "epoch": 0.8976757152034002, + "grad_norm": 275.0199279785156, + "learning_rate": 3.7223139608073e-07, + "loss": 18.1763, + "step": 444380 + }, + { + "epoch": 0.897695915836084, + "grad_norm": 606.3357543945312, + "learning_rate": 3.720992453343214e-07, + "loss": 22.7662, + "step": 444390 + }, + { + "epoch": 0.8977161164687678, + "grad_norm": 194.8509979248047, + "learning_rate": 3.7196711714373947e-07, + "loss": 12.8268, + "step": 444400 + }, + { + "epoch": 0.8977363171014516, + "grad_norm": 116.74224853515625, + "learning_rate": 3.7183501150962863e-07, + "loss": 15.3617, + "step": 444410 + }, + { + "epoch": 0.8977565177341355, + "grad_norm": 469.42413330078125, + "learning_rate": 3.7170292843263347e-07, + "loss": 21.9017, + "step": 444420 + }, + { + "epoch": 0.8977767183668193, + "grad_norm": 342.9283142089844, + "learning_rate": 3.715708679133956e-07, + "loss": 10.503, + "step": 444430 + }, + { + "epoch": 0.8977969189995031, + "grad_norm": 269.0628662109375, + "learning_rate": 3.714388299525595e-07, + "loss": 12.1823, + "step": 444440 + }, + { + "epoch": 0.8978171196321869, + "grad_norm": 332.29168701171875, + "learning_rate": 3.713068145507709e-07, + "loss": 12.9783, + "step": 444450 + }, + { + "epoch": 0.8978373202648707, + "grad_norm": 60.27352523803711, + "learning_rate": 3.7117482170867083e-07, + "loss": 16.7351, + "step": 444460 + }, + { + "epoch": 0.8978575208975546, + "grad_norm": 413.5735168457031, + "learning_rate": 3.710428514269027e-07, + "loss": 11.7485, + "step": 444470 + }, + { + "epoch": 0.8978777215302384, + "grad_norm": 770.4952392578125, + "learning_rate": 3.7091090370611093e-07, + "loss": 17.4015, + "step": 444480 + }, + { + "epoch": 0.8978979221629222, + "grad_norm": 228.45712280273438, + "learning_rate": 3.707789785469379e-07, + "loss": 16.2267, + "step": 444490 + }, + { + "epoch": 0.897918122795606, + "grad_norm": 236.81741333007812, + "learning_rate": 3.7064707595002636e-07, + "loss": 27.0402, + "step": 444500 + }, + { + "epoch": 0.8979383234282897, + "grad_norm": 83.5274887084961, + "learning_rate": 3.705151959160197e-07, + "loss": 18.8256, + "step": 444510 + }, + { + "epoch": 0.8979585240609735, + "grad_norm": 512.5786743164062, + "learning_rate": 3.703833384455602e-07, + "loss": 20.8874, + "step": 444520 + }, + { + "epoch": 0.8979787246936574, + "grad_norm": 270.5484619140625, + "learning_rate": 3.702515035392912e-07, + "loss": 28.6608, + "step": 444530 + }, + { + "epoch": 0.8979989253263412, + "grad_norm": 0.0, + "learning_rate": 3.7011969119785496e-07, + "loss": 4.3968, + "step": 444540 + }, + { + "epoch": 0.898019125959025, + "grad_norm": 415.77490234375, + "learning_rate": 3.6998790142189324e-07, + "loss": 12.6397, + "step": 444550 + }, + { + "epoch": 0.8980393265917088, + "grad_norm": 994.0740966796875, + "learning_rate": 3.698561342120499e-07, + "loss": 16.8994, + "step": 444560 + }, + { + "epoch": 0.8980595272243926, + "grad_norm": 106.81134033203125, + "learning_rate": 3.6972438956896563e-07, + "loss": 10.136, + "step": 444570 + }, + { + "epoch": 0.8980797278570765, + "grad_norm": 483.38348388671875, + "learning_rate": 3.695926674932826e-07, + "loss": 33.2317, + "step": 444580 + }, + { + "epoch": 0.8980999284897603, + "grad_norm": 474.3923034667969, + "learning_rate": 3.694609679856431e-07, + "loss": 28.5039, + "step": 444590 + }, + { + "epoch": 0.8981201291224441, + "grad_norm": 798.9323120117188, + "learning_rate": 3.693292910466906e-07, + "loss": 21.2564, + "step": 444600 + }, + { + "epoch": 0.8981403297551279, + "grad_norm": 342.9609680175781, + "learning_rate": 3.69197636677065e-07, + "loss": 12.9522, + "step": 444610 + }, + { + "epoch": 0.8981605303878117, + "grad_norm": 172.19468688964844, + "learning_rate": 3.690660048774075e-07, + "loss": 12.2965, + "step": 444620 + }, + { + "epoch": 0.8981807310204956, + "grad_norm": 224.4616241455078, + "learning_rate": 3.6893439564836155e-07, + "loss": 8.3202, + "step": 444630 + }, + { + "epoch": 0.8982009316531794, + "grad_norm": 38.49509048461914, + "learning_rate": 3.688028089905682e-07, + "loss": 17.5146, + "step": 444640 + }, + { + "epoch": 0.8982211322858632, + "grad_norm": 54.56721115112305, + "learning_rate": 3.6867124490466697e-07, + "loss": 15.7033, + "step": 444650 + }, + { + "epoch": 0.898241332918547, + "grad_norm": 315.0137634277344, + "learning_rate": 3.685397033913002e-07, + "loss": 14.4688, + "step": 444660 + }, + { + "epoch": 0.8982615335512308, + "grad_norm": 285.6546936035156, + "learning_rate": 3.6840818445111114e-07, + "loss": 17.9773, + "step": 444670 + }, + { + "epoch": 0.8982817341839147, + "grad_norm": 588.5462036132812, + "learning_rate": 3.6827668808473714e-07, + "loss": 10.5859, + "step": 444680 + }, + { + "epoch": 0.8983019348165985, + "grad_norm": 444.90185546875, + "learning_rate": 3.68145214292821e-07, + "loss": 15.1751, + "step": 444690 + }, + { + "epoch": 0.8983221354492823, + "grad_norm": 1217.4012451171875, + "learning_rate": 3.680137630760039e-07, + "loss": 23.9297, + "step": 444700 + }, + { + "epoch": 0.8983423360819661, + "grad_norm": 432.7891845703125, + "learning_rate": 3.6788233443492583e-07, + "loss": 22.0646, + "step": 444710 + }, + { + "epoch": 0.8983625367146499, + "grad_norm": 320.3438720703125, + "learning_rate": 3.6775092837022685e-07, + "loss": 19.194, + "step": 444720 + }, + { + "epoch": 0.8983827373473338, + "grad_norm": 209.52174377441406, + "learning_rate": 3.676195448825487e-07, + "loss": 14.6774, + "step": 444730 + }, + { + "epoch": 0.8984029379800176, + "grad_norm": 575.9804077148438, + "learning_rate": 3.674881839725314e-07, + "loss": 28.4419, + "step": 444740 + }, + { + "epoch": 0.8984231386127014, + "grad_norm": 313.8929138183594, + "learning_rate": 3.6735684564081385e-07, + "loss": 20.3377, + "step": 444750 + }, + { + "epoch": 0.8984433392453852, + "grad_norm": 266.3089904785156, + "learning_rate": 3.672255298880367e-07, + "loss": 33.5205, + "step": 444760 + }, + { + "epoch": 0.8984635398780689, + "grad_norm": 223.70948791503906, + "learning_rate": 3.670942367148417e-07, + "loss": 21.5937, + "step": 444770 + }, + { + "epoch": 0.8984837405107527, + "grad_norm": 568.1279907226562, + "learning_rate": 3.669629661218671e-07, + "loss": 25.2434, + "step": 444780 + }, + { + "epoch": 0.8985039411434366, + "grad_norm": 15.84461498260498, + "learning_rate": 3.66831718109753e-07, + "loss": 14.5056, + "step": 444790 + }, + { + "epoch": 0.8985241417761204, + "grad_norm": 378.42156982421875, + "learning_rate": 3.6670049267913954e-07, + "loss": 15.9576, + "step": 444800 + }, + { + "epoch": 0.8985443424088042, + "grad_norm": 416.7359313964844, + "learning_rate": 3.665692898306655e-07, + "loss": 18.7411, + "step": 444810 + }, + { + "epoch": 0.898564543041488, + "grad_norm": 205.28900146484375, + "learning_rate": 3.664381095649705e-07, + "loss": 12.982, + "step": 444820 + }, + { + "epoch": 0.8985847436741718, + "grad_norm": 116.66765594482422, + "learning_rate": 3.6630695188269505e-07, + "loss": 10.0713, + "step": 444830 + }, + { + "epoch": 0.8986049443068557, + "grad_norm": 293.1413269042969, + "learning_rate": 3.6617581678447647e-07, + "loss": 14.8839, + "step": 444840 + }, + { + "epoch": 0.8986251449395395, + "grad_norm": 92.39081573486328, + "learning_rate": 3.6604470427095587e-07, + "loss": 13.498, + "step": 444850 + }, + { + "epoch": 0.8986453455722233, + "grad_norm": 314.4815979003906, + "learning_rate": 3.6591361434277105e-07, + "loss": 21.9132, + "step": 444860 + }, + { + "epoch": 0.8986655462049071, + "grad_norm": 375.6960144042969, + "learning_rate": 3.6578254700056107e-07, + "loss": 8.0686, + "step": 444870 + }, + { + "epoch": 0.898685746837591, + "grad_norm": 254.2244873046875, + "learning_rate": 3.6565150224496525e-07, + "loss": 8.0898, + "step": 444880 + }, + { + "epoch": 0.8987059474702748, + "grad_norm": 192.85308837890625, + "learning_rate": 3.65520480076621e-07, + "loss": 12.1381, + "step": 444890 + }, + { + "epoch": 0.8987261481029586, + "grad_norm": 262.3988952636719, + "learning_rate": 3.6538948049616886e-07, + "loss": 11.3097, + "step": 444900 + }, + { + "epoch": 0.8987463487356424, + "grad_norm": 374.4770812988281, + "learning_rate": 3.6525850350424554e-07, + "loss": 8.4125, + "step": 444910 + }, + { + "epoch": 0.8987665493683262, + "grad_norm": 919.95458984375, + "learning_rate": 3.651275491014905e-07, + "loss": 28.0506, + "step": 444920 + }, + { + "epoch": 0.89878675000101, + "grad_norm": 256.9989013671875, + "learning_rate": 3.649966172885422e-07, + "loss": 18.5785, + "step": 444930 + }, + { + "epoch": 0.8988069506336939, + "grad_norm": 302.6526184082031, + "learning_rate": 3.648657080660373e-07, + "loss": 14.164, + "step": 444940 + }, + { + "epoch": 0.8988271512663777, + "grad_norm": 138.86810302734375, + "learning_rate": 3.6473482143461523e-07, + "loss": 15.4201, + "step": 444950 + }, + { + "epoch": 0.8988473518990615, + "grad_norm": 253.0792999267578, + "learning_rate": 3.6460395739491337e-07, + "loss": 8.506, + "step": 444960 + }, + { + "epoch": 0.8988675525317453, + "grad_norm": 267.2154846191406, + "learning_rate": 3.644731159475695e-07, + "loss": 14.1381, + "step": 444970 + }, + { + "epoch": 0.8988877531644291, + "grad_norm": 773.7963256835938, + "learning_rate": 3.643422970932209e-07, + "loss": 28.2256, + "step": 444980 + }, + { + "epoch": 0.898907953797113, + "grad_norm": 183.3919219970703, + "learning_rate": 3.6421150083250754e-07, + "loss": 11.0542, + "step": 444990 + }, + { + "epoch": 0.8989281544297968, + "grad_norm": 614.067138671875, + "learning_rate": 3.6408072716606346e-07, + "loss": 20.9825, + "step": 445000 + }, + { + "epoch": 0.8989483550624806, + "grad_norm": 728.6051025390625, + "learning_rate": 3.6394997609452755e-07, + "loss": 13.9006, + "step": 445010 + }, + { + "epoch": 0.8989685556951643, + "grad_norm": 354.2909851074219, + "learning_rate": 3.6381924761853814e-07, + "loss": 13.5888, + "step": 445020 + }, + { + "epoch": 0.8989887563278481, + "grad_norm": 600.506591796875, + "learning_rate": 3.6368854173873094e-07, + "loss": 20.5429, + "step": 445030 + }, + { + "epoch": 0.899008956960532, + "grad_norm": 345.0184631347656, + "learning_rate": 3.635578584557431e-07, + "loss": 25.2817, + "step": 445040 + }, + { + "epoch": 0.8990291575932158, + "grad_norm": 493.048095703125, + "learning_rate": 3.6342719777021194e-07, + "loss": 23.0403, + "step": 445050 + }, + { + "epoch": 0.8990493582258996, + "grad_norm": 174.3533477783203, + "learning_rate": 3.6329655968277477e-07, + "loss": 16.8303, + "step": 445060 + }, + { + "epoch": 0.8990695588585834, + "grad_norm": 57.79670333862305, + "learning_rate": 3.6316594419406826e-07, + "loss": 14.3608, + "step": 445070 + }, + { + "epoch": 0.8990897594912672, + "grad_norm": 422.2472229003906, + "learning_rate": 3.6303535130472743e-07, + "loss": 24.412, + "step": 445080 + }, + { + "epoch": 0.8991099601239511, + "grad_norm": 244.47982788085938, + "learning_rate": 3.6290478101539073e-07, + "loss": 11.1764, + "step": 445090 + }, + { + "epoch": 0.8991301607566349, + "grad_norm": 318.8597412109375, + "learning_rate": 3.627742333266937e-07, + "loss": 11.5762, + "step": 445100 + }, + { + "epoch": 0.8991503613893187, + "grad_norm": 132.2611083984375, + "learning_rate": 3.6264370823927196e-07, + "loss": 18.6923, + "step": 445110 + }, + { + "epoch": 0.8991705620220025, + "grad_norm": 481.2422180175781, + "learning_rate": 3.6251320575376336e-07, + "loss": 23.3867, + "step": 445120 + }, + { + "epoch": 0.8991907626546863, + "grad_norm": 274.683837890625, + "learning_rate": 3.6238272587080183e-07, + "loss": 15.6587, + "step": 445130 + }, + { + "epoch": 0.8992109632873702, + "grad_norm": 292.89007568359375, + "learning_rate": 3.6225226859102515e-07, + "loss": 8.5681, + "step": 445140 + }, + { + "epoch": 0.899231163920054, + "grad_norm": 596.2623901367188, + "learning_rate": 3.621218339150684e-07, + "loss": 17.0533, + "step": 445150 + }, + { + "epoch": 0.8992513645527378, + "grad_norm": 728.558837890625, + "learning_rate": 3.619914218435666e-07, + "loss": 16.9225, + "step": 445160 + }, + { + "epoch": 0.8992715651854216, + "grad_norm": 299.84503173828125, + "learning_rate": 3.6186103237715706e-07, + "loss": 12.8559, + "step": 445170 + }, + { + "epoch": 0.8992917658181054, + "grad_norm": 665.6764526367188, + "learning_rate": 3.617306655164743e-07, + "loss": 37.6737, + "step": 445180 + }, + { + "epoch": 0.8993119664507893, + "grad_norm": 377.08441162109375, + "learning_rate": 3.6160032126215274e-07, + "loss": 15.4841, + "step": 445190 + }, + { + "epoch": 0.8993321670834731, + "grad_norm": 668.6402587890625, + "learning_rate": 3.614699996148285e-07, + "loss": 24.6529, + "step": 445200 + }, + { + "epoch": 0.8993523677161569, + "grad_norm": 458.1636657714844, + "learning_rate": 3.613397005751379e-07, + "loss": 26.3734, + "step": 445210 + }, + { + "epoch": 0.8993725683488407, + "grad_norm": 363.49127197265625, + "learning_rate": 3.612094241437153e-07, + "loss": 22.0163, + "step": 445220 + }, + { + "epoch": 0.8993927689815245, + "grad_norm": 333.63128662109375, + "learning_rate": 3.610791703211941e-07, + "loss": 14.0242, + "step": 445230 + }, + { + "epoch": 0.8994129696142084, + "grad_norm": 389.77337646484375, + "learning_rate": 3.6094893910821103e-07, + "loss": 20.8929, + "step": 445240 + }, + { + "epoch": 0.8994331702468922, + "grad_norm": 279.6188049316406, + "learning_rate": 3.608187305054006e-07, + "loss": 11.8537, + "step": 445250 + }, + { + "epoch": 0.899453370879576, + "grad_norm": 544.8707885742188, + "learning_rate": 3.606885445133962e-07, + "loss": 12.3577, + "step": 445260 + }, + { + "epoch": 0.8994735715122598, + "grad_norm": 241.91265869140625, + "learning_rate": 3.605583811328328e-07, + "loss": 23.5405, + "step": 445270 + }, + { + "epoch": 0.8994937721449435, + "grad_norm": 481.5022277832031, + "learning_rate": 3.604282403643472e-07, + "loss": 24.3286, + "step": 445280 + }, + { + "epoch": 0.8995139727776273, + "grad_norm": 165.5880584716797, + "learning_rate": 3.6029812220857e-07, + "loss": 18.6106, + "step": 445290 + }, + { + "epoch": 0.8995341734103112, + "grad_norm": 96.1946792602539, + "learning_rate": 3.601680266661367e-07, + "loss": 17.9782, + "step": 445300 + }, + { + "epoch": 0.899554374042995, + "grad_norm": 529.3019409179688, + "learning_rate": 3.6003795373768303e-07, + "loss": 18.7, + "step": 445310 + }, + { + "epoch": 0.8995745746756788, + "grad_norm": 1.002054214477539, + "learning_rate": 3.5990790342384117e-07, + "loss": 18.6463, + "step": 445320 + }, + { + "epoch": 0.8995947753083626, + "grad_norm": 678.5360717773438, + "learning_rate": 3.5977787572524457e-07, + "loss": 16.6345, + "step": 445330 + }, + { + "epoch": 0.8996149759410464, + "grad_norm": 79.41068267822266, + "learning_rate": 3.596478706425277e-07, + "loss": 16.8962, + "step": 445340 + }, + { + "epoch": 0.8996351765737303, + "grad_norm": 608.1316528320312, + "learning_rate": 3.5951788817632615e-07, + "loss": 15.5022, + "step": 445350 + }, + { + "epoch": 0.8996553772064141, + "grad_norm": 404.8101806640625, + "learning_rate": 3.5938792832726996e-07, + "loss": 23.5742, + "step": 445360 + }, + { + "epoch": 0.8996755778390979, + "grad_norm": 316.1697998046875, + "learning_rate": 3.5925799109599426e-07, + "loss": 15.4585, + "step": 445370 + }, + { + "epoch": 0.8996957784717817, + "grad_norm": 362.8455505371094, + "learning_rate": 3.5912807648313285e-07, + "loss": 19.0565, + "step": 445380 + }, + { + "epoch": 0.8997159791044655, + "grad_norm": 341.9504699707031, + "learning_rate": 3.5899818448931865e-07, + "loss": 20.8177, + "step": 445390 + }, + { + "epoch": 0.8997361797371494, + "grad_norm": 401.0293273925781, + "learning_rate": 3.5886831511518336e-07, + "loss": 24.3106, + "step": 445400 + }, + { + "epoch": 0.8997563803698332, + "grad_norm": 161.62619018554688, + "learning_rate": 3.5873846836136204e-07, + "loss": 25.0842, + "step": 445410 + }, + { + "epoch": 0.899776581002517, + "grad_norm": 0.0, + "learning_rate": 3.586086442284864e-07, + "loss": 22.1347, + "step": 445420 + }, + { + "epoch": 0.8997967816352008, + "grad_norm": 324.30487060546875, + "learning_rate": 3.5847884271718814e-07, + "loss": 21.2996, + "step": 445430 + }, + { + "epoch": 0.8998169822678846, + "grad_norm": 198.55274963378906, + "learning_rate": 3.583490638281023e-07, + "loss": 24.4018, + "step": 445440 + }, + { + "epoch": 0.8998371829005685, + "grad_norm": 240.75051879882812, + "learning_rate": 3.5821930756185894e-07, + "loss": 15.4742, + "step": 445450 + }, + { + "epoch": 0.8998573835332523, + "grad_norm": 526.855224609375, + "learning_rate": 3.5808957391909315e-07, + "loss": 14.6135, + "step": 445460 + }, + { + "epoch": 0.8998775841659361, + "grad_norm": 506.0526123046875, + "learning_rate": 3.579598629004355e-07, + "loss": 14.1117, + "step": 445470 + }, + { + "epoch": 0.8998977847986199, + "grad_norm": 545.1388549804688, + "learning_rate": 3.5783017450651714e-07, + "loss": 12.2958, + "step": 445480 + }, + { + "epoch": 0.8999179854313037, + "grad_norm": 520.304443359375, + "learning_rate": 3.5770050873797314e-07, + "loss": 19.0794, + "step": 445490 + }, + { + "epoch": 0.8999381860639876, + "grad_norm": 142.5303955078125, + "learning_rate": 3.575708655954324e-07, + "loss": 8.911, + "step": 445500 + }, + { + "epoch": 0.8999583866966714, + "grad_norm": 366.2143249511719, + "learning_rate": 3.5744124507952895e-07, + "loss": 15.2595, + "step": 445510 + }, + { + "epoch": 0.8999785873293552, + "grad_norm": 171.73390197753906, + "learning_rate": 3.573116471908933e-07, + "loss": 21.8395, + "step": 445520 + }, + { + "epoch": 0.899998787962039, + "grad_norm": 449.6515197753906, + "learning_rate": 3.571820719301583e-07, + "loss": 17.7359, + "step": 445530 + }, + { + "epoch": 0.9000189885947227, + "grad_norm": 827.51953125, + "learning_rate": 3.570525192979546e-07, + "loss": 15.0506, + "step": 445540 + }, + { + "epoch": 0.9000391892274066, + "grad_norm": 465.2867126464844, + "learning_rate": 3.569229892949133e-07, + "loss": 21.303, + "step": 445550 + }, + { + "epoch": 0.9000593898600904, + "grad_norm": 218.71087646484375, + "learning_rate": 3.5679348192166675e-07, + "loss": 8.2386, + "step": 445560 + }, + { + "epoch": 0.9000795904927742, + "grad_norm": 721.7833251953125, + "learning_rate": 3.5666399717884604e-07, + "loss": 20.4058, + "step": 445570 + }, + { + "epoch": 0.900099791125458, + "grad_norm": 141.5386505126953, + "learning_rate": 3.565345350670807e-07, + "loss": 12.856, + "step": 445580 + }, + { + "epoch": 0.9001199917581418, + "grad_norm": 508.2934875488281, + "learning_rate": 3.56405095587003e-07, + "loss": 12.1547, + "step": 445590 + }, + { + "epoch": 0.9001401923908257, + "grad_norm": 132.60581970214844, + "learning_rate": 3.562756787392452e-07, + "loss": 23.756, + "step": 445600 + }, + { + "epoch": 0.9001603930235095, + "grad_norm": 340.5688781738281, + "learning_rate": 3.561462845244351e-07, + "loss": 13.3342, + "step": 445610 + }, + { + "epoch": 0.9001805936561933, + "grad_norm": 351.4344482421875, + "learning_rate": 3.560169129432045e-07, + "loss": 26.1856, + "step": 445620 + }, + { + "epoch": 0.9002007942888771, + "grad_norm": 527.7457275390625, + "learning_rate": 3.5588756399618507e-07, + "loss": 9.6738, + "step": 445630 + }, + { + "epoch": 0.9002209949215609, + "grad_norm": 262.84136962890625, + "learning_rate": 3.557582376840063e-07, + "loss": 14.2681, + "step": 445640 + }, + { + "epoch": 0.9002411955542448, + "grad_norm": 526.7639770507812, + "learning_rate": 3.556289340072977e-07, + "loss": 10.0684, + "step": 445650 + }, + { + "epoch": 0.9002613961869286, + "grad_norm": 321.4812316894531, + "learning_rate": 3.55499652966691e-07, + "loss": 17.8272, + "step": 445660 + }, + { + "epoch": 0.9002815968196124, + "grad_norm": 91.7345962524414, + "learning_rate": 3.5537039456281674e-07, + "loss": 13.1638, + "step": 445670 + }, + { + "epoch": 0.9003017974522962, + "grad_norm": 111.93982696533203, + "learning_rate": 3.5524115879630225e-07, + "loss": 7.6087, + "step": 445680 + }, + { + "epoch": 0.90032199808498, + "grad_norm": 481.2381286621094, + "learning_rate": 3.551119456677793e-07, + "loss": 15.0273, + "step": 445690 + }, + { + "epoch": 0.9003421987176639, + "grad_norm": 403.2091979980469, + "learning_rate": 3.5498275517787783e-07, + "loss": 33.3146, + "step": 445700 + }, + { + "epoch": 0.9003623993503477, + "grad_norm": 309.87310791015625, + "learning_rate": 3.5485358732722743e-07, + "loss": 17.5942, + "step": 445710 + }, + { + "epoch": 0.9003825999830315, + "grad_norm": 732.3163452148438, + "learning_rate": 3.547244421164564e-07, + "loss": 20.9732, + "step": 445720 + }, + { + "epoch": 0.9004028006157153, + "grad_norm": 365.7209167480469, + "learning_rate": 3.545953195461954e-07, + "loss": 22.0166, + "step": 445730 + }, + { + "epoch": 0.9004230012483991, + "grad_norm": 365.7203369140625, + "learning_rate": 3.5446621961707284e-07, + "loss": 14.7776, + "step": 445740 + }, + { + "epoch": 0.900443201881083, + "grad_norm": 545.6244506835938, + "learning_rate": 3.5433714232971927e-07, + "loss": 26.0447, + "step": 445750 + }, + { + "epoch": 0.9004634025137668, + "grad_norm": 219.1865234375, + "learning_rate": 3.5420808768476313e-07, + "loss": 17.1418, + "step": 445760 + }, + { + "epoch": 0.9004836031464506, + "grad_norm": 876.4417114257812, + "learning_rate": 3.540790556828327e-07, + "loss": 14.8119, + "step": 445770 + }, + { + "epoch": 0.9005038037791344, + "grad_norm": 154.30516052246094, + "learning_rate": 3.539500463245582e-07, + "loss": 42.6618, + "step": 445780 + }, + { + "epoch": 0.9005240044118181, + "grad_norm": 280.67626953125, + "learning_rate": 3.5382105961056735e-07, + "loss": 24.9225, + "step": 445790 + }, + { + "epoch": 0.9005442050445019, + "grad_norm": 122.18487548828125, + "learning_rate": 3.5369209554148854e-07, + "loss": 17.0533, + "step": 445800 + }, + { + "epoch": 0.9005644056771858, + "grad_norm": 348.6451110839844, + "learning_rate": 3.535631541179507e-07, + "loss": 15.0391, + "step": 445810 + }, + { + "epoch": 0.9005846063098696, + "grad_norm": 140.8256072998047, + "learning_rate": 3.534342353405834e-07, + "loss": 13.4767, + "step": 445820 + }, + { + "epoch": 0.9006048069425534, + "grad_norm": 281.6974182128906, + "learning_rate": 3.533053392100144e-07, + "loss": 24.3823, + "step": 445830 + }, + { + "epoch": 0.9006250075752372, + "grad_norm": 448.6886901855469, + "learning_rate": 3.531764657268705e-07, + "loss": 15.1547, + "step": 445840 + }, + { + "epoch": 0.900645208207921, + "grad_norm": 455.422119140625, + "learning_rate": 3.530476148917816e-07, + "loss": 31.2984, + "step": 445850 + }, + { + "epoch": 0.9006654088406049, + "grad_norm": 555.0156860351562, + "learning_rate": 3.5291878670537516e-07, + "loss": 22.9467, + "step": 445860 + }, + { + "epoch": 0.9006856094732887, + "grad_norm": 285.40625, + "learning_rate": 3.5278998116827835e-07, + "loss": 23.5238, + "step": 445870 + }, + { + "epoch": 0.9007058101059725, + "grad_norm": 291.3361511230469, + "learning_rate": 3.5266119828111953e-07, + "loss": 21.3014, + "step": 445880 + }, + { + "epoch": 0.9007260107386563, + "grad_norm": 621.8134155273438, + "learning_rate": 3.525324380445277e-07, + "loss": 15.1114, + "step": 445890 + }, + { + "epoch": 0.9007462113713401, + "grad_norm": 316.3350524902344, + "learning_rate": 3.524037004591274e-07, + "loss": 15.3083, + "step": 445900 + }, + { + "epoch": 0.900766412004024, + "grad_norm": 235.22181701660156, + "learning_rate": 3.5227498552554805e-07, + "loss": 18.8735, + "step": 445910 + }, + { + "epoch": 0.9007866126367078, + "grad_norm": 680.9277954101562, + "learning_rate": 3.5214629324441754e-07, + "loss": 33.483, + "step": 445920 + }, + { + "epoch": 0.9008068132693916, + "grad_norm": 646.0338745117188, + "learning_rate": 3.5201762361636195e-07, + "loss": 12.3207, + "step": 445930 + }, + { + "epoch": 0.9008270139020754, + "grad_norm": 601.1619873046875, + "learning_rate": 3.5188897664200804e-07, + "loss": 17.9275, + "step": 445940 + }, + { + "epoch": 0.9008472145347592, + "grad_norm": 357.0928039550781, + "learning_rate": 3.5176035232198367e-07, + "loss": 20.5941, + "step": 445950 + }, + { + "epoch": 0.9008674151674431, + "grad_norm": 431.9078674316406, + "learning_rate": 3.516317506569172e-07, + "loss": 20.7034, + "step": 445960 + }, + { + "epoch": 0.9008876158001269, + "grad_norm": 188.78501892089844, + "learning_rate": 3.515031716474321e-07, + "loss": 17.5412, + "step": 445970 + }, + { + "epoch": 0.9009078164328107, + "grad_norm": 616.5831298828125, + "learning_rate": 3.513746152941572e-07, + "loss": 15.5337, + "step": 445980 + }, + { + "epoch": 0.9009280170654945, + "grad_norm": 245.98434448242188, + "learning_rate": 3.5124608159771864e-07, + "loss": 13.5908, + "step": 445990 + }, + { + "epoch": 0.9009482176981783, + "grad_norm": 259.31622314453125, + "learning_rate": 3.511175705587433e-07, + "loss": 21.3198, + "step": 446000 + }, + { + "epoch": 0.9009684183308622, + "grad_norm": 614.026123046875, + "learning_rate": 3.509890821778561e-07, + "loss": 17.3189, + "step": 446010 + }, + { + "epoch": 0.900988618963546, + "grad_norm": 1304.6748046875, + "learning_rate": 3.508606164556855e-07, + "loss": 22.5737, + "step": 446020 + }, + { + "epoch": 0.9010088195962298, + "grad_norm": 445.1016845703125, + "learning_rate": 3.507321733928559e-07, + "loss": 13.0387, + "step": 446030 + }, + { + "epoch": 0.9010290202289136, + "grad_norm": 323.90216064453125, + "learning_rate": 3.5060375298999303e-07, + "loss": 26.3852, + "step": 446040 + }, + { + "epoch": 0.9010492208615973, + "grad_norm": 340.41754150390625, + "learning_rate": 3.5047535524772467e-07, + "loss": 23.3956, + "step": 446050 + }, + { + "epoch": 0.9010694214942812, + "grad_norm": 309.1763916015625, + "learning_rate": 3.5034698016667423e-07, + "loss": 13.9331, + "step": 446060 + }, + { + "epoch": 0.901089622126965, + "grad_norm": 299.2520751953125, + "learning_rate": 3.5021862774747007e-07, + "loss": 17.1172, + "step": 446070 + }, + { + "epoch": 0.9011098227596488, + "grad_norm": 73.07524108886719, + "learning_rate": 3.500902979907356e-07, + "loss": 15.6339, + "step": 446080 + }, + { + "epoch": 0.9011300233923326, + "grad_norm": 607.2282104492188, + "learning_rate": 3.4996199089709695e-07, + "loss": 13.3263, + "step": 446090 + }, + { + "epoch": 0.9011502240250164, + "grad_norm": 414.443359375, + "learning_rate": 3.498337064671803e-07, + "loss": 21.8615, + "step": 446100 + }, + { + "epoch": 0.9011704246577003, + "grad_norm": 176.03738403320312, + "learning_rate": 3.4970544470160905e-07, + "loss": 14.6029, + "step": 446110 + }, + { + "epoch": 0.9011906252903841, + "grad_norm": 243.4033203125, + "learning_rate": 3.495772056010105e-07, + "loss": 21.5409, + "step": 446120 + }, + { + "epoch": 0.9012108259230679, + "grad_norm": 423.4161071777344, + "learning_rate": 3.4944898916600743e-07, + "loss": 27.8391, + "step": 446130 + }, + { + "epoch": 0.9012310265557517, + "grad_norm": 488.76904296875, + "learning_rate": 3.493207953972272e-07, + "loss": 12.8866, + "step": 446140 + }, + { + "epoch": 0.9012512271884355, + "grad_norm": 815.1055297851562, + "learning_rate": 3.491926242952931e-07, + "loss": 20.7648, + "step": 446150 + }, + { + "epoch": 0.9012714278211194, + "grad_norm": 744.4523315429688, + "learning_rate": 3.4906447586082917e-07, + "loss": 27.846, + "step": 446160 + }, + { + "epoch": 0.9012916284538032, + "grad_norm": 186.1259002685547, + "learning_rate": 3.48936350094462e-07, + "loss": 11.0203, + "step": 446170 + }, + { + "epoch": 0.901311829086487, + "grad_norm": 751.5745849609375, + "learning_rate": 3.488082469968146e-07, + "loss": 27.7282, + "step": 446180 + }, + { + "epoch": 0.9013320297191708, + "grad_norm": 876.5944213867188, + "learning_rate": 3.4868016656851135e-07, + "loss": 20.8903, + "step": 446190 + }, + { + "epoch": 0.9013522303518546, + "grad_norm": 248.3055419921875, + "learning_rate": 3.4855210881017675e-07, + "loss": 18.6062, + "step": 446200 + }, + { + "epoch": 0.9013724309845385, + "grad_norm": 534.772216796875, + "learning_rate": 3.4842407372243646e-07, + "loss": 30.0212, + "step": 446210 + }, + { + "epoch": 0.9013926316172223, + "grad_norm": 178.67788696289062, + "learning_rate": 3.482960613059111e-07, + "loss": 16.603, + "step": 446220 + }, + { + "epoch": 0.9014128322499061, + "grad_norm": 169.78196716308594, + "learning_rate": 3.481680715612273e-07, + "loss": 10.8747, + "step": 446230 + }, + { + "epoch": 0.9014330328825899, + "grad_norm": 949.7888793945312, + "learning_rate": 3.480401044890086e-07, + "loss": 16.0835, + "step": 446240 + }, + { + "epoch": 0.9014532335152737, + "grad_norm": 310.1080017089844, + "learning_rate": 3.479121600898777e-07, + "loss": 22.9904, + "step": 446250 + }, + { + "epoch": 0.9014734341479576, + "grad_norm": 533.2051391601562, + "learning_rate": 3.477842383644586e-07, + "loss": 14.9268, + "step": 446260 + }, + { + "epoch": 0.9014936347806414, + "grad_norm": 290.5344543457031, + "learning_rate": 3.476563393133747e-07, + "loss": 19.1723, + "step": 446270 + }, + { + "epoch": 0.9015138354133252, + "grad_norm": 248.2886962890625, + "learning_rate": 3.475284629372511e-07, + "loss": 25.3621, + "step": 446280 + }, + { + "epoch": 0.901534036046009, + "grad_norm": 496.6837463378906, + "learning_rate": 3.474006092367077e-07, + "loss": 12.9381, + "step": 446290 + }, + { + "epoch": 0.9015542366786927, + "grad_norm": 8.167790412902832, + "learning_rate": 3.472727782123697e-07, + "loss": 5.335, + "step": 446300 + }, + { + "epoch": 0.9015744373113765, + "grad_norm": 494.1587829589844, + "learning_rate": 3.4714496986486045e-07, + "loss": 14.7211, + "step": 446310 + }, + { + "epoch": 0.9015946379440604, + "grad_norm": 496.6618957519531, + "learning_rate": 3.470171841948022e-07, + "loss": 15.1881, + "step": 446320 + }, + { + "epoch": 0.9016148385767442, + "grad_norm": 318.4088134765625, + "learning_rate": 3.468894212028173e-07, + "loss": 25.2526, + "step": 446330 + }, + { + "epoch": 0.901635039209428, + "grad_norm": 424.84149169921875, + "learning_rate": 3.467616808895302e-07, + "loss": 17.9956, + "step": 446340 + }, + { + "epoch": 0.9016552398421118, + "grad_norm": 295.3107604980469, + "learning_rate": 3.4663396325556154e-07, + "loss": 8.7933, + "step": 446350 + }, + { + "epoch": 0.9016754404747956, + "grad_norm": 235.19976806640625, + "learning_rate": 3.465062683015341e-07, + "loss": 15.9845, + "step": 446360 + }, + { + "epoch": 0.9016956411074795, + "grad_norm": 327.4865417480469, + "learning_rate": 3.463785960280719e-07, + "loss": 17.9148, + "step": 446370 + }, + { + "epoch": 0.9017158417401633, + "grad_norm": 313.4536437988281, + "learning_rate": 3.462509464357944e-07, + "loss": 28.1946, + "step": 446380 + }, + { + "epoch": 0.9017360423728471, + "grad_norm": 201.65939331054688, + "learning_rate": 3.461233195253266e-07, + "loss": 18.0275, + "step": 446390 + }, + { + "epoch": 0.9017562430055309, + "grad_norm": 977.5045166015625, + "learning_rate": 3.459957152972887e-07, + "loss": 28.939, + "step": 446400 + }, + { + "epoch": 0.9017764436382147, + "grad_norm": 0.0, + "learning_rate": 3.45868133752304e-07, + "loss": 17.0587, + "step": 446410 + }, + { + "epoch": 0.9017966442708986, + "grad_norm": 593.5664672851562, + "learning_rate": 3.45740574890992e-07, + "loss": 18.1876, + "step": 446420 + }, + { + "epoch": 0.9018168449035824, + "grad_norm": 122.75834655761719, + "learning_rate": 3.456130387139778e-07, + "loss": 40.8926, + "step": 446430 + }, + { + "epoch": 0.9018370455362662, + "grad_norm": 281.06292724609375, + "learning_rate": 3.454855252218803e-07, + "loss": 21.9502, + "step": 446440 + }, + { + "epoch": 0.90185724616895, + "grad_norm": 272.9182434082031, + "learning_rate": 3.4535803441532125e-07, + "loss": 15.2092, + "step": 446450 + }, + { + "epoch": 0.9018774468016338, + "grad_norm": 274.5888671875, + "learning_rate": 3.4523056629492344e-07, + "loss": 25.7554, + "step": 446460 + }, + { + "epoch": 0.9018976474343177, + "grad_norm": 422.311767578125, + "learning_rate": 3.451031208613076e-07, + "loss": 10.7451, + "step": 446470 + }, + { + "epoch": 0.9019178480670015, + "grad_norm": 267.4585266113281, + "learning_rate": 3.449756981150931e-07, + "loss": 14.6757, + "step": 446480 + }, + { + "epoch": 0.9019380486996853, + "grad_norm": 369.5973205566406, + "learning_rate": 3.448482980569029e-07, + "loss": 29.1553, + "step": 446490 + }, + { + "epoch": 0.9019582493323691, + "grad_norm": 713.7677001953125, + "learning_rate": 3.4472092068735917e-07, + "loss": 31.0512, + "step": 446500 + }, + { + "epoch": 0.9019784499650529, + "grad_norm": 286.2923889160156, + "learning_rate": 3.4459356600707925e-07, + "loss": 9.0444, + "step": 446510 + }, + { + "epoch": 0.9019986505977368, + "grad_norm": 692.444580078125, + "learning_rate": 3.44466234016686e-07, + "loss": 15.3244, + "step": 446520 + }, + { + "epoch": 0.9020188512304206, + "grad_norm": 233.64529418945312, + "learning_rate": 3.443389247168e-07, + "loss": 16.3657, + "step": 446530 + }, + { + "epoch": 0.9020390518631044, + "grad_norm": 156.468994140625, + "learning_rate": 3.442116381080418e-07, + "loss": 13.9561, + "step": 446540 + }, + { + "epoch": 0.9020592524957882, + "grad_norm": 376.273193359375, + "learning_rate": 3.4408437419103047e-07, + "loss": 9.307, + "step": 446550 + }, + { + "epoch": 0.9020794531284719, + "grad_norm": 253.2138671875, + "learning_rate": 3.4395713296638713e-07, + "loss": 18.5868, + "step": 446560 + }, + { + "epoch": 0.9020996537611558, + "grad_norm": 474.3890380859375, + "learning_rate": 3.4382991443473403e-07, + "loss": 20.5897, + "step": 446570 + }, + { + "epoch": 0.9021198543938396, + "grad_norm": 670.932373046875, + "learning_rate": 3.437027185966868e-07, + "loss": 13.156, + "step": 446580 + }, + { + "epoch": 0.9021400550265234, + "grad_norm": 267.61737060546875, + "learning_rate": 3.4357554545286833e-07, + "loss": 20.705, + "step": 446590 + }, + { + "epoch": 0.9021602556592072, + "grad_norm": 19.1137752532959, + "learning_rate": 3.434483950038986e-07, + "loss": 10.2194, + "step": 446600 + }, + { + "epoch": 0.902180456291891, + "grad_norm": 818.1859741210938, + "learning_rate": 3.433212672503966e-07, + "loss": 19.5881, + "step": 446610 + }, + { + "epoch": 0.9022006569245749, + "grad_norm": 497.45947265625, + "learning_rate": 3.431941621929813e-07, + "loss": 22.4659, + "step": 446620 + }, + { + "epoch": 0.9022208575572587, + "grad_norm": 109.84841918945312, + "learning_rate": 3.430670798322733e-07, + "loss": 12.4741, + "step": 446630 + }, + { + "epoch": 0.9022410581899425, + "grad_norm": 232.27517700195312, + "learning_rate": 3.4294002016889206e-07, + "loss": 15.562, + "step": 446640 + }, + { + "epoch": 0.9022612588226263, + "grad_norm": 204.5904998779297, + "learning_rate": 3.428129832034549e-07, + "loss": 18.9749, + "step": 446650 + }, + { + "epoch": 0.9022814594553101, + "grad_norm": 271.8900146484375, + "learning_rate": 3.426859689365836e-07, + "loss": 11.7842, + "step": 446660 + }, + { + "epoch": 0.902301660087994, + "grad_norm": 203.10791015625, + "learning_rate": 3.425589773688953e-07, + "loss": 20.7852, + "step": 446670 + }, + { + "epoch": 0.9023218607206778, + "grad_norm": 189.75350952148438, + "learning_rate": 3.424320085010102e-07, + "loss": 19.1101, + "step": 446680 + }, + { + "epoch": 0.9023420613533616, + "grad_norm": 96.21739959716797, + "learning_rate": 3.423050623335467e-07, + "loss": 10.193, + "step": 446690 + }, + { + "epoch": 0.9023622619860454, + "grad_norm": 410.38580322265625, + "learning_rate": 3.421781388671225e-07, + "loss": 13.8715, + "step": 446700 + }, + { + "epoch": 0.9023824626187292, + "grad_norm": 29.13329315185547, + "learning_rate": 3.420512381023583e-07, + "loss": 19.4322, + "step": 446710 + }, + { + "epoch": 0.902402663251413, + "grad_norm": 76.03953552246094, + "learning_rate": 3.419243600398703e-07, + "loss": 12.2015, + "step": 446720 + }, + { + "epoch": 0.9024228638840969, + "grad_norm": 233.65858459472656, + "learning_rate": 3.4179750468027906e-07, + "loss": 15.4427, + "step": 446730 + }, + { + "epoch": 0.9024430645167807, + "grad_norm": 392.72564697265625, + "learning_rate": 3.416706720242008e-07, + "loss": 15.2899, + "step": 446740 + }, + { + "epoch": 0.9024632651494645, + "grad_norm": 527.7315673828125, + "learning_rate": 3.415438620722555e-07, + "loss": 17.6316, + "step": 446750 + }, + { + "epoch": 0.9024834657821483, + "grad_norm": 222.16433715820312, + "learning_rate": 3.4141707482506056e-07, + "loss": 16.3429, + "step": 446760 + }, + { + "epoch": 0.9025036664148322, + "grad_norm": 687.5732421875, + "learning_rate": 3.412903102832327e-07, + "loss": 10.8618, + "step": 446770 + }, + { + "epoch": 0.902523867047516, + "grad_norm": 1229.32275390625, + "learning_rate": 3.4116356844739184e-07, + "loss": 28.2971, + "step": 446780 + }, + { + "epoch": 0.9025440676801998, + "grad_norm": 58.414588928222656, + "learning_rate": 3.4103684931815483e-07, + "loss": 17.7443, + "step": 446790 + }, + { + "epoch": 0.9025642683128836, + "grad_norm": 371.36328125, + "learning_rate": 3.409101528961378e-07, + "loss": 5.0725, + "step": 446800 + }, + { + "epoch": 0.9025844689455673, + "grad_norm": 140.0763397216797, + "learning_rate": 3.407834791819603e-07, + "loss": 15.3257, + "step": 446810 + }, + { + "epoch": 0.9026046695782511, + "grad_norm": 291.6254577636719, + "learning_rate": 3.4065682817624015e-07, + "loss": 17.0342, + "step": 446820 + }, + { + "epoch": 0.902624870210935, + "grad_norm": 433.16741943359375, + "learning_rate": 3.4053019987959234e-07, + "loss": 23.7829, + "step": 446830 + }, + { + "epoch": 0.9026450708436188, + "grad_norm": 360.67669677734375, + "learning_rate": 3.404035942926348e-07, + "loss": 14.3795, + "step": 446840 + }, + { + "epoch": 0.9026652714763026, + "grad_norm": 400.54913330078125, + "learning_rate": 3.402770114159859e-07, + "loss": 25.7854, + "step": 446850 + }, + { + "epoch": 0.9026854721089864, + "grad_norm": 313.03179931640625, + "learning_rate": 3.401504512502618e-07, + "loss": 18.2124, + "step": 446860 + }, + { + "epoch": 0.9027056727416702, + "grad_norm": 448.4454650878906, + "learning_rate": 3.4002391379607815e-07, + "loss": 26.2969, + "step": 446870 + }, + { + "epoch": 0.9027258733743541, + "grad_norm": 15.552080154418945, + "learning_rate": 3.3989739905405326e-07, + "loss": 10.4564, + "step": 446880 + }, + { + "epoch": 0.9027460740070379, + "grad_norm": 462.7173156738281, + "learning_rate": 3.3977090702480455e-07, + "loss": 22.8206, + "step": 446890 + }, + { + "epoch": 0.9027662746397217, + "grad_norm": 673.148193359375, + "learning_rate": 3.396444377089453e-07, + "loss": 21.1259, + "step": 446900 + }, + { + "epoch": 0.9027864752724055, + "grad_norm": 480.64093017578125, + "learning_rate": 3.395179911070945e-07, + "loss": 20.9275, + "step": 446910 + }, + { + "epoch": 0.9028066759050893, + "grad_norm": 125.34583282470703, + "learning_rate": 3.3939156721986777e-07, + "loss": 15.184, + "step": 446920 + }, + { + "epoch": 0.9028268765377732, + "grad_norm": 139.38539123535156, + "learning_rate": 3.3926516604788185e-07, + "loss": 23.2434, + "step": 446930 + }, + { + "epoch": 0.902847077170457, + "grad_norm": 422.3639221191406, + "learning_rate": 3.3913878759175124e-07, + "loss": 24.0939, + "step": 446940 + }, + { + "epoch": 0.9028672778031408, + "grad_norm": 721.1874389648438, + "learning_rate": 3.3901243185209375e-07, + "loss": 20.7313, + "step": 446950 + }, + { + "epoch": 0.9028874784358246, + "grad_norm": 594.5277709960938, + "learning_rate": 3.388860988295245e-07, + "loss": 18.999, + "step": 446960 + }, + { + "epoch": 0.9029076790685084, + "grad_norm": 317.29876708984375, + "learning_rate": 3.3875978852465795e-07, + "loss": 15.7928, + "step": 446970 + }, + { + "epoch": 0.9029278797011923, + "grad_norm": 22.906665802001953, + "learning_rate": 3.3863350093811196e-07, + "loss": 13.0489, + "step": 446980 + }, + { + "epoch": 0.9029480803338761, + "grad_norm": 198.2176971435547, + "learning_rate": 3.3850723607049994e-07, + "loss": 14.2384, + "step": 446990 + }, + { + "epoch": 0.9029682809665599, + "grad_norm": 149.88609313964844, + "learning_rate": 3.3838099392243915e-07, + "loss": 19.9989, + "step": 447000 + }, + { + "epoch": 0.9029884815992437, + "grad_norm": 35.93222427368164, + "learning_rate": 3.382547744945436e-07, + "loss": 16.6192, + "step": 447010 + }, + { + "epoch": 0.9030086822319275, + "grad_norm": 271.7751159667969, + "learning_rate": 3.3812857778742935e-07, + "loss": 11.4213, + "step": 447020 + }, + { + "epoch": 0.9030288828646114, + "grad_norm": 524.922119140625, + "learning_rate": 3.3800240380171046e-07, + "loss": 14.8176, + "step": 447030 + }, + { + "epoch": 0.9030490834972952, + "grad_norm": 583.8279418945312, + "learning_rate": 3.3787625253800247e-07, + "loss": 13.3672, + "step": 447040 + }, + { + "epoch": 0.903069284129979, + "grad_norm": 301.9650573730469, + "learning_rate": 3.3775012399692055e-07, + "loss": 22.4438, + "step": 447050 + }, + { + "epoch": 0.9030894847626628, + "grad_norm": 724.0348510742188, + "learning_rate": 3.3762401817907795e-07, + "loss": 18.063, + "step": 447060 + }, + { + "epoch": 0.9031096853953465, + "grad_norm": 301.4600524902344, + "learning_rate": 3.374979350850921e-07, + "loss": 24.1437, + "step": 447070 + }, + { + "epoch": 0.9031298860280303, + "grad_norm": 89.9062728881836, + "learning_rate": 3.373718747155752e-07, + "loss": 24.9831, + "step": 447080 + }, + { + "epoch": 0.9031500866607142, + "grad_norm": 147.34152221679688, + "learning_rate": 3.372458370711412e-07, + "loss": 10.8046, + "step": 447090 + }, + { + "epoch": 0.903170287293398, + "grad_norm": 258.792236328125, + "learning_rate": 3.371198221524069e-07, + "loss": 13.8453, + "step": 447100 + }, + { + "epoch": 0.9031904879260818, + "grad_norm": 426.54119873046875, + "learning_rate": 3.3699382995998455e-07, + "loss": 16.5044, + "step": 447110 + }, + { + "epoch": 0.9032106885587656, + "grad_norm": 264.06488037109375, + "learning_rate": 3.368678604944886e-07, + "loss": 13.1876, + "step": 447120 + }, + { + "epoch": 0.9032308891914494, + "grad_norm": 298.4723815917969, + "learning_rate": 3.3674191375653255e-07, + "loss": 10.8668, + "step": 447130 + }, + { + "epoch": 0.9032510898241333, + "grad_norm": 236.72854614257812, + "learning_rate": 3.366159897467314e-07, + "loss": 14.6112, + "step": 447140 + }, + { + "epoch": 0.9032712904568171, + "grad_norm": 278.7016296386719, + "learning_rate": 3.364900884656991e-07, + "loss": 16.1786, + "step": 447150 + }, + { + "epoch": 0.9032914910895009, + "grad_norm": 394.2326354980469, + "learning_rate": 3.3636420991404686e-07, + "loss": 9.8431, + "step": 447160 + }, + { + "epoch": 0.9033116917221847, + "grad_norm": 443.133544921875, + "learning_rate": 3.3623835409239023e-07, + "loss": 21.7817, + "step": 447170 + }, + { + "epoch": 0.9033318923548685, + "grad_norm": 417.1814880371094, + "learning_rate": 3.361125210013438e-07, + "loss": 15.9135, + "step": 447180 + }, + { + "epoch": 0.9033520929875524, + "grad_norm": 0.0, + "learning_rate": 3.3598671064151767e-07, + "loss": 18.1624, + "step": 447190 + }, + { + "epoch": 0.9033722936202362, + "grad_norm": 309.14569091796875, + "learning_rate": 3.358609230135268e-07, + "loss": 13.8954, + "step": 447200 + }, + { + "epoch": 0.90339249425292, + "grad_norm": 253.5623016357422, + "learning_rate": 3.357351581179846e-07, + "loss": 23.6011, + "step": 447210 + }, + { + "epoch": 0.9034126948856038, + "grad_norm": 218.31854248046875, + "learning_rate": 3.35609415955504e-07, + "loss": 16.3643, + "step": 447220 + }, + { + "epoch": 0.9034328955182876, + "grad_norm": 628.333740234375, + "learning_rate": 3.354836965266961e-07, + "loss": 49.4682, + "step": 447230 + }, + { + "epoch": 0.9034530961509715, + "grad_norm": 351.5813293457031, + "learning_rate": 3.35357999832176e-07, + "loss": 17.8232, + "step": 447240 + }, + { + "epoch": 0.9034732967836553, + "grad_norm": 8.600955963134766, + "learning_rate": 3.352323258725554e-07, + "loss": 13.7669, + "step": 447250 + }, + { + "epoch": 0.9034934974163391, + "grad_norm": 387.5688781738281, + "learning_rate": 3.351066746484455e-07, + "loss": 18.2632, + "step": 447260 + }, + { + "epoch": 0.9035136980490229, + "grad_norm": 518.1428833007812, + "learning_rate": 3.349810461604608e-07, + "loss": 22.6909, + "step": 447270 + }, + { + "epoch": 0.9035338986817067, + "grad_norm": 448.3558654785156, + "learning_rate": 3.3485544040921194e-07, + "loss": 33.6627, + "step": 447280 + }, + { + "epoch": 0.9035540993143906, + "grad_norm": 583.00341796875, + "learning_rate": 3.347298573953128e-07, + "loss": 20.9727, + "step": 447290 + }, + { + "epoch": 0.9035742999470744, + "grad_norm": 18.169967651367188, + "learning_rate": 3.3460429711937417e-07, + "loss": 19.8996, + "step": 447300 + }, + { + "epoch": 0.9035945005797582, + "grad_norm": 311.3398132324219, + "learning_rate": 3.344787595820076e-07, + "loss": 13.1239, + "step": 447310 + }, + { + "epoch": 0.903614701212442, + "grad_norm": 567.7097778320312, + "learning_rate": 3.343532447838266e-07, + "loss": 17.0992, + "step": 447320 + }, + { + "epoch": 0.9036349018451257, + "grad_norm": 484.4988098144531, + "learning_rate": 3.3422775272544115e-07, + "loss": 16.0481, + "step": 447330 + }, + { + "epoch": 0.9036551024778096, + "grad_norm": 355.2326354980469, + "learning_rate": 3.3410228340746475e-07, + "loss": 15.4506, + "step": 447340 + }, + { + "epoch": 0.9036753031104934, + "grad_norm": 338.78253173828125, + "learning_rate": 3.3397683683050685e-07, + "loss": 27.8149, + "step": 447350 + }, + { + "epoch": 0.9036955037431772, + "grad_norm": 374.1416931152344, + "learning_rate": 3.338514129951809e-07, + "loss": 6.0169, + "step": 447360 + }, + { + "epoch": 0.903715704375861, + "grad_norm": 189.8123321533203, + "learning_rate": 3.337260119020974e-07, + "loss": 22.418, + "step": 447370 + }, + { + "epoch": 0.9037359050085448, + "grad_norm": 403.3528747558594, + "learning_rate": 3.33600633551866e-07, + "loss": 25.0211, + "step": 447380 + }, + { + "epoch": 0.9037561056412287, + "grad_norm": 108.13957977294922, + "learning_rate": 3.334752779451006e-07, + "loss": 12.1229, + "step": 447390 + }, + { + "epoch": 0.9037763062739125, + "grad_norm": 207.4993438720703, + "learning_rate": 3.3334994508241013e-07, + "loss": 16.8677, + "step": 447400 + }, + { + "epoch": 0.9037965069065963, + "grad_norm": 612.2901000976562, + "learning_rate": 3.332246349644058e-07, + "loss": 32.5601, + "step": 447410 + }, + { + "epoch": 0.9038167075392801, + "grad_norm": 235.7354278564453, + "learning_rate": 3.3309934759169825e-07, + "loss": 20.5226, + "step": 447420 + }, + { + "epoch": 0.9038369081719639, + "grad_norm": 312.7841796875, + "learning_rate": 3.3297408296489973e-07, + "loss": 13.5229, + "step": 447430 + }, + { + "epoch": 0.9038571088046478, + "grad_norm": 346.403076171875, + "learning_rate": 3.328488410846187e-07, + "loss": 21.7707, + "step": 447440 + }, + { + "epoch": 0.9038773094373316, + "grad_norm": 612.0426635742188, + "learning_rate": 3.327236219514657e-07, + "loss": 15.0607, + "step": 447450 + }, + { + "epoch": 0.9038975100700154, + "grad_norm": 130.6298828125, + "learning_rate": 3.325984255660525e-07, + "loss": 29.4215, + "step": 447460 + }, + { + "epoch": 0.9039177107026992, + "grad_norm": 374.779296875, + "learning_rate": 3.324732519289886e-07, + "loss": 15.5523, + "step": 447470 + }, + { + "epoch": 0.903937911335383, + "grad_norm": 244.9932403564453, + "learning_rate": 3.3234810104088356e-07, + "loss": 14.7736, + "step": 447480 + }, + { + "epoch": 0.9039581119680669, + "grad_norm": 402.7099914550781, + "learning_rate": 3.322229729023474e-07, + "loss": 11.1433, + "step": 447490 + }, + { + "epoch": 0.9039783126007507, + "grad_norm": 646.601806640625, + "learning_rate": 3.320978675139919e-07, + "loss": 19.4331, + "step": 447500 + }, + { + "epoch": 0.9039985132334345, + "grad_norm": 16.859086990356445, + "learning_rate": 3.319727848764237e-07, + "loss": 22.936, + "step": 447510 + }, + { + "epoch": 0.9040187138661183, + "grad_norm": 434.27294921875, + "learning_rate": 3.318477249902541e-07, + "loss": 12.9803, + "step": 447520 + }, + { + "epoch": 0.9040389144988021, + "grad_norm": 109.24701690673828, + "learning_rate": 3.317226878560931e-07, + "loss": 16.8664, + "step": 447530 + }, + { + "epoch": 0.904059115131486, + "grad_norm": 533.0873413085938, + "learning_rate": 3.3159767347454963e-07, + "loss": 28.9964, + "step": 447540 + }, + { + "epoch": 0.9040793157641698, + "grad_norm": 255.40516662597656, + "learning_rate": 3.3147268184623216e-07, + "loss": 13.5238, + "step": 447550 + }, + { + "epoch": 0.9040995163968536, + "grad_norm": 294.3439636230469, + "learning_rate": 3.3134771297175127e-07, + "loss": 21.6402, + "step": 447560 + }, + { + "epoch": 0.9041197170295374, + "grad_norm": 246.0892333984375, + "learning_rate": 3.3122276685171593e-07, + "loss": 22.777, + "step": 447570 + }, + { + "epoch": 0.9041399176622211, + "grad_norm": 598.0889892578125, + "learning_rate": 3.3109784348673293e-07, + "loss": 17.5916, + "step": 447580 + }, + { + "epoch": 0.904160118294905, + "grad_norm": 875.310791015625, + "learning_rate": 3.309729428774144e-07, + "loss": 17.8856, + "step": 447590 + }, + { + "epoch": 0.9041803189275888, + "grad_norm": 216.2782745361328, + "learning_rate": 3.3084806502436617e-07, + "loss": 17.0905, + "step": 447600 + }, + { + "epoch": 0.9042005195602726, + "grad_norm": 251.59776306152344, + "learning_rate": 3.3072320992819875e-07, + "loss": 22.732, + "step": 447610 + }, + { + "epoch": 0.9042207201929564, + "grad_norm": 118.15159606933594, + "learning_rate": 3.3059837758951995e-07, + "loss": 12.7717, + "step": 447620 + }, + { + "epoch": 0.9042409208256402, + "grad_norm": 723.6868896484375, + "learning_rate": 3.3047356800893826e-07, + "loss": 24.9406, + "step": 447630 + }, + { + "epoch": 0.904261121458324, + "grad_norm": 495.8370666503906, + "learning_rate": 3.303487811870626e-07, + "loss": 12.215, + "step": 447640 + }, + { + "epoch": 0.9042813220910079, + "grad_norm": 214.63211059570312, + "learning_rate": 3.3022401712450025e-07, + "loss": 14.264, + "step": 447650 + }, + { + "epoch": 0.9043015227236917, + "grad_norm": 1940.4654541015625, + "learning_rate": 3.3009927582185965e-07, + "loss": 36.2257, + "step": 447660 + }, + { + "epoch": 0.9043217233563755, + "grad_norm": 94.8907470703125, + "learning_rate": 3.2997455727974856e-07, + "loss": 17.4319, + "step": 447670 + }, + { + "epoch": 0.9043419239890593, + "grad_norm": 51.35108947753906, + "learning_rate": 3.2984986149877554e-07, + "loss": 7.2657, + "step": 447680 + }, + { + "epoch": 0.9043621246217431, + "grad_norm": 0.6754915118217468, + "learning_rate": 3.297251884795477e-07, + "loss": 19.2647, + "step": 447690 + }, + { + "epoch": 0.904382325254427, + "grad_norm": 611.1425170898438, + "learning_rate": 3.2960053822267245e-07, + "loss": 18.7656, + "step": 447700 + }, + { + "epoch": 0.9044025258871108, + "grad_norm": 124.38903045654297, + "learning_rate": 3.294759107287582e-07, + "loss": 10.8771, + "step": 447710 + }, + { + "epoch": 0.9044227265197946, + "grad_norm": 308.4328918457031, + "learning_rate": 3.293513059984121e-07, + "loss": 15.8337, + "step": 447720 + }, + { + "epoch": 0.9044429271524784, + "grad_norm": 346.4642333984375, + "learning_rate": 3.2922672403224053e-07, + "loss": 20.7543, + "step": 447730 + }, + { + "epoch": 0.9044631277851622, + "grad_norm": 415.1618347167969, + "learning_rate": 3.2910216483085125e-07, + "loss": 15.1491, + "step": 447740 + }, + { + "epoch": 0.9044833284178461, + "grad_norm": 471.6772155761719, + "learning_rate": 3.289776283948526e-07, + "loss": 16.6599, + "step": 447750 + }, + { + "epoch": 0.9045035290505299, + "grad_norm": 338.21728515625, + "learning_rate": 3.2885311472485025e-07, + "loss": 15.285, + "step": 447760 + }, + { + "epoch": 0.9045237296832137, + "grad_norm": 173.7224578857422, + "learning_rate": 3.287286238214504e-07, + "loss": 12.2094, + "step": 447770 + }, + { + "epoch": 0.9045439303158975, + "grad_norm": 188.3278350830078, + "learning_rate": 3.286041556852615e-07, + "loss": 11.3011, + "step": 447780 + }, + { + "epoch": 0.9045641309485813, + "grad_norm": 178.1429443359375, + "learning_rate": 3.2847971031688963e-07, + "loss": 32.7256, + "step": 447790 + }, + { + "epoch": 0.9045843315812652, + "grad_norm": 302.8177490234375, + "learning_rate": 3.283552877169399e-07, + "loss": 18.2551, + "step": 447800 + }, + { + "epoch": 0.904604532213949, + "grad_norm": 243.9778594970703, + "learning_rate": 3.282308878860202e-07, + "loss": 11.2811, + "step": 447810 + }, + { + "epoch": 0.9046247328466328, + "grad_norm": 298.0887145996094, + "learning_rate": 3.281065108247372e-07, + "loss": 49.3632, + "step": 447820 + }, + { + "epoch": 0.9046449334793166, + "grad_norm": 426.9590759277344, + "learning_rate": 3.279821565336966e-07, + "loss": 11.56, + "step": 447830 + }, + { + "epoch": 0.9046651341120003, + "grad_norm": 115.5376968383789, + "learning_rate": 3.2785782501350284e-07, + "loss": 20.0251, + "step": 447840 + }, + { + "epoch": 0.9046853347446842, + "grad_norm": 229.4214630126953, + "learning_rate": 3.277335162647649e-07, + "loss": 30.5869, + "step": 447850 + }, + { + "epoch": 0.904705535377368, + "grad_norm": 498.9197082519531, + "learning_rate": 3.276092302880868e-07, + "loss": 18.484, + "step": 447860 + }, + { + "epoch": 0.9047257360100518, + "grad_norm": 226.56736755371094, + "learning_rate": 3.274849670840741e-07, + "loss": 7.8482, + "step": 447870 + }, + { + "epoch": 0.9047459366427356, + "grad_norm": 256.080810546875, + "learning_rate": 3.2736072665333353e-07, + "loss": 16.6454, + "step": 447880 + }, + { + "epoch": 0.9047661372754194, + "grad_norm": 244.22299194335938, + "learning_rate": 3.272365089964691e-07, + "loss": 11.4553, + "step": 447890 + }, + { + "epoch": 0.9047863379081033, + "grad_norm": 233.81263732910156, + "learning_rate": 3.271123141140886e-07, + "loss": 16.5892, + "step": 447900 + }, + { + "epoch": 0.9048065385407871, + "grad_norm": 615.6817626953125, + "learning_rate": 3.269881420067944e-07, + "loss": 17.0083, + "step": 447910 + }, + { + "epoch": 0.9048267391734709, + "grad_norm": 204.99356079101562, + "learning_rate": 3.268639926751943e-07, + "loss": 11.7022, + "step": 447920 + }, + { + "epoch": 0.9048469398061547, + "grad_norm": 595.6871948242188, + "learning_rate": 3.267398661198923e-07, + "loss": 26.3127, + "step": 447930 + }, + { + "epoch": 0.9048671404388385, + "grad_norm": 300.2926940917969, + "learning_rate": 3.2661576234149285e-07, + "loss": 14.0718, + "step": 447940 + }, + { + "epoch": 0.9048873410715224, + "grad_norm": 243.31668090820312, + "learning_rate": 3.264916813406022e-07, + "loss": 19.1018, + "step": 447950 + }, + { + "epoch": 0.9049075417042062, + "grad_norm": 388.39508056640625, + "learning_rate": 3.263676231178231e-07, + "loss": 9.4069, + "step": 447960 + }, + { + "epoch": 0.90492774233689, + "grad_norm": 5.497354984283447, + "learning_rate": 3.262435876737624e-07, + "loss": 16.1466, + "step": 447970 + }, + { + "epoch": 0.9049479429695738, + "grad_norm": 346.11614990234375, + "learning_rate": 3.2611957500902345e-07, + "loss": 26.7694, + "step": 447980 + }, + { + "epoch": 0.9049681436022576, + "grad_norm": 1.7630181312561035, + "learning_rate": 3.2599558512421024e-07, + "loss": 16.1983, + "step": 447990 + }, + { + "epoch": 0.9049883442349415, + "grad_norm": 228.85594177246094, + "learning_rate": 3.258716180199278e-07, + "loss": 19.5428, + "step": 448000 + }, + { + "epoch": 0.9050085448676253, + "grad_norm": 92.16657257080078, + "learning_rate": 3.2574767369678073e-07, + "loss": 15.1785, + "step": 448010 + }, + { + "epoch": 0.9050287455003091, + "grad_norm": 402.4857177734375, + "learning_rate": 3.2562375215537176e-07, + "loss": 11.6245, + "step": 448020 + }, + { + "epoch": 0.9050489461329929, + "grad_norm": 345.69921875, + "learning_rate": 3.2549985339630606e-07, + "loss": 22.8108, + "step": 448030 + }, + { + "epoch": 0.9050691467656767, + "grad_norm": 150.28077697753906, + "learning_rate": 3.253759774201881e-07, + "loss": 17.1309, + "step": 448040 + }, + { + "epoch": 0.9050893473983606, + "grad_norm": 653.8369140625, + "learning_rate": 3.252521242276191e-07, + "loss": 29.8362, + "step": 448050 + }, + { + "epoch": 0.9051095480310444, + "grad_norm": 420.880615234375, + "learning_rate": 3.2512829381920463e-07, + "loss": 21.5998, + "step": 448060 + }, + { + "epoch": 0.9051297486637282, + "grad_norm": 253.64968872070312, + "learning_rate": 3.250044861955487e-07, + "loss": 24.7224, + "step": 448070 + }, + { + "epoch": 0.905149949296412, + "grad_norm": 10.839777946472168, + "learning_rate": 3.248807013572536e-07, + "loss": 21.1545, + "step": 448080 + }, + { + "epoch": 0.9051701499290957, + "grad_norm": 449.986083984375, + "learning_rate": 3.2475693930492214e-07, + "loss": 11.1076, + "step": 448090 + }, + { + "epoch": 0.9051903505617795, + "grad_norm": 435.3336486816406, + "learning_rate": 3.246332000391583e-07, + "loss": 15.9453, + "step": 448100 + }, + { + "epoch": 0.9052105511944634, + "grad_norm": 679.3292846679688, + "learning_rate": 3.245094835605667e-07, + "loss": 16.2423, + "step": 448110 + }, + { + "epoch": 0.9052307518271472, + "grad_norm": 145.24484252929688, + "learning_rate": 3.2438578986974776e-07, + "loss": 19.8766, + "step": 448120 + }, + { + "epoch": 0.905250952459831, + "grad_norm": 776.5805053710938, + "learning_rate": 3.242621189673051e-07, + "loss": 32.9737, + "step": 448130 + }, + { + "epoch": 0.9052711530925148, + "grad_norm": 196.9553680419922, + "learning_rate": 3.2413847085384256e-07, + "loss": 13.7633, + "step": 448140 + }, + { + "epoch": 0.9052913537251986, + "grad_norm": 300.7092590332031, + "learning_rate": 3.240148455299619e-07, + "loss": 12.6026, + "step": 448150 + }, + { + "epoch": 0.9053115543578825, + "grad_norm": 359.0980529785156, + "learning_rate": 3.2389124299626483e-07, + "loss": 24.2891, + "step": 448160 + }, + { + "epoch": 0.9053317549905663, + "grad_norm": 576.0731811523438, + "learning_rate": 3.237676632533554e-07, + "loss": 14.0114, + "step": 448170 + }, + { + "epoch": 0.9053519556232501, + "grad_norm": 501.6863708496094, + "learning_rate": 3.2364410630183587e-07, + "loss": 14.6857, + "step": 448180 + }, + { + "epoch": 0.9053721562559339, + "grad_norm": 275.0650634765625, + "learning_rate": 3.2352057214230623e-07, + "loss": 11.8622, + "step": 448190 + }, + { + "epoch": 0.9053923568886177, + "grad_norm": 12.33811092376709, + "learning_rate": 3.233970607753717e-07, + "loss": 18.6745, + "step": 448200 + }, + { + "epoch": 0.9054125575213016, + "grad_norm": 254.0646514892578, + "learning_rate": 3.2327357220163116e-07, + "loss": 15.4009, + "step": 448210 + }, + { + "epoch": 0.9054327581539854, + "grad_norm": 294.08221435546875, + "learning_rate": 3.231501064216891e-07, + "loss": 27.6999, + "step": 448220 + }, + { + "epoch": 0.9054529587866692, + "grad_norm": 194.0894775390625, + "learning_rate": 3.2302666343614565e-07, + "loss": 13.6976, + "step": 448230 + }, + { + "epoch": 0.905473159419353, + "grad_norm": 417.09747314453125, + "learning_rate": 3.2290324324560363e-07, + "loss": 18.6556, + "step": 448240 + }, + { + "epoch": 0.9054933600520368, + "grad_norm": 286.4114685058594, + "learning_rate": 3.227798458506637e-07, + "loss": 24.0155, + "step": 448250 + }, + { + "epoch": 0.9055135606847207, + "grad_norm": 399.8034362792969, + "learning_rate": 3.22656471251927e-07, + "loss": 22.4191, + "step": 448260 + }, + { + "epoch": 0.9055337613174045, + "grad_norm": 260.3201599121094, + "learning_rate": 3.225331194499964e-07, + "loss": 10.4566, + "step": 448270 + }, + { + "epoch": 0.9055539619500883, + "grad_norm": 192.796142578125, + "learning_rate": 3.2240979044547095e-07, + "loss": 14.5858, + "step": 448280 + }, + { + "epoch": 0.9055741625827721, + "grad_norm": 601.3679809570312, + "learning_rate": 3.2228648423895335e-07, + "loss": 19.8696, + "step": 448290 + }, + { + "epoch": 0.9055943632154559, + "grad_norm": 512.9625854492188, + "learning_rate": 3.2216320083104434e-07, + "loss": 18.3414, + "step": 448300 + }, + { + "epoch": 0.9056145638481398, + "grad_norm": 388.1852722167969, + "learning_rate": 3.2203994022234396e-07, + "loss": 19.741, + "step": 448310 + }, + { + "epoch": 0.9056347644808236, + "grad_norm": 282.55877685546875, + "learning_rate": 3.2191670241345395e-07, + "loss": 23.1342, + "step": 448320 + }, + { + "epoch": 0.9056549651135074, + "grad_norm": 1288.4505615234375, + "learning_rate": 3.2179348740497494e-07, + "loss": 15.1686, + "step": 448330 + }, + { + "epoch": 0.9056751657461912, + "grad_norm": 421.3667907714844, + "learning_rate": 3.216702951975059e-07, + "loss": 17.3516, + "step": 448340 + }, + { + "epoch": 0.9056953663788749, + "grad_norm": 95.43819427490234, + "learning_rate": 3.2154712579164913e-07, + "loss": 7.5615, + "step": 448350 + }, + { + "epoch": 0.9057155670115588, + "grad_norm": 406.94879150390625, + "learning_rate": 3.2142397918800416e-07, + "loss": 19.1469, + "step": 448360 + }, + { + "epoch": 0.9057357676442426, + "grad_norm": 99.41961669921875, + "learning_rate": 3.213008553871716e-07, + "loss": 11.4991, + "step": 448370 + }, + { + "epoch": 0.9057559682769264, + "grad_norm": 403.7974853515625, + "learning_rate": 3.2117775438975096e-07, + "loss": 19.0258, + "step": 448380 + }, + { + "epoch": 0.9057761689096102, + "grad_norm": 141.62965393066406, + "learning_rate": 3.2105467619634234e-07, + "loss": 13.6665, + "step": 448390 + }, + { + "epoch": 0.905796369542294, + "grad_norm": 194.33724975585938, + "learning_rate": 3.2093162080754634e-07, + "loss": 10.719, + "step": 448400 + }, + { + "epoch": 0.9058165701749779, + "grad_norm": 289.2017517089844, + "learning_rate": 3.208085882239614e-07, + "loss": 11.4255, + "step": 448410 + }, + { + "epoch": 0.9058367708076617, + "grad_norm": 219.3064727783203, + "learning_rate": 3.206855784461876e-07, + "loss": 18.4352, + "step": 448420 + }, + { + "epoch": 0.9058569714403455, + "grad_norm": 170.38917541503906, + "learning_rate": 3.205625914748256e-07, + "loss": 22.2926, + "step": 448430 + }, + { + "epoch": 0.9058771720730293, + "grad_norm": 533.3335571289062, + "learning_rate": 3.2043962731047373e-07, + "loss": 10.2524, + "step": 448440 + }, + { + "epoch": 0.9058973727057131, + "grad_norm": 7.176573753356934, + "learning_rate": 3.20316685953731e-07, + "loss": 28.7266, + "step": 448450 + }, + { + "epoch": 0.905917573338397, + "grad_norm": 258.6211242675781, + "learning_rate": 3.20193767405198e-07, + "loss": 11.7974, + "step": 448460 + }, + { + "epoch": 0.9059377739710808, + "grad_norm": 153.90350341796875, + "learning_rate": 3.2007087166547325e-07, + "loss": 11.3183, + "step": 448470 + }, + { + "epoch": 0.9059579746037646, + "grad_norm": 189.8888397216797, + "learning_rate": 3.199479987351545e-07, + "loss": 13.7309, + "step": 448480 + }, + { + "epoch": 0.9059781752364484, + "grad_norm": 615.9976806640625, + "learning_rate": 3.1982514861484184e-07, + "loss": 17.5584, + "step": 448490 + }, + { + "epoch": 0.9059983758691322, + "grad_norm": 1100.916015625, + "learning_rate": 3.1970232130513365e-07, + "loss": 18.7667, + "step": 448500 + }, + { + "epoch": 0.906018576501816, + "grad_norm": 349.28466796875, + "learning_rate": 3.19579516806629e-07, + "loss": 22.8324, + "step": 448510 + }, + { + "epoch": 0.9060387771344999, + "grad_norm": 385.8545227050781, + "learning_rate": 3.194567351199257e-07, + "loss": 24.0253, + "step": 448520 + }, + { + "epoch": 0.9060589777671837, + "grad_norm": 657.9740600585938, + "learning_rate": 3.193339762456232e-07, + "loss": 12.212, + "step": 448530 + }, + { + "epoch": 0.9060791783998675, + "grad_norm": 757.3938598632812, + "learning_rate": 3.1921124018431946e-07, + "loss": 19.3436, + "step": 448540 + }, + { + "epoch": 0.9060993790325513, + "grad_norm": 373.80242919921875, + "learning_rate": 3.1908852693661116e-07, + "loss": 27.6082, + "step": 448550 + }, + { + "epoch": 0.9061195796652352, + "grad_norm": 371.020751953125, + "learning_rate": 3.1896583650309896e-07, + "loss": 20.1392, + "step": 448560 + }, + { + "epoch": 0.906139780297919, + "grad_norm": 597.7250366210938, + "learning_rate": 3.188431688843785e-07, + "loss": 22.1884, + "step": 448570 + }, + { + "epoch": 0.9061599809306028, + "grad_norm": 571.2300415039062, + "learning_rate": 3.187205240810493e-07, + "loss": 21.1181, + "step": 448580 + }, + { + "epoch": 0.9061801815632866, + "grad_norm": 228.69024658203125, + "learning_rate": 3.1859790209370855e-07, + "loss": 16.468, + "step": 448590 + }, + { + "epoch": 0.9062003821959704, + "grad_norm": 60.642494201660156, + "learning_rate": 3.1847530292295313e-07, + "loss": 9.7498, + "step": 448600 + }, + { + "epoch": 0.9062205828286541, + "grad_norm": 327.65728759765625, + "learning_rate": 3.18352726569382e-07, + "loss": 26.8853, + "step": 448610 + }, + { + "epoch": 0.906240783461338, + "grad_norm": 418.5385437011719, + "learning_rate": 3.1823017303359185e-07, + "loss": 16.8091, + "step": 448620 + }, + { + "epoch": 0.9062609840940218, + "grad_norm": 602.8416748046875, + "learning_rate": 3.181076423161794e-07, + "loss": 28.1128, + "step": 448630 + }, + { + "epoch": 0.9062811847267056, + "grad_norm": 307.3411865234375, + "learning_rate": 3.179851344177426e-07, + "loss": 15.6654, + "step": 448640 + }, + { + "epoch": 0.9063013853593894, + "grad_norm": 315.0074157714844, + "learning_rate": 3.1786264933887977e-07, + "loss": 10.2402, + "step": 448650 + }, + { + "epoch": 0.9063215859920732, + "grad_norm": 312.59918212890625, + "learning_rate": 3.1774018708018493e-07, + "loss": 14.1066, + "step": 448660 + }, + { + "epoch": 0.9063417866247571, + "grad_norm": 371.5028076171875, + "learning_rate": 3.176177476422565e-07, + "loss": 11.442, + "step": 448670 + }, + { + "epoch": 0.9063619872574409, + "grad_norm": 310.7154235839844, + "learning_rate": 3.1749533102569176e-07, + "loss": 8.6138, + "step": 448680 + }, + { + "epoch": 0.9063821878901247, + "grad_norm": 11.721882820129395, + "learning_rate": 3.173729372310874e-07, + "loss": 10.4029, + "step": 448690 + }, + { + "epoch": 0.9064023885228085, + "grad_norm": 372.51806640625, + "learning_rate": 3.172505662590386e-07, + "loss": 12.4118, + "step": 448700 + }, + { + "epoch": 0.9064225891554923, + "grad_norm": 541.5244750976562, + "learning_rate": 3.1712821811014205e-07, + "loss": 19.0972, + "step": 448710 + }, + { + "epoch": 0.9064427897881762, + "grad_norm": 290.11285400390625, + "learning_rate": 3.170058927849967e-07, + "loss": 9.8545, + "step": 448720 + }, + { + "epoch": 0.90646299042086, + "grad_norm": 413.8747863769531, + "learning_rate": 3.168835902841949e-07, + "loss": 11.5055, + "step": 448730 + }, + { + "epoch": 0.9064831910535438, + "grad_norm": 268.87640380859375, + "learning_rate": 3.167613106083345e-07, + "loss": 17.3808, + "step": 448740 + }, + { + "epoch": 0.9065033916862276, + "grad_norm": 547.7677001953125, + "learning_rate": 3.166390537580122e-07, + "loss": 18.9562, + "step": 448750 + }, + { + "epoch": 0.9065235923189114, + "grad_norm": 8.598103523254395, + "learning_rate": 3.165168197338231e-07, + "loss": 25.9563, + "step": 448760 + }, + { + "epoch": 0.9065437929515953, + "grad_norm": 537.5119018554688, + "learning_rate": 3.1639460853636226e-07, + "loss": 16.9571, + "step": 448770 + }, + { + "epoch": 0.9065639935842791, + "grad_norm": 21.0839900970459, + "learning_rate": 3.162724201662265e-07, + "loss": 13.5353, + "step": 448780 + }, + { + "epoch": 0.9065841942169629, + "grad_norm": 6.599519729614258, + "learning_rate": 3.161502546240114e-07, + "loss": 13.7822, + "step": 448790 + }, + { + "epoch": 0.9066043948496467, + "grad_norm": 888.1553344726562, + "learning_rate": 3.160281119103109e-07, + "loss": 30.5156, + "step": 448800 + }, + { + "epoch": 0.9066245954823305, + "grad_norm": 198.2985076904297, + "learning_rate": 3.159059920257218e-07, + "loss": 15.3852, + "step": 448810 + }, + { + "epoch": 0.9066447961150144, + "grad_norm": 103.79512786865234, + "learning_rate": 3.157838949708386e-07, + "loss": 18.8775, + "step": 448820 + }, + { + "epoch": 0.9066649967476982, + "grad_norm": 1511.2833251953125, + "learning_rate": 3.1566182074625693e-07, + "loss": 25.0839, + "step": 448830 + }, + { + "epoch": 0.906685197380382, + "grad_norm": 641.9132080078125, + "learning_rate": 3.155397693525708e-07, + "loss": 27.815, + "step": 448840 + }, + { + "epoch": 0.9067053980130658, + "grad_norm": 455.2959899902344, + "learning_rate": 3.1541774079037635e-07, + "loss": 24.2859, + "step": 448850 + }, + { + "epoch": 0.9067255986457495, + "grad_norm": 527.1464233398438, + "learning_rate": 3.1529573506026757e-07, + "loss": 28.662, + "step": 448860 + }, + { + "epoch": 0.9067457992784334, + "grad_norm": 25.249774932861328, + "learning_rate": 3.151737521628384e-07, + "loss": 12.2626, + "step": 448870 + }, + { + "epoch": 0.9067659999111172, + "grad_norm": 164.37081909179688, + "learning_rate": 3.150517920986851e-07, + "loss": 12.5367, + "step": 448880 + }, + { + "epoch": 0.906786200543801, + "grad_norm": 476.541259765625, + "learning_rate": 3.1492985486840044e-07, + "loss": 11.9349, + "step": 448890 + }, + { + "epoch": 0.9068064011764848, + "grad_norm": 286.9355773925781, + "learning_rate": 3.148079404725801e-07, + "loss": 13.8341, + "step": 448900 + }, + { + "epoch": 0.9068266018091686, + "grad_norm": 356.0955810546875, + "learning_rate": 3.1468604891181755e-07, + "loss": 10.4007, + "step": 448910 + }, + { + "epoch": 0.9068468024418525, + "grad_norm": 518.3862915039062, + "learning_rate": 3.145641801867061e-07, + "loss": 18.039, + "step": 448920 + }, + { + "epoch": 0.9068670030745363, + "grad_norm": 17.225505828857422, + "learning_rate": 3.1444233429784145e-07, + "loss": 16.9743, + "step": 448930 + }, + { + "epoch": 0.9068872037072201, + "grad_norm": 309.4004211425781, + "learning_rate": 3.14320511245817e-07, + "loss": 18.3492, + "step": 448940 + }, + { + "epoch": 0.9069074043399039, + "grad_norm": 307.60687255859375, + "learning_rate": 3.1419871103122447e-07, + "loss": 16.0927, + "step": 448950 + }, + { + "epoch": 0.9069276049725877, + "grad_norm": 96.79744720458984, + "learning_rate": 3.1407693365465954e-07, + "loss": 19.2152, + "step": 448960 + }, + { + "epoch": 0.9069478056052716, + "grad_norm": 557.51611328125, + "learning_rate": 3.1395517911671613e-07, + "loss": 29.1107, + "step": 448970 + }, + { + "epoch": 0.9069680062379554, + "grad_norm": 78.58000946044922, + "learning_rate": 3.1383344741798716e-07, + "loss": 19.1292, + "step": 448980 + }, + { + "epoch": 0.9069882068706392, + "grad_norm": 305.63470458984375, + "learning_rate": 3.137117385590643e-07, + "loss": 21.5092, + "step": 448990 + }, + { + "epoch": 0.907008407503323, + "grad_norm": 89.49698638916016, + "learning_rate": 3.135900525405428e-07, + "loss": 10.2221, + "step": 449000 + }, + { + "epoch": 0.9070286081360068, + "grad_norm": 123.39061737060547, + "learning_rate": 3.134683893630153e-07, + "loss": 20.3347, + "step": 449010 + }, + { + "epoch": 0.9070488087686907, + "grad_norm": 265.69366455078125, + "learning_rate": 3.133467490270736e-07, + "loss": 10.337, + "step": 449020 + }, + { + "epoch": 0.9070690094013745, + "grad_norm": 636.016845703125, + "learning_rate": 3.1322513153331124e-07, + "loss": 19.651, + "step": 449030 + }, + { + "epoch": 0.9070892100340583, + "grad_norm": 1288.184814453125, + "learning_rate": 3.1310353688232207e-07, + "loss": 18.6391, + "step": 449040 + }, + { + "epoch": 0.9071094106667421, + "grad_norm": 191.1102294921875, + "learning_rate": 3.1298196507469737e-07, + "loss": 28.4142, + "step": 449050 + }, + { + "epoch": 0.9071296112994259, + "grad_norm": 140.29869079589844, + "learning_rate": 3.128604161110299e-07, + "loss": 7.4167, + "step": 449060 + }, + { + "epoch": 0.9071498119321098, + "grad_norm": 300.2296142578125, + "learning_rate": 3.1273888999191314e-07, + "loss": 18.4172, + "step": 449070 + }, + { + "epoch": 0.9071700125647936, + "grad_norm": 1085.9041748046875, + "learning_rate": 3.126173867179383e-07, + "loss": 25.2115, + "step": 449080 + }, + { + "epoch": 0.9071902131974774, + "grad_norm": 149.21746826171875, + "learning_rate": 3.1249590628969707e-07, + "loss": 17.9784, + "step": 449090 + }, + { + "epoch": 0.9072104138301612, + "grad_norm": 429.34661865234375, + "learning_rate": 3.123744487077829e-07, + "loss": 26.9559, + "step": 449100 + }, + { + "epoch": 0.907230614462845, + "grad_norm": 245.41632080078125, + "learning_rate": 3.122530139727864e-07, + "loss": 22.1495, + "step": 449110 + }, + { + "epoch": 0.9072508150955287, + "grad_norm": 282.68389892578125, + "learning_rate": 3.12131602085301e-07, + "loss": 10.4131, + "step": 449120 + }, + { + "epoch": 0.9072710157282126, + "grad_norm": 448.1305236816406, + "learning_rate": 3.1201021304591684e-07, + "loss": 17.2096, + "step": 449130 + }, + { + "epoch": 0.9072912163608964, + "grad_norm": 308.39117431640625, + "learning_rate": 3.118888468552267e-07, + "loss": 6.6042, + "step": 449140 + }, + { + "epoch": 0.9073114169935802, + "grad_norm": 370.9839172363281, + "learning_rate": 3.1176750351382235e-07, + "loss": 16.0175, + "step": 449150 + }, + { + "epoch": 0.907331617626264, + "grad_norm": 7.8283562660217285, + "learning_rate": 3.116461830222933e-07, + "loss": 13.4259, + "step": 449160 + }, + { + "epoch": 0.9073518182589478, + "grad_norm": 560.1171264648438, + "learning_rate": 3.11524885381233e-07, + "loss": 36.5528, + "step": 449170 + }, + { + "epoch": 0.9073720188916317, + "grad_norm": 238.0440673828125, + "learning_rate": 3.11403610591231e-07, + "loss": 14.3345, + "step": 449180 + }, + { + "epoch": 0.9073922195243155, + "grad_norm": 222.44174194335938, + "learning_rate": 3.1128235865288013e-07, + "loss": 16.2855, + "step": 449190 + }, + { + "epoch": 0.9074124201569993, + "grad_norm": 247.97122192382812, + "learning_rate": 3.1116112956677045e-07, + "loss": 10.4193, + "step": 449200 + }, + { + "epoch": 0.9074326207896831, + "grad_norm": 179.41934204101562, + "learning_rate": 3.1103992333349153e-07, + "loss": 16.3914, + "step": 449210 + }, + { + "epoch": 0.9074528214223669, + "grad_norm": 470.7078552246094, + "learning_rate": 3.1091873995363677e-07, + "loss": 17.0916, + "step": 449220 + }, + { + "epoch": 0.9074730220550508, + "grad_norm": 270.0218811035156, + "learning_rate": 3.1079757942779453e-07, + "loss": 16.3383, + "step": 449230 + }, + { + "epoch": 0.9074932226877346, + "grad_norm": 367.3122863769531, + "learning_rate": 3.106764417565561e-07, + "loss": 12.3758, + "step": 449240 + }, + { + "epoch": 0.9075134233204184, + "grad_norm": 306.461181640625, + "learning_rate": 3.105553269405115e-07, + "loss": 18.3363, + "step": 449250 + }, + { + "epoch": 0.9075336239531022, + "grad_norm": 522.5321655273438, + "learning_rate": 3.1043423498025303e-07, + "loss": 24.1793, + "step": 449260 + }, + { + "epoch": 0.907553824585786, + "grad_norm": 516.6013793945312, + "learning_rate": 3.1031316587636805e-07, + "loss": 17.7558, + "step": 449270 + }, + { + "epoch": 0.9075740252184699, + "grad_norm": 1105.093994140625, + "learning_rate": 3.101921196294477e-07, + "loss": 33.2699, + "step": 449280 + }, + { + "epoch": 0.9075942258511537, + "grad_norm": 102.50092315673828, + "learning_rate": 3.1007109624008326e-07, + "loss": 25.3437, + "step": 449290 + }, + { + "epoch": 0.9076144264838375, + "grad_norm": 565.497314453125, + "learning_rate": 3.0995009570886305e-07, + "loss": 24.174, + "step": 449300 + }, + { + "epoch": 0.9076346271165213, + "grad_norm": 291.22540283203125, + "learning_rate": 3.098291180363766e-07, + "loss": 16.3335, + "step": 449310 + }, + { + "epoch": 0.9076548277492051, + "grad_norm": 256.3533935546875, + "learning_rate": 3.097081632232141e-07, + "loss": 12.1066, + "step": 449320 + }, + { + "epoch": 0.907675028381889, + "grad_norm": 300.76544189453125, + "learning_rate": 3.095872312699666e-07, + "loss": 9.9195, + "step": 449330 + }, + { + "epoch": 0.9076952290145728, + "grad_norm": 374.5771789550781, + "learning_rate": 3.094663221772209e-07, + "loss": 20.0803, + "step": 449340 + }, + { + "epoch": 0.9077154296472566, + "grad_norm": 457.07659912109375, + "learning_rate": 3.093454359455672e-07, + "loss": 18.5713, + "step": 449350 + }, + { + "epoch": 0.9077356302799404, + "grad_norm": 456.6804504394531, + "learning_rate": 3.09224572575596e-07, + "loss": 17.623, + "step": 449360 + }, + { + "epoch": 0.9077558309126241, + "grad_norm": 631.7473754882812, + "learning_rate": 3.091037320678947e-07, + "loss": 19.8148, + "step": 449370 + }, + { + "epoch": 0.907776031545308, + "grad_norm": 283.96368408203125, + "learning_rate": 3.089829144230527e-07, + "loss": 16.0926, + "step": 449380 + }, + { + "epoch": 0.9077962321779918, + "grad_norm": 319.5601806640625, + "learning_rate": 3.088621196416597e-07, + "loss": 10.865, + "step": 449390 + }, + { + "epoch": 0.9078164328106756, + "grad_norm": 254.3486785888672, + "learning_rate": 3.0874134772430344e-07, + "loss": 10.6656, + "step": 449400 + }, + { + "epoch": 0.9078366334433594, + "grad_norm": 1269.4559326171875, + "learning_rate": 3.0862059867157237e-07, + "loss": 25.0295, + "step": 449410 + }, + { + "epoch": 0.9078568340760432, + "grad_norm": 524.7383422851562, + "learning_rate": 3.08499872484056e-07, + "loss": 9.9924, + "step": 449420 + }, + { + "epoch": 0.907877034708727, + "grad_norm": 284.661865234375, + "learning_rate": 3.0837916916234166e-07, + "loss": 37.4764, + "step": 449430 + }, + { + "epoch": 0.9078972353414109, + "grad_norm": 367.37078857421875, + "learning_rate": 3.0825848870701893e-07, + "loss": 18.6231, + "step": 449440 + }, + { + "epoch": 0.9079174359740947, + "grad_norm": 6.0677595138549805, + "learning_rate": 3.08137831118675e-07, + "loss": 15.7651, + "step": 449450 + }, + { + "epoch": 0.9079376366067785, + "grad_norm": 330.1292724609375, + "learning_rate": 3.080171963978984e-07, + "loss": 22.6161, + "step": 449460 + }, + { + "epoch": 0.9079578372394623, + "grad_norm": 170.20848083496094, + "learning_rate": 3.078965845452769e-07, + "loss": 8.8626, + "step": 449470 + }, + { + "epoch": 0.9079780378721461, + "grad_norm": 458.19586181640625, + "learning_rate": 3.077759955613979e-07, + "loss": 17.3684, + "step": 449480 + }, + { + "epoch": 0.90799823850483, + "grad_norm": 236.87313842773438, + "learning_rate": 3.0765542944685036e-07, + "loss": 22.9826, + "step": 449490 + }, + { + "epoch": 0.9080184391375138, + "grad_norm": 444.2785339355469, + "learning_rate": 3.0753488620222037e-07, + "loss": 29.2181, + "step": 449500 + }, + { + "epoch": 0.9080386397701976, + "grad_norm": 6.273552417755127, + "learning_rate": 3.07414365828097e-07, + "loss": 20.8284, + "step": 449510 + }, + { + "epoch": 0.9080588404028814, + "grad_norm": 569.6155395507812, + "learning_rate": 3.0729386832506647e-07, + "loss": 22.2681, + "step": 449520 + }, + { + "epoch": 0.9080790410355652, + "grad_norm": 717.0222778320312, + "learning_rate": 3.07173393693716e-07, + "loss": 22.9611, + "step": 449530 + }, + { + "epoch": 0.9080992416682491, + "grad_norm": 583.5347900390625, + "learning_rate": 3.0705294193463406e-07, + "loss": 18.084, + "step": 449540 + }, + { + "epoch": 0.9081194423009329, + "grad_norm": 611.0958862304688, + "learning_rate": 3.069325130484069e-07, + "loss": 20.9226, + "step": 449550 + }, + { + "epoch": 0.9081396429336167, + "grad_norm": 88.22270202636719, + "learning_rate": 3.068121070356206e-07, + "loss": 18.2759, + "step": 449560 + }, + { + "epoch": 0.9081598435663005, + "grad_norm": 147.03045654296875, + "learning_rate": 3.066917238968631e-07, + "loss": 11.1902, + "step": 449570 + }, + { + "epoch": 0.9081800441989843, + "grad_norm": 39.146305084228516, + "learning_rate": 3.065713636327211e-07, + "loss": 19.9076, + "step": 449580 + }, + { + "epoch": 0.9082002448316682, + "grad_norm": 243.39291381835938, + "learning_rate": 3.0645102624378144e-07, + "loss": 19.0416, + "step": 449590 + }, + { + "epoch": 0.908220445464352, + "grad_norm": 473.9781799316406, + "learning_rate": 3.0633071173062966e-07, + "loss": 12.5497, + "step": 449600 + }, + { + "epoch": 0.9082406460970358, + "grad_norm": 149.61248779296875, + "learning_rate": 3.0621042009385313e-07, + "loss": 19.2441, + "step": 449610 + }, + { + "epoch": 0.9082608467297196, + "grad_norm": 420.84954833984375, + "learning_rate": 3.0609015133403806e-07, + "loss": 19.7314, + "step": 449620 + }, + { + "epoch": 0.9082810473624033, + "grad_norm": 262.05914306640625, + "learning_rate": 3.0596990545176895e-07, + "loss": 17.1844, + "step": 449630 + }, + { + "epoch": 0.9083012479950872, + "grad_norm": 387.1336669921875, + "learning_rate": 3.058496824476337e-07, + "loss": 10.2942, + "step": 449640 + }, + { + "epoch": 0.908321448627771, + "grad_norm": 243.58253479003906, + "learning_rate": 3.057294823222184e-07, + "loss": 21.0373, + "step": 449650 + }, + { + "epoch": 0.9083416492604548, + "grad_norm": 281.2779846191406, + "learning_rate": 3.056093050761083e-07, + "loss": 14.0664, + "step": 449660 + }, + { + "epoch": 0.9083618498931386, + "grad_norm": 339.8753356933594, + "learning_rate": 3.0548915070988837e-07, + "loss": 13.4907, + "step": 449670 + }, + { + "epoch": 0.9083820505258224, + "grad_norm": 271.4891052246094, + "learning_rate": 3.0536901922414543e-07, + "loss": 22.4855, + "step": 449680 + }, + { + "epoch": 0.9084022511585063, + "grad_norm": 451.68890380859375, + "learning_rate": 3.052489106194645e-07, + "loss": 31.894, + "step": 449690 + }, + { + "epoch": 0.9084224517911901, + "grad_norm": 644.14111328125, + "learning_rate": 3.051288248964307e-07, + "loss": 22.8247, + "step": 449700 + }, + { + "epoch": 0.9084426524238739, + "grad_norm": 158.24072265625, + "learning_rate": 3.050087620556302e-07, + "loss": 7.1228, + "step": 449710 + }, + { + "epoch": 0.9084628530565577, + "grad_norm": 788.8622436523438, + "learning_rate": 3.0488872209764654e-07, + "loss": 29.4907, + "step": 449720 + }, + { + "epoch": 0.9084830536892415, + "grad_norm": 38.16522979736328, + "learning_rate": 3.047687050230663e-07, + "loss": 13.5508, + "step": 449730 + }, + { + "epoch": 0.9085032543219254, + "grad_norm": 369.5571594238281, + "learning_rate": 3.046487108324736e-07, + "loss": 18.3055, + "step": 449740 + }, + { + "epoch": 0.9085234549546092, + "grad_norm": 458.87786865234375, + "learning_rate": 3.0452873952645455e-07, + "loss": 11.2209, + "step": 449750 + }, + { + "epoch": 0.908543655587293, + "grad_norm": 659.783935546875, + "learning_rate": 3.0440879110559263e-07, + "loss": 34.188, + "step": 449760 + }, + { + "epoch": 0.9085638562199768, + "grad_norm": 36.0201530456543, + "learning_rate": 3.0428886557047176e-07, + "loss": 19.4572, + "step": 449770 + }, + { + "epoch": 0.9085840568526606, + "grad_norm": 552.7836303710938, + "learning_rate": 3.0416896292167873e-07, + "loss": 24.2485, + "step": 449780 + }, + { + "epoch": 0.9086042574853445, + "grad_norm": 214.62286376953125, + "learning_rate": 3.0404908315979587e-07, + "loss": 17.428, + "step": 449790 + }, + { + "epoch": 0.9086244581180283, + "grad_norm": 499.6221008300781, + "learning_rate": 3.0392922628540875e-07, + "loss": 21.9665, + "step": 449800 + }, + { + "epoch": 0.9086446587507121, + "grad_norm": 429.6669921875, + "learning_rate": 3.0380939229910087e-07, + "loss": 22.1935, + "step": 449810 + }, + { + "epoch": 0.9086648593833959, + "grad_norm": 387.1067810058594, + "learning_rate": 3.036895812014556e-07, + "loss": 14.9835, + "step": 449820 + }, + { + "epoch": 0.9086850600160797, + "grad_norm": 452.1629638671875, + "learning_rate": 3.0356979299305867e-07, + "loss": 19.1507, + "step": 449830 + }, + { + "epoch": 0.9087052606487636, + "grad_norm": 265.0748291015625, + "learning_rate": 3.0345002767449337e-07, + "loss": 13.7882, + "step": 449840 + }, + { + "epoch": 0.9087254612814474, + "grad_norm": 824.8062744140625, + "learning_rate": 3.0333028524634156e-07, + "loss": 17.5646, + "step": 449850 + }, + { + "epoch": 0.9087456619141312, + "grad_norm": 16.399173736572266, + "learning_rate": 3.0321056570918883e-07, + "loss": 18.7888, + "step": 449860 + }, + { + "epoch": 0.908765862546815, + "grad_norm": 1272.93701171875, + "learning_rate": 3.030908690636192e-07, + "loss": 27.8628, + "step": 449870 + }, + { + "epoch": 0.9087860631794987, + "grad_norm": 344.57684326171875, + "learning_rate": 3.029711953102138e-07, + "loss": 21.5898, + "step": 449880 + }, + { + "epoch": 0.9088062638121825, + "grad_norm": 346.8316955566406, + "learning_rate": 3.028515444495572e-07, + "loss": 10.6882, + "step": 449890 + }, + { + "epoch": 0.9088264644448664, + "grad_norm": 240.88621520996094, + "learning_rate": 3.027319164822329e-07, + "loss": 16.0152, + "step": 449900 + }, + { + "epoch": 0.9088466650775502, + "grad_norm": 113.99472045898438, + "learning_rate": 3.0261231140882363e-07, + "loss": 37.6736, + "step": 449910 + }, + { + "epoch": 0.908866865710234, + "grad_norm": 153.38453674316406, + "learning_rate": 3.024927292299118e-07, + "loss": 21.8065, + "step": 449920 + }, + { + "epoch": 0.9088870663429178, + "grad_norm": 444.3023986816406, + "learning_rate": 3.0237316994608025e-07, + "loss": 14.1541, + "step": 449930 + }, + { + "epoch": 0.9089072669756016, + "grad_norm": 412.1969299316406, + "learning_rate": 3.02253633557914e-07, + "loss": 20.4504, + "step": 449940 + }, + { + "epoch": 0.9089274676082855, + "grad_norm": 162.01190185546875, + "learning_rate": 3.0213412006599216e-07, + "loss": 11.6022, + "step": 449950 + }, + { + "epoch": 0.9089476682409693, + "grad_norm": 0.0, + "learning_rate": 3.0201462947089865e-07, + "loss": 24.5164, + "step": 449960 + }, + { + "epoch": 0.9089678688736531, + "grad_norm": 684.73779296875, + "learning_rate": 3.018951617732169e-07, + "loss": 21.3613, + "step": 449970 + }, + { + "epoch": 0.9089880695063369, + "grad_norm": 733.5957641601562, + "learning_rate": 3.01775716973528e-07, + "loss": 34.4216, + "step": 449980 + }, + { + "epoch": 0.9090082701390207, + "grad_norm": 362.92449951171875, + "learning_rate": 3.0165629507241446e-07, + "loss": 13.8412, + "step": 449990 + }, + { + "epoch": 0.9090284707717046, + "grad_norm": 392.89288330078125, + "learning_rate": 3.015368960704584e-07, + "loss": 15.1687, + "step": 450000 + }, + { + "epoch": 0.9090486714043884, + "grad_norm": 420.2798767089844, + "learning_rate": 3.014175199682418e-07, + "loss": 15.1001, + "step": 450010 + }, + { + "epoch": 0.9090688720370722, + "grad_norm": 494.7281799316406, + "learning_rate": 3.012981667663456e-07, + "loss": 14.4487, + "step": 450020 + }, + { + "epoch": 0.909089072669756, + "grad_norm": 673.8734741210938, + "learning_rate": 3.011788364653523e-07, + "loss": 18.8924, + "step": 450030 + }, + { + "epoch": 0.9091092733024398, + "grad_norm": 491.93609619140625, + "learning_rate": 3.010595290658441e-07, + "loss": 13.9405, + "step": 450040 + }, + { + "epoch": 0.9091294739351237, + "grad_norm": 136.08999633789062, + "learning_rate": 3.0094024456840176e-07, + "loss": 12.4955, + "step": 450050 + }, + { + "epoch": 0.9091496745678075, + "grad_norm": 411.83746337890625, + "learning_rate": 3.008209829736064e-07, + "loss": 19.9289, + "step": 450060 + }, + { + "epoch": 0.9091698752004913, + "grad_norm": 226.57904052734375, + "learning_rate": 3.007017442820398e-07, + "loss": 21.5572, + "step": 450070 + }, + { + "epoch": 0.9091900758331751, + "grad_norm": 651.72900390625, + "learning_rate": 3.005825284942837e-07, + "loss": 22.6409, + "step": 450080 + }, + { + "epoch": 0.909210276465859, + "grad_norm": 8.027105331420898, + "learning_rate": 3.004633356109171e-07, + "loss": 25.1169, + "step": 450090 + }, + { + "epoch": 0.9092304770985428, + "grad_norm": 294.9572448730469, + "learning_rate": 3.003441656325229e-07, + "loss": 12.6286, + "step": 450100 + }, + { + "epoch": 0.9092506777312266, + "grad_norm": 374.8417663574219, + "learning_rate": 3.002250185596806e-07, + "loss": 19.1292, + "step": 450110 + }, + { + "epoch": 0.9092708783639104, + "grad_norm": 58.905242919921875, + "learning_rate": 3.0010589439297245e-07, + "loss": 30.5942, + "step": 450120 + }, + { + "epoch": 0.9092910789965942, + "grad_norm": 385.8377380371094, + "learning_rate": 2.9998679313297807e-07, + "loss": 17.9534, + "step": 450130 + }, + { + "epoch": 0.9093112796292779, + "grad_norm": 214.30320739746094, + "learning_rate": 2.99867714780277e-07, + "loss": 17.9742, + "step": 450140 + }, + { + "epoch": 0.9093314802619618, + "grad_norm": 166.50491333007812, + "learning_rate": 2.9974865933545207e-07, + "loss": 15.6885, + "step": 450150 + }, + { + "epoch": 0.9093516808946456, + "grad_norm": 264.8522644042969, + "learning_rate": 2.996296267990817e-07, + "loss": 17.7719, + "step": 450160 + }, + { + "epoch": 0.9093718815273294, + "grad_norm": 327.0448303222656, + "learning_rate": 2.9951061717174543e-07, + "loss": 18.7962, + "step": 450170 + }, + { + "epoch": 0.9093920821600132, + "grad_norm": 425.8899841308594, + "learning_rate": 2.9939163045402456e-07, + "loss": 8.4987, + "step": 450180 + }, + { + "epoch": 0.909412282792697, + "grad_norm": 132.53302001953125, + "learning_rate": 2.992726666464996e-07, + "loss": 17.0717, + "step": 450190 + }, + { + "epoch": 0.9094324834253809, + "grad_norm": 338.8014221191406, + "learning_rate": 2.99153725749749e-07, + "loss": 19.2351, + "step": 450200 + }, + { + "epoch": 0.9094526840580647, + "grad_norm": 340.30010986328125, + "learning_rate": 2.990348077643529e-07, + "loss": 13.3819, + "step": 450210 + }, + { + "epoch": 0.9094728846907485, + "grad_norm": 61.42256546020508, + "learning_rate": 2.989159126908914e-07, + "loss": 15.6164, + "step": 450220 + }, + { + "epoch": 0.9094930853234323, + "grad_norm": 432.1888122558594, + "learning_rate": 2.9879704052994395e-07, + "loss": 11.7148, + "step": 450230 + }, + { + "epoch": 0.9095132859561161, + "grad_norm": 227.47276306152344, + "learning_rate": 2.986781912820885e-07, + "loss": 9.2523, + "step": 450240 + }, + { + "epoch": 0.9095334865888, + "grad_norm": 834.7599487304688, + "learning_rate": 2.9855936494790516e-07, + "loss": 20.4919, + "step": 450250 + }, + { + "epoch": 0.9095536872214838, + "grad_norm": 10.250986099243164, + "learning_rate": 2.9844056152797505e-07, + "loss": 15.58, + "step": 450260 + }, + { + "epoch": 0.9095738878541676, + "grad_norm": 283.3591613769531, + "learning_rate": 2.983217810228739e-07, + "loss": 6.9049, + "step": 450270 + }, + { + "epoch": 0.9095940884868514, + "grad_norm": 299.103515625, + "learning_rate": 2.9820302343318177e-07, + "loss": 18.7821, + "step": 450280 + }, + { + "epoch": 0.9096142891195352, + "grad_norm": 216.5201416015625, + "learning_rate": 2.9808428875947925e-07, + "loss": 14.35, + "step": 450290 + }, + { + "epoch": 0.909634489752219, + "grad_norm": 2.37345027923584, + "learning_rate": 2.9796557700234317e-07, + "loss": 24.4248, + "step": 450300 + }, + { + "epoch": 0.9096546903849029, + "grad_norm": 201.43051147460938, + "learning_rate": 2.9784688816235194e-07, + "loss": 18.0874, + "step": 450310 + }, + { + "epoch": 0.9096748910175867, + "grad_norm": 150.97740173339844, + "learning_rate": 2.9772822224008515e-07, + "loss": 14.5586, + "step": 450320 + }, + { + "epoch": 0.9096950916502705, + "grad_norm": 190.00332641601562, + "learning_rate": 2.976095792361211e-07, + "loss": 44.8998, + "step": 450330 + }, + { + "epoch": 0.9097152922829543, + "grad_norm": 350.0762939453125, + "learning_rate": 2.9749095915103665e-07, + "loss": 24.1158, + "step": 450340 + }, + { + "epoch": 0.9097354929156382, + "grad_norm": 541.433349609375, + "learning_rate": 2.9737236198541077e-07, + "loss": 30.8052, + "step": 450350 + }, + { + "epoch": 0.909755693548322, + "grad_norm": 118.30099487304688, + "learning_rate": 2.9725378773982295e-07, + "loss": 16.6455, + "step": 450360 + }, + { + "epoch": 0.9097758941810058, + "grad_norm": 424.4774475097656, + "learning_rate": 2.971352364148494e-07, + "loss": 17.9009, + "step": 450370 + }, + { + "epoch": 0.9097960948136896, + "grad_norm": 122.40388488769531, + "learning_rate": 2.970167080110675e-07, + "loss": 13.7767, + "step": 450380 + }, + { + "epoch": 0.9098162954463734, + "grad_norm": 129.57290649414062, + "learning_rate": 2.968982025290568e-07, + "loss": 20.165, + "step": 450390 + }, + { + "epoch": 0.9098364960790571, + "grad_norm": 307.5549011230469, + "learning_rate": 2.967797199693928e-07, + "loss": 22.9961, + "step": 450400 + }, + { + "epoch": 0.909856696711741, + "grad_norm": 755.9302978515625, + "learning_rate": 2.9666126033265517e-07, + "loss": 17.0161, + "step": 450410 + }, + { + "epoch": 0.9098768973444248, + "grad_norm": 232.53997802734375, + "learning_rate": 2.9654282361941953e-07, + "loss": 19.0928, + "step": 450420 + }, + { + "epoch": 0.9098970979771086, + "grad_norm": 578.3982543945312, + "learning_rate": 2.9642440983026324e-07, + "loss": 23.7926, + "step": 450430 + }, + { + "epoch": 0.9099172986097924, + "grad_norm": 685.7623901367188, + "learning_rate": 2.963060189657646e-07, + "loss": 18.2127, + "step": 450440 + }, + { + "epoch": 0.9099374992424762, + "grad_norm": 157.11639404296875, + "learning_rate": 2.961876510264999e-07, + "loss": 19.3056, + "step": 450450 + }, + { + "epoch": 0.9099576998751601, + "grad_norm": 142.48611450195312, + "learning_rate": 2.9606930601304595e-07, + "loss": 15.6247, + "step": 450460 + }, + { + "epoch": 0.9099779005078439, + "grad_norm": 37.19289779663086, + "learning_rate": 2.9595098392597887e-07, + "loss": 11.6434, + "step": 450470 + }, + { + "epoch": 0.9099981011405277, + "grad_norm": 489.3031005859375, + "learning_rate": 2.958326847658771e-07, + "loss": 17.3193, + "step": 450480 + }, + { + "epoch": 0.9100183017732115, + "grad_norm": 122.79801940917969, + "learning_rate": 2.9571440853331634e-07, + "loss": 19.2583, + "step": 450490 + }, + { + "epoch": 0.9100385024058953, + "grad_norm": 422.8098449707031, + "learning_rate": 2.9559615522887275e-07, + "loss": 10.5503, + "step": 450500 + }, + { + "epoch": 0.9100587030385792, + "grad_norm": 154.032958984375, + "learning_rate": 2.954779248531231e-07, + "loss": 14.2224, + "step": 450510 + }, + { + "epoch": 0.910078903671263, + "grad_norm": 259.25714111328125, + "learning_rate": 2.953597174066436e-07, + "loss": 17.5647, + "step": 450520 + }, + { + "epoch": 0.9100991043039468, + "grad_norm": 197.6894989013672, + "learning_rate": 2.952415328900093e-07, + "loss": 13.9, + "step": 450530 + }, + { + "epoch": 0.9101193049366306, + "grad_norm": 140.98451232910156, + "learning_rate": 2.951233713037971e-07, + "loss": 10.3261, + "step": 450540 + }, + { + "epoch": 0.9101395055693144, + "grad_norm": 530.427001953125, + "learning_rate": 2.9500523264858473e-07, + "loss": 17.0496, + "step": 450550 + }, + { + "epoch": 0.9101597062019983, + "grad_norm": 1189.279296875, + "learning_rate": 2.948871169249451e-07, + "loss": 24.623, + "step": 450560 + }, + { + "epoch": 0.9101799068346821, + "grad_norm": 255.91119384765625, + "learning_rate": 2.9476902413345443e-07, + "loss": 19.1389, + "step": 450570 + }, + { + "epoch": 0.9102001074673659, + "grad_norm": 382.6887512207031, + "learning_rate": 2.946509542746895e-07, + "loss": 10.8728, + "step": 450580 + }, + { + "epoch": 0.9102203081000497, + "grad_norm": 526.3858642578125, + "learning_rate": 2.9453290734922537e-07, + "loss": 27.7853, + "step": 450590 + }, + { + "epoch": 0.9102405087327335, + "grad_norm": 301.6593017578125, + "learning_rate": 2.9441488335763656e-07, + "loss": 32.5232, + "step": 450600 + }, + { + "epoch": 0.9102607093654174, + "grad_norm": 8.258715629577637, + "learning_rate": 2.9429688230049934e-07, + "loss": 14.6922, + "step": 450610 + }, + { + "epoch": 0.9102809099981012, + "grad_norm": 467.5506286621094, + "learning_rate": 2.941789041783888e-07, + "loss": 7.9306, + "step": 450620 + }, + { + "epoch": 0.910301110630785, + "grad_norm": 4.88645076751709, + "learning_rate": 2.940609489918783e-07, + "loss": 11.6321, + "step": 450630 + }, + { + "epoch": 0.9103213112634688, + "grad_norm": 615.4234619140625, + "learning_rate": 2.9394301674154413e-07, + "loss": 14.5892, + "step": 450640 + }, + { + "epoch": 0.9103415118961525, + "grad_norm": 232.69505310058594, + "learning_rate": 2.938251074279619e-07, + "loss": 15.8183, + "step": 450650 + }, + { + "epoch": 0.9103617125288364, + "grad_norm": 698.8169555664062, + "learning_rate": 2.9370722105170504e-07, + "loss": 18.039, + "step": 450660 + }, + { + "epoch": 0.9103819131615202, + "grad_norm": 29.231889724731445, + "learning_rate": 2.935893576133475e-07, + "loss": 24.1463, + "step": 450670 + }, + { + "epoch": 0.910402113794204, + "grad_norm": 68.94104766845703, + "learning_rate": 2.9347151711346556e-07, + "loss": 11.9315, + "step": 450680 + }, + { + "epoch": 0.9104223144268878, + "grad_norm": 363.8338317871094, + "learning_rate": 2.933536995526326e-07, + "loss": 29.146, + "step": 450690 + }, + { + "epoch": 0.9104425150595716, + "grad_norm": 57.30198669433594, + "learning_rate": 2.9323590493142206e-07, + "loss": 15.7005, + "step": 450700 + }, + { + "epoch": 0.9104627156922555, + "grad_norm": 235.6766357421875, + "learning_rate": 2.931181332504096e-07, + "loss": 15.6148, + "step": 450710 + }, + { + "epoch": 0.9104829163249393, + "grad_norm": 136.07627868652344, + "learning_rate": 2.930003845101681e-07, + "loss": 12.1525, + "step": 450720 + }, + { + "epoch": 0.9105031169576231, + "grad_norm": 347.1009216308594, + "learning_rate": 2.9288265871127206e-07, + "loss": 13.9972, + "step": 450730 + }, + { + "epoch": 0.9105233175903069, + "grad_norm": 58.31315231323242, + "learning_rate": 2.927649558542955e-07, + "loss": 17.731, + "step": 450740 + }, + { + "epoch": 0.9105435182229907, + "grad_norm": 188.71400451660156, + "learning_rate": 2.9264727593981024e-07, + "loss": 17.885, + "step": 450750 + }, + { + "epoch": 0.9105637188556746, + "grad_norm": 244.48216247558594, + "learning_rate": 2.9252961896839236e-07, + "loss": 16.7535, + "step": 450760 + }, + { + "epoch": 0.9105839194883584, + "grad_norm": 595.94921875, + "learning_rate": 2.9241198494061427e-07, + "loss": 20.7789, + "step": 450770 + }, + { + "epoch": 0.9106041201210422, + "grad_norm": 59.34998321533203, + "learning_rate": 2.922943738570483e-07, + "loss": 13.5088, + "step": 450780 + }, + { + "epoch": 0.910624320753726, + "grad_norm": 384.7566833496094, + "learning_rate": 2.921767857182689e-07, + "loss": 16.75, + "step": 450790 + }, + { + "epoch": 0.9106445213864098, + "grad_norm": 258.9685363769531, + "learning_rate": 2.920592205248496e-07, + "loss": 20.9006, + "step": 450800 + }, + { + "epoch": 0.9106647220190937, + "grad_norm": 415.7431945800781, + "learning_rate": 2.919416782773621e-07, + "loss": 18.0212, + "step": 450810 + }, + { + "epoch": 0.9106849226517775, + "grad_norm": 307.84735107421875, + "learning_rate": 2.918241589763793e-07, + "loss": 17.0118, + "step": 450820 + }, + { + "epoch": 0.9107051232844613, + "grad_norm": 154.61444091796875, + "learning_rate": 2.917066626224757e-07, + "loss": 20.9167, + "step": 450830 + }, + { + "epoch": 0.9107253239171451, + "grad_norm": 545.405029296875, + "learning_rate": 2.9158918921622205e-07, + "loss": 29.9494, + "step": 450840 + }, + { + "epoch": 0.9107455245498289, + "grad_norm": 372.11822509765625, + "learning_rate": 2.914717387581917e-07, + "loss": 26.7919, + "step": 450850 + }, + { + "epoch": 0.9107657251825128, + "grad_norm": 272.1129150390625, + "learning_rate": 2.913543112489564e-07, + "loss": 22.2206, + "step": 450860 + }, + { + "epoch": 0.9107859258151966, + "grad_norm": 564.9196166992188, + "learning_rate": 2.912369066890908e-07, + "loss": 13.2435, + "step": 450870 + }, + { + "epoch": 0.9108061264478804, + "grad_norm": 407.7785949707031, + "learning_rate": 2.9111952507916375e-07, + "loss": 24.8045, + "step": 450880 + }, + { + "epoch": 0.9108263270805642, + "grad_norm": 240.93890380859375, + "learning_rate": 2.910021664197493e-07, + "loss": 23.5872, + "step": 450890 + }, + { + "epoch": 0.910846527713248, + "grad_norm": 455.80938720703125, + "learning_rate": 2.908848307114198e-07, + "loss": 18.3362, + "step": 450900 + }, + { + "epoch": 0.9108667283459317, + "grad_norm": 180.8559112548828, + "learning_rate": 2.9076751795474647e-07, + "loss": 11.052, + "step": 450910 + }, + { + "epoch": 0.9108869289786156, + "grad_norm": 185.9855499267578, + "learning_rate": 2.9065022815030044e-07, + "loss": 10.145, + "step": 450920 + }, + { + "epoch": 0.9109071296112994, + "grad_norm": 503.127685546875, + "learning_rate": 2.905329612986546e-07, + "loss": 19.2377, + "step": 450930 + }, + { + "epoch": 0.9109273302439832, + "grad_norm": 231.20713806152344, + "learning_rate": 2.9041571740037967e-07, + "loss": 15.72, + "step": 450940 + }, + { + "epoch": 0.910947530876667, + "grad_norm": 444.4071350097656, + "learning_rate": 2.9029849645604735e-07, + "loss": 16.6395, + "step": 450950 + }, + { + "epoch": 0.9109677315093508, + "grad_norm": 311.00054931640625, + "learning_rate": 2.9018129846622834e-07, + "loss": 11.8376, + "step": 450960 + }, + { + "epoch": 0.9109879321420347, + "grad_norm": 424.9436340332031, + "learning_rate": 2.900641234314955e-07, + "loss": 22.9413, + "step": 450970 + }, + { + "epoch": 0.9110081327747185, + "grad_norm": 113.29547119140625, + "learning_rate": 2.899469713524183e-07, + "loss": 4.7459, + "step": 450980 + }, + { + "epoch": 0.9110283334074023, + "grad_norm": 8.436201095581055, + "learning_rate": 2.898298422295681e-07, + "loss": 22.1985, + "step": 450990 + }, + { + "epoch": 0.9110485340400861, + "grad_norm": 239.64442443847656, + "learning_rate": 2.8971273606351656e-07, + "loss": 16.2286, + "step": 451000 + }, + { + "epoch": 0.9110687346727699, + "grad_norm": 269.68243408203125, + "learning_rate": 2.895956528548338e-07, + "loss": 34.9558, + "step": 451010 + }, + { + "epoch": 0.9110889353054538, + "grad_norm": 182.18995666503906, + "learning_rate": 2.8947859260408997e-07, + "loss": 14.533, + "step": 451020 + }, + { + "epoch": 0.9111091359381376, + "grad_norm": 23.684764862060547, + "learning_rate": 2.8936155531185675e-07, + "loss": 29.4517, + "step": 451030 + }, + { + "epoch": 0.9111293365708214, + "grad_norm": 405.1063537597656, + "learning_rate": 2.892445409787037e-07, + "loss": 30.9317, + "step": 451040 + }, + { + "epoch": 0.9111495372035052, + "grad_norm": 154.00747680664062, + "learning_rate": 2.891275496052015e-07, + "loss": 18.9794, + "step": 451050 + }, + { + "epoch": 0.911169737836189, + "grad_norm": 521.412353515625, + "learning_rate": 2.8901058119192026e-07, + "loss": 16.2786, + "step": 451060 + }, + { + "epoch": 0.9111899384688729, + "grad_norm": 295.50860595703125, + "learning_rate": 2.8889363573943006e-07, + "loss": 11.4799, + "step": 451070 + }, + { + "epoch": 0.9112101391015567, + "grad_norm": 461.7724609375, + "learning_rate": 2.8877671324829994e-07, + "loss": 17.0752, + "step": 451080 + }, + { + "epoch": 0.9112303397342405, + "grad_norm": 307.6255798339844, + "learning_rate": 2.886598137191021e-07, + "loss": 22.2398, + "step": 451090 + }, + { + "epoch": 0.9112505403669243, + "grad_norm": 112.7985610961914, + "learning_rate": 2.8854293715240455e-07, + "loss": 20.4865, + "step": 451100 + }, + { + "epoch": 0.9112707409996081, + "grad_norm": 301.1622009277344, + "learning_rate": 2.884260835487768e-07, + "loss": 14.4974, + "step": 451110 + }, + { + "epoch": 0.911290941632292, + "grad_norm": 166.6481475830078, + "learning_rate": 2.8830925290878997e-07, + "loss": 20.8534, + "step": 451120 + }, + { + "epoch": 0.9113111422649758, + "grad_norm": 242.3641357421875, + "learning_rate": 2.8819244523301206e-07, + "loss": 20.7083, + "step": 451130 + }, + { + "epoch": 0.9113313428976596, + "grad_norm": 162.1275177001953, + "learning_rate": 2.880756605220114e-07, + "loss": 14.867, + "step": 451140 + }, + { + "epoch": 0.9113515435303434, + "grad_norm": 562.4708862304688, + "learning_rate": 2.879588987763593e-07, + "loss": 26.572, + "step": 451150 + }, + { + "epoch": 0.9113717441630271, + "grad_norm": 410.82196044921875, + "learning_rate": 2.878421599966252e-07, + "loss": 20.0088, + "step": 451160 + }, + { + "epoch": 0.911391944795711, + "grad_norm": 334.5708923339844, + "learning_rate": 2.877254441833754e-07, + "loss": 34.9807, + "step": 451170 + }, + { + "epoch": 0.9114121454283948, + "grad_norm": 541.3951416015625, + "learning_rate": 2.8760875133718003e-07, + "loss": 45.1133, + "step": 451180 + }, + { + "epoch": 0.9114323460610786, + "grad_norm": 848.431396484375, + "learning_rate": 2.8749208145860907e-07, + "loss": 13.6133, + "step": 451190 + }, + { + "epoch": 0.9114525466937624, + "grad_norm": 182.5347442626953, + "learning_rate": 2.8737543454822993e-07, + "loss": 16.9643, + "step": 451200 + }, + { + "epoch": 0.9114727473264462, + "grad_norm": 245.85494995117188, + "learning_rate": 2.87258810606611e-07, + "loss": 11.7495, + "step": 451210 + }, + { + "epoch": 0.91149294795913, + "grad_norm": 480.86737060546875, + "learning_rate": 2.8714220963432125e-07, + "loss": 16.4133, + "step": 451220 + }, + { + "epoch": 0.9115131485918139, + "grad_norm": 0.02471095696091652, + "learning_rate": 2.870256316319292e-07, + "loss": 24.8626, + "step": 451230 + }, + { + "epoch": 0.9115333492244977, + "grad_norm": 480.7483825683594, + "learning_rate": 2.8690907660000156e-07, + "loss": 16.0808, + "step": 451240 + }, + { + "epoch": 0.9115535498571815, + "grad_norm": 106.62007904052734, + "learning_rate": 2.867925445391079e-07, + "loss": 11.1748, + "step": 451250 + }, + { + "epoch": 0.9115737504898653, + "grad_norm": 517.0779418945312, + "learning_rate": 2.8667603544981604e-07, + "loss": 17.3558, + "step": 451260 + }, + { + "epoch": 0.9115939511225492, + "grad_norm": 763.6796875, + "learning_rate": 2.8655954933269395e-07, + "loss": 25.0028, + "step": 451270 + }, + { + "epoch": 0.911614151755233, + "grad_norm": 448.9400634765625, + "learning_rate": 2.8644308618830775e-07, + "loss": 30.3073, + "step": 451280 + }, + { + "epoch": 0.9116343523879168, + "grad_norm": 110.02424621582031, + "learning_rate": 2.86326646017227e-07, + "loss": 12.3114, + "step": 451290 + }, + { + "epoch": 0.9116545530206006, + "grad_norm": 60.046451568603516, + "learning_rate": 2.862102288200186e-07, + "loss": 8.2574, + "step": 451300 + }, + { + "epoch": 0.9116747536532844, + "grad_norm": 163.764892578125, + "learning_rate": 2.8609383459724915e-07, + "loss": 11.8364, + "step": 451310 + }, + { + "epoch": 0.9116949542859683, + "grad_norm": 462.7592468261719, + "learning_rate": 2.8597746334948773e-07, + "loss": 13.9652, + "step": 451320 + }, + { + "epoch": 0.9117151549186521, + "grad_norm": 387.3420104980469, + "learning_rate": 2.8586111507729887e-07, + "loss": 17.1762, + "step": 451330 + }, + { + "epoch": 0.9117353555513359, + "grad_norm": 213.70208740234375, + "learning_rate": 2.8574478978125266e-07, + "loss": 25.3365, + "step": 451340 + }, + { + "epoch": 0.9117555561840197, + "grad_norm": 193.23422241210938, + "learning_rate": 2.856284874619142e-07, + "loss": 14.605, + "step": 451350 + }, + { + "epoch": 0.9117757568167035, + "grad_norm": 355.51910400390625, + "learning_rate": 2.855122081198503e-07, + "loss": 14.1195, + "step": 451360 + }, + { + "epoch": 0.9117959574493874, + "grad_norm": 309.6023254394531, + "learning_rate": 2.8539595175562817e-07, + "loss": 14.2535, + "step": 451370 + }, + { + "epoch": 0.9118161580820712, + "grad_norm": 276.8885803222656, + "learning_rate": 2.852797183698147e-07, + "loss": 31.66, + "step": 451380 + }, + { + "epoch": 0.911836358714755, + "grad_norm": 233.3388214111328, + "learning_rate": 2.851635079629755e-07, + "loss": 18.4705, + "step": 451390 + }, + { + "epoch": 0.9118565593474388, + "grad_norm": 53.565189361572266, + "learning_rate": 2.850473205356774e-07, + "loss": 18.5574, + "step": 451400 + }, + { + "epoch": 0.9118767599801226, + "grad_norm": 290.7619934082031, + "learning_rate": 2.8493115608848764e-07, + "loss": 24.058, + "step": 451410 + }, + { + "epoch": 0.9118969606128063, + "grad_norm": 286.1277160644531, + "learning_rate": 2.8481501462197137e-07, + "loss": 12.422, + "step": 451420 + }, + { + "epoch": 0.9119171612454902, + "grad_norm": 622.9802856445312, + "learning_rate": 2.846988961366942e-07, + "loss": 22.359, + "step": 451430 + }, + { + "epoch": 0.911937361878174, + "grad_norm": 324.1952209472656, + "learning_rate": 2.8458280063322353e-07, + "loss": 23.3572, + "step": 451440 + }, + { + "epoch": 0.9119575625108578, + "grad_norm": 410.7866516113281, + "learning_rate": 2.844667281121244e-07, + "loss": 16.6071, + "step": 451450 + }, + { + "epoch": 0.9119777631435416, + "grad_norm": 475.54052734375, + "learning_rate": 2.843506785739614e-07, + "loss": 12.6368, + "step": 451460 + }, + { + "epoch": 0.9119979637762254, + "grad_norm": 35.28129959106445, + "learning_rate": 2.842346520193018e-07, + "loss": 18.0663, + "step": 451470 + }, + { + "epoch": 0.9120181644089093, + "grad_norm": 287.9037780761719, + "learning_rate": 2.8411864844871184e-07, + "loss": 14.8362, + "step": 451480 + }, + { + "epoch": 0.9120383650415931, + "grad_norm": 250.30825805664062, + "learning_rate": 2.8400266786275387e-07, + "loss": 34.2517, + "step": 451490 + }, + { + "epoch": 0.9120585656742769, + "grad_norm": 359.9864196777344, + "learning_rate": 2.838867102619952e-07, + "loss": 17.0671, + "step": 451500 + }, + { + "epoch": 0.9120787663069607, + "grad_norm": 276.7231750488281, + "learning_rate": 2.8377077564700094e-07, + "loss": 8.9851, + "step": 451510 + }, + { + "epoch": 0.9120989669396445, + "grad_norm": 338.2561340332031, + "learning_rate": 2.8365486401833677e-07, + "loss": 22.0601, + "step": 451520 + }, + { + "epoch": 0.9121191675723284, + "grad_norm": 143.89337158203125, + "learning_rate": 2.835389753765655e-07, + "loss": 13.1001, + "step": 451530 + }, + { + "epoch": 0.9121393682050122, + "grad_norm": 683.7655639648438, + "learning_rate": 2.834231097222534e-07, + "loss": 26.8933, + "step": 451540 + }, + { + "epoch": 0.912159568837696, + "grad_norm": 487.2683410644531, + "learning_rate": 2.833072670559661e-07, + "loss": 21.1917, + "step": 451550 + }, + { + "epoch": 0.9121797694703798, + "grad_norm": 561.84423828125, + "learning_rate": 2.83191447378266e-07, + "loss": 19.7071, + "step": 451560 + }, + { + "epoch": 0.9121999701030636, + "grad_norm": 33.04185104370117, + "learning_rate": 2.8307565068971867e-07, + "loss": 18.7884, + "step": 451570 + }, + { + "epoch": 0.9122201707357475, + "grad_norm": 215.38392639160156, + "learning_rate": 2.829598769908892e-07, + "loss": 24.2191, + "step": 451580 + }, + { + "epoch": 0.9122403713684313, + "grad_norm": 159.6072998046875, + "learning_rate": 2.8284412628234117e-07, + "loss": 9.3734, + "step": 451590 + }, + { + "epoch": 0.9122605720011151, + "grad_norm": 5.168828010559082, + "learning_rate": 2.8272839856463783e-07, + "loss": 12.7489, + "step": 451600 + }, + { + "epoch": 0.9122807726337989, + "grad_norm": 598.6390380859375, + "learning_rate": 2.8261269383834497e-07, + "loss": 20.4365, + "step": 451610 + }, + { + "epoch": 0.9123009732664827, + "grad_norm": 304.0203857421875, + "learning_rate": 2.8249701210402603e-07, + "loss": 17.0099, + "step": 451620 + }, + { + "epoch": 0.9123211738991666, + "grad_norm": 471.2829895019531, + "learning_rate": 2.823813533622438e-07, + "loss": 15.8112, + "step": 451630 + }, + { + "epoch": 0.9123413745318504, + "grad_norm": 779.8455810546875, + "learning_rate": 2.822657176135629e-07, + "loss": 27.6026, + "step": 451640 + }, + { + "epoch": 0.9123615751645342, + "grad_norm": 398.429443359375, + "learning_rate": 2.821501048585462e-07, + "loss": 52.8884, + "step": 451650 + }, + { + "epoch": 0.912381775797218, + "grad_norm": 173.2937774658203, + "learning_rate": 2.8203451509775825e-07, + "loss": 14.2675, + "step": 451660 + }, + { + "epoch": 0.9124019764299018, + "grad_norm": 0.9098075032234192, + "learning_rate": 2.819189483317625e-07, + "loss": 17.9382, + "step": 451670 + }, + { + "epoch": 0.9124221770625855, + "grad_norm": 557.1135864257812, + "learning_rate": 2.818034045611201e-07, + "loss": 19.7519, + "step": 451680 + }, + { + "epoch": 0.9124423776952694, + "grad_norm": 325.0290832519531, + "learning_rate": 2.816878837863968e-07, + "loss": 10.5954, + "step": 451690 + }, + { + "epoch": 0.9124625783279532, + "grad_norm": 497.5835876464844, + "learning_rate": 2.815723860081537e-07, + "loss": 16.6184, + "step": 451700 + }, + { + "epoch": 0.912482778960637, + "grad_norm": 197.95315551757812, + "learning_rate": 2.8145691122695496e-07, + "loss": 20.7503, + "step": 451710 + }, + { + "epoch": 0.9125029795933208, + "grad_norm": 424.4306945800781, + "learning_rate": 2.8134145944336225e-07, + "loss": 15.6808, + "step": 451720 + }, + { + "epoch": 0.9125231802260046, + "grad_norm": 193.1788330078125, + "learning_rate": 2.812260306579401e-07, + "loss": 20.6186, + "step": 451730 + }, + { + "epoch": 0.9125433808586885, + "grad_norm": 717.1135864257812, + "learning_rate": 2.811106248712497e-07, + "loss": 26.0767, + "step": 451740 + }, + { + "epoch": 0.9125635814913723, + "grad_norm": 459.9844665527344, + "learning_rate": 2.8099524208385297e-07, + "loss": 29.406, + "step": 451750 + }, + { + "epoch": 0.9125837821240561, + "grad_norm": 380.0570373535156, + "learning_rate": 2.8087988229631325e-07, + "loss": 11.3463, + "step": 451760 + }, + { + "epoch": 0.9126039827567399, + "grad_norm": 562.419189453125, + "learning_rate": 2.8076454550919397e-07, + "loss": 21.1921, + "step": 451770 + }, + { + "epoch": 0.9126241833894237, + "grad_norm": 874.0545043945312, + "learning_rate": 2.8064923172305467e-07, + "loss": 27.8008, + "step": 451780 + }, + { + "epoch": 0.9126443840221076, + "grad_norm": 218.60658264160156, + "learning_rate": 2.8053394093845833e-07, + "loss": 15.5657, + "step": 451790 + }, + { + "epoch": 0.9126645846547914, + "grad_norm": 72.1780014038086, + "learning_rate": 2.804186731559677e-07, + "loss": 11.2906, + "step": 451800 + }, + { + "epoch": 0.9126847852874752, + "grad_norm": 530.6395263671875, + "learning_rate": 2.8030342837614466e-07, + "loss": 17.5073, + "step": 451810 + }, + { + "epoch": 0.912704985920159, + "grad_norm": 183.9311981201172, + "learning_rate": 2.8018820659954927e-07, + "loss": 14.8489, + "step": 451820 + }, + { + "epoch": 0.9127251865528428, + "grad_norm": 110.54678344726562, + "learning_rate": 2.800730078267444e-07, + "loss": 14.3235, + "step": 451830 + }, + { + "epoch": 0.9127453871855267, + "grad_norm": 10.480724334716797, + "learning_rate": 2.7995783205829185e-07, + "loss": 14.0507, + "step": 451840 + }, + { + "epoch": 0.9127655878182105, + "grad_norm": 188.81947326660156, + "learning_rate": 2.798426792947517e-07, + "loss": 18.8643, + "step": 451850 + }, + { + "epoch": 0.9127857884508943, + "grad_norm": 501.7898254394531, + "learning_rate": 2.7972754953668524e-07, + "loss": 17.7551, + "step": 451860 + }, + { + "epoch": 0.9128059890835781, + "grad_norm": 352.3951110839844, + "learning_rate": 2.796124427846553e-07, + "loss": 11.06, + "step": 451870 + }, + { + "epoch": 0.912826189716262, + "grad_norm": 364.9849853515625, + "learning_rate": 2.7949735903922195e-07, + "loss": 16.1715, + "step": 451880 + }, + { + "epoch": 0.9128463903489458, + "grad_norm": 404.40289306640625, + "learning_rate": 2.7938229830094475e-07, + "loss": 13.3248, + "step": 451890 + }, + { + "epoch": 0.9128665909816296, + "grad_norm": 885.2887573242188, + "learning_rate": 2.792672605703867e-07, + "loss": 23.7981, + "step": 451900 + }, + { + "epoch": 0.9128867916143134, + "grad_norm": 442.0524597167969, + "learning_rate": 2.791522458481077e-07, + "loss": 21.1115, + "step": 451910 + }, + { + "epoch": 0.9129069922469972, + "grad_norm": 656.0565795898438, + "learning_rate": 2.79037254134667e-07, + "loss": 24.4948, + "step": 451920 + }, + { + "epoch": 0.9129271928796809, + "grad_norm": 265.2845458984375, + "learning_rate": 2.7892228543062725e-07, + "loss": 8.6274, + "step": 451930 + }, + { + "epoch": 0.9129473935123648, + "grad_norm": 973.04541015625, + "learning_rate": 2.788073397365465e-07, + "loss": 26.0115, + "step": 451940 + }, + { + "epoch": 0.9129675941450486, + "grad_norm": 154.58677673339844, + "learning_rate": 2.78692417052987e-07, + "loss": 17.974, + "step": 451950 + }, + { + "epoch": 0.9129877947777324, + "grad_norm": 412.13519287109375, + "learning_rate": 2.785775173805083e-07, + "loss": 26.1495, + "step": 451960 + }, + { + "epoch": 0.9130079954104162, + "grad_norm": 714.805908203125, + "learning_rate": 2.784626407196689e-07, + "loss": 18.7356, + "step": 451970 + }, + { + "epoch": 0.9130281960431, + "grad_norm": 215.3949432373047, + "learning_rate": 2.7834778707103104e-07, + "loss": 13.2698, + "step": 451980 + }, + { + "epoch": 0.9130483966757839, + "grad_norm": 10.791740417480469, + "learning_rate": 2.782329564351532e-07, + "loss": 31.0468, + "step": 451990 + }, + { + "epoch": 0.9130685973084677, + "grad_norm": 588.0308227539062, + "learning_rate": 2.7811814881259503e-07, + "loss": 32.0709, + "step": 452000 + }, + { + "epoch": 0.9130887979411515, + "grad_norm": 636.9493408203125, + "learning_rate": 2.7800336420391593e-07, + "loss": 17.1677, + "step": 452010 + }, + { + "epoch": 0.9131089985738353, + "grad_norm": 351.098876953125, + "learning_rate": 2.7788860260967665e-07, + "loss": 21.9418, + "step": 452020 + }, + { + "epoch": 0.9131291992065191, + "grad_norm": 490.53692626953125, + "learning_rate": 2.77773864030435e-07, + "loss": 17.6471, + "step": 452030 + }, + { + "epoch": 0.913149399839203, + "grad_norm": 453.8436279296875, + "learning_rate": 2.7765914846675067e-07, + "loss": 26.3111, + "step": 452040 + }, + { + "epoch": 0.9131696004718868, + "grad_norm": 264.65277099609375, + "learning_rate": 2.775444559191837e-07, + "loss": 16.9386, + "step": 452050 + }, + { + "epoch": 0.9131898011045706, + "grad_norm": 389.0132751464844, + "learning_rate": 2.774297863882919e-07, + "loss": 21.6635, + "step": 452060 + }, + { + "epoch": 0.9132100017372544, + "grad_norm": 171.96925354003906, + "learning_rate": 2.773151398746338e-07, + "loss": 8.1321, + "step": 452070 + }, + { + "epoch": 0.9132302023699382, + "grad_norm": 102.16289520263672, + "learning_rate": 2.772005163787689e-07, + "loss": 10.6967, + "step": 452080 + }, + { + "epoch": 0.9132504030026221, + "grad_norm": 133.51788330078125, + "learning_rate": 2.770859159012579e-07, + "loss": 12.4846, + "step": 452090 + }, + { + "epoch": 0.9132706036353059, + "grad_norm": 315.4050598144531, + "learning_rate": 2.7697133844265535e-07, + "loss": 19.5759, + "step": 452100 + }, + { + "epoch": 0.9132908042679897, + "grad_norm": 29.029733657836914, + "learning_rate": 2.768567840035219e-07, + "loss": 27.0451, + "step": 452110 + }, + { + "epoch": 0.9133110049006735, + "grad_norm": 409.8620300292969, + "learning_rate": 2.76742252584416e-07, + "loss": 10.4875, + "step": 452120 + }, + { + "epoch": 0.9133312055333573, + "grad_norm": 29.181663513183594, + "learning_rate": 2.7662774418589555e-07, + "loss": 10.0926, + "step": 452130 + }, + { + "epoch": 0.9133514061660412, + "grad_norm": 651.210693359375, + "learning_rate": 2.765132588085184e-07, + "loss": 23.9028, + "step": 452140 + }, + { + "epoch": 0.913371606798725, + "grad_norm": 248.01182556152344, + "learning_rate": 2.763987964528425e-07, + "loss": 14.2047, + "step": 452150 + }, + { + "epoch": 0.9133918074314088, + "grad_norm": 453.2279052734375, + "learning_rate": 2.7628435711942737e-07, + "loss": 18.6502, + "step": 452160 + }, + { + "epoch": 0.9134120080640926, + "grad_norm": 344.0987548828125, + "learning_rate": 2.7616994080882754e-07, + "loss": 24.7988, + "step": 452170 + }, + { + "epoch": 0.9134322086967764, + "grad_norm": 455.11712646484375, + "learning_rate": 2.7605554752160256e-07, + "loss": 21.2957, + "step": 452180 + }, + { + "epoch": 0.9134524093294601, + "grad_norm": 76.14151763916016, + "learning_rate": 2.7594117725831096e-07, + "loss": 7.8477, + "step": 452190 + }, + { + "epoch": 0.913472609962144, + "grad_norm": 342.2422180175781, + "learning_rate": 2.758268300195094e-07, + "loss": 12.5107, + "step": 452200 + }, + { + "epoch": 0.9134928105948278, + "grad_norm": 320.7900390625, + "learning_rate": 2.757125058057536e-07, + "loss": 10.5416, + "step": 452210 + }, + { + "epoch": 0.9135130112275116, + "grad_norm": 340.1474609375, + "learning_rate": 2.755982046176031e-07, + "loss": 19.4271, + "step": 452220 + }, + { + "epoch": 0.9135332118601954, + "grad_norm": 202.52476501464844, + "learning_rate": 2.754839264556136e-07, + "loss": 6.2309, + "step": 452230 + }, + { + "epoch": 0.9135534124928792, + "grad_norm": 294.6946105957031, + "learning_rate": 2.7536967132034186e-07, + "loss": 11.8811, + "step": 452240 + }, + { + "epoch": 0.9135736131255631, + "grad_norm": 639.5032958984375, + "learning_rate": 2.752554392123463e-07, + "loss": 23.6904, + "step": 452250 + }, + { + "epoch": 0.9135938137582469, + "grad_norm": 500.483642578125, + "learning_rate": 2.7514123013218153e-07, + "loss": 22.7042, + "step": 452260 + }, + { + "epoch": 0.9136140143909307, + "grad_norm": 311.4737548828125, + "learning_rate": 2.750270440804065e-07, + "loss": 11.9557, + "step": 452270 + }, + { + "epoch": 0.9136342150236145, + "grad_norm": 100.47586059570312, + "learning_rate": 2.749128810575763e-07, + "loss": 14.1911, + "step": 452280 + }, + { + "epoch": 0.9136544156562983, + "grad_norm": 356.00457763671875, + "learning_rate": 2.747987410642472e-07, + "loss": 23.2069, + "step": 452290 + }, + { + "epoch": 0.9136746162889822, + "grad_norm": 248.7677001953125, + "learning_rate": 2.746846241009765e-07, + "loss": 11.1875, + "step": 452300 + }, + { + "epoch": 0.913694816921666, + "grad_norm": 617.8887329101562, + "learning_rate": 2.745705301683188e-07, + "loss": 15.8752, + "step": 452310 + }, + { + "epoch": 0.9137150175543498, + "grad_norm": 313.4349060058594, + "learning_rate": 2.7445645926683253e-07, + "loss": 22.381, + "step": 452320 + }, + { + "epoch": 0.9137352181870336, + "grad_norm": 353.8231201171875, + "learning_rate": 2.7434241139707106e-07, + "loss": 16.0729, + "step": 452330 + }, + { + "epoch": 0.9137554188197174, + "grad_norm": 184.85678100585938, + "learning_rate": 2.742283865595924e-07, + "loss": 13.1759, + "step": 452340 + }, + { + "epoch": 0.9137756194524013, + "grad_norm": 421.85113525390625, + "learning_rate": 2.7411438475495155e-07, + "loss": 16.8529, + "step": 452350 + }, + { + "epoch": 0.9137958200850851, + "grad_norm": 700.2169189453125, + "learning_rate": 2.740004059837031e-07, + "loss": 18.6863, + "step": 452360 + }, + { + "epoch": 0.9138160207177689, + "grad_norm": 408.6905517578125, + "learning_rate": 2.738864502464045e-07, + "loss": 19.4036, + "step": 452370 + }, + { + "epoch": 0.9138362213504527, + "grad_norm": 341.9888000488281, + "learning_rate": 2.737725175436101e-07, + "loss": 14.0515, + "step": 452380 + }, + { + "epoch": 0.9138564219831365, + "grad_norm": 401.954345703125, + "learning_rate": 2.7365860787587405e-07, + "loss": 9.6258, + "step": 452390 + }, + { + "epoch": 0.9138766226158204, + "grad_norm": 23.526723861694336, + "learning_rate": 2.735447212437531e-07, + "loss": 27.0535, + "step": 452400 + }, + { + "epoch": 0.9138968232485042, + "grad_norm": 1.1871169805526733, + "learning_rate": 2.734308576478023e-07, + "loss": 13.1915, + "step": 452410 + }, + { + "epoch": 0.913917023881188, + "grad_norm": 668.0006103515625, + "learning_rate": 2.733170170885768e-07, + "loss": 21.8141, + "step": 452420 + }, + { + "epoch": 0.9139372245138718, + "grad_norm": 413.64727783203125, + "learning_rate": 2.7320319956662957e-07, + "loss": 19.9112, + "step": 452430 + }, + { + "epoch": 0.9139574251465555, + "grad_norm": 692.726318359375, + "learning_rate": 2.730894050825178e-07, + "loss": 6.8907, + "step": 452440 + }, + { + "epoch": 0.9139776257792394, + "grad_norm": 22.914690017700195, + "learning_rate": 2.72975633636795e-07, + "loss": 11.7867, + "step": 452450 + }, + { + "epoch": 0.9139978264119232, + "grad_norm": 361.56939697265625, + "learning_rate": 2.728618852300147e-07, + "loss": 10.5111, + "step": 452460 + }, + { + "epoch": 0.914018027044607, + "grad_norm": 424.5031433105469, + "learning_rate": 2.727481598627324e-07, + "loss": 14.1596, + "step": 452470 + }, + { + "epoch": 0.9140382276772908, + "grad_norm": 325.74468994140625, + "learning_rate": 2.7263445753550275e-07, + "loss": 6.4521, + "step": 452480 + }, + { + "epoch": 0.9140584283099746, + "grad_norm": 468.8731384277344, + "learning_rate": 2.725207782488792e-07, + "loss": 18.0635, + "step": 452490 + }, + { + "epoch": 0.9140786289426585, + "grad_norm": 428.7283020019531, + "learning_rate": 2.724071220034158e-07, + "loss": 22.1494, + "step": 452500 + }, + { + "epoch": 0.9140988295753423, + "grad_norm": 658.4942626953125, + "learning_rate": 2.72293488799667e-07, + "loss": 23.3455, + "step": 452510 + }, + { + "epoch": 0.9141190302080261, + "grad_norm": 221.12045288085938, + "learning_rate": 2.7217987863818684e-07, + "loss": 28.4971, + "step": 452520 + }, + { + "epoch": 0.9141392308407099, + "grad_norm": 68.4049301147461, + "learning_rate": 2.7206629151952715e-07, + "loss": 17.7994, + "step": 452530 + }, + { + "epoch": 0.9141594314733937, + "grad_norm": 116.92928314208984, + "learning_rate": 2.7195272744424405e-07, + "loss": 15.2393, + "step": 452540 + }, + { + "epoch": 0.9141796321060776, + "grad_norm": 373.884521484375, + "learning_rate": 2.7183918641288943e-07, + "loss": 9.9782, + "step": 452550 + }, + { + "epoch": 0.9141998327387614, + "grad_norm": 490.6842041015625, + "learning_rate": 2.717256684260172e-07, + "loss": 22.5185, + "step": 452560 + }, + { + "epoch": 0.9142200333714452, + "grad_norm": 161.93197631835938, + "learning_rate": 2.716121734841814e-07, + "loss": 28.5874, + "step": 452570 + }, + { + "epoch": 0.914240234004129, + "grad_norm": 130.264404296875, + "learning_rate": 2.714987015879328e-07, + "loss": 20.4128, + "step": 452580 + }, + { + "epoch": 0.9142604346368128, + "grad_norm": 932.6390991210938, + "learning_rate": 2.7138525273782746e-07, + "loss": 23.6868, + "step": 452590 + }, + { + "epoch": 0.9142806352694967, + "grad_norm": 156.4149627685547, + "learning_rate": 2.712718269344161e-07, + "loss": 14.7229, + "step": 452600 + }, + { + "epoch": 0.9143008359021805, + "grad_norm": 90.23873138427734, + "learning_rate": 2.711584241782528e-07, + "loss": 13.9489, + "step": 452610 + }, + { + "epoch": 0.9143210365348643, + "grad_norm": 210.1337127685547, + "learning_rate": 2.7104504446988867e-07, + "loss": 20.9073, + "step": 452620 + }, + { + "epoch": 0.9143412371675481, + "grad_norm": 329.8204040527344, + "learning_rate": 2.709316878098789e-07, + "loss": 14.034, + "step": 452630 + }, + { + "epoch": 0.9143614378002319, + "grad_norm": 465.3182067871094, + "learning_rate": 2.708183541987741e-07, + "loss": 19.5663, + "step": 452640 + }, + { + "epoch": 0.9143816384329158, + "grad_norm": 193.5091094970703, + "learning_rate": 2.707050436371267e-07, + "loss": 16.4573, + "step": 452650 + }, + { + "epoch": 0.9144018390655996, + "grad_norm": 298.7574462890625, + "learning_rate": 2.7059175612548947e-07, + "loss": 19.0406, + "step": 452660 + }, + { + "epoch": 0.9144220396982834, + "grad_norm": 359.96295166015625, + "learning_rate": 2.7047849166441487e-07, + "loss": 31.8732, + "step": 452670 + }, + { + "epoch": 0.9144422403309672, + "grad_norm": 387.7774963378906, + "learning_rate": 2.703652502544535e-07, + "loss": 23.7531, + "step": 452680 + }, + { + "epoch": 0.914462440963651, + "grad_norm": 192.9261932373047, + "learning_rate": 2.702520318961588e-07, + "loss": 18.7476, + "step": 452690 + }, + { + "epoch": 0.9144826415963347, + "grad_norm": 229.85899353027344, + "learning_rate": 2.701388365900831e-07, + "loss": 17.4647, + "step": 452700 + }, + { + "epoch": 0.9145028422290186, + "grad_norm": 377.1651306152344, + "learning_rate": 2.7002566433677547e-07, + "loss": 16.6451, + "step": 452710 + }, + { + "epoch": 0.9145230428617024, + "grad_norm": 153.7214813232422, + "learning_rate": 2.699125151367893e-07, + "loss": 17.1172, + "step": 452720 + }, + { + "epoch": 0.9145432434943862, + "grad_norm": 193.17724609375, + "learning_rate": 2.697993889906764e-07, + "loss": 14.337, + "step": 452730 + }, + { + "epoch": 0.91456344412707, + "grad_norm": 263.640869140625, + "learning_rate": 2.6968628589898735e-07, + "loss": 8.4065, + "step": 452740 + }, + { + "epoch": 0.9145836447597538, + "grad_norm": 96.6507568359375, + "learning_rate": 2.6957320586227354e-07, + "loss": 25.2692, + "step": 452750 + }, + { + "epoch": 0.9146038453924377, + "grad_norm": 7.664963245391846, + "learning_rate": 2.694601488810855e-07, + "loss": 19.9627, + "step": 452760 + }, + { + "epoch": 0.9146240460251215, + "grad_norm": 634.0137329101562, + "learning_rate": 2.6934711495597676e-07, + "loss": 45.0058, + "step": 452770 + }, + { + "epoch": 0.9146442466578053, + "grad_norm": 621.3983764648438, + "learning_rate": 2.6923410408749516e-07, + "loss": 17.0445, + "step": 452780 + }, + { + "epoch": 0.9146644472904891, + "grad_norm": 284.60247802734375, + "learning_rate": 2.6912111627619255e-07, + "loss": 21.5207, + "step": 452790 + }, + { + "epoch": 0.9146846479231729, + "grad_norm": 216.87062072753906, + "learning_rate": 2.690081515226206e-07, + "loss": 23.3396, + "step": 452800 + }, + { + "epoch": 0.9147048485558568, + "grad_norm": 274.0249938964844, + "learning_rate": 2.6889520982732897e-07, + "loss": 11.5307, + "step": 452810 + }, + { + "epoch": 0.9147250491885406, + "grad_norm": 52.05552673339844, + "learning_rate": 2.6878229119086776e-07, + "loss": 8.4213, + "step": 452820 + }, + { + "epoch": 0.9147452498212244, + "grad_norm": 188.5215606689453, + "learning_rate": 2.6866939561378867e-07, + "loss": 17.838, + "step": 452830 + }, + { + "epoch": 0.9147654504539082, + "grad_norm": 419.7026062011719, + "learning_rate": 2.685565230966408e-07, + "loss": 17.6637, + "step": 452840 + }, + { + "epoch": 0.914785651086592, + "grad_norm": 525.0553588867188, + "learning_rate": 2.684436736399737e-07, + "loss": 14.4383, + "step": 452850 + }, + { + "epoch": 0.9148058517192759, + "grad_norm": 333.9505920410156, + "learning_rate": 2.6833084724433965e-07, + "loss": 25.6344, + "step": 452860 + }, + { + "epoch": 0.9148260523519597, + "grad_norm": 261.0413513183594, + "learning_rate": 2.6821804391028603e-07, + "loss": 17.0195, + "step": 452870 + }, + { + "epoch": 0.9148462529846435, + "grad_norm": 58.79597854614258, + "learning_rate": 2.681052636383641e-07, + "loss": 15.6878, + "step": 452880 + }, + { + "epoch": 0.9148664536173273, + "grad_norm": 906.9078979492188, + "learning_rate": 2.679925064291239e-07, + "loss": 24.5474, + "step": 452890 + }, + { + "epoch": 0.9148866542500111, + "grad_norm": 172.41342163085938, + "learning_rate": 2.6787977228311336e-07, + "loss": 12.6641, + "step": 452900 + }, + { + "epoch": 0.914906854882695, + "grad_norm": 290.7709655761719, + "learning_rate": 2.677670612008837e-07, + "loss": 28.5541, + "step": 452910 + }, + { + "epoch": 0.9149270555153788, + "grad_norm": 383.07562255859375, + "learning_rate": 2.676543731829823e-07, + "loss": 22.4324, + "step": 452920 + }, + { + "epoch": 0.9149472561480626, + "grad_norm": 501.7777404785156, + "learning_rate": 2.6754170822996026e-07, + "loss": 11.6085, + "step": 452930 + }, + { + "epoch": 0.9149674567807464, + "grad_norm": 1093.78369140625, + "learning_rate": 2.6742906634236564e-07, + "loss": 17.3962, + "step": 452940 + }, + { + "epoch": 0.9149876574134301, + "grad_norm": 355.9870300292969, + "learning_rate": 2.6731644752074846e-07, + "loss": 16.4126, + "step": 452950 + }, + { + "epoch": 0.915007858046114, + "grad_norm": 244.96832275390625, + "learning_rate": 2.6720385176565664e-07, + "loss": 10.274, + "step": 452960 + }, + { + "epoch": 0.9150280586787978, + "grad_norm": 823.0773315429688, + "learning_rate": 2.6709127907763864e-07, + "loss": 30.8585, + "step": 452970 + }, + { + "epoch": 0.9150482593114816, + "grad_norm": 54.019229888916016, + "learning_rate": 2.6697872945724455e-07, + "loss": 14.8269, + "step": 452980 + }, + { + "epoch": 0.9150684599441654, + "grad_norm": 484.1335754394531, + "learning_rate": 2.668662029050217e-07, + "loss": 14.5493, + "step": 452990 + }, + { + "epoch": 0.9150886605768492, + "grad_norm": 500.1659851074219, + "learning_rate": 2.6675369942151864e-07, + "loss": 22.1963, + "step": 453000 + }, + { + "epoch": 0.915108861209533, + "grad_norm": 514.647216796875, + "learning_rate": 2.666412190072837e-07, + "loss": 30.4295, + "step": 453010 + }, + { + "epoch": 0.9151290618422169, + "grad_norm": 443.3310546875, + "learning_rate": 2.665287616628659e-07, + "loss": 9.98, + "step": 453020 + }, + { + "epoch": 0.9151492624749007, + "grad_norm": 290.30291748046875, + "learning_rate": 2.6641632738881315e-07, + "loss": 17.9727, + "step": 453030 + }, + { + "epoch": 0.9151694631075845, + "grad_norm": 235.2632293701172, + "learning_rate": 2.663039161856723e-07, + "loss": 13.8964, + "step": 453040 + }, + { + "epoch": 0.9151896637402683, + "grad_norm": 122.74141693115234, + "learning_rate": 2.6619152805399286e-07, + "loss": 21.4631, + "step": 453050 + }, + { + "epoch": 0.9152098643729522, + "grad_norm": 160.0275421142578, + "learning_rate": 2.660791629943216e-07, + "loss": 32.995, + "step": 453060 + }, + { + "epoch": 0.915230065005636, + "grad_norm": 74.91618347167969, + "learning_rate": 2.659668210072058e-07, + "loss": 10.9474, + "step": 453070 + }, + { + "epoch": 0.9152502656383198, + "grad_norm": 386.40838623046875, + "learning_rate": 2.658545020931935e-07, + "loss": 31.9575, + "step": 453080 + }, + { + "epoch": 0.9152704662710036, + "grad_norm": 303.7977600097656, + "learning_rate": 2.657422062528325e-07, + "loss": 18.1284, + "step": 453090 + }, + { + "epoch": 0.9152906669036874, + "grad_norm": 856.51220703125, + "learning_rate": 2.656299334866702e-07, + "loss": 22.5285, + "step": 453100 + }, + { + "epoch": 0.9153108675363713, + "grad_norm": 381.6043395996094, + "learning_rate": 2.655176837952528e-07, + "loss": 24.3001, + "step": 453110 + }, + { + "epoch": 0.9153310681690551, + "grad_norm": 246.74102783203125, + "learning_rate": 2.654054571791287e-07, + "loss": 11.1632, + "step": 453120 + }, + { + "epoch": 0.9153512688017389, + "grad_norm": 494.12615966796875, + "learning_rate": 2.6529325363884364e-07, + "loss": 17.3095, + "step": 453130 + }, + { + "epoch": 0.9153714694344227, + "grad_norm": 129.7801055908203, + "learning_rate": 2.651810731749449e-07, + "loss": 12.1936, + "step": 453140 + }, + { + "epoch": 0.9153916700671065, + "grad_norm": 249.99185180664062, + "learning_rate": 2.650689157879799e-07, + "loss": 8.0272, + "step": 453150 + }, + { + "epoch": 0.9154118706997904, + "grad_norm": 524.1873168945312, + "learning_rate": 2.649567814784937e-07, + "loss": 37.0262, + "step": 453160 + }, + { + "epoch": 0.9154320713324742, + "grad_norm": 212.4214630126953, + "learning_rate": 2.6484467024703476e-07, + "loss": 17.7013, + "step": 453170 + }, + { + "epoch": 0.915452271965158, + "grad_norm": 446.6779479980469, + "learning_rate": 2.647325820941488e-07, + "loss": 15.0334, + "step": 453180 + }, + { + "epoch": 0.9154724725978418, + "grad_norm": 331.09869384765625, + "learning_rate": 2.6462051702038085e-07, + "loss": 14.7586, + "step": 453190 + }, + { + "epoch": 0.9154926732305256, + "grad_norm": 382.7553405761719, + "learning_rate": 2.6450847502627883e-07, + "loss": 24.1565, + "step": 453200 + }, + { + "epoch": 0.9155128738632093, + "grad_norm": 365.6215515136719, + "learning_rate": 2.6439645611238795e-07, + "loss": 20.7367, + "step": 453210 + }, + { + "epoch": 0.9155330744958932, + "grad_norm": 598.3663940429688, + "learning_rate": 2.642844602792544e-07, + "loss": 17.08, + "step": 453220 + }, + { + "epoch": 0.915553275128577, + "grad_norm": 184.0043487548828, + "learning_rate": 2.6417248752742374e-07, + "loss": 17.1081, + "step": 453230 + }, + { + "epoch": 0.9155734757612608, + "grad_norm": 380.4300231933594, + "learning_rate": 2.640605378574429e-07, + "loss": 26.9159, + "step": 453240 + }, + { + "epoch": 0.9155936763939446, + "grad_norm": 263.845703125, + "learning_rate": 2.639486112698564e-07, + "loss": 20.9774, + "step": 453250 + }, + { + "epoch": 0.9156138770266284, + "grad_norm": 513.1378173828125, + "learning_rate": 2.6383670776520933e-07, + "loss": 25.778, + "step": 453260 + }, + { + "epoch": 0.9156340776593123, + "grad_norm": 421.67596435546875, + "learning_rate": 2.637248273440479e-07, + "loss": 20.1958, + "step": 453270 + }, + { + "epoch": 0.9156542782919961, + "grad_norm": 430.4900207519531, + "learning_rate": 2.6361297000691787e-07, + "loss": 11.1612, + "step": 453280 + }, + { + "epoch": 0.9156744789246799, + "grad_norm": 284.833984375, + "learning_rate": 2.6350113575436266e-07, + "loss": 8.8739, + "step": 453290 + }, + { + "epoch": 0.9156946795573637, + "grad_norm": 409.1342468261719, + "learning_rate": 2.6338932458692847e-07, + "loss": 12.7456, + "step": 453300 + }, + { + "epoch": 0.9157148801900475, + "grad_norm": 1214.3470458984375, + "learning_rate": 2.6327753650516205e-07, + "loss": 46.1228, + "step": 453310 + }, + { + "epoch": 0.9157350808227314, + "grad_norm": 333.0218505859375, + "learning_rate": 2.631657715096048e-07, + "loss": 11.3531, + "step": 453320 + }, + { + "epoch": 0.9157552814554152, + "grad_norm": 98.55928802490234, + "learning_rate": 2.630540296008027e-07, + "loss": 5.1327, + "step": 453330 + }, + { + "epoch": 0.915775482088099, + "grad_norm": 674.3197021484375, + "learning_rate": 2.629423107793022e-07, + "loss": 23.4372, + "step": 453340 + }, + { + "epoch": 0.9157956827207828, + "grad_norm": 350.0754089355469, + "learning_rate": 2.6283061504564553e-07, + "loss": 14.2001, + "step": 453350 + }, + { + "epoch": 0.9158158833534666, + "grad_norm": 382.4748840332031, + "learning_rate": 2.6271894240037785e-07, + "loss": 17.2192, + "step": 453360 + }, + { + "epoch": 0.9158360839861505, + "grad_norm": 423.0195617675781, + "learning_rate": 2.626072928440432e-07, + "loss": 16.7564, + "step": 453370 + }, + { + "epoch": 0.9158562846188343, + "grad_norm": 386.2308654785156, + "learning_rate": 2.6249566637718714e-07, + "loss": 21.5422, + "step": 453380 + }, + { + "epoch": 0.9158764852515181, + "grad_norm": 250.33633422851562, + "learning_rate": 2.623840630003516e-07, + "loss": 12.7983, + "step": 453390 + }, + { + "epoch": 0.9158966858842019, + "grad_norm": 0.0, + "learning_rate": 2.622724827140816e-07, + "loss": 26.3335, + "step": 453400 + }, + { + "epoch": 0.9159168865168857, + "grad_norm": 266.47052001953125, + "learning_rate": 2.6216092551892116e-07, + "loss": 17.5531, + "step": 453410 + }, + { + "epoch": 0.9159370871495696, + "grad_norm": 374.290771484375, + "learning_rate": 2.6204939141541376e-07, + "loss": 17.8065, + "step": 453420 + }, + { + "epoch": 0.9159572877822534, + "grad_norm": 368.16546630859375, + "learning_rate": 2.6193788040410286e-07, + "loss": 22.8737, + "step": 453430 + }, + { + "epoch": 0.9159774884149372, + "grad_norm": 354.1971435546875, + "learning_rate": 2.618263924855324e-07, + "loss": 18.6407, + "step": 453440 + }, + { + "epoch": 0.915997689047621, + "grad_norm": 318.41448974609375, + "learning_rate": 2.617149276602454e-07, + "loss": 26.7618, + "step": 453450 + }, + { + "epoch": 0.9160178896803048, + "grad_norm": 4.6045918464660645, + "learning_rate": 2.616034859287847e-07, + "loss": 19.446, + "step": 453460 + }, + { + "epoch": 0.9160380903129886, + "grad_norm": 251.6952362060547, + "learning_rate": 2.614920672916943e-07, + "loss": 13.2661, + "step": 453470 + }, + { + "epoch": 0.9160582909456724, + "grad_norm": 194.96925354003906, + "learning_rate": 2.61380671749516e-07, + "loss": 12.1911, + "step": 453480 + }, + { + "epoch": 0.9160784915783562, + "grad_norm": 76.82911682128906, + "learning_rate": 2.6126929930279486e-07, + "loss": 18.5787, + "step": 453490 + }, + { + "epoch": 0.91609869221104, + "grad_norm": 472.5380859375, + "learning_rate": 2.611579499520722e-07, + "loss": 24.2646, + "step": 453500 + }, + { + "epoch": 0.9161188928437238, + "grad_norm": 149.053466796875, + "learning_rate": 2.610466236978898e-07, + "loss": 21.6734, + "step": 453510 + }, + { + "epoch": 0.9161390934764077, + "grad_norm": 177.4880828857422, + "learning_rate": 2.6093532054079276e-07, + "loss": 17.4667, + "step": 453520 + }, + { + "epoch": 0.9161592941090915, + "grad_norm": 147.05726623535156, + "learning_rate": 2.6082404048132114e-07, + "loss": 12.7033, + "step": 453530 + }, + { + "epoch": 0.9161794947417753, + "grad_norm": 410.37933349609375, + "learning_rate": 2.6071278352001904e-07, + "loss": 18.072, + "step": 453540 + }, + { + "epoch": 0.9161996953744591, + "grad_norm": 114.84696197509766, + "learning_rate": 2.606015496574277e-07, + "loss": 20.3603, + "step": 453550 + }, + { + "epoch": 0.9162198960071429, + "grad_norm": 125.01507568359375, + "learning_rate": 2.604903388940899e-07, + "loss": 23.2023, + "step": 453560 + }, + { + "epoch": 0.9162400966398268, + "grad_norm": 164.6056365966797, + "learning_rate": 2.60379151230547e-07, + "loss": 13.9271, + "step": 453570 + }, + { + "epoch": 0.9162602972725106, + "grad_norm": 285.9284973144531, + "learning_rate": 2.602679866673413e-07, + "loss": 18.8505, + "step": 453580 + }, + { + "epoch": 0.9162804979051944, + "grad_norm": 394.2331848144531, + "learning_rate": 2.601568452050146e-07, + "loss": 27.4004, + "step": 453590 + }, + { + "epoch": 0.9163006985378782, + "grad_norm": 301.9587707519531, + "learning_rate": 2.600457268441092e-07, + "loss": 21.1456, + "step": 453600 + }, + { + "epoch": 0.916320899170562, + "grad_norm": 417.49981689453125, + "learning_rate": 2.599346315851653e-07, + "loss": 17.002, + "step": 453610 + }, + { + "epoch": 0.9163410998032459, + "grad_norm": 1888.8367919921875, + "learning_rate": 2.598235594287246e-07, + "loss": 53.7972, + "step": 453620 + }, + { + "epoch": 0.9163613004359297, + "grad_norm": 2.622469186782837, + "learning_rate": 2.597125103753301e-07, + "loss": 17.3838, + "step": 453630 + }, + { + "epoch": 0.9163815010686135, + "grad_norm": 417.6042785644531, + "learning_rate": 2.596014844255218e-07, + "loss": 17.2987, + "step": 453640 + }, + { + "epoch": 0.9164017017012973, + "grad_norm": 831.579833984375, + "learning_rate": 2.594904815798399e-07, + "loss": 15.5263, + "step": 453650 + }, + { + "epoch": 0.9164219023339811, + "grad_norm": 425.7539978027344, + "learning_rate": 2.5937950183882734e-07, + "loss": 21.3763, + "step": 453660 + }, + { + "epoch": 0.916442102966665, + "grad_norm": 59.693458557128906, + "learning_rate": 2.5926854520302414e-07, + "loss": 30.4629, + "step": 453670 + }, + { + "epoch": 0.9164623035993488, + "grad_norm": 582.9328002929688, + "learning_rate": 2.591576116729705e-07, + "loss": 27.7754, + "step": 453680 + }, + { + "epoch": 0.9164825042320326, + "grad_norm": 186.99293518066406, + "learning_rate": 2.590467012492076e-07, + "loss": 20.5637, + "step": 453690 + }, + { + "epoch": 0.9165027048647164, + "grad_norm": 334.56634521484375, + "learning_rate": 2.589358139322767e-07, + "loss": 21.4026, + "step": 453700 + }, + { + "epoch": 0.9165229054974002, + "grad_norm": 414.5623779296875, + "learning_rate": 2.5882494972271746e-07, + "loss": 10.8386, + "step": 453710 + }, + { + "epoch": 0.9165431061300839, + "grad_norm": 290.22833251953125, + "learning_rate": 2.587141086210698e-07, + "loss": 10.4992, + "step": 453720 + }, + { + "epoch": 0.9165633067627678, + "grad_norm": 399.252197265625, + "learning_rate": 2.586032906278757e-07, + "loss": 16.0113, + "step": 453730 + }, + { + "epoch": 0.9165835073954516, + "grad_norm": 763.1583862304688, + "learning_rate": 2.584924957436735e-07, + "loss": 19.304, + "step": 453740 + }, + { + "epoch": 0.9166037080281354, + "grad_norm": 534.4200439453125, + "learning_rate": 2.583817239690034e-07, + "loss": 13.5357, + "step": 453750 + }, + { + "epoch": 0.9166239086608192, + "grad_norm": 162.89605712890625, + "learning_rate": 2.5827097530440605e-07, + "loss": 19.0077, + "step": 453760 + }, + { + "epoch": 0.916644109293503, + "grad_norm": 399.8823547363281, + "learning_rate": 2.581602497504204e-07, + "loss": 13.0953, + "step": 453770 + }, + { + "epoch": 0.9166643099261869, + "grad_norm": 227.8401336669922, + "learning_rate": 2.580495473075878e-07, + "loss": 12.9742, + "step": 453780 + }, + { + "epoch": 0.9166845105588707, + "grad_norm": 545.9440307617188, + "learning_rate": 2.579388679764455e-07, + "loss": 20.8741, + "step": 453790 + }, + { + "epoch": 0.9167047111915545, + "grad_norm": 400.27392578125, + "learning_rate": 2.578282117575343e-07, + "loss": 20.9395, + "step": 453800 + }, + { + "epoch": 0.9167249118242383, + "grad_norm": 30.04183578491211, + "learning_rate": 2.577175786513936e-07, + "loss": 33.2292, + "step": 453810 + }, + { + "epoch": 0.9167451124569221, + "grad_norm": 386.2145080566406, + "learning_rate": 2.576069686585614e-07, + "loss": 11.1608, + "step": 453820 + }, + { + "epoch": 0.916765313089606, + "grad_norm": 608.4188232421875, + "learning_rate": 2.5749638177957834e-07, + "loss": 24.0531, + "step": 453830 + }, + { + "epoch": 0.9167855137222898, + "grad_norm": 363.9738464355469, + "learning_rate": 2.5738581801498187e-07, + "loss": 25.8142, + "step": 453840 + }, + { + "epoch": 0.9168057143549736, + "grad_norm": 498.20318603515625, + "learning_rate": 2.5727527736531256e-07, + "loss": 22.7052, + "step": 453850 + }, + { + "epoch": 0.9168259149876574, + "grad_norm": 287.67144775390625, + "learning_rate": 2.571647598311089e-07, + "loss": 28.2199, + "step": 453860 + }, + { + "epoch": 0.9168461156203412, + "grad_norm": 356.0575866699219, + "learning_rate": 2.5705426541290765e-07, + "loss": 28.7977, + "step": 453870 + }, + { + "epoch": 0.9168663162530251, + "grad_norm": 364.90789794921875, + "learning_rate": 2.56943794111249e-07, + "loss": 13.9711, + "step": 453880 + }, + { + "epoch": 0.9168865168857089, + "grad_norm": 352.82049560546875, + "learning_rate": 2.5683334592667195e-07, + "loss": 17.8761, + "step": 453890 + }, + { + "epoch": 0.9169067175183927, + "grad_norm": 500.10650634765625, + "learning_rate": 2.5672292085971276e-07, + "loss": 19.1851, + "step": 453900 + }, + { + "epoch": 0.9169269181510765, + "grad_norm": 606.4287719726562, + "learning_rate": 2.5661251891091087e-07, + "loss": 29.4943, + "step": 453910 + }, + { + "epoch": 0.9169471187837603, + "grad_norm": 287.93060302734375, + "learning_rate": 2.5650214008080544e-07, + "loss": 11.703, + "step": 453920 + }, + { + "epoch": 0.9169673194164442, + "grad_norm": 257.391357421875, + "learning_rate": 2.5639178436993205e-07, + "loss": 7.1054, + "step": 453930 + }, + { + "epoch": 0.916987520049128, + "grad_norm": 299.9127502441406, + "learning_rate": 2.5628145177882926e-07, + "loss": 14.3462, + "step": 453940 + }, + { + "epoch": 0.9170077206818118, + "grad_norm": 271.0744934082031, + "learning_rate": 2.561711423080365e-07, + "loss": 16.4024, + "step": 453950 + }, + { + "epoch": 0.9170279213144956, + "grad_norm": 438.03692626953125, + "learning_rate": 2.5606085595809015e-07, + "loss": 19.034, + "step": 453960 + }, + { + "epoch": 0.9170481219471794, + "grad_norm": 61.587188720703125, + "learning_rate": 2.559505927295275e-07, + "loss": 13.6311, + "step": 453970 + }, + { + "epoch": 0.9170683225798631, + "grad_norm": 294.6064758300781, + "learning_rate": 2.5584035262288585e-07, + "loss": 15.7928, + "step": 453980 + }, + { + "epoch": 0.917088523212547, + "grad_norm": 643.0828247070312, + "learning_rate": 2.557301356387043e-07, + "loss": 15.0719, + "step": 453990 + }, + { + "epoch": 0.9171087238452308, + "grad_norm": 880.5946044921875, + "learning_rate": 2.556199417775174e-07, + "loss": 20.1695, + "step": 454000 + }, + { + "epoch": 0.9171289244779146, + "grad_norm": 547.711669921875, + "learning_rate": 2.555097710398635e-07, + "loss": 16.7549, + "step": 454010 + }, + { + "epoch": 0.9171491251105984, + "grad_norm": 849.7205200195312, + "learning_rate": 2.553996234262801e-07, + "loss": 19.2328, + "step": 454020 + }, + { + "epoch": 0.9171693257432822, + "grad_norm": 251.0746307373047, + "learning_rate": 2.5528949893730393e-07, + "loss": 11.8855, + "step": 454030 + }, + { + "epoch": 0.9171895263759661, + "grad_norm": 719.6102905273438, + "learning_rate": 2.551793975734701e-07, + "loss": 24.0901, + "step": 454040 + }, + { + "epoch": 0.9172097270086499, + "grad_norm": 897.33056640625, + "learning_rate": 2.550693193353171e-07, + "loss": 19.3591, + "step": 454050 + }, + { + "epoch": 0.9172299276413337, + "grad_norm": 1040.27001953125, + "learning_rate": 2.5495926422338115e-07, + "loss": 31.5606, + "step": 454060 + }, + { + "epoch": 0.9172501282740175, + "grad_norm": 364.1861572265625, + "learning_rate": 2.548492322381968e-07, + "loss": 18.4093, + "step": 454070 + }, + { + "epoch": 0.9172703289067013, + "grad_norm": 278.04632568359375, + "learning_rate": 2.547392233803031e-07, + "loss": 10.8303, + "step": 454080 + }, + { + "epoch": 0.9172905295393852, + "grad_norm": 7105.796875, + "learning_rate": 2.5462923765023404e-07, + "loss": 99.3802, + "step": 454090 + }, + { + "epoch": 0.917310730172069, + "grad_norm": 169.97467041015625, + "learning_rate": 2.5451927504852757e-07, + "loss": 15.1717, + "step": 454100 + }, + { + "epoch": 0.9173309308047528, + "grad_norm": 299.2624206542969, + "learning_rate": 2.544093355757171e-07, + "loss": 13.147, + "step": 454110 + }, + { + "epoch": 0.9173511314374366, + "grad_norm": 228.32669067382812, + "learning_rate": 2.5429941923234114e-07, + "loss": 18.2561, + "step": 454120 + }, + { + "epoch": 0.9173713320701204, + "grad_norm": 1094.8414306640625, + "learning_rate": 2.541895260189342e-07, + "loss": 21.1897, + "step": 454130 + }, + { + "epoch": 0.9173915327028043, + "grad_norm": 386.129638671875, + "learning_rate": 2.5407965593603147e-07, + "loss": 9.8599, + "step": 454140 + }, + { + "epoch": 0.9174117333354881, + "grad_norm": 22.72983169555664, + "learning_rate": 2.539698089841691e-07, + "loss": 28.813, + "step": 454150 + }, + { + "epoch": 0.9174319339681719, + "grad_norm": 307.68890380859375, + "learning_rate": 2.538599851638818e-07, + "loss": 13.8743, + "step": 454160 + }, + { + "epoch": 0.9174521346008557, + "grad_norm": 477.8509216308594, + "learning_rate": 2.537501844757062e-07, + "loss": 11.512, + "step": 454170 + }, + { + "epoch": 0.9174723352335395, + "grad_norm": 852.4013061523438, + "learning_rate": 2.5364040692017644e-07, + "loss": 17.6096, + "step": 454180 + }, + { + "epoch": 0.9174925358662234, + "grad_norm": 0.0, + "learning_rate": 2.5353065249782647e-07, + "loss": 19.7076, + "step": 454190 + }, + { + "epoch": 0.9175127364989072, + "grad_norm": 74.62406921386719, + "learning_rate": 2.534209212091937e-07, + "loss": 19.8793, + "step": 454200 + }, + { + "epoch": 0.917532937131591, + "grad_norm": 248.11647033691406, + "learning_rate": 2.5331121305481154e-07, + "loss": 13.3752, + "step": 454210 + }, + { + "epoch": 0.9175531377642748, + "grad_norm": 529.7850952148438, + "learning_rate": 2.53201528035214e-07, + "loss": 17.367, + "step": 454220 + }, + { + "epoch": 0.9175733383969585, + "grad_norm": 640.7024536132812, + "learning_rate": 2.530918661509368e-07, + "loss": 16.6216, + "step": 454230 + }, + { + "epoch": 0.9175935390296424, + "grad_norm": 394.8306579589844, + "learning_rate": 2.529822274025151e-07, + "loss": 34.8331, + "step": 454240 + }, + { + "epoch": 0.9176137396623262, + "grad_norm": 270.83837890625, + "learning_rate": 2.5287261179048117e-07, + "loss": 14.5951, + "step": 454250 + }, + { + "epoch": 0.91763394029501, + "grad_norm": 38.40205001831055, + "learning_rate": 2.5276301931537015e-07, + "loss": 11.557, + "step": 454260 + }, + { + "epoch": 0.9176541409276938, + "grad_norm": 283.07025146484375, + "learning_rate": 2.5265344997771726e-07, + "loss": 13.8433, + "step": 454270 + }, + { + "epoch": 0.9176743415603776, + "grad_norm": 469.7500915527344, + "learning_rate": 2.525439037780558e-07, + "loss": 18.4683, + "step": 454280 + }, + { + "epoch": 0.9176945421930615, + "grad_norm": 574.3396606445312, + "learning_rate": 2.5243438071691826e-07, + "loss": 11.2677, + "step": 454290 + }, + { + "epoch": 0.9177147428257453, + "grad_norm": 244.5355682373047, + "learning_rate": 2.523248807948403e-07, + "loss": 19.2707, + "step": 454300 + }, + { + "epoch": 0.9177349434584291, + "grad_norm": 128.16741943359375, + "learning_rate": 2.522154040123559e-07, + "loss": 31.2086, + "step": 454310 + }, + { + "epoch": 0.9177551440911129, + "grad_norm": 91.43888854980469, + "learning_rate": 2.521059503699974e-07, + "loss": 50.0533, + "step": 454320 + }, + { + "epoch": 0.9177753447237967, + "grad_norm": 523.4545288085938, + "learning_rate": 2.5199651986829777e-07, + "loss": 9.7866, + "step": 454330 + }, + { + "epoch": 0.9177955453564806, + "grad_norm": 693.93310546875, + "learning_rate": 2.518871125077926e-07, + "loss": 27.2713, + "step": 454340 + }, + { + "epoch": 0.9178157459891644, + "grad_norm": 682.9830932617188, + "learning_rate": 2.5177772828901327e-07, + "loss": 17.2413, + "step": 454350 + }, + { + "epoch": 0.9178359466218482, + "grad_norm": 181.77784729003906, + "learning_rate": 2.5166836721249254e-07, + "loss": 23.461, + "step": 454360 + }, + { + "epoch": 0.917856147254532, + "grad_norm": 505.5752258300781, + "learning_rate": 2.515590292787656e-07, + "loss": 14.8428, + "step": 454370 + }, + { + "epoch": 0.9178763478872158, + "grad_norm": 173.8253173828125, + "learning_rate": 2.5144971448836263e-07, + "loss": 19.8574, + "step": 454380 + }, + { + "epoch": 0.9178965485198997, + "grad_norm": 465.9610595703125, + "learning_rate": 2.5134042284181927e-07, + "loss": 11.3948, + "step": 454390 + }, + { + "epoch": 0.9179167491525835, + "grad_norm": 168.74269104003906, + "learning_rate": 2.5123115433966615e-07, + "loss": 16.2418, + "step": 454400 + }, + { + "epoch": 0.9179369497852673, + "grad_norm": 270.9518737792969, + "learning_rate": 2.5112190898243627e-07, + "loss": 26.7706, + "step": 454410 + }, + { + "epoch": 0.9179571504179511, + "grad_norm": 157.22084045410156, + "learning_rate": 2.5101268677066247e-07, + "loss": 16.8187, + "step": 454420 + }, + { + "epoch": 0.9179773510506349, + "grad_norm": 1096.5396728515625, + "learning_rate": 2.5090348770487604e-07, + "loss": 17.5447, + "step": 454430 + }, + { + "epoch": 0.9179975516833188, + "grad_norm": 61.21339797973633, + "learning_rate": 2.50794311785611e-07, + "loss": 14.8221, + "step": 454440 + }, + { + "epoch": 0.9180177523160026, + "grad_norm": 505.9404602050781, + "learning_rate": 2.5068515901339794e-07, + "loss": 15.3988, + "step": 454450 + }, + { + "epoch": 0.9180379529486864, + "grad_norm": 299.4237365722656, + "learning_rate": 2.505760293887699e-07, + "loss": 10.9603, + "step": 454460 + }, + { + "epoch": 0.9180581535813702, + "grad_norm": 218.8555145263672, + "learning_rate": 2.5046692291225803e-07, + "loss": 5.6634, + "step": 454470 + }, + { + "epoch": 0.918078354214054, + "grad_norm": 427.2490539550781, + "learning_rate": 2.503578395843936e-07, + "loss": 15.9759, + "step": 454480 + }, + { + "epoch": 0.9180985548467377, + "grad_norm": 130.83230590820312, + "learning_rate": 2.502487794057101e-07, + "loss": 20.5652, + "step": 454490 + }, + { + "epoch": 0.9181187554794216, + "grad_norm": 522.4658203125, + "learning_rate": 2.5013974237673824e-07, + "loss": 21.8345, + "step": 454500 + }, + { + "epoch": 0.9181389561121054, + "grad_norm": 0.0, + "learning_rate": 2.50030728498008e-07, + "loss": 18.9641, + "step": 454510 + }, + { + "epoch": 0.9181591567447892, + "grad_norm": 367.7684020996094, + "learning_rate": 2.499217377700519e-07, + "loss": 19.4399, + "step": 454520 + }, + { + "epoch": 0.918179357377473, + "grad_norm": 331.11248779296875, + "learning_rate": 2.498127701934022e-07, + "loss": 13.3787, + "step": 454530 + }, + { + "epoch": 0.9181995580101568, + "grad_norm": 173.78582763671875, + "learning_rate": 2.49703825768588e-07, + "loss": 15.3256, + "step": 454540 + }, + { + "epoch": 0.9182197586428407, + "grad_norm": 425.071533203125, + "learning_rate": 2.49594904496141e-07, + "loss": 23.0658, + "step": 454550 + }, + { + "epoch": 0.9182399592755245, + "grad_norm": 405.1095275878906, + "learning_rate": 2.494860063765936e-07, + "loss": 31.5979, + "step": 454560 + }, + { + "epoch": 0.9182601599082083, + "grad_norm": 200.28211975097656, + "learning_rate": 2.493771314104743e-07, + "loss": 12.9257, + "step": 454570 + }, + { + "epoch": 0.9182803605408921, + "grad_norm": 636.622802734375, + "learning_rate": 2.492682795983148e-07, + "loss": 26.3459, + "step": 454580 + }, + { + "epoch": 0.918300561173576, + "grad_norm": 449.2371520996094, + "learning_rate": 2.4915945094064476e-07, + "loss": 21.1839, + "step": 454590 + }, + { + "epoch": 0.9183207618062598, + "grad_norm": 494.1687927246094, + "learning_rate": 2.4905064543799706e-07, + "loss": 34.0085, + "step": 454600 + }, + { + "epoch": 0.9183409624389436, + "grad_norm": 625.0794067382812, + "learning_rate": 2.4894186309089906e-07, + "loss": 28.0983, + "step": 454610 + }, + { + "epoch": 0.9183611630716274, + "grad_norm": 321.6091613769531, + "learning_rate": 2.48833103899882e-07, + "loss": 10.3564, + "step": 454620 + }, + { + "epoch": 0.9183813637043112, + "grad_norm": 101.4336166381836, + "learning_rate": 2.487243678654772e-07, + "loss": 24.6452, + "step": 454630 + }, + { + "epoch": 0.918401564336995, + "grad_norm": 319.6427001953125, + "learning_rate": 2.486156549882135e-07, + "loss": 24.801, + "step": 454640 + }, + { + "epoch": 0.9184217649696789, + "grad_norm": 511.2293701171875, + "learning_rate": 2.485069652686195e-07, + "loss": 21.2323, + "step": 454650 + }, + { + "epoch": 0.9184419656023627, + "grad_norm": 1020.9444580078125, + "learning_rate": 2.4839829870722756e-07, + "loss": 20.0905, + "step": 454660 + }, + { + "epoch": 0.9184621662350465, + "grad_norm": 81.75325012207031, + "learning_rate": 2.482896553045661e-07, + "loss": 14.3172, + "step": 454670 + }, + { + "epoch": 0.9184823668677303, + "grad_norm": 431.46844482421875, + "learning_rate": 2.4818103506116355e-07, + "loss": 19.0111, + "step": 454680 + }, + { + "epoch": 0.9185025675004141, + "grad_norm": 302.5388488769531, + "learning_rate": 2.4807243797755064e-07, + "loss": 17.3512, + "step": 454690 + }, + { + "epoch": 0.918522768133098, + "grad_norm": 289.9342041015625, + "learning_rate": 2.479638640542564e-07, + "loss": 21.5833, + "step": 454700 + }, + { + "epoch": 0.9185429687657818, + "grad_norm": 292.45037841796875, + "learning_rate": 2.478553132918099e-07, + "loss": 15.3687, + "step": 454710 + }, + { + "epoch": 0.9185631693984656, + "grad_norm": 312.3753356933594, + "learning_rate": 2.477467856907401e-07, + "loss": 21.3034, + "step": 454720 + }, + { + "epoch": 0.9185833700311494, + "grad_norm": 407.63671875, + "learning_rate": 2.4763828125157654e-07, + "loss": 21.7776, + "step": 454730 + }, + { + "epoch": 0.9186035706638332, + "grad_norm": 203.3709716796875, + "learning_rate": 2.4752979997484774e-07, + "loss": 27.606, + "step": 454740 + }, + { + "epoch": 0.918623771296517, + "grad_norm": 219.9977264404297, + "learning_rate": 2.474213418610816e-07, + "loss": 10.9427, + "step": 454750 + }, + { + "epoch": 0.9186439719292008, + "grad_norm": 118.15936279296875, + "learning_rate": 2.4731290691080766e-07, + "loss": 14.6467, + "step": 454760 + }, + { + "epoch": 0.9186641725618846, + "grad_norm": 804.6563110351562, + "learning_rate": 2.472044951245539e-07, + "loss": 14.8232, + "step": 454770 + }, + { + "epoch": 0.9186843731945684, + "grad_norm": 118.620849609375, + "learning_rate": 2.470961065028499e-07, + "loss": 8.4102, + "step": 454780 + }, + { + "epoch": 0.9187045738272522, + "grad_norm": 88.24150085449219, + "learning_rate": 2.4698774104622235e-07, + "loss": 16.5455, + "step": 454790 + }, + { + "epoch": 0.918724774459936, + "grad_norm": 339.7093200683594, + "learning_rate": 2.4687939875519984e-07, + "loss": 12.3436, + "step": 454800 + }, + { + "epoch": 0.9187449750926199, + "grad_norm": 1048.246826171875, + "learning_rate": 2.4677107963031134e-07, + "loss": 22.8728, + "step": 454810 + }, + { + "epoch": 0.9187651757253037, + "grad_norm": 0.4774147868156433, + "learning_rate": 2.4666278367208417e-07, + "loss": 9.4854, + "step": 454820 + }, + { + "epoch": 0.9187853763579875, + "grad_norm": 353.9394226074219, + "learning_rate": 2.465545108810452e-07, + "loss": 12.8216, + "step": 454830 + }, + { + "epoch": 0.9188055769906713, + "grad_norm": 17.323734283447266, + "learning_rate": 2.464462612577234e-07, + "loss": 20.1536, + "step": 454840 + }, + { + "epoch": 0.9188257776233552, + "grad_norm": 464.7160339355469, + "learning_rate": 2.463380348026467e-07, + "loss": 19.4973, + "step": 454850 + }, + { + "epoch": 0.918845978256039, + "grad_norm": 355.2198486328125, + "learning_rate": 2.4622983151634083e-07, + "loss": 22.1437, + "step": 454860 + }, + { + "epoch": 0.9188661788887228, + "grad_norm": 98.9280776977539, + "learning_rate": 2.461216513993342e-07, + "loss": 7.2628, + "step": 454870 + }, + { + "epoch": 0.9188863795214066, + "grad_norm": 291.2118225097656, + "learning_rate": 2.460134944521547e-07, + "loss": 15.7866, + "step": 454880 + }, + { + "epoch": 0.9189065801540904, + "grad_norm": 68.61520385742188, + "learning_rate": 2.459053606753292e-07, + "loss": 27.5811, + "step": 454890 + }, + { + "epoch": 0.9189267807867743, + "grad_norm": 555.388671875, + "learning_rate": 2.457972500693834e-07, + "loss": 27.5479, + "step": 454900 + }, + { + "epoch": 0.9189469814194581, + "grad_norm": 220.7833251953125, + "learning_rate": 2.456891626348451e-07, + "loss": 11.7957, + "step": 454910 + }, + { + "epoch": 0.9189671820521419, + "grad_norm": 337.0918884277344, + "learning_rate": 2.455810983722429e-07, + "loss": 21.3151, + "step": 454920 + }, + { + "epoch": 0.9189873826848257, + "grad_norm": 540.9188842773438, + "learning_rate": 2.4547305728210015e-07, + "loss": 19.5621, + "step": 454930 + }, + { + "epoch": 0.9190075833175095, + "grad_norm": 21.81239891052246, + "learning_rate": 2.453650393649448e-07, + "loss": 19.4124, + "step": 454940 + }, + { + "epoch": 0.9190277839501934, + "grad_norm": 145.65936279296875, + "learning_rate": 2.4525704462130485e-07, + "loss": 10.6751, + "step": 454950 + }, + { + "epoch": 0.9190479845828772, + "grad_norm": 368.0899658203125, + "learning_rate": 2.4514907305170476e-07, + "loss": 13.6209, + "step": 454960 + }, + { + "epoch": 0.919068185215561, + "grad_norm": 228.02255249023438, + "learning_rate": 2.4504112465667085e-07, + "loss": 15.9716, + "step": 454970 + }, + { + "epoch": 0.9190883858482448, + "grad_norm": 207.96987915039062, + "learning_rate": 2.44933199436731e-07, + "loss": 9.5194, + "step": 454980 + }, + { + "epoch": 0.9191085864809286, + "grad_norm": 123.552490234375, + "learning_rate": 2.448252973924087e-07, + "loss": 18.464, + "step": 454990 + }, + { + "epoch": 0.9191287871136123, + "grad_norm": 89.00115203857422, + "learning_rate": 2.447174185242324e-07, + "loss": 16.1351, + "step": 455000 + }, + { + "epoch": 0.9191489877462962, + "grad_norm": 24.298992156982422, + "learning_rate": 2.446095628327261e-07, + "loss": 20.8026, + "step": 455010 + }, + { + "epoch": 0.91916918837898, + "grad_norm": 263.0375671386719, + "learning_rate": 2.4450173031841607e-07, + "loss": 18.0441, + "step": 455020 + }, + { + "epoch": 0.9191893890116638, + "grad_norm": 294.327392578125, + "learning_rate": 2.4439392098182804e-07, + "loss": 19.281, + "step": 455030 + }, + { + "epoch": 0.9192095896443476, + "grad_norm": 4.164112091064453, + "learning_rate": 2.442861348234865e-07, + "loss": 20.7177, + "step": 455040 + }, + { + "epoch": 0.9192297902770314, + "grad_norm": 289.1728210449219, + "learning_rate": 2.4417837184391833e-07, + "loss": 18.8782, + "step": 455050 + }, + { + "epoch": 0.9192499909097153, + "grad_norm": 171.1140899658203, + "learning_rate": 2.4407063204364703e-07, + "loss": 16.9614, + "step": 455060 + }, + { + "epoch": 0.9192701915423991, + "grad_norm": 142.26199340820312, + "learning_rate": 2.4396291542319985e-07, + "loss": 9.5314, + "step": 455070 + }, + { + "epoch": 0.9192903921750829, + "grad_norm": 285.46533203125, + "learning_rate": 2.438552219831003e-07, + "loss": 19.4052, + "step": 455080 + }, + { + "epoch": 0.9193105928077667, + "grad_norm": 98.38275909423828, + "learning_rate": 2.43747551723873e-07, + "loss": 13.5849, + "step": 455090 + }, + { + "epoch": 0.9193307934404505, + "grad_norm": 71.43067169189453, + "learning_rate": 2.4363990464604357e-07, + "loss": 13.79, + "step": 455100 + }, + { + "epoch": 0.9193509940731344, + "grad_norm": 370.7507019042969, + "learning_rate": 2.435322807501367e-07, + "loss": 11.0951, + "step": 455110 + }, + { + "epoch": 0.9193711947058182, + "grad_norm": 230.6715850830078, + "learning_rate": 2.4342468003667576e-07, + "loss": 13.7054, + "step": 455120 + }, + { + "epoch": 0.919391395338502, + "grad_norm": 126.7361068725586, + "learning_rate": 2.4331710250618647e-07, + "loss": 13.2743, + "step": 455130 + }, + { + "epoch": 0.9194115959711858, + "grad_norm": 314.3231201171875, + "learning_rate": 2.432095481591934e-07, + "loss": 15.3865, + "step": 455140 + }, + { + "epoch": 0.9194317966038696, + "grad_norm": 329.8556213378906, + "learning_rate": 2.4310201699621896e-07, + "loss": 6.9519, + "step": 455150 + }, + { + "epoch": 0.9194519972365535, + "grad_norm": 1005.0066528320312, + "learning_rate": 2.429945090177888e-07, + "loss": 16.0441, + "step": 455160 + }, + { + "epoch": 0.9194721978692373, + "grad_norm": 642.8953247070312, + "learning_rate": 2.4288702422442633e-07, + "loss": 21.0964, + "step": 455170 + }, + { + "epoch": 0.9194923985019211, + "grad_norm": 365.2342224121094, + "learning_rate": 2.4277956261665624e-07, + "loss": 23.2442, + "step": 455180 + }, + { + "epoch": 0.9195125991346049, + "grad_norm": 618.9346923828125, + "learning_rate": 2.426721241950003e-07, + "loss": 20.1788, + "step": 455190 + }, + { + "epoch": 0.9195327997672887, + "grad_norm": 18.485822677612305, + "learning_rate": 2.4256470895998363e-07, + "loss": 10.9366, + "step": 455200 + }, + { + "epoch": 0.9195530003999726, + "grad_norm": 779.13916015625, + "learning_rate": 2.4245731691213137e-07, + "loss": 21.7145, + "step": 455210 + }, + { + "epoch": 0.9195732010326564, + "grad_norm": 243.8010711669922, + "learning_rate": 2.423499480519631e-07, + "loss": 12.4242, + "step": 455220 + }, + { + "epoch": 0.9195934016653402, + "grad_norm": 560.136474609375, + "learning_rate": 2.4224260238000454e-07, + "loss": 17.1233, + "step": 455230 + }, + { + "epoch": 0.919613602298024, + "grad_norm": 554.8250122070312, + "learning_rate": 2.421352798967791e-07, + "loss": 21.4487, + "step": 455240 + }, + { + "epoch": 0.9196338029307078, + "grad_norm": 26.497678756713867, + "learning_rate": 2.420279806028092e-07, + "loss": 16.912, + "step": 455250 + }, + { + "epoch": 0.9196540035633916, + "grad_norm": 335.10186767578125, + "learning_rate": 2.4192070449861717e-07, + "loss": 17.4943, + "step": 455260 + }, + { + "epoch": 0.9196742041960754, + "grad_norm": 429.57232666015625, + "learning_rate": 2.418134515847276e-07, + "loss": 16.5135, + "step": 455270 + }, + { + "epoch": 0.9196944048287592, + "grad_norm": 488.39642333984375, + "learning_rate": 2.417062218616617e-07, + "loss": 18.5194, + "step": 455280 + }, + { + "epoch": 0.919714605461443, + "grad_norm": 113.37300872802734, + "learning_rate": 2.415990153299419e-07, + "loss": 9.4051, + "step": 455290 + }, + { + "epoch": 0.9197348060941268, + "grad_norm": 277.68145751953125, + "learning_rate": 2.414918319900922e-07, + "loss": 15.3822, + "step": 455300 + }, + { + "epoch": 0.9197550067268107, + "grad_norm": 112.81552124023438, + "learning_rate": 2.413846718426338e-07, + "loss": 8.4589, + "step": 455310 + }, + { + "epoch": 0.9197752073594945, + "grad_norm": 36.12300491333008, + "learning_rate": 2.412775348880897e-07, + "loss": 11.6502, + "step": 455320 + }, + { + "epoch": 0.9197954079921783, + "grad_norm": 292.3298034667969, + "learning_rate": 2.4117042112698107e-07, + "loss": 22.0428, + "step": 455330 + }, + { + "epoch": 0.9198156086248621, + "grad_norm": 442.1577453613281, + "learning_rate": 2.410633305598309e-07, + "loss": 10.4833, + "step": 455340 + }, + { + "epoch": 0.9198358092575459, + "grad_norm": 704.8218383789062, + "learning_rate": 2.4095626318716146e-07, + "loss": 17.864, + "step": 455350 + }, + { + "epoch": 0.9198560098902298, + "grad_norm": 6.83566951751709, + "learning_rate": 2.40849219009493e-07, + "loss": 16.1764, + "step": 455360 + }, + { + "epoch": 0.9198762105229136, + "grad_norm": 534.9774780273438, + "learning_rate": 2.407421980273489e-07, + "loss": 15.982, + "step": 455370 + }, + { + "epoch": 0.9198964111555974, + "grad_norm": 246.0690460205078, + "learning_rate": 2.406352002412499e-07, + "loss": 12.9856, + "step": 455380 + }, + { + "epoch": 0.9199166117882812, + "grad_norm": 541.630126953125, + "learning_rate": 2.4052822565171775e-07, + "loss": 35.701, + "step": 455390 + }, + { + "epoch": 0.919936812420965, + "grad_norm": 363.7542419433594, + "learning_rate": 2.404212742592743e-07, + "loss": 11.3771, + "step": 455400 + }, + { + "epoch": 0.9199570130536489, + "grad_norm": 228.14138793945312, + "learning_rate": 2.4031434606443914e-07, + "loss": 18.2189, + "step": 455410 + }, + { + "epoch": 0.9199772136863327, + "grad_norm": 1217.557861328125, + "learning_rate": 2.4020744106773573e-07, + "loss": 16.9061, + "step": 455420 + }, + { + "epoch": 0.9199974143190165, + "grad_norm": 165.05870056152344, + "learning_rate": 2.401005592696837e-07, + "loss": 5.6709, + "step": 455430 + }, + { + "epoch": 0.9200176149517003, + "grad_norm": 351.95660400390625, + "learning_rate": 2.399937006708036e-07, + "loss": 15.8542, + "step": 455440 + }, + { + "epoch": 0.9200378155843841, + "grad_norm": 201.84127807617188, + "learning_rate": 2.3988686527161686e-07, + "loss": 10.515, + "step": 455450 + }, + { + "epoch": 0.920058016217068, + "grad_norm": 519.5714111328125, + "learning_rate": 2.3978005307264517e-07, + "loss": 15.5738, + "step": 455460 + }, + { + "epoch": 0.9200782168497518, + "grad_norm": 326.5455627441406, + "learning_rate": 2.396732640744076e-07, + "loss": 16.2957, + "step": 455470 + }, + { + "epoch": 0.9200984174824356, + "grad_norm": 346.3192443847656, + "learning_rate": 2.395664982774243e-07, + "loss": 10.8901, + "step": 455480 + }, + { + "epoch": 0.9201186181151194, + "grad_norm": 306.8151550292969, + "learning_rate": 2.3945975568221814e-07, + "loss": 12.2765, + "step": 455490 + }, + { + "epoch": 0.9201388187478032, + "grad_norm": 392.6286315917969, + "learning_rate": 2.3935303628930705e-07, + "loss": 19.6382, + "step": 455500 + }, + { + "epoch": 0.9201590193804869, + "grad_norm": 130.42257690429688, + "learning_rate": 2.392463400992112e-07, + "loss": 26.4354, + "step": 455510 + }, + { + "epoch": 0.9201792200131708, + "grad_norm": 803.9397583007812, + "learning_rate": 2.3913966711245185e-07, + "loss": 24.5252, + "step": 455520 + }, + { + "epoch": 0.9201994206458546, + "grad_norm": 530.8009033203125, + "learning_rate": 2.390330173295491e-07, + "loss": 17.6739, + "step": 455530 + }, + { + "epoch": 0.9202196212785384, + "grad_norm": 526.763671875, + "learning_rate": 2.389263907510209e-07, + "loss": 21.4294, + "step": 455540 + }, + { + "epoch": 0.9202398219112222, + "grad_norm": 629.4849853515625, + "learning_rate": 2.388197873773879e-07, + "loss": 19.7043, + "step": 455550 + }, + { + "epoch": 0.920260022543906, + "grad_norm": 201.6699981689453, + "learning_rate": 2.387132072091708e-07, + "loss": 17.9511, + "step": 455560 + }, + { + "epoch": 0.9202802231765899, + "grad_norm": 579.7809448242188, + "learning_rate": 2.3860665024688757e-07, + "loss": 18.4522, + "step": 455570 + }, + { + "epoch": 0.9203004238092737, + "grad_norm": 169.97409057617188, + "learning_rate": 2.3850011649105774e-07, + "loss": 16.2272, + "step": 455580 + }, + { + "epoch": 0.9203206244419575, + "grad_norm": 380.0137023925781, + "learning_rate": 2.3839360594220094e-07, + "loss": 8.5757, + "step": 455590 + }, + { + "epoch": 0.9203408250746413, + "grad_norm": 528.6257934570312, + "learning_rate": 2.3828711860083676e-07, + "loss": 11.0365, + "step": 455600 + }, + { + "epoch": 0.9203610257073251, + "grad_norm": 328.374755859375, + "learning_rate": 2.3818065446748306e-07, + "loss": 15.4559, + "step": 455610 + }, + { + "epoch": 0.920381226340009, + "grad_norm": 558.6610717773438, + "learning_rate": 2.380742135426589e-07, + "loss": 28.1747, + "step": 455620 + }, + { + "epoch": 0.9204014269726928, + "grad_norm": 217.7898712158203, + "learning_rate": 2.3796779582688444e-07, + "loss": 28.7116, + "step": 455630 + }, + { + "epoch": 0.9204216276053766, + "grad_norm": 835.9548950195312, + "learning_rate": 2.3786140132067703e-07, + "loss": 14.7242, + "step": 455640 + }, + { + "epoch": 0.9204418282380604, + "grad_norm": 386.291748046875, + "learning_rate": 2.3775503002455514e-07, + "loss": 17.8009, + "step": 455650 + }, + { + "epoch": 0.9204620288707442, + "grad_norm": 310.6231384277344, + "learning_rate": 2.3764868193903835e-07, + "loss": 36.5693, + "step": 455660 + }, + { + "epoch": 0.9204822295034281, + "grad_norm": 138.1139373779297, + "learning_rate": 2.3754235706464457e-07, + "loss": 14.5568, + "step": 455670 + }, + { + "epoch": 0.9205024301361119, + "grad_norm": 622.2490844726562, + "learning_rate": 2.3743605540189063e-07, + "loss": 23.2112, + "step": 455680 + }, + { + "epoch": 0.9205226307687957, + "grad_norm": 221.71322631835938, + "learning_rate": 2.3732977695129612e-07, + "loss": 17.7805, + "step": 455690 + }, + { + "epoch": 0.9205428314014795, + "grad_norm": 127.730712890625, + "learning_rate": 2.3722352171337836e-07, + "loss": 22.0893, + "step": 455700 + }, + { + "epoch": 0.9205630320341633, + "grad_norm": 697.8600463867188, + "learning_rate": 2.3711728968865643e-07, + "loss": 21.791, + "step": 455710 + }, + { + "epoch": 0.9205832326668472, + "grad_norm": 485.99853515625, + "learning_rate": 2.3701108087764657e-07, + "loss": 9.2866, + "step": 455720 + }, + { + "epoch": 0.920603433299531, + "grad_norm": 415.9108581542969, + "learning_rate": 2.3690489528086668e-07, + "loss": 10.3244, + "step": 455730 + }, + { + "epoch": 0.9206236339322148, + "grad_norm": 396.4830322265625, + "learning_rate": 2.367987328988347e-07, + "loss": 12.8008, + "step": 455740 + }, + { + "epoch": 0.9206438345648986, + "grad_norm": 450.2491760253906, + "learning_rate": 2.366925937320691e-07, + "loss": 23.0683, + "step": 455750 + }, + { + "epoch": 0.9206640351975824, + "grad_norm": 267.2088623046875, + "learning_rate": 2.36586477781085e-07, + "loss": 25.6832, + "step": 455760 + }, + { + "epoch": 0.9206842358302662, + "grad_norm": 849.0910034179688, + "learning_rate": 2.3648038504640036e-07, + "loss": 15.9315, + "step": 455770 + }, + { + "epoch": 0.92070443646295, + "grad_norm": 1106.893798828125, + "learning_rate": 2.3637431552853363e-07, + "loss": 17.4516, + "step": 455780 + }, + { + "epoch": 0.9207246370956338, + "grad_norm": 865.7428588867188, + "learning_rate": 2.362682692280005e-07, + "loss": 26.9862, + "step": 455790 + }, + { + "epoch": 0.9207448377283176, + "grad_norm": 594.6658325195312, + "learning_rate": 2.361622461453178e-07, + "loss": 19.4022, + "step": 455800 + }, + { + "epoch": 0.9207650383610014, + "grad_norm": 248.6200714111328, + "learning_rate": 2.3605624628100178e-07, + "loss": 15.1621, + "step": 455810 + }, + { + "epoch": 0.9207852389936853, + "grad_norm": 509.28375244140625, + "learning_rate": 2.3595026963557145e-07, + "loss": 18.1621, + "step": 455820 + }, + { + "epoch": 0.9208054396263691, + "grad_norm": 231.5879669189453, + "learning_rate": 2.3584431620954085e-07, + "loss": 17.4094, + "step": 455830 + }, + { + "epoch": 0.9208256402590529, + "grad_norm": 151.7647705078125, + "learning_rate": 2.357383860034268e-07, + "loss": 16.7027, + "step": 455840 + }, + { + "epoch": 0.9208458408917367, + "grad_norm": 314.00775146484375, + "learning_rate": 2.3563247901774666e-07, + "loss": 11.1552, + "step": 455850 + }, + { + "epoch": 0.9208660415244205, + "grad_norm": 340.83489990234375, + "learning_rate": 2.3552659525301557e-07, + "loss": 13.9794, + "step": 455860 + }, + { + "epoch": 0.9208862421571044, + "grad_norm": 376.7004089355469, + "learning_rate": 2.354207347097498e-07, + "loss": 19.8883, + "step": 455870 + }, + { + "epoch": 0.9209064427897882, + "grad_norm": 568.572265625, + "learning_rate": 2.3531489738846613e-07, + "loss": 28.6371, + "step": 455880 + }, + { + "epoch": 0.920926643422472, + "grad_norm": 324.4543762207031, + "learning_rate": 2.3520908328968027e-07, + "loss": 21.9086, + "step": 455890 + }, + { + "epoch": 0.9209468440551558, + "grad_norm": 622.8432006835938, + "learning_rate": 2.351032924139063e-07, + "loss": 17.3053, + "step": 455900 + }, + { + "epoch": 0.9209670446878396, + "grad_norm": 20.084917068481445, + "learning_rate": 2.349975247616615e-07, + "loss": 22.7659, + "step": 455910 + }, + { + "epoch": 0.9209872453205235, + "grad_norm": 2.691534996032715, + "learning_rate": 2.3489178033345994e-07, + "loss": 14.0358, + "step": 455920 + }, + { + "epoch": 0.9210074459532073, + "grad_norm": 561.95361328125, + "learning_rate": 2.34786059129819e-07, + "loss": 15.8221, + "step": 455930 + }, + { + "epoch": 0.9210276465858911, + "grad_norm": 415.3448791503906, + "learning_rate": 2.3468036115125215e-07, + "loss": 15.3055, + "step": 455940 + }, + { + "epoch": 0.9210478472185749, + "grad_norm": 369.7100830078125, + "learning_rate": 2.3457468639827563e-07, + "loss": 22.8397, + "step": 455950 + }, + { + "epoch": 0.9210680478512587, + "grad_norm": 155.63955688476562, + "learning_rate": 2.344690348714046e-07, + "loss": 14.0282, + "step": 455960 + }, + { + "epoch": 0.9210882484839426, + "grad_norm": 211.77499389648438, + "learning_rate": 2.3436340657115253e-07, + "loss": 25.2555, + "step": 455970 + }, + { + "epoch": 0.9211084491166264, + "grad_norm": 341.98150634765625, + "learning_rate": 2.3425780149803623e-07, + "loss": 8.1026, + "step": 455980 + }, + { + "epoch": 0.9211286497493102, + "grad_norm": 704.5994873046875, + "learning_rate": 2.3415221965256807e-07, + "loss": 26.6054, + "step": 455990 + }, + { + "epoch": 0.921148850381994, + "grad_norm": 0.6574216485023499, + "learning_rate": 2.3404666103526542e-07, + "loss": 18.6659, + "step": 456000 + }, + { + "epoch": 0.9211690510146778, + "grad_norm": 143.26748657226562, + "learning_rate": 2.3394112564664062e-07, + "loss": 15.0257, + "step": 456010 + }, + { + "epoch": 0.9211892516473615, + "grad_norm": 477.011474609375, + "learning_rate": 2.338356134872083e-07, + "loss": 18.866, + "step": 456020 + }, + { + "epoch": 0.9212094522800454, + "grad_norm": 669.2011108398438, + "learning_rate": 2.3373012455748356e-07, + "loss": 23.3207, + "step": 456030 + }, + { + "epoch": 0.9212296529127292, + "grad_norm": 387.2029724121094, + "learning_rate": 2.3362465885798046e-07, + "loss": 19.2357, + "step": 456040 + }, + { + "epoch": 0.921249853545413, + "grad_norm": 1630.5396728515625, + "learning_rate": 2.3351921638921193e-07, + "loss": 23.0036, + "step": 456050 + }, + { + "epoch": 0.9212700541780968, + "grad_norm": 55.501487731933594, + "learning_rate": 2.3341379715169254e-07, + "loss": 8.9039, + "step": 456060 + }, + { + "epoch": 0.9212902548107806, + "grad_norm": 258.55023193359375, + "learning_rate": 2.33308401145938e-07, + "loss": 8.9358, + "step": 456070 + }, + { + "epoch": 0.9213104554434645, + "grad_norm": 803.1054077148438, + "learning_rate": 2.3320302837245846e-07, + "loss": 20.5455, + "step": 456080 + }, + { + "epoch": 0.9213306560761483, + "grad_norm": 273.6131286621094, + "learning_rate": 2.3309767883176903e-07, + "loss": 28.0867, + "step": 456090 + }, + { + "epoch": 0.9213508567088321, + "grad_norm": 248.03652954101562, + "learning_rate": 2.3299235252438434e-07, + "loss": 30.396, + "step": 456100 + }, + { + "epoch": 0.9213710573415159, + "grad_norm": 518.1532592773438, + "learning_rate": 2.3288704945081675e-07, + "loss": 24.2605, + "step": 456110 + }, + { + "epoch": 0.9213912579741997, + "grad_norm": 412.0128479003906, + "learning_rate": 2.327817696115786e-07, + "loss": 23.6396, + "step": 456120 + }, + { + "epoch": 0.9214114586068836, + "grad_norm": 296.6305236816406, + "learning_rate": 2.3267651300718397e-07, + "loss": 11.4003, + "step": 456130 + }, + { + "epoch": 0.9214316592395674, + "grad_norm": 601.3193359375, + "learning_rate": 2.325712796381474e-07, + "loss": 34.8444, + "step": 456140 + }, + { + "epoch": 0.9214518598722512, + "grad_norm": 1319.228515625, + "learning_rate": 2.3246606950497851e-07, + "loss": 17.4915, + "step": 456150 + }, + { + "epoch": 0.921472060504935, + "grad_norm": 450.4251708984375, + "learning_rate": 2.3236088260819188e-07, + "loss": 19.1523, + "step": 456160 + }, + { + "epoch": 0.9214922611376188, + "grad_norm": 232.65257263183594, + "learning_rate": 2.3225571894830047e-07, + "loss": 32.4943, + "step": 456170 + }, + { + "epoch": 0.9215124617703027, + "grad_norm": 534.1532592773438, + "learning_rate": 2.3215057852581712e-07, + "loss": 22.4241, + "step": 456180 + }, + { + "epoch": 0.9215326624029865, + "grad_norm": 448.429443359375, + "learning_rate": 2.3204546134125207e-07, + "loss": 13.9421, + "step": 456190 + }, + { + "epoch": 0.9215528630356703, + "grad_norm": 756.3300170898438, + "learning_rate": 2.319403673951204e-07, + "loss": 30.2919, + "step": 456200 + }, + { + "epoch": 0.9215730636683541, + "grad_norm": 176.592041015625, + "learning_rate": 2.3183529668793282e-07, + "loss": 16.056, + "step": 456210 + }, + { + "epoch": 0.9215932643010379, + "grad_norm": 563.3243408203125, + "learning_rate": 2.3173024922020114e-07, + "loss": 27.7949, + "step": 456220 + }, + { + "epoch": 0.9216134649337218, + "grad_norm": 545.1439819335938, + "learning_rate": 2.3162522499243833e-07, + "loss": 18.0718, + "step": 456230 + }, + { + "epoch": 0.9216336655664056, + "grad_norm": 2.2020576000213623, + "learning_rate": 2.3152022400515561e-07, + "loss": 9.7643, + "step": 456240 + }, + { + "epoch": 0.9216538661990894, + "grad_norm": 372.8847351074219, + "learning_rate": 2.314152462588659e-07, + "loss": 14.4089, + "step": 456250 + }, + { + "epoch": 0.9216740668317732, + "grad_norm": 359.3384094238281, + "learning_rate": 2.3131029175407883e-07, + "loss": 16.9152, + "step": 456260 + }, + { + "epoch": 0.921694267464457, + "grad_norm": 105.58753204345703, + "learning_rate": 2.3120536049130727e-07, + "loss": 15.5293, + "step": 456270 + }, + { + "epoch": 0.9217144680971407, + "grad_norm": 756.6207885742188, + "learning_rate": 2.3110045247106305e-07, + "loss": 17.3203, + "step": 456280 + }, + { + "epoch": 0.9217346687298246, + "grad_norm": 200.3162384033203, + "learning_rate": 2.3099556769385578e-07, + "loss": 20.6643, + "step": 456290 + }, + { + "epoch": 0.9217548693625084, + "grad_norm": 505.3739318847656, + "learning_rate": 2.3089070616019838e-07, + "loss": 26.1185, + "step": 456300 + }, + { + "epoch": 0.9217750699951922, + "grad_norm": 161.13502502441406, + "learning_rate": 2.3078586787060098e-07, + "loss": 18.3574, + "step": 456310 + }, + { + "epoch": 0.921795270627876, + "grad_norm": 254.5853729248047, + "learning_rate": 2.306810528255754e-07, + "loss": 20.247, + "step": 456320 + }, + { + "epoch": 0.9218154712605598, + "grad_norm": 281.9202575683594, + "learning_rate": 2.3057626102563125e-07, + "loss": 18.4267, + "step": 456330 + }, + { + "epoch": 0.9218356718932437, + "grad_norm": 411.31146240234375, + "learning_rate": 2.3047149247127975e-07, + "loss": 19.3035, + "step": 456340 + }, + { + "epoch": 0.9218558725259275, + "grad_norm": 285.017822265625, + "learning_rate": 2.3036674716303277e-07, + "loss": 11.5934, + "step": 456350 + }, + { + "epoch": 0.9218760731586113, + "grad_norm": 234.5925750732422, + "learning_rate": 2.3026202510139928e-07, + "loss": 26.1673, + "step": 456360 + }, + { + "epoch": 0.9218962737912951, + "grad_norm": 180.01597595214844, + "learning_rate": 2.3015732628688948e-07, + "loss": 20.587, + "step": 456370 + }, + { + "epoch": 0.921916474423979, + "grad_norm": 229.5282745361328, + "learning_rate": 2.300526507200146e-07, + "loss": 20.5006, + "step": 456380 + }, + { + "epoch": 0.9219366750566628, + "grad_norm": 141.17848205566406, + "learning_rate": 2.2994799840128533e-07, + "loss": 10.3158, + "step": 456390 + }, + { + "epoch": 0.9219568756893466, + "grad_norm": 196.68496704101562, + "learning_rate": 2.2984336933121076e-07, + "loss": 19.1464, + "step": 456400 + }, + { + "epoch": 0.9219770763220304, + "grad_norm": 318.13580322265625, + "learning_rate": 2.2973876351030046e-07, + "loss": 20.5453, + "step": 456410 + }, + { + "epoch": 0.9219972769547142, + "grad_norm": 117.87385559082031, + "learning_rate": 2.2963418093906453e-07, + "loss": 13.3332, + "step": 456420 + }, + { + "epoch": 0.922017477587398, + "grad_norm": 398.493896484375, + "learning_rate": 2.2952962161801485e-07, + "loss": 20.5647, + "step": 456430 + }, + { + "epoch": 0.9220376782200819, + "grad_norm": 551.0594482421875, + "learning_rate": 2.2942508554765764e-07, + "loss": 27.3807, + "step": 456440 + }, + { + "epoch": 0.9220578788527657, + "grad_norm": 418.9258117675781, + "learning_rate": 2.2932057272850416e-07, + "loss": 22.1432, + "step": 456450 + }, + { + "epoch": 0.9220780794854495, + "grad_norm": 318.01776123046875, + "learning_rate": 2.2921608316106402e-07, + "loss": 13.1443, + "step": 456460 + }, + { + "epoch": 0.9220982801181333, + "grad_norm": 500.9718322753906, + "learning_rate": 2.2911161684584626e-07, + "loss": 18.2248, + "step": 456470 + }, + { + "epoch": 0.9221184807508171, + "grad_norm": 678.5086059570312, + "learning_rate": 2.290071737833588e-07, + "loss": 12.8109, + "step": 456480 + }, + { + "epoch": 0.922138681383501, + "grad_norm": 999.7238159179688, + "learning_rate": 2.2890275397411288e-07, + "loss": 27.8825, + "step": 456490 + }, + { + "epoch": 0.9221588820161848, + "grad_norm": 319.5143127441406, + "learning_rate": 2.287983574186159e-07, + "loss": 17.5575, + "step": 456500 + }, + { + "epoch": 0.9221790826488686, + "grad_norm": 279.6608581542969, + "learning_rate": 2.2869398411737687e-07, + "loss": 8.8441, + "step": 456510 + }, + { + "epoch": 0.9221992832815524, + "grad_norm": 411.6119079589844, + "learning_rate": 2.2858963407090484e-07, + "loss": 16.573, + "step": 456520 + }, + { + "epoch": 0.9222194839142362, + "grad_norm": 264.779541015625, + "learning_rate": 2.2848530727970775e-07, + "loss": 13.1386, + "step": 456530 + }, + { + "epoch": 0.92223968454692, + "grad_norm": 456.6824035644531, + "learning_rate": 2.2838100374429518e-07, + "loss": 26.1654, + "step": 456540 + }, + { + "epoch": 0.9222598851796038, + "grad_norm": 541.8452758789062, + "learning_rate": 2.2827672346517448e-07, + "loss": 19.0031, + "step": 456550 + }, + { + "epoch": 0.9222800858122876, + "grad_norm": 209.32427978515625, + "learning_rate": 2.2817246644285472e-07, + "loss": 13.9763, + "step": 456560 + }, + { + "epoch": 0.9223002864449714, + "grad_norm": 547.7611083984375, + "learning_rate": 2.2806823267784327e-07, + "loss": 13.4791, + "step": 456570 + }, + { + "epoch": 0.9223204870776552, + "grad_norm": 513.3222045898438, + "learning_rate": 2.2796402217064806e-07, + "loss": 43.8032, + "step": 456580 + }, + { + "epoch": 0.9223406877103391, + "grad_norm": 145.3455810546875, + "learning_rate": 2.2785983492177867e-07, + "loss": 35.2751, + "step": 456590 + }, + { + "epoch": 0.9223608883430229, + "grad_norm": 489.8966064453125, + "learning_rate": 2.2775567093174022e-07, + "loss": 38.8586, + "step": 456600 + }, + { + "epoch": 0.9223810889757067, + "grad_norm": 584.9359130859375, + "learning_rate": 2.2765153020104292e-07, + "loss": 16.7876, + "step": 456610 + }, + { + "epoch": 0.9224012896083905, + "grad_norm": 157.879638671875, + "learning_rate": 2.27547412730193e-07, + "loss": 22.6318, + "step": 456620 + }, + { + "epoch": 0.9224214902410743, + "grad_norm": 298.29937744140625, + "learning_rate": 2.274433185196978e-07, + "loss": 26.4337, + "step": 456630 + }, + { + "epoch": 0.9224416908737582, + "grad_norm": 90.5885238647461, + "learning_rate": 2.2733924757006531e-07, + "loss": 16.6558, + "step": 456640 + }, + { + "epoch": 0.922461891506442, + "grad_norm": 346.80889892578125, + "learning_rate": 2.2723519988180232e-07, + "loss": 27.9793, + "step": 456650 + }, + { + "epoch": 0.9224820921391258, + "grad_norm": 355.554931640625, + "learning_rate": 2.2713117545541618e-07, + "loss": 8.9129, + "step": 456660 + }, + { + "epoch": 0.9225022927718096, + "grad_norm": 307.0997619628906, + "learning_rate": 2.270271742914132e-07, + "loss": 19.7816, + "step": 456670 + }, + { + "epoch": 0.9225224934044934, + "grad_norm": 178.32456970214844, + "learning_rate": 2.269231963903018e-07, + "loss": 13.3781, + "step": 456680 + }, + { + "epoch": 0.9225426940371773, + "grad_norm": 317.56158447265625, + "learning_rate": 2.2681924175258773e-07, + "loss": 14.2739, + "step": 456690 + }, + { + "epoch": 0.9225628946698611, + "grad_norm": 227.15536499023438, + "learning_rate": 2.2671531037877724e-07, + "loss": 7.1665, + "step": 456700 + }, + { + "epoch": 0.9225830953025449, + "grad_norm": 186.4401397705078, + "learning_rate": 2.2661140226937773e-07, + "loss": 15.9246, + "step": 456710 + }, + { + "epoch": 0.9226032959352287, + "grad_norm": 461.26959228515625, + "learning_rate": 2.2650751742489542e-07, + "loss": 16.1091, + "step": 456720 + }, + { + "epoch": 0.9226234965679125, + "grad_norm": 550.1011352539062, + "learning_rate": 2.2640365584583602e-07, + "loss": 12.9847, + "step": 456730 + }, + { + "epoch": 0.9226436972005964, + "grad_norm": 301.6156005859375, + "learning_rate": 2.2629981753270636e-07, + "loss": 25.3048, + "step": 456740 + }, + { + "epoch": 0.9226638978332802, + "grad_norm": 535.1401977539062, + "learning_rate": 2.2619600248601327e-07, + "loss": 17.4736, + "step": 456750 + }, + { + "epoch": 0.922684098465964, + "grad_norm": 250.36090087890625, + "learning_rate": 2.2609221070626132e-07, + "loss": 22.4407, + "step": 456760 + }, + { + "epoch": 0.9227042990986478, + "grad_norm": 615.8826293945312, + "learning_rate": 2.259884421939562e-07, + "loss": 15.605, + "step": 456770 + }, + { + "epoch": 0.9227244997313316, + "grad_norm": 851.8646850585938, + "learning_rate": 2.2588469694960535e-07, + "loss": 19.2829, + "step": 456780 + }, + { + "epoch": 0.9227447003640153, + "grad_norm": 488.8655700683594, + "learning_rate": 2.2578097497371333e-07, + "loss": 12.2094, + "step": 456790 + }, + { + "epoch": 0.9227649009966992, + "grad_norm": 184.11412048339844, + "learning_rate": 2.2567727626678527e-07, + "loss": 16.5717, + "step": 456800 + }, + { + "epoch": 0.922785101629383, + "grad_norm": 157.20469665527344, + "learning_rate": 2.2557360082932745e-07, + "loss": 21.0107, + "step": 456810 + }, + { + "epoch": 0.9228053022620668, + "grad_norm": 558.7533569335938, + "learning_rate": 2.2546994866184557e-07, + "loss": 14.0995, + "step": 456820 + }, + { + "epoch": 0.9228255028947506, + "grad_norm": 219.511474609375, + "learning_rate": 2.253663197648426e-07, + "loss": 22.1468, + "step": 456830 + }, + { + "epoch": 0.9228457035274344, + "grad_norm": 254.12046813964844, + "learning_rate": 2.2526271413882528e-07, + "loss": 10.6055, + "step": 456840 + }, + { + "epoch": 0.9228659041601183, + "grad_norm": 282.03717041015625, + "learning_rate": 2.2515913178429937e-07, + "loss": 13.9128, + "step": 456850 + }, + { + "epoch": 0.9228861047928021, + "grad_norm": 658.669189453125, + "learning_rate": 2.2505557270176837e-07, + "loss": 21.0541, + "step": 456860 + }, + { + "epoch": 0.9229063054254859, + "grad_norm": 399.0304260253906, + "learning_rate": 2.249520368917374e-07, + "loss": 15.579, + "step": 456870 + }, + { + "epoch": 0.9229265060581697, + "grad_norm": 888.0962524414062, + "learning_rate": 2.2484852435471106e-07, + "loss": 26.448, + "step": 456880 + }, + { + "epoch": 0.9229467066908535, + "grad_norm": 123.26486206054688, + "learning_rate": 2.2474503509119394e-07, + "loss": 16.2698, + "step": 456890 + }, + { + "epoch": 0.9229669073235374, + "grad_norm": 622.527099609375, + "learning_rate": 2.2464156910168954e-07, + "loss": 21.5968, + "step": 456900 + }, + { + "epoch": 0.9229871079562212, + "grad_norm": 94.87808990478516, + "learning_rate": 2.2453812638670413e-07, + "loss": 16.1051, + "step": 456910 + }, + { + "epoch": 0.923007308588905, + "grad_norm": 231.9738006591797, + "learning_rate": 2.2443470694673953e-07, + "loss": 12.1589, + "step": 456920 + }, + { + "epoch": 0.9230275092215888, + "grad_norm": 812.3882446289062, + "learning_rate": 2.2433131078230196e-07, + "loss": 36.6796, + "step": 456930 + }, + { + "epoch": 0.9230477098542726, + "grad_norm": 472.25018310546875, + "learning_rate": 2.242279378938944e-07, + "loss": 21.6882, + "step": 456940 + }, + { + "epoch": 0.9230679104869565, + "grad_norm": 667.198974609375, + "learning_rate": 2.2412458828201977e-07, + "loss": 23.4013, + "step": 456950 + }, + { + "epoch": 0.9230881111196403, + "grad_norm": 68.68064880371094, + "learning_rate": 2.2402126194718322e-07, + "loss": 13.6757, + "step": 456960 + }, + { + "epoch": 0.9231083117523241, + "grad_norm": 491.9569396972656, + "learning_rate": 2.2391795888988822e-07, + "loss": 28.9148, + "step": 456970 + }, + { + "epoch": 0.9231285123850079, + "grad_norm": 482.0792541503906, + "learning_rate": 2.2381467911063658e-07, + "loss": 31.3768, + "step": 456980 + }, + { + "epoch": 0.9231487130176917, + "grad_norm": 214.32330322265625, + "learning_rate": 2.237114226099335e-07, + "loss": 51.9785, + "step": 456990 + }, + { + "epoch": 0.9231689136503756, + "grad_norm": 533.4446411132812, + "learning_rate": 2.2360818938828189e-07, + "loss": 20.4225, + "step": 457000 + }, + { + "epoch": 0.9231891142830594, + "grad_norm": 268.1573181152344, + "learning_rate": 2.2350497944618466e-07, + "loss": 15.3793, + "step": 457010 + }, + { + "epoch": 0.9232093149157432, + "grad_norm": 460.7972717285156, + "learning_rate": 2.234017927841442e-07, + "loss": 20.3809, + "step": 457020 + }, + { + "epoch": 0.923229515548427, + "grad_norm": 72.43126678466797, + "learning_rate": 2.2329862940266511e-07, + "loss": 24.144, + "step": 457030 + }, + { + "epoch": 0.9232497161811108, + "grad_norm": 144.76942443847656, + "learning_rate": 2.2319548930224865e-07, + "loss": 9.2042, + "step": 457040 + }, + { + "epoch": 0.9232699168137946, + "grad_norm": 414.5937194824219, + "learning_rate": 2.2309237248339776e-07, + "loss": 18.7369, + "step": 457050 + }, + { + "epoch": 0.9232901174464784, + "grad_norm": 315.70648193359375, + "learning_rate": 2.2298927894661481e-07, + "loss": 18.66, + "step": 457060 + }, + { + "epoch": 0.9233103180791622, + "grad_norm": 640.033203125, + "learning_rate": 2.2288620869240384e-07, + "loss": 23.0898, + "step": 457070 + }, + { + "epoch": 0.923330518711846, + "grad_norm": 205.42617797851562, + "learning_rate": 2.2278316172126612e-07, + "loss": 15.6564, + "step": 457080 + }, + { + "epoch": 0.9233507193445298, + "grad_norm": 1400.28759765625, + "learning_rate": 2.2268013803370292e-07, + "loss": 27.0673, + "step": 457090 + }, + { + "epoch": 0.9233709199772137, + "grad_norm": 338.8489685058594, + "learning_rate": 2.2257713763021826e-07, + "loss": 20.6615, + "step": 457100 + }, + { + "epoch": 0.9233911206098975, + "grad_norm": 253.6118927001953, + "learning_rate": 2.2247416051131288e-07, + "loss": 13.7819, + "step": 457110 + }, + { + "epoch": 0.9234113212425813, + "grad_norm": 162.47174072265625, + "learning_rate": 2.2237120667748856e-07, + "loss": 14.0294, + "step": 457120 + }, + { + "epoch": 0.9234315218752651, + "grad_norm": 318.33233642578125, + "learning_rate": 2.2226827612924774e-07, + "loss": 19.7768, + "step": 457130 + }, + { + "epoch": 0.9234517225079489, + "grad_norm": 17.8799991607666, + "learning_rate": 2.221653688670916e-07, + "loss": 17.8933, + "step": 457140 + }, + { + "epoch": 0.9234719231406328, + "grad_norm": 501.81207275390625, + "learning_rate": 2.220624848915226e-07, + "loss": 21.2872, + "step": 457150 + }, + { + "epoch": 0.9234921237733166, + "grad_norm": 313.23614501953125, + "learning_rate": 2.2195962420304083e-07, + "loss": 14.9458, + "step": 457160 + }, + { + "epoch": 0.9235123244060004, + "grad_norm": 307.3273010253906, + "learning_rate": 2.2185678680214927e-07, + "loss": 23.8175, + "step": 457170 + }, + { + "epoch": 0.9235325250386842, + "grad_norm": 324.7360534667969, + "learning_rate": 2.2175397268934807e-07, + "loss": 20.4862, + "step": 457180 + }, + { + "epoch": 0.923552725671368, + "grad_norm": 141.25596618652344, + "learning_rate": 2.216511818651379e-07, + "loss": 19.4876, + "step": 457190 + }, + { + "epoch": 0.9235729263040519, + "grad_norm": 443.9954528808594, + "learning_rate": 2.2154841433002062e-07, + "loss": 16.8223, + "step": 457200 + }, + { + "epoch": 0.9235931269367357, + "grad_norm": 601.5424194335938, + "learning_rate": 2.2144567008449636e-07, + "loss": 29.5105, + "step": 457210 + }, + { + "epoch": 0.9236133275694195, + "grad_norm": 606.4437255859375, + "learning_rate": 2.2134294912906696e-07, + "loss": 25.3341, + "step": 457220 + }, + { + "epoch": 0.9236335282021033, + "grad_norm": 327.7691955566406, + "learning_rate": 2.2124025146423255e-07, + "loss": 16.0972, + "step": 457230 + }, + { + "epoch": 0.9236537288347871, + "grad_norm": 0.0, + "learning_rate": 2.2113757709049277e-07, + "loss": 10.7911, + "step": 457240 + }, + { + "epoch": 0.923673929467471, + "grad_norm": 412.32818603515625, + "learning_rate": 2.210349260083494e-07, + "loss": 19.2258, + "step": 457250 + }, + { + "epoch": 0.9236941301001548, + "grad_norm": 642.1405639648438, + "learning_rate": 2.2093229821830263e-07, + "loss": 12.5188, + "step": 457260 + }, + { + "epoch": 0.9237143307328386, + "grad_norm": 217.98617553710938, + "learning_rate": 2.208296937208515e-07, + "loss": 7.6809, + "step": 457270 + }, + { + "epoch": 0.9237345313655224, + "grad_norm": 428.9268493652344, + "learning_rate": 2.2072711251649615e-07, + "loss": 7.977, + "step": 457280 + }, + { + "epoch": 0.9237547319982062, + "grad_norm": 475.7462158203125, + "learning_rate": 2.2062455460573838e-07, + "loss": 20.2152, + "step": 457290 + }, + { + "epoch": 0.9237749326308899, + "grad_norm": 620.5607299804688, + "learning_rate": 2.2052201998907673e-07, + "loss": 16.3221, + "step": 457300 + }, + { + "epoch": 0.9237951332635738, + "grad_norm": 581.6460571289062, + "learning_rate": 2.2041950866701078e-07, + "loss": 32.3453, + "step": 457310 + }, + { + "epoch": 0.9238153338962576, + "grad_norm": 61.62921142578125, + "learning_rate": 2.2031702064004067e-07, + "loss": 11.4661, + "step": 457320 + }, + { + "epoch": 0.9238355345289414, + "grad_norm": 357.42230224609375, + "learning_rate": 2.2021455590866546e-07, + "loss": 26.6565, + "step": 457330 + }, + { + "epoch": 0.9238557351616252, + "grad_norm": 72.99015045166016, + "learning_rate": 2.2011211447338477e-07, + "loss": 20.1441, + "step": 457340 + }, + { + "epoch": 0.923875935794309, + "grad_norm": 806.2169189453125, + "learning_rate": 2.200096963346976e-07, + "loss": 22.0562, + "step": 457350 + }, + { + "epoch": 0.9238961364269929, + "grad_norm": 346.07818603515625, + "learning_rate": 2.199073014931047e-07, + "loss": 15.2492, + "step": 457360 + }, + { + "epoch": 0.9239163370596767, + "grad_norm": 3.393545150756836, + "learning_rate": 2.198049299491023e-07, + "loss": 17.5839, + "step": 457370 + }, + { + "epoch": 0.9239365376923605, + "grad_norm": 210.43228149414062, + "learning_rate": 2.1970258170319114e-07, + "loss": 8.5199, + "step": 457380 + }, + { + "epoch": 0.9239567383250443, + "grad_norm": 230.42808532714844, + "learning_rate": 2.1960025675587082e-07, + "loss": 11.0523, + "step": 457390 + }, + { + "epoch": 0.9239769389577281, + "grad_norm": 283.653076171875, + "learning_rate": 2.1949795510763872e-07, + "loss": 16.4459, + "step": 457400 + }, + { + "epoch": 0.923997139590412, + "grad_norm": 348.6254577636719, + "learning_rate": 2.1939567675899333e-07, + "loss": 25.9321, + "step": 457410 + }, + { + "epoch": 0.9240173402230958, + "grad_norm": 506.9590759277344, + "learning_rate": 2.1929342171043366e-07, + "loss": 32.5383, + "step": 457420 + }, + { + "epoch": 0.9240375408557796, + "grad_norm": 166.4855499267578, + "learning_rate": 2.191911899624588e-07, + "loss": 27.0463, + "step": 457430 + }, + { + "epoch": 0.9240577414884634, + "grad_norm": 386.7478942871094, + "learning_rate": 2.1908898151556502e-07, + "loss": 17.4471, + "step": 457440 + }, + { + "epoch": 0.9240779421211472, + "grad_norm": 499.1949462890625, + "learning_rate": 2.189867963702519e-07, + "loss": 16.7372, + "step": 457450 + }, + { + "epoch": 0.9240981427538311, + "grad_norm": 165.86109924316406, + "learning_rate": 2.188846345270179e-07, + "loss": 18.4938, + "step": 457460 + }, + { + "epoch": 0.9241183433865149, + "grad_norm": 238.35037231445312, + "learning_rate": 2.1878249598636047e-07, + "loss": 15.6179, + "step": 457470 + }, + { + "epoch": 0.9241385440191987, + "grad_norm": 72.76551055908203, + "learning_rate": 2.186803807487764e-07, + "loss": 14.1343, + "step": 457480 + }, + { + "epoch": 0.9241587446518825, + "grad_norm": 494.52740478515625, + "learning_rate": 2.1857828881476472e-07, + "loss": 25.6341, + "step": 457490 + }, + { + "epoch": 0.9241789452845663, + "grad_norm": 260.92047119140625, + "learning_rate": 2.1847622018482283e-07, + "loss": 25.9921, + "step": 457500 + }, + { + "epoch": 0.9241991459172502, + "grad_norm": 423.1890869140625, + "learning_rate": 2.1837417485944755e-07, + "loss": 20.9144, + "step": 457510 + }, + { + "epoch": 0.924219346549934, + "grad_norm": 1008.9845581054688, + "learning_rate": 2.1827215283913683e-07, + "loss": 33.1877, + "step": 457520 + }, + { + "epoch": 0.9242395471826178, + "grad_norm": 773.0634765625, + "learning_rate": 2.1817015412438692e-07, + "loss": 31.0697, + "step": 457530 + }, + { + "epoch": 0.9242597478153016, + "grad_norm": 716.7244873046875, + "learning_rate": 2.1806817871569686e-07, + "loss": 26.2957, + "step": 457540 + }, + { + "epoch": 0.9242799484479854, + "grad_norm": 288.569091796875, + "learning_rate": 2.1796622661356238e-07, + "loss": 25.4398, + "step": 457550 + }, + { + "epoch": 0.9243001490806692, + "grad_norm": 189.87271118164062, + "learning_rate": 2.1786429781847972e-07, + "loss": 25.3806, + "step": 457560 + }, + { + "epoch": 0.924320349713353, + "grad_norm": 470.0531005859375, + "learning_rate": 2.1776239233094687e-07, + "loss": 10.8642, + "step": 457570 + }, + { + "epoch": 0.9243405503460368, + "grad_norm": 243.01356506347656, + "learning_rate": 2.176605101514606e-07, + "loss": 25.1575, + "step": 457580 + }, + { + "epoch": 0.9243607509787206, + "grad_norm": 802.8110961914062, + "learning_rate": 2.175586512805161e-07, + "loss": 30.1035, + "step": 457590 + }, + { + "epoch": 0.9243809516114044, + "grad_norm": 338.3279113769531, + "learning_rate": 2.174568157186102e-07, + "loss": 20.7881, + "step": 457600 + }, + { + "epoch": 0.9244011522440883, + "grad_norm": 1601.8643798828125, + "learning_rate": 2.1735500346624083e-07, + "loss": 18.3868, + "step": 457610 + }, + { + "epoch": 0.9244213528767721, + "grad_norm": 473.4765930175781, + "learning_rate": 2.1725321452390314e-07, + "loss": 17.7341, + "step": 457620 + }, + { + "epoch": 0.9244415535094559, + "grad_norm": 257.49365234375, + "learning_rate": 2.1715144889209284e-07, + "loss": 20.9138, + "step": 457630 + }, + { + "epoch": 0.9244617541421397, + "grad_norm": 537.02880859375, + "learning_rate": 2.1704970657130675e-07, + "loss": 14.2191, + "step": 457640 + }, + { + "epoch": 0.9244819547748235, + "grad_norm": 323.56304931640625, + "learning_rate": 2.1694798756204005e-07, + "loss": 14.642, + "step": 457650 + }, + { + "epoch": 0.9245021554075074, + "grad_norm": 1487.6925048828125, + "learning_rate": 2.1684629186478846e-07, + "loss": 14.8847, + "step": 457660 + }, + { + "epoch": 0.9245223560401912, + "grad_norm": 214.20962524414062, + "learning_rate": 2.1674461948004766e-07, + "loss": 16.7686, + "step": 457670 + }, + { + "epoch": 0.924542556672875, + "grad_norm": 241.1480255126953, + "learning_rate": 2.1664297040831394e-07, + "loss": 17.2618, + "step": 457680 + }, + { + "epoch": 0.9245627573055588, + "grad_norm": 87.69023132324219, + "learning_rate": 2.1654134465008247e-07, + "loss": 8.7376, + "step": 457690 + }, + { + "epoch": 0.9245829579382426, + "grad_norm": 602.7846069335938, + "learning_rate": 2.1643974220584729e-07, + "loss": 24.1448, + "step": 457700 + }, + { + "epoch": 0.9246031585709265, + "grad_norm": 238.993408203125, + "learning_rate": 2.1633816307610577e-07, + "loss": 7.3158, + "step": 457710 + }, + { + "epoch": 0.9246233592036103, + "grad_norm": 375.7080078125, + "learning_rate": 2.1623660726135197e-07, + "loss": 20.0468, + "step": 457720 + }, + { + "epoch": 0.9246435598362941, + "grad_norm": 201.77499389648438, + "learning_rate": 2.161350747620794e-07, + "loss": 14.484, + "step": 457730 + }, + { + "epoch": 0.9246637604689779, + "grad_norm": 403.94195556640625, + "learning_rate": 2.1603356557878486e-07, + "loss": 12.3051, + "step": 457740 + }, + { + "epoch": 0.9246839611016617, + "grad_norm": 305.6134338378906, + "learning_rate": 2.1593207971196296e-07, + "loss": 15.9938, + "step": 457750 + }, + { + "epoch": 0.9247041617343456, + "grad_norm": 513.3170166015625, + "learning_rate": 2.1583061716210774e-07, + "loss": 11.5397, + "step": 457760 + }, + { + "epoch": 0.9247243623670294, + "grad_norm": 155.95716857910156, + "learning_rate": 2.1572917792971326e-07, + "loss": 20.2176, + "step": 457770 + }, + { + "epoch": 0.9247445629997132, + "grad_norm": 194.8069610595703, + "learning_rate": 2.1562776201527525e-07, + "loss": 12.5153, + "step": 457780 + }, + { + "epoch": 0.924764763632397, + "grad_norm": 1246.8511962890625, + "learning_rate": 2.1552636941928717e-07, + "loss": 18.2852, + "step": 457790 + }, + { + "epoch": 0.9247849642650808, + "grad_norm": 706.98828125, + "learning_rate": 2.154250001422431e-07, + "loss": 18.9501, + "step": 457800 + }, + { + "epoch": 0.9248051648977647, + "grad_norm": 322.3863830566406, + "learning_rate": 2.1532365418463708e-07, + "loss": 8.0768, + "step": 457810 + }, + { + "epoch": 0.9248253655304484, + "grad_norm": 513.7962036132812, + "learning_rate": 2.1522233154696314e-07, + "loss": 9.9633, + "step": 457820 + }, + { + "epoch": 0.9248455661631322, + "grad_norm": 401.7316589355469, + "learning_rate": 2.151210322297159e-07, + "loss": 22.6052, + "step": 457830 + }, + { + "epoch": 0.924865766795816, + "grad_norm": 16.721694946289062, + "learning_rate": 2.1501975623338833e-07, + "loss": 11.0103, + "step": 457840 + }, + { + "epoch": 0.9248859674284998, + "grad_norm": 110.01323699951172, + "learning_rate": 2.1491850355847332e-07, + "loss": 16.0218, + "step": 457850 + }, + { + "epoch": 0.9249061680611836, + "grad_norm": 219.98196411132812, + "learning_rate": 2.1481727420546605e-07, + "loss": 8.403, + "step": 457860 + }, + { + "epoch": 0.9249263686938675, + "grad_norm": 379.1888427734375, + "learning_rate": 2.147160681748589e-07, + "loss": 18.7612, + "step": 457870 + }, + { + "epoch": 0.9249465693265513, + "grad_norm": 298.0424499511719, + "learning_rate": 2.1461488546714425e-07, + "loss": 17.5678, + "step": 457880 + }, + { + "epoch": 0.9249667699592351, + "grad_norm": 253.58570861816406, + "learning_rate": 2.1451372608281674e-07, + "loss": 8.7051, + "step": 457890 + }, + { + "epoch": 0.9249869705919189, + "grad_norm": 341.6390075683594, + "learning_rate": 2.1441259002236924e-07, + "loss": 17.6478, + "step": 457900 + }, + { + "epoch": 0.9250071712246027, + "grad_norm": 258.5384826660156, + "learning_rate": 2.1431147728629476e-07, + "loss": 14.3431, + "step": 457910 + }, + { + "epoch": 0.9250273718572866, + "grad_norm": 365.05657958984375, + "learning_rate": 2.1421038787508508e-07, + "loss": 14.0344, + "step": 457920 + }, + { + "epoch": 0.9250475724899704, + "grad_norm": 327.6850280761719, + "learning_rate": 2.1410932178923372e-07, + "loss": 17.4474, + "step": 457930 + }, + { + "epoch": 0.9250677731226542, + "grad_norm": 425.10919189453125, + "learning_rate": 2.1400827902923304e-07, + "loss": 24.1575, + "step": 457940 + }, + { + "epoch": 0.925087973755338, + "grad_norm": 132.63491821289062, + "learning_rate": 2.1390725959557546e-07, + "loss": 13.1877, + "step": 457950 + }, + { + "epoch": 0.9251081743880218, + "grad_norm": 263.4458923339844, + "learning_rate": 2.1380626348875278e-07, + "loss": 16.6089, + "step": 457960 + }, + { + "epoch": 0.9251283750207057, + "grad_norm": 155.18031311035156, + "learning_rate": 2.137052907092596e-07, + "loss": 13.4668, + "step": 457970 + }, + { + "epoch": 0.9251485756533895, + "grad_norm": 387.36309814453125, + "learning_rate": 2.13604341257585e-07, + "loss": 21.6578, + "step": 457980 + }, + { + "epoch": 0.9251687762860733, + "grad_norm": 305.8255920410156, + "learning_rate": 2.135034151342219e-07, + "loss": 17.9192, + "step": 457990 + }, + { + "epoch": 0.9251889769187571, + "grad_norm": 212.56101989746094, + "learning_rate": 2.134025123396638e-07, + "loss": 17.149, + "step": 458000 + }, + { + "epoch": 0.9252091775514409, + "grad_norm": 318.2418518066406, + "learning_rate": 2.1330163287440087e-07, + "loss": 9.732, + "step": 458010 + }, + { + "epoch": 0.9252293781841248, + "grad_norm": 1.686371088027954, + "learning_rate": 2.1320077673892493e-07, + "loss": 18.1357, + "step": 458020 + }, + { + "epoch": 0.9252495788168086, + "grad_norm": 362.7011413574219, + "learning_rate": 2.1309994393372836e-07, + "loss": 19.8476, + "step": 458030 + }, + { + "epoch": 0.9252697794494924, + "grad_norm": 32.95444107055664, + "learning_rate": 2.1299913445930242e-07, + "loss": 17.4755, + "step": 458040 + }, + { + "epoch": 0.9252899800821762, + "grad_norm": 139.97511291503906, + "learning_rate": 2.1289834831613675e-07, + "loss": 12.3224, + "step": 458050 + }, + { + "epoch": 0.92531018071486, + "grad_norm": 455.064208984375, + "learning_rate": 2.127975855047243e-07, + "loss": 12.8526, + "step": 458060 + }, + { + "epoch": 0.9253303813475438, + "grad_norm": 443.9643859863281, + "learning_rate": 2.126968460255563e-07, + "loss": 22.1814, + "step": 458070 + }, + { + "epoch": 0.9253505819802276, + "grad_norm": 151.95689392089844, + "learning_rate": 2.1259612987912348e-07, + "loss": 50.2585, + "step": 458080 + }, + { + "epoch": 0.9253707826129114, + "grad_norm": 207.3853759765625, + "learning_rate": 2.1249543706591602e-07, + "loss": 7.4737, + "step": 458090 + }, + { + "epoch": 0.9253909832455952, + "grad_norm": 126.56194305419922, + "learning_rate": 2.123947675864252e-07, + "loss": 10.355, + "step": 458100 + }, + { + "epoch": 0.925411183878279, + "grad_norm": 161.03704833984375, + "learning_rate": 2.1229412144114225e-07, + "loss": 11.7268, + "step": 458110 + }, + { + "epoch": 0.9254313845109629, + "grad_norm": 341.4384765625, + "learning_rate": 2.121934986305557e-07, + "loss": 18.7943, + "step": 458120 + }, + { + "epoch": 0.9254515851436467, + "grad_norm": 219.3982391357422, + "learning_rate": 2.120928991551585e-07, + "loss": 17.0994, + "step": 458130 + }, + { + "epoch": 0.9254717857763305, + "grad_norm": 286.6707458496094, + "learning_rate": 2.1199232301543915e-07, + "loss": 18.727, + "step": 458140 + }, + { + "epoch": 0.9254919864090143, + "grad_norm": 211.01681518554688, + "learning_rate": 2.1189177021188888e-07, + "loss": 38.938, + "step": 458150 + }, + { + "epoch": 0.9255121870416981, + "grad_norm": 172.27032470703125, + "learning_rate": 2.117912407449979e-07, + "loss": 12.6474, + "step": 458160 + }, + { + "epoch": 0.925532387674382, + "grad_norm": 29.137935638427734, + "learning_rate": 2.116907346152547e-07, + "loss": 17.0718, + "step": 458170 + }, + { + "epoch": 0.9255525883070658, + "grad_norm": 170.0179443359375, + "learning_rate": 2.1159025182315052e-07, + "loss": 16.2877, + "step": 458180 + }, + { + "epoch": 0.9255727889397496, + "grad_norm": 187.5997314453125, + "learning_rate": 2.11489792369175e-07, + "loss": 15.7404, + "step": 458190 + }, + { + "epoch": 0.9255929895724334, + "grad_norm": 167.6175537109375, + "learning_rate": 2.1138935625381663e-07, + "loss": 15.2215, + "step": 458200 + }, + { + "epoch": 0.9256131902051172, + "grad_norm": 530.850830078125, + "learning_rate": 2.1128894347756613e-07, + "loss": 18.4094, + "step": 458210 + }, + { + "epoch": 0.925633390837801, + "grad_norm": 381.7887268066406, + "learning_rate": 2.1118855404091253e-07, + "loss": 24.7909, + "step": 458220 + }, + { + "epoch": 0.9256535914704849, + "grad_norm": 29.789928436279297, + "learning_rate": 2.110881879443455e-07, + "loss": 13.2872, + "step": 458230 + }, + { + "epoch": 0.9256737921031687, + "grad_norm": 217.49542236328125, + "learning_rate": 2.1098784518835292e-07, + "loss": 18.055, + "step": 458240 + }, + { + "epoch": 0.9256939927358525, + "grad_norm": 535.6318969726562, + "learning_rate": 2.1088752577342607e-07, + "loss": 27.8291, + "step": 458250 + }, + { + "epoch": 0.9257141933685363, + "grad_norm": 926.4777221679688, + "learning_rate": 2.1078722970005182e-07, + "loss": 16.9594, + "step": 458260 + }, + { + "epoch": 0.9257343940012202, + "grad_norm": 346.7973937988281, + "learning_rate": 2.1068695696871922e-07, + "loss": 15.6104, + "step": 458270 + }, + { + "epoch": 0.925754594633904, + "grad_norm": 359.69110107421875, + "learning_rate": 2.1058670757991783e-07, + "loss": 14.7547, + "step": 458280 + }, + { + "epoch": 0.9257747952665878, + "grad_norm": 451.11236572265625, + "learning_rate": 2.104864815341362e-07, + "loss": 18.7919, + "step": 458290 + }, + { + "epoch": 0.9257949958992716, + "grad_norm": 196.26596069335938, + "learning_rate": 2.103862788318628e-07, + "loss": 14.8491, + "step": 458300 + }, + { + "epoch": 0.9258151965319554, + "grad_norm": 308.1374206542969, + "learning_rate": 2.102860994735856e-07, + "loss": 10.3378, + "step": 458310 + }, + { + "epoch": 0.9258353971646393, + "grad_norm": 489.496826171875, + "learning_rate": 2.1018594345979305e-07, + "loss": 23.6785, + "step": 458320 + }, + { + "epoch": 0.925855597797323, + "grad_norm": 394.7153625488281, + "learning_rate": 2.1008581079097312e-07, + "loss": 22.2725, + "step": 458330 + }, + { + "epoch": 0.9258757984300068, + "grad_norm": 380.5003662109375, + "learning_rate": 2.0998570146761376e-07, + "loss": 18.5849, + "step": 458340 + }, + { + "epoch": 0.9258959990626906, + "grad_norm": 218.2222442626953, + "learning_rate": 2.098856154902029e-07, + "loss": 14.9699, + "step": 458350 + }, + { + "epoch": 0.9259161996953744, + "grad_norm": 324.1954345703125, + "learning_rate": 2.0978555285922963e-07, + "loss": 29.3252, + "step": 458360 + }, + { + "epoch": 0.9259364003280582, + "grad_norm": 354.8207092285156, + "learning_rate": 2.0968551357518018e-07, + "loss": 13.6946, + "step": 458370 + }, + { + "epoch": 0.9259566009607421, + "grad_norm": 1145.4552001953125, + "learning_rate": 2.0958549763854196e-07, + "loss": 24.0101, + "step": 458380 + }, + { + "epoch": 0.9259768015934259, + "grad_norm": 261.1099853515625, + "learning_rate": 2.0948550504980403e-07, + "loss": 13.9914, + "step": 458390 + }, + { + "epoch": 0.9259970022261097, + "grad_norm": 170.25189208984375, + "learning_rate": 2.0938553580945208e-07, + "loss": 24.7193, + "step": 458400 + }, + { + "epoch": 0.9260172028587935, + "grad_norm": 437.9764099121094, + "learning_rate": 2.092855899179741e-07, + "loss": 9.5411, + "step": 458410 + }, + { + "epoch": 0.9260374034914773, + "grad_norm": 94.67361450195312, + "learning_rate": 2.0918566737585688e-07, + "loss": 24.0387, + "step": 458420 + }, + { + "epoch": 0.9260576041241612, + "grad_norm": 157.25180053710938, + "learning_rate": 2.0908576818358783e-07, + "loss": 14.9054, + "step": 458430 + }, + { + "epoch": 0.926077804756845, + "grad_norm": 154.60018920898438, + "learning_rate": 2.0898589234165378e-07, + "loss": 13.5259, + "step": 458440 + }, + { + "epoch": 0.9260980053895288, + "grad_norm": 464.2149658203125, + "learning_rate": 2.0888603985054156e-07, + "loss": 19.8272, + "step": 458450 + }, + { + "epoch": 0.9261182060222126, + "grad_norm": 252.5354461669922, + "learning_rate": 2.0878621071073745e-07, + "loss": 10.8964, + "step": 458460 + }, + { + "epoch": 0.9261384066548964, + "grad_norm": 138.6220703125, + "learning_rate": 2.086864049227283e-07, + "loss": 15.92, + "step": 458470 + }, + { + "epoch": 0.9261586072875803, + "grad_norm": 651.3563232421875, + "learning_rate": 2.085866224870009e-07, + "loss": 12.8114, + "step": 458480 + }, + { + "epoch": 0.9261788079202641, + "grad_norm": 445.91790771484375, + "learning_rate": 2.0848686340404045e-07, + "loss": 34.6725, + "step": 458490 + }, + { + "epoch": 0.9261990085529479, + "grad_norm": 325.5871887207031, + "learning_rate": 2.083871276743338e-07, + "loss": 20.4321, + "step": 458500 + }, + { + "epoch": 0.9262192091856317, + "grad_norm": 373.8263854980469, + "learning_rate": 2.0828741529836771e-07, + "loss": 15.8434, + "step": 458510 + }, + { + "epoch": 0.9262394098183155, + "grad_norm": 28.93181037902832, + "learning_rate": 2.0818772627662743e-07, + "loss": 11.6755, + "step": 458520 + }, + { + "epoch": 0.9262596104509994, + "grad_norm": 15.074884414672852, + "learning_rate": 2.0808806060959864e-07, + "loss": 27.5151, + "step": 458530 + }, + { + "epoch": 0.9262798110836832, + "grad_norm": 386.1090087890625, + "learning_rate": 2.0798841829776816e-07, + "loss": 9.6054, + "step": 458540 + }, + { + "epoch": 0.926300011716367, + "grad_norm": 159.1573028564453, + "learning_rate": 2.0788879934162064e-07, + "loss": 17.2663, + "step": 458550 + }, + { + "epoch": 0.9263202123490508, + "grad_norm": 262.0688781738281, + "learning_rate": 2.077892037416418e-07, + "loss": 19.3492, + "step": 458560 + }, + { + "epoch": 0.9263404129817346, + "grad_norm": 194.11447143554688, + "learning_rate": 2.0768963149831678e-07, + "loss": 14.6876, + "step": 458570 + }, + { + "epoch": 0.9263606136144183, + "grad_norm": 365.3988342285156, + "learning_rate": 2.0759008261213242e-07, + "loss": 16.0998, + "step": 458580 + }, + { + "epoch": 0.9263808142471022, + "grad_norm": 192.0506591796875, + "learning_rate": 2.0749055708357168e-07, + "loss": 26.9038, + "step": 458590 + }, + { + "epoch": 0.926401014879786, + "grad_norm": 250.0546875, + "learning_rate": 2.0739105491312028e-07, + "loss": 17.8981, + "step": 458600 + }, + { + "epoch": 0.9264212155124698, + "grad_norm": 996.9682006835938, + "learning_rate": 2.0729157610126448e-07, + "loss": 27.0404, + "step": 458610 + }, + { + "epoch": 0.9264414161451536, + "grad_norm": 122.38219451904297, + "learning_rate": 2.0719212064848838e-07, + "loss": 19.0454, + "step": 458620 + }, + { + "epoch": 0.9264616167778374, + "grad_norm": 393.2940368652344, + "learning_rate": 2.07092688555276e-07, + "loss": 18.0674, + "step": 458630 + }, + { + "epoch": 0.9264818174105213, + "grad_norm": 224.41012573242188, + "learning_rate": 2.0699327982211304e-07, + "loss": 13.0098, + "step": 458640 + }, + { + "epoch": 0.9265020180432051, + "grad_norm": 460.50335693359375, + "learning_rate": 2.068938944494836e-07, + "loss": 19.5449, + "step": 458650 + }, + { + "epoch": 0.9265222186758889, + "grad_norm": 692.4585571289062, + "learning_rate": 2.0679453243787174e-07, + "loss": 12.5094, + "step": 458660 + }, + { + "epoch": 0.9265424193085727, + "grad_norm": 759.0761108398438, + "learning_rate": 2.0669519378776147e-07, + "loss": 22.3652, + "step": 458670 + }, + { + "epoch": 0.9265626199412565, + "grad_norm": 336.0935363769531, + "learning_rate": 2.0659587849963801e-07, + "loss": 21.5044, + "step": 458680 + }, + { + "epoch": 0.9265828205739404, + "grad_norm": 345.7869873046875, + "learning_rate": 2.0649658657398487e-07, + "loss": 11.5162, + "step": 458690 + }, + { + "epoch": 0.9266030212066242, + "grad_norm": 578.116943359375, + "learning_rate": 2.0639731801128603e-07, + "loss": 19.6053, + "step": 458700 + }, + { + "epoch": 0.926623221839308, + "grad_norm": 552.098388671875, + "learning_rate": 2.0629807281202508e-07, + "loss": 22.3361, + "step": 458710 + }, + { + "epoch": 0.9266434224719918, + "grad_norm": 314.4300537109375, + "learning_rate": 2.0619885097668658e-07, + "loss": 10.5781, + "step": 458720 + }, + { + "epoch": 0.9266636231046756, + "grad_norm": 577.6607666015625, + "learning_rate": 2.0609965250575237e-07, + "loss": 22.9499, + "step": 458730 + }, + { + "epoch": 0.9266838237373595, + "grad_norm": 0.4380476176738739, + "learning_rate": 2.0600047739970762e-07, + "loss": 11.5809, + "step": 458740 + }, + { + "epoch": 0.9267040243700433, + "grad_norm": 208.62179565429688, + "learning_rate": 2.0590132565903475e-07, + "loss": 10.554, + "step": 458750 + }, + { + "epoch": 0.9267242250027271, + "grad_norm": 527.3123168945312, + "learning_rate": 2.058021972842178e-07, + "loss": 19.0069, + "step": 458760 + }, + { + "epoch": 0.9267444256354109, + "grad_norm": 499.30670166015625, + "learning_rate": 2.057030922757397e-07, + "loss": 19.0369, + "step": 458770 + }, + { + "epoch": 0.9267646262680947, + "grad_norm": 314.04437255859375, + "learning_rate": 2.056040106340823e-07, + "loss": 10.8281, + "step": 458780 + }, + { + "epoch": 0.9267848269007786, + "grad_norm": 123.98068237304688, + "learning_rate": 2.0550495235973023e-07, + "loss": 12.4546, + "step": 458790 + }, + { + "epoch": 0.9268050275334624, + "grad_norm": 198.2677001953125, + "learning_rate": 2.054059174531653e-07, + "loss": 20.5061, + "step": 458800 + }, + { + "epoch": 0.9268252281661462, + "grad_norm": 154.2901153564453, + "learning_rate": 2.0530690591487047e-07, + "loss": 19.8547, + "step": 458810 + }, + { + "epoch": 0.92684542879883, + "grad_norm": 277.7265930175781, + "learning_rate": 2.0520791774532757e-07, + "loss": 24.4116, + "step": 458820 + }, + { + "epoch": 0.9268656294315138, + "grad_norm": 328.3410949707031, + "learning_rate": 2.0510895294502066e-07, + "loss": 14.0775, + "step": 458830 + }, + { + "epoch": 0.9268858300641976, + "grad_norm": 317.104736328125, + "learning_rate": 2.0501001151443156e-07, + "loss": 17.5873, + "step": 458840 + }, + { + "epoch": 0.9269060306968814, + "grad_norm": 226.73709106445312, + "learning_rate": 2.0491109345404102e-07, + "loss": 15.4212, + "step": 458850 + }, + { + "epoch": 0.9269262313295652, + "grad_norm": 849.9632568359375, + "learning_rate": 2.0481219876433257e-07, + "loss": 18.5426, + "step": 458860 + }, + { + "epoch": 0.926946431962249, + "grad_norm": 363.4132385253906, + "learning_rate": 2.0471332744578853e-07, + "loss": 25.3132, + "step": 458870 + }, + { + "epoch": 0.9269666325949328, + "grad_norm": 785.9321899414062, + "learning_rate": 2.0461447949888912e-07, + "loss": 13.8512, + "step": 458880 + }, + { + "epoch": 0.9269868332276167, + "grad_norm": 338.12884521484375, + "learning_rate": 2.0451565492411672e-07, + "loss": 21.3447, + "step": 458890 + }, + { + "epoch": 0.9270070338603005, + "grad_norm": 235.50070190429688, + "learning_rate": 2.0441685372195487e-07, + "loss": 16.1099, + "step": 458900 + }, + { + "epoch": 0.9270272344929843, + "grad_norm": 402.45147705078125, + "learning_rate": 2.043180758928831e-07, + "loss": 27.9963, + "step": 458910 + }, + { + "epoch": 0.9270474351256681, + "grad_norm": 154.576904296875, + "learning_rate": 2.0421932143738276e-07, + "loss": 12.2917, + "step": 458920 + }, + { + "epoch": 0.9270676357583519, + "grad_norm": 344.50531005859375, + "learning_rate": 2.041205903559368e-07, + "loss": 14.6842, + "step": 458930 + }, + { + "epoch": 0.9270878363910358, + "grad_norm": 366.5665588378906, + "learning_rate": 2.0402188264902533e-07, + "loss": 22.528, + "step": 458940 + }, + { + "epoch": 0.9271080370237196, + "grad_norm": 477.3027038574219, + "learning_rate": 2.039231983171286e-07, + "loss": 16.5182, + "step": 458950 + }, + { + "epoch": 0.9271282376564034, + "grad_norm": 0.0, + "learning_rate": 2.0382453736072838e-07, + "loss": 16.6952, + "step": 458960 + }, + { + "epoch": 0.9271484382890872, + "grad_norm": 306.124755859375, + "learning_rate": 2.0372589978030654e-07, + "loss": 11.9331, + "step": 458970 + }, + { + "epoch": 0.927168638921771, + "grad_norm": 754.0401611328125, + "learning_rate": 2.0362728557634327e-07, + "loss": 32.1713, + "step": 458980 + }, + { + "epoch": 0.9271888395544549, + "grad_norm": 123.714599609375, + "learning_rate": 2.0352869474931758e-07, + "loss": 12.2411, + "step": 458990 + }, + { + "epoch": 0.9272090401871387, + "grad_norm": 302.0909423828125, + "learning_rate": 2.0343012729971244e-07, + "loss": 10.5212, + "step": 459000 + }, + { + "epoch": 0.9272292408198225, + "grad_norm": 466.5577087402344, + "learning_rate": 2.0333158322800696e-07, + "loss": 15.9164, + "step": 459010 + }, + { + "epoch": 0.9272494414525063, + "grad_norm": 263.16961669921875, + "learning_rate": 2.0323306253468123e-07, + "loss": 10.4473, + "step": 459020 + }, + { + "epoch": 0.9272696420851901, + "grad_norm": 584.4795532226562, + "learning_rate": 2.0313456522021603e-07, + "loss": 16.174, + "step": 459030 + }, + { + "epoch": 0.927289842717874, + "grad_norm": 429.5994567871094, + "learning_rate": 2.0303609128509038e-07, + "loss": 22.1154, + "step": 459040 + }, + { + "epoch": 0.9273100433505578, + "grad_norm": 389.5309143066406, + "learning_rate": 2.0293764072978618e-07, + "loss": 22.5208, + "step": 459050 + }, + { + "epoch": 0.9273302439832416, + "grad_norm": 372.05609130859375, + "learning_rate": 2.0283921355478187e-07, + "loss": 18.7915, + "step": 459060 + }, + { + "epoch": 0.9273504446159254, + "grad_norm": 289.4825744628906, + "learning_rate": 2.0274080976055655e-07, + "loss": 14.1784, + "step": 459070 + }, + { + "epoch": 0.9273706452486092, + "grad_norm": 477.81732177734375, + "learning_rate": 2.0264242934759147e-07, + "loss": 22.1646, + "step": 459080 + }, + { + "epoch": 0.927390845881293, + "grad_norm": 390.359130859375, + "learning_rate": 2.025440723163652e-07, + "loss": 24.2576, + "step": 459090 + }, + { + "epoch": 0.9274110465139768, + "grad_norm": 163.69305419921875, + "learning_rate": 2.0244573866735673e-07, + "loss": 19.6924, + "step": 459100 + }, + { + "epoch": 0.9274312471466606, + "grad_norm": 304.3630065917969, + "learning_rate": 2.0234742840104627e-07, + "loss": 17.2062, + "step": 459110 + }, + { + "epoch": 0.9274514477793444, + "grad_norm": 149.70547485351562, + "learning_rate": 2.0224914151791285e-07, + "loss": 9.3478, + "step": 459120 + }, + { + "epoch": 0.9274716484120282, + "grad_norm": 283.55181884765625, + "learning_rate": 2.0215087801843504e-07, + "loss": 18.1078, + "step": 459130 + }, + { + "epoch": 0.927491849044712, + "grad_norm": 709.7905883789062, + "learning_rate": 2.0205263790309125e-07, + "loss": 13.3553, + "step": 459140 + }, + { + "epoch": 0.9275120496773959, + "grad_norm": 871.2809448242188, + "learning_rate": 2.0195442117236176e-07, + "loss": 31.3566, + "step": 459150 + }, + { + "epoch": 0.9275322503100797, + "grad_norm": 387.3518371582031, + "learning_rate": 2.0185622782672497e-07, + "loss": 16.4249, + "step": 459160 + }, + { + "epoch": 0.9275524509427635, + "grad_norm": 516.0538940429688, + "learning_rate": 2.0175805786665782e-07, + "loss": 15.3209, + "step": 459170 + }, + { + "epoch": 0.9275726515754473, + "grad_norm": 313.6719665527344, + "learning_rate": 2.0165991129263984e-07, + "loss": 10.7569, + "step": 459180 + }, + { + "epoch": 0.9275928522081311, + "grad_norm": 368.5505065917969, + "learning_rate": 2.0156178810515127e-07, + "loss": 22.9218, + "step": 459190 + }, + { + "epoch": 0.927613052840815, + "grad_norm": 476.4034729003906, + "learning_rate": 2.0146368830466668e-07, + "loss": 26.4644, + "step": 459200 + }, + { + "epoch": 0.9276332534734988, + "grad_norm": 604.5199584960938, + "learning_rate": 2.0136561189166682e-07, + "loss": 12.8561, + "step": 459210 + }, + { + "epoch": 0.9276534541061826, + "grad_norm": 173.3835906982422, + "learning_rate": 2.0126755886662907e-07, + "loss": 14.4322, + "step": 459220 + }, + { + "epoch": 0.9276736547388664, + "grad_norm": 331.8222961425781, + "learning_rate": 2.0116952923003142e-07, + "loss": 14.6574, + "step": 459230 + }, + { + "epoch": 0.9276938553715502, + "grad_norm": 756.9157104492188, + "learning_rate": 2.0107152298235067e-07, + "loss": 21.7451, + "step": 459240 + }, + { + "epoch": 0.9277140560042341, + "grad_norm": 45.36907958984375, + "learning_rate": 2.0097354012406535e-07, + "loss": 23.7887, + "step": 459250 + }, + { + "epoch": 0.9277342566369179, + "grad_norm": 711.8092041015625, + "learning_rate": 2.0087558065565394e-07, + "loss": 25.7945, + "step": 459260 + }, + { + "epoch": 0.9277544572696017, + "grad_norm": 285.94219970703125, + "learning_rate": 2.007776445775922e-07, + "loss": 17.119, + "step": 459270 + }, + { + "epoch": 0.9277746579022855, + "grad_norm": 445.1518249511719, + "learning_rate": 2.006797318903575e-07, + "loss": 17.844, + "step": 459280 + }, + { + "epoch": 0.9277948585349693, + "grad_norm": 286.44287109375, + "learning_rate": 2.0058184259442893e-07, + "loss": 17.7038, + "step": 459290 + }, + { + "epoch": 0.9278150591676532, + "grad_norm": 206.69505310058594, + "learning_rate": 2.0048397669028164e-07, + "loss": 25.5775, + "step": 459300 + }, + { + "epoch": 0.927835259800337, + "grad_norm": 343.9249267578125, + "learning_rate": 2.003861341783936e-07, + "loss": 15.4368, + "step": 459310 + }, + { + "epoch": 0.9278554604330208, + "grad_norm": 146.1588592529297, + "learning_rate": 2.0028831505924162e-07, + "loss": 21.2046, + "step": 459320 + }, + { + "epoch": 0.9278756610657046, + "grad_norm": 526.7974853515625, + "learning_rate": 2.0019051933330204e-07, + "loss": 14.5463, + "step": 459330 + }, + { + "epoch": 0.9278958616983884, + "grad_norm": 53.01992416381836, + "learning_rate": 2.000927470010511e-07, + "loss": 12.1168, + "step": 459340 + }, + { + "epoch": 0.9279160623310722, + "grad_norm": 1.5485777854919434, + "learning_rate": 1.9999499806296674e-07, + "loss": 11.8297, + "step": 459350 + }, + { + "epoch": 0.927936262963756, + "grad_norm": 233.57095336914062, + "learning_rate": 1.9989727251952418e-07, + "loss": 32.4009, + "step": 459360 + }, + { + "epoch": 0.9279564635964398, + "grad_norm": 552.9609985351562, + "learning_rate": 1.9979957037120078e-07, + "loss": 22.0227, + "step": 459370 + }, + { + "epoch": 0.9279766642291236, + "grad_norm": 629.5794677734375, + "learning_rate": 1.9970189161847175e-07, + "loss": 15.866, + "step": 459380 + }, + { + "epoch": 0.9279968648618074, + "grad_norm": 189.53878784179688, + "learning_rate": 1.996042362618128e-07, + "loss": 17.6634, + "step": 459390 + }, + { + "epoch": 0.9280170654944913, + "grad_norm": 389.4222412109375, + "learning_rate": 1.995066043017013e-07, + "loss": 38.8011, + "step": 459400 + }, + { + "epoch": 0.9280372661271751, + "grad_norm": 289.399658203125, + "learning_rate": 1.9940899573861195e-07, + "loss": 12.5329, + "step": 459410 + }, + { + "epoch": 0.9280574667598589, + "grad_norm": 482.4100646972656, + "learning_rate": 1.993114105730215e-07, + "loss": 20.3436, + "step": 459420 + }, + { + "epoch": 0.9280776673925427, + "grad_norm": 151.21099853515625, + "learning_rate": 1.9921384880540406e-07, + "loss": 16.3865, + "step": 459430 + }, + { + "epoch": 0.9280978680252265, + "grad_norm": 391.5841369628906, + "learning_rate": 1.9911631043623704e-07, + "loss": 16.6259, + "step": 459440 + }, + { + "epoch": 0.9281180686579104, + "grad_norm": 311.5986328125, + "learning_rate": 1.99018795465995e-07, + "loss": 16.96, + "step": 459450 + }, + { + "epoch": 0.9281382692905942, + "grad_norm": 299.59454345703125, + "learning_rate": 1.9892130389515207e-07, + "loss": 13.1875, + "step": 459460 + }, + { + "epoch": 0.928158469923278, + "grad_norm": 135.6160125732422, + "learning_rate": 1.9882383572418508e-07, + "loss": 8.0025, + "step": 459470 + }, + { + "epoch": 0.9281786705559618, + "grad_norm": 845.6554565429688, + "learning_rate": 1.987263909535686e-07, + "loss": 26.8887, + "step": 459480 + }, + { + "epoch": 0.9281988711886456, + "grad_norm": 530.5870361328125, + "learning_rate": 1.986289695837762e-07, + "loss": 25.8121, + "step": 459490 + }, + { + "epoch": 0.9282190718213295, + "grad_norm": 299.4232177734375, + "learning_rate": 1.9853157161528468e-07, + "loss": 17.5788, + "step": 459500 + }, + { + "epoch": 0.9282392724540133, + "grad_norm": 230.8065948486328, + "learning_rate": 1.984341970485687e-07, + "loss": 11.3624, + "step": 459510 + }, + { + "epoch": 0.9282594730866971, + "grad_norm": 250.2640838623047, + "learning_rate": 1.9833684588410062e-07, + "loss": 17.0651, + "step": 459520 + }, + { + "epoch": 0.9282796737193809, + "grad_norm": 1162.2672119140625, + "learning_rate": 1.9823951812235675e-07, + "loss": 32.3245, + "step": 459530 + }, + { + "epoch": 0.9282998743520647, + "grad_norm": 648.0301513671875, + "learning_rate": 1.981422137638117e-07, + "loss": 26.6975, + "step": 459540 + }, + { + "epoch": 0.9283200749847486, + "grad_norm": 192.74911499023438, + "learning_rate": 1.98044932808939e-07, + "loss": 17.8849, + "step": 459550 + }, + { + "epoch": 0.9283402756174324, + "grad_norm": 3.202186346054077, + "learning_rate": 1.9794767525821212e-07, + "loss": 13.659, + "step": 459560 + }, + { + "epoch": 0.9283604762501162, + "grad_norm": 437.1767883300781, + "learning_rate": 1.9785044111210627e-07, + "loss": 13.1538, + "step": 459570 + }, + { + "epoch": 0.9283806768828, + "grad_norm": 588.3291625976562, + "learning_rate": 1.977532303710955e-07, + "loss": 18.7623, + "step": 459580 + }, + { + "epoch": 0.9284008775154838, + "grad_norm": 872.955322265625, + "learning_rate": 1.9765604303565223e-07, + "loss": 22.6115, + "step": 459590 + }, + { + "epoch": 0.9284210781481677, + "grad_norm": 207.5331268310547, + "learning_rate": 1.9755887910625103e-07, + "loss": 16.759, + "step": 459600 + }, + { + "epoch": 0.9284412787808514, + "grad_norm": 433.579833984375, + "learning_rate": 1.9746173858336604e-07, + "loss": 16.0027, + "step": 459610 + }, + { + "epoch": 0.9284614794135352, + "grad_norm": 119.8254623413086, + "learning_rate": 1.9736462146747015e-07, + "loss": 15.5961, + "step": 459620 + }, + { + "epoch": 0.928481680046219, + "grad_norm": 346.081787109375, + "learning_rate": 1.972675277590358e-07, + "loss": 18.0993, + "step": 459630 + }, + { + "epoch": 0.9285018806789028, + "grad_norm": 251.54295349121094, + "learning_rate": 1.9717045745853758e-07, + "loss": 13.2372, + "step": 459640 + }, + { + "epoch": 0.9285220813115866, + "grad_norm": 453.9295654296875, + "learning_rate": 1.9707341056644737e-07, + "loss": 30.9079, + "step": 459650 + }, + { + "epoch": 0.9285422819442705, + "grad_norm": 333.4501647949219, + "learning_rate": 1.9697638708323918e-07, + "loss": 14.9811, + "step": 459660 + }, + { + "epoch": 0.9285624825769543, + "grad_norm": 230.99989318847656, + "learning_rate": 1.9687938700938602e-07, + "loss": 40.2869, + "step": 459670 + }, + { + "epoch": 0.9285826832096381, + "grad_norm": 225.25244140625, + "learning_rate": 1.967824103453597e-07, + "loss": 28.2683, + "step": 459680 + }, + { + "epoch": 0.9286028838423219, + "grad_norm": 387.0373840332031, + "learning_rate": 1.9668545709163378e-07, + "loss": 18.3746, + "step": 459690 + }, + { + "epoch": 0.9286230844750057, + "grad_norm": 612.067138671875, + "learning_rate": 1.9658852724868005e-07, + "loss": 24.9054, + "step": 459700 + }, + { + "epoch": 0.9286432851076896, + "grad_norm": 357.3955993652344, + "learning_rate": 1.9649162081697094e-07, + "loss": 24.4012, + "step": 459710 + }, + { + "epoch": 0.9286634857403734, + "grad_norm": 536.5983276367188, + "learning_rate": 1.963947377969788e-07, + "loss": 12.6208, + "step": 459720 + }, + { + "epoch": 0.9286836863730572, + "grad_norm": 267.8538513183594, + "learning_rate": 1.9629787818917722e-07, + "loss": 16.5168, + "step": 459730 + }, + { + "epoch": 0.928703887005741, + "grad_norm": 31.454442977905273, + "learning_rate": 1.9620104199403688e-07, + "loss": 19.1965, + "step": 459740 + }, + { + "epoch": 0.9287240876384248, + "grad_norm": 337.57623291015625, + "learning_rate": 1.961042292120291e-07, + "loss": 14.2129, + "step": 459750 + }, + { + "epoch": 0.9287442882711087, + "grad_norm": 220.30328369140625, + "learning_rate": 1.9600743984362792e-07, + "loss": 13.5387, + "step": 459760 + }, + { + "epoch": 0.9287644889037925, + "grad_norm": 252.01829528808594, + "learning_rate": 1.959106738893035e-07, + "loss": 10.7286, + "step": 459770 + }, + { + "epoch": 0.9287846895364763, + "grad_norm": 173.19248962402344, + "learning_rate": 1.958139313495272e-07, + "loss": 16.17, + "step": 459780 + }, + { + "epoch": 0.9288048901691601, + "grad_norm": 5.145406723022461, + "learning_rate": 1.957172122247708e-07, + "loss": 30.5028, + "step": 459790 + }, + { + "epoch": 0.9288250908018439, + "grad_norm": 237.4205780029297, + "learning_rate": 1.9562051651550784e-07, + "loss": 11.8499, + "step": 459800 + }, + { + "epoch": 0.9288452914345278, + "grad_norm": 157.3616943359375, + "learning_rate": 1.9552384422220627e-07, + "loss": 12.6332, + "step": 459810 + }, + { + "epoch": 0.9288654920672116, + "grad_norm": 286.0095520019531, + "learning_rate": 1.954271953453385e-07, + "loss": 18.3174, + "step": 459820 + }, + { + "epoch": 0.9288856926998954, + "grad_norm": 1239.24462890625, + "learning_rate": 1.953305698853769e-07, + "loss": 25.9679, + "step": 459830 + }, + { + "epoch": 0.9289058933325792, + "grad_norm": 665.3642578125, + "learning_rate": 1.9523396784279114e-07, + "loss": 23.8822, + "step": 459840 + }, + { + "epoch": 0.928926093965263, + "grad_norm": 508.5174865722656, + "learning_rate": 1.9513738921805192e-07, + "loss": 17.3589, + "step": 459850 + }, + { + "epoch": 0.9289462945979468, + "grad_norm": 316.22100830078125, + "learning_rate": 1.9504083401162999e-07, + "loss": 22.6244, + "step": 459860 + }, + { + "epoch": 0.9289664952306306, + "grad_norm": 284.92230224609375, + "learning_rate": 1.9494430222399774e-07, + "loss": 18.12, + "step": 459870 + }, + { + "epoch": 0.9289866958633144, + "grad_norm": 188.45223999023438, + "learning_rate": 1.948477938556226e-07, + "loss": 13.7546, + "step": 459880 + }, + { + "epoch": 0.9290068964959982, + "grad_norm": 294.2447814941406, + "learning_rate": 1.9475130890697691e-07, + "loss": 29.014, + "step": 459890 + }, + { + "epoch": 0.929027097128682, + "grad_norm": 181.1422882080078, + "learning_rate": 1.9465484737853092e-07, + "loss": 22.4655, + "step": 459900 + }, + { + "epoch": 0.9290472977613659, + "grad_norm": 217.6072235107422, + "learning_rate": 1.945584092707542e-07, + "loss": 14.1615, + "step": 459910 + }, + { + "epoch": 0.9290674983940497, + "grad_norm": 657.130126953125, + "learning_rate": 1.944619945841164e-07, + "loss": 11.1402, + "step": 459920 + }, + { + "epoch": 0.9290876990267335, + "grad_norm": 399.0269470214844, + "learning_rate": 1.9436560331908882e-07, + "loss": 10.5638, + "step": 459930 + }, + { + "epoch": 0.9291078996594173, + "grad_norm": 359.1075134277344, + "learning_rate": 1.9426923547614052e-07, + "loss": 12.1312, + "step": 459940 + }, + { + "epoch": 0.9291281002921011, + "grad_norm": 851.9827880859375, + "learning_rate": 1.9417289105574054e-07, + "loss": 27.9834, + "step": 459950 + }, + { + "epoch": 0.929148300924785, + "grad_norm": 248.04933166503906, + "learning_rate": 1.9407657005835967e-07, + "loss": 19.4236, + "step": 459960 + }, + { + "epoch": 0.9291685015574688, + "grad_norm": 225.03952026367188, + "learning_rate": 1.9398027248446582e-07, + "loss": 11.7057, + "step": 459970 + }, + { + "epoch": 0.9291887021901526, + "grad_norm": 383.68414306640625, + "learning_rate": 1.9388399833452974e-07, + "loss": 18.9307, + "step": 459980 + }, + { + "epoch": 0.9292089028228364, + "grad_norm": 298.77899169921875, + "learning_rate": 1.9378774760902052e-07, + "loss": 11.7764, + "step": 459990 + }, + { + "epoch": 0.9292291034555202, + "grad_norm": 239.48939514160156, + "learning_rate": 1.9369152030840553e-07, + "loss": 16.0167, + "step": 460000 + }, + { + "epoch": 0.929249304088204, + "grad_norm": 472.67279052734375, + "learning_rate": 1.9359531643315665e-07, + "loss": 19.6229, + "step": 460010 + }, + { + "epoch": 0.9292695047208879, + "grad_norm": 348.67791748046875, + "learning_rate": 1.9349913598374014e-07, + "loss": 15.113, + "step": 460020 + }, + { + "epoch": 0.9292897053535717, + "grad_norm": 697.9534301757812, + "learning_rate": 1.9340297896062676e-07, + "loss": 16.396, + "step": 460030 + }, + { + "epoch": 0.9293099059862555, + "grad_norm": 561.0146484375, + "learning_rate": 1.9330684536428335e-07, + "loss": 18.4987, + "step": 460040 + }, + { + "epoch": 0.9293301066189393, + "grad_norm": 609.9745483398438, + "learning_rate": 1.9321073519518007e-07, + "loss": 19.547, + "step": 460050 + }, + { + "epoch": 0.9293503072516232, + "grad_norm": 38.030887603759766, + "learning_rate": 1.9311464845378492e-07, + "loss": 11.6924, + "step": 460060 + }, + { + "epoch": 0.929370507884307, + "grad_norm": 112.8908462524414, + "learning_rate": 1.9301858514056527e-07, + "loss": 9.4522, + "step": 460070 + }, + { + "epoch": 0.9293907085169908, + "grad_norm": 187.12997436523438, + "learning_rate": 1.9292254525599075e-07, + "loss": 17.3489, + "step": 460080 + }, + { + "epoch": 0.9294109091496746, + "grad_norm": 392.20379638671875, + "learning_rate": 1.928265288005282e-07, + "loss": 18.9361, + "step": 460090 + }, + { + "epoch": 0.9294311097823584, + "grad_norm": 281.1587219238281, + "learning_rate": 1.927305357746462e-07, + "loss": 16.0612, + "step": 460100 + }, + { + "epoch": 0.9294513104150423, + "grad_norm": 273.9291076660156, + "learning_rate": 1.9263456617881203e-07, + "loss": 11.1165, + "step": 460110 + }, + { + "epoch": 0.929471511047726, + "grad_norm": 327.94281005859375, + "learning_rate": 1.9253862001349543e-07, + "loss": 11.9103, + "step": 460120 + }, + { + "epoch": 0.9294917116804098, + "grad_norm": 285.5328674316406, + "learning_rate": 1.9244269727916097e-07, + "loss": 10.3836, + "step": 460130 + }, + { + "epoch": 0.9295119123130936, + "grad_norm": 176.0340118408203, + "learning_rate": 1.9234679797627832e-07, + "loss": 16.9426, + "step": 460140 + }, + { + "epoch": 0.9295321129457774, + "grad_norm": 317.98980712890625, + "learning_rate": 1.9225092210531425e-07, + "loss": 19.7461, + "step": 460150 + }, + { + "epoch": 0.9295523135784612, + "grad_norm": 614.9310302734375, + "learning_rate": 1.9215506966673624e-07, + "loss": 22.1396, + "step": 460160 + }, + { + "epoch": 0.9295725142111451, + "grad_norm": 347.4130859375, + "learning_rate": 1.9205924066101057e-07, + "loss": 10.2269, + "step": 460170 + }, + { + "epoch": 0.9295927148438289, + "grad_norm": 427.9936218261719, + "learning_rate": 1.9196343508860515e-07, + "loss": 22.8617, + "step": 460180 + }, + { + "epoch": 0.9296129154765127, + "grad_norm": 386.7332458496094, + "learning_rate": 1.9186765294998855e-07, + "loss": 18.1745, + "step": 460190 + }, + { + "epoch": 0.9296331161091965, + "grad_norm": 421.6540832519531, + "learning_rate": 1.917718942456237e-07, + "loss": 15.6033, + "step": 460200 + }, + { + "epoch": 0.9296533167418803, + "grad_norm": 387.5469665527344, + "learning_rate": 1.9167615897598023e-07, + "loss": 12.7935, + "step": 460210 + }, + { + "epoch": 0.9296735173745642, + "grad_norm": 550.24853515625, + "learning_rate": 1.9158044714152447e-07, + "loss": 23.7764, + "step": 460220 + }, + { + "epoch": 0.929693718007248, + "grad_norm": 1298.991455078125, + "learning_rate": 1.914847587427221e-07, + "loss": 32.2464, + "step": 460230 + }, + { + "epoch": 0.9297139186399318, + "grad_norm": 668.077880859375, + "learning_rate": 1.9138909378003946e-07, + "loss": 17.404, + "step": 460240 + }, + { + "epoch": 0.9297341192726156, + "grad_norm": 339.6650695800781, + "learning_rate": 1.9129345225394335e-07, + "loss": 18.9665, + "step": 460250 + }, + { + "epoch": 0.9297543199052994, + "grad_norm": 773.9869384765625, + "learning_rate": 1.9119783416490013e-07, + "loss": 29.5222, + "step": 460260 + }, + { + "epoch": 0.9297745205379833, + "grad_norm": 61.7031135559082, + "learning_rate": 1.9110223951337492e-07, + "loss": 18.3, + "step": 460270 + }, + { + "epoch": 0.9297947211706671, + "grad_norm": 233.27320861816406, + "learning_rate": 1.910066682998346e-07, + "loss": 22.573, + "step": 460280 + }, + { + "epoch": 0.9298149218033509, + "grad_norm": 261.3985595703125, + "learning_rate": 1.909111205247438e-07, + "loss": 24.1993, + "step": 460290 + }, + { + "epoch": 0.9298351224360347, + "grad_norm": 256.5817565917969, + "learning_rate": 1.9081559618856938e-07, + "loss": 25.1584, + "step": 460300 + }, + { + "epoch": 0.9298553230687185, + "grad_norm": 443.31829833984375, + "learning_rate": 1.907200952917765e-07, + "loss": 22.7134, + "step": 460310 + }, + { + "epoch": 0.9298755237014024, + "grad_norm": 368.3848571777344, + "learning_rate": 1.9062461783483034e-07, + "loss": 13.8993, + "step": 460320 + }, + { + "epoch": 0.9298957243340862, + "grad_norm": 602.7681274414062, + "learning_rate": 1.9052916381819664e-07, + "loss": 21.0812, + "step": 460330 + }, + { + "epoch": 0.92991592496677, + "grad_norm": 1414.91796875, + "learning_rate": 1.904337332423406e-07, + "loss": 36.7006, + "step": 460340 + }, + { + "epoch": 0.9299361255994538, + "grad_norm": 475.8883056640625, + "learning_rate": 1.903383261077274e-07, + "loss": 28.129, + "step": 460350 + }, + { + "epoch": 0.9299563262321376, + "grad_norm": 8.253231048583984, + "learning_rate": 1.9024294241482112e-07, + "loss": 15.3869, + "step": 460360 + }, + { + "epoch": 0.9299765268648214, + "grad_norm": 116.25457000732422, + "learning_rate": 1.9014758216408803e-07, + "loss": 18.6597, + "step": 460370 + }, + { + "epoch": 0.9299967274975052, + "grad_norm": 79.83399963378906, + "learning_rate": 1.900522453559922e-07, + "loss": 16.284, + "step": 460380 + }, + { + "epoch": 0.930016928130189, + "grad_norm": 1928.6744384765625, + "learning_rate": 1.899569319909983e-07, + "loss": 35.3755, + "step": 460390 + }, + { + "epoch": 0.9300371287628728, + "grad_norm": 159.0091094970703, + "learning_rate": 1.8986164206957037e-07, + "loss": 15.8112, + "step": 460400 + }, + { + "epoch": 0.9300573293955566, + "grad_norm": 728.5242919921875, + "learning_rate": 1.897663755921747e-07, + "loss": 25.4743, + "step": 460410 + }, + { + "epoch": 0.9300775300282405, + "grad_norm": 291.23748779296875, + "learning_rate": 1.8967113255927315e-07, + "loss": 17.5053, + "step": 460420 + }, + { + "epoch": 0.9300977306609243, + "grad_norm": 305.28753662109375, + "learning_rate": 1.8957591297133093e-07, + "loss": 10.2362, + "step": 460430 + }, + { + "epoch": 0.9301179312936081, + "grad_norm": 413.1041564941406, + "learning_rate": 1.894807168288132e-07, + "loss": 17.3874, + "step": 460440 + }, + { + "epoch": 0.9301381319262919, + "grad_norm": 93.40693664550781, + "learning_rate": 1.8938554413218292e-07, + "loss": 14.6845, + "step": 460450 + }, + { + "epoch": 0.9301583325589757, + "grad_norm": 265.3533020019531, + "learning_rate": 1.8929039488190304e-07, + "loss": 10.0248, + "step": 460460 + }, + { + "epoch": 0.9301785331916596, + "grad_norm": 1233.372802734375, + "learning_rate": 1.8919526907843876e-07, + "loss": 50.5303, + "step": 460470 + }, + { + "epoch": 0.9301987338243434, + "grad_norm": 11.834299087524414, + "learning_rate": 1.8910016672225418e-07, + "loss": 14.3973, + "step": 460480 + }, + { + "epoch": 0.9302189344570272, + "grad_norm": 288.27978515625, + "learning_rate": 1.8900508781381056e-07, + "loss": 14.8687, + "step": 460490 + }, + { + "epoch": 0.930239135089711, + "grad_norm": 186.37252807617188, + "learning_rate": 1.8891003235357307e-07, + "loss": 18.5956, + "step": 460500 + }, + { + "epoch": 0.9302593357223948, + "grad_norm": 793.5770874023438, + "learning_rate": 1.8881500034200473e-07, + "loss": 15.8723, + "step": 460510 + }, + { + "epoch": 0.9302795363550787, + "grad_norm": 255.04364013671875, + "learning_rate": 1.88719991779569e-07, + "loss": 40.3052, + "step": 460520 + }, + { + "epoch": 0.9302997369877625, + "grad_norm": 5.66730260848999, + "learning_rate": 1.8862500666672778e-07, + "loss": 9.9851, + "step": 460530 + }, + { + "epoch": 0.9303199376204463, + "grad_norm": 385.5716552734375, + "learning_rate": 1.8853004500394512e-07, + "loss": 20.0818, + "step": 460540 + }, + { + "epoch": 0.9303401382531301, + "grad_norm": 10.681656837463379, + "learning_rate": 1.8843510679168341e-07, + "loss": 13.3622, + "step": 460550 + }, + { + "epoch": 0.9303603388858139, + "grad_norm": 0.04340120032429695, + "learning_rate": 1.883401920304051e-07, + "loss": 10.5599, + "step": 460560 + }, + { + "epoch": 0.9303805395184978, + "grad_norm": 323.4036560058594, + "learning_rate": 1.8824530072057369e-07, + "loss": 25.3733, + "step": 460570 + }, + { + "epoch": 0.9304007401511816, + "grad_norm": 161.68948364257812, + "learning_rate": 1.8815043286265044e-07, + "loss": 17.8008, + "step": 460580 + }, + { + "epoch": 0.9304209407838654, + "grad_norm": 98.37507629394531, + "learning_rate": 1.8805558845709894e-07, + "loss": 10.1326, + "step": 460590 + }, + { + "epoch": 0.9304411414165492, + "grad_norm": 1050.3140869140625, + "learning_rate": 1.8796076750438096e-07, + "loss": 29.5644, + "step": 460600 + }, + { + "epoch": 0.930461342049233, + "grad_norm": 4.681258201599121, + "learning_rate": 1.878659700049579e-07, + "loss": 21.6032, + "step": 460610 + }, + { + "epoch": 0.9304815426819169, + "grad_norm": 902.1006469726562, + "learning_rate": 1.8777119595929315e-07, + "loss": 15.341, + "step": 460620 + }, + { + "epoch": 0.9305017433146006, + "grad_norm": 584.0853271484375, + "learning_rate": 1.8767644536784703e-07, + "loss": 16.4857, + "step": 460630 + }, + { + "epoch": 0.9305219439472844, + "grad_norm": 605.7371826171875, + "learning_rate": 1.8758171823108295e-07, + "loss": 13.6837, + "step": 460640 + }, + { + "epoch": 0.9305421445799682, + "grad_norm": 346.3061828613281, + "learning_rate": 1.874870145494617e-07, + "loss": 24.9194, + "step": 460650 + }, + { + "epoch": 0.930562345212652, + "grad_norm": 471.7580261230469, + "learning_rate": 1.8739233432344518e-07, + "loss": 18.2733, + "step": 460660 + }, + { + "epoch": 0.9305825458453358, + "grad_norm": 226.48004150390625, + "learning_rate": 1.8729767755349514e-07, + "loss": 18.7975, + "step": 460670 + }, + { + "epoch": 0.9306027464780197, + "grad_norm": 829.1776733398438, + "learning_rate": 1.872030442400713e-07, + "loss": 28.4234, + "step": 460680 + }, + { + "epoch": 0.9306229471107035, + "grad_norm": 331.8815612792969, + "learning_rate": 1.8710843438363713e-07, + "loss": 36.9377, + "step": 460690 + }, + { + "epoch": 0.9306431477433873, + "grad_norm": 0.0, + "learning_rate": 1.8701384798465284e-07, + "loss": 13.0251, + "step": 460700 + }, + { + "epoch": 0.9306633483760711, + "grad_norm": 418.47802734375, + "learning_rate": 1.8691928504357858e-07, + "loss": 25.0024, + "step": 460710 + }, + { + "epoch": 0.9306835490087549, + "grad_norm": 308.30731201171875, + "learning_rate": 1.868247455608757e-07, + "loss": 10.4745, + "step": 460720 + }, + { + "epoch": 0.9307037496414388, + "grad_norm": 447.0386657714844, + "learning_rate": 1.867302295370066e-07, + "loss": 15.2201, + "step": 460730 + }, + { + "epoch": 0.9307239502741226, + "grad_norm": 355.3406677246094, + "learning_rate": 1.8663573697242977e-07, + "loss": 15.954, + "step": 460740 + }, + { + "epoch": 0.9307441509068064, + "grad_norm": 252.26039123535156, + "learning_rate": 1.8654126786760597e-07, + "loss": 37.0269, + "step": 460750 + }, + { + "epoch": 0.9307643515394902, + "grad_norm": 0.0, + "learning_rate": 1.8644682222299703e-07, + "loss": 9.7798, + "step": 460760 + }, + { + "epoch": 0.930784552172174, + "grad_norm": 65.33013153076172, + "learning_rate": 1.8635240003906264e-07, + "loss": 14.7183, + "step": 460770 + }, + { + "epoch": 0.9308047528048579, + "grad_norm": 246.04112243652344, + "learning_rate": 1.8625800131626236e-07, + "loss": 32.6438, + "step": 460780 + }, + { + "epoch": 0.9308249534375417, + "grad_norm": 92.7275161743164, + "learning_rate": 1.8616362605505645e-07, + "loss": 8.0405, + "step": 460790 + }, + { + "epoch": 0.9308451540702255, + "grad_norm": 412.2235107421875, + "learning_rate": 1.8606927425590616e-07, + "loss": 19.9027, + "step": 460800 + }, + { + "epoch": 0.9308653547029093, + "grad_norm": 122.2205810546875, + "learning_rate": 1.8597494591926946e-07, + "loss": 15.9204, + "step": 460810 + }, + { + "epoch": 0.9308855553355931, + "grad_norm": 530.8219604492188, + "learning_rate": 1.858806410456071e-07, + "loss": 16.7315, + "step": 460820 + }, + { + "epoch": 0.930905755968277, + "grad_norm": 323.177001953125, + "learning_rate": 1.8578635963537926e-07, + "loss": 13.0048, + "step": 460830 + }, + { + "epoch": 0.9309259566009608, + "grad_norm": 388.21942138671875, + "learning_rate": 1.856921016890445e-07, + "loss": 20.7814, + "step": 460840 + }, + { + "epoch": 0.9309461572336446, + "grad_norm": 462.2232971191406, + "learning_rate": 1.8559786720706185e-07, + "loss": 16.8715, + "step": 460850 + }, + { + "epoch": 0.9309663578663284, + "grad_norm": 595.4361572265625, + "learning_rate": 1.8550365618989207e-07, + "loss": 16.301, + "step": 460860 + }, + { + "epoch": 0.9309865584990122, + "grad_norm": 652.5562744140625, + "learning_rate": 1.854094686379937e-07, + "loss": 18.9976, + "step": 460870 + }, + { + "epoch": 0.9310067591316961, + "grad_norm": 611.486328125, + "learning_rate": 1.8531530455182522e-07, + "loss": 14.5472, + "step": 460880 + }, + { + "epoch": 0.9310269597643798, + "grad_norm": 11.874302864074707, + "learning_rate": 1.852211639318463e-07, + "loss": 8.2873, + "step": 460890 + }, + { + "epoch": 0.9310471603970636, + "grad_norm": 143.15005493164062, + "learning_rate": 1.8512704677851489e-07, + "loss": 8.2499, + "step": 460900 + }, + { + "epoch": 0.9310673610297474, + "grad_norm": 231.83935546875, + "learning_rate": 1.8503295309229065e-07, + "loss": 10.2527, + "step": 460910 + }, + { + "epoch": 0.9310875616624312, + "grad_norm": 354.39215087890625, + "learning_rate": 1.8493888287363148e-07, + "loss": 17.5189, + "step": 460920 + }, + { + "epoch": 0.931107762295115, + "grad_norm": 64.90702056884766, + "learning_rate": 1.8484483612299654e-07, + "loss": 23.2972, + "step": 460930 + }, + { + "epoch": 0.9311279629277989, + "grad_norm": 9.676826477050781, + "learning_rate": 1.8475081284084428e-07, + "loss": 13.254, + "step": 460940 + }, + { + "epoch": 0.9311481635604827, + "grad_norm": 393.5041809082031, + "learning_rate": 1.846568130276316e-07, + "loss": 20.8727, + "step": 460950 + }, + { + "epoch": 0.9311683641931665, + "grad_norm": 633.0675659179688, + "learning_rate": 1.8456283668381814e-07, + "loss": 15.9627, + "step": 460960 + }, + { + "epoch": 0.9311885648258503, + "grad_norm": 109.91011047363281, + "learning_rate": 1.8446888380986128e-07, + "loss": 25.1295, + "step": 460970 + }, + { + "epoch": 0.9312087654585341, + "grad_norm": 222.28253173828125, + "learning_rate": 1.84374954406219e-07, + "loss": 21.0881, + "step": 460980 + }, + { + "epoch": 0.931228966091218, + "grad_norm": 410.1101989746094, + "learning_rate": 1.8428104847334927e-07, + "loss": 8.0959, + "step": 460990 + }, + { + "epoch": 0.9312491667239018, + "grad_norm": 428.9569396972656, + "learning_rate": 1.841871660117095e-07, + "loss": 14.7619, + "step": 461000 + }, + { + "epoch": 0.9312693673565856, + "grad_norm": 594.7449340820312, + "learning_rate": 1.8409330702175764e-07, + "loss": 16.7418, + "step": 461010 + }, + { + "epoch": 0.9312895679892694, + "grad_norm": 154.09713745117188, + "learning_rate": 1.8399947150395058e-07, + "loss": 25.5965, + "step": 461020 + }, + { + "epoch": 0.9313097686219532, + "grad_norm": 237.8426055908203, + "learning_rate": 1.8390565945874572e-07, + "loss": 19.8066, + "step": 461030 + }, + { + "epoch": 0.9313299692546371, + "grad_norm": 260.109130859375, + "learning_rate": 1.8381187088660046e-07, + "loss": 18.0444, + "step": 461040 + }, + { + "epoch": 0.9313501698873209, + "grad_norm": 103.0996322631836, + "learning_rate": 1.8371810578797277e-07, + "loss": 12.4614, + "step": 461050 + }, + { + "epoch": 0.9313703705200047, + "grad_norm": 20.84394645690918, + "learning_rate": 1.8362436416331896e-07, + "loss": 12.9066, + "step": 461060 + }, + { + "epoch": 0.9313905711526885, + "grad_norm": 246.29661560058594, + "learning_rate": 1.8353064601309533e-07, + "loss": 12.1289, + "step": 461070 + }, + { + "epoch": 0.9314107717853723, + "grad_norm": 95.33872985839844, + "learning_rate": 1.8343695133775874e-07, + "loss": 25.977, + "step": 461080 + }, + { + "epoch": 0.9314309724180562, + "grad_norm": 189.6843719482422, + "learning_rate": 1.833432801377677e-07, + "loss": 17.1384, + "step": 461090 + }, + { + "epoch": 0.93145117305074, + "grad_norm": 294.8585205078125, + "learning_rate": 1.832496324135763e-07, + "loss": 14.8602, + "step": 461100 + }, + { + "epoch": 0.9314713736834238, + "grad_norm": 403.48773193359375, + "learning_rate": 1.8315600816564137e-07, + "loss": 21.2338, + "step": 461110 + }, + { + "epoch": 0.9314915743161076, + "grad_norm": 142.48876953125, + "learning_rate": 1.8306240739442094e-07, + "loss": 19.6496, + "step": 461120 + }, + { + "epoch": 0.9315117749487914, + "grad_norm": 399.75579833984375, + "learning_rate": 1.8296883010037014e-07, + "loss": 17.4055, + "step": 461130 + }, + { + "epoch": 0.9315319755814752, + "grad_norm": 204.31796264648438, + "learning_rate": 1.8287527628394418e-07, + "loss": 10.361, + "step": 461140 + }, + { + "epoch": 0.931552176214159, + "grad_norm": 174.668701171875, + "learning_rate": 1.827817459456005e-07, + "loss": 11.47, + "step": 461150 + }, + { + "epoch": 0.9315723768468428, + "grad_norm": 326.95880126953125, + "learning_rate": 1.826882390857948e-07, + "loss": 18.0679, + "step": 461160 + }, + { + "epoch": 0.9315925774795266, + "grad_norm": 178.13429260253906, + "learning_rate": 1.825947557049812e-07, + "loss": 22.8237, + "step": 461170 + }, + { + "epoch": 0.9316127781122104, + "grad_norm": 647.4390258789062, + "learning_rate": 1.825012958036171e-07, + "loss": 37.3266, + "step": 461180 + }, + { + "epoch": 0.9316329787448943, + "grad_norm": 589.9456176757812, + "learning_rate": 1.824078593821571e-07, + "loss": 16.358, + "step": 461190 + }, + { + "epoch": 0.9316531793775781, + "grad_norm": 18.983449935913086, + "learning_rate": 1.8231444644105755e-07, + "loss": 16.9788, + "step": 461200 + }, + { + "epoch": 0.9316733800102619, + "grad_norm": 755.7958374023438, + "learning_rate": 1.8222105698077253e-07, + "loss": 25.4583, + "step": 461210 + }, + { + "epoch": 0.9316935806429457, + "grad_norm": 346.2664794921875, + "learning_rate": 1.8212769100175774e-07, + "loss": 24.498, + "step": 461220 + }, + { + "epoch": 0.9317137812756295, + "grad_norm": 390.356201171875, + "learning_rate": 1.8203434850446844e-07, + "loss": 22.6047, + "step": 461230 + }, + { + "epoch": 0.9317339819083134, + "grad_norm": 265.6792297363281, + "learning_rate": 1.819410294893592e-07, + "loss": 29.0851, + "step": 461240 + }, + { + "epoch": 0.9317541825409972, + "grad_norm": 3.6130383014678955, + "learning_rate": 1.8184773395688527e-07, + "loss": 10.6757, + "step": 461250 + }, + { + "epoch": 0.931774383173681, + "grad_norm": 434.55145263671875, + "learning_rate": 1.8175446190750068e-07, + "loss": 17.189, + "step": 461260 + }, + { + "epoch": 0.9317945838063648, + "grad_norm": 693.389404296875, + "learning_rate": 1.816612133416612e-07, + "loss": 14.9552, + "step": 461270 + }, + { + "epoch": 0.9318147844390486, + "grad_norm": 259.1981506347656, + "learning_rate": 1.8156798825982035e-07, + "loss": 9.5073, + "step": 461280 + }, + { + "epoch": 0.9318349850717325, + "grad_norm": 193.0907745361328, + "learning_rate": 1.814747866624317e-07, + "loss": 17.5511, + "step": 461290 + }, + { + "epoch": 0.9318551857044163, + "grad_norm": 376.73968505859375, + "learning_rate": 1.8138160854995145e-07, + "loss": 25.6786, + "step": 461300 + }, + { + "epoch": 0.9318753863371001, + "grad_norm": 282.88330078125, + "learning_rate": 1.8128845392283324e-07, + "loss": 14.7452, + "step": 461310 + }, + { + "epoch": 0.9318955869697839, + "grad_norm": 447.7366027832031, + "learning_rate": 1.8119532278152996e-07, + "loss": 33.4463, + "step": 461320 + }, + { + "epoch": 0.9319157876024677, + "grad_norm": 370.9266662597656, + "learning_rate": 1.8110221512649573e-07, + "loss": 13.9933, + "step": 461330 + }, + { + "epoch": 0.9319359882351516, + "grad_norm": 500.27410888671875, + "learning_rate": 1.8100913095818627e-07, + "loss": 19.469, + "step": 461340 + }, + { + "epoch": 0.9319561888678354, + "grad_norm": 306.4193115234375, + "learning_rate": 1.8091607027705293e-07, + "loss": 15.5763, + "step": 461350 + }, + { + "epoch": 0.9319763895005192, + "grad_norm": 447.197265625, + "learning_rate": 1.8082303308354976e-07, + "loss": 19.6914, + "step": 461360 + }, + { + "epoch": 0.931996590133203, + "grad_norm": 125.23076629638672, + "learning_rate": 1.8073001937813138e-07, + "loss": 16.8651, + "step": 461370 + }, + { + "epoch": 0.9320167907658868, + "grad_norm": 91.94493865966797, + "learning_rate": 1.8063702916125025e-07, + "loss": 9.5325, + "step": 461380 + }, + { + "epoch": 0.9320369913985707, + "grad_norm": 202.62930297851562, + "learning_rate": 1.805440624333593e-07, + "loss": 26.8626, + "step": 461390 + }, + { + "epoch": 0.9320571920312544, + "grad_norm": 514.9032592773438, + "learning_rate": 1.804511191949121e-07, + "loss": 21.1308, + "step": 461400 + }, + { + "epoch": 0.9320773926639382, + "grad_norm": 172.77285766601562, + "learning_rate": 1.8035819944636269e-07, + "loss": 25.8753, + "step": 461410 + }, + { + "epoch": 0.932097593296622, + "grad_norm": 289.017822265625, + "learning_rate": 1.8026530318816183e-07, + "loss": 17.1092, + "step": 461420 + }, + { + "epoch": 0.9321177939293058, + "grad_norm": 6.599430561065674, + "learning_rate": 1.8017243042076304e-07, + "loss": 18.2395, + "step": 461430 + }, + { + "epoch": 0.9321379945619896, + "grad_norm": 280.7936706542969, + "learning_rate": 1.8007958114462042e-07, + "loss": 13.1732, + "step": 461440 + }, + { + "epoch": 0.9321581951946735, + "grad_norm": 894.9324340820312, + "learning_rate": 1.7998675536018474e-07, + "loss": 21.2707, + "step": 461450 + }, + { + "epoch": 0.9321783958273573, + "grad_norm": 80.70378112792969, + "learning_rate": 1.7989395306790835e-07, + "loss": 43.2938, + "step": 461460 + }, + { + "epoch": 0.9321985964600411, + "grad_norm": 301.1913757324219, + "learning_rate": 1.798011742682454e-07, + "loss": 18.9873, + "step": 461470 + }, + { + "epoch": 0.9322187970927249, + "grad_norm": 164.97093200683594, + "learning_rate": 1.7970841896164658e-07, + "loss": 27.1778, + "step": 461480 + }, + { + "epoch": 0.9322389977254087, + "grad_norm": 172.13999938964844, + "learning_rate": 1.7961568714856382e-07, + "loss": 22.4734, + "step": 461490 + }, + { + "epoch": 0.9322591983580926, + "grad_norm": 228.5173797607422, + "learning_rate": 1.7952297882945e-07, + "loss": 35.3497, + "step": 461500 + }, + { + "epoch": 0.9322793989907764, + "grad_norm": 513.8005981445312, + "learning_rate": 1.7943029400475598e-07, + "loss": 19.494, + "step": 461510 + }, + { + "epoch": 0.9322995996234602, + "grad_norm": 236.8550567626953, + "learning_rate": 1.7933763267493465e-07, + "loss": 11.9686, + "step": 461520 + }, + { + "epoch": 0.932319800256144, + "grad_norm": 3.990077018737793, + "learning_rate": 1.7924499484043622e-07, + "loss": 15.6638, + "step": 461530 + }, + { + "epoch": 0.9323400008888278, + "grad_norm": 267.17132568359375, + "learning_rate": 1.7915238050171367e-07, + "loss": 19.3243, + "step": 461540 + }, + { + "epoch": 0.9323602015215117, + "grad_norm": 175.84994506835938, + "learning_rate": 1.7905978965921778e-07, + "loss": 23.0182, + "step": 461550 + }, + { + "epoch": 0.9323804021541955, + "grad_norm": 455.0528259277344, + "learning_rate": 1.7896722231339925e-07, + "loss": 21.7424, + "step": 461560 + }, + { + "epoch": 0.9324006027868793, + "grad_norm": 239.96661376953125, + "learning_rate": 1.788746784647105e-07, + "loss": 13.5123, + "step": 461570 + }, + { + "epoch": 0.9324208034195631, + "grad_norm": 418.77294921875, + "learning_rate": 1.7878215811360068e-07, + "loss": 17.4203, + "step": 461580 + }, + { + "epoch": 0.932441004052247, + "grad_norm": 564.48095703125, + "learning_rate": 1.7868966126052323e-07, + "loss": 9.9037, + "step": 461590 + }, + { + "epoch": 0.9324612046849308, + "grad_norm": 132.60113525390625, + "learning_rate": 1.785971879059273e-07, + "loss": 18.6507, + "step": 461600 + }, + { + "epoch": 0.9324814053176146, + "grad_norm": 358.73638916015625, + "learning_rate": 1.7850473805026304e-07, + "loss": 22.1932, + "step": 461610 + }, + { + "epoch": 0.9325016059502984, + "grad_norm": 105.70510864257812, + "learning_rate": 1.7841231169398287e-07, + "loss": 11.7789, + "step": 461620 + }, + { + "epoch": 0.9325218065829822, + "grad_norm": 426.2632751464844, + "learning_rate": 1.7831990883753592e-07, + "loss": 16.3437, + "step": 461630 + }, + { + "epoch": 0.932542007215666, + "grad_norm": 410.3416748046875, + "learning_rate": 1.7822752948137289e-07, + "loss": 16.3283, + "step": 461640 + }, + { + "epoch": 0.9325622078483498, + "grad_norm": 988.7979736328125, + "learning_rate": 1.7813517362594347e-07, + "loss": 26.9564, + "step": 461650 + }, + { + "epoch": 0.9325824084810336, + "grad_norm": 334.42529296875, + "learning_rate": 1.7804284127169946e-07, + "loss": 8.4331, + "step": 461660 + }, + { + "epoch": 0.9326026091137174, + "grad_norm": 243.21121215820312, + "learning_rate": 1.7795053241908943e-07, + "loss": 10.7253, + "step": 461670 + }, + { + "epoch": 0.9326228097464012, + "grad_norm": 570.7203369140625, + "learning_rate": 1.7785824706856303e-07, + "loss": 12.1912, + "step": 461680 + }, + { + "epoch": 0.932643010379085, + "grad_norm": 534.68994140625, + "learning_rate": 1.7776598522057154e-07, + "loss": 27.0854, + "step": 461690 + }, + { + "epoch": 0.9326632110117689, + "grad_norm": 371.4281921386719, + "learning_rate": 1.7767374687556405e-07, + "loss": 31.2355, + "step": 461700 + }, + { + "epoch": 0.9326834116444527, + "grad_norm": 163.0079803466797, + "learning_rate": 1.7758153203398853e-07, + "loss": 26.3914, + "step": 461710 + }, + { + "epoch": 0.9327036122771365, + "grad_norm": 99.2337875366211, + "learning_rate": 1.774893406962963e-07, + "loss": 22.1929, + "step": 461720 + }, + { + "epoch": 0.9327238129098203, + "grad_norm": 719.2616577148438, + "learning_rate": 1.7739717286293644e-07, + "loss": 15.3715, + "step": 461730 + }, + { + "epoch": 0.9327440135425041, + "grad_norm": 360.2846374511719, + "learning_rate": 1.7730502853435805e-07, + "loss": 13.2564, + "step": 461740 + }, + { + "epoch": 0.932764214175188, + "grad_norm": 130.13404846191406, + "learning_rate": 1.7721290771100964e-07, + "loss": 11.8033, + "step": 461750 + }, + { + "epoch": 0.9327844148078718, + "grad_norm": 130.27134704589844, + "learning_rate": 1.7712081039334083e-07, + "loss": 8.8699, + "step": 461760 + }, + { + "epoch": 0.9328046154405556, + "grad_norm": 40.666603088378906, + "learning_rate": 1.770287365818002e-07, + "loss": 11.4024, + "step": 461770 + }, + { + "epoch": 0.9328248160732394, + "grad_norm": 350.0940856933594, + "learning_rate": 1.7693668627683625e-07, + "loss": 10.8917, + "step": 461780 + }, + { + "epoch": 0.9328450167059232, + "grad_norm": 260.5531921386719, + "learning_rate": 1.7684465947889806e-07, + "loss": 14.8456, + "step": 461790 + }, + { + "epoch": 0.9328652173386071, + "grad_norm": 369.9488525390625, + "learning_rate": 1.7675265618843361e-07, + "loss": 11.4153, + "step": 461800 + }, + { + "epoch": 0.9328854179712909, + "grad_norm": 265.1435241699219, + "learning_rate": 1.7666067640589256e-07, + "loss": 31.7913, + "step": 461810 + }, + { + "epoch": 0.9329056186039747, + "grad_norm": 607.055419921875, + "learning_rate": 1.7656872013172176e-07, + "loss": 19.5087, + "step": 461820 + }, + { + "epoch": 0.9329258192366585, + "grad_norm": 319.4716796875, + "learning_rate": 1.764767873663703e-07, + "loss": 37.0232, + "step": 461830 + }, + { + "epoch": 0.9329460198693423, + "grad_norm": 467.063232421875, + "learning_rate": 1.7638487811028616e-07, + "loss": 17.9041, + "step": 461840 + }, + { + "epoch": 0.9329662205020262, + "grad_norm": 175.1415557861328, + "learning_rate": 1.7629299236391616e-07, + "loss": 15.5899, + "step": 461850 + }, + { + "epoch": 0.93298642113471, + "grad_norm": 1434.3819580078125, + "learning_rate": 1.7620113012771002e-07, + "loss": 23.1167, + "step": 461860 + }, + { + "epoch": 0.9330066217673938, + "grad_norm": 619.7500610351562, + "learning_rate": 1.7610929140211397e-07, + "loss": 27.0966, + "step": 461870 + }, + { + "epoch": 0.9330268224000776, + "grad_norm": 457.38543701171875, + "learning_rate": 1.760174761875766e-07, + "loss": 10.2659, + "step": 461880 + }, + { + "epoch": 0.9330470230327614, + "grad_norm": 312.8133544921875, + "learning_rate": 1.7592568448454528e-07, + "loss": 24.0141, + "step": 461890 + }, + { + "epoch": 0.9330672236654453, + "grad_norm": 113.47441101074219, + "learning_rate": 1.758339162934658e-07, + "loss": 21.0164, + "step": 461900 + }, + { + "epoch": 0.933087424298129, + "grad_norm": 468.087646484375, + "learning_rate": 1.757421716147878e-07, + "loss": 21.749, + "step": 461910 + }, + { + "epoch": 0.9331076249308128, + "grad_norm": 730.168212890625, + "learning_rate": 1.7565045044895756e-07, + "loss": 20.517, + "step": 461920 + }, + { + "epoch": 0.9331278255634966, + "grad_norm": 459.88446044921875, + "learning_rate": 1.7555875279642087e-07, + "loss": 16.8315, + "step": 461930 + }, + { + "epoch": 0.9331480261961804, + "grad_norm": 299.3412780761719, + "learning_rate": 1.754670786576257e-07, + "loss": 11.309, + "step": 461940 + }, + { + "epoch": 0.9331682268288642, + "grad_norm": 255.61477661132812, + "learning_rate": 1.7537542803302e-07, + "loss": 8.1569, + "step": 461950 + }, + { + "epoch": 0.9331884274615481, + "grad_norm": 303.9642028808594, + "learning_rate": 1.7528380092304842e-07, + "loss": 26.4091, + "step": 461960 + }, + { + "epoch": 0.9332086280942319, + "grad_norm": 880.2850341796875, + "learning_rate": 1.751921973281584e-07, + "loss": 30.5622, + "step": 461970 + }, + { + "epoch": 0.9332288287269157, + "grad_norm": 446.8388671875, + "learning_rate": 1.7510061724879678e-07, + "loss": 17.2565, + "step": 461980 + }, + { + "epoch": 0.9332490293595995, + "grad_norm": 195.17286682128906, + "learning_rate": 1.750090606854099e-07, + "loss": 22.5183, + "step": 461990 + }, + { + "epoch": 0.9332692299922833, + "grad_norm": 234.3126983642578, + "learning_rate": 1.7491752763844294e-07, + "loss": 14.1387, + "step": 462000 + }, + { + "epoch": 0.9332894306249672, + "grad_norm": 184.8792724609375, + "learning_rate": 1.7482601810834276e-07, + "loss": 12.4313, + "step": 462010 + }, + { + "epoch": 0.933309631257651, + "grad_norm": 96.02690124511719, + "learning_rate": 1.7473453209555625e-07, + "loss": 14.9553, + "step": 462020 + }, + { + "epoch": 0.9333298318903348, + "grad_norm": 613.595947265625, + "learning_rate": 1.7464306960052746e-07, + "loss": 14.9738, + "step": 462030 + }, + { + "epoch": 0.9333500325230186, + "grad_norm": 386.20684814453125, + "learning_rate": 1.7455163062370273e-07, + "loss": 11.3136, + "step": 462040 + }, + { + "epoch": 0.9333702331557024, + "grad_norm": 408.2582702636719, + "learning_rate": 1.744602151655289e-07, + "loss": 18.7055, + "step": 462050 + }, + { + "epoch": 0.9333904337883863, + "grad_norm": 124.63929748535156, + "learning_rate": 1.743688232264512e-07, + "loss": 8.8279, + "step": 462060 + }, + { + "epoch": 0.9334106344210701, + "grad_norm": 418.02581787109375, + "learning_rate": 1.742774548069137e-07, + "loss": 12.0346, + "step": 462070 + }, + { + "epoch": 0.9334308350537539, + "grad_norm": 400.3048400878906, + "learning_rate": 1.7418610990736273e-07, + "loss": 16.3988, + "step": 462080 + }, + { + "epoch": 0.9334510356864377, + "grad_norm": 161.02871704101562, + "learning_rate": 1.7409478852824402e-07, + "loss": 30.2902, + "step": 462090 + }, + { + "epoch": 0.9334712363191215, + "grad_norm": 321.67498779296875, + "learning_rate": 1.740034906700011e-07, + "loss": 14.7172, + "step": 462100 + }, + { + "epoch": 0.9334914369518054, + "grad_norm": 422.029296875, + "learning_rate": 1.7391221633308032e-07, + "loss": 18.4577, + "step": 462110 + }, + { + "epoch": 0.9335116375844892, + "grad_norm": 616.5631103515625, + "learning_rate": 1.7382096551792572e-07, + "loss": 12.381, + "step": 462120 + }, + { + "epoch": 0.933531838217173, + "grad_norm": 594.563720703125, + "learning_rate": 1.7372973822498252e-07, + "loss": 16.8806, + "step": 462130 + }, + { + "epoch": 0.9335520388498568, + "grad_norm": 6.52410364151001, + "learning_rate": 1.7363853445469482e-07, + "loss": 16.316, + "step": 462140 + }, + { + "epoch": 0.9335722394825406, + "grad_norm": 363.2400817871094, + "learning_rate": 1.7354735420750835e-07, + "loss": 17.689, + "step": 462150 + }, + { + "epoch": 0.9335924401152244, + "grad_norm": 481.3193359375, + "learning_rate": 1.7345619748386666e-07, + "loss": 22.4928, + "step": 462160 + }, + { + "epoch": 0.9336126407479082, + "grad_norm": 216.18240356445312, + "learning_rate": 1.733650642842133e-07, + "loss": 15.536, + "step": 462170 + }, + { + "epoch": 0.933632841380592, + "grad_norm": 661.4515991210938, + "learning_rate": 1.73273954608994e-07, + "loss": 27.2659, + "step": 462180 + }, + { + "epoch": 0.9336530420132758, + "grad_norm": 69.99722290039062, + "learning_rate": 1.7318286845865174e-07, + "loss": 25.4236, + "step": 462190 + }, + { + "epoch": 0.9336732426459596, + "grad_norm": 110.41122436523438, + "learning_rate": 1.7309180583363062e-07, + "loss": 21.1707, + "step": 462200 + }, + { + "epoch": 0.9336934432786435, + "grad_norm": 328.7599792480469, + "learning_rate": 1.7300076673437526e-07, + "loss": 9.7369, + "step": 462210 + }, + { + "epoch": 0.9337136439113273, + "grad_norm": 602.4974975585938, + "learning_rate": 1.7290975116132756e-07, + "loss": 24.9123, + "step": 462220 + }, + { + "epoch": 0.9337338445440111, + "grad_norm": 209.95286560058594, + "learning_rate": 1.728187591149333e-07, + "loss": 12.9717, + "step": 462230 + }, + { + "epoch": 0.9337540451766949, + "grad_norm": 150.35626220703125, + "learning_rate": 1.7272779059563483e-07, + "loss": 22.0588, + "step": 462240 + }, + { + "epoch": 0.9337742458093787, + "grad_norm": 237.76243591308594, + "learning_rate": 1.7263684560387518e-07, + "loss": 19.1027, + "step": 462250 + }, + { + "epoch": 0.9337944464420626, + "grad_norm": 186.2238311767578, + "learning_rate": 1.7254592414009785e-07, + "loss": 16.0267, + "step": 462260 + }, + { + "epoch": 0.9338146470747464, + "grad_norm": 1390.9752197265625, + "learning_rate": 1.7245502620474643e-07, + "loss": 21.0884, + "step": 462270 + }, + { + "epoch": 0.9338348477074302, + "grad_norm": 373.25543212890625, + "learning_rate": 1.7236415179826438e-07, + "loss": 20.0606, + "step": 462280 + }, + { + "epoch": 0.933855048340114, + "grad_norm": 115.53531646728516, + "learning_rate": 1.7227330092109306e-07, + "loss": 18.2338, + "step": 462290 + }, + { + "epoch": 0.9338752489727978, + "grad_norm": 155.42364501953125, + "learning_rate": 1.7218247357367656e-07, + "loss": 15.553, + "step": 462300 + }, + { + "epoch": 0.9338954496054817, + "grad_norm": 14.017396926879883, + "learning_rate": 1.720916697564573e-07, + "loss": 8.9427, + "step": 462310 + }, + { + "epoch": 0.9339156502381655, + "grad_norm": 613.1043701171875, + "learning_rate": 1.7200088946987713e-07, + "loss": 29.1203, + "step": 462320 + }, + { + "epoch": 0.9339358508708493, + "grad_norm": 1064.6912841796875, + "learning_rate": 1.7191013271437908e-07, + "loss": 23.2569, + "step": 462330 + }, + { + "epoch": 0.9339560515035331, + "grad_norm": 644.6098022460938, + "learning_rate": 1.7181939949040606e-07, + "loss": 17.197, + "step": 462340 + }, + { + "epoch": 0.9339762521362169, + "grad_norm": 735.906982421875, + "learning_rate": 1.717286897983994e-07, + "loss": 27.4059, + "step": 462350 + }, + { + "epoch": 0.9339964527689008, + "grad_norm": 194.6940155029297, + "learning_rate": 1.7163800363880102e-07, + "loss": 8.4069, + "step": 462360 + }, + { + "epoch": 0.9340166534015846, + "grad_norm": 446.6946105957031, + "learning_rate": 1.715473410120544e-07, + "loss": 28.9146, + "step": 462370 + }, + { + "epoch": 0.9340368540342684, + "grad_norm": 454.2542419433594, + "learning_rate": 1.7145670191859977e-07, + "loss": 12.4825, + "step": 462380 + }, + { + "epoch": 0.9340570546669522, + "grad_norm": 210.14657592773438, + "learning_rate": 1.7136608635887952e-07, + "loss": 18.0583, + "step": 462390 + }, + { + "epoch": 0.934077255299636, + "grad_norm": 1262.3385009765625, + "learning_rate": 1.7127549433333557e-07, + "loss": 26.256, + "step": 462400 + }, + { + "epoch": 0.9340974559323199, + "grad_norm": 194.8001251220703, + "learning_rate": 1.7118492584240865e-07, + "loss": 15.3081, + "step": 462410 + }, + { + "epoch": 0.9341176565650036, + "grad_norm": 210.71812438964844, + "learning_rate": 1.7109438088654173e-07, + "loss": 14.9052, + "step": 462420 + }, + { + "epoch": 0.9341378571976874, + "grad_norm": 427.1445617675781, + "learning_rate": 1.7100385946617393e-07, + "loss": 20.4259, + "step": 462430 + }, + { + "epoch": 0.9341580578303712, + "grad_norm": 1009.92626953125, + "learning_rate": 1.7091336158174877e-07, + "loss": 22.6844, + "step": 462440 + }, + { + "epoch": 0.934178258463055, + "grad_norm": 234.84991455078125, + "learning_rate": 1.7082288723370587e-07, + "loss": 4.9438, + "step": 462450 + }, + { + "epoch": 0.9341984590957388, + "grad_norm": 272.6453857421875, + "learning_rate": 1.7073243642248605e-07, + "loss": 16.662, + "step": 462460 + }, + { + "epoch": 0.9342186597284227, + "grad_norm": 554.6612548828125, + "learning_rate": 1.7064200914853112e-07, + "loss": 26.6758, + "step": 462470 + }, + { + "epoch": 0.9342388603611065, + "grad_norm": 171.58334350585938, + "learning_rate": 1.7055160541228077e-07, + "loss": 7.0108, + "step": 462480 + }, + { + "epoch": 0.9342590609937903, + "grad_norm": 761.4239501953125, + "learning_rate": 1.7046122521417686e-07, + "loss": 10.5315, + "step": 462490 + }, + { + "epoch": 0.9342792616264741, + "grad_norm": 279.9809875488281, + "learning_rate": 1.7037086855465902e-07, + "loss": 29.0353, + "step": 462500 + }, + { + "epoch": 0.9342994622591579, + "grad_norm": 130.5390625, + "learning_rate": 1.702805354341669e-07, + "loss": 26.2922, + "step": 462510 + }, + { + "epoch": 0.9343196628918418, + "grad_norm": 1399.39794921875, + "learning_rate": 1.7019022585314293e-07, + "loss": 19.9626, + "step": 462520 + }, + { + "epoch": 0.9343398635245256, + "grad_norm": 965.174560546875, + "learning_rate": 1.7009993981202567e-07, + "loss": 26.338, + "step": 462530 + }, + { + "epoch": 0.9343600641572094, + "grad_norm": 168.63499450683594, + "learning_rate": 1.7000967731125472e-07, + "loss": 19.967, + "step": 462540 + }, + { + "epoch": 0.9343802647898932, + "grad_norm": 222.2827911376953, + "learning_rate": 1.699194383512709e-07, + "loss": 7.6039, + "step": 462550 + }, + { + "epoch": 0.934400465422577, + "grad_norm": 277.81793212890625, + "learning_rate": 1.6982922293251548e-07, + "loss": 43.1463, + "step": 462560 + }, + { + "epoch": 0.9344206660552609, + "grad_norm": 207.72872924804688, + "learning_rate": 1.6973903105542533e-07, + "loss": 32.8887, + "step": 462570 + }, + { + "epoch": 0.9344408666879447, + "grad_norm": 741.337158203125, + "learning_rate": 1.6964886272044069e-07, + "loss": 24.0368, + "step": 462580 + }, + { + "epoch": 0.9344610673206285, + "grad_norm": 294.7828369140625, + "learning_rate": 1.6955871792800283e-07, + "loss": 13.5807, + "step": 462590 + }, + { + "epoch": 0.9344812679533123, + "grad_norm": 463.4609680175781, + "learning_rate": 1.6946859667854977e-07, + "loss": 34.091, + "step": 462600 + }, + { + "epoch": 0.9345014685859961, + "grad_norm": 118.23023223876953, + "learning_rate": 1.6937849897252056e-07, + "loss": 8.3565, + "step": 462610 + }, + { + "epoch": 0.93452166921868, + "grad_norm": 76.01818084716797, + "learning_rate": 1.6928842481035436e-07, + "loss": 9.2868, + "step": 462620 + }, + { + "epoch": 0.9345418698513638, + "grad_norm": 468.1135559082031, + "learning_rate": 1.691983741924913e-07, + "loss": 16.4288, + "step": 462630 + }, + { + "epoch": 0.9345620704840476, + "grad_norm": 322.39031982421875, + "learning_rate": 1.6910834711936886e-07, + "loss": 15.4933, + "step": 462640 + }, + { + "epoch": 0.9345822711167314, + "grad_norm": 523.301025390625, + "learning_rate": 1.690183435914261e-07, + "loss": 12.5366, + "step": 462650 + }, + { + "epoch": 0.9346024717494152, + "grad_norm": 577.0751342773438, + "learning_rate": 1.689283636091027e-07, + "loss": 19.5234, + "step": 462660 + }, + { + "epoch": 0.9346226723820991, + "grad_norm": 452.67889404296875, + "learning_rate": 1.688384071728366e-07, + "loss": 31.4996, + "step": 462670 + }, + { + "epoch": 0.9346428730147828, + "grad_norm": 11.344707489013672, + "learning_rate": 1.6874847428306583e-07, + "loss": 20.8121, + "step": 462680 + }, + { + "epoch": 0.9346630736474666, + "grad_norm": 550.50927734375, + "learning_rate": 1.6865856494022892e-07, + "loss": 14.4299, + "step": 462690 + }, + { + "epoch": 0.9346832742801504, + "grad_norm": 397.3772888183594, + "learning_rate": 1.6856867914476492e-07, + "loss": 8.819, + "step": 462700 + }, + { + "epoch": 0.9347034749128342, + "grad_norm": 6.457852840423584, + "learning_rate": 1.684788168971102e-07, + "loss": 12.4718, + "step": 462710 + }, + { + "epoch": 0.934723675545518, + "grad_norm": 464.8440856933594, + "learning_rate": 1.6838897819770438e-07, + "loss": 40.8515, + "step": 462720 + }, + { + "epoch": 0.9347438761782019, + "grad_norm": 2413.400390625, + "learning_rate": 1.682991630469838e-07, + "loss": 42.2716, + "step": 462730 + }, + { + "epoch": 0.9347640768108857, + "grad_norm": 624.918212890625, + "learning_rate": 1.6820937144538807e-07, + "loss": 16.1107, + "step": 462740 + }, + { + "epoch": 0.9347842774435695, + "grad_norm": 635.3636474609375, + "learning_rate": 1.6811960339335298e-07, + "loss": 18.9638, + "step": 462750 + }, + { + "epoch": 0.9348044780762533, + "grad_norm": 595.7736206054688, + "learning_rate": 1.6802985889131762e-07, + "loss": 22.7298, + "step": 462760 + }, + { + "epoch": 0.9348246787089372, + "grad_norm": 233.24156188964844, + "learning_rate": 1.6794013793971887e-07, + "loss": 26.2768, + "step": 462770 + }, + { + "epoch": 0.934844879341621, + "grad_norm": 545.78271484375, + "learning_rate": 1.6785044053899302e-07, + "loss": 15.6134, + "step": 462780 + }, + { + "epoch": 0.9348650799743048, + "grad_norm": 230.40011596679688, + "learning_rate": 1.6776076668957864e-07, + "loss": 16.2018, + "step": 462790 + }, + { + "epoch": 0.9348852806069886, + "grad_norm": 242.9065704345703, + "learning_rate": 1.6767111639191202e-07, + "loss": 24.9229, + "step": 462800 + }, + { + "epoch": 0.9349054812396724, + "grad_norm": 0.0, + "learning_rate": 1.675814896464306e-07, + "loss": 34.967, + "step": 462810 + }, + { + "epoch": 0.9349256818723563, + "grad_norm": 229.31613159179688, + "learning_rate": 1.6749188645357072e-07, + "loss": 11.2219, + "step": 462820 + }, + { + "epoch": 0.9349458825050401, + "grad_norm": 412.9171447753906, + "learning_rate": 1.6740230681376867e-07, + "loss": 35.8329, + "step": 462830 + }, + { + "epoch": 0.9349660831377239, + "grad_norm": 12.56732177734375, + "learning_rate": 1.6731275072746244e-07, + "loss": 20.6205, + "step": 462840 + }, + { + "epoch": 0.9349862837704077, + "grad_norm": 454.1410827636719, + "learning_rate": 1.672232181950878e-07, + "loss": 11.0535, + "step": 462850 + }, + { + "epoch": 0.9350064844030915, + "grad_norm": 283.65576171875, + "learning_rate": 1.6713370921708049e-07, + "loss": 20.9141, + "step": 462860 + }, + { + "epoch": 0.9350266850357754, + "grad_norm": 240.0868682861328, + "learning_rate": 1.6704422379387685e-07, + "loss": 16.6079, + "step": 462870 + }, + { + "epoch": 0.9350468856684592, + "grad_norm": 0.09903652966022491, + "learning_rate": 1.669547619259143e-07, + "loss": 10.4518, + "step": 462880 + }, + { + "epoch": 0.935067086301143, + "grad_norm": 383.5288391113281, + "learning_rate": 1.6686532361362805e-07, + "loss": 14.665, + "step": 462890 + }, + { + "epoch": 0.9350872869338268, + "grad_norm": 133.67431640625, + "learning_rate": 1.6677590885745388e-07, + "loss": 30.3162, + "step": 462900 + }, + { + "epoch": 0.9351074875665106, + "grad_norm": 161.3779296875, + "learning_rate": 1.6668651765782806e-07, + "loss": 27.869, + "step": 462910 + }, + { + "epoch": 0.9351276881991945, + "grad_norm": 499.52880859375, + "learning_rate": 1.6659715001518583e-07, + "loss": 20.6727, + "step": 462920 + }, + { + "epoch": 0.9351478888318782, + "grad_norm": 500.2755126953125, + "learning_rate": 1.665078059299624e-07, + "loss": 11.5379, + "step": 462930 + }, + { + "epoch": 0.935168089464562, + "grad_norm": 345.6598815917969, + "learning_rate": 1.6641848540259353e-07, + "loss": 28.3758, + "step": 462940 + }, + { + "epoch": 0.9351882900972458, + "grad_norm": 91.43672943115234, + "learning_rate": 1.6632918843351554e-07, + "loss": 8.8927, + "step": 462950 + }, + { + "epoch": 0.9352084907299296, + "grad_norm": 75.08840942382812, + "learning_rate": 1.662399150231625e-07, + "loss": 18.547, + "step": 462960 + }, + { + "epoch": 0.9352286913626134, + "grad_norm": 301.5865478515625, + "learning_rate": 1.6615066517196965e-07, + "loss": 21.6605, + "step": 462970 + }, + { + "epoch": 0.9352488919952973, + "grad_norm": 436.82568359375, + "learning_rate": 1.6606143888037219e-07, + "loss": 12.5314, + "step": 462980 + }, + { + "epoch": 0.9352690926279811, + "grad_norm": 358.45697021484375, + "learning_rate": 1.659722361488053e-07, + "loss": 23.522, + "step": 462990 + }, + { + "epoch": 0.9352892932606649, + "grad_norm": 677.9505004882812, + "learning_rate": 1.6588305697770313e-07, + "loss": 23.7936, + "step": 463000 + }, + { + "epoch": 0.9353094938933487, + "grad_norm": 140.39315795898438, + "learning_rate": 1.6579390136750086e-07, + "loss": 30.5268, + "step": 463010 + }, + { + "epoch": 0.9353296945260325, + "grad_norm": 391.122802734375, + "learning_rate": 1.6570476931863256e-07, + "loss": 16.1737, + "step": 463020 + }, + { + "epoch": 0.9353498951587164, + "grad_norm": 328.2913818359375, + "learning_rate": 1.656156608315329e-07, + "loss": 22.6559, + "step": 463030 + }, + { + "epoch": 0.9353700957914002, + "grad_norm": 246.04940795898438, + "learning_rate": 1.65526575906636e-07, + "loss": 13.7928, + "step": 463040 + }, + { + "epoch": 0.935390296424084, + "grad_norm": 124.8285140991211, + "learning_rate": 1.6543751454437708e-07, + "loss": 14.0438, + "step": 463050 + }, + { + "epoch": 0.9354104970567678, + "grad_norm": 247.01405334472656, + "learning_rate": 1.6534847674518905e-07, + "loss": 13.7093, + "step": 463060 + }, + { + "epoch": 0.9354306976894516, + "grad_norm": 403.01171875, + "learning_rate": 1.6525946250950553e-07, + "loss": 14.5217, + "step": 463070 + }, + { + "epoch": 0.9354508983221355, + "grad_norm": 196.5394744873047, + "learning_rate": 1.651704718377617e-07, + "loss": 12.1933, + "step": 463080 + }, + { + "epoch": 0.9354710989548193, + "grad_norm": 820.8240356445312, + "learning_rate": 1.650815047303894e-07, + "loss": 23.4596, + "step": 463090 + }, + { + "epoch": 0.9354912995875031, + "grad_norm": 45.13959503173828, + "learning_rate": 1.6499256118782503e-07, + "loss": 23.7143, + "step": 463100 + }, + { + "epoch": 0.9355115002201869, + "grad_norm": 1102.887451171875, + "learning_rate": 1.6490364121049984e-07, + "loss": 23.7886, + "step": 463110 + }, + { + "epoch": 0.9355317008528707, + "grad_norm": 180.792724609375, + "learning_rate": 1.648147447988474e-07, + "loss": 19.873, + "step": 463120 + }, + { + "epoch": 0.9355519014855546, + "grad_norm": 553.9746704101562, + "learning_rate": 1.6472587195330236e-07, + "loss": 27.4306, + "step": 463130 + }, + { + "epoch": 0.9355721021182384, + "grad_norm": 309.197509765625, + "learning_rate": 1.6463702267429659e-07, + "loss": 15.8393, + "step": 463140 + }, + { + "epoch": 0.9355923027509222, + "grad_norm": 319.75726318359375, + "learning_rate": 1.645481969622631e-07, + "loss": 20.5699, + "step": 463150 + }, + { + "epoch": 0.935612503383606, + "grad_norm": 260.9181213378906, + "learning_rate": 1.644593948176354e-07, + "loss": 23.9048, + "step": 463160 + }, + { + "epoch": 0.9356327040162898, + "grad_norm": 283.957763671875, + "learning_rate": 1.6437061624084704e-07, + "loss": 15.1584, + "step": 463170 + }, + { + "epoch": 0.9356529046489737, + "grad_norm": 216.19720458984375, + "learning_rate": 1.6428186123232826e-07, + "loss": 14.2853, + "step": 463180 + }, + { + "epoch": 0.9356731052816574, + "grad_norm": 282.28033447265625, + "learning_rate": 1.6419312979251368e-07, + "loss": 26.9816, + "step": 463190 + }, + { + "epoch": 0.9356933059143412, + "grad_norm": 547.8955688476562, + "learning_rate": 1.6410442192183574e-07, + "loss": 13.523, + "step": 463200 + }, + { + "epoch": 0.935713506547025, + "grad_norm": 317.2925720214844, + "learning_rate": 1.6401573762072631e-07, + "loss": 10.4762, + "step": 463210 + }, + { + "epoch": 0.9357337071797088, + "grad_norm": 226.99322509765625, + "learning_rate": 1.6392707688961728e-07, + "loss": 17.5634, + "step": 463220 + }, + { + "epoch": 0.9357539078123926, + "grad_norm": 104.4507064819336, + "learning_rate": 1.638384397289411e-07, + "loss": 22.4312, + "step": 463230 + }, + { + "epoch": 0.9357741084450765, + "grad_norm": 216.3778533935547, + "learning_rate": 1.6374982613913072e-07, + "loss": 17.19, + "step": 463240 + }, + { + "epoch": 0.9357943090777603, + "grad_norm": 299.8779296875, + "learning_rate": 1.6366123612061636e-07, + "loss": 26.81, + "step": 463250 + }, + { + "epoch": 0.9358145097104441, + "grad_norm": 167.1317596435547, + "learning_rate": 1.635726696738299e-07, + "loss": 19.6392, + "step": 463260 + }, + { + "epoch": 0.9358347103431279, + "grad_norm": 415.87091064453125, + "learning_rate": 1.6348412679920488e-07, + "loss": 10.1537, + "step": 463270 + }, + { + "epoch": 0.9358549109758117, + "grad_norm": 165.0482177734375, + "learning_rate": 1.6339560749717154e-07, + "loss": 9.7739, + "step": 463280 + }, + { + "epoch": 0.9358751116084956, + "grad_norm": 236.4598388671875, + "learning_rate": 1.633071117681606e-07, + "loss": 14.7218, + "step": 463290 + }, + { + "epoch": 0.9358953122411794, + "grad_norm": 644.8948974609375, + "learning_rate": 1.6321863961260452e-07, + "loss": 18.5992, + "step": 463300 + }, + { + "epoch": 0.9359155128738632, + "grad_norm": 419.4942626953125, + "learning_rate": 1.6313019103093463e-07, + "loss": 16.4838, + "step": 463310 + }, + { + "epoch": 0.935935713506547, + "grad_norm": 219.58193969726562, + "learning_rate": 1.6304176602358056e-07, + "loss": 13.8679, + "step": 463320 + }, + { + "epoch": 0.9359559141392308, + "grad_norm": 338.3202819824219, + "learning_rate": 1.6295336459097532e-07, + "loss": 15.4373, + "step": 463330 + }, + { + "epoch": 0.9359761147719147, + "grad_norm": 377.089111328125, + "learning_rate": 1.62864986733548e-07, + "loss": 14.0342, + "step": 463340 + }, + { + "epoch": 0.9359963154045985, + "grad_norm": 299.72210693359375, + "learning_rate": 1.6277663245173047e-07, + "loss": 11.9394, + "step": 463350 + }, + { + "epoch": 0.9360165160372823, + "grad_norm": 290.8778076171875, + "learning_rate": 1.6268830174595242e-07, + "loss": 14.5878, + "step": 463360 + }, + { + "epoch": 0.9360367166699661, + "grad_norm": 491.1105651855469, + "learning_rate": 1.6259999461664567e-07, + "loss": 23.7471, + "step": 463370 + }, + { + "epoch": 0.93605691730265, + "grad_norm": 42.898990631103516, + "learning_rate": 1.6251171106423935e-07, + "loss": 15.818, + "step": 463380 + }, + { + "epoch": 0.9360771179353338, + "grad_norm": 205.88906860351562, + "learning_rate": 1.6242345108916424e-07, + "loss": 13.1447, + "step": 463390 + }, + { + "epoch": 0.9360973185680176, + "grad_norm": 438.9242248535156, + "learning_rate": 1.6233521469185054e-07, + "loss": 22.0241, + "step": 463400 + }, + { + "epoch": 0.9361175192007014, + "grad_norm": 303.1636962890625, + "learning_rate": 1.6224700187272792e-07, + "loss": 18.267, + "step": 463410 + }, + { + "epoch": 0.9361377198333852, + "grad_norm": 295.252685546875, + "learning_rate": 1.621588126322271e-07, + "loss": 13.4704, + "step": 463420 + }, + { + "epoch": 0.936157920466069, + "grad_norm": 1018.2053833007812, + "learning_rate": 1.620706469707778e-07, + "loss": 39.882, + "step": 463430 + }, + { + "epoch": 0.9361781210987528, + "grad_norm": 461.65521240234375, + "learning_rate": 1.619825048888085e-07, + "loss": 12.3608, + "step": 463440 + }, + { + "epoch": 0.9361983217314366, + "grad_norm": 297.8987121582031, + "learning_rate": 1.618943863867506e-07, + "loss": 23.6289, + "step": 463450 + }, + { + "epoch": 0.9362185223641204, + "grad_norm": 34.65291213989258, + "learning_rate": 1.6180629146503256e-07, + "loss": 10.5408, + "step": 463460 + }, + { + "epoch": 0.9362387229968042, + "grad_norm": 454.8813781738281, + "learning_rate": 1.61718220124083e-07, + "loss": 11.0426, + "step": 463470 + }, + { + "epoch": 0.936258923629488, + "grad_norm": 0.6589401960372925, + "learning_rate": 1.6163017236433265e-07, + "loss": 17.0109, + "step": 463480 + }, + { + "epoch": 0.9362791242621719, + "grad_norm": 688.1834106445312, + "learning_rate": 1.6154214818621007e-07, + "loss": 21.8056, + "step": 463490 + }, + { + "epoch": 0.9362993248948557, + "grad_norm": 224.41502380371094, + "learning_rate": 1.6145414759014433e-07, + "loss": 13.6119, + "step": 463500 + }, + { + "epoch": 0.9363195255275395, + "grad_norm": 252.64492797851562, + "learning_rate": 1.6136617057656344e-07, + "loss": 12.8251, + "step": 463510 + }, + { + "epoch": 0.9363397261602233, + "grad_norm": 197.5618896484375, + "learning_rate": 1.6127821714589763e-07, + "loss": 14.6395, + "step": 463520 + }, + { + "epoch": 0.9363599267929071, + "grad_norm": 646.2914428710938, + "learning_rate": 1.6119028729857545e-07, + "loss": 14.4647, + "step": 463530 + }, + { + "epoch": 0.936380127425591, + "grad_norm": 217.630126953125, + "learning_rate": 1.6110238103502374e-07, + "loss": 21.2792, + "step": 463540 + }, + { + "epoch": 0.9364003280582748, + "grad_norm": 221.92062377929688, + "learning_rate": 1.6101449835567273e-07, + "loss": 11.9105, + "step": 463550 + }, + { + "epoch": 0.9364205286909586, + "grad_norm": 686.2666015625, + "learning_rate": 1.6092663926094987e-07, + "loss": 21.9448, + "step": 463560 + }, + { + "epoch": 0.9364407293236424, + "grad_norm": 395.3066101074219, + "learning_rate": 1.6083880375128424e-07, + "loss": 17.1408, + "step": 463570 + }, + { + "epoch": 0.9364609299563262, + "grad_norm": 663.0675659179688, + "learning_rate": 1.6075099182710274e-07, + "loss": 34.2614, + "step": 463580 + }, + { + "epoch": 0.9364811305890101, + "grad_norm": 228.4241943359375, + "learning_rate": 1.6066320348883448e-07, + "loss": 20.2467, + "step": 463590 + }, + { + "epoch": 0.9365013312216939, + "grad_norm": 198.4999237060547, + "learning_rate": 1.6057543873690685e-07, + "loss": 11.6937, + "step": 463600 + }, + { + "epoch": 0.9365215318543777, + "grad_norm": 1286.03173828125, + "learning_rate": 1.604876975717473e-07, + "loss": 17.0578, + "step": 463610 + }, + { + "epoch": 0.9365417324870615, + "grad_norm": 179.70726013183594, + "learning_rate": 1.6039997999378388e-07, + "loss": 9.2406, + "step": 463620 + }, + { + "epoch": 0.9365619331197453, + "grad_norm": 201.22686767578125, + "learning_rate": 1.603122860034434e-07, + "loss": 18.0075, + "step": 463630 + }, + { + "epoch": 0.9365821337524292, + "grad_norm": 345.80712890625, + "learning_rate": 1.6022461560115498e-07, + "loss": 22.7832, + "step": 463640 + }, + { + "epoch": 0.936602334385113, + "grad_norm": 495.47772216796875, + "learning_rate": 1.6013696878734385e-07, + "loss": 10.4317, + "step": 463650 + }, + { + "epoch": 0.9366225350177968, + "grad_norm": 253.85330200195312, + "learning_rate": 1.6004934556243857e-07, + "loss": 8.4572, + "step": 463660 + }, + { + "epoch": 0.9366427356504806, + "grad_norm": 395.35296630859375, + "learning_rate": 1.5996174592686598e-07, + "loss": 31.3012, + "step": 463670 + }, + { + "epoch": 0.9366629362831644, + "grad_norm": 491.93597412109375, + "learning_rate": 1.5987416988105188e-07, + "loss": 25.3924, + "step": 463680 + }, + { + "epoch": 0.9366831369158483, + "grad_norm": 1044.8165283203125, + "learning_rate": 1.5978661742542477e-07, + "loss": 23.4883, + "step": 463690 + }, + { + "epoch": 0.936703337548532, + "grad_norm": 443.22705078125, + "learning_rate": 1.596990885604105e-07, + "loss": 12.8588, + "step": 463700 + }, + { + "epoch": 0.9367235381812158, + "grad_norm": 350.1383972167969, + "learning_rate": 1.596115832864359e-07, + "loss": 35.5491, + "step": 463710 + }, + { + "epoch": 0.9367437388138996, + "grad_norm": 461.406005859375, + "learning_rate": 1.5952410160392784e-07, + "loss": 23.4766, + "step": 463720 + }, + { + "epoch": 0.9367639394465834, + "grad_norm": 426.9395751953125, + "learning_rate": 1.59436643513311e-07, + "loss": 31.4879, + "step": 463730 + }, + { + "epoch": 0.9367841400792672, + "grad_norm": 142.09671020507812, + "learning_rate": 1.5934920901501395e-07, + "loss": 12.2829, + "step": 463740 + }, + { + "epoch": 0.9368043407119511, + "grad_norm": 467.48260498046875, + "learning_rate": 1.5926179810946185e-07, + "loss": 20.8934, + "step": 463750 + }, + { + "epoch": 0.9368245413446349, + "grad_norm": 128.65975952148438, + "learning_rate": 1.5917441079707942e-07, + "loss": 17.7847, + "step": 463760 + }, + { + "epoch": 0.9368447419773187, + "grad_norm": 333.0611267089844, + "learning_rate": 1.5908704707829458e-07, + "loss": 10.7977, + "step": 463770 + }, + { + "epoch": 0.9368649426100025, + "grad_norm": 593.1852416992188, + "learning_rate": 1.5899970695353262e-07, + "loss": 17.1089, + "step": 463780 + }, + { + "epoch": 0.9368851432426863, + "grad_norm": 347.4171142578125, + "learning_rate": 1.5891239042321871e-07, + "loss": 9.5736, + "step": 463790 + }, + { + "epoch": 0.9369053438753702, + "grad_norm": 362.99835205078125, + "learning_rate": 1.5882509748777809e-07, + "loss": 23.4353, + "step": 463800 + }, + { + "epoch": 0.936925544508054, + "grad_norm": 219.54135131835938, + "learning_rate": 1.5873782814763762e-07, + "loss": 38.4928, + "step": 463810 + }, + { + "epoch": 0.9369457451407378, + "grad_norm": 166.92816162109375, + "learning_rate": 1.586505824032214e-07, + "loss": 27.4917, + "step": 463820 + }, + { + "epoch": 0.9369659457734216, + "grad_norm": 479.5160827636719, + "learning_rate": 1.5856336025495466e-07, + "loss": 9.695, + "step": 463830 + }, + { + "epoch": 0.9369861464061054, + "grad_norm": 526.8687133789062, + "learning_rate": 1.5847616170326318e-07, + "loss": 24.6402, + "step": 463840 + }, + { + "epoch": 0.9370063470387893, + "grad_norm": 471.7763366699219, + "learning_rate": 1.5838898674857273e-07, + "loss": 13.5469, + "step": 463850 + }, + { + "epoch": 0.9370265476714731, + "grad_norm": 414.0970458984375, + "learning_rate": 1.5830183539130574e-07, + "loss": 11.267, + "step": 463860 + }, + { + "epoch": 0.9370467483041569, + "grad_norm": 319.959716796875, + "learning_rate": 1.582147076318885e-07, + "loss": 13.8998, + "step": 463870 + }, + { + "epoch": 0.9370669489368407, + "grad_norm": 175.54379272460938, + "learning_rate": 1.581276034707463e-07, + "loss": 20.5544, + "step": 463880 + }, + { + "epoch": 0.9370871495695245, + "grad_norm": 416.506591796875, + "learning_rate": 1.5804052290830262e-07, + "loss": 16.798, + "step": 463890 + }, + { + "epoch": 0.9371073502022084, + "grad_norm": 728.6934814453125, + "learning_rate": 1.5795346594498162e-07, + "loss": 19.4924, + "step": 463900 + }, + { + "epoch": 0.9371275508348922, + "grad_norm": 333.4419250488281, + "learning_rate": 1.5786643258120905e-07, + "loss": 18.2137, + "step": 463910 + }, + { + "epoch": 0.937147751467576, + "grad_norm": 11.038262367248535, + "learning_rate": 1.5777942281740789e-07, + "loss": 18.4305, + "step": 463920 + }, + { + "epoch": 0.9371679521002598, + "grad_norm": 327.39752197265625, + "learning_rate": 1.5769243665400224e-07, + "loss": 28.4707, + "step": 463930 + }, + { + "epoch": 0.9371881527329436, + "grad_norm": 425.0711364746094, + "learning_rate": 1.5760547409141626e-07, + "loss": 19.2452, + "step": 463940 + }, + { + "epoch": 0.9372083533656275, + "grad_norm": 120.11178588867188, + "learning_rate": 1.5751853513007454e-07, + "loss": 12.1289, + "step": 463950 + }, + { + "epoch": 0.9372285539983112, + "grad_norm": 14.883577346801758, + "learning_rate": 1.5743161977039954e-07, + "loss": 17.47, + "step": 463960 + }, + { + "epoch": 0.937248754630995, + "grad_norm": 333.3983154296875, + "learning_rate": 1.5734472801281543e-07, + "loss": 15.0493, + "step": 463970 + }, + { + "epoch": 0.9372689552636788, + "grad_norm": 384.79547119140625, + "learning_rate": 1.5725785985774623e-07, + "loss": 17.9263, + "step": 463980 + }, + { + "epoch": 0.9372891558963626, + "grad_norm": 323.10943603515625, + "learning_rate": 1.5717101530561497e-07, + "loss": 10.0074, + "step": 463990 + }, + { + "epoch": 0.9373093565290465, + "grad_norm": 400.6250305175781, + "learning_rate": 1.5708419435684463e-07, + "loss": 21.9798, + "step": 464000 + }, + { + "epoch": 0.9373295571617303, + "grad_norm": 287.2420349121094, + "learning_rate": 1.5699739701185878e-07, + "loss": 40.5435, + "step": 464010 + }, + { + "epoch": 0.9373497577944141, + "grad_norm": 474.51348876953125, + "learning_rate": 1.5691062327107932e-07, + "loss": 13.2892, + "step": 464020 + }, + { + "epoch": 0.9373699584270979, + "grad_norm": 319.4916687011719, + "learning_rate": 1.5682387313493086e-07, + "loss": 15.7933, + "step": 464030 + }, + { + "epoch": 0.9373901590597817, + "grad_norm": 614.4664306640625, + "learning_rate": 1.5673714660383532e-07, + "loss": 17.009, + "step": 464040 + }, + { + "epoch": 0.9374103596924656, + "grad_norm": 623.9253540039062, + "learning_rate": 1.5665044367821513e-07, + "loss": 17.0484, + "step": 464050 + }, + { + "epoch": 0.9374305603251494, + "grad_norm": 486.2925720214844, + "learning_rate": 1.5656376435849385e-07, + "loss": 24.1836, + "step": 464060 + }, + { + "epoch": 0.9374507609578332, + "grad_norm": 343.29730224609375, + "learning_rate": 1.5647710864509336e-07, + "loss": 18.7584, + "step": 464070 + }, + { + "epoch": 0.937470961590517, + "grad_norm": 500.9264221191406, + "learning_rate": 1.5639047653843554e-07, + "loss": 15.1472, + "step": 464080 + }, + { + "epoch": 0.9374911622232008, + "grad_norm": 171.4511260986328, + "learning_rate": 1.563038680389428e-07, + "loss": 10.5716, + "step": 464090 + }, + { + "epoch": 0.9375113628558847, + "grad_norm": 52.38894271850586, + "learning_rate": 1.5621728314703822e-07, + "loss": 17.9345, + "step": 464100 + }, + { + "epoch": 0.9375315634885685, + "grad_norm": 151.2301788330078, + "learning_rate": 1.5613072186314304e-07, + "loss": 13.4225, + "step": 464110 + }, + { + "epoch": 0.9375517641212523, + "grad_norm": 661.763427734375, + "learning_rate": 1.560441841876792e-07, + "loss": 24.8922, + "step": 464120 + }, + { + "epoch": 0.9375719647539361, + "grad_norm": 323.34417724609375, + "learning_rate": 1.5595767012106856e-07, + "loss": 20.4487, + "step": 464130 + }, + { + "epoch": 0.9375921653866199, + "grad_norm": 433.3529052734375, + "learning_rate": 1.5587117966373244e-07, + "loss": 10.2313, + "step": 464140 + }, + { + "epoch": 0.9376123660193038, + "grad_norm": 414.6815185546875, + "learning_rate": 1.5578471281609274e-07, + "loss": 17.6942, + "step": 464150 + }, + { + "epoch": 0.9376325666519876, + "grad_norm": 320.7261047363281, + "learning_rate": 1.5569826957857027e-07, + "loss": 18.6855, + "step": 464160 + }, + { + "epoch": 0.9376527672846714, + "grad_norm": 142.53854370117188, + "learning_rate": 1.556118499515885e-07, + "loss": 19.6049, + "step": 464170 + }, + { + "epoch": 0.9376729679173552, + "grad_norm": 573.0814819335938, + "learning_rate": 1.555254539355655e-07, + "loss": 11.8, + "step": 464180 + }, + { + "epoch": 0.937693168550039, + "grad_norm": 382.5942687988281, + "learning_rate": 1.5543908153092424e-07, + "loss": 28.2042, + "step": 464190 + }, + { + "epoch": 0.9377133691827229, + "grad_norm": 489.61602783203125, + "learning_rate": 1.553527327380855e-07, + "loss": 27.2857, + "step": 464200 + }, + { + "epoch": 0.9377335698154066, + "grad_norm": 398.268310546875, + "learning_rate": 1.5526640755747003e-07, + "loss": 18.6053, + "step": 464210 + }, + { + "epoch": 0.9377537704480904, + "grad_norm": 689.2642822265625, + "learning_rate": 1.5518010598949807e-07, + "loss": 17.124, + "step": 464220 + }, + { + "epoch": 0.9377739710807742, + "grad_norm": 391.16351318359375, + "learning_rate": 1.5509382803459149e-07, + "loss": 16.5513, + "step": 464230 + }, + { + "epoch": 0.937794171713458, + "grad_norm": 324.4962463378906, + "learning_rate": 1.5500757369316888e-07, + "loss": 26.283, + "step": 464240 + }, + { + "epoch": 0.9378143723461418, + "grad_norm": 243.7255859375, + "learning_rate": 1.5492134296565264e-07, + "loss": 54.337, + "step": 464250 + }, + { + "epoch": 0.9378345729788257, + "grad_norm": 180.85537719726562, + "learning_rate": 1.5483513585246135e-07, + "loss": 19.1971, + "step": 464260 + }, + { + "epoch": 0.9378547736115095, + "grad_norm": 870.2200927734375, + "learning_rate": 1.5474895235401688e-07, + "loss": 21.2145, + "step": 464270 + }, + { + "epoch": 0.9378749742441933, + "grad_norm": 397.47149658203125, + "learning_rate": 1.546627924707378e-07, + "loss": 19.5186, + "step": 464280 + }, + { + "epoch": 0.9378951748768771, + "grad_norm": 120.85033416748047, + "learning_rate": 1.545766562030443e-07, + "loss": 21.5627, + "step": 464290 + }, + { + "epoch": 0.937915375509561, + "grad_norm": 370.54620361328125, + "learning_rate": 1.5449054355135718e-07, + "loss": 20.4738, + "step": 464300 + }, + { + "epoch": 0.9379355761422448, + "grad_norm": 237.71800231933594, + "learning_rate": 1.54404454516095e-07, + "loss": 21.7886, + "step": 464310 + }, + { + "epoch": 0.9379557767749286, + "grad_norm": 425.63330078125, + "learning_rate": 1.5431838909767793e-07, + "loss": 18.739, + "step": 464320 + }, + { + "epoch": 0.9379759774076124, + "grad_norm": 454.5373840332031, + "learning_rate": 1.542323472965257e-07, + "loss": 15.3442, + "step": 464330 + }, + { + "epoch": 0.9379961780402962, + "grad_norm": 304.43927001953125, + "learning_rate": 1.5414632911305683e-07, + "loss": 18.2253, + "step": 464340 + }, + { + "epoch": 0.93801637867298, + "grad_norm": 516.6799926757812, + "learning_rate": 1.5406033454769154e-07, + "loss": 11.4711, + "step": 464350 + }, + { + "epoch": 0.9380365793056639, + "grad_norm": 366.4046325683594, + "learning_rate": 1.5397436360084784e-07, + "loss": 18.1866, + "step": 464360 + }, + { + "epoch": 0.9380567799383477, + "grad_norm": 405.16986083984375, + "learning_rate": 1.5388841627294536e-07, + "loss": 25.2115, + "step": 464370 + }, + { + "epoch": 0.9380769805710315, + "grad_norm": 702.1383666992188, + "learning_rate": 1.5380249256440272e-07, + "loss": 23.2144, + "step": 464380 + }, + { + "epoch": 0.9380971812037153, + "grad_norm": 471.3887939453125, + "learning_rate": 1.5371659247564063e-07, + "loss": 19.3548, + "step": 464390 + }, + { + "epoch": 0.9381173818363991, + "grad_norm": 681.3546752929688, + "learning_rate": 1.5363071600707435e-07, + "loss": 18.2053, + "step": 464400 + }, + { + "epoch": 0.938137582469083, + "grad_norm": 247.7911834716797, + "learning_rate": 1.5354486315912408e-07, + "loss": 18.0673, + "step": 464410 + }, + { + "epoch": 0.9381577831017668, + "grad_norm": 511.078125, + "learning_rate": 1.534590339322095e-07, + "loss": 12.9879, + "step": 464420 + }, + { + "epoch": 0.9381779837344506, + "grad_norm": 401.2061767578125, + "learning_rate": 1.533732283267475e-07, + "loss": 24.2374, + "step": 464430 + }, + { + "epoch": 0.9381981843671344, + "grad_norm": 348.04296875, + "learning_rate": 1.532874463431555e-07, + "loss": 10.0772, + "step": 464440 + }, + { + "epoch": 0.9382183849998182, + "grad_norm": 193.95880126953125, + "learning_rate": 1.532016879818532e-07, + "loss": 17.9981, + "step": 464450 + }, + { + "epoch": 0.9382385856325021, + "grad_norm": 133.47679138183594, + "learning_rate": 1.5311595324325912e-07, + "loss": 14.5344, + "step": 464460 + }, + { + "epoch": 0.9382587862651858, + "grad_norm": 349.5655212402344, + "learning_rate": 1.5303024212778905e-07, + "loss": 25.1919, + "step": 464470 + }, + { + "epoch": 0.9382789868978696, + "grad_norm": 495.6358947753906, + "learning_rate": 1.5294455463586157e-07, + "loss": 11.76, + "step": 464480 + }, + { + "epoch": 0.9382991875305534, + "grad_norm": 852.6415405273438, + "learning_rate": 1.528588907678946e-07, + "loss": 22.6186, + "step": 464490 + }, + { + "epoch": 0.9383193881632372, + "grad_norm": 338.9753723144531, + "learning_rate": 1.5277325052430569e-07, + "loss": 10.316, + "step": 464500 + }, + { + "epoch": 0.938339588795921, + "grad_norm": 182.22496032714844, + "learning_rate": 1.5268763390551167e-07, + "loss": 8.8562, + "step": 464510 + }, + { + "epoch": 0.9383597894286049, + "grad_norm": 697.7536010742188, + "learning_rate": 1.526020409119311e-07, + "loss": 16.601, + "step": 464520 + }, + { + "epoch": 0.9383799900612887, + "grad_norm": 471.80670166015625, + "learning_rate": 1.5251647154397975e-07, + "loss": 17.9513, + "step": 464530 + }, + { + "epoch": 0.9384001906939725, + "grad_norm": 438.54632568359375, + "learning_rate": 1.5243092580207507e-07, + "loss": 21.5101, + "step": 464540 + }, + { + "epoch": 0.9384203913266563, + "grad_norm": 160.27978515625, + "learning_rate": 1.5234540368663343e-07, + "loss": 13.6464, + "step": 464550 + }, + { + "epoch": 0.9384405919593402, + "grad_norm": 483.20751953125, + "learning_rate": 1.5225990519807332e-07, + "loss": 12.8111, + "step": 464560 + }, + { + "epoch": 0.938460792592024, + "grad_norm": 539.7047119140625, + "learning_rate": 1.5217443033681058e-07, + "loss": 20.0048, + "step": 464570 + }, + { + "epoch": 0.9384809932247078, + "grad_norm": 290.4250793457031, + "learning_rate": 1.5208897910326092e-07, + "loss": 13.4598, + "step": 464580 + }, + { + "epoch": 0.9385011938573916, + "grad_norm": 410.6996765136719, + "learning_rate": 1.520035514978424e-07, + "loss": 14.0252, + "step": 464590 + }, + { + "epoch": 0.9385213944900754, + "grad_norm": 470.7203369140625, + "learning_rate": 1.5191814752097024e-07, + "loss": 10.0053, + "step": 464600 + }, + { + "epoch": 0.9385415951227593, + "grad_norm": 427.7999572753906, + "learning_rate": 1.5183276717306072e-07, + "loss": 16.5089, + "step": 464610 + }, + { + "epoch": 0.9385617957554431, + "grad_norm": 862.2379150390625, + "learning_rate": 1.517474104545308e-07, + "loss": 13.5312, + "step": 464620 + }, + { + "epoch": 0.9385819963881269, + "grad_norm": 554.513916015625, + "learning_rate": 1.5166207736579564e-07, + "loss": 24.5063, + "step": 464630 + }, + { + "epoch": 0.9386021970208107, + "grad_norm": 620.202880859375, + "learning_rate": 1.515767679072716e-07, + "loss": 13.7802, + "step": 464640 + }, + { + "epoch": 0.9386223976534945, + "grad_norm": 556.294921875, + "learning_rate": 1.5149148207937447e-07, + "loss": 20.9563, + "step": 464650 + }, + { + "epoch": 0.9386425982861784, + "grad_norm": 317.4458923339844, + "learning_rate": 1.5140621988251947e-07, + "loss": 5.6833, + "step": 464660 + }, + { + "epoch": 0.9386627989188622, + "grad_norm": 381.5221252441406, + "learning_rate": 1.513209813171229e-07, + "loss": 15.0624, + "step": 464670 + }, + { + "epoch": 0.938682999551546, + "grad_norm": 317.9993591308594, + "learning_rate": 1.5123576638360004e-07, + "loss": 16.5072, + "step": 464680 + }, + { + "epoch": 0.9387032001842298, + "grad_norm": 118.84877014160156, + "learning_rate": 1.5115057508236498e-07, + "loss": 18.7588, + "step": 464690 + }, + { + "epoch": 0.9387234008169136, + "grad_norm": 1289.9464111328125, + "learning_rate": 1.5106540741383402e-07, + "loss": 25.8103, + "step": 464700 + }, + { + "epoch": 0.9387436014495975, + "grad_norm": 153.5814666748047, + "learning_rate": 1.5098026337842297e-07, + "loss": 24.5178, + "step": 464710 + }, + { + "epoch": 0.9387638020822812, + "grad_norm": 646.8268432617188, + "learning_rate": 1.5089514297654594e-07, + "loss": 22.356, + "step": 464720 + }, + { + "epoch": 0.938784002714965, + "grad_norm": 461.1291809082031, + "learning_rate": 1.5081004620861706e-07, + "loss": 18.6647, + "step": 464730 + }, + { + "epoch": 0.9388042033476488, + "grad_norm": 489.087890625, + "learning_rate": 1.5072497307505263e-07, + "loss": 17.1713, + "step": 464740 + }, + { + "epoch": 0.9388244039803326, + "grad_norm": 315.9326171875, + "learning_rate": 1.5063992357626623e-07, + "loss": 35.0541, + "step": 464750 + }, + { + "epoch": 0.9388446046130164, + "grad_norm": 529.4815673828125, + "learning_rate": 1.5055489771267252e-07, + "loss": 24.0619, + "step": 464760 + }, + { + "epoch": 0.9388648052457003, + "grad_norm": 470.8222961425781, + "learning_rate": 1.5046989548468616e-07, + "loss": 14.294, + "step": 464770 + }, + { + "epoch": 0.9388850058783841, + "grad_norm": 345.0228271484375, + "learning_rate": 1.503849168927224e-07, + "loss": 26.5148, + "step": 464780 + }, + { + "epoch": 0.9389052065110679, + "grad_norm": 425.7090148925781, + "learning_rate": 1.502999619371931e-07, + "loss": 23.6067, + "step": 464790 + }, + { + "epoch": 0.9389254071437517, + "grad_norm": 333.5506286621094, + "learning_rate": 1.502150306185135e-07, + "loss": 18.3616, + "step": 464800 + }, + { + "epoch": 0.9389456077764355, + "grad_norm": 141.08529663085938, + "learning_rate": 1.5013012293709828e-07, + "loss": 10.1888, + "step": 464810 + }, + { + "epoch": 0.9389658084091194, + "grad_norm": 280.8174743652344, + "learning_rate": 1.5004523889336042e-07, + "loss": 18.8, + "step": 464820 + }, + { + "epoch": 0.9389860090418032, + "grad_norm": 197.2073516845703, + "learning_rate": 1.499603784877135e-07, + "loss": 11.635, + "step": 464830 + }, + { + "epoch": 0.939006209674487, + "grad_norm": 554.6068725585938, + "learning_rate": 1.4987554172057216e-07, + "loss": 14.0745, + "step": 464840 + }, + { + "epoch": 0.9390264103071708, + "grad_norm": 622.3172607421875, + "learning_rate": 1.497907285923489e-07, + "loss": 17.9825, + "step": 464850 + }, + { + "epoch": 0.9390466109398546, + "grad_norm": 118.02326965332031, + "learning_rate": 1.4970593910345665e-07, + "loss": 8.7162, + "step": 464860 + }, + { + "epoch": 0.9390668115725385, + "grad_norm": 379.7081604003906, + "learning_rate": 1.4962117325431013e-07, + "loss": 24.6652, + "step": 464870 + }, + { + "epoch": 0.9390870122052223, + "grad_norm": 115.3721923828125, + "learning_rate": 1.495364310453218e-07, + "loss": 25.8597, + "step": 464880 + }, + { + "epoch": 0.9391072128379061, + "grad_norm": 453.2753601074219, + "learning_rate": 1.494517124769046e-07, + "loss": 11.1597, + "step": 464890 + }, + { + "epoch": 0.9391274134705899, + "grad_norm": 118.36872100830078, + "learning_rate": 1.4936701754947104e-07, + "loss": 12.8013, + "step": 464900 + }, + { + "epoch": 0.9391476141032737, + "grad_norm": 366.5028076171875, + "learning_rate": 1.4928234626343464e-07, + "loss": 9.0016, + "step": 464910 + }, + { + "epoch": 0.9391678147359576, + "grad_norm": 417.77398681640625, + "learning_rate": 1.4919769861920785e-07, + "loss": 14.6703, + "step": 464920 + }, + { + "epoch": 0.9391880153686414, + "grad_norm": 295.6487121582031, + "learning_rate": 1.491130746172026e-07, + "loss": 16.5016, + "step": 464930 + }, + { + "epoch": 0.9392082160013252, + "grad_norm": 459.5295104980469, + "learning_rate": 1.490284742578324e-07, + "loss": 30.716, + "step": 464940 + }, + { + "epoch": 0.939228416634009, + "grad_norm": 387.3690185546875, + "learning_rate": 1.4894389754150862e-07, + "loss": 29.2588, + "step": 464950 + }, + { + "epoch": 0.9392486172666928, + "grad_norm": 396.13812255859375, + "learning_rate": 1.4885934446864425e-07, + "loss": 19.497, + "step": 464960 + }, + { + "epoch": 0.9392688178993767, + "grad_norm": 133.84405517578125, + "learning_rate": 1.487748150396512e-07, + "loss": 13.2698, + "step": 464970 + }, + { + "epoch": 0.9392890185320604, + "grad_norm": 130.9963836669922, + "learning_rate": 1.4869030925494077e-07, + "loss": 22.2032, + "step": 464980 + }, + { + "epoch": 0.9393092191647442, + "grad_norm": 646.0631103515625, + "learning_rate": 1.4860582711492544e-07, + "loss": 25.5472, + "step": 464990 + }, + { + "epoch": 0.939329419797428, + "grad_norm": 182.46316528320312, + "learning_rate": 1.4852136862001766e-07, + "loss": 25.9811, + "step": 465000 + }, + { + "epoch": 0.9393496204301118, + "grad_norm": 329.9434509277344, + "learning_rate": 1.4843693377062818e-07, + "loss": 17.1915, + "step": 465010 + }, + { + "epoch": 0.9393698210627957, + "grad_norm": 404.30364990234375, + "learning_rate": 1.483525225671678e-07, + "loss": 18.5547, + "step": 465020 + }, + { + "epoch": 0.9393900216954795, + "grad_norm": 116.88551330566406, + "learning_rate": 1.4826813501004954e-07, + "loss": 15.3164, + "step": 465030 + }, + { + "epoch": 0.9394102223281633, + "grad_norm": 680.893798828125, + "learning_rate": 1.4818377109968417e-07, + "loss": 18.4572, + "step": 465040 + }, + { + "epoch": 0.9394304229608471, + "grad_norm": 601.853515625, + "learning_rate": 1.4809943083648194e-07, + "loss": 16.359, + "step": 465050 + }, + { + "epoch": 0.9394506235935309, + "grad_norm": 455.3320617675781, + "learning_rate": 1.480151142208547e-07, + "loss": 22.9655, + "step": 465060 + }, + { + "epoch": 0.9394708242262148, + "grad_norm": 201.93214416503906, + "learning_rate": 1.4793082125321435e-07, + "loss": 20.6204, + "step": 465070 + }, + { + "epoch": 0.9394910248588986, + "grad_norm": 148.03668212890625, + "learning_rate": 1.4784655193396947e-07, + "loss": 16.3354, + "step": 465080 + }, + { + "epoch": 0.9395112254915824, + "grad_norm": 234.11648559570312, + "learning_rate": 1.4776230626353193e-07, + "loss": 16.4417, + "step": 465090 + }, + { + "epoch": 0.9395314261242662, + "grad_norm": 346.0733642578125, + "learning_rate": 1.4767808424231312e-07, + "loss": 12.3764, + "step": 465100 + }, + { + "epoch": 0.93955162675695, + "grad_norm": 55.60728454589844, + "learning_rate": 1.4759388587072266e-07, + "loss": 18.4479, + "step": 465110 + }, + { + "epoch": 0.9395718273896339, + "grad_norm": 111.06137084960938, + "learning_rate": 1.475097111491708e-07, + "loss": 11.6848, + "step": 465120 + }, + { + "epoch": 0.9395920280223177, + "grad_norm": 300.3351135253906, + "learning_rate": 1.474255600780683e-07, + "loss": 13.9835, + "step": 465130 + }, + { + "epoch": 0.9396122286550015, + "grad_norm": 363.2767028808594, + "learning_rate": 1.473414326578254e-07, + "loss": 9.6612, + "step": 465140 + }, + { + "epoch": 0.9396324292876853, + "grad_norm": 927.3482055664062, + "learning_rate": 1.4725732888885126e-07, + "loss": 20.8591, + "step": 465150 + }, + { + "epoch": 0.9396526299203691, + "grad_norm": 472.299072265625, + "learning_rate": 1.4717324877155603e-07, + "loss": 6.7122, + "step": 465160 + }, + { + "epoch": 0.939672830553053, + "grad_norm": 161.1034698486328, + "learning_rate": 1.4708919230635054e-07, + "loss": 14.2039, + "step": 465170 + }, + { + "epoch": 0.9396930311857368, + "grad_norm": 361.7558288574219, + "learning_rate": 1.4700515949364337e-07, + "loss": 28.8626, + "step": 465180 + }, + { + "epoch": 0.9397132318184206, + "grad_norm": 246.94151306152344, + "learning_rate": 1.4692115033384468e-07, + "loss": 25.2142, + "step": 465190 + }, + { + "epoch": 0.9397334324511044, + "grad_norm": 314.4781494140625, + "learning_rate": 1.4683716482736364e-07, + "loss": 21.4019, + "step": 465200 + }, + { + "epoch": 0.9397536330837882, + "grad_norm": 495.0483703613281, + "learning_rate": 1.4675320297460994e-07, + "loss": 22.743, + "step": 465210 + }, + { + "epoch": 0.939773833716472, + "grad_norm": 0.047875095158815384, + "learning_rate": 1.4666926477599153e-07, + "loss": 9.238, + "step": 465220 + }, + { + "epoch": 0.9397940343491558, + "grad_norm": 3.2310094833374023, + "learning_rate": 1.4658535023191922e-07, + "loss": 18.7415, + "step": 465230 + }, + { + "epoch": 0.9398142349818396, + "grad_norm": 279.0162353515625, + "learning_rate": 1.4650145934280103e-07, + "loss": 25.614, + "step": 465240 + }, + { + "epoch": 0.9398344356145234, + "grad_norm": 96.9708023071289, + "learning_rate": 1.4641759210904605e-07, + "loss": 16.596, + "step": 465250 + }, + { + "epoch": 0.9398546362472072, + "grad_norm": 684.9998779296875, + "learning_rate": 1.463337485310634e-07, + "loss": 18.1348, + "step": 465260 + }, + { + "epoch": 0.939874836879891, + "grad_norm": 10.25330924987793, + "learning_rate": 1.4624992860926112e-07, + "loss": 12.523, + "step": 465270 + }, + { + "epoch": 0.9398950375125749, + "grad_norm": 2057.43212890625, + "learning_rate": 1.461661323440483e-07, + "loss": 37.1617, + "step": 465280 + }, + { + "epoch": 0.9399152381452587, + "grad_norm": 313.6214904785156, + "learning_rate": 1.4608235973583296e-07, + "loss": 16.6247, + "step": 465290 + }, + { + "epoch": 0.9399354387779425, + "grad_norm": 133.1804656982422, + "learning_rate": 1.459986107850231e-07, + "loss": 29.5971, + "step": 465300 + }, + { + "epoch": 0.9399556394106263, + "grad_norm": 0.5061958432197571, + "learning_rate": 1.4591488549202725e-07, + "loss": 21.5198, + "step": 465310 + }, + { + "epoch": 0.9399758400433101, + "grad_norm": 390.5429992675781, + "learning_rate": 1.4583118385725402e-07, + "loss": 10.8836, + "step": 465320 + }, + { + "epoch": 0.939996040675994, + "grad_norm": 153.8983917236328, + "learning_rate": 1.4574750588111085e-07, + "loss": 10.1329, + "step": 465330 + }, + { + "epoch": 0.9400162413086778, + "grad_norm": 102.71835327148438, + "learning_rate": 1.4566385156400463e-07, + "loss": 19.8331, + "step": 465340 + }, + { + "epoch": 0.9400364419413616, + "grad_norm": 165.04196166992188, + "learning_rate": 1.4558022090634504e-07, + "loss": 9.5658, + "step": 465350 + }, + { + "epoch": 0.9400566425740454, + "grad_norm": 144.1915283203125, + "learning_rate": 1.4549661390853897e-07, + "loss": 22.7668, + "step": 465360 + }, + { + "epoch": 0.9400768432067292, + "grad_norm": 269.35174560546875, + "learning_rate": 1.4541303057099275e-07, + "loss": 17.217, + "step": 465370 + }, + { + "epoch": 0.9400970438394131, + "grad_norm": 341.9735412597656, + "learning_rate": 1.4532947089411443e-07, + "loss": 14.102, + "step": 465380 + }, + { + "epoch": 0.9401172444720969, + "grad_norm": 342.3262939453125, + "learning_rate": 1.452459348783125e-07, + "loss": 14.8285, + "step": 465390 + }, + { + "epoch": 0.9401374451047807, + "grad_norm": 195.041748046875, + "learning_rate": 1.4516242252399227e-07, + "loss": 21.6281, + "step": 465400 + }, + { + "epoch": 0.9401576457374645, + "grad_norm": 653.010986328125, + "learning_rate": 1.450789338315617e-07, + "loss": 16.6789, + "step": 465410 + }, + { + "epoch": 0.9401778463701483, + "grad_norm": 429.4823913574219, + "learning_rate": 1.4499546880142823e-07, + "loss": 20.1362, + "step": 465420 + }, + { + "epoch": 0.9401980470028322, + "grad_norm": 472.930419921875, + "learning_rate": 1.4491202743399767e-07, + "loss": 16.6625, + "step": 465430 + }, + { + "epoch": 0.940218247635516, + "grad_norm": 598.2315063476562, + "learning_rate": 1.448286097296764e-07, + "loss": 22.673, + "step": 465440 + }, + { + "epoch": 0.9402384482681998, + "grad_norm": 560.1913452148438, + "learning_rate": 1.4474521568887178e-07, + "loss": 20.8818, + "step": 465450 + }, + { + "epoch": 0.9402586489008836, + "grad_norm": 308.62799072265625, + "learning_rate": 1.4466184531199135e-07, + "loss": 10.2614, + "step": 465460 + }, + { + "epoch": 0.9402788495335674, + "grad_norm": 633.8780517578125, + "learning_rate": 1.4457849859943862e-07, + "loss": 20.54, + "step": 465470 + }, + { + "epoch": 0.9402990501662513, + "grad_norm": 394.0419616699219, + "learning_rate": 1.4449517555162163e-07, + "loss": 11.4615, + "step": 465480 + }, + { + "epoch": 0.940319250798935, + "grad_norm": 340.50732421875, + "learning_rate": 1.4441187616894724e-07, + "loss": 16.5588, + "step": 465490 + }, + { + "epoch": 0.9403394514316188, + "grad_norm": 202.9910125732422, + "learning_rate": 1.4432860045182019e-07, + "loss": 27.8608, + "step": 465500 + }, + { + "epoch": 0.9403596520643026, + "grad_norm": 115.48960876464844, + "learning_rate": 1.4424534840064563e-07, + "loss": 16.095, + "step": 465510 + }, + { + "epoch": 0.9403798526969864, + "grad_norm": 159.15509033203125, + "learning_rate": 1.4416212001583163e-07, + "loss": 13.0288, + "step": 465520 + }, + { + "epoch": 0.9404000533296702, + "grad_norm": 198.47335815429688, + "learning_rate": 1.4407891529778172e-07, + "loss": 11.6673, + "step": 465530 + }, + { + "epoch": 0.9404202539623541, + "grad_norm": 702.14794921875, + "learning_rate": 1.4399573424690227e-07, + "loss": 26.2657, + "step": 465540 + }, + { + "epoch": 0.9404404545950379, + "grad_norm": 347.3752746582031, + "learning_rate": 1.4391257686359906e-07, + "loss": 21.5979, + "step": 465550 + }, + { + "epoch": 0.9404606552277217, + "grad_norm": 139.51553344726562, + "learning_rate": 1.438294431482762e-07, + "loss": 27.9939, + "step": 465560 + }, + { + "epoch": 0.9404808558604055, + "grad_norm": 472.1897277832031, + "learning_rate": 1.4374633310134057e-07, + "loss": 16.1521, + "step": 465570 + }, + { + "epoch": 0.9405010564930893, + "grad_norm": 373.1190490722656, + "learning_rate": 1.4366324672319575e-07, + "loss": 27.0018, + "step": 465580 + }, + { + "epoch": 0.9405212571257732, + "grad_norm": 204.26011657714844, + "learning_rate": 1.43580184014247e-07, + "loss": 18.4251, + "step": 465590 + }, + { + "epoch": 0.940541457758457, + "grad_norm": 159.9972686767578, + "learning_rate": 1.4349714497490009e-07, + "loss": 11.4422, + "step": 465600 + }, + { + "epoch": 0.9405616583911408, + "grad_norm": 307.0250549316406, + "learning_rate": 1.4341412960555855e-07, + "loss": 15.6385, + "step": 465610 + }, + { + "epoch": 0.9405818590238246, + "grad_norm": 491.42803955078125, + "learning_rate": 1.4333113790662822e-07, + "loss": 18.4737, + "step": 465620 + }, + { + "epoch": 0.9406020596565084, + "grad_norm": 243.17962646484375, + "learning_rate": 1.432481698785121e-07, + "loss": 4.4355, + "step": 465630 + }, + { + "epoch": 0.9406222602891923, + "grad_norm": 17.453372955322266, + "learning_rate": 1.4316522552161593e-07, + "loss": 18.5871, + "step": 465640 + }, + { + "epoch": 0.9406424609218761, + "grad_norm": 4.891506671905518, + "learning_rate": 1.4308230483634334e-07, + "loss": 28.1658, + "step": 465650 + }, + { + "epoch": 0.9406626615545599, + "grad_norm": 328.7502136230469, + "learning_rate": 1.4299940782309785e-07, + "loss": 12.0737, + "step": 465660 + }, + { + "epoch": 0.9406828621872437, + "grad_norm": 356.3871765136719, + "learning_rate": 1.4291653448228416e-07, + "loss": 14.3334, + "step": 465670 + }, + { + "epoch": 0.9407030628199275, + "grad_norm": 29.75800132751465, + "learning_rate": 1.4283368481430747e-07, + "loss": 21.4017, + "step": 465680 + }, + { + "epoch": 0.9407232634526114, + "grad_norm": 62.24608612060547, + "learning_rate": 1.427508588195692e-07, + "loss": 15.4066, + "step": 465690 + }, + { + "epoch": 0.9407434640852952, + "grad_norm": 540.0977172851562, + "learning_rate": 1.4266805649847392e-07, + "loss": 16.5002, + "step": 465700 + }, + { + "epoch": 0.940763664717979, + "grad_norm": 419.4856872558594, + "learning_rate": 1.425852778514264e-07, + "loss": 10.3622, + "step": 465710 + }, + { + "epoch": 0.9407838653506628, + "grad_norm": 501.3851318359375, + "learning_rate": 1.4250252287882848e-07, + "loss": 16.8594, + "step": 465720 + }, + { + "epoch": 0.9408040659833466, + "grad_norm": 234.9951934814453, + "learning_rate": 1.4241979158108433e-07, + "loss": 16.2693, + "step": 465730 + }, + { + "epoch": 0.9408242666160305, + "grad_norm": 13.653524398803711, + "learning_rate": 1.4233708395859692e-07, + "loss": 25.7325, + "step": 465740 + }, + { + "epoch": 0.9408444672487142, + "grad_norm": 139.67691040039062, + "learning_rate": 1.4225440001176983e-07, + "loss": 15.5495, + "step": 465750 + }, + { + "epoch": 0.940864667881398, + "grad_norm": 470.8349609375, + "learning_rate": 1.421717397410044e-07, + "loss": 25.8463, + "step": 465760 + }, + { + "epoch": 0.9408848685140818, + "grad_norm": 453.8491516113281, + "learning_rate": 1.420891031467053e-07, + "loss": 22.6746, + "step": 465770 + }, + { + "epoch": 0.9409050691467656, + "grad_norm": 224.24273681640625, + "learning_rate": 1.4200649022927505e-07, + "loss": 11.2414, + "step": 465780 + }, + { + "epoch": 0.9409252697794495, + "grad_norm": 47.68784713745117, + "learning_rate": 1.41923900989116e-07, + "loss": 15.038, + "step": 465790 + }, + { + "epoch": 0.9409454704121333, + "grad_norm": 472.7624206542969, + "learning_rate": 1.4184133542663014e-07, + "loss": 14.2832, + "step": 465800 + }, + { + "epoch": 0.9409656710448171, + "grad_norm": 157.7672882080078, + "learning_rate": 1.41758793542221e-07, + "loss": 12.4509, + "step": 465810 + }, + { + "epoch": 0.9409858716775009, + "grad_norm": 438.72222900390625, + "learning_rate": 1.4167627533628992e-07, + "loss": 17.5392, + "step": 465820 + }, + { + "epoch": 0.9410060723101847, + "grad_norm": 0.7373473048210144, + "learning_rate": 1.4159378080923936e-07, + "loss": 23.9596, + "step": 465830 + }, + { + "epoch": 0.9410262729428686, + "grad_norm": 465.291015625, + "learning_rate": 1.4151130996147177e-07, + "loss": 22.3183, + "step": 465840 + }, + { + "epoch": 0.9410464735755524, + "grad_norm": 515.6854248046875, + "learning_rate": 1.4142886279338852e-07, + "loss": 28.089, + "step": 465850 + }, + { + "epoch": 0.9410666742082362, + "grad_norm": 241.45852661132812, + "learning_rate": 1.4134643930539204e-07, + "loss": 13.2914, + "step": 465860 + }, + { + "epoch": 0.94108687484092, + "grad_norm": 210.69989013671875, + "learning_rate": 1.4126403949788369e-07, + "loss": 14.6817, + "step": 465870 + }, + { + "epoch": 0.9411070754736038, + "grad_norm": 14.873430252075195, + "learning_rate": 1.4118166337126428e-07, + "loss": 23.0126, + "step": 465880 + }, + { + "epoch": 0.9411272761062877, + "grad_norm": 133.8224334716797, + "learning_rate": 1.4109931092593732e-07, + "loss": 25.0196, + "step": 465890 + }, + { + "epoch": 0.9411474767389715, + "grad_norm": 605.4613647460938, + "learning_rate": 1.4101698216230254e-07, + "loss": 11.0159, + "step": 465900 + }, + { + "epoch": 0.9411676773716553, + "grad_norm": 308.15338134765625, + "learning_rate": 1.4093467708076126e-07, + "loss": 17.0492, + "step": 465910 + }, + { + "epoch": 0.9411878780043391, + "grad_norm": 569.3749389648438, + "learning_rate": 1.4085239568171483e-07, + "loss": 13.6531, + "step": 465920 + }, + { + "epoch": 0.9412080786370229, + "grad_norm": 5.422422885894775, + "learning_rate": 1.4077013796556515e-07, + "loss": 27.1837, + "step": 465930 + }, + { + "epoch": 0.9412282792697068, + "grad_norm": 260.8473205566406, + "learning_rate": 1.406879039327125e-07, + "loss": 27.4344, + "step": 465940 + }, + { + "epoch": 0.9412484799023906, + "grad_norm": 385.05035400390625, + "learning_rate": 1.4060569358355703e-07, + "loss": 28.6004, + "step": 465950 + }, + { + "epoch": 0.9412686805350744, + "grad_norm": 113.09442138671875, + "learning_rate": 1.405235069185007e-07, + "loss": 13.7099, + "step": 465960 + }, + { + "epoch": 0.9412888811677582, + "grad_norm": 390.7118835449219, + "learning_rate": 1.4044134393794373e-07, + "loss": 16.4347, + "step": 465970 + }, + { + "epoch": 0.941309081800442, + "grad_norm": 317.0025634765625, + "learning_rate": 1.4035920464228525e-07, + "loss": 10.5919, + "step": 465980 + }, + { + "epoch": 0.9413292824331259, + "grad_norm": 301.4737548828125, + "learning_rate": 1.4027708903192662e-07, + "loss": 24.9793, + "step": 465990 + }, + { + "epoch": 0.9413494830658096, + "grad_norm": 348.3053283691406, + "learning_rate": 1.4019499710726913e-07, + "loss": 17.0418, + "step": 466000 + }, + { + "epoch": 0.9413696836984934, + "grad_norm": 103.71463775634766, + "learning_rate": 1.4011292886871086e-07, + "loss": 14.3418, + "step": 466010 + }, + { + "epoch": 0.9413898843311772, + "grad_norm": 543.97265625, + "learning_rate": 1.4003088431665312e-07, + "loss": 11.213, + "step": 466020 + }, + { + "epoch": 0.941410084963861, + "grad_norm": 645.0778198242188, + "learning_rate": 1.3994886345149504e-07, + "loss": 17.445, + "step": 466030 + }, + { + "epoch": 0.9414302855965448, + "grad_norm": 707.9461059570312, + "learning_rate": 1.3986686627363744e-07, + "loss": 33.2789, + "step": 466040 + }, + { + "epoch": 0.9414504862292287, + "grad_norm": 12.873198509216309, + "learning_rate": 1.3978489278347883e-07, + "loss": 28.9894, + "step": 466050 + }, + { + "epoch": 0.9414706868619125, + "grad_norm": 318.5113830566406, + "learning_rate": 1.397029429814184e-07, + "loss": 22.1279, + "step": 466060 + }, + { + "epoch": 0.9414908874945963, + "grad_norm": 123.84893798828125, + "learning_rate": 1.39621016867858e-07, + "loss": 21.4548, + "step": 466070 + }, + { + "epoch": 0.9415110881272801, + "grad_norm": 315.24322509765625, + "learning_rate": 1.39539114443194e-07, + "loss": 23.2591, + "step": 466080 + }, + { + "epoch": 0.941531288759964, + "grad_norm": 444.9673156738281, + "learning_rate": 1.3945723570782722e-07, + "loss": 22.2276, + "step": 466090 + }, + { + "epoch": 0.9415514893926478, + "grad_norm": 99.54863739013672, + "learning_rate": 1.3937538066215672e-07, + "loss": 22.1537, + "step": 466100 + }, + { + "epoch": 0.9415716900253316, + "grad_norm": 248.89920043945312, + "learning_rate": 1.3929354930658112e-07, + "loss": 11.2812, + "step": 466110 + }, + { + "epoch": 0.9415918906580154, + "grad_norm": 295.01171875, + "learning_rate": 1.3921174164149842e-07, + "loss": 23.3916, + "step": 466120 + }, + { + "epoch": 0.9416120912906992, + "grad_norm": 364.608154296875, + "learning_rate": 1.3912995766730887e-07, + "loss": 9.879, + "step": 466130 + }, + { + "epoch": 0.941632291923383, + "grad_norm": 156.15719604492188, + "learning_rate": 1.3904819738441043e-07, + "loss": 28.2597, + "step": 466140 + }, + { + "epoch": 0.9416524925560669, + "grad_norm": 570.7730712890625, + "learning_rate": 1.3896646079320064e-07, + "loss": 18.1528, + "step": 466150 + }, + { + "epoch": 0.9416726931887507, + "grad_norm": 200.7433319091797, + "learning_rate": 1.388847478940797e-07, + "loss": 19.8137, + "step": 466160 + }, + { + "epoch": 0.9416928938214345, + "grad_norm": 213.10841369628906, + "learning_rate": 1.3880305868744392e-07, + "loss": 20.0374, + "step": 466170 + }, + { + "epoch": 0.9417130944541183, + "grad_norm": 244.504150390625, + "learning_rate": 1.3872139317369304e-07, + "loss": 10.2179, + "step": 466180 + }, + { + "epoch": 0.9417332950868021, + "grad_norm": 319.9541015625, + "learning_rate": 1.3863975135322505e-07, + "loss": 12.6198, + "step": 466190 + }, + { + "epoch": 0.941753495719486, + "grad_norm": 201.5258331298828, + "learning_rate": 1.385581332264363e-07, + "loss": 15.1341, + "step": 466200 + }, + { + "epoch": 0.9417736963521698, + "grad_norm": 192.09957885742188, + "learning_rate": 1.3847653879372646e-07, + "loss": 18.1536, + "step": 466210 + }, + { + "epoch": 0.9417938969848536, + "grad_norm": 217.1121826171875, + "learning_rate": 1.3839496805549136e-07, + "loss": 12.6363, + "step": 466220 + }, + { + "epoch": 0.9418140976175374, + "grad_norm": 646.8970947265625, + "learning_rate": 1.383134210121301e-07, + "loss": 13.4812, + "step": 466230 + }, + { + "epoch": 0.9418342982502212, + "grad_norm": 209.90310668945312, + "learning_rate": 1.3823189766403954e-07, + "loss": 22.6369, + "step": 466240 + }, + { + "epoch": 0.9418544988829051, + "grad_norm": 597.7464599609375, + "learning_rate": 1.3815039801161723e-07, + "loss": 14.8995, + "step": 466250 + }, + { + "epoch": 0.9418746995155888, + "grad_norm": 583.8340454101562, + "learning_rate": 1.3806892205526e-07, + "loss": 12.6547, + "step": 466260 + }, + { + "epoch": 0.9418949001482726, + "grad_norm": 146.20486450195312, + "learning_rate": 1.3798746979536482e-07, + "loss": 20.1029, + "step": 466270 + }, + { + "epoch": 0.9419151007809564, + "grad_norm": 462.0206604003906, + "learning_rate": 1.3790604123232966e-07, + "loss": 11.2156, + "step": 466280 + }, + { + "epoch": 0.9419353014136402, + "grad_norm": 243.73023986816406, + "learning_rate": 1.3782463636655087e-07, + "loss": 21.2671, + "step": 466290 + }, + { + "epoch": 0.9419555020463241, + "grad_norm": 127.55465698242188, + "learning_rate": 1.3774325519842423e-07, + "loss": 14.346, + "step": 466300 + }, + { + "epoch": 0.9419757026790079, + "grad_norm": 122.92948913574219, + "learning_rate": 1.376618977283478e-07, + "loss": 20.0084, + "step": 466310 + }, + { + "epoch": 0.9419959033116917, + "grad_norm": 609.1244506835938, + "learning_rate": 1.3758056395671738e-07, + "loss": 17.6589, + "step": 466320 + }, + { + "epoch": 0.9420161039443755, + "grad_norm": 322.552001953125, + "learning_rate": 1.374992538839298e-07, + "loss": 19.4366, + "step": 466330 + }, + { + "epoch": 0.9420363045770593, + "grad_norm": 110.1209716796875, + "learning_rate": 1.3741796751038095e-07, + "loss": 22.9075, + "step": 466340 + }, + { + "epoch": 0.9420565052097432, + "grad_norm": 380.9493713378906, + "learning_rate": 1.373367048364671e-07, + "loss": 14.092, + "step": 466350 + }, + { + "epoch": 0.942076705842427, + "grad_norm": 0.42919933795928955, + "learning_rate": 1.3725546586258464e-07, + "loss": 13.1575, + "step": 466360 + }, + { + "epoch": 0.9420969064751108, + "grad_norm": 1916.9405517578125, + "learning_rate": 1.3717425058912882e-07, + "loss": 18.7603, + "step": 466370 + }, + { + "epoch": 0.9421171071077946, + "grad_norm": 14.732674598693848, + "learning_rate": 1.3709305901649594e-07, + "loss": 38.1083, + "step": 466380 + }, + { + "epoch": 0.9421373077404784, + "grad_norm": 512.1130981445312, + "learning_rate": 1.370118911450824e-07, + "loss": 18.5798, + "step": 466390 + }, + { + "epoch": 0.9421575083731623, + "grad_norm": 434.47039794921875, + "learning_rate": 1.3693074697528231e-07, + "loss": 19.878, + "step": 466400 + }, + { + "epoch": 0.9421777090058461, + "grad_norm": 20.22287940979004, + "learning_rate": 1.36849626507492e-07, + "loss": 21.2979, + "step": 466410 + }, + { + "epoch": 0.9421979096385299, + "grad_norm": 260.6330261230469, + "learning_rate": 1.367685297421073e-07, + "loss": 22.6538, + "step": 466420 + }, + { + "epoch": 0.9422181102712137, + "grad_norm": 635.8558959960938, + "learning_rate": 1.366874566795229e-07, + "loss": 25.0508, + "step": 466430 + }, + { + "epoch": 0.9422383109038975, + "grad_norm": 382.78399658203125, + "learning_rate": 1.3660640732013342e-07, + "loss": 9.9655, + "step": 466440 + }, + { + "epoch": 0.9422585115365814, + "grad_norm": 202.0198211669922, + "learning_rate": 1.3652538166433527e-07, + "loss": 20.7105, + "step": 466450 + }, + { + "epoch": 0.9422787121692652, + "grad_norm": 86.01641082763672, + "learning_rate": 1.3644437971252144e-07, + "loss": 23.0091, + "step": 466460 + }, + { + "epoch": 0.942298912801949, + "grad_norm": 148.82080078125, + "learning_rate": 1.3636340146508886e-07, + "loss": 17.164, + "step": 466470 + }, + { + "epoch": 0.9423191134346328, + "grad_norm": 490.79486083984375, + "learning_rate": 1.362824469224311e-07, + "loss": 26.0232, + "step": 466480 + }, + { + "epoch": 0.9423393140673166, + "grad_norm": 759.5617065429688, + "learning_rate": 1.362015160849417e-07, + "loss": 17.5195, + "step": 466490 + }, + { + "epoch": 0.9423595147000005, + "grad_norm": 160.0380401611328, + "learning_rate": 1.3612060895301759e-07, + "loss": 10.9043, + "step": 466500 + }, + { + "epoch": 0.9423797153326842, + "grad_norm": 81.51010131835938, + "learning_rate": 1.360397255270507e-07, + "loss": 12.4245, + "step": 466510 + }, + { + "epoch": 0.942399915965368, + "grad_norm": 9.42785930633545, + "learning_rate": 1.3595886580743677e-07, + "loss": 15.6158, + "step": 466520 + }, + { + "epoch": 0.9424201165980518, + "grad_norm": 254.63711547851562, + "learning_rate": 1.3587802979456888e-07, + "loss": 21.2009, + "step": 466530 + }, + { + "epoch": 0.9424403172307356, + "grad_norm": 488.95220947265625, + "learning_rate": 1.3579721748884222e-07, + "loss": 21.6403, + "step": 466540 + }, + { + "epoch": 0.9424605178634194, + "grad_norm": 34.97917556762695, + "learning_rate": 1.3571642889064984e-07, + "loss": 15.1119, + "step": 466550 + }, + { + "epoch": 0.9424807184961033, + "grad_norm": 238.89625549316406, + "learning_rate": 1.356356640003853e-07, + "loss": 8.2208, + "step": 466560 + }, + { + "epoch": 0.9425009191287871, + "grad_norm": 247.09791564941406, + "learning_rate": 1.3555492281844273e-07, + "loss": 16.6098, + "step": 466570 + }, + { + "epoch": 0.9425211197614709, + "grad_norm": 292.3550109863281, + "learning_rate": 1.354742053452157e-07, + "loss": 25.4076, + "step": 466580 + }, + { + "epoch": 0.9425413203941547, + "grad_norm": 550.693115234375, + "learning_rate": 1.353935115810967e-07, + "loss": 13.479, + "step": 466590 + }, + { + "epoch": 0.9425615210268385, + "grad_norm": 150.84622192382812, + "learning_rate": 1.3531284152647983e-07, + "loss": 26.4981, + "step": 466600 + }, + { + "epoch": 0.9425817216595224, + "grad_norm": 135.9732666015625, + "learning_rate": 1.3523219518175924e-07, + "loss": 19.2011, + "step": 466610 + }, + { + "epoch": 0.9426019222922062, + "grad_norm": 411.6714172363281, + "learning_rate": 1.351515725473257e-07, + "loss": 13.1952, + "step": 466620 + }, + { + "epoch": 0.94262212292489, + "grad_norm": 691.3521728515625, + "learning_rate": 1.3507097362357392e-07, + "loss": 23.5884, + "step": 466630 + }, + { + "epoch": 0.9426423235575738, + "grad_norm": 435.2999572753906, + "learning_rate": 1.349903984108958e-07, + "loss": 19.8776, + "step": 466640 + }, + { + "epoch": 0.9426625241902576, + "grad_norm": 522.2403564453125, + "learning_rate": 1.3490984690968488e-07, + "loss": 25.458, + "step": 466650 + }, + { + "epoch": 0.9426827248229415, + "grad_norm": 790.474609375, + "learning_rate": 1.3482931912033314e-07, + "loss": 21.0078, + "step": 466660 + }, + { + "epoch": 0.9427029254556253, + "grad_norm": 1263.0888671875, + "learning_rate": 1.3474881504323301e-07, + "loss": 19.8605, + "step": 466670 + }, + { + "epoch": 0.9427231260883091, + "grad_norm": 590.8986206054688, + "learning_rate": 1.346683346787775e-07, + "loss": 13.3168, + "step": 466680 + }, + { + "epoch": 0.9427433267209929, + "grad_norm": 232.91049194335938, + "learning_rate": 1.3458787802735794e-07, + "loss": 21.9886, + "step": 466690 + }, + { + "epoch": 0.9427635273536767, + "grad_norm": 438.4366455078125, + "learning_rate": 1.3450744508936687e-07, + "loss": 18.1001, + "step": 466700 + }, + { + "epoch": 0.9427837279863606, + "grad_norm": 492.16668701171875, + "learning_rate": 1.3442703586519724e-07, + "loss": 10.5851, + "step": 466710 + }, + { + "epoch": 0.9428039286190444, + "grad_norm": 277.2815246582031, + "learning_rate": 1.3434665035523985e-07, + "loss": 30.349, + "step": 466720 + }, + { + "epoch": 0.9428241292517282, + "grad_norm": 177.5471954345703, + "learning_rate": 1.342662885598861e-07, + "loss": 11.7614, + "step": 466730 + }, + { + "epoch": 0.942844329884412, + "grad_norm": 438.42755126953125, + "learning_rate": 1.3418595047952897e-07, + "loss": 12.5441, + "step": 466740 + }, + { + "epoch": 0.9428645305170958, + "grad_norm": 639.2652587890625, + "learning_rate": 1.341056361145593e-07, + "loss": 15.6022, + "step": 466750 + }, + { + "epoch": 0.9428847311497797, + "grad_norm": 40.974517822265625, + "learning_rate": 1.3402534546536783e-07, + "loss": 17.9016, + "step": 466760 + }, + { + "epoch": 0.9429049317824634, + "grad_norm": 227.44757080078125, + "learning_rate": 1.3394507853234763e-07, + "loss": 19.5819, + "step": 466770 + }, + { + "epoch": 0.9429251324151472, + "grad_norm": 461.1067810058594, + "learning_rate": 1.3386483531588834e-07, + "loss": 21.6301, + "step": 466780 + }, + { + "epoch": 0.942945333047831, + "grad_norm": 502.9984130859375, + "learning_rate": 1.337846158163819e-07, + "loss": 12.9162, + "step": 466790 + }, + { + "epoch": 0.9429655336805148, + "grad_norm": 606.315673828125, + "learning_rate": 1.3370442003421913e-07, + "loss": 25.3759, + "step": 466800 + }, + { + "epoch": 0.9429857343131987, + "grad_norm": 275.4271545410156, + "learning_rate": 1.336242479697908e-07, + "loss": 35.1638, + "step": 466810 + }, + { + "epoch": 0.9430059349458825, + "grad_norm": 544.243408203125, + "learning_rate": 1.335440996234877e-07, + "loss": 11.1995, + "step": 466820 + }, + { + "epoch": 0.9430261355785663, + "grad_norm": 200.02992248535156, + "learning_rate": 1.334639749956995e-07, + "loss": 12.549, + "step": 466830 + }, + { + "epoch": 0.9430463362112501, + "grad_norm": 487.49444580078125, + "learning_rate": 1.3338387408681875e-07, + "loss": 13.481, + "step": 466840 + }, + { + "epoch": 0.9430665368439339, + "grad_norm": 738.1172485351562, + "learning_rate": 1.333037968972345e-07, + "loss": 15.7329, + "step": 466850 + }, + { + "epoch": 0.9430867374766178, + "grad_norm": 99.85303497314453, + "learning_rate": 1.33223743427337e-07, + "loss": 19.0156, + "step": 466860 + }, + { + "epoch": 0.9431069381093016, + "grad_norm": 357.34100341796875, + "learning_rate": 1.331437136775171e-07, + "loss": 18.5052, + "step": 466870 + }, + { + "epoch": 0.9431271387419854, + "grad_norm": 166.46453857421875, + "learning_rate": 1.330637076481639e-07, + "loss": 13.3199, + "step": 466880 + }, + { + "epoch": 0.9431473393746692, + "grad_norm": 48.30439758300781, + "learning_rate": 1.3298372533966874e-07, + "loss": 11.878, + "step": 466890 + }, + { + "epoch": 0.943167540007353, + "grad_norm": 324.0368347167969, + "learning_rate": 1.3290376675242022e-07, + "loss": 25.7629, + "step": 466900 + }, + { + "epoch": 0.9431877406400369, + "grad_norm": 390.7507019042969, + "learning_rate": 1.3282383188680802e-07, + "loss": 21.2157, + "step": 466910 + }, + { + "epoch": 0.9432079412727207, + "grad_norm": 57.96989059448242, + "learning_rate": 1.327439207432224e-07, + "loss": 15.7185, + "step": 466920 + }, + { + "epoch": 0.9432281419054045, + "grad_norm": 202.033935546875, + "learning_rate": 1.3266403332205248e-07, + "loss": 9.7807, + "step": 466930 + }, + { + "epoch": 0.9432483425380883, + "grad_norm": 108.84996795654297, + "learning_rate": 1.3258416962368849e-07, + "loss": 14.6172, + "step": 466940 + }, + { + "epoch": 0.9432685431707721, + "grad_norm": 172.59718322753906, + "learning_rate": 1.325043296485179e-07, + "loss": 18.0242, + "step": 466950 + }, + { + "epoch": 0.943288743803456, + "grad_norm": 434.2422180175781, + "learning_rate": 1.3242451339693153e-07, + "loss": 33.7853, + "step": 466960 + }, + { + "epoch": 0.9433089444361398, + "grad_norm": 147.3890838623047, + "learning_rate": 1.3234472086931738e-07, + "loss": 26.3679, + "step": 466970 + }, + { + "epoch": 0.9433291450688236, + "grad_norm": 10.948002815246582, + "learning_rate": 1.322649520660646e-07, + "loss": 13.5406, + "step": 466980 + }, + { + "epoch": 0.9433493457015074, + "grad_norm": 424.71539306640625, + "learning_rate": 1.3218520698756177e-07, + "loss": 9.7036, + "step": 466990 + }, + { + "epoch": 0.9433695463341912, + "grad_norm": 243.30010986328125, + "learning_rate": 1.3210548563419857e-07, + "loss": 13.9705, + "step": 467000 + }, + { + "epoch": 0.943389746966875, + "grad_norm": 197.2886962890625, + "learning_rate": 1.32025788006363e-07, + "loss": 26.665, + "step": 467010 + }, + { + "epoch": 0.9434099475995589, + "grad_norm": 610.0017700195312, + "learning_rate": 1.3194611410444258e-07, + "loss": 21.8821, + "step": 467020 + }, + { + "epoch": 0.9434301482322426, + "grad_norm": 272.1587219238281, + "learning_rate": 1.3186646392882696e-07, + "loss": 8.3357, + "step": 467030 + }, + { + "epoch": 0.9434503488649264, + "grad_norm": 167.1318817138672, + "learning_rate": 1.3178683747990362e-07, + "loss": 11.8705, + "step": 467040 + }, + { + "epoch": 0.9434705494976102, + "grad_norm": 434.9255065917969, + "learning_rate": 1.3170723475806003e-07, + "loss": 23.161, + "step": 467050 + }, + { + "epoch": 0.943490750130294, + "grad_norm": 79.04789733886719, + "learning_rate": 1.3162765576368587e-07, + "loss": 24.5077, + "step": 467060 + }, + { + "epoch": 0.9435109507629779, + "grad_norm": 203.78448486328125, + "learning_rate": 1.315481004971675e-07, + "loss": 8.7908, + "step": 467070 + }, + { + "epoch": 0.9435311513956617, + "grad_norm": 225.8023681640625, + "learning_rate": 1.314685689588935e-07, + "loss": 18.4575, + "step": 467080 + }, + { + "epoch": 0.9435513520283455, + "grad_norm": 297.1622619628906, + "learning_rate": 1.3138906114925133e-07, + "loss": 10.182, + "step": 467090 + }, + { + "epoch": 0.9435715526610293, + "grad_norm": 90.65361022949219, + "learning_rate": 1.313095770686279e-07, + "loss": 11.4575, + "step": 467100 + }, + { + "epoch": 0.9435917532937131, + "grad_norm": 511.15032958984375, + "learning_rate": 1.3123011671741183e-07, + "loss": 17.2859, + "step": 467110 + }, + { + "epoch": 0.943611953926397, + "grad_norm": 199.49876403808594, + "learning_rate": 1.3115068009598886e-07, + "loss": 15.8563, + "step": 467120 + }, + { + "epoch": 0.9436321545590808, + "grad_norm": 403.6021728515625, + "learning_rate": 1.3107126720474762e-07, + "loss": 13.149, + "step": 467130 + }, + { + "epoch": 0.9436523551917646, + "grad_norm": 256.2900695800781, + "learning_rate": 1.3099187804407387e-07, + "loss": 24.4645, + "step": 467140 + }, + { + "epoch": 0.9436725558244484, + "grad_norm": 254.575927734375, + "learning_rate": 1.3091251261435568e-07, + "loss": 7.1697, + "step": 467150 + }, + { + "epoch": 0.9436927564571322, + "grad_norm": 467.6200866699219, + "learning_rate": 1.3083317091597936e-07, + "loss": 17.0296, + "step": 467160 + }, + { + "epoch": 0.9437129570898161, + "grad_norm": 310.8859558105469, + "learning_rate": 1.3075385294933129e-07, + "loss": 14.5931, + "step": 467170 + }, + { + "epoch": 0.9437331577224999, + "grad_norm": 312.9391174316406, + "learning_rate": 1.306745587147984e-07, + "loss": 17.0316, + "step": 467180 + }, + { + "epoch": 0.9437533583551837, + "grad_norm": 322.968994140625, + "learning_rate": 1.3059528821276758e-07, + "loss": 18.964, + "step": 467190 + }, + { + "epoch": 0.9437735589878675, + "grad_norm": 116.29562377929688, + "learning_rate": 1.3051604144362407e-07, + "loss": 15.6003, + "step": 467200 + }, + { + "epoch": 0.9437937596205513, + "grad_norm": 424.7398986816406, + "learning_rate": 1.304368184077548e-07, + "loss": 21.4729, + "step": 467210 + }, + { + "epoch": 0.9438139602532352, + "grad_norm": 406.6511535644531, + "learning_rate": 1.3035761910554666e-07, + "loss": 23.7003, + "step": 467220 + }, + { + "epoch": 0.943834160885919, + "grad_norm": 431.1209716796875, + "learning_rate": 1.302784435373844e-07, + "loss": 11.4722, + "step": 467230 + }, + { + "epoch": 0.9438543615186028, + "grad_norm": 277.345703125, + "learning_rate": 1.3019929170365376e-07, + "loss": 18.2024, + "step": 467240 + }, + { + "epoch": 0.9438745621512866, + "grad_norm": 393.22003173828125, + "learning_rate": 1.3012016360474223e-07, + "loss": 6.3782, + "step": 467250 + }, + { + "epoch": 0.9438947627839704, + "grad_norm": 107.62462615966797, + "learning_rate": 1.3004105924103394e-07, + "loss": 28.9039, + "step": 467260 + }, + { + "epoch": 0.9439149634166543, + "grad_norm": 312.8868103027344, + "learning_rate": 1.2996197861291472e-07, + "loss": 20.6576, + "step": 467270 + }, + { + "epoch": 0.943935164049338, + "grad_norm": 304.1863098144531, + "learning_rate": 1.2988292172076977e-07, + "loss": 18.5216, + "step": 467280 + }, + { + "epoch": 0.9439553646820218, + "grad_norm": 190.7682647705078, + "learning_rate": 1.2980388856498604e-07, + "loss": 9.204, + "step": 467290 + }, + { + "epoch": 0.9439755653147056, + "grad_norm": 121.52448272705078, + "learning_rate": 1.29724879145946e-07, + "loss": 12.4498, + "step": 467300 + }, + { + "epoch": 0.9439957659473894, + "grad_norm": 229.15966796875, + "learning_rate": 1.296458934640371e-07, + "loss": 9.9259, + "step": 467310 + }, + { + "epoch": 0.9440159665800733, + "grad_norm": 532.7434692382812, + "learning_rate": 1.2956693151964296e-07, + "loss": 19.2803, + "step": 467320 + }, + { + "epoch": 0.9440361672127571, + "grad_norm": 372.7672424316406, + "learning_rate": 1.2948799331314933e-07, + "loss": 14.7937, + "step": 467330 + }, + { + "epoch": 0.9440563678454409, + "grad_norm": 1146.291015625, + "learning_rate": 1.2940907884494036e-07, + "loss": 48.4152, + "step": 467340 + }, + { + "epoch": 0.9440765684781247, + "grad_norm": 452.1363830566406, + "learning_rate": 1.2933018811540078e-07, + "loss": 18.9367, + "step": 467350 + }, + { + "epoch": 0.9440967691108085, + "grad_norm": 391.53204345703125, + "learning_rate": 1.2925132112491523e-07, + "loss": 16.6104, + "step": 467360 + }, + { + "epoch": 0.9441169697434924, + "grad_norm": 237.9326171875, + "learning_rate": 1.2917247787386787e-07, + "loss": 16.8393, + "step": 467370 + }, + { + "epoch": 0.9441371703761762, + "grad_norm": 501.45037841796875, + "learning_rate": 1.2909365836264287e-07, + "loss": 21.6704, + "step": 467380 + }, + { + "epoch": 0.94415737100886, + "grad_norm": 126.89668273925781, + "learning_rate": 1.2901486259162488e-07, + "loss": 19.5969, + "step": 467390 + }, + { + "epoch": 0.9441775716415438, + "grad_norm": 428.07708740234375, + "learning_rate": 1.289360905611975e-07, + "loss": 10.1928, + "step": 467400 + }, + { + "epoch": 0.9441977722742276, + "grad_norm": 126.64779663085938, + "learning_rate": 1.288573422717454e-07, + "loss": 39.1484, + "step": 467410 + }, + { + "epoch": 0.9442179729069115, + "grad_norm": 75.78832244873047, + "learning_rate": 1.287786177236511e-07, + "loss": 15.7369, + "step": 467420 + }, + { + "epoch": 0.9442381735395953, + "grad_norm": 851.9575805664062, + "learning_rate": 1.2869991691729922e-07, + "loss": 30.6076, + "step": 467430 + }, + { + "epoch": 0.9442583741722791, + "grad_norm": 15.668920516967773, + "learning_rate": 1.2862123985307284e-07, + "loss": 11.1184, + "step": 467440 + }, + { + "epoch": 0.9442785748049629, + "grad_norm": 314.4423828125, + "learning_rate": 1.285425865313561e-07, + "loss": 14.5962, + "step": 467450 + }, + { + "epoch": 0.9442987754376467, + "grad_norm": 326.61346435546875, + "learning_rate": 1.28463956952532e-07, + "loss": 20.0917, + "step": 467460 + }, + { + "epoch": 0.9443189760703306, + "grad_norm": 434.478271484375, + "learning_rate": 1.2838535111698359e-07, + "loss": 18.4376, + "step": 467470 + }, + { + "epoch": 0.9443391767030144, + "grad_norm": 282.2841796875, + "learning_rate": 1.2830676902509443e-07, + "loss": 14.4992, + "step": 467480 + }, + { + "epoch": 0.9443593773356982, + "grad_norm": 924.6843872070312, + "learning_rate": 1.2822821067724643e-07, + "loss": 37.9355, + "step": 467490 + }, + { + "epoch": 0.944379577968382, + "grad_norm": 236.3780059814453, + "learning_rate": 1.2814967607382433e-07, + "loss": 7.6149, + "step": 467500 + }, + { + "epoch": 0.9443997786010658, + "grad_norm": 343.4498596191406, + "learning_rate": 1.2807116521520947e-07, + "loss": 13.3588, + "step": 467510 + }, + { + "epoch": 0.9444199792337497, + "grad_norm": 542.5467529296875, + "learning_rate": 1.279926781017843e-07, + "loss": 26.7331, + "step": 467520 + }, + { + "epoch": 0.9444401798664335, + "grad_norm": 121.63961029052734, + "learning_rate": 1.2791421473393184e-07, + "loss": 10.3786, + "step": 467530 + }, + { + "epoch": 0.9444603804991172, + "grad_norm": 289.2529296875, + "learning_rate": 1.2783577511203515e-07, + "loss": 14.1991, + "step": 467540 + }, + { + "epoch": 0.944480581131801, + "grad_norm": 811.0139770507812, + "learning_rate": 1.2775735923647614e-07, + "loss": 21.7363, + "step": 467550 + }, + { + "epoch": 0.9445007817644848, + "grad_norm": 155.37222290039062, + "learning_rate": 1.2767896710763616e-07, + "loss": 15.0662, + "step": 467560 + }, + { + "epoch": 0.9445209823971686, + "grad_norm": 305.1764221191406, + "learning_rate": 1.2760059872589824e-07, + "loss": 21.6474, + "step": 467570 + }, + { + "epoch": 0.9445411830298525, + "grad_norm": 359.0425109863281, + "learning_rate": 1.2752225409164432e-07, + "loss": 27.2246, + "step": 467580 + }, + { + "epoch": 0.9445613836625363, + "grad_norm": 193.85736083984375, + "learning_rate": 1.2744393320525573e-07, + "loss": 9.8475, + "step": 467590 + }, + { + "epoch": 0.9445815842952201, + "grad_norm": 462.73992919921875, + "learning_rate": 1.2736563606711384e-07, + "loss": 11.911, + "step": 467600 + }, + { + "epoch": 0.9446017849279039, + "grad_norm": 240.19923400878906, + "learning_rate": 1.2728736267760167e-07, + "loss": 43.9946, + "step": 467610 + }, + { + "epoch": 0.9446219855605877, + "grad_norm": 217.29193115234375, + "learning_rate": 1.2720911303710004e-07, + "loss": 11.255, + "step": 467620 + }, + { + "epoch": 0.9446421861932716, + "grad_norm": 166.75306701660156, + "learning_rate": 1.2713088714598974e-07, + "loss": 10.818, + "step": 467630 + }, + { + "epoch": 0.9446623868259554, + "grad_norm": 146.30332946777344, + "learning_rate": 1.2705268500465274e-07, + "loss": 14.2386, + "step": 467640 + }, + { + "epoch": 0.9446825874586392, + "grad_norm": 37.514007568359375, + "learning_rate": 1.2697450661347033e-07, + "loss": 14.8072, + "step": 467650 + }, + { + "epoch": 0.944702788091323, + "grad_norm": 697.3914794921875, + "learning_rate": 1.2689635197282224e-07, + "loss": 24.3691, + "step": 467660 + }, + { + "epoch": 0.9447229887240068, + "grad_norm": 776.3318481445312, + "learning_rate": 1.2681822108309094e-07, + "loss": 21.7652, + "step": 467670 + }, + { + "epoch": 0.9447431893566907, + "grad_norm": 744.0573120117188, + "learning_rate": 1.2674011394465614e-07, + "loss": 29.1712, + "step": 467680 + }, + { + "epoch": 0.9447633899893745, + "grad_norm": 237.00564575195312, + "learning_rate": 1.2666203055789915e-07, + "loss": 11.0158, + "step": 467690 + }, + { + "epoch": 0.9447835906220583, + "grad_norm": 423.5158996582031, + "learning_rate": 1.2658397092320028e-07, + "loss": 18.9339, + "step": 467700 + }, + { + "epoch": 0.9448037912547421, + "grad_norm": 1079.7054443359375, + "learning_rate": 1.2650593504094034e-07, + "loss": 16.8912, + "step": 467710 + }, + { + "epoch": 0.9448239918874259, + "grad_norm": 400.0626525878906, + "learning_rate": 1.2642792291149896e-07, + "loss": 15.3153, + "step": 467720 + }, + { + "epoch": 0.9448441925201098, + "grad_norm": 819.558349609375, + "learning_rate": 1.2634993453525702e-07, + "loss": 27.604, + "step": 467730 + }, + { + "epoch": 0.9448643931527936, + "grad_norm": 203.84701538085938, + "learning_rate": 1.2627196991259473e-07, + "loss": 24.7013, + "step": 467740 + }, + { + "epoch": 0.9448845937854774, + "grad_norm": 247.1985626220703, + "learning_rate": 1.261940290438912e-07, + "loss": 24.6028, + "step": 467750 + }, + { + "epoch": 0.9449047944181612, + "grad_norm": 239.46533203125, + "learning_rate": 1.2611611192952733e-07, + "loss": 15.5538, + "step": 467760 + }, + { + "epoch": 0.944924995050845, + "grad_norm": 20.467748641967773, + "learning_rate": 1.2603821856988218e-07, + "loss": 15.1277, + "step": 467770 + }, + { + "epoch": 0.9449451956835289, + "grad_norm": 612.4995727539062, + "learning_rate": 1.259603489653355e-07, + "loss": 13.7239, + "step": 467780 + }, + { + "epoch": 0.9449653963162126, + "grad_norm": 285.17694091796875, + "learning_rate": 1.2588250311626693e-07, + "loss": 19.4016, + "step": 467790 + }, + { + "epoch": 0.9449855969488964, + "grad_norm": 300.2528991699219, + "learning_rate": 1.258046810230562e-07, + "loss": 30.4937, + "step": 467800 + }, + { + "epoch": 0.9450057975815802, + "grad_norm": 406.70208740234375, + "learning_rate": 1.257268826860819e-07, + "loss": 25.9061, + "step": 467810 + }, + { + "epoch": 0.945025998214264, + "grad_norm": 152.28329467773438, + "learning_rate": 1.2564910810572317e-07, + "loss": 10.9667, + "step": 467820 + }, + { + "epoch": 0.9450461988469478, + "grad_norm": 489.2743225097656, + "learning_rate": 1.255713572823608e-07, + "loss": 13.7445, + "step": 467830 + }, + { + "epoch": 0.9450663994796317, + "grad_norm": 551.2591552734375, + "learning_rate": 1.2549363021637174e-07, + "loss": 14.6357, + "step": 467840 + }, + { + "epoch": 0.9450866001123155, + "grad_norm": 263.2279052734375, + "learning_rate": 1.2541592690813508e-07, + "loss": 17.0744, + "step": 467850 + }, + { + "epoch": 0.9451068007449993, + "grad_norm": 324.44024658203125, + "learning_rate": 1.2533824735803059e-07, + "loss": 19.0699, + "step": 467860 + }, + { + "epoch": 0.9451270013776831, + "grad_norm": 39.75548553466797, + "learning_rate": 1.252605915664362e-07, + "loss": 18.4461, + "step": 467870 + }, + { + "epoch": 0.945147202010367, + "grad_norm": 417.4817810058594, + "learning_rate": 1.2518295953373005e-07, + "loss": 13.1373, + "step": 467880 + }, + { + "epoch": 0.9451674026430508, + "grad_norm": 178.2801971435547, + "learning_rate": 1.2510535126029067e-07, + "loss": 15.0186, + "step": 467890 + }, + { + "epoch": 0.9451876032757346, + "grad_norm": 16.9990177154541, + "learning_rate": 1.2502776674649776e-07, + "loss": 17.0969, + "step": 467900 + }, + { + "epoch": 0.9452078039084184, + "grad_norm": 648.9572143554688, + "learning_rate": 1.2495020599272766e-07, + "loss": 16.402, + "step": 467910 + }, + { + "epoch": 0.9452280045411022, + "grad_norm": 249.67657470703125, + "learning_rate": 1.2487266899935845e-07, + "loss": 12.7127, + "step": 467920 + }, + { + "epoch": 0.945248205173786, + "grad_norm": 551.3310546875, + "learning_rate": 1.2479515576676925e-07, + "loss": 21.3335, + "step": 467930 + }, + { + "epoch": 0.9452684058064699, + "grad_norm": 407.8700256347656, + "learning_rate": 1.24717666295337e-07, + "loss": 13.2619, + "step": 467940 + }, + { + "epoch": 0.9452886064391537, + "grad_norm": 116.41875457763672, + "learning_rate": 1.2464020058543912e-07, + "loss": 10.3969, + "step": 467950 + }, + { + "epoch": 0.9453088070718375, + "grad_norm": 427.3218078613281, + "learning_rate": 1.2456275863745426e-07, + "loss": 14.2084, + "step": 467960 + }, + { + "epoch": 0.9453290077045213, + "grad_norm": 1037.89501953125, + "learning_rate": 1.2448534045175876e-07, + "loss": 23.1926, + "step": 467970 + }, + { + "epoch": 0.9453492083372051, + "grad_norm": 447.3141174316406, + "learning_rate": 1.2440794602873064e-07, + "loss": 14.0446, + "step": 467980 + }, + { + "epoch": 0.945369408969889, + "grad_norm": 353.541015625, + "learning_rate": 1.2433057536874682e-07, + "loss": 8.0339, + "step": 467990 + }, + { + "epoch": 0.9453896096025728, + "grad_norm": 710.0747680664062, + "learning_rate": 1.2425322847218368e-07, + "loss": 22.1698, + "step": 468000 + }, + { + "epoch": 0.9454098102352566, + "grad_norm": 101.60120391845703, + "learning_rate": 1.241759053394198e-07, + "loss": 14.5833, + "step": 468010 + }, + { + "epoch": 0.9454300108679404, + "grad_norm": 127.26005554199219, + "learning_rate": 1.2409860597083102e-07, + "loss": 9.7244, + "step": 468020 + }, + { + "epoch": 0.9454502115006242, + "grad_norm": 360.7369384765625, + "learning_rate": 1.240213303667942e-07, + "loss": 6.6837, + "step": 468030 + }, + { + "epoch": 0.9454704121333081, + "grad_norm": 610.5038452148438, + "learning_rate": 1.239440785276863e-07, + "loss": 30.8797, + "step": 468040 + }, + { + "epoch": 0.9454906127659918, + "grad_norm": 677.2760620117188, + "learning_rate": 1.2386685045388313e-07, + "loss": 18.1932, + "step": 468050 + }, + { + "epoch": 0.9455108133986756, + "grad_norm": 348.82879638671875, + "learning_rate": 1.2378964614576162e-07, + "loss": 23.3082, + "step": 468060 + }, + { + "epoch": 0.9455310140313594, + "grad_norm": 282.5048828125, + "learning_rate": 1.237124656036981e-07, + "loss": 20.5746, + "step": 468070 + }, + { + "epoch": 0.9455512146640432, + "grad_norm": 175.1044158935547, + "learning_rate": 1.236353088280684e-07, + "loss": 17.5479, + "step": 468080 + }, + { + "epoch": 0.9455714152967271, + "grad_norm": 675.5391235351562, + "learning_rate": 1.2355817581924945e-07, + "loss": 11.3713, + "step": 468090 + }, + { + "epoch": 0.9455916159294109, + "grad_norm": 466.1625061035156, + "learning_rate": 1.2348106657761537e-07, + "loss": 10.5448, + "step": 468100 + }, + { + "epoch": 0.9456118165620947, + "grad_norm": 307.6192932128906, + "learning_rate": 1.2340398110354424e-07, + "loss": 15.6597, + "step": 468110 + }, + { + "epoch": 0.9456320171947785, + "grad_norm": 307.61944580078125, + "learning_rate": 1.2332691939741015e-07, + "loss": 7.8915, + "step": 468120 + }, + { + "epoch": 0.9456522178274623, + "grad_norm": 311.0517578125, + "learning_rate": 1.2324988145958895e-07, + "loss": 19.7036, + "step": 468130 + }, + { + "epoch": 0.9456724184601462, + "grad_norm": 85.99491882324219, + "learning_rate": 1.2317286729045586e-07, + "loss": 21.8538, + "step": 468140 + }, + { + "epoch": 0.94569261909283, + "grad_norm": 264.7533264160156, + "learning_rate": 1.2309587689038783e-07, + "loss": 26.0238, + "step": 468150 + }, + { + "epoch": 0.9457128197255138, + "grad_norm": 210.65113830566406, + "learning_rate": 1.2301891025975897e-07, + "loss": 16.9391, + "step": 468160 + }, + { + "epoch": 0.9457330203581976, + "grad_norm": 189.60183715820312, + "learning_rate": 1.229419673989435e-07, + "loss": 17.2061, + "step": 468170 + }, + { + "epoch": 0.9457532209908814, + "grad_norm": 383.91912841796875, + "learning_rate": 1.2286504830831824e-07, + "loss": 19.7732, + "step": 468180 + }, + { + "epoch": 0.9457734216235653, + "grad_norm": 416.5213928222656, + "learning_rate": 1.2278815298825742e-07, + "loss": 25.9267, + "step": 468190 + }, + { + "epoch": 0.9457936222562491, + "grad_norm": 327.18695068359375, + "learning_rate": 1.2271128143913458e-07, + "loss": 29.0271, + "step": 468200 + }, + { + "epoch": 0.9458138228889329, + "grad_norm": 160.49041748046875, + "learning_rate": 1.2263443366132555e-07, + "loss": 7.8713, + "step": 468210 + }, + { + "epoch": 0.9458340235216167, + "grad_norm": 510.4881896972656, + "learning_rate": 1.2255760965520557e-07, + "loss": 21.4823, + "step": 468220 + }, + { + "epoch": 0.9458542241543005, + "grad_norm": 509.4342956542969, + "learning_rate": 1.224808094211477e-07, + "loss": 20.5117, + "step": 468230 + }, + { + "epoch": 0.9458744247869844, + "grad_norm": 307.6473693847656, + "learning_rate": 1.2240403295952662e-07, + "loss": 9.3283, + "step": 468240 + }, + { + "epoch": 0.9458946254196682, + "grad_norm": 509.9915771484375, + "learning_rate": 1.2232728027071704e-07, + "loss": 13.2376, + "step": 468250 + }, + { + "epoch": 0.945914826052352, + "grad_norm": 95.58562469482422, + "learning_rate": 1.222505513550931e-07, + "loss": 9.5526, + "step": 468260 + }, + { + "epoch": 0.9459350266850358, + "grad_norm": 732.8162231445312, + "learning_rate": 1.221738462130273e-07, + "loss": 26.2388, + "step": 468270 + }, + { + "epoch": 0.9459552273177196, + "grad_norm": 83.42688751220703, + "learning_rate": 1.2209716484489543e-07, + "loss": 23.4167, + "step": 468280 + }, + { + "epoch": 0.9459754279504035, + "grad_norm": 442.001220703125, + "learning_rate": 1.2202050725106995e-07, + "loss": 21.2767, + "step": 468290 + }, + { + "epoch": 0.9459956285830872, + "grad_norm": 363.59063720703125, + "learning_rate": 1.2194387343192504e-07, + "loss": 16.9433, + "step": 468300 + }, + { + "epoch": 0.946015829215771, + "grad_norm": 130.3279266357422, + "learning_rate": 1.2186726338783427e-07, + "loss": 10.2144, + "step": 468310 + }, + { + "epoch": 0.9460360298484548, + "grad_norm": 277.4386901855469, + "learning_rate": 1.2179067711917015e-07, + "loss": 12.461, + "step": 468320 + }, + { + "epoch": 0.9460562304811386, + "grad_norm": 47.01555633544922, + "learning_rate": 1.2171411462630732e-07, + "loss": 16.815, + "step": 468330 + }, + { + "epoch": 0.9460764311138224, + "grad_norm": 436.15606689453125, + "learning_rate": 1.216375759096178e-07, + "loss": 36.5272, + "step": 468340 + }, + { + "epoch": 0.9460966317465063, + "grad_norm": 708.5038452148438, + "learning_rate": 1.2156106096947563e-07, + "loss": 32.5021, + "step": 468350 + }, + { + "epoch": 0.9461168323791901, + "grad_norm": 106.7961196899414, + "learning_rate": 1.2148456980625223e-07, + "loss": 14.8968, + "step": 468360 + }, + { + "epoch": 0.9461370330118739, + "grad_norm": 335.14959716796875, + "learning_rate": 1.214081024203223e-07, + "loss": 15.8294, + "step": 468370 + }, + { + "epoch": 0.9461572336445577, + "grad_norm": 152.71038818359375, + "learning_rate": 1.2133165881205723e-07, + "loss": 19.1025, + "step": 468380 + }, + { + "epoch": 0.9461774342772415, + "grad_norm": 317.9998779296875, + "learning_rate": 1.2125523898182945e-07, + "loss": 19.3284, + "step": 468390 + }, + { + "epoch": 0.9461976349099254, + "grad_norm": 164.97238159179688, + "learning_rate": 1.211788429300126e-07, + "loss": 18.7075, + "step": 468400 + }, + { + "epoch": 0.9462178355426092, + "grad_norm": 481.8919372558594, + "learning_rate": 1.21102470656978e-07, + "loss": 22.586, + "step": 468410 + }, + { + "epoch": 0.946238036175293, + "grad_norm": 427.742431640625, + "learning_rate": 1.2102612216309816e-07, + "loss": 16.2481, + "step": 468420 + }, + { + "epoch": 0.9462582368079768, + "grad_norm": 82.76959991455078, + "learning_rate": 1.2094979744874502e-07, + "loss": 15.4469, + "step": 468430 + }, + { + "epoch": 0.9462784374406606, + "grad_norm": 502.79638671875, + "learning_rate": 1.2087349651429215e-07, + "loss": 15.7268, + "step": 468440 + }, + { + "epoch": 0.9462986380733445, + "grad_norm": 586.7080688476562, + "learning_rate": 1.207972193601087e-07, + "loss": 13.1939, + "step": 468450 + }, + { + "epoch": 0.9463188387060283, + "grad_norm": 500.2250061035156, + "learning_rate": 1.207209659865677e-07, + "loss": 14.0395, + "step": 468460 + }, + { + "epoch": 0.9463390393387121, + "grad_norm": 637.3262939453125, + "learning_rate": 1.206447363940416e-07, + "loss": 19.9108, + "step": 468470 + }, + { + "epoch": 0.9463592399713959, + "grad_norm": 573.2239990234375, + "learning_rate": 1.205685305829013e-07, + "loss": 25.3511, + "step": 468480 + }, + { + "epoch": 0.9463794406040797, + "grad_norm": 55.16313552856445, + "learning_rate": 1.204923485535181e-07, + "loss": 22.0333, + "step": 468490 + }, + { + "epoch": 0.9463996412367636, + "grad_norm": 374.0496520996094, + "learning_rate": 1.2041619030626283e-07, + "loss": 21.0971, + "step": 468500 + }, + { + "epoch": 0.9464198418694474, + "grad_norm": 87.39411163330078, + "learning_rate": 1.2034005584150854e-07, + "loss": 12.7182, + "step": 468510 + }, + { + "epoch": 0.9464400425021312, + "grad_norm": 320.94329833984375, + "learning_rate": 1.2026394515962382e-07, + "loss": 28.4477, + "step": 468520 + }, + { + "epoch": 0.946460243134815, + "grad_norm": 220.43370056152344, + "learning_rate": 1.2018785826098057e-07, + "loss": 9.1108, + "step": 468530 + }, + { + "epoch": 0.9464804437674988, + "grad_norm": 135.63665771484375, + "learning_rate": 1.2011179514595072e-07, + "loss": 30.4911, + "step": 468540 + }, + { + "epoch": 0.9465006444001827, + "grad_norm": 315.3052978515625, + "learning_rate": 1.20035755814904e-07, + "loss": 17.3008, + "step": 468550 + }, + { + "epoch": 0.9465208450328664, + "grad_norm": 32.96565246582031, + "learning_rate": 1.1995974026821066e-07, + "loss": 24.2093, + "step": 468560 + }, + { + "epoch": 0.9465410456655502, + "grad_norm": 148.7790069580078, + "learning_rate": 1.1988374850624208e-07, + "loss": 20.4896, + "step": 468570 + }, + { + "epoch": 0.946561246298234, + "grad_norm": 930.547119140625, + "learning_rate": 1.198077805293679e-07, + "loss": 9.2134, + "step": 468580 + }, + { + "epoch": 0.9465814469309178, + "grad_norm": 237.03515625, + "learning_rate": 1.1973183633795849e-07, + "loss": 52.5735, + "step": 468590 + }, + { + "epoch": 0.9466016475636017, + "grad_norm": 17.60946273803711, + "learning_rate": 1.1965591593238513e-07, + "loss": 10.6422, + "step": 468600 + }, + { + "epoch": 0.9466218481962855, + "grad_norm": 320.0501403808594, + "learning_rate": 1.1958001931301587e-07, + "loss": 13.3763, + "step": 468610 + }, + { + "epoch": 0.9466420488289693, + "grad_norm": 115.49630737304688, + "learning_rate": 1.195041464802227e-07, + "loss": 18.8042, + "step": 468620 + }, + { + "epoch": 0.9466622494616531, + "grad_norm": 32.85114669799805, + "learning_rate": 1.19428297434373e-07, + "loss": 22.0479, + "step": 468630 + }, + { + "epoch": 0.9466824500943369, + "grad_norm": 259.37530517578125, + "learning_rate": 1.1935247217583934e-07, + "loss": 12.3792, + "step": 468640 + }, + { + "epoch": 0.9467026507270208, + "grad_norm": 781.1555786132812, + "learning_rate": 1.1927667070498916e-07, + "loss": 15.1413, + "step": 468650 + }, + { + "epoch": 0.9467228513597046, + "grad_norm": 385.81744384765625, + "learning_rate": 1.1920089302219218e-07, + "loss": 16.4108, + "step": 468660 + }, + { + "epoch": 0.9467430519923884, + "grad_norm": 162.37913513183594, + "learning_rate": 1.1912513912781864e-07, + "loss": 17.8091, + "step": 468670 + }, + { + "epoch": 0.9467632526250722, + "grad_norm": 371.38995361328125, + "learning_rate": 1.1904940902223661e-07, + "loss": 15.9771, + "step": 468680 + }, + { + "epoch": 0.946783453257756, + "grad_norm": 426.4790954589844, + "learning_rate": 1.1897370270581632e-07, + "loss": 25.6471, + "step": 468690 + }, + { + "epoch": 0.9468036538904399, + "grad_norm": 6.888204574584961, + "learning_rate": 1.1889802017892638e-07, + "loss": 14.0545, + "step": 468700 + }, + { + "epoch": 0.9468238545231237, + "grad_norm": 34.13553237915039, + "learning_rate": 1.1882236144193482e-07, + "loss": 19.7614, + "step": 468710 + }, + { + "epoch": 0.9468440551558075, + "grad_norm": 369.91961669921875, + "learning_rate": 1.1874672649521135e-07, + "loss": 26.8512, + "step": 468720 + }, + { + "epoch": 0.9468642557884913, + "grad_norm": 118.35027313232422, + "learning_rate": 1.1867111533912457e-07, + "loss": 10.5861, + "step": 468730 + }, + { + "epoch": 0.9468844564211751, + "grad_norm": 473.2422180175781, + "learning_rate": 1.1859552797404194e-07, + "loss": 11.2697, + "step": 468740 + }, + { + "epoch": 0.946904657053859, + "grad_norm": 0.0, + "learning_rate": 1.185199644003332e-07, + "loss": 18.9192, + "step": 468750 + }, + { + "epoch": 0.9469248576865428, + "grad_norm": 975.2803955078125, + "learning_rate": 1.1844442461836636e-07, + "loss": 27.8736, + "step": 468760 + }, + { + "epoch": 0.9469450583192266, + "grad_norm": 175.9191436767578, + "learning_rate": 1.1836890862850892e-07, + "loss": 19.3225, + "step": 468770 + }, + { + "epoch": 0.9469652589519104, + "grad_norm": 246.40516662597656, + "learning_rate": 1.1829341643112946e-07, + "loss": 23.1092, + "step": 468780 + }, + { + "epoch": 0.9469854595845942, + "grad_norm": 763.1865844726562, + "learning_rate": 1.1821794802659603e-07, + "loss": 14.0527, + "step": 468790 + }, + { + "epoch": 0.9470056602172781, + "grad_norm": 257.7726135253906, + "learning_rate": 1.1814250341527611e-07, + "loss": 23.4074, + "step": 468800 + }, + { + "epoch": 0.9470258608499619, + "grad_norm": 573.9570922851562, + "learning_rate": 1.1806708259753718e-07, + "loss": 20.0437, + "step": 468810 + }, + { + "epoch": 0.9470460614826456, + "grad_norm": 201.02435302734375, + "learning_rate": 1.179916855737473e-07, + "loss": 16.4613, + "step": 468820 + }, + { + "epoch": 0.9470662621153294, + "grad_norm": 245.06533813476562, + "learning_rate": 1.1791631234427448e-07, + "loss": 14.38, + "step": 468830 + }, + { + "epoch": 0.9470864627480132, + "grad_norm": 862.9598999023438, + "learning_rate": 1.1784096290948455e-07, + "loss": 24.3121, + "step": 468840 + }, + { + "epoch": 0.947106663380697, + "grad_norm": 560.0054321289062, + "learning_rate": 1.177656372697461e-07, + "loss": 15.7914, + "step": 468850 + }, + { + "epoch": 0.9471268640133809, + "grad_norm": 175.73483276367188, + "learning_rate": 1.1769033542542552e-07, + "loss": 6.7914, + "step": 468860 + }, + { + "epoch": 0.9471470646460647, + "grad_norm": 291.7281188964844, + "learning_rate": 1.1761505737689082e-07, + "loss": 18.2273, + "step": 468870 + }, + { + "epoch": 0.9471672652787485, + "grad_norm": 338.5940856933594, + "learning_rate": 1.175398031245073e-07, + "loss": 12.2181, + "step": 468880 + }, + { + "epoch": 0.9471874659114323, + "grad_norm": 272.7087707519531, + "learning_rate": 1.1746457266864297e-07, + "loss": 13.4214, + "step": 468890 + }, + { + "epoch": 0.9472076665441161, + "grad_norm": 311.3247985839844, + "learning_rate": 1.1738936600966366e-07, + "loss": 14.7226, + "step": 468900 + }, + { + "epoch": 0.9472278671768, + "grad_norm": 291.2148132324219, + "learning_rate": 1.173141831479374e-07, + "loss": 17.4352, + "step": 468910 + }, + { + "epoch": 0.9472480678094838, + "grad_norm": 206.57376098632812, + "learning_rate": 1.1723902408382892e-07, + "loss": 20.1185, + "step": 468920 + }, + { + "epoch": 0.9472682684421676, + "grad_norm": 440.7289733886719, + "learning_rate": 1.1716388881770513e-07, + "loss": 27.5187, + "step": 468930 + }, + { + "epoch": 0.9472884690748514, + "grad_norm": 608.8489379882812, + "learning_rate": 1.1708877734993296e-07, + "loss": 20.8657, + "step": 468940 + }, + { + "epoch": 0.9473086697075352, + "grad_norm": 202.66183471679688, + "learning_rate": 1.1701368968087711e-07, + "loss": 7.8949, + "step": 468950 + }, + { + "epoch": 0.9473288703402191, + "grad_norm": 296.52935791015625, + "learning_rate": 1.1693862581090453e-07, + "loss": 13.0502, + "step": 468960 + }, + { + "epoch": 0.9473490709729029, + "grad_norm": 659.4253540039062, + "learning_rate": 1.1686358574038104e-07, + "loss": 20.7425, + "step": 468970 + }, + { + "epoch": 0.9473692716055867, + "grad_norm": 132.4563751220703, + "learning_rate": 1.1678856946967244e-07, + "loss": 17.9443, + "step": 468980 + }, + { + "epoch": 0.9473894722382705, + "grad_norm": 633.8014526367188, + "learning_rate": 1.1671357699914343e-07, + "loss": 15.5557, + "step": 468990 + }, + { + "epoch": 0.9474096728709543, + "grad_norm": 66.96441650390625, + "learning_rate": 1.166386083291604e-07, + "loss": 15.0395, + "step": 469000 + }, + { + "epoch": 0.9474298735036382, + "grad_norm": 582.8804931640625, + "learning_rate": 1.1656366346008862e-07, + "loss": 22.1104, + "step": 469010 + }, + { + "epoch": 0.947450074136322, + "grad_norm": 347.63812255859375, + "learning_rate": 1.1648874239229391e-07, + "loss": 16.2601, + "step": 469020 + }, + { + "epoch": 0.9474702747690058, + "grad_norm": 283.6161193847656, + "learning_rate": 1.1641384512613985e-07, + "loss": 12.2617, + "step": 469030 + }, + { + "epoch": 0.9474904754016896, + "grad_norm": 430.0794677734375, + "learning_rate": 1.1633897166199227e-07, + "loss": 19.0489, + "step": 469040 + }, + { + "epoch": 0.9475106760343734, + "grad_norm": 315.4210510253906, + "learning_rate": 1.1626412200021697e-07, + "loss": 11.05, + "step": 469050 + }, + { + "epoch": 0.9475308766670573, + "grad_norm": 376.3665466308594, + "learning_rate": 1.1618929614117757e-07, + "loss": 21.5259, + "step": 469060 + }, + { + "epoch": 0.947551077299741, + "grad_norm": 45.962337493896484, + "learning_rate": 1.1611449408523879e-07, + "loss": 21.0688, + "step": 469070 + }, + { + "epoch": 0.9475712779324248, + "grad_norm": 248.42181396484375, + "learning_rate": 1.1603971583276641e-07, + "loss": 11.9986, + "step": 469080 + }, + { + "epoch": 0.9475914785651086, + "grad_norm": 26.90751838684082, + "learning_rate": 1.1596496138412405e-07, + "loss": 28.1198, + "step": 469090 + }, + { + "epoch": 0.9476116791977924, + "grad_norm": 678.9092407226562, + "learning_rate": 1.1589023073967586e-07, + "loss": 20.818, + "step": 469100 + }, + { + "epoch": 0.9476318798304763, + "grad_norm": 7.6660566329956055, + "learning_rate": 1.1581552389978601e-07, + "loss": 9.1425, + "step": 469110 + }, + { + "epoch": 0.9476520804631601, + "grad_norm": 273.488037109375, + "learning_rate": 1.1574084086481973e-07, + "loss": 23.9087, + "step": 469120 + }, + { + "epoch": 0.9476722810958439, + "grad_norm": 672.782958984375, + "learning_rate": 1.1566618163513954e-07, + "loss": 12.4417, + "step": 469130 + }, + { + "epoch": 0.9476924817285277, + "grad_norm": 28.96516990661621, + "learning_rate": 1.1559154621110957e-07, + "loss": 6.9446, + "step": 469140 + }, + { + "epoch": 0.9477126823612115, + "grad_norm": 1065.329833984375, + "learning_rate": 1.155169345930951e-07, + "loss": 20.1584, + "step": 469150 + }, + { + "epoch": 0.9477328829938954, + "grad_norm": 578.9161987304688, + "learning_rate": 1.1544234678145805e-07, + "loss": 22.021, + "step": 469160 + }, + { + "epoch": 0.9477530836265792, + "grad_norm": 99.24224853515625, + "learning_rate": 1.1536778277656258e-07, + "loss": 11.5046, + "step": 469170 + }, + { + "epoch": 0.947773284259263, + "grad_norm": 570.20166015625, + "learning_rate": 1.1529324257877228e-07, + "loss": 33.5162, + "step": 469180 + }, + { + "epoch": 0.9477934848919468, + "grad_norm": 138.69076538085938, + "learning_rate": 1.152187261884502e-07, + "loss": 13.3612, + "step": 469190 + }, + { + "epoch": 0.9478136855246306, + "grad_norm": 892.8467407226562, + "learning_rate": 1.1514423360595939e-07, + "loss": 14.4001, + "step": 469200 + }, + { + "epoch": 0.9478338861573145, + "grad_norm": 1641.258544921875, + "learning_rate": 1.1506976483166343e-07, + "loss": 22.1854, + "step": 469210 + }, + { + "epoch": 0.9478540867899983, + "grad_norm": 1146.048583984375, + "learning_rate": 1.1499531986592482e-07, + "loss": 25.4028, + "step": 469220 + }, + { + "epoch": 0.9478742874226821, + "grad_norm": 272.59130859375, + "learning_rate": 1.1492089870910662e-07, + "loss": 17.5598, + "step": 469230 + }, + { + "epoch": 0.9478944880553659, + "grad_norm": 321.302734375, + "learning_rate": 1.1484650136157127e-07, + "loss": 21.2706, + "step": 469240 + }, + { + "epoch": 0.9479146886880497, + "grad_norm": 702.2314453125, + "learning_rate": 1.1477212782368185e-07, + "loss": 15.2294, + "step": 469250 + }, + { + "epoch": 0.9479348893207336, + "grad_norm": 371.1092834472656, + "learning_rate": 1.1469777809580084e-07, + "loss": 21.5585, + "step": 469260 + }, + { + "epoch": 0.9479550899534174, + "grad_norm": 443.08245849609375, + "learning_rate": 1.1462345217828963e-07, + "loss": 10.0246, + "step": 469270 + }, + { + "epoch": 0.9479752905861012, + "grad_norm": 476.6809997558594, + "learning_rate": 1.1454915007151179e-07, + "loss": 17.9244, + "step": 469280 + }, + { + "epoch": 0.947995491218785, + "grad_norm": 626.3543090820312, + "learning_rate": 1.1447487177582816e-07, + "loss": 18.527, + "step": 469290 + }, + { + "epoch": 0.9480156918514688, + "grad_norm": 466.4951477050781, + "learning_rate": 1.1440061729160235e-07, + "loss": 21.489, + "step": 469300 + }, + { + "epoch": 0.9480358924841527, + "grad_norm": 11.767196655273438, + "learning_rate": 1.1432638661919515e-07, + "loss": 10.4186, + "step": 469310 + }, + { + "epoch": 0.9480560931168365, + "grad_norm": 308.8578796386719, + "learning_rate": 1.1425217975896796e-07, + "loss": 12.2751, + "step": 469320 + }, + { + "epoch": 0.9480762937495202, + "grad_norm": 282.1939392089844, + "learning_rate": 1.1417799671128327e-07, + "loss": 15.9067, + "step": 469330 + }, + { + "epoch": 0.948096494382204, + "grad_norm": 348.49114990234375, + "learning_rate": 1.14103837476503e-07, + "loss": 17.374, + "step": 469340 + }, + { + "epoch": 0.9481166950148878, + "grad_norm": 595.0879516601562, + "learning_rate": 1.1402970205498742e-07, + "loss": 27.4214, + "step": 469350 + }, + { + "epoch": 0.9481368956475716, + "grad_norm": 424.2049255371094, + "learning_rate": 1.1395559044709848e-07, + "loss": 11.7823, + "step": 469360 + }, + { + "epoch": 0.9481570962802555, + "grad_norm": 488.8497009277344, + "learning_rate": 1.1388150265319808e-07, + "loss": 13.7173, + "step": 469370 + }, + { + "epoch": 0.9481772969129393, + "grad_norm": 382.3506164550781, + "learning_rate": 1.1380743867364596e-07, + "loss": 17.2933, + "step": 469380 + }, + { + "epoch": 0.9481974975456231, + "grad_norm": 171.2713623046875, + "learning_rate": 1.1373339850880405e-07, + "loss": 17.9502, + "step": 469390 + }, + { + "epoch": 0.9482176981783069, + "grad_norm": 338.79010009765625, + "learning_rate": 1.136593821590326e-07, + "loss": 11.2293, + "step": 469400 + }, + { + "epoch": 0.9482378988109907, + "grad_norm": 173.9486541748047, + "learning_rate": 1.1358538962469356e-07, + "loss": 21.067, + "step": 469410 + }, + { + "epoch": 0.9482580994436746, + "grad_norm": 574.244873046875, + "learning_rate": 1.1351142090614553e-07, + "loss": 22.1407, + "step": 469420 + }, + { + "epoch": 0.9482783000763584, + "grad_norm": 132.3253936767578, + "learning_rate": 1.1343747600375044e-07, + "loss": 12.5369, + "step": 469430 + }, + { + "epoch": 0.9482985007090422, + "grad_norm": 91.43624114990234, + "learning_rate": 1.1336355491786966e-07, + "loss": 15.2691, + "step": 469440 + }, + { + "epoch": 0.948318701341726, + "grad_norm": 191.69607543945312, + "learning_rate": 1.1328965764886069e-07, + "loss": 23.2678, + "step": 469450 + }, + { + "epoch": 0.9483389019744098, + "grad_norm": 117.98069763183594, + "learning_rate": 1.1321578419708545e-07, + "loss": 18.1865, + "step": 469460 + }, + { + "epoch": 0.9483591026070937, + "grad_norm": 142.21937561035156, + "learning_rate": 1.1314193456290424e-07, + "loss": 22.8095, + "step": 469470 + }, + { + "epoch": 0.9483793032397775, + "grad_norm": 17.312837600708008, + "learning_rate": 1.1306810874667673e-07, + "loss": 18.3314, + "step": 469480 + }, + { + "epoch": 0.9483995038724613, + "grad_norm": 365.4279479980469, + "learning_rate": 1.129943067487621e-07, + "loss": 15.4938, + "step": 469490 + }, + { + "epoch": 0.9484197045051451, + "grad_norm": 610.1925659179688, + "learning_rate": 1.1292052856952063e-07, + "loss": 25.1791, + "step": 469500 + }, + { + "epoch": 0.9484399051378289, + "grad_norm": 28.289342880249023, + "learning_rate": 1.1284677420931201e-07, + "loss": 9.2474, + "step": 469510 + }, + { + "epoch": 0.9484601057705128, + "grad_norm": 530.5923461914062, + "learning_rate": 1.1277304366849539e-07, + "loss": 16.9483, + "step": 469520 + }, + { + "epoch": 0.9484803064031966, + "grad_norm": 376.1013488769531, + "learning_rate": 1.1269933694742996e-07, + "loss": 26.9136, + "step": 469530 + }, + { + "epoch": 0.9485005070358804, + "grad_norm": 743.5037231445312, + "learning_rate": 1.1262565404647485e-07, + "loss": 16.8922, + "step": 469540 + }, + { + "epoch": 0.9485207076685642, + "grad_norm": 290.6438293457031, + "learning_rate": 1.1255199496599034e-07, + "loss": 13.4206, + "step": 469550 + }, + { + "epoch": 0.948540908301248, + "grad_norm": 591.5577392578125, + "learning_rate": 1.1247835970633392e-07, + "loss": 15.4233, + "step": 469560 + }, + { + "epoch": 0.9485611089339319, + "grad_norm": 413.7424621582031, + "learning_rate": 1.1240474826786585e-07, + "loss": 11.5132, + "step": 469570 + }, + { + "epoch": 0.9485813095666156, + "grad_norm": 81.66283416748047, + "learning_rate": 1.1233116065094363e-07, + "loss": 17.804, + "step": 469580 + }, + { + "epoch": 0.9486015101992994, + "grad_norm": 712.2706909179688, + "learning_rate": 1.1225759685592697e-07, + "loss": 17.38, + "step": 469590 + }, + { + "epoch": 0.9486217108319832, + "grad_norm": 451.8149108886719, + "learning_rate": 1.1218405688317447e-07, + "loss": 8.9866, + "step": 469600 + }, + { + "epoch": 0.948641911464667, + "grad_norm": 286.841796875, + "learning_rate": 1.1211054073304305e-07, + "loss": 14.9223, + "step": 469610 + }, + { + "epoch": 0.9486621120973509, + "grad_norm": 352.9850769042969, + "learning_rate": 1.1203704840589247e-07, + "loss": 13.3507, + "step": 469620 + }, + { + "epoch": 0.9486823127300347, + "grad_norm": 10.075496673583984, + "learning_rate": 1.1196357990208074e-07, + "loss": 13.7039, + "step": 469630 + }, + { + "epoch": 0.9487025133627185, + "grad_norm": 428.2384948730469, + "learning_rate": 1.1189013522196479e-07, + "loss": 24.0886, + "step": 469640 + }, + { + "epoch": 0.9487227139954023, + "grad_norm": 123.3632583618164, + "learning_rate": 1.118167143659038e-07, + "loss": 9.5288, + "step": 469650 + }, + { + "epoch": 0.9487429146280861, + "grad_norm": 335.2364196777344, + "learning_rate": 1.1174331733425636e-07, + "loss": 19.4018, + "step": 469660 + }, + { + "epoch": 0.94876311526077, + "grad_norm": 423.3990173339844, + "learning_rate": 1.1166994412737774e-07, + "loss": 23.5129, + "step": 469670 + }, + { + "epoch": 0.9487833158934538, + "grad_norm": 360.9956359863281, + "learning_rate": 1.1159659474562712e-07, + "loss": 13.3685, + "step": 469680 + }, + { + "epoch": 0.9488035165261376, + "grad_norm": 478.0350341796875, + "learning_rate": 1.1152326918936251e-07, + "loss": 24.2518, + "step": 469690 + }, + { + "epoch": 0.9488237171588214, + "grad_norm": 577.0175170898438, + "learning_rate": 1.1144996745894033e-07, + "loss": 28.4371, + "step": 469700 + }, + { + "epoch": 0.9488439177915052, + "grad_norm": 551.7817993164062, + "learning_rate": 1.1137668955471803e-07, + "loss": 10.8502, + "step": 469710 + }, + { + "epoch": 0.948864118424189, + "grad_norm": 1055.144287109375, + "learning_rate": 1.1130343547705257e-07, + "loss": 26.8408, + "step": 469720 + }, + { + "epoch": 0.9488843190568729, + "grad_norm": 444.7875061035156, + "learning_rate": 1.1123020522630202e-07, + "loss": 24.6248, + "step": 469730 + }, + { + "epoch": 0.9489045196895567, + "grad_norm": 305.60711669921875, + "learning_rate": 1.111569988028216e-07, + "loss": 26.628, + "step": 469740 + }, + { + "epoch": 0.9489247203222405, + "grad_norm": 499.5113220214844, + "learning_rate": 1.1108381620696885e-07, + "loss": 13.9902, + "step": 469750 + }, + { + "epoch": 0.9489449209549243, + "grad_norm": 288.8736267089844, + "learning_rate": 1.1101065743910122e-07, + "loss": 15.2388, + "step": 469760 + }, + { + "epoch": 0.9489651215876082, + "grad_norm": 506.89111328125, + "learning_rate": 1.1093752249957512e-07, + "loss": 22.3633, + "step": 469770 + }, + { + "epoch": 0.948985322220292, + "grad_norm": 421.3926086425781, + "learning_rate": 1.1086441138874581e-07, + "loss": 35.1609, + "step": 469780 + }, + { + "epoch": 0.9490055228529758, + "grad_norm": 636.1416015625, + "learning_rate": 1.107913241069708e-07, + "loss": 17.0688, + "step": 469790 + }, + { + "epoch": 0.9490257234856596, + "grad_norm": 95.65436553955078, + "learning_rate": 1.107182606546059e-07, + "loss": 6.2457, + "step": 469800 + }, + { + "epoch": 0.9490459241183434, + "grad_norm": 57.0366096496582, + "learning_rate": 1.1064522103200636e-07, + "loss": 10.7217, + "step": 469810 + }, + { + "epoch": 0.9490661247510273, + "grad_norm": 331.9047546386719, + "learning_rate": 1.1057220523953027e-07, + "loss": 8.9685, + "step": 469820 + }, + { + "epoch": 0.9490863253837111, + "grad_norm": 151.98440551757812, + "learning_rate": 1.1049921327753121e-07, + "loss": 9.8115, + "step": 469830 + }, + { + "epoch": 0.9491065260163948, + "grad_norm": 602.0835571289062, + "learning_rate": 1.1042624514636669e-07, + "loss": 16.8972, + "step": 469840 + }, + { + "epoch": 0.9491267266490786, + "grad_norm": 357.6507873535156, + "learning_rate": 1.1035330084639084e-07, + "loss": 9.9532, + "step": 469850 + }, + { + "epoch": 0.9491469272817624, + "grad_norm": 330.49566650390625, + "learning_rate": 1.1028038037796063e-07, + "loss": 13.1317, + "step": 469860 + }, + { + "epoch": 0.9491671279144462, + "grad_norm": 263.4090881347656, + "learning_rate": 1.1020748374143075e-07, + "loss": 17.9668, + "step": 469870 + }, + { + "epoch": 0.9491873285471301, + "grad_norm": 231.7461700439453, + "learning_rate": 1.1013461093715594e-07, + "loss": 9.0968, + "step": 469880 + }, + { + "epoch": 0.9492075291798139, + "grad_norm": 209.28048706054688, + "learning_rate": 1.1006176196549256e-07, + "loss": 6.7867, + "step": 469890 + }, + { + "epoch": 0.9492277298124977, + "grad_norm": 297.8096008300781, + "learning_rate": 1.0998893682679479e-07, + "loss": 23.6673, + "step": 469900 + }, + { + "epoch": 0.9492479304451815, + "grad_norm": 334.8628234863281, + "learning_rate": 1.099161355214179e-07, + "loss": 14.8809, + "step": 469910 + }, + { + "epoch": 0.9492681310778653, + "grad_norm": 610.1708374023438, + "learning_rate": 1.0984335804971713e-07, + "loss": 16.09, + "step": 469920 + }, + { + "epoch": 0.9492883317105492, + "grad_norm": 32.4946403503418, + "learning_rate": 1.0977060441204612e-07, + "loss": 12.6552, + "step": 469930 + }, + { + "epoch": 0.949308532343233, + "grad_norm": 275.5940856933594, + "learning_rate": 1.0969787460876013e-07, + "loss": 13.3029, + "step": 469940 + }, + { + "epoch": 0.9493287329759168, + "grad_norm": 345.1875305175781, + "learning_rate": 1.0962516864021388e-07, + "loss": 14.9644, + "step": 469950 + }, + { + "epoch": 0.9493489336086006, + "grad_norm": 191.85562133789062, + "learning_rate": 1.0955248650676154e-07, + "loss": 14.6402, + "step": 469960 + }, + { + "epoch": 0.9493691342412844, + "grad_norm": 325.9012145996094, + "learning_rate": 1.0947982820875669e-07, + "loss": 18.6594, + "step": 469970 + }, + { + "epoch": 0.9493893348739683, + "grad_norm": 841.303955078125, + "learning_rate": 1.0940719374655462e-07, + "loss": 34.0621, + "step": 469980 + }, + { + "epoch": 0.9494095355066521, + "grad_norm": 264.1408386230469, + "learning_rate": 1.0933458312050837e-07, + "loss": 19.1478, + "step": 469990 + }, + { + "epoch": 0.9494297361393359, + "grad_norm": 6.626099109649658, + "learning_rate": 1.0926199633097156e-07, + "loss": 11.4435, + "step": 470000 + }, + { + "epoch": 0.9494499367720197, + "grad_norm": 991.1474609375, + "learning_rate": 1.0918943337829945e-07, + "loss": 29.941, + "step": 470010 + }, + { + "epoch": 0.9494701374047035, + "grad_norm": 58.44243240356445, + "learning_rate": 1.091168942628451e-07, + "loss": 9.7997, + "step": 470020 + }, + { + "epoch": 0.9494903380373874, + "grad_norm": 160.3310089111328, + "learning_rate": 1.09044378984961e-07, + "loss": 13.4106, + "step": 470030 + }, + { + "epoch": 0.9495105386700712, + "grad_norm": 809.5315551757812, + "learning_rate": 1.0897188754500187e-07, + "loss": 20.2929, + "step": 470040 + }, + { + "epoch": 0.949530739302755, + "grad_norm": 105.98553466796875, + "learning_rate": 1.0889941994332077e-07, + "loss": 14.2779, + "step": 470050 + }, + { + "epoch": 0.9495509399354388, + "grad_norm": 900.638671875, + "learning_rate": 1.0882697618027016e-07, + "loss": 15.7529, + "step": 470060 + }, + { + "epoch": 0.9495711405681226, + "grad_norm": 212.1917266845703, + "learning_rate": 1.0875455625620368e-07, + "loss": 25.6759, + "step": 470070 + }, + { + "epoch": 0.9495913412008065, + "grad_norm": 913.4801635742188, + "learning_rate": 1.0868216017147437e-07, + "loss": 29.1505, + "step": 470080 + }, + { + "epoch": 0.9496115418334903, + "grad_norm": 361.5392150878906, + "learning_rate": 1.0860978792643528e-07, + "loss": 10.5928, + "step": 470090 + }, + { + "epoch": 0.949631742466174, + "grad_norm": 346.39202880859375, + "learning_rate": 1.0853743952143836e-07, + "loss": 13.5348, + "step": 470100 + }, + { + "epoch": 0.9496519430988578, + "grad_norm": 312.23748779296875, + "learning_rate": 1.084651149568372e-07, + "loss": 13.6943, + "step": 470110 + }, + { + "epoch": 0.9496721437315416, + "grad_norm": 496.2864074707031, + "learning_rate": 1.0839281423298375e-07, + "loss": 13.6047, + "step": 470120 + }, + { + "epoch": 0.9496923443642254, + "grad_norm": 494.9438781738281, + "learning_rate": 1.0832053735022996e-07, + "loss": 14.4703, + "step": 470130 + }, + { + "epoch": 0.9497125449969093, + "grad_norm": 574.4812622070312, + "learning_rate": 1.0824828430892831e-07, + "loss": 28.3906, + "step": 470140 + }, + { + "epoch": 0.9497327456295931, + "grad_norm": 630.9834594726562, + "learning_rate": 1.0817605510943241e-07, + "loss": 15.1082, + "step": 470150 + }, + { + "epoch": 0.9497529462622769, + "grad_norm": 492.0447692871094, + "learning_rate": 1.0810384975209254e-07, + "loss": 17.1864, + "step": 470160 + }, + { + "epoch": 0.9497731468949607, + "grad_norm": 292.03009033203125, + "learning_rate": 1.0803166823726064e-07, + "loss": 19.4169, + "step": 470170 + }, + { + "epoch": 0.9497933475276445, + "grad_norm": 491.9415588378906, + "learning_rate": 1.0795951056528974e-07, + "loss": 21.1524, + "step": 470180 + }, + { + "epoch": 0.9498135481603284, + "grad_norm": 447.0008850097656, + "learning_rate": 1.0788737673653072e-07, + "loss": 30.879, + "step": 470190 + }, + { + "epoch": 0.9498337487930122, + "grad_norm": 475.52154541015625, + "learning_rate": 1.0781526675133492e-07, + "loss": 27.2527, + "step": 470200 + }, + { + "epoch": 0.949853949425696, + "grad_norm": 681.0592651367188, + "learning_rate": 1.0774318061005484e-07, + "loss": 16.0728, + "step": 470210 + }, + { + "epoch": 0.9498741500583798, + "grad_norm": 284.41815185546875, + "learning_rate": 1.0767111831304022e-07, + "loss": 16.9715, + "step": 470220 + }, + { + "epoch": 0.9498943506910636, + "grad_norm": 381.9979248046875, + "learning_rate": 1.0759907986064411e-07, + "loss": 17.1089, + "step": 470230 + }, + { + "epoch": 0.9499145513237475, + "grad_norm": 362.4442443847656, + "learning_rate": 1.0752706525321622e-07, + "loss": 11.5909, + "step": 470240 + }, + { + "epoch": 0.9499347519564313, + "grad_norm": 117.89044952392578, + "learning_rate": 1.0745507449110792e-07, + "loss": 17.8032, + "step": 470250 + }, + { + "epoch": 0.9499549525891151, + "grad_norm": 243.0460662841797, + "learning_rate": 1.0738310757467064e-07, + "loss": 18.7897, + "step": 470260 + }, + { + "epoch": 0.9499751532217989, + "grad_norm": 157.89682006835938, + "learning_rate": 1.0731116450425461e-07, + "loss": 12.373, + "step": 470270 + }, + { + "epoch": 0.9499953538544827, + "grad_norm": 399.5901184082031, + "learning_rate": 1.0723924528021012e-07, + "loss": 18.7036, + "step": 470280 + }, + { + "epoch": 0.9500155544871666, + "grad_norm": 314.33355712890625, + "learning_rate": 1.0716734990288801e-07, + "loss": 21.0218, + "step": 470290 + }, + { + "epoch": 0.9500357551198504, + "grad_norm": 707.6248779296875, + "learning_rate": 1.0709547837263967e-07, + "loss": 20.888, + "step": 470300 + }, + { + "epoch": 0.9500559557525342, + "grad_norm": 880.1218872070312, + "learning_rate": 1.0702363068981425e-07, + "loss": 36.0962, + "step": 470310 + }, + { + "epoch": 0.950076156385218, + "grad_norm": 407.9019775390625, + "learning_rate": 1.0695180685476148e-07, + "loss": 16.846, + "step": 470320 + }, + { + "epoch": 0.9500963570179018, + "grad_norm": 439.6984558105469, + "learning_rate": 1.0688000686783272e-07, + "loss": 15.0265, + "step": 470330 + }, + { + "epoch": 0.9501165576505857, + "grad_norm": 486.39654541015625, + "learning_rate": 1.0680823072937774e-07, + "loss": 20.4392, + "step": 470340 + }, + { + "epoch": 0.9501367582832694, + "grad_norm": 271.5041198730469, + "learning_rate": 1.067364784397451e-07, + "loss": 23.7964, + "step": 470350 + }, + { + "epoch": 0.9501569589159532, + "grad_norm": 1043.4071044921875, + "learning_rate": 1.0666474999928566e-07, + "loss": 27.549, + "step": 470360 + }, + { + "epoch": 0.950177159548637, + "grad_norm": 495.0008239746094, + "learning_rate": 1.0659304540834914e-07, + "loss": 16.5035, + "step": 470370 + }, + { + "epoch": 0.9501973601813208, + "grad_norm": 503.79620361328125, + "learning_rate": 1.0652136466728468e-07, + "loss": 18.311, + "step": 470380 + }, + { + "epoch": 0.9502175608140047, + "grad_norm": 629.4742431640625, + "learning_rate": 1.0644970777644093e-07, + "loss": 8.9288, + "step": 470390 + }, + { + "epoch": 0.9502377614466885, + "grad_norm": 632.9424438476562, + "learning_rate": 1.0637807473616812e-07, + "loss": 36.336, + "step": 470400 + }, + { + "epoch": 0.9502579620793723, + "grad_norm": 541.6952514648438, + "learning_rate": 1.0630646554681545e-07, + "loss": 18.185, + "step": 470410 + }, + { + "epoch": 0.9502781627120561, + "grad_norm": 427.49407958984375, + "learning_rate": 1.0623488020873097e-07, + "loss": 24.8249, + "step": 470420 + }, + { + "epoch": 0.9502983633447399, + "grad_norm": 368.714599609375, + "learning_rate": 1.0616331872226437e-07, + "loss": 17.8403, + "step": 470430 + }, + { + "epoch": 0.9503185639774238, + "grad_norm": 441.51666259765625, + "learning_rate": 1.0609178108776375e-07, + "loss": 14.8681, + "step": 470440 + }, + { + "epoch": 0.9503387646101076, + "grad_norm": 1.7728757858276367, + "learning_rate": 1.0602026730557879e-07, + "loss": 16.306, + "step": 470450 + }, + { + "epoch": 0.9503589652427914, + "grad_norm": 660.8228759765625, + "learning_rate": 1.0594877737605702e-07, + "loss": 13.973, + "step": 470460 + }, + { + "epoch": 0.9503791658754752, + "grad_norm": 421.2904357910156, + "learning_rate": 1.0587731129954815e-07, + "loss": 16.3852, + "step": 470470 + }, + { + "epoch": 0.950399366508159, + "grad_norm": 426.2768249511719, + "learning_rate": 1.0580586907639912e-07, + "loss": 14.3896, + "step": 470480 + }, + { + "epoch": 0.9504195671408429, + "grad_norm": 1023.1954956054688, + "learning_rate": 1.0573445070695853e-07, + "loss": 15.0185, + "step": 470490 + }, + { + "epoch": 0.9504397677735267, + "grad_norm": 269.4640808105469, + "learning_rate": 1.0566305619157502e-07, + "loss": 20.0318, + "step": 470500 + }, + { + "epoch": 0.9504599684062105, + "grad_norm": 215.77854919433594, + "learning_rate": 1.0559168553059551e-07, + "loss": 26.3668, + "step": 470510 + }, + { + "epoch": 0.9504801690388943, + "grad_norm": 280.6916809082031, + "learning_rate": 1.0552033872436917e-07, + "loss": 13.8537, + "step": 470520 + }, + { + "epoch": 0.9505003696715781, + "grad_norm": 436.7329406738281, + "learning_rate": 1.0544901577324351e-07, + "loss": 15.7911, + "step": 470530 + }, + { + "epoch": 0.950520570304262, + "grad_norm": 314.02001953125, + "learning_rate": 1.0537771667756436e-07, + "loss": 16.0215, + "step": 470540 + }, + { + "epoch": 0.9505407709369458, + "grad_norm": 383.9037170410156, + "learning_rate": 1.0530644143768143e-07, + "loss": 18.3283, + "step": 470550 + }, + { + "epoch": 0.9505609715696296, + "grad_norm": 140.77203369140625, + "learning_rate": 1.0523519005394167e-07, + "loss": 18.8596, + "step": 470560 + }, + { + "epoch": 0.9505811722023134, + "grad_norm": 368.7960205078125, + "learning_rate": 1.0516396252669092e-07, + "loss": 19.5221, + "step": 470570 + }, + { + "epoch": 0.9506013728349972, + "grad_norm": 476.341796875, + "learning_rate": 1.0509275885627779e-07, + "loss": 14.5584, + "step": 470580 + }, + { + "epoch": 0.9506215734676811, + "grad_norm": 769.5809326171875, + "learning_rate": 1.0502157904304866e-07, + "loss": 15.2152, + "step": 470590 + }, + { + "epoch": 0.9506417741003649, + "grad_norm": 667.5062255859375, + "learning_rate": 1.0495042308735104e-07, + "loss": 20.4751, + "step": 470600 + }, + { + "epoch": 0.9506619747330486, + "grad_norm": 14.904691696166992, + "learning_rate": 1.0487929098953131e-07, + "loss": 12.7858, + "step": 470610 + }, + { + "epoch": 0.9506821753657324, + "grad_norm": 1079.2874755859375, + "learning_rate": 1.0480818274993587e-07, + "loss": 11.7952, + "step": 470620 + }, + { + "epoch": 0.9507023759984162, + "grad_norm": 685.9215087890625, + "learning_rate": 1.0473709836891222e-07, + "loss": 11.9369, + "step": 470630 + }, + { + "epoch": 0.9507225766311, + "grad_norm": 289.9880065917969, + "learning_rate": 1.0466603784680562e-07, + "loss": 15.8206, + "step": 470640 + }, + { + "epoch": 0.9507427772637839, + "grad_norm": 980.9338989257812, + "learning_rate": 1.0459500118396304e-07, + "loss": 19.923, + "step": 470650 + }, + { + "epoch": 0.9507629778964677, + "grad_norm": 817.2350463867188, + "learning_rate": 1.0452398838073141e-07, + "loss": 11.4897, + "step": 470660 + }, + { + "epoch": 0.9507831785291515, + "grad_norm": 450.24676513671875, + "learning_rate": 1.0445299943745546e-07, + "loss": 18.5014, + "step": 470670 + }, + { + "epoch": 0.9508033791618353, + "grad_norm": 589.586669921875, + "learning_rate": 1.0438203435448157e-07, + "loss": 42.9503, + "step": 470680 + }, + { + "epoch": 0.9508235797945191, + "grad_norm": 976.338623046875, + "learning_rate": 1.0431109313215671e-07, + "loss": 30.7082, + "step": 470690 + }, + { + "epoch": 0.950843780427203, + "grad_norm": 527.5633544921875, + "learning_rate": 1.0424017577082556e-07, + "loss": 13.4547, + "step": 470700 + }, + { + "epoch": 0.9508639810598868, + "grad_norm": 1180.8414306640625, + "learning_rate": 1.0416928227083345e-07, + "loss": 31.8732, + "step": 470710 + }, + { + "epoch": 0.9508841816925706, + "grad_norm": 434.7847900390625, + "learning_rate": 1.0409841263252673e-07, + "loss": 18.8544, + "step": 470720 + }, + { + "epoch": 0.9509043823252544, + "grad_norm": 335.68133544921875, + "learning_rate": 1.040275668562507e-07, + "loss": 19.671, + "step": 470730 + }, + { + "epoch": 0.9509245829579382, + "grad_norm": 181.6182403564453, + "learning_rate": 1.0395674494235064e-07, + "loss": 24.6191, + "step": 470740 + }, + { + "epoch": 0.9509447835906221, + "grad_norm": 255.40704345703125, + "learning_rate": 1.038859468911707e-07, + "loss": 7.3884, + "step": 470750 + }, + { + "epoch": 0.9509649842233059, + "grad_norm": 12.816692352294922, + "learning_rate": 1.0381517270305786e-07, + "loss": 31.8007, + "step": 470760 + }, + { + "epoch": 0.9509851848559897, + "grad_norm": 0.5190161466598511, + "learning_rate": 1.0374442237835625e-07, + "loss": 8.8657, + "step": 470770 + }, + { + "epoch": 0.9510053854886735, + "grad_norm": 197.76315307617188, + "learning_rate": 1.036736959174095e-07, + "loss": 15.5982, + "step": 470780 + }, + { + "epoch": 0.9510255861213573, + "grad_norm": 188.831787109375, + "learning_rate": 1.03602993320564e-07, + "loss": 16.2393, + "step": 470790 + }, + { + "epoch": 0.9510457867540412, + "grad_norm": 853.7542724609375, + "learning_rate": 1.0353231458816338e-07, + "loss": 14.2301, + "step": 470800 + }, + { + "epoch": 0.951065987386725, + "grad_norm": 827.4303588867188, + "learning_rate": 1.0346165972055233e-07, + "loss": 37.7539, + "step": 470810 + }, + { + "epoch": 0.9510861880194088, + "grad_norm": 324.36322021484375, + "learning_rate": 1.0339102871807505e-07, + "loss": 16.0894, + "step": 470820 + }, + { + "epoch": 0.9511063886520926, + "grad_norm": 1509.9539794921875, + "learning_rate": 1.0332042158107624e-07, + "loss": 20.8904, + "step": 470830 + }, + { + "epoch": 0.9511265892847764, + "grad_norm": 88.76578521728516, + "learning_rate": 1.032498383099001e-07, + "loss": 17.4222, + "step": 470840 + }, + { + "epoch": 0.9511467899174603, + "grad_norm": 53.25017547607422, + "learning_rate": 1.0317927890489021e-07, + "loss": 12.9788, + "step": 470850 + }, + { + "epoch": 0.951166990550144, + "grad_norm": 131.33226013183594, + "learning_rate": 1.0310874336639021e-07, + "loss": 30.0801, + "step": 470860 + }, + { + "epoch": 0.9511871911828278, + "grad_norm": 426.4961853027344, + "learning_rate": 1.030382316947448e-07, + "loss": 13.2985, + "step": 470870 + }, + { + "epoch": 0.9512073918155116, + "grad_norm": 243.90689086914062, + "learning_rate": 1.0296774389029707e-07, + "loss": 22.1258, + "step": 470880 + }, + { + "epoch": 0.9512275924481954, + "grad_norm": 55.12568283081055, + "learning_rate": 1.0289727995339005e-07, + "loss": 9.4031, + "step": 470890 + }, + { + "epoch": 0.9512477930808793, + "grad_norm": 184.26426696777344, + "learning_rate": 1.0282683988436792e-07, + "loss": 12.5956, + "step": 470900 + }, + { + "epoch": 0.9512679937135631, + "grad_norm": 108.84278869628906, + "learning_rate": 1.027564236835743e-07, + "loss": 16.8787, + "step": 470910 + }, + { + "epoch": 0.9512881943462469, + "grad_norm": 94.78299713134766, + "learning_rate": 1.0268603135135169e-07, + "loss": 12.0234, + "step": 470920 + }, + { + "epoch": 0.9513083949789307, + "grad_norm": 524.7290649414062, + "learning_rate": 1.0261566288804315e-07, + "loss": 20.1096, + "step": 470930 + }, + { + "epoch": 0.9513285956116145, + "grad_norm": 404.2486572265625, + "learning_rate": 1.0254531829399228e-07, + "loss": 13.8545, + "step": 470940 + }, + { + "epoch": 0.9513487962442984, + "grad_norm": 185.98550415039062, + "learning_rate": 1.024749975695416e-07, + "loss": 14.0839, + "step": 470950 + }, + { + "epoch": 0.9513689968769822, + "grad_norm": 461.5027770996094, + "learning_rate": 1.0240470071503306e-07, + "loss": 12.5585, + "step": 470960 + }, + { + "epoch": 0.951389197509666, + "grad_norm": 258.36834716796875, + "learning_rate": 1.0233442773081026e-07, + "loss": 22.2929, + "step": 470970 + }, + { + "epoch": 0.9514093981423498, + "grad_norm": 346.71075439453125, + "learning_rate": 1.0226417861721571e-07, + "loss": 8.3865, + "step": 470980 + }, + { + "epoch": 0.9514295987750336, + "grad_norm": 485.3686828613281, + "learning_rate": 1.0219395337459137e-07, + "loss": 9.4013, + "step": 470990 + }, + { + "epoch": 0.9514497994077175, + "grad_norm": 640.2971801757812, + "learning_rate": 1.0212375200327973e-07, + "loss": 18.4735, + "step": 471000 + }, + { + "epoch": 0.9514700000404013, + "grad_norm": 171.5185546875, + "learning_rate": 1.0205357450362275e-07, + "loss": 12.5515, + "step": 471010 + }, + { + "epoch": 0.9514902006730851, + "grad_norm": 8.77415657043457, + "learning_rate": 1.0198342087596292e-07, + "loss": 15.5475, + "step": 471020 + }, + { + "epoch": 0.9515104013057689, + "grad_norm": 390.6858825683594, + "learning_rate": 1.0191329112064164e-07, + "loss": 16.3626, + "step": 471030 + }, + { + "epoch": 0.9515306019384527, + "grad_norm": 611.0020751953125, + "learning_rate": 1.0184318523800086e-07, + "loss": 13.536, + "step": 471040 + }, + { + "epoch": 0.9515508025711366, + "grad_norm": 441.7110290527344, + "learning_rate": 1.0177310322838251e-07, + "loss": 15.6641, + "step": 471050 + }, + { + "epoch": 0.9515710032038204, + "grad_norm": 280.6241149902344, + "learning_rate": 1.0170304509212803e-07, + "loss": 21.3834, + "step": 471060 + }, + { + "epoch": 0.9515912038365042, + "grad_norm": 260.3578796386719, + "learning_rate": 1.0163301082957821e-07, + "loss": 21.4385, + "step": 471070 + }, + { + "epoch": 0.951611404469188, + "grad_norm": 502.2814025878906, + "learning_rate": 1.0156300044107559e-07, + "loss": 12.3369, + "step": 471080 + }, + { + "epoch": 0.9516316051018718, + "grad_norm": 436.1946716308594, + "learning_rate": 1.0149301392696098e-07, + "loss": 17.8134, + "step": 471090 + }, + { + "epoch": 0.9516518057345557, + "grad_norm": 191.00633239746094, + "learning_rate": 1.0142305128757468e-07, + "loss": 21.5534, + "step": 471100 + }, + { + "epoch": 0.9516720063672395, + "grad_norm": 350.3988037109375, + "learning_rate": 1.0135311252325863e-07, + "loss": 25.5265, + "step": 471110 + }, + { + "epoch": 0.9516922069999232, + "grad_norm": 326.69793701171875, + "learning_rate": 1.0128319763435312e-07, + "loss": 27.3422, + "step": 471120 + }, + { + "epoch": 0.951712407632607, + "grad_norm": 208.7720947265625, + "learning_rate": 1.0121330662119954e-07, + "loss": 11.7793, + "step": 471130 + }, + { + "epoch": 0.9517326082652908, + "grad_norm": 137.06358337402344, + "learning_rate": 1.0114343948413818e-07, + "loss": 13.8002, + "step": 471140 + }, + { + "epoch": 0.9517528088979746, + "grad_norm": 801.980224609375, + "learning_rate": 1.0107359622350877e-07, + "loss": 30.6869, + "step": 471150 + }, + { + "epoch": 0.9517730095306585, + "grad_norm": 206.8402099609375, + "learning_rate": 1.0100377683965323e-07, + "loss": 13.2018, + "step": 471160 + }, + { + "epoch": 0.9517932101633423, + "grad_norm": 289.8222961425781, + "learning_rate": 1.0093398133291132e-07, + "loss": 17.1643, + "step": 471170 + }, + { + "epoch": 0.9518134107960261, + "grad_norm": 523.9519653320312, + "learning_rate": 1.0086420970362221e-07, + "loss": 18.1259, + "step": 471180 + }, + { + "epoch": 0.9518336114287099, + "grad_norm": 477.6012268066406, + "learning_rate": 1.0079446195212728e-07, + "loss": 27.6164, + "step": 471190 + }, + { + "epoch": 0.9518538120613937, + "grad_norm": 233.2044219970703, + "learning_rate": 1.007247380787657e-07, + "loss": 29.1043, + "step": 471200 + }, + { + "epoch": 0.9518740126940776, + "grad_norm": 843.2547607421875, + "learning_rate": 1.0065503808387777e-07, + "loss": 25.8573, + "step": 471210 + }, + { + "epoch": 0.9518942133267614, + "grad_norm": 200.23541259765625, + "learning_rate": 1.0058536196780266e-07, + "loss": 11.6166, + "step": 471220 + }, + { + "epoch": 0.9519144139594452, + "grad_norm": 330.73638916015625, + "learning_rate": 1.0051570973088064e-07, + "loss": 18.2436, + "step": 471230 + }, + { + "epoch": 0.951934614592129, + "grad_norm": 169.0516815185547, + "learning_rate": 1.0044608137345091e-07, + "loss": 15.007, + "step": 471240 + }, + { + "epoch": 0.9519548152248128, + "grad_norm": 297.7073974609375, + "learning_rate": 1.0037647689585207e-07, + "loss": 13.2723, + "step": 471250 + }, + { + "epoch": 0.9519750158574967, + "grad_norm": 577.2901000976562, + "learning_rate": 1.0030689629842382e-07, + "loss": 25.1775, + "step": 471260 + }, + { + "epoch": 0.9519952164901805, + "grad_norm": 361.49591064453125, + "learning_rate": 1.0023733958150706e-07, + "loss": 18.4722, + "step": 471270 + }, + { + "epoch": 0.9520154171228643, + "grad_norm": 455.4455261230469, + "learning_rate": 1.0016780674543813e-07, + "loss": 13.6768, + "step": 471280 + }, + { + "epoch": 0.9520356177555481, + "grad_norm": 259.64111328125, + "learning_rate": 1.0009829779055679e-07, + "loss": 5.7533, + "step": 471290 + }, + { + "epoch": 0.952055818388232, + "grad_norm": 324.5470886230469, + "learning_rate": 1.0002881271720222e-07, + "loss": 19.4292, + "step": 471300 + }, + { + "epoch": 0.9520760190209158, + "grad_norm": 1066.47021484375, + "learning_rate": 9.995935152571357e-08, + "loss": 19.227, + "step": 471310 + }, + { + "epoch": 0.9520962196535996, + "grad_norm": 455.80023193359375, + "learning_rate": 9.988991421642779e-08, + "loss": 16.4507, + "step": 471320 + }, + { + "epoch": 0.9521164202862834, + "grad_norm": 139.85203552246094, + "learning_rate": 9.98205007896852e-08, + "loss": 11.9968, + "step": 471330 + }, + { + "epoch": 0.9521366209189672, + "grad_norm": 218.869384765625, + "learning_rate": 9.975111124582271e-08, + "loss": 17.4944, + "step": 471340 + }, + { + "epoch": 0.952156821551651, + "grad_norm": 0.0, + "learning_rate": 9.968174558517895e-08, + "loss": 9.4351, + "step": 471350 + }, + { + "epoch": 0.9521770221843349, + "grad_norm": 360.7905578613281, + "learning_rate": 9.961240380809201e-08, + "loss": 17.8279, + "step": 471360 + }, + { + "epoch": 0.9521972228170186, + "grad_norm": 610.8971557617188, + "learning_rate": 9.954308591489991e-08, + "loss": 26.3978, + "step": 471370 + }, + { + "epoch": 0.9522174234497024, + "grad_norm": 0.0, + "learning_rate": 9.947379190594076e-08, + "loss": 23.4322, + "step": 471380 + }, + { + "epoch": 0.9522376240823862, + "grad_norm": 613.8010864257812, + "learning_rate": 9.940452178155147e-08, + "loss": 20.5446, + "step": 471390 + }, + { + "epoch": 0.95225782471507, + "grad_norm": 466.0672607421875, + "learning_rate": 9.933527554207012e-08, + "loss": 23.108, + "step": 471400 + }, + { + "epoch": 0.9522780253477539, + "grad_norm": 515.3238525390625, + "learning_rate": 9.926605318783477e-08, + "loss": 21.6157, + "step": 471410 + }, + { + "epoch": 0.9522982259804377, + "grad_norm": 495.3761291503906, + "learning_rate": 9.919685471918183e-08, + "loss": 34.2023, + "step": 471420 + }, + { + "epoch": 0.9523184266131215, + "grad_norm": 262.55364990234375, + "learning_rate": 9.912768013644936e-08, + "loss": 19.4069, + "step": 471430 + }, + { + "epoch": 0.9523386272458053, + "grad_norm": 674.4443969726562, + "learning_rate": 9.905852943997374e-08, + "loss": 14.3366, + "step": 471440 + }, + { + "epoch": 0.9523588278784891, + "grad_norm": 322.1920166015625, + "learning_rate": 9.898940263009304e-08, + "loss": 18.1222, + "step": 471450 + }, + { + "epoch": 0.952379028511173, + "grad_norm": 358.52862548828125, + "learning_rate": 9.892029970714367e-08, + "loss": 16.8095, + "step": 471460 + }, + { + "epoch": 0.9523992291438568, + "grad_norm": 177.48074340820312, + "learning_rate": 9.885122067146147e-08, + "loss": 12.4875, + "step": 471470 + }, + { + "epoch": 0.9524194297765406, + "grad_norm": 130.83697509765625, + "learning_rate": 9.878216552338504e-08, + "loss": 12.1451, + "step": 471480 + }, + { + "epoch": 0.9524396304092244, + "grad_norm": 295.5693054199219, + "learning_rate": 9.871313426324913e-08, + "loss": 32.5697, + "step": 471490 + }, + { + "epoch": 0.9524598310419082, + "grad_norm": 281.16351318359375, + "learning_rate": 9.864412689139124e-08, + "loss": 15.9962, + "step": 471500 + }, + { + "epoch": 0.952480031674592, + "grad_norm": 359.3190612792969, + "learning_rate": 9.857514340814667e-08, + "loss": 13.375, + "step": 471510 + }, + { + "epoch": 0.9525002323072759, + "grad_norm": 552.441162109375, + "learning_rate": 9.850618381385346e-08, + "loss": 12.7265, + "step": 471520 + }, + { + "epoch": 0.9525204329399597, + "grad_norm": 328.9881286621094, + "learning_rate": 9.843724810884636e-08, + "loss": 11.7589, + "step": 471530 + }, + { + "epoch": 0.9525406335726435, + "grad_norm": 326.3606872558594, + "learning_rate": 9.836833629346121e-08, + "loss": 12.6732, + "step": 471540 + }, + { + "epoch": 0.9525608342053273, + "grad_norm": 527.2721557617188, + "learning_rate": 9.82994483680344e-08, + "loss": 20.0105, + "step": 471550 + }, + { + "epoch": 0.9525810348380112, + "grad_norm": 212.74549865722656, + "learning_rate": 9.823058433290178e-08, + "loss": 15.8167, + "step": 471560 + }, + { + "epoch": 0.952601235470695, + "grad_norm": 212.47674560546875, + "learning_rate": 9.816174418839863e-08, + "loss": 20.676, + "step": 471570 + }, + { + "epoch": 0.9526214361033788, + "grad_norm": 208.3838348388672, + "learning_rate": 9.809292793486025e-08, + "loss": 12.077, + "step": 471580 + }, + { + "epoch": 0.9526416367360626, + "grad_norm": 644.1743774414062, + "learning_rate": 9.802413557262302e-08, + "loss": 19.7677, + "step": 471590 + }, + { + "epoch": 0.9526618373687464, + "grad_norm": 483.9356994628906, + "learning_rate": 9.795536710202169e-08, + "loss": 15.0092, + "step": 471600 + }, + { + "epoch": 0.9526820380014303, + "grad_norm": 355.7208251953125, + "learning_rate": 9.788662252339099e-08, + "loss": 16.6463, + "step": 471610 + }, + { + "epoch": 0.9527022386341141, + "grad_norm": 403.49383544921875, + "learning_rate": 9.781790183706674e-08, + "loss": 19.6125, + "step": 471620 + }, + { + "epoch": 0.9527224392667978, + "grad_norm": 338.376708984375, + "learning_rate": 9.774920504338315e-08, + "loss": 28.9406, + "step": 471630 + }, + { + "epoch": 0.9527426398994816, + "grad_norm": 0.0, + "learning_rate": 9.768053214267548e-08, + "loss": 18.7657, + "step": 471640 + }, + { + "epoch": 0.9527628405321654, + "grad_norm": 458.7870788574219, + "learning_rate": 9.761188313527792e-08, + "loss": 17.2405, + "step": 471650 + }, + { + "epoch": 0.9527830411648492, + "grad_norm": 83.17525482177734, + "learning_rate": 9.754325802152575e-08, + "loss": 9.8613, + "step": 471660 + }, + { + "epoch": 0.9528032417975331, + "grad_norm": 143.07252502441406, + "learning_rate": 9.747465680175316e-08, + "loss": 14.7038, + "step": 471670 + }, + { + "epoch": 0.9528234424302169, + "grad_norm": 373.1251220703125, + "learning_rate": 9.740607947629433e-08, + "loss": 16.7827, + "step": 471680 + }, + { + "epoch": 0.9528436430629007, + "grad_norm": 725.1900024414062, + "learning_rate": 9.733752604548397e-08, + "loss": 23.8853, + "step": 471690 + }, + { + "epoch": 0.9528638436955845, + "grad_norm": 702.7918701171875, + "learning_rate": 9.726899650965626e-08, + "loss": 28.6152, + "step": 471700 + }, + { + "epoch": 0.9528840443282683, + "grad_norm": 432.33929443359375, + "learning_rate": 9.720049086914374e-08, + "loss": 21.788, + "step": 471710 + }, + { + "epoch": 0.9529042449609522, + "grad_norm": 272.7337951660156, + "learning_rate": 9.713200912428222e-08, + "loss": 21.7312, + "step": 471720 + }, + { + "epoch": 0.952924445593636, + "grad_norm": 105.20574951171875, + "learning_rate": 9.706355127540423e-08, + "loss": 12.223, + "step": 471730 + }, + { + "epoch": 0.9529446462263198, + "grad_norm": 39.18534851074219, + "learning_rate": 9.699511732284395e-08, + "loss": 14.3647, + "step": 471740 + }, + { + "epoch": 0.9529648468590036, + "grad_norm": 172.9857635498047, + "learning_rate": 9.692670726693498e-08, + "loss": 12.8175, + "step": 471750 + }, + { + "epoch": 0.9529850474916874, + "grad_norm": 9.908214569091797, + "learning_rate": 9.68583211080104e-08, + "loss": 12.9484, + "step": 471760 + }, + { + "epoch": 0.9530052481243713, + "grad_norm": 11452.4755859375, + "learning_rate": 9.678995884640385e-08, + "loss": 29.713, + "step": 471770 + }, + { + "epoch": 0.9530254487570551, + "grad_norm": 133.68270874023438, + "learning_rate": 9.672162048244838e-08, + "loss": 20.4403, + "step": 471780 + }, + { + "epoch": 0.9530456493897389, + "grad_norm": 81.29830169677734, + "learning_rate": 9.66533060164765e-08, + "loss": 11.5564, + "step": 471790 + }, + { + "epoch": 0.9530658500224227, + "grad_norm": 679.6661376953125, + "learning_rate": 9.658501544882182e-08, + "loss": 20.2916, + "step": 471800 + }, + { + "epoch": 0.9530860506551065, + "grad_norm": 374.6275329589844, + "learning_rate": 9.651674877981743e-08, + "loss": 15.3965, + "step": 471810 + }, + { + "epoch": 0.9531062512877904, + "grad_norm": 220.9196319580078, + "learning_rate": 9.644850600979583e-08, + "loss": 29.6173, + "step": 471820 + }, + { + "epoch": 0.9531264519204742, + "grad_norm": 372.6571350097656, + "learning_rate": 9.638028713908898e-08, + "loss": 10.7237, + "step": 471830 + }, + { + "epoch": 0.953146652553158, + "grad_norm": 210.60226440429688, + "learning_rate": 9.63120921680305e-08, + "loss": 18.6246, + "step": 471840 + }, + { + "epoch": 0.9531668531858418, + "grad_norm": 299.7548522949219, + "learning_rate": 9.62439210969518e-08, + "loss": 18.4083, + "step": 471850 + }, + { + "epoch": 0.9531870538185256, + "grad_norm": 381.3686828613281, + "learning_rate": 9.617577392618538e-08, + "loss": 17.6152, + "step": 471860 + }, + { + "epoch": 0.9532072544512095, + "grad_norm": 281.87298583984375, + "learning_rate": 9.61076506560632e-08, + "loss": 14.8257, + "step": 471870 + }, + { + "epoch": 0.9532274550838933, + "grad_norm": 464.02081298828125, + "learning_rate": 9.603955128691833e-08, + "loss": 15.0501, + "step": 471880 + }, + { + "epoch": 0.953247655716577, + "grad_norm": 191.71810913085938, + "learning_rate": 9.597147581908107e-08, + "loss": 12.9511, + "step": 471890 + }, + { + "epoch": 0.9532678563492608, + "grad_norm": 200.65492248535156, + "learning_rate": 9.590342425288446e-08, + "loss": 15.8451, + "step": 471900 + }, + { + "epoch": 0.9532880569819446, + "grad_norm": 658.044677734375, + "learning_rate": 9.583539658865992e-08, + "loss": 13.1211, + "step": 471910 + }, + { + "epoch": 0.9533082576146285, + "grad_norm": 286.3880615234375, + "learning_rate": 9.576739282673886e-08, + "loss": 14.6873, + "step": 471920 + }, + { + "epoch": 0.9533284582473123, + "grad_norm": 544.264404296875, + "learning_rate": 9.569941296745212e-08, + "loss": 24.7345, + "step": 471930 + }, + { + "epoch": 0.9533486588799961, + "grad_norm": 443.8762512207031, + "learning_rate": 9.563145701113219e-08, + "loss": 33.2531, + "step": 471940 + }, + { + "epoch": 0.9533688595126799, + "grad_norm": 27.334545135498047, + "learning_rate": 9.556352495810994e-08, + "loss": 13.0649, + "step": 471950 + }, + { + "epoch": 0.9533890601453637, + "grad_norm": 729.0777587890625, + "learning_rate": 9.549561680871566e-08, + "loss": 29.3194, + "step": 471960 + }, + { + "epoch": 0.9534092607780476, + "grad_norm": 566.8876342773438, + "learning_rate": 9.542773256328075e-08, + "loss": 18.8543, + "step": 471970 + }, + { + "epoch": 0.9534294614107314, + "grad_norm": 197.99618530273438, + "learning_rate": 9.53598722221366e-08, + "loss": 14.835, + "step": 471980 + }, + { + "epoch": 0.9534496620434152, + "grad_norm": 13.030482292175293, + "learning_rate": 9.529203578561353e-08, + "loss": 16.2845, + "step": 471990 + }, + { + "epoch": 0.953469862676099, + "grad_norm": 906.8263549804688, + "learning_rate": 9.522422325404234e-08, + "loss": 25.3349, + "step": 472000 + }, + { + "epoch": 0.9534900633087828, + "grad_norm": 498.44171142578125, + "learning_rate": 9.515643462775337e-08, + "loss": 24.0487, + "step": 472010 + }, + { + "epoch": 0.9535102639414667, + "grad_norm": 541.366943359375, + "learning_rate": 9.508866990707688e-08, + "loss": 18.6442, + "step": 472020 + }, + { + "epoch": 0.9535304645741505, + "grad_norm": 613.9030151367188, + "learning_rate": 9.502092909234317e-08, + "loss": 18.6889, + "step": 472030 + }, + { + "epoch": 0.9535506652068343, + "grad_norm": 364.0337829589844, + "learning_rate": 9.495321218388309e-08, + "loss": 25.2835, + "step": 472040 + }, + { + "epoch": 0.9535708658395181, + "grad_norm": 375.2162170410156, + "learning_rate": 9.488551918202527e-08, + "loss": 10.3971, + "step": 472050 + }, + { + "epoch": 0.9535910664722019, + "grad_norm": 430.7357177734375, + "learning_rate": 9.481785008710165e-08, + "loss": 19.3899, + "step": 472060 + }, + { + "epoch": 0.9536112671048858, + "grad_norm": 386.5377197265625, + "learning_rate": 9.475020489944032e-08, + "loss": 7.5816, + "step": 472070 + }, + { + "epoch": 0.9536314677375696, + "grad_norm": 249.53707885742188, + "learning_rate": 9.468258361937155e-08, + "loss": 14.0372, + "step": 472080 + }, + { + "epoch": 0.9536516683702534, + "grad_norm": 207.7149658203125, + "learning_rate": 9.461498624722509e-08, + "loss": 15.2406, + "step": 472090 + }, + { + "epoch": 0.9536718690029372, + "grad_norm": 1044.557373046875, + "learning_rate": 9.454741278333013e-08, + "loss": 20.2056, + "step": 472100 + }, + { + "epoch": 0.953692069635621, + "grad_norm": 274.39410400390625, + "learning_rate": 9.447986322801583e-08, + "loss": 17.5673, + "step": 472110 + }, + { + "epoch": 0.9537122702683049, + "grad_norm": 79.78308868408203, + "learning_rate": 9.441233758161139e-08, + "loss": 12.5258, + "step": 472120 + }, + { + "epoch": 0.9537324709009887, + "grad_norm": 32.28790283203125, + "learning_rate": 9.434483584444709e-08, + "loss": 12.0531, + "step": 472130 + }, + { + "epoch": 0.9537526715336724, + "grad_norm": 162.91354370117188, + "learning_rate": 9.427735801685101e-08, + "loss": 12.2741, + "step": 472140 + }, + { + "epoch": 0.9537728721663562, + "grad_norm": 1072.396484375, + "learning_rate": 9.420990409915176e-08, + "loss": 30.2842, + "step": 472150 + }, + { + "epoch": 0.95379307279904, + "grad_norm": 776.0736083984375, + "learning_rate": 9.414247409167854e-08, + "loss": 22.6709, + "step": 472160 + }, + { + "epoch": 0.9538132734317238, + "grad_norm": 681.9462280273438, + "learning_rate": 9.407506799475996e-08, + "loss": 19.9861, + "step": 472170 + }, + { + "epoch": 0.9538334740644077, + "grad_norm": 22.479015350341797, + "learning_rate": 9.400768580872411e-08, + "loss": 13.1713, + "step": 472180 + }, + { + "epoch": 0.9538536746970915, + "grad_norm": 194.88360595703125, + "learning_rate": 9.394032753390014e-08, + "loss": 11.1674, + "step": 472190 + }, + { + "epoch": 0.9538738753297753, + "grad_norm": 1.7064508199691772, + "learning_rate": 9.387299317061615e-08, + "loss": 13.6525, + "step": 472200 + }, + { + "epoch": 0.9538940759624591, + "grad_norm": 13.816083908081055, + "learning_rate": 9.380568271919966e-08, + "loss": 5.8827, + "step": 472210 + }, + { + "epoch": 0.9539142765951429, + "grad_norm": 244.79507446289062, + "learning_rate": 9.373839617997926e-08, + "loss": 21.1322, + "step": 472220 + }, + { + "epoch": 0.9539344772278268, + "grad_norm": 331.3328552246094, + "learning_rate": 9.367113355328361e-08, + "loss": 20.3777, + "step": 472230 + }, + { + "epoch": 0.9539546778605106, + "grad_norm": 237.48936462402344, + "learning_rate": 9.36038948394391e-08, + "loss": 20.8298, + "step": 472240 + }, + { + "epoch": 0.9539748784931944, + "grad_norm": 329.25421142578125, + "learning_rate": 9.353668003877437e-08, + "loss": 24.5786, + "step": 472250 + }, + { + "epoch": 0.9539950791258782, + "grad_norm": 491.8194274902344, + "learning_rate": 9.346948915161636e-08, + "loss": 19.1928, + "step": 472260 + }, + { + "epoch": 0.954015279758562, + "grad_norm": 353.59442138671875, + "learning_rate": 9.340232217829371e-08, + "loss": 14.4394, + "step": 472270 + }, + { + "epoch": 0.9540354803912459, + "grad_norm": 592.506103515625, + "learning_rate": 9.333517911913281e-08, + "loss": 15.0711, + "step": 472280 + }, + { + "epoch": 0.9540556810239297, + "grad_norm": 246.64601135253906, + "learning_rate": 9.326805997446065e-08, + "loss": 31.8441, + "step": 472290 + }, + { + "epoch": 0.9540758816566135, + "grad_norm": 420.1644287109375, + "learning_rate": 9.320096474460527e-08, + "loss": 14.5073, + "step": 472300 + }, + { + "epoch": 0.9540960822892973, + "grad_norm": 676.580078125, + "learning_rate": 9.31338934298931e-08, + "loss": 17.2554, + "step": 472310 + }, + { + "epoch": 0.9541162829219811, + "grad_norm": 452.257568359375, + "learning_rate": 9.306684603065108e-08, + "loss": 18.2278, + "step": 472320 + }, + { + "epoch": 0.954136483554665, + "grad_norm": 362.83880615234375, + "learning_rate": 9.299982254720674e-08, + "loss": 12.9905, + "step": 472330 + }, + { + "epoch": 0.9541566841873488, + "grad_norm": 603.9871215820312, + "learning_rate": 9.293282297988537e-08, + "loss": 25.1383, + "step": 472340 + }, + { + "epoch": 0.9541768848200326, + "grad_norm": 383.53717041015625, + "learning_rate": 9.28658473290145e-08, + "loss": 19.0505, + "step": 472350 + }, + { + "epoch": 0.9541970854527164, + "grad_norm": 301.22833251953125, + "learning_rate": 9.27988955949205e-08, + "loss": 20.0336, + "step": 472360 + }, + { + "epoch": 0.9542172860854002, + "grad_norm": 209.2220458984375, + "learning_rate": 9.273196777792926e-08, + "loss": 19.8336, + "step": 472370 + }, + { + "epoch": 0.9542374867180841, + "grad_norm": 757.2785034179688, + "learning_rate": 9.266506387836771e-08, + "loss": 17.0049, + "step": 472380 + }, + { + "epoch": 0.9542576873507679, + "grad_norm": 260.1947937011719, + "learning_rate": 9.259818389656117e-08, + "loss": 10.2656, + "step": 472390 + }, + { + "epoch": 0.9542778879834516, + "grad_norm": 363.085205078125, + "learning_rate": 9.253132783283548e-08, + "loss": 17.8424, + "step": 472400 + }, + { + "epoch": 0.9542980886161354, + "grad_norm": 322.1111145019531, + "learning_rate": 9.246449568751702e-08, + "loss": 15.6732, + "step": 472410 + }, + { + "epoch": 0.9543182892488192, + "grad_norm": 152.23348999023438, + "learning_rate": 9.239768746093226e-08, + "loss": 13.6807, + "step": 472420 + }, + { + "epoch": 0.954338489881503, + "grad_norm": 496.6689758300781, + "learning_rate": 9.233090315340532e-08, + "loss": 17.5344, + "step": 472430 + }, + { + "epoch": 0.9543586905141869, + "grad_norm": 209.66159057617188, + "learning_rate": 9.226414276526208e-08, + "loss": 20.2614, + "step": 472440 + }, + { + "epoch": 0.9543788911468707, + "grad_norm": 116.48358917236328, + "learning_rate": 9.219740629682838e-08, + "loss": 17.466, + "step": 472450 + }, + { + "epoch": 0.9543990917795545, + "grad_norm": 573.6477661132812, + "learning_rate": 9.213069374842953e-08, + "loss": 19.1514, + "step": 472460 + }, + { + "epoch": 0.9544192924122383, + "grad_norm": 676.3889770507812, + "learning_rate": 9.206400512039026e-08, + "loss": 22.2085, + "step": 472470 + }, + { + "epoch": 0.9544394930449221, + "grad_norm": 319.9701232910156, + "learning_rate": 9.199734041303532e-08, + "loss": 18.8759, + "step": 472480 + }, + { + "epoch": 0.954459693677606, + "grad_norm": 307.66448974609375, + "learning_rate": 9.19306996266911e-08, + "loss": 13.4954, + "step": 472490 + }, + { + "epoch": 0.9544798943102898, + "grad_norm": 592.2040405273438, + "learning_rate": 9.186408276168012e-08, + "loss": 12.9578, + "step": 472500 + }, + { + "epoch": 0.9545000949429736, + "grad_norm": 497.1847229003906, + "learning_rate": 9.179748981832881e-08, + "loss": 17.0803, + "step": 472510 + }, + { + "epoch": 0.9545202955756574, + "grad_norm": 368.4649353027344, + "learning_rate": 9.173092079696188e-08, + "loss": 10.285, + "step": 472520 + }, + { + "epoch": 0.9545404962083412, + "grad_norm": 271.83099365234375, + "learning_rate": 9.166437569790242e-08, + "loss": 34.242, + "step": 472530 + }, + { + "epoch": 0.9545606968410251, + "grad_norm": 302.1800842285156, + "learning_rate": 9.159785452147574e-08, + "loss": 7.3704, + "step": 472540 + }, + { + "epoch": 0.9545808974737089, + "grad_norm": 34.294837951660156, + "learning_rate": 9.153135726800599e-08, + "loss": 12.2237, + "step": 472550 + }, + { + "epoch": 0.9546010981063927, + "grad_norm": 589.4528198242188, + "learning_rate": 9.146488393781683e-08, + "loss": 13.1811, + "step": 472560 + }, + { + "epoch": 0.9546212987390765, + "grad_norm": 535.8561401367188, + "learning_rate": 9.139843453123243e-08, + "loss": 18.9766, + "step": 472570 + }, + { + "epoch": 0.9546414993717603, + "grad_norm": 309.1181335449219, + "learning_rate": 9.133200904857642e-08, + "loss": 16.5908, + "step": 472580 + }, + { + "epoch": 0.9546617000044442, + "grad_norm": 90.18566131591797, + "learning_rate": 9.126560749017354e-08, + "loss": 20.1748, + "step": 472590 + }, + { + "epoch": 0.954681900637128, + "grad_norm": 374.27630615234375, + "learning_rate": 9.119922985634633e-08, + "loss": 14.8302, + "step": 472600 + }, + { + "epoch": 0.9547021012698118, + "grad_norm": 202.9929656982422, + "learning_rate": 9.113287614741895e-08, + "loss": 19.8109, + "step": 472610 + }, + { + "epoch": 0.9547223019024956, + "grad_norm": 497.937744140625, + "learning_rate": 9.106654636371448e-08, + "loss": 15.6629, + "step": 472620 + }, + { + "epoch": 0.9547425025351794, + "grad_norm": 143.7318878173828, + "learning_rate": 9.1000240505556e-08, + "loss": 16.0604, + "step": 472630 + }, + { + "epoch": 0.9547627031678633, + "grad_norm": 204.52513122558594, + "learning_rate": 9.093395857326714e-08, + "loss": 20.6228, + "step": 472640 + }, + { + "epoch": 0.954782903800547, + "grad_norm": 343.8081970214844, + "learning_rate": 9.086770056717099e-08, + "loss": 18.4626, + "step": 472650 + }, + { + "epoch": 0.9548031044332308, + "grad_norm": 177.4273223876953, + "learning_rate": 9.080146648759003e-08, + "loss": 38.9969, + "step": 472660 + }, + { + "epoch": 0.9548233050659146, + "grad_norm": 262.6379089355469, + "learning_rate": 9.073525633484737e-08, + "loss": 11.4115, + "step": 472670 + }, + { + "epoch": 0.9548435056985984, + "grad_norm": 410.4691467285156, + "learning_rate": 9.066907010926551e-08, + "loss": 19.5217, + "step": 472680 + }, + { + "epoch": 0.9548637063312823, + "grad_norm": 372.9002990722656, + "learning_rate": 9.060290781116698e-08, + "loss": 35.4899, + "step": 472690 + }, + { + "epoch": 0.9548839069639661, + "grad_norm": 734.9282836914062, + "learning_rate": 9.053676944087542e-08, + "loss": 29.2235, + "step": 472700 + }, + { + "epoch": 0.9549041075966499, + "grad_norm": 508.56658935546875, + "learning_rate": 9.04706549987111e-08, + "loss": 17.9589, + "step": 472710 + }, + { + "epoch": 0.9549243082293337, + "grad_norm": 352.7237854003906, + "learning_rate": 9.040456448499769e-08, + "loss": 18.5129, + "step": 472720 + }, + { + "epoch": 0.9549445088620175, + "grad_norm": 788.5574951171875, + "learning_rate": 9.03384979000571e-08, + "loss": 24.0234, + "step": 472730 + }, + { + "epoch": 0.9549647094947014, + "grad_norm": 231.6894989013672, + "learning_rate": 9.027245524421135e-08, + "loss": 16.0379, + "step": 472740 + }, + { + "epoch": 0.9549849101273852, + "grad_norm": 354.6516418457031, + "learning_rate": 9.020643651778183e-08, + "loss": 26.0841, + "step": 472750 + }, + { + "epoch": 0.955005110760069, + "grad_norm": 79.38582611083984, + "learning_rate": 9.014044172109049e-08, + "loss": 10.3183, + "step": 472760 + }, + { + "epoch": 0.9550253113927528, + "grad_norm": 317.4454650878906, + "learning_rate": 9.007447085445987e-08, + "loss": 20.7183, + "step": 472770 + }, + { + "epoch": 0.9550455120254366, + "grad_norm": 140.3369140625, + "learning_rate": 9.00085239182108e-08, + "loss": 14.6581, + "step": 472780 + }, + { + "epoch": 0.9550657126581205, + "grad_norm": 209.433349609375, + "learning_rate": 8.99426009126636e-08, + "loss": 17.313, + "step": 472790 + }, + { + "epoch": 0.9550859132908043, + "grad_norm": 313.2442321777344, + "learning_rate": 8.987670183814134e-08, + "loss": 18.3114, + "step": 472800 + }, + { + "epoch": 0.9551061139234881, + "grad_norm": 0.4536556601524353, + "learning_rate": 8.981082669496433e-08, + "loss": 20.4024, + "step": 472810 + }, + { + "epoch": 0.9551263145561719, + "grad_norm": 458.67962646484375, + "learning_rate": 8.974497548345396e-08, + "loss": 26.9035, + "step": 472820 + }, + { + "epoch": 0.9551465151888557, + "grad_norm": 449.9855651855469, + "learning_rate": 8.967914820393108e-08, + "loss": 14.6759, + "step": 472830 + }, + { + "epoch": 0.9551667158215396, + "grad_norm": 200.32005310058594, + "learning_rate": 8.961334485671657e-08, + "loss": 10.905, + "step": 472840 + }, + { + "epoch": 0.9551869164542234, + "grad_norm": 478.463134765625, + "learning_rate": 8.954756544213128e-08, + "loss": 10.1928, + "step": 472850 + }, + { + "epoch": 0.9552071170869072, + "grad_norm": 410.7976989746094, + "learning_rate": 8.948180996049493e-08, + "loss": 26.1069, + "step": 472860 + }, + { + "epoch": 0.955227317719591, + "grad_norm": 193.8374786376953, + "learning_rate": 8.941607841212841e-08, + "loss": 12.1814, + "step": 472870 + }, + { + "epoch": 0.9552475183522748, + "grad_norm": 352.20159912109375, + "learning_rate": 8.93503707973531e-08, + "loss": 18.8841, + "step": 472880 + }, + { + "epoch": 0.9552677189849587, + "grad_norm": 54.54193878173828, + "learning_rate": 8.928468711648875e-08, + "loss": 24.2221, + "step": 472890 + }, + { + "epoch": 0.9552879196176425, + "grad_norm": 270.1540222167969, + "learning_rate": 8.921902736985399e-08, + "loss": 13.8838, + "step": 472900 + }, + { + "epoch": 0.9553081202503262, + "grad_norm": 258.27935791015625, + "learning_rate": 8.915339155777136e-08, + "loss": 27.3903, + "step": 472910 + }, + { + "epoch": 0.95532832088301, + "grad_norm": 851.5621948242188, + "learning_rate": 8.908777968055893e-08, + "loss": 24.5242, + "step": 472920 + }, + { + "epoch": 0.9553485215156938, + "grad_norm": 504.7754211425781, + "learning_rate": 8.902219173853699e-08, + "loss": 16.8366, + "step": 472930 + }, + { + "epoch": 0.9553687221483776, + "grad_norm": 763.4608154296875, + "learning_rate": 8.895662773202529e-08, + "loss": 18.1817, + "step": 472940 + }, + { + "epoch": 0.9553889227810615, + "grad_norm": 500.566650390625, + "learning_rate": 8.889108766134358e-08, + "loss": 21.5876, + "step": 472950 + }, + { + "epoch": 0.9554091234137453, + "grad_norm": 424.56103515625, + "learning_rate": 8.882557152681104e-08, + "loss": 12.3775, + "step": 472960 + }, + { + "epoch": 0.9554293240464291, + "grad_norm": 52.139766693115234, + "learning_rate": 8.876007932874686e-08, + "loss": 9.1044, + "step": 472970 + }, + { + "epoch": 0.9554495246791129, + "grad_norm": 101.28936004638672, + "learning_rate": 8.869461106747024e-08, + "loss": 10.6823, + "step": 472980 + }, + { + "epoch": 0.9554697253117967, + "grad_norm": 415.1844482421875, + "learning_rate": 8.862916674330091e-08, + "loss": 25.4574, + "step": 472990 + }, + { + "epoch": 0.9554899259444806, + "grad_norm": 635.3602905273438, + "learning_rate": 8.856374635655696e-08, + "loss": 14.1896, + "step": 473000 + }, + { + "epoch": 0.9555101265771644, + "grad_norm": 320.00335693359375, + "learning_rate": 8.849834990755757e-08, + "loss": 17.2174, + "step": 473010 + }, + { + "epoch": 0.9555303272098482, + "grad_norm": 408.1123046875, + "learning_rate": 8.843297739662138e-08, + "loss": 17.2332, + "step": 473020 + }, + { + "epoch": 0.955550527842532, + "grad_norm": 37.39815139770508, + "learning_rate": 8.836762882406757e-08, + "loss": 12.6036, + "step": 473030 + }, + { + "epoch": 0.9555707284752158, + "grad_norm": 118.26240539550781, + "learning_rate": 8.830230419021424e-08, + "loss": 26.1722, + "step": 473040 + }, + { + "epoch": 0.9555909291078997, + "grad_norm": 264.1705017089844, + "learning_rate": 8.823700349537945e-08, + "loss": 17.247, + "step": 473050 + }, + { + "epoch": 0.9556111297405835, + "grad_norm": 483.496337890625, + "learning_rate": 8.817172673988184e-08, + "loss": 48.2525, + "step": 473060 + }, + { + "epoch": 0.9556313303732673, + "grad_norm": 314.58447265625, + "learning_rate": 8.810647392404004e-08, + "loss": 15.7937, + "step": 473070 + }, + { + "epoch": 0.9556515310059511, + "grad_norm": 426.01654052734375, + "learning_rate": 8.804124504817046e-08, + "loss": 20.1481, + "step": 473080 + }, + { + "epoch": 0.955671731638635, + "grad_norm": 525.1240844726562, + "learning_rate": 8.797604011259287e-08, + "loss": 19.8814, + "step": 473090 + }, + { + "epoch": 0.9556919322713188, + "grad_norm": 566.5120239257812, + "learning_rate": 8.791085911762476e-08, + "loss": 9.9254, + "step": 473100 + }, + { + "epoch": 0.9557121329040026, + "grad_norm": 656.9776611328125, + "learning_rate": 8.784570206358201e-08, + "loss": 26.1737, + "step": 473110 + }, + { + "epoch": 0.9557323335366864, + "grad_norm": 436.4179382324219, + "learning_rate": 8.778056895078435e-08, + "loss": 27.7056, + "step": 473120 + }, + { + "epoch": 0.9557525341693702, + "grad_norm": 674.1171264648438, + "learning_rate": 8.77154597795482e-08, + "loss": 31.0846, + "step": 473130 + }, + { + "epoch": 0.955772734802054, + "grad_norm": 75.30695343017578, + "learning_rate": 8.765037455019165e-08, + "loss": 17.1403, + "step": 473140 + }, + { + "epoch": 0.9557929354347379, + "grad_norm": 637.0873413085938, + "learning_rate": 8.758531326303054e-08, + "loss": 12.4042, + "step": 473150 + }, + { + "epoch": 0.9558131360674217, + "grad_norm": 203.11627197265625, + "learning_rate": 8.752027591838352e-08, + "loss": 12.956, + "step": 473160 + }, + { + "epoch": 0.9558333367001054, + "grad_norm": 414.611328125, + "learning_rate": 8.7455262516567e-08, + "loss": 10.456, + "step": 473170 + }, + { + "epoch": 0.9558535373327892, + "grad_norm": 177.26925659179688, + "learning_rate": 8.739027305789682e-08, + "loss": 9.3711, + "step": 473180 + }, + { + "epoch": 0.955873737965473, + "grad_norm": 434.41229248046875, + "learning_rate": 8.732530754269108e-08, + "loss": 19.888, + "step": 473190 + }, + { + "epoch": 0.9558939385981569, + "grad_norm": 155.48361206054688, + "learning_rate": 8.726036597126619e-08, + "loss": 27.929, + "step": 473200 + }, + { + "epoch": 0.9559141392308407, + "grad_norm": 146.4423828125, + "learning_rate": 8.719544834393855e-08, + "loss": 11.0543, + "step": 473210 + }, + { + "epoch": 0.9559343398635245, + "grad_norm": 6.916018486022949, + "learning_rate": 8.713055466102349e-08, + "loss": 10.1698, + "step": 473220 + }, + { + "epoch": 0.9559545404962083, + "grad_norm": 46.781036376953125, + "learning_rate": 8.706568492283907e-08, + "loss": 22.7213, + "step": 473230 + }, + { + "epoch": 0.9559747411288921, + "grad_norm": 42.242225646972656, + "learning_rate": 8.700083912970058e-08, + "loss": 14.4235, + "step": 473240 + }, + { + "epoch": 0.955994941761576, + "grad_norm": 473.7392578125, + "learning_rate": 8.693601728192392e-08, + "loss": 19.8019, + "step": 473250 + }, + { + "epoch": 0.9560151423942598, + "grad_norm": 480.6423034667969, + "learning_rate": 8.687121937982545e-08, + "loss": 22.3902, + "step": 473260 + }, + { + "epoch": 0.9560353430269436, + "grad_norm": 186.20558166503906, + "learning_rate": 8.680644542372052e-08, + "loss": 15.3956, + "step": 473270 + }, + { + "epoch": 0.9560555436596274, + "grad_norm": 345.1857604980469, + "learning_rate": 8.674169541392552e-08, + "loss": 12.7148, + "step": 473280 + }, + { + "epoch": 0.9560757442923112, + "grad_norm": 428.6722717285156, + "learning_rate": 8.66769693507552e-08, + "loss": 13.5177, + "step": 473290 + }, + { + "epoch": 0.9560959449249951, + "grad_norm": 675.354248046875, + "learning_rate": 8.661226723452542e-08, + "loss": 31.9146, + "step": 473300 + }, + { + "epoch": 0.9561161455576789, + "grad_norm": 294.69195556640625, + "learning_rate": 8.65475890655515e-08, + "loss": 25.9512, + "step": 473310 + }, + { + "epoch": 0.9561363461903627, + "grad_norm": 163.40345764160156, + "learning_rate": 8.648293484414871e-08, + "loss": 13.3244, + "step": 473320 + }, + { + "epoch": 0.9561565468230465, + "grad_norm": 363.00103759765625, + "learning_rate": 8.641830457063239e-08, + "loss": 15.7228, + "step": 473330 + }, + { + "epoch": 0.9561767474557303, + "grad_norm": 229.41160583496094, + "learning_rate": 8.63536982453167e-08, + "loss": 18.2286, + "step": 473340 + }, + { + "epoch": 0.9561969480884142, + "grad_norm": 86.52777099609375, + "learning_rate": 8.628911586851752e-08, + "loss": 19.5806, + "step": 473350 + }, + { + "epoch": 0.956217148721098, + "grad_norm": 348.8701171875, + "learning_rate": 8.622455744054958e-08, + "loss": 15.0217, + "step": 473360 + }, + { + "epoch": 0.9562373493537818, + "grad_norm": 418.804443359375, + "learning_rate": 8.616002296172654e-08, + "loss": 22.1636, + "step": 473370 + }, + { + "epoch": 0.9562575499864656, + "grad_norm": 265.16455078125, + "learning_rate": 8.609551243236424e-08, + "loss": 16.2389, + "step": 473380 + }, + { + "epoch": 0.9562777506191494, + "grad_norm": 330.1594543457031, + "learning_rate": 8.603102585277634e-08, + "loss": 21.3645, + "step": 473390 + }, + { + "epoch": 0.9562979512518333, + "grad_norm": 229.58462524414062, + "learning_rate": 8.596656322327645e-08, + "loss": 19.1739, + "step": 473400 + }, + { + "epoch": 0.9563181518845171, + "grad_norm": 329.893310546875, + "learning_rate": 8.59021245441799e-08, + "loss": 26.2905, + "step": 473410 + }, + { + "epoch": 0.9563383525172008, + "grad_norm": 271.0732421875, + "learning_rate": 8.583770981580142e-08, + "loss": 14.1466, + "step": 473420 + }, + { + "epoch": 0.9563585531498846, + "grad_norm": 37.961769104003906, + "learning_rate": 8.577331903845243e-08, + "loss": 15.111, + "step": 473430 + }, + { + "epoch": 0.9563787537825684, + "grad_norm": 156.81661987304688, + "learning_rate": 8.57089522124488e-08, + "loss": 22.9995, + "step": 473440 + }, + { + "epoch": 0.9563989544152522, + "grad_norm": 514.8054809570312, + "learning_rate": 8.564460933810414e-08, + "loss": 15.597, + "step": 473450 + }, + { + "epoch": 0.9564191550479361, + "grad_norm": 277.9557800292969, + "learning_rate": 8.558029041573157e-08, + "loss": 18.3253, + "step": 473460 + }, + { + "epoch": 0.9564393556806199, + "grad_norm": 375.3038635253906, + "learning_rate": 8.55159954456436e-08, + "loss": 14.3698, + "step": 473470 + }, + { + "epoch": 0.9564595563133037, + "grad_norm": 115.1312026977539, + "learning_rate": 8.545172442815552e-08, + "loss": 10.1309, + "step": 473480 + }, + { + "epoch": 0.9564797569459875, + "grad_norm": 557.439697265625, + "learning_rate": 8.538747736357933e-08, + "loss": 21.8316, + "step": 473490 + }, + { + "epoch": 0.9564999575786713, + "grad_norm": 112.39806365966797, + "learning_rate": 8.53232542522292e-08, + "loss": 23.8843, + "step": 473500 + }, + { + "epoch": 0.9565201582113552, + "grad_norm": 99.22763061523438, + "learning_rate": 8.525905509441656e-08, + "loss": 16.9909, + "step": 473510 + }, + { + "epoch": 0.956540358844039, + "grad_norm": 216.84629821777344, + "learning_rate": 8.51948798904556e-08, + "loss": 31.9568, + "step": 473520 + }, + { + "epoch": 0.9565605594767228, + "grad_norm": 413.5600280761719, + "learning_rate": 8.513072864065885e-08, + "loss": 20.5399, + "step": 473530 + }, + { + "epoch": 0.9565807601094066, + "grad_norm": 408.629150390625, + "learning_rate": 8.506660134533828e-08, + "loss": 16.17, + "step": 473540 + }, + { + "epoch": 0.9566009607420904, + "grad_norm": 635.8742065429688, + "learning_rate": 8.500249800480754e-08, + "loss": 21.3376, + "step": 473550 + }, + { + "epoch": 0.9566211613747743, + "grad_norm": 622.0107421875, + "learning_rate": 8.493841861937802e-08, + "loss": 35.623, + "step": 473560 + }, + { + "epoch": 0.9566413620074581, + "grad_norm": 311.6985778808594, + "learning_rate": 8.487436318936282e-08, + "loss": 22.0412, + "step": 473570 + }, + { + "epoch": 0.9566615626401419, + "grad_norm": 505.4440612792969, + "learning_rate": 8.481033171507391e-08, + "loss": 22.9561, + "step": 473580 + }, + { + "epoch": 0.9566817632728257, + "grad_norm": 416.3011474609375, + "learning_rate": 8.474632419682327e-08, + "loss": 10.0568, + "step": 473590 + }, + { + "epoch": 0.9567019639055095, + "grad_norm": 246.4512481689453, + "learning_rate": 8.468234063492287e-08, + "loss": 12.2208, + "step": 473600 + }, + { + "epoch": 0.9567221645381934, + "grad_norm": 388.3680725097656, + "learning_rate": 8.461838102968467e-08, + "loss": 27.4346, + "step": 473610 + }, + { + "epoch": 0.9567423651708772, + "grad_norm": 199.04757690429688, + "learning_rate": 8.45544453814201e-08, + "loss": 17.5684, + "step": 473620 + }, + { + "epoch": 0.956762565803561, + "grad_norm": 252.2554168701172, + "learning_rate": 8.449053369044058e-08, + "loss": 43.0305, + "step": 473630 + }, + { + "epoch": 0.9567827664362448, + "grad_norm": 469.8954772949219, + "learning_rate": 8.442664595705862e-08, + "loss": 19.1467, + "step": 473640 + }, + { + "epoch": 0.9568029670689286, + "grad_norm": 150.32028198242188, + "learning_rate": 8.436278218158511e-08, + "loss": 13.8263, + "step": 473650 + }, + { + "epoch": 0.9568231677016125, + "grad_norm": 190.54705810546875, + "learning_rate": 8.429894236433089e-08, + "loss": 25.0537, + "step": 473660 + }, + { + "epoch": 0.9568433683342963, + "grad_norm": 288.9213562011719, + "learning_rate": 8.423512650560795e-08, + "loss": 13.1843, + "step": 473670 + }, + { + "epoch": 0.95686356896698, + "grad_norm": 79.70426177978516, + "learning_rate": 8.417133460572658e-08, + "loss": 13.2238, + "step": 473680 + }, + { + "epoch": 0.9568837695996638, + "grad_norm": 143.64450073242188, + "learning_rate": 8.410756666499709e-08, + "loss": 10.8533, + "step": 473690 + }, + { + "epoch": 0.9569039702323476, + "grad_norm": 439.2137756347656, + "learning_rate": 8.404382268373145e-08, + "loss": 30.3238, + "step": 473700 + }, + { + "epoch": 0.9569241708650315, + "grad_norm": 410.85736083984375, + "learning_rate": 8.39801026622411e-08, + "loss": 11.4916, + "step": 473710 + }, + { + "epoch": 0.9569443714977153, + "grad_norm": 465.1092529296875, + "learning_rate": 8.391640660083411e-08, + "loss": 17.738, + "step": 473720 + }, + { + "epoch": 0.9569645721303991, + "grad_norm": 376.08203125, + "learning_rate": 8.3852734499823e-08, + "loss": 22.8946, + "step": 473730 + }, + { + "epoch": 0.9569847727630829, + "grad_norm": 195.26583862304688, + "learning_rate": 8.3789086359517e-08, + "loss": 15.1908, + "step": 473740 + }, + { + "epoch": 0.9570049733957667, + "grad_norm": 279.01898193359375, + "learning_rate": 8.372546218022747e-08, + "loss": 16.2743, + "step": 473750 + }, + { + "epoch": 0.9570251740284506, + "grad_norm": 338.1087951660156, + "learning_rate": 8.366186196226311e-08, + "loss": 11.4824, + "step": 473760 + }, + { + "epoch": 0.9570453746611344, + "grad_norm": 680.8377685546875, + "learning_rate": 8.35982857059342e-08, + "loss": 26.1408, + "step": 473770 + }, + { + "epoch": 0.9570655752938182, + "grad_norm": 339.9266357421875, + "learning_rate": 8.353473341155216e-08, + "loss": 16.2152, + "step": 473780 + }, + { + "epoch": 0.957085775926502, + "grad_norm": 435.17315673828125, + "learning_rate": 8.347120507942453e-08, + "loss": 24.8962, + "step": 473790 + }, + { + "epoch": 0.9571059765591858, + "grad_norm": 554.631103515625, + "learning_rate": 8.340770070986215e-08, + "loss": 11.9722, + "step": 473800 + }, + { + "epoch": 0.9571261771918697, + "grad_norm": 353.4429626464844, + "learning_rate": 8.334422030317424e-08, + "loss": 16.6615, + "step": 473810 + }, + { + "epoch": 0.9571463778245535, + "grad_norm": 341.6706848144531, + "learning_rate": 8.328076385967055e-08, + "loss": 14.8122, + "step": 473820 + }, + { + "epoch": 0.9571665784572373, + "grad_norm": 367.8915710449219, + "learning_rate": 8.321733137966026e-08, + "loss": 16.9164, + "step": 473830 + }, + { + "epoch": 0.9571867790899211, + "grad_norm": 143.0441436767578, + "learning_rate": 8.315392286345203e-08, + "loss": 12.8845, + "step": 473840 + }, + { + "epoch": 0.9572069797226049, + "grad_norm": 177.54774475097656, + "learning_rate": 8.30905383113556e-08, + "loss": 29.3222, + "step": 473850 + }, + { + "epoch": 0.9572271803552888, + "grad_norm": 143.09291076660156, + "learning_rate": 8.302717772367908e-08, + "loss": 15.2372, + "step": 473860 + }, + { + "epoch": 0.9572473809879726, + "grad_norm": 857.23046875, + "learning_rate": 8.296384110073164e-08, + "loss": 20.7451, + "step": 473870 + }, + { + "epoch": 0.9572675816206564, + "grad_norm": 20.85658836364746, + "learning_rate": 8.290052844282248e-08, + "loss": 24.1314, + "step": 473880 + }, + { + "epoch": 0.9572877822533402, + "grad_norm": 423.4533386230469, + "learning_rate": 8.283723975025971e-08, + "loss": 11.0826, + "step": 473890 + }, + { + "epoch": 0.957307982886024, + "grad_norm": 811.9452514648438, + "learning_rate": 8.277397502335194e-08, + "loss": 13.4805, + "step": 473900 + }, + { + "epoch": 0.9573281835187079, + "grad_norm": 382.5894775390625, + "learning_rate": 8.271073426240672e-08, + "loss": 15.23, + "step": 473910 + }, + { + "epoch": 0.9573483841513917, + "grad_norm": 535.0155029296875, + "learning_rate": 8.264751746773381e-08, + "loss": 18.9655, + "step": 473920 + }, + { + "epoch": 0.9573685847840754, + "grad_norm": 13.196868896484375, + "learning_rate": 8.258432463964016e-08, + "loss": 9.4939, + "step": 473930 + }, + { + "epoch": 0.9573887854167592, + "grad_norm": 94.76506042480469, + "learning_rate": 8.252115577843444e-08, + "loss": 17.6372, + "step": 473940 + }, + { + "epoch": 0.957408986049443, + "grad_norm": 212.6458740234375, + "learning_rate": 8.245801088442362e-08, + "loss": 25.0338, + "step": 473950 + }, + { + "epoch": 0.9574291866821268, + "grad_norm": 258.0672302246094, + "learning_rate": 8.239488995791633e-08, + "loss": 13.0235, + "step": 473960 + }, + { + "epoch": 0.9574493873148107, + "grad_norm": 127.04428100585938, + "learning_rate": 8.233179299922012e-08, + "loss": 9.8082, + "step": 473970 + }, + { + "epoch": 0.9574695879474945, + "grad_norm": 526.0514526367188, + "learning_rate": 8.226872000864194e-08, + "loss": 18.7706, + "step": 473980 + }, + { + "epoch": 0.9574897885801783, + "grad_norm": 562.9580078125, + "learning_rate": 8.22056709864899e-08, + "loss": 27.8233, + "step": 473990 + }, + { + "epoch": 0.9575099892128621, + "grad_norm": 26.61240005493164, + "learning_rate": 8.214264593307097e-08, + "loss": 15.2482, + "step": 474000 + }, + { + "epoch": 0.9575301898455459, + "grad_norm": 424.70050048828125, + "learning_rate": 8.207964484869158e-08, + "loss": 23.444, + "step": 474010 + }, + { + "epoch": 0.9575503904782298, + "grad_norm": 365.2879333496094, + "learning_rate": 8.201666773365979e-08, + "loss": 13.3401, + "step": 474020 + }, + { + "epoch": 0.9575705911109136, + "grad_norm": 239.11878967285156, + "learning_rate": 8.195371458828316e-08, + "loss": 17.2312, + "step": 474030 + }, + { + "epoch": 0.9575907917435974, + "grad_norm": 1522.04150390625, + "learning_rate": 8.1890785412867e-08, + "loss": 38.5878, + "step": 474040 + }, + { + "epoch": 0.9576109923762812, + "grad_norm": 123.2011489868164, + "learning_rate": 8.182788020771826e-08, + "loss": 13.6253, + "step": 474050 + }, + { + "epoch": 0.957631193008965, + "grad_norm": 466.6886901855469, + "learning_rate": 8.176499897314505e-08, + "loss": 18.2509, + "step": 474060 + }, + { + "epoch": 0.9576513936416489, + "grad_norm": 331.8001708984375, + "learning_rate": 8.170214170945212e-08, + "loss": 20.0639, + "step": 474070 + }, + { + "epoch": 0.9576715942743327, + "grad_norm": 618.6167602539062, + "learning_rate": 8.163930841694589e-08, + "loss": 11.9667, + "step": 474080 + }, + { + "epoch": 0.9576917949070165, + "grad_norm": 716.3890380859375, + "learning_rate": 8.157649909593335e-08, + "loss": 22.36, + "step": 474090 + }, + { + "epoch": 0.9577119955397003, + "grad_norm": 433.8890686035156, + "learning_rate": 8.151371374672146e-08, + "loss": 25.017, + "step": 474100 + }, + { + "epoch": 0.9577321961723841, + "grad_norm": 1074.271728515625, + "learning_rate": 8.145095236961387e-08, + "loss": 20.0564, + "step": 474110 + }, + { + "epoch": 0.957752396805068, + "grad_norm": 633.5374755859375, + "learning_rate": 8.13882149649181e-08, + "loss": 19.4297, + "step": 474120 + }, + { + "epoch": 0.9577725974377518, + "grad_norm": 354.6879577636719, + "learning_rate": 8.132550153294005e-08, + "loss": 15.8962, + "step": 474130 + }, + { + "epoch": 0.9577927980704356, + "grad_norm": 583.9142456054688, + "learning_rate": 8.1262812073985e-08, + "loss": 32.1786, + "step": 474140 + }, + { + "epoch": 0.9578129987031194, + "grad_norm": 79.23692321777344, + "learning_rate": 8.120014658835828e-08, + "loss": 18.3146, + "step": 474150 + }, + { + "epoch": 0.9578331993358032, + "grad_norm": 208.16111755371094, + "learning_rate": 8.11375050763652e-08, + "loss": 9.589, + "step": 474160 + }, + { + "epoch": 0.9578533999684871, + "grad_norm": 248.46812438964844, + "learning_rate": 8.107488753831161e-08, + "loss": 8.9652, + "step": 474170 + }, + { + "epoch": 0.9578736006011709, + "grad_norm": 376.1187438964844, + "learning_rate": 8.101229397450228e-08, + "loss": 24.8758, + "step": 474180 + }, + { + "epoch": 0.9578938012338546, + "grad_norm": 769.6351928710938, + "learning_rate": 8.094972438524251e-08, + "loss": 23.3915, + "step": 474190 + }, + { + "epoch": 0.9579140018665384, + "grad_norm": 93.28047943115234, + "learning_rate": 8.088717877083706e-08, + "loss": 11.5412, + "step": 474200 + }, + { + "epoch": 0.9579342024992222, + "grad_norm": 330.7162170410156, + "learning_rate": 8.082465713159126e-08, + "loss": 14.759, + "step": 474210 + }, + { + "epoch": 0.957954403131906, + "grad_norm": 514.1710205078125, + "learning_rate": 8.076215946780874e-08, + "loss": 15.7291, + "step": 474220 + }, + { + "epoch": 0.9579746037645899, + "grad_norm": 361.1650085449219, + "learning_rate": 8.069968577979536e-08, + "loss": 20.1469, + "step": 474230 + }, + { + "epoch": 0.9579948043972737, + "grad_norm": 208.582275390625, + "learning_rate": 8.063723606785478e-08, + "loss": 11.3949, + "step": 474240 + }, + { + "epoch": 0.9580150050299575, + "grad_norm": 294.64715576171875, + "learning_rate": 8.057481033229176e-08, + "loss": 12.0631, + "step": 474250 + }, + { + "epoch": 0.9580352056626413, + "grad_norm": 946.9326782226562, + "learning_rate": 8.051240857341102e-08, + "loss": 23.1346, + "step": 474260 + }, + { + "epoch": 0.9580554062953252, + "grad_norm": 424.99945068359375, + "learning_rate": 8.045003079151514e-08, + "loss": 14.3341, + "step": 474270 + }, + { + "epoch": 0.958075606928009, + "grad_norm": 271.5953369140625, + "learning_rate": 8.038767698690996e-08, + "loss": 22.1632, + "step": 474280 + }, + { + "epoch": 0.9580958075606928, + "grad_norm": 500.50146484375, + "learning_rate": 8.032534715989859e-08, + "loss": 16.352, + "step": 474290 + }, + { + "epoch": 0.9581160081933766, + "grad_norm": 4.317878246307373, + "learning_rate": 8.02630413107841e-08, + "loss": 28.6135, + "step": 474300 + }, + { + "epoch": 0.9581362088260604, + "grad_norm": 193.65724182128906, + "learning_rate": 8.020075943987071e-08, + "loss": 27.3813, + "step": 474310 + }, + { + "epoch": 0.9581564094587443, + "grad_norm": 400.7613525390625, + "learning_rate": 8.013850154746317e-08, + "loss": 25.7266, + "step": 474320 + }, + { + "epoch": 0.9581766100914281, + "grad_norm": 190.98426818847656, + "learning_rate": 8.007626763386345e-08, + "loss": 10.3764, + "step": 474330 + }, + { + "epoch": 0.9581968107241119, + "grad_norm": 268.8352355957031, + "learning_rate": 8.001405769937464e-08, + "loss": 44.0869, + "step": 474340 + }, + { + "epoch": 0.9582170113567957, + "grad_norm": 131.71649169921875, + "learning_rate": 7.995187174430152e-08, + "loss": 7.6256, + "step": 474350 + }, + { + "epoch": 0.9582372119894795, + "grad_norm": 427.9704284667969, + "learning_rate": 7.988970976894605e-08, + "loss": 17.7285, + "step": 474360 + }, + { + "epoch": 0.9582574126221634, + "grad_norm": 440.0675354003906, + "learning_rate": 7.982757177361078e-08, + "loss": 27.5012, + "step": 474370 + }, + { + "epoch": 0.9582776132548472, + "grad_norm": 119.3633804321289, + "learning_rate": 7.976545775859934e-08, + "loss": 16.8517, + "step": 474380 + }, + { + "epoch": 0.958297813887531, + "grad_norm": 484.5781555175781, + "learning_rate": 7.970336772421483e-08, + "loss": 10.7958, + "step": 474390 + }, + { + "epoch": 0.9583180145202148, + "grad_norm": 140.41004943847656, + "learning_rate": 7.964130167075923e-08, + "loss": 27.1162, + "step": 474400 + }, + { + "epoch": 0.9583382151528986, + "grad_norm": 40.1215705871582, + "learning_rate": 7.957925959853452e-08, + "loss": 16.428, + "step": 474410 + }, + { + "epoch": 0.9583584157855825, + "grad_norm": 349.8250732421875, + "learning_rate": 7.951724150784434e-08, + "loss": 15.7237, + "step": 474420 + }, + { + "epoch": 0.9583786164182663, + "grad_norm": 1398.2322998046875, + "learning_rate": 7.945524739899069e-08, + "loss": 16.3054, + "step": 474430 + }, + { + "epoch": 0.95839881705095, + "grad_norm": 201.12515258789062, + "learning_rate": 7.939327727227441e-08, + "loss": 13.2908, + "step": 474440 + }, + { + "epoch": 0.9584190176836338, + "grad_norm": 303.38531494140625, + "learning_rate": 7.933133112799918e-08, + "loss": 5.1576, + "step": 474450 + }, + { + "epoch": 0.9584392183163176, + "grad_norm": 196.3020782470703, + "learning_rate": 7.926940896646584e-08, + "loss": 33.5378, + "step": 474460 + }, + { + "epoch": 0.9584594189490014, + "grad_norm": 862.3712158203125, + "learning_rate": 7.920751078797695e-08, + "loss": 23.6772, + "step": 474470 + }, + { + "epoch": 0.9584796195816853, + "grad_norm": 55.28944778442383, + "learning_rate": 7.914563659283392e-08, + "loss": 6.0035, + "step": 474480 + }, + { + "epoch": 0.9584998202143691, + "grad_norm": 519.5173950195312, + "learning_rate": 7.908378638133762e-08, + "loss": 13.2152, + "step": 474490 + }, + { + "epoch": 0.9585200208470529, + "grad_norm": 744.1380004882812, + "learning_rate": 7.90219601537906e-08, + "loss": 29.0983, + "step": 474500 + }, + { + "epoch": 0.9585402214797367, + "grad_norm": 117.6964111328125, + "learning_rate": 7.896015791049372e-08, + "loss": 21.7835, + "step": 474510 + }, + { + "epoch": 0.9585604221124205, + "grad_norm": 940.3536376953125, + "learning_rate": 7.889837965174784e-08, + "loss": 13.7541, + "step": 474520 + }, + { + "epoch": 0.9585806227451044, + "grad_norm": 88.52216339111328, + "learning_rate": 7.883662537785442e-08, + "loss": 25.2124, + "step": 474530 + }, + { + "epoch": 0.9586008233777882, + "grad_norm": 71.35601043701172, + "learning_rate": 7.877489508911429e-08, + "loss": 29.5819, + "step": 474540 + }, + { + "epoch": 0.958621024010472, + "grad_norm": 405.7100830078125, + "learning_rate": 7.871318878582889e-08, + "loss": 18.056, + "step": 474550 + }, + { + "epoch": 0.9586412246431558, + "grad_norm": 50.22018051147461, + "learning_rate": 7.865150646829855e-08, + "loss": 6.516, + "step": 474560 + }, + { + "epoch": 0.9586614252758396, + "grad_norm": 324.6778869628906, + "learning_rate": 7.858984813682357e-08, + "loss": 13.7222, + "step": 474570 + }, + { + "epoch": 0.9586816259085235, + "grad_norm": 395.4815368652344, + "learning_rate": 7.852821379170538e-08, + "loss": 26.4004, + "step": 474580 + }, + { + "epoch": 0.9587018265412073, + "grad_norm": 268.73529052734375, + "learning_rate": 7.846660343324263e-08, + "loss": 19.3143, + "step": 474590 + }, + { + "epoch": 0.9587220271738911, + "grad_norm": 604.1646728515625, + "learning_rate": 7.840501706173786e-08, + "loss": 19.3332, + "step": 474600 + }, + { + "epoch": 0.9587422278065749, + "grad_norm": 353.2278747558594, + "learning_rate": 7.834345467748972e-08, + "loss": 17.0513, + "step": 474610 + }, + { + "epoch": 0.9587624284392587, + "grad_norm": 295.6312561035156, + "learning_rate": 7.828191628079851e-08, + "loss": 19.6961, + "step": 474620 + }, + { + "epoch": 0.9587826290719426, + "grad_norm": 174.08670043945312, + "learning_rate": 7.8220401871964e-08, + "loss": 25.454, + "step": 474630 + }, + { + "epoch": 0.9588028297046264, + "grad_norm": 54.75748825073242, + "learning_rate": 7.815891145128763e-08, + "loss": 11.6366, + "step": 474640 + }, + { + "epoch": 0.9588230303373102, + "grad_norm": 355.44708251953125, + "learning_rate": 7.809744501906635e-08, + "loss": 13.3484, + "step": 474650 + }, + { + "epoch": 0.958843230969994, + "grad_norm": 343.7234191894531, + "learning_rate": 7.803600257560162e-08, + "loss": 29.4316, + "step": 474660 + }, + { + "epoch": 0.9588634316026778, + "grad_norm": 491.80804443359375, + "learning_rate": 7.797458412119264e-08, + "loss": 14.72, + "step": 474670 + }, + { + "epoch": 0.9588836322353617, + "grad_norm": 367.8491516113281, + "learning_rate": 7.791318965613859e-08, + "loss": 11.354, + "step": 474680 + }, + { + "epoch": 0.9589038328680455, + "grad_norm": 538.6314697265625, + "learning_rate": 7.785181918073814e-08, + "loss": 14.5587, + "step": 474690 + }, + { + "epoch": 0.9589240335007292, + "grad_norm": 453.12664794921875, + "learning_rate": 7.779047269529105e-08, + "loss": 28.2727, + "step": 474700 + }, + { + "epoch": 0.958944234133413, + "grad_norm": 237.34593200683594, + "learning_rate": 7.772915020009708e-08, + "loss": 16.4047, + "step": 474710 + }, + { + "epoch": 0.9589644347660968, + "grad_norm": 76.294921875, + "learning_rate": 7.766785169545376e-08, + "loss": 23.21, + "step": 474720 + }, + { + "epoch": 0.9589846353987806, + "grad_norm": 149.80584716796875, + "learning_rate": 7.760657718165976e-08, + "loss": 22.3909, + "step": 474730 + }, + { + "epoch": 0.9590048360314645, + "grad_norm": 376.0763854980469, + "learning_rate": 7.754532665901482e-08, + "loss": 29.0698, + "step": 474740 + }, + { + "epoch": 0.9590250366641483, + "grad_norm": 468.72503662109375, + "learning_rate": 7.748410012781705e-08, + "loss": 11.0402, + "step": 474750 + }, + { + "epoch": 0.9590452372968321, + "grad_norm": 140.60446166992188, + "learning_rate": 7.742289758836452e-08, + "loss": 13.6312, + "step": 474760 + }, + { + "epoch": 0.9590654379295159, + "grad_norm": 254.8717498779297, + "learning_rate": 7.736171904095591e-08, + "loss": 33.6136, + "step": 474770 + }, + { + "epoch": 0.9590856385621997, + "grad_norm": 121.12136840820312, + "learning_rate": 7.73005644858893e-08, + "loss": 22.7845, + "step": 474780 + }, + { + "epoch": 0.9591058391948836, + "grad_norm": 93.96916198730469, + "learning_rate": 7.723943392346223e-08, + "loss": 20.9016, + "step": 474790 + }, + { + "epoch": 0.9591260398275674, + "grad_norm": 10.694808959960938, + "learning_rate": 7.717832735397335e-08, + "loss": 6.3451, + "step": 474800 + }, + { + "epoch": 0.9591462404602512, + "grad_norm": 118.19041442871094, + "learning_rate": 7.71172447777202e-08, + "loss": 11.2926, + "step": 474810 + }, + { + "epoch": 0.959166441092935, + "grad_norm": 255.38201904296875, + "learning_rate": 7.705618619500032e-08, + "loss": 18.6042, + "step": 474820 + }, + { + "epoch": 0.9591866417256188, + "grad_norm": 29.095916748046875, + "learning_rate": 7.699515160611182e-08, + "loss": 16.4324, + "step": 474830 + }, + { + "epoch": 0.9592068423583027, + "grad_norm": 350.3114013671875, + "learning_rate": 7.693414101135166e-08, + "loss": 13.2131, + "step": 474840 + }, + { + "epoch": 0.9592270429909865, + "grad_norm": 225.74880981445312, + "learning_rate": 7.687315441101795e-08, + "loss": 10.0032, + "step": 474850 + }, + { + "epoch": 0.9592472436236703, + "grad_norm": 950.14892578125, + "learning_rate": 7.681219180540655e-08, + "loss": 18.5111, + "step": 474860 + }, + { + "epoch": 0.9592674442563541, + "grad_norm": 336.2343444824219, + "learning_rate": 7.675125319481614e-08, + "loss": 36.791, + "step": 474870 + }, + { + "epoch": 0.959287644889038, + "grad_norm": 241.7272186279297, + "learning_rate": 7.669033857954255e-08, + "loss": 6.8688, + "step": 474880 + }, + { + "epoch": 0.9593078455217218, + "grad_norm": 182.2271270751953, + "learning_rate": 7.662944795988337e-08, + "loss": 18.8567, + "step": 474890 + }, + { + "epoch": 0.9593280461544056, + "grad_norm": 396.48284912109375, + "learning_rate": 7.656858133613498e-08, + "loss": 15.0419, + "step": 474900 + }, + { + "epoch": 0.9593482467870894, + "grad_norm": 544.12109375, + "learning_rate": 7.65077387085944e-08, + "loss": 18.7172, + "step": 474910 + }, + { + "epoch": 0.9593684474197732, + "grad_norm": 0.0, + "learning_rate": 7.64469200775575e-08, + "loss": 7.0933, + "step": 474920 + }, + { + "epoch": 0.959388648052457, + "grad_norm": 231.64244079589844, + "learning_rate": 7.638612544332181e-08, + "loss": 20.6236, + "step": 474930 + }, + { + "epoch": 0.9594088486851409, + "grad_norm": 248.8718719482422, + "learning_rate": 7.632535480618264e-08, + "loss": 29.4236, + "step": 474940 + }, + { + "epoch": 0.9594290493178247, + "grad_norm": 177.2399139404297, + "learning_rate": 7.626460816643588e-08, + "loss": 15.6347, + "step": 474950 + }, + { + "epoch": 0.9594492499505084, + "grad_norm": 396.2425537109375, + "learning_rate": 7.620388552437907e-08, + "loss": 18.1908, + "step": 474960 + }, + { + "epoch": 0.9594694505831922, + "grad_norm": 185.56700134277344, + "learning_rate": 7.614318688030753e-08, + "loss": 22.8882, + "step": 474970 + }, + { + "epoch": 0.959489651215876, + "grad_norm": 348.06341552734375, + "learning_rate": 7.608251223451601e-08, + "loss": 14.1523, + "step": 474980 + }, + { + "epoch": 0.9595098518485599, + "grad_norm": 102.40057373046875, + "learning_rate": 7.602186158730152e-08, + "loss": 23.2, + "step": 474990 + }, + { + "epoch": 0.9595300524812437, + "grad_norm": 506.0945739746094, + "learning_rate": 7.59612349389599e-08, + "loss": 15.9318, + "step": 475000 + }, + { + "epoch": 0.9595502531139275, + "grad_norm": 328.2109680175781, + "learning_rate": 7.590063228978539e-08, + "loss": 21.683, + "step": 475010 + }, + { + "epoch": 0.9595704537466113, + "grad_norm": 660.8423461914062, + "learning_rate": 7.584005364007386e-08, + "loss": 19.7161, + "step": 475020 + }, + { + "epoch": 0.9595906543792951, + "grad_norm": 1033.015869140625, + "learning_rate": 7.577949899012116e-08, + "loss": 18.3523, + "step": 475030 + }, + { + "epoch": 0.959610855011979, + "grad_norm": 179.03768920898438, + "learning_rate": 7.571896834022152e-08, + "loss": 13.8454, + "step": 475040 + }, + { + "epoch": 0.9596310556446628, + "grad_norm": 301.87847900390625, + "learning_rate": 7.565846169067026e-08, + "loss": 19.6426, + "step": 475050 + }, + { + "epoch": 0.9596512562773466, + "grad_norm": 212.1444854736328, + "learning_rate": 7.559797904176325e-08, + "loss": 10.688, + "step": 475060 + }, + { + "epoch": 0.9596714569100304, + "grad_norm": 205.0482635498047, + "learning_rate": 7.553752039379359e-08, + "loss": 21.518, + "step": 475070 + }, + { + "epoch": 0.9596916575427142, + "grad_norm": 354.538818359375, + "learning_rate": 7.547708574705714e-08, + "loss": 9.6673, + "step": 475080 + }, + { + "epoch": 0.9597118581753981, + "grad_norm": 517.3529663085938, + "learning_rate": 7.541667510184813e-08, + "loss": 11.3955, + "step": 475090 + }, + { + "epoch": 0.9597320588080819, + "grad_norm": 348.29376220703125, + "learning_rate": 7.535628845846077e-08, + "loss": 22.1537, + "step": 475100 + }, + { + "epoch": 0.9597522594407657, + "grad_norm": 358.4526672363281, + "learning_rate": 7.529592581718981e-08, + "loss": 20.0223, + "step": 475110 + }, + { + "epoch": 0.9597724600734495, + "grad_norm": 477.21746826171875, + "learning_rate": 7.52355871783289e-08, + "loss": 27.5024, + "step": 475120 + }, + { + "epoch": 0.9597926607061333, + "grad_norm": 515.2587890625, + "learning_rate": 7.517527254217282e-08, + "loss": 13.9886, + "step": 475130 + }, + { + "epoch": 0.9598128613388172, + "grad_norm": 370.2596130371094, + "learning_rate": 7.511498190901467e-08, + "loss": 17.2683, + "step": 475140 + }, + { + "epoch": 0.959833061971501, + "grad_norm": 705.9306030273438, + "learning_rate": 7.50547152791492e-08, + "loss": 12.0799, + "step": 475150 + }, + { + "epoch": 0.9598532626041848, + "grad_norm": 146.8015594482422, + "learning_rate": 7.499447265286952e-08, + "loss": 14.1286, + "step": 475160 + }, + { + "epoch": 0.9598734632368686, + "grad_norm": 332.004638671875, + "learning_rate": 7.493425403046928e-08, + "loss": 10.444, + "step": 475170 + }, + { + "epoch": 0.9598936638695524, + "grad_norm": 378.7012023925781, + "learning_rate": 7.487405941224268e-08, + "loss": 14.7322, + "step": 475180 + }, + { + "epoch": 0.9599138645022363, + "grad_norm": 475.091552734375, + "learning_rate": 7.481388879848228e-08, + "loss": 14.2766, + "step": 475190 + }, + { + "epoch": 0.9599340651349201, + "grad_norm": 111.34712982177734, + "learning_rate": 7.475374218948118e-08, + "loss": 17.2071, + "step": 475200 + }, + { + "epoch": 0.9599542657676038, + "grad_norm": 187.49609375, + "learning_rate": 7.469361958553356e-08, + "loss": 7.586, + "step": 475210 + }, + { + "epoch": 0.9599744664002876, + "grad_norm": 528.530517578125, + "learning_rate": 7.463352098693199e-08, + "loss": 9.4207, + "step": 475220 + }, + { + "epoch": 0.9599946670329714, + "grad_norm": 682.244140625, + "learning_rate": 7.457344639396902e-08, + "loss": 15.9991, + "step": 475230 + }, + { + "epoch": 0.9600148676656552, + "grad_norm": 180.2545623779297, + "learning_rate": 7.451339580693718e-08, + "loss": 15.8759, + "step": 475240 + }, + { + "epoch": 0.9600350682983391, + "grad_norm": 314.72698974609375, + "learning_rate": 7.445336922613067e-08, + "loss": 20.9214, + "step": 475250 + }, + { + "epoch": 0.9600552689310229, + "grad_norm": 167.43357849121094, + "learning_rate": 7.439336665184039e-08, + "loss": 19.3911, + "step": 475260 + }, + { + "epoch": 0.9600754695637067, + "grad_norm": 223.72708129882812, + "learning_rate": 7.433338808435941e-08, + "loss": 16.5949, + "step": 475270 + }, + { + "epoch": 0.9600956701963905, + "grad_norm": 348.5341491699219, + "learning_rate": 7.427343352398031e-08, + "loss": 40.8376, + "step": 475280 + }, + { + "epoch": 0.9601158708290743, + "grad_norm": 1039.63427734375, + "learning_rate": 7.421350297099505e-08, + "loss": 24.5333, + "step": 475290 + }, + { + "epoch": 0.9601360714617582, + "grad_norm": 505.9857482910156, + "learning_rate": 7.415359642569564e-08, + "loss": 43.2792, + "step": 475300 + }, + { + "epoch": 0.960156272094442, + "grad_norm": 211.31011962890625, + "learning_rate": 7.409371388837405e-08, + "loss": 16.5823, + "step": 475310 + }, + { + "epoch": 0.9601764727271258, + "grad_norm": 541.6423950195312, + "learning_rate": 7.403385535932284e-08, + "loss": 13.8085, + "step": 475320 + }, + { + "epoch": 0.9601966733598096, + "grad_norm": 231.13262939453125, + "learning_rate": 7.397402083883287e-08, + "loss": 6.0951, + "step": 475330 + }, + { + "epoch": 0.9602168739924934, + "grad_norm": 235.80392456054688, + "learning_rate": 7.39142103271956e-08, + "loss": 35.2601, + "step": 475340 + }, + { + "epoch": 0.9602370746251773, + "grad_norm": 87.18898010253906, + "learning_rate": 7.385442382470354e-08, + "loss": 12.2211, + "step": 475350 + }, + { + "epoch": 0.9602572752578611, + "grad_norm": 449.9172058105469, + "learning_rate": 7.379466133164759e-08, + "loss": 11.9918, + "step": 475360 + }, + { + "epoch": 0.9602774758905449, + "grad_norm": 16.43218994140625, + "learning_rate": 7.373492284831862e-08, + "loss": 10.8565, + "step": 475370 + }, + { + "epoch": 0.9602976765232287, + "grad_norm": 662.5327758789062, + "learning_rate": 7.367520837500808e-08, + "loss": 18.2149, + "step": 475380 + }, + { + "epoch": 0.9603178771559125, + "grad_norm": 198.49664306640625, + "learning_rate": 7.361551791200794e-08, + "loss": 13.8516, + "step": 475390 + }, + { + "epoch": 0.9603380777885964, + "grad_norm": 309.06439208984375, + "learning_rate": 7.355585145960743e-08, + "loss": 20.8389, + "step": 475400 + }, + { + "epoch": 0.9603582784212802, + "grad_norm": 412.4136962890625, + "learning_rate": 7.34962090180985e-08, + "loss": 13.8319, + "step": 475410 + }, + { + "epoch": 0.960378479053964, + "grad_norm": 311.71685791015625, + "learning_rate": 7.343659058777098e-08, + "loss": 21.7253, + "step": 475420 + }, + { + "epoch": 0.9603986796866478, + "grad_norm": 0.0, + "learning_rate": 7.33769961689168e-08, + "loss": 12.6646, + "step": 475430 + }, + { + "epoch": 0.9604188803193316, + "grad_norm": 291.0174560546875, + "learning_rate": 7.331742576182466e-08, + "loss": 22.1376, + "step": 475440 + }, + { + "epoch": 0.9604390809520155, + "grad_norm": 91.20379638671875, + "learning_rate": 7.325787936678708e-08, + "loss": 18.15, + "step": 475450 + }, + { + "epoch": 0.9604592815846993, + "grad_norm": 171.52337646484375, + "learning_rate": 7.319835698409217e-08, + "loss": 16.8848, + "step": 475460 + }, + { + "epoch": 0.960479482217383, + "grad_norm": 489.40252685546875, + "learning_rate": 7.313885861403135e-08, + "loss": 20.3697, + "step": 475470 + }, + { + "epoch": 0.9604996828500668, + "grad_norm": 297.5575256347656, + "learning_rate": 7.307938425689388e-08, + "loss": 13.0444, + "step": 475480 + }, + { + "epoch": 0.9605198834827506, + "grad_norm": 816.5662231445312, + "learning_rate": 7.301993391297003e-08, + "loss": 21.4931, + "step": 475490 + }, + { + "epoch": 0.9605400841154345, + "grad_norm": 462.9900817871094, + "learning_rate": 7.296050758254958e-08, + "loss": 16.9646, + "step": 475500 + }, + { + "epoch": 0.9605602847481183, + "grad_norm": 358.0087890625, + "learning_rate": 7.290110526592231e-08, + "loss": 13.6412, + "step": 475510 + }, + { + "epoch": 0.9605804853808021, + "grad_norm": 226.2945098876953, + "learning_rate": 7.284172696337688e-08, + "loss": 16.7119, + "step": 475520 + }, + { + "epoch": 0.9606006860134859, + "grad_norm": 57.92973709106445, + "learning_rate": 7.27823726752036e-08, + "loss": 10.6754, + "step": 475530 + }, + { + "epoch": 0.9606208866461697, + "grad_norm": 444.6774597167969, + "learning_rate": 7.272304240169115e-08, + "loss": 13.8628, + "step": 475540 + }, + { + "epoch": 0.9606410872788536, + "grad_norm": 343.57550048828125, + "learning_rate": 7.266373614312927e-08, + "loss": 10.9022, + "step": 475550 + }, + { + "epoch": 0.9606612879115374, + "grad_norm": 770.028076171875, + "learning_rate": 7.260445389980609e-08, + "loss": 16.674, + "step": 475560 + }, + { + "epoch": 0.9606814885442212, + "grad_norm": 388.7988586425781, + "learning_rate": 7.25451956720119e-08, + "loss": 23.1462, + "step": 475570 + }, + { + "epoch": 0.960701689176905, + "grad_norm": 696.9945068359375, + "learning_rate": 7.248596146003484e-08, + "loss": 17.2478, + "step": 475580 + }, + { + "epoch": 0.9607218898095888, + "grad_norm": 542.0843505859375, + "learning_rate": 7.242675126416299e-08, + "loss": 29.4484, + "step": 475590 + }, + { + "epoch": 0.9607420904422727, + "grad_norm": 227.0558319091797, + "learning_rate": 7.236756508468612e-08, + "loss": 15.132, + "step": 475600 + }, + { + "epoch": 0.9607622910749565, + "grad_norm": 386.5271301269531, + "learning_rate": 7.230840292189179e-08, + "loss": 9.2645, + "step": 475610 + }, + { + "epoch": 0.9607824917076403, + "grad_norm": 256.40350341796875, + "learning_rate": 7.224926477606864e-08, + "loss": 21.749, + "step": 475620 + }, + { + "epoch": 0.9608026923403241, + "grad_norm": 632.7886962890625, + "learning_rate": 7.219015064750478e-08, + "loss": 19.2842, + "step": 475630 + }, + { + "epoch": 0.9608228929730079, + "grad_norm": 666.5670166015625, + "learning_rate": 7.213106053648889e-08, + "loss": 15.0993, + "step": 475640 + }, + { + "epoch": 0.9608430936056918, + "grad_norm": 11.963786125183105, + "learning_rate": 7.207199444330847e-08, + "loss": 14.6905, + "step": 475650 + }, + { + "epoch": 0.9608632942383756, + "grad_norm": 266.03759765625, + "learning_rate": 7.201295236825112e-08, + "loss": 13.424, + "step": 475660 + }, + { + "epoch": 0.9608834948710594, + "grad_norm": 662.6854248046875, + "learning_rate": 7.195393431160491e-08, + "loss": 12.8577, + "step": 475670 + }, + { + "epoch": 0.9609036955037432, + "grad_norm": 32.202144622802734, + "learning_rate": 7.189494027365795e-08, + "loss": 25.5644, + "step": 475680 + }, + { + "epoch": 0.960923896136427, + "grad_norm": 900.0670166015625, + "learning_rate": 7.183597025469669e-08, + "loss": 19.6551, + "step": 475690 + }, + { + "epoch": 0.9609440967691109, + "grad_norm": 299.69903564453125, + "learning_rate": 7.177702425500977e-08, + "loss": 24.7179, + "step": 475700 + }, + { + "epoch": 0.9609642974017947, + "grad_norm": 133.36920166015625, + "learning_rate": 7.171810227488363e-08, + "loss": 10.9328, + "step": 475710 + }, + { + "epoch": 0.9609844980344784, + "grad_norm": 364.9358215332031, + "learning_rate": 7.165920431460637e-08, + "loss": 20.8803, + "step": 475720 + }, + { + "epoch": 0.9610046986671622, + "grad_norm": 402.2028503417969, + "learning_rate": 7.16003303744639e-08, + "loss": 13.6, + "step": 475730 + }, + { + "epoch": 0.961024899299846, + "grad_norm": 288.1083679199219, + "learning_rate": 7.154148045474319e-08, + "loss": 22.305, + "step": 475740 + }, + { + "epoch": 0.9610450999325298, + "grad_norm": 284.3811950683594, + "learning_rate": 7.148265455573233e-08, + "loss": 14.0048, + "step": 475750 + }, + { + "epoch": 0.9610653005652137, + "grad_norm": 295.71832275390625, + "learning_rate": 7.142385267771667e-08, + "loss": 23.9628, + "step": 475760 + }, + { + "epoch": 0.9610855011978975, + "grad_norm": 337.1648254394531, + "learning_rate": 7.136507482098375e-08, + "loss": 17.5986, + "step": 475770 + }, + { + "epoch": 0.9611057018305813, + "grad_norm": 39.2862434387207, + "learning_rate": 7.130632098581947e-08, + "loss": 20.2325, + "step": 475780 + }, + { + "epoch": 0.9611259024632651, + "grad_norm": 1028.286865234375, + "learning_rate": 7.124759117251078e-08, + "loss": 31.2637, + "step": 475790 + }, + { + "epoch": 0.961146103095949, + "grad_norm": 658.9299926757812, + "learning_rate": 7.118888538134361e-08, + "loss": 12.5292, + "step": 475800 + }, + { + "epoch": 0.9611663037286328, + "grad_norm": 798.3705444335938, + "learning_rate": 7.113020361260325e-08, + "loss": 18.518, + "step": 475810 + }, + { + "epoch": 0.9611865043613166, + "grad_norm": 51.175601959228516, + "learning_rate": 7.107154586657727e-08, + "loss": 14.5358, + "step": 475820 + }, + { + "epoch": 0.9612067049940004, + "grad_norm": 144.55946350097656, + "learning_rate": 7.101291214355043e-08, + "loss": 15.6595, + "step": 475830 + }, + { + "epoch": 0.9612269056266842, + "grad_norm": 248.81558227539062, + "learning_rate": 7.095430244380863e-08, + "loss": 11.5365, + "step": 475840 + }, + { + "epoch": 0.961247106259368, + "grad_norm": 728.4669799804688, + "learning_rate": 7.089571676763773e-08, + "loss": 26.9842, + "step": 475850 + }, + { + "epoch": 0.9612673068920519, + "grad_norm": 232.49505615234375, + "learning_rate": 7.083715511532419e-08, + "loss": 16.1763, + "step": 475860 + }, + { + "epoch": 0.9612875075247357, + "grad_norm": 485.213134765625, + "learning_rate": 7.077861748715165e-08, + "loss": 14.5382, + "step": 475870 + }, + { + "epoch": 0.9613077081574195, + "grad_norm": 15.343710899353027, + "learning_rate": 7.072010388340656e-08, + "loss": 7.3748, + "step": 475880 + }, + { + "epoch": 0.9613279087901033, + "grad_norm": 376.9869384765625, + "learning_rate": 7.066161430437368e-08, + "loss": 17.4395, + "step": 475890 + }, + { + "epoch": 0.9613481094227871, + "grad_norm": 403.6138000488281, + "learning_rate": 7.060314875033836e-08, + "loss": 11.6849, + "step": 475900 + }, + { + "epoch": 0.961368310055471, + "grad_norm": 160.56594848632812, + "learning_rate": 7.054470722158535e-08, + "loss": 17.0336, + "step": 475910 + }, + { + "epoch": 0.9613885106881548, + "grad_norm": 440.259521484375, + "learning_rate": 7.048628971839944e-08, + "loss": 25.2777, + "step": 475920 + }, + { + "epoch": 0.9614087113208386, + "grad_norm": 219.51797485351562, + "learning_rate": 7.042789624106594e-08, + "loss": 13.5234, + "step": 475930 + }, + { + "epoch": 0.9614289119535224, + "grad_norm": 540.7685546875, + "learning_rate": 7.036952678986852e-08, + "loss": 25.3983, + "step": 475940 + }, + { + "epoch": 0.9614491125862062, + "grad_norm": 333.6399841308594, + "learning_rate": 7.031118136509196e-08, + "loss": 19.2188, + "step": 475950 + }, + { + "epoch": 0.9614693132188901, + "grad_norm": 218.74139404296875, + "learning_rate": 7.025285996702158e-08, + "loss": 25.5605, + "step": 475960 + }, + { + "epoch": 0.9614895138515739, + "grad_norm": 297.5934753417969, + "learning_rate": 7.019456259594049e-08, + "loss": 19.9465, + "step": 475970 + }, + { + "epoch": 0.9615097144842576, + "grad_norm": 229.1687774658203, + "learning_rate": 7.01362892521329e-08, + "loss": 16.26, + "step": 475980 + }, + { + "epoch": 0.9615299151169414, + "grad_norm": 333.52423095703125, + "learning_rate": 7.007803993588358e-08, + "loss": 17.7302, + "step": 475990 + }, + { + "epoch": 0.9615501157496252, + "grad_norm": 419.1000061035156, + "learning_rate": 7.001981464747565e-08, + "loss": 21.5595, + "step": 476000 + }, + { + "epoch": 0.961570316382309, + "grad_norm": 307.7343444824219, + "learning_rate": 6.996161338719332e-08, + "loss": 21.56, + "step": 476010 + }, + { + "epoch": 0.9615905170149929, + "grad_norm": 160.56182861328125, + "learning_rate": 6.990343615532025e-08, + "loss": 19.3788, + "step": 476020 + }, + { + "epoch": 0.9616107176476767, + "grad_norm": 504.5084228515625, + "learning_rate": 6.9845282952139e-08, + "loss": 18.6281, + "step": 476030 + }, + { + "epoch": 0.9616309182803605, + "grad_norm": 415.0246276855469, + "learning_rate": 6.978715377793489e-08, + "loss": 16.6749, + "step": 476040 + }, + { + "epoch": 0.9616511189130443, + "grad_norm": 234.70310974121094, + "learning_rate": 6.972904863298991e-08, + "loss": 17.0805, + "step": 476050 + }, + { + "epoch": 0.9616713195457282, + "grad_norm": 501.95538330078125, + "learning_rate": 6.967096751758773e-08, + "loss": 15.4388, + "step": 476060 + }, + { + "epoch": 0.961691520178412, + "grad_norm": 480.3716735839844, + "learning_rate": 6.961291043201145e-08, + "loss": 17.3112, + "step": 476070 + }, + { + "epoch": 0.9617117208110958, + "grad_norm": 252.87176513671875, + "learning_rate": 6.955487737654309e-08, + "loss": 13.0375, + "step": 476080 + }, + { + "epoch": 0.9617319214437796, + "grad_norm": 326.9039001464844, + "learning_rate": 6.949686835146685e-08, + "loss": 15.2768, + "step": 476090 + }, + { + "epoch": 0.9617521220764634, + "grad_norm": 370.8696594238281, + "learning_rate": 6.943888335706472e-08, + "loss": 23.2639, + "step": 476100 + }, + { + "epoch": 0.9617723227091473, + "grad_norm": 819.5240478515625, + "learning_rate": 6.938092239361982e-08, + "loss": 12.4631, + "step": 476110 + }, + { + "epoch": 0.9617925233418311, + "grad_norm": 419.8603820800781, + "learning_rate": 6.932298546141413e-08, + "loss": 13.5396, + "step": 476120 + }, + { + "epoch": 0.9618127239745149, + "grad_norm": 370.8901062011719, + "learning_rate": 6.926507256072967e-08, + "loss": 22.5444, + "step": 476130 + }, + { + "epoch": 0.9618329246071987, + "grad_norm": 274.3182067871094, + "learning_rate": 6.920718369185009e-08, + "loss": 14.7543, + "step": 476140 + }, + { + "epoch": 0.9618531252398825, + "grad_norm": 449.9986267089844, + "learning_rate": 6.914931885505626e-08, + "loss": 13.2917, + "step": 476150 + }, + { + "epoch": 0.9618733258725664, + "grad_norm": 392.4185485839844, + "learning_rate": 6.909147805063021e-08, + "loss": 43.4365, + "step": 476160 + }, + { + "epoch": 0.9618935265052502, + "grad_norm": 379.1201477050781, + "learning_rate": 6.903366127885447e-08, + "loss": 13.0509, + "step": 476170 + }, + { + "epoch": 0.961913727137934, + "grad_norm": 269.5074157714844, + "learning_rate": 6.897586854001048e-08, + "loss": 28.5801, + "step": 476180 + }, + { + "epoch": 0.9619339277706178, + "grad_norm": 428.8318176269531, + "learning_rate": 6.89180998343808e-08, + "loss": 23.163, + "step": 476190 + }, + { + "epoch": 0.9619541284033016, + "grad_norm": 270.17315673828125, + "learning_rate": 6.88603551622452e-08, + "loss": 16.6752, + "step": 476200 + }, + { + "epoch": 0.9619743290359855, + "grad_norm": 870.8080444335938, + "learning_rate": 6.88026345238868e-08, + "loss": 30.2955, + "step": 476210 + }, + { + "epoch": 0.9619945296686693, + "grad_norm": 471.13787841796875, + "learning_rate": 6.874493791958648e-08, + "loss": 13.3997, + "step": 476220 + }, + { + "epoch": 0.9620147303013531, + "grad_norm": 261.5706481933594, + "learning_rate": 6.868726534962456e-08, + "loss": 20.7547, + "step": 476230 + }, + { + "epoch": 0.9620349309340368, + "grad_norm": 308.1227722167969, + "learning_rate": 6.862961681428304e-08, + "loss": 32.5355, + "step": 476240 + }, + { + "epoch": 0.9620551315667206, + "grad_norm": 321.1436767578125, + "learning_rate": 6.857199231384282e-08, + "loss": 24.4721, + "step": 476250 + }, + { + "epoch": 0.9620753321994044, + "grad_norm": 461.12042236328125, + "learning_rate": 6.851439184858477e-08, + "loss": 23.3178, + "step": 476260 + }, + { + "epoch": 0.9620955328320883, + "grad_norm": 445.0748596191406, + "learning_rate": 6.845681541878924e-08, + "loss": 16.4683, + "step": 476270 + }, + { + "epoch": 0.9621157334647721, + "grad_norm": 237.60678100585938, + "learning_rate": 6.83992630247371e-08, + "loss": 14.815, + "step": 476280 + }, + { + "epoch": 0.9621359340974559, + "grad_norm": 29.770912170410156, + "learning_rate": 6.834173466670923e-08, + "loss": 13.1535, + "step": 476290 + }, + { + "epoch": 0.9621561347301397, + "grad_norm": 1804.21337890625, + "learning_rate": 6.828423034498488e-08, + "loss": 13.4355, + "step": 476300 + }, + { + "epoch": 0.9621763353628235, + "grad_norm": 363.8840637207031, + "learning_rate": 6.822675005984547e-08, + "loss": 12.4518, + "step": 476310 + }, + { + "epoch": 0.9621965359955074, + "grad_norm": 387.9690856933594, + "learning_rate": 6.816929381157023e-08, + "loss": 14.038, + "step": 476320 + }, + { + "epoch": 0.9622167366281912, + "grad_norm": 282.8274230957031, + "learning_rate": 6.811186160044004e-08, + "loss": 22.7182, + "step": 476330 + }, + { + "epoch": 0.962236937260875, + "grad_norm": 395.19390869140625, + "learning_rate": 6.805445342673467e-08, + "loss": 16.2363, + "step": 476340 + }, + { + "epoch": 0.9622571378935588, + "grad_norm": 541.1038208007812, + "learning_rate": 6.799706929073335e-08, + "loss": 17.5635, + "step": 476350 + }, + { + "epoch": 0.9622773385262426, + "grad_norm": 34.52801513671875, + "learning_rate": 6.793970919271642e-08, + "loss": 19.4252, + "step": 476360 + }, + { + "epoch": 0.9622975391589265, + "grad_norm": 540.6629028320312, + "learning_rate": 6.788237313296309e-08, + "loss": 21.7979, + "step": 476370 + }, + { + "epoch": 0.9623177397916103, + "grad_norm": 432.80377197265625, + "learning_rate": 6.782506111175313e-08, + "loss": 21.8245, + "step": 476380 + }, + { + "epoch": 0.9623379404242941, + "grad_norm": 271.496337890625, + "learning_rate": 6.776777312936522e-08, + "loss": 8.9662, + "step": 476390 + }, + { + "epoch": 0.9623581410569779, + "grad_norm": 3.349520444869995, + "learning_rate": 6.771050918607913e-08, + "loss": 20.389, + "step": 476400 + }, + { + "epoch": 0.9623783416896617, + "grad_norm": 240.0352325439453, + "learning_rate": 6.765326928217408e-08, + "loss": 20.2974, + "step": 476410 + }, + { + "epoch": 0.9623985423223456, + "grad_norm": 186.44142150878906, + "learning_rate": 6.759605341792819e-08, + "loss": 16.1324, + "step": 476420 + }, + { + "epoch": 0.9624187429550294, + "grad_norm": 404.8953857421875, + "learning_rate": 6.753886159362122e-08, + "loss": 17.5296, + "step": 476430 + }, + { + "epoch": 0.9624389435877132, + "grad_norm": 338.2667541503906, + "learning_rate": 6.748169380953184e-08, + "loss": 21.9625, + "step": 476440 + }, + { + "epoch": 0.962459144220397, + "grad_norm": 359.7268371582031, + "learning_rate": 6.742455006593762e-08, + "loss": 22.326, + "step": 476450 + }, + { + "epoch": 0.9624793448530808, + "grad_norm": 498.0181579589844, + "learning_rate": 6.736743036311832e-08, + "loss": 29.3099, + "step": 476460 + }, + { + "epoch": 0.9624995454857647, + "grad_norm": 707.2603149414062, + "learning_rate": 6.731033470135262e-08, + "loss": 20.6502, + "step": 476470 + }, + { + "epoch": 0.9625197461184485, + "grad_norm": 489.98052978515625, + "learning_rate": 6.725326308091751e-08, + "loss": 23.2261, + "step": 476480 + }, + { + "epoch": 0.9625399467511322, + "grad_norm": 297.9427490234375, + "learning_rate": 6.71962155020911e-08, + "loss": 22.8428, + "step": 476490 + }, + { + "epoch": 0.962560147383816, + "grad_norm": 706.0108642578125, + "learning_rate": 6.713919196515317e-08, + "loss": 24.6106, + "step": 476500 + }, + { + "epoch": 0.9625803480164998, + "grad_norm": 370.75567626953125, + "learning_rate": 6.708219247038017e-08, + "loss": 18.245, + "step": 476510 + }, + { + "epoch": 0.9626005486491837, + "grad_norm": 375.1898498535156, + "learning_rate": 6.702521701804965e-08, + "loss": 18.2819, + "step": 476520 + }, + { + "epoch": 0.9626207492818675, + "grad_norm": 178.1335906982422, + "learning_rate": 6.696826560844027e-08, + "loss": 18.5652, + "step": 476530 + }, + { + "epoch": 0.9626409499145513, + "grad_norm": 248.6165313720703, + "learning_rate": 6.691133824183016e-08, + "loss": 31.3355, + "step": 476540 + }, + { + "epoch": 0.9626611505472351, + "grad_norm": 87.95904541015625, + "learning_rate": 6.685443491849464e-08, + "loss": 20.726, + "step": 476550 + }, + { + "epoch": 0.9626813511799189, + "grad_norm": 90.31098175048828, + "learning_rate": 6.679755563871292e-08, + "loss": 14.6505, + "step": 476560 + }, + { + "epoch": 0.9627015518126028, + "grad_norm": 547.4888305664062, + "learning_rate": 6.674070040276148e-08, + "loss": 22.3104, + "step": 476570 + }, + { + "epoch": 0.9627217524452866, + "grad_norm": 423.2607421875, + "learning_rate": 6.66838692109173e-08, + "loss": 23.5625, + "step": 476580 + }, + { + "epoch": 0.9627419530779704, + "grad_norm": 170.6096649169922, + "learning_rate": 6.662706206345793e-08, + "loss": 12.0225, + "step": 476590 + }, + { + "epoch": 0.9627621537106542, + "grad_norm": 213.32203674316406, + "learning_rate": 6.657027896065982e-08, + "loss": 16.2692, + "step": 476600 + }, + { + "epoch": 0.962782354343338, + "grad_norm": 164.5532684326172, + "learning_rate": 6.651351990279997e-08, + "loss": 3.9653, + "step": 476610 + }, + { + "epoch": 0.9628025549760219, + "grad_norm": 221.3753204345703, + "learning_rate": 6.645678489015428e-08, + "loss": 18.0769, + "step": 476620 + }, + { + "epoch": 0.9628227556087057, + "grad_norm": 445.78863525390625, + "learning_rate": 6.64000739230003e-08, + "loss": 27.9703, + "step": 476630 + }, + { + "epoch": 0.9628429562413895, + "grad_norm": 475.8753356933594, + "learning_rate": 6.634338700161392e-08, + "loss": 24.063, + "step": 476640 + }, + { + "epoch": 0.9628631568740733, + "grad_norm": 65.68719482421875, + "learning_rate": 6.628672412627158e-08, + "loss": 11.14, + "step": 476650 + }, + { + "epoch": 0.9628833575067571, + "grad_norm": 330.69525146484375, + "learning_rate": 6.623008529724917e-08, + "loss": 17.0637, + "step": 476660 + }, + { + "epoch": 0.962903558139441, + "grad_norm": 321.6661071777344, + "learning_rate": 6.617347051482315e-08, + "loss": 18.995, + "step": 476670 + }, + { + "epoch": 0.9629237587721248, + "grad_norm": 493.0853271484375, + "learning_rate": 6.611687977926939e-08, + "loss": 14.4967, + "step": 476680 + }, + { + "epoch": 0.9629439594048086, + "grad_norm": 328.06475830078125, + "learning_rate": 6.606031309086269e-08, + "loss": 17.7986, + "step": 476690 + }, + { + "epoch": 0.9629641600374924, + "grad_norm": 445.1838684082031, + "learning_rate": 6.60037704498806e-08, + "loss": 24.9507, + "step": 476700 + }, + { + "epoch": 0.9629843606701762, + "grad_norm": 174.0291748046875, + "learning_rate": 6.594725185659734e-08, + "loss": 18.0457, + "step": 476710 + }, + { + "epoch": 0.96300456130286, + "grad_norm": 1234.39453125, + "learning_rate": 6.58907573112888e-08, + "loss": 14.8554, + "step": 476720 + }, + { + "epoch": 0.9630247619355439, + "grad_norm": 286.1321716308594, + "learning_rate": 6.583428681423032e-08, + "loss": 27.2782, + "step": 476730 + }, + { + "epoch": 0.9630449625682277, + "grad_norm": 372.0481262207031, + "learning_rate": 6.577784036569668e-08, + "loss": 15.7032, + "step": 476740 + }, + { + "epoch": 0.9630651632009114, + "grad_norm": 583.0672607421875, + "learning_rate": 6.572141796596376e-08, + "loss": 18.4742, + "step": 476750 + }, + { + "epoch": 0.9630853638335952, + "grad_norm": 189.72605895996094, + "learning_rate": 6.566501961530636e-08, + "loss": 13.0015, + "step": 476760 + }, + { + "epoch": 0.963105564466279, + "grad_norm": 115.40235137939453, + "learning_rate": 6.560864531399869e-08, + "loss": 13.4514, + "step": 476770 + }, + { + "epoch": 0.9631257650989629, + "grad_norm": 412.1432800292969, + "learning_rate": 6.555229506231608e-08, + "loss": 22.9218, + "step": 476780 + }, + { + "epoch": 0.9631459657316467, + "grad_norm": 300.8472900390625, + "learning_rate": 6.549596886053334e-08, + "loss": 16.8421, + "step": 476790 + }, + { + "epoch": 0.9631661663643305, + "grad_norm": 140.56094360351562, + "learning_rate": 6.543966670892465e-08, + "loss": 21.1918, + "step": 476800 + }, + { + "epoch": 0.9631863669970143, + "grad_norm": 39.55817794799805, + "learning_rate": 6.538338860776483e-08, + "loss": 10.6026, + "step": 476810 + }, + { + "epoch": 0.9632065676296981, + "grad_norm": 251.70135498046875, + "learning_rate": 6.532713455732753e-08, + "loss": 18.2131, + "step": 476820 + }, + { + "epoch": 0.963226768262382, + "grad_norm": 437.0846862792969, + "learning_rate": 6.527090455788754e-08, + "loss": 13.2615, + "step": 476830 + }, + { + "epoch": 0.9632469688950658, + "grad_norm": 323.1280212402344, + "learning_rate": 6.521469860971852e-08, + "loss": 15.5527, + "step": 476840 + }, + { + "epoch": 0.9632671695277496, + "grad_norm": 469.2850341796875, + "learning_rate": 6.515851671309414e-08, + "loss": 23.856, + "step": 476850 + }, + { + "epoch": 0.9632873701604334, + "grad_norm": 561.0780029296875, + "learning_rate": 6.51023588682892e-08, + "loss": 17.9284, + "step": 476860 + }, + { + "epoch": 0.9633075707931172, + "grad_norm": 952.8590698242188, + "learning_rate": 6.504622507557679e-08, + "loss": 32.2216, + "step": 476870 + }, + { + "epoch": 0.9633277714258011, + "grad_norm": 398.85455322265625, + "learning_rate": 6.499011533523003e-08, + "loss": 18.0433, + "step": 476880 + }, + { + "epoch": 0.9633479720584849, + "grad_norm": 258.42388916015625, + "learning_rate": 6.493402964752371e-08, + "loss": 19.5101, + "step": 476890 + }, + { + "epoch": 0.9633681726911687, + "grad_norm": 526.315185546875, + "learning_rate": 6.487796801272983e-08, + "loss": 16.5671, + "step": 476900 + }, + { + "epoch": 0.9633883733238525, + "grad_norm": 0.0, + "learning_rate": 6.482193043112206e-08, + "loss": 10.8797, + "step": 476910 + }, + { + "epoch": 0.9634085739565363, + "grad_norm": 394.5545654296875, + "learning_rate": 6.476591690297407e-08, + "loss": 11.1586, + "step": 476920 + }, + { + "epoch": 0.9634287745892202, + "grad_norm": 235.354736328125, + "learning_rate": 6.470992742855786e-08, + "loss": 19.3661, + "step": 476930 + }, + { + "epoch": 0.963448975221904, + "grad_norm": 375.6850280761719, + "learning_rate": 6.465396200814766e-08, + "loss": 18.9032, + "step": 476940 + }, + { + "epoch": 0.9634691758545878, + "grad_norm": 444.804931640625, + "learning_rate": 6.459802064201437e-08, + "loss": 19.9718, + "step": 476950 + }, + { + "epoch": 0.9634893764872716, + "grad_norm": 340.27679443359375, + "learning_rate": 6.454210333043275e-08, + "loss": 18.8783, + "step": 476960 + }, + { + "epoch": 0.9635095771199554, + "grad_norm": 123.67523193359375, + "learning_rate": 6.448621007367428e-08, + "loss": 21.5001, + "step": 476970 + }, + { + "epoch": 0.9635297777526393, + "grad_norm": 256.27716064453125, + "learning_rate": 6.443034087201095e-08, + "loss": 21.0008, + "step": 476980 + }, + { + "epoch": 0.9635499783853231, + "grad_norm": 645.1035766601562, + "learning_rate": 6.437449572571586e-08, + "loss": 29.0721, + "step": 476990 + }, + { + "epoch": 0.9635701790180068, + "grad_norm": 40.7523078918457, + "learning_rate": 6.431867463506047e-08, + "loss": 11.9315, + "step": 477000 + }, + { + "epoch": 0.9635903796506906, + "grad_norm": 276.47442626953125, + "learning_rate": 6.426287760031736e-08, + "loss": 14.655, + "step": 477010 + }, + { + "epoch": 0.9636105802833744, + "grad_norm": 95.3863525390625, + "learning_rate": 6.42071046217585e-08, + "loss": 8.3127, + "step": 477020 + }, + { + "epoch": 0.9636307809160582, + "grad_norm": 156.61651611328125, + "learning_rate": 6.415135569965536e-08, + "loss": 18.0984, + "step": 477030 + }, + { + "epoch": 0.9636509815487421, + "grad_norm": 303.97393798828125, + "learning_rate": 6.40956308342805e-08, + "loss": 26.0625, + "step": 477040 + }, + { + "epoch": 0.9636711821814259, + "grad_norm": 451.6395568847656, + "learning_rate": 6.403993002590425e-08, + "loss": 13.9365, + "step": 477050 + }, + { + "epoch": 0.9636913828141097, + "grad_norm": 103.19915008544922, + "learning_rate": 6.398425327479863e-08, + "loss": 12.9072, + "step": 477060 + }, + { + "epoch": 0.9637115834467935, + "grad_norm": 421.86407470703125, + "learning_rate": 6.392860058123506e-08, + "loss": 14.673, + "step": 477070 + }, + { + "epoch": 0.9637317840794773, + "grad_norm": 330.48236083984375, + "learning_rate": 6.387297194548558e-08, + "loss": 29.77, + "step": 477080 + }, + { + "epoch": 0.9637519847121612, + "grad_norm": 211.17384338378906, + "learning_rate": 6.381736736781996e-08, + "loss": 11.6155, + "step": 477090 + }, + { + "epoch": 0.963772185344845, + "grad_norm": 212.2564697265625, + "learning_rate": 6.376178684850965e-08, + "loss": 19.4258, + "step": 477100 + }, + { + "epoch": 0.9637923859775288, + "grad_norm": 375.3379211425781, + "learning_rate": 6.370623038782608e-08, + "loss": 27.173, + "step": 477110 + }, + { + "epoch": 0.9638125866102126, + "grad_norm": 27.419113159179688, + "learning_rate": 6.365069798603962e-08, + "loss": 31.2934, + "step": 477120 + }, + { + "epoch": 0.9638327872428964, + "grad_norm": 335.3210144042969, + "learning_rate": 6.359518964342059e-08, + "loss": 12.9161, + "step": 477130 + }, + { + "epoch": 0.9638529878755803, + "grad_norm": 405.4734191894531, + "learning_rate": 6.353970536024045e-08, + "loss": 16.8732, + "step": 477140 + }, + { + "epoch": 0.9638731885082641, + "grad_norm": 583.7369384765625, + "learning_rate": 6.348424513676898e-08, + "loss": 17.2103, + "step": 477150 + }, + { + "epoch": 0.9638933891409479, + "grad_norm": 540.9656982421875, + "learning_rate": 6.342880897327597e-08, + "loss": 24.3372, + "step": 477160 + }, + { + "epoch": 0.9639135897736317, + "grad_norm": 146.86822509765625, + "learning_rate": 6.337339687003286e-08, + "loss": 13.8029, + "step": 477170 + }, + { + "epoch": 0.9639337904063155, + "grad_norm": 539.55712890625, + "learning_rate": 6.331800882730887e-08, + "loss": 15.6876, + "step": 477180 + }, + { + "epoch": 0.9639539910389994, + "grad_norm": 153.9258575439453, + "learning_rate": 6.326264484537437e-08, + "loss": 12.555, + "step": 477190 + }, + { + "epoch": 0.9639741916716832, + "grad_norm": 533.649169921875, + "learning_rate": 6.3207304924498e-08, + "loss": 22.0611, + "step": 477200 + }, + { + "epoch": 0.963994392304367, + "grad_norm": 455.38360595703125, + "learning_rate": 6.315198906495179e-08, + "loss": 21.8381, + "step": 477210 + }, + { + "epoch": 0.9640145929370508, + "grad_norm": 396.9118347167969, + "learning_rate": 6.30966972670033e-08, + "loss": 22.9322, + "step": 477220 + }, + { + "epoch": 0.9640347935697346, + "grad_norm": 515.7533569335938, + "learning_rate": 6.304142953092285e-08, + "loss": 17.8945, + "step": 477230 + }, + { + "epoch": 0.9640549942024185, + "grad_norm": 493.3134460449219, + "learning_rate": 6.298618585697968e-08, + "loss": 13.7864, + "step": 477240 + }, + { + "epoch": 0.9640751948351023, + "grad_norm": 143.59620666503906, + "learning_rate": 6.293096624544304e-08, + "loss": 8.9156, + "step": 477250 + }, + { + "epoch": 0.964095395467786, + "grad_norm": 500.61334228515625, + "learning_rate": 6.287577069658213e-08, + "loss": 10.7181, + "step": 477260 + }, + { + "epoch": 0.9641155961004698, + "grad_norm": 179.6245574951172, + "learning_rate": 6.282059921066564e-08, + "loss": 14.0273, + "step": 477270 + }, + { + "epoch": 0.9641357967331536, + "grad_norm": 224.61199951171875, + "learning_rate": 6.276545178796333e-08, + "loss": 9.6322, + "step": 477280 + }, + { + "epoch": 0.9641559973658375, + "grad_norm": 408.6536865234375, + "learning_rate": 6.271032842874281e-08, + "loss": 26.8837, + "step": 477290 + }, + { + "epoch": 0.9641761979985213, + "grad_norm": 296.5220031738281, + "learning_rate": 6.265522913327326e-08, + "loss": 29.1121, + "step": 477300 + }, + { + "epoch": 0.9641963986312051, + "grad_norm": 2035.961669921875, + "learning_rate": 6.260015390182395e-08, + "loss": 31.4718, + "step": 477310 + }, + { + "epoch": 0.9642165992638889, + "grad_norm": 81.89179992675781, + "learning_rate": 6.254510273466186e-08, + "loss": 14.0888, + "step": 477320 + }, + { + "epoch": 0.9642367998965727, + "grad_norm": 422.5835266113281, + "learning_rate": 6.249007563205679e-08, + "loss": 35.0419, + "step": 477330 + }, + { + "epoch": 0.9642570005292566, + "grad_norm": 592.8697509765625, + "learning_rate": 6.243507259427628e-08, + "loss": 23.1118, + "step": 477340 + }, + { + "epoch": 0.9642772011619404, + "grad_norm": 247.3590545654297, + "learning_rate": 6.238009362158793e-08, + "loss": 17.3722, + "step": 477350 + }, + { + "epoch": 0.9642974017946242, + "grad_norm": 285.0309753417969, + "learning_rate": 6.232513871426038e-08, + "loss": 20.0719, + "step": 477360 + }, + { + "epoch": 0.964317602427308, + "grad_norm": 174.8265380859375, + "learning_rate": 6.227020787256122e-08, + "loss": 16.8355, + "step": 477370 + }, + { + "epoch": 0.9643378030599918, + "grad_norm": 287.5248718261719, + "learning_rate": 6.2215301096758e-08, + "loss": 11.1058, + "step": 477380 + }, + { + "epoch": 0.9643580036926757, + "grad_norm": 673.0089721679688, + "learning_rate": 6.216041838711828e-08, + "loss": 23.4357, + "step": 477390 + }, + { + "epoch": 0.9643782043253595, + "grad_norm": 94.95233917236328, + "learning_rate": 6.210555974391075e-08, + "loss": 24.3358, + "step": 477400 + }, + { + "epoch": 0.9643984049580433, + "grad_norm": 0.0, + "learning_rate": 6.205072516740129e-08, + "loss": 9.14, + "step": 477410 + }, + { + "epoch": 0.9644186055907271, + "grad_norm": 367.91717529296875, + "learning_rate": 6.199591465785748e-08, + "loss": 10.6455, + "step": 477420 + }, + { + "epoch": 0.9644388062234109, + "grad_norm": 476.6819152832031, + "learning_rate": 6.194112821554687e-08, + "loss": 29.8962, + "step": 477430 + }, + { + "epoch": 0.9644590068560948, + "grad_norm": 690.882568359375, + "learning_rate": 6.188636584073648e-08, + "loss": 19.3234, + "step": 477440 + }, + { + "epoch": 0.9644792074887786, + "grad_norm": 66.78436279296875, + "learning_rate": 6.183162753369221e-08, + "loss": 11.4298, + "step": 477450 + }, + { + "epoch": 0.9644994081214624, + "grad_norm": 493.62298583984375, + "learning_rate": 6.177691329468217e-08, + "loss": 21.2334, + "step": 477460 + }, + { + "epoch": 0.9645196087541462, + "grad_norm": 125.45512390136719, + "learning_rate": 6.17222231239728e-08, + "loss": 21.7523, + "step": 477470 + }, + { + "epoch": 0.96453980938683, + "grad_norm": 481.461181640625, + "learning_rate": 6.166755702183058e-08, + "loss": 20.9767, + "step": 477480 + }, + { + "epoch": 0.9645600100195139, + "grad_norm": 17.118911743164062, + "learning_rate": 6.161291498852084e-08, + "loss": 19.0655, + "step": 477490 + }, + { + "epoch": 0.9645802106521977, + "grad_norm": 752.0841064453125, + "learning_rate": 6.15582970243117e-08, + "loss": 25.7584, + "step": 477500 + }, + { + "epoch": 0.9646004112848814, + "grad_norm": 101.85774230957031, + "learning_rate": 6.150370312946797e-08, + "loss": 7.6507, + "step": 477510 + }, + { + "epoch": 0.9646206119175652, + "grad_norm": 380.5328674316406, + "learning_rate": 6.144913330425606e-08, + "loss": 32.0278, + "step": 477520 + }, + { + "epoch": 0.964640812550249, + "grad_norm": 403.0509033203125, + "learning_rate": 6.139458754894245e-08, + "loss": 21.2497, + "step": 477530 + }, + { + "epoch": 0.9646610131829328, + "grad_norm": 670.5574340820312, + "learning_rate": 6.134006586379249e-08, + "loss": 15.94, + "step": 477540 + }, + { + "epoch": 0.9646812138156167, + "grad_norm": 410.8387145996094, + "learning_rate": 6.128556824907205e-08, + "loss": 21.5452, + "step": 477550 + }, + { + "epoch": 0.9647014144483005, + "grad_norm": 534.5658569335938, + "learning_rate": 6.12310947050465e-08, + "loss": 9.8214, + "step": 477560 + }, + { + "epoch": 0.9647216150809843, + "grad_norm": 365.89556884765625, + "learning_rate": 6.11766452319823e-08, + "loss": 15.1667, + "step": 477570 + }, + { + "epoch": 0.9647418157136681, + "grad_norm": 132.87437438964844, + "learning_rate": 6.112221983014366e-08, + "loss": 10.2982, + "step": 477580 + }, + { + "epoch": 0.964762016346352, + "grad_norm": 169.1856689453125, + "learning_rate": 6.106781849979648e-08, + "loss": 15.1391, + "step": 477590 + }, + { + "epoch": 0.9647822169790358, + "grad_norm": 393.377197265625, + "learning_rate": 6.101344124120557e-08, + "loss": 28.7703, + "step": 477600 + }, + { + "epoch": 0.9648024176117196, + "grad_norm": 539.4514770507812, + "learning_rate": 6.095908805463624e-08, + "loss": 29.2519, + "step": 477610 + }, + { + "epoch": 0.9648226182444034, + "grad_norm": 331.7099914550781, + "learning_rate": 6.09047589403533e-08, + "loss": 24.0463, + "step": 477620 + }, + { + "epoch": 0.9648428188770872, + "grad_norm": 604.9713745117188, + "learning_rate": 6.085045389862154e-08, + "loss": 19.9847, + "step": 477630 + }, + { + "epoch": 0.964863019509771, + "grad_norm": 330.50347900390625, + "learning_rate": 6.079617292970519e-08, + "loss": 9.4264, + "step": 477640 + }, + { + "epoch": 0.9648832201424549, + "grad_norm": 481.7924499511719, + "learning_rate": 6.074191603386958e-08, + "loss": 23.5693, + "step": 477650 + }, + { + "epoch": 0.9649034207751387, + "grad_norm": 245.63783264160156, + "learning_rate": 6.068768321137897e-08, + "loss": 10.665, + "step": 477660 + }, + { + "epoch": 0.9649236214078225, + "grad_norm": 2.3313724994659424, + "learning_rate": 6.0633474462497e-08, + "loss": 8.234, + "step": 477670 + }, + { + "epoch": 0.9649438220405063, + "grad_norm": 282.9562683105469, + "learning_rate": 6.057928978748906e-08, + "loss": 9.9405, + "step": 477680 + }, + { + "epoch": 0.9649640226731901, + "grad_norm": 374.821044921875, + "learning_rate": 6.052512918661879e-08, + "loss": 15.405, + "step": 477690 + }, + { + "epoch": 0.964984223305874, + "grad_norm": 202.2202606201172, + "learning_rate": 6.047099266014877e-08, + "loss": 22.1854, + "step": 477700 + }, + { + "epoch": 0.9650044239385578, + "grad_norm": 503.0718688964844, + "learning_rate": 6.041688020834491e-08, + "loss": 18.3147, + "step": 477710 + }, + { + "epoch": 0.9650246245712416, + "grad_norm": 5.190357208251953, + "learning_rate": 6.036279183146975e-08, + "loss": 18.278, + "step": 477720 + }, + { + "epoch": 0.9650448252039254, + "grad_norm": 569.5199584960938, + "learning_rate": 6.030872752978756e-08, + "loss": 18.4054, + "step": 477730 + }, + { + "epoch": 0.9650650258366092, + "grad_norm": 393.5696105957031, + "learning_rate": 6.025468730356144e-08, + "loss": 16.5542, + "step": 477740 + }, + { + "epoch": 0.9650852264692931, + "grad_norm": 1.4170738458633423, + "learning_rate": 6.020067115305451e-08, + "loss": 24.3471, + "step": 477750 + }, + { + "epoch": 0.9651054271019769, + "grad_norm": 224.58607482910156, + "learning_rate": 6.0146679078531e-08, + "loss": 7.6774, + "step": 477760 + }, + { + "epoch": 0.9651256277346606, + "grad_norm": 371.5082092285156, + "learning_rate": 6.009271108025294e-08, + "loss": 11.425, + "step": 477770 + }, + { + "epoch": 0.9651458283673444, + "grad_norm": 591.8775634765625, + "learning_rate": 6.003876715848345e-08, + "loss": 18.0876, + "step": 477780 + }, + { + "epoch": 0.9651660290000282, + "grad_norm": 681.1532592773438, + "learning_rate": 5.998484731348675e-08, + "loss": 11.5306, + "step": 477790 + }, + { + "epoch": 0.9651862296327121, + "grad_norm": 209.39012145996094, + "learning_rate": 5.993095154552431e-08, + "loss": 12.4092, + "step": 477800 + }, + { + "epoch": 0.9652064302653959, + "grad_norm": 232.25473022460938, + "learning_rate": 5.987707985485925e-08, + "loss": 23.2592, + "step": 477810 + }, + { + "epoch": 0.9652266308980797, + "grad_norm": 493.2044677734375, + "learning_rate": 5.982323224175468e-08, + "loss": 12.4573, + "step": 477820 + }, + { + "epoch": 0.9652468315307635, + "grad_norm": 163.55963134765625, + "learning_rate": 5.976940870647207e-08, + "loss": 29.157, + "step": 477830 + }, + { + "epoch": 0.9652670321634473, + "grad_norm": 152.7935028076172, + "learning_rate": 5.9715609249274e-08, + "loss": 16.3347, + "step": 477840 + }, + { + "epoch": 0.9652872327961312, + "grad_norm": 170.32513427734375, + "learning_rate": 5.966183387042246e-08, + "loss": 22.4016, + "step": 477850 + }, + { + "epoch": 0.965307433428815, + "grad_norm": 350.372802734375, + "learning_rate": 5.960808257018113e-08, + "loss": 17.2876, + "step": 477860 + }, + { + "epoch": 0.9653276340614988, + "grad_norm": 345.9002990722656, + "learning_rate": 5.955435534881038e-08, + "loss": 22.5925, + "step": 477870 + }, + { + "epoch": 0.9653478346941826, + "grad_norm": 49.1010627746582, + "learning_rate": 5.950065220657164e-08, + "loss": 6.0972, + "step": 477880 + }, + { + "epoch": 0.9653680353268664, + "grad_norm": 442.44207763671875, + "learning_rate": 5.9446973143728605e-08, + "loss": 22.0141, + "step": 477890 + }, + { + "epoch": 0.9653882359595503, + "grad_norm": 59.3377799987793, + "learning_rate": 5.939331816054161e-08, + "loss": 25.7228, + "step": 477900 + }, + { + "epoch": 0.9654084365922341, + "grad_norm": 461.59124755859375, + "learning_rate": 5.9339687257272126e-08, + "loss": 26.488, + "step": 477910 + }, + { + "epoch": 0.9654286372249179, + "grad_norm": 312.3343505859375, + "learning_rate": 5.92860804341816e-08, + "loss": 26.8083, + "step": 477920 + }, + { + "epoch": 0.9654488378576017, + "grad_norm": 451.34051513671875, + "learning_rate": 5.9232497691531496e-08, + "loss": 20.6006, + "step": 477930 + }, + { + "epoch": 0.9654690384902855, + "grad_norm": 146.99241638183594, + "learning_rate": 5.917893902958327e-08, + "loss": 14.9505, + "step": 477940 + }, + { + "epoch": 0.9654892391229694, + "grad_norm": 617.3377075195312, + "learning_rate": 5.9125404448597825e-08, + "loss": 9.5861, + "step": 477950 + }, + { + "epoch": 0.9655094397556532, + "grad_norm": 416.9510192871094, + "learning_rate": 5.9071893948835505e-08, + "loss": 26.2968, + "step": 477960 + }, + { + "epoch": 0.965529640388337, + "grad_norm": 112.83805084228516, + "learning_rate": 5.901840753055776e-08, + "loss": 36.8032, + "step": 477970 + }, + { + "epoch": 0.9655498410210208, + "grad_norm": 199.9210205078125, + "learning_rate": 5.896494519402496e-08, + "loss": 15.8749, + "step": 477980 + }, + { + "epoch": 0.9655700416537046, + "grad_norm": 339.83465576171875, + "learning_rate": 5.891150693949743e-08, + "loss": 20.127, + "step": 477990 + }, + { + "epoch": 0.9655902422863885, + "grad_norm": 863.1902465820312, + "learning_rate": 5.8858092767236084e-08, + "loss": 29.7857, + "step": 478000 + }, + { + "epoch": 0.9656104429190723, + "grad_norm": 264.4561462402344, + "learning_rate": 5.880470267750127e-08, + "loss": 27.0998, + "step": 478010 + }, + { + "epoch": 0.9656306435517561, + "grad_norm": 359.2437744140625, + "learning_rate": 5.8751336670552775e-08, + "loss": 19.5618, + "step": 478020 + }, + { + "epoch": 0.9656508441844398, + "grad_norm": 617.5918579101562, + "learning_rate": 5.8697994746650946e-08, + "loss": 27.1238, + "step": 478030 + }, + { + "epoch": 0.9656710448171236, + "grad_norm": 2867.60791015625, + "learning_rate": 5.864467690605613e-08, + "loss": 27.6647, + "step": 478040 + }, + { + "epoch": 0.9656912454498074, + "grad_norm": 291.1172790527344, + "learning_rate": 5.8591383149028126e-08, + "loss": 32.4455, + "step": 478050 + }, + { + "epoch": 0.9657114460824913, + "grad_norm": 40.02949523925781, + "learning_rate": 5.8538113475825606e-08, + "loss": 23.7419, + "step": 478060 + }, + { + "epoch": 0.9657316467151751, + "grad_norm": 311.8459777832031, + "learning_rate": 5.848486788670893e-08, + "loss": 22.3518, + "step": 478070 + }, + { + "epoch": 0.9657518473478589, + "grad_norm": 315.1097106933594, + "learning_rate": 5.843164638193899e-08, + "loss": 11.6773, + "step": 478080 + }, + { + "epoch": 0.9657720479805427, + "grad_norm": 173.48623657226562, + "learning_rate": 5.837844896177225e-08, + "loss": 15.2753, + "step": 478090 + }, + { + "epoch": 0.9657922486132265, + "grad_norm": 233.17698669433594, + "learning_rate": 5.8325275626470166e-08, + "loss": 14.2047, + "step": 478100 + }, + { + "epoch": 0.9658124492459104, + "grad_norm": 757.6137084960938, + "learning_rate": 5.827212637629198e-08, + "loss": 20.2655, + "step": 478110 + }, + { + "epoch": 0.9658326498785942, + "grad_norm": 354.80108642578125, + "learning_rate": 5.821900121149582e-08, + "loss": 31.6603, + "step": 478120 + }, + { + "epoch": 0.965852850511278, + "grad_norm": 187.01177978515625, + "learning_rate": 5.8165900132340356e-08, + "loss": 27.2038, + "step": 478130 + }, + { + "epoch": 0.9658730511439618, + "grad_norm": 545.61181640625, + "learning_rate": 5.8112823139085396e-08, + "loss": 23.4992, + "step": 478140 + }, + { + "epoch": 0.9658932517766456, + "grad_norm": 373.3515625, + "learning_rate": 5.80597702319885e-08, + "loss": 18.8845, + "step": 478150 + }, + { + "epoch": 0.9659134524093295, + "grad_norm": 558.9241333007812, + "learning_rate": 5.800674141130946e-08, + "loss": 19.6263, + "step": 478160 + }, + { + "epoch": 0.9659336530420133, + "grad_norm": 379.2434387207031, + "learning_rate": 5.795373667730586e-08, + "loss": 19.4863, + "step": 478170 + }, + { + "epoch": 0.9659538536746971, + "grad_norm": 53.8942985534668, + "learning_rate": 5.7900756030236924e-08, + "loss": 19.1105, + "step": 478180 + }, + { + "epoch": 0.9659740543073809, + "grad_norm": 489.83416748046875, + "learning_rate": 5.7847799470360236e-08, + "loss": 13.0014, + "step": 478190 + }, + { + "epoch": 0.9659942549400647, + "grad_norm": 657.77099609375, + "learning_rate": 5.7794866997933355e-08, + "loss": 11.6912, + "step": 478200 + }, + { + "epoch": 0.9660144555727486, + "grad_norm": 244.96229553222656, + "learning_rate": 5.774195861321552e-08, + "loss": 37.3842, + "step": 478210 + }, + { + "epoch": 0.9660346562054324, + "grad_norm": 335.60382080078125, + "learning_rate": 5.76890743164632e-08, + "loss": 16.0015, + "step": 478220 + }, + { + "epoch": 0.9660548568381162, + "grad_norm": 518.923583984375, + "learning_rate": 5.763621410793563e-08, + "loss": 27.9391, + "step": 478230 + }, + { + "epoch": 0.9660750574708, + "grad_norm": 56.98911666870117, + "learning_rate": 5.758337798788982e-08, + "loss": 9.7949, + "step": 478240 + }, + { + "epoch": 0.9660952581034838, + "grad_norm": 637.66015625, + "learning_rate": 5.753056595658224e-08, + "loss": 19.3816, + "step": 478250 + }, + { + "epoch": 0.9661154587361677, + "grad_norm": 145.6067352294922, + "learning_rate": 5.7477778014272124e-08, + "loss": 21.7067, + "step": 478260 + }, + { + "epoch": 0.9661356593688515, + "grad_norm": 318.15826416015625, + "learning_rate": 5.7425014161215375e-08, + "loss": 23.1052, + "step": 478270 + }, + { + "epoch": 0.9661558600015352, + "grad_norm": 899.2626953125, + "learning_rate": 5.737227439766957e-08, + "loss": 19.9951, + "step": 478280 + }, + { + "epoch": 0.966176060634219, + "grad_norm": 159.44384765625, + "learning_rate": 5.7319558723892275e-08, + "loss": 11.5791, + "step": 478290 + }, + { + "epoch": 0.9661962612669028, + "grad_norm": 759.2963256835938, + "learning_rate": 5.726686714013996e-08, + "loss": 20.4163, + "step": 478300 + }, + { + "epoch": 0.9662164618995867, + "grad_norm": 241.1317596435547, + "learning_rate": 5.7214199646669076e-08, + "loss": 30.4358, + "step": 478310 + }, + { + "epoch": 0.9662366625322705, + "grad_norm": 260.4803161621094, + "learning_rate": 5.716155624373665e-08, + "loss": 16.714, + "step": 478320 + }, + { + "epoch": 0.9662568631649543, + "grad_norm": 84.16582489013672, + "learning_rate": 5.710893693159969e-08, + "loss": 18.6407, + "step": 478330 + }, + { + "epoch": 0.9662770637976381, + "grad_norm": 439.8134765625, + "learning_rate": 5.705634171051411e-08, + "loss": 19.6875, + "step": 478340 + }, + { + "epoch": 0.9662972644303219, + "grad_norm": 137.8370819091797, + "learning_rate": 5.700377058073636e-08, + "loss": 26.0859, + "step": 478350 + }, + { + "epoch": 0.9663174650630058, + "grad_norm": 316.39593505859375, + "learning_rate": 5.6951223542522915e-08, + "loss": 34.9143, + "step": 478360 + }, + { + "epoch": 0.9663376656956896, + "grad_norm": 157.16848754882812, + "learning_rate": 5.6898700596129674e-08, + "loss": 13.9034, + "step": 478370 + }, + { + "epoch": 0.9663578663283734, + "grad_norm": 149.63681030273438, + "learning_rate": 5.684620174181255e-08, + "loss": 13.5584, + "step": 478380 + }, + { + "epoch": 0.9663780669610572, + "grad_norm": 177.0972442626953, + "learning_rate": 5.679372697982688e-08, + "loss": 19.0456, + "step": 478390 + }, + { + "epoch": 0.966398267593741, + "grad_norm": 292.07403564453125, + "learning_rate": 5.674127631043025e-08, + "loss": 11.7672, + "step": 478400 + }, + { + "epoch": 0.9664184682264249, + "grad_norm": 1.6651360988616943, + "learning_rate": 5.668884973387634e-08, + "loss": 14.0168, + "step": 478410 + }, + { + "epoch": 0.9664386688591087, + "grad_norm": 468.69232177734375, + "learning_rate": 5.663644725042161e-08, + "loss": 31.5944, + "step": 478420 + }, + { + "epoch": 0.9664588694917925, + "grad_norm": 530.5999755859375, + "learning_rate": 5.658406886032142e-08, + "loss": 22.6728, + "step": 478430 + }, + { + "epoch": 0.9664790701244763, + "grad_norm": 627.4494018554688, + "learning_rate": 5.653171456383055e-08, + "loss": 21.3161, + "step": 478440 + }, + { + "epoch": 0.9664992707571601, + "grad_norm": 374.77008056640625, + "learning_rate": 5.647938436120437e-08, + "loss": 11.9045, + "step": 478450 + }, + { + "epoch": 0.966519471389844, + "grad_norm": 876.54443359375, + "learning_rate": 5.642707825269822e-08, + "loss": 20.375, + "step": 478460 + }, + { + "epoch": 0.9665396720225278, + "grad_norm": 236.10513305664062, + "learning_rate": 5.637479623856745e-08, + "loss": 18.2886, + "step": 478470 + }, + { + "epoch": 0.9665598726552116, + "grad_norm": 345.92333984375, + "learning_rate": 5.632253831906631e-08, + "loss": 19.2766, + "step": 478480 + }, + { + "epoch": 0.9665800732878954, + "grad_norm": 260.10943603515625, + "learning_rate": 5.6270304494449035e-08, + "loss": 20.5953, + "step": 478490 + }, + { + "epoch": 0.9666002739205792, + "grad_norm": 98.62396240234375, + "learning_rate": 5.621809476497098e-08, + "loss": 31.1268, + "step": 478500 + }, + { + "epoch": 0.966620474553263, + "grad_norm": 279.5716857910156, + "learning_rate": 5.616590913088638e-08, + "loss": 19.0142, + "step": 478510 + }, + { + "epoch": 0.9666406751859469, + "grad_norm": 140.54217529296875, + "learning_rate": 5.611374759244892e-08, + "loss": 11.6353, + "step": 478520 + }, + { + "epoch": 0.9666608758186307, + "grad_norm": 748.846435546875, + "learning_rate": 5.6061610149913957e-08, + "loss": 36.219, + "step": 478530 + }, + { + "epoch": 0.9666810764513144, + "grad_norm": 199.6036376953125, + "learning_rate": 5.6009496803534624e-08, + "loss": 25.0226, + "step": 478540 + }, + { + "epoch": 0.9667012770839982, + "grad_norm": 400.14459228515625, + "learning_rate": 5.595740755356627e-08, + "loss": 17.4089, + "step": 478550 + }, + { + "epoch": 0.966721477716682, + "grad_norm": 3818.854736328125, + "learning_rate": 5.590534240026146e-08, + "loss": 42.379, + "step": 478560 + }, + { + "epoch": 0.9667416783493659, + "grad_norm": 6.718419075012207, + "learning_rate": 5.58533013438739e-08, + "loss": 19.4567, + "step": 478570 + }, + { + "epoch": 0.9667618789820497, + "grad_norm": 202.12559509277344, + "learning_rate": 5.580128438465837e-08, + "loss": 11.5954, + "step": 478580 + }, + { + "epoch": 0.9667820796147335, + "grad_norm": 521.642333984375, + "learning_rate": 5.574929152286745e-08, + "loss": 14.6131, + "step": 478590 + }, + { + "epoch": 0.9668022802474173, + "grad_norm": 276.46636962890625, + "learning_rate": 5.569732275875428e-08, + "loss": 18.7578, + "step": 478600 + }, + { + "epoch": 0.9668224808801011, + "grad_norm": 231.13758850097656, + "learning_rate": 5.5645378092573085e-08, + "loss": 34.6684, + "step": 478610 + }, + { + "epoch": 0.966842681512785, + "grad_norm": 396.3559875488281, + "learning_rate": 5.559345752457701e-08, + "loss": 14.3551, + "step": 478620 + }, + { + "epoch": 0.9668628821454688, + "grad_norm": 597.891845703125, + "learning_rate": 5.554156105501862e-08, + "loss": 46.5115, + "step": 478630 + }, + { + "epoch": 0.9668830827781526, + "grad_norm": 2.6351895332336426, + "learning_rate": 5.54896886841505e-08, + "loss": 26.3693, + "step": 478640 + }, + { + "epoch": 0.9669032834108364, + "grad_norm": 122.9947509765625, + "learning_rate": 5.543784041222633e-08, + "loss": 12.3833, + "step": 478650 + }, + { + "epoch": 0.9669234840435202, + "grad_norm": 285.569580078125, + "learning_rate": 5.538601623949869e-08, + "loss": 10.7647, + "step": 478660 + }, + { + "epoch": 0.9669436846762041, + "grad_norm": 461.61627197265625, + "learning_rate": 5.533421616621903e-08, + "loss": 18.2578, + "step": 478670 + }, + { + "epoch": 0.9669638853088879, + "grad_norm": 484.755859375, + "learning_rate": 5.528244019264106e-08, + "loss": 17.0171, + "step": 478680 + }, + { + "epoch": 0.9669840859415717, + "grad_norm": 177.4759063720703, + "learning_rate": 5.5230688319017344e-08, + "loss": 18.0975, + "step": 478690 + }, + { + "epoch": 0.9670042865742555, + "grad_norm": 557.1797485351562, + "learning_rate": 5.517896054559879e-08, + "loss": 21.3366, + "step": 478700 + }, + { + "epoch": 0.9670244872069393, + "grad_norm": 383.9194030761719, + "learning_rate": 5.512725687263853e-08, + "loss": 15.7219, + "step": 478710 + }, + { + "epoch": 0.9670446878396232, + "grad_norm": 586.3380126953125, + "learning_rate": 5.507557730038859e-08, + "loss": 14.0986, + "step": 478720 + }, + { + "epoch": 0.967064888472307, + "grad_norm": 580.1846923828125, + "learning_rate": 5.5023921829100434e-08, + "loss": 24.0707, + "step": 478730 + }, + { + "epoch": 0.9670850891049908, + "grad_norm": 572.0950927734375, + "learning_rate": 5.497229045902552e-08, + "loss": 33.8662, + "step": 478740 + }, + { + "epoch": 0.9671052897376746, + "grad_norm": 19.00641632080078, + "learning_rate": 5.492068319041588e-08, + "loss": 36.7493, + "step": 478750 + }, + { + "epoch": 0.9671254903703584, + "grad_norm": 484.2851257324219, + "learning_rate": 5.4869100023523526e-08, + "loss": 18.0603, + "step": 478760 + }, + { + "epoch": 0.9671456910030423, + "grad_norm": 218.1751251220703, + "learning_rate": 5.4817540958598814e-08, + "loss": 9.6601, + "step": 478770 + }, + { + "epoch": 0.9671658916357261, + "grad_norm": 545.3314819335938, + "learning_rate": 5.476600599589377e-08, + "loss": 25.2217, + "step": 478780 + }, + { + "epoch": 0.9671860922684098, + "grad_norm": 230.48045349121094, + "learning_rate": 5.471449513565985e-08, + "loss": 22.9013, + "step": 478790 + }, + { + "epoch": 0.9672062929010936, + "grad_norm": 163.40428161621094, + "learning_rate": 5.466300837814797e-08, + "loss": 18.2725, + "step": 478800 + }, + { + "epoch": 0.9672264935337774, + "grad_norm": 319.80743408203125, + "learning_rate": 5.461154572360794e-08, + "loss": 21.5176, + "step": 478810 + }, + { + "epoch": 0.9672466941664613, + "grad_norm": 519.5460205078125, + "learning_rate": 5.456010717229177e-08, + "loss": 23.0734, + "step": 478820 + }, + { + "epoch": 0.9672668947991451, + "grad_norm": 264.7811584472656, + "learning_rate": 5.4508692724449806e-08, + "loss": 19.0381, + "step": 478830 + }, + { + "epoch": 0.9672870954318289, + "grad_norm": 236.47109985351562, + "learning_rate": 5.445730238033298e-08, + "loss": 15.7891, + "step": 478840 + }, + { + "epoch": 0.9673072960645127, + "grad_norm": 237.08204650878906, + "learning_rate": 5.440593614019107e-08, + "loss": 12.1388, + "step": 478850 + }, + { + "epoch": 0.9673274966971965, + "grad_norm": 171.9046173095703, + "learning_rate": 5.435459400427501e-08, + "loss": 13.042, + "step": 478860 + }, + { + "epoch": 0.9673476973298804, + "grad_norm": 631.8507690429688, + "learning_rate": 5.4303275972834577e-08, + "loss": 23.7042, + "step": 478870 + }, + { + "epoch": 0.9673678979625642, + "grad_norm": 141.70753479003906, + "learning_rate": 5.42519820461207e-08, + "loss": 10.7698, + "step": 478880 + }, + { + "epoch": 0.967388098595248, + "grad_norm": 266.23284912109375, + "learning_rate": 5.4200712224382056e-08, + "loss": 16.0063, + "step": 478890 + }, + { + "epoch": 0.9674082992279318, + "grad_norm": 253.25729370117188, + "learning_rate": 5.414946650786957e-08, + "loss": 10.512, + "step": 478900 + }, + { + "epoch": 0.9674284998606156, + "grad_norm": 220.38278198242188, + "learning_rate": 5.409824489683247e-08, + "loss": 29.1843, + "step": 478910 + }, + { + "epoch": 0.9674487004932995, + "grad_norm": 304.1505432128906, + "learning_rate": 5.4047047391521114e-08, + "loss": 23.3969, + "step": 478920 + }, + { + "epoch": 0.9674689011259833, + "grad_norm": 145.32652282714844, + "learning_rate": 5.39958739921842e-08, + "loss": 23.0794, + "step": 478930 + }, + { + "epoch": 0.9674891017586671, + "grad_norm": 579.7205810546875, + "learning_rate": 5.394472469907208e-08, + "loss": 25.9221, + "step": 478940 + }, + { + "epoch": 0.9675093023913509, + "grad_norm": 588.6752319335938, + "learning_rate": 5.389359951243345e-08, + "loss": 9.451, + "step": 478950 + }, + { + "epoch": 0.9675295030240347, + "grad_norm": 196.82383728027344, + "learning_rate": 5.3842498432516986e-08, + "loss": 14.1151, + "step": 478960 + }, + { + "epoch": 0.9675497036567186, + "grad_norm": 685.24951171875, + "learning_rate": 5.3791421459571947e-08, + "loss": 16.9339, + "step": 478970 + }, + { + "epoch": 0.9675699042894024, + "grad_norm": 78.54755401611328, + "learning_rate": 5.374036859384868e-08, + "loss": 10.4572, + "step": 478980 + }, + { + "epoch": 0.9675901049220862, + "grad_norm": 475.5689697265625, + "learning_rate": 5.3689339835594215e-08, + "loss": 14.2306, + "step": 478990 + }, + { + "epoch": 0.96761030555477, + "grad_norm": 257.8653259277344, + "learning_rate": 5.363833518505834e-08, + "loss": 7.7078, + "step": 479000 + }, + { + "epoch": 0.9676305061874538, + "grad_norm": 461.0395202636719, + "learning_rate": 5.358735464248921e-08, + "loss": 17.0037, + "step": 479010 + }, + { + "epoch": 0.9676507068201377, + "grad_norm": 271.3075256347656, + "learning_rate": 5.3536398208135495e-08, + "loss": 25.8619, + "step": 479020 + }, + { + "epoch": 0.9676709074528215, + "grad_norm": 59.960697174072266, + "learning_rate": 5.348546588224535e-08, + "loss": 22.1884, + "step": 479030 + }, + { + "epoch": 0.9676911080855053, + "grad_norm": 94.71739959716797, + "learning_rate": 5.343455766506689e-08, + "loss": 21.2829, + "step": 479040 + }, + { + "epoch": 0.967711308718189, + "grad_norm": 124.12822723388672, + "learning_rate": 5.338367355684881e-08, + "loss": 21.4375, + "step": 479050 + }, + { + "epoch": 0.9677315093508728, + "grad_norm": 197.34124755859375, + "learning_rate": 5.33328135578387e-08, + "loss": 24.6474, + "step": 479060 + }, + { + "epoch": 0.9677517099835566, + "grad_norm": 378.1953125, + "learning_rate": 5.3281977668284136e-08, + "loss": 32.3378, + "step": 479070 + }, + { + "epoch": 0.9677719106162405, + "grad_norm": 154.08151245117188, + "learning_rate": 5.323116588843324e-08, + "loss": 16.6362, + "step": 479080 + }, + { + "epoch": 0.9677921112489243, + "grad_norm": 331.41455078125, + "learning_rate": 5.318037821853417e-08, + "loss": 24.9352, + "step": 479090 + }, + { + "epoch": 0.9678123118816081, + "grad_norm": 417.18731689453125, + "learning_rate": 5.312961465883393e-08, + "loss": 19.8879, + "step": 479100 + }, + { + "epoch": 0.9678325125142919, + "grad_norm": 524.3778076171875, + "learning_rate": 5.307887520957955e-08, + "loss": 11.2781, + "step": 479110 + }, + { + "epoch": 0.9678527131469757, + "grad_norm": 276.419921875, + "learning_rate": 5.302815987101917e-08, + "loss": 11.3561, + "step": 479120 + }, + { + "epoch": 0.9678729137796596, + "grad_norm": 623.837646484375, + "learning_rate": 5.2977468643399254e-08, + "loss": 23.5989, + "step": 479130 + }, + { + "epoch": 0.9678931144123434, + "grad_norm": 422.738525390625, + "learning_rate": 5.292680152696739e-08, + "loss": 16.2322, + "step": 479140 + }, + { + "epoch": 0.9679133150450272, + "grad_norm": 76.41869354248047, + "learning_rate": 5.2876158521969476e-08, + "loss": 19.3891, + "step": 479150 + }, + { + "epoch": 0.967933515677711, + "grad_norm": 361.3853759765625, + "learning_rate": 5.282553962865422e-08, + "loss": 13.3551, + "step": 479160 + }, + { + "epoch": 0.9679537163103948, + "grad_norm": 378.0127868652344, + "learning_rate": 5.2774944847266976e-08, + "loss": 15.339, + "step": 479170 + }, + { + "epoch": 0.9679739169430787, + "grad_norm": 353.4735107421875, + "learning_rate": 5.27243741780542e-08, + "loss": 21.6208, + "step": 479180 + }, + { + "epoch": 0.9679941175757625, + "grad_norm": 434.619384765625, + "learning_rate": 5.267382762126294e-08, + "loss": 19.5157, + "step": 479190 + }, + { + "epoch": 0.9680143182084463, + "grad_norm": 356.3162536621094, + "learning_rate": 5.262330517713965e-08, + "loss": 9.1625, + "step": 479200 + }, + { + "epoch": 0.9680345188411301, + "grad_norm": 324.1009521484375, + "learning_rate": 5.2572806845930244e-08, + "loss": 25.1147, + "step": 479210 + }, + { + "epoch": 0.9680547194738139, + "grad_norm": 196.4537353515625, + "learning_rate": 5.252233262788065e-08, + "loss": 17.194, + "step": 479220 + }, + { + "epoch": 0.9680749201064978, + "grad_norm": 162.21270751953125, + "learning_rate": 5.247188252323787e-08, + "loss": 12.6761, + "step": 479230 + }, + { + "epoch": 0.9680951207391816, + "grad_norm": 512.8888549804688, + "learning_rate": 5.242145653224673e-08, + "loss": 19.6846, + "step": 479240 + }, + { + "epoch": 0.9681153213718654, + "grad_norm": 164.4032745361328, + "learning_rate": 5.237105465515258e-08, + "loss": 16.9834, + "step": 479250 + }, + { + "epoch": 0.9681355220045492, + "grad_norm": 311.5745544433594, + "learning_rate": 5.2320676892202996e-08, + "loss": 16.5828, + "step": 479260 + }, + { + "epoch": 0.968155722637233, + "grad_norm": 305.44390869140625, + "learning_rate": 5.227032324364167e-08, + "loss": 16.0539, + "step": 479270 + }, + { + "epoch": 0.9681759232699169, + "grad_norm": 198.4656982421875, + "learning_rate": 5.2219993709714535e-08, + "loss": 14.3294, + "step": 479280 + }, + { + "epoch": 0.9681961239026007, + "grad_norm": 338.1217041015625, + "learning_rate": 5.2169688290667485e-08, + "loss": 17.7753, + "step": 479290 + }, + { + "epoch": 0.9682163245352845, + "grad_norm": 446.63409423828125, + "learning_rate": 5.2119406986745336e-08, + "loss": 16.3242, + "step": 479300 + }, + { + "epoch": 0.9682365251679682, + "grad_norm": 288.9396667480469, + "learning_rate": 5.206914979819289e-08, + "loss": 18.615, + "step": 479310 + }, + { + "epoch": 0.968256725800652, + "grad_norm": 410.0702209472656, + "learning_rate": 5.2018916725254945e-08, + "loss": 31.4091, + "step": 479320 + }, + { + "epoch": 0.9682769264333358, + "grad_norm": 287.1968994140625, + "learning_rate": 5.196870776817742e-08, + "loss": 17.3582, + "step": 479330 + }, + { + "epoch": 0.9682971270660197, + "grad_norm": 151.74069213867188, + "learning_rate": 5.191852292720401e-08, + "loss": 32.3567, + "step": 479340 + }, + { + "epoch": 0.9683173276987035, + "grad_norm": 738.2186279296875, + "learning_rate": 5.186836220257951e-08, + "loss": 19.9806, + "step": 479350 + }, + { + "epoch": 0.9683375283313873, + "grad_norm": 647.9827880859375, + "learning_rate": 5.1818225594548185e-08, + "loss": 12.0309, + "step": 479360 + }, + { + "epoch": 0.9683577289640711, + "grad_norm": 210.44410705566406, + "learning_rate": 5.176811310335539e-08, + "loss": 19.8676, + "step": 479370 + }, + { + "epoch": 0.968377929596755, + "grad_norm": 104.06281280517578, + "learning_rate": 5.17180247292437e-08, + "loss": 9.6487, + "step": 479380 + }, + { + "epoch": 0.9683981302294388, + "grad_norm": 136.3513641357422, + "learning_rate": 5.1667960472459034e-08, + "loss": 13.6415, + "step": 479390 + }, + { + "epoch": 0.9684183308621226, + "grad_norm": 104.18755340576172, + "learning_rate": 5.161792033324398e-08, + "loss": 12.012, + "step": 479400 + }, + { + "epoch": 0.9684385314948064, + "grad_norm": 200.2335662841797, + "learning_rate": 5.1567904311843886e-08, + "loss": 15.0877, + "step": 479410 + }, + { + "epoch": 0.9684587321274902, + "grad_norm": 316.23980712890625, + "learning_rate": 5.151791240850079e-08, + "loss": 14.0139, + "step": 479420 + }, + { + "epoch": 0.968478932760174, + "grad_norm": 485.8829345703125, + "learning_rate": 5.14679446234595e-08, + "loss": 38.5259, + "step": 479430 + }, + { + "epoch": 0.9684991333928579, + "grad_norm": 583.5144653320312, + "learning_rate": 5.14180009569637e-08, + "loss": 24.376, + "step": 479440 + }, + { + "epoch": 0.9685193340255417, + "grad_norm": 827.423583984375, + "learning_rate": 5.136808140925542e-08, + "loss": 14.9787, + "step": 479450 + }, + { + "epoch": 0.9685395346582255, + "grad_norm": 401.1725769042969, + "learning_rate": 5.131818598057947e-08, + "loss": 17.1449, + "step": 479460 + }, + { + "epoch": 0.9685597352909093, + "grad_norm": 236.3235626220703, + "learning_rate": 5.126831467117843e-08, + "loss": 23.819, + "step": 479470 + }, + { + "epoch": 0.9685799359235931, + "grad_norm": 396.2535400390625, + "learning_rate": 5.121846748129544e-08, + "loss": 20.7878, + "step": 479480 + }, + { + "epoch": 0.968600136556277, + "grad_norm": 363.9278259277344, + "learning_rate": 5.116864441117364e-08, + "loss": 20.4867, + "step": 479490 + }, + { + "epoch": 0.9686203371889608, + "grad_norm": 392.9410400390625, + "learning_rate": 5.111884546105506e-08, + "loss": 18.4554, + "step": 479500 + }, + { + "epoch": 0.9686405378216446, + "grad_norm": 536.6632080078125, + "learning_rate": 5.106907063118394e-08, + "loss": 13.4975, + "step": 479510 + }, + { + "epoch": 0.9686607384543284, + "grad_norm": 448.8122863769531, + "learning_rate": 5.10193199218012e-08, + "loss": 23.2407, + "step": 479520 + }, + { + "epoch": 0.9686809390870122, + "grad_norm": 314.69012451171875, + "learning_rate": 5.0969593333149994e-08, + "loss": 17.3976, + "step": 479530 + }, + { + "epoch": 0.9687011397196961, + "grad_norm": 206.2161407470703, + "learning_rate": 5.091989086547289e-08, + "loss": 20.0975, + "step": 479540 + }, + { + "epoch": 0.9687213403523799, + "grad_norm": 59.227203369140625, + "learning_rate": 5.0870212519012477e-08, + "loss": 14.2258, + "step": 479550 + }, + { + "epoch": 0.9687415409850636, + "grad_norm": 420.8604431152344, + "learning_rate": 5.082055829400967e-08, + "loss": 17.1765, + "step": 479560 + }, + { + "epoch": 0.9687617416177474, + "grad_norm": 511.0311584472656, + "learning_rate": 5.077092819070761e-08, + "loss": 10.442, + "step": 479570 + }, + { + "epoch": 0.9687819422504312, + "grad_norm": 241.3959503173828, + "learning_rate": 5.072132220934722e-08, + "loss": 23.6839, + "step": 479580 + }, + { + "epoch": 0.9688021428831151, + "grad_norm": 407.5174865722656, + "learning_rate": 5.067174035017164e-08, + "loss": 11.5223, + "step": 479590 + }, + { + "epoch": 0.9688223435157989, + "grad_norm": 148.66758728027344, + "learning_rate": 5.062218261342122e-08, + "loss": 15.6233, + "step": 479600 + }, + { + "epoch": 0.9688425441484827, + "grad_norm": 478.1081237792969, + "learning_rate": 5.0572648999338e-08, + "loss": 10.8934, + "step": 479610 + }, + { + "epoch": 0.9688627447811665, + "grad_norm": 410.79217529296875, + "learning_rate": 5.052313950816401e-08, + "loss": 14.1215, + "step": 479620 + }, + { + "epoch": 0.9688829454138503, + "grad_norm": 486.24853515625, + "learning_rate": 5.0473654140139604e-08, + "loss": 23.106, + "step": 479630 + }, + { + "epoch": 0.9689031460465342, + "grad_norm": 893.8436279296875, + "learning_rate": 5.042419289550571e-08, + "loss": 11.5406, + "step": 479640 + }, + { + "epoch": 0.968923346679218, + "grad_norm": 1036.2552490234375, + "learning_rate": 5.0374755774504346e-08, + "loss": 23.9848, + "step": 479650 + }, + { + "epoch": 0.9689435473119018, + "grad_norm": 652.051025390625, + "learning_rate": 5.032534277737644e-08, + "loss": 14.6965, + "step": 479660 + }, + { + "epoch": 0.9689637479445856, + "grad_norm": 469.65863037109375, + "learning_rate": 5.027595390436235e-08, + "loss": 13.7001, + "step": 479670 + }, + { + "epoch": 0.9689839485772694, + "grad_norm": 601.7528686523438, + "learning_rate": 5.0226589155702445e-08, + "loss": 16.1731, + "step": 479680 + }, + { + "epoch": 0.9690041492099533, + "grad_norm": 601.6310424804688, + "learning_rate": 5.017724853163819e-08, + "loss": 29.1537, + "step": 479690 + }, + { + "epoch": 0.9690243498426371, + "grad_norm": 367.8590393066406, + "learning_rate": 5.012793203240995e-08, + "loss": 13.7387, + "step": 479700 + }, + { + "epoch": 0.9690445504753209, + "grad_norm": 397.095947265625, + "learning_rate": 5.007863965825754e-08, + "loss": 16.9001, + "step": 479710 + }, + { + "epoch": 0.9690647511080047, + "grad_norm": 306.54229736328125, + "learning_rate": 5.002937140942132e-08, + "loss": 9.5529, + "step": 479720 + }, + { + "epoch": 0.9690849517406885, + "grad_norm": 188.66578674316406, + "learning_rate": 4.998012728614221e-08, + "loss": 8.5547, + "step": 479730 + }, + { + "epoch": 0.9691051523733724, + "grad_norm": 368.2060546875, + "learning_rate": 4.99309072886589e-08, + "loss": 19.5318, + "step": 479740 + }, + { + "epoch": 0.9691253530060562, + "grad_norm": 2.3029627799987793, + "learning_rate": 4.988171141721232e-08, + "loss": 14.3379, + "step": 479750 + }, + { + "epoch": 0.96914555363874, + "grad_norm": 339.0953674316406, + "learning_rate": 4.983253967204171e-08, + "loss": 30.6589, + "step": 479760 + }, + { + "epoch": 0.9691657542714238, + "grad_norm": 335.7156982421875, + "learning_rate": 4.9783392053386894e-08, + "loss": 17.6085, + "step": 479770 + }, + { + "epoch": 0.9691859549041076, + "grad_norm": 301.7350769042969, + "learning_rate": 4.9734268561487665e-08, + "loss": 14.41, + "step": 479780 + }, + { + "epoch": 0.9692061555367915, + "grad_norm": 659.3392944335938, + "learning_rate": 4.968516919658328e-08, + "loss": 11.3555, + "step": 479790 + }, + { + "epoch": 0.9692263561694753, + "grad_norm": 292.5453796386719, + "learning_rate": 4.9636093958913e-08, + "loss": 19.53, + "step": 479800 + }, + { + "epoch": 0.9692465568021591, + "grad_norm": 249.79486083984375, + "learning_rate": 4.958704284871552e-08, + "loss": 17.1886, + "step": 479810 + }, + { + "epoch": 0.9692667574348428, + "grad_norm": 254.720947265625, + "learning_rate": 4.9538015866230636e-08, + "loss": 19.8521, + "step": 479820 + }, + { + "epoch": 0.9692869580675266, + "grad_norm": 330.8457946777344, + "learning_rate": 4.948901301169706e-08, + "loss": 10.1042, + "step": 479830 + }, + { + "epoch": 0.9693071587002104, + "grad_norm": 244.03285217285156, + "learning_rate": 4.944003428535349e-08, + "loss": 18.2047, + "step": 479840 + }, + { + "epoch": 0.9693273593328943, + "grad_norm": 638.8070068359375, + "learning_rate": 4.939107968743917e-08, + "loss": 16.9893, + "step": 479850 + }, + { + "epoch": 0.9693475599655781, + "grad_norm": 152.32827758789062, + "learning_rate": 4.9342149218191694e-08, + "loss": 11.6009, + "step": 479860 + }, + { + "epoch": 0.9693677605982619, + "grad_norm": 240.5481719970703, + "learning_rate": 4.9293242877850866e-08, + "loss": 14.4416, + "step": 479870 + }, + { + "epoch": 0.9693879612309457, + "grad_norm": 169.4164276123047, + "learning_rate": 4.9244360666653724e-08, + "loss": 37.0584, + "step": 479880 + }, + { + "epoch": 0.9694081618636295, + "grad_norm": 467.1522216796875, + "learning_rate": 4.9195502584839516e-08, + "loss": 30.0674, + "step": 479890 + }, + { + "epoch": 0.9694283624963134, + "grad_norm": 606.3841552734375, + "learning_rate": 4.914666863264528e-08, + "loss": 13.857, + "step": 479900 + }, + { + "epoch": 0.9694485631289972, + "grad_norm": 677.8084106445312, + "learning_rate": 4.9097858810310815e-08, + "loss": 20.1473, + "step": 479910 + }, + { + "epoch": 0.969468763761681, + "grad_norm": 207.4750518798828, + "learning_rate": 4.9049073118072057e-08, + "loss": 24.7366, + "step": 479920 + }, + { + "epoch": 0.9694889643943648, + "grad_norm": 444.7561950683594, + "learning_rate": 4.900031155616769e-08, + "loss": 21.5432, + "step": 479930 + }, + { + "epoch": 0.9695091650270486, + "grad_norm": 69.67774963378906, + "learning_rate": 4.8951574124835865e-08, + "loss": 16.5166, + "step": 479940 + }, + { + "epoch": 0.9695293656597325, + "grad_norm": 270.2462463378906, + "learning_rate": 4.890286082431306e-08, + "loss": 32.815, + "step": 479950 + }, + { + "epoch": 0.9695495662924163, + "grad_norm": 338.75665283203125, + "learning_rate": 4.885417165483741e-08, + "loss": 17.1081, + "step": 479960 + }, + { + "epoch": 0.9695697669251001, + "grad_norm": 434.7626037597656, + "learning_rate": 4.880550661664541e-08, + "loss": 21.953, + "step": 479970 + }, + { + "epoch": 0.9695899675577839, + "grad_norm": 429.6672058105469, + "learning_rate": 4.8756865709976284e-08, + "loss": 15.7475, + "step": 479980 + }, + { + "epoch": 0.9696101681904677, + "grad_norm": 552.9013061523438, + "learning_rate": 4.8708248935064315e-08, + "loss": 23.5726, + "step": 479990 + }, + { + "epoch": 0.9696303688231516, + "grad_norm": 270.3870849609375, + "learning_rate": 4.865965629214819e-08, + "loss": 14.1504, + "step": 480000 + }, + { + "epoch": 0.9696505694558354, + "grad_norm": 93.67717742919922, + "learning_rate": 4.861108778146495e-08, + "loss": 12.9809, + "step": 480010 + }, + { + "epoch": 0.9696707700885192, + "grad_norm": 1092.2213134765625, + "learning_rate": 4.856254340325051e-08, + "loss": 17.7695, + "step": 480020 + }, + { + "epoch": 0.969690970721203, + "grad_norm": 579.4368896484375, + "learning_rate": 4.851402315774134e-08, + "loss": 18.8982, + "step": 480030 + }, + { + "epoch": 0.9697111713538868, + "grad_norm": 327.2518005371094, + "learning_rate": 4.846552704517449e-08, + "loss": 19.1692, + "step": 480040 + }, + { + "epoch": 0.9697313719865707, + "grad_norm": 140.6658172607422, + "learning_rate": 4.841705506578587e-08, + "loss": 10.5863, + "step": 480050 + }, + { + "epoch": 0.9697515726192545, + "grad_norm": 232.82550048828125, + "learning_rate": 4.836860721981196e-08, + "loss": 16.9834, + "step": 480060 + }, + { + "epoch": 0.9697717732519382, + "grad_norm": 67.43278503417969, + "learning_rate": 4.8320183507489236e-08, + "loss": 12.7466, + "step": 480070 + }, + { + "epoch": 0.969791973884622, + "grad_norm": 478.03912353515625, + "learning_rate": 4.827178392905307e-08, + "loss": 18.2566, + "step": 480080 + }, + { + "epoch": 0.9698121745173058, + "grad_norm": 125.35701751708984, + "learning_rate": 4.822340848473994e-08, + "loss": 25.7218, + "step": 480090 + }, + { + "epoch": 0.9698323751499897, + "grad_norm": 123.36390686035156, + "learning_rate": 4.8175057174785766e-08, + "loss": 14.2658, + "step": 480100 + }, + { + "epoch": 0.9698525757826735, + "grad_norm": 257.4310607910156, + "learning_rate": 4.81267299994248e-08, + "loss": 7.4554, + "step": 480110 + }, + { + "epoch": 0.9698727764153573, + "grad_norm": 543.759033203125, + "learning_rate": 4.807842695889409e-08, + "loss": 22.3655, + "step": 480120 + }, + { + "epoch": 0.9698929770480411, + "grad_norm": 426.5550842285156, + "learning_rate": 4.8030148053428424e-08, + "loss": 17.6665, + "step": 480130 + }, + { + "epoch": 0.9699131776807249, + "grad_norm": 199.07061767578125, + "learning_rate": 4.798189328326319e-08, + "loss": 17.6431, + "step": 480140 + }, + { + "epoch": 0.9699333783134088, + "grad_norm": 161.0851287841797, + "learning_rate": 4.793366264863375e-08, + "loss": 16.4152, + "step": 480150 + }, + { + "epoch": 0.9699535789460926, + "grad_norm": 404.6234436035156, + "learning_rate": 4.788545614977491e-08, + "loss": 16.7375, + "step": 480160 + }, + { + "epoch": 0.9699737795787764, + "grad_norm": 1150.251708984375, + "learning_rate": 4.783727378692205e-08, + "loss": 21.3241, + "step": 480170 + }, + { + "epoch": 0.9699939802114602, + "grad_norm": 497.9523620605469, + "learning_rate": 4.778911556030885e-08, + "loss": 15.1136, + "step": 480180 + }, + { + "epoch": 0.970014180844144, + "grad_norm": 442.69622802734375, + "learning_rate": 4.774098147017181e-08, + "loss": 30.2108, + "step": 480190 + }, + { + "epoch": 0.9700343814768279, + "grad_norm": 169.4849090576172, + "learning_rate": 4.769287151674407e-08, + "loss": 34.2275, + "step": 480200 + }, + { + "epoch": 0.9700545821095117, + "grad_norm": 871.2241821289062, + "learning_rate": 4.764478570026043e-08, + "loss": 19.6836, + "step": 480210 + }, + { + "epoch": 0.9700747827421955, + "grad_norm": 239.61862182617188, + "learning_rate": 4.759672402095572e-08, + "loss": 15.2729, + "step": 480220 + }, + { + "epoch": 0.9700949833748793, + "grad_norm": 132.29234313964844, + "learning_rate": 4.754868647906419e-08, + "loss": 20.5158, + "step": 480230 + }, + { + "epoch": 0.9701151840075631, + "grad_norm": 95.70121765136719, + "learning_rate": 4.750067307481954e-08, + "loss": 14.4823, + "step": 480240 + }, + { + "epoch": 0.970135384640247, + "grad_norm": 423.11871337890625, + "learning_rate": 4.7452683808456026e-08, + "loss": 15.589, + "step": 480250 + }, + { + "epoch": 0.9701555852729308, + "grad_norm": 675.8088989257812, + "learning_rate": 4.740471868020735e-08, + "loss": 16.2484, + "step": 480260 + }, + { + "epoch": 0.9701757859056146, + "grad_norm": 734.3201904296875, + "learning_rate": 4.735677769030722e-08, + "loss": 32.3499, + "step": 480270 + }, + { + "epoch": 0.9701959865382984, + "grad_norm": 365.7787780761719, + "learning_rate": 4.730886083898989e-08, + "loss": 22.4048, + "step": 480280 + }, + { + "epoch": 0.9702161871709822, + "grad_norm": 247.57118225097656, + "learning_rate": 4.726096812648795e-08, + "loss": 19.5636, + "step": 480290 + }, + { + "epoch": 0.9702363878036661, + "grad_norm": 520.8023071289062, + "learning_rate": 4.7213099553035655e-08, + "loss": 13.0253, + "step": 480300 + }, + { + "epoch": 0.9702565884363499, + "grad_norm": 343.294189453125, + "learning_rate": 4.716525511886616e-08, + "loss": 29.1592, + "step": 480310 + }, + { + "epoch": 0.9702767890690337, + "grad_norm": 219.90328979492188, + "learning_rate": 4.711743482421205e-08, + "loss": 22.095, + "step": 480320 + }, + { + "epoch": 0.9702969897017174, + "grad_norm": 176.9662628173828, + "learning_rate": 4.7069638669307026e-08, + "loss": 16.2117, + "step": 480330 + }, + { + "epoch": 0.9703171903344012, + "grad_norm": 482.5318603515625, + "learning_rate": 4.702186665438424e-08, + "loss": 15.1355, + "step": 480340 + }, + { + "epoch": 0.970337390967085, + "grad_norm": 220.77462768554688, + "learning_rate": 4.697411877967573e-08, + "loss": 22.2234, + "step": 480350 + }, + { + "epoch": 0.9703575915997689, + "grad_norm": 50.13962173461914, + "learning_rate": 4.692639504541518e-08, + "loss": 7.2214, + "step": 480360 + }, + { + "epoch": 0.9703777922324527, + "grad_norm": 331.6729736328125, + "learning_rate": 4.68786954518341e-08, + "loss": 11.8504, + "step": 480370 + }, + { + "epoch": 0.9703979928651365, + "grad_norm": 470.2253112792969, + "learning_rate": 4.683101999916562e-08, + "loss": 6.9637, + "step": 480380 + }, + { + "epoch": 0.9704181934978203, + "grad_norm": 112.66553497314453, + "learning_rate": 4.6783368687642325e-08, + "loss": 11.6855, + "step": 480390 + }, + { + "epoch": 0.9704383941305041, + "grad_norm": 532.448486328125, + "learning_rate": 4.6735741517495715e-08, + "loss": 25.7508, + "step": 480400 + }, + { + "epoch": 0.970458594763188, + "grad_norm": 284.78729248046875, + "learning_rate": 4.668813848895837e-08, + "loss": 11.7805, + "step": 480410 + }, + { + "epoch": 0.9704787953958718, + "grad_norm": 195.6524200439453, + "learning_rate": 4.6640559602262325e-08, + "loss": 14.5171, + "step": 480420 + }, + { + "epoch": 0.9704989960285556, + "grad_norm": 151.35105895996094, + "learning_rate": 4.6593004857639627e-08, + "loss": 7.1785, + "step": 480430 + }, + { + "epoch": 0.9705191966612394, + "grad_norm": 350.37542724609375, + "learning_rate": 4.654547425532119e-08, + "loss": 13.3357, + "step": 480440 + }, + { + "epoch": 0.9705393972939232, + "grad_norm": 365.9966125488281, + "learning_rate": 4.649796779554016e-08, + "loss": 15.9065, + "step": 480450 + }, + { + "epoch": 0.9705595979266071, + "grad_norm": 213.68614196777344, + "learning_rate": 4.645048547852693e-08, + "loss": 17.5773, + "step": 480460 + }, + { + "epoch": 0.9705797985592909, + "grad_norm": 346.04425048828125, + "learning_rate": 4.6403027304513513e-08, + "loss": 9.6109, + "step": 480470 + }, + { + "epoch": 0.9705999991919747, + "grad_norm": 422.04339599609375, + "learning_rate": 4.635559327373029e-08, + "loss": 14.3172, + "step": 480480 + }, + { + "epoch": 0.9706201998246585, + "grad_norm": 148.32606506347656, + "learning_rate": 4.6308183386409855e-08, + "loss": 23.7274, + "step": 480490 + }, + { + "epoch": 0.9706404004573423, + "grad_norm": 87.15348815917969, + "learning_rate": 4.626079764278202e-08, + "loss": 15.3824, + "step": 480500 + }, + { + "epoch": 0.9706606010900262, + "grad_norm": 780.4072875976562, + "learning_rate": 4.621343604307826e-08, + "loss": 32.2571, + "step": 480510 + }, + { + "epoch": 0.97068080172271, + "grad_norm": 604.56298828125, + "learning_rate": 4.616609858753007e-08, + "loss": 16.4787, + "step": 480520 + }, + { + "epoch": 0.9707010023553938, + "grad_norm": 308.4506530761719, + "learning_rate": 4.6118785276366706e-08, + "loss": 19.7474, + "step": 480530 + }, + { + "epoch": 0.9707212029880776, + "grad_norm": 34.33176040649414, + "learning_rate": 4.6071496109819643e-08, + "loss": 16.4853, + "step": 480540 + }, + { + "epoch": 0.9707414036207614, + "grad_norm": 516.2884521484375, + "learning_rate": 4.6024231088119266e-08, + "loss": 20.3538, + "step": 480550 + }, + { + "epoch": 0.9707616042534453, + "grad_norm": 333.0625305175781, + "learning_rate": 4.597699021149649e-08, + "loss": 19.302, + "step": 480560 + }, + { + "epoch": 0.9707818048861291, + "grad_norm": 81.12806701660156, + "learning_rate": 4.592977348018002e-08, + "loss": 14.8552, + "step": 480570 + }, + { + "epoch": 0.9708020055188128, + "grad_norm": 467.4275817871094, + "learning_rate": 4.588258089440134e-08, + "loss": 11.0226, + "step": 480580 + }, + { + "epoch": 0.9708222061514966, + "grad_norm": 438.2637023925781, + "learning_rate": 4.5835412454390823e-08, + "loss": 16.0897, + "step": 480590 + }, + { + "epoch": 0.9708424067841804, + "grad_norm": 253.96768188476562, + "learning_rate": 4.578826816037718e-08, + "loss": 27.9838, + "step": 480600 + }, + { + "epoch": 0.9708626074168643, + "grad_norm": 300.85296630859375, + "learning_rate": 4.574114801259022e-08, + "loss": 13.6882, + "step": 480610 + }, + { + "epoch": 0.9708828080495481, + "grad_norm": 462.7916259765625, + "learning_rate": 4.569405201126087e-08, + "loss": 32.8059, + "step": 480620 + }, + { + "epoch": 0.9709030086822319, + "grad_norm": 599.6968383789062, + "learning_rate": 4.5646980156617284e-08, + "loss": 14.333, + "step": 480630 + }, + { + "epoch": 0.9709232093149157, + "grad_norm": 571.0783081054688, + "learning_rate": 4.5599932448889276e-08, + "loss": 27.9993, + "step": 480640 + }, + { + "epoch": 0.9709434099475995, + "grad_norm": 412.3912658691406, + "learning_rate": 4.5552908888306654e-08, + "loss": 13.605, + "step": 480650 + }, + { + "epoch": 0.9709636105802834, + "grad_norm": 659.2654418945312, + "learning_rate": 4.5505909475098144e-08, + "loss": 21.4791, + "step": 480660 + }, + { + "epoch": 0.9709838112129672, + "grad_norm": 343.37347412109375, + "learning_rate": 4.545893420949299e-08, + "loss": 17.2477, + "step": 480670 + }, + { + "epoch": 0.971004011845651, + "grad_norm": 263.5906982421875, + "learning_rate": 4.5411983091719905e-08, + "loss": 10.5555, + "step": 480680 + }, + { + "epoch": 0.9710242124783348, + "grad_norm": 128.11795043945312, + "learning_rate": 4.5365056122007586e-08, + "loss": 23.7357, + "step": 480690 + }, + { + "epoch": 0.9710444131110186, + "grad_norm": 292.9198913574219, + "learning_rate": 4.531815330058586e-08, + "loss": 20.5026, + "step": 480700 + }, + { + "epoch": 0.9710646137437025, + "grad_norm": 434.9056091308594, + "learning_rate": 4.527127462768233e-08, + "loss": 17.0049, + "step": 480710 + }, + { + "epoch": 0.9710848143763863, + "grad_norm": 501.1333923339844, + "learning_rate": 4.5224420103525125e-08, + "loss": 18.9389, + "step": 480720 + }, + { + "epoch": 0.9711050150090701, + "grad_norm": 255.53147888183594, + "learning_rate": 4.517758972834352e-08, + "loss": 18.6333, + "step": 480730 + }, + { + "epoch": 0.9711252156417539, + "grad_norm": 63.87846755981445, + "learning_rate": 4.5130783502365106e-08, + "loss": 12.9465, + "step": 480740 + }, + { + "epoch": 0.9711454162744377, + "grad_norm": 277.8436279296875, + "learning_rate": 4.508400142581859e-08, + "loss": 15.9341, + "step": 480750 + }, + { + "epoch": 0.9711656169071216, + "grad_norm": 372.5913391113281, + "learning_rate": 4.503724349893157e-08, + "loss": 22.5148, + "step": 480760 + }, + { + "epoch": 0.9711858175398054, + "grad_norm": 550.0427856445312, + "learning_rate": 4.49905097219322e-08, + "loss": 22.9235, + "step": 480770 + }, + { + "epoch": 0.9712060181724892, + "grad_norm": 700.8040161132812, + "learning_rate": 4.4943800095048615e-08, + "loss": 19.4991, + "step": 480780 + }, + { + "epoch": 0.971226218805173, + "grad_norm": 490.8512878417969, + "learning_rate": 4.4897114618506765e-08, + "loss": 19.7064, + "step": 480790 + }, + { + "epoch": 0.9712464194378568, + "grad_norm": 342.76123046875, + "learning_rate": 4.485045329253646e-08, + "loss": 17.5235, + "step": 480800 + }, + { + "epoch": 0.9712666200705407, + "grad_norm": 430.11834716796875, + "learning_rate": 4.480381611736362e-08, + "loss": 14.5157, + "step": 480810 + }, + { + "epoch": 0.9712868207032245, + "grad_norm": 151.7615966796875, + "learning_rate": 4.4757203093215854e-08, + "loss": 17.9508, + "step": 480820 + }, + { + "epoch": 0.9713070213359083, + "grad_norm": 129.25218200683594, + "learning_rate": 4.4710614220320746e-08, + "loss": 9.7335, + "step": 480830 + }, + { + "epoch": 0.971327221968592, + "grad_norm": 260.68408203125, + "learning_rate": 4.4664049498904796e-08, + "loss": 12.1655, + "step": 480840 + }, + { + "epoch": 0.9713474226012758, + "grad_norm": 498.87469482421875, + "learning_rate": 4.4617508929195585e-08, + "loss": 14.879, + "step": 480850 + }, + { + "epoch": 0.9713676232339596, + "grad_norm": 176.5625762939453, + "learning_rate": 4.457099251141961e-08, + "loss": 7.3391, + "step": 480860 + }, + { + "epoch": 0.9713878238666435, + "grad_norm": 308.27056884765625, + "learning_rate": 4.4524500245803346e-08, + "loss": 15.3614, + "step": 480870 + }, + { + "epoch": 0.9714080244993273, + "grad_norm": 322.5669250488281, + "learning_rate": 4.4478032132573845e-08, + "loss": 18.5765, + "step": 480880 + }, + { + "epoch": 0.9714282251320111, + "grad_norm": 428.40423583984375, + "learning_rate": 4.443158817195703e-08, + "loss": 37.4654, + "step": 480890 + }, + { + "epoch": 0.9714484257646949, + "grad_norm": 14.182168006896973, + "learning_rate": 4.438516836417994e-08, + "loss": 19.9014, + "step": 480900 + }, + { + "epoch": 0.9714686263973787, + "grad_norm": 493.1097717285156, + "learning_rate": 4.4338772709468514e-08, + "loss": 14.3817, + "step": 480910 + }, + { + "epoch": 0.9714888270300626, + "grad_norm": 235.4907684326172, + "learning_rate": 4.429240120804923e-08, + "loss": 31.6696, + "step": 480920 + }, + { + "epoch": 0.9715090276627464, + "grad_norm": 473.29937744140625, + "learning_rate": 4.424605386014691e-08, + "loss": 26.5514, + "step": 480930 + }, + { + "epoch": 0.9715292282954302, + "grad_norm": 496.5303955078125, + "learning_rate": 4.4199730665988594e-08, + "loss": 15.9515, + "step": 480940 + }, + { + "epoch": 0.971549428928114, + "grad_norm": 189.34280395507812, + "learning_rate": 4.415343162580022e-08, + "loss": 11.0705, + "step": 480950 + }, + { + "epoch": 0.9715696295607978, + "grad_norm": 182.630126953125, + "learning_rate": 4.4107156739806037e-08, + "loss": 17.192, + "step": 480960 + }, + { + "epoch": 0.9715898301934817, + "grad_norm": 19.358842849731445, + "learning_rate": 4.40609060082331e-08, + "loss": 11.0745, + "step": 480970 + }, + { + "epoch": 0.9716100308261655, + "grad_norm": 249.55409240722656, + "learning_rate": 4.401467943130622e-08, + "loss": 13.5404, + "step": 480980 + }, + { + "epoch": 0.9716302314588493, + "grad_norm": 409.1258544921875, + "learning_rate": 4.3968477009250775e-08, + "loss": 19.5579, + "step": 480990 + }, + { + "epoch": 0.9716504320915331, + "grad_norm": 214.1861114501953, + "learning_rate": 4.392229874229159e-08, + "loss": 17.2364, + "step": 481000 + }, + { + "epoch": 0.9716706327242169, + "grad_norm": 208.24266052246094, + "learning_rate": 4.387614463065404e-08, + "loss": 26.5228, + "step": 481010 + }, + { + "epoch": 0.9716908333569008, + "grad_norm": 916.2860107421875, + "learning_rate": 4.383001467456294e-08, + "loss": 21.4474, + "step": 481020 + }, + { + "epoch": 0.9717110339895846, + "grad_norm": 1262.3741455078125, + "learning_rate": 4.378390887424366e-08, + "loss": 20.0141, + "step": 481030 + }, + { + "epoch": 0.9717312346222684, + "grad_norm": 484.7181701660156, + "learning_rate": 4.3737827229919926e-08, + "loss": 24.9658, + "step": 481040 + }, + { + "epoch": 0.9717514352549522, + "grad_norm": 199.1392822265625, + "learning_rate": 4.36917697418171e-08, + "loss": 13.892, + "step": 481050 + }, + { + "epoch": 0.971771635887636, + "grad_norm": 56.355411529541016, + "learning_rate": 4.364573641016001e-08, + "loss": 16.0134, + "step": 481060 + }, + { + "epoch": 0.9717918365203199, + "grad_norm": 294.04638671875, + "learning_rate": 4.359972723517236e-08, + "loss": 25.3006, + "step": 481070 + }, + { + "epoch": 0.9718120371530037, + "grad_norm": 230.2291259765625, + "learning_rate": 4.3553742217077866e-08, + "loss": 15.2554, + "step": 481080 + }, + { + "epoch": 0.9718322377856875, + "grad_norm": 100.92296600341797, + "learning_rate": 4.350778135610134e-08, + "loss": 11.8245, + "step": 481090 + }, + { + "epoch": 0.9718524384183712, + "grad_norm": 3.7475879192352295, + "learning_rate": 4.346184465246761e-08, + "loss": 15.6748, + "step": 481100 + }, + { + "epoch": 0.971872639051055, + "grad_norm": 409.6090087890625, + "learning_rate": 4.3415932106398715e-08, + "loss": 20.7157, + "step": 481110 + }, + { + "epoch": 0.9718928396837389, + "grad_norm": 396.5038757324219, + "learning_rate": 4.3370043718119484e-08, + "loss": 14.2519, + "step": 481120 + }, + { + "epoch": 0.9719130403164227, + "grad_norm": 455.0494689941406, + "learning_rate": 4.332417948785417e-08, + "loss": 20.1041, + "step": 481130 + }, + { + "epoch": 0.9719332409491065, + "grad_norm": 298.5553283691406, + "learning_rate": 4.327833941582538e-08, + "loss": 19.1641, + "step": 481140 + }, + { + "epoch": 0.9719534415817903, + "grad_norm": 259.3094177246094, + "learning_rate": 4.3232523502256264e-08, + "loss": 30.4921, + "step": 481150 + }, + { + "epoch": 0.9719736422144741, + "grad_norm": 36.026588439941406, + "learning_rate": 4.318673174737109e-08, + "loss": 8.7467, + "step": 481160 + }, + { + "epoch": 0.971993842847158, + "grad_norm": 358.4710693359375, + "learning_rate": 4.3140964151393015e-08, + "loss": 9.7279, + "step": 481170 + }, + { + "epoch": 0.9720140434798418, + "grad_norm": 439.7259216308594, + "learning_rate": 4.3095220714544084e-08, + "loss": 13.9889, + "step": 481180 + }, + { + "epoch": 0.9720342441125256, + "grad_norm": 202.249755859375, + "learning_rate": 4.304950143704745e-08, + "loss": 32.0029, + "step": 481190 + }, + { + "epoch": 0.9720544447452094, + "grad_norm": 282.92864990234375, + "learning_rate": 4.3003806319127376e-08, + "loss": 13.5526, + "step": 481200 + }, + { + "epoch": 0.9720746453778932, + "grad_norm": 222.46804809570312, + "learning_rate": 4.2958135361004794e-08, + "loss": 14.219, + "step": 481210 + }, + { + "epoch": 0.972094846010577, + "grad_norm": 247.01966857910156, + "learning_rate": 4.291248856290342e-08, + "loss": 22.9882, + "step": 481220 + }, + { + "epoch": 0.9721150466432609, + "grad_norm": 363.4715576171875, + "learning_rate": 4.28668659250453e-08, + "loss": 19.1361, + "step": 481230 + }, + { + "epoch": 0.9721352472759447, + "grad_norm": 279.6907043457031, + "learning_rate": 4.282126744765247e-08, + "loss": 18.316, + "step": 481240 + }, + { + "epoch": 0.9721554479086285, + "grad_norm": 236.50823974609375, + "learning_rate": 4.2775693130948094e-08, + "loss": 33.653, + "step": 481250 + }, + { + "epoch": 0.9721756485413123, + "grad_norm": 0.12165041267871857, + "learning_rate": 4.2730142975153654e-08, + "loss": 23.2239, + "step": 481260 + }, + { + "epoch": 0.9721958491739962, + "grad_norm": 269.12451171875, + "learning_rate": 4.26846169804912e-08, + "loss": 7.2478, + "step": 481270 + }, + { + "epoch": 0.97221604980668, + "grad_norm": 169.82176208496094, + "learning_rate": 4.263911514718222e-08, + "loss": 23.8434, + "step": 481280 + }, + { + "epoch": 0.9722362504393638, + "grad_norm": 1032.0400390625, + "learning_rate": 4.259363747544931e-08, + "loss": 26.5229, + "step": 481290 + }, + { + "epoch": 0.9722564510720476, + "grad_norm": 326.4361267089844, + "learning_rate": 4.2548183965513415e-08, + "loss": 19.0609, + "step": 481300 + }, + { + "epoch": 0.9722766517047314, + "grad_norm": 200.75393676757812, + "learning_rate": 4.250275461759712e-08, + "loss": 22.7418, + "step": 481310 + }, + { + "epoch": 0.9722968523374153, + "grad_norm": 357.01751708984375, + "learning_rate": 4.245734943192081e-08, + "loss": 15.4631, + "step": 481320 + }, + { + "epoch": 0.9723170529700991, + "grad_norm": 334.8346862792969, + "learning_rate": 4.241196840870598e-08, + "loss": 16.0759, + "step": 481330 + }, + { + "epoch": 0.9723372536027829, + "grad_norm": 252.5950469970703, + "learning_rate": 4.236661154817412e-08, + "loss": 5.5812, + "step": 481340 + }, + { + "epoch": 0.9723574542354666, + "grad_norm": 310.70068359375, + "learning_rate": 4.23212788505456e-08, + "loss": 17.8986, + "step": 481350 + }, + { + "epoch": 0.9723776548681504, + "grad_norm": 362.728515625, + "learning_rate": 4.227597031604247e-08, + "loss": 13.8651, + "step": 481360 + }, + { + "epoch": 0.9723978555008342, + "grad_norm": 718.6262817382812, + "learning_rate": 4.2230685944884554e-08, + "loss": 31.8578, + "step": 481370 + }, + { + "epoch": 0.9724180561335181, + "grad_norm": 399.081787109375, + "learning_rate": 4.218542573729334e-08, + "loss": 20.2966, + "step": 481380 + }, + { + "epoch": 0.9724382567662019, + "grad_norm": 261.89031982421875, + "learning_rate": 4.2140189693488654e-08, + "loss": 30.2808, + "step": 481390 + }, + { + "epoch": 0.9724584573988857, + "grad_norm": 309.1327209472656, + "learning_rate": 4.209497781369143e-08, + "loss": 7.3983, + "step": 481400 + }, + { + "epoch": 0.9724786580315695, + "grad_norm": 325.05633544921875, + "learning_rate": 4.20497900981226e-08, + "loss": 17.7839, + "step": 481410 + }, + { + "epoch": 0.9724988586642533, + "grad_norm": 60.344329833984375, + "learning_rate": 4.2004626547000885e-08, + "loss": 11.7354, + "step": 481420 + }, + { + "epoch": 0.9725190592969372, + "grad_norm": 437.69622802734375, + "learning_rate": 4.195948716054776e-08, + "loss": 23.6131, + "step": 481430 + }, + { + "epoch": 0.972539259929621, + "grad_norm": 432.4437561035156, + "learning_rate": 4.191437193898251e-08, + "loss": 26.2536, + "step": 481440 + }, + { + "epoch": 0.9725594605623048, + "grad_norm": 393.9098205566406, + "learning_rate": 4.1869280882525506e-08, + "loss": 33.5293, + "step": 481450 + }, + { + "epoch": 0.9725796611949886, + "grad_norm": 126.11160278320312, + "learning_rate": 4.1824213991396024e-08, + "loss": 13.8136, + "step": 481460 + }, + { + "epoch": 0.9725998618276724, + "grad_norm": 60.79416275024414, + "learning_rate": 4.1779171265814435e-08, + "loss": 10.5208, + "step": 481470 + }, + { + "epoch": 0.9726200624603563, + "grad_norm": 189.42037963867188, + "learning_rate": 4.173415270599945e-08, + "loss": 21.2066, + "step": 481480 + }, + { + "epoch": 0.9726402630930401, + "grad_norm": 373.3421630859375, + "learning_rate": 4.168915831217091e-08, + "loss": 14.9442, + "step": 481490 + }, + { + "epoch": 0.9726604637257239, + "grad_norm": 282.920166015625, + "learning_rate": 4.164418808454806e-08, + "loss": 16.3627, + "step": 481500 + }, + { + "epoch": 0.9726806643584077, + "grad_norm": 251.2848358154297, + "learning_rate": 4.159924202334964e-08, + "loss": 21.8468, + "step": 481510 + }, + { + "epoch": 0.9727008649910915, + "grad_norm": 81.56550598144531, + "learning_rate": 4.1554320128795455e-08, + "loss": 13.4286, + "step": 481520 + }, + { + "epoch": 0.9727210656237754, + "grad_norm": 390.93798828125, + "learning_rate": 4.150942240110478e-08, + "loss": 13.1788, + "step": 481530 + }, + { + "epoch": 0.9727412662564592, + "grad_norm": 456.1376953125, + "learning_rate": 4.146454884049467e-08, + "loss": 22.1921, + "step": 481540 + }, + { + "epoch": 0.972761466889143, + "grad_norm": 565.8245849609375, + "learning_rate": 4.1419699447186045e-08, + "loss": 55.9171, + "step": 481550 + }, + { + "epoch": 0.9727816675218268, + "grad_norm": 669.5379028320312, + "learning_rate": 4.137487422139541e-08, + "loss": 24.39, + "step": 481560 + }, + { + "epoch": 0.9728018681545106, + "grad_norm": 78.6161117553711, + "learning_rate": 4.133007316334259e-08, + "loss": 13.201, + "step": 481570 + }, + { + "epoch": 0.9728220687871945, + "grad_norm": 78.4344711303711, + "learning_rate": 4.128529627324573e-08, + "loss": 20.3774, + "step": 481580 + }, + { + "epoch": 0.9728422694198783, + "grad_norm": 380.95587158203125, + "learning_rate": 4.124054355132301e-08, + "loss": 12.9402, + "step": 481590 + }, + { + "epoch": 0.9728624700525621, + "grad_norm": 485.6889343261719, + "learning_rate": 4.1195814997792014e-08, + "loss": 11.6911, + "step": 481600 + }, + { + "epoch": 0.9728826706852458, + "grad_norm": 471.99139404296875, + "learning_rate": 4.1151110612872023e-08, + "loss": 18.3957, + "step": 481610 + }, + { + "epoch": 0.9729028713179296, + "grad_norm": 628.6807861328125, + "learning_rate": 4.1106430396778974e-08, + "loss": 30.9745, + "step": 481620 + }, + { + "epoch": 0.9729230719506134, + "grad_norm": 203.78421020507812, + "learning_rate": 4.1061774349732686e-08, + "loss": 12.6072, + "step": 481630 + }, + { + "epoch": 0.9729432725832973, + "grad_norm": 325.5924987792969, + "learning_rate": 4.10171424719491e-08, + "loss": 25.4391, + "step": 481640 + }, + { + "epoch": 0.9729634732159811, + "grad_norm": 293.34912109375, + "learning_rate": 4.097253476364693e-08, + "loss": 35.9362, + "step": 481650 + }, + { + "epoch": 0.9729836738486649, + "grad_norm": 170.9857177734375, + "learning_rate": 4.092795122504323e-08, + "loss": 17.5913, + "step": 481660 + }, + { + "epoch": 0.9730038744813487, + "grad_norm": 682.1256103515625, + "learning_rate": 4.088339185635504e-08, + "loss": 13.5609, + "step": 481670 + }, + { + "epoch": 0.9730240751140325, + "grad_norm": 350.8077087402344, + "learning_rate": 4.083885665779996e-08, + "loss": 23.3654, + "step": 481680 + }, + { + "epoch": 0.9730442757467164, + "grad_norm": 403.890625, + "learning_rate": 4.07943456295945e-08, + "loss": 25.1687, + "step": 481690 + }, + { + "epoch": 0.9730644763794002, + "grad_norm": 0.18735237419605255, + "learning_rate": 4.0749858771956253e-08, + "loss": 10.6021, + "step": 481700 + }, + { + "epoch": 0.973084677012084, + "grad_norm": 451.27435302734375, + "learning_rate": 4.070539608510171e-08, + "loss": 25.175, + "step": 481710 + }, + { + "epoch": 0.9731048776447678, + "grad_norm": 744.0653686523438, + "learning_rate": 4.066095756924682e-08, + "loss": 34.801, + "step": 481720 + }, + { + "epoch": 0.9731250782774516, + "grad_norm": 14.12344741821289, + "learning_rate": 4.061654322460973e-08, + "loss": 19.8248, + "step": 481730 + }, + { + "epoch": 0.9731452789101355, + "grad_norm": 167.87937927246094, + "learning_rate": 4.0572153051406383e-08, + "loss": 15.7398, + "step": 481740 + }, + { + "epoch": 0.9731654795428193, + "grad_norm": 193.6814727783203, + "learning_rate": 4.052778704985216e-08, + "loss": 25.9701, + "step": 481750 + }, + { + "epoch": 0.9731856801755031, + "grad_norm": 437.42242431640625, + "learning_rate": 4.048344522016356e-08, + "loss": 21.1084, + "step": 481760 + }, + { + "epoch": 0.9732058808081869, + "grad_norm": 628.2918090820312, + "learning_rate": 4.043912756255819e-08, + "loss": 19.3399, + "step": 481770 + }, + { + "epoch": 0.9732260814408707, + "grad_norm": 197.848876953125, + "learning_rate": 4.039483407725031e-08, + "loss": 17.7387, + "step": 481780 + }, + { + "epoch": 0.9732462820735546, + "grad_norm": 301.0892333984375, + "learning_rate": 4.035056476445698e-08, + "loss": 24.283, + "step": 481790 + }, + { + "epoch": 0.9732664827062384, + "grad_norm": 535.1521606445312, + "learning_rate": 4.030631962439302e-08, + "loss": 13.416, + "step": 481800 + }, + { + "epoch": 0.9732866833389222, + "grad_norm": 199.53504943847656, + "learning_rate": 4.026209865727493e-08, + "loss": 22.0506, + "step": 481810 + }, + { + "epoch": 0.973306883971606, + "grad_norm": 77.13323974609375, + "learning_rate": 4.0217901863317534e-08, + "loss": 12.3341, + "step": 481820 + }, + { + "epoch": 0.9733270846042898, + "grad_norm": 553.6863403320312, + "learning_rate": 4.017372924273621e-08, + "loss": 25.0607, + "step": 481830 + }, + { + "epoch": 0.9733472852369737, + "grad_norm": 112.62586975097656, + "learning_rate": 4.012958079574747e-08, + "loss": 17.1677, + "step": 481840 + }, + { + "epoch": 0.9733674858696575, + "grad_norm": 160.67294311523438, + "learning_rate": 4.008545652256502e-08, + "loss": 16.0821, + "step": 481850 + }, + { + "epoch": 0.9733876865023412, + "grad_norm": 88.95194244384766, + "learning_rate": 4.004135642340423e-08, + "loss": 14.0948, + "step": 481860 + }, + { + "epoch": 0.973407887135025, + "grad_norm": 199.16061401367188, + "learning_rate": 3.999728049848106e-08, + "loss": 12.0725, + "step": 481870 + }, + { + "epoch": 0.9734280877677088, + "grad_norm": 470.1341247558594, + "learning_rate": 3.995322874800922e-08, + "loss": 26.116, + "step": 481880 + }, + { + "epoch": 0.9734482884003927, + "grad_norm": 118.9264907836914, + "learning_rate": 3.9909201172203537e-08, + "loss": 18.3537, + "step": 481890 + }, + { + "epoch": 0.9734684890330765, + "grad_norm": 12.092803001403809, + "learning_rate": 3.986519777127884e-08, + "loss": 10.6664, + "step": 481900 + }, + { + "epoch": 0.9734886896657603, + "grad_norm": 42.88130187988281, + "learning_rate": 3.9821218545449956e-08, + "loss": 9.6017, + "step": 481910 + }, + { + "epoch": 0.9735088902984441, + "grad_norm": 111.88728332519531, + "learning_rate": 3.977726349493061e-08, + "loss": 16.497, + "step": 481920 + }, + { + "epoch": 0.9735290909311279, + "grad_norm": 265.8011779785156, + "learning_rate": 3.973333261993506e-08, + "loss": 30.0345, + "step": 481930 + }, + { + "epoch": 0.9735492915638118, + "grad_norm": 7.396425724029541, + "learning_rate": 3.9689425920678146e-08, + "loss": 31.8613, + "step": 481940 + }, + { + "epoch": 0.9735694921964956, + "grad_norm": 209.86404418945312, + "learning_rate": 3.964554339737303e-08, + "loss": 39.5427, + "step": 481950 + }, + { + "epoch": 0.9735896928291794, + "grad_norm": 909.5701293945312, + "learning_rate": 3.960168505023343e-08, + "loss": 30.6708, + "step": 481960 + }, + { + "epoch": 0.9736098934618632, + "grad_norm": 885.4310302734375, + "learning_rate": 3.955785087947473e-08, + "loss": 17.3833, + "step": 481970 + }, + { + "epoch": 0.973630094094547, + "grad_norm": 640.5752563476562, + "learning_rate": 3.951404088530841e-08, + "loss": 25.2004, + "step": 481980 + }, + { + "epoch": 0.9736502947272309, + "grad_norm": 345.5538635253906, + "learning_rate": 3.947025506794933e-08, + "loss": 25.903, + "step": 481990 + }, + { + "epoch": 0.9736704953599147, + "grad_norm": 286.027099609375, + "learning_rate": 3.9426493427611177e-08, + "loss": 9.783, + "step": 482000 + }, + { + "epoch": 0.9736906959925985, + "grad_norm": 334.16693115234375, + "learning_rate": 3.938275596450603e-08, + "loss": 17.6792, + "step": 482010 + }, + { + "epoch": 0.9737108966252823, + "grad_norm": 750.4974975585938, + "learning_rate": 3.933904267884758e-08, + "loss": 16.873, + "step": 482020 + }, + { + "epoch": 0.9737310972579661, + "grad_norm": 354.44659423828125, + "learning_rate": 3.929535357084957e-08, + "loss": 33.9045, + "step": 482030 + }, + { + "epoch": 0.97375129789065, + "grad_norm": 163.46725463867188, + "learning_rate": 3.925168864072348e-08, + "loss": 12.3901, + "step": 482040 + }, + { + "epoch": 0.9737714985233338, + "grad_norm": 357.58935546875, + "learning_rate": 3.9208047888683597e-08, + "loss": 8.9686, + "step": 482050 + }, + { + "epoch": 0.9737916991560176, + "grad_norm": 256.0655517578125, + "learning_rate": 3.9164431314941965e-08, + "loss": 18.6118, + "step": 482060 + }, + { + "epoch": 0.9738118997887014, + "grad_norm": 475.1147155761719, + "learning_rate": 3.912083891971119e-08, + "loss": 14.304, + "step": 482070 + }, + { + "epoch": 0.9738321004213852, + "grad_norm": 621.0291137695312, + "learning_rate": 3.907727070320389e-08, + "loss": 17.7575, + "step": 482080 + }, + { + "epoch": 0.9738523010540691, + "grad_norm": 294.4523010253906, + "learning_rate": 3.9033726665632096e-08, + "loss": 11.2396, + "step": 482090 + }, + { + "epoch": 0.9738725016867529, + "grad_norm": 306.70318603515625, + "learning_rate": 3.899020680720844e-08, + "loss": 18.0931, + "step": 482100 + }, + { + "epoch": 0.9738927023194367, + "grad_norm": 857.6680297851562, + "learning_rate": 3.894671112814441e-08, + "loss": 12.2714, + "step": 482110 + }, + { + "epoch": 0.9739129029521204, + "grad_norm": 496.441162109375, + "learning_rate": 3.8903239628652615e-08, + "loss": 12.2289, + "step": 482120 + }, + { + "epoch": 0.9739331035848042, + "grad_norm": 143.51065063476562, + "learning_rate": 3.88597923089451e-08, + "loss": 20.1973, + "step": 482130 + }, + { + "epoch": 0.973953304217488, + "grad_norm": 1033.66796875, + "learning_rate": 3.881636916923281e-08, + "loss": 20.2224, + "step": 482140 + }, + { + "epoch": 0.9739735048501719, + "grad_norm": 485.4521179199219, + "learning_rate": 3.877297020972781e-08, + "loss": 24.38, + "step": 482150 + }, + { + "epoch": 0.9739937054828557, + "grad_norm": 167.51791381835938, + "learning_rate": 3.8729595430641586e-08, + "loss": 16.1247, + "step": 482160 + }, + { + "epoch": 0.9740139061155395, + "grad_norm": 431.7303771972656, + "learning_rate": 3.868624483218619e-08, + "loss": 22.341, + "step": 482170 + }, + { + "epoch": 0.9740341067482233, + "grad_norm": 76.6075439453125, + "learning_rate": 3.864291841457146e-08, + "loss": 22.8067, + "step": 482180 + }, + { + "epoch": 0.9740543073809071, + "grad_norm": 313.4910583496094, + "learning_rate": 3.859961617801e-08, + "loss": 23.8506, + "step": 482190 + }, + { + "epoch": 0.974074508013591, + "grad_norm": 1129.052001953125, + "learning_rate": 3.855633812271165e-08, + "loss": 20.4741, + "step": 482200 + }, + { + "epoch": 0.9740947086462748, + "grad_norm": 457.9103698730469, + "learning_rate": 3.8513084248888445e-08, + "loss": 23.1593, + "step": 482210 + }, + { + "epoch": 0.9741149092789586, + "grad_norm": 453.9139404296875, + "learning_rate": 3.8469854556750785e-08, + "loss": 18.997, + "step": 482220 + }, + { + "epoch": 0.9741351099116424, + "grad_norm": 312.4790954589844, + "learning_rate": 3.842664904650906e-08, + "loss": 20.262, + "step": 482230 + }, + { + "epoch": 0.9741553105443262, + "grad_norm": 347.3577575683594, + "learning_rate": 3.83834677183742e-08, + "loss": 28.9999, + "step": 482240 + }, + { + "epoch": 0.9741755111770101, + "grad_norm": 345.5404357910156, + "learning_rate": 3.83403105725566e-08, + "loss": 12.5937, + "step": 482250 + }, + { + "epoch": 0.9741957118096939, + "grad_norm": 311.71612548828125, + "learning_rate": 3.82971776092661e-08, + "loss": 17.8435, + "step": 482260 + }, + { + "epoch": 0.9742159124423777, + "grad_norm": 484.7486267089844, + "learning_rate": 3.825406882871363e-08, + "loss": 12.1719, + "step": 482270 + }, + { + "epoch": 0.9742361130750615, + "grad_norm": 942.969970703125, + "learning_rate": 3.8210984231109583e-08, + "loss": 15.9121, + "step": 482280 + }, + { + "epoch": 0.9742563137077453, + "grad_norm": 518.7152099609375, + "learning_rate": 3.816792381666268e-08, + "loss": 13.4377, + "step": 482290 + }, + { + "epoch": 0.9742765143404292, + "grad_norm": 464.7488708496094, + "learning_rate": 3.812488758558386e-08, + "loss": 41.7322, + "step": 482300 + }, + { + "epoch": 0.974296714973113, + "grad_norm": 411.00128173828125, + "learning_rate": 3.8081875538082404e-08, + "loss": 18.5426, + "step": 482310 + }, + { + "epoch": 0.9743169156057968, + "grad_norm": 474.0198974609375, + "learning_rate": 3.8038887674368697e-08, + "loss": 17.4803, + "step": 482320 + }, + { + "epoch": 0.9743371162384806, + "grad_norm": 637.5534057617188, + "learning_rate": 3.799592399465091e-08, + "loss": 20.2909, + "step": 482330 + }, + { + "epoch": 0.9743573168711644, + "grad_norm": 397.6789245605469, + "learning_rate": 3.7952984499138864e-08, + "loss": 19.7161, + "step": 482340 + }, + { + "epoch": 0.9743775175038483, + "grad_norm": 28.488502502441406, + "learning_rate": 3.791006918804296e-08, + "loss": 16.1842, + "step": 482350 + }, + { + "epoch": 0.9743977181365321, + "grad_norm": 123.11540222167969, + "learning_rate": 3.786717806157136e-08, + "loss": 16.763, + "step": 482360 + }, + { + "epoch": 0.9744179187692159, + "grad_norm": 240.61846923828125, + "learning_rate": 3.782431111993279e-08, + "loss": 27.4726, + "step": 482370 + }, + { + "epoch": 0.9744381194018996, + "grad_norm": 424.6113586425781, + "learning_rate": 3.778146836333707e-08, + "loss": 18.4896, + "step": 482380 + }, + { + "epoch": 0.9744583200345834, + "grad_norm": 316.656494140625, + "learning_rate": 3.7738649791992934e-08, + "loss": 17.3348, + "step": 482390 + }, + { + "epoch": 0.9744785206672673, + "grad_norm": 666.3976440429688, + "learning_rate": 3.769585540610799e-08, + "loss": 24.009, + "step": 482400 + }, + { + "epoch": 0.9744987212999511, + "grad_norm": 764.1138916015625, + "learning_rate": 3.765308520589206e-08, + "loss": 63.306, + "step": 482410 + }, + { + "epoch": 0.9745189219326349, + "grad_norm": 311.10931396484375, + "learning_rate": 3.761033919155333e-08, + "loss": 17.2908, + "step": 482420 + }, + { + "epoch": 0.9745391225653187, + "grad_norm": 347.3961486816406, + "learning_rate": 3.7567617363299945e-08, + "loss": 18.8477, + "step": 482430 + }, + { + "epoch": 0.9745593231980025, + "grad_norm": 341.66497802734375, + "learning_rate": 3.7524919721339535e-08, + "loss": 16.1301, + "step": 482440 + }, + { + "epoch": 0.9745795238306864, + "grad_norm": 432.0102233886719, + "learning_rate": 3.748224626588137e-08, + "loss": 30.7898, + "step": 482450 + }, + { + "epoch": 0.9745997244633702, + "grad_norm": 666.8880615234375, + "learning_rate": 3.743959699713251e-08, + "loss": 43.5052, + "step": 482460 + }, + { + "epoch": 0.974619925096054, + "grad_norm": 560.2584838867188, + "learning_rate": 3.739697191530112e-08, + "loss": 19.5189, + "step": 482470 + }, + { + "epoch": 0.9746401257287378, + "grad_norm": 146.7061309814453, + "learning_rate": 3.735437102059536e-08, + "loss": 13.0949, + "step": 482480 + }, + { + "epoch": 0.9746603263614216, + "grad_norm": 568.6498413085938, + "learning_rate": 3.731179431322285e-08, + "loss": 23.3692, + "step": 482490 + }, + { + "epoch": 0.9746805269941055, + "grad_norm": 178.3436279296875, + "learning_rate": 3.726924179339009e-08, + "loss": 16.9201, + "step": 482500 + }, + { + "epoch": 0.9747007276267893, + "grad_norm": 83.3872299194336, + "learning_rate": 3.7226713461305245e-08, + "loss": 24.7302, + "step": 482510 + }, + { + "epoch": 0.9747209282594731, + "grad_norm": 812.8739624023438, + "learning_rate": 3.7184209317175366e-08, + "loss": 37.4827, + "step": 482520 + }, + { + "epoch": 0.9747411288921569, + "grad_norm": 431.8472595214844, + "learning_rate": 3.714172936120808e-08, + "loss": 20.2624, + "step": 482530 + }, + { + "epoch": 0.9747613295248407, + "grad_norm": 129.54502868652344, + "learning_rate": 3.7099273593609316e-08, + "loss": 14.5634, + "step": 482540 + }, + { + "epoch": 0.9747815301575246, + "grad_norm": 368.3341979980469, + "learning_rate": 3.7056842014587815e-08, + "loss": 23.7714, + "step": 482550 + }, + { + "epoch": 0.9748017307902084, + "grad_norm": 510.84796142578125, + "learning_rate": 3.701443462434895e-08, + "loss": 10.4416, + "step": 482560 + }, + { + "epoch": 0.9748219314228922, + "grad_norm": 61.4462890625, + "learning_rate": 3.697205142309923e-08, + "loss": 26.5968, + "step": 482570 + }, + { + "epoch": 0.974842132055576, + "grad_norm": 402.5279235839844, + "learning_rate": 3.692969241104683e-08, + "loss": 15.7557, + "step": 482580 + }, + { + "epoch": 0.9748623326882598, + "grad_norm": 422.69580078125, + "learning_rate": 3.688735758839601e-08, + "loss": 8.5926, + "step": 482590 + }, + { + "epoch": 0.9748825333209437, + "grad_norm": 440.9681396484375, + "learning_rate": 3.684504695535496e-08, + "loss": 19.7904, + "step": 482600 + }, + { + "epoch": 0.9749027339536275, + "grad_norm": 778.0719604492188, + "learning_rate": 3.680276051212961e-08, + "loss": 17.826, + "step": 482610 + }, + { + "epoch": 0.9749229345863113, + "grad_norm": 272.3429260253906, + "learning_rate": 3.67604982589248e-08, + "loss": 16.8534, + "step": 482620 + }, + { + "epoch": 0.974943135218995, + "grad_norm": 149.3791961669922, + "learning_rate": 3.6718260195947594e-08, + "loss": 13.4943, + "step": 482630 + }, + { + "epoch": 0.9749633358516788, + "grad_norm": 983.81640625, + "learning_rate": 3.6676046323403934e-08, + "loss": 26.2069, + "step": 482640 + }, + { + "epoch": 0.9749835364843626, + "grad_norm": 727.1124267578125, + "learning_rate": 3.663385664149866e-08, + "loss": 23.7736, + "step": 482650 + }, + { + "epoch": 0.9750037371170465, + "grad_norm": 104.9148178100586, + "learning_rate": 3.659169115043826e-08, + "loss": 17.0994, + "step": 482660 + }, + { + "epoch": 0.9750239377497303, + "grad_norm": 369.6506042480469, + "learning_rate": 3.654954985042869e-08, + "loss": 23.4421, + "step": 482670 + }, + { + "epoch": 0.9750441383824141, + "grad_norm": 291.3814697265625, + "learning_rate": 3.650743274167368e-08, + "loss": 11.2448, + "step": 482680 + }, + { + "epoch": 0.9750643390150979, + "grad_norm": 292.85687255859375, + "learning_rate": 3.6465339824379165e-08, + "loss": 18.7769, + "step": 482690 + }, + { + "epoch": 0.9750845396477817, + "grad_norm": 549.0516357421875, + "learning_rate": 3.642327109875166e-08, + "loss": 24.1786, + "step": 482700 + }, + { + "epoch": 0.9751047402804656, + "grad_norm": 442.29925537109375, + "learning_rate": 3.638122656499432e-08, + "loss": 19.9398, + "step": 482710 + }, + { + "epoch": 0.9751249409131494, + "grad_norm": 524.1804809570312, + "learning_rate": 3.633920622331311e-08, + "loss": 21.6374, + "step": 482720 + }, + { + "epoch": 0.9751451415458332, + "grad_norm": 135.19679260253906, + "learning_rate": 3.629721007391229e-08, + "loss": 26.2123, + "step": 482730 + }, + { + "epoch": 0.975165342178517, + "grad_norm": 287.5704345703125, + "learning_rate": 3.625523811699727e-08, + "loss": 15.5823, + "step": 482740 + }, + { + "epoch": 0.9751855428112008, + "grad_norm": 545.2062377929688, + "learning_rate": 3.621329035277232e-08, + "loss": 12.4626, + "step": 482750 + }, + { + "epoch": 0.9752057434438847, + "grad_norm": 8.507187843322754, + "learning_rate": 3.617136678144173e-08, + "loss": 13.9089, + "step": 482760 + }, + { + "epoch": 0.9752259440765685, + "grad_norm": 1035.490234375, + "learning_rate": 3.612946740320977e-08, + "loss": 25.8994, + "step": 482770 + }, + { + "epoch": 0.9752461447092523, + "grad_norm": 542.39306640625, + "learning_rate": 3.608759221828073e-08, + "loss": 30.5626, + "step": 482780 + }, + { + "epoch": 0.9752663453419361, + "grad_norm": 566.2247924804688, + "learning_rate": 3.604574122685833e-08, + "loss": 20.473, + "step": 482790 + }, + { + "epoch": 0.97528654597462, + "grad_norm": 730.0471801757812, + "learning_rate": 3.600391442914741e-08, + "loss": 19.1692, + "step": 482800 + }, + { + "epoch": 0.9753067466073038, + "grad_norm": 455.58203125, + "learning_rate": 3.5962111825350585e-08, + "loss": 22.6982, + "step": 482810 + }, + { + "epoch": 0.9753269472399876, + "grad_norm": 464.4102783203125, + "learning_rate": 3.592033341567325e-08, + "loss": 13.5132, + "step": 482820 + }, + { + "epoch": 0.9753471478726714, + "grad_norm": 301.8681945800781, + "learning_rate": 3.5878579200318006e-08, + "loss": 25.8108, + "step": 482830 + }, + { + "epoch": 0.9753673485053552, + "grad_norm": 403.30450439453125, + "learning_rate": 3.583684917948804e-08, + "loss": 20.1903, + "step": 482840 + }, + { + "epoch": 0.975387549138039, + "grad_norm": 253.32778930664062, + "learning_rate": 3.579514335338763e-08, + "loss": 18.7812, + "step": 482850 + }, + { + "epoch": 0.9754077497707229, + "grad_norm": 298.9661560058594, + "learning_rate": 3.575346172221939e-08, + "loss": 23.7279, + "step": 482860 + }, + { + "epoch": 0.9754279504034067, + "grad_norm": 390.77490234375, + "learning_rate": 3.5711804286187035e-08, + "loss": 15.822, + "step": 482870 + }, + { + "epoch": 0.9754481510360905, + "grad_norm": 214.9738311767578, + "learning_rate": 3.5670171045492643e-08, + "loss": 9.1737, + "step": 482880 + }, + { + "epoch": 0.9754683516687742, + "grad_norm": 1.0779601335525513, + "learning_rate": 3.5628562000339925e-08, + "loss": 12.394, + "step": 482890 + }, + { + "epoch": 0.975488552301458, + "grad_norm": 2.142943859100342, + "learning_rate": 3.558697715093207e-08, + "loss": 19.8335, + "step": 482900 + }, + { + "epoch": 0.9755087529341419, + "grad_norm": 170.5755615234375, + "learning_rate": 3.554541649747056e-08, + "loss": 21.7454, + "step": 482910 + }, + { + "epoch": 0.9755289535668257, + "grad_norm": 299.7828369140625, + "learning_rate": 3.5503880040158586e-08, + "loss": 18.7396, + "step": 482920 + }, + { + "epoch": 0.9755491541995095, + "grad_norm": 310.21197509765625, + "learning_rate": 3.546236777919876e-08, + "loss": 9.8836, + "step": 482930 + }, + { + "epoch": 0.9755693548321933, + "grad_norm": 213.90985107421875, + "learning_rate": 3.542087971479313e-08, + "loss": 11.9233, + "step": 482940 + }, + { + "epoch": 0.9755895554648771, + "grad_norm": 459.5467224121094, + "learning_rate": 3.5379415847143775e-08, + "loss": 17.2514, + "step": 482950 + }, + { + "epoch": 0.975609756097561, + "grad_norm": 460.8121032714844, + "learning_rate": 3.5337976176453845e-08, + "loss": 16.0871, + "step": 482960 + }, + { + "epoch": 0.9756299567302448, + "grad_norm": 272.2505798339844, + "learning_rate": 3.529656070292375e-08, + "loss": 15.9314, + "step": 482970 + }, + { + "epoch": 0.9756501573629286, + "grad_norm": 543.2966918945312, + "learning_rate": 3.525516942675611e-08, + "loss": 25.3451, + "step": 482980 + }, + { + "epoch": 0.9756703579956124, + "grad_norm": 185.53729248046875, + "learning_rate": 3.521380234815297e-08, + "loss": 29.2085, + "step": 482990 + }, + { + "epoch": 0.9756905586282962, + "grad_norm": 146.68104553222656, + "learning_rate": 3.517245946731529e-08, + "loss": 32.7787, + "step": 483000 + }, + { + "epoch": 0.97571075926098, + "grad_norm": 288.0614013671875, + "learning_rate": 3.513114078444513e-08, + "loss": 6.5792, + "step": 483010 + }, + { + "epoch": 0.9757309598936639, + "grad_norm": 698.1644897460938, + "learning_rate": 3.508984629974288e-08, + "loss": 22.6261, + "step": 483020 + }, + { + "epoch": 0.9757511605263477, + "grad_norm": 789.5795288085938, + "learning_rate": 3.504857601341172e-08, + "loss": 14.246, + "step": 483030 + }, + { + "epoch": 0.9757713611590315, + "grad_norm": 66.64833068847656, + "learning_rate": 3.5007329925650925e-08, + "loss": 47.8748, + "step": 483040 + }, + { + "epoch": 0.9757915617917153, + "grad_norm": 508.5932312011719, + "learning_rate": 3.4966108036662006e-08, + "loss": 9.9282, + "step": 483050 + }, + { + "epoch": 0.9758117624243992, + "grad_norm": 181.7556915283203, + "learning_rate": 3.4924910346647024e-08, + "loss": 17.4592, + "step": 483060 + }, + { + "epoch": 0.975831963057083, + "grad_norm": 498.73529052734375, + "learning_rate": 3.488373685580526e-08, + "loss": 18.9131, + "step": 483070 + }, + { + "epoch": 0.9758521636897668, + "grad_norm": 512.0242919921875, + "learning_rate": 3.4842587564337674e-08, + "loss": 10.8806, + "step": 483080 + }, + { + "epoch": 0.9758723643224506, + "grad_norm": 297.0984802246094, + "learning_rate": 3.48014624724452e-08, + "loss": 8.2831, + "step": 483090 + }, + { + "epoch": 0.9758925649551344, + "grad_norm": 2.16670298576355, + "learning_rate": 3.47603615803288e-08, + "loss": 17.6927, + "step": 483100 + }, + { + "epoch": 0.9759127655878183, + "grad_norm": 350.0305480957031, + "learning_rate": 3.471928488818776e-08, + "loss": 9.4405, + "step": 483110 + }, + { + "epoch": 0.9759329662205021, + "grad_norm": 14.403679847717285, + "learning_rate": 3.467823239622248e-08, + "loss": 17.0502, + "step": 483120 + }, + { + "epoch": 0.9759531668531859, + "grad_norm": 346.641357421875, + "learning_rate": 3.463720410463334e-08, + "loss": 26.3269, + "step": 483130 + }, + { + "epoch": 0.9759733674858696, + "grad_norm": 253.56033325195312, + "learning_rate": 3.459620001362074e-08, + "loss": 15.4785, + "step": 483140 + }, + { + "epoch": 0.9759935681185534, + "grad_norm": 450.78497314453125, + "learning_rate": 3.4555220123383416e-08, + "loss": 12.1875, + "step": 483150 + }, + { + "epoch": 0.9760137687512372, + "grad_norm": 178.17855834960938, + "learning_rate": 3.451426443412231e-08, + "loss": 8.2043, + "step": 483160 + }, + { + "epoch": 0.9760339693839211, + "grad_norm": 273.9609680175781, + "learning_rate": 3.4473332946036164e-08, + "loss": 21.7305, + "step": 483170 + }, + { + "epoch": 0.9760541700166049, + "grad_norm": 691.238525390625, + "learning_rate": 3.443242565932481e-08, + "loss": 20.2564, + "step": 483180 + }, + { + "epoch": 0.9760743706492887, + "grad_norm": 357.8240661621094, + "learning_rate": 3.439154257418753e-08, + "loss": 12.5185, + "step": 483190 + }, + { + "epoch": 0.9760945712819725, + "grad_norm": 495.1055603027344, + "learning_rate": 3.435068369082306e-08, + "loss": 37.3999, + "step": 483200 + }, + { + "epoch": 0.9761147719146563, + "grad_norm": 254.81387329101562, + "learning_rate": 3.4309849009431794e-08, + "loss": 21.3162, + "step": 483210 + }, + { + "epoch": 0.9761349725473402, + "grad_norm": 273.0685729980469, + "learning_rate": 3.4269038530211906e-08, + "loss": 19.9255, + "step": 483220 + }, + { + "epoch": 0.976155173180024, + "grad_norm": 44.90702819824219, + "learning_rate": 3.4228252253362683e-08, + "loss": 13.02, + "step": 483230 + }, + { + "epoch": 0.9761753738127078, + "grad_norm": 0.0, + "learning_rate": 3.41874901790823e-08, + "loss": 23.1527, + "step": 483240 + }, + { + "epoch": 0.9761955744453916, + "grad_norm": 520.8883056640625, + "learning_rate": 3.414675230757003e-08, + "loss": 11.8354, + "step": 483250 + }, + { + "epoch": 0.9762157750780754, + "grad_norm": 266.39990234375, + "learning_rate": 3.410603863902406e-08, + "loss": 17.9792, + "step": 483260 + }, + { + "epoch": 0.9762359757107593, + "grad_norm": 64.41669464111328, + "learning_rate": 3.406534917364257e-08, + "loss": 13.4081, + "step": 483270 + }, + { + "epoch": 0.9762561763434431, + "grad_norm": 334.1542663574219, + "learning_rate": 3.402468391162539e-08, + "loss": 18.3694, + "step": 483280 + }, + { + "epoch": 0.9762763769761269, + "grad_norm": 578.1113891601562, + "learning_rate": 3.398404285316847e-08, + "loss": 10.7203, + "step": 483290 + }, + { + "epoch": 0.9762965776088107, + "grad_norm": 284.7392272949219, + "learning_rate": 3.394342599847111e-08, + "loss": 13.1058, + "step": 483300 + }, + { + "epoch": 0.9763167782414945, + "grad_norm": 366.591796875, + "learning_rate": 3.390283334773203e-08, + "loss": 19.2009, + "step": 483310 + }, + { + "epoch": 0.9763369788741784, + "grad_norm": 321.5985107421875, + "learning_rate": 3.3862264901147745e-08, + "loss": 27.2178, + "step": 483320 + }, + { + "epoch": 0.9763571795068622, + "grad_norm": 231.1267852783203, + "learning_rate": 3.3821720658916426e-08, + "loss": 8.5236, + "step": 483330 + }, + { + "epoch": 0.976377380139546, + "grad_norm": 0.8328532576560974, + "learning_rate": 3.378120062123569e-08, + "loss": 10.9436, + "step": 483340 + }, + { + "epoch": 0.9763975807722298, + "grad_norm": 158.7162322998047, + "learning_rate": 3.374070478830316e-08, + "loss": 12.2283, + "step": 483350 + }, + { + "epoch": 0.9764177814049136, + "grad_norm": 656.27880859375, + "learning_rate": 3.3700233160315897e-08, + "loss": 14.6304, + "step": 483360 + }, + { + "epoch": 0.9764379820375975, + "grad_norm": 404.3321228027344, + "learning_rate": 3.365978573747153e-08, + "loss": 31.0528, + "step": 483370 + }, + { + "epoch": 0.9764581826702813, + "grad_norm": 1263.421630859375, + "learning_rate": 3.361936251996711e-08, + "loss": 28.5038, + "step": 483380 + }, + { + "epoch": 0.9764783833029651, + "grad_norm": 152.63633728027344, + "learning_rate": 3.357896350799916e-08, + "loss": 14.4554, + "step": 483390 + }, + { + "epoch": 0.9764985839356488, + "grad_norm": 400.4908142089844, + "learning_rate": 3.3538588701765296e-08, + "loss": 15.232, + "step": 483400 + }, + { + "epoch": 0.9765187845683326, + "grad_norm": 317.1159362792969, + "learning_rate": 3.349823810146202e-08, + "loss": 8.8856, + "step": 483410 + }, + { + "epoch": 0.9765389852010165, + "grad_norm": 62.96767044067383, + "learning_rate": 3.34579117072864e-08, + "loss": 15.2614, + "step": 483420 + }, + { + "epoch": 0.9765591858337003, + "grad_norm": 244.07969665527344, + "learning_rate": 3.341760951943385e-08, + "loss": 19.5304, + "step": 483430 + }, + { + "epoch": 0.9765793864663841, + "grad_norm": 291.3792724609375, + "learning_rate": 3.337733153810141e-08, + "loss": 21.925, + "step": 483440 + }, + { + "epoch": 0.9765995870990679, + "grad_norm": 254.8742218017578, + "learning_rate": 3.3337077763485605e-08, + "loss": 26.2369, + "step": 483450 + }, + { + "epoch": 0.9766197877317517, + "grad_norm": 504.7132568359375, + "learning_rate": 3.329684819578294e-08, + "loss": 24.7492, + "step": 483460 + }, + { + "epoch": 0.9766399883644356, + "grad_norm": 401.4547119140625, + "learning_rate": 3.3256642835188816e-08, + "loss": 14.8526, + "step": 483470 + }, + { + "epoch": 0.9766601889971194, + "grad_norm": 68.80532836914062, + "learning_rate": 3.321646168189918e-08, + "loss": 11.8834, + "step": 483480 + }, + { + "epoch": 0.9766803896298032, + "grad_norm": 419.8252868652344, + "learning_rate": 3.317630473611055e-08, + "loss": 26.1874, + "step": 483490 + }, + { + "epoch": 0.976700590262487, + "grad_norm": 19.07436180114746, + "learning_rate": 3.313617199801777e-08, + "loss": 15.3926, + "step": 483500 + }, + { + "epoch": 0.9767207908951708, + "grad_norm": 770.347900390625, + "learning_rate": 3.309606346781735e-08, + "loss": 16.3904, + "step": 483510 + }, + { + "epoch": 0.9767409915278547, + "grad_norm": 479.2562561035156, + "learning_rate": 3.305597914570413e-08, + "loss": 13.9847, + "step": 483520 + }, + { + "epoch": 0.9767611921605385, + "grad_norm": 422.8157043457031, + "learning_rate": 3.301591903187351e-08, + "loss": 11.9881, + "step": 483530 + }, + { + "epoch": 0.9767813927932223, + "grad_norm": 253.63360595703125, + "learning_rate": 3.297588312652089e-08, + "loss": 22.7496, + "step": 483540 + }, + { + "epoch": 0.9768015934259061, + "grad_norm": 200.8687286376953, + "learning_rate": 3.2935871429841116e-08, + "loss": 16.5222, + "step": 483550 + }, + { + "epoch": 0.9768217940585899, + "grad_norm": 2.260951519012451, + "learning_rate": 3.289588394203014e-08, + "loss": 5.5559, + "step": 483560 + }, + { + "epoch": 0.9768419946912738, + "grad_norm": 306.1798400878906, + "learning_rate": 3.285592066328169e-08, + "loss": 13.5368, + "step": 483570 + }, + { + "epoch": 0.9768621953239576, + "grad_norm": 683.299560546875, + "learning_rate": 3.281598159379118e-08, + "loss": 12.1139, + "step": 483580 + }, + { + "epoch": 0.9768823959566414, + "grad_norm": 286.2330322265625, + "learning_rate": 3.277606673375289e-08, + "loss": 14.0862, + "step": 483590 + }, + { + "epoch": 0.9769025965893252, + "grad_norm": 208.06178283691406, + "learning_rate": 3.2736176083362216e-08, + "loss": 19.0484, + "step": 483600 + }, + { + "epoch": 0.976922797222009, + "grad_norm": 510.49981689453125, + "learning_rate": 3.2696309642812344e-08, + "loss": 15.3424, + "step": 483610 + }, + { + "epoch": 0.9769429978546929, + "grad_norm": 101.3388671875, + "learning_rate": 3.2656467412298665e-08, + "loss": 25.7158, + "step": 483620 + }, + { + "epoch": 0.9769631984873767, + "grad_norm": 340.8699645996094, + "learning_rate": 3.261664939201436e-08, + "loss": 21.4636, + "step": 483630 + }, + { + "epoch": 0.9769833991200605, + "grad_norm": 482.3835754394531, + "learning_rate": 3.2576855582154844e-08, + "loss": 25.4405, + "step": 483640 + }, + { + "epoch": 0.9770035997527442, + "grad_norm": 162.60202026367188, + "learning_rate": 3.253708598291272e-08, + "loss": 7.1479, + "step": 483650 + }, + { + "epoch": 0.977023800385428, + "grad_norm": 299.3507080078125, + "learning_rate": 3.2497340594482284e-08, + "loss": 11.9309, + "step": 483660 + }, + { + "epoch": 0.9770440010181118, + "grad_norm": 221.28309631347656, + "learning_rate": 3.245761941705727e-08, + "loss": 12.9233, + "step": 483670 + }, + { + "epoch": 0.9770642016507957, + "grad_norm": 221.97930908203125, + "learning_rate": 3.241792245083142e-08, + "loss": 7.916, + "step": 483680 + }, + { + "epoch": 0.9770844022834795, + "grad_norm": 274.4580383300781, + "learning_rate": 3.237824969599845e-08, + "loss": 13.6841, + "step": 483690 + }, + { + "epoch": 0.9771046029161633, + "grad_norm": 267.8779602050781, + "learning_rate": 3.2338601152751e-08, + "loss": 21.27, + "step": 483700 + }, + { + "epoch": 0.9771248035488471, + "grad_norm": 591.0781860351562, + "learning_rate": 3.2298976821282804e-08, + "loss": 28.3484, + "step": 483710 + }, + { + "epoch": 0.9771450041815309, + "grad_norm": 461.17681884765625, + "learning_rate": 3.2259376701787025e-08, + "loss": 15.7276, + "step": 483720 + }, + { + "epoch": 0.9771652048142148, + "grad_norm": 117.9496078491211, + "learning_rate": 3.2219800794456304e-08, + "loss": 22.1262, + "step": 483730 + }, + { + "epoch": 0.9771854054468986, + "grad_norm": 188.90655517578125, + "learning_rate": 3.2180249099483806e-08, + "loss": 12.5037, + "step": 483740 + }, + { + "epoch": 0.9772056060795824, + "grad_norm": 659.62158203125, + "learning_rate": 3.214072161706272e-08, + "loss": 18.8168, + "step": 483750 + }, + { + "epoch": 0.9772258067122662, + "grad_norm": 335.1777648925781, + "learning_rate": 3.210121834738456e-08, + "loss": 26.0619, + "step": 483760 + }, + { + "epoch": 0.97724600734495, + "grad_norm": 50.493900299072266, + "learning_rate": 3.206173929064304e-08, + "loss": 20.8472, + "step": 483770 + }, + { + "epoch": 0.9772662079776339, + "grad_norm": 134.26231384277344, + "learning_rate": 3.20222844470297e-08, + "loss": 9.9554, + "step": 483780 + }, + { + "epoch": 0.9772864086103177, + "grad_norm": 707.6260986328125, + "learning_rate": 3.198285381673716e-08, + "loss": 31.974, + "step": 483790 + }, + { + "epoch": 0.9773066092430015, + "grad_norm": 736.2801513671875, + "learning_rate": 3.194344739995803e-08, + "loss": 26.2879, + "step": 483800 + }, + { + "epoch": 0.9773268098756853, + "grad_norm": 309.8446044921875, + "learning_rate": 3.1904065196883825e-08, + "loss": 15.8285, + "step": 483810 + }, + { + "epoch": 0.9773470105083691, + "grad_norm": 123.22837829589844, + "learning_rate": 3.1864707207706624e-08, + "loss": 6.2707, + "step": 483820 + }, + { + "epoch": 0.977367211141053, + "grad_norm": 793.5512084960938, + "learning_rate": 3.182537343261849e-08, + "loss": 20.4665, + "step": 483830 + }, + { + "epoch": 0.9773874117737368, + "grad_norm": 303.29290771484375, + "learning_rate": 3.178606387181038e-08, + "loss": 26.4614, + "step": 483840 + }, + { + "epoch": 0.9774076124064206, + "grad_norm": 375.2763366699219, + "learning_rate": 3.1746778525474916e-08, + "loss": 8.8345, + "step": 483850 + }, + { + "epoch": 0.9774278130391044, + "grad_norm": 121.67900085449219, + "learning_rate": 3.1707517393803064e-08, + "loss": 8.8945, + "step": 483860 + }, + { + "epoch": 0.9774480136717882, + "grad_norm": 783.3543701171875, + "learning_rate": 3.166828047698578e-08, + "loss": 13.7929, + "step": 483870 + }, + { + "epoch": 0.9774682143044721, + "grad_norm": 465.07061767578125, + "learning_rate": 3.1629067775214575e-08, + "loss": 23.3499, + "step": 483880 + }, + { + "epoch": 0.9774884149371559, + "grad_norm": 257.0478515625, + "learning_rate": 3.158987928868151e-08, + "loss": 13.9445, + "step": 483890 + }, + { + "epoch": 0.9775086155698397, + "grad_norm": 209.32102966308594, + "learning_rate": 3.1550715017575895e-08, + "loss": 16.5252, + "step": 483900 + }, + { + "epoch": 0.9775288162025234, + "grad_norm": 448.6198425292969, + "learning_rate": 3.151157496208979e-08, + "loss": 12.6523, + "step": 483910 + }, + { + "epoch": 0.9775490168352072, + "grad_norm": 532.5560913085938, + "learning_rate": 3.1472459122414144e-08, + "loss": 16.7201, + "step": 483920 + }, + { + "epoch": 0.977569217467891, + "grad_norm": 70.59700012207031, + "learning_rate": 3.143336749873882e-08, + "loss": 15.6916, + "step": 483930 + }, + { + "epoch": 0.9775894181005749, + "grad_norm": 213.04867553710938, + "learning_rate": 3.139430009125477e-08, + "loss": 18.7005, + "step": 483940 + }, + { + "epoch": 0.9776096187332587, + "grad_norm": 996.3571166992188, + "learning_rate": 3.135525690015184e-08, + "loss": 22.9712, + "step": 483950 + }, + { + "epoch": 0.9776298193659425, + "grad_norm": 182.0755615234375, + "learning_rate": 3.131623792562155e-08, + "loss": 15.4185, + "step": 483960 + }, + { + "epoch": 0.9776500199986263, + "grad_norm": 0.8058404326438904, + "learning_rate": 3.127724316785263e-08, + "loss": 18.1625, + "step": 483970 + }, + { + "epoch": 0.9776702206313101, + "grad_norm": 46.014915466308594, + "learning_rate": 3.1238272627035494e-08, + "loss": 20.9273, + "step": 483980 + }, + { + "epoch": 0.977690421263994, + "grad_norm": 283.42718505859375, + "learning_rate": 3.119932630336109e-08, + "loss": 32.6546, + "step": 483990 + }, + { + "epoch": 0.9777106218966778, + "grad_norm": 419.70343017578125, + "learning_rate": 3.1160404197018155e-08, + "loss": 17.8015, + "step": 484000 + }, + { + "epoch": 0.9777308225293616, + "grad_norm": 168.8333740234375, + "learning_rate": 3.11215063081971e-08, + "loss": 27.3313, + "step": 484010 + }, + { + "epoch": 0.9777510231620454, + "grad_norm": 183.36619567871094, + "learning_rate": 3.108263263708666e-08, + "loss": 7.288, + "step": 484020 + }, + { + "epoch": 0.9777712237947292, + "grad_norm": 267.46441650390625, + "learning_rate": 3.104378318387724e-08, + "loss": 11.842, + "step": 484030 + }, + { + "epoch": 0.9777914244274131, + "grad_norm": 233.20396423339844, + "learning_rate": 3.1004957948757576e-08, + "loss": 17.2669, + "step": 484040 + }, + { + "epoch": 0.9778116250600969, + "grad_norm": 330.2497863769531, + "learning_rate": 3.0966156931916955e-08, + "loss": 32.6221, + "step": 484050 + }, + { + "epoch": 0.9778318256927807, + "grad_norm": 300.6095275878906, + "learning_rate": 3.092738013354468e-08, + "loss": 12.898, + "step": 484060 + }, + { + "epoch": 0.9778520263254645, + "grad_norm": 370.9511413574219, + "learning_rate": 3.088862755383004e-08, + "loss": 16.3676, + "step": 484070 + }, + { + "epoch": 0.9778722269581483, + "grad_norm": 272.9999084472656, + "learning_rate": 3.084989919296122e-08, + "loss": 25.2239, + "step": 484080 + }, + { + "epoch": 0.9778924275908322, + "grad_norm": 250.6344757080078, + "learning_rate": 3.081119505112751e-08, + "loss": 19.4249, + "step": 484090 + }, + { + "epoch": 0.977912628223516, + "grad_norm": 251.88600158691406, + "learning_rate": 3.077251512851709e-08, + "loss": 26.6836, + "step": 484100 + }, + { + "epoch": 0.9779328288561998, + "grad_norm": 395.8818054199219, + "learning_rate": 3.07338594253187e-08, + "loss": 40.7621, + "step": 484110 + }, + { + "epoch": 0.9779530294888836, + "grad_norm": 612.4962158203125, + "learning_rate": 3.069522794172109e-08, + "loss": 18.4162, + "step": 484120 + }, + { + "epoch": 0.9779732301215674, + "grad_norm": 263.68402099609375, + "learning_rate": 3.0656620677911867e-08, + "loss": 11.7562, + "step": 484130 + }, + { + "epoch": 0.9779934307542513, + "grad_norm": 524.9879760742188, + "learning_rate": 3.061803763408033e-08, + "loss": 21.7564, + "step": 484140 + }, + { + "epoch": 0.9780136313869351, + "grad_norm": 1036.5638427734375, + "learning_rate": 3.057947881041301e-08, + "loss": 22.7379, + "step": 484150 + }, + { + "epoch": 0.9780338320196189, + "grad_norm": 235.7089080810547, + "learning_rate": 3.054094420709863e-08, + "loss": 15.1089, + "step": 484160 + }, + { + "epoch": 0.9780540326523026, + "grad_norm": 278.8729553222656, + "learning_rate": 3.050243382432483e-08, + "loss": 17.9415, + "step": 484170 + }, + { + "epoch": 0.9780742332849864, + "grad_norm": 382.26318359375, + "learning_rate": 3.046394766228034e-08, + "loss": 9.5056, + "step": 484180 + }, + { + "epoch": 0.9780944339176703, + "grad_norm": 449.79052734375, + "learning_rate": 3.0425485721151115e-08, + "loss": 26.3229, + "step": 484190 + }, + { + "epoch": 0.9781146345503541, + "grad_norm": 287.5538635253906, + "learning_rate": 3.038704800112535e-08, + "loss": 8.6234, + "step": 484200 + }, + { + "epoch": 0.9781348351830379, + "grad_norm": 293.88818359375, + "learning_rate": 3.034863450239067e-08, + "loss": 32.1613, + "step": 484210 + }, + { + "epoch": 0.9781550358157217, + "grad_norm": 635.933837890625, + "learning_rate": 3.0310245225133595e-08, + "loss": 20.5715, + "step": 484220 + }, + { + "epoch": 0.9781752364484055, + "grad_norm": 546.6377563476562, + "learning_rate": 3.027188016954175e-08, + "loss": 20.026, + "step": 484230 + }, + { + "epoch": 0.9781954370810894, + "grad_norm": 340.8769836425781, + "learning_rate": 3.0233539335802195e-08, + "loss": 22.315, + "step": 484240 + }, + { + "epoch": 0.9782156377137732, + "grad_norm": 629.7188110351562, + "learning_rate": 3.019522272410202e-08, + "loss": 16.2137, + "step": 484250 + }, + { + "epoch": 0.978235838346457, + "grad_norm": 351.24102783203125, + "learning_rate": 3.0156930334626633e-08, + "loss": 13.6654, + "step": 484260 + }, + { + "epoch": 0.9782560389791408, + "grad_norm": 620.3857421875, + "learning_rate": 3.0118662167564205e-08, + "loss": 13.2582, + "step": 484270 + }, + { + "epoch": 0.9782762396118246, + "grad_norm": 320.65093994140625, + "learning_rate": 3.008041822310015e-08, + "loss": 15.7659, + "step": 484280 + }, + { + "epoch": 0.9782964402445085, + "grad_norm": 4.544155597686768, + "learning_rate": 3.004219850142209e-08, + "loss": 23.1176, + "step": 484290 + }, + { + "epoch": 0.9783166408771923, + "grad_norm": 330.84075927734375, + "learning_rate": 3.0004003002714886e-08, + "loss": 38.308, + "step": 484300 + }, + { + "epoch": 0.9783368415098761, + "grad_norm": 134.98233032226562, + "learning_rate": 2.9965831727165603e-08, + "loss": 16.8132, + "step": 484310 + }, + { + "epoch": 0.9783570421425599, + "grad_norm": 240.02906799316406, + "learning_rate": 2.992768467496021e-08, + "loss": 16.8663, + "step": 484320 + }, + { + "epoch": 0.9783772427752437, + "grad_norm": 69.66632843017578, + "learning_rate": 2.988956184628411e-08, + "loss": 17.5265, + "step": 484330 + }, + { + "epoch": 0.9783974434079276, + "grad_norm": 37.1854133605957, + "learning_rate": 2.985146324132438e-08, + "loss": 41.3305, + "step": 484340 + }, + { + "epoch": 0.9784176440406114, + "grad_norm": 91.68061828613281, + "learning_rate": 2.981338886026475e-08, + "loss": 12.9395, + "step": 484350 + }, + { + "epoch": 0.9784378446732952, + "grad_norm": 484.4888916015625, + "learning_rate": 2.97753387032923e-08, + "loss": 21.5611, + "step": 484360 + }, + { + "epoch": 0.978458045305979, + "grad_norm": 104.05294799804688, + "learning_rate": 2.9737312770591887e-08, + "loss": 21.4092, + "step": 484370 + }, + { + "epoch": 0.9784782459386628, + "grad_norm": 284.43402099609375, + "learning_rate": 2.9699311062349467e-08, + "loss": 16.0598, + "step": 484380 + }, + { + "epoch": 0.9784984465713467, + "grad_norm": 12.049307823181152, + "learning_rate": 2.966133357874934e-08, + "loss": 25.8269, + "step": 484390 + }, + { + "epoch": 0.9785186472040305, + "grad_norm": 63.954917907714844, + "learning_rate": 2.9623380319976912e-08, + "loss": 13.1331, + "step": 484400 + }, + { + "epoch": 0.9785388478367143, + "grad_norm": 486.4266662597656, + "learning_rate": 2.9585451286217593e-08, + "loss": 15.2235, + "step": 484410 + }, + { + "epoch": 0.978559048469398, + "grad_norm": 151.86865234375, + "learning_rate": 2.954754647765623e-08, + "loss": 13.0449, + "step": 484420 + }, + { + "epoch": 0.9785792491020818, + "grad_norm": 803.1467895507812, + "learning_rate": 2.950966589447657e-08, + "loss": 27.0853, + "step": 484430 + }, + { + "epoch": 0.9785994497347656, + "grad_norm": 347.372802734375, + "learning_rate": 2.947180953686457e-08, + "loss": 15.3117, + "step": 484440 + }, + { + "epoch": 0.9786196503674495, + "grad_norm": 400.6361083984375, + "learning_rate": 2.9433977405003976e-08, + "loss": 22.2511, + "step": 484450 + }, + { + "epoch": 0.9786398510001333, + "grad_norm": 344.0408935546875, + "learning_rate": 2.9396169499079087e-08, + "loss": 14.1736, + "step": 484460 + }, + { + "epoch": 0.9786600516328171, + "grad_norm": 880.786376953125, + "learning_rate": 2.935838581927475e-08, + "loss": 34.3599, + "step": 484470 + }, + { + "epoch": 0.9786802522655009, + "grad_norm": 109.6543960571289, + "learning_rate": 2.9320626365774153e-08, + "loss": 8.8492, + "step": 484480 + }, + { + "epoch": 0.9787004528981847, + "grad_norm": 12.039264678955078, + "learning_rate": 2.9282891138762148e-08, + "loss": 16.186, + "step": 484490 + }, + { + "epoch": 0.9787206535308686, + "grad_norm": 470.5296325683594, + "learning_rate": 2.9245180138423033e-08, + "loss": 36.8357, + "step": 484500 + }, + { + "epoch": 0.9787408541635524, + "grad_norm": 523.0750732421875, + "learning_rate": 2.920749336494e-08, + "loss": 13.6021, + "step": 484510 + }, + { + "epoch": 0.9787610547962362, + "grad_norm": 327.95672607421875, + "learning_rate": 2.9169830818496226e-08, + "loss": 24.0132, + "step": 484520 + }, + { + "epoch": 0.97878125542892, + "grad_norm": 125.46385955810547, + "learning_rate": 2.9132192499276014e-08, + "loss": 19.9534, + "step": 484530 + }, + { + "epoch": 0.9788014560616038, + "grad_norm": 17.092742919921875, + "learning_rate": 2.9094578407462547e-08, + "loss": 14.0685, + "step": 484540 + }, + { + "epoch": 0.9788216566942877, + "grad_norm": 230.4193878173828, + "learning_rate": 2.9056988543239018e-08, + "loss": 6.433, + "step": 484550 + }, + { + "epoch": 0.9788418573269715, + "grad_norm": 45.332794189453125, + "learning_rate": 2.9019422906789162e-08, + "loss": 13.0758, + "step": 484560 + }, + { + "epoch": 0.9788620579596553, + "grad_norm": 377.50286865234375, + "learning_rate": 2.8981881498295616e-08, + "loss": 27.4283, + "step": 484570 + }, + { + "epoch": 0.9788822585923391, + "grad_norm": 170.75660705566406, + "learning_rate": 2.8944364317941564e-08, + "loss": 11.3379, + "step": 484580 + }, + { + "epoch": 0.978902459225023, + "grad_norm": 463.7169494628906, + "learning_rate": 2.8906871365909638e-08, + "loss": 13.9359, + "step": 484590 + }, + { + "epoch": 0.9789226598577068, + "grad_norm": 578.7476806640625, + "learning_rate": 2.8869402642382473e-08, + "loss": 12.9114, + "step": 484600 + }, + { + "epoch": 0.9789428604903906, + "grad_norm": 1014.2540893554688, + "learning_rate": 2.8831958147543805e-08, + "loss": 17.8054, + "step": 484610 + }, + { + "epoch": 0.9789630611230744, + "grad_norm": 220.0530242919922, + "learning_rate": 2.8794537881574046e-08, + "loss": 20.0184, + "step": 484620 + }, + { + "epoch": 0.9789832617557582, + "grad_norm": 673.9566040039062, + "learning_rate": 2.87571418446575e-08, + "loss": 21.4076, + "step": 484630 + }, + { + "epoch": 0.979003462388442, + "grad_norm": 667.1533813476562, + "learning_rate": 2.871977003697568e-08, + "loss": 22.8192, + "step": 484640 + }, + { + "epoch": 0.9790236630211259, + "grad_norm": 424.0278625488281, + "learning_rate": 2.8682422458710667e-08, + "loss": 15.4292, + "step": 484650 + }, + { + "epoch": 0.9790438636538097, + "grad_norm": 268.6829528808594, + "learning_rate": 2.864509911004454e-08, + "loss": 32.7413, + "step": 484660 + }, + { + "epoch": 0.9790640642864935, + "grad_norm": 6.1492838859558105, + "learning_rate": 2.8607799991159368e-08, + "loss": 10.4449, + "step": 484670 + }, + { + "epoch": 0.9790842649191772, + "grad_norm": 649.07861328125, + "learning_rate": 2.857052510223668e-08, + "loss": 22.5222, + "step": 484680 + }, + { + "epoch": 0.979104465551861, + "grad_norm": 450.8924865722656, + "learning_rate": 2.853327444345799e-08, + "loss": 15.7099, + "step": 484690 + }, + { + "epoch": 0.9791246661845449, + "grad_norm": 761.1311645507812, + "learning_rate": 2.8496048015005385e-08, + "loss": 23.6273, + "step": 484700 + }, + { + "epoch": 0.9791448668172287, + "grad_norm": 269.6807556152344, + "learning_rate": 2.8458845817060376e-08, + "loss": 13.483, + "step": 484710 + }, + { + "epoch": 0.9791650674499125, + "grad_norm": 527.2760620117188, + "learning_rate": 2.8421667849803937e-08, + "loss": 13.3812, + "step": 484720 + }, + { + "epoch": 0.9791852680825963, + "grad_norm": 7.196564197540283, + "learning_rate": 2.8384514113417026e-08, + "loss": 17.1273, + "step": 484730 + }, + { + "epoch": 0.9792054687152801, + "grad_norm": 0.0, + "learning_rate": 2.8347384608081173e-08, + "loss": 26.7297, + "step": 484740 + }, + { + "epoch": 0.979225669347964, + "grad_norm": 1.6372706890106201, + "learning_rate": 2.8310279333976786e-08, + "loss": 22.1027, + "step": 484750 + }, + { + "epoch": 0.9792458699806478, + "grad_norm": 167.82321166992188, + "learning_rate": 2.827319829128594e-08, + "loss": 12.3742, + "step": 484760 + }, + { + "epoch": 0.9792660706133316, + "grad_norm": 317.5897216796875, + "learning_rate": 2.823614148018794e-08, + "loss": 13.8572, + "step": 484770 + }, + { + "epoch": 0.9792862712460154, + "grad_norm": 455.1285400390625, + "learning_rate": 2.819910890086375e-08, + "loss": 26.5275, + "step": 484780 + }, + { + "epoch": 0.9793064718786992, + "grad_norm": 508.3708801269531, + "learning_rate": 2.8162100553494887e-08, + "loss": 16.7634, + "step": 484790 + }, + { + "epoch": 0.9793266725113831, + "grad_norm": 454.05999755859375, + "learning_rate": 2.8125116438260104e-08, + "loss": 15.6647, + "step": 484800 + }, + { + "epoch": 0.9793468731440669, + "grad_norm": 482.0390930175781, + "learning_rate": 2.8088156555340916e-08, + "loss": 26.6385, + "step": 484810 + }, + { + "epoch": 0.9793670737767507, + "grad_norm": 377.8825988769531, + "learning_rate": 2.805122090491719e-08, + "loss": 13.5362, + "step": 484820 + }, + { + "epoch": 0.9793872744094345, + "grad_norm": 116.45448303222656, + "learning_rate": 2.801430948716821e-08, + "loss": 20.9232, + "step": 484830 + }, + { + "epoch": 0.9794074750421183, + "grad_norm": 210.9385986328125, + "learning_rate": 2.797742230227496e-08, + "loss": 16.5787, + "step": 484840 + }, + { + "epoch": 0.9794276756748022, + "grad_norm": 302.8087463378906, + "learning_rate": 2.794055935041673e-08, + "loss": 14.3869, + "step": 484850 + }, + { + "epoch": 0.979447876307486, + "grad_norm": 780.3137817382812, + "learning_rate": 2.7903720631772824e-08, + "loss": 24.1741, + "step": 484860 + }, + { + "epoch": 0.9794680769401698, + "grad_norm": 316.7269287109375, + "learning_rate": 2.7866906146523098e-08, + "loss": 22.5804, + "step": 484870 + }, + { + "epoch": 0.9794882775728536, + "grad_norm": 630.5425415039062, + "learning_rate": 2.783011589484741e-08, + "loss": 25.4059, + "step": 484880 + }, + { + "epoch": 0.9795084782055374, + "grad_norm": 77.95024871826172, + "learning_rate": 2.7793349876924503e-08, + "loss": 23.828, + "step": 484890 + }, + { + "epoch": 0.9795286788382213, + "grad_norm": 318.6225280761719, + "learning_rate": 2.7756608092933678e-08, + "loss": 28.741, + "step": 484900 + }, + { + "epoch": 0.9795488794709051, + "grad_norm": 187.707763671875, + "learning_rate": 2.771989054305424e-08, + "loss": 11.4559, + "step": 484910 + }, + { + "epoch": 0.9795690801035889, + "grad_norm": 259.7579650878906, + "learning_rate": 2.768319722746493e-08, + "loss": 10.9794, + "step": 484920 + }, + { + "epoch": 0.9795892807362726, + "grad_norm": 397.9490051269531, + "learning_rate": 2.7646528146345053e-08, + "loss": 13.0612, + "step": 484930 + }, + { + "epoch": 0.9796094813689564, + "grad_norm": 629.7482299804688, + "learning_rate": 2.760988329987224e-08, + "loss": 14.3817, + "step": 484940 + }, + { + "epoch": 0.9796296820016402, + "grad_norm": 643.6015625, + "learning_rate": 2.7573262688226355e-08, + "loss": 39.0208, + "step": 484950 + }, + { + "epoch": 0.9796498826343241, + "grad_norm": 417.1099548339844, + "learning_rate": 2.753666631158447e-08, + "loss": 13.9643, + "step": 484960 + }, + { + "epoch": 0.9796700832670079, + "grad_norm": 1445.2420654296875, + "learning_rate": 2.7500094170126447e-08, + "loss": 12.6738, + "step": 484970 + }, + { + "epoch": 0.9796902838996917, + "grad_norm": 310.7126770019531, + "learning_rate": 2.7463546264029915e-08, + "loss": 10.1163, + "step": 484980 + }, + { + "epoch": 0.9797104845323755, + "grad_norm": 400.7168884277344, + "learning_rate": 2.7427022593473074e-08, + "loss": 18.6557, + "step": 484990 + }, + { + "epoch": 0.9797306851650593, + "grad_norm": 440.9306335449219, + "learning_rate": 2.7390523158633552e-08, + "loss": 16.9205, + "step": 485000 + }, + { + "epoch": 0.9797508857977432, + "grad_norm": 313.15338134765625, + "learning_rate": 2.7354047959689543e-08, + "loss": 11.0791, + "step": 485010 + }, + { + "epoch": 0.979771086430427, + "grad_norm": 488.06695556640625, + "learning_rate": 2.7317596996818684e-08, + "loss": 31.8022, + "step": 485020 + }, + { + "epoch": 0.9797912870631108, + "grad_norm": 346.84796142578125, + "learning_rate": 2.728117027019861e-08, + "loss": 38.3031, + "step": 485030 + }, + { + "epoch": 0.9798114876957946, + "grad_norm": 608.5032958984375, + "learning_rate": 2.7244767780007507e-08, + "loss": 31.9193, + "step": 485040 + }, + { + "epoch": 0.9798316883284784, + "grad_norm": 215.79571533203125, + "learning_rate": 2.7208389526421907e-08, + "loss": 33.9912, + "step": 485050 + }, + { + "epoch": 0.9798518889611623, + "grad_norm": 217.80091857910156, + "learning_rate": 2.7172035509619442e-08, + "loss": 21.6836, + "step": 485060 + }, + { + "epoch": 0.9798720895938461, + "grad_norm": 467.5257873535156, + "learning_rate": 2.713570572977775e-08, + "loss": 31.3339, + "step": 485070 + }, + { + "epoch": 0.9798922902265299, + "grad_norm": 1203.36083984375, + "learning_rate": 2.7099400187073356e-08, + "loss": 20.5482, + "step": 485080 + }, + { + "epoch": 0.9799124908592137, + "grad_norm": 406.3395690917969, + "learning_rate": 2.7063118881682782e-08, + "loss": 23.3002, + "step": 485090 + }, + { + "epoch": 0.9799326914918975, + "grad_norm": 527.0682983398438, + "learning_rate": 2.7026861813783668e-08, + "loss": 19.6665, + "step": 485100 + }, + { + "epoch": 0.9799528921245814, + "grad_norm": 182.63735961914062, + "learning_rate": 2.6990628983553093e-08, + "loss": 15.1178, + "step": 485110 + }, + { + "epoch": 0.9799730927572652, + "grad_norm": 384.9981384277344, + "learning_rate": 2.6954420391166468e-08, + "loss": 23.4295, + "step": 485120 + }, + { + "epoch": 0.979993293389949, + "grad_norm": 618.421630859375, + "learning_rate": 2.691823603680088e-08, + "loss": 32.6432, + "step": 485130 + }, + { + "epoch": 0.9800134940226328, + "grad_norm": 247.70474243164062, + "learning_rate": 2.6882075920632854e-08, + "loss": 16.8914, + "step": 485140 + }, + { + "epoch": 0.9800336946553166, + "grad_norm": 258.8690490722656, + "learning_rate": 2.684594004283836e-08, + "loss": 15.5106, + "step": 485150 + }, + { + "epoch": 0.9800538952880005, + "grad_norm": 534.8699340820312, + "learning_rate": 2.6809828403593363e-08, + "loss": 12.8656, + "step": 485160 + }, + { + "epoch": 0.9800740959206843, + "grad_norm": 45.22439956665039, + "learning_rate": 2.6773741003074394e-08, + "loss": 18.8632, + "step": 485170 + }, + { + "epoch": 0.9800942965533681, + "grad_norm": 718.5444946289062, + "learning_rate": 2.6737677841456867e-08, + "loss": 22.0892, + "step": 485180 + }, + { + "epoch": 0.9801144971860518, + "grad_norm": 481.2840576171875, + "learning_rate": 2.670163891891675e-08, + "loss": 17.4947, + "step": 485190 + }, + { + "epoch": 0.9801346978187356, + "grad_norm": 320.5677490234375, + "learning_rate": 2.6665624235629463e-08, + "loss": 22.502, + "step": 485200 + }, + { + "epoch": 0.9801548984514195, + "grad_norm": 519.1952514648438, + "learning_rate": 2.662963379177097e-08, + "loss": 13.4785, + "step": 485210 + }, + { + "epoch": 0.9801750990841033, + "grad_norm": 0.9457218647003174, + "learning_rate": 2.6593667587516693e-08, + "loss": 18.4401, + "step": 485220 + }, + { + "epoch": 0.9801952997167871, + "grad_norm": 231.81671142578125, + "learning_rate": 2.6557725623041487e-08, + "loss": 19.6856, + "step": 485230 + }, + { + "epoch": 0.9802155003494709, + "grad_norm": 233.8401641845703, + "learning_rate": 2.6521807898520214e-08, + "loss": 10.5556, + "step": 485240 + }, + { + "epoch": 0.9802357009821547, + "grad_norm": 323.9283142089844, + "learning_rate": 2.64859144141294e-08, + "loss": 10.9646, + "step": 485250 + }, + { + "epoch": 0.9802559016148386, + "grad_norm": 152.94651794433594, + "learning_rate": 2.6450045170042238e-08, + "loss": 16.6187, + "step": 485260 + }, + { + "epoch": 0.9802761022475224, + "grad_norm": 560.7903442382812, + "learning_rate": 2.6414200166434144e-08, + "loss": 31.5643, + "step": 485270 + }, + { + "epoch": 0.9802963028802062, + "grad_norm": 762.9198608398438, + "learning_rate": 2.6378379403480536e-08, + "loss": 21.9723, + "step": 485280 + }, + { + "epoch": 0.98031650351289, + "grad_norm": 159.2328643798828, + "learning_rate": 2.6342582881355717e-08, + "loss": 11.4329, + "step": 485290 + }, + { + "epoch": 0.9803367041455738, + "grad_norm": 495.5337829589844, + "learning_rate": 2.6306810600233435e-08, + "loss": 19.3928, + "step": 485300 + }, + { + "epoch": 0.9803569047782577, + "grad_norm": 289.5658264160156, + "learning_rate": 2.6271062560288552e-08, + "loss": 20.7861, + "step": 485310 + }, + { + "epoch": 0.9803771054109415, + "grad_norm": 768.4794311523438, + "learning_rate": 2.6235338761695372e-08, + "loss": 21.2778, + "step": 485320 + }, + { + "epoch": 0.9803973060436253, + "grad_norm": 307.91845703125, + "learning_rate": 2.6199639204628202e-08, + "loss": 15.3602, + "step": 485330 + }, + { + "epoch": 0.9804175066763091, + "grad_norm": 549.4125366210938, + "learning_rate": 2.6163963889260236e-08, + "loss": 22.524, + "step": 485340 + }, + { + "epoch": 0.9804377073089929, + "grad_norm": 22.162643432617188, + "learning_rate": 2.6128312815766332e-08, + "loss": 21.6474, + "step": 485350 + }, + { + "epoch": 0.9804579079416768, + "grad_norm": 628.3938598632812, + "learning_rate": 2.6092685984319134e-08, + "loss": 15.8834, + "step": 485360 + }, + { + "epoch": 0.9804781085743606, + "grad_norm": 195.3922119140625, + "learning_rate": 2.6057083395093495e-08, + "loss": 26.0357, + "step": 485370 + }, + { + "epoch": 0.9804983092070444, + "grad_norm": 303.665283203125, + "learning_rate": 2.6021505048262062e-08, + "loss": 25.6902, + "step": 485380 + }, + { + "epoch": 0.9805185098397282, + "grad_norm": 24.05523681640625, + "learning_rate": 2.5985950943999137e-08, + "loss": 12.9914, + "step": 485390 + }, + { + "epoch": 0.980538710472412, + "grad_norm": 269.62017822265625, + "learning_rate": 2.5950421082476805e-08, + "loss": 13.2076, + "step": 485400 + }, + { + "epoch": 0.9805589111050959, + "grad_norm": 341.3332824707031, + "learning_rate": 2.5914915463868816e-08, + "loss": 21.3219, + "step": 485410 + }, + { + "epoch": 0.9805791117377797, + "grad_norm": 0.0197820533066988, + "learning_rate": 2.5879434088348364e-08, + "loss": 18.7422, + "step": 485420 + }, + { + "epoch": 0.9805993123704635, + "grad_norm": 641.1902465820312, + "learning_rate": 2.584397695608809e-08, + "loss": 35.1817, + "step": 485430 + }, + { + "epoch": 0.9806195130031473, + "grad_norm": 282.08807373046875, + "learning_rate": 2.580854406726174e-08, + "loss": 9.3104, + "step": 485440 + }, + { + "epoch": 0.980639713635831, + "grad_norm": 57.96085739135742, + "learning_rate": 2.5773135422040296e-08, + "loss": 11.5754, + "step": 485450 + }, + { + "epoch": 0.9806599142685148, + "grad_norm": 624.4923095703125, + "learning_rate": 2.5737751020598057e-08, + "loss": 10.0956, + "step": 485460 + }, + { + "epoch": 0.9806801149011987, + "grad_norm": 274.8274230957031, + "learning_rate": 2.5702390863105996e-08, + "loss": 8.2575, + "step": 485470 + }, + { + "epoch": 0.9807003155338825, + "grad_norm": 454.69952392578125, + "learning_rate": 2.5667054949737315e-08, + "loss": 13.499, + "step": 485480 + }, + { + "epoch": 0.9807205161665663, + "grad_norm": 227.89601135253906, + "learning_rate": 2.5631743280664643e-08, + "loss": 18.8259, + "step": 485490 + }, + { + "epoch": 0.9807407167992501, + "grad_norm": 448.7703552246094, + "learning_rate": 2.5596455856058966e-08, + "loss": 21.568, + "step": 485500 + }, + { + "epoch": 0.9807609174319339, + "grad_norm": 465.294921875, + "learning_rate": 2.556119267609347e-08, + "loss": 13.4545, + "step": 485510 + }, + { + "epoch": 0.9807811180646178, + "grad_norm": 369.0829772949219, + "learning_rate": 2.552595374093858e-08, + "loss": 16.9684, + "step": 485520 + }, + { + "epoch": 0.9808013186973016, + "grad_norm": 295.7125549316406, + "learning_rate": 2.5490739050767488e-08, + "loss": 10.4524, + "step": 485530 + }, + { + "epoch": 0.9808215193299854, + "grad_norm": 500.7685241699219, + "learning_rate": 2.5455548605751167e-08, + "loss": 32.5881, + "step": 485540 + }, + { + "epoch": 0.9808417199626692, + "grad_norm": 364.6302490234375, + "learning_rate": 2.5420382406060595e-08, + "loss": 26.903, + "step": 485550 + }, + { + "epoch": 0.980861920595353, + "grad_norm": 335.4365234375, + "learning_rate": 2.5385240451867853e-08, + "loss": 17.7864, + "step": 485560 + }, + { + "epoch": 0.9808821212280369, + "grad_norm": 441.4403381347656, + "learning_rate": 2.5350122743344476e-08, + "loss": 19.7877, + "step": 485570 + }, + { + "epoch": 0.9809023218607207, + "grad_norm": 378.8863220214844, + "learning_rate": 2.531502928066143e-08, + "loss": 12.3159, + "step": 485580 + }, + { + "epoch": 0.9809225224934045, + "grad_norm": 380.1937561035156, + "learning_rate": 2.527996006398914e-08, + "loss": 7.871, + "step": 485590 + }, + { + "epoch": 0.9809427231260883, + "grad_norm": 404.0863037109375, + "learning_rate": 2.5244915093499134e-08, + "loss": 36.1998, + "step": 485600 + }, + { + "epoch": 0.9809629237587721, + "grad_norm": 184.4862823486328, + "learning_rate": 2.5209894369362386e-08, + "loss": 13.5838, + "step": 485610 + }, + { + "epoch": 0.980983124391456, + "grad_norm": 244.2938232421875, + "learning_rate": 2.5174897891748762e-08, + "loss": 23.7825, + "step": 485620 + }, + { + "epoch": 0.9810033250241398, + "grad_norm": 220.58428955078125, + "learning_rate": 2.5139925660829233e-08, + "loss": 19.5785, + "step": 485630 + }, + { + "epoch": 0.9810235256568236, + "grad_norm": 194.8220672607422, + "learning_rate": 2.5104977676774777e-08, + "loss": 18.7257, + "step": 485640 + }, + { + "epoch": 0.9810437262895074, + "grad_norm": 316.3087158203125, + "learning_rate": 2.5070053939754702e-08, + "loss": 12.6183, + "step": 485650 + }, + { + "epoch": 0.9810639269221912, + "grad_norm": 525.3484497070312, + "learning_rate": 2.5035154449940535e-08, + "loss": 27.6374, + "step": 485660 + }, + { + "epoch": 0.9810841275548751, + "grad_norm": 691.3954467773438, + "learning_rate": 2.500027920750103e-08, + "loss": 16.5734, + "step": 485670 + }, + { + "epoch": 0.9811043281875589, + "grad_norm": 410.7261047363281, + "learning_rate": 2.496542821260717e-08, + "loss": 12.7947, + "step": 485680 + }, + { + "epoch": 0.9811245288202427, + "grad_norm": 172.7108154296875, + "learning_rate": 2.493060146542825e-08, + "loss": 22.6328, + "step": 485690 + }, + { + "epoch": 0.9811447294529264, + "grad_norm": 403.85003662109375, + "learning_rate": 2.489579896613359e-08, + "loss": 29.9945, + "step": 485700 + }, + { + "epoch": 0.9811649300856102, + "grad_norm": 175.58148193359375, + "learning_rate": 2.4861020714894156e-08, + "loss": 10.1164, + "step": 485710 + }, + { + "epoch": 0.981185130718294, + "grad_norm": 210.02651977539062, + "learning_rate": 2.482626671187871e-08, + "loss": 9.6241, + "step": 485720 + }, + { + "epoch": 0.9812053313509779, + "grad_norm": 445.175537109375, + "learning_rate": 2.4791536957256e-08, + "loss": 14.9926, + "step": 485730 + }, + { + "epoch": 0.9812255319836617, + "grad_norm": 885.3142700195312, + "learning_rate": 2.4756831451196452e-08, + "loss": 26.4219, + "step": 485740 + }, + { + "epoch": 0.9812457326163455, + "grad_norm": 28.76207733154297, + "learning_rate": 2.472215019386881e-08, + "loss": 15.5411, + "step": 485750 + }, + { + "epoch": 0.9812659332490293, + "grad_norm": 260.98870849609375, + "learning_rate": 2.4687493185441836e-08, + "loss": 25.6906, + "step": 485760 + }, + { + "epoch": 0.9812861338817132, + "grad_norm": 369.857421875, + "learning_rate": 2.4652860426084278e-08, + "loss": 23.6489, + "step": 485770 + }, + { + "epoch": 0.981306334514397, + "grad_norm": 128.75570678710938, + "learning_rate": 2.46182519159649e-08, + "loss": 10.6912, + "step": 485780 + }, + { + "epoch": 0.9813265351470808, + "grad_norm": 120.6553955078125, + "learning_rate": 2.458366765525355e-08, + "loss": 9.2437, + "step": 485790 + }, + { + "epoch": 0.9813467357797646, + "grad_norm": 560.5294189453125, + "learning_rate": 2.4549107644117888e-08, + "loss": 17.972, + "step": 485800 + }, + { + "epoch": 0.9813669364124484, + "grad_norm": 62.803314208984375, + "learning_rate": 2.4514571882726102e-08, + "loss": 16.2346, + "step": 485810 + }, + { + "epoch": 0.9813871370451323, + "grad_norm": 596.3590087890625, + "learning_rate": 2.448006037124695e-08, + "loss": 23.8473, + "step": 485820 + }, + { + "epoch": 0.9814073376778161, + "grad_norm": 491.07147216796875, + "learning_rate": 2.444557310984863e-08, + "loss": 16.2163, + "step": 485830 + }, + { + "epoch": 0.9814275383104999, + "grad_norm": 512.9915771484375, + "learning_rate": 2.441111009869879e-08, + "loss": 25.4548, + "step": 485840 + }, + { + "epoch": 0.9814477389431837, + "grad_norm": 128.34317016601562, + "learning_rate": 2.4376671337966174e-08, + "loss": 16.7849, + "step": 485850 + }, + { + "epoch": 0.9814679395758675, + "grad_norm": 383.1306457519531, + "learning_rate": 2.434225682781788e-08, + "loss": 21.5805, + "step": 485860 + }, + { + "epoch": 0.9814881402085514, + "grad_norm": 706.0603637695312, + "learning_rate": 2.43078665684221e-08, + "loss": 44.5088, + "step": 485870 + }, + { + "epoch": 0.9815083408412352, + "grad_norm": 494.68304443359375, + "learning_rate": 2.427350055994593e-08, + "loss": 30.7003, + "step": 485880 + }, + { + "epoch": 0.981528541473919, + "grad_norm": 279.5955505371094, + "learning_rate": 2.423915880255756e-08, + "loss": 15.2497, + "step": 485890 + }, + { + "epoch": 0.9815487421066028, + "grad_norm": 201.189697265625, + "learning_rate": 2.4204841296424086e-08, + "loss": 13.9122, + "step": 485900 + }, + { + "epoch": 0.9815689427392866, + "grad_norm": 508.57025146484375, + "learning_rate": 2.4170548041712594e-08, + "loss": 19.3764, + "step": 485910 + }, + { + "epoch": 0.9815891433719705, + "grad_norm": 351.7825622558594, + "learning_rate": 2.4136279038590727e-08, + "loss": 17.9565, + "step": 485920 + }, + { + "epoch": 0.9816093440046543, + "grad_norm": 316.19219970703125, + "learning_rate": 2.4102034287224462e-08, + "loss": 11.2892, + "step": 485930 + }, + { + "epoch": 0.9816295446373381, + "grad_norm": 542.6669311523438, + "learning_rate": 2.4067813787782e-08, + "loss": 24.9015, + "step": 485940 + }, + { + "epoch": 0.9816497452700219, + "grad_norm": 197.28306579589844, + "learning_rate": 2.403361754042932e-08, + "loss": 19.147, + "step": 485950 + }, + { + "epoch": 0.9816699459027056, + "grad_norm": 50.71059799194336, + "learning_rate": 2.3999445545332955e-08, + "loss": 16.8103, + "step": 485960 + }, + { + "epoch": 0.9816901465353894, + "grad_norm": 501.495361328125, + "learning_rate": 2.3965297802659993e-08, + "loss": 17.9184, + "step": 485970 + }, + { + "epoch": 0.9817103471680733, + "grad_norm": 310.5088195800781, + "learning_rate": 2.3931174312576966e-08, + "loss": 20.8237, + "step": 485980 + }, + { + "epoch": 0.9817305478007571, + "grad_norm": 262.6496276855469, + "learning_rate": 2.3897075075249298e-08, + "loss": 16.2967, + "step": 485990 + }, + { + "epoch": 0.9817507484334409, + "grad_norm": 183.33282470703125, + "learning_rate": 2.386300009084408e-08, + "loss": 19.5586, + "step": 486000 + }, + { + "epoch": 0.9817709490661247, + "grad_norm": 26.03966522216797, + "learning_rate": 2.382894935952729e-08, + "loss": 16.2692, + "step": 486010 + }, + { + "epoch": 0.9817911496988085, + "grad_norm": 364.26153564453125, + "learning_rate": 2.3794922881464344e-08, + "loss": 20.1034, + "step": 486020 + }, + { + "epoch": 0.9818113503314924, + "grad_norm": 109.6334457397461, + "learning_rate": 2.3760920656821228e-08, + "loss": 15.8652, + "step": 486030 + }, + { + "epoch": 0.9818315509641762, + "grad_norm": 449.1590270996094, + "learning_rate": 2.3726942685764474e-08, + "loss": 23.835, + "step": 486040 + }, + { + "epoch": 0.98185175159686, + "grad_norm": 94.95097351074219, + "learning_rate": 2.3692988968458398e-08, + "loss": 33.35, + "step": 486050 + }, + { + "epoch": 0.9818719522295438, + "grad_norm": 266.3463439941406, + "learning_rate": 2.3659059505069526e-08, + "loss": 25.8023, + "step": 486060 + }, + { + "epoch": 0.9818921528622276, + "grad_norm": 73.2273941040039, + "learning_rate": 2.362515429576273e-08, + "loss": 14.0591, + "step": 486070 + }, + { + "epoch": 0.9819123534949115, + "grad_norm": 332.509521484375, + "learning_rate": 2.3591273340703436e-08, + "loss": 12.0447, + "step": 486080 + }, + { + "epoch": 0.9819325541275953, + "grad_norm": 1333.14501953125, + "learning_rate": 2.3557416640056507e-08, + "loss": 16.5212, + "step": 486090 + }, + { + "epoch": 0.9819527547602791, + "grad_norm": 227.93136596679688, + "learning_rate": 2.3523584193986816e-08, + "loss": 19.2178, + "step": 486100 + }, + { + "epoch": 0.9819729553929629, + "grad_norm": 705.9186401367188, + "learning_rate": 2.3489776002660337e-08, + "loss": 19.5316, + "step": 486110 + }, + { + "epoch": 0.9819931560256467, + "grad_norm": 301.1523742675781, + "learning_rate": 2.3455992066240828e-08, + "loss": 32.5375, + "step": 486120 + }, + { + "epoch": 0.9820133566583306, + "grad_norm": 323.9628601074219, + "learning_rate": 2.342223238489316e-08, + "loss": 24.6499, + "step": 486130 + }, + { + "epoch": 0.9820335572910144, + "grad_norm": 148.93313598632812, + "learning_rate": 2.3388496958782203e-08, + "loss": 19.5107, + "step": 486140 + }, + { + "epoch": 0.9820537579236982, + "grad_norm": 247.03475952148438, + "learning_rate": 2.3354785788072265e-08, + "loss": 9.7356, + "step": 486150 + }, + { + "epoch": 0.982073958556382, + "grad_norm": 1135.49951171875, + "learning_rate": 2.3321098872927107e-08, + "loss": 39.8069, + "step": 486160 + }, + { + "epoch": 0.9820941591890658, + "grad_norm": 683.43603515625, + "learning_rate": 2.3287436213511038e-08, + "loss": 15.9695, + "step": 486170 + }, + { + "epoch": 0.9821143598217497, + "grad_norm": 344.6324462890625, + "learning_rate": 2.3253797809988933e-08, + "loss": 15.9136, + "step": 486180 + }, + { + "epoch": 0.9821345604544335, + "grad_norm": 361.53643798828125, + "learning_rate": 2.3220183662523986e-08, + "loss": 22.6791, + "step": 486190 + }, + { + "epoch": 0.9821547610871173, + "grad_norm": 196.98980712890625, + "learning_rate": 2.3186593771280518e-08, + "loss": 6.6145, + "step": 486200 + }, + { + "epoch": 0.982174961719801, + "grad_norm": 414.0221252441406, + "learning_rate": 2.3153028136421728e-08, + "loss": 18.1399, + "step": 486210 + }, + { + "epoch": 0.9821951623524848, + "grad_norm": 442.22125244140625, + "learning_rate": 2.3119486758111375e-08, + "loss": 11.9448, + "step": 486220 + }, + { + "epoch": 0.9822153629851686, + "grad_norm": 446.5096435546875, + "learning_rate": 2.3085969636513217e-08, + "loss": 15.7097, + "step": 486230 + }, + { + "epoch": 0.9822355636178525, + "grad_norm": 0.0, + "learning_rate": 2.3052476771790454e-08, + "loss": 9.7731, + "step": 486240 + }, + { + "epoch": 0.9822557642505363, + "grad_norm": 379.91375732421875, + "learning_rate": 2.301900816410574e-08, + "loss": 13.1205, + "step": 486250 + }, + { + "epoch": 0.9822759648832201, + "grad_norm": 390.3697814941406, + "learning_rate": 2.2985563813623378e-08, + "loss": 16.514, + "step": 486260 + }, + { + "epoch": 0.9822961655159039, + "grad_norm": 65.93146514892578, + "learning_rate": 2.295214372050547e-08, + "loss": 12.4505, + "step": 486270 + }, + { + "epoch": 0.9823163661485877, + "grad_norm": 382.8099365234375, + "learning_rate": 2.2918747884915216e-08, + "loss": 19.6996, + "step": 486280 + }, + { + "epoch": 0.9823365667812716, + "grad_norm": 591.4705200195312, + "learning_rate": 2.2885376307015817e-08, + "loss": 23.1165, + "step": 486290 + }, + { + "epoch": 0.9823567674139554, + "grad_norm": 466.58245849609375, + "learning_rate": 2.285202898696881e-08, + "loss": 18.6542, + "step": 486300 + }, + { + "epoch": 0.9823769680466392, + "grad_norm": 687.158935546875, + "learning_rate": 2.2818705924937402e-08, + "loss": 18.8722, + "step": 486310 + }, + { + "epoch": 0.982397168679323, + "grad_norm": 349.258056640625, + "learning_rate": 2.2785407121084236e-08, + "loss": 20.9059, + "step": 486320 + }, + { + "epoch": 0.9824173693120068, + "grad_norm": 295.1773376464844, + "learning_rate": 2.2752132575570852e-08, + "loss": 24.1587, + "step": 486330 + }, + { + "epoch": 0.9824375699446907, + "grad_norm": 432.3294372558594, + "learning_rate": 2.271888228856045e-08, + "loss": 17.8731, + "step": 486340 + }, + { + "epoch": 0.9824577705773745, + "grad_norm": 789.3005981445312, + "learning_rate": 2.268565626021457e-08, + "loss": 47.5746, + "step": 486350 + }, + { + "epoch": 0.9824779712100583, + "grad_norm": 260.5862121582031, + "learning_rate": 2.2652454490694752e-08, + "loss": 19.524, + "step": 486360 + }, + { + "epoch": 0.9824981718427421, + "grad_norm": 43.713470458984375, + "learning_rate": 2.261927698016364e-08, + "loss": 12.7472, + "step": 486370 + }, + { + "epoch": 0.982518372475426, + "grad_norm": 740.96826171875, + "learning_rate": 2.2586123728781663e-08, + "loss": 18.64, + "step": 486380 + }, + { + "epoch": 0.9825385731081098, + "grad_norm": 378.1340637207031, + "learning_rate": 2.255299473671202e-08, + "loss": 19.8248, + "step": 486390 + }, + { + "epoch": 0.9825587737407936, + "grad_norm": 174.594970703125, + "learning_rate": 2.251989000411514e-08, + "loss": 25.4638, + "step": 486400 + }, + { + "epoch": 0.9825789743734774, + "grad_norm": 139.85891723632812, + "learning_rate": 2.2486809531152563e-08, + "loss": 9.0486, + "step": 486410 + }, + { + "epoch": 0.9825991750061612, + "grad_norm": 152.03848266601562, + "learning_rate": 2.2453753317985272e-08, + "loss": 20.0999, + "step": 486420 + }, + { + "epoch": 0.982619375638845, + "grad_norm": 287.6670227050781, + "learning_rate": 2.2420721364775354e-08, + "loss": 25.8652, + "step": 486430 + }, + { + "epoch": 0.9826395762715289, + "grad_norm": 425.5420837402344, + "learning_rate": 2.2387713671682687e-08, + "loss": 25.9609, + "step": 486440 + }, + { + "epoch": 0.9826597769042127, + "grad_norm": 232.87928771972656, + "learning_rate": 2.2354730238868804e-08, + "loss": 21.0022, + "step": 486450 + }, + { + "epoch": 0.9826799775368965, + "grad_norm": 308.5003662109375, + "learning_rate": 2.2321771066494137e-08, + "loss": 13.2866, + "step": 486460 + }, + { + "epoch": 0.9827001781695802, + "grad_norm": 1615.3829345703125, + "learning_rate": 2.2288836154719663e-08, + "loss": 24.9597, + "step": 486470 + }, + { + "epoch": 0.982720378802264, + "grad_norm": 311.37188720703125, + "learning_rate": 2.2255925503705255e-08, + "loss": 13.8833, + "step": 486480 + }, + { + "epoch": 0.9827405794349479, + "grad_norm": 163.70361328125, + "learning_rate": 2.22230391136119e-08, + "loss": 8.973, + "step": 486490 + }, + { + "epoch": 0.9827607800676317, + "grad_norm": 533.0966796875, + "learning_rate": 2.219017698460002e-08, + "loss": 19.1744, + "step": 486500 + }, + { + "epoch": 0.9827809807003155, + "grad_norm": 320.56341552734375, + "learning_rate": 2.215733911682949e-08, + "loss": 12.5872, + "step": 486510 + }, + { + "epoch": 0.9828011813329993, + "grad_norm": 284.896728515625, + "learning_rate": 2.2124525510459627e-08, + "loss": 13.3462, + "step": 486520 + }, + { + "epoch": 0.9828213819656831, + "grad_norm": 89.13407897949219, + "learning_rate": 2.2091736165651966e-08, + "loss": 5.4464, + "step": 486530 + }, + { + "epoch": 0.982841582598367, + "grad_norm": 368.83697509765625, + "learning_rate": 2.205897108256472e-08, + "loss": 29.8207, + "step": 486540 + }, + { + "epoch": 0.9828617832310508, + "grad_norm": 172.1982879638672, + "learning_rate": 2.202623026135886e-08, + "loss": 14.112, + "step": 486550 + }, + { + "epoch": 0.9828819838637346, + "grad_norm": 460.03369140625, + "learning_rate": 2.1993513702193157e-08, + "loss": 17.7389, + "step": 486560 + }, + { + "epoch": 0.9829021844964184, + "grad_norm": 219.40406799316406, + "learning_rate": 2.1960821405226928e-08, + "loss": 18.4654, + "step": 486570 + }, + { + "epoch": 0.9829223851291022, + "grad_norm": 308.46697998046875, + "learning_rate": 2.1928153370620598e-08, + "loss": 20.2735, + "step": 486580 + }, + { + "epoch": 0.9829425857617861, + "grad_norm": 279.93634033203125, + "learning_rate": 2.1895509598532372e-08, + "loss": 21.5676, + "step": 486590 + }, + { + "epoch": 0.9829627863944699, + "grad_norm": 167.79373168945312, + "learning_rate": 2.1862890089121567e-08, + "loss": 24.1097, + "step": 486600 + }, + { + "epoch": 0.9829829870271537, + "grad_norm": 129.95025634765625, + "learning_rate": 2.1830294842547506e-08, + "loss": 8.3408, + "step": 486610 + }, + { + "epoch": 0.9830031876598375, + "grad_norm": 51.695579528808594, + "learning_rate": 2.1797723858968388e-08, + "loss": 11.7549, + "step": 486620 + }, + { + "epoch": 0.9830233882925213, + "grad_norm": 771.5540161132812, + "learning_rate": 2.1765177138543535e-08, + "loss": 21.0965, + "step": 486630 + }, + { + "epoch": 0.9830435889252052, + "grad_norm": 1035.76416015625, + "learning_rate": 2.173265468143171e-08, + "loss": 35.0378, + "step": 486640 + }, + { + "epoch": 0.983063789557889, + "grad_norm": 270.08856201171875, + "learning_rate": 2.1700156487790557e-08, + "loss": 13.1302, + "step": 486650 + }, + { + "epoch": 0.9830839901905728, + "grad_norm": 140.2735137939453, + "learning_rate": 2.1667682557779958e-08, + "loss": 21.5211, + "step": 486660 + }, + { + "epoch": 0.9831041908232566, + "grad_norm": 441.3387451171875, + "learning_rate": 2.1635232891556446e-08, + "loss": 11.5147, + "step": 486670 + }, + { + "epoch": 0.9831243914559404, + "grad_norm": 725.987548828125, + "learning_rate": 2.1602807489279344e-08, + "loss": 14.4808, + "step": 486680 + }, + { + "epoch": 0.9831445920886243, + "grad_norm": 652.3868408203125, + "learning_rate": 2.1570406351106298e-08, + "loss": 20.0834, + "step": 486690 + }, + { + "epoch": 0.9831647927213081, + "grad_norm": 388.7825012207031, + "learning_rate": 2.1538029477195522e-08, + "loss": 15.2553, + "step": 486700 + }, + { + "epoch": 0.9831849933539919, + "grad_norm": 134.82530212402344, + "learning_rate": 2.1505676867704105e-08, + "loss": 16.7642, + "step": 486710 + }, + { + "epoch": 0.9832051939866756, + "grad_norm": 281.6883850097656, + "learning_rate": 2.1473348522790262e-08, + "loss": 14.2663, + "step": 486720 + }, + { + "epoch": 0.9832253946193594, + "grad_norm": 546.5551147460938, + "learning_rate": 2.1441044442611634e-08, + "loss": 19.5316, + "step": 486730 + }, + { + "epoch": 0.9832455952520432, + "grad_norm": 41.95939254760742, + "learning_rate": 2.1408764627325883e-08, + "loss": 16.1222, + "step": 486740 + }, + { + "epoch": 0.9832657958847271, + "grad_norm": 326.4468994140625, + "learning_rate": 2.1376509077089546e-08, + "loss": 17.1573, + "step": 486750 + }, + { + "epoch": 0.9832859965174109, + "grad_norm": 345.43109130859375, + "learning_rate": 2.1344277792060275e-08, + "loss": 12.0682, + "step": 486760 + }, + { + "epoch": 0.9833061971500947, + "grad_norm": 158.94752502441406, + "learning_rate": 2.1312070772395165e-08, + "loss": 15.5515, + "step": 486770 + }, + { + "epoch": 0.9833263977827785, + "grad_norm": 134.98707580566406, + "learning_rate": 2.1279888018251317e-08, + "loss": 21.1968, + "step": 486780 + }, + { + "epoch": 0.9833465984154623, + "grad_norm": 548.9357299804688, + "learning_rate": 2.1247729529785822e-08, + "loss": 19.2239, + "step": 486790 + }, + { + "epoch": 0.9833667990481462, + "grad_norm": 36.52518844604492, + "learning_rate": 2.1215595307154667e-08, + "loss": 16.6772, + "step": 486800 + }, + { + "epoch": 0.98338699968083, + "grad_norm": 527.3377685546875, + "learning_rate": 2.1183485350514397e-08, + "loss": 12.0839, + "step": 486810 + }, + { + "epoch": 0.9834072003135138, + "grad_norm": 447.6247863769531, + "learning_rate": 2.1151399660022664e-08, + "loss": 16.5998, + "step": 486820 + }, + { + "epoch": 0.9834274009461976, + "grad_norm": 492.91900634765625, + "learning_rate": 2.1119338235834897e-08, + "loss": 33.839, + "step": 486830 + }, + { + "epoch": 0.9834476015788814, + "grad_norm": 192.20407104492188, + "learning_rate": 2.1087301078107637e-08, + "loss": 19.5072, + "step": 486840 + }, + { + "epoch": 0.9834678022115653, + "grad_norm": 317.2949523925781, + "learning_rate": 2.105528818699687e-08, + "loss": 15.7755, + "step": 486850 + }, + { + "epoch": 0.9834880028442491, + "grad_norm": 411.9183654785156, + "learning_rate": 2.1023299562658584e-08, + "loss": 13.3417, + "step": 486860 + }, + { + "epoch": 0.9835082034769329, + "grad_norm": 727.0975341796875, + "learning_rate": 2.0991335205249318e-08, + "loss": 17.7422, + "step": 486870 + }, + { + "epoch": 0.9835284041096167, + "grad_norm": 776.1140747070312, + "learning_rate": 2.0959395114923954e-08, + "loss": 19.5978, + "step": 486880 + }, + { + "epoch": 0.9835486047423005, + "grad_norm": 1540.5836181640625, + "learning_rate": 2.0927479291839024e-08, + "loss": 17.3088, + "step": 486890 + }, + { + "epoch": 0.9835688053749844, + "grad_norm": 350.5865478515625, + "learning_rate": 2.0895587736149414e-08, + "loss": 18.9651, + "step": 486900 + }, + { + "epoch": 0.9835890060076682, + "grad_norm": 674.6132202148438, + "learning_rate": 2.0863720448011106e-08, + "loss": 13.8975, + "step": 486910 + }, + { + "epoch": 0.983609206640352, + "grad_norm": 659.1400146484375, + "learning_rate": 2.0831877427578974e-08, + "loss": 19.0174, + "step": 486920 + }, + { + "epoch": 0.9836294072730358, + "grad_norm": 532.0677490234375, + "learning_rate": 2.0800058675007894e-08, + "loss": 26.3079, + "step": 486930 + }, + { + "epoch": 0.9836496079057196, + "grad_norm": 321.0643615722656, + "learning_rate": 2.076826419045386e-08, + "loss": 14.3447, + "step": 486940 + }, + { + "epoch": 0.9836698085384035, + "grad_norm": 227.18014526367188, + "learning_rate": 2.0736493974071736e-08, + "loss": 18.2144, + "step": 486950 + }, + { + "epoch": 0.9836900091710873, + "grad_norm": 471.3290100097656, + "learning_rate": 2.0704748026015298e-08, + "loss": 12.0176, + "step": 486960 + }, + { + "epoch": 0.9837102098037711, + "grad_norm": 331.65252685546875, + "learning_rate": 2.0673026346440526e-08, + "loss": 21.1374, + "step": 486970 + }, + { + "epoch": 0.9837304104364548, + "grad_norm": 591.6358642578125, + "learning_rate": 2.0641328935501748e-08, + "loss": 35.3378, + "step": 486980 + }, + { + "epoch": 0.9837506110691386, + "grad_norm": 253.2311248779297, + "learning_rate": 2.0609655793352724e-08, + "loss": 15.4262, + "step": 486990 + }, + { + "epoch": 0.9837708117018225, + "grad_norm": 738.0919799804688, + "learning_rate": 2.057800692014833e-08, + "loss": 20.5693, + "step": 487000 + }, + { + "epoch": 0.9837910123345063, + "grad_norm": 395.4708251953125, + "learning_rate": 2.054638231604289e-08, + "loss": 22.3301, + "step": 487010 + }, + { + "epoch": 0.9838112129671901, + "grad_norm": 770.7337646484375, + "learning_rate": 2.051478198119017e-08, + "loss": 25.5808, + "step": 487020 + }, + { + "epoch": 0.9838314135998739, + "grad_norm": 190.54969787597656, + "learning_rate": 2.0483205915745042e-08, + "loss": 23.1655, + "step": 487030 + }, + { + "epoch": 0.9838516142325577, + "grad_norm": 382.62115478515625, + "learning_rate": 2.0451654119860164e-08, + "loss": 16.6641, + "step": 487040 + }, + { + "epoch": 0.9838718148652416, + "grad_norm": 330.2117919921875, + "learning_rate": 2.0420126593690416e-08, + "loss": 9.8796, + "step": 487050 + }, + { + "epoch": 0.9838920154979254, + "grad_norm": 238.4560546875, + "learning_rate": 2.0388623337389003e-08, + "loss": 28.3645, + "step": 487060 + }, + { + "epoch": 0.9839122161306092, + "grad_norm": 222.7383270263672, + "learning_rate": 2.0357144351109693e-08, + "loss": 11.7348, + "step": 487070 + }, + { + "epoch": 0.983932416763293, + "grad_norm": 1798.53515625, + "learning_rate": 2.0325689635005142e-08, + "loss": 24.3655, + "step": 487080 + }, + { + "epoch": 0.9839526173959768, + "grad_norm": 191.74282836914062, + "learning_rate": 2.029425918922967e-08, + "loss": 15.1017, + "step": 487090 + }, + { + "epoch": 0.9839728180286607, + "grad_norm": 223.9886474609375, + "learning_rate": 2.026285301393538e-08, + "loss": 13.2002, + "step": 487100 + }, + { + "epoch": 0.9839930186613445, + "grad_norm": 115.09986114501953, + "learning_rate": 2.023147110927659e-08, + "loss": 12.3963, + "step": 487110 + }, + { + "epoch": 0.9840132192940283, + "grad_norm": 253.5218048095703, + "learning_rate": 2.020011347540596e-08, + "loss": 15.6474, + "step": 487120 + }, + { + "epoch": 0.9840334199267121, + "grad_norm": 283.21771240234375, + "learning_rate": 2.016878011247503e-08, + "loss": 22.0459, + "step": 487130 + }, + { + "epoch": 0.9840536205593959, + "grad_norm": 81.14229583740234, + "learning_rate": 2.013747102063812e-08, + "loss": 11.7839, + "step": 487140 + }, + { + "epoch": 0.9840738211920798, + "grad_norm": 506.94976806640625, + "learning_rate": 2.010618620004734e-08, + "loss": 18.794, + "step": 487150 + }, + { + "epoch": 0.9840940218247636, + "grad_norm": 214.54312133789062, + "learning_rate": 2.0074925650854226e-08, + "loss": 22.0953, + "step": 487160 + }, + { + "epoch": 0.9841142224574474, + "grad_norm": 90.94629669189453, + "learning_rate": 2.004368937321255e-08, + "loss": 14.7933, + "step": 487170 + }, + { + "epoch": 0.9841344230901312, + "grad_norm": 354.2481384277344, + "learning_rate": 2.0012477367273854e-08, + "loss": 12.9025, + "step": 487180 + }, + { + "epoch": 0.984154623722815, + "grad_norm": 485.8455810546875, + "learning_rate": 1.9981289633190237e-08, + "loss": 18.0831, + "step": 487190 + }, + { + "epoch": 0.9841748243554989, + "grad_norm": 326.9634094238281, + "learning_rate": 1.995012617111436e-08, + "loss": 11.4334, + "step": 487200 + }, + { + "epoch": 0.9841950249881827, + "grad_norm": 440.474853515625, + "learning_rate": 1.9918986981196653e-08, + "loss": 22.803, + "step": 487210 + }, + { + "epoch": 0.9842152256208665, + "grad_norm": 384.60382080078125, + "learning_rate": 1.988787206359033e-08, + "loss": 17.9767, + "step": 487220 + }, + { + "epoch": 0.9842354262535503, + "grad_norm": 531.9924926757812, + "learning_rate": 1.985678141844638e-08, + "loss": 17.9372, + "step": 487230 + }, + { + "epoch": 0.984255626886234, + "grad_norm": 341.8916015625, + "learning_rate": 1.9825715045916905e-08, + "loss": 18.8524, + "step": 487240 + }, + { + "epoch": 0.9842758275189178, + "grad_norm": 188.14895629882812, + "learning_rate": 1.9794672946152337e-08, + "loss": 19.2394, + "step": 487250 + }, + { + "epoch": 0.9842960281516017, + "grad_norm": 470.08599853515625, + "learning_rate": 1.9763655119304227e-08, + "loss": 13.1318, + "step": 487260 + }, + { + "epoch": 0.9843162287842855, + "grad_norm": 235.66885375976562, + "learning_rate": 1.973266156552467e-08, + "loss": 26.0032, + "step": 487270 + }, + { + "epoch": 0.9843364294169693, + "grad_norm": 756.0232543945312, + "learning_rate": 1.9701692284963547e-08, + "loss": 20.216, + "step": 487280 + }, + { + "epoch": 0.9843566300496531, + "grad_norm": 1019.5380859375, + "learning_rate": 1.967074727777296e-08, + "loss": 27.1592, + "step": 487290 + }, + { + "epoch": 0.984376830682337, + "grad_norm": 10.018977165222168, + "learning_rate": 1.963982654410279e-08, + "loss": 11.0307, + "step": 487300 + }, + { + "epoch": 0.9843970313150208, + "grad_norm": 255.76251220703125, + "learning_rate": 1.9608930084104027e-08, + "loss": 12.4662, + "step": 487310 + }, + { + "epoch": 0.9844172319477046, + "grad_norm": 493.1605529785156, + "learning_rate": 1.9578057897927104e-08, + "loss": 20.6991, + "step": 487320 + }, + { + "epoch": 0.9844374325803884, + "grad_norm": 460.5899963378906, + "learning_rate": 1.9547209985723015e-08, + "loss": 16.1439, + "step": 487330 + }, + { + "epoch": 0.9844576332130722, + "grad_norm": 289.14593505859375, + "learning_rate": 1.9516386347641636e-08, + "loss": 28.3802, + "step": 487340 + }, + { + "epoch": 0.984477833845756, + "grad_norm": 64.37545776367188, + "learning_rate": 1.9485586983833404e-08, + "loss": 21.3151, + "step": 487350 + }, + { + "epoch": 0.9844980344784399, + "grad_norm": 271.99395751953125, + "learning_rate": 1.94548118944482e-08, + "loss": 7.3311, + "step": 487360 + }, + { + "epoch": 0.9845182351111237, + "grad_norm": 325.72216796875, + "learning_rate": 1.9424061079636458e-08, + "loss": 18.6132, + "step": 487370 + }, + { + "epoch": 0.9845384357438075, + "grad_norm": 546.6802368164062, + "learning_rate": 1.9393334539547505e-08, + "loss": 20.1622, + "step": 487380 + }, + { + "epoch": 0.9845586363764913, + "grad_norm": 691.5068969726562, + "learning_rate": 1.9362632274331215e-08, + "loss": 26.9569, + "step": 487390 + }, + { + "epoch": 0.9845788370091751, + "grad_norm": 243.03543090820312, + "learning_rate": 1.9331954284137476e-08, + "loss": 11.425, + "step": 487400 + }, + { + "epoch": 0.984599037641859, + "grad_norm": 403.1186828613281, + "learning_rate": 1.9301300569116165e-08, + "loss": 10.5166, + "step": 487410 + }, + { + "epoch": 0.9846192382745428, + "grad_norm": 403.91058349609375, + "learning_rate": 1.9270671129415496e-08, + "loss": 21.6073, + "step": 487420 + }, + { + "epoch": 0.9846394389072266, + "grad_norm": 202.64797973632812, + "learning_rate": 1.9240065965185907e-08, + "loss": 13.2823, + "step": 487430 + }, + { + "epoch": 0.9846596395399104, + "grad_norm": 330.4432373046875, + "learning_rate": 1.9209485076576718e-08, + "loss": 17.4824, + "step": 487440 + }, + { + "epoch": 0.9846798401725942, + "grad_norm": 745.1810302734375, + "learning_rate": 1.9178928463735593e-08, + "loss": 19.5667, + "step": 487450 + }, + { + "epoch": 0.9847000408052781, + "grad_norm": 0.0, + "learning_rate": 1.9148396126812407e-08, + "loss": 21.1232, + "step": 487460 + }, + { + "epoch": 0.9847202414379619, + "grad_norm": 289.8485107421875, + "learning_rate": 1.9117888065955938e-08, + "loss": 8.0293, + "step": 487470 + }, + { + "epoch": 0.9847404420706457, + "grad_norm": 286.2195129394531, + "learning_rate": 1.908740428131495e-08, + "loss": 14.2113, + "step": 487480 + }, + { + "epoch": 0.9847606427033294, + "grad_norm": 32.78451156616211, + "learning_rate": 1.9056944773037656e-08, + "loss": 13.1252, + "step": 487490 + }, + { + "epoch": 0.9847808433360132, + "grad_norm": 131.9490203857422, + "learning_rate": 1.9026509541272276e-08, + "loss": 11.4658, + "step": 487500 + }, + { + "epoch": 0.984801043968697, + "grad_norm": 550.3273315429688, + "learning_rate": 1.8996098586168132e-08, + "loss": 10.4393, + "step": 487510 + }, + { + "epoch": 0.9848212446013809, + "grad_norm": 242.36134338378906, + "learning_rate": 1.8965711907872885e-08, + "loss": 25.7361, + "step": 487520 + }, + { + "epoch": 0.9848414452340647, + "grad_norm": 413.4701232910156, + "learning_rate": 1.8935349506534195e-08, + "loss": 24.0202, + "step": 487530 + }, + { + "epoch": 0.9848616458667485, + "grad_norm": 672.5272827148438, + "learning_rate": 1.890501138230083e-08, + "loss": 31.6589, + "step": 487540 + }, + { + "epoch": 0.9848818464994323, + "grad_norm": 291.6829833984375, + "learning_rate": 1.8874697535319897e-08, + "loss": 14.4903, + "step": 487550 + }, + { + "epoch": 0.9849020471321162, + "grad_norm": 623.2794189453125, + "learning_rate": 1.8844407965740165e-08, + "loss": 18.8497, + "step": 487560 + }, + { + "epoch": 0.9849222477648, + "grad_norm": 336.7108154296875, + "learning_rate": 1.881414267370818e-08, + "loss": 26.6136, + "step": 487570 + }, + { + "epoch": 0.9849424483974838, + "grad_norm": 363.7168884277344, + "learning_rate": 1.8783901659372162e-08, + "loss": 12.3119, + "step": 487580 + }, + { + "epoch": 0.9849626490301676, + "grad_norm": 188.1642303466797, + "learning_rate": 1.875368492287921e-08, + "loss": 12.5046, + "step": 487590 + }, + { + "epoch": 0.9849828496628514, + "grad_norm": 541.2425537109375, + "learning_rate": 1.8723492464376992e-08, + "loss": 24.7602, + "step": 487600 + }, + { + "epoch": 0.9850030502955353, + "grad_norm": 335.880859375, + "learning_rate": 1.8693324284011495e-08, + "loss": 29.85, + "step": 487610 + }, + { + "epoch": 0.9850232509282191, + "grad_norm": 479.3641662597656, + "learning_rate": 1.8663180381931488e-08, + "loss": 18.8721, + "step": 487620 + }, + { + "epoch": 0.9850434515609029, + "grad_norm": 496.9161071777344, + "learning_rate": 1.8633060758282418e-08, + "loss": 17.1028, + "step": 487630 + }, + { + "epoch": 0.9850636521935867, + "grad_norm": 662.9967041015625, + "learning_rate": 1.860296541321138e-08, + "loss": 14.4051, + "step": 487640 + }, + { + "epoch": 0.9850838528262705, + "grad_norm": 183.82313537597656, + "learning_rate": 1.8572894346866043e-08, + "loss": 6.9085, + "step": 487650 + }, + { + "epoch": 0.9851040534589544, + "grad_norm": 275.2166442871094, + "learning_rate": 1.854284755939184e-08, + "loss": 33.5495, + "step": 487660 + }, + { + "epoch": 0.9851242540916382, + "grad_norm": 2.3112215995788574, + "learning_rate": 1.8512825050935323e-08, + "loss": 14.4201, + "step": 487670 + }, + { + "epoch": 0.985144454724322, + "grad_norm": 338.2336120605469, + "learning_rate": 1.8482826821643596e-08, + "loss": 35.2743, + "step": 487680 + }, + { + "epoch": 0.9851646553570058, + "grad_norm": 64.32235717773438, + "learning_rate": 1.8452852871662653e-08, + "loss": 7.4456, + "step": 487690 + }, + { + "epoch": 0.9851848559896896, + "grad_norm": 0.2854507565498352, + "learning_rate": 1.842290320113793e-08, + "loss": 22.3451, + "step": 487700 + }, + { + "epoch": 0.9852050566223735, + "grad_norm": 331.331787109375, + "learning_rate": 1.839297781021543e-08, + "loss": 25.7413, + "step": 487710 + }, + { + "epoch": 0.9852252572550573, + "grad_norm": 257.5212707519531, + "learning_rate": 1.8363076699041695e-08, + "loss": 16.15, + "step": 487720 + }, + { + "epoch": 0.9852454578877411, + "grad_norm": 472.3876953125, + "learning_rate": 1.8333199867762163e-08, + "loss": 15.8237, + "step": 487730 + }, + { + "epoch": 0.9852656585204249, + "grad_norm": 635.3434448242188, + "learning_rate": 1.830334731652228e-08, + "loss": 26.1093, + "step": 487740 + }, + { + "epoch": 0.9852858591531086, + "grad_norm": 5.066963195800781, + "learning_rate": 1.8273519045468035e-08, + "loss": 11.3581, + "step": 487750 + }, + { + "epoch": 0.9853060597857924, + "grad_norm": 303.07904052734375, + "learning_rate": 1.8243715054744315e-08, + "loss": 19.2844, + "step": 487760 + }, + { + "epoch": 0.9853262604184763, + "grad_norm": 323.21795654296875, + "learning_rate": 1.8213935344496002e-08, + "loss": 12.7054, + "step": 487770 + }, + { + "epoch": 0.9853464610511601, + "grad_norm": 187.547119140625, + "learning_rate": 1.8184179914869093e-08, + "loss": 10.2604, + "step": 487780 + }, + { + "epoch": 0.9853666616838439, + "grad_norm": 393.1107177734375, + "learning_rate": 1.815444876600847e-08, + "loss": 11.2483, + "step": 487790 + }, + { + "epoch": 0.9853868623165277, + "grad_norm": 156.0241241455078, + "learning_rate": 1.8124741898058462e-08, + "loss": 14.5017, + "step": 487800 + }, + { + "epoch": 0.9854070629492115, + "grad_norm": 185.0515899658203, + "learning_rate": 1.8095059311164508e-08, + "loss": 16.2474, + "step": 487810 + }, + { + "epoch": 0.9854272635818954, + "grad_norm": 177.43402099609375, + "learning_rate": 1.8065401005470938e-08, + "loss": 17.9147, + "step": 487820 + }, + { + "epoch": 0.9854474642145792, + "grad_norm": 549.4453125, + "learning_rate": 1.803576698112264e-08, + "loss": 18.9288, + "step": 487830 + }, + { + "epoch": 0.985467664847263, + "grad_norm": 317.1197509765625, + "learning_rate": 1.8006157238263376e-08, + "loss": 22.0972, + "step": 487840 + }, + { + "epoch": 0.9854878654799468, + "grad_norm": 302.7123107910156, + "learning_rate": 1.7976571777038044e-08, + "loss": 16.1826, + "step": 487850 + }, + { + "epoch": 0.9855080661126306, + "grad_norm": 477.897216796875, + "learning_rate": 1.7947010597590408e-08, + "loss": 7.3532, + "step": 487860 + }, + { + "epoch": 0.9855282667453145, + "grad_norm": 425.7860412597656, + "learning_rate": 1.791747370006536e-08, + "loss": 21.8247, + "step": 487870 + }, + { + "epoch": 0.9855484673779983, + "grad_norm": 265.6903991699219, + "learning_rate": 1.7887961084605554e-08, + "loss": 17.2382, + "step": 487880 + }, + { + "epoch": 0.9855686680106821, + "grad_norm": 210.8507537841797, + "learning_rate": 1.7858472751355883e-08, + "loss": 18.1254, + "step": 487890 + }, + { + "epoch": 0.9855888686433659, + "grad_norm": 302.1631774902344, + "learning_rate": 1.7829008700460116e-08, + "loss": 26.4176, + "step": 487900 + }, + { + "epoch": 0.9856090692760497, + "grad_norm": 559.1700439453125, + "learning_rate": 1.779956893206092e-08, + "loss": 21.3995, + "step": 487910 + }, + { + "epoch": 0.9856292699087336, + "grad_norm": 342.4388427734375, + "learning_rate": 1.7770153446302618e-08, + "loss": 15.9682, + "step": 487920 + }, + { + "epoch": 0.9856494705414174, + "grad_norm": 166.08718872070312, + "learning_rate": 1.7740762243328435e-08, + "loss": 19.5306, + "step": 487930 + }, + { + "epoch": 0.9856696711741012, + "grad_norm": 369.96246337890625, + "learning_rate": 1.7711395323281588e-08, + "loss": 17.6639, + "step": 487940 + }, + { + "epoch": 0.985689871806785, + "grad_norm": 270.3794860839844, + "learning_rate": 1.768205268630474e-08, + "loss": 11.5303, + "step": 487950 + }, + { + "epoch": 0.9857100724394688, + "grad_norm": 896.9556274414062, + "learning_rate": 1.765273433254111e-08, + "loss": 13.5518, + "step": 487960 + }, + { + "epoch": 0.9857302730721527, + "grad_norm": 562.6268920898438, + "learning_rate": 1.7623440262134472e-08, + "loss": 30.144, + "step": 487970 + }, + { + "epoch": 0.9857504737048365, + "grad_norm": 626.1119384765625, + "learning_rate": 1.759417047522638e-08, + "loss": 23.2989, + "step": 487980 + }, + { + "epoch": 0.9857706743375203, + "grad_norm": 207.8027801513672, + "learning_rate": 1.756492497196005e-08, + "loss": 27.4454, + "step": 487990 + }, + { + "epoch": 0.985790874970204, + "grad_norm": 460.8661193847656, + "learning_rate": 1.753570375247815e-08, + "loss": 22.146, + "step": 488000 + }, + { + "epoch": 0.9858110756028878, + "grad_norm": 852.23779296875, + "learning_rate": 1.7506506816923342e-08, + "loss": 33.3133, + "step": 488010 + }, + { + "epoch": 0.9858312762355717, + "grad_norm": 555.6287231445312, + "learning_rate": 1.747733416543662e-08, + "loss": 9.9734, + "step": 488020 + }, + { + "epoch": 0.9858514768682555, + "grad_norm": 302.9903259277344, + "learning_rate": 1.7448185798161765e-08, + "loss": 25.9241, + "step": 488030 + }, + { + "epoch": 0.9858716775009393, + "grad_norm": 897.1173706054688, + "learning_rate": 1.741906171523977e-08, + "loss": 24.4153, + "step": 488040 + }, + { + "epoch": 0.9858918781336231, + "grad_norm": 478.850341796875, + "learning_rate": 1.73899619168133e-08, + "loss": 13.5781, + "step": 488050 + }, + { + "epoch": 0.9859120787663069, + "grad_norm": 30.23406982421875, + "learning_rate": 1.7360886403023358e-08, + "loss": 7.5778, + "step": 488060 + }, + { + "epoch": 0.9859322793989908, + "grad_norm": 202.19210815429688, + "learning_rate": 1.7331835174012602e-08, + "loss": 12.541, + "step": 488070 + }, + { + "epoch": 0.9859524800316746, + "grad_norm": 158.72364807128906, + "learning_rate": 1.7302808229921476e-08, + "loss": 11.0494, + "step": 488080 + }, + { + "epoch": 0.9859726806643584, + "grad_norm": 133.09844970703125, + "learning_rate": 1.7273805570892643e-08, + "loss": 18.3383, + "step": 488090 + }, + { + "epoch": 0.9859928812970422, + "grad_norm": 340.03326416015625, + "learning_rate": 1.7244827197067103e-08, + "loss": 12.8224, + "step": 488100 + }, + { + "epoch": 0.986013081929726, + "grad_norm": 1257.68994140625, + "learning_rate": 1.7215873108585858e-08, + "loss": 42.4838, + "step": 488110 + }, + { + "epoch": 0.9860332825624099, + "grad_norm": 718.265869140625, + "learning_rate": 1.71869433055899e-08, + "loss": 18.3349, + "step": 488120 + }, + { + "epoch": 0.9860534831950937, + "grad_norm": 379.7763671875, + "learning_rate": 1.7158037788220782e-08, + "loss": 11.872, + "step": 488130 + }, + { + "epoch": 0.9860736838277775, + "grad_norm": 353.88665771484375, + "learning_rate": 1.7129156556618398e-08, + "loss": 25.6119, + "step": 488140 + }, + { + "epoch": 0.9860938844604613, + "grad_norm": 317.9150390625, + "learning_rate": 1.7100299610924297e-08, + "loss": 15.0898, + "step": 488150 + }, + { + "epoch": 0.9861140850931451, + "grad_norm": 453.9829406738281, + "learning_rate": 1.707146695127948e-08, + "loss": 13.9989, + "step": 488160 + }, + { + "epoch": 0.986134285725829, + "grad_norm": 166.3394012451172, + "learning_rate": 1.7042658577823833e-08, + "loss": 42.1762, + "step": 488170 + }, + { + "epoch": 0.9861544863585128, + "grad_norm": 283.5535583496094, + "learning_rate": 1.7013874490697802e-08, + "loss": 11.6909, + "step": 488180 + }, + { + "epoch": 0.9861746869911966, + "grad_norm": 276.1616516113281, + "learning_rate": 1.6985114690041825e-08, + "loss": 21.0326, + "step": 488190 + }, + { + "epoch": 0.9861948876238804, + "grad_norm": 141.51397705078125, + "learning_rate": 1.6956379175995796e-08, + "loss": 7.7452, + "step": 488200 + }, + { + "epoch": 0.9862150882565642, + "grad_norm": 171.76841735839844, + "learning_rate": 1.6927667948700155e-08, + "loss": 12.0104, + "step": 488210 + }, + { + "epoch": 0.986235288889248, + "grad_norm": 595.8139038085938, + "learning_rate": 1.689898100829479e-08, + "loss": 14.0837, + "step": 488220 + }, + { + "epoch": 0.9862554895219319, + "grad_norm": 473.9314880371094, + "learning_rate": 1.687031835491959e-08, + "loss": 26.5983, + "step": 488230 + }, + { + "epoch": 0.9862756901546157, + "grad_norm": 283.1180419921875, + "learning_rate": 1.6841679988713332e-08, + "loss": 12.0923, + "step": 488240 + }, + { + "epoch": 0.9862958907872995, + "grad_norm": 289.6003112792969, + "learning_rate": 1.681306590981702e-08, + "loss": 20.0766, + "step": 488250 + }, + { + "epoch": 0.9863160914199832, + "grad_norm": 386.9776306152344, + "learning_rate": 1.678447611836942e-08, + "loss": 20.7361, + "step": 488260 + }, + { + "epoch": 0.986336292052667, + "grad_norm": 179.49264526367188, + "learning_rate": 1.6755910614509872e-08, + "loss": 13.2435, + "step": 488270 + }, + { + "epoch": 0.9863564926853509, + "grad_norm": 273.3138427734375, + "learning_rate": 1.6727369398377158e-08, + "loss": 18.0023, + "step": 488280 + }, + { + "epoch": 0.9863766933180347, + "grad_norm": 685.1600341796875, + "learning_rate": 1.669885247011116e-08, + "loss": 22.936, + "step": 488290 + }, + { + "epoch": 0.9863968939507185, + "grad_norm": 612.9713745117188, + "learning_rate": 1.6670359829850657e-08, + "loss": 24.6449, + "step": 488300 + }, + { + "epoch": 0.9864170945834023, + "grad_norm": 168.1580047607422, + "learning_rate": 1.664189147773443e-08, + "loss": 10.4781, + "step": 488310 + }, + { + "epoch": 0.9864372952160861, + "grad_norm": 116.18118286132812, + "learning_rate": 1.6613447413900696e-08, + "loss": 17.8398, + "step": 488320 + }, + { + "epoch": 0.98645749584877, + "grad_norm": 217.2073974609375, + "learning_rate": 1.6585027638489347e-08, + "loss": 16.984, + "step": 488330 + }, + { + "epoch": 0.9864776964814538, + "grad_norm": 219.95492553710938, + "learning_rate": 1.655663215163805e-08, + "loss": 8.6126, + "step": 488340 + }, + { + "epoch": 0.9864978971141376, + "grad_norm": 261.9851989746094, + "learning_rate": 1.6528260953484476e-08, + "loss": 16.6404, + "step": 488350 + }, + { + "epoch": 0.9865180977468214, + "grad_norm": 305.74176025390625, + "learning_rate": 1.6499914044168508e-08, + "loss": 14.7852, + "step": 488360 + }, + { + "epoch": 0.9865382983795052, + "grad_norm": 174.19142150878906, + "learning_rate": 1.6471591423827817e-08, + "loss": 17.0763, + "step": 488370 + }, + { + "epoch": 0.9865584990121891, + "grad_norm": 526.5982666015625, + "learning_rate": 1.644329309259951e-08, + "loss": 23.4136, + "step": 488380 + }, + { + "epoch": 0.9865786996448729, + "grad_norm": 175.1710968017578, + "learning_rate": 1.6415019050622373e-08, + "loss": 20.1765, + "step": 488390 + }, + { + "epoch": 0.9865989002775567, + "grad_norm": 442.1764221191406, + "learning_rate": 1.6386769298034067e-08, + "loss": 15.7431, + "step": 488400 + }, + { + "epoch": 0.9866191009102405, + "grad_norm": 491.32269287109375, + "learning_rate": 1.635854383497226e-08, + "loss": 10.6239, + "step": 488410 + }, + { + "epoch": 0.9866393015429243, + "grad_norm": 153.86268615722656, + "learning_rate": 1.6330342661574072e-08, + "loss": 8.8285, + "step": 488420 + }, + { + "epoch": 0.9866595021756082, + "grad_norm": 534.8939208984375, + "learning_rate": 1.6302165777977718e-08, + "loss": 15.201, + "step": 488430 + }, + { + "epoch": 0.986679702808292, + "grad_norm": 687.9249877929688, + "learning_rate": 1.6274013184319757e-08, + "loss": 24.0827, + "step": 488440 + }, + { + "epoch": 0.9866999034409758, + "grad_norm": 270.24505615234375, + "learning_rate": 1.6245884880738415e-08, + "loss": 8.0484, + "step": 488450 + }, + { + "epoch": 0.9867201040736596, + "grad_norm": 31.995512008666992, + "learning_rate": 1.621778086736969e-08, + "loss": 15.4762, + "step": 488460 + }, + { + "epoch": 0.9867403047063434, + "grad_norm": 421.179443359375, + "learning_rate": 1.6189701144351254e-08, + "loss": 18.3479, + "step": 488470 + }, + { + "epoch": 0.9867605053390273, + "grad_norm": 912.8563842773438, + "learning_rate": 1.6161645711819664e-08, + "loss": 37.9865, + "step": 488480 + }, + { + "epoch": 0.9867807059717111, + "grad_norm": 244.22555541992188, + "learning_rate": 1.6133614569912027e-08, + "loss": 16.8347, + "step": 488490 + }, + { + "epoch": 0.9868009066043949, + "grad_norm": 1216.3824462890625, + "learning_rate": 1.610560771876435e-08, + "loss": 20.0859, + "step": 488500 + }, + { + "epoch": 0.9868211072370787, + "grad_norm": 310.50262451171875, + "learning_rate": 1.607762515851319e-08, + "loss": 17.3168, + "step": 488510 + }, + { + "epoch": 0.9868413078697624, + "grad_norm": 362.2833251953125, + "learning_rate": 1.6049666889295657e-08, + "loss": 21.6378, + "step": 488520 + }, + { + "epoch": 0.9868615085024462, + "grad_norm": 901.0900268554688, + "learning_rate": 1.6021732911247756e-08, + "loss": 22.5777, + "step": 488530 + }, + { + "epoch": 0.9868817091351301, + "grad_norm": 526.10791015625, + "learning_rate": 1.5993823224504935e-08, + "loss": 24.0116, + "step": 488540 + }, + { + "epoch": 0.9869019097678139, + "grad_norm": 450.54888916015625, + "learning_rate": 1.5965937829204302e-08, + "loss": 17.4813, + "step": 488550 + }, + { + "epoch": 0.9869221104004977, + "grad_norm": 136.32603454589844, + "learning_rate": 1.5938076725480756e-08, + "loss": 15.49, + "step": 488560 + }, + { + "epoch": 0.9869423110331815, + "grad_norm": 276.5377197265625, + "learning_rate": 1.5910239913470292e-08, + "loss": 8.7842, + "step": 488570 + }, + { + "epoch": 0.9869625116658653, + "grad_norm": 132.17144775390625, + "learning_rate": 1.5882427393309475e-08, + "loss": 12.1712, + "step": 488580 + }, + { + "epoch": 0.9869827122985492, + "grad_norm": 341.4737548828125, + "learning_rate": 1.585463916513319e-08, + "loss": 14.214, + "step": 488590 + }, + { + "epoch": 0.987002912931233, + "grad_norm": 552.6314697265625, + "learning_rate": 1.582687522907633e-08, + "loss": 16.6554, + "step": 488600 + }, + { + "epoch": 0.9870231135639168, + "grad_norm": 547.34814453125, + "learning_rate": 1.5799135585274906e-08, + "loss": 21.9563, + "step": 488610 + }, + { + "epoch": 0.9870433141966006, + "grad_norm": 530.8009643554688, + "learning_rate": 1.5771420233864355e-08, + "loss": 17.8597, + "step": 488620 + }, + { + "epoch": 0.9870635148292844, + "grad_norm": 121.2981185913086, + "learning_rate": 1.5743729174979016e-08, + "loss": 9.4337, + "step": 488630 + }, + { + "epoch": 0.9870837154619683, + "grad_norm": 421.91375732421875, + "learning_rate": 1.571606240875434e-08, + "loss": 25.9695, + "step": 488640 + }, + { + "epoch": 0.9871039160946521, + "grad_norm": 24.188617706298828, + "learning_rate": 1.5688419935325216e-08, + "loss": 11.5205, + "step": 488650 + }, + { + "epoch": 0.9871241167273359, + "grad_norm": 170.5167999267578, + "learning_rate": 1.5660801754825983e-08, + "loss": 10.8688, + "step": 488660 + }, + { + "epoch": 0.9871443173600197, + "grad_norm": 225.33900451660156, + "learning_rate": 1.563320786739153e-08, + "loss": 13.0992, + "step": 488670 + }, + { + "epoch": 0.9871645179927035, + "grad_norm": 530.8095092773438, + "learning_rate": 1.56056382731562e-08, + "loss": 19.3749, + "step": 488680 + }, + { + "epoch": 0.9871847186253874, + "grad_norm": 53.277740478515625, + "learning_rate": 1.5578092972254875e-08, + "loss": 17.041, + "step": 488690 + }, + { + "epoch": 0.9872049192580712, + "grad_norm": 114.4014663696289, + "learning_rate": 1.5550571964820793e-08, + "loss": 23.5643, + "step": 488700 + }, + { + "epoch": 0.987225119890755, + "grad_norm": 106.43096160888672, + "learning_rate": 1.5523075250989395e-08, + "loss": 4.523, + "step": 488710 + }, + { + "epoch": 0.9872453205234388, + "grad_norm": 337.0357360839844, + "learning_rate": 1.5495602830893354e-08, + "loss": 22.5743, + "step": 488720 + }, + { + "epoch": 0.9872655211561226, + "grad_norm": 556.9434204101562, + "learning_rate": 1.546815470466756e-08, + "loss": 41.1725, + "step": 488730 + }, + { + "epoch": 0.9872857217888065, + "grad_norm": 95.35572052001953, + "learning_rate": 1.5440730872445242e-08, + "loss": 26.158, + "step": 488740 + }, + { + "epoch": 0.9873059224214903, + "grad_norm": 331.4527893066406, + "learning_rate": 1.541333133436018e-08, + "loss": 20.8953, + "step": 488750 + }, + { + "epoch": 0.9873261230541741, + "grad_norm": 0.0, + "learning_rate": 1.538595609054616e-08, + "loss": 11.3459, + "step": 488760 + }, + { + "epoch": 0.9873463236868578, + "grad_norm": 317.4156799316406, + "learning_rate": 1.5358605141136407e-08, + "loss": 15.1851, + "step": 488770 + }, + { + "epoch": 0.9873665243195416, + "grad_norm": 145.23828125, + "learning_rate": 1.5331278486264144e-08, + "loss": 11.5322, + "step": 488780 + }, + { + "epoch": 0.9873867249522255, + "grad_norm": 1322.247802734375, + "learning_rate": 1.53039761260626e-08, + "loss": 12.257, + "step": 488790 + }, + { + "epoch": 0.9874069255849093, + "grad_norm": 549.3560791015625, + "learning_rate": 1.5276698060665007e-08, + "loss": 20.2712, + "step": 488800 + }, + { + "epoch": 0.9874271262175931, + "grad_norm": 237.40028381347656, + "learning_rate": 1.5249444290204584e-08, + "loss": 21.5498, + "step": 488810 + }, + { + "epoch": 0.9874473268502769, + "grad_norm": 361.716796875, + "learning_rate": 1.5222214814812897e-08, + "loss": 15.8732, + "step": 488820 + }, + { + "epoch": 0.9874675274829607, + "grad_norm": 281.2061462402344, + "learning_rate": 1.519500963462428e-08, + "loss": 13.3269, + "step": 488830 + }, + { + "epoch": 0.9874877281156446, + "grad_norm": 397.55853271484375, + "learning_rate": 1.5167828749770853e-08, + "loss": 19.2063, + "step": 488840 + }, + { + "epoch": 0.9875079287483284, + "grad_norm": 1179.2423095703125, + "learning_rate": 1.5140672160384174e-08, + "loss": 27.6955, + "step": 488850 + }, + { + "epoch": 0.9875281293810122, + "grad_norm": 160.7357940673828, + "learning_rate": 1.511353986659747e-08, + "loss": 12.1364, + "step": 488860 + }, + { + "epoch": 0.987548330013696, + "grad_norm": 302.5446472167969, + "learning_rate": 1.508643186854286e-08, + "loss": 12.375, + "step": 488870 + }, + { + "epoch": 0.9875685306463798, + "grad_norm": 17.310794830322266, + "learning_rate": 1.505934816635246e-08, + "loss": 22.2095, + "step": 488880 + }, + { + "epoch": 0.9875887312790637, + "grad_norm": 464.3238220214844, + "learning_rate": 1.503228876015783e-08, + "loss": 14.6066, + "step": 488890 + }, + { + "epoch": 0.9876089319117475, + "grad_norm": 376.7144775390625, + "learning_rate": 1.500525365009109e-08, + "loss": 24.8198, + "step": 488900 + }, + { + "epoch": 0.9876291325444313, + "grad_norm": 296.1361083984375, + "learning_rate": 1.4978242836284908e-08, + "loss": 18.8745, + "step": 488910 + }, + { + "epoch": 0.9876493331771151, + "grad_norm": 398.23638916015625, + "learning_rate": 1.4951256318869733e-08, + "loss": 16.8605, + "step": 488920 + }, + { + "epoch": 0.9876695338097989, + "grad_norm": 227.01824951171875, + "learning_rate": 1.4924294097977687e-08, + "loss": 23.3466, + "step": 488930 + }, + { + "epoch": 0.9876897344424828, + "grad_norm": 2.6604208946228027, + "learning_rate": 1.4897356173739774e-08, + "loss": 8.3296, + "step": 488940 + }, + { + "epoch": 0.9877099350751666, + "grad_norm": 390.50103759765625, + "learning_rate": 1.4870442546287555e-08, + "loss": 10.9325, + "step": 488950 + }, + { + "epoch": 0.9877301357078504, + "grad_norm": 259.9412536621094, + "learning_rate": 1.4843553215752037e-08, + "loss": 7.4341, + "step": 488960 + }, + { + "epoch": 0.9877503363405342, + "grad_norm": 233.6974334716797, + "learning_rate": 1.4816688182264782e-08, + "loss": 28.2152, + "step": 488970 + }, + { + "epoch": 0.987770536973218, + "grad_norm": 143.91610717773438, + "learning_rate": 1.478984744595624e-08, + "loss": 14.1742, + "step": 488980 + }, + { + "epoch": 0.9877907376059019, + "grad_norm": 380.85211181640625, + "learning_rate": 1.4763031006957417e-08, + "loss": 19.1149, + "step": 488990 + }, + { + "epoch": 0.9878109382385857, + "grad_norm": 2.1768627166748047, + "learning_rate": 1.4736238865398766e-08, + "loss": 23.0437, + "step": 489000 + }, + { + "epoch": 0.9878311388712695, + "grad_norm": 395.1035461425781, + "learning_rate": 1.4709471021411293e-08, + "loss": 22.9502, + "step": 489010 + }, + { + "epoch": 0.9878513395039533, + "grad_norm": 282.9913024902344, + "learning_rate": 1.4682727475124891e-08, + "loss": 8.5003, + "step": 489020 + }, + { + "epoch": 0.987871540136637, + "grad_norm": 554.9068603515625, + "learning_rate": 1.4656008226670571e-08, + "loss": 17.3521, + "step": 489030 + }, + { + "epoch": 0.9878917407693208, + "grad_norm": 899.536865234375, + "learning_rate": 1.462931327617767e-08, + "loss": 16.8683, + "step": 489040 + }, + { + "epoch": 0.9879119414020047, + "grad_norm": 233.77371215820312, + "learning_rate": 1.4602642623777752e-08, + "loss": 15.6234, + "step": 489050 + }, + { + "epoch": 0.9879321420346885, + "grad_norm": 900.2330932617188, + "learning_rate": 1.4575996269599046e-08, + "loss": 23.9059, + "step": 489060 + }, + { + "epoch": 0.9879523426673723, + "grad_norm": 463.00982666015625, + "learning_rate": 1.454937421377256e-08, + "loss": 24.6737, + "step": 489070 + }, + { + "epoch": 0.9879725433000561, + "grad_norm": 685.919921875, + "learning_rate": 1.4522776456427635e-08, + "loss": 13.0379, + "step": 489080 + }, + { + "epoch": 0.98799274393274, + "grad_norm": 338.71246337890625, + "learning_rate": 1.4496202997694164e-08, + "loss": 11.774, + "step": 489090 + }, + { + "epoch": 0.9880129445654238, + "grad_norm": 429.50885009765625, + "learning_rate": 1.4469653837701491e-08, + "loss": 19.8358, + "step": 489100 + }, + { + "epoch": 0.9880331451981076, + "grad_norm": 115.12040710449219, + "learning_rate": 1.4443128976579513e-08, + "loss": 10.3165, + "step": 489110 + }, + { + "epoch": 0.9880533458307914, + "grad_norm": 182.71620178222656, + "learning_rate": 1.4416628414456457e-08, + "loss": 17.5581, + "step": 489120 + }, + { + "epoch": 0.9880735464634752, + "grad_norm": 444.47491455078125, + "learning_rate": 1.4390152151462222e-08, + "loss": 13.104, + "step": 489130 + }, + { + "epoch": 0.988093747096159, + "grad_norm": 201.85137939453125, + "learning_rate": 1.4363700187725593e-08, + "loss": 6.3272, + "step": 489140 + }, + { + "epoch": 0.9881139477288429, + "grad_norm": 151.43687438964844, + "learning_rate": 1.4337272523375911e-08, + "loss": 12.1407, + "step": 489150 + }, + { + "epoch": 0.9881341483615267, + "grad_norm": 283.36285400390625, + "learning_rate": 1.4310869158541408e-08, + "loss": 12.8991, + "step": 489160 + }, + { + "epoch": 0.9881543489942105, + "grad_norm": 352.98529052734375, + "learning_rate": 1.4284490093351421e-08, + "loss": 19.3828, + "step": 489170 + }, + { + "epoch": 0.9881745496268943, + "grad_norm": 564.781005859375, + "learning_rate": 1.425813532793363e-08, + "loss": 10.2488, + "step": 489180 + }, + { + "epoch": 0.9881947502595781, + "grad_norm": 437.80712890625, + "learning_rate": 1.4231804862417375e-08, + "loss": 16.5281, + "step": 489190 + }, + { + "epoch": 0.988214950892262, + "grad_norm": 422.3405456542969, + "learning_rate": 1.4205498696930332e-08, + "loss": 11.1556, + "step": 489200 + }, + { + "epoch": 0.9882351515249458, + "grad_norm": 362.02130126953125, + "learning_rate": 1.4179216831601284e-08, + "loss": 17.0503, + "step": 489210 + }, + { + "epoch": 0.9882553521576296, + "grad_norm": 266.0114440917969, + "learning_rate": 1.4152959266557354e-08, + "loss": 9.8067, + "step": 489220 + }, + { + "epoch": 0.9882755527903134, + "grad_norm": 357.19866943359375, + "learning_rate": 1.4126726001927882e-08, + "loss": 13.612, + "step": 489230 + }, + { + "epoch": 0.9882957534229972, + "grad_norm": 479.8896484375, + "learning_rate": 1.4100517037839989e-08, + "loss": 19.1753, + "step": 489240 + }, + { + "epoch": 0.9883159540556811, + "grad_norm": 44.63584518432617, + "learning_rate": 1.4074332374421351e-08, + "loss": 14.044, + "step": 489250 + }, + { + "epoch": 0.9883361546883649, + "grad_norm": 516.9661865234375, + "learning_rate": 1.4048172011799643e-08, + "loss": 23.0173, + "step": 489260 + }, + { + "epoch": 0.9883563553210487, + "grad_norm": 655.0848999023438, + "learning_rate": 1.4022035950102541e-08, + "loss": 15.7647, + "step": 489270 + }, + { + "epoch": 0.9883765559537324, + "grad_norm": 5.948569297790527, + "learning_rate": 1.3995924189457167e-08, + "loss": 14.569, + "step": 489280 + }, + { + "epoch": 0.9883967565864162, + "grad_norm": 198.19515991210938, + "learning_rate": 1.3969836729990637e-08, + "loss": 12.5366, + "step": 489290 + }, + { + "epoch": 0.9884169572191001, + "grad_norm": 334.4207458496094, + "learning_rate": 1.3943773571831188e-08, + "loss": 5.7287, + "step": 489300 + }, + { + "epoch": 0.9884371578517839, + "grad_norm": 279.3717956542969, + "learning_rate": 1.3917734715104269e-08, + "loss": 31.223, + "step": 489310 + }, + { + "epoch": 0.9884573584844677, + "grad_norm": 610.1446533203125, + "learning_rate": 1.3891720159938116e-08, + "loss": 22.999, + "step": 489320 + }, + { + "epoch": 0.9884775591171515, + "grad_norm": 296.9520263671875, + "learning_rate": 1.3865729906458735e-08, + "loss": 15.9857, + "step": 489330 + }, + { + "epoch": 0.9884977597498353, + "grad_norm": 521.7493896484375, + "learning_rate": 1.3839763954792695e-08, + "loss": 25.9349, + "step": 489340 + }, + { + "epoch": 0.9885179603825192, + "grad_norm": 238.2027587890625, + "learning_rate": 1.3813822305067115e-08, + "loss": 18.3275, + "step": 489350 + }, + { + "epoch": 0.988538161015203, + "grad_norm": 464.864501953125, + "learning_rate": 1.378790495740856e-08, + "loss": 18.1847, + "step": 489360 + }, + { + "epoch": 0.9885583616478868, + "grad_norm": 605.761474609375, + "learning_rate": 1.376201191194304e-08, + "loss": 16.1985, + "step": 489370 + }, + { + "epoch": 0.9885785622805706, + "grad_norm": 100.96516418457031, + "learning_rate": 1.3736143168796012e-08, + "loss": 15.8867, + "step": 489380 + }, + { + "epoch": 0.9885987629132544, + "grad_norm": 233.3070831298828, + "learning_rate": 1.371029872809515e-08, + "loss": 19.417, + "step": 489390 + }, + { + "epoch": 0.9886189635459383, + "grad_norm": 244.88894653320312, + "learning_rate": 1.3684478589964801e-08, + "loss": 15.7179, + "step": 489400 + }, + { + "epoch": 0.9886391641786221, + "grad_norm": 681.5443115234375, + "learning_rate": 1.3658682754532082e-08, + "loss": 28.7913, + "step": 489410 + }, + { + "epoch": 0.9886593648113059, + "grad_norm": 0.24459843337535858, + "learning_rate": 1.3632911221921896e-08, + "loss": 14.4319, + "step": 489420 + }, + { + "epoch": 0.9886795654439897, + "grad_norm": 497.12847900390625, + "learning_rate": 1.3607163992259697e-08, + "loss": 11.9154, + "step": 489430 + }, + { + "epoch": 0.9886997660766735, + "grad_norm": 697.6969604492188, + "learning_rate": 1.3581441065672052e-08, + "loss": 19.8152, + "step": 489440 + }, + { + "epoch": 0.9887199667093574, + "grad_norm": 733.26708984375, + "learning_rate": 1.355574244228386e-08, + "loss": 22.7065, + "step": 489450 + }, + { + "epoch": 0.9887401673420412, + "grad_norm": 430.14019775390625, + "learning_rate": 1.3530068122219464e-08, + "loss": 34.7864, + "step": 489460 + }, + { + "epoch": 0.988760367974725, + "grad_norm": 565.5632934570312, + "learning_rate": 1.3504418105604877e-08, + "loss": 17.5237, + "step": 489470 + }, + { + "epoch": 0.9887805686074088, + "grad_norm": 515.758544921875, + "learning_rate": 1.3478792392565553e-08, + "loss": 20.4981, + "step": 489480 + }, + { + "epoch": 0.9888007692400926, + "grad_norm": 424.0845947265625, + "learning_rate": 1.3453190983225285e-08, + "loss": 14.8029, + "step": 489490 + }, + { + "epoch": 0.9888209698727765, + "grad_norm": 356.08123779296875, + "learning_rate": 1.3427613877709523e-08, + "loss": 22.7797, + "step": 489500 + }, + { + "epoch": 0.9888411705054603, + "grad_norm": 481.2988586425781, + "learning_rate": 1.3402061076142613e-08, + "loss": 18.4806, + "step": 489510 + }, + { + "epoch": 0.9888613711381441, + "grad_norm": 163.1424560546875, + "learning_rate": 1.3376532578649459e-08, + "loss": 15.1123, + "step": 489520 + }, + { + "epoch": 0.9888815717708279, + "grad_norm": 243.24937438964844, + "learning_rate": 1.3351028385354402e-08, + "loss": 13.5655, + "step": 489530 + }, + { + "epoch": 0.9889017724035116, + "grad_norm": 319.78265380859375, + "learning_rate": 1.3325548496381235e-08, + "loss": 18.1097, + "step": 489540 + }, + { + "epoch": 0.9889219730361954, + "grad_norm": 457.2274475097656, + "learning_rate": 1.3300092911854856e-08, + "loss": 11.7467, + "step": 489550 + }, + { + "epoch": 0.9889421736688793, + "grad_norm": 92.75702667236328, + "learning_rate": 1.3274661631899055e-08, + "loss": 29.7643, + "step": 489560 + }, + { + "epoch": 0.9889623743015631, + "grad_norm": 312.5694885253906, + "learning_rate": 1.3249254656637622e-08, + "loss": 10.471, + "step": 489570 + }, + { + "epoch": 0.9889825749342469, + "grad_norm": 290.5263671875, + "learning_rate": 1.3223871986194348e-08, + "loss": 27.3382, + "step": 489580 + }, + { + "epoch": 0.9890027755669307, + "grad_norm": 342.47674560546875, + "learning_rate": 1.3198513620693022e-08, + "loss": 37.4274, + "step": 489590 + }, + { + "epoch": 0.9890229761996145, + "grad_norm": 383.2030029296875, + "learning_rate": 1.3173179560257432e-08, + "loss": 11.5588, + "step": 489600 + }, + { + "epoch": 0.9890431768322984, + "grad_norm": 267.69140625, + "learning_rate": 1.314786980501137e-08, + "loss": 7.4811, + "step": 489610 + }, + { + "epoch": 0.9890633774649822, + "grad_norm": 234.81167602539062, + "learning_rate": 1.3122584355076962e-08, + "loss": 12.6162, + "step": 489620 + }, + { + "epoch": 0.989083578097666, + "grad_norm": 293.77703857421875, + "learning_rate": 1.3097323210579104e-08, + "loss": 19.0022, + "step": 489630 + }, + { + "epoch": 0.9891037787303498, + "grad_norm": 183.48153686523438, + "learning_rate": 1.307208637163937e-08, + "loss": 19.9693, + "step": 489640 + }, + { + "epoch": 0.9891239793630336, + "grad_norm": 7.569753170013428, + "learning_rate": 1.3046873838381546e-08, + "loss": 11.5113, + "step": 489650 + }, + { + "epoch": 0.9891441799957175, + "grad_norm": 1021.123779296875, + "learning_rate": 1.3021685610928869e-08, + "loss": 27.3139, + "step": 489660 + }, + { + "epoch": 0.9891643806284013, + "grad_norm": 362.0072021484375, + "learning_rate": 1.2996521689403463e-08, + "loss": 24.0982, + "step": 489670 + }, + { + "epoch": 0.9891845812610851, + "grad_norm": 657.249267578125, + "learning_rate": 1.2971382073928007e-08, + "loss": 15.8356, + "step": 489680 + }, + { + "epoch": 0.9892047818937689, + "grad_norm": 462.3509521484375, + "learning_rate": 1.2946266764625182e-08, + "loss": 14.8603, + "step": 489690 + }, + { + "epoch": 0.9892249825264527, + "grad_norm": 413.03753662109375, + "learning_rate": 1.292117576161711e-08, + "loss": 33.4044, + "step": 489700 + }, + { + "epoch": 0.9892451831591366, + "grad_norm": 317.8731384277344, + "learning_rate": 1.2896109065027029e-08, + "loss": 5.5308, + "step": 489710 + }, + { + "epoch": 0.9892653837918204, + "grad_norm": 156.2231903076172, + "learning_rate": 1.2871066674975951e-08, + "loss": 13.7939, + "step": 489720 + }, + { + "epoch": 0.9892855844245042, + "grad_norm": 251.47769165039062, + "learning_rate": 1.2846048591586558e-08, + "loss": 30.8586, + "step": 489730 + }, + { + "epoch": 0.989305785057188, + "grad_norm": 72.52568817138672, + "learning_rate": 1.2821054814980971e-08, + "loss": 10.4397, + "step": 489740 + }, + { + "epoch": 0.9893259856898718, + "grad_norm": 487.9237365722656, + "learning_rate": 1.2796085345280207e-08, + "loss": 8.9422, + "step": 489750 + }, + { + "epoch": 0.9893461863225557, + "grad_norm": 412.9870910644531, + "learning_rate": 1.277114018260639e-08, + "loss": 34.0147, + "step": 489760 + }, + { + "epoch": 0.9893663869552395, + "grad_norm": 448.53582763671875, + "learning_rate": 1.2746219327081644e-08, + "loss": 7.8732, + "step": 489770 + }, + { + "epoch": 0.9893865875879233, + "grad_norm": 363.7297668457031, + "learning_rate": 1.2721322778826983e-08, + "loss": 6.6716, + "step": 489780 + }, + { + "epoch": 0.989406788220607, + "grad_norm": 118.27398681640625, + "learning_rate": 1.2696450537963422e-08, + "loss": 19.0261, + "step": 489790 + }, + { + "epoch": 0.9894269888532908, + "grad_norm": 408.3044128417969, + "learning_rate": 1.2671602604612531e-08, + "loss": 26.6716, + "step": 489800 + }, + { + "epoch": 0.9894471894859747, + "grad_norm": 380.0968322753906, + "learning_rate": 1.2646778978895325e-08, + "loss": 21.3715, + "step": 489810 + }, + { + "epoch": 0.9894673901186585, + "grad_norm": 324.5020446777344, + "learning_rate": 1.2621979660932814e-08, + "loss": 30.9385, + "step": 489820 + }, + { + "epoch": 0.9894875907513423, + "grad_norm": 225.742431640625, + "learning_rate": 1.2597204650845463e-08, + "loss": 8.4652, + "step": 489830 + }, + { + "epoch": 0.9895077913840261, + "grad_norm": 376.7015686035156, + "learning_rate": 1.2572453948755393e-08, + "loss": 18.7972, + "step": 489840 + }, + { + "epoch": 0.9895279920167099, + "grad_norm": 23.484270095825195, + "learning_rate": 1.2547727554781398e-08, + "loss": 18.036, + "step": 489850 + }, + { + "epoch": 0.9895481926493938, + "grad_norm": 273.4000549316406, + "learning_rate": 1.2523025469045047e-08, + "loss": 14.1293, + "step": 489860 + }, + { + "epoch": 0.9895683932820776, + "grad_norm": 452.1612243652344, + "learning_rate": 1.2498347691666801e-08, + "loss": 11.7554, + "step": 489870 + }, + { + "epoch": 0.9895885939147614, + "grad_norm": 178.48948669433594, + "learning_rate": 1.2473694222766563e-08, + "loss": 14.9273, + "step": 489880 + }, + { + "epoch": 0.9896087945474452, + "grad_norm": 423.0238342285156, + "learning_rate": 1.2449065062464794e-08, + "loss": 14.2976, + "step": 489890 + }, + { + "epoch": 0.989628995180129, + "grad_norm": 938.166015625, + "learning_rate": 1.2424460210881394e-08, + "loss": 26.3438, + "step": 489900 + }, + { + "epoch": 0.9896491958128129, + "grad_norm": 0.0, + "learning_rate": 1.2399879668136271e-08, + "loss": 15.9938, + "step": 489910 + }, + { + "epoch": 0.9896693964454967, + "grad_norm": 258.1879577636719, + "learning_rate": 1.2375323434348773e-08, + "loss": 22.4046, + "step": 489920 + }, + { + "epoch": 0.9896895970781805, + "grad_norm": 267.1070556640625, + "learning_rate": 1.235079150963936e-08, + "loss": 15.2209, + "step": 489930 + }, + { + "epoch": 0.9897097977108643, + "grad_norm": 313.09881591796875, + "learning_rate": 1.2326283894127378e-08, + "loss": 34.5608, + "step": 489940 + }, + { + "epoch": 0.9897299983435481, + "grad_norm": 358.4794616699219, + "learning_rate": 1.2301800587932179e-08, + "loss": 32.3783, + "step": 489950 + }, + { + "epoch": 0.989750198976232, + "grad_norm": 320.7735595703125, + "learning_rate": 1.2277341591172553e-08, + "loss": 19.7975, + "step": 489960 + }, + { + "epoch": 0.9897703996089158, + "grad_norm": 479.1424255371094, + "learning_rate": 1.225290690396841e-08, + "loss": 14.985, + "step": 489970 + }, + { + "epoch": 0.9897906002415996, + "grad_norm": 0.9020110368728638, + "learning_rate": 1.2228496526439093e-08, + "loss": 14.1797, + "step": 489980 + }, + { + "epoch": 0.9898108008742834, + "grad_norm": 21.70526123046875, + "learning_rate": 1.2204110458702844e-08, + "loss": 10.3022, + "step": 489990 + }, + { + "epoch": 0.9898310015069672, + "grad_norm": 359.36590576171875, + "learning_rate": 1.2179748700879013e-08, + "loss": 20.8642, + "step": 490000 + }, + { + "epoch": 0.989851202139651, + "grad_norm": 230.55552673339844, + "learning_rate": 1.2155411253085835e-08, + "loss": 12.0643, + "step": 490010 + }, + { + "epoch": 0.9898714027723349, + "grad_norm": 252.59933471679688, + "learning_rate": 1.2131098115442108e-08, + "loss": 21.6534, + "step": 490020 + }, + { + "epoch": 0.9898916034050187, + "grad_norm": 496.96124267578125, + "learning_rate": 1.2106809288067178e-08, + "loss": 19.3856, + "step": 490030 + }, + { + "epoch": 0.9899118040377025, + "grad_norm": 88.76834869384766, + "learning_rate": 1.208254477107762e-08, + "loss": 12.8934, + "step": 490040 + }, + { + "epoch": 0.9899320046703862, + "grad_norm": 15.983965873718262, + "learning_rate": 1.2058304564593893e-08, + "loss": 22.7476, + "step": 490050 + }, + { + "epoch": 0.98995220530307, + "grad_norm": 306.596435546875, + "learning_rate": 1.2034088668732568e-08, + "loss": 16.6889, + "step": 490060 + }, + { + "epoch": 0.9899724059357539, + "grad_norm": 292.4953918457031, + "learning_rate": 1.2009897083611888e-08, + "loss": 13.4741, + "step": 490070 + }, + { + "epoch": 0.9899926065684377, + "grad_norm": 35.3122673034668, + "learning_rate": 1.1985729809350088e-08, + "loss": 8.3754, + "step": 490080 + }, + { + "epoch": 0.9900128072011215, + "grad_norm": 322.98577880859375, + "learning_rate": 1.1961586846064855e-08, + "loss": 15.9051, + "step": 490090 + }, + { + "epoch": 0.9900330078338053, + "grad_norm": 405.03387451171875, + "learning_rate": 1.1937468193873869e-08, + "loss": 14.0023, + "step": 490100 + }, + { + "epoch": 0.9900532084664891, + "grad_norm": 319.7568359375, + "learning_rate": 1.1913373852894816e-08, + "loss": 19.7235, + "step": 490110 + }, + { + "epoch": 0.990073409099173, + "grad_norm": 1.4424026012420654, + "learning_rate": 1.1889303823244825e-08, + "loss": 13.2511, + "step": 490120 + }, + { + "epoch": 0.9900936097318568, + "grad_norm": 119.40985107421875, + "learning_rate": 1.1865258105041577e-08, + "loss": 14.8159, + "step": 490130 + }, + { + "epoch": 0.9901138103645406, + "grad_norm": 676.8298950195312, + "learning_rate": 1.1841236698402202e-08, + "loss": 30.2779, + "step": 490140 + }, + { + "epoch": 0.9901340109972244, + "grad_norm": 87.88396453857422, + "learning_rate": 1.1817239603443276e-08, + "loss": 21.4629, + "step": 490150 + }, + { + "epoch": 0.9901542116299082, + "grad_norm": 259.3276672363281, + "learning_rate": 1.1793266820282478e-08, + "loss": 10.1666, + "step": 490160 + }, + { + "epoch": 0.9901744122625921, + "grad_norm": 509.7081298828125, + "learning_rate": 1.1769318349036385e-08, + "loss": 20.826, + "step": 490170 + }, + { + "epoch": 0.9901946128952759, + "grad_norm": 166.15875244140625, + "learning_rate": 1.1745394189821013e-08, + "loss": 9.8491, + "step": 490180 + }, + { + "epoch": 0.9902148135279597, + "grad_norm": 406.0191650390625, + "learning_rate": 1.1721494342754048e-08, + "loss": 9.9096, + "step": 490190 + }, + { + "epoch": 0.9902350141606435, + "grad_norm": 195.5973358154297, + "learning_rate": 1.1697618807951504e-08, + "loss": 10.4411, + "step": 490200 + }, + { + "epoch": 0.9902552147933273, + "grad_norm": 97.89176177978516, + "learning_rate": 1.1673767585529404e-08, + "loss": 11.0284, + "step": 490210 + }, + { + "epoch": 0.9902754154260112, + "grad_norm": 1873.49365234375, + "learning_rate": 1.1649940675604876e-08, + "loss": 25.0696, + "step": 490220 + }, + { + "epoch": 0.990295616058695, + "grad_norm": 467.4261779785156, + "learning_rate": 1.1626138078293381e-08, + "loss": 14.5886, + "step": 490230 + }, + { + "epoch": 0.9903158166913788, + "grad_norm": 507.177734375, + "learning_rate": 1.1602359793710938e-08, + "loss": 23.0854, + "step": 490240 + }, + { + "epoch": 0.9903360173240626, + "grad_norm": 327.03594970703125, + "learning_rate": 1.1578605821973566e-08, + "loss": 17.8433, + "step": 490250 + }, + { + "epoch": 0.9903562179567464, + "grad_norm": 259.65167236328125, + "learning_rate": 1.1554876163197282e-08, + "loss": 12.3252, + "step": 490260 + }, + { + "epoch": 0.9903764185894303, + "grad_norm": 600.3280029296875, + "learning_rate": 1.1531170817496995e-08, + "loss": 13.0561, + "step": 490270 + }, + { + "epoch": 0.9903966192221141, + "grad_norm": 763.4371337890625, + "learning_rate": 1.1507489784989278e-08, + "loss": 14.3357, + "step": 490280 + }, + { + "epoch": 0.9904168198547979, + "grad_norm": 28.504867553710938, + "learning_rate": 1.1483833065789041e-08, + "loss": 14.0311, + "step": 490290 + }, + { + "epoch": 0.9904370204874817, + "grad_norm": 524.5272827148438, + "learning_rate": 1.146020066001119e-08, + "loss": 14.7662, + "step": 490300 + }, + { + "epoch": 0.9904572211201654, + "grad_norm": 280.05999755859375, + "learning_rate": 1.1436592567771188e-08, + "loss": 20.0143, + "step": 490310 + }, + { + "epoch": 0.9904774217528493, + "grad_norm": 394.6064147949219, + "learning_rate": 1.1413008789184498e-08, + "loss": 18.4672, + "step": 490320 + }, + { + "epoch": 0.9904976223855331, + "grad_norm": 38.09982681274414, + "learning_rate": 1.1389449324365476e-08, + "loss": 15.9906, + "step": 490330 + }, + { + "epoch": 0.9905178230182169, + "grad_norm": 122.05206298828125, + "learning_rate": 1.1365914173429582e-08, + "loss": 5.9164, + "step": 490340 + }, + { + "epoch": 0.9905380236509007, + "grad_norm": 219.7208709716797, + "learning_rate": 1.134240333649117e-08, + "loss": 18.8033, + "step": 490350 + }, + { + "epoch": 0.9905582242835845, + "grad_norm": 952.7105712890625, + "learning_rate": 1.1318916813664594e-08, + "loss": 21.4837, + "step": 490360 + }, + { + "epoch": 0.9905784249162684, + "grad_norm": 0.0, + "learning_rate": 1.129545460506476e-08, + "loss": 24.7407, + "step": 490370 + }, + { + "epoch": 0.9905986255489522, + "grad_norm": 585.8445434570312, + "learning_rate": 1.1272016710806021e-08, + "loss": 27.2635, + "step": 490380 + }, + { + "epoch": 0.990618826181636, + "grad_norm": 704.29345703125, + "learning_rate": 1.1248603131002178e-08, + "loss": 48.7618, + "step": 490390 + }, + { + "epoch": 0.9906390268143198, + "grad_norm": 1576.3946533203125, + "learning_rate": 1.1225213865767026e-08, + "loss": 40.8802, + "step": 490400 + }, + { + "epoch": 0.9906592274470036, + "grad_norm": 373.20452880859375, + "learning_rate": 1.1201848915216029e-08, + "loss": 14.5402, + "step": 490410 + }, + { + "epoch": 0.9906794280796875, + "grad_norm": 277.86138916015625, + "learning_rate": 1.1178508279461875e-08, + "loss": 26.6005, + "step": 490420 + }, + { + "epoch": 0.9906996287123713, + "grad_norm": 628.8119506835938, + "learning_rate": 1.115519195861836e-08, + "loss": 22.6955, + "step": 490430 + }, + { + "epoch": 0.9907198293450551, + "grad_norm": 659.1356201171875, + "learning_rate": 1.1131899952799285e-08, + "loss": 26.2416, + "step": 490440 + }, + { + "epoch": 0.9907400299777389, + "grad_norm": 454.5382995605469, + "learning_rate": 1.1108632262118446e-08, + "loss": 27.2168, + "step": 490450 + }, + { + "epoch": 0.9907602306104227, + "grad_norm": 0.0, + "learning_rate": 1.1085388886689085e-08, + "loss": 13.5798, + "step": 490460 + }, + { + "epoch": 0.9907804312431066, + "grad_norm": 494.09881591796875, + "learning_rate": 1.1062169826624447e-08, + "loss": 23.5275, + "step": 490470 + }, + { + "epoch": 0.9908006318757904, + "grad_norm": 211.5029296875, + "learning_rate": 1.1038975082037772e-08, + "loss": 8.1438, + "step": 490480 + }, + { + "epoch": 0.9908208325084742, + "grad_norm": 463.34075927734375, + "learning_rate": 1.101580465304175e-08, + "loss": 17.0053, + "step": 490490 + }, + { + "epoch": 0.990841033141158, + "grad_norm": 733.0955810546875, + "learning_rate": 1.0992658539750179e-08, + "loss": 28.9807, + "step": 490500 + }, + { + "epoch": 0.9908612337738418, + "grad_norm": 309.8749084472656, + "learning_rate": 1.0969536742274633e-08, + "loss": 21.6901, + "step": 490510 + }, + { + "epoch": 0.9908814344065257, + "grad_norm": 7.222733020782471, + "learning_rate": 1.0946439260728914e-08, + "loss": 8.0209, + "step": 490520 + }, + { + "epoch": 0.9909016350392095, + "grad_norm": 118.79547882080078, + "learning_rate": 1.0923366095225152e-08, + "loss": 19.2935, + "step": 490530 + }, + { + "epoch": 0.9909218356718933, + "grad_norm": 16.88963508605957, + "learning_rate": 1.090031724587548e-08, + "loss": 7.4517, + "step": 490540 + }, + { + "epoch": 0.9909420363045771, + "grad_norm": 375.42877197265625, + "learning_rate": 1.0877292712792586e-08, + "loss": 16.8823, + "step": 490550 + }, + { + "epoch": 0.9909622369372608, + "grad_norm": 546.7255859375, + "learning_rate": 1.0854292496089158e-08, + "loss": 18.4394, + "step": 490560 + }, + { + "epoch": 0.9909824375699446, + "grad_norm": 4.430992126464844, + "learning_rate": 1.0831316595876218e-08, + "loss": 13.5275, + "step": 490570 + }, + { + "epoch": 0.9910026382026285, + "grad_norm": 289.63616943359375, + "learning_rate": 1.0808365012266454e-08, + "loss": 14.6157, + "step": 490580 + }, + { + "epoch": 0.9910228388353123, + "grad_norm": 147.71971130371094, + "learning_rate": 1.0785437745371996e-08, + "loss": 12.5525, + "step": 490590 + }, + { + "epoch": 0.9910430394679961, + "grad_norm": 162.67674255371094, + "learning_rate": 1.076253479530387e-08, + "loss": 12.0218, + "step": 490600 + }, + { + "epoch": 0.9910632401006799, + "grad_norm": 108.62178039550781, + "learning_rate": 1.0739656162174205e-08, + "loss": 16.5417, + "step": 490610 + }, + { + "epoch": 0.9910834407333637, + "grad_norm": 781.3118286132812, + "learning_rate": 1.0716801846094026e-08, + "loss": 30.3789, + "step": 490620 + }, + { + "epoch": 0.9911036413660476, + "grad_norm": 172.30718994140625, + "learning_rate": 1.0693971847175466e-08, + "loss": 23.6568, + "step": 490630 + }, + { + "epoch": 0.9911238419987314, + "grad_norm": 730.6552124023438, + "learning_rate": 1.067116616552899e-08, + "loss": 20.7877, + "step": 490640 + }, + { + "epoch": 0.9911440426314152, + "grad_norm": 232.4882354736328, + "learning_rate": 1.0648384801266176e-08, + "loss": 8.2937, + "step": 490650 + }, + { + "epoch": 0.991164243264099, + "grad_norm": 0.0, + "learning_rate": 1.0625627754498048e-08, + "loss": 10.4492, + "step": 490660 + }, + { + "epoch": 0.9911844438967828, + "grad_norm": 841.0191040039062, + "learning_rate": 1.0602895025335624e-08, + "loss": 15.407, + "step": 490670 + }, + { + "epoch": 0.9912046445294667, + "grad_norm": 399.9212341308594, + "learning_rate": 1.0580186613888822e-08, + "loss": 14.38, + "step": 490680 + }, + { + "epoch": 0.9912248451621505, + "grad_norm": 776.1848754882812, + "learning_rate": 1.055750252026977e-08, + "loss": 33.8655, + "step": 490690 + }, + { + "epoch": 0.9912450457948343, + "grad_norm": 385.7315979003906, + "learning_rate": 1.0534842744588381e-08, + "loss": 29.3024, + "step": 490700 + }, + { + "epoch": 0.9912652464275181, + "grad_norm": 324.3099060058594, + "learning_rate": 1.0512207286954568e-08, + "loss": 23.4997, + "step": 490710 + }, + { + "epoch": 0.9912854470602019, + "grad_norm": 486.59674072265625, + "learning_rate": 1.0489596147479353e-08, + "loss": 24.3618, + "step": 490720 + }, + { + "epoch": 0.9913056476928858, + "grad_norm": 459.3517761230469, + "learning_rate": 1.0467009326272648e-08, + "loss": 15.0182, + "step": 490730 + }, + { + "epoch": 0.9913258483255696, + "grad_norm": 488.06982421875, + "learning_rate": 1.044444682344492e-08, + "loss": 16.9264, + "step": 490740 + }, + { + "epoch": 0.9913460489582534, + "grad_norm": 2.73164701461792, + "learning_rate": 1.0421908639104971e-08, + "loss": 31.6905, + "step": 490750 + }, + { + "epoch": 0.9913662495909372, + "grad_norm": 74.6050796508789, + "learning_rate": 1.039939477336438e-08, + "loss": 9.8221, + "step": 490760 + }, + { + "epoch": 0.991386450223621, + "grad_norm": 743.1727294921875, + "learning_rate": 1.0376905226331391e-08, + "loss": 27.881, + "step": 490770 + }, + { + "epoch": 0.9914066508563049, + "grad_norm": 181.60882568359375, + "learning_rate": 1.0354439998116473e-08, + "loss": 13.2241, + "step": 490780 + }, + { + "epoch": 0.9914268514889887, + "grad_norm": 359.5448913574219, + "learning_rate": 1.0331999088828425e-08, + "loss": 25.4082, + "step": 490790 + }, + { + "epoch": 0.9914470521216725, + "grad_norm": 87.32952880859375, + "learning_rate": 1.030958249857772e-08, + "loss": 16.0133, + "step": 490800 + }, + { + "epoch": 0.9914672527543563, + "grad_norm": 140.4916229248047, + "learning_rate": 1.02871902274726e-08, + "loss": 19.8484, + "step": 490810 + }, + { + "epoch": 0.99148745338704, + "grad_norm": 243.94195556640625, + "learning_rate": 1.026482227562242e-08, + "loss": 16.5282, + "step": 490820 + }, + { + "epoch": 0.9915076540197238, + "grad_norm": 245.15191650390625, + "learning_rate": 1.0242478643136545e-08, + "loss": 23.5337, + "step": 490830 + }, + { + "epoch": 0.9915278546524077, + "grad_norm": 496.2458801269531, + "learning_rate": 1.0220159330123214e-08, + "loss": 16.6147, + "step": 490840 + }, + { + "epoch": 0.9915480552850915, + "grad_norm": 665.3242797851562, + "learning_rate": 1.0197864336691788e-08, + "loss": 22.6857, + "step": 490850 + }, + { + "epoch": 0.9915682559177753, + "grad_norm": 360.8750915527344, + "learning_rate": 1.0175593662951066e-08, + "loss": 18.3113, + "step": 490860 + }, + { + "epoch": 0.9915884565504591, + "grad_norm": 198.45236206054688, + "learning_rate": 1.0153347309009299e-08, + "loss": 12.9173, + "step": 490870 + }, + { + "epoch": 0.991608657183143, + "grad_norm": 1000.5813598632812, + "learning_rate": 1.013112527497473e-08, + "loss": 22.6931, + "step": 490880 + }, + { + "epoch": 0.9916288578158268, + "grad_norm": 198.7476806640625, + "learning_rate": 1.0108927560955606e-08, + "loss": 13.5263, + "step": 490890 + }, + { + "epoch": 0.9916490584485106, + "grad_norm": 586.5575561523438, + "learning_rate": 1.008675416706073e-08, + "loss": 18.2384, + "step": 490900 + }, + { + "epoch": 0.9916692590811944, + "grad_norm": 230.30209350585938, + "learning_rate": 1.0064605093397794e-08, + "loss": 20.9739, + "step": 490910 + }, + { + "epoch": 0.9916894597138782, + "grad_norm": 1089.9083251953125, + "learning_rate": 1.0042480340075045e-08, + "loss": 22.8103, + "step": 490920 + }, + { + "epoch": 0.991709660346562, + "grad_norm": 1334.685791015625, + "learning_rate": 1.0020379907199618e-08, + "loss": 28.0976, + "step": 490930 + }, + { + "epoch": 0.9917298609792459, + "grad_norm": 217.3130340576172, + "learning_rate": 9.99830379487976e-09, + "loss": 20.0891, + "step": 490940 + }, + { + "epoch": 0.9917500616119297, + "grad_norm": 727.6640014648438, + "learning_rate": 9.976252003223164e-09, + "loss": 32.0702, + "step": 490950 + }, + { + "epoch": 0.9917702622446135, + "grad_norm": 117.57544708251953, + "learning_rate": 9.954224532336965e-09, + "loss": 10.9159, + "step": 490960 + }, + { + "epoch": 0.9917904628772973, + "grad_norm": 627.7258911132812, + "learning_rate": 9.932221382328299e-09, + "loss": 12.5682, + "step": 490970 + }, + { + "epoch": 0.9918106635099811, + "grad_norm": 19.738685607910156, + "learning_rate": 9.91024255330486e-09, + "loss": 12.3592, + "step": 490980 + }, + { + "epoch": 0.991830864142665, + "grad_norm": 609.0294189453125, + "learning_rate": 9.888288045374339e-09, + "loss": 15.5357, + "step": 490990 + }, + { + "epoch": 0.9918510647753488, + "grad_norm": 381.98956298828125, + "learning_rate": 9.866357858642206e-09, + "loss": 11.6295, + "step": 491000 + }, + { + "epoch": 0.9918712654080326, + "grad_norm": 250.50408935546875, + "learning_rate": 9.844451993216708e-09, + "loss": 19.4155, + "step": 491010 + }, + { + "epoch": 0.9918914660407164, + "grad_norm": 813.5974731445312, + "learning_rate": 9.822570449203873e-09, + "loss": 14.552, + "step": 491020 + }, + { + "epoch": 0.9919116666734002, + "grad_norm": 275.0490417480469, + "learning_rate": 9.800713226710834e-09, + "loss": 12.0307, + "step": 491030 + }, + { + "epoch": 0.9919318673060841, + "grad_norm": 116.39804077148438, + "learning_rate": 9.77888032584362e-09, + "loss": 17.4847, + "step": 491040 + }, + { + "epoch": 0.9919520679387679, + "grad_norm": 178.0388946533203, + "learning_rate": 9.757071746708812e-09, + "loss": 12.5501, + "step": 491050 + }, + { + "epoch": 0.9919722685714517, + "grad_norm": 22.949909210205078, + "learning_rate": 9.735287489413547e-09, + "loss": 10.2942, + "step": 491060 + }, + { + "epoch": 0.9919924692041354, + "grad_norm": 140.55003356933594, + "learning_rate": 9.71352755406274e-09, + "loss": 8.9796, + "step": 491070 + }, + { + "epoch": 0.9920126698368192, + "grad_norm": 187.78439331054688, + "learning_rate": 9.691791940762418e-09, + "loss": 26.1984, + "step": 491080 + }, + { + "epoch": 0.9920328704695031, + "grad_norm": 636.652587890625, + "learning_rate": 9.670080649619717e-09, + "loss": 33.8739, + "step": 491090 + }, + { + "epoch": 0.9920530711021869, + "grad_norm": 764.0996704101562, + "learning_rate": 9.64839368074011e-09, + "loss": 25.3695, + "step": 491100 + }, + { + "epoch": 0.9920732717348707, + "grad_norm": 547.4270629882812, + "learning_rate": 9.626731034227954e-09, + "loss": 18.0425, + "step": 491110 + }, + { + "epoch": 0.9920934723675545, + "grad_norm": 267.7295227050781, + "learning_rate": 9.605092710190943e-09, + "loss": 17.5693, + "step": 491120 + }, + { + "epoch": 0.9921136730002383, + "grad_norm": 484.971435546875, + "learning_rate": 9.583478708732886e-09, + "loss": 15.021, + "step": 491130 + }, + { + "epoch": 0.9921338736329222, + "grad_norm": 204.47447204589844, + "learning_rate": 9.561889029959249e-09, + "loss": 12.9957, + "step": 491140 + }, + { + "epoch": 0.992154074265606, + "grad_norm": 331.8847351074219, + "learning_rate": 9.540323673976615e-09, + "loss": 19.1705, + "step": 491150 + }, + { + "epoch": 0.9921742748982898, + "grad_norm": 244.10409545898438, + "learning_rate": 9.518782640888235e-09, + "loss": 19.9217, + "step": 491160 + }, + { + "epoch": 0.9921944755309736, + "grad_norm": 487.3038024902344, + "learning_rate": 9.497265930800691e-09, + "loss": 23.7191, + "step": 491170 + }, + { + "epoch": 0.9922146761636574, + "grad_norm": 764.28955078125, + "learning_rate": 9.475773543818345e-09, + "loss": 22.5038, + "step": 491180 + }, + { + "epoch": 0.9922348767963413, + "grad_norm": 481.8316345214844, + "learning_rate": 9.454305480045556e-09, + "loss": 22.492, + "step": 491190 + }, + { + "epoch": 0.9922550774290251, + "grad_norm": 852.2228393554688, + "learning_rate": 9.432861739586685e-09, + "loss": 21.2637, + "step": 491200 + }, + { + "epoch": 0.9922752780617089, + "grad_norm": 528.0516967773438, + "learning_rate": 9.411442322547204e-09, + "loss": 29.3942, + "step": 491210 + }, + { + "epoch": 0.9922954786943927, + "grad_norm": 437.1981201171875, + "learning_rate": 9.390047229031474e-09, + "loss": 12.1818, + "step": 491220 + }, + { + "epoch": 0.9923156793270765, + "grad_norm": 313.25225830078125, + "learning_rate": 9.368676459142744e-09, + "loss": 23.0166, + "step": 491230 + }, + { + "epoch": 0.9923358799597604, + "grad_norm": 614.193115234375, + "learning_rate": 9.347330012985933e-09, + "loss": 25.4203, + "step": 491240 + }, + { + "epoch": 0.9923560805924442, + "grad_norm": 527.8973999023438, + "learning_rate": 9.3260078906654e-09, + "loss": 21.4504, + "step": 491250 + }, + { + "epoch": 0.992376281225128, + "grad_norm": 103.88162994384766, + "learning_rate": 9.304710092283842e-09, + "loss": 19.1815, + "step": 491260 + }, + { + "epoch": 0.9923964818578118, + "grad_norm": 441.32818603515625, + "learning_rate": 9.283436617946173e-09, + "loss": 9.0619, + "step": 491270 + }, + { + "epoch": 0.9924166824904956, + "grad_norm": 191.62818908691406, + "learning_rate": 9.262187467756201e-09, + "loss": 16.3227, + "step": 491280 + }, + { + "epoch": 0.9924368831231795, + "grad_norm": 355.4009094238281, + "learning_rate": 9.24096264181662e-09, + "loss": 22.5769, + "step": 491290 + }, + { + "epoch": 0.9924570837558633, + "grad_norm": 576.244384765625, + "learning_rate": 9.219762140231237e-09, + "loss": 16.6766, + "step": 491300 + }, + { + "epoch": 0.9924772843885471, + "grad_norm": 161.81101989746094, + "learning_rate": 9.198585963103302e-09, + "loss": 16.9969, + "step": 491310 + }, + { + "epoch": 0.9924974850212309, + "grad_norm": 281.2635192871094, + "learning_rate": 9.177434110536065e-09, + "loss": 13.0341, + "step": 491320 + }, + { + "epoch": 0.9925176856539146, + "grad_norm": 40.424774169921875, + "learning_rate": 9.156306582633334e-09, + "loss": 11.9053, + "step": 491330 + }, + { + "epoch": 0.9925378862865984, + "grad_norm": 138.92991638183594, + "learning_rate": 9.135203379496693e-09, + "loss": 13.6707, + "step": 491340 + }, + { + "epoch": 0.9925580869192823, + "grad_norm": 447.6842956542969, + "learning_rate": 9.114124501230504e-09, + "loss": 21.1689, + "step": 491350 + }, + { + "epoch": 0.9925782875519661, + "grad_norm": 481.4045104980469, + "learning_rate": 9.09306994793635e-09, + "loss": 20.5411, + "step": 491360 + }, + { + "epoch": 0.9925984881846499, + "grad_norm": 524.9292602539062, + "learning_rate": 9.07203971971693e-09, + "loss": 30.819, + "step": 491370 + }, + { + "epoch": 0.9926186888173337, + "grad_norm": 438.7566833496094, + "learning_rate": 9.051033816675492e-09, + "loss": 13.5019, + "step": 491380 + }, + { + "epoch": 0.9926388894500175, + "grad_norm": 511.960205078125, + "learning_rate": 9.030052238913622e-09, + "loss": 18.0979, + "step": 491390 + }, + { + "epoch": 0.9926590900827014, + "grad_norm": 163.58566284179688, + "learning_rate": 9.009094986534572e-09, + "loss": 16.3686, + "step": 491400 + }, + { + "epoch": 0.9926792907153852, + "grad_norm": 2.4858508110046387, + "learning_rate": 8.988162059639371e-09, + "loss": 18.2185, + "step": 491410 + }, + { + "epoch": 0.992699491348069, + "grad_norm": 107.9386215209961, + "learning_rate": 8.967253458330715e-09, + "loss": 19.7233, + "step": 491420 + }, + { + "epoch": 0.9927196919807528, + "grad_norm": 398.495361328125, + "learning_rate": 8.946369182710191e-09, + "loss": 16.6712, + "step": 491430 + }, + { + "epoch": 0.9927398926134366, + "grad_norm": 326.6344909667969, + "learning_rate": 8.925509232879937e-09, + "loss": 17.6045, + "step": 491440 + }, + { + "epoch": 0.9927600932461205, + "grad_norm": 309.113525390625, + "learning_rate": 8.904673608940983e-09, + "loss": 19.8827, + "step": 491450 + }, + { + "epoch": 0.9927802938788043, + "grad_norm": 170.3911590576172, + "learning_rate": 8.883862310995473e-09, + "loss": 13.5585, + "step": 491460 + }, + { + "epoch": 0.9928004945114881, + "grad_norm": 92.2303466796875, + "learning_rate": 8.863075339144988e-09, + "loss": 16.4295, + "step": 491470 + }, + { + "epoch": 0.9928206951441719, + "grad_norm": 351.38494873046875, + "learning_rate": 8.842312693490563e-09, + "loss": 19.5431, + "step": 491480 + }, + { + "epoch": 0.9928408957768557, + "grad_norm": 807.0746459960938, + "learning_rate": 8.821574374132669e-09, + "loss": 16.084, + "step": 491490 + }, + { + "epoch": 0.9928610964095396, + "grad_norm": 412.27001953125, + "learning_rate": 8.800860381173448e-09, + "loss": 25.2724, + "step": 491500 + }, + { + "epoch": 0.9928812970422234, + "grad_norm": 172.590087890625, + "learning_rate": 8.780170714713931e-09, + "loss": 13.0839, + "step": 491510 + }, + { + "epoch": 0.9929014976749072, + "grad_norm": 154.42572021484375, + "learning_rate": 8.759505374854038e-09, + "loss": 19.079, + "step": 491520 + }, + { + "epoch": 0.992921698307591, + "grad_norm": 206.93417358398438, + "learning_rate": 8.738864361694799e-09, + "loss": 32.277, + "step": 491530 + }, + { + "epoch": 0.9929418989402748, + "grad_norm": 604.6255493164062, + "learning_rate": 8.718247675337243e-09, + "loss": 26.7745, + "step": 491540 + }, + { + "epoch": 0.9929620995729587, + "grad_norm": 462.11669921875, + "learning_rate": 8.697655315881293e-09, + "loss": 19.3062, + "step": 491550 + }, + { + "epoch": 0.9929823002056425, + "grad_norm": 433.7647399902344, + "learning_rate": 8.677087283427976e-09, + "loss": 20.0991, + "step": 491560 + }, + { + "epoch": 0.9930025008383263, + "grad_norm": 607.44287109375, + "learning_rate": 8.656543578077215e-09, + "loss": 19.0045, + "step": 491570 + }, + { + "epoch": 0.9930227014710101, + "grad_norm": 65.2020263671875, + "learning_rate": 8.636024199928927e-09, + "loss": 27.6576, + "step": 491580 + }, + { + "epoch": 0.9930429021036938, + "grad_norm": 720.201171875, + "learning_rate": 8.615529149083034e-09, + "loss": 27.6301, + "step": 491590 + }, + { + "epoch": 0.9930631027363777, + "grad_norm": 217.3624725341797, + "learning_rate": 8.595058425640012e-09, + "loss": 16.5394, + "step": 491600 + }, + { + "epoch": 0.9930833033690615, + "grad_norm": 468.4784851074219, + "learning_rate": 8.574612029699224e-09, + "loss": 21.0743, + "step": 491610 + }, + { + "epoch": 0.9931035040017453, + "grad_norm": 508.72381591796875, + "learning_rate": 8.554189961360037e-09, + "loss": 24.9038, + "step": 491620 + }, + { + "epoch": 0.9931237046344291, + "grad_norm": 186.89498901367188, + "learning_rate": 8.53379222072237e-09, + "loss": 14.5935, + "step": 491630 + }, + { + "epoch": 0.9931439052671129, + "grad_norm": 622.7369995117188, + "learning_rate": 8.513418807886142e-09, + "loss": 19.9239, + "step": 491640 + }, + { + "epoch": 0.9931641058997968, + "grad_norm": 121.98161315917969, + "learning_rate": 8.49306972294961e-09, + "loss": 14.8514, + "step": 491650 + }, + { + "epoch": 0.9931843065324806, + "grad_norm": 531.5045166015625, + "learning_rate": 8.472744966012691e-09, + "loss": 17.4055, + "step": 491660 + }, + { + "epoch": 0.9932045071651644, + "grad_norm": 134.05284118652344, + "learning_rate": 8.452444537174198e-09, + "loss": 21.0864, + "step": 491670 + }, + { + "epoch": 0.9932247077978482, + "grad_norm": 486.0033264160156, + "learning_rate": 8.43216843653294e-09, + "loss": 9.6031, + "step": 491680 + }, + { + "epoch": 0.993244908430532, + "grad_norm": 10.919336318969727, + "learning_rate": 8.41191666418828e-09, + "loss": 17.866, + "step": 491690 + }, + { + "epoch": 0.9932651090632159, + "grad_norm": 273.6304626464844, + "learning_rate": 8.391689220238474e-09, + "loss": 14.4463, + "step": 491700 + }, + { + "epoch": 0.9932853096958997, + "grad_norm": 598.221435546875, + "learning_rate": 8.37148610478178e-09, + "loss": 18.2435, + "step": 491710 + }, + { + "epoch": 0.9933055103285835, + "grad_norm": 382.8218078613281, + "learning_rate": 8.351307317917002e-09, + "loss": 19.4453, + "step": 491720 + }, + { + "epoch": 0.9933257109612673, + "grad_norm": 47.96708679199219, + "learning_rate": 8.331152859742952e-09, + "loss": 31.3714, + "step": 491730 + }, + { + "epoch": 0.9933459115939511, + "grad_norm": 505.9755554199219, + "learning_rate": 8.311022730357331e-09, + "loss": 20.7981, + "step": 491740 + }, + { + "epoch": 0.993366112226635, + "grad_norm": 58.72330093383789, + "learning_rate": 8.290916929858394e-09, + "loss": 30.5194, + "step": 491750 + }, + { + "epoch": 0.9933863128593188, + "grad_norm": 519.745361328125, + "learning_rate": 8.27083545834384e-09, + "loss": 20.2842, + "step": 491760 + }, + { + "epoch": 0.9934065134920026, + "grad_norm": 458.9234924316406, + "learning_rate": 8.250778315911922e-09, + "loss": 19.8348, + "step": 491770 + }, + { + "epoch": 0.9934267141246864, + "grad_norm": 184.57225036621094, + "learning_rate": 8.230745502660343e-09, + "loss": 19.1662, + "step": 491780 + }, + { + "epoch": 0.9934469147573702, + "grad_norm": 488.2207946777344, + "learning_rate": 8.210737018686798e-09, + "loss": 18.1941, + "step": 491790 + }, + { + "epoch": 0.9934671153900541, + "grad_norm": 328.6118469238281, + "learning_rate": 8.190752864088436e-09, + "loss": 27.4498, + "step": 491800 + }, + { + "epoch": 0.9934873160227379, + "grad_norm": 366.3116455078125, + "learning_rate": 8.17079303896351e-09, + "loss": 29.1802, + "step": 491810 + }, + { + "epoch": 0.9935075166554217, + "grad_norm": 229.66558837890625, + "learning_rate": 8.150857543408054e-09, + "loss": 20.8235, + "step": 491820 + }, + { + "epoch": 0.9935277172881055, + "grad_norm": 235.47610473632812, + "learning_rate": 8.130946377519767e-09, + "loss": 22.9774, + "step": 491830 + }, + { + "epoch": 0.9935479179207892, + "grad_norm": 223.5673370361328, + "learning_rate": 8.11105954139635e-09, + "loss": 12.8811, + "step": 491840 + }, + { + "epoch": 0.993568118553473, + "grad_norm": 230.26121520996094, + "learning_rate": 8.091197035133836e-09, + "loss": 18.2644, + "step": 491850 + }, + { + "epoch": 0.9935883191861569, + "grad_norm": 292.3512268066406, + "learning_rate": 8.07135885882937e-09, + "loss": 14.2108, + "step": 491860 + }, + { + "epoch": 0.9936085198188407, + "grad_norm": 438.98638916015625, + "learning_rate": 8.051545012580097e-09, + "loss": 12.9736, + "step": 491870 + }, + { + "epoch": 0.9936287204515245, + "grad_norm": 299.19549560546875, + "learning_rate": 8.031755496481496e-09, + "loss": 14.6742, + "step": 491880 + }, + { + "epoch": 0.9936489210842083, + "grad_norm": 364.4718017578125, + "learning_rate": 8.011990310631269e-09, + "loss": 15.7653, + "step": 491890 + }, + { + "epoch": 0.9936691217168921, + "grad_norm": 258.502197265625, + "learning_rate": 7.992249455124889e-09, + "loss": 10.2188, + "step": 491900 + }, + { + "epoch": 0.993689322349576, + "grad_norm": 228.58082580566406, + "learning_rate": 7.972532930058396e-09, + "loss": 14.5795, + "step": 491910 + }, + { + "epoch": 0.9937095229822598, + "grad_norm": 0.3197769820690155, + "learning_rate": 7.952840735528933e-09, + "loss": 12.5138, + "step": 491920 + }, + { + "epoch": 0.9937297236149436, + "grad_norm": 369.63385009765625, + "learning_rate": 7.933172871631978e-09, + "loss": 12.8713, + "step": 491930 + }, + { + "epoch": 0.9937499242476274, + "grad_norm": 102.29288482666016, + "learning_rate": 7.913529338463011e-09, + "loss": 16.312, + "step": 491940 + }, + { + "epoch": 0.9937701248803112, + "grad_norm": 18.225814819335938, + "learning_rate": 7.89391013611751e-09, + "loss": 18.8131, + "step": 491950 + }, + { + "epoch": 0.9937903255129951, + "grad_norm": 441.5530700683594, + "learning_rate": 7.874315264692622e-09, + "loss": 15.199, + "step": 491960 + }, + { + "epoch": 0.9938105261456789, + "grad_norm": 458.37664794921875, + "learning_rate": 7.85474472428216e-09, + "loss": 14.9665, + "step": 491970 + }, + { + "epoch": 0.9938307267783627, + "grad_norm": 374.7511901855469, + "learning_rate": 7.835198514982156e-09, + "loss": 17.1539, + "step": 491980 + }, + { + "epoch": 0.9938509274110465, + "grad_norm": 124.7674560546875, + "learning_rate": 7.815676636888093e-09, + "loss": 16.2321, + "step": 491990 + }, + { + "epoch": 0.9938711280437303, + "grad_norm": 516.1492919921875, + "learning_rate": 7.796179090094891e-09, + "loss": 24.4474, + "step": 492000 + }, + { + "epoch": 0.9938913286764142, + "grad_norm": 819.0858154296875, + "learning_rate": 7.776705874698032e-09, + "loss": 16.4256, + "step": 492010 + }, + { + "epoch": 0.993911529309098, + "grad_norm": 260.1531982421875, + "learning_rate": 7.757256990791328e-09, + "loss": 11.4037, + "step": 492020 + }, + { + "epoch": 0.9939317299417818, + "grad_norm": 52.56280517578125, + "learning_rate": 7.737832438470816e-09, + "loss": 16.7869, + "step": 492030 + }, + { + "epoch": 0.9939519305744656, + "grad_norm": 293.88433837890625, + "learning_rate": 7.718432217830307e-09, + "loss": 20.3442, + "step": 492040 + }, + { + "epoch": 0.9939721312071494, + "grad_norm": 143.10574340820312, + "learning_rate": 7.699056328964726e-09, + "loss": 18.5877, + "step": 492050 + }, + { + "epoch": 0.9939923318398333, + "grad_norm": 289.1720886230469, + "learning_rate": 7.679704771968998e-09, + "loss": 12.983, + "step": 492060 + }, + { + "epoch": 0.9940125324725171, + "grad_norm": 198.0683135986328, + "learning_rate": 7.660377546936382e-09, + "loss": 13.1834, + "step": 492070 + }, + { + "epoch": 0.9940327331052009, + "grad_norm": 412.1947326660156, + "learning_rate": 7.641074653961244e-09, + "loss": 16.0309, + "step": 492080 + }, + { + "epoch": 0.9940529337378847, + "grad_norm": 140.10911560058594, + "learning_rate": 7.621796093138512e-09, + "loss": 9.8632, + "step": 492090 + }, + { + "epoch": 0.9940731343705684, + "grad_norm": 324.3037109375, + "learning_rate": 7.602541864561442e-09, + "loss": 15.7889, + "step": 492100 + }, + { + "epoch": 0.9940933350032523, + "grad_norm": 282.5607604980469, + "learning_rate": 7.583311968324403e-09, + "loss": 22.3234, + "step": 492110 + }, + { + "epoch": 0.9941135356359361, + "grad_norm": 293.5461120605469, + "learning_rate": 7.564106404520654e-09, + "loss": 31.9959, + "step": 492120 + }, + { + "epoch": 0.9941337362686199, + "grad_norm": 317.7358093261719, + "learning_rate": 7.544925173243455e-09, + "loss": 14.6415, + "step": 492130 + }, + { + "epoch": 0.9941539369013037, + "grad_norm": 34.03724670410156, + "learning_rate": 7.525768274587175e-09, + "loss": 27.2084, + "step": 492140 + }, + { + "epoch": 0.9941741375339875, + "grad_norm": 0.0, + "learning_rate": 7.506635708645072e-09, + "loss": 17.1881, + "step": 492150 + }, + { + "epoch": 0.9941943381666714, + "grad_norm": 349.0623779296875, + "learning_rate": 7.487527475509848e-09, + "loss": 11.0595, + "step": 492160 + }, + { + "epoch": 0.9942145387993552, + "grad_norm": 590.5459594726562, + "learning_rate": 7.468443575274764e-09, + "loss": 23.4729, + "step": 492170 + }, + { + "epoch": 0.994234739432039, + "grad_norm": 46.546485900878906, + "learning_rate": 7.449384008033078e-09, + "loss": 20.6852, + "step": 492180 + }, + { + "epoch": 0.9942549400647228, + "grad_norm": 594.9143676757812, + "learning_rate": 7.430348773877494e-09, + "loss": 22.5141, + "step": 492190 + }, + { + "epoch": 0.9942751406974066, + "grad_norm": 223.10205078125, + "learning_rate": 7.411337872900715e-09, + "loss": 20.5256, + "step": 492200 + }, + { + "epoch": 0.9942953413300905, + "grad_norm": 258.2781677246094, + "learning_rate": 7.392351305195999e-09, + "loss": 18.0094, + "step": 492210 + }, + { + "epoch": 0.9943155419627743, + "grad_norm": 1.6258138418197632, + "learning_rate": 7.373389070854941e-09, + "loss": 17.1076, + "step": 492220 + }, + { + "epoch": 0.9943357425954581, + "grad_norm": 786.12109375, + "learning_rate": 7.3544511699708e-09, + "loss": 21.3983, + "step": 492230 + }, + { + "epoch": 0.9943559432281419, + "grad_norm": 660.6713256835938, + "learning_rate": 7.335537602635723e-09, + "loss": 16.2497, + "step": 492240 + }, + { + "epoch": 0.9943761438608257, + "grad_norm": 131.5867156982422, + "learning_rate": 7.3166483689413035e-09, + "loss": 18.4497, + "step": 492250 + }, + { + "epoch": 0.9943963444935096, + "grad_norm": 454.113037109375, + "learning_rate": 7.297783468980246e-09, + "loss": 17.6033, + "step": 492260 + }, + { + "epoch": 0.9944165451261934, + "grad_norm": 191.77944946289062, + "learning_rate": 7.278942902843589e-09, + "loss": 13.1115, + "step": 492270 + }, + { + "epoch": 0.9944367457588772, + "grad_norm": 182.51564025878906, + "learning_rate": 7.26012667062459e-09, + "loss": 11.9545, + "step": 492280 + }, + { + "epoch": 0.994456946391561, + "grad_norm": 196.82113647460938, + "learning_rate": 7.241334772414288e-09, + "loss": 20.5471, + "step": 492290 + }, + { + "epoch": 0.9944771470242448, + "grad_norm": 325.7108154296875, + "learning_rate": 7.222567208303721e-09, + "loss": 24.7077, + "step": 492300 + }, + { + "epoch": 0.9944973476569287, + "grad_norm": 825.5935668945312, + "learning_rate": 7.203823978384483e-09, + "loss": 36.0086, + "step": 492310 + }, + { + "epoch": 0.9945175482896125, + "grad_norm": 120.90480041503906, + "learning_rate": 7.185105082748722e-09, + "loss": 19.9979, + "step": 492320 + }, + { + "epoch": 0.9945377489222963, + "grad_norm": 310.6450500488281, + "learning_rate": 7.166410521487477e-09, + "loss": 21.8731, + "step": 492330 + }, + { + "epoch": 0.9945579495549801, + "grad_norm": 113.73008728027344, + "learning_rate": 7.14774029469123e-09, + "loss": 21.9454, + "step": 492340 + }, + { + "epoch": 0.9945781501876638, + "grad_norm": 330.3626708984375, + "learning_rate": 7.129094402451575e-09, + "loss": 20.177, + "step": 492350 + }, + { + "epoch": 0.9945983508203476, + "grad_norm": 394.8288879394531, + "learning_rate": 7.11047284485844e-09, + "loss": 15.473, + "step": 492360 + }, + { + "epoch": 0.9946185514530315, + "grad_norm": 624.0137939453125, + "learning_rate": 7.0918756220039745e-09, + "loss": 27.1499, + "step": 492370 + }, + { + "epoch": 0.9946387520857153, + "grad_norm": 167.65707397460938, + "learning_rate": 7.073302733978104e-09, + "loss": 17.0275, + "step": 492380 + }, + { + "epoch": 0.9946589527183991, + "grad_norm": 277.8253173828125, + "learning_rate": 7.054754180871315e-09, + "loss": 6.4159, + "step": 492390 + }, + { + "epoch": 0.9946791533510829, + "grad_norm": 290.10595703125, + "learning_rate": 7.036229962774088e-09, + "loss": 21.9664, + "step": 492400 + }, + { + "epoch": 0.9946993539837667, + "grad_norm": 244.20114135742188, + "learning_rate": 7.0177300797763526e-09, + "loss": 34.4278, + "step": 492410 + }, + { + "epoch": 0.9947195546164506, + "grad_norm": 656.7611083984375, + "learning_rate": 6.999254531969146e-09, + "loss": 17.8188, + "step": 492420 + }, + { + "epoch": 0.9947397552491344, + "grad_norm": 873.718017578125, + "learning_rate": 6.980803319441842e-09, + "loss": 33.7453, + "step": 492430 + }, + { + "epoch": 0.9947599558818182, + "grad_norm": 121.09410858154297, + "learning_rate": 6.962376442284368e-09, + "loss": 23.1687, + "step": 492440 + }, + { + "epoch": 0.994780156514502, + "grad_norm": 506.382080078125, + "learning_rate": 6.943973900586654e-09, + "loss": 34.1638, + "step": 492450 + }, + { + "epoch": 0.9948003571471858, + "grad_norm": 670.7462768554688, + "learning_rate": 6.925595694438625e-09, + "loss": 17.3223, + "step": 492460 + }, + { + "epoch": 0.9948205577798697, + "grad_norm": 382.9184875488281, + "learning_rate": 6.9072418239296556e-09, + "loss": 40.3718, + "step": 492470 + }, + { + "epoch": 0.9948407584125535, + "grad_norm": 1033.291015625, + "learning_rate": 6.888912289149119e-09, + "loss": 20.0352, + "step": 492480 + }, + { + "epoch": 0.9948609590452373, + "grad_norm": 244.9137725830078, + "learning_rate": 6.8706070901863876e-09, + "loss": 33.7851, + "step": 492490 + }, + { + "epoch": 0.9948811596779211, + "grad_norm": 129.25210571289062, + "learning_rate": 6.852326227130835e-09, + "loss": 13.3353, + "step": 492500 + }, + { + "epoch": 0.9949013603106049, + "grad_norm": 396.7117614746094, + "learning_rate": 6.834069700071277e-09, + "loss": 17.8248, + "step": 492510 + }, + { + "epoch": 0.9949215609432888, + "grad_norm": 13.202730178833008, + "learning_rate": 6.81583750909709e-09, + "loss": 15.2326, + "step": 492520 + }, + { + "epoch": 0.9949417615759726, + "grad_norm": 197.49044799804688, + "learning_rate": 6.797629654296533e-09, + "loss": 18.0699, + "step": 492530 + }, + { + "epoch": 0.9949619622086564, + "grad_norm": 620.369140625, + "learning_rate": 6.779446135758982e-09, + "loss": 22.7365, + "step": 492540 + }, + { + "epoch": 0.9949821628413402, + "grad_norm": 262.18646240234375, + "learning_rate": 6.761286953572699e-09, + "loss": 15.2508, + "step": 492550 + }, + { + "epoch": 0.995002363474024, + "grad_norm": 847.1273803710938, + "learning_rate": 6.7431521078265e-09, + "loss": 29.8875, + "step": 492560 + }, + { + "epoch": 0.9950225641067079, + "grad_norm": 519.87548828125, + "learning_rate": 6.725041598608651e-09, + "loss": 18.5069, + "step": 492570 + }, + { + "epoch": 0.9950427647393917, + "grad_norm": 781.2832641601562, + "learning_rate": 6.706955426006856e-09, + "loss": 16.7449, + "step": 492580 + }, + { + "epoch": 0.9950629653720755, + "grad_norm": 192.6318817138672, + "learning_rate": 6.688893590109935e-09, + "loss": 15.2852, + "step": 492590 + }, + { + "epoch": 0.9950831660047593, + "grad_norm": 322.802490234375, + "learning_rate": 6.670856091006151e-09, + "loss": 14.4477, + "step": 492600 + }, + { + "epoch": 0.995103366637443, + "grad_norm": 30.472148895263672, + "learning_rate": 6.652842928782655e-09, + "loss": 22.5054, + "step": 492610 + }, + { + "epoch": 0.9951235672701269, + "grad_norm": 488.73419189453125, + "learning_rate": 6.63485410352771e-09, + "loss": 23.5498, + "step": 492620 + }, + { + "epoch": 0.9951437679028107, + "grad_norm": 211.9298095703125, + "learning_rate": 6.61688961532847e-09, + "loss": 6.6318, + "step": 492630 + }, + { + "epoch": 0.9951639685354945, + "grad_norm": 296.3676452636719, + "learning_rate": 6.598949464273196e-09, + "loss": 18.351, + "step": 492640 + }, + { + "epoch": 0.9951841691681783, + "grad_norm": 526.3226318359375, + "learning_rate": 6.581033650449043e-09, + "loss": 16.0651, + "step": 492650 + }, + { + "epoch": 0.9952043698008621, + "grad_norm": 506.3572692871094, + "learning_rate": 6.563142173943715e-09, + "loss": 27.0753, + "step": 492660 + }, + { + "epoch": 0.995224570433546, + "grad_norm": 337.43609619140625, + "learning_rate": 6.545275034843257e-09, + "loss": 14.6582, + "step": 492670 + }, + { + "epoch": 0.9952447710662298, + "grad_norm": 195.84539794921875, + "learning_rate": 6.527432233235931e-09, + "loss": 24.3385, + "step": 492680 + }, + { + "epoch": 0.9952649716989136, + "grad_norm": 389.5736999511719, + "learning_rate": 6.509613769207778e-09, + "loss": 27.1965, + "step": 492690 + }, + { + "epoch": 0.9952851723315974, + "grad_norm": 23.38743019104004, + "learning_rate": 6.491819642846509e-09, + "loss": 16.0401, + "step": 492700 + }, + { + "epoch": 0.9953053729642812, + "grad_norm": 15.392356872558594, + "learning_rate": 6.4740498542387174e-09, + "loss": 5.5294, + "step": 492710 + }, + { + "epoch": 0.995325573596965, + "grad_norm": 100.64783477783203, + "learning_rate": 6.456304403470448e-09, + "loss": 13.0771, + "step": 492720 + }, + { + "epoch": 0.9953457742296489, + "grad_norm": 657.622314453125, + "learning_rate": 6.438583290628298e-09, + "loss": 19.4515, + "step": 492730 + }, + { + "epoch": 0.9953659748623327, + "grad_norm": 298.1041259765625, + "learning_rate": 6.420886515799418e-09, + "loss": 22.1768, + "step": 492740 + }, + { + "epoch": 0.9953861754950165, + "grad_norm": 306.6492004394531, + "learning_rate": 6.403214079069298e-09, + "loss": 23.7699, + "step": 492750 + }, + { + "epoch": 0.9954063761277003, + "grad_norm": 341.74298095703125, + "learning_rate": 6.385565980523978e-09, + "loss": 8.2629, + "step": 492760 + }, + { + "epoch": 0.9954265767603842, + "grad_norm": 176.78253173828125, + "learning_rate": 6.3679422202495015e-09, + "loss": 13.1727, + "step": 492770 + }, + { + "epoch": 0.995446777393068, + "grad_norm": 282.4524841308594, + "learning_rate": 6.350342798332465e-09, + "loss": 19.1546, + "step": 492780 + }, + { + "epoch": 0.9954669780257518, + "grad_norm": 310.2063293457031, + "learning_rate": 6.332767714858357e-09, + "loss": 23.2967, + "step": 492790 + }, + { + "epoch": 0.9954871786584356, + "grad_norm": 644.8700561523438, + "learning_rate": 6.315216969912663e-09, + "loss": 14.4751, + "step": 492800 + }, + { + "epoch": 0.9955073792911194, + "grad_norm": 546.1470947265625, + "learning_rate": 6.2976905635803165e-09, + "loss": 21.2804, + "step": 492810 + }, + { + "epoch": 0.9955275799238033, + "grad_norm": 534.7316284179688, + "learning_rate": 6.280188495947914e-09, + "loss": 16.5509, + "step": 492820 + }, + { + "epoch": 0.9955477805564871, + "grad_norm": 662.7942504882812, + "learning_rate": 6.262710767100388e-09, + "loss": 22.4259, + "step": 492830 + }, + { + "epoch": 0.9955679811891709, + "grad_norm": 319.81024169921875, + "learning_rate": 6.245257377122116e-09, + "loss": 22.5629, + "step": 492840 + }, + { + "epoch": 0.9955881818218547, + "grad_norm": 84.95552825927734, + "learning_rate": 6.227828326099139e-09, + "loss": 16.1414, + "step": 492850 + }, + { + "epoch": 0.9956083824545384, + "grad_norm": 303.6583557128906, + "learning_rate": 6.21042361411639e-09, + "loss": 12.8599, + "step": 492860 + }, + { + "epoch": 0.9956285830872222, + "grad_norm": 281.2415466308594, + "learning_rate": 6.19304324125769e-09, + "loss": 15.6234, + "step": 492870 + }, + { + "epoch": 0.9956487837199061, + "grad_norm": 78.5985336303711, + "learning_rate": 6.175687207609082e-09, + "loss": 12.9739, + "step": 492880 + }, + { + "epoch": 0.9956689843525899, + "grad_norm": 711.9841918945312, + "learning_rate": 6.1583555132543886e-09, + "loss": 25.8945, + "step": 492890 + }, + { + "epoch": 0.9956891849852737, + "grad_norm": 302.2545471191406, + "learning_rate": 6.141048158277429e-09, + "loss": 12.0464, + "step": 492900 + }, + { + "epoch": 0.9957093856179575, + "grad_norm": 197.092041015625, + "learning_rate": 6.123765142764249e-09, + "loss": 11.8187, + "step": 492910 + }, + { + "epoch": 0.9957295862506413, + "grad_norm": 53.73894500732422, + "learning_rate": 6.106506466797557e-09, + "loss": 11.5132, + "step": 492920 + }, + { + "epoch": 0.9957497868833252, + "grad_norm": 264.3664245605469, + "learning_rate": 6.0892721304622874e-09, + "loss": 24.6879, + "step": 492930 + }, + { + "epoch": 0.995769987516009, + "grad_norm": 228.70321655273438, + "learning_rate": 6.0720621338422606e-09, + "loss": 15.5944, + "step": 492940 + }, + { + "epoch": 0.9957901881486928, + "grad_norm": 371.2651062011719, + "learning_rate": 6.054876477021299e-09, + "loss": 9.8062, + "step": 492950 + }, + { + "epoch": 0.9958103887813766, + "grad_norm": 109.58891296386719, + "learning_rate": 6.037715160083224e-09, + "loss": 10.4359, + "step": 492960 + }, + { + "epoch": 0.9958305894140604, + "grad_norm": 575.9791259765625, + "learning_rate": 6.020578183111303e-09, + "loss": 14.5963, + "step": 492970 + }, + { + "epoch": 0.9958507900467443, + "grad_norm": 1932.4429931640625, + "learning_rate": 6.003465546189358e-09, + "loss": 40.4509, + "step": 492980 + }, + { + "epoch": 0.9958709906794281, + "grad_norm": 613.677978515625, + "learning_rate": 5.98637724940121e-09, + "loss": 20.3188, + "step": 492990 + }, + { + "epoch": 0.9958911913121119, + "grad_norm": 369.6727294921875, + "learning_rate": 5.969313292830126e-09, + "loss": 8.9391, + "step": 493000 + }, + { + "epoch": 0.9959113919447957, + "grad_norm": 160.49456787109375, + "learning_rate": 5.952273676558262e-09, + "loss": 9.336, + "step": 493010 + }, + { + "epoch": 0.9959315925774795, + "grad_norm": 788.9593505859375, + "learning_rate": 5.935258400669442e-09, + "loss": 15.4208, + "step": 493020 + }, + { + "epoch": 0.9959517932101634, + "grad_norm": 295.59832763671875, + "learning_rate": 5.918267465246374e-09, + "loss": 12.4223, + "step": 493030 + }, + { + "epoch": 0.9959719938428472, + "grad_norm": 255.62750244140625, + "learning_rate": 5.901300870372329e-09, + "loss": 20.9779, + "step": 493040 + }, + { + "epoch": 0.995992194475531, + "grad_norm": 254.54164123535156, + "learning_rate": 5.8843586161289045e-09, + "loss": 9.4201, + "step": 493050 + }, + { + "epoch": 0.9960123951082148, + "grad_norm": 1058.5753173828125, + "learning_rate": 5.867440702599925e-09, + "loss": 14.6705, + "step": 493060 + }, + { + "epoch": 0.9960325957408986, + "grad_norm": 930.560791015625, + "learning_rate": 5.850547129867546e-09, + "loss": 30.7085, + "step": 493070 + }, + { + "epoch": 0.9960527963735825, + "grad_norm": 206.38072204589844, + "learning_rate": 5.833677898013368e-09, + "loss": 24.0558, + "step": 493080 + }, + { + "epoch": 0.9960729970062663, + "grad_norm": 428.0592041015625, + "learning_rate": 5.816833007120659e-09, + "loss": 19.56, + "step": 493090 + }, + { + "epoch": 0.9960931976389501, + "grad_norm": 260.1087341308594, + "learning_rate": 5.800012457270466e-09, + "loss": 22.4628, + "step": 493100 + }, + { + "epoch": 0.9961133982716339, + "grad_norm": 192.6460723876953, + "learning_rate": 5.783216248545498e-09, + "loss": 12.9076, + "step": 493110 + }, + { + "epoch": 0.9961335989043176, + "grad_norm": 350.8668212890625, + "learning_rate": 5.766444381027358e-09, + "loss": 16.6594, + "step": 493120 + }, + { + "epoch": 0.9961537995370014, + "grad_norm": 180.49252319335938, + "learning_rate": 5.749696854798204e-09, + "loss": 20.0456, + "step": 493130 + }, + { + "epoch": 0.9961740001696853, + "grad_norm": 13.434219360351562, + "learning_rate": 5.732973669939079e-09, + "loss": 16.6701, + "step": 493140 + }, + { + "epoch": 0.9961942008023691, + "grad_norm": 564.3707885742188, + "learning_rate": 5.716274826531587e-09, + "loss": 18.9161, + "step": 493150 + }, + { + "epoch": 0.9962144014350529, + "grad_norm": 249.492919921875, + "learning_rate": 5.699600324657328e-09, + "loss": 21.4974, + "step": 493160 + }, + { + "epoch": 0.9962346020677367, + "grad_norm": 149.17922973632812, + "learning_rate": 5.682950164397349e-09, + "loss": 11.9207, + "step": 493170 + }, + { + "epoch": 0.9962548027004205, + "grad_norm": 593.6851806640625, + "learning_rate": 5.6663243458332514e-09, + "loss": 21.1945, + "step": 493180 + }, + { + "epoch": 0.9962750033331044, + "grad_norm": 76.89675903320312, + "learning_rate": 5.649722869044971e-09, + "loss": 29.9984, + "step": 493190 + }, + { + "epoch": 0.9962952039657882, + "grad_norm": 319.2921142578125, + "learning_rate": 5.633145734114665e-09, + "loss": 19.5254, + "step": 493200 + }, + { + "epoch": 0.996315404598472, + "grad_norm": 343.31097412109375, + "learning_rate": 5.616592941123378e-09, + "loss": 25.4214, + "step": 493210 + }, + { + "epoch": 0.9963356052311558, + "grad_norm": 247.19534301757812, + "learning_rate": 5.600064490149937e-09, + "loss": 16.5237, + "step": 493220 + }, + { + "epoch": 0.9963558058638396, + "grad_norm": 125.71563720703125, + "learning_rate": 5.583560381276498e-09, + "loss": 23.1971, + "step": 493230 + }, + { + "epoch": 0.9963760064965235, + "grad_norm": 387.4584655761719, + "learning_rate": 5.5670806145835536e-09, + "loss": 30.7633, + "step": 493240 + }, + { + "epoch": 0.9963962071292073, + "grad_norm": 239.5253143310547, + "learning_rate": 5.5506251901504825e-09, + "loss": 18.8652, + "step": 493250 + }, + { + "epoch": 0.9964164077618911, + "grad_norm": 265.25750732421875, + "learning_rate": 5.534194108057778e-09, + "loss": 7.082, + "step": 493260 + }, + { + "epoch": 0.9964366083945749, + "grad_norm": 351.1151428222656, + "learning_rate": 5.517787368385375e-09, + "loss": 11.4043, + "step": 493270 + }, + { + "epoch": 0.9964568090272587, + "grad_norm": 534.319580078125, + "learning_rate": 5.501404971214319e-09, + "loss": 16.0895, + "step": 493280 + }, + { + "epoch": 0.9964770096599426, + "grad_norm": 549.73095703125, + "learning_rate": 5.485046916622883e-09, + "loss": 8.4059, + "step": 493290 + }, + { + "epoch": 0.9964972102926264, + "grad_norm": 168.16229248046875, + "learning_rate": 5.468713204692111e-09, + "loss": 11.6919, + "step": 493300 + }, + { + "epoch": 0.9965174109253102, + "grad_norm": 233.44361877441406, + "learning_rate": 5.45240383550083e-09, + "loss": 15.7062, + "step": 493310 + }, + { + "epoch": 0.996537611557994, + "grad_norm": 671.6220092773438, + "learning_rate": 5.436118809128421e-09, + "loss": 20.8994, + "step": 493320 + }, + { + "epoch": 0.9965578121906778, + "grad_norm": 164.53518676757812, + "learning_rate": 5.419858125655375e-09, + "loss": 12.1398, + "step": 493330 + }, + { + "epoch": 0.9965780128233617, + "grad_norm": 32.66850662231445, + "learning_rate": 5.403621785159407e-09, + "loss": 15.2787, + "step": 493340 + }, + { + "epoch": 0.9965982134560455, + "grad_norm": 73.91290283203125, + "learning_rate": 5.38740978772101e-09, + "loss": 11.4727, + "step": 493350 + }, + { + "epoch": 0.9966184140887293, + "grad_norm": 656.6400756835938, + "learning_rate": 5.371222133418452e-09, + "loss": 15.5917, + "step": 493360 + }, + { + "epoch": 0.9966386147214131, + "grad_norm": 161.31642150878906, + "learning_rate": 5.355058822330561e-09, + "loss": 17.0245, + "step": 493370 + }, + { + "epoch": 0.9966588153540968, + "grad_norm": 505.07989501953125, + "learning_rate": 5.338919854536162e-09, + "loss": 14.3958, + "step": 493380 + }, + { + "epoch": 0.9966790159867807, + "grad_norm": 274.6875915527344, + "learning_rate": 5.322805230114636e-09, + "loss": 17.4259, + "step": 493390 + }, + { + "epoch": 0.9966992166194645, + "grad_norm": 466.4015197753906, + "learning_rate": 5.306714949143699e-09, + "loss": 10.4244, + "step": 493400 + }, + { + "epoch": 0.9967194172521483, + "grad_norm": 592.9553833007812, + "learning_rate": 5.290649011702176e-09, + "loss": 26.8744, + "step": 493410 + }, + { + "epoch": 0.9967396178848321, + "grad_norm": 434.5201416015625, + "learning_rate": 5.2746074178683385e-09, + "loss": 17.5278, + "step": 493420 + }, + { + "epoch": 0.9967598185175159, + "grad_norm": 162.36619567871094, + "learning_rate": 5.258590167719901e-09, + "loss": 27.6637, + "step": 493430 + }, + { + "epoch": 0.9967800191501998, + "grad_norm": 313.357666015625, + "learning_rate": 5.242597261335691e-09, + "loss": 20.6985, + "step": 493440 + }, + { + "epoch": 0.9968002197828836, + "grad_norm": 172.16201782226562, + "learning_rate": 5.226628698792868e-09, + "loss": 18.3757, + "step": 493450 + }, + { + "epoch": 0.9968204204155674, + "grad_norm": 502.9991760253906, + "learning_rate": 5.210684480169703e-09, + "loss": 20.8665, + "step": 493460 + }, + { + "epoch": 0.9968406210482512, + "grad_norm": 40.60356903076172, + "learning_rate": 5.1947646055444665e-09, + "loss": 18.3703, + "step": 493470 + }, + { + "epoch": 0.996860821680935, + "grad_norm": 549.3845825195312, + "learning_rate": 5.178869074993209e-09, + "loss": 21.0244, + "step": 493480 + }, + { + "epoch": 0.9968810223136189, + "grad_norm": 584.9131469726562, + "learning_rate": 5.162997888595312e-09, + "loss": 15.8863, + "step": 493490 + }, + { + "epoch": 0.9969012229463027, + "grad_norm": 537.6060180664062, + "learning_rate": 5.147151046426824e-09, + "loss": 15.1142, + "step": 493500 + }, + { + "epoch": 0.9969214235789865, + "grad_norm": 36.83633041381836, + "learning_rate": 5.1313285485649064e-09, + "loss": 16.8483, + "step": 493510 + }, + { + "epoch": 0.9969416242116703, + "grad_norm": 2115.87255859375, + "learning_rate": 5.115530395087276e-09, + "loss": 19.0449, + "step": 493520 + }, + { + "epoch": 0.9969618248443541, + "grad_norm": 4.222888946533203, + "learning_rate": 5.099756586071092e-09, + "loss": 9.6148, + "step": 493530 + }, + { + "epoch": 0.996982025477038, + "grad_norm": 215.87461853027344, + "learning_rate": 5.084007121592405e-09, + "loss": 25.4709, + "step": 493540 + }, + { + "epoch": 0.9970022261097218, + "grad_norm": 441.0953369140625, + "learning_rate": 5.06828200172893e-09, + "loss": 13.4962, + "step": 493550 + }, + { + "epoch": 0.9970224267424056, + "grad_norm": 324.8075256347656, + "learning_rate": 5.052581226556719e-09, + "loss": 19.6106, + "step": 493560 + }, + { + "epoch": 0.9970426273750894, + "grad_norm": 791.2510986328125, + "learning_rate": 5.036904796152375e-09, + "loss": 28.8694, + "step": 493570 + }, + { + "epoch": 0.9970628280077732, + "grad_norm": 791.0879516601562, + "learning_rate": 5.02125271059195e-09, + "loss": 27.5417, + "step": 493580 + }, + { + "epoch": 0.9970830286404571, + "grad_norm": 248.20809936523438, + "learning_rate": 5.0056249699526046e-09, + "loss": 24.7889, + "step": 493590 + }, + { + "epoch": 0.9971032292731409, + "grad_norm": 309.5415954589844, + "learning_rate": 4.990021574309834e-09, + "loss": 14.6246, + "step": 493600 + }, + { + "epoch": 0.9971234299058247, + "grad_norm": 626.6513671875, + "learning_rate": 4.9744425237396865e-09, + "loss": 12.9801, + "step": 493610 + }, + { + "epoch": 0.9971436305385085, + "grad_norm": 795.0584106445312, + "learning_rate": 4.95888781831877e-09, + "loss": 16.2975, + "step": 493620 + }, + { + "epoch": 0.9971638311711922, + "grad_norm": 247.36184692382812, + "learning_rate": 4.9433574581220225e-09, + "loss": 12.3557, + "step": 493630 + }, + { + "epoch": 0.997184031803876, + "grad_norm": 297.8055419921875, + "learning_rate": 4.927851443225495e-09, + "loss": 18.0591, + "step": 493640 + }, + { + "epoch": 0.9972042324365599, + "grad_norm": 682.8153076171875, + "learning_rate": 4.9123697737052386e-09, + "loss": 26.2157, + "step": 493650 + }, + { + "epoch": 0.9972244330692437, + "grad_norm": 850.35546875, + "learning_rate": 4.896912449635638e-09, + "loss": 21.9621, + "step": 493660 + }, + { + "epoch": 0.9972446337019275, + "grad_norm": 273.8650817871094, + "learning_rate": 4.881479471093298e-09, + "loss": 29.0988, + "step": 493670 + }, + { + "epoch": 0.9972648343346113, + "grad_norm": 239.4381561279297, + "learning_rate": 4.866070838152049e-09, + "loss": 12.6839, + "step": 493680 + }, + { + "epoch": 0.9972850349672951, + "grad_norm": 415.7494812011719, + "learning_rate": 4.850686550888495e-09, + "loss": 21.6229, + "step": 493690 + }, + { + "epoch": 0.997305235599979, + "grad_norm": 624.9031982421875, + "learning_rate": 4.835326609376468e-09, + "loss": 13.9667, + "step": 493700 + }, + { + "epoch": 0.9973254362326628, + "grad_norm": 333.7686462402344, + "learning_rate": 4.81999101369146e-09, + "loss": 22.3211, + "step": 493710 + }, + { + "epoch": 0.9973456368653466, + "grad_norm": 330.7865295410156, + "learning_rate": 4.804679763907305e-09, + "loss": 17.5816, + "step": 493720 + }, + { + "epoch": 0.9973658374980304, + "grad_norm": 478.1622619628906, + "learning_rate": 4.789392860100051e-09, + "loss": 26.7659, + "step": 493730 + }, + { + "epoch": 0.9973860381307142, + "grad_norm": 290.862060546875, + "learning_rate": 4.774130302342972e-09, + "loss": 9.9758, + "step": 493740 + }, + { + "epoch": 0.9974062387633981, + "grad_norm": 360.92816162109375, + "learning_rate": 4.758892090711009e-09, + "loss": 18.2678, + "step": 493750 + }, + { + "epoch": 0.9974264393960819, + "grad_norm": 1411.4190673828125, + "learning_rate": 4.743678225278547e-09, + "loss": 22.8019, + "step": 493760 + }, + { + "epoch": 0.9974466400287657, + "grad_norm": 82.69062042236328, + "learning_rate": 4.7284887061194165e-09, + "loss": 12.9301, + "step": 493770 + }, + { + "epoch": 0.9974668406614495, + "grad_norm": 296.7131042480469, + "learning_rate": 4.713323533308001e-09, + "loss": 21.2306, + "step": 493780 + }, + { + "epoch": 0.9974870412941333, + "grad_norm": 254.96810913085938, + "learning_rate": 4.6981827069181305e-09, + "loss": 26.7119, + "step": 493790 + }, + { + "epoch": 0.9975072419268172, + "grad_norm": 617.3046264648438, + "learning_rate": 4.683066227023081e-09, + "loss": 11.8411, + "step": 493800 + }, + { + "epoch": 0.997527442559501, + "grad_norm": 158.57591247558594, + "learning_rate": 4.667974093696681e-09, + "loss": 10.8231, + "step": 493810 + }, + { + "epoch": 0.9975476431921848, + "grad_norm": 363.39715576171875, + "learning_rate": 4.6529063070133165e-09, + "loss": 13.0921, + "step": 493820 + }, + { + "epoch": 0.9975678438248686, + "grad_norm": 448.3036804199219, + "learning_rate": 4.637862867045151e-09, + "loss": 15.935, + "step": 493830 + }, + { + "epoch": 0.9975880444575524, + "grad_norm": 701.3001098632812, + "learning_rate": 4.6228437738665695e-09, + "loss": 12.3682, + "step": 493840 + }, + { + "epoch": 0.9976082450902363, + "grad_norm": 989.6455078125, + "learning_rate": 4.607849027550293e-09, + "loss": 14.8341, + "step": 493850 + }, + { + "epoch": 0.9976284457229201, + "grad_norm": 503.3165283203125, + "learning_rate": 4.592878628169595e-09, + "loss": 27.2796, + "step": 493860 + }, + { + "epoch": 0.9976486463556039, + "grad_norm": 349.30303955078125, + "learning_rate": 4.577932575797195e-09, + "loss": 20.6817, + "step": 493870 + }, + { + "epoch": 0.9976688469882877, + "grad_norm": 1218.044189453125, + "learning_rate": 4.5630108705063684e-09, + "loss": 17.9004, + "step": 493880 + }, + { + "epoch": 0.9976890476209714, + "grad_norm": 834.479248046875, + "learning_rate": 4.5481135123692786e-09, + "loss": 17.5298, + "step": 493890 + }, + { + "epoch": 0.9977092482536553, + "grad_norm": 583.0570068359375, + "learning_rate": 4.533240501459202e-09, + "loss": 17.883, + "step": 493900 + }, + { + "epoch": 0.9977294488863391, + "grad_norm": 228.48049926757812, + "learning_rate": 4.518391837847747e-09, + "loss": 10.9652, + "step": 493910 + }, + { + "epoch": 0.9977496495190229, + "grad_norm": 477.0223083496094, + "learning_rate": 4.503567521608187e-09, + "loss": 18.4236, + "step": 493920 + }, + { + "epoch": 0.9977698501517067, + "grad_norm": 113.33273315429688, + "learning_rate": 4.4887675528121345e-09, + "loss": 28.0155, + "step": 493930 + }, + { + "epoch": 0.9977900507843905, + "grad_norm": 313.23504638671875, + "learning_rate": 4.473991931531752e-09, + "loss": 25.8596, + "step": 493940 + }, + { + "epoch": 0.9978102514170744, + "grad_norm": 135.3140869140625, + "learning_rate": 4.459240657839203e-09, + "loss": 28.1376, + "step": 493950 + }, + { + "epoch": 0.9978304520497582, + "grad_norm": 275.48236083984375, + "learning_rate": 4.4445137318072096e-09, + "loss": 22.8885, + "step": 493960 + }, + { + "epoch": 0.997850652682442, + "grad_norm": 0.3670414090156555, + "learning_rate": 4.429811153505714e-09, + "loss": 7.0648, + "step": 493970 + }, + { + "epoch": 0.9978708533151258, + "grad_norm": 560.8939819335938, + "learning_rate": 4.415132923007992e-09, + "loss": 14.6677, + "step": 493980 + }, + { + "epoch": 0.9978910539478096, + "grad_norm": 28.338668823242188, + "learning_rate": 4.400479040385098e-09, + "loss": 13.3111, + "step": 493990 + }, + { + "epoch": 0.9979112545804935, + "grad_norm": 4389.14111328125, + "learning_rate": 4.385849505708084e-09, + "loss": 41.6417, + "step": 494000 + }, + { + "epoch": 0.9979314552131773, + "grad_norm": 454.45654296875, + "learning_rate": 4.3712443190491175e-09, + "loss": 16.7873, + "step": 494010 + }, + { + "epoch": 0.9979516558458611, + "grad_norm": 189.19061279296875, + "learning_rate": 4.3566634804781405e-09, + "loss": 16.8721, + "step": 494020 + }, + { + "epoch": 0.9979718564785449, + "grad_norm": 686.783447265625, + "learning_rate": 4.342106990067319e-09, + "loss": 24.4592, + "step": 494030 + }, + { + "epoch": 0.9979920571112287, + "grad_norm": 471.0889892578125, + "learning_rate": 4.327574847886595e-09, + "loss": 17.7554, + "step": 494040 + }, + { + "epoch": 0.9980122577439126, + "grad_norm": 392.9156494140625, + "learning_rate": 4.313067054008135e-09, + "loss": 20.7474, + "step": 494050 + }, + { + "epoch": 0.9980324583765964, + "grad_norm": 868.9571533203125, + "learning_rate": 4.298583608501328e-09, + "loss": 27.625, + "step": 494060 + }, + { + "epoch": 0.9980526590092802, + "grad_norm": 2032.013916015625, + "learning_rate": 4.284124511437782e-09, + "loss": 34.6727, + "step": 494070 + }, + { + "epoch": 0.998072859641964, + "grad_norm": 257.8091735839844, + "learning_rate": 4.269689762886886e-09, + "loss": 22.6591, + "step": 494080 + }, + { + "epoch": 0.9980930602746478, + "grad_norm": 320.83758544921875, + "learning_rate": 4.2552793629202506e-09, + "loss": 20.1354, + "step": 494090 + }, + { + "epoch": 0.9981132609073317, + "grad_norm": 123.14933013916016, + "learning_rate": 4.2408933116072635e-09, + "loss": 24.1403, + "step": 494100 + }, + { + "epoch": 0.9981334615400155, + "grad_norm": 206.25991821289062, + "learning_rate": 4.22653160901787e-09, + "loss": 17.0927, + "step": 494110 + }, + { + "epoch": 0.9981536621726993, + "grad_norm": 260.6522216796875, + "learning_rate": 4.212194255222568e-09, + "loss": 12.1673, + "step": 494120 + }, + { + "epoch": 0.9981738628053831, + "grad_norm": 208.68922424316406, + "learning_rate": 4.197881250291302e-09, + "loss": 33.1985, + "step": 494130 + }, + { + "epoch": 0.9981940634380668, + "grad_norm": 363.8871154785156, + "learning_rate": 4.183592594294017e-09, + "loss": 13.1031, + "step": 494140 + }, + { + "epoch": 0.9982142640707506, + "grad_norm": 508.0299377441406, + "learning_rate": 4.169328287299545e-09, + "loss": 23.8725, + "step": 494150 + }, + { + "epoch": 0.9982344647034345, + "grad_norm": 411.2216796875, + "learning_rate": 4.155088329377832e-09, + "loss": 28.0018, + "step": 494160 + }, + { + "epoch": 0.9982546653361183, + "grad_norm": 106.09420776367188, + "learning_rate": 4.140872720598266e-09, + "loss": 16.5168, + "step": 494170 + }, + { + "epoch": 0.9982748659688021, + "grad_norm": 525.4610595703125, + "learning_rate": 4.126681461030236e-09, + "loss": 12.5805, + "step": 494180 + }, + { + "epoch": 0.9982950666014859, + "grad_norm": 475.1921081542969, + "learning_rate": 4.11251455074313e-09, + "loss": 27.8379, + "step": 494190 + }, + { + "epoch": 0.9983152672341697, + "grad_norm": 339.93804931640625, + "learning_rate": 4.098371989805227e-09, + "loss": 24.0035, + "step": 494200 + }, + { + "epoch": 0.9983354678668536, + "grad_norm": 270.710205078125, + "learning_rate": 4.0842537782859185e-09, + "loss": 13.355, + "step": 494210 + }, + { + "epoch": 0.9983556684995374, + "grad_norm": 577.5907592773438, + "learning_rate": 4.07015991625459e-09, + "loss": 17.0013, + "step": 494220 + }, + { + "epoch": 0.9983758691322212, + "grad_norm": 235.8202362060547, + "learning_rate": 4.056090403778967e-09, + "loss": 27.3389, + "step": 494230 + }, + { + "epoch": 0.998396069764905, + "grad_norm": 578.2260131835938, + "learning_rate": 4.042045240927883e-09, + "loss": 14.395, + "step": 494240 + }, + { + "epoch": 0.9984162703975888, + "grad_norm": 50.1723747253418, + "learning_rate": 4.028024427770172e-09, + "loss": 8.5401, + "step": 494250 + }, + { + "epoch": 0.9984364710302727, + "grad_norm": 268.9642333984375, + "learning_rate": 4.014027964373557e-09, + "loss": 10.8139, + "step": 494260 + }, + { + "epoch": 0.9984566716629565, + "grad_norm": 42.60501480102539, + "learning_rate": 4.000055850807427e-09, + "loss": 12.8204, + "step": 494270 + }, + { + "epoch": 0.9984768722956403, + "grad_norm": 512.7504272460938, + "learning_rate": 3.986108087138396e-09, + "loss": 15.5377, + "step": 494280 + }, + { + "epoch": 0.9984970729283241, + "grad_norm": 170.0618896484375, + "learning_rate": 3.972184673435297e-09, + "loss": 15.9197, + "step": 494290 + }, + { + "epoch": 0.998517273561008, + "grad_norm": 358.6625061035156, + "learning_rate": 3.9582856097658554e-09, + "loss": 19.4949, + "step": 494300 + }, + { + "epoch": 0.9985374741936918, + "grad_norm": 353.5986633300781, + "learning_rate": 3.944410896197792e-09, + "loss": 27.7585, + "step": 494310 + }, + { + "epoch": 0.9985576748263756, + "grad_norm": 700.8291015625, + "learning_rate": 3.930560532798832e-09, + "loss": 20.9409, + "step": 494320 + }, + { + "epoch": 0.9985778754590594, + "grad_norm": 306.12103271484375, + "learning_rate": 3.9167345196361454e-09, + "loss": 18.3705, + "step": 494330 + }, + { + "epoch": 0.9985980760917432, + "grad_norm": 338.6306457519531, + "learning_rate": 3.902932856777453e-09, + "loss": 9.2328, + "step": 494340 + }, + { + "epoch": 0.998618276724427, + "grad_norm": 335.12652587890625, + "learning_rate": 3.889155544289924e-09, + "loss": 18.5403, + "step": 494350 + }, + { + "epoch": 0.9986384773571109, + "grad_norm": 311.6947326660156, + "learning_rate": 3.8754025822407285e-09, + "loss": 21.9443, + "step": 494360 + }, + { + "epoch": 0.9986586779897947, + "grad_norm": 355.5566101074219, + "learning_rate": 3.861673970697033e-09, + "loss": 14.4083, + "step": 494370 + }, + { + "epoch": 0.9986788786224785, + "grad_norm": 265.0316162109375, + "learning_rate": 3.847969709725452e-09, + "loss": 17.4924, + "step": 494380 + }, + { + "epoch": 0.9986990792551623, + "grad_norm": 131.7287139892578, + "learning_rate": 3.834289799392598e-09, + "loss": 24.5317, + "step": 494390 + }, + { + "epoch": 0.998719279887846, + "grad_norm": 229.21507263183594, + "learning_rate": 3.820634239765642e-09, + "loss": 18.5032, + "step": 494400 + }, + { + "epoch": 0.9987394805205299, + "grad_norm": 466.8297424316406, + "learning_rate": 3.8070030309111935e-09, + "loss": 12.6156, + "step": 494410 + }, + { + "epoch": 0.9987596811532137, + "grad_norm": 680.4591064453125, + "learning_rate": 3.793396172895314e-09, + "loss": 22.3812, + "step": 494420 + }, + { + "epoch": 0.9987798817858975, + "grad_norm": 295.6737365722656, + "learning_rate": 3.77981366578406e-09, + "loss": 8.7531, + "step": 494430 + }, + { + "epoch": 0.9988000824185813, + "grad_norm": 32.82207489013672, + "learning_rate": 3.766255509644601e-09, + "loss": 18.1024, + "step": 494440 + }, + { + "epoch": 0.9988202830512651, + "grad_norm": 390.6903076171875, + "learning_rate": 3.752721704541884e-09, + "loss": 21.0745, + "step": 494450 + }, + { + "epoch": 0.998840483683949, + "grad_norm": 1059.0428466796875, + "learning_rate": 3.739212250543078e-09, + "loss": 23.7416, + "step": 494460 + }, + { + "epoch": 0.9988606843166328, + "grad_norm": 374.59564208984375, + "learning_rate": 3.7257271477131314e-09, + "loss": 25.2496, + "step": 494470 + }, + { + "epoch": 0.9988808849493166, + "grad_norm": 724.3229370117188, + "learning_rate": 3.7122663961175477e-09, + "loss": 16.3551, + "step": 494480 + }, + { + "epoch": 0.9989010855820004, + "grad_norm": 345.5867919921875, + "learning_rate": 3.698829995822939e-09, + "loss": 15.4175, + "step": 494490 + }, + { + "epoch": 0.9989212862146842, + "grad_norm": 16.395706176757812, + "learning_rate": 3.685417946894254e-09, + "loss": 26.0611, + "step": 494500 + }, + { + "epoch": 0.998941486847368, + "grad_norm": 83.29167175292969, + "learning_rate": 3.672030249396441e-09, + "loss": 9.1453, + "step": 494510 + }, + { + "epoch": 0.9989616874800519, + "grad_norm": 183.70750427246094, + "learning_rate": 3.6586669033955578e-09, + "loss": 12.162, + "step": 494520 + }, + { + "epoch": 0.9989818881127357, + "grad_norm": 322.9624328613281, + "learning_rate": 3.645327908955998e-09, + "loss": 18.2431, + "step": 494530 + }, + { + "epoch": 0.9990020887454195, + "grad_norm": 54.37944793701172, + "learning_rate": 3.632013266143264e-09, + "loss": 18.6937, + "step": 494540 + }, + { + "epoch": 0.9990222893781033, + "grad_norm": 192.83071899414062, + "learning_rate": 3.618722975022304e-09, + "loss": 11.3923, + "step": 494550 + }, + { + "epoch": 0.9990424900107872, + "grad_norm": 430.22259521484375, + "learning_rate": 3.605457035657511e-09, + "loss": 18.7379, + "step": 494560 + }, + { + "epoch": 0.999062690643471, + "grad_norm": 223.731201171875, + "learning_rate": 3.592215448113834e-09, + "loss": 20.3571, + "step": 494570 + }, + { + "epoch": 0.9990828912761548, + "grad_norm": 1489.7357177734375, + "learning_rate": 3.5789982124556646e-09, + "loss": 23.1222, + "step": 494580 + }, + { + "epoch": 0.9991030919088386, + "grad_norm": 574.494384765625, + "learning_rate": 3.565805328747951e-09, + "loss": 11.0038, + "step": 494590 + }, + { + "epoch": 0.9991232925415224, + "grad_norm": 160.38998413085938, + "learning_rate": 3.5526367970539765e-09, + "loss": 19.1827, + "step": 494600 + }, + { + "epoch": 0.9991434931742063, + "grad_norm": 358.2546081542969, + "learning_rate": 3.5394926174381338e-09, + "loss": 37.2199, + "step": 494610 + }, + { + "epoch": 0.9991636938068901, + "grad_norm": 515.025634765625, + "learning_rate": 3.526372789965371e-09, + "loss": 24.1164, + "step": 494620 + }, + { + "epoch": 0.9991838944395739, + "grad_norm": 357.302734375, + "learning_rate": 3.5132773146989706e-09, + "loss": 13.1605, + "step": 494630 + }, + { + "epoch": 0.9992040950722577, + "grad_norm": 5.499563217163086, + "learning_rate": 3.5002061917027708e-09, + "loss": 18.9358, + "step": 494640 + }, + { + "epoch": 0.9992242957049415, + "grad_norm": 177.49220275878906, + "learning_rate": 3.487159421040609e-09, + "loss": 9.681, + "step": 494650 + }, + { + "epoch": 0.9992444963376252, + "grad_norm": 444.80841064453125, + "learning_rate": 3.474137002775768e-09, + "loss": 18.1927, + "step": 494660 + }, + { + "epoch": 0.9992646969703091, + "grad_norm": 235.77890014648438, + "learning_rate": 3.461138936972086e-09, + "loss": 20.3918, + "step": 494670 + }, + { + "epoch": 0.9992848976029929, + "grad_norm": 311.6514892578125, + "learning_rate": 3.4481652236934006e-09, + "loss": 15.494, + "step": 494680 + }, + { + "epoch": 0.9993050982356767, + "grad_norm": 423.1574401855469, + "learning_rate": 3.4352158630018837e-09, + "loss": 20.682, + "step": 494690 + }, + { + "epoch": 0.9993252988683605, + "grad_norm": 421.1527099609375, + "learning_rate": 3.4222908549608193e-09, + "loss": 15.1917, + "step": 494700 + }, + { + "epoch": 0.9993454995010443, + "grad_norm": 420.02874755859375, + "learning_rate": 3.409390199634044e-09, + "loss": 23.7051, + "step": 494710 + }, + { + "epoch": 0.9993657001337282, + "grad_norm": 426.9223327636719, + "learning_rate": 3.3965138970831758e-09, + "loss": 29.5462, + "step": 494720 + }, + { + "epoch": 0.999385900766412, + "grad_norm": 436.4137268066406, + "learning_rate": 3.3836619473720522e-09, + "loss": 11.2342, + "step": 494730 + }, + { + "epoch": 0.9994061013990958, + "grad_norm": 465.861328125, + "learning_rate": 3.370834350563401e-09, + "loss": 15.2891, + "step": 494740 + }, + { + "epoch": 0.9994263020317796, + "grad_norm": 410.02935791015625, + "learning_rate": 3.3580311067188396e-09, + "loss": 14.8325, + "step": 494750 + }, + { + "epoch": 0.9994465026644634, + "grad_norm": 138.91632080078125, + "learning_rate": 3.3452522159010957e-09, + "loss": 21.0164, + "step": 494760 + }, + { + "epoch": 0.9994667032971473, + "grad_norm": 314.4662170410156, + "learning_rate": 3.332497678172897e-09, + "loss": 15.5543, + "step": 494770 + }, + { + "epoch": 0.9994869039298311, + "grad_norm": 557.0912475585938, + "learning_rate": 3.31976749359586e-09, + "loss": 21.8675, + "step": 494780 + }, + { + "epoch": 0.9995071045625149, + "grad_norm": 589.8872680664062, + "learning_rate": 3.3070616622321584e-09, + "loss": 10.1348, + "step": 494790 + }, + { + "epoch": 0.9995273051951987, + "grad_norm": 199.4197998046875, + "learning_rate": 3.294380184143964e-09, + "loss": 19.031, + "step": 494800 + }, + { + "epoch": 0.9995475058278825, + "grad_norm": 300.7760009765625, + "learning_rate": 3.2817230593928938e-09, + "loss": 16.9388, + "step": 494810 + }, + { + "epoch": 0.9995677064605664, + "grad_norm": 753.2967529296875, + "learning_rate": 3.269090288041121e-09, + "loss": 22.7836, + "step": 494820 + }, + { + "epoch": 0.9995879070932502, + "grad_norm": 262.03558349609375, + "learning_rate": 3.256481870149153e-09, + "loss": 30.8767, + "step": 494830 + }, + { + "epoch": 0.999608107725934, + "grad_norm": 381.8133239746094, + "learning_rate": 3.2438978057791615e-09, + "loss": 22.6111, + "step": 494840 + }, + { + "epoch": 0.9996283083586178, + "grad_norm": 598.3963012695312, + "learning_rate": 3.2313380949927643e-09, + "loss": 15.5221, + "step": 494850 + }, + { + "epoch": 0.9996485089913016, + "grad_norm": 212.82321166992188, + "learning_rate": 3.218802737850468e-09, + "loss": 14.6045, + "step": 494860 + }, + { + "epoch": 0.9996687096239855, + "grad_norm": 234.849609375, + "learning_rate": 3.206291734413891e-09, + "loss": 12.5164, + "step": 494870 + }, + { + "epoch": 0.9996889102566693, + "grad_norm": 207.75067138671875, + "learning_rate": 3.1938050847435398e-09, + "loss": 9.634, + "step": 494880 + }, + { + "epoch": 0.9997091108893531, + "grad_norm": 154.989501953125, + "learning_rate": 3.1813427889004767e-09, + "loss": 13.5645, + "step": 494890 + }, + { + "epoch": 0.9997293115220369, + "grad_norm": 370.97998046875, + "learning_rate": 3.1689048469457638e-09, + "loss": 13.6074, + "step": 494900 + }, + { + "epoch": 0.9997495121547206, + "grad_norm": 14.729854583740234, + "learning_rate": 3.156491258939909e-09, + "loss": 19.7983, + "step": 494910 + }, + { + "epoch": 0.9997697127874045, + "grad_norm": 132.00875854492188, + "learning_rate": 3.1441020249428635e-09, + "loss": 16.1261, + "step": 494920 + }, + { + "epoch": 0.9997899134200883, + "grad_norm": 9.60148811340332, + "learning_rate": 3.1317371450156897e-09, + "loss": 8.5566, + "step": 494930 + }, + { + "epoch": 0.9998101140527721, + "grad_norm": 168.33929443359375, + "learning_rate": 3.11939661921834e-09, + "loss": 15.349, + "step": 494940 + }, + { + "epoch": 0.9998303146854559, + "grad_norm": 572.34423828125, + "learning_rate": 3.1070804476113213e-09, + "loss": 8.7059, + "step": 494950 + }, + { + "epoch": 0.9998505153181397, + "grad_norm": 407.3402099609375, + "learning_rate": 3.094788630254031e-09, + "loss": 29.6567, + "step": 494960 + }, + { + "epoch": 0.9998707159508236, + "grad_norm": 196.3535919189453, + "learning_rate": 3.0825211672064203e-09, + "loss": 16.6998, + "step": 494970 + }, + { + "epoch": 0.9998909165835074, + "grad_norm": 592.755615234375, + "learning_rate": 3.070278058528997e-09, + "loss": 24.6521, + "step": 494980 + }, + { + "epoch": 0.9999111172161912, + "grad_norm": 352.9114990234375, + "learning_rate": 3.058059304280603e-09, + "loss": 28.3788, + "step": 494990 + }, + { + "epoch": 0.999931317848875, + "grad_norm": 373.85968017578125, + "learning_rate": 3.0458649045211897e-09, + "loss": 17.7365, + "step": 495000 + }, + { + "epoch": 0.9999515184815588, + "grad_norm": 190.60629272460938, + "learning_rate": 3.03369485931071e-09, + "loss": 21.5938, + "step": 495010 + }, + { + "epoch": 0.9999717191142427, + "grad_norm": 508.1072082519531, + "learning_rate": 3.0215491687074492e-09, + "loss": 23.1618, + "step": 495020 + }, + { + "epoch": 0.9999919197469265, + "grad_norm": 324.51171875, + "learning_rate": 3.009427832771361e-09, + "loss": 29.7145, + "step": 495030 + }, + { + "epoch": 1.0, + "eval_loss": 18.863046646118164, + "eval_runtime": 407.6455, + "eval_samples_per_second": 24.534, + "eval_steps_per_second": 12.268, + "step": 495034 + }, + { + "epoch": 1.0000121203796102, + "grad_norm": 535.4615478515625, + "learning_rate": 2.9973308515607313e-09, + "loss": 24.5712, + "step": 495040 + }, + { + "epoch": 1.0000323210122941, + "grad_norm": 304.3030700683594, + "learning_rate": 2.9852582251355124e-09, + "loss": 19.0508, + "step": 495050 + }, + { + "epoch": 1.0000525216449778, + "grad_norm": 296.67218017578125, + "learning_rate": 2.9732099535539905e-09, + "loss": 13.083, + "step": 495060 + }, + { + "epoch": 1.0000727222776618, + "grad_norm": 538.2628784179688, + "learning_rate": 2.961186036875008e-09, + "loss": 28.6931, + "step": 495070 + }, + { + "epoch": 1.0000929229103455, + "grad_norm": 728.6567993164062, + "learning_rate": 2.949186475157406e-09, + "loss": 16.9243, + "step": 495080 + }, + { + "epoch": 1.0001131235430294, + "grad_norm": 619.2899780273438, + "learning_rate": 2.937211268458917e-09, + "loss": 24.1662, + "step": 495090 + }, + { + "epoch": 1.000133324175713, + "grad_norm": 181.85565185546875, + "learning_rate": 2.9252604168383826e-09, + "loss": 11.0639, + "step": 495100 + }, + { + "epoch": 1.000153524808397, + "grad_norm": 187.00706481933594, + "learning_rate": 2.913333920354644e-09, + "loss": 21.9745, + "step": 495110 + }, + { + "epoch": 1.0001737254410807, + "grad_norm": 322.39569091796875, + "learning_rate": 2.901431779064323e-09, + "loss": 17.245, + "step": 495120 + }, + { + "epoch": 1.0001939260737647, + "grad_norm": 399.79571533203125, + "learning_rate": 2.889553993027372e-09, + "loss": 17.0467, + "step": 495130 + }, + { + "epoch": 1.0002141267064484, + "grad_norm": 650.3701171875, + "learning_rate": 2.8777005622998567e-09, + "loss": 13.9672, + "step": 495140 + }, + { + "epoch": 1.0002343273391323, + "grad_norm": 128.86141967773438, + "learning_rate": 2.865871486940619e-09, + "loss": 11.1238, + "step": 495150 + }, + { + "epoch": 1.000254527971816, + "grad_norm": 311.48162841796875, + "learning_rate": 2.8540667670073905e-09, + "loss": 13.0761, + "step": 495160 + }, + { + "epoch": 1.0002747286045, + "grad_norm": 371.01263427734375, + "learning_rate": 2.842286402556793e-09, + "loss": 21.9169, + "step": 495170 + }, + { + "epoch": 1.0002949292371837, + "grad_norm": 485.1094055175781, + "learning_rate": 2.830530393647113e-09, + "loss": 21.921, + "step": 495180 + }, + { + "epoch": 1.0003151298698676, + "grad_norm": 277.4830322265625, + "learning_rate": 2.8187987403355268e-09, + "loss": 21.0184, + "step": 495190 + }, + { + "epoch": 1.0003353305025513, + "grad_norm": 118.95313262939453, + "learning_rate": 2.8070914426786555e-09, + "loss": 36.1744, + "step": 495200 + }, + { + "epoch": 1.0003555311352352, + "grad_norm": 309.14556884765625, + "learning_rate": 2.7954085007342315e-09, + "loss": 29.7963, + "step": 495210 + }, + { + "epoch": 1.000375731767919, + "grad_norm": 127.88665008544922, + "learning_rate": 2.78374991455832e-09, + "loss": 21.9706, + "step": 495220 + }, + { + "epoch": 1.0003959324006029, + "grad_norm": 19.653532028198242, + "learning_rate": 2.772115684209209e-09, + "loss": 9.5147, + "step": 495230 + }, + { + "epoch": 1.0004161330332866, + "grad_norm": 220.71902465820312, + "learning_rate": 2.7605058097418536e-09, + "loss": 15.3792, + "step": 495240 + }, + { + "epoch": 1.0004363336659705, + "grad_norm": 282.1274719238281, + "learning_rate": 2.748920291214541e-09, + "loss": 9.1682, + "step": 495250 + }, + { + "epoch": 1.0004565342986542, + "grad_norm": 457.2867431640625, + "learning_rate": 2.7373591286822266e-09, + "loss": 18.318, + "step": 495260 + }, + { + "epoch": 1.0004767349313382, + "grad_norm": 1289.856689453125, + "learning_rate": 2.7258223222020876e-09, + "loss": 33.7968, + "step": 495270 + }, + { + "epoch": 1.0004969355640219, + "grad_norm": 611.5613403320312, + "learning_rate": 2.7143098718301896e-09, + "loss": 21.0783, + "step": 495280 + }, + { + "epoch": 1.0005171361967056, + "grad_norm": 244.9925537109375, + "learning_rate": 2.7028217776225994e-09, + "loss": 8.253, + "step": 495290 + }, + { + "epoch": 1.0005373368293895, + "grad_norm": 187.85861206054688, + "learning_rate": 2.6913580396359384e-09, + "loss": 16.2827, + "step": 495300 + }, + { + "epoch": 1.0005575374620732, + "grad_norm": 679.815185546875, + "learning_rate": 2.6799186579246074e-09, + "loss": 15.515, + "step": 495310 + }, + { + "epoch": 1.0005777380947571, + "grad_norm": 716.7931518554688, + "learning_rate": 2.6685036325457826e-09, + "loss": 29.7323, + "step": 495320 + }, + { + "epoch": 1.0005979387274408, + "grad_norm": 225.07211303710938, + "learning_rate": 2.65711296355442e-09, + "loss": 22.4483, + "step": 495330 + }, + { + "epoch": 1.0006181393601248, + "grad_norm": 456.4980163574219, + "learning_rate": 2.6457466510065866e-09, + "loss": 13.9316, + "step": 495340 + }, + { + "epoch": 1.0006383399928085, + "grad_norm": 128.2577362060547, + "learning_rate": 2.6344046949566825e-09, + "loss": 19.9119, + "step": 495350 + }, + { + "epoch": 1.0006585406254924, + "grad_norm": 564.334228515625, + "learning_rate": 2.6230870954607746e-09, + "loss": 20.1779, + "step": 495360 + }, + { + "epoch": 1.0006787412581761, + "grad_norm": 263.80462646484375, + "learning_rate": 2.6117938525738185e-09, + "loss": 13.3283, + "step": 495370 + }, + { + "epoch": 1.00069894189086, + "grad_norm": 158.897705078125, + "learning_rate": 2.6005249663513254e-09, + "loss": 20.4164, + "step": 495380 + }, + { + "epoch": 1.0007191425235438, + "grad_norm": 281.79766845703125, + "learning_rate": 2.5892804368471414e-09, + "loss": 19.5175, + "step": 495390 + }, + { + "epoch": 1.0007393431562277, + "grad_norm": 556.7461547851562, + "learning_rate": 2.5780602641167774e-09, + "loss": 19.6771, + "step": 495400 + }, + { + "epoch": 1.0007595437889114, + "grad_norm": 218.8365478515625, + "learning_rate": 2.5668644482151892e-09, + "loss": 24.8257, + "step": 495410 + }, + { + "epoch": 1.0007797444215953, + "grad_norm": 628.21533203125, + "learning_rate": 2.5556929891962234e-09, + "loss": 11.3612, + "step": 495420 + }, + { + "epoch": 1.000799945054279, + "grad_norm": 698.1282348632812, + "learning_rate": 2.5445458871148353e-09, + "loss": 18.1672, + "step": 495430 + }, + { + "epoch": 1.000820145686963, + "grad_norm": 259.7185974121094, + "learning_rate": 2.5334231420254262e-09, + "loss": 14.2079, + "step": 495440 + }, + { + "epoch": 1.0008403463196467, + "grad_norm": 70.15369415283203, + "learning_rate": 2.522324753981842e-09, + "loss": 8.7467, + "step": 495450 + }, + { + "epoch": 1.0008605469523306, + "grad_norm": 312.29986572265625, + "learning_rate": 2.511250723037928e-09, + "loss": 29.1994, + "step": 495460 + }, + { + "epoch": 1.0008807475850143, + "grad_norm": 279.4052734375, + "learning_rate": 2.5002010492486405e-09, + "loss": 16.2657, + "step": 495470 + }, + { + "epoch": 1.0009009482176983, + "grad_norm": 144.45155334472656, + "learning_rate": 2.4891757326667154e-09, + "loss": 24.549, + "step": 495480 + }, + { + "epoch": 1.000921148850382, + "grad_norm": 1126.0965576171875, + "learning_rate": 2.4781747733471085e-09, + "loss": 45.3086, + "step": 495490 + }, + { + "epoch": 1.000941349483066, + "grad_norm": 599.3152465820312, + "learning_rate": 2.4671981713420003e-09, + "loss": 25.8712, + "step": 495500 + }, + { + "epoch": 1.0009615501157496, + "grad_norm": 250.93577575683594, + "learning_rate": 2.4562459267063466e-09, + "loss": 13.252, + "step": 495510 + }, + { + "epoch": 1.0009817507484335, + "grad_norm": 290.2425231933594, + "learning_rate": 2.445318039492328e-09, + "loss": 8.9645, + "step": 495520 + }, + { + "epoch": 1.0010019513811172, + "grad_norm": 106.93073272705078, + "learning_rate": 2.4344145097537906e-09, + "loss": 13.6351, + "step": 495530 + }, + { + "epoch": 1.0010221520138012, + "grad_norm": 529.4349975585938, + "learning_rate": 2.423535337544025e-09, + "loss": 16.4544, + "step": 495540 + }, + { + "epoch": 1.001042352646485, + "grad_norm": 345.8415222167969, + "learning_rate": 2.412680522915767e-09, + "loss": 22.4422, + "step": 495550 + }, + { + "epoch": 1.0010625532791686, + "grad_norm": 712.0042114257812, + "learning_rate": 2.4018500659217515e-09, + "loss": 25.6466, + "step": 495560 + }, + { + "epoch": 1.0010827539118525, + "grad_norm": 179.86766052246094, + "learning_rate": 2.3910439666147147e-09, + "loss": 16.1174, + "step": 495570 + }, + { + "epoch": 1.0011029545445362, + "grad_norm": 207.787841796875, + "learning_rate": 2.380262225047947e-09, + "loss": 10.0085, + "step": 495580 + }, + { + "epoch": 1.0011231551772202, + "grad_norm": 695.45751953125, + "learning_rate": 2.369504841273629e-09, + "loss": 16.8075, + "step": 495590 + }, + { + "epoch": 1.0011433558099039, + "grad_norm": 158.43753051757812, + "learning_rate": 2.358771815344496e-09, + "loss": 10.7488, + "step": 495600 + }, + { + "epoch": 1.0011635564425878, + "grad_norm": 536.5902709960938, + "learning_rate": 2.348063147312174e-09, + "loss": 17.2911, + "step": 495610 + }, + { + "epoch": 1.0011837570752715, + "grad_norm": 449.4764709472656, + "learning_rate": 2.337378837229398e-09, + "loss": 19.0239, + "step": 495620 + }, + { + "epoch": 1.0012039577079554, + "grad_norm": 527.6913452148438, + "learning_rate": 2.326718885147794e-09, + "loss": 8.5869, + "step": 495630 + }, + { + "epoch": 1.0012241583406392, + "grad_norm": 2.247581958770752, + "learning_rate": 2.316083291120097e-09, + "loss": 11.1301, + "step": 495640 + }, + { + "epoch": 1.001244358973323, + "grad_norm": 163.10150146484375, + "learning_rate": 2.3054720551973773e-09, + "loss": 17.8689, + "step": 495650 + }, + { + "epoch": 1.0012645596060068, + "grad_norm": 183.3430938720703, + "learning_rate": 2.294885177431816e-09, + "loss": 15.3342, + "step": 495660 + }, + { + "epoch": 1.0012847602386907, + "grad_norm": 152.39137268066406, + "learning_rate": 2.2843226578744826e-09, + "loss": 13.1861, + "step": 495670 + }, + { + "epoch": 1.0013049608713744, + "grad_norm": 1099.2608642578125, + "learning_rate": 2.2737844965775578e-09, + "loss": 23.4051, + "step": 495680 + }, + { + "epoch": 1.0013251615040584, + "grad_norm": 121.2804183959961, + "learning_rate": 2.263270693592112e-09, + "loss": 12.0707, + "step": 495690 + }, + { + "epoch": 1.001345362136742, + "grad_norm": 831.40966796875, + "learning_rate": 2.2527812489692156e-09, + "loss": 28.1774, + "step": 495700 + }, + { + "epoch": 1.001365562769426, + "grad_norm": 540.88671875, + "learning_rate": 2.2423161627599386e-09, + "loss": 17.6186, + "step": 495710 + }, + { + "epoch": 1.0013857634021097, + "grad_norm": 46.36670684814453, + "learning_rate": 2.2318754350159067e-09, + "loss": 20.9344, + "step": 495720 + }, + { + "epoch": 1.0014059640347936, + "grad_norm": 212.47543334960938, + "learning_rate": 2.2214590657870795e-09, + "loss": 16.6399, + "step": 495730 + }, + { + "epoch": 1.0014261646674774, + "grad_norm": 47.5765266418457, + "learning_rate": 2.211067055124527e-09, + "loss": 11.9324, + "step": 495740 + }, + { + "epoch": 1.0014463653001613, + "grad_norm": 1.8978875875473022, + "learning_rate": 2.2006994030798758e-09, + "loss": 7.8692, + "step": 495750 + }, + { + "epoch": 1.001466565932845, + "grad_norm": 353.38153076171875, + "learning_rate": 2.1903561097019744e-09, + "loss": 15.6718, + "step": 495760 + }, + { + "epoch": 1.001486766565529, + "grad_norm": 534.5007934570312, + "learning_rate": 2.1800371750430037e-09, + "loss": 34.5303, + "step": 495770 + }, + { + "epoch": 1.0015069671982126, + "grad_norm": 166.82882690429688, + "learning_rate": 2.169742599151814e-09, + "loss": 22.251, + "step": 495780 + }, + { + "epoch": 1.0015271678308966, + "grad_norm": 594.2769165039062, + "learning_rate": 2.15947238207892e-09, + "loss": 16.6582, + "step": 495790 + }, + { + "epoch": 1.0015473684635803, + "grad_norm": 511.8836364746094, + "learning_rate": 2.149226523874837e-09, + "loss": 12.1377, + "step": 495800 + }, + { + "epoch": 1.001567569096264, + "grad_norm": 437.1927185058594, + "learning_rate": 2.1390050245895246e-09, + "loss": 20.587, + "step": 495810 + }, + { + "epoch": 1.001587769728948, + "grad_norm": 478.9796447753906, + "learning_rate": 2.128807884272388e-09, + "loss": 23.5372, + "step": 495820 + }, + { + "epoch": 1.0016079703616316, + "grad_norm": 479.27288818359375, + "learning_rate": 2.1186351029733877e-09, + "loss": 15.6252, + "step": 495830 + }, + { + "epoch": 1.0016281709943156, + "grad_norm": 560.8809204101562, + "learning_rate": 2.1084866807413727e-09, + "loss": 31.4543, + "step": 495840 + }, + { + "epoch": 1.0016483716269993, + "grad_norm": 632.0659790039062, + "learning_rate": 2.098362617626859e-09, + "loss": 12.4084, + "step": 495850 + }, + { + "epoch": 1.0016685722596832, + "grad_norm": 166.77639770507812, + "learning_rate": 2.088262913679251e-09, + "loss": 5.7719, + "step": 495860 + }, + { + "epoch": 1.001688772892367, + "grad_norm": 135.89581298828125, + "learning_rate": 2.078187568946288e-09, + "loss": 12.6637, + "step": 495870 + }, + { + "epoch": 1.0017089735250508, + "grad_norm": 227.5363311767578, + "learning_rate": 2.0681365834790413e-09, + "loss": 15.5393, + "step": 495880 + }, + { + "epoch": 1.0017291741577345, + "grad_norm": 291.6004333496094, + "learning_rate": 2.0581099573246943e-09, + "loss": 36.1768, + "step": 495890 + }, + { + "epoch": 1.0017493747904185, + "grad_norm": 282.8682556152344, + "learning_rate": 2.0481076905332074e-09, + "loss": 8.65, + "step": 495900 + }, + { + "epoch": 1.0017695754231022, + "grad_norm": 189.76792907714844, + "learning_rate": 2.038129783153431e-09, + "loss": 11.4054, + "step": 495910 + }, + { + "epoch": 1.0017897760557861, + "grad_norm": 233.2994384765625, + "learning_rate": 2.0281762352331034e-09, + "loss": 18.2021, + "step": 495920 + }, + { + "epoch": 1.0018099766884698, + "grad_norm": 176.9680633544922, + "learning_rate": 2.018247046821631e-09, + "loss": 11.5819, + "step": 495930 + }, + { + "epoch": 1.0018301773211538, + "grad_norm": 153.39129638671875, + "learning_rate": 2.008342217966752e-09, + "loss": 24.4084, + "step": 495940 + }, + { + "epoch": 1.0018503779538375, + "grad_norm": 409.62103271484375, + "learning_rate": 1.9984617487173174e-09, + "loss": 17.9044, + "step": 495950 + }, + { + "epoch": 1.0018705785865214, + "grad_norm": 258.4937744140625, + "learning_rate": 1.9886056391210663e-09, + "loss": 23.0219, + "step": 495960 + }, + { + "epoch": 1.001890779219205, + "grad_norm": 90.13101196289062, + "learning_rate": 1.9787738892262932e-09, + "loss": 5.2013, + "step": 495970 + }, + { + "epoch": 1.001910979851889, + "grad_norm": 273.6362609863281, + "learning_rate": 1.968966499080738e-09, + "loss": 14.6099, + "step": 495980 + }, + { + "epoch": 1.0019311804845727, + "grad_norm": 562.908935546875, + "learning_rate": 1.95918346873214e-09, + "loss": 13.6189, + "step": 495990 + }, + { + "epoch": 1.0019513811172567, + "grad_norm": 322.1513366699219, + "learning_rate": 1.9494247982282386e-09, + "loss": 31.2791, + "step": 496000 + }, + { + "epoch": 1.0019715817499404, + "grad_norm": 57.95103073120117, + "learning_rate": 1.9396904876167742e-09, + "loss": 16.7426, + "step": 496010 + }, + { + "epoch": 1.0019917823826243, + "grad_norm": 270.1626281738281, + "learning_rate": 1.9299805369449307e-09, + "loss": 15.8352, + "step": 496020 + }, + { + "epoch": 1.002011983015308, + "grad_norm": 181.19337463378906, + "learning_rate": 1.920294946260448e-09, + "loss": 12.8496, + "step": 496030 + }, + { + "epoch": 1.002032183647992, + "grad_norm": 279.1658630371094, + "learning_rate": 1.9106337156099553e-09, + "loss": 12.1115, + "step": 496040 + }, + { + "epoch": 1.0020523842806757, + "grad_norm": 173.3330841064453, + "learning_rate": 1.9009968450406368e-09, + "loss": 24.0644, + "step": 496050 + }, + { + "epoch": 1.0020725849133594, + "grad_norm": 280.337158203125, + "learning_rate": 1.8913843346002324e-09, + "loss": 14.3357, + "step": 496060 + }, + { + "epoch": 1.0020927855460433, + "grad_norm": 222.7508087158203, + "learning_rate": 1.8817961843348166e-09, + "loss": 12.7869, + "step": 496070 + }, + { + "epoch": 1.002112986178727, + "grad_norm": 387.0539245605469, + "learning_rate": 1.872232394291018e-09, + "loss": 13.7058, + "step": 496080 + }, + { + "epoch": 1.002133186811411, + "grad_norm": 144.1525115966797, + "learning_rate": 1.8626929645160218e-09, + "loss": 24.1266, + "step": 496090 + }, + { + "epoch": 1.0021533874440947, + "grad_norm": 233.26426696777344, + "learning_rate": 1.8531778950564572e-09, + "loss": 18.7332, + "step": 496100 + }, + { + "epoch": 1.0021735880767786, + "grad_norm": 364.735595703125, + "learning_rate": 1.8436871859578431e-09, + "loss": 18.3567, + "step": 496110 + }, + { + "epoch": 1.0021937887094623, + "grad_norm": 305.4839782714844, + "learning_rate": 1.834220837266809e-09, + "loss": 16.8414, + "step": 496120 + }, + { + "epoch": 1.0022139893421462, + "grad_norm": 80.03901672363281, + "learning_rate": 1.8247788490299846e-09, + "loss": 22.3505, + "step": 496130 + }, + { + "epoch": 1.00223418997483, + "grad_norm": 91.74203491210938, + "learning_rate": 1.8153612212923333e-09, + "loss": 13.1593, + "step": 496140 + }, + { + "epoch": 1.0022543906075139, + "grad_norm": 83.57035064697266, + "learning_rate": 1.80596795410104e-09, + "loss": 28.7778, + "step": 496150 + }, + { + "epoch": 1.0022745912401976, + "grad_norm": 89.92529296875, + "learning_rate": 1.7965990475010686e-09, + "loss": 20.5896, + "step": 496160 + }, + { + "epoch": 1.0022947918728815, + "grad_norm": 365.5494689941406, + "learning_rate": 1.7872545015379382e-09, + "loss": 17.4617, + "step": 496170 + }, + { + "epoch": 1.0023149925055652, + "grad_norm": 228.63990783691406, + "learning_rate": 1.7779343162577233e-09, + "loss": 9.7855, + "step": 496180 + }, + { + "epoch": 1.0023351931382491, + "grad_norm": 471.0659484863281, + "learning_rate": 1.7686384917059429e-09, + "loss": 29.9951, + "step": 496190 + }, + { + "epoch": 1.0023553937709329, + "grad_norm": 6.007596969604492, + "learning_rate": 1.759367027927561e-09, + "loss": 8.9301, + "step": 496200 + }, + { + "epoch": 1.0023755944036168, + "grad_norm": 310.35614013671875, + "learning_rate": 1.7501199249675416e-09, + "loss": 11.2919, + "step": 496210 + }, + { + "epoch": 1.0023957950363005, + "grad_norm": 67.65311431884766, + "learning_rate": 1.740897182871404e-09, + "loss": 12.9013, + "step": 496220 + }, + { + "epoch": 1.0024159956689844, + "grad_norm": 271.9510498046875, + "learning_rate": 1.7316988016835567e-09, + "loss": 10.5365, + "step": 496230 + }, + { + "epoch": 1.0024361963016681, + "grad_norm": 577.5405883789062, + "learning_rate": 1.7225247814495194e-09, + "loss": 11.4776, + "step": 496240 + }, + { + "epoch": 1.002456396934352, + "grad_norm": 158.23123168945312, + "learning_rate": 1.7133751222137007e-09, + "loss": 16.7018, + "step": 496250 + }, + { + "epoch": 1.0024765975670358, + "grad_norm": 151.22882080078125, + "learning_rate": 1.7042498240205097e-09, + "loss": 17.0154, + "step": 496260 + }, + { + "epoch": 1.0024967981997197, + "grad_norm": 351.3409118652344, + "learning_rate": 1.6951488869149103e-09, + "loss": 5.6745, + "step": 496270 + }, + { + "epoch": 1.0025169988324034, + "grad_norm": 291.2845153808594, + "learning_rate": 1.686072310940201e-09, + "loss": 9.1775, + "step": 496280 + }, + { + "epoch": 1.0025371994650873, + "grad_norm": 526.7337036132812, + "learning_rate": 1.6770200961419015e-09, + "loss": 18.1165, + "step": 496290 + }, + { + "epoch": 1.002557400097771, + "grad_norm": 308.7669372558594, + "learning_rate": 1.6679922425638651e-09, + "loss": 25.7948, + "step": 496300 + }, + { + "epoch": 1.002577600730455, + "grad_norm": 240.68191528320312, + "learning_rate": 1.6589887502493907e-09, + "loss": 16.251, + "step": 496310 + }, + { + "epoch": 1.0025978013631387, + "grad_norm": 253.4486846923828, + "learning_rate": 1.650009619242887e-09, + "loss": 12.1938, + "step": 496320 + }, + { + "epoch": 1.0026180019958224, + "grad_norm": 144.76792907714844, + "learning_rate": 1.6410548495876533e-09, + "loss": 12.4415, + "step": 496330 + }, + { + "epoch": 1.0026382026285063, + "grad_norm": 126.26376342773438, + "learning_rate": 1.632124441328098e-09, + "loss": 16.3367, + "step": 496340 + }, + { + "epoch": 1.00265840326119, + "grad_norm": 622.173095703125, + "learning_rate": 1.6232183945075197e-09, + "loss": 20.4622, + "step": 496350 + }, + { + "epoch": 1.002678603893874, + "grad_norm": 404.68792724609375, + "learning_rate": 1.6143367091686624e-09, + "loss": 17.3768, + "step": 496360 + }, + { + "epoch": 1.0026988045265577, + "grad_norm": 523.90869140625, + "learning_rate": 1.6054793853553797e-09, + "loss": 21.2731, + "step": 496370 + }, + { + "epoch": 1.0027190051592416, + "grad_norm": 419.3081970214844, + "learning_rate": 1.59664642311097e-09, + "loss": 19.0114, + "step": 496380 + }, + { + "epoch": 1.0027392057919253, + "grad_norm": 345.4902648925781, + "learning_rate": 1.5878378224781777e-09, + "loss": 8.9369, + "step": 496390 + }, + { + "epoch": 1.0027594064246093, + "grad_norm": 372.1741943359375, + "learning_rate": 1.5790535835003006e-09, + "loss": 10.9754, + "step": 496400 + }, + { + "epoch": 1.002779607057293, + "grad_norm": 194.96832275390625, + "learning_rate": 1.570293706219528e-09, + "loss": 5.5389, + "step": 496410 + }, + { + "epoch": 1.002799807689977, + "grad_norm": 182.9541473388672, + "learning_rate": 1.5615581906791576e-09, + "loss": 7.1093, + "step": 496420 + }, + { + "epoch": 1.0028200083226606, + "grad_norm": 239.05899047851562, + "learning_rate": 1.5528470369208238e-09, + "loss": 20.723, + "step": 496430 + }, + { + "epoch": 1.0028402089553445, + "grad_norm": 1060.20361328125, + "learning_rate": 1.5441602449883797e-09, + "loss": 15.2947, + "step": 496440 + }, + { + "epoch": 1.0028604095880282, + "grad_norm": 0.0, + "learning_rate": 1.535497814923459e-09, + "loss": 8.3481, + "step": 496450 + }, + { + "epoch": 1.0028806102207122, + "grad_norm": 10.102018356323242, + "learning_rate": 1.52685974676825e-09, + "loss": 21.2022, + "step": 496460 + }, + { + "epoch": 1.0029008108533959, + "grad_norm": 339.6944885253906, + "learning_rate": 1.518246040564386e-09, + "loss": 10.4844, + "step": 496470 + }, + { + "epoch": 1.0029210114860798, + "grad_norm": 1862.4864501953125, + "learning_rate": 1.509656696354611e-09, + "loss": 29.4656, + "step": 496480 + }, + { + "epoch": 1.0029412121187635, + "grad_norm": 468.07958984375, + "learning_rate": 1.5010917141811132e-09, + "loss": 25.402, + "step": 496490 + }, + { + "epoch": 1.0029614127514475, + "grad_norm": 325.1629943847656, + "learning_rate": 1.4925510940844157e-09, + "loss": 13.7435, + "step": 496500 + }, + { + "epoch": 1.0029816133841312, + "grad_norm": 661.4666137695312, + "learning_rate": 1.4840348361067069e-09, + "loss": 30.6057, + "step": 496510 + }, + { + "epoch": 1.003001814016815, + "grad_norm": 336.5476379394531, + "learning_rate": 1.4755429402901755e-09, + "loss": 10.5586, + "step": 496520 + }, + { + "epoch": 1.0030220146494988, + "grad_norm": 724.3787841796875, + "learning_rate": 1.4670754066747895e-09, + "loss": 11.5721, + "step": 496530 + }, + { + "epoch": 1.0030422152821827, + "grad_norm": 546.8881225585938, + "learning_rate": 1.4586322353032923e-09, + "loss": 14.4762, + "step": 496540 + }, + { + "epoch": 1.0030624159148664, + "grad_norm": 330.6160583496094, + "learning_rate": 1.4502134262156519e-09, + "loss": 11.2739, + "step": 496550 + }, + { + "epoch": 1.0030826165475504, + "grad_norm": 6.230769157409668, + "learning_rate": 1.4418189794540572e-09, + "loss": 11.9299, + "step": 496560 + }, + { + "epoch": 1.003102817180234, + "grad_norm": 172.46426391601562, + "learning_rate": 1.4334488950579206e-09, + "loss": 14.5759, + "step": 496570 + }, + { + "epoch": 1.0031230178129178, + "grad_norm": 20.506919860839844, + "learning_rate": 1.425103173069986e-09, + "loss": 11.4367, + "step": 496580 + }, + { + "epoch": 1.0031432184456017, + "grad_norm": 417.68963623046875, + "learning_rate": 1.4167818135291112e-09, + "loss": 13.0775, + "step": 496590 + }, + { + "epoch": 1.0031634190782854, + "grad_norm": 24.981063842773438, + "learning_rate": 1.4084848164763742e-09, + "loss": 12.4334, + "step": 496600 + }, + { + "epoch": 1.0031836197109694, + "grad_norm": 6.88071346282959, + "learning_rate": 1.4002121819528535e-09, + "loss": 15.9379, + "step": 496610 + }, + { + "epoch": 1.003203820343653, + "grad_norm": 431.84326171875, + "learning_rate": 1.3919639099985171e-09, + "loss": 23.2785, + "step": 496620 + }, + { + "epoch": 1.003224020976337, + "grad_norm": 85.91773986816406, + "learning_rate": 1.3837400006533331e-09, + "loss": 17.4635, + "step": 496630 + }, + { + "epoch": 1.0032442216090207, + "grad_norm": 304.8151550292969, + "learning_rate": 1.3755404539572692e-09, + "loss": 14.6721, + "step": 496640 + }, + { + "epoch": 1.0032644222417046, + "grad_norm": 359.48175048828125, + "learning_rate": 1.3673652699508487e-09, + "loss": 10.4681, + "step": 496650 + }, + { + "epoch": 1.0032846228743884, + "grad_norm": 236.9304656982422, + "learning_rate": 1.3592144486740399e-09, + "loss": 19.3031, + "step": 496660 + }, + { + "epoch": 1.0033048235070723, + "grad_norm": 16.39584732055664, + "learning_rate": 1.3510879901657003e-09, + "loss": 5.941, + "step": 496670 + }, + { + "epoch": 1.003325024139756, + "grad_norm": 309.649169921875, + "learning_rate": 1.342985894465798e-09, + "loss": 13.6341, + "step": 496680 + }, + { + "epoch": 1.00334522477244, + "grad_norm": 94.05716705322266, + "learning_rate": 1.3349081616143012e-09, + "loss": 21.2653, + "step": 496690 + }, + { + "epoch": 1.0033654254051236, + "grad_norm": 533.8690795898438, + "learning_rate": 1.3268547916495124e-09, + "loss": 9.9349, + "step": 496700 + }, + { + "epoch": 1.0033856260378076, + "grad_norm": 14.093035697937012, + "learning_rate": 1.3188257846119545e-09, + "loss": 12.0922, + "step": 496710 + }, + { + "epoch": 1.0034058266704913, + "grad_norm": 20.148250579833984, + "learning_rate": 1.3108211405399307e-09, + "loss": 9.9139, + "step": 496720 + }, + { + "epoch": 1.0034260273031752, + "grad_norm": 665.7136840820312, + "learning_rate": 1.3028408594728536e-09, + "loss": 15.5665, + "step": 496730 + }, + { + "epoch": 1.003446227935859, + "grad_norm": 220.5654296875, + "learning_rate": 1.2948849414495811e-09, + "loss": 14.6036, + "step": 496740 + }, + { + "epoch": 1.0034664285685428, + "grad_norm": 269.35601806640625, + "learning_rate": 1.286953386508416e-09, + "loss": 25.4018, + "step": 496750 + }, + { + "epoch": 1.0034866292012266, + "grad_norm": 287.6634521484375, + "learning_rate": 1.2790461946887712e-09, + "loss": 23.9352, + "step": 496760 + }, + { + "epoch": 1.0035068298339105, + "grad_norm": 195.8697509765625, + "learning_rate": 1.271163366028394e-09, + "loss": 24.3558, + "step": 496770 + }, + { + "epoch": 1.0035270304665942, + "grad_norm": 360.56964111328125, + "learning_rate": 1.2633049005661423e-09, + "loss": 6.6644, + "step": 496780 + }, + { + "epoch": 1.0035472310992781, + "grad_norm": 355.21771240234375, + "learning_rate": 1.2554707983403192e-09, + "loss": 18.4967, + "step": 496790 + }, + { + "epoch": 1.0035674317319618, + "grad_norm": 90.62782287597656, + "learning_rate": 1.247661059389227e-09, + "loss": 17.228, + "step": 496800 + }, + { + "epoch": 1.0035876323646458, + "grad_norm": 209.0258331298828, + "learning_rate": 1.2398756837506131e-09, + "loss": 15.8311, + "step": 496810 + }, + { + "epoch": 1.0036078329973295, + "grad_norm": 381.7867126464844, + "learning_rate": 1.2321146714627807e-09, + "loss": 8.8757, + "step": 496820 + }, + { + "epoch": 1.0036280336300132, + "grad_norm": 236.79905700683594, + "learning_rate": 1.224378022562922e-09, + "loss": 16.2411, + "step": 496830 + }, + { + "epoch": 1.0036482342626971, + "grad_norm": 210.00741577148438, + "learning_rate": 1.2166657370898948e-09, + "loss": 14.6875, + "step": 496840 + }, + { + "epoch": 1.0036684348953808, + "grad_norm": 392.40838623046875, + "learning_rate": 1.2089778150797816e-09, + "loss": 20.7608, + "step": 496850 + }, + { + "epoch": 1.0036886355280648, + "grad_norm": 65.0674819946289, + "learning_rate": 1.2013142565708845e-09, + "loss": 15.7522, + "step": 496860 + }, + { + "epoch": 1.0037088361607485, + "grad_norm": 382.4653625488281, + "learning_rate": 1.193675061600952e-09, + "loss": 15.6798, + "step": 496870 + }, + { + "epoch": 1.0037290367934324, + "grad_norm": 442.8677978515625, + "learning_rate": 1.1860602302066203e-09, + "loss": 26.6604, + "step": 496880 + }, + { + "epoch": 1.003749237426116, + "grad_norm": 1905.3214111328125, + "learning_rate": 1.178469762425083e-09, + "loss": 11.8643, + "step": 496890 + }, + { + "epoch": 1.0037694380588, + "grad_norm": 541.8901977539062, + "learning_rate": 1.170903658293532e-09, + "loss": 10.6899, + "step": 496900 + }, + { + "epoch": 1.0037896386914837, + "grad_norm": 480.2037353515625, + "learning_rate": 1.1633619178486044e-09, + "loss": 13.7109, + "step": 496910 + }, + { + "epoch": 1.0038098393241677, + "grad_norm": 348.5443115234375, + "learning_rate": 1.155844541126938e-09, + "loss": 23.0337, + "step": 496920 + }, + { + "epoch": 1.0038300399568514, + "grad_norm": 276.6700744628906, + "learning_rate": 1.1483515281657254e-09, + "loss": 14.6272, + "step": 496930 + }, + { + "epoch": 1.0038502405895353, + "grad_norm": 170.61239624023438, + "learning_rate": 1.1408828790010484e-09, + "loss": 10.445, + "step": 496940 + }, + { + "epoch": 1.003870441222219, + "grad_norm": 44.20932388305664, + "learning_rate": 1.1334385936695447e-09, + "loss": 20.9744, + "step": 496950 + }, + { + "epoch": 1.003890641854903, + "grad_norm": 435.5612487792969, + "learning_rate": 1.1260186722067411e-09, + "loss": 20.1963, + "step": 496960 + }, + { + "epoch": 1.0039108424875867, + "grad_norm": 257.3517150878906, + "learning_rate": 1.1186231146503856e-09, + "loss": 24.552, + "step": 496970 + }, + { + "epoch": 1.0039310431202706, + "grad_norm": 536.3111572265625, + "learning_rate": 1.111251921034895e-09, + "loss": 29.9063, + "step": 496980 + }, + { + "epoch": 1.0039512437529543, + "grad_norm": 302.0859375, + "learning_rate": 1.1039050913969062e-09, + "loss": 12.5154, + "step": 496990 + }, + { + "epoch": 1.0039714443856382, + "grad_norm": 840.5151977539062, + "learning_rate": 1.096582625772502e-09, + "loss": 29.2025, + "step": 497000 + }, + { + "epoch": 1.003991645018322, + "grad_norm": 393.2088623046875, + "learning_rate": 1.0892845241972094e-09, + "loss": 14.4391, + "step": 497010 + }, + { + "epoch": 1.0040118456510059, + "grad_norm": 262.2140197753906, + "learning_rate": 1.0820107867060004e-09, + "loss": 19.033, + "step": 497020 + }, + { + "epoch": 1.0040320462836896, + "grad_norm": 135.4094696044922, + "learning_rate": 1.074761413334957e-09, + "loss": 32.1915, + "step": 497030 + }, + { + "epoch": 1.0040522469163735, + "grad_norm": 378.78863525390625, + "learning_rate": 1.0675364041190516e-09, + "loss": 13.1999, + "step": 497040 + }, + { + "epoch": 1.0040724475490572, + "grad_norm": 364.27679443359375, + "learning_rate": 1.0603357590938112e-09, + "loss": 7.9666, + "step": 497050 + }, + { + "epoch": 1.0040926481817412, + "grad_norm": 273.89202880859375, + "learning_rate": 1.0531594782942079e-09, + "loss": 14.7918, + "step": 497060 + }, + { + "epoch": 1.0041128488144249, + "grad_norm": 620.6331787109375, + "learning_rate": 1.0460075617552134e-09, + "loss": 18.9597, + "step": 497070 + }, + { + "epoch": 1.0041330494471086, + "grad_norm": 967.349365234375, + "learning_rate": 1.0388800095118002e-09, + "loss": 16.9013, + "step": 497080 + }, + { + "epoch": 1.0041532500797925, + "grad_norm": 378.5223693847656, + "learning_rate": 1.0317768215983847e-09, + "loss": 11.8875, + "step": 497090 + }, + { + "epoch": 1.0041734507124762, + "grad_norm": 255.29290771484375, + "learning_rate": 1.0246979980499395e-09, + "loss": 21.5511, + "step": 497100 + }, + { + "epoch": 1.0041936513451601, + "grad_norm": 152.40760803222656, + "learning_rate": 1.017643538900881e-09, + "loss": 8.6306, + "step": 497110 + }, + { + "epoch": 1.0042138519778439, + "grad_norm": 301.55950927734375, + "learning_rate": 1.0106134441850712e-09, + "loss": 10.1663, + "step": 497120 + }, + { + "epoch": 1.0042340526105278, + "grad_norm": 366.7298583984375, + "learning_rate": 1.0036077139380373e-09, + "loss": 9.4549, + "step": 497130 + }, + { + "epoch": 1.0042542532432115, + "grad_norm": 277.9081726074219, + "learning_rate": 9.96626348192531e-10, + "loss": 44.1801, + "step": 497140 + }, + { + "epoch": 1.0042744538758954, + "grad_norm": 167.16905212402344, + "learning_rate": 9.896693469829689e-10, + "loss": 25.4072, + "step": 497150 + }, + { + "epoch": 1.0042946545085791, + "grad_norm": 246.45228576660156, + "learning_rate": 9.827367103437679e-10, + "loss": 15.8333, + "step": 497160 + }, + { + "epoch": 1.004314855141263, + "grad_norm": 320.1998291015625, + "learning_rate": 9.758284383082351e-10, + "loss": 15.7299, + "step": 497170 + }, + { + "epoch": 1.0043350557739468, + "grad_norm": 255.04876708984375, + "learning_rate": 9.68944530910787e-10, + "loss": 21.4589, + "step": 497180 + }, + { + "epoch": 1.0043552564066307, + "grad_norm": 858.8927612304688, + "learning_rate": 9.620849881836203e-10, + "loss": 11.0519, + "step": 497190 + }, + { + "epoch": 1.0043754570393144, + "grad_norm": 290.14703369140625, + "learning_rate": 9.55249810161152e-10, + "loss": 16.7142, + "step": 497200 + }, + { + "epoch": 1.0043956576719983, + "grad_norm": 79.18550872802734, + "learning_rate": 9.484389968766882e-10, + "loss": 14.6362, + "step": 497210 + }, + { + "epoch": 1.004415858304682, + "grad_norm": 2.3829500675201416, + "learning_rate": 9.416525483635364e-10, + "loss": 11.5494, + "step": 497220 + }, + { + "epoch": 1.004436058937366, + "grad_norm": 472.21539306640625, + "learning_rate": 9.348904646538925e-10, + "loss": 20.2588, + "step": 497230 + }, + { + "epoch": 1.0044562595700497, + "grad_norm": 422.0057678222656, + "learning_rate": 9.281527457816186e-10, + "loss": 15.2321, + "step": 497240 + }, + { + "epoch": 1.0044764602027336, + "grad_norm": 235.6026153564453, + "learning_rate": 9.214393917789111e-10, + "loss": 14.2804, + "step": 497250 + }, + { + "epoch": 1.0044966608354173, + "grad_norm": 138.6964569091797, + "learning_rate": 9.147504026790766e-10, + "loss": 7.2866, + "step": 497260 + }, + { + "epoch": 1.0045168614681013, + "grad_norm": 85.42167663574219, + "learning_rate": 9.080857785137564e-10, + "loss": 16.7213, + "step": 497270 + }, + { + "epoch": 1.004537062100785, + "grad_norm": 331.20611572265625, + "learning_rate": 9.014455193168125e-10, + "loss": 7.9069, + "step": 497280 + }, + { + "epoch": 1.004557262733469, + "grad_norm": 794.2719116210938, + "learning_rate": 8.948296251198863e-10, + "loss": 21.0431, + "step": 497290 + }, + { + "epoch": 1.0045774633661526, + "grad_norm": 445.93719482421875, + "learning_rate": 8.88238095955174e-10, + "loss": 17.8171, + "step": 497300 + }, + { + "epoch": 1.0045976639988365, + "grad_norm": 862.58984375, + "learning_rate": 8.816709318543171e-10, + "loss": 25.3989, + "step": 497310 + }, + { + "epoch": 1.0046178646315203, + "grad_norm": 626.967041015625, + "learning_rate": 8.751281328506223e-10, + "loss": 20.0826, + "step": 497320 + }, + { + "epoch": 1.0046380652642042, + "grad_norm": 348.0452575683594, + "learning_rate": 8.686096989751758e-10, + "loss": 17.8535, + "step": 497330 + }, + { + "epoch": 1.004658265896888, + "grad_norm": 500.5772705078125, + "learning_rate": 8.621156302590639e-10, + "loss": 10.7476, + "step": 497340 + }, + { + "epoch": 1.0046784665295716, + "grad_norm": 139.4596710205078, + "learning_rate": 8.556459267355932e-10, + "loss": 10.1938, + "step": 497350 + }, + { + "epoch": 1.0046986671622555, + "grad_norm": 663.2300415039062, + "learning_rate": 8.492005884347398e-10, + "loss": 27.1731, + "step": 497360 + }, + { + "epoch": 1.0047188677949392, + "grad_norm": 286.3394775390625, + "learning_rate": 8.427796153887002e-10, + "loss": 19.3634, + "step": 497370 + }, + { + "epoch": 1.0047390684276232, + "grad_norm": 329.2220153808594, + "learning_rate": 8.363830076285606e-10, + "loss": 8.8834, + "step": 497380 + }, + { + "epoch": 1.0047592690603069, + "grad_norm": 248.72828674316406, + "learning_rate": 8.300107651859623e-10, + "loss": 21.1644, + "step": 497390 + }, + { + "epoch": 1.0047794696929908, + "grad_norm": 326.47314453125, + "learning_rate": 8.236628880914365e-10, + "loss": 17.0379, + "step": 497400 + }, + { + "epoch": 1.0047996703256745, + "grad_norm": 615.4883422851562, + "learning_rate": 8.173393763760695e-10, + "loss": 24.0844, + "step": 497410 + }, + { + "epoch": 1.0048198709583585, + "grad_norm": 468.8843688964844, + "learning_rate": 8.110402300703924e-10, + "loss": 22.9501, + "step": 497420 + }, + { + "epoch": 1.0048400715910422, + "grad_norm": 204.00584411621094, + "learning_rate": 8.047654492054913e-10, + "loss": 6.4912, + "step": 497430 + }, + { + "epoch": 1.004860272223726, + "grad_norm": 430.0341796875, + "learning_rate": 7.985150338118974e-10, + "loss": 19.2455, + "step": 497440 + }, + { + "epoch": 1.0048804728564098, + "grad_norm": 214.84359741210938, + "learning_rate": 7.92288983920142e-10, + "loss": 20.7306, + "step": 497450 + }, + { + "epoch": 1.0049006734890937, + "grad_norm": 365.97454833984375, + "learning_rate": 7.860872995602009e-10, + "loss": 11.9094, + "step": 497460 + }, + { + "epoch": 1.0049208741217774, + "grad_norm": 416.4472351074219, + "learning_rate": 7.799099807626054e-10, + "loss": 12.0364, + "step": 497470 + }, + { + "epoch": 1.0049410747544614, + "grad_norm": 1460.086669921875, + "learning_rate": 7.737570275573314e-10, + "loss": 19.3271, + "step": 497480 + }, + { + "epoch": 1.004961275387145, + "grad_norm": 724.4830322265625, + "learning_rate": 7.67628439974355e-10, + "loss": 14.0878, + "step": 497490 + }, + { + "epoch": 1.004981476019829, + "grad_norm": 168.28736877441406, + "learning_rate": 7.615242180436521e-10, + "loss": 10.0536, + "step": 497500 + }, + { + "epoch": 1.0050016766525127, + "grad_norm": 419.37451171875, + "learning_rate": 7.55444361795199e-10, + "loss": 11.5683, + "step": 497510 + }, + { + "epoch": 1.0050218772851967, + "grad_norm": 365.8724060058594, + "learning_rate": 7.493888712584163e-10, + "loss": 10.9492, + "step": 497520 + }, + { + "epoch": 1.0050420779178804, + "grad_norm": 379.0532531738281, + "learning_rate": 7.433577464621699e-10, + "loss": 17.9451, + "step": 497530 + }, + { + "epoch": 1.0050622785505643, + "grad_norm": 64.69229888916016, + "learning_rate": 7.373509874369911e-10, + "loss": 17.502, + "step": 497540 + }, + { + "epoch": 1.005082479183248, + "grad_norm": 791.7032470703125, + "learning_rate": 7.313685942117454e-10, + "loss": 18.4521, + "step": 497550 + }, + { + "epoch": 1.005102679815932, + "grad_norm": 498.0197448730469, + "learning_rate": 7.254105668152988e-10, + "loss": 26.5022, + "step": 497560 + }, + { + "epoch": 1.0051228804486156, + "grad_norm": 162.7668914794922, + "learning_rate": 7.194769052765171e-10, + "loss": 10.4517, + "step": 497570 + }, + { + "epoch": 1.0051430810812996, + "grad_norm": 347.5716857910156, + "learning_rate": 7.135676096253763e-10, + "loss": 21.9197, + "step": 497580 + }, + { + "epoch": 1.0051632817139833, + "grad_norm": 225.25503540039062, + "learning_rate": 7.076826798890768e-10, + "loss": 14.4115, + "step": 497590 + }, + { + "epoch": 1.005183482346667, + "grad_norm": 222.23606872558594, + "learning_rate": 7.018221160981498e-10, + "loss": 10.2202, + "step": 497600 + }, + { + "epoch": 1.005203682979351, + "grad_norm": 390.8809814453125, + "learning_rate": 6.959859182792406e-10, + "loss": 11.5496, + "step": 497610 + }, + { + "epoch": 1.0052238836120346, + "grad_norm": 0.0, + "learning_rate": 6.901740864623252e-10, + "loss": 14.3404, + "step": 497620 + }, + { + "epoch": 1.0052440842447186, + "grad_norm": 217.84158325195312, + "learning_rate": 6.843866206751593e-10, + "loss": 6.5539, + "step": 497630 + }, + { + "epoch": 1.0052642848774023, + "grad_norm": 186.2528076171875, + "learning_rate": 6.786235209460534e-10, + "loss": 20.873, + "step": 497640 + }, + { + "epoch": 1.0052844855100862, + "grad_norm": 452.13299560546875, + "learning_rate": 6.728847873027633e-10, + "loss": 18.4681, + "step": 497650 + }, + { + "epoch": 1.00530468614277, + "grad_norm": 740.79296875, + "learning_rate": 6.671704197735995e-10, + "loss": 17.4599, + "step": 497660 + }, + { + "epoch": 1.0053248867754538, + "grad_norm": 409.2422180175781, + "learning_rate": 6.614804183857626e-10, + "loss": 19.8806, + "step": 497670 + }, + { + "epoch": 1.0053450874081375, + "grad_norm": 237.03514099121094, + "learning_rate": 6.558147831681183e-10, + "loss": 11.4689, + "step": 497680 + }, + { + "epoch": 1.0053652880408215, + "grad_norm": 358.61102294921875, + "learning_rate": 6.501735141478672e-10, + "loss": 23.0816, + "step": 497690 + }, + { + "epoch": 1.0053854886735052, + "grad_norm": 44.100135803222656, + "learning_rate": 6.445566113516544e-10, + "loss": 15.8719, + "step": 497700 + }, + { + "epoch": 1.0054056893061891, + "grad_norm": 798.8607788085938, + "learning_rate": 6.389640748077907e-10, + "loss": 17.1606, + "step": 497710 + }, + { + "epoch": 1.0054258899388728, + "grad_norm": 247.9388885498047, + "learning_rate": 6.333959045434768e-10, + "loss": 8.721, + "step": 497720 + }, + { + "epoch": 1.0054460905715568, + "grad_norm": 454.4766540527344, + "learning_rate": 6.278521005853578e-10, + "loss": 11.72, + "step": 497730 + }, + { + "epoch": 1.0054662912042405, + "grad_norm": 138.5755157470703, + "learning_rate": 6.223326629611893e-10, + "loss": 12.6029, + "step": 497740 + }, + { + "epoch": 1.0054864918369244, + "grad_norm": 273.68939208984375, + "learning_rate": 6.168375916970615e-10, + "loss": 11.2898, + "step": 497750 + }, + { + "epoch": 1.005506692469608, + "grad_norm": 74.56575775146484, + "learning_rate": 6.11366886820175e-10, + "loss": 16.079, + "step": 497760 + }, + { + "epoch": 1.005526893102292, + "grad_norm": 563.3392333984375, + "learning_rate": 6.05920548357175e-10, + "loss": 20.3108, + "step": 497770 + }, + { + "epoch": 1.0055470937349757, + "grad_norm": 344.8863220214844, + "learning_rate": 6.00498576334152e-10, + "loss": 21.3803, + "step": 497780 + }, + { + "epoch": 1.0055672943676597, + "grad_norm": 591.0133666992188, + "learning_rate": 5.951009707783062e-10, + "loss": 15.0725, + "step": 497790 + }, + { + "epoch": 1.0055874950003434, + "grad_norm": 220.0905303955078, + "learning_rate": 5.897277317157279e-10, + "loss": 19.6494, + "step": 497800 + }, + { + "epoch": 1.0056076956330273, + "grad_norm": 382.07415771484375, + "learning_rate": 5.843788591725074e-10, + "loss": 17.0656, + "step": 497810 + }, + { + "epoch": 1.005627896265711, + "grad_norm": 285.0874938964844, + "learning_rate": 5.790543531741799e-10, + "loss": 15.2152, + "step": 497820 + }, + { + "epoch": 1.005648096898395, + "grad_norm": 224.52911376953125, + "learning_rate": 5.737542137479457e-10, + "loss": 32.5519, + "step": 497830 + }, + { + "epoch": 1.0056682975310787, + "grad_norm": 124.53401947021484, + "learning_rate": 5.684784409182298e-10, + "loss": 11.5611, + "step": 497840 + }, + { + "epoch": 1.0056884981637624, + "grad_norm": 38.93964767456055, + "learning_rate": 5.632270347116775e-10, + "loss": 9.7367, + "step": 497850 + }, + { + "epoch": 1.0057086987964463, + "grad_norm": 280.2174987792969, + "learning_rate": 5.579999951532688e-10, + "loss": 22.2611, + "step": 497860 + }, + { + "epoch": 1.00572889942913, + "grad_norm": 653.9365844726562, + "learning_rate": 5.527973222690941e-10, + "loss": 23.448, + "step": 497870 + }, + { + "epoch": 1.005749100061814, + "grad_norm": 411.93145751953125, + "learning_rate": 5.476190160841333e-10, + "loss": 18.0152, + "step": 497880 + }, + { + "epoch": 1.0057693006944977, + "grad_norm": 292.06671142578125, + "learning_rate": 5.424650766239215e-10, + "loss": 24.901, + "step": 497890 + }, + { + "epoch": 1.0057895013271816, + "grad_norm": 1511.5247802734375, + "learning_rate": 5.373355039128836e-10, + "loss": 25.5441, + "step": 497900 + }, + { + "epoch": 1.0058097019598653, + "grad_norm": 163.15565490722656, + "learning_rate": 5.322302979771099e-10, + "loss": 13.7009, + "step": 497910 + }, + { + "epoch": 1.0058299025925492, + "grad_norm": 459.66351318359375, + "learning_rate": 5.271494588404702e-10, + "loss": 16.1014, + "step": 497920 + }, + { + "epoch": 1.005850103225233, + "grad_norm": 14.534080505371094, + "learning_rate": 5.220929865284996e-10, + "loss": 12.6089, + "step": 497930 + }, + { + "epoch": 1.0058703038579169, + "grad_norm": 387.5732727050781, + "learning_rate": 5.170608810650679e-10, + "loss": 22.0805, + "step": 497940 + }, + { + "epoch": 1.0058905044906006, + "grad_norm": 884.1080322265625, + "learning_rate": 5.120531424751551e-10, + "loss": 13.3409, + "step": 497950 + }, + { + "epoch": 1.0059107051232845, + "grad_norm": 547.0142822265625, + "learning_rate": 5.070697707837413e-10, + "loss": 39.905, + "step": 497960 + }, + { + "epoch": 1.0059309057559682, + "grad_norm": 529.3761596679688, + "learning_rate": 5.02110766013586e-10, + "loss": 14.4093, + "step": 497970 + }, + { + "epoch": 1.0059511063886521, + "grad_norm": 484.35809326171875, + "learning_rate": 4.971761281907795e-10, + "loss": 15.3145, + "step": 497980 + }, + { + "epoch": 1.0059713070213359, + "grad_norm": 1.8353664875030518, + "learning_rate": 4.922658573375261e-10, + "loss": 13.9594, + "step": 497990 + }, + { + "epoch": 1.0059915076540198, + "grad_norm": 90.44378662109375, + "learning_rate": 4.87379953478806e-10, + "loss": 14.71, + "step": 498000 + }, + { + "epoch": 1.0060117082867035, + "grad_norm": 336.6805114746094, + "learning_rate": 4.825184166384888e-10, + "loss": 27.1868, + "step": 498010 + }, + { + "epoch": 1.0060319089193874, + "grad_norm": 479.4397888183594, + "learning_rate": 4.776812468398895e-10, + "loss": 9.6992, + "step": 498020 + }, + { + "epoch": 1.0060521095520711, + "grad_norm": 310.9299011230469, + "learning_rate": 4.728684441068776e-10, + "loss": 18.8371, + "step": 498030 + }, + { + "epoch": 1.006072310184755, + "grad_norm": 168.52430725097656, + "learning_rate": 4.680800084622128e-10, + "loss": 17.1496, + "step": 498040 + }, + { + "epoch": 1.0060925108174388, + "grad_norm": 8.164981842041016, + "learning_rate": 4.6331593993032e-10, + "loss": 11.4686, + "step": 498050 + }, + { + "epoch": 1.0061127114501227, + "grad_norm": 98.24127197265625, + "learning_rate": 4.585762385334036e-10, + "loss": 4.2927, + "step": 498060 + }, + { + "epoch": 1.0061329120828064, + "grad_norm": 165.088623046875, + "learning_rate": 4.538609042953335e-10, + "loss": 15.4089, + "step": 498070 + }, + { + "epoch": 1.0061531127154903, + "grad_norm": 589.4951782226562, + "learning_rate": 4.49169937238314e-10, + "loss": 12.651, + "step": 498080 + }, + { + "epoch": 1.006173313348174, + "grad_norm": 113.78429412841797, + "learning_rate": 4.445033373862151e-10, + "loss": 14.0095, + "step": 498090 + }, + { + "epoch": 1.006193513980858, + "grad_norm": 479.8966064453125, + "learning_rate": 4.398611047612411e-10, + "loss": 17.6846, + "step": 498100 + }, + { + "epoch": 1.0062137146135417, + "grad_norm": 600.0286254882812, + "learning_rate": 4.3524323938559655e-10, + "loss": 16.7164, + "step": 498110 + }, + { + "epoch": 1.0062339152462254, + "grad_norm": 245.16664123535156, + "learning_rate": 4.3064974128259605e-10, + "loss": 11.4906, + "step": 498120 + }, + { + "epoch": 1.0062541158789093, + "grad_norm": 233.82284545898438, + "learning_rate": 4.2608061047388905e-10, + "loss": 18.9105, + "step": 498130 + }, + { + "epoch": 1.006274316511593, + "grad_norm": 74.9719009399414, + "learning_rate": 4.21535846982235e-10, + "loss": 11.5325, + "step": 498140 + }, + { + "epoch": 1.006294517144277, + "grad_norm": 379.4974365234375, + "learning_rate": 4.1701545082928343e-10, + "loss": 26.145, + "step": 498150 + }, + { + "epoch": 1.0063147177769607, + "grad_norm": 402.6711120605469, + "learning_rate": 4.125194220377937e-10, + "loss": 18.0747, + "step": 498160 + }, + { + "epoch": 1.0063349184096446, + "grad_norm": 253.51841735839844, + "learning_rate": 4.0804776062941533e-10, + "loss": 5.7595, + "step": 498170 + }, + { + "epoch": 1.0063551190423283, + "grad_norm": 314.94598388671875, + "learning_rate": 4.0360046662579753e-10, + "loss": 13.1086, + "step": 498180 + }, + { + "epoch": 1.0063753196750123, + "grad_norm": 128.70509338378906, + "learning_rate": 3.991775400485898e-10, + "loss": 19.3338, + "step": 498190 + }, + { + "epoch": 1.006395520307696, + "grad_norm": 373.6906433105469, + "learning_rate": 3.9477898091944135e-10, + "loss": 24.4669, + "step": 498200 + }, + { + "epoch": 1.00641572094038, + "grad_norm": 681.62451171875, + "learning_rate": 3.9040478925944645e-10, + "loss": 32.6684, + "step": 498210 + }, + { + "epoch": 1.0064359215730636, + "grad_norm": 95.27474975585938, + "learning_rate": 3.8605496509080966e-10, + "loss": 11.4944, + "step": 498220 + }, + { + "epoch": 1.0064561222057475, + "grad_norm": 506.1835021972656, + "learning_rate": 3.8172950843351485e-10, + "loss": 26.6546, + "step": 498230 + }, + { + "epoch": 1.0064763228384312, + "grad_norm": 118.24510192871094, + "learning_rate": 3.774284193097666e-10, + "loss": 8.1574, + "step": 498240 + }, + { + "epoch": 1.0064965234711152, + "grad_norm": 354.12835693359375, + "learning_rate": 3.7315169774010397e-10, + "loss": 9.9255, + "step": 498250 + }, + { + "epoch": 1.0065167241037989, + "grad_norm": 302.7308654785156, + "learning_rate": 3.6889934374506606e-10, + "loss": 29.9269, + "step": 498260 + }, + { + "epoch": 1.0065369247364828, + "grad_norm": 262.63055419921875, + "learning_rate": 3.646713573457472e-10, + "loss": 17.3862, + "step": 498270 + }, + { + "epoch": 1.0065571253691665, + "grad_norm": 368.1761474609375, + "learning_rate": 3.604677385626865e-10, + "loss": 13.9558, + "step": 498280 + }, + { + "epoch": 1.0065773260018505, + "grad_norm": 139.5403289794922, + "learning_rate": 3.562884874158679e-10, + "loss": 14.0402, + "step": 498290 + }, + { + "epoch": 1.0065975266345342, + "grad_norm": 166.00411987304688, + "learning_rate": 3.521336039263856e-10, + "loss": 4.6587, + "step": 498300 + }, + { + "epoch": 1.006617727267218, + "grad_norm": 315.66888427734375, + "learning_rate": 3.480030881147789e-10, + "loss": 11.2647, + "step": 498310 + }, + { + "epoch": 1.0066379278999018, + "grad_norm": 592.1396484375, + "learning_rate": 3.4389693999992146e-10, + "loss": 17.5871, + "step": 498320 + }, + { + "epoch": 1.0066581285325857, + "grad_norm": 167.7822265625, + "learning_rate": 3.3981515960290757e-10, + "loss": 14.7794, + "step": 498330 + }, + { + "epoch": 1.0066783291652694, + "grad_norm": 191.7164306640625, + "learning_rate": 3.357577469431661e-10, + "loss": 16.5493, + "step": 498340 + }, + { + "epoch": 1.0066985297979534, + "grad_norm": 135.2027587890625, + "learning_rate": 3.3172470204012597e-10, + "loss": 14.2077, + "step": 498350 + }, + { + "epoch": 1.006718730430637, + "grad_norm": 565.5507202148438, + "learning_rate": 3.277160249143263e-10, + "loss": 30.6504, + "step": 498360 + }, + { + "epoch": 1.0067389310633208, + "grad_norm": 243.60830688476562, + "learning_rate": 3.237317155846409e-10, + "loss": 13.5072, + "step": 498370 + }, + { + "epoch": 1.0067591316960047, + "grad_norm": 880.7012329101562, + "learning_rate": 3.1977177407105376e-10, + "loss": 13.7841, + "step": 498380 + }, + { + "epoch": 1.0067793323286884, + "grad_norm": 417.2605895996094, + "learning_rate": 3.158362003918836e-10, + "loss": 10.2344, + "step": 498390 + }, + { + "epoch": 1.0067995329613724, + "grad_norm": 372.4492492675781, + "learning_rate": 3.1192499456766947e-10, + "loss": 17.3512, + "step": 498400 + }, + { + "epoch": 1.006819733594056, + "grad_norm": 58.83364486694336, + "learning_rate": 3.0803815661617495e-10, + "loss": 11.6652, + "step": 498410 + }, + { + "epoch": 1.00683993422674, + "grad_norm": 2088.984375, + "learning_rate": 3.0417568655738416e-10, + "loss": 14.8833, + "step": 498420 + }, + { + "epoch": 1.0068601348594237, + "grad_norm": 418.00531005859375, + "learning_rate": 3.003375844090606e-10, + "loss": 19.6728, + "step": 498430 + }, + { + "epoch": 1.0068803354921076, + "grad_norm": 490.3150634765625, + "learning_rate": 2.9652385019118823e-10, + "loss": 18.3427, + "step": 498440 + }, + { + "epoch": 1.0069005361247914, + "grad_norm": 13.315130233764648, + "learning_rate": 2.9273448392097557e-10, + "loss": 17.9149, + "step": 498450 + }, + { + "epoch": 1.0069207367574753, + "grad_norm": 384.3954772949219, + "learning_rate": 2.8896948561785156e-10, + "loss": 22.0337, + "step": 498460 + }, + { + "epoch": 1.006940937390159, + "grad_norm": 199.98281860351562, + "learning_rate": 2.8522885530013475e-10, + "loss": 15.961, + "step": 498470 + }, + { + "epoch": 1.006961138022843, + "grad_norm": 133.38661193847656, + "learning_rate": 2.8151259298558884e-10, + "loss": 12.1903, + "step": 498480 + }, + { + "epoch": 1.0069813386555266, + "grad_norm": 370.10784912109375, + "learning_rate": 2.7782069869253247e-10, + "loss": 10.3582, + "step": 498490 + }, + { + "epoch": 1.0070015392882106, + "grad_norm": 646.2947387695312, + "learning_rate": 2.741531724392843e-10, + "loss": 13.1196, + "step": 498500 + }, + { + "epoch": 1.0070217399208943, + "grad_norm": 270.6547546386719, + "learning_rate": 2.705100142430528e-10, + "loss": 18.6352, + "step": 498510 + }, + { + "epoch": 1.0070419405535782, + "grad_norm": 250.91160583496094, + "learning_rate": 2.668912241221566e-10, + "loss": 13.065, + "step": 498520 + }, + { + "epoch": 1.007062141186262, + "grad_norm": 208.54881286621094, + "learning_rate": 2.6329680209435935e-10, + "loss": 17.9283, + "step": 498530 + }, + { + "epoch": 1.0070823418189458, + "grad_norm": 285.6217041015625, + "learning_rate": 2.597267481763144e-10, + "loss": 16.4179, + "step": 498540 + }, + { + "epoch": 1.0071025424516296, + "grad_norm": 315.50543212890625, + "learning_rate": 2.5618106238634033e-10, + "loss": 16.7256, + "step": 498550 + }, + { + "epoch": 1.0071227430843135, + "grad_norm": 163.26913452148438, + "learning_rate": 2.5265974474109054e-10, + "loss": 10.2823, + "step": 498560 + }, + { + "epoch": 1.0071429437169972, + "grad_norm": 439.5351867675781, + "learning_rate": 2.4916279525777356e-10, + "loss": 11.0456, + "step": 498570 + }, + { + "epoch": 1.0071631443496811, + "grad_norm": 148.00035095214844, + "learning_rate": 2.4569021395415283e-10, + "loss": 18.7685, + "step": 498580 + }, + { + "epoch": 1.0071833449823648, + "grad_norm": 350.3221740722656, + "learning_rate": 2.4224200084632664e-10, + "loss": 6.9299, + "step": 498590 + }, + { + "epoch": 1.0072035456150488, + "grad_norm": 314.0454406738281, + "learning_rate": 2.388181559515035e-10, + "loss": 10.4755, + "step": 498600 + }, + { + "epoch": 1.0072237462477325, + "grad_norm": 290.7841796875, + "learning_rate": 2.3541867928633665e-10, + "loss": 26.1827, + "step": 498610 + }, + { + "epoch": 1.0072439468804162, + "grad_norm": 380.9477844238281, + "learning_rate": 2.3204357086747952e-10, + "loss": 11.4454, + "step": 498620 + }, + { + "epoch": 1.0072641475131001, + "grad_norm": 363.6810607910156, + "learning_rate": 2.2869283071103032e-10, + "loss": 13.2469, + "step": 498630 + }, + { + "epoch": 1.0072843481457838, + "grad_norm": 462.4739685058594, + "learning_rate": 2.2536645883308728e-10, + "loss": 15.2276, + "step": 498640 + }, + { + "epoch": 1.0073045487784678, + "grad_norm": 88.65006256103516, + "learning_rate": 2.2206445525085886e-10, + "loss": 11.941, + "step": 498650 + }, + { + "epoch": 1.0073247494111515, + "grad_norm": 690.76416015625, + "learning_rate": 2.1878681997988816e-10, + "loss": 19.5642, + "step": 498660 + }, + { + "epoch": 1.0073449500438354, + "grad_norm": 256.72381591796875, + "learning_rate": 2.1553355303627343e-10, + "loss": 11.1942, + "step": 498670 + }, + { + "epoch": 1.007365150676519, + "grad_norm": 402.1224670410156, + "learning_rate": 2.123046544355578e-10, + "loss": 25.1618, + "step": 498680 + }, + { + "epoch": 1.007385351309203, + "grad_norm": 367.8741149902344, + "learning_rate": 2.091001241932844e-10, + "loss": 13.3203, + "step": 498690 + }, + { + "epoch": 1.0074055519418867, + "grad_norm": 30.525959014892578, + "learning_rate": 2.0591996232610656e-10, + "loss": 17.055, + "step": 498700 + }, + { + "epoch": 1.0074257525745707, + "grad_norm": 425.9206237792969, + "learning_rate": 2.0276416884845718e-10, + "loss": 12.6519, + "step": 498710 + }, + { + "epoch": 1.0074459532072544, + "grad_norm": 529.9754638671875, + "learning_rate": 1.9963274377643448e-10, + "loss": 14.4685, + "step": 498720 + }, + { + "epoch": 1.0074661538399383, + "grad_norm": 270.33941650390625, + "learning_rate": 1.965256871244714e-10, + "loss": 18.3609, + "step": 498730 + }, + { + "epoch": 1.007486354472622, + "grad_norm": 296.189208984375, + "learning_rate": 1.9344299890866614e-10, + "loss": 19.2853, + "step": 498740 + }, + { + "epoch": 1.007506555105306, + "grad_norm": 559.3936157226562, + "learning_rate": 1.903846791434516e-10, + "loss": 17.3757, + "step": 498750 + }, + { + "epoch": 1.0075267557379897, + "grad_norm": 5.866914749145508, + "learning_rate": 1.873507278438158e-10, + "loss": 13.317, + "step": 498760 + }, + { + "epoch": 1.0075469563706736, + "grad_norm": 280.8948974609375, + "learning_rate": 1.8434114502530187e-10, + "loss": 16.1338, + "step": 498770 + }, + { + "epoch": 1.0075671570033573, + "grad_norm": 569.0048217773438, + "learning_rate": 1.8135593070123246e-10, + "loss": 27.7739, + "step": 498780 + }, + { + "epoch": 1.0075873576360412, + "grad_norm": 373.78021240234375, + "learning_rate": 1.7839508488715075e-10, + "loss": 15.2462, + "step": 498790 + }, + { + "epoch": 1.007607558268725, + "grad_norm": 307.22509765625, + "learning_rate": 1.7545860759693446e-10, + "loss": 7.8948, + "step": 498800 + }, + { + "epoch": 1.0076277589014089, + "grad_norm": 312.2191467285156, + "learning_rate": 1.725464988450165e-10, + "loss": 7.6725, + "step": 498810 + }, + { + "epoch": 1.0076479595340926, + "grad_norm": 427.0631103515625, + "learning_rate": 1.6965875864582983e-10, + "loss": 13.7366, + "step": 498820 + }, + { + "epoch": 1.0076681601667765, + "grad_norm": 285.7724914550781, + "learning_rate": 1.6679538701325215e-10, + "loss": 23.4431, + "step": 498830 + }, + { + "epoch": 1.0076883607994602, + "grad_norm": 444.3369445800781, + "learning_rate": 1.6395638396171643e-10, + "loss": 11.7128, + "step": 498840 + }, + { + "epoch": 1.0077085614321442, + "grad_norm": 323.95343017578125, + "learning_rate": 1.611417495045453e-10, + "loss": 19.7216, + "step": 498850 + }, + { + "epoch": 1.0077287620648279, + "grad_norm": 865.90771484375, + "learning_rate": 1.5835148365506148e-10, + "loss": 26.0676, + "step": 498860 + }, + { + "epoch": 1.0077489626975118, + "grad_norm": 140.72048950195312, + "learning_rate": 1.5558558642769782e-10, + "loss": 13.2926, + "step": 498870 + }, + { + "epoch": 1.0077691633301955, + "grad_norm": 154.49485778808594, + "learning_rate": 1.5284405783577706e-10, + "loss": 14.5025, + "step": 498880 + }, + { + "epoch": 1.0077893639628792, + "grad_norm": 326.7127685546875, + "learning_rate": 1.501268978920667e-10, + "loss": 15.5334, + "step": 498890 + }, + { + "epoch": 1.0078095645955631, + "grad_norm": 357.9048767089844, + "learning_rate": 1.4743410661044454e-10, + "loss": 18.7476, + "step": 498900 + }, + { + "epoch": 1.0078297652282469, + "grad_norm": 217.31222534179688, + "learning_rate": 1.4476568400367819e-10, + "loss": 6.5501, + "step": 498910 + }, + { + "epoch": 1.0078499658609308, + "grad_norm": 693.2562255859375, + "learning_rate": 1.4212163008509028e-10, + "loss": 19.8704, + "step": 498920 + }, + { + "epoch": 1.0078701664936145, + "grad_norm": 393.9979248046875, + "learning_rate": 1.3950194486744838e-10, + "loss": 16.0629, + "step": 498930 + }, + { + "epoch": 1.0078903671262984, + "grad_norm": 578.2315063476562, + "learning_rate": 1.369066283635201e-10, + "loss": 15.7025, + "step": 498940 + }, + { + "epoch": 1.0079105677589821, + "grad_norm": 516.0930786132812, + "learning_rate": 1.3433568058607293e-10, + "loss": 21.4167, + "step": 498950 + }, + { + "epoch": 1.007930768391666, + "grad_norm": 320.8323669433594, + "learning_rate": 1.3178910154676427e-10, + "loss": 16.2749, + "step": 498960 + }, + { + "epoch": 1.0079509690243498, + "grad_norm": 651.0137329101562, + "learning_rate": 1.292668912594719e-10, + "loss": 14.6414, + "step": 498970 + }, + { + "epoch": 1.0079711696570337, + "grad_norm": 120.41197204589844, + "learning_rate": 1.2676904973529802e-10, + "loss": 32.7481, + "step": 498980 + }, + { + "epoch": 1.0079913702897174, + "grad_norm": 148.75979614257812, + "learning_rate": 1.2429557698645512e-10, + "loss": 16.3187, + "step": 498990 + }, + { + "epoch": 1.0080115709224013, + "grad_norm": 297.499755859375, + "learning_rate": 1.2184647302626585e-10, + "loss": 11.6246, + "step": 499000 + }, + { + "epoch": 1.008031771555085, + "grad_norm": 32.80855178833008, + "learning_rate": 1.1942173786527732e-10, + "loss": 14.9501, + "step": 499010 + }, + { + "epoch": 1.008051972187769, + "grad_norm": 151.65139770507812, + "learning_rate": 1.1702137151570203e-10, + "loss": 13.6701, + "step": 499020 + }, + { + "epoch": 1.0080721728204527, + "grad_norm": 720.9283447265625, + "learning_rate": 1.146453739897524e-10, + "loss": 14.7302, + "step": 499030 + }, + { + "epoch": 1.0080923734531366, + "grad_norm": 1566.2060546875, + "learning_rate": 1.1229374529797555e-10, + "loss": 20.6653, + "step": 499040 + }, + { + "epoch": 1.0081125740858203, + "grad_norm": 130.35076904296875, + "learning_rate": 1.0996648545313904e-10, + "loss": 8.3957, + "step": 499050 + }, + { + "epoch": 1.0081327747185043, + "grad_norm": 219.70742797851562, + "learning_rate": 1.0766359446579e-10, + "loss": 11.406, + "step": 499060 + }, + { + "epoch": 1.008152975351188, + "grad_norm": 95.866455078125, + "learning_rate": 1.0538507234703066e-10, + "loss": 14.2112, + "step": 499070 + }, + { + "epoch": 1.008173175983872, + "grad_norm": 11.98952579498291, + "learning_rate": 1.0313091910796324e-10, + "loss": 13.1056, + "step": 499080 + }, + { + "epoch": 1.0081933766165556, + "grad_norm": 3.0641424655914307, + "learning_rate": 1.009011347602451e-10, + "loss": 11.0817, + "step": 499090 + }, + { + "epoch": 1.0082135772492395, + "grad_norm": 308.2926025390625, + "learning_rate": 9.869571931442334e-11, + "loss": 15.2073, + "step": 499100 + }, + { + "epoch": 1.0082337778819233, + "grad_norm": 312.8187255859375, + "learning_rate": 9.65146727810451e-11, + "loss": 14.3809, + "step": 499110 + }, + { + "epoch": 1.0082539785146072, + "grad_norm": 122.9480972290039, + "learning_rate": 9.435799517065746e-11, + "loss": 12.6968, + "step": 499120 + }, + { + "epoch": 1.008274179147291, + "grad_norm": 447.3537902832031, + "learning_rate": 9.222568649380759e-11, + "loss": 29.2527, + "step": 499130 + }, + { + "epoch": 1.0082943797799746, + "grad_norm": 555.0584106445312, + "learning_rate": 9.011774676159767e-11, + "loss": 10.0256, + "step": 499140 + }, + { + "epoch": 1.0083145804126585, + "grad_norm": 856.760498046875, + "learning_rate": 8.803417598346465e-11, + "loss": 22.6539, + "step": 499150 + }, + { + "epoch": 1.0083347810453422, + "grad_norm": 121.70764923095703, + "learning_rate": 8.597497416940048e-11, + "loss": 17.7426, + "step": 499160 + }, + { + "epoch": 1.0083549816780262, + "grad_norm": 831.8187255859375, + "learning_rate": 8.394014133050743e-11, + "loss": 19.8091, + "step": 499170 + }, + { + "epoch": 1.0083751823107099, + "grad_norm": 0.0, + "learning_rate": 8.192967747566727e-11, + "loss": 16.5445, + "step": 499180 + }, + { + "epoch": 1.0083953829433938, + "grad_norm": 333.3159484863281, + "learning_rate": 7.994358261542712e-11, + "loss": 29.0098, + "step": 499190 + }, + { + "epoch": 1.0084155835760775, + "grad_norm": 169.3303680419922, + "learning_rate": 7.798185675866876e-11, + "loss": 10.5793, + "step": 499200 + }, + { + "epoch": 1.0084357842087615, + "grad_norm": 539.6227416992188, + "learning_rate": 7.604449991593932e-11, + "loss": 16.7954, + "step": 499210 + }, + { + "epoch": 1.0084559848414452, + "grad_norm": 514.5540161132812, + "learning_rate": 7.413151209612057e-11, + "loss": 17.1064, + "step": 499220 + }, + { + "epoch": 1.008476185474129, + "grad_norm": 317.1817321777344, + "learning_rate": 7.224289330809431e-11, + "loss": 13.7756, + "step": 499230 + }, + { + "epoch": 1.0084963861068128, + "grad_norm": 243.17190551757812, + "learning_rate": 7.037864356185254e-11, + "loss": 9.7673, + "step": 499240 + }, + { + "epoch": 1.0085165867394967, + "grad_norm": 239.8211212158203, + "learning_rate": 6.853876286627703e-11, + "loss": 15.4792, + "step": 499250 + }, + { + "epoch": 1.0085367873721804, + "grad_norm": 420.01849365234375, + "learning_rate": 6.672325122969447e-11, + "loss": 27.2993, + "step": 499260 + }, + { + "epoch": 1.0085569880048644, + "grad_norm": 167.06922912597656, + "learning_rate": 6.493210866209687e-11, + "loss": 9.2243, + "step": 499270 + }, + { + "epoch": 1.008577188637548, + "grad_norm": 240.788818359375, + "learning_rate": 6.316533517125578e-11, + "loss": 16.8287, + "step": 499280 + }, + { + "epoch": 1.008597389270232, + "grad_norm": 162.93699645996094, + "learning_rate": 6.142293076605299e-11, + "loss": 13.0423, + "step": 499290 + }, + { + "epoch": 1.0086175899029157, + "grad_norm": 256.6506042480469, + "learning_rate": 5.970489545537028e-11, + "loss": 14.9599, + "step": 499300 + }, + { + "epoch": 1.0086377905355997, + "grad_norm": 299.09710693359375, + "learning_rate": 5.801122924697922e-11, + "loss": 9.736, + "step": 499310 + }, + { + "epoch": 1.0086579911682834, + "grad_norm": 406.6426696777344, + "learning_rate": 5.634193214976158e-11, + "loss": 23.1644, + "step": 499320 + }, + { + "epoch": 1.0086781918009673, + "grad_norm": 885.6886596679688, + "learning_rate": 5.469700417093382e-11, + "loss": 26.4493, + "step": 499330 + }, + { + "epoch": 1.008698392433651, + "grad_norm": 235.24378967285156, + "learning_rate": 5.3076445319932835e-11, + "loss": 13.3897, + "step": 499340 + }, + { + "epoch": 1.008718593066335, + "grad_norm": 318.3589782714844, + "learning_rate": 5.148025560341996e-11, + "loss": 10.6445, + "step": 499350 + }, + { + "epoch": 1.0087387936990186, + "grad_norm": 773.137939453125, + "learning_rate": 4.990843502916676e-11, + "loss": 22.0634, + "step": 499360 + }, + { + "epoch": 1.0087589943317026, + "grad_norm": 497.8670654296875, + "learning_rate": 4.83609836054999e-11, + "loss": 17.3824, + "step": 499370 + }, + { + "epoch": 1.0087791949643863, + "grad_norm": 262.0536193847656, + "learning_rate": 4.683790134019095e-11, + "loss": 15.0474, + "step": 499380 + }, + { + "epoch": 1.00879939559707, + "grad_norm": 205.39247131347656, + "learning_rate": 4.533918823934613e-11, + "loss": 16.6214, + "step": 499390 + }, + { + "epoch": 1.008819596229754, + "grad_norm": 245.76242065429688, + "learning_rate": 4.3864844311847235e-11, + "loss": 13.7095, + "step": 499400 + }, + { + "epoch": 1.0088397968624376, + "grad_norm": 106.1578598022461, + "learning_rate": 4.2414869563800475e-11, + "loss": 12.3088, + "step": 499410 + }, + { + "epoch": 1.0088599974951216, + "grad_norm": 225.17047119140625, + "learning_rate": 4.0989264002422315e-11, + "loss": 21.1545, + "step": 499420 + }, + { + "epoch": 1.0088801981278053, + "grad_norm": 1929.8975830078125, + "learning_rate": 3.9588027634929195e-11, + "loss": 29.5104, + "step": 499430 + }, + { + "epoch": 1.0089003987604892, + "grad_norm": 284.91241455078125, + "learning_rate": 3.8211160467982453e-11, + "loss": 21.3082, + "step": 499440 + }, + { + "epoch": 1.008920599393173, + "grad_norm": 128.1702880859375, + "learning_rate": 3.685866250879855e-11, + "loss": 8.1251, + "step": 499450 + }, + { + "epoch": 1.0089408000258568, + "grad_norm": 333.2735595703125, + "learning_rate": 3.55305337634837e-11, + "loss": 10.4189, + "step": 499460 + }, + { + "epoch": 1.0089610006585406, + "grad_norm": 577.0447387695312, + "learning_rate": 3.4226774238144135e-11, + "loss": 20.4437, + "step": 499470 + }, + { + "epoch": 1.0089812012912245, + "grad_norm": 200.242431640625, + "learning_rate": 3.29473839399963e-11, + "loss": 8.238, + "step": 499480 + }, + { + "epoch": 1.0090014019239082, + "grad_norm": 446.62591552734375, + "learning_rate": 3.169236287459132e-11, + "loss": 21.3551, + "step": 499490 + }, + { + "epoch": 1.0090216025565921, + "grad_norm": 245.18592834472656, + "learning_rate": 3.0461711048035415e-11, + "loss": 14.5327, + "step": 499500 + }, + { + "epoch": 1.0090418031892758, + "grad_norm": 302.41912841796875, + "learning_rate": 2.925542846698992e-11, + "loss": 16.4099, + "step": 499510 + }, + { + "epoch": 1.0090620038219598, + "grad_norm": 134.86671447753906, + "learning_rate": 2.8073515137005957e-11, + "loss": 22.4217, + "step": 499520 + }, + { + "epoch": 1.0090822044546435, + "grad_norm": 9.022638320922852, + "learning_rate": 2.6915971063079527e-11, + "loss": 11.245, + "step": 499530 + }, + { + "epoch": 1.0091024050873274, + "grad_norm": 97.82958221435547, + "learning_rate": 2.5782796252427078e-11, + "loss": 21.2855, + "step": 499540 + }, + { + "epoch": 1.0091226057200111, + "grad_norm": 312.6911315917969, + "learning_rate": 2.467399070893439e-11, + "loss": 11.174, + "step": 499550 + }, + { + "epoch": 1.009142806352695, + "grad_norm": 215.1186981201172, + "learning_rate": 2.3589554439262807e-11, + "loss": 9.1901, + "step": 499560 + }, + { + "epoch": 1.0091630069853788, + "grad_norm": 136.40174865722656, + "learning_rate": 2.252948744840833e-11, + "loss": 16.0177, + "step": 499570 + }, + { + "epoch": 1.0091832076180627, + "grad_norm": 324.7275085449219, + "learning_rate": 2.1493789740811843e-11, + "loss": 24.6827, + "step": 499580 + }, + { + "epoch": 1.0092034082507464, + "grad_norm": 0.0, + "learning_rate": 2.048246132202447e-11, + "loss": 16.4556, + "step": 499590 + }, + { + "epoch": 1.0092236088834303, + "grad_norm": 422.9095153808594, + "learning_rate": 1.9495502197042214e-11, + "loss": 18.1066, + "step": 499600 + }, + { + "epoch": 1.009243809516114, + "grad_norm": 362.4075622558594, + "learning_rate": 1.8532912370861077e-11, + "loss": 17.0982, + "step": 499610 + }, + { + "epoch": 1.009264010148798, + "grad_norm": 233.91114807128906, + "learning_rate": 1.759469184792195e-11, + "loss": 16.0633, + "step": 499620 + }, + { + "epoch": 1.0092842107814817, + "grad_norm": 72.83248138427734, + "learning_rate": 1.668084063266573e-11, + "loss": 15.5275, + "step": 499630 + }, + { + "epoch": 1.0093044114141656, + "grad_norm": 169.59181213378906, + "learning_rate": 1.57913587295333e-11, + "loss": 15.0927, + "step": 499640 + }, + { + "epoch": 1.0093246120468493, + "grad_norm": 277.2950134277344, + "learning_rate": 1.4926246142965562e-11, + "loss": 19.717, + "step": 499650 + }, + { + "epoch": 1.009344812679533, + "grad_norm": 350.11395263671875, + "learning_rate": 1.40855028774034e-11, + "loss": 12.4338, + "step": 499660 + }, + { + "epoch": 1.009365013312217, + "grad_norm": 398.5290832519531, + "learning_rate": 1.32691289367326e-11, + "loss": 16.4635, + "step": 499670 + }, + { + "epoch": 1.0093852139449007, + "grad_norm": 327.2204895019531, + "learning_rate": 1.2477124325394052e-11, + "loss": 13.1397, + "step": 499680 + }, + { + "epoch": 1.0094054145775846, + "grad_norm": 458.7875671386719, + "learning_rate": 1.1709489046163313e-11, + "loss": 12.1547, + "step": 499690 + }, + { + "epoch": 1.0094256152102683, + "grad_norm": 587.9765014648438, + "learning_rate": 1.0966223103481278e-11, + "loss": 20.2309, + "step": 499700 + }, + { + "epoch": 1.0094458158429522, + "grad_norm": 238.51296997070312, + "learning_rate": 1.0247326501233723e-11, + "loss": 16.5125, + "step": 499710 + }, + { + "epoch": 1.009466016475636, + "grad_norm": 115.95252227783203, + "learning_rate": 9.55279924275132e-12, + "loss": 20.8501, + "step": 499720 + }, + { + "epoch": 1.0094862171083199, + "grad_norm": 163.24656677246094, + "learning_rate": 8.882641330809627e-12, + "loss": 22.1904, + "step": 499730 + }, + { + "epoch": 1.0095064177410036, + "grad_norm": 232.28338623046875, + "learning_rate": 8.236852769294424e-12, + "loss": 9.229, + "step": 499740 + }, + { + "epoch": 1.0095266183736875, + "grad_norm": 515.9867553710938, + "learning_rate": 7.615433561536379e-12, + "loss": 20.9562, + "step": 499750 + }, + { + "epoch": 1.0095468190063712, + "grad_norm": 247.70721435546875, + "learning_rate": 7.018383709755938e-12, + "loss": 20.4152, + "step": 499760 + }, + { + "epoch": 1.0095670196390552, + "grad_norm": 192.70872497558594, + "learning_rate": 6.445703217838883e-12, + "loss": 19.3428, + "step": 499770 + }, + { + "epoch": 1.0095872202717389, + "grad_norm": 335.126220703125, + "learning_rate": 5.89739208800566e-12, + "loss": 8.8592, + "step": 499780 + }, + { + "epoch": 1.0096074209044228, + "grad_norm": 164.87554931640625, + "learning_rate": 5.373450322476714e-12, + "loss": 11.72, + "step": 499790 + }, + { + "epoch": 1.0096276215371065, + "grad_norm": 360.1464538574219, + "learning_rate": 4.873877924582715e-12, + "loss": 16.2168, + "step": 499800 + }, + { + "epoch": 1.0096478221697904, + "grad_norm": 190.5187225341797, + "learning_rate": 4.398674896544109e-12, + "loss": 9.1129, + "step": 499810 + }, + { + "epoch": 1.0096680228024741, + "grad_norm": 275.7220458984375, + "learning_rate": 3.947841241136452e-12, + "loss": 10.8498, + "step": 499820 + }, + { + "epoch": 1.009688223435158, + "grad_norm": 530.927490234375, + "learning_rate": 3.5213769594699687e-12, + "loss": 12.1524, + "step": 499830 + }, + { + "epoch": 1.0097084240678418, + "grad_norm": 36.31602478027344, + "learning_rate": 3.119282054320216e-12, + "loss": 24.7156, + "step": 499840 + }, + { + "epoch": 1.0097286247005257, + "grad_norm": 366.23614501953125, + "learning_rate": 2.741556527352529e-12, + "loss": 18.4046, + "step": 499850 + }, + { + "epoch": 1.0097488253332094, + "grad_norm": 598.1944580078125, + "learning_rate": 2.388200380787353e-12, + "loss": 15.4348, + "step": 499860 + }, + { + "epoch": 1.0097690259658934, + "grad_norm": 451.7855529785156, + "learning_rate": 2.0592136162900234e-12, + "loss": 26.9653, + "step": 499870 + }, + { + "epoch": 1.009789226598577, + "grad_norm": 644.4403076171875, + "learning_rate": 1.754596235525874e-12, + "loss": 16.8859, + "step": 499880 + }, + { + "epoch": 1.009809427231261, + "grad_norm": 555.8035888671875, + "learning_rate": 1.4743482390500164e-12, + "loss": 10.2884, + "step": 499890 + }, + { + "epoch": 1.0098296278639447, + "grad_norm": 84.61672973632812, + "learning_rate": 1.2184696296380083e-12, + "loss": 21.937, + "step": 499900 + }, + { + "epoch": 1.0098498284966284, + "grad_norm": 516.1853637695312, + "learning_rate": 9.869604078449612e-13, + "loss": 20.4575, + "step": 499910 + }, + { + "epoch": 1.0098700291293123, + "grad_norm": 22.995227813720703, + "learning_rate": 7.798205742259868e-13, + "loss": 25.0565, + "step": 499920 + }, + { + "epoch": 1.009890229761996, + "grad_norm": 109.76983642578125, + "learning_rate": 5.970501310015308e-13, + "loss": 18.0619, + "step": 499930 + }, + { + "epoch": 1.00991043039468, + "grad_norm": 89.95011901855469, + "learning_rate": 4.386490781715935e-13, + "loss": 22.4786, + "step": 499940 + }, + { + "epoch": 1.0099306310273637, + "grad_norm": 546.2017211914062, + "learning_rate": 3.046174168463978e-13, + "loss": 16.8511, + "step": 499950 + }, + { + "epoch": 1.0099508316600476, + "grad_norm": 125.24714660644531, + "learning_rate": 1.9495514758105516e-13, + "loss": 25.8975, + "step": 499960 + }, + { + "epoch": 1.0099710322927313, + "grad_norm": 319.3497619628906, + "learning_rate": 1.0966227093067716e-13, + "loss": 18.7779, + "step": 499970 + }, + { + "epoch": 1.0099912329254153, + "grad_norm": 271.3671569824219, + "learning_rate": 4.873878689526379e-14, + "loss": 13.7524, + "step": 499980 + }, + { + "epoch": 1.010011433558099, + "grad_norm": 145.14639282226562, + "learning_rate": 1.218469658503807e-14, + "loss": 12.4994, + "step": 499990 + }, + { + "epoch": 1.010031634190783, + "grad_norm": 309.948486328125, + "learning_rate": 0.0, + "loss": 8.6393, + "step": 500000 } ], "logging_steps": 10, @@ -294021,7 +350029,7 @@ "should_evaluate": false, "should_log": false, "should_save": true, - "should_training_stop": false + "should_training_stop": true }, "attributes": {} }