| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 6885, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0014524328249818446, |
| "grad_norm": 4.328955480739728, |
| "learning_rate": 1.3062409288824383e-07, |
| "loss": 0.9607, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.002904865649963689, |
| "grad_norm": 4.469323164876104, |
| "learning_rate": 2.757619738751814e-07, |
| "loss": 0.9859, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.004357298474945534, |
| "grad_norm": 4.000416594025176, |
| "learning_rate": 4.2089985486211904e-07, |
| "loss": 0.9872, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.005809731299927378, |
| "grad_norm": 3.1566001029759914, |
| "learning_rate": 5.660377358490567e-07, |
| "loss": 0.9191, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.007262164124909223, |
| "grad_norm": 2.000776925354802, |
| "learning_rate": 7.111756168359943e-07, |
| "loss": 0.866, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.008714596949891068, |
| "grad_norm": 2.03383269865318, |
| "learning_rate": 8.563134978229319e-07, |
| "loss": 0.8475, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.010167029774872912, |
| "grad_norm": 1.981671850063017, |
| "learning_rate": 1.0014513788098695e-06, |
| "loss": 0.8145, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.011619462599854757, |
| "grad_norm": 1.9935447101504142, |
| "learning_rate": 1.146589259796807e-06, |
| "loss": 0.7874, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.013071895424836602, |
| "grad_norm": 1.696794144473072, |
| "learning_rate": 1.2917271407837448e-06, |
| "loss": 0.7606, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.014524328249818447, |
| "grad_norm": 1.8441704167155635, |
| "learning_rate": 1.4368650217706823e-06, |
| "loss": 0.7505, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.01597676107480029, |
| "grad_norm": 1.6167640330505846, |
| "learning_rate": 1.5820029027576197e-06, |
| "loss": 0.7432, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.017429193899782137, |
| "grad_norm": 1.7310300613256226, |
| "learning_rate": 1.7271407837445576e-06, |
| "loss": 0.7502, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.01888162672476398, |
| "grad_norm": 1.5504171157690307, |
| "learning_rate": 1.872278664731495e-06, |
| "loss": 0.7075, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.020334059549745823, |
| "grad_norm": 1.5001595551333269, |
| "learning_rate": 2.0174165457184327e-06, |
| "loss": 0.7242, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.02178649237472767, |
| "grad_norm": 1.7680255328873922, |
| "learning_rate": 2.1625544267053704e-06, |
| "loss": 0.7299, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.023238925199709513, |
| "grad_norm": 1.9776874021989124, |
| "learning_rate": 2.307692307692308e-06, |
| "loss": 0.7074, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.024691358024691357, |
| "grad_norm": 1.645294675336186, |
| "learning_rate": 2.4528301886792453e-06, |
| "loss": 0.7003, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.026143790849673203, |
| "grad_norm": 1.903626800669526, |
| "learning_rate": 2.597968069666183e-06, |
| "loss": 0.6935, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.027596223674655047, |
| "grad_norm": 1.6296522016767983, |
| "learning_rate": 2.7431059506531207e-06, |
| "loss": 0.7099, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.029048656499636893, |
| "grad_norm": 1.5624745122869332, |
| "learning_rate": 2.8882438316400583e-06, |
| "loss": 0.7082, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.030501089324618737, |
| "grad_norm": 1.5327148829437787, |
| "learning_rate": 3.033381712626996e-06, |
| "loss": 0.6847, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.03195352214960058, |
| "grad_norm": 1.4217156007581908, |
| "learning_rate": 3.1785195936139337e-06, |
| "loss": 0.6997, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.03340595497458242, |
| "grad_norm": 1.678714535521671, |
| "learning_rate": 3.323657474600871e-06, |
| "loss": 0.6922, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.034858387799564274, |
| "grad_norm": 1.6893028132334575, |
| "learning_rate": 3.4687953555878086e-06, |
| "loss": 0.6764, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.03631082062454612, |
| "grad_norm": 1.6842923668045748, |
| "learning_rate": 3.6139332365747467e-06, |
| "loss": 0.6838, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.03776325344952796, |
| "grad_norm": 2.0758637079489306, |
| "learning_rate": 3.759071117561684e-06, |
| "loss": 0.6961, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0392156862745098, |
| "grad_norm": 1.651886885559497, |
| "learning_rate": 3.904208998548621e-06, |
| "loss": 0.6619, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.04066811909949165, |
| "grad_norm": 1.6813735734416895, |
| "learning_rate": 4.049346879535559e-06, |
| "loss": 0.691, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.04212055192447349, |
| "grad_norm": 1.8001370749006687, |
| "learning_rate": 4.194484760522497e-06, |
| "loss": 0.6646, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.04357298474945534, |
| "grad_norm": 1.8255351447030483, |
| "learning_rate": 4.339622641509435e-06, |
| "loss": 0.6595, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.04502541757443718, |
| "grad_norm": 1.7918481140936697, |
| "learning_rate": 4.484760522496372e-06, |
| "loss": 0.6555, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.04647785039941903, |
| "grad_norm": 1.6697318257583398, |
| "learning_rate": 4.629898403483309e-06, |
| "loss": 0.6734, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.04793028322440087, |
| "grad_norm": 1.5656777878920214, |
| "learning_rate": 4.775036284470247e-06, |
| "loss": 0.6511, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.04938271604938271, |
| "grad_norm": 1.6515736055504289, |
| "learning_rate": 4.920174165457185e-06, |
| "loss": 0.6651, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.050835148874364564, |
| "grad_norm": 1.6517233906536315, |
| "learning_rate": 5.065312046444122e-06, |
| "loss": 0.665, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.05228758169934641, |
| "grad_norm": 1.6987223199576384, |
| "learning_rate": 5.210449927431061e-06, |
| "loss": 0.6632, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.05374001452432825, |
| "grad_norm": 1.578744968443496, |
| "learning_rate": 5.355587808417998e-06, |
| "loss": 0.665, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.05519244734931009, |
| "grad_norm": 1.4975426293081397, |
| "learning_rate": 5.500725689404935e-06, |
| "loss": 0.6511, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.05664488017429194, |
| "grad_norm": 1.7386717568110297, |
| "learning_rate": 5.645863570391873e-06, |
| "loss": 0.6676, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.05809731299927379, |
| "grad_norm": 1.5916583497500596, |
| "learning_rate": 5.7910014513788105e-06, |
| "loss": 0.6635, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.05954974582425563, |
| "grad_norm": 1.6931617934865184, |
| "learning_rate": 5.936139332365748e-06, |
| "loss": 0.6668, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.06100217864923747, |
| "grad_norm": 1.5616372247201953, |
| "learning_rate": 6.081277213352685e-06, |
| "loss": 0.6685, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.06245461147421932, |
| "grad_norm": 1.5424914283941253, |
| "learning_rate": 6.226415094339623e-06, |
| "loss": 0.659, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.06390704429920116, |
| "grad_norm": 1.6468311050594455, |
| "learning_rate": 6.37155297532656e-06, |
| "loss": 0.6453, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.06535947712418301, |
| "grad_norm": 1.5765402125957226, |
| "learning_rate": 6.5166908563134976e-06, |
| "loss": 0.6598, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.06681190994916485, |
| "grad_norm": 1.7349394887283642, |
| "learning_rate": 6.6618287373004365e-06, |
| "loss": 0.6619, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.0682643427741467, |
| "grad_norm": 1.6385635232751372, |
| "learning_rate": 6.806966618287374e-06, |
| "loss": 0.6692, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.06971677559912855, |
| "grad_norm": 1.4945507177883908, |
| "learning_rate": 6.952104499274311e-06, |
| "loss": 0.6484, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.07116920842411038, |
| "grad_norm": 1.583857774726375, |
| "learning_rate": 7.097242380261249e-06, |
| "loss": 0.657, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.07262164124909223, |
| "grad_norm": 1.8780189334850588, |
| "learning_rate": 7.242380261248186e-06, |
| "loss": 0.6601, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.07407407407407407, |
| "grad_norm": 1.5153409007972507, |
| "learning_rate": 7.387518142235124e-06, |
| "loss": 0.6542, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.07552650689905592, |
| "grad_norm": 1.5243833834622142, |
| "learning_rate": 7.532656023222062e-06, |
| "loss": 0.6476, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.07697893972403776, |
| "grad_norm": 1.6429693792028686, |
| "learning_rate": 7.677793904208998e-06, |
| "loss": 0.6451, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.0784313725490196, |
| "grad_norm": 1.802860360098263, |
| "learning_rate": 7.822931785195936e-06, |
| "loss": 0.6527, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.07988380537400146, |
| "grad_norm": 1.6594363957156038, |
| "learning_rate": 7.968069666182874e-06, |
| "loss": 0.661, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0813362381989833, |
| "grad_norm": 1.5938255936259151, |
| "learning_rate": 8.113207547169812e-06, |
| "loss": 0.6547, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.08278867102396514, |
| "grad_norm": 1.3939924292770436, |
| "learning_rate": 8.25834542815675e-06, |
| "loss": 0.6609, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.08424110384894698, |
| "grad_norm": 1.5321796462771227, |
| "learning_rate": 8.403483309143687e-06, |
| "loss": 0.6419, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.08569353667392883, |
| "grad_norm": 1.5907007682060863, |
| "learning_rate": 8.548621190130625e-06, |
| "loss": 0.625, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.08714596949891068, |
| "grad_norm": 1.6048966671231157, |
| "learning_rate": 8.693759071117563e-06, |
| "loss": 0.658, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.08859840232389252, |
| "grad_norm": 1.457751877262412, |
| "learning_rate": 8.8388969521045e-06, |
| "loss": 0.6456, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.09005083514887437, |
| "grad_norm": 1.3925725985786772, |
| "learning_rate": 8.984034833091438e-06, |
| "loss": 0.6494, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.0915032679738562, |
| "grad_norm": 1.6476815627809678, |
| "learning_rate": 9.129172714078376e-06, |
| "loss": 0.6604, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.09295570079883805, |
| "grad_norm": 1.4844043302240553, |
| "learning_rate": 9.274310595065312e-06, |
| "loss": 0.6462, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.0944081336238199, |
| "grad_norm": 1.5541257847812342, |
| "learning_rate": 9.41944847605225e-06, |
| "loss": 0.6464, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.09586056644880174, |
| "grad_norm": 1.5339956751582804, |
| "learning_rate": 9.564586357039188e-06, |
| "loss": 0.6471, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.09731299927378359, |
| "grad_norm": 1.550006983868159, |
| "learning_rate": 9.709724238026126e-06, |
| "loss": 0.6519, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.09876543209876543, |
| "grad_norm": 1.298622779401985, |
| "learning_rate": 9.854862119013063e-06, |
| "loss": 0.6508, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.10021786492374728, |
| "grad_norm": 1.4545201677417376, |
| "learning_rate": 1e-05, |
| "loss": 0.6483, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.10167029774872913, |
| "grad_norm": 1.7514454450540817, |
| "learning_rate": 9.999935728859667e-06, |
| "loss": 0.6517, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.10312273057371096, |
| "grad_norm": 1.3010290416328456, |
| "learning_rate": 9.999742917090981e-06, |
| "loss": 0.6435, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.10457516339869281, |
| "grad_norm": 1.5222737445349914, |
| "learning_rate": 9.999421569650833e-06, |
| "loss": 0.6355, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.10602759622367465, |
| "grad_norm": 1.5758824439402839, |
| "learning_rate": 9.99897169480057e-06, |
| "loss": 0.6414, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.1074800290486565, |
| "grad_norm": 1.3245458819453462, |
| "learning_rate": 9.99839330410578e-06, |
| "loss": 0.6416, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.10893246187363835, |
| "grad_norm": 1.4753577499137038, |
| "learning_rate": 9.997686412435996e-06, |
| "loss": 0.6381, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.11038489469862019, |
| "grad_norm": 1.4578988593383, |
| "learning_rate": 9.99685103796431e-06, |
| "loss": 0.6369, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.11183732752360204, |
| "grad_norm": 1.389881220599468, |
| "learning_rate": 9.99588720216691e-06, |
| "loss": 0.6622, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.11328976034858387, |
| "grad_norm": 1.2318560606230133, |
| "learning_rate": 9.994794929822527e-06, |
| "loss": 0.6279, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.11474219317356572, |
| "grad_norm": 1.355472620629438, |
| "learning_rate": 9.993574249011797e-06, |
| "loss": 0.641, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.11619462599854757, |
| "grad_norm": 1.4379602146139996, |
| "learning_rate": 9.992225191116538e-06, |
| "loss": 0.6439, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 1.4777958226910466, |
| "learning_rate": 9.990747790818946e-06, |
| "loss": 0.6457, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.11909949164851126, |
| "grad_norm": 1.2895229336241503, |
| "learning_rate": 9.989142086100703e-06, |
| "loss": 0.6483, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.1205519244734931, |
| "grad_norm": 1.4811460587250382, |
| "learning_rate": 9.987408118241995e-06, |
| "loss": 0.6509, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.12200435729847495, |
| "grad_norm": 1.3189208191268318, |
| "learning_rate": 9.985545931820463e-06, |
| "loss": 0.6181, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.12345679012345678, |
| "grad_norm": 1.3731300368595278, |
| "learning_rate": 9.983555574710043e-06, |
| "loss": 0.6274, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.12490922294843863, |
| "grad_norm": 1.4055775942483093, |
| "learning_rate": 9.981437098079743e-06, |
| "loss": 0.6398, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.12636165577342048, |
| "grad_norm": 1.3307192435974602, |
| "learning_rate": 9.979190556392326e-06, |
| "loss": 0.6393, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.12781408859840232, |
| "grad_norm": 1.5622917958142868, |
| "learning_rate": 9.976816007402912e-06, |
| "loss": 0.6456, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.12926652142338416, |
| "grad_norm": 1.390636406480548, |
| "learning_rate": 9.974313512157488e-06, |
| "loss": 0.6288, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.13071895424836602, |
| "grad_norm": 1.4427250843896926, |
| "learning_rate": 9.971683134991344e-06, |
| "loss": 0.6266, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.13217138707334786, |
| "grad_norm": 1.4098179198178282, |
| "learning_rate": 9.968924943527418e-06, |
| "loss": 0.6411, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.1336238198983297, |
| "grad_norm": 1.4962238363929918, |
| "learning_rate": 9.96603900867455e-06, |
| "loss": 0.6315, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.13507625272331156, |
| "grad_norm": 1.3209044251278015, |
| "learning_rate": 9.963025404625673e-06, |
| "loss": 0.6423, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.1365286855482934, |
| "grad_norm": 1.39955503516968, |
| "learning_rate": 9.959884208855893e-06, |
| "loss": 0.6361, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.13798111837327523, |
| "grad_norm": 1.5348970475105241, |
| "learning_rate": 9.956615502120504e-06, |
| "loss": 0.6241, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.1394335511982571, |
| "grad_norm": 1.48874630945738, |
| "learning_rate": 9.953219368452908e-06, |
| "loss": 0.631, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.14088598402323893, |
| "grad_norm": 1.310857282598366, |
| "learning_rate": 9.949695895162464e-06, |
| "loss": 0.627, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.14233841684822077, |
| "grad_norm": 1.3619342578169393, |
| "learning_rate": 9.946045172832224e-06, |
| "loss": 0.6387, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.1437908496732026, |
| "grad_norm": 1.4936986486504984, |
| "learning_rate": 9.942267295316625e-06, |
| "loss": 0.6331, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.14524328249818447, |
| "grad_norm": 1.32511584393411, |
| "learning_rate": 9.938362359739068e-06, |
| "loss": 0.626, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1466957153231663, |
| "grad_norm": 1.3291454266011833, |
| "learning_rate": 9.934330466489414e-06, |
| "loss": 0.6451, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.14814814814814814, |
| "grad_norm": 1.3289648153139675, |
| "learning_rate": 9.930171719221418e-06, |
| "loss": 0.6333, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.14960058097313, |
| "grad_norm": 1.3388955314518605, |
| "learning_rate": 9.925886224850047e-06, |
| "loss": 0.6329, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.15105301379811184, |
| "grad_norm": 1.3788458990043229, |
| "learning_rate": 9.921474093548748e-06, |
| "loss": 0.6308, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.15250544662309368, |
| "grad_norm": 1.2630947233952987, |
| "learning_rate": 9.916935438746604e-06, |
| "loss": 0.6366, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1539578794480755, |
| "grad_norm": 1.2586848110727198, |
| "learning_rate": 9.912270377125424e-06, |
| "loss": 0.6224, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.15541031227305738, |
| "grad_norm": 1.5648142512317709, |
| "learning_rate": 9.90747902861674e-06, |
| "loss": 0.6261, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.1568627450980392, |
| "grad_norm": 1.477705850244199, |
| "learning_rate": 9.902561516398723e-06, |
| "loss": 0.6207, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.15831517792302105, |
| "grad_norm": 1.2950681154644361, |
| "learning_rate": 9.897517966893023e-06, |
| "loss": 0.6218, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.15976761074800291, |
| "grad_norm": 1.4613516139089748, |
| "learning_rate": 9.892348509761509e-06, |
| "loss": 0.6237, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.16122004357298475, |
| "grad_norm": 1.2641419484176866, |
| "learning_rate": 9.887053277902943e-06, |
| "loss": 0.6425, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.1626724763979666, |
| "grad_norm": 1.2419109246681843, |
| "learning_rate": 9.881632407449561e-06, |
| "loss": 0.6423, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.16412490922294845, |
| "grad_norm": 1.4096648257937974, |
| "learning_rate": 9.876086037763575e-06, |
| "loss": 0.6383, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.1655773420479303, |
| "grad_norm": 1.2574892255736747, |
| "learning_rate": 9.870414311433585e-06, |
| "loss": 0.6059, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.16702977487291212, |
| "grad_norm": 1.2716145459010044, |
| "learning_rate": 9.86461737427092e-06, |
| "loss": 0.6098, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.16848220769789396, |
| "grad_norm": 1.1998298755084313, |
| "learning_rate": 9.858695375305885e-06, |
| "loss": 0.6214, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.16993464052287582, |
| "grad_norm": 1.4281449888166444, |
| "learning_rate": 9.852648466783927e-06, |
| "loss": 0.6241, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.17138707334785766, |
| "grad_norm": 1.4071764477667867, |
| "learning_rate": 9.84647680416173e-06, |
| "loss": 0.6474, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.1728395061728395, |
| "grad_norm": 1.2174453861834778, |
| "learning_rate": 9.840180546103215e-06, |
| "loss": 0.6326, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.17429193899782136, |
| "grad_norm": 1.3029300772595094, |
| "learning_rate": 9.833759854475453e-06, |
| "loss": 0.6185, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.1757443718228032, |
| "grad_norm": 1.271112016193465, |
| "learning_rate": 9.827214894344514e-06, |
| "loss": 0.6301, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.17719680464778503, |
| "grad_norm": 1.2997276991719462, |
| "learning_rate": 9.82054583397122e-06, |
| "loss": 0.6317, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.1786492374727669, |
| "grad_norm": 1.2096030387104992, |
| "learning_rate": 9.813752844806814e-06, |
| "loss": 0.6159, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.18010167029774873, |
| "grad_norm": 1.2973416257944899, |
| "learning_rate": 9.806836101488561e-06, |
| "loss": 0.6289, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.18155410312273057, |
| "grad_norm": 1.3197440048632956, |
| "learning_rate": 9.799795781835253e-06, |
| "loss": 0.6088, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.1830065359477124, |
| "grad_norm": 1.2535036782710556, |
| "learning_rate": 9.79263206684264e-06, |
| "loss": 0.6206, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.18445896877269427, |
| "grad_norm": 1.3190252094745194, |
| "learning_rate": 9.785345140678775e-06, |
| "loss": 0.6149, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.1859114015976761, |
| "grad_norm": 1.3148617882447478, |
| "learning_rate": 9.777935190679277e-06, |
| "loss": 0.6134, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.18736383442265794, |
| "grad_norm": 1.3368521794263946, |
| "learning_rate": 9.770402407342524e-06, |
| "loss": 0.6258, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.1888162672476398, |
| "grad_norm": 1.3941700458180073, |
| "learning_rate": 9.762746984324743e-06, |
| "loss": 0.6191, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.19026870007262164, |
| "grad_norm": 1.3152403546822757, |
| "learning_rate": 9.754969118435043e-06, |
| "loss": 0.6446, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.19172113289760348, |
| "grad_norm": 1.3013626770341264, |
| "learning_rate": 9.747069009630347e-06, |
| "loss": 0.6312, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.19317356572258534, |
| "grad_norm": 1.3966383885583535, |
| "learning_rate": 9.739046861010255e-06, |
| "loss": 0.6207, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.19462599854756718, |
| "grad_norm": 1.1439991746974036, |
| "learning_rate": 9.730902878811825e-06, |
| "loss": 0.6144, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.19607843137254902, |
| "grad_norm": 1.3540894709055364, |
| "learning_rate": 9.722637272404263e-06, |
| "loss": 0.6044, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.19753086419753085, |
| "grad_norm": 1.100639588271217, |
| "learning_rate": 9.71425025428355e-06, |
| "loss": 0.6036, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.19898329702251272, |
| "grad_norm": 1.1874319432290736, |
| "learning_rate": 9.705742040066977e-06, |
| "loss": 0.6039, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.20043572984749455, |
| "grad_norm": 1.1767671647303808, |
| "learning_rate": 9.697112848487591e-06, |
| "loss": 0.6376, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.2018881626724764, |
| "grad_norm": 1.135879944041461, |
| "learning_rate": 9.688362901388586e-06, |
| "loss": 0.6035, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.20334059549745825, |
| "grad_norm": 1.2315910796359388, |
| "learning_rate": 9.679492423717596e-06, |
| "loss": 0.6098, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.2047930283224401, |
| "grad_norm": 1.4949408462288012, |
| "learning_rate": 9.670501643520904e-06, |
| "loss": 0.6203, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.20624546114742193, |
| "grad_norm": 1.3180181445795711, |
| "learning_rate": 9.66139079193759e-06, |
| "loss": 0.6286, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.20769789397240376, |
| "grad_norm": 1.2616556885045909, |
| "learning_rate": 9.652160103193583e-06, |
| "loss": 0.6274, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.20915032679738563, |
| "grad_norm": 1.3174449455574337, |
| "learning_rate": 9.642809814595637e-06, |
| "loss": 0.6136, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.21060275962236746, |
| "grad_norm": 1.296735377133819, |
| "learning_rate": 9.633340166525238e-06, |
| "loss": 0.6145, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.2120551924473493, |
| "grad_norm": 1.2502497833244608, |
| "learning_rate": 9.62375140243242e-06, |
| "loss": 0.6031, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.21350762527233116, |
| "grad_norm": 1.2288830705505374, |
| "learning_rate": 9.6140437688295e-06, |
| "loss": 0.6128, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.214960058097313, |
| "grad_norm": 1.1119473380240397, |
| "learning_rate": 9.604217515284753e-06, |
| "loss": 0.6171, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.21641249092229484, |
| "grad_norm": 1.2070397164389806, |
| "learning_rate": 9.594272894415986e-06, |
| "loss": 0.6238, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.2178649237472767, |
| "grad_norm": 1.3345637205372078, |
| "learning_rate": 9.584210161884049e-06, |
| "loss": 0.6163, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.21931735657225854, |
| "grad_norm": 1.1385043759036517, |
| "learning_rate": 9.57402957638626e-06, |
| "loss": 0.6083, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.22076978939724037, |
| "grad_norm": 1.1936988121465326, |
| "learning_rate": 9.563731399649756e-06, |
| "loss": 0.5992, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 1.4103572503621762, |
| "learning_rate": 9.553315896424758e-06, |
| "loss": 0.6054, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.22367465504720407, |
| "grad_norm": 1.3209719950503893, |
| "learning_rate": 9.54278333447778e-06, |
| "loss": 0.596, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.2251270878721859, |
| "grad_norm": 1.1693016501696898, |
| "learning_rate": 9.532133984584721e-06, |
| "loss": 0.6323, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.22657952069716775, |
| "grad_norm": 1.1691510921859125, |
| "learning_rate": 9.521368120523931e-06, |
| "loss": 0.6027, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.2280319535221496, |
| "grad_norm": 1.2114364957172101, |
| "learning_rate": 9.510486019069154e-06, |
| "loss": 0.6245, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.22948438634713145, |
| "grad_norm": 1.265123327235345, |
| "learning_rate": 9.499487959982415e-06, |
| "loss": 0.6189, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.23093681917211328, |
| "grad_norm": 1.3773059483594046, |
| "learning_rate": 9.488374226006836e-06, |
| "loss": 0.6106, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.23238925199709515, |
| "grad_norm": 1.2737618179619303, |
| "learning_rate": 9.477145102859357e-06, |
| "loss": 0.6115, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.23384168482207698, |
| "grad_norm": 1.3066121502077, |
| "learning_rate": 9.4658008792234e-06, |
| "loss": 0.609, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 1.242518893517758, |
| "learning_rate": 9.45434184674144e-06, |
| "loss": 0.6, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.23674655047204066, |
| "grad_norm": 1.2493334973003818, |
| "learning_rate": 9.442768300007511e-06, |
| "loss": 0.6144, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.23819898329702252, |
| "grad_norm": 1.2775874117960886, |
| "learning_rate": 9.431080536559631e-06, |
| "loss": 0.6245, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.23965141612200436, |
| "grad_norm": 1.247039996382283, |
| "learning_rate": 9.419278856872154e-06, |
| "loss": 0.6279, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.2411038489469862, |
| "grad_norm": 1.302601682600637, |
| "learning_rate": 9.407363564348047e-06, |
| "loss": 0.5933, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.24255628177196806, |
| "grad_norm": 1.431347455463815, |
| "learning_rate": 9.39533496531108e-06, |
| "loss": 0.6171, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.2440087145969499, |
| "grad_norm": 1.2527655662771335, |
| "learning_rate": 9.38319336899797e-06, |
| "loss": 0.6099, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.24546114742193173, |
| "grad_norm": 1.205551788839019, |
| "learning_rate": 9.370939087550407e-06, |
| "loss": 0.6077, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.24691358024691357, |
| "grad_norm": 1.332981320431861, |
| "learning_rate": 9.358572436007052e-06, |
| "loss": 0.6126, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.24836601307189543, |
| "grad_norm": 1.2112905977700383, |
| "learning_rate": 9.346093732295422e-06, |
| "loss": 0.6141, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.24981844589687727, |
| "grad_norm": 1.1741115783770129, |
| "learning_rate": 9.333503297223725e-06, |
| "loss": 0.5977, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.2512708787218591, |
| "grad_norm": 1.2308239868942004, |
| "learning_rate": 9.320801454472607e-06, |
| "loss": 0.6213, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.25272331154684097, |
| "grad_norm": 1.3933258283474292, |
| "learning_rate": 9.30798853058684e-06, |
| "loss": 0.6217, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.2541757443718228, |
| "grad_norm": 1.2467959691205432, |
| "learning_rate": 9.29506485496691e-06, |
| "loss": 0.6089, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.25562817719680464, |
| "grad_norm": 1.106847677662664, |
| "learning_rate": 9.282030759860566e-06, |
| "loss": 0.6113, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.2570806100217865, |
| "grad_norm": 1.225606521070107, |
| "learning_rate": 9.268886580354272e-06, |
| "loss": 0.6041, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.2585330428467683, |
| "grad_norm": 1.1249241718792773, |
| "learning_rate": 9.255632654364591e-06, |
| "loss": 0.6112, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.2599854756717502, |
| "grad_norm": 1.2347205288363368, |
| "learning_rate": 9.242269322629494e-06, |
| "loss": 0.6003, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.26143790849673204, |
| "grad_norm": 1.3040805105750026, |
| "learning_rate": 9.228796928699613e-06, |
| "loss": 0.6187, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.26289034132171385, |
| "grad_norm": 1.4585670240799034, |
| "learning_rate": 9.215215818929392e-06, |
| "loss": 0.612, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.2643427741466957, |
| "grad_norm": 1.0974130075617774, |
| "learning_rate": 9.201526342468202e-06, |
| "loss": 0.6124, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.2657952069716776, |
| "grad_norm": 1.2918051377461068, |
| "learning_rate": 9.18772885125134e-06, |
| "loss": 0.6055, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.2672476397966594, |
| "grad_norm": 1.199609927095931, |
| "learning_rate": 9.17382369999101e-06, |
| "loss": 0.6086, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.26870007262164125, |
| "grad_norm": 1.2736244478450063, |
| "learning_rate": 9.159811246167182e-06, |
| "loss": 0.6111, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.2701525054466231, |
| "grad_norm": 1.2484696326393374, |
| "learning_rate": 9.14569185001841e-06, |
| "loss": 0.5951, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.2716049382716049, |
| "grad_norm": 1.3221301583704237, |
| "learning_rate": 9.131465874532568e-06, |
| "loss": 0.5861, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.2730573710965868, |
| "grad_norm": 1.2578322361866867, |
| "learning_rate": 9.117133685437524e-06, |
| "loss": 0.6073, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.27450980392156865, |
| "grad_norm": 1.3260698149158467, |
| "learning_rate": 9.102695651191737e-06, |
| "loss": 0.5838, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.27596223674655046, |
| "grad_norm": 1.2373193794097532, |
| "learning_rate": 9.088152142974771e-06, |
| "loss": 0.6013, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.2774146695715323, |
| "grad_norm": 1.1997047870357698, |
| "learning_rate": 9.073503534677773e-06, |
| "loss": 0.6219, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.2788671023965142, |
| "grad_norm": 1.2769112952981858, |
| "learning_rate": 9.058750202893844e-06, |
| "loss": 0.6052, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.280319535221496, |
| "grad_norm": 1.2302296498321919, |
| "learning_rate": 9.04389252690837e-06, |
| "loss": 0.6124, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.28177196804647786, |
| "grad_norm": 1.2009594091858158, |
| "learning_rate": 9.02893088868926e-06, |
| "loss": 0.604, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.28322440087145967, |
| "grad_norm": 1.0539872600155336, |
| "learning_rate": 9.013865672877133e-06, |
| "loss": 0.6052, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.28467683369644153, |
| "grad_norm": 1.2561895098497668, |
| "learning_rate": 8.998697266775433e-06, |
| "loss": 0.6077, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.2861292665214234, |
| "grad_norm": 1.2763583417414128, |
| "learning_rate": 8.98342606034046e-06, |
| "loss": 0.6059, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.2875816993464052, |
| "grad_norm": 1.1463184995763767, |
| "learning_rate": 8.96805244617135e-06, |
| "loss": 0.6183, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.28903413217138707, |
| "grad_norm": 1.1421597790792624, |
| "learning_rate": 8.952576819499998e-06, |
| "loss": 0.602, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.29048656499636893, |
| "grad_norm": 1.3046866547593934, |
| "learning_rate": 8.93699957818087e-06, |
| "loss": 0.5925, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.29193899782135074, |
| "grad_norm": 1.27239619384718, |
| "learning_rate": 8.921321122680789e-06, |
| "loss": 0.6037, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.2933914306463326, |
| "grad_norm": 1.3073284462474046, |
| "learning_rate": 8.905541856068641e-06, |
| "loss": 0.6077, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.29484386347131447, |
| "grad_norm": 1.2694028140938955, |
| "learning_rate": 8.889662184005007e-06, |
| "loss": 0.6076, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "grad_norm": 1.1075058528848678, |
| "learning_rate": 8.873682514731746e-06, |
| "loss": 0.5986, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.29774872912127814, |
| "grad_norm": 1.25011183641691, |
| "learning_rate": 8.85760325906148e-06, |
| "loss": 0.5911, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.29920116194626, |
| "grad_norm": 1.230690665069067, |
| "learning_rate": 8.841424830367051e-06, |
| "loss": 0.5918, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.3006535947712418, |
| "grad_norm": 1.2143851276582127, |
| "learning_rate": 8.82514764457088e-06, |
| "loss": 0.6026, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.3021060275962237, |
| "grad_norm": 1.1711415813258073, |
| "learning_rate": 8.808772120134286e-06, |
| "loss": 0.6208, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.30355846042120554, |
| "grad_norm": 1.2105658122447378, |
| "learning_rate": 8.79229867804672e-06, |
| "loss": 0.6178, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.30501089324618735, |
| "grad_norm": 1.260614604486508, |
| "learning_rate": 8.775727741814945e-06, |
| "loss": 0.6033, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.3064633260711692, |
| "grad_norm": 1.1949196588242055, |
| "learning_rate": 8.75905973745215e-06, |
| "loss": 0.5954, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.307915758896151, |
| "grad_norm": 1.2358431757504627, |
| "learning_rate": 8.742295093466993e-06, |
| "loss": 0.5929, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.3093681917211329, |
| "grad_norm": 1.1788915626896657, |
| "learning_rate": 8.725434240852586e-06, |
| "loss": 0.6014, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.31082062454611475, |
| "grad_norm": 1.2899429468502281, |
| "learning_rate": 8.708477613075422e-06, |
| "loss": 0.588, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.31227305737109656, |
| "grad_norm": 1.0436767601630443, |
| "learning_rate": 8.691425646064222e-06, |
| "loss": 0.6128, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.3137254901960784, |
| "grad_norm": 1.1823668694466984, |
| "learning_rate": 8.674278778198731e-06, |
| "loss": 0.5939, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.3151779230210603, |
| "grad_norm": 1.2287777612088193, |
| "learning_rate": 8.657037450298449e-06, |
| "loss": 0.5942, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.3166303558460421, |
| "grad_norm": 1.1210160142803036, |
| "learning_rate": 8.6397021056113e-06, |
| "loss": 0.6068, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.31808278867102396, |
| "grad_norm": 1.176574092958882, |
| "learning_rate": 8.622273189802231e-06, |
| "loss": 0.6099, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.31953522149600583, |
| "grad_norm": 1.2276623152067967, |
| "learning_rate": 8.604751150941758e-06, |
| "loss": 0.598, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.32098765432098764, |
| "grad_norm": 1.2049029589388036, |
| "learning_rate": 8.58713643949445e-06, |
| "loss": 0.5934, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.3224400871459695, |
| "grad_norm": 1.2650704032924422, |
| "learning_rate": 8.569429508307345e-06, |
| "loss": 0.6039, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.32389251997095136, |
| "grad_norm": 1.088534753663297, |
| "learning_rate": 8.551630812598303e-06, |
| "loss": 0.6038, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.3253449527959332, |
| "grad_norm": 1.1678210415173849, |
| "learning_rate": 8.533740809944317e-06, |
| "loss": 0.6084, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.32679738562091504, |
| "grad_norm": 1.251355519441971, |
| "learning_rate": 8.515759960269731e-06, |
| "loss": 0.5975, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.3282498184458969, |
| "grad_norm": 1.1662322522769242, |
| "learning_rate": 8.497688725834432e-06, |
| "loss": 0.6106, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.3297022512708787, |
| "grad_norm": 1.336372713961502, |
| "learning_rate": 8.479527571221957e-06, |
| "loss": 0.6224, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.3311546840958606, |
| "grad_norm": 1.148371532122775, |
| "learning_rate": 8.461276963327555e-06, |
| "loss": 0.607, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.33260711692084244, |
| "grad_norm": 1.3691981401078914, |
| "learning_rate": 8.442937371346174e-06, |
| "loss": 0.6001, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.33405954974582425, |
| "grad_norm": 1.3343569533197541, |
| "learning_rate": 8.424509266760413e-06, |
| "loss": 0.6009, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3355119825708061, |
| "grad_norm": 1.0903008241967769, |
| "learning_rate": 8.405993123328388e-06, |
| "loss": 0.5852, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.3369644153957879, |
| "grad_norm": 1.2770798153391716, |
| "learning_rate": 8.387389417071565e-06, |
| "loss": 0.5967, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.3384168482207698, |
| "grad_norm": 1.1893611624135727, |
| "learning_rate": 8.368698626262506e-06, |
| "loss": 0.5906, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.33986928104575165, |
| "grad_norm": 1.1182656055274527, |
| "learning_rate": 8.349921231412588e-06, |
| "loss": 0.6144, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.34132171387073346, |
| "grad_norm": 1.1569225334439495, |
| "learning_rate": 8.331057715259643e-06, |
| "loss": 0.5945, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.3427741466957153, |
| "grad_norm": 1.0553585361032343, |
| "learning_rate": 8.312108562755547e-06, |
| "loss": 0.6012, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.3442265795206972, |
| "grad_norm": 1.0429439932782214, |
| "learning_rate": 8.29307426105376e-06, |
| "loss": 0.602, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.345679012345679, |
| "grad_norm": 1.0397368512389722, |
| "learning_rate": 8.273955299496787e-06, |
| "loss": 0.5932, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.34713144517066086, |
| "grad_norm": 1.0989788243486265, |
| "learning_rate": 8.254752169603614e-06, |
| "loss": 0.5987, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.3485838779956427, |
| "grad_norm": 1.2513128657031618, |
| "learning_rate": 8.235465365057067e-06, |
| "loss": 0.597, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.35003631082062453, |
| "grad_norm": 1.2696804086094644, |
| "learning_rate": 8.21609538169111e-06, |
| "loss": 0.5962, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.3514887436456064, |
| "grad_norm": 1.3765675743894579, |
| "learning_rate": 8.196642717478113e-06, |
| "loss": 0.6083, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 1.1525716644685924, |
| "learning_rate": 8.177107872516041e-06, |
| "loss": 0.5912, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.35439360929557007, |
| "grad_norm": 1.1930516036081553, |
| "learning_rate": 8.157491349015599e-06, |
| "loss": 0.601, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.35584604212055193, |
| "grad_norm": 1.3453249916774477, |
| "learning_rate": 8.137793651287317e-06, |
| "loss": 0.62, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.3572984749455338, |
| "grad_norm": 1.216543063547056, |
| "learning_rate": 8.118015285728598e-06, |
| "loss": 0.6037, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.3587509077705156, |
| "grad_norm": 1.129394528084983, |
| "learning_rate": 8.098156760810683e-06, |
| "loss": 0.598, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.36020334059549747, |
| "grad_norm": 1.124156367954234, |
| "learning_rate": 8.078218587065589e-06, |
| "loss": 0.5813, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.3616557734204793, |
| "grad_norm": 1.2039082584679666, |
| "learning_rate": 8.058201277072981e-06, |
| "loss": 0.5876, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.36310820624546114, |
| "grad_norm": 1.1919842026488203, |
| "learning_rate": 8.038105345446994e-06, |
| "loss": 0.6115, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.364560639070443, |
| "grad_norm": 1.2851968482663827, |
| "learning_rate": 8.017931308823006e-06, |
| "loss": 0.592, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.3660130718954248, |
| "grad_norm": 1.1538243634302991, |
| "learning_rate": 7.997679685844353e-06, |
| "loss": 0.5867, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.3674655047204067, |
| "grad_norm": 1.0704432112589999, |
| "learning_rate": 7.977350997148994e-06, |
| "loss": 0.6007, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.36891793754538854, |
| "grad_norm": 1.2707334756597408, |
| "learning_rate": 7.956945765356133e-06, |
| "loss": 0.5746, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.37037037037037035, |
| "grad_norm": 1.2061421625898763, |
| "learning_rate": 7.936464515052776e-06, |
| "loss": 0.601, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.3718228031953522, |
| "grad_norm": 1.318015728266432, |
| "learning_rate": 7.915907772780244e-06, |
| "loss": 0.6081, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.3732752360203341, |
| "grad_norm": 1.253197445356757, |
| "learning_rate": 7.89527606702065e-06, |
| "loss": 0.6046, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.3747276688453159, |
| "grad_norm": 1.190199765539676, |
| "learning_rate": 7.87456992818329e-06, |
| "loss": 0.5986, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.37618010167029775, |
| "grad_norm": 1.193398450040499, |
| "learning_rate": 7.853789888591032e-06, |
| "loss": 0.5889, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.3776325344952796, |
| "grad_norm": 1.035053671117003, |
| "learning_rate": 7.832936482466612e-06, |
| "loss": 0.5934, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.3790849673202614, |
| "grad_norm": 1.1386993400574172, |
| "learning_rate": 7.812010245918903e-06, |
| "loss": 0.586, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.3805374001452433, |
| "grad_norm": 1.1022458257608025, |
| "learning_rate": 7.79101171692914e-06, |
| "loss": 0.5806, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.38198983297022515, |
| "grad_norm": 1.1758543851880188, |
| "learning_rate": 7.769941435337083e-06, |
| "loss": 0.5618, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.38344226579520696, |
| "grad_norm": 1.2426818455480244, |
| "learning_rate": 7.748799942827147e-06, |
| "loss": 0.6012, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.3848946986201888, |
| "grad_norm": 1.0718204571931684, |
| "learning_rate": 7.72758778291446e-06, |
| "loss": 0.5887, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.3863471314451707, |
| "grad_norm": 1.0289005823465374, |
| "learning_rate": 7.706305500930909e-06, |
| "loss": 0.6037, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.3877995642701525, |
| "grad_norm": 1.2478985029233107, |
| "learning_rate": 7.684953644011103e-06, |
| "loss": 0.584, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.38925199709513436, |
| "grad_norm": 1.1066991243562059, |
| "learning_rate": 7.66353276107832e-06, |
| "loss": 0.6007, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.39070442992011617, |
| "grad_norm": 1.2345614999374477, |
| "learning_rate": 7.64204340283039e-06, |
| "loss": 0.6033, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 1.0798799696274017, |
| "learning_rate": 7.620486121725536e-06, |
| "loss": 0.59, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.3936092955700799, |
| "grad_norm": 1.1600968806836478, |
| "learning_rate": 7.598861471968174e-06, |
| "loss": 0.5948, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.3950617283950617, |
| "grad_norm": 1.1860847221048887, |
| "learning_rate": 7.577170009494665e-06, |
| "loss": 0.5981, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.39651416122004357, |
| "grad_norm": 1.0670434364146835, |
| "learning_rate": 7.555412291959018e-06, |
| "loss": 0.5772, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.39796659404502543, |
| "grad_norm": 1.1865817610815497, |
| "learning_rate": 7.533588878718561e-06, |
| "loss": 0.584, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.39941902687000724, |
| "grad_norm": 1.2092053148497965, |
| "learning_rate": 7.511700330819556e-06, |
| "loss": 0.5832, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.4008714596949891, |
| "grad_norm": 1.1770338237370501, |
| "learning_rate": 7.489747210982777e-06, |
| "loss": 0.5984, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.40232389251997097, |
| "grad_norm": 1.1434774901575833, |
| "learning_rate": 7.4677300835890424e-06, |
| "loss": 0.5755, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.4037763253449528, |
| "grad_norm": 1.0366368031771818, |
| "learning_rate": 7.445649514664703e-06, |
| "loss": 0.5886, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.40522875816993464, |
| "grad_norm": 1.2729396302065998, |
| "learning_rate": 7.423506071867101e-06, |
| "loss": 0.6134, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.4066811909949165, |
| "grad_norm": 1.0518352889412923, |
| "learning_rate": 7.401300324469961e-06, |
| "loss": 0.5737, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.4081336238198983, |
| "grad_norm": 1.2001944481237583, |
| "learning_rate": 7.3790328433487665e-06, |
| "loss": 0.5874, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.4095860566448802, |
| "grad_norm": 1.250231920993964, |
| "learning_rate": 7.3567042009660786e-06, |
| "loss": 0.5862, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.41103848946986205, |
| "grad_norm": 1.1512872210708966, |
| "learning_rate": 7.3343149713568215e-06, |
| "loss": 0.593, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.41249092229484385, |
| "grad_norm": 1.1605256860138091, |
| "learning_rate": 7.311865730113525e-06, |
| "loss": 0.5939, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.4139433551198257, |
| "grad_norm": 1.3940208410225592, |
| "learning_rate": 7.2893570543715174e-06, |
| "loss": 0.6028, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.4153957879448075, |
| "grad_norm": 1.1976078557092422, |
| "learning_rate": 7.266789522794104e-06, |
| "loss": 0.6065, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.4168482207697894, |
| "grad_norm": 1.035110243445679, |
| "learning_rate": 7.244163715557683e-06, |
| "loss": 0.5915, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.41830065359477125, |
| "grad_norm": 1.1865073190747897, |
| "learning_rate": 7.2214802143368225e-06, |
| "loss": 0.5961, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.41975308641975306, |
| "grad_norm": 1.0991372561424138, |
| "learning_rate": 7.1987396022893216e-06, |
| "loss": 0.5857, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.4212055192447349, |
| "grad_norm": 1.0801243737112538, |
| "learning_rate": 7.175942464041209e-06, |
| "loss": 0.5829, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.4226579520697168, |
| "grad_norm": 1.3295568712189132, |
| "learning_rate": 7.15308938567171e-06, |
| "loss": 0.5869, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.4241103848946986, |
| "grad_norm": 1.0402363831702612, |
| "learning_rate": 7.130180954698187e-06, |
| "loss": 0.5842, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.42556281771968046, |
| "grad_norm": 1.1031276144488775, |
| "learning_rate": 7.107217760061036e-06, |
| "loss": 0.5923, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.42701525054466233, |
| "grad_norm": 1.183086396688286, |
| "learning_rate": 7.0842003921085376e-06, |
| "loss": 0.6053, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.42846768336964414, |
| "grad_norm": 1.244303339507363, |
| "learning_rate": 7.061129442581685e-06, |
| "loss": 0.5924, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.429920116194626, |
| "grad_norm": 1.2478572360385807, |
| "learning_rate": 7.038005504598975e-06, |
| "loss": 0.5922, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.43137254901960786, |
| "grad_norm": 1.0447681879549313, |
| "learning_rate": 7.0148291726411486e-06, |
| "loss": 0.5825, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.4328249818445897, |
| "grad_norm": 1.1025428022026995, |
| "learning_rate": 6.9916010425359214e-06, |
| "loss": 0.5956, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.43427741466957154, |
| "grad_norm": 1.329010163267056, |
| "learning_rate": 6.968321711442658e-06, |
| "loss": 0.5772, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.4357298474945534, |
| "grad_norm": 1.2330587975332181, |
| "learning_rate": 6.9449917778370216e-06, |
| "loss": 0.5933, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.4371822803195352, |
| "grad_norm": 1.1656344009683823, |
| "learning_rate": 6.921611841495584e-06, |
| "loss": 0.5922, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.4386347131445171, |
| "grad_norm": 1.2709734185927093, |
| "learning_rate": 6.898182503480414e-06, |
| "loss": 0.5911, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.4400871459694989, |
| "grad_norm": 1.269770194129687, |
| "learning_rate": 6.8747043661236215e-06, |
| "loss": 0.6103, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.44153957879448075, |
| "grad_norm": 1.106713465551905, |
| "learning_rate": 6.851178033011869e-06, |
| "loss": 0.5997, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.4429920116194626, |
| "grad_norm": 1.1985970638971495, |
| "learning_rate": 6.82760410897086e-06, |
| "loss": 0.5727, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 1.1259472634689607, |
| "learning_rate": 6.8039832000497865e-06, |
| "loss": 0.5983, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.4458968772694263, |
| "grad_norm": 1.212189906596056, |
| "learning_rate": 6.78031591350575e-06, |
| "loss": 0.5958, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.44734931009440815, |
| "grad_norm": 1.0999728539824523, |
| "learning_rate": 6.756602857788148e-06, |
| "loss": 0.5717, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.44880174291938996, |
| "grad_norm": 1.1130187014726358, |
| "learning_rate": 6.732844642523032e-06, |
| "loss": 0.5793, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.4502541757443718, |
| "grad_norm": 1.075132513625087, |
| "learning_rate": 6.70904187849744e-06, |
| "loss": 0.562, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.4517066085693537, |
| "grad_norm": 1.2147850552839328, |
| "learning_rate": 6.685195177643684e-06, |
| "loss": 0.5978, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.4531590413943355, |
| "grad_norm": 1.2836246837826484, |
| "learning_rate": 6.661305153023628e-06, |
| "loss": 0.5912, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.45461147421931736, |
| "grad_norm": 1.1766776836268427, |
| "learning_rate": 6.637372418812921e-06, |
| "loss": 0.586, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.4560639070442992, |
| "grad_norm": 1.3613669267848012, |
| "learning_rate": 6.613397590285211e-06, |
| "loss": 0.5998, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.45751633986928103, |
| "grad_norm": 1.2051701552338834, |
| "learning_rate": 6.589381283796325e-06, |
| "loss": 0.5812, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.4589687726942629, |
| "grad_norm": 1.1519365736041338, |
| "learning_rate": 6.565324116768428e-06, |
| "loss": 0.583, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.46042120551924476, |
| "grad_norm": 1.1475917123110242, |
| "learning_rate": 6.54122670767414e-06, |
| "loss": 0.5765, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.46187363834422657, |
| "grad_norm": 1.088676956077236, |
| "learning_rate": 6.517089676020648e-06, |
| "loss": 0.5997, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.46332607116920843, |
| "grad_norm": 1.1195203213303881, |
| "learning_rate": 6.492913642333768e-06, |
| "loss": 0.565, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.4647785039941903, |
| "grad_norm": 1.0927178103796473, |
| "learning_rate": 6.468699228142004e-06, |
| "loss": 0.5988, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.4662309368191721, |
| "grad_norm": 1.1180323598233408, |
| "learning_rate": 6.444447055960559e-06, |
| "loss": 0.6034, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.46768336964415397, |
| "grad_norm": 1.1581218721076667, |
| "learning_rate": 6.420157749275341e-06, |
| "loss": 0.5792, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.4691358024691358, |
| "grad_norm": 1.2355006071990586, |
| "learning_rate": 6.395831932526924e-06, |
| "loss": 0.5914, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 1.2628642644632941, |
| "learning_rate": 6.371470231094498e-06, |
| "loss": 0.5972, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.4720406681190995, |
| "grad_norm": 1.30372441555249, |
| "learning_rate": 6.3470732712798e-06, |
| "loss": 0.5943, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.4734931009440813, |
| "grad_norm": 1.2732465621842586, |
| "learning_rate": 6.322641680290997e-06, |
| "loss": 0.59, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.4749455337690632, |
| "grad_norm": 1.1957460012906904, |
| "learning_rate": 6.298176086226577e-06, |
| "loss": 0.5908, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.47639796659404504, |
| "grad_norm": 1.2666436895215651, |
| "learning_rate": 6.273677118059192e-06, |
| "loss": 0.579, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.47785039941902685, |
| "grad_norm": 1.1740612442844354, |
| "learning_rate": 6.24914540561949e-06, |
| "loss": 0.5849, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.4793028322440087, |
| "grad_norm": 1.170368029656733, |
| "learning_rate": 6.2245815795799235e-06, |
| "loss": 0.5914, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.4807552650689906, |
| "grad_norm": 1.060432274782722, |
| "learning_rate": 6.199986271438536e-06, |
| "loss": 0.5692, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.4822076978939724, |
| "grad_norm": 1.133481629336483, |
| "learning_rate": 6.17536011350273e-06, |
| "loss": 0.5789, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.48366013071895425, |
| "grad_norm": 1.0779584839433474, |
| "learning_rate": 6.150703738873004e-06, |
| "loss": 0.5815, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.4851125635439361, |
| "grad_norm": 1.138478981177591, |
| "learning_rate": 6.1260177814266855e-06, |
| "loss": 0.5754, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.4865649963689179, |
| "grad_norm": 1.1290987276585867, |
| "learning_rate": 6.101302875801628e-06, |
| "loss": 0.5778, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.4880174291938998, |
| "grad_norm": 1.1468009205478524, |
| "learning_rate": 6.0765596573798994e-06, |
| "loss": 0.5689, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.48946986201888165, |
| "grad_norm": 1.0683998313181482, |
| "learning_rate": 6.051788762271442e-06, |
| "loss": 0.5692, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.49092229484386346, |
| "grad_norm": 1.1889646870467425, |
| "learning_rate": 6.0269908272977295e-06, |
| "loss": 0.5808, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.4923747276688453, |
| "grad_norm": 1.2529890364621932, |
| "learning_rate": 6.002166489975385e-06, |
| "loss": 0.5772, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.49382716049382713, |
| "grad_norm": 1.1925487080641164, |
| "learning_rate": 5.977316388499794e-06, |
| "loss": 0.5862, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.495279593318809, |
| "grad_norm": 1.1372201366075154, |
| "learning_rate": 5.952441161728701e-06, |
| "loss": 0.5662, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.49673202614379086, |
| "grad_norm": 1.2981299245914195, |
| "learning_rate": 5.927541449165783e-06, |
| "loss": 0.5682, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.49818445896877267, |
| "grad_norm": 1.1198285033650917, |
| "learning_rate": 5.902617890944207e-06, |
| "loss": 0.5894, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.49963689179375453, |
| "grad_norm": 1.1442459802118357, |
| "learning_rate": 5.8776711278101765e-06, |
| "loss": 0.5735, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.5010893246187363, |
| "grad_norm": 1.10045421098352, |
| "learning_rate": 5.852701801106458e-06, |
| "loss": 0.5838, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.5025417574437182, |
| "grad_norm": 1.1675311387395517, |
| "learning_rate": 5.82771055275589e-06, |
| "loss": 0.5847, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.5039941902687001, |
| "grad_norm": 1.0028532762834719, |
| "learning_rate": 5.802698025244886e-06, |
| "loss": 0.5656, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.5054466230936819, |
| "grad_norm": 1.028656973511835, |
| "learning_rate": 5.777664861606912e-06, |
| "loss": 0.5871, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.5068990559186638, |
| "grad_norm": 1.2007383871296113, |
| "learning_rate": 5.752611705405957e-06, |
| "loss": 0.5895, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.5083514887436456, |
| "grad_norm": 1.1281898149999334, |
| "learning_rate": 5.7275392007199896e-06, |
| "loss": 0.573, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.5098039215686274, |
| "grad_norm": 1.282146433020574, |
| "learning_rate": 5.702447992124394e-06, |
| "loss": 0.57, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.5112563543936093, |
| "grad_norm": 1.05801689608913, |
| "learning_rate": 5.677338724675406e-06, |
| "loss": 0.5751, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.5127087872185911, |
| "grad_norm": 1.2511793245069922, |
| "learning_rate": 5.652212043893528e-06, |
| "loss": 0.5805, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.514161220043573, |
| "grad_norm": 1.2496537928999953, |
| "learning_rate": 5.627068595746931e-06, |
| "loss": 0.5734, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.5156136528685549, |
| "grad_norm": 1.0586939290192166, |
| "learning_rate": 5.601909026634846e-06, |
| "loss": 0.573, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.5170660856935366, |
| "grad_norm": 1.2135072197108623, |
| "learning_rate": 5.576733983370955e-06, |
| "loss": 0.5696, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.5185185185185185, |
| "grad_norm": 1.096951604322022, |
| "learning_rate": 5.551544113166752e-06, |
| "loss": 0.5764, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.5199709513435004, |
| "grad_norm": 1.067656908278471, |
| "learning_rate": 5.5263400636149104e-06, |
| "loss": 0.5945, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.5214233841684822, |
| "grad_norm": 1.2528345132805765, |
| "learning_rate": 5.50112248267263e-06, |
| "loss": 0.5698, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.5228758169934641, |
| "grad_norm": 1.153586426579592, |
| "learning_rate": 5.475892018644989e-06, |
| "loss": 0.5939, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.524328249818446, |
| "grad_norm": 1.321281822598792, |
| "learning_rate": 5.450649320168263e-06, |
| "loss": 0.5764, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.5257806826434277, |
| "grad_norm": 1.1546247883125684, |
| "learning_rate": 5.4253950361932565e-06, |
| "loss": 0.5698, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.5272331154684096, |
| "grad_norm": 1.3090075714265825, |
| "learning_rate": 5.400129815968623e-06, |
| "loss": 0.58, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.5286855482933914, |
| "grad_norm": 1.3546772950978652, |
| "learning_rate": 5.374854309024167e-06, |
| "loss": 0.5906, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.5301379811183733, |
| "grad_norm": 1.0728126839197956, |
| "learning_rate": 5.349569165154153e-06, |
| "loss": 0.5617, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.5315904139433552, |
| "grad_norm": 1.0481388119854531, |
| "learning_rate": 5.32427503440059e-06, |
| "loss": 0.5752, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.533042846768337, |
| "grad_norm": 1.251734474368655, |
| "learning_rate": 5.29897256703653e-06, |
| "loss": 0.577, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.5344952795933188, |
| "grad_norm": 1.1273771235496188, |
| "learning_rate": 5.2736624135493465e-06, |
| "loss": 0.5604, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.5359477124183006, |
| "grad_norm": 1.1728285082039356, |
| "learning_rate": 5.248345224624007e-06, |
| "loss": 0.5799, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.5374001452432825, |
| "grad_norm": 1.1207082347004158, |
| "learning_rate": 5.223021651126356e-06, |
| "loss": 0.5792, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.5388525780682644, |
| "grad_norm": 1.096111126610637, |
| "learning_rate": 5.197692344086369e-06, |
| "loss": 0.582, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.5403050108932462, |
| "grad_norm": 1.1432895144261512, |
| "learning_rate": 5.172357954681427e-06, |
| "loss": 0.5669, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.541757443718228, |
| "grad_norm": 1.2795186578480655, |
| "learning_rate": 5.147019134219569e-06, |
| "loss": 0.5727, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.5432098765432098, |
| "grad_norm": 1.1497619263404009, |
| "learning_rate": 5.121676534122746e-06, |
| "loss": 0.5665, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.5446623093681917, |
| "grad_norm": 1.053760679670929, |
| "learning_rate": 5.096330805910085e-06, |
| "loss": 0.5758, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.5461147421931736, |
| "grad_norm": 1.2455461930319618, |
| "learning_rate": 5.0709826011811246e-06, |
| "loss": 0.5715, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.5475671750181554, |
| "grad_norm": 1.2714142743729588, |
| "learning_rate": 5.045632571599076e-06, |
| "loss": 0.5764, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.5490196078431373, |
| "grad_norm": 1.2596602396359573, |
| "learning_rate": 5.020281368874063e-06, |
| "loss": 0.5777, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.5504720406681191, |
| "grad_norm": 1.096076072807335, |
| "learning_rate": 4.994929644746366e-06, |
| "loss": 0.5752, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.5519244734931009, |
| "grad_norm": 1.1180419407959938, |
| "learning_rate": 4.969578050969675e-06, |
| "loss": 0.5783, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.5533769063180828, |
| "grad_norm": 1.1457632992717688, |
| "learning_rate": 4.944227239294327e-06, |
| "loss": 0.5706, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.5548293391430646, |
| "grad_norm": 1.0431686309314605, |
| "learning_rate": 4.918877861450553e-06, |
| "loss": 0.5629, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.5562817719680465, |
| "grad_norm": 1.1033442319502207, |
| "learning_rate": 4.893530569131716e-06, |
| "loss": 0.5611, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.5577342047930284, |
| "grad_norm": 1.1929600913303742, |
| "learning_rate": 4.8681860139775745e-06, |
| "loss": 0.568, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.5591866376180101, |
| "grad_norm": 1.281488846532093, |
| "learning_rate": 4.842844847557508e-06, |
| "loss": 0.5882, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.560639070442992, |
| "grad_norm": 1.1195048036816224, |
| "learning_rate": 4.817507721353785e-06, |
| "loss": 0.596, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.5620915032679739, |
| "grad_norm": 1.1077419816516767, |
| "learning_rate": 4.792175286744802e-06, |
| "loss": 0.5747, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.5635439360929557, |
| "grad_norm": 1.3502747193694702, |
| "learning_rate": 4.766848194988344e-06, |
| "loss": 0.5915, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.5649963689179376, |
| "grad_norm": 1.001203957804234, |
| "learning_rate": 4.741527097204837e-06, |
| "loss": 0.5732, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.5664488017429193, |
| "grad_norm": 1.1428305709772093, |
| "learning_rate": 4.7162126443606145e-06, |
| "loss": 0.5682, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.5679012345679012, |
| "grad_norm": 1.220191866232699, |
| "learning_rate": 4.690905487251174e-06, |
| "loss": 0.5695, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.5693536673928831, |
| "grad_norm": 1.0555952997249456, |
| "learning_rate": 4.665606276484455e-06, |
| "loss": 0.5684, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.5708061002178649, |
| "grad_norm": 1.1675138439049109, |
| "learning_rate": 4.6403156624641085e-06, |
| "loss": 0.5876, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.5722585330428468, |
| "grad_norm": 1.2418849374572543, |
| "learning_rate": 4.615034295372777e-06, |
| "loss": 0.5838, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.5737109658678287, |
| "grad_norm": 1.0616817293128535, |
| "learning_rate": 4.589762825155374e-06, |
| "loss": 0.57, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.5751633986928104, |
| "grad_norm": 1.2414737852232787, |
| "learning_rate": 4.564501901502386e-06, |
| "loss": 0.5521, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.5766158315177923, |
| "grad_norm": 1.0962764476368352, |
| "learning_rate": 4.5392521738331585e-06, |
| "loss": 0.5761, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.5780682643427741, |
| "grad_norm": 1.2445755051746221, |
| "learning_rate": 4.514014291279208e-06, |
| "loss": 0.5612, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.579520697167756, |
| "grad_norm": 1.1248791169953434, |
| "learning_rate": 4.488788902667534e-06, |
| "loss": 0.5651, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.5809731299927379, |
| "grad_norm": 1.1052395709597995, |
| "learning_rate": 4.463576656503927e-06, |
| "loss": 0.5624, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5824255628177197, |
| "grad_norm": 1.0979993545936089, |
| "learning_rate": 4.438378200956318e-06, |
| "loss": 0.5747, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.5838779956427015, |
| "grad_norm": 1.1585156096079503, |
| "learning_rate": 4.413194183838091e-06, |
| "loss": 0.5757, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.5853304284676834, |
| "grad_norm": 1.0657343307419072, |
| "learning_rate": 4.388025252591448e-06, |
| "loss": 0.5826, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.5867828612926652, |
| "grad_norm": 1.1584399941372348, |
| "learning_rate": 4.362872054270753e-06, |
| "loss": 0.561, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 1.1136815017444102, |
| "learning_rate": 4.337735235525904e-06, |
| "loss": 0.5801, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.5896877269426289, |
| "grad_norm": 1.2048049573288624, |
| "learning_rate": 4.312615442585699e-06, |
| "loss": 0.5748, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.5911401597676107, |
| "grad_norm": 1.106968794623351, |
| "learning_rate": 4.287513321241237e-06, |
| "loss": 0.5665, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 1.0773536810915454, |
| "learning_rate": 4.262429516829299e-06, |
| "loss": 0.5739, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.5940450254175744, |
| "grad_norm": 1.2780512286596586, |
| "learning_rate": 4.237364674215774e-06, |
| "loss": 0.573, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.5954974582425563, |
| "grad_norm": 1.015175880325257, |
| "learning_rate": 4.212319437779066e-06, |
| "loss": 0.5637, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.5969498910675382, |
| "grad_norm": 1.1403330329394572, |
| "learning_rate": 4.187294451393541e-06, |
| "loss": 0.5807, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.59840232389252, |
| "grad_norm": 1.1083139371642667, |
| "learning_rate": 4.162290358412962e-06, |
| "loss": 0.5704, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.5998547567175018, |
| "grad_norm": 1.1372343052927192, |
| "learning_rate": 4.1373078016539535e-06, |
| "loss": 0.5559, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.6013071895424836, |
| "grad_norm": 1.2137905963682751, |
| "learning_rate": 4.1123474233794845e-06, |
| "loss": 0.5588, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.6027596223674655, |
| "grad_norm": 1.2130103389722957, |
| "learning_rate": 4.087409865282341e-06, |
| "loss": 0.5776, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.6042120551924474, |
| "grad_norm": 1.21914550825707, |
| "learning_rate": 4.062495768468646e-06, |
| "loss": 0.5618, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.6056644880174292, |
| "grad_norm": 1.1540562248868875, |
| "learning_rate": 4.03760577344136e-06, |
| "loss": 0.5784, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.6071169208424111, |
| "grad_norm": 1.214796762228358, |
| "learning_rate": 4.012740520083832e-06, |
| "loss": 0.5814, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.6085693536673928, |
| "grad_norm": 1.157806370832285, |
| "learning_rate": 3.987900647643334e-06, |
| "loss": 0.5791, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.6100217864923747, |
| "grad_norm": 1.1517956672556253, |
| "learning_rate": 3.963086794714639e-06, |
| "loss": 0.5652, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.6114742193173566, |
| "grad_norm": 1.1605789001720612, |
| "learning_rate": 3.9382995992235955e-06, |
| "loss": 0.5728, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.6129266521423384, |
| "grad_norm": 1.0630436480054268, |
| "learning_rate": 3.913539698410734e-06, |
| "loss": 0.5684, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.6143790849673203, |
| "grad_norm": 1.175513347812724, |
| "learning_rate": 3.888807728814874e-06, |
| "loss": 0.5664, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.615831517792302, |
| "grad_norm": 1.1583525329647688, |
| "learning_rate": 3.864104326256775e-06, |
| "loss": 0.5805, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.6172839506172839, |
| "grad_norm": 1.1058170223844426, |
| "learning_rate": 3.8394301258227756e-06, |
| "loss": 0.5622, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.6187363834422658, |
| "grad_norm": 1.2295319541574912, |
| "learning_rate": 3.814785761848475e-06, |
| "loss": 0.5583, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.6201888162672476, |
| "grad_norm": 1.092280135001415, |
| "learning_rate": 3.790171867902426e-06, |
| "loss": 0.5755, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.6216412490922295, |
| "grad_norm": 1.274653674496685, |
| "learning_rate": 3.7655890767698384e-06, |
| "loss": 0.5729, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.6230936819172114, |
| "grad_norm": 1.2166924621577075, |
| "learning_rate": 3.741038020436323e-06, |
| "loss": 0.5572, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.6245461147421931, |
| "grad_norm": 1.0296689666125658, |
| "learning_rate": 3.7165193300716297e-06, |
| "loss": 0.5664, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.625998547567175, |
| "grad_norm": 1.0530929308425294, |
| "learning_rate": 3.6920336360134378e-06, |
| "loss": 0.5679, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.6274509803921569, |
| "grad_norm": 1.1137539642969592, |
| "learning_rate": 3.6675815677511382e-06, |
| "loss": 0.5607, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.6289034132171387, |
| "grad_norm": 1.0875536687719785, |
| "learning_rate": 3.6431637539096565e-06, |
| "loss": 0.5691, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.6303558460421206, |
| "grad_norm": 1.1268225507247402, |
| "learning_rate": 3.6187808222332852e-06, |
| "loss": 0.5668, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.6318082788671024, |
| "grad_norm": 1.1757316218974525, |
| "learning_rate": 3.594433399569559e-06, |
| "loss": 0.5551, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.6332607116920842, |
| "grad_norm": 1.1554119314408926, |
| "learning_rate": 3.5701221118531195e-06, |
| "loss": 0.5785, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.6347131445170661, |
| "grad_norm": 1.0947128171930913, |
| "learning_rate": 3.5458475840896434e-06, |
| "loss": 0.5677, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.6361655773420479, |
| "grad_norm": 1.2477952532418557, |
| "learning_rate": 3.5216104403397623e-06, |
| "loss": 0.5504, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.6376180101670298, |
| "grad_norm": 1.1149755483280817, |
| "learning_rate": 3.4974113037030257e-06, |
| "loss": 0.5753, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.6390704429920117, |
| "grad_norm": 1.214526641921585, |
| "learning_rate": 3.473250796301874e-06, |
| "loss": 0.5669, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.6405228758169934, |
| "grad_norm": 1.1149175312128623, |
| "learning_rate": 3.4491295392656497e-06, |
| "loss": 0.5604, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.6419753086419753, |
| "grad_norm": 1.1763746140746527, |
| "learning_rate": 3.425048152714635e-06, |
| "loss": 0.5651, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.6434277414669571, |
| "grad_norm": 1.169802661186734, |
| "learning_rate": 3.4010072557440967e-06, |
| "loss": 0.5685, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.644880174291939, |
| "grad_norm": 1.1404701148865375, |
| "learning_rate": 3.3770074664083827e-06, |
| "loss": 0.577, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.6463326071169209, |
| "grad_norm": 1.2951511455390947, |
| "learning_rate": 3.353049401705022e-06, |
| "loss": 0.5546, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.6477850399419027, |
| "grad_norm": 1.2188858191779428, |
| "learning_rate": 3.329133677558873e-06, |
| "loss": 0.5697, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.6492374727668845, |
| "grad_norm": 1.1239635889524127, |
| "learning_rate": 3.3052609088062767e-06, |
| "loss": 0.5901, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.6506899055918663, |
| "grad_norm": 1.0931476283773633, |
| "learning_rate": 3.281431709179264e-06, |
| "loss": 0.566, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.6521423384168482, |
| "grad_norm": 1.4718901865939953, |
| "learning_rate": 3.2576466912897674e-06, |
| "loss": 0.5761, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.6535947712418301, |
| "grad_norm": 1.2062192465520678, |
| "learning_rate": 3.2339064666138783e-06, |
| "loss": 0.5757, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.6550472040668119, |
| "grad_norm": 1.2732571104572175, |
| "learning_rate": 3.2102116454761168e-06, |
| "loss": 0.5615, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.6564996368917938, |
| "grad_norm": 1.198522063919598, |
| "learning_rate": 3.1865628370337575e-06, |
| "loss": 0.5632, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.6579520697167756, |
| "grad_norm": 1.208764455797361, |
| "learning_rate": 3.162960649261152e-06, |
| "loss": 0.5472, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.6594045025417574, |
| "grad_norm": 1.2300085896818644, |
| "learning_rate": 3.1394056889341086e-06, |
| "loss": 0.5737, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.6608569353667393, |
| "grad_norm": 1.2362227883984134, |
| "learning_rate": 3.1158985616142944e-06, |
| "loss": 0.5467, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.6623093681917211, |
| "grad_norm": 1.2577141886691818, |
| "learning_rate": 3.092439871633658e-06, |
| "loss": 0.5652, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.663761801016703, |
| "grad_norm": 1.2246719550977323, |
| "learning_rate": 3.0690302220789036e-06, |
| "loss": 0.564, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.6652142338416849, |
| "grad_norm": 0.952770111510269, |
| "learning_rate": 3.0456702147759797e-06, |
| "loss": 0.5538, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.2114290005968387, |
| "learning_rate": 3.0223604502746097e-06, |
| "loss": 0.5624, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.6681190994916485, |
| "grad_norm": 1.2379634249474247, |
| "learning_rate": 2.999101527832849e-06, |
| "loss": 0.5581, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.6695715323166304, |
| "grad_norm": 1.2432970361649818, |
| "learning_rate": 2.9758940454016893e-06, |
| "loss": 0.5519, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.6710239651416122, |
| "grad_norm": 1.1827840525798392, |
| "learning_rate": 2.9527385996096702e-06, |
| "loss": 0.5512, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.6724763979665941, |
| "grad_norm": 1.1313263342846276, |
| "learning_rate": 2.929635785747558e-06, |
| "loss": 0.5615, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.6739288307915758, |
| "grad_norm": 1.0718626125088186, |
| "learning_rate": 2.9065861977530263e-06, |
| "loss": 0.5577, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.6753812636165577, |
| "grad_norm": 1.2058366328226908, |
| "learning_rate": 2.8835904281953984e-06, |
| "loss": 0.5543, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.6768336964415396, |
| "grad_norm": 1.2044090066060698, |
| "learning_rate": 2.8606490682604083e-06, |
| "loss": 0.563, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.6782861292665214, |
| "grad_norm": 1.2440783490748353, |
| "learning_rate": 2.837762707734999e-06, |
| "loss": 0.5678, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.6797385620915033, |
| "grad_norm": 1.1447619754452882, |
| "learning_rate": 2.8149319349921678e-06, |
| "loss": 0.5443, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.6811909949164852, |
| "grad_norm": 1.0682059420594845, |
| "learning_rate": 2.7921573369758344e-06, |
| "loss": 0.5548, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.6826434277414669, |
| "grad_norm": 1.0786981942796325, |
| "learning_rate": 2.769439499185752e-06, |
| "loss": 0.557, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.6840958605664488, |
| "grad_norm": 1.1021974391300458, |
| "learning_rate": 2.7467790056624565e-06, |
| "loss": 0.5641, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.6855482933914306, |
| "grad_norm": 1.172642324603278, |
| "learning_rate": 2.7241764389722536e-06, |
| "loss": 0.5579, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.6870007262164125, |
| "grad_norm": 1.1739344769196898, |
| "learning_rate": 2.7016323801922327e-06, |
| "loss": 0.5426, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.6884531590413944, |
| "grad_norm": 1.0908808031509236, |
| "learning_rate": 2.679147408895349e-06, |
| "loss": 0.5667, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.6899055918663762, |
| "grad_norm": 1.1345661062696517, |
| "learning_rate": 2.6567221031354907e-06, |
| "loss": 0.5639, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.691358024691358, |
| "grad_norm": 1.0249096917283105, |
| "learning_rate": 2.634357039432656e-06, |
| "loss": 0.5648, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.6928104575163399, |
| "grad_norm": 1.1583880032183098, |
| "learning_rate": 2.612052792758095e-06, |
| "loss": 0.5651, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.6942628903413217, |
| "grad_norm": 1.069684864764473, |
| "learning_rate": 2.5898099365195626e-06, |
| "loss": 0.5722, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.6957153231663036, |
| "grad_norm": 1.0867414593247826, |
| "learning_rate": 2.5676290425465496e-06, |
| "loss": 0.5664, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.6971677559912854, |
| "grad_norm": 1.1375716473128172, |
| "learning_rate": 2.5455106810755957e-06, |
| "loss": 0.5585, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.6986201888162672, |
| "grad_norm": 1.034623153574018, |
| "learning_rate": 2.5234554207356266e-06, |
| "loss": 0.5722, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.7000726216412491, |
| "grad_norm": 1.0654655922639538, |
| "learning_rate": 2.5014638285333357e-06, |
| "loss": 0.5643, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.7015250544662309, |
| "grad_norm": 1.0988829596394427, |
| "learning_rate": 2.479536469838606e-06, |
| "loss": 0.5635, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.7029774872912128, |
| "grad_norm": 1.050301540250255, |
| "learning_rate": 2.4576739083699764e-06, |
| "loss": 0.55, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.7044299201161947, |
| "grad_norm": 1.3185971209726384, |
| "learning_rate": 2.43587670618015e-06, |
| "loss": 0.5686, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 1.1036440984293434, |
| "learning_rate": 2.4141454236415428e-06, |
| "loss": 0.5617, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.7073347857661583, |
| "grad_norm": 1.0669150287420783, |
| "learning_rate": 2.392480619431879e-06, |
| "loss": 0.5416, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.7087872185911401, |
| "grad_norm": 1.0472161733755885, |
| "learning_rate": 2.3708828505198265e-06, |
| "loss": 0.5777, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.710239651416122, |
| "grad_norm": 1.1252884484776227, |
| "learning_rate": 2.349352672150681e-06, |
| "loss": 0.5535, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.7116920842411039, |
| "grad_norm": 1.1423409076437527, |
| "learning_rate": 2.3278906378320854e-06, |
| "loss": 0.5598, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.7131445170660857, |
| "grad_norm": 0.9801237939355479, |
| "learning_rate": 2.306497299319814e-06, |
| "loss": 0.5551, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.7145969498910676, |
| "grad_norm": 1.0526887175825372, |
| "learning_rate": 2.285173206603564e-06, |
| "loss": 0.5683, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.7160493827160493, |
| "grad_norm": 1.1758853714133906, |
| "learning_rate": 2.2639189078928453e-06, |
| "loss": 0.5581, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.7175018155410312, |
| "grad_norm": 1.107044757903735, |
| "learning_rate": 2.242734949602856e-06, |
| "loss": 0.5448, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.7189542483660131, |
| "grad_norm": 1.2037164103649114, |
| "learning_rate": 2.2216218763404647e-06, |
| "loss": 0.5531, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.7204066811909949, |
| "grad_norm": 1.0588992084011324, |
| "learning_rate": 2.200580230890188e-06, |
| "loss": 0.5501, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.7218591140159768, |
| "grad_norm": 1.2543824405997601, |
| "learning_rate": 2.17961055420024e-06, |
| "loss": 0.5769, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.7233115468409586, |
| "grad_norm": 1.1899069770329052, |
| "learning_rate": 2.1587133853686422e-06, |
| "loss": 0.5683, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.7247639796659404, |
| "grad_norm": 1.144536370052011, |
| "learning_rate": 2.137889261629334e-06, |
| "loss": 0.5648, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.7262164124909223, |
| "grad_norm": 1.1936078152653293, |
| "learning_rate": 2.1171387183383936e-06, |
| "loss": 0.5646, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.7276688453159041, |
| "grad_norm": 1.26324013915445, |
| "learning_rate": 2.096462288960251e-06, |
| "loss": 0.5682, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.729121278140886, |
| "grad_norm": 1.1381437228179463, |
| "learning_rate": 2.0758605050539836e-06, |
| "loss": 0.5571, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.7305737109658679, |
| "grad_norm": 1.3500933515295954, |
| "learning_rate": 2.0553338962596492e-06, |
| "loss": 0.5716, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.7320261437908496, |
| "grad_norm": 1.0940717331908218, |
| "learning_rate": 2.03488299028467e-06, |
| "loss": 0.5626, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.7334785766158315, |
| "grad_norm": 1.1116999445105729, |
| "learning_rate": 2.0145083128902647e-06, |
| "loss": 0.5625, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.7349310094408134, |
| "grad_norm": 1.144025480175903, |
| "learning_rate": 1.9942103878779335e-06, |
| "loss": 0.5601, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.7363834422657952, |
| "grad_norm": 1.0557283567612936, |
| "learning_rate": 1.9739897370759886e-06, |
| "loss": 0.5523, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.7378358750907771, |
| "grad_norm": 1.243995372081041, |
| "learning_rate": 1.9538468803261514e-06, |
| "loss": 0.5521, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.739288307915759, |
| "grad_norm": 1.1122614530495916, |
| "learning_rate": 1.9337823354701617e-06, |
| "loss": 0.5615, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.7407407407407407, |
| "grad_norm": 1.012804702506735, |
| "learning_rate": 1.913796618336499e-06, |
| "loss": 0.5514, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.7421931735657226, |
| "grad_norm": 1.1487569184157758, |
| "learning_rate": 1.8938902427270905e-06, |
| "loss": 0.5595, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.7436456063907044, |
| "grad_norm": 1.222308594990331, |
| "learning_rate": 1.8740637204041195e-06, |
| "loss": 0.5645, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.7450980392156863, |
| "grad_norm": 1.1354476091482255, |
| "learning_rate": 1.8543175610768715e-06, |
| "loss": 0.5607, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.7465504720406682, |
| "grad_norm": 1.2205544178436005, |
| "learning_rate": 1.83465227238861e-06, |
| "loss": 0.542, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.7480029048656499, |
| "grad_norm": 1.2462160753237452, |
| "learning_rate": 1.8150683599035517e-06, |
| "loss": 0.5606, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.7494553376906318, |
| "grad_norm": 1.1396860492016365, |
| "learning_rate": 1.7955663270938501e-06, |
| "loss": 0.5689, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.7509077705156136, |
| "grad_norm": 1.1228524828818305, |
| "learning_rate": 1.7761466753266598e-06, |
| "loss": 0.5625, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.7523602033405955, |
| "grad_norm": 1.1360291736903685, |
| "learning_rate": 1.7568099038512466e-06, |
| "loss": 0.5724, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.7538126361655774, |
| "grad_norm": 1.226701284666325, |
| "learning_rate": 1.7375565097861518e-06, |
| "loss": 0.5653, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.7552650689905592, |
| "grad_norm": 1.1971595467490777, |
| "learning_rate": 1.7183869881064125e-06, |
| "loss": 0.5681, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.756717501815541, |
| "grad_norm": 1.003433379963408, |
| "learning_rate": 1.6993018316308351e-06, |
| "loss": 0.5497, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.7581699346405228, |
| "grad_norm": 1.0677706687056256, |
| "learning_rate": 1.6803015310093286e-06, |
| "loss": 0.5663, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.7596223674655047, |
| "grad_norm": 1.1960572257973088, |
| "learning_rate": 1.6613865747102876e-06, |
| "loss": 0.5566, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.7610748002904866, |
| "grad_norm": 1.1110041512712467, |
| "learning_rate": 1.6425574490080355e-06, |
| "loss": 0.5474, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.7625272331154684, |
| "grad_norm": 1.1953866183465143, |
| "learning_rate": 1.6238146379703257e-06, |
| "loss": 0.5602, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.7639796659404503, |
| "grad_norm": 1.184221410195916, |
| "learning_rate": 1.6051586234458932e-06, |
| "loss": 0.558, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.7654320987654321, |
| "grad_norm": 1.1917994670950118, |
| "learning_rate": 1.5865898850520671e-06, |
| "loss": 0.573, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.7668845315904139, |
| "grad_norm": 1.205079091727242, |
| "learning_rate": 1.5681089001624488e-06, |
| "loss": 0.5565, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.7683369644153958, |
| "grad_norm": 1.0590014592765518, |
| "learning_rate": 1.5497161438946218e-06, |
| "loss": 0.5537, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.7697893972403776, |
| "grad_norm": 1.3045355829406655, |
| "learning_rate": 1.5314120890979596e-06, |
| "loss": 0.5608, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.7712418300653595, |
| "grad_norm": 1.227226173650366, |
| "learning_rate": 1.5131972063414451e-06, |
| "loss": 0.563, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.7726942628903414, |
| "grad_norm": 1.1505400844326525, |
| "learning_rate": 1.4950719639015987e-06, |
| "loss": 0.5618, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.7741466957153231, |
| "grad_norm": 1.1971910791582392, |
| "learning_rate": 1.4770368277504183e-06, |
| "loss": 0.5559, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.775599128540305, |
| "grad_norm": 1.1465426761189066, |
| "learning_rate": 1.45909226154341e-06, |
| "loss": 0.5757, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.7770515613652869, |
| "grad_norm": 1.0530342043982832, |
| "learning_rate": 1.4412387266076677e-06, |
| "loss": 0.5699, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.7785039941902687, |
| "grad_norm": 1.1921772808125664, |
| "learning_rate": 1.4234766819300106e-06, |
| "loss": 0.5592, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.7799564270152506, |
| "grad_norm": 1.1969217401024441, |
| "learning_rate": 1.4058065841451856e-06, |
| "loss": 0.5658, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.7814088598402323, |
| "grad_norm": 1.1371738180522346, |
| "learning_rate": 1.3882288875241262e-06, |
| "loss": 0.5523, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.7828612926652142, |
| "grad_norm": 1.119312116230787, |
| "learning_rate": 1.3707440439622754e-06, |
| "loss": 0.5501, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 1.200972988458609, |
| "learning_rate": 1.353352502967966e-06, |
| "loss": 0.5393, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.7857661583151779, |
| "grad_norm": 1.005244568846047, |
| "learning_rate": 1.336054711650867e-06, |
| "loss": 0.5552, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.7872185911401598, |
| "grad_norm": 0.9811514201367332, |
| "learning_rate": 1.3188511147104882e-06, |
| "loss": 0.5615, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.7886710239651417, |
| "grad_norm": 1.2124333619418073, |
| "learning_rate": 1.3017421544247466e-06, |
| "loss": 0.5731, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.7901234567901234, |
| "grad_norm": 1.0164638888045425, |
| "learning_rate": 1.2847282706385962e-06, |
| "loss": 0.5449, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.7915758896151053, |
| "grad_norm": 1.0692055130184748, |
| "learning_rate": 1.267809900752725e-06, |
| "loss": 0.5581, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.7930283224400871, |
| "grad_norm": 1.2243966381535343, |
| "learning_rate": 1.2509874797122983e-06, |
| "loss": 0.5694, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.794480755265069, |
| "grad_norm": 1.1192058071022615, |
| "learning_rate": 1.2342614399957952e-06, |
| "loss": 0.5601, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.7959331880900509, |
| "grad_norm": 1.210664779695526, |
| "learning_rate": 1.217632211603868e-06, |
| "loss": 0.5383, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.7973856209150327, |
| "grad_norm": 1.2306429782422048, |
| "learning_rate": 1.2011002220483099e-06, |
| "loss": 0.5503, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.7988380537400145, |
| "grad_norm": 1.1449496150562748, |
| "learning_rate": 1.1846658963410472e-06, |
| "loss": 0.561, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.8002904865649964, |
| "grad_norm": 1.1809146975647171, |
| "learning_rate": 1.168329656983222e-06, |
| "loss": 0.5489, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.8017429193899782, |
| "grad_norm": 1.1865786985653701, |
| "learning_rate": 1.1520919239543272e-06, |
| "loss": 0.5443, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.8031953522149601, |
| "grad_norm": 1.2819514449232758, |
| "learning_rate": 1.1359531147014102e-06, |
| "loss": 0.5784, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.8046477850399419, |
| "grad_norm": 1.140249494732679, |
| "learning_rate": 1.11991364412834e-06, |
| "loss": 0.5472, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.8061002178649237, |
| "grad_norm": 1.0963574239357976, |
| "learning_rate": 1.1039739245851426e-06, |
| "loss": 0.5614, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.8075526506899056, |
| "grad_norm": 1.1963836912036798, |
| "learning_rate": 1.088134365857399e-06, |
| "loss": 0.5516, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.8090050835148874, |
| "grad_norm": 1.320400739555157, |
| "learning_rate": 1.0723953751557098e-06, |
| "loss": 0.5643, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.8104575163398693, |
| "grad_norm": 1.2261172403861758, |
| "learning_rate": 1.0567573571052265e-06, |
| "loss": 0.545, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.8119099491648512, |
| "grad_norm": 1.1363072652624087, |
| "learning_rate": 1.0412207137352504e-06, |
| "loss": 0.5562, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.813362381989833, |
| "grad_norm": 1.0696753091917897, |
| "learning_rate": 1.0257858444688968e-06, |
| "loss": 0.5584, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.8148148148148148, |
| "grad_norm": 1.092336652561905, |
| "learning_rate": 1.0104531461128224e-06, |
| "loss": 0.5509, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.8162672476397966, |
| "grad_norm": 1.2190453226296554, |
| "learning_rate": 9.952230128470358e-07, |
| "loss": 0.5552, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.8177196804647785, |
| "grad_norm": 1.1756174285580154, |
| "learning_rate": 9.800958362147433e-07, |
| "loss": 0.5611, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.8191721132897604, |
| "grad_norm": 1.050298389841538, |
| "learning_rate": 9.65072005112308e-07, |
| "loss": 0.5536, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.8206245461147422, |
| "grad_norm": 1.2990174959407426, |
| "learning_rate": 9.501519057792275e-07, |
| "loss": 0.5495, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.8220769789397241, |
| "grad_norm": 1.1318695700100998, |
| "learning_rate": 9.353359217882241e-07, |
| "loss": 0.5557, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 1.1818056539247317, |
| "learning_rate": 9.206244340353732e-07, |
| "loss": 0.5703, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.8249818445896877, |
| "grad_norm": 1.191491253002993, |
| "learning_rate": 9.060178207303077e-07, |
| "loss": 0.5543, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.8264342774146696, |
| "grad_norm": 1.2775803771232788, |
| "learning_rate": 8.915164573865109e-07, |
| "loss": 0.5673, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.8278867102396514, |
| "grad_norm": 1.0993365384271814, |
| "learning_rate": 8.771207168116407e-07, |
| "loss": 0.5526, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.8293391430646333, |
| "grad_norm": 1.2010857578242673, |
| "learning_rate": 8.628309690979658e-07, |
| "loss": 0.5465, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.830791575889615, |
| "grad_norm": 1.1363204888828164, |
| "learning_rate": 8.486475816128376e-07, |
| "loss": 0.5522, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.8322440087145969, |
| "grad_norm": 1.237168492535083, |
| "learning_rate": 8.345709189892504e-07, |
| "loss": 0.5377, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.8336964415395788, |
| "grad_norm": 1.1890926723132464, |
| "learning_rate": 8.206013431164683e-07, |
| "loss": 0.5613, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.8351488743645606, |
| "grad_norm": 1.2611972496063513, |
| "learning_rate": 8.0673921313072e-07, |
| "loss": 0.5562, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.8366013071895425, |
| "grad_norm": 1.1453681982727373, |
| "learning_rate": 7.929848854059663e-07, |
| "loss": 0.5469, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.8380537400145244, |
| "grad_norm": 1.1161546893459802, |
| "learning_rate": 7.793387135447372e-07, |
| "loss": 0.5688, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.8395061728395061, |
| "grad_norm": 1.242951008236561, |
| "learning_rate": 7.658010483690431e-07, |
| "loss": 0.5516, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.840958605664488, |
| "grad_norm": 1.1291848404892897, |
| "learning_rate": 7.52372237911358e-07, |
| "loss": 0.5558, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.8424110384894699, |
| "grad_norm": 1.1344340429459099, |
| "learning_rate": 7.390526274056625e-07, |
| "loss": 0.5368, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.8438634713144517, |
| "grad_norm": 1.2369341276497008, |
| "learning_rate": 7.25842559278584e-07, |
| "loss": 0.5438, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.8453159041394336, |
| "grad_norm": 1.161564478717058, |
| "learning_rate": 7.127423731405747e-07, |
| "loss": 0.5524, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.8467683369644154, |
| "grad_norm": 1.3389378618000198, |
| "learning_rate": 6.997524057771964e-07, |
| "loss": 0.5411, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.8482207697893972, |
| "grad_norm": 1.2324708082947882, |
| "learning_rate": 6.868729911404582e-07, |
| "loss": 0.5594, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.8496732026143791, |
| "grad_norm": 1.0931906751127958, |
| "learning_rate": 6.741044603402214e-07, |
| "loss": 0.5394, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.8511256354393609, |
| "grad_norm": 1.1045798920330345, |
| "learning_rate": 6.614471416357055e-07, |
| "loss": 0.5517, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.8525780682643428, |
| "grad_norm": 1.1003308882789462, |
| "learning_rate": 6.489013604270277e-07, |
| "loss": 0.5432, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.8540305010893247, |
| "grad_norm": 1.1511825195957979, |
| "learning_rate": 6.364674392468578e-07, |
| "loss": 0.5543, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.8554829339143064, |
| "grad_norm": 1.1016772920186344, |
| "learning_rate": 6.241456977521115e-07, |
| "loss": 0.5511, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.8569353667392883, |
| "grad_norm": 1.2345711604547172, |
| "learning_rate": 6.119364527157401e-07, |
| "loss": 0.5546, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.8583877995642701, |
| "grad_norm": 1.1026866190660687, |
| "learning_rate": 5.998400180185838e-07, |
| "loss": 0.5534, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.859840232389252, |
| "grad_norm": 1.0696348901565953, |
| "learning_rate": 5.878567046413025e-07, |
| "loss": 0.5431, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.8612926652142339, |
| "grad_norm": 1.074925388402079, |
| "learning_rate": 5.759868206563834e-07, |
| "loss": 0.5564, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.8627450980392157, |
| "grad_norm": 1.1892355845709555, |
| "learning_rate": 5.642306712202183e-07, |
| "loss": 0.56, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.8641975308641975, |
| "grad_norm": 1.1714018297678883, |
| "learning_rate": 5.525885585652591e-07, |
| "loss": 0.5477, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.8656499636891793, |
| "grad_norm": 1.2243789216177572, |
| "learning_rate": 5.410607819922481e-07, |
| "loss": 0.5561, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.8671023965141612, |
| "grad_norm": 1.158429282768604, |
| "learning_rate": 5.296476378625237e-07, |
| "loss": 0.5246, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.8685548293391431, |
| "grad_norm": 1.2064879125921322, |
| "learning_rate": 5.183494195904015e-07, |
| "loss": 0.5434, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.8700072621641249, |
| "grad_norm": 1.0370084252960212, |
| "learning_rate": 5.071664176356294e-07, |
| "loss": 0.556, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.8714596949891068, |
| "grad_norm": 1.1529022886105922, |
| "learning_rate": 4.960989194959225e-07, |
| "loss": 0.5349, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.8729121278140886, |
| "grad_norm": 1.0702466803229502, |
| "learning_rate": 4.851472096995741e-07, |
| "loss": 0.5641, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.8743645606390704, |
| "grad_norm": 1.195504112892932, |
| "learning_rate": 4.7431156979813097e-07, |
| "loss": 0.5627, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.8758169934640523, |
| "grad_norm": 1.0424744381436926, |
| "learning_rate": 4.6359227835916954e-07, |
| "loss": 0.5457, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.8772694262890341, |
| "grad_norm": 1.136106426677912, |
| "learning_rate": 4.529896109591203e-07, |
| "loss": 0.5536, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.878721859114016, |
| "grad_norm": 1.1941194023099557, |
| "learning_rate": 4.425038401761961e-07, |
| "loss": 0.5512, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.8801742919389978, |
| "grad_norm": 1.1005592964409183, |
| "learning_rate": 4.3213523558337354e-07, |
| "loss": 0.5522, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.8816267247639796, |
| "grad_norm": 1.3046172497671011, |
| "learning_rate": 4.218840637414695e-07, |
| "loss": 0.5389, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.8830791575889615, |
| "grad_norm": 1.2050786337197097, |
| "learning_rate": 4.117505881922856e-07, |
| "loss": 0.5637, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.8845315904139434, |
| "grad_norm": 1.1086711189663023, |
| "learning_rate": 4.0173506945183295e-07, |
| "loss": 0.5637, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.8859840232389252, |
| "grad_norm": 1.142760086036647, |
| "learning_rate": 3.9183776500363593e-07, |
| "loss": 0.5639, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.8874364560639071, |
| "grad_norm": 1.211597985547058, |
| "learning_rate": 3.8205892929211175e-07, |
| "loss": 0.5534, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 1.125094111731544, |
| "learning_rate": 3.7239881371603005e-07, |
| "loss": 0.5514, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.8903413217138707, |
| "grad_norm": 1.1253410539349802, |
| "learning_rate": 3.6285766662204735e-07, |
| "loss": 0.5593, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.8917937545388526, |
| "grad_norm": 1.076054931723469, |
| "learning_rate": 3.534357332983257e-07, |
| "loss": 0.5494, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.8932461873638344, |
| "grad_norm": 1.2433138382241562, |
| "learning_rate": 3.441332559682242e-07, |
| "loss": 0.5507, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.8946986201888163, |
| "grad_norm": 1.172111145318429, |
| "learning_rate": 3.349504737840742e-07, |
| "loss": 0.5632, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.8961510530137982, |
| "grad_norm": 1.2018077073853302, |
| "learning_rate": 3.258876228210267e-07, |
| "loss": 0.5381, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.8976034858387799, |
| "grad_norm": 1.1218901853415595, |
| "learning_rate": 3.169449360709914e-07, |
| "loss": 0.5651, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.8990559186637618, |
| "grad_norm": 1.075452696669577, |
| "learning_rate": 3.0812264343663467e-07, |
| "loss": 0.5518, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.9005083514887436, |
| "grad_norm": 1.2898875627777047, |
| "learning_rate": 2.99420971725482e-07, |
| "loss": 0.5535, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.9019607843137255, |
| "grad_norm": 1.064409341720963, |
| "learning_rate": 2.9084014464407837e-07, |
| "loss": 0.551, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.9034132171387074, |
| "grad_norm": 1.1430289990560287, |
| "learning_rate": 2.8238038279224e-07, |
| "loss": 0.5351, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.9048656499636892, |
| "grad_norm": 1.0942084433621513, |
| "learning_rate": 2.740419036573844e-07, |
| "loss": 0.5628, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.906318082788671, |
| "grad_norm": 1.1827726416299507, |
| "learning_rate": 2.6582492160893536e-07, |
| "loss": 0.5698, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.9077705156136529, |
| "grad_norm": 1.0512203056975564, |
| "learning_rate": 2.5772964789281593e-07, |
| "loss": 0.539, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.9092229484386347, |
| "grad_norm": 1.177449766279641, |
| "learning_rate": 2.4975629062601534e-07, |
| "loss": 0.5475, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.9106753812636166, |
| "grad_norm": 1.2124754199233574, |
| "learning_rate": 2.419050547912388e-07, |
| "loss": 0.541, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.9121278140885984, |
| "grad_norm": 1.3580937630552576, |
| "learning_rate": 2.3417614223163908e-07, |
| "loss": 0.5588, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.9135802469135802, |
| "grad_norm": 1.1170472146222037, |
| "learning_rate": 2.26569751645625e-07, |
| "loss": 0.5436, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.9150326797385621, |
| "grad_norm": 1.1184802548299553, |
| "learning_rate": 2.1908607858175612e-07, |
| "loss": 0.5377, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.9164851125635439, |
| "grad_norm": 1.1396702009546613, |
| "learning_rate": 2.117253154337118e-07, |
| "loss": 0.5683, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.9179375453885258, |
| "grad_norm": 1.2119088736658123, |
| "learning_rate": 2.0448765143534942e-07, |
| "loss": 0.5668, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.9193899782135077, |
| "grad_norm": 1.0448734314632342, |
| "learning_rate": 1.973732726558364e-07, |
| "loss": 0.5437, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.9208424110384895, |
| "grad_norm": 1.2851112602098311, |
| "learning_rate": 1.9038236199486693e-07, |
| "loss": 0.5622, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.9222948438634713, |
| "grad_norm": 1.1700640178574329, |
| "learning_rate": 1.8351509917796218e-07, |
| "loss": 0.542, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.9237472766884531, |
| "grad_norm": 1.1416778336018678, |
| "learning_rate": 1.7677166075184548e-07, |
| "loss": 0.5529, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.925199709513435, |
| "grad_norm": 1.1230308913216087, |
| "learning_rate": 1.7015222007990883e-07, |
| "loss": 0.5559, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.9266521423384169, |
| "grad_norm": 1.1568250466964043, |
| "learning_rate": 1.6365694733775305e-07, |
| "loss": 0.5507, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.9281045751633987, |
| "grad_norm": 1.1602815569402067, |
| "learning_rate": 1.572860095088108e-07, |
| "loss": 0.552, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.9295570079883806, |
| "grad_norm": 1.0423401424679095, |
| "learning_rate": 1.5103957038005935e-07, |
| "loss": 0.5446, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.9310094408133623, |
| "grad_norm": 1.1374874233890928, |
| "learning_rate": 1.4491779053780298e-07, |
| "loss": 0.5473, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.9324618736383442, |
| "grad_norm": 1.1755709384042587, |
| "learning_rate": 1.3892082736355283e-07, |
| "loss": 0.5486, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.9339143064633261, |
| "grad_norm": 1.1744643775241368, |
| "learning_rate": 1.3304883502997133e-07, |
| "loss": 0.5518, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.9353667392883079, |
| "grad_norm": 1.1216236591765696, |
| "learning_rate": 1.2730196449691756e-07, |
| "loss": 0.5492, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.9368191721132898, |
| "grad_norm": 1.1470393369010776, |
| "learning_rate": 1.2168036350755975e-07, |
| "loss": 0.5322, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.9382716049382716, |
| "grad_norm": 1.1985354195876317, |
| "learning_rate": 1.1618417658458003e-07, |
| "loss": 0.5616, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.9397240377632534, |
| "grad_norm": 1.1475497479759824, |
| "learning_rate": 1.1081354502645913e-07, |
| "loss": 0.5531, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 1.1396353932104606, |
| "learning_rate": 1.0556860690384252e-07, |
| "loss": 0.5472, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.9426289034132171, |
| "grad_norm": 1.1215848254083782, |
| "learning_rate": 1.0044949705599216e-07, |
| "loss": 0.5429, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.944081336238199, |
| "grad_norm": 1.005591582016032, |
| "learning_rate": 9.545634708731988e-08, |
| "loss": 0.5418, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.9455337690631809, |
| "grad_norm": 1.215225242394237, |
| "learning_rate": 9.058928536400058e-08, |
| "loss": 0.5578, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.9469862018881626, |
| "grad_norm": 1.152537711229488, |
| "learning_rate": 8.584843701067935e-08, |
| "loss": 0.5404, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.9484386347131445, |
| "grad_norm": 1.175848365037797, |
| "learning_rate": 8.123392390724682e-08, |
| "loss": 0.5522, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.9498910675381264, |
| "grad_norm": 1.0183498527962453, |
| "learning_rate": 7.674586468570999e-08, |
| "loss": 0.5564, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.9513435003631082, |
| "grad_norm": 1.2151729065782833, |
| "learning_rate": 7.238437472714466e-08, |
| "loss": 0.5561, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.9527959331880901, |
| "grad_norm": 1.1402236462651618, |
| "learning_rate": 6.81495661587217e-08, |
| "loss": 0.5411, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.954248366013072, |
| "grad_norm": 1.1521868862152016, |
| "learning_rate": 6.404154785083383e-08, |
| "loss": 0.5539, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.9557007988380537, |
| "grad_norm": 1.1258302178296054, |
| "learning_rate": 6.006042541428669e-08, |
| "loss": 0.5532, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.9571532316630356, |
| "grad_norm": 1.173412519187008, |
| "learning_rate": 5.6206301197594404e-08, |
| "loss": 0.5505, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.9586056644880174, |
| "grad_norm": 1.136513704911577, |
| "learning_rate": 5.247927428433885e-08, |
| "loss": 0.5435, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.9600580973129993, |
| "grad_norm": 1.1972723133655234, |
| "learning_rate": 4.887944049062843e-08, |
| "loss": 0.548, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.9615105301379812, |
| "grad_norm": 1.240930781464282, |
| "learning_rate": 4.5406892362632185e-08, |
| "loss": 0.5538, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.9629629629629629, |
| "grad_norm": 1.2645184421648727, |
| "learning_rate": 4.206171917420121e-08, |
| "loss": 0.5616, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.9644153957879448, |
| "grad_norm": 1.1619344530688336, |
| "learning_rate": 3.884400692457435e-08, |
| "loss": 0.5578, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.9658678286129266, |
| "grad_norm": 1.0415045949293107, |
| "learning_rate": 3.575383833616497e-08, |
| "loss": 0.536, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.9673202614379085, |
| "grad_norm": 1.1707683296063809, |
| "learning_rate": 3.2791292852437096e-08, |
| "loss": 0.5444, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.9687726942628904, |
| "grad_norm": 0.9579807050337852, |
| "learning_rate": 2.99564466358615e-08, |
| "loss": 0.5604, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.9702251270878722, |
| "grad_norm": 1.155540906901066, |
| "learning_rate": 2.7249372565957277e-08, |
| "loss": 0.5495, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.971677559912854, |
| "grad_norm": 1.0959456715901421, |
| "learning_rate": 2.4670140237419428e-08, |
| "loss": 0.5483, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.9731299927378358, |
| "grad_norm": 1.0366185075689953, |
| "learning_rate": 2.2218815958329754e-08, |
| "loss": 0.5497, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.9745824255628177, |
| "grad_norm": 1.0759294981597065, |
| "learning_rate": 1.9895462748450444e-08, |
| "loss": 0.5634, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.9760348583877996, |
| "grad_norm": 1.1209995693338786, |
| "learning_rate": 1.770014033760592e-08, |
| "loss": 0.5508, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.9774872912127814, |
| "grad_norm": 1.210238366549934, |
| "learning_rate": 1.5632905164145173e-08, |
| "loss": 0.5813, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.9789397240377633, |
| "grad_norm": 1.15542524575641, |
| "learning_rate": 1.3693810373494598e-08, |
| "loss": 0.5421, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.9803921568627451, |
| "grad_norm": 1.194050906215969, |
| "learning_rate": 1.188290581678575e-08, |
| "loss": 0.5586, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.9818445896877269, |
| "grad_norm": 1.1566645017111077, |
| "learning_rate": 1.0200238049580258e-08, |
| "loss": 0.5632, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.9832970225127088, |
| "grad_norm": 1.0710546930410338, |
| "learning_rate": 8.645850330668559e-09, |
| "loss": 0.5368, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.9847494553376906, |
| "grad_norm": 1.175731861197897, |
| "learning_rate": 7.219782620958571e-09, |
| "loss": 0.5388, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.9862018881626725, |
| "grad_norm": 1.0791848418311811, |
| "learning_rate": 5.922071582449285e-09, |
| "loss": 0.5585, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.9876543209876543, |
| "grad_norm": 1.21651622954666, |
| "learning_rate": 4.752750577288745e-09, |
| "loss": 0.5603, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.9891067538126361, |
| "grad_norm": 1.294701087862953, |
| "learning_rate": 3.711849666914735e-09, |
| "loss": 0.5713, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.990559186637618, |
| "grad_norm": 1.100757408335571, |
| "learning_rate": 2.799395611281508e-09, |
| "loss": 0.5587, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.9920116194625999, |
| "grad_norm": 1.282263624241459, |
| "learning_rate": 2.0154118681753322e-09, |
| "loss": 0.5588, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.9934640522875817, |
| "grad_norm": 1.0975199346392859, |
| "learning_rate": 1.3599185926072012e-09, |
| "loss": 0.5724, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.9949164851125636, |
| "grad_norm": 1.1620574281790235, |
| "learning_rate": 8.329326362976897e-10, |
| "loss": 0.5621, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.9963689179375453, |
| "grad_norm": 1.1717561623715795, |
| "learning_rate": 4.34467547242301e-10, |
| "loss": 0.5506, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.9978213507625272, |
| "grad_norm": 1.155270191238308, |
| "learning_rate": 1.645335693623018e-10, |
| "loss": 0.5533, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.9992737835875091, |
| "grad_norm": 1.240301119345841, |
| "learning_rate": 2.3137642244375202e-11, |
| "loss": 0.5538, |
| "step": 6880 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 6885, |
| "total_flos": 1942329112002560.0, |
| "train_loss": 0.5927019230420812, |
| "train_runtime": 56356.5973, |
| "train_samples_per_second": 1.955, |
| "train_steps_per_second": 0.122 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 6885, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1942329112002560.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|