{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 40044, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004994506043352313, "grad_norm": 0.8967432379722595, "learning_rate": 9.999998461261478e-05, "loss": 1.5801, "step": 10 }, { "epoch": 0.0009989012086704626, "grad_norm": 2.9660449028015137, "learning_rate": 9.99999384504686e-05, "loss": 1.3974, "step": 20 }, { "epoch": 0.0014983518130056938, "grad_norm": 1.4422023296356201, "learning_rate": 9.999986151358985e-05, "loss": 1.3449, "step": 30 }, { "epoch": 0.001997802417340925, "grad_norm": 5.9646430015563965, "learning_rate": 9.999975380202592e-05, "loss": 1.264, "step": 40 }, { "epoch": 0.002497253021676156, "grad_norm": 5.202009677886963, "learning_rate": 9.999961531584305e-05, "loss": 1.2576, "step": 50 }, { "epoch": 0.0029967036260113876, "grad_norm": 2.8307554721832275, "learning_rate": 9.999944605512654e-05, "loss": 1.0792, "step": 60 }, { "epoch": 0.0034961542303466185, "grad_norm": 6.909800052642822, "learning_rate": 9.999924601998054e-05, "loss": 0.9607, "step": 70 }, { "epoch": 0.00399560483468185, "grad_norm": 4.00949239730835, "learning_rate": 9.999901521052817e-05, "loss": 1.1402, "step": 80 }, { "epoch": 0.004495055439017081, "grad_norm": 4.892696857452393, "learning_rate": 9.999875362691149e-05, "loss": 1.0316, "step": 90 }, { "epoch": 0.004994506043352312, "grad_norm": 2.0788443088531494, "learning_rate": 9.999846126929151e-05, "loss": 1.1106, "step": 100 }, { "epoch": 0.005493956647687544, "grad_norm": 4.872004508972168, "learning_rate": 9.999813813784817e-05, "loss": 0.8462, "step": 110 }, { "epoch": 0.005993407252022775, "grad_norm": 1.1907895803451538, "learning_rate": 9.999778423278036e-05, "loss": 0.841, "step": 120 }, { "epoch": 0.0064928578563580065, "grad_norm": 2.7548792362213135, "learning_rate": 9.99973995543059e-05, "loss": 1.2706, "step": 130 }, { "epoch": 0.006992308460693237, "grad_norm": 2.08931040763855, "learning_rate": 9.999698410266157e-05, "loss": 1.1731, "step": 140 }, { "epoch": 0.0074917590650284685, "grad_norm": 2.7863011360168457, "learning_rate": 9.999653787810307e-05, "loss": 1.1902, "step": 150 }, { "epoch": 0.0079912096693637, "grad_norm": 5.634822845458984, "learning_rate": 9.999606088090505e-05, "loss": 1.1306, "step": 160 }, { "epoch": 0.00849066027369893, "grad_norm": 7.692355155944824, "learning_rate": 9.999555311136111e-05, "loss": 0.8827, "step": 170 }, { "epoch": 0.008990110878034162, "grad_norm": 2.2336292266845703, "learning_rate": 9.999501456978375e-05, "loss": 1.0124, "step": 180 }, { "epoch": 0.009489561482369393, "grad_norm": 4.546370983123779, "learning_rate": 9.999444525650448e-05, "loss": 0.9965, "step": 190 }, { "epoch": 0.009989012086704625, "grad_norm": 1.5386520624160767, "learning_rate": 9.999384517187369e-05, "loss": 0.9698, "step": 200 }, { "epoch": 0.010488462691039856, "grad_norm": 7.2799391746521, "learning_rate": 9.999321431626072e-05, "loss": 1.0506, "step": 210 }, { "epoch": 0.010987913295375087, "grad_norm": 3.7693991661071777, "learning_rate": 9.999255269005387e-05, "loss": 0.814, "step": 220 }, { "epoch": 0.011487363899710319, "grad_norm": 4.580488204956055, "learning_rate": 9.999186029366036e-05, "loss": 0.7806, "step": 230 }, { "epoch": 0.01198681450404555, "grad_norm": 2.3693222999572754, "learning_rate": 9.999113712750637e-05, "loss": 1.0188, "step": 240 }, { "epoch": 0.012486265108380782, "grad_norm": 3.053426504135132, "learning_rate": 9.999038319203699e-05, "loss": 0.8191, "step": 250 }, { "epoch": 0.012985715712716013, "grad_norm": 3.2131946086883545, "learning_rate": 9.99895984877163e-05, "loss": 1.0856, "step": 260 }, { "epoch": 0.013485166317051244, "grad_norm": 5.172701835632324, "learning_rate": 9.998878301502723e-05, "loss": 0.854, "step": 270 }, { "epoch": 0.013984616921386474, "grad_norm": 4.785862922668457, "learning_rate": 9.998793677447172e-05, "loss": 0.9823, "step": 280 }, { "epoch": 0.014484067525721706, "grad_norm": 2.307417869567871, "learning_rate": 9.998705976657064e-05, "loss": 1.0952, "step": 290 }, { "epoch": 0.014983518130056937, "grad_norm": 3.640514373779297, "learning_rate": 9.998615199186378e-05, "loss": 0.9484, "step": 300 }, { "epoch": 0.015482968734392168, "grad_norm": 1.485445499420166, "learning_rate": 9.998521345090986e-05, "loss": 0.9246, "step": 310 }, { "epoch": 0.0159824193387274, "grad_norm": 6.912786960601807, "learning_rate": 9.998424414428656e-05, "loss": 0.9799, "step": 320 }, { "epoch": 0.01648186994306263, "grad_norm": 6.9490180015563965, "learning_rate": 9.998324407259048e-05, "loss": 0.7185, "step": 330 }, { "epoch": 0.01698132054739786, "grad_norm": 6.867273330688477, "learning_rate": 9.998221323643714e-05, "loss": 0.982, "step": 340 }, { "epoch": 0.017480771151733094, "grad_norm": 4.2101054191589355, "learning_rate": 9.998115163646104e-05, "loss": 1.0997, "step": 350 }, { "epoch": 0.017980221756068324, "grad_norm": 2.994784355163574, "learning_rate": 9.998005927331559e-05, "loss": 0.9564, "step": 360 }, { "epoch": 0.018479672360403557, "grad_norm": 2.8205251693725586, "learning_rate": 9.997893614767312e-05, "loss": 1.0382, "step": 370 }, { "epoch": 0.018979122964738786, "grad_norm": 4.516458511352539, "learning_rate": 9.997778226022492e-05, "loss": 1.1056, "step": 380 }, { "epoch": 0.01947857356907402, "grad_norm": 3.8063511848449707, "learning_rate": 9.997659761168119e-05, "loss": 0.8491, "step": 390 }, { "epoch": 0.01997802417340925, "grad_norm": 2.98667049407959, "learning_rate": 9.997538220277108e-05, "loss": 1.0848, "step": 400 }, { "epoch": 0.020477474777744482, "grad_norm": 4.909623622894287, "learning_rate": 9.997413603424268e-05, "loss": 0.8135, "step": 410 }, { "epoch": 0.020976925382079712, "grad_norm": 2.7305667400360107, "learning_rate": 9.9972859106863e-05, "loss": 1.0388, "step": 420 }, { "epoch": 0.021476375986414945, "grad_norm": 3.258394718170166, "learning_rate": 9.997155142141796e-05, "loss": 0.8844, "step": 430 }, { "epoch": 0.021975826590750175, "grad_norm": 5.074390888214111, "learning_rate": 9.997021297871247e-05, "loss": 1.2031, "step": 440 }, { "epoch": 0.022475277195085405, "grad_norm": 1.4644519090652466, "learning_rate": 9.996884377957029e-05, "loss": 1.0039, "step": 450 }, { "epoch": 0.022974727799420638, "grad_norm": 3.1942813396453857, "learning_rate": 9.996744382483421e-05, "loss": 1.0889, "step": 460 }, { "epoch": 0.023474178403755867, "grad_norm": 8.286642074584961, "learning_rate": 9.996601311536586e-05, "loss": 0.9957, "step": 470 }, { "epoch": 0.0239736290080911, "grad_norm": 1.7618873119354248, "learning_rate": 9.996455165204583e-05, "loss": 0.8698, "step": 480 }, { "epoch": 0.02447307961242633, "grad_norm": 1.5642335414886475, "learning_rate": 9.996305943577366e-05, "loss": 0.9486, "step": 490 }, { "epoch": 0.024972530216761563, "grad_norm": 13.810921669006348, "learning_rate": 9.996153646746781e-05, "loss": 0.9216, "step": 500 }, { "epoch": 0.025471980821096793, "grad_norm": 2.5668368339538574, "learning_rate": 9.995998274806563e-05, "loss": 1.0009, "step": 510 }, { "epoch": 0.025971431425432026, "grad_norm": 1.7821905612945557, "learning_rate": 9.995839827852346e-05, "loss": 0.9474, "step": 520 }, { "epoch": 0.026470882029767256, "grad_norm": 3.3876638412475586, "learning_rate": 9.995678305981652e-05, "loss": 0.9413, "step": 530 }, { "epoch": 0.02697033263410249, "grad_norm": 3.1569855213165283, "learning_rate": 9.995513709293897e-05, "loss": 0.9358, "step": 540 }, { "epoch": 0.02746978323843772, "grad_norm": 5.637537479400635, "learning_rate": 9.99534603789039e-05, "loss": 1.0568, "step": 550 }, { "epoch": 0.027969233842772948, "grad_norm": 2.1420366764068604, "learning_rate": 9.995175291874331e-05, "loss": 0.8176, "step": 560 }, { "epoch": 0.02846868444710818, "grad_norm": 2.7238314151763916, "learning_rate": 9.995001471350811e-05, "loss": 0.9457, "step": 570 }, { "epoch": 0.02896813505144341, "grad_norm": 8.453182220458984, "learning_rate": 9.994824576426822e-05, "loss": 0.9144, "step": 580 }, { "epoch": 0.029467585655778644, "grad_norm": 1.4573590755462646, "learning_rate": 9.994644607211236e-05, "loss": 0.8454, "step": 590 }, { "epoch": 0.029967036260113874, "grad_norm": 1.987531065940857, "learning_rate": 9.994461563814828e-05, "loss": 1.0507, "step": 600 }, { "epoch": 0.030466486864449107, "grad_norm": 2.4331166744232178, "learning_rate": 9.994275446350256e-05, "loss": 0.9642, "step": 610 }, { "epoch": 0.030965937468784337, "grad_norm": 1.337424397468567, "learning_rate": 9.994086254932078e-05, "loss": 1.0701, "step": 620 }, { "epoch": 0.03146538807311957, "grad_norm": 5.322749614715576, "learning_rate": 9.99389398967674e-05, "loss": 1.0151, "step": 630 }, { "epoch": 0.0319648386774548, "grad_norm": 1.6098060607910156, "learning_rate": 9.993698650702578e-05, "loss": 1.0929, "step": 640 }, { "epoch": 0.03246428928179003, "grad_norm": 2.8981151580810547, "learning_rate": 9.993500238129824e-05, "loss": 0.9528, "step": 650 }, { "epoch": 0.03296373988612526, "grad_norm": 2.6054182052612305, "learning_rate": 9.9932987520806e-05, "loss": 0.799, "step": 660 }, { "epoch": 0.033463190490460495, "grad_norm": 4.730319976806641, "learning_rate": 9.993094192678918e-05, "loss": 1.2382, "step": 670 }, { "epoch": 0.03396264109479572, "grad_norm": 2.3788576126098633, "learning_rate": 9.992886560050686e-05, "loss": 0.7042, "step": 680 }, { "epoch": 0.034462091699130955, "grad_norm": 9.173518180847168, "learning_rate": 9.9926758543237e-05, "loss": 0.9632, "step": 690 }, { "epoch": 0.03496154230346619, "grad_norm": 3.4788527488708496, "learning_rate": 9.992462075627646e-05, "loss": 0.7926, "step": 700 }, { "epoch": 0.03546099290780142, "grad_norm": 2.7496581077575684, "learning_rate": 9.992245224094109e-05, "loss": 1.0237, "step": 710 }, { "epoch": 0.03596044351213665, "grad_norm": 2.733050584793091, "learning_rate": 9.992025299856556e-05, "loss": 0.772, "step": 720 }, { "epoch": 0.03645989411647188, "grad_norm": 0.8263514041900635, "learning_rate": 9.99180230305035e-05, "loss": 0.9264, "step": 730 }, { "epoch": 0.036959344720807114, "grad_norm": 3.485135555267334, "learning_rate": 9.991576233812745e-05, "loss": 1.057, "step": 740 }, { "epoch": 0.03745879532514235, "grad_norm": 0.6603259444236755, "learning_rate": 9.991347092282885e-05, "loss": 0.6896, "step": 750 }, { "epoch": 0.03795824592947757, "grad_norm": 4.332391262054443, "learning_rate": 9.991114878601806e-05, "loss": 0.8355, "step": 760 }, { "epoch": 0.038457696533812806, "grad_norm": 4.493191242218018, "learning_rate": 9.990879592912436e-05, "loss": 0.8238, "step": 770 }, { "epoch": 0.03895714713814804, "grad_norm": 2.2180371284484863, "learning_rate": 9.99064123535959e-05, "loss": 0.7578, "step": 780 }, { "epoch": 0.039456597742483265, "grad_norm": 1.2803512811660767, "learning_rate": 9.990399806089976e-05, "loss": 0.7661, "step": 790 }, { "epoch": 0.0399560483468185, "grad_norm": 1.5827938318252563, "learning_rate": 9.990155305252194e-05, "loss": 0.8578, "step": 800 }, { "epoch": 0.04045549895115373, "grad_norm": 3.514148473739624, "learning_rate": 9.989907732996733e-05, "loss": 0.7647, "step": 810 }, { "epoch": 0.040954949555488965, "grad_norm": 4.280004024505615, "learning_rate": 9.989657089475972e-05, "loss": 0.9934, "step": 820 }, { "epoch": 0.04145440015982419, "grad_norm": 3.2037782669067383, "learning_rate": 9.98940337484418e-05, "loss": 0.9177, "step": 830 }, { "epoch": 0.041953850764159424, "grad_norm": 3.360799551010132, "learning_rate": 9.989146589257519e-05, "loss": 0.926, "step": 840 }, { "epoch": 0.04245330136849466, "grad_norm": 2.9952991008758545, "learning_rate": 9.98888673287404e-05, "loss": 0.9762, "step": 850 }, { "epoch": 0.04295275197282989, "grad_norm": 0.840907871723175, "learning_rate": 9.988623805853679e-05, "loss": 0.731, "step": 860 }, { "epoch": 0.04345220257716512, "grad_norm": 2.830888509750366, "learning_rate": 9.988357808358272e-05, "loss": 0.8859, "step": 870 }, { "epoch": 0.04395165318150035, "grad_norm": 2.584847927093506, "learning_rate": 9.988088740551535e-05, "loss": 0.9814, "step": 880 }, { "epoch": 0.04445110378583558, "grad_norm": 4.042998313903809, "learning_rate": 9.98781660259908e-05, "loss": 1.0667, "step": 890 }, { "epoch": 0.04495055439017081, "grad_norm": 3.0407557487487793, "learning_rate": 9.987541394668407e-05, "loss": 0.9815, "step": 900 }, { "epoch": 0.04545000499450604, "grad_norm": 4.644862651824951, "learning_rate": 9.987263116928903e-05, "loss": 1.0614, "step": 910 }, { "epoch": 0.045949455598841275, "grad_norm": 4.128206253051758, "learning_rate": 9.98698176955185e-05, "loss": 0.853, "step": 920 }, { "epoch": 0.04644890620317651, "grad_norm": 1.7882283926010132, "learning_rate": 9.986697352710413e-05, "loss": 0.7559, "step": 930 }, { "epoch": 0.046948356807511735, "grad_norm": 2.259190320968628, "learning_rate": 9.98640986657965e-05, "loss": 1.1111, "step": 940 }, { "epoch": 0.04744780741184697, "grad_norm": 2.4604718685150146, "learning_rate": 9.986119311336509e-05, "loss": 0.9216, "step": 950 }, { "epoch": 0.0479472580161822, "grad_norm": 1.7957441806793213, "learning_rate": 9.985825687159823e-05, "loss": 0.8028, "step": 960 }, { "epoch": 0.048446708620517434, "grad_norm": 3.120357036590576, "learning_rate": 9.985528994230318e-05, "loss": 0.8791, "step": 970 }, { "epoch": 0.04894615922485266, "grad_norm": 4.274290084838867, "learning_rate": 9.985229232730607e-05, "loss": 0.8734, "step": 980 }, { "epoch": 0.04944560982918789, "grad_norm": 1.9140256643295288, "learning_rate": 9.984926402845192e-05, "loss": 1.0805, "step": 990 }, { "epoch": 0.04994506043352313, "grad_norm": 2.572977066040039, "learning_rate": 9.984620504760462e-05, "loss": 1.1131, "step": 1000 }, { "epoch": 0.05044451103785835, "grad_norm": 5.084359645843506, "learning_rate": 9.984311538664697e-05, "loss": 0.8575, "step": 1010 }, { "epoch": 0.050943961642193586, "grad_norm": 11.383241653442383, "learning_rate": 9.983999504748065e-05, "loss": 1.1465, "step": 1020 }, { "epoch": 0.05144341224652882, "grad_norm": 1.993849277496338, "learning_rate": 9.98368440320262e-05, "loss": 0.9018, "step": 1030 }, { "epoch": 0.05194286285086405, "grad_norm": 5.786123275756836, "learning_rate": 9.983366234222305e-05, "loss": 1.0299, "step": 1040 }, { "epoch": 0.05244231345519928, "grad_norm": 2.2412679195404053, "learning_rate": 9.983044998002953e-05, "loss": 0.7733, "step": 1050 }, { "epoch": 0.05294176405953451, "grad_norm": 2.106492042541504, "learning_rate": 9.982720694742284e-05, "loss": 0.8868, "step": 1060 }, { "epoch": 0.053441214663869745, "grad_norm": 3.3351902961730957, "learning_rate": 9.982393324639902e-05, "loss": 0.9423, "step": 1070 }, { "epoch": 0.05394066526820498, "grad_norm": 4.159750938415527, "learning_rate": 9.982062887897307e-05, "loss": 0.7502, "step": 1080 }, { "epoch": 0.054440115872540204, "grad_norm": 4.879648685455322, "learning_rate": 9.981729384717876e-05, "loss": 0.9594, "step": 1090 }, { "epoch": 0.05493956647687544, "grad_norm": 8.314805030822754, "learning_rate": 9.981392815306882e-05, "loss": 1.1986, "step": 1100 }, { "epoch": 0.05543901708121067, "grad_norm": 1.7291924953460693, "learning_rate": 9.98105317987148e-05, "loss": 0.9061, "step": 1110 }, { "epoch": 0.055938467685545896, "grad_norm": 2.105881929397583, "learning_rate": 9.980710478620717e-05, "loss": 0.9843, "step": 1120 }, { "epoch": 0.05643791828988113, "grad_norm": 2.115079402923584, "learning_rate": 9.980364711765519e-05, "loss": 1.0894, "step": 1130 }, { "epoch": 0.05693736889421636, "grad_norm": 2.8204243183135986, "learning_rate": 9.980015879518707e-05, "loss": 0.9992, "step": 1140 }, { "epoch": 0.057436819498551596, "grad_norm": 7.11791467666626, "learning_rate": 9.979663982094987e-05, "loss": 0.8877, "step": 1150 }, { "epoch": 0.05793627010288682, "grad_norm": 1.6929785013198853, "learning_rate": 9.979309019710949e-05, "loss": 0.8585, "step": 1160 }, { "epoch": 0.058435720707222055, "grad_norm": 3.3160712718963623, "learning_rate": 9.978950992585069e-05, "loss": 0.8392, "step": 1170 }, { "epoch": 0.05893517131155729, "grad_norm": 3.6795084476470947, "learning_rate": 9.978589900937714e-05, "loss": 0.9824, "step": 1180 }, { "epoch": 0.05943462191589252, "grad_norm": 2.2378451824188232, "learning_rate": 9.978225744991133e-05, "loss": 0.8501, "step": 1190 }, { "epoch": 0.05993407252022775, "grad_norm": 1.5347133874893188, "learning_rate": 9.97785852496946e-05, "loss": 0.9016, "step": 1200 }, { "epoch": 0.06043352312456298, "grad_norm": 2.2190093994140625, "learning_rate": 9.97748824109872e-05, "loss": 0.8335, "step": 1210 }, { "epoch": 0.060932973728898214, "grad_norm": 2.3186557292938232, "learning_rate": 9.977114893606822e-05, "loss": 1.0573, "step": 1220 }, { "epoch": 0.06143242433323344, "grad_norm": 5.181076526641846, "learning_rate": 9.976738482723557e-05, "loss": 1.1189, "step": 1230 }, { "epoch": 0.06193187493756867, "grad_norm": 0.9674704074859619, "learning_rate": 9.976359008680605e-05, "loss": 0.6374, "step": 1240 }, { "epoch": 0.062431325541903906, "grad_norm": 4.01783561706543, "learning_rate": 9.97597647171153e-05, "loss": 0.9386, "step": 1250 }, { "epoch": 0.06293077614623914, "grad_norm": 4.014340400695801, "learning_rate": 9.975590872051783e-05, "loss": 0.8924, "step": 1260 }, { "epoch": 0.06343022675057437, "grad_norm": 2.0934877395629883, "learning_rate": 9.9752022099387e-05, "loss": 0.7906, "step": 1270 }, { "epoch": 0.0639296773549096, "grad_norm": 3.8254194259643555, "learning_rate": 9.974810485611497e-05, "loss": 1.0506, "step": 1280 }, { "epoch": 0.06442912795924483, "grad_norm": 2.7336432933807373, "learning_rate": 9.974415699311282e-05, "loss": 1.0317, "step": 1290 }, { "epoch": 0.06492857856358006, "grad_norm": 1.1078413724899292, "learning_rate": 9.974017851281041e-05, "loss": 1.0106, "step": 1300 }, { "epoch": 0.06542802916791529, "grad_norm": 6.520686626434326, "learning_rate": 9.97361694176565e-05, "loss": 1.0209, "step": 1310 }, { "epoch": 0.06592747977225052, "grad_norm": 4.254242897033691, "learning_rate": 9.973212971011868e-05, "loss": 0.9462, "step": 1320 }, { "epoch": 0.06642693037658576, "grad_norm": 1.754723310470581, "learning_rate": 9.972805939268332e-05, "loss": 0.7216, "step": 1330 }, { "epoch": 0.06692638098092099, "grad_norm": 1.4408202171325684, "learning_rate": 9.972395846785574e-05, "loss": 1.033, "step": 1340 }, { "epoch": 0.06742583158525622, "grad_norm": 1.896499514579773, "learning_rate": 9.971982693816001e-05, "loss": 0.8383, "step": 1350 }, { "epoch": 0.06792528218959144, "grad_norm": 2.5673983097076416, "learning_rate": 9.971566480613907e-05, "loss": 1.0443, "step": 1360 }, { "epoch": 0.06842473279392668, "grad_norm": 2.175894260406494, "learning_rate": 9.971147207435471e-05, "loss": 1.0507, "step": 1370 }, { "epoch": 0.06892418339826191, "grad_norm": 2.4703495502471924, "learning_rate": 9.970724874538753e-05, "loss": 0.8492, "step": 1380 }, { "epoch": 0.06942363400259714, "grad_norm": 3.719816207885742, "learning_rate": 9.970299482183694e-05, "loss": 1.024, "step": 1390 }, { "epoch": 0.06992308460693238, "grad_norm": 2.802920341491699, "learning_rate": 9.969871030632125e-05, "loss": 1.0782, "step": 1400 }, { "epoch": 0.07042253521126761, "grad_norm": 5.848260402679443, "learning_rate": 9.969439520147754e-05, "loss": 0.996, "step": 1410 }, { "epoch": 0.07092198581560284, "grad_norm": 3.2062735557556152, "learning_rate": 9.969004950996175e-05, "loss": 1.0682, "step": 1420 }, { "epoch": 0.07142143641993806, "grad_norm": 1.4582406282424927, "learning_rate": 9.968567323444862e-05, "loss": 0.6954, "step": 1430 }, { "epoch": 0.0719208870242733, "grad_norm": 1.4951964616775513, "learning_rate": 9.968126637763173e-05, "loss": 1.0056, "step": 1440 }, { "epoch": 0.07242033762860853, "grad_norm": 2.992192029953003, "learning_rate": 9.967682894222348e-05, "loss": 0.8429, "step": 1450 }, { "epoch": 0.07291978823294376, "grad_norm": 2.689845085144043, "learning_rate": 9.967236093095509e-05, "loss": 1.0161, "step": 1460 }, { "epoch": 0.073419238837279, "grad_norm": 1.2474361658096313, "learning_rate": 9.96678623465766e-05, "loss": 0.9082, "step": 1470 }, { "epoch": 0.07391868944161423, "grad_norm": 2.83001971244812, "learning_rate": 9.966333319185687e-05, "loss": 1.1928, "step": 1480 }, { "epoch": 0.07441814004594946, "grad_norm": 2.0565037727355957, "learning_rate": 9.96587734695836e-05, "loss": 0.8594, "step": 1490 }, { "epoch": 0.0749175906502847, "grad_norm": 4.0485148429870605, "learning_rate": 9.965418318256323e-05, "loss": 0.8476, "step": 1500 }, { "epoch": 0.07541704125461991, "grad_norm": 1.5409923791885376, "learning_rate": 9.964956233362111e-05, "loss": 0.9508, "step": 1510 }, { "epoch": 0.07591649185895515, "grad_norm": 3.438107490539551, "learning_rate": 9.96449109256013e-05, "loss": 0.8173, "step": 1520 }, { "epoch": 0.07641594246329038, "grad_norm": 1.3228156566619873, "learning_rate": 9.964022896136675e-05, "loss": 0.6898, "step": 1530 }, { "epoch": 0.07691539306762561, "grad_norm": 1.1662561893463135, "learning_rate": 9.96355164437992e-05, "loss": 0.7646, "step": 1540 }, { "epoch": 0.07741484367196085, "grad_norm": 3.2023818492889404, "learning_rate": 9.963077337579916e-05, "loss": 0.923, "step": 1550 }, { "epoch": 0.07791429427629608, "grad_norm": 2.6391987800598145, "learning_rate": 9.962599976028596e-05, "loss": 1.0986, "step": 1560 }, { "epoch": 0.07841374488063131, "grad_norm": 3.005326747894287, "learning_rate": 9.962119560019776e-05, "loss": 0.7723, "step": 1570 }, { "epoch": 0.07891319548496653, "grad_norm": 3.3779022693634033, "learning_rate": 9.961636089849149e-05, "loss": 0.8936, "step": 1580 }, { "epoch": 0.07941264608930176, "grad_norm": 2.644306182861328, "learning_rate": 9.96114956581429e-05, "loss": 0.7907, "step": 1590 }, { "epoch": 0.079912096693637, "grad_norm": 3.233680009841919, "learning_rate": 9.960659988214649e-05, "loss": 1.0045, "step": 1600 }, { "epoch": 0.08041154729797223, "grad_norm": 1.8452726602554321, "learning_rate": 9.96016735735156e-05, "loss": 0.8637, "step": 1610 }, { "epoch": 0.08091099790230746, "grad_norm": 3.7355189323425293, "learning_rate": 9.959671673528238e-05, "loss": 1.1491, "step": 1620 }, { "epoch": 0.0814104485066427, "grad_norm": 3.3520915508270264, "learning_rate": 9.95917293704977e-05, "loss": 0.8111, "step": 1630 }, { "epoch": 0.08190989911097793, "grad_norm": 1.7798441648483276, "learning_rate": 9.95867114822313e-05, "loss": 0.8114, "step": 1640 }, { "epoch": 0.08240934971531315, "grad_norm": 3.374290704727173, "learning_rate": 9.958166307357163e-05, "loss": 0.7545, "step": 1650 }, { "epoch": 0.08290880031964838, "grad_norm": 3.4847121238708496, "learning_rate": 9.957658414762598e-05, "loss": 0.9837, "step": 1660 }, { "epoch": 0.08340825092398362, "grad_norm": 2.798218011856079, "learning_rate": 9.957147470752042e-05, "loss": 0.676, "step": 1670 }, { "epoch": 0.08390770152831885, "grad_norm": 1.433990716934204, "learning_rate": 9.956633475639975e-05, "loss": 1.0011, "step": 1680 }, { "epoch": 0.08440715213265408, "grad_norm": 3.560073137283325, "learning_rate": 9.956116429742761e-05, "loss": 0.9904, "step": 1690 }, { "epoch": 0.08490660273698931, "grad_norm": 2.9942626953125, "learning_rate": 9.95559633337864e-05, "loss": 0.9149, "step": 1700 }, { "epoch": 0.08540605334132455, "grad_norm": 5.4364495277404785, "learning_rate": 9.955073186867728e-05, "loss": 0.893, "step": 1710 }, { "epoch": 0.08590550394565978, "grad_norm": 1.7060109376907349, "learning_rate": 9.954546990532019e-05, "loss": 0.8281, "step": 1720 }, { "epoch": 0.086404954549995, "grad_norm": 3.2188754081726074, "learning_rate": 9.954017744695386e-05, "loss": 0.9606, "step": 1730 }, { "epoch": 0.08690440515433023, "grad_norm": 2.1602442264556885, "learning_rate": 9.953485449683576e-05, "loss": 0.8535, "step": 1740 }, { "epoch": 0.08740385575866547, "grad_norm": 2.050398349761963, "learning_rate": 9.952950105824211e-05, "loss": 0.8366, "step": 1750 }, { "epoch": 0.0879033063630007, "grad_norm": 1.8876922130584717, "learning_rate": 9.952411713446798e-05, "loss": 0.7703, "step": 1760 }, { "epoch": 0.08840275696733593, "grad_norm": 1.2664496898651123, "learning_rate": 9.951870272882713e-05, "loss": 0.7808, "step": 1770 }, { "epoch": 0.08890220757167117, "grad_norm": 1.7820614576339722, "learning_rate": 9.95132578446521e-05, "loss": 1.0168, "step": 1780 }, { "epoch": 0.0894016581760064, "grad_norm": 3.8653557300567627, "learning_rate": 9.95077824852942e-05, "loss": 1.0786, "step": 1790 }, { "epoch": 0.08990110878034162, "grad_norm": 2.9980475902557373, "learning_rate": 9.950227665412349e-05, "loss": 0.8898, "step": 1800 }, { "epoch": 0.09040055938467685, "grad_norm": 1.1599256992340088, "learning_rate": 9.949674035452877e-05, "loss": 0.7905, "step": 1810 }, { "epoch": 0.09090000998901208, "grad_norm": 6.718392848968506, "learning_rate": 9.94911735899176e-05, "loss": 0.8412, "step": 1820 }, { "epoch": 0.09139946059334732, "grad_norm": 2.9782862663269043, "learning_rate": 9.94855763637163e-05, "loss": 0.7786, "step": 1830 }, { "epoch": 0.09189891119768255, "grad_norm": 5.974144458770752, "learning_rate": 9.947994867936997e-05, "loss": 0.7711, "step": 1840 }, { "epoch": 0.09239836180201778, "grad_norm": 5.793637275695801, "learning_rate": 9.947429054034238e-05, "loss": 0.9684, "step": 1850 }, { "epoch": 0.09289781240635302, "grad_norm": 3.8431828022003174, "learning_rate": 9.946860195011614e-05, "loss": 0.9766, "step": 1860 }, { "epoch": 0.09339726301068824, "grad_norm": 3.748358964920044, "learning_rate": 9.94628829121925e-05, "loss": 1.1646, "step": 1870 }, { "epoch": 0.09389671361502347, "grad_norm": 1.4407458305358887, "learning_rate": 9.945713343009153e-05, "loss": 0.8303, "step": 1880 }, { "epoch": 0.0943961642193587, "grad_norm": 1.9862149953842163, "learning_rate": 9.945135350735199e-05, "loss": 0.8215, "step": 1890 }, { "epoch": 0.09489561482369394, "grad_norm": 1.7664053440093994, "learning_rate": 9.944554314753143e-05, "loss": 0.9924, "step": 1900 }, { "epoch": 0.09539506542802917, "grad_norm": 2.36393404006958, "learning_rate": 9.943970235420605e-05, "loss": 0.7561, "step": 1910 }, { "epoch": 0.0958945160323644, "grad_norm": 3.7872016429901123, "learning_rate": 9.943383113097089e-05, "loss": 0.9625, "step": 1920 }, { "epoch": 0.09639396663669964, "grad_norm": 3.44275164604187, "learning_rate": 9.94279294814396e-05, "loss": 0.8123, "step": 1930 }, { "epoch": 0.09689341724103487, "grad_norm": 2.9458820819854736, "learning_rate": 9.942199740924467e-05, "loss": 0.8189, "step": 1940 }, { "epoch": 0.09739286784537009, "grad_norm": 3.4364569187164307, "learning_rate": 9.941603491803724e-05, "loss": 0.9232, "step": 1950 }, { "epoch": 0.09789231844970532, "grad_norm": 2.805171251296997, "learning_rate": 9.94100420114872e-05, "loss": 1.0483, "step": 1960 }, { "epoch": 0.09839176905404055, "grad_norm": 0.2529987394809723, "learning_rate": 9.940401869328314e-05, "loss": 0.7037, "step": 1970 }, { "epoch": 0.09889121965837579, "grad_norm": 3.3802037239074707, "learning_rate": 9.93979649671324e-05, "loss": 0.796, "step": 1980 }, { "epoch": 0.09939067026271102, "grad_norm": 1.818002700805664, "learning_rate": 9.939188083676103e-05, "loss": 0.6942, "step": 1990 }, { "epoch": 0.09989012086704625, "grad_norm": 1.597109317779541, "learning_rate": 9.938576630591377e-05, "loss": 1.0238, "step": 2000 }, { "epoch": 0.10038957147138149, "grad_norm": 1.5362275838851929, "learning_rate": 9.93796213783541e-05, "loss": 0.7831, "step": 2010 }, { "epoch": 0.1008890220757167, "grad_norm": 1.4740239381790161, "learning_rate": 9.937344605786416e-05, "loss": 0.6369, "step": 2020 }, { "epoch": 0.10138847268005194, "grad_norm": 5.3471150398254395, "learning_rate": 9.936724034824487e-05, "loss": 0.9997, "step": 2030 }, { "epoch": 0.10188792328438717, "grad_norm": 3.965418577194214, "learning_rate": 9.93610042533158e-05, "loss": 0.7309, "step": 2040 }, { "epoch": 0.1023873738887224, "grad_norm": 1.7020810842514038, "learning_rate": 9.935473777691526e-05, "loss": 1.1524, "step": 2050 }, { "epoch": 0.10288682449305764, "grad_norm": 1.3779456615447998, "learning_rate": 9.934844092290019e-05, "loss": 0.7777, "step": 2060 }, { "epoch": 0.10338627509739287, "grad_norm": 7.693228244781494, "learning_rate": 9.93421136951463e-05, "loss": 1.2078, "step": 2070 }, { "epoch": 0.1038857257017281, "grad_norm": 2.664365768432617, "learning_rate": 9.9335756097548e-05, "loss": 0.8353, "step": 2080 }, { "epoch": 0.10438517630606332, "grad_norm": 3.8594837188720703, "learning_rate": 9.93293681340183e-05, "loss": 1.0259, "step": 2090 }, { "epoch": 0.10488462691039856, "grad_norm": 1.5903313159942627, "learning_rate": 9.9322949808489e-05, "loss": 0.8916, "step": 2100 }, { "epoch": 0.10538407751473379, "grad_norm": 1.939350962638855, "learning_rate": 9.931650112491057e-05, "loss": 1.0823, "step": 2110 }, { "epoch": 0.10588352811906902, "grad_norm": 2.0447142124176025, "learning_rate": 9.93100220872521e-05, "loss": 1.0213, "step": 2120 }, { "epoch": 0.10638297872340426, "grad_norm": 2.2896556854248047, "learning_rate": 9.930351269950143e-05, "loss": 0.8663, "step": 2130 }, { "epoch": 0.10688242932773949, "grad_norm": 1.772435188293457, "learning_rate": 9.929697296566507e-05, "loss": 0.8649, "step": 2140 }, { "epoch": 0.10738187993207472, "grad_norm": 2.264937162399292, "learning_rate": 9.929040288976816e-05, "loss": 0.948, "step": 2150 }, { "epoch": 0.10788133053640996, "grad_norm": 2.3183889389038086, "learning_rate": 9.92838024758546e-05, "loss": 0.8205, "step": 2160 }, { "epoch": 0.10838078114074517, "grad_norm": 9.26515007019043, "learning_rate": 9.927717172798687e-05, "loss": 0.8452, "step": 2170 }, { "epoch": 0.10888023174508041, "grad_norm": 2.1538867950439453, "learning_rate": 9.92705106502462e-05, "loss": 0.8233, "step": 2180 }, { "epoch": 0.10937968234941564, "grad_norm": 2.3352103233337402, "learning_rate": 9.926381924673241e-05, "loss": 1.1426, "step": 2190 }, { "epoch": 0.10987913295375087, "grad_norm": 0.5940951108932495, "learning_rate": 9.925709752156407e-05, "loss": 0.7086, "step": 2200 }, { "epoch": 0.11037858355808611, "grad_norm": 2.1471216678619385, "learning_rate": 9.925034547887837e-05, "loss": 1.1785, "step": 2210 }, { "epoch": 0.11087803416242134, "grad_norm": 1.97718346118927, "learning_rate": 9.924356312283113e-05, "loss": 0.8249, "step": 2220 }, { "epoch": 0.11137748476675657, "grad_norm": 2.9583914279937744, "learning_rate": 9.923675045759689e-05, "loss": 0.7702, "step": 2230 }, { "epoch": 0.11187693537109179, "grad_norm": 0.724244236946106, "learning_rate": 9.922990748736877e-05, "loss": 0.8585, "step": 2240 }, { "epoch": 0.11237638597542703, "grad_norm": 1.6883105039596558, "learning_rate": 9.922303421635864e-05, "loss": 0.7324, "step": 2250 }, { "epoch": 0.11287583657976226, "grad_norm": 1.215675711631775, "learning_rate": 9.921613064879695e-05, "loss": 0.9073, "step": 2260 }, { "epoch": 0.11337528718409749, "grad_norm": 4.0062031745910645, "learning_rate": 9.920919678893278e-05, "loss": 0.9143, "step": 2270 }, { "epoch": 0.11387473778843273, "grad_norm": 1.8300977945327759, "learning_rate": 9.920223264103395e-05, "loss": 0.9411, "step": 2280 }, { "epoch": 0.11437418839276796, "grad_norm": 1.9074162244796753, "learning_rate": 9.919523820938681e-05, "loss": 0.8758, "step": 2290 }, { "epoch": 0.11487363899710319, "grad_norm": 3.277357339859009, "learning_rate": 9.918821349829641e-05, "loss": 1.0738, "step": 2300 }, { "epoch": 0.11537308960143841, "grad_norm": 1.000112533569336, "learning_rate": 9.918115851208644e-05, "loss": 0.9844, "step": 2310 }, { "epoch": 0.11587254020577364, "grad_norm": 2.460766315460205, "learning_rate": 9.917407325509922e-05, "loss": 0.8265, "step": 2320 }, { "epoch": 0.11637199081010888, "grad_norm": 3.4657835960388184, "learning_rate": 9.916695773169569e-05, "loss": 1.0271, "step": 2330 }, { "epoch": 0.11687144141444411, "grad_norm": 6.398989677429199, "learning_rate": 9.915981194625539e-05, "loss": 1.0857, "step": 2340 }, { "epoch": 0.11737089201877934, "grad_norm": 3.102544069290161, "learning_rate": 9.915263590317654e-05, "loss": 0.9166, "step": 2350 }, { "epoch": 0.11787034262311458, "grad_norm": 1.6086996793746948, "learning_rate": 9.914542960687597e-05, "loss": 0.7819, "step": 2360 }, { "epoch": 0.11836979322744981, "grad_norm": 3.1169967651367188, "learning_rate": 9.913819306178912e-05, "loss": 0.8216, "step": 2370 }, { "epoch": 0.11886924383178504, "grad_norm": 3.2373907566070557, "learning_rate": 9.913092627237004e-05, "loss": 0.9111, "step": 2380 }, { "epoch": 0.11936869443612026, "grad_norm": 1.271636962890625, "learning_rate": 9.91236292430914e-05, "loss": 0.7123, "step": 2390 }, { "epoch": 0.1198681450404555, "grad_norm": 1.2871203422546387, "learning_rate": 9.91163019784445e-05, "loss": 0.9538, "step": 2400 }, { "epoch": 0.12036759564479073, "grad_norm": 3.983081579208374, "learning_rate": 9.910894448293926e-05, "loss": 1.1022, "step": 2410 }, { "epoch": 0.12086704624912596, "grad_norm": 1.4097563028335571, "learning_rate": 9.910155676110412e-05, "loss": 0.9296, "step": 2420 }, { "epoch": 0.1213664968534612, "grad_norm": 2.7831010818481445, "learning_rate": 9.909413881748627e-05, "loss": 0.7646, "step": 2430 }, { "epoch": 0.12186594745779643, "grad_norm": 2.1280717849731445, "learning_rate": 9.908669065665137e-05, "loss": 0.8786, "step": 2440 }, { "epoch": 0.12236539806213166, "grad_norm": 2.318333148956299, "learning_rate": 9.907921228318373e-05, "loss": 0.8555, "step": 2450 }, { "epoch": 0.12286484866646688, "grad_norm": 4.173869609832764, "learning_rate": 9.907170370168626e-05, "loss": 0.9869, "step": 2460 }, { "epoch": 0.12336429927080211, "grad_norm": 2.4868521690368652, "learning_rate": 9.906416491678047e-05, "loss": 0.7005, "step": 2470 }, { "epoch": 0.12386374987513735, "grad_norm": 4.937372207641602, "learning_rate": 9.905659593310643e-05, "loss": 0.999, "step": 2480 }, { "epoch": 0.12436320047947258, "grad_norm": 2.488497018814087, "learning_rate": 9.904899675532282e-05, "loss": 0.9049, "step": 2490 }, { "epoch": 0.12486265108380781, "grad_norm": 1.3979971408843994, "learning_rate": 9.904136738810692e-05, "loss": 0.7388, "step": 2500 }, { "epoch": 0.12536210168814305, "grad_norm": 0.7958292961120605, "learning_rate": 9.903370783615453e-05, "loss": 0.7922, "step": 2510 }, { "epoch": 0.12586155229247828, "grad_norm": 1.8274697065353394, "learning_rate": 9.902601810418011e-05, "loss": 0.8086, "step": 2520 }, { "epoch": 0.1263610028968135, "grad_norm": 3.842331886291504, "learning_rate": 9.901829819691662e-05, "loss": 0.9567, "step": 2530 }, { "epoch": 0.12686045350114875, "grad_norm": 3.233276128768921, "learning_rate": 9.901054811911565e-05, "loss": 0.9538, "step": 2540 }, { "epoch": 0.12735990410548398, "grad_norm": 2.3809590339660645, "learning_rate": 9.900276787554734e-05, "loss": 0.9824, "step": 2550 }, { "epoch": 0.1278593547098192, "grad_norm": 12.891764640808105, "learning_rate": 9.899495747100037e-05, "loss": 0.9333, "step": 2560 }, { "epoch": 0.12835880531415442, "grad_norm": 1.6126281023025513, "learning_rate": 9.898711691028204e-05, "loss": 0.8228, "step": 2570 }, { "epoch": 0.12885825591848965, "grad_norm": 1.4332327842712402, "learning_rate": 9.897924619821815e-05, "loss": 1.1386, "step": 2580 }, { "epoch": 0.12935770652282488, "grad_norm": 1.1457003355026245, "learning_rate": 9.89713453396531e-05, "loss": 1.0789, "step": 2590 }, { "epoch": 0.12985715712716012, "grad_norm": 3.1065938472747803, "learning_rate": 9.896341433944983e-05, "loss": 0.8222, "step": 2600 }, { "epoch": 0.13035660773149535, "grad_norm": 2.5499441623687744, "learning_rate": 9.895545320248985e-05, "loss": 0.8682, "step": 2610 }, { "epoch": 0.13085605833583058, "grad_norm": 2.2303380966186523, "learning_rate": 9.894746193367317e-05, "loss": 0.8319, "step": 2620 }, { "epoch": 0.13135550894016582, "grad_norm": 1.9166508913040161, "learning_rate": 9.893944053791841e-05, "loss": 1.1738, "step": 2630 }, { "epoch": 0.13185495954450105, "grad_norm": 2.200120210647583, "learning_rate": 9.893138902016267e-05, "loss": 0.7885, "step": 2640 }, { "epoch": 0.13235441014883628, "grad_norm": 4.75616455078125, "learning_rate": 9.892330738536167e-05, "loss": 1.014, "step": 2650 }, { "epoch": 0.13285386075317152, "grad_norm": 2.533257484436035, "learning_rate": 9.891519563848959e-05, "loss": 0.9097, "step": 2660 }, { "epoch": 0.13335331135750675, "grad_norm": 1.3194255828857422, "learning_rate": 9.890705378453917e-05, "loss": 1.2093, "step": 2670 }, { "epoch": 0.13385276196184198, "grad_norm": 2.478529453277588, "learning_rate": 9.889888182852169e-05, "loss": 0.9337, "step": 2680 }, { "epoch": 0.13435221256617721, "grad_norm": 1.3562705516815186, "learning_rate": 9.889067977546694e-05, "loss": 0.7337, "step": 2690 }, { "epoch": 0.13485166317051245, "grad_norm": 1.4239592552185059, "learning_rate": 9.888244763042327e-05, "loss": 0.9373, "step": 2700 }, { "epoch": 0.13535111377484768, "grad_norm": 2.2055132389068604, "learning_rate": 9.88741853984575e-05, "loss": 0.8111, "step": 2710 }, { "epoch": 0.1358505643791829, "grad_norm": 1.5610190629959106, "learning_rate": 9.886589308465501e-05, "loss": 1.0326, "step": 2720 }, { "epoch": 0.13635001498351812, "grad_norm": 1.9877070188522339, "learning_rate": 9.88575706941197e-05, "loss": 0.8532, "step": 2730 }, { "epoch": 0.13684946558785335, "grad_norm": 1.881638526916504, "learning_rate": 9.884921823197392e-05, "loss": 0.7869, "step": 2740 }, { "epoch": 0.13734891619218859, "grad_norm": 1.3000985383987427, "learning_rate": 9.88408357033586e-05, "loss": 1.1506, "step": 2750 }, { "epoch": 0.13784836679652382, "grad_norm": 3.4877495765686035, "learning_rate": 9.883242311343314e-05, "loss": 1.005, "step": 2760 }, { "epoch": 0.13834781740085905, "grad_norm": 2.5101332664489746, "learning_rate": 9.882398046737547e-05, "loss": 0.8723, "step": 2770 }, { "epoch": 0.13884726800519429, "grad_norm": 1.4676083326339722, "learning_rate": 9.881550777038197e-05, "loss": 0.9566, "step": 2780 }, { "epoch": 0.13934671860952952, "grad_norm": 4.2586283683776855, "learning_rate": 9.880700502766758e-05, "loss": 0.9785, "step": 2790 }, { "epoch": 0.13984616921386475, "grad_norm": 6.4517717361450195, "learning_rate": 9.879847224446566e-05, "loss": 1.0445, "step": 2800 }, { "epoch": 0.14034561981819998, "grad_norm": 2.9773175716400146, "learning_rate": 9.878990942602813e-05, "loss": 0.816, "step": 2810 }, { "epoch": 0.14084507042253522, "grad_norm": 2.0519254207611084, "learning_rate": 9.878131657762535e-05, "loss": 0.8386, "step": 2820 }, { "epoch": 0.14134452102687045, "grad_norm": 1.1298130750656128, "learning_rate": 9.877269370454618e-05, "loss": 0.659, "step": 2830 }, { "epoch": 0.14184397163120568, "grad_norm": 1.5811046361923218, "learning_rate": 9.876404081209796e-05, "loss": 0.7454, "step": 2840 }, { "epoch": 0.14234342223554092, "grad_norm": 1.5561637878417969, "learning_rate": 9.87553579056065e-05, "loss": 0.733, "step": 2850 }, { "epoch": 0.14284287283987612, "grad_norm": 3.4421191215515137, "learning_rate": 9.87466449904161e-05, "loss": 0.7871, "step": 2860 }, { "epoch": 0.14334232344421136, "grad_norm": 1.2579591274261475, "learning_rate": 9.873790207188952e-05, "loss": 0.8923, "step": 2870 }, { "epoch": 0.1438417740485466, "grad_norm": 0.7126845717430115, "learning_rate": 9.872912915540799e-05, "loss": 0.9842, "step": 2880 }, { "epoch": 0.14434122465288182, "grad_norm": 1.9931137561798096, "learning_rate": 9.872032624637118e-05, "loss": 0.7486, "step": 2890 }, { "epoch": 0.14484067525721706, "grad_norm": 2.537330389022827, "learning_rate": 9.871149335019725e-05, "loss": 0.9579, "step": 2900 }, { "epoch": 0.1453401258615523, "grad_norm": 2.626526355743408, "learning_rate": 9.87026304723228e-05, "loss": 1.0062, "step": 2910 }, { "epoch": 0.14583957646588752, "grad_norm": 1.5687665939331055, "learning_rate": 9.869373761820291e-05, "loss": 0.9602, "step": 2920 }, { "epoch": 0.14633902707022275, "grad_norm": 1.7398674488067627, "learning_rate": 9.868481479331107e-05, "loss": 0.7881, "step": 2930 }, { "epoch": 0.146838477674558, "grad_norm": 1.6017673015594482, "learning_rate": 9.867586200313926e-05, "loss": 0.8862, "step": 2940 }, { "epoch": 0.14733792827889322, "grad_norm": 5.415498733520508, "learning_rate": 9.866687925319786e-05, "loss": 0.8873, "step": 2950 }, { "epoch": 0.14783737888322845, "grad_norm": 1.596312165260315, "learning_rate": 9.865786654901573e-05, "loss": 0.8904, "step": 2960 }, { "epoch": 0.1483368294875637, "grad_norm": 4.682255744934082, "learning_rate": 9.864882389614014e-05, "loss": 0.842, "step": 2970 }, { "epoch": 0.14883628009189892, "grad_norm": 5.508861064910889, "learning_rate": 9.863975130013678e-05, "loss": 1.0358, "step": 2980 }, { "epoch": 0.14933573069623415, "grad_norm": 2.0030109882354736, "learning_rate": 9.863064876658983e-05, "loss": 0.9057, "step": 2990 }, { "epoch": 0.1498351813005694, "grad_norm": 4.921465873718262, "learning_rate": 9.862151630110182e-05, "loss": 1.0228, "step": 3000 }, { "epoch": 0.1503346319049046, "grad_norm": 5.827107906341553, "learning_rate": 9.861235390929378e-05, "loss": 0.8681, "step": 3010 }, { "epoch": 0.15083408250923983, "grad_norm": 1.1233668327331543, "learning_rate": 9.860316159680507e-05, "loss": 0.8535, "step": 3020 }, { "epoch": 0.15133353311357506, "grad_norm": 3.167651414871216, "learning_rate": 9.859393936929357e-05, "loss": 0.8039, "step": 3030 }, { "epoch": 0.1518329837179103, "grad_norm": 1.0371288061141968, "learning_rate": 9.858468723243549e-05, "loss": 0.9443, "step": 3040 }, { "epoch": 0.15233243432224552, "grad_norm": 0.5512734055519104, "learning_rate": 9.857540519192547e-05, "loss": 0.7964, "step": 3050 }, { "epoch": 0.15283188492658076, "grad_norm": 1.2850098609924316, "learning_rate": 9.856609325347659e-05, "loss": 0.8136, "step": 3060 }, { "epoch": 0.153331335530916, "grad_norm": 2.381633996963501, "learning_rate": 9.855675142282028e-05, "loss": 0.8199, "step": 3070 }, { "epoch": 0.15383078613525122, "grad_norm": 2.244203805923462, "learning_rate": 9.85473797057064e-05, "loss": 0.8278, "step": 3080 }, { "epoch": 0.15433023673958646, "grad_norm": 4.833874225616455, "learning_rate": 9.853797810790322e-05, "loss": 1.0593, "step": 3090 }, { "epoch": 0.1548296873439217, "grad_norm": 1.0583378076553345, "learning_rate": 9.852854663519736e-05, "loss": 0.7239, "step": 3100 }, { "epoch": 0.15532913794825692, "grad_norm": 1.1276373863220215, "learning_rate": 9.851908529339383e-05, "loss": 0.9269, "step": 3110 }, { "epoch": 0.15582858855259216, "grad_norm": 1.5166157484054565, "learning_rate": 9.850959408831609e-05, "loss": 1.0135, "step": 3120 }, { "epoch": 0.1563280391569274, "grad_norm": 0.892557680606842, "learning_rate": 9.850007302580588e-05, "loss": 0.9219, "step": 3130 }, { "epoch": 0.15682748976126262, "grad_norm": 2.4699206352233887, "learning_rate": 9.84905221117234e-05, "loss": 0.9638, "step": 3140 }, { "epoch": 0.15732694036559786, "grad_norm": 3.945955753326416, "learning_rate": 9.84809413519472e-05, "loss": 1.0434, "step": 3150 }, { "epoch": 0.15782639096993306, "grad_norm": 3.5126733779907227, "learning_rate": 9.84713307523742e-05, "loss": 0.9455, "step": 3160 }, { "epoch": 0.1583258415742683, "grad_norm": 2.5539886951446533, "learning_rate": 9.846169031891965e-05, "loss": 0.8115, "step": 3170 }, { "epoch": 0.15882529217860353, "grad_norm": 6.17609167098999, "learning_rate": 9.845202005751721e-05, "loss": 1.0244, "step": 3180 }, { "epoch": 0.15932474278293876, "grad_norm": 1.2907859086990356, "learning_rate": 9.844231997411887e-05, "loss": 0.7892, "step": 3190 }, { "epoch": 0.159824193387274, "grad_norm": 2.824439764022827, "learning_rate": 9.843259007469501e-05, "loss": 0.7057, "step": 3200 }, { "epoch": 0.16032364399160923, "grad_norm": 2.161973476409912, "learning_rate": 9.842283036523431e-05, "loss": 0.9797, "step": 3210 }, { "epoch": 0.16082309459594446, "grad_norm": 1.7337701320648193, "learning_rate": 9.841304085174386e-05, "loss": 0.8497, "step": 3220 }, { "epoch": 0.1613225452002797, "grad_norm": 2.365299701690674, "learning_rate": 9.840322154024901e-05, "loss": 1.0819, "step": 3230 }, { "epoch": 0.16182199580461493, "grad_norm": 4.2217888832092285, "learning_rate": 9.839337243679355e-05, "loss": 0.9062, "step": 3240 }, { "epoch": 0.16232144640895016, "grad_norm": 3.642833709716797, "learning_rate": 9.838349354743954e-05, "loss": 0.7498, "step": 3250 }, { "epoch": 0.1628208970132854, "grad_norm": 1.1478465795516968, "learning_rate": 9.837358487826737e-05, "loss": 1.0177, "step": 3260 }, { "epoch": 0.16332034761762063, "grad_norm": 1.1263699531555176, "learning_rate": 9.836364643537583e-05, "loss": 0.848, "step": 3270 }, { "epoch": 0.16381979822195586, "grad_norm": 3.684113025665283, "learning_rate": 9.835367822488195e-05, "loss": 1.0706, "step": 3280 }, { "epoch": 0.1643192488262911, "grad_norm": 1.0138434171676636, "learning_rate": 9.834368025292112e-05, "loss": 0.8569, "step": 3290 }, { "epoch": 0.1648186994306263, "grad_norm": 3.8836417198181152, "learning_rate": 9.833365252564706e-05, "loss": 0.6981, "step": 3300 }, { "epoch": 0.16531815003496153, "grad_norm": 4.29389762878418, "learning_rate": 9.832359504923176e-05, "loss": 0.7828, "step": 3310 }, { "epoch": 0.16581760063929676, "grad_norm": 1.0574113130569458, "learning_rate": 9.83135078298656e-05, "loss": 0.8543, "step": 3320 }, { "epoch": 0.166317051243632, "grad_norm": 3.4949302673339844, "learning_rate": 9.830339087375717e-05, "loss": 0.8726, "step": 3330 }, { "epoch": 0.16681650184796723, "grad_norm": 1.8446637392044067, "learning_rate": 9.829324418713342e-05, "loss": 0.9586, "step": 3340 }, { "epoch": 0.16731595245230246, "grad_norm": 1.1169347763061523, "learning_rate": 9.828306777623961e-05, "loss": 0.9523, "step": 3350 }, { "epoch": 0.1678154030566377, "grad_norm": 1.7979636192321777, "learning_rate": 9.827286164733926e-05, "loss": 0.9379, "step": 3360 }, { "epoch": 0.16831485366097293, "grad_norm": 1.7395380735397339, "learning_rate": 9.826262580671422e-05, "loss": 0.7038, "step": 3370 }, { "epoch": 0.16881430426530816, "grad_norm": 1.3596173524856567, "learning_rate": 9.825236026066456e-05, "loss": 0.7086, "step": 3380 }, { "epoch": 0.1693137548696434, "grad_norm": 2.8585293292999268, "learning_rate": 9.824206501550868e-05, "loss": 0.8265, "step": 3390 }, { "epoch": 0.16981320547397863, "grad_norm": 1.5410815477371216, "learning_rate": 9.823174007758328e-05, "loss": 0.905, "step": 3400 }, { "epoch": 0.17031265607831386, "grad_norm": 3.265131950378418, "learning_rate": 9.822138545324333e-05, "loss": 0.7908, "step": 3410 }, { "epoch": 0.1708121066826491, "grad_norm": 1.7951984405517578, "learning_rate": 9.821100114886201e-05, "loss": 0.7412, "step": 3420 }, { "epoch": 0.17131155728698433, "grad_norm": 5.0211029052734375, "learning_rate": 9.820058717083083e-05, "loss": 0.9029, "step": 3430 }, { "epoch": 0.17181100789131956, "grad_norm": 1.9129225015640259, "learning_rate": 9.819014352555955e-05, "loss": 0.8848, "step": 3440 }, { "epoch": 0.17231045849565477, "grad_norm": 0.9868128895759583, "learning_rate": 9.817967021947619e-05, "loss": 0.7679, "step": 3450 }, { "epoch": 0.17280990909999, "grad_norm": 1.28288996219635, "learning_rate": 9.816916725902698e-05, "loss": 0.9419, "step": 3460 }, { "epoch": 0.17330935970432523, "grad_norm": 2.4345173835754395, "learning_rate": 9.815863465067651e-05, "loss": 1.1373, "step": 3470 }, { "epoch": 0.17380881030866047, "grad_norm": 5.2141523361206055, "learning_rate": 9.81480724009075e-05, "loss": 0.9027, "step": 3480 }, { "epoch": 0.1743082609129957, "grad_norm": 0.884800136089325, "learning_rate": 9.813748051622101e-05, "loss": 0.8381, "step": 3490 }, { "epoch": 0.17480771151733093, "grad_norm": 3.8795621395111084, "learning_rate": 9.812685900313626e-05, "loss": 0.9876, "step": 3500 }, { "epoch": 0.17530716212166617, "grad_norm": 0.9977666735649109, "learning_rate": 9.811620786819074e-05, "loss": 1.1316, "step": 3510 }, { "epoch": 0.1758066127260014, "grad_norm": 1.1967836618423462, "learning_rate": 9.810552711794021e-05, "loss": 0.9226, "step": 3520 }, { "epoch": 0.17630606333033663, "grad_norm": 0.5718588829040527, "learning_rate": 9.80948167589586e-05, "loss": 0.7573, "step": 3530 }, { "epoch": 0.17680551393467187, "grad_norm": 5.537643909454346, "learning_rate": 9.808407679783806e-05, "loss": 0.8398, "step": 3540 }, { "epoch": 0.1773049645390071, "grad_norm": 1.492255687713623, "learning_rate": 9.807330724118905e-05, "loss": 0.8927, "step": 3550 }, { "epoch": 0.17780441514334233, "grad_norm": 0.9181856513023376, "learning_rate": 9.806250809564014e-05, "loss": 1.1437, "step": 3560 }, { "epoch": 0.17830386574767756, "grad_norm": 1.5734564065933228, "learning_rate": 9.805167936783815e-05, "loss": 0.8037, "step": 3570 }, { "epoch": 0.1788033163520128, "grad_norm": 3.1623282432556152, "learning_rate": 9.804082106444814e-05, "loss": 0.9561, "step": 3580 }, { "epoch": 0.17930276695634803, "grad_norm": 2.334489345550537, "learning_rate": 9.802993319215332e-05, "loss": 1.0817, "step": 3590 }, { "epoch": 0.17980221756068324, "grad_norm": 1.4881401062011719, "learning_rate": 9.801901575765515e-05, "loss": 0.8115, "step": 3600 }, { "epoch": 0.18030166816501847, "grad_norm": 1.3333991765975952, "learning_rate": 9.800806876767324e-05, "loss": 0.8874, "step": 3610 }, { "epoch": 0.1808011187693537, "grad_norm": 3.5096232891082764, "learning_rate": 9.799709222894539e-05, "loss": 0.8099, "step": 3620 }, { "epoch": 0.18130056937368894, "grad_norm": 3.1804404258728027, "learning_rate": 9.798608614822769e-05, "loss": 0.8315, "step": 3630 }, { "epoch": 0.18180001997802417, "grad_norm": 1.045871376991272, "learning_rate": 9.797505053229425e-05, "loss": 0.6821, "step": 3640 }, { "epoch": 0.1822994705823594, "grad_norm": 2.789235830307007, "learning_rate": 9.796398538793748e-05, "loss": 0.7989, "step": 3650 }, { "epoch": 0.18279892118669464, "grad_norm": 1.0591028928756714, "learning_rate": 9.795289072196789e-05, "loss": 0.9044, "step": 3660 }, { "epoch": 0.18329837179102987, "grad_norm": 1.6923969984054565, "learning_rate": 9.794176654121425e-05, "loss": 0.8402, "step": 3670 }, { "epoch": 0.1837978223953651, "grad_norm": 1.9938535690307617, "learning_rate": 9.793061285252341e-05, "loss": 0.974, "step": 3680 }, { "epoch": 0.18429727299970033, "grad_norm": 4.054079532623291, "learning_rate": 9.791942966276043e-05, "loss": 0.954, "step": 3690 }, { "epoch": 0.18479672360403557, "grad_norm": 1.561954140663147, "learning_rate": 9.79082169788085e-05, "loss": 0.8529, "step": 3700 }, { "epoch": 0.1852961742083708, "grad_norm": 6.34870719909668, "learning_rate": 9.789697480756896e-05, "loss": 1.0314, "step": 3710 }, { "epoch": 0.18579562481270603, "grad_norm": 2.829148530960083, "learning_rate": 9.788570315596134e-05, "loss": 0.9551, "step": 3720 }, { "epoch": 0.18629507541704127, "grad_norm": 1.198886752128601, "learning_rate": 9.787440203092329e-05, "loss": 0.8587, "step": 3730 }, { "epoch": 0.18679452602137647, "grad_norm": 2.539764642715454, "learning_rate": 9.786307143941057e-05, "loss": 0.8987, "step": 3740 }, { "epoch": 0.1872939766257117, "grad_norm": 1.3188793659210205, "learning_rate": 9.785171138839715e-05, "loss": 1.1287, "step": 3750 }, { "epoch": 0.18779342723004694, "grad_norm": 4.570198059082031, "learning_rate": 9.784032188487506e-05, "loss": 0.8166, "step": 3760 }, { "epoch": 0.18829287783438217, "grad_norm": 1.0271146297454834, "learning_rate": 9.782890293585449e-05, "loss": 0.7467, "step": 3770 }, { "epoch": 0.1887923284387174, "grad_norm": 2.158318519592285, "learning_rate": 9.781745454836377e-05, "loss": 1.0177, "step": 3780 }, { "epoch": 0.18929177904305264, "grad_norm": 3.2736594676971436, "learning_rate": 9.78059767294493e-05, "loss": 0.8979, "step": 3790 }, { "epoch": 0.18979122964738787, "grad_norm": 1.217395305633545, "learning_rate": 9.779446948617565e-05, "loss": 0.8489, "step": 3800 }, { "epoch": 0.1902906802517231, "grad_norm": 2.430159568786621, "learning_rate": 9.778293282562547e-05, "loss": 0.6917, "step": 3810 }, { "epoch": 0.19079013085605834, "grad_norm": 2.7768425941467285, "learning_rate": 9.77713667548995e-05, "loss": 0.8224, "step": 3820 }, { "epoch": 0.19128958146039357, "grad_norm": 3.406033515930176, "learning_rate": 9.775977128111663e-05, "loss": 0.9499, "step": 3830 }, { "epoch": 0.1917890320647288, "grad_norm": 1.3434473276138306, "learning_rate": 9.774814641141382e-05, "loss": 1.2465, "step": 3840 }, { "epoch": 0.19228848266906404, "grad_norm": 3.096501350402832, "learning_rate": 9.773649215294611e-05, "loss": 0.7828, "step": 3850 }, { "epoch": 0.19278793327339927, "grad_norm": 2.608057737350464, "learning_rate": 9.772480851288666e-05, "loss": 0.9272, "step": 3860 }, { "epoch": 0.1932873838777345, "grad_norm": 1.0312490463256836, "learning_rate": 9.771309549842666e-05, "loss": 1.1293, "step": 3870 }, { "epoch": 0.19378683448206974, "grad_norm": 1.8020999431610107, "learning_rate": 9.770135311677546e-05, "loss": 0.8136, "step": 3880 }, { "epoch": 0.19428628508640494, "grad_norm": 1.438332200050354, "learning_rate": 9.768958137516042e-05, "loss": 0.7858, "step": 3890 }, { "epoch": 0.19478573569074017, "grad_norm": 2.0370874404907227, "learning_rate": 9.7677780280827e-05, "loss": 1.1137, "step": 3900 }, { "epoch": 0.1952851862950754, "grad_norm": 1.5007089376449585, "learning_rate": 9.766594984103872e-05, "loss": 0.6553, "step": 3910 }, { "epoch": 0.19578463689941064, "grad_norm": 4.325165271759033, "learning_rate": 9.765409006307715e-05, "loss": 1.0529, "step": 3920 }, { "epoch": 0.19628408750374587, "grad_norm": 2.121492385864258, "learning_rate": 9.764220095424195e-05, "loss": 1.0299, "step": 3930 }, { "epoch": 0.1967835381080811, "grad_norm": 3.8324851989746094, "learning_rate": 9.763028252185077e-05, "loss": 0.9219, "step": 3940 }, { "epoch": 0.19728298871241634, "grad_norm": 0.20872262120246887, "learning_rate": 9.76183347732394e-05, "loss": 0.7862, "step": 3950 }, { "epoch": 0.19778243931675157, "grad_norm": 1.5114883184432983, "learning_rate": 9.760635771576161e-05, "loss": 0.9307, "step": 3960 }, { "epoch": 0.1982818899210868, "grad_norm": 1.2050377130508423, "learning_rate": 9.75943513567892e-05, "loss": 0.9463, "step": 3970 }, { "epoch": 0.19878134052542204, "grad_norm": 2.041001558303833, "learning_rate": 9.758231570371206e-05, "loss": 0.7967, "step": 3980 }, { "epoch": 0.19928079112975727, "grad_norm": 2.6333858966827393, "learning_rate": 9.757025076393805e-05, "loss": 0.838, "step": 3990 }, { "epoch": 0.1997802417340925, "grad_norm": 1.027332067489624, "learning_rate": 9.755815654489311e-05, "loss": 0.7515, "step": 4000 }, { "epoch": 0.20027969233842774, "grad_norm": 1.96125066280365, "learning_rate": 9.754603305402117e-05, "loss": 0.872, "step": 4010 }, { "epoch": 0.20077914294276297, "grad_norm": 1.2602308988571167, "learning_rate": 9.753388029878416e-05, "loss": 0.6845, "step": 4020 }, { "epoch": 0.2012785935470982, "grad_norm": 3.9577910900115967, "learning_rate": 9.752169828666208e-05, "loss": 0.8699, "step": 4030 }, { "epoch": 0.2017780441514334, "grad_norm": 1.2592867612838745, "learning_rate": 9.750948702515289e-05, "loss": 0.7011, "step": 4040 }, { "epoch": 0.20227749475576864, "grad_norm": 3.0217552185058594, "learning_rate": 9.749724652177256e-05, "loss": 0.7576, "step": 4050 }, { "epoch": 0.20277694536010388, "grad_norm": 2.191291093826294, "learning_rate": 9.748497678405507e-05, "loss": 0.8379, "step": 4060 }, { "epoch": 0.2032763959644391, "grad_norm": 3.587172508239746, "learning_rate": 9.747267781955238e-05, "loss": 0.7689, "step": 4070 }, { "epoch": 0.20377584656877434, "grad_norm": 2.85992693901062, "learning_rate": 9.746034963583444e-05, "loss": 0.8742, "step": 4080 }, { "epoch": 0.20427529717310958, "grad_norm": 1.0260587930679321, "learning_rate": 9.74479922404892e-05, "loss": 0.9285, "step": 4090 }, { "epoch": 0.2047747477774448, "grad_norm": 1.6661021709442139, "learning_rate": 9.74356056411226e-05, "loss": 0.7533, "step": 4100 }, { "epoch": 0.20527419838178004, "grad_norm": 2.2743191719055176, "learning_rate": 9.74231898453585e-05, "loss": 0.8414, "step": 4110 }, { "epoch": 0.20577364898611528, "grad_norm": 3.976389169692993, "learning_rate": 9.741074486083878e-05, "loss": 0.8494, "step": 4120 }, { "epoch": 0.2062730995904505, "grad_norm": 2.743157386779785, "learning_rate": 9.739827069522327e-05, "loss": 1.1373, "step": 4130 }, { "epoch": 0.20677255019478574, "grad_norm": 1.540973424911499, "learning_rate": 9.738576735618977e-05, "loss": 0.8152, "step": 4140 }, { "epoch": 0.20727200079912098, "grad_norm": 2.5153651237487793, "learning_rate": 9.737323485143401e-05, "loss": 0.9452, "step": 4150 }, { "epoch": 0.2077714514034562, "grad_norm": 3.126713991165161, "learning_rate": 9.736067318866973e-05, "loss": 1.1158, "step": 4160 }, { "epoch": 0.20827090200779144, "grad_norm": 2.0404253005981445, "learning_rate": 9.734808237562851e-05, "loss": 0.9586, "step": 4170 }, { "epoch": 0.20877035261212665, "grad_norm": 2.13669490814209, "learning_rate": 9.733546242006e-05, "loss": 0.8836, "step": 4180 }, { "epoch": 0.20926980321646188, "grad_norm": 1.5237314701080322, "learning_rate": 9.732281332973168e-05, "loss": 0.8961, "step": 4190 }, { "epoch": 0.2097692538207971, "grad_norm": 1.4856115579605103, "learning_rate": 9.731013511242902e-05, "loss": 0.7561, "step": 4200 }, { "epoch": 0.21026870442513235, "grad_norm": 3.4389278888702393, "learning_rate": 9.729742777595543e-05, "loss": 0.982, "step": 4210 }, { "epoch": 0.21076815502946758, "grad_norm": 1.0415877103805542, "learning_rate": 9.728469132813218e-05, "loss": 0.6615, "step": 4220 }, { "epoch": 0.2112676056338028, "grad_norm": 0.9365307688713074, "learning_rate": 9.727192577679851e-05, "loss": 0.8517, "step": 4230 }, { "epoch": 0.21176705623813805, "grad_norm": 1.5347106456756592, "learning_rate": 9.725913112981157e-05, "loss": 0.908, "step": 4240 }, { "epoch": 0.21226650684247328, "grad_norm": 1.4954674243927002, "learning_rate": 9.724630739504641e-05, "loss": 1.0653, "step": 4250 }, { "epoch": 0.2127659574468085, "grad_norm": 4.099848747253418, "learning_rate": 9.723345458039594e-05, "loss": 0.9833, "step": 4260 }, { "epoch": 0.21326540805114375, "grad_norm": 3.3358452320098877, "learning_rate": 9.722057269377105e-05, "loss": 0.8166, "step": 4270 }, { "epoch": 0.21376485865547898, "grad_norm": 1.451033592224121, "learning_rate": 9.720766174310047e-05, "loss": 0.858, "step": 4280 }, { "epoch": 0.2142643092598142, "grad_norm": 1.987222671508789, "learning_rate": 9.719472173633082e-05, "loss": 0.6486, "step": 4290 }, { "epoch": 0.21476375986414944, "grad_norm": 1.662618637084961, "learning_rate": 9.718175268142662e-05, "loss": 0.7854, "step": 4300 }, { "epoch": 0.21526321046848468, "grad_norm": 2.374152183532715, "learning_rate": 9.716875458637027e-05, "loss": 0.9253, "step": 4310 }, { "epoch": 0.2157626610728199, "grad_norm": 1.624871015548706, "learning_rate": 9.715572745916204e-05, "loss": 0.7942, "step": 4320 }, { "epoch": 0.21626211167715512, "grad_norm": 2.8344717025756836, "learning_rate": 9.714267130782006e-05, "loss": 0.9736, "step": 4330 }, { "epoch": 0.21676156228149035, "grad_norm": 1.3681503534317017, "learning_rate": 9.712958614038033e-05, "loss": 0.7467, "step": 4340 }, { "epoch": 0.21726101288582558, "grad_norm": 1.5472357273101807, "learning_rate": 9.71164719648967e-05, "loss": 0.752, "step": 4350 }, { "epoch": 0.21776046349016082, "grad_norm": 8.472622871398926, "learning_rate": 9.71033287894409e-05, "loss": 1.1423, "step": 4360 }, { "epoch": 0.21825991409449605, "grad_norm": 1.368548035621643, "learning_rate": 9.709015662210252e-05, "loss": 0.7389, "step": 4370 }, { "epoch": 0.21875936469883128, "grad_norm": 1.1928058862686157, "learning_rate": 9.707695547098891e-05, "loss": 0.7544, "step": 4380 }, { "epoch": 0.21925881530316652, "grad_norm": 9.385339736938477, "learning_rate": 9.706372534422536e-05, "loss": 0.9805, "step": 4390 }, { "epoch": 0.21975826590750175, "grad_norm": 2.0432844161987305, "learning_rate": 9.705046624995495e-05, "loss": 0.8043, "step": 4400 }, { "epoch": 0.22025771651183698, "grad_norm": 1.434600591659546, "learning_rate": 9.703717819633856e-05, "loss": 0.9335, "step": 4410 }, { "epoch": 0.22075716711617221, "grad_norm": 1.6714608669281006, "learning_rate": 9.702386119155495e-05, "loss": 1.1171, "step": 4420 }, { "epoch": 0.22125661772050745, "grad_norm": 3.2519516944885254, "learning_rate": 9.701051524380069e-05, "loss": 0.852, "step": 4430 }, { "epoch": 0.22175606832484268, "grad_norm": 4.377466201782227, "learning_rate": 9.699714036129013e-05, "loss": 1.0314, "step": 4440 }, { "epoch": 0.22225551892917791, "grad_norm": 4.908904075622559, "learning_rate": 9.698373655225546e-05, "loss": 0.8541, "step": 4450 }, { "epoch": 0.22275496953351315, "grad_norm": 1.3215618133544922, "learning_rate": 9.697030382494663e-05, "loss": 0.7287, "step": 4460 }, { "epoch": 0.22325442013784838, "grad_norm": 5.293785095214844, "learning_rate": 9.695684218763145e-05, "loss": 0.875, "step": 4470 }, { "epoch": 0.22375387074218359, "grad_norm": 3.4198853969573975, "learning_rate": 9.694335164859552e-05, "loss": 1.0289, "step": 4480 }, { "epoch": 0.22425332134651882, "grad_norm": 1.569650411605835, "learning_rate": 9.692983221614216e-05, "loss": 0.8788, "step": 4490 }, { "epoch": 0.22475277195085405, "grad_norm": 1.5537540912628174, "learning_rate": 9.691628389859253e-05, "loss": 0.5818, "step": 4500 }, { "epoch": 0.22525222255518929, "grad_norm": 1.9405503273010254, "learning_rate": 9.690270670428557e-05, "loss": 0.7723, "step": 4510 }, { "epoch": 0.22575167315952452, "grad_norm": 1.9559584856033325, "learning_rate": 9.688910064157798e-05, "loss": 0.8411, "step": 4520 }, { "epoch": 0.22625112376385975, "grad_norm": 3.755566120147705, "learning_rate": 9.687546571884421e-05, "loss": 0.7732, "step": 4530 }, { "epoch": 0.22675057436819498, "grad_norm": 2.3459646701812744, "learning_rate": 9.686180194447652e-05, "loss": 0.9359, "step": 4540 }, { "epoch": 0.22725002497253022, "grad_norm": 3.293691635131836, "learning_rate": 9.684810932688488e-05, "loss": 0.782, "step": 4550 }, { "epoch": 0.22774947557686545, "grad_norm": 5.3083109855651855, "learning_rate": 9.683438787449704e-05, "loss": 1.019, "step": 4560 }, { "epoch": 0.22824892618120068, "grad_norm": 1.8565597534179688, "learning_rate": 9.682063759575848e-05, "loss": 0.955, "step": 4570 }, { "epoch": 0.22874837678553592, "grad_norm": 1.9487963914871216, "learning_rate": 9.680685849913244e-05, "loss": 0.8568, "step": 4580 }, { "epoch": 0.22924782738987115, "grad_norm": 0.8379784226417542, "learning_rate": 9.679305059309992e-05, "loss": 0.7309, "step": 4590 }, { "epoch": 0.22974727799420638, "grad_norm": 3.0211181640625, "learning_rate": 9.677921388615959e-05, "loss": 0.9924, "step": 4600 }, { "epoch": 0.23024672859854162, "grad_norm": 1.363000750541687, "learning_rate": 9.676534838682788e-05, "loss": 0.791, "step": 4610 }, { "epoch": 0.23074617920287682, "grad_norm": 2.081880569458008, "learning_rate": 9.675145410363894e-05, "loss": 1.0073, "step": 4620 }, { "epoch": 0.23124562980721206, "grad_norm": 1.5669691562652588, "learning_rate": 9.673753104514465e-05, "loss": 0.9327, "step": 4630 }, { "epoch": 0.2317450804115473, "grad_norm": 1.333825945854187, "learning_rate": 9.67235792199146e-05, "loss": 1.0045, "step": 4640 }, { "epoch": 0.23224453101588252, "grad_norm": 3.206181049346924, "learning_rate": 9.670959863653604e-05, "loss": 0.813, "step": 4650 }, { "epoch": 0.23274398162021775, "grad_norm": 3.0151419639587402, "learning_rate": 9.669558930361397e-05, "loss": 1.0477, "step": 4660 }, { "epoch": 0.233243432224553, "grad_norm": 2.978698968887329, "learning_rate": 9.668155122977109e-05, "loss": 0.7547, "step": 4670 }, { "epoch": 0.23374288282888822, "grad_norm": 2.5535624027252197, "learning_rate": 9.666748442364775e-05, "loss": 0.8055, "step": 4680 }, { "epoch": 0.23424233343322345, "grad_norm": 1.425445556640625, "learning_rate": 9.6653388893902e-05, "loss": 0.9289, "step": 4690 }, { "epoch": 0.2347417840375587, "grad_norm": 3.3846025466918945, "learning_rate": 9.663926464920958e-05, "loss": 0.9202, "step": 4700 }, { "epoch": 0.23524123464189392, "grad_norm": 1.5291244983673096, "learning_rate": 9.662511169826391e-05, "loss": 0.9463, "step": 4710 }, { "epoch": 0.23574068524622915, "grad_norm": 0.5786092877388, "learning_rate": 9.661093004977606e-05, "loss": 0.6806, "step": 4720 }, { "epoch": 0.2362401358505644, "grad_norm": 1.5869218111038208, "learning_rate": 9.659671971247475e-05, "loss": 0.8826, "step": 4730 }, { "epoch": 0.23673958645489962, "grad_norm": 1.6809816360473633, "learning_rate": 9.658248069510639e-05, "loss": 0.8307, "step": 4740 }, { "epoch": 0.23723903705923485, "grad_norm": 1.3082853555679321, "learning_rate": 9.656821300643504e-05, "loss": 0.945, "step": 4750 }, { "epoch": 0.23773848766357009, "grad_norm": 3.3663363456726074, "learning_rate": 9.655391665524239e-05, "loss": 0.9417, "step": 4760 }, { "epoch": 0.2382379382679053, "grad_norm": 2.5983574390411377, "learning_rate": 9.653959165032779e-05, "loss": 0.8467, "step": 4770 }, { "epoch": 0.23873738887224052, "grad_norm": 1.2755992412567139, "learning_rate": 9.652523800050819e-05, "loss": 0.8064, "step": 4780 }, { "epoch": 0.23923683947657576, "grad_norm": 2.786166191101074, "learning_rate": 9.65108557146182e-05, "loss": 0.939, "step": 4790 }, { "epoch": 0.239736290080911, "grad_norm": 3.326118230819702, "learning_rate": 9.649644480151008e-05, "loss": 0.9213, "step": 4800 }, { "epoch": 0.24023574068524622, "grad_norm": 0.9329853057861328, "learning_rate": 9.648200527005364e-05, "loss": 0.8436, "step": 4810 }, { "epoch": 0.24073519128958146, "grad_norm": 1.561836838722229, "learning_rate": 9.646753712913637e-05, "loss": 0.9049, "step": 4820 }, { "epoch": 0.2412346418939167, "grad_norm": 2.1252520084381104, "learning_rate": 9.645304038766335e-05, "loss": 1.193, "step": 4830 }, { "epoch": 0.24173409249825192, "grad_norm": 2.028834819793701, "learning_rate": 9.643851505455725e-05, "loss": 0.7113, "step": 4840 }, { "epoch": 0.24223354310258716, "grad_norm": 4.80098295211792, "learning_rate": 9.642396113875834e-05, "loss": 0.8507, "step": 4850 }, { "epoch": 0.2427329937069224, "grad_norm": 2.0205676555633545, "learning_rate": 9.640937864922447e-05, "loss": 1.0054, "step": 4860 }, { "epoch": 0.24323244431125762, "grad_norm": 2.245183229446411, "learning_rate": 9.639476759493114e-05, "loss": 0.9972, "step": 4870 }, { "epoch": 0.24373189491559286, "grad_norm": 1.1496859788894653, "learning_rate": 9.638012798487135e-05, "loss": 0.9177, "step": 4880 }, { "epoch": 0.2442313455199281, "grad_norm": 4.3665032386779785, "learning_rate": 9.636545982805574e-05, "loss": 0.7268, "step": 4890 }, { "epoch": 0.24473079612426332, "grad_norm": 1.243686318397522, "learning_rate": 9.635076313351248e-05, "loss": 0.8992, "step": 4900 }, { "epoch": 0.24523024672859856, "grad_norm": 1.5397096872329712, "learning_rate": 9.633603791028732e-05, "loss": 0.8472, "step": 4910 }, { "epoch": 0.24572969733293376, "grad_norm": 2.435830593109131, "learning_rate": 9.632128416744355e-05, "loss": 1.0926, "step": 4920 }, { "epoch": 0.246229147937269, "grad_norm": 1.8933160305023193, "learning_rate": 9.630650191406205e-05, "loss": 0.7815, "step": 4930 }, { "epoch": 0.24672859854160423, "grad_norm": 3.721090078353882, "learning_rate": 9.629169115924123e-05, "loss": 0.979, "step": 4940 }, { "epoch": 0.24722804914593946, "grad_norm": 1.8302251100540161, "learning_rate": 9.627685191209703e-05, "loss": 0.7747, "step": 4950 }, { "epoch": 0.2477274997502747, "grad_norm": 1.1439883708953857, "learning_rate": 9.626198418176296e-05, "loss": 0.9603, "step": 4960 }, { "epoch": 0.24822695035460993, "grad_norm": 1.6686792373657227, "learning_rate": 9.624708797739001e-05, "loss": 0.7666, "step": 4970 }, { "epoch": 0.24872640095894516, "grad_norm": 1.5663448572158813, "learning_rate": 9.623216330814675e-05, "loss": 0.7684, "step": 4980 }, { "epoch": 0.2492258515632804, "grad_norm": 1.3111817836761475, "learning_rate": 9.621721018321924e-05, "loss": 0.774, "step": 4990 }, { "epoch": 0.24972530216761563, "grad_norm": 1.4687976837158203, "learning_rate": 9.620222861181103e-05, "loss": 0.784, "step": 5000 }, { "epoch": 0.25022475277195083, "grad_norm": 2.047543525695801, "learning_rate": 9.618721860314326e-05, "loss": 0.9805, "step": 5010 }, { "epoch": 0.2507242033762861, "grad_norm": 1.2842210531234741, "learning_rate": 9.617218016645448e-05, "loss": 1.0804, "step": 5020 }, { "epoch": 0.2512236539806213, "grad_norm": 1.8332990407943726, "learning_rate": 9.615711331100081e-05, "loss": 0.8339, "step": 5030 }, { "epoch": 0.25172310458495656, "grad_norm": 1.4070039987564087, "learning_rate": 9.614201804605581e-05, "loss": 0.8671, "step": 5040 }, { "epoch": 0.25222255518929176, "grad_norm": 2.0862865447998047, "learning_rate": 9.612689438091054e-05, "loss": 1.0481, "step": 5050 }, { "epoch": 0.252722005793627, "grad_norm": 3.3928749561309814, "learning_rate": 9.611174232487357e-05, "loss": 0.7934, "step": 5060 }, { "epoch": 0.25322145639796223, "grad_norm": 1.5095566511154175, "learning_rate": 9.609656188727089e-05, "loss": 0.7343, "step": 5070 }, { "epoch": 0.2537209070022975, "grad_norm": 1.5008156299591064, "learning_rate": 9.608135307744601e-05, "loss": 0.9165, "step": 5080 }, { "epoch": 0.2542203576066327, "grad_norm": 1.3544814586639404, "learning_rate": 9.60661159047599e-05, "loss": 0.7254, "step": 5090 }, { "epoch": 0.25471980821096796, "grad_norm": 3.1354422569274902, "learning_rate": 9.605085037859094e-05, "loss": 0.7744, "step": 5100 }, { "epoch": 0.25521925881530316, "grad_norm": 0.9450280666351318, "learning_rate": 9.6035556508335e-05, "loss": 0.9265, "step": 5110 }, { "epoch": 0.2557187094196384, "grad_norm": 1.6109542846679688, "learning_rate": 9.602023430340539e-05, "loss": 0.8604, "step": 5120 }, { "epoch": 0.25621816002397363, "grad_norm": 2.064497470855713, "learning_rate": 9.600488377323287e-05, "loss": 0.8158, "step": 5130 }, { "epoch": 0.25671761062830883, "grad_norm": 1.5562397241592407, "learning_rate": 9.598950492726559e-05, "loss": 0.8947, "step": 5140 }, { "epoch": 0.2572170612326441, "grad_norm": 1.3519991636276245, "learning_rate": 9.597409777496918e-05, "loss": 0.8841, "step": 5150 }, { "epoch": 0.2577165118369793, "grad_norm": 2.1325738430023193, "learning_rate": 9.595866232582665e-05, "loss": 0.7257, "step": 5160 }, { "epoch": 0.25821596244131456, "grad_norm": 2.0866081714630127, "learning_rate": 9.594319858933848e-05, "loss": 0.891, "step": 5170 }, { "epoch": 0.25871541304564977, "grad_norm": 1.882603645324707, "learning_rate": 9.592770657502251e-05, "loss": 1.0195, "step": 5180 }, { "epoch": 0.25921486364998503, "grad_norm": 1.3703722953796387, "learning_rate": 9.5912186292414e-05, "loss": 0.9031, "step": 5190 }, { "epoch": 0.25971431425432023, "grad_norm": 1.3103618621826172, "learning_rate": 9.589663775106564e-05, "loss": 0.8455, "step": 5200 }, { "epoch": 0.2602137648586555, "grad_norm": 2.314448118209839, "learning_rate": 9.588106096054745e-05, "loss": 1.0818, "step": 5210 }, { "epoch": 0.2607132154629907, "grad_norm": 1.4067221879959106, "learning_rate": 9.58654559304469e-05, "loss": 0.8523, "step": 5220 }, { "epoch": 0.26121266606732596, "grad_norm": 1.8233261108398438, "learning_rate": 9.58498226703688e-05, "loss": 0.8185, "step": 5230 }, { "epoch": 0.26171211667166117, "grad_norm": 4.319331645965576, "learning_rate": 9.583416118993536e-05, "loss": 1.2356, "step": 5240 }, { "epoch": 0.2622115672759964, "grad_norm": 4.6194024085998535, "learning_rate": 9.581847149878612e-05, "loss": 0.9103, "step": 5250 }, { "epoch": 0.26271101788033163, "grad_norm": 2.9623756408691406, "learning_rate": 9.580275360657805e-05, "loss": 0.9043, "step": 5260 }, { "epoch": 0.2632104684846669, "grad_norm": 1.8878183364868164, "learning_rate": 9.578700752298542e-05, "loss": 0.8641, "step": 5270 }, { "epoch": 0.2637099190890021, "grad_norm": 1.2333494424819946, "learning_rate": 9.577123325769987e-05, "loss": 0.7547, "step": 5280 }, { "epoch": 0.2642093696933373, "grad_norm": 3.1482670307159424, "learning_rate": 9.575543082043042e-05, "loss": 1.0384, "step": 5290 }, { "epoch": 0.26470882029767256, "grad_norm": 1.1335426568984985, "learning_rate": 9.573960022090336e-05, "loss": 0.9774, "step": 5300 }, { "epoch": 0.26520827090200777, "grad_norm": 0.824373185634613, "learning_rate": 9.572374146886236e-05, "loss": 0.9198, "step": 5310 }, { "epoch": 0.26570772150634303, "grad_norm": 5.559612274169922, "learning_rate": 9.570785457406842e-05, "loss": 0.7867, "step": 5320 }, { "epoch": 0.26620717211067824, "grad_norm": 1.273941159248352, "learning_rate": 9.569193954629985e-05, "loss": 0.7399, "step": 5330 }, { "epoch": 0.2667066227150135, "grad_norm": 3.5407681465148926, "learning_rate": 9.567599639535225e-05, "loss": 0.8633, "step": 5340 }, { "epoch": 0.2672060733193487, "grad_norm": 1.2934330701828003, "learning_rate": 9.56600251310386e-05, "loss": 0.8116, "step": 5350 }, { "epoch": 0.26770552392368396, "grad_norm": 1.032084584236145, "learning_rate": 9.564402576318912e-05, "loss": 0.8625, "step": 5360 }, { "epoch": 0.26820497452801917, "grad_norm": 3.6709439754486084, "learning_rate": 9.562799830165132e-05, "loss": 0.7579, "step": 5370 }, { "epoch": 0.26870442513235443, "grad_norm": 3.813777208328247, "learning_rate": 9.561194275629006e-05, "loss": 0.8647, "step": 5380 }, { "epoch": 0.26920387573668964, "grad_norm": 2.112259864807129, "learning_rate": 9.559585913698746e-05, "loss": 0.7539, "step": 5390 }, { "epoch": 0.2697033263410249, "grad_norm": 2.7863011360168457, "learning_rate": 9.557974745364289e-05, "loss": 0.8385, "step": 5400 }, { "epoch": 0.2702027769453601, "grad_norm": 3.3667349815368652, "learning_rate": 9.556360771617305e-05, "loss": 0.8315, "step": 5410 }, { "epoch": 0.27070222754969536, "grad_norm": 3.227236032485962, "learning_rate": 9.554743993451183e-05, "loss": 0.8337, "step": 5420 }, { "epoch": 0.27120167815403057, "grad_norm": 1.1658493280410767, "learning_rate": 9.553124411861045e-05, "loss": 0.7649, "step": 5430 }, { "epoch": 0.2717011287583658, "grad_norm": 2.001400947570801, "learning_rate": 9.551502027843737e-05, "loss": 0.7902, "step": 5440 }, { "epoch": 0.27220057936270103, "grad_norm": 1.665940284729004, "learning_rate": 9.549876842397827e-05, "loss": 0.7532, "step": 5450 }, { "epoch": 0.27270002996703624, "grad_norm": 3.2563281059265137, "learning_rate": 9.548248856523611e-05, "loss": 0.9413, "step": 5460 }, { "epoch": 0.2731994805713715, "grad_norm": 1.9077765941619873, "learning_rate": 9.546618071223105e-05, "loss": 0.8394, "step": 5470 }, { "epoch": 0.2736989311757067, "grad_norm": 3.954552412033081, "learning_rate": 9.54498448750005e-05, "loss": 1.0901, "step": 5480 }, { "epoch": 0.27419838178004197, "grad_norm": 2.2440567016601562, "learning_rate": 9.543348106359912e-05, "loss": 0.7122, "step": 5490 }, { "epoch": 0.27469783238437717, "grad_norm": 1.053545355796814, "learning_rate": 9.541708928809872e-05, "loss": 0.974, "step": 5500 }, { "epoch": 0.27519728298871243, "grad_norm": 1.2328383922576904, "learning_rate": 9.54006695585884e-05, "loss": 0.6709, "step": 5510 }, { "epoch": 0.27569673359304764, "grad_norm": 1.5231152772903442, "learning_rate": 9.53842218851744e-05, "loss": 0.7991, "step": 5520 }, { "epoch": 0.2761961841973829, "grad_norm": 1.3110510110855103, "learning_rate": 9.536774627798019e-05, "loss": 0.7467, "step": 5530 }, { "epoch": 0.2766956348017181, "grad_norm": 2.076733350753784, "learning_rate": 9.535124274714647e-05, "loss": 0.8851, "step": 5540 }, { "epoch": 0.27719508540605337, "grad_norm": 1.84781014919281, "learning_rate": 9.533471130283103e-05, "loss": 0.7877, "step": 5550 }, { "epoch": 0.27769453601038857, "grad_norm": 3.3306989669799805, "learning_rate": 9.531815195520893e-05, "loss": 0.7778, "step": 5560 }, { "epoch": 0.2781939866147238, "grad_norm": 2.332953691482544, "learning_rate": 9.530156471447237e-05, "loss": 0.7651, "step": 5570 }, { "epoch": 0.27869343721905904, "grad_norm": 1.7331910133361816, "learning_rate": 9.528494959083072e-05, "loss": 0.7889, "step": 5580 }, { "epoch": 0.27919288782339424, "grad_norm": 3.0806610584259033, "learning_rate": 9.52683065945105e-05, "loss": 1.1269, "step": 5590 }, { "epoch": 0.2796923384277295, "grad_norm": 3.531687021255493, "learning_rate": 9.525163573575542e-05, "loss": 0.9093, "step": 5600 }, { "epoch": 0.2801917890320647, "grad_norm": 0.8916457295417786, "learning_rate": 9.52349370248263e-05, "loss": 0.6907, "step": 5610 }, { "epoch": 0.28069123963639997, "grad_norm": 1.2871992588043213, "learning_rate": 9.521821047200112e-05, "loss": 0.7586, "step": 5620 }, { "epoch": 0.2811906902407352, "grad_norm": 2.2138314247131348, "learning_rate": 9.5201456087575e-05, "loss": 0.7528, "step": 5630 }, { "epoch": 0.28169014084507044, "grad_norm": 1.9595929384231567, "learning_rate": 9.51846738818602e-05, "loss": 1.024, "step": 5640 }, { "epoch": 0.28218959144940564, "grad_norm": 1.5594611167907715, "learning_rate": 9.516786386518607e-05, "loss": 0.7457, "step": 5650 }, { "epoch": 0.2826890420537409, "grad_norm": 3.690540075302124, "learning_rate": 9.51510260478991e-05, "loss": 0.7899, "step": 5660 }, { "epoch": 0.2831884926580761, "grad_norm": 0.46564897894859314, "learning_rate": 9.513416044036291e-05, "loss": 0.7996, "step": 5670 }, { "epoch": 0.28368794326241137, "grad_norm": 1.169894814491272, "learning_rate": 9.511726705295817e-05, "loss": 0.6761, "step": 5680 }, { "epoch": 0.2841873938667466, "grad_norm": 3.206878900527954, "learning_rate": 9.510034589608273e-05, "loss": 0.8909, "step": 5690 }, { "epoch": 0.28468684447108183, "grad_norm": 2.0504379272460938, "learning_rate": 9.508339698015145e-05, "loss": 0.9729, "step": 5700 }, { "epoch": 0.28518629507541704, "grad_norm": 3.438324213027954, "learning_rate": 9.506642031559631e-05, "loss": 0.968, "step": 5710 }, { "epoch": 0.28568574567975225, "grad_norm": 1.320440649986267, "learning_rate": 9.504941591286637e-05, "loss": 1.0769, "step": 5720 }, { "epoch": 0.2861851962840875, "grad_norm": 3.160295009613037, "learning_rate": 9.503238378242777e-05, "loss": 0.9258, "step": 5730 }, { "epoch": 0.2866846468884227, "grad_norm": 5.033565521240234, "learning_rate": 9.501532393476371e-05, "loss": 1.1267, "step": 5740 }, { "epoch": 0.287184097492758, "grad_norm": 5.532677173614502, "learning_rate": 9.499823638037444e-05, "loss": 1.1826, "step": 5750 }, { "epoch": 0.2876835480970932, "grad_norm": 0.9898492693901062, "learning_rate": 9.498112112977729e-05, "loss": 0.9613, "step": 5760 }, { "epoch": 0.28818299870142844, "grad_norm": 0.7135995626449585, "learning_rate": 9.496397819350657e-05, "loss": 0.8187, "step": 5770 }, { "epoch": 0.28868244930576364, "grad_norm": 2.109062433242798, "learning_rate": 9.494680758211374e-05, "loss": 0.9668, "step": 5780 }, { "epoch": 0.2891818999100989, "grad_norm": 1.283250093460083, "learning_rate": 9.492960930616719e-05, "loss": 0.9275, "step": 5790 }, { "epoch": 0.2896813505144341, "grad_norm": 2.174206495285034, "learning_rate": 9.491238337625239e-05, "loss": 0.5863, "step": 5800 }, { "epoch": 0.29018080111876937, "grad_norm": 2.015956163406372, "learning_rate": 9.489512980297184e-05, "loss": 0.7635, "step": 5810 }, { "epoch": 0.2906802517231046, "grad_norm": 2.241713523864746, "learning_rate": 9.487784859694501e-05, "loss": 0.6692, "step": 5820 }, { "epoch": 0.29117970232743984, "grad_norm": 3.3468496799468994, "learning_rate": 9.486053976880842e-05, "loss": 0.9513, "step": 5830 }, { "epoch": 0.29167915293177504, "grad_norm": 2.5689618587493896, "learning_rate": 9.484320332921555e-05, "loss": 0.8257, "step": 5840 }, { "epoch": 0.2921786035361103, "grad_norm": 1.690453290939331, "learning_rate": 9.482583928883693e-05, "loss": 0.9261, "step": 5850 }, { "epoch": 0.2926780541404455, "grad_norm": 1.3991612195968628, "learning_rate": 9.480844765836004e-05, "loss": 0.8518, "step": 5860 }, { "epoch": 0.2931775047447807, "grad_norm": 2.159390687942505, "learning_rate": 9.479102844848933e-05, "loss": 0.6723, "step": 5870 }, { "epoch": 0.293676955349116, "grad_norm": 1.380337119102478, "learning_rate": 9.477358166994625e-05, "loss": 0.7546, "step": 5880 }, { "epoch": 0.2941764059534512, "grad_norm": 4.3527045249938965, "learning_rate": 9.475610733346922e-05, "loss": 1.0041, "step": 5890 }, { "epoch": 0.29467585655778644, "grad_norm": 2.296097993850708, "learning_rate": 9.473860544981362e-05, "loss": 0.8609, "step": 5900 }, { "epoch": 0.29517530716212165, "grad_norm": 2.5303385257720947, "learning_rate": 9.472107602975178e-05, "loss": 0.5821, "step": 5910 }, { "epoch": 0.2956747577664569, "grad_norm": 2.853238821029663, "learning_rate": 9.470351908407294e-05, "loss": 0.9672, "step": 5920 }, { "epoch": 0.2961742083707921, "grad_norm": 4.257169246673584, "learning_rate": 9.468593462358337e-05, "loss": 1.0844, "step": 5930 }, { "epoch": 0.2966736589751274, "grad_norm": 1.730938196182251, "learning_rate": 9.466832265910619e-05, "loss": 0.9442, "step": 5940 }, { "epoch": 0.2971731095794626, "grad_norm": 1.6568882465362549, "learning_rate": 9.46506832014815e-05, "loss": 0.8206, "step": 5950 }, { "epoch": 0.29767256018379784, "grad_norm": 2.4923627376556396, "learning_rate": 9.46330162615663e-05, "loss": 0.9823, "step": 5960 }, { "epoch": 0.29817201078813305, "grad_norm": 3.7002739906311035, "learning_rate": 9.461532185023452e-05, "loss": 0.9996, "step": 5970 }, { "epoch": 0.2986714613924683, "grad_norm": 1.3538328409194946, "learning_rate": 9.459759997837696e-05, "loss": 0.7523, "step": 5980 }, { "epoch": 0.2991709119968035, "grad_norm": 2.311701774597168, "learning_rate": 9.457985065690138e-05, "loss": 0.5584, "step": 5990 }, { "epoch": 0.2996703626011388, "grad_norm": 3.0756266117095947, "learning_rate": 9.45620738967324e-05, "loss": 0.8745, "step": 6000 }, { "epoch": 0.300169813205474, "grad_norm": 0.9751172065734863, "learning_rate": 9.454426970881153e-05, "loss": 0.9713, "step": 6010 }, { "epoch": 0.3006692638098092, "grad_norm": 1.8199493885040283, "learning_rate": 9.452643810409716e-05, "loss": 1.0827, "step": 6020 }, { "epoch": 0.30116871441414444, "grad_norm": 2.796093225479126, "learning_rate": 9.450857909356459e-05, "loss": 0.9819, "step": 6030 }, { "epoch": 0.30166816501847965, "grad_norm": 2.0498862266540527, "learning_rate": 9.449069268820592e-05, "loss": 0.7893, "step": 6040 }, { "epoch": 0.3021676156228149, "grad_norm": 3.2884140014648438, "learning_rate": 9.447277889903015e-05, "loss": 0.865, "step": 6050 }, { "epoch": 0.3026670662271501, "grad_norm": 1.4740967750549316, "learning_rate": 9.445483773706318e-05, "loss": 0.8088, "step": 6060 }, { "epoch": 0.3031665168314854, "grad_norm": 2.078514337539673, "learning_rate": 9.443686921334766e-05, "loss": 0.9547, "step": 6070 }, { "epoch": 0.3036659674358206, "grad_norm": 3.334926128387451, "learning_rate": 9.441887333894319e-05, "loss": 0.9822, "step": 6080 }, { "epoch": 0.30416541804015584, "grad_norm": 4.29825496673584, "learning_rate": 9.44008501249261e-05, "loss": 0.8485, "step": 6090 }, { "epoch": 0.30466486864449105, "grad_norm": 3.656981945037842, "learning_rate": 9.43827995823896e-05, "loss": 0.8, "step": 6100 }, { "epoch": 0.3051643192488263, "grad_norm": 2.2716410160064697, "learning_rate": 9.436472172244374e-05, "loss": 1.0473, "step": 6110 }, { "epoch": 0.3056637698531615, "grad_norm": 1.8566354513168335, "learning_rate": 9.434661655621535e-05, "loss": 0.8222, "step": 6120 }, { "epoch": 0.3061632204574968, "grad_norm": 2.2977349758148193, "learning_rate": 9.432848409484807e-05, "loss": 0.7384, "step": 6130 }, { "epoch": 0.306662671061832, "grad_norm": 0.9810746312141418, "learning_rate": 9.431032434950236e-05, "loss": 0.73, "step": 6140 }, { "epoch": 0.30716212166616724, "grad_norm": 1.3762357234954834, "learning_rate": 9.429213733135543e-05, "loss": 0.8214, "step": 6150 }, { "epoch": 0.30766157227050245, "grad_norm": 1.197029709815979, "learning_rate": 9.427392305160134e-05, "loss": 0.9194, "step": 6160 }, { "epoch": 0.30816102287483765, "grad_norm": 1.17782461643219, "learning_rate": 9.42556815214509e-05, "loss": 0.8519, "step": 6170 }, { "epoch": 0.3086604734791729, "grad_norm": 1.609448790550232, "learning_rate": 9.423741275213165e-05, "loss": 0.8852, "step": 6180 }, { "epoch": 0.3091599240835081, "grad_norm": 4.505314826965332, "learning_rate": 9.421911675488797e-05, "loss": 1.0346, "step": 6190 }, { "epoch": 0.3096593746878434, "grad_norm": 2.860553026199341, "learning_rate": 9.420079354098093e-05, "loss": 0.8123, "step": 6200 }, { "epoch": 0.3101588252921786, "grad_norm": 3.7026782035827637, "learning_rate": 9.418244312168842e-05, "loss": 0.7699, "step": 6210 }, { "epoch": 0.31065827589651385, "grad_norm": 1.043831467628479, "learning_rate": 9.4164065508305e-05, "loss": 0.8402, "step": 6220 }, { "epoch": 0.31115772650084905, "grad_norm": 2.187206506729126, "learning_rate": 9.414566071214204e-05, "loss": 0.862, "step": 6230 }, { "epoch": 0.3116571771051843, "grad_norm": 3.2246241569519043, "learning_rate": 9.412722874452758e-05, "loss": 0.9774, "step": 6240 }, { "epoch": 0.3121566277095195, "grad_norm": 1.6527377367019653, "learning_rate": 9.410876961680644e-05, "loss": 0.783, "step": 6250 }, { "epoch": 0.3126560783138548, "grad_norm": 4.022501468658447, "learning_rate": 9.409028334034011e-05, "loss": 0.8063, "step": 6260 }, { "epoch": 0.31315552891819, "grad_norm": 1.2540746927261353, "learning_rate": 9.407176992650681e-05, "loss": 0.8424, "step": 6270 }, { "epoch": 0.31365497952252525, "grad_norm": 3.3473026752471924, "learning_rate": 9.405322938670146e-05, "loss": 0.8454, "step": 6280 }, { "epoch": 0.31415443012686045, "grad_norm": 4.546389579772949, "learning_rate": 9.403466173233569e-05, "loss": 1.1217, "step": 6290 }, { "epoch": 0.3146538807311957, "grad_norm": 1.1712909936904907, "learning_rate": 9.40160669748378e-05, "loss": 0.9331, "step": 6300 }, { "epoch": 0.3151533313355309, "grad_norm": 2.0095112323760986, "learning_rate": 9.399744512565276e-05, "loss": 0.6213, "step": 6310 }, { "epoch": 0.3156527819398661, "grad_norm": 2.3807756900787354, "learning_rate": 9.397879619624226e-05, "loss": 0.8187, "step": 6320 }, { "epoch": 0.3161522325442014, "grad_norm": 10.111104965209961, "learning_rate": 9.39601201980846e-05, "loss": 1.0629, "step": 6330 }, { "epoch": 0.3166516831485366, "grad_norm": 1.3615238666534424, "learning_rate": 9.394141714267481e-05, "loss": 0.7424, "step": 6340 }, { "epoch": 0.31715113375287185, "grad_norm": 2.2437679767608643, "learning_rate": 9.392268704152449e-05, "loss": 1.0506, "step": 6350 }, { "epoch": 0.31765058435720706, "grad_norm": 1.7457895278930664, "learning_rate": 9.390392990616199e-05, "loss": 0.9029, "step": 6360 }, { "epoch": 0.3181500349615423, "grad_norm": 2.4126617908477783, "learning_rate": 9.388514574813216e-05, "loss": 1.2185, "step": 6370 }, { "epoch": 0.3186494855658775, "grad_norm": 2.142484188079834, "learning_rate": 9.386633457899665e-05, "loss": 1.0851, "step": 6380 }, { "epoch": 0.3191489361702128, "grad_norm": 6.224302768707275, "learning_rate": 9.384749641033359e-05, "loss": 0.8494, "step": 6390 }, { "epoch": 0.319648386774548, "grad_norm": 1.4591060876846313, "learning_rate": 9.38286312537378e-05, "loss": 0.9518, "step": 6400 }, { "epoch": 0.32014783737888325, "grad_norm": 4.708948135375977, "learning_rate": 9.38097391208207e-05, "loss": 0.9182, "step": 6410 }, { "epoch": 0.32064728798321845, "grad_norm": 1.6708866357803345, "learning_rate": 9.379082002321031e-05, "loss": 0.7359, "step": 6420 }, { "epoch": 0.3211467385875537, "grad_norm": 2.336205005645752, "learning_rate": 9.377187397255125e-05, "loss": 0.833, "step": 6430 }, { "epoch": 0.3216461891918889, "grad_norm": 1.5494914054870605, "learning_rate": 9.375290098050473e-05, "loss": 1.0873, "step": 6440 }, { "epoch": 0.3221456397962241, "grad_norm": 1.990413784980774, "learning_rate": 9.373390105874853e-05, "loss": 0.7674, "step": 6450 }, { "epoch": 0.3226450904005594, "grad_norm": 2.463172197341919, "learning_rate": 9.371487421897703e-05, "loss": 1.0482, "step": 6460 }, { "epoch": 0.3231445410048946, "grad_norm": 2.06982159614563, "learning_rate": 9.369582047290114e-05, "loss": 0.7685, "step": 6470 }, { "epoch": 0.32364399160922985, "grad_norm": 3.1632564067840576, "learning_rate": 9.367673983224838e-05, "loss": 0.9676, "step": 6480 }, { "epoch": 0.32414344221356506, "grad_norm": 1.9890689849853516, "learning_rate": 9.365763230876276e-05, "loss": 0.8591, "step": 6490 }, { "epoch": 0.3246428928179003, "grad_norm": 3.06638240814209, "learning_rate": 9.363849791420492e-05, "loss": 1.0175, "step": 6500 }, { "epoch": 0.3251423434222355, "grad_norm": 4.139556407928467, "learning_rate": 9.361933666035197e-05, "loss": 0.9419, "step": 6510 }, { "epoch": 0.3256417940265708, "grad_norm": 2.3454649448394775, "learning_rate": 9.360014855899755e-05, "loss": 0.8663, "step": 6520 }, { "epoch": 0.326141244630906, "grad_norm": 2.2644970417022705, "learning_rate": 9.358093362195188e-05, "loss": 0.7967, "step": 6530 }, { "epoch": 0.32664069523524125, "grad_norm": 1.9934505224227905, "learning_rate": 9.356169186104165e-05, "loss": 0.8378, "step": 6540 }, { "epoch": 0.32714014583957646, "grad_norm": 1.7212903499603271, "learning_rate": 9.35424232881101e-05, "loss": 0.7944, "step": 6550 }, { "epoch": 0.3276395964439117, "grad_norm": 1.9699835777282715, "learning_rate": 9.35231279150169e-05, "loss": 0.9057, "step": 6560 }, { "epoch": 0.3281390470482469, "grad_norm": 1.6256062984466553, "learning_rate": 9.35038057536383e-05, "loss": 0.8083, "step": 6570 }, { "epoch": 0.3286384976525822, "grad_norm": 1.4498624801635742, "learning_rate": 9.348445681586702e-05, "loss": 0.8005, "step": 6580 }, { "epoch": 0.3291379482569174, "grad_norm": 2.1569433212280273, "learning_rate": 9.346508111361218e-05, "loss": 0.8273, "step": 6590 }, { "epoch": 0.3296373988612526, "grad_norm": 4.2927703857421875, "learning_rate": 9.34456786587995e-05, "loss": 0.9587, "step": 6600 }, { "epoch": 0.33013684946558786, "grad_norm": 2.9086852073669434, "learning_rate": 9.342624946337106e-05, "loss": 1.002, "step": 6610 }, { "epoch": 0.33063630006992306, "grad_norm": 3.242313861846924, "learning_rate": 9.340679353928548e-05, "loss": 0.8861, "step": 6620 }, { "epoch": 0.3311357506742583, "grad_norm": 1.050492525100708, "learning_rate": 9.338731089851774e-05, "loss": 0.6672, "step": 6630 }, { "epoch": 0.3316352012785935, "grad_norm": 2.7671895027160645, "learning_rate": 9.336780155305935e-05, "loss": 0.8712, "step": 6640 }, { "epoch": 0.3321346518829288, "grad_norm": 3.723975896835327, "learning_rate": 9.334826551491821e-05, "loss": 0.8757, "step": 6650 }, { "epoch": 0.332634102487264, "grad_norm": 1.2424172163009644, "learning_rate": 9.332870279611868e-05, "loss": 1.0399, "step": 6660 }, { "epoch": 0.33313355309159925, "grad_norm": 1.625571370124817, "learning_rate": 9.33091134087015e-05, "loss": 0.9807, "step": 6670 }, { "epoch": 0.33363300369593446, "grad_norm": 1.5173087120056152, "learning_rate": 9.328949736472385e-05, "loss": 0.9511, "step": 6680 }, { "epoch": 0.3341324543002697, "grad_norm": 1.8095574378967285, "learning_rate": 9.326985467625932e-05, "loss": 0.8397, "step": 6690 }, { "epoch": 0.3346319049046049, "grad_norm": 0.9123914241790771, "learning_rate": 9.325018535539793e-05, "loss": 0.9547, "step": 6700 }, { "epoch": 0.3351313555089402, "grad_norm": 2.3337626457214355, "learning_rate": 9.3230489414246e-05, "loss": 0.7205, "step": 6710 }, { "epoch": 0.3356308061132754, "grad_norm": 1.0187079906463623, "learning_rate": 9.321076686492633e-05, "loss": 0.6824, "step": 6720 }, { "epoch": 0.33613025671761065, "grad_norm": 2.9060966968536377, "learning_rate": 9.319101771957804e-05, "loss": 0.9898, "step": 6730 }, { "epoch": 0.33662970732194586, "grad_norm": 1.6975250244140625, "learning_rate": 9.317124199035663e-05, "loss": 0.7268, "step": 6740 }, { "epoch": 0.33712915792628106, "grad_norm": 0.9867602586746216, "learning_rate": 9.315143968943401e-05, "loss": 0.6433, "step": 6750 }, { "epoch": 0.3376286085306163, "grad_norm": 4.174263954162598, "learning_rate": 9.313161082899834e-05, "loss": 0.7511, "step": 6760 }, { "epoch": 0.33812805913495153, "grad_norm": 3.0919437408447266, "learning_rate": 9.311175542125427e-05, "loss": 0.8776, "step": 6770 }, { "epoch": 0.3386275097392868, "grad_norm": 1.691468596458435, "learning_rate": 9.309187347842266e-05, "loss": 0.7749, "step": 6780 }, { "epoch": 0.339126960343622, "grad_norm": 1.6738675832748413, "learning_rate": 9.307196501274077e-05, "loss": 0.9477, "step": 6790 }, { "epoch": 0.33962641094795726, "grad_norm": 1.3428436517715454, "learning_rate": 9.305203003646217e-05, "loss": 0.9914, "step": 6800 }, { "epoch": 0.34012586155229246, "grad_norm": 2.856218099594116, "learning_rate": 9.303206856185674e-05, "loss": 0.7905, "step": 6810 }, { "epoch": 0.3406253121566277, "grad_norm": 5.577542304992676, "learning_rate": 9.301208060121067e-05, "loss": 0.8945, "step": 6820 }, { "epoch": 0.34112476276096293, "grad_norm": 2.0562527179718018, "learning_rate": 9.299206616682647e-05, "loss": 1.0614, "step": 6830 }, { "epoch": 0.3416242133652982, "grad_norm": 1.6865025758743286, "learning_rate": 9.297202527102294e-05, "loss": 0.8211, "step": 6840 }, { "epoch": 0.3421236639696334, "grad_norm": 1.926328420639038, "learning_rate": 9.295195792613514e-05, "loss": 0.8534, "step": 6850 }, { "epoch": 0.34262311457396866, "grad_norm": 1.5476715564727783, "learning_rate": 9.293186414451444e-05, "loss": 0.774, "step": 6860 }, { "epoch": 0.34312256517830386, "grad_norm": 1.5437580347061157, "learning_rate": 9.291174393852849e-05, "loss": 0.8928, "step": 6870 }, { "epoch": 0.3436220157826391, "grad_norm": 1.0636396408081055, "learning_rate": 9.289159732056114e-05, "loss": 0.8541, "step": 6880 }, { "epoch": 0.34412146638697433, "grad_norm": 1.2523255348205566, "learning_rate": 9.287142430301256e-05, "loss": 1.0726, "step": 6890 }, { "epoch": 0.34462091699130953, "grad_norm": 2.373462677001953, "learning_rate": 9.285122489829918e-05, "loss": 0.6725, "step": 6900 }, { "epoch": 0.3451203675956448, "grad_norm": 3.2414791584014893, "learning_rate": 9.28309991188536e-05, "loss": 0.8107, "step": 6910 }, { "epoch": 0.34561981819998, "grad_norm": 0.4396897554397583, "learning_rate": 9.281074697712471e-05, "loss": 0.7285, "step": 6920 }, { "epoch": 0.34611926880431526, "grad_norm": 1.6284270286560059, "learning_rate": 9.27904684855776e-05, "loss": 0.8076, "step": 6930 }, { "epoch": 0.34661871940865047, "grad_norm": 1.275205373764038, "learning_rate": 9.277016365669359e-05, "loss": 0.7222, "step": 6940 }, { "epoch": 0.3471181700129857, "grad_norm": 2.0777640342712402, "learning_rate": 9.274983250297025e-05, "loss": 1.0457, "step": 6950 }, { "epoch": 0.34761762061732093, "grad_norm": 2.362515926361084, "learning_rate": 9.272947503692126e-05, "loss": 1.0001, "step": 6960 }, { "epoch": 0.3481170712216562, "grad_norm": 2.075861930847168, "learning_rate": 9.270909127107655e-05, "loss": 0.8281, "step": 6970 }, { "epoch": 0.3486165218259914, "grad_norm": 7.117288112640381, "learning_rate": 9.268868121798227e-05, "loss": 1.1095, "step": 6980 }, { "epoch": 0.34911597243032666, "grad_norm": 2.2000198364257812, "learning_rate": 9.266824489020069e-05, "loss": 0.7435, "step": 6990 }, { "epoch": 0.34961542303466187, "grad_norm": 4.886950969696045, "learning_rate": 9.264778230031027e-05, "loss": 0.783, "step": 7000 }, { "epoch": 0.3501148736389971, "grad_norm": 2.541064977645874, "learning_rate": 9.262729346090566e-05, "loss": 0.8305, "step": 7010 }, { "epoch": 0.35061432424333233, "grad_norm": 2.1147301197052, "learning_rate": 9.260677838459764e-05, "loss": 1.0566, "step": 7020 }, { "epoch": 0.3511137748476676, "grad_norm": 6.015203952789307, "learning_rate": 9.258623708401313e-05, "loss": 1.0223, "step": 7030 }, { "epoch": 0.3516132254520028, "grad_norm": 3.577023506164551, "learning_rate": 9.25656695717952e-05, "loss": 0.8236, "step": 7040 }, { "epoch": 0.352112676056338, "grad_norm": 1.7369171380996704, "learning_rate": 9.254507586060311e-05, "loss": 0.8042, "step": 7050 }, { "epoch": 0.35261212666067326, "grad_norm": 2.276552677154541, "learning_rate": 9.252445596311214e-05, "loss": 0.8826, "step": 7060 }, { "epoch": 0.35311157726500847, "grad_norm": 3.7419540882110596, "learning_rate": 9.250380989201378e-05, "loss": 0.7798, "step": 7070 }, { "epoch": 0.35361102786934373, "grad_norm": 1.865341067314148, "learning_rate": 9.248313766001558e-05, "loss": 0.8, "step": 7080 }, { "epoch": 0.35411047847367894, "grad_norm": 2.2110178470611572, "learning_rate": 9.246243927984118e-05, "loss": 0.7609, "step": 7090 }, { "epoch": 0.3546099290780142, "grad_norm": 1.0193909406661987, "learning_rate": 9.244171476423037e-05, "loss": 1.0596, "step": 7100 }, { "epoch": 0.3551093796823494, "grad_norm": 1.9720994234085083, "learning_rate": 9.242096412593897e-05, "loss": 0.936, "step": 7110 }, { "epoch": 0.35560883028668466, "grad_norm": 3.3525550365448, "learning_rate": 9.240018737773892e-05, "loss": 1.0104, "step": 7120 }, { "epoch": 0.35610828089101987, "grad_norm": 2.81687331199646, "learning_rate": 9.237938453241821e-05, "loss": 1.1244, "step": 7130 }, { "epoch": 0.35660773149535513, "grad_norm": 2.2435097694396973, "learning_rate": 9.23585556027809e-05, "loss": 0.7526, "step": 7140 }, { "epoch": 0.35710718209969033, "grad_norm": 2.639529228210449, "learning_rate": 9.233770060164708e-05, "loss": 0.9304, "step": 7150 }, { "epoch": 0.3576066327040256, "grad_norm": 2.164951801300049, "learning_rate": 9.231681954185293e-05, "loss": 0.8313, "step": 7160 }, { "epoch": 0.3581060833083608, "grad_norm": 2.807096481323242, "learning_rate": 9.229591243625064e-05, "loss": 1.0993, "step": 7170 }, { "epoch": 0.35860553391269606, "grad_norm": 1.1400171518325806, "learning_rate": 9.227497929770843e-05, "loss": 0.8521, "step": 7180 }, { "epoch": 0.35910498451703127, "grad_norm": 3.1278467178344727, "learning_rate": 9.225402013911057e-05, "loss": 0.9987, "step": 7190 }, { "epoch": 0.3596044351213665, "grad_norm": 1.8795716762542725, "learning_rate": 9.22330349733573e-05, "loss": 0.9404, "step": 7200 }, { "epoch": 0.36010388572570173, "grad_norm": 2.5907552242279053, "learning_rate": 9.221202381336489e-05, "loss": 0.8723, "step": 7210 }, { "epoch": 0.36060333633003694, "grad_norm": 2.619285821914673, "learning_rate": 9.219098667206565e-05, "loss": 0.8166, "step": 7220 }, { "epoch": 0.3611027869343722, "grad_norm": 1.0197978019714355, "learning_rate": 9.216992356240782e-05, "loss": 0.8487, "step": 7230 }, { "epoch": 0.3616022375387074, "grad_norm": 1.6571626663208008, "learning_rate": 9.214883449735563e-05, "loss": 1.0165, "step": 7240 }, { "epoch": 0.36210168814304267, "grad_norm": 3.8328757286071777, "learning_rate": 9.212771948988935e-05, "loss": 1.1171, "step": 7250 }, { "epoch": 0.36260113874737787, "grad_norm": 2.775155782699585, "learning_rate": 9.210657855300511e-05, "loss": 0.8796, "step": 7260 }, { "epoch": 0.36310058935171313, "grad_norm": 1.5948903560638428, "learning_rate": 9.208541169971511e-05, "loss": 0.9731, "step": 7270 }, { "epoch": 0.36360003995604834, "grad_norm": 2.231907606124878, "learning_rate": 9.206421894304743e-05, "loss": 0.9123, "step": 7280 }, { "epoch": 0.3640994905603836, "grad_norm": 1.6792292594909668, "learning_rate": 9.20430002960461e-05, "loss": 0.8166, "step": 7290 }, { "epoch": 0.3645989411647188, "grad_norm": 2.6319034099578857, "learning_rate": 9.202175577177114e-05, "loss": 1.0514, "step": 7300 }, { "epoch": 0.36509839176905406, "grad_norm": 1.0980263948440552, "learning_rate": 9.200048538329844e-05, "loss": 0.8034, "step": 7310 }, { "epoch": 0.36559784237338927, "grad_norm": 3.211888313293457, "learning_rate": 9.197918914371979e-05, "loss": 1.0184, "step": 7320 }, { "epoch": 0.3660972929777245, "grad_norm": 2.2598297595977783, "learning_rate": 9.195786706614298e-05, "loss": 0.9803, "step": 7330 }, { "epoch": 0.36659674358205974, "grad_norm": 1.6521825790405273, "learning_rate": 9.193651916369162e-05, "loss": 0.8432, "step": 7340 }, { "epoch": 0.36709619418639494, "grad_norm": 2.371178388595581, "learning_rate": 9.191514544950525e-05, "loss": 0.8751, "step": 7350 }, { "epoch": 0.3675956447907302, "grad_norm": 2.6362617015838623, "learning_rate": 9.189374593673932e-05, "loss": 0.8984, "step": 7360 }, { "epoch": 0.3680950953950654, "grad_norm": 1.8687587976455688, "learning_rate": 9.187232063856509e-05, "loss": 0.8932, "step": 7370 }, { "epoch": 0.36859454599940067, "grad_norm": 2.2871909141540527, "learning_rate": 9.185086956816975e-05, "loss": 0.7942, "step": 7380 }, { "epoch": 0.3690939966037359, "grad_norm": 2.3869709968566895, "learning_rate": 9.182939273875634e-05, "loss": 0.8035, "step": 7390 }, { "epoch": 0.36959344720807114, "grad_norm": 1.6239734888076782, "learning_rate": 9.180789016354376e-05, "loss": 0.7439, "step": 7400 }, { "epoch": 0.37009289781240634, "grad_norm": 1.3308115005493164, "learning_rate": 9.178636185576672e-05, "loss": 0.7992, "step": 7410 }, { "epoch": 0.3705923484167416, "grad_norm": 0.9633936882019043, "learning_rate": 9.17648078286758e-05, "loss": 1.0191, "step": 7420 }, { "epoch": 0.3710917990210768, "grad_norm": 0.912649929523468, "learning_rate": 9.174322809553743e-05, "loss": 0.8333, "step": 7430 }, { "epoch": 0.37159124962541207, "grad_norm": 4.272607803344727, "learning_rate": 9.172162266963382e-05, "loss": 0.8305, "step": 7440 }, { "epoch": 0.3720907002297473, "grad_norm": 1.9677715301513672, "learning_rate": 9.169999156426301e-05, "loss": 0.9996, "step": 7450 }, { "epoch": 0.37259015083408253, "grad_norm": 1.1595687866210938, "learning_rate": 9.167833479273883e-05, "loss": 0.7525, "step": 7460 }, { "epoch": 0.37308960143841774, "grad_norm": 1.137204885482788, "learning_rate": 9.165665236839095e-05, "loss": 0.7123, "step": 7470 }, { "epoch": 0.37358905204275294, "grad_norm": 2.023561716079712, "learning_rate": 9.16349443045648e-05, "loss": 0.7421, "step": 7480 }, { "epoch": 0.3740885026470882, "grad_norm": 0.4606032371520996, "learning_rate": 9.161321061462157e-05, "loss": 0.7728, "step": 7490 }, { "epoch": 0.3745879532514234, "grad_norm": 8.039925575256348, "learning_rate": 9.159145131193827e-05, "loss": 1.0022, "step": 7500 }, { "epoch": 0.37508740385575867, "grad_norm": 2.0657260417938232, "learning_rate": 9.156966640990763e-05, "loss": 0.9218, "step": 7510 }, { "epoch": 0.3755868544600939, "grad_norm": 1.5123809576034546, "learning_rate": 9.15478559219382e-05, "loss": 0.6706, "step": 7520 }, { "epoch": 0.37608630506442914, "grad_norm": 1.5668985843658447, "learning_rate": 9.152601986145417e-05, "loss": 0.8428, "step": 7530 }, { "epoch": 0.37658575566876434, "grad_norm": 4.2369513511657715, "learning_rate": 9.15041582418956e-05, "loss": 0.8685, "step": 7540 }, { "epoch": 0.3770852062730996, "grad_norm": 1.7388386726379395, "learning_rate": 9.148227107671818e-05, "loss": 0.8348, "step": 7550 }, { "epoch": 0.3775846568774348, "grad_norm": 1.765231728553772, "learning_rate": 9.146035837939335e-05, "loss": 0.8973, "step": 7560 }, { "epoch": 0.37808410748177007, "grad_norm": 1.4724568128585815, "learning_rate": 9.143842016340831e-05, "loss": 0.9471, "step": 7570 }, { "epoch": 0.3785835580861053, "grad_norm": 1.9186527729034424, "learning_rate": 9.141645644226591e-05, "loss": 0.8671, "step": 7580 }, { "epoch": 0.37908300869044054, "grad_norm": 1.309566855430603, "learning_rate": 9.13944672294847e-05, "loss": 0.8727, "step": 7590 }, { "epoch": 0.37958245929477574, "grad_norm": 1.2128655910491943, "learning_rate": 9.137245253859898e-05, "loss": 0.7453, "step": 7600 }, { "epoch": 0.380081909899111, "grad_norm": 8.178140640258789, "learning_rate": 9.135041238315868e-05, "loss": 0.9213, "step": 7610 }, { "epoch": 0.3805813605034462, "grad_norm": 2.761096477508545, "learning_rate": 9.132834677672939e-05, "loss": 1.1112, "step": 7620 }, { "epoch": 0.3810808111077814, "grad_norm": 4.105166912078857, "learning_rate": 9.130625573289242e-05, "loss": 0.9173, "step": 7630 }, { "epoch": 0.3815802617121167, "grad_norm": 1.7994587421417236, "learning_rate": 9.128413926524468e-05, "loss": 0.9755, "step": 7640 }, { "epoch": 0.3820797123164519, "grad_norm": 1.1037505865097046, "learning_rate": 9.126199738739878e-05, "loss": 0.8283, "step": 7650 }, { "epoch": 0.38257916292078714, "grad_norm": 6.051690101623535, "learning_rate": 9.123983011298292e-05, "loss": 0.8041, "step": 7660 }, { "epoch": 0.38307861352512235, "grad_norm": 3.525859832763672, "learning_rate": 9.121763745564098e-05, "loss": 1.0139, "step": 7670 }, { "epoch": 0.3835780641294576, "grad_norm": 3.6487855911254883, "learning_rate": 9.119541942903241e-05, "loss": 0.9372, "step": 7680 }, { "epoch": 0.3840775147337928, "grad_norm": 1.3445885181427002, "learning_rate": 9.117317604683233e-05, "loss": 0.7081, "step": 7690 }, { "epoch": 0.3845769653381281, "grad_norm": 1.1839733123779297, "learning_rate": 9.115090732273141e-05, "loss": 0.737, "step": 7700 }, { "epoch": 0.3850764159424633, "grad_norm": 2.7112619876861572, "learning_rate": 9.112861327043598e-05, "loss": 0.9045, "step": 7710 }, { "epoch": 0.38557586654679854, "grad_norm": 1.1596410274505615, "learning_rate": 9.110629390366791e-05, "loss": 0.8373, "step": 7720 }, { "epoch": 0.38607531715113375, "grad_norm": 1.6945937871932983, "learning_rate": 9.108394923616468e-05, "loss": 0.9547, "step": 7730 }, { "epoch": 0.386574767755469, "grad_norm": 2.133749008178711, "learning_rate": 9.106157928167931e-05, "loss": 0.9767, "step": 7740 }, { "epoch": 0.3870742183598042, "grad_norm": 1.468874216079712, "learning_rate": 9.103918405398042e-05, "loss": 0.7833, "step": 7750 }, { "epoch": 0.3875736689641395, "grad_norm": 3.5914108753204346, "learning_rate": 9.101676356685216e-05, "loss": 0.8531, "step": 7760 }, { "epoch": 0.3880731195684747, "grad_norm": 3.0970280170440674, "learning_rate": 9.099431783409424e-05, "loss": 0.65, "step": 7770 }, { "epoch": 0.3885725701728099, "grad_norm": 2.831838846206665, "learning_rate": 9.097184686952192e-05, "loss": 0.7634, "step": 7780 }, { "epoch": 0.38907202077714514, "grad_norm": 1.6186414957046509, "learning_rate": 9.094935068696596e-05, "loss": 0.8975, "step": 7790 }, { "epoch": 0.38957147138148035, "grad_norm": 1.0518146753311157, "learning_rate": 9.092682930027264e-05, "loss": 1.136, "step": 7800 }, { "epoch": 0.3900709219858156, "grad_norm": 3.9278268814086914, "learning_rate": 9.09042827233038e-05, "loss": 0.9904, "step": 7810 }, { "epoch": 0.3905703725901508, "grad_norm": 5.630587577819824, "learning_rate": 9.088171096993675e-05, "loss": 1.1147, "step": 7820 }, { "epoch": 0.3910698231944861, "grad_norm": 4.258095741271973, "learning_rate": 9.085911405406428e-05, "loss": 1.0105, "step": 7830 }, { "epoch": 0.3915692737988213, "grad_norm": 3.0702295303344727, "learning_rate": 9.083649198959472e-05, "loss": 0.9473, "step": 7840 }, { "epoch": 0.39206872440315654, "grad_norm": 1.486130714416504, "learning_rate": 9.081384479045183e-05, "loss": 0.8446, "step": 7850 }, { "epoch": 0.39256817500749175, "grad_norm": 6.066441059112549, "learning_rate": 9.079117247057484e-05, "loss": 0.9462, "step": 7860 }, { "epoch": 0.393067625611827, "grad_norm": 4.194586753845215, "learning_rate": 9.07684750439185e-05, "loss": 0.9815, "step": 7870 }, { "epoch": 0.3935670762161622, "grad_norm": 3.13558292388916, "learning_rate": 9.074575252445293e-05, "loss": 0.9681, "step": 7880 }, { "epoch": 0.3940665268204975, "grad_norm": 1.231960415840149, "learning_rate": 9.072300492616376e-05, "loss": 1.0147, "step": 7890 }, { "epoch": 0.3945659774248327, "grad_norm": 0.8152504563331604, "learning_rate": 9.070023226305202e-05, "loss": 0.6973, "step": 7900 }, { "epoch": 0.39506542802916794, "grad_norm": 2.331622362136841, "learning_rate": 9.067743454913418e-05, "loss": 0.8363, "step": 7910 }, { "epoch": 0.39556487863350315, "grad_norm": 1.8101062774658203, "learning_rate": 9.065461179844215e-05, "loss": 0.744, "step": 7920 }, { "epoch": 0.39606432923783835, "grad_norm": 5.449390411376953, "learning_rate": 9.063176402502321e-05, "loss": 0.9046, "step": 7930 }, { "epoch": 0.3965637798421736, "grad_norm": 1.1542668342590332, "learning_rate": 9.060889124294006e-05, "loss": 1.0153, "step": 7940 }, { "epoch": 0.3970632304465088, "grad_norm": 1.930710792541504, "learning_rate": 9.05859934662708e-05, "loss": 0.774, "step": 7950 }, { "epoch": 0.3975626810508441, "grad_norm": 2.1788411140441895, "learning_rate": 9.056307070910888e-05, "loss": 0.9153, "step": 7960 }, { "epoch": 0.3980621316551793, "grad_norm": 0.6524488925933838, "learning_rate": 9.054012298556319e-05, "loss": 0.7557, "step": 7970 }, { "epoch": 0.39856158225951455, "grad_norm": 2.8747739791870117, "learning_rate": 9.051715030975793e-05, "loss": 0.7849, "step": 7980 }, { "epoch": 0.39906103286384975, "grad_norm": 2.320890426635742, "learning_rate": 9.049415269583268e-05, "loss": 0.9834, "step": 7990 }, { "epoch": 0.399560483468185, "grad_norm": 5.673957824707031, "learning_rate": 9.047113015794235e-05, "loss": 1.02, "step": 8000 }, { "epoch": 0.4000599340725202, "grad_norm": 2.38036847114563, "learning_rate": 9.044808271025722e-05, "loss": 0.9644, "step": 8010 }, { "epoch": 0.4005593846768555, "grad_norm": 2.981459617614746, "learning_rate": 9.042501036696289e-05, "loss": 1.054, "step": 8020 }, { "epoch": 0.4010588352811907, "grad_norm": 2.8263211250305176, "learning_rate": 9.04019131422603e-05, "loss": 0.8065, "step": 8030 }, { "epoch": 0.40155828588552595, "grad_norm": 2.295149326324463, "learning_rate": 9.037879105036564e-05, "loss": 0.8118, "step": 8040 }, { "epoch": 0.40205773648986115, "grad_norm": 2.1756789684295654, "learning_rate": 9.035564410551049e-05, "loss": 0.8478, "step": 8050 }, { "epoch": 0.4025571870941964, "grad_norm": 2.2337467670440674, "learning_rate": 9.033247232194166e-05, "loss": 0.8868, "step": 8060 }, { "epoch": 0.4030566376985316, "grad_norm": 1.9026663303375244, "learning_rate": 9.030927571392132e-05, "loss": 1.1555, "step": 8070 }, { "epoch": 0.4035560883028668, "grad_norm": 2.4279093742370605, "learning_rate": 9.028605429572683e-05, "loss": 0.7624, "step": 8080 }, { "epoch": 0.4040555389072021, "grad_norm": 1.2404896020889282, "learning_rate": 9.026280808165087e-05, "loss": 0.8664, "step": 8090 }, { "epoch": 0.4045549895115373, "grad_norm": 1.4976485967636108, "learning_rate": 9.02395370860014e-05, "loss": 0.8968, "step": 8100 }, { "epoch": 0.40505444011587255, "grad_norm": 1.6639426946640015, "learning_rate": 9.02162413231016e-05, "loss": 0.9324, "step": 8110 }, { "epoch": 0.40555389072020775, "grad_norm": 1.4774401187896729, "learning_rate": 9.019292080728992e-05, "loss": 0.9493, "step": 8120 }, { "epoch": 0.406053341324543, "grad_norm": 0.4843757748603821, "learning_rate": 9.016957555292e-05, "loss": 0.7782, "step": 8130 }, { "epoch": 0.4065527919288782, "grad_norm": 2.6284985542297363, "learning_rate": 9.014620557436077e-05, "loss": 0.835, "step": 8140 }, { "epoch": 0.4070522425332135, "grad_norm": 1.5704331398010254, "learning_rate": 9.012281088599632e-05, "loss": 0.9565, "step": 8150 }, { "epoch": 0.4075516931375487, "grad_norm": 5.700881004333496, "learning_rate": 9.009939150222599e-05, "loss": 1.0529, "step": 8160 }, { "epoch": 0.40805114374188395, "grad_norm": 2.501964807510376, "learning_rate": 9.007594743746429e-05, "loss": 0.8192, "step": 8170 }, { "epoch": 0.40855059434621915, "grad_norm": 2.092198371887207, "learning_rate": 9.005247870614095e-05, "loss": 0.7568, "step": 8180 }, { "epoch": 0.4090500449505544, "grad_norm": 1.0870630741119385, "learning_rate": 9.002898532270084e-05, "loss": 0.6473, "step": 8190 }, { "epoch": 0.4095494955548896, "grad_norm": 1.612242579460144, "learning_rate": 9.000546730160406e-05, "loss": 0.7603, "step": 8200 }, { "epoch": 0.4100489461592248, "grad_norm": 1.722089171409607, "learning_rate": 8.998192465732582e-05, "loss": 0.7233, "step": 8210 }, { "epoch": 0.4105483967635601, "grad_norm": 2.79156494140625, "learning_rate": 8.995835740435653e-05, "loss": 0.6629, "step": 8220 }, { "epoch": 0.4110478473678953, "grad_norm": 1.4629532098770142, "learning_rate": 8.99347655572017e-05, "loss": 0.9104, "step": 8230 }, { "epoch": 0.41154729797223055, "grad_norm": 2.188950538635254, "learning_rate": 8.991114913038202e-05, "loss": 0.9731, "step": 8240 }, { "epoch": 0.41204674857656576, "grad_norm": 2.220014810562134, "learning_rate": 8.98875081384333e-05, "loss": 0.8968, "step": 8250 }, { "epoch": 0.412546199180901, "grad_norm": 0.8119724988937378, "learning_rate": 8.986384259590645e-05, "loss": 0.7018, "step": 8260 }, { "epoch": 0.4130456497852362, "grad_norm": 7.188334941864014, "learning_rate": 8.98401525173675e-05, "loss": 0.9704, "step": 8270 }, { "epoch": 0.4135451003895715, "grad_norm": 0.9612888693809509, "learning_rate": 8.98164379173976e-05, "loss": 0.6979, "step": 8280 }, { "epoch": 0.4140445509939067, "grad_norm": 3.1759042739868164, "learning_rate": 8.979269881059295e-05, "loss": 0.8401, "step": 8290 }, { "epoch": 0.41454400159824195, "grad_norm": 1.566257119178772, "learning_rate": 8.976893521156491e-05, "loss": 0.7969, "step": 8300 }, { "epoch": 0.41504345220257716, "grad_norm": 1.340470552444458, "learning_rate": 8.974514713493983e-05, "loss": 0.7772, "step": 8310 }, { "epoch": 0.4155429028069124, "grad_norm": 2.2976415157318115, "learning_rate": 8.972133459535914e-05, "loss": 0.8865, "step": 8320 }, { "epoch": 0.4160423534112476, "grad_norm": 2.3305583000183105, "learning_rate": 8.969749760747938e-05, "loss": 0.8805, "step": 8330 }, { "epoch": 0.4165418040155829, "grad_norm": 1.950758457183838, "learning_rate": 8.967363618597213e-05, "loss": 0.7312, "step": 8340 }, { "epoch": 0.4170412546199181, "grad_norm": 1.5078762769699097, "learning_rate": 8.964975034552394e-05, "loss": 0.8563, "step": 8350 }, { "epoch": 0.4175407052242533, "grad_norm": 1.504718542098999, "learning_rate": 8.962584010083645e-05, "loss": 0.8045, "step": 8360 }, { "epoch": 0.41804015582858856, "grad_norm": 1.307501196861267, "learning_rate": 8.960190546662632e-05, "loss": 0.6778, "step": 8370 }, { "epoch": 0.41853960643292376, "grad_norm": 4.109325408935547, "learning_rate": 8.957794645762519e-05, "loss": 1.1622, "step": 8380 }, { "epoch": 0.419039057037259, "grad_norm": 4.46181583404541, "learning_rate": 8.955396308857973e-05, "loss": 0.8266, "step": 8390 }, { "epoch": 0.4195385076415942, "grad_norm": 2.433459520339966, "learning_rate": 8.952995537425157e-05, "loss": 0.8096, "step": 8400 }, { "epoch": 0.4200379582459295, "grad_norm": 1.5981595516204834, "learning_rate": 8.950592332941739e-05, "loss": 0.8317, "step": 8410 }, { "epoch": 0.4205374088502647, "grad_norm": 1.0722473859786987, "learning_rate": 8.948186696886877e-05, "loss": 0.7952, "step": 8420 }, { "epoch": 0.42103685945459995, "grad_norm": 1.6660276651382446, "learning_rate": 8.945778630741231e-05, "loss": 0.8521, "step": 8430 }, { "epoch": 0.42153631005893516, "grad_norm": 3.410531759262085, "learning_rate": 8.943368135986954e-05, "loss": 0.8354, "step": 8440 }, { "epoch": 0.4220357606632704, "grad_norm": 2.2601499557495117, "learning_rate": 8.940955214107693e-05, "loss": 0.962, "step": 8450 }, { "epoch": 0.4225352112676056, "grad_norm": 1.2968422174453735, "learning_rate": 8.938539866588592e-05, "loss": 0.7417, "step": 8460 }, { "epoch": 0.4230346618719409, "grad_norm": 1.7342209815979004, "learning_rate": 8.936122094916286e-05, "loss": 0.6829, "step": 8470 }, { "epoch": 0.4235341124762761, "grad_norm": 1.3093066215515137, "learning_rate": 8.933701900578901e-05, "loss": 0.7837, "step": 8480 }, { "epoch": 0.42403356308061135, "grad_norm": 2.27400279045105, "learning_rate": 8.931279285066059e-05, "loss": 1.1384, "step": 8490 }, { "epoch": 0.42453301368494656, "grad_norm": 1.4279674291610718, "learning_rate": 8.928854249868865e-05, "loss": 1.0138, "step": 8500 }, { "epoch": 0.42503246428928176, "grad_norm": 1.471238374710083, "learning_rate": 8.926426796479919e-05, "loss": 1.0406, "step": 8510 }, { "epoch": 0.425531914893617, "grad_norm": 1.3381599187850952, "learning_rate": 8.923996926393305e-05, "loss": 0.9937, "step": 8520 }, { "epoch": 0.42603136549795223, "grad_norm": 1.3054723739624023, "learning_rate": 8.9215646411046e-05, "loss": 0.8807, "step": 8530 }, { "epoch": 0.4265308161022875, "grad_norm": 1.0258605480194092, "learning_rate": 8.919129942110864e-05, "loss": 1.1096, "step": 8540 }, { "epoch": 0.4270302667066227, "grad_norm": 2.3745763301849365, "learning_rate": 8.916692830910642e-05, "loss": 0.8066, "step": 8550 }, { "epoch": 0.42752971731095796, "grad_norm": 0.9149400591850281, "learning_rate": 8.914253309003964e-05, "loss": 0.7646, "step": 8560 }, { "epoch": 0.42802916791529316, "grad_norm": 0.8165902495384216, "learning_rate": 8.911811377892345e-05, "loss": 0.8465, "step": 8570 }, { "epoch": 0.4285286185196284, "grad_norm": 3.450925827026367, "learning_rate": 8.909367039078784e-05, "loss": 0.9557, "step": 8580 }, { "epoch": 0.42902806912396363, "grad_norm": 1.085161805152893, "learning_rate": 8.906920294067759e-05, "loss": 0.89, "step": 8590 }, { "epoch": 0.4295275197282989, "grad_norm": 1.9233146905899048, "learning_rate": 8.904471144365232e-05, "loss": 0.7721, "step": 8600 }, { "epoch": 0.4300269703326341, "grad_norm": 0.30743876099586487, "learning_rate": 8.90201959147864e-05, "loss": 0.9018, "step": 8610 }, { "epoch": 0.43052642093696936, "grad_norm": 2.8039252758026123, "learning_rate": 8.899565636916904e-05, "loss": 0.9276, "step": 8620 }, { "epoch": 0.43102587154130456, "grad_norm": 1.278592824935913, "learning_rate": 8.897109282190423e-05, "loss": 0.846, "step": 8630 }, { "epoch": 0.4315253221456398, "grad_norm": 1.5045511722564697, "learning_rate": 8.894650528811072e-05, "loss": 0.8324, "step": 8640 }, { "epoch": 0.43202477274997503, "grad_norm": 1.3858511447906494, "learning_rate": 8.892189378292202e-05, "loss": 0.7414, "step": 8650 }, { "epoch": 0.43252422335431023, "grad_norm": 4.206646919250488, "learning_rate": 8.88972583214864e-05, "loss": 0.766, "step": 8660 }, { "epoch": 0.4330236739586455, "grad_norm": 1.2903480529785156, "learning_rate": 8.887259891896684e-05, "loss": 0.8342, "step": 8670 }, { "epoch": 0.4335231245629807, "grad_norm": 1.558398962020874, "learning_rate": 8.884791559054113e-05, "loss": 0.7591, "step": 8680 }, { "epoch": 0.43402257516731596, "grad_norm": 0.9594738483428955, "learning_rate": 8.882320835140174e-05, "loss": 0.8384, "step": 8690 }, { "epoch": 0.43452202577165117, "grad_norm": 2.354264259338379, "learning_rate": 8.879847721675586e-05, "loss": 1.1023, "step": 8700 }, { "epoch": 0.4350214763759864, "grad_norm": 1.0832626819610596, "learning_rate": 8.877372220182539e-05, "loss": 0.9754, "step": 8710 }, { "epoch": 0.43552092698032163, "grad_norm": 1.4442058801651, "learning_rate": 8.87489433218469e-05, "loss": 0.6892, "step": 8720 }, { "epoch": 0.4360203775846569, "grad_norm": 2.1258246898651123, "learning_rate": 8.872414059207172e-05, "loss": 0.932, "step": 8730 }, { "epoch": 0.4365198281889921, "grad_norm": 1.835067868232727, "learning_rate": 8.869931402776579e-05, "loss": 1.2159, "step": 8740 }, { "epoch": 0.43701927879332736, "grad_norm": 3.0866270065307617, "learning_rate": 8.867446364420975e-05, "loss": 0.7718, "step": 8750 }, { "epoch": 0.43751872939766256, "grad_norm": 3.470754623413086, "learning_rate": 8.86495894566989e-05, "loss": 0.8308, "step": 8760 }, { "epoch": 0.4380181800019978, "grad_norm": 2.440117835998535, "learning_rate": 8.862469148054319e-05, "loss": 0.9607, "step": 8770 }, { "epoch": 0.43851763060633303, "grad_norm": 1.012770175933838, "learning_rate": 8.859976973106721e-05, "loss": 0.6873, "step": 8780 }, { "epoch": 0.4390170812106683, "grad_norm": 1.4482653141021729, "learning_rate": 8.857482422361018e-05, "loss": 0.7227, "step": 8790 }, { "epoch": 0.4395165318150035, "grad_norm": 3.04582142829895, "learning_rate": 8.854985497352595e-05, "loss": 0.8951, "step": 8800 }, { "epoch": 0.4400159824193387, "grad_norm": 1.4655203819274902, "learning_rate": 8.852486199618298e-05, "loss": 1.1724, "step": 8810 }, { "epoch": 0.44051543302367396, "grad_norm": 3.0861003398895264, "learning_rate": 8.84998453069643e-05, "loss": 0.8812, "step": 8820 }, { "epoch": 0.44101488362800917, "grad_norm": 1.3584084510803223, "learning_rate": 8.847480492126761e-05, "loss": 0.678, "step": 8830 }, { "epoch": 0.44151433423234443, "grad_norm": 3.5568907260894775, "learning_rate": 8.844974085450515e-05, "loss": 0.9797, "step": 8840 }, { "epoch": 0.44201378483667964, "grad_norm": 4.693130016326904, "learning_rate": 8.84246531221037e-05, "loss": 0.9045, "step": 8850 }, { "epoch": 0.4425132354410149, "grad_norm": 2.3070759773254395, "learning_rate": 8.839954173950469e-05, "loss": 0.714, "step": 8860 }, { "epoch": 0.4430126860453501, "grad_norm": 1.7823224067687988, "learning_rate": 8.837440672216404e-05, "loss": 0.6414, "step": 8870 }, { "epoch": 0.44351213664968536, "grad_norm": 3.037031888961792, "learning_rate": 8.834924808555223e-05, "loss": 0.8324, "step": 8880 }, { "epoch": 0.44401158725402057, "grad_norm": 3.7925896644592285, "learning_rate": 8.83240658451543e-05, "loss": 0.7691, "step": 8890 }, { "epoch": 0.44451103785835583, "grad_norm": 1.6429016590118408, "learning_rate": 8.829886001646978e-05, "loss": 0.9756, "step": 8900 }, { "epoch": 0.44501048846269103, "grad_norm": 1.7131584882736206, "learning_rate": 8.827363061501276e-05, "loss": 0.9618, "step": 8910 }, { "epoch": 0.4455099390670263, "grad_norm": 2.0057897567749023, "learning_rate": 8.824837765631185e-05, "loss": 0.8267, "step": 8920 }, { "epoch": 0.4460093896713615, "grad_norm": 2.802243947982788, "learning_rate": 8.822310115591008e-05, "loss": 0.9813, "step": 8930 }, { "epoch": 0.44650884027569676, "grad_norm": 3.030748128890991, "learning_rate": 8.819780112936502e-05, "loss": 0.9917, "step": 8940 }, { "epoch": 0.44700829088003197, "grad_norm": 1.8534924983978271, "learning_rate": 8.817247759224876e-05, "loss": 0.7916, "step": 8950 }, { "epoch": 0.44750774148436717, "grad_norm": 2.390037775039673, "learning_rate": 8.814713056014781e-05, "loss": 0.8005, "step": 8960 }, { "epoch": 0.44800719208870243, "grad_norm": 2.032355308532715, "learning_rate": 8.812176004866313e-05, "loss": 1.0164, "step": 8970 }, { "epoch": 0.44850664269303764, "grad_norm": 2.673767566680908, "learning_rate": 8.809636607341017e-05, "loss": 0.9618, "step": 8980 }, { "epoch": 0.4490060932973729, "grad_norm": 1.2164344787597656, "learning_rate": 8.807094865001878e-05, "loss": 0.9268, "step": 8990 }, { "epoch": 0.4495055439017081, "grad_norm": 1.4314156770706177, "learning_rate": 8.804550779413331e-05, "loss": 0.9236, "step": 9000 }, { "epoch": 0.45000499450604337, "grad_norm": 1.4462699890136719, "learning_rate": 8.802004352141247e-05, "loss": 0.812, "step": 9010 }, { "epoch": 0.45050444511037857, "grad_norm": 3.182401657104492, "learning_rate": 8.799455584752938e-05, "loss": 0.9307, "step": 9020 }, { "epoch": 0.45100389571471383, "grad_norm": 4.526309013366699, "learning_rate": 8.796904478817162e-05, "loss": 0.9278, "step": 9030 }, { "epoch": 0.45150334631904904, "grad_norm": 1.9855213165283203, "learning_rate": 8.794351035904112e-05, "loss": 0.6975, "step": 9040 }, { "epoch": 0.4520027969233843, "grad_norm": 2.0959839820861816, "learning_rate": 8.791795257585421e-05, "loss": 0.8845, "step": 9050 }, { "epoch": 0.4525022475277195, "grad_norm": 1.1127110719680786, "learning_rate": 8.789237145434155e-05, "loss": 0.8488, "step": 9060 }, { "epoch": 0.45300169813205476, "grad_norm": 1.0070494413375854, "learning_rate": 8.786676701024826e-05, "loss": 0.7984, "step": 9070 }, { "epoch": 0.45350114873638997, "grad_norm": 2.1222105026245117, "learning_rate": 8.784113925933371e-05, "loss": 0.9088, "step": 9080 }, { "epoch": 0.4540005993407252, "grad_norm": 1.2153772115707397, "learning_rate": 8.78154882173717e-05, "loss": 0.7524, "step": 9090 }, { "epoch": 0.45450004994506044, "grad_norm": 2.5599777698516846, "learning_rate": 8.77898139001503e-05, "loss": 0.8623, "step": 9100 }, { "epoch": 0.45499950054939564, "grad_norm": 2.3141989707946777, "learning_rate": 8.776411632347194e-05, "loss": 0.8572, "step": 9110 }, { "epoch": 0.4554989511537309, "grad_norm": 4.364091873168945, "learning_rate": 8.773839550315337e-05, "loss": 0.9145, "step": 9120 }, { "epoch": 0.4559984017580661, "grad_norm": 1.888785719871521, "learning_rate": 8.771265145502566e-05, "loss": 0.7613, "step": 9130 }, { "epoch": 0.45649785236240137, "grad_norm": 1.7906880378723145, "learning_rate": 8.76868841949341e-05, "loss": 0.6708, "step": 9140 }, { "epoch": 0.4569973029667366, "grad_norm": 1.3806205987930298, "learning_rate": 8.766109373873834e-05, "loss": 0.6981, "step": 9150 }, { "epoch": 0.45749675357107183, "grad_norm": 1.7840837240219116, "learning_rate": 8.76352801023123e-05, "loss": 1.0249, "step": 9160 }, { "epoch": 0.45799620417540704, "grad_norm": 1.4487371444702148, "learning_rate": 8.760944330154418e-05, "loss": 0.8626, "step": 9170 }, { "epoch": 0.4584956547797423, "grad_norm": 3.1842362880706787, "learning_rate": 8.758358335233634e-05, "loss": 0.8693, "step": 9180 }, { "epoch": 0.4589951053840775, "grad_norm": 1.9841814041137695, "learning_rate": 8.755770027060552e-05, "loss": 0.7999, "step": 9190 }, { "epoch": 0.45949455598841277, "grad_norm": 1.462900161743164, "learning_rate": 8.75317940722826e-05, "loss": 0.8821, "step": 9200 }, { "epoch": 0.459994006592748, "grad_norm": 1.1118979454040527, "learning_rate": 8.750586477331277e-05, "loss": 0.9123, "step": 9210 }, { "epoch": 0.46049345719708323, "grad_norm": 3.976433753967285, "learning_rate": 8.747991238965536e-05, "loss": 0.807, "step": 9220 }, { "epoch": 0.46099290780141844, "grad_norm": 3.9534356594085693, "learning_rate": 8.745393693728395e-05, "loss": 1.037, "step": 9230 }, { "epoch": 0.46149235840575364, "grad_norm": 2.3069164752960205, "learning_rate": 8.742793843218633e-05, "loss": 0.8247, "step": 9240 }, { "epoch": 0.4619918090100889, "grad_norm": 2.868238925933838, "learning_rate": 8.740191689036443e-05, "loss": 0.6486, "step": 9250 }, { "epoch": 0.4624912596144241, "grad_norm": 2.9097187519073486, "learning_rate": 8.73758723278344e-05, "loss": 0.9045, "step": 9260 }, { "epoch": 0.46299071021875937, "grad_norm": 1.270633578300476, "learning_rate": 8.734980476062657e-05, "loss": 0.6936, "step": 9270 }, { "epoch": 0.4634901608230946, "grad_norm": 1.2080847024917603, "learning_rate": 8.732371420478538e-05, "loss": 0.8571, "step": 9280 }, { "epoch": 0.46398961142742984, "grad_norm": 0.9066054224967957, "learning_rate": 8.729760067636944e-05, "loss": 0.7925, "step": 9290 }, { "epoch": 0.46448906203176504, "grad_norm": 2.1034419536590576, "learning_rate": 8.727146419145155e-05, "loss": 0.6313, "step": 9300 }, { "epoch": 0.4649885126361003, "grad_norm": 1.5577855110168457, "learning_rate": 8.724530476611857e-05, "loss": 0.9527, "step": 9310 }, { "epoch": 0.4654879632404355, "grad_norm": 3.563767910003662, "learning_rate": 8.721912241647151e-05, "loss": 1.0483, "step": 9320 }, { "epoch": 0.46598741384477077, "grad_norm": 1.8607417345046997, "learning_rate": 8.719291715862547e-05, "loss": 0.8991, "step": 9330 }, { "epoch": 0.466486864449106, "grad_norm": 1.8814846277236938, "learning_rate": 8.71666890087097e-05, "loss": 0.8022, "step": 9340 }, { "epoch": 0.46698631505344124, "grad_norm": 4.706637382507324, "learning_rate": 8.714043798286746e-05, "loss": 0.8227, "step": 9350 }, { "epoch": 0.46748576565777644, "grad_norm": 1.5149883031845093, "learning_rate": 8.711416409725619e-05, "loss": 0.7861, "step": 9360 }, { "epoch": 0.4679852162621117, "grad_norm": 2.6364152431488037, "learning_rate": 8.708786736804729e-05, "loss": 0.8212, "step": 9370 }, { "epoch": 0.4684846668664469, "grad_norm": 3.7528250217437744, "learning_rate": 8.706154781142632e-05, "loss": 0.8007, "step": 9380 }, { "epoch": 0.4689841174707821, "grad_norm": 2.3562207221984863, "learning_rate": 8.703520544359282e-05, "loss": 0.9271, "step": 9390 }, { "epoch": 0.4694835680751174, "grad_norm": 2.0192863941192627, "learning_rate": 8.700884028076042e-05, "loss": 0.8238, "step": 9400 }, { "epoch": 0.4699830186794526, "grad_norm": 1.6539078950881958, "learning_rate": 8.698245233915673e-05, "loss": 0.7086, "step": 9410 }, { "epoch": 0.47048246928378784, "grad_norm": 3.2260684967041016, "learning_rate": 8.695604163502342e-05, "loss": 0.7594, "step": 9420 }, { "epoch": 0.47098191988812305, "grad_norm": 4.1658854484558105, "learning_rate": 8.692960818461617e-05, "loss": 0.9258, "step": 9430 }, { "epoch": 0.4714813704924583, "grad_norm": 1.852576732635498, "learning_rate": 8.690315200420462e-05, "loss": 0.6446, "step": 9440 }, { "epoch": 0.4719808210967935, "grad_norm": 0.9907698035240173, "learning_rate": 8.687667311007245e-05, "loss": 1.2197, "step": 9450 }, { "epoch": 0.4724802717011288, "grad_norm": 1.4926533699035645, "learning_rate": 8.685017151851728e-05, "loss": 0.8126, "step": 9460 }, { "epoch": 0.472979722305464, "grad_norm": 3.9340710639953613, "learning_rate": 8.682364724585075e-05, "loss": 0.8304, "step": 9470 }, { "epoch": 0.47347917290979924, "grad_norm": 2.3523919582366943, "learning_rate": 8.679710030839838e-05, "loss": 0.8956, "step": 9480 }, { "epoch": 0.47397862351413445, "grad_norm": 1.8453624248504639, "learning_rate": 8.677053072249972e-05, "loss": 0.7989, "step": 9490 }, { "epoch": 0.4744780741184697, "grad_norm": 0.7890266180038452, "learning_rate": 8.674393850450823e-05, "loss": 0.7103, "step": 9500 }, { "epoch": 0.4749775247228049, "grad_norm": 2.487265110015869, "learning_rate": 8.671732367079129e-05, "loss": 1.0322, "step": 9510 }, { "epoch": 0.47547697532714017, "grad_norm": 1.0967894792556763, "learning_rate": 8.66906862377302e-05, "loss": 0.8271, "step": 9520 }, { "epoch": 0.4759764259314754, "grad_norm": 5.3334059715271, "learning_rate": 8.666402622172018e-05, "loss": 1.2287, "step": 9530 }, { "epoch": 0.4764758765358106, "grad_norm": 0.7524279952049255, "learning_rate": 8.663734363917037e-05, "loss": 0.9327, "step": 9540 }, { "epoch": 0.47697532714014584, "grad_norm": 5.157166004180908, "learning_rate": 8.661063850650375e-05, "loss": 0.8208, "step": 9550 }, { "epoch": 0.47747477774448105, "grad_norm": 1.666438341140747, "learning_rate": 8.658391084015723e-05, "loss": 1.0964, "step": 9560 }, { "epoch": 0.4779742283488163, "grad_norm": 2.5982296466827393, "learning_rate": 8.655716065658154e-05, "loss": 0.8009, "step": 9570 }, { "epoch": 0.4784736789531515, "grad_norm": 1.3922905921936035, "learning_rate": 8.653038797224132e-05, "loss": 0.864, "step": 9580 }, { "epoch": 0.4789731295574868, "grad_norm": 6.944724082946777, "learning_rate": 8.6503592803615e-05, "loss": 0.8825, "step": 9590 }, { "epoch": 0.479472580161822, "grad_norm": 1.7859829664230347, "learning_rate": 8.647677516719492e-05, "loss": 1.001, "step": 9600 }, { "epoch": 0.47997203076615724, "grad_norm": 1.1808656454086304, "learning_rate": 8.64499350794872e-05, "loss": 0.8243, "step": 9610 }, { "epoch": 0.48047148137049245, "grad_norm": 1.345732569694519, "learning_rate": 8.642307255701177e-05, "loss": 0.8518, "step": 9620 }, { "epoch": 0.4809709319748277, "grad_norm": 2.1847989559173584, "learning_rate": 8.639618761630242e-05, "loss": 0.9443, "step": 9630 }, { "epoch": 0.4814703825791629, "grad_norm": 3.106778621673584, "learning_rate": 8.63692802739067e-05, "loss": 0.9343, "step": 9640 }, { "epoch": 0.4819698331834982, "grad_norm": 1.8735466003417969, "learning_rate": 8.634235054638595e-05, "loss": 0.6618, "step": 9650 }, { "epoch": 0.4824692837878334, "grad_norm": 2.5529987812042236, "learning_rate": 8.631539845031529e-05, "loss": 0.834, "step": 9660 }, { "epoch": 0.48296873439216864, "grad_norm": 1.7117773294448853, "learning_rate": 8.628842400228361e-05, "loss": 0.7188, "step": 9670 }, { "epoch": 0.48346818499650385, "grad_norm": 5.574675559997559, "learning_rate": 8.626142721889358e-05, "loss": 0.9675, "step": 9680 }, { "epoch": 0.48396763560083905, "grad_norm": 1.724246621131897, "learning_rate": 8.623440811676158e-05, "loss": 0.7304, "step": 9690 }, { "epoch": 0.4844670862051743, "grad_norm": 2.3189258575439453, "learning_rate": 8.620736671251773e-05, "loss": 0.6906, "step": 9700 }, { "epoch": 0.4849665368095095, "grad_norm": 4.428511619567871, "learning_rate": 8.618030302280591e-05, "loss": 0.8414, "step": 9710 }, { "epoch": 0.4854659874138448, "grad_norm": 0.6134352684020996, "learning_rate": 8.61532170642837e-05, "loss": 0.8284, "step": 9720 }, { "epoch": 0.48596543801818, "grad_norm": 1.8473355770111084, "learning_rate": 8.612610885362236e-05, "loss": 0.8872, "step": 9730 }, { "epoch": 0.48646488862251525, "grad_norm": 1.4100711345672607, "learning_rate": 8.60989784075069e-05, "loss": 0.8218, "step": 9740 }, { "epoch": 0.48696433922685045, "grad_norm": 2.5205984115600586, "learning_rate": 8.607182574263595e-05, "loss": 0.8991, "step": 9750 }, { "epoch": 0.4874637898311857, "grad_norm": 2.3048384189605713, "learning_rate": 8.604465087572188e-05, "loss": 0.834, "step": 9760 }, { "epoch": 0.4879632404355209, "grad_norm": 2.423758029937744, "learning_rate": 8.601745382349068e-05, "loss": 0.8198, "step": 9770 }, { "epoch": 0.4884626910398562, "grad_norm": 4.580270767211914, "learning_rate": 8.599023460268202e-05, "loss": 1.1273, "step": 9780 }, { "epoch": 0.4889621416441914, "grad_norm": 1.962369441986084, "learning_rate": 8.596299323004919e-05, "loss": 0.8828, "step": 9790 }, { "epoch": 0.48946159224852664, "grad_norm": 3.3463711738586426, "learning_rate": 8.593572972235915e-05, "loss": 0.9256, "step": 9800 }, { "epoch": 0.48996104285286185, "grad_norm": 2.2721240520477295, "learning_rate": 8.590844409639248e-05, "loss": 0.911, "step": 9810 }, { "epoch": 0.4904604934571971, "grad_norm": 2.064649820327759, "learning_rate": 8.588113636894328e-05, "loss": 0.8313, "step": 9820 }, { "epoch": 0.4909599440615323, "grad_norm": 4.452096462249756, "learning_rate": 8.585380655681943e-05, "loss": 0.8163, "step": 9830 }, { "epoch": 0.4914593946658675, "grad_norm": 1.811700701713562, "learning_rate": 8.582645467684223e-05, "loss": 0.9164, "step": 9840 }, { "epoch": 0.4919588452702028, "grad_norm": 1.8374183177947998, "learning_rate": 8.579908074584666e-05, "loss": 0.7401, "step": 9850 }, { "epoch": 0.492458295874538, "grad_norm": 1.4280540943145752, "learning_rate": 8.577168478068127e-05, "loss": 0.901, "step": 9860 }, { "epoch": 0.49295774647887325, "grad_norm": 2.486199140548706, "learning_rate": 8.574426679820813e-05, "loss": 0.5956, "step": 9870 }, { "epoch": 0.49345719708320845, "grad_norm": 2.0157670974731445, "learning_rate": 8.571682681530289e-05, "loss": 0.9413, "step": 9880 }, { "epoch": 0.4939566476875437, "grad_norm": 1.218309998512268, "learning_rate": 8.56893648488547e-05, "loss": 1.0669, "step": 9890 }, { "epoch": 0.4944560982918789, "grad_norm": 1.4599615335464478, "learning_rate": 8.566188091576634e-05, "loss": 0.6705, "step": 9900 }, { "epoch": 0.4949555488962142, "grad_norm": 4.729304790496826, "learning_rate": 8.563437503295398e-05, "loss": 0.8737, "step": 9910 }, { "epoch": 0.4954549995005494, "grad_norm": 1.7913062572479248, "learning_rate": 8.560684721734742e-05, "loss": 0.9062, "step": 9920 }, { "epoch": 0.49595445010488465, "grad_norm": 2.2768373489379883, "learning_rate": 8.557929748588986e-05, "loss": 0.8404, "step": 9930 }, { "epoch": 0.49645390070921985, "grad_norm": 6.3551788330078125, "learning_rate": 8.555172585553805e-05, "loss": 0.7617, "step": 9940 }, { "epoch": 0.4969533513135551, "grad_norm": 2.6699812412261963, "learning_rate": 8.552413234326219e-05, "loss": 0.992, "step": 9950 }, { "epoch": 0.4974528019178903, "grad_norm": 6.235543727874756, "learning_rate": 8.549651696604599e-05, "loss": 1.0864, "step": 9960 }, { "epoch": 0.4979522525222255, "grad_norm": 2.092358350753784, "learning_rate": 8.546887974088656e-05, "loss": 0.9257, "step": 9970 }, { "epoch": 0.4984517031265608, "grad_norm": 0.8900352120399475, "learning_rate": 8.544122068479449e-05, "loss": 0.7231, "step": 9980 }, { "epoch": 0.498951153730896, "grad_norm": 2.0917277336120605, "learning_rate": 8.541353981479383e-05, "loss": 0.9558, "step": 9990 }, { "epoch": 0.49945060433523125, "grad_norm": 0.6328327059745789, "learning_rate": 8.538583714792198e-05, "loss": 0.8234, "step": 10000 }, { "epoch": 0.49995005493956646, "grad_norm": 1.4690412282943726, "learning_rate": 8.535811270122986e-05, "loss": 0.907, "step": 10010 }, { "epoch": 0.5004495055439017, "grad_norm": 1.581143856048584, "learning_rate": 8.533036649178169e-05, "loss": 0.8558, "step": 10020 }, { "epoch": 0.500948956148237, "grad_norm": 2.121934652328491, "learning_rate": 8.530259853665514e-05, "loss": 0.9822, "step": 10030 }, { "epoch": 0.5014484067525722, "grad_norm": 2.247655153274536, "learning_rate": 8.52748088529413e-05, "loss": 0.8619, "step": 10040 }, { "epoch": 0.5019478573569074, "grad_norm": 2.4512712955474854, "learning_rate": 8.524699745774455e-05, "loss": 1.1144, "step": 10050 }, { "epoch": 0.5024473079612426, "grad_norm": 1.4861620664596558, "learning_rate": 8.521916436818269e-05, "loss": 0.8321, "step": 10060 }, { "epoch": 0.5029467585655779, "grad_norm": 2.7260966300964355, "learning_rate": 8.519130960138686e-05, "loss": 0.9533, "step": 10070 }, { "epoch": 0.5034462091699131, "grad_norm": 1.08968985080719, "learning_rate": 8.516343317450156e-05, "loss": 0.5477, "step": 10080 }, { "epoch": 0.5039456597742483, "grad_norm": 1.4115921258926392, "learning_rate": 8.513553510468457e-05, "loss": 0.8258, "step": 10090 }, { "epoch": 0.5044451103785835, "grad_norm": 3.039212942123413, "learning_rate": 8.510761540910704e-05, "loss": 0.9886, "step": 10100 }, { "epoch": 0.5049445609829188, "grad_norm": 3.746342897415161, "learning_rate": 8.507967410495339e-05, "loss": 0.9196, "step": 10110 }, { "epoch": 0.505444011587254, "grad_norm": 1.0602586269378662, "learning_rate": 8.505171120942142e-05, "loss": 0.7648, "step": 10120 }, { "epoch": 0.5059434621915893, "grad_norm": 1.7583624124526978, "learning_rate": 8.502372673972211e-05, "loss": 0.9866, "step": 10130 }, { "epoch": 0.5064429127959245, "grad_norm": 3.013103723526001, "learning_rate": 8.49957207130798e-05, "loss": 0.683, "step": 10140 }, { "epoch": 0.5069423634002597, "grad_norm": 1.2070417404174805, "learning_rate": 8.496769314673207e-05, "loss": 0.8469, "step": 10150 }, { "epoch": 0.507441814004595, "grad_norm": 2.0063936710357666, "learning_rate": 8.493964405792973e-05, "loss": 0.8121, "step": 10160 }, { "epoch": 0.5079412646089302, "grad_norm": 3.4089725017547607, "learning_rate": 8.491157346393693e-05, "loss": 0.9276, "step": 10170 }, { "epoch": 0.5084407152132654, "grad_norm": 2.365583658218384, "learning_rate": 8.488348138203091e-05, "loss": 1.1725, "step": 10180 }, { "epoch": 0.5089401658176006, "grad_norm": 2.678743600845337, "learning_rate": 8.485536782950228e-05, "loss": 0.9273, "step": 10190 }, { "epoch": 0.5094396164219359, "grad_norm": 1.7188700437545776, "learning_rate": 8.482723282365477e-05, "loss": 0.7503, "step": 10200 }, { "epoch": 0.5099390670262711, "grad_norm": 2.3885445594787598, "learning_rate": 8.479907638180535e-05, "loss": 0.8827, "step": 10210 }, { "epoch": 0.5104385176306063, "grad_norm": 2.0695323944091797, "learning_rate": 8.477089852128421e-05, "loss": 0.9493, "step": 10220 }, { "epoch": 0.5109379682349415, "grad_norm": 3.528078317642212, "learning_rate": 8.474269925943465e-05, "loss": 0.8015, "step": 10230 }, { "epoch": 0.5114374188392768, "grad_norm": 2.784247636795044, "learning_rate": 8.471447861361321e-05, "loss": 0.8912, "step": 10240 }, { "epoch": 0.511936869443612, "grad_norm": 1.4341551065444946, "learning_rate": 8.468623660118958e-05, "loss": 0.843, "step": 10250 }, { "epoch": 0.5124363200479473, "grad_norm": 1.393707275390625, "learning_rate": 8.465797323954656e-05, "loss": 1.0695, "step": 10260 }, { "epoch": 0.5129357706522825, "grad_norm": 2.879314661026001, "learning_rate": 8.462968854608013e-05, "loss": 0.8203, "step": 10270 }, { "epoch": 0.5134352212566177, "grad_norm": 2.527120351791382, "learning_rate": 8.46013825381994e-05, "loss": 0.7242, "step": 10280 }, { "epoch": 0.513934671860953, "grad_norm": 2.0064504146575928, "learning_rate": 8.457305523332657e-05, "loss": 0.7982, "step": 10290 }, { "epoch": 0.5144341224652882, "grad_norm": 1.5294859409332275, "learning_rate": 8.4544706648897e-05, "loss": 0.7853, "step": 10300 }, { "epoch": 0.5149335730696234, "grad_norm": 2.493802309036255, "learning_rate": 8.451633680235906e-05, "loss": 0.9062, "step": 10310 }, { "epoch": 0.5154330236739586, "grad_norm": 1.1306747198104858, "learning_rate": 8.448794571117431e-05, "loss": 0.7402, "step": 10320 }, { "epoch": 0.5159324742782939, "grad_norm": 0.39416784048080444, "learning_rate": 8.445953339281731e-05, "loss": 0.7237, "step": 10330 }, { "epoch": 0.5164319248826291, "grad_norm": 2.656313896179199, "learning_rate": 8.443109986477573e-05, "loss": 0.8015, "step": 10340 }, { "epoch": 0.5169313754869643, "grad_norm": 0.6447727680206299, "learning_rate": 8.440264514455025e-05, "loss": 0.765, "step": 10350 }, { "epoch": 0.5174308260912995, "grad_norm": 1.8436886072158813, "learning_rate": 8.437416924965464e-05, "loss": 0.9542, "step": 10360 }, { "epoch": 0.5179302766956349, "grad_norm": 2.326608657836914, "learning_rate": 8.434567219761566e-05, "loss": 1.0256, "step": 10370 }, { "epoch": 0.5184297272999701, "grad_norm": 1.378662109375, "learning_rate": 8.431715400597315e-05, "loss": 0.7725, "step": 10380 }, { "epoch": 0.5189291779043053, "grad_norm": 3.0724847316741943, "learning_rate": 8.428861469227991e-05, "loss": 0.9924, "step": 10390 }, { "epoch": 0.5194286285086405, "grad_norm": 2.2953217029571533, "learning_rate": 8.426005427410176e-05, "loss": 0.905, "step": 10400 }, { "epoch": 0.5199280791129757, "grad_norm": 1.05632483959198, "learning_rate": 8.423147276901747e-05, "loss": 0.6062, "step": 10410 }, { "epoch": 0.520427529717311, "grad_norm": 1.9191445112228394, "learning_rate": 8.420287019461887e-05, "loss": 0.7846, "step": 10420 }, { "epoch": 0.5209269803216462, "grad_norm": 1.5224536657333374, "learning_rate": 8.41742465685107e-05, "loss": 0.9027, "step": 10430 }, { "epoch": 0.5214264309259814, "grad_norm": 1.7912894487380981, "learning_rate": 8.414560190831067e-05, "loss": 1.014, "step": 10440 }, { "epoch": 0.5219258815303166, "grad_norm": 3.137920379638672, "learning_rate": 8.411693623164942e-05, "loss": 0.9506, "step": 10450 }, { "epoch": 0.5224253321346519, "grad_norm": 2.5194101333618164, "learning_rate": 8.408824955617057e-05, "loss": 1.0063, "step": 10460 }, { "epoch": 0.5229247827389871, "grad_norm": 2.2313342094421387, "learning_rate": 8.405954189953062e-05, "loss": 0.7812, "step": 10470 }, { "epoch": 0.5234242333433223, "grad_norm": 2.128974199295044, "learning_rate": 8.403081327939902e-05, "loss": 0.863, "step": 10480 }, { "epoch": 0.5239236839476575, "grad_norm": 0.898611307144165, "learning_rate": 8.400206371345809e-05, "loss": 0.6259, "step": 10490 }, { "epoch": 0.5244231345519929, "grad_norm": 1.9370143413543701, "learning_rate": 8.397329321940304e-05, "loss": 1.0129, "step": 10500 }, { "epoch": 0.5249225851563281, "grad_norm": 0.7454211115837097, "learning_rate": 8.394450181494198e-05, "loss": 0.7934, "step": 10510 }, { "epoch": 0.5254220357606633, "grad_norm": 0.5250659584999084, "learning_rate": 8.391568951779593e-05, "loss": 0.8833, "step": 10520 }, { "epoch": 0.5259214863649985, "grad_norm": 4.715530872344971, "learning_rate": 8.388685634569869e-05, "loss": 0.8296, "step": 10530 }, { "epoch": 0.5264209369693338, "grad_norm": 1.491549015045166, "learning_rate": 8.385800231639693e-05, "loss": 0.7825, "step": 10540 }, { "epoch": 0.526920387573669, "grad_norm": 3.0522241592407227, "learning_rate": 8.382912744765021e-05, "loss": 0.8571, "step": 10550 }, { "epoch": 0.5274198381780042, "grad_norm": 1.1122174263000488, "learning_rate": 8.380023175723087e-05, "loss": 0.7613, "step": 10560 }, { "epoch": 0.5279192887823394, "grad_norm": 3.839743137359619, "learning_rate": 8.377131526292405e-05, "loss": 0.8311, "step": 10570 }, { "epoch": 0.5284187393866746, "grad_norm": 2.86128306388855, "learning_rate": 8.374237798252775e-05, "loss": 1.0072, "step": 10580 }, { "epoch": 0.5289181899910099, "grad_norm": 1.6249940395355225, "learning_rate": 8.371341993385271e-05, "loss": 0.9004, "step": 10590 }, { "epoch": 0.5294176405953451, "grad_norm": 2.512314796447754, "learning_rate": 8.36844411347225e-05, "loss": 0.8968, "step": 10600 }, { "epoch": 0.5299170911996803, "grad_norm": 1.5922598838806152, "learning_rate": 8.365544160297341e-05, "loss": 0.8379, "step": 10610 }, { "epoch": 0.5304165418040155, "grad_norm": 1.7710751295089722, "learning_rate": 8.362642135645454e-05, "loss": 0.9609, "step": 10620 }, { "epoch": 0.5309159924083509, "grad_norm": 2.3227810859680176, "learning_rate": 8.359738041302772e-05, "loss": 0.7204, "step": 10630 }, { "epoch": 0.5314154430126861, "grad_norm": 2.00604248046875, "learning_rate": 8.35683187905675e-05, "loss": 0.95, "step": 10640 }, { "epoch": 0.5319148936170213, "grad_norm": 1.0721955299377441, "learning_rate": 8.353923650696118e-05, "loss": 0.8605, "step": 10650 }, { "epoch": 0.5324143442213565, "grad_norm": 1.99587881565094, "learning_rate": 8.351013358010877e-05, "loss": 0.8029, "step": 10660 }, { "epoch": 0.5329137948256918, "grad_norm": 2.7038462162017822, "learning_rate": 8.348101002792301e-05, "loss": 1.0095, "step": 10670 }, { "epoch": 0.533413245430027, "grad_norm": 2.7722578048706055, "learning_rate": 8.345186586832929e-05, "loss": 1.1092, "step": 10680 }, { "epoch": 0.5339126960343622, "grad_norm": 0.7673277854919434, "learning_rate": 8.342270111926571e-05, "loss": 0.9073, "step": 10690 }, { "epoch": 0.5344121466386974, "grad_norm": 1.2908411026000977, "learning_rate": 8.339351579868304e-05, "loss": 0.6403, "step": 10700 }, { "epoch": 0.5349115972430326, "grad_norm": 1.8550677299499512, "learning_rate": 8.336430992454474e-05, "loss": 0.9918, "step": 10710 }, { "epoch": 0.5354110478473679, "grad_norm": 0.9618448615074158, "learning_rate": 8.333508351482682e-05, "loss": 0.7949, "step": 10720 }, { "epoch": 0.5359104984517031, "grad_norm": 1.6754440069198608, "learning_rate": 8.330583658751807e-05, "loss": 0.7728, "step": 10730 }, { "epoch": 0.5364099490560383, "grad_norm": 1.1474179029464722, "learning_rate": 8.327656916061982e-05, "loss": 0.9935, "step": 10740 }, { "epoch": 0.5369093996603735, "grad_norm": 1.2575993537902832, "learning_rate": 8.324728125214603e-05, "loss": 0.7431, "step": 10750 }, { "epoch": 0.5374088502647089, "grad_norm": 2.4379231929779053, "learning_rate": 8.321797288012326e-05, "loss": 0.8684, "step": 10760 }, { "epoch": 0.5379083008690441, "grad_norm": 1.0595794916152954, "learning_rate": 8.31886440625907e-05, "loss": 0.9839, "step": 10770 }, { "epoch": 0.5384077514733793, "grad_norm": 2.0171382427215576, "learning_rate": 8.31592948176001e-05, "loss": 0.9853, "step": 10780 }, { "epoch": 0.5389072020777145, "grad_norm": 1.2597289085388184, "learning_rate": 8.312992516321578e-05, "loss": 0.8726, "step": 10790 }, { "epoch": 0.5394066526820498, "grad_norm": 4.327160835266113, "learning_rate": 8.310053511751463e-05, "loss": 0.7747, "step": 10800 }, { "epoch": 0.539906103286385, "grad_norm": 4.0043230056762695, "learning_rate": 8.307112469858608e-05, "loss": 0.8551, "step": 10810 }, { "epoch": 0.5404055538907202, "grad_norm": 2.365731954574585, "learning_rate": 8.304169392453213e-05, "loss": 0.8783, "step": 10820 }, { "epoch": 0.5409050044950554, "grad_norm": 3.5071523189544678, "learning_rate": 8.301224281346726e-05, "loss": 0.8082, "step": 10830 }, { "epoch": 0.5414044550993907, "grad_norm": 3.745530128479004, "learning_rate": 8.29827713835185e-05, "loss": 0.7679, "step": 10840 }, { "epoch": 0.5419039057037259, "grad_norm": 2.147007942199707, "learning_rate": 8.29532796528254e-05, "loss": 1.0009, "step": 10850 }, { "epoch": 0.5424033563080611, "grad_norm": 2.499675750732422, "learning_rate": 8.292376763953995e-05, "loss": 0.9068, "step": 10860 }, { "epoch": 0.5429028069123963, "grad_norm": 3.647723913192749, "learning_rate": 8.28942353618267e-05, "loss": 1.0705, "step": 10870 }, { "epoch": 0.5434022575167315, "grad_norm": 3.1378934383392334, "learning_rate": 8.28646828378626e-05, "loss": 0.7857, "step": 10880 }, { "epoch": 0.5439017081210669, "grad_norm": 2.2153217792510986, "learning_rate": 8.283511008583708e-05, "loss": 0.8, "step": 10890 }, { "epoch": 0.5444011587254021, "grad_norm": 1.5370389223098755, "learning_rate": 8.280551712395208e-05, "loss": 0.9032, "step": 10900 }, { "epoch": 0.5449006093297373, "grad_norm": 2.0717661380767822, "learning_rate": 8.27759039704219e-05, "loss": 0.7332, "step": 10910 }, { "epoch": 0.5454000599340725, "grad_norm": 1.4211114645004272, "learning_rate": 8.274627064347331e-05, "loss": 0.8698, "step": 10920 }, { "epoch": 0.5458995105384078, "grad_norm": 2.067880153656006, "learning_rate": 8.271661716134549e-05, "loss": 0.9097, "step": 10930 }, { "epoch": 0.546398961142743, "grad_norm": 4.041163921356201, "learning_rate": 8.268694354229001e-05, "loss": 0.8441, "step": 10940 }, { "epoch": 0.5468984117470782, "grad_norm": 1.7370940446853638, "learning_rate": 8.265724980457086e-05, "loss": 0.7791, "step": 10950 }, { "epoch": 0.5473978623514134, "grad_norm": 0.741644561290741, "learning_rate": 8.262753596646439e-05, "loss": 1.0678, "step": 10960 }, { "epoch": 0.5478973129557487, "grad_norm": 6.259626388549805, "learning_rate": 8.259780204625932e-05, "loss": 0.7785, "step": 10970 }, { "epoch": 0.5483967635600839, "grad_norm": 1.2122634649276733, "learning_rate": 8.256804806225677e-05, "loss": 0.9801, "step": 10980 }, { "epoch": 0.5488962141644191, "grad_norm": 3.5091569423675537, "learning_rate": 8.253827403277015e-05, "loss": 1.0206, "step": 10990 }, { "epoch": 0.5493956647687543, "grad_norm": 5.1107563972473145, "learning_rate": 8.250847997612527e-05, "loss": 0.9987, "step": 11000 }, { "epoch": 0.5498951153730895, "grad_norm": 6.479084491729736, "learning_rate": 8.24786659106602e-05, "loss": 1.11, "step": 11010 }, { "epoch": 0.5503945659774249, "grad_norm": 6.204999923706055, "learning_rate": 8.244883185472538e-05, "loss": 1.1227, "step": 11020 }, { "epoch": 0.5508940165817601, "grad_norm": 1.817765474319458, "learning_rate": 8.241897782668355e-05, "loss": 0.7799, "step": 11030 }, { "epoch": 0.5513934671860953, "grad_norm": 1.324508786201477, "learning_rate": 8.23891038449097e-05, "loss": 0.8929, "step": 11040 }, { "epoch": 0.5518929177904305, "grad_norm": 5.498021125793457, "learning_rate": 8.235920992779114e-05, "loss": 0.7613, "step": 11050 }, { "epoch": 0.5523923683947658, "grad_norm": 2.9075751304626465, "learning_rate": 8.232929609372744e-05, "loss": 0.745, "step": 11060 }, { "epoch": 0.552891818999101, "grad_norm": 3.374605894088745, "learning_rate": 8.229936236113042e-05, "loss": 0.9081, "step": 11070 }, { "epoch": 0.5533912696034362, "grad_norm": 1.7104989290237427, "learning_rate": 8.226940874842417e-05, "loss": 0.6835, "step": 11080 }, { "epoch": 0.5538907202077714, "grad_norm": 0.7547670602798462, "learning_rate": 8.223943527404498e-05, "loss": 0.6781, "step": 11090 }, { "epoch": 0.5543901708121067, "grad_norm": 2.296278476715088, "learning_rate": 8.22094419564414e-05, "loss": 1.1, "step": 11100 }, { "epoch": 0.5548896214164419, "grad_norm": 4.66751766204834, "learning_rate": 8.217942881407416e-05, "loss": 0.9201, "step": 11110 }, { "epoch": 0.5553890720207771, "grad_norm": 1.8728293180465698, "learning_rate": 8.214939586541626e-05, "loss": 0.8055, "step": 11120 }, { "epoch": 0.5558885226251123, "grad_norm": 2.5496201515197754, "learning_rate": 8.21193431289528e-05, "loss": 0.8833, "step": 11130 }, { "epoch": 0.5563879732294476, "grad_norm": 2.07619309425354, "learning_rate": 8.20892706231811e-05, "loss": 0.8857, "step": 11140 }, { "epoch": 0.5568874238337829, "grad_norm": 0.936546266078949, "learning_rate": 8.205917836661067e-05, "loss": 1.0265, "step": 11150 }, { "epoch": 0.5573868744381181, "grad_norm": 1.0726035833358765, "learning_rate": 8.202906637776316e-05, "loss": 0.9022, "step": 11160 }, { "epoch": 0.5578863250424533, "grad_norm": 2.3275067806243896, "learning_rate": 8.199893467517231e-05, "loss": 0.7763, "step": 11170 }, { "epoch": 0.5583857756467885, "grad_norm": 2.8741378784179688, "learning_rate": 8.196878327738411e-05, "loss": 0.8514, "step": 11180 }, { "epoch": 0.5588852262511238, "grad_norm": 0.6805217266082764, "learning_rate": 8.193861220295657e-05, "loss": 0.7612, "step": 11190 }, { "epoch": 0.559384676855459, "grad_norm": 1.7229316234588623, "learning_rate": 8.190842147045985e-05, "loss": 0.8793, "step": 11200 }, { "epoch": 0.5598841274597942, "grad_norm": 1.988580584526062, "learning_rate": 8.187821109847621e-05, "loss": 0.944, "step": 11210 }, { "epoch": 0.5603835780641294, "grad_norm": 3.3690719604492188, "learning_rate": 8.184798110560002e-05, "loss": 0.9576, "step": 11220 }, { "epoch": 0.5608830286684647, "grad_norm": 2.5268752574920654, "learning_rate": 8.181773151043767e-05, "loss": 0.7382, "step": 11230 }, { "epoch": 0.5613824792727999, "grad_norm": 1.3180385828018188, "learning_rate": 8.178746233160766e-05, "loss": 0.8898, "step": 11240 }, { "epoch": 0.5618819298771351, "grad_norm": 1.845851182937622, "learning_rate": 8.175717358774052e-05, "loss": 0.8032, "step": 11250 }, { "epoch": 0.5623813804814703, "grad_norm": 2.3231937885284424, "learning_rate": 8.172686529747885e-05, "loss": 0.7337, "step": 11260 }, { "epoch": 0.5628808310858057, "grad_norm": 2.671926975250244, "learning_rate": 8.169653747947724e-05, "loss": 0.8322, "step": 11270 }, { "epoch": 0.5633802816901409, "grad_norm": 0.9263166785240173, "learning_rate": 8.166619015240236e-05, "loss": 1.0253, "step": 11280 }, { "epoch": 0.5638797322944761, "grad_norm": 3.5001063346862793, "learning_rate": 8.16358233349328e-05, "loss": 0.807, "step": 11290 }, { "epoch": 0.5643791828988113, "grad_norm": 3.631415843963623, "learning_rate": 8.160543704575924e-05, "loss": 1.1009, "step": 11300 }, { "epoch": 0.5648786335031465, "grad_norm": 0.8493178486824036, "learning_rate": 8.157503130358431e-05, "loss": 0.8958, "step": 11310 }, { "epoch": 0.5653780841074818, "grad_norm": 2.1403348445892334, "learning_rate": 8.154460612712254e-05, "loss": 0.7372, "step": 11320 }, { "epoch": 0.565877534711817, "grad_norm": 2.315157413482666, "learning_rate": 8.151416153510054e-05, "loss": 0.7946, "step": 11330 }, { "epoch": 0.5663769853161522, "grad_norm": 0.8276131749153137, "learning_rate": 8.14836975462568e-05, "loss": 0.7355, "step": 11340 }, { "epoch": 0.5668764359204874, "grad_norm": 4.0848822593688965, "learning_rate": 8.145321417934179e-05, "loss": 1.068, "step": 11350 }, { "epoch": 0.5673758865248227, "grad_norm": 2.2448577880859375, "learning_rate": 8.142271145311783e-05, "loss": 1.1738, "step": 11360 }, { "epoch": 0.5678753371291579, "grad_norm": 1.619431734085083, "learning_rate": 8.139218938635927e-05, "loss": 0.9228, "step": 11370 }, { "epoch": 0.5683747877334931, "grad_norm": 3.5642194747924805, "learning_rate": 8.136164799785224e-05, "loss": 0.7772, "step": 11380 }, { "epoch": 0.5688742383378284, "grad_norm": 3.4192471504211426, "learning_rate": 8.133108730639489e-05, "loss": 1.1743, "step": 11390 }, { "epoch": 0.5693736889421637, "grad_norm": 1.5282948017120361, "learning_rate": 8.130050733079712e-05, "loss": 0.8939, "step": 11400 }, { "epoch": 0.5698731395464989, "grad_norm": 1.6148427724838257, "learning_rate": 8.126990808988082e-05, "loss": 0.954, "step": 11410 }, { "epoch": 0.5703725901508341, "grad_norm": 1.722934603691101, "learning_rate": 8.123928960247964e-05, "loss": 0.7706, "step": 11420 }, { "epoch": 0.5708720407551693, "grad_norm": 2.6629602909088135, "learning_rate": 8.120865188743914e-05, "loss": 0.9743, "step": 11430 }, { "epoch": 0.5713714913595045, "grad_norm": 4.457296848297119, "learning_rate": 8.117799496361669e-05, "loss": 0.9488, "step": 11440 }, { "epoch": 0.5718709419638398, "grad_norm": 1.44455087184906, "learning_rate": 8.114731884988149e-05, "loss": 1.03, "step": 11450 }, { "epoch": 0.572370392568175, "grad_norm": 1.7806223630905151, "learning_rate": 8.111662356511453e-05, "loss": 0.722, "step": 11460 }, { "epoch": 0.5728698431725102, "grad_norm": 1.8148882389068604, "learning_rate": 8.108590912820864e-05, "loss": 0.9295, "step": 11470 }, { "epoch": 0.5733692937768454, "grad_norm": 1.7864558696746826, "learning_rate": 8.105517555806841e-05, "loss": 0.8256, "step": 11480 }, { "epoch": 0.5738687443811807, "grad_norm": 1.2219898700714111, "learning_rate": 8.102442287361018e-05, "loss": 1.0176, "step": 11490 }, { "epoch": 0.574368194985516, "grad_norm": 3.0943713188171387, "learning_rate": 8.099365109376213e-05, "loss": 0.9669, "step": 11500 }, { "epoch": 0.5748676455898512, "grad_norm": 1.2991565465927124, "learning_rate": 8.096286023746414e-05, "loss": 0.9712, "step": 11510 }, { "epoch": 0.5753670961941864, "grad_norm": 1.5561408996582031, "learning_rate": 8.093205032366782e-05, "loss": 0.7694, "step": 11520 }, { "epoch": 0.5758665467985217, "grad_norm": 1.454827070236206, "learning_rate": 8.090122137133653e-05, "loss": 0.7044, "step": 11530 }, { "epoch": 0.5763659974028569, "grad_norm": 1.141781210899353, "learning_rate": 8.087037339944536e-05, "loss": 0.8696, "step": 11540 }, { "epoch": 0.5768654480071921, "grad_norm": 1.3285212516784668, "learning_rate": 8.083950642698112e-05, "loss": 0.9028, "step": 11550 }, { "epoch": 0.5773648986115273, "grad_norm": 1.0808900594711304, "learning_rate": 8.080862047294225e-05, "loss": 0.7895, "step": 11560 }, { "epoch": 0.5778643492158626, "grad_norm": 2.268902540206909, "learning_rate": 8.077771555633893e-05, "loss": 0.8546, "step": 11570 }, { "epoch": 0.5783637998201978, "grad_norm": 2.3639559745788574, "learning_rate": 8.0746791696193e-05, "loss": 0.8286, "step": 11580 }, { "epoch": 0.578863250424533, "grad_norm": 2.1565093994140625, "learning_rate": 8.071584891153792e-05, "loss": 0.8108, "step": 11590 }, { "epoch": 0.5793627010288682, "grad_norm": 0.9906855225563049, "learning_rate": 8.06848872214189e-05, "loss": 0.6949, "step": 11600 }, { "epoch": 0.5798621516332034, "grad_norm": 3.8159871101379395, "learning_rate": 8.065390664489264e-05, "loss": 0.8888, "step": 11610 }, { "epoch": 0.5803616022375387, "grad_norm": 3.860344648361206, "learning_rate": 8.062290720102759e-05, "loss": 0.9017, "step": 11620 }, { "epoch": 0.580861052841874, "grad_norm": 2.809664249420166, "learning_rate": 8.059188890890375e-05, "loss": 0.9088, "step": 11630 }, { "epoch": 0.5813605034462092, "grad_norm": 3.262469530105591, "learning_rate": 8.056085178761275e-05, "loss": 0.9483, "step": 11640 }, { "epoch": 0.5818599540505444, "grad_norm": 4.122176647186279, "learning_rate": 8.052979585625778e-05, "loss": 1.1103, "step": 11650 }, { "epoch": 0.5823594046548797, "grad_norm": 1.2592228651046753, "learning_rate": 8.049872113395363e-05, "loss": 0.8896, "step": 11660 }, { "epoch": 0.5828588552592149, "grad_norm": 2.1690526008605957, "learning_rate": 8.046762763982665e-05, "loss": 0.9666, "step": 11670 }, { "epoch": 0.5833583058635501, "grad_norm": 1.719984531402588, "learning_rate": 8.043651539301475e-05, "loss": 0.8685, "step": 11680 }, { "epoch": 0.5838577564678853, "grad_norm": 3.1530261039733887, "learning_rate": 8.040538441266736e-05, "loss": 0.7708, "step": 11690 }, { "epoch": 0.5843572070722206, "grad_norm": 6.055736541748047, "learning_rate": 8.037423471794545e-05, "loss": 0.8857, "step": 11700 }, { "epoch": 0.5848566576765558, "grad_norm": 2.080730676651001, "learning_rate": 8.034306632802154e-05, "loss": 1.0014, "step": 11710 }, { "epoch": 0.585356108280891, "grad_norm": 2.089596748352051, "learning_rate": 8.031187926207962e-05, "loss": 0.7845, "step": 11720 }, { "epoch": 0.5858555588852262, "grad_norm": 1.791109561920166, "learning_rate": 8.028067353931517e-05, "loss": 0.8536, "step": 11730 }, { "epoch": 0.5863550094895614, "grad_norm": 4.204870700836182, "learning_rate": 8.024944917893519e-05, "loss": 0.8977, "step": 11740 }, { "epoch": 0.5868544600938967, "grad_norm": 0.7246403098106384, "learning_rate": 8.021820620015811e-05, "loss": 0.8527, "step": 11750 }, { "epoch": 0.587353910698232, "grad_norm": 0.8796305060386658, "learning_rate": 8.018694462221387e-05, "loss": 0.8079, "step": 11760 }, { "epoch": 0.5878533613025672, "grad_norm": 1.0883392095565796, "learning_rate": 8.01556644643438e-05, "loss": 0.5785, "step": 11770 }, { "epoch": 0.5883528119069024, "grad_norm": 2.5127203464508057, "learning_rate": 8.01243657458007e-05, "loss": 0.6381, "step": 11780 }, { "epoch": 0.5888522625112377, "grad_norm": 1.3080767393112183, "learning_rate": 8.009304848584879e-05, "loss": 0.7554, "step": 11790 }, { "epoch": 0.5893517131155729, "grad_norm": 3.24627947807312, "learning_rate": 8.00617127037637e-05, "loss": 0.9587, "step": 11800 }, { "epoch": 0.5898511637199081, "grad_norm": 1.6273128986358643, "learning_rate": 8.003035841883249e-05, "loss": 0.7672, "step": 11810 }, { "epoch": 0.5903506143242433, "grad_norm": 2.0696349143981934, "learning_rate": 7.999898565035352e-05, "loss": 0.7795, "step": 11820 }, { "epoch": 0.5908500649285786, "grad_norm": 2.585515260696411, "learning_rate": 7.996759441763661e-05, "loss": 0.7312, "step": 11830 }, { "epoch": 0.5913495155329138, "grad_norm": 5.360396862030029, "learning_rate": 7.993618474000293e-05, "loss": 0.8446, "step": 11840 }, { "epoch": 0.591848966137249, "grad_norm": 1.9149426221847534, "learning_rate": 7.9904756636785e-05, "loss": 0.8087, "step": 11850 }, { "epoch": 0.5923484167415842, "grad_norm": 1.9312024116516113, "learning_rate": 7.987331012732665e-05, "loss": 0.93, "step": 11860 }, { "epoch": 0.5928478673459195, "grad_norm": 1.9590035676956177, "learning_rate": 7.984184523098307e-05, "loss": 0.8477, "step": 11870 }, { "epoch": 0.5933473179502547, "grad_norm": 2.831815481185913, "learning_rate": 7.981036196712077e-05, "loss": 0.7381, "step": 11880 }, { "epoch": 0.59384676855459, "grad_norm": 1.3266551494598389, "learning_rate": 7.977886035511753e-05, "loss": 1.0491, "step": 11890 }, { "epoch": 0.5943462191589252, "grad_norm": 1.720620036125183, "learning_rate": 7.974734041436246e-05, "loss": 1.0184, "step": 11900 }, { "epoch": 0.5948456697632604, "grad_norm": 1.073946237564087, "learning_rate": 7.971580216425596e-05, "loss": 0.6153, "step": 11910 }, { "epoch": 0.5953451203675957, "grad_norm": 1.2611136436462402, "learning_rate": 7.968424562420966e-05, "loss": 0.8856, "step": 11920 }, { "epoch": 0.5958445709719309, "grad_norm": 1.2337510585784912, "learning_rate": 7.965267081364644e-05, "loss": 1.0286, "step": 11930 }, { "epoch": 0.5963440215762661, "grad_norm": 1.8494173288345337, "learning_rate": 7.96210777520005e-05, "loss": 0.8438, "step": 11940 }, { "epoch": 0.5968434721806013, "grad_norm": 1.8579139709472656, "learning_rate": 7.958946645871719e-05, "loss": 0.7892, "step": 11950 }, { "epoch": 0.5973429227849366, "grad_norm": 1.6124542951583862, "learning_rate": 7.955783695325315e-05, "loss": 1.0447, "step": 11960 }, { "epoch": 0.5978423733892718, "grad_norm": 1.4833028316497803, "learning_rate": 7.952618925507614e-05, "loss": 0.9831, "step": 11970 }, { "epoch": 0.598341823993607, "grad_norm": 1.6952725648880005, "learning_rate": 7.94945233836652e-05, "loss": 0.891, "step": 11980 }, { "epoch": 0.5988412745979422, "grad_norm": 2.0533688068389893, "learning_rate": 7.946283935851057e-05, "loss": 0.9172, "step": 11990 }, { "epoch": 0.5993407252022775, "grad_norm": 1.5406326055526733, "learning_rate": 7.943113719911355e-05, "loss": 0.9592, "step": 12000 }, { "epoch": 0.5998401758066128, "grad_norm": 1.6479825973510742, "learning_rate": 7.939941692498674e-05, "loss": 0.8748, "step": 12010 }, { "epoch": 0.600339626410948, "grad_norm": 1.3644030094146729, "learning_rate": 7.936767855565376e-05, "loss": 0.7153, "step": 12020 }, { "epoch": 0.6008390770152832, "grad_norm": 2.766441822052002, "learning_rate": 7.933592211064949e-05, "loss": 0.9166, "step": 12030 }, { "epoch": 0.6013385276196184, "grad_norm": 3.7731025218963623, "learning_rate": 7.930414760951983e-05, "loss": 0.8657, "step": 12040 }, { "epoch": 0.6018379782239537, "grad_norm": 2.5364325046539307, "learning_rate": 7.927235507182186e-05, "loss": 0.8775, "step": 12050 }, { "epoch": 0.6023374288282889, "grad_norm": 2.266455888748169, "learning_rate": 7.924054451712375e-05, "loss": 1.0283, "step": 12060 }, { "epoch": 0.6028368794326241, "grad_norm": 1.0789244174957275, "learning_rate": 7.920871596500472e-05, "loss": 0.7952, "step": 12070 }, { "epoch": 0.6033363300369593, "grad_norm": 3.885934352874756, "learning_rate": 7.917686943505514e-05, "loss": 0.9007, "step": 12080 }, { "epoch": 0.6038357806412946, "grad_norm": 1.2866015434265137, "learning_rate": 7.914500494687637e-05, "loss": 0.6869, "step": 12090 }, { "epoch": 0.6043352312456298, "grad_norm": 3.5734331607818604, "learning_rate": 7.911312252008086e-05, "loss": 1.001, "step": 12100 }, { "epoch": 0.604834681849965, "grad_norm": 2.1679513454437256, "learning_rate": 7.908122217429212e-05, "loss": 0.897, "step": 12110 }, { "epoch": 0.6053341324543002, "grad_norm": 2.468299627304077, "learning_rate": 7.904930392914464e-05, "loss": 0.7623, "step": 12120 }, { "epoch": 0.6058335830586355, "grad_norm": 2.6202375888824463, "learning_rate": 7.901736780428394e-05, "loss": 0.8243, "step": 12130 }, { "epoch": 0.6063330336629708, "grad_norm": 1.9101169109344482, "learning_rate": 7.898541381936662e-05, "loss": 0.7416, "step": 12140 }, { "epoch": 0.606832484267306, "grad_norm": 0.6252166628837585, "learning_rate": 7.895344199406017e-05, "loss": 0.8222, "step": 12150 }, { "epoch": 0.6073319348716412, "grad_norm": 1.0127849578857422, "learning_rate": 7.89214523480431e-05, "loss": 0.7764, "step": 12160 }, { "epoch": 0.6078313854759764, "grad_norm": 0.734090268611908, "learning_rate": 7.888944490100487e-05, "loss": 1.0371, "step": 12170 }, { "epoch": 0.6083308360803117, "grad_norm": 1.8829647302627563, "learning_rate": 7.885741967264595e-05, "loss": 0.9722, "step": 12180 }, { "epoch": 0.6088302866846469, "grad_norm": 2.159318685531616, "learning_rate": 7.882537668267773e-05, "loss": 0.6813, "step": 12190 }, { "epoch": 0.6093297372889821, "grad_norm": 2.9517064094543457, "learning_rate": 7.879331595082249e-05, "loss": 0.9704, "step": 12200 }, { "epoch": 0.6098291878933173, "grad_norm": 1.1465985774993896, "learning_rate": 7.876123749681349e-05, "loss": 0.9773, "step": 12210 }, { "epoch": 0.6103286384976526, "grad_norm": 0.9804210066795349, "learning_rate": 7.872914134039484e-05, "loss": 0.7583, "step": 12220 }, { "epoch": 0.6108280891019878, "grad_norm": 3.645322561264038, "learning_rate": 7.869702750132162e-05, "loss": 0.851, "step": 12230 }, { "epoch": 0.611327539706323, "grad_norm": 1.5942487716674805, "learning_rate": 7.866489599935971e-05, "loss": 0.794, "step": 12240 }, { "epoch": 0.6118269903106582, "grad_norm": 2.031911849975586, "learning_rate": 7.863274685428594e-05, "loss": 1.1151, "step": 12250 }, { "epoch": 0.6123264409149936, "grad_norm": 1.527058482170105, "learning_rate": 7.860058008588791e-05, "loss": 0.8269, "step": 12260 }, { "epoch": 0.6128258915193288, "grad_norm": 2.355426549911499, "learning_rate": 7.856839571396417e-05, "loss": 0.7066, "step": 12270 }, { "epoch": 0.613325342123664, "grad_norm": 3.1753835678100586, "learning_rate": 7.853619375832404e-05, "loss": 0.6978, "step": 12280 }, { "epoch": 0.6138247927279992, "grad_norm": 3.317664623260498, "learning_rate": 7.850397423878766e-05, "loss": 0.7919, "step": 12290 }, { "epoch": 0.6143242433323345, "grad_norm": 2.4618120193481445, "learning_rate": 7.8471737175186e-05, "loss": 0.9619, "step": 12300 }, { "epoch": 0.6148236939366697, "grad_norm": 1.3759201765060425, "learning_rate": 7.843948258736082e-05, "loss": 0.8436, "step": 12310 }, { "epoch": 0.6153231445410049, "grad_norm": 3.534787178039551, "learning_rate": 7.840721049516468e-05, "loss": 0.845, "step": 12320 }, { "epoch": 0.6158225951453401, "grad_norm": 1.6224181652069092, "learning_rate": 7.837492091846092e-05, "loss": 0.7444, "step": 12330 }, { "epoch": 0.6163220457496753, "grad_norm": 1.3301310539245605, "learning_rate": 7.83426138771236e-05, "loss": 0.8436, "step": 12340 }, { "epoch": 0.6168214963540106, "grad_norm": 1.7994141578674316, "learning_rate": 7.831028939103757e-05, "loss": 0.8018, "step": 12350 }, { "epoch": 0.6173209469583458, "grad_norm": 1.82992684841156, "learning_rate": 7.82779474800984e-05, "loss": 1.1498, "step": 12360 }, { "epoch": 0.617820397562681, "grad_norm": 0.945503294467926, "learning_rate": 7.824558816421237e-05, "loss": 0.8305, "step": 12370 }, { "epoch": 0.6183198481670162, "grad_norm": 0.9997421503067017, "learning_rate": 7.821321146329652e-05, "loss": 1.3226, "step": 12380 }, { "epoch": 0.6188192987713516, "grad_norm": 2.531658411026001, "learning_rate": 7.818081739727855e-05, "loss": 0.7832, "step": 12390 }, { "epoch": 0.6193187493756868, "grad_norm": 2.9111216068267822, "learning_rate": 7.814840598609686e-05, "loss": 0.8982, "step": 12400 }, { "epoch": 0.619818199980022, "grad_norm": 2.1175637245178223, "learning_rate": 7.811597724970051e-05, "loss": 0.9697, "step": 12410 }, { "epoch": 0.6203176505843572, "grad_norm": 1.2090173959732056, "learning_rate": 7.808353120804926e-05, "loss": 0.6774, "step": 12420 }, { "epoch": 0.6208171011886925, "grad_norm": 2.112946033477783, "learning_rate": 7.805106788111347e-05, "loss": 0.9978, "step": 12430 }, { "epoch": 0.6213165517930277, "grad_norm": 2.040161609649658, "learning_rate": 7.801858728887421e-05, "loss": 0.8364, "step": 12440 }, { "epoch": 0.6218160023973629, "grad_norm": 2.338141679763794, "learning_rate": 7.798608945132311e-05, "loss": 0.8842, "step": 12450 }, { "epoch": 0.6223154530016981, "grad_norm": 3.354522705078125, "learning_rate": 7.795357438846243e-05, "loss": 0.9676, "step": 12460 }, { "epoch": 0.6228149036060333, "grad_norm": 1.1040174961090088, "learning_rate": 7.792104212030506e-05, "loss": 0.7489, "step": 12470 }, { "epoch": 0.6233143542103686, "grad_norm": 1.954264760017395, "learning_rate": 7.788849266687446e-05, "loss": 1.0124, "step": 12480 }, { "epoch": 0.6238138048147038, "grad_norm": 2.1573355197906494, "learning_rate": 7.785592604820466e-05, "loss": 0.7793, "step": 12490 }, { "epoch": 0.624313255419039, "grad_norm": 2.3539342880249023, "learning_rate": 7.782334228434028e-05, "loss": 1.0291, "step": 12500 }, { "epoch": 0.6248127060233742, "grad_norm": 1.8481172323226929, "learning_rate": 7.779074139533647e-05, "loss": 1.0117, "step": 12510 }, { "epoch": 0.6253121566277096, "grad_norm": 2.7877354621887207, "learning_rate": 7.77581234012589e-05, "loss": 0.8698, "step": 12520 }, { "epoch": 0.6258116072320448, "grad_norm": 4.730374336242676, "learning_rate": 7.772548832218383e-05, "loss": 1.0457, "step": 12530 }, { "epoch": 0.62631105783638, "grad_norm": 1.8074944019317627, "learning_rate": 7.7692836178198e-05, "loss": 0.9615, "step": 12540 }, { "epoch": 0.6268105084407152, "grad_norm": 0.8964559435844421, "learning_rate": 7.766016698939864e-05, "loss": 0.6061, "step": 12550 }, { "epoch": 0.6273099590450505, "grad_norm": 5.012725830078125, "learning_rate": 7.76274807758935e-05, "loss": 0.9625, "step": 12560 }, { "epoch": 0.6278094096493857, "grad_norm": 1.6120495796203613, "learning_rate": 7.759477755780078e-05, "loss": 0.8735, "step": 12570 }, { "epoch": 0.6283088602537209, "grad_norm": 2.0828094482421875, "learning_rate": 7.756205735524917e-05, "loss": 0.6527, "step": 12580 }, { "epoch": 0.6288083108580561, "grad_norm": 1.591010332107544, "learning_rate": 7.75293201883778e-05, "loss": 0.8747, "step": 12590 }, { "epoch": 0.6293077614623914, "grad_norm": 2.5814242362976074, "learning_rate": 7.749656607733624e-05, "loss": 0.9139, "step": 12600 }, { "epoch": 0.6298072120667266, "grad_norm": 1.6371376514434814, "learning_rate": 7.746379504228452e-05, "loss": 0.8774, "step": 12610 }, { "epoch": 0.6303066626710618, "grad_norm": 1.6633555889129639, "learning_rate": 7.743100710339304e-05, "loss": 0.8397, "step": 12620 }, { "epoch": 0.630806113275397, "grad_norm": 4.060708999633789, "learning_rate": 7.739820228084261e-05, "loss": 0.8574, "step": 12630 }, { "epoch": 0.6313055638797322, "grad_norm": 2.887648820877075, "learning_rate": 7.736538059482447e-05, "loss": 0.9689, "step": 12640 }, { "epoch": 0.6318050144840676, "grad_norm": 0.8675547242164612, "learning_rate": 7.733254206554024e-05, "loss": 0.7439, "step": 12650 }, { "epoch": 0.6323044650884028, "grad_norm": 1.7451297044754028, "learning_rate": 7.729968671320185e-05, "loss": 0.7079, "step": 12660 }, { "epoch": 0.632803915692738, "grad_norm": 2.2067673206329346, "learning_rate": 7.726681455803161e-05, "loss": 0.7516, "step": 12670 }, { "epoch": 0.6333033662970732, "grad_norm": 2.3583004474639893, "learning_rate": 7.723392562026221e-05, "loss": 0.8686, "step": 12680 }, { "epoch": 0.6338028169014085, "grad_norm": 3.745316505432129, "learning_rate": 7.720101992013662e-05, "loss": 0.843, "step": 12690 }, { "epoch": 0.6343022675057437, "grad_norm": 0.6729642152786255, "learning_rate": 7.716809747790817e-05, "loss": 0.8411, "step": 12700 }, { "epoch": 0.6348017181100789, "grad_norm": 1.1551730632781982, "learning_rate": 7.713515831384044e-05, "loss": 0.7959, "step": 12710 }, { "epoch": 0.6353011687144141, "grad_norm": 1.2299566268920898, "learning_rate": 7.710220244820736e-05, "loss": 0.9145, "step": 12720 }, { "epoch": 0.6358006193187494, "grad_norm": 1.5636696815490723, "learning_rate": 7.706922990129309e-05, "loss": 0.9513, "step": 12730 }, { "epoch": 0.6363000699230846, "grad_norm": 3.4296226501464844, "learning_rate": 7.703624069339211e-05, "loss": 0.9121, "step": 12740 }, { "epoch": 0.6367995205274198, "grad_norm": 3.456554889678955, "learning_rate": 7.700323484480911e-05, "loss": 0.858, "step": 12750 }, { "epoch": 0.637298971131755, "grad_norm": 2.2380423545837402, "learning_rate": 7.697021237585906e-05, "loss": 0.8616, "step": 12760 }, { "epoch": 0.6377984217360902, "grad_norm": 2.1804730892181396, "learning_rate": 7.693717330686709e-05, "loss": 0.9048, "step": 12770 }, { "epoch": 0.6382978723404256, "grad_norm": 2.1640286445617676, "learning_rate": 7.690411765816864e-05, "loss": 0.9936, "step": 12780 }, { "epoch": 0.6387973229447608, "grad_norm": 1.3663392066955566, "learning_rate": 7.687104545010928e-05, "loss": 0.9104, "step": 12790 }, { "epoch": 0.639296773549096, "grad_norm": 1.6175613403320312, "learning_rate": 7.683795670304484e-05, "loss": 0.7661, "step": 12800 }, { "epoch": 0.6397962241534312, "grad_norm": 2.310173273086548, "learning_rate": 7.680485143734125e-05, "loss": 0.9265, "step": 12810 }, { "epoch": 0.6402956747577665, "grad_norm": 1.0012131929397583, "learning_rate": 7.677172967337467e-05, "loss": 0.8629, "step": 12820 }, { "epoch": 0.6407951253621017, "grad_norm": 1.1713634729385376, "learning_rate": 7.673859143153139e-05, "loss": 0.7713, "step": 12830 }, { "epoch": 0.6412945759664369, "grad_norm": 2.488898277282715, "learning_rate": 7.670543673220786e-05, "loss": 0.825, "step": 12840 }, { "epoch": 0.6417940265707721, "grad_norm": 1.26347017288208, "learning_rate": 7.667226559581062e-05, "loss": 0.7914, "step": 12850 }, { "epoch": 0.6422934771751074, "grad_norm": 2.276587724685669, "learning_rate": 7.663907804275636e-05, "loss": 1.0954, "step": 12860 }, { "epoch": 0.6427929277794426, "grad_norm": 0.8161036968231201, "learning_rate": 7.660587409347187e-05, "loss": 0.7575, "step": 12870 }, { "epoch": 0.6432923783837778, "grad_norm": 0.5742631554603577, "learning_rate": 7.657265376839405e-05, "loss": 0.6672, "step": 12880 }, { "epoch": 0.643791828988113, "grad_norm": 2.646259307861328, "learning_rate": 7.653941708796981e-05, "loss": 0.9497, "step": 12890 }, { "epoch": 0.6442912795924483, "grad_norm": 0.9056533575057983, "learning_rate": 7.650616407265623e-05, "loss": 0.8258, "step": 12900 }, { "epoch": 0.6447907301967836, "grad_norm": 3.490792751312256, "learning_rate": 7.647289474292032e-05, "loss": 0.7076, "step": 12910 }, { "epoch": 0.6452901808011188, "grad_norm": 1.7089931964874268, "learning_rate": 7.643960911923926e-05, "loss": 0.7547, "step": 12920 }, { "epoch": 0.645789631405454, "grad_norm": 1.3501700162887573, "learning_rate": 7.640630722210018e-05, "loss": 0.8654, "step": 12930 }, { "epoch": 0.6462890820097892, "grad_norm": 1.3468042612075806, "learning_rate": 7.637298907200024e-05, "loss": 0.7137, "step": 12940 }, { "epoch": 0.6467885326141245, "grad_norm": 1.3366823196411133, "learning_rate": 7.633965468944662e-05, "loss": 0.7872, "step": 12950 }, { "epoch": 0.6472879832184597, "grad_norm": 2.4136240482330322, "learning_rate": 7.630630409495645e-05, "loss": 0.8831, "step": 12960 }, { "epoch": 0.6477874338227949, "grad_norm": 3.5967211723327637, "learning_rate": 7.627293730905689e-05, "loss": 0.8968, "step": 12970 }, { "epoch": 0.6482868844271301, "grad_norm": 4.377511501312256, "learning_rate": 7.623955435228505e-05, "loss": 0.8787, "step": 12980 }, { "epoch": 0.6487863350314654, "grad_norm": 1.7462594509124756, "learning_rate": 7.620615524518797e-05, "loss": 0.9401, "step": 12990 }, { "epoch": 0.6492857856358006, "grad_norm": 1.7474712133407593, "learning_rate": 7.617274000832266e-05, "loss": 0.9544, "step": 13000 }, { "epoch": 0.6497852362401358, "grad_norm": 0.8920682668685913, "learning_rate": 7.613930866225604e-05, "loss": 0.898, "step": 13010 }, { "epoch": 0.650284686844471, "grad_norm": 1.4357318878173828, "learning_rate": 7.610586122756496e-05, "loss": 0.6582, "step": 13020 }, { "epoch": 0.6507841374488064, "grad_norm": 2.6412606239318848, "learning_rate": 7.607239772483614e-05, "loss": 0.909, "step": 13030 }, { "epoch": 0.6512835880531416, "grad_norm": 0.8142654895782471, "learning_rate": 7.603891817466621e-05, "loss": 0.8265, "step": 13040 }, { "epoch": 0.6517830386574768, "grad_norm": 2.4540538787841797, "learning_rate": 7.600542259766173e-05, "loss": 0.6881, "step": 13050 }, { "epoch": 0.652282489261812, "grad_norm": 1.987032175064087, "learning_rate": 7.597191101443901e-05, "loss": 0.7851, "step": 13060 }, { "epoch": 0.6527819398661472, "grad_norm": 2.8409721851348877, "learning_rate": 7.593838344562432e-05, "loss": 0.626, "step": 13070 }, { "epoch": 0.6532813904704825, "grad_norm": 4.194586753845215, "learning_rate": 7.590483991185369e-05, "loss": 0.7684, "step": 13080 }, { "epoch": 0.6537808410748177, "grad_norm": 2.416386127471924, "learning_rate": 7.587128043377304e-05, "loss": 0.9145, "step": 13090 }, { "epoch": 0.6542802916791529, "grad_norm": 4.792058944702148, "learning_rate": 7.583770503203807e-05, "loss": 1.0233, "step": 13100 }, { "epoch": 0.6547797422834881, "grad_norm": 1.2306267023086548, "learning_rate": 7.580411372731426e-05, "loss": 0.7519, "step": 13110 }, { "epoch": 0.6552791928878234, "grad_norm": 1.764892816543579, "learning_rate": 7.577050654027693e-05, "loss": 0.9306, "step": 13120 }, { "epoch": 0.6557786434921586, "grad_norm": 1.615273356437683, "learning_rate": 7.573688349161115e-05, "loss": 0.8097, "step": 13130 }, { "epoch": 0.6562780940964938, "grad_norm": 1.7956855297088623, "learning_rate": 7.570324460201174e-05, "loss": 0.8041, "step": 13140 }, { "epoch": 0.656777544700829, "grad_norm": 2.333482503890991, "learning_rate": 7.566958989218329e-05, "loss": 1.0211, "step": 13150 }, { "epoch": 0.6572769953051644, "grad_norm": 1.5992728471755981, "learning_rate": 7.563591938284011e-05, "loss": 0.8495, "step": 13160 }, { "epoch": 0.6577764459094996, "grad_norm": 2.574862480163574, "learning_rate": 7.560223309470627e-05, "loss": 0.773, "step": 13170 }, { "epoch": 0.6582758965138348, "grad_norm": 2.4205756187438965, "learning_rate": 7.556853104851548e-05, "loss": 0.945, "step": 13180 }, { "epoch": 0.65877534711817, "grad_norm": 5.572146892547607, "learning_rate": 7.553481326501124e-05, "loss": 1.1562, "step": 13190 }, { "epoch": 0.6592747977225052, "grad_norm": 1.2964991331100464, "learning_rate": 7.550107976494665e-05, "loss": 0.7917, "step": 13200 }, { "epoch": 0.6597742483268405, "grad_norm": 1.5800459384918213, "learning_rate": 7.546733056908456e-05, "loss": 0.8779, "step": 13210 }, { "epoch": 0.6602736989311757, "grad_norm": 2.146944999694824, "learning_rate": 7.543356569819744e-05, "loss": 0.9196, "step": 13220 }, { "epoch": 0.6607731495355109, "grad_norm": 2.837876081466675, "learning_rate": 7.539978517306738e-05, "loss": 0.7187, "step": 13230 }, { "epoch": 0.6612726001398461, "grad_norm": 3.4195945262908936, "learning_rate": 7.536598901448617e-05, "loss": 0.7647, "step": 13240 }, { "epoch": 0.6617720507441814, "grad_norm": 2.067004680633545, "learning_rate": 7.533217724325519e-05, "loss": 1.1108, "step": 13250 }, { "epoch": 0.6622715013485166, "grad_norm": 1.316638469696045, "learning_rate": 7.529834988018542e-05, "loss": 0.7777, "step": 13260 }, { "epoch": 0.6627709519528519, "grad_norm": 2.552154064178467, "learning_rate": 7.526450694609745e-05, "loss": 1.0747, "step": 13270 }, { "epoch": 0.663270402557187, "grad_norm": 2.6598401069641113, "learning_rate": 7.523064846182145e-05, "loss": 0.7556, "step": 13280 }, { "epoch": 0.6637698531615224, "grad_norm": 0.7628982663154602, "learning_rate": 7.519677444819714e-05, "loss": 0.8351, "step": 13290 }, { "epoch": 0.6642693037658576, "grad_norm": 1.2733036279678345, "learning_rate": 7.516288492607388e-05, "loss": 0.9932, "step": 13300 }, { "epoch": 0.6647687543701928, "grad_norm": 1.3308387994766235, "learning_rate": 7.512897991631045e-05, "loss": 0.7084, "step": 13310 }, { "epoch": 0.665268204974528, "grad_norm": 2.271099328994751, "learning_rate": 7.509505943977526e-05, "loss": 0.9023, "step": 13320 }, { "epoch": 0.6657676555788633, "grad_norm": 3.208682060241699, "learning_rate": 7.50611235173462e-05, "loss": 0.8111, "step": 13330 }, { "epoch": 0.6662671061831985, "grad_norm": 3.4175901412963867, "learning_rate": 7.502717216991069e-05, "loss": 0.9072, "step": 13340 }, { "epoch": 0.6667665567875337, "grad_norm": 2.390958786010742, "learning_rate": 7.499320541836559e-05, "loss": 0.7867, "step": 13350 }, { "epoch": 0.6672660073918689, "grad_norm": 6.080948352813721, "learning_rate": 7.495922328361733e-05, "loss": 1.1479, "step": 13360 }, { "epoch": 0.6677654579962041, "grad_norm": 1.4552528858184814, "learning_rate": 7.492522578658171e-05, "loss": 0.9252, "step": 13370 }, { "epoch": 0.6682649086005394, "grad_norm": 1.0324900150299072, "learning_rate": 7.489121294818407e-05, "loss": 0.6551, "step": 13380 }, { "epoch": 0.6687643592048746, "grad_norm": 2.4894304275512695, "learning_rate": 7.485718478935913e-05, "loss": 1.0623, "step": 13390 }, { "epoch": 0.6692638098092099, "grad_norm": 2.6332945823669434, "learning_rate": 7.482314133105108e-05, "loss": 0.8745, "step": 13400 }, { "epoch": 0.6697632604135451, "grad_norm": 3.0975561141967773, "learning_rate": 7.478908259421351e-05, "loss": 1.0553, "step": 13410 }, { "epoch": 0.6702627110178804, "grad_norm": 1.7106579542160034, "learning_rate": 7.475500859980942e-05, "loss": 1.0071, "step": 13420 }, { "epoch": 0.6707621616222156, "grad_norm": 1.1007097959518433, "learning_rate": 7.47209193688112e-05, "loss": 0.9686, "step": 13430 }, { "epoch": 0.6712616122265508, "grad_norm": 4.405452728271484, "learning_rate": 7.46868149222006e-05, "loss": 0.8554, "step": 13440 }, { "epoch": 0.671761062830886, "grad_norm": 1.4012975692749023, "learning_rate": 7.465269528096875e-05, "loss": 0.9445, "step": 13450 }, { "epoch": 0.6722605134352213, "grad_norm": 1.4058866500854492, "learning_rate": 7.461856046611614e-05, "loss": 0.7213, "step": 13460 }, { "epoch": 0.6727599640395565, "grad_norm": 2.2778289318084717, "learning_rate": 7.45844104986526e-05, "loss": 0.8677, "step": 13470 }, { "epoch": 0.6732594146438917, "grad_norm": 1.989430546760559, "learning_rate": 7.455024539959727e-05, "loss": 0.8598, "step": 13480 }, { "epoch": 0.6737588652482269, "grad_norm": 0.7515740394592285, "learning_rate": 7.451606518997862e-05, "loss": 0.8142, "step": 13490 }, { "epoch": 0.6742583158525621, "grad_norm": 1.0352113246917725, "learning_rate": 7.44818698908344e-05, "loss": 0.8191, "step": 13500 }, { "epoch": 0.6747577664568974, "grad_norm": 1.4111157655715942, "learning_rate": 7.444765952321164e-05, "loss": 0.8052, "step": 13510 }, { "epoch": 0.6752572170612327, "grad_norm": 0.8759949803352356, "learning_rate": 7.441343410816671e-05, "loss": 0.7079, "step": 13520 }, { "epoch": 0.6757566676655679, "grad_norm": 1.5545209646224976, "learning_rate": 7.437919366676517e-05, "loss": 0.8178, "step": 13530 }, { "epoch": 0.6762561182699031, "grad_norm": 6.742189884185791, "learning_rate": 7.434493822008187e-05, "loss": 0.9947, "step": 13540 }, { "epoch": 0.6767555688742384, "grad_norm": 3.9772582054138184, "learning_rate": 7.431066778920086e-05, "loss": 1.0145, "step": 13550 }, { "epoch": 0.6772550194785736, "grad_norm": 3.103926420211792, "learning_rate": 7.427638239521543e-05, "loss": 1.2054, "step": 13560 }, { "epoch": 0.6777544700829088, "grad_norm": 1.726408839225769, "learning_rate": 7.424208205922812e-05, "loss": 0.7034, "step": 13570 }, { "epoch": 0.678253920687244, "grad_norm": 2.6054139137268066, "learning_rate": 7.420776680235058e-05, "loss": 0.8105, "step": 13580 }, { "epoch": 0.6787533712915793, "grad_norm": 4.8610920906066895, "learning_rate": 7.417343664570372e-05, "loss": 0.7722, "step": 13590 }, { "epoch": 0.6792528218959145, "grad_norm": 2.0720362663269043, "learning_rate": 7.413909161041759e-05, "loss": 0.9086, "step": 13600 }, { "epoch": 0.6797522725002497, "grad_norm": 1.0414842367172241, "learning_rate": 7.410473171763141e-05, "loss": 0.6482, "step": 13610 }, { "epoch": 0.6802517231045849, "grad_norm": 1.2426433563232422, "learning_rate": 7.407035698849352e-05, "loss": 0.7256, "step": 13620 }, { "epoch": 0.6807511737089202, "grad_norm": 3.9983971118927, "learning_rate": 7.403596744416141e-05, "loss": 1.1534, "step": 13630 }, { "epoch": 0.6812506243132554, "grad_norm": 2.105839490890503, "learning_rate": 7.40015631058017e-05, "loss": 0.9809, "step": 13640 }, { "epoch": 0.6817500749175907, "grad_norm": 1.7513221502304077, "learning_rate": 7.39671439945901e-05, "loss": 0.8554, "step": 13650 }, { "epoch": 0.6822495255219259, "grad_norm": 4.3615522384643555, "learning_rate": 7.393271013171142e-05, "loss": 0.976, "step": 13660 }, { "epoch": 0.6827489761262611, "grad_norm": 2.0290863513946533, "learning_rate": 7.389826153835951e-05, "loss": 0.8127, "step": 13670 }, { "epoch": 0.6832484267305964, "grad_norm": 0.8345702886581421, "learning_rate": 7.386379823573736e-05, "loss": 1.0663, "step": 13680 }, { "epoch": 0.6837478773349316, "grad_norm": 1.057742953300476, "learning_rate": 7.382932024505695e-05, "loss": 0.7797, "step": 13690 }, { "epoch": 0.6842473279392668, "grad_norm": 3.281606674194336, "learning_rate": 7.379482758753936e-05, "loss": 0.7851, "step": 13700 }, { "epoch": 0.684746778543602, "grad_norm": 5.573545932769775, "learning_rate": 7.37603202844146e-05, "loss": 0.6852, "step": 13710 }, { "epoch": 0.6852462291479373, "grad_norm": 2.6311960220336914, "learning_rate": 7.372579835692182e-05, "loss": 0.8513, "step": 13720 }, { "epoch": 0.6857456797522725, "grad_norm": 1.099637508392334, "learning_rate": 7.369126182630907e-05, "loss": 0.7877, "step": 13730 }, { "epoch": 0.6862451303566077, "grad_norm": 4.3573503494262695, "learning_rate": 7.365671071383345e-05, "loss": 0.8938, "step": 13740 }, { "epoch": 0.6867445809609429, "grad_norm": 2.802907705307007, "learning_rate": 7.362214504076097e-05, "loss": 1.0123, "step": 13750 }, { "epoch": 0.6872440315652782, "grad_norm": 2.6232540607452393, "learning_rate": 7.35875648283667e-05, "loss": 0.7469, "step": 13760 }, { "epoch": 0.6877434821696135, "grad_norm": 2.878917932510376, "learning_rate": 7.355297009793456e-05, "loss": 0.7885, "step": 13770 }, { "epoch": 0.6882429327739487, "grad_norm": 2.5070621967315674, "learning_rate": 7.351836087075748e-05, "loss": 0.9247, "step": 13780 }, { "epoch": 0.6887423833782839, "grad_norm": 5.181169033050537, "learning_rate": 7.348373716813723e-05, "loss": 0.8374, "step": 13790 }, { "epoch": 0.6892418339826191, "grad_norm": 1.5600733757019043, "learning_rate": 7.34490990113846e-05, "loss": 0.7839, "step": 13800 }, { "epoch": 0.6897412845869544, "grad_norm": 1.6517608165740967, "learning_rate": 7.341444642181917e-05, "loss": 0.7345, "step": 13810 }, { "epoch": 0.6902407351912896, "grad_norm": 1.5919768810272217, "learning_rate": 7.337977942076948e-05, "loss": 0.6954, "step": 13820 }, { "epoch": 0.6907401857956248, "grad_norm": 1.4466248750686646, "learning_rate": 7.33450980295729e-05, "loss": 0.9392, "step": 13830 }, { "epoch": 0.69123963639996, "grad_norm": 0.9786671996116638, "learning_rate": 7.331040226957566e-05, "loss": 0.6875, "step": 13840 }, { "epoch": 0.6917390870042953, "grad_norm": 4.586532115936279, "learning_rate": 7.327569216213283e-05, "loss": 1.2082, "step": 13850 }, { "epoch": 0.6922385376086305, "grad_norm": 2.9234001636505127, "learning_rate": 7.324096772860837e-05, "loss": 0.9455, "step": 13860 }, { "epoch": 0.6927379882129657, "grad_norm": 5.324400424957275, "learning_rate": 7.320622899037496e-05, "loss": 0.8514, "step": 13870 }, { "epoch": 0.6932374388173009, "grad_norm": 1.910937786102295, "learning_rate": 7.317147596881416e-05, "loss": 1.0465, "step": 13880 }, { "epoch": 0.6937368894216362, "grad_norm": 1.2394311428070068, "learning_rate": 7.313670868531628e-05, "loss": 0.8079, "step": 13890 }, { "epoch": 0.6942363400259715, "grad_norm": 1.649618148803711, "learning_rate": 7.310192716128043e-05, "loss": 1.0114, "step": 13900 }, { "epoch": 0.6947357906303067, "grad_norm": 0.8796854019165039, "learning_rate": 7.306713141811448e-05, "loss": 0.7572, "step": 13910 }, { "epoch": 0.6952352412346419, "grad_norm": 1.5423787832260132, "learning_rate": 7.303232147723504e-05, "loss": 0.8204, "step": 13920 }, { "epoch": 0.6957346918389771, "grad_norm": 3.8138532638549805, "learning_rate": 7.299749736006748e-05, "loss": 1.0489, "step": 13930 }, { "epoch": 0.6962341424433124, "grad_norm": 1.8880746364593506, "learning_rate": 7.29626590880459e-05, "loss": 0.8099, "step": 13940 }, { "epoch": 0.6967335930476476, "grad_norm": 1.2062681913375854, "learning_rate": 7.292780668261306e-05, "loss": 0.8952, "step": 13950 }, { "epoch": 0.6972330436519828, "grad_norm": 1.6811144351959229, "learning_rate": 7.289294016522048e-05, "loss": 0.9711, "step": 13960 }, { "epoch": 0.697732494256318, "grad_norm": 3.8066329956054688, "learning_rate": 7.285805955732833e-05, "loss": 0.8488, "step": 13970 }, { "epoch": 0.6982319448606533, "grad_norm": 3.49137544631958, "learning_rate": 7.282316488040546e-05, "loss": 0.7784, "step": 13980 }, { "epoch": 0.6987313954649885, "grad_norm": 2.736736536026001, "learning_rate": 7.278825615592942e-05, "loss": 0.8092, "step": 13990 }, { "epoch": 0.6992308460693237, "grad_norm": 1.3519524335861206, "learning_rate": 7.27533334053863e-05, "loss": 0.6568, "step": 14000 }, { "epoch": 0.6997302966736589, "grad_norm": 2.622758388519287, "learning_rate": 7.271839665027098e-05, "loss": 1.0338, "step": 14010 }, { "epoch": 0.7002297472779943, "grad_norm": 1.6801555156707764, "learning_rate": 7.268344591208679e-05, "loss": 0.7619, "step": 14020 }, { "epoch": 0.7007291978823295, "grad_norm": 1.671966552734375, "learning_rate": 7.264848121234581e-05, "loss": 0.9252, "step": 14030 }, { "epoch": 0.7012286484866647, "grad_norm": 3.0895462036132812, "learning_rate": 7.261350257256861e-05, "loss": 0.9392, "step": 14040 }, { "epoch": 0.7017280990909999, "grad_norm": 1.9470264911651611, "learning_rate": 7.257851001428442e-05, "loss": 0.863, "step": 14050 }, { "epoch": 0.7022275496953352, "grad_norm": 2.663722276687622, "learning_rate": 7.254350355903095e-05, "loss": 0.7673, "step": 14060 }, { "epoch": 0.7027270002996704, "grad_norm": 3.543322801589966, "learning_rate": 7.250848322835458e-05, "loss": 0.8099, "step": 14070 }, { "epoch": 0.7032264509040056, "grad_norm": 5.451095104217529, "learning_rate": 7.24734490438101e-05, "loss": 0.8569, "step": 14080 }, { "epoch": 0.7037259015083408, "grad_norm": 1.6805773973464966, "learning_rate": 7.243840102696092e-05, "loss": 0.8407, "step": 14090 }, { "epoch": 0.704225352112676, "grad_norm": 1.407873511314392, "learning_rate": 7.240333919937893e-05, "loss": 1.1547, "step": 14100 }, { "epoch": 0.7047248027170113, "grad_norm": 4.562381267547607, "learning_rate": 7.236826358264452e-05, "loss": 1.0369, "step": 14110 }, { "epoch": 0.7052242533213465, "grad_norm": 2.872616767883301, "learning_rate": 7.233317419834657e-05, "loss": 1.013, "step": 14120 }, { "epoch": 0.7057237039256817, "grad_norm": 1.679598331451416, "learning_rate": 7.229807106808244e-05, "loss": 0.8949, "step": 14130 }, { "epoch": 0.7062231545300169, "grad_norm": 2.2586183547973633, "learning_rate": 7.226295421345793e-05, "loss": 0.8691, "step": 14140 }, { "epoch": 0.7067226051343523, "grad_norm": 3.6093177795410156, "learning_rate": 7.222782365608733e-05, "loss": 1.0768, "step": 14150 }, { "epoch": 0.7072220557386875, "grad_norm": 0.9443897008895874, "learning_rate": 7.219267941759333e-05, "loss": 0.7503, "step": 14160 }, { "epoch": 0.7077215063430227, "grad_norm": 1.0663303136825562, "learning_rate": 7.215752151960702e-05, "loss": 0.7296, "step": 14170 }, { "epoch": 0.7082209569473579, "grad_norm": 3.196075677871704, "learning_rate": 7.212234998376796e-05, "loss": 0.8507, "step": 14180 }, { "epoch": 0.7087204075516932, "grad_norm": 1.3957006931304932, "learning_rate": 7.208716483172404e-05, "loss": 0.9945, "step": 14190 }, { "epoch": 0.7092198581560284, "grad_norm": 2.797950029373169, "learning_rate": 7.205196608513159e-05, "loss": 0.8203, "step": 14200 }, { "epoch": 0.7097193087603636, "grad_norm": 2.4570584297180176, "learning_rate": 7.201675376565525e-05, "loss": 0.9767, "step": 14210 }, { "epoch": 0.7102187593646988, "grad_norm": 3.108165979385376, "learning_rate": 7.198152789496804e-05, "loss": 0.8216, "step": 14220 }, { "epoch": 0.710718209969034, "grad_norm": 0.8162358999252319, "learning_rate": 7.194628849475135e-05, "loss": 0.8584, "step": 14230 }, { "epoch": 0.7112176605733693, "grad_norm": 1.895009994506836, "learning_rate": 7.191103558669486e-05, "loss": 0.9255, "step": 14240 }, { "epoch": 0.7117171111777045, "grad_norm": 3.324361562728882, "learning_rate": 7.187576919249653e-05, "loss": 0.7713, "step": 14250 }, { "epoch": 0.7122165617820397, "grad_norm": 2.6848435401916504, "learning_rate": 7.184048933386274e-05, "loss": 0.8191, "step": 14260 }, { "epoch": 0.7127160123863749, "grad_norm": 1.1187618970870972, "learning_rate": 7.180519603250801e-05, "loss": 0.9038, "step": 14270 }, { "epoch": 0.7132154629907103, "grad_norm": 2.602867364883423, "learning_rate": 7.176988931015523e-05, "loss": 0.876, "step": 14280 }, { "epoch": 0.7137149135950455, "grad_norm": 2.6675899028778076, "learning_rate": 7.173456918853555e-05, "loss": 0.8261, "step": 14290 }, { "epoch": 0.7142143641993807, "grad_norm": 1.920407772064209, "learning_rate": 7.169923568938833e-05, "loss": 0.8312, "step": 14300 }, { "epoch": 0.7147138148037159, "grad_norm": 1.513654112815857, "learning_rate": 7.166388883446113e-05, "loss": 0.6906, "step": 14310 }, { "epoch": 0.7152132654080512, "grad_norm": 2.566261053085327, "learning_rate": 7.162852864550985e-05, "loss": 0.661, "step": 14320 }, { "epoch": 0.7157127160123864, "grad_norm": 2.6750776767730713, "learning_rate": 7.159315514429847e-05, "loss": 0.879, "step": 14330 }, { "epoch": 0.7162121666167216, "grad_norm": 1.8424862623214722, "learning_rate": 7.155776835259926e-05, "loss": 1.0846, "step": 14340 }, { "epoch": 0.7167116172210568, "grad_norm": 1.061425805091858, "learning_rate": 7.15223682921926e-05, "loss": 0.7079, "step": 14350 }, { "epoch": 0.7172110678253921, "grad_norm": 4.195835590362549, "learning_rate": 7.148695498486706e-05, "loss": 1.1204, "step": 14360 }, { "epoch": 0.7177105184297273, "grad_norm": 0.6323617100715637, "learning_rate": 7.145152845241937e-05, "loss": 0.8321, "step": 14370 }, { "epoch": 0.7182099690340625, "grad_norm": 4.118073463439941, "learning_rate": 7.141608871665443e-05, "loss": 0.833, "step": 14380 }, { "epoch": 0.7187094196383977, "grad_norm": 5.030306339263916, "learning_rate": 7.13806357993852e-05, "loss": 0.8106, "step": 14390 }, { "epoch": 0.719208870242733, "grad_norm": 2.2950358390808105, "learning_rate": 7.13451697224328e-05, "loss": 0.7925, "step": 14400 }, { "epoch": 0.7197083208470683, "grad_norm": 2.321904182434082, "learning_rate": 7.130969050762644e-05, "loss": 0.8813, "step": 14410 }, { "epoch": 0.7202077714514035, "grad_norm": 3.0077672004699707, "learning_rate": 7.12741981768034e-05, "loss": 0.7522, "step": 14420 }, { "epoch": 0.7207072220557387, "grad_norm": 2.579281806945801, "learning_rate": 7.123869275180907e-05, "loss": 0.9041, "step": 14430 }, { "epoch": 0.7212066726600739, "grad_norm": 0.8570026159286499, "learning_rate": 7.120317425449683e-05, "loss": 0.822, "step": 14440 }, { "epoch": 0.7217061232644092, "grad_norm": 0.9074950218200684, "learning_rate": 7.116764270672822e-05, "loss": 0.9113, "step": 14450 }, { "epoch": 0.7222055738687444, "grad_norm": 2.054647207260132, "learning_rate": 7.113209813037269e-05, "loss": 0.9178, "step": 14460 }, { "epoch": 0.7227050244730796, "grad_norm": 1.069751262664795, "learning_rate": 7.10965405473078e-05, "loss": 0.824, "step": 14470 }, { "epoch": 0.7232044750774148, "grad_norm": 1.2552647590637207, "learning_rate": 7.106096997941905e-05, "loss": 0.7028, "step": 14480 }, { "epoch": 0.7237039256817501, "grad_norm": 2.4348955154418945, "learning_rate": 7.102538644859997e-05, "loss": 0.8449, "step": 14490 }, { "epoch": 0.7242033762860853, "grad_norm": 2.4546279907226562, "learning_rate": 7.098978997675207e-05, "loss": 0.9386, "step": 14500 }, { "epoch": 0.7247028268904205, "grad_norm": 2.0445778369903564, "learning_rate": 7.095418058578481e-05, "loss": 0.9844, "step": 14510 }, { "epoch": 0.7252022774947557, "grad_norm": 0.9729900360107422, "learning_rate": 7.09185582976156e-05, "loss": 0.8822, "step": 14520 }, { "epoch": 0.725701728099091, "grad_norm": 1.3782198429107666, "learning_rate": 7.08829231341698e-05, "loss": 0.8396, "step": 14530 }, { "epoch": 0.7262011787034263, "grad_norm": 1.925838828086853, "learning_rate": 7.084727511738068e-05, "loss": 0.6616, "step": 14540 }, { "epoch": 0.7267006293077615, "grad_norm": 0.7856277823448181, "learning_rate": 7.081161426918947e-05, "loss": 0.7673, "step": 14550 }, { "epoch": 0.7272000799120967, "grad_norm": 2.251613140106201, "learning_rate": 7.07759406115452e-05, "loss": 0.8624, "step": 14560 }, { "epoch": 0.7276995305164319, "grad_norm": 1.078507900238037, "learning_rate": 7.074025416640489e-05, "loss": 0.7539, "step": 14570 }, { "epoch": 0.7281989811207672, "grad_norm": 5.626710891723633, "learning_rate": 7.070455495573334e-05, "loss": 0.9461, "step": 14580 }, { "epoch": 0.7286984317251024, "grad_norm": 1.1566303968429565, "learning_rate": 7.066884300150329e-05, "loss": 0.9538, "step": 14590 }, { "epoch": 0.7291978823294376, "grad_norm": 2.0159220695495605, "learning_rate": 7.063311832569526e-05, "loss": 0.6276, "step": 14600 }, { "epoch": 0.7296973329337728, "grad_norm": 1.379220962524414, "learning_rate": 7.059738095029765e-05, "loss": 0.8525, "step": 14610 }, { "epoch": 0.7301967835381081, "grad_norm": 1.6599482297897339, "learning_rate": 7.056163089730661e-05, "loss": 0.8347, "step": 14620 }, { "epoch": 0.7306962341424433, "grad_norm": 1.2135945558547974, "learning_rate": 7.052586818872616e-05, "loss": 0.7454, "step": 14630 }, { "epoch": 0.7311956847467785, "grad_norm": 1.0972834825515747, "learning_rate": 7.049009284656808e-05, "loss": 0.7955, "step": 14640 }, { "epoch": 0.7316951353511137, "grad_norm": 0.9523053765296936, "learning_rate": 7.045430489285193e-05, "loss": 0.9727, "step": 14650 }, { "epoch": 0.732194585955449, "grad_norm": 1.0745872259140015, "learning_rate": 7.041850434960502e-05, "loss": 0.8024, "step": 14660 }, { "epoch": 0.7326940365597843, "grad_norm": 1.3045142889022827, "learning_rate": 7.038269123886245e-05, "loss": 0.7976, "step": 14670 }, { "epoch": 0.7331934871641195, "grad_norm": 1.8772603273391724, "learning_rate": 7.034686558266699e-05, "loss": 1.0487, "step": 14680 }, { "epoch": 0.7336929377684547, "grad_norm": 1.138713002204895, "learning_rate": 7.031102740306918e-05, "loss": 0.7276, "step": 14690 }, { "epoch": 0.7341923883727899, "grad_norm": 0.8011229038238525, "learning_rate": 7.027517672212726e-05, "loss": 0.8344, "step": 14700 }, { "epoch": 0.7346918389771252, "grad_norm": 1.3305041790008545, "learning_rate": 7.023931356190714e-05, "loss": 0.8671, "step": 14710 }, { "epoch": 0.7351912895814604, "grad_norm": 4.798947334289551, "learning_rate": 7.020343794448247e-05, "loss": 0.7968, "step": 14720 }, { "epoch": 0.7356907401857956, "grad_norm": 2.0859580039978027, "learning_rate": 7.016754989193448e-05, "loss": 0.8018, "step": 14730 }, { "epoch": 0.7361901907901308, "grad_norm": 1.570669412612915, "learning_rate": 7.013164942635216e-05, "loss": 0.6164, "step": 14740 }, { "epoch": 0.7366896413944661, "grad_norm": 2.7400262355804443, "learning_rate": 7.0095736569832e-05, "loss": 0.9568, "step": 14750 }, { "epoch": 0.7371890919988013, "grad_norm": 1.430620789527893, "learning_rate": 7.005981134447827e-05, "loss": 0.8949, "step": 14760 }, { "epoch": 0.7376885426031365, "grad_norm": 0.8303244113922119, "learning_rate": 7.002387377240276e-05, "loss": 0.7792, "step": 14770 }, { "epoch": 0.7381879932074717, "grad_norm": 1.8937126398086548, "learning_rate": 6.998792387572488e-05, "loss": 0.7584, "step": 14780 }, { "epoch": 0.7386874438118071, "grad_norm": 4.442506790161133, "learning_rate": 6.99519616765716e-05, "loss": 0.8228, "step": 14790 }, { "epoch": 0.7391868944161423, "grad_norm": 2.819027900695801, "learning_rate": 6.991598719707754e-05, "loss": 0.9048, "step": 14800 }, { "epoch": 0.7396863450204775, "grad_norm": 1.1911256313323975, "learning_rate": 6.988000045938477e-05, "loss": 0.8307, "step": 14810 }, { "epoch": 0.7401857956248127, "grad_norm": 4.393520355224609, "learning_rate": 6.984400148564303e-05, "loss": 0.957, "step": 14820 }, { "epoch": 0.7406852462291479, "grad_norm": 2.6378350257873535, "learning_rate": 6.980799029800945e-05, "loss": 0.8641, "step": 14830 }, { "epoch": 0.7411846968334832, "grad_norm": 2.7240123748779297, "learning_rate": 6.97719669186488e-05, "loss": 0.8224, "step": 14840 }, { "epoch": 0.7416841474378184, "grad_norm": 1.3037054538726807, "learning_rate": 6.973593136973327e-05, "loss": 0.8244, "step": 14850 }, { "epoch": 0.7421835980421536, "grad_norm": 1.5047438144683838, "learning_rate": 6.969988367344262e-05, "loss": 0.6301, "step": 14860 }, { "epoch": 0.7426830486464888, "grad_norm": 5.839412689208984, "learning_rate": 6.9663823851964e-05, "loss": 0.841, "step": 14870 }, { "epoch": 0.7431824992508241, "grad_norm": 2.1603047847747803, "learning_rate": 6.962775192749209e-05, "loss": 0.9352, "step": 14880 }, { "epoch": 0.7436819498551593, "grad_norm": 3.4553701877593994, "learning_rate": 6.959166792222898e-05, "loss": 0.8384, "step": 14890 }, { "epoch": 0.7441814004594945, "grad_norm": 1.3646953105926514, "learning_rate": 6.955557185838422e-05, "loss": 0.7684, "step": 14900 }, { "epoch": 0.7446808510638298, "grad_norm": 4.13494873046875, "learning_rate": 6.951946375817474e-05, "loss": 0.8509, "step": 14910 }, { "epoch": 0.7451803016681651, "grad_norm": 1.2285308837890625, "learning_rate": 6.948334364382496e-05, "loss": 0.8276, "step": 14920 }, { "epoch": 0.7456797522725003, "grad_norm": 2.744372606277466, "learning_rate": 6.944721153756661e-05, "loss": 0.772, "step": 14930 }, { "epoch": 0.7461792028768355, "grad_norm": 2.268390417098999, "learning_rate": 6.941106746163884e-05, "loss": 1.0615, "step": 14940 }, { "epoch": 0.7466786534811707, "grad_norm": 1.9456779956817627, "learning_rate": 6.937491143828818e-05, "loss": 0.7258, "step": 14950 }, { "epoch": 0.7471781040855059, "grad_norm": 2.2939226627349854, "learning_rate": 6.933874348976848e-05, "loss": 1.0961, "step": 14960 }, { "epoch": 0.7476775546898412, "grad_norm": 1.3495417833328247, "learning_rate": 6.930256363834095e-05, "loss": 0.8552, "step": 14970 }, { "epoch": 0.7481770052941764, "grad_norm": 2.128901481628418, "learning_rate": 6.926637190627413e-05, "loss": 0.6513, "step": 14980 }, { "epoch": 0.7486764558985116, "grad_norm": 3.783480405807495, "learning_rate": 6.923016831584385e-05, "loss": 1.0062, "step": 14990 }, { "epoch": 0.7491759065028468, "grad_norm": 0.9812283515930176, "learning_rate": 6.919395288933326e-05, "loss": 0.7931, "step": 15000 }, { "epoch": 0.7496753571071821, "grad_norm": 1.0028212070465088, "learning_rate": 6.915772564903278e-05, "loss": 0.8394, "step": 15010 }, { "epoch": 0.7501748077115173, "grad_norm": 2.930734157562256, "learning_rate": 6.912148661724013e-05, "loss": 0.8039, "step": 15020 }, { "epoch": 0.7506742583158525, "grad_norm": 1.2220996618270874, "learning_rate": 6.908523581626026e-05, "loss": 0.801, "step": 15030 }, { "epoch": 0.7511737089201878, "grad_norm": 4.743584156036377, "learning_rate": 6.904897326840537e-05, "loss": 0.8226, "step": 15040 }, { "epoch": 0.7516731595245231, "grad_norm": 1.7444546222686768, "learning_rate": 6.90126989959949e-05, "loss": 0.893, "step": 15050 }, { "epoch": 0.7521726101288583, "grad_norm": 0.928571879863739, "learning_rate": 6.897641302135546e-05, "loss": 0.891, "step": 15060 }, { "epoch": 0.7526720607331935, "grad_norm": 1.5573511123657227, "learning_rate": 6.894011536682097e-05, "loss": 0.721, "step": 15070 }, { "epoch": 0.7531715113375287, "grad_norm": 1.379233717918396, "learning_rate": 6.89038060547324e-05, "loss": 0.8222, "step": 15080 }, { "epoch": 0.753670961941864, "grad_norm": 1.5439879894256592, "learning_rate": 6.8867485107438e-05, "loss": 0.9543, "step": 15090 }, { "epoch": 0.7541704125461992, "grad_norm": 4.321331977844238, "learning_rate": 6.883115254729315e-05, "loss": 0.9038, "step": 15100 }, { "epoch": 0.7546698631505344, "grad_norm": 1.3366317749023438, "learning_rate": 6.879480839666037e-05, "loss": 0.7858, "step": 15110 }, { "epoch": 0.7551693137548696, "grad_norm": 1.95951247215271, "learning_rate": 6.87584526779093e-05, "loss": 0.7887, "step": 15120 }, { "epoch": 0.7556687643592048, "grad_norm": 2.0874879360198975, "learning_rate": 6.872208541341673e-05, "loss": 1.0866, "step": 15130 }, { "epoch": 0.7561682149635401, "grad_norm": 4.349565505981445, "learning_rate": 6.868570662556656e-05, "loss": 0.9345, "step": 15140 }, { "epoch": 0.7566676655678753, "grad_norm": 0.8630907535552979, "learning_rate": 6.864931633674974e-05, "loss": 0.9329, "step": 15150 }, { "epoch": 0.7571671161722106, "grad_norm": 2.172128677368164, "learning_rate": 6.861291456936435e-05, "loss": 0.6593, "step": 15160 }, { "epoch": 0.7576665667765458, "grad_norm": 2.1994476318359375, "learning_rate": 6.857650134581548e-05, "loss": 0.8166, "step": 15170 }, { "epoch": 0.7581660173808811, "grad_norm": 2.3780038356781006, "learning_rate": 6.854007668851532e-05, "loss": 0.8221, "step": 15180 }, { "epoch": 0.7586654679852163, "grad_norm": 1.0928269624710083, "learning_rate": 6.850364061988309e-05, "loss": 0.6895, "step": 15190 }, { "epoch": 0.7591649185895515, "grad_norm": 2.102372407913208, "learning_rate": 6.846719316234503e-05, "loss": 0.6285, "step": 15200 }, { "epoch": 0.7596643691938867, "grad_norm": 1.18507719039917, "learning_rate": 6.843073433833433e-05, "loss": 0.7219, "step": 15210 }, { "epoch": 0.760163819798222, "grad_norm": 1.166977882385254, "learning_rate": 6.839426417029128e-05, "loss": 0.6944, "step": 15220 }, { "epoch": 0.7606632704025572, "grad_norm": 1.865195631980896, "learning_rate": 6.835778268066309e-05, "loss": 0.756, "step": 15230 }, { "epoch": 0.7611627210068924, "grad_norm": 2.251166820526123, "learning_rate": 6.832128989190395e-05, "loss": 0.8524, "step": 15240 }, { "epoch": 0.7616621716112276, "grad_norm": 1.7802624702453613, "learning_rate": 6.828478582647499e-05, "loss": 0.8078, "step": 15250 }, { "epoch": 0.7621616222155628, "grad_norm": 1.9270282983779907, "learning_rate": 6.82482705068443e-05, "loss": 0.83, "step": 15260 }, { "epoch": 0.7626610728198981, "grad_norm": 2.9224936962127686, "learning_rate": 6.82117439554869e-05, "loss": 0.7695, "step": 15270 }, { "epoch": 0.7631605234242334, "grad_norm": 3.9404971599578857, "learning_rate": 6.817520619488471e-05, "loss": 0.7579, "step": 15280 }, { "epoch": 0.7636599740285686, "grad_norm": 3.2672882080078125, "learning_rate": 6.813865724752655e-05, "loss": 0.827, "step": 15290 }, { "epoch": 0.7641594246329038, "grad_norm": 1.6898232698440552, "learning_rate": 6.810209713590814e-05, "loss": 0.8288, "step": 15300 }, { "epoch": 0.7646588752372391, "grad_norm": 2.8118972778320312, "learning_rate": 6.806552588253204e-05, "loss": 0.9278, "step": 15310 }, { "epoch": 0.7651583258415743, "grad_norm": 0.7698076963424683, "learning_rate": 6.802894350990771e-05, "loss": 0.8713, "step": 15320 }, { "epoch": 0.7656577764459095, "grad_norm": 1.5094542503356934, "learning_rate": 6.799235004055142e-05, "loss": 0.9178, "step": 15330 }, { "epoch": 0.7661572270502447, "grad_norm": 1.088309407234192, "learning_rate": 6.79557454969863e-05, "loss": 1.0671, "step": 15340 }, { "epoch": 0.76665667765458, "grad_norm": 1.8290929794311523, "learning_rate": 6.791912990174225e-05, "loss": 0.92, "step": 15350 }, { "epoch": 0.7671561282589152, "grad_norm": 2.360210657119751, "learning_rate": 6.788250327735603e-05, "loss": 0.7379, "step": 15360 }, { "epoch": 0.7676555788632504, "grad_norm": 1.6595243215560913, "learning_rate": 6.784586564637114e-05, "loss": 0.6747, "step": 15370 }, { "epoch": 0.7681550294675856, "grad_norm": 8.949684143066406, "learning_rate": 6.780921703133786e-05, "loss": 0.8656, "step": 15380 }, { "epoch": 0.7686544800719209, "grad_norm": 3.4417693614959717, "learning_rate": 6.77725574548133e-05, "loss": 0.9244, "step": 15390 }, { "epoch": 0.7691539306762561, "grad_norm": 3.909416437149048, "learning_rate": 6.773588693936119e-05, "loss": 0.873, "step": 15400 }, { "epoch": 0.7696533812805914, "grad_norm": 1.2584865093231201, "learning_rate": 6.769920550755213e-05, "loss": 0.9571, "step": 15410 }, { "epoch": 0.7701528318849266, "grad_norm": 1.364255666732788, "learning_rate": 6.766251318196332e-05, "loss": 0.7366, "step": 15420 }, { "epoch": 0.7706522824892618, "grad_norm": 1.2045292854309082, "learning_rate": 6.762580998517875e-05, "loss": 0.7667, "step": 15430 }, { "epoch": 0.7711517330935971, "grad_norm": 0.5805503129959106, "learning_rate": 6.758909593978904e-05, "loss": 0.9362, "step": 15440 }, { "epoch": 0.7716511836979323, "grad_norm": 2.512843370437622, "learning_rate": 6.755237106839154e-05, "loss": 0.7338, "step": 15450 }, { "epoch": 0.7721506343022675, "grad_norm": 4.625179767608643, "learning_rate": 6.751563539359023e-05, "loss": 0.9157, "step": 15460 }, { "epoch": 0.7726500849066027, "grad_norm": 4.126613140106201, "learning_rate": 6.747888893799577e-05, "loss": 0.9855, "step": 15470 }, { "epoch": 0.773149535510938, "grad_norm": 2.273141622543335, "learning_rate": 6.744213172422541e-05, "loss": 0.9608, "step": 15480 }, { "epoch": 0.7736489861152732, "grad_norm": 2.4304282665252686, "learning_rate": 6.740536377490306e-05, "loss": 0.7095, "step": 15490 }, { "epoch": 0.7741484367196084, "grad_norm": 2.9688961505889893, "learning_rate": 6.736858511265921e-05, "loss": 0.8406, "step": 15500 }, { "epoch": 0.7746478873239436, "grad_norm": 2.441664934158325, "learning_rate": 6.733179576013098e-05, "loss": 1.0852, "step": 15510 }, { "epoch": 0.775147337928279, "grad_norm": 3.616377830505371, "learning_rate": 6.729499573996201e-05, "loss": 0.795, "step": 15520 }, { "epoch": 0.7756467885326142, "grad_norm": 1.4523836374282837, "learning_rate": 6.72581850748026e-05, "loss": 0.8452, "step": 15530 }, { "epoch": 0.7761462391369494, "grad_norm": 1.5587873458862305, "learning_rate": 6.722136378730948e-05, "loss": 0.9114, "step": 15540 }, { "epoch": 0.7766456897412846, "grad_norm": 0.8045804500579834, "learning_rate": 6.718453190014602e-05, "loss": 0.6468, "step": 15550 }, { "epoch": 0.7771451403456198, "grad_norm": 0.9503934979438782, "learning_rate": 6.71476894359821e-05, "loss": 0.6298, "step": 15560 }, { "epoch": 0.7776445909499551, "grad_norm": 1.3000394105911255, "learning_rate": 6.711083641749404e-05, "loss": 0.8494, "step": 15570 }, { "epoch": 0.7781440415542903, "grad_norm": 1.8700768947601318, "learning_rate": 6.707397286736472e-05, "loss": 0.9172, "step": 15580 }, { "epoch": 0.7786434921586255, "grad_norm": 2.115405559539795, "learning_rate": 6.703709880828351e-05, "loss": 0.7838, "step": 15590 }, { "epoch": 0.7791429427629607, "grad_norm": 2.222421407699585, "learning_rate": 6.700021426294619e-05, "loss": 0.9487, "step": 15600 }, { "epoch": 0.779642393367296, "grad_norm": 1.515528917312622, "learning_rate": 6.696331925405504e-05, "loss": 0.8356, "step": 15610 }, { "epoch": 0.7801418439716312, "grad_norm": 1.561881422996521, "learning_rate": 6.692641380431879e-05, "loss": 0.8453, "step": 15620 }, { "epoch": 0.7806412945759664, "grad_norm": 1.2895303964614868, "learning_rate": 6.688949793645254e-05, "loss": 0.8658, "step": 15630 }, { "epoch": 0.7811407451803016, "grad_norm": 1.4048974514007568, "learning_rate": 6.685257167317786e-05, "loss": 0.8668, "step": 15640 }, { "epoch": 0.781640195784637, "grad_norm": 1.6670610904693604, "learning_rate": 6.681563503722268e-05, "loss": 0.9999, "step": 15650 }, { "epoch": 0.7821396463889722, "grad_norm": 2.2475922107696533, "learning_rate": 6.677868805132135e-05, "loss": 0.6803, "step": 15660 }, { "epoch": 0.7826390969933074, "grad_norm": 1.3656936883926392, "learning_rate": 6.674173073821454e-05, "loss": 0.8731, "step": 15670 }, { "epoch": 0.7831385475976426, "grad_norm": 1.6786401271820068, "learning_rate": 6.670476312064934e-05, "loss": 0.8405, "step": 15680 }, { "epoch": 0.7836379982019778, "grad_norm": 2.163822650909424, "learning_rate": 6.666778522137915e-05, "loss": 0.7527, "step": 15690 }, { "epoch": 0.7841374488063131, "grad_norm": 3.461778163909912, "learning_rate": 6.663079706316366e-05, "loss": 0.9058, "step": 15700 }, { "epoch": 0.7846368994106483, "grad_norm": 0.9004759192466736, "learning_rate": 6.659379866876894e-05, "loss": 0.8703, "step": 15710 }, { "epoch": 0.7851363500149835, "grad_norm": 1.10280179977417, "learning_rate": 6.655679006096734e-05, "loss": 0.7312, "step": 15720 }, { "epoch": 0.7856358006193187, "grad_norm": 2.1050362586975098, "learning_rate": 6.651977126253744e-05, "loss": 0.8669, "step": 15730 }, { "epoch": 0.786135251223654, "grad_norm": 2.1199238300323486, "learning_rate": 6.648274229626419e-05, "loss": 0.9007, "step": 15740 }, { "epoch": 0.7866347018279892, "grad_norm": 1.3192224502563477, "learning_rate": 6.644570318493874e-05, "loss": 0.7224, "step": 15750 }, { "epoch": 0.7871341524323244, "grad_norm": 3.3043160438537598, "learning_rate": 6.640865395135848e-05, "loss": 0.8629, "step": 15760 }, { "epoch": 0.7876336030366596, "grad_norm": 2.9390816688537598, "learning_rate": 6.637159461832705e-05, "loss": 0.8931, "step": 15770 }, { "epoch": 0.788133053640995, "grad_norm": 3.64808988571167, "learning_rate": 6.633452520865428e-05, "loss": 1.0136, "step": 15780 }, { "epoch": 0.7886325042453302, "grad_norm": 1.8212376832962036, "learning_rate": 6.629744574515626e-05, "loss": 0.9769, "step": 15790 }, { "epoch": 0.7891319548496654, "grad_norm": 1.8592854738235474, "learning_rate": 6.626035625065522e-05, "loss": 0.8619, "step": 15800 }, { "epoch": 0.7896314054540006, "grad_norm": 2.5067596435546875, "learning_rate": 6.622325674797955e-05, "loss": 0.9626, "step": 15810 }, { "epoch": 0.7901308560583359, "grad_norm": 0.9920625686645508, "learning_rate": 6.618614725996382e-05, "loss": 0.9642, "step": 15820 }, { "epoch": 0.7906303066626711, "grad_norm": 4.127655982971191, "learning_rate": 6.614902780944879e-05, "loss": 0.9797, "step": 15830 }, { "epoch": 0.7911297572670063, "grad_norm": 2.1669821739196777, "learning_rate": 6.611189841928128e-05, "loss": 0.6941, "step": 15840 }, { "epoch": 0.7916292078713415, "grad_norm": 1.6327829360961914, "learning_rate": 6.607475911231426e-05, "loss": 0.9778, "step": 15850 }, { "epoch": 0.7921286584756767, "grad_norm": 1.0588231086730957, "learning_rate": 6.603760991140681e-05, "loss": 0.9366, "step": 15860 }, { "epoch": 0.792628109080012, "grad_norm": 2.468372344970703, "learning_rate": 6.60004508394241e-05, "loss": 0.8592, "step": 15870 }, { "epoch": 0.7931275596843472, "grad_norm": 1.4136161804199219, "learning_rate": 6.596328191923734e-05, "loss": 0.839, "step": 15880 }, { "epoch": 0.7936270102886824, "grad_norm": 2.3732004165649414, "learning_rate": 6.592610317372387e-05, "loss": 0.7439, "step": 15890 }, { "epoch": 0.7941264608930176, "grad_norm": 3.4499545097351074, "learning_rate": 6.588891462576701e-05, "loss": 0.7574, "step": 15900 }, { "epoch": 0.794625911497353, "grad_norm": 2.6960248947143555, "learning_rate": 6.585171629825615e-05, "loss": 0.9557, "step": 15910 }, { "epoch": 0.7951253621016882, "grad_norm": 1.224998950958252, "learning_rate": 6.581450821408668e-05, "loss": 0.9733, "step": 15920 }, { "epoch": 0.7956248127060234, "grad_norm": 1.9505045413970947, "learning_rate": 6.577729039616002e-05, "loss": 0.6941, "step": 15930 }, { "epoch": 0.7961242633103586, "grad_norm": 1.4508978128433228, "learning_rate": 6.574006286738354e-05, "loss": 0.752, "step": 15940 }, { "epoch": 0.7966237139146939, "grad_norm": 2.164996862411499, "learning_rate": 6.570282565067065e-05, "loss": 1.0168, "step": 15950 }, { "epoch": 0.7971231645190291, "grad_norm": 1.3738571405410767, "learning_rate": 6.566557876894067e-05, "loss": 1.0198, "step": 15960 }, { "epoch": 0.7976226151233643, "grad_norm": 1.2134963274002075, "learning_rate": 6.562832224511888e-05, "loss": 0.9293, "step": 15970 }, { "epoch": 0.7981220657276995, "grad_norm": 1.7455850839614868, "learning_rate": 6.559105610213649e-05, "loss": 0.9195, "step": 15980 }, { "epoch": 0.7986215163320347, "grad_norm": 2.994417190551758, "learning_rate": 6.555378036293066e-05, "loss": 0.8009, "step": 15990 }, { "epoch": 0.79912096693637, "grad_norm": 3.2063357830047607, "learning_rate": 6.551649505044442e-05, "loss": 0.9724, "step": 16000 }, { "epoch": 0.7996204175407052, "grad_norm": 1.8232203722000122, "learning_rate": 6.547920018762672e-05, "loss": 0.7755, "step": 16010 }, { "epoch": 0.8001198681450404, "grad_norm": 1.8015304803848267, "learning_rate": 6.544189579743238e-05, "loss": 0.8729, "step": 16020 }, { "epoch": 0.8006193187493756, "grad_norm": 2.915858745574951, "learning_rate": 6.540458190282207e-05, "loss": 0.7648, "step": 16030 }, { "epoch": 0.801118769353711, "grad_norm": 2.4863932132720947, "learning_rate": 6.536725852676231e-05, "loss": 0.741, "step": 16040 }, { "epoch": 0.8016182199580462, "grad_norm": 2.194597005844116, "learning_rate": 6.53299256922255e-05, "loss": 0.9772, "step": 16050 }, { "epoch": 0.8021176705623814, "grad_norm": 1.7982498407363892, "learning_rate": 6.52925834221898e-05, "loss": 0.691, "step": 16060 }, { "epoch": 0.8026171211667166, "grad_norm": 1.1537199020385742, "learning_rate": 6.525523173963922e-05, "loss": 0.8748, "step": 16070 }, { "epoch": 0.8031165717710519, "grad_norm": 2.4173102378845215, "learning_rate": 6.521787066756354e-05, "loss": 0.8947, "step": 16080 }, { "epoch": 0.8036160223753871, "grad_norm": 1.8931891918182373, "learning_rate": 6.518050022895834e-05, "loss": 0.9027, "step": 16090 }, { "epoch": 0.8041154729797223, "grad_norm": 5.7853546142578125, "learning_rate": 6.514312044682494e-05, "loss": 0.8129, "step": 16100 }, { "epoch": 0.8046149235840575, "grad_norm": 1.2767221927642822, "learning_rate": 6.510573134417043e-05, "loss": 0.7389, "step": 16110 }, { "epoch": 0.8051143741883928, "grad_norm": 1.1879867315292358, "learning_rate": 6.506833294400763e-05, "loss": 0.8777, "step": 16120 }, { "epoch": 0.805613824792728, "grad_norm": 1.1377538442611694, "learning_rate": 6.503092526935508e-05, "loss": 1.0342, "step": 16130 }, { "epoch": 0.8061132753970632, "grad_norm": 2.8258886337280273, "learning_rate": 6.499350834323706e-05, "loss": 0.9868, "step": 16140 }, { "epoch": 0.8066127260013984, "grad_norm": 2.124138832092285, "learning_rate": 6.495608218868348e-05, "loss": 0.8422, "step": 16150 }, { "epoch": 0.8071121766057336, "grad_norm": 0.7884851098060608, "learning_rate": 6.491864682872997e-05, "loss": 0.8204, "step": 16160 }, { "epoch": 0.807611627210069, "grad_norm": 1.948546051979065, "learning_rate": 6.488120228641783e-05, "loss": 0.955, "step": 16170 }, { "epoch": 0.8081110778144042, "grad_norm": 2.1687068939208984, "learning_rate": 6.484374858479402e-05, "loss": 0.9238, "step": 16180 }, { "epoch": 0.8086105284187394, "grad_norm": 0.8520563840866089, "learning_rate": 6.48062857469111e-05, "loss": 0.9406, "step": 16190 }, { "epoch": 0.8091099790230746, "grad_norm": 4.233059883117676, "learning_rate": 6.47688137958273e-05, "loss": 1.0369, "step": 16200 }, { "epoch": 0.8096094296274099, "grad_norm": 2.6756277084350586, "learning_rate": 6.473133275460638e-05, "loss": 0.8692, "step": 16210 }, { "epoch": 0.8101088802317451, "grad_norm": 5.516173362731934, "learning_rate": 6.469384264631782e-05, "loss": 0.9871, "step": 16220 }, { "epoch": 0.8106083308360803, "grad_norm": 1.0051600933074951, "learning_rate": 6.465634349403656e-05, "loss": 0.7404, "step": 16230 }, { "epoch": 0.8111077814404155, "grad_norm": 2.0114965438842773, "learning_rate": 6.461883532084316e-05, "loss": 0.7105, "step": 16240 }, { "epoch": 0.8116072320447508, "grad_norm": 1.9885752201080322, "learning_rate": 6.458131814982373e-05, "loss": 0.8684, "step": 16250 }, { "epoch": 0.812106682649086, "grad_norm": 1.7377644777297974, "learning_rate": 6.454379200406995e-05, "loss": 0.923, "step": 16260 }, { "epoch": 0.8126061332534212, "grad_norm": 1.5406426191329956, "learning_rate": 6.450625690667895e-05, "loss": 0.723, "step": 16270 }, { "epoch": 0.8131055838577564, "grad_norm": 3.6169092655181885, "learning_rate": 6.446871288075345e-05, "loss": 0.9572, "step": 16280 }, { "epoch": 0.8136050344620916, "grad_norm": 1.060236930847168, "learning_rate": 6.443115994940156e-05, "loss": 0.7335, "step": 16290 }, { "epoch": 0.814104485066427, "grad_norm": 9.283597946166992, "learning_rate": 6.4393598135737e-05, "loss": 1.1551, "step": 16300 }, { "epoch": 0.8146039356707622, "grad_norm": 2.340845823287964, "learning_rate": 6.435602746287887e-05, "loss": 0.9098, "step": 16310 }, { "epoch": 0.8151033862750974, "grad_norm": 1.9237539768218994, "learning_rate": 6.431844795395177e-05, "loss": 0.9227, "step": 16320 }, { "epoch": 0.8156028368794326, "grad_norm": 1.4562195539474487, "learning_rate": 6.428085963208566e-05, "loss": 0.7405, "step": 16330 }, { "epoch": 0.8161022874837679, "grad_norm": 0.7459195852279663, "learning_rate": 6.424326252041602e-05, "loss": 1.0744, "step": 16340 }, { "epoch": 0.8166017380881031, "grad_norm": 1.7994718551635742, "learning_rate": 6.420565664208371e-05, "loss": 0.5823, "step": 16350 }, { "epoch": 0.8171011886924383, "grad_norm": 2.3014066219329834, "learning_rate": 6.416804202023495e-05, "loss": 0.9816, "step": 16360 }, { "epoch": 0.8176006392967735, "grad_norm": 2.7471611499786377, "learning_rate": 6.413041867802138e-05, "loss": 0.8872, "step": 16370 }, { "epoch": 0.8181000899011088, "grad_norm": 4.223907947540283, "learning_rate": 6.409278663859997e-05, "loss": 0.8984, "step": 16380 }, { "epoch": 0.818599540505444, "grad_norm": 1.6525057554244995, "learning_rate": 6.40551459251331e-05, "loss": 0.9931, "step": 16390 }, { "epoch": 0.8190989911097792, "grad_norm": 0.9545865058898926, "learning_rate": 6.401749656078844e-05, "loss": 0.845, "step": 16400 }, { "epoch": 0.8195984417141144, "grad_norm": 3.1227684020996094, "learning_rate": 6.397983856873902e-05, "loss": 0.7437, "step": 16410 }, { "epoch": 0.8200978923184497, "grad_norm": 1.1917845010757446, "learning_rate": 6.394217197216312e-05, "loss": 0.8466, "step": 16420 }, { "epoch": 0.820597342922785, "grad_norm": 6.300127983093262, "learning_rate": 6.390449679424439e-05, "loss": 0.9267, "step": 16430 }, { "epoch": 0.8210967935271202, "grad_norm": 1.3488715887069702, "learning_rate": 6.386681305817172e-05, "loss": 0.6616, "step": 16440 }, { "epoch": 0.8215962441314554, "grad_norm": 1.7635983228683472, "learning_rate": 6.38291207871393e-05, "loss": 1.0489, "step": 16450 }, { "epoch": 0.8220956947357906, "grad_norm": 3.4676079750061035, "learning_rate": 6.37914200043465e-05, "loss": 0.745, "step": 16460 }, { "epoch": 0.8225951453401259, "grad_norm": 2.3036060333251953, "learning_rate": 6.375371073299802e-05, "loss": 0.8561, "step": 16470 }, { "epoch": 0.8230945959444611, "grad_norm": 0.7986567616462708, "learning_rate": 6.371599299630374e-05, "loss": 0.8123, "step": 16480 }, { "epoch": 0.8235940465487963, "grad_norm": 1.3983267545700073, "learning_rate": 6.367826681747872e-05, "loss": 1.0328, "step": 16490 }, { "epoch": 0.8240934971531315, "grad_norm": 1.514461874961853, "learning_rate": 6.364053221974329e-05, "loss": 1.1373, "step": 16500 }, { "epoch": 0.8245929477574668, "grad_norm": 0.82142573595047, "learning_rate": 6.36027892263229e-05, "loss": 0.6504, "step": 16510 }, { "epoch": 0.825092398361802, "grad_norm": 3.0528926849365234, "learning_rate": 6.35650378604482e-05, "loss": 0.8885, "step": 16520 }, { "epoch": 0.8255918489661372, "grad_norm": 1.6076291799545288, "learning_rate": 6.352727814535498e-05, "loss": 0.9077, "step": 16530 }, { "epoch": 0.8260912995704724, "grad_norm": 2.3793752193450928, "learning_rate": 6.348951010428416e-05, "loss": 0.8903, "step": 16540 }, { "epoch": 0.8265907501748078, "grad_norm": 1.5829384326934814, "learning_rate": 6.345173376048179e-05, "loss": 0.9315, "step": 16550 }, { "epoch": 0.827090200779143, "grad_norm": 1.056965708732605, "learning_rate": 6.341394913719908e-05, "loss": 0.6395, "step": 16560 }, { "epoch": 0.8275896513834782, "grad_norm": 4.013810157775879, "learning_rate": 6.337615625769225e-05, "loss": 1.025, "step": 16570 }, { "epoch": 0.8280891019878134, "grad_norm": 2.1943602561950684, "learning_rate": 6.333835514522265e-05, "loss": 0.7397, "step": 16580 }, { "epoch": 0.8285885525921486, "grad_norm": 0.603503942489624, "learning_rate": 6.330054582305671e-05, "loss": 0.644, "step": 16590 }, { "epoch": 0.8290880031964839, "grad_norm": 1.5025651454925537, "learning_rate": 6.326272831446587e-05, "loss": 0.8164, "step": 16600 }, { "epoch": 0.8295874538008191, "grad_norm": 1.9878567457199097, "learning_rate": 6.322490264272665e-05, "loss": 0.7026, "step": 16610 }, { "epoch": 0.8300869044051543, "grad_norm": 1.1617220640182495, "learning_rate": 6.318706883112058e-05, "loss": 0.8819, "step": 16620 }, { "epoch": 0.8305863550094895, "grad_norm": 2.0502772331237793, "learning_rate": 6.314922690293416e-05, "loss": 0.9579, "step": 16630 }, { "epoch": 0.8310858056138248, "grad_norm": 3.7792224884033203, "learning_rate": 6.311137688145898e-05, "loss": 0.91, "step": 16640 }, { "epoch": 0.83158525621816, "grad_norm": 0.6884984374046326, "learning_rate": 6.307351878999151e-05, "loss": 0.739, "step": 16650 }, { "epoch": 0.8320847068224952, "grad_norm": 1.9633368253707886, "learning_rate": 6.303565265183326e-05, "loss": 0.748, "step": 16660 }, { "epoch": 0.8325841574268305, "grad_norm": 1.5782347917556763, "learning_rate": 6.299777849029066e-05, "loss": 0.8776, "step": 16670 }, { "epoch": 0.8330836080311658, "grad_norm": 1.628798246383667, "learning_rate": 6.295989632867506e-05, "loss": 0.6668, "step": 16680 }, { "epoch": 0.833583058635501, "grad_norm": 2.231889009475708, "learning_rate": 6.292200619030278e-05, "loss": 0.7465, "step": 16690 }, { "epoch": 0.8340825092398362, "grad_norm": 2.304318904876709, "learning_rate": 6.288410809849502e-05, "loss": 0.8359, "step": 16700 }, { "epoch": 0.8345819598441714, "grad_norm": 3.2880563735961914, "learning_rate": 6.284620207657787e-05, "loss": 0.9138, "step": 16710 }, { "epoch": 0.8350814104485066, "grad_norm": 1.3525863885879517, "learning_rate": 6.280828814788232e-05, "loss": 1.0842, "step": 16720 }, { "epoch": 0.8355808610528419, "grad_norm": 1.97789466381073, "learning_rate": 6.277036633574421e-05, "loss": 0.8623, "step": 16730 }, { "epoch": 0.8360803116571771, "grad_norm": 1.0432552099227905, "learning_rate": 6.273243666350427e-05, "loss": 0.7067, "step": 16740 }, { "epoch": 0.8365797622615123, "grad_norm": 2.6929054260253906, "learning_rate": 6.269449915450803e-05, "loss": 0.9287, "step": 16750 }, { "epoch": 0.8370792128658475, "grad_norm": 1.5089311599731445, "learning_rate": 6.265655383210582e-05, "loss": 0.7812, "step": 16760 }, { "epoch": 0.8375786634701828, "grad_norm": 2.037862777709961, "learning_rate": 6.261860071965286e-05, "loss": 0.7476, "step": 16770 }, { "epoch": 0.838078114074518, "grad_norm": 1.1124966144561768, "learning_rate": 6.258063984050907e-05, "loss": 0.8935, "step": 16780 }, { "epoch": 0.8385775646788532, "grad_norm": 2.5978944301605225, "learning_rate": 6.254267121803922e-05, "loss": 0.9156, "step": 16790 }, { "epoch": 0.8390770152831885, "grad_norm": 1.905880331993103, "learning_rate": 6.250469487561283e-05, "loss": 0.9308, "step": 16800 }, { "epoch": 0.8395764658875238, "grad_norm": 0.8824878931045532, "learning_rate": 6.246671083660416e-05, "loss": 0.7035, "step": 16810 }, { "epoch": 0.840075916491859, "grad_norm": 1.4680544137954712, "learning_rate": 6.24287191243922e-05, "loss": 0.8742, "step": 16820 }, { "epoch": 0.8405753670961942, "grad_norm": 1.9640511274337769, "learning_rate": 6.239071976236069e-05, "loss": 0.6797, "step": 16830 }, { "epoch": 0.8410748177005294, "grad_norm": 2.1258668899536133, "learning_rate": 6.235271277389805e-05, "loss": 1.1352, "step": 16840 }, { "epoch": 0.8415742683048647, "grad_norm": 1.9400843381881714, "learning_rate": 6.231469818239741e-05, "loss": 0.7029, "step": 16850 }, { "epoch": 0.8420737189091999, "grad_norm": 0.7889087200164795, "learning_rate": 6.227667601125657e-05, "loss": 0.7216, "step": 16860 }, { "epoch": 0.8425731695135351, "grad_norm": 0.7702601552009583, "learning_rate": 6.223864628387803e-05, "loss": 0.7218, "step": 16870 }, { "epoch": 0.8430726201178703, "grad_norm": 1.353019118309021, "learning_rate": 6.220060902366888e-05, "loss": 0.732, "step": 16880 }, { "epoch": 0.8435720707222055, "grad_norm": 0.7442846298217773, "learning_rate": 6.216256425404089e-05, "loss": 0.8635, "step": 16890 }, { "epoch": 0.8440715213265408, "grad_norm": 4.365800857543945, "learning_rate": 6.212451199841043e-05, "loss": 0.8221, "step": 16900 }, { "epoch": 0.844570971930876, "grad_norm": 3.0228402614593506, "learning_rate": 6.208645228019852e-05, "loss": 0.64, "step": 16910 }, { "epoch": 0.8450704225352113, "grad_norm": 1.9168883562088013, "learning_rate": 6.204838512283072e-05, "loss": 0.8189, "step": 16920 }, { "epoch": 0.8455698731395465, "grad_norm": 2.254575729370117, "learning_rate": 6.201031054973721e-05, "loss": 0.7512, "step": 16930 }, { "epoch": 0.8460693237438818, "grad_norm": 2.7147164344787598, "learning_rate": 6.197222858435267e-05, "loss": 0.8503, "step": 16940 }, { "epoch": 0.846568774348217, "grad_norm": 2.5285685062408447, "learning_rate": 6.193413925011642e-05, "loss": 0.8169, "step": 16950 }, { "epoch": 0.8470682249525522, "grad_norm": 1.0177611112594604, "learning_rate": 6.189604257047224e-05, "loss": 0.7792, "step": 16960 }, { "epoch": 0.8475676755568874, "grad_norm": 1.2296720743179321, "learning_rate": 6.185793856886849e-05, "loss": 0.6905, "step": 16970 }, { "epoch": 0.8480671261612227, "grad_norm": 4.773038864135742, "learning_rate": 6.181982726875799e-05, "loss": 1.1014, "step": 16980 }, { "epoch": 0.8485665767655579, "grad_norm": 3.047926425933838, "learning_rate": 6.178170869359808e-05, "loss": 0.823, "step": 16990 }, { "epoch": 0.8490660273698931, "grad_norm": 1.9710932970046997, "learning_rate": 6.174358286685054e-05, "loss": 0.8389, "step": 17000 }, { "epoch": 0.8495654779742283, "grad_norm": 1.147913932800293, "learning_rate": 6.170544981198168e-05, "loss": 0.97, "step": 17010 }, { "epoch": 0.8500649285785635, "grad_norm": 1.007738709449768, "learning_rate": 6.16673095524622e-05, "loss": 0.9433, "step": 17020 }, { "epoch": 0.8505643791828988, "grad_norm": 1.1430408954620361, "learning_rate": 6.162916211176725e-05, "loss": 0.9482, "step": 17030 }, { "epoch": 0.851063829787234, "grad_norm": 1.4783233404159546, "learning_rate": 6.159100751337642e-05, "loss": 0.8203, "step": 17040 }, { "epoch": 0.8515632803915693, "grad_norm": 1.9705936908721924, "learning_rate": 6.155284578077369e-05, "loss": 0.8806, "step": 17050 }, { "epoch": 0.8520627309959045, "grad_norm": 2.27795672416687, "learning_rate": 6.15146769374474e-05, "loss": 0.9348, "step": 17060 }, { "epoch": 0.8525621816002398, "grad_norm": 3.018404960632324, "learning_rate": 6.147650100689033e-05, "loss": 1.0186, "step": 17070 }, { "epoch": 0.853061632204575, "grad_norm": 1.2547297477722168, "learning_rate": 6.14383180125996e-05, "loss": 0.9815, "step": 17080 }, { "epoch": 0.8535610828089102, "grad_norm": 1.3103141784667969, "learning_rate": 6.140012797807664e-05, "loss": 0.6705, "step": 17090 }, { "epoch": 0.8540605334132454, "grad_norm": 1.1061055660247803, "learning_rate": 6.136193092682725e-05, "loss": 0.7973, "step": 17100 }, { "epoch": 0.8545599840175807, "grad_norm": 1.5585850477218628, "learning_rate": 6.132372688236152e-05, "loss": 0.7361, "step": 17110 }, { "epoch": 0.8550594346219159, "grad_norm": 2.628854274749756, "learning_rate": 6.128551586819391e-05, "loss": 1.0098, "step": 17120 }, { "epoch": 0.8555588852262511, "grad_norm": 1.6417533159255981, "learning_rate": 6.124729790784308e-05, "loss": 0.938, "step": 17130 }, { "epoch": 0.8560583358305863, "grad_norm": 1.5524022579193115, "learning_rate": 6.120907302483205e-05, "loss": 0.914, "step": 17140 }, { "epoch": 0.8565577864349215, "grad_norm": 0.955947756767273, "learning_rate": 6.1170841242688e-05, "loss": 0.6852, "step": 17150 }, { "epoch": 0.8570572370392568, "grad_norm": 1.8045988082885742, "learning_rate": 6.113260258494247e-05, "loss": 0.8964, "step": 17160 }, { "epoch": 0.857556687643592, "grad_norm": 0.7617775201797485, "learning_rate": 6.109435707513118e-05, "loss": 0.8379, "step": 17170 }, { "epoch": 0.8580561382479273, "grad_norm": 2.472621202468872, "learning_rate": 6.105610473679403e-05, "loss": 0.8932, "step": 17180 }, { "epoch": 0.8585555888522625, "grad_norm": 1.8227863311767578, "learning_rate": 6.1017845593475166e-05, "loss": 0.8147, "step": 17190 }, { "epoch": 0.8590550394565978, "grad_norm": 0.4599326550960541, "learning_rate": 6.097957966872294e-05, "loss": 0.8396, "step": 17200 }, { "epoch": 0.859554490060933, "grad_norm": 2.4516680240631104, "learning_rate": 6.094130698608983e-05, "loss": 1.1839, "step": 17210 }, { "epoch": 0.8600539406652682, "grad_norm": 1.868933081626892, "learning_rate": 6.0903027569132506e-05, "loss": 1.1272, "step": 17220 }, { "epoch": 0.8605533912696034, "grad_norm": 2.447511911392212, "learning_rate": 6.086474144141178e-05, "loss": 0.9087, "step": 17230 }, { "epoch": 0.8610528418739387, "grad_norm": 2.8627376556396484, "learning_rate": 6.082644862649256e-05, "loss": 0.7799, "step": 17240 }, { "epoch": 0.8615522924782739, "grad_norm": 3.317176342010498, "learning_rate": 6.078814914794393e-05, "loss": 1.0523, "step": 17250 }, { "epoch": 0.8620517430826091, "grad_norm": 0.8495808243751526, "learning_rate": 6.0749843029339036e-05, "loss": 0.8708, "step": 17260 }, { "epoch": 0.8625511936869443, "grad_norm": 1.3395750522613525, "learning_rate": 6.071153029425509e-05, "loss": 1.0773, "step": 17270 }, { "epoch": 0.8630506442912796, "grad_norm": 1.3448898792266846, "learning_rate": 6.0673210966273456e-05, "loss": 0.985, "step": 17280 }, { "epoch": 0.8635500948956149, "grad_norm": 3.114107847213745, "learning_rate": 6.0634885068979466e-05, "loss": 0.8944, "step": 17290 }, { "epoch": 0.8640495454999501, "grad_norm": 2.589629888534546, "learning_rate": 6.0596552625962536e-05, "loss": 0.8187, "step": 17300 }, { "epoch": 0.8645489961042853, "grad_norm": 1.3278167247772217, "learning_rate": 6.055821366081612e-05, "loss": 0.9411, "step": 17310 }, { "epoch": 0.8650484467086205, "grad_norm": 2.274196147918701, "learning_rate": 6.0519868197137664e-05, "loss": 0.9286, "step": 17320 }, { "epoch": 0.8655478973129558, "grad_norm": 2.7580153942108154, "learning_rate": 6.0481516258528635e-05, "loss": 0.7447, "step": 17330 }, { "epoch": 0.866047347917291, "grad_norm": 2.3561079502105713, "learning_rate": 6.044315786859447e-05, "loss": 0.8562, "step": 17340 }, { "epoch": 0.8665467985216262, "grad_norm": 1.28364896774292, "learning_rate": 6.0404793050944596e-05, "loss": 0.691, "step": 17350 }, { "epoch": 0.8670462491259614, "grad_norm": 1.2503184080123901, "learning_rate": 6.0366421829192356e-05, "loss": 0.8561, "step": 17360 }, { "epoch": 0.8675456997302967, "grad_norm": 3.0285422801971436, "learning_rate": 6.0328044226955075e-05, "loss": 1.1469, "step": 17370 }, { "epoch": 0.8680451503346319, "grad_norm": 1.0694063901901245, "learning_rate": 6.028966026785399e-05, "loss": 0.6241, "step": 17380 }, { "epoch": 0.8685446009389671, "grad_norm": 1.8019158840179443, "learning_rate": 6.025126997551427e-05, "loss": 0.8304, "step": 17390 }, { "epoch": 0.8690440515433023, "grad_norm": 3.0490691661834717, "learning_rate": 6.021287337356493e-05, "loss": 0.9058, "step": 17400 }, { "epoch": 0.8695435021476376, "grad_norm": 1.581011414527893, "learning_rate": 6.017447048563891e-05, "loss": 0.7703, "step": 17410 }, { "epoch": 0.8700429527519729, "grad_norm": 1.7052842378616333, "learning_rate": 6.013606133537303e-05, "loss": 0.8926, "step": 17420 }, { "epoch": 0.8705424033563081, "grad_norm": 5.302414417266846, "learning_rate": 6.0097645946407945e-05, "loss": 0.9729, "step": 17430 }, { "epoch": 0.8710418539606433, "grad_norm": 3.3168435096740723, "learning_rate": 6.0059224342388134e-05, "loss": 0.8982, "step": 17440 }, { "epoch": 0.8715413045649785, "grad_norm": 1.1036734580993652, "learning_rate": 6.002079654696192e-05, "loss": 0.8249, "step": 17450 }, { "epoch": 0.8720407551693138, "grad_norm": 2.3743739128112793, "learning_rate": 5.998236258378143e-05, "loss": 0.7628, "step": 17460 }, { "epoch": 0.872540205773649, "grad_norm": 1.0588688850402832, "learning_rate": 5.994392247650262e-05, "loss": 0.6464, "step": 17470 }, { "epoch": 0.8730396563779842, "grad_norm": 1.3939181566238403, "learning_rate": 5.990547624878516e-05, "loss": 0.7323, "step": 17480 }, { "epoch": 0.8735391069823194, "grad_norm": 3.428685188293457, "learning_rate": 5.9867023924292575e-05, "loss": 0.8373, "step": 17490 }, { "epoch": 0.8740385575866547, "grad_norm": 2.5662968158721924, "learning_rate": 5.982856552669205e-05, "loss": 0.8612, "step": 17500 }, { "epoch": 0.8745380081909899, "grad_norm": 3.1228392124176025, "learning_rate": 5.9790101079654556e-05, "loss": 0.7528, "step": 17510 }, { "epoch": 0.8750374587953251, "grad_norm": 6.499607563018799, "learning_rate": 5.975163060685481e-05, "loss": 1.174, "step": 17520 }, { "epoch": 0.8755369093996603, "grad_norm": 1.335733413696289, "learning_rate": 5.9713154131971206e-05, "loss": 0.9548, "step": 17530 }, { "epoch": 0.8760363600039957, "grad_norm": 2.2760865688323975, "learning_rate": 5.9674671678685813e-05, "loss": 0.9427, "step": 17540 }, { "epoch": 0.8765358106083309, "grad_norm": 3.9111785888671875, "learning_rate": 5.963618327068443e-05, "loss": 0.9205, "step": 17550 }, { "epoch": 0.8770352612126661, "grad_norm": 2.4227874279022217, "learning_rate": 5.959768893165648e-05, "loss": 0.7424, "step": 17560 }, { "epoch": 0.8775347118170013, "grad_norm": 1.6442713737487793, "learning_rate": 5.9559188685295067e-05, "loss": 0.9272, "step": 17570 }, { "epoch": 0.8780341624213366, "grad_norm": 1.4858150482177734, "learning_rate": 5.952068255529689e-05, "loss": 0.9458, "step": 17580 }, { "epoch": 0.8785336130256718, "grad_norm": 2.355178117752075, "learning_rate": 5.9482170565362314e-05, "loss": 0.8913, "step": 17590 }, { "epoch": 0.879033063630007, "grad_norm": 0.8928540945053101, "learning_rate": 5.944365273919531e-05, "loss": 0.8276, "step": 17600 }, { "epoch": 0.8795325142343422, "grad_norm": 1.362815022468567, "learning_rate": 5.9405129100503384e-05, "loss": 1.0135, "step": 17610 }, { "epoch": 0.8800319648386774, "grad_norm": 1.983511209487915, "learning_rate": 5.9366599672997694e-05, "loss": 0.9972, "step": 17620 }, { "epoch": 0.8805314154430127, "grad_norm": 0.8186060786247253, "learning_rate": 5.9328064480392886e-05, "loss": 0.7884, "step": 17630 }, { "epoch": 0.8810308660473479, "grad_norm": 3.547245740890503, "learning_rate": 5.928952354640723e-05, "loss": 0.7936, "step": 17640 }, { "epoch": 0.8815303166516831, "grad_norm": 1.1291515827178955, "learning_rate": 5.925097689476249e-05, "loss": 0.7797, "step": 17650 }, { "epoch": 0.8820297672560183, "grad_norm": 0.8624637126922607, "learning_rate": 5.921242454918394e-05, "loss": 0.8495, "step": 17660 }, { "epoch": 0.8825292178603537, "grad_norm": 3.757883071899414, "learning_rate": 5.917386653340036e-05, "loss": 0.7233, "step": 17670 }, { "epoch": 0.8830286684646889, "grad_norm": 1.6425225734710693, "learning_rate": 5.913530287114406e-05, "loss": 0.8921, "step": 17680 }, { "epoch": 0.8835281190690241, "grad_norm": 1.29030442237854, "learning_rate": 5.909673358615079e-05, "loss": 0.7937, "step": 17690 }, { "epoch": 0.8840275696733593, "grad_norm": 1.672472357749939, "learning_rate": 5.905815870215976e-05, "loss": 0.7725, "step": 17700 }, { "epoch": 0.8845270202776946, "grad_norm": 1.3420329093933105, "learning_rate": 5.901957824291362e-05, "loss": 0.879, "step": 17710 }, { "epoch": 0.8850264708820298, "grad_norm": 1.5928447246551514, "learning_rate": 5.8980992232158505e-05, "loss": 0.733, "step": 17720 }, { "epoch": 0.885525921486365, "grad_norm": 2.805663585662842, "learning_rate": 5.894240069364389e-05, "loss": 0.9222, "step": 17730 }, { "epoch": 0.8860253720907002, "grad_norm": 2.8470332622528076, "learning_rate": 5.890380365112272e-05, "loss": 0.7656, "step": 17740 }, { "epoch": 0.8865248226950354, "grad_norm": 1.383072853088379, "learning_rate": 5.886520112835128e-05, "loss": 0.7539, "step": 17750 }, { "epoch": 0.8870242732993707, "grad_norm": 1.161964774131775, "learning_rate": 5.8826593149089235e-05, "loss": 0.8152, "step": 17760 }, { "epoch": 0.8875237239037059, "grad_norm": 1.071505069732666, "learning_rate": 5.878797973709964e-05, "loss": 0.7832, "step": 17770 }, { "epoch": 0.8880231745080411, "grad_norm": 0.9550650119781494, "learning_rate": 5.8749360916148865e-05, "loss": 0.7887, "step": 17780 }, { "epoch": 0.8885226251123763, "grad_norm": 2.1567580699920654, "learning_rate": 5.871073671000661e-05, "loss": 0.9789, "step": 17790 }, { "epoch": 0.8890220757167117, "grad_norm": 3.1140804290771484, "learning_rate": 5.8672107142445906e-05, "loss": 1.1522, "step": 17800 }, { "epoch": 0.8895215263210469, "grad_norm": 3.8770267963409424, "learning_rate": 5.8633472237243083e-05, "loss": 0.9059, "step": 17810 }, { "epoch": 0.8900209769253821, "grad_norm": 1.022334098815918, "learning_rate": 5.859483201817772e-05, "loss": 0.9102, "step": 17820 }, { "epoch": 0.8905204275297173, "grad_norm": 2.7335667610168457, "learning_rate": 5.855618650903272e-05, "loss": 0.9173, "step": 17830 }, { "epoch": 0.8910198781340526, "grad_norm": 1.1789907217025757, "learning_rate": 5.851753573359419e-05, "loss": 0.8724, "step": 17840 }, { "epoch": 0.8915193287383878, "grad_norm": 1.1129345893859863, "learning_rate": 5.8478879715651547e-05, "loss": 0.6434, "step": 17850 }, { "epoch": 0.892018779342723, "grad_norm": 2.9063918590545654, "learning_rate": 5.844021847899734e-05, "loss": 1.0347, "step": 17860 }, { "epoch": 0.8925182299470582, "grad_norm": 1.131264328956604, "learning_rate": 5.8401552047427444e-05, "loss": 0.9652, "step": 17870 }, { "epoch": 0.8930176805513935, "grad_norm": 0.964443564414978, "learning_rate": 5.83628804447408e-05, "loss": 0.9808, "step": 17880 }, { "epoch": 0.8935171311557287, "grad_norm": 2.7607600688934326, "learning_rate": 5.8324203694739664e-05, "loss": 0.9103, "step": 17890 }, { "epoch": 0.8940165817600639, "grad_norm": 1.1801255941390991, "learning_rate": 5.828552182122936e-05, "loss": 0.7595, "step": 17900 }, { "epoch": 0.8945160323643991, "grad_norm": 1.7192250490188599, "learning_rate": 5.8246834848018425e-05, "loss": 0.7443, "step": 17910 }, { "epoch": 0.8950154829687343, "grad_norm": 1.1626238822937012, "learning_rate": 5.82081427989185e-05, "loss": 0.8838, "step": 17920 }, { "epoch": 0.8955149335730697, "grad_norm": 1.4231460094451904, "learning_rate": 5.816944569774436e-05, "loss": 0.795, "step": 17930 }, { "epoch": 0.8960143841774049, "grad_norm": 1.108681082725525, "learning_rate": 5.81307435683139e-05, "loss": 0.9652, "step": 17940 }, { "epoch": 0.8965138347817401, "grad_norm": 1.17128324508667, "learning_rate": 5.8092036434448115e-05, "loss": 0.6767, "step": 17950 }, { "epoch": 0.8970132853860753, "grad_norm": 4.2591986656188965, "learning_rate": 5.805332431997106e-05, "loss": 1.0136, "step": 17960 }, { "epoch": 0.8975127359904106, "grad_norm": 2.6580145359039307, "learning_rate": 5.8014607248709843e-05, "loss": 1.0335, "step": 17970 }, { "epoch": 0.8980121865947458, "grad_norm": 3.448183059692383, "learning_rate": 5.7975885244494665e-05, "loss": 0.9011, "step": 17980 }, { "epoch": 0.898511637199081, "grad_norm": 3.073150634765625, "learning_rate": 5.7937158331158756e-05, "loss": 0.9683, "step": 17990 }, { "epoch": 0.8990110878034162, "grad_norm": 1.6009252071380615, "learning_rate": 5.789842653253832e-05, "loss": 0.8209, "step": 18000 }, { "epoch": 0.8995105384077515, "grad_norm": 2.7628567218780518, "learning_rate": 5.7859689872472614e-05, "loss": 0.6138, "step": 18010 }, { "epoch": 0.9000099890120867, "grad_norm": 1.1852062940597534, "learning_rate": 5.78209483748039e-05, "loss": 0.9531, "step": 18020 }, { "epoch": 0.9005094396164219, "grad_norm": 2.6659107208251953, "learning_rate": 5.7782202063377346e-05, "loss": 0.7996, "step": 18030 }, { "epoch": 0.9010088902207571, "grad_norm": 3.0761845111846924, "learning_rate": 5.774345096204117e-05, "loss": 0.9946, "step": 18040 }, { "epoch": 0.9015083408250923, "grad_norm": 2.088696241378784, "learning_rate": 5.7704695094646455e-05, "loss": 0.8477, "step": 18050 }, { "epoch": 0.9020077914294277, "grad_norm": 1.6839580535888672, "learning_rate": 5.766593448504729e-05, "loss": 0.7837, "step": 18060 }, { "epoch": 0.9025072420337629, "grad_norm": 2.36403751373291, "learning_rate": 5.762716915710065e-05, "loss": 0.739, "step": 18070 }, { "epoch": 0.9030066926380981, "grad_norm": 0.5672938823699951, "learning_rate": 5.758839913466642e-05, "loss": 0.7367, "step": 18080 }, { "epoch": 0.9035061432424333, "grad_norm": 2.3402769565582275, "learning_rate": 5.754962444160735e-05, "loss": 0.7342, "step": 18090 }, { "epoch": 0.9040055938467686, "grad_norm": 3.5708863735198975, "learning_rate": 5.75108451017891e-05, "loss": 0.8345, "step": 18100 }, { "epoch": 0.9045050444511038, "grad_norm": 3.8043088912963867, "learning_rate": 5.747206113908017e-05, "loss": 0.8163, "step": 18110 }, { "epoch": 0.905004495055439, "grad_norm": 0.3083461821079254, "learning_rate": 5.7433272577351936e-05, "loss": 0.8427, "step": 18120 }, { "epoch": 0.9055039456597742, "grad_norm": 1.7447819709777832, "learning_rate": 5.739447944047854e-05, "loss": 0.7814, "step": 18130 }, { "epoch": 0.9060033962641095, "grad_norm": 2.5935428142547607, "learning_rate": 5.7355681752337e-05, "loss": 0.7454, "step": 18140 }, { "epoch": 0.9065028468684447, "grad_norm": 1.5312762260437012, "learning_rate": 5.731687953680711e-05, "loss": 0.7288, "step": 18150 }, { "epoch": 0.9070022974727799, "grad_norm": 0.5429087281227112, "learning_rate": 5.727807281777147e-05, "loss": 0.7177, "step": 18160 }, { "epoch": 0.9075017480771151, "grad_norm": 3.9769444465637207, "learning_rate": 5.7239261619115415e-05, "loss": 0.8287, "step": 18170 }, { "epoch": 0.9080011986814503, "grad_norm": 1.3674291372299194, "learning_rate": 5.720044596472709e-05, "loss": 0.6996, "step": 18180 }, { "epoch": 0.9085006492857857, "grad_norm": 5.381434440612793, "learning_rate": 5.7161625878497294e-05, "loss": 0.8507, "step": 18190 }, { "epoch": 0.9090000998901209, "grad_norm": 4.628538131713867, "learning_rate": 5.712280138431969e-05, "loss": 0.7252, "step": 18200 }, { "epoch": 0.9094995504944561, "grad_norm": 2.7768666744232178, "learning_rate": 5.708397250609051e-05, "loss": 0.77, "step": 18210 }, { "epoch": 0.9099990010987913, "grad_norm": 2.563023090362549, "learning_rate": 5.70451392677088e-05, "loss": 0.6975, "step": 18220 }, { "epoch": 0.9104984517031266, "grad_norm": 1.5395959615707397, "learning_rate": 5.7006301693076204e-05, "loss": 0.7916, "step": 18230 }, { "epoch": 0.9109979023074618, "grad_norm": 2.5181050300598145, "learning_rate": 5.696745980609708e-05, "loss": 0.9363, "step": 18240 }, { "epoch": 0.911497352911797, "grad_norm": 1.535658597946167, "learning_rate": 5.692861363067843e-05, "loss": 0.7894, "step": 18250 }, { "epoch": 0.9119968035161322, "grad_norm": 1.0996131896972656, "learning_rate": 5.6889763190729916e-05, "loss": 0.8189, "step": 18260 }, { "epoch": 0.9124962541204675, "grad_norm": 1.8074034452438354, "learning_rate": 5.685090851016378e-05, "loss": 0.8101, "step": 18270 }, { "epoch": 0.9129957047248027, "grad_norm": 3.0158116817474365, "learning_rate": 5.68120496128949e-05, "loss": 0.7849, "step": 18280 }, { "epoch": 0.9134951553291379, "grad_norm": 3.0711352825164795, "learning_rate": 5.677318652284078e-05, "loss": 1.0275, "step": 18290 }, { "epoch": 0.9139946059334731, "grad_norm": 5.088053226470947, "learning_rate": 5.673431926392144e-05, "loss": 0.8505, "step": 18300 }, { "epoch": 0.9144940565378085, "grad_norm": 1.0952471494674683, "learning_rate": 5.6695447860059516e-05, "loss": 0.6712, "step": 18310 }, { "epoch": 0.9149935071421437, "grad_norm": 3.6260998249053955, "learning_rate": 5.665657233518018e-05, "loss": 0.7956, "step": 18320 }, { "epoch": 0.9154929577464789, "grad_norm": 0.613185465335846, "learning_rate": 5.661769271321114e-05, "loss": 0.8378, "step": 18330 }, { "epoch": 0.9159924083508141, "grad_norm": 0.9299529790878296, "learning_rate": 5.657880901808259e-05, "loss": 0.8693, "step": 18340 }, { "epoch": 0.9164918589551493, "grad_norm": 1.4949249029159546, "learning_rate": 5.6539921273727324e-05, "loss": 0.9464, "step": 18350 }, { "epoch": 0.9169913095594846, "grad_norm": 2.3860533237457275, "learning_rate": 5.650102950408051e-05, "loss": 0.7277, "step": 18360 }, { "epoch": 0.9174907601638198, "grad_norm": 2.837008237838745, "learning_rate": 5.646213373307992e-05, "loss": 0.8322, "step": 18370 }, { "epoch": 0.917990210768155, "grad_norm": 0.9468108415603638, "learning_rate": 5.6423233984665646e-05, "loss": 0.869, "step": 18380 }, { "epoch": 0.9184896613724902, "grad_norm": 0.9317816495895386, "learning_rate": 5.638433028278038e-05, "loss": 0.8345, "step": 18390 }, { "epoch": 0.9189891119768255, "grad_norm": 2.894892692565918, "learning_rate": 5.634542265136911e-05, "loss": 0.9068, "step": 18400 }, { "epoch": 0.9194885625811607, "grad_norm": 2.379563570022583, "learning_rate": 5.630651111437935e-05, "loss": 0.8521, "step": 18410 }, { "epoch": 0.919988013185496, "grad_norm": 1.6990435123443604, "learning_rate": 5.626759569576094e-05, "loss": 0.9663, "step": 18420 }, { "epoch": 0.9204874637898312, "grad_norm": 0.7252857685089111, "learning_rate": 5.622867641946617e-05, "loss": 0.8153, "step": 18430 }, { "epoch": 0.9209869143941665, "grad_norm": 1.4249210357666016, "learning_rate": 5.618975330944966e-05, "loss": 0.9578, "step": 18440 }, { "epoch": 0.9214863649985017, "grad_norm": 1.74830162525177, "learning_rate": 5.615082638966839e-05, "loss": 0.6519, "step": 18450 }, { "epoch": 0.9219858156028369, "grad_norm": 1.5790170431137085, "learning_rate": 5.6111895684081725e-05, "loss": 0.9353, "step": 18460 }, { "epoch": 0.9224852662071721, "grad_norm": 0.9895839691162109, "learning_rate": 5.607296121665133e-05, "loss": 0.7957, "step": 18470 }, { "epoch": 0.9229847168115073, "grad_norm": 1.759394645690918, "learning_rate": 5.60340230113412e-05, "loss": 0.884, "step": 18480 }, { "epoch": 0.9234841674158426, "grad_norm": 2.133049488067627, "learning_rate": 5.599508109211759e-05, "loss": 0.8668, "step": 18490 }, { "epoch": 0.9239836180201778, "grad_norm": 2.912149429321289, "learning_rate": 5.59561354829491e-05, "loss": 0.7475, "step": 18500 }, { "epoch": 0.924483068624513, "grad_norm": 2.350933313369751, "learning_rate": 5.591718620780657e-05, "loss": 0.9002, "step": 18510 }, { "epoch": 0.9249825192288482, "grad_norm": 4.567841529846191, "learning_rate": 5.587823329066308e-05, "loss": 0.7602, "step": 18520 }, { "epoch": 0.9254819698331835, "grad_norm": 5.905770301818848, "learning_rate": 5.583927675549401e-05, "loss": 0.8341, "step": 18530 }, { "epoch": 0.9259814204375187, "grad_norm": 1.3211944103240967, "learning_rate": 5.58003166262769e-05, "loss": 0.9097, "step": 18540 }, { "epoch": 0.926480871041854, "grad_norm": 1.3427737951278687, "learning_rate": 5.576135292699153e-05, "loss": 0.7227, "step": 18550 }, { "epoch": 0.9269803216461892, "grad_norm": 0.635680079460144, "learning_rate": 5.5722385681619894e-05, "loss": 0.7066, "step": 18560 }, { "epoch": 0.9274797722505245, "grad_norm": 1.1708614826202393, "learning_rate": 5.568341491414613e-05, "loss": 0.7015, "step": 18570 }, { "epoch": 0.9279792228548597, "grad_norm": 2.472395896911621, "learning_rate": 5.5644440648556595e-05, "loss": 0.8477, "step": 18580 }, { "epoch": 0.9284786734591949, "grad_norm": 2.3495097160339355, "learning_rate": 5.5605462908839746e-05, "loss": 0.7455, "step": 18590 }, { "epoch": 0.9289781240635301, "grad_norm": 1.7436636686325073, "learning_rate": 5.556648171898623e-05, "loss": 0.9545, "step": 18600 }, { "epoch": 0.9294775746678654, "grad_norm": 3.8654696941375732, "learning_rate": 5.552749710298875e-05, "loss": 0.8446, "step": 18610 }, { "epoch": 0.9299770252722006, "grad_norm": 2.026738166809082, "learning_rate": 5.548850908484219e-05, "loss": 0.8007, "step": 18620 }, { "epoch": 0.9304764758765358, "grad_norm": 1.5802265405654907, "learning_rate": 5.5449517688543486e-05, "loss": 0.7624, "step": 18630 }, { "epoch": 0.930975926480871, "grad_norm": 2.023195743560791, "learning_rate": 5.541052293809167e-05, "loss": 0.7756, "step": 18640 }, { "epoch": 0.9314753770852062, "grad_norm": 1.725743055343628, "learning_rate": 5.537152485748782e-05, "loss": 0.9886, "step": 18650 }, { "epoch": 0.9319748276895415, "grad_norm": 2.1248693466186523, "learning_rate": 5.5332523470735086e-05, "loss": 0.8428, "step": 18660 }, { "epoch": 0.9324742782938767, "grad_norm": 1.0478453636169434, "learning_rate": 5.529351880183863e-05, "loss": 0.6858, "step": 18670 }, { "epoch": 0.932973728898212, "grad_norm": 2.333965539932251, "learning_rate": 5.525451087480568e-05, "loss": 0.9799, "step": 18680 }, { "epoch": 0.9334731795025472, "grad_norm": 3.503141164779663, "learning_rate": 5.521549971364538e-05, "loss": 0.918, "step": 18690 }, { "epoch": 0.9339726301068825, "grad_norm": 1.8752412796020508, "learning_rate": 5.5176485342368964e-05, "loss": 0.7339, "step": 18700 }, { "epoch": 0.9344720807112177, "grad_norm": 2.4250271320343018, "learning_rate": 5.5137467784989595e-05, "loss": 0.8915, "step": 18710 }, { "epoch": 0.9349715313155529, "grad_norm": 1.8618446588516235, "learning_rate": 5.5098447065522386e-05, "loss": 0.8496, "step": 18720 }, { "epoch": 0.9354709819198881, "grad_norm": 1.5213110446929932, "learning_rate": 5.5059423207984407e-05, "loss": 0.8608, "step": 18730 }, { "epoch": 0.9359704325242234, "grad_norm": 2.1037065982818604, "learning_rate": 5.502039623639468e-05, "loss": 0.9171, "step": 18740 }, { "epoch": 0.9364698831285586, "grad_norm": 2.4831433296203613, "learning_rate": 5.498136617477413e-05, "loss": 0.821, "step": 18750 }, { "epoch": 0.9369693337328938, "grad_norm": 2.180809497833252, "learning_rate": 5.494233304714556e-05, "loss": 0.8884, "step": 18760 }, { "epoch": 0.937468784337229, "grad_norm": 3.107668399810791, "learning_rate": 5.4903296877533685e-05, "loss": 0.8365, "step": 18770 }, { "epoch": 0.9379682349415642, "grad_norm": 0.9787671566009521, "learning_rate": 5.4864257689965105e-05, "loss": 0.8015, "step": 18780 }, { "epoch": 0.9384676855458995, "grad_norm": 1.7403486967086792, "learning_rate": 5.482521550846823e-05, "loss": 0.9814, "step": 18790 }, { "epoch": 0.9389671361502347, "grad_norm": 1.2914844751358032, "learning_rate": 5.478617035707337e-05, "loss": 0.6709, "step": 18800 }, { "epoch": 0.93946658675457, "grad_norm": 1.861228346824646, "learning_rate": 5.4747122259812655e-05, "loss": 0.6572, "step": 18810 }, { "epoch": 0.9399660373589052, "grad_norm": 2.417015552520752, "learning_rate": 5.470807124071996e-05, "loss": 0.7708, "step": 18820 }, { "epoch": 0.9404654879632405, "grad_norm": 3.1048529148101807, "learning_rate": 5.466901732383103e-05, "loss": 0.8148, "step": 18830 }, { "epoch": 0.9409649385675757, "grad_norm": 1.4176899194717407, "learning_rate": 5.4629960533183375e-05, "loss": 0.9662, "step": 18840 }, { "epoch": 0.9414643891719109, "grad_norm": 1.4135115146636963, "learning_rate": 5.459090089281629e-05, "loss": 1.0568, "step": 18850 }, { "epoch": 0.9419638397762461, "grad_norm": 1.8020182847976685, "learning_rate": 5.455183842677076e-05, "loss": 0.8499, "step": 18860 }, { "epoch": 0.9424632903805814, "grad_norm": 1.875177025794983, "learning_rate": 5.451277315908959e-05, "loss": 0.6807, "step": 18870 }, { "epoch": 0.9429627409849166, "grad_norm": 1.8055870532989502, "learning_rate": 5.4473705113817254e-05, "loss": 1.0195, "step": 18880 }, { "epoch": 0.9434621915892518, "grad_norm": 3.1722288131713867, "learning_rate": 5.443463431499997e-05, "loss": 0.9666, "step": 18890 }, { "epoch": 0.943961642193587, "grad_norm": 2.5591423511505127, "learning_rate": 5.4395560786685606e-05, "loss": 0.7662, "step": 18900 }, { "epoch": 0.9444610927979222, "grad_norm": 2.3718013763427734, "learning_rate": 5.435648455292378e-05, "loss": 0.784, "step": 18910 }, { "epoch": 0.9449605434022575, "grad_norm": 2.845883846282959, "learning_rate": 5.431740563776568e-05, "loss": 0.9012, "step": 18920 }, { "epoch": 0.9454599940065928, "grad_norm": 0.9969069957733154, "learning_rate": 5.427832406526427e-05, "loss": 0.7442, "step": 18930 }, { "epoch": 0.945959444610928, "grad_norm": 2.8231468200683594, "learning_rate": 5.4239239859474026e-05, "loss": 0.9244, "step": 18940 }, { "epoch": 0.9464588952152632, "grad_norm": 2.8638861179351807, "learning_rate": 5.420015304445112e-05, "loss": 0.7635, "step": 18950 }, { "epoch": 0.9469583458195985, "grad_norm": 1.2569690942764282, "learning_rate": 5.4161063644253284e-05, "loss": 1.0894, "step": 18960 }, { "epoch": 0.9474577964239337, "grad_norm": 1.3123199939727783, "learning_rate": 5.4121971682939885e-05, "loss": 0.8007, "step": 18970 }, { "epoch": 0.9479572470282689, "grad_norm": 1.932054042816162, "learning_rate": 5.408287718457185e-05, "loss": 0.7395, "step": 18980 }, { "epoch": 0.9484566976326041, "grad_norm": 1.4530482292175293, "learning_rate": 5.4043780173211635e-05, "loss": 0.8638, "step": 18990 }, { "epoch": 0.9489561482369394, "grad_norm": 2.344294786453247, "learning_rate": 5.400468067292329e-05, "loss": 0.9084, "step": 19000 }, { "epoch": 0.9494555988412746, "grad_norm": 2.032836437225342, "learning_rate": 5.396557870777239e-05, "loss": 1.0318, "step": 19010 }, { "epoch": 0.9499550494456098, "grad_norm": 6.245739936828613, "learning_rate": 5.3926474301826e-05, "loss": 0.7927, "step": 19020 }, { "epoch": 0.950454500049945, "grad_norm": 1.9433112144470215, "learning_rate": 5.3887367479152706e-05, "loss": 0.6354, "step": 19030 }, { "epoch": 0.9509539506542803, "grad_norm": 1.9389113187789917, "learning_rate": 5.384825826382257e-05, "loss": 0.8145, "step": 19040 }, { "epoch": 0.9514534012586155, "grad_norm": 1.4428133964538574, "learning_rate": 5.380914667990714e-05, "loss": 0.7581, "step": 19050 }, { "epoch": 0.9519528518629508, "grad_norm": 2.562809944152832, "learning_rate": 5.377003275147943e-05, "loss": 0.7327, "step": 19060 }, { "epoch": 0.952452302467286, "grad_norm": 1.8769131898880005, "learning_rate": 5.373091650261385e-05, "loss": 0.7855, "step": 19070 }, { "epoch": 0.9529517530716212, "grad_norm": 1.8019176721572876, "learning_rate": 5.3691797957386316e-05, "loss": 0.8249, "step": 19080 }, { "epoch": 0.9534512036759565, "grad_norm": 2.966636896133423, "learning_rate": 5.365267713987407e-05, "loss": 0.8536, "step": 19090 }, { "epoch": 0.9539506542802917, "grad_norm": 5.881937503814697, "learning_rate": 5.3613554074155815e-05, "loss": 0.9598, "step": 19100 }, { "epoch": 0.9544501048846269, "grad_norm": 1.0772712230682373, "learning_rate": 5.3574428784311624e-05, "loss": 0.7772, "step": 19110 }, { "epoch": 0.9549495554889621, "grad_norm": 4.194241523742676, "learning_rate": 5.353530129442293e-05, "loss": 0.7927, "step": 19120 }, { "epoch": 0.9554490060932974, "grad_norm": 1.3591468334197998, "learning_rate": 5.349617162857251e-05, "loss": 0.6133, "step": 19130 }, { "epoch": 0.9559484566976326, "grad_norm": 2.7203056812286377, "learning_rate": 5.345703981084451e-05, "loss": 0.8675, "step": 19140 }, { "epoch": 0.9564479073019678, "grad_norm": 3.1273722648620605, "learning_rate": 5.341790586532438e-05, "loss": 0.7179, "step": 19150 }, { "epoch": 0.956947357906303, "grad_norm": 1.601722240447998, "learning_rate": 5.33787698160989e-05, "loss": 1.0063, "step": 19160 }, { "epoch": 0.9574468085106383, "grad_norm": 1.9342644214630127, "learning_rate": 5.3339631687256084e-05, "loss": 0.816, "step": 19170 }, { "epoch": 0.9579462591149736, "grad_norm": 2.065150737762451, "learning_rate": 5.330049150288531e-05, "loss": 0.7717, "step": 19180 }, { "epoch": 0.9584457097193088, "grad_norm": 1.3680709600448608, "learning_rate": 5.326134928707716e-05, "loss": 0.7374, "step": 19190 }, { "epoch": 0.958945160323644, "grad_norm": 3.846485137939453, "learning_rate": 5.322220506392352e-05, "loss": 0.8245, "step": 19200 }, { "epoch": 0.9594446109279792, "grad_norm": 2.7010490894317627, "learning_rate": 5.318305885751742e-05, "loss": 0.8996, "step": 19210 }, { "epoch": 0.9599440615323145, "grad_norm": 1.8771228790283203, "learning_rate": 5.3143910691953234e-05, "loss": 0.6841, "step": 19220 }, { "epoch": 0.9604435121366497, "grad_norm": 1.0770602226257324, "learning_rate": 5.310476059132645e-05, "loss": 0.8859, "step": 19230 }, { "epoch": 0.9609429627409849, "grad_norm": 1.264762282371521, "learning_rate": 5.3065608579733775e-05, "loss": 0.78, "step": 19240 }, { "epoch": 0.9614424133453201, "grad_norm": 0.8207452893257141, "learning_rate": 5.30264546812731e-05, "loss": 0.8977, "step": 19250 }, { "epoch": 0.9619418639496554, "grad_norm": 1.057423710823059, "learning_rate": 5.2987298920043435e-05, "loss": 0.7647, "step": 19260 }, { "epoch": 0.9624413145539906, "grad_norm": 2.5369441509246826, "learning_rate": 5.294814132014503e-05, "loss": 0.7623, "step": 19270 }, { "epoch": 0.9629407651583258, "grad_norm": 1.0441887378692627, "learning_rate": 5.290898190567917e-05, "loss": 0.9578, "step": 19280 }, { "epoch": 0.963440215762661, "grad_norm": 3.11104679107666, "learning_rate": 5.28698207007483e-05, "loss": 1.0426, "step": 19290 }, { "epoch": 0.9639396663669964, "grad_norm": 2.6482746601104736, "learning_rate": 5.283065772945594e-05, "loss": 0.8509, "step": 19300 }, { "epoch": 0.9644391169713316, "grad_norm": 1.1781560182571411, "learning_rate": 5.279149301590679e-05, "loss": 0.792, "step": 19310 }, { "epoch": 0.9649385675756668, "grad_norm": 1.125461220741272, "learning_rate": 5.275232658420648e-05, "loss": 0.6797, "step": 19320 }, { "epoch": 0.965438018180002, "grad_norm": 0.8164302110671997, "learning_rate": 5.271315845846181e-05, "loss": 0.8455, "step": 19330 }, { "epoch": 0.9659374687843373, "grad_norm": 4.994171142578125, "learning_rate": 5.267398866278054e-05, "loss": 0.9375, "step": 19340 }, { "epoch": 0.9664369193886725, "grad_norm": 2.1752095222473145, "learning_rate": 5.2634817221271534e-05, "loss": 0.898, "step": 19350 }, { "epoch": 0.9669363699930077, "grad_norm": 1.8746306896209717, "learning_rate": 5.2595644158044634e-05, "loss": 0.8818, "step": 19360 }, { "epoch": 0.9674358205973429, "grad_norm": 1.6423358917236328, "learning_rate": 5.2556469497210684e-05, "loss": 0.983, "step": 19370 }, { "epoch": 0.9679352712016781, "grad_norm": 2.424043655395508, "learning_rate": 5.251729326288147e-05, "loss": 1.0658, "step": 19380 }, { "epoch": 0.9684347218060134, "grad_norm": 1.116323709487915, "learning_rate": 5.247811547916982e-05, "loss": 0.7563, "step": 19390 }, { "epoch": 0.9689341724103486, "grad_norm": 2.169189214706421, "learning_rate": 5.243893617018945e-05, "loss": 0.7063, "step": 19400 }, { "epoch": 0.9694336230146838, "grad_norm": 1.8824118375778198, "learning_rate": 5.23997553600551e-05, "loss": 0.9368, "step": 19410 }, { "epoch": 0.969933073619019, "grad_norm": 2.2264509201049805, "learning_rate": 5.2360573072882334e-05, "loss": 0.7755, "step": 19420 }, { "epoch": 0.9704325242233544, "grad_norm": 6.045483112335205, "learning_rate": 5.2321389332787664e-05, "loss": 1.132, "step": 19430 }, { "epoch": 0.9709319748276896, "grad_norm": 1.84587824344635, "learning_rate": 5.228220416388854e-05, "loss": 0.8128, "step": 19440 }, { "epoch": 0.9714314254320248, "grad_norm": 1.2925611734390259, "learning_rate": 5.224301759030321e-05, "loss": 0.8222, "step": 19450 }, { "epoch": 0.97193087603636, "grad_norm": 3.026948928833008, "learning_rate": 5.220382963615086e-05, "loss": 1.1625, "step": 19460 }, { "epoch": 0.9724303266406953, "grad_norm": 4.627734661102295, "learning_rate": 5.2164640325551484e-05, "loss": 0.771, "step": 19470 }, { "epoch": 0.9729297772450305, "grad_norm": 2.621882200241089, "learning_rate": 5.212544968262594e-05, "loss": 0.8294, "step": 19480 }, { "epoch": 0.9734292278493657, "grad_norm": 3.361600875854492, "learning_rate": 5.2086257731495856e-05, "loss": 0.9133, "step": 19490 }, { "epoch": 0.9739286784537009, "grad_norm": 2.0608766078948975, "learning_rate": 5.204706449628374e-05, "loss": 0.8347, "step": 19500 }, { "epoch": 0.9744281290580361, "grad_norm": 1.9795416593551636, "learning_rate": 5.20078700011128e-05, "loss": 0.7975, "step": 19510 }, { "epoch": 0.9749275796623714, "grad_norm": 2.003333806991577, "learning_rate": 5.196867427010711e-05, "loss": 0.8248, "step": 19520 }, { "epoch": 0.9754270302667066, "grad_norm": 4.734315872192383, "learning_rate": 5.192947732739143e-05, "loss": 0.8322, "step": 19530 }, { "epoch": 0.9759264808710418, "grad_norm": 2.263054609298706, "learning_rate": 5.189027919709133e-05, "loss": 0.9079, "step": 19540 }, { "epoch": 0.976425931475377, "grad_norm": 1.3464040756225586, "learning_rate": 5.185107990333306e-05, "loss": 0.7659, "step": 19550 }, { "epoch": 0.9769253820797124, "grad_norm": 1.762438416481018, "learning_rate": 5.1811879470243595e-05, "loss": 0.8269, "step": 19560 }, { "epoch": 0.9774248326840476, "grad_norm": 1.8302335739135742, "learning_rate": 5.1772677921950643e-05, "loss": 0.8949, "step": 19570 }, { "epoch": 0.9779242832883828, "grad_norm": 2.3929293155670166, "learning_rate": 5.1733475282582565e-05, "loss": 0.8596, "step": 19580 }, { "epoch": 0.978423733892718, "grad_norm": 2.744567632675171, "learning_rate": 5.1694271576268415e-05, "loss": 1.0234, "step": 19590 }, { "epoch": 0.9789231844970533, "grad_norm": 1.7753416299819946, "learning_rate": 5.165506682713788e-05, "loss": 0.9801, "step": 19600 }, { "epoch": 0.9794226351013885, "grad_norm": 1.1553436517715454, "learning_rate": 5.161586105932131e-05, "loss": 0.9044, "step": 19610 }, { "epoch": 0.9799220857057237, "grad_norm": 4.240505695343018, "learning_rate": 5.15766542969497e-05, "loss": 1.082, "step": 19620 }, { "epoch": 0.9804215363100589, "grad_norm": 1.8582956790924072, "learning_rate": 5.153744656415459e-05, "loss": 0.9097, "step": 19630 }, { "epoch": 0.9809209869143942, "grad_norm": 1.5120314359664917, "learning_rate": 5.149823788506818e-05, "loss": 0.7637, "step": 19640 }, { "epoch": 0.9814204375187294, "grad_norm": 1.2795964479446411, "learning_rate": 5.145902828382323e-05, "loss": 0.7, "step": 19650 }, { "epoch": 0.9819198881230646, "grad_norm": 1.9886530637741089, "learning_rate": 5.141981778455308e-05, "loss": 0.8157, "step": 19660 }, { "epoch": 0.9824193387273998, "grad_norm": 1.139605164527893, "learning_rate": 5.1380606411391594e-05, "loss": 0.6963, "step": 19670 }, { "epoch": 0.982918789331735, "grad_norm": 3.01100754737854, "learning_rate": 5.134139418847321e-05, "loss": 0.6669, "step": 19680 }, { "epoch": 0.9834182399360704, "grad_norm": 0.967864990234375, "learning_rate": 5.130218113993285e-05, "loss": 0.8265, "step": 19690 }, { "epoch": 0.9839176905404056, "grad_norm": 1.3162167072296143, "learning_rate": 5.1262967289905974e-05, "loss": 0.8367, "step": 19700 }, { "epoch": 0.9844171411447408, "grad_norm": 0.8989589810371399, "learning_rate": 5.122375266252855e-05, "loss": 0.7617, "step": 19710 }, { "epoch": 0.984916591749076, "grad_norm": 2.836339235305786, "learning_rate": 5.118453728193696e-05, "loss": 0.8839, "step": 19720 }, { "epoch": 0.9854160423534113, "grad_norm": 2.4945757389068604, "learning_rate": 5.1145321172268115e-05, "loss": 0.8062, "step": 19730 }, { "epoch": 0.9859154929577465, "grad_norm": 3.778860330581665, "learning_rate": 5.110610435765934e-05, "loss": 0.9809, "step": 19740 }, { "epoch": 0.9864149435620817, "grad_norm": 2.4653830528259277, "learning_rate": 5.106688686224843e-05, "loss": 0.8808, "step": 19750 }, { "epoch": 0.9869143941664169, "grad_norm": 2.0902090072631836, "learning_rate": 5.102766871017355e-05, "loss": 0.8346, "step": 19760 }, { "epoch": 0.9874138447707522, "grad_norm": 3.120569944381714, "learning_rate": 5.0988449925573286e-05, "loss": 0.9277, "step": 19770 }, { "epoch": 0.9879132953750874, "grad_norm": 2.109555244445801, "learning_rate": 5.0949230532586635e-05, "loss": 0.7138, "step": 19780 }, { "epoch": 0.9884127459794226, "grad_norm": 1.553554892539978, "learning_rate": 5.0910010555352964e-05, "loss": 0.6749, "step": 19790 }, { "epoch": 0.9889121965837578, "grad_norm": 2.5567986965179443, "learning_rate": 5.087079001801196e-05, "loss": 0.7742, "step": 19800 }, { "epoch": 0.989411647188093, "grad_norm": 2.878876209259033, "learning_rate": 5.083156894470371e-05, "loss": 0.7715, "step": 19810 }, { "epoch": 0.9899110977924284, "grad_norm": 1.859236717224121, "learning_rate": 5.079234735956857e-05, "loss": 0.8226, "step": 19820 }, { "epoch": 0.9904105483967636, "grad_norm": 4.09383487701416, "learning_rate": 5.0753125286747285e-05, "loss": 0.8875, "step": 19830 }, { "epoch": 0.9909099990010988, "grad_norm": 1.2874923944473267, "learning_rate": 5.071390275038084e-05, "loss": 0.794, "step": 19840 }, { "epoch": 0.991409449605434, "grad_norm": 1.0898391008377075, "learning_rate": 5.067467977461053e-05, "loss": 0.8219, "step": 19850 }, { "epoch": 0.9919089002097693, "grad_norm": 1.6944223642349243, "learning_rate": 5.063545638357791e-05, "loss": 0.8407, "step": 19860 }, { "epoch": 0.9924083508141045, "grad_norm": 1.0680359601974487, "learning_rate": 5.059623260142481e-05, "loss": 0.8435, "step": 19870 }, { "epoch": 0.9929078014184397, "grad_norm": 2.1276001930236816, "learning_rate": 5.055700845229327e-05, "loss": 0.7866, "step": 19880 }, { "epoch": 0.9934072520227749, "grad_norm": 1.2395598888397217, "learning_rate": 5.0517783960325616e-05, "loss": 1.0635, "step": 19890 }, { "epoch": 0.9939067026271102, "grad_norm": 3.308525323867798, "learning_rate": 5.047855914966429e-05, "loss": 1.0118, "step": 19900 }, { "epoch": 0.9944061532314454, "grad_norm": 1.449928641319275, "learning_rate": 5.0439334044452e-05, "loss": 0.7627, "step": 19910 }, { "epoch": 0.9949056038357806, "grad_norm": 2.200239419937134, "learning_rate": 5.040010866883162e-05, "loss": 0.6629, "step": 19920 }, { "epoch": 0.9954050544401158, "grad_norm": 2.7821123600006104, "learning_rate": 5.036088304694622e-05, "loss": 0.7161, "step": 19930 }, { "epoch": 0.995904505044451, "grad_norm": 2.221611499786377, "learning_rate": 5.0321657202938935e-05, "loss": 0.8149, "step": 19940 }, { "epoch": 0.9964039556487864, "grad_norm": 0.8881192207336426, "learning_rate": 5.0282431160953116e-05, "loss": 0.8979, "step": 19950 }, { "epoch": 0.9969034062531216, "grad_norm": 1.5408036708831787, "learning_rate": 5.024320494513223e-05, "loss": 0.8557, "step": 19960 }, { "epoch": 0.9974028568574568, "grad_norm": 2.813300132751465, "learning_rate": 5.02039785796198e-05, "loss": 0.8963, "step": 19970 }, { "epoch": 0.997902307461792, "grad_norm": 1.3438774347305298, "learning_rate": 5.016475208855952e-05, "loss": 0.9493, "step": 19980 }, { "epoch": 0.9984017580661273, "grad_norm": 1.9065624475479126, "learning_rate": 5.012552549609505e-05, "loss": 0.8626, "step": 19990 }, { "epoch": 0.9989012086704625, "grad_norm": 2.3506124019622803, "learning_rate": 5.008629882637024e-05, "loss": 0.7836, "step": 20000 }, { "epoch": 0.9994006592747977, "grad_norm": 1.8984686136245728, "learning_rate": 5.004707210352888e-05, "loss": 1.0569, "step": 20010 }, { "epoch": 0.9999001098791329, "grad_norm": 1.8179553747177124, "learning_rate": 5.0007845351714875e-05, "loss": 0.9464, "step": 20020 }, { "epoch": 1.0003995604834681, "grad_norm": 1.3148683309555054, "learning_rate": 4.996861859507208e-05, "loss": 0.8838, "step": 20030 }, { "epoch": 1.0008990110878033, "grad_norm": 3.418382406234741, "learning_rate": 4.99293918577444e-05, "loss": 0.8809, "step": 20040 }, { "epoch": 1.0013984616921388, "grad_norm": 3.0771703720092773, "learning_rate": 4.9890165163875684e-05, "loss": 0.979, "step": 20050 }, { "epoch": 1.001897912296474, "grad_norm": 0.9666630625724792, "learning_rate": 4.985093853760983e-05, "loss": 0.6511, "step": 20060 }, { "epoch": 1.0023973629008092, "grad_norm": 1.4691716432571411, "learning_rate": 4.9811712003090616e-05, "loss": 0.7265, "step": 20070 }, { "epoch": 1.0028968135051444, "grad_norm": 1.7392897605895996, "learning_rate": 4.977248558446181e-05, "loss": 0.8647, "step": 20080 }, { "epoch": 1.0033962641094796, "grad_norm": 3.1086442470550537, "learning_rate": 4.973325930586707e-05, "loss": 0.8487, "step": 20090 }, { "epoch": 1.0038957147138148, "grad_norm": 2.4907405376434326, "learning_rate": 4.969403319145002e-05, "loss": 0.7273, "step": 20100 }, { "epoch": 1.00439516531815, "grad_norm": 1.672037959098816, "learning_rate": 4.9654807265354115e-05, "loss": 0.7771, "step": 20110 }, { "epoch": 1.0048946159224852, "grad_norm": 1.0317631959915161, "learning_rate": 4.9615581551722765e-05, "loss": 0.7185, "step": 20120 }, { "epoch": 1.0053940665268204, "grad_norm": 2.8986291885375977, "learning_rate": 4.95763560746992e-05, "loss": 0.8541, "step": 20130 }, { "epoch": 1.0058935171311558, "grad_norm": 1.1837666034698486, "learning_rate": 4.953713085842651e-05, "loss": 0.662, "step": 20140 }, { "epoch": 1.006392967735491, "grad_norm": 2.172070026397705, "learning_rate": 4.949790592704768e-05, "loss": 0.7526, "step": 20150 }, { "epoch": 1.0068924183398262, "grad_norm": 3.5338294506073, "learning_rate": 4.945868130470543e-05, "loss": 0.8894, "step": 20160 }, { "epoch": 1.0073918689441614, "grad_norm": 1.0544848442077637, "learning_rate": 4.941945701554236e-05, "loss": 0.9083, "step": 20170 }, { "epoch": 1.0078913195484966, "grad_norm": 3.303290843963623, "learning_rate": 4.938023308370083e-05, "loss": 0.7565, "step": 20180 }, { "epoch": 1.0083907701528319, "grad_norm": 2.699636936187744, "learning_rate": 4.9341009533322984e-05, "loss": 1.0665, "step": 20190 }, { "epoch": 1.008890220757167, "grad_norm": 2.069225788116455, "learning_rate": 4.9301786388550755e-05, "loss": 0.6884, "step": 20200 }, { "epoch": 1.0093896713615023, "grad_norm": 2.865107774734497, "learning_rate": 4.9262563673525794e-05, "loss": 0.9045, "step": 20210 }, { "epoch": 1.0098891219658377, "grad_norm": 1.146571397781372, "learning_rate": 4.922334141238949e-05, "loss": 0.9875, "step": 20220 }, { "epoch": 1.010388572570173, "grad_norm": 0.6638919115066528, "learning_rate": 4.9184119629283004e-05, "loss": 0.5905, "step": 20230 }, { "epoch": 1.010888023174508, "grad_norm": 1.5547287464141846, "learning_rate": 4.914489834834714e-05, "loss": 0.9609, "step": 20240 }, { "epoch": 1.0113874737788433, "grad_norm": 1.5631414651870728, "learning_rate": 4.910567759372241e-05, "loss": 0.6336, "step": 20250 }, { "epoch": 1.0118869243831785, "grad_norm": 1.0312044620513916, "learning_rate": 4.906645738954901e-05, "loss": 0.7479, "step": 20260 }, { "epoch": 1.0123863749875137, "grad_norm": 1.1923410892486572, "learning_rate": 4.9027237759966815e-05, "loss": 0.8033, "step": 20270 }, { "epoch": 1.012885825591849, "grad_norm": 3.822603225708008, "learning_rate": 4.89880187291153e-05, "loss": 0.8773, "step": 20280 }, { "epoch": 1.0133852761961841, "grad_norm": 1.42519211769104, "learning_rate": 4.894880032113362e-05, "loss": 0.8215, "step": 20290 }, { "epoch": 1.0138847268005193, "grad_norm": 2.6060791015625, "learning_rate": 4.890958256016051e-05, "loss": 0.9427, "step": 20300 }, { "epoch": 1.0143841774048548, "grad_norm": 3.0129456520080566, "learning_rate": 4.8870365470334305e-05, "loss": 0.9544, "step": 20310 }, { "epoch": 1.01488362800919, "grad_norm": 1.888476014137268, "learning_rate": 4.883114907579299e-05, "loss": 0.8689, "step": 20320 }, { "epoch": 1.0153830786135252, "grad_norm": 1.0088943243026733, "learning_rate": 4.879193340067406e-05, "loss": 0.715, "step": 20330 }, { "epoch": 1.0158825292178604, "grad_norm": 3.4786131381988525, "learning_rate": 4.875271846911456e-05, "loss": 0.7956, "step": 20340 }, { "epoch": 1.0163819798221956, "grad_norm": 2.087721824645996, "learning_rate": 4.8713504305251136e-05, "loss": 0.9573, "step": 20350 }, { "epoch": 1.0168814304265308, "grad_norm": 1.0645734071731567, "learning_rate": 4.8674290933219886e-05, "loss": 0.7742, "step": 20360 }, { "epoch": 1.017380881030866, "grad_norm": 1.1741522550582886, "learning_rate": 4.8635078377156495e-05, "loss": 0.6981, "step": 20370 }, { "epoch": 1.0178803316352012, "grad_norm": 1.568892240524292, "learning_rate": 4.859586666119608e-05, "loss": 0.9419, "step": 20380 }, { "epoch": 1.0183797822395364, "grad_norm": 1.593902826309204, "learning_rate": 4.85566558094733e-05, "loss": 0.7515, "step": 20390 }, { "epoch": 1.0188792328438718, "grad_norm": 1.9106167554855347, "learning_rate": 4.851744584612223e-05, "loss": 0.8485, "step": 20400 }, { "epoch": 1.019378683448207, "grad_norm": 1.1534074544906616, "learning_rate": 4.847823679527644e-05, "loss": 0.9878, "step": 20410 }, { "epoch": 1.0198781340525422, "grad_norm": 1.6465153694152832, "learning_rate": 4.843902868106893e-05, "loss": 0.7066, "step": 20420 }, { "epoch": 1.0203775846568774, "grad_norm": 4.3783650398254395, "learning_rate": 4.8399821527632084e-05, "loss": 0.7449, "step": 20430 }, { "epoch": 1.0208770352612127, "grad_norm": 3.2883825302124023, "learning_rate": 4.8360615359097755e-05, "loss": 0.9139, "step": 20440 }, { "epoch": 1.0213764858655479, "grad_norm": 1.0114625692367554, "learning_rate": 4.8321410199597136e-05, "loss": 0.8895, "step": 20450 }, { "epoch": 1.021875936469883, "grad_norm": 1.5111706256866455, "learning_rate": 4.828220607326083e-05, "loss": 0.6904, "step": 20460 }, { "epoch": 1.0223753870742183, "grad_norm": 5.521808624267578, "learning_rate": 4.824300300421881e-05, "loss": 0.714, "step": 20470 }, { "epoch": 1.0228748376785537, "grad_norm": 1.8357826471328735, "learning_rate": 4.8203801016600345e-05, "loss": 0.8305, "step": 20480 }, { "epoch": 1.023374288282889, "grad_norm": 2.0647709369659424, "learning_rate": 4.8164600134534124e-05, "loss": 0.8403, "step": 20490 }, { "epoch": 1.023873738887224, "grad_norm": 0.7539551854133606, "learning_rate": 4.8125400382148115e-05, "loss": 0.849, "step": 20500 }, { "epoch": 1.0243731894915593, "grad_norm": 3.9702467918395996, "learning_rate": 4.808620178356954e-05, "loss": 0.7566, "step": 20510 }, { "epoch": 1.0248726400958945, "grad_norm": 1.7215352058410645, "learning_rate": 4.8047004362924995e-05, "loss": 0.8311, "step": 20520 }, { "epoch": 1.0253720907002297, "grad_norm": 1.1995903253555298, "learning_rate": 4.800780814434029e-05, "loss": 0.8203, "step": 20530 }, { "epoch": 1.025871541304565, "grad_norm": 0.9854593276977539, "learning_rate": 4.7968613151940535e-05, "loss": 0.8736, "step": 20540 }, { "epoch": 1.0263709919089001, "grad_norm": 0.8661856651306152, "learning_rate": 4.7929419409850045e-05, "loss": 0.5844, "step": 20550 }, { "epoch": 1.0268704425132353, "grad_norm": 4.256387710571289, "learning_rate": 4.789022694219241e-05, "loss": 0.9475, "step": 20560 }, { "epoch": 1.0273698931175708, "grad_norm": 2.238889455795288, "learning_rate": 4.785103577309039e-05, "loss": 0.9999, "step": 20570 }, { "epoch": 1.027869343721906, "grad_norm": 1.0479822158813477, "learning_rate": 4.7811845926665996e-05, "loss": 0.6624, "step": 20580 }, { "epoch": 1.0283687943262412, "grad_norm": 2.819624185562134, "learning_rate": 4.777265742704039e-05, "loss": 0.7734, "step": 20590 }, { "epoch": 1.0288682449305764, "grad_norm": 1.1497563123703003, "learning_rate": 4.773347029833391e-05, "loss": 0.8966, "step": 20600 }, { "epoch": 1.0293676955349116, "grad_norm": 1.8355106115341187, "learning_rate": 4.7694284564666055e-05, "loss": 0.7147, "step": 20610 }, { "epoch": 1.0298671461392468, "grad_norm": 2.1063599586486816, "learning_rate": 4.765510025015548e-05, "loss": 0.9888, "step": 20620 }, { "epoch": 1.030366596743582, "grad_norm": 1.0438473224639893, "learning_rate": 4.761591737891992e-05, "loss": 0.9118, "step": 20630 }, { "epoch": 1.0308660473479172, "grad_norm": 2.065518617630005, "learning_rate": 4.757673597507627e-05, "loss": 0.7237, "step": 20640 }, { "epoch": 1.0313654979522524, "grad_norm": 3.6632440090179443, "learning_rate": 4.7537556062740486e-05, "loss": 0.8839, "step": 20650 }, { "epoch": 1.0318649485565878, "grad_norm": 3.2576892375946045, "learning_rate": 4.749837766602765e-05, "loss": 0.9002, "step": 20660 }, { "epoch": 1.032364399160923, "grad_norm": 1.220277190208435, "learning_rate": 4.745920080905188e-05, "loss": 0.7876, "step": 20670 }, { "epoch": 1.0328638497652582, "grad_norm": 1.7398476600646973, "learning_rate": 4.742002551592635e-05, "loss": 0.9147, "step": 20680 }, { "epoch": 1.0333633003695935, "grad_norm": 1.3966233730316162, "learning_rate": 4.738085181076327e-05, "loss": 0.7031, "step": 20690 }, { "epoch": 1.0338627509739287, "grad_norm": 1.890921950340271, "learning_rate": 4.734167971767387e-05, "loss": 1.003, "step": 20700 }, { "epoch": 1.0343622015782639, "grad_norm": 6.796555995941162, "learning_rate": 4.7302509260768405e-05, "loss": 0.858, "step": 20710 }, { "epoch": 1.034861652182599, "grad_norm": 1.2069848775863647, "learning_rate": 4.72633404641561e-05, "loss": 0.8649, "step": 20720 }, { "epoch": 1.0353611027869343, "grad_norm": 1.6276015043258667, "learning_rate": 4.7224173351945165e-05, "loss": 0.7418, "step": 20730 }, { "epoch": 1.0358605533912697, "grad_norm": 1.0076346397399902, "learning_rate": 4.718500794824278e-05, "loss": 0.8712, "step": 20740 }, { "epoch": 1.036360003995605, "grad_norm": 2.4030141830444336, "learning_rate": 4.71458442771551e-05, "loss": 0.8682, "step": 20750 }, { "epoch": 1.0368594545999401, "grad_norm": 3.834731340408325, "learning_rate": 4.7106682362787164e-05, "loss": 0.8571, "step": 20760 }, { "epoch": 1.0373589052042753, "grad_norm": 1.4459505081176758, "learning_rate": 4.706752222924295e-05, "loss": 1.0204, "step": 20770 }, { "epoch": 1.0378583558086105, "grad_norm": 4.835209369659424, "learning_rate": 4.7028363900625336e-05, "loss": 0.9897, "step": 20780 }, { "epoch": 1.0383578064129457, "grad_norm": 1.9089804887771606, "learning_rate": 4.6989207401036104e-05, "loss": 0.7256, "step": 20790 }, { "epoch": 1.038857257017281, "grad_norm": 1.5284123420715332, "learning_rate": 4.695005275457588e-05, "loss": 0.7095, "step": 20800 }, { "epoch": 1.0393567076216161, "grad_norm": 1.6941580772399902, "learning_rate": 4.6910899985344194e-05, "loss": 0.6809, "step": 20810 }, { "epoch": 1.0398561582259513, "grad_norm": 2.9180054664611816, "learning_rate": 4.687174911743938e-05, "loss": 0.8111, "step": 20820 }, { "epoch": 1.0403556088302868, "grad_norm": 2.200146436691284, "learning_rate": 4.68326001749586e-05, "loss": 0.7259, "step": 20830 }, { "epoch": 1.040855059434622, "grad_norm": 3.2036914825439453, "learning_rate": 4.679345318199791e-05, "loss": 0.7424, "step": 20840 }, { "epoch": 1.0413545100389572, "grad_norm": 2.4678800106048584, "learning_rate": 4.675430816265203e-05, "loss": 1.1705, "step": 20850 }, { "epoch": 1.0418539606432924, "grad_norm": 3.798678398132324, "learning_rate": 4.671516514101459e-05, "loss": 0.8046, "step": 20860 }, { "epoch": 1.0423534112476276, "grad_norm": 0.9682080745697021, "learning_rate": 4.667602414117793e-05, "loss": 0.9256, "step": 20870 }, { "epoch": 1.0428528618519628, "grad_norm": 3.2484371662139893, "learning_rate": 4.6636885187233136e-05, "loss": 0.7042, "step": 20880 }, { "epoch": 1.043352312456298, "grad_norm": 1.4906537532806396, "learning_rate": 4.659774830327009e-05, "loss": 0.8023, "step": 20890 }, { "epoch": 1.0438517630606332, "grad_norm": 1.853930115699768, "learning_rate": 4.6558613513377326e-05, "loss": 0.6735, "step": 20900 }, { "epoch": 1.0443512136649686, "grad_norm": 0.9832746982574463, "learning_rate": 4.651948084164213e-05, "loss": 0.7538, "step": 20910 }, { "epoch": 1.0448506642693038, "grad_norm": 1.3980239629745483, "learning_rate": 4.648035031215052e-05, "loss": 0.9877, "step": 20920 }, { "epoch": 1.045350114873639, "grad_norm": 4.709447383880615, "learning_rate": 4.644122194898712e-05, "loss": 0.7415, "step": 20930 }, { "epoch": 1.0458495654779743, "grad_norm": 3.0428555011749268, "learning_rate": 4.640209577623529e-05, "loss": 0.9322, "step": 20940 }, { "epoch": 1.0463490160823095, "grad_norm": 1.5283839702606201, "learning_rate": 4.636297181797698e-05, "loss": 0.8872, "step": 20950 }, { "epoch": 1.0468484666866447, "grad_norm": 2.7132692337036133, "learning_rate": 4.632385009829282e-05, "loss": 0.7143, "step": 20960 }, { "epoch": 1.0473479172909799, "grad_norm": 1.8737280368804932, "learning_rate": 4.6284730641262044e-05, "loss": 0.8883, "step": 20970 }, { "epoch": 1.047847367895315, "grad_norm": 3.156431198120117, "learning_rate": 4.62456134709625e-05, "loss": 1.0162, "step": 20980 }, { "epoch": 1.0483468184996503, "grad_norm": 1.4966521263122559, "learning_rate": 4.6206498611470625e-05, "loss": 0.9248, "step": 20990 }, { "epoch": 1.0488462691039857, "grad_norm": 2.4617843627929688, "learning_rate": 4.616738608686143e-05, "loss": 0.8857, "step": 21000 }, { "epoch": 1.049345719708321, "grad_norm": 1.136779546737671, "learning_rate": 4.6128275921208505e-05, "loss": 0.6961, "step": 21010 }, { "epoch": 1.0498451703126561, "grad_norm": 1.9916175603866577, "learning_rate": 4.6089168138583994e-05, "loss": 0.7659, "step": 21020 }, { "epoch": 1.0503446209169913, "grad_norm": 1.5092531442642212, "learning_rate": 4.605006276305852e-05, "loss": 0.887, "step": 21030 }, { "epoch": 1.0508440715213265, "grad_norm": 2.9537715911865234, "learning_rate": 4.601095981870129e-05, "loss": 0.7141, "step": 21040 }, { "epoch": 1.0513435221256617, "grad_norm": 1.9168869256973267, "learning_rate": 4.597185932957997e-05, "loss": 0.9075, "step": 21050 }, { "epoch": 1.051842972729997, "grad_norm": 1.1907002925872803, "learning_rate": 4.593276131976075e-05, "loss": 1.1503, "step": 21060 }, { "epoch": 1.0523424233343321, "grad_norm": 1.8155781030654907, "learning_rate": 4.589366581330826e-05, "loss": 0.7788, "step": 21070 }, { "epoch": 1.0528418739386676, "grad_norm": 1.92268705368042, "learning_rate": 4.5854572834285586e-05, "loss": 0.9962, "step": 21080 }, { "epoch": 1.0533413245430028, "grad_norm": 3.222135066986084, "learning_rate": 4.581548240675433e-05, "loss": 0.9354, "step": 21090 }, { "epoch": 1.053840775147338, "grad_norm": 4.756688594818115, "learning_rate": 4.577639455477443e-05, "loss": 0.7255, "step": 21100 }, { "epoch": 1.0543402257516732, "grad_norm": 2.4278318881988525, "learning_rate": 4.573730930240429e-05, "loss": 0.8412, "step": 21110 }, { "epoch": 1.0548396763560084, "grad_norm": 0.7765146493911743, "learning_rate": 4.56982266737007e-05, "loss": 1.0732, "step": 21120 }, { "epoch": 1.0553391269603436, "grad_norm": 1.1242207288742065, "learning_rate": 4.565914669271884e-05, "loss": 0.8613, "step": 21130 }, { "epoch": 1.0558385775646788, "grad_norm": 4.15506649017334, "learning_rate": 4.5620069383512275e-05, "loss": 0.7641, "step": 21140 }, { "epoch": 1.056338028169014, "grad_norm": 1.001942753791809, "learning_rate": 4.5580994770132886e-05, "loss": 0.6772, "step": 21150 }, { "epoch": 1.0568374787733492, "grad_norm": 1.7589328289031982, "learning_rate": 4.554192287663093e-05, "loss": 0.8932, "step": 21160 }, { "epoch": 1.0573369293776846, "grad_norm": 0.6992228031158447, "learning_rate": 4.5502853727054965e-05, "loss": 0.8226, "step": 21170 }, { "epoch": 1.0578363799820198, "grad_norm": 1.098769187927246, "learning_rate": 4.546378734545188e-05, "loss": 0.8497, "step": 21180 }, { "epoch": 1.058335830586355, "grad_norm": 1.690246343612671, "learning_rate": 4.5424723755866874e-05, "loss": 0.9684, "step": 21190 }, { "epoch": 1.0588352811906903, "grad_norm": 3.9650039672851562, "learning_rate": 4.538566298234339e-05, "loss": 0.848, "step": 21200 }, { "epoch": 1.0593347317950255, "grad_norm": 5.305043697357178, "learning_rate": 4.534660504892315e-05, "loss": 0.8502, "step": 21210 }, { "epoch": 1.0598341823993607, "grad_norm": 2.613572597503662, "learning_rate": 4.530754997964613e-05, "loss": 0.9342, "step": 21220 }, { "epoch": 1.0603336330036959, "grad_norm": 2.373366355895996, "learning_rate": 4.526849779855058e-05, "loss": 0.7232, "step": 21230 }, { "epoch": 1.060833083608031, "grad_norm": 1.9203239679336548, "learning_rate": 4.5229448529672886e-05, "loss": 0.8832, "step": 21240 }, { "epoch": 1.0613325342123665, "grad_norm": 4.835063934326172, "learning_rate": 4.519040219704771e-05, "loss": 0.6922, "step": 21250 }, { "epoch": 1.0618319848167017, "grad_norm": 1.0359176397323608, "learning_rate": 4.515135882470791e-05, "loss": 0.7786, "step": 21260 }, { "epoch": 1.062331435421037, "grad_norm": 1.2481149435043335, "learning_rate": 4.511231843668451e-05, "loss": 0.6857, "step": 21270 }, { "epoch": 1.0628308860253721, "grad_norm": 3.979484796524048, "learning_rate": 4.507328105700665e-05, "loss": 0.9468, "step": 21280 }, { "epoch": 1.0633303366297073, "grad_norm": 1.9539848566055298, "learning_rate": 4.50342467097017e-05, "loss": 0.6989, "step": 21290 }, { "epoch": 1.0638297872340425, "grad_norm": 2.3054099082946777, "learning_rate": 4.4995215418795085e-05, "loss": 0.9694, "step": 21300 }, { "epoch": 1.0643292378383777, "grad_norm": 2.1202197074890137, "learning_rate": 4.495618720831042e-05, "loss": 0.9991, "step": 21310 }, { "epoch": 1.064828688442713, "grad_norm": 3.8901000022888184, "learning_rate": 4.491716210226935e-05, "loss": 0.9782, "step": 21320 }, { "epoch": 1.0653281390470482, "grad_norm": 2.9180335998535156, "learning_rate": 4.487814012469169e-05, "loss": 0.6976, "step": 21330 }, { "epoch": 1.0658275896513836, "grad_norm": 2.9929444789886475, "learning_rate": 4.4839121299595235e-05, "loss": 0.8993, "step": 21340 }, { "epoch": 1.0663270402557188, "grad_norm": 1.6835776567459106, "learning_rate": 4.4800105650995945e-05, "loss": 0.7274, "step": 21350 }, { "epoch": 1.066826490860054, "grad_norm": 1.4402450323104858, "learning_rate": 4.476109320290776e-05, "loss": 0.8332, "step": 21360 }, { "epoch": 1.0673259414643892, "grad_norm": 2.380845069885254, "learning_rate": 4.472208397934264e-05, "loss": 0.8302, "step": 21370 }, { "epoch": 1.0678253920687244, "grad_norm": 1.4167990684509277, "learning_rate": 4.4683078004310624e-05, "loss": 0.7856, "step": 21380 }, { "epoch": 1.0683248426730596, "grad_norm": 1.6825275421142578, "learning_rate": 4.4644075301819665e-05, "loss": 0.8426, "step": 21390 }, { "epoch": 1.0688242932773948, "grad_norm": 1.3320188522338867, "learning_rate": 4.460507589587577e-05, "loss": 1.0464, "step": 21400 }, { "epoch": 1.06932374388173, "grad_norm": 1.1074612140655518, "learning_rate": 4.4566079810482896e-05, "loss": 0.8321, "step": 21410 }, { "epoch": 1.0698231944860654, "grad_norm": 4.844636917114258, "learning_rate": 4.452708706964295e-05, "loss": 0.7913, "step": 21420 }, { "epoch": 1.0703226450904006, "grad_norm": 1.8939027786254883, "learning_rate": 4.448809769735577e-05, "loss": 0.6461, "step": 21430 }, { "epoch": 1.0708220956947359, "grad_norm": 1.686738133430481, "learning_rate": 4.4449111717619165e-05, "loss": 0.727, "step": 21440 }, { "epoch": 1.071321546299071, "grad_norm": 1.9671411514282227, "learning_rate": 4.441012915442881e-05, "loss": 0.7398, "step": 21450 }, { "epoch": 1.0718209969034063, "grad_norm": 2.4284956455230713, "learning_rate": 4.437115003177831e-05, "loss": 1.0047, "step": 21460 }, { "epoch": 1.0723204475077415, "grad_norm": 0.8931595683097839, "learning_rate": 4.43321743736591e-05, "loss": 1.0206, "step": 21470 }, { "epoch": 1.0728198981120767, "grad_norm": 1.7112840414047241, "learning_rate": 4.429320220406056e-05, "loss": 1.003, "step": 21480 }, { "epoch": 1.0733193487164119, "grad_norm": 2.095308542251587, "learning_rate": 4.425423354696985e-05, "loss": 0.8374, "step": 21490 }, { "epoch": 1.073818799320747, "grad_norm": 2.3773016929626465, "learning_rate": 4.421526842637203e-05, "loss": 0.9291, "step": 21500 }, { "epoch": 1.0743182499250823, "grad_norm": 1.2903590202331543, "learning_rate": 4.41763068662499e-05, "loss": 0.756, "step": 21510 }, { "epoch": 1.0748177005294177, "grad_norm": 1.8999165296554565, "learning_rate": 4.413734889058417e-05, "loss": 0.7325, "step": 21520 }, { "epoch": 1.075317151133753, "grad_norm": 0.8447512984275818, "learning_rate": 4.409839452335328e-05, "loss": 0.7226, "step": 21530 }, { "epoch": 1.0758166017380881, "grad_norm": 1.1658834218978882, "learning_rate": 4.405944378853348e-05, "loss": 0.6476, "step": 21540 }, { "epoch": 1.0763160523424233, "grad_norm": 2.2168357372283936, "learning_rate": 4.402049671009874e-05, "loss": 0.8539, "step": 21550 }, { "epoch": 1.0768155029467585, "grad_norm": 2.6177611351013184, "learning_rate": 4.398155331202084e-05, "loss": 1.0979, "step": 21560 }, { "epoch": 1.0773149535510937, "grad_norm": 2.435091257095337, "learning_rate": 4.394261361826923e-05, "loss": 0.8169, "step": 21570 }, { "epoch": 1.077814404155429, "grad_norm": 1.081041693687439, "learning_rate": 4.390367765281113e-05, "loss": 0.826, "step": 21580 }, { "epoch": 1.0783138547597642, "grad_norm": 2.6045467853546143, "learning_rate": 4.3864745439611445e-05, "loss": 0.86, "step": 21590 }, { "epoch": 1.0788133053640996, "grad_norm": 2.4726202487945557, "learning_rate": 4.382581700263276e-05, "loss": 0.8407, "step": 21600 }, { "epoch": 1.0793127559684348, "grad_norm": 1.0080868005752563, "learning_rate": 4.378689236583538e-05, "loss": 0.6806, "step": 21610 }, { "epoch": 1.07981220657277, "grad_norm": 0.793289840221405, "learning_rate": 4.374797155317722e-05, "loss": 0.6821, "step": 21620 }, { "epoch": 1.0803116571771052, "grad_norm": 2.7318053245544434, "learning_rate": 4.370905458861386e-05, "loss": 0.7765, "step": 21630 }, { "epoch": 1.0808111077814404, "grad_norm": 1.3041576147079468, "learning_rate": 4.367014149609852e-05, "loss": 0.6706, "step": 21640 }, { "epoch": 1.0813105583857756, "grad_norm": 1.8340661525726318, "learning_rate": 4.363123229958204e-05, "loss": 0.701, "step": 21650 }, { "epoch": 1.0818100089901108, "grad_norm": 2.4773507118225098, "learning_rate": 4.359232702301282e-05, "loss": 0.8656, "step": 21660 }, { "epoch": 1.082309459594446, "grad_norm": 2.2554712295532227, "learning_rate": 4.3553425690336904e-05, "loss": 0.7446, "step": 21670 }, { "epoch": 1.0828089101987812, "grad_norm": 1.3553982973098755, "learning_rate": 4.351452832549786e-05, "loss": 0.6754, "step": 21680 }, { "epoch": 1.0833083608031167, "grad_norm": 1.8978179693222046, "learning_rate": 4.347563495243688e-05, "loss": 1.1477, "step": 21690 }, { "epoch": 1.0838078114074519, "grad_norm": 2.054039716720581, "learning_rate": 4.343674559509263e-05, "loss": 0.894, "step": 21700 }, { "epoch": 1.084307262011787, "grad_norm": 0.769442081451416, "learning_rate": 4.3397860277401336e-05, "loss": 0.9464, "step": 21710 }, { "epoch": 1.0848067126161223, "grad_norm": 3.470543146133423, "learning_rate": 4.335897902329672e-05, "loss": 1.0199, "step": 21720 }, { "epoch": 1.0853061632204575, "grad_norm": 2.183955669403076, "learning_rate": 4.3320101856710036e-05, "loss": 1.1479, "step": 21730 }, { "epoch": 1.0858056138247927, "grad_norm": 1.3213378190994263, "learning_rate": 4.328122880156998e-05, "loss": 0.7217, "step": 21740 }, { "epoch": 1.0863050644291279, "grad_norm": 2.4817240238189697, "learning_rate": 4.3242359881802754e-05, "loss": 0.8431, "step": 21750 }, { "epoch": 1.086804515033463, "grad_norm": 3.1147491931915283, "learning_rate": 4.3203495121331995e-05, "loss": 1.037, "step": 21760 }, { "epoch": 1.0873039656377985, "grad_norm": 3.9146809577941895, "learning_rate": 4.316463454407876e-05, "loss": 1.0937, "step": 21770 }, { "epoch": 1.0878034162421337, "grad_norm": 1.6914969682693481, "learning_rate": 4.312577817396162e-05, "loss": 0.9083, "step": 21780 }, { "epoch": 1.088302866846469, "grad_norm": 1.6754413843154907, "learning_rate": 4.308692603489642e-05, "loss": 0.8159, "step": 21790 }, { "epoch": 1.0888023174508041, "grad_norm": 1.836352825164795, "learning_rate": 4.304807815079652e-05, "loss": 0.7718, "step": 21800 }, { "epoch": 1.0893017680551393, "grad_norm": 1.4404078722000122, "learning_rate": 4.3009234545572615e-05, "loss": 0.8602, "step": 21810 }, { "epoch": 1.0898012186594745, "grad_norm": 1.9482877254486084, "learning_rate": 4.2970395243132734e-05, "loss": 0.7122, "step": 21820 }, { "epoch": 1.0903006692638098, "grad_norm": 4.805246829986572, "learning_rate": 4.293156026738232e-05, "loss": 0.8193, "step": 21830 }, { "epoch": 1.090800119868145, "grad_norm": 2.567323684692383, "learning_rate": 4.28927296422241e-05, "loss": 0.9152, "step": 21840 }, { "epoch": 1.0912995704724802, "grad_norm": 1.1770821809768677, "learning_rate": 4.285390339155815e-05, "loss": 0.6835, "step": 21850 }, { "epoch": 1.0917990210768156, "grad_norm": 1.427119493484497, "learning_rate": 4.281508153928184e-05, "loss": 0.6686, "step": 21860 }, { "epoch": 1.0922984716811508, "grad_norm": 1.1898581981658936, "learning_rate": 4.277626410928988e-05, "loss": 0.7239, "step": 21870 }, { "epoch": 1.092797922285486, "grad_norm": 0.605453372001648, "learning_rate": 4.273745112547419e-05, "loss": 0.6242, "step": 21880 }, { "epoch": 1.0932973728898212, "grad_norm": 2.172987937927246, "learning_rate": 4.2698642611723975e-05, "loss": 0.7557, "step": 21890 }, { "epoch": 1.0937968234941564, "grad_norm": 4.230695724487305, "learning_rate": 4.265983859192573e-05, "loss": 1.0447, "step": 21900 }, { "epoch": 1.0942962740984916, "grad_norm": 2.5373029708862305, "learning_rate": 4.262103908996312e-05, "loss": 0.9028, "step": 21910 }, { "epoch": 1.0947957247028268, "grad_norm": 1.8671683073043823, "learning_rate": 4.258224412971708e-05, "loss": 0.9087, "step": 21920 }, { "epoch": 1.095295175307162, "grad_norm": 4.17625617980957, "learning_rate": 4.254345373506571e-05, "loss": 0.6761, "step": 21930 }, { "epoch": 1.0957946259114975, "grad_norm": 0.9671834111213684, "learning_rate": 4.2504667929884326e-05, "loss": 0.6663, "step": 21940 }, { "epoch": 1.0962940765158327, "grad_norm": 2.4726083278656006, "learning_rate": 4.2465886738045426e-05, "loss": 0.7318, "step": 21950 }, { "epoch": 1.0967935271201679, "grad_norm": 2.255537986755371, "learning_rate": 4.242711018341865e-05, "loss": 0.7995, "step": 21960 }, { "epoch": 1.097292977724503, "grad_norm": 2.2027065753936768, "learning_rate": 4.238833828987079e-05, "loss": 0.8499, "step": 21970 }, { "epoch": 1.0977924283288383, "grad_norm": 1.4387292861938477, "learning_rate": 4.234957108126577e-05, "loss": 0.9985, "step": 21980 }, { "epoch": 1.0982918789331735, "grad_norm": 1.425628423690796, "learning_rate": 4.2310808581464615e-05, "loss": 1.018, "step": 21990 }, { "epoch": 1.0987913295375087, "grad_norm": 1.7563644647598267, "learning_rate": 4.227205081432548e-05, "loss": 0.9115, "step": 22000 }, { "epoch": 1.099290780141844, "grad_norm": 2.4520087242126465, "learning_rate": 4.2233297803703586e-05, "loss": 0.7245, "step": 22010 }, { "epoch": 1.099790230746179, "grad_norm": 0.8853928446769714, "learning_rate": 4.219454957345125e-05, "loss": 0.6746, "step": 22020 }, { "epoch": 1.1002896813505145, "grad_norm": 1.3165818452835083, "learning_rate": 4.215580614741778e-05, "loss": 0.7532, "step": 22030 }, { "epoch": 1.1007891319548497, "grad_norm": 1.3060871362686157, "learning_rate": 4.211706754944963e-05, "loss": 0.8213, "step": 22040 }, { "epoch": 1.101288582559185, "grad_norm": 1.1650493144989014, "learning_rate": 4.207833380339022e-05, "loss": 0.8142, "step": 22050 }, { "epoch": 1.1017880331635201, "grad_norm": 2.5564217567443848, "learning_rate": 4.2039604933079976e-05, "loss": 0.8618, "step": 22060 }, { "epoch": 1.1022874837678553, "grad_norm": 3.255312442779541, "learning_rate": 4.2000880962356346e-05, "loss": 0.7137, "step": 22070 }, { "epoch": 1.1027869343721906, "grad_norm": 1.4971452951431274, "learning_rate": 4.196216191505377e-05, "loss": 0.9192, "step": 22080 }, { "epoch": 1.1032863849765258, "grad_norm": 1.3835216760635376, "learning_rate": 4.192344781500362e-05, "loss": 0.8122, "step": 22090 }, { "epoch": 1.103785835580861, "grad_norm": 2.2991790771484375, "learning_rate": 4.188473868603427e-05, "loss": 0.764, "step": 22100 }, { "epoch": 1.1042852861851964, "grad_norm": 1.5846309661865234, "learning_rate": 4.184603455197098e-05, "loss": 0.792, "step": 22110 }, { "epoch": 1.1047847367895316, "grad_norm": 2.542344570159912, "learning_rate": 4.180733543663599e-05, "loss": 0.721, "step": 22120 }, { "epoch": 1.1052841873938668, "grad_norm": 5.463176250457764, "learning_rate": 4.1768641363848434e-05, "loss": 0.9708, "step": 22130 }, { "epoch": 1.105783637998202, "grad_norm": 2.457188129425049, "learning_rate": 4.1729952357424326e-05, "loss": 0.9367, "step": 22140 }, { "epoch": 1.1062830886025372, "grad_norm": 1.1759368181228638, "learning_rate": 4.169126844117658e-05, "loss": 0.7094, "step": 22150 }, { "epoch": 1.1067825392068724, "grad_norm": 1.2258940935134888, "learning_rate": 4.165258963891495e-05, "loss": 0.7657, "step": 22160 }, { "epoch": 1.1072819898112076, "grad_norm": 2.286513090133667, "learning_rate": 4.161391597444608e-05, "loss": 0.7684, "step": 22170 }, { "epoch": 1.1077814404155428, "grad_norm": 2.545077085494995, "learning_rate": 4.157524747157342e-05, "loss": 0.7825, "step": 22180 }, { "epoch": 1.108280891019878, "grad_norm": 2.474790573120117, "learning_rate": 4.153658415409727e-05, "loss": 0.6016, "step": 22190 }, { "epoch": 1.1087803416242135, "grad_norm": 1.7622703313827515, "learning_rate": 4.149792604581468e-05, "loss": 0.8443, "step": 22200 }, { "epoch": 1.1092797922285487, "grad_norm": 1.2006711959838867, "learning_rate": 4.145927317051961e-05, "loss": 0.901, "step": 22210 }, { "epoch": 1.1097792428328839, "grad_norm": 1.397416591644287, "learning_rate": 4.142062555200268e-05, "loss": 0.6977, "step": 22220 }, { "epoch": 1.110278693437219, "grad_norm": 1.9232399463653564, "learning_rate": 4.138198321405134e-05, "loss": 0.868, "step": 22230 }, { "epoch": 1.1107781440415543, "grad_norm": 4.904851913452148, "learning_rate": 4.134334618044976e-05, "loss": 0.8662, "step": 22240 }, { "epoch": 1.1112775946458895, "grad_norm": 2.4636669158935547, "learning_rate": 4.1304714474978864e-05, "loss": 0.7113, "step": 22250 }, { "epoch": 1.1117770452502247, "grad_norm": 1.4750014543533325, "learning_rate": 4.1266088121416286e-05, "loss": 0.7375, "step": 22260 }, { "epoch": 1.11227649585456, "grad_norm": 1.588144302368164, "learning_rate": 4.1227467143536375e-05, "loss": 0.7707, "step": 22270 }, { "epoch": 1.1127759464588953, "grad_norm": 3.0492358207702637, "learning_rate": 4.1188851565110154e-05, "loss": 0.7105, "step": 22280 }, { "epoch": 1.1132753970632305, "grad_norm": 1.858864665031433, "learning_rate": 4.115024140990532e-05, "loss": 1.1374, "step": 22290 }, { "epoch": 1.1137748476675657, "grad_norm": 3.0037105083465576, "learning_rate": 4.11116367016863e-05, "loss": 1.0394, "step": 22300 }, { "epoch": 1.114274298271901, "grad_norm": 2.810905694961548, "learning_rate": 4.107303746421407e-05, "loss": 0.9284, "step": 22310 }, { "epoch": 1.1147737488762361, "grad_norm": 1.5444875955581665, "learning_rate": 4.10344437212463e-05, "loss": 0.8357, "step": 22320 }, { "epoch": 1.1152731994805714, "grad_norm": 2.53558611869812, "learning_rate": 4.099585549653724e-05, "loss": 1.0276, "step": 22330 }, { "epoch": 1.1157726500849066, "grad_norm": 3.1491739749908447, "learning_rate": 4.095727281383779e-05, "loss": 0.8039, "step": 22340 }, { "epoch": 1.1162721006892418, "grad_norm": 5.887537956237793, "learning_rate": 4.091869569689542e-05, "loss": 0.9011, "step": 22350 }, { "epoch": 1.116771551293577, "grad_norm": 1.7766972780227661, "learning_rate": 4.088012416945414e-05, "loss": 0.8803, "step": 22360 }, { "epoch": 1.1172710018979124, "grad_norm": 1.5830647945404053, "learning_rate": 4.0841558255254555e-05, "loss": 0.882, "step": 22370 }, { "epoch": 1.1177704525022476, "grad_norm": 1.3810070753097534, "learning_rate": 4.080299797803383e-05, "loss": 0.5441, "step": 22380 }, { "epoch": 1.1182699031065828, "grad_norm": 2.6069095134735107, "learning_rate": 4.076444336152562e-05, "loss": 0.9322, "step": 22390 }, { "epoch": 1.118769353710918, "grad_norm": 3.663212299346924, "learning_rate": 4.0725894429460124e-05, "loss": 0.8004, "step": 22400 }, { "epoch": 1.1192688043152532, "grad_norm": 1.2883365154266357, "learning_rate": 4.068735120556402e-05, "loss": 0.9513, "step": 22410 }, { "epoch": 1.1197682549195884, "grad_norm": 1.2880377769470215, "learning_rate": 4.06488137135605e-05, "loss": 1.0123, "step": 22420 }, { "epoch": 1.1202677055239236, "grad_norm": 1.5551667213439941, "learning_rate": 4.061028197716919e-05, "loss": 0.7889, "step": 22430 }, { "epoch": 1.1207671561282588, "grad_norm": 1.2151408195495605, "learning_rate": 4.057175602010624e-05, "loss": 0.7681, "step": 22440 }, { "epoch": 1.1212666067325943, "grad_norm": 3.719527244567871, "learning_rate": 4.053323586608415e-05, "loss": 0.8907, "step": 22450 }, { "epoch": 1.1217660573369295, "grad_norm": 1.5962892770767212, "learning_rate": 4.0494721538811916e-05, "loss": 1.0818, "step": 22460 }, { "epoch": 1.1222655079412647, "grad_norm": 2.084139823913574, "learning_rate": 4.0456213061994934e-05, "loss": 0.5582, "step": 22470 }, { "epoch": 1.1227649585455999, "grad_norm": 2.4208035469055176, "learning_rate": 4.0417710459335015e-05, "loss": 0.6898, "step": 22480 }, { "epoch": 1.123264409149935, "grad_norm": 2.1943869590759277, "learning_rate": 4.03792137545303e-05, "loss": 0.9084, "step": 22490 }, { "epoch": 1.1237638597542703, "grad_norm": 3.631171464920044, "learning_rate": 4.034072297127536e-05, "loss": 0.8411, "step": 22500 }, { "epoch": 1.1242633103586055, "grad_norm": 0.9454237222671509, "learning_rate": 4.030223813326107e-05, "loss": 0.7007, "step": 22510 }, { "epoch": 1.1247627609629407, "grad_norm": 1.5163288116455078, "learning_rate": 4.02637592641747e-05, "loss": 0.7863, "step": 22520 }, { "epoch": 1.125262211567276, "grad_norm": 2.3316762447357178, "learning_rate": 4.0225286387699785e-05, "loss": 0.9502, "step": 22530 }, { "epoch": 1.125761662171611, "grad_norm": 1.7603729963302612, "learning_rate": 4.0186819527516214e-05, "loss": 0.8574, "step": 22540 }, { "epoch": 1.1262611127759465, "grad_norm": 2.4625086784362793, "learning_rate": 4.014835870730019e-05, "loss": 0.9053, "step": 22550 }, { "epoch": 1.1267605633802817, "grad_norm": 2.543297290802002, "learning_rate": 4.0109903950724134e-05, "loss": 0.8014, "step": 22560 }, { "epoch": 1.127260013984617, "grad_norm": 1.9582078456878662, "learning_rate": 4.007145528145681e-05, "loss": 0.8648, "step": 22570 }, { "epoch": 1.1277594645889522, "grad_norm": 4.234289646148682, "learning_rate": 4.003301272316316e-05, "loss": 0.8282, "step": 22580 }, { "epoch": 1.1282589151932874, "grad_norm": 2.0867433547973633, "learning_rate": 3.9994576299504425e-05, "loss": 0.7685, "step": 22590 }, { "epoch": 1.1287583657976226, "grad_norm": 2.516019344329834, "learning_rate": 3.995614603413804e-05, "loss": 0.6542, "step": 22600 }, { "epoch": 1.1292578164019578, "grad_norm": 1.555553674697876, "learning_rate": 3.9917721950717644e-05, "loss": 0.9156, "step": 22610 }, { "epoch": 1.1297572670062932, "grad_norm": 1.5469837188720703, "learning_rate": 3.987930407289312e-05, "loss": 0.7304, "step": 22620 }, { "epoch": 1.1302567176106284, "grad_norm": 2.492070436477661, "learning_rate": 3.9840892424310447e-05, "loss": 1.0429, "step": 22630 }, { "epoch": 1.1307561682149636, "grad_norm": 2.70359468460083, "learning_rate": 3.980248702861186e-05, "loss": 0.825, "step": 22640 }, { "epoch": 1.1312556188192988, "grad_norm": 2.0422637462615967, "learning_rate": 3.9764087909435686e-05, "loss": 0.7127, "step": 22650 }, { "epoch": 1.131755069423634, "grad_norm": 2.4656219482421875, "learning_rate": 3.97256950904164e-05, "loss": 0.9567, "step": 22660 }, { "epoch": 1.1322545200279692, "grad_norm": 2.2993409633636475, "learning_rate": 3.968730859518464e-05, "loss": 0.7306, "step": 22670 }, { "epoch": 1.1327539706323044, "grad_norm": 0.9881686568260193, "learning_rate": 3.964892844736707e-05, "loss": 0.6873, "step": 22680 }, { "epoch": 1.1332534212366396, "grad_norm": 3.301509141921997, "learning_rate": 3.961055467058652e-05, "loss": 0.8113, "step": 22690 }, { "epoch": 1.1337528718409748, "grad_norm": 1.5210189819335938, "learning_rate": 3.957218728846187e-05, "loss": 0.8103, "step": 22700 }, { "epoch": 1.13425232244531, "grad_norm": 2.3673174381256104, "learning_rate": 3.953382632460806e-05, "loss": 0.9072, "step": 22710 }, { "epoch": 1.1347517730496455, "grad_norm": 1.4789212942123413, "learning_rate": 3.94954718026361e-05, "loss": 0.8392, "step": 22720 }, { "epoch": 1.1352512236539807, "grad_norm": 4.791720390319824, "learning_rate": 3.9457123746153026e-05, "loss": 0.8257, "step": 22730 }, { "epoch": 1.1357506742583159, "grad_norm": 2.2029247283935547, "learning_rate": 3.941878217876187e-05, "loss": 0.9206, "step": 22740 }, { "epoch": 1.136250124862651, "grad_norm": 3.179882049560547, "learning_rate": 3.938044712406171e-05, "loss": 0.7342, "step": 22750 }, { "epoch": 1.1367495754669863, "grad_norm": 1.4464020729064941, "learning_rate": 3.934211860564759e-05, "loss": 0.6641, "step": 22760 }, { "epoch": 1.1372490260713215, "grad_norm": 1.3858355283737183, "learning_rate": 3.930379664711054e-05, "loss": 1.0155, "step": 22770 }, { "epoch": 1.1377484766756567, "grad_norm": 1.6937365531921387, "learning_rate": 3.926548127203753e-05, "loss": 0.9355, "step": 22780 }, { "epoch": 1.1382479272799921, "grad_norm": 2.14349627494812, "learning_rate": 3.922717250401153e-05, "loss": 0.6883, "step": 22790 }, { "epoch": 1.1387473778843273, "grad_norm": 2.1082565784454346, "learning_rate": 3.918887036661137e-05, "loss": 0.9609, "step": 22800 }, { "epoch": 1.1392468284886625, "grad_norm": 2.299395799636841, "learning_rate": 3.9150574883411874e-05, "loss": 0.9142, "step": 22810 }, { "epoch": 1.1397462790929977, "grad_norm": 1.4147734642028809, "learning_rate": 3.911228607798373e-05, "loss": 0.8741, "step": 22820 }, { "epoch": 1.140245729697333, "grad_norm": 1.0008089542388916, "learning_rate": 3.907400397389351e-05, "loss": 0.8697, "step": 22830 }, { "epoch": 1.1407451803016682, "grad_norm": 2.649331569671631, "learning_rate": 3.9035728594703695e-05, "loss": 0.8416, "step": 22840 }, { "epoch": 1.1412446309060034, "grad_norm": 1.722180724143982, "learning_rate": 3.8997459963972576e-05, "loss": 0.8263, "step": 22850 }, { "epoch": 1.1417440815103386, "grad_norm": 1.484548568725586, "learning_rate": 3.8959198105254346e-05, "loss": 0.9039, "step": 22860 }, { "epoch": 1.1422435321146738, "grad_norm": 3.1194725036621094, "learning_rate": 3.892094304209898e-05, "loss": 1.1315, "step": 22870 }, { "epoch": 1.142742982719009, "grad_norm": 3.557985544204712, "learning_rate": 3.888269479805231e-05, "loss": 0.8009, "step": 22880 }, { "epoch": 1.1432424333233444, "grad_norm": 1.5057775974273682, "learning_rate": 3.8844453396655936e-05, "loss": 0.7724, "step": 22890 }, { "epoch": 1.1437418839276796, "grad_norm": 2.3584694862365723, "learning_rate": 3.88062188614473e-05, "loss": 0.876, "step": 22900 }, { "epoch": 1.1442413345320148, "grad_norm": 1.5394564867019653, "learning_rate": 3.876799121595954e-05, "loss": 0.8241, "step": 22910 }, { "epoch": 1.14474078513635, "grad_norm": 3.491891384124756, "learning_rate": 3.8729770483721645e-05, "loss": 0.8303, "step": 22920 }, { "epoch": 1.1452402357406852, "grad_norm": 5.012977600097656, "learning_rate": 3.8691556688258255e-05, "loss": 0.8466, "step": 22930 }, { "epoch": 1.1457396863450204, "grad_norm": 1.206251621246338, "learning_rate": 3.8653349853089814e-05, "loss": 0.8151, "step": 22940 }, { "epoch": 1.1462391369493556, "grad_norm": 3.0752151012420654, "learning_rate": 3.861515000173244e-05, "loss": 0.808, "step": 22950 }, { "epoch": 1.1467385875536908, "grad_norm": 1.3112505674362183, "learning_rate": 3.857695715769797e-05, "loss": 0.8653, "step": 22960 }, { "epoch": 1.1472380381580263, "grad_norm": 2.9006643295288086, "learning_rate": 3.853877134449391e-05, "loss": 0.9092, "step": 22970 }, { "epoch": 1.1477374887623615, "grad_norm": 1.3856655359268188, "learning_rate": 3.8500592585623476e-05, "loss": 0.7803, "step": 22980 }, { "epoch": 1.1482369393666967, "grad_norm": 1.8607912063598633, "learning_rate": 3.84624209045855e-05, "loss": 0.7026, "step": 22990 }, { "epoch": 1.148736389971032, "grad_norm": 2.954310894012451, "learning_rate": 3.842425632487451e-05, "loss": 0.9582, "step": 23000 }, { "epoch": 1.149235840575367, "grad_norm": 3.082057237625122, "learning_rate": 3.8386098869980584e-05, "loss": 0.8102, "step": 23010 }, { "epoch": 1.1497352911797023, "grad_norm": 1.6299151182174683, "learning_rate": 3.83479485633895e-05, "loss": 0.688, "step": 23020 }, { "epoch": 1.1502347417840375, "grad_norm": 1.6488139629364014, "learning_rate": 3.830980542858256e-05, "loss": 0.7549, "step": 23030 }, { "epoch": 1.1507341923883727, "grad_norm": 2.522825002670288, "learning_rate": 3.827166948903672e-05, "loss": 0.9499, "step": 23040 }, { "epoch": 1.151233642992708, "grad_norm": 2.193681240081787, "learning_rate": 3.8233540768224455e-05, "loss": 0.8278, "step": 23050 }, { "epoch": 1.1517330935970433, "grad_norm": 2.95638370513916, "learning_rate": 3.8195419289613815e-05, "loss": 0.7737, "step": 23060 }, { "epoch": 1.1522325442013786, "grad_norm": 2.197012186050415, "learning_rate": 3.815730507666842e-05, "loss": 0.7072, "step": 23070 }, { "epoch": 1.1527319948057138, "grad_norm": 1.2980161905288696, "learning_rate": 3.811919815284737e-05, "loss": 0.8329, "step": 23080 }, { "epoch": 1.153231445410049, "grad_norm": 1.2120863199234009, "learning_rate": 3.808109854160532e-05, "loss": 0.9046, "step": 23090 }, { "epoch": 1.1537308960143842, "grad_norm": 1.540791630744934, "learning_rate": 3.804300626639239e-05, "loss": 0.7549, "step": 23100 }, { "epoch": 1.1542303466187194, "grad_norm": 1.3808330297470093, "learning_rate": 3.800492135065421e-05, "loss": 0.6932, "step": 23110 }, { "epoch": 1.1547297972230546, "grad_norm": 2.295161008834839, "learning_rate": 3.7966843817831854e-05, "loss": 0.8839, "step": 23120 }, { "epoch": 1.1552292478273898, "grad_norm": 1.6494848728179932, "learning_rate": 3.79287736913619e-05, "loss": 0.7687, "step": 23130 }, { "epoch": 1.1557286984317252, "grad_norm": 2.7643682956695557, "learning_rate": 3.789071099467628e-05, "loss": 0.907, "step": 23140 }, { "epoch": 1.1562281490360604, "grad_norm": 1.0402114391326904, "learning_rate": 3.785265575120247e-05, "loss": 0.8396, "step": 23150 }, { "epoch": 1.1567275996403956, "grad_norm": 2.017916202545166, "learning_rate": 3.781460798436328e-05, "loss": 0.8912, "step": 23160 }, { "epoch": 1.1572270502447308, "grad_norm": 2.552933692932129, "learning_rate": 3.7776567717576934e-05, "loss": 0.8215, "step": 23170 }, { "epoch": 1.157726500849066, "grad_norm": 1.3778395652770996, "learning_rate": 3.773853497425702e-05, "loss": 0.8303, "step": 23180 }, { "epoch": 1.1582259514534012, "grad_norm": 2.2042012214660645, "learning_rate": 3.7700509777812555e-05, "loss": 0.7876, "step": 23190 }, { "epoch": 1.1587254020577364, "grad_norm": 2.222207546234131, "learning_rate": 3.766249215164784e-05, "loss": 0.934, "step": 23200 }, { "epoch": 1.1592248526620716, "grad_norm": 4.275891304016113, "learning_rate": 3.762448211916256e-05, "loss": 1.1166, "step": 23210 }, { "epoch": 1.1597243032664069, "grad_norm": 1.9678081274032593, "learning_rate": 3.758647970375172e-05, "loss": 0.8445, "step": 23220 }, { "epoch": 1.1602237538707423, "grad_norm": 1.8879679441452026, "learning_rate": 3.7548484928805614e-05, "loss": 1.0431, "step": 23230 }, { "epoch": 1.1607232044750775, "grad_norm": 2.698927164077759, "learning_rate": 3.751049781770989e-05, "loss": 0.908, "step": 23240 }, { "epoch": 1.1612226550794127, "grad_norm": 3.3032877445220947, "learning_rate": 3.7472518393845404e-05, "loss": 0.8235, "step": 23250 }, { "epoch": 1.161722105683748, "grad_norm": 1.2432293891906738, "learning_rate": 3.743454668058833e-05, "loss": 0.8948, "step": 23260 }, { "epoch": 1.162221556288083, "grad_norm": 1.9784486293792725, "learning_rate": 3.7396582701310095e-05, "loss": 0.8382, "step": 23270 }, { "epoch": 1.1627210068924183, "grad_norm": 3.0758001804351807, "learning_rate": 3.735862647937734e-05, "loss": 1.0648, "step": 23280 }, { "epoch": 1.1632204574967535, "grad_norm": 3.8586246967315674, "learning_rate": 3.732067803815194e-05, "loss": 0.7155, "step": 23290 }, { "epoch": 1.1637199081010887, "grad_norm": 2.635354518890381, "learning_rate": 3.7282737400991e-05, "loss": 0.6676, "step": 23300 }, { "epoch": 1.1642193587054241, "grad_norm": 1.5208978652954102, "learning_rate": 3.7244804591246796e-05, "loss": 0.9599, "step": 23310 }, { "epoch": 1.1647188093097594, "grad_norm": 1.1427876949310303, "learning_rate": 3.7206879632266795e-05, "loss": 0.7575, "step": 23320 }, { "epoch": 1.1652182599140946, "grad_norm": 2.438267707824707, "learning_rate": 3.716896254739365e-05, "loss": 0.8802, "step": 23330 }, { "epoch": 1.1657177105184298, "grad_norm": 4.247088432312012, "learning_rate": 3.713105335996516e-05, "loss": 0.6147, "step": 23340 }, { "epoch": 1.166217161122765, "grad_norm": 3.7587497234344482, "learning_rate": 3.709315209331423e-05, "loss": 0.9525, "step": 23350 }, { "epoch": 1.1667166117271002, "grad_norm": 2.0425808429718018, "learning_rate": 3.705525877076894e-05, "loss": 0.8288, "step": 23360 }, { "epoch": 1.1672160623314354, "grad_norm": 2.57705020904541, "learning_rate": 3.701737341565244e-05, "loss": 0.8069, "step": 23370 }, { "epoch": 1.1677155129357706, "grad_norm": 1.1115586757659912, "learning_rate": 3.6979496051283e-05, "loss": 0.8589, "step": 23380 }, { "epoch": 1.1682149635401058, "grad_norm": 1.0861155986785889, "learning_rate": 3.694162670097395e-05, "loss": 0.9391, "step": 23390 }, { "epoch": 1.168714414144441, "grad_norm": 2.139554977416992, "learning_rate": 3.690376538803371e-05, "loss": 0.8089, "step": 23400 }, { "epoch": 1.1692138647487764, "grad_norm": 3.0387351512908936, "learning_rate": 3.686591213576574e-05, "loss": 0.9831, "step": 23410 }, { "epoch": 1.1697133153531116, "grad_norm": 1.431955337524414, "learning_rate": 3.682806696746858e-05, "loss": 0.9796, "step": 23420 }, { "epoch": 1.1702127659574468, "grad_norm": 0.9049012660980225, "learning_rate": 3.6790229906435705e-05, "loss": 1.0167, "step": 23430 }, { "epoch": 1.170712216561782, "grad_norm": 1.435412049293518, "learning_rate": 3.675240097595568e-05, "loss": 0.7465, "step": 23440 }, { "epoch": 1.1712116671661172, "grad_norm": 1.6066842079162598, "learning_rate": 3.6714580199312024e-05, "loss": 0.8472, "step": 23450 }, { "epoch": 1.1717111177704524, "grad_norm": 0.9418919086456299, "learning_rate": 3.667676759978327e-05, "loss": 0.8018, "step": 23460 }, { "epoch": 1.1722105683747877, "grad_norm": 2.083212375640869, "learning_rate": 3.663896320064288e-05, "loss": 1.165, "step": 23470 }, { "epoch": 1.172710018979123, "grad_norm": 1.097571849822998, "learning_rate": 3.6601167025159305e-05, "loss": 0.8453, "step": 23480 }, { "epoch": 1.1732094695834583, "grad_norm": 2.709289312362671, "learning_rate": 3.656337909659589e-05, "loss": 0.7878, "step": 23490 }, { "epoch": 1.1737089201877935, "grad_norm": 0.9390159249305725, "learning_rate": 3.6525599438210956e-05, "loss": 0.7294, "step": 23500 }, { "epoch": 1.1742083707921287, "grad_norm": 2.481174945831299, "learning_rate": 3.648782807325772e-05, "loss": 1.0097, "step": 23510 }, { "epoch": 1.174707821396464, "grad_norm": 0.7492309212684631, "learning_rate": 3.6450065024984256e-05, "loss": 0.674, "step": 23520 }, { "epoch": 1.175207272000799, "grad_norm": 1.9587234258651733, "learning_rate": 3.641231031663356e-05, "loss": 0.8633, "step": 23530 }, { "epoch": 1.1757067226051343, "grad_norm": 2.0799505710601807, "learning_rate": 3.637456397144349e-05, "loss": 0.8515, "step": 23540 }, { "epoch": 1.1762061732094695, "grad_norm": 0.8701553344726562, "learning_rate": 3.6336826012646715e-05, "loss": 0.8166, "step": 23550 }, { "epoch": 1.1767056238138047, "grad_norm": 1.1255234479904175, "learning_rate": 3.629909646347083e-05, "loss": 0.6838, "step": 23560 }, { "epoch": 1.17720507441814, "grad_norm": 1.754080891609192, "learning_rate": 3.626137534713813e-05, "loss": 0.8903, "step": 23570 }, { "epoch": 1.1777045250224754, "grad_norm": 1.688990592956543, "learning_rate": 3.622366268686585e-05, "loss": 0.7127, "step": 23580 }, { "epoch": 1.1782039756268106, "grad_norm": 4.060592174530029, "learning_rate": 3.618595850586594e-05, "loss": 0.9284, "step": 23590 }, { "epoch": 1.1787034262311458, "grad_norm": 2.38169264793396, "learning_rate": 3.6148262827345134e-05, "loss": 0.9753, "step": 23600 }, { "epoch": 1.179202876835481, "grad_norm": 2.5346882343292236, "learning_rate": 3.611057567450497e-05, "loss": 0.9816, "step": 23610 }, { "epoch": 1.1797023274398162, "grad_norm": 1.2993940114974976, "learning_rate": 3.607289707054171e-05, "loss": 0.754, "step": 23620 }, { "epoch": 1.1802017780441514, "grad_norm": 1.5754035711288452, "learning_rate": 3.603522703864636e-05, "loss": 0.709, "step": 23630 }, { "epoch": 1.1807012286484866, "grad_norm": 3.104963779449463, "learning_rate": 3.5997565602004645e-05, "loss": 0.7182, "step": 23640 }, { "epoch": 1.181200679252822, "grad_norm": 1.4492530822753906, "learning_rate": 3.5959912783797026e-05, "loss": 0.8378, "step": 23650 }, { "epoch": 1.1817001298571572, "grad_norm": 1.7293347120285034, "learning_rate": 3.5922268607198596e-05, "loss": 0.7682, "step": 23660 }, { "epoch": 1.1821995804614924, "grad_norm": 1.5840585231781006, "learning_rate": 3.588463309537923e-05, "loss": 0.6802, "step": 23670 }, { "epoch": 1.1826990310658276, "grad_norm": 1.595658779144287, "learning_rate": 3.584700627150338e-05, "loss": 0.9833, "step": 23680 }, { "epoch": 1.1831984816701628, "grad_norm": 1.6533112525939941, "learning_rate": 3.5809388158730204e-05, "loss": 0.9171, "step": 23690 }, { "epoch": 1.183697932274498, "grad_norm": 1.3337810039520264, "learning_rate": 3.577177878021345e-05, "loss": 1.0197, "step": 23700 }, { "epoch": 1.1841973828788332, "grad_norm": 2.351137399673462, "learning_rate": 3.5734178159101553e-05, "loss": 0.7535, "step": 23710 }, { "epoch": 1.1846968334831685, "grad_norm": 1.2320972681045532, "learning_rate": 3.5696586318537475e-05, "loss": 0.6952, "step": 23720 }, { "epoch": 1.1851962840875037, "grad_norm": 2.435931444168091, "learning_rate": 3.5659003281658865e-05, "loss": 0.7009, "step": 23730 }, { "epoch": 1.1856957346918389, "grad_norm": 2.2575273513793945, "learning_rate": 3.5621429071597876e-05, "loss": 0.7289, "step": 23740 }, { "epoch": 1.1861951852961743, "grad_norm": 2.661933660507202, "learning_rate": 3.558386371148128e-05, "loss": 0.7459, "step": 23750 }, { "epoch": 1.1866946359005095, "grad_norm": 0.818601667881012, "learning_rate": 3.55463072244304e-05, "loss": 1.043, "step": 23760 }, { "epoch": 1.1871940865048447, "grad_norm": 2.59973406791687, "learning_rate": 3.550875963356105e-05, "loss": 0.7153, "step": 23770 }, { "epoch": 1.18769353710918, "grad_norm": 1.3302559852600098, "learning_rate": 3.547122096198362e-05, "loss": 0.7872, "step": 23780 }, { "epoch": 1.1881929877135151, "grad_norm": 2.066756248474121, "learning_rate": 3.543369123280298e-05, "loss": 0.7201, "step": 23790 }, { "epoch": 1.1886924383178503, "grad_norm": 1.1501679420471191, "learning_rate": 3.539617046911851e-05, "loss": 0.9643, "step": 23800 }, { "epoch": 1.1891918889221855, "grad_norm": 1.878116488456726, "learning_rate": 3.5358658694024064e-05, "loss": 0.8447, "step": 23810 }, { "epoch": 1.1896913395265207, "grad_norm": 1.3904608488082886, "learning_rate": 3.5321155930607974e-05, "loss": 0.5751, "step": 23820 }, { "epoch": 1.1901907901308562, "grad_norm": 2.7343826293945312, "learning_rate": 3.5283662201952995e-05, "loss": 0.919, "step": 23830 }, { "epoch": 1.1906902407351914, "grad_norm": 1.696589708328247, "learning_rate": 3.524617753113639e-05, "loss": 0.6473, "step": 23840 }, { "epoch": 1.1911896913395266, "grad_norm": 1.5298988819122314, "learning_rate": 3.5208701941229763e-05, "loss": 0.6168, "step": 23850 }, { "epoch": 1.1916891419438618, "grad_norm": 1.5565485954284668, "learning_rate": 3.51712354552992e-05, "loss": 0.8951, "step": 23860 }, { "epoch": 1.192188592548197, "grad_norm": 2.0042617321014404, "learning_rate": 3.5133778096405115e-05, "loss": 0.7911, "step": 23870 }, { "epoch": 1.1926880431525322, "grad_norm": 1.1127945184707642, "learning_rate": 3.509632988760237e-05, "loss": 0.783, "step": 23880 }, { "epoch": 1.1931874937568674, "grad_norm": 3.932978630065918, "learning_rate": 3.505889085194014e-05, "loss": 0.9247, "step": 23890 }, { "epoch": 1.1936869443612026, "grad_norm": 2.509732246398926, "learning_rate": 3.5021461012462006e-05, "loss": 0.906, "step": 23900 }, { "epoch": 1.1941863949655378, "grad_norm": 2.096905469894409, "learning_rate": 3.498404039220583e-05, "loss": 0.7429, "step": 23910 }, { "epoch": 1.1946858455698732, "grad_norm": 1.7966076135635376, "learning_rate": 3.494662901420385e-05, "loss": 0.9374, "step": 23920 }, { "epoch": 1.1951852961742084, "grad_norm": 4.01101541519165, "learning_rate": 3.49092269014826e-05, "loss": 0.9392, "step": 23930 }, { "epoch": 1.1956847467785436, "grad_norm": 2.4715611934661865, "learning_rate": 3.487183407706293e-05, "loss": 0.9434, "step": 23940 }, { "epoch": 1.1961841973828788, "grad_norm": 0.9261486530303955, "learning_rate": 3.483445056395991e-05, "loss": 0.5894, "step": 23950 }, { "epoch": 1.196683647987214, "grad_norm": 1.4591783285140991, "learning_rate": 3.479707638518296e-05, "loss": 0.8462, "step": 23960 }, { "epoch": 1.1971830985915493, "grad_norm": 0.7941261529922485, "learning_rate": 3.475971156373567e-05, "loss": 0.5464, "step": 23970 }, { "epoch": 1.1976825491958845, "grad_norm": 3.5773754119873047, "learning_rate": 3.4722356122615965e-05, "loss": 0.9812, "step": 23980 }, { "epoch": 1.1981819998002197, "grad_norm": 2.8366076946258545, "learning_rate": 3.468501008481591e-05, "loss": 0.7621, "step": 23990 }, { "epoch": 1.198681450404555, "grad_norm": 2.315237045288086, "learning_rate": 3.464767347332182e-05, "loss": 0.8565, "step": 24000 }, { "epoch": 1.1991809010088903, "grad_norm": 2.0847604274749756, "learning_rate": 3.461034631111424e-05, "loss": 0.7876, "step": 24010 }, { "epoch": 1.1996803516132255, "grad_norm": 3.1894140243530273, "learning_rate": 3.4573028621167837e-05, "loss": 0.6479, "step": 24020 }, { "epoch": 1.2001798022175607, "grad_norm": 1.7675485610961914, "learning_rate": 3.45357204264515e-05, "loss": 0.8184, "step": 24030 }, { "epoch": 1.200679252821896, "grad_norm": 2.26784610748291, "learning_rate": 3.4498421749928235e-05, "loss": 1.2616, "step": 24040 }, { "epoch": 1.2011787034262311, "grad_norm": 2.226661443710327, "learning_rate": 3.4461132614555216e-05, "loss": 0.6328, "step": 24050 }, { "epoch": 1.2016781540305663, "grad_norm": 1.3927441835403442, "learning_rate": 3.442385304328373e-05, "loss": 0.7502, "step": 24060 }, { "epoch": 1.2021776046349015, "grad_norm": 2.26334810256958, "learning_rate": 3.438658305905917e-05, "loss": 1.0494, "step": 24070 }, { "epoch": 1.2026770552392367, "grad_norm": 1.3363146781921387, "learning_rate": 3.434932268482107e-05, "loss": 0.9224, "step": 24080 }, { "epoch": 1.2031765058435722, "grad_norm": 1.6434037685394287, "learning_rate": 3.431207194350298e-05, "loss": 0.9712, "step": 24090 }, { "epoch": 1.2036759564479074, "grad_norm": 2.1229751110076904, "learning_rate": 3.427483085803259e-05, "loss": 1.0839, "step": 24100 }, { "epoch": 1.2041754070522426, "grad_norm": 2.9721579551696777, "learning_rate": 3.423759945133162e-05, "loss": 0.884, "step": 24110 }, { "epoch": 1.2046748576565778, "grad_norm": 1.6693525314331055, "learning_rate": 3.4200377746315806e-05, "loss": 0.8454, "step": 24120 }, { "epoch": 1.205174308260913, "grad_norm": 2.425537347793579, "learning_rate": 3.4163165765894966e-05, "loss": 0.9329, "step": 24130 }, { "epoch": 1.2056737588652482, "grad_norm": 1.3295772075653076, "learning_rate": 3.4125963532972873e-05, "loss": 0.7244, "step": 24140 }, { "epoch": 1.2061732094695834, "grad_norm": 1.0378376245498657, "learning_rate": 3.408877107044736e-05, "loss": 0.6402, "step": 24150 }, { "epoch": 1.2066726600739186, "grad_norm": 1.1823464632034302, "learning_rate": 3.405158840121019e-05, "loss": 0.6038, "step": 24160 }, { "epoch": 1.207172110678254, "grad_norm": 3.0953123569488525, "learning_rate": 3.401441554814712e-05, "loss": 0.8217, "step": 24170 }, { "epoch": 1.2076715612825892, "grad_norm": 1.7542980909347534, "learning_rate": 3.397725253413791e-05, "loss": 0.7519, "step": 24180 }, { "epoch": 1.2081710118869244, "grad_norm": 1.069738507270813, "learning_rate": 3.394009938205619e-05, "loss": 0.6695, "step": 24190 }, { "epoch": 1.2086704624912596, "grad_norm": 2.0274806022644043, "learning_rate": 3.3902956114769566e-05, "loss": 0.71, "step": 24200 }, { "epoch": 1.2091699130955949, "grad_norm": 1.4085724353790283, "learning_rate": 3.3865822755139566e-05, "loss": 0.8458, "step": 24210 }, { "epoch": 1.20966936369993, "grad_norm": 3.137226104736328, "learning_rate": 3.382869932602156e-05, "loss": 0.8453, "step": 24220 }, { "epoch": 1.2101688143042653, "grad_norm": 2.1104557514190674, "learning_rate": 3.3791585850264895e-05, "loss": 0.7888, "step": 24230 }, { "epoch": 1.2106682649086005, "grad_norm": 1.276123046875, "learning_rate": 3.375448235071271e-05, "loss": 0.7926, "step": 24240 }, { "epoch": 1.2111677155129357, "grad_norm": 1.7154566049575806, "learning_rate": 3.371738885020206e-05, "loss": 1.0529, "step": 24250 }, { "epoch": 1.2116671661172709, "grad_norm": 2.8417937755584717, "learning_rate": 3.36803053715638e-05, "loss": 0.9414, "step": 24260 }, { "epoch": 1.2121666167216063, "grad_norm": 3.309243679046631, "learning_rate": 3.364323193762266e-05, "loss": 0.8492, "step": 24270 }, { "epoch": 1.2126660673259415, "grad_norm": 0.9691088199615479, "learning_rate": 3.360616857119718e-05, "loss": 0.8063, "step": 24280 }, { "epoch": 1.2131655179302767, "grad_norm": 1.1264917850494385, "learning_rate": 3.356911529509966e-05, "loss": 0.6784, "step": 24290 }, { "epoch": 1.213664968534612, "grad_norm": 2.9707438945770264, "learning_rate": 3.353207213213625e-05, "loss": 0.7139, "step": 24300 }, { "epoch": 1.2141644191389471, "grad_norm": 3.8646249771118164, "learning_rate": 3.349503910510683e-05, "loss": 0.8759, "step": 24310 }, { "epoch": 1.2146638697432823, "grad_norm": 2.5710110664367676, "learning_rate": 3.345801623680507e-05, "loss": 0.7641, "step": 24320 }, { "epoch": 1.2151633203476175, "grad_norm": 1.463781476020813, "learning_rate": 3.3421003550018356e-05, "loss": 0.7935, "step": 24330 }, { "epoch": 1.215662770951953, "grad_norm": 3.968480348587036, "learning_rate": 3.3384001067527845e-05, "loss": 0.8224, "step": 24340 }, { "epoch": 1.2161622215562882, "grad_norm": 2.164271593093872, "learning_rate": 3.334700881210837e-05, "loss": 0.7836, "step": 24350 }, { "epoch": 1.2166616721606234, "grad_norm": 2.2151601314544678, "learning_rate": 3.3310026806528546e-05, "loss": 0.9765, "step": 24360 }, { "epoch": 1.2171611227649586, "grad_norm": 1.6081761121749878, "learning_rate": 3.3273055073550576e-05, "loss": 0.8301, "step": 24370 }, { "epoch": 1.2176605733692938, "grad_norm": 2.1113345623016357, "learning_rate": 3.323609363593042e-05, "loss": 0.6533, "step": 24380 }, { "epoch": 1.218160023973629, "grad_norm": 2.9898107051849365, "learning_rate": 3.319914251641766e-05, "loss": 0.7601, "step": 24390 }, { "epoch": 1.2186594745779642, "grad_norm": 2.3055214881896973, "learning_rate": 3.316220173775555e-05, "loss": 0.9953, "step": 24400 }, { "epoch": 1.2191589251822994, "grad_norm": 1.3491095304489136, "learning_rate": 3.312527132268096e-05, "loss": 0.9097, "step": 24410 }, { "epoch": 1.2196583757866346, "grad_norm": 1.5335065126419067, "learning_rate": 3.30883512939244e-05, "loss": 0.8371, "step": 24420 }, { "epoch": 1.2201578263909698, "grad_norm": 4.337724685668945, "learning_rate": 3.305144167420996e-05, "loss": 0.9757, "step": 24430 }, { "epoch": 1.2206572769953052, "grad_norm": 1.8044408559799194, "learning_rate": 3.301454248625536e-05, "loss": 0.7247, "step": 24440 }, { "epoch": 1.2211567275996404, "grad_norm": 1.9333540201187134, "learning_rate": 3.297765375277189e-05, "loss": 0.9072, "step": 24450 }, { "epoch": 1.2216561782039757, "grad_norm": 3.61417293548584, "learning_rate": 3.294077549646436e-05, "loss": 0.7666, "step": 24460 }, { "epoch": 1.2221556288083109, "grad_norm": 2.842644691467285, "learning_rate": 3.2903907740031206e-05, "loss": 0.8116, "step": 24470 }, { "epoch": 1.222655079412646, "grad_norm": 1.4528379440307617, "learning_rate": 3.2867050506164344e-05, "loss": 0.7526, "step": 24480 }, { "epoch": 1.2231545300169813, "grad_norm": 2.3200719356536865, "learning_rate": 3.2830203817549226e-05, "loss": 0.6552, "step": 24490 }, { "epoch": 1.2236539806213165, "grad_norm": 1.696016550064087, "learning_rate": 3.279336769686484e-05, "loss": 0.7839, "step": 24500 }, { "epoch": 1.224153431225652, "grad_norm": 1.6958246231079102, "learning_rate": 3.275654216678363e-05, "loss": 0.8333, "step": 24510 }, { "epoch": 1.224652881829987, "grad_norm": 0.8731072545051575, "learning_rate": 3.271972724997153e-05, "loss": 0.7066, "step": 24520 }, { "epoch": 1.2251523324343223, "grad_norm": 1.858869194984436, "learning_rate": 3.2682922969087995e-05, "loss": 0.8306, "step": 24530 }, { "epoch": 1.2256517830386575, "grad_norm": 5.138544082641602, "learning_rate": 3.264612934678586e-05, "loss": 0.7675, "step": 24540 }, { "epoch": 1.2261512336429927, "grad_norm": 1.2755622863769531, "learning_rate": 3.260934640571144e-05, "loss": 0.8294, "step": 24550 }, { "epoch": 1.226650684247328, "grad_norm": 0.9115080833435059, "learning_rate": 3.2572574168504455e-05, "loss": 0.9503, "step": 24560 }, { "epoch": 1.2271501348516631, "grad_norm": 2.475282669067383, "learning_rate": 3.253581265779807e-05, "loss": 1.0624, "step": 24570 }, { "epoch": 1.2276495854559983, "grad_norm": 1.879016399383545, "learning_rate": 3.24990618962188e-05, "loss": 0.755, "step": 24580 }, { "epoch": 1.2281490360603335, "grad_norm": 2.8642232418060303, "learning_rate": 3.246232190638659e-05, "loss": 0.7766, "step": 24590 }, { "epoch": 1.2286484866646687, "grad_norm": 2.552367925643921, "learning_rate": 3.24255927109147e-05, "loss": 0.8826, "step": 24600 }, { "epoch": 1.2291479372690042, "grad_norm": 2.440946578979492, "learning_rate": 3.238887433240983e-05, "loss": 0.9215, "step": 24610 }, { "epoch": 1.2296473878733394, "grad_norm": 0.9382161498069763, "learning_rate": 3.2352166793471956e-05, "loss": 1.0155, "step": 24620 }, { "epoch": 1.2301468384776746, "grad_norm": 1.785659670829773, "learning_rate": 3.23154701166944e-05, "loss": 0.7213, "step": 24630 }, { "epoch": 1.2306462890820098, "grad_norm": 3.157031774520874, "learning_rate": 3.227878432466378e-05, "loss": 1.0759, "step": 24640 }, { "epoch": 1.231145739686345, "grad_norm": 1.0278220176696777, "learning_rate": 3.224210943996006e-05, "loss": 0.7412, "step": 24650 }, { "epoch": 1.2316451902906802, "grad_norm": 2.4850709438323975, "learning_rate": 3.2205445485156435e-05, "loss": 0.8121, "step": 24660 }, { "epoch": 1.2321446408950154, "grad_norm": 1.1703661680221558, "learning_rate": 3.216879248281943e-05, "loss": 0.825, "step": 24670 }, { "epoch": 1.2326440914993508, "grad_norm": 1.9365071058273315, "learning_rate": 3.2132150455508766e-05, "loss": 0.9783, "step": 24680 }, { "epoch": 1.233143542103686, "grad_norm": 1.1746280193328857, "learning_rate": 3.2095519425777465e-05, "loss": 0.9327, "step": 24690 }, { "epoch": 1.2336429927080212, "grad_norm": 3.6677918434143066, "learning_rate": 3.205889941617176e-05, "loss": 0.8417, "step": 24700 }, { "epoch": 1.2341424433123565, "grad_norm": 1.7713302373886108, "learning_rate": 3.20222904492311e-05, "loss": 0.7107, "step": 24710 }, { "epoch": 1.2346418939166917, "grad_norm": 3.61954665184021, "learning_rate": 3.198569254748812e-05, "loss": 0.8151, "step": 24720 }, { "epoch": 1.2351413445210269, "grad_norm": 1.0562653541564941, "learning_rate": 3.194910573346867e-05, "loss": 0.856, "step": 24730 }, { "epoch": 1.235640795125362, "grad_norm": 4.016234397888184, "learning_rate": 3.191253002969177e-05, "loss": 0.7908, "step": 24740 }, { "epoch": 1.2361402457296973, "grad_norm": 1.525303840637207, "learning_rate": 3.187596545866959e-05, "loss": 0.8773, "step": 24750 }, { "epoch": 1.2366396963340325, "grad_norm": 1.3310763835906982, "learning_rate": 3.1839412042907455e-05, "loss": 0.6175, "step": 24760 }, { "epoch": 1.2371391469383677, "grad_norm": 1.0592769384384155, "learning_rate": 3.180286980490382e-05, "loss": 0.7033, "step": 24770 }, { "epoch": 1.2376385975427031, "grad_norm": 3.28275990486145, "learning_rate": 3.176633876715029e-05, "loss": 0.6485, "step": 24780 }, { "epoch": 1.2381380481470383, "grad_norm": 3.4900319576263428, "learning_rate": 3.1729818952131526e-05, "loss": 0.822, "step": 24790 }, { "epoch": 1.2386374987513735, "grad_norm": 6.244616985321045, "learning_rate": 3.169331038232533e-05, "loss": 0.7934, "step": 24800 }, { "epoch": 1.2391369493557087, "grad_norm": 1.189617395401001, "learning_rate": 3.165681308020252e-05, "loss": 0.6847, "step": 24810 }, { "epoch": 1.239636399960044, "grad_norm": 1.9598374366760254, "learning_rate": 3.162032706822706e-05, "loss": 0.7298, "step": 24820 }, { "epoch": 1.2401358505643791, "grad_norm": 1.4160430431365967, "learning_rate": 3.1583852368855896e-05, "loss": 0.7593, "step": 24830 }, { "epoch": 1.2406353011687143, "grad_norm": 2.816969394683838, "learning_rate": 3.154738900453906e-05, "loss": 0.7613, "step": 24840 }, { "epoch": 1.2411347517730495, "grad_norm": 2.571857213973999, "learning_rate": 3.1510936997719555e-05, "loss": 0.9554, "step": 24850 }, { "epoch": 1.241634202377385, "grad_norm": 2.238813638687134, "learning_rate": 3.147449637083343e-05, "loss": 0.7022, "step": 24860 }, { "epoch": 1.2421336529817202, "grad_norm": 1.2973177433013916, "learning_rate": 3.1438067146309745e-05, "loss": 0.8813, "step": 24870 }, { "epoch": 1.2426331035860554, "grad_norm": 2.5212416648864746, "learning_rate": 3.1401649346570525e-05, "loss": 1.0145, "step": 24880 }, { "epoch": 1.2431325541903906, "grad_norm": 1.6452910900115967, "learning_rate": 3.136524299403072e-05, "loss": 1.0311, "step": 24890 }, { "epoch": 1.2436320047947258, "grad_norm": 1.3847055435180664, "learning_rate": 3.132884811109832e-05, "loss": 0.717, "step": 24900 }, { "epoch": 1.244131455399061, "grad_norm": 3.166264772415161, "learning_rate": 3.1292464720174165e-05, "loss": 1.0564, "step": 24910 }, { "epoch": 1.2446309060033962, "grad_norm": 1.6997510194778442, "learning_rate": 3.12560928436521e-05, "loss": 0.6297, "step": 24920 }, { "epoch": 1.2451303566077314, "grad_norm": 1.114342212677002, "learning_rate": 3.121973250391882e-05, "loss": 1.0382, "step": 24930 }, { "epoch": 1.2456298072120666, "grad_norm": 2.3849868774414062, "learning_rate": 3.118338372335397e-05, "loss": 0.9502, "step": 24940 }, { "epoch": 1.246129257816402, "grad_norm": 2.6257517337799072, "learning_rate": 3.114704652433003e-05, "loss": 0.6718, "step": 24950 }, { "epoch": 1.2466287084207373, "grad_norm": 1.0890600681304932, "learning_rate": 3.11107209292124e-05, "loss": 0.9186, "step": 24960 }, { "epoch": 1.2471281590250725, "grad_norm": 2.0612778663635254, "learning_rate": 3.107440696035933e-05, "loss": 0.8563, "step": 24970 }, { "epoch": 1.2476276096294077, "grad_norm": 1.4048233032226562, "learning_rate": 3.103810464012187e-05, "loss": 0.7819, "step": 24980 }, { "epoch": 1.2481270602337429, "grad_norm": 3.173872709274292, "learning_rate": 3.100181399084395e-05, "loss": 0.8003, "step": 24990 }, { "epoch": 1.248626510838078, "grad_norm": 1.1218109130859375, "learning_rate": 3.096553503486229e-05, "loss": 0.867, "step": 25000 }, { "epoch": 1.2491259614424133, "grad_norm": 1.4468357563018799, "learning_rate": 3.092926779450643e-05, "loss": 0.9924, "step": 25010 }, { "epoch": 1.2496254120467485, "grad_norm": 2.281041383743286, "learning_rate": 3.089301229209869e-05, "loss": 0.8591, "step": 25020 }, { "epoch": 1.250124862651084, "grad_norm": 1.3560094833374023, "learning_rate": 3.0856768549954144e-05, "loss": 0.6198, "step": 25030 }, { "epoch": 1.2506243132554191, "grad_norm": 1.0620819330215454, "learning_rate": 3.082053659038067e-05, "loss": 0.7338, "step": 25040 }, { "epoch": 1.2511237638597543, "grad_norm": 2.7363362312316895, "learning_rate": 3.078431643567887e-05, "loss": 1.0301, "step": 25050 }, { "epoch": 1.2516232144640895, "grad_norm": 0.8370758891105652, "learning_rate": 3.074810810814208e-05, "loss": 0.815, "step": 25060 }, { "epoch": 1.2521226650684247, "grad_norm": 2.5961215496063232, "learning_rate": 3.071191163005637e-05, "loss": 0.9319, "step": 25070 }, { "epoch": 1.25262211567276, "grad_norm": 1.3736803531646729, "learning_rate": 3.067572702370048e-05, "loss": 0.8014, "step": 25080 }, { "epoch": 1.2531215662770951, "grad_norm": 1.962709665298462, "learning_rate": 3.06395543113459e-05, "loss": 0.7109, "step": 25090 }, { "epoch": 1.2536210168814304, "grad_norm": 1.578259825706482, "learning_rate": 3.060339351525674e-05, "loss": 0.8149, "step": 25100 }, { "epoch": 1.2541204674857656, "grad_norm": 3.259934902191162, "learning_rate": 3.056724465768983e-05, "loss": 0.8071, "step": 25110 }, { "epoch": 1.2546199180901008, "grad_norm": 2.0123560428619385, "learning_rate": 3.0531107760894585e-05, "loss": 1.0255, "step": 25120 }, { "epoch": 1.2551193686944362, "grad_norm": 3.2788171768188477, "learning_rate": 3.049498284711315e-05, "loss": 0.9064, "step": 25130 }, { "epoch": 1.2556188192987714, "grad_norm": 2.713536262512207, "learning_rate": 3.0458869938580213e-05, "loss": 0.8965, "step": 25140 }, { "epoch": 1.2561182699031066, "grad_norm": 2.793422222137451, "learning_rate": 3.0422769057523127e-05, "loss": 0.7667, "step": 25150 }, { "epoch": 1.2566177205074418, "grad_norm": 7.172312259674072, "learning_rate": 3.038668022616179e-05, "loss": 0.7628, "step": 25160 }, { "epoch": 1.257117171111777, "grad_norm": 0.6939265727996826, "learning_rate": 3.0350603466708725e-05, "loss": 0.7269, "step": 25170 }, { "epoch": 1.2576166217161122, "grad_norm": 3.493987798690796, "learning_rate": 3.0314538801369018e-05, "loss": 0.8517, "step": 25180 }, { "epoch": 1.2581160723204476, "grad_norm": 2.091142177581787, "learning_rate": 3.02784862523403e-05, "loss": 0.8482, "step": 25190 }, { "epoch": 1.2586155229247828, "grad_norm": 3.821892023086548, "learning_rate": 3.0242445841812727e-05, "loss": 0.7234, "step": 25200 }, { "epoch": 1.259114973529118, "grad_norm": 2.6875596046447754, "learning_rate": 3.020641759196904e-05, "loss": 0.889, "step": 25210 }, { "epoch": 1.2596144241334533, "grad_norm": 3.890988826751709, "learning_rate": 3.017040152498446e-05, "loss": 1.0065, "step": 25220 }, { "epoch": 1.2601138747377885, "grad_norm": 3.5323803424835205, "learning_rate": 3.013439766302668e-05, "loss": 0.7329, "step": 25230 }, { "epoch": 1.2606133253421237, "grad_norm": 1.3713383674621582, "learning_rate": 3.0098406028255955e-05, "loss": 0.903, "step": 25240 }, { "epoch": 1.2611127759464589, "grad_norm": 0.9502451419830322, "learning_rate": 3.0062426642824925e-05, "loss": 0.9751, "step": 25250 }, { "epoch": 1.261612226550794, "grad_norm": 1.2087663412094116, "learning_rate": 3.002645952887877e-05, "loss": 0.8141, "step": 25260 }, { "epoch": 1.2621116771551293, "grad_norm": 1.050290584564209, "learning_rate": 2.9990504708555066e-05, "loss": 0.7031, "step": 25270 }, { "epoch": 1.2626111277594645, "grad_norm": 2.656682014465332, "learning_rate": 2.995456220398385e-05, "loss": 0.9216, "step": 25280 }, { "epoch": 1.2631105783637997, "grad_norm": 1.5212676525115967, "learning_rate": 2.991863203728755e-05, "loss": 0.6842, "step": 25290 }, { "epoch": 1.2636100289681351, "grad_norm": 1.554700493812561, "learning_rate": 2.988271423058105e-05, "loss": 0.7464, "step": 25300 }, { "epoch": 1.2641094795724703, "grad_norm": 4.207770347595215, "learning_rate": 2.984680880597157e-05, "loss": 1.0052, "step": 25310 }, { "epoch": 1.2646089301768055, "grad_norm": 2.349421262741089, "learning_rate": 2.9810915785558756e-05, "loss": 0.9049, "step": 25320 }, { "epoch": 1.2651083807811407, "grad_norm": 0.8805211186408997, "learning_rate": 2.9775035191434563e-05, "loss": 0.6806, "step": 25330 }, { "epoch": 1.265607831385476, "grad_norm": 1.8856866359710693, "learning_rate": 2.9739167045683375e-05, "loss": 0.655, "step": 25340 }, { "epoch": 1.2661072819898112, "grad_norm": 1.7591133117675781, "learning_rate": 2.9703311370381837e-05, "loss": 0.9164, "step": 25350 }, { "epoch": 1.2666067325941464, "grad_norm": 1.035176157951355, "learning_rate": 2.966746818759897e-05, "loss": 0.7433, "step": 25360 }, { "epoch": 1.2671061831984818, "grad_norm": 2.319363594055176, "learning_rate": 2.963163751939607e-05, "loss": 0.8723, "step": 25370 }, { "epoch": 1.267605633802817, "grad_norm": 2.4335274696350098, "learning_rate": 2.959581938782675e-05, "loss": 0.7508, "step": 25380 }, { "epoch": 1.2681050844071522, "grad_norm": 3.6997673511505127, "learning_rate": 2.956001381493693e-05, "loss": 0.9433, "step": 25390 }, { "epoch": 1.2686045350114874, "grad_norm": 1.7910585403442383, "learning_rate": 2.9524220822764775e-05, "loss": 0.7691, "step": 25400 }, { "epoch": 1.2691039856158226, "grad_norm": 2.087606906890869, "learning_rate": 2.948844043334068e-05, "loss": 0.9178, "step": 25410 }, { "epoch": 1.2696034362201578, "grad_norm": 1.9469484090805054, "learning_rate": 2.945267266868734e-05, "loss": 0.7922, "step": 25420 }, { "epoch": 1.270102886824493, "grad_norm": 1.6279138326644897, "learning_rate": 2.9416917550819623e-05, "loss": 0.6211, "step": 25430 }, { "epoch": 1.2706023374288282, "grad_norm": 2.592158079147339, "learning_rate": 2.9381175101744663e-05, "loss": 0.9265, "step": 25440 }, { "epoch": 1.2711017880331634, "grad_norm": 1.0540589094161987, "learning_rate": 2.934544534346175e-05, "loss": 0.6785, "step": 25450 }, { "epoch": 1.2716012386374986, "grad_norm": 7.0633721351623535, "learning_rate": 2.930972829796239e-05, "loss": 1.1962, "step": 25460 }, { "epoch": 1.272100689241834, "grad_norm": 3.2388880252838135, "learning_rate": 2.927402398723029e-05, "loss": 0.782, "step": 25470 }, { "epoch": 1.2726001398461693, "grad_norm": 1.9963425397872925, "learning_rate": 2.923833243324126e-05, "loss": 0.8876, "step": 25480 }, { "epoch": 1.2730995904505045, "grad_norm": 1.7312681674957275, "learning_rate": 2.9202653657963303e-05, "loss": 0.6034, "step": 25490 }, { "epoch": 1.2735990410548397, "grad_norm": 2.3396546840667725, "learning_rate": 2.9166987683356527e-05, "loss": 0.8036, "step": 25500 }, { "epoch": 1.2740984916591749, "grad_norm": 4.383434295654297, "learning_rate": 2.9131334531373177e-05, "loss": 0.8276, "step": 25510 }, { "epoch": 1.27459794226351, "grad_norm": 1.4872502088546753, "learning_rate": 2.9095694223957636e-05, "loss": 0.837, "step": 25520 }, { "epoch": 1.2750973928678453, "grad_norm": 2.1265480518341064, "learning_rate": 2.9060066783046303e-05, "loss": 0.994, "step": 25530 }, { "epoch": 1.2755968434721807, "grad_norm": 4.892418384552002, "learning_rate": 2.9024452230567723e-05, "loss": 0.9606, "step": 25540 }, { "epoch": 1.276096294076516, "grad_norm": 3.2725839614868164, "learning_rate": 2.898885058844247e-05, "loss": 0.5869, "step": 25550 }, { "epoch": 1.2765957446808511, "grad_norm": 1.9172431230545044, "learning_rate": 2.895326187858326e-05, "loss": 0.9424, "step": 25560 }, { "epoch": 1.2770951952851863, "grad_norm": 1.5717350244522095, "learning_rate": 2.8917686122894704e-05, "loss": 0.8012, "step": 25570 }, { "epoch": 1.2775946458895215, "grad_norm": 3.3950858116149902, "learning_rate": 2.8882123343273555e-05, "loss": 0.7058, "step": 25580 }, { "epoch": 1.2780940964938567, "grad_norm": 1.6237913370132446, "learning_rate": 2.8846573561608525e-05, "loss": 0.7389, "step": 25590 }, { "epoch": 1.278593547098192, "grad_norm": 2.1186885833740234, "learning_rate": 2.881103679978037e-05, "loss": 1.0403, "step": 25600 }, { "epoch": 1.2790929977025272, "grad_norm": 1.809799075126648, "learning_rate": 2.8775513079661763e-05, "loss": 0.9461, "step": 25610 }, { "epoch": 1.2795924483068624, "grad_norm": 3.4378304481506348, "learning_rate": 2.8740002423117407e-05, "loss": 0.9774, "step": 25620 }, { "epoch": 1.2800918989111976, "grad_norm": 2.171168565750122, "learning_rate": 2.8704504852003954e-05, "loss": 0.9213, "step": 25630 }, { "epoch": 1.280591349515533, "grad_norm": 1.464285969734192, "learning_rate": 2.8669020388169986e-05, "loss": 1.0249, "step": 25640 }, { "epoch": 1.2810908001198682, "grad_norm": 2.6096229553222656, "learning_rate": 2.8633549053456043e-05, "loss": 0.7559, "step": 25650 }, { "epoch": 1.2815902507242034, "grad_norm": 1.2009655237197876, "learning_rate": 2.859809086969455e-05, "loss": 0.7502, "step": 25660 }, { "epoch": 1.2820897013285386, "grad_norm": 1.84615957736969, "learning_rate": 2.856264585870987e-05, "loss": 1.0261, "step": 25670 }, { "epoch": 1.2825891519328738, "grad_norm": 2.4030511379241943, "learning_rate": 2.8527214042318263e-05, "loss": 0.7864, "step": 25680 }, { "epoch": 1.283088602537209, "grad_norm": 3.0597686767578125, "learning_rate": 2.8491795442327805e-05, "loss": 0.7979, "step": 25690 }, { "epoch": 1.2835880531415442, "grad_norm": 2.1812305450439453, "learning_rate": 2.8456390080538498e-05, "loss": 0.8063, "step": 25700 }, { "epoch": 1.2840875037458797, "grad_norm": 1.3359405994415283, "learning_rate": 2.8420997978742193e-05, "loss": 0.7925, "step": 25710 }, { "epoch": 1.2845869543502149, "grad_norm": 4.406698703765869, "learning_rate": 2.8385619158722553e-05, "loss": 0.8084, "step": 25720 }, { "epoch": 1.28508640495455, "grad_norm": 1.9411131143569946, "learning_rate": 2.8350253642255085e-05, "loss": 1.0207, "step": 25730 }, { "epoch": 1.2855858555588853, "grad_norm": 1.3505710363388062, "learning_rate": 2.83149014511071e-05, "loss": 0.9775, "step": 25740 }, { "epoch": 1.2860853061632205, "grad_norm": 0.8326596021652222, "learning_rate": 2.8279562607037734e-05, "loss": 0.6705, "step": 25750 }, { "epoch": 1.2865847567675557, "grad_norm": 3.094120740890503, "learning_rate": 2.824423713179784e-05, "loss": 0.8511, "step": 25760 }, { "epoch": 1.2870842073718909, "grad_norm": 1.5164830684661865, "learning_rate": 2.8208925047130108e-05, "loss": 0.8633, "step": 25770 }, { "epoch": 1.287583657976226, "grad_norm": 0.8644943237304688, "learning_rate": 2.8173626374768957e-05, "loss": 0.6869, "step": 25780 }, { "epoch": 1.2880831085805613, "grad_norm": 3.0497546195983887, "learning_rate": 2.8138341136440582e-05, "loss": 1.0132, "step": 25790 }, { "epoch": 1.2885825591848965, "grad_norm": 1.2002036571502686, "learning_rate": 2.8103069353862854e-05, "loss": 0.799, "step": 25800 }, { "epoch": 1.2890820097892317, "grad_norm": 1.5014221668243408, "learning_rate": 2.8067811048745373e-05, "loss": 0.6567, "step": 25810 }, { "epoch": 1.2895814603935671, "grad_norm": 1.125079870223999, "learning_rate": 2.8032566242789526e-05, "loss": 0.8227, "step": 25820 }, { "epoch": 1.2900809109979023, "grad_norm": 1.010020136833191, "learning_rate": 2.7997334957688315e-05, "loss": 0.8606, "step": 25830 }, { "epoch": 1.2905803616022375, "grad_norm": 2.353187084197998, "learning_rate": 2.79621172151264e-05, "loss": 0.9387, "step": 25840 }, { "epoch": 1.2910798122065728, "grad_norm": 2.716230869293213, "learning_rate": 2.7926913036780157e-05, "loss": 0.9933, "step": 25850 }, { "epoch": 1.291579262810908, "grad_norm": 4.099030494689941, "learning_rate": 2.7891722444317603e-05, "loss": 0.7803, "step": 25860 }, { "epoch": 1.2920787134152432, "grad_norm": 4.673940658569336, "learning_rate": 2.785654545939839e-05, "loss": 0.8436, "step": 25870 }, { "epoch": 1.2925781640195786, "grad_norm": 1.6985145807266235, "learning_rate": 2.7821382103673765e-05, "loss": 0.9857, "step": 25880 }, { "epoch": 1.2930776146239138, "grad_norm": 2.822093963623047, "learning_rate": 2.7786232398786614e-05, "loss": 0.9068, "step": 25890 }, { "epoch": 1.293577065228249, "grad_norm": 1.675399899482727, "learning_rate": 2.7751096366371465e-05, "loss": 0.8926, "step": 25900 }, { "epoch": 1.2940765158325842, "grad_norm": 1.4630440473556519, "learning_rate": 2.7715974028054337e-05, "loss": 0.8559, "step": 25910 }, { "epoch": 1.2945759664369194, "grad_norm": 1.9225897789001465, "learning_rate": 2.768086540545289e-05, "loss": 0.6175, "step": 25920 }, { "epoch": 1.2950754170412546, "grad_norm": 1.2835670709609985, "learning_rate": 2.7645770520176306e-05, "loss": 0.9175, "step": 25930 }, { "epoch": 1.2955748676455898, "grad_norm": 1.357814908027649, "learning_rate": 2.761068939382534e-05, "loss": 0.9106, "step": 25940 }, { "epoch": 1.296074318249925, "grad_norm": 1.2162941694259644, "learning_rate": 2.7575622047992268e-05, "loss": 0.8144, "step": 25950 }, { "epoch": 1.2965737688542602, "grad_norm": 2.0831847190856934, "learning_rate": 2.7540568504260856e-05, "loss": 0.8208, "step": 25960 }, { "epoch": 1.2970732194585954, "grad_norm": 2.710693359375, "learning_rate": 2.7505528784206412e-05, "loss": 0.6868, "step": 25970 }, { "epoch": 1.2975726700629306, "grad_norm": 2.4084606170654297, "learning_rate": 2.747050290939573e-05, "loss": 0.6739, "step": 25980 }, { "epoch": 1.298072120667266, "grad_norm": 1.3072469234466553, "learning_rate": 2.7435490901387063e-05, "loss": 0.7255, "step": 25990 }, { "epoch": 1.2985715712716013, "grad_norm": 1.5046652555465698, "learning_rate": 2.7400492781730147e-05, "loss": 0.744, "step": 26000 }, { "epoch": 1.2990710218759365, "grad_norm": 1.3858652114868164, "learning_rate": 2.736550857196617e-05, "loss": 0.7211, "step": 26010 }, { "epoch": 1.2995704724802717, "grad_norm": 1.4871467351913452, "learning_rate": 2.7330538293627762e-05, "loss": 0.9627, "step": 26020 }, { "epoch": 1.300069923084607, "grad_norm": 1.5029727220535278, "learning_rate": 2.729558196823894e-05, "loss": 1.0496, "step": 26030 }, { "epoch": 1.300569373688942, "grad_norm": 1.2640323638916016, "learning_rate": 2.7260639617315164e-05, "loss": 0.869, "step": 26040 }, { "epoch": 1.3010688242932775, "grad_norm": 1.5839718580245972, "learning_rate": 2.722571126236331e-05, "loss": 0.786, "step": 26050 }, { "epoch": 1.3015682748976127, "grad_norm": 2.033895969390869, "learning_rate": 2.719079692488161e-05, "loss": 0.8583, "step": 26060 }, { "epoch": 1.302067725501948, "grad_norm": 1.3492399454116821, "learning_rate": 2.715589662635969e-05, "loss": 0.8909, "step": 26070 }, { "epoch": 1.3025671761062831, "grad_norm": 1.1985543966293335, "learning_rate": 2.71210103882785e-05, "loss": 0.7265, "step": 26080 }, { "epoch": 1.3030666267106183, "grad_norm": 1.33757483959198, "learning_rate": 2.708613823211038e-05, "loss": 0.7627, "step": 26090 }, { "epoch": 1.3035660773149536, "grad_norm": 2.5424530506134033, "learning_rate": 2.7051280179319e-05, "loss": 0.6792, "step": 26100 }, { "epoch": 1.3040655279192888, "grad_norm": 1.192478895187378, "learning_rate": 2.7016436251359283e-05, "loss": 0.8778, "step": 26110 }, { "epoch": 1.304564978523624, "grad_norm": 1.8942397832870483, "learning_rate": 2.6981606469677536e-05, "loss": 0.839, "step": 26120 }, { "epoch": 1.3050644291279592, "grad_norm": 4.347995281219482, "learning_rate": 2.694679085571131e-05, "loss": 0.7367, "step": 26130 }, { "epoch": 1.3055638797322944, "grad_norm": 1.639884114265442, "learning_rate": 2.6911989430889505e-05, "loss": 0.7742, "step": 26140 }, { "epoch": 1.3060633303366296, "grad_norm": 3.8017916679382324, "learning_rate": 2.687720221663216e-05, "loss": 0.8827, "step": 26150 }, { "epoch": 1.306562780940965, "grad_norm": 1.2692903280258179, "learning_rate": 2.6842429234350698e-05, "loss": 0.7772, "step": 26160 }, { "epoch": 1.3070622315453002, "grad_norm": 5.200440406799316, "learning_rate": 2.680767050544775e-05, "loss": 0.9094, "step": 26170 }, { "epoch": 1.3075616821496354, "grad_norm": 2.226263999938965, "learning_rate": 2.6772926051317105e-05, "loss": 0.7522, "step": 26180 }, { "epoch": 1.3080611327539706, "grad_norm": 1.3870896100997925, "learning_rate": 2.673819589334383e-05, "loss": 0.8461, "step": 26190 }, { "epoch": 1.3085605833583058, "grad_norm": 1.9252040386199951, "learning_rate": 2.6703480052904185e-05, "loss": 0.8828, "step": 26200 }, { "epoch": 1.309060033962641, "grad_norm": 1.4164537191390991, "learning_rate": 2.6668778551365615e-05, "loss": 0.7703, "step": 26210 }, { "epoch": 1.3095594845669765, "grad_norm": 2.184199810028076, "learning_rate": 2.6634091410086738e-05, "loss": 0.8513, "step": 26220 }, { "epoch": 1.3100589351713117, "grad_norm": 3.2855100631713867, "learning_rate": 2.6599418650417285e-05, "loss": 0.7017, "step": 26230 }, { "epoch": 1.3105583857756469, "grad_norm": 1.4663140773773193, "learning_rate": 2.6564760293698233e-05, "loss": 0.674, "step": 26240 }, { "epoch": 1.311057836379982, "grad_norm": 2.286970615386963, "learning_rate": 2.653011636126165e-05, "loss": 0.7949, "step": 26250 }, { "epoch": 1.3115572869843173, "grad_norm": 2.743060350418091, "learning_rate": 2.6495486874430664e-05, "loss": 0.817, "step": 26260 }, { "epoch": 1.3120567375886525, "grad_norm": 2.114896059036255, "learning_rate": 2.6460871854519594e-05, "loss": 0.7223, "step": 26270 }, { "epoch": 1.3125561881929877, "grad_norm": 1.9517860412597656, "learning_rate": 2.6426271322833823e-05, "loss": 0.6969, "step": 26280 }, { "epoch": 1.313055638797323, "grad_norm": 1.98906409740448, "learning_rate": 2.6391685300669844e-05, "loss": 0.7596, "step": 26290 }, { "epoch": 1.313555089401658, "grad_norm": 2.372474193572998, "learning_rate": 2.6357113809315147e-05, "loss": 1.0178, "step": 26300 }, { "epoch": 1.3140545400059933, "grad_norm": 1.938651442527771, "learning_rate": 2.6322556870048354e-05, "loss": 0.7313, "step": 26310 }, { "epoch": 1.3145539906103285, "grad_norm": 2.4512546062469482, "learning_rate": 2.6288014504139104e-05, "loss": 0.6829, "step": 26320 }, { "epoch": 1.315053441214664, "grad_norm": 1.3412909507751465, "learning_rate": 2.6253486732848054e-05, "loss": 0.6252, "step": 26330 }, { "epoch": 1.3155528918189991, "grad_norm": 2.624915838241577, "learning_rate": 2.621897357742689e-05, "loss": 0.6921, "step": 26340 }, { "epoch": 1.3160523424233344, "grad_norm": 2.5888686180114746, "learning_rate": 2.6184475059118314e-05, "loss": 0.7307, "step": 26350 }, { "epoch": 1.3165517930276696, "grad_norm": 4.230358600616455, "learning_rate": 2.6149991199155986e-05, "loss": 0.9353, "step": 26360 }, { "epoch": 1.3170512436320048, "grad_norm": 2.7066123485565186, "learning_rate": 2.6115522018764603e-05, "loss": 0.8521, "step": 26370 }, { "epoch": 1.31755069423634, "grad_norm": 2.8542397022247314, "learning_rate": 2.6081067539159744e-05, "loss": 0.6292, "step": 26380 }, { "epoch": 1.3180501448406752, "grad_norm": 1.3912320137023926, "learning_rate": 2.604662778154799e-05, "loss": 0.6771, "step": 26390 }, { "epoch": 1.3185495954450106, "grad_norm": 2.823066234588623, "learning_rate": 2.6012202767126858e-05, "loss": 0.9555, "step": 26400 }, { "epoch": 1.3190490460493458, "grad_norm": 1.9173599481582642, "learning_rate": 2.5977792517084797e-05, "loss": 0.8277, "step": 26410 }, { "epoch": 1.319548496653681, "grad_norm": 2.8986942768096924, "learning_rate": 2.5943397052601147e-05, "loss": 1.0224, "step": 26420 }, { "epoch": 1.3200479472580162, "grad_norm": 1.836143136024475, "learning_rate": 2.590901639484616e-05, "loss": 0.7598, "step": 26430 }, { "epoch": 1.3205473978623514, "grad_norm": 2.321305990219116, "learning_rate": 2.5874650564980996e-05, "loss": 0.7754, "step": 26440 }, { "epoch": 1.3210468484666866, "grad_norm": 2.339043378829956, "learning_rate": 2.5840299584157622e-05, "loss": 0.7872, "step": 26450 }, { "epoch": 1.3215462990710218, "grad_norm": 0.9723687767982483, "learning_rate": 2.5805963473518935e-05, "loss": 0.7584, "step": 26460 }, { "epoch": 1.322045749675357, "grad_norm": 1.5570881366729736, "learning_rate": 2.5771642254198654e-05, "loss": 0.766, "step": 26470 }, { "epoch": 1.3225452002796922, "grad_norm": 1.399144172668457, "learning_rate": 2.5737335947321324e-05, "loss": 0.6879, "step": 26480 }, { "epoch": 1.3230446508840275, "grad_norm": 1.6958900690078735, "learning_rate": 2.570304457400232e-05, "loss": 0.6603, "step": 26490 }, { "epoch": 1.3235441014883629, "grad_norm": 3.0228214263916016, "learning_rate": 2.5668768155347834e-05, "loss": 0.7593, "step": 26500 }, { "epoch": 1.324043552092698, "grad_norm": 0.8537726998329163, "learning_rate": 2.563450671245483e-05, "loss": 0.7344, "step": 26510 }, { "epoch": 1.3245430026970333, "grad_norm": 1.8870234489440918, "learning_rate": 2.5600260266411103e-05, "loss": 0.8112, "step": 26520 }, { "epoch": 1.3250424533013685, "grad_norm": 1.6890041828155518, "learning_rate": 2.556602883829514e-05, "loss": 0.8621, "step": 26530 }, { "epoch": 1.3255419039057037, "grad_norm": 2.4928133487701416, "learning_rate": 2.553181244917624e-05, "loss": 0.9159, "step": 26540 }, { "epoch": 1.326041354510039, "grad_norm": 1.6931321620941162, "learning_rate": 2.549761112011444e-05, "loss": 0.8533, "step": 26550 }, { "epoch": 1.326540805114374, "grad_norm": 2.798311471939087, "learning_rate": 2.5463424872160525e-05, "loss": 0.8342, "step": 26560 }, { "epoch": 1.3270402557187095, "grad_norm": 2.576824426651001, "learning_rate": 2.5429253726355918e-05, "loss": 0.7163, "step": 26570 }, { "epoch": 1.3275397063230447, "grad_norm": 1.5676735639572144, "learning_rate": 2.5395097703732816e-05, "loss": 0.7813, "step": 26580 }, { "epoch": 1.32803915692738, "grad_norm": 0.8699400424957275, "learning_rate": 2.5360956825314158e-05, "loss": 0.6391, "step": 26590 }, { "epoch": 1.3285386075317152, "grad_norm": 1.8087270259857178, "learning_rate": 2.5326831112113424e-05, "loss": 0.8855, "step": 26600 }, { "epoch": 1.3290380581360504, "grad_norm": 0.8431574106216431, "learning_rate": 2.5292720585134867e-05, "loss": 0.9068, "step": 26610 }, { "epoch": 1.3295375087403856, "grad_norm": 2.543140172958374, "learning_rate": 2.525862526537335e-05, "loss": 0.7355, "step": 26620 }, { "epoch": 1.3300369593447208, "grad_norm": 4.0236382484436035, "learning_rate": 2.5224545173814385e-05, "loss": 0.9041, "step": 26630 }, { "epoch": 1.330536409949056, "grad_norm": 0.6019638180732727, "learning_rate": 2.519048033143414e-05, "loss": 0.7289, "step": 26640 }, { "epoch": 1.3310358605533912, "grad_norm": 1.3549529314041138, "learning_rate": 2.5156430759199324e-05, "loss": 0.974, "step": 26650 }, { "epoch": 1.3315353111577264, "grad_norm": 2.550344705581665, "learning_rate": 2.512239647806729e-05, "loss": 1.0729, "step": 26660 }, { "epoch": 1.3320347617620618, "grad_norm": 2.0747127532958984, "learning_rate": 2.508837750898605e-05, "loss": 0.7989, "step": 26670 }, { "epoch": 1.332534212366397, "grad_norm": 1.3112684488296509, "learning_rate": 2.505437387289406e-05, "loss": 0.6326, "step": 26680 }, { "epoch": 1.3330336629707322, "grad_norm": 1.4353485107421875, "learning_rate": 2.502038559072042e-05, "loss": 0.752, "step": 26690 }, { "epoch": 1.3335331135750674, "grad_norm": 4.3663859367370605, "learning_rate": 2.4986412683384757e-05, "loss": 0.9616, "step": 26700 }, { "epoch": 1.3340325641794026, "grad_norm": 1.7282423973083496, "learning_rate": 2.4952455171797268e-05, "loss": 0.8071, "step": 26710 }, { "epoch": 1.3345320147837378, "grad_norm": 3.4242000579833984, "learning_rate": 2.4918513076858595e-05, "loss": 1.007, "step": 26720 }, { "epoch": 1.335031465388073, "grad_norm": 2.4444282054901123, "learning_rate": 2.4884586419459976e-05, "loss": 1.0735, "step": 26730 }, { "epoch": 1.3355309159924085, "grad_norm": 1.5772311687469482, "learning_rate": 2.4850675220483104e-05, "loss": 0.8308, "step": 26740 }, { "epoch": 1.3360303665967437, "grad_norm": 4.243272304534912, "learning_rate": 2.4816779500800168e-05, "loss": 1.0224, "step": 26750 }, { "epoch": 1.3365298172010789, "grad_norm": 2.3153557777404785, "learning_rate": 2.478289928127383e-05, "loss": 0.7532, "step": 26760 }, { "epoch": 1.337029267805414, "grad_norm": 0.7694450616836548, "learning_rate": 2.4749034582757203e-05, "loss": 0.7016, "step": 26770 }, { "epoch": 1.3375287184097493, "grad_norm": 2.605919361114502, "learning_rate": 2.4715185426093862e-05, "loss": 1.0076, "step": 26780 }, { "epoch": 1.3380281690140845, "grad_norm": 5.179083824157715, "learning_rate": 2.4681351832117815e-05, "loss": 1.137, "step": 26790 }, { "epoch": 1.3385276196184197, "grad_norm": 2.2539525032043457, "learning_rate": 2.4647533821653463e-05, "loss": 1.0885, "step": 26800 }, { "epoch": 1.339027070222755, "grad_norm": 2.8160221576690674, "learning_rate": 2.4613731415515633e-05, "loss": 0.8886, "step": 26810 }, { "epoch": 1.3395265208270901, "grad_norm": 1.7527929544448853, "learning_rate": 2.457994463450957e-05, "loss": 0.8078, "step": 26820 }, { "epoch": 1.3400259714314253, "grad_norm": 1.7972664833068848, "learning_rate": 2.4546173499430895e-05, "loss": 0.7986, "step": 26830 }, { "epoch": 1.3405254220357605, "grad_norm": 3.02156662940979, "learning_rate": 2.4512418031065525e-05, "loss": 0.8894, "step": 26840 }, { "epoch": 1.341024872640096, "grad_norm": 2.1933252811431885, "learning_rate": 2.4478678250189863e-05, "loss": 0.8215, "step": 26850 }, { "epoch": 1.3415243232444312, "grad_norm": 2.0021753311157227, "learning_rate": 2.444495417757058e-05, "loss": 0.8465, "step": 26860 }, { "epoch": 1.3420237738487664, "grad_norm": 2.044494152069092, "learning_rate": 2.4411245833964653e-05, "loss": 0.7785, "step": 26870 }, { "epoch": 1.3425232244531016, "grad_norm": 1.5613638162612915, "learning_rate": 2.4377553240119427e-05, "loss": 0.7757, "step": 26880 }, { "epoch": 1.3430226750574368, "grad_norm": 2.2117979526519775, "learning_rate": 2.434387641677255e-05, "loss": 0.9169, "step": 26890 }, { "epoch": 1.343522125661772, "grad_norm": 2.7478301525115967, "learning_rate": 2.431021538465194e-05, "loss": 0.911, "step": 26900 }, { "epoch": 1.3440215762661074, "grad_norm": 5.291362762451172, "learning_rate": 2.4276570164475836e-05, "loss": 0.8214, "step": 26910 }, { "epoch": 1.3445210268704426, "grad_norm": 2.119905710220337, "learning_rate": 2.4242940776952654e-05, "loss": 0.8149, "step": 26920 }, { "epoch": 1.3450204774747778, "grad_norm": 1.398186445236206, "learning_rate": 2.4209327242781187e-05, "loss": 0.5638, "step": 26930 }, { "epoch": 1.345519928079113, "grad_norm": 4.486873149871826, "learning_rate": 2.4175729582650412e-05, "loss": 0.5749, "step": 26940 }, { "epoch": 1.3460193786834482, "grad_norm": 0.7865625023841858, "learning_rate": 2.414214781723949e-05, "loss": 0.8484, "step": 26950 }, { "epoch": 1.3465188292877834, "grad_norm": 0.9800479412078857, "learning_rate": 2.4108581967217876e-05, "loss": 0.8201, "step": 26960 }, { "epoch": 1.3470182798921186, "grad_norm": 1.160328984260559, "learning_rate": 2.4075032053245182e-05, "loss": 0.9048, "step": 26970 }, { "epoch": 1.3475177304964538, "grad_norm": 2.4143083095550537, "learning_rate": 2.4041498095971253e-05, "loss": 0.8337, "step": 26980 }, { "epoch": 1.348017181100789, "grad_norm": 1.1071438789367676, "learning_rate": 2.4007980116036043e-05, "loss": 0.768, "step": 26990 }, { "epoch": 1.3485166317051243, "grad_norm": 1.9384424686431885, "learning_rate": 2.397447813406974e-05, "loss": 0.6876, "step": 27000 }, { "epoch": 1.3490160823094595, "grad_norm": 2.009960889816284, "learning_rate": 2.3940992170692634e-05, "loss": 0.9766, "step": 27010 }, { "epoch": 1.349515532913795, "grad_norm": 1.7792822122573853, "learning_rate": 2.3907522246515246e-05, "loss": 0.7847, "step": 27020 }, { "epoch": 1.35001498351813, "grad_norm": 4.0502190589904785, "learning_rate": 2.38740683821381e-05, "loss": 0.9855, "step": 27030 }, { "epoch": 1.3505144341224653, "grad_norm": 0.7262182235717773, "learning_rate": 2.3840630598151908e-05, "loss": 0.9214, "step": 27040 }, { "epoch": 1.3510138847268005, "grad_norm": 1.7056944370269775, "learning_rate": 2.3807208915137486e-05, "loss": 0.8062, "step": 27050 }, { "epoch": 1.3515133353311357, "grad_norm": 0.6190218925476074, "learning_rate": 2.377380335366574e-05, "loss": 0.9852, "step": 27060 }, { "epoch": 1.352012785935471, "grad_norm": 2.6608030796051025, "learning_rate": 2.3740413934297595e-05, "loss": 0.9902, "step": 27070 }, { "epoch": 1.3525122365398063, "grad_norm": 1.693894863128662, "learning_rate": 2.3707040677584112e-05, "loss": 0.6989, "step": 27080 }, { "epoch": 1.3530116871441416, "grad_norm": 0.8011934161186218, "learning_rate": 2.3673683604066378e-05, "loss": 0.6936, "step": 27090 }, { "epoch": 1.3535111377484768, "grad_norm": 3.084833860397339, "learning_rate": 2.3640342734275518e-05, "loss": 0.8468, "step": 27100 }, { "epoch": 1.354010588352812, "grad_norm": 3.6632261276245117, "learning_rate": 2.3607018088732673e-05, "loss": 1.0011, "step": 27110 }, { "epoch": 1.3545100389571472, "grad_norm": 6.358667850494385, "learning_rate": 2.3573709687949025e-05, "loss": 1.037, "step": 27120 }, { "epoch": 1.3550094895614824, "grad_norm": 2.2811105251312256, "learning_rate": 2.3540417552425753e-05, "loss": 0.7105, "step": 27130 }, { "epoch": 1.3555089401658176, "grad_norm": 1.1203714609146118, "learning_rate": 2.3507141702653972e-05, "loss": 0.8722, "step": 27140 }, { "epoch": 1.3560083907701528, "grad_norm": 2.0409436225891113, "learning_rate": 2.3473882159114848e-05, "loss": 0.9253, "step": 27150 }, { "epoch": 1.356507841374488, "grad_norm": 1.8561183214187622, "learning_rate": 2.3440638942279462e-05, "loss": 0.7566, "step": 27160 }, { "epoch": 1.3570072919788232, "grad_norm": 1.9220620393753052, "learning_rate": 2.3407412072608865e-05, "loss": 0.8353, "step": 27170 }, { "epoch": 1.3575067425831584, "grad_norm": 2.4859888553619385, "learning_rate": 2.337420157055405e-05, "loss": 0.7355, "step": 27180 }, { "epoch": 1.3580061931874938, "grad_norm": 3.319899797439575, "learning_rate": 2.334100745655592e-05, "loss": 0.779, "step": 27190 }, { "epoch": 1.358505643791829, "grad_norm": 1.3624628782272339, "learning_rate": 2.3307829751045307e-05, "loss": 0.6951, "step": 27200 }, { "epoch": 1.3590050943961642, "grad_norm": 0.639570415019989, "learning_rate": 2.3274668474442946e-05, "loss": 0.7568, "step": 27210 }, { "epoch": 1.3595045450004994, "grad_norm": 0.9351773262023926, "learning_rate": 2.324152364715942e-05, "loss": 0.83, "step": 27220 }, { "epoch": 1.3600039956048346, "grad_norm": 1.543528437614441, "learning_rate": 2.3208395289595238e-05, "loss": 0.7359, "step": 27230 }, { "epoch": 1.3605034462091699, "grad_norm": 2.134068012237549, "learning_rate": 2.3175283422140748e-05, "loss": 0.7221, "step": 27240 }, { "epoch": 1.3610028968135053, "grad_norm": 7.020972728729248, "learning_rate": 2.3142188065176167e-05, "loss": 1.056, "step": 27250 }, { "epoch": 1.3615023474178405, "grad_norm": 2.349780321121216, "learning_rate": 2.3109109239071492e-05, "loss": 0.6357, "step": 27260 }, { "epoch": 1.3620017980221757, "grad_norm": 1.3304367065429688, "learning_rate": 2.3076046964186633e-05, "loss": 0.8889, "step": 27270 }, { "epoch": 1.362501248626511, "grad_norm": 2.544924020767212, "learning_rate": 2.3043001260871256e-05, "loss": 0.6522, "step": 27280 }, { "epoch": 1.363000699230846, "grad_norm": 5.3520050048828125, "learning_rate": 2.3009972149464854e-05, "loss": 0.9287, "step": 27290 }, { "epoch": 1.3635001498351813, "grad_norm": 1.7571983337402344, "learning_rate": 2.297695965029666e-05, "loss": 0.8317, "step": 27300 }, { "epoch": 1.3639996004395165, "grad_norm": 2.879591941833496, "learning_rate": 2.294396378368573e-05, "loss": 0.9106, "step": 27310 }, { "epoch": 1.3644990510438517, "grad_norm": 1.6200952529907227, "learning_rate": 2.2910984569940875e-05, "loss": 0.8736, "step": 27320 }, { "epoch": 1.364998501648187, "grad_norm": 2.3251466751098633, "learning_rate": 2.2878022029360663e-05, "loss": 0.7795, "step": 27330 }, { "epoch": 1.3654979522525221, "grad_norm": 1.6437568664550781, "learning_rate": 2.2845076182233345e-05, "loss": 0.861, "step": 27340 }, { "epoch": 1.3659974028568573, "grad_norm": 2.6947972774505615, "learning_rate": 2.281214704883695e-05, "loss": 0.9195, "step": 27350 }, { "epoch": 1.3664968534611928, "grad_norm": 1.5877389907836914, "learning_rate": 2.2779234649439256e-05, "loss": 0.7399, "step": 27360 }, { "epoch": 1.366996304065528, "grad_norm": 1.1317212581634521, "learning_rate": 2.274633900429764e-05, "loss": 0.6701, "step": 27370 }, { "epoch": 1.3674957546698632, "grad_norm": 0.8718187808990479, "learning_rate": 2.2713460133659237e-05, "loss": 0.7554, "step": 27380 }, { "epoch": 1.3679952052741984, "grad_norm": 2.810588836669922, "learning_rate": 2.268059805776085e-05, "loss": 0.9007, "step": 27390 }, { "epoch": 1.3684946558785336, "grad_norm": 2.9779884815216064, "learning_rate": 2.264775279682894e-05, "loss": 0.8176, "step": 27400 }, { "epoch": 1.3689941064828688, "grad_norm": 1.3950903415679932, "learning_rate": 2.2614924371079586e-05, "loss": 0.598, "step": 27410 }, { "epoch": 1.369493557087204, "grad_norm": 2.1478521823883057, "learning_rate": 2.258211280071856e-05, "loss": 0.8133, "step": 27420 }, { "epoch": 1.3699930076915394, "grad_norm": 1.6993751525878906, "learning_rate": 2.2549318105941213e-05, "loss": 0.844, "step": 27430 }, { "epoch": 1.3704924582958746, "grad_norm": 2.621405839920044, "learning_rate": 2.2516540306932542e-05, "loss": 0.8564, "step": 27440 }, { "epoch": 1.3709919089002098, "grad_norm": 1.7271287441253662, "learning_rate": 2.2483779423867134e-05, "loss": 1.0225, "step": 27450 }, { "epoch": 1.371491359504545, "grad_norm": 1.8531676530838013, "learning_rate": 2.2451035476909156e-05, "loss": 0.8139, "step": 27460 }, { "epoch": 1.3719908101088802, "grad_norm": 1.5809015035629272, "learning_rate": 2.2418308486212352e-05, "loss": 1.1132, "step": 27470 }, { "epoch": 1.3724902607132154, "grad_norm": 0.6896778345108032, "learning_rate": 2.2385598471920065e-05, "loss": 0.769, "step": 27480 }, { "epoch": 1.3729897113175507, "grad_norm": 2.1524176597595215, "learning_rate": 2.235290545416512e-05, "loss": 0.8848, "step": 27490 }, { "epoch": 1.3734891619218859, "grad_norm": 2.9202041625976562, "learning_rate": 2.232022945306993e-05, "loss": 0.8083, "step": 27500 }, { "epoch": 1.373988612526221, "grad_norm": 1.8689171075820923, "learning_rate": 2.2287570488746428e-05, "loss": 0.7616, "step": 27510 }, { "epoch": 1.3744880631305563, "grad_norm": 2.178342819213867, "learning_rate": 2.2254928581296054e-05, "loss": 1.0022, "step": 27520 }, { "epoch": 1.3749875137348917, "grad_norm": 1.7452607154846191, "learning_rate": 2.2222303750809758e-05, "loss": 0.6733, "step": 27530 }, { "epoch": 1.375486964339227, "grad_norm": 0.9586209058761597, "learning_rate": 2.2189696017367968e-05, "loss": 1.0299, "step": 27540 }, { "epoch": 1.375986414943562, "grad_norm": 2.5984480381011963, "learning_rate": 2.2157105401040595e-05, "loss": 0.8884, "step": 27550 }, { "epoch": 1.3764858655478973, "grad_norm": 0.9682304859161377, "learning_rate": 2.2124531921887037e-05, "loss": 0.7093, "step": 27560 }, { "epoch": 1.3769853161522325, "grad_norm": 2.2306740283966064, "learning_rate": 2.209197559995608e-05, "loss": 0.6894, "step": 27570 }, { "epoch": 1.3774847667565677, "grad_norm": 4.199643135070801, "learning_rate": 2.2059436455286002e-05, "loss": 0.9119, "step": 27580 }, { "epoch": 1.377984217360903, "grad_norm": 1.6499110460281372, "learning_rate": 2.2026914507904512e-05, "loss": 0.746, "step": 27590 }, { "epoch": 1.3784836679652384, "grad_norm": 2.6509008407592773, "learning_rate": 2.1994409777828728e-05, "loss": 0.8428, "step": 27600 }, { "epoch": 1.3789831185695736, "grad_norm": 2.3291807174682617, "learning_rate": 2.196192228506511e-05, "loss": 0.7173, "step": 27610 }, { "epoch": 1.3794825691739088, "grad_norm": 2.6399214267730713, "learning_rate": 2.192945204960961e-05, "loss": 0.9121, "step": 27620 }, { "epoch": 1.379982019778244, "grad_norm": 1.5159897804260254, "learning_rate": 2.1896999091447525e-05, "loss": 0.9107, "step": 27630 }, { "epoch": 1.3804814703825792, "grad_norm": 2.087423801422119, "learning_rate": 2.1864563430553443e-05, "loss": 0.8158, "step": 27640 }, { "epoch": 1.3809809209869144, "grad_norm": 1.2869203090667725, "learning_rate": 2.1832145086891398e-05, "loss": 0.9143, "step": 27650 }, { "epoch": 1.3814803715912496, "grad_norm": 2.859027862548828, "learning_rate": 2.1799744080414724e-05, "loss": 0.9133, "step": 27660 }, { "epoch": 1.3819798221955848, "grad_norm": 0.9540075659751892, "learning_rate": 2.176736043106612e-05, "loss": 0.8243, "step": 27670 }, { "epoch": 1.38247927279992, "grad_norm": 1.8289692401885986, "learning_rate": 2.1734994158777523e-05, "loss": 0.8115, "step": 27680 }, { "epoch": 1.3829787234042552, "grad_norm": 1.72902250289917, "learning_rate": 2.1702645283470236e-05, "loss": 0.771, "step": 27690 }, { "epoch": 1.3834781740085906, "grad_norm": 3.6999597549438477, "learning_rate": 2.1670313825054872e-05, "loss": 0.8572, "step": 27700 }, { "epoch": 1.3839776246129258, "grad_norm": 1.7897573709487915, "learning_rate": 2.1637999803431302e-05, "loss": 0.8757, "step": 27710 }, { "epoch": 1.384477075217261, "grad_norm": 3.017788887023926, "learning_rate": 2.1605703238488622e-05, "loss": 0.9728, "step": 27720 }, { "epoch": 1.3849765258215962, "grad_norm": 1.4526762962341309, "learning_rate": 2.157342415010523e-05, "loss": 0.6822, "step": 27730 }, { "epoch": 1.3854759764259315, "grad_norm": 2.897826671600342, "learning_rate": 2.1541162558148765e-05, "loss": 0.7911, "step": 27740 }, { "epoch": 1.3859754270302667, "grad_norm": 1.6730420589447021, "learning_rate": 2.15089184824761e-05, "loss": 0.6569, "step": 27750 }, { "epoch": 1.3864748776346019, "grad_norm": 1.777726173400879, "learning_rate": 2.147669194293328e-05, "loss": 0.895, "step": 27760 }, { "epoch": 1.3869743282389373, "grad_norm": 0.6175350546836853, "learning_rate": 2.144448295935561e-05, "loss": 0.797, "step": 27770 }, { "epoch": 1.3874737788432725, "grad_norm": 1.3161417245864868, "learning_rate": 2.141229155156758e-05, "loss": 0.7415, "step": 27780 }, { "epoch": 1.3879732294476077, "grad_norm": 1.6619001626968384, "learning_rate": 2.1380117739382848e-05, "loss": 0.8998, "step": 27790 }, { "epoch": 1.388472680051943, "grad_norm": 2.7125279903411865, "learning_rate": 2.1347961542604244e-05, "loss": 0.7463, "step": 27800 }, { "epoch": 1.3889721306562781, "grad_norm": 1.7043880224227905, "learning_rate": 2.1315822981023765e-05, "loss": 0.8127, "step": 27810 }, { "epoch": 1.3894715812606133, "grad_norm": 1.8581377267837524, "learning_rate": 2.128370207442254e-05, "loss": 0.751, "step": 27820 }, { "epoch": 1.3899710318649485, "grad_norm": 1.3206275701522827, "learning_rate": 2.125159884257087e-05, "loss": 0.946, "step": 27830 }, { "epoch": 1.3904704824692837, "grad_norm": 1.8693413734436035, "learning_rate": 2.12195133052281e-05, "loss": 0.7617, "step": 27840 }, { "epoch": 1.390969933073619, "grad_norm": 0.605567216873169, "learning_rate": 2.118744548214276e-05, "loss": 0.6335, "step": 27850 }, { "epoch": 1.3914693836779541, "grad_norm": 2.917574644088745, "learning_rate": 2.1155395393052436e-05, "loss": 0.7454, "step": 27860 }, { "epoch": 1.3919688342822893, "grad_norm": 7.583207607269287, "learning_rate": 2.1123363057683808e-05, "loss": 0.8804, "step": 27870 }, { "epoch": 1.3924682848866248, "grad_norm": 2.4581804275512695, "learning_rate": 2.109134849575265e-05, "loss": 0.7783, "step": 27880 }, { "epoch": 1.39296773549096, "grad_norm": 0.5140214562416077, "learning_rate": 2.1059351726963767e-05, "loss": 0.699, "step": 27890 }, { "epoch": 1.3934671860952952, "grad_norm": 4.090611934661865, "learning_rate": 2.1027372771011034e-05, "loss": 0.881, "step": 27900 }, { "epoch": 1.3939666366996304, "grad_norm": 1.7151902914047241, "learning_rate": 2.0995411647577328e-05, "loss": 0.6948, "step": 27910 }, { "epoch": 1.3944660873039656, "grad_norm": 2.7404935359954834, "learning_rate": 2.0963468376334583e-05, "loss": 0.8319, "step": 27920 }, { "epoch": 1.3949655379083008, "grad_norm": 1.2969218492507935, "learning_rate": 2.093154297694374e-05, "loss": 0.6873, "step": 27930 }, { "epoch": 1.3954649885126362, "grad_norm": 1.7877838611602783, "learning_rate": 2.089963546905476e-05, "loss": 0.9042, "step": 27940 }, { "epoch": 1.3959644391169714, "grad_norm": 1.4071444272994995, "learning_rate": 2.08677458723065e-05, "loss": 0.7379, "step": 27950 }, { "epoch": 1.3964638897213066, "grad_norm": 1.950614094734192, "learning_rate": 2.0835874206326923e-05, "loss": 0.8029, "step": 27960 }, { "epoch": 1.3969633403256418, "grad_norm": 1.4258003234863281, "learning_rate": 2.080402049073287e-05, "loss": 0.9281, "step": 27970 }, { "epoch": 1.397462790929977, "grad_norm": 3.0963985919952393, "learning_rate": 2.077218474513018e-05, "loss": 0.9083, "step": 27980 }, { "epoch": 1.3979622415343123, "grad_norm": 2.9463553428649902, "learning_rate": 2.074036698911357e-05, "loss": 0.7052, "step": 27990 }, { "epoch": 1.3984616921386475, "grad_norm": 1.3723751306533813, "learning_rate": 2.070856724226674e-05, "loss": 0.771, "step": 28000 }, { "epoch": 1.3989611427429827, "grad_norm": 2.263819932937622, "learning_rate": 2.067678552416229e-05, "loss": 0.7968, "step": 28010 }, { "epoch": 1.3994605933473179, "grad_norm": 2.6847734451293945, "learning_rate": 2.0645021854361734e-05, "loss": 0.8374, "step": 28020 }, { "epoch": 1.399960043951653, "grad_norm": 2.3462750911712646, "learning_rate": 2.061327625241544e-05, "loss": 0.928, "step": 28030 }, { "epoch": 1.4004594945559883, "grad_norm": 1.5605076551437378, "learning_rate": 2.0581548737862673e-05, "loss": 0.7897, "step": 28040 }, { "epoch": 1.4009589451603237, "grad_norm": 0.8992563486099243, "learning_rate": 2.0549839330231625e-05, "loss": 0.6896, "step": 28050 }, { "epoch": 1.401458395764659, "grad_norm": 2.374424934387207, "learning_rate": 2.0518148049039237e-05, "loss": 0.5774, "step": 28060 }, { "epoch": 1.4019578463689941, "grad_norm": 4.495545864105225, "learning_rate": 2.048647491379137e-05, "loss": 0.7919, "step": 28070 }, { "epoch": 1.4024572969733293, "grad_norm": 1.4818958044052124, "learning_rate": 2.045481994398269e-05, "loss": 0.9154, "step": 28080 }, { "epoch": 1.4029567475776645, "grad_norm": 1.3862239122390747, "learning_rate": 2.042318315909669e-05, "loss": 0.9076, "step": 28090 }, { "epoch": 1.4034561981819997, "grad_norm": 1.2710994482040405, "learning_rate": 2.0391564578605677e-05, "loss": 0.794, "step": 28100 }, { "epoch": 1.4039556487863352, "grad_norm": 1.2284008264541626, "learning_rate": 2.0359964221970706e-05, "loss": 1.0372, "step": 28110 }, { "epoch": 1.4044550993906704, "grad_norm": 1.3174121379852295, "learning_rate": 2.032838210864166e-05, "loss": 0.9405, "step": 28120 }, { "epoch": 1.4049545499950056, "grad_norm": 5.149236679077148, "learning_rate": 2.0296818258057225e-05, "loss": 0.9663, "step": 28130 }, { "epoch": 1.4054540005993408, "grad_norm": 2.028315305709839, "learning_rate": 2.0265272689644767e-05, "loss": 0.68, "step": 28140 }, { "epoch": 1.405953451203676, "grad_norm": 0.9091372489929199, "learning_rate": 2.023374542282045e-05, "loss": 0.7398, "step": 28150 }, { "epoch": 1.4064529018080112, "grad_norm": 4.5730156898498535, "learning_rate": 2.0202236476989155e-05, "loss": 0.8106, "step": 28160 }, { "epoch": 1.4069523524123464, "grad_norm": 3.3818130493164062, "learning_rate": 2.017074587154453e-05, "loss": 0.8392, "step": 28170 }, { "epoch": 1.4074518030166816, "grad_norm": 1.0334668159484863, "learning_rate": 2.0139273625868838e-05, "loss": 0.6978, "step": 28180 }, { "epoch": 1.4079512536210168, "grad_norm": 3.1381216049194336, "learning_rate": 2.010781975933314e-05, "loss": 0.6799, "step": 28190 }, { "epoch": 1.408450704225352, "grad_norm": 2.7070388793945312, "learning_rate": 2.0076384291297134e-05, "loss": 0.8116, "step": 28200 }, { "epoch": 1.4089501548296872, "grad_norm": 2.0675418376922607, "learning_rate": 2.0044967241109214e-05, "loss": 0.654, "step": 28210 }, { "epoch": 1.4094496054340226, "grad_norm": 2.2831497192382812, "learning_rate": 2.0013568628106427e-05, "loss": 0.7788, "step": 28220 }, { "epoch": 1.4099490560383579, "grad_norm": 0.8777928948402405, "learning_rate": 1.9982188471614478e-05, "loss": 0.6852, "step": 28230 }, { "epoch": 1.410448506642693, "grad_norm": 2.0990521907806396, "learning_rate": 1.99508267909477e-05, "loss": 0.8341, "step": 28240 }, { "epoch": 1.4109479572470283, "grad_norm": 1.5556305646896362, "learning_rate": 1.991948360540909e-05, "loss": 0.8365, "step": 28250 }, { "epoch": 1.4114474078513635, "grad_norm": 1.3633089065551758, "learning_rate": 1.9888158934290198e-05, "loss": 0.8509, "step": 28260 }, { "epoch": 1.4119468584556987, "grad_norm": 1.076629877090454, "learning_rate": 1.9856852796871227e-05, "loss": 0.8002, "step": 28270 }, { "epoch": 1.412446309060034, "grad_norm": 2.0934979915618896, "learning_rate": 1.9825565212420972e-05, "loss": 0.8888, "step": 28280 }, { "epoch": 1.4129457596643693, "grad_norm": 1.7095686197280884, "learning_rate": 1.979429620019681e-05, "loss": 0.7881, "step": 28290 }, { "epoch": 1.4134452102687045, "grad_norm": 1.9121060371398926, "learning_rate": 1.9763045779444617e-05, "loss": 0.6701, "step": 28300 }, { "epoch": 1.4139446608730397, "grad_norm": 1.45902681350708, "learning_rate": 1.9731813969398937e-05, "loss": 0.7469, "step": 28310 }, { "epoch": 1.414444111477375, "grad_norm": 2.207597255706787, "learning_rate": 1.9700600789282813e-05, "loss": 0.9726, "step": 28320 }, { "epoch": 1.4149435620817101, "grad_norm": 2.6745400428771973, "learning_rate": 1.966940625830777e-05, "loss": 0.8956, "step": 28330 }, { "epoch": 1.4154430126860453, "grad_norm": 2.2539052963256836, "learning_rate": 1.9638230395673928e-05, "loss": 0.7509, "step": 28340 }, { "epoch": 1.4159424632903805, "grad_norm": 0.9813952445983887, "learning_rate": 1.960707322056987e-05, "loss": 0.6432, "step": 28350 }, { "epoch": 1.4164419138947157, "grad_norm": 2.062953472137451, "learning_rate": 1.9575934752172714e-05, "loss": 0.9818, "step": 28360 }, { "epoch": 1.416941364499051, "grad_norm": 1.6751025915145874, "learning_rate": 1.9544815009648048e-05, "loss": 0.9827, "step": 28370 }, { "epoch": 1.4174408151033862, "grad_norm": 1.202103853225708, "learning_rate": 1.9513714012149882e-05, "loss": 0.6203, "step": 28380 }, { "epoch": 1.4179402657077216, "grad_norm": 2.6885855197906494, "learning_rate": 1.9482631778820788e-05, "loss": 0.8036, "step": 28390 }, { "epoch": 1.4184397163120568, "grad_norm": 1.7237088680267334, "learning_rate": 1.945156832879174e-05, "loss": 0.6784, "step": 28400 }, { "epoch": 1.418939166916392, "grad_norm": 2.560153007507324, "learning_rate": 1.942052368118212e-05, "loss": 0.9285, "step": 28410 }, { "epoch": 1.4194386175207272, "grad_norm": 2.7827835083007812, "learning_rate": 1.938949785509977e-05, "loss": 0.8504, "step": 28420 }, { "epoch": 1.4199380681250624, "grad_norm": 1.1426506042480469, "learning_rate": 1.9358490869640948e-05, "loss": 0.7836, "step": 28430 }, { "epoch": 1.4204375187293976, "grad_norm": 1.8969206809997559, "learning_rate": 1.932750274389034e-05, "loss": 0.8636, "step": 28440 }, { "epoch": 1.4209369693337328, "grad_norm": 4.134188175201416, "learning_rate": 1.929653349692095e-05, "loss": 0.8888, "step": 28450 }, { "epoch": 1.4214364199380682, "grad_norm": 3.6038193702697754, "learning_rate": 1.926558314779422e-05, "loss": 0.9347, "step": 28460 }, { "epoch": 1.4219358705424034, "grad_norm": 6.218140125274658, "learning_rate": 1.9234651715559942e-05, "loss": 0.7081, "step": 28470 }, { "epoch": 1.4224353211467387, "grad_norm": 1.4552103281021118, "learning_rate": 1.9203739219256312e-05, "loss": 0.6853, "step": 28480 }, { "epoch": 1.4229347717510739, "grad_norm": 1.0263328552246094, "learning_rate": 1.917284567790979e-05, "loss": 0.648, "step": 28490 }, { "epoch": 1.423434222355409, "grad_norm": 3.2307348251342773, "learning_rate": 1.9141971110535212e-05, "loss": 0.7135, "step": 28500 }, { "epoch": 1.4239336729597443, "grad_norm": 3.1052818298339844, "learning_rate": 1.9111115536135743e-05, "loss": 1.0097, "step": 28510 }, { "epoch": 1.4244331235640795, "grad_norm": 0.38739892840385437, "learning_rate": 1.9080278973702863e-05, "loss": 0.8454, "step": 28520 }, { "epoch": 1.4249325741684147, "grad_norm": 2.029616594314575, "learning_rate": 1.90494614422163e-05, "loss": 1.026, "step": 28530 }, { "epoch": 1.4254320247727499, "grad_norm": 1.4098113775253296, "learning_rate": 1.9018662960644112e-05, "loss": 0.7137, "step": 28540 }, { "epoch": 1.425931475377085, "grad_norm": 1.4638160467147827, "learning_rate": 1.898788354794263e-05, "loss": 0.7584, "step": 28550 }, { "epoch": 1.4264309259814205, "grad_norm": 0.9639886021614075, "learning_rate": 1.8957123223056445e-05, "loss": 0.9064, "step": 28560 }, { "epoch": 1.4269303765857557, "grad_norm": 2.4433507919311523, "learning_rate": 1.892638200491839e-05, "loss": 0.8157, "step": 28570 }, { "epoch": 1.427429827190091, "grad_norm": 1.522930383682251, "learning_rate": 1.8895659912449543e-05, "loss": 0.8241, "step": 28580 }, { "epoch": 1.4279292777944261, "grad_norm": 1.5414764881134033, "learning_rate": 1.8864956964559232e-05, "loss": 0.608, "step": 28590 }, { "epoch": 1.4284287283987613, "grad_norm": 1.267751932144165, "learning_rate": 1.8834273180144944e-05, "loss": 0.644, "step": 28600 }, { "epoch": 1.4289281790030965, "grad_norm": 3.442837715148926, "learning_rate": 1.880360857809243e-05, "loss": 1.1303, "step": 28610 }, { "epoch": 1.4294276296074317, "grad_norm": 2.341076135635376, "learning_rate": 1.87729631772756e-05, "loss": 0.8173, "step": 28620 }, { "epoch": 1.4299270802117672, "grad_norm": 1.5916999578475952, "learning_rate": 1.8742336996556563e-05, "loss": 0.7439, "step": 28630 }, { "epoch": 1.4304265308161024, "grad_norm": 1.885399341583252, "learning_rate": 1.8711730054785598e-05, "loss": 0.8916, "step": 28640 }, { "epoch": 1.4309259814204376, "grad_norm": 1.9260399341583252, "learning_rate": 1.8681142370801136e-05, "loss": 0.7211, "step": 28650 }, { "epoch": 1.4314254320247728, "grad_norm": 3.0995867252349854, "learning_rate": 1.8650573963429745e-05, "loss": 0.6949, "step": 28660 }, { "epoch": 1.431924882629108, "grad_norm": 1.6041392087936401, "learning_rate": 1.8620024851486172e-05, "loss": 0.8692, "step": 28670 }, { "epoch": 1.4324243332334432, "grad_norm": 1.8370980024337769, "learning_rate": 1.858949505377321e-05, "loss": 0.7496, "step": 28680 }, { "epoch": 1.4329237838377784, "grad_norm": 1.3790216445922852, "learning_rate": 1.855898458908183e-05, "loss": 0.7764, "step": 28690 }, { "epoch": 1.4334232344421136, "grad_norm": 1.209531307220459, "learning_rate": 1.8528493476191082e-05, "loss": 0.9801, "step": 28700 }, { "epoch": 1.4339226850464488, "grad_norm": 3.863590955734253, "learning_rate": 1.8498021733868125e-05, "loss": 0.8023, "step": 28710 }, { "epoch": 1.434422135650784, "grad_norm": 1.5565155744552612, "learning_rate": 1.8467569380868122e-05, "loss": 0.7388, "step": 28720 }, { "epoch": 1.4349215862551192, "grad_norm": 2.479034900665283, "learning_rate": 1.8437136435934416e-05, "loss": 0.858, "step": 28730 }, { "epoch": 1.4354210368594547, "grad_norm": 5.617199897766113, "learning_rate": 1.8406722917798313e-05, "loss": 0.9907, "step": 28740 }, { "epoch": 1.4359204874637899, "grad_norm": 4.307305812835693, "learning_rate": 1.8376328845179225e-05, "loss": 0.7558, "step": 28750 }, { "epoch": 1.436419938068125, "grad_norm": 2.7041280269622803, "learning_rate": 1.8345954236784524e-05, "loss": 0.864, "step": 28760 }, { "epoch": 1.4369193886724603, "grad_norm": 1.1041229963302612, "learning_rate": 1.8315599111309655e-05, "loss": 0.7418, "step": 28770 }, { "epoch": 1.4374188392767955, "grad_norm": 2.7768383026123047, "learning_rate": 1.828526348743807e-05, "loss": 0.7256, "step": 28780 }, { "epoch": 1.4379182898811307, "grad_norm": 3.146407127380371, "learning_rate": 1.8254947383841215e-05, "loss": 0.9051, "step": 28790 }, { "epoch": 1.4384177404854661, "grad_norm": 1.5446263551712036, "learning_rate": 1.8224650819178474e-05, "loss": 0.6931, "step": 28800 }, { "epoch": 1.4389171910898013, "grad_norm": 2.5284624099731445, "learning_rate": 1.8194373812097256e-05, "loss": 0.8047, "step": 28810 }, { "epoch": 1.4394166416941365, "grad_norm": 3.3939034938812256, "learning_rate": 1.816411638123296e-05, "loss": 0.8334, "step": 28820 }, { "epoch": 1.4399160922984717, "grad_norm": 1.5703613758087158, "learning_rate": 1.813387854520885e-05, "loss": 0.8995, "step": 28830 }, { "epoch": 1.440415542902807, "grad_norm": 1.469664454460144, "learning_rate": 1.810366032263619e-05, "loss": 0.7527, "step": 28840 }, { "epoch": 1.4409149935071421, "grad_norm": 1.1113535165786743, "learning_rate": 1.807346173211416e-05, "loss": 0.6603, "step": 28850 }, { "epoch": 1.4414144441114773, "grad_norm": 2.9713151454925537, "learning_rate": 1.8043282792229866e-05, "loss": 0.8782, "step": 28860 }, { "epoch": 1.4419138947158125, "grad_norm": 1.772800087928772, "learning_rate": 1.801312352155828e-05, "loss": 0.8576, "step": 28870 }, { "epoch": 1.4424133453201478, "grad_norm": 1.4547010660171509, "learning_rate": 1.7982983938662306e-05, "loss": 0.7491, "step": 28880 }, { "epoch": 1.442912795924483, "grad_norm": 1.5173430442810059, "learning_rate": 1.7952864062092723e-05, "loss": 0.7665, "step": 28890 }, { "epoch": 1.4434122465288182, "grad_norm": 2.2784523963928223, "learning_rate": 1.792276391038817e-05, "loss": 0.8184, "step": 28900 }, { "epoch": 1.4439116971331536, "grad_norm": 1.6444159746170044, "learning_rate": 1.7892683502075157e-05, "loss": 0.9729, "step": 28910 }, { "epoch": 1.4444111477374888, "grad_norm": 1.7428510189056396, "learning_rate": 1.786262285566804e-05, "loss": 1.0095, "step": 28920 }, { "epoch": 1.444910598341824, "grad_norm": 1.1461379528045654, "learning_rate": 1.7832581989669e-05, "loss": 0.7286, "step": 28930 }, { "epoch": 1.4454100489461592, "grad_norm": 2.907576322555542, "learning_rate": 1.7802560922568078e-05, "loss": 0.9489, "step": 28940 }, { "epoch": 1.4459094995504944, "grad_norm": 2.1478140354156494, "learning_rate": 1.777255967284307e-05, "loss": 0.9163, "step": 28950 }, { "epoch": 1.4464089501548296, "grad_norm": 1.46336030960083, "learning_rate": 1.7742578258959614e-05, "loss": 0.8653, "step": 28960 }, { "epoch": 1.446908400759165, "grad_norm": 4.735803127288818, "learning_rate": 1.771261669937114e-05, "loss": 0.8468, "step": 28970 }, { "epoch": 1.4474078513635003, "grad_norm": 2.8550615310668945, "learning_rate": 1.768267501251885e-05, "loss": 0.921, "step": 28980 }, { "epoch": 1.4479073019678355, "grad_norm": 1.8415980339050293, "learning_rate": 1.765275321683172e-05, "loss": 0.7601, "step": 28990 }, { "epoch": 1.4484067525721707, "grad_norm": 1.7350083589553833, "learning_rate": 1.7622851330726463e-05, "loss": 0.9346, "step": 29000 }, { "epoch": 1.4489062031765059, "grad_norm": 2.4892044067382812, "learning_rate": 1.7592969372607565e-05, "loss": 0.816, "step": 29010 }, { "epoch": 1.449405653780841, "grad_norm": 2.418515205383301, "learning_rate": 1.7563107360867244e-05, "loss": 1.0204, "step": 29020 }, { "epoch": 1.4499051043851763, "grad_norm": 2.5340194702148438, "learning_rate": 1.7533265313885404e-05, "loss": 1.0798, "step": 29030 }, { "epoch": 1.4504045549895115, "grad_norm": 1.373233675956726, "learning_rate": 1.75034432500297e-05, "loss": 0.818, "step": 29040 }, { "epoch": 1.4509040055938467, "grad_norm": 1.2390849590301514, "learning_rate": 1.7473641187655478e-05, "loss": 0.7464, "step": 29050 }, { "epoch": 1.451403456198182, "grad_norm": 1.6846845149993896, "learning_rate": 1.7443859145105785e-05, "loss": 0.7785, "step": 29060 }, { "epoch": 1.451902906802517, "grad_norm": 1.7043160200119019, "learning_rate": 1.7414097140711276e-05, "loss": 0.9006, "step": 29070 }, { "epoch": 1.4524023574068525, "grad_norm": 2.3897504806518555, "learning_rate": 1.7384355192790397e-05, "loss": 0.6827, "step": 29080 }, { "epoch": 1.4529018080111877, "grad_norm": 1.457085371017456, "learning_rate": 1.7354633319649162e-05, "loss": 1.1355, "step": 29090 }, { "epoch": 1.453401258615523, "grad_norm": 2.2086880207061768, "learning_rate": 1.732493153958122e-05, "loss": 0.7172, "step": 29100 }, { "epoch": 1.4539007092198581, "grad_norm": 2.0934090614318848, "learning_rate": 1.7295249870867896e-05, "loss": 0.7152, "step": 29110 }, { "epoch": 1.4544001598241934, "grad_norm": 2.264425039291382, "learning_rate": 1.726558833177812e-05, "loss": 0.7377, "step": 29120 }, { "epoch": 1.4548996104285286, "grad_norm": 3.516007661819458, "learning_rate": 1.7235946940568452e-05, "loss": 0.7442, "step": 29130 }, { "epoch": 1.455399061032864, "grad_norm": 1.9093573093414307, "learning_rate": 1.7206325715483e-05, "loss": 0.7429, "step": 29140 }, { "epoch": 1.4558985116371992, "grad_norm": 1.1381827592849731, "learning_rate": 1.7176724674753497e-05, "loss": 0.8306, "step": 29150 }, { "epoch": 1.4563979622415344, "grad_norm": 1.3810150623321533, "learning_rate": 1.7147143836599268e-05, "loss": 0.9741, "step": 29160 }, { "epoch": 1.4568974128458696, "grad_norm": 4.208400249481201, "learning_rate": 1.711758321922719e-05, "loss": 0.892, "step": 29170 }, { "epoch": 1.4573968634502048, "grad_norm": 1.5972979068756104, "learning_rate": 1.7088042840831654e-05, "loss": 0.79, "step": 29180 }, { "epoch": 1.45789631405454, "grad_norm": 1.7069154977798462, "learning_rate": 1.7058522719594644e-05, "loss": 0.8388, "step": 29190 }, { "epoch": 1.4583957646588752, "grad_norm": 2.9115262031555176, "learning_rate": 1.7029022873685658e-05, "loss": 1.0459, "step": 29200 }, { "epoch": 1.4588952152632104, "grad_norm": 2.905496835708618, "learning_rate": 1.699954332126173e-05, "loss": 0.7959, "step": 29210 }, { "epoch": 1.4593946658675456, "grad_norm": 1.838226556777954, "learning_rate": 1.6970084080467363e-05, "loss": 0.8194, "step": 29220 }, { "epoch": 1.4598941164718808, "grad_norm": 3.2017369270324707, "learning_rate": 1.6940645169434594e-05, "loss": 1.0328, "step": 29230 }, { "epoch": 1.460393567076216, "grad_norm": 2.454606771469116, "learning_rate": 1.6911226606282944e-05, "loss": 0.7741, "step": 29240 }, { "epoch": 1.4608930176805515, "grad_norm": 1.7968065738677979, "learning_rate": 1.6881828409119395e-05, "loss": 0.7797, "step": 29250 }, { "epoch": 1.4613924682848867, "grad_norm": 2.331393241882324, "learning_rate": 1.685245059603841e-05, "loss": 0.8434, "step": 29260 }, { "epoch": 1.4618919188892219, "grad_norm": 1.2128174304962158, "learning_rate": 1.6823093185121896e-05, "loss": 0.8171, "step": 29270 }, { "epoch": 1.462391369493557, "grad_norm": 1.5789635181427002, "learning_rate": 1.6793756194439203e-05, "loss": 0.8425, "step": 29280 }, { "epoch": 1.4628908200978923, "grad_norm": 2.80997371673584, "learning_rate": 1.676443964204713e-05, "loss": 0.7381, "step": 29290 }, { "epoch": 1.4633902707022275, "grad_norm": 1.896931767463684, "learning_rate": 1.673514354598985e-05, "loss": 0.7937, "step": 29300 }, { "epoch": 1.463889721306563, "grad_norm": 1.85825514793396, "learning_rate": 1.6705867924298996e-05, "loss": 0.8281, "step": 29310 }, { "epoch": 1.4643891719108981, "grad_norm": 0.889171302318573, "learning_rate": 1.667661279499357e-05, "loss": 0.968, "step": 29320 }, { "epoch": 1.4648886225152333, "grad_norm": 3.0360727310180664, "learning_rate": 1.6647378176079975e-05, "loss": 0.9017, "step": 29330 }, { "epoch": 1.4653880731195685, "grad_norm": 1.203291893005371, "learning_rate": 1.661816408555198e-05, "loss": 0.8425, "step": 29340 }, { "epoch": 1.4658875237239037, "grad_norm": 1.5826753377914429, "learning_rate": 1.6588970541390732e-05, "loss": 0.7645, "step": 29350 }, { "epoch": 1.466386974328239, "grad_norm": 3.3171403408050537, "learning_rate": 1.6559797561564733e-05, "loss": 0.7069, "step": 29360 }, { "epoch": 1.4668864249325742, "grad_norm": 1.854358434677124, "learning_rate": 1.6530645164029784e-05, "loss": 0.6878, "step": 29370 }, { "epoch": 1.4673858755369094, "grad_norm": 2.115454912185669, "learning_rate": 1.6501513366729078e-05, "loss": 0.7761, "step": 29380 }, { "epoch": 1.4678853261412446, "grad_norm": 2.4859368801116943, "learning_rate": 1.647240218759309e-05, "loss": 0.8014, "step": 29390 }, { "epoch": 1.4683847767455798, "grad_norm": 2.6893231868743896, "learning_rate": 1.6443311644539637e-05, "loss": 0.8345, "step": 29400 }, { "epoch": 1.468884227349915, "grad_norm": 1.4402644634246826, "learning_rate": 1.6414241755473776e-05, "loss": 0.8922, "step": 29410 }, { "epoch": 1.4693836779542504, "grad_norm": 1.2642183303833008, "learning_rate": 1.6385192538287925e-05, "loss": 0.7644, "step": 29420 }, { "epoch": 1.4698831285585856, "grad_norm": 0.3462284505367279, "learning_rate": 1.6356164010861736e-05, "loss": 0.6409, "step": 29430 }, { "epoch": 1.4703825791629208, "grad_norm": 1.0375354290008545, "learning_rate": 1.6327156191062147e-05, "loss": 0.6228, "step": 29440 }, { "epoch": 1.470882029767256, "grad_norm": 1.9272801876068115, "learning_rate": 1.629816909674331e-05, "loss": 0.7567, "step": 29450 }, { "epoch": 1.4713814803715912, "grad_norm": 2.9556493759155273, "learning_rate": 1.626920274574666e-05, "loss": 0.7677, "step": 29460 }, { "epoch": 1.4718809309759264, "grad_norm": 3.0389068126678467, "learning_rate": 1.6240257155900852e-05, "loss": 0.8054, "step": 29470 }, { "epoch": 1.4723803815802616, "grad_norm": 1.4731686115264893, "learning_rate": 1.6211332345021784e-05, "loss": 0.8738, "step": 29480 }, { "epoch": 1.472879832184597, "grad_norm": 1.023686170578003, "learning_rate": 1.6182428330912512e-05, "loss": 0.699, "step": 29490 }, { "epoch": 1.4733792827889323, "grad_norm": 3.6420235633850098, "learning_rate": 1.6153545131363325e-05, "loss": 0.8831, "step": 29500 }, { "epoch": 1.4738787333932675, "grad_norm": 3.7023417949676514, "learning_rate": 1.6124682764151738e-05, "loss": 0.6814, "step": 29510 }, { "epoch": 1.4743781839976027, "grad_norm": 3.581373691558838, "learning_rate": 1.6095841247042365e-05, "loss": 0.6889, "step": 29520 }, { "epoch": 1.4748776346019379, "grad_norm": 3.167379379272461, "learning_rate": 1.6067020597787047e-05, "loss": 0.645, "step": 29530 }, { "epoch": 1.475377085206273, "grad_norm": 0.9794548749923706, "learning_rate": 1.6038220834124758e-05, "loss": 0.8456, "step": 29540 }, { "epoch": 1.4758765358106083, "grad_norm": 2.1338908672332764, "learning_rate": 1.600944197378162e-05, "loss": 1.0714, "step": 29550 }, { "epoch": 1.4763759864149435, "grad_norm": 1.995682954788208, "learning_rate": 1.59806840344709e-05, "loss": 0.8789, "step": 29560 }, { "epoch": 1.4768754370192787, "grad_norm": 1.9024702310562134, "learning_rate": 1.5951947033892957e-05, "loss": 0.8198, "step": 29570 }, { "epoch": 1.477374887623614, "grad_norm": 3.0384185314178467, "learning_rate": 1.5923230989735267e-05, "loss": 0.7691, "step": 29580 }, { "epoch": 1.4778743382279493, "grad_norm": 0.7259630560874939, "learning_rate": 1.5894535919672488e-05, "loss": 0.7616, "step": 29590 }, { "epoch": 1.4783737888322845, "grad_norm": 5.399988174438477, "learning_rate": 1.5865861841366246e-05, "loss": 0.9501, "step": 29600 }, { "epoch": 1.4788732394366197, "grad_norm": 0.9695346355438232, "learning_rate": 1.583720877246533e-05, "loss": 0.827, "step": 29610 }, { "epoch": 1.479372690040955, "grad_norm": 2.568943738937378, "learning_rate": 1.5808576730605556e-05, "loss": 0.9285, "step": 29620 }, { "epoch": 1.4798721406452902, "grad_norm": 4.16342830657959, "learning_rate": 1.5779965733409846e-05, "loss": 0.8241, "step": 29630 }, { "epoch": 1.4803715912496254, "grad_norm": 2.3884239196777344, "learning_rate": 1.5751375798488098e-05, "loss": 0.9992, "step": 29640 }, { "epoch": 1.4808710418539606, "grad_norm": 1.9052380323410034, "learning_rate": 1.5722806943437302e-05, "loss": 0.8298, "step": 29650 }, { "epoch": 1.481370492458296, "grad_norm": 2.739285945892334, "learning_rate": 1.569425918584146e-05, "loss": 0.7727, "step": 29660 }, { "epoch": 1.4818699430626312, "grad_norm": 1.5429344177246094, "learning_rate": 1.5665732543271583e-05, "loss": 0.8095, "step": 29670 }, { "epoch": 1.4823693936669664, "grad_norm": 2.4039535522460938, "learning_rate": 1.5637227033285688e-05, "loss": 0.6982, "step": 29680 }, { "epoch": 1.4828688442713016, "grad_norm": 2.808340072631836, "learning_rate": 1.5608742673428788e-05, "loss": 0.6962, "step": 29690 }, { "epoch": 1.4833682948756368, "grad_norm": 1.9518710374832153, "learning_rate": 1.5580279481232878e-05, "loss": 0.8831, "step": 29700 }, { "epoch": 1.483867745479972, "grad_norm": 2.6959991455078125, "learning_rate": 1.555183747421693e-05, "loss": 0.7846, "step": 29710 }, { "epoch": 1.4843671960843072, "grad_norm": 0.9121154546737671, "learning_rate": 1.5523416669886843e-05, "loss": 0.7071, "step": 29720 }, { "epoch": 1.4848666466886424, "grad_norm": 4.97515869140625, "learning_rate": 1.549501708573551e-05, "loss": 0.8471, "step": 29730 }, { "epoch": 1.4853660972929776, "grad_norm": 1.7935420274734497, "learning_rate": 1.5466638739242732e-05, "loss": 0.8679, "step": 29740 }, { "epoch": 1.4858655478973128, "grad_norm": 1.4937760829925537, "learning_rate": 1.543828164787527e-05, "loss": 0.8786, "step": 29750 }, { "epoch": 1.486364998501648, "grad_norm": 3.030714750289917, "learning_rate": 1.5409945829086768e-05, "loss": 0.8141, "step": 29760 }, { "epoch": 1.4868644491059835, "grad_norm": 1.8423618078231812, "learning_rate": 1.5381631300317795e-05, "loss": 0.7205, "step": 29770 }, { "epoch": 1.4873638997103187, "grad_norm": 2.622493028640747, "learning_rate": 1.535333807899584e-05, "loss": 0.897, "step": 29780 }, { "epoch": 1.4878633503146539, "grad_norm": 0.507530927658081, "learning_rate": 1.5325066182535204e-05, "loss": 0.7389, "step": 29790 }, { "epoch": 1.488362800918989, "grad_norm": 2.889690399169922, "learning_rate": 1.5296815628337135e-05, "loss": 0.9367, "step": 29800 }, { "epoch": 1.4888622515233243, "grad_norm": 1.3797270059585571, "learning_rate": 1.5268586433789716e-05, "loss": 0.7787, "step": 29810 }, { "epoch": 1.4893617021276595, "grad_norm": 1.3913246393203735, "learning_rate": 1.5240378616267886e-05, "loss": 0.6306, "step": 29820 }, { "epoch": 1.489861152731995, "grad_norm": 1.9883455038070679, "learning_rate": 1.5212192193133446e-05, "loss": 0.8073, "step": 29830 }, { "epoch": 1.4903606033363301, "grad_norm": 0.7126007080078125, "learning_rate": 1.518402718173496e-05, "loss": 0.6368, "step": 29840 }, { "epoch": 1.4908600539406653, "grad_norm": 1.695792317390442, "learning_rate": 1.5155883599407917e-05, "loss": 1.0107, "step": 29850 }, { "epoch": 1.4913595045450005, "grad_norm": 2.113374948501587, "learning_rate": 1.5127761463474554e-05, "loss": 0.7613, "step": 29860 }, { "epoch": 1.4918589551493358, "grad_norm": 2.3648486137390137, "learning_rate": 1.5099660791243886e-05, "loss": 0.8281, "step": 29870 }, { "epoch": 1.492358405753671, "grad_norm": 3.0216188430786133, "learning_rate": 1.507158160001177e-05, "loss": 0.9265, "step": 29880 }, { "epoch": 1.4928578563580062, "grad_norm": 3.645329713821411, "learning_rate": 1.5043523907060807e-05, "loss": 0.8626, "step": 29890 }, { "epoch": 1.4933573069623414, "grad_norm": 0.8662146925926208, "learning_rate": 1.501548772966041e-05, "loss": 0.5897, "step": 29900 }, { "epoch": 1.4938567575666766, "grad_norm": 2.579042434692383, "learning_rate": 1.4987473085066678e-05, "loss": 0.7507, "step": 29910 }, { "epoch": 1.4943562081710118, "grad_norm": 1.6398191452026367, "learning_rate": 1.4959479990522508e-05, "loss": 0.9949, "step": 29920 }, { "epoch": 1.494855658775347, "grad_norm": 1.2803360223770142, "learning_rate": 1.4931508463257526e-05, "loss": 0.6838, "step": 29930 }, { "epoch": 1.4953551093796824, "grad_norm": 2.342067241668701, "learning_rate": 1.4903558520488081e-05, "loss": 0.71, "step": 29940 }, { "epoch": 1.4958545599840176, "grad_norm": 2.7188761234283447, "learning_rate": 1.4875630179417227e-05, "loss": 0.7602, "step": 29950 }, { "epoch": 1.4963540105883528, "grad_norm": 1.5564262866973877, "learning_rate": 1.4847723457234736e-05, "loss": 1.1034, "step": 29960 }, { "epoch": 1.496853461192688, "grad_norm": 1.265726089477539, "learning_rate": 1.4819838371117068e-05, "loss": 0.7586, "step": 29970 }, { "epoch": 1.4973529117970232, "grad_norm": 2.5439975261688232, "learning_rate": 1.4791974938227371e-05, "loss": 0.6913, "step": 29980 }, { "epoch": 1.4978523624013584, "grad_norm": 1.1225391626358032, "learning_rate": 1.4764133175715445e-05, "loss": 0.7251, "step": 29990 }, { "epoch": 1.4983518130056939, "grad_norm": 1.454066514968872, "learning_rate": 1.4736313100717764e-05, "loss": 0.7131, "step": 30000 }, { "epoch": 1.498851263610029, "grad_norm": 0.9774606823921204, "learning_rate": 1.4708514730357476e-05, "loss": 0.7209, "step": 30010 }, { "epoch": 1.4993507142143643, "grad_norm": 4.219155788421631, "learning_rate": 1.4680738081744333e-05, "loss": 0.8506, "step": 30020 }, { "epoch": 1.4998501648186995, "grad_norm": 1.6903527975082397, "learning_rate": 1.4652983171974743e-05, "loss": 1.0318, "step": 30030 }, { "epoch": 1.5003496154230347, "grad_norm": 2.2009520530700684, "learning_rate": 1.4625250018131732e-05, "loss": 0.7863, "step": 30040 }, { "epoch": 1.50084906602737, "grad_norm": 2.234261989593506, "learning_rate": 1.4597538637284929e-05, "loss": 0.9799, "step": 30050 }, { "epoch": 1.501348516631705, "grad_norm": 1.8039568662643433, "learning_rate": 1.4569849046490541e-05, "loss": 0.9087, "step": 30060 }, { "epoch": 1.5018479672360403, "grad_norm": 1.8494824171066284, "learning_rate": 1.4542181262791398e-05, "loss": 0.693, "step": 30070 }, { "epoch": 1.5023474178403755, "grad_norm": 2.5761897563934326, "learning_rate": 1.4514535303216892e-05, "loss": 0.9996, "step": 30080 }, { "epoch": 1.5028468684447107, "grad_norm": 1.5435025691986084, "learning_rate": 1.4486911184782987e-05, "loss": 0.812, "step": 30090 }, { "epoch": 1.503346319049046, "grad_norm": 3.536646604537964, "learning_rate": 1.4459308924492199e-05, "loss": 0.9886, "step": 30100 }, { "epoch": 1.5038457696533811, "grad_norm": 2.4818308353424072, "learning_rate": 1.4431728539333595e-05, "loss": 0.8595, "step": 30110 }, { "epoch": 1.5043452202577166, "grad_norm": 1.7154827117919922, "learning_rate": 1.4404170046282766e-05, "loss": 0.7771, "step": 30120 }, { "epoch": 1.5048446708620518, "grad_norm": 2.278930187225342, "learning_rate": 1.437663346230187e-05, "loss": 0.8331, "step": 30130 }, { "epoch": 1.505344121466387, "grad_norm": 1.2678732872009277, "learning_rate": 1.43491188043395e-05, "loss": 0.8235, "step": 30140 }, { "epoch": 1.5058435720707222, "grad_norm": 2.7540650367736816, "learning_rate": 1.432162608933083e-05, "loss": 0.7712, "step": 30150 }, { "epoch": 1.5063430226750576, "grad_norm": 5.757235050201416, "learning_rate": 1.429415533419749e-05, "loss": 0.8346, "step": 30160 }, { "epoch": 1.5068424732793928, "grad_norm": 1.6434779167175293, "learning_rate": 1.4266706555847631e-05, "loss": 0.8487, "step": 30170 }, { "epoch": 1.507341923883728, "grad_norm": 2.494244337081909, "learning_rate": 1.423927977117579e-05, "loss": 0.7456, "step": 30180 }, { "epoch": 1.5078413744880632, "grad_norm": 1.7851744890213013, "learning_rate": 1.4211874997063085e-05, "loss": 0.9274, "step": 30190 }, { "epoch": 1.5083408250923984, "grad_norm": 3.2342989444732666, "learning_rate": 1.4184492250377018e-05, "loss": 0.8859, "step": 30200 }, { "epoch": 1.5088402756967336, "grad_norm": 2.3857553005218506, "learning_rate": 1.4157131547971524e-05, "loss": 0.857, "step": 30210 }, { "epoch": 1.5093397263010688, "grad_norm": 1.7428107261657715, "learning_rate": 1.4129792906686996e-05, "loss": 1.0446, "step": 30220 }, { "epoch": 1.509839176905404, "grad_norm": 1.3851697444915771, "learning_rate": 1.4102476343350247e-05, "loss": 0.7341, "step": 30230 }, { "epoch": 1.5103386275097392, "grad_norm": 2.130681037902832, "learning_rate": 1.4075181874774496e-05, "loss": 0.6779, "step": 30240 }, { "epoch": 1.5108380781140744, "grad_norm": 1.0010192394256592, "learning_rate": 1.4047909517759378e-05, "loss": 0.7917, "step": 30250 }, { "epoch": 1.5113375287184097, "grad_norm": 1.142500638961792, "learning_rate": 1.402065928909087e-05, "loss": 0.7656, "step": 30260 }, { "epoch": 1.5118369793227449, "grad_norm": 3.9105794429779053, "learning_rate": 1.3993431205541368e-05, "loss": 0.9728, "step": 30270 }, { "epoch": 1.51233642992708, "grad_norm": 2.1779394149780273, "learning_rate": 1.3966225283869672e-05, "loss": 0.9498, "step": 30280 }, { "epoch": 1.5128358805314155, "grad_norm": 2.3318753242492676, "learning_rate": 1.3939041540820862e-05, "loss": 1.0175, "step": 30290 }, { "epoch": 1.5133353311357507, "grad_norm": 2.814800500869751, "learning_rate": 1.3911879993126419e-05, "loss": 0.78, "step": 30300 }, { "epoch": 1.513834781740086, "grad_norm": 1.6510111093521118, "learning_rate": 1.3884740657504152e-05, "loss": 0.8457, "step": 30310 }, { "epoch": 1.514334232344421, "grad_norm": 2.627974271774292, "learning_rate": 1.3857623550658217e-05, "loss": 0.6787, "step": 30320 }, { "epoch": 1.5148336829487565, "grad_norm": 1.6362733840942383, "learning_rate": 1.3830528689279026e-05, "loss": 0.8481, "step": 30330 }, { "epoch": 1.5153331335530917, "grad_norm": 3.4498698711395264, "learning_rate": 1.380345609004337e-05, "loss": 0.87, "step": 30340 }, { "epoch": 1.515832584157427, "grad_norm": 3.157440662384033, "learning_rate": 1.3776405769614303e-05, "loss": 1.0211, "step": 30350 }, { "epoch": 1.5163320347617621, "grad_norm": 1.6975563764572144, "learning_rate": 1.3749377744641173e-05, "loss": 0.691, "step": 30360 }, { "epoch": 1.5168314853660974, "grad_norm": 0.8293794393539429, "learning_rate": 1.3722372031759612e-05, "loss": 0.9521, "step": 30370 }, { "epoch": 1.5173309359704326, "grad_norm": 2.5215940475463867, "learning_rate": 1.3695388647591501e-05, "loss": 0.7159, "step": 30380 }, { "epoch": 1.5178303865747678, "grad_norm": 1.5681865215301514, "learning_rate": 1.3668427608745e-05, "loss": 0.8352, "step": 30390 }, { "epoch": 1.518329837179103, "grad_norm": 1.2905075550079346, "learning_rate": 1.3641488931814506e-05, "loss": 0.755, "step": 30400 }, { "epoch": 1.5188292877834382, "grad_norm": 1.81891930103302, "learning_rate": 1.3614572633380635e-05, "loss": 0.7673, "step": 30410 }, { "epoch": 1.5193287383877734, "grad_norm": 1.5387520790100098, "learning_rate": 1.3587678730010255e-05, "loss": 0.9249, "step": 30420 }, { "epoch": 1.5198281889921086, "grad_norm": 2.308189630508423, "learning_rate": 1.356080723825644e-05, "loss": 0.7586, "step": 30430 }, { "epoch": 1.5203276395964438, "grad_norm": 2.3123340606689453, "learning_rate": 1.3533958174658468e-05, "loss": 0.7297, "step": 30440 }, { "epoch": 1.520827090200779, "grad_norm": 3.070478677749634, "learning_rate": 1.3507131555741815e-05, "loss": 0.7546, "step": 30450 }, { "epoch": 1.5213265408051144, "grad_norm": 2.7860846519470215, "learning_rate": 1.3480327398018145e-05, "loss": 0.8104, "step": 30460 }, { "epoch": 1.5218259914094496, "grad_norm": 2.0408151149749756, "learning_rate": 1.3453545717985305e-05, "loss": 0.7324, "step": 30470 }, { "epoch": 1.5223254420137848, "grad_norm": 3.6001429557800293, "learning_rate": 1.3426786532127272e-05, "loss": 0.8593, "step": 30480 }, { "epoch": 1.52282489261812, "grad_norm": 4.266958713531494, "learning_rate": 1.340004985691421e-05, "loss": 0.8489, "step": 30490 }, { "epoch": 1.5233243432224552, "grad_norm": 1.4965885877609253, "learning_rate": 1.337333570880242e-05, "loss": 0.6866, "step": 30500 }, { "epoch": 1.5238237938267907, "grad_norm": 1.1413147449493408, "learning_rate": 1.334664410423434e-05, "loss": 0.677, "step": 30510 }, { "epoch": 1.5243232444311259, "grad_norm": 1.9351661205291748, "learning_rate": 1.331997505963855e-05, "loss": 0.8049, "step": 30520 }, { "epoch": 1.524822695035461, "grad_norm": 1.4408106803894043, "learning_rate": 1.3293328591429671e-05, "loss": 0.7478, "step": 30530 }, { "epoch": 1.5253221456397963, "grad_norm": 2.2334883213043213, "learning_rate": 1.3266704716008531e-05, "loss": 0.6535, "step": 30540 }, { "epoch": 1.5258215962441315, "grad_norm": 2.196227550506592, "learning_rate": 1.3240103449762003e-05, "loss": 0.6127, "step": 30550 }, { "epoch": 1.5263210468484667, "grad_norm": 2.4032485485076904, "learning_rate": 1.3213524809063015e-05, "loss": 0.7032, "step": 30560 }, { "epoch": 1.526820497452802, "grad_norm": 1.167373538017273, "learning_rate": 1.3186968810270611e-05, "loss": 0.858, "step": 30570 }, { "epoch": 1.5273199480571371, "grad_norm": 1.784424066543579, "learning_rate": 1.3160435469729893e-05, "loss": 0.7, "step": 30580 }, { "epoch": 1.5278193986614723, "grad_norm": 2.5436196327209473, "learning_rate": 1.3133924803772014e-05, "loss": 0.8007, "step": 30590 }, { "epoch": 1.5283188492658075, "grad_norm": 2.082775831222534, "learning_rate": 1.3107436828714143e-05, "loss": 0.65, "step": 30600 }, { "epoch": 1.5288182998701427, "grad_norm": 1.702917456626892, "learning_rate": 1.3080971560859506e-05, "loss": 0.7432, "step": 30610 }, { "epoch": 1.529317750474478, "grad_norm": 1.8651313781738281, "learning_rate": 1.305452901649739e-05, "loss": 0.7946, "step": 30620 }, { "epoch": 1.5298172010788134, "grad_norm": 2.095562219619751, "learning_rate": 1.302810921190305e-05, "loss": 0.8329, "step": 30630 }, { "epoch": 1.5303166516831486, "grad_norm": 1.1025739908218384, "learning_rate": 1.3001712163337731e-05, "loss": 0.721, "step": 30640 }, { "epoch": 1.5308161022874838, "grad_norm": 1.3714433908462524, "learning_rate": 1.297533788704871e-05, "loss": 0.8486, "step": 30650 }, { "epoch": 1.531315552891819, "grad_norm": 1.8379263877868652, "learning_rate": 1.2948986399269225e-05, "loss": 0.8305, "step": 30660 }, { "epoch": 1.5318150034961542, "grad_norm": 1.5231393575668335, "learning_rate": 1.2922657716218522e-05, "loss": 0.6148, "step": 30670 }, { "epoch": 1.5323144541004896, "grad_norm": 1.9530363082885742, "learning_rate": 1.289635185410175e-05, "loss": 0.9022, "step": 30680 }, { "epoch": 1.5328139047048248, "grad_norm": 2.157413959503174, "learning_rate": 1.287006882911006e-05, "loss": 0.6977, "step": 30690 }, { "epoch": 1.53331335530916, "grad_norm": 4.529205799102783, "learning_rate": 1.284380865742053e-05, "loss": 0.9356, "step": 30700 }, { "epoch": 1.5338128059134952, "grad_norm": 2.5316667556762695, "learning_rate": 1.2817571355196183e-05, "loss": 0.9604, "step": 30710 }, { "epoch": 1.5343122565178304, "grad_norm": 2.0367202758789062, "learning_rate": 1.2791356938585952e-05, "loss": 0.7963, "step": 30720 }, { "epoch": 1.5348117071221656, "grad_norm": 1.6521406173706055, "learning_rate": 1.2765165423724695e-05, "loss": 0.6794, "step": 30730 }, { "epoch": 1.5353111577265008, "grad_norm": 2.469229221343994, "learning_rate": 1.273899682673318e-05, "loss": 0.7851, "step": 30740 }, { "epoch": 1.535810608330836, "grad_norm": 2.2572221755981445, "learning_rate": 1.2712851163718032e-05, "loss": 1.0097, "step": 30750 }, { "epoch": 1.5363100589351713, "grad_norm": 1.5922163724899292, "learning_rate": 1.2686728450771795e-05, "loss": 0.7176, "step": 30760 }, { "epoch": 1.5368095095395065, "grad_norm": 1.7964224815368652, "learning_rate": 1.2660628703972888e-05, "loss": 1.0273, "step": 30770 }, { "epoch": 1.5373089601438417, "grad_norm": 2.392090320587158, "learning_rate": 1.2634551939385575e-05, "loss": 0.703, "step": 30780 }, { "epoch": 1.5378084107481769, "grad_norm": 2.5064966678619385, "learning_rate": 1.2608498173059996e-05, "loss": 0.8022, "step": 30790 }, { "epoch": 1.538307861352512, "grad_norm": 2.706282615661621, "learning_rate": 1.2582467421032112e-05, "loss": 1.0197, "step": 30800 }, { "epoch": 1.5388073119568475, "grad_norm": 0.9064802527427673, "learning_rate": 1.2556459699323742e-05, "loss": 0.8854, "step": 30810 }, { "epoch": 1.5393067625611827, "grad_norm": 3.5567431449890137, "learning_rate": 1.2530475023942534e-05, "loss": 0.8852, "step": 30820 }, { "epoch": 1.539806213165518, "grad_norm": 1.3355212211608887, "learning_rate": 1.2504513410881896e-05, "loss": 0.8235, "step": 30830 }, { "epoch": 1.5403056637698531, "grad_norm": 1.4792804718017578, "learning_rate": 1.2478574876121107e-05, "loss": 0.8175, "step": 30840 }, { "epoch": 1.5408051143741885, "grad_norm": 1.6876802444458008, "learning_rate": 1.2452659435625214e-05, "loss": 0.7032, "step": 30850 }, { "epoch": 1.5413045649785238, "grad_norm": 5.1803364753723145, "learning_rate": 1.2426767105345067e-05, "loss": 0.9774, "step": 30860 }, { "epoch": 1.541804015582859, "grad_norm": 1.3539265394210815, "learning_rate": 1.2400897901217223e-05, "loss": 1.0029, "step": 30870 }, { "epoch": 1.5423034661871942, "grad_norm": 2.3148884773254395, "learning_rate": 1.2375051839164115e-05, "loss": 0.8473, "step": 30880 }, { "epoch": 1.5428029167915294, "grad_norm": 2.019747257232666, "learning_rate": 1.2349228935093854e-05, "loss": 0.7623, "step": 30890 }, { "epoch": 1.5433023673958646, "grad_norm": 1.3296257257461548, "learning_rate": 1.2323429204900339e-05, "loss": 0.8477, "step": 30900 }, { "epoch": 1.5438018180001998, "grad_norm": 2.756925582885742, "learning_rate": 1.2297652664463143e-05, "loss": 0.6336, "step": 30910 }, { "epoch": 1.544301268604535, "grad_norm": 4.278036594390869, "learning_rate": 1.2271899329647641e-05, "loss": 0.9313, "step": 30920 }, { "epoch": 1.5448007192088702, "grad_norm": 1.8495391607284546, "learning_rate": 1.2246169216304881e-05, "loss": 0.9615, "step": 30930 }, { "epoch": 1.5453001698132054, "grad_norm": 1.3167756795883179, "learning_rate": 1.222046234027165e-05, "loss": 0.7611, "step": 30940 }, { "epoch": 1.5457996204175406, "grad_norm": 1.765541434288025, "learning_rate": 1.2194778717370376e-05, "loss": 0.833, "step": 30950 }, { "epoch": 1.5462990710218758, "grad_norm": 2.200493335723877, "learning_rate": 1.2169118363409209e-05, "loss": 0.9027, "step": 30960 }, { "epoch": 1.546798521626211, "grad_norm": 1.4009946584701538, "learning_rate": 1.2143481294182035e-05, "loss": 0.7839, "step": 30970 }, { "epoch": 1.5472979722305464, "grad_norm": 0.8973079323768616, "learning_rate": 1.2117867525468308e-05, "loss": 0.7668, "step": 30980 }, { "epoch": 1.5477974228348816, "grad_norm": 1.9180610179901123, "learning_rate": 1.2092277073033186e-05, "loss": 0.8643, "step": 30990 }, { "epoch": 1.5482968734392168, "grad_norm": 2.4540059566497803, "learning_rate": 1.2066709952627487e-05, "loss": 0.7176, "step": 31000 }, { "epoch": 1.548796324043552, "grad_norm": 5.1701436042785645, "learning_rate": 1.2041166179987662e-05, "loss": 1.033, "step": 31010 }, { "epoch": 1.5492957746478875, "grad_norm": 2.318596601486206, "learning_rate": 1.2015645770835764e-05, "loss": 0.9198, "step": 31020 }, { "epoch": 1.5497952252522227, "grad_norm": 3.1683437824249268, "learning_rate": 1.1990148740879497e-05, "loss": 0.7606, "step": 31030 }, { "epoch": 1.550294675856558, "grad_norm": 1.9257067441940308, "learning_rate": 1.1964675105812156e-05, "loss": 0.8831, "step": 31040 }, { "epoch": 1.550794126460893, "grad_norm": 3.900425434112549, "learning_rate": 1.1939224881312689e-05, "loss": 0.9795, "step": 31050 }, { "epoch": 1.5512935770652283, "grad_norm": 1.3262271881103516, "learning_rate": 1.1913798083045547e-05, "loss": 0.697, "step": 31060 }, { "epoch": 1.5517930276695635, "grad_norm": 1.9045617580413818, "learning_rate": 1.1888394726660824e-05, "loss": 0.9252, "step": 31070 }, { "epoch": 1.5522924782738987, "grad_norm": 1.2037007808685303, "learning_rate": 1.1863014827794167e-05, "loss": 0.8139, "step": 31080 }, { "epoch": 1.552791928878234, "grad_norm": 1.404613971710205, "learning_rate": 1.1837658402066798e-05, "loss": 0.7385, "step": 31090 }, { "epoch": 1.5532913794825691, "grad_norm": 2.0043561458587646, "learning_rate": 1.1812325465085461e-05, "loss": 1.0024, "step": 31100 }, { "epoch": 1.5537908300869043, "grad_norm": 2.3613462448120117, "learning_rate": 1.1787016032442471e-05, "loss": 0.7806, "step": 31110 }, { "epoch": 1.5542902806912395, "grad_norm": 1.9408031702041626, "learning_rate": 1.176173011971567e-05, "loss": 0.6944, "step": 31120 }, { "epoch": 1.5547897312955747, "grad_norm": 1.1201269626617432, "learning_rate": 1.173646774246842e-05, "loss": 0.8308, "step": 31130 }, { "epoch": 1.55528918189991, "grad_norm": 1.7780576944351196, "learning_rate": 1.17112289162496e-05, "loss": 0.816, "step": 31140 }, { "epoch": 1.5557886325042454, "grad_norm": 6.616031646728516, "learning_rate": 1.1686013656593586e-05, "loss": 0.9933, "step": 31150 }, { "epoch": 1.5562880831085806, "grad_norm": 2.7344319820404053, "learning_rate": 1.1660821979020258e-05, "loss": 0.713, "step": 31160 }, { "epoch": 1.5567875337129158, "grad_norm": 1.7584202289581299, "learning_rate": 1.1635653899034998e-05, "loss": 0.7788, "step": 31170 }, { "epoch": 1.557286984317251, "grad_norm": 4.082503795623779, "learning_rate": 1.1610509432128608e-05, "loss": 1.1513, "step": 31180 }, { "epoch": 1.5577864349215864, "grad_norm": 2.1807994842529297, "learning_rate": 1.1585388593777407e-05, "loss": 0.7392, "step": 31190 }, { "epoch": 1.5582858855259216, "grad_norm": 1.341025710105896, "learning_rate": 1.1560291399443152e-05, "loss": 0.6684, "step": 31200 }, { "epoch": 1.5587853361302568, "grad_norm": 3.491328477859497, "learning_rate": 1.153521786457306e-05, "loss": 0.8452, "step": 31210 }, { "epoch": 1.559284786734592, "grad_norm": 2.343776226043701, "learning_rate": 1.1510168004599775e-05, "loss": 0.6737, "step": 31220 }, { "epoch": 1.5597842373389272, "grad_norm": 3.8371551036834717, "learning_rate": 1.1485141834941366e-05, "loss": 0.8605, "step": 31230 }, { "epoch": 1.5602836879432624, "grad_norm": 2.081101894378662, "learning_rate": 1.1460139371001338e-05, "loss": 0.7126, "step": 31240 }, { "epoch": 1.5607831385475976, "grad_norm": 1.6233638525009155, "learning_rate": 1.1435160628168567e-05, "loss": 0.8289, "step": 31250 }, { "epoch": 1.5612825891519329, "grad_norm": 4.825038433074951, "learning_rate": 1.1410205621817376e-05, "loss": 0.8498, "step": 31260 }, { "epoch": 1.561782039756268, "grad_norm": 1.198391318321228, "learning_rate": 1.1385274367307447e-05, "loss": 0.9779, "step": 31270 }, { "epoch": 1.5622814903606033, "grad_norm": 3.0213980674743652, "learning_rate": 1.136036687998387e-05, "loss": 0.6206, "step": 31280 }, { "epoch": 1.5627809409649385, "grad_norm": 0.8899410963058472, "learning_rate": 1.1335483175177064e-05, "loss": 0.8666, "step": 31290 }, { "epoch": 1.5632803915692737, "grad_norm": 2.1878502368927, "learning_rate": 1.131062326820283e-05, "loss": 0.885, "step": 31300 }, { "epoch": 1.5637798421736089, "grad_norm": 1.4064066410064697, "learning_rate": 1.1285787174362356e-05, "loss": 0.8171, "step": 31310 }, { "epoch": 1.5642792927779443, "grad_norm": 1.864975929260254, "learning_rate": 1.1260974908942145e-05, "loss": 0.8747, "step": 31320 }, { "epoch": 1.5647787433822795, "grad_norm": 4.4099531173706055, "learning_rate": 1.1236186487214006e-05, "loss": 1.0614, "step": 31330 }, { "epoch": 1.5652781939866147, "grad_norm": 2.5076744556427, "learning_rate": 1.1211421924435111e-05, "loss": 0.7338, "step": 31340 }, { "epoch": 1.56577764459095, "grad_norm": 1.09100341796875, "learning_rate": 1.1186681235847934e-05, "loss": 0.8336, "step": 31350 }, { "epoch": 1.5662770951952854, "grad_norm": 1.5299378633499146, "learning_rate": 1.1161964436680266e-05, "loss": 0.6609, "step": 31360 }, { "epoch": 1.5667765457996206, "grad_norm": 1.7906962633132935, "learning_rate": 1.1137271542145167e-05, "loss": 0.8778, "step": 31370 }, { "epoch": 1.5672759964039558, "grad_norm": 1.5180699825286865, "learning_rate": 1.1112602567441e-05, "loss": 0.6722, "step": 31380 }, { "epoch": 1.567775447008291, "grad_norm": 3.350332736968994, "learning_rate": 1.1087957527751413e-05, "loss": 1.0082, "step": 31390 }, { "epoch": 1.5682748976126262, "grad_norm": 1.7311373949050903, "learning_rate": 1.106333643824532e-05, "loss": 0.9374, "step": 31400 }, { "epoch": 1.5687743482169614, "grad_norm": 4.224371433258057, "learning_rate": 1.1038739314076874e-05, "loss": 0.8296, "step": 31410 }, { "epoch": 1.5692737988212966, "grad_norm": 3.4230434894561768, "learning_rate": 1.1014166170385504e-05, "loss": 0.887, "step": 31420 }, { "epoch": 1.5697732494256318, "grad_norm": 1.5443240404129028, "learning_rate": 1.0989617022295868e-05, "loss": 0.7434, "step": 31430 }, { "epoch": 1.570272700029967, "grad_norm": 2.06730580329895, "learning_rate": 1.0965091884917856e-05, "loss": 0.9402, "step": 31440 }, { "epoch": 1.5707721506343022, "grad_norm": 5.781002044677734, "learning_rate": 1.0940590773346565e-05, "loss": 1.0519, "step": 31450 }, { "epoch": 1.5712716012386374, "grad_norm": 2.9871609210968018, "learning_rate": 1.0916113702662317e-05, "loss": 0.6445, "step": 31460 }, { "epoch": 1.5717710518429726, "grad_norm": 2.0108935832977295, "learning_rate": 1.0891660687930644e-05, "loss": 0.9147, "step": 31470 }, { "epoch": 1.5722705024473078, "grad_norm": 1.3426589965820312, "learning_rate": 1.0867231744202261e-05, "loss": 0.569, "step": 31480 }, { "epoch": 1.5727699530516432, "grad_norm": 2.108978748321533, "learning_rate": 1.0842826886513075e-05, "loss": 1.0129, "step": 31490 }, { "epoch": 1.5732694036559784, "grad_norm": 1.9420535564422607, "learning_rate": 1.0818446129884158e-05, "loss": 0.8177, "step": 31500 }, { "epoch": 1.5737688542603137, "grad_norm": 4.504317760467529, "learning_rate": 1.0794089489321774e-05, "loss": 0.9145, "step": 31510 }, { "epoch": 1.5742683048646489, "grad_norm": 1.2333039045333862, "learning_rate": 1.076975697981729e-05, "loss": 0.9017, "step": 31520 }, { "epoch": 1.574767755468984, "grad_norm": 1.332015037536621, "learning_rate": 1.074544861634727e-05, "loss": 0.6437, "step": 31530 }, { "epoch": 1.5752672060733195, "grad_norm": 1.7420223951339722, "learning_rate": 1.0721164413873403e-05, "loss": 0.7937, "step": 31540 }, { "epoch": 1.5757666566776547, "grad_norm": 3.1507651805877686, "learning_rate": 1.069690438734251e-05, "loss": 0.9026, "step": 31550 }, { "epoch": 1.57626610728199, "grad_norm": 2.0124878883361816, "learning_rate": 1.0672668551686498e-05, "loss": 0.8631, "step": 31560 }, { "epoch": 1.576765557886325, "grad_norm": 1.033548355102539, "learning_rate": 1.0648456921822442e-05, "loss": 0.6024, "step": 31570 }, { "epoch": 1.5772650084906603, "grad_norm": 2.166360378265381, "learning_rate": 1.062426951265248e-05, "loss": 0.8208, "step": 31580 }, { "epoch": 1.5777644590949955, "grad_norm": 1.5757492780685425, "learning_rate": 1.0600106339063858e-05, "loss": 0.6985, "step": 31590 }, { "epoch": 1.5782639096993307, "grad_norm": 4.370702266693115, "learning_rate": 1.0575967415928883e-05, "loss": 0.9478, "step": 31600 }, { "epoch": 1.578763360303666, "grad_norm": 1.9108558893203735, "learning_rate": 1.0551852758104957e-05, "loss": 0.978, "step": 31610 }, { "epoch": 1.5792628109080011, "grad_norm": 2.2360966205596924, "learning_rate": 1.0527762380434547e-05, "loss": 0.9045, "step": 31620 }, { "epoch": 1.5797622615123363, "grad_norm": 0.8620438575744629, "learning_rate": 1.0503696297745174e-05, "loss": 0.7104, "step": 31630 }, { "epoch": 1.5802617121166715, "grad_norm": 2.071526288986206, "learning_rate": 1.047965452484937e-05, "loss": 0.9629, "step": 31640 }, { "epoch": 1.5807611627210068, "grad_norm": 1.5074695348739624, "learning_rate": 1.0455637076544771e-05, "loss": 0.824, "step": 31650 }, { "epoch": 1.581260613325342, "grad_norm": 4.199308395385742, "learning_rate": 1.0431643967614008e-05, "loss": 0.8011, "step": 31660 }, { "epoch": 1.5817600639296774, "grad_norm": 1.2590677738189697, "learning_rate": 1.0407675212824707e-05, "loss": 0.9194, "step": 31670 }, { "epoch": 1.5822595145340126, "grad_norm": 1.4987024068832397, "learning_rate": 1.0383730826929532e-05, "loss": 0.8037, "step": 31680 }, { "epoch": 1.5827589651383478, "grad_norm": 1.7136954069137573, "learning_rate": 1.0359810824666144e-05, "loss": 0.7921, "step": 31690 }, { "epoch": 1.583258415742683, "grad_norm": 1.6246813535690308, "learning_rate": 1.0335915220757198e-05, "loss": 0.7006, "step": 31700 }, { "epoch": 1.5837578663470184, "grad_norm": 2.211655855178833, "learning_rate": 1.0312044029910345e-05, "loss": 0.9562, "step": 31710 }, { "epoch": 1.5842573169513536, "grad_norm": 1.9821131229400635, "learning_rate": 1.0288197266818156e-05, "loss": 0.6396, "step": 31720 }, { "epoch": 1.5847567675556888, "grad_norm": 2.451228618621826, "learning_rate": 1.0264374946158211e-05, "loss": 0.7699, "step": 31730 }, { "epoch": 1.585256218160024, "grad_norm": 1.0698542594909668, "learning_rate": 1.0240577082593083e-05, "loss": 0.7871, "step": 31740 }, { "epoch": 1.5857556687643592, "grad_norm": 2.035271406173706, "learning_rate": 1.0216803690770198e-05, "loss": 0.7764, "step": 31750 }, { "epoch": 1.5862551193686945, "grad_norm": 2.276421308517456, "learning_rate": 1.0193054785321987e-05, "loss": 0.9262, "step": 31760 }, { "epoch": 1.5867545699730297, "grad_norm": 1.1644502878189087, "learning_rate": 1.01693303808658e-05, "loss": 0.9181, "step": 31770 }, { "epoch": 1.5872540205773649, "grad_norm": 2.024810314178467, "learning_rate": 1.0145630492003899e-05, "loss": 0.8324, "step": 31780 }, { "epoch": 1.5877534711817, "grad_norm": 2.9631001949310303, "learning_rate": 1.012195513332344e-05, "loss": 0.8849, "step": 31790 }, { "epoch": 1.5882529217860353, "grad_norm": 2.4166016578674316, "learning_rate": 1.0098304319396506e-05, "loss": 0.9094, "step": 31800 }, { "epoch": 1.5887523723903705, "grad_norm": 1.1361298561096191, "learning_rate": 1.0074678064780069e-05, "loss": 0.9197, "step": 31810 }, { "epoch": 1.5892518229947057, "grad_norm": 1.3493423461914062, "learning_rate": 1.0051076384015973e-05, "loss": 0.8524, "step": 31820 }, { "epoch": 1.589751273599041, "grad_norm": 1.1254128217697144, "learning_rate": 1.0027499291630954e-05, "loss": 0.7056, "step": 31830 }, { "epoch": 1.5902507242033763, "grad_norm": 1.7950289249420166, "learning_rate": 1.0003946802136593e-05, "loss": 0.8225, "step": 31840 }, { "epoch": 1.5907501748077115, "grad_norm": 0.9591406583786011, "learning_rate": 9.980418930029345e-06, "loss": 0.7257, "step": 31850 }, { "epoch": 1.5912496254120467, "grad_norm": 1.1171596050262451, "learning_rate": 9.956915689790524e-06, "loss": 0.8054, "step": 31860 }, { "epoch": 1.591749076016382, "grad_norm": 1.6064636707305908, "learning_rate": 9.933437095886234e-06, "loss": 0.633, "step": 31870 }, { "epoch": 1.5922485266207174, "grad_norm": 3.331109046936035, "learning_rate": 9.909983162767456e-06, "loss": 0.8652, "step": 31880 }, { "epoch": 1.5927479772250526, "grad_norm": 2.948329448699951, "learning_rate": 9.886553904869977e-06, "loss": 0.7149, "step": 31890 }, { "epoch": 1.5932474278293878, "grad_norm": 1.2678991556167603, "learning_rate": 9.863149336614408e-06, "loss": 0.8716, "step": 31900 }, { "epoch": 1.593746878433723, "grad_norm": 1.0215388536453247, "learning_rate": 9.839769472406135e-06, "loss": 0.9567, "step": 31910 }, { "epoch": 1.5942463290380582, "grad_norm": 1.9969433546066284, "learning_rate": 9.816414326635376e-06, "loss": 0.7871, "step": 31920 }, { "epoch": 1.5947457796423934, "grad_norm": 2.0878634452819824, "learning_rate": 9.793083913677115e-06, "loss": 0.9773, "step": 31930 }, { "epoch": 1.5952452302467286, "grad_norm": 1.556214451789856, "learning_rate": 9.769778247891098e-06, "loss": 0.8011, "step": 31940 }, { "epoch": 1.5957446808510638, "grad_norm": 3.1769673824310303, "learning_rate": 9.746497343621857e-06, "loss": 0.723, "step": 31950 }, { "epoch": 1.596244131455399, "grad_norm": 3.4388644695281982, "learning_rate": 9.723241215198692e-06, "loss": 0.862, "step": 31960 }, { "epoch": 1.5967435820597342, "grad_norm": 3.2258336544036865, "learning_rate": 9.70000987693564e-06, "loss": 0.8754, "step": 31970 }, { "epoch": 1.5972430326640694, "grad_norm": 2.3358383178710938, "learning_rate": 9.676803343131497e-06, "loss": 0.8185, "step": 31980 }, { "epoch": 1.5977424832684046, "grad_norm": 2.6055355072021484, "learning_rate": 9.653621628069737e-06, "loss": 0.8393, "step": 31990 }, { "epoch": 1.5982419338727398, "grad_norm": 1.314213514328003, "learning_rate": 9.630464746018636e-06, "loss": 0.7871, "step": 32000 }, { "epoch": 1.5987413844770753, "grad_norm": 2.312258005142212, "learning_rate": 9.607332711231155e-06, "loss": 0.7513, "step": 32010 }, { "epoch": 1.5992408350814105, "grad_norm": 5.951169490814209, "learning_rate": 9.584225537944925e-06, "loss": 0.9554, "step": 32020 }, { "epoch": 1.5997402856857457, "grad_norm": 1.5048185586929321, "learning_rate": 9.561143240382314e-06, "loss": 0.8106, "step": 32030 }, { "epoch": 1.6002397362900809, "grad_norm": 3.8291537761688232, "learning_rate": 9.538085832750383e-06, "loss": 1.0216, "step": 32040 }, { "epoch": 1.6007391868944163, "grad_norm": 1.3885672092437744, "learning_rate": 9.515053329240859e-06, "loss": 0.923, "step": 32050 }, { "epoch": 1.6012386374987515, "grad_norm": 1.1815651655197144, "learning_rate": 9.49204574403012e-06, "loss": 0.6874, "step": 32060 }, { "epoch": 1.6017380881030867, "grad_norm": 3.35941743850708, "learning_rate": 9.46906309127924e-06, "loss": 0.9226, "step": 32070 }, { "epoch": 1.602237538707422, "grad_norm": 2.532893657684326, "learning_rate": 9.446105385133946e-06, "loss": 0.8684, "step": 32080 }, { "epoch": 1.6027369893117571, "grad_norm": 1.742720365524292, "learning_rate": 9.423172639724615e-06, "loss": 0.8136, "step": 32090 }, { "epoch": 1.6032364399160923, "grad_norm": 0.9976614117622375, "learning_rate": 9.400264869166208e-06, "loss": 0.7394, "step": 32100 }, { "epoch": 1.6037358905204275, "grad_norm": 2.207717180252075, "learning_rate": 9.377382087558379e-06, "loss": 0.7517, "step": 32110 }, { "epoch": 1.6042353411247627, "grad_norm": 0.7826811075210571, "learning_rate": 9.354524308985363e-06, "loss": 0.7112, "step": 32120 }, { "epoch": 1.604734791729098, "grad_norm": 2.0939645767211914, "learning_rate": 9.331691547516035e-06, "loss": 0.5882, "step": 32130 }, { "epoch": 1.6052342423334331, "grad_norm": 1.5870779752731323, "learning_rate": 9.308883817203833e-06, "loss": 0.6258, "step": 32140 }, { "epoch": 1.6057336929377684, "grad_norm": 4.894512176513672, "learning_rate": 9.286101132086816e-06, "loss": 0.836, "step": 32150 }, { "epoch": 1.6062331435421036, "grad_norm": 1.577953577041626, "learning_rate": 9.263343506187627e-06, "loss": 0.7639, "step": 32160 }, { "epoch": 1.6067325941464388, "grad_norm": 1.238731026649475, "learning_rate": 9.24061095351348e-06, "loss": 0.7604, "step": 32170 }, { "epoch": 1.6072320447507742, "grad_norm": 1.3724886178970337, "learning_rate": 9.217903488056156e-06, "loss": 0.89, "step": 32180 }, { "epoch": 1.6077314953551094, "grad_norm": 1.1861090660095215, "learning_rate": 9.195221123791997e-06, "loss": 0.8001, "step": 32190 }, { "epoch": 1.6082309459594446, "grad_norm": 1.0363649129867554, "learning_rate": 9.1725638746819e-06, "loss": 0.7344, "step": 32200 }, { "epoch": 1.6087303965637798, "grad_norm": 1.48933744430542, "learning_rate": 9.149931754671282e-06, "loss": 0.833, "step": 32210 }, { "epoch": 1.6092298471681152, "grad_norm": 1.0401525497436523, "learning_rate": 9.127324777690116e-06, "loss": 0.7905, "step": 32220 }, { "epoch": 1.6097292977724504, "grad_norm": 1.7170989513397217, "learning_rate": 9.104742957652895e-06, "loss": 0.7003, "step": 32230 }, { "epoch": 1.6102287483767856, "grad_norm": 1.157591462135315, "learning_rate": 9.082186308458624e-06, "loss": 0.9246, "step": 32240 }, { "epoch": 1.6107281989811209, "grad_norm": 3.7075676918029785, "learning_rate": 9.059654843990816e-06, "loss": 0.8432, "step": 32250 }, { "epoch": 1.611227649585456, "grad_norm": 1.432910680770874, "learning_rate": 9.037148578117488e-06, "loss": 0.7212, "step": 32260 }, { "epoch": 1.6117271001897913, "grad_norm": 2.974475145339966, "learning_rate": 9.014667524691139e-06, "loss": 0.732, "step": 32270 }, { "epoch": 1.6122265507941265, "grad_norm": 1.1430411338806152, "learning_rate": 8.99221169754877e-06, "loss": 0.7917, "step": 32280 }, { "epoch": 1.6127260013984617, "grad_norm": 0.7074918150901794, "learning_rate": 8.96978111051181e-06, "loss": 0.6692, "step": 32290 }, { "epoch": 1.6132254520027969, "grad_norm": 1.9604017734527588, "learning_rate": 8.947375777386197e-06, "loss": 0.8617, "step": 32300 }, { "epoch": 1.613724902607132, "grad_norm": 2.695582389831543, "learning_rate": 8.924995711962314e-06, "loss": 0.9039, "step": 32310 }, { "epoch": 1.6142243532114673, "grad_norm": 1.2286263704299927, "learning_rate": 8.902640928014999e-06, "loss": 0.5689, "step": 32320 }, { "epoch": 1.6147238038158025, "grad_norm": 1.6729316711425781, "learning_rate": 8.88031143930348e-06, "loss": 0.9481, "step": 32330 }, { "epoch": 1.6152232544201377, "grad_norm": 2.2705130577087402, "learning_rate": 8.858007259571488e-06, "loss": 0.7857, "step": 32340 }, { "epoch": 1.6157227050244731, "grad_norm": 1.7610599994659424, "learning_rate": 8.835728402547144e-06, "loss": 0.821, "step": 32350 }, { "epoch": 1.6162221556288083, "grad_norm": 2.831382989883423, "learning_rate": 8.813474881942985e-06, "loss": 0.8333, "step": 32360 }, { "epoch": 1.6167216062331435, "grad_norm": 1.0621943473815918, "learning_rate": 8.791246711455924e-06, "loss": 1.0901, "step": 32370 }, { "epoch": 1.6172210568374787, "grad_norm": 2.082852602005005, "learning_rate": 8.769043904767316e-06, "loss": 0.6617, "step": 32380 }, { "epoch": 1.6177205074418142, "grad_norm": 4.713111400604248, "learning_rate": 8.74686647554288e-06, "loss": 0.7603, "step": 32390 }, { "epoch": 1.6182199580461494, "grad_norm": 3.5373897552490234, "learning_rate": 8.72471443743274e-06, "loss": 0.8522, "step": 32400 }, { "epoch": 1.6187194086504846, "grad_norm": 4.242556571960449, "learning_rate": 8.702587804071355e-06, "loss": 0.6167, "step": 32410 }, { "epoch": 1.6192188592548198, "grad_norm": 0.9275137782096863, "learning_rate": 8.680486589077551e-06, "loss": 0.8567, "step": 32420 }, { "epoch": 1.619718309859155, "grad_norm": 1.8644840717315674, "learning_rate": 8.658410806054568e-06, "loss": 0.9137, "step": 32430 }, { "epoch": 1.6202177604634902, "grad_norm": 4.178505897521973, "learning_rate": 8.63636046858992e-06, "loss": 0.9598, "step": 32440 }, { "epoch": 1.6207172110678254, "grad_norm": 3.228980541229248, "learning_rate": 8.61433559025549e-06, "loss": 1.0185, "step": 32450 }, { "epoch": 1.6212166616721606, "grad_norm": 2.3021411895751953, "learning_rate": 8.592336184607491e-06, "loss": 0.8254, "step": 32460 }, { "epoch": 1.6217161122764958, "grad_norm": 4.386714935302734, "learning_rate": 8.570362265186476e-06, "loss": 0.8675, "step": 32470 }, { "epoch": 1.622215562880831, "grad_norm": 2.426510810852051, "learning_rate": 8.548413845517261e-06, "loss": 0.8936, "step": 32480 }, { "epoch": 1.6227150134851662, "grad_norm": 4.432008266448975, "learning_rate": 8.526490939109011e-06, "loss": 0.7079, "step": 32490 }, { "epoch": 1.6232144640895014, "grad_norm": 1.026883602142334, "learning_rate": 8.504593559455159e-06, "loss": 0.7477, "step": 32500 }, { "epoch": 1.6237139146938366, "grad_norm": 1.8085023164749146, "learning_rate": 8.482721720033482e-06, "loss": 0.7032, "step": 32510 }, { "epoch": 1.624213365298172, "grad_norm": 4.327006816864014, "learning_rate": 8.46087543430596e-06, "loss": 1.0039, "step": 32520 }, { "epoch": 1.6247128159025073, "grad_norm": 2.479306697845459, "learning_rate": 8.43905471571889e-06, "loss": 0.9161, "step": 32530 }, { "epoch": 1.6252122665068425, "grad_norm": 1.624556541442871, "learning_rate": 8.417259577702829e-06, "loss": 0.8412, "step": 32540 }, { "epoch": 1.6257117171111777, "grad_norm": 1.3046996593475342, "learning_rate": 8.395490033672598e-06, "loss": 0.9115, "step": 32550 }, { "epoch": 1.6262111677155129, "grad_norm": 2.177978515625, "learning_rate": 8.37374609702722e-06, "loss": 0.8041, "step": 32560 }, { "epoch": 1.6267106183198483, "grad_norm": 2.772327184677124, "learning_rate": 8.352027781150008e-06, "loss": 0.8544, "step": 32570 }, { "epoch": 1.6272100689241835, "grad_norm": 2.5145325660705566, "learning_rate": 8.330335099408476e-06, "loss": 0.832, "step": 32580 }, { "epoch": 1.6277095195285187, "grad_norm": 1.5851458311080933, "learning_rate": 8.308668065154379e-06, "loss": 0.8207, "step": 32590 }, { "epoch": 1.628208970132854, "grad_norm": 2.7843308448791504, "learning_rate": 8.287026691723671e-06, "loss": 1.028, "step": 32600 }, { "epoch": 1.6287084207371891, "grad_norm": 1.5754821300506592, "learning_rate": 8.265410992436523e-06, "loss": 0.8082, "step": 32610 }, { "epoch": 1.6292078713415243, "grad_norm": 2.1539711952209473, "learning_rate": 8.243820980597294e-06, "loss": 0.9706, "step": 32620 }, { "epoch": 1.6297073219458595, "grad_norm": 1.7117652893066406, "learning_rate": 8.222256669494554e-06, "loss": 0.8241, "step": 32630 }, { "epoch": 1.6302067725501947, "grad_norm": 3.156545877456665, "learning_rate": 8.200718072401015e-06, "loss": 0.9081, "step": 32640 }, { "epoch": 1.63070622315453, "grad_norm": 1.1768215894699097, "learning_rate": 8.179205202573587e-06, "loss": 0.7026, "step": 32650 }, { "epoch": 1.6312056737588652, "grad_norm": 0.41125527024269104, "learning_rate": 8.157718073253351e-06, "loss": 0.7687, "step": 32660 }, { "epoch": 1.6317051243632004, "grad_norm": 2.116495132446289, "learning_rate": 8.136256697665534e-06, "loss": 0.8865, "step": 32670 }, { "epoch": 1.6322045749675356, "grad_norm": 1.6022876501083374, "learning_rate": 8.114821089019513e-06, "loss": 0.7732, "step": 32680 }, { "epoch": 1.6327040255718708, "grad_norm": 1.8937041759490967, "learning_rate": 8.093411260508804e-06, "loss": 0.8118, "step": 32690 }, { "epoch": 1.6332034761762062, "grad_norm": 2.3878931999206543, "learning_rate": 8.07202722531108e-06, "loss": 0.7368, "step": 32700 }, { "epoch": 1.6337029267805414, "grad_norm": 5.616055011749268, "learning_rate": 8.050668996588084e-06, "loss": 0.6612, "step": 32710 }, { "epoch": 1.6342023773848766, "grad_norm": 0.7340039610862732, "learning_rate": 8.02933658748572e-06, "loss": 0.8277, "step": 32720 }, { "epoch": 1.6347018279892118, "grad_norm": 4.959640979766846, "learning_rate": 8.008030011133993e-06, "loss": 0.6724, "step": 32730 }, { "epoch": 1.6352012785935472, "grad_norm": 1.4575165510177612, "learning_rate": 7.98674928064701e-06, "loss": 0.7702, "step": 32740 }, { "epoch": 1.6357007291978825, "grad_norm": 3.662757635116577, "learning_rate": 7.965494409122941e-06, "loss": 0.9819, "step": 32750 }, { "epoch": 1.6362001798022177, "grad_norm": 2.8622636795043945, "learning_rate": 7.944265409644063e-06, "loss": 0.7653, "step": 32760 }, { "epoch": 1.6366996304065529, "grad_norm": 1.658539891242981, "learning_rate": 7.92306229527675e-06, "loss": 0.8849, "step": 32770 }, { "epoch": 1.637199081010888, "grad_norm": 1.4610798358917236, "learning_rate": 7.901885079071419e-06, "loss": 0.9691, "step": 32780 }, { "epoch": 1.6376985316152233, "grad_norm": 2.061894655227661, "learning_rate": 7.880733774062532e-06, "loss": 0.6977, "step": 32790 }, { "epoch": 1.6381979822195585, "grad_norm": 6.265122890472412, "learning_rate": 7.859608393268624e-06, "loss": 0.7697, "step": 32800 }, { "epoch": 1.6386974328238937, "grad_norm": 0.9943069219589233, "learning_rate": 7.838508949692275e-06, "loss": 0.6232, "step": 32810 }, { "epoch": 1.639196883428229, "grad_norm": 1.2408188581466675, "learning_rate": 7.81743545632011e-06, "loss": 0.6267, "step": 32820 }, { "epoch": 1.639696334032564, "grad_norm": 1.0629234313964844, "learning_rate": 7.79638792612274e-06, "loss": 0.5875, "step": 32830 }, { "epoch": 1.6401957846368993, "grad_norm": 1.6345546245574951, "learning_rate": 7.775366372054832e-06, "loss": 0.8302, "step": 32840 }, { "epoch": 1.6406952352412345, "grad_norm": 2.1406784057617188, "learning_rate": 7.754370807055062e-06, "loss": 0.8808, "step": 32850 }, { "epoch": 1.6411946858455697, "grad_norm": 3.8250038623809814, "learning_rate": 7.733401244046095e-06, "loss": 0.7931, "step": 32860 }, { "epoch": 1.6416941364499051, "grad_norm": 1.4751918315887451, "learning_rate": 7.712457695934616e-06, "loss": 0.6696, "step": 32870 }, { "epoch": 1.6421935870542403, "grad_norm": 1.8803482055664062, "learning_rate": 7.691540175611272e-06, "loss": 0.9578, "step": 32880 }, { "epoch": 1.6426930376585755, "grad_norm": 1.4702768325805664, "learning_rate": 7.6706486959507e-06, "loss": 0.7134, "step": 32890 }, { "epoch": 1.6431924882629108, "grad_norm": 2.0815553665161133, "learning_rate": 7.649783269811523e-06, "loss": 0.7398, "step": 32900 }, { "epoch": 1.6436919388672462, "grad_norm": 1.0663114786148071, "learning_rate": 7.6289439100362955e-06, "loss": 0.8397, "step": 32910 }, { "epoch": 1.6441913894715814, "grad_norm": 2.6448476314544678, "learning_rate": 7.608130629451554e-06, "loss": 0.8774, "step": 32920 }, { "epoch": 1.6446908400759166, "grad_norm": 1.7423839569091797, "learning_rate": 7.587343440867784e-06, "loss": 0.6785, "step": 32930 }, { "epoch": 1.6451902906802518, "grad_norm": 2.126605749130249, "learning_rate": 7.566582357079394e-06, "loss": 1.0271, "step": 32940 }, { "epoch": 1.645689741284587, "grad_norm": 3.8434581756591797, "learning_rate": 7.545847390864746e-06, "loss": 0.8559, "step": 32950 }, { "epoch": 1.6461891918889222, "grad_norm": 1.0559673309326172, "learning_rate": 7.525138554986111e-06, "loss": 0.9776, "step": 32960 }, { "epoch": 1.6466886424932574, "grad_norm": 1.3364135026931763, "learning_rate": 7.5044558621896965e-06, "loss": 0.6528, "step": 32970 }, { "epoch": 1.6471880930975926, "grad_norm": 2.827162981033325, "learning_rate": 7.483799325205582e-06, "loss": 0.8785, "step": 32980 }, { "epoch": 1.6476875437019278, "grad_norm": 1.246030330657959, "learning_rate": 7.463168956747779e-06, "loss": 1.0053, "step": 32990 }, { "epoch": 1.648186994306263, "grad_norm": 2.4645471572875977, "learning_rate": 7.442564769514193e-06, "loss": 0.8522, "step": 33000 }, { "epoch": 1.6486864449105982, "grad_norm": 1.6491093635559082, "learning_rate": 7.421986776186607e-06, "loss": 0.8641, "step": 33010 }, { "epoch": 1.6491858955149334, "grad_norm": 0.4822236895561218, "learning_rate": 7.40143498943065e-06, "loss": 0.8152, "step": 33020 }, { "epoch": 1.6496853461192686, "grad_norm": 1.6322946548461914, "learning_rate": 7.380909421895893e-06, "loss": 0.968, "step": 33030 }, { "epoch": 1.650184796723604, "grad_norm": 2.290846824645996, "learning_rate": 7.360410086215713e-06, "loss": 0.8921, "step": 33040 }, { "epoch": 1.6506842473279393, "grad_norm": 2.1733434200286865, "learning_rate": 7.339936995007369e-06, "loss": 1.0277, "step": 33050 }, { "epoch": 1.6511836979322745, "grad_norm": 2.965209722518921, "learning_rate": 7.319490160871934e-06, "loss": 0.7613, "step": 33060 }, { "epoch": 1.6516831485366097, "grad_norm": 1.191996455192566, "learning_rate": 7.2990695963943425e-06, "loss": 0.7597, "step": 33070 }, { "epoch": 1.6521825991409451, "grad_norm": 2.5393970012664795, "learning_rate": 7.278675314143368e-06, "loss": 0.7128, "step": 33080 }, { "epoch": 1.6526820497452803, "grad_norm": 0.5430086851119995, "learning_rate": 7.258307326671609e-06, "loss": 0.7097, "step": 33090 }, { "epoch": 1.6531815003496155, "grad_norm": 1.4291471242904663, "learning_rate": 7.237965646515426e-06, "loss": 0.7841, "step": 33100 }, { "epoch": 1.6536809509539507, "grad_norm": 1.7150503396987915, "learning_rate": 7.2176502861950715e-06, "loss": 0.6978, "step": 33110 }, { "epoch": 1.654180401558286, "grad_norm": 2.958411931991577, "learning_rate": 7.1973612582145574e-06, "loss": 0.8231, "step": 33120 }, { "epoch": 1.6546798521626211, "grad_norm": 1.0735105276107788, "learning_rate": 7.177098575061658e-06, "loss": 0.8593, "step": 33130 }, { "epoch": 1.6551793027669564, "grad_norm": 0.8563856482505798, "learning_rate": 7.156862249207974e-06, "loss": 0.7039, "step": 33140 }, { "epoch": 1.6556787533712916, "grad_norm": 0.6159844994544983, "learning_rate": 7.136652293108875e-06, "loss": 0.8684, "step": 33150 }, { "epoch": 1.6561782039756268, "grad_norm": 0.9419215321540833, "learning_rate": 7.116468719203501e-06, "loss": 0.6959, "step": 33160 }, { "epoch": 1.656677654579962, "grad_norm": 2.407841920852661, "learning_rate": 7.096311539914746e-06, "loss": 0.8834, "step": 33170 }, { "epoch": 1.6571771051842972, "grad_norm": 1.8229483366012573, "learning_rate": 7.0761807676492485e-06, "loss": 0.6873, "step": 33180 }, { "epoch": 1.6576765557886324, "grad_norm": 2.6188106536865234, "learning_rate": 7.056076414797408e-06, "loss": 1.0978, "step": 33190 }, { "epoch": 1.6581760063929676, "grad_norm": 4.037859916687012, "learning_rate": 7.035998493733387e-06, "loss": 0.8739, "step": 33200 }, { "epoch": 1.658675456997303, "grad_norm": 7.681492328643799, "learning_rate": 7.015947016815027e-06, "loss": 0.8856, "step": 33210 }, { "epoch": 1.6591749076016382, "grad_norm": 6.089690208435059, "learning_rate": 6.995921996383925e-06, "loss": 1.3148, "step": 33220 }, { "epoch": 1.6596743582059734, "grad_norm": 1.725597858428955, "learning_rate": 6.975923444765398e-06, "loss": 0.96, "step": 33230 }, { "epoch": 1.6601738088103086, "grad_norm": 1.72244393825531, "learning_rate": 6.955951374268465e-06, "loss": 0.8336, "step": 33240 }, { "epoch": 1.660673259414644, "grad_norm": 1.3152128458023071, "learning_rate": 6.93600579718583e-06, "loss": 0.8354, "step": 33250 }, { "epoch": 1.6611727100189793, "grad_norm": 1.496792197227478, "learning_rate": 6.916086725793908e-06, "loss": 0.835, "step": 33260 }, { "epoch": 1.6616721606233145, "grad_norm": 3.461909770965576, "learning_rate": 6.896194172352799e-06, "loss": 0.9006, "step": 33270 }, { "epoch": 1.6621716112276497, "grad_norm": 0.8485634922981262, "learning_rate": 6.876328149106276e-06, "loss": 1.0869, "step": 33280 }, { "epoch": 1.6626710618319849, "grad_norm": 3.615523099899292, "learning_rate": 6.856488668281791e-06, "loss": 0.8065, "step": 33290 }, { "epoch": 1.66317051243632, "grad_norm": 1.458704948425293, "learning_rate": 6.836675742090448e-06, "loss": 0.9652, "step": 33300 }, { "epoch": 1.6636699630406553, "grad_norm": 4.264139175415039, "learning_rate": 6.81688938272701e-06, "loss": 0.7607, "step": 33310 }, { "epoch": 1.6641694136449905, "grad_norm": 0.9312965869903564, "learning_rate": 6.797129602369911e-06, "loss": 0.8519, "step": 33320 }, { "epoch": 1.6646688642493257, "grad_norm": 1.2911723852157593, "learning_rate": 6.7773964131811695e-06, "loss": 0.8216, "step": 33330 }, { "epoch": 1.665168314853661, "grad_norm": 1.1230634450912476, "learning_rate": 6.757689827306496e-06, "loss": 0.6416, "step": 33340 }, { "epoch": 1.665667765457996, "grad_norm": 3.272620439529419, "learning_rate": 6.738009856875199e-06, "loss": 0.8208, "step": 33350 }, { "epoch": 1.6661672160623313, "grad_norm": 1.6357247829437256, "learning_rate": 6.718356514000207e-06, "loss": 0.7812, "step": 33360 }, { "epoch": 1.6666666666666665, "grad_norm": 1.6113152503967285, "learning_rate": 6.698729810778065e-06, "loss": 0.7492, "step": 33370 }, { "epoch": 1.667166117271002, "grad_norm": 1.2577111721038818, "learning_rate": 6.6791297592889214e-06, "loss": 0.6302, "step": 33380 }, { "epoch": 1.6676655678753372, "grad_norm": 0.6654781103134155, "learning_rate": 6.659556371596526e-06, "loss": 0.8911, "step": 33390 }, { "epoch": 1.6681650184796724, "grad_norm": 5.6416473388671875, "learning_rate": 6.640009659748186e-06, "loss": 0.8402, "step": 33400 }, { "epoch": 1.6686644690840076, "grad_norm": 4.411472320556641, "learning_rate": 6.6204896357748295e-06, "loss": 0.9637, "step": 33410 }, { "epoch": 1.6691639196883428, "grad_norm": 1.7153258323669434, "learning_rate": 6.600996311690932e-06, "loss": 0.8352, "step": 33420 }, { "epoch": 1.6696633702926782, "grad_norm": 1.8173235654830933, "learning_rate": 6.5815296994945485e-06, "loss": 0.8196, "step": 33430 }, { "epoch": 1.6701628208970134, "grad_norm": 2.0316596031188965, "learning_rate": 6.56208981116731e-06, "loss": 0.7712, "step": 33440 }, { "epoch": 1.6706622715013486, "grad_norm": 2.026198387145996, "learning_rate": 6.54267665867433e-06, "loss": 0.7491, "step": 33450 }, { "epoch": 1.6711617221056838, "grad_norm": 2.6455156803131104, "learning_rate": 6.523290253964359e-06, "loss": 0.7961, "step": 33460 }, { "epoch": 1.671661172710019, "grad_norm": 1.7842084169387817, "learning_rate": 6.503930608969638e-06, "loss": 0.7955, "step": 33470 }, { "epoch": 1.6721606233143542, "grad_norm": 3.3867053985595703, "learning_rate": 6.484597735605913e-06, "loss": 0.959, "step": 33480 }, { "epoch": 1.6726600739186894, "grad_norm": 1.4159226417541504, "learning_rate": 6.465291645772498e-06, "loss": 0.7885, "step": 33490 }, { "epoch": 1.6731595245230246, "grad_norm": 1.989566683769226, "learning_rate": 6.446012351352193e-06, "loss": 0.8379, "step": 33500 }, { "epoch": 1.6736589751273598, "grad_norm": 1.9329557418823242, "learning_rate": 6.426759864211329e-06, "loss": 0.6747, "step": 33510 }, { "epoch": 1.674158425731695, "grad_norm": 2.7391014099121094, "learning_rate": 6.40753419619971e-06, "loss": 0.7669, "step": 33520 }, { "epoch": 1.6746578763360302, "grad_norm": 2.698151111602783, "learning_rate": 6.3883353591506325e-06, "loss": 0.8588, "step": 33530 }, { "epoch": 1.6751573269403655, "grad_norm": 2.433194875717163, "learning_rate": 6.369163364880937e-06, "loss": 0.8161, "step": 33540 }, { "epoch": 1.6756567775447009, "grad_norm": 3.5668649673461914, "learning_rate": 6.3500182251908625e-06, "loss": 0.8271, "step": 33550 }, { "epoch": 1.676156228149036, "grad_norm": 3.206387996673584, "learning_rate": 6.330899951864161e-06, "loss": 0.9621, "step": 33560 }, { "epoch": 1.6766556787533713, "grad_norm": 1.9001898765563965, "learning_rate": 6.311808556668042e-06, "loss": 0.8277, "step": 33570 }, { "epoch": 1.6771551293577065, "grad_norm": 2.9335436820983887, "learning_rate": 6.292744051353178e-06, "loss": 0.8933, "step": 33580 }, { "epoch": 1.6776545799620417, "grad_norm": 2.652553081512451, "learning_rate": 6.273706447653693e-06, "loss": 0.7744, "step": 33590 }, { "epoch": 1.6781540305663771, "grad_norm": 1.9013726711273193, "learning_rate": 6.2546957572871156e-06, "loss": 0.8477, "step": 33600 }, { "epoch": 1.6786534811707123, "grad_norm": 1.825754165649414, "learning_rate": 6.235711991954462e-06, "loss": 1.0587, "step": 33610 }, { "epoch": 1.6791529317750475, "grad_norm": 7.607336521148682, "learning_rate": 6.2167551633401435e-06, "loss": 0.92, "step": 33620 }, { "epoch": 1.6796523823793827, "grad_norm": 2.1030635833740234, "learning_rate": 6.197825283112002e-06, "loss": 0.8888, "step": 33630 }, { "epoch": 1.680151832983718, "grad_norm": 0.9250654578208923, "learning_rate": 6.178922362921291e-06, "loss": 0.716, "step": 33640 }, { "epoch": 1.6806512835880532, "grad_norm": 1.2000844478607178, "learning_rate": 6.1600464144026756e-06, "loss": 0.807, "step": 33650 }, { "epoch": 1.6811507341923884, "grad_norm": 1.6095917224884033, "learning_rate": 6.14119744917423e-06, "loss": 0.7966, "step": 33660 }, { "epoch": 1.6816501847967236, "grad_norm": 1.3341346979141235, "learning_rate": 6.122375478837372e-06, "loss": 0.622, "step": 33670 }, { "epoch": 1.6821496354010588, "grad_norm": 2.662108898162842, "learning_rate": 6.103580514976953e-06, "loss": 0.8237, "step": 33680 }, { "epoch": 1.682649086005394, "grad_norm": 1.737113356590271, "learning_rate": 6.084812569161197e-06, "loss": 0.9471, "step": 33690 }, { "epoch": 1.6831485366097292, "grad_norm": 0.9765511751174927, "learning_rate": 6.066071652941674e-06, "loss": 0.8581, "step": 33700 }, { "epoch": 1.6836479872140644, "grad_norm": 2.6003849506378174, "learning_rate": 6.047357777853341e-06, "loss": 0.7063, "step": 33710 }, { "epoch": 1.6841474378183996, "grad_norm": 2.0167593955993652, "learning_rate": 6.028670955414495e-06, "loss": 0.7861, "step": 33720 }, { "epoch": 1.684646888422735, "grad_norm": 1.3521000146865845, "learning_rate": 6.010011197126797e-06, "loss": 0.7654, "step": 33730 }, { "epoch": 1.6851463390270702, "grad_norm": 1.2209583520889282, "learning_rate": 5.991378514475255e-06, "loss": 0.767, "step": 33740 }, { "epoch": 1.6856457896314054, "grad_norm": 1.4293640851974487, "learning_rate": 5.972772918928165e-06, "loss": 0.9524, "step": 33750 }, { "epoch": 1.6861452402357406, "grad_norm": 1.5426479578018188, "learning_rate": 5.9541944219372094e-06, "loss": 0.6861, "step": 33760 }, { "epoch": 1.686644690840076, "grad_norm": 2.488340139389038, "learning_rate": 5.93564303493736e-06, "loss": 0.7732, "step": 33770 }, { "epoch": 1.6871441414444113, "grad_norm": 1.7369437217712402, "learning_rate": 5.91711876934693e-06, "loss": 0.7041, "step": 33780 }, { "epoch": 1.6876435920487465, "grad_norm": 1.331055998802185, "learning_rate": 5.8986216365674755e-06, "loss": 0.6413, "step": 33790 }, { "epoch": 1.6881430426530817, "grad_norm": 7.045438289642334, "learning_rate": 5.880151647983939e-06, "loss": 1.0464, "step": 33800 }, { "epoch": 1.6886424932574169, "grad_norm": 2.9155807495117188, "learning_rate": 5.8617088149645115e-06, "loss": 1.0191, "step": 33810 }, { "epoch": 1.689141943861752, "grad_norm": 6.615844249725342, "learning_rate": 5.843293148860645e-06, "loss": 0.8243, "step": 33820 }, { "epoch": 1.6896413944660873, "grad_norm": 1.526130199432373, "learning_rate": 5.824904661007113e-06, "loss": 0.754, "step": 33830 }, { "epoch": 1.6901408450704225, "grad_norm": 3.0641677379608154, "learning_rate": 5.806543362721945e-06, "loss": 0.874, "step": 33840 }, { "epoch": 1.6906402956747577, "grad_norm": 2.185114860534668, "learning_rate": 5.788209265306438e-06, "loss": 0.7972, "step": 33850 }, { "epoch": 1.691139746279093, "grad_norm": 1.3387336730957031, "learning_rate": 5.7699023800451504e-06, "loss": 0.8518, "step": 33860 }, { "epoch": 1.6916391968834281, "grad_norm": 1.4972590208053589, "learning_rate": 5.751622718205868e-06, "loss": 0.9012, "step": 33870 }, { "epoch": 1.6921386474877633, "grad_norm": 1.5444080829620361, "learning_rate": 5.733370291039636e-06, "loss": 0.8396, "step": 33880 }, { "epoch": 1.6926380980920985, "grad_norm": 5.174015522003174, "learning_rate": 5.715145109780773e-06, "loss": 0.7983, "step": 33890 }, { "epoch": 1.693137548696434, "grad_norm": 1.8786383867263794, "learning_rate": 5.696947185646767e-06, "loss": 0.9327, "step": 33900 }, { "epoch": 1.6936369993007692, "grad_norm": 1.9934834241867065, "learning_rate": 5.678776529838359e-06, "loss": 0.8043, "step": 33910 }, { "epoch": 1.6941364499051044, "grad_norm": 1.8914780616760254, "learning_rate": 5.660633153539513e-06, "loss": 0.6915, "step": 33920 }, { "epoch": 1.6946359005094396, "grad_norm": 3.606254816055298, "learning_rate": 5.642517067917397e-06, "loss": 0.88, "step": 33930 }, { "epoch": 1.695135351113775, "grad_norm": 3.041576385498047, "learning_rate": 5.624428284122357e-06, "loss": 0.9043, "step": 33940 }, { "epoch": 1.6956348017181102, "grad_norm": 2.0273914337158203, "learning_rate": 5.606366813287972e-06, "loss": 0.7096, "step": 33950 }, { "epoch": 1.6961342523224454, "grad_norm": 1.5106263160705566, "learning_rate": 5.588332666530982e-06, "loss": 0.808, "step": 33960 }, { "epoch": 1.6966337029267806, "grad_norm": 2.7801899909973145, "learning_rate": 5.5703258549513584e-06, "loss": 0.8118, "step": 33970 }, { "epoch": 1.6971331535311158, "grad_norm": 3.7293999195098877, "learning_rate": 5.5523463896321684e-06, "loss": 0.8433, "step": 33980 }, { "epoch": 1.697632604135451, "grad_norm": 1.002167820930481, "learning_rate": 5.5343942816397096e-06, "loss": 0.8146, "step": 33990 }, { "epoch": 1.6981320547397862, "grad_norm": 0.7675527334213257, "learning_rate": 5.5164695420234165e-06, "loss": 0.6892, "step": 34000 }, { "epoch": 1.6986315053441214, "grad_norm": 2.516131639480591, "learning_rate": 5.498572181815898e-06, "loss": 0.8342, "step": 34010 }, { "epoch": 1.6991309559484566, "grad_norm": 0.8710960745811462, "learning_rate": 5.480702212032873e-06, "loss": 0.7836, "step": 34020 }, { "epoch": 1.6996304065527919, "grad_norm": 0.640049934387207, "learning_rate": 5.4628596436732405e-06, "loss": 0.7594, "step": 34030 }, { "epoch": 1.700129857157127, "grad_norm": 0.6289708614349365, "learning_rate": 5.44504448771902e-06, "loss": 0.7695, "step": 34040 }, { "epoch": 1.7006293077614623, "grad_norm": 1.565186619758606, "learning_rate": 5.427256755135357e-06, "loss": 0.7647, "step": 34050 }, { "epoch": 1.7011287583657975, "grad_norm": 2.139749050140381, "learning_rate": 5.4094964568705105e-06, "loss": 0.6776, "step": 34060 }, { "epoch": 1.701628208970133, "grad_norm": 1.1552468538284302, "learning_rate": 5.391763603855876e-06, "loss": 0.8067, "step": 34070 }, { "epoch": 1.702127659574468, "grad_norm": 2.7003135681152344, "learning_rate": 5.374058207005944e-06, "loss": 0.6771, "step": 34080 }, { "epoch": 1.7026271101788033, "grad_norm": 2.3676655292510986, "learning_rate": 5.356380277218293e-06, "loss": 0.7065, "step": 34090 }, { "epoch": 1.7031265607831385, "grad_norm": 1.9275338649749756, "learning_rate": 5.338729825373606e-06, "loss": 1.0622, "step": 34100 }, { "epoch": 1.703626011387474, "grad_norm": 2.2466483116149902, "learning_rate": 5.321106862335668e-06, "loss": 0.896, "step": 34110 }, { "epoch": 1.7041254619918091, "grad_norm": 2.1116015911102295, "learning_rate": 5.3035113989513215e-06, "loss": 0.9516, "step": 34120 }, { "epoch": 1.7046249125961443, "grad_norm": 1.3542670011520386, "learning_rate": 5.285943446050501e-06, "loss": 0.734, "step": 34130 }, { "epoch": 1.7051243632004796, "grad_norm": 2.048325538635254, "learning_rate": 5.268403014446194e-06, "loss": 0.8479, "step": 34140 }, { "epoch": 1.7056238138048148, "grad_norm": 2.1398186683654785, "learning_rate": 5.250890114934459e-06, "loss": 0.9084, "step": 34150 }, { "epoch": 1.70612326440915, "grad_norm": 2.2073745727539062, "learning_rate": 5.233404758294414e-06, "loss": 0.6988, "step": 34160 }, { "epoch": 1.7066227150134852, "grad_norm": 1.1738636493682861, "learning_rate": 5.2159469552882035e-06, "loss": 0.6632, "step": 34170 }, { "epoch": 1.7071221656178204, "grad_norm": 2.565769910812378, "learning_rate": 5.198516716661022e-06, "loss": 0.8613, "step": 34180 }, { "epoch": 1.7076216162221556, "grad_norm": 2.5892958641052246, "learning_rate": 5.181114053141112e-06, "loss": 0.7024, "step": 34190 }, { "epoch": 1.7081210668264908, "grad_norm": 0.9414462447166443, "learning_rate": 5.163738975439736e-06, "loss": 0.7751, "step": 34200 }, { "epoch": 1.708620517430826, "grad_norm": 1.9717763662338257, "learning_rate": 5.146391494251157e-06, "loss": 0.8567, "step": 34210 }, { "epoch": 1.7091199680351612, "grad_norm": 0.757167398929596, "learning_rate": 5.129071620252673e-06, "loss": 0.6658, "step": 34220 }, { "epoch": 1.7096194186394964, "grad_norm": 2.6256396770477295, "learning_rate": 5.111779364104608e-06, "loss": 0.9474, "step": 34230 }, { "epoch": 1.7101188692438318, "grad_norm": 1.6641573905944824, "learning_rate": 5.094514736450257e-06, "loss": 0.669, "step": 34240 }, { "epoch": 1.710618319848167, "grad_norm": 2.3431596755981445, "learning_rate": 5.077277747915904e-06, "loss": 0.9436, "step": 34250 }, { "epoch": 1.7111177704525022, "grad_norm": 4.355306625366211, "learning_rate": 5.060068409110852e-06, "loss": 0.8129, "step": 34260 }, { "epoch": 1.7116172210568374, "grad_norm": 2.1051623821258545, "learning_rate": 5.04288673062736e-06, "loss": 0.8254, "step": 34270 }, { "epoch": 1.7121166716611729, "grad_norm": 1.0960193872451782, "learning_rate": 5.025732723040688e-06, "loss": 0.7505, "step": 34280 }, { "epoch": 1.712616122265508, "grad_norm": 2.2933967113494873, "learning_rate": 5.008606396909032e-06, "loss": 0.8967, "step": 34290 }, { "epoch": 1.7131155728698433, "grad_norm": 2.107546806335449, "learning_rate": 4.991507762773573e-06, "loss": 0.7854, "step": 34300 }, { "epoch": 1.7136150234741785, "grad_norm": 1.8853667974472046, "learning_rate": 4.974436831158441e-06, "loss": 0.7576, "step": 34310 }, { "epoch": 1.7141144740785137, "grad_norm": 1.567556619644165, "learning_rate": 4.957393612570716e-06, "loss": 0.7076, "step": 34320 }, { "epoch": 1.714613924682849, "grad_norm": 2.721653699874878, "learning_rate": 4.9403781175004214e-06, "loss": 0.965, "step": 34330 }, { "epoch": 1.715113375287184, "grad_norm": 1.5229129791259766, "learning_rate": 4.923390356420521e-06, "loss": 0.9254, "step": 34340 }, { "epoch": 1.7156128258915193, "grad_norm": 2.1019489765167236, "learning_rate": 4.906430339786905e-06, "loss": 0.9536, "step": 34350 }, { "epoch": 1.7161122764958545, "grad_norm": 2.95880126953125, "learning_rate": 4.8894980780383684e-06, "loss": 0.7796, "step": 34360 }, { "epoch": 1.7166117271001897, "grad_norm": 1.334531307220459, "learning_rate": 4.872593581596657e-06, "loss": 0.9902, "step": 34370 }, { "epoch": 1.717111177704525, "grad_norm": 2.7048702239990234, "learning_rate": 4.855716860866405e-06, "loss": 0.6322, "step": 34380 }, { "epoch": 1.7176106283088601, "grad_norm": 1.3477656841278076, "learning_rate": 4.838867926235158e-06, "loss": 0.7448, "step": 34390 }, { "epoch": 1.7181100789131953, "grad_norm": 3.038686513900757, "learning_rate": 4.822046788073359e-06, "loss": 0.7705, "step": 34400 }, { "epoch": 1.7186095295175308, "grad_norm": 1.5411460399627686, "learning_rate": 4.805253456734343e-06, "loss": 0.703, "step": 34410 }, { "epoch": 1.719108980121866, "grad_norm": 1.8554091453552246, "learning_rate": 4.788487942554321e-06, "loss": 0.6587, "step": 34420 }, { "epoch": 1.7196084307262012, "grad_norm": 3.690652370452881, "learning_rate": 4.7717502558524026e-06, "loss": 0.8542, "step": 34430 }, { "epoch": 1.7201078813305364, "grad_norm": 1.4600750207901, "learning_rate": 4.755040406930539e-06, "loss": 0.8823, "step": 34440 }, { "epoch": 1.7206073319348716, "grad_norm": 6.3015875816345215, "learning_rate": 4.738358406073579e-06, "loss": 1.1294, "step": 34450 }, { "epoch": 1.721106782539207, "grad_norm": 2.5710482597351074, "learning_rate": 4.721704263549214e-06, "loss": 0.9515, "step": 34460 }, { "epoch": 1.7216062331435422, "grad_norm": 2.5637593269348145, "learning_rate": 4.7050779896079974e-06, "loss": 0.6839, "step": 34470 }, { "epoch": 1.7221056837478774, "grad_norm": 1.7954245805740356, "learning_rate": 4.688479594483303e-06, "loss": 0.6864, "step": 34480 }, { "epoch": 1.7226051343522126, "grad_norm": 2.9475743770599365, "learning_rate": 4.671909088391396e-06, "loss": 0.7648, "step": 34490 }, { "epoch": 1.7231045849565478, "grad_norm": 1.3644464015960693, "learning_rate": 4.655366481531326e-06, "loss": 0.8244, "step": 34500 }, { "epoch": 1.723604035560883, "grad_norm": 1.7841483354568481, "learning_rate": 4.6388517840850135e-06, "loss": 0.9108, "step": 34510 }, { "epoch": 1.7241034861652182, "grad_norm": 1.744478702545166, "learning_rate": 4.622365006217155e-06, "loss": 0.7329, "step": 34520 }, { "epoch": 1.7246029367695535, "grad_norm": 1.2790329456329346, "learning_rate": 4.605906158075302e-06, "loss": 0.9263, "step": 34530 }, { "epoch": 1.7251023873738887, "grad_norm": 2.385261297225952, "learning_rate": 4.589475249789787e-06, "loss": 1.1937, "step": 34540 }, { "epoch": 1.7256018379782239, "grad_norm": 0.9439675807952881, "learning_rate": 4.573072291473779e-06, "loss": 0.9036, "step": 34550 }, { "epoch": 1.726101288582559, "grad_norm": 1.7510716915130615, "learning_rate": 4.556697293223189e-06, "loss": 1.0077, "step": 34560 }, { "epoch": 1.7266007391868943, "grad_norm": 1.3997886180877686, "learning_rate": 4.540350265116783e-06, "loss": 1.14, "step": 34570 }, { "epoch": 1.7271001897912297, "grad_norm": 1.2974086999893188, "learning_rate": 4.524031217216085e-06, "loss": 0.7906, "step": 34580 }, { "epoch": 1.727599640395565, "grad_norm": 1.6591622829437256, "learning_rate": 4.5077401595653764e-06, "loss": 0.791, "step": 34590 }, { "epoch": 1.7280990909999001, "grad_norm": 2.1445977687835693, "learning_rate": 4.491477102191732e-06, "loss": 0.8317, "step": 34600 }, { "epoch": 1.7285985416042353, "grad_norm": 1.6166549921035767, "learning_rate": 4.475242055104989e-06, "loss": 0.77, "step": 34610 }, { "epoch": 1.7290979922085705, "grad_norm": 2.105823040008545, "learning_rate": 4.459035028297759e-06, "loss": 0.754, "step": 34620 }, { "epoch": 1.729597442812906, "grad_norm": 1.5075491666793823, "learning_rate": 4.4428560317453734e-06, "loss": 0.9758, "step": 34630 }, { "epoch": 1.7300968934172412, "grad_norm": 4.970326900482178, "learning_rate": 4.426705075405935e-06, "loss": 0.9965, "step": 34640 }, { "epoch": 1.7305963440215764, "grad_norm": 1.1162242889404297, "learning_rate": 4.410582169220278e-06, "loss": 0.9061, "step": 34650 }, { "epoch": 1.7310957946259116, "grad_norm": 2.308248281478882, "learning_rate": 4.394487323112001e-06, "loss": 0.756, "step": 34660 }, { "epoch": 1.7315952452302468, "grad_norm": 3.1205217838287354, "learning_rate": 4.378420546987383e-06, "loss": 0.7574, "step": 34670 }, { "epoch": 1.732094695834582, "grad_norm": 1.6314212083816528, "learning_rate": 4.362381850735458e-06, "loss": 0.7974, "step": 34680 }, { "epoch": 1.7325941464389172, "grad_norm": 1.5543373823165894, "learning_rate": 4.346371244227975e-06, "loss": 0.7417, "step": 34690 }, { "epoch": 1.7330935970432524, "grad_norm": 0.9526101350784302, "learning_rate": 4.330388737319391e-06, "loss": 0.6185, "step": 34700 }, { "epoch": 1.7335930476475876, "grad_norm": 3.3719732761383057, "learning_rate": 4.314434339846857e-06, "loss": 1.051, "step": 34710 }, { "epoch": 1.7340924982519228, "grad_norm": 3.4890267848968506, "learning_rate": 4.29850806163023e-06, "loss": 0.9674, "step": 34720 }, { "epoch": 1.734591948856258, "grad_norm": 2.9204952716827393, "learning_rate": 4.282609912472063e-06, "loss": 0.8939, "step": 34730 }, { "epoch": 1.7350913994605932, "grad_norm": 0.9521095752716064, "learning_rate": 4.266739902157602e-06, "loss": 0.6879, "step": 34740 }, { "epoch": 1.7355908500649284, "grad_norm": 0.9983149170875549, "learning_rate": 4.250898040454754e-06, "loss": 0.7481, "step": 34750 }, { "epoch": 1.7360903006692638, "grad_norm": 1.3006421327590942, "learning_rate": 4.2350843371141205e-06, "loss": 0.7445, "step": 34760 }, { "epoch": 1.736589751273599, "grad_norm": 1.0855071544647217, "learning_rate": 4.219298801868959e-06, "loss": 0.8049, "step": 34770 }, { "epoch": 1.7370892018779343, "grad_norm": 1.4389673471450806, "learning_rate": 4.20354144443521e-06, "loss": 0.7711, "step": 34780 }, { "epoch": 1.7375886524822695, "grad_norm": 1.9440547227859497, "learning_rate": 4.187812274511427e-06, "loss": 0.6016, "step": 34790 }, { "epoch": 1.7380881030866049, "grad_norm": 2.9604620933532715, "learning_rate": 4.172111301778858e-06, "loss": 0.7277, "step": 34800 }, { "epoch": 1.73858755369094, "grad_norm": 1.9499428272247314, "learning_rate": 4.156438535901375e-06, "loss": 0.7722, "step": 34810 }, { "epoch": 1.7390870042952753, "grad_norm": 1.2327196598052979, "learning_rate": 4.140793986525493e-06, "loss": 0.8656, "step": 34820 }, { "epoch": 1.7395864548996105, "grad_norm": 1.617674469947815, "learning_rate": 4.125177663280366e-06, "loss": 0.7876, "step": 34830 }, { "epoch": 1.7400859055039457, "grad_norm": 1.6188279390335083, "learning_rate": 4.109589575777761e-06, "loss": 0.8418, "step": 34840 }, { "epoch": 1.740585356108281, "grad_norm": 2.5659329891204834, "learning_rate": 4.0940297336120925e-06, "loss": 0.8457, "step": 34850 }, { "epoch": 1.7410848067126161, "grad_norm": 1.775943636894226, "learning_rate": 4.078498146360349e-06, "loss": 0.7053, "step": 34860 }, { "epoch": 1.7415842573169513, "grad_norm": 3.4689862728118896, "learning_rate": 4.062994823582161e-06, "loss": 0.6655, "step": 34870 }, { "epoch": 1.7420837079212865, "grad_norm": 3.053076982498169, "learning_rate": 4.047519774819752e-06, "loss": 1.0372, "step": 34880 }, { "epoch": 1.7425831585256217, "grad_norm": 6.300508499145508, "learning_rate": 4.032073009597953e-06, "loss": 0.8557, "step": 34890 }, { "epoch": 1.743082609129957, "grad_norm": 2.8892078399658203, "learning_rate": 4.016654537424158e-06, "loss": 0.7995, "step": 34900 }, { "epoch": 1.7435820597342921, "grad_norm": 2.6619675159454346, "learning_rate": 4.001264367788365e-06, "loss": 0.7375, "step": 34910 }, { "epoch": 1.7440815103386273, "grad_norm": 1.9357144832611084, "learning_rate": 3.985902510163175e-06, "loss": 0.9065, "step": 34920 }, { "epoch": 1.7445809609429628, "grad_norm": 1.7168684005737305, "learning_rate": 3.970568974003741e-06, "loss": 0.7814, "step": 34930 }, { "epoch": 1.745080411547298, "grad_norm": 1.4109448194503784, "learning_rate": 3.955263768747758e-06, "loss": 0.6658, "step": 34940 }, { "epoch": 1.7455798621516332, "grad_norm": 3.3588008880615234, "learning_rate": 3.939986903815523e-06, "loss": 0.952, "step": 34950 }, { "epoch": 1.7460793127559684, "grad_norm": 1.6808346509933472, "learning_rate": 3.924738388609883e-06, "loss": 0.8995, "step": 34960 }, { "epoch": 1.7465787633603038, "grad_norm": 1.3778560161590576, "learning_rate": 3.90951823251623e-06, "loss": 0.9788, "step": 34970 }, { "epoch": 1.747078213964639, "grad_norm": 2.0245771408081055, "learning_rate": 3.89432644490248e-06, "loss": 0.7781, "step": 34980 }, { "epoch": 1.7475776645689742, "grad_norm": 1.1359102725982666, "learning_rate": 3.879163035119116e-06, "loss": 0.8038, "step": 34990 }, { "epoch": 1.7480771151733094, "grad_norm": 3.046802043914795, "learning_rate": 3.864028012499171e-06, "loss": 0.7879, "step": 35000 }, { "epoch": 1.7485765657776446, "grad_norm": 2.4534199237823486, "learning_rate": 3.848921386358156e-06, "loss": 0.8207, "step": 35010 }, { "epoch": 1.7490760163819798, "grad_norm": 1.5019700527191162, "learning_rate": 3.833843165994133e-06, "loss": 0.9865, "step": 35020 }, { "epoch": 1.749575466986315, "grad_norm": 3.659109115600586, "learning_rate": 3.818793360687689e-06, "loss": 0.82, "step": 35030 }, { "epoch": 1.7500749175906503, "grad_norm": 1.5497944355010986, "learning_rate": 3.8037719797018977e-06, "loss": 0.7137, "step": 35040 }, { "epoch": 1.7505743681949855, "grad_norm": 1.4163156747817993, "learning_rate": 3.7887790322823692e-06, "loss": 0.7716, "step": 35050 }, { "epoch": 1.7510738187993207, "grad_norm": 9.005719184875488, "learning_rate": 3.7738145276571714e-06, "loss": 0.9387, "step": 35060 }, { "epoch": 1.7515732694036559, "grad_norm": 2.677325963973999, "learning_rate": 3.7588784750368977e-06, "loss": 0.728, "step": 35070 }, { "epoch": 1.752072720007991, "grad_norm": 2.716270685195923, "learning_rate": 3.743970883614617e-06, "loss": 0.8576, "step": 35080 }, { "epoch": 1.7525721706123263, "grad_norm": 0.8777887225151062, "learning_rate": 3.729091762565884e-06, "loss": 0.6477, "step": 35090 }, { "epoch": 1.7530716212166617, "grad_norm": 1.774182677268982, "learning_rate": 3.714241121048734e-06, "loss": 0.7331, "step": 35100 }, { "epoch": 1.753571071820997, "grad_norm": 2.188457727432251, "learning_rate": 3.699418968203666e-06, "loss": 0.8522, "step": 35110 }, { "epoch": 1.7540705224253321, "grad_norm": 3.928419589996338, "learning_rate": 3.6846253131536557e-06, "loss": 1.1962, "step": 35120 }, { "epoch": 1.7545699730296673, "grad_norm": 3.10632061958313, "learning_rate": 3.6698601650041132e-06, "loss": 0.7582, "step": 35130 }, { "epoch": 1.7550694236340028, "grad_norm": 1.448272943496704, "learning_rate": 3.6551235328429246e-06, "loss": 0.8542, "step": 35140 }, { "epoch": 1.755568874238338, "grad_norm": 2.9972660541534424, "learning_rate": 3.640415425740423e-06, "loss": 0.8446, "step": 35150 }, { "epoch": 1.7560683248426732, "grad_norm": 3.519113779067993, "learning_rate": 3.625735852749379e-06, "loss": 0.8874, "step": 35160 }, { "epoch": 1.7565677754470084, "grad_norm": 3.0787265300750732, "learning_rate": 3.6110848229050033e-06, "loss": 1.0266, "step": 35170 }, { "epoch": 1.7570672260513436, "grad_norm": 1.7008774280548096, "learning_rate": 3.596462345224938e-06, "loss": 0.6857, "step": 35180 }, { "epoch": 1.7575666766556788, "grad_norm": 1.0056792497634888, "learning_rate": 3.5818684287092565e-06, "loss": 0.7632, "step": 35190 }, { "epoch": 1.758066127260014, "grad_norm": 4.376323699951172, "learning_rate": 3.5673030823404396e-06, "loss": 0.8802, "step": 35200 }, { "epoch": 1.7585655778643492, "grad_norm": 1.3027852773666382, "learning_rate": 3.552766315083389e-06, "loss": 0.7642, "step": 35210 }, { "epoch": 1.7590650284686844, "grad_norm": 2.6667373180389404, "learning_rate": 3.538258135885425e-06, "loss": 0.7576, "step": 35220 }, { "epoch": 1.7595644790730196, "grad_norm": 2.3906915187835693, "learning_rate": 3.52377855367626e-06, "loss": 0.7995, "step": 35230 }, { "epoch": 1.7600639296773548, "grad_norm": 2.685397148132324, "learning_rate": 3.5093275773680155e-06, "loss": 0.8184, "step": 35240 }, { "epoch": 1.76056338028169, "grad_norm": 2.3524937629699707, "learning_rate": 3.4949052158551875e-06, "loss": 0.7623, "step": 35250 }, { "epoch": 1.7610628308860252, "grad_norm": 3.061622142791748, "learning_rate": 3.480511478014692e-06, "loss": 0.8054, "step": 35260 }, { "epoch": 1.7615622814903606, "grad_norm": 1.3194270133972168, "learning_rate": 3.4661463727058086e-06, "loss": 0.7058, "step": 35270 }, { "epoch": 1.7620617320946959, "grad_norm": 1.4347525835037231, "learning_rate": 3.4518099087701762e-06, "loss": 0.8684, "step": 35280 }, { "epoch": 1.762561182699031, "grad_norm": 3.1809377670288086, "learning_rate": 3.437502095031836e-06, "loss": 0.981, "step": 35290 }, { "epoch": 1.7630606333033663, "grad_norm": 1.5727275609970093, "learning_rate": 3.4232229402971717e-06, "loss": 0.9053, "step": 35300 }, { "epoch": 1.7635600839077017, "grad_norm": 1.1857830286026, "learning_rate": 3.4089724533549473e-06, "loss": 0.7795, "step": 35310 }, { "epoch": 1.764059534512037, "grad_norm": 1.6227647066116333, "learning_rate": 3.3947506429762745e-06, "loss": 1.0572, "step": 35320 }, { "epoch": 1.764558985116372, "grad_norm": 1.5130252838134766, "learning_rate": 3.3805575179145955e-06, "loss": 0.8011, "step": 35330 }, { "epoch": 1.7650584357207073, "grad_norm": 2.095486640930176, "learning_rate": 3.3663930869057224e-06, "loss": 0.7711, "step": 35340 }, { "epoch": 1.7655578863250425, "grad_norm": 1.3634223937988281, "learning_rate": 3.35225735866781e-06, "loss": 0.7542, "step": 35350 }, { "epoch": 1.7660573369293777, "grad_norm": 1.3298194408416748, "learning_rate": 3.3381503419013195e-06, "loss": 0.7986, "step": 35360 }, { "epoch": 1.766556787533713, "grad_norm": 1.482733130455017, "learning_rate": 3.324072045289056e-06, "loss": 0.7156, "step": 35370 }, { "epoch": 1.7670562381380481, "grad_norm": 1.1290950775146484, "learning_rate": 3.3100224774961497e-06, "loss": 0.8372, "step": 35380 }, { "epoch": 1.7675556887423833, "grad_norm": 2.8298797607421875, "learning_rate": 3.2960016471700495e-06, "loss": 0.8869, "step": 35390 }, { "epoch": 1.7680551393467185, "grad_norm": 3.9646389484405518, "learning_rate": 3.2820095629405024e-06, "loss": 0.6835, "step": 35400 }, { "epoch": 1.7685545899510537, "grad_norm": 0.6883142590522766, "learning_rate": 3.2680462334195805e-06, "loss": 0.7136, "step": 35410 }, { "epoch": 1.769054040555389, "grad_norm": 1.1219985485076904, "learning_rate": 3.2541116672016315e-06, "loss": 0.6866, "step": 35420 }, { "epoch": 1.7695534911597242, "grad_norm": 1.2209354639053345, "learning_rate": 3.24020587286335e-06, "loss": 0.9593, "step": 35430 }, { "epoch": 1.7700529417640596, "grad_norm": 1.1638636589050293, "learning_rate": 3.2263288589636565e-06, "loss": 0.6533, "step": 35440 }, { "epoch": 1.7705523923683948, "grad_norm": 0.7668918967247009, "learning_rate": 3.2124806340438073e-06, "loss": 0.613, "step": 35450 }, { "epoch": 1.77105184297273, "grad_norm": 2.0484378337860107, "learning_rate": 3.1986612066273127e-06, "loss": 0.8066, "step": 35460 }, { "epoch": 1.7715512935770652, "grad_norm": 2.1879031658172607, "learning_rate": 3.184870585219979e-06, "loss": 0.8853, "step": 35470 }, { "epoch": 1.7720507441814004, "grad_norm": 4.4826555252075195, "learning_rate": 3.171108778309856e-06, "loss": 1.1747, "step": 35480 }, { "epoch": 1.7725501947857358, "grad_norm": 1.0032776594161987, "learning_rate": 3.157375794367273e-06, "loss": 0.7511, "step": 35490 }, { "epoch": 1.773049645390071, "grad_norm": 3.0913190841674805, "learning_rate": 3.1436716418448307e-06, "loss": 0.66, "step": 35500 }, { "epoch": 1.7735490959944062, "grad_norm": 1.4925191402435303, "learning_rate": 3.129996329177359e-06, "loss": 0.7249, "step": 35510 }, { "epoch": 1.7740485465987414, "grad_norm": 2.6196932792663574, "learning_rate": 3.116349864781959e-06, "loss": 0.9659, "step": 35520 }, { "epoch": 1.7745479972030767, "grad_norm": 1.2698544263839722, "learning_rate": 3.1027322570579662e-06, "loss": 0.6995, "step": 35530 }, { "epoch": 1.7750474478074119, "grad_norm": 3.3668904304504395, "learning_rate": 3.089143514386955e-06, "loss": 0.9762, "step": 35540 }, { "epoch": 1.775546898411747, "grad_norm": 0.7958011627197266, "learning_rate": 3.0755836451327293e-06, "loss": 0.8425, "step": 35550 }, { "epoch": 1.7760463490160823, "grad_norm": 2.241260290145874, "learning_rate": 3.062052657641329e-06, "loss": 0.8025, "step": 35560 }, { "epoch": 1.7765457996204175, "grad_norm": 0.8565200567245483, "learning_rate": 3.0485505602410204e-06, "loss": 0.7493, "step": 35570 }, { "epoch": 1.7770452502247527, "grad_norm": 3.199446678161621, "learning_rate": 3.035077361242272e-06, "loss": 0.8458, "step": 35580 }, { "epoch": 1.7775447008290879, "grad_norm": 1.3564001321792603, "learning_rate": 3.021633068937779e-06, "loss": 0.8096, "step": 35590 }, { "epoch": 1.778044151433423, "grad_norm": 0.8203519582748413, "learning_rate": 3.0082176916024494e-06, "loss": 0.7561, "step": 35600 }, { "epoch": 1.7785436020377585, "grad_norm": 2.2097702026367188, "learning_rate": 2.9948312374933783e-06, "loss": 0.7856, "step": 35610 }, { "epoch": 1.7790430526420937, "grad_norm": 2.2635576725006104, "learning_rate": 2.9814737148498806e-06, "loss": 0.9608, "step": 35620 }, { "epoch": 1.779542503246429, "grad_norm": 2.1437602043151855, "learning_rate": 2.9681451318934295e-06, "loss": 0.752, "step": 35630 }, { "epoch": 1.7800419538507641, "grad_norm": 2.8573081493377686, "learning_rate": 2.954845496827713e-06, "loss": 0.8175, "step": 35640 }, { "epoch": 1.7805414044550993, "grad_norm": 2.7811226844787598, "learning_rate": 2.9415748178385994e-06, "loss": 0.9224, "step": 35650 }, { "epoch": 1.7810408550594348, "grad_norm": 1.3401638269424438, "learning_rate": 2.9283331030941387e-06, "loss": 0.6958, "step": 35660 }, { "epoch": 1.78154030566377, "grad_norm": 1.7810553312301636, "learning_rate": 2.915120360744533e-06, "loss": 0.7183, "step": 35670 }, { "epoch": 1.7820397562681052, "grad_norm": 0.9359515309333801, "learning_rate": 2.901936598922156e-06, "loss": 0.754, "step": 35680 }, { "epoch": 1.7825392068724404, "grad_norm": 2.2551419734954834, "learning_rate": 2.8887818257415712e-06, "loss": 0.7901, "step": 35690 }, { "epoch": 1.7830386574767756, "grad_norm": 2.9914348125457764, "learning_rate": 2.8756560492994854e-06, "loss": 0.7996, "step": 35700 }, { "epoch": 1.7835381080811108, "grad_norm": 2.419156551361084, "learning_rate": 2.8625592776747366e-06, "loss": 0.7975, "step": 35710 }, { "epoch": 1.784037558685446, "grad_norm": 2.0841658115386963, "learning_rate": 2.8494915189283324e-06, "loss": 0.7499, "step": 35720 }, { "epoch": 1.7845370092897812, "grad_norm": 1.741248607635498, "learning_rate": 2.836452781103416e-06, "loss": 0.891, "step": 35730 }, { "epoch": 1.7850364598941164, "grad_norm": 1.356842279434204, "learning_rate": 2.823443072225285e-06, "loss": 0.8262, "step": 35740 }, { "epoch": 1.7855359104984516, "grad_norm": 1.0266705751419067, "learning_rate": 2.810462400301339e-06, "loss": 0.7411, "step": 35750 }, { "epoch": 1.7860353611027868, "grad_norm": 7.0921196937561035, "learning_rate": 2.7975107733211215e-06, "loss": 0.8509, "step": 35760 }, { "epoch": 1.786534811707122, "grad_norm": 2.041550397872925, "learning_rate": 2.784588199256316e-06, "loss": 0.642, "step": 35770 }, { "epoch": 1.7870342623114572, "grad_norm": 1.2752599716186523, "learning_rate": 2.7716946860606942e-06, "loss": 0.7308, "step": 35780 }, { "epoch": 1.7875337129157927, "grad_norm": 2.6199724674224854, "learning_rate": 2.7588302416701573e-06, "loss": 0.9344, "step": 35790 }, { "epoch": 1.7880331635201279, "grad_norm": 0.9878147840499878, "learning_rate": 2.745994874002711e-06, "loss": 0.6846, "step": 35800 }, { "epoch": 1.788532614124463, "grad_norm": 1.8787811994552612, "learning_rate": 2.7331885909584744e-06, "loss": 0.8218, "step": 35810 }, { "epoch": 1.7890320647287983, "grad_norm": 2.2020928859710693, "learning_rate": 2.7204114004196422e-06, "loss": 0.8301, "step": 35820 }, { "epoch": 1.7895315153331337, "grad_norm": 2.454972267150879, "learning_rate": 2.707663310250519e-06, "loss": 0.7884, "step": 35830 }, { "epoch": 1.790030965937469, "grad_norm": 1.7029662132263184, "learning_rate": 2.6949443282974997e-06, "loss": 0.7832, "step": 35840 }, { "epoch": 1.7905304165418041, "grad_norm": 1.7552825212478638, "learning_rate": 2.682254462389061e-06, "loss": 0.8428, "step": 35850 }, { "epoch": 1.7910298671461393, "grad_norm": 2.6518142223358154, "learning_rate": 2.6695937203357502e-06, "loss": 0.935, "step": 35860 }, { "epoch": 1.7915293177504745, "grad_norm": 2.2561256885528564, "learning_rate": 2.6569621099302e-06, "loss": 0.8149, "step": 35870 }, { "epoch": 1.7920287683548097, "grad_norm": 1.3084312677383423, "learning_rate": 2.6443596389471137e-06, "loss": 0.9377, "step": 35880 }, { "epoch": 1.792528218959145, "grad_norm": 1.627254843711853, "learning_rate": 2.631786315143253e-06, "loss": 0.7601, "step": 35890 }, { "epoch": 1.7930276695634801, "grad_norm": 1.4347816705703735, "learning_rate": 2.6192421462574333e-06, "loss": 0.8017, "step": 35900 }, { "epoch": 1.7935271201678153, "grad_norm": 2.343937397003174, "learning_rate": 2.6067271400105343e-06, "loss": 0.9129, "step": 35910 }, { "epoch": 1.7940265707721506, "grad_norm": 2.804797887802124, "learning_rate": 2.594241304105488e-06, "loss": 0.8435, "step": 35920 }, { "epoch": 1.7945260213764858, "grad_norm": 0.8968095779418945, "learning_rate": 2.5817846462272755e-06, "loss": 0.7967, "step": 35930 }, { "epoch": 1.795025471980821, "grad_norm": 0.8546461462974548, "learning_rate": 2.5693571740428914e-06, "loss": 0.7104, "step": 35940 }, { "epoch": 1.7955249225851562, "grad_norm": 1.5795838832855225, "learning_rate": 2.556958895201411e-06, "loss": 0.9141, "step": 35950 }, { "epoch": 1.7960243731894916, "grad_norm": 1.1717382669448853, "learning_rate": 2.544589817333909e-06, "loss": 0.7888, "step": 35960 }, { "epoch": 1.7965238237938268, "grad_norm": 0.6156312823295593, "learning_rate": 2.5322499480535e-06, "loss": 0.7581, "step": 35970 }, { "epoch": 1.797023274398162, "grad_norm": 1.3926143646240234, "learning_rate": 2.519939294955309e-06, "loss": 0.8339, "step": 35980 }, { "epoch": 1.7975227250024972, "grad_norm": 1.6335564851760864, "learning_rate": 2.507657865616492e-06, "loss": 0.8657, "step": 35990 }, { "epoch": 1.7980221756068326, "grad_norm": 5.138033390045166, "learning_rate": 2.4954056675962123e-06, "loss": 0.9677, "step": 36000 }, { "epoch": 1.7985216262111678, "grad_norm": 2.689020872116089, "learning_rate": 2.483182708435644e-06, "loss": 0.9653, "step": 36010 }, { "epoch": 1.799021076815503, "grad_norm": 7.020102024078369, "learning_rate": 2.4709889956579415e-06, "loss": 0.9286, "step": 36020 }, { "epoch": 1.7995205274198383, "grad_norm": 2.41326642036438, "learning_rate": 2.458824536768306e-06, "loss": 0.7382, "step": 36030 }, { "epoch": 1.8000199780241735, "grad_norm": 2.3130578994750977, "learning_rate": 2.4466893392539047e-06, "loss": 0.7812, "step": 36040 }, { "epoch": 1.8005194286285087, "grad_norm": 2.094149112701416, "learning_rate": 2.4345834105838794e-06, "loss": 0.8577, "step": 36050 }, { "epoch": 1.8010188792328439, "grad_norm": 4.530421733856201, "learning_rate": 2.422506758209381e-06, "loss": 0.822, "step": 36060 }, { "epoch": 1.801518329837179, "grad_norm": 1.2445532083511353, "learning_rate": 2.4104593895635353e-06, "loss": 1.0076, "step": 36070 }, { "epoch": 1.8020177804415143, "grad_norm": 1.2192314863204956, "learning_rate": 2.3984413120614447e-06, "loss": 0.9226, "step": 36080 }, { "epoch": 1.8025172310458495, "grad_norm": 1.793763279914856, "learning_rate": 2.3864525331001752e-06, "loss": 0.8876, "step": 36090 }, { "epoch": 1.8030166816501847, "grad_norm": 1.541576623916626, "learning_rate": 2.3744930600587635e-06, "loss": 0.675, "step": 36100 }, { "epoch": 1.80351613225452, "grad_norm": 2.6183204650878906, "learning_rate": 2.3625629002982095e-06, "loss": 0.8669, "step": 36110 }, { "epoch": 1.804015582858855, "grad_norm": 2.4909584522247314, "learning_rate": 2.3506620611614903e-06, "loss": 0.698, "step": 36120 }, { "epoch": 1.8045150334631905, "grad_norm": 3.180830717086792, "learning_rate": 2.3387905499734965e-06, "loss": 0.7879, "step": 36130 }, { "epoch": 1.8050144840675257, "grad_norm": 1.708243727684021, "learning_rate": 2.3269483740410934e-06, "loss": 0.8254, "step": 36140 }, { "epoch": 1.805513934671861, "grad_norm": 2.7718276977539062, "learning_rate": 2.315135540653096e-06, "loss": 0.9821, "step": 36150 }, { "epoch": 1.8060133852761961, "grad_norm": 1.0548055171966553, "learning_rate": 2.3033520570802435e-06, "loss": 0.6761, "step": 36160 }, { "epoch": 1.8065128358805316, "grad_norm": 0.7616592645645142, "learning_rate": 2.2915979305752066e-06, "loss": 0.8286, "step": 36170 }, { "epoch": 1.8070122864848668, "grad_norm": 1.7027424573898315, "learning_rate": 2.279873168372604e-06, "loss": 0.7274, "step": 36180 }, { "epoch": 1.807511737089202, "grad_norm": 2.3203492164611816, "learning_rate": 2.2681777776889735e-06, "loss": 0.9278, "step": 36190 }, { "epoch": 1.8080111876935372, "grad_norm": 1.4732754230499268, "learning_rate": 2.256511765722774e-06, "loss": 0.7915, "step": 36200 }, { "epoch": 1.8085106382978724, "grad_norm": 1.982208251953125, "learning_rate": 2.2448751396543787e-06, "loss": 0.9625, "step": 36210 }, { "epoch": 1.8090100889022076, "grad_norm": 2.9983904361724854, "learning_rate": 2.233267906646086e-06, "loss": 0.891, "step": 36220 }, { "epoch": 1.8095095395065428, "grad_norm": 0.9729540348052979, "learning_rate": 2.221690073842081e-06, "loss": 0.7775, "step": 36230 }, { "epoch": 1.810008990110878, "grad_norm": 6.451554775238037, "learning_rate": 2.2101416483684912e-06, "loss": 1.0564, "step": 36240 }, { "epoch": 1.8105084407152132, "grad_norm": 1.3823543787002563, "learning_rate": 2.198622637333292e-06, "loss": 0.5939, "step": 36250 }, { "epoch": 1.8110078913195484, "grad_norm": 4.945366382598877, "learning_rate": 2.18713304782639e-06, "loss": 0.7639, "step": 36260 }, { "epoch": 1.8115073419238836, "grad_norm": 1.014919400215149, "learning_rate": 2.175672886919583e-06, "loss": 0.7121, "step": 36270 }, { "epoch": 1.8120067925282188, "grad_norm": 4.568160533905029, "learning_rate": 2.1642421616665407e-06, "loss": 0.7426, "step": 36280 }, { "epoch": 1.812506243132554, "grad_norm": 1.13238525390625, "learning_rate": 2.152840879102819e-06, "loss": 0.8854, "step": 36290 }, { "epoch": 1.8130056937368895, "grad_norm": 1.3518203496932983, "learning_rate": 2.1414690462458643e-06, "loss": 0.6826, "step": 36300 }, { "epoch": 1.8135051443412247, "grad_norm": 2.2611231803894043, "learning_rate": 2.1301266700949894e-06, "loss": 0.8881, "step": 36310 }, { "epoch": 1.8140045949455599, "grad_norm": 1.8472015857696533, "learning_rate": 2.118813757631355e-06, "loss": 0.9527, "step": 36320 }, { "epoch": 1.814504045549895, "grad_norm": 4.762202739715576, "learning_rate": 2.1075303158180294e-06, "loss": 0.9185, "step": 36330 }, { "epoch": 1.8150034961542305, "grad_norm": 4.7360711097717285, "learning_rate": 2.096276351599902e-06, "loss": 0.9668, "step": 36340 }, { "epoch": 1.8155029467585657, "grad_norm": 1.803353190422058, "learning_rate": 2.085051871903754e-06, "loss": 0.8605, "step": 36350 }, { "epoch": 1.816002397362901, "grad_norm": 1.5579333305358887, "learning_rate": 2.073856883638181e-06, "loss": 0.9635, "step": 36360 }, { "epoch": 1.8165018479672361, "grad_norm": 1.4732489585876465, "learning_rate": 2.0626913936936554e-06, "loss": 0.9519, "step": 36370 }, { "epoch": 1.8170012985715713, "grad_norm": 5.456287860870361, "learning_rate": 2.051555408942485e-06, "loss": 0.9949, "step": 36380 }, { "epoch": 1.8175007491759065, "grad_norm": 1.23586106300354, "learning_rate": 2.040448936238831e-06, "loss": 0.6888, "step": 36390 }, { "epoch": 1.8180001997802417, "grad_norm": 2.487851142883301, "learning_rate": 2.0293719824186543e-06, "loss": 1.0862, "step": 36400 }, { "epoch": 1.818499650384577, "grad_norm": 1.8467823266983032, "learning_rate": 2.018324554299783e-06, "loss": 0.9047, "step": 36410 }, { "epoch": 1.8189991009889122, "grad_norm": 1.1711785793304443, "learning_rate": 2.0073066586818514e-06, "loss": 0.7446, "step": 36420 }, { "epoch": 1.8194985515932474, "grad_norm": 1.5726587772369385, "learning_rate": 1.996318302346334e-06, "loss": 0.9615, "step": 36430 }, { "epoch": 1.8199980021975826, "grad_norm": 1.9423017501831055, "learning_rate": 1.985359492056499e-06, "loss": 0.8248, "step": 36440 }, { "epoch": 1.8204974528019178, "grad_norm": 1.345139980316162, "learning_rate": 1.974430234557445e-06, "loss": 0.8341, "step": 36450 }, { "epoch": 1.820996903406253, "grad_norm": 0.9771599769592285, "learning_rate": 1.963530536576097e-06, "loss": 0.629, "step": 36460 }, { "epoch": 1.8214963540105884, "grad_norm": 1.711058259010315, "learning_rate": 1.9526604048211496e-06, "loss": 0.8397, "step": 36470 }, { "epoch": 1.8219958046149236, "grad_norm": 0.6953372359275818, "learning_rate": 1.9418198459831284e-06, "loss": 0.7847, "step": 36480 }, { "epoch": 1.8224952552192588, "grad_norm": 1.5376777648925781, "learning_rate": 1.9310088667343462e-06, "loss": 0.7276, "step": 36490 }, { "epoch": 1.822994705823594, "grad_norm": 1.581800937652588, "learning_rate": 1.920227473728914e-06, "loss": 0.8078, "step": 36500 }, { "epoch": 1.8234941564279292, "grad_norm": 2.3458943367004395, "learning_rate": 1.909475673602734e-06, "loss": 0.8155, "step": 36510 }, { "epoch": 1.8239936070322647, "grad_norm": 1.7553411722183228, "learning_rate": 1.8987534729734746e-06, "loss": 0.6698, "step": 36520 }, { "epoch": 1.8244930576365999, "grad_norm": 6.426192283630371, "learning_rate": 1.8880608784406061e-06, "loss": 0.8481, "step": 36530 }, { "epoch": 1.824992508240935, "grad_norm": 2.9949448108673096, "learning_rate": 1.8773978965853756e-06, "loss": 0.7418, "step": 36540 }, { "epoch": 1.8254919588452703, "grad_norm": 2.1208808422088623, "learning_rate": 1.8667645339707995e-06, "loss": 0.7231, "step": 36550 }, { "epoch": 1.8259914094496055, "grad_norm": 1.0377401113510132, "learning_rate": 1.856160797141665e-06, "loss": 0.7966, "step": 36560 }, { "epoch": 1.8264908600539407, "grad_norm": 3.553891658782959, "learning_rate": 1.845586692624518e-06, "loss": 0.8061, "step": 36570 }, { "epoch": 1.8269903106582759, "grad_norm": 1.9853954315185547, "learning_rate": 1.83504222692768e-06, "loss": 1.0525, "step": 36580 }, { "epoch": 1.827489761262611, "grad_norm": 3.1537060737609863, "learning_rate": 1.82452740654121e-06, "loss": 0.7374, "step": 36590 }, { "epoch": 1.8279892118669463, "grad_norm": 3.8631086349487305, "learning_rate": 1.8140422379369416e-06, "loss": 0.7949, "step": 36600 }, { "epoch": 1.8284886624712815, "grad_norm": 4.027760028839111, "learning_rate": 1.803586727568435e-06, "loss": 1.0987, "step": 36610 }, { "epoch": 1.8289881130756167, "grad_norm": 1.0728559494018555, "learning_rate": 1.7931608818710255e-06, "loss": 0.7608, "step": 36620 }, { "epoch": 1.829487563679952, "grad_norm": 0.6002448797225952, "learning_rate": 1.782764707261758e-06, "loss": 0.8324, "step": 36630 }, { "epoch": 1.8299870142842873, "grad_norm": 2.828483819961548, "learning_rate": 1.772398210139442e-06, "loss": 0.7755, "step": 36640 }, { "epoch": 1.8304864648886225, "grad_norm": 1.5651190280914307, "learning_rate": 1.7620613968846012e-06, "loss": 0.7828, "step": 36650 }, { "epoch": 1.8309859154929577, "grad_norm": 1.0795658826828003, "learning_rate": 1.7517542738595071e-06, "loss": 0.7629, "step": 36660 }, { "epoch": 1.831485366097293, "grad_norm": 1.6965725421905518, "learning_rate": 1.74147684740813e-06, "loss": 0.7867, "step": 36670 }, { "epoch": 1.8319848167016282, "grad_norm": 4.927463054656982, "learning_rate": 1.7312291238561929e-06, "loss": 0.7794, "step": 36680 }, { "epoch": 1.8324842673059636, "grad_norm": 1.472868800163269, "learning_rate": 1.721011109511117e-06, "loss": 0.9138, "step": 36690 }, { "epoch": 1.8329837179102988, "grad_norm": 2.4641835689544678, "learning_rate": 1.7108228106620439e-06, "loss": 0.7066, "step": 36700 }, { "epoch": 1.833483168514634, "grad_norm": 1.2599446773529053, "learning_rate": 1.7006642335798184e-06, "loss": 1.01, "step": 36710 }, { "epoch": 1.8339826191189692, "grad_norm": 2.637688159942627, "learning_rate": 1.6905353845170113e-06, "loss": 0.7802, "step": 36720 }, { "epoch": 1.8344820697233044, "grad_norm": 5.250273704528809, "learning_rate": 1.6804362697078745e-06, "loss": 0.7277, "step": 36730 }, { "epoch": 1.8349815203276396, "grad_norm": 2.62386417388916, "learning_rate": 1.6703668953683693e-06, "loss": 0.7685, "step": 36740 }, { "epoch": 1.8354809709319748, "grad_norm": 2.839879274368286, "learning_rate": 1.6603272676961435e-06, "loss": 0.8774, "step": 36750 }, { "epoch": 1.83598042153631, "grad_norm": 2.535888195037842, "learning_rate": 1.6503173928705484e-06, "loss": 0.7193, "step": 36760 }, { "epoch": 1.8364798721406452, "grad_norm": 1.5889999866485596, "learning_rate": 1.640337277052617e-06, "loss": 0.7647, "step": 36770 }, { "epoch": 1.8369793227449804, "grad_norm": 0.8776832818984985, "learning_rate": 1.6303869263850636e-06, "loss": 0.7911, "step": 36780 }, { "epoch": 1.8374787733493156, "grad_norm": 3.40168833732605, "learning_rate": 1.620466346992272e-06, "loss": 0.9623, "step": 36790 }, { "epoch": 1.8379782239536508, "grad_norm": 3.110584020614624, "learning_rate": 1.6105755449803251e-06, "loss": 1.1086, "step": 36800 }, { "epoch": 1.838477674557986, "grad_norm": 1.3743491172790527, "learning_rate": 1.6007145264369694e-06, "loss": 0.8489, "step": 36810 }, { "epoch": 1.8389771251623215, "grad_norm": 1.0851255655288696, "learning_rate": 1.5908832974316113e-06, "loss": 0.8076, "step": 36820 }, { "epoch": 1.8394765757666567, "grad_norm": 1.584679126739502, "learning_rate": 1.5810818640153214e-06, "loss": 0.9447, "step": 36830 }, { "epoch": 1.839976026370992, "grad_norm": 0.9320236444473267, "learning_rate": 1.5713102322208407e-06, "loss": 0.7344, "step": 36840 }, { "epoch": 1.840475476975327, "grad_norm": 0.9436264634132385, "learning_rate": 1.5615684080625747e-06, "loss": 0.7371, "step": 36850 }, { "epoch": 1.8409749275796625, "grad_norm": 1.9408799409866333, "learning_rate": 1.5518563975365552e-06, "loss": 0.7509, "step": 36860 }, { "epoch": 1.8414743781839977, "grad_norm": 1.2907048463821411, "learning_rate": 1.5421742066204892e-06, "loss": 0.8238, "step": 36870 }, { "epoch": 1.841973828788333, "grad_norm": 2.7229514122009277, "learning_rate": 1.5325218412737152e-06, "loss": 0.7027, "step": 36880 }, { "epoch": 1.8424732793926681, "grad_norm": 2.0745961666107178, "learning_rate": 1.5228993074372255e-06, "loss": 0.8348, "step": 36890 }, { "epoch": 1.8429727299970033, "grad_norm": 2.2836952209472656, "learning_rate": 1.5133066110336437e-06, "loss": 0.9206, "step": 36900 }, { "epoch": 1.8434721806013386, "grad_norm": 1.418420433998108, "learning_rate": 1.5037437579672243e-06, "loss": 0.918, "step": 36910 }, { "epoch": 1.8439716312056738, "grad_norm": 0.9497084617614746, "learning_rate": 1.4942107541238704e-06, "loss": 0.623, "step": 36920 }, { "epoch": 1.844471081810009, "grad_norm": 1.2863852977752686, "learning_rate": 1.4847076053710996e-06, "loss": 0.8215, "step": 36930 }, { "epoch": 1.8449705324143442, "grad_norm": 2.917018175125122, "learning_rate": 1.4752343175580496e-06, "loss": 0.8172, "step": 36940 }, { "epoch": 1.8454699830186794, "grad_norm": 1.2232264280319214, "learning_rate": 1.4657908965154899e-06, "loss": 0.7736, "step": 36950 }, { "epoch": 1.8459694336230146, "grad_norm": 1.3352242708206177, "learning_rate": 1.4563773480557984e-06, "loss": 0.6525, "step": 36960 }, { "epoch": 1.8464688842273498, "grad_norm": 1.957614541053772, "learning_rate": 1.4469936779729799e-06, "loss": 0.7533, "step": 36970 }, { "epoch": 1.846968334831685, "grad_norm": 2.4884417057037354, "learning_rate": 1.4376398920426304e-06, "loss": 0.8812, "step": 36980 }, { "epoch": 1.8474677854360204, "grad_norm": 1.1783238649368286, "learning_rate": 1.4283159960219671e-06, "loss": 0.7876, "step": 36990 }, { "epoch": 1.8479672360403556, "grad_norm": 1.1369096040725708, "learning_rate": 1.4190219956498108e-06, "loss": 0.9508, "step": 37000 }, { "epoch": 1.8484666866446908, "grad_norm": 1.6631437540054321, "learning_rate": 1.4097578966465686e-06, "loss": 0.9235, "step": 37010 }, { "epoch": 1.848966137249026, "grad_norm": 1.7621214389801025, "learning_rate": 1.4005237047142516e-06, "loss": 0.7556, "step": 37020 }, { "epoch": 1.8494655878533615, "grad_norm": 2.372312068939209, "learning_rate": 1.3913194255364581e-06, "loss": 0.7891, "step": 37030 }, { "epoch": 1.8499650384576967, "grad_norm": 2.985626220703125, "learning_rate": 1.3821450647783895e-06, "loss": 1.0131, "step": 37040 }, { "epoch": 1.8504644890620319, "grad_norm": 1.7253177165985107, "learning_rate": 1.3730006280868179e-06, "loss": 0.7961, "step": 37050 }, { "epoch": 1.850963939666367, "grad_norm": 2.0936331748962402, "learning_rate": 1.3638861210901022e-06, "loss": 0.9157, "step": 37060 }, { "epoch": 1.8514633902707023, "grad_norm": 1.595031976699829, "learning_rate": 1.3548015493981825e-06, "loss": 0.7765, "step": 37070 }, { "epoch": 1.8519628408750375, "grad_norm": 1.2044181823730469, "learning_rate": 1.3457469186025695e-06, "loss": 0.8601, "step": 37080 }, { "epoch": 1.8524622914793727, "grad_norm": 2.531481981277466, "learning_rate": 1.3367222342763386e-06, "loss": 1.1788, "step": 37090 }, { "epoch": 1.852961742083708, "grad_norm": 2.440699338912964, "learning_rate": 1.327727501974152e-06, "loss": 1.0185, "step": 37100 }, { "epoch": 1.853461192688043, "grad_norm": 6.92039680480957, "learning_rate": 1.3187627272322257e-06, "loss": 1.1688, "step": 37110 }, { "epoch": 1.8539606432923783, "grad_norm": 2.0533528327941895, "learning_rate": 1.3098279155683346e-06, "loss": 0.7569, "step": 37120 }, { "epoch": 1.8544600938967135, "grad_norm": 1.7414357662200928, "learning_rate": 1.3009230724818133e-06, "loss": 0.8002, "step": 37130 }, { "epoch": 1.8549595445010487, "grad_norm": 2.2459020614624023, "learning_rate": 1.2920482034535441e-06, "loss": 1.0269, "step": 37140 }, { "epoch": 1.855458995105384, "grad_norm": 1.5557661056518555, "learning_rate": 1.2832033139459909e-06, "loss": 0.7286, "step": 37150 }, { "epoch": 1.8559584457097194, "grad_norm": 2.566761016845703, "learning_rate": 1.274388409403121e-06, "loss": 0.8711, "step": 37160 }, { "epoch": 1.8564578963140546, "grad_norm": 1.2169697284698486, "learning_rate": 1.2656034952504726e-06, "loss": 0.7382, "step": 37170 }, { "epoch": 1.8569573469183898, "grad_norm": 2.421743869781494, "learning_rate": 1.2568485768951256e-06, "loss": 0.6857, "step": 37180 }, { "epoch": 1.857456797522725, "grad_norm": 0.6758628487586975, "learning_rate": 1.2481236597256863e-06, "loss": 0.6933, "step": 37190 }, { "epoch": 1.8579562481270604, "grad_norm": 2.2655436992645264, "learning_rate": 1.2394287491123146e-06, "loss": 0.9936, "step": 37200 }, { "epoch": 1.8584556987313956, "grad_norm": 3.4133973121643066, "learning_rate": 1.2307638504066687e-06, "loss": 0.6958, "step": 37210 }, { "epoch": 1.8589551493357308, "grad_norm": 1.4379960298538208, "learning_rate": 1.2221289689419546e-06, "loss": 0.7429, "step": 37220 }, { "epoch": 1.859454599940066, "grad_norm": 1.2873954772949219, "learning_rate": 1.2135241100329264e-06, "loss": 0.6887, "step": 37230 }, { "epoch": 1.8599540505444012, "grad_norm": 2.5836598873138428, "learning_rate": 1.2049492789758088e-06, "loss": 0.9737, "step": 37240 }, { "epoch": 1.8604535011487364, "grad_norm": 0.9402455687522888, "learning_rate": 1.1964044810483854e-06, "loss": 0.7533, "step": 37250 }, { "epoch": 1.8609529517530716, "grad_norm": 0.762193500995636, "learning_rate": 1.1878897215099327e-06, "loss": 0.6922, "step": 37260 }, { "epoch": 1.8614524023574068, "grad_norm": 0.9147796630859375, "learning_rate": 1.179405005601253e-06, "loss": 0.8275, "step": 37270 }, { "epoch": 1.861951852961742, "grad_norm": 7.237677097320557, "learning_rate": 1.1709503385446464e-06, "loss": 1.0095, "step": 37280 }, { "epoch": 1.8624513035660772, "grad_norm": 3.1037633419036865, "learning_rate": 1.162525725543917e-06, "loss": 0.8227, "step": 37290 }, { "epoch": 1.8629507541704124, "grad_norm": 0.8072869777679443, "learning_rate": 1.1541311717843838e-06, "loss": 0.8877, "step": 37300 }, { "epoch": 1.8634502047747477, "grad_norm": 1.3907021284103394, "learning_rate": 1.1457666824328473e-06, "loss": 0.6262, "step": 37310 }, { "epoch": 1.8639496553790829, "grad_norm": 1.6882739067077637, "learning_rate": 1.137432262637622e-06, "loss": 0.7603, "step": 37320 }, { "epoch": 1.8644491059834183, "grad_norm": 2.3211216926574707, "learning_rate": 1.1291279175284997e-06, "loss": 1.082, "step": 37330 }, { "epoch": 1.8649485565877535, "grad_norm": 1.3241509199142456, "learning_rate": 1.1208536522167635e-06, "loss": 0.7005, "step": 37340 }, { "epoch": 1.8654480071920887, "grad_norm": 2.3975155353546143, "learning_rate": 1.112609471795195e-06, "loss": 0.9437, "step": 37350 }, { "epoch": 1.865947457796424, "grad_norm": 2.898634672164917, "learning_rate": 1.1043953813380414e-06, "loss": 0.7256, "step": 37360 }, { "epoch": 1.8664469084007593, "grad_norm": 2.223987102508545, "learning_rate": 1.0962113859010413e-06, "loss": 0.7116, "step": 37370 }, { "epoch": 1.8669463590050945, "grad_norm": 2.3411850929260254, "learning_rate": 1.0880574905213991e-06, "loss": 0.8443, "step": 37380 }, { "epoch": 1.8674458096094297, "grad_norm": 1.744638442993164, "learning_rate": 1.0799337002178167e-06, "loss": 0.8556, "step": 37390 }, { "epoch": 1.867945260213765, "grad_norm": 2.3048319816589355, "learning_rate": 1.0718400199904276e-06, "loss": 0.9891, "step": 37400 }, { "epoch": 1.8684447108181002, "grad_norm": 1.3180681467056274, "learning_rate": 1.0637764548208696e-06, "loss": 0.6401, "step": 37410 }, { "epoch": 1.8689441614224354, "grad_norm": 1.2160851955413818, "learning_rate": 1.0557430096722333e-06, "loss": 0.8252, "step": 37420 }, { "epoch": 1.8694436120267706, "grad_norm": 1.4728838205337524, "learning_rate": 1.0477396894890578e-06, "loss": 0.7892, "step": 37430 }, { "epoch": 1.8699430626311058, "grad_norm": 2.373886823654175, "learning_rate": 1.0397664991973533e-06, "loss": 0.925, "step": 37440 }, { "epoch": 1.870442513235441, "grad_norm": 1.0829194784164429, "learning_rate": 1.031823443704577e-06, "loss": 0.7126, "step": 37450 }, { "epoch": 1.8709419638397762, "grad_norm": 1.7888859510421753, "learning_rate": 1.0239105278996464e-06, "loss": 0.8941, "step": 37460 }, { "epoch": 1.8714414144441114, "grad_norm": 3.2157223224639893, "learning_rate": 1.0160277566529375e-06, "loss": 0.8499, "step": 37470 }, { "epoch": 1.8719408650484466, "grad_norm": 2.7298121452331543, "learning_rate": 1.0081751348162305e-06, "loss": 0.8249, "step": 37480 }, { "epoch": 1.8724403156527818, "grad_norm": 3.2157411575317383, "learning_rate": 1.0003526672228092e-06, "loss": 1.0766, "step": 37490 }, { "epoch": 1.8729397662571172, "grad_norm": 4.085764408111572, "learning_rate": 9.925603586873499e-07, "loss": 0.9375, "step": 37500 }, { "epoch": 1.8734392168614524, "grad_norm": 1.4071623086929321, "learning_rate": 9.84798214005983e-07, "loss": 0.6476, "step": 37510 }, { "epoch": 1.8739386674657876, "grad_norm": 1.9814296960830688, "learning_rate": 9.77066237956281e-07, "loss": 0.6278, "step": 37520 }, { "epoch": 1.8744381180701228, "grad_norm": 1.177120566368103, "learning_rate": 9.69364435297232e-07, "loss": 0.8363, "step": 37530 }, { "epoch": 1.874937568674458, "grad_norm": 3.99025821685791, "learning_rate": 9.616928107692658e-07, "loss": 1.0677, "step": 37540 }, { "epoch": 1.8754370192787935, "grad_norm": 3.0358285903930664, "learning_rate": 9.54051369094222e-07, "loss": 0.85, "step": 37550 }, { "epoch": 1.8759364698831287, "grad_norm": 3.0026743412017822, "learning_rate": 9.464401149753832e-07, "loss": 0.8057, "step": 37560 }, { "epoch": 1.8764359204874639, "grad_norm": 1.3496410846710205, "learning_rate": 9.388590530974351e-07, "loss": 0.6673, "step": 37570 }, { "epoch": 1.876935371091799, "grad_norm": 1.2854838371276855, "learning_rate": 9.313081881264951e-07, "loss": 0.6518, "step": 37580 }, { "epoch": 1.8774348216961343, "grad_norm": 2.2696845531463623, "learning_rate": 9.237875247100736e-07, "loss": 0.7384, "step": 37590 }, { "epoch": 1.8779342723004695, "grad_norm": 1.8676453828811646, "learning_rate": 9.162970674771176e-07, "loss": 0.9786, "step": 37600 }, { "epoch": 1.8784337229048047, "grad_norm": 0.9983563423156738, "learning_rate": 9.088368210379561e-07, "loss": 0.774, "step": 37610 }, { "epoch": 1.87893317350914, "grad_norm": 0.9873731732368469, "learning_rate": 9.014067899843493e-07, "loss": 0.9082, "step": 37620 }, { "epoch": 1.8794326241134751, "grad_norm": 1.8019541501998901, "learning_rate": 8.94006978889439e-07, "loss": 0.6728, "step": 37630 }, { "epoch": 1.8799320747178103, "grad_norm": 0.6842108368873596, "learning_rate": 8.866373923077764e-07, "loss": 0.8022, "step": 37640 }, { "epoch": 1.8804315253221455, "grad_norm": 1.8676726818084717, "learning_rate": 8.792980347753055e-07, "loss": 0.839, "step": 37650 }, { "epoch": 1.8809309759264807, "grad_norm": 2.473867893218994, "learning_rate": 8.719889108093737e-07, "loss": 0.7283, "step": 37660 }, { "epoch": 1.881430426530816, "grad_norm": 1.949810266494751, "learning_rate": 8.647100249087048e-07, "loss": 0.7271, "step": 37670 }, { "epoch": 1.8819298771351514, "grad_norm": 1.088167428970337, "learning_rate": 8.574613815534261e-07, "loss": 0.5746, "step": 37680 }, { "epoch": 1.8824293277394866, "grad_norm": 1.6611754894256592, "learning_rate": 8.50242985205052e-07, "loss": 0.9361, "step": 37690 }, { "epoch": 1.8829287783438218, "grad_norm": 4.316952705383301, "learning_rate": 8.430548403064564e-07, "loss": 0.755, "step": 37700 }, { "epoch": 1.883428228948157, "grad_norm": 1.359334111213684, "learning_rate": 8.35896951281917e-07, "loss": 1.0327, "step": 37710 }, { "epoch": 1.8839276795524924, "grad_norm": 2.72778058052063, "learning_rate": 8.287693225370763e-07, "loss": 0.8413, "step": 37720 }, { "epoch": 1.8844271301568276, "grad_norm": 1.729676604270935, "learning_rate": 8.216719584589639e-07, "loss": 0.8361, "step": 37730 }, { "epoch": 1.8849265807611628, "grad_norm": 3.179363965988159, "learning_rate": 8.146048634159742e-07, "loss": 0.8664, "step": 37740 }, { "epoch": 1.885426031365498, "grad_norm": 2.108206033706665, "learning_rate": 8.075680417578724e-07, "loss": 0.9995, "step": 37750 }, { "epoch": 1.8859254819698332, "grad_norm": 1.8745157718658447, "learning_rate": 8.005614978157827e-07, "loss": 0.7661, "step": 37760 }, { "epoch": 1.8864249325741684, "grad_norm": 2.7155895233154297, "learning_rate": 7.935852359022111e-07, "loss": 0.8478, "step": 37770 }, { "epoch": 1.8869243831785036, "grad_norm": 1.0509263277053833, "learning_rate": 7.866392603110062e-07, "loss": 0.9175, "step": 37780 }, { "epoch": 1.8874238337828388, "grad_norm": 1.6978052854537964, "learning_rate": 7.79723575317387e-07, "loss": 1.0155, "step": 37790 }, { "epoch": 1.887923284387174, "grad_norm": 2.9031314849853516, "learning_rate": 7.728381851779265e-07, "loss": 0.8715, "step": 37800 }, { "epoch": 1.8884227349915093, "grad_norm": 1.771519660949707, "learning_rate": 7.659830941305568e-07, "loss": 0.8516, "step": 37810 }, { "epoch": 1.8889221855958445, "grad_norm": 1.8304922580718994, "learning_rate": 7.591583063945417e-07, "loss": 0.7413, "step": 37820 }, { "epoch": 1.8894216362001797, "grad_norm": 2.650054454803467, "learning_rate": 7.523638261705046e-07, "loss": 0.7139, "step": 37830 }, { "epoch": 1.8899210868045149, "grad_norm": 1.5766794681549072, "learning_rate": 7.455996576404389e-07, "loss": 0.7161, "step": 37840 }, { "epoch": 1.8904205374088503, "grad_norm": 8.148262977600098, "learning_rate": 7.388658049676423e-07, "loss": 0.8837, "step": 37850 }, { "epoch": 1.8909199880131855, "grad_norm": 1.6757593154907227, "learning_rate": 7.321622722967769e-07, "loss": 0.7342, "step": 37860 }, { "epoch": 1.8914194386175207, "grad_norm": 1.5398114919662476, "learning_rate": 7.254890637538259e-07, "loss": 0.8048, "step": 37870 }, { "epoch": 1.891918889221856, "grad_norm": 1.9274250268936157, "learning_rate": 7.188461834461313e-07, "loss": 0.8306, "step": 37880 }, { "epoch": 1.8924183398261913, "grad_norm": 1.3405691385269165, "learning_rate": 7.12233635462356e-07, "loss": 0.8062, "step": 37890 }, { "epoch": 1.8929177904305265, "grad_norm": 1.815018653869629, "learning_rate": 7.056514238724777e-07, "loss": 0.8739, "step": 37900 }, { "epoch": 1.8934172410348618, "grad_norm": 4.293715476989746, "learning_rate": 6.990995527278276e-07, "loss": 0.9029, "step": 37910 }, { "epoch": 1.893916691639197, "grad_norm": 3.882087230682373, "learning_rate": 6.925780260610581e-07, "loss": 0.7406, "step": 37920 }, { "epoch": 1.8944161422435322, "grad_norm": 1.110909342765808, "learning_rate": 6.860868478861249e-07, "loss": 0.8628, "step": 37930 }, { "epoch": 1.8949155928478674, "grad_norm": 1.5207172632217407, "learning_rate": 6.796260221983264e-07, "loss": 0.8614, "step": 37940 }, { "epoch": 1.8954150434522026, "grad_norm": 3.3391783237457275, "learning_rate": 6.731955529742762e-07, "loss": 0.6527, "step": 37950 }, { "epoch": 1.8959144940565378, "grad_norm": 2.131694793701172, "learning_rate": 6.667954441718916e-07, "loss": 0.6526, "step": 37960 }, { "epoch": 1.896413944660873, "grad_norm": 2.951368570327759, "learning_rate": 6.604256997304104e-07, "loss": 0.8335, "step": 37970 }, { "epoch": 1.8969133952652082, "grad_norm": 2.380202054977417, "learning_rate": 6.540863235703798e-07, "loss": 0.8378, "step": 37980 }, { "epoch": 1.8974128458695434, "grad_norm": 1.715438961982727, "learning_rate": 6.477773195936621e-07, "loss": 0.6246, "step": 37990 }, { "epoch": 1.8979122964738786, "grad_norm": 1.7839713096618652, "learning_rate": 6.41498691683412e-07, "loss": 0.7716, "step": 38000 }, { "epoch": 1.8984117470782138, "grad_norm": 2.3171377182006836, "learning_rate": 6.352504437041051e-07, "loss": 0.6701, "step": 38010 }, { "epoch": 1.8989111976825492, "grad_norm": 2.048083543777466, "learning_rate": 6.290325795015095e-07, "loss": 0.8286, "step": 38020 }, { "epoch": 1.8994106482868844, "grad_norm": 1.8699361085891724, "learning_rate": 6.228451029026805e-07, "loss": 0.6735, "step": 38030 }, { "epoch": 1.8999100988912196, "grad_norm": 0.8058998584747314, "learning_rate": 6.166880177159995e-07, "loss": 0.7895, "step": 38040 }, { "epoch": 1.9004095494955549, "grad_norm": 2.1313376426696777, "learning_rate": 6.105613277311073e-07, "loss": 0.6796, "step": 38050 }, { "epoch": 1.9009090000998903, "grad_norm": 0.8913472294807434, "learning_rate": 6.044650367189597e-07, "loss": 0.7288, "step": 38060 }, { "epoch": 1.9014084507042255, "grad_norm": 2.643505811691284, "learning_rate": 5.983991484317997e-07, "loss": 0.841, "step": 38070 }, { "epoch": 1.9019079013085607, "grad_norm": 1.6981201171875, "learning_rate": 5.923636666031463e-07, "loss": 0.7371, "step": 38080 }, { "epoch": 1.902407351912896, "grad_norm": 2.20851469039917, "learning_rate": 5.863585949478168e-07, "loss": 0.893, "step": 38090 }, { "epoch": 1.902906802517231, "grad_norm": 1.285595417022705, "learning_rate": 5.803839371618991e-07, "loss": 0.7736, "step": 38100 }, { "epoch": 1.9034062531215663, "grad_norm": 1.0376107692718506, "learning_rate": 5.744396969227795e-07, "loss": 0.7023, "step": 38110 }, { "epoch": 1.9039057037259015, "grad_norm": 1.0116043090820312, "learning_rate": 5.685258778890979e-07, "loss": 0.7496, "step": 38120 }, { "epoch": 1.9044051543302367, "grad_norm": 2.4546825885772705, "learning_rate": 5.626424837007927e-07, "loss": 0.7372, "step": 38130 }, { "epoch": 1.904904604934572, "grad_norm": 3.43674373626709, "learning_rate": 5.56789517979056e-07, "loss": 0.8585, "step": 38140 }, { "epoch": 1.9054040555389071, "grad_norm": 1.8024232387542725, "learning_rate": 5.509669843263676e-07, "loss": 1.0122, "step": 38150 }, { "epoch": 1.9059035061432423, "grad_norm": 1.7715247869491577, "learning_rate": 5.451748863264716e-07, "loss": 0.7282, "step": 38160 }, { "epoch": 1.9064029567475775, "grad_norm": 1.0849653482437134, "learning_rate": 5.394132275443664e-07, "loss": 0.5994, "step": 38170 }, { "epoch": 1.9069024073519127, "grad_norm": 2.599396228790283, "learning_rate": 5.33682011526343e-07, "loss": 0.7225, "step": 38180 }, { "epoch": 1.9074018579562482, "grad_norm": 2.610759735107422, "learning_rate": 5.279812417999297e-07, "loss": 0.7228, "step": 38190 }, { "epoch": 1.9079013085605834, "grad_norm": 1.4986180067062378, "learning_rate": 5.223109218739253e-07, "loss": 0.7363, "step": 38200 }, { "epoch": 1.9084007591649186, "grad_norm": 2.5232491493225098, "learning_rate": 5.166710552383825e-07, "loss": 0.7829, "step": 38210 }, { "epoch": 1.9089002097692538, "grad_norm": 2.935343027114868, "learning_rate": 5.110616453646188e-07, "loss": 0.8224, "step": 38220 }, { "epoch": 1.9093996603735892, "grad_norm": 1.3761450052261353, "learning_rate": 5.054826957052005e-07, "loss": 0.827, "step": 38230 }, { "epoch": 1.9098991109779244, "grad_norm": 2.6284067630767822, "learning_rate": 4.999342096939364e-07, "loss": 0.9095, "step": 38240 }, { "epoch": 1.9103985615822596, "grad_norm": 3.79300856590271, "learning_rate": 4.944161907458999e-07, "loss": 0.8354, "step": 38250 }, { "epoch": 1.9108980121865948, "grad_norm": 2.9497082233428955, "learning_rate": 4.889286422574135e-07, "loss": 0.9162, "step": 38260 }, { "epoch": 1.91139746279093, "grad_norm": 2.1179943084716797, "learning_rate": 4.834715676060309e-07, "loss": 0.9308, "step": 38270 }, { "epoch": 1.9118969133952652, "grad_norm": 1.976205825805664, "learning_rate": 4.780449701505541e-07, "loss": 0.9383, "step": 38280 }, { "epoch": 1.9123963639996004, "grad_norm": 4.775969982147217, "learning_rate": 4.726488532310336e-07, "loss": 0.6525, "step": 38290 }, { "epoch": 1.9128958146039357, "grad_norm": 2.6134207248687744, "learning_rate": 4.672832201687516e-07, "loss": 0.9203, "step": 38300 }, { "epoch": 1.9133952652082709, "grad_norm": 1.9384845495224, "learning_rate": 4.6194807426623853e-07, "loss": 0.7506, "step": 38310 }, { "epoch": 1.913894715812606, "grad_norm": 1.1082091331481934, "learning_rate": 4.566434188072399e-07, "loss": 0.8207, "step": 38320 }, { "epoch": 1.9143941664169413, "grad_norm": 1.9808390140533447, "learning_rate": 4.5136925705674957e-07, "loss": 0.9711, "step": 38330 }, { "epoch": 1.9148936170212765, "grad_norm": 2.4651217460632324, "learning_rate": 4.461255922609986e-07, "loss": 0.7805, "step": 38340 }, { "epoch": 1.9153930676256117, "grad_norm": 2.2612953186035156, "learning_rate": 4.409124276474275e-07, "loss": 0.8405, "step": 38350 }, { "epoch": 1.915892518229947, "grad_norm": 1.5166573524475098, "learning_rate": 4.357297664247251e-07, "loss": 0.9475, "step": 38360 }, { "epoch": 1.9163919688342823, "grad_norm": 2.375563621520996, "learning_rate": 4.305776117827842e-07, "loss": 0.6658, "step": 38370 }, { "epoch": 1.9168914194386175, "grad_norm": 1.7153723239898682, "learning_rate": 4.25455966892746e-07, "loss": 0.7867, "step": 38380 }, { "epoch": 1.9173908700429527, "grad_norm": 2.6330456733703613, "learning_rate": 4.2036483490695555e-07, "loss": 0.9835, "step": 38390 }, { "epoch": 1.9178903206472881, "grad_norm": 1.3542981147766113, "learning_rate": 4.1530421895897287e-07, "loss": 0.7411, "step": 38400 }, { "epoch": 1.9183897712516234, "grad_norm": 1.7611544132232666, "learning_rate": 4.1027412216358975e-07, "loss": 0.8558, "step": 38410 }, { "epoch": 1.9188892218559586, "grad_norm": 1.103027582168579, "learning_rate": 4.052745476168074e-07, "loss": 0.7788, "step": 38420 }, { "epoch": 1.9193886724602938, "grad_norm": 1.8738675117492676, "learning_rate": 4.0030549839583654e-07, "loss": 0.7124, "step": 38430 }, { "epoch": 1.919888123064629, "grad_norm": 1.4337223768234253, "learning_rate": 3.95366977559114e-07, "loss": 0.6702, "step": 38440 }, { "epoch": 1.9203875736689642, "grad_norm": 4.447963237762451, "learning_rate": 3.9045898814626946e-07, "loss": 0.7363, "step": 38450 }, { "epoch": 1.9208870242732994, "grad_norm": 1.2333414554595947, "learning_rate": 3.855815331781476e-07, "loss": 0.7191, "step": 38460 }, { "epoch": 1.9213864748776346, "grad_norm": 2.4996657371520996, "learning_rate": 3.807346156568026e-07, "loss": 0.8725, "step": 38470 }, { "epoch": 1.9218859254819698, "grad_norm": 3.429889440536499, "learning_rate": 3.7591823856549244e-07, "loss": 0.8243, "step": 38480 }, { "epoch": 1.922385376086305, "grad_norm": 1.755198359489441, "learning_rate": 3.711324048686682e-07, "loss": 0.9959, "step": 38490 }, { "epoch": 1.9228848266906402, "grad_norm": 2.0222361087799072, "learning_rate": 3.663771175119901e-07, "loss": 0.7681, "step": 38500 }, { "epoch": 1.9233842772949754, "grad_norm": 1.13322114944458, "learning_rate": 3.61652379422317e-07, "loss": 0.7783, "step": 38510 }, { "epoch": 1.9238837278993106, "grad_norm": 2.8027260303497314, "learning_rate": 3.56958193507706e-07, "loss": 0.8399, "step": 38520 }, { "epoch": 1.924383178503646, "grad_norm": 2.946244478225708, "learning_rate": 3.5229456265740145e-07, "loss": 0.8195, "step": 38530 }, { "epoch": 1.9248826291079812, "grad_norm": 1.9200987815856934, "learning_rate": 3.476614897418573e-07, "loss": 0.781, "step": 38540 }, { "epoch": 1.9253820797123165, "grad_norm": 2.083024501800537, "learning_rate": 3.43058977612698e-07, "loss": 0.7912, "step": 38550 }, { "epoch": 1.9258815303166517, "grad_norm": 3.6144940853118896, "learning_rate": 3.38487029102752e-07, "loss": 0.7019, "step": 38560 }, { "epoch": 1.9263809809209869, "grad_norm": 1.0712109804153442, "learning_rate": 3.3394564702602936e-07, "loss": 0.9913, "step": 38570 }, { "epoch": 1.9268804315253223, "grad_norm": 1.636195182800293, "learning_rate": 3.2943483417773866e-07, "loss": 0.7675, "step": 38580 }, { "epoch": 1.9273798821296575, "grad_norm": 3.2453606128692627, "learning_rate": 3.249545933342535e-07, "loss": 0.6751, "step": 38590 }, { "epoch": 1.9278793327339927, "grad_norm": 6.202270030975342, "learning_rate": 3.205049272531457e-07, "loss": 0.6988, "step": 38600 }, { "epoch": 1.928378783338328, "grad_norm": 1.3032634258270264, "learning_rate": 3.1608583867317463e-07, "loss": 0.6374, "step": 38610 }, { "epoch": 1.9288782339426631, "grad_norm": 0.9276584982872009, "learning_rate": 3.1169733031425343e-07, "loss": 0.7946, "step": 38620 }, { "epoch": 1.9293776845469983, "grad_norm": 1.9601482152938843, "learning_rate": 3.0733940487749914e-07, "loss": 0.7772, "step": 38630 }, { "epoch": 1.9298771351513335, "grad_norm": 2.568361282348633, "learning_rate": 3.030120650451884e-07, "loss": 1.0218, "step": 38640 }, { "epoch": 1.9303765857556687, "grad_norm": 3.4699294567108154, "learning_rate": 2.987153134807796e-07, "loss": 0.913, "step": 38650 }, { "epoch": 1.930876036360004, "grad_norm": 6.755033493041992, "learning_rate": 2.9444915282891284e-07, "loss": 1.0563, "step": 38660 }, { "epoch": 1.9313754869643391, "grad_norm": 1.4525831937789917, "learning_rate": 2.9021358571538204e-07, "loss": 0.9287, "step": 38670 }, { "epoch": 1.9318749375686743, "grad_norm": 0.8486120700836182, "learning_rate": 2.860086147471519e-07, "loss": 0.739, "step": 38680 }, { "epoch": 1.9323743881730095, "grad_norm": 2.333954095840454, "learning_rate": 2.818342425123799e-07, "loss": 0.8125, "step": 38690 }, { "epoch": 1.9328738387773448, "grad_norm": 1.2519758939743042, "learning_rate": 2.776904715803608e-07, "loss": 0.7221, "step": 38700 }, { "epoch": 1.9333732893816802, "grad_norm": 2.9941816329956055, "learning_rate": 2.735773045015766e-07, "loss": 0.8098, "step": 38710 }, { "epoch": 1.9338727399860154, "grad_norm": 2.3914289474487305, "learning_rate": 2.694947438076467e-07, "loss": 0.924, "step": 38720 }, { "epoch": 1.9343721905903506, "grad_norm": 1.7096226215362549, "learning_rate": 2.654427920113889e-07, "loss": 0.7377, "step": 38730 }, { "epoch": 1.9348716411946858, "grad_norm": 1.7441887855529785, "learning_rate": 2.614214516067415e-07, "loss": 0.7712, "step": 38740 }, { "epoch": 1.9353710917990212, "grad_norm": 2.1528828144073486, "learning_rate": 2.574307250688357e-07, "loss": 1.1289, "step": 38750 }, { "epoch": 1.9358705424033564, "grad_norm": 2.5711512565612793, "learning_rate": 2.5347061485393453e-07, "loss": 0.7565, "step": 38760 }, { "epoch": 1.9363699930076916, "grad_norm": 2.726969003677368, "learning_rate": 2.495411233994771e-07, "loss": 0.8647, "step": 38770 }, { "epoch": 1.9368694436120268, "grad_norm": 5.262991905212402, "learning_rate": 2.4564225312403986e-07, "loss": 0.7585, "step": 38780 }, { "epoch": 1.937368894216362, "grad_norm": 1.1469573974609375, "learning_rate": 2.417740064273644e-07, "loss": 0.7898, "step": 38790 }, { "epoch": 1.9378683448206973, "grad_norm": 2.885629415512085, "learning_rate": 2.3793638569033494e-07, "loss": 0.9307, "step": 38800 }, { "epoch": 1.9383677954250325, "grad_norm": 4.446005821228027, "learning_rate": 2.341293932749955e-07, "loss": 0.6734, "step": 38810 }, { "epoch": 1.9388672460293677, "grad_norm": 2.1867306232452393, "learning_rate": 2.3035303152452725e-07, "loss": 0.7337, "step": 38820 }, { "epoch": 1.9393666966337029, "grad_norm": 0.9655390381813049, "learning_rate": 2.2660730276326536e-07, "loss": 0.6344, "step": 38830 }, { "epoch": 1.939866147238038, "grad_norm": 2.997008800506592, "learning_rate": 2.2289220929668231e-07, "loss": 1.0494, "step": 38840 }, { "epoch": 1.9403655978423733, "grad_norm": 1.050862431526184, "learning_rate": 2.1920775341141565e-07, "loss": 0.9449, "step": 38850 }, { "epoch": 1.9408650484467085, "grad_norm": 2.732304811477661, "learning_rate": 2.1555393737521244e-07, "loss": 1.0013, "step": 38860 }, { "epoch": 1.9413644990510437, "grad_norm": 3.013045310974121, "learning_rate": 2.1193076343699603e-07, "loss": 1.1354, "step": 38870 }, { "epoch": 1.9418639496553791, "grad_norm": 2.4861466884613037, "learning_rate": 2.0833823382680474e-07, "loss": 0.9317, "step": 38880 }, { "epoch": 1.9423634002597143, "grad_norm": 3.381823778152466, "learning_rate": 2.047763507558309e-07, "loss": 0.9398, "step": 38890 }, { "epoch": 1.9428628508640495, "grad_norm": 2.195995807647705, "learning_rate": 2.0124511641638755e-07, "loss": 0.8674, "step": 38900 }, { "epoch": 1.9433623014683847, "grad_norm": 1.9074788093566895, "learning_rate": 1.9774453298193607e-07, "loss": 0.9743, "step": 38910 }, { "epoch": 1.9438617520727202, "grad_norm": 1.1502209901809692, "learning_rate": 1.942746026070752e-07, "loss": 0.6304, "step": 38920 }, { "epoch": 1.9443612026770554, "grad_norm": 1.8683198690414429, "learning_rate": 1.908353274275243e-07, "loss": 0.7868, "step": 38930 }, { "epoch": 1.9448606532813906, "grad_norm": 2.0038022994995117, "learning_rate": 1.8742670956014562e-07, "loss": 0.9149, "step": 38940 }, { "epoch": 1.9453601038857258, "grad_norm": 1.1857025623321533, "learning_rate": 1.8404875110292764e-07, "loss": 0.6651, "step": 38950 }, { "epoch": 1.945859554490061, "grad_norm": 2.5860588550567627, "learning_rate": 1.807014541349905e-07, "loss": 0.7599, "step": 38960 }, { "epoch": 1.9463590050943962, "grad_norm": 1.2038654088974, "learning_rate": 1.7738482071656958e-07, "loss": 0.8189, "step": 38970 }, { "epoch": 1.9468584556987314, "grad_norm": 3.3104500770568848, "learning_rate": 1.7409885288904858e-07, "loss": 0.9386, "step": 38980 }, { "epoch": 1.9473579063030666, "grad_norm": 1.5102159976959229, "learning_rate": 1.708435526749208e-07, "loss": 0.7472, "step": 38990 }, { "epoch": 1.9478573569074018, "grad_norm": 5.062881946563721, "learning_rate": 1.6761892207781683e-07, "loss": 1.2067, "step": 39000 }, { "epoch": 1.948356807511737, "grad_norm": 1.0124144554138184, "learning_rate": 1.644249630824657e-07, "loss": 0.6681, "step": 39010 }, { "epoch": 1.9488562581160722, "grad_norm": 0.6750489473342896, "learning_rate": 1.6126167765475043e-07, "loss": 0.8734, "step": 39020 }, { "epoch": 1.9493557087204074, "grad_norm": 2.4404916763305664, "learning_rate": 1.5812906774164694e-07, "loss": 0.9192, "step": 39030 }, { "epoch": 1.9498551593247426, "grad_norm": 3.1012375354766846, "learning_rate": 1.5502713527126843e-07, "loss": 0.8007, "step": 39040 }, { "epoch": 1.950354609929078, "grad_norm": 1.7646880149841309, "learning_rate": 1.5195588215283773e-07, "loss": 0.8481, "step": 39050 }, { "epoch": 1.9508540605334133, "grad_norm": 2.5629656314849854, "learning_rate": 1.489153102767038e-07, "loss": 0.907, "step": 39060 }, { "epoch": 1.9513535111377485, "grad_norm": 2.265373945236206, "learning_rate": 1.4590542151431406e-07, "loss": 0.8382, "step": 39070 }, { "epoch": 1.9518529617420837, "grad_norm": 0.3716246485710144, "learning_rate": 1.4292621771824777e-07, "loss": 0.7001, "step": 39080 }, { "epoch": 1.952352412346419, "grad_norm": 1.7797048091888428, "learning_rate": 1.3997770072219364e-07, "loss": 0.6487, "step": 39090 }, { "epoch": 1.9528518629507543, "grad_norm": 2.702284812927246, "learning_rate": 1.3705987234093886e-07, "loss": 0.8319, "step": 39100 }, { "epoch": 1.9533513135550895, "grad_norm": 1.6364545822143555, "learning_rate": 1.3417273437040246e-07, "loss": 0.7802, "step": 39110 }, { "epoch": 1.9538507641594247, "grad_norm": 2.1336441040039062, "learning_rate": 1.3131628858760737e-07, "loss": 0.7323, "step": 39120 }, { "epoch": 1.95435021476376, "grad_norm": 2.1186883449554443, "learning_rate": 1.2849053675067503e-07, "loss": 0.7431, "step": 39130 }, { "epoch": 1.9548496653680951, "grad_norm": 1.0149272680282593, "learning_rate": 1.256954805988475e-07, "loss": 0.7575, "step": 39140 }, { "epoch": 1.9553491159724303, "grad_norm": 2.055619716644287, "learning_rate": 1.2293112185247092e-07, "loss": 0.6864, "step": 39150 }, { "epoch": 1.9558485665767655, "grad_norm": 1.3865033388137817, "learning_rate": 1.2019746221298978e-07, "loss": 0.7448, "step": 39160 }, { "epoch": 1.9563480171811007, "grad_norm": 2.3338584899902344, "learning_rate": 1.174945033629582e-07, "loss": 0.813, "step": 39170 }, { "epoch": 1.956847467785436, "grad_norm": 1.9646928310394287, "learning_rate": 1.1482224696603983e-07, "loss": 0.8744, "step": 39180 }, { "epoch": 1.9573469183897712, "grad_norm": 3.0873661041259766, "learning_rate": 1.1218069466699676e-07, "loss": 0.8274, "step": 39190 }, { "epoch": 1.9578463689941064, "grad_norm": 5.946899890899658, "learning_rate": 1.0956984809168958e-07, "loss": 0.8267, "step": 39200 }, { "epoch": 1.9583458195984416, "grad_norm": 0.9239414930343628, "learning_rate": 1.0698970884708281e-07, "loss": 0.7627, "step": 39210 }, { "epoch": 1.958845270202777, "grad_norm": 2.566192865371704, "learning_rate": 1.044402785212395e-07, "loss": 1.0968, "step": 39220 }, { "epoch": 1.9593447208071122, "grad_norm": 2.113461494445801, "learning_rate": 1.0192155868332664e-07, "loss": 0.8054, "step": 39230 }, { "epoch": 1.9598441714114474, "grad_norm": 1.4655832052230835, "learning_rate": 9.943355088359863e-08, "loss": 0.6697, "step": 39240 }, { "epoch": 1.9603436220157826, "grad_norm": 3.021570920944214, "learning_rate": 9.697625665341381e-08, "loss": 0.9202, "step": 39250 }, { "epoch": 1.960843072620118, "grad_norm": 1.4923847913742065, "learning_rate": 9.454967750522903e-08, "loss": 0.784, "step": 39260 }, { "epoch": 1.9613425232244532, "grad_norm": 3.282676935195923, "learning_rate": 9.2153814932594e-08, "loss": 0.716, "step": 39270 }, { "epoch": 1.9618419738287884, "grad_norm": 4.731926918029785, "learning_rate": 8.978867041014028e-08, "loss": 0.9229, "step": 39280 }, { "epoch": 1.9623414244331236, "grad_norm": 2.0138251781463623, "learning_rate": 8.745424539362002e-08, "loss": 1.0717, "step": 39290 }, { "epoch": 1.9628408750374589, "grad_norm": 3.6514225006103516, "learning_rate": 8.515054131984501e-08, "loss": 0.7903, "step": 39300 }, { "epoch": 1.963340325641794, "grad_norm": 1.2794667482376099, "learning_rate": 8.28775596067477e-08, "loss": 0.8283, "step": 39310 }, { "epoch": 1.9638397762461293, "grad_norm": 1.6307498216629028, "learning_rate": 8.063530165333122e-08, "loss": 0.8918, "step": 39320 }, { "epoch": 1.9643392268504645, "grad_norm": 2.1946792602539062, "learning_rate": 7.84237688396916e-08, "loss": 0.8836, "step": 39330 }, { "epoch": 1.9648386774547997, "grad_norm": 4.008111000061035, "learning_rate": 7.624296252702334e-08, "loss": 0.8347, "step": 39340 }, { "epoch": 1.9653381280591349, "grad_norm": 1.0843454599380493, "learning_rate": 7.409288405760273e-08, "loss": 0.7732, "step": 39350 }, { "epoch": 1.96583757866347, "grad_norm": 2.5510149002075195, "learning_rate": 7.197353475478785e-08, "loss": 0.8123, "step": 39360 }, { "epoch": 1.9663370292678053, "grad_norm": 2.196392297744751, "learning_rate": 6.98849159230297e-08, "loss": 0.9499, "step": 39370 }, { "epoch": 1.9668364798721405, "grad_norm": 1.3309155702590942, "learning_rate": 6.782702884787217e-08, "loss": 0.8776, "step": 39380 }, { "epoch": 1.967335930476476, "grad_norm": 2.63332200050354, "learning_rate": 6.579987479592432e-08, "loss": 0.8467, "step": 39390 }, { "epoch": 1.9678353810808111, "grad_norm": 1.4559003114700317, "learning_rate": 6.380345501489915e-08, "loss": 0.6639, "step": 39400 }, { "epoch": 1.9683348316851463, "grad_norm": 1.8891857862472534, "learning_rate": 6.183777073357488e-08, "loss": 0.8028, "step": 39410 }, { "epoch": 1.9688342822894815, "grad_norm": 1.3499447107315063, "learning_rate": 5.990282316182816e-08, "loss": 0.7644, "step": 39420 }, { "epoch": 1.9693337328938167, "grad_norm": 3.0304222106933594, "learning_rate": 5.7998613490606315e-08, "loss": 1.094, "step": 39430 }, { "epoch": 1.9698331834981522, "grad_norm": 1.503991961479187, "learning_rate": 5.612514289194404e-08, "loss": 0.6968, "step": 39440 }, { "epoch": 1.9703326341024874, "grad_norm": 2.001195192337036, "learning_rate": 5.428241251895227e-08, "loss": 0.7498, "step": 39450 }, { "epoch": 1.9708320847068226, "grad_norm": 1.271515130996704, "learning_rate": 5.247042350582376e-08, "loss": 0.845, "step": 39460 }, { "epoch": 1.9713315353111578, "grad_norm": 2.6156809329986572, "learning_rate": 5.068917696782749e-08, "loss": 0.9919, "step": 39470 }, { "epoch": 1.971830985915493, "grad_norm": 1.5499589443206787, "learning_rate": 4.89386740013198e-08, "loss": 0.8676, "step": 39480 }, { "epoch": 1.9723304365198282, "grad_norm": 1.536037564277649, "learning_rate": 4.7218915683716614e-08, "loss": 0.8404, "step": 39490 }, { "epoch": 1.9728298871241634, "grad_norm": 1.4923033714294434, "learning_rate": 4.552990307353233e-08, "loss": 1.0102, "step": 39500 }, { "epoch": 1.9733293377284986, "grad_norm": 2.5278944969177246, "learning_rate": 4.3871637210335384e-08, "loss": 0.9048, "step": 39510 }, { "epoch": 1.9738287883328338, "grad_norm": 1.8926359415054321, "learning_rate": 4.2244119114792644e-08, "loss": 0.8455, "step": 39520 }, { "epoch": 1.974328238937169, "grad_norm": 1.9808841943740845, "learning_rate": 4.0647349788619506e-08, "loss": 1.0278, "step": 39530 }, { "epoch": 1.9748276895415042, "grad_norm": 1.298982858657837, "learning_rate": 3.9081330214635333e-08, "loss": 0.7926, "step": 39540 }, { "epoch": 1.9753271401458394, "grad_norm": 2.337252378463745, "learning_rate": 3.754606135670802e-08, "loss": 0.7904, "step": 39550 }, { "epoch": 1.9758265907501749, "grad_norm": 1.3410919904708862, "learning_rate": 3.604154415978722e-08, "loss": 0.8496, "step": 39560 }, { "epoch": 1.97632604135451, "grad_norm": 1.9480030536651611, "learning_rate": 3.4567779549904425e-08, "loss": 0.9046, "step": 39570 }, { "epoch": 1.9768254919588453, "grad_norm": 3.5804975032806396, "learning_rate": 3.312476843415069e-08, "loss": 0.9903, "step": 39580 }, { "epoch": 1.9773249425631805, "grad_norm": 1.5981268882751465, "learning_rate": 3.1712511700693335e-08, "loss": 0.7417, "step": 39590 }, { "epoch": 1.9778243931675157, "grad_norm": 1.7833573818206787, "learning_rate": 3.0331010218770384e-08, "loss": 0.7805, "step": 39600 }, { "epoch": 1.978323843771851, "grad_norm": 1.5239499807357788, "learning_rate": 2.898026483869054e-08, "loss": 0.7612, "step": 39610 }, { "epoch": 1.9788232943761863, "grad_norm": 1.1489386558532715, "learning_rate": 2.7660276391827667e-08, "loss": 0.6576, "step": 39620 }, { "epoch": 1.9793227449805215, "grad_norm": 2.484985589981079, "learning_rate": 2.6371045690626318e-08, "loss": 1.0078, "step": 39630 }, { "epoch": 1.9798221955848567, "grad_norm": 1.2232320308685303, "learning_rate": 2.511257352861285e-08, "loss": 0.7404, "step": 39640 }, { "epoch": 1.980321646189192, "grad_norm": 0.9947903752326965, "learning_rate": 2.388486068036211e-08, "loss": 0.8212, "step": 39650 }, { "epoch": 1.9808210967935271, "grad_norm": 3.484882354736328, "learning_rate": 2.2687907901525195e-08, "loss": 0.7427, "step": 39660 }, { "epoch": 1.9813205473978623, "grad_norm": 2.839507818222046, "learning_rate": 2.1521715928829454e-08, "loss": 0.8838, "step": 39670 }, { "epoch": 1.9818199980021975, "grad_norm": 1.9030522108078003, "learning_rate": 2.0386285480050728e-08, "loss": 0.7494, "step": 39680 }, { "epoch": 1.9823194486065328, "grad_norm": 1.4792015552520752, "learning_rate": 1.9281617254052197e-08, "loss": 0.6392, "step": 39690 }, { "epoch": 1.982818899210868, "grad_norm": 2.6815295219421387, "learning_rate": 1.8207711930740003e-08, "loss": 0.8992, "step": 39700 }, { "epoch": 1.9833183498152032, "grad_norm": 1.9661310911178589, "learning_rate": 1.716457017110762e-08, "loss": 0.6718, "step": 39710 }, { "epoch": 1.9838178004195384, "grad_norm": 1.4973111152648926, "learning_rate": 1.6152192617197025e-08, "loss": 0.9802, "step": 39720 }, { "epoch": 1.9843172510238736, "grad_norm": 1.121622085571289, "learning_rate": 1.5170579892126446e-08, "loss": 0.7206, "step": 39730 }, { "epoch": 1.984816701628209, "grad_norm": 1.3193241357803345, "learning_rate": 1.4219732600073699e-08, "loss": 0.8397, "step": 39740 }, { "epoch": 1.9853161522325442, "grad_norm": 1.8370063304901123, "learning_rate": 1.3299651326276197e-08, "loss": 0.8966, "step": 39750 }, { "epoch": 1.9858156028368794, "grad_norm": 1.4745622873306274, "learning_rate": 1.2410336637047605e-08, "loss": 0.6803, "step": 39760 }, { "epoch": 1.9863150534412146, "grad_norm": 2.2566349506378174, "learning_rate": 1.1551789079750075e-08, "loss": 1.0624, "step": 39770 }, { "epoch": 1.98681450404555, "grad_norm": 1.3781640529632568, "learning_rate": 1.0724009182816463e-08, "loss": 0.7659, "step": 39780 }, { "epoch": 1.9873139546498853, "grad_norm": 1.9181629419326782, "learning_rate": 9.92699745575032e-09, "loss": 0.923, "step": 39790 }, { "epoch": 1.9878134052542205, "grad_norm": 1.6109904050827026, "learning_rate": 9.160754389098136e-09, "loss": 0.7843, "step": 39800 }, { "epoch": 1.9883128558585557, "grad_norm": 1.616071343421936, "learning_rate": 8.425280454482654e-09, "loss": 0.7009, "step": 39810 }, { "epoch": 1.9888123064628909, "grad_norm": 1.2986334562301636, "learning_rate": 7.720576104580657e-09, "loss": 0.8101, "step": 39820 }, { "epoch": 1.989311757067226, "grad_norm": 1.4591253995895386, "learning_rate": 7.0466417731451795e-09, "loss": 0.9131, "step": 39830 }, { "epoch": 1.9898112076715613, "grad_norm": 2.142432689666748, "learning_rate": 6.403477874966646e-09, "loss": 0.8832, "step": 39840 }, { "epoch": 1.9903106582758965, "grad_norm": 2.1662228107452393, "learning_rate": 5.791084805922831e-09, "loss": 0.9557, "step": 39850 }, { "epoch": 1.9908101088802317, "grad_norm": 3.377622365951538, "learning_rate": 5.2094629429288995e-09, "loss": 0.882, "step": 39860 }, { "epoch": 1.991309559484567, "grad_norm": 0.9694475531578064, "learning_rate": 4.6586126439707164e-09, "loss": 0.8255, "step": 39870 }, { "epoch": 1.991809010088902, "grad_norm": 4.829628944396973, "learning_rate": 4.138534248099291e-09, "loss": 0.8026, "step": 39880 }, { "epoch": 1.9923084606932373, "grad_norm": 1.882993459701538, "learning_rate": 3.649228075419675e-09, "loss": 0.8205, "step": 39890 }, { "epoch": 1.9928079112975725, "grad_norm": 2.552271604537964, "learning_rate": 3.190694427090968e-09, "loss": 0.914, "step": 39900 }, { "epoch": 1.993307361901908, "grad_norm": 3.684934139251709, "learning_rate": 2.762933585348515e-09, "loss": 0.8397, "step": 39910 }, { "epoch": 1.9938068125062431, "grad_norm": 1.875923991203308, "learning_rate": 2.365945813470605e-09, "loss": 0.843, "step": 39920 }, { "epoch": 1.9943062631105783, "grad_norm": 1.62857985496521, "learning_rate": 1.9997313558006716e-09, "loss": 0.652, "step": 39930 }, { "epoch": 1.9948057137149136, "grad_norm": 3.290679931640625, "learning_rate": 1.6642904377472956e-09, "loss": 1.0049, "step": 39940 }, { "epoch": 1.995305164319249, "grad_norm": 1.4945985078811646, "learning_rate": 1.359623265767551e-09, "loss": 0.9436, "step": 39950 }, { "epoch": 1.9958046149235842, "grad_norm": 2.789457321166992, "learning_rate": 1.085730027389209e-09, "loss": 1.0226, "step": 39960 }, { "epoch": 1.9963040655279194, "grad_norm": 4.102095603942871, "learning_rate": 8.426108911829822e-10, "loss": 0.912, "step": 39970 }, { "epoch": 1.9968035161322546, "grad_norm": 1.5451637506484985, "learning_rate": 6.302660067958322e-10, "loss": 0.687, "step": 39980 }, { "epoch": 1.9973029667365898, "grad_norm": 2.4991366863250732, "learning_rate": 4.4869550492321334e-10, "loss": 0.8485, "step": 39990 }, { "epoch": 1.997802417340925, "grad_norm": 1.1374187469482422, "learning_rate": 2.978994973146243e-10, "loss": 0.7392, "step": 40000 }, { "epoch": 1.9983018679452602, "grad_norm": 3.9948456287384033, "learning_rate": 1.7787807679026102e-10, "loss": 1.0236, "step": 40010 }, { "epoch": 1.9988013185495954, "grad_norm": 1.2467082738876343, "learning_rate": 8.863131722436358e-11, "loss": 0.8505, "step": 40020 }, { "epoch": 1.9993007691539306, "grad_norm": 1.9844173192977905, "learning_rate": 3.015927354521608e-11, "loss": 0.7795, "step": 40030 }, { "epoch": 1.9998002197582658, "grad_norm": 2.8241963386535645, "learning_rate": 2.461981740697894e-12, "loss": 0.8601, "step": 40040 } ], "logging_steps": 10, "max_steps": 40044, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.5510169361678336e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }