diff --git "a/checkpoint-66298/trainer_state.json" "b/checkpoint-66298/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-66298/trainer_state.json" @@ -0,0 +1,46443 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 66298, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.5083411264291532e-05, + "grad_norm": 189.43104036529024, + "learning_rate": 3.0165912518853697e-08, + "loss": 10.9684, + "step": 1 + }, + { + "epoch": 0.0001508341126429153, + "grad_norm": 158.81693907913464, + "learning_rate": 3.01659125188537e-07, + "loss": 11.003, + "step": 10 + }, + { + "epoch": 0.0003016682252858306, + "grad_norm": 111.91774792699805, + "learning_rate": 6.03318250377074e-07, + "loss": 10.6798, + "step": 20 + }, + { + "epoch": 0.00045250233792874594, + "grad_norm": 64.17067463820119, + "learning_rate": 9.049773755656109e-07, + "loss": 9.9156, + "step": 30 + }, + { + "epoch": 0.0006033364505716613, + "grad_norm": 33.99022873865264, + "learning_rate": 1.206636500754148e-06, + "loss": 9.2466, + "step": 40 + }, + { + "epoch": 0.0007541705632145766, + "grad_norm": 14.573710631341669, + "learning_rate": 1.5082956259426847e-06, + "loss": 8.732, + "step": 50 + }, + { + "epoch": 0.0009050046758574919, + "grad_norm": 11.012857180168934, + "learning_rate": 1.8099547511312218e-06, + "loss": 8.3678, + "step": 60 + }, + { + "epoch": 0.0010558387885004072, + "grad_norm": 7.651349342602105, + "learning_rate": 2.111613876319759e-06, + "loss": 8.0962, + "step": 70 + }, + { + "epoch": 0.0012066729011433225, + "grad_norm": 14.307098929418151, + "learning_rate": 2.413273001508296e-06, + "loss": 7.926, + "step": 80 + }, + { + "epoch": 0.001357507013786238, + "grad_norm": 8.50387239680155, + "learning_rate": 2.7149321266968327e-06, + "loss": 7.7939, + "step": 90 + }, + { + "epoch": 0.0015083411264291532, + "grad_norm": 7.214337013352543, + "learning_rate": 3.0165912518853694e-06, + "loss": 7.5742, + "step": 100 + }, + { + "epoch": 0.0016591752390720685, + "grad_norm": 10.101065193395398, + "learning_rate": 3.3182503770739065e-06, + "loss": 7.3805, + "step": 110 + }, + { + "epoch": 0.0018100093517149838, + "grad_norm": 15.623082372794242, + "learning_rate": 3.6199095022624436e-06, + "loss": 7.1893, + "step": 120 + }, + { + "epoch": 0.0019608434643578992, + "grad_norm": 17.4198088418466, + "learning_rate": 3.92156862745098e-06, + "loss": 6.9639, + "step": 130 + }, + { + "epoch": 0.0021116775770008145, + "grad_norm": 18.225455686978577, + "learning_rate": 4.223227752639518e-06, + "loss": 6.7347, + "step": 140 + }, + { + "epoch": 0.0022625116896437297, + "grad_norm": 23.061097788185776, + "learning_rate": 4.5248868778280546e-06, + "loss": 6.5032, + "step": 150 + }, + { + "epoch": 0.002413345802286645, + "grad_norm": 17.86696206539706, + "learning_rate": 4.826546003016592e-06, + "loss": 6.2862, + "step": 160 + }, + { + "epoch": 0.0025641799149295603, + "grad_norm": 14.766414157452708, + "learning_rate": 5.128205128205128e-06, + "loss": 5.9795, + "step": 170 + }, + { + "epoch": 0.002715014027572476, + "grad_norm": 16.251458431137067, + "learning_rate": 5.4298642533936655e-06, + "loss": 5.7621, + "step": 180 + }, + { + "epoch": 0.002865848140215391, + "grad_norm": 19.311921356658104, + "learning_rate": 5.731523378582202e-06, + "loss": 5.5333, + "step": 190 + }, + { + "epoch": 0.0030166822528583065, + "grad_norm": 14.75377092071776, + "learning_rate": 6.033182503770739e-06, + "loss": 5.3017, + "step": 200 + }, + { + "epoch": 0.0031675163655012217, + "grad_norm": 15.115204317151598, + "learning_rate": 6.334841628959276e-06, + "loss": 5.0054, + "step": 210 + }, + { + "epoch": 0.003318350478144137, + "grad_norm": 14.034005771226335, + "learning_rate": 6.636500754147813e-06, + "loss": 4.7371, + "step": 220 + }, + { + "epoch": 0.0034691845907870522, + "grad_norm": 11.955231201443905, + "learning_rate": 6.938159879336351e-06, + "loss": 4.4852, + "step": 230 + }, + { + "epoch": 0.0036200187034299675, + "grad_norm": 10.749401358571431, + "learning_rate": 7.239819004524887e-06, + "loss": 4.1952, + "step": 240 + }, + { + "epoch": 0.003770852816072883, + "grad_norm": 11.139595035378271, + "learning_rate": 7.541478129713424e-06, + "loss": 3.9917, + "step": 250 + }, + { + "epoch": 0.0039216869287157985, + "grad_norm": 14.235662990523876, + "learning_rate": 7.84313725490196e-06, + "loss": 3.8445, + "step": 260 + }, + { + "epoch": 0.004072521041358713, + "grad_norm": 8.11064590010642, + "learning_rate": 8.144796380090498e-06, + "loss": 3.644, + "step": 270 + }, + { + "epoch": 0.004223355154001629, + "grad_norm": 9.054616810462878, + "learning_rate": 8.446455505279036e-06, + "loss": 3.5348, + "step": 280 + }, + { + "epoch": 0.004374189266644545, + "grad_norm": 5.936854335826162, + "learning_rate": 8.748114630467572e-06, + "loss": 3.4039, + "step": 290 + }, + { + "epoch": 0.0045250233792874595, + "grad_norm": 6.806082220524581, + "learning_rate": 9.049773755656109e-06, + "loss": 3.2947, + "step": 300 + }, + { + "epoch": 0.004675857491930375, + "grad_norm": 6.534021692964764, + "learning_rate": 9.351432880844647e-06, + "loss": 3.2006, + "step": 310 + }, + { + "epoch": 0.00482669160457329, + "grad_norm": 5.081448261094301, + "learning_rate": 9.653092006033184e-06, + "loss": 3.1092, + "step": 320 + }, + { + "epoch": 0.004977525717216206, + "grad_norm": 5.4557333108065365, + "learning_rate": 9.95475113122172e-06, + "loss": 3.0185, + "step": 330 + }, + { + "epoch": 0.0051283598298591205, + "grad_norm": 4.936974674175276, + "learning_rate": 1.0256410256410256e-05, + "loss": 2.9358, + "step": 340 + }, + { + "epoch": 0.005279193942502036, + "grad_norm": 4.352437358542264, + "learning_rate": 1.0558069381598795e-05, + "loss": 2.8717, + "step": 350 + }, + { + "epoch": 0.005430028055144952, + "grad_norm": 6.394979487970069, + "learning_rate": 1.0859728506787331e-05, + "loss": 2.8176, + "step": 360 + }, + { + "epoch": 0.005580862167787867, + "grad_norm": 3.872168081417127, + "learning_rate": 1.1161387631975868e-05, + "loss": 2.7485, + "step": 370 + }, + { + "epoch": 0.005731696280430782, + "grad_norm": 5.493815956306029, + "learning_rate": 1.1463046757164404e-05, + "loss": 2.7332, + "step": 380 + }, + { + "epoch": 0.005882530393073697, + "grad_norm": 4.072174373934873, + "learning_rate": 1.1764705882352942e-05, + "loss": 2.6565, + "step": 390 + }, + { + "epoch": 0.006033364505716613, + "grad_norm": 3.6045731851910183, + "learning_rate": 1.2066365007541478e-05, + "loss": 2.6229, + "step": 400 + }, + { + "epoch": 0.006184198618359528, + "grad_norm": 3.737870752756752, + "learning_rate": 1.2368024132730017e-05, + "loss": 2.5934, + "step": 410 + }, + { + "epoch": 0.0063350327310024435, + "grad_norm": 3.948566029956791, + "learning_rate": 1.2669683257918553e-05, + "loss": 2.5378, + "step": 420 + }, + { + "epoch": 0.006485866843645359, + "grad_norm": 3.234149111679052, + "learning_rate": 1.297134238310709e-05, + "loss": 2.5097, + "step": 430 + }, + { + "epoch": 0.006636700956288274, + "grad_norm": 3.1493788832183442, + "learning_rate": 1.3273001508295626e-05, + "loss": 2.4727, + "step": 440 + }, + { + "epoch": 0.00678753506893119, + "grad_norm": 3.1564183970918704, + "learning_rate": 1.3574660633484165e-05, + "loss": 2.4466, + "step": 450 + }, + { + "epoch": 0.0069383691815741045, + "grad_norm": 2.7540049255935313, + "learning_rate": 1.3876319758672701e-05, + "loss": 2.4348, + "step": 460 + }, + { + "epoch": 0.00708920329421702, + "grad_norm": 2.724255818122302, + "learning_rate": 1.4177978883861239e-05, + "loss": 2.38, + "step": 470 + }, + { + "epoch": 0.007240037406859935, + "grad_norm": 2.4311103149561646, + "learning_rate": 1.4479638009049775e-05, + "loss": 2.3458, + "step": 480 + }, + { + "epoch": 0.007390871519502851, + "grad_norm": 2.473924798193681, + "learning_rate": 1.4781297134238312e-05, + "loss": 2.3567, + "step": 490 + }, + { + "epoch": 0.007541705632145766, + "grad_norm": 3.156106798412538, + "learning_rate": 1.5082956259426848e-05, + "loss": 2.3284, + "step": 500 + }, + { + "epoch": 0.007692539744788681, + "grad_norm": 2.4418178608732437, + "learning_rate": 1.5384615384615387e-05, + "loss": 2.3108, + "step": 510 + }, + { + "epoch": 0.007843373857431597, + "grad_norm": 2.4988737750497263, + "learning_rate": 1.568627450980392e-05, + "loss": 2.285, + "step": 520 + }, + { + "epoch": 0.007994207970074512, + "grad_norm": 2.3506891988039675, + "learning_rate": 1.5987933634992462e-05, + "loss": 2.2463, + "step": 530 + }, + { + "epoch": 0.008145042082717427, + "grad_norm": 2.4269366374627293, + "learning_rate": 1.6289592760180996e-05, + "loss": 2.2608, + "step": 540 + }, + { + "epoch": 0.008295876195360343, + "grad_norm": 2.7846711170849283, + "learning_rate": 1.6591251885369534e-05, + "loss": 2.2586, + "step": 550 + }, + { + "epoch": 0.008446710308003258, + "grad_norm": 2.486578249931519, + "learning_rate": 1.689291101055807e-05, + "loss": 2.2215, + "step": 560 + }, + { + "epoch": 0.008597544420646173, + "grad_norm": 2.4861235233174823, + "learning_rate": 1.719457013574661e-05, + "loss": 2.1627, + "step": 570 + }, + { + "epoch": 0.00874837853328909, + "grad_norm": 2.0057392850268085, + "learning_rate": 1.7496229260935143e-05, + "loss": 2.1796, + "step": 580 + }, + { + "epoch": 0.008899212645932004, + "grad_norm": 2.3305617240011203, + "learning_rate": 1.779788838612368e-05, + "loss": 2.1501, + "step": 590 + }, + { + "epoch": 0.009050046758574919, + "grad_norm": 2.248262058500248, + "learning_rate": 1.8099547511312218e-05, + "loss": 2.1316, + "step": 600 + }, + { + "epoch": 0.009200880871217834, + "grad_norm": 1.9336201482109237, + "learning_rate": 1.8401206636500756e-05, + "loss": 2.1165, + "step": 610 + }, + { + "epoch": 0.00935171498386075, + "grad_norm": 1.7555462803110207, + "learning_rate": 1.8702865761689293e-05, + "loss": 2.108, + "step": 620 + }, + { + "epoch": 0.009502549096503665, + "grad_norm": 1.9765000593437931, + "learning_rate": 1.9004524886877827e-05, + "loss": 2.0969, + "step": 630 + }, + { + "epoch": 0.00965338320914658, + "grad_norm": 2.5982184990137074, + "learning_rate": 1.930618401206637e-05, + "loss": 2.0872, + "step": 640 + }, + { + "epoch": 0.009804217321789497, + "grad_norm": 1.8358566230579085, + "learning_rate": 1.9607843137254903e-05, + "loss": 2.0808, + "step": 650 + }, + { + "epoch": 0.009955051434432411, + "grad_norm": 1.8410692249074527, + "learning_rate": 1.990950226244344e-05, + "loss": 2.0482, + "step": 660 + }, + { + "epoch": 0.010105885547075326, + "grad_norm": 1.6456808082854075, + "learning_rate": 1.9999999438700256e-05, + "loss": 2.038, + "step": 670 + }, + { + "epoch": 0.010256719659718241, + "grad_norm": 1.8097321576232261, + "learning_rate": 1.9999996689477156e-05, + "loss": 2.0115, + "step": 680 + }, + { + "epoch": 0.010407553772361158, + "grad_norm": 1.738984872387825, + "learning_rate": 1.999999164923546e-05, + "loss": 2.0101, + "step": 690 + }, + { + "epoch": 0.010558387885004072, + "grad_norm": 2.120711304978566, + "learning_rate": 1.9999984317976325e-05, + "loss": 1.9905, + "step": 700 + }, + { + "epoch": 0.010709221997646987, + "grad_norm": 1.9747151956206166, + "learning_rate": 1.9999974695701428e-05, + "loss": 2.0056, + "step": 710 + }, + { + "epoch": 0.010860056110289904, + "grad_norm": 2.1248363379948123, + "learning_rate": 1.9999962782412978e-05, + "loss": 1.9797, + "step": 720 + }, + { + "epoch": 0.011010890222932819, + "grad_norm": 1.8566236765156294, + "learning_rate": 1.9999948578113698e-05, + "loss": 1.9632, + "step": 730 + }, + { + "epoch": 0.011161724335575733, + "grad_norm": 1.636621771362688, + "learning_rate": 1.9999932082806848e-05, + "loss": 1.9575, + "step": 740 + }, + { + "epoch": 0.011312558448218648, + "grad_norm": 1.7328695957899025, + "learning_rate": 1.9999913296496203e-05, + "loss": 1.9332, + "step": 750 + }, + { + "epoch": 0.011463392560861565, + "grad_norm": 1.6524033321052103, + "learning_rate": 1.999989221918607e-05, + "loss": 1.9342, + "step": 760 + }, + { + "epoch": 0.01161422667350448, + "grad_norm": 2.1345512534494184, + "learning_rate": 1.9999868850881273e-05, + "loss": 1.9219, + "step": 770 + }, + { + "epoch": 0.011765060786147395, + "grad_norm": 1.4526188268762177, + "learning_rate": 1.999984319158717e-05, + "loss": 1.9108, + "step": 780 + }, + { + "epoch": 0.011915894898790311, + "grad_norm": 1.6754985346858662, + "learning_rate": 1.999981524130964e-05, + "loss": 1.8829, + "step": 790 + }, + { + "epoch": 0.012066729011433226, + "grad_norm": 2.1816335789443757, + "learning_rate": 1.9999785000055084e-05, + "loss": 1.9267, + "step": 800 + }, + { + "epoch": 0.01221756312407614, + "grad_norm": 1.8462485551143168, + "learning_rate": 1.9999752467830432e-05, + "loss": 1.9018, + "step": 810 + }, + { + "epoch": 0.012368397236719056, + "grad_norm": 1.4858863787320835, + "learning_rate": 1.9999717644643136e-05, + "loss": 1.8729, + "step": 820 + }, + { + "epoch": 0.012519231349361972, + "grad_norm": 1.6352859914626143, + "learning_rate": 1.9999680530501173e-05, + "loss": 1.8648, + "step": 830 + }, + { + "epoch": 0.012670065462004887, + "grad_norm": 1.7441500511123695, + "learning_rate": 1.999964112541305e-05, + "loss": 1.8566, + "step": 840 + }, + { + "epoch": 0.012820899574647802, + "grad_norm": 1.933007884502389, + "learning_rate": 1.9999599429387792e-05, + "loss": 1.8594, + "step": 850 + }, + { + "epoch": 0.012971733687290718, + "grad_norm": 1.596714797538184, + "learning_rate": 1.9999555442434952e-05, + "loss": 1.851, + "step": 860 + }, + { + "epoch": 0.013122567799933633, + "grad_norm": 1.6747568528865833, + "learning_rate": 1.9999509164564605e-05, + "loss": 1.8523, + "step": 870 + }, + { + "epoch": 0.013273401912576548, + "grad_norm": 1.5850583943375904, + "learning_rate": 1.999946059578736e-05, + "loss": 1.8337, + "step": 880 + }, + { + "epoch": 0.013424236025219463, + "grad_norm": 1.5982562487873, + "learning_rate": 1.9999409736114333e-05, + "loss": 1.8395, + "step": 890 + }, + { + "epoch": 0.01357507013786238, + "grad_norm": 1.7264151632839362, + "learning_rate": 1.999935658555719e-05, + "loss": 1.8292, + "step": 900 + }, + { + "epoch": 0.013725904250505294, + "grad_norm": 1.5353842757342084, + "learning_rate": 1.99993011441281e-05, + "loss": 1.8252, + "step": 910 + }, + { + "epoch": 0.013876738363148209, + "grad_norm": 1.486139609989486, + "learning_rate": 1.9999243411839763e-05, + "loss": 1.8275, + "step": 920 + }, + { + "epoch": 0.014027572475791126, + "grad_norm": 1.5460170109014892, + "learning_rate": 1.999918338870541e-05, + "loss": 1.8007, + "step": 930 + }, + { + "epoch": 0.01417840658843404, + "grad_norm": 1.4697042473168522, + "learning_rate": 1.999912107473879e-05, + "loss": 1.7965, + "step": 940 + }, + { + "epoch": 0.014329240701076955, + "grad_norm": 1.420089550019931, + "learning_rate": 1.999905646995418e-05, + "loss": 1.8031, + "step": 950 + }, + { + "epoch": 0.01448007481371987, + "grad_norm": 1.5638336588751518, + "learning_rate": 1.999898957436638e-05, + "loss": 1.7718, + "step": 960 + }, + { + "epoch": 0.014630908926362787, + "grad_norm": 1.3844097324274744, + "learning_rate": 1.999892038799072e-05, + "loss": 1.7747, + "step": 970 + }, + { + "epoch": 0.014781743039005701, + "grad_norm": 1.5468233533840074, + "learning_rate": 1.9998848910843046e-05, + "loss": 1.7787, + "step": 980 + }, + { + "epoch": 0.014932577151648616, + "grad_norm": 1.610614614039702, + "learning_rate": 1.9998775142939734e-05, + "loss": 1.7535, + "step": 990 + }, + { + "epoch": 0.015083411264291533, + "grad_norm": 1.5088286089031928, + "learning_rate": 1.9998699084297687e-05, + "loss": 1.7679, + "step": 1000 + }, + { + "epoch": 0.015234245376934448, + "grad_norm": 1.3454791398740034, + "learning_rate": 1.999862073493433e-05, + "loss": 1.7612, + "step": 1010 + }, + { + "epoch": 0.015385079489577362, + "grad_norm": 1.5657055850052355, + "learning_rate": 1.999854009486761e-05, + "loss": 1.7545, + "step": 1020 + }, + { + "epoch": 0.015535913602220277, + "grad_norm": 1.3827045055130938, + "learning_rate": 1.9998457164116002e-05, + "loss": 1.7571, + "step": 1030 + }, + { + "epoch": 0.015686747714863194, + "grad_norm": 1.4066253485330937, + "learning_rate": 1.999837194269851e-05, + "loss": 1.7654, + "step": 1040 + }, + { + "epoch": 0.01583758182750611, + "grad_norm": 1.3410071631119902, + "learning_rate": 1.9998284430634655e-05, + "loss": 1.747, + "step": 1050 + }, + { + "epoch": 0.015988415940149023, + "grad_norm": 2.018752743761453, + "learning_rate": 1.9998194627944485e-05, + "loss": 1.7595, + "step": 1060 + }, + { + "epoch": 0.01613925005279194, + "grad_norm": 1.3158434076917227, + "learning_rate": 1.999810253464858e-05, + "loss": 1.7403, + "step": 1070 + }, + { + "epoch": 0.016290084165434853, + "grad_norm": 1.3176980860910648, + "learning_rate": 1.999800815076803e-05, + "loss": 1.7437, + "step": 1080 + }, + { + "epoch": 0.01644091827807777, + "grad_norm": 1.2680642871163972, + "learning_rate": 1.9997911476324462e-05, + "loss": 1.7167, + "step": 1090 + }, + { + "epoch": 0.016591752390720686, + "grad_norm": 4.058172855282546, + "learning_rate": 1.9997812511340027e-05, + "loss": 1.7448, + "step": 1100 + }, + { + "epoch": 0.0167425865033636, + "grad_norm": 1.3301381090543303, + "learning_rate": 1.9997711255837394e-05, + "loss": 1.7247, + "step": 1110 + }, + { + "epoch": 0.016893420616006516, + "grad_norm": 1.3930913559964258, + "learning_rate": 1.999760770983977e-05, + "loss": 1.7171, + "step": 1120 + }, + { + "epoch": 0.01704425472864943, + "grad_norm": 1.3881938750292484, + "learning_rate": 1.9997501873370866e-05, + "loss": 1.7182, + "step": 1130 + }, + { + "epoch": 0.017195088841292346, + "grad_norm": 1.32170800447192, + "learning_rate": 1.9997393746454934e-05, + "loss": 1.7127, + "step": 1140 + }, + { + "epoch": 0.01734592295393526, + "grad_norm": 1.2068405306845402, + "learning_rate": 1.9997283329116745e-05, + "loss": 1.6968, + "step": 1150 + }, + { + "epoch": 0.01749675706657818, + "grad_norm": 1.2324921820651273, + "learning_rate": 1.99971706213816e-05, + "loss": 1.7065, + "step": 1160 + }, + { + "epoch": 0.017647591179221093, + "grad_norm": 1.2452996766525826, + "learning_rate": 1.9997055623275315e-05, + "loss": 1.7047, + "step": 1170 + }, + { + "epoch": 0.01779842529186401, + "grad_norm": 1.2256010694296684, + "learning_rate": 1.9996938334824242e-05, + "loss": 1.6867, + "step": 1180 + }, + { + "epoch": 0.017949259404506923, + "grad_norm": 1.3586401355704512, + "learning_rate": 1.9996818756055246e-05, + "loss": 1.7056, + "step": 1190 + }, + { + "epoch": 0.018100093517149838, + "grad_norm": 1.4482089150420554, + "learning_rate": 1.9996696886995728e-05, + "loss": 1.6716, + "step": 1200 + }, + { + "epoch": 0.018250927629792753, + "grad_norm": 1.1127377268132548, + "learning_rate": 1.9996572727673605e-05, + "loss": 1.675, + "step": 1210 + }, + { + "epoch": 0.018401761742435668, + "grad_norm": 1.2139452562071913, + "learning_rate": 1.9996446278117322e-05, + "loss": 1.6544, + "step": 1220 + }, + { + "epoch": 0.018552595855078586, + "grad_norm": 1.2629305784439893, + "learning_rate": 1.9996317538355852e-05, + "loss": 1.6655, + "step": 1230 + }, + { + "epoch": 0.0187034299677215, + "grad_norm": 1.364810844611628, + "learning_rate": 1.9996186508418687e-05, + "loss": 1.6711, + "step": 1240 + }, + { + "epoch": 0.018854264080364416, + "grad_norm": 1.3849149919693524, + "learning_rate": 1.9996053188335846e-05, + "loss": 1.649, + "step": 1250 + }, + { + "epoch": 0.01900509819300733, + "grad_norm": 1.3621323115199522, + "learning_rate": 1.9995917578137872e-05, + "loss": 1.6463, + "step": 1260 + }, + { + "epoch": 0.019155932305650245, + "grad_norm": 1.2344918098431865, + "learning_rate": 1.9995779677855838e-05, + "loss": 1.656, + "step": 1270 + }, + { + "epoch": 0.01930676641829316, + "grad_norm": 1.1787279010493863, + "learning_rate": 1.9995639487521335e-05, + "loss": 1.6452, + "step": 1280 + }, + { + "epoch": 0.019457600530936075, + "grad_norm": 1.3837419345438366, + "learning_rate": 1.999549700716648e-05, + "loss": 1.6545, + "step": 1290 + }, + { + "epoch": 0.019608434643578993, + "grad_norm": 1.19836676905929, + "learning_rate": 1.9995352236823914e-05, + "loss": 1.6526, + "step": 1300 + }, + { + "epoch": 0.019759268756221908, + "grad_norm": 1.251918610870385, + "learning_rate": 1.9995205176526804e-05, + "loss": 1.6567, + "step": 1310 + }, + { + "epoch": 0.019910102868864823, + "grad_norm": 1.2552641582202206, + "learning_rate": 1.9995055826308848e-05, + "loss": 1.6482, + "step": 1320 + }, + { + "epoch": 0.020060936981507738, + "grad_norm": 1.2893029349471323, + "learning_rate": 1.9994904186204257e-05, + "loss": 1.6306, + "step": 1330 + }, + { + "epoch": 0.020211771094150652, + "grad_norm": 1.2375926590880397, + "learning_rate": 1.9994750256247772e-05, + "loss": 1.6499, + "step": 1340 + }, + { + "epoch": 0.020362605206793567, + "grad_norm": 1.1871660392425663, + "learning_rate": 1.9994594036474662e-05, + "loss": 1.6279, + "step": 1350 + }, + { + "epoch": 0.020513439319436482, + "grad_norm": 1.1476895220789736, + "learning_rate": 1.9994435526920713e-05, + "loss": 1.6329, + "step": 1360 + }, + { + "epoch": 0.0206642734320794, + "grad_norm": 1.354180075063677, + "learning_rate": 1.9994274727622245e-05, + "loss": 1.6361, + "step": 1370 + }, + { + "epoch": 0.020815107544722315, + "grad_norm": 1.1888342337812303, + "learning_rate": 1.9994111638616092e-05, + "loss": 1.629, + "step": 1380 + }, + { + "epoch": 0.02096594165736523, + "grad_norm": 1.1727592222134233, + "learning_rate": 1.999394625993962e-05, + "loss": 1.642, + "step": 1390 + }, + { + "epoch": 0.021116775770008145, + "grad_norm": 1.2865151199306062, + "learning_rate": 1.999377859163072e-05, + "loss": 1.6407, + "step": 1400 + }, + { + "epoch": 0.02126760988265106, + "grad_norm": 1.2199702275960433, + "learning_rate": 1.9993608633727798e-05, + "loss": 1.6025, + "step": 1410 + }, + { + "epoch": 0.021418443995293975, + "grad_norm": 1.231644086154285, + "learning_rate": 1.9993436386269805e-05, + "loss": 1.6308, + "step": 1420 + }, + { + "epoch": 0.02156927810793689, + "grad_norm": 1.2125523956037627, + "learning_rate": 1.999326184929619e-05, + "loss": 1.5996, + "step": 1430 + }, + { + "epoch": 0.021720112220579808, + "grad_norm": 1.0675766026430564, + "learning_rate": 1.9993085022846944e-05, + "loss": 1.614, + "step": 1440 + }, + { + "epoch": 0.021870946333222722, + "grad_norm": 1.0879017247399494, + "learning_rate": 1.999290590696258e-05, + "loss": 1.6086, + "step": 1450 + }, + { + "epoch": 0.022021780445865637, + "grad_norm": 1.0713848617031947, + "learning_rate": 1.9992724501684135e-05, + "loss": 1.6023, + "step": 1460 + }, + { + "epoch": 0.022172614558508552, + "grad_norm": 1.1017228502273868, + "learning_rate": 1.9992540807053166e-05, + "loss": 1.6181, + "step": 1470 + }, + { + "epoch": 0.022323448671151467, + "grad_norm": 1.0694774580863444, + "learning_rate": 1.999235482311176e-05, + "loss": 1.5746, + "step": 1480 + }, + { + "epoch": 0.022474282783794382, + "grad_norm": 1.1338655840944982, + "learning_rate": 1.9992166549902522e-05, + "loss": 1.5983, + "step": 1490 + }, + { + "epoch": 0.022625116896437297, + "grad_norm": 1.0451209066044493, + "learning_rate": 1.9991975987468593e-05, + "loss": 1.5935, + "step": 1500 + }, + { + "epoch": 0.022775951009080215, + "grad_norm": 1.1171877130878702, + "learning_rate": 1.9991783135853623e-05, + "loss": 1.5915, + "step": 1510 + }, + { + "epoch": 0.02292678512172313, + "grad_norm": 1.4031967007765085, + "learning_rate": 1.9991587995101806e-05, + "loss": 1.5872, + "step": 1520 + }, + { + "epoch": 0.023077619234366045, + "grad_norm": 1.0892027087422236, + "learning_rate": 1.9991390565257834e-05, + "loss": 1.5785, + "step": 1530 + }, + { + "epoch": 0.02322845334700896, + "grad_norm": 1.1561420078851687, + "learning_rate": 1.999119084636695e-05, + "loss": 1.5923, + "step": 1540 + }, + { + "epoch": 0.023379287459651874, + "grad_norm": 1.1899829882305513, + "learning_rate": 1.999098883847491e-05, + "loss": 1.5811, + "step": 1550 + }, + { + "epoch": 0.02353012157229479, + "grad_norm": 1.0656077745257284, + "learning_rate": 1.9990784541627986e-05, + "loss": 1.5779, + "step": 1560 + }, + { + "epoch": 0.023680955684937704, + "grad_norm": 0.9983364024697126, + "learning_rate": 1.999057795587299e-05, + "loss": 1.5684, + "step": 1570 + }, + { + "epoch": 0.023831789797580622, + "grad_norm": 1.0570168053886244, + "learning_rate": 1.999036908125725e-05, + "loss": 1.5502, + "step": 1580 + }, + { + "epoch": 0.023982623910223537, + "grad_norm": 1.2320482716368135, + "learning_rate": 1.9990157917828614e-05, + "loss": 1.5618, + "step": 1590 + }, + { + "epoch": 0.024133458022866452, + "grad_norm": 1.0291591149771284, + "learning_rate": 1.998994446563547e-05, + "loss": 1.5777, + "step": 1600 + }, + { + "epoch": 0.024284292135509367, + "grad_norm": 1.2257313017039746, + "learning_rate": 1.998972872472671e-05, + "loss": 1.5762, + "step": 1610 + }, + { + "epoch": 0.02443512624815228, + "grad_norm": 1.0980195421761703, + "learning_rate": 1.9989510695151767e-05, + "loss": 1.5914, + "step": 1620 + }, + { + "epoch": 0.024585960360795196, + "grad_norm": 1.038741932070898, + "learning_rate": 1.9989290376960593e-05, + "loss": 1.5766, + "step": 1630 + }, + { + "epoch": 0.02473679447343811, + "grad_norm": 1.0421345903393862, + "learning_rate": 1.998906777020366e-05, + "loss": 1.5673, + "step": 1640 + }, + { + "epoch": 0.02488762858608103, + "grad_norm": 1.0808898343985396, + "learning_rate": 1.9988842874931965e-05, + "loss": 1.5548, + "step": 1650 + }, + { + "epoch": 0.025038462698723944, + "grad_norm": 1.1272634095831404, + "learning_rate": 1.998861569119704e-05, + "loss": 1.5545, + "step": 1660 + }, + { + "epoch": 0.02518929681136686, + "grad_norm": 1.0265497336131748, + "learning_rate": 1.9988386219050924e-05, + "loss": 1.5625, + "step": 1670 + }, + { + "epoch": 0.025340130924009774, + "grad_norm": 1.0216574085093317, + "learning_rate": 1.99881544585462e-05, + "loss": 1.5587, + "step": 1680 + }, + { + "epoch": 0.02549096503665269, + "grad_norm": 1.0857284975225014, + "learning_rate": 1.9987920409735956e-05, + "loss": 1.5545, + "step": 1690 + }, + { + "epoch": 0.025641799149295603, + "grad_norm": 1.0578308589767556, + "learning_rate": 1.9987684072673815e-05, + "loss": 1.5651, + "step": 1700 + }, + { + "epoch": 0.02579263326193852, + "grad_norm": 1.0624586572034576, + "learning_rate": 1.9987445447413923e-05, + "loss": 1.5636, + "step": 1710 + }, + { + "epoch": 0.025943467374581437, + "grad_norm": 1.024531741332833, + "learning_rate": 1.998720453401095e-05, + "loss": 1.5564, + "step": 1720 + }, + { + "epoch": 0.02609430148722435, + "grad_norm": 1.092994446700227, + "learning_rate": 1.998696133252009e-05, + "loss": 1.5523, + "step": 1730 + }, + { + "epoch": 0.026245135599867266, + "grad_norm": 1.0896142393037027, + "learning_rate": 1.998671584299706e-05, + "loss": 1.5609, + "step": 1740 + }, + { + "epoch": 0.02639596971251018, + "grad_norm": 1.162604226936227, + "learning_rate": 1.9986468065498105e-05, + "loss": 1.5564, + "step": 1750 + }, + { + "epoch": 0.026546803825153096, + "grad_norm": 1.0707945554555462, + "learning_rate": 1.9986218000079988e-05, + "loss": 1.5503, + "step": 1760 + }, + { + "epoch": 0.02669763793779601, + "grad_norm": 1.1116907134888412, + "learning_rate": 1.99859656468e-05, + "loss": 1.5294, + "step": 1770 + }, + { + "epoch": 0.026848472050438926, + "grad_norm": 1.1740560463740979, + "learning_rate": 1.9985711005715952e-05, + "loss": 1.5378, + "step": 1780 + }, + { + "epoch": 0.026999306163081844, + "grad_norm": 0.9957968629342134, + "learning_rate": 1.998545407688619e-05, + "loss": 1.5195, + "step": 1790 + }, + { + "epoch": 0.02715014027572476, + "grad_norm": 0.9728716859302728, + "learning_rate": 1.9985194860369576e-05, + "loss": 1.5544, + "step": 1800 + }, + { + "epoch": 0.027300974388367674, + "grad_norm": 1.062800973542473, + "learning_rate": 1.998493335622549e-05, + "loss": 1.5392, + "step": 1810 + }, + { + "epoch": 0.02745180850101059, + "grad_norm": 1.013124994692403, + "learning_rate": 1.998466956451385e-05, + "loss": 1.5364, + "step": 1820 + }, + { + "epoch": 0.027602642613653503, + "grad_norm": 1.0934587450320719, + "learning_rate": 1.998440348529509e-05, + "loss": 1.5192, + "step": 1830 + }, + { + "epoch": 0.027753476726296418, + "grad_norm": 1.0805604342324684, + "learning_rate": 1.9984135118630162e-05, + "loss": 1.5226, + "step": 1840 + }, + { + "epoch": 0.027904310838939333, + "grad_norm": 1.0837278218320117, + "learning_rate": 1.9983864464580565e-05, + "loss": 1.5267, + "step": 1850 + }, + { + "epoch": 0.02805514495158225, + "grad_norm": 1.1379748172457385, + "learning_rate": 1.998359152320829e-05, + "loss": 1.5389, + "step": 1860 + }, + { + "epoch": 0.028205979064225166, + "grad_norm": 1.0556722722053307, + "learning_rate": 1.998331629457588e-05, + "loss": 1.5229, + "step": 1870 + }, + { + "epoch": 0.02835681317686808, + "grad_norm": 1.1635367899164226, + "learning_rate": 1.998303877874638e-05, + "loss": 1.5295, + "step": 1880 + }, + { + "epoch": 0.028507647289510996, + "grad_norm": 1.0664792523251296, + "learning_rate": 1.9982758975783378e-05, + "loss": 1.5187, + "step": 1890 + }, + { + "epoch": 0.02865848140215391, + "grad_norm": 1.0161400605631845, + "learning_rate": 1.9982476885750977e-05, + "loss": 1.5343, + "step": 1900 + }, + { + "epoch": 0.028809315514796825, + "grad_norm": 1.7291560958390064, + "learning_rate": 1.9982192508713797e-05, + "loss": 1.526, + "step": 1910 + }, + { + "epoch": 0.02896014962743974, + "grad_norm": 1.0847809802887007, + "learning_rate": 1.9981905844736996e-05, + "loss": 1.5207, + "step": 1920 + }, + { + "epoch": 0.02911098374008266, + "grad_norm": 1.0464730170996346, + "learning_rate": 1.998161689388625e-05, + "loss": 1.5031, + "step": 1930 + }, + { + "epoch": 0.029261817852725573, + "grad_norm": 0.9816803760983098, + "learning_rate": 1.9981325656227754e-05, + "loss": 1.5381, + "step": 1940 + }, + { + "epoch": 0.029412651965368488, + "grad_norm": 1.0227686467047508, + "learning_rate": 1.9981032131828232e-05, + "loss": 1.5148, + "step": 1950 + }, + { + "epoch": 0.029563486078011403, + "grad_norm": 0.9966173402609461, + "learning_rate": 1.9980736320754933e-05, + "loss": 1.4986, + "step": 1960 + }, + { + "epoch": 0.029714320190654318, + "grad_norm": 1.0235593795275104, + "learning_rate": 1.9980438223075624e-05, + "loss": 1.5086, + "step": 1970 + }, + { + "epoch": 0.029865154303297232, + "grad_norm": 1.126967098049971, + "learning_rate": 1.99801378388586e-05, + "loss": 1.4907, + "step": 1980 + }, + { + "epoch": 0.030015988415940147, + "grad_norm": 0.9737048476316021, + "learning_rate": 1.997983516817269e-05, + "loss": 1.4969, + "step": 1990 + }, + { + "epoch": 0.030166822528583066, + "grad_norm": 1.0660550247860874, + "learning_rate": 1.9979530211087224e-05, + "loss": 1.5, + "step": 2000 + }, + { + "epoch": 0.03031765664122598, + "grad_norm": 1.0146434071351111, + "learning_rate": 1.9979222967672072e-05, + "loss": 1.5188, + "step": 2010 + }, + { + "epoch": 0.030468490753868895, + "grad_norm": 0.9900285291369268, + "learning_rate": 1.997891343799763e-05, + "loss": 1.4812, + "step": 2020 + }, + { + "epoch": 0.03061932486651181, + "grad_norm": 0.9975709675422251, + "learning_rate": 1.9978601622134798e-05, + "loss": 1.4866, + "step": 2030 + }, + { + "epoch": 0.030770158979154725, + "grad_norm": 0.9927076173484801, + "learning_rate": 1.997828752015503e-05, + "loss": 1.4939, + "step": 2040 + }, + { + "epoch": 0.03092099309179764, + "grad_norm": 1.0049942946173194, + "learning_rate": 1.9977971132130277e-05, + "loss": 1.4963, + "step": 2050 + }, + { + "epoch": 0.031071827204440555, + "grad_norm": 0.9900882299622423, + "learning_rate": 1.9977652458133025e-05, + "loss": 1.5067, + "step": 2060 + }, + { + "epoch": 0.031222661317083473, + "grad_norm": 1.0912223262726413, + "learning_rate": 1.9977331498236286e-05, + "loss": 1.5002, + "step": 2070 + }, + { + "epoch": 0.03137349542972639, + "grad_norm": 0.905671752400172, + "learning_rate": 1.997700825251359e-05, + "loss": 1.4886, + "step": 2080 + }, + { + "epoch": 0.0315243295423693, + "grad_norm": 1.0071376124937796, + "learning_rate": 1.9976682721038997e-05, + "loss": 1.4897, + "step": 2090 + }, + { + "epoch": 0.03167516365501222, + "grad_norm": 0.9970323257714934, + "learning_rate": 1.9976354903887084e-05, + "loss": 1.4932, + "step": 2100 + }, + { + "epoch": 0.031825997767655136, + "grad_norm": 1.033402026631153, + "learning_rate": 1.9976024801132952e-05, + "loss": 1.4998, + "step": 2110 + }, + { + "epoch": 0.03197683188029805, + "grad_norm": 0.9801831882871451, + "learning_rate": 1.997569241285223e-05, + "loss": 1.5115, + "step": 2120 + }, + { + "epoch": 0.032127665992940965, + "grad_norm": 0.9773447462433478, + "learning_rate": 1.997535773912107e-05, + "loss": 1.5029, + "step": 2130 + }, + { + "epoch": 0.03227850010558388, + "grad_norm": 1.0259158353306714, + "learning_rate": 1.997502078001615e-05, + "loss": 1.4943, + "step": 2140 + }, + { + "epoch": 0.032429334218226795, + "grad_norm": 0.9862555322152993, + "learning_rate": 1.9974681535614662e-05, + "loss": 1.4683, + "step": 2150 + }, + { + "epoch": 0.032580168330869706, + "grad_norm": 0.9682391596823, + "learning_rate": 1.997434000599433e-05, + "loss": 1.4787, + "step": 2160 + }, + { + "epoch": 0.032731002443512625, + "grad_norm": 0.9520365928392005, + "learning_rate": 1.9973996191233396e-05, + "loss": 1.4634, + "step": 2170 + }, + { + "epoch": 0.03288183655615554, + "grad_norm": 1.0029852297111548, + "learning_rate": 1.9973650091410635e-05, + "loss": 1.4966, + "step": 2180 + }, + { + "epoch": 0.033032670668798454, + "grad_norm": 0.9783554305347112, + "learning_rate": 1.9973301706605334e-05, + "loss": 1.4596, + "step": 2190 + }, + { + "epoch": 0.03318350478144137, + "grad_norm": 0.94020049531717, + "learning_rate": 1.997295103689731e-05, + "loss": 1.4708, + "step": 2200 + }, + { + "epoch": 0.033334338894084284, + "grad_norm": 0.9894426180475294, + "learning_rate": 1.9972598082366904e-05, + "loss": 1.4743, + "step": 2210 + }, + { + "epoch": 0.0334851730067272, + "grad_norm": 0.9904898909471758, + "learning_rate": 1.9972242843094976e-05, + "loss": 1.4818, + "step": 2220 + }, + { + "epoch": 0.033636007119370114, + "grad_norm": 0.9710365743517803, + "learning_rate": 1.9971885319162912e-05, + "loss": 1.4763, + "step": 2230 + }, + { + "epoch": 0.03378684123201303, + "grad_norm": 1.0289395109301451, + "learning_rate": 1.997152551065262e-05, + "loss": 1.4738, + "step": 2240 + }, + { + "epoch": 0.03393767534465595, + "grad_norm": 0.9501885854897809, + "learning_rate": 1.9971163417646537e-05, + "loss": 1.4744, + "step": 2250 + }, + { + "epoch": 0.03408850945729886, + "grad_norm": 0.9881622728383795, + "learning_rate": 1.997079904022762e-05, + "loss": 1.4663, + "step": 2260 + }, + { + "epoch": 0.03423934356994178, + "grad_norm": 0.9774011918903214, + "learning_rate": 1.9970432378479344e-05, + "loss": 1.4641, + "step": 2270 + }, + { + "epoch": 0.03439017768258469, + "grad_norm": 1.0200784758776786, + "learning_rate": 1.9970063432485715e-05, + "loss": 1.4648, + "step": 2280 + }, + { + "epoch": 0.03454101179522761, + "grad_norm": 1.175621771291439, + "learning_rate": 1.9969692202331252e-05, + "loss": 1.4703, + "step": 2290 + }, + { + "epoch": 0.03469184590787052, + "grad_norm": 0.887926401701697, + "learning_rate": 1.996931868810102e-05, + "loss": 1.4533, + "step": 2300 + }, + { + "epoch": 0.03484268002051344, + "grad_norm": 0.9550776642099655, + "learning_rate": 1.9968942889880572e-05, + "loss": 1.4705, + "step": 2310 + }, + { + "epoch": 0.03499351413315636, + "grad_norm": 0.9430671249578266, + "learning_rate": 1.996856480775602e-05, + "loss": 1.4697, + "step": 2320 + }, + { + "epoch": 0.03514434824579927, + "grad_norm": 0.9877489526075709, + "learning_rate": 1.9968184441813977e-05, + "loss": 1.4747, + "step": 2330 + }, + { + "epoch": 0.03529518235844219, + "grad_norm": 0.8981586448268053, + "learning_rate": 1.996780179214159e-05, + "loss": 1.4582, + "step": 2340 + }, + { + "epoch": 0.0354460164710851, + "grad_norm": 0.9830863276579005, + "learning_rate": 1.9967416858826517e-05, + "loss": 1.4414, + "step": 2350 + }, + { + "epoch": 0.03559685058372802, + "grad_norm": 0.9706538797424409, + "learning_rate": 1.9967029641956953e-05, + "loss": 1.4599, + "step": 2360 + }, + { + "epoch": 0.03574768469637093, + "grad_norm": 0.91575137479225, + "learning_rate": 1.9966640141621607e-05, + "loss": 1.451, + "step": 2370 + }, + { + "epoch": 0.035898518809013846, + "grad_norm": 1.0284419331274168, + "learning_rate": 1.9966248357909716e-05, + "loss": 1.4466, + "step": 2380 + }, + { + "epoch": 0.036049352921656765, + "grad_norm": 0.8821606499722466, + "learning_rate": 1.996585429091104e-05, + "loss": 1.4474, + "step": 2390 + }, + { + "epoch": 0.036200187034299676, + "grad_norm": 0.9234711838421872, + "learning_rate": 1.9965457940715855e-05, + "loss": 1.4243, + "step": 2400 + }, + { + "epoch": 0.036351021146942594, + "grad_norm": 0.9344251013027839, + "learning_rate": 1.9965059307414973e-05, + "loss": 1.4342, + "step": 2410 + }, + { + "epoch": 0.036501855259585506, + "grad_norm": 0.9917309622252661, + "learning_rate": 1.9964658391099714e-05, + "loss": 1.4555, + "step": 2420 + }, + { + "epoch": 0.036652689372228424, + "grad_norm": 0.9241520433690372, + "learning_rate": 1.9964255191861936e-05, + "loss": 1.4387, + "step": 2430 + }, + { + "epoch": 0.036803523484871335, + "grad_norm": 0.8808438336450857, + "learning_rate": 1.9963849709794007e-05, + "loss": 1.4402, + "step": 2440 + }, + { + "epoch": 0.036954357597514254, + "grad_norm": 0.9282560701086844, + "learning_rate": 1.9963441944988823e-05, + "loss": 1.4166, + "step": 2450 + }, + { + "epoch": 0.03710519171015717, + "grad_norm": 0.8995028021485915, + "learning_rate": 1.996303189753981e-05, + "loss": 1.4517, + "step": 2460 + }, + { + "epoch": 0.03725602582280008, + "grad_norm": 0.9223959301328966, + "learning_rate": 1.9962619567540907e-05, + "loss": 1.4373, + "step": 2470 + }, + { + "epoch": 0.037406859935443, + "grad_norm": 1.0660198711852034, + "learning_rate": 1.996220495508658e-05, + "loss": 1.4224, + "step": 2480 + }, + { + "epoch": 0.03755769404808591, + "grad_norm": 0.8827496134989145, + "learning_rate": 1.9961788060271815e-05, + "loss": 1.4427, + "step": 2490 + }, + { + "epoch": 0.03770852816072883, + "grad_norm": 0.9331813970601189, + "learning_rate": 1.996136888319213e-05, + "loss": 1.4224, + "step": 2500 + }, + { + "epoch": 0.03785936227337174, + "grad_norm": 0.905154315619218, + "learning_rate": 1.9960947423943554e-05, + "loss": 1.4196, + "step": 2510 + }, + { + "epoch": 0.03801019638601466, + "grad_norm": 0.9214327605166669, + "learning_rate": 1.9960523682622645e-05, + "loss": 1.4495, + "step": 2520 + }, + { + "epoch": 0.03816103049865758, + "grad_norm": 0.8514919177934788, + "learning_rate": 1.996009765932648e-05, + "loss": 1.4104, + "step": 2530 + }, + { + "epoch": 0.03831186461130049, + "grad_norm": 0.9139998782744221, + "learning_rate": 1.9959669354152668e-05, + "loss": 1.4529, + "step": 2540 + }, + { + "epoch": 0.03846269872394341, + "grad_norm": 0.8623578310583014, + "learning_rate": 1.995923876719933e-05, + "loss": 1.419, + "step": 2550 + }, + { + "epoch": 0.03861353283658632, + "grad_norm": 1.091875788719718, + "learning_rate": 1.995880589856512e-05, + "loss": 1.4437, + "step": 2560 + }, + { + "epoch": 0.03876436694922924, + "grad_norm": 0.959185288309008, + "learning_rate": 1.99583707483492e-05, + "loss": 1.4381, + "step": 2570 + }, + { + "epoch": 0.03891520106187215, + "grad_norm": 0.9534283146636786, + "learning_rate": 1.995793331665127e-05, + "loss": 1.4258, + "step": 2580 + }, + { + "epoch": 0.03906603517451507, + "grad_norm": 0.9364702515788047, + "learning_rate": 1.9957493603571545e-05, + "loss": 1.4408, + "step": 2590 + }, + { + "epoch": 0.039216869287157986, + "grad_norm": 0.9028661508685295, + "learning_rate": 1.9957051609210763e-05, + "loss": 1.4369, + "step": 2600 + }, + { + "epoch": 0.0393677033998009, + "grad_norm": 0.9131113200293222, + "learning_rate": 1.995660733367019e-05, + "loss": 1.4442, + "step": 2610 + }, + { + "epoch": 0.039518537512443816, + "grad_norm": 0.9158808176778979, + "learning_rate": 1.9956160777051605e-05, + "loss": 1.4321, + "step": 2620 + }, + { + "epoch": 0.03966937162508673, + "grad_norm": 0.9597043312052836, + "learning_rate": 1.9955711939457316e-05, + "loss": 1.4233, + "step": 2630 + }, + { + "epoch": 0.039820205737729646, + "grad_norm": 0.9677522263693541, + "learning_rate": 1.9955260820990156e-05, + "loss": 1.4274, + "step": 2640 + }, + { + "epoch": 0.03997103985037256, + "grad_norm": 0.9623123886496594, + "learning_rate": 1.9954807421753476e-05, + "loss": 1.4398, + "step": 2650 + }, + { + "epoch": 0.040121873963015475, + "grad_norm": 0.9012508834235863, + "learning_rate": 1.9954351741851146e-05, + "loss": 1.4341, + "step": 2660 + }, + { + "epoch": 0.040272708075658394, + "grad_norm": 0.8361911222310381, + "learning_rate": 1.9953893781387568e-05, + "loss": 1.4079, + "step": 2670 + }, + { + "epoch": 0.040423542188301305, + "grad_norm": 0.9224652587114515, + "learning_rate": 1.9953433540467663e-05, + "loss": 1.4157, + "step": 2680 + }, + { + "epoch": 0.04057437630094422, + "grad_norm": 0.974033005472384, + "learning_rate": 1.9952971019196868e-05, + "loss": 1.433, + "step": 2690 + }, + { + "epoch": 0.040725210413587135, + "grad_norm": 0.9783993944550442, + "learning_rate": 1.9952506217681147e-05, + "loss": 1.4306, + "step": 2700 + }, + { + "epoch": 0.04087604452623005, + "grad_norm": 0.9029824682723433, + "learning_rate": 1.9952039136026994e-05, + "loss": 1.431, + "step": 2710 + }, + { + "epoch": 0.041026878638872964, + "grad_norm": 0.9021299665782552, + "learning_rate": 1.9951569774341416e-05, + "loss": 1.4201, + "step": 2720 + }, + { + "epoch": 0.04117771275151588, + "grad_norm": 0.8622525846759819, + "learning_rate": 1.995109813273194e-05, + "loss": 1.4117, + "step": 2730 + }, + { + "epoch": 0.0413285468641588, + "grad_norm": 0.8742630797928759, + "learning_rate": 1.9950624211306623e-05, + "loss": 1.3949, + "step": 2740 + }, + { + "epoch": 0.04147938097680171, + "grad_norm": 0.8729777042428839, + "learning_rate": 1.9950148010174043e-05, + "loss": 1.3994, + "step": 2750 + }, + { + "epoch": 0.04163021508944463, + "grad_norm": 0.8703364888127278, + "learning_rate": 1.9949669529443295e-05, + "loss": 1.4, + "step": 2760 + }, + { + "epoch": 0.04178104920208754, + "grad_norm": 0.8908999369901163, + "learning_rate": 1.9949188769224002e-05, + "loss": 1.3951, + "step": 2770 + }, + { + "epoch": 0.04193188331473046, + "grad_norm": 0.8350469902343738, + "learning_rate": 1.9948705729626305e-05, + "loss": 1.4193, + "step": 2780 + }, + { + "epoch": 0.04208271742737337, + "grad_norm": 0.9375194926104646, + "learning_rate": 1.9948220410760874e-05, + "loss": 1.4157, + "step": 2790 + }, + { + "epoch": 0.04223355154001629, + "grad_norm": 0.983395583822878, + "learning_rate": 1.994773281273889e-05, + "loss": 1.3881, + "step": 2800 + }, + { + "epoch": 0.04238438565265921, + "grad_norm": 0.8698662604210865, + "learning_rate": 1.994724293567207e-05, + "loss": 1.4012, + "step": 2810 + }, + { + "epoch": 0.04253521976530212, + "grad_norm": 0.9048350551317943, + "learning_rate": 1.9946750779672642e-05, + "loss": 1.4068, + "step": 2820 + }, + { + "epoch": 0.04268605387794504, + "grad_norm": 0.8892842936502975, + "learning_rate": 1.994625634485336e-05, + "loss": 1.4129, + "step": 2830 + }, + { + "epoch": 0.04283688799058795, + "grad_norm": 0.8653435194480199, + "learning_rate": 1.9945759631327495e-05, + "loss": 1.4004, + "step": 2840 + }, + { + "epoch": 0.04298772210323087, + "grad_norm": 0.888380954108334, + "learning_rate": 1.9945260639208857e-05, + "loss": 1.4228, + "step": 2850 + }, + { + "epoch": 0.04313855621587378, + "grad_norm": 0.9112586245223439, + "learning_rate": 1.9944759368611754e-05, + "loss": 1.3991, + "step": 2860 + }, + { + "epoch": 0.0432893903285167, + "grad_norm": 0.9342421750923496, + "learning_rate": 1.9944255819651033e-05, + "loss": 1.3922, + "step": 2870 + }, + { + "epoch": 0.043440224441159615, + "grad_norm": 0.8801157442486394, + "learning_rate": 1.9943749992442057e-05, + "loss": 1.3899, + "step": 2880 + }, + { + "epoch": 0.04359105855380253, + "grad_norm": 0.8834484980015511, + "learning_rate": 1.994324188710072e-05, + "loss": 1.4015, + "step": 2890 + }, + { + "epoch": 0.043741892666445445, + "grad_norm": 0.881223384539484, + "learning_rate": 1.994273150374342e-05, + "loss": 1.4014, + "step": 2900 + }, + { + "epoch": 0.043892726779088356, + "grad_norm": 0.8664136789865612, + "learning_rate": 1.9942218842487084e-05, + "loss": 1.3971, + "step": 2910 + }, + { + "epoch": 0.044043560891731275, + "grad_norm": 0.9158899324891896, + "learning_rate": 1.9941703903449174e-05, + "loss": 1.4037, + "step": 2920 + }, + { + "epoch": 0.044194395004374186, + "grad_norm": 0.8435000575697257, + "learning_rate": 1.994118668674766e-05, + "loss": 1.4037, + "step": 2930 + }, + { + "epoch": 0.044345229117017104, + "grad_norm": 0.8180239405472856, + "learning_rate": 1.9940667192501034e-05, + "loss": 1.4079, + "step": 2940 + }, + { + "epoch": 0.04449606322966002, + "grad_norm": 0.7965515700950448, + "learning_rate": 1.9940145420828315e-05, + "loss": 1.3881, + "step": 2950 + }, + { + "epoch": 0.044646897342302934, + "grad_norm": 0.90385136419503, + "learning_rate": 1.993962137184904e-05, + "loss": 1.3979, + "step": 2960 + }, + { + "epoch": 0.04479773145494585, + "grad_norm": 0.9842761010131751, + "learning_rate": 1.9939095045683282e-05, + "loss": 1.399, + "step": 2970 + }, + { + "epoch": 0.044948565567588764, + "grad_norm": 0.9102777736485186, + "learning_rate": 1.9938566442451607e-05, + "loss": 1.3735, + "step": 2980 + }, + { + "epoch": 0.04509939968023168, + "grad_norm": 0.8480975418533405, + "learning_rate": 1.9938035562275124e-05, + "loss": 1.3903, + "step": 2990 + }, + { + "epoch": 0.04525023379287459, + "grad_norm": 0.8610996407068455, + "learning_rate": 1.993750240527546e-05, + "loss": 1.3937, + "step": 3000 + }, + { + "epoch": 0.04540106790551751, + "grad_norm": 0.8943424763367797, + "learning_rate": 1.9936966971574766e-05, + "loss": 1.3889, + "step": 3010 + }, + { + "epoch": 0.04555190201816043, + "grad_norm": 0.8760784171086284, + "learning_rate": 1.9936429261295706e-05, + "loss": 1.4096, + "step": 3020 + }, + { + "epoch": 0.04570273613080334, + "grad_norm": 1.1471896666655044, + "learning_rate": 1.9935889274561467e-05, + "loss": 1.3729, + "step": 3030 + }, + { + "epoch": 0.04585357024344626, + "grad_norm": 0.9177719929625547, + "learning_rate": 1.9935347011495773e-05, + "loss": 1.4059, + "step": 3040 + }, + { + "epoch": 0.04600440435608917, + "grad_norm": 0.9607257115621206, + "learning_rate": 1.9934802472222847e-05, + "loss": 1.391, + "step": 3050 + }, + { + "epoch": 0.04615523846873209, + "grad_norm": 0.8642324117413623, + "learning_rate": 1.9934255656867444e-05, + "loss": 1.3973, + "step": 3060 + }, + { + "epoch": 0.046306072581375, + "grad_norm": 0.8771577094002971, + "learning_rate": 1.9933706565554847e-05, + "loss": 1.3835, + "step": 3070 + }, + { + "epoch": 0.04645690669401792, + "grad_norm": 0.8381945614048434, + "learning_rate": 1.993315519841085e-05, + "loss": 1.3896, + "step": 3080 + }, + { + "epoch": 0.04660774080666084, + "grad_norm": 0.8908314560433126, + "learning_rate": 1.9932601555561774e-05, + "loss": 1.4048, + "step": 3090 + }, + { + "epoch": 0.04675857491930375, + "grad_norm": 0.8151618911750504, + "learning_rate": 1.9932045637134457e-05, + "loss": 1.383, + "step": 3100 + }, + { + "epoch": 0.04690940903194667, + "grad_norm": 0.8857852000501794, + "learning_rate": 1.9931487443256263e-05, + "loss": 1.3774, + "step": 3110 + }, + { + "epoch": 0.04706024314458958, + "grad_norm": 0.8920613387658481, + "learning_rate": 1.9930926974055076e-05, + "loss": 1.3843, + "step": 3120 + }, + { + "epoch": 0.047211077257232496, + "grad_norm": 0.8365407712004922, + "learning_rate": 1.99303642296593e-05, + "loss": 1.3913, + "step": 3130 + }, + { + "epoch": 0.04736191136987541, + "grad_norm": 0.8642550308495022, + "learning_rate": 1.9929799210197854e-05, + "loss": 1.3933, + "step": 3140 + }, + { + "epoch": 0.047512745482518326, + "grad_norm": 0.8827469401683659, + "learning_rate": 1.99292319158002e-05, + "loss": 1.3814, + "step": 3150 + }, + { + "epoch": 0.047663579595161244, + "grad_norm": 0.9209122972827439, + "learning_rate": 1.9928662346596294e-05, + "loss": 1.3716, + "step": 3160 + }, + { + "epoch": 0.047814413707804156, + "grad_norm": 0.8922542612062088, + "learning_rate": 1.9928090502716625e-05, + "loss": 1.3789, + "step": 3170 + }, + { + "epoch": 0.047965247820447074, + "grad_norm": 0.9493768178027978, + "learning_rate": 1.992751638429221e-05, + "loss": 1.3774, + "step": 3180 + }, + { + "epoch": 0.048116081933089985, + "grad_norm": 0.8450335339689871, + "learning_rate": 1.9926939991454583e-05, + "loss": 1.3841, + "step": 3190 + }, + { + "epoch": 0.048266916045732904, + "grad_norm": 0.9283479359300119, + "learning_rate": 1.9926361324335786e-05, + "loss": 1.3882, + "step": 3200 + }, + { + "epoch": 0.048417750158375815, + "grad_norm": 0.8265880316322788, + "learning_rate": 1.99257803830684e-05, + "loss": 1.3667, + "step": 3210 + }, + { + "epoch": 0.04856858427101873, + "grad_norm": 0.8581891955040732, + "learning_rate": 1.992519716778552e-05, + "loss": 1.3697, + "step": 3220 + }, + { + "epoch": 0.04871941838366165, + "grad_norm": 0.829309832805809, + "learning_rate": 1.9924611678620757e-05, + "loss": 1.3685, + "step": 3230 + }, + { + "epoch": 0.04887025249630456, + "grad_norm": 0.9276710646440459, + "learning_rate": 1.992402391570825e-05, + "loss": 1.3716, + "step": 3240 + }, + { + "epoch": 0.04902108660894748, + "grad_norm": 0.8990449167980709, + "learning_rate": 1.9923433879182662e-05, + "loss": 1.3719, + "step": 3250 + }, + { + "epoch": 0.04917192072159039, + "grad_norm": 0.8475151710381479, + "learning_rate": 1.9922841569179165e-05, + "loss": 1.373, + "step": 3260 + }, + { + "epoch": 0.04932275483423331, + "grad_norm": 0.8463249474052319, + "learning_rate": 1.9922246985833458e-05, + "loss": 1.3744, + "step": 3270 + }, + { + "epoch": 0.04947358894687622, + "grad_norm": 0.839481722032419, + "learning_rate": 1.9921650129281767e-05, + "loss": 1.395, + "step": 3280 + }, + { + "epoch": 0.04962442305951914, + "grad_norm": 0.9089806884324553, + "learning_rate": 1.992105099966083e-05, + "loss": 1.4095, + "step": 3290 + }, + { + "epoch": 0.04977525717216206, + "grad_norm": 0.9540707490380903, + "learning_rate": 1.9920449597107905e-05, + "loss": 1.3814, + "step": 3300 + }, + { + "epoch": 0.04992609128480497, + "grad_norm": 0.960343539772368, + "learning_rate": 1.9919845921760777e-05, + "loss": 1.3651, + "step": 3310 + }, + { + "epoch": 0.05007692539744789, + "grad_norm": 0.8040049496532636, + "learning_rate": 1.9919239973757754e-05, + "loss": 1.3715, + "step": 3320 + }, + { + "epoch": 0.0502277595100908, + "grad_norm": 0.8785292078562049, + "learning_rate": 1.9918631753237653e-05, + "loss": 1.3762, + "step": 3330 + }, + { + "epoch": 0.05037859362273372, + "grad_norm": 0.8975439058622757, + "learning_rate": 1.991802126033982e-05, + "loss": 1.3702, + "step": 3340 + }, + { + "epoch": 0.05052942773537663, + "grad_norm": 0.8146879170593115, + "learning_rate": 1.9917408495204123e-05, + "loss": 1.3573, + "step": 3350 + }, + { + "epoch": 0.05068026184801955, + "grad_norm": 0.9028101903839235, + "learning_rate": 1.9916793457970948e-05, + "loss": 1.3582, + "step": 3360 + }, + { + "epoch": 0.050831095960662466, + "grad_norm": 0.8918626370440506, + "learning_rate": 1.9916176148781193e-05, + "loss": 1.3609, + "step": 3370 + }, + { + "epoch": 0.05098193007330538, + "grad_norm": 0.8254674293873725, + "learning_rate": 1.9915556567776297e-05, + "loss": 1.3606, + "step": 3380 + }, + { + "epoch": 0.051132764185948296, + "grad_norm": 0.8113694035600874, + "learning_rate": 1.99149347150982e-05, + "loss": 1.3644, + "step": 3390 + }, + { + "epoch": 0.05128359829859121, + "grad_norm": 0.8434326413316993, + "learning_rate": 1.991431059088937e-05, + "loss": 1.378, + "step": 3400 + }, + { + "epoch": 0.051434432411234125, + "grad_norm": 0.8707439516617267, + "learning_rate": 1.99136841952928e-05, + "loss": 1.3639, + "step": 3410 + }, + { + "epoch": 0.05158526652387704, + "grad_norm": 0.8874806749408258, + "learning_rate": 1.9913055528451988e-05, + "loss": 1.3544, + "step": 3420 + }, + { + "epoch": 0.051736100636519955, + "grad_norm": 0.8368678943136807, + "learning_rate": 1.9912424590510972e-05, + "loss": 1.3634, + "step": 3430 + }, + { + "epoch": 0.05188693474916287, + "grad_norm": 0.858419224489044, + "learning_rate": 1.99117913816143e-05, + "loss": 1.3606, + "step": 3440 + }, + { + "epoch": 0.052037768861805785, + "grad_norm": 0.8801151710354461, + "learning_rate": 1.991115590190704e-05, + "loss": 1.3686, + "step": 3450 + }, + { + "epoch": 0.0521886029744487, + "grad_norm": 0.8252193145956846, + "learning_rate": 1.9910518151534775e-05, + "loss": 1.363, + "step": 3460 + }, + { + "epoch": 0.052339437087091614, + "grad_norm": 0.9190230681209511, + "learning_rate": 1.9909878130643625e-05, + "loss": 1.3711, + "step": 3470 + }, + { + "epoch": 0.05249027119973453, + "grad_norm": 0.8600381483750655, + "learning_rate": 1.9909235839380216e-05, + "loss": 1.3631, + "step": 3480 + }, + { + "epoch": 0.052641105312377444, + "grad_norm": 0.8842524871237624, + "learning_rate": 1.99085912778917e-05, + "loss": 1.359, + "step": 3490 + }, + { + "epoch": 0.05279193942502036, + "grad_norm": 0.8236208190613394, + "learning_rate": 1.9907944446325743e-05, + "loss": 1.3342, + "step": 3500 + }, + { + "epoch": 0.05294277353766328, + "grad_norm": 0.8450723596900124, + "learning_rate": 1.990729534483054e-05, + "loss": 1.3526, + "step": 3510 + }, + { + "epoch": 0.05309360765030619, + "grad_norm": 0.7960860681887499, + "learning_rate": 1.99066439735548e-05, + "loss": 1.3456, + "step": 3520 + }, + { + "epoch": 0.05324444176294911, + "grad_norm": 0.8312312622214874, + "learning_rate": 1.990599033264775e-05, + "loss": 1.3372, + "step": 3530 + }, + { + "epoch": 0.05339527587559202, + "grad_norm": 0.8292984748383392, + "learning_rate": 1.9905334422259145e-05, + "loss": 1.3511, + "step": 3540 + }, + { + "epoch": 0.05354610998823494, + "grad_norm": 0.9051716930092867, + "learning_rate": 1.9904676242539255e-05, + "loss": 1.3618, + "step": 3550 + }, + { + "epoch": 0.05369694410087785, + "grad_norm": 0.8147497474368153, + "learning_rate": 1.9904015793638862e-05, + "loss": 1.3596, + "step": 3560 + }, + { + "epoch": 0.05384777821352077, + "grad_norm": 0.8979733579021891, + "learning_rate": 1.990335307570929e-05, + "loss": 1.3708, + "step": 3570 + }, + { + "epoch": 0.05399861232616369, + "grad_norm": 0.8284372589817613, + "learning_rate": 1.9902688088902358e-05, + "loss": 1.3517, + "step": 3580 + }, + { + "epoch": 0.0541494464388066, + "grad_norm": 0.7758233791140522, + "learning_rate": 1.9902020833370423e-05, + "loss": 1.3409, + "step": 3590 + }, + { + "epoch": 0.05430028055144952, + "grad_norm": 0.8926780614028951, + "learning_rate": 1.990135130926635e-05, + "loss": 1.3489, + "step": 3600 + }, + { + "epoch": 0.05445111466409243, + "grad_norm": 0.8576155244033683, + "learning_rate": 1.9900679516743525e-05, + "loss": 1.364, + "step": 3610 + }, + { + "epoch": 0.05460194877673535, + "grad_norm": 0.8671740992809918, + "learning_rate": 1.990000545595587e-05, + "loss": 1.3296, + "step": 3620 + }, + { + "epoch": 0.05475278288937826, + "grad_norm": 0.8235347812081165, + "learning_rate": 1.9899329127057795e-05, + "loss": 1.3335, + "step": 3630 + }, + { + "epoch": 0.05490361700202118, + "grad_norm": 0.7836198496484278, + "learning_rate": 1.9898650530204264e-05, + "loss": 1.3299, + "step": 3640 + }, + { + "epoch": 0.055054451114664095, + "grad_norm": 0.8440938114289751, + "learning_rate": 1.989796966555074e-05, + "loss": 1.3547, + "step": 3650 + }, + { + "epoch": 0.055205285227307006, + "grad_norm": 0.8152251495899566, + "learning_rate": 1.989728653325321e-05, + "loss": 1.339, + "step": 3660 + }, + { + "epoch": 0.055356119339949925, + "grad_norm": 0.8311512912608912, + "learning_rate": 1.9896601133468175e-05, + "loss": 1.3407, + "step": 3670 + }, + { + "epoch": 0.055506953452592836, + "grad_norm": 0.9195921992887641, + "learning_rate": 1.989591346635267e-05, + "loss": 1.3304, + "step": 3680 + }, + { + "epoch": 0.055657787565235754, + "grad_norm": 0.8217607287736957, + "learning_rate": 1.9895223532064242e-05, + "loss": 1.3655, + "step": 3690 + }, + { + "epoch": 0.055808621677878666, + "grad_norm": 0.8442828919529235, + "learning_rate": 1.9894531330760947e-05, + "loss": 1.3437, + "step": 3700 + }, + { + "epoch": 0.055959455790521584, + "grad_norm": 0.8421036767291469, + "learning_rate": 1.9893836862601373e-05, + "loss": 1.3302, + "step": 3710 + }, + { + "epoch": 0.0561102899031645, + "grad_norm": 0.7952562302498671, + "learning_rate": 1.9893140127744634e-05, + "loss": 1.3533, + "step": 3720 + }, + { + "epoch": 0.056261124015807414, + "grad_norm": 0.847613538003416, + "learning_rate": 1.9892441126350338e-05, + "loss": 1.3432, + "step": 3730 + }, + { + "epoch": 0.05641195812845033, + "grad_norm": 0.836737671964378, + "learning_rate": 1.9891739858578634e-05, + "loss": 1.3548, + "step": 3740 + }, + { + "epoch": 0.05656279224109324, + "grad_norm": 0.8721961824177357, + "learning_rate": 1.9891036324590186e-05, + "loss": 1.3437, + "step": 3750 + }, + { + "epoch": 0.05671362635373616, + "grad_norm": 0.8494044243366381, + "learning_rate": 1.9890330524546176e-05, + "loss": 1.3798, + "step": 3760 + }, + { + "epoch": 0.05686446046637907, + "grad_norm": 0.9082728529922043, + "learning_rate": 1.98896224586083e-05, + "loss": 1.3638, + "step": 3770 + }, + { + "epoch": 0.05701529457902199, + "grad_norm": 0.8486331598854757, + "learning_rate": 1.9888912126938778e-05, + "loss": 1.3184, + "step": 3780 + }, + { + "epoch": 0.05716612869166491, + "grad_norm": 0.8743404716959646, + "learning_rate": 1.988819952970035e-05, + "loss": 1.3474, + "step": 3790 + }, + { + "epoch": 0.05731696280430782, + "grad_norm": 0.8213353524002011, + "learning_rate": 1.988748466705627e-05, + "loss": 1.3624, + "step": 3800 + }, + { + "epoch": 0.05746779691695074, + "grad_norm": 0.8643236548435229, + "learning_rate": 1.988676753917032e-05, + "loss": 1.3386, + "step": 3810 + }, + { + "epoch": 0.05761863102959365, + "grad_norm": 0.8927718320036346, + "learning_rate": 1.988604814620679e-05, + "loss": 1.337, + "step": 3820 + }, + { + "epoch": 0.05776946514223657, + "grad_norm": 0.8444152477692979, + "learning_rate": 1.9885326488330498e-05, + "loss": 1.3275, + "step": 3830 + }, + { + "epoch": 0.05792029925487948, + "grad_norm": 0.8205165802804529, + "learning_rate": 1.9884602565706778e-05, + "loss": 1.3385, + "step": 3840 + }, + { + "epoch": 0.0580711333675224, + "grad_norm": 0.814709816823394, + "learning_rate": 1.9883876378501476e-05, + "loss": 1.3231, + "step": 3850 + }, + { + "epoch": 0.05822196748016532, + "grad_norm": 0.844168008870218, + "learning_rate": 1.988314792688097e-05, + "loss": 1.3386, + "step": 3860 + }, + { + "epoch": 0.05837280159280823, + "grad_norm": 0.8256933516169186, + "learning_rate": 1.9882417211012144e-05, + "loss": 1.3487, + "step": 3870 + }, + { + "epoch": 0.058523635705451146, + "grad_norm": 0.7841266154942558, + "learning_rate": 1.9881684231062412e-05, + "loss": 1.3262, + "step": 3880 + }, + { + "epoch": 0.05867446981809406, + "grad_norm": 0.8168677773875919, + "learning_rate": 1.9880948987199694e-05, + "loss": 1.3435, + "step": 3890 + }, + { + "epoch": 0.058825303930736976, + "grad_norm": 0.87409145571922, + "learning_rate": 1.9880211479592443e-05, + "loss": 1.3277, + "step": 3900 + }, + { + "epoch": 0.05897613804337989, + "grad_norm": 0.8175869971304841, + "learning_rate": 1.987947170840962e-05, + "loss": 1.3476, + "step": 3910 + }, + { + "epoch": 0.059126972156022806, + "grad_norm": 0.7935307605257822, + "learning_rate": 1.987872967382071e-05, + "loss": 1.3436, + "step": 3920 + }, + { + "epoch": 0.059277806268665724, + "grad_norm": 0.7847350428439266, + "learning_rate": 1.987798537599571e-05, + "loss": 1.3278, + "step": 3930 + }, + { + "epoch": 0.059428640381308635, + "grad_norm": 0.8107369844802103, + "learning_rate": 1.9877238815105142e-05, + "loss": 1.3338, + "step": 3940 + }, + { + "epoch": 0.059579474493951554, + "grad_norm": 0.8409348383890677, + "learning_rate": 1.9876489991320048e-05, + "loss": 1.3174, + "step": 3950 + }, + { + "epoch": 0.059730308606594465, + "grad_norm": 0.8465448645341159, + "learning_rate": 1.9875738904811983e-05, + "loss": 1.3365, + "step": 3960 + }, + { + "epoch": 0.05988114271923738, + "grad_norm": 0.815552121342426, + "learning_rate": 1.987498555575302e-05, + "loss": 1.3459, + "step": 3970 + }, + { + "epoch": 0.060031976831880295, + "grad_norm": 0.8315318393624116, + "learning_rate": 1.9874229944315755e-05, + "loss": 1.344, + "step": 3980 + }, + { + "epoch": 0.06018281094452321, + "grad_norm": 0.7924863659121979, + "learning_rate": 1.9873472070673297e-05, + "loss": 1.3421, + "step": 3990 + }, + { + "epoch": 0.06033364505716613, + "grad_norm": 0.7868964537073537, + "learning_rate": 1.9872711934999284e-05, + "loss": 1.3222, + "step": 4000 + }, + { + "epoch": 0.06048447916980904, + "grad_norm": 0.8455414525141646, + "learning_rate": 1.987194953746786e-05, + "loss": 1.3296, + "step": 4010 + }, + { + "epoch": 0.06063531328245196, + "grad_norm": 0.8193910973853732, + "learning_rate": 1.9871184878253684e-05, + "loss": 1.3289, + "step": 4020 + }, + { + "epoch": 0.06078614739509487, + "grad_norm": 0.8784869485345711, + "learning_rate": 1.9870417957531955e-05, + "loss": 1.3073, + "step": 4030 + }, + { + "epoch": 0.06093698150773779, + "grad_norm": 0.8017416999396423, + "learning_rate": 1.9869648775478363e-05, + "loss": 1.3403, + "step": 4040 + }, + { + "epoch": 0.0610878156203807, + "grad_norm": 0.8964479599821759, + "learning_rate": 1.9868877332269144e-05, + "loss": 1.3312, + "step": 4050 + }, + { + "epoch": 0.06123864973302362, + "grad_norm": 0.838172462466292, + "learning_rate": 1.986810362808102e-05, + "loss": 1.3056, + "step": 4060 + }, + { + "epoch": 0.06138948384566654, + "grad_norm": 0.8602790336667302, + "learning_rate": 1.986732766309126e-05, + "loss": 1.3141, + "step": 4070 + }, + { + "epoch": 0.06154031795830945, + "grad_norm": 0.8724304339426525, + "learning_rate": 1.9866549437477634e-05, + "loss": 1.3262, + "step": 4080 + }, + { + "epoch": 0.06169115207095237, + "grad_norm": 0.8503469541818812, + "learning_rate": 1.986576895141844e-05, + "loss": 1.3272, + "step": 4090 + }, + { + "epoch": 0.06184198618359528, + "grad_norm": 0.8708363438383171, + "learning_rate": 1.9864986205092483e-05, + "loss": 1.3247, + "step": 4100 + }, + { + "epoch": 0.0619928202962382, + "grad_norm": 0.8401094959665009, + "learning_rate": 1.9864201198679097e-05, + "loss": 1.3407, + "step": 4110 + }, + { + "epoch": 0.06214365440888111, + "grad_norm": 0.8752859234025614, + "learning_rate": 1.9863413932358123e-05, + "loss": 1.3067, + "step": 4120 + }, + { + "epoch": 0.06229448852152403, + "grad_norm": 0.812590567836639, + "learning_rate": 1.986262440630993e-05, + "loss": 1.3248, + "step": 4130 + }, + { + "epoch": 0.062445322634166946, + "grad_norm": 0.7627982916015565, + "learning_rate": 1.9861832620715394e-05, + "loss": 1.3288, + "step": 4140 + }, + { + "epoch": 0.06259615674680986, + "grad_norm": 0.8111682656836553, + "learning_rate": 1.986103857575592e-05, + "loss": 1.328, + "step": 4150 + }, + { + "epoch": 0.06274699085945278, + "grad_norm": 0.8228102745636254, + "learning_rate": 1.9860242271613425e-05, + "loss": 1.3213, + "step": 4160 + }, + { + "epoch": 0.06289782497209569, + "grad_norm": 0.8324778561876344, + "learning_rate": 1.985944370847034e-05, + "loss": 1.3311, + "step": 4170 + }, + { + "epoch": 0.0630486590847386, + "grad_norm": 0.8263493457804105, + "learning_rate": 1.9858642886509624e-05, + "loss": 1.3151, + "step": 4180 + }, + { + "epoch": 0.06319949319738152, + "grad_norm": 0.821793471462707, + "learning_rate": 1.985783980591474e-05, + "loss": 1.3131, + "step": 4190 + }, + { + "epoch": 0.06335032731002443, + "grad_norm": 0.8178228743105757, + "learning_rate": 1.985703446686968e-05, + "loss": 1.315, + "step": 4200 + }, + { + "epoch": 0.06350116142266735, + "grad_norm": 0.8881399058392304, + "learning_rate": 1.9856226869558944e-05, + "loss": 1.3287, + "step": 4210 + }, + { + "epoch": 0.06365199553531027, + "grad_norm": 0.8128419919559723, + "learning_rate": 1.9855417014167557e-05, + "loss": 1.3253, + "step": 4220 + }, + { + "epoch": 0.06380282964795318, + "grad_norm": 0.8039517648475216, + "learning_rate": 1.985460490088106e-05, + "loss": 1.323, + "step": 4230 + }, + { + "epoch": 0.0639536637605961, + "grad_norm": 0.7993898665818109, + "learning_rate": 1.9853790529885505e-05, + "loss": 1.3368, + "step": 4240 + }, + { + "epoch": 0.064104497873239, + "grad_norm": 0.8354149138629405, + "learning_rate": 1.9852973901367472e-05, + "loss": 1.315, + "step": 4250 + }, + { + "epoch": 0.06425533198588193, + "grad_norm": 0.859507644398019, + "learning_rate": 1.985215501551405e-05, + "loss": 1.2961, + "step": 4260 + }, + { + "epoch": 0.06440616609852484, + "grad_norm": 0.8309486768895253, + "learning_rate": 1.9851333872512845e-05, + "loss": 1.3105, + "step": 4270 + }, + { + "epoch": 0.06455700021116775, + "grad_norm": 0.7843641643318466, + "learning_rate": 1.9850510472551983e-05, + "loss": 1.309, + "step": 4280 + }, + { + "epoch": 0.06470783432381068, + "grad_norm": 0.8004201748203906, + "learning_rate": 1.984968481582011e-05, + "loss": 1.3196, + "step": 4290 + }, + { + "epoch": 0.06485866843645359, + "grad_norm": 0.9042305382063642, + "learning_rate": 1.984885690250638e-05, + "loss": 1.3239, + "step": 4300 + }, + { + "epoch": 0.0650095025490965, + "grad_norm": 0.8479311457165892, + "learning_rate": 1.9848026732800476e-05, + "loss": 1.299, + "step": 4310 + }, + { + "epoch": 0.06516033666173941, + "grad_norm": 0.7937120571347733, + "learning_rate": 1.984719430689259e-05, + "loss": 1.3152, + "step": 4320 + }, + { + "epoch": 0.06531117077438234, + "grad_norm": 0.7719107324597669, + "learning_rate": 1.9846359624973422e-05, + "loss": 1.3063, + "step": 4330 + }, + { + "epoch": 0.06546200488702525, + "grad_norm": 0.7906102862025144, + "learning_rate": 1.9845522687234214e-05, + "loss": 1.2895, + "step": 4340 + }, + { + "epoch": 0.06561283899966816, + "grad_norm": 0.8374056668078099, + "learning_rate": 1.98446834938667e-05, + "loss": 1.3163, + "step": 4350 + }, + { + "epoch": 0.06576367311231109, + "grad_norm": 0.8359468617270055, + "learning_rate": 1.984384204506315e-05, + "loss": 1.3006, + "step": 4360 + }, + { + "epoch": 0.065914507224954, + "grad_norm": 0.8073496427807921, + "learning_rate": 1.9842998341016333e-05, + "loss": 1.3209, + "step": 4370 + }, + { + "epoch": 0.06606534133759691, + "grad_norm": 0.8054172464348646, + "learning_rate": 1.9842152381919546e-05, + "loss": 1.3042, + "step": 4380 + }, + { + "epoch": 0.06621617545023982, + "grad_norm": 0.8618452780533568, + "learning_rate": 1.98413041679666e-05, + "loss": 1.3035, + "step": 4390 + }, + { + "epoch": 0.06636700956288274, + "grad_norm": 0.7676761182116588, + "learning_rate": 1.9840453699351823e-05, + "loss": 1.3091, + "step": 4400 + }, + { + "epoch": 0.06651784367552566, + "grad_norm": 0.7411033671427132, + "learning_rate": 1.983960097627006e-05, + "loss": 1.2882, + "step": 4410 + }, + { + "epoch": 0.06666867778816857, + "grad_norm": 0.8247001763235612, + "learning_rate": 1.9838745998916668e-05, + "loss": 1.3158, + "step": 4420 + }, + { + "epoch": 0.06681951190081149, + "grad_norm": 0.8159596486845633, + "learning_rate": 1.983788876748753e-05, + "loss": 1.2937, + "step": 4430 + }, + { + "epoch": 0.0669703460134544, + "grad_norm": 0.7831087588169757, + "learning_rate": 1.983702928217903e-05, + "loss": 1.2888, + "step": 4440 + }, + { + "epoch": 0.06712118012609732, + "grad_norm": 0.8354870527254219, + "learning_rate": 1.983616754318809e-05, + "loss": 1.2903, + "step": 4450 + }, + { + "epoch": 0.06727201423874023, + "grad_norm": 0.7825598490500797, + "learning_rate": 1.9835303550712123e-05, + "loss": 1.313, + "step": 4460 + }, + { + "epoch": 0.06742284835138315, + "grad_norm": 0.7839036011525812, + "learning_rate": 1.9834437304949082e-05, + "loss": 1.2772, + "step": 4470 + }, + { + "epoch": 0.06757368246402606, + "grad_norm": 0.7602863598774384, + "learning_rate": 1.983356880609742e-05, + "loss": 1.3122, + "step": 4480 + }, + { + "epoch": 0.06772451657666897, + "grad_norm": 0.7891050581847745, + "learning_rate": 1.9832698054356112e-05, + "loss": 1.2944, + "step": 4490 + }, + { + "epoch": 0.0678753506893119, + "grad_norm": 0.7876545609471626, + "learning_rate": 1.983182504992465e-05, + "loss": 1.3023, + "step": 4500 + }, + { + "epoch": 0.06802618480195481, + "grad_norm": 0.8368007051087184, + "learning_rate": 1.9830949793003044e-05, + "loss": 1.2914, + "step": 4510 + }, + { + "epoch": 0.06817701891459772, + "grad_norm": 0.7717553866115823, + "learning_rate": 1.983007228379181e-05, + "loss": 1.2834, + "step": 4520 + }, + { + "epoch": 0.06832785302724063, + "grad_norm": 0.8196234451983244, + "learning_rate": 1.9829192522491994e-05, + "loss": 1.2792, + "step": 4530 + }, + { + "epoch": 0.06847868713988356, + "grad_norm": 0.8403802915878934, + "learning_rate": 1.9828310509305148e-05, + "loss": 1.2907, + "step": 4540 + }, + { + "epoch": 0.06862952125252647, + "grad_norm": 0.7794728385651801, + "learning_rate": 1.982742624443334e-05, + "loss": 1.3039, + "step": 4550 + }, + { + "epoch": 0.06878035536516938, + "grad_norm": 0.8271730120264229, + "learning_rate": 1.9826539728079164e-05, + "loss": 1.2955, + "step": 4560 + }, + { + "epoch": 0.06893118947781231, + "grad_norm": 0.8864429508301392, + "learning_rate": 1.9825650960445718e-05, + "loss": 1.3036, + "step": 4570 + }, + { + "epoch": 0.06908202359045522, + "grad_norm": 0.7288551266485832, + "learning_rate": 1.9824759941736616e-05, + "loss": 1.2784, + "step": 4580 + }, + { + "epoch": 0.06923285770309813, + "grad_norm": 0.7934953945466575, + "learning_rate": 1.9823866672156e-05, + "loss": 1.3012, + "step": 4590 + }, + { + "epoch": 0.06938369181574104, + "grad_norm": 0.7786774370353985, + "learning_rate": 1.982297115190852e-05, + "loss": 1.299, + "step": 4600 + }, + { + "epoch": 0.06953452592838397, + "grad_norm": 0.7968349575373216, + "learning_rate": 1.9822073381199335e-05, + "loss": 1.3162, + "step": 4610 + }, + { + "epoch": 0.06968536004102688, + "grad_norm": 0.8106781444680801, + "learning_rate": 1.982117336023413e-05, + "loss": 1.3104, + "step": 4620 + }, + { + "epoch": 0.06983619415366979, + "grad_norm": 0.8962653657935393, + "learning_rate": 1.98202710892191e-05, + "loss": 1.2891, + "step": 4630 + }, + { + "epoch": 0.06998702826631271, + "grad_norm": 0.7487361003786, + "learning_rate": 1.9819366568360957e-05, + "loss": 1.2878, + "step": 4640 + }, + { + "epoch": 0.07013786237895563, + "grad_norm": 0.8005324904545233, + "learning_rate": 1.981845979786693e-05, + "loss": 1.2777, + "step": 4650 + }, + { + "epoch": 0.07028869649159854, + "grad_norm": 0.818395648823212, + "learning_rate": 1.981755077794476e-05, + "loss": 1.3133, + "step": 4660 + }, + { + "epoch": 0.07043953060424145, + "grad_norm": 0.8219802169694262, + "learning_rate": 1.981663950880271e-05, + "loss": 1.2976, + "step": 4670 + }, + { + "epoch": 0.07059036471688437, + "grad_norm": 0.8058976107649065, + "learning_rate": 1.9815725990649544e-05, + "loss": 1.3017, + "step": 4680 + }, + { + "epoch": 0.07074119882952729, + "grad_norm": 0.8267544512447721, + "learning_rate": 1.9814810223694564e-05, + "loss": 1.3129, + "step": 4690 + }, + { + "epoch": 0.0708920329421702, + "grad_norm": 0.8139271398442287, + "learning_rate": 1.981389220814756e-05, + "loss": 1.2866, + "step": 4700 + }, + { + "epoch": 0.07104286705481312, + "grad_norm": 0.7986142768270879, + "learning_rate": 1.981297194421886e-05, + "loss": 1.3072, + "step": 4710 + }, + { + "epoch": 0.07119370116745603, + "grad_norm": 0.7395834804918409, + "learning_rate": 1.9812049432119296e-05, + "loss": 1.2796, + "step": 4720 + }, + { + "epoch": 0.07134453528009894, + "grad_norm": 0.8498209257020126, + "learning_rate": 1.981112467206022e-05, + "loss": 1.2954, + "step": 4730 + }, + { + "epoch": 0.07149536939274186, + "grad_norm": 0.7864746935631592, + "learning_rate": 1.981019766425349e-05, + "loss": 1.2784, + "step": 4740 + }, + { + "epoch": 0.07164620350538478, + "grad_norm": 0.8127546394942874, + "learning_rate": 1.980926840891149e-05, + "loss": 1.2871, + "step": 4750 + }, + { + "epoch": 0.07179703761802769, + "grad_norm": 0.7946697727719457, + "learning_rate": 1.9808336906247113e-05, + "loss": 1.3033, + "step": 4760 + }, + { + "epoch": 0.0719478717306706, + "grad_norm": 0.7993831402145071, + "learning_rate": 1.980740315647377e-05, + "loss": 1.3012, + "step": 4770 + }, + { + "epoch": 0.07209870584331353, + "grad_norm": 0.8029802842777787, + "learning_rate": 1.980646715980538e-05, + "loss": 1.2919, + "step": 4780 + }, + { + "epoch": 0.07224953995595644, + "grad_norm": 0.8420566536279236, + "learning_rate": 1.9805528916456386e-05, + "loss": 1.2738, + "step": 4790 + }, + { + "epoch": 0.07240037406859935, + "grad_norm": 0.787462680958289, + "learning_rate": 1.980458842664174e-05, + "loss": 1.3098, + "step": 4800 + }, + { + "epoch": 0.07255120818124226, + "grad_norm": 0.7760886227440793, + "learning_rate": 1.980364569057691e-05, + "loss": 1.2773, + "step": 4810 + }, + { + "epoch": 0.07270204229388519, + "grad_norm": 0.7507489784156713, + "learning_rate": 1.980270070847788e-05, + "loss": 1.2963, + "step": 4820 + }, + { + "epoch": 0.0728528764065281, + "grad_norm": 0.8729904790816928, + "learning_rate": 1.9801753480561146e-05, + "loss": 1.2835, + "step": 4830 + }, + { + "epoch": 0.07300371051917101, + "grad_norm": 0.7517475348515094, + "learning_rate": 1.9800804007043715e-05, + "loss": 1.2787, + "step": 4840 + }, + { + "epoch": 0.07315454463181394, + "grad_norm": 0.791883972539114, + "learning_rate": 1.9799852288143128e-05, + "loss": 1.294, + "step": 4850 + }, + { + "epoch": 0.07330537874445685, + "grad_norm": 0.7602679895134862, + "learning_rate": 1.9798898324077407e-05, + "loss": 1.3017, + "step": 4860 + }, + { + "epoch": 0.07345621285709976, + "grad_norm": 0.7818002677162748, + "learning_rate": 1.9797942115065117e-05, + "loss": 1.2849, + "step": 4870 + }, + { + "epoch": 0.07360704696974267, + "grad_norm": 0.807603158254249, + "learning_rate": 1.9796983661325328e-05, + "loss": 1.2843, + "step": 4880 + }, + { + "epoch": 0.0737578810823856, + "grad_norm": 0.802596610315488, + "learning_rate": 1.9796022963077618e-05, + "loss": 1.2825, + "step": 4890 + }, + { + "epoch": 0.07390871519502851, + "grad_norm": 0.7440469934263939, + "learning_rate": 1.979506002054209e-05, + "loss": 1.2898, + "step": 4900 + }, + { + "epoch": 0.07405954930767142, + "grad_norm": 0.7847887085003785, + "learning_rate": 1.9794094833939357e-05, + "loss": 1.2976, + "step": 4910 + }, + { + "epoch": 0.07421038342031434, + "grad_norm": 0.7688122540444462, + "learning_rate": 1.979312740349054e-05, + "loss": 1.2807, + "step": 4920 + }, + { + "epoch": 0.07436121753295726, + "grad_norm": 0.7555248920290291, + "learning_rate": 1.9792157729417285e-05, + "loss": 1.2938, + "step": 4930 + }, + { + "epoch": 0.07451205164560017, + "grad_norm": 0.7781398158801671, + "learning_rate": 1.979118581194174e-05, + "loss": 1.2838, + "step": 4940 + }, + { + "epoch": 0.07466288575824308, + "grad_norm": 0.8349738816564368, + "learning_rate": 1.9790211651286577e-05, + "loss": 1.3253, + "step": 4950 + }, + { + "epoch": 0.074813719870886, + "grad_norm": 0.7930147677132928, + "learning_rate": 1.9789235247674978e-05, + "loss": 1.2964, + "step": 4960 + }, + { + "epoch": 0.07496455398352891, + "grad_norm": 0.7861346385167807, + "learning_rate": 1.9788256601330632e-05, + "loss": 1.282, + "step": 4970 + }, + { + "epoch": 0.07511538809617183, + "grad_norm": 0.8254623403059599, + "learning_rate": 1.978727571247776e-05, + "loss": 1.296, + "step": 4980 + }, + { + "epoch": 0.07526622220881475, + "grad_norm": 0.8364627254112657, + "learning_rate": 1.9786292581341082e-05, + "loss": 1.2906, + "step": 4990 + }, + { + "epoch": 0.07541705632145766, + "grad_norm": 0.787256806882557, + "learning_rate": 1.978530720814583e-05, + "loss": 1.2784, + "step": 5000 + }, + { + "epoch": 0.07556789043410057, + "grad_norm": 0.8068208258827804, + "learning_rate": 1.978431959311776e-05, + "loss": 1.29, + "step": 5010 + }, + { + "epoch": 0.07571872454674348, + "grad_norm": 0.8004411631808093, + "learning_rate": 1.9783329736483137e-05, + "loss": 1.2848, + "step": 5020 + }, + { + "epoch": 0.07586955865938641, + "grad_norm": 0.792804387777248, + "learning_rate": 1.9782337638468733e-05, + "loss": 1.2812, + "step": 5030 + }, + { + "epoch": 0.07602039277202932, + "grad_norm": 0.7480178690129704, + "learning_rate": 1.9781343299301847e-05, + "loss": 1.2766, + "step": 5040 + }, + { + "epoch": 0.07617122688467223, + "grad_norm": 0.7679270683666611, + "learning_rate": 1.978034671921028e-05, + "loss": 1.2915, + "step": 5050 + }, + { + "epoch": 0.07632206099731516, + "grad_norm": 0.7966466593900228, + "learning_rate": 1.9779347898422352e-05, + "loss": 1.276, + "step": 5060 + }, + { + "epoch": 0.07647289510995807, + "grad_norm": 0.7976537724077783, + "learning_rate": 1.9778346837166893e-05, + "loss": 1.2797, + "step": 5070 + }, + { + "epoch": 0.07662372922260098, + "grad_norm": 0.7876498159082201, + "learning_rate": 1.9777343535673247e-05, + "loss": 1.2769, + "step": 5080 + }, + { + "epoch": 0.07677456333524389, + "grad_norm": 0.7969858194608791, + "learning_rate": 1.9776337994171278e-05, + "loss": 1.2788, + "step": 5090 + }, + { + "epoch": 0.07692539744788682, + "grad_norm": 0.7950224099889297, + "learning_rate": 1.9775330212891352e-05, + "loss": 1.278, + "step": 5100 + }, + { + "epoch": 0.07707623156052973, + "grad_norm": 0.7642377015840437, + "learning_rate": 1.9774320192064354e-05, + "loss": 1.2792, + "step": 5110 + }, + { + "epoch": 0.07722706567317264, + "grad_norm": 0.7540157750106113, + "learning_rate": 1.977330793192169e-05, + "loss": 1.2953, + "step": 5120 + }, + { + "epoch": 0.07737789978581557, + "grad_norm": 0.7447888086217911, + "learning_rate": 1.9772293432695256e-05, + "loss": 1.2526, + "step": 5130 + }, + { + "epoch": 0.07752873389845848, + "grad_norm": 0.756993273497143, + "learning_rate": 1.9771276694617486e-05, + "loss": 1.2824, + "step": 5140 + }, + { + "epoch": 0.07767956801110139, + "grad_norm": 0.7593147387674661, + "learning_rate": 1.9770257717921316e-05, + "loss": 1.2793, + "step": 5150 + }, + { + "epoch": 0.0778304021237443, + "grad_norm": 0.7313644902243374, + "learning_rate": 1.9769236502840193e-05, + "loss": 1.2923, + "step": 5160 + }, + { + "epoch": 0.07798123623638722, + "grad_norm": 1.1382642212434386, + "learning_rate": 1.976821304960808e-05, + "loss": 1.2723, + "step": 5170 + }, + { + "epoch": 0.07813207034903014, + "grad_norm": 0.8127233720062476, + "learning_rate": 1.976718735845945e-05, + "loss": 1.307, + "step": 5180 + }, + { + "epoch": 0.07828290446167305, + "grad_norm": 0.7897351448930232, + "learning_rate": 1.97661594296293e-05, + "loss": 1.2744, + "step": 5190 + }, + { + "epoch": 0.07843373857431597, + "grad_norm": 0.7981569383061228, + "learning_rate": 1.976512926335312e-05, + "loss": 1.2871, + "step": 5200 + }, + { + "epoch": 0.07858457268695888, + "grad_norm": 0.8088437298006997, + "learning_rate": 1.9764096859866925e-05, + "loss": 1.2956, + "step": 5210 + }, + { + "epoch": 0.0787354067996018, + "grad_norm": 0.7731533715408412, + "learning_rate": 1.9763062219407242e-05, + "loss": 1.2747, + "step": 5220 + }, + { + "epoch": 0.0788862409122447, + "grad_norm": 0.8032434375778046, + "learning_rate": 1.9762025342211112e-05, + "loss": 1.2865, + "step": 5230 + }, + { + "epoch": 0.07903707502488763, + "grad_norm": 0.776785254137973, + "learning_rate": 1.976098622851608e-05, + "loss": 1.2758, + "step": 5240 + }, + { + "epoch": 0.07918790913753054, + "grad_norm": 0.7887708544200597, + "learning_rate": 1.9759944878560216e-05, + "loss": 1.2842, + "step": 5250 + }, + { + "epoch": 0.07933874325017345, + "grad_norm": 0.8100921979952653, + "learning_rate": 1.9758901292582088e-05, + "loss": 1.2796, + "step": 5260 + }, + { + "epoch": 0.07948957736281638, + "grad_norm": 0.779988372940657, + "learning_rate": 1.975785547082079e-05, + "loss": 1.2715, + "step": 5270 + }, + { + "epoch": 0.07964041147545929, + "grad_norm": 0.7684327630534681, + "learning_rate": 1.975680741351592e-05, + "loss": 1.2787, + "step": 5280 + }, + { + "epoch": 0.0797912455881022, + "grad_norm": 0.8706729279302883, + "learning_rate": 1.9755757120907584e-05, + "loss": 1.2701, + "step": 5290 + }, + { + "epoch": 0.07994207970074511, + "grad_norm": 0.8049923181674095, + "learning_rate": 1.9754704593236414e-05, + "loss": 1.2791, + "step": 5300 + }, + { + "epoch": 0.08009291381338804, + "grad_norm": 0.7721186003884933, + "learning_rate": 1.975364983074354e-05, + "loss": 1.2795, + "step": 5310 + }, + { + "epoch": 0.08024374792603095, + "grad_norm": 0.7594930232483279, + "learning_rate": 1.9752592833670616e-05, + "loss": 1.2652, + "step": 5320 + }, + { + "epoch": 0.08039458203867386, + "grad_norm": 0.7599032037765712, + "learning_rate": 1.9751533602259795e-05, + "loss": 1.2712, + "step": 5330 + }, + { + "epoch": 0.08054541615131679, + "grad_norm": 0.8118597617991082, + "learning_rate": 1.9750472136753757e-05, + "loss": 1.271, + "step": 5340 + }, + { + "epoch": 0.0806962502639597, + "grad_norm": 0.798988875671718, + "learning_rate": 1.9749408437395684e-05, + "loss": 1.26, + "step": 5350 + }, + { + "epoch": 0.08084708437660261, + "grad_norm": 0.7855911976039621, + "learning_rate": 1.9748342504429262e-05, + "loss": 1.2665, + "step": 5360 + }, + { + "epoch": 0.08099791848924552, + "grad_norm": 0.7617491066387996, + "learning_rate": 1.9747274338098712e-05, + "loss": 1.2681, + "step": 5370 + }, + { + "epoch": 0.08114875260188845, + "grad_norm": 0.8083643209310135, + "learning_rate": 1.9746203938648745e-05, + "loss": 1.2675, + "step": 5380 + }, + { + "epoch": 0.08129958671453136, + "grad_norm": 0.7794512020344742, + "learning_rate": 1.9745131306324593e-05, + "loss": 1.2515, + "step": 5390 + }, + { + "epoch": 0.08145042082717427, + "grad_norm": 0.7541756016251304, + "learning_rate": 1.9744056441372e-05, + "loss": 1.296, + "step": 5400 + }, + { + "epoch": 0.0816012549398172, + "grad_norm": 0.802620561289381, + "learning_rate": 1.974297934403721e-05, + "loss": 1.2926, + "step": 5410 + }, + { + "epoch": 0.0817520890524601, + "grad_norm": 0.7628736199611997, + "learning_rate": 1.9741900014567006e-05, + "loss": 1.2722, + "step": 5420 + }, + { + "epoch": 0.08190292316510302, + "grad_norm": 0.7957646867996123, + "learning_rate": 1.974081845320865e-05, + "loss": 1.2691, + "step": 5430 + }, + { + "epoch": 0.08205375727774593, + "grad_norm": 0.7719604621863513, + "learning_rate": 1.9739734660209937e-05, + "loss": 1.2622, + "step": 5440 + }, + { + "epoch": 0.08220459139038885, + "grad_norm": 0.7648172090827998, + "learning_rate": 1.9738648635819162e-05, + "loss": 1.2691, + "step": 5450 + }, + { + "epoch": 0.08235542550303177, + "grad_norm": 0.7476203600433012, + "learning_rate": 1.9737560380285138e-05, + "loss": 1.2585, + "step": 5460 + }, + { + "epoch": 0.08250625961567468, + "grad_norm": 0.7835821965749306, + "learning_rate": 1.9736469893857184e-05, + "loss": 1.2724, + "step": 5470 + }, + { + "epoch": 0.0826570937283176, + "grad_norm": 0.8142927256384452, + "learning_rate": 1.9735377176785132e-05, + "loss": 1.2763, + "step": 5480 + }, + { + "epoch": 0.08280792784096051, + "grad_norm": 0.7448904579554764, + "learning_rate": 1.973428222931933e-05, + "loss": 1.2577, + "step": 5490 + }, + { + "epoch": 0.08295876195360342, + "grad_norm": 0.7476616413287814, + "learning_rate": 1.9733185051710628e-05, + "loss": 1.265, + "step": 5500 + }, + { + "epoch": 0.08310959606624634, + "grad_norm": 0.743500172930043, + "learning_rate": 1.9732085644210396e-05, + "loss": 1.2645, + "step": 5510 + }, + { + "epoch": 0.08326043017888926, + "grad_norm": 0.8077565769087862, + "learning_rate": 1.9730984007070507e-05, + "loss": 1.2717, + "step": 5520 + }, + { + "epoch": 0.08341126429153217, + "grad_norm": 0.7637094119391025, + "learning_rate": 1.9729880140543346e-05, + "loss": 1.2673, + "step": 5530 + }, + { + "epoch": 0.08356209840417508, + "grad_norm": 0.8792630126821015, + "learning_rate": 1.9728774044881818e-05, + "loss": 1.2576, + "step": 5540 + }, + { + "epoch": 0.08371293251681801, + "grad_norm": 0.7605189298716021, + "learning_rate": 1.9727665720339328e-05, + "loss": 1.2579, + "step": 5550 + }, + { + "epoch": 0.08386376662946092, + "grad_norm": 0.7122298959623384, + "learning_rate": 1.972655516716979e-05, + "loss": 1.2498, + "step": 5560 + }, + { + "epoch": 0.08401460074210383, + "grad_norm": 0.8352390459154005, + "learning_rate": 1.9725442385627644e-05, + "loss": 1.2595, + "step": 5570 + }, + { + "epoch": 0.08416543485474674, + "grad_norm": 0.7700614657249166, + "learning_rate": 1.9724327375967826e-05, + "loss": 1.2797, + "step": 5580 + }, + { + "epoch": 0.08431626896738967, + "grad_norm": 0.7592179634205316, + "learning_rate": 1.9723210138445782e-05, + "loss": 1.2581, + "step": 5590 + }, + { + "epoch": 0.08446710308003258, + "grad_norm": 0.775449822386392, + "learning_rate": 1.972209067331748e-05, + "loss": 1.2647, + "step": 5600 + }, + { + "epoch": 0.08461793719267549, + "grad_norm": 0.8275531255402095, + "learning_rate": 1.9720968980839385e-05, + "loss": 1.2586, + "step": 5610 + }, + { + "epoch": 0.08476877130531842, + "grad_norm": 0.7473394355524774, + "learning_rate": 1.971984506126849e-05, + "loss": 1.303, + "step": 5620 + }, + { + "epoch": 0.08491960541796133, + "grad_norm": 0.7667702635387073, + "learning_rate": 1.9718718914862272e-05, + "loss": 1.2664, + "step": 5630 + }, + { + "epoch": 0.08507043953060424, + "grad_norm": 0.7737136500854324, + "learning_rate": 1.971759054187875e-05, + "loss": 1.2615, + "step": 5640 + }, + { + "epoch": 0.08522127364324715, + "grad_norm": 0.763728099634977, + "learning_rate": 1.9716459942576422e-05, + "loss": 1.2414, + "step": 5650 + }, + { + "epoch": 0.08537210775589008, + "grad_norm": 0.7342616319474465, + "learning_rate": 1.9715327117214315e-05, + "loss": 1.2568, + "step": 5660 + }, + { + "epoch": 0.08552294186853299, + "grad_norm": 0.8013013486595485, + "learning_rate": 1.9714192066051968e-05, + "loss": 1.2425, + "step": 5670 + }, + { + "epoch": 0.0856737759811759, + "grad_norm": 0.811012189528632, + "learning_rate": 1.9713054789349418e-05, + "loss": 1.2475, + "step": 5680 + }, + { + "epoch": 0.08582461009381882, + "grad_norm": 0.7646468246694303, + "learning_rate": 1.9711915287367214e-05, + "loss": 1.2386, + "step": 5690 + }, + { + "epoch": 0.08597544420646173, + "grad_norm": 0.8030596701484678, + "learning_rate": 1.9710773560366424e-05, + "loss": 1.2461, + "step": 5700 + }, + { + "epoch": 0.08612627831910465, + "grad_norm": 0.7984814784087498, + "learning_rate": 1.9709629608608613e-05, + "loss": 1.2672, + "step": 5710 + }, + { + "epoch": 0.08627711243174756, + "grad_norm": 0.7614731786515381, + "learning_rate": 1.9708483432355875e-05, + "loss": 1.2516, + "step": 5720 + }, + { + "epoch": 0.08642794654439048, + "grad_norm": 0.8414875030460908, + "learning_rate": 1.9707335031870786e-05, + "loss": 1.2672, + "step": 5730 + }, + { + "epoch": 0.0865787806570334, + "grad_norm": 0.7827609965619916, + "learning_rate": 1.970618440741646e-05, + "loss": 1.2627, + "step": 5740 + }, + { + "epoch": 0.0867296147696763, + "grad_norm": 0.7524964805734559, + "learning_rate": 1.97050315592565e-05, + "loss": 1.2516, + "step": 5750 + }, + { + "epoch": 0.08688044888231923, + "grad_norm": 0.7441598786128564, + "learning_rate": 1.9703876487655022e-05, + "loss": 1.2545, + "step": 5760 + }, + { + "epoch": 0.08703128299496214, + "grad_norm": 0.7744274737616788, + "learning_rate": 1.9702719192876665e-05, + "loss": 1.2628, + "step": 5770 + }, + { + "epoch": 0.08718211710760505, + "grad_norm": 0.7745019746184977, + "learning_rate": 1.970155967518656e-05, + "loss": 1.2487, + "step": 5780 + }, + { + "epoch": 0.08733295122024796, + "grad_norm": 0.7744062587283363, + "learning_rate": 1.970039793485036e-05, + "loss": 1.2901, + "step": 5790 + }, + { + "epoch": 0.08748378533289089, + "grad_norm": 0.7684263321264545, + "learning_rate": 1.9699233972134218e-05, + "loss": 1.2352, + "step": 5800 + }, + { + "epoch": 0.0876346194455338, + "grad_norm": 0.7159136706528986, + "learning_rate": 1.9698067787304802e-05, + "loss": 1.2451, + "step": 5810 + }, + { + "epoch": 0.08778545355817671, + "grad_norm": 0.8096788333696309, + "learning_rate": 1.9696899380629286e-05, + "loss": 1.247, + "step": 5820 + }, + { + "epoch": 0.08793628767081964, + "grad_norm": 0.7596919602609281, + "learning_rate": 1.9695728752375357e-05, + "loss": 1.2833, + "step": 5830 + }, + { + "epoch": 0.08808712178346255, + "grad_norm": 0.7633709351888895, + "learning_rate": 1.96945559028112e-05, + "loss": 1.2668, + "step": 5840 + }, + { + "epoch": 0.08823795589610546, + "grad_norm": 0.7789323333396138, + "learning_rate": 1.9693380832205533e-05, + "loss": 1.2411, + "step": 5850 + }, + { + "epoch": 0.08838879000874837, + "grad_norm": 0.8451431751243141, + "learning_rate": 1.969220354082755e-05, + "loss": 1.2499, + "step": 5860 + }, + { + "epoch": 0.0885396241213913, + "grad_norm": 0.7628363607684209, + "learning_rate": 1.9691024028946982e-05, + "loss": 1.2578, + "step": 5870 + }, + { + "epoch": 0.08869045823403421, + "grad_norm": 0.7785530322917137, + "learning_rate": 1.968984229683405e-05, + "loss": 1.2527, + "step": 5880 + }, + { + "epoch": 0.08884129234667712, + "grad_norm": 0.7408732447591686, + "learning_rate": 1.9688658344759494e-05, + "loss": 1.2571, + "step": 5890 + }, + { + "epoch": 0.08899212645932005, + "grad_norm": 0.7330697098048119, + "learning_rate": 1.9687472172994564e-05, + "loss": 1.2446, + "step": 5900 + }, + { + "epoch": 0.08914296057196296, + "grad_norm": 0.7510693282930618, + "learning_rate": 1.9686283781811012e-05, + "loss": 1.2224, + "step": 5910 + }, + { + "epoch": 0.08929379468460587, + "grad_norm": 0.7698629423529786, + "learning_rate": 1.9685093171481095e-05, + "loss": 1.2511, + "step": 5920 + }, + { + "epoch": 0.08944462879724878, + "grad_norm": 0.8696200059869212, + "learning_rate": 1.9683900342277587e-05, + "loss": 1.257, + "step": 5930 + }, + { + "epoch": 0.0895954629098917, + "grad_norm": 0.7853462950173259, + "learning_rate": 1.9682705294473773e-05, + "loss": 1.2368, + "step": 5940 + }, + { + "epoch": 0.08974629702253462, + "grad_norm": 0.7874417764188654, + "learning_rate": 1.9681508028343433e-05, + "loss": 1.2625, + "step": 5950 + }, + { + "epoch": 0.08989713113517753, + "grad_norm": 0.7609385066034349, + "learning_rate": 1.9680308544160868e-05, + "loss": 1.2509, + "step": 5960 + }, + { + "epoch": 0.09004796524782045, + "grad_norm": 0.7630231756677173, + "learning_rate": 1.967910684220088e-05, + "loss": 1.2491, + "step": 5970 + }, + { + "epoch": 0.09019879936046336, + "grad_norm": 0.8087271256802815, + "learning_rate": 1.9677902922738786e-05, + "loss": 1.237, + "step": 5980 + }, + { + "epoch": 0.09034963347310628, + "grad_norm": 0.8034110061995275, + "learning_rate": 1.9676696786050397e-05, + "loss": 1.2265, + "step": 5990 + }, + { + "epoch": 0.09050046758574919, + "grad_norm": 0.7492800972224236, + "learning_rate": 1.967548843241205e-05, + "loss": 1.2488, + "step": 6000 + }, + { + "epoch": 0.09065130169839211, + "grad_norm": 0.7317624866045888, + "learning_rate": 1.9674277862100575e-05, + "loss": 1.2303, + "step": 6010 + }, + { + "epoch": 0.09080213581103502, + "grad_norm": 0.7170166889086934, + "learning_rate": 1.967306507539332e-05, + "loss": 1.2524, + "step": 6020 + }, + { + "epoch": 0.09095296992367793, + "grad_norm": 0.7307826821956893, + "learning_rate": 1.9671850072568135e-05, + "loss": 1.2623, + "step": 6030 + }, + { + "epoch": 0.09110380403632086, + "grad_norm": 0.7468186012359245, + "learning_rate": 1.967063285390338e-05, + "loss": 1.2312, + "step": 6040 + }, + { + "epoch": 0.09125463814896377, + "grad_norm": 0.7526938997568844, + "learning_rate": 1.9669413419677924e-05, + "loss": 1.2656, + "step": 6050 + }, + { + "epoch": 0.09140547226160668, + "grad_norm": 0.8050303600696603, + "learning_rate": 1.9668191770171136e-05, + "loss": 1.2489, + "step": 6060 + }, + { + "epoch": 0.0915563063742496, + "grad_norm": 0.7936172949673618, + "learning_rate": 1.9666967905662904e-05, + "loss": 1.2556, + "step": 6070 + }, + { + "epoch": 0.09170714048689252, + "grad_norm": 0.8201501074593475, + "learning_rate": 1.9665741826433617e-05, + "loss": 1.2512, + "step": 6080 + }, + { + "epoch": 0.09185797459953543, + "grad_norm": 0.8750926822760868, + "learning_rate": 1.9664513532764168e-05, + "loss": 1.237, + "step": 6090 + }, + { + "epoch": 0.09200880871217834, + "grad_norm": 0.8141411854076961, + "learning_rate": 1.9663283024935968e-05, + "loss": 1.2313, + "step": 6100 + }, + { + "epoch": 0.09215964282482127, + "grad_norm": 0.7750760989282494, + "learning_rate": 1.9662050303230923e-05, + "loss": 1.2509, + "step": 6110 + }, + { + "epoch": 0.09231047693746418, + "grad_norm": 0.8126505153104664, + "learning_rate": 1.9660815367931452e-05, + "loss": 1.2441, + "step": 6120 + }, + { + "epoch": 0.09246131105010709, + "grad_norm": 0.7722439509932224, + "learning_rate": 1.9659578219320486e-05, + "loss": 1.2378, + "step": 6130 + }, + { + "epoch": 0.09261214516275, + "grad_norm": 0.7536865678415894, + "learning_rate": 1.9658338857681458e-05, + "loss": 1.2399, + "step": 6140 + }, + { + "epoch": 0.09276297927539293, + "grad_norm": 0.7452935556950061, + "learning_rate": 1.9657097283298298e-05, + "loss": 1.2427, + "step": 6150 + }, + { + "epoch": 0.09291381338803584, + "grad_norm": 0.7587858478599233, + "learning_rate": 1.9655853496455467e-05, + "loss": 1.2346, + "step": 6160 + }, + { + "epoch": 0.09306464750067875, + "grad_norm": 0.7822985647716725, + "learning_rate": 1.965460749743791e-05, + "loss": 1.2338, + "step": 6170 + }, + { + "epoch": 0.09321548161332167, + "grad_norm": 0.7489011754748169, + "learning_rate": 1.965335928653109e-05, + "loss": 1.2303, + "step": 6180 + }, + { + "epoch": 0.09336631572596459, + "grad_norm": 0.7791039637063968, + "learning_rate": 1.9652108864020972e-05, + "loss": 1.2182, + "step": 6190 + }, + { + "epoch": 0.0935171498386075, + "grad_norm": 0.7516681637964054, + "learning_rate": 1.965085623019404e-05, + "loss": 1.2417, + "step": 6200 + }, + { + "epoch": 0.09366798395125041, + "grad_norm": 0.7147065339755855, + "learning_rate": 1.964960138533726e-05, + "loss": 1.2416, + "step": 6210 + }, + { + "epoch": 0.09381881806389333, + "grad_norm": 0.7883417006297797, + "learning_rate": 1.9648344329738132e-05, + "loss": 1.248, + "step": 6220 + }, + { + "epoch": 0.09396965217653624, + "grad_norm": 0.7357518364208504, + "learning_rate": 1.9647085063684646e-05, + "loss": 1.2345, + "step": 6230 + }, + { + "epoch": 0.09412048628917916, + "grad_norm": 0.832996843178576, + "learning_rate": 1.9645823587465298e-05, + "loss": 1.228, + "step": 6240 + }, + { + "epoch": 0.09427132040182208, + "grad_norm": 0.777461029311949, + "learning_rate": 1.96445599013691e-05, + "loss": 1.2134, + "step": 6250 + }, + { + "epoch": 0.09442215451446499, + "grad_norm": 0.7846622189436951, + "learning_rate": 1.9643294005685562e-05, + "loss": 1.2533, + "step": 6260 + }, + { + "epoch": 0.0945729886271079, + "grad_norm": 0.7405252247399952, + "learning_rate": 1.9642025900704703e-05, + "loss": 1.234, + "step": 6270 + }, + { + "epoch": 0.09472382273975082, + "grad_norm": 0.7410394894071384, + "learning_rate": 1.9640755586717053e-05, + "loss": 1.2531, + "step": 6280 + }, + { + "epoch": 0.09487465685239374, + "grad_norm": 0.7585612995281882, + "learning_rate": 1.9639483064013638e-05, + "loss": 1.2542, + "step": 6290 + }, + { + "epoch": 0.09502549096503665, + "grad_norm": 0.7898213206619347, + "learning_rate": 1.9638208332885998e-05, + "loss": 1.2384, + "step": 6300 + }, + { + "epoch": 0.09517632507767956, + "grad_norm": 0.7580534777910124, + "learning_rate": 1.963693139362617e-05, + "loss": 1.2328, + "step": 6310 + }, + { + "epoch": 0.09532715919032249, + "grad_norm": 0.7960063951793525, + "learning_rate": 1.9635652246526718e-05, + "loss": 1.2513, + "step": 6320 + }, + { + "epoch": 0.0954779933029654, + "grad_norm": 0.8042307307778556, + "learning_rate": 1.963437089188068e-05, + "loss": 1.2332, + "step": 6330 + }, + { + "epoch": 0.09562882741560831, + "grad_norm": 0.7422963500288239, + "learning_rate": 1.963308732998163e-05, + "loss": 1.245, + "step": 6340 + }, + { + "epoch": 0.09577966152825122, + "grad_norm": 0.7523300464364536, + "learning_rate": 1.9631801561123627e-05, + "loss": 1.2318, + "step": 6350 + }, + { + "epoch": 0.09593049564089415, + "grad_norm": 0.7501125890391073, + "learning_rate": 1.9630513585601246e-05, + "loss": 1.2319, + "step": 6360 + }, + { + "epoch": 0.09608132975353706, + "grad_norm": 0.7308970378523061, + "learning_rate": 1.962922340370956e-05, + "loss": 1.2155, + "step": 6370 + }, + { + "epoch": 0.09623216386617997, + "grad_norm": 0.7664047363779317, + "learning_rate": 1.962793101574416e-05, + "loss": 1.2445, + "step": 6380 + }, + { + "epoch": 0.0963829979788229, + "grad_norm": 0.7431124939154418, + "learning_rate": 1.9626636422001133e-05, + "loss": 1.2436, + "step": 6390 + }, + { + "epoch": 0.09653383209146581, + "grad_norm": 0.7570273381014454, + "learning_rate": 1.962533962277707e-05, + "loss": 1.2186, + "step": 6400 + }, + { + "epoch": 0.09668466620410872, + "grad_norm": 0.7383702173480221, + "learning_rate": 1.9624040618369068e-05, + "loss": 1.2215, + "step": 6410 + }, + { + "epoch": 0.09683550031675163, + "grad_norm": 0.774150405810787, + "learning_rate": 1.9622739409074736e-05, + "loss": 1.2548, + "step": 6420 + }, + { + "epoch": 0.09698633442939456, + "grad_norm": 0.7470507037050833, + "learning_rate": 1.9621435995192183e-05, + "loss": 1.2429, + "step": 6430 + }, + { + "epoch": 0.09713716854203747, + "grad_norm": 0.7262468070222363, + "learning_rate": 1.962013037702002e-05, + "loss": 1.2378, + "step": 6440 + }, + { + "epoch": 0.09728800265468038, + "grad_norm": 0.7681311373469678, + "learning_rate": 1.961882255485737e-05, + "loss": 1.2333, + "step": 6450 + }, + { + "epoch": 0.0974388367673233, + "grad_norm": 0.7833584929637141, + "learning_rate": 1.9617512529003862e-05, + "loss": 1.2162, + "step": 6460 + }, + { + "epoch": 0.09758967087996621, + "grad_norm": 0.7562926053415595, + "learning_rate": 1.9616200299759614e-05, + "loss": 1.2209, + "step": 6470 + }, + { + "epoch": 0.09774050499260913, + "grad_norm": 0.7697504258725043, + "learning_rate": 1.961488586742527e-05, + "loss": 1.2537, + "step": 6480 + }, + { + "epoch": 0.09789133910525204, + "grad_norm": 0.7080352591634195, + "learning_rate": 1.9613569232301964e-05, + "loss": 1.2286, + "step": 6490 + }, + { + "epoch": 0.09804217321789496, + "grad_norm": 0.7546592466286919, + "learning_rate": 1.961225039469134e-05, + "loss": 1.2346, + "step": 6500 + }, + { + "epoch": 0.09819300733053787, + "grad_norm": 0.7403979747530846, + "learning_rate": 1.9610929354895548e-05, + "loss": 1.2317, + "step": 6510 + }, + { + "epoch": 0.09834384144318079, + "grad_norm": 0.7774054497893574, + "learning_rate": 1.9609606113217242e-05, + "loss": 1.2168, + "step": 6520 + }, + { + "epoch": 0.09849467555582371, + "grad_norm": 0.7472467782606125, + "learning_rate": 1.9608280669959577e-05, + "loss": 1.2556, + "step": 6530 + }, + { + "epoch": 0.09864550966846662, + "grad_norm": 0.7804243431238923, + "learning_rate": 1.9606953025426213e-05, + "loss": 1.2216, + "step": 6540 + }, + { + "epoch": 0.09879634378110953, + "grad_norm": 2.8941021293002875, + "learning_rate": 1.960562317992132e-05, + "loss": 1.2718, + "step": 6550 + }, + { + "epoch": 0.09894717789375244, + "grad_norm": 0.8027900282913074, + "learning_rate": 1.9604291133749565e-05, + "loss": 1.2309, + "step": 6560 + }, + { + "epoch": 0.09909801200639537, + "grad_norm": 0.800751933359967, + "learning_rate": 1.9602956887216124e-05, + "loss": 1.2373, + "step": 6570 + }, + { + "epoch": 0.09924884611903828, + "grad_norm": 0.7292653823416209, + "learning_rate": 1.9601620440626675e-05, + "loss": 1.237, + "step": 6580 + }, + { + "epoch": 0.09939968023168119, + "grad_norm": 0.7720239685578059, + "learning_rate": 1.9600281794287403e-05, + "loss": 1.2207, + "step": 6590 + }, + { + "epoch": 0.09955051434432412, + "grad_norm": 0.7376647361201979, + "learning_rate": 1.959894094850499e-05, + "loss": 1.2527, + "step": 6600 + }, + { + "epoch": 0.09970134845696703, + "grad_norm": 0.764128044493117, + "learning_rate": 1.959759790358663e-05, + "loss": 1.2362, + "step": 6610 + }, + { + "epoch": 0.09985218256960994, + "grad_norm": 0.7546302203321884, + "learning_rate": 1.959625265984001e-05, + "loss": 1.2387, + "step": 6620 + }, + { + "epoch": 0.10000301668225285, + "grad_norm": 0.7339118278594167, + "learning_rate": 1.9594905217573336e-05, + "loss": 1.233, + "step": 6630 + }, + { + "epoch": 0.10015385079489578, + "grad_norm": 0.7140125621490842, + "learning_rate": 1.9593555577095312e-05, + "loss": 1.2322, + "step": 6640 + }, + { + "epoch": 0.10030468490753869, + "grad_norm": 0.7967958944788253, + "learning_rate": 1.9592203738715133e-05, + "loss": 1.2387, + "step": 6650 + }, + { + "epoch": 0.1004555190201816, + "grad_norm": 0.7693302245652374, + "learning_rate": 1.9590849702742514e-05, + "loss": 1.2299, + "step": 6660 + }, + { + "epoch": 0.10060635313282452, + "grad_norm": 0.7495600359647175, + "learning_rate": 1.958949346948767e-05, + "loss": 1.2386, + "step": 6670 + }, + { + "epoch": 0.10075718724546744, + "grad_norm": 0.7588352850164636, + "learning_rate": 1.9588135039261307e-05, + "loss": 1.2192, + "step": 6680 + }, + { + "epoch": 0.10090802135811035, + "grad_norm": 0.7419090458364673, + "learning_rate": 1.9586774412374656e-05, + "loss": 1.2417, + "step": 6690 + }, + { + "epoch": 0.10105885547075326, + "grad_norm": 0.735756218013272, + "learning_rate": 1.958541158913943e-05, + "loss": 1.2182, + "step": 6700 + }, + { + "epoch": 0.10120968958339618, + "grad_norm": 0.7640037420401615, + "learning_rate": 1.9584046569867856e-05, + "loss": 1.2106, + "step": 6710 + }, + { + "epoch": 0.1013605236960391, + "grad_norm": 0.7867628582186671, + "learning_rate": 1.9582679354872667e-05, + "loss": 1.2357, + "step": 6720 + }, + { + "epoch": 0.101511357808682, + "grad_norm": 0.7837935392407589, + "learning_rate": 1.9581309944467093e-05, + "loss": 1.2332, + "step": 6730 + }, + { + "epoch": 0.10166219192132493, + "grad_norm": 0.7874094755290112, + "learning_rate": 1.9579938338964865e-05, + "loss": 1.2387, + "step": 6740 + }, + { + "epoch": 0.10181302603396784, + "grad_norm": 0.7471762613337166, + "learning_rate": 1.9578564538680226e-05, + "loss": 1.2067, + "step": 6750 + }, + { + "epoch": 0.10196386014661075, + "grad_norm": 0.7220700391238617, + "learning_rate": 1.9577188543927906e-05, + "loss": 1.2293, + "step": 6760 + }, + { + "epoch": 0.10211469425925367, + "grad_norm": 0.7564750823509033, + "learning_rate": 1.9575810355023164e-05, + "loss": 1.231, + "step": 6770 + }, + { + "epoch": 0.10226552837189659, + "grad_norm": 0.7166513168350389, + "learning_rate": 1.9574429972281733e-05, + "loss": 1.2391, + "step": 6780 + }, + { + "epoch": 0.1024163624845395, + "grad_norm": 0.7204153936096243, + "learning_rate": 1.9573047396019865e-05, + "loss": 1.2297, + "step": 6790 + }, + { + "epoch": 0.10256719659718241, + "grad_norm": 0.7459865435621108, + "learning_rate": 1.9571662626554314e-05, + "loss": 1.2225, + "step": 6800 + }, + { + "epoch": 0.10271803070982534, + "grad_norm": 0.7282758171200276, + "learning_rate": 1.957027566420233e-05, + "loss": 1.2177, + "step": 6810 + }, + { + "epoch": 0.10286886482246825, + "grad_norm": 0.7169556717911534, + "learning_rate": 1.956888650928167e-05, + "loss": 1.2072, + "step": 6820 + }, + { + "epoch": 0.10301969893511116, + "grad_norm": 0.7905624447817354, + "learning_rate": 1.9567495162110588e-05, + "loss": 1.2074, + "step": 6830 + }, + { + "epoch": 0.10317053304775407, + "grad_norm": 0.7664126023592177, + "learning_rate": 1.956610162300785e-05, + "loss": 1.2319, + "step": 6840 + }, + { + "epoch": 0.103321367160397, + "grad_norm": 0.7386490414424692, + "learning_rate": 1.9564705892292716e-05, + "loss": 1.2105, + "step": 6850 + }, + { + "epoch": 0.10347220127303991, + "grad_norm": 0.7671349807075807, + "learning_rate": 1.956330797028495e-05, + "loss": 1.2285, + "step": 6860 + }, + { + "epoch": 0.10362303538568282, + "grad_norm": 0.7632622262597346, + "learning_rate": 1.956190785730482e-05, + "loss": 1.2336, + "step": 6870 + }, + { + "epoch": 0.10377386949832575, + "grad_norm": 0.7247947552762211, + "learning_rate": 1.95605055536731e-05, + "loss": 1.2325, + "step": 6880 + }, + { + "epoch": 0.10392470361096866, + "grad_norm": 0.7393027375724707, + "learning_rate": 1.955910105971105e-05, + "loss": 1.2088, + "step": 6890 + }, + { + "epoch": 0.10407553772361157, + "grad_norm": 0.7681524793852269, + "learning_rate": 1.9557694375740445e-05, + "loss": 1.2286, + "step": 6900 + }, + { + "epoch": 0.10422637183625448, + "grad_norm": 0.7081417590076156, + "learning_rate": 1.955628550208356e-05, + "loss": 1.2126, + "step": 6910 + }, + { + "epoch": 0.1043772059488974, + "grad_norm": 0.8240623147386081, + "learning_rate": 1.9554874439063174e-05, + "loss": 1.2393, + "step": 6920 + }, + { + "epoch": 0.10452804006154032, + "grad_norm": 0.682071209900918, + "learning_rate": 1.9553461187002566e-05, + "loss": 1.2186, + "step": 6930 + }, + { + "epoch": 0.10467887417418323, + "grad_norm": 0.7649321659082594, + "learning_rate": 1.9552045746225505e-05, + "loss": 1.2193, + "step": 6940 + }, + { + "epoch": 0.10482970828682615, + "grad_norm": 0.795383197256307, + "learning_rate": 1.955062811705628e-05, + "loss": 1.2332, + "step": 6950 + }, + { + "epoch": 0.10498054239946907, + "grad_norm": 0.7717981046586359, + "learning_rate": 1.954920829981967e-05, + "loss": 1.2162, + "step": 6960 + }, + { + "epoch": 0.10513137651211198, + "grad_norm": 0.7681459679584168, + "learning_rate": 1.9547786294840952e-05, + "loss": 1.2471, + "step": 6970 + }, + { + "epoch": 0.10528221062475489, + "grad_norm": 0.7909380122142646, + "learning_rate": 1.954636210244592e-05, + "loss": 1.2215, + "step": 6980 + }, + { + "epoch": 0.10543304473739781, + "grad_norm": 0.814093590412838, + "learning_rate": 1.9544935722960858e-05, + "loss": 1.2227, + "step": 6990 + }, + { + "epoch": 0.10558387885004072, + "grad_norm": 0.7296491555711381, + "learning_rate": 1.9543507156712543e-05, + "loss": 1.1995, + "step": 7000 + }, + { + "epoch": 0.10573471296268364, + "grad_norm": 0.7690035225114272, + "learning_rate": 1.954207640402827e-05, + "loss": 1.21, + "step": 7010 + }, + { + "epoch": 0.10588554707532656, + "grad_norm": 0.7091794483845264, + "learning_rate": 1.9540643465235826e-05, + "loss": 1.2129, + "step": 7020 + }, + { + "epoch": 0.10603638118796947, + "grad_norm": 0.7847330982428027, + "learning_rate": 1.95392083406635e-05, + "loss": 1.1965, + "step": 7030 + }, + { + "epoch": 0.10618721530061238, + "grad_norm": 0.7617122664674935, + "learning_rate": 1.953777103064008e-05, + "loss": 1.2241, + "step": 7040 + }, + { + "epoch": 0.1063380494132553, + "grad_norm": 0.7825021098237156, + "learning_rate": 1.953633153549486e-05, + "loss": 1.2111, + "step": 7050 + }, + { + "epoch": 0.10648888352589822, + "grad_norm": 8.248535652701005, + "learning_rate": 1.9534889855557627e-05, + "loss": 1.2352, + "step": 7060 + }, + { + "epoch": 0.10663971763854113, + "grad_norm": 0.7575441248854972, + "learning_rate": 1.9533445991158678e-05, + "loss": 1.2129, + "step": 7070 + }, + { + "epoch": 0.10679055175118404, + "grad_norm": 0.8102568698558764, + "learning_rate": 1.95319999426288e-05, + "loss": 1.2142, + "step": 7080 + }, + { + "epoch": 0.10694138586382697, + "grad_norm": 0.7317768277175766, + "learning_rate": 1.9530551710299283e-05, + "loss": 1.2339, + "step": 7090 + }, + { + "epoch": 0.10709221997646988, + "grad_norm": 0.7640298196886105, + "learning_rate": 1.952910129450193e-05, + "loss": 1.2068, + "step": 7100 + }, + { + "epoch": 0.10724305408911279, + "grad_norm": 0.7119560783923763, + "learning_rate": 1.9527648695569023e-05, + "loss": 1.198, + "step": 7110 + }, + { + "epoch": 0.1073938882017557, + "grad_norm": 0.7386252677983153, + "learning_rate": 1.9526193913833364e-05, + "loss": 1.2082, + "step": 7120 + }, + { + "epoch": 0.10754472231439863, + "grad_norm": 0.7737018931772764, + "learning_rate": 1.9524736949628246e-05, + "loss": 1.2222, + "step": 7130 + }, + { + "epoch": 0.10769555642704154, + "grad_norm": 0.7591237374310973, + "learning_rate": 1.9523277803287453e-05, + "loss": 1.2313, + "step": 7140 + }, + { + "epoch": 0.10784639053968445, + "grad_norm": 0.8049359407587588, + "learning_rate": 1.9521816475145287e-05, + "loss": 1.2296, + "step": 7150 + }, + { + "epoch": 0.10799722465232738, + "grad_norm": 0.7597724059075098, + "learning_rate": 1.952035296553654e-05, + "loss": 1.2242, + "step": 7160 + }, + { + "epoch": 0.10814805876497029, + "grad_norm": 0.7687974304291413, + "learning_rate": 1.95188872747965e-05, + "loss": 1.2121, + "step": 7170 + }, + { + "epoch": 0.1082988928776132, + "grad_norm": 0.736000928033096, + "learning_rate": 1.9517419403260964e-05, + "loss": 1.2217, + "step": 7180 + }, + { + "epoch": 0.10844972699025611, + "grad_norm": 0.704913979090375, + "learning_rate": 1.9515949351266225e-05, + "loss": 1.2131, + "step": 7190 + }, + { + "epoch": 0.10860056110289903, + "grad_norm": 0.765101781312247, + "learning_rate": 1.951447711914907e-05, + "loss": 1.221, + "step": 7200 + }, + { + "epoch": 0.10875139521554195, + "grad_norm": 0.8034100228768958, + "learning_rate": 1.9513002707246795e-05, + "loss": 1.2172, + "step": 7210 + }, + { + "epoch": 0.10890222932818486, + "grad_norm": 0.7176005173036375, + "learning_rate": 1.951152611589719e-05, + "loss": 1.217, + "step": 7220 + }, + { + "epoch": 0.10905306344082778, + "grad_norm": 0.7373200463322026, + "learning_rate": 1.951004734543854e-05, + "loss": 1.2066, + "step": 7230 + }, + { + "epoch": 0.1092038975534707, + "grad_norm": 0.7291607052401243, + "learning_rate": 1.9508566396209644e-05, + "loss": 1.2128, + "step": 7240 + }, + { + "epoch": 0.1093547316661136, + "grad_norm": 0.8960272693056918, + "learning_rate": 1.950708326854978e-05, + "loss": 1.2398, + "step": 7250 + }, + { + "epoch": 0.10950556577875652, + "grad_norm": 0.7107960910774234, + "learning_rate": 1.950559796279874e-05, + "loss": 1.22, + "step": 7260 + }, + { + "epoch": 0.10965639989139944, + "grad_norm": 0.7421529058491391, + "learning_rate": 1.9504110479296808e-05, + "loss": 1.2134, + "step": 7270 + }, + { + "epoch": 0.10980723400404235, + "grad_norm": 0.7974197406640986, + "learning_rate": 1.9502620818384775e-05, + "loss": 1.1915, + "step": 7280 + }, + { + "epoch": 0.10995806811668526, + "grad_norm": 0.7802318421042287, + "learning_rate": 1.9501128980403918e-05, + "loss": 1.2068, + "step": 7290 + }, + { + "epoch": 0.11010890222932819, + "grad_norm": 0.7246873235990974, + "learning_rate": 1.9499634965696024e-05, + "loss": 1.2016, + "step": 7300 + }, + { + "epoch": 0.1102597363419711, + "grad_norm": 0.7038327548550344, + "learning_rate": 1.9498138774603376e-05, + "loss": 1.2081, + "step": 7310 + }, + { + "epoch": 0.11041057045461401, + "grad_norm": 0.812066147879098, + "learning_rate": 1.9496640407468752e-05, + "loss": 1.2268, + "step": 7320 + }, + { + "epoch": 0.11056140456725692, + "grad_norm": 0.7216494609487655, + "learning_rate": 1.949513986463543e-05, + "loss": 1.2124, + "step": 7330 + }, + { + "epoch": 0.11071223867989985, + "grad_norm": 0.7467518628540686, + "learning_rate": 1.949363714644719e-05, + "loss": 1.1895, + "step": 7340 + }, + { + "epoch": 0.11086307279254276, + "grad_norm": 0.7553740500571435, + "learning_rate": 1.949213225324831e-05, + "loss": 1.2285, + "step": 7350 + }, + { + "epoch": 0.11101390690518567, + "grad_norm": 0.7914311004934085, + "learning_rate": 1.9490625185383553e-05, + "loss": 1.2153, + "step": 7360 + }, + { + "epoch": 0.1111647410178286, + "grad_norm": 0.730247050744581, + "learning_rate": 1.94891159431982e-05, + "loss": 1.2092, + "step": 7370 + }, + { + "epoch": 0.11131557513047151, + "grad_norm": 0.7629624201398167, + "learning_rate": 1.9487604527038022e-05, + "loss": 1.2075, + "step": 7380 + }, + { + "epoch": 0.11146640924311442, + "grad_norm": 0.7050596744777028, + "learning_rate": 1.9486090937249284e-05, + "loss": 1.208, + "step": 7390 + }, + { + "epoch": 0.11161724335575733, + "grad_norm": 0.7480920373236665, + "learning_rate": 1.9484575174178754e-05, + "loss": 1.2114, + "step": 7400 + }, + { + "epoch": 0.11176807746840026, + "grad_norm": 0.7545994718579797, + "learning_rate": 1.9483057238173693e-05, + "loss": 1.1905, + "step": 7410 + }, + { + "epoch": 0.11191891158104317, + "grad_norm": 0.7348457919647262, + "learning_rate": 1.948153712958187e-05, + "loss": 1.208, + "step": 7420 + }, + { + "epoch": 0.11206974569368608, + "grad_norm": 0.7709948189366598, + "learning_rate": 1.9480014848751535e-05, + "loss": 1.1993, + "step": 7430 + }, + { + "epoch": 0.112220579806329, + "grad_norm": 0.7189913282691793, + "learning_rate": 1.9478490396031457e-05, + "loss": 1.208, + "step": 7440 + }, + { + "epoch": 0.11237141391897192, + "grad_norm": 0.7327793272021237, + "learning_rate": 1.947696377177088e-05, + "loss": 1.197, + "step": 7450 + }, + { + "epoch": 0.11252224803161483, + "grad_norm": 0.7659132345915707, + "learning_rate": 1.9475434976319563e-05, + "loss": 1.2142, + "step": 7460 + }, + { + "epoch": 0.11267308214425774, + "grad_norm": 0.7870141733627148, + "learning_rate": 1.9473904010027756e-05, + "loss": 1.2008, + "step": 7470 + }, + { + "epoch": 0.11282391625690066, + "grad_norm": 0.7929437238352646, + "learning_rate": 1.9472370873246206e-05, + "loss": 1.2156, + "step": 7480 + }, + { + "epoch": 0.11297475036954358, + "grad_norm": 0.7510689278540121, + "learning_rate": 1.9470835566326153e-05, + "loss": 1.203, + "step": 7490 + }, + { + "epoch": 0.11312558448218649, + "grad_norm": 0.7416415548059662, + "learning_rate": 1.9469298089619343e-05, + "loss": 1.204, + "step": 7500 + }, + { + "epoch": 0.11327641859482941, + "grad_norm": 0.759885784008553, + "learning_rate": 1.9467758443478017e-05, + "loss": 1.2047, + "step": 7510 + }, + { + "epoch": 0.11342725270747232, + "grad_norm": 0.744625944969179, + "learning_rate": 1.9466216628254905e-05, + "loss": 1.2039, + "step": 7520 + }, + { + "epoch": 0.11357808682011523, + "grad_norm": 0.7037329383064528, + "learning_rate": 1.9464672644303245e-05, + "loss": 1.2135, + "step": 7530 + }, + { + "epoch": 0.11372892093275815, + "grad_norm": 0.7173742656240258, + "learning_rate": 1.9463126491976764e-05, + "loss": 1.213, + "step": 7540 + }, + { + "epoch": 0.11387975504540107, + "grad_norm": 0.7591534065838464, + "learning_rate": 1.946157817162969e-05, + "loss": 1.1955, + "step": 7550 + }, + { + "epoch": 0.11403058915804398, + "grad_norm": 0.7359962928973889, + "learning_rate": 1.9460027683616745e-05, + "loss": 1.2284, + "step": 7560 + }, + { + "epoch": 0.1141814232706869, + "grad_norm": 0.771995139972905, + "learning_rate": 1.9458475028293145e-05, + "loss": 1.2045, + "step": 7570 + }, + { + "epoch": 0.11433225738332982, + "grad_norm": 0.7191654318379005, + "learning_rate": 1.9456920206014616e-05, + "loss": 1.2181, + "step": 7580 + }, + { + "epoch": 0.11448309149597273, + "grad_norm": 0.7611950982847123, + "learning_rate": 1.9455363217137362e-05, + "loss": 1.2127, + "step": 7590 + }, + { + "epoch": 0.11463392560861564, + "grad_norm": 0.7043974136546757, + "learning_rate": 1.9453804062018097e-05, + "loss": 1.1959, + "step": 7600 + }, + { + "epoch": 0.11478475972125855, + "grad_norm": 0.7570972405822234, + "learning_rate": 1.9452242741014026e-05, + "loss": 1.1977, + "step": 7610 + }, + { + "epoch": 0.11493559383390148, + "grad_norm": 0.7876839048634755, + "learning_rate": 1.9450679254482846e-05, + "loss": 1.2032, + "step": 7620 + }, + { + "epoch": 0.11508642794654439, + "grad_norm": 0.6947963803227324, + "learning_rate": 1.9449113602782762e-05, + "loss": 1.1936, + "step": 7630 + }, + { + "epoch": 0.1152372620591873, + "grad_norm": 0.7332224324319729, + "learning_rate": 1.9447545786272465e-05, + "loss": 1.2097, + "step": 7640 + }, + { + "epoch": 0.11538809617183023, + "grad_norm": 0.7538319971438081, + "learning_rate": 1.944597580531114e-05, + "loss": 1.2163, + "step": 7650 + }, + { + "epoch": 0.11553893028447314, + "grad_norm": 0.717747148291956, + "learning_rate": 1.944440366025848e-05, + "loss": 1.1972, + "step": 7660 + }, + { + "epoch": 0.11568976439711605, + "grad_norm": 0.7861773708636716, + "learning_rate": 1.944282935147466e-05, + "loss": 1.2069, + "step": 7670 + }, + { + "epoch": 0.11584059850975896, + "grad_norm": 0.7856396331506292, + "learning_rate": 1.9441252879320364e-05, + "loss": 1.1984, + "step": 7680 + }, + { + "epoch": 0.11599143262240189, + "grad_norm": 0.7401370001259309, + "learning_rate": 1.9439674244156758e-05, + "loss": 1.1922, + "step": 7690 + }, + { + "epoch": 0.1161422667350448, + "grad_norm": 0.7314742717324364, + "learning_rate": 1.9438093446345516e-05, + "loss": 1.1833, + "step": 7700 + }, + { + "epoch": 0.11629310084768771, + "grad_norm": 0.7385760868042651, + "learning_rate": 1.9436510486248798e-05, + "loss": 1.2107, + "step": 7710 + }, + { + "epoch": 0.11644393496033063, + "grad_norm": 0.7326755508277807, + "learning_rate": 1.9434925364229267e-05, + "loss": 1.2085, + "step": 7720 + }, + { + "epoch": 0.11659476907297354, + "grad_norm": 0.7427494101998136, + "learning_rate": 1.9433338080650072e-05, + "loss": 1.1989, + "step": 7730 + }, + { + "epoch": 0.11674560318561646, + "grad_norm": 0.7591084036983301, + "learning_rate": 1.9431748635874867e-05, + "loss": 1.2035, + "step": 7740 + }, + { + "epoch": 0.11689643729825937, + "grad_norm": 0.7756892843479689, + "learning_rate": 1.9430157030267795e-05, + "loss": 1.2072, + "step": 7750 + }, + { + "epoch": 0.11704727141090229, + "grad_norm": 0.7627122736639593, + "learning_rate": 1.9428563264193497e-05, + "loss": 1.2176, + "step": 7760 + }, + { + "epoch": 0.1171981055235452, + "grad_norm": 0.7984386998580789, + "learning_rate": 1.942696733801711e-05, + "loss": 1.1856, + "step": 7770 + }, + { + "epoch": 0.11734893963618812, + "grad_norm": 0.7267896040122308, + "learning_rate": 1.9425369252104256e-05, + "loss": 1.1944, + "step": 7780 + }, + { + "epoch": 0.11749977374883104, + "grad_norm": 0.7639450112924009, + "learning_rate": 1.9423769006821072e-05, + "loss": 1.2237, + "step": 7790 + }, + { + "epoch": 0.11765060786147395, + "grad_norm": 0.8098261571528365, + "learning_rate": 1.9422166602534165e-05, + "loss": 1.201, + "step": 7800 + }, + { + "epoch": 0.11780144197411686, + "grad_norm": 0.8154359296439536, + "learning_rate": 1.9420562039610657e-05, + "loss": 1.2239, + "step": 7810 + }, + { + "epoch": 0.11795227608675977, + "grad_norm": 0.7525829028991068, + "learning_rate": 1.941895531841815e-05, + "loss": 1.194, + "step": 7820 + }, + { + "epoch": 0.1181031101994027, + "grad_norm": 0.7523935956816882, + "learning_rate": 1.9417346439324755e-05, + "loss": 1.2031, + "step": 7830 + }, + { + "epoch": 0.11825394431204561, + "grad_norm": 0.7422331532613807, + "learning_rate": 1.9415735402699057e-05, + "loss": 1.2081, + "step": 7840 + }, + { + "epoch": 0.11840477842468852, + "grad_norm": 0.7282692104329122, + "learning_rate": 1.9414122208910164e-05, + "loss": 1.2003, + "step": 7850 + }, + { + "epoch": 0.11855561253733145, + "grad_norm": 0.7553039661108605, + "learning_rate": 1.941250685832765e-05, + "loss": 1.1957, + "step": 7860 + }, + { + "epoch": 0.11870644664997436, + "grad_norm": 0.8311473333622663, + "learning_rate": 1.94108893513216e-05, + "loss": 1.2107, + "step": 7870 + }, + { + "epoch": 0.11885728076261727, + "grad_norm": 0.6992415040076897, + "learning_rate": 1.9409269688262584e-05, + "loss": 1.2167, + "step": 7880 + }, + { + "epoch": 0.11900811487526018, + "grad_norm": 0.7124305994581099, + "learning_rate": 1.9407647869521674e-05, + "loss": 1.2176, + "step": 7890 + }, + { + "epoch": 0.11915894898790311, + "grad_norm": 0.7416210772802558, + "learning_rate": 1.9406023895470424e-05, + "loss": 1.1957, + "step": 7900 + }, + { + "epoch": 0.11930978310054602, + "grad_norm": 0.8097329014280207, + "learning_rate": 1.9404397766480902e-05, + "loss": 1.2022, + "step": 7910 + }, + { + "epoch": 0.11946061721318893, + "grad_norm": 0.7967743903092286, + "learning_rate": 1.940276948292565e-05, + "loss": 1.216, + "step": 7920 + }, + { + "epoch": 0.11961145132583186, + "grad_norm": 0.7433570295036105, + "learning_rate": 1.940113904517771e-05, + "loss": 1.1917, + "step": 7930 + }, + { + "epoch": 0.11976228543847477, + "grad_norm": 0.7488819696544597, + "learning_rate": 1.9399506453610622e-05, + "loss": 1.1801, + "step": 7940 + }, + { + "epoch": 0.11991311955111768, + "grad_norm": 0.776112918392525, + "learning_rate": 1.9397871708598412e-05, + "loss": 1.2125, + "step": 7950 + }, + { + "epoch": 0.12006395366376059, + "grad_norm": 0.7148172015361044, + "learning_rate": 1.9396234810515607e-05, + "loss": 1.2082, + "step": 7960 + }, + { + "epoch": 0.12021478777640351, + "grad_norm": 0.7071917753025434, + "learning_rate": 1.939459575973722e-05, + "loss": 1.2001, + "step": 7970 + }, + { + "epoch": 0.12036562188904643, + "grad_norm": 0.7370062688547964, + "learning_rate": 1.9392954556638767e-05, + "loss": 1.1941, + "step": 7980 + }, + { + "epoch": 0.12051645600168934, + "grad_norm": 0.8055704998090703, + "learning_rate": 1.9391311201596242e-05, + "loss": 1.1882, + "step": 7990 + }, + { + "epoch": 0.12066729011433226, + "grad_norm": 0.7281895891066112, + "learning_rate": 1.9389665694986148e-05, + "loss": 1.2074, + "step": 8000 + }, + { + "epoch": 0.12081812422697517, + "grad_norm": 0.7226016227593254, + "learning_rate": 1.938801803718547e-05, + "loss": 1.1822, + "step": 8010 + }, + { + "epoch": 0.12096895833961809, + "grad_norm": 0.7741743135706858, + "learning_rate": 1.938636822857169e-05, + "loss": 1.1877, + "step": 8020 + }, + { + "epoch": 0.121119792452261, + "grad_norm": 0.7181968439179593, + "learning_rate": 1.938471626952278e-05, + "loss": 1.1897, + "step": 8030 + }, + { + "epoch": 0.12127062656490392, + "grad_norm": 0.7143430884582934, + "learning_rate": 1.9383062160417215e-05, + "loss": 1.1955, + "step": 8040 + }, + { + "epoch": 0.12142146067754683, + "grad_norm": 0.7369368328797664, + "learning_rate": 1.9381405901633945e-05, + "loss": 1.1733, + "step": 8050 + }, + { + "epoch": 0.12157229479018974, + "grad_norm": 0.7548957200815439, + "learning_rate": 1.937974749355243e-05, + "loss": 1.1999, + "step": 8060 + }, + { + "epoch": 0.12172312890283267, + "grad_norm": 0.7425730338532732, + "learning_rate": 1.937808693655261e-05, + "loss": 1.1991, + "step": 8070 + }, + { + "epoch": 0.12187396301547558, + "grad_norm": 0.7275077078686559, + "learning_rate": 1.937642423101492e-05, + "loss": 1.2102, + "step": 8080 + }, + { + "epoch": 0.12202479712811849, + "grad_norm": 0.7258362606057989, + "learning_rate": 1.9374759377320297e-05, + "loss": 1.1979, + "step": 8090 + }, + { + "epoch": 0.1221756312407614, + "grad_norm": 0.743973888205383, + "learning_rate": 1.9373092375850156e-05, + "loss": 1.2014, + "step": 8100 + }, + { + "epoch": 0.12232646535340433, + "grad_norm": 0.7818149191424436, + "learning_rate": 1.937142322698641e-05, + "loss": 1.1851, + "step": 8110 + }, + { + "epoch": 0.12247729946604724, + "grad_norm": 0.7215446531621182, + "learning_rate": 1.9369751931111465e-05, + "loss": 1.1861, + "step": 8120 + }, + { + "epoch": 0.12262813357869015, + "grad_norm": 0.720027812181535, + "learning_rate": 1.9368078488608223e-05, + "loss": 1.2126, + "step": 8130 + }, + { + "epoch": 0.12277896769133308, + "grad_norm": 0.7455154020591502, + "learning_rate": 1.936640289986006e-05, + "loss": 1.2197, + "step": 8140 + }, + { + "epoch": 0.12292980180397599, + "grad_norm": 0.7394777716661156, + "learning_rate": 1.9364725165250876e-05, + "loss": 1.1841, + "step": 8150 + }, + { + "epoch": 0.1230806359166189, + "grad_norm": 0.7299229994652486, + "learning_rate": 1.9363045285165026e-05, + "loss": 1.191, + "step": 8160 + }, + { + "epoch": 0.12323147002926181, + "grad_norm": 0.717863694632372, + "learning_rate": 1.936136325998738e-05, + "loss": 1.1893, + "step": 8170 + }, + { + "epoch": 0.12338230414190474, + "grad_norm": 0.7667695195532287, + "learning_rate": 1.9359679090103295e-05, + "loss": 1.1969, + "step": 8180 + }, + { + "epoch": 0.12353313825454765, + "grad_norm": 0.7331546802386787, + "learning_rate": 1.9357992775898617e-05, + "loss": 1.184, + "step": 8190 + }, + { + "epoch": 0.12368397236719056, + "grad_norm": 0.684914370041588, + "learning_rate": 1.935630431775968e-05, + "loss": 1.1778, + "step": 8200 + }, + { + "epoch": 0.12383480647983348, + "grad_norm": 0.7494376522803937, + "learning_rate": 1.9354613716073324e-05, + "loss": 1.1801, + "step": 8210 + }, + { + "epoch": 0.1239856405924764, + "grad_norm": 0.7158868916534766, + "learning_rate": 1.9352920971226853e-05, + "loss": 1.1888, + "step": 8220 + }, + { + "epoch": 0.1241364747051193, + "grad_norm": 0.7074540165821116, + "learning_rate": 1.935122608360809e-05, + "loss": 1.1915, + "step": 8230 + }, + { + "epoch": 0.12428730881776222, + "grad_norm": 0.7406871589633134, + "learning_rate": 1.934952905360533e-05, + "loss": 1.1672, + "step": 8240 + }, + { + "epoch": 0.12443814293040514, + "grad_norm": 0.732403217294245, + "learning_rate": 1.9347829881607373e-05, + "loss": 1.2056, + "step": 8250 + }, + { + "epoch": 0.12458897704304805, + "grad_norm": 0.8039177902734285, + "learning_rate": 1.9346128568003497e-05, + "loss": 1.2019, + "step": 8260 + }, + { + "epoch": 0.12473981115569097, + "grad_norm": 0.7282518266001335, + "learning_rate": 1.934442511318348e-05, + "loss": 1.16, + "step": 8270 + }, + { + "epoch": 0.12489064526833389, + "grad_norm": 0.7289754258671579, + "learning_rate": 1.934271951753758e-05, + "loss": 1.1816, + "step": 8280 + }, + { + "epoch": 0.1250414793809768, + "grad_norm": 0.7142974683774076, + "learning_rate": 1.934101178145656e-05, + "loss": 1.1977, + "step": 8290 + }, + { + "epoch": 0.12519231349361973, + "grad_norm": 0.747849200455642, + "learning_rate": 1.9339301905331666e-05, + "loss": 1.1737, + "step": 8300 + }, + { + "epoch": 0.12534314760626264, + "grad_norm": 0.7259583449264978, + "learning_rate": 1.9337589889554627e-05, + "loss": 1.2054, + "step": 8310 + }, + { + "epoch": 0.12549398171890555, + "grad_norm": 0.7059664363299555, + "learning_rate": 1.9335875734517672e-05, + "loss": 1.1968, + "step": 8320 + }, + { + "epoch": 0.12564481583154846, + "grad_norm": 0.8124268780791143, + "learning_rate": 1.9334159440613522e-05, + "loss": 1.199, + "step": 8330 + }, + { + "epoch": 0.12579564994419137, + "grad_norm": 0.7188356996080003, + "learning_rate": 1.9332441008235373e-05, + "loss": 1.1801, + "step": 8340 + }, + { + "epoch": 0.12594648405683428, + "grad_norm": 0.7254737564911162, + "learning_rate": 1.933072043777693e-05, + "loss": 1.2137, + "step": 8350 + }, + { + "epoch": 0.1260973181694772, + "grad_norm": 0.6948907748160911, + "learning_rate": 1.932899772963238e-05, + "loss": 1.1859, + "step": 8360 + }, + { + "epoch": 0.12624815228212014, + "grad_norm": 0.7286677276276174, + "learning_rate": 1.932727288419639e-05, + "loss": 1.2003, + "step": 8370 + }, + { + "epoch": 0.12639898639476305, + "grad_norm": 0.7617323811603669, + "learning_rate": 1.9325545901864134e-05, + "loss": 1.2021, + "step": 8380 + }, + { + "epoch": 0.12654982050740596, + "grad_norm": 0.7737552772739915, + "learning_rate": 1.9323816783031263e-05, + "loss": 1.1906, + "step": 8390 + }, + { + "epoch": 0.12670065462004887, + "grad_norm": 0.7424610947443973, + "learning_rate": 1.932208552809392e-05, + "loss": 1.2103, + "step": 8400 + }, + { + "epoch": 0.12685148873269178, + "grad_norm": 0.7571225010000194, + "learning_rate": 1.9320352137448742e-05, + "loss": 1.1884, + "step": 8410 + }, + { + "epoch": 0.1270023228453347, + "grad_norm": 0.737783630023911, + "learning_rate": 1.9318616611492853e-05, + "loss": 1.1881, + "step": 8420 + }, + { + "epoch": 0.1271531569579776, + "grad_norm": 0.8918275858874083, + "learning_rate": 1.931687895062386e-05, + "loss": 1.1844, + "step": 8430 + }, + { + "epoch": 0.12730399107062054, + "grad_norm": 0.7261094835384223, + "learning_rate": 1.9315139155239873e-05, + "loss": 1.1799, + "step": 8440 + }, + { + "epoch": 0.12745482518326345, + "grad_norm": 0.7034804856298168, + "learning_rate": 1.9313397225739473e-05, + "loss": 1.1818, + "step": 8450 + }, + { + "epoch": 0.12760565929590637, + "grad_norm": 0.7005863589553085, + "learning_rate": 1.9311653162521748e-05, + "loss": 1.1974, + "step": 8460 + }, + { + "epoch": 0.12775649340854928, + "grad_norm": 0.7280067480087287, + "learning_rate": 1.930990696598626e-05, + "loss": 1.1992, + "step": 8470 + }, + { + "epoch": 0.1279073275211922, + "grad_norm": 0.7361202784544785, + "learning_rate": 1.9308158636533067e-05, + "loss": 1.1741, + "step": 8480 + }, + { + "epoch": 0.1280581616338351, + "grad_norm": 0.7279079344287948, + "learning_rate": 1.9306408174562717e-05, + "loss": 1.2044, + "step": 8490 + }, + { + "epoch": 0.128208995746478, + "grad_norm": 0.7515679527154845, + "learning_rate": 1.9304655580476245e-05, + "loss": 1.1821, + "step": 8500 + }, + { + "epoch": 0.12835982985912095, + "grad_norm": 0.7095581184110755, + "learning_rate": 1.9302900854675168e-05, + "loss": 1.1797, + "step": 8510 + }, + { + "epoch": 0.12851066397176386, + "grad_norm": 0.693548047604916, + "learning_rate": 1.93011439975615e-05, + "loss": 1.1853, + "step": 8520 + }, + { + "epoch": 0.12866149808440677, + "grad_norm": 0.7270627830576413, + "learning_rate": 1.9299385009537746e-05, + "loss": 1.205, + "step": 8530 + }, + { + "epoch": 0.12881233219704968, + "grad_norm": 0.7080649804367208, + "learning_rate": 1.9297623891006886e-05, + "loss": 1.1715, + "step": 8540 + }, + { + "epoch": 0.1289631663096926, + "grad_norm": 0.7464147572459489, + "learning_rate": 1.9295860642372396e-05, + "loss": 1.1936, + "step": 8550 + }, + { + "epoch": 0.1291140004223355, + "grad_norm": 0.7184005179529355, + "learning_rate": 1.9294095264038247e-05, + "loss": 1.1801, + "step": 8560 + }, + { + "epoch": 0.12926483453497842, + "grad_norm": 0.7718773666219955, + "learning_rate": 1.929232775640888e-05, + "loss": 1.1943, + "step": 8570 + }, + { + "epoch": 0.12941566864762136, + "grad_norm": 0.7285473523357388, + "learning_rate": 1.9290558119889243e-05, + "loss": 1.182, + "step": 8580 + }, + { + "epoch": 0.12956650276026427, + "grad_norm": 0.7065476892019336, + "learning_rate": 1.9288786354884764e-05, + "loss": 1.1924, + "step": 8590 + }, + { + "epoch": 0.12971733687290718, + "grad_norm": 0.7517893174109624, + "learning_rate": 1.928701246180135e-05, + "loss": 1.177, + "step": 8600 + }, + { + "epoch": 0.1298681709855501, + "grad_norm": 0.827516995091975, + "learning_rate": 1.928523644104541e-05, + "loss": 1.1938, + "step": 8610 + }, + { + "epoch": 0.130019005098193, + "grad_norm": 0.7337810780496574, + "learning_rate": 1.9283458293023826e-05, + "loss": 1.1936, + "step": 8620 + }, + { + "epoch": 0.1301698392108359, + "grad_norm": 0.734356096562213, + "learning_rate": 1.9281678018143983e-05, + "loss": 1.1728, + "step": 8630 + }, + { + "epoch": 0.13032067332347883, + "grad_norm": 0.7611211191322974, + "learning_rate": 1.927989561681374e-05, + "loss": 1.1596, + "step": 8640 + }, + { + "epoch": 0.13047150743612176, + "grad_norm": 0.7708792477945329, + "learning_rate": 1.9278111089441456e-05, + "loss": 1.1936, + "step": 8650 + }, + { + "epoch": 0.13062234154876468, + "grad_norm": 0.7442103176302413, + "learning_rate": 1.927632443643596e-05, + "loss": 1.1767, + "step": 8660 + }, + { + "epoch": 0.1307731756614076, + "grad_norm": 0.70721910159391, + "learning_rate": 1.9274535658206588e-05, + "loss": 1.1927, + "step": 8670 + }, + { + "epoch": 0.1309240097740505, + "grad_norm": 0.7368134838017709, + "learning_rate": 1.9272744755163147e-05, + "loss": 1.1921, + "step": 8680 + }, + { + "epoch": 0.1310748438866934, + "grad_norm": 0.7401289585725294, + "learning_rate": 1.927095172771593e-05, + "loss": 1.173, + "step": 8690 + }, + { + "epoch": 0.13122567799933632, + "grad_norm": 0.7468943135795245, + "learning_rate": 1.9269156576275736e-05, + "loss": 1.1927, + "step": 8700 + }, + { + "epoch": 0.13137651211197923, + "grad_norm": 0.7183049642419145, + "learning_rate": 1.926735930125383e-05, + "loss": 1.2016, + "step": 8710 + }, + { + "epoch": 0.13152734622462217, + "grad_norm": 0.7901794891105901, + "learning_rate": 1.9265559903061973e-05, + "loss": 1.1814, + "step": 8720 + }, + { + "epoch": 0.13167818033726508, + "grad_norm": 0.737533022958294, + "learning_rate": 1.9263758382112407e-05, + "loss": 1.2096, + "step": 8730 + }, + { + "epoch": 0.131829014449908, + "grad_norm": 0.7071264837947414, + "learning_rate": 1.9261954738817868e-05, + "loss": 1.1871, + "step": 8740 + }, + { + "epoch": 0.1319798485625509, + "grad_norm": 0.7095055828985511, + "learning_rate": 1.9260148973591576e-05, + "loss": 1.1876, + "step": 8750 + }, + { + "epoch": 0.13213068267519382, + "grad_norm": 0.709820180505148, + "learning_rate": 1.9258341086847227e-05, + "loss": 1.1677, + "step": 8760 + }, + { + "epoch": 0.13228151678783673, + "grad_norm": 0.7217209346575197, + "learning_rate": 1.925653107899902e-05, + "loss": 1.209, + "step": 8770 + }, + { + "epoch": 0.13243235090047964, + "grad_norm": 0.8275770127109566, + "learning_rate": 1.925471895046163e-05, + "loss": 1.1835, + "step": 8780 + }, + { + "epoch": 0.13258318501312258, + "grad_norm": 0.7601321598296241, + "learning_rate": 1.9252904701650215e-05, + "loss": 1.1915, + "step": 8790 + }, + { + "epoch": 0.1327340191257655, + "grad_norm": 0.6919257249649435, + "learning_rate": 1.9251088332980425e-05, + "loss": 1.1786, + "step": 8800 + }, + { + "epoch": 0.1328848532384084, + "grad_norm": 0.7085295969126255, + "learning_rate": 1.9249269844868396e-05, + "loss": 1.1937, + "step": 8810 + }, + { + "epoch": 0.1330356873510513, + "grad_norm": 0.8290121038162485, + "learning_rate": 1.9247449237730746e-05, + "loss": 1.1858, + "step": 8820 + }, + { + "epoch": 0.13318652146369422, + "grad_norm": 0.7632655882335172, + "learning_rate": 1.9245626511984574e-05, + "loss": 1.1653, + "step": 8830 + }, + { + "epoch": 0.13333735557633714, + "grad_norm": 0.7187734549353159, + "learning_rate": 1.924380166804748e-05, + "loss": 1.2047, + "step": 8840 + }, + { + "epoch": 0.13348818968898005, + "grad_norm": 0.7179611945777927, + "learning_rate": 1.9241974706337527e-05, + "loss": 1.1781, + "step": 8850 + }, + { + "epoch": 0.13363902380162299, + "grad_norm": 0.7252272234465821, + "learning_rate": 1.9240145627273287e-05, + "loss": 1.1713, + "step": 8860 + }, + { + "epoch": 0.1337898579142659, + "grad_norm": 0.7232484791723552, + "learning_rate": 1.9238314431273798e-05, + "loss": 1.1857, + "step": 8870 + }, + { + "epoch": 0.1339406920269088, + "grad_norm": 0.723624512094108, + "learning_rate": 1.9236481118758596e-05, + "loss": 1.177, + "step": 8880 + }, + { + "epoch": 0.13409152613955172, + "grad_norm": 0.7907251367036755, + "learning_rate": 1.9234645690147694e-05, + "loss": 1.1868, + "step": 8890 + }, + { + "epoch": 0.13424236025219463, + "grad_norm": 0.7197684556887317, + "learning_rate": 1.923280814586159e-05, + "loss": 1.1802, + "step": 8900 + }, + { + "epoch": 0.13439319436483754, + "grad_norm": 0.7334925893530693, + "learning_rate": 1.923096848632127e-05, + "loss": 1.1816, + "step": 8910 + }, + { + "epoch": 0.13454402847748045, + "grad_norm": 0.7485301337754373, + "learning_rate": 1.9229126711948207e-05, + "loss": 1.1722, + "step": 8920 + }, + { + "epoch": 0.1346948625901234, + "grad_norm": 0.8113928646265858, + "learning_rate": 1.922728282316435e-05, + "loss": 1.1664, + "step": 8930 + }, + { + "epoch": 0.1348456967027663, + "grad_norm": 0.7220367686773805, + "learning_rate": 1.9225436820392144e-05, + "loss": 1.1985, + "step": 8940 + }, + { + "epoch": 0.13499653081540922, + "grad_norm": 0.7415974074898813, + "learning_rate": 1.9223588704054505e-05, + "loss": 1.2021, + "step": 8950 + }, + { + "epoch": 0.13514736492805213, + "grad_norm": 0.7675528008083503, + "learning_rate": 1.9221738474574844e-05, + "loss": 1.1839, + "step": 8960 + }, + { + "epoch": 0.13529819904069504, + "grad_norm": 0.7182175009608814, + "learning_rate": 1.9219886132377054e-05, + "loss": 1.1661, + "step": 8970 + }, + { + "epoch": 0.13544903315333795, + "grad_norm": 0.7470839845384107, + "learning_rate": 1.9218031677885502e-05, + "loss": 1.168, + "step": 8980 + }, + { + "epoch": 0.13559986726598086, + "grad_norm": 0.6801307307762374, + "learning_rate": 1.9216175111525056e-05, + "loss": 1.1582, + "step": 8990 + }, + { + "epoch": 0.1357507013786238, + "grad_norm": 0.7450124030857932, + "learning_rate": 1.9214316433721053e-05, + "loss": 1.1826, + "step": 9000 + }, + { + "epoch": 0.1359015354912667, + "grad_norm": 0.7135000396967831, + "learning_rate": 1.9212455644899325e-05, + "loss": 1.1661, + "step": 9010 + }, + { + "epoch": 0.13605236960390962, + "grad_norm": 0.7090294724754144, + "learning_rate": 1.921059274548618e-05, + "loss": 1.1818, + "step": 9020 + }, + { + "epoch": 0.13620320371655253, + "grad_norm": 0.7579054149937209, + "learning_rate": 1.9208727735908406e-05, + "loss": 1.1793, + "step": 9030 + }, + { + "epoch": 0.13635403782919545, + "grad_norm": 0.7380163837815327, + "learning_rate": 1.9206860616593288e-05, + "loss": 1.1883, + "step": 9040 + }, + { + "epoch": 0.13650487194183836, + "grad_norm": 0.6882708382551403, + "learning_rate": 1.9204991387968587e-05, + "loss": 1.1675, + "step": 9050 + }, + { + "epoch": 0.13665570605448127, + "grad_norm": 0.6844737137977392, + "learning_rate": 1.9203120050462544e-05, + "loss": 1.166, + "step": 9060 + }, + { + "epoch": 0.1368065401671242, + "grad_norm": 0.7190692957625598, + "learning_rate": 1.920124660450388e-05, + "loss": 1.201, + "step": 9070 + }, + { + "epoch": 0.13695737427976712, + "grad_norm": 0.6731092951314548, + "learning_rate": 1.9199371050521817e-05, + "loss": 1.1806, + "step": 9080 + }, + { + "epoch": 0.13710820839241003, + "grad_norm": 0.7665335955683249, + "learning_rate": 1.919749338894604e-05, + "loss": 1.1876, + "step": 9090 + }, + { + "epoch": 0.13725904250505294, + "grad_norm": 0.7338478421143948, + "learning_rate": 1.9195613620206728e-05, + "loss": 1.1778, + "step": 9100 + }, + { + "epoch": 0.13740987661769585, + "grad_norm": 0.6922672883810428, + "learning_rate": 1.919373174473454e-05, + "loss": 1.1471, + "step": 9110 + }, + { + "epoch": 0.13756071073033876, + "grad_norm": 0.6840603333830219, + "learning_rate": 1.9191847762960614e-05, + "loss": 1.1695, + "step": 9120 + }, + { + "epoch": 0.13771154484298168, + "grad_norm": 0.7059183376908549, + "learning_rate": 1.9189961675316574e-05, + "loss": 1.2071, + "step": 9130 + }, + { + "epoch": 0.13786237895562461, + "grad_norm": 0.727472395611224, + "learning_rate": 1.9188073482234532e-05, + "loss": 1.1765, + "step": 9140 + }, + { + "epoch": 0.13801321306826753, + "grad_norm": 0.7043457856904992, + "learning_rate": 1.918618318414707e-05, + "loss": 1.1633, + "step": 9150 + }, + { + "epoch": 0.13816404718091044, + "grad_norm": 0.7230469990960405, + "learning_rate": 1.9184290781487266e-05, + "loss": 1.1913, + "step": 9160 + }, + { + "epoch": 0.13831488129355335, + "grad_norm": 0.7059928462606803, + "learning_rate": 1.918239627468867e-05, + "loss": 1.1826, + "step": 9170 + }, + { + "epoch": 0.13846571540619626, + "grad_norm": 0.7525097351795983, + "learning_rate": 1.918049966418531e-05, + "loss": 1.189, + "step": 9180 + }, + { + "epoch": 0.13861654951883917, + "grad_norm": 0.7084645771640204, + "learning_rate": 1.9178600950411716e-05, + "loss": 1.1797, + "step": 9190 + }, + { + "epoch": 0.13876738363148208, + "grad_norm": 0.7269099976425738, + "learning_rate": 1.9176700133802873e-05, + "loss": 1.1668, + "step": 9200 + }, + { + "epoch": 0.13891821774412502, + "grad_norm": 0.7215322925317355, + "learning_rate": 1.9174797214794277e-05, + "loss": 1.1949, + "step": 9210 + }, + { + "epoch": 0.13906905185676793, + "grad_norm": 0.7218245175633882, + "learning_rate": 1.917289219382188e-05, + "loss": 1.1985, + "step": 9220 + }, + { + "epoch": 0.13921988596941084, + "grad_norm": 0.7108515911976517, + "learning_rate": 1.9170985071322125e-05, + "loss": 1.1702, + "step": 9230 + }, + { + "epoch": 0.13937072008205376, + "grad_norm": 0.7945068118109861, + "learning_rate": 1.9169075847731947e-05, + "loss": 1.1657, + "step": 9240 + }, + { + "epoch": 0.13952155419469667, + "grad_norm": 0.7224039474801176, + "learning_rate": 1.916716452348874e-05, + "loss": 1.1667, + "step": 9250 + }, + { + "epoch": 0.13967238830733958, + "grad_norm": 0.6902171571456306, + "learning_rate": 1.9165251099030403e-05, + "loss": 1.1859, + "step": 9260 + }, + { + "epoch": 0.1398232224199825, + "grad_norm": 0.7428457355556223, + "learning_rate": 1.91633355747953e-05, + "loss": 1.182, + "step": 9270 + }, + { + "epoch": 0.13997405653262543, + "grad_norm": 0.7728442587342053, + "learning_rate": 1.9161417951222285e-05, + "loss": 1.1791, + "step": 9280 + }, + { + "epoch": 0.14012489064526834, + "grad_norm": 0.7766114324876696, + "learning_rate": 1.9159498228750686e-05, + "loss": 1.1758, + "step": 9290 + }, + { + "epoch": 0.14027572475791125, + "grad_norm": 0.7792917631692297, + "learning_rate": 1.915757640782031e-05, + "loss": 1.177, + "step": 9300 + }, + { + "epoch": 0.14042655887055416, + "grad_norm": 0.743047864946642, + "learning_rate": 1.9155652488871465e-05, + "loss": 1.1607, + "step": 9310 + }, + { + "epoch": 0.14057739298319707, + "grad_norm": 0.7073019085792276, + "learning_rate": 1.915372647234491e-05, + "loss": 1.1597, + "step": 9320 + }, + { + "epoch": 0.14072822709584, + "grad_norm": 0.7714526759117307, + "learning_rate": 1.9151798358681905e-05, + "loss": 1.1786, + "step": 9330 + }, + { + "epoch": 0.1408790612084829, + "grad_norm": 0.7151714751133988, + "learning_rate": 1.9149868148324184e-05, + "loss": 1.1587, + "step": 9340 + }, + { + "epoch": 0.14102989532112584, + "grad_norm": 0.7452149720984644, + "learning_rate": 1.9147935841713962e-05, + "loss": 1.1699, + "step": 9350 + }, + { + "epoch": 0.14118072943376875, + "grad_norm": 0.6669112757122636, + "learning_rate": 1.9146001439293938e-05, + "loss": 1.1767, + "step": 9360 + }, + { + "epoch": 0.14133156354641166, + "grad_norm": 0.7399329570367423, + "learning_rate": 1.9144064941507282e-05, + "loss": 1.1569, + "step": 9370 + }, + { + "epoch": 0.14148239765905457, + "grad_norm": 0.738353257507497, + "learning_rate": 1.9142126348797646e-05, + "loss": 1.1781, + "step": 9380 + }, + { + "epoch": 0.14163323177169748, + "grad_norm": 0.7214316995445619, + "learning_rate": 1.9140185661609174e-05, + "loss": 1.1762, + "step": 9390 + }, + { + "epoch": 0.1417840658843404, + "grad_norm": 0.7089551163714519, + "learning_rate": 1.9138242880386476e-05, + "loss": 1.1697, + "step": 9400 + }, + { + "epoch": 0.1419348999969833, + "grad_norm": 0.8305643699725253, + "learning_rate": 1.9136298005574652e-05, + "loss": 1.1895, + "step": 9410 + }, + { + "epoch": 0.14208573410962624, + "grad_norm": 0.755790714766322, + "learning_rate": 1.9134351037619268e-05, + "loss": 1.1632, + "step": 9420 + }, + { + "epoch": 0.14223656822226916, + "grad_norm": 0.7227981576493616, + "learning_rate": 1.9132401976966384e-05, + "loss": 1.1779, + "step": 9430 + }, + { + "epoch": 0.14238740233491207, + "grad_norm": 0.7257906426811398, + "learning_rate": 1.9130450824062534e-05, + "loss": 1.1679, + "step": 9440 + }, + { + "epoch": 0.14253823644755498, + "grad_norm": 0.792169416337431, + "learning_rate": 1.912849757935473e-05, + "loss": 1.1693, + "step": 9450 + }, + { + "epoch": 0.1426890705601979, + "grad_norm": 0.7199941976329318, + "learning_rate": 1.9126542243290463e-05, + "loss": 1.1543, + "step": 9460 + }, + { + "epoch": 0.1428399046728408, + "grad_norm": 0.7055064277981052, + "learning_rate": 1.9124584816317706e-05, + "loss": 1.175, + "step": 9470 + }, + { + "epoch": 0.1429907387854837, + "grad_norm": 0.7088036502072887, + "learning_rate": 1.9122625298884907e-05, + "loss": 1.1547, + "step": 9480 + }, + { + "epoch": 0.14314157289812665, + "grad_norm": 0.7268773294597443, + "learning_rate": 1.9120663691440998e-05, + "loss": 1.1862, + "step": 9490 + }, + { + "epoch": 0.14329240701076956, + "grad_norm": 0.7163540747043275, + "learning_rate": 1.911869999443538e-05, + "loss": 1.1713, + "step": 9500 + }, + { + "epoch": 0.14344324112341247, + "grad_norm": 0.719172184324345, + "learning_rate": 1.9116734208317954e-05, + "loss": 1.1633, + "step": 9510 + }, + { + "epoch": 0.14359407523605539, + "grad_norm": 0.7061645697709681, + "learning_rate": 1.9114766333539073e-05, + "loss": 1.1554, + "step": 9520 + }, + { + "epoch": 0.1437449093486983, + "grad_norm": 0.7475078689086578, + "learning_rate": 1.9112796370549582e-05, + "loss": 1.1749, + "step": 9530 + }, + { + "epoch": 0.1438957434613412, + "grad_norm": 0.740268578827075, + "learning_rate": 1.911082431980081e-05, + "loss": 1.1529, + "step": 9540 + }, + { + "epoch": 0.14404657757398412, + "grad_norm": 0.7736908936523181, + "learning_rate": 1.910885018174455e-05, + "loss": 1.1688, + "step": 9550 + }, + { + "epoch": 0.14419741168662706, + "grad_norm": 0.7475151747843225, + "learning_rate": 1.9106873956833087e-05, + "loss": 1.1671, + "step": 9560 + }, + { + "epoch": 0.14434824579926997, + "grad_norm": 0.6675895181170384, + "learning_rate": 1.9104895645519174e-05, + "loss": 1.1497, + "step": 9570 + }, + { + "epoch": 0.14449907991191288, + "grad_norm": 0.7125061273116985, + "learning_rate": 1.910291524825605e-05, + "loss": 1.176, + "step": 9580 + }, + { + "epoch": 0.1446499140245558, + "grad_norm": 0.6930432039066503, + "learning_rate": 1.9100932765497423e-05, + "loss": 1.1779, + "step": 9590 + }, + { + "epoch": 0.1448007481371987, + "grad_norm": 0.7459678965622564, + "learning_rate": 1.9098948197697486e-05, + "loss": 1.1667, + "step": 9600 + }, + { + "epoch": 0.14495158224984162, + "grad_norm": 0.7665913842333439, + "learning_rate": 1.9096961545310904e-05, + "loss": 1.1701, + "step": 9610 + }, + { + "epoch": 0.14510241636248453, + "grad_norm": 0.7456306613568189, + "learning_rate": 1.909497280879283e-05, + "loss": 1.1864, + "step": 9620 + }, + { + "epoch": 0.14525325047512747, + "grad_norm": 0.762590301825068, + "learning_rate": 1.9092981988598885e-05, + "loss": 1.1859, + "step": 9630 + }, + { + "epoch": 0.14540408458777038, + "grad_norm": 0.6792778469254279, + "learning_rate": 1.909098908518517e-05, + "loss": 1.1646, + "step": 9640 + }, + { + "epoch": 0.1455549187004133, + "grad_norm": 0.7282972351914044, + "learning_rate": 1.9088994099008255e-05, + "loss": 1.1443, + "step": 9650 + }, + { + "epoch": 0.1457057528130562, + "grad_norm": 0.7159708110677193, + "learning_rate": 1.9086997030525204e-05, + "loss": 1.1652, + "step": 9660 + }, + { + "epoch": 0.1458565869256991, + "grad_norm": 0.7686843265394151, + "learning_rate": 1.9084997880193548e-05, + "loss": 1.1563, + "step": 9670 + }, + { + "epoch": 0.14600742103834202, + "grad_norm": 0.7148458980214385, + "learning_rate": 1.9082996648471293e-05, + "loss": 1.1757, + "step": 9680 + }, + { + "epoch": 0.14615825515098493, + "grad_norm": 0.7018362810823854, + "learning_rate": 1.9080993335816925e-05, + "loss": 1.1709, + "step": 9690 + }, + { + "epoch": 0.14630908926362787, + "grad_norm": 0.7169797198216793, + "learning_rate": 1.9078987942689414e-05, + "loss": 1.1656, + "step": 9700 + }, + { + "epoch": 0.14645992337627078, + "grad_norm": 0.7353908147221409, + "learning_rate": 1.9076980469548194e-05, + "loss": 1.1594, + "step": 9710 + }, + { + "epoch": 0.1466107574889137, + "grad_norm": 0.7202333150283665, + "learning_rate": 1.9074970916853176e-05, + "loss": 1.1824, + "step": 9720 + }, + { + "epoch": 0.1467615916015566, + "grad_norm": 0.7478892316882806, + "learning_rate": 1.9072959285064764e-05, + "loss": 1.1875, + "step": 9730 + }, + { + "epoch": 0.14691242571419952, + "grad_norm": 0.7583610010681839, + "learning_rate": 1.9070945574643815e-05, + "loss": 1.1608, + "step": 9740 + }, + { + "epoch": 0.14706325982684243, + "grad_norm": 0.7152072091748012, + "learning_rate": 1.9068929786051683e-05, + "loss": 1.1666, + "step": 9750 + }, + { + "epoch": 0.14721409393948534, + "grad_norm": 0.8164824262626562, + "learning_rate": 1.9066911919750182e-05, + "loss": 1.1725, + "step": 9760 + }, + { + "epoch": 0.14736492805212828, + "grad_norm": 0.7055454780041883, + "learning_rate": 1.9064891976201616e-05, + "loss": 1.1762, + "step": 9770 + }, + { + "epoch": 0.1475157621647712, + "grad_norm": 0.7012997338472907, + "learning_rate": 1.906286995586875e-05, + "loss": 1.1648, + "step": 9780 + }, + { + "epoch": 0.1476665962774141, + "grad_norm": 0.6650373491324031, + "learning_rate": 1.906084585921484e-05, + "loss": 1.1506, + "step": 9790 + }, + { + "epoch": 0.14781743039005701, + "grad_norm": 0.6940481344203477, + "learning_rate": 1.905881968670361e-05, + "loss": 1.1664, + "step": 9800 + }, + { + "epoch": 0.14796826450269993, + "grad_norm": 0.7295038035899611, + "learning_rate": 1.9056791438799254e-05, + "loss": 1.184, + "step": 9810 + }, + { + "epoch": 0.14811909861534284, + "grad_norm": 0.7737132609842179, + "learning_rate": 1.9054761115966457e-05, + "loss": 1.1383, + "step": 9820 + }, + { + "epoch": 0.14826993272798575, + "grad_norm": 0.7664817695305418, + "learning_rate": 1.905272871867036e-05, + "loss": 1.1766, + "step": 9830 + }, + { + "epoch": 0.1484207668406287, + "grad_norm": 0.8019076727577042, + "learning_rate": 1.905069424737659e-05, + "loss": 1.1872, + "step": 9840 + }, + { + "epoch": 0.1485716009532716, + "grad_norm": 0.6910901136824897, + "learning_rate": 1.9048657702551256e-05, + "loss": 1.1753, + "step": 9850 + }, + { + "epoch": 0.1487224350659145, + "grad_norm": 0.7931836671068272, + "learning_rate": 1.904661908466093e-05, + "loss": 1.1859, + "step": 9860 + }, + { + "epoch": 0.14887326917855742, + "grad_norm": 0.722195921050241, + "learning_rate": 1.9044578394172664e-05, + "loss": 1.1772, + "step": 9870 + }, + { + "epoch": 0.14902410329120033, + "grad_norm": 0.72397359476492, + "learning_rate": 1.904253563155398e-05, + "loss": 1.1697, + "step": 9880 + }, + { + "epoch": 0.14917493740384324, + "grad_norm": 0.6768723210960375, + "learning_rate": 1.9040490797272887e-05, + "loss": 1.1587, + "step": 9890 + }, + { + "epoch": 0.14932577151648616, + "grad_norm": 0.7126195315470385, + "learning_rate": 1.903844389179785e-05, + "loss": 1.1823, + "step": 9900 + }, + { + "epoch": 0.1494766056291291, + "grad_norm": 0.6878632827653255, + "learning_rate": 1.9036394915597828e-05, + "loss": 1.1627, + "step": 9910 + }, + { + "epoch": 0.149627439741772, + "grad_norm": 0.7395898517625551, + "learning_rate": 1.903434386914224e-05, + "loss": 1.1664, + "step": 9920 + }, + { + "epoch": 0.14977827385441492, + "grad_norm": 0.6758958007030873, + "learning_rate": 1.903229075290099e-05, + "loss": 1.1501, + "step": 9930 + }, + { + "epoch": 0.14992910796705783, + "grad_norm": 0.7211473390782104, + "learning_rate": 1.9030235567344445e-05, + "loss": 1.1761, + "step": 9940 + }, + { + "epoch": 0.15007994207970074, + "grad_norm": 0.7240851554590464, + "learning_rate": 1.9028178312943456e-05, + "loss": 1.1613, + "step": 9950 + }, + { + "epoch": 0.15023077619234365, + "grad_norm": 0.684965582337032, + "learning_rate": 1.9026118990169345e-05, + "loss": 1.1523, + "step": 9960 + }, + { + "epoch": 0.15038161030498656, + "grad_norm": 0.7265770581967833, + "learning_rate": 1.9024057599493903e-05, + "loss": 1.1759, + "step": 9970 + }, + { + "epoch": 0.1505324444176295, + "grad_norm": 0.6820600907405776, + "learning_rate": 1.90219941413894e-05, + "loss": 1.1464, + "step": 9980 + }, + { + "epoch": 0.1506832785302724, + "grad_norm": 0.7302835977231714, + "learning_rate": 1.901992861632858e-05, + "loss": 1.1826, + "step": 9990 + }, + { + "epoch": 0.15083411264291532, + "grad_norm": 0.70841298017769, + "learning_rate": 1.9017861024784655e-05, + "loss": 1.166, + "step": 10000 + }, + { + "epoch": 0.15098494675555824, + "grad_norm": 0.7714260285415001, + "learning_rate": 1.9015791367231314e-05, + "loss": 1.1693, + "step": 10010 + }, + { + "epoch": 0.15113578086820115, + "grad_norm": 0.8051252496061578, + "learning_rate": 1.9013719644142728e-05, + "loss": 1.1599, + "step": 10020 + }, + { + "epoch": 0.15128661498084406, + "grad_norm": 0.7124793009703871, + "learning_rate": 1.9011645855993525e-05, + "loss": 1.1367, + "step": 10030 + }, + { + "epoch": 0.15143744909348697, + "grad_norm": 0.7370201769933088, + "learning_rate": 1.9009570003258813e-05, + "loss": 1.144, + "step": 10040 + }, + { + "epoch": 0.1515882832061299, + "grad_norm": 0.7471574995071657, + "learning_rate": 1.9007492086414178e-05, + "loss": 1.1611, + "step": 10050 + }, + { + "epoch": 0.15173911731877282, + "grad_norm": 0.7373476777843044, + "learning_rate": 1.9005412105935672e-05, + "loss": 1.1595, + "step": 10060 + }, + { + "epoch": 0.15188995143141573, + "grad_norm": 0.7120383050415692, + "learning_rate": 1.9003330062299826e-05, + "loss": 1.1604, + "step": 10070 + }, + { + "epoch": 0.15204078554405864, + "grad_norm": 0.6955857321764968, + "learning_rate": 1.9001245955983638e-05, + "loss": 1.1668, + "step": 10080 + }, + { + "epoch": 0.15219161965670155, + "grad_norm": 0.6792384599863504, + "learning_rate": 1.899915978746458e-05, + "loss": 1.1573, + "step": 10090 + }, + { + "epoch": 0.15234245376934447, + "grad_norm": 0.6792080953218064, + "learning_rate": 1.89970715572206e-05, + "loss": 1.1511, + "step": 10100 + }, + { + "epoch": 0.15249328788198738, + "grad_norm": 0.730543829932567, + "learning_rate": 1.8994981265730114e-05, + "loss": 1.1712, + "step": 10110 + }, + { + "epoch": 0.15264412199463032, + "grad_norm": 0.7025998651663351, + "learning_rate": 1.8992888913472006e-05, + "loss": 1.159, + "step": 10120 + }, + { + "epoch": 0.15279495610727323, + "grad_norm": 0.7504530750569209, + "learning_rate": 1.8990794500925647e-05, + "loss": 1.1831, + "step": 10130 + }, + { + "epoch": 0.15294579021991614, + "grad_norm": 0.7599759959631252, + "learning_rate": 1.8988698028570866e-05, + "loss": 1.1593, + "step": 10140 + }, + { + "epoch": 0.15309662433255905, + "grad_norm": 0.719545279913323, + "learning_rate": 1.8986599496887975e-05, + "loss": 1.1298, + "step": 10150 + }, + { + "epoch": 0.15324745844520196, + "grad_norm": 0.706242972282617, + "learning_rate": 1.898449890635774e-05, + "loss": 1.1586, + "step": 10160 + }, + { + "epoch": 0.15339829255784487, + "grad_norm": 0.6948536553660372, + "learning_rate": 1.8982396257461423e-05, + "loss": 1.164, + "step": 10170 + }, + { + "epoch": 0.15354912667048778, + "grad_norm": 0.7323987474295613, + "learning_rate": 1.8980291550680738e-05, + "loss": 1.1516, + "step": 10180 + }, + { + "epoch": 0.15369996078313072, + "grad_norm": 0.7262664989208512, + "learning_rate": 1.897818478649788e-05, + "loss": 1.1699, + "step": 10190 + }, + { + "epoch": 0.15385079489577363, + "grad_norm": 0.737358374773887, + "learning_rate": 1.897607596539551e-05, + "loss": 1.1539, + "step": 10200 + }, + { + "epoch": 0.15400162900841655, + "grad_norm": 0.7339152047560806, + "learning_rate": 1.8973965087856764e-05, + "loss": 1.1588, + "step": 10210 + }, + { + "epoch": 0.15415246312105946, + "grad_norm": 0.6956028588512007, + "learning_rate": 1.8971852154365245e-05, + "loss": 1.1642, + "step": 10220 + }, + { + "epoch": 0.15430329723370237, + "grad_norm": 6.7275032515425615, + "learning_rate": 1.896973716540504e-05, + "loss": 1.1595, + "step": 10230 + }, + { + "epoch": 0.15445413134634528, + "grad_norm": 0.7183947581830167, + "learning_rate": 1.896762012146069e-05, + "loss": 1.1503, + "step": 10240 + }, + { + "epoch": 0.1546049654589882, + "grad_norm": 0.7050399749493976, + "learning_rate": 1.8965501023017208e-05, + "loss": 1.1658, + "step": 10250 + }, + { + "epoch": 0.15475579957163113, + "grad_norm": 0.7197052245520359, + "learning_rate": 1.89633798705601e-05, + "loss": 1.1452, + "step": 10260 + }, + { + "epoch": 0.15490663368427404, + "grad_norm": 0.7059411317086506, + "learning_rate": 1.896125666457531e-05, + "loss": 1.1891, + "step": 10270 + }, + { + "epoch": 0.15505746779691695, + "grad_norm": 0.7491225618005054, + "learning_rate": 1.8959131405549276e-05, + "loss": 1.1547, + "step": 10280 + }, + { + "epoch": 0.15520830190955986, + "grad_norm": 0.728546688910115, + "learning_rate": 1.8957004093968896e-05, + "loss": 1.1602, + "step": 10290 + }, + { + "epoch": 0.15535913602220278, + "grad_norm": 0.6791672781883519, + "learning_rate": 1.8954874730321547e-05, + "loss": 1.1522, + "step": 10300 + }, + { + "epoch": 0.1555099701348457, + "grad_norm": 0.7277384184122753, + "learning_rate": 1.8952743315095064e-05, + "loss": 1.1542, + "step": 10310 + }, + { + "epoch": 0.1556608042474886, + "grad_norm": 0.6671091027542797, + "learning_rate": 1.895060984877776e-05, + "loss": 1.1398, + "step": 10320 + }, + { + "epoch": 0.15581163836013154, + "grad_norm": 0.7224565600799873, + "learning_rate": 1.894847433185842e-05, + "loss": 1.1369, + "step": 10330 + }, + { + "epoch": 0.15596247247277445, + "grad_norm": 0.6767514383528602, + "learning_rate": 1.894633676482629e-05, + "loss": 1.1551, + "step": 10340 + }, + { + "epoch": 0.15611330658541736, + "grad_norm": 0.6617094217875323, + "learning_rate": 1.8944197148171093e-05, + "loss": 1.1477, + "step": 10350 + }, + { + "epoch": 0.15626414069806027, + "grad_norm": 2.4252658238184797, + "learning_rate": 1.894205548238302e-05, + "loss": 1.1781, + "step": 10360 + }, + { + "epoch": 0.15641497481070318, + "grad_norm": 0.7395645809002291, + "learning_rate": 1.893991176795273e-05, + "loss": 1.1714, + "step": 10370 + }, + { + "epoch": 0.1565658089233461, + "grad_norm": 0.7732579427213784, + "learning_rate": 1.893776600537135e-05, + "loss": 1.1582, + "step": 10380 + }, + { + "epoch": 0.156716643035989, + "grad_norm": 0.738046852586735, + "learning_rate": 1.8935618195130483e-05, + "loss": 1.1518, + "step": 10390 + }, + { + "epoch": 0.15686747714863195, + "grad_norm": 0.7343385662509193, + "learning_rate": 1.8933468337722194e-05, + "loss": 1.16, + "step": 10400 + }, + { + "epoch": 0.15701831126127486, + "grad_norm": 0.6906241272896755, + "learning_rate": 1.893131643363902e-05, + "loss": 1.1386, + "step": 10410 + }, + { + "epoch": 0.15716914537391777, + "grad_norm": 0.8087334813287711, + "learning_rate": 1.8929162483373962e-05, + "loss": 1.1593, + "step": 10420 + }, + { + "epoch": 0.15731997948656068, + "grad_norm": 0.7030553981685134, + "learning_rate": 1.89270064874205e-05, + "loss": 1.13, + "step": 10430 + }, + { + "epoch": 0.1574708135992036, + "grad_norm": 0.6848520854521586, + "learning_rate": 1.8924848446272573e-05, + "loss": 1.1596, + "step": 10440 + }, + { + "epoch": 0.1576216477118465, + "grad_norm": 0.7017392648490982, + "learning_rate": 1.89226883604246e-05, + "loss": 1.1607, + "step": 10450 + }, + { + "epoch": 0.1577724818244894, + "grad_norm": 0.7653754329607411, + "learning_rate": 1.8920526230371448e-05, + "loss": 1.1522, + "step": 10460 + }, + { + "epoch": 0.15792331593713235, + "grad_norm": 0.754565303227473, + "learning_rate": 1.8918362056608477e-05, + "loss": 1.1443, + "step": 10470 + }, + { + "epoch": 0.15807415004977526, + "grad_norm": 0.7066826963384651, + "learning_rate": 1.89161958396315e-05, + "loss": 1.1654, + "step": 10480 + }, + { + "epoch": 0.15822498416241818, + "grad_norm": 0.6853262573192804, + "learning_rate": 1.8914027579936794e-05, + "loss": 1.155, + "step": 10490 + }, + { + "epoch": 0.1583758182750611, + "grad_norm": 1.2109735899904348, + "learning_rate": 1.8911857278021123e-05, + "loss": 1.1383, + "step": 10500 + }, + { + "epoch": 0.158526652387704, + "grad_norm": 0.6762415197984875, + "learning_rate": 1.8909684934381694e-05, + "loss": 1.1639, + "step": 10510 + }, + { + "epoch": 0.1586774865003469, + "grad_norm": 0.7125104389741647, + "learning_rate": 1.890751054951621e-05, + "loss": 1.1364, + "step": 10520 + }, + { + "epoch": 0.15882832061298982, + "grad_norm": 0.7334596919026344, + "learning_rate": 1.890533412392282e-05, + "loss": 1.1604, + "step": 10530 + }, + { + "epoch": 0.15897915472563276, + "grad_norm": 0.7175855044291646, + "learning_rate": 1.8903155658100147e-05, + "loss": 1.1293, + "step": 10540 + }, + { + "epoch": 0.15912998883827567, + "grad_norm": 0.7360895494592443, + "learning_rate": 1.8900975152547282e-05, + "loss": 1.1418, + "step": 10550 + }, + { + "epoch": 0.15928082295091858, + "grad_norm": 0.6893858468510901, + "learning_rate": 1.889879260776378e-05, + "loss": 1.1519, + "step": 10560 + }, + { + "epoch": 0.1594316570635615, + "grad_norm": 0.6950728517096905, + "learning_rate": 1.8896608024249672e-05, + "loss": 1.1606, + "step": 10570 + }, + { + "epoch": 0.1595824911762044, + "grad_norm": 0.7116834623370714, + "learning_rate": 1.8894421402505446e-05, + "loss": 1.1465, + "step": 10580 + }, + { + "epoch": 0.15973332528884732, + "grad_norm": 0.6885366178806771, + "learning_rate": 1.8892232743032063e-05, + "loss": 1.1482, + "step": 10590 + }, + { + "epoch": 0.15988415940149023, + "grad_norm": 0.7097542673768172, + "learning_rate": 1.889004204633095e-05, + "loss": 1.1616, + "step": 10600 + }, + { + "epoch": 0.16003499351413317, + "grad_norm": 0.7565978345339756, + "learning_rate": 1.8887849312904e-05, + "loss": 1.1453, + "step": 10610 + }, + { + "epoch": 0.16018582762677608, + "grad_norm": 0.7154750384980507, + "learning_rate": 1.8885654543253568e-05, + "loss": 1.1603, + "step": 10620 + }, + { + "epoch": 0.160336661739419, + "grad_norm": 0.7049246942248134, + "learning_rate": 1.8883457737882485e-05, + "loss": 1.1535, + "step": 10630 + }, + { + "epoch": 0.1604874958520619, + "grad_norm": 0.7129586418879905, + "learning_rate": 1.8881258897294043e-05, + "loss": 1.1603, + "step": 10640 + }, + { + "epoch": 0.1606383299647048, + "grad_norm": 0.6892742547072631, + "learning_rate": 1.8879058021991996e-05, + "loss": 1.1357, + "step": 10650 + }, + { + "epoch": 0.16078916407734772, + "grad_norm": 0.6773940717703167, + "learning_rate": 1.8876855112480577e-05, + "loss": 1.1422, + "step": 10660 + }, + { + "epoch": 0.16093999818999064, + "grad_norm": 0.7212219134050705, + "learning_rate": 1.8874650169264468e-05, + "loss": 1.149, + "step": 10670 + }, + { + "epoch": 0.16109083230263357, + "grad_norm": 0.6705665000659659, + "learning_rate": 1.887244319284883e-05, + "loss": 1.1303, + "step": 10680 + }, + { + "epoch": 0.16124166641527649, + "grad_norm": 0.7227568699084088, + "learning_rate": 1.8870234183739285e-05, + "loss": 1.1581, + "step": 10690 + }, + { + "epoch": 0.1613925005279194, + "grad_norm": 0.709901124241161, + "learning_rate": 1.886802314244192e-05, + "loss": 1.1491, + "step": 10700 + }, + { + "epoch": 0.1615433346405623, + "grad_norm": 0.6883239562441127, + "learning_rate": 1.8865810069463294e-05, + "loss": 1.162, + "step": 10710 + }, + { + "epoch": 0.16169416875320522, + "grad_norm": 0.6653807536386039, + "learning_rate": 1.886359496531042e-05, + "loss": 1.1446, + "step": 10720 + }, + { + "epoch": 0.16184500286584813, + "grad_norm": 0.6789566951645387, + "learning_rate": 1.8861377830490786e-05, + "loss": 1.1597, + "step": 10730 + }, + { + "epoch": 0.16199583697849104, + "grad_norm": 0.7223302589956966, + "learning_rate": 1.8859158665512342e-05, + "loss": 1.1275, + "step": 10740 + }, + { + "epoch": 0.16214667109113398, + "grad_norm": 0.7087665172314104, + "learning_rate": 1.88569374708835e-05, + "loss": 1.1554, + "step": 10750 + }, + { + "epoch": 0.1622975052037769, + "grad_norm": 0.7313559820953467, + "learning_rate": 1.8854714247113144e-05, + "loss": 1.1386, + "step": 10760 + }, + { + "epoch": 0.1624483393164198, + "grad_norm": 0.7472535508566892, + "learning_rate": 1.8852488994710617e-05, + "loss": 1.1408, + "step": 10770 + }, + { + "epoch": 0.16259917342906272, + "grad_norm": 0.7267854926639493, + "learning_rate": 1.8850261714185728e-05, + "loss": 1.1659, + "step": 10780 + }, + { + "epoch": 0.16275000754170563, + "grad_norm": 0.7085658080331158, + "learning_rate": 1.884803240604875e-05, + "loss": 1.141, + "step": 10790 + }, + { + "epoch": 0.16290084165434854, + "grad_norm": 0.6828131855378669, + "learning_rate": 1.8845801070810424e-05, + "loss": 1.1591, + "step": 10800 + }, + { + "epoch": 0.16305167576699145, + "grad_norm": 0.7316982784856245, + "learning_rate": 1.8843567708981956e-05, + "loss": 1.1409, + "step": 10810 + }, + { + "epoch": 0.1632025098796344, + "grad_norm": 0.7185946729354363, + "learning_rate": 1.884133232107501e-05, + "loss": 1.1575, + "step": 10820 + }, + { + "epoch": 0.1633533439922773, + "grad_norm": 0.7030146710442285, + "learning_rate": 1.883909490760172e-05, + "loss": 1.1304, + "step": 10830 + }, + { + "epoch": 0.1635041781049202, + "grad_norm": 0.6815594129539176, + "learning_rate": 1.8836855469074672e-05, + "loss": 1.1487, + "step": 10840 + }, + { + "epoch": 0.16365501221756312, + "grad_norm": 0.7112387627833117, + "learning_rate": 1.883461400600694e-05, + "loss": 1.121, + "step": 10850 + }, + { + "epoch": 0.16380584633020603, + "grad_norm": 0.7049815764812574, + "learning_rate": 1.883237051891204e-05, + "loss": 1.1688, + "step": 10860 + }, + { + "epoch": 0.16395668044284895, + "grad_norm": 0.7302821078836311, + "learning_rate": 1.8830125008303958e-05, + "loss": 1.1498, + "step": 10870 + }, + { + "epoch": 0.16410751455549186, + "grad_norm": 0.7414251730478946, + "learning_rate": 1.8827877474697152e-05, + "loss": 1.1608, + "step": 10880 + }, + { + "epoch": 0.1642583486681348, + "grad_norm": 0.7490303786281891, + "learning_rate": 1.882562791860653e-05, + "loss": 1.1341, + "step": 10890 + }, + { + "epoch": 0.1644091827807777, + "grad_norm": 0.7137210682672602, + "learning_rate": 1.8823376340547466e-05, + "loss": 1.1414, + "step": 10900 + }, + { + "epoch": 0.16456001689342062, + "grad_norm": 0.7196697380759874, + "learning_rate": 1.8821122741035812e-05, + "loss": 1.1409, + "step": 10910 + }, + { + "epoch": 0.16471085100606353, + "grad_norm": 0.6675395816912647, + "learning_rate": 1.8818867120587863e-05, + "loss": 1.1353, + "step": 10920 + }, + { + "epoch": 0.16486168511870644, + "grad_norm": 0.7047843053712136, + "learning_rate": 1.881660947972039e-05, + "loss": 1.1504, + "step": 10930 + }, + { + "epoch": 0.16501251923134935, + "grad_norm": 0.7088507493278665, + "learning_rate": 1.8814349818950618e-05, + "loss": 1.1387, + "step": 10940 + }, + { + "epoch": 0.16516335334399226, + "grad_norm": 0.7300463433684194, + "learning_rate": 1.8812088138796248e-05, + "loss": 1.1302, + "step": 10950 + }, + { + "epoch": 0.1653141874566352, + "grad_norm": 0.7140345242060977, + "learning_rate": 1.8809824439775433e-05, + "loss": 1.1407, + "step": 10960 + }, + { + "epoch": 0.16546502156927811, + "grad_norm": 0.7390741926103637, + "learning_rate": 1.8807558722406785e-05, + "loss": 1.1523, + "step": 10970 + }, + { + "epoch": 0.16561585568192103, + "grad_norm": 0.7251049177219989, + "learning_rate": 1.880529098720939e-05, + "loss": 1.1513, + "step": 10980 + }, + { + "epoch": 0.16576668979456394, + "grad_norm": 0.6915863890393205, + "learning_rate": 1.8803021234702784e-05, + "loss": 1.1331, + "step": 10990 + }, + { + "epoch": 0.16591752390720685, + "grad_norm": 0.7016333128459603, + "learning_rate": 1.880074946540698e-05, + "loss": 1.1517, + "step": 11000 + }, + { + "epoch": 0.16606835801984976, + "grad_norm": 0.7321660518139232, + "learning_rate": 1.879847567984244e-05, + "loss": 1.1378, + "step": 11010 + }, + { + "epoch": 0.16621919213249267, + "grad_norm": 0.6935169094432954, + "learning_rate": 1.8796199878530096e-05, + "loss": 1.151, + "step": 11020 + }, + { + "epoch": 0.1663700262451356, + "grad_norm": 0.7052352833004306, + "learning_rate": 1.8793922061991333e-05, + "loss": 1.1455, + "step": 11030 + }, + { + "epoch": 0.16652086035777852, + "grad_norm": 0.7081361148528544, + "learning_rate": 1.8791642230748007e-05, + "loss": 1.1385, + "step": 11040 + }, + { + "epoch": 0.16667169447042143, + "grad_norm": 0.7146303262419318, + "learning_rate": 1.8789360385322434e-05, + "loss": 1.1762, + "step": 11050 + }, + { + "epoch": 0.16682252858306434, + "grad_norm": 0.7237607701942101, + "learning_rate": 1.878707652623738e-05, + "loss": 1.1651, + "step": 11060 + }, + { + "epoch": 0.16697336269570726, + "grad_norm": 0.7007064958435751, + "learning_rate": 1.8784790654016093e-05, + "loss": 1.1469, + "step": 11070 + }, + { + "epoch": 0.16712419680835017, + "grad_norm": 0.7025139564823175, + "learning_rate": 1.878250276918226e-05, + "loss": 1.1518, + "step": 11080 + }, + { + "epoch": 0.16727503092099308, + "grad_norm": 0.6669060921184211, + "learning_rate": 1.878021287226005e-05, + "loss": 1.1426, + "step": 11090 + }, + { + "epoch": 0.16742586503363602, + "grad_norm": 0.7172925966737583, + "learning_rate": 1.8777920963774077e-05, + "loss": 1.1751, + "step": 11100 + }, + { + "epoch": 0.16757669914627893, + "grad_norm": 0.7231295355095084, + "learning_rate": 1.8775627044249425e-05, + "loss": 1.1487, + "step": 11110 + }, + { + "epoch": 0.16772753325892184, + "grad_norm": 0.7274142813682, + "learning_rate": 1.877333111421163e-05, + "loss": 1.1423, + "step": 11120 + }, + { + "epoch": 0.16787836737156475, + "grad_norm": 0.6737205548449494, + "learning_rate": 1.87710331741867e-05, + "loss": 1.1534, + "step": 11130 + }, + { + "epoch": 0.16802920148420766, + "grad_norm": 0.6738635815852947, + "learning_rate": 1.8768733224701092e-05, + "loss": 1.1377, + "step": 11140 + }, + { + "epoch": 0.16818003559685057, + "grad_norm": 0.6697110815136748, + "learning_rate": 1.8766431266281733e-05, + "loss": 1.1699, + "step": 11150 + }, + { + "epoch": 0.16833086970949349, + "grad_norm": 0.7213773030224506, + "learning_rate": 1.8764127299456002e-05, + "loss": 1.1538, + "step": 11160 + }, + { + "epoch": 0.16848170382213642, + "grad_norm": 0.6766260028811965, + "learning_rate": 1.876182132475175e-05, + "loss": 1.1284, + "step": 11170 + }, + { + "epoch": 0.16863253793477934, + "grad_norm": 0.6759125877256534, + "learning_rate": 1.875951334269727e-05, + "loss": 1.1376, + "step": 11180 + }, + { + "epoch": 0.16878337204742225, + "grad_norm": 0.6800214046295562, + "learning_rate": 1.8757203353821338e-05, + "loss": 1.1498, + "step": 11190 + }, + { + "epoch": 0.16893420616006516, + "grad_norm": 0.6958084273263708, + "learning_rate": 1.875489135865316e-05, + "loss": 1.1285, + "step": 11200 + }, + { + "epoch": 0.16908504027270807, + "grad_norm": 0.7252547865034261, + "learning_rate": 1.875257735772243e-05, + "loss": 1.1328, + "step": 11210 + }, + { + "epoch": 0.16923587438535098, + "grad_norm": 0.7964476486990195, + "learning_rate": 1.8750261351559292e-05, + "loss": 1.1649, + "step": 11220 + }, + { + "epoch": 0.1693867084979939, + "grad_norm": 0.7224954284462283, + "learning_rate": 1.8747943340694342e-05, + "loss": 1.139, + "step": 11230 + }, + { + "epoch": 0.16953754261063683, + "grad_norm": 0.7240388071676396, + "learning_rate": 1.8745623325658644e-05, + "loss": 1.1727, + "step": 11240 + }, + { + "epoch": 0.16968837672327974, + "grad_norm": 0.7466026468487267, + "learning_rate": 1.8743301306983714e-05, + "loss": 1.1465, + "step": 11250 + }, + { + "epoch": 0.16983921083592265, + "grad_norm": 0.7063319203489826, + "learning_rate": 1.8740977285201532e-05, + "loss": 1.127, + "step": 11260 + }, + { + "epoch": 0.16999004494856557, + "grad_norm": 0.7067593815357751, + "learning_rate": 1.873865126084454e-05, + "loss": 1.1377, + "step": 11270 + }, + { + "epoch": 0.17014087906120848, + "grad_norm": 0.7462487355917177, + "learning_rate": 1.873632323444563e-05, + "loss": 1.1378, + "step": 11280 + }, + { + "epoch": 0.1702917131738514, + "grad_norm": 0.7750828837301408, + "learning_rate": 1.873399320653816e-05, + "loss": 1.1488, + "step": 11290 + }, + { + "epoch": 0.1704425472864943, + "grad_norm": 0.6892708412745916, + "learning_rate": 1.873166117765594e-05, + "loss": 1.1469, + "step": 11300 + }, + { + "epoch": 0.17059338139913724, + "grad_norm": 0.7131567843312023, + "learning_rate": 1.872932714833325e-05, + "loss": 1.132, + "step": 11310 + }, + { + "epoch": 0.17074421551178015, + "grad_norm": 0.6817920364479637, + "learning_rate": 1.8726991119104814e-05, + "loss": 1.1287, + "step": 11320 + }, + { + "epoch": 0.17089504962442306, + "grad_norm": 0.6950383416906021, + "learning_rate": 1.872465309050582e-05, + "loss": 1.1236, + "step": 11330 + }, + { + "epoch": 0.17104588373706597, + "grad_norm": 0.6886441342418951, + "learning_rate": 1.872231306307192e-05, + "loss": 1.1414, + "step": 11340 + }, + { + "epoch": 0.17119671784970888, + "grad_norm": 0.6805622290008957, + "learning_rate": 1.871997103733922e-05, + "loss": 1.1298, + "step": 11350 + }, + { + "epoch": 0.1713475519623518, + "grad_norm": 0.7324375363384341, + "learning_rate": 1.8717627013844275e-05, + "loss": 1.1517, + "step": 11360 + }, + { + "epoch": 0.1714983860749947, + "grad_norm": 0.6937902444225248, + "learning_rate": 1.871528099312411e-05, + "loss": 1.1452, + "step": 11370 + }, + { + "epoch": 0.17164922018763765, + "grad_norm": 0.6934544490750081, + "learning_rate": 1.87129329757162e-05, + "loss": 1.1263, + "step": 11380 + }, + { + "epoch": 0.17180005430028056, + "grad_norm": 0.6978621892109649, + "learning_rate": 1.8710582962158485e-05, + "loss": 1.1259, + "step": 11390 + }, + { + "epoch": 0.17195088841292347, + "grad_norm": 0.7307065458936725, + "learning_rate": 1.8708230952989356e-05, + "loss": 1.1646, + "step": 11400 + }, + { + "epoch": 0.17210172252556638, + "grad_norm": 0.6955546110475004, + "learning_rate": 1.8705876948747663e-05, + "loss": 1.1388, + "step": 11410 + }, + { + "epoch": 0.1722525566382093, + "grad_norm": 0.6894489921387011, + "learning_rate": 1.8703520949972712e-05, + "loss": 1.1451, + "step": 11420 + }, + { + "epoch": 0.1724033907508522, + "grad_norm": 0.706824104748863, + "learning_rate": 1.8701162957204265e-05, + "loss": 1.1402, + "step": 11430 + }, + { + "epoch": 0.17255422486349511, + "grad_norm": 0.6935062273379664, + "learning_rate": 1.8698802970982545e-05, + "loss": 1.1326, + "step": 11440 + }, + { + "epoch": 0.17270505897613805, + "grad_norm": 0.7107771438058624, + "learning_rate": 1.8696440991848228e-05, + "loss": 1.1331, + "step": 11450 + }, + { + "epoch": 0.17285589308878097, + "grad_norm": 0.7134663161499031, + "learning_rate": 1.869407702034245e-05, + "loss": 1.1249, + "step": 11460 + }, + { + "epoch": 0.17300672720142388, + "grad_norm": 0.792195280878197, + "learning_rate": 1.86917110570068e-05, + "loss": 1.1617, + "step": 11470 + }, + { + "epoch": 0.1731575613140668, + "grad_norm": 0.681815750093441, + "learning_rate": 1.8689343102383327e-05, + "loss": 1.153, + "step": 11480 + }, + { + "epoch": 0.1733083954267097, + "grad_norm": 0.6959232727285589, + "learning_rate": 1.8686973157014533e-05, + "loss": 1.1477, + "step": 11490 + }, + { + "epoch": 0.1734592295393526, + "grad_norm": 0.6993195801589731, + "learning_rate": 1.8684601221443374e-05, + "loss": 1.1331, + "step": 11500 + }, + { + "epoch": 0.17361006365199552, + "grad_norm": 0.6663025565719527, + "learning_rate": 1.868222729621327e-05, + "loss": 1.1345, + "step": 11510 + }, + { + "epoch": 0.17376089776463846, + "grad_norm": 0.7289991631845055, + "learning_rate": 1.8679851381868087e-05, + "loss": 1.1274, + "step": 11520 + }, + { + "epoch": 0.17391173187728137, + "grad_norm": 0.7203349609795747, + "learning_rate": 1.8677473478952152e-05, + "loss": 1.1333, + "step": 11530 + }, + { + "epoch": 0.17406256598992428, + "grad_norm": 0.7325516864070929, + "learning_rate": 1.8675093588010253e-05, + "loss": 1.129, + "step": 11540 + }, + { + "epoch": 0.1742134001025672, + "grad_norm": 0.7032676446234007, + "learning_rate": 1.867271170958762e-05, + "loss": 1.1531, + "step": 11550 + }, + { + "epoch": 0.1743642342152101, + "grad_norm": 0.8278168377431049, + "learning_rate": 1.8670327844229953e-05, + "loss": 1.1214, + "step": 11560 + }, + { + "epoch": 0.17451506832785302, + "grad_norm": 0.74163409873251, + "learning_rate": 1.8667941992483395e-05, + "loss": 1.1404, + "step": 11570 + }, + { + "epoch": 0.17466590244049593, + "grad_norm": 0.6722956396142516, + "learning_rate": 1.866555415489455e-05, + "loss": 1.1419, + "step": 11580 + }, + { + "epoch": 0.17481673655313887, + "grad_norm": 0.7906893076085106, + "learning_rate": 1.866316433201048e-05, + "loss": 1.156, + "step": 11590 + }, + { + "epoch": 0.17496757066578178, + "grad_norm": 0.6992610259855967, + "learning_rate": 1.8660772524378693e-05, + "loss": 1.1361, + "step": 11600 + }, + { + "epoch": 0.1751184047784247, + "grad_norm": 0.6367007804872112, + "learning_rate": 1.8658378732547158e-05, + "loss": 1.1595, + "step": 11610 + }, + { + "epoch": 0.1752692388910676, + "grad_norm": 0.7414724771168911, + "learning_rate": 1.86559829570643e-05, + "loss": 1.1464, + "step": 11620 + }, + { + "epoch": 0.1754200730037105, + "grad_norm": 0.6850688421434288, + "learning_rate": 1.8653585198478996e-05, + "loss": 1.1389, + "step": 11630 + }, + { + "epoch": 0.17557090711635343, + "grad_norm": 0.6806334161586931, + "learning_rate": 1.865118545734057e-05, + "loss": 1.1312, + "step": 11640 + }, + { + "epoch": 0.17572174122899634, + "grad_norm": 0.7249915809945029, + "learning_rate": 1.8648783734198818e-05, + "loss": 1.1534, + "step": 11650 + }, + { + "epoch": 0.17587257534163928, + "grad_norm": 0.7172861414050379, + "learning_rate": 1.864638002960397e-05, + "loss": 1.1293, + "step": 11660 + }, + { + "epoch": 0.1760234094542822, + "grad_norm": 0.7322609775628944, + "learning_rate": 1.8643974344106727e-05, + "loss": 1.1412, + "step": 11670 + }, + { + "epoch": 0.1761742435669251, + "grad_norm": 0.76278552688628, + "learning_rate": 1.8641566678258232e-05, + "loss": 1.1509, + "step": 11680 + }, + { + "epoch": 0.176325077679568, + "grad_norm": 0.7052568522928484, + "learning_rate": 1.8639157032610086e-05, + "loss": 1.1446, + "step": 11690 + }, + { + "epoch": 0.17647591179221092, + "grad_norm": 0.7180530205359456, + "learning_rate": 1.8636745407714343e-05, + "loss": 1.1437, + "step": 11700 + }, + { + "epoch": 0.17662674590485383, + "grad_norm": 0.6964432708760545, + "learning_rate": 1.8634331804123512e-05, + "loss": 1.1184, + "step": 11710 + }, + { + "epoch": 0.17677758001749674, + "grad_norm": 0.7237104674251096, + "learning_rate": 1.8631916222390554e-05, + "loss": 1.1395, + "step": 11720 + }, + { + "epoch": 0.17692841413013968, + "grad_norm": 0.6693676718316235, + "learning_rate": 1.8629498663068887e-05, + "loss": 1.1343, + "step": 11730 + }, + { + "epoch": 0.1770792482427826, + "grad_norm": 0.7034521014975006, + "learning_rate": 1.8627079126712367e-05, + "loss": 1.124, + "step": 11740 + }, + { + "epoch": 0.1772300823554255, + "grad_norm": 0.6906185027695817, + "learning_rate": 1.862465761387533e-05, + "loss": 1.143, + "step": 11750 + }, + { + "epoch": 0.17738091646806842, + "grad_norm": 0.7089710204477996, + "learning_rate": 1.862223412511254e-05, + "loss": 1.1236, + "step": 11760 + }, + { + "epoch": 0.17753175058071133, + "grad_norm": 0.7000586366759262, + "learning_rate": 1.8619808660979222e-05, + "loss": 1.1285, + "step": 11770 + }, + { + "epoch": 0.17768258469335424, + "grad_norm": 0.7083347822146864, + "learning_rate": 1.8617381222031063e-05, + "loss": 1.1266, + "step": 11780 + }, + { + "epoch": 0.17783341880599715, + "grad_norm": 0.6760381894906194, + "learning_rate": 1.8614951808824185e-05, + "loss": 1.1339, + "step": 11790 + }, + { + "epoch": 0.1779842529186401, + "grad_norm": 0.7124606792764755, + "learning_rate": 1.8612520421915175e-05, + "loss": 1.1411, + "step": 11800 + }, + { + "epoch": 0.178135087031283, + "grad_norm": 0.6611594770847617, + "learning_rate": 1.8610087061861067e-05, + "loss": 1.1515, + "step": 11810 + }, + { + "epoch": 0.1782859211439259, + "grad_norm": 0.6946362563634354, + "learning_rate": 1.8607651729219352e-05, + "loss": 1.1128, + "step": 11820 + }, + { + "epoch": 0.17843675525656882, + "grad_norm": 0.7179187077874, + "learning_rate": 1.8605214424547968e-05, + "loss": 1.1376, + "step": 11830 + }, + { + "epoch": 0.17858758936921174, + "grad_norm": 0.7958932099804359, + "learning_rate": 1.86027751484053e-05, + "loss": 1.126, + "step": 11840 + }, + { + "epoch": 0.17873842348185465, + "grad_norm": 0.6963588139297575, + "learning_rate": 1.86003339013502e-05, + "loss": 1.124, + "step": 11850 + }, + { + "epoch": 0.17888925759449756, + "grad_norm": 0.6924000618590286, + "learning_rate": 1.8597890683941957e-05, + "loss": 1.1262, + "step": 11860 + }, + { + "epoch": 0.1790400917071405, + "grad_norm": 0.723582489171851, + "learning_rate": 1.859544549674032e-05, + "loss": 1.1466, + "step": 11870 + }, + { + "epoch": 0.1791909258197834, + "grad_norm": 0.7016879431522105, + "learning_rate": 1.8592998340305482e-05, + "loss": 1.1211, + "step": 11880 + }, + { + "epoch": 0.17934175993242632, + "grad_norm": 0.6756767311949402, + "learning_rate": 1.8590549215198094e-05, + "loss": 1.1341, + "step": 11890 + }, + { + "epoch": 0.17949259404506923, + "grad_norm": 0.6790583611009955, + "learning_rate": 1.8588098121979257e-05, + "loss": 1.1315, + "step": 11900 + }, + { + "epoch": 0.17964342815771214, + "grad_norm": 0.7064102609271506, + "learning_rate": 1.8585645061210518e-05, + "loss": 1.1424, + "step": 11910 + }, + { + "epoch": 0.17979426227035505, + "grad_norm": 0.6895317030017645, + "learning_rate": 1.858319003345388e-05, + "loss": 1.1343, + "step": 11920 + }, + { + "epoch": 0.17994509638299797, + "grad_norm": 0.6715516501169626, + "learning_rate": 1.858073303927179e-05, + "loss": 1.1332, + "step": 11930 + }, + { + "epoch": 0.1800959304956409, + "grad_norm": 0.7022922862341832, + "learning_rate": 1.857827407922716e-05, + "loss": 1.1327, + "step": 11940 + }, + { + "epoch": 0.18024676460828382, + "grad_norm": 0.7213169047782166, + "learning_rate": 1.8575813153883333e-05, + "loss": 1.128, + "step": 11950 + }, + { + "epoch": 0.18039759872092673, + "grad_norm": 0.6975143137827594, + "learning_rate": 1.8573350263804116e-05, + "loss": 1.1252, + "step": 11960 + }, + { + "epoch": 0.18054843283356964, + "grad_norm": 0.6917466095172239, + "learning_rate": 1.857088540955376e-05, + "loss": 1.1119, + "step": 11970 + }, + { + "epoch": 0.18069926694621255, + "grad_norm": 0.695247188149585, + "learning_rate": 1.8568418591696972e-05, + "loss": 1.146, + "step": 11980 + }, + { + "epoch": 0.18085010105885546, + "grad_norm": 0.6898722754723089, + "learning_rate": 1.85659498107989e-05, + "loss": 1.1271, + "step": 11990 + }, + { + "epoch": 0.18100093517149837, + "grad_norm": 0.6954463284499152, + "learning_rate": 1.8563479067425146e-05, + "loss": 1.1299, + "step": 12000 + }, + { + "epoch": 0.1811517692841413, + "grad_norm": 0.6729436186370567, + "learning_rate": 1.8561006362141768e-05, + "loss": 1.1445, + "step": 12010 + }, + { + "epoch": 0.18130260339678422, + "grad_norm": 0.7072410281278466, + "learning_rate": 1.855853169551526e-05, + "loss": 1.146, + "step": 12020 + }, + { + "epoch": 0.18145343750942713, + "grad_norm": 0.6458457210218185, + "learning_rate": 1.8556055068112576e-05, + "loss": 1.1197, + "step": 12030 + }, + { + "epoch": 0.18160427162207005, + "grad_norm": 0.6880009899901076, + "learning_rate": 1.8553576480501122e-05, + "loss": 1.1338, + "step": 12040 + }, + { + "epoch": 0.18175510573471296, + "grad_norm": 0.7097412473948749, + "learning_rate": 1.855109593324874e-05, + "loss": 1.1438, + "step": 12050 + }, + { + "epoch": 0.18190593984735587, + "grad_norm": 0.664413029905543, + "learning_rate": 1.854861342692373e-05, + "loss": 1.1435, + "step": 12060 + }, + { + "epoch": 0.18205677395999878, + "grad_norm": 3.677159794499236, + "learning_rate": 1.854612896209484e-05, + "loss": 1.1332, + "step": 12070 + }, + { + "epoch": 0.18220760807264172, + "grad_norm": 0.7579402104843769, + "learning_rate": 1.8543642539331263e-05, + "loss": 1.1433, + "step": 12080 + }, + { + "epoch": 0.18235844218528463, + "grad_norm": 0.7290118902690076, + "learning_rate": 1.8541154159202647e-05, + "loss": 1.1326, + "step": 12090 + }, + { + "epoch": 0.18250927629792754, + "grad_norm": 0.6927537418337405, + "learning_rate": 1.853866382227908e-05, + "loss": 1.147, + "step": 12100 + }, + { + "epoch": 0.18266011041057045, + "grad_norm": 0.6817346325570711, + "learning_rate": 1.8536171529131112e-05, + "loss": 1.1185, + "step": 12110 + }, + { + "epoch": 0.18281094452321336, + "grad_norm": 0.7162943704803846, + "learning_rate": 1.853367728032972e-05, + "loss": 1.1484, + "step": 12120 + }, + { + "epoch": 0.18296177863585628, + "grad_norm": 0.6859126500046775, + "learning_rate": 1.853118107644635e-05, + "loss": 1.1269, + "step": 12130 + }, + { + "epoch": 0.1831126127484992, + "grad_norm": 0.7229628480757891, + "learning_rate": 1.8528682918052884e-05, + "loss": 1.1298, + "step": 12140 + }, + { + "epoch": 0.18326344686114213, + "grad_norm": 0.7666183218742261, + "learning_rate": 1.8526182805721656e-05, + "loss": 1.1401, + "step": 12150 + }, + { + "epoch": 0.18341428097378504, + "grad_norm": 0.7070950866500478, + "learning_rate": 1.8523680740025448e-05, + "loss": 1.1497, + "step": 12160 + }, + { + "epoch": 0.18356511508642795, + "grad_norm": 0.6902095378431503, + "learning_rate": 1.852117672153748e-05, + "loss": 1.1458, + "step": 12170 + }, + { + "epoch": 0.18371594919907086, + "grad_norm": 0.7282174055333527, + "learning_rate": 1.851867075083144e-05, + "loss": 1.1511, + "step": 12180 + }, + { + "epoch": 0.18386678331171377, + "grad_norm": 0.7587439523802539, + "learning_rate": 1.8516162828481442e-05, + "loss": 1.1333, + "step": 12190 + }, + { + "epoch": 0.18401761742435668, + "grad_norm": 0.7090202436917232, + "learning_rate": 1.851365295506206e-05, + "loss": 1.1262, + "step": 12200 + }, + { + "epoch": 0.1841684515369996, + "grad_norm": 0.7084807726541713, + "learning_rate": 1.8511141131148305e-05, + "loss": 1.1509, + "step": 12210 + }, + { + "epoch": 0.18431928564964253, + "grad_norm": 0.6732100214891191, + "learning_rate": 1.8508627357315646e-05, + "loss": 1.1441, + "step": 12220 + }, + { + "epoch": 0.18447011976228544, + "grad_norm": 0.7045360644186814, + "learning_rate": 1.8506111634139997e-05, + "loss": 1.157, + "step": 12230 + }, + { + "epoch": 0.18462095387492836, + "grad_norm": 0.7182047164262373, + "learning_rate": 1.8503593962197703e-05, + "loss": 1.1404, + "step": 12240 + }, + { + "epoch": 0.18477178798757127, + "grad_norm": 0.711917037513882, + "learning_rate": 1.850107434206558e-05, + "loss": 1.1365, + "step": 12250 + }, + { + "epoch": 0.18492262210021418, + "grad_norm": 0.6900919454778266, + "learning_rate": 1.849855277432087e-05, + "loss": 1.1398, + "step": 12260 + }, + { + "epoch": 0.1850734562128571, + "grad_norm": 0.686851393362948, + "learning_rate": 1.8496029259541273e-05, + "loss": 1.1254, + "step": 12270 + }, + { + "epoch": 0.1852242903255, + "grad_norm": 0.6694445075991758, + "learning_rate": 1.8493503798304927e-05, + "loss": 1.1299, + "step": 12280 + }, + { + "epoch": 0.18537512443814294, + "grad_norm": 0.7092813194397192, + "learning_rate": 1.8490976391190427e-05, + "loss": 1.1101, + "step": 12290 + }, + { + "epoch": 0.18552595855078585, + "grad_norm": 0.6886600166676624, + "learning_rate": 1.84884470387768e-05, + "loss": 1.1229, + "step": 12300 + }, + { + "epoch": 0.18567679266342876, + "grad_norm": 0.6984384577672301, + "learning_rate": 1.848591574164353e-05, + "loss": 1.1143, + "step": 12310 + }, + { + "epoch": 0.18582762677607167, + "grad_norm": 0.7912304287690568, + "learning_rate": 1.848338250037054e-05, + "loss": 1.1192, + "step": 12320 + }, + { + "epoch": 0.1859784608887146, + "grad_norm": 0.7061972031211088, + "learning_rate": 1.84808473155382e-05, + "loss": 1.1486, + "step": 12330 + }, + { + "epoch": 0.1861292950013575, + "grad_norm": 0.6884815270467702, + "learning_rate": 1.8478310187727326e-05, + "loss": 1.1344, + "step": 12340 + }, + { + "epoch": 0.1862801291140004, + "grad_norm": 0.699225625288545, + "learning_rate": 1.8475771117519185e-05, + "loss": 1.1296, + "step": 12350 + }, + { + "epoch": 0.18643096322664335, + "grad_norm": 0.6606483231536348, + "learning_rate": 1.847323010549547e-05, + "loss": 1.1411, + "step": 12360 + }, + { + "epoch": 0.18658179733928626, + "grad_norm": 0.7168132744860471, + "learning_rate": 1.8470687152238343e-05, + "loss": 1.1157, + "step": 12370 + }, + { + "epoch": 0.18673263145192917, + "grad_norm": 0.7191797555277687, + "learning_rate": 1.8468142258330394e-05, + "loss": 1.1187, + "step": 12380 + }, + { + "epoch": 0.18688346556457208, + "grad_norm": 0.6968644909807438, + "learning_rate": 1.8465595424354665e-05, + "loss": 1.1391, + "step": 12390 + }, + { + "epoch": 0.187034299677215, + "grad_norm": 0.7068087497647, + "learning_rate": 1.846304665089464e-05, + "loss": 1.1331, + "step": 12400 + }, + { + "epoch": 0.1871851337898579, + "grad_norm": 0.7611369500668416, + "learning_rate": 1.8460495938534247e-05, + "loss": 1.1071, + "step": 12410 + }, + { + "epoch": 0.18733596790250082, + "grad_norm": 0.6848395200597107, + "learning_rate": 1.8457943287857865e-05, + "loss": 1.1617, + "step": 12420 + }, + { + "epoch": 0.18748680201514376, + "grad_norm": 0.7040703267813405, + "learning_rate": 1.8455388699450304e-05, + "loss": 1.1187, + "step": 12430 + }, + { + "epoch": 0.18763763612778667, + "grad_norm": 0.7062718670195195, + "learning_rate": 1.8452832173896828e-05, + "loss": 1.1158, + "step": 12440 + }, + { + "epoch": 0.18778847024042958, + "grad_norm": 0.7013939164379771, + "learning_rate": 1.845027371178314e-05, + "loss": 1.1412, + "step": 12450 + }, + { + "epoch": 0.1879393043530725, + "grad_norm": 0.6760579591137814, + "learning_rate": 1.844771331369539e-05, + "loss": 1.1332, + "step": 12460 + }, + { + "epoch": 0.1880901384657154, + "grad_norm": 0.6972739723908954, + "learning_rate": 1.844515098022017e-05, + "loss": 1.1281, + "step": 12470 + }, + { + "epoch": 0.1882409725783583, + "grad_norm": 0.6791979946191236, + "learning_rate": 1.844258671194452e-05, + "loss": 1.1278, + "step": 12480 + }, + { + "epoch": 0.18839180669100122, + "grad_norm": 0.703838733599386, + "learning_rate": 1.8440020509455916e-05, + "loss": 1.1232, + "step": 12490 + }, + { + "epoch": 0.18854264080364416, + "grad_norm": 0.7058911869244318, + "learning_rate": 1.8437452373342277e-05, + "loss": 1.1443, + "step": 12500 + }, + { + "epoch": 0.18869347491628707, + "grad_norm": 0.6865942290419431, + "learning_rate": 1.8434882304191967e-05, + "loss": 1.1497, + "step": 12510 + }, + { + "epoch": 0.18884430902892999, + "grad_norm": 0.669694211352191, + "learning_rate": 1.84323103025938e-05, + "loss": 1.1228, + "step": 12520 + }, + { + "epoch": 0.1889951431415729, + "grad_norm": 0.7228916775916501, + "learning_rate": 1.8429736369137022e-05, + "loss": 1.1174, + "step": 12530 + }, + { + "epoch": 0.1891459772542158, + "grad_norm": 0.693732706577696, + "learning_rate": 1.8427160504411325e-05, + "loss": 1.1513, + "step": 12540 + }, + { + "epoch": 0.18929681136685872, + "grad_norm": 0.6806447203150313, + "learning_rate": 1.842458270900685e-05, + "loss": 1.1227, + "step": 12550 + }, + { + "epoch": 0.18944764547950163, + "grad_norm": 0.6776208272267064, + "learning_rate": 1.842200298351417e-05, + "loss": 1.1448, + "step": 12560 + }, + { + "epoch": 0.18959847959214457, + "grad_norm": 0.6777784547033135, + "learning_rate": 1.841942132852431e-05, + "loss": 1.1386, + "step": 12570 + }, + { + "epoch": 0.18974931370478748, + "grad_norm": 0.6627718629041571, + "learning_rate": 1.8416837744628725e-05, + "loss": 1.1238, + "step": 12580 + }, + { + "epoch": 0.1899001478174304, + "grad_norm": 0.6667141169401853, + "learning_rate": 1.8414252232419325e-05, + "loss": 1.1303, + "step": 12590 + }, + { + "epoch": 0.1900509819300733, + "grad_norm": 0.6757882914590764, + "learning_rate": 1.8411664792488454e-05, + "loss": 1.1316, + "step": 12600 + }, + { + "epoch": 0.19020181604271622, + "grad_norm": 0.7040046827360492, + "learning_rate": 1.84090754254289e-05, + "loss": 1.1274, + "step": 12610 + }, + { + "epoch": 0.19035265015535913, + "grad_norm": 0.729728767126885, + "learning_rate": 1.840648413183389e-05, + "loss": 1.1556, + "step": 12620 + }, + { + "epoch": 0.19050348426800204, + "grad_norm": 0.6655035858076608, + "learning_rate": 1.8403890912297094e-05, + "loss": 1.1291, + "step": 12630 + }, + { + "epoch": 0.19065431838064498, + "grad_norm": 0.7371450163772761, + "learning_rate": 1.840129576741263e-05, + "loss": 1.1193, + "step": 12640 + }, + { + "epoch": 0.1908051524932879, + "grad_norm": 0.7743359128593528, + "learning_rate": 1.8398698697775047e-05, + "loss": 1.1276, + "step": 12650 + }, + { + "epoch": 0.1909559866059308, + "grad_norm": 0.7534008122082648, + "learning_rate": 1.839609970397933e-05, + "loss": 1.1192, + "step": 12660 + }, + { + "epoch": 0.1911068207185737, + "grad_norm": 0.8395879565615082, + "learning_rate": 1.8393498786620928e-05, + "loss": 1.1376, + "step": 12670 + }, + { + "epoch": 0.19125765483121662, + "grad_norm": 0.7113643434271525, + "learning_rate": 1.8390895946295712e-05, + "loss": 1.1124, + "step": 12680 + }, + { + "epoch": 0.19140848894385953, + "grad_norm": 0.7102754463207457, + "learning_rate": 1.838829118359999e-05, + "loss": 1.1407, + "step": 12690 + }, + { + "epoch": 0.19155932305650245, + "grad_norm": 0.7716429381518779, + "learning_rate": 1.8385684499130524e-05, + "loss": 1.1154, + "step": 12700 + }, + { + "epoch": 0.19171015716914538, + "grad_norm": 0.6759878918707201, + "learning_rate": 1.838307589348451e-05, + "loss": 1.1199, + "step": 12710 + }, + { + "epoch": 0.1918609912817883, + "grad_norm": 0.6979147410400274, + "learning_rate": 1.8380465367259587e-05, + "loss": 1.1167, + "step": 12720 + }, + { + "epoch": 0.1920118253944312, + "grad_norm": 0.656157220747405, + "learning_rate": 1.8377852921053826e-05, + "loss": 1.117, + "step": 12730 + }, + { + "epoch": 0.19216265950707412, + "grad_norm": 0.7122041129684212, + "learning_rate": 1.8375238555465752e-05, + "loss": 1.1314, + "step": 12740 + }, + { + "epoch": 0.19231349361971703, + "grad_norm": 0.694782861529624, + "learning_rate": 1.837262227109431e-05, + "loss": 1.1192, + "step": 12750 + }, + { + "epoch": 0.19246432773235994, + "grad_norm": 0.6836959644722296, + "learning_rate": 1.8370004068538904e-05, + "loss": 1.0963, + "step": 12760 + }, + { + "epoch": 0.19261516184500285, + "grad_norm": 0.666316038981398, + "learning_rate": 1.8367383948399365e-05, + "loss": 1.0961, + "step": 12770 + }, + { + "epoch": 0.1927659959576458, + "grad_norm": 0.7012461288350427, + "learning_rate": 1.8364761911275972e-05, + "loss": 1.1301, + "step": 12780 + }, + { + "epoch": 0.1929168300702887, + "grad_norm": 0.7586881169608619, + "learning_rate": 1.836213795776944e-05, + "loss": 1.1309, + "step": 12790 + }, + { + "epoch": 0.19306766418293161, + "grad_norm": 0.6754964882029466, + "learning_rate": 1.835951208848091e-05, + "loss": 1.1272, + "step": 12800 + }, + { + "epoch": 0.19321849829557453, + "grad_norm": 0.6722194810814724, + "learning_rate": 1.8356884304011987e-05, + "loss": 1.1317, + "step": 12810 + }, + { + "epoch": 0.19336933240821744, + "grad_norm": 0.6674872971552519, + "learning_rate": 1.8354254604964696e-05, + "loss": 1.107, + "step": 12820 + }, + { + "epoch": 0.19352016652086035, + "grad_norm": 0.7264306388566426, + "learning_rate": 1.8351622991941503e-05, + "loss": 1.1229, + "step": 12830 + }, + { + "epoch": 0.19367100063350326, + "grad_norm": 0.6900674671378623, + "learning_rate": 1.8348989465545327e-05, + "loss": 1.125, + "step": 12840 + }, + { + "epoch": 0.1938218347461462, + "grad_norm": 0.7184245986772119, + "learning_rate": 1.8346354026379498e-05, + "loss": 1.1264, + "step": 12850 + }, + { + "epoch": 0.1939726688587891, + "grad_norm": 0.6810708356427008, + "learning_rate": 1.8343716675047812e-05, + "loss": 1.1311, + "step": 12860 + }, + { + "epoch": 0.19412350297143202, + "grad_norm": 0.7742113951686252, + "learning_rate": 1.8341077412154486e-05, + "loss": 1.113, + "step": 12870 + }, + { + "epoch": 0.19427433708407493, + "grad_norm": 0.7335158724342619, + "learning_rate": 1.8338436238304184e-05, + "loss": 1.1367, + "step": 12880 + }, + { + "epoch": 0.19442517119671784, + "grad_norm": 0.6478085514356221, + "learning_rate": 1.8335793154102e-05, + "loss": 1.1158, + "step": 12890 + }, + { + "epoch": 0.19457600530936076, + "grad_norm": 0.8099069405927026, + "learning_rate": 1.8333148160153477e-05, + "loss": 1.1454, + "step": 12900 + }, + { + "epoch": 0.19472683942200367, + "grad_norm": 0.710567116818115, + "learning_rate": 1.833050125706458e-05, + "loss": 1.1436, + "step": 12910 + }, + { + "epoch": 0.1948776735346466, + "grad_norm": 0.7113881875159195, + "learning_rate": 1.832785244544172e-05, + "loss": 1.1346, + "step": 12920 + }, + { + "epoch": 0.19502850764728952, + "grad_norm": 0.742052358570264, + "learning_rate": 1.8325201725891747e-05, + "loss": 1.1139, + "step": 12930 + }, + { + "epoch": 0.19517934175993243, + "grad_norm": 0.6488890758612343, + "learning_rate": 1.832254909902195e-05, + "loss": 1.1308, + "step": 12940 + }, + { + "epoch": 0.19533017587257534, + "grad_norm": 0.7228149416412022, + "learning_rate": 1.8319894565440043e-05, + "loss": 1.1167, + "step": 12950 + }, + { + "epoch": 0.19548100998521825, + "grad_norm": 0.6971144195047997, + "learning_rate": 1.8317238125754193e-05, + "loss": 1.1219, + "step": 12960 + }, + { + "epoch": 0.19563184409786116, + "grad_norm": 0.7211908432071787, + "learning_rate": 1.831457978057299e-05, + "loss": 1.1267, + "step": 12970 + }, + { + "epoch": 0.19578267821050407, + "grad_norm": 0.7797875643305394, + "learning_rate": 1.8311919530505468e-05, + "loss": 1.1216, + "step": 12980 + }, + { + "epoch": 0.195933512323147, + "grad_norm": 0.7185585405671944, + "learning_rate": 1.8309257376161094e-05, + "loss": 1.1178, + "step": 12990 + }, + { + "epoch": 0.19608434643578992, + "grad_norm": 0.7136402094282365, + "learning_rate": 1.8306593318149776e-05, + "loss": 1.1134, + "step": 13000 + }, + { + "epoch": 0.19623518054843284, + "grad_norm": 0.7127502655979123, + "learning_rate": 1.830392735708185e-05, + "loss": 1.1145, + "step": 13010 + }, + { + "epoch": 0.19638601466107575, + "grad_norm": 0.6721924751956533, + "learning_rate": 1.83012594935681e-05, + "loss": 1.0784, + "step": 13020 + }, + { + "epoch": 0.19653684877371866, + "grad_norm": 0.6984854713333559, + "learning_rate": 1.8298589728219725e-05, + "loss": 1.1028, + "step": 13030 + }, + { + "epoch": 0.19668768288636157, + "grad_norm": 0.6783637618457942, + "learning_rate": 1.829591806164839e-05, + "loss": 1.1119, + "step": 13040 + }, + { + "epoch": 0.19683851699900448, + "grad_norm": 0.6531462079646336, + "learning_rate": 1.8293244494466172e-05, + "loss": 1.1199, + "step": 13050 + }, + { + "epoch": 0.19698935111164742, + "grad_norm": 0.7210949888522357, + "learning_rate": 1.8290569027285587e-05, + "loss": 1.1306, + "step": 13060 + }, + { + "epoch": 0.19714018522429033, + "grad_norm": 0.7110400716559422, + "learning_rate": 1.8287891660719594e-05, + "loss": 1.1231, + "step": 13070 + }, + { + "epoch": 0.19729101933693324, + "grad_norm": 0.6704387561507323, + "learning_rate": 1.828521239538158e-05, + "loss": 1.133, + "step": 13080 + }, + { + "epoch": 0.19744185344957615, + "grad_norm": 0.6952978690403011, + "learning_rate": 1.8282531231885374e-05, + "loss": 1.115, + "step": 13090 + }, + { + "epoch": 0.19759268756221907, + "grad_norm": 0.6795643803162182, + "learning_rate": 1.8279848170845233e-05, + "loss": 1.1007, + "step": 13100 + }, + { + "epoch": 0.19774352167486198, + "grad_norm": 0.6872308350667354, + "learning_rate": 1.8277163212875847e-05, + "loss": 1.117, + "step": 13110 + }, + { + "epoch": 0.1978943557875049, + "grad_norm": 0.6945040885374333, + "learning_rate": 1.8274476358592354e-05, + "loss": 1.1151, + "step": 13120 + }, + { + "epoch": 0.19804518990014783, + "grad_norm": 0.6990711265269207, + "learning_rate": 1.827178760861031e-05, + "loss": 1.1131, + "step": 13130 + }, + { + "epoch": 0.19819602401279074, + "grad_norm": 0.6621825204877276, + "learning_rate": 1.826909696354572e-05, + "loss": 1.1028, + "step": 13140 + }, + { + "epoch": 0.19834685812543365, + "grad_norm": 0.6829111596779546, + "learning_rate": 1.826640442401501e-05, + "loss": 1.1273, + "step": 13150 + }, + { + "epoch": 0.19849769223807656, + "grad_norm": 0.6931932358154714, + "learning_rate": 1.8263709990635048e-05, + "loss": 1.1192, + "step": 13160 + }, + { + "epoch": 0.19864852635071947, + "grad_norm": 0.7987454332326249, + "learning_rate": 1.8261013664023136e-05, + "loss": 1.1384, + "step": 13170 + }, + { + "epoch": 0.19879936046336238, + "grad_norm": 0.6954233364478967, + "learning_rate": 1.8258315444797003e-05, + "loss": 1.1279, + "step": 13180 + }, + { + "epoch": 0.1989501945760053, + "grad_norm": 0.6842872248969154, + "learning_rate": 1.825561533357482e-05, + "loss": 1.1222, + "step": 13190 + }, + { + "epoch": 0.19910102868864823, + "grad_norm": 0.7446751775086962, + "learning_rate": 1.8252913330975185e-05, + "loss": 1.0971, + "step": 13200 + }, + { + "epoch": 0.19925186280129115, + "grad_norm": 0.7145105339295874, + "learning_rate": 1.825020943761713e-05, + "loss": 1.1293, + "step": 13210 + }, + { + "epoch": 0.19940269691393406, + "grad_norm": 0.6986310270143061, + "learning_rate": 1.8247503654120132e-05, + "loss": 1.1255, + "step": 13220 + }, + { + "epoch": 0.19955353102657697, + "grad_norm": 0.6511159118869585, + "learning_rate": 1.8244795981104085e-05, + "loss": 1.1289, + "step": 13230 + }, + { + "epoch": 0.19970436513921988, + "grad_norm": 0.6870817117844183, + "learning_rate": 1.824208641918932e-05, + "loss": 1.1178, + "step": 13240 + }, + { + "epoch": 0.1998551992518628, + "grad_norm": 0.6774789531551796, + "learning_rate": 1.8239374968996607e-05, + "loss": 1.1115, + "step": 13250 + }, + { + "epoch": 0.2000060333645057, + "grad_norm": 0.7016248099267838, + "learning_rate": 1.823666163114714e-05, + "loss": 1.1267, + "step": 13260 + }, + { + "epoch": 0.20015686747714864, + "grad_norm": 0.6795694268312399, + "learning_rate": 1.8233946406262552e-05, + "loss": 1.1198, + "step": 13270 + }, + { + "epoch": 0.20030770158979155, + "grad_norm": 0.6746807871994521, + "learning_rate": 1.8231229294964908e-05, + "loss": 1.115, + "step": 13280 + }, + { + "epoch": 0.20045853570243446, + "grad_norm": 0.7743305403696652, + "learning_rate": 1.8228510297876704e-05, + "loss": 1.121, + "step": 13290 + }, + { + "epoch": 0.20060936981507738, + "grad_norm": 0.7458951441673802, + "learning_rate": 1.8225789415620864e-05, + "loss": 1.1203, + "step": 13300 + }, + { + "epoch": 0.2007602039277203, + "grad_norm": 0.6737383600533127, + "learning_rate": 1.8223066648820748e-05, + "loss": 1.1249, + "step": 13310 + }, + { + "epoch": 0.2009110380403632, + "grad_norm": 0.6780775626964904, + "learning_rate": 1.822034199810015e-05, + "loss": 1.12, + "step": 13320 + }, + { + "epoch": 0.2010618721530061, + "grad_norm": 0.6783645583350876, + "learning_rate": 1.8217615464083288e-05, + "loss": 1.1151, + "step": 13330 + }, + { + "epoch": 0.20121270626564905, + "grad_norm": 0.657046524848232, + "learning_rate": 1.821488704739482e-05, + "loss": 1.1083, + "step": 13340 + }, + { + "epoch": 0.20136354037829196, + "grad_norm": 0.6772512691299946, + "learning_rate": 1.8212156748659833e-05, + "loss": 1.1013, + "step": 13350 + }, + { + "epoch": 0.20151437449093487, + "grad_norm": 0.6783212780599649, + "learning_rate": 1.8209424568503843e-05, + "loss": 1.1272, + "step": 13360 + }, + { + "epoch": 0.20166520860357778, + "grad_norm": 0.6686018911531736, + "learning_rate": 1.8206690507552793e-05, + "loss": 1.1111, + "step": 13370 + }, + { + "epoch": 0.2018160427162207, + "grad_norm": 0.714930602111522, + "learning_rate": 1.8203954566433064e-05, + "loss": 1.1091, + "step": 13380 + }, + { + "epoch": 0.2019668768288636, + "grad_norm": 0.695794508781831, + "learning_rate": 1.8201216745771468e-05, + "loss": 1.1323, + "step": 13390 + }, + { + "epoch": 0.20211771094150652, + "grad_norm": 0.740623854301095, + "learning_rate": 1.8198477046195245e-05, + "loss": 1.1204, + "step": 13400 + }, + { + "epoch": 0.20226854505414946, + "grad_norm": 0.7049912449139891, + "learning_rate": 1.819573546833206e-05, + "loss": 1.0834, + "step": 13410 + }, + { + "epoch": 0.20241937916679237, + "grad_norm": 0.6848252086767419, + "learning_rate": 1.819299201281002e-05, + "loss": 1.1213, + "step": 13420 + }, + { + "epoch": 0.20257021327943528, + "grad_norm": 0.6890469534305929, + "learning_rate": 1.8190246680257652e-05, + "loss": 1.1168, + "step": 13430 + }, + { + "epoch": 0.2027210473920782, + "grad_norm": 0.6605307688984581, + "learning_rate": 1.818749947130392e-05, + "loss": 1.1106, + "step": 13440 + }, + { + "epoch": 0.2028718815047211, + "grad_norm": 0.7093464488997065, + "learning_rate": 1.8184750386578213e-05, + "loss": 1.1237, + "step": 13450 + }, + { + "epoch": 0.203022715617364, + "grad_norm": 0.7234009870608376, + "learning_rate": 1.818199942671035e-05, + "loss": 1.1221, + "step": 13460 + }, + { + "epoch": 0.20317354973000692, + "grad_norm": 0.7597448775059668, + "learning_rate": 1.8179246592330587e-05, + "loss": 1.1126, + "step": 13470 + }, + { + "epoch": 0.20332438384264986, + "grad_norm": 0.6819832663056694, + "learning_rate": 1.81764918840696e-05, + "loss": 1.1181, + "step": 13480 + }, + { + "epoch": 0.20347521795529278, + "grad_norm": 0.7363627239720105, + "learning_rate": 1.8173735302558496e-05, + "loss": 1.1345, + "step": 13490 + }, + { + "epoch": 0.2036260520679357, + "grad_norm": 0.7085832701515208, + "learning_rate": 1.817097684842881e-05, + "loss": 1.1191, + "step": 13500 + }, + { + "epoch": 0.2037768861805786, + "grad_norm": 0.768193990680594, + "learning_rate": 1.8168216522312523e-05, + "loss": 1.1211, + "step": 13510 + }, + { + "epoch": 0.2039277202932215, + "grad_norm": 0.6606673636137438, + "learning_rate": 1.816545432484202e-05, + "loss": 1.1114, + "step": 13520 + }, + { + "epoch": 0.20407855440586442, + "grad_norm": 0.764885581986934, + "learning_rate": 1.8162690256650127e-05, + "loss": 1.1243, + "step": 13530 + }, + { + "epoch": 0.20422938851850733, + "grad_norm": 0.6994476504759588, + "learning_rate": 1.8159924318370094e-05, + "loss": 1.1086, + "step": 13540 + }, + { + "epoch": 0.20438022263115027, + "grad_norm": 0.702649809616147, + "learning_rate": 1.8157156510635614e-05, + "loss": 1.1307, + "step": 13550 + }, + { + "epoch": 0.20453105674379318, + "grad_norm": 0.6568875047025057, + "learning_rate": 1.8154386834080785e-05, + "loss": 1.1123, + "step": 13560 + }, + { + "epoch": 0.2046818908564361, + "grad_norm": 0.6528050117926801, + "learning_rate": 1.8151615289340158e-05, + "loss": 1.1149, + "step": 13570 + }, + { + "epoch": 0.204832724969079, + "grad_norm": 0.8260330642183956, + "learning_rate": 1.8148841877048685e-05, + "loss": 1.1001, + "step": 13580 + }, + { + "epoch": 0.20498355908172192, + "grad_norm": 0.712797639502007, + "learning_rate": 1.8146066597841773e-05, + "loss": 1.1014, + "step": 13590 + }, + { + "epoch": 0.20513439319436483, + "grad_norm": 0.714358642627648, + "learning_rate": 1.814328945235523e-05, + "loss": 1.1093, + "step": 13600 + }, + { + "epoch": 0.20528522730700774, + "grad_norm": 0.6628505936861528, + "learning_rate": 1.814051044122532e-05, + "loss": 1.1241, + "step": 13610 + }, + { + "epoch": 0.20543606141965068, + "grad_norm": 0.6945190946594277, + "learning_rate": 1.813772956508871e-05, + "loss": 1.0951, + "step": 13620 + }, + { + "epoch": 0.2055868955322936, + "grad_norm": 0.6589812919535326, + "learning_rate": 1.813494682458251e-05, + "loss": 1.0803, + "step": 13630 + }, + { + "epoch": 0.2057377296449365, + "grad_norm": 0.684803593923353, + "learning_rate": 1.8132162220344246e-05, + "loss": 1.0992, + "step": 13640 + }, + { + "epoch": 0.2058885637575794, + "grad_norm": 0.6856286180045482, + "learning_rate": 1.812937575301188e-05, + "loss": 1.1051, + "step": 13650 + }, + { + "epoch": 0.20603939787022232, + "grad_norm": 0.7245698364834849, + "learning_rate": 1.8126587423223795e-05, + "loss": 1.0981, + "step": 13660 + }, + { + "epoch": 0.20619023198286524, + "grad_norm": 0.6734507114944105, + "learning_rate": 1.8123797231618804e-05, + "loss": 1.114, + "step": 13670 + }, + { + "epoch": 0.20634106609550815, + "grad_norm": 0.6583387399893541, + "learning_rate": 1.8121005178836145e-05, + "loss": 1.0983, + "step": 13680 + }, + { + "epoch": 0.20649190020815109, + "grad_norm": 0.6661318310347046, + "learning_rate": 1.8118211265515483e-05, + "loss": 1.1111, + "step": 13690 + }, + { + "epoch": 0.206642734320794, + "grad_norm": 0.6699441096820292, + "learning_rate": 1.8115415492296908e-05, + "loss": 1.115, + "step": 13700 + }, + { + "epoch": 0.2067935684334369, + "grad_norm": 0.7123730679798163, + "learning_rate": 1.8112617859820936e-05, + "loss": 1.0978, + "step": 13710 + }, + { + "epoch": 0.20694440254607982, + "grad_norm": 0.6748242410108531, + "learning_rate": 1.8109818368728512e-05, + "loss": 1.1181, + "step": 13720 + }, + { + "epoch": 0.20709523665872273, + "grad_norm": 0.6669149171114855, + "learning_rate": 1.8107017019661005e-05, + "loss": 1.1095, + "step": 13730 + }, + { + "epoch": 0.20724607077136564, + "grad_norm": 0.7127243672154123, + "learning_rate": 1.810421381326021e-05, + "loss": 1.1207, + "step": 13740 + }, + { + "epoch": 0.20739690488400855, + "grad_norm": 0.6486342830296034, + "learning_rate": 1.8101408750168344e-05, + "loss": 1.1076, + "step": 13750 + }, + { + "epoch": 0.2075477389966515, + "grad_norm": 0.6583926597616069, + "learning_rate": 1.8098601831028056e-05, + "loss": 1.1132, + "step": 13760 + }, + { + "epoch": 0.2076985731092944, + "grad_norm": 0.6791809004297885, + "learning_rate": 1.8095793056482415e-05, + "loss": 1.0969, + "step": 13770 + }, + { + "epoch": 0.20784940722193732, + "grad_norm": 0.6836959633977542, + "learning_rate": 1.8092982427174912e-05, + "loss": 1.1012, + "step": 13780 + }, + { + "epoch": 0.20800024133458023, + "grad_norm": 0.7102443456939566, + "learning_rate": 1.8090169943749477e-05, + "loss": 1.1315, + "step": 13790 + }, + { + "epoch": 0.20815107544722314, + "grad_norm": 0.6868675394876558, + "learning_rate": 1.808735560685045e-05, + "loss": 1.0992, + "step": 13800 + }, + { + "epoch": 0.20830190955986605, + "grad_norm": 0.6810019140336319, + "learning_rate": 1.80845394171226e-05, + "loss": 1.1029, + "step": 13810 + }, + { + "epoch": 0.20845274367250896, + "grad_norm": 0.6862406056763503, + "learning_rate": 1.8081721375211124e-05, + "loss": 1.1103, + "step": 13820 + }, + { + "epoch": 0.2086035777851519, + "grad_norm": 0.6680654643566842, + "learning_rate": 1.807890148176164e-05, + "loss": 1.0971, + "step": 13830 + }, + { + "epoch": 0.2087544118977948, + "grad_norm": 0.72942388692847, + "learning_rate": 1.8076079737420186e-05, + "loss": 1.1297, + "step": 13840 + }, + { + "epoch": 0.20890524601043772, + "grad_norm": 0.7367654560274234, + "learning_rate": 1.807325614283324e-05, + "loss": 1.1035, + "step": 13850 + }, + { + "epoch": 0.20905608012308063, + "grad_norm": 0.6922000349430802, + "learning_rate": 1.8070430698647684e-05, + "loss": 1.1149, + "step": 13860 + }, + { + "epoch": 0.20920691423572355, + "grad_norm": 0.7662436814001445, + "learning_rate": 1.806760340551084e-05, + "loss": 1.1079, + "step": 13870 + }, + { + "epoch": 0.20935774834836646, + "grad_norm": 0.671190557615294, + "learning_rate": 1.806477426407044e-05, + "loss": 1.1047, + "step": 13880 + }, + { + "epoch": 0.20950858246100937, + "grad_norm": 0.6488845876446406, + "learning_rate": 1.8061943274974645e-05, + "loss": 1.1029, + "step": 13890 + }, + { + "epoch": 0.2096594165736523, + "grad_norm": 0.6793653808438085, + "learning_rate": 1.8059110438872048e-05, + "loss": 1.0947, + "step": 13900 + }, + { + "epoch": 0.20981025068629522, + "grad_norm": 0.6867217238359388, + "learning_rate": 1.805627575641165e-05, + "loss": 1.1002, + "step": 13910 + }, + { + "epoch": 0.20996108479893813, + "grad_norm": 0.6953727870835451, + "learning_rate": 1.8053439228242883e-05, + "loss": 1.1134, + "step": 13920 + }, + { + "epoch": 0.21011191891158104, + "grad_norm": 0.6653796759011867, + "learning_rate": 1.8050600855015602e-05, + "loss": 1.1015, + "step": 13930 + }, + { + "epoch": 0.21026275302422395, + "grad_norm": 0.67578280996323, + "learning_rate": 1.8047760637380086e-05, + "loss": 1.1169, + "step": 13940 + }, + { + "epoch": 0.21041358713686686, + "grad_norm": 0.734227137092205, + "learning_rate": 1.8044918575987032e-05, + "loss": 1.1134, + "step": 13950 + }, + { + "epoch": 0.21056442124950978, + "grad_norm": 0.7721767301273763, + "learning_rate": 1.8042074671487565e-05, + "loss": 1.1011, + "step": 13960 + }, + { + "epoch": 0.21071525536215271, + "grad_norm": 0.6818962190481834, + "learning_rate": 1.8039228924533225e-05, + "loss": 1.0957, + "step": 13970 + }, + { + "epoch": 0.21086608947479563, + "grad_norm": 0.694189434192548, + "learning_rate": 1.803638133577598e-05, + "loss": 1.0976, + "step": 13980 + }, + { + "epoch": 0.21101692358743854, + "grad_norm": 0.659527526138621, + "learning_rate": 1.8033531905868216e-05, + "loss": 1.1094, + "step": 13990 + }, + { + "epoch": 0.21116775770008145, + "grad_norm": 0.7697849023870582, + "learning_rate": 1.8030680635462744e-05, + "loss": 1.127, + "step": 14000 + }, + { + "epoch": 0.21131859181272436, + "grad_norm": 0.7056090286492708, + "learning_rate": 1.80278275252128e-05, + "loss": 1.1153, + "step": 14010 + }, + { + "epoch": 0.21146942592536727, + "grad_norm": 0.6753967862567547, + "learning_rate": 1.802497257577203e-05, + "loss": 1.1182, + "step": 14020 + }, + { + "epoch": 0.21162026003801018, + "grad_norm": 0.6808027196482743, + "learning_rate": 1.8022115787794515e-05, + "loss": 1.0987, + "step": 14030 + }, + { + "epoch": 0.21177109415065312, + "grad_norm": 0.6672355273843327, + "learning_rate": 1.8019257161934744e-05, + "loss": 1.1174, + "step": 14040 + }, + { + "epoch": 0.21192192826329603, + "grad_norm": 0.666926007046935, + "learning_rate": 1.8016396698847635e-05, + "loss": 1.1096, + "step": 14050 + }, + { + "epoch": 0.21207276237593894, + "grad_norm": 0.7031086053682446, + "learning_rate": 1.801353439918853e-05, + "loss": 1.1328, + "step": 14060 + }, + { + "epoch": 0.21222359648858186, + "grad_norm": 0.6982504801377075, + "learning_rate": 1.801067026361319e-05, + "loss": 1.1046, + "step": 14070 + }, + { + "epoch": 0.21237443060122477, + "grad_norm": 0.6762776136489627, + "learning_rate": 1.8007804292777783e-05, + "loss": 1.1167, + "step": 14080 + }, + { + "epoch": 0.21252526471386768, + "grad_norm": 0.8143203055532565, + "learning_rate": 1.8004936487338918e-05, + "loss": 1.0858, + "step": 14090 + }, + { + "epoch": 0.2126760988265106, + "grad_norm": 0.6901539852198502, + "learning_rate": 1.8002066847953613e-05, + "loss": 1.0903, + "step": 14100 + }, + { + "epoch": 0.21282693293915353, + "grad_norm": 0.674370112698135, + "learning_rate": 1.79991953752793e-05, + "loss": 1.0948, + "step": 14110 + }, + { + "epoch": 0.21297776705179644, + "grad_norm": 0.6868007971993411, + "learning_rate": 1.7996322069973852e-05, + "loss": 1.1, + "step": 14120 + }, + { + "epoch": 0.21312860116443935, + "grad_norm": 0.7237560788877101, + "learning_rate": 1.7993446932695536e-05, + "loss": 1.1057, + "step": 14130 + }, + { + "epoch": 0.21327943527708226, + "grad_norm": 0.6901430975210521, + "learning_rate": 1.799056996410306e-05, + "loss": 1.1059, + "step": 14140 + }, + { + "epoch": 0.21343026938972517, + "grad_norm": 0.6725486556534908, + "learning_rate": 1.7987691164855542e-05, + "loss": 1.0926, + "step": 14150 + }, + { + "epoch": 0.21358110350236809, + "grad_norm": 0.6773224710969874, + "learning_rate": 1.7984810535612517e-05, + "loss": 1.1123, + "step": 14160 + }, + { + "epoch": 0.213731937615011, + "grad_norm": 0.6797642565152825, + "learning_rate": 1.7981928077033948e-05, + "loss": 1.1044, + "step": 14170 + }, + { + "epoch": 0.21388277172765394, + "grad_norm": 0.7296262415146413, + "learning_rate": 1.7979043789780208e-05, + "loss": 1.1062, + "step": 14180 + }, + { + "epoch": 0.21403360584029685, + "grad_norm": 0.6763601571951274, + "learning_rate": 1.797615767451209e-05, + "loss": 1.0985, + "step": 14190 + }, + { + "epoch": 0.21418443995293976, + "grad_norm": 0.6869203267785567, + "learning_rate": 1.7973269731890813e-05, + "loss": 1.111, + "step": 14200 + }, + { + "epoch": 0.21433527406558267, + "grad_norm": 0.679173319319237, + "learning_rate": 1.7970379962578012e-05, + "loss": 1.107, + "step": 14210 + }, + { + "epoch": 0.21448610817822558, + "grad_norm": 0.716902828952182, + "learning_rate": 1.7967488367235733e-05, + "loss": 1.1111, + "step": 14220 + }, + { + "epoch": 0.2146369422908685, + "grad_norm": 0.6810343389275618, + "learning_rate": 1.796459494652645e-05, + "loss": 1.1208, + "step": 14230 + }, + { + "epoch": 0.2147877764035114, + "grad_norm": 0.7196813012367693, + "learning_rate": 1.7961699701113052e-05, + "loss": 1.1046, + "step": 14240 + }, + { + "epoch": 0.21493861051615434, + "grad_norm": 0.6776449015604403, + "learning_rate": 1.7958802631658842e-05, + "loss": 1.0966, + "step": 14250 + }, + { + "epoch": 0.21508944462879726, + "grad_norm": 0.7039399864141729, + "learning_rate": 1.7955903738827548e-05, + "loss": 1.1201, + "step": 14260 + }, + { + "epoch": 0.21524027874144017, + "grad_norm": 0.6769298138590549, + "learning_rate": 1.795300302328331e-05, + "loss": 1.1045, + "step": 14270 + }, + { + "epoch": 0.21539111285408308, + "grad_norm": 0.6933429897804078, + "learning_rate": 1.7950100485690684e-05, + "loss": 1.0989, + "step": 14280 + }, + { + "epoch": 0.215541946966726, + "grad_norm": 0.679788118428669, + "learning_rate": 1.7947196126714654e-05, + "loss": 1.1098, + "step": 14290 + }, + { + "epoch": 0.2156927810793689, + "grad_norm": 0.692150377165008, + "learning_rate": 1.7944289947020606e-05, + "loss": 1.098, + "step": 14300 + }, + { + "epoch": 0.2158436151920118, + "grad_norm": 0.7228829005965157, + "learning_rate": 1.7941381947274362e-05, + "loss": 1.1138, + "step": 14310 + }, + { + "epoch": 0.21599444930465475, + "grad_norm": 0.6660225317046663, + "learning_rate": 1.7938472128142144e-05, + "loss": 1.0864, + "step": 14320 + }, + { + "epoch": 0.21614528341729766, + "grad_norm": 0.6643853917955505, + "learning_rate": 1.7935560490290597e-05, + "loss": 1.1251, + "step": 14330 + }, + { + "epoch": 0.21629611752994057, + "grad_norm": 0.8306216646874459, + "learning_rate": 1.793264703438678e-05, + "loss": 1.1158, + "step": 14340 + }, + { + "epoch": 0.21644695164258348, + "grad_norm": 0.7151311090530558, + "learning_rate": 1.7929731761098175e-05, + "loss": 1.1013, + "step": 14350 + }, + { + "epoch": 0.2165977857552264, + "grad_norm": 0.6758916253190587, + "learning_rate": 1.7926814671092682e-05, + "loss": 1.1223, + "step": 14360 + }, + { + "epoch": 0.2167486198678693, + "grad_norm": 0.7497665880200246, + "learning_rate": 1.7923895765038602e-05, + "loss": 1.0994, + "step": 14370 + }, + { + "epoch": 0.21689945398051222, + "grad_norm": 0.6997868265570042, + "learning_rate": 1.7920975043604668e-05, + "loss": 1.1154, + "step": 14380 + }, + { + "epoch": 0.21705028809315516, + "grad_norm": 0.728656416998413, + "learning_rate": 1.7918052507460023e-05, + "loss": 1.1079, + "step": 14390 + }, + { + "epoch": 0.21720112220579807, + "grad_norm": 0.688045040274859, + "learning_rate": 1.7915128157274222e-05, + "loss": 1.1092, + "step": 14400 + }, + { + "epoch": 0.21735195631844098, + "grad_norm": 0.7474927064509022, + "learning_rate": 1.7912201993717245e-05, + "loss": 1.0938, + "step": 14410 + }, + { + "epoch": 0.2175027904310839, + "grad_norm": 0.728873347574301, + "learning_rate": 1.7909274017459474e-05, + "loss": 1.0942, + "step": 14420 + }, + { + "epoch": 0.2176536245437268, + "grad_norm": 0.6426929535763358, + "learning_rate": 1.7906344229171722e-05, + "loss": 1.1192, + "step": 14430 + }, + { + "epoch": 0.21780445865636971, + "grad_norm": 0.68848164667077, + "learning_rate": 1.79034126295252e-05, + "loss": 1.0929, + "step": 14440 + }, + { + "epoch": 0.21795529276901263, + "grad_norm": 0.6640592323980373, + "learning_rate": 1.790047921919156e-05, + "loss": 1.1147, + "step": 14450 + }, + { + "epoch": 0.21810612688165557, + "grad_norm": 0.6884032229060625, + "learning_rate": 1.789754399884283e-05, + "loss": 1.1052, + "step": 14460 + }, + { + "epoch": 0.21825696099429848, + "grad_norm": 0.709597627607459, + "learning_rate": 1.789460696915149e-05, + "loss": 1.1078, + "step": 14470 + }, + { + "epoch": 0.2184077951069414, + "grad_norm": 0.7211188102858535, + "learning_rate": 1.789166813079041e-05, + "loss": 1.1118, + "step": 14480 + }, + { + "epoch": 0.2185586292195843, + "grad_norm": 0.6690423977841052, + "learning_rate": 1.7888727484432892e-05, + "loss": 1.0983, + "step": 14490 + }, + { + "epoch": 0.2187094633322272, + "grad_norm": 0.6315866348439336, + "learning_rate": 1.788578503075264e-05, + "loss": 1.121, + "step": 14500 + }, + { + "epoch": 0.21886029744487012, + "grad_norm": 0.7648205534042971, + "learning_rate": 1.7882840770423774e-05, + "loss": 1.1305, + "step": 14510 + }, + { + "epoch": 0.21901113155751303, + "grad_norm": 0.7314453028645725, + "learning_rate": 1.7879894704120832e-05, + "loss": 1.1099, + "step": 14520 + }, + { + "epoch": 0.21916196567015597, + "grad_norm": 0.7243558051900384, + "learning_rate": 1.787694683251876e-05, + "loss": 1.1106, + "step": 14530 + }, + { + "epoch": 0.21931279978279888, + "grad_norm": 0.695868400880282, + "learning_rate": 1.787399715629293e-05, + "loss": 1.0995, + "step": 14540 + }, + { + "epoch": 0.2194636338954418, + "grad_norm": 0.7452881106997423, + "learning_rate": 1.787104567611911e-05, + "loss": 1.1077, + "step": 14550 + }, + { + "epoch": 0.2196144680080847, + "grad_norm": 0.6914031810316938, + "learning_rate": 1.786809239267349e-05, + "loss": 1.1054, + "step": 14560 + }, + { + "epoch": 0.21976530212072762, + "grad_norm": 0.6821325353918407, + "learning_rate": 1.7865137306632677e-05, + "loss": 1.0918, + "step": 14570 + }, + { + "epoch": 0.21991613623337053, + "grad_norm": 0.74036958354489, + "learning_rate": 1.7862180418673687e-05, + "loss": 1.0985, + "step": 14580 + }, + { + "epoch": 0.22006697034601344, + "grad_norm": 0.6495620258022826, + "learning_rate": 1.7859221729473943e-05, + "loss": 1.0806, + "step": 14590 + }, + { + "epoch": 0.22021780445865638, + "grad_norm": 0.7306177664285215, + "learning_rate": 1.7856261239711294e-05, + "loss": 1.0957, + "step": 14600 + }, + { + "epoch": 0.2203686385712993, + "grad_norm": 0.7175006975019631, + "learning_rate": 1.7853298950063987e-05, + "loss": 1.0938, + "step": 14610 + }, + { + "epoch": 0.2205194726839422, + "grad_norm": 0.6614229770677365, + "learning_rate": 1.7850334861210697e-05, + "loss": 1.094, + "step": 14620 + }, + { + "epoch": 0.22067030679658511, + "grad_norm": 0.6682989591423544, + "learning_rate": 1.784736897383049e-05, + "loss": 1.0848, + "step": 14630 + }, + { + "epoch": 0.22082114090922803, + "grad_norm": 0.6711539381109989, + "learning_rate": 1.784440128860287e-05, + "loss": 1.1024, + "step": 14640 + }, + { + "epoch": 0.22097197502187094, + "grad_norm": 0.7428655411756798, + "learning_rate": 1.784143180620773e-05, + "loss": 1.1186, + "step": 14650 + }, + { + "epoch": 0.22112280913451385, + "grad_norm": 0.7257757209752669, + "learning_rate": 1.783846052732539e-05, + "loss": 1.1211, + "step": 14660 + }, + { + "epoch": 0.2212736432471568, + "grad_norm": 0.6569372976886673, + "learning_rate": 1.783548745263657e-05, + "loss": 1.0993, + "step": 14670 + }, + { + "epoch": 0.2214244773597997, + "grad_norm": 0.661931494929091, + "learning_rate": 1.783251258282241e-05, + "loss": 1.109, + "step": 14680 + }, + { + "epoch": 0.2215753114724426, + "grad_norm": 0.7138476762436432, + "learning_rate": 1.7829535918564458e-05, + "loss": 1.1103, + "step": 14690 + }, + { + "epoch": 0.22172614558508552, + "grad_norm": 0.6522435336887675, + "learning_rate": 1.7826557460544677e-05, + "loss": 1.1179, + "step": 14700 + }, + { + "epoch": 0.22187697969772843, + "grad_norm": 0.708210090028287, + "learning_rate": 1.782357720944543e-05, + "loss": 1.0982, + "step": 14710 + }, + { + "epoch": 0.22202781381037134, + "grad_norm": 0.7723836588240303, + "learning_rate": 1.782059516594951e-05, + "loss": 1.0847, + "step": 14720 + }, + { + "epoch": 0.22217864792301426, + "grad_norm": 0.7192745307596424, + "learning_rate": 1.7817611330740098e-05, + "loss": 1.099, + "step": 14730 + }, + { + "epoch": 0.2223294820356572, + "grad_norm": 0.664858710386557, + "learning_rate": 1.7814625704500798e-05, + "loss": 1.0895, + "step": 14740 + }, + { + "epoch": 0.2224803161483001, + "grad_norm": 0.6815411881369254, + "learning_rate": 1.7811638287915628e-05, + "loss": 1.0963, + "step": 14750 + }, + { + "epoch": 0.22263115026094302, + "grad_norm": 0.6868737946983016, + "learning_rate": 1.7808649081669004e-05, + "loss": 1.0946, + "step": 14760 + }, + { + "epoch": 0.22278198437358593, + "grad_norm": 0.6594026911685653, + "learning_rate": 1.7805658086445767e-05, + "loss": 1.1022, + "step": 14770 + }, + { + "epoch": 0.22293281848622884, + "grad_norm": 0.6808113401395861, + "learning_rate": 1.7802665302931155e-05, + "loss": 1.0966, + "step": 14780 + }, + { + "epoch": 0.22308365259887175, + "grad_norm": 0.7170698687187315, + "learning_rate": 1.7799670731810825e-05, + "loss": 1.0938, + "step": 14790 + }, + { + "epoch": 0.22323448671151466, + "grad_norm": 0.6686136948509395, + "learning_rate": 1.7796674373770826e-05, + "loss": 1.1138, + "step": 14800 + }, + { + "epoch": 0.2233853208241576, + "grad_norm": 0.6578929188350353, + "learning_rate": 1.7793676229497644e-05, + "loss": 1.1181, + "step": 14810 + }, + { + "epoch": 0.2235361549368005, + "grad_norm": 0.670222599847885, + "learning_rate": 1.7790676299678156e-05, + "loss": 1.093, + "step": 14820 + }, + { + "epoch": 0.22368698904944342, + "grad_norm": 0.7619792170645067, + "learning_rate": 1.7787674584999647e-05, + "loss": 1.1109, + "step": 14830 + }, + { + "epoch": 0.22383782316208634, + "grad_norm": 0.6640447954370773, + "learning_rate": 1.778467108614982e-05, + "loss": 1.1144, + "step": 14840 + }, + { + "epoch": 0.22398865727472925, + "grad_norm": 0.7310867854975569, + "learning_rate": 1.778166580381678e-05, + "loss": 1.1136, + "step": 14850 + }, + { + "epoch": 0.22413949138737216, + "grad_norm": 0.664184176320297, + "learning_rate": 1.7778658738689046e-05, + "loss": 1.093, + "step": 14860 + }, + { + "epoch": 0.22429032550001507, + "grad_norm": 0.7301719110114934, + "learning_rate": 1.777564989145554e-05, + "loss": 1.091, + "step": 14870 + }, + { + "epoch": 0.224441159612658, + "grad_norm": 0.6673073855124689, + "learning_rate": 1.7772639262805592e-05, + "loss": 1.1006, + "step": 14880 + }, + { + "epoch": 0.22459199372530092, + "grad_norm": 0.7193893238171307, + "learning_rate": 1.776962685342895e-05, + "loss": 1.0846, + "step": 14890 + }, + { + "epoch": 0.22474282783794383, + "grad_norm": 0.798072714759334, + "learning_rate": 1.7766612664015758e-05, + "loss": 1.0848, + "step": 14900 + }, + { + "epoch": 0.22489366195058674, + "grad_norm": 0.67028548772799, + "learning_rate": 1.7763596695256572e-05, + "loss": 1.0872, + "step": 14910 + }, + { + "epoch": 0.22504449606322965, + "grad_norm": 0.6950351957033659, + "learning_rate": 1.776057894784236e-05, + "loss": 1.1138, + "step": 14920 + }, + { + "epoch": 0.22519533017587257, + "grad_norm": 0.6903639569046915, + "learning_rate": 1.7757559422464488e-05, + "loss": 1.1037, + "step": 14930 + }, + { + "epoch": 0.22534616428851548, + "grad_norm": 0.6667504549478803, + "learning_rate": 1.7754538119814743e-05, + "loss": 1.0814, + "step": 14940 + }, + { + "epoch": 0.22549699840115842, + "grad_norm": 0.6911971230276427, + "learning_rate": 1.7751515040585305e-05, + "loss": 1.1119, + "step": 14950 + }, + { + "epoch": 0.22564783251380133, + "grad_norm": 0.7661580582614036, + "learning_rate": 1.7748490185468767e-05, + "loss": 1.094, + "step": 14960 + }, + { + "epoch": 0.22579866662644424, + "grad_norm": 0.6925397318584574, + "learning_rate": 1.7745463555158137e-05, + "loss": 1.101, + "step": 14970 + }, + { + "epoch": 0.22594950073908715, + "grad_norm": 0.7072289962440089, + "learning_rate": 1.774243515034681e-05, + "loss": 1.0886, + "step": 14980 + }, + { + "epoch": 0.22610033485173006, + "grad_norm": 0.6751344117794871, + "learning_rate": 1.773940497172861e-05, + "loss": 1.1003, + "step": 14990 + }, + { + "epoch": 0.22625116896437297, + "grad_norm": 0.6928814680794468, + "learning_rate": 1.773637301999775e-05, + "loss": 1.1034, + "step": 15000 + }, + { + "epoch": 0.22640200307701588, + "grad_norm": 0.6712912816031943, + "learning_rate": 1.773333929584886e-05, + "loss": 1.1041, + "step": 15010 + }, + { + "epoch": 0.22655283718965882, + "grad_norm": 0.648935002343413, + "learning_rate": 1.773030379997697e-05, + "loss": 1.0922, + "step": 15020 + }, + { + "epoch": 0.22670367130230173, + "grad_norm": 0.7889358033340783, + "learning_rate": 1.772726653307752e-05, + "loss": 1.0913, + "step": 15030 + }, + { + "epoch": 0.22685450541494465, + "grad_norm": 0.6928706734836552, + "learning_rate": 1.772422749584635e-05, + "loss": 1.1156, + "step": 15040 + }, + { + "epoch": 0.22700533952758756, + "grad_norm": 0.7012443452539776, + "learning_rate": 1.7721186688979714e-05, + "loss": 1.0893, + "step": 15050 + }, + { + "epoch": 0.22715617364023047, + "grad_norm": 0.6905707850604544, + "learning_rate": 1.7718144113174264e-05, + "loss": 1.0837, + "step": 15060 + }, + { + "epoch": 0.22730700775287338, + "grad_norm": 0.6979706805268538, + "learning_rate": 1.771509976912706e-05, + "loss": 1.0839, + "step": 15070 + }, + { + "epoch": 0.2274578418655163, + "grad_norm": 0.7471856428661534, + "learning_rate": 1.7712053657535567e-05, + "loss": 1.0997, + "step": 15080 + }, + { + "epoch": 0.22760867597815923, + "grad_norm": 0.6942181657388702, + "learning_rate": 1.7709005779097652e-05, + "loss": 1.1181, + "step": 15090 + }, + { + "epoch": 0.22775951009080214, + "grad_norm": 0.6772282989500898, + "learning_rate": 1.77059561345116e-05, + "loss": 1.1102, + "step": 15100 + }, + { + "epoch": 0.22791034420344505, + "grad_norm": 0.6367819082474029, + "learning_rate": 1.770290472447608e-05, + "loss": 1.0802, + "step": 15110 + }, + { + "epoch": 0.22806117831608796, + "grad_norm": 0.6484406690585535, + "learning_rate": 1.769985154969018e-05, + "loss": 1.0934, + "step": 15120 + }, + { + "epoch": 0.22821201242873088, + "grad_norm": 0.7108813743936482, + "learning_rate": 1.7696796610853387e-05, + "loss": 1.097, + "step": 15130 + }, + { + "epoch": 0.2283628465413738, + "grad_norm": 0.8157443064574387, + "learning_rate": 1.7693739908665595e-05, + "loss": 1.0973, + "step": 15140 + }, + { + "epoch": 0.2285136806540167, + "grad_norm": 0.666936278320696, + "learning_rate": 1.7690681443827102e-05, + "loss": 1.1034, + "step": 15150 + }, + { + "epoch": 0.22866451476665964, + "grad_norm": 0.6310564430954874, + "learning_rate": 1.7687621217038604e-05, + "loss": 1.0801, + "step": 15160 + }, + { + "epoch": 0.22881534887930255, + "grad_norm": 0.6848712862539585, + "learning_rate": 1.7684559229001206e-05, + "loss": 1.089, + "step": 15170 + }, + { + "epoch": 0.22896618299194546, + "grad_norm": 0.9699214238719515, + "learning_rate": 1.7681495480416416e-05, + "loss": 1.1111, + "step": 15180 + }, + { + "epoch": 0.22911701710458837, + "grad_norm": 0.6694916991770467, + "learning_rate": 1.767842997198615e-05, + "loss": 1.0958, + "step": 15190 + }, + { + "epoch": 0.22926785121723128, + "grad_norm": 0.731424948685193, + "learning_rate": 1.767536270441271e-05, + "loss": 1.1212, + "step": 15200 + }, + { + "epoch": 0.2294186853298742, + "grad_norm": 0.6880063635410374, + "learning_rate": 1.7672293678398826e-05, + "loss": 1.1033, + "step": 15210 + }, + { + "epoch": 0.2295695194425171, + "grad_norm": 0.673602600510989, + "learning_rate": 1.7669222894647607e-05, + "loss": 1.0974, + "step": 15220 + }, + { + "epoch": 0.22972035355516005, + "grad_norm": 0.6920611381632878, + "learning_rate": 1.7666150353862583e-05, + "loss": 1.1002, + "step": 15230 + }, + { + "epoch": 0.22987118766780296, + "grad_norm": 0.7091971758361966, + "learning_rate": 1.7663076056747675e-05, + "loss": 1.128, + "step": 15240 + }, + { + "epoch": 0.23002202178044587, + "grad_norm": 0.6723679832585396, + "learning_rate": 1.7660000004007215e-05, + "loss": 1.1014, + "step": 15250 + }, + { + "epoch": 0.23017285589308878, + "grad_norm": 0.6543644409603879, + "learning_rate": 1.7656922196345924e-05, + "loss": 1.1086, + "step": 15260 + }, + { + "epoch": 0.2303236900057317, + "grad_norm": 0.7576691032464872, + "learning_rate": 1.7653842634468943e-05, + "loss": 1.1043, + "step": 15270 + }, + { + "epoch": 0.2304745241183746, + "grad_norm": 0.7146509602824781, + "learning_rate": 1.76507613190818e-05, + "loss": 1.0845, + "step": 15280 + }, + { + "epoch": 0.2306253582310175, + "grad_norm": 0.6652374996451126, + "learning_rate": 1.764767825089043e-05, + "loss": 1.0954, + "step": 15290 + }, + { + "epoch": 0.23077619234366045, + "grad_norm": 0.66264990954283, + "learning_rate": 1.7644593430601174e-05, + "loss": 1.0978, + "step": 15300 + }, + { + "epoch": 0.23092702645630336, + "grad_norm": 0.7349405461801555, + "learning_rate": 1.764150685892077e-05, + "loss": 1.0954, + "step": 15310 + }, + { + "epoch": 0.23107786056894628, + "grad_norm": 0.6456782572527426, + "learning_rate": 1.763841853655635e-05, + "loss": 1.0955, + "step": 15320 + }, + { + "epoch": 0.2312286946815892, + "grad_norm": 0.6741822673211703, + "learning_rate": 1.7635328464215465e-05, + "loss": 1.0901, + "step": 15330 + }, + { + "epoch": 0.2313795287942321, + "grad_norm": 0.6792293778436218, + "learning_rate": 1.7632236642606047e-05, + "loss": 1.0888, + "step": 15340 + }, + { + "epoch": 0.231530362906875, + "grad_norm": 0.6936865189123698, + "learning_rate": 1.7629143072436446e-05, + "loss": 1.1165, + "step": 15350 + }, + { + "epoch": 0.23168119701951792, + "grad_norm": 0.6803363412058172, + "learning_rate": 1.7626047754415406e-05, + "loss": 1.0944, + "step": 15360 + }, + { + "epoch": 0.23183203113216086, + "grad_norm": 0.6559109317802075, + "learning_rate": 1.762295068925206e-05, + "loss": 1.0893, + "step": 15370 + }, + { + "epoch": 0.23198286524480377, + "grad_norm": 0.6413371646763776, + "learning_rate": 1.7619851877655963e-05, + "loss": 1.0854, + "step": 15380 + }, + { + "epoch": 0.23213369935744668, + "grad_norm": 0.6865396379189896, + "learning_rate": 1.761675132033705e-05, + "loss": 1.1007, + "step": 15390 + }, + { + "epoch": 0.2322845334700896, + "grad_norm": 0.7076586142363238, + "learning_rate": 1.7613649018005668e-05, + "loss": 1.0951, + "step": 15400 + }, + { + "epoch": 0.2324353675827325, + "grad_norm": 0.702240750462431, + "learning_rate": 1.7610544971372567e-05, + "loss": 1.0884, + "step": 15410 + }, + { + "epoch": 0.23258620169537542, + "grad_norm": 0.7108883332712679, + "learning_rate": 1.7607439181148875e-05, + "loss": 1.1071, + "step": 15420 + }, + { + "epoch": 0.23273703580801833, + "grad_norm": 0.659685986949961, + "learning_rate": 1.760433164804615e-05, + "loss": 1.105, + "step": 15430 + }, + { + "epoch": 0.23288786992066127, + "grad_norm": 0.6489110656000635, + "learning_rate": 1.7601222372776324e-05, + "loss": 1.0909, + "step": 15440 + }, + { + "epoch": 0.23303870403330418, + "grad_norm": 0.6585104394032361, + "learning_rate": 1.7598111356051743e-05, + "loss": 1.086, + "step": 15450 + }, + { + "epoch": 0.2331895381459471, + "grad_norm": 0.6637785249288088, + "learning_rate": 1.7594998598585144e-05, + "loss": 1.0878, + "step": 15460 + }, + { + "epoch": 0.23334037225859, + "grad_norm": 0.6938448129063192, + "learning_rate": 1.759188410108967e-05, + "loss": 1.0958, + "step": 15470 + }, + { + "epoch": 0.2334912063712329, + "grad_norm": 0.6996806834534638, + "learning_rate": 1.7588767864278853e-05, + "loss": 1.0972, + "step": 15480 + }, + { + "epoch": 0.23364204048387582, + "grad_norm": 0.6713624933304551, + "learning_rate": 1.758564988886663e-05, + "loss": 1.0933, + "step": 15490 + }, + { + "epoch": 0.23379287459651873, + "grad_norm": 0.6856932178441554, + "learning_rate": 1.758253017556734e-05, + "loss": 1.0985, + "step": 15500 + }, + { + "epoch": 0.23394370870916167, + "grad_norm": 0.6705710279513117, + "learning_rate": 1.757940872509571e-05, + "loss": 1.0957, + "step": 15510 + }, + { + "epoch": 0.23409454282180459, + "grad_norm": 0.691563970914557, + "learning_rate": 1.7576285538166874e-05, + "loss": 1.0904, + "step": 15520 + }, + { + "epoch": 0.2342453769344475, + "grad_norm": 0.6909807275801183, + "learning_rate": 1.7573160615496356e-05, + "loss": 1.0814, + "step": 15530 + }, + { + "epoch": 0.2343962110470904, + "grad_norm": 0.7623228582967326, + "learning_rate": 1.7570033957800086e-05, + "loss": 1.0917, + "step": 15540 + }, + { + "epoch": 0.23454704515973332, + "grad_norm": 0.663820352227587, + "learning_rate": 1.7566905565794383e-05, + "loss": 1.1113, + "step": 15550 + }, + { + "epoch": 0.23469787927237623, + "grad_norm": 0.6841213191069678, + "learning_rate": 1.7563775440195973e-05, + "loss": 1.0945, + "step": 15560 + }, + { + "epoch": 0.23484871338501914, + "grad_norm": 0.6376373399966095, + "learning_rate": 1.7560643581721967e-05, + "loss": 1.0964, + "step": 15570 + }, + { + "epoch": 0.23499954749766208, + "grad_norm": 0.7104761866594225, + "learning_rate": 1.755750999108989e-05, + "loss": 1.1018, + "step": 15580 + }, + { + "epoch": 0.235150381610305, + "grad_norm": 0.6798856133391946, + "learning_rate": 1.755437466901764e-05, + "loss": 1.0714, + "step": 15590 + }, + { + "epoch": 0.2353012157229479, + "grad_norm": 0.6732966013535184, + "learning_rate": 1.755123761622354e-05, + "loss": 1.0719, + "step": 15600 + }, + { + "epoch": 0.23545204983559082, + "grad_norm": 0.655872715564369, + "learning_rate": 1.7548098833426285e-05, + "loss": 1.1196, + "step": 15610 + }, + { + "epoch": 0.23560288394823373, + "grad_norm": 0.6567060230494766, + "learning_rate": 1.7544958321344982e-05, + "loss": 1.1031, + "step": 15620 + }, + { + "epoch": 0.23575371806087664, + "grad_norm": 0.7052433402603276, + "learning_rate": 1.754181608069912e-05, + "loss": 1.0771, + "step": 15630 + }, + { + "epoch": 0.23590455217351955, + "grad_norm": 0.6433796703523532, + "learning_rate": 1.75386721122086e-05, + "loss": 1.0786, + "step": 15640 + }, + { + "epoch": 0.2360553862861625, + "grad_norm": 0.6774937401375238, + "learning_rate": 1.753552641659371e-05, + "loss": 1.0738, + "step": 15650 + }, + { + "epoch": 0.2362062203988054, + "grad_norm": 0.6798296785540031, + "learning_rate": 1.7532378994575134e-05, + "loss": 1.0915, + "step": 15660 + }, + { + "epoch": 0.2363570545114483, + "grad_norm": 0.6927555813615226, + "learning_rate": 1.7529229846873953e-05, + "loss": 1.0771, + "step": 15670 + }, + { + "epoch": 0.23650788862409122, + "grad_norm": 0.657127448350099, + "learning_rate": 1.752607897421164e-05, + "loss": 1.0815, + "step": 15680 + }, + { + "epoch": 0.23665872273673413, + "grad_norm": 0.6965813521908525, + "learning_rate": 1.752292637731007e-05, + "loss": 1.0846, + "step": 15690 + }, + { + "epoch": 0.23680955684937705, + "grad_norm": 0.7286668446075291, + "learning_rate": 1.7519772056891508e-05, + "loss": 1.0829, + "step": 15700 + }, + { + "epoch": 0.23696039096201996, + "grad_norm": 0.7455676727773405, + "learning_rate": 1.7516616013678615e-05, + "loss": 1.0776, + "step": 15710 + }, + { + "epoch": 0.2371112250746629, + "grad_norm": 0.7227441724596116, + "learning_rate": 1.751345824839444e-05, + "loss": 1.1101, + "step": 15720 + }, + { + "epoch": 0.2372620591873058, + "grad_norm": 0.7030170002301638, + "learning_rate": 1.7510298761762444e-05, + "loss": 1.0814, + "step": 15730 + }, + { + "epoch": 0.23741289329994872, + "grad_norm": 0.6970789391567073, + "learning_rate": 1.7507137554506468e-05, + "loss": 1.102, + "step": 15740 + }, + { + "epoch": 0.23756372741259163, + "grad_norm": 0.6861875475953136, + "learning_rate": 1.7503974627350747e-05, + "loss": 1.0949, + "step": 15750 + }, + { + "epoch": 0.23771456152523454, + "grad_norm": 0.7323097591984026, + "learning_rate": 1.7500809981019914e-05, + "loss": 1.0743, + "step": 15760 + }, + { + "epoch": 0.23786539563787745, + "grad_norm": 0.6971049931380234, + "learning_rate": 1.7497643616239e-05, + "loss": 1.0861, + "step": 15770 + }, + { + "epoch": 0.23801622975052036, + "grad_norm": 0.7092555423002392, + "learning_rate": 1.7494475533733422e-05, + "loss": 1.0972, + "step": 15780 + }, + { + "epoch": 0.2381670638631633, + "grad_norm": 0.7458933927692736, + "learning_rate": 1.7491305734229e-05, + "loss": 1.099, + "step": 15790 + }, + { + "epoch": 0.23831789797580621, + "grad_norm": 0.7028860814121699, + "learning_rate": 1.7488134218451927e-05, + "loss": 1.0905, + "step": 15800 + }, + { + "epoch": 0.23846873208844913, + "grad_norm": 0.6865841541551829, + "learning_rate": 1.7484960987128822e-05, + "loss": 1.0801, + "step": 15810 + }, + { + "epoch": 0.23861956620109204, + "grad_norm": 0.6577262767537073, + "learning_rate": 1.748178604098666e-05, + "loss": 1.0787, + "step": 15820 + }, + { + "epoch": 0.23877040031373495, + "grad_norm": 0.6501446918523556, + "learning_rate": 1.7478609380752843e-05, + "loss": 1.0828, + "step": 15830 + }, + { + "epoch": 0.23892123442637786, + "grad_norm": 0.7082294271855138, + "learning_rate": 1.7475431007155138e-05, + "loss": 1.0959, + "step": 15840 + }, + { + "epoch": 0.23907206853902077, + "grad_norm": 0.7115241061092181, + "learning_rate": 1.7472250920921724e-05, + "loss": 1.1351, + "step": 15850 + }, + { + "epoch": 0.2392229026516637, + "grad_norm": 0.8293454009933845, + "learning_rate": 1.7469069122781163e-05, + "loss": 1.1162, + "step": 15860 + }, + { + "epoch": 0.23937373676430662, + "grad_norm": 0.6920181379592191, + "learning_rate": 1.746588561346241e-05, + "loss": 1.0908, + "step": 15870 + }, + { + "epoch": 0.23952457087694953, + "grad_norm": 0.6976079882441263, + "learning_rate": 1.7462700393694813e-05, + "loss": 1.1029, + "step": 15880 + }, + { + "epoch": 0.23967540498959244, + "grad_norm": 0.7084877189327822, + "learning_rate": 1.7459513464208116e-05, + "loss": 1.0776, + "step": 15890 + }, + { + "epoch": 0.23982623910223536, + "grad_norm": 0.7023982723406494, + "learning_rate": 1.7456324825732444e-05, + "loss": 1.09, + "step": 15900 + }, + { + "epoch": 0.23997707321487827, + "grad_norm": 0.7170027880169938, + "learning_rate": 1.7453134478998325e-05, + "loss": 1.1109, + "step": 15910 + }, + { + "epoch": 0.24012790732752118, + "grad_norm": 0.6929543405410127, + "learning_rate": 1.744994242473667e-05, + "loss": 1.0835, + "step": 15920 + }, + { + "epoch": 0.24027874144016412, + "grad_norm": 0.6759866828052908, + "learning_rate": 1.7446748663678793e-05, + "loss": 1.0901, + "step": 15930 + }, + { + "epoch": 0.24042957555280703, + "grad_norm": 0.7508242747102744, + "learning_rate": 1.744355319655638e-05, + "loss": 1.0774, + "step": 15940 + }, + { + "epoch": 0.24058040966544994, + "grad_norm": 0.6495485646555573, + "learning_rate": 1.7440356024101522e-05, + "loss": 1.0778, + "step": 15950 + }, + { + "epoch": 0.24073124377809285, + "grad_norm": 0.697199013542487, + "learning_rate": 1.7437157147046707e-05, + "loss": 1.1013, + "step": 15960 + }, + { + "epoch": 0.24088207789073576, + "grad_norm": 0.7105183905743939, + "learning_rate": 1.743395656612479e-05, + "loss": 1.0874, + "step": 15970 + }, + { + "epoch": 0.24103291200337867, + "grad_norm": 0.7104416588228624, + "learning_rate": 1.743075428206904e-05, + "loss": 1.1001, + "step": 15980 + }, + { + "epoch": 0.24118374611602159, + "grad_norm": 0.6751708752465215, + "learning_rate": 1.7427550295613098e-05, + "loss": 1.0781, + "step": 15990 + }, + { + "epoch": 0.24133458022866452, + "grad_norm": 6.058044513116909, + "learning_rate": 1.7424344607491012e-05, + "loss": 1.113, + "step": 16000 + }, + { + "epoch": 0.24148541434130744, + "grad_norm": 0.6952137249078618, + "learning_rate": 1.7421137218437206e-05, + "loss": 1.0772, + "step": 16010 + }, + { + "epoch": 0.24163624845395035, + "grad_norm": 0.6737104311661658, + "learning_rate": 1.7417928129186502e-05, + "loss": 1.0838, + "step": 16020 + }, + { + "epoch": 0.24178708256659326, + "grad_norm": 0.7086469827813899, + "learning_rate": 1.7414717340474106e-05, + "loss": 1.0955, + "step": 16030 + }, + { + "epoch": 0.24193791667923617, + "grad_norm": 0.663768147148374, + "learning_rate": 1.7411504853035618e-05, + "loss": 1.0897, + "step": 16040 + }, + { + "epoch": 0.24208875079187908, + "grad_norm": 0.6653608020723749, + "learning_rate": 1.7408290667607025e-05, + "loss": 1.0902, + "step": 16050 + }, + { + "epoch": 0.242239584904522, + "grad_norm": 0.6555367369174305, + "learning_rate": 1.74050747849247e-05, + "loss": 1.0895, + "step": 16060 + }, + { + "epoch": 0.24239041901716493, + "grad_norm": 0.659909759649067, + "learning_rate": 1.7401857205725413e-05, + "loss": 1.1136, + "step": 16070 + }, + { + "epoch": 0.24254125312980784, + "grad_norm": 0.7244755238260294, + "learning_rate": 1.7398637930746312e-05, + "loss": 1.097, + "step": 16080 + }, + { + "epoch": 0.24269208724245075, + "grad_norm": 0.6779658147060673, + "learning_rate": 1.7395416960724945e-05, + "loss": 1.0759, + "step": 16090 + }, + { + "epoch": 0.24284292135509367, + "grad_norm": 0.6647423766176802, + "learning_rate": 1.739219429639924e-05, + "loss": 1.1043, + "step": 16100 + }, + { + "epoch": 0.24299375546773658, + "grad_norm": 0.6565153854516984, + "learning_rate": 1.7388969938507512e-05, + "loss": 1.0919, + "step": 16110 + }, + { + "epoch": 0.2431445895803795, + "grad_norm": 0.6583677990386009, + "learning_rate": 1.7385743887788472e-05, + "loss": 1.0841, + "step": 16120 + }, + { + "epoch": 0.2432954236930224, + "grad_norm": 0.7702549606725703, + "learning_rate": 1.7382516144981214e-05, + "loss": 1.0772, + "step": 16130 + }, + { + "epoch": 0.24344625780566534, + "grad_norm": 0.6830115801787519, + "learning_rate": 1.7379286710825218e-05, + "loss": 1.0861, + "step": 16140 + }, + { + "epoch": 0.24359709191830825, + "grad_norm": 0.6904062608524558, + "learning_rate": 1.7376055586060356e-05, + "loss": 1.0788, + "step": 16150 + }, + { + "epoch": 0.24374792603095116, + "grad_norm": 0.6865043963187775, + "learning_rate": 1.7372822771426886e-05, + "loss": 1.0895, + "step": 16160 + }, + { + "epoch": 0.24389876014359407, + "grad_norm": 0.7289922452421915, + "learning_rate": 1.7369588267665448e-05, + "loss": 1.0866, + "step": 16170 + }, + { + "epoch": 0.24404959425623698, + "grad_norm": 0.6519326162556323, + "learning_rate": 1.7366352075517076e-05, + "loss": 1.0939, + "step": 16180 + }, + { + "epoch": 0.2442004283688799, + "grad_norm": 0.6644918396138065, + "learning_rate": 1.7363114195723186e-05, + "loss": 1.0993, + "step": 16190 + }, + { + "epoch": 0.2443512624815228, + "grad_norm": 0.6833712900750445, + "learning_rate": 1.7359874629025584e-05, + "loss": 1.0899, + "step": 16200 + }, + { + "epoch": 0.24450209659416575, + "grad_norm": 0.6751648562335925, + "learning_rate": 1.735663337616646e-05, + "loss": 1.0885, + "step": 16210 + }, + { + "epoch": 0.24465293070680866, + "grad_norm": 0.6741397508921538, + "learning_rate": 1.735339043788839e-05, + "loss": 1.0827, + "step": 16220 + }, + { + "epoch": 0.24480376481945157, + "grad_norm": 0.6578491338998969, + "learning_rate": 1.7350145814934344e-05, + "loss": 1.0873, + "step": 16230 + }, + { + "epoch": 0.24495459893209448, + "grad_norm": 0.6591983433432504, + "learning_rate": 1.734689950804766e-05, + "loss": 1.0866, + "step": 16240 + }, + { + "epoch": 0.2451054330447374, + "grad_norm": 0.7321984572730325, + "learning_rate": 1.7343651517972086e-05, + "loss": 1.1156, + "step": 16250 + }, + { + "epoch": 0.2452562671573803, + "grad_norm": 0.7461734300911839, + "learning_rate": 1.7340401845451737e-05, + "loss": 1.0633, + "step": 16260 + }, + { + "epoch": 0.24540710127002321, + "grad_norm": 0.6819273979436479, + "learning_rate": 1.7337150491231114e-05, + "loss": 1.0755, + "step": 16270 + }, + { + "epoch": 0.24555793538266615, + "grad_norm": 0.7284124025382337, + "learning_rate": 1.7333897456055114e-05, + "loss": 1.0837, + "step": 16280 + }, + { + "epoch": 0.24570876949530907, + "grad_norm": 0.6612095636766797, + "learning_rate": 1.7330642740669016e-05, + "loss": 1.0777, + "step": 16290 + }, + { + "epoch": 0.24585960360795198, + "grad_norm": 0.6595135284243575, + "learning_rate": 1.732738634581848e-05, + "loss": 1.0878, + "step": 16300 + }, + { + "epoch": 0.2460104377205949, + "grad_norm": 0.7403873476974436, + "learning_rate": 1.7324128272249548e-05, + "loss": 1.0881, + "step": 16310 + }, + { + "epoch": 0.2461612718332378, + "grad_norm": 0.6892363874729406, + "learning_rate": 1.7320868520708654e-05, + "loss": 1.0782, + "step": 16320 + }, + { + "epoch": 0.2463121059458807, + "grad_norm": 0.6598437843137153, + "learning_rate": 1.7317607091942615e-05, + "loss": 1.1046, + "step": 16330 + }, + { + "epoch": 0.24646294005852362, + "grad_norm": 0.6489064651862456, + "learning_rate": 1.7314343986698627e-05, + "loss": 1.0738, + "step": 16340 + }, + { + "epoch": 0.24661377417116656, + "grad_norm": 0.7121288941064778, + "learning_rate": 1.731107920572428e-05, + "loss": 1.1038, + "step": 16350 + }, + { + "epoch": 0.24676460828380947, + "grad_norm": 0.7063894774848856, + "learning_rate": 1.7307812749767534e-05, + "loss": 1.088, + "step": 16360 + }, + { + "epoch": 0.24691544239645238, + "grad_norm": 0.681732161210106, + "learning_rate": 1.7304544619576744e-05, + "loss": 1.1031, + "step": 16370 + }, + { + "epoch": 0.2470662765090953, + "grad_norm": 0.7520532891410872, + "learning_rate": 1.7301274815900643e-05, + "loss": 1.0621, + "step": 16380 + }, + { + "epoch": 0.2472171106217382, + "grad_norm": 0.7120540094506577, + "learning_rate": 1.7298003339488353e-05, + "loss": 1.0824, + "step": 16390 + }, + { + "epoch": 0.24736794473438112, + "grad_norm": 0.7188298697572592, + "learning_rate": 1.7294730191089377e-05, + "loss": 1.11, + "step": 16400 + }, + { + "epoch": 0.24751877884702403, + "grad_norm": 0.7480285631752364, + "learning_rate": 1.7291455371453592e-05, + "loss": 1.0917, + "step": 16410 + }, + { + "epoch": 0.24766961295966697, + "grad_norm": 0.6614061232739616, + "learning_rate": 1.728817888133127e-05, + "loss": 1.1073, + "step": 16420 + }, + { + "epoch": 0.24782044707230988, + "grad_norm": 0.6715816458263948, + "learning_rate": 1.7284900721473064e-05, + "loss": 1.1062, + "step": 16430 + }, + { + "epoch": 0.2479712811849528, + "grad_norm": 0.7275870322399433, + "learning_rate": 1.728162089263e-05, + "loss": 1.115, + "step": 16440 + }, + { + "epoch": 0.2481221152975957, + "grad_norm": 0.6680390996025952, + "learning_rate": 1.72783393955535e-05, + "loss": 1.1014, + "step": 16450 + }, + { + "epoch": 0.2482729494102386, + "grad_norm": 0.7270295253449169, + "learning_rate": 1.7275056230995358e-05, + "loss": 1.0953, + "step": 16460 + }, + { + "epoch": 0.24842378352288152, + "grad_norm": 0.6645853264537533, + "learning_rate": 1.7271771399707753e-05, + "loss": 1.0731, + "step": 16470 + }, + { + "epoch": 0.24857461763552444, + "grad_norm": 0.6367683580880206, + "learning_rate": 1.726848490244325e-05, + "loss": 1.0747, + "step": 16480 + }, + { + "epoch": 0.24872545174816738, + "grad_norm": 0.7172361255031485, + "learning_rate": 1.7265196739954784e-05, + "loss": 1.0873, + "step": 16490 + }, + { + "epoch": 0.2488762858608103, + "grad_norm": 0.6925443699990375, + "learning_rate": 1.7261906912995683e-05, + "loss": 1.0708, + "step": 16500 + }, + { + "epoch": 0.2490271199734532, + "grad_norm": 0.7520611648179001, + "learning_rate": 1.725861542231966e-05, + "loss": 1.0949, + "step": 16510 + }, + { + "epoch": 0.2491779540860961, + "grad_norm": 0.688612432267042, + "learning_rate": 1.725532226868079e-05, + "loss": 1.0845, + "step": 16520 + }, + { + "epoch": 0.24932878819873902, + "grad_norm": 0.7089287695101018, + "learning_rate": 1.725202745283355e-05, + "loss": 1.0704, + "step": 16530 + }, + { + "epoch": 0.24947962231138193, + "grad_norm": 0.662156870888791, + "learning_rate": 1.724873097553278e-05, + "loss": 1.0847, + "step": 16540 + }, + { + "epoch": 0.24963045642402484, + "grad_norm": 0.6731786835942362, + "learning_rate": 1.7245432837533724e-05, + "loss": 1.0978, + "step": 16550 + }, + { + "epoch": 0.24978129053666778, + "grad_norm": 0.6662514712655024, + "learning_rate": 1.7242133039591976e-05, + "loss": 1.0797, + "step": 16560 + }, + { + "epoch": 0.2499321246493107, + "grad_norm": 0.6392253497625524, + "learning_rate": 1.7238831582463534e-05, + "loss": 1.0692, + "step": 16570 + }, + { + "epoch": 0.2500829587619536, + "grad_norm": 1.38242780104566, + "learning_rate": 1.723552846690477e-05, + "loss": 1.0977, + "step": 16580 + }, + { + "epoch": 0.2502337928745965, + "grad_norm": 0.6456962359534696, + "learning_rate": 1.723222369367242e-05, + "loss": 1.0761, + "step": 16590 + }, + { + "epoch": 0.25038462698723946, + "grad_norm": 0.7270387325503179, + "learning_rate": 1.7228917263523636e-05, + "loss": 1.0937, + "step": 16600 + }, + { + "epoch": 0.25053546109988234, + "grad_norm": 0.6510184915286906, + "learning_rate": 1.7225609177215913e-05, + "loss": 1.0985, + "step": 16610 + }, + { + "epoch": 0.2506862952125253, + "grad_norm": 0.6956381794058627, + "learning_rate": 1.7222299435507143e-05, + "loss": 1.0954, + "step": 16620 + }, + { + "epoch": 0.25083712932516816, + "grad_norm": 0.7147499122392693, + "learning_rate": 1.7218988039155595e-05, + "loss": 1.1036, + "step": 16630 + }, + { + "epoch": 0.2509879634378111, + "grad_norm": 0.7273204528551125, + "learning_rate": 1.7215674988919918e-05, + "loss": 1.0922, + "step": 16640 + }, + { + "epoch": 0.251138797550454, + "grad_norm": 0.7110987312864112, + "learning_rate": 1.7212360285559132e-05, + "loss": 1.1158, + "step": 16650 + }, + { + "epoch": 0.2512896316630969, + "grad_norm": 0.6513173056906062, + "learning_rate": 1.720904392983265e-05, + "loss": 1.0894, + "step": 16660 + }, + { + "epoch": 0.25144046577573986, + "grad_norm": 0.6431060216933655, + "learning_rate": 1.7205725922500248e-05, + "loss": 1.1143, + "step": 16670 + }, + { + "epoch": 0.25159129988838275, + "grad_norm": 0.6920776293177927, + "learning_rate": 1.7202406264322092e-05, + "loss": 1.0921, + "step": 16680 + }, + { + "epoch": 0.2517421340010257, + "grad_norm": 0.6502659804527181, + "learning_rate": 1.719908495605872e-05, + "loss": 1.1013, + "step": 16690 + }, + { + "epoch": 0.25189296811366857, + "grad_norm": 0.6584405911290965, + "learning_rate": 1.7195761998471056e-05, + "loss": 1.0847, + "step": 16700 + }, + { + "epoch": 0.2520438022263115, + "grad_norm": 0.7421869270728567, + "learning_rate": 1.7192437392320392e-05, + "loss": 1.0768, + "step": 16710 + }, + { + "epoch": 0.2521946363389544, + "grad_norm": 0.6520393646513423, + "learning_rate": 1.71891111383684e-05, + "loss": 1.0942, + "step": 16720 + }, + { + "epoch": 0.25234547045159733, + "grad_norm": 0.6472550679685086, + "learning_rate": 1.7185783237377132e-05, + "loss": 1.1005, + "step": 16730 + }, + { + "epoch": 0.25249630456424027, + "grad_norm": 0.6859043662337728, + "learning_rate": 1.718245369010902e-05, + "loss": 1.096, + "step": 16740 + }, + { + "epoch": 0.25264713867688315, + "grad_norm": 0.6833948610075732, + "learning_rate": 1.7179122497326863e-05, + "loss": 1.0764, + "step": 16750 + }, + { + "epoch": 0.2527979727895261, + "grad_norm": 0.6781085174587321, + "learning_rate": 1.717578965979385e-05, + "loss": 1.0756, + "step": 16760 + }, + { + "epoch": 0.252948806902169, + "grad_norm": 0.7679486130216275, + "learning_rate": 1.7172455178273536e-05, + "loss": 1.09, + "step": 16770 + }, + { + "epoch": 0.2530996410148119, + "grad_norm": 0.6646987203354692, + "learning_rate": 1.716911905352986e-05, + "loss": 1.0697, + "step": 16780 + }, + { + "epoch": 0.2532504751274548, + "grad_norm": 0.7471812736504744, + "learning_rate": 1.7165781286327137e-05, + "loss": 1.0761, + "step": 16790 + }, + { + "epoch": 0.25340130924009774, + "grad_norm": 0.6569833586571766, + "learning_rate": 1.7162441877430052e-05, + "loss": 1.0799, + "step": 16800 + }, + { + "epoch": 0.2535521433527407, + "grad_norm": 0.6792993507107811, + "learning_rate": 1.715910082760367e-05, + "loss": 1.0989, + "step": 16810 + }, + { + "epoch": 0.25370297746538356, + "grad_norm": 0.6652678293403826, + "learning_rate": 1.7155758137613432e-05, + "loss": 1.0823, + "step": 16820 + }, + { + "epoch": 0.2538538115780265, + "grad_norm": 0.6868883686381418, + "learning_rate": 1.7152413808225158e-05, + "loss": 1.071, + "step": 16830 + }, + { + "epoch": 0.2540046456906694, + "grad_norm": 0.6595845142024451, + "learning_rate": 1.714906784020504e-05, + "loss": 1.0834, + "step": 16840 + }, + { + "epoch": 0.2541554798033123, + "grad_norm": 0.6823905491941226, + "learning_rate": 1.714572023431964e-05, + "loss": 1.0936, + "step": 16850 + }, + { + "epoch": 0.2543063139159552, + "grad_norm": 0.6492781914361829, + "learning_rate": 1.7142370991335904e-05, + "loss": 1.0743, + "step": 16860 + }, + { + "epoch": 0.25445714802859815, + "grad_norm": 0.6586292986362517, + "learning_rate": 1.7139020112021155e-05, + "loss": 1.0871, + "step": 16870 + }, + { + "epoch": 0.2546079821412411, + "grad_norm": 0.6442089448324245, + "learning_rate": 1.713566759714308e-05, + "loss": 1.0755, + "step": 16880 + }, + { + "epoch": 0.25475881625388397, + "grad_norm": 0.7367056418034434, + "learning_rate": 1.7132313447469746e-05, + "loss": 1.0951, + "step": 16890 + }, + { + "epoch": 0.2549096503665269, + "grad_norm": 0.7147949882641449, + "learning_rate": 1.7128957663769597e-05, + "loss": 1.0995, + "step": 16900 + }, + { + "epoch": 0.2550604844791698, + "grad_norm": 0.7210117366519104, + "learning_rate": 1.7125600246811453e-05, + "loss": 1.0748, + "step": 16910 + }, + { + "epoch": 0.25521131859181273, + "grad_norm": 0.6552850012752661, + "learning_rate": 1.7122241197364503e-05, + "loss": 1.0941, + "step": 16920 + }, + { + "epoch": 0.2553621527044556, + "grad_norm": 0.6685949954503344, + "learning_rate": 1.711888051619831e-05, + "loss": 1.0766, + "step": 16930 + }, + { + "epoch": 0.25551298681709855, + "grad_norm": 0.6557388069096344, + "learning_rate": 1.7115518204082812e-05, + "loss": 1.1041, + "step": 16940 + }, + { + "epoch": 0.2556638209297415, + "grad_norm": 0.682389905476468, + "learning_rate": 1.7112154261788322e-05, + "loss": 1.0755, + "step": 16950 + }, + { + "epoch": 0.2558146550423844, + "grad_norm": 0.6527650409071788, + "learning_rate": 1.7108788690085526e-05, + "loss": 1.0591, + "step": 16960 + }, + { + "epoch": 0.2559654891550273, + "grad_norm": 0.6719844296343843, + "learning_rate": 1.7105421489745483e-05, + "loss": 1.0729, + "step": 16970 + }, + { + "epoch": 0.2561163232676702, + "grad_norm": 0.6812051659533009, + "learning_rate": 1.710205266153963e-05, + "loss": 1.0751, + "step": 16980 + }, + { + "epoch": 0.25626715738031314, + "grad_norm": 0.7148727731700426, + "learning_rate": 1.709868220623976e-05, + "loss": 1.0866, + "step": 16990 + }, + { + "epoch": 0.256417991492956, + "grad_norm": 0.6597957458604448, + "learning_rate": 1.709531012461806e-05, + "loss": 1.0942, + "step": 17000 + }, + { + "epoch": 0.25656882560559896, + "grad_norm": 0.7178251419830076, + "learning_rate": 1.7091936417447083e-05, + "loss": 1.0829, + "step": 17010 + }, + { + "epoch": 0.2567196597182419, + "grad_norm": 0.6379423172236756, + "learning_rate": 1.7088561085499744e-05, + "loss": 1.0651, + "step": 17020 + }, + { + "epoch": 0.2568704938308848, + "grad_norm": 0.6937114619644019, + "learning_rate": 1.7085184129549342e-05, + "loss": 1.0653, + "step": 17030 + }, + { + "epoch": 0.2570213279435277, + "grad_norm": 0.6685689706149969, + "learning_rate": 1.708180555036954e-05, + "loss": 1.0666, + "step": 17040 + }, + { + "epoch": 0.2571721620561706, + "grad_norm": 0.7194734600404116, + "learning_rate": 1.7078425348734385e-05, + "loss": 1.0657, + "step": 17050 + }, + { + "epoch": 0.25732299616881354, + "grad_norm": 0.7147578879056896, + "learning_rate": 1.7075043525418284e-05, + "loss": 1.1012, + "step": 17060 + }, + { + "epoch": 0.25747383028145643, + "grad_norm": 0.6399850866766377, + "learning_rate": 1.7071660081196014e-05, + "loss": 1.0825, + "step": 17070 + }, + { + "epoch": 0.25762466439409937, + "grad_norm": 0.6618307423708201, + "learning_rate": 1.7068275016842737e-05, + "loss": 1.0866, + "step": 17080 + }, + { + "epoch": 0.2577754985067423, + "grad_norm": 0.6880262773438697, + "learning_rate": 1.7064888333133972e-05, + "loss": 1.0845, + "step": 17090 + }, + { + "epoch": 0.2579263326193852, + "grad_norm": 0.7235844319025921, + "learning_rate": 1.706150003084562e-05, + "loss": 1.0818, + "step": 17100 + }, + { + "epoch": 0.25807716673202813, + "grad_norm": 0.6359569926836567, + "learning_rate": 1.705811011075394e-05, + "loss": 1.0692, + "step": 17110 + }, + { + "epoch": 0.258228000844671, + "grad_norm": 0.7170422476891587, + "learning_rate": 1.7054718573635578e-05, + "loss": 1.0782, + "step": 17120 + }, + { + "epoch": 0.25837883495731395, + "grad_norm": 0.6499227737682173, + "learning_rate": 1.7051325420267532e-05, + "loss": 1.095, + "step": 17130 + }, + { + "epoch": 0.25852966906995684, + "grad_norm": 0.6960032351523744, + "learning_rate": 1.704793065142719e-05, + "loss": 1.0797, + "step": 17140 + }, + { + "epoch": 0.2586805031825998, + "grad_norm": 0.6839374224569401, + "learning_rate": 1.7044534267892295e-05, + "loss": 1.0805, + "step": 17150 + }, + { + "epoch": 0.2588313372952427, + "grad_norm": 0.7060053264132442, + "learning_rate": 1.7041136270440964e-05, + "loss": 1.0924, + "step": 17160 + }, + { + "epoch": 0.2589821714078856, + "grad_norm": 0.7007195494381712, + "learning_rate": 1.703773665985169e-05, + "loss": 1.0972, + "step": 17170 + }, + { + "epoch": 0.25913300552052854, + "grad_norm": 0.6340422854889286, + "learning_rate": 1.703433543690332e-05, + "loss": 1.0931, + "step": 17180 + }, + { + "epoch": 0.2592838396331714, + "grad_norm": 0.6833682971089694, + "learning_rate": 1.7030932602375094e-05, + "loss": 1.0648, + "step": 17190 + }, + { + "epoch": 0.25943467374581436, + "grad_norm": 0.6677090419486919, + "learning_rate": 1.70275281570466e-05, + "loss": 1.0816, + "step": 17200 + }, + { + "epoch": 0.25958550785845724, + "grad_norm": 0.6666679811312904, + "learning_rate": 1.7024122101697803e-05, + "loss": 1.079, + "step": 17210 + }, + { + "epoch": 0.2597363419711002, + "grad_norm": 0.6484138419869792, + "learning_rate": 1.7020714437109038e-05, + "loss": 1.0793, + "step": 17220 + }, + { + "epoch": 0.2598871760837431, + "grad_norm": 0.7903036581988503, + "learning_rate": 1.7017305164061006e-05, + "loss": 1.0864, + "step": 17230 + }, + { + "epoch": 0.260038010196386, + "grad_norm": 0.7624459064554472, + "learning_rate": 1.7013894283334784e-05, + "loss": 1.0741, + "step": 17240 + }, + { + "epoch": 0.26018884430902894, + "grad_norm": 0.6661476363822062, + "learning_rate": 1.7010481795711804e-05, + "loss": 1.0682, + "step": 17250 + }, + { + "epoch": 0.2603396784216718, + "grad_norm": 0.6647970740821332, + "learning_rate": 1.700706770197388e-05, + "loss": 1.0734, + "step": 17260 + }, + { + "epoch": 0.26049051253431477, + "grad_norm": 0.6637598965817165, + "learning_rate": 1.7003652002903185e-05, + "loss": 1.0773, + "step": 17270 + }, + { + "epoch": 0.26064134664695765, + "grad_norm": 0.6789981913578315, + "learning_rate": 1.7000234699282256e-05, + "loss": 1.0801, + "step": 17280 + }, + { + "epoch": 0.2607921807596006, + "grad_norm": 0.6728514525206059, + "learning_rate": 1.6996815791894013e-05, + "loss": 1.0667, + "step": 17290 + }, + { + "epoch": 0.26094301487224353, + "grad_norm": 0.6808546709492179, + "learning_rate": 1.6993395281521732e-05, + "loss": 1.057, + "step": 17300 + }, + { + "epoch": 0.2610938489848864, + "grad_norm": 0.6765262549819395, + "learning_rate": 1.6989973168949054e-05, + "loss": 1.0718, + "step": 17310 + }, + { + "epoch": 0.26124468309752935, + "grad_norm": 0.650744673240033, + "learning_rate": 1.6986549454959998e-05, + "loss": 1.0952, + "step": 17320 + }, + { + "epoch": 0.26139551721017223, + "grad_norm": 0.6563165411583988, + "learning_rate": 1.698312414033894e-05, + "loss": 1.0727, + "step": 17330 + }, + { + "epoch": 0.2615463513228152, + "grad_norm": 0.723466866540933, + "learning_rate": 1.6979697225870623e-05, + "loss": 1.0753, + "step": 17340 + }, + { + "epoch": 0.26169718543545806, + "grad_norm": 0.6785261003609379, + "learning_rate": 1.6976268712340168e-05, + "loss": 1.0852, + "step": 17350 + }, + { + "epoch": 0.261848019548101, + "grad_norm": 0.6721846216970727, + "learning_rate": 1.697283860053305e-05, + "loss": 1.0706, + "step": 17360 + }, + { + "epoch": 0.26199885366074394, + "grad_norm": 0.6989948309429691, + "learning_rate": 1.696940689123511e-05, + "loss": 1.061, + "step": 17370 + }, + { + "epoch": 0.2621496877733868, + "grad_norm": 0.6771920838819507, + "learning_rate": 1.6965973585232562e-05, + "loss": 1.0914, + "step": 17380 + }, + { + "epoch": 0.26230052188602976, + "grad_norm": 0.6728260449423192, + "learning_rate": 1.6962538683311988e-05, + "loss": 1.0646, + "step": 17390 + }, + { + "epoch": 0.26245135599867264, + "grad_norm": 0.6782221849695428, + "learning_rate": 1.6959102186260326e-05, + "loss": 1.0948, + "step": 17400 + }, + { + "epoch": 0.2626021901113156, + "grad_norm": 0.6671741121090194, + "learning_rate": 1.6955664094864884e-05, + "loss": 1.0819, + "step": 17410 + }, + { + "epoch": 0.26275302422395846, + "grad_norm": 0.7171421728817783, + "learning_rate": 1.6952224409913334e-05, + "loss": 1.0707, + "step": 17420 + }, + { + "epoch": 0.2629038583366014, + "grad_norm": 0.6883984645499082, + "learning_rate": 1.6948783132193718e-05, + "loss": 1.0897, + "step": 17430 + }, + { + "epoch": 0.26305469244924434, + "grad_norm": 0.7308047462847842, + "learning_rate": 1.694534026249444e-05, + "loss": 1.0685, + "step": 17440 + }, + { + "epoch": 0.2632055265618872, + "grad_norm": 0.6963954063299547, + "learning_rate": 1.6941895801604264e-05, + "loss": 1.069, + "step": 17450 + }, + { + "epoch": 0.26335636067453017, + "grad_norm": 0.662947022562076, + "learning_rate": 1.6938449750312326e-05, + "loss": 1.0758, + "step": 17460 + }, + { + "epoch": 0.26350719478717305, + "grad_norm": 0.6657566355578989, + "learning_rate": 1.6935002109408124e-05, + "loss": 1.0888, + "step": 17470 + }, + { + "epoch": 0.263658028899816, + "grad_norm": 0.729514213889611, + "learning_rate": 1.693155287968151e-05, + "loss": 1.0809, + "step": 17480 + }, + { + "epoch": 0.26380886301245887, + "grad_norm": 0.6988278450586275, + "learning_rate": 1.6928102061922723e-05, + "loss": 1.0917, + "step": 17490 + }, + { + "epoch": 0.2639596971251018, + "grad_norm": 0.6455122742915873, + "learning_rate": 1.6924649656922343e-05, + "loss": 1.1049, + "step": 17500 + }, + { + "epoch": 0.26411053123774475, + "grad_norm": 0.657616903342784, + "learning_rate": 1.6921195665471323e-05, + "loss": 1.0809, + "step": 17510 + }, + { + "epoch": 0.26426136535038763, + "grad_norm": 0.6656107655446379, + "learning_rate": 1.6917740088360985e-05, + "loss": 1.0632, + "step": 17520 + }, + { + "epoch": 0.2644121994630306, + "grad_norm": 0.6901248117162404, + "learning_rate": 1.6914282926383e-05, + "loss": 1.0759, + "step": 17530 + }, + { + "epoch": 0.26456303357567346, + "grad_norm": 0.6682963010087336, + "learning_rate": 1.6910824180329418e-05, + "loss": 1.0764, + "step": 17540 + }, + { + "epoch": 0.2647138676883164, + "grad_norm": 0.6916025302966818, + "learning_rate": 1.6907363850992646e-05, + "loss": 1.055, + "step": 17550 + }, + { + "epoch": 0.2648647018009593, + "grad_norm": 0.6748811009699357, + "learning_rate": 1.6903901939165438e-05, + "loss": 1.0968, + "step": 17560 + }, + { + "epoch": 0.2650155359136022, + "grad_norm": 0.6689885689540229, + "learning_rate": 1.690043844564094e-05, + "loss": 1.0592, + "step": 17570 + }, + { + "epoch": 0.26516637002624516, + "grad_norm": 0.7320593939936121, + "learning_rate": 1.6896973371212637e-05, + "loss": 1.0655, + "step": 17580 + }, + { + "epoch": 0.26531720413888804, + "grad_norm": 0.6365913600726972, + "learning_rate": 1.6893506716674387e-05, + "loss": 1.0679, + "step": 17590 + }, + { + "epoch": 0.265468038251531, + "grad_norm": 0.6559415805496541, + "learning_rate": 1.6890038482820408e-05, + "loss": 1.0806, + "step": 17600 + }, + { + "epoch": 0.26561887236417386, + "grad_norm": 0.6615243272823926, + "learning_rate": 1.6886568670445278e-05, + "loss": 1.0696, + "step": 17610 + }, + { + "epoch": 0.2657697064768168, + "grad_norm": 0.6789021785108625, + "learning_rate": 1.6883097280343937e-05, + "loss": 1.0807, + "step": 17620 + }, + { + "epoch": 0.2659205405894597, + "grad_norm": 0.7701521734850528, + "learning_rate": 1.687962431331169e-05, + "loss": 1.1102, + "step": 17630 + }, + { + "epoch": 0.2660713747021026, + "grad_norm": 0.6505078808093085, + "learning_rate": 1.6876149770144192e-05, + "loss": 1.0725, + "step": 17640 + }, + { + "epoch": 0.26622220881474556, + "grad_norm": 0.675038450463956, + "learning_rate": 1.687267365163748e-05, + "loss": 1.0971, + "step": 17650 + }, + { + "epoch": 0.26637304292738845, + "grad_norm": 0.7044560835303033, + "learning_rate": 1.6869195958587932e-05, + "loss": 1.0728, + "step": 17660 + }, + { + "epoch": 0.2665238770400314, + "grad_norm": 0.6681496534624876, + "learning_rate": 1.6865716691792293e-05, + "loss": 1.0696, + "step": 17670 + }, + { + "epoch": 0.26667471115267427, + "grad_norm": 0.7050399354060519, + "learning_rate": 1.6862235852047675e-05, + "loss": 1.0875, + "step": 17680 + }, + { + "epoch": 0.2668255452653172, + "grad_norm": 0.6214629911324773, + "learning_rate": 1.6858753440151543e-05, + "loss": 1.0736, + "step": 17690 + }, + { + "epoch": 0.2669763793779601, + "grad_norm": 0.677925105872565, + "learning_rate": 1.685526945690172e-05, + "loss": 1.0754, + "step": 17700 + }, + { + "epoch": 0.26712721349060303, + "grad_norm": 0.64829676624973, + "learning_rate": 1.68517839030964e-05, + "loss": 1.064, + "step": 17710 + }, + { + "epoch": 0.26727804760324597, + "grad_norm": 0.6677761263102115, + "learning_rate": 1.6848296779534127e-05, + "loss": 1.097, + "step": 17720 + }, + { + "epoch": 0.26742888171588886, + "grad_norm": 0.7131440512629742, + "learning_rate": 1.6844808087013808e-05, + "loss": 1.0914, + "step": 17730 + }, + { + "epoch": 0.2675797158285318, + "grad_norm": 0.681649208803706, + "learning_rate": 1.684131782633471e-05, + "loss": 1.0636, + "step": 17740 + }, + { + "epoch": 0.2677305499411747, + "grad_norm": 0.7120222887787658, + "learning_rate": 1.6837825998296455e-05, + "loss": 1.0608, + "step": 17750 + }, + { + "epoch": 0.2678813840538176, + "grad_norm": 0.6382245371993828, + "learning_rate": 1.683433260369903e-05, + "loss": 1.0736, + "step": 17760 + }, + { + "epoch": 0.2680322181664605, + "grad_norm": 0.684946995417606, + "learning_rate": 1.683083764334278e-05, + "loss": 1.0749, + "step": 17770 + }, + { + "epoch": 0.26818305227910344, + "grad_norm": 0.6546012360044103, + "learning_rate": 1.6827341118028406e-05, + "loss": 1.0722, + "step": 17780 + }, + { + "epoch": 0.2683338863917464, + "grad_norm": 0.6575578348857607, + "learning_rate": 1.682384302855697e-05, + "loss": 1.0998, + "step": 17790 + }, + { + "epoch": 0.26848472050438926, + "grad_norm": 0.6717128159447376, + "learning_rate": 1.6820343375729887e-05, + "loss": 1.0615, + "step": 17800 + }, + { + "epoch": 0.2686355546170322, + "grad_norm": 0.6794233809553629, + "learning_rate": 1.6816842160348937e-05, + "loss": 1.0794, + "step": 17810 + }, + { + "epoch": 0.2687863887296751, + "grad_norm": 0.7037364039497996, + "learning_rate": 1.681333938321626e-05, + "loss": 1.0821, + "step": 17820 + }, + { + "epoch": 0.268937222842318, + "grad_norm": 0.6689861158780198, + "learning_rate": 1.680983504513434e-05, + "loss": 1.0743, + "step": 17830 + }, + { + "epoch": 0.2690880569549609, + "grad_norm": 0.6801708748344908, + "learning_rate": 1.6806329146906028e-05, + "loss": 1.0612, + "step": 17840 + }, + { + "epoch": 0.26923889106760385, + "grad_norm": 0.6487652996392265, + "learning_rate": 1.680282168933454e-05, + "loss": 1.085, + "step": 17850 + }, + { + "epoch": 0.2693897251802468, + "grad_norm": 0.6436347363775354, + "learning_rate": 1.679931267322344e-05, + "loss": 1.0812, + "step": 17860 + }, + { + "epoch": 0.26954055929288967, + "grad_norm": 0.6935481898568103, + "learning_rate": 1.6795802099376638e-05, + "loss": 1.0832, + "step": 17870 + }, + { + "epoch": 0.2696913934055326, + "grad_norm": 0.7146559289927161, + "learning_rate": 1.679228996859843e-05, + "loss": 1.0707, + "step": 17880 + }, + { + "epoch": 0.2698422275181755, + "grad_norm": 0.7008679427019436, + "learning_rate": 1.6788776281693446e-05, + "loss": 1.0737, + "step": 17890 + }, + { + "epoch": 0.26999306163081843, + "grad_norm": 0.6633604963268792, + "learning_rate": 1.6785261039466673e-05, + "loss": 1.0778, + "step": 17900 + }, + { + "epoch": 0.2701438957434613, + "grad_norm": 0.7120686147703599, + "learning_rate": 1.6781744242723467e-05, + "loss": 1.1042, + "step": 17910 + }, + { + "epoch": 0.27029472985610425, + "grad_norm": 1.6296991221353914, + "learning_rate": 1.6778225892269528e-05, + "loss": 1.0797, + "step": 17920 + }, + { + "epoch": 0.2704455639687472, + "grad_norm": 0.6987262439620364, + "learning_rate": 1.677470598891092e-05, + "loss": 1.0694, + "step": 17930 + }, + { + "epoch": 0.2705963980813901, + "grad_norm": 0.6966963550070513, + "learning_rate": 1.6771184533454058e-05, + "loss": 1.0828, + "step": 17940 + }, + { + "epoch": 0.270747232194033, + "grad_norm": 0.7486381121693219, + "learning_rate": 1.6767661526705712e-05, + "loss": 1.0605, + "step": 17950 + }, + { + "epoch": 0.2708980663066759, + "grad_norm": 0.7008983773527229, + "learning_rate": 1.6764136969473016e-05, + "loss": 1.0581, + "step": 17960 + }, + { + "epoch": 0.27104890041931884, + "grad_norm": 0.7644056230962191, + "learning_rate": 1.676061086256345e-05, + "loss": 1.0878, + "step": 17970 + }, + { + "epoch": 0.2711997345319617, + "grad_norm": 0.652874309475053, + "learning_rate": 1.675708320678485e-05, + "loss": 1.0991, + "step": 17980 + }, + { + "epoch": 0.27135056864460466, + "grad_norm": 0.7034569368638057, + "learning_rate": 1.6753554002945407e-05, + "loss": 1.0636, + "step": 17990 + }, + { + "epoch": 0.2715014027572476, + "grad_norm": 0.6768746507215943, + "learning_rate": 1.6750023251853673e-05, + "loss": 1.0737, + "step": 18000 + }, + { + "epoch": 0.2716522368698905, + "grad_norm": 0.6745867180900721, + "learning_rate": 1.674649095431855e-05, + "loss": 1.068, + "step": 18010 + }, + { + "epoch": 0.2718030709825334, + "grad_norm": 0.6212370508287574, + "learning_rate": 1.674295711114929e-05, + "loss": 1.0529, + "step": 18020 + }, + { + "epoch": 0.2719539050951763, + "grad_norm": 0.6782323290426485, + "learning_rate": 1.6739421723155512e-05, + "loss": 1.0755, + "step": 18030 + }, + { + "epoch": 0.27210473920781925, + "grad_norm": 0.7014809254890383, + "learning_rate": 1.6735884791147167e-05, + "loss": 1.0971, + "step": 18040 + }, + { + "epoch": 0.27225557332046213, + "grad_norm": 0.6604238123971937, + "learning_rate": 1.6732346315934583e-05, + "loss": 1.0821, + "step": 18050 + }, + { + "epoch": 0.27240640743310507, + "grad_norm": 0.6941224621839625, + "learning_rate": 1.6728806298328427e-05, + "loss": 1.0792, + "step": 18060 + }, + { + "epoch": 0.272557241545748, + "grad_norm": 0.6496403611619279, + "learning_rate": 1.672526473913973e-05, + "loss": 1.0755, + "step": 18070 + }, + { + "epoch": 0.2727080756583909, + "grad_norm": 0.6717921166905418, + "learning_rate": 1.6721721639179858e-05, + "loss": 1.0839, + "step": 18080 + }, + { + "epoch": 0.27285890977103383, + "grad_norm": 0.7940269637714975, + "learning_rate": 1.6718176999260553e-05, + "loss": 1.0837, + "step": 18090 + }, + { + "epoch": 0.2730097438836767, + "grad_norm": 0.6689704842541299, + "learning_rate": 1.6714630820193898e-05, + "loss": 1.0768, + "step": 18100 + }, + { + "epoch": 0.27316057799631965, + "grad_norm": 0.6877941014379344, + "learning_rate": 1.6711083102792324e-05, + "loss": 1.0597, + "step": 18110 + }, + { + "epoch": 0.27331141210896254, + "grad_norm": 0.7302408777061937, + "learning_rate": 1.6707533847868626e-05, + "loss": 1.0874, + "step": 18120 + }, + { + "epoch": 0.2734622462216055, + "grad_norm": 0.6996785213280533, + "learning_rate": 1.670398305623594e-05, + "loss": 1.0497, + "step": 18130 + }, + { + "epoch": 0.2736130803342484, + "grad_norm": 0.6547505343560623, + "learning_rate": 1.670043072870776e-05, + "loss": 1.0689, + "step": 18140 + }, + { + "epoch": 0.2737639144468913, + "grad_norm": 0.6744686179588639, + "learning_rate": 1.6696876866097935e-05, + "loss": 1.0696, + "step": 18150 + }, + { + "epoch": 0.27391474855953424, + "grad_norm": 0.6911139964706309, + "learning_rate": 1.6693321469220653e-05, + "loss": 1.0846, + "step": 18160 + }, + { + "epoch": 0.2740655826721771, + "grad_norm": 0.7155629690375022, + "learning_rate": 1.6689764538890474e-05, + "loss": 1.0796, + "step": 18170 + }, + { + "epoch": 0.27421641678482006, + "grad_norm": 0.6743527248940167, + "learning_rate": 1.668620607592229e-05, + "loss": 1.0751, + "step": 18180 + }, + { + "epoch": 0.27436725089746294, + "grad_norm": 0.7340345772738679, + "learning_rate": 1.6682646081131354e-05, + "loss": 1.0847, + "step": 18190 + }, + { + "epoch": 0.2745180850101059, + "grad_norm": 0.6620098163596458, + "learning_rate": 1.667908455533327e-05, + "loss": 1.0778, + "step": 18200 + }, + { + "epoch": 0.2746689191227488, + "grad_norm": 0.7486539880451606, + "learning_rate": 1.6675521499343985e-05, + "loss": 1.0704, + "step": 18210 + }, + { + "epoch": 0.2748197532353917, + "grad_norm": 0.6644165264930859, + "learning_rate": 1.6671956913979808e-05, + "loss": 1.0705, + "step": 18220 + }, + { + "epoch": 0.27497058734803465, + "grad_norm": 0.6498383908748449, + "learning_rate": 1.6668390800057385e-05, + "loss": 1.0624, + "step": 18230 + }, + { + "epoch": 0.27512142146067753, + "grad_norm": 0.6761463124357705, + "learning_rate": 1.6664823158393728e-05, + "loss": 1.0666, + "step": 18240 + }, + { + "epoch": 0.27527225557332047, + "grad_norm": 0.6687239035923921, + "learning_rate": 1.6661253989806186e-05, + "loss": 1.0769, + "step": 18250 + }, + { + "epoch": 0.27542308968596335, + "grad_norm": 0.7478577984601515, + "learning_rate": 1.665768329511246e-05, + "loss": 1.0876, + "step": 18260 + }, + { + "epoch": 0.2755739237986063, + "grad_norm": 0.6986223833193743, + "learning_rate": 1.665411107513061e-05, + "loss": 1.1957, + "step": 18270 + }, + { + "epoch": 0.27572475791124923, + "grad_norm": 0.6856809969413732, + "learning_rate": 1.6650537330679034e-05, + "loss": 1.0683, + "step": 18280 + }, + { + "epoch": 0.2758755920238921, + "grad_norm": 0.6932533988332795, + "learning_rate": 1.664696206257648e-05, + "loss": 1.0876, + "step": 18290 + }, + { + "epoch": 0.27602642613653505, + "grad_norm": 0.6607216047224924, + "learning_rate": 1.664338527164206e-05, + "loss": 1.0708, + "step": 18300 + }, + { + "epoch": 0.27617726024917794, + "grad_norm": 0.6472686333844129, + "learning_rate": 1.6639806958695213e-05, + "loss": 1.0762, + "step": 18310 + }, + { + "epoch": 0.2763280943618209, + "grad_norm": 0.6816408275777073, + "learning_rate": 1.6636227124555743e-05, + "loss": 1.0818, + "step": 18320 + }, + { + "epoch": 0.27647892847446376, + "grad_norm": 0.6510306921597399, + "learning_rate": 1.6632645770043795e-05, + "loss": 1.0862, + "step": 18330 + }, + { + "epoch": 0.2766297625871067, + "grad_norm": 0.6658131236409707, + "learning_rate": 1.6629062895979863e-05, + "loss": 1.0637, + "step": 18340 + }, + { + "epoch": 0.27678059669974964, + "grad_norm": 0.6267300623341985, + "learning_rate": 1.6625478503184793e-05, + "loss": 1.0573, + "step": 18350 + }, + { + "epoch": 0.2769314308123925, + "grad_norm": 0.6976230026530487, + "learning_rate": 1.6621892592479772e-05, + "loss": 1.0882, + "step": 18360 + }, + { + "epoch": 0.27708226492503546, + "grad_norm": 0.6772618962038931, + "learning_rate": 1.661830516468635e-05, + "loss": 1.0789, + "step": 18370 + }, + { + "epoch": 0.27723309903767834, + "grad_norm": 0.6437082315180505, + "learning_rate": 1.6614716220626396e-05, + "loss": 1.0623, + "step": 18380 + }, + { + "epoch": 0.2773839331503213, + "grad_norm": 0.7250996198027831, + "learning_rate": 1.6611125761122162e-05, + "loss": 1.0598, + "step": 18390 + }, + { + "epoch": 0.27753476726296417, + "grad_norm": 0.6509652005500334, + "learning_rate": 1.6607533786996218e-05, + "loss": 1.0786, + "step": 18400 + }, + { + "epoch": 0.2776856013756071, + "grad_norm": 0.7397833194250368, + "learning_rate": 1.6603940299071496e-05, + "loss": 1.0745, + "step": 18410 + }, + { + "epoch": 0.27783643548825004, + "grad_norm": 0.6756479450132861, + "learning_rate": 1.660034529817127e-05, + "loss": 1.0784, + "step": 18420 + }, + { + "epoch": 0.27798726960089293, + "grad_norm": 0.684934839594583, + "learning_rate": 1.6596748785119167e-05, + "loss": 1.0712, + "step": 18430 + }, + { + "epoch": 0.27813810371353587, + "grad_norm": 0.6933089315138521, + "learning_rate": 1.6593150760739144e-05, + "loss": 1.075, + "step": 18440 + }, + { + "epoch": 0.27828893782617875, + "grad_norm": 0.701339635073096, + "learning_rate": 1.6589551225855522e-05, + "loss": 1.0741, + "step": 18450 + }, + { + "epoch": 0.2784397719388217, + "grad_norm": 0.6597767173602758, + "learning_rate": 1.6585950181292966e-05, + "loss": 1.0554, + "step": 18460 + }, + { + "epoch": 0.2785906060514646, + "grad_norm": 0.645972913802055, + "learning_rate": 1.658234762787647e-05, + "loss": 1.0621, + "step": 18470 + }, + { + "epoch": 0.2787414401641075, + "grad_norm": 0.6858343595211777, + "learning_rate": 1.6578743566431398e-05, + "loss": 1.0561, + "step": 18480 + }, + { + "epoch": 0.27889227427675045, + "grad_norm": 0.6888733796672117, + "learning_rate": 1.6575137997783444e-05, + "loss": 1.0602, + "step": 18490 + }, + { + "epoch": 0.27904310838939334, + "grad_norm": 0.68877207762859, + "learning_rate": 1.6571530922758646e-05, + "loss": 1.058, + "step": 18500 + }, + { + "epoch": 0.2791939425020363, + "grad_norm": 0.6825987085550478, + "learning_rate": 1.65679223421834e-05, + "loss": 1.0577, + "step": 18510 + }, + { + "epoch": 0.27934477661467916, + "grad_norm": 0.6888665756450776, + "learning_rate": 1.6564312256884428e-05, + "loss": 1.0573, + "step": 18520 + }, + { + "epoch": 0.2794956107273221, + "grad_norm": 0.6517388760756347, + "learning_rate": 1.6560700667688817e-05, + "loss": 1.0554, + "step": 18530 + }, + { + "epoch": 0.279646444839965, + "grad_norm": 0.6751363846976022, + "learning_rate": 1.6557087575423985e-05, + "loss": 1.0704, + "step": 18540 + }, + { + "epoch": 0.2797972789526079, + "grad_norm": 0.663903657876861, + "learning_rate": 1.65534729809177e-05, + "loss": 1.0614, + "step": 18550 + }, + { + "epoch": 0.27994811306525086, + "grad_norm": 0.6189855233545402, + "learning_rate": 1.6549856884998074e-05, + "loss": 1.0604, + "step": 18560 + }, + { + "epoch": 0.28009894717789374, + "grad_norm": 0.6858918107412442, + "learning_rate": 1.6546239288493555e-05, + "loss": 1.0734, + "step": 18570 + }, + { + "epoch": 0.2802497812905367, + "grad_norm": 0.6592829595849679, + "learning_rate": 1.6542620192232946e-05, + "loss": 1.0652, + "step": 18580 + }, + { + "epoch": 0.28040061540317957, + "grad_norm": 0.7468072007267762, + "learning_rate": 1.6538999597045392e-05, + "loss": 1.0523, + "step": 18590 + }, + { + "epoch": 0.2805514495158225, + "grad_norm": 0.6891193288415736, + "learning_rate": 1.653537750376037e-05, + "loss": 1.0719, + "step": 18600 + }, + { + "epoch": 0.2807022836284654, + "grad_norm": 9.395321041983635, + "learning_rate": 1.653175391320772e-05, + "loss": 1.0875, + "step": 18610 + }, + { + "epoch": 0.2808531177411083, + "grad_norm": 0.626018830565342, + "learning_rate": 1.6528128826217602e-05, + "loss": 1.0722, + "step": 18620 + }, + { + "epoch": 0.28100395185375127, + "grad_norm": 0.6469588329845165, + "learning_rate": 1.652450224362054e-05, + "loss": 1.0632, + "step": 18630 + }, + { + "epoch": 0.28115478596639415, + "grad_norm": 0.6442060637575641, + "learning_rate": 1.6520874166247382e-05, + "loss": 1.0469, + "step": 18640 + }, + { + "epoch": 0.2813056200790371, + "grad_norm": 0.6558204086985421, + "learning_rate": 1.6517244594929337e-05, + "loss": 1.0711, + "step": 18650 + }, + { + "epoch": 0.28145645419168, + "grad_norm": 0.6365308791762685, + "learning_rate": 1.651361353049794e-05, + "loss": 1.0859, + "step": 18660 + }, + { + "epoch": 0.2816072883043229, + "grad_norm": 0.6481790980157497, + "learning_rate": 1.6509980973785077e-05, + "loss": 1.075, + "step": 18670 + }, + { + "epoch": 0.2817581224169658, + "grad_norm": 0.7902180559429329, + "learning_rate": 1.6506346925622972e-05, + "loss": 1.0732, + "step": 18680 + }, + { + "epoch": 0.28190895652960873, + "grad_norm": 0.7095897261213354, + "learning_rate": 1.6502711386844196e-05, + "loss": 1.0715, + "step": 18690 + }, + { + "epoch": 0.2820597906422517, + "grad_norm": 0.6517293088403681, + "learning_rate": 1.6499074358281657e-05, + "loss": 1.0662, + "step": 18700 + }, + { + "epoch": 0.28221062475489456, + "grad_norm": 0.6594285769366354, + "learning_rate": 1.6495435840768604e-05, + "loss": 1.0741, + "step": 18710 + }, + { + "epoch": 0.2823614588675375, + "grad_norm": 0.6755475891984629, + "learning_rate": 1.649179583513863e-05, + "loss": 1.0565, + "step": 18720 + }, + { + "epoch": 0.2825122929801804, + "grad_norm": 0.7772695112749857, + "learning_rate": 1.6488154342225665e-05, + "loss": 1.0626, + "step": 18730 + }, + { + "epoch": 0.2826631270928233, + "grad_norm": 0.6919207087493977, + "learning_rate": 1.648451136286398e-05, + "loss": 1.0729, + "step": 18740 + }, + { + "epoch": 0.2828139612054662, + "grad_norm": 0.6698482168957819, + "learning_rate": 1.6480866897888196e-05, + "loss": 1.0743, + "step": 18750 + }, + { + "epoch": 0.28296479531810914, + "grad_norm": 0.6857393208858872, + "learning_rate": 1.6477220948133262e-05, + "loss": 1.0758, + "step": 18760 + }, + { + "epoch": 0.2831156294307521, + "grad_norm": 0.7242731895118237, + "learning_rate": 1.6473573514434473e-05, + "loss": 1.063, + "step": 18770 + }, + { + "epoch": 0.28326646354339496, + "grad_norm": 0.7398462891442221, + "learning_rate": 1.6469924597627462e-05, + "loss": 1.0828, + "step": 18780 + }, + { + "epoch": 0.2834172976560379, + "grad_norm": 0.6514783898435522, + "learning_rate": 1.6466274198548203e-05, + "loss": 1.0616, + "step": 18790 + }, + { + "epoch": 0.2835681317686808, + "grad_norm": 0.6573924032804571, + "learning_rate": 1.6462622318033015e-05, + "loss": 1.0711, + "step": 18800 + }, + { + "epoch": 0.2837189658813237, + "grad_norm": 0.6549964716330389, + "learning_rate": 1.6458968956918547e-05, + "loss": 1.0626, + "step": 18810 + }, + { + "epoch": 0.2838697999939666, + "grad_norm": 0.6673031544013104, + "learning_rate": 1.645531411604179e-05, + "loss": 1.0576, + "step": 18820 + }, + { + "epoch": 0.28402063410660955, + "grad_norm": 0.686484105246027, + "learning_rate": 1.645165779624007e-05, + "loss": 1.0682, + "step": 18830 + }, + { + "epoch": 0.2841714682192525, + "grad_norm": 0.6741983901036709, + "learning_rate": 1.6447999998351066e-05, + "loss": 1.075, + "step": 18840 + }, + { + "epoch": 0.28432230233189537, + "grad_norm": 0.6969127149685466, + "learning_rate": 1.6444340723212784e-05, + "loss": 1.0422, + "step": 18850 + }, + { + "epoch": 0.2844731364445383, + "grad_norm": 0.6812291010334486, + "learning_rate": 1.644067997166357e-05, + "loss": 1.0692, + "step": 18860 + }, + { + "epoch": 0.2846239705571812, + "grad_norm": 0.6927054017271991, + "learning_rate": 1.6437017744542108e-05, + "loss": 1.0835, + "step": 18870 + }, + { + "epoch": 0.28477480466982413, + "grad_norm": 0.7546880650277942, + "learning_rate": 1.6433354042687427e-05, + "loss": 1.0826, + "step": 18880 + }, + { + "epoch": 0.284925638782467, + "grad_norm": 0.6754261769610848, + "learning_rate": 1.642968886693888e-05, + "loss": 1.0437, + "step": 18890 + }, + { + "epoch": 0.28507647289510996, + "grad_norm": 0.7111729245354741, + "learning_rate": 1.642602221813617e-05, + "loss": 1.0806, + "step": 18900 + }, + { + "epoch": 0.2852273070077529, + "grad_norm": 0.7531810029564535, + "learning_rate": 1.6422354097119332e-05, + "loss": 1.0614, + "step": 18910 + }, + { + "epoch": 0.2853781411203958, + "grad_norm": 0.6669904542712124, + "learning_rate": 1.6418684504728745e-05, + "loss": 1.0483, + "step": 18920 + }, + { + "epoch": 0.2855289752330387, + "grad_norm": 0.6667454630613803, + "learning_rate": 1.641501344180511e-05, + "loss": 1.0709, + "step": 18930 + }, + { + "epoch": 0.2856798093456816, + "grad_norm": 0.7301320757054722, + "learning_rate": 1.6411340909189485e-05, + "loss": 1.0681, + "step": 18940 + }, + { + "epoch": 0.28583064345832454, + "grad_norm": 0.6509151709704791, + "learning_rate": 1.640766690772325e-05, + "loss": 1.0545, + "step": 18950 + }, + { + "epoch": 0.2859814775709674, + "grad_norm": 0.6599747361252761, + "learning_rate": 1.640399143824812e-05, + "loss": 1.0588, + "step": 18960 + }, + { + "epoch": 0.28613231168361036, + "grad_norm": 0.6698072432280137, + "learning_rate": 1.6400314501606163e-05, + "loss": 1.0647, + "step": 18970 + }, + { + "epoch": 0.2862831457962533, + "grad_norm": 0.7423554649232875, + "learning_rate": 1.6396636098639765e-05, + "loss": 1.0848, + "step": 18980 + }, + { + "epoch": 0.2864339799088962, + "grad_norm": 0.71027230667034, + "learning_rate": 1.6392956230191656e-05, + "loss": 1.0614, + "step": 18990 + }, + { + "epoch": 0.2865848140215391, + "grad_norm": 0.7090640719006839, + "learning_rate": 1.63892748971049e-05, + "loss": 1.0452, + "step": 19000 + }, + { + "epoch": 0.286735648134182, + "grad_norm": 0.7161418830656449, + "learning_rate": 1.6385592100222906e-05, + "loss": 1.0767, + "step": 19010 + }, + { + "epoch": 0.28688648224682495, + "grad_norm": 0.6405072332866616, + "learning_rate": 1.63819078403894e-05, + "loss": 1.0564, + "step": 19020 + }, + { + "epoch": 0.28703731635946783, + "grad_norm": 0.6458249586613464, + "learning_rate": 1.6378222118448456e-05, + "loss": 1.0594, + "step": 19030 + }, + { + "epoch": 0.28718815047211077, + "grad_norm": 0.6701745038205124, + "learning_rate": 1.637453493524448e-05, + "loss": 1.0492, + "step": 19040 + }, + { + "epoch": 0.2873389845847537, + "grad_norm": 0.6574031667639727, + "learning_rate": 1.6370846291622214e-05, + "loss": 1.0564, + "step": 19050 + }, + { + "epoch": 0.2874898186973966, + "grad_norm": 0.8451725818141239, + "learning_rate": 1.6367156188426732e-05, + "loss": 1.0683, + "step": 19060 + }, + { + "epoch": 0.28764065281003953, + "grad_norm": 0.7021709395266261, + "learning_rate": 1.6363464626503445e-05, + "loss": 1.0752, + "step": 19070 + }, + { + "epoch": 0.2877914869226824, + "grad_norm": 0.6786290810945783, + "learning_rate": 1.63597716066981e-05, + "loss": 1.056, + "step": 19080 + }, + { + "epoch": 0.28794232103532535, + "grad_norm": 0.6618719871369179, + "learning_rate": 1.6356077129856766e-05, + "loss": 1.0691, + "step": 19090 + }, + { + "epoch": 0.28809315514796824, + "grad_norm": 0.6365532074206385, + "learning_rate": 1.6352381196825863e-05, + "loss": 1.0576, + "step": 19100 + }, + { + "epoch": 0.2882439892606112, + "grad_norm": 0.7039459549574176, + "learning_rate": 1.6348683808452133e-05, + "loss": 1.081, + "step": 19110 + }, + { + "epoch": 0.2883948233732541, + "grad_norm": 0.7233446982685345, + "learning_rate": 1.6344984965582655e-05, + "loss": 1.0625, + "step": 19120 + }, + { + "epoch": 0.288545657485897, + "grad_norm": 0.6398184115017842, + "learning_rate": 1.6341284669064842e-05, + "loss": 1.0633, + "step": 19130 + }, + { + "epoch": 0.28869649159853994, + "grad_norm": 0.6437532987109776, + "learning_rate": 1.6337582919746438e-05, + "loss": 1.073, + "step": 19140 + }, + { + "epoch": 0.2888473257111828, + "grad_norm": 0.6739647646405792, + "learning_rate": 1.6333879718475527e-05, + "loss": 1.0833, + "step": 19150 + }, + { + "epoch": 0.28899815982382576, + "grad_norm": 0.7310536299861706, + "learning_rate": 1.633017506610051e-05, + "loss": 1.0754, + "step": 19160 + }, + { + "epoch": 0.28914899393646865, + "grad_norm": 0.7146796117649148, + "learning_rate": 1.6326468963470134e-05, + "loss": 1.055, + "step": 19170 + }, + { + "epoch": 0.2892998280491116, + "grad_norm": 0.6815567798123096, + "learning_rate": 1.6322761411433473e-05, + "loss": 1.0695, + "step": 19180 + }, + { + "epoch": 0.2894506621617545, + "grad_norm": 0.6658091097437291, + "learning_rate": 1.6319052410839937e-05, + "loss": 1.0646, + "step": 19190 + }, + { + "epoch": 0.2896014962743974, + "grad_norm": 0.6777519052661535, + "learning_rate": 1.6315341962539268e-05, + "loss": 1.0738, + "step": 19200 + }, + { + "epoch": 0.28975233038704035, + "grad_norm": 0.6911089676845064, + "learning_rate": 1.631163006738153e-05, + "loss": 1.0561, + "step": 19210 + }, + { + "epoch": 0.28990316449968323, + "grad_norm": 0.6497769518142462, + "learning_rate": 1.6307916726217125e-05, + "loss": 1.0539, + "step": 19220 + }, + { + "epoch": 0.29005399861232617, + "grad_norm": 0.6595749370730368, + "learning_rate": 1.6304201939896796e-05, + "loss": 1.0577, + "step": 19230 + }, + { + "epoch": 0.29020483272496905, + "grad_norm": 0.7349592454093143, + "learning_rate": 1.6300485709271597e-05, + "loss": 1.0707, + "step": 19240 + }, + { + "epoch": 0.290355666837612, + "grad_norm": 0.6792188983161989, + "learning_rate": 1.629676803519293e-05, + "loss": 1.0692, + "step": 19250 + }, + { + "epoch": 0.29050650095025493, + "grad_norm": 0.6360656992835662, + "learning_rate": 1.629304891851252e-05, + "loss": 1.0629, + "step": 19260 + }, + { + "epoch": 0.2906573350628978, + "grad_norm": 0.6504641513003904, + "learning_rate": 1.628932836008242e-05, + "loss": 1.0685, + "step": 19270 + }, + { + "epoch": 0.29080816917554075, + "grad_norm": 0.657276756512571, + "learning_rate": 1.6285606360755024e-05, + "loss": 1.0579, + "step": 19280 + }, + { + "epoch": 0.29095900328818364, + "grad_norm": 0.6826021444270538, + "learning_rate": 1.6281882921383045e-05, + "loss": 1.07, + "step": 19290 + }, + { + "epoch": 0.2911098374008266, + "grad_norm": 0.6830811051676188, + "learning_rate": 1.6278158042819533e-05, + "loss": 1.069, + "step": 19300 + }, + { + "epoch": 0.29126067151346946, + "grad_norm": 0.6974609987560835, + "learning_rate": 1.6274431725917864e-05, + "loss": 1.0678, + "step": 19310 + }, + { + "epoch": 0.2914115056261124, + "grad_norm": 0.5952350625723478, + "learning_rate": 1.627070397153174e-05, + "loss": 1.0483, + "step": 19320 + }, + { + "epoch": 0.29156233973875534, + "grad_norm": 0.700034016623041, + "learning_rate": 1.62669747805152e-05, + "loss": 1.0552, + "step": 19330 + }, + { + "epoch": 0.2917131738513982, + "grad_norm": 0.6626024543639514, + "learning_rate": 1.626324415372261e-05, + "loss": 1.0533, + "step": 19340 + }, + { + "epoch": 0.29186400796404116, + "grad_norm": 0.6549980954673289, + "learning_rate": 1.625951209200866e-05, + "loss": 1.0641, + "step": 19350 + }, + { + "epoch": 0.29201484207668404, + "grad_norm": 0.6644625324615999, + "learning_rate": 1.6255778596228376e-05, + "loss": 1.0457, + "step": 19360 + }, + { + "epoch": 0.292165676189327, + "grad_norm": 0.6558179229518004, + "learning_rate": 1.6252043667237113e-05, + "loss": 1.0488, + "step": 19370 + }, + { + "epoch": 0.29231651030196987, + "grad_norm": 0.6797820039212646, + "learning_rate": 1.6248307305890544e-05, + "loss": 1.0726, + "step": 19380 + }, + { + "epoch": 0.2924673444146128, + "grad_norm": 0.685298914355499, + "learning_rate": 1.6244569513044677e-05, + "loss": 1.0765, + "step": 19390 + }, + { + "epoch": 0.29261817852725575, + "grad_norm": 0.7071456785993845, + "learning_rate": 1.624083028955585e-05, + "loss": 1.06, + "step": 19400 + }, + { + "epoch": 0.29276901263989863, + "grad_norm": 0.6571474797150094, + "learning_rate": 1.623708963628073e-05, + "loss": 1.0665, + "step": 19410 + }, + { + "epoch": 0.29291984675254157, + "grad_norm": 0.6432391430536765, + "learning_rate": 1.6233347554076297e-05, + "loss": 1.058, + "step": 19420 + }, + { + "epoch": 0.29307068086518445, + "grad_norm": 0.6788017285086921, + "learning_rate": 1.6229604043799873e-05, + "loss": 1.0696, + "step": 19430 + }, + { + "epoch": 0.2932215149778274, + "grad_norm": 0.6688964962810405, + "learning_rate": 1.6225859106309114e-05, + "loss": 1.0763, + "step": 19440 + }, + { + "epoch": 0.2933723490904703, + "grad_norm": 0.7243583855014974, + "learning_rate": 1.622211274246198e-05, + "loss": 1.0553, + "step": 19450 + }, + { + "epoch": 0.2935231832031132, + "grad_norm": 0.6427256075058306, + "learning_rate": 1.6218364953116775e-05, + "loss": 1.0594, + "step": 19460 + }, + { + "epoch": 0.29367401731575615, + "grad_norm": 0.6920122395421011, + "learning_rate": 1.6214615739132124e-05, + "loss": 1.0986, + "step": 19470 + }, + { + "epoch": 0.29382485142839904, + "grad_norm": 0.6638691311580767, + "learning_rate": 1.6210865101366983e-05, + "loss": 1.0535, + "step": 19480 + }, + { + "epoch": 0.293975685541042, + "grad_norm": 0.6621908523168295, + "learning_rate": 1.6207113040680626e-05, + "loss": 1.0671, + "step": 19490 + }, + { + "epoch": 0.29412651965368486, + "grad_norm": 0.6923301615615962, + "learning_rate": 1.6203359557932653e-05, + "loss": 1.0578, + "step": 19500 + }, + { + "epoch": 0.2942773537663278, + "grad_norm": 0.6444778623472224, + "learning_rate": 1.6199604653983006e-05, + "loss": 1.0518, + "step": 19510 + }, + { + "epoch": 0.2944281878789707, + "grad_norm": 0.6930452633050186, + "learning_rate": 1.6195848329691935e-05, + "loss": 1.0548, + "step": 19520 + }, + { + "epoch": 0.2945790219916136, + "grad_norm": 0.734174578057327, + "learning_rate": 1.6192090585920018e-05, + "loss": 1.0487, + "step": 19530 + }, + { + "epoch": 0.29472985610425656, + "grad_norm": 0.6548723385532055, + "learning_rate": 1.618833142352816e-05, + "loss": 1.0513, + "step": 19540 + }, + { + "epoch": 0.29488069021689944, + "grad_norm": 0.678132495235191, + "learning_rate": 1.6184570843377602e-05, + "loss": 1.0654, + "step": 19550 + }, + { + "epoch": 0.2950315243295424, + "grad_norm": 0.7266034215465241, + "learning_rate": 1.618080884632989e-05, + "loss": 1.0686, + "step": 19560 + }, + { + "epoch": 0.29518235844218527, + "grad_norm": 0.7859373060031909, + "learning_rate": 1.6177045433246915e-05, + "loss": 1.0558, + "step": 19570 + }, + { + "epoch": 0.2953331925548282, + "grad_norm": 0.6645630929217404, + "learning_rate": 1.6173280604990872e-05, + "loss": 1.0659, + "step": 19580 + }, + { + "epoch": 0.2954840266674711, + "grad_norm": 0.6783791571778064, + "learning_rate": 1.6169514362424293e-05, + "loss": 1.0578, + "step": 19590 + }, + { + "epoch": 0.29563486078011403, + "grad_norm": 0.6866013158346889, + "learning_rate": 1.6165746706410038e-05, + "loss": 1.0686, + "step": 19600 + }, + { + "epoch": 0.29578569489275697, + "grad_norm": 0.740039148273617, + "learning_rate": 1.616197763781128e-05, + "loss": 1.0858, + "step": 19610 + }, + { + "epoch": 0.29593652900539985, + "grad_norm": 0.7083364053812031, + "learning_rate": 1.6158207157491516e-05, + "loss": 1.0612, + "step": 19620 + }, + { + "epoch": 0.2960873631180428, + "grad_norm": 0.6475027765984173, + "learning_rate": 1.6154435266314573e-05, + "loss": 1.0653, + "step": 19630 + }, + { + "epoch": 0.2962381972306857, + "grad_norm": 0.6722815218575239, + "learning_rate": 1.61506619651446e-05, + "loss": 1.0584, + "step": 19640 + }, + { + "epoch": 0.2963890313433286, + "grad_norm": 0.7252810427709048, + "learning_rate": 1.614688725484607e-05, + "loss": 1.0737, + "step": 19650 + }, + { + "epoch": 0.2965398654559715, + "grad_norm": 0.695572222721221, + "learning_rate": 1.6143111136283774e-05, + "loss": 1.0656, + "step": 19660 + }, + { + "epoch": 0.29669069956861444, + "grad_norm": 0.7056296609732376, + "learning_rate": 1.6139333610322827e-05, + "loss": 1.0598, + "step": 19670 + }, + { + "epoch": 0.2968415336812574, + "grad_norm": 0.66478964058379, + "learning_rate": 1.6135554677828666e-05, + "loss": 1.0413, + "step": 19680 + }, + { + "epoch": 0.29699236779390026, + "grad_norm": 0.6489543514245226, + "learning_rate": 1.613177433966706e-05, + "loss": 1.0582, + "step": 19690 + }, + { + "epoch": 0.2971432019065432, + "grad_norm": 0.6906324898887234, + "learning_rate": 1.6127992596704076e-05, + "loss": 1.067, + "step": 19700 + }, + { + "epoch": 0.2972940360191861, + "grad_norm": 0.6755812436901808, + "learning_rate": 1.6124209449806132e-05, + "loss": 1.0583, + "step": 19710 + }, + { + "epoch": 0.297444870131829, + "grad_norm": 0.6795571523635769, + "learning_rate": 1.612042489983995e-05, + "loss": 1.0752, + "step": 19720 + }, + { + "epoch": 0.2975957042444719, + "grad_norm": 0.6439442426687484, + "learning_rate": 1.6116638947672582e-05, + "loss": 1.0455, + "step": 19730 + }, + { + "epoch": 0.29774653835711484, + "grad_norm": 0.6346140522675386, + "learning_rate": 1.6112851594171392e-05, + "loss": 1.0655, + "step": 19740 + }, + { + "epoch": 0.2978973724697578, + "grad_norm": 0.6963485889119254, + "learning_rate": 1.6109062840204067e-05, + "loss": 1.0651, + "step": 19750 + }, + { + "epoch": 0.29804820658240067, + "grad_norm": 0.675787593446358, + "learning_rate": 1.6105272686638626e-05, + "loss": 1.0612, + "step": 19760 + }, + { + "epoch": 0.2981990406950436, + "grad_norm": 0.6814539236857907, + "learning_rate": 1.6101481134343396e-05, + "loss": 1.0717, + "step": 19770 + }, + { + "epoch": 0.2983498748076865, + "grad_norm": 0.6771217234532654, + "learning_rate": 1.609768818418703e-05, + "loss": 1.0531, + "step": 19780 + }, + { + "epoch": 0.2985007089203294, + "grad_norm": 0.6874704383175211, + "learning_rate": 1.60938938370385e-05, + "loss": 1.0528, + "step": 19790 + }, + { + "epoch": 0.2986515430329723, + "grad_norm": 0.6849561555425576, + "learning_rate": 1.6090098093767098e-05, + "loss": 1.0524, + "step": 19800 + }, + { + "epoch": 0.29880237714561525, + "grad_norm": 0.6931404421895767, + "learning_rate": 1.6086300955242435e-05, + "loss": 1.0524, + "step": 19810 + }, + { + "epoch": 0.2989532112582582, + "grad_norm": 0.653591484849252, + "learning_rate": 1.6082502422334442e-05, + "loss": 1.0565, + "step": 19820 + }, + { + "epoch": 0.2991040453709011, + "grad_norm": 0.6926754509586439, + "learning_rate": 1.6078702495913377e-05, + "loss": 1.047, + "step": 19830 + }, + { + "epoch": 0.299254879483544, + "grad_norm": 0.7184319941878757, + "learning_rate": 1.6074901176849804e-05, + "loss": 1.0575, + "step": 19840 + }, + { + "epoch": 0.2994057135961869, + "grad_norm": 0.6390312729925115, + "learning_rate": 1.607109846601461e-05, + "loss": 1.0643, + "step": 19850 + }, + { + "epoch": 0.29955654770882983, + "grad_norm": 0.6827493119781015, + "learning_rate": 1.6067294364279012e-05, + "loss": 1.0695, + "step": 19860 + }, + { + "epoch": 0.2997073818214727, + "grad_norm": 0.7698762436659143, + "learning_rate": 1.6063488872514534e-05, + "loss": 1.0753, + "step": 19870 + }, + { + "epoch": 0.29985821593411566, + "grad_norm": 0.6581911986897075, + "learning_rate": 1.605968199159302e-05, + "loss": 1.0506, + "step": 19880 + }, + { + "epoch": 0.3000090500467586, + "grad_norm": 0.7700730259229213, + "learning_rate": 1.605587372238663e-05, + "loss": 1.0605, + "step": 19890 + }, + { + "epoch": 0.3001598841594015, + "grad_norm": 0.6513631402143817, + "learning_rate": 1.6052064065767856e-05, + "loss": 1.0436, + "step": 19900 + }, + { + "epoch": 0.3003107182720444, + "grad_norm": 0.7329169167935256, + "learning_rate": 1.6048253022609486e-05, + "loss": 1.0634, + "step": 19910 + }, + { + "epoch": 0.3004615523846873, + "grad_norm": 0.665190029173255, + "learning_rate": 1.6044440593784646e-05, + "loss": 1.0674, + "step": 19920 + }, + { + "epoch": 0.30061238649733024, + "grad_norm": 0.7323056807386814, + "learning_rate": 1.6040626780166765e-05, + "loss": 1.0503, + "step": 19930 + }, + { + "epoch": 0.3007632206099731, + "grad_norm": 0.6487809329363922, + "learning_rate": 1.6036811582629602e-05, + "loss": 1.0527, + "step": 19940 + }, + { + "epoch": 0.30091405472261606, + "grad_norm": 0.6718953639288741, + "learning_rate": 1.603299500204722e-05, + "loss": 1.0732, + "step": 19950 + }, + { + "epoch": 0.301064888835259, + "grad_norm": 0.6129628348845709, + "learning_rate": 1.6029177039294003e-05, + "loss": 1.0627, + "step": 19960 + }, + { + "epoch": 0.3012157229479019, + "grad_norm": 0.686826795884212, + "learning_rate": 1.6025357695244667e-05, + "loss": 1.0661, + "step": 19970 + }, + { + "epoch": 0.3013665570605448, + "grad_norm": 0.6708462183659735, + "learning_rate": 1.6021536970774216e-05, + "loss": 1.0494, + "step": 19980 + }, + { + "epoch": 0.3015173911731877, + "grad_norm": 0.6481221849485799, + "learning_rate": 1.6017714866757992e-05, + "loss": 1.0499, + "step": 19990 + }, + { + "epoch": 0.30166822528583065, + "grad_norm": 0.6509476720357172, + "learning_rate": 1.601389138407164e-05, + "loss": 1.0625, + "step": 20000 + }, + { + "epoch": 0.30181905939847353, + "grad_norm": 0.6464822570691788, + "learning_rate": 1.6010066523591144e-05, + "loss": 1.0475, + "step": 20010 + }, + { + "epoch": 0.30196989351111647, + "grad_norm": 0.6821796803414445, + "learning_rate": 1.6006240286192767e-05, + "loss": 1.0398, + "step": 20020 + }, + { + "epoch": 0.3021207276237594, + "grad_norm": 0.7722271572994572, + "learning_rate": 1.600241267275312e-05, + "loss": 1.0541, + "step": 20030 + }, + { + "epoch": 0.3022715617364023, + "grad_norm": 0.678942030150491, + "learning_rate": 1.5998583684149114e-05, + "loss": 1.0676, + "step": 20040 + }, + { + "epoch": 0.30242239584904523, + "grad_norm": 0.6828227872256849, + "learning_rate": 1.5994753321257973e-05, + "loss": 1.055, + "step": 20050 + }, + { + "epoch": 0.3025732299616881, + "grad_norm": 0.6710102859844873, + "learning_rate": 1.5990921584957244e-05, + "loss": 1.0515, + "step": 20060 + }, + { + "epoch": 0.30272406407433106, + "grad_norm": 0.6816077555046033, + "learning_rate": 1.5987088476124787e-05, + "loss": 1.055, + "step": 20070 + }, + { + "epoch": 0.30287489818697394, + "grad_norm": 0.7343558707322274, + "learning_rate": 1.5983253995638777e-05, + "loss": 1.0461, + "step": 20080 + }, + { + "epoch": 0.3030257322996169, + "grad_norm": 0.7059487682825074, + "learning_rate": 1.5979418144377688e-05, + "loss": 1.0471, + "step": 20090 + }, + { + "epoch": 0.3031765664122598, + "grad_norm": 0.6471404676086652, + "learning_rate": 1.5975580923220337e-05, + "loss": 1.0674, + "step": 20100 + }, + { + "epoch": 0.3033274005249027, + "grad_norm": 0.6691506729319094, + "learning_rate": 1.5971742333045833e-05, + "loss": 1.0873, + "step": 20110 + }, + { + "epoch": 0.30347823463754564, + "grad_norm": 0.6496890262926505, + "learning_rate": 1.59679023747336e-05, + "loss": 1.0506, + "step": 20120 + }, + { + "epoch": 0.3036290687501885, + "grad_norm": 0.6438967642033904, + "learning_rate": 1.596406104916338e-05, + "loss": 1.0606, + "step": 20130 + }, + { + "epoch": 0.30377990286283146, + "grad_norm": 0.6930453337109205, + "learning_rate": 1.596021835721523e-05, + "loss": 1.0567, + "step": 20140 + }, + { + "epoch": 0.30393073697547435, + "grad_norm": 0.6440078486114115, + "learning_rate": 1.5956374299769528e-05, + "loss": 1.0564, + "step": 20150 + }, + { + "epoch": 0.3040815710881173, + "grad_norm": 0.6423809486197289, + "learning_rate": 1.5952528877706938e-05, + "loss": 1.0513, + "step": 20160 + }, + { + "epoch": 0.3042324052007602, + "grad_norm": 0.7031554962373757, + "learning_rate": 1.5948682091908465e-05, + "loss": 1.0773, + "step": 20170 + }, + { + "epoch": 0.3043832393134031, + "grad_norm": 0.670229559213274, + "learning_rate": 1.5944833943255413e-05, + "loss": 1.0638, + "step": 20180 + }, + { + "epoch": 0.30453407342604605, + "grad_norm": 0.645304720154129, + "learning_rate": 1.5940984432629398e-05, + "loss": 1.0581, + "step": 20190 + }, + { + "epoch": 0.30468490753868893, + "grad_norm": 0.6768003723268728, + "learning_rate": 1.593713356091235e-05, + "loss": 1.0644, + "step": 20200 + }, + { + "epoch": 0.30483574165133187, + "grad_norm": 0.6395032322484725, + "learning_rate": 1.5933281328986513e-05, + "loss": 1.0427, + "step": 20210 + }, + { + "epoch": 0.30498657576397475, + "grad_norm": 0.6735002274948975, + "learning_rate": 1.5929427737734446e-05, + "loss": 1.0683, + "step": 20220 + }, + { + "epoch": 0.3051374098766177, + "grad_norm": 0.7166373455738095, + "learning_rate": 1.5925572788039002e-05, + "loss": 1.0547, + "step": 20230 + }, + { + "epoch": 0.30528824398926063, + "grad_norm": 0.6533407161188102, + "learning_rate": 1.592171648078337e-05, + "loss": 1.0504, + "step": 20240 + }, + { + "epoch": 0.3054390781019035, + "grad_norm": 0.7145979681445317, + "learning_rate": 1.5917858816851026e-05, + "loss": 1.0442, + "step": 20250 + }, + { + "epoch": 0.30558991221454646, + "grad_norm": 0.6389367035394035, + "learning_rate": 1.5913999797125777e-05, + "loss": 1.0726, + "step": 20260 + }, + { + "epoch": 0.30574074632718934, + "grad_norm": 0.7667371507398106, + "learning_rate": 1.591013942249173e-05, + "loss": 1.0515, + "step": 20270 + }, + { + "epoch": 0.3058915804398323, + "grad_norm": 0.6856495587707141, + "learning_rate": 1.5906277693833297e-05, + "loss": 1.043, + "step": 20280 + }, + { + "epoch": 0.30604241455247516, + "grad_norm": 0.6536323304399888, + "learning_rate": 1.590241461203522e-05, + "loss": 1.0571, + "step": 20290 + }, + { + "epoch": 0.3061932486651181, + "grad_norm": 0.6670890930138572, + "learning_rate": 1.5898550177982527e-05, + "loss": 1.0659, + "step": 20300 + }, + { + "epoch": 0.30634408277776104, + "grad_norm": 0.7539350038406488, + "learning_rate": 1.589468439256058e-05, + "loss": 1.0646, + "step": 20310 + }, + { + "epoch": 0.3064949168904039, + "grad_norm": 0.6673848035331946, + "learning_rate": 1.5890817256655022e-05, + "loss": 1.0617, + "step": 20320 + }, + { + "epoch": 0.30664575100304686, + "grad_norm": 0.6728181892753016, + "learning_rate": 1.5886948771151836e-05, + "loss": 1.0337, + "step": 20330 + }, + { + "epoch": 0.30679658511568975, + "grad_norm": 0.6644066616978958, + "learning_rate": 1.588307893693729e-05, + "loss": 1.0573, + "step": 20340 + }, + { + "epoch": 0.3069474192283327, + "grad_norm": 0.6521345132524509, + "learning_rate": 1.5879207754897973e-05, + "loss": 1.0684, + "step": 20350 + }, + { + "epoch": 0.30709825334097557, + "grad_norm": 0.7938055000489262, + "learning_rate": 1.5875335225920783e-05, + "loss": 1.0664, + "step": 20360 + }, + { + "epoch": 0.3072490874536185, + "grad_norm": 0.6709844732665228, + "learning_rate": 1.587146135089292e-05, + "loss": 1.0747, + "step": 20370 + }, + { + "epoch": 0.30739992156626145, + "grad_norm": 0.7390871810504016, + "learning_rate": 1.58675861307019e-05, + "loss": 1.0541, + "step": 20380 + }, + { + "epoch": 0.30755075567890433, + "grad_norm": 0.6427465098357894, + "learning_rate": 1.5863709566235538e-05, + "loss": 1.0498, + "step": 20390 + }, + { + "epoch": 0.30770158979154727, + "grad_norm": 0.6561048588191103, + "learning_rate": 1.585983165838197e-05, + "loss": 1.0363, + "step": 20400 + }, + { + "epoch": 0.30785242390419015, + "grad_norm": 0.6699083663973828, + "learning_rate": 1.585595240802963e-05, + "loss": 1.0686, + "step": 20410 + }, + { + "epoch": 0.3080032580168331, + "grad_norm": 0.7284983970715394, + "learning_rate": 1.5852071816067257e-05, + "loss": 1.0522, + "step": 20420 + }, + { + "epoch": 0.308154092129476, + "grad_norm": 0.7056043139349814, + "learning_rate": 1.5848189883383907e-05, + "loss": 1.0473, + "step": 20430 + }, + { + "epoch": 0.3083049262421189, + "grad_norm": 0.6584421024795541, + "learning_rate": 1.584430661086893e-05, + "loss": 1.0392, + "step": 20440 + }, + { + "epoch": 0.30845576035476185, + "grad_norm": 0.665204738764808, + "learning_rate": 1.5840421999412002e-05, + "loss": 1.0462, + "step": 20450 + }, + { + "epoch": 0.30860659446740474, + "grad_norm": 0.64250739303501, + "learning_rate": 1.583653604990309e-05, + "loss": 1.0605, + "step": 20460 + }, + { + "epoch": 0.3087574285800477, + "grad_norm": 0.6648879900342745, + "learning_rate": 1.5832648763232477e-05, + "loss": 1.036, + "step": 20470 + }, + { + "epoch": 0.30890826269269056, + "grad_norm": 0.6602481429589399, + "learning_rate": 1.582876014029074e-05, + "loss": 1.062, + "step": 20480 + }, + { + "epoch": 0.3090590968053335, + "grad_norm": 0.7067823423414354, + "learning_rate": 1.5824870181968775e-05, + "loss": 1.0502, + "step": 20490 + }, + { + "epoch": 0.3092099309179764, + "grad_norm": 0.7015602419598039, + "learning_rate": 1.5820978889157783e-05, + "loss": 1.0345, + "step": 20500 + }, + { + "epoch": 0.3093607650306193, + "grad_norm": 0.6800957401568586, + "learning_rate": 1.5817086262749254e-05, + "loss": 1.0706, + "step": 20510 + }, + { + "epoch": 0.30951159914326226, + "grad_norm": 0.6340199842832731, + "learning_rate": 1.5813192303635007e-05, + "loss": 1.0463, + "step": 20520 + }, + { + "epoch": 0.30966243325590515, + "grad_norm": 0.6687447066793976, + "learning_rate": 1.580929701270715e-05, + "loss": 1.0525, + "step": 20530 + }, + { + "epoch": 0.3098132673685481, + "grad_norm": 0.6759213112674417, + "learning_rate": 1.580540039085811e-05, + "loss": 1.0444, + "step": 20540 + }, + { + "epoch": 0.30996410148119097, + "grad_norm": 0.6560231251171721, + "learning_rate": 1.58015024389806e-05, + "loss": 1.0519, + "step": 20550 + }, + { + "epoch": 0.3101149355938339, + "grad_norm": 0.7688084345133391, + "learning_rate": 1.5797603157967658e-05, + "loss": 1.0499, + "step": 20560 + }, + { + "epoch": 0.3102657697064768, + "grad_norm": 0.6648231236872795, + "learning_rate": 1.5793702548712612e-05, + "loss": 1.0413, + "step": 20570 + }, + { + "epoch": 0.31041660381911973, + "grad_norm": 0.6716914399219154, + "learning_rate": 1.5789800612109094e-05, + "loss": 1.0624, + "step": 20580 + }, + { + "epoch": 0.31056743793176267, + "grad_norm": 0.6962130968554537, + "learning_rate": 1.5785897349051055e-05, + "loss": 1.0535, + "step": 20590 + }, + { + "epoch": 0.31071827204440555, + "grad_norm": 0.693703712147167, + "learning_rate": 1.5781992760432733e-05, + "loss": 1.0318, + "step": 20600 + }, + { + "epoch": 0.3108691061570485, + "grad_norm": 0.6564702123882097, + "learning_rate": 1.577808684714868e-05, + "loss": 1.0433, + "step": 20610 + }, + { + "epoch": 0.3110199402696914, + "grad_norm": 0.6440774254998552, + "learning_rate": 1.577417961009375e-05, + "loss": 1.0379, + "step": 20620 + }, + { + "epoch": 0.3111707743823343, + "grad_norm": 0.7161699972446529, + "learning_rate": 1.577027105016309e-05, + "loss": 1.0378, + "step": 20630 + }, + { + "epoch": 0.3113216084949772, + "grad_norm": 0.6653504022281576, + "learning_rate": 1.5766361168252174e-05, + "loss": 1.0513, + "step": 20640 + }, + { + "epoch": 0.31147244260762014, + "grad_norm": 0.7163596530056581, + "learning_rate": 1.576244996525675e-05, + "loss": 1.0472, + "step": 20650 + }, + { + "epoch": 0.3116232767202631, + "grad_norm": 0.7150494249457109, + "learning_rate": 1.5758537442072885e-05, + "loss": 1.0541, + "step": 20660 + }, + { + "epoch": 0.31177411083290596, + "grad_norm": 0.6449785516578206, + "learning_rate": 1.5754623599596947e-05, + "loss": 1.0489, + "step": 20670 + }, + { + "epoch": 0.3119249449455489, + "grad_norm": 0.6569304920231153, + "learning_rate": 1.5750708438725608e-05, + "loss": 1.065, + "step": 20680 + }, + { + "epoch": 0.3120757790581918, + "grad_norm": 0.7141902535848021, + "learning_rate": 1.574679196035583e-05, + "loss": 1.0869, + "step": 20690 + }, + { + "epoch": 0.3122266131708347, + "grad_norm": 0.6783151449635202, + "learning_rate": 1.5742874165384898e-05, + "loss": 1.0436, + "step": 20700 + }, + { + "epoch": 0.3123774472834776, + "grad_norm": 0.6415716592144736, + "learning_rate": 1.573895505471038e-05, + "loss": 1.0582, + "step": 20710 + }, + { + "epoch": 0.31252828139612054, + "grad_norm": 0.6914159258995599, + "learning_rate": 1.573503462923015e-05, + "loss": 1.0468, + "step": 20720 + }, + { + "epoch": 0.3126791155087635, + "grad_norm": 0.6419778266199845, + "learning_rate": 1.5731112889842387e-05, + "loss": 1.0472, + "step": 20730 + }, + { + "epoch": 0.31282994962140637, + "grad_norm": 0.6155489958775944, + "learning_rate": 1.572718983744557e-05, + "loss": 1.0374, + "step": 20740 + }, + { + "epoch": 0.3129807837340493, + "grad_norm": 0.6777308920713007, + "learning_rate": 1.5723265472938474e-05, + "loss": 1.0554, + "step": 20750 + }, + { + "epoch": 0.3131316178466922, + "grad_norm": 0.6981165424039931, + "learning_rate": 1.5719339797220187e-05, + "loss": 1.0577, + "step": 20760 + }, + { + "epoch": 0.31328245195933513, + "grad_norm": 0.6507207721815155, + "learning_rate": 1.5715412811190082e-05, + "loss": 1.0583, + "step": 20770 + }, + { + "epoch": 0.313433286071978, + "grad_norm": 0.7406344313563328, + "learning_rate": 1.5711484515747838e-05, + "loss": 1.0769, + "step": 20780 + }, + { + "epoch": 0.31358412018462095, + "grad_norm": 0.6736053166308519, + "learning_rate": 1.570755491179344e-05, + "loss": 1.0566, + "step": 20790 + }, + { + "epoch": 0.3137349542972639, + "grad_norm": 0.6518476487987231, + "learning_rate": 1.5703624000227163e-05, + "loss": 1.0488, + "step": 20800 + }, + { + "epoch": 0.3138857884099068, + "grad_norm": 0.6632544162325347, + "learning_rate": 1.5699691781949592e-05, + "loss": 1.0553, + "step": 20810 + }, + { + "epoch": 0.3140366225225497, + "grad_norm": 0.6743908495997063, + "learning_rate": 1.56957582578616e-05, + "loss": 1.0271, + "step": 20820 + }, + { + "epoch": 0.3141874566351926, + "grad_norm": 0.6478736414791718, + "learning_rate": 1.569182342886437e-05, + "loss": 1.047, + "step": 20830 + }, + { + "epoch": 0.31433829074783554, + "grad_norm": 0.6476910581054766, + "learning_rate": 1.5687887295859376e-05, + "loss": 1.0438, + "step": 20840 + }, + { + "epoch": 0.3144891248604784, + "grad_norm": 0.6725730494644306, + "learning_rate": 1.5683949859748397e-05, + "loss": 1.0531, + "step": 20850 + }, + { + "epoch": 0.31463995897312136, + "grad_norm": 0.7050729185925935, + "learning_rate": 1.5680011121433502e-05, + "loss": 1.0543, + "step": 20860 + }, + { + "epoch": 0.3147907930857643, + "grad_norm": 0.6852318586007824, + "learning_rate": 1.5676071081817065e-05, + "loss": 1.0479, + "step": 20870 + }, + { + "epoch": 0.3149416271984072, + "grad_norm": 0.6777404940827512, + "learning_rate": 1.567212974180176e-05, + "loss": 1.0662, + "step": 20880 + }, + { + "epoch": 0.3150924613110501, + "grad_norm": 0.6783732062095418, + "learning_rate": 1.5668187102290553e-05, + "loss": 1.0609, + "step": 20890 + }, + { + "epoch": 0.315243295423693, + "grad_norm": 0.6750486656798731, + "learning_rate": 1.566424316418671e-05, + "loss": 1.0467, + "step": 20900 + }, + { + "epoch": 0.31539412953633594, + "grad_norm": 0.7030578572764371, + "learning_rate": 1.5660297928393797e-05, + "loss": 1.0531, + "step": 20910 + }, + { + "epoch": 0.3155449636489788, + "grad_norm": 0.6737546718857365, + "learning_rate": 1.5656351395815675e-05, + "loss": 1.0525, + "step": 20920 + }, + { + "epoch": 0.31569579776162177, + "grad_norm": 0.664657865321707, + "learning_rate": 1.56524035673565e-05, + "loss": 1.0469, + "step": 20930 + }, + { + "epoch": 0.3158466318742647, + "grad_norm": 0.6800885867752896, + "learning_rate": 1.5648454443920727e-05, + "loss": 1.0613, + "step": 20940 + }, + { + "epoch": 0.3159974659869076, + "grad_norm": 0.6868526200313082, + "learning_rate": 1.564450402641311e-05, + "loss": 1.0492, + "step": 20950 + }, + { + "epoch": 0.3161483000995505, + "grad_norm": 0.7149239527373261, + "learning_rate": 1.5640552315738696e-05, + "loss": 1.0497, + "step": 20960 + }, + { + "epoch": 0.3162991342121934, + "grad_norm": 0.6627335305136334, + "learning_rate": 1.5636599312802833e-05, + "loss": 1.0501, + "step": 20970 + }, + { + "epoch": 0.31644996832483635, + "grad_norm": 0.6968572778132703, + "learning_rate": 1.5632645018511156e-05, + "loss": 1.0519, + "step": 20980 + }, + { + "epoch": 0.31660080243747923, + "grad_norm": 0.678470300143325, + "learning_rate": 1.5628689433769602e-05, + "loss": 1.0577, + "step": 20990 + }, + { + "epoch": 0.3167516365501222, + "grad_norm": 0.656375406294781, + "learning_rate": 1.562473255948441e-05, + "loss": 1.0335, + "step": 21000 + }, + { + "epoch": 0.3169024706627651, + "grad_norm": 0.6998454189081444, + "learning_rate": 1.5620774396562096e-05, + "loss": 1.0563, + "step": 21010 + }, + { + "epoch": 0.317053304775408, + "grad_norm": 0.6521590378021729, + "learning_rate": 1.5616814945909496e-05, + "loss": 1.0548, + "step": 21020 + }, + { + "epoch": 0.31720413888805093, + "grad_norm": 0.686136427152365, + "learning_rate": 1.5612854208433722e-05, + "loss": 1.0505, + "step": 21030 + }, + { + "epoch": 0.3173549730006938, + "grad_norm": 0.6521966260238589, + "learning_rate": 1.5608892185042178e-05, + "loss": 1.0629, + "step": 21040 + }, + { + "epoch": 0.31750580711333676, + "grad_norm": 0.6519050429693363, + "learning_rate": 1.5604928876642587e-05, + "loss": 1.0306, + "step": 21050 + }, + { + "epoch": 0.31765664122597964, + "grad_norm": 0.6579158068071154, + "learning_rate": 1.5600964284142942e-05, + "loss": 1.056, + "step": 21060 + }, + { + "epoch": 0.3178074753386226, + "grad_norm": 0.7109515238487991, + "learning_rate": 1.5596998408451536e-05, + "loss": 1.0453, + "step": 21070 + }, + { + "epoch": 0.3179583094512655, + "grad_norm": 0.6589641623721101, + "learning_rate": 1.5593031250476963e-05, + "loss": 1.0611, + "step": 21080 + }, + { + "epoch": 0.3181091435639084, + "grad_norm": 0.6550516916137665, + "learning_rate": 1.5589062811128107e-05, + "loss": 1.0571, + "step": 21090 + }, + { + "epoch": 0.31825997767655134, + "grad_norm": 0.6901357964460447, + "learning_rate": 1.5585093091314148e-05, + "loss": 1.0692, + "step": 21100 + }, + { + "epoch": 0.3184108117891942, + "grad_norm": 0.7105433493709263, + "learning_rate": 1.5581122091944548e-05, + "loss": 1.0706, + "step": 21110 + }, + { + "epoch": 0.31856164590183716, + "grad_norm": 0.706857679654889, + "learning_rate": 1.5577149813929075e-05, + "loss": 1.048, + "step": 21120 + }, + { + "epoch": 0.31871248001448005, + "grad_norm": 0.6706368511379532, + "learning_rate": 1.557317625817779e-05, + "loss": 1.0526, + "step": 21130 + }, + { + "epoch": 0.318863314127123, + "grad_norm": 0.6667220981175551, + "learning_rate": 1.5569201425601035e-05, + "loss": 1.0493, + "step": 21140 + }, + { + "epoch": 0.3190141482397659, + "grad_norm": 0.8003816940615538, + "learning_rate": 1.5565225317109453e-05, + "loss": 1.0456, + "step": 21150 + }, + { + "epoch": 0.3191649823524088, + "grad_norm": 0.7084512688442058, + "learning_rate": 1.5561247933613987e-05, + "loss": 1.053, + "step": 21160 + }, + { + "epoch": 0.31931581646505175, + "grad_norm": 0.6524711636706065, + "learning_rate": 1.555726927602585e-05, + "loss": 1.0544, + "step": 21170 + }, + { + "epoch": 0.31946665057769463, + "grad_norm": 0.7138320750956897, + "learning_rate": 1.5553289345256568e-05, + "loss": 1.06, + "step": 21180 + }, + { + "epoch": 0.31961748469033757, + "grad_norm": 0.6936055375710993, + "learning_rate": 1.5549308142217952e-05, + "loss": 1.0238, + "step": 21190 + }, + { + "epoch": 0.31976831880298046, + "grad_norm": 0.6695520318636611, + "learning_rate": 1.5545325667822097e-05, + "loss": 1.0603, + "step": 21200 + }, + { + "epoch": 0.3199191529156234, + "grad_norm": 0.6874951711116976, + "learning_rate": 1.55413419229814e-05, + "loss": 1.0473, + "step": 21210 + }, + { + "epoch": 0.32006998702826633, + "grad_norm": 0.6427635373404352, + "learning_rate": 1.5537356908608544e-05, + "loss": 1.0682, + "step": 21220 + }, + { + "epoch": 0.3202208211409092, + "grad_norm": 0.6557633687279286, + "learning_rate": 1.5533370625616506e-05, + "loss": 1.0455, + "step": 21230 + }, + { + "epoch": 0.32037165525355216, + "grad_norm": 0.6975029747969057, + "learning_rate": 1.552938307491855e-05, + "loss": 1.0537, + "step": 21240 + }, + { + "epoch": 0.32052248936619504, + "grad_norm": 0.6669486545184882, + "learning_rate": 1.5525394257428226e-05, + "loss": 1.0317, + "step": 21250 + }, + { + "epoch": 0.320673323478838, + "grad_norm": 0.6793881532632843, + "learning_rate": 1.5521404174059383e-05, + "loss": 1.0334, + "step": 21260 + }, + { + "epoch": 0.32082415759148086, + "grad_norm": 0.7056464118169553, + "learning_rate": 1.5517412825726163e-05, + "loss": 1.0668, + "step": 21270 + }, + { + "epoch": 0.3209749917041238, + "grad_norm": 0.632204580973463, + "learning_rate": 1.5513420213342986e-05, + "loss": 1.0475, + "step": 21280 + }, + { + "epoch": 0.32112582581676674, + "grad_norm": 0.6563165465829738, + "learning_rate": 1.550942633782456e-05, + "loss": 1.0384, + "step": 21290 + }, + { + "epoch": 0.3212766599294096, + "grad_norm": 0.6562640136588354, + "learning_rate": 1.5505431200085906e-05, + "loss": 1.0363, + "step": 21300 + }, + { + "epoch": 0.32142749404205256, + "grad_norm": 0.7117316991107062, + "learning_rate": 1.5501434801042304e-05, + "loss": 1.0488, + "step": 21310 + }, + { + "epoch": 0.32157832815469545, + "grad_norm": 0.7204568976179275, + "learning_rate": 1.5497437141609346e-05, + "loss": 1.0422, + "step": 21320 + }, + { + "epoch": 0.3217291622673384, + "grad_norm": 0.7045503566804302, + "learning_rate": 1.5493438222702896e-05, + "loss": 1.0415, + "step": 21330 + }, + { + "epoch": 0.32187999637998127, + "grad_norm": 0.6543093808718021, + "learning_rate": 1.548943804523912e-05, + "loss": 1.0515, + "step": 21340 + }, + { + "epoch": 0.3220308304926242, + "grad_norm": 0.672452211545328, + "learning_rate": 1.5485436610134462e-05, + "loss": 1.0452, + "step": 21350 + }, + { + "epoch": 0.32218166460526715, + "grad_norm": 0.6602938277126671, + "learning_rate": 1.5481433918305658e-05, + "loss": 1.0578, + "step": 21360 + }, + { + "epoch": 0.32233249871791003, + "grad_norm": 0.681964328624787, + "learning_rate": 1.547742997066974e-05, + "loss": 1.0259, + "step": 21370 + }, + { + "epoch": 0.32248333283055297, + "grad_norm": 0.6500740984285682, + "learning_rate": 1.5473424768144014e-05, + "loss": 1.0514, + "step": 21380 + }, + { + "epoch": 0.32263416694319585, + "grad_norm": 0.6707919482109919, + "learning_rate": 1.5469418311646078e-05, + "loss": 1.0625, + "step": 21390 + }, + { + "epoch": 0.3227850010558388, + "grad_norm": 0.6776071612910091, + "learning_rate": 1.546541060209382e-05, + "loss": 1.0672, + "step": 21400 + }, + { + "epoch": 0.3229358351684817, + "grad_norm": 0.6785183306690731, + "learning_rate": 1.546140164040542e-05, + "loss": 1.0525, + "step": 21410 + }, + { + "epoch": 0.3230866692811246, + "grad_norm": 0.6789939657638653, + "learning_rate": 1.5457391427499332e-05, + "loss": 1.0338, + "step": 21420 + }, + { + "epoch": 0.32323750339376756, + "grad_norm": 0.6439561331535266, + "learning_rate": 1.5453379964294307e-05, + "loss": 1.0554, + "step": 21430 + }, + { + "epoch": 0.32338833750641044, + "grad_norm": 0.7014526004746624, + "learning_rate": 1.544936725170938e-05, + "loss": 1.0502, + "step": 21440 + }, + { + "epoch": 0.3235391716190534, + "grad_norm": 0.6297915461700393, + "learning_rate": 1.5445353290663866e-05, + "loss": 1.0442, + "step": 21450 + }, + { + "epoch": 0.32369000573169626, + "grad_norm": 0.6496746752836889, + "learning_rate": 1.5441338082077372e-05, + "loss": 1.0574, + "step": 21460 + }, + { + "epoch": 0.3238408398443392, + "grad_norm": 0.7609179604800456, + "learning_rate": 1.54373216268698e-05, + "loss": 1.0505, + "step": 21470 + }, + { + "epoch": 0.3239916739569821, + "grad_norm": 0.6544907478919688, + "learning_rate": 1.5433303925961315e-05, + "loss": 1.0461, + "step": 21480 + }, + { + "epoch": 0.324142508069625, + "grad_norm": 0.6855140934496318, + "learning_rate": 1.5429284980272386e-05, + "loss": 1.0302, + "step": 21490 + }, + { + "epoch": 0.32429334218226796, + "grad_norm": 0.6447359522261605, + "learning_rate": 1.5425264790723757e-05, + "loss": 1.0413, + "step": 21500 + }, + { + "epoch": 0.32444417629491085, + "grad_norm": 0.6937392902277266, + "learning_rate": 1.542124335823647e-05, + "loss": 1.0604, + "step": 21510 + }, + { + "epoch": 0.3245950104075538, + "grad_norm": 0.6958016551643074, + "learning_rate": 1.5417220683731836e-05, + "loss": 1.0369, + "step": 21520 + }, + { + "epoch": 0.32474584452019667, + "grad_norm": 0.6586294773170357, + "learning_rate": 1.5413196768131458e-05, + "loss": 1.0499, + "step": 21530 + }, + { + "epoch": 0.3248966786328396, + "grad_norm": 0.6533540056621548, + "learning_rate": 1.5409171612357227e-05, + "loss": 1.0306, + "step": 21540 + }, + { + "epoch": 0.3250475127454825, + "grad_norm": 0.6623910303431283, + "learning_rate": 1.5405145217331306e-05, + "loss": 1.0327, + "step": 21550 + }, + { + "epoch": 0.32519834685812543, + "grad_norm": 0.7069894711796545, + "learning_rate": 1.5401117583976156e-05, + "loss": 1.025, + "step": 21560 + }, + { + "epoch": 0.32534918097076837, + "grad_norm": 0.6437836101494977, + "learning_rate": 1.5397088713214517e-05, + "loss": 1.0358, + "step": 21570 + }, + { + "epoch": 0.32550001508341125, + "grad_norm": 0.67014376978613, + "learning_rate": 1.5393058605969405e-05, + "loss": 1.0464, + "step": 21580 + }, + { + "epoch": 0.3256508491960542, + "grad_norm": 0.6523286489252035, + "learning_rate": 1.5389027263164136e-05, + "loss": 1.0627, + "step": 21590 + }, + { + "epoch": 0.3258016833086971, + "grad_norm": 0.6679804132769086, + "learning_rate": 1.5384994685722285e-05, + "loss": 1.0385, + "step": 21600 + }, + { + "epoch": 0.32595251742134, + "grad_norm": 0.6347721922190186, + "learning_rate": 1.5380960874567733e-05, + "loss": 1.0538, + "step": 21610 + }, + { + "epoch": 0.3261033515339829, + "grad_norm": 0.6856550446576674, + "learning_rate": 1.537692583062463e-05, + "loss": 1.0417, + "step": 21620 + }, + { + "epoch": 0.32625418564662584, + "grad_norm": 0.644213333847142, + "learning_rate": 1.537288955481741e-05, + "loss": 1.0384, + "step": 21630 + }, + { + "epoch": 0.3264050197592688, + "grad_norm": 0.6315852560050872, + "learning_rate": 1.5368852048070793e-05, + "loss": 1.0714, + "step": 21640 + }, + { + "epoch": 0.32655585387191166, + "grad_norm": 0.693810977101808, + "learning_rate": 1.5364813311309787e-05, + "loss": 1.058, + "step": 21650 + }, + { + "epoch": 0.3267066879845546, + "grad_norm": 0.6293741123650696, + "learning_rate": 1.5360773345459667e-05, + "loss": 1.0318, + "step": 21660 + }, + { + "epoch": 0.3268575220971975, + "grad_norm": 0.6536186238630372, + "learning_rate": 1.5356732151445997e-05, + "loss": 1.0359, + "step": 21670 + }, + { + "epoch": 0.3270083562098404, + "grad_norm": 0.6921405262450667, + "learning_rate": 1.5352689730194625e-05, + "loss": 1.0471, + "step": 21680 + }, + { + "epoch": 0.3271591903224833, + "grad_norm": 0.7298434528517217, + "learning_rate": 1.5348646082631672e-05, + "loss": 1.0432, + "step": 21690 + }, + { + "epoch": 0.32731002443512625, + "grad_norm": 0.6971942892614866, + "learning_rate": 1.5344601209683556e-05, + "loss": 1.0475, + "step": 21700 + }, + { + "epoch": 0.3274608585477692, + "grad_norm": 0.6424021323721942, + "learning_rate": 1.534055511227696e-05, + "loss": 1.0552, + "step": 21710 + }, + { + "epoch": 0.32761169266041207, + "grad_norm": 0.7034745832392777, + "learning_rate": 1.5336507791338848e-05, + "loss": 1.0658, + "step": 21720 + }, + { + "epoch": 0.327762526773055, + "grad_norm": 0.6814221258868922, + "learning_rate": 1.533245924779647e-05, + "loss": 1.0593, + "step": 21730 + }, + { + "epoch": 0.3279133608856979, + "grad_norm": 0.7133913477936605, + "learning_rate": 1.5328409482577366e-05, + "loss": 1.0631, + "step": 21740 + }, + { + "epoch": 0.32806419499834083, + "grad_norm": 0.7083350705928031, + "learning_rate": 1.5324358496609332e-05, + "loss": 1.0474, + "step": 21750 + }, + { + "epoch": 0.3282150291109837, + "grad_norm": 0.6667967952904218, + "learning_rate": 1.5320306290820464e-05, + "loss": 1.0456, + "step": 21760 + }, + { + "epoch": 0.32836586322362665, + "grad_norm": 0.6469351228194745, + "learning_rate": 1.5316252866139127e-05, + "loss": 1.0461, + "step": 21770 + }, + { + "epoch": 0.3285166973362696, + "grad_norm": 0.6864661789347068, + "learning_rate": 1.5312198223493966e-05, + "loss": 1.0406, + "step": 21780 + }, + { + "epoch": 0.3286675314489125, + "grad_norm": 0.6460320946241314, + "learning_rate": 1.5308142363813917e-05, + "loss": 1.0442, + "step": 21790 + }, + { + "epoch": 0.3288183655615554, + "grad_norm": 0.6328133142234798, + "learning_rate": 1.5304085288028177e-05, + "loss": 1.0431, + "step": 21800 + }, + { + "epoch": 0.3289691996741983, + "grad_norm": 0.6506064135123412, + "learning_rate": 1.5300026997066233e-05, + "loss": 1.0405, + "step": 21810 + }, + { + "epoch": 0.32912003378684124, + "grad_norm": 0.6357156435873271, + "learning_rate": 1.5295967491857843e-05, + "loss": 1.0422, + "step": 21820 + }, + { + "epoch": 0.3292708678994841, + "grad_norm": 0.6539670765057006, + "learning_rate": 1.5291906773333052e-05, + "loss": 1.0438, + "step": 21830 + }, + { + "epoch": 0.32942170201212706, + "grad_norm": 0.6760170248291396, + "learning_rate": 1.5287844842422184e-05, + "loss": 1.0451, + "step": 21840 + }, + { + "epoch": 0.32957253612477, + "grad_norm": 0.6980146252341815, + "learning_rate": 1.5283781700055823e-05, + "loss": 1.0588, + "step": 21850 + }, + { + "epoch": 0.3297233702374129, + "grad_norm": 0.6494475621353601, + "learning_rate": 1.527971734716485e-05, + "loss": 1.0321, + "step": 21860 + }, + { + "epoch": 0.3298742043500558, + "grad_norm": 0.6685974663282304, + "learning_rate": 1.5275651784680415e-05, + "loss": 1.0389, + "step": 21870 + }, + { + "epoch": 0.3300250384626987, + "grad_norm": 0.6664516774035013, + "learning_rate": 1.5271585013533944e-05, + "loss": 1.0396, + "step": 21880 + }, + { + "epoch": 0.33017587257534164, + "grad_norm": 0.7328258399931877, + "learning_rate": 1.5267517034657148e-05, + "loss": 1.0335, + "step": 21890 + }, + { + "epoch": 0.33032670668798453, + "grad_norm": 0.6659034823765796, + "learning_rate": 1.5263447848982e-05, + "loss": 1.04, + "step": 21900 + }, + { + "epoch": 0.33047754080062747, + "grad_norm": 0.6801997303302965, + "learning_rate": 1.525937745744077e-05, + "loss": 1.034, + "step": 21910 + }, + { + "epoch": 0.3306283749132704, + "grad_norm": 0.6674468831569113, + "learning_rate": 1.5255305860965977e-05, + "loss": 1.0242, + "step": 21920 + }, + { + "epoch": 0.3307792090259133, + "grad_norm": 0.6674373297595203, + "learning_rate": 1.5251233060490442e-05, + "loss": 1.0466, + "step": 21930 + }, + { + "epoch": 0.33093004313855623, + "grad_norm": 0.8027905704045962, + "learning_rate": 1.5247159056947253e-05, + "loss": 1.0428, + "step": 21940 + }, + { + "epoch": 0.3310808772511991, + "grad_norm": 0.6635551847239298, + "learning_rate": 1.524308385126977e-05, + "loss": 1.0477, + "step": 21950 + }, + { + "epoch": 0.33123171136384205, + "grad_norm": 0.6930077889845047, + "learning_rate": 1.5239007444391626e-05, + "loss": 1.0498, + "step": 21960 + }, + { + "epoch": 0.33138254547648494, + "grad_norm": 0.6511371957519633, + "learning_rate": 1.5234929837246738e-05, + "loss": 1.0441, + "step": 21970 + }, + { + "epoch": 0.3315333795891279, + "grad_norm": 0.663569455783397, + "learning_rate": 1.5230851030769293e-05, + "loss": 1.0658, + "step": 21980 + }, + { + "epoch": 0.3316842137017708, + "grad_norm": 0.6263668833137409, + "learning_rate": 1.5226771025893757e-05, + "loss": 1.0614, + "step": 21990 + }, + { + "epoch": 0.3318350478144137, + "grad_norm": 0.691390778653943, + "learning_rate": 1.522268982355486e-05, + "loss": 1.0521, + "step": 22000 + }, + { + "epoch": 0.33198588192705664, + "grad_norm": 0.7103925319055518, + "learning_rate": 1.5218607424687615e-05, + "loss": 1.0477, + "step": 22010 + }, + { + "epoch": 0.3321367160396995, + "grad_norm": 0.6724534550705007, + "learning_rate": 1.5214523830227313e-05, + "loss": 1.0375, + "step": 22020 + }, + { + "epoch": 0.33228755015234246, + "grad_norm": 0.6728487778959094, + "learning_rate": 1.5210439041109507e-05, + "loss": 1.0504, + "step": 22030 + }, + { + "epoch": 0.33243838426498534, + "grad_norm": 0.6971610058257853, + "learning_rate": 1.5206353058270033e-05, + "loss": 1.0397, + "step": 22040 + }, + { + "epoch": 0.3325892183776283, + "grad_norm": 0.6614809251640806, + "learning_rate": 1.5202265882644995e-05, + "loss": 1.0224, + "step": 22050 + }, + { + "epoch": 0.3327400524902712, + "grad_norm": 0.7102789303730045, + "learning_rate": 1.5198177515170778e-05, + "loss": 1.045, + "step": 22060 + }, + { + "epoch": 0.3328908866029141, + "grad_norm": 0.6445779594077015, + "learning_rate": 1.5194087956784031e-05, + "loss": 1.0272, + "step": 22070 + }, + { + "epoch": 0.33304172071555704, + "grad_norm": 0.6622498493341574, + "learning_rate": 1.5189997208421681e-05, + "loss": 1.0521, + "step": 22080 + }, + { + "epoch": 0.3331925548281999, + "grad_norm": 0.6653471496073258, + "learning_rate": 1.518590527102093e-05, + "loss": 1.0608, + "step": 22090 + }, + { + "epoch": 0.33334338894084287, + "grad_norm": 0.6497564120602402, + "learning_rate": 1.5181812145519238e-05, + "loss": 1.0396, + "step": 22100 + }, + { + "epoch": 0.33349422305348575, + "grad_norm": 0.6873653868018063, + "learning_rate": 1.5177717832854358e-05, + "loss": 1.0575, + "step": 22110 + }, + { + "epoch": 0.3336450571661287, + "grad_norm": 0.6327355327315294, + "learning_rate": 1.5173622333964302e-05, + "loss": 1.0299, + "step": 22120 + }, + { + "epoch": 0.33379589127877163, + "grad_norm": 0.6478690774571815, + "learning_rate": 1.5169525649787357e-05, + "loss": 1.0509, + "step": 22130 + }, + { + "epoch": 0.3339467253914145, + "grad_norm": 0.6378198256884442, + "learning_rate": 1.5165427781262077e-05, + "loss": 1.0183, + "step": 22140 + }, + { + "epoch": 0.33409755950405745, + "grad_norm": 0.6558177619691649, + "learning_rate": 1.5161328729327296e-05, + "loss": 1.0335, + "step": 22150 + }, + { + "epoch": 0.33424839361670033, + "grad_norm": 0.664159290910833, + "learning_rate": 1.5157228494922121e-05, + "loss": 1.0381, + "step": 22160 + }, + { + "epoch": 0.3343992277293433, + "grad_norm": 0.6643592391419948, + "learning_rate": 1.515312707898591e-05, + "loss": 1.0324, + "step": 22170 + }, + { + "epoch": 0.33455006184198616, + "grad_norm": 0.6530097966886467, + "learning_rate": 1.5149024482458314e-05, + "loss": 1.0403, + "step": 22180 + }, + { + "epoch": 0.3347008959546291, + "grad_norm": 0.703474475898379, + "learning_rate": 1.5144920706279247e-05, + "loss": 1.0366, + "step": 22190 + }, + { + "epoch": 0.33485173006727204, + "grad_norm": 0.6223612546440033, + "learning_rate": 1.5140815751388884e-05, + "loss": 1.0333, + "step": 22200 + }, + { + "epoch": 0.3350025641799149, + "grad_norm": 0.6582913802301453, + "learning_rate": 1.5136709618727685e-05, + "loss": 1.0449, + "step": 22210 + }, + { + "epoch": 0.33515339829255786, + "grad_norm": 0.6506630699288253, + "learning_rate": 1.5132602309236371e-05, + "loss": 1.0504, + "step": 22220 + }, + { + "epoch": 0.33530423240520074, + "grad_norm": 0.6669566329443556, + "learning_rate": 1.5128493823855932e-05, + "loss": 1.0408, + "step": 22230 + }, + { + "epoch": 0.3354550665178437, + "grad_norm": 0.6519972008785699, + "learning_rate": 1.5124384163527635e-05, + "loss": 1.0394, + "step": 22240 + }, + { + "epoch": 0.33560590063048656, + "grad_norm": 0.7163969206901298, + "learning_rate": 1.512027332919301e-05, + "loss": 1.0277, + "step": 22250 + }, + { + "epoch": 0.3357567347431295, + "grad_norm": 0.6421033300702144, + "learning_rate": 1.5116161321793854e-05, + "loss": 1.0327, + "step": 22260 + }, + { + "epoch": 0.33590756885577244, + "grad_norm": 0.720267939507939, + "learning_rate": 1.5112048142272238e-05, + "loss": 1.053, + "step": 22270 + }, + { + "epoch": 0.3360584029684153, + "grad_norm": 0.6529821431412729, + "learning_rate": 1.5107933791570499e-05, + "loss": 1.062, + "step": 22280 + }, + { + "epoch": 0.33620923708105827, + "grad_norm": 0.648568681989125, + "learning_rate": 1.5103818270631243e-05, + "loss": 1.0532, + "step": 22290 + }, + { + "epoch": 0.33636007119370115, + "grad_norm": 0.7101645616992635, + "learning_rate": 1.5099701580397343e-05, + "loss": 1.0664, + "step": 22300 + }, + { + "epoch": 0.3365109053063441, + "grad_norm": 0.6394014849366396, + "learning_rate": 1.5095583721811942e-05, + "loss": 1.0331, + "step": 22310 + }, + { + "epoch": 0.33666173941898697, + "grad_norm": 0.6492732709502219, + "learning_rate": 1.5091464695818445e-05, + "loss": 1.0672, + "step": 22320 + }, + { + "epoch": 0.3368125735316299, + "grad_norm": 0.6876120531824462, + "learning_rate": 1.508734450336054e-05, + "loss": 1.0347, + "step": 22330 + }, + { + "epoch": 0.33696340764427285, + "grad_norm": 0.7119045653555168, + "learning_rate": 1.5083223145382158e-05, + "loss": 1.0357, + "step": 22340 + }, + { + "epoch": 0.33711424175691573, + "grad_norm": 0.6867745231016052, + "learning_rate": 1.5079100622827513e-05, + "loss": 1.0151, + "step": 22350 + }, + { + "epoch": 0.3372650758695587, + "grad_norm": 0.6625894942238462, + "learning_rate": 1.5074976936641091e-05, + "loss": 1.047, + "step": 22360 + }, + { + "epoch": 0.33741590998220156, + "grad_norm": 0.6472601702552819, + "learning_rate": 1.5070852087767628e-05, + "loss": 1.0439, + "step": 22370 + }, + { + "epoch": 0.3375667440948445, + "grad_norm": 0.6916627784142422, + "learning_rate": 1.5066726077152139e-05, + "loss": 1.0475, + "step": 22380 + }, + { + "epoch": 0.3377175782074874, + "grad_norm": 0.689743480801828, + "learning_rate": 1.5062598905739898e-05, + "loss": 1.0395, + "step": 22390 + }, + { + "epoch": 0.3378684123201303, + "grad_norm": 0.6554964274895356, + "learning_rate": 1.5058470574476452e-05, + "loss": 1.0394, + "step": 22400 + }, + { + "epoch": 0.33801924643277326, + "grad_norm": 0.6897900077559426, + "learning_rate": 1.5054341084307606e-05, + "loss": 1.0439, + "step": 22410 + }, + { + "epoch": 0.33817008054541614, + "grad_norm": 0.6595106917809817, + "learning_rate": 1.5050210436179438e-05, + "loss": 1.047, + "step": 22420 + }, + { + "epoch": 0.3383209146580591, + "grad_norm": 0.6599303969400164, + "learning_rate": 1.5046078631038282e-05, + "loss": 1.0443, + "step": 22430 + }, + { + "epoch": 0.33847174877070196, + "grad_norm": 0.6713839768443278, + "learning_rate": 1.5041945669830748e-05, + "loss": 1.0346, + "step": 22440 + }, + { + "epoch": 0.3386225828833449, + "grad_norm": 0.6740207615305854, + "learning_rate": 1.50378115535037e-05, + "loss": 1.0457, + "step": 22450 + }, + { + "epoch": 0.3387734169959878, + "grad_norm": 0.7283071106271528, + "learning_rate": 1.5033676283004278e-05, + "loss": 1.0313, + "step": 22460 + }, + { + "epoch": 0.3389242511086307, + "grad_norm": 0.6607768965735391, + "learning_rate": 1.5029539859279875e-05, + "loss": 1.0511, + "step": 22470 + }, + { + "epoch": 0.33907508522127366, + "grad_norm": 0.637325208449894, + "learning_rate": 1.5025402283278159e-05, + "loss": 1.04, + "step": 22480 + }, + { + "epoch": 0.33922591933391655, + "grad_norm": 0.734030221491033, + "learning_rate": 1.5021263555947055e-05, + "loss": 1.0361, + "step": 22490 + }, + { + "epoch": 0.3393767534465595, + "grad_norm": 0.6576094904650228, + "learning_rate": 1.5017123678234748e-05, + "loss": 1.0229, + "step": 22500 + }, + { + "epoch": 0.33952758755920237, + "grad_norm": 0.6702141608165408, + "learning_rate": 1.50129826510897e-05, + "loss": 1.046, + "step": 22510 + }, + { + "epoch": 0.3396784216718453, + "grad_norm": 0.6822894189592134, + "learning_rate": 1.5008840475460624e-05, + "loss": 1.0452, + "step": 22520 + }, + { + "epoch": 0.3398292557844882, + "grad_norm": 0.6927426904715128, + "learning_rate": 1.5004697152296501e-05, + "loss": 1.0402, + "step": 22530 + }, + { + "epoch": 0.33998008989713113, + "grad_norm": 0.6375424779929408, + "learning_rate": 1.5000552682546574e-05, + "loss": 1.0269, + "step": 22540 + }, + { + "epoch": 0.34013092400977407, + "grad_norm": 0.7158208933180997, + "learning_rate": 1.499640706716035e-05, + "loss": 1.03, + "step": 22550 + }, + { + "epoch": 0.34028175812241696, + "grad_norm": 0.6302692816007741, + "learning_rate": 1.4992260307087596e-05, + "loss": 1.0376, + "step": 22560 + }, + { + "epoch": 0.3404325922350599, + "grad_norm": 0.7076386672262605, + "learning_rate": 1.4988112403278345e-05, + "loss": 1.0344, + "step": 22570 + }, + { + "epoch": 0.3405834263477028, + "grad_norm": 0.6473557586939697, + "learning_rate": 1.4983963356682889e-05, + "loss": 1.0498, + "step": 22580 + }, + { + "epoch": 0.3407342604603457, + "grad_norm": 0.7343754121466843, + "learning_rate": 1.4979813168251783e-05, + "loss": 1.0636, + "step": 22590 + }, + { + "epoch": 0.3408850945729886, + "grad_norm": 0.6282407124082158, + "learning_rate": 1.497566183893584e-05, + "loss": 1.0472, + "step": 22600 + }, + { + "epoch": 0.34103592868563154, + "grad_norm": 0.6347559280371521, + "learning_rate": 1.497150936968614e-05, + "loss": 1.028, + "step": 22610 + }, + { + "epoch": 0.3411867627982745, + "grad_norm": 0.6329317364443768, + "learning_rate": 1.4967355761454026e-05, + "loss": 1.0331, + "step": 22620 + }, + { + "epoch": 0.34133759691091736, + "grad_norm": 0.6758545471619963, + "learning_rate": 1.4963201015191089e-05, + "loss": 1.0444, + "step": 22630 + }, + { + "epoch": 0.3414884310235603, + "grad_norm": 0.6690871797215527, + "learning_rate": 1.4959045131849193e-05, + "loss": 1.0519, + "step": 22640 + }, + { + "epoch": 0.3416392651362032, + "grad_norm": 0.6783943345697262, + "learning_rate": 1.4954888112380464e-05, + "loss": 1.0496, + "step": 22650 + }, + { + "epoch": 0.3417900992488461, + "grad_norm": 0.6756479765495643, + "learning_rate": 1.4950729957737275e-05, + "loss": 1.0591, + "step": 22660 + }, + { + "epoch": 0.341940933361489, + "grad_norm": 0.6511618264488642, + "learning_rate": 1.4946570668872275e-05, + "loss": 1.0465, + "step": 22670 + }, + { + "epoch": 0.34209176747413195, + "grad_norm": 0.7125070671350356, + "learning_rate": 1.494241024673836e-05, + "loss": 1.0348, + "step": 22680 + }, + { + "epoch": 0.3422426015867749, + "grad_norm": 0.7017112018999144, + "learning_rate": 1.4938248692288693e-05, + "loss": 1.0305, + "step": 22690 + }, + { + "epoch": 0.34239343569941777, + "grad_norm": 0.6238372575079217, + "learning_rate": 1.4934086006476692e-05, + "loss": 1.0332, + "step": 22700 + }, + { + "epoch": 0.3425442698120607, + "grad_norm": 0.6446860716185299, + "learning_rate": 1.4929922190256037e-05, + "loss": 1.0432, + "step": 22710 + }, + { + "epoch": 0.3426951039247036, + "grad_norm": 0.7395629189459105, + "learning_rate": 1.4925757244580668e-05, + "loss": 1.0412, + "step": 22720 + }, + { + "epoch": 0.34284593803734653, + "grad_norm": 0.640742421280338, + "learning_rate": 1.4921591170404779e-05, + "loss": 1.0376, + "step": 22730 + }, + { + "epoch": 0.3429967721499894, + "grad_norm": 0.6512028732292022, + "learning_rate": 1.4917423968682832e-05, + "loss": 1.0528, + "step": 22740 + }, + { + "epoch": 0.34314760626263235, + "grad_norm": 0.6290540179480426, + "learning_rate": 1.4913255640369533e-05, + "loss": 1.0282, + "step": 22750 + }, + { + "epoch": 0.3432984403752753, + "grad_norm": 0.7130068964190514, + "learning_rate": 1.490908618641986e-05, + "loss": 1.0434, + "step": 22760 + }, + { + "epoch": 0.3434492744879182, + "grad_norm": 0.6674719869917362, + "learning_rate": 1.4904915607789041e-05, + "loss": 1.0331, + "step": 22770 + }, + { + "epoch": 0.3436001086005611, + "grad_norm": 0.6854511534871851, + "learning_rate": 1.4900743905432564e-05, + "loss": 1.0532, + "step": 22780 + }, + { + "epoch": 0.343750942713204, + "grad_norm": 0.7145468612026212, + "learning_rate": 1.4896571080306172e-05, + "loss": 1.0368, + "step": 22790 + }, + { + "epoch": 0.34390177682584694, + "grad_norm": 0.7395955171290761, + "learning_rate": 1.4892397133365868e-05, + "loss": 1.0401, + "step": 22800 + }, + { + "epoch": 0.3440526109384898, + "grad_norm": 0.7113187773547176, + "learning_rate": 1.4888222065567915e-05, + "loss": 1.044, + "step": 22810 + }, + { + "epoch": 0.34420344505113276, + "grad_norm": 0.6591462117856817, + "learning_rate": 1.4884045877868824e-05, + "loss": 1.0325, + "step": 22820 + }, + { + "epoch": 0.3443542791637757, + "grad_norm": 0.7007480366376717, + "learning_rate": 1.4879868571225373e-05, + "loss": 1.0304, + "step": 22830 + }, + { + "epoch": 0.3445051132764186, + "grad_norm": 0.6451213625483387, + "learning_rate": 1.4875690146594586e-05, + "loss": 1.04, + "step": 22840 + }, + { + "epoch": 0.3446559473890615, + "grad_norm": 0.6160524202642275, + "learning_rate": 1.487151060493375e-05, + "loss": 1.0309, + "step": 22850 + }, + { + "epoch": 0.3448067815017044, + "grad_norm": 0.6495562013982039, + "learning_rate": 1.4867329947200409e-05, + "loss": 1.0177, + "step": 22860 + }, + { + "epoch": 0.34495761561434735, + "grad_norm": 0.6636617586607344, + "learning_rate": 1.4863148174352354e-05, + "loss": 1.0388, + "step": 22870 + }, + { + "epoch": 0.34510844972699023, + "grad_norm": 0.6637189490447235, + "learning_rate": 1.485896528734764e-05, + "loss": 1.03, + "step": 22880 + }, + { + "epoch": 0.34525928383963317, + "grad_norm": 0.6523623008789838, + "learning_rate": 1.4854781287144576e-05, + "loss": 1.0225, + "step": 22890 + }, + { + "epoch": 0.3454101179522761, + "grad_norm": 0.6950244488990163, + "learning_rate": 1.4850596174701723e-05, + "loss": 1.0638, + "step": 22900 + }, + { + "epoch": 0.345560952064919, + "grad_norm": 0.7439558370758815, + "learning_rate": 1.4846409950977898e-05, + "loss": 1.033, + "step": 22910 + }, + { + "epoch": 0.34571178617756193, + "grad_norm": 0.6576956208242065, + "learning_rate": 1.4842222616932177e-05, + "loss": 1.0389, + "step": 22920 + }, + { + "epoch": 0.3458626202902048, + "grad_norm": 0.7166169049158512, + "learning_rate": 1.4838034173523882e-05, + "loss": 1.032, + "step": 22930 + }, + { + "epoch": 0.34601345440284775, + "grad_norm": 0.6561634479204485, + "learning_rate": 1.4833844621712592e-05, + "loss": 1.0427, + "step": 22940 + }, + { + "epoch": 0.34616428851549064, + "grad_norm": 0.6491516902181851, + "learning_rate": 1.4829653962458147e-05, + "loss": 1.0305, + "step": 22950 + }, + { + "epoch": 0.3463151226281336, + "grad_norm": 0.6536143913688477, + "learning_rate": 1.4825462196720629e-05, + "loss": 1.0415, + "step": 22960 + }, + { + "epoch": 0.3464659567407765, + "grad_norm": 0.6915026109130451, + "learning_rate": 1.4821269325460384e-05, + "loss": 1.0404, + "step": 22970 + }, + { + "epoch": 0.3466167908534194, + "grad_norm": 0.6828103885391812, + "learning_rate": 1.4817075349638007e-05, + "loss": 1.0603, + "step": 22980 + }, + { + "epoch": 0.34676762496606234, + "grad_norm": 0.6661913318325102, + "learning_rate": 1.481288027021434e-05, + "loss": 1.0372, + "step": 22990 + }, + { + "epoch": 0.3469184590787052, + "grad_norm": 0.7336118707524806, + "learning_rate": 1.4808684088150496e-05, + "loss": 1.0315, + "step": 23000 + }, + { + "epoch": 0.34706929319134816, + "grad_norm": 0.7526301755914508, + "learning_rate": 1.4804486804407815e-05, + "loss": 1.0763, + "step": 23010 + }, + { + "epoch": 0.34722012730399104, + "grad_norm": 0.6443442511744526, + "learning_rate": 1.480028841994791e-05, + "loss": 1.0414, + "step": 23020 + }, + { + "epoch": 0.347370961416634, + "grad_norm": 0.6872738874487694, + "learning_rate": 1.4796088935732638e-05, + "loss": 1.0307, + "step": 23030 + }, + { + "epoch": 0.3475217955292769, + "grad_norm": 0.7253163502566827, + "learning_rate": 1.4791888352724111e-05, + "loss": 1.0402, + "step": 23040 + }, + { + "epoch": 0.3476726296419198, + "grad_norm": 0.6620212416629773, + "learning_rate": 1.4787686671884685e-05, + "loss": 1.0458, + "step": 23050 + }, + { + "epoch": 0.34782346375456275, + "grad_norm": 0.6678663647165436, + "learning_rate": 1.4783483894176978e-05, + "loss": 1.0366, + "step": 23060 + }, + { + "epoch": 0.34797429786720563, + "grad_norm": 0.6774562565388235, + "learning_rate": 1.4779280020563854e-05, + "loss": 1.0526, + "step": 23070 + }, + { + "epoch": 0.34812513197984857, + "grad_norm": 0.6293256059734043, + "learning_rate": 1.4775075052008425e-05, + "loss": 1.0206, + "step": 23080 + }, + { + "epoch": 0.34827596609249145, + "grad_norm": 0.6658930206961589, + "learning_rate": 1.4770868989474064e-05, + "loss": 1.0322, + "step": 23090 + }, + { + "epoch": 0.3484268002051344, + "grad_norm": 0.6742161269351524, + "learning_rate": 1.4766661833924381e-05, + "loss": 1.0377, + "step": 23100 + }, + { + "epoch": 0.34857763431777733, + "grad_norm": 0.6814776000248167, + "learning_rate": 1.476245358632325e-05, + "loss": 1.0283, + "step": 23110 + }, + { + "epoch": 0.3487284684304202, + "grad_norm": 0.662495858131771, + "learning_rate": 1.475824424763478e-05, + "loss": 1.0399, + "step": 23120 + }, + { + "epoch": 0.34887930254306315, + "grad_norm": 0.6774332650675491, + "learning_rate": 1.4754033818823348e-05, + "loss": 1.0376, + "step": 23130 + }, + { + "epoch": 0.34903013665570604, + "grad_norm": 0.6501135716100328, + "learning_rate": 1.4749822300853568e-05, + "loss": 1.0294, + "step": 23140 + }, + { + "epoch": 0.349180970768349, + "grad_norm": 0.6795705746335347, + "learning_rate": 1.4745609694690304e-05, + "loss": 1.0319, + "step": 23150 + }, + { + "epoch": 0.34933180488099186, + "grad_norm": 0.681711625193294, + "learning_rate": 1.4741396001298674e-05, + "loss": 1.0379, + "step": 23160 + }, + { + "epoch": 0.3494826389936348, + "grad_norm": 0.6637430769207504, + "learning_rate": 1.4737181221644043e-05, + "loss": 1.0336, + "step": 23170 + }, + { + "epoch": 0.34963347310627774, + "grad_norm": 0.6723700864780637, + "learning_rate": 1.473296535669203e-05, + "loss": 1.0553, + "step": 23180 + }, + { + "epoch": 0.3497843072189206, + "grad_norm": 0.6511083773021338, + "learning_rate": 1.4728748407408494e-05, + "loss": 1.0231, + "step": 23190 + }, + { + "epoch": 0.34993514133156356, + "grad_norm": 0.6749330343773585, + "learning_rate": 1.4724530374759543e-05, + "loss": 1.0323, + "step": 23200 + }, + { + "epoch": 0.35008597544420644, + "grad_norm": 0.6835182940636094, + "learning_rate": 1.4720311259711539e-05, + "loss": 1.0296, + "step": 23210 + }, + { + "epoch": 0.3502368095568494, + "grad_norm": 0.655471855573812, + "learning_rate": 1.471609106323109e-05, + "loss": 1.0461, + "step": 23220 + }, + { + "epoch": 0.35038764366949227, + "grad_norm": 0.6913926878697382, + "learning_rate": 1.4711869786285053e-05, + "loss": 1.0412, + "step": 23230 + }, + { + "epoch": 0.3505384777821352, + "grad_norm": 0.6603402901599902, + "learning_rate": 1.4707647429840528e-05, + "loss": 1.0371, + "step": 23240 + }, + { + "epoch": 0.35068931189477814, + "grad_norm": 0.6800255353159471, + "learning_rate": 1.4703423994864867e-05, + "loss": 1.0489, + "step": 23250 + }, + { + "epoch": 0.350840146007421, + "grad_norm": 0.6874312399750158, + "learning_rate": 1.4699199482325665e-05, + "loss": 1.0341, + "step": 23260 + }, + { + "epoch": 0.35099098012006397, + "grad_norm": 0.7392736945521226, + "learning_rate": 1.4694973893190768e-05, + "loss": 1.0441, + "step": 23270 + }, + { + "epoch": 0.35114181423270685, + "grad_norm": 0.6577473650092838, + "learning_rate": 1.4690747228428264e-05, + "loss": 1.043, + "step": 23280 + }, + { + "epoch": 0.3512926483453498, + "grad_norm": 0.6557414874109643, + "learning_rate": 1.4686519489006494e-05, + "loss": 1.0418, + "step": 23290 + }, + { + "epoch": 0.3514434824579927, + "grad_norm": 0.9082180666517118, + "learning_rate": 1.4682290675894036e-05, + "loss": 1.0613, + "step": 23300 + }, + { + "epoch": 0.3515943165706356, + "grad_norm": 0.6648032121595742, + "learning_rate": 1.4678060790059722e-05, + "loss": 1.0428, + "step": 23310 + }, + { + "epoch": 0.35174515068327855, + "grad_norm": 0.651326787674963, + "learning_rate": 1.467382983247263e-05, + "loss": 1.0448, + "step": 23320 + }, + { + "epoch": 0.35189598479592143, + "grad_norm": 0.6343276012593252, + "learning_rate": 1.4669597804102078e-05, + "loss": 1.0395, + "step": 23330 + }, + { + "epoch": 0.3520468189085644, + "grad_norm": 0.6870001670308697, + "learning_rate": 1.4665364705917631e-05, + "loss": 1.046, + "step": 23340 + }, + { + "epoch": 0.35219765302120726, + "grad_norm": 0.6597355846360925, + "learning_rate": 1.46611305388891e-05, + "loss": 1.0326, + "step": 23350 + }, + { + "epoch": 0.3523484871338502, + "grad_norm": 0.7281675708023101, + "learning_rate": 1.4656895303986543e-05, + "loss": 1.0328, + "step": 23360 + }, + { + "epoch": 0.3524993212464931, + "grad_norm": 0.6477532055702446, + "learning_rate": 1.4652659002180257e-05, + "loss": 1.0482, + "step": 23370 + }, + { + "epoch": 0.352650155359136, + "grad_norm": 0.6393316738077738, + "learning_rate": 1.464842163444079e-05, + "loss": 1.0331, + "step": 23380 + }, + { + "epoch": 0.35280098947177896, + "grad_norm": 0.6752565301848884, + "learning_rate": 1.4644183201738931e-05, + "loss": 1.0297, + "step": 23390 + }, + { + "epoch": 0.35295182358442184, + "grad_norm": 0.6793387859218883, + "learning_rate": 1.463994370504571e-05, + "loss": 1.0293, + "step": 23400 + }, + { + "epoch": 0.3531026576970648, + "grad_norm": 0.7129971249153768, + "learning_rate": 1.4635703145332406e-05, + "loss": 1.0338, + "step": 23410 + }, + { + "epoch": 0.35325349180970766, + "grad_norm": 0.6418426076472549, + "learning_rate": 1.4631461523570542e-05, + "loss": 1.0288, + "step": 23420 + }, + { + "epoch": 0.3534043259223506, + "grad_norm": 0.6455465941635277, + "learning_rate": 1.4627218840731883e-05, + "loss": 1.0563, + "step": 23430 + }, + { + "epoch": 0.3535551600349935, + "grad_norm": 0.6422773951914869, + "learning_rate": 1.4622975097788428e-05, + "loss": 1.0669, + "step": 23440 + }, + { + "epoch": 0.3537059941476364, + "grad_norm": 0.6620857815900831, + "learning_rate": 1.4618730295712429e-05, + "loss": 1.0328, + "step": 23450 + }, + { + "epoch": 0.35385682826027937, + "grad_norm": 0.6885607083084874, + "learning_rate": 1.4614484435476382e-05, + "loss": 1.0311, + "step": 23460 + }, + { + "epoch": 0.35400766237292225, + "grad_norm": 0.6384373598635928, + "learning_rate": 1.461023751805302e-05, + "loss": 1.0082, + "step": 23470 + }, + { + "epoch": 0.3541584964855652, + "grad_norm": 0.6700386378809327, + "learning_rate": 1.4605989544415323e-05, + "loss": 1.0398, + "step": 23480 + }, + { + "epoch": 0.35430933059820807, + "grad_norm": 0.6324346364087561, + "learning_rate": 1.4601740515536502e-05, + "loss": 1.0038, + "step": 23490 + }, + { + "epoch": 0.354460164710851, + "grad_norm": 0.686529259070637, + "learning_rate": 1.459749043239003e-05, + "loss": 1.039, + "step": 23500 + }, + { + "epoch": 0.3546109988234939, + "grad_norm": 0.6849745173923579, + "learning_rate": 1.4593239295949598e-05, + "loss": 1.0402, + "step": 23510 + }, + { + "epoch": 0.35476183293613683, + "grad_norm": 0.6822661575788729, + "learning_rate": 1.4588987107189151e-05, + "loss": 1.0245, + "step": 23520 + }, + { + "epoch": 0.3549126670487798, + "grad_norm": 0.6684461675106452, + "learning_rate": 1.4584733867082883e-05, + "loss": 1.0348, + "step": 23530 + }, + { + "epoch": 0.35506350116142266, + "grad_norm": 0.6540036475976966, + "learning_rate": 1.4580479576605208e-05, + "loss": 1.0484, + "step": 23540 + }, + { + "epoch": 0.3552143352740656, + "grad_norm": 0.7123092539285859, + "learning_rate": 1.4576224236730798e-05, + "loss": 1.0382, + "step": 23550 + }, + { + "epoch": 0.3553651693867085, + "grad_norm": 0.6479279481892962, + "learning_rate": 1.4571967848434558e-05, + "loss": 1.0367, + "step": 23560 + }, + { + "epoch": 0.3555160034993514, + "grad_norm": 0.6464843358558374, + "learning_rate": 1.4567710412691638e-05, + "loss": 1.0396, + "step": 23570 + }, + { + "epoch": 0.3556668376119943, + "grad_norm": 0.6491007799825713, + "learning_rate": 1.4563451930477422e-05, + "loss": 1.0398, + "step": 23580 + }, + { + "epoch": 0.35581767172463724, + "grad_norm": 0.7304272424837219, + "learning_rate": 1.4559192402767535e-05, + "loss": 1.0214, + "step": 23590 + }, + { + "epoch": 0.3559685058372802, + "grad_norm": 0.6536255307534978, + "learning_rate": 1.4554931830537846e-05, + "loss": 1.0306, + "step": 23600 + }, + { + "epoch": 0.35611933994992306, + "grad_norm": 0.6495058405111481, + "learning_rate": 1.4550670214764457e-05, + "loss": 1.0196, + "step": 23610 + }, + { + "epoch": 0.356270174062566, + "grad_norm": 0.6195430794781528, + "learning_rate": 1.4546407556423718e-05, + "loss": 1.0171, + "step": 23620 + }, + { + "epoch": 0.3564210081752089, + "grad_norm": 0.6901532489331309, + "learning_rate": 1.4542143856492207e-05, + "loss": 1.0485, + "step": 23630 + }, + { + "epoch": 0.3565718422878518, + "grad_norm": 0.6908076209602413, + "learning_rate": 1.4537879115946746e-05, + "loss": 1.0292, + "step": 23640 + }, + { + "epoch": 0.3567226764004947, + "grad_norm": 0.7117902659880015, + "learning_rate": 1.45336133357644e-05, + "loss": 1.0307, + "step": 23650 + }, + { + "epoch": 0.35687351051313765, + "grad_norm": 0.7019106496165013, + "learning_rate": 1.4529346516922465e-05, + "loss": 1.0245, + "step": 23660 + }, + { + "epoch": 0.3570243446257806, + "grad_norm": 0.6880719632163464, + "learning_rate": 1.4525078660398477e-05, + "loss": 1.0233, + "step": 23670 + }, + { + "epoch": 0.35717517873842347, + "grad_norm": 0.6472417672777082, + "learning_rate": 1.452080976717021e-05, + "loss": 1.0293, + "step": 23680 + }, + { + "epoch": 0.3573260128510664, + "grad_norm": 0.7745547094464151, + "learning_rate": 1.4516539838215675e-05, + "loss": 1.0553, + "step": 23690 + }, + { + "epoch": 0.3574768469637093, + "grad_norm": 0.6236261373440244, + "learning_rate": 1.4512268874513123e-05, + "loss": 1.0438, + "step": 23700 + }, + { + "epoch": 0.35762768107635223, + "grad_norm": 0.7070372815226249, + "learning_rate": 1.4507996877041037e-05, + "loss": 1.0456, + "step": 23710 + }, + { + "epoch": 0.3577785151889951, + "grad_norm": 0.6997014579957825, + "learning_rate": 1.4503723846778144e-05, + "loss": 1.0384, + "step": 23720 + }, + { + "epoch": 0.35792934930163806, + "grad_norm": 0.6416900902008699, + "learning_rate": 1.4499449784703399e-05, + "loss": 1.0243, + "step": 23730 + }, + { + "epoch": 0.358080183414281, + "grad_norm": 0.6684167276969957, + "learning_rate": 1.4495174691796003e-05, + "loss": 1.0189, + "step": 23740 + }, + { + "epoch": 0.3582310175269239, + "grad_norm": 0.652937001031654, + "learning_rate": 1.4490898569035384e-05, + "loss": 1.0088, + "step": 23750 + }, + { + "epoch": 0.3583818516395668, + "grad_norm": 0.6717987486304994, + "learning_rate": 1.4486621417401213e-05, + "loss": 1.0319, + "step": 23760 + }, + { + "epoch": 0.3585326857522097, + "grad_norm": 0.6707890295808091, + "learning_rate": 1.4482343237873389e-05, + "loss": 1.0313, + "step": 23770 + }, + { + "epoch": 0.35868351986485264, + "grad_norm": 0.6731202474181218, + "learning_rate": 1.4478064031432057e-05, + "loss": 1.0507, + "step": 23780 + }, + { + "epoch": 0.3588343539774955, + "grad_norm": 0.6949802993510629, + "learning_rate": 1.4473783799057588e-05, + "loss": 1.0376, + "step": 23790 + }, + { + "epoch": 0.35898518809013846, + "grad_norm": 0.6982288455181109, + "learning_rate": 1.446950254173059e-05, + "loss": 1.0268, + "step": 23800 + }, + { + "epoch": 0.3591360222027814, + "grad_norm": 0.6690959938315115, + "learning_rate": 1.4465220260431914e-05, + "loss": 1.0109, + "step": 23810 + }, + { + "epoch": 0.3592868563154243, + "grad_norm": 0.6651691251864503, + "learning_rate": 1.446093695614263e-05, + "loss": 1.0606, + "step": 23820 + }, + { + "epoch": 0.3594376904280672, + "grad_norm": 0.7331832499762766, + "learning_rate": 1.4456652629844059e-05, + "loss": 1.0586, + "step": 23830 + }, + { + "epoch": 0.3595885245407101, + "grad_norm": 0.6615402114740194, + "learning_rate": 1.4452367282517743e-05, + "loss": 1.0239, + "step": 23840 + }, + { + "epoch": 0.35973935865335305, + "grad_norm": 0.6683539421643491, + "learning_rate": 1.4448080915145469e-05, + "loss": 1.0257, + "step": 23850 + }, + { + "epoch": 0.35989019276599593, + "grad_norm": 0.6103794243627098, + "learning_rate": 1.4443793528709246e-05, + "loss": 1.04, + "step": 23860 + }, + { + "epoch": 0.36004102687863887, + "grad_norm": 0.6534068103425478, + "learning_rate": 1.4439505124191327e-05, + "loss": 1.0467, + "step": 23870 + }, + { + "epoch": 0.3601918609912818, + "grad_norm": 0.654501937241968, + "learning_rate": 1.443521570257419e-05, + "loss": 1.0398, + "step": 23880 + }, + { + "epoch": 0.3603426951039247, + "grad_norm": 0.6835471428087336, + "learning_rate": 1.4430925264840551e-05, + "loss": 1.0487, + "step": 23890 + }, + { + "epoch": 0.36049352921656763, + "grad_norm": 0.6699380650247856, + "learning_rate": 1.4426633811973361e-05, + "loss": 1.0297, + "step": 23900 + }, + { + "epoch": 0.3606443633292105, + "grad_norm": 0.6795615605450892, + "learning_rate": 1.4422341344955797e-05, + "loss": 1.0328, + "step": 23910 + }, + { + "epoch": 0.36079519744185345, + "grad_norm": 0.7498899147035186, + "learning_rate": 1.4418047864771274e-05, + "loss": 1.0323, + "step": 23920 + }, + { + "epoch": 0.36094603155449634, + "grad_norm": 0.6619877667700995, + "learning_rate": 1.4413753372403431e-05, + "loss": 1.0229, + "step": 23930 + }, + { + "epoch": 0.3610968656671393, + "grad_norm": 0.6890912174319126, + "learning_rate": 1.440945786883615e-05, + "loss": 1.0383, + "step": 23940 + }, + { + "epoch": 0.3612476997797822, + "grad_norm": 0.6525753091973258, + "learning_rate": 1.4405161355053539e-05, + "loss": 1.0198, + "step": 23950 + }, + { + "epoch": 0.3613985338924251, + "grad_norm": 0.6805598379427733, + "learning_rate": 1.4400863832039932e-05, + "loss": 1.0412, + "step": 23960 + }, + { + "epoch": 0.36154936800506804, + "grad_norm": 0.7470902471970883, + "learning_rate": 1.4396565300779905e-05, + "loss": 1.0332, + "step": 23970 + }, + { + "epoch": 0.3617002021177109, + "grad_norm": 0.6563763808092108, + "learning_rate": 1.439226576225826e-05, + "loss": 1.0302, + "step": 23980 + }, + { + "epoch": 0.36185103623035386, + "grad_norm": 0.6420683616644657, + "learning_rate": 1.438796521746003e-05, + "loss": 1.0256, + "step": 23990 + }, + { + "epoch": 0.36200187034299675, + "grad_norm": 0.6970785114068233, + "learning_rate": 1.4383663667370474e-05, + "loss": 1.0472, + "step": 24000 + }, + { + "epoch": 0.3621527044556397, + "grad_norm": 0.6575109660922549, + "learning_rate": 1.4379361112975088e-05, + "loss": 1.0465, + "step": 24010 + }, + { + "epoch": 0.3623035385682826, + "grad_norm": 0.6894984480167015, + "learning_rate": 1.4375057555259595e-05, + "loss": 1.0273, + "step": 24020 + }, + { + "epoch": 0.3624543726809255, + "grad_norm": 0.6521048946627919, + "learning_rate": 1.4370752995209949e-05, + "loss": 1.0523, + "step": 24030 + }, + { + "epoch": 0.36260520679356845, + "grad_norm": 0.7590962803241553, + "learning_rate": 1.4366447433812334e-05, + "loss": 1.0282, + "step": 24040 + }, + { + "epoch": 0.36275604090621133, + "grad_norm": 0.6808815807019241, + "learning_rate": 1.436214087205316e-05, + "loss": 1.0433, + "step": 24050 + }, + { + "epoch": 0.36290687501885427, + "grad_norm": 0.6209069682565218, + "learning_rate": 1.4357833310919071e-05, + "loss": 1.0242, + "step": 24060 + }, + { + "epoch": 0.36305770913149715, + "grad_norm": 0.6654368577434342, + "learning_rate": 1.4353524751396935e-05, + "loss": 1.041, + "step": 24070 + }, + { + "epoch": 0.3632085432441401, + "grad_norm": 0.689568193294961, + "learning_rate": 1.4349215194473853e-05, + "loss": 1.0509, + "step": 24080 + }, + { + "epoch": 0.36335937735678303, + "grad_norm": 0.6492332103463695, + "learning_rate": 1.4344904641137154e-05, + "loss": 1.0485, + "step": 24090 + }, + { + "epoch": 0.3635102114694259, + "grad_norm": 0.6419536235380802, + "learning_rate": 1.4340593092374393e-05, + "loss": 1.0275, + "step": 24100 + }, + { + "epoch": 0.36366104558206885, + "grad_norm": 0.7094109728708213, + "learning_rate": 1.4336280549173353e-05, + "loss": 1.0254, + "step": 24110 + }, + { + "epoch": 0.36381187969471174, + "grad_norm": 0.6530173260550992, + "learning_rate": 1.4331967012522048e-05, + "loss": 1.0444, + "step": 24120 + }, + { + "epoch": 0.3639627138073547, + "grad_norm": 0.6664632581644974, + "learning_rate": 1.4327652483408713e-05, + "loss": 1.035, + "step": 24130 + }, + { + "epoch": 0.36411354791999756, + "grad_norm": 0.6620380227685604, + "learning_rate": 1.4323336962821823e-05, + "loss": 1.0385, + "step": 24140 + }, + { + "epoch": 0.3642643820326405, + "grad_norm": 0.6704080410278657, + "learning_rate": 1.4319020451750065e-05, + "loss": 1.0305, + "step": 24150 + }, + { + "epoch": 0.36441521614528344, + "grad_norm": 0.6758025807744827, + "learning_rate": 1.4314702951182363e-05, + "loss": 1.0518, + "step": 24160 + }, + { + "epoch": 0.3645660502579263, + "grad_norm": 0.6554477569463407, + "learning_rate": 1.4310384462107866e-05, + "loss": 1.0351, + "step": 24170 + }, + { + "epoch": 0.36471688437056926, + "grad_norm": 0.6848370945655597, + "learning_rate": 1.4306064985515946e-05, + "loss": 1.0293, + "step": 24180 + }, + { + "epoch": 0.36486771848321214, + "grad_norm": 0.7499214581771156, + "learning_rate": 1.4301744522396204e-05, + "loss": 1.019, + "step": 24190 + }, + { + "epoch": 0.3650185525958551, + "grad_norm": 0.7814109328439806, + "learning_rate": 1.4297423073738468e-05, + "loss": 1.041, + "step": 24200 + }, + { + "epoch": 0.36516938670849797, + "grad_norm": 0.6810611612402017, + "learning_rate": 1.4293100640532785e-05, + "loss": 1.0412, + "step": 24210 + }, + { + "epoch": 0.3653202208211409, + "grad_norm": 0.6461710526137585, + "learning_rate": 1.4288777223769437e-05, + "loss": 1.0308, + "step": 24220 + }, + { + "epoch": 0.36547105493378385, + "grad_norm": 0.7147431046498237, + "learning_rate": 1.4284452824438928e-05, + "loss": 1.0449, + "step": 24230 + }, + { + "epoch": 0.36562188904642673, + "grad_norm": 2.168661695863731, + "learning_rate": 1.4280127443531985e-05, + "loss": 1.032, + "step": 24240 + }, + { + "epoch": 0.36577272315906967, + "grad_norm": 0.7166596597050997, + "learning_rate": 1.4275801082039562e-05, + "loss": 1.0409, + "step": 24250 + }, + { + "epoch": 0.36592355727171255, + "grad_norm": 0.665799226674694, + "learning_rate": 1.4271473740952835e-05, + "loss": 1.0512, + "step": 24260 + }, + { + "epoch": 0.3660743913843555, + "grad_norm": 0.6304996748853158, + "learning_rate": 1.426714542126321e-05, + "loss": 1.0127, + "step": 24270 + }, + { + "epoch": 0.3662252254969984, + "grad_norm": 0.7025437808917071, + "learning_rate": 1.4262816123962307e-05, + "loss": 1.0421, + "step": 24280 + }, + { + "epoch": 0.3663760596096413, + "grad_norm": 0.650877503606768, + "learning_rate": 1.4258485850041979e-05, + "loss": 1.0197, + "step": 24290 + }, + { + "epoch": 0.36652689372228425, + "grad_norm": 0.6820487183750519, + "learning_rate": 1.4254154600494303e-05, + "loss": 1.0448, + "step": 24300 + }, + { + "epoch": 0.36667772783492714, + "grad_norm": 0.6654297374629871, + "learning_rate": 1.4249822376311575e-05, + "loss": 1.0124, + "step": 24310 + }, + { + "epoch": 0.3668285619475701, + "grad_norm": 0.716168147800129, + "learning_rate": 1.4245489178486316e-05, + "loss": 1.0489, + "step": 24320 + }, + { + "epoch": 0.36697939606021296, + "grad_norm": 0.6740796633494722, + "learning_rate": 1.4241155008011267e-05, + "loss": 1.0236, + "step": 24330 + }, + { + "epoch": 0.3671302301728559, + "grad_norm": 0.6560935371457569, + "learning_rate": 1.42368198658794e-05, + "loss": 1.0291, + "step": 24340 + }, + { + "epoch": 0.3672810642854988, + "grad_norm": 0.7498895647259972, + "learning_rate": 1.4232483753083903e-05, + "loss": 1.0271, + "step": 24350 + }, + { + "epoch": 0.3674318983981417, + "grad_norm": 0.6404939225937863, + "learning_rate": 1.4228146670618186e-05, + "loss": 1.0414, + "step": 24360 + }, + { + "epoch": 0.36758273251078466, + "grad_norm": 0.6594887143594426, + "learning_rate": 1.422380861947588e-05, + "loss": 1.0407, + "step": 24370 + }, + { + "epoch": 0.36773356662342754, + "grad_norm": 0.6356075473842144, + "learning_rate": 1.4219469600650848e-05, + "loss": 1.0234, + "step": 24380 + }, + { + "epoch": 0.3678844007360705, + "grad_norm": 0.6714368234970827, + "learning_rate": 1.4215129615137164e-05, + "loss": 1.0377, + "step": 24390 + }, + { + "epoch": 0.36803523484871337, + "grad_norm": 0.6308283216660447, + "learning_rate": 1.4210788663929127e-05, + "loss": 1.0245, + "step": 24400 + }, + { + "epoch": 0.3681860689613563, + "grad_norm": 0.6354728921963743, + "learning_rate": 1.4206446748021261e-05, + "loss": 1.0303, + "step": 24410 + }, + { + "epoch": 0.3683369030739992, + "grad_norm": 0.6835642268421946, + "learning_rate": 1.4202103868408302e-05, + "loss": 1.0296, + "step": 24420 + }, + { + "epoch": 0.36848773718664213, + "grad_norm": 0.7099986765522049, + "learning_rate": 1.4197760026085212e-05, + "loss": 1.0535, + "step": 24430 + }, + { + "epoch": 0.36863857129928507, + "grad_norm": 0.6577244332830049, + "learning_rate": 1.4193415222047176e-05, + "loss": 1.0328, + "step": 24440 + }, + { + "epoch": 0.36878940541192795, + "grad_norm": 0.6581917224936505, + "learning_rate": 1.41890694572896e-05, + "loss": 1.0562, + "step": 24450 + }, + { + "epoch": 0.3689402395245709, + "grad_norm": 0.6647719292265547, + "learning_rate": 1.4184722732808103e-05, + "loss": 1.0355, + "step": 24460 + }, + { + "epoch": 0.3690910736372138, + "grad_norm": 0.616175144708989, + "learning_rate": 1.4180375049598525e-05, + "loss": 1.0412, + "step": 24470 + }, + { + "epoch": 0.3692419077498567, + "grad_norm": 0.7289431364986546, + "learning_rate": 1.4176026408656938e-05, + "loss": 1.032, + "step": 24480 + }, + { + "epoch": 0.3693927418624996, + "grad_norm": 0.6462161447770505, + "learning_rate": 1.4171676810979616e-05, + "loss": 1.0408, + "step": 24490 + }, + { + "epoch": 0.36954357597514254, + "grad_norm": 0.6695528918917009, + "learning_rate": 1.4167326257563063e-05, + "loss": 1.0408, + "step": 24500 + }, + { + "epoch": 0.3696944100877855, + "grad_norm": 0.6312171685026339, + "learning_rate": 1.4162974749404e-05, + "loss": 1.0289, + "step": 24510 + }, + { + "epoch": 0.36984524420042836, + "grad_norm": 0.6795772383299354, + "learning_rate": 1.4158622287499366e-05, + "loss": 1.0321, + "step": 24520 + }, + { + "epoch": 0.3699960783130713, + "grad_norm": 0.664062952458869, + "learning_rate": 1.4154268872846315e-05, + "loss": 1.0311, + "step": 24530 + }, + { + "epoch": 0.3701469124257142, + "grad_norm": 0.624954110023092, + "learning_rate": 1.4149914506442225e-05, + "loss": 1.0266, + "step": 24540 + }, + { + "epoch": 0.3702977465383571, + "grad_norm": 0.6536003506892843, + "learning_rate": 1.4145559189284693e-05, + "loss": 1.046, + "step": 24550 + }, + { + "epoch": 0.370448580651, + "grad_norm": 0.7126617253313773, + "learning_rate": 1.4141202922371524e-05, + "loss": 1.032, + "step": 24560 + }, + { + "epoch": 0.37059941476364294, + "grad_norm": 0.7078426939667231, + "learning_rate": 1.4136845706700753e-05, + "loss": 1.0421, + "step": 24570 + }, + { + "epoch": 0.3707502488762859, + "grad_norm": 0.6775755067179438, + "learning_rate": 1.4132487543270625e-05, + "loss": 1.025, + "step": 24580 + }, + { + "epoch": 0.37090108298892877, + "grad_norm": 0.7173187710961292, + "learning_rate": 1.4128128433079602e-05, + "loss": 1.0233, + "step": 24590 + }, + { + "epoch": 0.3710519171015717, + "grad_norm": 0.6644125873287385, + "learning_rate": 1.4123768377126367e-05, + "loss": 1.0275, + "step": 24600 + }, + { + "epoch": 0.3712027512142146, + "grad_norm": 0.6745415330732714, + "learning_rate": 1.4119407376409814e-05, + "loss": 1.0362, + "step": 24610 + }, + { + "epoch": 0.3713535853268575, + "grad_norm": 0.6820515025890914, + "learning_rate": 1.4115045431929062e-05, + "loss": 1.0407, + "step": 24620 + }, + { + "epoch": 0.3715044194395004, + "grad_norm": 0.770688290544702, + "learning_rate": 1.4110682544683432e-05, + "loss": 1.0413, + "step": 24630 + }, + { + "epoch": 0.37165525355214335, + "grad_norm": 0.6323192562437854, + "learning_rate": 1.4106318715672477e-05, + "loss": 1.0313, + "step": 24640 + }, + { + "epoch": 0.3718060876647863, + "grad_norm": 0.6544316320360133, + "learning_rate": 1.4101953945895956e-05, + "loss": 1.0379, + "step": 24650 + }, + { + "epoch": 0.3719569217774292, + "grad_norm": 0.6530377334547705, + "learning_rate": 1.4097588236353852e-05, + "loss": 1.0208, + "step": 24660 + }, + { + "epoch": 0.3721077558900721, + "grad_norm": 0.6638995597373417, + "learning_rate": 1.409322158804635e-05, + "loss": 1.0348, + "step": 24670 + }, + { + "epoch": 0.372258590002715, + "grad_norm": 0.6582368314825296, + "learning_rate": 1.408885400197386e-05, + "loss": 1.0354, + "step": 24680 + }, + { + "epoch": 0.37240942411535793, + "grad_norm": 0.626036057951477, + "learning_rate": 1.4084485479137007e-05, + "loss": 1.0372, + "step": 24690 + }, + { + "epoch": 0.3725602582280008, + "grad_norm": 0.6673311958217581, + "learning_rate": 1.4080116020536624e-05, + "loss": 1.0472, + "step": 24700 + }, + { + "epoch": 0.37271109234064376, + "grad_norm": 0.6582473227155369, + "learning_rate": 1.4075745627173763e-05, + "loss": 1.0381, + "step": 24710 + }, + { + "epoch": 0.3728619264532867, + "grad_norm": 0.6982923892989078, + "learning_rate": 1.407137430004969e-05, + "loss": 1.0365, + "step": 24720 + }, + { + "epoch": 0.3730127605659296, + "grad_norm": 0.6920812589449759, + "learning_rate": 1.4067002040165889e-05, + "loss": 1.0254, + "step": 24730 + }, + { + "epoch": 0.3731635946785725, + "grad_norm": 0.7849950132993355, + "learning_rate": 1.4062628848524047e-05, + "loss": 1.0333, + "step": 24740 + }, + { + "epoch": 0.3733144287912154, + "grad_norm": 0.6479540638824958, + "learning_rate": 1.4058254726126075e-05, + "loss": 1.0314, + "step": 24750 + }, + { + "epoch": 0.37346526290385834, + "grad_norm": 0.6854575058854233, + "learning_rate": 1.405387967397409e-05, + "loss": 1.0711, + "step": 24760 + }, + { + "epoch": 0.3736160970165012, + "grad_norm": 0.6478805283447472, + "learning_rate": 1.4049503693070428e-05, + "loss": 1.0248, + "step": 24770 + }, + { + "epoch": 0.37376693112914416, + "grad_norm": 0.6840481246629545, + "learning_rate": 1.4045126784417629e-05, + "loss": 1.0277, + "step": 24780 + }, + { + "epoch": 0.3739177652417871, + "grad_norm": 0.6656780747698718, + "learning_rate": 1.4040748949018457e-05, + "loss": 1.0342, + "step": 24790 + }, + { + "epoch": 0.37406859935443, + "grad_norm": 0.6384234860149994, + "learning_rate": 1.4036370187875878e-05, + "loss": 1.0376, + "step": 24800 + }, + { + "epoch": 0.3742194334670729, + "grad_norm": 0.6388191664487944, + "learning_rate": 1.403199050199308e-05, + "loss": 1.0139, + "step": 24810 + }, + { + "epoch": 0.3743702675797158, + "grad_norm": 0.6919667376925392, + "learning_rate": 1.4027609892373453e-05, + "loss": 1.0372, + "step": 24820 + }, + { + "epoch": 0.37452110169235875, + "grad_norm": 0.6534368637110285, + "learning_rate": 1.4023228360020607e-05, + "loss": 1.0208, + "step": 24830 + }, + { + "epoch": 0.37467193580500163, + "grad_norm": 0.7166658636758522, + "learning_rate": 1.4018845905938356e-05, + "loss": 1.015, + "step": 24840 + }, + { + "epoch": 0.37482276991764457, + "grad_norm": 0.6854817233772279, + "learning_rate": 1.4014462531130731e-05, + "loss": 1.0293, + "step": 24850 + }, + { + "epoch": 0.3749736040302875, + "grad_norm": 0.6510797798943408, + "learning_rate": 1.4010078236601969e-05, + "loss": 1.0378, + "step": 24860 + }, + { + "epoch": 0.3751244381429304, + "grad_norm": 0.7882400831856449, + "learning_rate": 1.4005693023356527e-05, + "loss": 1.0359, + "step": 24870 + }, + { + "epoch": 0.37527527225557333, + "grad_norm": 0.7078798372332621, + "learning_rate": 1.4001306892399053e-05, + "loss": 1.0449, + "step": 24880 + }, + { + "epoch": 0.3754261063682162, + "grad_norm": 0.6232841329768765, + "learning_rate": 1.3996919844734429e-05, + "loss": 1.0281, + "step": 24890 + }, + { + "epoch": 0.37557694048085916, + "grad_norm": 0.6595933893068491, + "learning_rate": 1.3992531881367734e-05, + "loss": 1.0385, + "step": 24900 + }, + { + "epoch": 0.37572777459350204, + "grad_norm": 0.648982592253236, + "learning_rate": 1.398814300330426e-05, + "loss": 1.0447, + "step": 24910 + }, + { + "epoch": 0.375878608706145, + "grad_norm": 0.6323557731524105, + "learning_rate": 1.3983753211549502e-05, + "loss": 1.0135, + "step": 24920 + }, + { + "epoch": 0.3760294428187879, + "grad_norm": 0.6634577005867397, + "learning_rate": 1.3979362507109176e-05, + "loss": 1.0393, + "step": 24930 + }, + { + "epoch": 0.3761802769314308, + "grad_norm": 0.705925557181139, + "learning_rate": 1.3974970890989199e-05, + "loss": 1.0428, + "step": 24940 + }, + { + "epoch": 0.37633111104407374, + "grad_norm": 0.6674373760824606, + "learning_rate": 1.3970578364195695e-05, + "loss": 1.0224, + "step": 24950 + }, + { + "epoch": 0.3764819451567166, + "grad_norm": 0.811580522990782, + "learning_rate": 1.3966184927735002e-05, + "loss": 1.0214, + "step": 24960 + }, + { + "epoch": 0.37663277926935956, + "grad_norm": 0.6538104399767632, + "learning_rate": 1.3961790582613671e-05, + "loss": 1.0344, + "step": 24970 + }, + { + "epoch": 0.37678361338200245, + "grad_norm": 0.7375957810687117, + "learning_rate": 1.3957395329838446e-05, + "loss": 1.0299, + "step": 24980 + }, + { + "epoch": 0.3769344474946454, + "grad_norm": 0.6735325659393869, + "learning_rate": 1.395299917041629e-05, + "loss": 1.0328, + "step": 24990 + }, + { + "epoch": 0.3770852816072883, + "grad_norm": 0.6889265498741803, + "learning_rate": 1.394860210535438e-05, + "loss": 1.0288, + "step": 25000 + }, + { + "epoch": 0.3772361157199312, + "grad_norm": 0.6593570169291895, + "learning_rate": 1.3944204135660082e-05, + "loss": 1.0374, + "step": 25010 + }, + { + "epoch": 0.37738694983257415, + "grad_norm": 0.6329848017245101, + "learning_rate": 1.3939805262340985e-05, + "loss": 1.0225, + "step": 25020 + }, + { + "epoch": 0.37753778394521703, + "grad_norm": 0.7585887018024058, + "learning_rate": 1.3935405486404874e-05, + "loss": 1.0167, + "step": 25030 + }, + { + "epoch": 0.37768861805785997, + "grad_norm": 0.6582715711525524, + "learning_rate": 1.3931004808859751e-05, + "loss": 1.0296, + "step": 25040 + }, + { + "epoch": 0.37783945217050285, + "grad_norm": 0.7153887204528591, + "learning_rate": 1.3926603230713819e-05, + "loss": 1.0431, + "step": 25050 + }, + { + "epoch": 0.3779902862831458, + "grad_norm": 0.6352095400834331, + "learning_rate": 1.3922200752975489e-05, + "loss": 1.0468, + "step": 25060 + }, + { + "epoch": 0.37814112039578873, + "grad_norm": 0.6575126995970283, + "learning_rate": 1.3917797376653372e-05, + "loss": 1.0215, + "step": 25070 + }, + { + "epoch": 0.3782919545084316, + "grad_norm": 0.647507018284194, + "learning_rate": 1.3913393102756298e-05, + "loss": 1.0063, + "step": 25080 + }, + { + "epoch": 0.37844278862107456, + "grad_norm": 0.6570369912515587, + "learning_rate": 1.3908987932293288e-05, + "loss": 1.035, + "step": 25090 + }, + { + "epoch": 0.37859362273371744, + "grad_norm": 0.6610863834655178, + "learning_rate": 1.3904581866273577e-05, + "loss": 1.0423, + "step": 25100 + }, + { + "epoch": 0.3787444568463604, + "grad_norm": 0.6657949165981344, + "learning_rate": 1.3900174905706604e-05, + "loss": 1.0224, + "step": 25110 + }, + { + "epoch": 0.37889529095900326, + "grad_norm": 0.6520287487721171, + "learning_rate": 1.3895767051602014e-05, + "loss": 1.0627, + "step": 25120 + }, + { + "epoch": 0.3790461250716462, + "grad_norm": 0.6687898453994152, + "learning_rate": 1.389135830496965e-05, + "loss": 1.0255, + "step": 25130 + }, + { + "epoch": 0.37919695918428914, + "grad_norm": 0.6146963685818635, + "learning_rate": 1.3886948666819566e-05, + "loss": 1.0187, + "step": 25140 + }, + { + "epoch": 0.379347793296932, + "grad_norm": 0.646803149035084, + "learning_rate": 1.3882538138162024e-05, + "loss": 1.0357, + "step": 25150 + }, + { + "epoch": 0.37949862740957496, + "grad_norm": 0.6736900592991545, + "learning_rate": 1.3878126720007478e-05, + "loss": 1.0225, + "step": 25160 + }, + { + "epoch": 0.37964946152221785, + "grad_norm": 0.6607611638936768, + "learning_rate": 1.3873714413366595e-05, + "loss": 1.0294, + "step": 25170 + }, + { + "epoch": 0.3798002956348608, + "grad_norm": 0.6749709697326112, + "learning_rate": 1.3869301219250244e-05, + "loss": 1.0284, + "step": 25180 + }, + { + "epoch": 0.37995112974750367, + "grad_norm": 0.6870804166161567, + "learning_rate": 1.3864887138669496e-05, + "loss": 1.025, + "step": 25190 + }, + { + "epoch": 0.3801019638601466, + "grad_norm": 0.6478672209771752, + "learning_rate": 1.3860472172635622e-05, + "loss": 1.0384, + "step": 25200 + }, + { + "epoch": 0.38025279797278955, + "grad_norm": 0.6543804402554666, + "learning_rate": 1.3856056322160104e-05, + "loss": 1.0482, + "step": 25210 + }, + { + "epoch": 0.38040363208543243, + "grad_norm": 0.7093905475192139, + "learning_rate": 1.3851639588254617e-05, + "loss": 1.0276, + "step": 25220 + }, + { + "epoch": 0.38055446619807537, + "grad_norm": 0.6546460551551682, + "learning_rate": 1.3847221971931046e-05, + "loss": 1.0308, + "step": 25230 + }, + { + "epoch": 0.38070530031071825, + "grad_norm": 0.6434530017139064, + "learning_rate": 1.3842803474201479e-05, + "loss": 1.0326, + "step": 25240 + }, + { + "epoch": 0.3808561344233612, + "grad_norm": 0.6464987991552371, + "learning_rate": 1.3838384096078197e-05, + "loss": 1.0509, + "step": 25250 + }, + { + "epoch": 0.3810069685360041, + "grad_norm": 0.6496955898692698, + "learning_rate": 1.3833963838573689e-05, + "loss": 1.0192, + "step": 25260 + }, + { + "epoch": 0.381157802648647, + "grad_norm": 0.6739226061319635, + "learning_rate": 1.3829542702700649e-05, + "loss": 1.0057, + "step": 25270 + }, + { + "epoch": 0.38130863676128995, + "grad_norm": 0.6784291855691944, + "learning_rate": 1.382512068947196e-05, + "loss": 1.0178, + "step": 25280 + }, + { + "epoch": 0.38145947087393284, + "grad_norm": 0.6352923944971356, + "learning_rate": 1.382069779990072e-05, + "loss": 1.0286, + "step": 25290 + }, + { + "epoch": 0.3816103049865758, + "grad_norm": 0.6476516739251417, + "learning_rate": 1.3816274035000219e-05, + "loss": 1.0159, + "step": 25300 + }, + { + "epoch": 0.38176113909921866, + "grad_norm": 0.6779352814410359, + "learning_rate": 1.3811849395783952e-05, + "loss": 1.0316, + "step": 25310 + }, + { + "epoch": 0.3819119732118616, + "grad_norm": 0.6673570493721738, + "learning_rate": 1.3807423883265609e-05, + "loss": 1.0379, + "step": 25320 + }, + { + "epoch": 0.3820628073245045, + "grad_norm": 0.6393678606907314, + "learning_rate": 1.380299749845909e-05, + "loss": 1.0283, + "step": 25330 + }, + { + "epoch": 0.3822136414371474, + "grad_norm": 0.6659630267424973, + "learning_rate": 1.379857024237848e-05, + "loss": 1.0337, + "step": 25340 + }, + { + "epoch": 0.38236447554979036, + "grad_norm": 0.6747444232247687, + "learning_rate": 1.3794142116038076e-05, + "loss": 1.0152, + "step": 25350 + }, + { + "epoch": 0.38251530966243324, + "grad_norm": 0.641418025311614, + "learning_rate": 1.3789713120452373e-05, + "loss": 1.0338, + "step": 25360 + }, + { + "epoch": 0.3826661437750762, + "grad_norm": 0.6870986334030663, + "learning_rate": 1.3785283256636057e-05, + "loss": 1.0471, + "step": 25370 + }, + { + "epoch": 0.38281697788771907, + "grad_norm": 0.6283966745898149, + "learning_rate": 1.3780852525604021e-05, + "loss": 1.0293, + "step": 25380 + }, + { + "epoch": 0.382967812000362, + "grad_norm": 0.6273049694689029, + "learning_rate": 1.3776420928371352e-05, + "loss": 1.0214, + "step": 25390 + }, + { + "epoch": 0.3831186461130049, + "grad_norm": 0.6905315225118833, + "learning_rate": 1.3771988465953346e-05, + "loss": 1.0634, + "step": 25400 + }, + { + "epoch": 0.38326948022564783, + "grad_norm": 0.6613555945305477, + "learning_rate": 1.3767555139365479e-05, + "loss": 1.0326, + "step": 25410 + }, + { + "epoch": 0.38342031433829077, + "grad_norm": 0.6576992068974875, + "learning_rate": 1.3763120949623437e-05, + "loss": 1.023, + "step": 25420 + }, + { + "epoch": 0.38357114845093365, + "grad_norm": 0.6182692576566448, + "learning_rate": 1.3758685897743105e-05, + "loss": 1.0227, + "step": 25430 + }, + { + "epoch": 0.3837219825635766, + "grad_norm": 0.7360224101298705, + "learning_rate": 1.3754249984740557e-05, + "loss": 1.0311, + "step": 25440 + }, + { + "epoch": 0.3838728166762195, + "grad_norm": 0.6443320020797968, + "learning_rate": 1.3749813211632072e-05, + "loss": 1.0422, + "step": 25450 + }, + { + "epoch": 0.3840236507888624, + "grad_norm": 0.6810004630256424, + "learning_rate": 1.3745375579434122e-05, + "loss": 1.0293, + "step": 25460 + }, + { + "epoch": 0.3841744849015053, + "grad_norm": 0.6199832733597588, + "learning_rate": 1.374093708916338e-05, + "loss": 1.0222, + "step": 25470 + }, + { + "epoch": 0.38432531901414824, + "grad_norm": 0.673870796791169, + "learning_rate": 1.373649774183671e-05, + "loss": 1.0416, + "step": 25480 + }, + { + "epoch": 0.3844761531267912, + "grad_norm": 0.6694703047435598, + "learning_rate": 1.3732057538471175e-05, + "loss": 1.0475, + "step": 25490 + }, + { + "epoch": 0.38462698723943406, + "grad_norm": 0.6540154096150611, + "learning_rate": 1.3727616480084037e-05, + "loss": 1.0124, + "step": 25500 + }, + { + "epoch": 0.384777821352077, + "grad_norm": 0.6477978062238111, + "learning_rate": 1.372317456769275e-05, + "loss": 1.0201, + "step": 25510 + }, + { + "epoch": 0.3849286554647199, + "grad_norm": 0.613841970689594, + "learning_rate": 1.3718731802314962e-05, + "loss": 1.0211, + "step": 25520 + }, + { + "epoch": 0.3850794895773628, + "grad_norm": 0.728416670092224, + "learning_rate": 1.371428818496852e-05, + "loss": 1.0576, + "step": 25530 + }, + { + "epoch": 0.3852303236900057, + "grad_norm": 0.6577209147125918, + "learning_rate": 1.370984371667147e-05, + "loss": 1.0302, + "step": 25540 + }, + { + "epoch": 0.38538115780264864, + "grad_norm": 0.6978384628926968, + "learning_rate": 1.370539839844204e-05, + "loss": 1.0278, + "step": 25550 + }, + { + "epoch": 0.3855319919152916, + "grad_norm": 0.6550375691408193, + "learning_rate": 1.3700952231298667e-05, + "loss": 1.0263, + "step": 25560 + }, + { + "epoch": 0.38568282602793447, + "grad_norm": 0.6723081475061509, + "learning_rate": 1.3696505216259977e-05, + "loss": 1.0497, + "step": 25570 + }, + { + "epoch": 0.3858336601405774, + "grad_norm": 0.7292590109750205, + "learning_rate": 1.3692057354344788e-05, + "loss": 1.0374, + "step": 25580 + }, + { + "epoch": 0.3859844942532203, + "grad_norm": 0.7091487258606353, + "learning_rate": 1.3687608646572112e-05, + "loss": 1.0424, + "step": 25590 + }, + { + "epoch": 0.38613532836586323, + "grad_norm": 0.6299546416353058, + "learning_rate": 1.368315909396116e-05, + "loss": 1.0154, + "step": 25600 + }, + { + "epoch": 0.3862861624785061, + "grad_norm": 0.7119535342004303, + "learning_rate": 1.367870869753133e-05, + "loss": 1.0077, + "step": 25610 + }, + { + "epoch": 0.38643699659114905, + "grad_norm": 0.6318406264118079, + "learning_rate": 1.367425745830222e-05, + "loss": 1.0231, + "step": 25620 + }, + { + "epoch": 0.386587830703792, + "grad_norm": 0.6686734884419289, + "learning_rate": 1.3669805377293612e-05, + "loss": 1.0277, + "step": 25630 + }, + { + "epoch": 0.3867386648164349, + "grad_norm": 0.6163886955576866, + "learning_rate": 1.366535245552549e-05, + "loss": 1.0346, + "step": 25640 + }, + { + "epoch": 0.3868894989290778, + "grad_norm": 0.6865068315425645, + "learning_rate": 1.366089869401803e-05, + "loss": 1.0036, + "step": 25650 + }, + { + "epoch": 0.3870403330417207, + "grad_norm": 0.6827808986999361, + "learning_rate": 1.3656444093791591e-05, + "loss": 1.029, + "step": 25660 + }, + { + "epoch": 0.38719116715436364, + "grad_norm": 0.8079571787475398, + "learning_rate": 1.3651988655866735e-05, + "loss": 1.024, + "step": 25670 + }, + { + "epoch": 0.3873420012670065, + "grad_norm": 0.647384286019711, + "learning_rate": 1.364753238126421e-05, + "loss": 1.0373, + "step": 25680 + }, + { + "epoch": 0.38749283537964946, + "grad_norm": 0.6472076519350608, + "learning_rate": 1.3643075271004956e-05, + "loss": 1.0346, + "step": 25690 + }, + { + "epoch": 0.3876436694922924, + "grad_norm": 0.6752379881610253, + "learning_rate": 1.3638617326110109e-05, + "loss": 0.9962, + "step": 25700 + }, + { + "epoch": 0.3877945036049353, + "grad_norm": 0.6308721391759217, + "learning_rate": 1.3634158547600987e-05, + "loss": 1.0387, + "step": 25710 + }, + { + "epoch": 0.3879453377175782, + "grad_norm": 0.7021632776641707, + "learning_rate": 1.3629698936499112e-05, + "loss": 1.0381, + "step": 25720 + }, + { + "epoch": 0.3880961718302211, + "grad_norm": 0.6893561935651217, + "learning_rate": 1.3625238493826183e-05, + "loss": 1.0093, + "step": 25730 + }, + { + "epoch": 0.38824700594286404, + "grad_norm": 0.6749921623423486, + "learning_rate": 1.36207772206041e-05, + "loss": 1.0325, + "step": 25740 + }, + { + "epoch": 0.3883978400555069, + "grad_norm": 0.6860936233321492, + "learning_rate": 1.361631511785495e-05, + "loss": 1.02, + "step": 25750 + }, + { + "epoch": 0.38854867416814987, + "grad_norm": 0.6369285701168259, + "learning_rate": 1.3611852186601007e-05, + "loss": 1.0173, + "step": 25760 + }, + { + "epoch": 0.3886995082807928, + "grad_norm": 0.6622805257295893, + "learning_rate": 1.3607388427864736e-05, + "loss": 1.0311, + "step": 25770 + }, + { + "epoch": 0.3888503423934357, + "grad_norm": 0.6743942535280878, + "learning_rate": 1.3602923842668794e-05, + "loss": 1.0145, + "step": 25780 + }, + { + "epoch": 0.3890011765060786, + "grad_norm": 0.6216578990012557, + "learning_rate": 1.3598458432036028e-05, + "loss": 1.0144, + "step": 25790 + }, + { + "epoch": 0.3891520106187215, + "grad_norm": 0.6357858735620161, + "learning_rate": 1.3593992196989472e-05, + "loss": 1.0413, + "step": 25800 + }, + { + "epoch": 0.38930284473136445, + "grad_norm": 0.6683155051107845, + "learning_rate": 1.3589525138552346e-05, + "loss": 1.0302, + "step": 25810 + }, + { + "epoch": 0.38945367884400733, + "grad_norm": 0.6533302479081323, + "learning_rate": 1.3585057257748067e-05, + "loss": 1.0128, + "step": 25820 + }, + { + "epoch": 0.3896045129566503, + "grad_norm": 0.689481602833364, + "learning_rate": 1.3580588555600229e-05, + "loss": 1.0144, + "step": 25830 + }, + { + "epoch": 0.3897553470692932, + "grad_norm": 0.641038542979886, + "learning_rate": 1.3576119033132624e-05, + "loss": 1.0096, + "step": 25840 + }, + { + "epoch": 0.3899061811819361, + "grad_norm": 0.6726061713465478, + "learning_rate": 1.3571648691369229e-05, + "loss": 1.0205, + "step": 25850 + }, + { + "epoch": 0.39005701529457903, + "grad_norm": 0.6837016736180902, + "learning_rate": 1.3567177531334203e-05, + "loss": 1.014, + "step": 25860 + }, + { + "epoch": 0.3902078494072219, + "grad_norm": 0.6323150206089636, + "learning_rate": 1.3562705554051905e-05, + "loss": 1.0179, + "step": 25870 + }, + { + "epoch": 0.39035868351986486, + "grad_norm": 0.6296605629948951, + "learning_rate": 1.3558232760546866e-05, + "loss": 1.0272, + "step": 25880 + }, + { + "epoch": 0.39050951763250774, + "grad_norm": 0.7121733649862176, + "learning_rate": 1.3553759151843817e-05, + "loss": 1.0126, + "step": 25890 + }, + { + "epoch": 0.3906603517451507, + "grad_norm": 0.7240033906544167, + "learning_rate": 1.3549284728967666e-05, + "loss": 1.031, + "step": 25900 + }, + { + "epoch": 0.3908111858577936, + "grad_norm": 0.6304938289578842, + "learning_rate": 1.3544809492943516e-05, + "loss": 1.0205, + "step": 25910 + }, + { + "epoch": 0.3909620199704365, + "grad_norm": 0.7039465341019349, + "learning_rate": 1.3540333444796652e-05, + "loss": 1.0122, + "step": 25920 + }, + { + "epoch": 0.39111285408307944, + "grad_norm": 0.7280576214792905, + "learning_rate": 1.3535856585552545e-05, + "loss": 1.0219, + "step": 25930 + }, + { + "epoch": 0.3912636881957223, + "grad_norm": 0.6711335884467877, + "learning_rate": 1.3531378916236847e-05, + "loss": 1.018, + "step": 25940 + }, + { + "epoch": 0.39141452230836526, + "grad_norm": 0.687412924290685, + "learning_rate": 1.3526900437875408e-05, + "loss": 1.0374, + "step": 25950 + }, + { + "epoch": 0.39156535642100815, + "grad_norm": 0.641858280838546, + "learning_rate": 1.3522421151494252e-05, + "loss": 1.0211, + "step": 25960 + }, + { + "epoch": 0.3917161905336511, + "grad_norm": 0.9921661237920468, + "learning_rate": 1.3517941058119593e-05, + "loss": 1.0196, + "step": 25970 + }, + { + "epoch": 0.391867024646294, + "grad_norm": 0.6763569699427225, + "learning_rate": 1.3513460158777826e-05, + "loss": 1.0201, + "step": 25980 + }, + { + "epoch": 0.3920178587589369, + "grad_norm": 0.7215498715932344, + "learning_rate": 1.350897845449554e-05, + "loss": 1.0394, + "step": 25990 + }, + { + "epoch": 0.39216869287157985, + "grad_norm": 0.7053928883689686, + "learning_rate": 1.35044959462995e-05, + "loss": 1.0308, + "step": 26000 + }, + { + "epoch": 0.39231952698422273, + "grad_norm": 0.719174428580862, + "learning_rate": 1.3500012635216657e-05, + "loss": 1.0089, + "step": 26010 + }, + { + "epoch": 0.39247036109686567, + "grad_norm": 0.6450332665774893, + "learning_rate": 1.3495528522274142e-05, + "loss": 1.0388, + "step": 26020 + }, + { + "epoch": 0.39262119520950856, + "grad_norm": 0.6833717154921624, + "learning_rate": 1.3491043608499282e-05, + "loss": 1.0389, + "step": 26030 + }, + { + "epoch": 0.3927720293221515, + "grad_norm": 0.6481114935404285, + "learning_rate": 1.3486557894919572e-05, + "loss": 1.0222, + "step": 26040 + }, + { + "epoch": 0.39292286343479443, + "grad_norm": 0.6530054049797704, + "learning_rate": 1.3482071382562696e-05, + "loss": 1.023, + "step": 26050 + }, + { + "epoch": 0.3930736975474373, + "grad_norm": 0.720522165209391, + "learning_rate": 1.347758407245653e-05, + "loss": 1.027, + "step": 26060 + }, + { + "epoch": 0.39322453166008026, + "grad_norm": 0.6802631777042204, + "learning_rate": 1.3473095965629128e-05, + "loss": 1.0184, + "step": 26070 + }, + { + "epoch": 0.39337536577272314, + "grad_norm": 0.80908122299263, + "learning_rate": 1.3468607063108714e-05, + "loss": 1.0409, + "step": 26080 + }, + { + "epoch": 0.3935261998853661, + "grad_norm": 0.6571580535159118, + "learning_rate": 1.346411736592371e-05, + "loss": 1.0188, + "step": 26090 + }, + { + "epoch": 0.39367703399800896, + "grad_norm": 0.6883229390054468, + "learning_rate": 1.3459626875102716e-05, + "loss": 1.0405, + "step": 26100 + }, + { + "epoch": 0.3938278681106519, + "grad_norm": 0.658916208068455, + "learning_rate": 1.3455135591674504e-05, + "loss": 1.0111, + "step": 26110 + }, + { + "epoch": 0.39397870222329484, + "grad_norm": 0.6417963127450621, + "learning_rate": 1.3450643516668045e-05, + "loss": 1.0317, + "step": 26120 + }, + { + "epoch": 0.3941295363359377, + "grad_norm": 0.6371695780421284, + "learning_rate": 1.3446150651112475e-05, + "loss": 1.0389, + "step": 26130 + }, + { + "epoch": 0.39428037044858066, + "grad_norm": 0.7014200683167879, + "learning_rate": 1.3441656996037122e-05, + "loss": 1.0302, + "step": 26140 + }, + { + "epoch": 0.39443120456122355, + "grad_norm": 0.716361152382591, + "learning_rate": 1.343716255247149e-05, + "loss": 1.0369, + "step": 26150 + }, + { + "epoch": 0.3945820386738665, + "grad_norm": 0.6697525507053493, + "learning_rate": 1.3432667321445267e-05, + "loss": 1.0367, + "step": 26160 + }, + { + "epoch": 0.39473287278650937, + "grad_norm": 0.6616458092468428, + "learning_rate": 1.3428171303988317e-05, + "loss": 1.0274, + "step": 26170 + }, + { + "epoch": 0.3948837068991523, + "grad_norm": 0.6657245361375078, + "learning_rate": 1.3423674501130686e-05, + "loss": 1.016, + "step": 26180 + }, + { + "epoch": 0.39503454101179525, + "grad_norm": 0.6793406617868865, + "learning_rate": 1.3419176913902601e-05, + "loss": 0.9979, + "step": 26190 + }, + { + "epoch": 0.39518537512443813, + "grad_norm": 0.6788744220162932, + "learning_rate": 1.3414678543334466e-05, + "loss": 1.0286, + "step": 26200 + }, + { + "epoch": 0.39533620923708107, + "grad_norm": 0.6816873285978307, + "learning_rate": 1.341017939045687e-05, + "loss": 1.0111, + "step": 26210 + }, + { + "epoch": 0.39548704334972395, + "grad_norm": 0.7192734993445156, + "learning_rate": 1.3405679456300576e-05, + "loss": 1.0205, + "step": 26220 + }, + { + "epoch": 0.3956378774623669, + "grad_norm": 0.6450258862418454, + "learning_rate": 1.3401178741896525e-05, + "loss": 1.0213, + "step": 26230 + }, + { + "epoch": 0.3957887115750098, + "grad_norm": 0.6739699014137217, + "learning_rate": 1.3396677248275842e-05, + "loss": 1.0221, + "step": 26240 + }, + { + "epoch": 0.3959395456876527, + "grad_norm": 0.6676162763524662, + "learning_rate": 1.3392174976469832e-05, + "loss": 1.0162, + "step": 26250 + }, + { + "epoch": 0.39609037980029566, + "grad_norm": 0.6541145987179253, + "learning_rate": 1.3387671927509967e-05, + "loss": 1.0053, + "step": 26260 + }, + { + "epoch": 0.39624121391293854, + "grad_norm": 0.6626200305043539, + "learning_rate": 1.3383168102427906e-05, + "loss": 1.0271, + "step": 26270 + }, + { + "epoch": 0.3963920480255815, + "grad_norm": 0.6693917517093455, + "learning_rate": 1.3378663502255487e-05, + "loss": 1.0135, + "step": 26280 + }, + { + "epoch": 0.39654288213822436, + "grad_norm": 0.9061570257557259, + "learning_rate": 1.3374158128024722e-05, + "loss": 1.038, + "step": 26290 + }, + { + "epoch": 0.3966937162508673, + "grad_norm": 0.6988429619400803, + "learning_rate": 1.3369651980767792e-05, + "loss": 1.0191, + "step": 26300 + }, + { + "epoch": 0.3968445503635102, + "grad_norm": 0.6286989392560608, + "learning_rate": 1.3365145061517077e-05, + "loss": 1.0256, + "step": 26310 + }, + { + "epoch": 0.3969953844761531, + "grad_norm": 0.7935748377569813, + "learning_rate": 1.3360637371305119e-05, + "loss": 1.0311, + "step": 26320 + }, + { + "epoch": 0.39714621858879606, + "grad_norm": 0.6237048915226802, + "learning_rate": 1.3356128911164631e-05, + "loss": 1.0261, + "step": 26330 + }, + { + "epoch": 0.39729705270143895, + "grad_norm": 0.6227532569211015, + "learning_rate": 1.3351619682128514e-05, + "loss": 1.0107, + "step": 26340 + }, + { + "epoch": 0.3974478868140819, + "grad_norm": 0.629357016364676, + "learning_rate": 1.3347109685229845e-05, + "loss": 1.0291, + "step": 26350 + }, + { + "epoch": 0.39759872092672477, + "grad_norm": 0.6802495534486027, + "learning_rate": 1.3342598921501866e-05, + "loss": 1.0359, + "step": 26360 + }, + { + "epoch": 0.3977495550393677, + "grad_norm": 0.683060922877234, + "learning_rate": 1.3338087391978005e-05, + "loss": 1.0073, + "step": 26370 + }, + { + "epoch": 0.3979003891520106, + "grad_norm": 0.6644410077592553, + "learning_rate": 1.3333575097691863e-05, + "loss": 1.0247, + "step": 26380 + }, + { + "epoch": 0.39805122326465353, + "grad_norm": 0.6544494198096368, + "learning_rate": 1.3329062039677214e-05, + "loss": 1.0102, + "step": 26390 + }, + { + "epoch": 0.39820205737729647, + "grad_norm": 0.6563066173839447, + "learning_rate": 1.3324548218968007e-05, + "loss": 1.026, + "step": 26400 + }, + { + "epoch": 0.39835289148993935, + "grad_norm": 0.6961083789390852, + "learning_rate": 1.3320033636598374e-05, + "loss": 1.017, + "step": 26410 + }, + { + "epoch": 0.3985037256025823, + "grad_norm": 0.627916724600084, + "learning_rate": 1.3315518293602605e-05, + "loss": 1.0416, + "step": 26420 + }, + { + "epoch": 0.3986545597152252, + "grad_norm": 0.6344066818368742, + "learning_rate": 1.3311002191015181e-05, + "loss": 1.0041, + "step": 26430 + }, + { + "epoch": 0.3988053938278681, + "grad_norm": 0.6442574580816788, + "learning_rate": 1.3306485329870745e-05, + "loss": 1.0173, + "step": 26440 + }, + { + "epoch": 0.398956227940511, + "grad_norm": 0.6450722590584567, + "learning_rate": 1.330196771120412e-05, + "loss": 1.0217, + "step": 26450 + }, + { + "epoch": 0.39910706205315394, + "grad_norm": 0.6673677800579503, + "learning_rate": 1.3297449336050303e-05, + "loss": 1.0323, + "step": 26460 + }, + { + "epoch": 0.3992578961657969, + "grad_norm": 0.6564962214804336, + "learning_rate": 1.3292930205444461e-05, + "loss": 1.0218, + "step": 26470 + }, + { + "epoch": 0.39940873027843976, + "grad_norm": 0.6922933277134663, + "learning_rate": 1.3288410320421936e-05, + "loss": 1.037, + "step": 26480 + }, + { + "epoch": 0.3995595643910827, + "grad_norm": 0.6352281815998273, + "learning_rate": 1.3283889682018246e-05, + "loss": 1.0086, + "step": 26490 + }, + { + "epoch": 0.3997103985037256, + "grad_norm": 0.6931575419764224, + "learning_rate": 1.327936829126907e-05, + "loss": 1.0132, + "step": 26500 + }, + { + "epoch": 0.3998612326163685, + "grad_norm": 0.7959458931513103, + "learning_rate": 1.3274846149210272e-05, + "loss": 1.0212, + "step": 26510 + }, + { + "epoch": 0.4000120667290114, + "grad_norm": 0.6692365747114772, + "learning_rate": 1.3270323256877885e-05, + "loss": 1.0213, + "step": 26520 + }, + { + "epoch": 0.40016290084165435, + "grad_norm": 0.6531865048546859, + "learning_rate": 1.3265799615308111e-05, + "loss": 1.0358, + "step": 26530 + }, + { + "epoch": 0.4003137349542973, + "grad_norm": 0.663386246777261, + "learning_rate": 1.3261275225537323e-05, + "loss": 1.0173, + "step": 26540 + }, + { + "epoch": 0.40046456906694017, + "grad_norm": 0.6580560774282866, + "learning_rate": 1.3256750088602067e-05, + "loss": 1.0239, + "step": 26550 + }, + { + "epoch": 0.4006154031795831, + "grad_norm": 0.6833457727373974, + "learning_rate": 1.3252224205539066e-05, + "loss": 1.0327, + "step": 26560 + }, + { + "epoch": 0.400766237292226, + "grad_norm": 0.6812535574528203, + "learning_rate": 1.3247697577385203e-05, + "loss": 1.0279, + "step": 26570 + }, + { + "epoch": 0.40091707140486893, + "grad_norm": 0.686585782633144, + "learning_rate": 1.3243170205177543e-05, + "loss": 1.0435, + "step": 26580 + }, + { + "epoch": 0.4010679055175118, + "grad_norm": 0.65807103913164, + "learning_rate": 1.3238642089953309e-05, + "loss": 1.0355, + "step": 26590 + }, + { + "epoch": 0.40121873963015475, + "grad_norm": 0.6940365587805951, + "learning_rate": 1.3234113232749909e-05, + "loss": 1.0034, + "step": 26600 + }, + { + "epoch": 0.4013695737427977, + "grad_norm": 0.6579014296789998, + "learning_rate": 1.3229583634604904e-05, + "loss": 1.0141, + "step": 26610 + }, + { + "epoch": 0.4015204078554406, + "grad_norm": 0.665508203719433, + "learning_rate": 1.3225053296556038e-05, + "loss": 0.998, + "step": 26620 + }, + { + "epoch": 0.4016712419680835, + "grad_norm": 0.6511960398448203, + "learning_rate": 1.322052221964122e-05, + "loss": 1.0103, + "step": 26630 + }, + { + "epoch": 0.4018220760807264, + "grad_norm": 0.6461014259026747, + "learning_rate": 1.3215990404898528e-05, + "loss": 1.0246, + "step": 26640 + }, + { + "epoch": 0.40197291019336934, + "grad_norm": 0.6673040181823915, + "learning_rate": 1.3211457853366212e-05, + "loss": 1.0313, + "step": 26650 + }, + { + "epoch": 0.4021237443060122, + "grad_norm": 0.6962309539985772, + "learning_rate": 1.3206924566082689e-05, + "loss": 1.011, + "step": 26660 + }, + { + "epoch": 0.40227457841865516, + "grad_norm": 0.6285497431762541, + "learning_rate": 1.3202390544086538e-05, + "loss": 1.0147, + "step": 26670 + }, + { + "epoch": 0.4024254125312981, + "grad_norm": 0.7117324921059532, + "learning_rate": 1.3197855788416515e-05, + "loss": 1.0139, + "step": 26680 + }, + { + "epoch": 0.402576246643941, + "grad_norm": 0.6881347709883774, + "learning_rate": 1.3193320300111543e-05, + "loss": 1.0153, + "step": 26690 + }, + { + "epoch": 0.4027270807565839, + "grad_norm": 0.6250134169159706, + "learning_rate": 1.3188784080210713e-05, + "loss": 1.024, + "step": 26700 + }, + { + "epoch": 0.4028779148692268, + "grad_norm": 0.6723229753684548, + "learning_rate": 1.3184247129753274e-05, + "loss": 1.0174, + "step": 26710 + }, + { + "epoch": 0.40302874898186974, + "grad_norm": 0.6888306983965926, + "learning_rate": 1.3179709449778657e-05, + "loss": 1.0153, + "step": 26720 + }, + { + "epoch": 0.40317958309451263, + "grad_norm": 0.6545084189940223, + "learning_rate": 1.3175171041326449e-05, + "loss": 1.0239, + "step": 26730 + }, + { + "epoch": 0.40333041720715557, + "grad_norm": 0.7087423728994455, + "learning_rate": 1.3170631905436415e-05, + "loss": 1.012, + "step": 26740 + }, + { + "epoch": 0.4034812513197985, + "grad_norm": 0.6463138158911202, + "learning_rate": 1.316609204314847e-05, + "loss": 1.0149, + "step": 26750 + }, + { + "epoch": 0.4036320854324414, + "grad_norm": 0.7002198578793377, + "learning_rate": 1.3161551455502715e-05, + "loss": 1.0108, + "step": 26760 + }, + { + "epoch": 0.40378291954508433, + "grad_norm": 0.6840985022740729, + "learning_rate": 1.3157010143539401e-05, + "loss": 0.9995, + "step": 26770 + }, + { + "epoch": 0.4039337536577272, + "grad_norm": 0.66706669958029, + "learning_rate": 1.3152468108298955e-05, + "loss": 1.0284, + "step": 26780 + }, + { + "epoch": 0.40408458777037015, + "grad_norm": 0.6470461951144538, + "learning_rate": 1.3147925350821961e-05, + "loss": 1.0191, + "step": 26790 + }, + { + "epoch": 0.40423542188301304, + "grad_norm": 0.6675703203414863, + "learning_rate": 1.3143381872149175e-05, + "loss": 1.0033, + "step": 26800 + }, + { + "epoch": 0.404386255995656, + "grad_norm": 0.6573092070173383, + "learning_rate": 1.3138837673321523e-05, + "loss": 1.0253, + "step": 26810 + }, + { + "epoch": 0.4045370901082989, + "grad_norm": 0.6637541210527159, + "learning_rate": 1.3134292755380083e-05, + "loss": 1.0194, + "step": 26820 + }, + { + "epoch": 0.4046879242209418, + "grad_norm": 0.6845112543921175, + "learning_rate": 1.3129747119366108e-05, + "loss": 1.0088, + "step": 26830 + }, + { + "epoch": 0.40483875833358474, + "grad_norm": 0.6914866418012129, + "learning_rate": 1.3125200766321012e-05, + "loss": 1.0255, + "step": 26840 + }, + { + "epoch": 0.4049895924462276, + "grad_norm": 0.7040059261242932, + "learning_rate": 1.312065369728637e-05, + "loss": 1.0216, + "step": 26850 + }, + { + "epoch": 0.40514042655887056, + "grad_norm": 0.6479384504809381, + "learning_rate": 1.3116105913303928e-05, + "loss": 1.0262, + "step": 26860 + }, + { + "epoch": 0.40529126067151344, + "grad_norm": 0.672120010540155, + "learning_rate": 1.3111557415415588e-05, + "loss": 1.0314, + "step": 26870 + }, + { + "epoch": 0.4054420947841564, + "grad_norm": 0.6932735290054577, + "learning_rate": 1.3107008204663423e-05, + "loss": 1.0194, + "step": 26880 + }, + { + "epoch": 0.4055929288967993, + "grad_norm": 0.6139081938477632, + "learning_rate": 1.3102458282089665e-05, + "loss": 1.0138, + "step": 26890 + }, + { + "epoch": 0.4057437630094422, + "grad_norm": 0.6068043091801533, + "learning_rate": 1.309790764873671e-05, + "loss": 1.0364, + "step": 26900 + }, + { + "epoch": 0.40589459712208514, + "grad_norm": 0.6430647427728902, + "learning_rate": 1.309335630564712e-05, + "loss": 1.0226, + "step": 26910 + }, + { + "epoch": 0.406045431234728, + "grad_norm": 0.7436045312450718, + "learning_rate": 1.3088804253863611e-05, + "loss": 1.0344, + "step": 26920 + }, + { + "epoch": 0.40619626534737097, + "grad_norm": 0.621054939780627, + "learning_rate": 1.308425149442907e-05, + "loss": 1.0228, + "step": 26930 + }, + { + "epoch": 0.40634709946001385, + "grad_norm": 0.7011652415852868, + "learning_rate": 1.3079698028386543e-05, + "loss": 1.0148, + "step": 26940 + }, + { + "epoch": 0.4064979335726568, + "grad_norm": 0.7994051850446545, + "learning_rate": 1.3075143856779238e-05, + "loss": 1.0353, + "step": 26950 + }, + { + "epoch": 0.40664876768529973, + "grad_norm": 0.6733666166464384, + "learning_rate": 1.307058898065052e-05, + "loss": 1.0154, + "step": 26960 + }, + { + "epoch": 0.4067996017979426, + "grad_norm": 0.655602170200735, + "learning_rate": 1.3066033401043927e-05, + "loss": 0.9967, + "step": 26970 + }, + { + "epoch": 0.40695043591058555, + "grad_norm": 0.6540767095994704, + "learning_rate": 1.3061477119003147e-05, + "loss": 1.0243, + "step": 26980 + }, + { + "epoch": 0.40710127002322843, + "grad_norm": 0.7204851907862039, + "learning_rate": 1.3056920135572036e-05, + "loss": 1.025, + "step": 26990 + }, + { + "epoch": 0.4072521041358714, + "grad_norm": 0.688905594497674, + "learning_rate": 1.3052362451794604e-05, + "loss": 1.0164, + "step": 27000 + }, + { + "epoch": 0.40740293824851426, + "grad_norm": 0.6337736297620626, + "learning_rate": 1.3047804068715026e-05, + "loss": 1.0102, + "step": 27010 + }, + { + "epoch": 0.4075537723611572, + "grad_norm": 0.6575353147405852, + "learning_rate": 1.3043244987377641e-05, + "loss": 1.0333, + "step": 27020 + }, + { + "epoch": 0.40770460647380014, + "grad_norm": 0.7220872021480542, + "learning_rate": 1.3038685208826937e-05, + "loss": 1.0005, + "step": 27030 + }, + { + "epoch": 0.407855440586443, + "grad_norm": 0.6602828817936109, + "learning_rate": 1.3034124734107572e-05, + "loss": 1.0134, + "step": 27040 + }, + { + "epoch": 0.40800627469908596, + "grad_norm": 0.669489217838389, + "learning_rate": 1.3029563564264356e-05, + "loss": 1.0406, + "step": 27050 + }, + { + "epoch": 0.40815710881172884, + "grad_norm": 0.6513805138965117, + "learning_rate": 1.3025001700342266e-05, + "loss": 1.0138, + "step": 27060 + }, + { + "epoch": 0.4083079429243718, + "grad_norm": 0.6775291395134211, + "learning_rate": 1.302043914338643e-05, + "loss": 1.0166, + "step": 27070 + }, + { + "epoch": 0.40845877703701466, + "grad_norm": 0.7315437921086109, + "learning_rate": 1.301587589444214e-05, + "loss": 1.0275, + "step": 27080 + }, + { + "epoch": 0.4086096111496576, + "grad_norm": 0.644370784760802, + "learning_rate": 1.301131195455485e-05, + "loss": 1.0188, + "step": 27090 + }, + { + "epoch": 0.40876044526230054, + "grad_norm": 0.6716157943058249, + "learning_rate": 1.3006747324770161e-05, + "loss": 1.018, + "step": 27100 + }, + { + "epoch": 0.4089112793749434, + "grad_norm": 0.6896179384708486, + "learning_rate": 1.3002182006133841e-05, + "loss": 1.0128, + "step": 27110 + }, + { + "epoch": 0.40906211348758637, + "grad_norm": 0.6354845288333058, + "learning_rate": 1.2997615999691813e-05, + "loss": 1.0145, + "step": 27120 + }, + { + "epoch": 0.40921294760022925, + "grad_norm": 0.6666383611265709, + "learning_rate": 1.2993049306490158e-05, + "loss": 1.0, + "step": 27130 + }, + { + "epoch": 0.4093637817128722, + "grad_norm": 0.6830528237480998, + "learning_rate": 1.2988481927575114e-05, + "loss": 1.0119, + "step": 27140 + }, + { + "epoch": 0.40951461582551507, + "grad_norm": 0.6602269592109321, + "learning_rate": 1.2983913863993076e-05, + "loss": 1.0155, + "step": 27150 + }, + { + "epoch": 0.409665449938158, + "grad_norm": 0.6778718252510005, + "learning_rate": 1.2979345116790598e-05, + "loss": 1.0153, + "step": 27160 + }, + { + "epoch": 0.40981628405080095, + "grad_norm": 0.7053259937902517, + "learning_rate": 1.2974775687014386e-05, + "loss": 1.0258, + "step": 27170 + }, + { + "epoch": 0.40996711816344383, + "grad_norm": 0.6587745470672259, + "learning_rate": 1.297020557571131e-05, + "loss": 1.0237, + "step": 27180 + }, + { + "epoch": 0.4101179522760868, + "grad_norm": 0.6772546471925869, + "learning_rate": 1.2965634783928384e-05, + "loss": 1.0223, + "step": 27190 + }, + { + "epoch": 0.41026878638872966, + "grad_norm": 0.691431404954029, + "learning_rate": 1.2961063312712795e-05, + "loss": 1.0203, + "step": 27200 + }, + { + "epoch": 0.4104196205013726, + "grad_norm": 0.6954097300708385, + "learning_rate": 1.2956491163111867e-05, + "loss": 1.0188, + "step": 27210 + }, + { + "epoch": 0.4105704546140155, + "grad_norm": 0.6420454744004148, + "learning_rate": 1.295191833617309e-05, + "loss": 0.9933, + "step": 27220 + }, + { + "epoch": 0.4107212887266584, + "grad_norm": 0.7028859184193814, + "learning_rate": 1.2947344832944114e-05, + "loss": 1.0288, + "step": 27230 + }, + { + "epoch": 0.41087212283930136, + "grad_norm": 0.6467914856542674, + "learning_rate": 1.2942770654472728e-05, + "loss": 1.0177, + "step": 27240 + }, + { + "epoch": 0.41102295695194424, + "grad_norm": 0.615710641401897, + "learning_rate": 1.2938195801806889e-05, + "loss": 1.0087, + "step": 27250 + }, + { + "epoch": 0.4111737910645872, + "grad_norm": 0.6879609133891935, + "learning_rate": 1.2933620275994709e-05, + "loss": 1.0155, + "step": 27260 + }, + { + "epoch": 0.41132462517723006, + "grad_norm": 0.6558027474237881, + "learning_rate": 1.2929044078084445e-05, + "loss": 1.0312, + "step": 27270 + }, + { + "epoch": 0.411475459289873, + "grad_norm": 0.6147023088741886, + "learning_rate": 1.2924467209124516e-05, + "loss": 1.0152, + "step": 27280 + }, + { + "epoch": 0.4116262934025159, + "grad_norm": 0.7018788419709687, + "learning_rate": 1.2919889670163486e-05, + "loss": 1.0242, + "step": 27290 + }, + { + "epoch": 0.4117771275151588, + "grad_norm": 0.6704625871745751, + "learning_rate": 1.2915311462250082e-05, + "loss": 1.0215, + "step": 27300 + }, + { + "epoch": 0.41192796162780176, + "grad_norm": 0.6462257414270963, + "learning_rate": 1.291073258643318e-05, + "loss": 1.0095, + "step": 27310 + }, + { + "epoch": 0.41207879574044465, + "grad_norm": 0.6474382426385086, + "learning_rate": 1.2906153043761811e-05, + "loss": 1.0036, + "step": 27320 + }, + { + "epoch": 0.4122296298530876, + "grad_norm": 0.6758147116739744, + "learning_rate": 1.2901572835285153e-05, + "loss": 1.007, + "step": 27330 + }, + { + "epoch": 0.41238046396573047, + "grad_norm": 0.6305514143620081, + "learning_rate": 1.2896991962052545e-05, + "loss": 0.9977, + "step": 27340 + }, + { + "epoch": 0.4125312980783734, + "grad_norm": 0.6458203059931944, + "learning_rate": 1.289241042511347e-05, + "loss": 1.02, + "step": 27350 + }, + { + "epoch": 0.4126821321910163, + "grad_norm": 0.6592063106807763, + "learning_rate": 1.2887828225517571e-05, + "loss": 1.0147, + "step": 27360 + }, + { + "epoch": 0.41283296630365923, + "grad_norm": 0.6503921307945496, + "learning_rate": 1.2883245364314634e-05, + "loss": 1.0256, + "step": 27370 + }, + { + "epoch": 0.41298380041630217, + "grad_norm": 0.6646154010310407, + "learning_rate": 1.2878661842554605e-05, + "loss": 1.0086, + "step": 27380 + }, + { + "epoch": 0.41313463452894506, + "grad_norm": 0.6791966913996631, + "learning_rate": 1.2874077661287576e-05, + "loss": 1.0234, + "step": 27390 + }, + { + "epoch": 0.413285468641588, + "grad_norm": 0.6349353216515959, + "learning_rate": 1.2869492821563794e-05, + "loss": 0.9996, + "step": 27400 + }, + { + "epoch": 0.4134363027542309, + "grad_norm": 0.6532731116440017, + "learning_rate": 1.2864907324433653e-05, + "loss": 1.0187, + "step": 27410 + }, + { + "epoch": 0.4135871368668738, + "grad_norm": 0.6788122057064673, + "learning_rate": 1.2860321170947698e-05, + "loss": 1.0272, + "step": 27420 + }, + { + "epoch": 0.4137379709795167, + "grad_norm": 0.6246406386862682, + "learning_rate": 1.2855734362156627e-05, + "loss": 1.0247, + "step": 27430 + }, + { + "epoch": 0.41388880509215964, + "grad_norm": 0.9053772343896884, + "learning_rate": 1.2851146899111288e-05, + "loss": 1.0003, + "step": 27440 + }, + { + "epoch": 0.4140396392048026, + "grad_norm": 0.6832094271333482, + "learning_rate": 1.2846558782862677e-05, + "loss": 1.0064, + "step": 27450 + }, + { + "epoch": 0.41419047331744546, + "grad_norm": 0.8049969271262861, + "learning_rate": 1.2841970014461935e-05, + "loss": 1.0085, + "step": 27460 + }, + { + "epoch": 0.4143413074300884, + "grad_norm": 0.6947973458379227, + "learning_rate": 1.2837380594960367e-05, + "loss": 1.0167, + "step": 27470 + }, + { + "epoch": 0.4144921415427313, + "grad_norm": 0.6828356729427405, + "learning_rate": 1.2832790525409414e-05, + "loss": 1.0201, + "step": 27480 + }, + { + "epoch": 0.4146429756553742, + "grad_norm": 0.6633121240624332, + "learning_rate": 1.2828199806860666e-05, + "loss": 1.0201, + "step": 27490 + }, + { + "epoch": 0.4147938097680171, + "grad_norm": 0.6921509887612186, + "learning_rate": 1.282360844036587e-05, + "loss": 1.011, + "step": 27500 + }, + { + "epoch": 0.41494464388066005, + "grad_norm": 0.6757607103043095, + "learning_rate": 1.2819016426976917e-05, + "loss": 1.0262, + "step": 27510 + }, + { + "epoch": 0.415095477993303, + "grad_norm": 0.6353075831795025, + "learning_rate": 1.2814423767745846e-05, + "loss": 1.028, + "step": 27520 + }, + { + "epoch": 0.41524631210594587, + "grad_norm": 0.6381056620562826, + "learning_rate": 1.2809830463724839e-05, + "loss": 1.0278, + "step": 27530 + }, + { + "epoch": 0.4153971462185888, + "grad_norm": 0.7324074670681665, + "learning_rate": 1.2805236515966234e-05, + "loss": 1.0135, + "step": 27540 + }, + { + "epoch": 0.4155479803312317, + "grad_norm": 0.6489022370431976, + "learning_rate": 1.280064192552252e-05, + "loss": 1.0285, + "step": 27550 + }, + { + "epoch": 0.41569881444387463, + "grad_norm": 0.6590123020861758, + "learning_rate": 1.2796046693446314e-05, + "loss": 1.0018, + "step": 27560 + }, + { + "epoch": 0.4158496485565175, + "grad_norm": 0.6703625789708363, + "learning_rate": 1.2791450820790404e-05, + "loss": 1.0272, + "step": 27570 + }, + { + "epoch": 0.41600048266916045, + "grad_norm": 0.6669051638650686, + "learning_rate": 1.2786854308607707e-05, + "loss": 1.0172, + "step": 27580 + }, + { + "epoch": 0.4161513167818034, + "grad_norm": 0.6397467583564155, + "learning_rate": 1.2782257157951294e-05, + "loss": 0.9886, + "step": 27590 + }, + { + "epoch": 0.4163021508944463, + "grad_norm": 0.6742350458037785, + "learning_rate": 1.277765936987438e-05, + "loss": 1.0161, + "step": 27600 + }, + { + "epoch": 0.4164529850070892, + "grad_norm": 0.687046624738598, + "learning_rate": 1.2773060945430329e-05, + "loss": 1.0346, + "step": 27610 + }, + { + "epoch": 0.4166038191197321, + "grad_norm": 0.7151354828027944, + "learning_rate": 1.2768461885672649e-05, + "loss": 1.04, + "step": 27620 + }, + { + "epoch": 0.41675465323237504, + "grad_norm": 0.6574144584601066, + "learning_rate": 1.2763862191654995e-05, + "loss": 1.0067, + "step": 27630 + }, + { + "epoch": 0.4169054873450179, + "grad_norm": 0.6967248412136336, + "learning_rate": 1.2759261864431163e-05, + "loss": 1.0215, + "step": 27640 + }, + { + "epoch": 0.41705632145766086, + "grad_norm": 0.6604203385943894, + "learning_rate": 1.2754660905055096e-05, + "loss": 1.0147, + "step": 27650 + }, + { + "epoch": 0.4172071555703038, + "grad_norm": 0.6710742294607583, + "learning_rate": 1.2750059314580887e-05, + "loss": 0.9906, + "step": 27660 + }, + { + "epoch": 0.4173579896829467, + "grad_norm": 0.697900027502695, + "learning_rate": 1.2745457094062762e-05, + "loss": 1.0233, + "step": 27670 + }, + { + "epoch": 0.4175088237955896, + "grad_norm": 0.6395139645719479, + "learning_rate": 1.2740854244555108e-05, + "loss": 1.0272, + "step": 27680 + }, + { + "epoch": 0.4176596579082325, + "grad_norm": 0.7354228640106276, + "learning_rate": 1.2736250767112443e-05, + "loss": 1.0344, + "step": 27690 + }, + { + "epoch": 0.41781049202087545, + "grad_norm": 0.6358373026970772, + "learning_rate": 1.273164666278943e-05, + "loss": 1.0173, + "step": 27700 + }, + { + "epoch": 0.41796132613351833, + "grad_norm": 0.675837455938757, + "learning_rate": 1.2727041932640877e-05, + "loss": 1.017, + "step": 27710 + }, + { + "epoch": 0.41811216024616127, + "grad_norm": 0.7472166095297446, + "learning_rate": 1.272243657772174e-05, + "loss": 1.0212, + "step": 27720 + }, + { + "epoch": 0.4182629943588042, + "grad_norm": 0.6377776588653832, + "learning_rate": 1.2717830599087118e-05, + "loss": 0.9973, + "step": 27730 + }, + { + "epoch": 0.4184138284714471, + "grad_norm": 0.6662354899810894, + "learning_rate": 1.271322399779224e-05, + "loss": 1.0292, + "step": 27740 + }, + { + "epoch": 0.41856466258409003, + "grad_norm": 0.7175245455826077, + "learning_rate": 1.2708616774892499e-05, + "loss": 1.0243, + "step": 27750 + }, + { + "epoch": 0.4187154966967329, + "grad_norm": 0.6727778511176135, + "learning_rate": 1.2704008931443412e-05, + "loss": 1.0295, + "step": 27760 + }, + { + "epoch": 0.41886633080937585, + "grad_norm": 0.6753740964799987, + "learning_rate": 1.2699400468500644e-05, + "loss": 1.0228, + "step": 27770 + }, + { + "epoch": 0.41901716492201874, + "grad_norm": 0.6664247827757717, + "learning_rate": 1.2694791387120003e-05, + "loss": 1.0284, + "step": 27780 + }, + { + "epoch": 0.4191679990346617, + "grad_norm": 0.7029585973721211, + "learning_rate": 1.269018168835744e-05, + "loss": 1.0, + "step": 27790 + }, + { + "epoch": 0.4193188331473046, + "grad_norm": 0.6797928151606527, + "learning_rate": 1.2685571373269048e-05, + "loss": 1.0203, + "step": 27800 + }, + { + "epoch": 0.4194696672599475, + "grad_norm": 0.6573926879029969, + "learning_rate": 1.2680960442911055e-05, + "loss": 1.0076, + "step": 27810 + }, + { + "epoch": 0.41962050137259044, + "grad_norm": 0.6810065214047032, + "learning_rate": 1.2676348898339836e-05, + "loss": 1.0383, + "step": 27820 + }, + { + "epoch": 0.4197713354852333, + "grad_norm": 0.6206413344786341, + "learning_rate": 1.2671736740611907e-05, + "loss": 1.0084, + "step": 27830 + }, + { + "epoch": 0.41992216959787626, + "grad_norm": 0.7430236029991639, + "learning_rate": 1.2667123970783916e-05, + "loss": 0.9984, + "step": 27840 + }, + { + "epoch": 0.42007300371051914, + "grad_norm": 0.6880630138367255, + "learning_rate": 1.2662510589912664e-05, + "loss": 1.0007, + "step": 27850 + }, + { + "epoch": 0.4202238378231621, + "grad_norm": 0.6456171558918973, + "learning_rate": 1.2657896599055084e-05, + "loss": 1.0131, + "step": 27860 + }, + { + "epoch": 0.420374671935805, + "grad_norm": 0.6266038489933029, + "learning_rate": 1.2653281999268247e-05, + "loss": 0.9878, + "step": 27870 + }, + { + "epoch": 0.4205255060484479, + "grad_norm": 0.7945545766799894, + "learning_rate": 1.2648666791609369e-05, + "loss": 1.035, + "step": 27880 + }, + { + "epoch": 0.42067634016109084, + "grad_norm": 0.6439469909815436, + "learning_rate": 1.2644050977135805e-05, + "loss": 0.9873, + "step": 27890 + }, + { + "epoch": 0.42082717427373373, + "grad_norm": 0.6649465567273091, + "learning_rate": 1.2639434556905043e-05, + "loss": 1.0275, + "step": 27900 + }, + { + "epoch": 0.42097800838637667, + "grad_norm": 0.6339040393309149, + "learning_rate": 1.2634817531974717e-05, + "loss": 1.0209, + "step": 27910 + }, + { + "epoch": 0.42112884249901955, + "grad_norm": 0.6456635152808125, + "learning_rate": 1.2630199903402596e-05, + "loss": 1.0065, + "step": 27920 + }, + { + "epoch": 0.4212796766116625, + "grad_norm": 0.6651435115921853, + "learning_rate": 1.2625581672246583e-05, + "loss": 1.0099, + "step": 27930 + }, + { + "epoch": 0.42143051072430543, + "grad_norm": 0.6514451443047076, + "learning_rate": 1.2620962839564732e-05, + "loss": 1.021, + "step": 27940 + }, + { + "epoch": 0.4215813448369483, + "grad_norm": 0.6488253795308706, + "learning_rate": 1.261634340641522e-05, + "loss": 1.0062, + "step": 27950 + }, + { + "epoch": 0.42173217894959125, + "grad_norm": 0.6608520811420737, + "learning_rate": 1.2611723373856368e-05, + "loss": 1.0029, + "step": 27960 + }, + { + "epoch": 0.42188301306223414, + "grad_norm": 0.6592426969587607, + "learning_rate": 1.2607102742946639e-05, + "loss": 1.0063, + "step": 27970 + }, + { + "epoch": 0.4220338471748771, + "grad_norm": 0.6472730366181405, + "learning_rate": 1.2602481514744624e-05, + "loss": 1.0315, + "step": 27980 + }, + { + "epoch": 0.42218468128751996, + "grad_norm": 0.6204827376196196, + "learning_rate": 1.2597859690309058e-05, + "loss": 1.0107, + "step": 27990 + }, + { + "epoch": 0.4223355154001629, + "grad_norm": 0.6459860424578792, + "learning_rate": 1.259323727069881e-05, + "loss": 1.0255, + "step": 28000 + }, + { + "epoch": 0.42248634951280584, + "grad_norm": 0.6046857518453291, + "learning_rate": 1.2588614256972886e-05, + "loss": 1.0108, + "step": 28010 + }, + { + "epoch": 0.4226371836254487, + "grad_norm": 0.6306876505196138, + "learning_rate": 1.2583990650190423e-05, + "loss": 1.0286, + "step": 28020 + }, + { + "epoch": 0.42278801773809166, + "grad_norm": 0.660160468881301, + "learning_rate": 1.2579366451410702e-05, + "loss": 1.0269, + "step": 28030 + }, + { + "epoch": 0.42293885185073454, + "grad_norm": 0.670339878941362, + "learning_rate": 1.2574741661693133e-05, + "loss": 1.0067, + "step": 28040 + }, + { + "epoch": 0.4230896859633775, + "grad_norm": 0.6360711638594235, + "learning_rate": 1.257011628209727e-05, + "loss": 1.0035, + "step": 28050 + }, + { + "epoch": 0.42324052007602037, + "grad_norm": 0.6489794954263162, + "learning_rate": 1.2565490313682788e-05, + "loss": 1.0038, + "step": 28060 + }, + { + "epoch": 0.4233913541886633, + "grad_norm": 0.6874904964071665, + "learning_rate": 1.2560863757509514e-05, + "loss": 1.0129, + "step": 28070 + }, + { + "epoch": 0.42354218830130624, + "grad_norm": 0.7148127051331712, + "learning_rate": 1.2556236614637398e-05, + "loss": 1.0189, + "step": 28080 + }, + { + "epoch": 0.4236930224139491, + "grad_norm": 0.6565986565347456, + "learning_rate": 1.2551608886126524e-05, + "loss": 1.0132, + "step": 28090 + }, + { + "epoch": 0.42384385652659207, + "grad_norm": 0.6845343249509883, + "learning_rate": 1.2546980573037115e-05, + "loss": 0.9991, + "step": 28100 + }, + { + "epoch": 0.42399469063923495, + "grad_norm": 0.8069056596391554, + "learning_rate": 1.2542351676429529e-05, + "loss": 1.0183, + "step": 28110 + }, + { + "epoch": 0.4241455247518779, + "grad_norm": 0.6121068424192668, + "learning_rate": 1.2537722197364256e-05, + "loss": 0.9996, + "step": 28120 + }, + { + "epoch": 0.4242963588645208, + "grad_norm": 0.6581412879369565, + "learning_rate": 1.253309213690191e-05, + "loss": 1.0174, + "step": 28130 + }, + { + "epoch": 0.4244471929771637, + "grad_norm": 0.6497983946724184, + "learning_rate": 1.2528461496103256e-05, + "loss": 1.0024, + "step": 28140 + }, + { + "epoch": 0.42459802708980665, + "grad_norm": 0.7074249338685787, + "learning_rate": 1.2523830276029182e-05, + "loss": 1.0102, + "step": 28150 + }, + { + "epoch": 0.42474886120244953, + "grad_norm": 0.6736205395394542, + "learning_rate": 1.2519198477740704e-05, + "loss": 0.9795, + "step": 28160 + }, + { + "epoch": 0.4248996953150925, + "grad_norm": 0.6529148960230673, + "learning_rate": 1.251456610229898e-05, + "loss": 1.0089, + "step": 28170 + }, + { + "epoch": 0.42505052942773536, + "grad_norm": 0.6792762698892283, + "learning_rate": 1.2509933150765297e-05, + "loss": 1.0317, + "step": 28180 + }, + { + "epoch": 0.4252013635403783, + "grad_norm": 0.6533467469223577, + "learning_rate": 1.250529962420107e-05, + "loss": 1.0128, + "step": 28190 + }, + { + "epoch": 0.4253521976530212, + "grad_norm": 0.6640816834788312, + "learning_rate": 1.2500665523667852e-05, + "loss": 1.002, + "step": 28200 + }, + { + "epoch": 0.4255030317656641, + "grad_norm": 0.6674985934163833, + "learning_rate": 1.2496030850227321e-05, + "loss": 1.003, + "step": 28210 + }, + { + "epoch": 0.42565386587830706, + "grad_norm": 0.7842384712436798, + "learning_rate": 1.2491395604941292e-05, + "loss": 0.9999, + "step": 28220 + }, + { + "epoch": 0.42580469999094994, + "grad_norm": 0.6575431624931538, + "learning_rate": 1.2486759788871708e-05, + "loss": 1.0069, + "step": 28230 + }, + { + "epoch": 0.4259555341035929, + "grad_norm": 0.716667157326144, + "learning_rate": 1.2482123403080643e-05, + "loss": 1.0027, + "step": 28240 + }, + { + "epoch": 0.42610636821623576, + "grad_norm": 0.646804157164873, + "learning_rate": 1.2477486448630306e-05, + "loss": 1.0148, + "step": 28250 + }, + { + "epoch": 0.4262572023288787, + "grad_norm": 0.7264479114362202, + "learning_rate": 1.2472848926583026e-05, + "loss": 1.0236, + "step": 28260 + }, + { + "epoch": 0.4264080364415216, + "grad_norm": 0.6794142172902226, + "learning_rate": 1.2468210838001271e-05, + "loss": 1.0252, + "step": 28270 + }, + { + "epoch": 0.4265588705541645, + "grad_norm": 0.6330638349041486, + "learning_rate": 1.2463572183947637e-05, + "loss": 1.033, + "step": 28280 + }, + { + "epoch": 0.42670970466680747, + "grad_norm": 0.6271816377569455, + "learning_rate": 1.2458932965484846e-05, + "loss": 0.9964, + "step": 28290 + }, + { + "epoch": 0.42686053877945035, + "grad_norm": 0.665128048096364, + "learning_rate": 1.2454293183675757e-05, + "loss": 1.0031, + "step": 28300 + }, + { + "epoch": 0.4270113728920933, + "grad_norm": 0.6525844369419875, + "learning_rate": 1.2449652839583346e-05, + "loss": 0.9958, + "step": 28310 + }, + { + "epoch": 0.42716220700473617, + "grad_norm": 0.6369386941983363, + "learning_rate": 1.2445011934270733e-05, + "loss": 0.9976, + "step": 28320 + }, + { + "epoch": 0.4273130411173791, + "grad_norm": 0.6256311760423501, + "learning_rate": 1.244037046880115e-05, + "loss": 1.0254, + "step": 28330 + }, + { + "epoch": 0.427463875230022, + "grad_norm": 0.6479175874887885, + "learning_rate": 1.2435728444237971e-05, + "loss": 1.0132, + "step": 28340 + }, + { + "epoch": 0.42761470934266493, + "grad_norm": 0.7069646248408457, + "learning_rate": 1.243108586164469e-05, + "loss": 1.0205, + "step": 28350 + }, + { + "epoch": 0.4277655434553079, + "grad_norm": 0.6581423472131376, + "learning_rate": 1.2426442722084937e-05, + "loss": 1.0254, + "step": 28360 + }, + { + "epoch": 0.42791637756795076, + "grad_norm": 0.681961002444167, + "learning_rate": 1.2421799026622456e-05, + "loss": 1.0156, + "step": 28370 + }, + { + "epoch": 0.4280672116805937, + "grad_norm": 0.6607911522624057, + "learning_rate": 1.2417154776321132e-05, + "loss": 1.024, + "step": 28380 + }, + { + "epoch": 0.4282180457932366, + "grad_norm": 0.6476253363908854, + "learning_rate": 1.241250997224497e-05, + "loss": 1.0217, + "step": 28390 + }, + { + "epoch": 0.4283688799058795, + "grad_norm": 0.6610997929633093, + "learning_rate": 1.2407864615458107e-05, + "loss": 1.0278, + "step": 28400 + }, + { + "epoch": 0.4285197140185224, + "grad_norm": 0.7522221504742739, + "learning_rate": 1.24032187070248e-05, + "loss": 1.0187, + "step": 28410 + }, + { + "epoch": 0.42867054813116534, + "grad_norm": 0.684841699349838, + "learning_rate": 1.2398572248009435e-05, + "loss": 1.0288, + "step": 28420 + }, + { + "epoch": 0.4288213822438083, + "grad_norm": 0.6540399167961687, + "learning_rate": 1.2393925239476528e-05, + "loss": 1.0136, + "step": 28430 + }, + { + "epoch": 0.42897221635645116, + "grad_norm": 0.6500751588617629, + "learning_rate": 1.2389277682490713e-05, + "loss": 1.0283, + "step": 28440 + }, + { + "epoch": 0.4291230504690941, + "grad_norm": 0.7085354456002649, + "learning_rate": 1.2384629578116755e-05, + "loss": 1.0282, + "step": 28450 + }, + { + "epoch": 0.429273884581737, + "grad_norm": 0.6804919013202917, + "learning_rate": 1.237998092741955e-05, + "loss": 1.0309, + "step": 28460 + }, + { + "epoch": 0.4294247186943799, + "grad_norm": 0.6493696534581415, + "learning_rate": 1.2375331731464107e-05, + "loss": 1.0196, + "step": 28470 + }, + { + "epoch": 0.4295755528070228, + "grad_norm": 0.6412377570990087, + "learning_rate": 1.2370681991315565e-05, + "loss": 1.0255, + "step": 28480 + }, + { + "epoch": 0.42972638691966575, + "grad_norm": 0.6672685597979487, + "learning_rate": 1.2366031708039192e-05, + "loss": 1.0068, + "step": 28490 + }, + { + "epoch": 0.4298772210323087, + "grad_norm": 0.6590909887711941, + "learning_rate": 1.2361380882700378e-05, + "loss": 1.0177, + "step": 28500 + }, + { + "epoch": 0.43002805514495157, + "grad_norm": 0.6847487458831651, + "learning_rate": 1.2356729516364631e-05, + "loss": 1.0132, + "step": 28510 + }, + { + "epoch": 0.4301788892575945, + "grad_norm": 0.6887831611488574, + "learning_rate": 1.2352077610097593e-05, + "loss": 1.0299, + "step": 28520 + }, + { + "epoch": 0.4303297233702374, + "grad_norm": 0.6593143974684093, + "learning_rate": 1.234742516496502e-05, + "loss": 1.0171, + "step": 28530 + }, + { + "epoch": 0.43048055748288033, + "grad_norm": 0.6343775513323318, + "learning_rate": 1.2342772182032798e-05, + "loss": 1.0102, + "step": 28540 + }, + { + "epoch": 0.4306313915955232, + "grad_norm": 0.6561984331858377, + "learning_rate": 1.2338118662366936e-05, + "loss": 1.0269, + "step": 28550 + }, + { + "epoch": 0.43078222570816616, + "grad_norm": 0.6868967877748126, + "learning_rate": 1.2333464607033563e-05, + "loss": 1.0321, + "step": 28560 + }, + { + "epoch": 0.4309330598208091, + "grad_norm": 0.6594326869613917, + "learning_rate": 1.2328810017098936e-05, + "loss": 1.0062, + "step": 28570 + }, + { + "epoch": 0.431083893933452, + "grad_norm": 0.680834856427866, + "learning_rate": 1.2324154893629423e-05, + "loss": 0.9976, + "step": 28580 + }, + { + "epoch": 0.4312347280460949, + "grad_norm": 0.6478933811878677, + "learning_rate": 1.2319499237691528e-05, + "loss": 1.0092, + "step": 28590 + }, + { + "epoch": 0.4313855621587378, + "grad_norm": 0.6620271252948889, + "learning_rate": 1.2314843050351865e-05, + "loss": 1.0288, + "step": 28600 + }, + { + "epoch": 0.43153639627138074, + "grad_norm": 0.6788584773769643, + "learning_rate": 1.2310186332677185e-05, + "loss": 0.9882, + "step": 28610 + }, + { + "epoch": 0.4316872303840236, + "grad_norm": 0.6636276761316076, + "learning_rate": 1.2305529085734343e-05, + "loss": 0.9991, + "step": 28620 + }, + { + "epoch": 0.43183806449666656, + "grad_norm": 0.6449654995674051, + "learning_rate": 1.230087131059032e-05, + "loss": 0.9957, + "step": 28630 + }, + { + "epoch": 0.4319888986093095, + "grad_norm": 0.6507691622353081, + "learning_rate": 1.2296213008312232e-05, + "loss": 1.0076, + "step": 28640 + }, + { + "epoch": 0.4321397327219524, + "grad_norm": 0.6382344202771848, + "learning_rate": 1.22915541799673e-05, + "loss": 1.0029, + "step": 28650 + }, + { + "epoch": 0.4322905668345953, + "grad_norm": 0.7059850078770823, + "learning_rate": 1.2286894826622868e-05, + "loss": 1.0166, + "step": 28660 + }, + { + "epoch": 0.4324414009472382, + "grad_norm": 0.651053590539834, + "learning_rate": 1.2282234949346408e-05, + "loss": 1.0019, + "step": 28670 + }, + { + "epoch": 0.43259223505988115, + "grad_norm": 0.6483677556609472, + "learning_rate": 1.2277574549205502e-05, + "loss": 1.0121, + "step": 28680 + }, + { + "epoch": 0.43274306917252403, + "grad_norm": 0.6697730675242474, + "learning_rate": 1.227291362726786e-05, + "loss": 1.0177, + "step": 28690 + }, + { + "epoch": 0.43289390328516697, + "grad_norm": 0.6505690783503388, + "learning_rate": 1.2268252184601306e-05, + "loss": 1.0386, + "step": 28700 + }, + { + "epoch": 0.4330447373978099, + "grad_norm": 0.6711885278797666, + "learning_rate": 1.2263590222273787e-05, + "loss": 0.9998, + "step": 28710 + }, + { + "epoch": 0.4331955715104528, + "grad_norm": 0.6673181387808353, + "learning_rate": 1.2258927741353369e-05, + "loss": 1.0, + "step": 28720 + }, + { + "epoch": 0.43334640562309573, + "grad_norm": 0.6699815673498882, + "learning_rate": 1.2254264742908231e-05, + "loss": 1.008, + "step": 28730 + }, + { + "epoch": 0.4334972397357386, + "grad_norm": 0.6320930595094074, + "learning_rate": 1.2249601228006678e-05, + "loss": 0.9789, + "step": 28740 + }, + { + "epoch": 0.43364807384838155, + "grad_norm": 0.6309914653877238, + "learning_rate": 1.2244937197717132e-05, + "loss": 0.9949, + "step": 28750 + }, + { + "epoch": 0.43379890796102444, + "grad_norm": 0.6245902461207907, + "learning_rate": 1.2240272653108127e-05, + "loss": 1.0054, + "step": 28760 + }, + { + "epoch": 0.4339497420736674, + "grad_norm": 0.6484368624929802, + "learning_rate": 1.2235607595248322e-05, + "loss": 1.0061, + "step": 28770 + }, + { + "epoch": 0.4341005761863103, + "grad_norm": 0.6484812359680433, + "learning_rate": 1.2230942025206494e-05, + "loss": 1.0354, + "step": 28780 + }, + { + "epoch": 0.4342514102989532, + "grad_norm": 0.662741499293717, + "learning_rate": 1.2226275944051525e-05, + "loss": 1.0158, + "step": 28790 + }, + { + "epoch": 0.43440224441159614, + "grad_norm": 0.6444013138769183, + "learning_rate": 1.2221609352852428e-05, + "loss": 0.9968, + "step": 28800 + }, + { + "epoch": 0.434553078524239, + "grad_norm": 0.7526478164409566, + "learning_rate": 1.2216942252678332e-05, + "loss": 1.0248, + "step": 28810 + }, + { + "epoch": 0.43470391263688196, + "grad_norm": 0.7191157741404504, + "learning_rate": 1.2212274644598477e-05, + "loss": 1.0156, + "step": 28820 + }, + { + "epoch": 0.43485474674952485, + "grad_norm": 0.7235271203773964, + "learning_rate": 1.2207606529682217e-05, + "loss": 1.0182, + "step": 28830 + }, + { + "epoch": 0.4350055808621678, + "grad_norm": 0.69584524930142, + "learning_rate": 1.2202937908999026e-05, + "loss": 1.0212, + "step": 28840 + }, + { + "epoch": 0.4351564149748107, + "grad_norm": 0.6337011948625301, + "learning_rate": 1.2198268783618501e-05, + "loss": 0.9968, + "step": 28850 + }, + { + "epoch": 0.4353072490874536, + "grad_norm": 0.6879370360074811, + "learning_rate": 1.219359915461034e-05, + "loss": 0.9965, + "step": 28860 + }, + { + "epoch": 0.43545808320009655, + "grad_norm": 0.674600434356973, + "learning_rate": 1.2188929023044369e-05, + "loss": 1.0095, + "step": 28870 + }, + { + "epoch": 0.43560891731273943, + "grad_norm": 0.6482514076677022, + "learning_rate": 1.218425838999052e-05, + "loss": 1.011, + "step": 28880 + }, + { + "epoch": 0.43575975142538237, + "grad_norm": 0.6416094436013345, + "learning_rate": 1.217958725651885e-05, + "loss": 0.9993, + "step": 28890 + }, + { + "epoch": 0.43591058553802525, + "grad_norm": 0.6546503091429221, + "learning_rate": 1.217491562369952e-05, + "loss": 1.0206, + "step": 28900 + }, + { + "epoch": 0.4360614196506682, + "grad_norm": 0.6593306094545679, + "learning_rate": 1.2170243492602812e-05, + "loss": 1.0327, + "step": 28910 + }, + { + "epoch": 0.43621225376331113, + "grad_norm": 0.6333165482379163, + "learning_rate": 1.2165570864299118e-05, + "loss": 1.0138, + "step": 28920 + }, + { + "epoch": 0.436363087875954, + "grad_norm": 0.7436016805485873, + "learning_rate": 1.216089773985895e-05, + "loss": 1.0044, + "step": 28930 + }, + { + "epoch": 0.43651392198859695, + "grad_norm": 0.6698705622853116, + "learning_rate": 1.2156224120352926e-05, + "loss": 1.0125, + "step": 28940 + }, + { + "epoch": 0.43666475610123984, + "grad_norm": 0.649713576197528, + "learning_rate": 1.2151550006851782e-05, + "loss": 1.0203, + "step": 28950 + }, + { + "epoch": 0.4368155902138828, + "grad_norm": 0.66879697988066, + "learning_rate": 1.214687540042637e-05, + "loss": 0.9941, + "step": 28960 + }, + { + "epoch": 0.43696642432652566, + "grad_norm": 0.6459453347813375, + "learning_rate": 1.2142200302147647e-05, + "loss": 1.0172, + "step": 28970 + }, + { + "epoch": 0.4371172584391686, + "grad_norm": 0.6615293869844292, + "learning_rate": 1.2137524713086688e-05, + "loss": 1.0103, + "step": 28980 + }, + { + "epoch": 0.43726809255181154, + "grad_norm": 0.6942127051659103, + "learning_rate": 1.2132848634314685e-05, + "loss": 1.0343, + "step": 28990 + }, + { + "epoch": 0.4374189266644544, + "grad_norm": 0.6501857011177707, + "learning_rate": 1.2128172066902928e-05, + "loss": 0.9982, + "step": 29000 + }, + { + "epoch": 0.43756976077709736, + "grad_norm": 0.6990229086802353, + "learning_rate": 1.2123495011922832e-05, + "loss": 0.9917, + "step": 29010 + }, + { + "epoch": 0.43772059488974024, + "grad_norm": 0.6892895676135538, + "learning_rate": 1.211881747044592e-05, + "loss": 0.9971, + "step": 29020 + }, + { + "epoch": 0.4378714290023832, + "grad_norm": 0.6553461547338285, + "learning_rate": 1.2114139443543827e-05, + "loss": 1.0031, + "step": 29030 + }, + { + "epoch": 0.43802226311502607, + "grad_norm": 0.6860151280140554, + "learning_rate": 1.2109460932288291e-05, + "loss": 1.0085, + "step": 29040 + }, + { + "epoch": 0.438173097227669, + "grad_norm": 0.6443983661044094, + "learning_rate": 1.2104781937751173e-05, + "loss": 1.0126, + "step": 29050 + }, + { + "epoch": 0.43832393134031195, + "grad_norm": 0.6536354712979185, + "learning_rate": 1.2100102461004444e-05, + "loss": 1.0111, + "step": 29060 + }, + { + "epoch": 0.43847476545295483, + "grad_norm": 0.6394510060161853, + "learning_rate": 1.2095422503120174e-05, + "loss": 1.0102, + "step": 29070 + }, + { + "epoch": 0.43862559956559777, + "grad_norm": 0.672123436509511, + "learning_rate": 1.2090742065170553e-05, + "loss": 1.0055, + "step": 29080 + }, + { + "epoch": 0.43877643367824065, + "grad_norm": 0.6378734864222606, + "learning_rate": 1.2086061148227878e-05, + "loss": 1.011, + "step": 29090 + }, + { + "epoch": 0.4389272677908836, + "grad_norm": 0.6245667300361386, + "learning_rate": 1.2081379753364561e-05, + "loss": 1.0083, + "step": 29100 + }, + { + "epoch": 0.4390781019035265, + "grad_norm": 0.6669420471860269, + "learning_rate": 1.207669788165311e-05, + "loss": 1.0014, + "step": 29110 + }, + { + "epoch": 0.4392289360161694, + "grad_norm": 0.6508867550632045, + "learning_rate": 1.2072015534166158e-05, + "loss": 0.9989, + "step": 29120 + }, + { + "epoch": 0.43937977012881235, + "grad_norm": 0.6434316077720694, + "learning_rate": 1.2067332711976434e-05, + "loss": 1.0034, + "step": 29130 + }, + { + "epoch": 0.43953060424145524, + "grad_norm": 0.6664107329852029, + "learning_rate": 1.2062649416156785e-05, + "loss": 0.9918, + "step": 29140 + }, + { + "epoch": 0.4396814383540982, + "grad_norm": 0.7111614172958703, + "learning_rate": 1.2057965647780164e-05, + "loss": 1.0174, + "step": 29150 + }, + { + "epoch": 0.43983227246674106, + "grad_norm": 0.6514109336054277, + "learning_rate": 1.205328140791963e-05, + "loss": 1.0071, + "step": 29160 + }, + { + "epoch": 0.439983106579384, + "grad_norm": 0.7071142934336304, + "learning_rate": 1.2048596697648354e-05, + "loss": 1.0213, + "step": 29170 + }, + { + "epoch": 0.4401339406920269, + "grad_norm": 0.8208277259141742, + "learning_rate": 1.2043911518039606e-05, + "loss": 1.0111, + "step": 29180 + }, + { + "epoch": 0.4402847748046698, + "grad_norm": 0.6979990577520891, + "learning_rate": 1.2039225870166776e-05, + "loss": 1.0094, + "step": 29190 + }, + { + "epoch": 0.44043560891731276, + "grad_norm": 0.6330880210976525, + "learning_rate": 1.203453975510335e-05, + "loss": 1.0026, + "step": 29200 + }, + { + "epoch": 0.44058644302995564, + "grad_norm": 0.6681658391873843, + "learning_rate": 1.202985317392293e-05, + "loss": 1.0146, + "step": 29210 + }, + { + "epoch": 0.4407372771425986, + "grad_norm": 0.6777178956988156, + "learning_rate": 1.2025166127699219e-05, + "loss": 1.0021, + "step": 29220 + }, + { + "epoch": 0.44088811125524147, + "grad_norm": 0.6619573266207465, + "learning_rate": 1.2020478617506029e-05, + "loss": 0.9975, + "step": 29230 + }, + { + "epoch": 0.4410389453678844, + "grad_norm": 0.6698116273190388, + "learning_rate": 1.2015790644417278e-05, + "loss": 1.0045, + "step": 29240 + }, + { + "epoch": 0.4411897794805273, + "grad_norm": 0.6780257917013564, + "learning_rate": 1.2011102209506989e-05, + "loss": 1.0018, + "step": 29250 + }, + { + "epoch": 0.44134061359317023, + "grad_norm": 0.6730510918064423, + "learning_rate": 1.200641331384929e-05, + "loss": 1.0068, + "step": 29260 + }, + { + "epoch": 0.44149144770581317, + "grad_norm": 0.637822117437513, + "learning_rate": 1.200172395851842e-05, + "loss": 0.9863, + "step": 29270 + }, + { + "epoch": 0.44164228181845605, + "grad_norm": 0.6742545071895907, + "learning_rate": 1.1997034144588717e-05, + "loss": 1.0084, + "step": 29280 + }, + { + "epoch": 0.441793115931099, + "grad_norm": 0.6740504625429657, + "learning_rate": 1.1992343873134625e-05, + "loss": 1.0062, + "step": 29290 + }, + { + "epoch": 0.4419439500437419, + "grad_norm": 0.7245642583586523, + "learning_rate": 1.1987653145230694e-05, + "loss": 0.9917, + "step": 29300 + }, + { + "epoch": 0.4420947841563848, + "grad_norm": 0.6505376675919533, + "learning_rate": 1.1982961961951585e-05, + "loss": 1.0183, + "step": 29310 + }, + { + "epoch": 0.4422456182690277, + "grad_norm": 0.6570825877557984, + "learning_rate": 1.197827032437205e-05, + "loss": 1.0106, + "step": 29320 + }, + { + "epoch": 0.44239645238167064, + "grad_norm": 0.6667647626568208, + "learning_rate": 1.1973578233566957e-05, + "loss": 1.0145, + "step": 29330 + }, + { + "epoch": 0.4425472864943136, + "grad_norm": 0.6372054124982338, + "learning_rate": 1.1968885690611267e-05, + "loss": 1.0093, + "step": 29340 + }, + { + "epoch": 0.44269812060695646, + "grad_norm": 0.6365562088686456, + "learning_rate": 1.196419269658006e-05, + "loss": 1.0162, + "step": 29350 + }, + { + "epoch": 0.4428489547195994, + "grad_norm": 0.6562346715279028, + "learning_rate": 1.1959499252548503e-05, + "loss": 1.005, + "step": 29360 + }, + { + "epoch": 0.4429997888322423, + "grad_norm": 0.6647060846041621, + "learning_rate": 1.1954805359591873e-05, + "loss": 1.0145, + "step": 29370 + }, + { + "epoch": 0.4431506229448852, + "grad_norm": 0.6793874293424311, + "learning_rate": 1.1950111018785554e-05, + "loss": 1.0151, + "step": 29380 + }, + { + "epoch": 0.4433014570575281, + "grad_norm": 0.6735218766493761, + "learning_rate": 1.1945416231205025e-05, + "loss": 1.0185, + "step": 29390 + }, + { + "epoch": 0.44345229117017104, + "grad_norm": 0.6600465279251202, + "learning_rate": 1.1940720997925872e-05, + "loss": 1.0088, + "step": 29400 + }, + { + "epoch": 0.443603125282814, + "grad_norm": 0.6324306298284973, + "learning_rate": 1.1936025320023783e-05, + "loss": 0.9966, + "step": 29410 + }, + { + "epoch": 0.44375395939545687, + "grad_norm": 0.6568249118903001, + "learning_rate": 1.1931329198574548e-05, + "loss": 1.0131, + "step": 29420 + }, + { + "epoch": 0.4439047935080998, + "grad_norm": 0.6854504552130782, + "learning_rate": 1.1926632634654053e-05, + "loss": 1.0036, + "step": 29430 + }, + { + "epoch": 0.4440556276207427, + "grad_norm": 0.632434415519403, + "learning_rate": 1.1921935629338294e-05, + "loss": 0.9951, + "step": 29440 + }, + { + "epoch": 0.4442064617333856, + "grad_norm": 0.6242433567754194, + "learning_rate": 1.1917238183703363e-05, + "loss": 1.0115, + "step": 29450 + }, + { + "epoch": 0.4443572958460285, + "grad_norm": 0.6376958433915803, + "learning_rate": 1.1912540298825452e-05, + "loss": 0.9809, + "step": 29460 + }, + { + "epoch": 0.44450812995867145, + "grad_norm": 0.6306893792553138, + "learning_rate": 1.1907841975780861e-05, + "loss": 1.0355, + "step": 29470 + }, + { + "epoch": 0.4446589640713144, + "grad_norm": 0.6486998090213902, + "learning_rate": 1.1903143215645976e-05, + "loss": 1.0348, + "step": 29480 + }, + { + "epoch": 0.4448097981839573, + "grad_norm": 0.646256131696759, + "learning_rate": 1.1898444019497302e-05, + "loss": 1.0021, + "step": 29490 + }, + { + "epoch": 0.4449606322966002, + "grad_norm": 0.6213165803525338, + "learning_rate": 1.1893744388411425e-05, + "loss": 1.0163, + "step": 29500 + }, + { + "epoch": 0.4451114664092431, + "grad_norm": 0.7211768247960737, + "learning_rate": 1.1889044323465046e-05, + "loss": 1.0176, + "step": 29510 + }, + { + "epoch": 0.44526230052188603, + "grad_norm": 0.6302744442980057, + "learning_rate": 1.1884343825734956e-05, + "loss": 1.0196, + "step": 29520 + }, + { + "epoch": 0.4454131346345289, + "grad_norm": 0.6887398680803428, + "learning_rate": 1.187964289629805e-05, + "loss": 0.997, + "step": 29530 + }, + { + "epoch": 0.44556396874717186, + "grad_norm": 0.6330544775375059, + "learning_rate": 1.1874941536231313e-05, + "loss": 1.008, + "step": 29540 + }, + { + "epoch": 0.4457148028598148, + "grad_norm": 0.659936447604859, + "learning_rate": 1.1870239746611845e-05, + "loss": 1.006, + "step": 29550 + }, + { + "epoch": 0.4458656369724577, + "grad_norm": 0.6627369152923918, + "learning_rate": 1.1865537528516832e-05, + "loss": 1.0164, + "step": 29560 + }, + { + "epoch": 0.4460164710851006, + "grad_norm": 0.6880259908476666, + "learning_rate": 1.1860834883023562e-05, + "loss": 1.0129, + "step": 29570 + }, + { + "epoch": 0.4461673051977435, + "grad_norm": 0.6675190295544603, + "learning_rate": 1.1856131811209416e-05, + "loss": 1.0321, + "step": 29580 + }, + { + "epoch": 0.44631813931038644, + "grad_norm": 0.6713505801617737, + "learning_rate": 1.1851428314151885e-05, + "loss": 1.0151, + "step": 29590 + }, + { + "epoch": 0.4464689734230293, + "grad_norm": 0.6458137073221631, + "learning_rate": 1.1846724392928539e-05, + "loss": 1.0248, + "step": 29600 + }, + { + "epoch": 0.44661980753567226, + "grad_norm": 0.6462017750728858, + "learning_rate": 1.1842020048617063e-05, + "loss": 1.01, + "step": 29610 + }, + { + "epoch": 0.4467706416483152, + "grad_norm": 0.67673008497241, + "learning_rate": 1.1837315282295226e-05, + "loss": 1.0076, + "step": 29620 + }, + { + "epoch": 0.4469214757609581, + "grad_norm": 0.6571166797173346, + "learning_rate": 1.1832610095040905e-05, + "loss": 1.0058, + "step": 29630 + }, + { + "epoch": 0.447072309873601, + "grad_norm": 0.7011608685425136, + "learning_rate": 1.182790448793206e-05, + "loss": 1.0119, + "step": 29640 + }, + { + "epoch": 0.4472231439862439, + "grad_norm": 0.6524374071077367, + "learning_rate": 1.1823198462046762e-05, + "loss": 0.9961, + "step": 29650 + }, + { + "epoch": 0.44737397809888685, + "grad_norm": 0.6200830128909544, + "learning_rate": 1.181849201846317e-05, + "loss": 1.0063, + "step": 29660 + }, + { + "epoch": 0.44752481221152973, + "grad_norm": 0.6539319923304534, + "learning_rate": 1.1813785158259537e-05, + "loss": 1.0134, + "step": 29670 + }, + { + "epoch": 0.44767564632417267, + "grad_norm": 0.6742687566185738, + "learning_rate": 1.1809077882514211e-05, + "loss": 1.0131, + "step": 29680 + }, + { + "epoch": 0.4478264804368156, + "grad_norm": 0.7370468608141743, + "learning_rate": 1.180437019230564e-05, + "loss": 1.0017, + "step": 29690 + }, + { + "epoch": 0.4479773145494585, + "grad_norm": 0.7222657324319526, + "learning_rate": 1.1799662088712369e-05, + "loss": 1.0169, + "step": 29700 + }, + { + "epoch": 0.44812814866210143, + "grad_norm": 0.746688791145096, + "learning_rate": 1.1794953572813028e-05, + "loss": 1.0087, + "step": 29710 + }, + { + "epoch": 0.4482789827747443, + "grad_norm": 0.7030184858975803, + "learning_rate": 1.1790244645686351e-05, + "loss": 1.0091, + "step": 29720 + }, + { + "epoch": 0.44842981688738726, + "grad_norm": 0.6646180556500683, + "learning_rate": 1.1785535308411162e-05, + "loss": 1.0171, + "step": 29730 + }, + { + "epoch": 0.44858065100003014, + "grad_norm": 0.6870565013785903, + "learning_rate": 1.1780825562066379e-05, + "loss": 0.9916, + "step": 29740 + }, + { + "epoch": 0.4487314851126731, + "grad_norm": 0.6403654601424835, + "learning_rate": 1.1776115407731008e-05, + "loss": 1.0197, + "step": 29750 + }, + { + "epoch": 0.448882319225316, + "grad_norm": 0.6641831835547802, + "learning_rate": 1.1771404846484165e-05, + "loss": 1.0252, + "step": 29760 + }, + { + "epoch": 0.4490331533379589, + "grad_norm": 0.6584916896899481, + "learning_rate": 1.1766693879405042e-05, + "loss": 1.0066, + "step": 29770 + }, + { + "epoch": 0.44918398745060184, + "grad_norm": 0.6826185024897553, + "learning_rate": 1.1761982507572931e-05, + "loss": 0.9815, + "step": 29780 + }, + { + "epoch": 0.4493348215632447, + "grad_norm": 0.6825852762597102, + "learning_rate": 1.1757270732067218e-05, + "loss": 1.024, + "step": 29790 + }, + { + "epoch": 0.44948565567588766, + "grad_norm": 0.7271873133824319, + "learning_rate": 1.175255855396738e-05, + "loss": 1.0063, + "step": 29800 + }, + { + "epoch": 0.44963648978853055, + "grad_norm": 0.6900877233819288, + "learning_rate": 1.1747845974352983e-05, + "loss": 0.9877, + "step": 29810 + }, + { + "epoch": 0.4497873239011735, + "grad_norm": 0.6523534120475554, + "learning_rate": 1.1743132994303691e-05, + "loss": 1.006, + "step": 29820 + }, + { + "epoch": 0.4499381580138164, + "grad_norm": 0.7131597159981015, + "learning_rate": 1.1738419614899256e-05, + "loss": 1.0057, + "step": 29830 + }, + { + "epoch": 0.4500889921264593, + "grad_norm": 0.6662109066311386, + "learning_rate": 1.173370583721953e-05, + "loss": 1.0207, + "step": 29840 + }, + { + "epoch": 0.45023982623910225, + "grad_norm": 0.6789042980873691, + "learning_rate": 1.1728991662344433e-05, + "loss": 1.001, + "step": 29850 + }, + { + "epoch": 0.45039066035174513, + "grad_norm": 0.6452563308427088, + "learning_rate": 1.1724277091354002e-05, + "loss": 0.9812, + "step": 29860 + }, + { + "epoch": 0.45054149446438807, + "grad_norm": 0.6421890893147238, + "learning_rate": 1.1719562125328355e-05, + "loss": 1.0086, + "step": 29870 + }, + { + "epoch": 0.45069232857703095, + "grad_norm": 0.6882221551585865, + "learning_rate": 1.1714846765347694e-05, + "loss": 1.0111, + "step": 29880 + }, + { + "epoch": 0.4508431626896739, + "grad_norm": 0.7006184416427093, + "learning_rate": 1.1710131012492321e-05, + "loss": 1.0142, + "step": 29890 + }, + { + "epoch": 0.45099399680231683, + "grad_norm": 0.7146369661579918, + "learning_rate": 1.1705414867842622e-05, + "loss": 1.002, + "step": 29900 + }, + { + "epoch": 0.4511448309149597, + "grad_norm": 0.6191467097747476, + "learning_rate": 1.170069833247908e-05, + "loss": 1.0113, + "step": 29910 + }, + { + "epoch": 0.45129566502760265, + "grad_norm": 0.6499810313673077, + "learning_rate": 1.1695981407482256e-05, + "loss": 1.0071, + "step": 29920 + }, + { + "epoch": 0.45144649914024554, + "grad_norm": 0.6398303920152119, + "learning_rate": 1.169126409393281e-05, + "loss": 1.0164, + "step": 29930 + }, + { + "epoch": 0.4515973332528885, + "grad_norm": 0.720718549560709, + "learning_rate": 1.1686546392911486e-05, + "loss": 0.9873, + "step": 29940 + }, + { + "epoch": 0.45174816736553136, + "grad_norm": 0.6440258258232726, + "learning_rate": 1.168182830549912e-05, + "loss": 1.0064, + "step": 29950 + }, + { + "epoch": 0.4518990014781743, + "grad_norm": 0.6435410952452695, + "learning_rate": 1.1677109832776634e-05, + "loss": 1.0123, + "step": 29960 + }, + { + "epoch": 0.45204983559081724, + "grad_norm": 0.6216369966286708, + "learning_rate": 1.1672390975825038e-05, + "loss": 0.9905, + "step": 29970 + }, + { + "epoch": 0.4522006697034601, + "grad_norm": 0.6443132480574835, + "learning_rate": 1.1667671735725436e-05, + "loss": 1.005, + "step": 29980 + }, + { + "epoch": 0.45235150381610306, + "grad_norm": 0.6601074561046519, + "learning_rate": 1.1662952113559009e-05, + "loss": 1.0127, + "step": 29990 + }, + { + "epoch": 0.45250233792874595, + "grad_norm": 0.6386344982285083, + "learning_rate": 1.1658232110407037e-05, + "loss": 0.99, + "step": 30000 + }, + { + "epoch": 0.4526531720413889, + "grad_norm": 0.6252975028906902, + "learning_rate": 1.1653511727350878e-05, + "loss": 1.0048, + "step": 30010 + }, + { + "epoch": 0.45280400615403177, + "grad_norm": 0.6620293496036658, + "learning_rate": 1.1648790965471984e-05, + "loss": 1.0045, + "step": 30020 + }, + { + "epoch": 0.4529548402666747, + "grad_norm": 0.6936675507892237, + "learning_rate": 1.1644069825851885e-05, + "loss": 0.9976, + "step": 30030 + }, + { + "epoch": 0.45310567437931765, + "grad_norm": 0.6382982453702383, + "learning_rate": 1.163934830957221e-05, + "loss": 1.0171, + "step": 30040 + }, + { + "epoch": 0.45325650849196053, + "grad_norm": 0.6473305452521144, + "learning_rate": 1.1634626417714662e-05, + "loss": 1.0096, + "step": 30050 + }, + { + "epoch": 0.45340734260460347, + "grad_norm": 0.6993710729512097, + "learning_rate": 1.162990415136104e-05, + "loss": 1.0403, + "step": 30060 + }, + { + "epoch": 0.45355817671724635, + "grad_norm": 0.6640926359412701, + "learning_rate": 1.1625181511593219e-05, + "loss": 0.9911, + "step": 30070 + }, + { + "epoch": 0.4537090108298893, + "grad_norm": 0.7157937571984547, + "learning_rate": 1.1620458499493173e-05, + "loss": 1.0069, + "step": 30080 + }, + { + "epoch": 0.4538598449425322, + "grad_norm": 0.6930465418581218, + "learning_rate": 1.1615735116142947e-05, + "loss": 1.0013, + "step": 30090 + }, + { + "epoch": 0.4540106790551751, + "grad_norm": 0.6487455116411817, + "learning_rate": 1.161101136262468e-05, + "loss": 1.011, + "step": 30100 + }, + { + "epoch": 0.45416151316781805, + "grad_norm": 0.6630388122202319, + "learning_rate": 1.1606287240020591e-05, + "loss": 0.9998, + "step": 30110 + }, + { + "epoch": 0.45431234728046094, + "grad_norm": 0.6406553539929983, + "learning_rate": 1.1601562749412985e-05, + "loss": 1.0096, + "step": 30120 + }, + { + "epoch": 0.4544631813931039, + "grad_norm": 0.6453935222402465, + "learning_rate": 1.1596837891884253e-05, + "loss": 0.9953, + "step": 30130 + }, + { + "epoch": 0.45461401550574676, + "grad_norm": 0.6458451604386171, + "learning_rate": 1.1592112668516872e-05, + "loss": 0.9953, + "step": 30140 + }, + { + "epoch": 0.4547648496183897, + "grad_norm": 0.6826412754973106, + "learning_rate": 1.1587387080393398e-05, + "loss": 1.0114, + "step": 30150 + }, + { + "epoch": 0.4549156837310326, + "grad_norm": 0.7224071148290789, + "learning_rate": 1.1582661128596471e-05, + "loss": 1.0078, + "step": 30160 + }, + { + "epoch": 0.4550665178436755, + "grad_norm": 0.6442881535477047, + "learning_rate": 1.1577934814208816e-05, + "loss": 1.0034, + "step": 30170 + }, + { + "epoch": 0.45521735195631846, + "grad_norm": 0.6585719574848833, + "learning_rate": 1.1573208138313241e-05, + "loss": 1.0044, + "step": 30180 + }, + { + "epoch": 0.45536818606896134, + "grad_norm": 0.7327709418074264, + "learning_rate": 1.1568481101992639e-05, + "loss": 1.0058, + "step": 30190 + }, + { + "epoch": 0.4555190201816043, + "grad_norm": 0.6602694855009797, + "learning_rate": 1.156375370632998e-05, + "loss": 1.0055, + "step": 30200 + }, + { + "epoch": 0.45566985429424717, + "grad_norm": 0.6523308232023005, + "learning_rate": 1.1559025952408316e-05, + "loss": 1.0038, + "step": 30210 + }, + { + "epoch": 0.4558206884068901, + "grad_norm": 0.6339040557928832, + "learning_rate": 1.1554297841310791e-05, + "loss": 1.0076, + "step": 30220 + }, + { + "epoch": 0.455971522519533, + "grad_norm": 0.6657803569655292, + "learning_rate": 1.1549569374120626e-05, + "loss": 1.0286, + "step": 30230 + }, + { + "epoch": 0.45612235663217593, + "grad_norm": 0.6187659861084754, + "learning_rate": 1.1544840551921115e-05, + "loss": 1.0017, + "step": 30240 + }, + { + "epoch": 0.45627319074481887, + "grad_norm": 0.6300650035484457, + "learning_rate": 1.1540111375795644e-05, + "loss": 0.9914, + "step": 30250 + }, + { + "epoch": 0.45642402485746175, + "grad_norm": 0.6841863925977318, + "learning_rate": 1.153538184682768e-05, + "loss": 0.9953, + "step": 30260 + }, + { + "epoch": 0.4565748589701047, + "grad_norm": 0.6718629025261437, + "learning_rate": 1.153065196610076e-05, + "loss": 1.0054, + "step": 30270 + }, + { + "epoch": 0.4567256930827476, + "grad_norm": 0.6231251130883934, + "learning_rate": 1.1525921734698513e-05, + "loss": 1.0226, + "step": 30280 + }, + { + "epoch": 0.4568765271953905, + "grad_norm": 0.6253813778146476, + "learning_rate": 1.1521191153704643e-05, + "loss": 1.0252, + "step": 30290 + }, + { + "epoch": 0.4570273613080334, + "grad_norm": 0.6616730372913253, + "learning_rate": 1.1516460224202934e-05, + "loss": 1.0161, + "step": 30300 + }, + { + "epoch": 0.45717819542067634, + "grad_norm": 0.6411904495531975, + "learning_rate": 1.1511728947277254e-05, + "loss": 0.9669, + "step": 30310 + }, + { + "epoch": 0.4573290295333193, + "grad_norm": 0.6816751205115323, + "learning_rate": 1.1506997324011546e-05, + "loss": 1.004, + "step": 30320 + }, + { + "epoch": 0.45747986364596216, + "grad_norm": 0.7182672871034274, + "learning_rate": 1.1502265355489839e-05, + "loss": 0.9868, + "step": 30330 + }, + { + "epoch": 0.4576306977586051, + "grad_norm": 0.6995814649033818, + "learning_rate": 1.1497533042796228e-05, + "loss": 1.0077, + "step": 30340 + }, + { + "epoch": 0.457781531871248, + "grad_norm": 0.6570550923465507, + "learning_rate": 1.1492800387014898e-05, + "loss": 0.9951, + "step": 30350 + }, + { + "epoch": 0.4579323659838909, + "grad_norm": 0.6942828031918029, + "learning_rate": 1.1488067389230109e-05, + "loss": 1.013, + "step": 30360 + }, + { + "epoch": 0.4580832000965338, + "grad_norm": 0.6902580673829857, + "learning_rate": 1.1483334050526202e-05, + "loss": 1.0151, + "step": 30370 + }, + { + "epoch": 0.45823403420917674, + "grad_norm": 0.6649650594728665, + "learning_rate": 1.1478600371987596e-05, + "loss": 1.0069, + "step": 30380 + }, + { + "epoch": 0.4583848683218197, + "grad_norm": 0.6626147628960115, + "learning_rate": 1.1473866354698781e-05, + "loss": 1.0035, + "step": 30390 + }, + { + "epoch": 0.45853570243446257, + "grad_norm": 0.6736347657127861, + "learning_rate": 1.1469131999744334e-05, + "loss": 1.0112, + "step": 30400 + }, + { + "epoch": 0.4586865365471055, + "grad_norm": 0.6773215766517068, + "learning_rate": 1.1464397308208901e-05, + "loss": 1.0009, + "step": 30410 + }, + { + "epoch": 0.4588373706597484, + "grad_norm": 0.6557287338676488, + "learning_rate": 1.1459662281177209e-05, + "loss": 1.0054, + "step": 30420 + }, + { + "epoch": 0.45898820477239133, + "grad_norm": 0.6603145657988718, + "learning_rate": 1.1454926919734063e-05, + "loss": 0.9944, + "step": 30430 + }, + { + "epoch": 0.4591390388850342, + "grad_norm": 0.6470168320878046, + "learning_rate": 1.1450191224964349e-05, + "loss": 1.0044, + "step": 30440 + }, + { + "epoch": 0.45928987299767715, + "grad_norm": 0.6470555225041736, + "learning_rate": 1.1445455197953015e-05, + "loss": 1.0169, + "step": 30450 + }, + { + "epoch": 0.4594407071103201, + "grad_norm": 0.6529994651999049, + "learning_rate": 1.1440718839785094e-05, + "loss": 1.0021, + "step": 30460 + }, + { + "epoch": 0.459591541222963, + "grad_norm": 0.6515785948230183, + "learning_rate": 1.1435982151545706e-05, + "loss": 1.0079, + "step": 30470 + }, + { + "epoch": 0.4597423753356059, + "grad_norm": 0.6488770083515275, + "learning_rate": 1.1431245134320023e-05, + "loss": 0.9982, + "step": 30480 + }, + { + "epoch": 0.4598932094482488, + "grad_norm": 0.655150059597173, + "learning_rate": 1.1426507789193312e-05, + "loss": 1.0019, + "step": 30490 + }, + { + "epoch": 0.46004404356089174, + "grad_norm": 0.660030572286205, + "learning_rate": 1.1421770117250903e-05, + "loss": 1.0036, + "step": 30500 + }, + { + "epoch": 0.4601948776735346, + "grad_norm": 0.6724692956331678, + "learning_rate": 1.1417032119578213e-05, + "loss": 1.0139, + "step": 30510 + }, + { + "epoch": 0.46034571178617756, + "grad_norm": 0.6501911726553484, + "learning_rate": 1.141229379726072e-05, + "loss": 1.0145, + "step": 30520 + }, + { + "epoch": 0.4604965458988205, + "grad_norm": 0.6613108852729904, + "learning_rate": 1.1407555151383982e-05, + "loss": 0.9959, + "step": 30530 + }, + { + "epoch": 0.4606473800114634, + "grad_norm": 0.6431761302160381, + "learning_rate": 1.1402816183033638e-05, + "loss": 1.0141, + "step": 30540 + }, + { + "epoch": 0.4607982141241063, + "grad_norm": 0.6761950648254957, + "learning_rate": 1.139807689329539e-05, + "loss": 1.0137, + "step": 30550 + }, + { + "epoch": 0.4609490482367492, + "grad_norm": 0.6478012633180935, + "learning_rate": 1.139333728325502e-05, + "loss": 1.0079, + "step": 30560 + }, + { + "epoch": 0.46109988234939214, + "grad_norm": 0.6685304372185126, + "learning_rate": 1.138859735399838e-05, + "loss": 1.0104, + "step": 30570 + }, + { + "epoch": 0.461250716462035, + "grad_norm": 0.646053605448224, + "learning_rate": 1.1383857106611404e-05, + "loss": 0.9854, + "step": 30580 + }, + { + "epoch": 0.46140155057467797, + "grad_norm": 0.6710141726013358, + "learning_rate": 1.1379116542180084e-05, + "loss": 1.013, + "step": 30590 + }, + { + "epoch": 0.4615523846873209, + "grad_norm": 0.6514578129169207, + "learning_rate": 1.1374375661790493e-05, + "loss": 1.0059, + "step": 30600 + }, + { + "epoch": 0.4617032187999638, + "grad_norm": 0.7016182497907868, + "learning_rate": 1.1369634466528781e-05, + "loss": 1.0103, + "step": 30610 + }, + { + "epoch": 0.4618540529126067, + "grad_norm": 0.6405795744706863, + "learning_rate": 1.136489295748116e-05, + "loss": 1.0127, + "step": 30620 + }, + { + "epoch": 0.4620048870252496, + "grad_norm": 0.6572542097843118, + "learning_rate": 1.136015113573392e-05, + "loss": 0.9956, + "step": 30630 + }, + { + "epoch": 0.46215572113789255, + "grad_norm": 0.6777735679712056, + "learning_rate": 1.1355409002373423e-05, + "loss": 0.9898, + "step": 30640 + }, + { + "epoch": 0.46230655525053543, + "grad_norm": 0.7329386453746145, + "learning_rate": 1.1350666558486103e-05, + "loss": 0.9913, + "step": 30650 + }, + { + "epoch": 0.4624573893631784, + "grad_norm": 0.6733218855682604, + "learning_rate": 1.1345923805158458e-05, + "loss": 0.9879, + "step": 30660 + }, + { + "epoch": 0.4626082234758213, + "grad_norm": 0.6545870560047278, + "learning_rate": 1.1341180743477067e-05, + "loss": 1.0164, + "step": 30670 + }, + { + "epoch": 0.4627590575884642, + "grad_norm": 0.7098242638049536, + "learning_rate": 1.1336437374528569e-05, + "loss": 1.0023, + "step": 30680 + }, + { + "epoch": 0.46290989170110713, + "grad_norm": 0.7009275208363045, + "learning_rate": 1.1331693699399685e-05, + "loss": 1.0161, + "step": 30690 + }, + { + "epoch": 0.46306072581375, + "grad_norm": 0.6431592089750832, + "learning_rate": 1.1326949719177195e-05, + "loss": 0.9962, + "step": 30700 + }, + { + "epoch": 0.46321155992639296, + "grad_norm": 0.651761983577984, + "learning_rate": 1.1322205434947952e-05, + "loss": 1.0115, + "step": 30710 + }, + { + "epoch": 0.46336239403903584, + "grad_norm": 0.6885666638242076, + "learning_rate": 1.1317460847798894e-05, + "loss": 1.0084, + "step": 30720 + }, + { + "epoch": 0.4635132281516788, + "grad_norm": 0.6586479305665509, + "learning_rate": 1.1312715958817e-05, + "loss": 1.0087, + "step": 30730 + }, + { + "epoch": 0.4636640622643217, + "grad_norm": 0.6423331000412525, + "learning_rate": 1.130797076908934e-05, + "loss": 0.9905, + "step": 30740 + }, + { + "epoch": 0.4638148963769646, + "grad_norm": 0.644925432507295, + "learning_rate": 1.1303225279703048e-05, + "loss": 1.0064, + "step": 30750 + }, + { + "epoch": 0.46396573048960754, + "grad_norm": 0.6744634189658882, + "learning_rate": 1.129847949174532e-05, + "loss": 1.0081, + "step": 30760 + }, + { + "epoch": 0.4641165646022504, + "grad_norm": 0.6847549365915523, + "learning_rate": 1.129373340630343e-05, + "loss": 1.0183, + "step": 30770 + }, + { + "epoch": 0.46426739871489336, + "grad_norm": 0.6843098300447058, + "learning_rate": 1.1288987024464708e-05, + "loss": 1.0043, + "step": 30780 + }, + { + "epoch": 0.46441823282753625, + "grad_norm": 0.671395690586771, + "learning_rate": 1.1284240347316567e-05, + "loss": 1.0006, + "step": 30790 + }, + { + "epoch": 0.4645690669401792, + "grad_norm": 0.6533134282338396, + "learning_rate": 1.1279493375946475e-05, + "loss": 0.995, + "step": 30800 + }, + { + "epoch": 0.4647199010528221, + "grad_norm": 0.6395689992562787, + "learning_rate": 1.1274746111441977e-05, + "loss": 1.0156, + "step": 30810 + }, + { + "epoch": 0.464870735165465, + "grad_norm": 0.720929969399397, + "learning_rate": 1.1269998554890676e-05, + "loss": 1.002, + "step": 30820 + }, + { + "epoch": 0.46502156927810795, + "grad_norm": 0.666096208734155, + "learning_rate": 1.126525070738025e-05, + "loss": 0.9995, + "step": 30830 + }, + { + "epoch": 0.46517240339075083, + "grad_norm": 0.6343395158570897, + "learning_rate": 1.1260502569998437e-05, + "loss": 0.9963, + "step": 30840 + }, + { + "epoch": 0.46532323750339377, + "grad_norm": 0.6556883129877208, + "learning_rate": 1.1255754143833044e-05, + "loss": 1.0082, + "step": 30850 + }, + { + "epoch": 0.46547407161603666, + "grad_norm": 0.7067751624581715, + "learning_rate": 1.125100542997195e-05, + "loss": 0.9944, + "step": 30860 + }, + { + "epoch": 0.4656249057286796, + "grad_norm": 0.6400074508027728, + "learning_rate": 1.1246256429503086e-05, + "loss": 0.9977, + "step": 30870 + }, + { + "epoch": 0.46577573984132253, + "grad_norm": 0.6508675351788119, + "learning_rate": 1.1241507143514466e-05, + "loss": 0.9815, + "step": 30880 + }, + { + "epoch": 0.4659265739539654, + "grad_norm": 0.672820591648378, + "learning_rate": 1.1236757573094155e-05, + "loss": 0.9899, + "step": 30890 + }, + { + "epoch": 0.46607740806660836, + "grad_norm": 0.6575889787276519, + "learning_rate": 1.1232007719330294e-05, + "loss": 0.9919, + "step": 30900 + }, + { + "epoch": 0.46622824217925124, + "grad_norm": 0.7238332499494615, + "learning_rate": 1.1227257583311075e-05, + "loss": 1.0026, + "step": 30910 + }, + { + "epoch": 0.4663790762918942, + "grad_norm": 0.6552447103590051, + "learning_rate": 1.1222507166124773e-05, + "loss": 1.0217, + "step": 30920 + }, + { + "epoch": 0.46652991040453706, + "grad_norm": 0.6868920728140679, + "learning_rate": 1.121775646885971e-05, + "loss": 0.9887, + "step": 30930 + }, + { + "epoch": 0.46668074451718, + "grad_norm": 0.6448172782673858, + "learning_rate": 1.1213005492604285e-05, + "loss": 1.0095, + "step": 30940 + }, + { + "epoch": 0.46683157862982294, + "grad_norm": 0.6880425760551464, + "learning_rate": 1.1208254238446952e-05, + "loss": 1.0073, + "step": 30950 + }, + { + "epoch": 0.4669824127424658, + "grad_norm": 0.6882352445858303, + "learning_rate": 1.1203502707476235e-05, + "loss": 0.994, + "step": 30960 + }, + { + "epoch": 0.46713324685510876, + "grad_norm": 0.6622513501733773, + "learning_rate": 1.119875090078072e-05, + "loss": 0.9949, + "step": 30970 + }, + { + "epoch": 0.46728408096775165, + "grad_norm": 0.6716294358956653, + "learning_rate": 1.1193998819449053e-05, + "loss": 0.9981, + "step": 30980 + }, + { + "epoch": 0.4674349150803946, + "grad_norm": 0.6295785202239568, + "learning_rate": 1.1189246464569945e-05, + "loss": 1.0094, + "step": 30990 + }, + { + "epoch": 0.46758574919303747, + "grad_norm": 0.6777254176188708, + "learning_rate": 1.1184493837232173e-05, + "loss": 1.0062, + "step": 31000 + }, + { + "epoch": 0.4677365833056804, + "grad_norm": 0.6423164066586096, + "learning_rate": 1.1179740938524567e-05, + "loss": 1.0016, + "step": 31010 + }, + { + "epoch": 0.46788741741832335, + "grad_norm": 0.625423481408169, + "learning_rate": 1.117498776953603e-05, + "loss": 1.024, + "step": 31020 + }, + { + "epoch": 0.46803825153096623, + "grad_norm": 0.6534571115540463, + "learning_rate": 1.1170234331355521e-05, + "loss": 1.0008, + "step": 31030 + }, + { + "epoch": 0.46818908564360917, + "grad_norm": 0.6354538209002367, + "learning_rate": 1.1165480625072063e-05, + "loss": 0.9831, + "step": 31040 + }, + { + "epoch": 0.46833991975625205, + "grad_norm": 0.6392816682598482, + "learning_rate": 1.1160726651774735e-05, + "loss": 0.9812, + "step": 31050 + }, + { + "epoch": 0.468490753868895, + "grad_norm": 0.6602494071810797, + "learning_rate": 1.1155972412552687e-05, + "loss": 0.9909, + "step": 31060 + }, + { + "epoch": 0.4686415879815379, + "grad_norm": 0.6577188311139124, + "learning_rate": 1.1151217908495124e-05, + "loss": 0.9882, + "step": 31070 + }, + { + "epoch": 0.4687924220941808, + "grad_norm": 0.6278814379908018, + "learning_rate": 1.1146463140691308e-05, + "loss": 1.0083, + "step": 31080 + }, + { + "epoch": 0.46894325620682376, + "grad_norm": 0.6430768240070626, + "learning_rate": 1.1141708110230566e-05, + "loss": 1.0109, + "step": 31090 + }, + { + "epoch": 0.46909409031946664, + "grad_norm": 0.6343427616738532, + "learning_rate": 1.113695281820229e-05, + "loss": 0.9969, + "step": 31100 + }, + { + "epoch": 0.4692449244321096, + "grad_norm": 0.6796272411432887, + "learning_rate": 1.1132197265695921e-05, + "loss": 1.0133, + "step": 31110 + }, + { + "epoch": 0.46939575854475246, + "grad_norm": 0.6920755097435157, + "learning_rate": 1.1127441453800964e-05, + "loss": 0.9928, + "step": 31120 + }, + { + "epoch": 0.4695465926573954, + "grad_norm": 0.6583473020722604, + "learning_rate": 1.1122685383606993e-05, + "loss": 1.0001, + "step": 31130 + }, + { + "epoch": 0.4696974267700383, + "grad_norm": 0.6849505646815802, + "learning_rate": 1.1117929056203626e-05, + "loss": 0.9962, + "step": 31140 + }, + { + "epoch": 0.4698482608826812, + "grad_norm": 0.7208460551123579, + "learning_rate": 1.1113172472680548e-05, + "loss": 0.9767, + "step": 31150 + }, + { + "epoch": 0.46999909499532416, + "grad_norm": 0.7441518541848056, + "learning_rate": 1.1108415634127502e-05, + "loss": 1.0004, + "step": 31160 + }, + { + "epoch": 0.47014992910796705, + "grad_norm": 0.6115274230641814, + "learning_rate": 1.110365854163429e-05, + "loss": 1.0008, + "step": 31170 + }, + { + "epoch": 0.47030076322061, + "grad_norm": 0.6564113443308366, + "learning_rate": 1.1098901196290772e-05, + "loss": 0.9802, + "step": 31180 + }, + { + "epoch": 0.47045159733325287, + "grad_norm": 0.6644361204552283, + "learning_rate": 1.1094143599186862e-05, + "loss": 1.004, + "step": 31190 + }, + { + "epoch": 0.4706024314458958, + "grad_norm": 0.6704433385959152, + "learning_rate": 1.1089385751412534e-05, + "loss": 1.0064, + "step": 31200 + }, + { + "epoch": 0.4707532655585387, + "grad_norm": 0.6578297149958987, + "learning_rate": 1.1084627654057823e-05, + "loss": 1.0212, + "step": 31210 + }, + { + "epoch": 0.47090409967118163, + "grad_norm": 0.6629060591301206, + "learning_rate": 1.1079869308212817e-05, + "loss": 1.0106, + "step": 31220 + }, + { + "epoch": 0.47105493378382457, + "grad_norm": 0.6776013770624474, + "learning_rate": 1.107511071496766e-05, + "loss": 0.9959, + "step": 31230 + }, + { + "epoch": 0.47120576789646745, + "grad_norm": 0.6600473631909012, + "learning_rate": 1.1070351875412562e-05, + "loss": 1.0074, + "step": 31240 + }, + { + "epoch": 0.4713566020091104, + "grad_norm": 0.6817160581175611, + "learning_rate": 1.1065592790637778e-05, + "loss": 0.9868, + "step": 31250 + }, + { + "epoch": 0.4715074361217533, + "grad_norm": 0.6695429039733994, + "learning_rate": 1.106083346173362e-05, + "loss": 0.9865, + "step": 31260 + }, + { + "epoch": 0.4716582702343962, + "grad_norm": 0.687189722576509, + "learning_rate": 1.1056073889790464e-05, + "loss": 1.006, + "step": 31270 + }, + { + "epoch": 0.4718091043470391, + "grad_norm": 0.6816357976869212, + "learning_rate": 1.1051314075898737e-05, + "loss": 1.0112, + "step": 31280 + }, + { + "epoch": 0.47195993845968204, + "grad_norm": 0.6563301275920346, + "learning_rate": 1.1046554021148918e-05, + "loss": 0.984, + "step": 31290 + }, + { + "epoch": 0.472110772572325, + "grad_norm": 0.6405204113404191, + "learning_rate": 1.104179372663155e-05, + "loss": 0.9806, + "step": 31300 + }, + { + "epoch": 0.47226160668496786, + "grad_norm": 0.6508302632229462, + "learning_rate": 1.103703319343722e-05, + "loss": 1.015, + "step": 31310 + }, + { + "epoch": 0.4724124407976108, + "grad_norm": 0.6923789388110632, + "learning_rate": 1.1032272422656582e-05, + "loss": 0.9982, + "step": 31320 + }, + { + "epoch": 0.4725632749102537, + "grad_norm": 0.6615176240234714, + "learning_rate": 1.1027511415380335e-05, + "loss": 0.9807, + "step": 31330 + }, + { + "epoch": 0.4727141090228966, + "grad_norm": 0.7361577887415104, + "learning_rate": 1.102275017269923e-05, + "loss": 1.001, + "step": 31340 + }, + { + "epoch": 0.4728649431355395, + "grad_norm": 0.696843460157405, + "learning_rate": 1.1017988695704083e-05, + "loss": 1.0272, + "step": 31350 + }, + { + "epoch": 0.47301577724818245, + "grad_norm": 0.6541838962415797, + "learning_rate": 1.1013226985485757e-05, + "loss": 1.0126, + "step": 31360 + }, + { + "epoch": 0.4731666113608254, + "grad_norm": 0.6366985966467821, + "learning_rate": 1.1008465043135164e-05, + "loss": 0.9979, + "step": 31370 + }, + { + "epoch": 0.47331744547346827, + "grad_norm": 0.6570554618635793, + "learning_rate": 1.100370286974328e-05, + "loss": 1.0016, + "step": 31380 + }, + { + "epoch": 0.4734682795861112, + "grad_norm": 0.7142212026828937, + "learning_rate": 1.0998940466401127e-05, + "loss": 1.013, + "step": 31390 + }, + { + "epoch": 0.4736191136987541, + "grad_norm": 0.6450623143729491, + "learning_rate": 1.0994177834199779e-05, + "loss": 0.9966, + "step": 31400 + }, + { + "epoch": 0.47376994781139703, + "grad_norm": 0.6438347070064984, + "learning_rate": 1.0989414974230364e-05, + "loss": 0.9982, + "step": 31410 + }, + { + "epoch": 0.4739207819240399, + "grad_norm": 0.658703880614826, + "learning_rate": 1.0984651887584063e-05, + "loss": 1.0017, + "step": 31420 + }, + { + "epoch": 0.47407161603668285, + "grad_norm": 0.6375358577414119, + "learning_rate": 1.0979888575352114e-05, + "loss": 1.0029, + "step": 31430 + }, + { + "epoch": 0.4742224501493258, + "grad_norm": 0.6650924603715491, + "learning_rate": 1.0975125038625791e-05, + "loss": 0.9864, + "step": 31440 + }, + { + "epoch": 0.4743732842619687, + "grad_norm": 0.6844687838282264, + "learning_rate": 1.0970361278496435e-05, + "loss": 0.9972, + "step": 31450 + }, + { + "epoch": 0.4745241183746116, + "grad_norm": 0.6365585587031751, + "learning_rate": 1.0965597296055434e-05, + "loss": 0.9881, + "step": 31460 + }, + { + "epoch": 0.4746749524872545, + "grad_norm": 0.6947503798784943, + "learning_rate": 1.0960833092394222e-05, + "loss": 0.9849, + "step": 31470 + }, + { + "epoch": 0.47482578659989744, + "grad_norm": 0.6598721000562484, + "learning_rate": 1.0956068668604289e-05, + "loss": 1.0006, + "step": 31480 + }, + { + "epoch": 0.4749766207125403, + "grad_norm": 0.668780601864993, + "learning_rate": 1.0951304025777175e-05, + "loss": 1.004, + "step": 31490 + }, + { + "epoch": 0.47512745482518326, + "grad_norm": 0.8152939043154728, + "learning_rate": 1.0946539165004469e-05, + "loss": 1.0148, + "step": 31500 + }, + { + "epoch": 0.4752782889378262, + "grad_norm": 0.6739324379746514, + "learning_rate": 1.0941774087377806e-05, + "loss": 1.0, + "step": 31510 + }, + { + "epoch": 0.4754291230504691, + "grad_norm": 0.6522130439533124, + "learning_rate": 1.0937008793988875e-05, + "loss": 0.9981, + "step": 31520 + }, + { + "epoch": 0.475579957163112, + "grad_norm": 0.7014480339708147, + "learning_rate": 1.0932243285929418e-05, + "loss": 0.9954, + "step": 31530 + }, + { + "epoch": 0.4757307912757549, + "grad_norm": 0.648232517819773, + "learning_rate": 1.092747756429122e-05, + "loss": 0.9768, + "step": 31540 + }, + { + "epoch": 0.47588162538839784, + "grad_norm": 0.6688599154235035, + "learning_rate": 1.0922711630166116e-05, + "loss": 1.0104, + "step": 31550 + }, + { + "epoch": 0.47603245950104073, + "grad_norm": 0.6905890142215776, + "learning_rate": 1.0917945484645993e-05, + "loss": 0.9972, + "step": 31560 + }, + { + "epoch": 0.47618329361368367, + "grad_norm": 0.689752020275917, + "learning_rate": 1.0913179128822785e-05, + "loss": 0.9983, + "step": 31570 + }, + { + "epoch": 0.4763341277263266, + "grad_norm": 0.6833314301459718, + "learning_rate": 1.0908412563788469e-05, + "loss": 1.0036, + "step": 31580 + }, + { + "epoch": 0.4764849618389695, + "grad_norm": 0.6593429004298372, + "learning_rate": 1.0903645790635076e-05, + "loss": 1.013, + "step": 31590 + }, + { + "epoch": 0.47663579595161243, + "grad_norm": 0.6391667221064093, + "learning_rate": 1.0898878810454685e-05, + "loss": 0.9845, + "step": 31600 + }, + { + "epoch": 0.4767866300642553, + "grad_norm": 0.6489079817047378, + "learning_rate": 1.0894111624339418e-05, + "loss": 0.993, + "step": 31610 + }, + { + "epoch": 0.47693746417689825, + "grad_norm": 0.6708315114122186, + "learning_rate": 1.0889344233381444e-05, + "loss": 0.9783, + "step": 31620 + }, + { + "epoch": 0.47708829828954114, + "grad_norm": 0.6512002465981782, + "learning_rate": 1.0884576638672988e-05, + "loss": 1.0008, + "step": 31630 + }, + { + "epoch": 0.4772391324021841, + "grad_norm": 0.6420035859329198, + "learning_rate": 1.0879808841306314e-05, + "loss": 1.0192, + "step": 31640 + }, + { + "epoch": 0.477389966514827, + "grad_norm": 0.6575040516409747, + "learning_rate": 1.087504084237373e-05, + "loss": 0.9869, + "step": 31650 + }, + { + "epoch": 0.4775408006274699, + "grad_norm": 0.6615635948766404, + "learning_rate": 1.0870272642967596e-05, + "loss": 0.9972, + "step": 31660 + }, + { + "epoch": 0.47769163474011284, + "grad_norm": 0.6510262003371345, + "learning_rate": 1.0865504244180316e-05, + "loss": 0.9905, + "step": 31670 + }, + { + "epoch": 0.4778424688527557, + "grad_norm": 0.6859144004361193, + "learning_rate": 1.0860735647104337e-05, + "loss": 0.9925, + "step": 31680 + }, + { + "epoch": 0.47799330296539866, + "grad_norm": 0.7555317824237755, + "learning_rate": 1.0855966852832156e-05, + "loss": 1.0175, + "step": 31690 + }, + { + "epoch": 0.47814413707804154, + "grad_norm": 0.6447877614738826, + "learning_rate": 1.0851197862456312e-05, + "loss": 0.9915, + "step": 31700 + }, + { + "epoch": 0.4782949711906845, + "grad_norm": 0.722100186815051, + "learning_rate": 1.084642867706939e-05, + "loss": 1.0047, + "step": 31710 + }, + { + "epoch": 0.4784458053033274, + "grad_norm": 0.610888370593979, + "learning_rate": 1.084165929776402e-05, + "loss": 1.002, + "step": 31720 + }, + { + "epoch": 0.4785966394159703, + "grad_norm": 0.6891074836830926, + "learning_rate": 1.0836889725632874e-05, + "loss": 1.0041, + "step": 31730 + }, + { + "epoch": 0.47874747352861324, + "grad_norm": 0.6455477025360278, + "learning_rate": 1.0832119961768679e-05, + "loss": 1.0068, + "step": 31740 + }, + { + "epoch": 0.4788983076412561, + "grad_norm": 0.6380614963415331, + "learning_rate": 1.0827350007264186e-05, + "loss": 0.9943, + "step": 31750 + }, + { + "epoch": 0.47904914175389907, + "grad_norm": 0.6637504628011073, + "learning_rate": 1.0822579863212204e-05, + "loss": 0.9908, + "step": 31760 + }, + { + "epoch": 0.47919997586654195, + "grad_norm": 0.7899945949296222, + "learning_rate": 1.0817809530705582e-05, + "loss": 0.9931, + "step": 31770 + }, + { + "epoch": 0.4793508099791849, + "grad_norm": 0.7086269206282109, + "learning_rate": 1.0813039010837217e-05, + "loss": 0.9857, + "step": 31780 + }, + { + "epoch": 0.47950164409182783, + "grad_norm": 0.6817288970878912, + "learning_rate": 1.080826830470004e-05, + "loss": 0.9967, + "step": 31790 + }, + { + "epoch": 0.4796524782044707, + "grad_norm": 0.6539930437915229, + "learning_rate": 1.080349741338703e-05, + "loss": 0.9831, + "step": 31800 + }, + { + "epoch": 0.47980331231711365, + "grad_norm": 0.6941932024439308, + "learning_rate": 1.079872633799121e-05, + "loss": 1.0026, + "step": 31810 + }, + { + "epoch": 0.47995414642975653, + "grad_norm": 0.6894598868905929, + "learning_rate": 1.0793955079605638e-05, + "loss": 1.0023, + "step": 31820 + }, + { + "epoch": 0.4801049805423995, + "grad_norm": 0.6160107433949146, + "learning_rate": 1.078918363932342e-05, + "loss": 0.9891, + "step": 31830 + }, + { + "epoch": 0.48025581465504236, + "grad_norm": 0.7223258124753478, + "learning_rate": 1.0784412018237704e-05, + "loss": 1.0073, + "step": 31840 + }, + { + "epoch": 0.4804066487676853, + "grad_norm": 0.6822518461655718, + "learning_rate": 1.0779640217441677e-05, + "loss": 1.0235, + "step": 31850 + }, + { + "epoch": 0.48055748288032824, + "grad_norm": 0.6895011476020141, + "learning_rate": 1.0774868238028565e-05, + "loss": 0.9972, + "step": 31860 + }, + { + "epoch": 0.4807083169929711, + "grad_norm": 0.6392579483505044, + "learning_rate": 1.0770096081091644e-05, + "loss": 1.0202, + "step": 31870 + }, + { + "epoch": 0.48085915110561406, + "grad_norm": 0.6572996812497259, + "learning_rate": 1.0765323747724217e-05, + "loss": 0.9918, + "step": 31880 + }, + { + "epoch": 0.48100998521825694, + "grad_norm": 0.6625797859693447, + "learning_rate": 1.0760551239019639e-05, + "loss": 1.0053, + "step": 31890 + }, + { + "epoch": 0.4811608193308999, + "grad_norm": 0.6596979708499129, + "learning_rate": 1.0755778556071298e-05, + "loss": 0.9785, + "step": 31900 + }, + { + "epoch": 0.48131165344354276, + "grad_norm": 0.665902081424081, + "learning_rate": 1.075100569997263e-05, + "loss": 1.0101, + "step": 31910 + }, + { + "epoch": 0.4814624875561857, + "grad_norm": 0.6261268632061656, + "learning_rate": 1.0746232671817106e-05, + "loss": 1.0083, + "step": 31920 + }, + { + "epoch": 0.48161332166882864, + "grad_norm": 0.712989509201627, + "learning_rate": 1.0741459472698225e-05, + "loss": 0.9778, + "step": 31930 + }, + { + "epoch": 0.4817641557814715, + "grad_norm": 0.7223194018556074, + "learning_rate": 1.0736686103709549e-05, + "loss": 0.9879, + "step": 31940 + }, + { + "epoch": 0.48191498989411446, + "grad_norm": 0.6799132441959959, + "learning_rate": 1.0731912565944658e-05, + "loss": 1.0078, + "step": 31950 + }, + { + "epoch": 0.48206582400675735, + "grad_norm": 0.6559156634241162, + "learning_rate": 1.0727138860497183e-05, + "loss": 0.9986, + "step": 31960 + }, + { + "epoch": 0.4822166581194003, + "grad_norm": 0.6276002964335992, + "learning_rate": 1.0722364988460787e-05, + "loss": 0.9853, + "step": 31970 + }, + { + "epoch": 0.48236749223204317, + "grad_norm": 0.6421139617199075, + "learning_rate": 1.0717590950929175e-05, + "loss": 0.9909, + "step": 31980 + }, + { + "epoch": 0.4825183263446861, + "grad_norm": 0.6753472848897186, + "learning_rate": 1.071281674899609e-05, + "loss": 0.9934, + "step": 31990 + }, + { + "epoch": 0.48266916045732905, + "grad_norm": 0.6690187406183681, + "learning_rate": 1.0708042383755307e-05, + "loss": 0.9967, + "step": 32000 + }, + { + "epoch": 0.48281999456997193, + "grad_norm": 0.6763313138947558, + "learning_rate": 1.070326785630064e-05, + "loss": 1.0241, + "step": 32010 + }, + { + "epoch": 0.48297082868261487, + "grad_norm": 0.6501468196280137, + "learning_rate": 1.069849316772595e-05, + "loss": 0.9699, + "step": 32020 + }, + { + "epoch": 0.48312166279525776, + "grad_norm": 0.6645322089652861, + "learning_rate": 1.069371831912512e-05, + "loss": 1.0, + "step": 32030 + }, + { + "epoch": 0.4832724969079007, + "grad_norm": 0.637981784462506, + "learning_rate": 1.0688943311592082e-05, + "loss": 0.9975, + "step": 32040 + }, + { + "epoch": 0.4834233310205436, + "grad_norm": 0.643395114913876, + "learning_rate": 1.0684168146220796e-05, + "loss": 0.9947, + "step": 32050 + }, + { + "epoch": 0.4835741651331865, + "grad_norm": 0.6779030577851296, + "learning_rate": 1.0679392824105268e-05, + "loss": 0.996, + "step": 32060 + }, + { + "epoch": 0.48372499924582946, + "grad_norm": 0.6253848151216959, + "learning_rate": 1.0674617346339527e-05, + "loss": 0.9915, + "step": 32070 + }, + { + "epoch": 0.48387583335847234, + "grad_norm": 0.6748756205720792, + "learning_rate": 1.0669841714017646e-05, + "loss": 0.9885, + "step": 32080 + }, + { + "epoch": 0.4840266674711153, + "grad_norm": 0.6629802669230057, + "learning_rate": 1.0665065928233732e-05, + "loss": 1.0147, + "step": 32090 + }, + { + "epoch": 0.48417750158375816, + "grad_norm": 0.6170771840331436, + "learning_rate": 1.066028999008193e-05, + "loss": 0.9906, + "step": 32100 + }, + { + "epoch": 0.4843283356964011, + "grad_norm": 0.6610376702353391, + "learning_rate": 1.0655513900656406e-05, + "loss": 0.9822, + "step": 32110 + }, + { + "epoch": 0.484479169809044, + "grad_norm": 0.6403873979495898, + "learning_rate": 1.0650737661051384e-05, + "loss": 0.9886, + "step": 32120 + }, + { + "epoch": 0.4846300039216869, + "grad_norm": 0.6649564040532417, + "learning_rate": 1.0645961272361102e-05, + "loss": 1.0091, + "step": 32130 + }, + { + "epoch": 0.48478083803432986, + "grad_norm": 0.7182086416278612, + "learning_rate": 1.0641184735679843e-05, + "loss": 0.9928, + "step": 32140 + }, + { + "epoch": 0.48493167214697275, + "grad_norm": 0.6219648642096236, + "learning_rate": 1.0636408052101924e-05, + "loss": 1.0042, + "step": 32150 + }, + { + "epoch": 0.4850825062596157, + "grad_norm": 0.6939735486131948, + "learning_rate": 1.0631631222721684e-05, + "loss": 0.9989, + "step": 32160 + }, + { + "epoch": 0.48523334037225857, + "grad_norm": 0.6572743798984825, + "learning_rate": 1.0626854248633512e-05, + "loss": 0.999, + "step": 32170 + }, + { + "epoch": 0.4853841744849015, + "grad_norm": 0.7105660636225869, + "learning_rate": 1.0622077130931813e-05, + "loss": 1.0063, + "step": 32180 + }, + { + "epoch": 0.4855350085975444, + "grad_norm": 0.7202195264506362, + "learning_rate": 1.0617299870711045e-05, + "loss": 1.0022, + "step": 32190 + }, + { + "epoch": 0.48568584271018733, + "grad_norm": 0.6455767515297582, + "learning_rate": 1.0612522469065678e-05, + "loss": 1.0011, + "step": 32200 + }, + { + "epoch": 0.48583667682283027, + "grad_norm": 0.6653378853045434, + "learning_rate": 1.060774492709023e-05, + "loss": 0.9855, + "step": 32210 + }, + { + "epoch": 0.48598751093547315, + "grad_norm": 0.6754557563082927, + "learning_rate": 1.0602967245879241e-05, + "loss": 0.9912, + "step": 32220 + }, + { + "epoch": 0.4861383450481161, + "grad_norm": 0.6502685176127756, + "learning_rate": 1.059818942652729e-05, + "loss": 0.9841, + "step": 32230 + }, + { + "epoch": 0.486289179160759, + "grad_norm": 0.6453999765233445, + "learning_rate": 1.0593411470128987e-05, + "loss": 0.9863, + "step": 32240 + }, + { + "epoch": 0.4864400132734019, + "grad_norm": 0.7075538301809482, + "learning_rate": 1.0588633377778965e-05, + "loss": 0.9928, + "step": 32250 + }, + { + "epoch": 0.4865908473860448, + "grad_norm": 0.714489923478518, + "learning_rate": 1.0583855150571896e-05, + "loss": 1.0275, + "step": 32260 + }, + { + "epoch": 0.48674168149868774, + "grad_norm": 0.6228560154593781, + "learning_rate": 1.0579076789602485e-05, + "loss": 1.0007, + "step": 32270 + }, + { + "epoch": 0.4868925156113307, + "grad_norm": 0.7037425480496695, + "learning_rate": 1.0574298295965457e-05, + "loss": 1.0035, + "step": 32280 + }, + { + "epoch": 0.48704334972397356, + "grad_norm": 0.6859600390569733, + "learning_rate": 1.0569519670755577e-05, + "loss": 0.9842, + "step": 32290 + }, + { + "epoch": 0.4871941838366165, + "grad_norm": 0.7042030526439507, + "learning_rate": 1.0564740915067642e-05, + "loss": 0.9929, + "step": 32300 + }, + { + "epoch": 0.4873450179492594, + "grad_norm": 0.6456973699946672, + "learning_rate": 1.055996202999647e-05, + "loss": 1.0227, + "step": 32310 + }, + { + "epoch": 0.4874958520619023, + "grad_norm": 0.6279938748718678, + "learning_rate": 1.0555183016636912e-05, + "loss": 0.9738, + "step": 32320 + }, + { + "epoch": 0.4876466861745452, + "grad_norm": 0.6321310604756823, + "learning_rate": 1.055040387608385e-05, + "loss": 1.0031, + "step": 32330 + }, + { + "epoch": 0.48779752028718815, + "grad_norm": 0.6565602595830288, + "learning_rate": 1.05456246094322e-05, + "loss": 1.002, + "step": 32340 + }, + { + "epoch": 0.4879483543998311, + "grad_norm": 0.6794032548879376, + "learning_rate": 1.054084521777689e-05, + "loss": 1.0214, + "step": 32350 + }, + { + "epoch": 0.48809918851247397, + "grad_norm": 0.6637574432693676, + "learning_rate": 1.0536065702212895e-05, + "loss": 0.9879, + "step": 32360 + }, + { + "epoch": 0.4882500226251169, + "grad_norm": 0.6342922060888192, + "learning_rate": 1.0531286063835209e-05, + "loss": 0.9989, + "step": 32370 + }, + { + "epoch": 0.4884008567377598, + "grad_norm": 0.6447752132572083, + "learning_rate": 1.0526506303738859e-05, + "loss": 0.997, + "step": 32380 + }, + { + "epoch": 0.48855169085040273, + "grad_norm": 0.6377744035643749, + "learning_rate": 1.0521726423018897e-05, + "loss": 1.0117, + "step": 32390 + }, + { + "epoch": 0.4887025249630456, + "grad_norm": 0.6850384297737594, + "learning_rate": 1.05169464227704e-05, + "loss": 0.9738, + "step": 32400 + }, + { + "epoch": 0.48885335907568855, + "grad_norm": 0.7191367729678281, + "learning_rate": 1.0512166304088478e-05, + "loss": 0.9927, + "step": 32410 + }, + { + "epoch": 0.4890041931883315, + "grad_norm": 0.6520469604426805, + "learning_rate": 1.0507386068068264e-05, + "loss": 1.0078, + "step": 32420 + }, + { + "epoch": 0.4891550273009744, + "grad_norm": 0.6502657798857248, + "learning_rate": 1.0502605715804922e-05, + "loss": 1.016, + "step": 32430 + }, + { + "epoch": 0.4893058614136173, + "grad_norm": 0.6702835530192756, + "learning_rate": 1.0497825248393635e-05, + "loss": 0.9955, + "step": 32440 + }, + { + "epoch": 0.4894566955262602, + "grad_norm": 0.7486460734814819, + "learning_rate": 1.0493044666929619e-05, + "loss": 0.9889, + "step": 32450 + }, + { + "epoch": 0.48960752963890314, + "grad_norm": 0.710263261956527, + "learning_rate": 1.0488263972508117e-05, + "loss": 0.9984, + "step": 32460 + }, + { + "epoch": 0.489758363751546, + "grad_norm": 0.6711851532570609, + "learning_rate": 1.0483483166224396e-05, + "loss": 0.9811, + "step": 32470 + }, + { + "epoch": 0.48990919786418896, + "grad_norm": 0.6429442933143986, + "learning_rate": 1.0478702249173746e-05, + "loss": 0.9937, + "step": 32480 + }, + { + "epoch": 0.4900600319768319, + "grad_norm": 0.655375263383407, + "learning_rate": 1.0473921222451481e-05, + "loss": 0.9797, + "step": 32490 + }, + { + "epoch": 0.4902108660894748, + "grad_norm": 0.6873163035809295, + "learning_rate": 1.0469140087152948e-05, + "loss": 1.0054, + "step": 32500 + }, + { + "epoch": 0.4903617002021177, + "grad_norm": 0.6544764899160442, + "learning_rate": 1.0464358844373514e-05, + "loss": 0.9806, + "step": 32510 + }, + { + "epoch": 0.4905125343147606, + "grad_norm": 0.6308074447669019, + "learning_rate": 1.0459577495208573e-05, + "loss": 1.0061, + "step": 32520 + }, + { + "epoch": 0.49066336842740355, + "grad_norm": 0.6488285017126064, + "learning_rate": 1.0454796040753534e-05, + "loss": 0.9747, + "step": 32530 + }, + { + "epoch": 0.49081420254004643, + "grad_norm": 0.6636533787533357, + "learning_rate": 1.0450014482103838e-05, + "loss": 0.9834, + "step": 32540 + }, + { + "epoch": 0.49096503665268937, + "grad_norm": 0.6930041437821842, + "learning_rate": 1.0445232820354962e-05, + "loss": 1.0081, + "step": 32550 + }, + { + "epoch": 0.4911158707653323, + "grad_norm": 0.7474924845592986, + "learning_rate": 1.044045105660238e-05, + "loss": 0.98, + "step": 32560 + }, + { + "epoch": 0.4912667048779752, + "grad_norm": 0.6869884301146393, + "learning_rate": 1.043566919194161e-05, + "loss": 0.991, + "step": 32570 + }, + { + "epoch": 0.49141753899061813, + "grad_norm": 0.6899036327990437, + "learning_rate": 1.0430887227468185e-05, + "loss": 0.9948, + "step": 32580 + }, + { + "epoch": 0.491568373103261, + "grad_norm": 0.6615783985039265, + "learning_rate": 1.0426105164277663e-05, + "loss": 1.0141, + "step": 32590 + }, + { + "epoch": 0.49171920721590395, + "grad_norm": 0.6498021142656142, + "learning_rate": 1.0421323003465623e-05, + "loss": 1.0119, + "step": 32600 + }, + { + "epoch": 0.49187004132854684, + "grad_norm": 0.6180167753169441, + "learning_rate": 1.0416540746127666e-05, + "loss": 0.9963, + "step": 32610 + }, + { + "epoch": 0.4920208754411898, + "grad_norm": 0.6792951779747034, + "learning_rate": 1.0411758393359417e-05, + "loss": 0.9942, + "step": 32620 + }, + { + "epoch": 0.4921717095538327, + "grad_norm": 0.6537408883713645, + "learning_rate": 1.0406975946256525e-05, + "loss": 1.0023, + "step": 32630 + }, + { + "epoch": 0.4923225436664756, + "grad_norm": 0.6441946697413862, + "learning_rate": 1.0402193405914656e-05, + "loss": 0.9882, + "step": 32640 + }, + { + "epoch": 0.49247337777911854, + "grad_norm": 0.6550018819361562, + "learning_rate": 1.0397410773429496e-05, + "loss": 0.9923, + "step": 32650 + }, + { + "epoch": 0.4926242118917614, + "grad_norm": 0.7307599027002034, + "learning_rate": 1.0392628049896765e-05, + "loss": 1.0196, + "step": 32660 + }, + { + "epoch": 0.49277504600440436, + "grad_norm": 0.6621430932177765, + "learning_rate": 1.0387845236412183e-05, + "loss": 0.9827, + "step": 32670 + }, + { + "epoch": 0.49292588011704724, + "grad_norm": 0.6512358849472679, + "learning_rate": 1.038306233407151e-05, + "loss": 0.9887, + "step": 32680 + }, + { + "epoch": 0.4930767142296902, + "grad_norm": 0.646312688792931, + "learning_rate": 1.0378279343970511e-05, + "loss": 0.9922, + "step": 32690 + }, + { + "epoch": 0.4932275483423331, + "grad_norm": 3.2166422692212637, + "learning_rate": 1.0373496267204985e-05, + "loss": 1.0236, + "step": 32700 + }, + { + "epoch": 0.493378382454976, + "grad_norm": 0.6833005803237444, + "learning_rate": 1.0368713104870743e-05, + "loss": 1.0006, + "step": 32710 + }, + { + "epoch": 0.49352921656761894, + "grad_norm": 0.717665373365784, + "learning_rate": 1.036392985806361e-05, + "loss": 0.9818, + "step": 32720 + }, + { + "epoch": 0.49368005068026183, + "grad_norm": 0.6651807980724239, + "learning_rate": 1.0359146527879448e-05, + "loss": 0.9882, + "step": 32730 + }, + { + "epoch": 0.49383088479290477, + "grad_norm": 0.6554160182262226, + "learning_rate": 1.0354363115414118e-05, + "loss": 1.0001, + "step": 32740 + }, + { + "epoch": 0.49398171890554765, + "grad_norm": 0.6395377868673514, + "learning_rate": 1.0349579621763513e-05, + "loss": 0.9842, + "step": 32750 + }, + { + "epoch": 0.4941325530181906, + "grad_norm": 0.6589216021558001, + "learning_rate": 1.0344796048023541e-05, + "loss": 0.9988, + "step": 32760 + }, + { + "epoch": 0.49428338713083353, + "grad_norm": 0.6448916456852855, + "learning_rate": 1.0340012395290126e-05, + "loss": 0.9885, + "step": 32770 + }, + { + "epoch": 0.4944342212434764, + "grad_norm": 0.6619126450443859, + "learning_rate": 1.0335228664659214e-05, + "loss": 1.0132, + "step": 32780 + }, + { + "epoch": 0.49458505535611935, + "grad_norm": 0.6612164911298921, + "learning_rate": 1.0330444857226762e-05, + "loss": 0.9953, + "step": 32790 + }, + { + "epoch": 0.49473588946876224, + "grad_norm": 0.6964741243468222, + "learning_rate": 1.032566097408876e-05, + "loss": 1.0014, + "step": 32800 + }, + { + "epoch": 0.4948867235814052, + "grad_norm": 0.7249337648904006, + "learning_rate": 1.0320877016341195e-05, + "loss": 0.9984, + "step": 32810 + }, + { + "epoch": 0.49503755769404806, + "grad_norm": 0.6607185192686346, + "learning_rate": 1.0316092985080086e-05, + "loss": 0.9924, + "step": 32820 + }, + { + "epoch": 0.495188391806691, + "grad_norm": 0.7327424996757631, + "learning_rate": 1.031130888140146e-05, + "loss": 1.0019, + "step": 32830 + }, + { + "epoch": 0.49533922591933394, + "grad_norm": 0.6586171582075501, + "learning_rate": 1.030652470640137e-05, + "loss": 0.9927, + "step": 32840 + }, + { + "epoch": 0.4954900600319768, + "grad_norm": 0.6519890093975456, + "learning_rate": 1.0301740461175875e-05, + "loss": 0.9904, + "step": 32850 + }, + { + "epoch": 0.49564089414461976, + "grad_norm": 0.6772882442300793, + "learning_rate": 1.0296956146821055e-05, + "loss": 0.9755, + "step": 32860 + }, + { + "epoch": 0.49579172825726264, + "grad_norm": 0.6481151651879454, + "learning_rate": 1.029217176443301e-05, + "loss": 0.978, + "step": 32870 + }, + { + "epoch": 0.4959425623699056, + "grad_norm": 0.6478226601875867, + "learning_rate": 1.0287387315107846e-05, + "loss": 0.9801, + "step": 32880 + }, + { + "epoch": 0.49609339648254847, + "grad_norm": 0.6688109802474821, + "learning_rate": 1.0282602799941692e-05, + "loss": 0.9924, + "step": 32890 + }, + { + "epoch": 0.4962442305951914, + "grad_norm": 0.6453201014355273, + "learning_rate": 1.0277818220030689e-05, + "loss": 0.988, + "step": 32900 + }, + { + "epoch": 0.49639506470783434, + "grad_norm": 0.7928885980826975, + "learning_rate": 1.0273033576470998e-05, + "loss": 0.9857, + "step": 32910 + }, + { + "epoch": 0.4965458988204772, + "grad_norm": 0.6753712190345057, + "learning_rate": 1.0268248870358784e-05, + "loss": 1.0036, + "step": 32920 + }, + { + "epoch": 0.49669673293312017, + "grad_norm": 0.6504847453226433, + "learning_rate": 1.0263464102790232e-05, + "loss": 0.9913, + "step": 32930 + }, + { + "epoch": 0.49684756704576305, + "grad_norm": 0.6310437048804295, + "learning_rate": 1.0258679274861549e-05, + "loss": 0.981, + "step": 32940 + }, + { + "epoch": 0.496998401158406, + "grad_norm": 0.7124764207739331, + "learning_rate": 1.0253894387668939e-05, + "loss": 0.9866, + "step": 32950 + }, + { + "epoch": 0.4971492352710489, + "grad_norm": 0.6875764306861054, + "learning_rate": 1.0249109442308634e-05, + "loss": 1.0007, + "step": 32960 + }, + { + "epoch": 0.4973000693836918, + "grad_norm": 0.7223219280012537, + "learning_rate": 1.0244324439876874e-05, + "loss": 0.9987, + "step": 32970 + }, + { + "epoch": 0.49745090349633475, + "grad_norm": 0.8316923316162685, + "learning_rate": 1.0239539381469917e-05, + "loss": 1.0048, + "step": 32980 + }, + { + "epoch": 0.49760173760897763, + "grad_norm": 0.6576426667150893, + "learning_rate": 1.0234754268184017e-05, + "loss": 0.9944, + "step": 32990 + }, + { + "epoch": 0.4977525717216206, + "grad_norm": 0.6486915744487659, + "learning_rate": 1.0229969101115462e-05, + "loss": 0.989, + "step": 33000 + }, + { + "epoch": 0.49790340583426346, + "grad_norm": 0.7084486532539052, + "learning_rate": 1.0225183881360545e-05, + "loss": 0.9901, + "step": 33010 + }, + { + "epoch": 0.4980542399469064, + "grad_norm": 0.6434253472344483, + "learning_rate": 1.022039861001556e-05, + "loss": 0.9867, + "step": 33020 + }, + { + "epoch": 0.4982050740595493, + "grad_norm": 0.6509292103662452, + "learning_rate": 1.0215613288176829e-05, + "loss": 0.9832, + "step": 33030 + }, + { + "epoch": 0.4983559081721922, + "grad_norm": 0.6790404111014833, + "learning_rate": 1.0210827916940673e-05, + "loss": 0.9961, + "step": 33040 + }, + { + "epoch": 0.49850674228483516, + "grad_norm": 0.7037983824454885, + "learning_rate": 1.0206042497403439e-05, + "loss": 0.9903, + "step": 33050 + }, + { + "epoch": 0.49865757639747804, + "grad_norm": 0.6880987753147618, + "learning_rate": 1.0201257030661467e-05, + "loss": 0.9936, + "step": 33060 + }, + { + "epoch": 0.498808410510121, + "grad_norm": 0.6681446005351639, + "learning_rate": 1.019647151781112e-05, + "loss": 0.9958, + "step": 33070 + }, + { + "epoch": 0.49895924462276386, + "grad_norm": 0.6854048646795302, + "learning_rate": 1.019168595994877e-05, + "loss": 0.9746, + "step": 33080 + }, + { + "epoch": 0.4991100787354068, + "grad_norm": 0.726747698875401, + "learning_rate": 1.0186900358170793e-05, + "loss": 0.9968, + "step": 33090 + }, + { + "epoch": 0.4992609128480497, + "grad_norm": 0.8272643389982924, + "learning_rate": 1.0182114713573581e-05, + "loss": 0.9931, + "step": 33100 + }, + { + "epoch": 0.4994117469606926, + "grad_norm": 0.6822363153570851, + "learning_rate": 1.0177329027253537e-05, + "loss": 0.9855, + "step": 33110 + }, + { + "epoch": 0.49956258107333557, + "grad_norm": 0.6537785348057953, + "learning_rate": 1.017254330030707e-05, + "loss": 1.0265, + "step": 33120 + }, + { + "epoch": 0.49971341518597845, + "grad_norm": 0.6985771880296615, + "learning_rate": 1.01677575338306e-05, + "loss": 0.9822, + "step": 33130 + }, + { + "epoch": 0.4998642492986214, + "grad_norm": 0.6625997302015765, + "learning_rate": 1.0162971728920552e-05, + "loss": 1.0176, + "step": 33140 + }, + { + "epoch": 0.5000150834112643, + "grad_norm": 0.6871500990022245, + "learning_rate": 1.0158185886673369e-05, + "loss": 0.9995, + "step": 33150 + }, + { + "epoch": 0.5001659175239072, + "grad_norm": 0.6469041674712157, + "learning_rate": 1.015340000818549e-05, + "loss": 0.993, + "step": 33160 + }, + { + "epoch": 0.5003167516365501, + "grad_norm": 0.650958260030707, + "learning_rate": 1.0148614094553373e-05, + "loss": 1.0042, + "step": 33170 + }, + { + "epoch": 0.500467585749193, + "grad_norm": 0.6243248064756268, + "learning_rate": 1.014382814687348e-05, + "loss": 0.987, + "step": 33180 + }, + { + "epoch": 0.500618419861836, + "grad_norm": 0.6455610998985986, + "learning_rate": 1.0139042166242282e-05, + "loss": 0.9938, + "step": 33190 + }, + { + "epoch": 0.5007692539744789, + "grad_norm": 0.6518649430717104, + "learning_rate": 1.013425615375625e-05, + "loss": 1.0037, + "step": 33200 + }, + { + "epoch": 0.5009200880871217, + "grad_norm": 0.6240919187975399, + "learning_rate": 1.0129470110511873e-05, + "loss": 0.9917, + "step": 33210 + }, + { + "epoch": 0.5010709221997647, + "grad_norm": 0.6374784925489163, + "learning_rate": 1.012468403760565e-05, + "loss": 0.9952, + "step": 33220 + }, + { + "epoch": 0.5012217563124076, + "grad_norm": 0.6329242550007285, + "learning_rate": 1.0119897936134069e-05, + "loss": 0.9833, + "step": 33230 + }, + { + "epoch": 0.5013725904250506, + "grad_norm": 0.6479707191944828, + "learning_rate": 1.0115111807193638e-05, + "loss": 0.9781, + "step": 33240 + }, + { + "epoch": 0.5015234245376935, + "grad_norm": 0.673743150156272, + "learning_rate": 1.0110325651880868e-05, + "loss": 0.9917, + "step": 33250 + }, + { + "epoch": 0.5016742586503363, + "grad_norm": 0.6441197822455365, + "learning_rate": 1.0105539471292281e-05, + "loss": 0.989, + "step": 33260 + }, + { + "epoch": 0.5018250927629793, + "grad_norm": 0.6892820408799094, + "learning_rate": 1.0100753266524395e-05, + "loss": 0.9934, + "step": 33270 + }, + { + "epoch": 0.5019759268756222, + "grad_norm": 0.6907672947947391, + "learning_rate": 1.009596703867374e-05, + "loss": 0.9889, + "step": 33280 + }, + { + "epoch": 0.5021267609882651, + "grad_norm": 0.6861800073518888, + "learning_rate": 1.0091180788836852e-05, + "loss": 0.9967, + "step": 33290 + }, + { + "epoch": 0.502277595100908, + "grad_norm": 0.692802011918545, + "learning_rate": 1.0086394518110268e-05, + "loss": 0.9787, + "step": 33300 + }, + { + "epoch": 0.5024284292135509, + "grad_norm": 0.6733296801683425, + "learning_rate": 1.0081608227590532e-05, + "loss": 0.9977, + "step": 33310 + }, + { + "epoch": 0.5025792633261938, + "grad_norm": 0.6773369257781027, + "learning_rate": 1.0076821918374193e-05, + "loss": 1.0042, + "step": 33320 + }, + { + "epoch": 0.5027300974388368, + "grad_norm": 0.6650927016302877, + "learning_rate": 1.0072035591557807e-05, + "loss": 0.9986, + "step": 33330 + }, + { + "epoch": 0.5028809315514797, + "grad_norm": 0.6944573595938283, + "learning_rate": 1.0067249248237926e-05, + "loss": 0.9863, + "step": 33340 + }, + { + "epoch": 0.5030317656641226, + "grad_norm": 0.7212268708613143, + "learning_rate": 1.0062462889511111e-05, + "loss": 1.0113, + "step": 33350 + }, + { + "epoch": 0.5031825997767655, + "grad_norm": 0.6515577842892224, + "learning_rate": 1.0057676516473926e-05, + "loss": 0.9885, + "step": 33360 + }, + { + "epoch": 0.5033334338894084, + "grad_norm": 0.7396083659018897, + "learning_rate": 1.0052890130222943e-05, + "loss": 0.9923, + "step": 33370 + }, + { + "epoch": 0.5034842680020514, + "grad_norm": 0.6667609451986057, + "learning_rate": 1.0048103731854726e-05, + "loss": 0.9789, + "step": 33380 + }, + { + "epoch": 0.5036351021146943, + "grad_norm": 0.649151818884044, + "learning_rate": 1.004331732246585e-05, + "loss": 0.9996, + "step": 33390 + }, + { + "epoch": 0.5037859362273371, + "grad_norm": 0.6205552256150662, + "learning_rate": 1.0038530903152896e-05, + "loss": 0.9975, + "step": 33400 + }, + { + "epoch": 0.5039367703399801, + "grad_norm": 0.6585840300848973, + "learning_rate": 1.0033744475012435e-05, + "loss": 0.9728, + "step": 33410 + }, + { + "epoch": 0.504087604452623, + "grad_norm": 0.629556055873276, + "learning_rate": 1.002895803914105e-05, + "loss": 1.0075, + "step": 33420 + }, + { + "epoch": 0.504238438565266, + "grad_norm": 0.6381225889700782, + "learning_rate": 1.0024171596635324e-05, + "loss": 0.9923, + "step": 33430 + }, + { + "epoch": 0.5043892726779088, + "grad_norm": 0.6335892323167555, + "learning_rate": 1.0019385148591834e-05, + "loss": 0.9722, + "step": 33440 + }, + { + "epoch": 0.5045401067905517, + "grad_norm": 0.6606520112244013, + "learning_rate": 1.001459869610717e-05, + "loss": 0.9969, + "step": 33450 + }, + { + "epoch": 0.5046909409031947, + "grad_norm": 0.6417068916364653, + "learning_rate": 1.0009812240277916e-05, + "loss": 0.9952, + "step": 33460 + }, + { + "epoch": 0.5048417750158376, + "grad_norm": 0.6750852812533192, + "learning_rate": 1.0005025782200661e-05, + "loss": 0.9978, + "step": 33470 + }, + { + "epoch": 0.5049926091284805, + "grad_norm": 0.6662650412801692, + "learning_rate": 1.000023932297199e-05, + "loss": 1.0004, + "step": 33480 + }, + { + "epoch": 0.5051434432411234, + "grad_norm": 0.6664744266222817, + "learning_rate": 9.995452863688488e-06, + "loss": 0.9834, + "step": 33490 + }, + { + "epoch": 0.5052942773537663, + "grad_norm": 0.6927071801006274, + "learning_rate": 9.990666405446746e-06, + "loss": 0.994, + "step": 33500 + }, + { + "epoch": 0.5054451114664092, + "grad_norm": 0.733373185456067, + "learning_rate": 9.985879949343343e-06, + "loss": 0.9939, + "step": 33510 + }, + { + "epoch": 0.5055959455790522, + "grad_norm": 0.6618923718068919, + "learning_rate": 9.981093496474878e-06, + "loss": 0.9893, + "step": 33520 + }, + { + "epoch": 0.5057467796916951, + "grad_norm": 0.6726312292827783, + "learning_rate": 9.976307047937925e-06, + "loss": 0.9743, + "step": 33530 + }, + { + "epoch": 0.505897613804338, + "grad_norm": 0.6661327278960241, + "learning_rate": 9.971520604829074e-06, + "loss": 1.0191, + "step": 33540 + }, + { + "epoch": 0.5060484479169809, + "grad_norm": 0.7106617109088684, + "learning_rate": 9.966734168244908e-06, + "loss": 0.9884, + "step": 33550 + }, + { + "epoch": 0.5061992820296238, + "grad_norm": 0.6310127302391613, + "learning_rate": 9.96194773928201e-06, + "loss": 0.9915, + "step": 33560 + }, + { + "epoch": 0.5063501161422668, + "grad_norm": 0.6421210810780389, + "learning_rate": 9.957161319036954e-06, + "loss": 0.9693, + "step": 33570 + }, + { + "epoch": 0.5065009502549096, + "grad_norm": 0.6749406722220278, + "learning_rate": 9.952374908606329e-06, + "loss": 1.0057, + "step": 33580 + }, + { + "epoch": 0.5066517843675525, + "grad_norm": 0.6764106524794662, + "learning_rate": 9.947588509086701e-06, + "loss": 0.9934, + "step": 33590 + }, + { + "epoch": 0.5068026184801955, + "grad_norm": 0.6433099745145141, + "learning_rate": 9.942802121574647e-06, + "loss": 1.0158, + "step": 33600 + }, + { + "epoch": 0.5069534525928384, + "grad_norm": 0.6637028537137587, + "learning_rate": 9.938015747166738e-06, + "loss": 0.9928, + "step": 33610 + }, + { + "epoch": 0.5071042867054814, + "grad_norm": 0.6461853248526448, + "learning_rate": 9.933229386959539e-06, + "loss": 1.0009, + "step": 33620 + }, + { + "epoch": 0.5072551208181242, + "grad_norm": 0.6936860541074206, + "learning_rate": 9.92844304204962e-06, + "loss": 0.9823, + "step": 33630 + }, + { + "epoch": 0.5074059549307671, + "grad_norm": 0.6692267445160647, + "learning_rate": 9.923656713533539e-06, + "loss": 0.9891, + "step": 33640 + }, + { + "epoch": 0.5075567890434101, + "grad_norm": 0.6257249982965373, + "learning_rate": 9.918870402507848e-06, + "loss": 0.9896, + "step": 33650 + }, + { + "epoch": 0.507707623156053, + "grad_norm": 0.6404960478509439, + "learning_rate": 9.91408411006911e-06, + "loss": 0.9951, + "step": 33660 + }, + { + "epoch": 0.5078584572686959, + "grad_norm": 0.650703319407212, + "learning_rate": 9.909297837313868e-06, + "loss": 0.9971, + "step": 33670 + }, + { + "epoch": 0.5080092913813388, + "grad_norm": 0.6579024988062621, + "learning_rate": 9.904511585338664e-06, + "loss": 0.9781, + "step": 33680 + }, + { + "epoch": 0.5081601254939817, + "grad_norm": 0.6247355414463139, + "learning_rate": 9.899725355240042e-06, + "loss": 0.9868, + "step": 33690 + }, + { + "epoch": 0.5083109596066246, + "grad_norm": 0.6509778602773414, + "learning_rate": 9.894939148114537e-06, + "loss": 0.9783, + "step": 33700 + }, + { + "epoch": 0.5084617937192676, + "grad_norm": 0.6747964461506533, + "learning_rate": 9.890152965058672e-06, + "loss": 0.9998, + "step": 33710 + }, + { + "epoch": 0.5086126278319104, + "grad_norm": 0.6684147325341045, + "learning_rate": 9.88536680716898e-06, + "loss": 1.0008, + "step": 33720 + }, + { + "epoch": 0.5087634619445534, + "grad_norm": 0.6754969763096504, + "learning_rate": 9.880580675541974e-06, + "loss": 0.9847, + "step": 33730 + }, + { + "epoch": 0.5089142960571963, + "grad_norm": 0.6860019109523343, + "learning_rate": 9.875794571274163e-06, + "loss": 1.0118, + "step": 33740 + }, + { + "epoch": 0.5090651301698392, + "grad_norm": 0.7038149623186783, + "learning_rate": 9.871008495462059e-06, + "loss": 0.9676, + "step": 33750 + }, + { + "epoch": 0.5092159642824822, + "grad_norm": 0.6559320964229431, + "learning_rate": 9.866222449202156e-06, + "loss": 0.9697, + "step": 33760 + }, + { + "epoch": 0.509366798395125, + "grad_norm": 0.652560203738097, + "learning_rate": 9.86143643359095e-06, + "loss": 1.0078, + "step": 33770 + }, + { + "epoch": 0.5095176325077679, + "grad_norm": 0.6974602862755171, + "learning_rate": 9.856650449724927e-06, + "loss": 0.9938, + "step": 33780 + }, + { + "epoch": 0.5096684666204109, + "grad_norm": 0.8130727365562939, + "learning_rate": 9.851864498700561e-06, + "loss": 0.992, + "step": 33790 + }, + { + "epoch": 0.5098193007330538, + "grad_norm": 0.6647082715701118, + "learning_rate": 9.847078581614324e-06, + "loss": 0.9979, + "step": 33800 + }, + { + "epoch": 0.5099701348456968, + "grad_norm": 0.6539836667372635, + "learning_rate": 9.842292699562684e-06, + "loss": 0.9997, + "step": 33810 + }, + { + "epoch": 0.5101209689583396, + "grad_norm": 0.6396078625377692, + "learning_rate": 9.837506853642088e-06, + "loss": 0.9996, + "step": 33820 + }, + { + "epoch": 0.5102718030709825, + "grad_norm": 0.6555940807631437, + "learning_rate": 9.832721044948988e-06, + "loss": 0.9956, + "step": 33830 + }, + { + "epoch": 0.5104226371836255, + "grad_norm": 0.6705359278390137, + "learning_rate": 9.827935274579823e-06, + "loss": 1.0045, + "step": 33840 + }, + { + "epoch": 0.5105734712962684, + "grad_norm": 0.6604654646949217, + "learning_rate": 9.823149543631014e-06, + "loss": 1.0052, + "step": 33850 + }, + { + "epoch": 0.5107243054089112, + "grad_norm": 0.6526058759951614, + "learning_rate": 9.81836385319899e-06, + "loss": 1.0011, + "step": 33860 + }, + { + "epoch": 0.5108751395215542, + "grad_norm": 0.6472499259071329, + "learning_rate": 9.813578204380158e-06, + "loss": 0.9745, + "step": 33870 + }, + { + "epoch": 0.5110259736341971, + "grad_norm": 0.639741969347573, + "learning_rate": 9.808792598270919e-06, + "loss": 0.9915, + "step": 33880 + }, + { + "epoch": 0.51117680774684, + "grad_norm": 0.6498472990413229, + "learning_rate": 9.804007035967669e-06, + "loss": 0.9844, + "step": 33890 + }, + { + "epoch": 0.511327641859483, + "grad_norm": 0.7516494021074213, + "learning_rate": 9.799221518566782e-06, + "loss": 0.9963, + "step": 33900 + }, + { + "epoch": 0.5114784759721258, + "grad_norm": 0.6559892125898007, + "learning_rate": 9.794436047164638e-06, + "loss": 0.9891, + "step": 33910 + }, + { + "epoch": 0.5116293100847688, + "grad_norm": 0.6961477426549432, + "learning_rate": 9.789650622857594e-06, + "loss": 0.9767, + "step": 33920 + }, + { + "epoch": 0.5117801441974117, + "grad_norm": 0.6245902829797015, + "learning_rate": 9.784865246741995e-06, + "loss": 0.9799, + "step": 33930 + }, + { + "epoch": 0.5119309783100546, + "grad_norm": 0.6613627350021025, + "learning_rate": 9.780079919914187e-06, + "loss": 0.9815, + "step": 33940 + }, + { + "epoch": 0.5120818124226976, + "grad_norm": 0.6612473543891023, + "learning_rate": 9.775294643470494e-06, + "loss": 0.9828, + "step": 33950 + }, + { + "epoch": 0.5122326465353404, + "grad_norm": 0.695734457698255, + "learning_rate": 9.770509418507231e-06, + "loss": 0.9911, + "step": 33960 + }, + { + "epoch": 0.5123834806479833, + "grad_norm": 0.6420196071673058, + "learning_rate": 9.76572424612071e-06, + "loss": 1.0156, + "step": 33970 + }, + { + "epoch": 0.5125343147606263, + "grad_norm": 0.681425821525902, + "learning_rate": 9.760939127407216e-06, + "loss": 0.9856, + "step": 33980 + }, + { + "epoch": 0.5126851488732692, + "grad_norm": 0.6688228164137794, + "learning_rate": 9.756154063463026e-06, + "loss": 0.9967, + "step": 33990 + }, + { + "epoch": 0.512835982985912, + "grad_norm": 0.7329478234328209, + "learning_rate": 9.751369055384418e-06, + "loss": 0.9905, + "step": 34000 + }, + { + "epoch": 0.512986817098555, + "grad_norm": 0.6472401174795505, + "learning_rate": 9.746584104267635e-06, + "loss": 0.995, + "step": 34010 + }, + { + "epoch": 0.5131376512111979, + "grad_norm": 0.7246124411506062, + "learning_rate": 9.741799211208927e-06, + "loss": 0.9922, + "step": 34020 + }, + { + "epoch": 0.5132884853238409, + "grad_norm": 0.6555914312175755, + "learning_rate": 9.73701437730452e-06, + "loss": 0.9862, + "step": 34030 + }, + { + "epoch": 0.5134393194364838, + "grad_norm": 0.6987282352125477, + "learning_rate": 9.732229603650628e-06, + "loss": 0.9769, + "step": 34040 + }, + { + "epoch": 0.5135901535491266, + "grad_norm": 0.652352244061427, + "learning_rate": 9.72744489134345e-06, + "loss": 0.9913, + "step": 34050 + }, + { + "epoch": 0.5137409876617696, + "grad_norm": 0.6960522761449126, + "learning_rate": 9.722660241479178e-06, + "loss": 0.998, + "step": 34060 + }, + { + "epoch": 0.5138918217744125, + "grad_norm": 0.6927835123075601, + "learning_rate": 9.717875655153976e-06, + "loss": 0.9955, + "step": 34070 + }, + { + "epoch": 0.5140426558870554, + "grad_norm": 0.634716503111135, + "learning_rate": 9.713091133464014e-06, + "loss": 0.9995, + "step": 34080 + }, + { + "epoch": 0.5141934899996984, + "grad_norm": 0.6384456635448562, + "learning_rate": 9.708306677505428e-06, + "loss": 0.9974, + "step": 34090 + }, + { + "epoch": 0.5143443241123412, + "grad_norm": 0.6690648445523008, + "learning_rate": 9.703522288374342e-06, + "loss": 0.9981, + "step": 34100 + }, + { + "epoch": 0.5144951582249842, + "grad_norm": 0.6544385476967834, + "learning_rate": 9.698737967166875e-06, + "loss": 1.0152, + "step": 34110 + }, + { + "epoch": 0.5146459923376271, + "grad_norm": 0.7317122934439729, + "learning_rate": 9.693953714979125e-06, + "loss": 0.9859, + "step": 34120 + }, + { + "epoch": 0.51479682645027, + "grad_norm": 0.6481093461296542, + "learning_rate": 9.689169532907168e-06, + "loss": 0.9924, + "step": 34130 + }, + { + "epoch": 0.5149476605629129, + "grad_norm": 0.7306598725224159, + "learning_rate": 9.684385422047077e-06, + "loss": 0.992, + "step": 34140 + }, + { + "epoch": 0.5150984946755558, + "grad_norm": 0.7176492425441865, + "learning_rate": 9.679601383494891e-06, + "loss": 1.0244, + "step": 34150 + }, + { + "epoch": 0.5152493287881987, + "grad_norm": 0.7052626948755042, + "learning_rate": 9.674817418346653e-06, + "loss": 0.9776, + "step": 34160 + }, + { + "epoch": 0.5154001629008417, + "grad_norm": 0.6881392585671319, + "learning_rate": 9.670033527698374e-06, + "loss": 0.9636, + "step": 34170 + }, + { + "epoch": 0.5155509970134846, + "grad_norm": 0.653931364997754, + "learning_rate": 9.665249712646048e-06, + "loss": 1.0056, + "step": 34180 + }, + { + "epoch": 0.5157018311261274, + "grad_norm": 0.6800796153856659, + "learning_rate": 9.660465974285666e-06, + "loss": 0.9856, + "step": 34190 + }, + { + "epoch": 0.5158526652387704, + "grad_norm": 0.6878039023747752, + "learning_rate": 9.655682313713183e-06, + "loss": 0.9883, + "step": 34200 + }, + { + "epoch": 0.5160034993514133, + "grad_norm": 0.6426101052123798, + "learning_rate": 9.650898732024546e-06, + "loss": 0.979, + "step": 34210 + }, + { + "epoch": 0.5161543334640563, + "grad_norm": 0.6062639362270398, + "learning_rate": 9.646115230315689e-06, + "loss": 0.9832, + "step": 34220 + }, + { + "epoch": 0.5163051675766992, + "grad_norm": 0.6483831197261676, + "learning_rate": 9.64133180968252e-06, + "loss": 0.9872, + "step": 34230 + }, + { + "epoch": 0.516456001689342, + "grad_norm": 0.6658452684229235, + "learning_rate": 9.636548471220922e-06, + "loss": 0.9759, + "step": 34240 + }, + { + "epoch": 0.516606835801985, + "grad_norm": 0.6419159783013374, + "learning_rate": 9.631765216026775e-06, + "loss": 1.0023, + "step": 34250 + }, + { + "epoch": 0.5167576699146279, + "grad_norm": 0.7194869282318134, + "learning_rate": 9.626982045195932e-06, + "loss": 0.9955, + "step": 34260 + }, + { + "epoch": 0.5169085040272708, + "grad_norm": 0.686353169116326, + "learning_rate": 9.622198959824219e-06, + "loss": 1.0119, + "step": 34270 + }, + { + "epoch": 0.5170593381399137, + "grad_norm": 0.6637541599912709, + "learning_rate": 9.617415961007457e-06, + "loss": 0.9922, + "step": 34280 + }, + { + "epoch": 0.5172101722525566, + "grad_norm": 0.6568900843641359, + "learning_rate": 9.612633049841442e-06, + "loss": 0.9858, + "step": 34290 + }, + { + "epoch": 0.5173610063651995, + "grad_norm": 0.6378777763562573, + "learning_rate": 9.607850227421943e-06, + "loss": 0.9827, + "step": 34300 + }, + { + "epoch": 0.5175118404778425, + "grad_norm": 0.7687412422851835, + "learning_rate": 9.603067494844716e-06, + "loss": 0.9944, + "step": 34310 + }, + { + "epoch": 0.5176626745904854, + "grad_norm": 0.7306993017195902, + "learning_rate": 9.598284853205492e-06, + "loss": 0.9819, + "step": 34320 + }, + { + "epoch": 0.5178135087031283, + "grad_norm": 0.6314528520914892, + "learning_rate": 9.593502303599989e-06, + "loss": 1.0055, + "step": 34330 + }, + { + "epoch": 0.5179643428157712, + "grad_norm": 0.6426367895869375, + "learning_rate": 9.588719847123897e-06, + "loss": 0.9841, + "step": 34340 + }, + { + "epoch": 0.5181151769284141, + "grad_norm": 0.6592943533990392, + "learning_rate": 9.583937484872877e-06, + "loss": 0.9875, + "step": 34350 + }, + { + "epoch": 0.5182660110410571, + "grad_norm": 0.6452811300531888, + "learning_rate": 9.57915521794259e-06, + "loss": 0.985, + "step": 34360 + }, + { + "epoch": 0.5184168451537, + "grad_norm": 0.7093341324194486, + "learning_rate": 9.574373047428655e-06, + "loss": 0.9922, + "step": 34370 + }, + { + "epoch": 0.5185676792663428, + "grad_norm": 0.657753703779544, + "learning_rate": 9.569590974426675e-06, + "loss": 0.9859, + "step": 34380 + }, + { + "epoch": 0.5187185133789858, + "grad_norm": 0.66006706120135, + "learning_rate": 9.564809000032243e-06, + "loss": 0.9832, + "step": 34390 + }, + { + "epoch": 0.5188693474916287, + "grad_norm": 0.628775587974248, + "learning_rate": 9.560027125340909e-06, + "loss": 1.0093, + "step": 34400 + }, + { + "epoch": 0.5190201816042717, + "grad_norm": 0.6947888294668104, + "learning_rate": 9.555245351448208e-06, + "loss": 0.9886, + "step": 34410 + }, + { + "epoch": 0.5191710157169145, + "grad_norm": 0.6224546482932419, + "learning_rate": 9.550463679449662e-06, + "loss": 0.9871, + "step": 34420 + }, + { + "epoch": 0.5193218498295574, + "grad_norm": 0.6991151247316871, + "learning_rate": 9.545682110440753e-06, + "loss": 0.9893, + "step": 34430 + }, + { + "epoch": 0.5194726839422004, + "grad_norm": 0.8142786472344392, + "learning_rate": 9.540900645516957e-06, + "loss": 1.0027, + "step": 34440 + }, + { + "epoch": 0.5196235180548433, + "grad_norm": 0.6670172708193125, + "learning_rate": 9.536119285773709e-06, + "loss": 1.002, + "step": 34450 + }, + { + "epoch": 0.5197743521674862, + "grad_norm": 0.6531200709984576, + "learning_rate": 9.531338032306428e-06, + "loss": 1.0034, + "step": 34460 + }, + { + "epoch": 0.5199251862801291, + "grad_norm": 0.6355442987527872, + "learning_rate": 9.526556886210516e-06, + "loss": 0.9843, + "step": 34470 + }, + { + "epoch": 0.520076020392772, + "grad_norm": 0.7412008410836082, + "learning_rate": 9.521775848581336e-06, + "loss": 0.9819, + "step": 34480 + }, + { + "epoch": 0.520226854505415, + "grad_norm": 0.6723044002593673, + "learning_rate": 9.51699492051423e-06, + "loss": 1.0029, + "step": 34490 + }, + { + "epoch": 0.5203776886180579, + "grad_norm": 0.6571912244301227, + "learning_rate": 9.512214103104528e-06, + "loss": 0.9822, + "step": 34500 + }, + { + "epoch": 0.5205285227307008, + "grad_norm": 0.6378817822506019, + "learning_rate": 9.50743339744752e-06, + "loss": 0.9986, + "step": 34510 + }, + { + "epoch": 0.5206793568433437, + "grad_norm": 0.6470269889586212, + "learning_rate": 9.502652804638469e-06, + "loss": 0.9917, + "step": 34520 + }, + { + "epoch": 0.5208301909559866, + "grad_norm": 0.6899886650465303, + "learning_rate": 9.497872325772625e-06, + "loss": 0.9986, + "step": 34530 + }, + { + "epoch": 0.5209810250686295, + "grad_norm": 0.7142022322405575, + "learning_rate": 9.493091961945204e-06, + "loss": 0.9922, + "step": 34540 + }, + { + "epoch": 0.5211318591812725, + "grad_norm": 0.6674143811196738, + "learning_rate": 9.488311714251392e-06, + "loss": 0.9874, + "step": 34550 + }, + { + "epoch": 0.5212826932939153, + "grad_norm": 0.6515531328784513, + "learning_rate": 9.48353158378636e-06, + "loss": 0.9746, + "step": 34560 + }, + { + "epoch": 0.5214335274065582, + "grad_norm": 0.6288706195682003, + "learning_rate": 9.478751571645239e-06, + "loss": 0.9857, + "step": 34570 + }, + { + "epoch": 0.5215843615192012, + "grad_norm": 0.6594576724086026, + "learning_rate": 9.473971678923147e-06, + "loss": 0.9922, + "step": 34580 + }, + { + "epoch": 0.5217351956318441, + "grad_norm": 0.6441221303368957, + "learning_rate": 9.469191906715158e-06, + "loss": 0.9923, + "step": 34590 + }, + { + "epoch": 0.5218860297444871, + "grad_norm": 0.661098837661348, + "learning_rate": 9.464412256116329e-06, + "loss": 1.0052, + "step": 34600 + }, + { + "epoch": 0.5220368638571299, + "grad_norm": 0.6612880496563285, + "learning_rate": 9.459632728221692e-06, + "loss": 0.9772, + "step": 34610 + }, + { + "epoch": 0.5221876979697728, + "grad_norm": 0.6391036788095179, + "learning_rate": 9.45485332412624e-06, + "loss": 0.9961, + "step": 34620 + }, + { + "epoch": 0.5223385320824158, + "grad_norm": 0.6521192898121071, + "learning_rate": 9.450074044924945e-06, + "loss": 0.9791, + "step": 34630 + }, + { + "epoch": 0.5224893661950587, + "grad_norm": 0.6764379640609761, + "learning_rate": 9.445294891712756e-06, + "loss": 0.9808, + "step": 34640 + }, + { + "epoch": 0.5226402003077016, + "grad_norm": 0.6591791728347953, + "learning_rate": 9.440515865584578e-06, + "loss": 0.993, + "step": 34650 + }, + { + "epoch": 0.5227910344203445, + "grad_norm": 0.6658472050206968, + "learning_rate": 9.435736967635296e-06, + "loss": 0.9986, + "step": 34660 + }, + { + "epoch": 0.5229418685329874, + "grad_norm": 0.6905826177701958, + "learning_rate": 9.430958198959769e-06, + "loss": 0.9789, + "step": 34670 + }, + { + "epoch": 0.5230927026456303, + "grad_norm": 0.6545986557821777, + "learning_rate": 9.426179560652818e-06, + "loss": 0.9981, + "step": 34680 + }, + { + "epoch": 0.5232435367582733, + "grad_norm": 0.6147169098424499, + "learning_rate": 9.421401053809244e-06, + "loss": 0.9769, + "step": 34690 + }, + { + "epoch": 0.5233943708709161, + "grad_norm": 0.7128366865659569, + "learning_rate": 9.416622679523806e-06, + "loss": 0.974, + "step": 34700 + }, + { + "epoch": 0.523545204983559, + "grad_norm": 0.6477592436670714, + "learning_rate": 9.41184443889124e-06, + "loss": 1.0111, + "step": 34710 + }, + { + "epoch": 0.523696039096202, + "grad_norm": 0.7865246675672978, + "learning_rate": 9.407066333006253e-06, + "loss": 0.9931, + "step": 34720 + }, + { + "epoch": 0.5238468732088449, + "grad_norm": 0.6918870619081838, + "learning_rate": 9.402288362963518e-06, + "loss": 0.9908, + "step": 34730 + }, + { + "epoch": 0.5239977073214879, + "grad_norm": 0.7189122465964287, + "learning_rate": 9.397510529857672e-06, + "loss": 0.9878, + "step": 34740 + }, + { + "epoch": 0.5241485414341307, + "grad_norm": 0.6446272014800184, + "learning_rate": 9.392732834783333e-06, + "loss": 0.9827, + "step": 34750 + }, + { + "epoch": 0.5242993755467736, + "grad_norm": 0.6508883638597867, + "learning_rate": 9.387955278835077e-06, + "loss": 0.9755, + "step": 34760 + }, + { + "epoch": 0.5244502096594166, + "grad_norm": 0.6644495811814283, + "learning_rate": 9.383177863107448e-06, + "loss": 0.9903, + "step": 34770 + }, + { + "epoch": 0.5246010437720595, + "grad_norm": 0.6805791921654951, + "learning_rate": 9.378400588694964e-06, + "loss": 0.996, + "step": 34780 + }, + { + "epoch": 0.5247518778847025, + "grad_norm": 0.6746259122483247, + "learning_rate": 9.373623456692114e-06, + "loss": 0.9666, + "step": 34790 + }, + { + "epoch": 0.5249027119973453, + "grad_norm": 0.6638404415858523, + "learning_rate": 9.368846468193335e-06, + "loss": 0.9811, + "step": 34800 + }, + { + "epoch": 0.5250535461099882, + "grad_norm": 0.6437194227641655, + "learning_rate": 9.364069624293058e-06, + "loss": 0.9863, + "step": 34810 + }, + { + "epoch": 0.5252043802226312, + "grad_norm": 0.6814613883247579, + "learning_rate": 9.359292926085655e-06, + "loss": 0.9928, + "step": 34820 + }, + { + "epoch": 0.5253552143352741, + "grad_norm": 0.6866265532707536, + "learning_rate": 9.354516374665487e-06, + "loss": 0.9769, + "step": 34830 + }, + { + "epoch": 0.5255060484479169, + "grad_norm": 0.642754181284806, + "learning_rate": 9.349739971126868e-06, + "loss": 0.9772, + "step": 34840 + }, + { + "epoch": 0.5256568825605599, + "grad_norm": 0.6240339889978209, + "learning_rate": 9.344963716564074e-06, + "loss": 0.9929, + "step": 34850 + }, + { + "epoch": 0.5258077166732028, + "grad_norm": 0.6290327529937002, + "learning_rate": 9.340187612071366e-06, + "loss": 0.9785, + "step": 34860 + }, + { + "epoch": 0.5259585507858457, + "grad_norm": 0.6392364279663161, + "learning_rate": 9.33541165874295e-06, + "loss": 0.9979, + "step": 34870 + }, + { + "epoch": 0.5261093848984887, + "grad_norm": 0.6652054936742798, + "learning_rate": 9.330635857673008e-06, + "loss": 0.9859, + "step": 34880 + }, + { + "epoch": 0.5262602190111315, + "grad_norm": 0.6511516409656505, + "learning_rate": 9.32586020995569e-06, + "loss": 0.9772, + "step": 34890 + }, + { + "epoch": 0.5264110531237745, + "grad_norm": 0.6693205054902186, + "learning_rate": 9.321084716685103e-06, + "loss": 0.9857, + "step": 34900 + }, + { + "epoch": 0.5265618872364174, + "grad_norm": 0.6431487713193376, + "learning_rate": 9.316309378955318e-06, + "loss": 0.9974, + "step": 34910 + }, + { + "epoch": 0.5267127213490603, + "grad_norm": 0.6636818969097897, + "learning_rate": 9.31153419786038e-06, + "loss": 0.9992, + "step": 34920 + }, + { + "epoch": 0.5268635554617033, + "grad_norm": 0.680611315186257, + "learning_rate": 9.306759174494288e-06, + "loss": 0.9761, + "step": 34930 + }, + { + "epoch": 0.5270143895743461, + "grad_norm": 0.6267541594251017, + "learning_rate": 9.30198430995101e-06, + "loss": 0.9674, + "step": 34940 + }, + { + "epoch": 0.527165223686989, + "grad_norm": 0.6787334314167168, + "learning_rate": 9.29720960532448e-06, + "loss": 0.9966, + "step": 34950 + }, + { + "epoch": 0.527316057799632, + "grad_norm": 0.6280074234295887, + "learning_rate": 9.292435061708588e-06, + "loss": 0.972, + "step": 34960 + }, + { + "epoch": 0.5274668919122749, + "grad_norm": 0.6807395487065518, + "learning_rate": 9.287660680197192e-06, + "loss": 0.9726, + "step": 34970 + }, + { + "epoch": 0.5276177260249177, + "grad_norm": 0.6290107070854487, + "learning_rate": 9.282886461884116e-06, + "loss": 0.975, + "step": 34980 + }, + { + "epoch": 0.5277685601375607, + "grad_norm": 0.6397101597027268, + "learning_rate": 9.278112407863135e-06, + "loss": 0.976, + "step": 34990 + }, + { + "epoch": 0.5279193942502036, + "grad_norm": 0.7157064130426366, + "learning_rate": 9.273338519228004e-06, + "loss": 0.9904, + "step": 35000 + }, + { + "epoch": 0.5280702283628466, + "grad_norm": 0.7090777543813149, + "learning_rate": 9.268564797072423e-06, + "loss": 0.9676, + "step": 35010 + }, + { + "epoch": 0.5282210624754895, + "grad_norm": 0.6832862304724994, + "learning_rate": 9.263791242490057e-06, + "loss": 0.9911, + "step": 35020 + }, + { + "epoch": 0.5283718965881323, + "grad_norm": 0.6814829290300554, + "learning_rate": 9.259017856574547e-06, + "loss": 0.995, + "step": 35030 + }, + { + "epoch": 0.5285227307007753, + "grad_norm": 0.6341638920653571, + "learning_rate": 9.25424464041948e-06, + "loss": 0.9864, + "step": 35040 + }, + { + "epoch": 0.5286735648134182, + "grad_norm": 0.6492774242243871, + "learning_rate": 9.249471595118407e-06, + "loss": 0.9696, + "step": 35050 + }, + { + "epoch": 0.5288243989260611, + "grad_norm": 0.6058061859201683, + "learning_rate": 9.244698721764848e-06, + "loss": 0.9916, + "step": 35060 + }, + { + "epoch": 0.5289752330387041, + "grad_norm": 0.6957611490868334, + "learning_rate": 9.239926021452272e-06, + "loss": 0.9855, + "step": 35070 + }, + { + "epoch": 0.5291260671513469, + "grad_norm": 0.7206006224223678, + "learning_rate": 9.235153495274112e-06, + "loss": 0.9777, + "step": 35080 + }, + { + "epoch": 0.5292769012639899, + "grad_norm": 0.6741282361359568, + "learning_rate": 9.230381144323772e-06, + "loss": 0.9925, + "step": 35090 + }, + { + "epoch": 0.5294277353766328, + "grad_norm": 0.647886477063416, + "learning_rate": 9.225608969694596e-06, + "loss": 0.9986, + "step": 35100 + }, + { + "epoch": 0.5295785694892757, + "grad_norm": 0.6633877496183517, + "learning_rate": 9.220836972479909e-06, + "loss": 0.9909, + "step": 35110 + }, + { + "epoch": 0.5297294036019186, + "grad_norm": 0.6750626519214912, + "learning_rate": 9.216065153772976e-06, + "loss": 0.9821, + "step": 35120 + }, + { + "epoch": 0.5298802377145615, + "grad_norm": 0.671183705955832, + "learning_rate": 9.211293514667031e-06, + "loss": 0.9852, + "step": 35130 + }, + { + "epoch": 0.5300310718272044, + "grad_norm": 0.6724302056571536, + "learning_rate": 9.206522056255274e-06, + "loss": 0.9962, + "step": 35140 + }, + { + "epoch": 0.5301819059398474, + "grad_norm": 0.6827411146152027, + "learning_rate": 9.20175077963085e-06, + "loss": 1.009, + "step": 35150 + }, + { + "epoch": 0.5303327400524903, + "grad_norm": 0.6350872609919883, + "learning_rate": 9.196979685886861e-06, + "loss": 0.9864, + "step": 35160 + }, + { + "epoch": 0.5304835741651331, + "grad_norm": 0.7015627176494375, + "learning_rate": 9.192208776116384e-06, + "loss": 0.9847, + "step": 35170 + }, + { + "epoch": 0.5306344082777761, + "grad_norm": 0.7479982522481221, + "learning_rate": 9.18743805141244e-06, + "loss": 1.0014, + "step": 35180 + }, + { + "epoch": 0.530785242390419, + "grad_norm": 0.6657543134160083, + "learning_rate": 9.18266751286801e-06, + "loss": 1.0031, + "step": 35190 + }, + { + "epoch": 0.530936076503062, + "grad_norm": 0.6408324775488643, + "learning_rate": 9.177897161576035e-06, + "loss": 0.9656, + "step": 35200 + }, + { + "epoch": 0.5310869106157049, + "grad_norm": 0.6366615373177766, + "learning_rate": 9.173126998629412e-06, + "loss": 0.9823, + "step": 35210 + }, + { + "epoch": 0.5312377447283477, + "grad_norm": 0.7031969790850957, + "learning_rate": 9.168357025120992e-06, + "loss": 0.9734, + "step": 35220 + }, + { + "epoch": 0.5313885788409907, + "grad_norm": 0.6438820964794388, + "learning_rate": 9.16358724214359e-06, + "loss": 1.001, + "step": 35230 + }, + { + "epoch": 0.5315394129536336, + "grad_norm": 0.6422533238357108, + "learning_rate": 9.158817650789965e-06, + "loss": 0.9845, + "step": 35240 + }, + { + "epoch": 0.5316902470662765, + "grad_norm": 0.6502765482951612, + "learning_rate": 9.154048252152849e-06, + "loss": 0.981, + "step": 35250 + }, + { + "epoch": 0.5318410811789194, + "grad_norm": 0.6598980612541668, + "learning_rate": 9.149279047324915e-06, + "loss": 0.978, + "step": 35260 + }, + { + "epoch": 0.5319919152915623, + "grad_norm": 0.6428556158378133, + "learning_rate": 9.144510037398794e-06, + "loss": 0.9783, + "step": 35270 + }, + { + "epoch": 0.5321427494042053, + "grad_norm": 0.6195231682630076, + "learning_rate": 9.139741223467082e-06, + "loss": 0.9876, + "step": 35280 + }, + { + "epoch": 0.5322935835168482, + "grad_norm": 0.6902657155537786, + "learning_rate": 9.134972606622322e-06, + "loss": 0.9827, + "step": 35290 + }, + { + "epoch": 0.5324444176294911, + "grad_norm": 0.6461197196454141, + "learning_rate": 9.130204187957006e-06, + "loss": 0.9786, + "step": 35300 + }, + { + "epoch": 0.532595251742134, + "grad_norm": 0.6469888022354795, + "learning_rate": 9.125435968563601e-06, + "loss": 0.9734, + "step": 35310 + }, + { + "epoch": 0.5327460858547769, + "grad_norm": 0.6852621329477946, + "learning_rate": 9.120667949534507e-06, + "loss": 0.9753, + "step": 35320 + }, + { + "epoch": 0.5328969199674198, + "grad_norm": 0.6703071845188772, + "learning_rate": 9.115900131962084e-06, + "loss": 0.979, + "step": 35330 + }, + { + "epoch": 0.5330477540800628, + "grad_norm": 0.757763973073327, + "learning_rate": 9.111132516938654e-06, + "loss": 0.9827, + "step": 35340 + }, + { + "epoch": 0.5331985881927057, + "grad_norm": 0.6673297410932826, + "learning_rate": 9.106365105556484e-06, + "loss": 0.9712, + "step": 35350 + }, + { + "epoch": 0.5333494223053485, + "grad_norm": 0.7144263075996535, + "learning_rate": 9.101597898907797e-06, + "loss": 0.9977, + "step": 35360 + }, + { + "epoch": 0.5335002564179915, + "grad_norm": 0.694308890373548, + "learning_rate": 9.096830898084773e-06, + "loss": 0.9782, + "step": 35370 + }, + { + "epoch": 0.5336510905306344, + "grad_norm": 0.7295585689866024, + "learning_rate": 9.092064104179532e-06, + "loss": 0.9707, + "step": 35380 + }, + { + "epoch": 0.5338019246432774, + "grad_norm": 0.6779794492980437, + "learning_rate": 9.087297518284168e-06, + "loss": 0.9848, + "step": 35390 + }, + { + "epoch": 0.5339527587559202, + "grad_norm": 0.6656137516601495, + "learning_rate": 9.082531141490709e-06, + "loss": 0.9675, + "step": 35400 + }, + { + "epoch": 0.5341035928685631, + "grad_norm": 0.6650747562304127, + "learning_rate": 9.077764974891137e-06, + "loss": 0.9947, + "step": 35410 + }, + { + "epoch": 0.5342544269812061, + "grad_norm": 0.6453421554364559, + "learning_rate": 9.072999019577398e-06, + "loss": 0.9664, + "step": 35420 + }, + { + "epoch": 0.534405261093849, + "grad_norm": 0.641183208314457, + "learning_rate": 9.068233276641378e-06, + "loss": 0.9742, + "step": 35430 + }, + { + "epoch": 0.5345560952064919, + "grad_norm": 0.6479083757574761, + "learning_rate": 9.063467747174914e-06, + "loss": 0.9722, + "step": 35440 + }, + { + "epoch": 0.5347069293191348, + "grad_norm": 0.6882774697384891, + "learning_rate": 9.058702432269803e-06, + "loss": 0.9716, + "step": 35450 + }, + { + "epoch": 0.5348577634317777, + "grad_norm": 0.6378721309874746, + "learning_rate": 9.053937333017789e-06, + "loss": 0.9949, + "step": 35460 + }, + { + "epoch": 0.5350085975444206, + "grad_norm": 0.6653925824143756, + "learning_rate": 9.049172450510558e-06, + "loss": 0.9844, + "step": 35470 + }, + { + "epoch": 0.5351594316570636, + "grad_norm": 0.6640248299817275, + "learning_rate": 9.044407785839762e-06, + "loss": 0.9993, + "step": 35480 + }, + { + "epoch": 0.5353102657697065, + "grad_norm": 0.6706814262023147, + "learning_rate": 9.03964334009699e-06, + "loss": 0.9801, + "step": 35490 + }, + { + "epoch": 0.5354610998823494, + "grad_norm": 0.6297214916468439, + "learning_rate": 9.03487911437379e-06, + "loss": 0.978, + "step": 35500 + }, + { + "epoch": 0.5356119339949923, + "grad_norm": 0.6474720497504769, + "learning_rate": 9.030115109761653e-06, + "loss": 0.9893, + "step": 35510 + }, + { + "epoch": 0.5357627681076352, + "grad_norm": 0.6394773579731878, + "learning_rate": 9.025351327352018e-06, + "loss": 0.9892, + "step": 35520 + }, + { + "epoch": 0.5359136022202782, + "grad_norm": 0.6390561433052676, + "learning_rate": 9.02058776823628e-06, + "loss": 0.9891, + "step": 35530 + }, + { + "epoch": 0.536064436332921, + "grad_norm": 0.6405305885118687, + "learning_rate": 9.015824433505786e-06, + "loss": 0.9859, + "step": 35540 + }, + { + "epoch": 0.5362152704455639, + "grad_norm": 0.684401773119734, + "learning_rate": 9.011061324251811e-06, + "loss": 0.9979, + "step": 35550 + }, + { + "epoch": 0.5363661045582069, + "grad_norm": 0.6410012496801554, + "learning_rate": 9.006298441565606e-06, + "loss": 0.9843, + "step": 35560 + }, + { + "epoch": 0.5365169386708498, + "grad_norm": 0.6437297634707807, + "learning_rate": 9.001535786538353e-06, + "loss": 0.994, + "step": 35570 + }, + { + "epoch": 0.5366677727834928, + "grad_norm": 0.6469356587701214, + "learning_rate": 8.996773360261177e-06, + "loss": 0.9977, + "step": 35580 + }, + { + "epoch": 0.5368186068961356, + "grad_norm": 0.6670026225581082, + "learning_rate": 8.99201116382517e-06, + "loss": 0.9917, + "step": 35590 + }, + { + "epoch": 0.5369694410087785, + "grad_norm": 0.6740071003311929, + "learning_rate": 8.987249198321357e-06, + "loss": 0.9752, + "step": 35600 + }, + { + "epoch": 0.5371202751214215, + "grad_norm": 0.670009303450585, + "learning_rate": 8.98248746484071e-06, + "loss": 0.9898, + "step": 35610 + }, + { + "epoch": 0.5372711092340644, + "grad_norm": 0.852847576414134, + "learning_rate": 8.977725964474154e-06, + "loss": 1.0035, + "step": 35620 + }, + { + "epoch": 0.5374219433467073, + "grad_norm": 0.738176386529366, + "learning_rate": 8.972964698312556e-06, + "loss": 0.9967, + "step": 35630 + }, + { + "epoch": 0.5375727774593502, + "grad_norm": 0.7072851676531974, + "learning_rate": 8.96820366744674e-06, + "loss": 0.9819, + "step": 35640 + }, + { + "epoch": 0.5377236115719931, + "grad_norm": 0.6896736954633516, + "learning_rate": 8.963442872967457e-06, + "loss": 0.9856, + "step": 35650 + }, + { + "epoch": 0.537874445684636, + "grad_norm": 0.6702632991340269, + "learning_rate": 8.958682315965416e-06, + "loss": 0.9929, + "step": 35660 + }, + { + "epoch": 0.538025279797279, + "grad_norm": 0.6951146063150402, + "learning_rate": 8.953921997531276e-06, + "loss": 0.9894, + "step": 35670 + }, + { + "epoch": 0.5381761139099218, + "grad_norm": 0.8266254627980824, + "learning_rate": 8.94916191875563e-06, + "loss": 1.003, + "step": 35680 + }, + { + "epoch": 0.5383269480225648, + "grad_norm": 0.685896836068571, + "learning_rate": 8.944402080729017e-06, + "loss": 0.9868, + "step": 35690 + }, + { + "epoch": 0.5384777821352077, + "grad_norm": 0.6752837226642435, + "learning_rate": 8.939642484541935e-06, + "loss": 0.9944, + "step": 35700 + }, + { + "epoch": 0.5386286162478506, + "grad_norm": 0.7176973906294379, + "learning_rate": 8.934883131284813e-06, + "loss": 0.9874, + "step": 35710 + }, + { + "epoch": 0.5387794503604936, + "grad_norm": 0.680903414919179, + "learning_rate": 8.93012402204802e-06, + "loss": 0.9985, + "step": 35720 + }, + { + "epoch": 0.5389302844731364, + "grad_norm": 0.688561045522324, + "learning_rate": 8.925365157921892e-06, + "loss": 0.9916, + "step": 35730 + }, + { + "epoch": 0.5390811185857793, + "grad_norm": 0.7359603704811906, + "learning_rate": 8.920606539996686e-06, + "loss": 0.9875, + "step": 35740 + }, + { + "epoch": 0.5392319526984223, + "grad_norm": 0.6431739260973495, + "learning_rate": 8.915848169362605e-06, + "loss": 0.9725, + "step": 35750 + }, + { + "epoch": 0.5393827868110652, + "grad_norm": 0.6440945451910254, + "learning_rate": 8.911090047109812e-06, + "loss": 0.9772, + "step": 35760 + }, + { + "epoch": 0.5395336209237082, + "grad_norm": 0.6608874248430828, + "learning_rate": 8.906332174328392e-06, + "loss": 0.9836, + "step": 35770 + }, + { + "epoch": 0.539684455036351, + "grad_norm": 0.6986393345702605, + "learning_rate": 8.901574552108391e-06, + "loss": 0.9699, + "step": 35780 + }, + { + "epoch": 0.5398352891489939, + "grad_norm": 0.6510885405838647, + "learning_rate": 8.896817181539785e-06, + "loss": 0.9723, + "step": 35790 + }, + { + "epoch": 0.5399861232616369, + "grad_norm": 0.716157358978558, + "learning_rate": 8.892060063712493e-06, + "loss": 0.9889, + "step": 35800 + }, + { + "epoch": 0.5401369573742798, + "grad_norm": 0.6478993159369478, + "learning_rate": 8.887303199716392e-06, + "loss": 0.9888, + "step": 35810 + }, + { + "epoch": 0.5402877914869226, + "grad_norm": 0.7272797839206224, + "learning_rate": 8.88254659064128e-06, + "loss": 0.9857, + "step": 35820 + }, + { + "epoch": 0.5404386255995656, + "grad_norm": 0.6364858258035466, + "learning_rate": 8.877790237576901e-06, + "loss": 0.9893, + "step": 35830 + }, + { + "epoch": 0.5405894597122085, + "grad_norm": 0.6487580612950858, + "learning_rate": 8.873034141612958e-06, + "loss": 0.9988, + "step": 35840 + }, + { + "epoch": 0.5407402938248514, + "grad_norm": 0.6843557026011511, + "learning_rate": 8.868278303839071e-06, + "loss": 1.0049, + "step": 35850 + }, + { + "epoch": 0.5408911279374944, + "grad_norm": 0.6217313925997312, + "learning_rate": 8.863522725344814e-06, + "loss": 0.9778, + "step": 35860 + }, + { + "epoch": 0.5410419620501372, + "grad_norm": 0.6303435850424773, + "learning_rate": 8.858767407219699e-06, + "loss": 0.9813, + "step": 35870 + }, + { + "epoch": 0.5411927961627802, + "grad_norm": 0.7123970318205926, + "learning_rate": 8.854012350553186e-06, + "loss": 0.9889, + "step": 35880 + }, + { + "epoch": 0.5413436302754231, + "grad_norm": 0.6495690258804231, + "learning_rate": 8.849257556434653e-06, + "loss": 0.974, + "step": 35890 + }, + { + "epoch": 0.541494464388066, + "grad_norm": 0.6354769127773098, + "learning_rate": 8.844503025953447e-06, + "loss": 0.9682, + "step": 35900 + }, + { + "epoch": 0.541645298500709, + "grad_norm": 0.6767434889409984, + "learning_rate": 8.839748760198828e-06, + "loss": 0.9752, + "step": 35910 + }, + { + "epoch": 0.5417961326133518, + "grad_norm": 0.6710926908265774, + "learning_rate": 8.83499476026002e-06, + "loss": 0.9739, + "step": 35920 + }, + { + "epoch": 0.5419469667259947, + "grad_norm": 0.7100846380957364, + "learning_rate": 8.830241027226166e-06, + "loss": 0.9823, + "step": 35930 + }, + { + "epoch": 0.5420978008386377, + "grad_norm": 0.7081429568057211, + "learning_rate": 8.825487562186353e-06, + "loss": 1.0083, + "step": 35940 + }, + { + "epoch": 0.5422486349512806, + "grad_norm": 0.661696960434364, + "learning_rate": 8.820734366229614e-06, + "loss": 0.9867, + "step": 35950 + }, + { + "epoch": 0.5423994690639234, + "grad_norm": 0.6427912804533379, + "learning_rate": 8.815981440444916e-06, + "loss": 0.9958, + "step": 35960 + }, + { + "epoch": 0.5425503031765664, + "grad_norm": 0.6436864932596137, + "learning_rate": 8.81122878592116e-06, + "loss": 0.9867, + "step": 35970 + }, + { + "epoch": 0.5427011372892093, + "grad_norm": 0.6917538117882185, + "learning_rate": 8.806476403747191e-06, + "loss": 0.9724, + "step": 35980 + }, + { + "epoch": 0.5428519714018523, + "grad_norm": 0.6723733109058107, + "learning_rate": 8.801724295011787e-06, + "loss": 0.9777, + "step": 35990 + }, + { + "epoch": 0.5430028055144952, + "grad_norm": 0.632112410822808, + "learning_rate": 8.796972460803664e-06, + "loss": 0.9982, + "step": 36000 + }, + { + "epoch": 0.543153639627138, + "grad_norm": 0.7299556533620264, + "learning_rate": 8.792220902211482e-06, + "loss": 0.9978, + "step": 36010 + }, + { + "epoch": 0.543304473739781, + "grad_norm": 0.6585961117860362, + "learning_rate": 8.787469620323827e-06, + "loss": 0.9862, + "step": 36020 + }, + { + "epoch": 0.5434553078524239, + "grad_norm": 0.6886020673666795, + "learning_rate": 8.78271861622923e-06, + "loss": 1.0097, + "step": 36030 + }, + { + "epoch": 0.5436061419650668, + "grad_norm": 0.6675327755379649, + "learning_rate": 8.777967891016152e-06, + "loss": 0.9859, + "step": 36040 + }, + { + "epoch": 0.5437569760777098, + "grad_norm": 0.6721021959648399, + "learning_rate": 8.773217445772992e-06, + "loss": 0.9785, + "step": 36050 + }, + { + "epoch": 0.5439078101903526, + "grad_norm": 0.6507225176692242, + "learning_rate": 8.768467281588096e-06, + "loss": 0.9869, + "step": 36060 + }, + { + "epoch": 0.5440586443029956, + "grad_norm": 0.6369855519097577, + "learning_rate": 8.763717399549728e-06, + "loss": 0.979, + "step": 36070 + }, + { + "epoch": 0.5442094784156385, + "grad_norm": 0.6317867300547103, + "learning_rate": 8.758967800746093e-06, + "loss": 0.9692, + "step": 36080 + }, + { + "epoch": 0.5443603125282814, + "grad_norm": 0.681791692301512, + "learning_rate": 8.754218486265341e-06, + "loss": 0.9666, + "step": 36090 + }, + { + "epoch": 0.5445111466409243, + "grad_norm": 0.6320705709957809, + "learning_rate": 8.749469457195542e-06, + "loss": 1.0153, + "step": 36100 + }, + { + "epoch": 0.5446619807535672, + "grad_norm": 0.6486324081285433, + "learning_rate": 8.74472071462471e-06, + "loss": 0.9855, + "step": 36110 + }, + { + "epoch": 0.5448128148662101, + "grad_norm": 0.677626460877084, + "learning_rate": 8.739972259640794e-06, + "loss": 0.9945, + "step": 36120 + }, + { + "epoch": 0.5449636489788531, + "grad_norm": 0.6613721803777721, + "learning_rate": 8.735224093331672e-06, + "loss": 0.9671, + "step": 36130 + }, + { + "epoch": 0.545114483091496, + "grad_norm": 0.6448334292097488, + "learning_rate": 8.730476216785154e-06, + "loss": 0.9813, + "step": 36140 + }, + { + "epoch": 0.5452653172041388, + "grad_norm": 0.6720142534972895, + "learning_rate": 8.725728631088997e-06, + "loss": 0.978, + "step": 36150 + }, + { + "epoch": 0.5454161513167818, + "grad_norm": 0.6770739281381478, + "learning_rate": 8.720981337330871e-06, + "loss": 0.989, + "step": 36160 + }, + { + "epoch": 0.5455669854294247, + "grad_norm": 0.7287451370993088, + "learning_rate": 8.716234336598402e-06, + "loss": 0.9625, + "step": 36170 + }, + { + "epoch": 0.5457178195420677, + "grad_norm": 0.6863424922265978, + "learning_rate": 8.711487629979129e-06, + "loss": 0.9794, + "step": 36180 + }, + { + "epoch": 0.5458686536547106, + "grad_norm": 0.6376556304908558, + "learning_rate": 8.706741218560531e-06, + "loss": 0.9737, + "step": 36190 + }, + { + "epoch": 0.5460194877673534, + "grad_norm": 0.7092539382987827, + "learning_rate": 8.701995103430025e-06, + "loss": 0.9991, + "step": 36200 + }, + { + "epoch": 0.5461703218799964, + "grad_norm": 0.6776679567793248, + "learning_rate": 8.697249285674954e-06, + "loss": 0.9733, + "step": 36210 + }, + { + "epoch": 0.5463211559926393, + "grad_norm": 0.6613786212732219, + "learning_rate": 8.69250376638259e-06, + "loss": 0.9918, + "step": 36220 + }, + { + "epoch": 0.5464719901052822, + "grad_norm": 0.6587923974532345, + "learning_rate": 8.687758546640146e-06, + "loss": 0.9745, + "step": 36230 + }, + { + "epoch": 0.5466228242179251, + "grad_norm": 0.6638212710967369, + "learning_rate": 8.683013627534759e-06, + "loss": 0.9677, + "step": 36240 + }, + { + "epoch": 0.546773658330568, + "grad_norm": 0.6310450846912138, + "learning_rate": 8.678269010153496e-06, + "loss": 0.9708, + "step": 36250 + }, + { + "epoch": 0.546924492443211, + "grad_norm": 0.6453451915696615, + "learning_rate": 8.673524695583365e-06, + "loss": 0.9853, + "step": 36260 + }, + { + "epoch": 0.5470753265558539, + "grad_norm": 0.653088943377878, + "learning_rate": 8.66878068491129e-06, + "loss": 0.9723, + "step": 36270 + }, + { + "epoch": 0.5472261606684968, + "grad_norm": 0.6220204780074279, + "learning_rate": 8.664036979224137e-06, + "loss": 0.9849, + "step": 36280 + }, + { + "epoch": 0.5473769947811397, + "grad_norm": 0.7817793553967688, + "learning_rate": 8.659293579608696e-06, + "loss": 0.9889, + "step": 36290 + }, + { + "epoch": 0.5475278288937826, + "grad_norm": 0.6809254741052395, + "learning_rate": 8.65455048715169e-06, + "loss": 0.9911, + "step": 36300 + }, + { + "epoch": 0.5476786630064255, + "grad_norm": 0.6535945186598244, + "learning_rate": 8.649807702939777e-06, + "loss": 0.9836, + "step": 36310 + }, + { + "epoch": 0.5478294971190685, + "grad_norm": 0.6709579109453515, + "learning_rate": 8.64506522805953e-06, + "loss": 0.9947, + "step": 36320 + }, + { + "epoch": 0.5479803312317114, + "grad_norm": 0.6712917691175857, + "learning_rate": 8.640323063597459e-06, + "loss": 0.9964, + "step": 36330 + }, + { + "epoch": 0.5481311653443542, + "grad_norm": 0.6422957282056251, + "learning_rate": 8.63558121064001e-06, + "loss": 0.9739, + "step": 36340 + }, + { + "epoch": 0.5482819994569972, + "grad_norm": 0.6256890520646129, + "learning_rate": 8.630839670273541e-06, + "loss": 0.9885, + "step": 36350 + }, + { + "epoch": 0.5484328335696401, + "grad_norm": 0.680748831256748, + "learning_rate": 8.626098443584356e-06, + "loss": 1.003, + "step": 36360 + }, + { + "epoch": 0.5485836676822831, + "grad_norm": 0.6546097683031311, + "learning_rate": 8.621357531658677e-06, + "loss": 0.9856, + "step": 36370 + }, + { + "epoch": 0.5487345017949259, + "grad_norm": 0.7020410222954778, + "learning_rate": 8.616616935582656e-06, + "loss": 0.9752, + "step": 36380 + }, + { + "epoch": 0.5488853359075688, + "grad_norm": 0.6463356877730996, + "learning_rate": 8.611876656442371e-06, + "loss": 0.997, + "step": 36390 + }, + { + "epoch": 0.5490361700202118, + "grad_norm": 0.6562866312392998, + "learning_rate": 8.607136695323832e-06, + "loss": 0.9937, + "step": 36400 + }, + { + "epoch": 0.5491870041328547, + "grad_norm": 0.6553945482158111, + "learning_rate": 8.602397053312971e-06, + "loss": 0.9922, + "step": 36410 + }, + { + "epoch": 0.5493378382454976, + "grad_norm": 0.6193107544143517, + "learning_rate": 8.597657731495649e-06, + "loss": 0.9761, + "step": 36420 + }, + { + "epoch": 0.5494886723581405, + "grad_norm": 0.6332864978181859, + "learning_rate": 8.592918730957657e-06, + "loss": 0.9886, + "step": 36430 + }, + { + "epoch": 0.5496395064707834, + "grad_norm": 0.67911539785545, + "learning_rate": 8.588180052784702e-06, + "loss": 0.9967, + "step": 36440 + }, + { + "epoch": 0.5497903405834264, + "grad_norm": 0.672946305639559, + "learning_rate": 8.583441698062434e-06, + "loss": 0.9758, + "step": 36450 + }, + { + "epoch": 0.5499411746960693, + "grad_norm": 0.6727699536625196, + "learning_rate": 8.578703667876415e-06, + "loss": 0.9889, + "step": 36460 + }, + { + "epoch": 0.5500920088087122, + "grad_norm": 0.6481904749241949, + "learning_rate": 8.573965963312132e-06, + "loss": 0.9809, + "step": 36470 + }, + { + "epoch": 0.5502428429213551, + "grad_norm": 0.6202655052563928, + "learning_rate": 8.569228585455012e-06, + "loss": 0.9869, + "step": 36480 + }, + { + "epoch": 0.550393677033998, + "grad_norm": 0.670951031124557, + "learning_rate": 8.564491535390391e-06, + "loss": 0.985, + "step": 36490 + }, + { + "epoch": 0.5505445111466409, + "grad_norm": 0.6579741372166196, + "learning_rate": 8.559754814203534e-06, + "loss": 0.9734, + "step": 36500 + }, + { + "epoch": 0.5506953452592839, + "grad_norm": 0.6597635884852543, + "learning_rate": 8.555018422979639e-06, + "loss": 0.9846, + "step": 36510 + }, + { + "epoch": 0.5508461793719267, + "grad_norm": 0.6552611306752376, + "learning_rate": 8.55028236280382e-06, + "loss": 0.9998, + "step": 36520 + }, + { + "epoch": 0.5509970134845696, + "grad_norm": 0.6797079022413436, + "learning_rate": 8.545546634761114e-06, + "loss": 0.9873, + "step": 36530 + }, + { + "epoch": 0.5511478475972126, + "grad_norm": 0.6998934630524619, + "learning_rate": 8.540811239936493e-06, + "loss": 0.9608, + "step": 36540 + }, + { + "epoch": 0.5512986817098555, + "grad_norm": 0.6930348910208677, + "learning_rate": 8.53607617941484e-06, + "loss": 0.9734, + "step": 36550 + }, + { + "epoch": 0.5514495158224985, + "grad_norm": 0.6594257430716265, + "learning_rate": 8.531341454280966e-06, + "loss": 0.9802, + "step": 36560 + }, + { + "epoch": 0.5516003499351413, + "grad_norm": 0.6858160345960216, + "learning_rate": 8.52660706561961e-06, + "loss": 1.0041, + "step": 36570 + }, + { + "epoch": 0.5517511840477842, + "grad_norm": 0.746759362753791, + "learning_rate": 8.521873014515422e-06, + "loss": 0.989, + "step": 36580 + }, + { + "epoch": 0.5519020181604272, + "grad_norm": 0.6598406382036143, + "learning_rate": 8.517139302052993e-06, + "loss": 0.9821, + "step": 36590 + }, + { + "epoch": 0.5520528522730701, + "grad_norm": 0.6943752670616655, + "learning_rate": 8.512405929316816e-06, + "loss": 0.9699, + "step": 36600 + }, + { + "epoch": 0.552203686385713, + "grad_norm": 0.6237629723925183, + "learning_rate": 8.50767289739132e-06, + "loss": 0.9525, + "step": 36610 + }, + { + "epoch": 0.5523545204983559, + "grad_norm": 0.6556525153614388, + "learning_rate": 8.50294020736085e-06, + "loss": 0.9628, + "step": 36620 + }, + { + "epoch": 0.5525053546109988, + "grad_norm": 0.7992426846706916, + "learning_rate": 8.49820786030968e-06, + "loss": 1.0, + "step": 36630 + }, + { + "epoch": 0.5526561887236418, + "grad_norm": 0.6812237091177443, + "learning_rate": 8.493475857321991e-06, + "loss": 0.9858, + "step": 36640 + }, + { + "epoch": 0.5528070228362847, + "grad_norm": 0.7868610333511743, + "learning_rate": 8.488744199481905e-06, + "loss": 0.9908, + "step": 36650 + }, + { + "epoch": 0.5529578569489275, + "grad_norm": 0.6196194692606778, + "learning_rate": 8.484012887873444e-06, + "loss": 0.978, + "step": 36660 + }, + { + "epoch": 0.5531086910615705, + "grad_norm": 0.6593990197797018, + "learning_rate": 8.479281923580562e-06, + "loss": 0.9762, + "step": 36670 + }, + { + "epoch": 0.5532595251742134, + "grad_norm": 0.6474372039335735, + "learning_rate": 8.474551307687138e-06, + "loss": 0.9899, + "step": 36680 + }, + { + "epoch": 0.5534103592868563, + "grad_norm": 0.6386985601845132, + "learning_rate": 8.46982104127696e-06, + "loss": 0.9698, + "step": 36690 + }, + { + "epoch": 0.5535611933994993, + "grad_norm": 0.6877257075172405, + "learning_rate": 8.465091125433741e-06, + "loss": 0.988, + "step": 36700 + }, + { + "epoch": 0.5537120275121421, + "grad_norm": 0.6981355314292181, + "learning_rate": 8.46036156124112e-06, + "loss": 0.9653, + "step": 36710 + }, + { + "epoch": 0.553862861624785, + "grad_norm": 0.6582977316852388, + "learning_rate": 8.45563234978264e-06, + "loss": 0.9648, + "step": 36720 + }, + { + "epoch": 0.554013695737428, + "grad_norm": 0.6707249065267673, + "learning_rate": 8.450903492141783e-06, + "loss": 1.0026, + "step": 36730 + }, + { + "epoch": 0.5541645298500709, + "grad_norm": 0.720535312647766, + "learning_rate": 8.446174989401934e-06, + "loss": 0.9869, + "step": 36740 + }, + { + "epoch": 0.5543153639627139, + "grad_norm": 0.6447701698797212, + "learning_rate": 8.441446842646397e-06, + "loss": 0.9825, + "step": 36750 + }, + { + "epoch": 0.5544661980753567, + "grad_norm": 0.7041102491101329, + "learning_rate": 8.43671905295841e-06, + "loss": 0.9819, + "step": 36760 + }, + { + "epoch": 0.5546170321879996, + "grad_norm": 0.7208991221800934, + "learning_rate": 8.431991621421109e-06, + "loss": 0.98, + "step": 36770 + }, + { + "epoch": 0.5547678663006426, + "grad_norm": 0.6845943519875591, + "learning_rate": 8.427264549117565e-06, + "loss": 0.9692, + "step": 36780 + }, + { + "epoch": 0.5549187004132855, + "grad_norm": 0.663564095813663, + "learning_rate": 8.422537837130755e-06, + "loss": 0.9658, + "step": 36790 + }, + { + "epoch": 0.5550695345259283, + "grad_norm": 0.5964196124492046, + "learning_rate": 8.417811486543581e-06, + "loss": 0.956, + "step": 36800 + }, + { + "epoch": 0.5552203686385713, + "grad_norm": 0.643863897101407, + "learning_rate": 8.413085498438855e-06, + "loss": 0.983, + "step": 36810 + }, + { + "epoch": 0.5553712027512142, + "grad_norm": 0.7927892425406978, + "learning_rate": 8.408359873899316e-06, + "loss": 0.9912, + "step": 36820 + }, + { + "epoch": 0.5555220368638571, + "grad_norm": 0.6892192405665364, + "learning_rate": 8.403634614007607e-06, + "loss": 1.0016, + "step": 36830 + }, + { + "epoch": 0.5556728709765001, + "grad_norm": 0.6836407260659523, + "learning_rate": 8.3989097198463e-06, + "loss": 0.9933, + "step": 36840 + }, + { + "epoch": 0.5558237050891429, + "grad_norm": 0.7117557035464027, + "learning_rate": 8.394185192497875e-06, + "loss": 0.9857, + "step": 36850 + }, + { + "epoch": 0.5559745392017859, + "grad_norm": 0.6733697611229219, + "learning_rate": 8.389461033044727e-06, + "loss": 1.0124, + "step": 36860 + }, + { + "epoch": 0.5561253733144288, + "grad_norm": 0.6557008127645165, + "learning_rate": 8.384737242569175e-06, + "loss": 0.9929, + "step": 36870 + }, + { + "epoch": 0.5562762074270717, + "grad_norm": 0.6562798057230994, + "learning_rate": 8.38001382215345e-06, + "loss": 0.9657, + "step": 36880 + }, + { + "epoch": 0.5564270415397147, + "grad_norm": 0.6993223491353763, + "learning_rate": 8.375290772879687e-06, + "loss": 0.9505, + "step": 36890 + }, + { + "epoch": 0.5565778756523575, + "grad_norm": 0.6759535346892942, + "learning_rate": 8.370568095829958e-06, + "loss": 0.9765, + "step": 36900 + }, + { + "epoch": 0.5567287097650004, + "grad_norm": 0.6444708703467873, + "learning_rate": 8.365845792086232e-06, + "loss": 0.9994, + "step": 36910 + }, + { + "epoch": 0.5568795438776434, + "grad_norm": 0.6307766411372328, + "learning_rate": 8.361123862730394e-06, + "loss": 0.987, + "step": 36920 + }, + { + "epoch": 0.5570303779902863, + "grad_norm": 0.6419024000437646, + "learning_rate": 8.356402308844254e-06, + "loss": 0.9934, + "step": 36930 + }, + { + "epoch": 0.5571812121029291, + "grad_norm": 0.8293812247055957, + "learning_rate": 8.351681131509524e-06, + "loss": 0.9957, + "step": 36940 + }, + { + "epoch": 0.5573320462155721, + "grad_norm": 0.6349058471980044, + "learning_rate": 8.346960331807834e-06, + "loss": 0.9534, + "step": 36950 + }, + { + "epoch": 0.557482880328215, + "grad_norm": 0.657866290342281, + "learning_rate": 8.342239910820738e-06, + "loss": 0.9834, + "step": 36960 + }, + { + "epoch": 0.557633714440858, + "grad_norm": 0.6576783779351324, + "learning_rate": 8.33751986962968e-06, + "loss": 0.9786, + "step": 36970 + }, + { + "epoch": 0.5577845485535009, + "grad_norm": 0.6902522474486943, + "learning_rate": 8.332800209316042e-06, + "loss": 0.9734, + "step": 36980 + }, + { + "epoch": 0.5579353826661437, + "grad_norm": 0.6618707179737362, + "learning_rate": 8.328080930961103e-06, + "loss": 0.9822, + "step": 36990 + }, + { + "epoch": 0.5580862167787867, + "grad_norm": 0.8089796157713004, + "learning_rate": 8.323362035646053e-06, + "loss": 1.0154, + "step": 37000 + }, + { + "epoch": 0.5582370508914296, + "grad_norm": 0.6532439993985943, + "learning_rate": 8.318643524452013e-06, + "loss": 0.9603, + "step": 37010 + }, + { + "epoch": 0.5583878850040725, + "grad_norm": 0.6586093199779903, + "learning_rate": 8.31392539845999e-06, + "loss": 0.9719, + "step": 37020 + }, + { + "epoch": 0.5585387191167155, + "grad_norm": 0.626103698380508, + "learning_rate": 8.309207658750924e-06, + "loss": 0.9892, + "step": 37030 + }, + { + "epoch": 0.5586895532293583, + "grad_norm": 0.6533061679019754, + "learning_rate": 8.304490306405655e-06, + "loss": 0.9892, + "step": 37040 + }, + { + "epoch": 0.5588403873420013, + "grad_norm": 0.6517791927288192, + "learning_rate": 8.299773342504939e-06, + "loss": 0.9696, + "step": 37050 + }, + { + "epoch": 0.5589912214546442, + "grad_norm": 0.7224475529414004, + "learning_rate": 8.295056768129439e-06, + "loss": 0.997, + "step": 37060 + }, + { + "epoch": 0.5591420555672871, + "grad_norm": 0.6813875376845967, + "learning_rate": 8.290340584359735e-06, + "loss": 0.9681, + "step": 37070 + }, + { + "epoch": 0.55929288967993, + "grad_norm": 0.6774117928583988, + "learning_rate": 8.285624792276312e-06, + "loss": 0.9637, + "step": 37080 + }, + { + "epoch": 0.5594437237925729, + "grad_norm": 0.7074495950654826, + "learning_rate": 8.280909392959564e-06, + "loss": 0.9828, + "step": 37090 + }, + { + "epoch": 0.5595945579052158, + "grad_norm": 0.6678513523793144, + "learning_rate": 8.276194387489805e-06, + "loss": 1.0051, + "step": 37100 + }, + { + "epoch": 0.5597453920178588, + "grad_norm": 0.6243534024956172, + "learning_rate": 8.271479776947244e-06, + "loss": 0.969, + "step": 37110 + }, + { + "epoch": 0.5598962261305017, + "grad_norm": 0.6625450221090075, + "learning_rate": 8.266765562412012e-06, + "loss": 0.9831, + "step": 37120 + }, + { + "epoch": 0.5600470602431445, + "grad_norm": 0.6885452218507191, + "learning_rate": 8.262051744964148e-06, + "loss": 0.9653, + "step": 37130 + }, + { + "epoch": 0.5601978943557875, + "grad_norm": 0.885696380534731, + "learning_rate": 8.257338325683587e-06, + "loss": 0.9851, + "step": 37140 + }, + { + "epoch": 0.5603487284684304, + "grad_norm": 0.8018040675358407, + "learning_rate": 8.252625305650194e-06, + "loss": 0.9668, + "step": 37150 + }, + { + "epoch": 0.5604995625810734, + "grad_norm": 0.652530232900422, + "learning_rate": 8.247912685943724e-06, + "loss": 0.9591, + "step": 37160 + }, + { + "epoch": 0.5606503966937163, + "grad_norm": 0.6622824626466395, + "learning_rate": 8.243200467643845e-06, + "loss": 0.9873, + "step": 37170 + }, + { + "epoch": 0.5608012308063591, + "grad_norm": 0.6421521629657899, + "learning_rate": 8.238488651830144e-06, + "loss": 0.9791, + "step": 37180 + }, + { + "epoch": 0.5609520649190021, + "grad_norm": 0.7105385773208408, + "learning_rate": 8.2337772395821e-06, + "loss": 0.9723, + "step": 37190 + }, + { + "epoch": 0.561102899031645, + "grad_norm": 0.7344906462767874, + "learning_rate": 8.229066231979103e-06, + "loss": 0.9717, + "step": 37200 + }, + { + "epoch": 0.561253733144288, + "grad_norm": 0.7380768776072117, + "learning_rate": 8.224355630100466e-06, + "loss": 0.9712, + "step": 37210 + }, + { + "epoch": 0.5614045672569308, + "grad_norm": 0.6681868733774989, + "learning_rate": 8.219645435025393e-06, + "loss": 0.976, + "step": 37220 + }, + { + "epoch": 0.5615554013695737, + "grad_norm": 0.6778810023828156, + "learning_rate": 8.21493564783299e-06, + "loss": 1.0247, + "step": 37230 + }, + { + "epoch": 0.5617062354822167, + "grad_norm": 0.6613994354565734, + "learning_rate": 8.210226269602289e-06, + "loss": 0.9667, + "step": 37240 + }, + { + "epoch": 0.5618570695948596, + "grad_norm": 0.8067552209811126, + "learning_rate": 8.20551730141221e-06, + "loss": 0.9985, + "step": 37250 + }, + { + "epoch": 0.5620079037075025, + "grad_norm": 0.6519478703256023, + "learning_rate": 8.200808744341595e-06, + "loss": 0.9956, + "step": 37260 + }, + { + "epoch": 0.5621587378201454, + "grad_norm": 0.6609325249667906, + "learning_rate": 8.196100599469175e-06, + "loss": 0.9766, + "step": 37270 + }, + { + "epoch": 0.5623095719327883, + "grad_norm": 0.6470746659992127, + "learning_rate": 8.191392867873599e-06, + "loss": 0.9869, + "step": 37280 + }, + { + "epoch": 0.5624604060454312, + "grad_norm": 0.6616471552152895, + "learning_rate": 8.186685550633418e-06, + "loss": 0.9831, + "step": 37290 + }, + { + "epoch": 0.5626112401580742, + "grad_norm": 0.6719625250422645, + "learning_rate": 8.181978648827087e-06, + "loss": 0.9735, + "step": 37300 + }, + { + "epoch": 0.5627620742707171, + "grad_norm": 0.6765642202458985, + "learning_rate": 8.177272163532964e-06, + "loss": 0.9894, + "step": 37310 + }, + { + "epoch": 0.56291290838336, + "grad_norm": 0.6421572609271472, + "learning_rate": 8.172566095829319e-06, + "loss": 0.9857, + "step": 37320 + }, + { + "epoch": 0.5630637424960029, + "grad_norm": 0.6798308360553084, + "learning_rate": 8.16786044679432e-06, + "loss": 0.9716, + "step": 37330 + }, + { + "epoch": 0.5632145766086458, + "grad_norm": 0.6422098987862057, + "learning_rate": 8.163155217506033e-06, + "loss": 0.984, + "step": 37340 + }, + { + "epoch": 0.5633654107212888, + "grad_norm": 0.6825795134672724, + "learning_rate": 8.158450409042446e-06, + "loss": 0.9848, + "step": 37350 + }, + { + "epoch": 0.5635162448339316, + "grad_norm": 0.6381668063420564, + "learning_rate": 8.15374602248143e-06, + "loss": 0.991, + "step": 37360 + }, + { + "epoch": 0.5636670789465745, + "grad_norm": 0.6578211763150443, + "learning_rate": 8.149042058900773e-06, + "loss": 0.9825, + "step": 37370 + }, + { + "epoch": 0.5638179130592175, + "grad_norm": 0.6279286239489692, + "learning_rate": 8.144338519378165e-06, + "loss": 0.9627, + "step": 37380 + }, + { + "epoch": 0.5639687471718604, + "grad_norm": 0.6731113709478409, + "learning_rate": 8.13963540499119e-06, + "loss": 0.9803, + "step": 37390 + }, + { + "epoch": 0.5641195812845033, + "grad_norm": 0.6824451191353358, + "learning_rate": 8.134932716817349e-06, + "loss": 0.9865, + "step": 37400 + }, + { + "epoch": 0.5642704153971462, + "grad_norm": 0.6313714776068432, + "learning_rate": 8.130230455934033e-06, + "loss": 0.9744, + "step": 37410 + }, + { + "epoch": 0.5644212495097891, + "grad_norm": 0.6558018259639367, + "learning_rate": 8.125528623418533e-06, + "loss": 0.9832, + "step": 37420 + }, + { + "epoch": 0.564572083622432, + "grad_norm": 0.6387932153987497, + "learning_rate": 8.120827220348058e-06, + "loss": 0.9786, + "step": 37430 + }, + { + "epoch": 0.564722917735075, + "grad_norm": 0.6984912360839056, + "learning_rate": 8.116126247799702e-06, + "loss": 0.9864, + "step": 37440 + }, + { + "epoch": 0.5648737518477179, + "grad_norm": 0.7435699923222032, + "learning_rate": 8.111425706850467e-06, + "loss": 0.9897, + "step": 37450 + }, + { + "epoch": 0.5650245859603608, + "grad_norm": 0.6672352993166283, + "learning_rate": 8.106725598577259e-06, + "loss": 0.9778, + "step": 37460 + }, + { + "epoch": 0.5651754200730037, + "grad_norm": 0.6938628802525852, + "learning_rate": 8.102025924056882e-06, + "loss": 0.9974, + "step": 37470 + }, + { + "epoch": 0.5653262541856466, + "grad_norm": 0.674524845263591, + "learning_rate": 8.097326684366034e-06, + "loss": 0.9786, + "step": 37480 + }, + { + "epoch": 0.5654770882982896, + "grad_norm": 0.666565233465427, + "learning_rate": 8.09262788058133e-06, + "loss": 0.9745, + "step": 37490 + }, + { + "epoch": 0.5656279224109324, + "grad_norm": 0.6259986307625391, + "learning_rate": 8.08792951377927e-06, + "loss": 0.9613, + "step": 37500 + }, + { + "epoch": 0.5657787565235753, + "grad_norm": 0.6438339270251596, + "learning_rate": 8.083231585036254e-06, + "loss": 0.975, + "step": 37510 + }, + { + "epoch": 0.5659295906362183, + "grad_norm": 0.6729317803877471, + "learning_rate": 8.078534095428594e-06, + "loss": 0.9944, + "step": 37520 + }, + { + "epoch": 0.5660804247488612, + "grad_norm": 0.6436013779809048, + "learning_rate": 8.073837046032492e-06, + "loss": 0.9799, + "step": 37530 + }, + { + "epoch": 0.5662312588615042, + "grad_norm": 0.6664943709563832, + "learning_rate": 8.06914043792405e-06, + "loss": 0.9798, + "step": 37540 + }, + { + "epoch": 0.566382092974147, + "grad_norm": 0.7649928636655018, + "learning_rate": 8.064444272179274e-06, + "loss": 0.9981, + "step": 37550 + }, + { + "epoch": 0.5665329270867899, + "grad_norm": 0.7192919116138257, + "learning_rate": 8.059748549874055e-06, + "loss": 0.9711, + "step": 37560 + }, + { + "epoch": 0.5666837611994329, + "grad_norm": 0.6410667154089172, + "learning_rate": 8.055053272084203e-06, + "loss": 0.9847, + "step": 37570 + }, + { + "epoch": 0.5668345953120758, + "grad_norm": 0.6615021095247932, + "learning_rate": 8.050358439885413e-06, + "loss": 0.984, + "step": 37580 + }, + { + "epoch": 0.5669854294247187, + "grad_norm": 0.6486900972937573, + "learning_rate": 8.04566405435327e-06, + "loss": 0.9901, + "step": 37590 + }, + { + "epoch": 0.5671362635373616, + "grad_norm": 0.6316255491269556, + "learning_rate": 8.04097011656328e-06, + "loss": 0.9787, + "step": 37600 + }, + { + "epoch": 0.5672870976500045, + "grad_norm": 0.7418088434906588, + "learning_rate": 8.036276627590826e-06, + "loss": 1.0006, + "step": 37610 + }, + { + "epoch": 0.5674379317626475, + "grad_norm": 0.6335398671360087, + "learning_rate": 8.031583588511195e-06, + "loss": 0.9621, + "step": 37620 + }, + { + "epoch": 0.5675887658752904, + "grad_norm": 0.6338560283527429, + "learning_rate": 8.026891000399577e-06, + "loss": 0.9737, + "step": 37630 + }, + { + "epoch": 0.5677395999879332, + "grad_norm": 0.6974003521197841, + "learning_rate": 8.022198864331046e-06, + "loss": 0.984, + "step": 37640 + }, + { + "epoch": 0.5678904341005762, + "grad_norm": 0.6745160839906473, + "learning_rate": 8.017507181380588e-06, + "loss": 0.9792, + "step": 37650 + }, + { + "epoch": 0.5680412682132191, + "grad_norm": 0.6343669577909842, + "learning_rate": 8.01281595262307e-06, + "loss": 0.986, + "step": 37660 + }, + { + "epoch": 0.568192102325862, + "grad_norm": 0.717808876800018, + "learning_rate": 8.008125179133257e-06, + "loss": 0.9793, + "step": 37670 + }, + { + "epoch": 0.568342936438505, + "grad_norm": 0.6348388003299347, + "learning_rate": 8.003434861985828e-06, + "loss": 0.9884, + "step": 37680 + }, + { + "epoch": 0.5684937705511478, + "grad_norm": 0.6644534232148366, + "learning_rate": 7.998745002255334e-06, + "loss": 0.9659, + "step": 37690 + }, + { + "epoch": 0.5686446046637907, + "grad_norm": 0.6282342475882019, + "learning_rate": 7.994055601016231e-06, + "loss": 0.9798, + "step": 37700 + }, + { + "epoch": 0.5687954387764337, + "grad_norm": 0.6707282946141898, + "learning_rate": 7.989366659342874e-06, + "loss": 0.9772, + "step": 37710 + }, + { + "epoch": 0.5689462728890766, + "grad_norm": 0.6780815413051028, + "learning_rate": 7.984678178309509e-06, + "loss": 0.9743, + "step": 37720 + }, + { + "epoch": 0.5690971070017196, + "grad_norm": 0.673534065965594, + "learning_rate": 7.979990158990267e-06, + "loss": 0.9734, + "step": 37730 + }, + { + "epoch": 0.5692479411143624, + "grad_norm": 0.6599026851909178, + "learning_rate": 7.975302602459196e-06, + "loss": 0.9819, + "step": 37740 + }, + { + "epoch": 0.5693987752270053, + "grad_norm": 0.6742452185218495, + "learning_rate": 7.970615509790215e-06, + "loss": 0.982, + "step": 37750 + }, + { + "epoch": 0.5695496093396483, + "grad_norm": 0.6463845574955099, + "learning_rate": 7.965928882057146e-06, + "loss": 0.9746, + "step": 37760 + }, + { + "epoch": 0.5697004434522912, + "grad_norm": 0.6790415112880547, + "learning_rate": 7.961242720333707e-06, + "loss": 0.9695, + "step": 37770 + }, + { + "epoch": 0.569851277564934, + "grad_norm": 0.6933176875985063, + "learning_rate": 7.956557025693508e-06, + "loss": 0.984, + "step": 37780 + }, + { + "epoch": 0.570002111677577, + "grad_norm": 0.6777519010214053, + "learning_rate": 7.951871799210048e-06, + "loss": 0.9831, + "step": 37790 + }, + { + "epoch": 0.5701529457902199, + "grad_norm": 0.7037773370538696, + "learning_rate": 7.947187041956725e-06, + "loss": 1.0124, + "step": 37800 + }, + { + "epoch": 0.5703037799028629, + "grad_norm": 0.6686624655954352, + "learning_rate": 7.94250275500682e-06, + "loss": 0.9565, + "step": 37810 + }, + { + "epoch": 0.5704546140155058, + "grad_norm": 0.8164925482349162, + "learning_rate": 7.93781893943352e-06, + "loss": 0.9901, + "step": 37820 + }, + { + "epoch": 0.5706054481281486, + "grad_norm": 0.719782581128482, + "learning_rate": 7.933135596309892e-06, + "loss": 0.9886, + "step": 37830 + }, + { + "epoch": 0.5707562822407916, + "grad_norm": 0.6731853199831647, + "learning_rate": 7.928452726708896e-06, + "loss": 0.9926, + "step": 37840 + }, + { + "epoch": 0.5709071163534345, + "grad_norm": 0.6579048205173011, + "learning_rate": 7.923770331703391e-06, + "loss": 0.9674, + "step": 37850 + }, + { + "epoch": 0.5710579504660774, + "grad_norm": 0.676056218970864, + "learning_rate": 7.91908841236612e-06, + "loss": 0.9744, + "step": 37860 + }, + { + "epoch": 0.5712087845787204, + "grad_norm": 0.6450352395203557, + "learning_rate": 7.91440696976972e-06, + "loss": 0.9757, + "step": 37870 + }, + { + "epoch": 0.5713596186913632, + "grad_norm": 0.6460787269741389, + "learning_rate": 7.909726004986723e-06, + "loss": 0.9862, + "step": 37880 + }, + { + "epoch": 0.5715104528040061, + "grad_norm": 0.6425654566846252, + "learning_rate": 7.905045519089543e-06, + "loss": 0.99, + "step": 37890 + }, + { + "epoch": 0.5716612869166491, + "grad_norm": 0.680331086955652, + "learning_rate": 7.900365513150487e-06, + "loss": 0.9987, + "step": 37900 + }, + { + "epoch": 0.571812121029292, + "grad_norm": 0.7068184771590881, + "learning_rate": 7.895685988241758e-06, + "loss": 0.9908, + "step": 37910 + }, + { + "epoch": 0.5719629551419348, + "grad_norm": 0.6534280853110656, + "learning_rate": 7.891006945435437e-06, + "loss": 0.9715, + "step": 37920 + }, + { + "epoch": 0.5721137892545778, + "grad_norm": 0.6366744863794699, + "learning_rate": 7.886328385803512e-06, + "loss": 0.9738, + "step": 37930 + }, + { + "epoch": 0.5722646233672207, + "grad_norm": 0.6517941416367123, + "learning_rate": 7.881650310417842e-06, + "loss": 0.9902, + "step": 37940 + }, + { + "epoch": 0.5724154574798637, + "grad_norm": 0.6883019969517563, + "learning_rate": 7.876972720350187e-06, + "loss": 0.9663, + "step": 37950 + }, + { + "epoch": 0.5725662915925066, + "grad_norm": 0.6769699293098675, + "learning_rate": 7.872295616672187e-06, + "loss": 0.9929, + "step": 37960 + }, + { + "epoch": 0.5727171257051494, + "grad_norm": 0.7374412802522848, + "learning_rate": 7.867619000455385e-06, + "loss": 0.9748, + "step": 37970 + }, + { + "epoch": 0.5728679598177924, + "grad_norm": 0.6802043984724869, + "learning_rate": 7.86294287277119e-06, + "loss": 0.9823, + "step": 37980 + }, + { + "epoch": 0.5730187939304353, + "grad_norm": 0.6736671212201681, + "learning_rate": 7.858267234690926e-06, + "loss": 0.9781, + "step": 37990 + }, + { + "epoch": 0.5731696280430782, + "grad_norm": 0.645782003778104, + "learning_rate": 7.85359208728578e-06, + "loss": 0.9712, + "step": 38000 + }, + { + "epoch": 0.5733204621557212, + "grad_norm": 0.6115476685156008, + "learning_rate": 7.84891743162684e-06, + "loss": 0.9906, + "step": 38010 + }, + { + "epoch": 0.573471296268364, + "grad_norm": 0.6852358313030044, + "learning_rate": 7.844243268785078e-06, + "loss": 0.9678, + "step": 38020 + }, + { + "epoch": 0.573622130381007, + "grad_norm": 0.6462356423868727, + "learning_rate": 7.839569599831361e-06, + "loss": 0.9762, + "step": 38030 + }, + { + "epoch": 0.5737729644936499, + "grad_norm": 0.6906537515782719, + "learning_rate": 7.834896425836423e-06, + "loss": 0.9814, + "step": 38040 + }, + { + "epoch": 0.5739237986062928, + "grad_norm": 0.7146655205552549, + "learning_rate": 7.830223747870908e-06, + "loss": 0.979, + "step": 38050 + }, + { + "epoch": 0.5740746327189357, + "grad_norm": 0.725469508814457, + "learning_rate": 7.82555156700533e-06, + "loss": 0.9685, + "step": 38060 + }, + { + "epoch": 0.5742254668315786, + "grad_norm": 0.6465048404894566, + "learning_rate": 7.820879884310099e-06, + "loss": 0.9634, + "step": 38070 + }, + { + "epoch": 0.5743763009442215, + "grad_norm": 0.7399050069147649, + "learning_rate": 7.816208700855505e-06, + "loss": 0.9854, + "step": 38080 + }, + { + "epoch": 0.5745271350568645, + "grad_norm": 0.6357190194791986, + "learning_rate": 7.811538017711718e-06, + "loss": 0.9751, + "step": 38090 + }, + { + "epoch": 0.5746779691695074, + "grad_norm": 0.7057973810458971, + "learning_rate": 7.80686783594881e-06, + "loss": 0.9878, + "step": 38100 + }, + { + "epoch": 0.5748288032821502, + "grad_norm": 0.7153941822101646, + "learning_rate": 7.802198156636727e-06, + "loss": 0.9798, + "step": 38110 + }, + { + "epoch": 0.5749796373947932, + "grad_norm": 0.6449040891085643, + "learning_rate": 7.797528980845294e-06, + "loss": 0.9815, + "step": 38120 + }, + { + "epoch": 0.5751304715074361, + "grad_norm": 0.6466522512475931, + "learning_rate": 7.792860309644241e-06, + "loss": 0.9924, + "step": 38130 + }, + { + "epoch": 0.5752813056200791, + "grad_norm": 0.6308203225825899, + "learning_rate": 7.788192144103162e-06, + "loss": 0.9835, + "step": 38140 + }, + { + "epoch": 0.575432139732722, + "grad_norm": 0.6907858760876966, + "learning_rate": 7.783524485291538e-06, + "loss": 0.9963, + "step": 38150 + }, + { + "epoch": 0.5755829738453648, + "grad_norm": 0.6682709546605107, + "learning_rate": 7.77885733427875e-06, + "loss": 0.9918, + "step": 38160 + }, + { + "epoch": 0.5757338079580078, + "grad_norm": 0.6938362511495859, + "learning_rate": 7.774190692134045e-06, + "loss": 0.9716, + "step": 38170 + }, + { + "epoch": 0.5758846420706507, + "grad_norm": 0.67844712443717, + "learning_rate": 7.769524559926554e-06, + "loss": 1.0017, + "step": 38180 + }, + { + "epoch": 0.5760354761832936, + "grad_norm": 0.6746222255165766, + "learning_rate": 7.764858938725307e-06, + "loss": 0.9715, + "step": 38190 + }, + { + "epoch": 0.5761863102959365, + "grad_norm": 0.7064442098533563, + "learning_rate": 7.760193829599203e-06, + "loss": 0.9791, + "step": 38200 + }, + { + "epoch": 0.5763371444085794, + "grad_norm": 0.6458833606691698, + "learning_rate": 7.755529233617028e-06, + "loss": 0.9776, + "step": 38210 + }, + { + "epoch": 0.5764879785212224, + "grad_norm": 0.6166699293290573, + "learning_rate": 7.75086515184745e-06, + "loss": 0.9642, + "step": 38220 + }, + { + "epoch": 0.5766388126338653, + "grad_norm": 0.6871848632105484, + "learning_rate": 7.746201585359015e-06, + "loss": 0.9763, + "step": 38230 + }, + { + "epoch": 0.5767896467465082, + "grad_norm": 0.7058909247436673, + "learning_rate": 7.741538535220163e-06, + "loss": 0.9777, + "step": 38240 + }, + { + "epoch": 0.5769404808591511, + "grad_norm": 0.6396333815208027, + "learning_rate": 7.736876002499203e-06, + "loss": 0.9861, + "step": 38250 + }, + { + "epoch": 0.577091314971794, + "grad_norm": 0.7096438197976448, + "learning_rate": 7.732213988264327e-06, + "loss": 0.9741, + "step": 38260 + }, + { + "epoch": 0.5772421490844369, + "grad_norm": 0.6305148681139008, + "learning_rate": 7.727552493583617e-06, + "loss": 0.9712, + "step": 38270 + }, + { + "epoch": 0.5773929831970799, + "grad_norm": 0.6496921738968371, + "learning_rate": 7.722891519525031e-06, + "loss": 0.9853, + "step": 38280 + }, + { + "epoch": 0.5775438173097228, + "grad_norm": 0.7070230283267145, + "learning_rate": 7.7182310671564e-06, + "loss": 0.9796, + "step": 38290 + }, + { + "epoch": 0.5776946514223656, + "grad_norm": 0.6947016913984421, + "learning_rate": 7.713571137545454e-06, + "loss": 0.9587, + "step": 38300 + }, + { + "epoch": 0.5778454855350086, + "grad_norm": 0.663788391927041, + "learning_rate": 7.708911731759778e-06, + "loss": 0.963, + "step": 38310 + }, + { + "epoch": 0.5779963196476515, + "grad_norm": 0.6512116485120221, + "learning_rate": 7.704252850866864e-06, + "loss": 0.983, + "step": 38320 + }, + { + "epoch": 0.5781471537602945, + "grad_norm": 0.6791883260954957, + "learning_rate": 7.699594495934066e-06, + "loss": 0.9741, + "step": 38330 + }, + { + "epoch": 0.5782979878729373, + "grad_norm": 0.711590322044384, + "learning_rate": 7.694936668028616e-06, + "loss": 0.9992, + "step": 38340 + }, + { + "epoch": 0.5784488219855802, + "grad_norm": 0.6593468617139916, + "learning_rate": 7.690279368217642e-06, + "loss": 0.9808, + "step": 38350 + }, + { + "epoch": 0.5785996560982232, + "grad_norm": 0.679371916164237, + "learning_rate": 7.68562259756813e-06, + "loss": 0.9825, + "step": 38360 + }, + { + "epoch": 0.5787504902108661, + "grad_norm": 0.6371904674346712, + "learning_rate": 7.68096635714696e-06, + "loss": 0.9562, + "step": 38370 + }, + { + "epoch": 0.578901324323509, + "grad_norm": 0.6890726837385629, + "learning_rate": 7.67631064802089e-06, + "loss": 0.9832, + "step": 38380 + }, + { + "epoch": 0.5790521584361519, + "grad_norm": 0.6248968994566461, + "learning_rate": 7.671655471256546e-06, + "loss": 0.974, + "step": 38390 + }, + { + "epoch": 0.5792029925487948, + "grad_norm": 0.6624694123895729, + "learning_rate": 7.667000827920438e-06, + "loss": 0.9817, + "step": 38400 + }, + { + "epoch": 0.5793538266614378, + "grad_norm": 0.663913612078674, + "learning_rate": 7.662346719078958e-06, + "loss": 1.0014, + "step": 38410 + }, + { + "epoch": 0.5795046607740807, + "grad_norm": 0.6749858193783291, + "learning_rate": 7.657693145798369e-06, + "loss": 0.9632, + "step": 38420 + }, + { + "epoch": 0.5796554948867236, + "grad_norm": 0.6293825726902865, + "learning_rate": 7.653040109144809e-06, + "loss": 0.9603, + "step": 38430 + }, + { + "epoch": 0.5798063289993665, + "grad_norm": 0.6418351304116524, + "learning_rate": 7.648387610184306e-06, + "loss": 1.0006, + "step": 38440 + }, + { + "epoch": 0.5799571631120094, + "grad_norm": 0.7438702812376227, + "learning_rate": 7.643735649982752e-06, + "loss": 0.9706, + "step": 38450 + }, + { + "epoch": 0.5801079972246523, + "grad_norm": 0.7302323742738401, + "learning_rate": 7.63908422960592e-06, + "loss": 0.9715, + "step": 38460 + }, + { + "epoch": 0.5802588313372953, + "grad_norm": 0.6840652967247455, + "learning_rate": 7.634433350119462e-06, + "loss": 0.9818, + "step": 38470 + }, + { + "epoch": 0.5804096654499381, + "grad_norm": 0.7300516949391765, + "learning_rate": 7.629783012588898e-06, + "loss": 0.9651, + "step": 38480 + }, + { + "epoch": 0.580560499562581, + "grad_norm": 0.6611369540169134, + "learning_rate": 7.625133218079636e-06, + "loss": 0.9947, + "step": 38490 + }, + { + "epoch": 0.580711333675224, + "grad_norm": 0.7623274839507531, + "learning_rate": 7.620483967656953e-06, + "loss": 0.9705, + "step": 38500 + }, + { + "epoch": 0.5808621677878669, + "grad_norm": 0.7709391299344396, + "learning_rate": 7.615835262385992e-06, + "loss": 0.9755, + "step": 38510 + }, + { + "epoch": 0.5810130019005099, + "grad_norm": 0.6722097359174605, + "learning_rate": 7.611187103331791e-06, + "loss": 0.979, + "step": 38520 + }, + { + "epoch": 0.5811638360131527, + "grad_norm": 0.670397294922049, + "learning_rate": 7.606539491559246e-06, + "loss": 0.9663, + "step": 38530 + }, + { + "epoch": 0.5813146701257956, + "grad_norm": 0.6587714324699597, + "learning_rate": 7.601892428133133e-06, + "loss": 0.9825, + "step": 38540 + }, + { + "epoch": 0.5814655042384386, + "grad_norm": 0.7094303705325611, + "learning_rate": 7.597245914118111e-06, + "loss": 0.9853, + "step": 38550 + }, + { + "epoch": 0.5816163383510815, + "grad_norm": 0.6892198224769206, + "learning_rate": 7.5925999505787e-06, + "loss": 0.9819, + "step": 38560 + }, + { + "epoch": 0.5817671724637244, + "grad_norm": 0.7288158920421347, + "learning_rate": 7.587954538579294e-06, + "loss": 0.9743, + "step": 38570 + }, + { + "epoch": 0.5819180065763673, + "grad_norm": 0.7229319475163506, + "learning_rate": 7.583309679184176e-06, + "loss": 0.9727, + "step": 38580 + }, + { + "epoch": 0.5820688406890102, + "grad_norm": 0.6706465391969451, + "learning_rate": 7.5786653734574855e-06, + "loss": 0.973, + "step": 38590 + }, + { + "epoch": 0.5822196748016532, + "grad_norm": 0.7491691185489268, + "learning_rate": 7.574021622463247e-06, + "loss": 0.9862, + "step": 38600 + }, + { + "epoch": 0.5823705089142961, + "grad_norm": 0.6839864169975838, + "learning_rate": 7.569378427265347e-06, + "loss": 0.9815, + "step": 38610 + }, + { + "epoch": 0.5825213430269389, + "grad_norm": 0.6271494361950704, + "learning_rate": 7.564735788927552e-06, + "loss": 0.9688, + "step": 38620 + }, + { + "epoch": 0.5826721771395819, + "grad_norm": 0.6405112573669117, + "learning_rate": 7.560093708513505e-06, + "loss": 0.9663, + "step": 38630 + }, + { + "epoch": 0.5828230112522248, + "grad_norm": 0.7035769136129292, + "learning_rate": 7.555452187086712e-06, + "loss": 0.9915, + "step": 38640 + }, + { + "epoch": 0.5829738453648677, + "grad_norm": 0.709205339566002, + "learning_rate": 7.550811225710549e-06, + "loss": 0.9815, + "step": 38650 + }, + { + "epoch": 0.5831246794775107, + "grad_norm": 0.6871152297745702, + "learning_rate": 7.546170825448279e-06, + "loss": 0.9911, + "step": 38660 + }, + { + "epoch": 0.5832755135901535, + "grad_norm": 0.7094584893128087, + "learning_rate": 7.541530987363022e-06, + "loss": 0.9972, + "step": 38670 + }, + { + "epoch": 0.5834263477027964, + "grad_norm": 0.6965011964066903, + "learning_rate": 7.536891712517769e-06, + "loss": 0.9703, + "step": 38680 + }, + { + "epoch": 0.5835771818154394, + "grad_norm": 0.6633735507934441, + "learning_rate": 7.532253001975394e-06, + "loss": 0.9743, + "step": 38690 + }, + { + "epoch": 0.5837280159280823, + "grad_norm": 0.6817632675281563, + "learning_rate": 7.527614856798634e-06, + "loss": 0.9802, + "step": 38700 + }, + { + "epoch": 0.5838788500407253, + "grad_norm": 0.6888840660210684, + "learning_rate": 7.5229772780500924e-06, + "loss": 0.9686, + "step": 38710 + }, + { + "epoch": 0.5840296841533681, + "grad_norm": 0.6575822256002318, + "learning_rate": 7.518340266792253e-06, + "loss": 0.9703, + "step": 38720 + }, + { + "epoch": 0.584180518266011, + "grad_norm": 0.6521061627877077, + "learning_rate": 7.513703824087457e-06, + "loss": 0.9783, + "step": 38730 + }, + { + "epoch": 0.584331352378654, + "grad_norm": 0.6511195868486133, + "learning_rate": 7.5090679509979325e-06, + "loss": 0.9574, + "step": 38740 + }, + { + "epoch": 0.5844821864912969, + "grad_norm": 0.6478114918767995, + "learning_rate": 7.50443264858576e-06, + "loss": 0.9714, + "step": 38750 + }, + { + "epoch": 0.5846330206039397, + "grad_norm": 0.6404179230787248, + "learning_rate": 7.4997979179128946e-06, + "loss": 0.9859, + "step": 38760 + }, + { + "epoch": 0.5847838547165827, + "grad_norm": 0.657029492168814, + "learning_rate": 7.495163760041167e-06, + "loss": 0.9673, + "step": 38770 + }, + { + "epoch": 0.5849346888292256, + "grad_norm": 0.6654049495556723, + "learning_rate": 7.490530176032269e-06, + "loss": 0.9684, + "step": 38780 + }, + { + "epoch": 0.5850855229418686, + "grad_norm": 0.651972232311582, + "learning_rate": 7.485897166947763e-06, + "loss": 0.9778, + "step": 38790 + }, + { + "epoch": 0.5852363570545115, + "grad_norm": 0.6583471569453494, + "learning_rate": 7.481264733849085e-06, + "loss": 0.9805, + "step": 38800 + }, + { + "epoch": 0.5853871911671543, + "grad_norm": 0.7193651174293297, + "learning_rate": 7.476632877797532e-06, + "loss": 0.956, + "step": 38810 + }, + { + "epoch": 0.5855380252797973, + "grad_norm": 0.668119065289806, + "learning_rate": 7.472001599854265e-06, + "loss": 0.9638, + "step": 38820 + }, + { + "epoch": 0.5856888593924402, + "grad_norm": 0.8001914167502743, + "learning_rate": 7.4673709010803284e-06, + "loss": 0.9884, + "step": 38830 + }, + { + "epoch": 0.5858396935050831, + "grad_norm": 0.7154112588737546, + "learning_rate": 7.462740782536618e-06, + "loss": 0.9924, + "step": 38840 + }, + { + "epoch": 0.5859905276177261, + "grad_norm": 0.6853840141456223, + "learning_rate": 7.458111245283906e-06, + "loss": 0.9625, + "step": 38850 + }, + { + "epoch": 0.5861413617303689, + "grad_norm": 0.6432276732002455, + "learning_rate": 7.453482290382825e-06, + "loss": 0.9514, + "step": 38860 + }, + { + "epoch": 0.5862921958430118, + "grad_norm": 0.6528052203929362, + "learning_rate": 7.448853918893881e-06, + "loss": 0.9698, + "step": 38870 + }, + { + "epoch": 0.5864430299556548, + "grad_norm": 0.7111845023719273, + "learning_rate": 7.444226131877441e-06, + "loss": 0.9807, + "step": 38880 + }, + { + "epoch": 0.5865938640682977, + "grad_norm": 0.6635429683583772, + "learning_rate": 7.439598930393743e-06, + "loss": 0.9837, + "step": 38890 + }, + { + "epoch": 0.5867446981809405, + "grad_norm": 0.6935469194713002, + "learning_rate": 7.4349723155028805e-06, + "loss": 0.9715, + "step": 38900 + }, + { + "epoch": 0.5868955322935835, + "grad_norm": 0.6863647774011108, + "learning_rate": 7.430346288264829e-06, + "loss": 0.9823, + "step": 38910 + }, + { + "epoch": 0.5870463664062264, + "grad_norm": 0.6851095523685837, + "learning_rate": 7.425720849739417e-06, + "loss": 0.9704, + "step": 38920 + }, + { + "epoch": 0.5871972005188694, + "grad_norm": 0.6921681122209883, + "learning_rate": 7.421096000986337e-06, + "loss": 0.983, + "step": 38930 + }, + { + "epoch": 0.5873480346315123, + "grad_norm": 0.6533874115439664, + "learning_rate": 7.416471743065154e-06, + "loss": 0.9858, + "step": 38940 + }, + { + "epoch": 0.5874988687441551, + "grad_norm": 0.6874044408003814, + "learning_rate": 7.411848077035298e-06, + "loss": 0.9736, + "step": 38950 + }, + { + "epoch": 0.5876497028567981, + "grad_norm": 0.71611666186859, + "learning_rate": 7.407225003956053e-06, + "loss": 0.9649, + "step": 38960 + }, + { + "epoch": 0.587800536969441, + "grad_norm": 0.6604522066960761, + "learning_rate": 7.40260252488658e-06, + "loss": 0.9659, + "step": 38970 + }, + { + "epoch": 0.587951371082084, + "grad_norm": 0.6424314151812559, + "learning_rate": 7.397980640885898e-06, + "loss": 0.9954, + "step": 38980 + }, + { + "epoch": 0.5881022051947269, + "grad_norm": 0.669820448576272, + "learning_rate": 7.393359353012881e-06, + "loss": 0.9608, + "step": 38990 + }, + { + "epoch": 0.5882530393073697, + "grad_norm": 0.6575819262711898, + "learning_rate": 7.388738662326285e-06, + "loss": 0.9856, + "step": 39000 + }, + { + "epoch": 0.5884038734200127, + "grad_norm": 0.6648633102760292, + "learning_rate": 7.384118569884712e-06, + "loss": 0.96, + "step": 39010 + }, + { + "epoch": 0.5885547075326556, + "grad_norm": 0.712296277856304, + "learning_rate": 7.379499076746642e-06, + "loss": 0.9679, + "step": 39020 + }, + { + "epoch": 0.5887055416452985, + "grad_norm": 0.6766065113407574, + "learning_rate": 7.374880183970401e-06, + "loss": 0.9883, + "step": 39030 + }, + { + "epoch": 0.5888563757579414, + "grad_norm": 0.6528077445031027, + "learning_rate": 7.3702618926141885e-06, + "loss": 0.9812, + "step": 39040 + }, + { + "epoch": 0.5890072098705843, + "grad_norm": 0.6865726822971372, + "learning_rate": 7.3656442037360706e-06, + "loss": 0.9929, + "step": 39050 + }, + { + "epoch": 0.5891580439832272, + "grad_norm": 0.6256666835684365, + "learning_rate": 7.361027118393964e-06, + "loss": 0.9565, + "step": 39060 + }, + { + "epoch": 0.5893088780958702, + "grad_norm": 0.6625072162052061, + "learning_rate": 7.356410637645649e-06, + "loss": 0.9696, + "step": 39070 + }, + { + "epoch": 0.5894597122085131, + "grad_norm": 0.64563873801806, + "learning_rate": 7.3517947625487765e-06, + "loss": 0.9743, + "step": 39080 + }, + { + "epoch": 0.589610546321156, + "grad_norm": 0.7086654692254961, + "learning_rate": 7.3471794941608466e-06, + "loss": 0.9744, + "step": 39090 + }, + { + "epoch": 0.5897613804337989, + "grad_norm": 0.7006890772407808, + "learning_rate": 7.342564833539229e-06, + "loss": 0.9714, + "step": 39100 + }, + { + "epoch": 0.5899122145464418, + "grad_norm": 0.668259150423053, + "learning_rate": 7.337950781741152e-06, + "loss": 0.9877, + "step": 39110 + }, + { + "epoch": 0.5900630486590848, + "grad_norm": 0.673241666491993, + "learning_rate": 7.333337339823702e-06, + "loss": 0.9657, + "step": 39120 + }, + { + "epoch": 0.5902138827717277, + "grad_norm": 0.6493628325499915, + "learning_rate": 7.328724508843827e-06, + "loss": 0.9681, + "step": 39130 + }, + { + "epoch": 0.5903647168843705, + "grad_norm": 0.7112520081476728, + "learning_rate": 7.324112289858342e-06, + "loss": 0.9929, + "step": 39140 + }, + { + "epoch": 0.5905155509970135, + "grad_norm": 0.648525949122636, + "learning_rate": 7.319500683923904e-06, + "loss": 0.9935, + "step": 39150 + }, + { + "epoch": 0.5906663851096564, + "grad_norm": 0.6567088504176277, + "learning_rate": 7.314889692097052e-06, + "loss": 0.9672, + "step": 39160 + }, + { + "epoch": 0.5908172192222994, + "grad_norm": 0.7014830781096574, + "learning_rate": 7.310279315434168e-06, + "loss": 0.9789, + "step": 39170 + }, + { + "epoch": 0.5909680533349422, + "grad_norm": 0.6784024048290584, + "learning_rate": 7.305669554991493e-06, + "loss": 0.9654, + "step": 39180 + }, + { + "epoch": 0.5911188874475851, + "grad_norm": 0.6196703168939582, + "learning_rate": 7.3010604118251405e-06, + "loss": 0.9722, + "step": 39190 + }, + { + "epoch": 0.5912697215602281, + "grad_norm": 0.6999764906673992, + "learning_rate": 7.296451886991074e-06, + "loss": 0.9744, + "step": 39200 + }, + { + "epoch": 0.591420555672871, + "grad_norm": 0.7107847966757966, + "learning_rate": 7.2918439815451085e-06, + "loss": 0.9875, + "step": 39210 + }, + { + "epoch": 0.5915713897855139, + "grad_norm": 0.66000718357433, + "learning_rate": 7.28723669654293e-06, + "loss": 0.9859, + "step": 39220 + }, + { + "epoch": 0.5917222238981568, + "grad_norm": 0.6522174057480915, + "learning_rate": 7.282630033040077e-06, + "loss": 0.9744, + "step": 39230 + }, + { + "epoch": 0.5918730580107997, + "grad_norm": 0.7742175161123925, + "learning_rate": 7.278023992091936e-06, + "loss": 0.9804, + "step": 39240 + }, + { + "epoch": 0.5920238921234426, + "grad_norm": 0.6583131029751378, + "learning_rate": 7.2734185747537724e-06, + "loss": 0.957, + "step": 39250 + }, + { + "epoch": 0.5921747262360856, + "grad_norm": 0.6417533322887258, + "learning_rate": 7.268813782080686e-06, + "loss": 0.9717, + "step": 39260 + }, + { + "epoch": 0.5923255603487285, + "grad_norm": 0.6437727085848796, + "learning_rate": 7.264209615127651e-06, + "loss": 0.9755, + "step": 39270 + }, + { + "epoch": 0.5924763944613713, + "grad_norm": 0.6961050356814374, + "learning_rate": 7.2596060749494855e-06, + "loss": 0.9723, + "step": 39280 + }, + { + "epoch": 0.5926272285740143, + "grad_norm": 0.7590379885206581, + "learning_rate": 7.255003162600871e-06, + "loss": 0.9798, + "step": 39290 + }, + { + "epoch": 0.5927780626866572, + "grad_norm": 0.657539618242099, + "learning_rate": 7.250400879136348e-06, + "loss": 0.9817, + "step": 39300 + }, + { + "epoch": 0.5929288967993002, + "grad_norm": 0.6780476418947977, + "learning_rate": 7.245799225610306e-06, + "loss": 0.991, + "step": 39310 + }, + { + "epoch": 0.593079730911943, + "grad_norm": 0.6752552515426499, + "learning_rate": 7.2411982030769866e-06, + "loss": 0.9735, + "step": 39320 + }, + { + "epoch": 0.5932305650245859, + "grad_norm": 0.6793125751491733, + "learning_rate": 7.236597812590503e-06, + "loss": 0.9937, + "step": 39330 + }, + { + "epoch": 0.5933813991372289, + "grad_norm": 0.6370274826853529, + "learning_rate": 7.231998055204806e-06, + "loss": 0.9615, + "step": 39340 + }, + { + "epoch": 0.5935322332498718, + "grad_norm": 0.6440102498879517, + "learning_rate": 7.227398931973711e-06, + "loss": 0.9677, + "step": 39350 + }, + { + "epoch": 0.5936830673625147, + "grad_norm": 0.6384577838055417, + "learning_rate": 7.222800443950888e-06, + "loss": 0.9695, + "step": 39360 + }, + { + "epoch": 0.5938339014751576, + "grad_norm": 0.7035372351657758, + "learning_rate": 7.218202592189861e-06, + "loss": 0.9593, + "step": 39370 + }, + { + "epoch": 0.5939847355878005, + "grad_norm": 0.6760974379167999, + "learning_rate": 7.2136053777439975e-06, + "loss": 0.973, + "step": 39380 + }, + { + "epoch": 0.5941355697004435, + "grad_norm": 0.8805935920397334, + "learning_rate": 7.2090088016665395e-06, + "loss": 0.9807, + "step": 39390 + }, + { + "epoch": 0.5942864038130864, + "grad_norm": 0.6494715608480505, + "learning_rate": 7.204412865010563e-06, + "loss": 0.9833, + "step": 39400 + }, + { + "epoch": 0.5944372379257293, + "grad_norm": 0.6444166115474435, + "learning_rate": 7.199817568829013e-06, + "loss": 0.973, + "step": 39410 + }, + { + "epoch": 0.5945880720383722, + "grad_norm": 0.6220170024596696, + "learning_rate": 7.195222914174678e-06, + "loss": 0.9879, + "step": 39420 + }, + { + "epoch": 0.5947389061510151, + "grad_norm": 0.7006495152559892, + "learning_rate": 7.190628902100197e-06, + "loss": 0.9924, + "step": 39430 + }, + { + "epoch": 0.594889740263658, + "grad_norm": 0.6590671821323609, + "learning_rate": 7.186035533658073e-06, + "loss": 0.9677, + "step": 39440 + }, + { + "epoch": 0.595040574376301, + "grad_norm": 0.6432637234366329, + "learning_rate": 7.181442809900659e-06, + "loss": 0.9648, + "step": 39450 + }, + { + "epoch": 0.5951914084889438, + "grad_norm": 0.6445547747479498, + "learning_rate": 7.176850731880145e-06, + "loss": 0.9604, + "step": 39460 + }, + { + "epoch": 0.5953422426015867, + "grad_norm": 0.6383562489741237, + "learning_rate": 7.172259300648599e-06, + "loss": 0.9715, + "step": 39470 + }, + { + "epoch": 0.5954930767142297, + "grad_norm": 0.7868428647669694, + "learning_rate": 7.167668517257917e-06, + "loss": 0.973, + "step": 39480 + }, + { + "epoch": 0.5956439108268726, + "grad_norm": 0.640743889007744, + "learning_rate": 7.163078382759857e-06, + "loss": 0.9791, + "step": 39490 + }, + { + "epoch": 0.5957947449395156, + "grad_norm": 0.7406294322365009, + "learning_rate": 7.158488898206033e-06, + "loss": 0.9843, + "step": 39500 + }, + { + "epoch": 0.5959455790521584, + "grad_norm": 0.6839127904604175, + "learning_rate": 7.153900064647901e-06, + "loss": 0.9706, + "step": 39510 + }, + { + "epoch": 0.5960964131648013, + "grad_norm": 0.6243839962415298, + "learning_rate": 7.149311883136772e-06, + "loss": 0.9877, + "step": 39520 + }, + { + "epoch": 0.5962472472774443, + "grad_norm": 0.7171214616260911, + "learning_rate": 7.144724354723806e-06, + "loss": 0.9794, + "step": 39530 + }, + { + "epoch": 0.5963980813900872, + "grad_norm": 0.6968236901106132, + "learning_rate": 7.140137480460017e-06, + "loss": 0.9806, + "step": 39540 + }, + { + "epoch": 0.5965489155027301, + "grad_norm": 0.6684173860839919, + "learning_rate": 7.135551261396268e-06, + "loss": 0.9878, + "step": 39550 + }, + { + "epoch": 0.596699749615373, + "grad_norm": 0.6555264365301856, + "learning_rate": 7.13096569858327e-06, + "loss": 0.979, + "step": 39560 + }, + { + "epoch": 0.5968505837280159, + "grad_norm": 0.6424626737078626, + "learning_rate": 7.126380793071579e-06, + "loss": 0.976, + "step": 39570 + }, + { + "epoch": 0.5970014178406589, + "grad_norm": 0.7521527275821099, + "learning_rate": 7.121796545911613e-06, + "loss": 0.9603, + "step": 39580 + }, + { + "epoch": 0.5971522519533018, + "grad_norm": 0.6457044408437754, + "learning_rate": 7.11721295815363e-06, + "loss": 0.9611, + "step": 39590 + }, + { + "epoch": 0.5973030860659446, + "grad_norm": 0.7003355991588738, + "learning_rate": 7.112630030847731e-06, + "loss": 0.9645, + "step": 39600 + }, + { + "epoch": 0.5974539201785876, + "grad_norm": 0.6236417021181002, + "learning_rate": 7.108047765043884e-06, + "loss": 0.9725, + "step": 39610 + }, + { + "epoch": 0.5976047542912305, + "grad_norm": 0.6578189102461425, + "learning_rate": 7.103466161791893e-06, + "loss": 0.9759, + "step": 39620 + }, + { + "epoch": 0.5977555884038734, + "grad_norm": 0.7101433682421174, + "learning_rate": 7.098885222141405e-06, + "loss": 0.9685, + "step": 39630 + }, + { + "epoch": 0.5979064225165164, + "grad_norm": 0.745774451326783, + "learning_rate": 7.0943049471419325e-06, + "loss": 0.9921, + "step": 39640 + }, + { + "epoch": 0.5980572566291592, + "grad_norm": 0.710946288675834, + "learning_rate": 7.08972533784282e-06, + "loss": 0.9674, + "step": 39650 + }, + { + "epoch": 0.5982080907418021, + "grad_norm": 0.6452494481936315, + "learning_rate": 7.085146395293261e-06, + "loss": 0.9634, + "step": 39660 + }, + { + "epoch": 0.5983589248544451, + "grad_norm": 0.6649167672710934, + "learning_rate": 7.080568120542309e-06, + "loss": 0.9869, + "step": 39670 + }, + { + "epoch": 0.598509758967088, + "grad_norm": 0.6719569458900919, + "learning_rate": 7.075990514638844e-06, + "loss": 0.9898, + "step": 39680 + }, + { + "epoch": 0.598660593079731, + "grad_norm": 0.6708594030585142, + "learning_rate": 7.071413578631614e-06, + "loss": 0.979, + "step": 39690 + }, + { + "epoch": 0.5988114271923738, + "grad_norm": 0.6534866005797666, + "learning_rate": 7.0668373135692036e-06, + "loss": 0.9608, + "step": 39700 + }, + { + "epoch": 0.5989622613050167, + "grad_norm": 0.644041653108873, + "learning_rate": 7.0622617205000385e-06, + "loss": 0.9822, + "step": 39710 + }, + { + "epoch": 0.5991130954176597, + "grad_norm": 0.7579564976149918, + "learning_rate": 7.057686800472401e-06, + "loss": 0.9696, + "step": 39720 + }, + { + "epoch": 0.5992639295303026, + "grad_norm": 0.6942250338137056, + "learning_rate": 7.053112554534413e-06, + "loss": 0.9634, + "step": 39730 + }, + { + "epoch": 0.5994147636429454, + "grad_norm": 0.7636109623214034, + "learning_rate": 7.048538983734037e-06, + "loss": 0.9547, + "step": 39740 + }, + { + "epoch": 0.5995655977555884, + "grad_norm": 0.6526589049326506, + "learning_rate": 7.043966089119096e-06, + "loss": 0.9813, + "step": 39750 + }, + { + "epoch": 0.5997164318682313, + "grad_norm": 0.6480458728283153, + "learning_rate": 7.039393871737244e-06, + "loss": 0.9638, + "step": 39760 + }, + { + "epoch": 0.5998672659808743, + "grad_norm": 0.7237720987088857, + "learning_rate": 7.034822332635986e-06, + "loss": 0.9937, + "step": 39770 + }, + { + "epoch": 0.6000181000935172, + "grad_norm": 0.6979507860578399, + "learning_rate": 7.0302514728626704e-06, + "loss": 0.9741, + "step": 39780 + }, + { + "epoch": 0.60016893420616, + "grad_norm": 0.6752735456111983, + "learning_rate": 7.025681293464486e-06, + "loss": 0.9779, + "step": 39790 + }, + { + "epoch": 0.600319768318803, + "grad_norm": 0.6631279150253157, + "learning_rate": 7.021111795488479e-06, + "loss": 0.9505, + "step": 39800 + }, + { + "epoch": 0.6004706024314459, + "grad_norm": 0.630026595320796, + "learning_rate": 7.016542979981525e-06, + "loss": 0.9665, + "step": 39810 + }, + { + "epoch": 0.6006214365440888, + "grad_norm": 0.6408759447712331, + "learning_rate": 7.011974847990344e-06, + "loss": 0.9635, + "step": 39820 + }, + { + "epoch": 0.6007722706567318, + "grad_norm": 0.6540128450315699, + "learning_rate": 7.007407400561512e-06, + "loss": 0.9677, + "step": 39830 + }, + { + "epoch": 0.6009231047693746, + "grad_norm": 0.6684045364174896, + "learning_rate": 7.002840638741436e-06, + "loss": 0.9688, + "step": 39840 + }, + { + "epoch": 0.6010739388820175, + "grad_norm": 0.6784480473550547, + "learning_rate": 6.998274563576366e-06, + "loss": 0.9592, + "step": 39850 + }, + { + "epoch": 0.6012247729946605, + "grad_norm": 0.6636256533650423, + "learning_rate": 6.993709176112406e-06, + "loss": 0.9841, + "step": 39860 + }, + { + "epoch": 0.6013756071073034, + "grad_norm": 0.8829347396888823, + "learning_rate": 6.989144477395493e-06, + "loss": 0.9888, + "step": 39870 + }, + { + "epoch": 0.6015264412199463, + "grad_norm": 0.7256193183479497, + "learning_rate": 6.984580468471405e-06, + "loss": 0.9731, + "step": 39880 + }, + { + "epoch": 0.6016772753325892, + "grad_norm": 0.6632827515195678, + "learning_rate": 6.980017150385769e-06, + "loss": 0.9673, + "step": 39890 + }, + { + "epoch": 0.6018281094452321, + "grad_norm": 0.6507299650832029, + "learning_rate": 6.975454524184051e-06, + "loss": 0.9798, + "step": 39900 + }, + { + "epoch": 0.6019789435578751, + "grad_norm": 0.7714999815545814, + "learning_rate": 6.97089259091155e-06, + "loss": 0.9764, + "step": 39910 + }, + { + "epoch": 0.602129777670518, + "grad_norm": 0.6383479162625543, + "learning_rate": 6.9663313516134226e-06, + "loss": 0.981, + "step": 39920 + }, + { + "epoch": 0.6022806117831608, + "grad_norm": 0.6956036303914191, + "learning_rate": 6.9617708073346505e-06, + "loss": 0.9811, + "step": 39930 + }, + { + "epoch": 0.6024314458958038, + "grad_norm": 0.668740718718075, + "learning_rate": 6.95721095912007e-06, + "loss": 0.9609, + "step": 39940 + }, + { + "epoch": 0.6025822800084467, + "grad_norm": 0.7536767224019023, + "learning_rate": 6.952651808014345e-06, + "loss": 0.966, + "step": 39950 + }, + { + "epoch": 0.6027331141210897, + "grad_norm": 0.7406201051660652, + "learning_rate": 6.948093355061988e-06, + "loss": 0.9696, + "step": 39960 + }, + { + "epoch": 0.6028839482337326, + "grad_norm": 0.6715902695117344, + "learning_rate": 6.943535601307353e-06, + "loss": 0.9648, + "step": 39970 + }, + { + "epoch": 0.6030347823463754, + "grad_norm": 0.6432878187230744, + "learning_rate": 6.938978547794627e-06, + "loss": 0.9661, + "step": 39980 + }, + { + "epoch": 0.6031856164590184, + "grad_norm": 0.6854065105918534, + "learning_rate": 6.934422195567836e-06, + "loss": 0.9774, + "step": 39990 + }, + { + "epoch": 0.6033364505716613, + "grad_norm": 0.654525729662184, + "learning_rate": 6.929866545670857e-06, + "loss": 0.983, + "step": 40000 + }, + { + "epoch": 0.6034872846843042, + "grad_norm": 0.6902617313676503, + "learning_rate": 6.925311599147393e-06, + "loss": 0.9725, + "step": 40010 + }, + { + "epoch": 0.6036381187969471, + "grad_norm": 0.6580353289494234, + "learning_rate": 6.9207573570409906e-06, + "loss": 0.9703, + "step": 40020 + }, + { + "epoch": 0.60378895290959, + "grad_norm": 0.7282998631880704, + "learning_rate": 6.916203820395037e-06, + "loss": 0.9728, + "step": 40030 + }, + { + "epoch": 0.6039397870222329, + "grad_norm": 0.6569516939528286, + "learning_rate": 6.91165099025276e-06, + "loss": 0.9688, + "step": 40040 + }, + { + "epoch": 0.6040906211348759, + "grad_norm": 0.7198624987732218, + "learning_rate": 6.907098867657213e-06, + "loss": 0.9686, + "step": 40050 + }, + { + "epoch": 0.6042414552475188, + "grad_norm": 0.6629292282801231, + "learning_rate": 6.902547453651307e-06, + "loss": 0.9738, + "step": 40060 + }, + { + "epoch": 0.6043922893601616, + "grad_norm": 0.7215998380285412, + "learning_rate": 6.897996749277769e-06, + "loss": 0.9782, + "step": 40070 + }, + { + "epoch": 0.6045431234728046, + "grad_norm": 0.6761763060989464, + "learning_rate": 6.893446755579183e-06, + "loss": 0.9826, + "step": 40080 + }, + { + "epoch": 0.6046939575854475, + "grad_norm": 0.7108256121905472, + "learning_rate": 6.888897473597958e-06, + "loss": 0.9904, + "step": 40090 + }, + { + "epoch": 0.6048447916980905, + "grad_norm": 0.6702239878065746, + "learning_rate": 6.884348904376338e-06, + "loss": 0.9808, + "step": 40100 + }, + { + "epoch": 0.6049956258107334, + "grad_norm": 0.6552524181212381, + "learning_rate": 6.8798010489564156e-06, + "loss": 0.9692, + "step": 40110 + }, + { + "epoch": 0.6051464599233762, + "grad_norm": 0.8135877515823143, + "learning_rate": 6.8752539083801155e-06, + "loss": 0.9599, + "step": 40120 + }, + { + "epoch": 0.6052972940360192, + "grad_norm": 0.7459713017783524, + "learning_rate": 6.870707483689188e-06, + "loss": 0.9745, + "step": 40130 + }, + { + "epoch": 0.6054481281486621, + "grad_norm": 0.6468481229608783, + "learning_rate": 6.866161775925234e-06, + "loss": 0.9896, + "step": 40140 + }, + { + "epoch": 0.605598962261305, + "grad_norm": 0.6968506303720635, + "learning_rate": 6.861616786129684e-06, + "loss": 0.9498, + "step": 40150 + }, + { + "epoch": 0.6057497963739479, + "grad_norm": 0.6760983861031945, + "learning_rate": 6.857072515343799e-06, + "loss": 0.9689, + "step": 40160 + }, + { + "epoch": 0.6059006304865908, + "grad_norm": 0.6761779832235622, + "learning_rate": 6.852528964608686e-06, + "loss": 0.9836, + "step": 40170 + }, + { + "epoch": 0.6060514645992338, + "grad_norm": 0.7391497869075799, + "learning_rate": 6.8479861349652764e-06, + "loss": 0.9842, + "step": 40180 + }, + { + "epoch": 0.6062022987118767, + "grad_norm": 0.70325969585862, + "learning_rate": 6.843444027454344e-06, + "loss": 0.9941, + "step": 40190 + }, + { + "epoch": 0.6063531328245196, + "grad_norm": 0.6883984303836479, + "learning_rate": 6.838902643116495e-06, + "loss": 0.9657, + "step": 40200 + }, + { + "epoch": 0.6065039669371625, + "grad_norm": 0.6583361849982304, + "learning_rate": 6.834361982992166e-06, + "loss": 0.9714, + "step": 40210 + }, + { + "epoch": 0.6066548010498054, + "grad_norm": 0.7212256428444095, + "learning_rate": 6.829822048121637e-06, + "loss": 0.9873, + "step": 40220 + }, + { + "epoch": 0.6068056351624483, + "grad_norm": 0.6775369116310246, + "learning_rate": 6.825282839545011e-06, + "loss": 1.0063, + "step": 40230 + }, + { + "epoch": 0.6069564692750913, + "grad_norm": 0.6460248157522139, + "learning_rate": 6.820744358302228e-06, + "loss": 0.9794, + "step": 40240 + }, + { + "epoch": 0.6071073033877342, + "grad_norm": 0.6582563650661367, + "learning_rate": 6.81620660543307e-06, + "loss": 0.9668, + "step": 40250 + }, + { + "epoch": 0.607258137500377, + "grad_norm": 0.6313972630669065, + "learning_rate": 6.811669581977137e-06, + "loss": 0.9698, + "step": 40260 + }, + { + "epoch": 0.60740897161302, + "grad_norm": 0.6991028842340357, + "learning_rate": 6.807133288973875e-06, + "loss": 0.9873, + "step": 40270 + }, + { + "epoch": 0.6075598057256629, + "grad_norm": 0.6562073067214952, + "learning_rate": 6.802597727462554e-06, + "loss": 0.9708, + "step": 40280 + }, + { + "epoch": 0.6077106398383059, + "grad_norm": 0.6995541272431166, + "learning_rate": 6.798062898482285e-06, + "loss": 0.983, + "step": 40290 + }, + { + "epoch": 0.6078614739509487, + "grad_norm": 0.7010720393997908, + "learning_rate": 6.7935288030719985e-06, + "loss": 0.9572, + "step": 40300 + }, + { + "epoch": 0.6080123080635916, + "grad_norm": 0.6607831473357998, + "learning_rate": 6.788995442270473e-06, + "loss": 0.987, + "step": 40310 + }, + { + "epoch": 0.6081631421762346, + "grad_norm": 0.6809022016955038, + "learning_rate": 6.784462817116306e-06, + "loss": 0.9584, + "step": 40320 + }, + { + "epoch": 0.6083139762888775, + "grad_norm": 0.616380494446703, + "learning_rate": 6.7799309286479265e-06, + "loss": 0.9855, + "step": 40330 + }, + { + "epoch": 0.6084648104015205, + "grad_norm": 0.6442359017098673, + "learning_rate": 6.775399777903607e-06, + "loss": 0.9718, + "step": 40340 + }, + { + "epoch": 0.6086156445141633, + "grad_norm": 0.623499793745814, + "learning_rate": 6.770869365921435e-06, + "loss": 0.9566, + "step": 40350 + }, + { + "epoch": 0.6087664786268062, + "grad_norm": 0.7321224558800267, + "learning_rate": 6.766339693739344e-06, + "loss": 0.9796, + "step": 40360 + }, + { + "epoch": 0.6089173127394492, + "grad_norm": 0.6605661674256732, + "learning_rate": 6.761810762395089e-06, + "loss": 0.9896, + "step": 40370 + }, + { + "epoch": 0.6090681468520921, + "grad_norm": 0.6764035905065356, + "learning_rate": 6.757282572926251e-06, + "loss": 0.9587, + "step": 40380 + }, + { + "epoch": 0.609218980964735, + "grad_norm": 0.6264862757571857, + "learning_rate": 6.752755126370256e-06, + "loss": 0.9892, + "step": 40390 + }, + { + "epoch": 0.6093698150773779, + "grad_norm": 0.6643638204782122, + "learning_rate": 6.748228423764346e-06, + "loss": 0.9948, + "step": 40400 + }, + { + "epoch": 0.6095206491900208, + "grad_norm": 0.6927509426178539, + "learning_rate": 6.743702466145593e-06, + "loss": 0.9666, + "step": 40410 + }, + { + "epoch": 0.6096714833026637, + "grad_norm": 0.7310377860696362, + "learning_rate": 6.739177254550911e-06, + "loss": 0.98, + "step": 40420 + }, + { + "epoch": 0.6098223174153067, + "grad_norm": 0.6589574728654852, + "learning_rate": 6.734652790017029e-06, + "loss": 0.9679, + "step": 40430 + }, + { + "epoch": 0.6099731515279495, + "grad_norm": 0.6965338621355341, + "learning_rate": 6.730129073580512e-06, + "loss": 0.9734, + "step": 40440 + }, + { + "epoch": 0.6101239856405924, + "grad_norm": 0.6483447229153406, + "learning_rate": 6.7256061062777535e-06, + "loss": 0.9619, + "step": 40450 + }, + { + "epoch": 0.6102748197532354, + "grad_norm": 0.7183948519157097, + "learning_rate": 6.721083889144975e-06, + "loss": 0.9777, + "step": 40460 + }, + { + "epoch": 0.6104256538658783, + "grad_norm": 0.6584425828479846, + "learning_rate": 6.716562423218219e-06, + "loss": 0.9667, + "step": 40470 + }, + { + "epoch": 0.6105764879785213, + "grad_norm": 0.6416701643287637, + "learning_rate": 6.71204170953337e-06, + "loss": 0.9868, + "step": 40480 + }, + { + "epoch": 0.6107273220911641, + "grad_norm": 0.7076946589278589, + "learning_rate": 6.7075217491261255e-06, + "loss": 0.9877, + "step": 40490 + }, + { + "epoch": 0.610878156203807, + "grad_norm": 0.6117961753655352, + "learning_rate": 6.703002543032024e-06, + "loss": 0.9692, + "step": 40500 + }, + { + "epoch": 0.61102899031645, + "grad_norm": 0.6825987159929827, + "learning_rate": 6.698484092286419e-06, + "loss": 0.9746, + "step": 40510 + }, + { + "epoch": 0.6111798244290929, + "grad_norm": 0.788608424008207, + "learning_rate": 6.693966397924498e-06, + "loss": 0.9694, + "step": 40520 + }, + { + "epoch": 0.6113306585417358, + "grad_norm": 0.6553170724142487, + "learning_rate": 6.689449460981274e-06, + "loss": 0.9733, + "step": 40530 + }, + { + "epoch": 0.6114814926543787, + "grad_norm": 0.6515127748191698, + "learning_rate": 6.684933282491588e-06, + "loss": 0.9712, + "step": 40540 + }, + { + "epoch": 0.6116323267670216, + "grad_norm": 0.6479192664386438, + "learning_rate": 6.680417863490097e-06, + "loss": 0.9684, + "step": 40550 + }, + { + "epoch": 0.6117831608796646, + "grad_norm": 0.6589025405520882, + "learning_rate": 6.675903205011302e-06, + "loss": 0.9711, + "step": 40560 + }, + { + "epoch": 0.6119339949923075, + "grad_norm": 0.6839016103947154, + "learning_rate": 6.671389308089518e-06, + "loss": 0.98, + "step": 40570 + }, + { + "epoch": 0.6120848291049503, + "grad_norm": 0.7157250439342581, + "learning_rate": 6.666876173758881e-06, + "loss": 0.9761, + "step": 40580 + }, + { + "epoch": 0.6122356632175933, + "grad_norm": 0.6357131520444516, + "learning_rate": 6.6623638030533665e-06, + "loss": 0.9723, + "step": 40590 + }, + { + "epoch": 0.6123864973302362, + "grad_norm": 0.676794879447042, + "learning_rate": 6.657852197006761e-06, + "loss": 0.9681, + "step": 40600 + }, + { + "epoch": 0.6125373314428791, + "grad_norm": 0.6926690130565154, + "learning_rate": 6.653341356652687e-06, + "loss": 0.9821, + "step": 40610 + }, + { + "epoch": 0.6126881655555221, + "grad_norm": 0.7071192627152849, + "learning_rate": 6.648831283024588e-06, + "loss": 0.9786, + "step": 40620 + }, + { + "epoch": 0.6128389996681649, + "grad_norm": 0.6466185678047468, + "learning_rate": 6.644321977155722e-06, + "loss": 0.9657, + "step": 40630 + }, + { + "epoch": 0.6129898337808078, + "grad_norm": 0.7504364995601932, + "learning_rate": 6.63981344007919e-06, + "loss": 0.9862, + "step": 40640 + }, + { + "epoch": 0.6131406678934508, + "grad_norm": 0.6779566173857543, + "learning_rate": 6.635305672827903e-06, + "loss": 0.9672, + "step": 40650 + }, + { + "epoch": 0.6132915020060937, + "grad_norm": 0.6405057214042998, + "learning_rate": 6.630798676434595e-06, + "loss": 0.9557, + "step": 40660 + }, + { + "epoch": 0.6134423361187367, + "grad_norm": 0.6704606053876396, + "learning_rate": 6.626292451931833e-06, + "loss": 0.9776, + "step": 40670 + }, + { + "epoch": 0.6135931702313795, + "grad_norm": 0.7837984258219206, + "learning_rate": 6.621787000351998e-06, + "loss": 0.9696, + "step": 40680 + }, + { + "epoch": 0.6137440043440224, + "grad_norm": 0.6796119011669941, + "learning_rate": 6.6172823227273e-06, + "loss": 0.9827, + "step": 40690 + }, + { + "epoch": 0.6138948384566654, + "grad_norm": 0.9454857869620336, + "learning_rate": 6.612778420089767e-06, + "loss": 0.9869, + "step": 40700 + }, + { + "epoch": 0.6140456725693083, + "grad_norm": 0.7311351243829596, + "learning_rate": 6.608275293471257e-06, + "loss": 0.9672, + "step": 40710 + }, + { + "epoch": 0.6141965066819511, + "grad_norm": 0.6635710467752975, + "learning_rate": 6.603772943903437e-06, + "loss": 0.9513, + "step": 40720 + }, + { + "epoch": 0.6143473407945941, + "grad_norm": 0.6901744766614493, + "learning_rate": 6.599271372417813e-06, + "loss": 0.9786, + "step": 40730 + }, + { + "epoch": 0.614498174907237, + "grad_norm": 0.6332904953240308, + "learning_rate": 6.594770580045694e-06, + "loss": 0.972, + "step": 40740 + }, + { + "epoch": 0.61464900901988, + "grad_norm": 0.6346661933352482, + "learning_rate": 6.59027056781823e-06, + "loss": 0.9706, + "step": 40750 + }, + { + "epoch": 0.6147998431325229, + "grad_norm": 0.6314844031059383, + "learning_rate": 6.585771336766376e-06, + "loss": 0.9682, + "step": 40760 + }, + { + "epoch": 0.6149506772451657, + "grad_norm": 0.6523503967734817, + "learning_rate": 6.581272887920917e-06, + "loss": 0.9461, + "step": 40770 + }, + { + "epoch": 0.6151015113578087, + "grad_norm": 0.6619174976290045, + "learning_rate": 6.576775222312455e-06, + "loss": 0.9834, + "step": 40780 + }, + { + "epoch": 0.6152523454704516, + "grad_norm": 0.6756442853678464, + "learning_rate": 6.572278340971417e-06, + "loss": 0.9684, + "step": 40790 + }, + { + "epoch": 0.6154031795830945, + "grad_norm": 0.6537967359504243, + "learning_rate": 6.56778224492804e-06, + "loss": 0.975, + "step": 40800 + }, + { + "epoch": 0.6155540136957375, + "grad_norm": 0.684518544405477, + "learning_rate": 6.563286935212399e-06, + "loss": 0.9805, + "step": 40810 + }, + { + "epoch": 0.6157048478083803, + "grad_norm": 0.7260928891779724, + "learning_rate": 6.5587924128543714e-06, + "loss": 0.969, + "step": 40820 + }, + { + "epoch": 0.6158556819210232, + "grad_norm": 0.6571140640386629, + "learning_rate": 6.554298678883658e-06, + "loss": 0.9735, + "step": 40830 + }, + { + "epoch": 0.6160065160336662, + "grad_norm": 0.6270937164909112, + "learning_rate": 6.549805734329791e-06, + "loss": 0.9694, + "step": 40840 + }, + { + "epoch": 0.6161573501463091, + "grad_norm": 0.7044717031810511, + "learning_rate": 6.5453135802221034e-06, + "loss": 0.9619, + "step": 40850 + }, + { + "epoch": 0.616308184258952, + "grad_norm": 0.6911424796363772, + "learning_rate": 6.540822217589761e-06, + "loss": 0.9514, + "step": 40860 + }, + { + "epoch": 0.6164590183715949, + "grad_norm": 0.7219727751571404, + "learning_rate": 6.536331647461747e-06, + "loss": 0.979, + "step": 40870 + }, + { + "epoch": 0.6166098524842378, + "grad_norm": 0.6549170497999606, + "learning_rate": 6.5318418708668505e-06, + "loss": 0.9773, + "step": 40880 + }, + { + "epoch": 0.6167606865968808, + "grad_norm": 0.6763248692262079, + "learning_rate": 6.527352888833698e-06, + "loss": 0.976, + "step": 40890 + }, + { + "epoch": 0.6169115207095237, + "grad_norm": 0.6698917792292113, + "learning_rate": 6.52286470239072e-06, + "loss": 0.9753, + "step": 40900 + }, + { + "epoch": 0.6170623548221665, + "grad_norm": 0.6718409652929381, + "learning_rate": 6.5183773125661644e-06, + "loss": 0.947, + "step": 40910 + }, + { + "epoch": 0.6172131889348095, + "grad_norm": 0.5984935533987022, + "learning_rate": 6.513890720388108e-06, + "loss": 0.965, + "step": 40920 + }, + { + "epoch": 0.6173640230474524, + "grad_norm": 0.6403058234676475, + "learning_rate": 6.509404926884431e-06, + "loss": 0.9559, + "step": 40930 + }, + { + "epoch": 0.6175148571600954, + "grad_norm": 0.6506710615061492, + "learning_rate": 6.504919933082843e-06, + "loss": 0.9818, + "step": 40940 + }, + { + "epoch": 0.6176656912727383, + "grad_norm": 0.6501347329637287, + "learning_rate": 6.500435740010862e-06, + "loss": 0.966, + "step": 40950 + }, + { + "epoch": 0.6178165253853811, + "grad_norm": 0.7897210160311273, + "learning_rate": 6.495952348695829e-06, + "loss": 0.9699, + "step": 40960 + }, + { + "epoch": 0.6179673594980241, + "grad_norm": 0.6913199332777156, + "learning_rate": 6.491469760164888e-06, + "loss": 0.97, + "step": 40970 + }, + { + "epoch": 0.618118193610667, + "grad_norm": 0.7100521827295744, + "learning_rate": 6.48698797544502e-06, + "loss": 0.9649, + "step": 40980 + }, + { + "epoch": 0.6182690277233099, + "grad_norm": 0.6530864000050478, + "learning_rate": 6.482506995563007e-06, + "loss": 0.9683, + "step": 40990 + }, + { + "epoch": 0.6184198618359528, + "grad_norm": 0.6395617761300791, + "learning_rate": 6.478026821545445e-06, + "loss": 0.9715, + "step": 41000 + }, + { + "epoch": 0.6185706959485957, + "grad_norm": 0.6561528678343738, + "learning_rate": 6.473547454418756e-06, + "loss": 0.9788, + "step": 41010 + }, + { + "epoch": 0.6187215300612386, + "grad_norm": 0.6637975120894448, + "learning_rate": 6.469068895209169e-06, + "loss": 0.9549, + "step": 41020 + }, + { + "epoch": 0.6188723641738816, + "grad_norm": 0.6528801652563555, + "learning_rate": 6.4645911449427315e-06, + "loss": 0.983, + "step": 41030 + }, + { + "epoch": 0.6190231982865245, + "grad_norm": 0.6458149713613122, + "learning_rate": 6.460114204645308e-06, + "loss": 0.949, + "step": 41040 + }, + { + "epoch": 0.6191740323991674, + "grad_norm": 0.6914268952818814, + "learning_rate": 6.455638075342567e-06, + "loss": 0.9717, + "step": 41050 + }, + { + "epoch": 0.6193248665118103, + "grad_norm": 0.6772471140689199, + "learning_rate": 6.451162758060006e-06, + "loss": 0.957, + "step": 41060 + }, + { + "epoch": 0.6194757006244532, + "grad_norm": 0.7298664791735501, + "learning_rate": 6.446688253822926e-06, + "loss": 0.9747, + "step": 41070 + }, + { + "epoch": 0.6196265347370962, + "grad_norm": 0.7017487903497717, + "learning_rate": 6.442214563656441e-06, + "loss": 0.9535, + "step": 41080 + }, + { + "epoch": 0.6197773688497391, + "grad_norm": 0.6196486724332854, + "learning_rate": 6.4377416885854885e-06, + "loss": 0.9718, + "step": 41090 + }, + { + "epoch": 0.6199282029623819, + "grad_norm": 0.6473236658787199, + "learning_rate": 6.433269629634806e-06, + "loss": 0.9721, + "step": 41100 + }, + { + "epoch": 0.6200790370750249, + "grad_norm": 0.6989939686530817, + "learning_rate": 6.4287983878289525e-06, + "loss": 0.9694, + "step": 41110 + }, + { + "epoch": 0.6202298711876678, + "grad_norm": 0.6671645952964329, + "learning_rate": 6.424327964192305e-06, + "loss": 0.9528, + "step": 41120 + }, + { + "epoch": 0.6203807053003108, + "grad_norm": 0.6755480889265033, + "learning_rate": 6.41985835974904e-06, + "loss": 0.9616, + "step": 41130 + }, + { + "epoch": 0.6205315394129536, + "grad_norm": 0.6746122554906114, + "learning_rate": 6.41538957552315e-06, + "loss": 0.9762, + "step": 41140 + }, + { + "epoch": 0.6206823735255965, + "grad_norm": 0.6884175169195443, + "learning_rate": 6.41092161253845e-06, + "loss": 0.9709, + "step": 41150 + }, + { + "epoch": 0.6208332076382395, + "grad_norm": 0.6644122299538052, + "learning_rate": 6.406454471818551e-06, + "loss": 0.9716, + "step": 41160 + }, + { + "epoch": 0.6209840417508824, + "grad_norm": 0.6687780161789131, + "learning_rate": 6.4019881543868915e-06, + "loss": 0.9829, + "step": 41170 + }, + { + "epoch": 0.6211348758635253, + "grad_norm": 0.6526633155127597, + "learning_rate": 6.397522661266706e-06, + "loss": 0.9558, + "step": 41180 + }, + { + "epoch": 0.6212857099761682, + "grad_norm": 0.769772241038409, + "learning_rate": 6.393057993481052e-06, + "loss": 0.9634, + "step": 41190 + }, + { + "epoch": 0.6214365440888111, + "grad_norm": 0.6822196862852363, + "learning_rate": 6.388594152052791e-06, + "loss": 0.9811, + "step": 41200 + }, + { + "epoch": 0.621587378201454, + "grad_norm": 0.734380797457964, + "learning_rate": 6.384131138004603e-06, + "loss": 0.9804, + "step": 41210 + }, + { + "epoch": 0.621738212314097, + "grad_norm": 0.6602951101943793, + "learning_rate": 6.379668952358964e-06, + "loss": 0.9771, + "step": 41220 + }, + { + "epoch": 0.6218890464267399, + "grad_norm": 0.677593648304965, + "learning_rate": 6.3752075961381794e-06, + "loss": 0.9678, + "step": 41230 + }, + { + "epoch": 0.6220398805393828, + "grad_norm": 0.6678661094205576, + "learning_rate": 6.370747070364349e-06, + "loss": 0.9675, + "step": 41240 + }, + { + "epoch": 0.6221907146520257, + "grad_norm": 0.6973457715794062, + "learning_rate": 6.366287376059386e-06, + "loss": 0.9577, + "step": 41250 + }, + { + "epoch": 0.6223415487646686, + "grad_norm": 0.7285836553137357, + "learning_rate": 6.361828514245021e-06, + "loss": 0.9837, + "step": 41260 + }, + { + "epoch": 0.6224923828773116, + "grad_norm": 0.6425254013064228, + "learning_rate": 6.35737048594278e-06, + "loss": 0.9571, + "step": 41270 + }, + { + "epoch": 0.6226432169899544, + "grad_norm": 0.7122393896787177, + "learning_rate": 6.352913292174012e-06, + "loss": 0.9608, + "step": 41280 + }, + { + "epoch": 0.6227940511025973, + "grad_norm": 0.6377830290362226, + "learning_rate": 6.34845693395987e-06, + "loss": 0.9477, + "step": 41290 + }, + { + "epoch": 0.6229448852152403, + "grad_norm": 0.7409717069283472, + "learning_rate": 6.344001412321308e-06, + "loss": 0.9892, + "step": 41300 + }, + { + "epoch": 0.6230957193278832, + "grad_norm": 0.6619649508448767, + "learning_rate": 6.339546728279101e-06, + "loss": 0.9724, + "step": 41310 + }, + { + "epoch": 0.6232465534405262, + "grad_norm": 0.6810519699796942, + "learning_rate": 6.3350928828538235e-06, + "loss": 0.9861, + "step": 41320 + }, + { + "epoch": 0.623397387553169, + "grad_norm": 0.6500573737711117, + "learning_rate": 6.330639877065855e-06, + "loss": 0.9731, + "step": 41330 + }, + { + "epoch": 0.6235482216658119, + "grad_norm": 0.6565457071178031, + "learning_rate": 6.326187711935397e-06, + "loss": 0.9645, + "step": 41340 + }, + { + "epoch": 0.6236990557784549, + "grad_norm": 0.6475486370341257, + "learning_rate": 6.321736388482441e-06, + "loss": 0.9667, + "step": 41350 + }, + { + "epoch": 0.6238498898910978, + "grad_norm": 0.7207604093832704, + "learning_rate": 6.317285907726795e-06, + "loss": 0.9773, + "step": 41360 + }, + { + "epoch": 0.6240007240037407, + "grad_norm": 0.683346617122645, + "learning_rate": 6.312836270688081e-06, + "loss": 0.9827, + "step": 41370 + }, + { + "epoch": 0.6241515581163836, + "grad_norm": 0.7650694697320195, + "learning_rate": 6.308387478385711e-06, + "loss": 0.9753, + "step": 41380 + }, + { + "epoch": 0.6243023922290265, + "grad_norm": 0.6597397341111274, + "learning_rate": 6.303939531838912e-06, + "loss": 0.9961, + "step": 41390 + }, + { + "epoch": 0.6244532263416694, + "grad_norm": 0.6320375934155047, + "learning_rate": 6.2994924320667226e-06, + "loss": 0.9498, + "step": 41400 + }, + { + "epoch": 0.6246040604543124, + "grad_norm": 0.6483813854285004, + "learning_rate": 6.2950461800879755e-06, + "loss": 0.9569, + "step": 41410 + }, + { + "epoch": 0.6247548945669552, + "grad_norm": 0.6910509371654658, + "learning_rate": 6.290600776921323e-06, + "loss": 0.9733, + "step": 41420 + }, + { + "epoch": 0.6249057286795981, + "grad_norm": 0.7371144288421904, + "learning_rate": 6.286156223585209e-06, + "loss": 0.962, + "step": 41430 + }, + { + "epoch": 0.6250565627922411, + "grad_norm": 0.6683720775766387, + "learning_rate": 6.281712521097891e-06, + "loss": 0.9692, + "step": 41440 + }, + { + "epoch": 0.625207396904884, + "grad_norm": 0.7200189269004235, + "learning_rate": 6.277269670477431e-06, + "loss": 0.9778, + "step": 41450 + }, + { + "epoch": 0.625358231017527, + "grad_norm": 0.7005228435744321, + "learning_rate": 6.272827672741697e-06, + "loss": 0.9626, + "step": 41460 + }, + { + "epoch": 0.6255090651301698, + "grad_norm": 0.6449655964289907, + "learning_rate": 6.2683865289083524e-06, + "loss": 0.9767, + "step": 41470 + }, + { + "epoch": 0.6256598992428127, + "grad_norm": 0.6875673064502597, + "learning_rate": 6.26394623999488e-06, + "loss": 0.9687, + "step": 41480 + }, + { + "epoch": 0.6258107333554557, + "grad_norm": 0.6565067620743621, + "learning_rate": 6.259506807018555e-06, + "loss": 0.9668, + "step": 41490 + }, + { + "epoch": 0.6259615674680986, + "grad_norm": 0.6845445431300611, + "learning_rate": 6.255068230996455e-06, + "loss": 0.9626, + "step": 41500 + }, + { + "epoch": 0.6261124015807416, + "grad_norm": 0.6561046409156356, + "learning_rate": 6.250630512945475e-06, + "loss": 0.9774, + "step": 41510 + }, + { + "epoch": 0.6262632356933844, + "grad_norm": 0.7017926777825747, + "learning_rate": 6.2461936538823e-06, + "loss": 0.9825, + "step": 41520 + }, + { + "epoch": 0.6264140698060273, + "grad_norm": 0.6644145649865407, + "learning_rate": 6.24175765482342e-06, + "loss": 0.9615, + "step": 41530 + }, + { + "epoch": 0.6265649039186703, + "grad_norm": 0.6660875305226113, + "learning_rate": 6.237322516785141e-06, + "loss": 0.9543, + "step": 41540 + }, + { + "epoch": 0.6267157380313132, + "grad_norm": 0.6552266560384439, + "learning_rate": 6.23288824078355e-06, + "loss": 0.9782, + "step": 41550 + }, + { + "epoch": 0.626866572143956, + "grad_norm": 0.7068786983699148, + "learning_rate": 6.228454827834558e-06, + "loss": 0.9652, + "step": 41560 + }, + { + "epoch": 0.627017406256599, + "grad_norm": 0.6797120381805115, + "learning_rate": 6.224022278953864e-06, + "loss": 0.9661, + "step": 41570 + }, + { + "epoch": 0.6271682403692419, + "grad_norm": 0.6731460988346083, + "learning_rate": 6.21959059515697e-06, + "loss": 0.9763, + "step": 41580 + }, + { + "epoch": 0.6273190744818848, + "grad_norm": 0.6649465789063443, + "learning_rate": 6.215159777459191e-06, + "loss": 0.97, + "step": 41590 + }, + { + "epoch": 0.6274699085945278, + "grad_norm": 0.6641145711810325, + "learning_rate": 6.21072982687563e-06, + "loss": 0.9715, + "step": 41600 + }, + { + "epoch": 0.6276207427071706, + "grad_norm": 0.6620566917997665, + "learning_rate": 6.2063007444211985e-06, + "loss": 0.989, + "step": 41610 + }, + { + "epoch": 0.6277715768198135, + "grad_norm": 0.6183711565482788, + "learning_rate": 6.201872531110611e-06, + "loss": 1.0001, + "step": 41620 + }, + { + "epoch": 0.6279224109324565, + "grad_norm": 0.6388098354421861, + "learning_rate": 6.197445187958378e-06, + "loss": 0.981, + "step": 41630 + }, + { + "epoch": 0.6280732450450994, + "grad_norm": 0.6479236839553271, + "learning_rate": 6.193018715978807e-06, + "loss": 0.9952, + "step": 41640 + }, + { + "epoch": 0.6282240791577424, + "grad_norm": 0.6837645337085057, + "learning_rate": 6.18859311618602e-06, + "loss": 0.9594, + "step": 41650 + }, + { + "epoch": 0.6283749132703852, + "grad_norm": 0.694543212057163, + "learning_rate": 6.184168389593927e-06, + "loss": 0.96, + "step": 41660 + }, + { + "epoch": 0.6285257473830281, + "grad_norm": 0.6460113119081553, + "learning_rate": 6.179744537216237e-06, + "loss": 0.9567, + "step": 41670 + }, + { + "epoch": 0.6286765814956711, + "grad_norm": 0.6369391869117509, + "learning_rate": 6.1753215600664695e-06, + "loss": 0.9746, + "step": 41680 + }, + { + "epoch": 0.628827415608314, + "grad_norm": 0.7016216392991933, + "learning_rate": 6.170899459157933e-06, + "loss": 0.9707, + "step": 41690 + }, + { + "epoch": 0.6289782497209568, + "grad_norm": 0.7143377815065252, + "learning_rate": 6.166478235503743e-06, + "loss": 0.9531, + "step": 41700 + }, + { + "epoch": 0.6291290838335998, + "grad_norm": 0.6892140451180655, + "learning_rate": 6.16205789011681e-06, + "loss": 0.9789, + "step": 41710 + }, + { + "epoch": 0.6292799179462427, + "grad_norm": 0.6307479921517524, + "learning_rate": 6.157638424009837e-06, + "loss": 0.9792, + "step": 41720 + }, + { + "epoch": 0.6294307520588857, + "grad_norm": 0.7218844220882286, + "learning_rate": 6.1532198381953436e-06, + "loss": 0.9737, + "step": 41730 + }, + { + "epoch": 0.6295815861715286, + "grad_norm": 0.6640643768104757, + "learning_rate": 6.148802133685629e-06, + "loss": 0.9694, + "step": 41740 + }, + { + "epoch": 0.6297324202841714, + "grad_norm": 0.6690837122220743, + "learning_rate": 6.144385311492794e-06, + "loss": 0.9628, + "step": 41750 + }, + { + "epoch": 0.6298832543968144, + "grad_norm": 0.6632084043491177, + "learning_rate": 6.139969372628752e-06, + "loss": 0.9549, + "step": 41760 + }, + { + "epoch": 0.6300340885094573, + "grad_norm": 0.6537764044656609, + "learning_rate": 6.135554318105194e-06, + "loss": 0.9787, + "step": 41770 + }, + { + "epoch": 0.6301849226221002, + "grad_norm": 0.6262956670342489, + "learning_rate": 6.131140148933618e-06, + "loss": 0.9704, + "step": 41780 + }, + { + "epoch": 0.6303357567347432, + "grad_norm": 0.6340393505795509, + "learning_rate": 6.126726866125328e-06, + "loss": 0.9709, + "step": 41790 + }, + { + "epoch": 0.630486590847386, + "grad_norm": 0.6573723616698053, + "learning_rate": 6.122314470691408e-06, + "loss": 0.9638, + "step": 41800 + }, + { + "epoch": 0.630637424960029, + "grad_norm": 0.6938461050731476, + "learning_rate": 6.117902963642742e-06, + "loss": 0.962, + "step": 41810 + }, + { + "epoch": 0.6307882590726719, + "grad_norm": 0.6582179945246109, + "learning_rate": 6.113492345990026e-06, + "loss": 0.9772, + "step": 41820 + }, + { + "epoch": 0.6309390931853148, + "grad_norm": 0.6985167303056089, + "learning_rate": 6.1090826187437304e-06, + "loss": 0.992, + "step": 41830 + }, + { + "epoch": 0.6310899272979577, + "grad_norm": 0.6928458883333004, + "learning_rate": 6.104673782914141e-06, + "loss": 0.9714, + "step": 41840 + }, + { + "epoch": 0.6312407614106006, + "grad_norm": 0.6554741125471505, + "learning_rate": 6.100265839511326e-06, + "loss": 0.9715, + "step": 41850 + }, + { + "epoch": 0.6313915955232435, + "grad_norm": 0.6551146405319791, + "learning_rate": 6.095858789545153e-06, + "loss": 0.9702, + "step": 41860 + }, + { + "epoch": 0.6315424296358865, + "grad_norm": 0.7074688447391585, + "learning_rate": 6.091452634025288e-06, + "loss": 0.9632, + "step": 41870 + }, + { + "epoch": 0.6316932637485294, + "grad_norm": 0.6777565474937974, + "learning_rate": 6.087047373961191e-06, + "loss": 0.9781, + "step": 41880 + }, + { + "epoch": 0.6318440978611722, + "grad_norm": 0.6553616038226989, + "learning_rate": 6.082643010362111e-06, + "loss": 0.9706, + "step": 41890 + }, + { + "epoch": 0.6319949319738152, + "grad_norm": 0.6448076072808296, + "learning_rate": 6.0782395442371014e-06, + "loss": 0.9496, + "step": 41900 + }, + { + "epoch": 0.6321457660864581, + "grad_norm": 0.649292709390274, + "learning_rate": 6.073836976595001e-06, + "loss": 0.9692, + "step": 41910 + }, + { + "epoch": 0.632296600199101, + "grad_norm": 0.7009730995700477, + "learning_rate": 6.069435308444445e-06, + "loss": 0.9715, + "step": 41920 + }, + { + "epoch": 0.632447434311744, + "grad_norm": 0.6468202494103125, + "learning_rate": 6.065034540793868e-06, + "loss": 0.9828, + "step": 41930 + }, + { + "epoch": 0.6325982684243868, + "grad_norm": 0.6504666366419821, + "learning_rate": 6.060634674651497e-06, + "loss": 0.9739, + "step": 41940 + }, + { + "epoch": 0.6327491025370298, + "grad_norm": 0.7631534726971162, + "learning_rate": 6.056235711025341e-06, + "loss": 0.9671, + "step": 41950 + }, + { + "epoch": 0.6328999366496727, + "grad_norm": 0.6630685336501202, + "learning_rate": 6.05183765092322e-06, + "loss": 0.9808, + "step": 41960 + }, + { + "epoch": 0.6330507707623156, + "grad_norm": 0.7259721231143369, + "learning_rate": 6.047440495352731e-06, + "loss": 0.9603, + "step": 41970 + }, + { + "epoch": 0.6332016048749585, + "grad_norm": 0.6636174199060039, + "learning_rate": 6.043044245321278e-06, + "loss": 0.9758, + "step": 41980 + }, + { + "epoch": 0.6333524389876014, + "grad_norm": 0.6845088619060663, + "learning_rate": 6.038648901836046e-06, + "loss": 0.9991, + "step": 41990 + }, + { + "epoch": 0.6335032731002443, + "grad_norm": 0.6482284003359908, + "learning_rate": 6.034254465904014e-06, + "loss": 0.9718, + "step": 42000 + }, + { + "epoch": 0.6336541072128873, + "grad_norm": 0.6310375655795228, + "learning_rate": 6.029860938531962e-06, + "loss": 0.9502, + "step": 42010 + }, + { + "epoch": 0.6338049413255302, + "grad_norm": 0.7744998533196623, + "learning_rate": 6.025468320726451e-06, + "loss": 0.9721, + "step": 42020 + }, + { + "epoch": 0.633955775438173, + "grad_norm": 0.6744995148197758, + "learning_rate": 6.0210766134938384e-06, + "loss": 0.9728, + "step": 42030 + }, + { + "epoch": 0.634106609550816, + "grad_norm": 0.7048587172377982, + "learning_rate": 6.016685817840278e-06, + "loss": 0.97, + "step": 42040 + }, + { + "epoch": 0.6342574436634589, + "grad_norm": 0.6936014358419789, + "learning_rate": 6.012295934771706e-06, + "loss": 0.9809, + "step": 42050 + }, + { + "epoch": 0.6344082777761019, + "grad_norm": 0.6861781279823476, + "learning_rate": 6.007906965293848e-06, + "loss": 0.9604, + "step": 42060 + }, + { + "epoch": 0.6345591118887448, + "grad_norm": 0.6552954370044342, + "learning_rate": 6.003518910412236e-06, + "loss": 0.982, + "step": 42070 + }, + { + "epoch": 0.6347099460013876, + "grad_norm": 0.6435610546413729, + "learning_rate": 5.99913177113217e-06, + "loss": 0.9769, + "step": 42080 + }, + { + "epoch": 0.6348607801140306, + "grad_norm": 0.6540015742466013, + "learning_rate": 5.994745548458765e-06, + "loss": 0.9625, + "step": 42090 + }, + { + "epoch": 0.6350116142266735, + "grad_norm": 0.6704043129424776, + "learning_rate": 5.990360243396902e-06, + "loss": 0.9616, + "step": 42100 + }, + { + "epoch": 0.6351624483393165, + "grad_norm": 0.6681894416718644, + "learning_rate": 5.9859758569512684e-06, + "loss": 0.9644, + "step": 42110 + }, + { + "epoch": 0.6353132824519593, + "grad_norm": 0.6646913849981891, + "learning_rate": 5.981592390126334e-06, + "loss": 0.9656, + "step": 42120 + }, + { + "epoch": 0.6354641165646022, + "grad_norm": 0.6627591305747943, + "learning_rate": 5.977209843926364e-06, + "loss": 0.9744, + "step": 42130 + }, + { + "epoch": 0.6356149506772452, + "grad_norm": 0.6448972810880089, + "learning_rate": 5.972828219355398e-06, + "loss": 1.0053, + "step": 42140 + }, + { + "epoch": 0.6357657847898881, + "grad_norm": 0.6665673653009554, + "learning_rate": 5.968447517417286e-06, + "loss": 0.9448, + "step": 42150 + }, + { + "epoch": 0.635916618902531, + "grad_norm": 0.6333548435469116, + "learning_rate": 5.96406773911565e-06, + "loss": 0.9689, + "step": 42160 + }, + { + "epoch": 0.6360674530151739, + "grad_norm": 0.6572649142957872, + "learning_rate": 5.959688885453901e-06, + "loss": 0.9502, + "step": 42170 + }, + { + "epoch": 0.6362182871278168, + "grad_norm": 0.6431866874337295, + "learning_rate": 5.9553109574352515e-06, + "loss": 0.9892, + "step": 42180 + }, + { + "epoch": 0.6363691212404597, + "grad_norm": 0.655868242446939, + "learning_rate": 5.95093395606269e-06, + "loss": 0.9743, + "step": 42190 + }, + { + "epoch": 0.6365199553531027, + "grad_norm": 0.6652611527777057, + "learning_rate": 5.946557882338992e-06, + "loss": 0.9572, + "step": 42200 + }, + { + "epoch": 0.6366707894657456, + "grad_norm": 0.6519025128778658, + "learning_rate": 5.942182737266733e-06, + "loss": 0.9691, + "step": 42210 + }, + { + "epoch": 0.6368216235783885, + "grad_norm": 0.7431256224922417, + "learning_rate": 5.937808521848259e-06, + "loss": 0.9897, + "step": 42220 + }, + { + "epoch": 0.6369724576910314, + "grad_norm": 0.6560364382175824, + "learning_rate": 5.933435237085716e-06, + "loss": 0.9532, + "step": 42230 + }, + { + "epoch": 0.6371232918036743, + "grad_norm": 0.7003189940218373, + "learning_rate": 5.929062883981032e-06, + "loss": 0.9626, + "step": 42240 + }, + { + "epoch": 0.6372741259163173, + "grad_norm": 0.6675595601557676, + "learning_rate": 5.924691463535915e-06, + "loss": 0.9681, + "step": 42250 + }, + { + "epoch": 0.6374249600289601, + "grad_norm": 0.6832141256209567, + "learning_rate": 5.9203209767518745e-06, + "loss": 0.9408, + "step": 42260 + }, + { + "epoch": 0.637575794141603, + "grad_norm": 0.6768533835123041, + "learning_rate": 5.915951424630193e-06, + "loss": 0.9789, + "step": 42270 + }, + { + "epoch": 0.637726628254246, + "grad_norm": 0.714124266886499, + "learning_rate": 5.9115828081719415e-06, + "loss": 0.9849, + "step": 42280 + }, + { + "epoch": 0.6378774623668889, + "grad_norm": 0.7382813792021558, + "learning_rate": 5.907215128377986e-06, + "loss": 0.9809, + "step": 42290 + }, + { + "epoch": 0.6380282964795319, + "grad_norm": 0.6356824411127139, + "learning_rate": 5.902848386248965e-06, + "loss": 0.964, + "step": 42300 + }, + { + "epoch": 0.6381791305921747, + "grad_norm": 0.6956562263254537, + "learning_rate": 5.898482582785305e-06, + "loss": 0.9638, + "step": 42310 + }, + { + "epoch": 0.6383299647048176, + "grad_norm": 0.6553825871141756, + "learning_rate": 5.8941177189872255e-06, + "loss": 0.958, + "step": 42320 + }, + { + "epoch": 0.6384807988174606, + "grad_norm": 0.6607106653741094, + "learning_rate": 5.889753795854725e-06, + "loss": 0.9704, + "step": 42330 + }, + { + "epoch": 0.6386316329301035, + "grad_norm": 0.7365123440315453, + "learning_rate": 5.885390814387578e-06, + "loss": 0.9761, + "step": 42340 + }, + { + "epoch": 0.6387824670427464, + "grad_norm": 0.6588109026028254, + "learning_rate": 5.8810287755853624e-06, + "loss": 0.9764, + "step": 42350 + }, + { + "epoch": 0.6389333011553893, + "grad_norm": 0.6945281958994179, + "learning_rate": 5.876667680447425e-06, + "loss": 0.9713, + "step": 42360 + }, + { + "epoch": 0.6390841352680322, + "grad_norm": 0.6543586847843864, + "learning_rate": 5.872307529972904e-06, + "loss": 0.956, + "step": 42370 + }, + { + "epoch": 0.6392349693806751, + "grad_norm": 0.6956183207481679, + "learning_rate": 5.867948325160717e-06, + "loss": 0.9604, + "step": 42380 + }, + { + "epoch": 0.6393858034933181, + "grad_norm": 0.6973239412933835, + "learning_rate": 5.863590067009561e-06, + "loss": 0.9679, + "step": 42390 + }, + { + "epoch": 0.6395366376059609, + "grad_norm": 0.6782296253191169, + "learning_rate": 5.859232756517932e-06, + "loss": 0.9723, + "step": 42400 + }, + { + "epoch": 0.6396874717186039, + "grad_norm": 0.6715963432009546, + "learning_rate": 5.854876394684091e-06, + "loss": 0.9662, + "step": 42410 + }, + { + "epoch": 0.6398383058312468, + "grad_norm": 0.6428201554767236, + "learning_rate": 5.850520982506087e-06, + "loss": 0.9778, + "step": 42420 + }, + { + "epoch": 0.6399891399438897, + "grad_norm": 0.6411233736671075, + "learning_rate": 5.8461665209817575e-06, + "loss": 0.971, + "step": 42430 + }, + { + "epoch": 0.6401399740565327, + "grad_norm": 0.6553629527023742, + "learning_rate": 5.841813011108721e-06, + "loss": 0.9624, + "step": 42440 + }, + { + "epoch": 0.6402908081691755, + "grad_norm": 0.7185071061288759, + "learning_rate": 5.837460453884366e-06, + "loss": 1.0014, + "step": 42450 + }, + { + "epoch": 0.6404416422818184, + "grad_norm": 0.7144650756748009, + "learning_rate": 5.833108850305881e-06, + "loss": 0.9665, + "step": 42460 + }, + { + "epoch": 0.6405924763944614, + "grad_norm": 0.7521007538484308, + "learning_rate": 5.828758201370224e-06, + "loss": 0.9676, + "step": 42470 + }, + { + "epoch": 0.6407433105071043, + "grad_norm": 0.66871776811873, + "learning_rate": 5.824408508074129e-06, + "loss": 0.9662, + "step": 42480 + }, + { + "epoch": 0.6408941446197473, + "grad_norm": 0.6952077499229133, + "learning_rate": 5.8200597714141325e-06, + "loss": 0.9914, + "step": 42490 + }, + { + "epoch": 0.6410449787323901, + "grad_norm": 0.6366038164662208, + "learning_rate": 5.815711992386528e-06, + "loss": 0.9819, + "step": 42500 + }, + { + "epoch": 0.641195812845033, + "grad_norm": 0.641044901832692, + "learning_rate": 5.811365171987407e-06, + "loss": 0.964, + "step": 42510 + }, + { + "epoch": 0.641346646957676, + "grad_norm": 0.6504356267340656, + "learning_rate": 5.807019311212633e-06, + "loss": 0.9669, + "step": 42520 + }, + { + "epoch": 0.6414974810703189, + "grad_norm": 0.6336791978319972, + "learning_rate": 5.802674411057845e-06, + "loss": 0.9549, + "step": 42530 + }, + { + "epoch": 0.6416483151829617, + "grad_norm": 0.6597812395366566, + "learning_rate": 5.798330472518476e-06, + "loss": 0.9735, + "step": 42540 + }, + { + "epoch": 0.6417991492956047, + "grad_norm": 0.6886936258111338, + "learning_rate": 5.793987496589723e-06, + "loss": 0.96, + "step": 42550 + }, + { + "epoch": 0.6419499834082476, + "grad_norm": 0.6447217953130334, + "learning_rate": 5.7896454842665814e-06, + "loss": 0.9543, + "step": 42560 + }, + { + "epoch": 0.6421008175208905, + "grad_norm": 0.6894600013655907, + "learning_rate": 5.785304436543802e-06, + "loss": 0.9789, + "step": 42570 + }, + { + "epoch": 0.6422516516335335, + "grad_norm": 0.6737366211449386, + "learning_rate": 5.780964354415937e-06, + "loss": 0.9717, + "step": 42580 + }, + { + "epoch": 0.6424024857461763, + "grad_norm": 0.6916674613570246, + "learning_rate": 5.7766252388773e-06, + "loss": 0.9846, + "step": 42590 + }, + { + "epoch": 0.6425533198588192, + "grad_norm": 0.6736845968810052, + "learning_rate": 5.772287090922e-06, + "loss": 0.985, + "step": 42600 + }, + { + "epoch": 0.6427041539714622, + "grad_norm": 0.6441223017248404, + "learning_rate": 5.767949911543907e-06, + "loss": 0.975, + "step": 42610 + }, + { + "epoch": 0.6428549880841051, + "grad_norm": 0.6523898973913099, + "learning_rate": 5.763613701736677e-06, + "loss": 0.971, + "step": 42620 + }, + { + "epoch": 0.6430058221967481, + "grad_norm": 0.6757285882446171, + "learning_rate": 5.759278462493752e-06, + "loss": 0.973, + "step": 42630 + }, + { + "epoch": 0.6431566563093909, + "grad_norm": 0.6586735887119388, + "learning_rate": 5.754944194808332e-06, + "loss": 0.9575, + "step": 42640 + }, + { + "epoch": 0.6433074904220338, + "grad_norm": 0.6690788059194364, + "learning_rate": 5.750610899673419e-06, + "loss": 0.9761, + "step": 42650 + }, + { + "epoch": 0.6434583245346768, + "grad_norm": 0.6720196042285724, + "learning_rate": 5.746278578081772e-06, + "loss": 0.956, + "step": 42660 + }, + { + "epoch": 0.6436091586473197, + "grad_norm": 0.6769536631333384, + "learning_rate": 5.74194723102593e-06, + "loss": 0.9767, + "step": 42670 + }, + { + "epoch": 0.6437599927599625, + "grad_norm": 0.6718800270657119, + "learning_rate": 5.737616859498224e-06, + "loss": 0.9665, + "step": 42680 + }, + { + "epoch": 0.6439108268726055, + "grad_norm": 0.6904956711232069, + "learning_rate": 5.733287464490745e-06, + "loss": 0.9701, + "step": 42690 + }, + { + "epoch": 0.6440616609852484, + "grad_norm": 0.6629669485437492, + "learning_rate": 5.728959046995359e-06, + "loss": 0.9404, + "step": 42700 + }, + { + "epoch": 0.6442124950978914, + "grad_norm": 0.6542551501260208, + "learning_rate": 5.7246316080037275e-06, + "loss": 0.9809, + "step": 42710 + }, + { + "epoch": 0.6443633292105343, + "grad_norm": 0.6300381782890898, + "learning_rate": 5.720305148507265e-06, + "loss": 0.9596, + "step": 42720 + }, + { + "epoch": 0.6445141633231771, + "grad_norm": 0.6706069170532343, + "learning_rate": 5.715979669497178e-06, + "loss": 0.9673, + "step": 42730 + }, + { + "epoch": 0.6446649974358201, + "grad_norm": 0.6572002220459922, + "learning_rate": 5.711655171964438e-06, + "loss": 0.9569, + "step": 42740 + }, + { + "epoch": 0.644815831548463, + "grad_norm": 0.7041394873741993, + "learning_rate": 5.707331656899796e-06, + "loss": 0.9862, + "step": 42750 + }, + { + "epoch": 0.6449666656611059, + "grad_norm": 0.6571985343572138, + "learning_rate": 5.7030091252937835e-06, + "loss": 0.978, + "step": 42760 + }, + { + "epoch": 0.6451174997737489, + "grad_norm": 0.6584832876801933, + "learning_rate": 5.6986875781366965e-06, + "loss": 0.9868, + "step": 42770 + }, + { + "epoch": 0.6452683338863917, + "grad_norm": 0.7587623501408065, + "learning_rate": 5.694367016418607e-06, + "loss": 0.9802, + "step": 42780 + }, + { + "epoch": 0.6454191679990346, + "grad_norm": 0.7253421408913294, + "learning_rate": 5.69004744112937e-06, + "loss": 0.9926, + "step": 42790 + }, + { + "epoch": 0.6455700021116776, + "grad_norm": 0.6502834811611508, + "learning_rate": 5.685728853258605e-06, + "loss": 0.9782, + "step": 42800 + }, + { + "epoch": 0.6457208362243205, + "grad_norm": 0.7281671123176058, + "learning_rate": 5.681411253795707e-06, + "loss": 0.9657, + "step": 42810 + }, + { + "epoch": 0.6458716703369634, + "grad_norm": 0.6180512513824692, + "learning_rate": 5.677094643729852e-06, + "loss": 0.9706, + "step": 42820 + }, + { + "epoch": 0.6460225044496063, + "grad_norm": 0.6675832213354944, + "learning_rate": 5.67277902404998e-06, + "loss": 0.9839, + "step": 42830 + }, + { + "epoch": 0.6461733385622492, + "grad_norm": 0.6538338573039657, + "learning_rate": 5.668464395744805e-06, + "loss": 0.9569, + "step": 42840 + }, + { + "epoch": 0.6463241726748922, + "grad_norm": 0.6778410453779325, + "learning_rate": 5.664150759802824e-06, + "loss": 0.9593, + "step": 42850 + }, + { + "epoch": 0.6464750067875351, + "grad_norm": 0.6873299348775346, + "learning_rate": 5.659838117212295e-06, + "loss": 0.9779, + "step": 42860 + }, + { + "epoch": 0.6466258409001779, + "grad_norm": 0.6844411093160175, + "learning_rate": 5.655526468961249e-06, + "loss": 0.9659, + "step": 42870 + }, + { + "epoch": 0.6467766750128209, + "grad_norm": 0.6796045112087769, + "learning_rate": 5.651215816037501e-06, + "loss": 0.943, + "step": 42880 + }, + { + "epoch": 0.6469275091254638, + "grad_norm": 0.7011784944903624, + "learning_rate": 5.6469061594286225e-06, + "loss": 0.9767, + "step": 42890 + }, + { + "epoch": 0.6470783432381068, + "grad_norm": 0.6823143167668787, + "learning_rate": 5.642597500121967e-06, + "loss": 0.973, + "step": 42900 + }, + { + "epoch": 0.6472291773507497, + "grad_norm": 0.6771525350938744, + "learning_rate": 5.638289839104661e-06, + "loss": 0.9694, + "step": 42910 + }, + { + "epoch": 0.6473800114633925, + "grad_norm": 0.6666671521686408, + "learning_rate": 5.6339831773635915e-06, + "loss": 0.9728, + "step": 42920 + }, + { + "epoch": 0.6475308455760355, + "grad_norm": 0.7132763654253146, + "learning_rate": 5.62967751588543e-06, + "loss": 0.9659, + "step": 42930 + }, + { + "epoch": 0.6476816796886784, + "grad_norm": 0.6698672216211826, + "learning_rate": 5.625372855656605e-06, + "loss": 0.9734, + "step": 42940 + }, + { + "epoch": 0.6478325138013213, + "grad_norm": 0.673744851491136, + "learning_rate": 5.6210691976633244e-06, + "loss": 0.9676, + "step": 42950 + }, + { + "epoch": 0.6479833479139642, + "grad_norm": 0.676455663777131, + "learning_rate": 5.616766542891568e-06, + "loss": 0.9677, + "step": 42960 + }, + { + "epoch": 0.6481341820266071, + "grad_norm": 0.6412332285841934, + "learning_rate": 5.612464892327081e-06, + "loss": 0.9446, + "step": 42970 + }, + { + "epoch": 0.64828501613925, + "grad_norm": 0.6600964269997174, + "learning_rate": 5.608164246955374e-06, + "loss": 0.9585, + "step": 42980 + }, + { + "epoch": 0.648435850251893, + "grad_norm": 0.6860107634340914, + "learning_rate": 5.60386460776174e-06, + "loss": 0.9723, + "step": 42990 + }, + { + "epoch": 0.6485866843645359, + "grad_norm": 0.7006775870313375, + "learning_rate": 5.599565975731234e-06, + "loss": 0.9715, + "step": 43000 + }, + { + "epoch": 0.6487375184771788, + "grad_norm": 0.6680904622232058, + "learning_rate": 5.595268351848675e-06, + "loss": 0.9812, + "step": 43010 + }, + { + "epoch": 0.6488883525898217, + "grad_norm": 0.6597807187268595, + "learning_rate": 5.590971737098666e-06, + "loss": 0.9606, + "step": 43020 + }, + { + "epoch": 0.6490391867024646, + "grad_norm": 0.6453063574652307, + "learning_rate": 5.58667613246556e-06, + "loss": 0.9589, + "step": 43030 + }, + { + "epoch": 0.6491900208151076, + "grad_norm": 0.6792432134972969, + "learning_rate": 5.582381538933499e-06, + "loss": 0.9828, + "step": 43040 + }, + { + "epoch": 0.6493408549277505, + "grad_norm": 0.6849893885026607, + "learning_rate": 5.578087957486371e-06, + "loss": 0.9369, + "step": 43050 + }, + { + "epoch": 0.6494916890403933, + "grad_norm": 0.6164675018449819, + "learning_rate": 5.573795389107855e-06, + "loss": 0.9463, + "step": 43060 + }, + { + "epoch": 0.6496425231530363, + "grad_norm": 0.6438426398195195, + "learning_rate": 5.569503834781378e-06, + "loss": 0.9816, + "step": 43070 + }, + { + "epoch": 0.6497933572656792, + "grad_norm": 0.6806728867243337, + "learning_rate": 5.56521329549015e-06, + "loss": 0.9656, + "step": 43080 + }, + { + "epoch": 0.6499441913783222, + "grad_norm": 0.7235518394878131, + "learning_rate": 5.560923772217136e-06, + "loss": 0.957, + "step": 43090 + }, + { + "epoch": 0.650095025490965, + "grad_norm": 0.6794416307201425, + "learning_rate": 5.556635265945081e-06, + "loss": 0.9727, + "step": 43100 + }, + { + "epoch": 0.6502458596036079, + "grad_norm": 0.6665090152137334, + "learning_rate": 5.552347777656486e-06, + "loss": 0.9775, + "step": 43110 + }, + { + "epoch": 0.6503966937162509, + "grad_norm": 0.6562245094332584, + "learning_rate": 5.548061308333618e-06, + "loss": 0.9615, + "step": 43120 + }, + { + "epoch": 0.6505475278288938, + "grad_norm": 0.6706480885310071, + "learning_rate": 5.543775858958524e-06, + "loss": 0.9757, + "step": 43130 + }, + { + "epoch": 0.6506983619415367, + "grad_norm": 0.657420738729188, + "learning_rate": 5.539491430513007e-06, + "loss": 0.9668, + "step": 43140 + }, + { + "epoch": 0.6508491960541796, + "grad_norm": 0.717124010874959, + "learning_rate": 5.535208023978631e-06, + "loss": 0.9687, + "step": 43150 + }, + { + "epoch": 0.6510000301668225, + "grad_norm": 0.658012757127713, + "learning_rate": 5.530925640336742e-06, + "loss": 0.9728, + "step": 43160 + }, + { + "epoch": 0.6511508642794654, + "grad_norm": 0.6673215592955289, + "learning_rate": 5.526644280568431e-06, + "loss": 0.967, + "step": 43170 + }, + { + "epoch": 0.6513016983921084, + "grad_norm": 0.7304548275999562, + "learning_rate": 5.522363945654581e-06, + "loss": 0.9666, + "step": 43180 + }, + { + "epoch": 0.6514525325047513, + "grad_norm": 0.6414873454643423, + "learning_rate": 5.518084636575816e-06, + "loss": 0.9599, + "step": 43190 + }, + { + "epoch": 0.6516033666173942, + "grad_norm": 0.6399191356852458, + "learning_rate": 5.513806354312529e-06, + "loss": 0.9622, + "step": 43200 + }, + { + "epoch": 0.6517542007300371, + "grad_norm": 0.6854441285024157, + "learning_rate": 5.509529099844894e-06, + "loss": 0.9601, + "step": 43210 + }, + { + "epoch": 0.65190503484268, + "grad_norm": 0.7132111677603207, + "learning_rate": 5.505252874152829e-06, + "loss": 0.9643, + "step": 43220 + }, + { + "epoch": 0.652055868955323, + "grad_norm": 0.6867380743713667, + "learning_rate": 5.500977678216033e-06, + "loss": 0.9883, + "step": 43230 + }, + { + "epoch": 0.6522067030679658, + "grad_norm": 0.67049621667043, + "learning_rate": 5.496703513013955e-06, + "loss": 0.9628, + "step": 43240 + }, + { + "epoch": 0.6523575371806087, + "grad_norm": 0.721279869669179, + "learning_rate": 5.49243037952582e-06, + "loss": 0.9619, + "step": 43250 + }, + { + "epoch": 0.6525083712932517, + "grad_norm": 0.6378619885205665, + "learning_rate": 5.488158278730606e-06, + "loss": 0.9611, + "step": 43260 + }, + { + "epoch": 0.6526592054058946, + "grad_norm": 0.6594868075587375, + "learning_rate": 5.483887211607066e-06, + "loss": 0.9519, + "step": 43270 + }, + { + "epoch": 0.6528100395185376, + "grad_norm": 0.667409529437208, + "learning_rate": 5.479617179133705e-06, + "loss": 0.9663, + "step": 43280 + }, + { + "epoch": 0.6529608736311804, + "grad_norm": 0.738234791224016, + "learning_rate": 5.475348182288795e-06, + "loss": 0.9607, + "step": 43290 + }, + { + "epoch": 0.6531117077438233, + "grad_norm": 0.6488474454451069, + "learning_rate": 5.471080222050375e-06, + "loss": 0.9604, + "step": 43300 + }, + { + "epoch": 0.6532625418564663, + "grad_norm": 0.6588926497309513, + "learning_rate": 5.466813299396239e-06, + "loss": 0.9786, + "step": 43310 + }, + { + "epoch": 0.6534133759691092, + "grad_norm": 0.6387800278293362, + "learning_rate": 5.462547415303952e-06, + "loss": 0.9482, + "step": 43320 + }, + { + "epoch": 0.6535642100817521, + "grad_norm": 0.7435764416657624, + "learning_rate": 5.4582825707508346e-06, + "loss": 0.9529, + "step": 43330 + }, + { + "epoch": 0.653715044194395, + "grad_norm": 0.6829494156047341, + "learning_rate": 5.454018766713967e-06, + "loss": 0.9619, + "step": 43340 + }, + { + "epoch": 0.6538658783070379, + "grad_norm": 0.6456173046645054, + "learning_rate": 5.449756004170201e-06, + "loss": 0.9629, + "step": 43350 + }, + { + "epoch": 0.6540167124196808, + "grad_norm": 0.6614402896074215, + "learning_rate": 5.44549428409614e-06, + "loss": 0.967, + "step": 43360 + }, + { + "epoch": 0.6541675465323238, + "grad_norm": 0.6796963417800855, + "learning_rate": 5.44123360746815e-06, + "loss": 0.9713, + "step": 43370 + }, + { + "epoch": 0.6543183806449666, + "grad_norm": 0.6522407463862717, + "learning_rate": 5.4369739752623675e-06, + "loss": 0.9553, + "step": 43380 + }, + { + "epoch": 0.6544692147576096, + "grad_norm": 0.6704952094128125, + "learning_rate": 5.432715388454675e-06, + "loss": 0.9508, + "step": 43390 + }, + { + "epoch": 0.6546200488702525, + "grad_norm": 0.6446688315777713, + "learning_rate": 5.428457848020725e-06, + "loss": 0.9776, + "step": 43400 + }, + { + "epoch": 0.6547708829828954, + "grad_norm": 0.6758518706572655, + "learning_rate": 5.424201354935934e-06, + "loss": 0.9527, + "step": 43410 + }, + { + "epoch": 0.6549217170955384, + "grad_norm": 0.7164767874084702, + "learning_rate": 5.419945910175464e-06, + "loss": 0.9666, + "step": 43420 + }, + { + "epoch": 0.6550725512081812, + "grad_norm": 0.6503230755404374, + "learning_rate": 5.415691514714254e-06, + "loss": 0.9709, + "step": 43430 + }, + { + "epoch": 0.6552233853208241, + "grad_norm": 0.6432080969121698, + "learning_rate": 5.411438169526991e-06, + "loss": 0.9711, + "step": 43440 + }, + { + "epoch": 0.6553742194334671, + "grad_norm": 0.7475823974410453, + "learning_rate": 5.40718587558812e-06, + "loss": 0.9592, + "step": 43450 + }, + { + "epoch": 0.65552505354611, + "grad_norm": 0.6747274301842728, + "learning_rate": 5.402934633871857e-06, + "loss": 0.9563, + "step": 43460 + }, + { + "epoch": 0.655675887658753, + "grad_norm": 0.6448407577151277, + "learning_rate": 5.398684445352167e-06, + "loss": 0.9596, + "step": 43470 + }, + { + "epoch": 0.6558267217713958, + "grad_norm": 0.675586292094985, + "learning_rate": 5.39443531100277e-06, + "loss": 0.9532, + "step": 43480 + }, + { + "epoch": 0.6559775558840387, + "grad_norm": 0.6689015108933138, + "learning_rate": 5.390187231797163e-06, + "loss": 0.9687, + "step": 43490 + }, + { + "epoch": 0.6561283899966817, + "grad_norm": 0.6407359286028214, + "learning_rate": 5.385940208708581e-06, + "loss": 0.9642, + "step": 43500 + }, + { + "epoch": 0.6562792241093246, + "grad_norm": 0.6840842516948749, + "learning_rate": 5.381694242710024e-06, + "loss": 0.9763, + "step": 43510 + }, + { + "epoch": 0.6564300582219674, + "grad_norm": 0.6604233307739646, + "learning_rate": 5.377449334774258e-06, + "loss": 0.9651, + "step": 43520 + }, + { + "epoch": 0.6565808923346104, + "grad_norm": 0.6466183352009494, + "learning_rate": 5.373205485873796e-06, + "loss": 0.9428, + "step": 43530 + }, + { + "epoch": 0.6567317264472533, + "grad_norm": 0.7484283962735679, + "learning_rate": 5.3689626969809065e-06, + "loss": 0.9985, + "step": 43540 + }, + { + "epoch": 0.6568825605598962, + "grad_norm": 0.7195390878934959, + "learning_rate": 5.364720969067626e-06, + "loss": 0.9616, + "step": 43550 + }, + { + "epoch": 0.6570333946725392, + "grad_norm": 0.6662579226606726, + "learning_rate": 5.360480303105747e-06, + "loss": 0.968, + "step": 43560 + }, + { + "epoch": 0.657184228785182, + "grad_norm": 0.6511540614416341, + "learning_rate": 5.356240700066806e-06, + "loss": 0.9771, + "step": 43570 + }, + { + "epoch": 0.657335062897825, + "grad_norm": 0.6661636168302049, + "learning_rate": 5.352002160922111e-06, + "loss": 0.9716, + "step": 43580 + }, + { + "epoch": 0.6574858970104679, + "grad_norm": 0.6629882778271066, + "learning_rate": 5.347764686642713e-06, + "loss": 0.9715, + "step": 43590 + }, + { + "epoch": 0.6576367311231108, + "grad_norm": 0.7509937244356302, + "learning_rate": 5.343528278199432e-06, + "loss": 0.9594, + "step": 43600 + }, + { + "epoch": 0.6577875652357538, + "grad_norm": 0.6366137100208268, + "learning_rate": 5.339292936562837e-06, + "loss": 0.9916, + "step": 43610 + }, + { + "epoch": 0.6579383993483966, + "grad_norm": 0.6618914812774381, + "learning_rate": 5.335058662703244e-06, + "loss": 0.971, + "step": 43620 + }, + { + "epoch": 0.6580892334610395, + "grad_norm": 0.6880824643233399, + "learning_rate": 5.330825457590745e-06, + "loss": 0.9533, + "step": 43630 + }, + { + "epoch": 0.6582400675736825, + "grad_norm": 0.7009681959075993, + "learning_rate": 5.326593322195168e-06, + "loss": 0.9631, + "step": 43640 + }, + { + "epoch": 0.6583909016863254, + "grad_norm": 0.674801924834598, + "learning_rate": 5.3223622574861044e-06, + "loss": 0.9672, + "step": 43650 + }, + { + "epoch": 0.6585417357989682, + "grad_norm": 0.626230035307963, + "learning_rate": 5.3181322644329025e-06, + "loss": 0.9619, + "step": 43660 + }, + { + "epoch": 0.6586925699116112, + "grad_norm": 0.6984090058441124, + "learning_rate": 5.313903344004661e-06, + "loss": 0.9782, + "step": 43670 + }, + { + "epoch": 0.6588434040242541, + "grad_norm": 0.689943468586631, + "learning_rate": 5.309675497170228e-06, + "loss": 0.951, + "step": 43680 + }, + { + "epoch": 0.6589942381368971, + "grad_norm": 0.6633576948083195, + "learning_rate": 5.305448724898221e-06, + "loss": 0.9665, + "step": 43690 + }, + { + "epoch": 0.65914507224954, + "grad_norm": 0.6514938952972472, + "learning_rate": 5.301223028156992e-06, + "loss": 0.9649, + "step": 43700 + }, + { + "epoch": 0.6592959063621828, + "grad_norm": 0.6898196176377571, + "learning_rate": 5.296998407914664e-06, + "loss": 0.9631, + "step": 43710 + }, + { + "epoch": 0.6594467404748258, + "grad_norm": 0.6397919770207124, + "learning_rate": 5.292774865139101e-06, + "loss": 0.9693, + "step": 43720 + }, + { + "epoch": 0.6595975745874687, + "grad_norm": 0.6410883052223008, + "learning_rate": 5.2885524007979285e-06, + "loss": 0.9533, + "step": 43730 + }, + { + "epoch": 0.6597484087001116, + "grad_norm": 0.649442006482289, + "learning_rate": 5.284331015858517e-06, + "loss": 0.9535, + "step": 43740 + }, + { + "epoch": 0.6598992428127546, + "grad_norm": 0.6588967239003393, + "learning_rate": 5.280110711287999e-06, + "loss": 0.9787, + "step": 43750 + }, + { + "epoch": 0.6600500769253974, + "grad_norm": 0.6525470552281564, + "learning_rate": 5.275891488053249e-06, + "loss": 0.9693, + "step": 43760 + }, + { + "epoch": 0.6602009110380404, + "grad_norm": 0.6704566235451238, + "learning_rate": 5.2716733471209045e-06, + "loss": 0.9648, + "step": 43770 + }, + { + "epoch": 0.6603517451506833, + "grad_norm": 0.6330914718186241, + "learning_rate": 5.267456289457348e-06, + "loss": 0.9483, + "step": 43780 + }, + { + "epoch": 0.6605025792633262, + "grad_norm": 0.6998365127205389, + "learning_rate": 5.26324031602871e-06, + "loss": 0.9762, + "step": 43790 + }, + { + "epoch": 0.6606534133759691, + "grad_norm": 0.65704708260716, + "learning_rate": 5.259025427800888e-06, + "loss": 0.9619, + "step": 43800 + }, + { + "epoch": 0.660804247488612, + "grad_norm": 0.651284476495845, + "learning_rate": 5.254811625739515e-06, + "loss": 0.9713, + "step": 43810 + }, + { + "epoch": 0.6609550816012549, + "grad_norm": 0.6550320378226633, + "learning_rate": 5.2505989108099785e-06, + "loss": 0.9673, + "step": 43820 + }, + { + "epoch": 0.6611059157138979, + "grad_norm": 0.7316824763847918, + "learning_rate": 5.246387283977428e-06, + "loss": 0.9616, + "step": 43830 + }, + { + "epoch": 0.6612567498265408, + "grad_norm": 0.6983829186619218, + "learning_rate": 5.242176746206745e-06, + "loss": 0.9555, + "step": 43840 + }, + { + "epoch": 0.6614075839391836, + "grad_norm": 0.6536701315037724, + "learning_rate": 5.237967298462583e-06, + "loss": 0.9522, + "step": 43850 + }, + { + "epoch": 0.6615584180518266, + "grad_norm": 0.7117023388344519, + "learning_rate": 5.233758941709328e-06, + "loss": 0.9627, + "step": 43860 + }, + { + "epoch": 0.6617092521644695, + "grad_norm": 0.6510621819389172, + "learning_rate": 5.22955167691112e-06, + "loss": 0.9439, + "step": 43870 + }, + { + "epoch": 0.6618600862771125, + "grad_norm": 0.7510690680531368, + "learning_rate": 5.22534550503186e-06, + "loss": 0.9656, + "step": 43880 + }, + { + "epoch": 0.6620109203897554, + "grad_norm": 0.6735741656694806, + "learning_rate": 5.221140427035182e-06, + "loss": 0.9586, + "step": 43890 + }, + { + "epoch": 0.6621617545023982, + "grad_norm": 0.6612159611062374, + "learning_rate": 5.2169364438844796e-06, + "loss": 0.9756, + "step": 43900 + }, + { + "epoch": 0.6623125886150412, + "grad_norm": 0.6618898823184242, + "learning_rate": 5.212733556542899e-06, + "loss": 0.9569, + "step": 43910 + }, + { + "epoch": 0.6624634227276841, + "grad_norm": 0.7026392263810005, + "learning_rate": 5.208531765973327e-06, + "loss": 0.9595, + "step": 43920 + }, + { + "epoch": 0.662614256840327, + "grad_norm": 0.6607311000675723, + "learning_rate": 5.204331073138396e-06, + "loss": 0.946, + "step": 43930 + }, + { + "epoch": 0.6627650909529699, + "grad_norm": 0.7005553444153297, + "learning_rate": 5.2001314790005005e-06, + "loss": 0.9683, + "step": 43940 + }, + { + "epoch": 0.6629159250656128, + "grad_norm": 0.9366275774526532, + "learning_rate": 5.195932984521773e-06, + "loss": 0.9451, + "step": 43950 + }, + { + "epoch": 0.6630667591782557, + "grad_norm": 0.6536008794648395, + "learning_rate": 5.191735590664093e-06, + "loss": 0.965, + "step": 43960 + }, + { + "epoch": 0.6632175932908987, + "grad_norm": 0.6911218995101667, + "learning_rate": 5.187539298389099e-06, + "loss": 0.9505, + "step": 43970 + }, + { + "epoch": 0.6633684274035416, + "grad_norm": 0.6583584909100484, + "learning_rate": 5.18334410865816e-06, + "loss": 0.9744, + "step": 43980 + }, + { + "epoch": 0.6635192615161845, + "grad_norm": 0.6673949301459423, + "learning_rate": 5.179150022432413e-06, + "loss": 0.9551, + "step": 43990 + }, + { + "epoch": 0.6636700956288274, + "grad_norm": 0.6555455766303412, + "learning_rate": 5.174957040672726e-06, + "loss": 0.9637, + "step": 44000 + }, + { + "epoch": 0.6638209297414703, + "grad_norm": 0.7068669248289817, + "learning_rate": 5.170765164339715e-06, + "loss": 0.9642, + "step": 44010 + }, + { + "epoch": 0.6639717638541133, + "grad_norm": 0.6708061154673499, + "learning_rate": 5.166574394393755e-06, + "loss": 0.949, + "step": 44020 + }, + { + "epoch": 0.6641225979667562, + "grad_norm": 0.6786282635650138, + "learning_rate": 5.1623847317949545e-06, + "loss": 0.9548, + "step": 44030 + }, + { + "epoch": 0.664273432079399, + "grad_norm": 0.6881243193679054, + "learning_rate": 5.158196177503172e-06, + "loss": 0.9752, + "step": 44040 + }, + { + "epoch": 0.664424266192042, + "grad_norm": 0.6707588549348976, + "learning_rate": 5.15400873247802e-06, + "loss": 0.9749, + "step": 44050 + }, + { + "epoch": 0.6645751003046849, + "grad_norm": 0.6638399469300998, + "learning_rate": 5.149822397678841e-06, + "loss": 0.9553, + "step": 44060 + }, + { + "epoch": 0.6647259344173279, + "grad_norm": 0.6599923001356313, + "learning_rate": 5.145637174064737e-06, + "loss": 0.9465, + "step": 44070 + }, + { + "epoch": 0.6648767685299707, + "grad_norm": 0.7111763628408917, + "learning_rate": 5.141453062594557e-06, + "loss": 0.983, + "step": 44080 + }, + { + "epoch": 0.6650276026426136, + "grad_norm": 0.6770697125672892, + "learning_rate": 5.137270064226883e-06, + "loss": 0.9683, + "step": 44090 + }, + { + "epoch": 0.6651784367552566, + "grad_norm": 0.675389477355461, + "learning_rate": 5.133088179920043e-06, + "loss": 0.9667, + "step": 44100 + }, + { + "epoch": 0.6653292708678995, + "grad_norm": 0.69019371959059, + "learning_rate": 5.128907410632126e-06, + "loss": 0.9788, + "step": 44110 + }, + { + "epoch": 0.6654801049805424, + "grad_norm": 0.6593835473851882, + "learning_rate": 5.124727757320944e-06, + "loss": 0.9735, + "step": 44120 + }, + { + "epoch": 0.6656309390931853, + "grad_norm": 0.6458930781926323, + "learning_rate": 5.120549220944071e-06, + "loss": 0.976, + "step": 44130 + }, + { + "epoch": 0.6657817732058282, + "grad_norm": 0.6794603292645648, + "learning_rate": 5.116371802458816e-06, + "loss": 0.9813, + "step": 44140 + }, + { + "epoch": 0.6659326073184711, + "grad_norm": 0.6780976633794951, + "learning_rate": 5.112195502822228e-06, + "loss": 0.9679, + "step": 44150 + }, + { + "epoch": 0.6660834414311141, + "grad_norm": 0.6565783305125694, + "learning_rate": 5.108020322991115e-06, + "loss": 0.9454, + "step": 44160 + }, + { + "epoch": 0.666234275543757, + "grad_norm": 0.7010520577695403, + "learning_rate": 5.103846263922013e-06, + "loss": 0.9827, + "step": 44170 + }, + { + "epoch": 0.6663851096563999, + "grad_norm": 0.7430974843221075, + "learning_rate": 5.099673326571204e-06, + "loss": 0.9765, + "step": 44180 + }, + { + "epoch": 0.6665359437690428, + "grad_norm": 0.6291901446320217, + "learning_rate": 5.095501511894724e-06, + "loss": 0.9745, + "step": 44190 + }, + { + "epoch": 0.6666867778816857, + "grad_norm": 0.6241889726416157, + "learning_rate": 5.091330820848339e-06, + "loss": 0.9578, + "step": 44200 + }, + { + "epoch": 0.6668376119943287, + "grad_norm": 0.6950710018594823, + "learning_rate": 5.0871612543875595e-06, + "loss": 0.9666, + "step": 44210 + }, + { + "epoch": 0.6669884461069715, + "grad_norm": 0.6697271138584667, + "learning_rate": 5.082992813467644e-06, + "loss": 0.9707, + "step": 44220 + }, + { + "epoch": 0.6671392802196144, + "grad_norm": 0.7086357977722784, + "learning_rate": 5.078825499043596e-06, + "loss": 0.9686, + "step": 44230 + }, + { + "epoch": 0.6672901143322574, + "grad_norm": 0.6576201015116467, + "learning_rate": 5.074659312070145e-06, + "loss": 0.9685, + "step": 44240 + }, + { + "epoch": 0.6674409484449003, + "grad_norm": 0.7544918613844414, + "learning_rate": 5.070494253501783e-06, + "loss": 0.9703, + "step": 44250 + }, + { + "epoch": 0.6675917825575433, + "grad_norm": 0.6491979128261557, + "learning_rate": 5.066330324292723e-06, + "loss": 0.9667, + "step": 44260 + }, + { + "epoch": 0.6677426166701861, + "grad_norm": 0.7150192286193244, + "learning_rate": 5.062167525396939e-06, + "loss": 0.9525, + "step": 44270 + }, + { + "epoch": 0.667893450782829, + "grad_norm": 0.6257765177828207, + "learning_rate": 5.058005857768131e-06, + "loss": 0.9825, + "step": 44280 + }, + { + "epoch": 0.668044284895472, + "grad_norm": 0.6822864867725681, + "learning_rate": 5.053845322359741e-06, + "loss": 0.9824, + "step": 44290 + }, + { + "epoch": 0.6681951190081149, + "grad_norm": 0.654472873329132, + "learning_rate": 5.049685920124965e-06, + "loss": 0.945, + "step": 44300 + }, + { + "epoch": 0.6683459531207578, + "grad_norm": 0.7354028822529969, + "learning_rate": 5.0455276520167265e-06, + "loss": 0.9506, + "step": 44310 + }, + { + "epoch": 0.6684967872334007, + "grad_norm": 0.6896470853265237, + "learning_rate": 5.041370518987685e-06, + "loss": 0.9727, + "step": 44320 + }, + { + "epoch": 0.6686476213460436, + "grad_norm": 0.6901578222394753, + "learning_rate": 5.037214521990259e-06, + "loss": 0.9797, + "step": 44330 + }, + { + "epoch": 0.6687984554586865, + "grad_norm": 0.6351239747256512, + "learning_rate": 5.033059661976591e-06, + "loss": 0.9528, + "step": 44340 + }, + { + "epoch": 0.6689492895713295, + "grad_norm": 0.6626853004759229, + "learning_rate": 5.0289059398985655e-06, + "loss": 0.9523, + "step": 44350 + }, + { + "epoch": 0.6691001236839723, + "grad_norm": 0.6523004689021776, + "learning_rate": 5.024753356707811e-06, + "loss": 0.9625, + "step": 44360 + }, + { + "epoch": 0.6692509577966153, + "grad_norm": 0.6721980465715961, + "learning_rate": 5.020601913355688e-06, + "loss": 0.9419, + "step": 44370 + }, + { + "epoch": 0.6694017919092582, + "grad_norm": 0.6428957207399423, + "learning_rate": 5.016451610793308e-06, + "loss": 0.9795, + "step": 44380 + }, + { + "epoch": 0.6695526260219011, + "grad_norm": 0.7309889050789887, + "learning_rate": 5.012302449971505e-06, + "loss": 0.9606, + "step": 44390 + }, + { + "epoch": 0.6697034601345441, + "grad_norm": 0.6632710087672097, + "learning_rate": 5.008154431840868e-06, + "loss": 0.9694, + "step": 44400 + }, + { + "epoch": 0.6698542942471869, + "grad_norm": 0.6418911118228943, + "learning_rate": 5.004007557351707e-06, + "loss": 0.9467, + "step": 44410 + }, + { + "epoch": 0.6700051283598298, + "grad_norm": 0.6367357334222009, + "learning_rate": 4.9998618274540875e-06, + "loss": 0.9713, + "step": 44420 + }, + { + "epoch": 0.6701559624724728, + "grad_norm": 0.6974148099670776, + "learning_rate": 4.995717243097796e-06, + "loss": 0.9594, + "step": 44430 + }, + { + "epoch": 0.6703067965851157, + "grad_norm": 0.724086268836598, + "learning_rate": 4.9915738052323745e-06, + "loss": 0.9447, + "step": 44440 + }, + { + "epoch": 0.6704576306977587, + "grad_norm": 0.6907949234602132, + "learning_rate": 4.9874315148070865e-06, + "loss": 0.9935, + "step": 44450 + }, + { + "epoch": 0.6706084648104015, + "grad_norm": 0.6449216263313553, + "learning_rate": 4.983290372770936e-06, + "loss": 0.9711, + "step": 44460 + }, + { + "epoch": 0.6707592989230444, + "grad_norm": 0.6791197856844114, + "learning_rate": 4.9791503800726734e-06, + "loss": 0.9704, + "step": 44470 + }, + { + "epoch": 0.6709101330356874, + "grad_norm": 0.772195479415328, + "learning_rate": 4.975011537660777e-06, + "loss": 0.9715, + "step": 44480 + }, + { + "epoch": 0.6710609671483303, + "grad_norm": 0.6816070935917553, + "learning_rate": 4.970873846483457e-06, + "loss": 0.9696, + "step": 44490 + }, + { + "epoch": 0.6712118012609731, + "grad_norm": 0.6797123653651209, + "learning_rate": 4.966737307488677e-06, + "loss": 0.9758, + "step": 44500 + }, + { + "epoch": 0.6713626353736161, + "grad_norm": 0.6776419983696919, + "learning_rate": 4.962601921624116e-06, + "loss": 0.9613, + "step": 44510 + }, + { + "epoch": 0.671513469486259, + "grad_norm": 0.7316055167487443, + "learning_rate": 4.95846768983721e-06, + "loss": 0.9701, + "step": 44520 + }, + { + "epoch": 0.671664303598902, + "grad_norm": 0.677389761031889, + "learning_rate": 4.954334613075112e-06, + "loss": 0.9581, + "step": 44530 + }, + { + "epoch": 0.6718151377115449, + "grad_norm": 0.6533007141382355, + "learning_rate": 4.9502026922847145e-06, + "loss": 0.9786, + "step": 44540 + }, + { + "epoch": 0.6719659718241877, + "grad_norm": 0.6854645585749595, + "learning_rate": 4.946071928412658e-06, + "loss": 0.9488, + "step": 44550 + }, + { + "epoch": 0.6721168059368307, + "grad_norm": 0.6440733903048161, + "learning_rate": 4.941942322405299e-06, + "loss": 0.9662, + "step": 44560 + }, + { + "epoch": 0.6722676400494736, + "grad_norm": 0.6470902339915203, + "learning_rate": 4.937813875208745e-06, + "loss": 0.975, + "step": 44570 + }, + { + "epoch": 0.6724184741621165, + "grad_norm": 0.6898835007873588, + "learning_rate": 4.933686587768832e-06, + "loss": 0.9662, + "step": 44580 + }, + { + "epoch": 0.6725693082747595, + "grad_norm": 0.7068794836444846, + "learning_rate": 4.9295604610311274e-06, + "loss": 0.9646, + "step": 44590 + }, + { + "epoch": 0.6727201423874023, + "grad_norm": 0.6682389888400445, + "learning_rate": 4.925435495940931e-06, + "loss": 0.9774, + "step": 44600 + }, + { + "epoch": 0.6728709765000452, + "grad_norm": 0.6787248503705002, + "learning_rate": 4.921311693443287e-06, + "loss": 0.9914, + "step": 44610 + }, + { + "epoch": 0.6730218106126882, + "grad_norm": 0.6654586095702274, + "learning_rate": 4.9171890544829625e-06, + "loss": 0.9614, + "step": 44620 + }, + { + "epoch": 0.6731726447253311, + "grad_norm": 0.7065881684048221, + "learning_rate": 4.913067580004459e-06, + "loss": 0.9652, + "step": 44630 + }, + { + "epoch": 0.6733234788379739, + "grad_norm": 0.6588598044336946, + "learning_rate": 4.908947270952021e-06, + "loss": 0.9504, + "step": 44640 + }, + { + "epoch": 0.6734743129506169, + "grad_norm": 0.6812706232621971, + "learning_rate": 4.904828128269614e-06, + "loss": 0.9517, + "step": 44650 + }, + { + "epoch": 0.6736251470632598, + "grad_norm": 0.6585922966096791, + "learning_rate": 4.900710152900947e-06, + "loss": 0.9687, + "step": 44660 + }, + { + "epoch": 0.6737759811759028, + "grad_norm": 0.7047297570079565, + "learning_rate": 4.896593345789452e-06, + "loss": 0.9714, + "step": 44670 + }, + { + "epoch": 0.6739268152885457, + "grad_norm": 0.7129380276749296, + "learning_rate": 4.892477707878296e-06, + "loss": 0.9717, + "step": 44680 + }, + { + "epoch": 0.6740776494011885, + "grad_norm": 0.6829494784046465, + "learning_rate": 4.888363240110385e-06, + "loss": 0.9652, + "step": 44690 + }, + { + "epoch": 0.6742284835138315, + "grad_norm": 0.6206896061041978, + "learning_rate": 4.884249943428349e-06, + "loss": 0.9603, + "step": 44700 + }, + { + "epoch": 0.6743793176264744, + "grad_norm": 0.672847772985567, + "learning_rate": 4.880137818774547e-06, + "loss": 0.9686, + "step": 44710 + }, + { + "epoch": 0.6745301517391173, + "grad_norm": 0.6724721106201565, + "learning_rate": 4.87602686709108e-06, + "loss": 0.9584, + "step": 44720 + }, + { + "epoch": 0.6746809858517603, + "grad_norm": 0.662926433104969, + "learning_rate": 4.871917089319779e-06, + "loss": 0.9714, + "step": 44730 + }, + { + "epoch": 0.6748318199644031, + "grad_norm": 0.6574236339484277, + "learning_rate": 4.867808486402192e-06, + "loss": 0.9764, + "step": 44740 + }, + { + "epoch": 0.674982654077046, + "grad_norm": 0.6540566782435295, + "learning_rate": 4.8637010592796185e-06, + "loss": 0.9518, + "step": 44750 + }, + { + "epoch": 0.675133488189689, + "grad_norm": 0.6309098832082936, + "learning_rate": 4.859594808893072e-06, + "loss": 0.9524, + "step": 44760 + }, + { + "epoch": 0.6752843223023319, + "grad_norm": 0.6706110501745434, + "learning_rate": 4.855489736183301e-06, + "loss": 0.9597, + "step": 44770 + }, + { + "epoch": 0.6754351564149748, + "grad_norm": 0.6849752278190082, + "learning_rate": 4.85138584209079e-06, + "loss": 0.9785, + "step": 44780 + }, + { + "epoch": 0.6755859905276177, + "grad_norm": 0.6728049126051985, + "learning_rate": 4.847283127555744e-06, + "loss": 0.9553, + "step": 44790 + }, + { + "epoch": 0.6757368246402606, + "grad_norm": 0.6506338404422428, + "learning_rate": 4.843181593518109e-06, + "loss": 0.9469, + "step": 44800 + }, + { + "epoch": 0.6758876587529036, + "grad_norm": 0.6516511422529933, + "learning_rate": 4.839081240917552e-06, + "loss": 0.958, + "step": 44810 + }, + { + "epoch": 0.6760384928655465, + "grad_norm": 0.6528491100855482, + "learning_rate": 4.8349820706934666e-06, + "loss": 0.9715, + "step": 44820 + }, + { + "epoch": 0.6761893269781893, + "grad_norm": 0.6724988599646379, + "learning_rate": 4.830884083784989e-06, + "loss": 0.9635, + "step": 44830 + }, + { + "epoch": 0.6763401610908323, + "grad_norm": 0.6860778214938321, + "learning_rate": 4.8267872811309725e-06, + "loss": 0.9792, + "step": 44840 + }, + { + "epoch": 0.6764909952034752, + "grad_norm": 0.6696385360098037, + "learning_rate": 4.8226916636699975e-06, + "loss": 0.9674, + "step": 44850 + }, + { + "epoch": 0.6766418293161182, + "grad_norm": 0.6720418434193399, + "learning_rate": 4.818597232340386e-06, + "loss": 0.9722, + "step": 44860 + }, + { + "epoch": 0.6767926634287611, + "grad_norm": 0.6524611263598169, + "learning_rate": 4.814503988080177e-06, + "loss": 0.9625, + "step": 44870 + }, + { + "epoch": 0.6769434975414039, + "grad_norm": 0.6873792815225376, + "learning_rate": 4.810411931827134e-06, + "loss": 0.9614, + "step": 44880 + }, + { + "epoch": 0.6770943316540469, + "grad_norm": 0.6897424468159009, + "learning_rate": 4.806321064518764e-06, + "loss": 0.9466, + "step": 44890 + }, + { + "epoch": 0.6772451657666898, + "grad_norm": 0.6891525960623327, + "learning_rate": 4.802231387092292e-06, + "loss": 0.9724, + "step": 44900 + }, + { + "epoch": 0.6773959998793327, + "grad_norm": 0.6788749496120672, + "learning_rate": 4.7981429004846655e-06, + "loss": 0.9741, + "step": 44910 + }, + { + "epoch": 0.6775468339919756, + "grad_norm": 0.6624103699576492, + "learning_rate": 4.794055605632571e-06, + "loss": 0.972, + "step": 44920 + }, + { + "epoch": 0.6776976681046185, + "grad_norm": 0.7263723286495654, + "learning_rate": 4.7899695034724105e-06, + "loss": 0.9748, + "step": 44930 + }, + { + "epoch": 0.6778485022172615, + "grad_norm": 0.6646017016516542, + "learning_rate": 4.785884594940322e-06, + "loss": 0.9894, + "step": 44940 + }, + { + "epoch": 0.6779993363299044, + "grad_norm": 0.6868359791294666, + "learning_rate": 4.781800880972165e-06, + "loss": 0.9491, + "step": 44950 + }, + { + "epoch": 0.6781501704425473, + "grad_norm": 0.6369835494632096, + "learning_rate": 4.777718362503522e-06, + "loss": 0.9531, + "step": 44960 + }, + { + "epoch": 0.6783010045551902, + "grad_norm": 0.6503150690924367, + "learning_rate": 4.773637040469712e-06, + "loss": 0.9515, + "step": 44970 + }, + { + "epoch": 0.6784518386678331, + "grad_norm": 0.6829546959856486, + "learning_rate": 4.769556915805771e-06, + "loss": 0.9752, + "step": 44980 + }, + { + "epoch": 0.678602672780476, + "grad_norm": 0.6673949467816047, + "learning_rate": 4.765477989446458e-06, + "loss": 0.9774, + "step": 44990 + }, + { + "epoch": 0.678753506893119, + "grad_norm": 0.692350923827781, + "learning_rate": 4.761400262326275e-06, + "loss": 0.9598, + "step": 45000 + }, + { + "epoch": 0.6789043410057619, + "grad_norm": 0.7007303786273901, + "learning_rate": 4.757323735379429e-06, + "loss": 0.9667, + "step": 45010 + }, + { + "epoch": 0.6790551751184047, + "grad_norm": 0.6429106395116131, + "learning_rate": 4.753248409539857e-06, + "loss": 0.9695, + "step": 45020 + }, + { + "epoch": 0.6792060092310477, + "grad_norm": 0.6723131170691823, + "learning_rate": 4.7491742857412334e-06, + "loss": 0.9607, + "step": 45030 + }, + { + "epoch": 0.6793568433436906, + "grad_norm": 0.6612843265304617, + "learning_rate": 4.745101364916939e-06, + "loss": 0.9619, + "step": 45040 + }, + { + "epoch": 0.6795076774563336, + "grad_norm": 0.6580202472519242, + "learning_rate": 4.7410296480000965e-06, + "loss": 0.9603, + "step": 45050 + }, + { + "epoch": 0.6796585115689764, + "grad_norm": 0.6802933516851245, + "learning_rate": 4.7369591359235355e-06, + "loss": 0.9711, + "step": 45060 + }, + { + "epoch": 0.6798093456816193, + "grad_norm": 0.6493998832312345, + "learning_rate": 4.732889829619823e-06, + "loss": 0.9378, + "step": 45070 + }, + { + "epoch": 0.6799601797942623, + "grad_norm": 0.6809091576475677, + "learning_rate": 4.728821730021247e-06, + "loss": 0.9628, + "step": 45080 + }, + { + "epoch": 0.6801110139069052, + "grad_norm": 0.6716858553030255, + "learning_rate": 4.724754838059814e-06, + "loss": 0.9342, + "step": 45090 + }, + { + "epoch": 0.6802618480195481, + "grad_norm": 0.6935701511549577, + "learning_rate": 4.720689154667254e-06, + "loss": 0.9579, + "step": 45100 + }, + { + "epoch": 0.680412682132191, + "grad_norm": 0.6748608504943583, + "learning_rate": 4.71662468077503e-06, + "loss": 0.9623, + "step": 45110 + }, + { + "epoch": 0.6805635162448339, + "grad_norm": 0.7922936304374386, + "learning_rate": 4.712561417314317e-06, + "loss": 0.9628, + "step": 45120 + }, + { + "epoch": 0.6807143503574768, + "grad_norm": 0.6541490223813421, + "learning_rate": 4.708499365216011e-06, + "loss": 0.9595, + "step": 45130 + }, + { + "epoch": 0.6808651844701198, + "grad_norm": 0.6608073020180297, + "learning_rate": 4.704438525410746e-06, + "loss": 0.9674, + "step": 45140 + }, + { + "epoch": 0.6810160185827627, + "grad_norm": 0.6364781169557732, + "learning_rate": 4.700378898828864e-06, + "loss": 0.9669, + "step": 45150 + }, + { + "epoch": 0.6811668526954056, + "grad_norm": 0.6458831465211516, + "learning_rate": 4.696320486400428e-06, + "loss": 0.97, + "step": 45160 + }, + { + "epoch": 0.6813176868080485, + "grad_norm": 0.6835357456476768, + "learning_rate": 4.6922632890552385e-06, + "loss": 0.9646, + "step": 45170 + }, + { + "epoch": 0.6814685209206914, + "grad_norm": 0.6549224423737501, + "learning_rate": 4.6882073077227976e-06, + "loss": 0.9571, + "step": 45180 + }, + { + "epoch": 0.6816193550333344, + "grad_norm": 0.7191759302645618, + "learning_rate": 4.684152543332346e-06, + "loss": 0.9508, + "step": 45190 + }, + { + "epoch": 0.6817701891459772, + "grad_norm": 0.6742984168136282, + "learning_rate": 4.680098996812835e-06, + "loss": 0.9597, + "step": 45200 + }, + { + "epoch": 0.6819210232586201, + "grad_norm": 0.6723186538935414, + "learning_rate": 4.676046669092936e-06, + "loss": 0.9859, + "step": 45210 + }, + { + "epoch": 0.6820718573712631, + "grad_norm": 0.6684453347338225, + "learning_rate": 4.671995561101049e-06, + "loss": 0.9602, + "step": 45220 + }, + { + "epoch": 0.682222691483906, + "grad_norm": 0.6564815576233088, + "learning_rate": 4.667945673765294e-06, + "loss": 0.9761, + "step": 45230 + }, + { + "epoch": 0.682373525596549, + "grad_norm": 0.6825103475482759, + "learning_rate": 4.663897008013501e-06, + "loss": 0.9685, + "step": 45240 + }, + { + "epoch": 0.6825243597091918, + "grad_norm": 0.6817443389216523, + "learning_rate": 4.659849564773235e-06, + "loss": 0.9696, + "step": 45250 + }, + { + "epoch": 0.6826751938218347, + "grad_norm": 0.6676223254898364, + "learning_rate": 4.655803344971768e-06, + "loss": 0.9605, + "step": 45260 + }, + { + "epoch": 0.6828260279344777, + "grad_norm": 0.6730103579935085, + "learning_rate": 4.6517583495360944e-06, + "loss": 0.9508, + "step": 45270 + }, + { + "epoch": 0.6829768620471206, + "grad_norm": 0.6579280410568001, + "learning_rate": 4.647714579392937e-06, + "loss": 0.9705, + "step": 45280 + }, + { + "epoch": 0.6831276961597635, + "grad_norm": 0.7136609123763619, + "learning_rate": 4.643672035468728e-06, + "loss": 0.955, + "step": 45290 + }, + { + "epoch": 0.6832785302724064, + "grad_norm": 0.6950146901330524, + "learning_rate": 4.639630718689618e-06, + "loss": 0.9725, + "step": 45300 + }, + { + "epoch": 0.6834293643850493, + "grad_norm": 0.6704384447356385, + "learning_rate": 4.635590629981489e-06, + "loss": 0.9624, + "step": 45310 + }, + { + "epoch": 0.6835801984976922, + "grad_norm": 0.6892086284304622, + "learning_rate": 4.631551770269924e-06, + "loss": 0.9502, + "step": 45320 + }, + { + "epoch": 0.6837310326103352, + "grad_norm": 0.6360195270401641, + "learning_rate": 4.6275141404802425e-06, + "loss": 0.9508, + "step": 45330 + }, + { + "epoch": 0.683881866722978, + "grad_norm": 0.7272997812277977, + "learning_rate": 4.62347774153747e-06, + "loss": 0.9664, + "step": 45340 + }, + { + "epoch": 0.684032700835621, + "grad_norm": 0.6738204269691319, + "learning_rate": 4.6194425743663475e-06, + "loss": 0.966, + "step": 45350 + }, + { + "epoch": 0.6841835349482639, + "grad_norm": 0.6836815712565403, + "learning_rate": 4.615408639891349e-06, + "loss": 0.9522, + "step": 45360 + }, + { + "epoch": 0.6843343690609068, + "grad_norm": 0.7126351029398088, + "learning_rate": 4.611375939036652e-06, + "loss": 0.9503, + "step": 45370 + }, + { + "epoch": 0.6844852031735498, + "grad_norm": 0.7273674467390678, + "learning_rate": 4.6073444727261515e-06, + "loss": 0.9567, + "step": 45380 + }, + { + "epoch": 0.6846360372861926, + "grad_norm": 0.6463792949666948, + "learning_rate": 4.6033142418834705e-06, + "loss": 0.9746, + "step": 45390 + }, + { + "epoch": 0.6847868713988355, + "grad_norm": 0.7478375834069731, + "learning_rate": 4.599285247431945e-06, + "loss": 0.9704, + "step": 45400 + }, + { + "epoch": 0.6849377055114785, + "grad_norm": 0.6760970985646554, + "learning_rate": 4.595257490294617e-06, + "loss": 0.9766, + "step": 45410 + }, + { + "epoch": 0.6850885396241214, + "grad_norm": 0.6705529856146347, + "learning_rate": 4.591230971394262e-06, + "loss": 0.9703, + "step": 45420 + }, + { + "epoch": 0.6852393737367644, + "grad_norm": 0.6459358967296305, + "learning_rate": 4.587205691653359e-06, + "loss": 0.9936, + "step": 45430 + }, + { + "epoch": 0.6853902078494072, + "grad_norm": 0.6502858060284757, + "learning_rate": 4.583181651994104e-06, + "loss": 0.9509, + "step": 45440 + }, + { + "epoch": 0.6855410419620501, + "grad_norm": 0.6593049869690452, + "learning_rate": 4.57915885333842e-06, + "loss": 0.965, + "step": 45450 + }, + { + "epoch": 0.6856918760746931, + "grad_norm": 0.691119887676957, + "learning_rate": 4.575137296607931e-06, + "loss": 0.9554, + "step": 45460 + }, + { + "epoch": 0.685842710187336, + "grad_norm": 0.6439943959885372, + "learning_rate": 4.571116982723989e-06, + "loss": 0.9472, + "step": 45470 + }, + { + "epoch": 0.6859935442999788, + "grad_norm": 0.6659360602107288, + "learning_rate": 4.567097912607653e-06, + "loss": 0.9832, + "step": 45480 + }, + { + "epoch": 0.6861443784126218, + "grad_norm": 0.6785371692265144, + "learning_rate": 4.563080087179696e-06, + "loss": 0.9631, + "step": 45490 + }, + { + "epoch": 0.6862952125252647, + "grad_norm": 0.7499705667511194, + "learning_rate": 4.559063507360618e-06, + "loss": 0.9467, + "step": 45500 + }, + { + "epoch": 0.6864460466379076, + "grad_norm": 0.6755259417618038, + "learning_rate": 4.555048174070621e-06, + "loss": 0.9459, + "step": 45510 + }, + { + "epoch": 0.6865968807505506, + "grad_norm": 0.6538238865286049, + "learning_rate": 4.5510340882296214e-06, + "loss": 0.9754, + "step": 45520 + }, + { + "epoch": 0.6867477148631934, + "grad_norm": 0.6897022488160396, + "learning_rate": 4.54702125075726e-06, + "loss": 0.9766, + "step": 45530 + }, + { + "epoch": 0.6868985489758364, + "grad_norm": 0.6699926279928845, + "learning_rate": 4.5430096625728815e-06, + "loss": 0.9451, + "step": 45540 + }, + { + "epoch": 0.6870493830884793, + "grad_norm": 0.6898908025455938, + "learning_rate": 4.538999324595553e-06, + "loss": 0.9576, + "step": 45550 + }, + { + "epoch": 0.6872002172011222, + "grad_norm": 0.6586399518839949, + "learning_rate": 4.534990237744045e-06, + "loss": 0.9517, + "step": 45560 + }, + { + "epoch": 0.6873510513137652, + "grad_norm": 0.7400109250090484, + "learning_rate": 4.530982402936855e-06, + "loss": 0.9752, + "step": 45570 + }, + { + "epoch": 0.687501885426408, + "grad_norm": 0.6314537516489849, + "learning_rate": 4.526975821092175e-06, + "loss": 0.9506, + "step": 45580 + }, + { + "epoch": 0.6876527195390509, + "grad_norm": 0.6497361678156094, + "learning_rate": 4.522970493127932e-06, + "loss": 0.9501, + "step": 45590 + }, + { + "epoch": 0.6878035536516939, + "grad_norm": 0.6361853786804775, + "learning_rate": 4.518966419961745e-06, + "loss": 0.9466, + "step": 45600 + }, + { + "epoch": 0.6879543877643368, + "grad_norm": 0.66922490602224, + "learning_rate": 4.514963602510962e-06, + "loss": 0.9722, + "step": 45610 + }, + { + "epoch": 0.6881052218769796, + "grad_norm": 0.6560729006734141, + "learning_rate": 4.510962041692634e-06, + "loss": 0.9683, + "step": 45620 + }, + { + "epoch": 0.6882560559896226, + "grad_norm": 0.7191759139025694, + "learning_rate": 4.506961738423521e-06, + "loss": 0.9667, + "step": 45630 + }, + { + "epoch": 0.6884068901022655, + "grad_norm": 0.684926663951674, + "learning_rate": 4.5029626936201094e-06, + "loss": 0.96, + "step": 45640 + }, + { + "epoch": 0.6885577242149085, + "grad_norm": 0.6986294058343118, + "learning_rate": 4.498964908198583e-06, + "loss": 0.9496, + "step": 45650 + }, + { + "epoch": 0.6887085583275514, + "grad_norm": 0.6704628095757628, + "learning_rate": 4.494968383074838e-06, + "loss": 0.9671, + "step": 45660 + }, + { + "epoch": 0.6888593924401942, + "grad_norm": 0.6821152233707913, + "learning_rate": 4.490973119164496e-06, + "loss": 0.9785, + "step": 45670 + }, + { + "epoch": 0.6890102265528372, + "grad_norm": 0.6429713079434074, + "learning_rate": 4.486979117382873e-06, + "loss": 0.964, + "step": 45680 + }, + { + "epoch": 0.6891610606654801, + "grad_norm": 0.6684258749530736, + "learning_rate": 4.482986378645001e-06, + "loss": 0.9675, + "step": 45690 + }, + { + "epoch": 0.689311894778123, + "grad_norm": 0.6758108179938799, + "learning_rate": 4.47899490386563e-06, + "loss": 0.9835, + "step": 45700 + }, + { + "epoch": 0.689462728890766, + "grad_norm": 0.7400476508786575, + "learning_rate": 4.475004693959208e-06, + "loss": 0.9439, + "step": 45710 + }, + { + "epoch": 0.6896135630034088, + "grad_norm": 0.7072066450619263, + "learning_rate": 4.471015749839907e-06, + "loss": 0.9721, + "step": 45720 + }, + { + "epoch": 0.6897643971160518, + "grad_norm": 0.6405281738144503, + "learning_rate": 4.467028072421595e-06, + "loss": 0.959, + "step": 45730 + }, + { + "epoch": 0.6899152312286947, + "grad_norm": 0.6718743149051954, + "learning_rate": 4.463041662617861e-06, + "loss": 0.9527, + "step": 45740 + }, + { + "epoch": 0.6900660653413376, + "grad_norm": 0.6570382990523594, + "learning_rate": 4.459056521342e-06, + "loss": 0.9584, + "step": 45750 + }, + { + "epoch": 0.6902168994539805, + "grad_norm": 0.6868539504842, + "learning_rate": 4.455072649507014e-06, + "loss": 0.9703, + "step": 45760 + }, + { + "epoch": 0.6903677335666234, + "grad_norm": 0.6716817844239954, + "learning_rate": 4.4510900480256125e-06, + "loss": 0.9828, + "step": 45770 + }, + { + "epoch": 0.6905185676792663, + "grad_norm": 0.6966892729321353, + "learning_rate": 4.447108717810222e-06, + "loss": 0.9739, + "step": 45780 + }, + { + "epoch": 0.6906694017919093, + "grad_norm": 0.6443339505978952, + "learning_rate": 4.443128659772973e-06, + "loss": 0.9667, + "step": 45790 + }, + { + "epoch": 0.6908202359045522, + "grad_norm": 0.7510775856455729, + "learning_rate": 4.439149874825699e-06, + "loss": 0.9616, + "step": 45800 + }, + { + "epoch": 0.690971070017195, + "grad_norm": 0.6841103508370449, + "learning_rate": 4.435172363879953e-06, + "loss": 0.9696, + "step": 45810 + }, + { + "epoch": 0.691121904129838, + "grad_norm": 0.708415746911094, + "learning_rate": 4.43119612784699e-06, + "loss": 0.9542, + "step": 45820 + }, + { + "epoch": 0.6912727382424809, + "grad_norm": 0.6731440039546975, + "learning_rate": 4.427221167637767e-06, + "loss": 0.9605, + "step": 45830 + }, + { + "epoch": 0.6914235723551239, + "grad_norm": 0.684928524226018, + "learning_rate": 4.423247484162963e-06, + "loss": 0.9645, + "step": 45840 + }, + { + "epoch": 0.6915744064677668, + "grad_norm": 0.6907573110172496, + "learning_rate": 4.419275078332951e-06, + "loss": 0.9655, + "step": 45850 + }, + { + "epoch": 0.6917252405804096, + "grad_norm": 0.661761120098287, + "learning_rate": 4.415303951057822e-06, + "loss": 0.9744, + "step": 45860 + }, + { + "epoch": 0.6918760746930526, + "grad_norm": 0.6871057484606771, + "learning_rate": 4.411334103247367e-06, + "loss": 0.9484, + "step": 45870 + }, + { + "epoch": 0.6920269088056955, + "grad_norm": 0.6274742073776481, + "learning_rate": 4.40736553581108e-06, + "loss": 0.955, + "step": 45880 + }, + { + "epoch": 0.6921777429183384, + "grad_norm": 0.6618269390620635, + "learning_rate": 4.4033982496581725e-06, + "loss": 0.9655, + "step": 45890 + }, + { + "epoch": 0.6923285770309813, + "grad_norm": 0.6926010708488595, + "learning_rate": 4.399432245697563e-06, + "loss": 0.9767, + "step": 45900 + }, + { + "epoch": 0.6924794111436242, + "grad_norm": 0.6657752343776865, + "learning_rate": 4.39546752483786e-06, + "loss": 0.9693, + "step": 45910 + }, + { + "epoch": 0.6926302452562672, + "grad_norm": 0.6676023664993439, + "learning_rate": 4.391504087987397e-06, + "loss": 0.9573, + "step": 45920 + }, + { + "epoch": 0.6927810793689101, + "grad_norm": 0.7299129032527284, + "learning_rate": 4.387541936054203e-06, + "loss": 0.9419, + "step": 45930 + }, + { + "epoch": 0.692931913481553, + "grad_norm": 0.7218538791949618, + "learning_rate": 4.383581069946009e-06, + "loss": 0.9568, + "step": 45940 + }, + { + "epoch": 0.6930827475941959, + "grad_norm": 0.727672582894359, + "learning_rate": 4.379621490570266e-06, + "loss": 0.9649, + "step": 45950 + }, + { + "epoch": 0.6932335817068388, + "grad_norm": 0.6540824113215598, + "learning_rate": 4.3756631988341165e-06, + "loss": 0.9665, + "step": 45960 + }, + { + "epoch": 0.6933844158194817, + "grad_norm": 0.6481494704594554, + "learning_rate": 4.3717061956444105e-06, + "loss": 0.9551, + "step": 45970 + }, + { + "epoch": 0.6935352499321247, + "grad_norm": 0.6830142758353723, + "learning_rate": 4.367750481907711e-06, + "loss": 0.9551, + "step": 45980 + }, + { + "epoch": 0.6936860840447676, + "grad_norm": 0.6698279087667591, + "learning_rate": 4.363796058530272e-06, + "loss": 0.9496, + "step": 45990 + }, + { + "epoch": 0.6938369181574104, + "grad_norm": 0.6446649941863352, + "learning_rate": 4.359842926418069e-06, + "loss": 0.9697, + "step": 46000 + }, + { + "epoch": 0.6939877522700534, + "grad_norm": 0.6777314971246143, + "learning_rate": 4.355891086476767e-06, + "loss": 0.968, + "step": 46010 + }, + { + "epoch": 0.6941385863826963, + "grad_norm": 0.6333072041186231, + "learning_rate": 4.351940539611735e-06, + "loss": 0.9674, + "step": 46020 + }, + { + "epoch": 0.6942894204953393, + "grad_norm": 0.6525731955138507, + "learning_rate": 4.347991286728062e-06, + "loss": 0.9674, + "step": 46030 + }, + { + "epoch": 0.6944402546079821, + "grad_norm": 0.6594736603486671, + "learning_rate": 4.344043328730519e-06, + "loss": 0.9566, + "step": 46040 + }, + { + "epoch": 0.694591088720625, + "grad_norm": 0.7121127127305211, + "learning_rate": 4.340096666523601e-06, + "loss": 0.9572, + "step": 46050 + }, + { + "epoch": 0.694741922833268, + "grad_norm": 0.6422460021817237, + "learning_rate": 4.336151301011485e-06, + "loss": 0.9568, + "step": 46060 + }, + { + "epoch": 0.6948927569459109, + "grad_norm": 0.666722919060133, + "learning_rate": 4.332207233098072e-06, + "loss": 0.955, + "step": 46070 + }, + { + "epoch": 0.6950435910585538, + "grad_norm": 0.7499808137191358, + "learning_rate": 4.328264463686947e-06, + "loss": 0.9625, + "step": 46080 + }, + { + "epoch": 0.6951944251711967, + "grad_norm": 0.7030089299701825, + "learning_rate": 4.324322993681416e-06, + "loss": 0.9722, + "step": 46090 + }, + { + "epoch": 0.6953452592838396, + "grad_norm": 0.6901131191056763, + "learning_rate": 4.320382823984469e-06, + "loss": 0.9745, + "step": 46100 + }, + { + "epoch": 0.6954960933964826, + "grad_norm": 0.6601244362655699, + "learning_rate": 4.316443955498807e-06, + "loss": 0.9741, + "step": 46110 + }, + { + "epoch": 0.6956469275091255, + "grad_norm": 0.7352909257163434, + "learning_rate": 4.312506389126837e-06, + "loss": 0.963, + "step": 46120 + }, + { + "epoch": 0.6957977616217684, + "grad_norm": 0.6583008887708923, + "learning_rate": 4.308570125770658e-06, + "loss": 0.9669, + "step": 46130 + }, + { + "epoch": 0.6959485957344113, + "grad_norm": 0.6752105999016458, + "learning_rate": 4.304635166332081e-06, + "loss": 0.9398, + "step": 46140 + }, + { + "epoch": 0.6960994298470542, + "grad_norm": 0.7057203564934664, + "learning_rate": 4.300701511712611e-06, + "loss": 0.9744, + "step": 46150 + }, + { + "epoch": 0.6962502639596971, + "grad_norm": 0.6976697850598422, + "learning_rate": 4.296769162813449e-06, + "loss": 0.9571, + "step": 46160 + }, + { + "epoch": 0.6964010980723401, + "grad_norm": 0.7420654995771638, + "learning_rate": 4.292838120535515e-06, + "loss": 0.9614, + "step": 46170 + }, + { + "epoch": 0.6965519321849829, + "grad_norm": 0.6520531911783436, + "learning_rate": 4.2889083857794126e-06, + "loss": 0.963, + "step": 46180 + }, + { + "epoch": 0.6967027662976258, + "grad_norm": 0.6816408365052989, + "learning_rate": 4.284979959445448e-06, + "loss": 0.9497, + "step": 46190 + }, + { + "epoch": 0.6968536004102688, + "grad_norm": 0.6605134084222238, + "learning_rate": 4.28105284243364e-06, + "loss": 0.9558, + "step": 46200 + }, + { + "epoch": 0.6970044345229117, + "grad_norm": 0.6744584119635759, + "learning_rate": 4.277127035643689e-06, + "loss": 0.9534, + "step": 46210 + }, + { + "epoch": 0.6971552686355547, + "grad_norm": 0.6383333305650215, + "learning_rate": 4.2732025399750146e-06, + "loss": 0.9601, + "step": 46220 + }, + { + "epoch": 0.6973061027481975, + "grad_norm": 0.7007246443427905, + "learning_rate": 4.269279356326717e-06, + "loss": 0.9726, + "step": 46230 + }, + { + "epoch": 0.6974569368608404, + "grad_norm": 0.6247301976567439, + "learning_rate": 4.2653574855976145e-06, + "loss": 0.9692, + "step": 46240 + }, + { + "epoch": 0.6976077709734834, + "grad_norm": 0.6832322902697068, + "learning_rate": 4.261436928686206e-06, + "loss": 0.9683, + "step": 46250 + }, + { + "epoch": 0.6977586050861263, + "grad_norm": 0.6652832064539247, + "learning_rate": 4.257517686490708e-06, + "loss": 0.9477, + "step": 46260 + }, + { + "epoch": 0.6979094391987692, + "grad_norm": 0.69584338347662, + "learning_rate": 4.253599759909017e-06, + "loss": 0.9675, + "step": 46270 + }, + { + "epoch": 0.6980602733114121, + "grad_norm": 0.7259618613370924, + "learning_rate": 4.249683149838747e-06, + "loss": 0.9751, + "step": 46280 + }, + { + "epoch": 0.698211107424055, + "grad_norm": 0.6960100657610158, + "learning_rate": 4.245767857177196e-06, + "loss": 0.9626, + "step": 46290 + }, + { + "epoch": 0.698361941536698, + "grad_norm": 0.6631559420552978, + "learning_rate": 4.241853882821362e-06, + "loss": 0.9596, + "step": 46300 + }, + { + "epoch": 0.6985127756493409, + "grad_norm": 0.6512764186789002, + "learning_rate": 4.23794122766795e-06, + "loss": 0.9746, + "step": 46310 + }, + { + "epoch": 0.6986636097619837, + "grad_norm": 0.6658506239958233, + "learning_rate": 4.234029892613355e-06, + "loss": 0.9547, + "step": 46320 + }, + { + "epoch": 0.6988144438746267, + "grad_norm": 0.6601783270650099, + "learning_rate": 4.230119878553668e-06, + "loss": 0.9638, + "step": 46330 + }, + { + "epoch": 0.6989652779872696, + "grad_norm": 0.6779591723737115, + "learning_rate": 4.226211186384686e-06, + "loss": 0.9518, + "step": 46340 + }, + { + "epoch": 0.6991161120999125, + "grad_norm": 0.6643849826872951, + "learning_rate": 4.222303817001896e-06, + "loss": 0.971, + "step": 46350 + }, + { + "epoch": 0.6992669462125555, + "grad_norm": 0.7745850813250303, + "learning_rate": 4.21839777130048e-06, + "loss": 0.9537, + "step": 46360 + }, + { + "epoch": 0.6994177803251983, + "grad_norm": 0.793309061005381, + "learning_rate": 4.214493050175328e-06, + "loss": 0.9475, + "step": 46370 + }, + { + "epoch": 0.6995686144378412, + "grad_norm": 0.6589333183471031, + "learning_rate": 4.21058965452101e-06, + "loss": 0.9557, + "step": 46380 + }, + { + "epoch": 0.6997194485504842, + "grad_norm": 0.6649087195813501, + "learning_rate": 4.2066875852318076e-06, + "loss": 0.9651, + "step": 46390 + }, + { + "epoch": 0.6998702826631271, + "grad_norm": 0.682029894423432, + "learning_rate": 4.202786843201696e-06, + "loss": 0.9503, + "step": 46400 + }, + { + "epoch": 0.7000211167757701, + "grad_norm": 0.6637501793413799, + "learning_rate": 4.198887429324335e-06, + "loss": 0.9828, + "step": 46410 + }, + { + "epoch": 0.7001719508884129, + "grad_norm": 0.6313473185942042, + "learning_rate": 4.1949893444930926e-06, + "loss": 0.9751, + "step": 46420 + }, + { + "epoch": 0.7003227850010558, + "grad_norm": 0.6980282506178779, + "learning_rate": 4.191092589601028e-06, + "loss": 0.9437, + "step": 46430 + }, + { + "epoch": 0.7004736191136988, + "grad_norm": 0.6780358231503378, + "learning_rate": 4.187197165540888e-06, + "loss": 0.951, + "step": 46440 + }, + { + "epoch": 0.7006244532263417, + "grad_norm": 0.705807766465969, + "learning_rate": 4.183303073205133e-06, + "loss": 0.9709, + "step": 46450 + }, + { + "epoch": 0.7007752873389845, + "grad_norm": 0.7326648366056014, + "learning_rate": 4.1794103134859e-06, + "loss": 0.9597, + "step": 46460 + }, + { + "epoch": 0.7009261214516275, + "grad_norm": 0.6947577527381211, + "learning_rate": 4.175518887275025e-06, + "loss": 0.9613, + "step": 46470 + }, + { + "epoch": 0.7010769555642704, + "grad_norm": 0.7318169392795641, + "learning_rate": 4.17162879546405e-06, + "loss": 0.9658, + "step": 46480 + }, + { + "epoch": 0.7012277896769133, + "grad_norm": 0.6411279319543851, + "learning_rate": 4.167740038944198e-06, + "loss": 0.9637, + "step": 46490 + }, + { + "epoch": 0.7013786237895563, + "grad_norm": 0.6812025537741934, + "learning_rate": 4.1638526186063866e-06, + "loss": 0.9797, + "step": 46500 + }, + { + "epoch": 0.7015294579021991, + "grad_norm": 0.7088444724287825, + "learning_rate": 4.1599665353412385e-06, + "loss": 0.9741, + "step": 46510 + }, + { + "epoch": 0.701680292014842, + "grad_norm": 0.7635981633782271, + "learning_rate": 4.156081790039057e-06, + "loss": 0.9627, + "step": 46520 + }, + { + "epoch": 0.701831126127485, + "grad_norm": 0.6635600683419108, + "learning_rate": 4.1521983835898504e-06, + "loss": 0.9596, + "step": 46530 + }, + { + "epoch": 0.7019819602401279, + "grad_norm": 0.7238343093300341, + "learning_rate": 4.148316316883311e-06, + "loss": 0.9622, + "step": 46540 + }, + { + "epoch": 0.7021327943527709, + "grad_norm": 0.6715269187296798, + "learning_rate": 4.144435590808826e-06, + "loss": 0.9649, + "step": 46550 + }, + { + "epoch": 0.7022836284654137, + "grad_norm": 0.6882737545081075, + "learning_rate": 4.140556206255481e-06, + "loss": 0.9633, + "step": 46560 + }, + { + "epoch": 0.7024344625780566, + "grad_norm": 0.6739165065209161, + "learning_rate": 4.13667816411205e-06, + "loss": 0.9588, + "step": 46570 + }, + { + "epoch": 0.7025852966906996, + "grad_norm": 0.6748984028605369, + "learning_rate": 4.132801465266998e-06, + "loss": 0.9573, + "step": 46580 + }, + { + "epoch": 0.7027361308033425, + "grad_norm": 0.6757847317849344, + "learning_rate": 4.128926110608488e-06, + "loss": 0.9707, + "step": 46590 + }, + { + "epoch": 0.7028869649159853, + "grad_norm": 0.7020255583292958, + "learning_rate": 4.125052101024369e-06, + "loss": 0.9671, + "step": 46600 + }, + { + "epoch": 0.7030377990286283, + "grad_norm": 0.6520267882098144, + "learning_rate": 4.121179437402178e-06, + "loss": 0.952, + "step": 46610 + }, + { + "epoch": 0.7031886331412712, + "grad_norm": 0.6310255300544896, + "learning_rate": 4.117308120629162e-06, + "loss": 0.9601, + "step": 46620 + }, + { + "epoch": 0.7033394672539142, + "grad_norm": 0.6647110513455966, + "learning_rate": 4.11343815159224e-06, + "loss": 0.9512, + "step": 46630 + }, + { + "epoch": 0.7034903013665571, + "grad_norm": 0.9883327562385378, + "learning_rate": 4.1095695311780244e-06, + "loss": 0.9468, + "step": 46640 + }, + { + "epoch": 0.7036411354791999, + "grad_norm": 0.6804006296981888, + "learning_rate": 4.105702260272836e-06, + "loss": 0.9783, + "step": 46650 + }, + { + "epoch": 0.7037919695918429, + "grad_norm": 0.6767018958184663, + "learning_rate": 4.101836339762662e-06, + "loss": 0.9748, + "step": 46660 + }, + { + "epoch": 0.7039428037044858, + "grad_norm": 0.6242442208940384, + "learning_rate": 4.097971770533201e-06, + "loss": 0.9622, + "step": 46670 + }, + { + "epoch": 0.7040936378171287, + "grad_norm": 0.7040824889007817, + "learning_rate": 4.09410855346983e-06, + "loss": 0.9649, + "step": 46680 + }, + { + "epoch": 0.7042444719297717, + "grad_norm": 0.6604508830346076, + "learning_rate": 4.090246689457617e-06, + "loss": 0.9396, + "step": 46690 + }, + { + "epoch": 0.7043953060424145, + "grad_norm": 0.6577625980883592, + "learning_rate": 4.086386179381326e-06, + "loss": 0.9583, + "step": 46700 + }, + { + "epoch": 0.7045461401550575, + "grad_norm": 0.65502528045122, + "learning_rate": 4.082527024125406e-06, + "loss": 0.9322, + "step": 46710 + }, + { + "epoch": 0.7046969742677004, + "grad_norm": 0.6869731688831912, + "learning_rate": 4.0786692245739976e-06, + "loss": 0.9431, + "step": 46720 + }, + { + "epoch": 0.7048478083803433, + "grad_norm": 0.633546880261343, + "learning_rate": 4.0748127816109285e-06, + "loss": 0.9578, + "step": 46730 + }, + { + "epoch": 0.7049986424929862, + "grad_norm": 0.6947146787795917, + "learning_rate": 4.070957696119721e-06, + "loss": 0.951, + "step": 46740 + }, + { + "epoch": 0.7051494766056291, + "grad_norm": 0.6479643319646379, + "learning_rate": 4.067103968983577e-06, + "loss": 0.9618, + "step": 46750 + }, + { + "epoch": 0.705300310718272, + "grad_norm": 0.6650306326612199, + "learning_rate": 4.0632516010853985e-06, + "loss": 0.9788, + "step": 46760 + }, + { + "epoch": 0.705451144830915, + "grad_norm": 0.657518055897108, + "learning_rate": 4.059400593307768e-06, + "loss": 0.9459, + "step": 46770 + }, + { + "epoch": 0.7056019789435579, + "grad_norm": 0.7002119658694078, + "learning_rate": 4.055550946532957e-06, + "loss": 0.9631, + "step": 46780 + }, + { + "epoch": 0.7057528130562007, + "grad_norm": 0.694420524058616, + "learning_rate": 4.051702661642931e-06, + "loss": 0.9713, + "step": 46790 + }, + { + "epoch": 0.7059036471688437, + "grad_norm": 0.6480676760006493, + "learning_rate": 4.047855739519334e-06, + "loss": 0.9624, + "step": 46800 + }, + { + "epoch": 0.7060544812814866, + "grad_norm": 0.7129604916469708, + "learning_rate": 4.04401018104351e-06, + "loss": 0.9688, + "step": 46810 + }, + { + "epoch": 0.7062053153941296, + "grad_norm": 0.7106224202893109, + "learning_rate": 4.040165987096481e-06, + "loss": 0.957, + "step": 46820 + }, + { + "epoch": 0.7063561495067725, + "grad_norm": 0.68457776885077, + "learning_rate": 4.036323158558954e-06, + "loss": 0.9651, + "step": 46830 + }, + { + "epoch": 0.7065069836194153, + "grad_norm": 0.669380455565879, + "learning_rate": 4.0324816963113376e-06, + "loss": 0.9552, + "step": 46840 + }, + { + "epoch": 0.7066578177320583, + "grad_norm": 0.6424247905128729, + "learning_rate": 4.028641601233714e-06, + "loss": 0.9619, + "step": 46850 + }, + { + "epoch": 0.7068086518447012, + "grad_norm": 0.6444121851178191, + "learning_rate": 4.024802874205852e-06, + "loss": 0.9717, + "step": 46860 + }, + { + "epoch": 0.7069594859573441, + "grad_norm": 0.6544577730117273, + "learning_rate": 4.020965516107219e-06, + "loss": 0.9488, + "step": 46870 + }, + { + "epoch": 0.707110320069987, + "grad_norm": 0.6920525797250552, + "learning_rate": 4.017129527816955e-06, + "loss": 0.9473, + "step": 46880 + }, + { + "epoch": 0.7072611541826299, + "grad_norm": 0.6526854909595337, + "learning_rate": 4.013294910213898e-06, + "loss": 0.9511, + "step": 46890 + }, + { + "epoch": 0.7074119882952729, + "grad_norm": 0.6953087283275167, + "learning_rate": 4.009461664176562e-06, + "loss": 0.9708, + "step": 46900 + }, + { + "epoch": 0.7075628224079158, + "grad_norm": 0.6884707957241152, + "learning_rate": 4.0056297905831535e-06, + "loss": 0.9688, + "step": 46910 + }, + { + "epoch": 0.7077136565205587, + "grad_norm": 0.6652332775694099, + "learning_rate": 4.001799290311559e-06, + "loss": 0.956, + "step": 46920 + }, + { + "epoch": 0.7078644906332016, + "grad_norm": 0.6492424282677549, + "learning_rate": 3.997970164239359e-06, + "loss": 0.9442, + "step": 46930 + }, + { + "epoch": 0.7080153247458445, + "grad_norm": 0.6546843891083776, + "learning_rate": 3.994142413243807e-06, + "loss": 0.9626, + "step": 46940 + }, + { + "epoch": 0.7081661588584874, + "grad_norm": 0.6431075686337528, + "learning_rate": 3.990316038201854e-06, + "loss": 0.9502, + "step": 46950 + }, + { + "epoch": 0.7083169929711304, + "grad_norm": 0.6669336936642396, + "learning_rate": 3.986491039990129e-06, + "loss": 0.9562, + "step": 46960 + }, + { + "epoch": 0.7084678270837733, + "grad_norm": 0.6257615988371066, + "learning_rate": 3.982667419484941e-06, + "loss": 0.9448, + "step": 46970 + }, + { + "epoch": 0.7086186611964161, + "grad_norm": 0.7082028797432709, + "learning_rate": 3.9788451775622975e-06, + "loss": 0.9752, + "step": 46980 + }, + { + "epoch": 0.7087694953090591, + "grad_norm": 0.7099829253717275, + "learning_rate": 3.975024315097875e-06, + "loss": 0.9503, + "step": 46990 + }, + { + "epoch": 0.708920329421702, + "grad_norm": 0.6372472964026623, + "learning_rate": 3.97120483296704e-06, + "loss": 0.9476, + "step": 47000 + }, + { + "epoch": 0.709071163534345, + "grad_norm": 0.683597609925833, + "learning_rate": 3.967386732044849e-06, + "loss": 0.9521, + "step": 47010 + }, + { + "epoch": 0.7092219976469878, + "grad_norm": 0.6554273683915379, + "learning_rate": 3.963570013206033e-06, + "loss": 0.9658, + "step": 47020 + }, + { + "epoch": 0.7093728317596307, + "grad_norm": 0.6411946483272774, + "learning_rate": 3.959754677325005e-06, + "loss": 0.944, + "step": 47030 + }, + { + "epoch": 0.7095236658722737, + "grad_norm": 0.6412396442952871, + "learning_rate": 3.955940725275875e-06, + "loss": 0.9553, + "step": 47040 + }, + { + "epoch": 0.7096744999849166, + "grad_norm": 0.665208679782186, + "learning_rate": 3.952128157932419e-06, + "loss": 0.9501, + "step": 47050 + }, + { + "epoch": 0.7098253340975595, + "grad_norm": 0.6949971244879308, + "learning_rate": 3.948316976168106e-06, + "loss": 0.9633, + "step": 47060 + }, + { + "epoch": 0.7099761682102024, + "grad_norm": 0.7433079036438413, + "learning_rate": 3.94450718085609e-06, + "loss": 0.9478, + "step": 47070 + }, + { + "epoch": 0.7101270023228453, + "grad_norm": 0.6878678977855508, + "learning_rate": 3.940698772869195e-06, + "loss": 0.9743, + "step": 47080 + }, + { + "epoch": 0.7102778364354883, + "grad_norm": 0.6541245224872728, + "learning_rate": 3.936891753079941e-06, + "loss": 0.9845, + "step": 47090 + }, + { + "epoch": 0.7104286705481312, + "grad_norm": 0.6710853851434214, + "learning_rate": 3.93308612236052e-06, + "loss": 0.9538, + "step": 47100 + }, + { + "epoch": 0.7105795046607741, + "grad_norm": 0.693165859341172, + "learning_rate": 3.929281881582808e-06, + "loss": 0.9723, + "step": 47110 + }, + { + "epoch": 0.710730338773417, + "grad_norm": 0.6768603833740372, + "learning_rate": 3.9254790316183686e-06, + "loss": 0.9764, + "step": 47120 + }, + { + "epoch": 0.7108811728860599, + "grad_norm": 0.693718509897444, + "learning_rate": 3.9216775733384396e-06, + "loss": 0.9605, + "step": 47130 + }, + { + "epoch": 0.7110320069987028, + "grad_norm": 0.6625921302029895, + "learning_rate": 3.917877507613939e-06, + "loss": 0.9556, + "step": 47140 + }, + { + "epoch": 0.7111828411113458, + "grad_norm": 0.6413536074296515, + "learning_rate": 3.914078835315476e-06, + "loss": 0.961, + "step": 47150 + }, + { + "epoch": 0.7113336752239886, + "grad_norm": 0.6880298409031902, + "learning_rate": 3.910281557313329e-06, + "loss": 0.9542, + "step": 47160 + }, + { + "epoch": 0.7114845093366315, + "grad_norm": 0.6777251657918608, + "learning_rate": 3.906485674477461e-06, + "loss": 0.9771, + "step": 47170 + }, + { + "epoch": 0.7116353434492745, + "grad_norm": 0.7020803015044228, + "learning_rate": 3.902691187677521e-06, + "loss": 0.9651, + "step": 47180 + }, + { + "epoch": 0.7117861775619174, + "grad_norm": 0.7110888254237464, + "learning_rate": 3.898898097782826e-06, + "loss": 0.96, + "step": 47190 + }, + { + "epoch": 0.7119370116745604, + "grad_norm": 0.6599605863662148, + "learning_rate": 3.8951064056623895e-06, + "loss": 0.959, + "step": 47200 + }, + { + "epoch": 0.7120878457872032, + "grad_norm": 0.6439015250069083, + "learning_rate": 3.8913161121848865e-06, + "loss": 0.9762, + "step": 47210 + }, + { + "epoch": 0.7122386798998461, + "grad_norm": 0.6720624296746536, + "learning_rate": 3.8875272182186886e-06, + "loss": 0.9519, + "step": 47220 + }, + { + "epoch": 0.7123895140124891, + "grad_norm": 0.6978053011503775, + "learning_rate": 3.883739724631831e-06, + "loss": 0.9618, + "step": 47230 + }, + { + "epoch": 0.712540348125132, + "grad_norm": 0.6997628301174308, + "learning_rate": 3.879953632292043e-06, + "loss": 0.9639, + "step": 47240 + }, + { + "epoch": 0.712691182237775, + "grad_norm": 0.6581840981645872, + "learning_rate": 3.876168942066718e-06, + "loss": 0.9489, + "step": 47250 + }, + { + "epoch": 0.7128420163504178, + "grad_norm": 0.6663037780577884, + "learning_rate": 3.872385654822945e-06, + "loss": 0.9384, + "step": 47260 + }, + { + "epoch": 0.7129928504630607, + "grad_norm": 0.6487854058957366, + "learning_rate": 3.868603771427477e-06, + "loss": 0.9273, + "step": 47270 + }, + { + "epoch": 0.7131436845757037, + "grad_norm": 0.7474077436516612, + "learning_rate": 3.864823292746748e-06, + "loss": 0.9702, + "step": 47280 + }, + { + "epoch": 0.7132945186883466, + "grad_norm": 0.8125838271530047, + "learning_rate": 3.86104421964688e-06, + "loss": 0.9409, + "step": 47290 + }, + { + "epoch": 0.7134453528009894, + "grad_norm": 0.6831392718117217, + "learning_rate": 3.857266552993663e-06, + "loss": 0.9667, + "step": 47300 + }, + { + "epoch": 0.7135961869136324, + "grad_norm": 0.6750774140499342, + "learning_rate": 3.853490293652562e-06, + "loss": 0.9474, + "step": 47310 + }, + { + "epoch": 0.7137470210262753, + "grad_norm": 0.6650564211583561, + "learning_rate": 3.849715442488735e-06, + "loss": 0.9641, + "step": 47320 + }, + { + "epoch": 0.7138978551389182, + "grad_norm": 0.7117149081881479, + "learning_rate": 3.845942000366999e-06, + "loss": 0.9576, + "step": 47330 + }, + { + "epoch": 0.7140486892515612, + "grad_norm": 0.7128050465961584, + "learning_rate": 3.842169968151865e-06, + "loss": 0.9611, + "step": 47340 + }, + { + "epoch": 0.714199523364204, + "grad_norm": 0.6738742018229502, + "learning_rate": 3.838399346707508e-06, + "loss": 0.9904, + "step": 47350 + }, + { + "epoch": 0.7143503574768469, + "grad_norm": 0.6789548792259386, + "learning_rate": 3.8346301368977835e-06, + "loss": 0.9373, + "step": 47360 + }, + { + "epoch": 0.7145011915894899, + "grad_norm": 0.6732408211178786, + "learning_rate": 3.83086233958623e-06, + "loss": 0.9631, + "step": 47370 + }, + { + "epoch": 0.7146520257021328, + "grad_norm": 0.6614506292760138, + "learning_rate": 3.82709595563605e-06, + "loss": 0.9545, + "step": 47380 + }, + { + "epoch": 0.7148028598147758, + "grad_norm": 0.6491561180966019, + "learning_rate": 3.823330985910136e-06, + "loss": 0.9558, + "step": 47390 + }, + { + "epoch": 0.7149536939274186, + "grad_norm": 0.6572018744285403, + "learning_rate": 3.819567431271045e-06, + "loss": 0.9552, + "step": 47400 + }, + { + "epoch": 0.7151045280400615, + "grad_norm": 0.6437277945721892, + "learning_rate": 3.81580529258102e-06, + "loss": 0.9585, + "step": 47410 + }, + { + "epoch": 0.7152553621527045, + "grad_norm": 0.6515780163898407, + "learning_rate": 3.812044570701967e-06, + "loss": 0.9667, + "step": 47420 + }, + { + "epoch": 0.7154061962653474, + "grad_norm": 0.669378220606123, + "learning_rate": 3.8082852664954827e-06, + "loss": 0.9652, + "step": 47430 + }, + { + "epoch": 0.7155570303779902, + "grad_norm": 0.7285829740019882, + "learning_rate": 3.804527380822827e-06, + "loss": 0.9589, + "step": 47440 + }, + { + "epoch": 0.7157078644906332, + "grad_norm": 0.774001119111385, + "learning_rate": 3.8007709145449355e-06, + "loss": 0.9482, + "step": 47450 + }, + { + "epoch": 0.7158586986032761, + "grad_norm": 0.6569706492199994, + "learning_rate": 3.7970158685224277e-06, + "loss": 0.9557, + "step": 47460 + }, + { + "epoch": 0.716009532715919, + "grad_norm": 0.8101281466896586, + "learning_rate": 3.7932622436155877e-06, + "loss": 0.9545, + "step": 47470 + }, + { + "epoch": 0.716160366828562, + "grad_norm": 0.6791403492741251, + "learning_rate": 3.789510040684382e-06, + "loss": 0.9464, + "step": 47480 + }, + { + "epoch": 0.7163112009412048, + "grad_norm": 0.6619463688687927, + "learning_rate": 3.7857592605884463e-06, + "loss": 0.955, + "step": 47490 + }, + { + "epoch": 0.7164620350538478, + "grad_norm": 0.7092685359023067, + "learning_rate": 3.782009904187086e-06, + "loss": 0.9641, + "step": 47500 + }, + { + "epoch": 0.7166128691664907, + "grad_norm": 0.6874424870555979, + "learning_rate": 3.7782619723392956e-06, + "loss": 0.9673, + "step": 47510 + }, + { + "epoch": 0.7167637032791336, + "grad_norm": 0.6963092335328299, + "learning_rate": 3.7745154659037275e-06, + "loss": 0.9532, + "step": 47520 + }, + { + "epoch": 0.7169145373917766, + "grad_norm": 0.6732966301620184, + "learning_rate": 3.7707703857387123e-06, + "loss": 0.9628, + "step": 47530 + }, + { + "epoch": 0.7170653715044194, + "grad_norm": 0.6718100368154939, + "learning_rate": 3.7670267327022603e-06, + "loss": 0.9566, + "step": 47540 + }, + { + "epoch": 0.7172162056170623, + "grad_norm": 0.6707197807352047, + "learning_rate": 3.763284507652043e-06, + "loss": 0.9695, + "step": 47550 + }, + { + "epoch": 0.7173670397297053, + "grad_norm": 0.6495702843078641, + "learning_rate": 3.7595437114454157e-06, + "loss": 0.9542, + "step": 47560 + }, + { + "epoch": 0.7175178738423482, + "grad_norm": 0.6942856383495352, + "learning_rate": 3.7558043449394055e-06, + "loss": 0.9311, + "step": 47570 + }, + { + "epoch": 0.717668707954991, + "grad_norm": 0.6515124429398991, + "learning_rate": 3.7520664089907043e-06, + "loss": 0.9524, + "step": 47580 + }, + { + "epoch": 0.717819542067634, + "grad_norm": 0.6792019273423433, + "learning_rate": 3.7483299044556777e-06, + "loss": 0.9682, + "step": 47590 + }, + { + "epoch": 0.7179703761802769, + "grad_norm": 0.6382241106778239, + "learning_rate": 3.7445948321903715e-06, + "loss": 0.9535, + "step": 47600 + }, + { + "epoch": 0.7181212102929199, + "grad_norm": 0.6950847686407222, + "learning_rate": 3.7408611930504935e-06, + "loss": 0.9616, + "step": 47610 + }, + { + "epoch": 0.7182720444055628, + "grad_norm": 0.6407521088094537, + "learning_rate": 3.737128987891432e-06, + "loss": 0.9765, + "step": 47620 + }, + { + "epoch": 0.7184228785182056, + "grad_norm": 0.6457720991521317, + "learning_rate": 3.7333982175682417e-06, + "loss": 0.949, + "step": 47630 + }, + { + "epoch": 0.7185737126308486, + "grad_norm": 0.6950628864499372, + "learning_rate": 3.7296688829356432e-06, + "loss": 0.9648, + "step": 47640 + }, + { + "epoch": 0.7187245467434915, + "grad_norm": 0.6646309951461534, + "learning_rate": 3.7259409848480432e-06, + "loss": 0.9643, + "step": 47650 + }, + { + "epoch": 0.7188753808561344, + "grad_norm": 0.6773403276019162, + "learning_rate": 3.7222145241595055e-06, + "loss": 0.9537, + "step": 47660 + }, + { + "epoch": 0.7190262149687774, + "grad_norm": 0.6768251938483725, + "learning_rate": 3.7184895017237663e-06, + "loss": 0.9526, + "step": 47670 + }, + { + "epoch": 0.7191770490814202, + "grad_norm": 0.6594695159874128, + "learning_rate": 3.7147659183942433e-06, + "loss": 0.9486, + "step": 47680 + }, + { + "epoch": 0.7193278831940632, + "grad_norm": 0.651328311253593, + "learning_rate": 3.7110437750240123e-06, + "loss": 0.9721, + "step": 47690 + }, + { + "epoch": 0.7194787173067061, + "grad_norm": 0.6353297079831874, + "learning_rate": 3.7073230724658206e-06, + "loss": 0.9749, + "step": 47700 + }, + { + "epoch": 0.719629551419349, + "grad_norm": 0.6815490977998643, + "learning_rate": 3.703603811572092e-06, + "loss": 0.9527, + "step": 47710 + }, + { + "epoch": 0.7197803855319919, + "grad_norm": 0.6910897294753924, + "learning_rate": 3.699885993194918e-06, + "loss": 0.9413, + "step": 47720 + }, + { + "epoch": 0.7199312196446348, + "grad_norm": 0.682195792099426, + "learning_rate": 3.696169618186054e-06, + "loss": 0.9519, + "step": 47730 + }, + { + "epoch": 0.7200820537572777, + "grad_norm": 0.6624412035121656, + "learning_rate": 3.6924546873969335e-06, + "loss": 0.9518, + "step": 47740 + }, + { + "epoch": 0.7202328878699207, + "grad_norm": 0.6543896506425161, + "learning_rate": 3.6887412016786472e-06, + "loss": 0.9552, + "step": 47750 + }, + { + "epoch": 0.7203837219825636, + "grad_norm": 0.6698627094320222, + "learning_rate": 3.68502916188197e-06, + "loss": 0.9503, + "step": 47760 + }, + { + "epoch": 0.7205345560952064, + "grad_norm": 0.64888475072049, + "learning_rate": 3.681318568857334e-06, + "loss": 0.9636, + "step": 47770 + }, + { + "epoch": 0.7206853902078494, + "grad_norm": 0.6882742159050188, + "learning_rate": 3.67760942345484e-06, + "loss": 0.9555, + "step": 47780 + }, + { + "epoch": 0.7208362243204923, + "grad_norm": 0.6596428130681502, + "learning_rate": 3.673901726524265e-06, + "loss": 0.9531, + "step": 47790 + }, + { + "epoch": 0.7209870584331353, + "grad_norm": 0.7109566194528558, + "learning_rate": 3.6701954789150484e-06, + "loss": 0.9598, + "step": 47800 + }, + { + "epoch": 0.7211378925457782, + "grad_norm": 0.724054215389158, + "learning_rate": 3.6664906814762946e-06, + "loss": 0.9476, + "step": 47810 + }, + { + "epoch": 0.721288726658421, + "grad_norm": 0.6762458146711188, + "learning_rate": 3.6627873350567866e-06, + "loss": 0.9688, + "step": 47820 + }, + { + "epoch": 0.721439560771064, + "grad_norm": 0.652431935799018, + "learning_rate": 3.6590854405049646e-06, + "loss": 0.9512, + "step": 47830 + }, + { + "epoch": 0.7215903948837069, + "grad_norm": 0.6641192934365195, + "learning_rate": 3.6553849986689373e-06, + "loss": 0.9736, + "step": 47840 + }, + { + "epoch": 0.7217412289963498, + "grad_norm": 0.6517505712988857, + "learning_rate": 3.651686010396489e-06, + "loss": 0.9535, + "step": 47850 + }, + { + "epoch": 0.7218920631089927, + "grad_norm": 0.7082755487513314, + "learning_rate": 3.647988476535058e-06, + "loss": 0.9598, + "step": 47860 + }, + { + "epoch": 0.7220428972216356, + "grad_norm": 0.6543089778994934, + "learning_rate": 3.6442923979317644e-06, + "loss": 0.963, + "step": 47870 + }, + { + "epoch": 0.7221937313342786, + "grad_norm": 0.7955735394348667, + "learning_rate": 3.6405977754333798e-06, + "loss": 0.9435, + "step": 47880 + }, + { + "epoch": 0.7223445654469215, + "grad_norm": 0.6949203829558152, + "learning_rate": 3.636904609886356e-06, + "loss": 0.9662, + "step": 47890 + }, + { + "epoch": 0.7224953995595644, + "grad_norm": 0.656048569573886, + "learning_rate": 3.6332129021367977e-06, + "loss": 0.9775, + "step": 47900 + }, + { + "epoch": 0.7226462336722073, + "grad_norm": 0.6626043757837392, + "learning_rate": 3.629522653030488e-06, + "loss": 0.9768, + "step": 47910 + }, + { + "epoch": 0.7227970677848502, + "grad_norm": 0.6465453421129627, + "learning_rate": 3.6258338634128652e-06, + "loss": 0.9628, + "step": 47920 + }, + { + "epoch": 0.7229479018974931, + "grad_norm": 0.678821130325255, + "learning_rate": 3.622146534129043e-06, + "loss": 0.9537, + "step": 47930 + }, + { + "epoch": 0.7230987360101361, + "grad_norm": 0.6845408318367228, + "learning_rate": 3.618460666023794e-06, + "loss": 0.95, + "step": 47940 + }, + { + "epoch": 0.723249570122779, + "grad_norm": 0.6815838353304873, + "learning_rate": 3.6147762599415526e-06, + "loss": 0.9621, + "step": 47950 + }, + { + "epoch": 0.7234004042354218, + "grad_norm": 0.6394715833882783, + "learning_rate": 3.611093316726432e-06, + "loss": 0.9652, + "step": 47960 + }, + { + "epoch": 0.7235512383480648, + "grad_norm": 0.6616517634465903, + "learning_rate": 3.607411837222198e-06, + "loss": 0.9463, + "step": 47970 + }, + { + "epoch": 0.7237020724607077, + "grad_norm": 0.6660783057835182, + "learning_rate": 3.6037318222722795e-06, + "loss": 0.9717, + "step": 47980 + }, + { + "epoch": 0.7238529065733507, + "grad_norm": 0.704292421273704, + "learning_rate": 3.6000532727197823e-06, + "loss": 0.9498, + "step": 47990 + }, + { + "epoch": 0.7240037406859935, + "grad_norm": 0.6498727078614209, + "learning_rate": 3.5963761894074643e-06, + "loss": 0.9447, + "step": 48000 + }, + { + "epoch": 0.7241545747986364, + "grad_norm": 0.6799345706380046, + "learning_rate": 3.5927005731777577e-06, + "loss": 0.95, + "step": 48010 + }, + { + "epoch": 0.7243054089112794, + "grad_norm": 0.6530663465985476, + "learning_rate": 3.58902642487275e-06, + "loss": 0.9589, + "step": 48020 + }, + { + "epoch": 0.7244562430239223, + "grad_norm": 0.6669938109495237, + "learning_rate": 3.5853537453341924e-06, + "loss": 0.9615, + "step": 48030 + }, + { + "epoch": 0.7246070771365652, + "grad_norm": 0.6664476741084179, + "learning_rate": 3.58168253540351e-06, + "loss": 0.971, + "step": 48040 + }, + { + "epoch": 0.7247579112492081, + "grad_norm": 0.6632901152141826, + "learning_rate": 3.578012795921778e-06, + "loss": 0.9541, + "step": 48050 + }, + { + "epoch": 0.724908745361851, + "grad_norm": 0.7173059837853432, + "learning_rate": 3.574344527729742e-06, + "loss": 0.9598, + "step": 48060 + }, + { + "epoch": 0.725059579474494, + "grad_norm": 0.6613534419717941, + "learning_rate": 3.570677731667814e-06, + "loss": 0.9544, + "step": 48070 + }, + { + "epoch": 0.7252104135871369, + "grad_norm": 0.6553309703892992, + "learning_rate": 3.5670124085760616e-06, + "loss": 0.9356, + "step": 48080 + }, + { + "epoch": 0.7253612476997798, + "grad_norm": 0.6688290501642974, + "learning_rate": 3.5633485592942117e-06, + "loss": 0.9776, + "step": 48090 + }, + { + "epoch": 0.7255120818124227, + "grad_norm": 0.6840770896722793, + "learning_rate": 3.5596861846616694e-06, + "loss": 0.9526, + "step": 48100 + }, + { + "epoch": 0.7256629159250656, + "grad_norm": 0.6849511473397051, + "learning_rate": 3.5560252855174847e-06, + "loss": 0.9619, + "step": 48110 + }, + { + "epoch": 0.7258137500377085, + "grad_norm": 0.7209493750005184, + "learning_rate": 3.5523658627003765e-06, + "loss": 0.9726, + "step": 48120 + }, + { + "epoch": 0.7259645841503515, + "grad_norm": 0.7099545330400935, + "learning_rate": 3.5487079170487305e-06, + "loss": 0.9486, + "step": 48130 + }, + { + "epoch": 0.7261154182629943, + "grad_norm": 0.7633555726341277, + "learning_rate": 3.5450514494005815e-06, + "loss": 0.967, + "step": 48140 + }, + { + "epoch": 0.7262662523756372, + "grad_norm": 0.6540102287084322, + "learning_rate": 3.5413964605936434e-06, + "loss": 0.9531, + "step": 48150 + }, + { + "epoch": 0.7264170864882802, + "grad_norm": 0.6509261397710812, + "learning_rate": 3.537742951465275e-06, + "loss": 0.9706, + "step": 48160 + }, + { + "epoch": 0.7265679206009231, + "grad_norm": 0.6924763410200506, + "learning_rate": 3.5340909228525e-06, + "loss": 0.9645, + "step": 48170 + }, + { + "epoch": 0.7267187547135661, + "grad_norm": 0.6979677104691717, + "learning_rate": 3.5304403755920114e-06, + "loss": 0.9429, + "step": 48180 + }, + { + "epoch": 0.7268695888262089, + "grad_norm": 0.8112680377203292, + "learning_rate": 3.5267913105201535e-06, + "loss": 0.963, + "step": 48190 + }, + { + "epoch": 0.7270204229388518, + "grad_norm": 0.7204305680053227, + "learning_rate": 3.523143728472932e-06, + "loss": 0.9553, + "step": 48200 + }, + { + "epoch": 0.7271712570514948, + "grad_norm": 0.6360805588371372, + "learning_rate": 3.5194976302860197e-06, + "loss": 0.959, + "step": 48210 + }, + { + "epoch": 0.7273220911641377, + "grad_norm": 0.6961360453594886, + "learning_rate": 3.515853016794739e-06, + "loss": 0.9733, + "step": 48220 + }, + { + "epoch": 0.7274729252767806, + "grad_norm": 0.6867711300402014, + "learning_rate": 3.5122098888340817e-06, + "loss": 0.9613, + "step": 48230 + }, + { + "epoch": 0.7276237593894235, + "grad_norm": 0.6859027638051252, + "learning_rate": 3.5085682472386984e-06, + "loss": 0.9519, + "step": 48240 + }, + { + "epoch": 0.7277745935020664, + "grad_norm": 0.7123178587190861, + "learning_rate": 3.504928092842893e-06, + "loss": 0.9558, + "step": 48250 + }, + { + "epoch": 0.7279254276147094, + "grad_norm": 0.7428070307942072, + "learning_rate": 3.5012894264806283e-06, + "loss": 0.964, + "step": 48260 + }, + { + "epoch": 0.7280762617273523, + "grad_norm": 0.644272887376305, + "learning_rate": 3.497652248985537e-06, + "loss": 0.9519, + "step": 48270 + }, + { + "epoch": 0.7282270958399951, + "grad_norm": 0.6747362019358448, + "learning_rate": 3.494016561190896e-06, + "loss": 0.962, + "step": 48280 + }, + { + "epoch": 0.7283779299526381, + "grad_norm": 0.6717207493134704, + "learning_rate": 3.4903823639296552e-06, + "loss": 0.9481, + "step": 48290 + }, + { + "epoch": 0.728528764065281, + "grad_norm": 0.6553870771314737, + "learning_rate": 3.486749658034414e-06, + "loss": 0.9656, + "step": 48300 + }, + { + "epoch": 0.7286795981779239, + "grad_norm": 0.6849661676382964, + "learning_rate": 3.4831184443374276e-06, + "loss": 0.9397, + "step": 48310 + }, + { + "epoch": 0.7288304322905669, + "grad_norm": 0.6876784222169127, + "learning_rate": 3.479488723670622e-06, + "loss": 0.9546, + "step": 48320 + }, + { + "epoch": 0.7289812664032097, + "grad_norm": 0.655923041881297, + "learning_rate": 3.4758604968655685e-06, + "loss": 0.9554, + "step": 48330 + }, + { + "epoch": 0.7291321005158526, + "grad_norm": 0.8117552231917999, + "learning_rate": 3.4722337647534985e-06, + "loss": 0.9667, + "step": 48340 + }, + { + "epoch": 0.7292829346284956, + "grad_norm": 0.6558883278702126, + "learning_rate": 3.4686085281653113e-06, + "loss": 0.9611, + "step": 48350 + }, + { + "epoch": 0.7294337687411385, + "grad_norm": 0.6373339892826034, + "learning_rate": 3.46498478793155e-06, + "loss": 0.9496, + "step": 48360 + }, + { + "epoch": 0.7295846028537815, + "grad_norm": 0.7043900421531033, + "learning_rate": 3.4613625448824174e-06, + "loss": 0.9577, + "step": 48370 + }, + { + "epoch": 0.7297354369664243, + "grad_norm": 0.6850358824310634, + "learning_rate": 3.4577417998477814e-06, + "loss": 0.9507, + "step": 48380 + }, + { + "epoch": 0.7298862710790672, + "grad_norm": 0.7063788157338459, + "learning_rate": 3.4541225536571634e-06, + "loss": 0.9913, + "step": 48390 + }, + { + "epoch": 0.7300371051917102, + "grad_norm": 0.6461293103100348, + "learning_rate": 3.450504807139733e-06, + "loss": 0.946, + "step": 48400 + }, + { + "epoch": 0.7301879393043531, + "grad_norm": 0.7235753133038256, + "learning_rate": 3.4468885611243308e-06, + "loss": 0.9657, + "step": 48410 + }, + { + "epoch": 0.7303387734169959, + "grad_norm": 0.6662443967215633, + "learning_rate": 3.4432738164394386e-06, + "loss": 0.9619, + "step": 48420 + }, + { + "epoch": 0.7304896075296389, + "grad_norm": 0.6572516371688739, + "learning_rate": 3.439660573913208e-06, + "loss": 0.9653, + "step": 48430 + }, + { + "epoch": 0.7306404416422818, + "grad_norm": 0.680804971978832, + "learning_rate": 3.4360488343734366e-06, + "loss": 0.9406, + "step": 48440 + }, + { + "epoch": 0.7307912757549248, + "grad_norm": 0.678480640750069, + "learning_rate": 3.4324385986475774e-06, + "loss": 0.9543, + "step": 48450 + }, + { + "epoch": 0.7309421098675677, + "grad_norm": 0.6662689376449245, + "learning_rate": 3.4288298675627485e-06, + "loss": 0.9605, + "step": 48460 + }, + { + "epoch": 0.7310929439802105, + "grad_norm": 0.7477519299029478, + "learning_rate": 3.425222641945716e-06, + "loss": 0.9657, + "step": 48470 + }, + { + "epoch": 0.7312437780928535, + "grad_norm": 0.704319510960076, + "learning_rate": 3.4216169226228955e-06, + "loss": 0.9414, + "step": 48480 + }, + { + "epoch": 0.7313946122054964, + "grad_norm": 0.7250075406381105, + "learning_rate": 3.4180127104203743e-06, + "loss": 0.9636, + "step": 48490 + }, + { + "epoch": 0.7315454463181393, + "grad_norm": 0.6521789956857255, + "learning_rate": 3.4144100061638797e-06, + "loss": 0.9552, + "step": 48500 + }, + { + "epoch": 0.7316962804307823, + "grad_norm": 0.6840081547484006, + "learning_rate": 3.410808810678794e-06, + "loss": 0.9686, + "step": 48510 + }, + { + "epoch": 0.7318471145434251, + "grad_norm": 0.6526753919481436, + "learning_rate": 3.407209124790165e-06, + "loss": 0.9585, + "step": 48520 + }, + { + "epoch": 0.731997948656068, + "grad_norm": 0.6526521687165605, + "learning_rate": 3.4036109493226833e-06, + "loss": 0.9635, + "step": 48530 + }, + { + "epoch": 0.732148782768711, + "grad_norm": 0.6756223956364444, + "learning_rate": 3.4000142851007013e-06, + "loss": 0.9584, + "step": 48540 + }, + { + "epoch": 0.7322996168813539, + "grad_norm": 0.6963556788002552, + "learning_rate": 3.3964191329482175e-06, + "loss": 0.9404, + "step": 48550 + }, + { + "epoch": 0.7324504509939967, + "grad_norm": 0.636194686450013, + "learning_rate": 3.392825493688894e-06, + "loss": 0.9546, + "step": 48560 + }, + { + "epoch": 0.7326012851066397, + "grad_norm": 0.6832168877945413, + "learning_rate": 3.389233368146033e-06, + "loss": 0.9589, + "step": 48570 + }, + { + "epoch": 0.7327521192192826, + "grad_norm": 0.7057549232217641, + "learning_rate": 3.3856427571426054e-06, + "loss": 0.9651, + "step": 48580 + }, + { + "epoch": 0.7329029533319256, + "grad_norm": 0.7274982306938597, + "learning_rate": 3.3820536615012187e-06, + "loss": 0.9585, + "step": 48590 + }, + { + "epoch": 0.7330537874445685, + "grad_norm": 0.6905698507691416, + "learning_rate": 3.3784660820441515e-06, + "loss": 0.9472, + "step": 48600 + }, + { + "epoch": 0.7332046215572113, + "grad_norm": 0.6810296113284262, + "learning_rate": 3.3748800195933183e-06, + "loss": 0.9526, + "step": 48610 + }, + { + "epoch": 0.7333554556698543, + "grad_norm": 0.6923266083522853, + "learning_rate": 3.371295474970293e-06, + "loss": 0.9493, + "step": 48620 + }, + { + "epoch": 0.7335062897824972, + "grad_norm": 0.7081104582791201, + "learning_rate": 3.3677124489963054e-06, + "loss": 0.9622, + "step": 48630 + }, + { + "epoch": 0.7336571238951402, + "grad_norm": 0.7003477381067906, + "learning_rate": 3.364130942492232e-06, + "loss": 0.9569, + "step": 48640 + }, + { + "epoch": 0.7338079580077831, + "grad_norm": 0.6297387241612874, + "learning_rate": 3.360550956278599e-06, + "loss": 0.9464, + "step": 48650 + }, + { + "epoch": 0.7339587921204259, + "grad_norm": 0.6799606236944886, + "learning_rate": 3.3569724911755942e-06, + "loss": 0.923, + "step": 48660 + }, + { + "epoch": 0.7341096262330689, + "grad_norm": 0.6918216034651646, + "learning_rate": 3.3533955480030454e-06, + "loss": 0.9695, + "step": 48670 + }, + { + "epoch": 0.7342604603457118, + "grad_norm": 0.7188458005693118, + "learning_rate": 3.349820127580444e-06, + "loss": 0.9523, + "step": 48680 + }, + { + "epoch": 0.7344112944583547, + "grad_norm": 0.6846283923304505, + "learning_rate": 3.3462462307269216e-06, + "loss": 0.9569, + "step": 48690 + }, + { + "epoch": 0.7345621285709976, + "grad_norm": 0.6946656243261684, + "learning_rate": 3.342673858261262e-06, + "loss": 0.9547, + "step": 48700 + }, + { + "epoch": 0.7347129626836405, + "grad_norm": 0.6793755840269571, + "learning_rate": 3.3391030110019083e-06, + "loss": 0.9369, + "step": 48710 + }, + { + "epoch": 0.7348637967962834, + "grad_norm": 0.7515926370570382, + "learning_rate": 3.3355336897669432e-06, + "loss": 0.962, + "step": 48720 + }, + { + "epoch": 0.7350146309089264, + "grad_norm": 0.6503092271815465, + "learning_rate": 3.3319658953741074e-06, + "loss": 0.9379, + "step": 48730 + }, + { + "epoch": 0.7351654650215693, + "grad_norm": 0.6718042846279744, + "learning_rate": 3.3283996286407937e-06, + "loss": 0.9499, + "step": 48740 + }, + { + "epoch": 0.7353162991342121, + "grad_norm": 0.6366059317412271, + "learning_rate": 3.3248348903840367e-06, + "loss": 0.9379, + "step": 48750 + }, + { + "epoch": 0.7354671332468551, + "grad_norm": 0.684312704553655, + "learning_rate": 3.3212716814205226e-06, + "loss": 0.951, + "step": 48760 + }, + { + "epoch": 0.735617967359498, + "grad_norm": 0.6817997876702219, + "learning_rate": 3.3177100025665944e-06, + "loss": 0.9434, + "step": 48770 + }, + { + "epoch": 0.735768801472141, + "grad_norm": 0.6808071814088082, + "learning_rate": 3.3141498546382376e-06, + "loss": 0.9572, + "step": 48780 + }, + { + "epoch": 0.7359196355847839, + "grad_norm": 0.6980894037797378, + "learning_rate": 3.310591238451085e-06, + "loss": 0.9574, + "step": 48790 + }, + { + "epoch": 0.7360704696974267, + "grad_norm": 0.6576876483953572, + "learning_rate": 3.30703415482043e-06, + "loss": 0.9591, + "step": 48800 + }, + { + "epoch": 0.7362213038100697, + "grad_norm": 0.6760332344362207, + "learning_rate": 3.3034786045612e-06, + "loss": 0.9427, + "step": 48810 + }, + { + "epoch": 0.7363721379227126, + "grad_norm": 0.7269937386269855, + "learning_rate": 3.299924588487984e-06, + "loss": 0.9535, + "step": 48820 + }, + { + "epoch": 0.7365229720353555, + "grad_norm": 0.6470148429691329, + "learning_rate": 3.2963721074150136e-06, + "loss": 0.9496, + "step": 48830 + }, + { + "epoch": 0.7366738061479984, + "grad_norm": 0.6518220982731986, + "learning_rate": 3.2928211621561623e-06, + "loss": 0.9751, + "step": 48840 + }, + { + "epoch": 0.7368246402606413, + "grad_norm": 0.6581480632169208, + "learning_rate": 3.2892717535249672e-06, + "loss": 0.9493, + "step": 48850 + }, + { + "epoch": 0.7369754743732843, + "grad_norm": 0.6814076617430598, + "learning_rate": 3.2857238823346015e-06, + "loss": 0.9591, + "step": 48860 + }, + { + "epoch": 0.7371263084859272, + "grad_norm": 0.650527701340711, + "learning_rate": 3.282177549397886e-06, + "loss": 0.9527, + "step": 48870 + }, + { + "epoch": 0.7372771425985701, + "grad_norm": 0.6618353565830998, + "learning_rate": 3.2786327555272947e-06, + "loss": 0.9531, + "step": 48880 + }, + { + "epoch": 0.737427976711213, + "grad_norm": 0.676623157875576, + "learning_rate": 3.2750895015349506e-06, + "loss": 0.9678, + "step": 48890 + }, + { + "epoch": 0.7375788108238559, + "grad_norm": 0.6799743748777216, + "learning_rate": 3.271547788232613e-06, + "loss": 0.9608, + "step": 48900 + }, + { + "epoch": 0.7377296449364988, + "grad_norm": 0.6570325028926893, + "learning_rate": 3.268007616431703e-06, + "loss": 0.9751, + "step": 48910 + }, + { + "epoch": 0.7378804790491418, + "grad_norm": 0.6920185252747016, + "learning_rate": 3.2644689869432777e-06, + "loss": 0.9493, + "step": 48920 + }, + { + "epoch": 0.7380313131617847, + "grad_norm": 0.6941041961901341, + "learning_rate": 3.2609319005780382e-06, + "loss": 0.9487, + "step": 48930 + }, + { + "epoch": 0.7381821472744275, + "grad_norm": 0.6623763483115399, + "learning_rate": 3.2573963581463474e-06, + "loss": 0.9405, + "step": 48940 + }, + { + "epoch": 0.7383329813870705, + "grad_norm": 0.6426503191385126, + "learning_rate": 3.2538623604581967e-06, + "loss": 0.953, + "step": 48950 + }, + { + "epoch": 0.7384838154997134, + "grad_norm": 0.6589254523717816, + "learning_rate": 3.2503299083232376e-06, + "loss": 0.9453, + "step": 48960 + }, + { + "epoch": 0.7386346496123564, + "grad_norm": 0.6943136142418689, + "learning_rate": 3.2467990025507603e-06, + "loss": 0.9555, + "step": 48970 + }, + { + "epoch": 0.7387854837249992, + "grad_norm": 0.6724802927726654, + "learning_rate": 3.243269643949698e-06, + "loss": 0.9367, + "step": 48980 + }, + { + "epoch": 0.7389363178376421, + "grad_norm": 0.6893556414355514, + "learning_rate": 3.23974183332864e-06, + "loss": 0.9295, + "step": 48990 + }, + { + "epoch": 0.7390871519502851, + "grad_norm": 0.7250138946862996, + "learning_rate": 3.2362155714958123e-06, + "loss": 0.9587, + "step": 49000 + }, + { + "epoch": 0.739237986062928, + "grad_norm": 0.6578381297858147, + "learning_rate": 3.232690859259083e-06, + "loss": 0.9429, + "step": 49010 + }, + { + "epoch": 0.739388820175571, + "grad_norm": 0.7272005825139507, + "learning_rate": 3.22916769742598e-06, + "loss": 0.9613, + "step": 49020 + }, + { + "epoch": 0.7395396542882138, + "grad_norm": 0.7095325512376434, + "learning_rate": 3.22564608680366e-06, + "loss": 0.9659, + "step": 49030 + }, + { + "epoch": 0.7396904884008567, + "grad_norm": 0.6553369476457473, + "learning_rate": 3.22212602819893e-06, + "loss": 0.9597, + "step": 49040 + }, + { + "epoch": 0.7398413225134997, + "grad_norm": 0.6475145811476151, + "learning_rate": 3.2186075224182432e-06, + "loss": 0.9433, + "step": 49050 + }, + { + "epoch": 0.7399921566261426, + "grad_norm": 0.6422653890695608, + "learning_rate": 3.2150905702677016e-06, + "loss": 0.9548, + "step": 49060 + }, + { + "epoch": 0.7401429907387855, + "grad_norm": 0.7143043386980256, + "learning_rate": 3.2115751725530384e-06, + "loss": 0.9634, + "step": 49070 + }, + { + "epoch": 0.7402938248514284, + "grad_norm": 0.6597726204127182, + "learning_rate": 3.208061330079644e-06, + "loss": 0.9674, + "step": 49080 + }, + { + "epoch": 0.7404446589640713, + "grad_norm": 0.647544377974944, + "learning_rate": 3.2045490436525407e-06, + "loss": 0.9656, + "step": 49090 + }, + { + "epoch": 0.7405954930767142, + "grad_norm": 0.6756738202904594, + "learning_rate": 3.2010383140764055e-06, + "loss": 0.9362, + "step": 49100 + }, + { + "epoch": 0.7407463271893572, + "grad_norm": 0.7504677182153664, + "learning_rate": 3.197529142155552e-06, + "loss": 0.955, + "step": 49110 + }, + { + "epoch": 0.740897161302, + "grad_norm": 0.6538694073499207, + "learning_rate": 3.1940215286939334e-06, + "loss": 0.9651, + "step": 49120 + }, + { + "epoch": 0.741047995414643, + "grad_norm": 0.6624180655311687, + "learning_rate": 3.1905154744951583e-06, + "loss": 0.9653, + "step": 49130 + }, + { + "epoch": 0.7411988295272859, + "grad_norm": 0.6906122968660331, + "learning_rate": 3.187010980362467e-06, + "loss": 0.948, + "step": 49140 + }, + { + "epoch": 0.7413496636399288, + "grad_norm": 0.6498235931285608, + "learning_rate": 3.183508047098742e-06, + "loss": 0.9651, + "step": 49150 + }, + { + "epoch": 0.7415004977525718, + "grad_norm": 0.7473675012763751, + "learning_rate": 3.1800066755065196e-06, + "loss": 0.9518, + "step": 49160 + }, + { + "epoch": 0.7416513318652146, + "grad_norm": 0.7175724219430406, + "learning_rate": 3.176506866387966e-06, + "loss": 0.9605, + "step": 49170 + }, + { + "epoch": 0.7418021659778575, + "grad_norm": 0.6711311598213318, + "learning_rate": 3.173008620544892e-06, + "loss": 0.9765, + "step": 49180 + }, + { + "epoch": 0.7419530000905005, + "grad_norm": 0.6635478009295179, + "learning_rate": 3.169511938778759e-06, + "loss": 0.9351, + "step": 49190 + }, + { + "epoch": 0.7421038342031434, + "grad_norm": 0.6757351817915115, + "learning_rate": 3.1660168218906575e-06, + "loss": 0.9607, + "step": 49200 + }, + { + "epoch": 0.7422546683157863, + "grad_norm": 0.6744921163293357, + "learning_rate": 3.1625232706813314e-06, + "loss": 0.9566, + "step": 49210 + }, + { + "epoch": 0.7424055024284292, + "grad_norm": 0.6781303861328806, + "learning_rate": 3.159031285951153e-06, + "loss": 0.963, + "step": 49220 + }, + { + "epoch": 0.7425563365410721, + "grad_norm": 0.7000794237518131, + "learning_rate": 3.1555408685001477e-06, + "loss": 0.9563, + "step": 49230 + }, + { + "epoch": 0.742707170653715, + "grad_norm": 0.6969197229630805, + "learning_rate": 3.152052019127978e-06, + "loss": 0.9535, + "step": 49240 + }, + { + "epoch": 0.742858004766358, + "grad_norm": 0.6812012240332004, + "learning_rate": 3.1485647386339445e-06, + "loss": 0.9657, + "step": 49250 + }, + { + "epoch": 0.7430088388790008, + "grad_norm": 0.6588815238843346, + "learning_rate": 3.145079027816986e-06, + "loss": 0.9587, + "step": 49260 + }, + { + "epoch": 0.7431596729916438, + "grad_norm": 0.6587484621202538, + "learning_rate": 3.141594887475693e-06, + "loss": 0.9626, + "step": 49270 + }, + { + "epoch": 0.7433105071042867, + "grad_norm": 0.6707019831227758, + "learning_rate": 3.138112318408284e-06, + "loss": 0.9654, + "step": 49280 + }, + { + "epoch": 0.7434613412169296, + "grad_norm": 0.7004917744829773, + "learning_rate": 3.13463132141262e-06, + "loss": 0.9744, + "step": 49290 + }, + { + "epoch": 0.7436121753295726, + "grad_norm": 0.6770432770633643, + "learning_rate": 3.13115189728621e-06, + "loss": 0.9442, + "step": 49300 + }, + { + "epoch": 0.7437630094422154, + "grad_norm": 0.6959793647208663, + "learning_rate": 3.1276740468261945e-06, + "loss": 0.9666, + "step": 49310 + }, + { + "epoch": 0.7439138435548583, + "grad_norm": 0.7315203053853487, + "learning_rate": 3.1241977708293514e-06, + "loss": 0.9741, + "step": 49320 + }, + { + "epoch": 0.7440646776675013, + "grad_norm": 0.6695292978813447, + "learning_rate": 3.12072307009211e-06, + "loss": 0.9738, + "step": 49330 + }, + { + "epoch": 0.7442155117801442, + "grad_norm": 0.6629649965622257, + "learning_rate": 3.1172499454105245e-06, + "loss": 0.9626, + "step": 49340 + }, + { + "epoch": 0.7443663458927872, + "grad_norm": 0.7006713195031807, + "learning_rate": 3.1137783975802993e-06, + "loss": 0.9652, + "step": 49350 + }, + { + "epoch": 0.74451718000543, + "grad_norm": 0.6679319500700931, + "learning_rate": 3.11030842739677e-06, + "loss": 0.9733, + "step": 49360 + }, + { + "epoch": 0.7446680141180729, + "grad_norm": 0.6694803068523899, + "learning_rate": 3.106840035654911e-06, + "loss": 0.9579, + "step": 49370 + }, + { + "epoch": 0.7448188482307159, + "grad_norm": 0.7174413136844882, + "learning_rate": 3.103373223149342e-06, + "loss": 0.9415, + "step": 49380 + }, + { + "epoch": 0.7449696823433588, + "grad_norm": 0.6827789182289059, + "learning_rate": 3.0999079906743167e-06, + "loss": 0.9714, + "step": 49390 + }, + { + "epoch": 0.7451205164560016, + "grad_norm": 0.7206388939613737, + "learning_rate": 3.096444339023722e-06, + "loss": 0.9557, + "step": 49400 + }, + { + "epoch": 0.7452713505686446, + "grad_norm": 0.6950685939181, + "learning_rate": 3.0929822689910937e-06, + "loss": 0.9571, + "step": 49410 + }, + { + "epoch": 0.7454221846812875, + "grad_norm": 0.741125293495853, + "learning_rate": 3.0895217813695943e-06, + "loss": 0.9685, + "step": 49420 + }, + { + "epoch": 0.7455730187939305, + "grad_norm": 0.6878143286344042, + "learning_rate": 3.086062876952026e-06, + "loss": 0.9818, + "step": 49430 + }, + { + "epoch": 0.7457238529065734, + "grad_norm": 0.6467557167627954, + "learning_rate": 3.082605556530838e-06, + "loss": 0.9809, + "step": 49440 + }, + { + "epoch": 0.7458746870192162, + "grad_norm": 0.6471526575777377, + "learning_rate": 3.079149820898103e-06, + "loss": 0.9437, + "step": 49450 + }, + { + "epoch": 0.7460255211318592, + "grad_norm": 0.7250500524813266, + "learning_rate": 3.0756956708455356e-06, + "loss": 0.9482, + "step": 49460 + }, + { + "epoch": 0.7461763552445021, + "grad_norm": 0.7080143690870961, + "learning_rate": 3.0722431071644944e-06, + "loss": 0.9397, + "step": 49470 + }, + { + "epoch": 0.746327189357145, + "grad_norm": 0.6908695976237413, + "learning_rate": 3.068792130645961e-06, + "loss": 0.9699, + "step": 49480 + }, + { + "epoch": 0.746478023469788, + "grad_norm": 0.6959148813682914, + "learning_rate": 3.0653427420805693e-06, + "loss": 0.9464, + "step": 49490 + }, + { + "epoch": 0.7466288575824308, + "grad_norm": 0.7800262323553042, + "learning_rate": 3.0618949422585762e-06, + "loss": 0.9581, + "step": 49500 + }, + { + "epoch": 0.7467796916950737, + "grad_norm": 0.6574472219982915, + "learning_rate": 3.0584487319698765e-06, + "loss": 0.9476, + "step": 49510 + }, + { + "epoch": 0.7469305258077167, + "grad_norm": 0.6551965103923679, + "learning_rate": 3.0550041120040086e-06, + "loss": 0.9555, + "step": 49520 + }, + { + "epoch": 0.7470813599203596, + "grad_norm": 0.6414981183133306, + "learning_rate": 3.051561083150141e-06, + "loss": 0.9645, + "step": 49530 + }, + { + "epoch": 0.7472321940330025, + "grad_norm": 0.6724742408772101, + "learning_rate": 3.0481196461970732e-06, + "loss": 0.9427, + "step": 49540 + }, + { + "epoch": 0.7473830281456454, + "grad_norm": 0.6650176595039514, + "learning_rate": 3.044679801933248e-06, + "loss": 0.9472, + "step": 49550 + }, + { + "epoch": 0.7475338622582883, + "grad_norm": 0.681347453575809, + "learning_rate": 3.0412415511467453e-06, + "loss": 0.9444, + "step": 49560 + }, + { + "epoch": 0.7476846963709313, + "grad_norm": 0.6578298895700636, + "learning_rate": 3.037804894625268e-06, + "loss": 0.9441, + "step": 49570 + }, + { + "epoch": 0.7478355304835742, + "grad_norm": 0.6957840468905188, + "learning_rate": 3.0343698331561665e-06, + "loss": 0.9304, + "step": 49580 + }, + { + "epoch": 0.747986364596217, + "grad_norm": 0.7175973334430433, + "learning_rate": 3.030936367526417e-06, + "loss": 0.9641, + "step": 49590 + }, + { + "epoch": 0.74813719870886, + "grad_norm": 0.731820712895854, + "learning_rate": 3.0275044985226297e-06, + "loss": 0.9452, + "step": 49600 + }, + { + "epoch": 0.7482880328215029, + "grad_norm": 0.6325289696663545, + "learning_rate": 3.0240742269310606e-06, + "loss": 1.0056, + "step": 49610 + }, + { + "epoch": 0.7484388669341459, + "grad_norm": 0.7146699384720396, + "learning_rate": 3.020645553537582e-06, + "loss": 0.9544, + "step": 49620 + }, + { + "epoch": 0.7485897010467888, + "grad_norm": 0.6720525055114916, + "learning_rate": 3.0172184791277193e-06, + "loss": 0.9522, + "step": 49630 + }, + { + "epoch": 0.7487405351594316, + "grad_norm": 0.663684801946581, + "learning_rate": 3.013793004486617e-06, + "loss": 0.9381, + "step": 49640 + }, + { + "epoch": 0.7488913692720746, + "grad_norm": 0.6581042966587608, + "learning_rate": 3.0103691303990546e-06, + "loss": 0.958, + "step": 49650 + }, + { + "epoch": 0.7490422033847175, + "grad_norm": 0.6690474370783325, + "learning_rate": 3.0069468576494553e-06, + "loss": 0.9688, + "step": 49660 + }, + { + "epoch": 0.7491930374973604, + "grad_norm": 0.6571167513532302, + "learning_rate": 3.0035261870218644e-06, + "loss": 0.9623, + "step": 49670 + }, + { + "epoch": 0.7493438716100033, + "grad_norm": 0.6778116213718027, + "learning_rate": 3.000107119299962e-06, + "loss": 0.9688, + "step": 49680 + }, + { + "epoch": 0.7494947057226462, + "grad_norm": 0.6741872479468691, + "learning_rate": 2.996689655267069e-06, + "loss": 0.9498, + "step": 49690 + }, + { + "epoch": 0.7496455398352891, + "grad_norm": 0.7041221732578754, + "learning_rate": 2.9932737957061266e-06, + "loss": 0.9582, + "step": 49700 + }, + { + "epoch": 0.7497963739479321, + "grad_norm": 0.6549406412075783, + "learning_rate": 2.9898595413997212e-06, + "loss": 0.9413, + "step": 49710 + }, + { + "epoch": 0.749947208060575, + "grad_norm": 0.6803020482284784, + "learning_rate": 2.986446893130058e-06, + "loss": 0.9656, + "step": 49720 + }, + { + "epoch": 0.7500980421732178, + "grad_norm": 0.6918708178225083, + "learning_rate": 2.9830358516789903e-06, + "loss": 0.9472, + "step": 49730 + }, + { + "epoch": 0.7502488762858608, + "grad_norm": 0.6510299238374918, + "learning_rate": 2.9796264178279855e-06, + "loss": 0.9595, + "step": 49740 + }, + { + "epoch": 0.7503997103985037, + "grad_norm": 0.6709167461249532, + "learning_rate": 2.976218592358158e-06, + "loss": 0.9681, + "step": 49750 + }, + { + "epoch": 0.7505505445111467, + "grad_norm": 0.7176397930854005, + "learning_rate": 2.9728123760502415e-06, + "loss": 0.9678, + "step": 49760 + }, + { + "epoch": 0.7507013786237896, + "grad_norm": 0.7418758948373395, + "learning_rate": 2.9694077696846123e-06, + "loss": 0.983, + "step": 49770 + }, + { + "epoch": 0.7508522127364324, + "grad_norm": 0.679900342045222, + "learning_rate": 2.9660047740412702e-06, + "loss": 0.9425, + "step": 49780 + }, + { + "epoch": 0.7510030468490754, + "grad_norm": 0.7461066394521345, + "learning_rate": 2.962603389899845e-06, + "loss": 0.9682, + "step": 49790 + }, + { + "epoch": 0.7511538809617183, + "grad_norm": 0.7360620291586555, + "learning_rate": 2.9592036180396057e-06, + "loss": 0.9632, + "step": 49800 + }, + { + "epoch": 0.7513047150743613, + "grad_norm": 0.6618565119464731, + "learning_rate": 2.955805459239445e-06, + "loss": 0.9525, + "step": 49810 + }, + { + "epoch": 0.7514555491870041, + "grad_norm": 0.6643365064497458, + "learning_rate": 2.952408914277882e-06, + "loss": 0.9702, + "step": 49820 + }, + { + "epoch": 0.751606383299647, + "grad_norm": 0.7235355004965733, + "learning_rate": 2.94901398393308e-06, + "loss": 0.9674, + "step": 49830 + }, + { + "epoch": 0.75175721741229, + "grad_norm": 0.748496435485276, + "learning_rate": 2.94562066898282e-06, + "loss": 0.9636, + "step": 49840 + }, + { + "epoch": 0.7519080515249329, + "grad_norm": 0.7113485828179061, + "learning_rate": 2.9422289702045146e-06, + "loss": 0.9722, + "step": 49850 + }, + { + "epoch": 0.7520588856375758, + "grad_norm": 0.6590743151906413, + "learning_rate": 2.938838888375215e-06, + "loss": 0.9523, + "step": 49860 + }, + { + "epoch": 0.7522097197502187, + "grad_norm": 0.644551783769592, + "learning_rate": 2.9354504242715887e-06, + "loss": 0.9594, + "step": 49870 + }, + { + "epoch": 0.7523605538628616, + "grad_norm": 0.6711211814500319, + "learning_rate": 2.932063578669946e-06, + "loss": 0.9611, + "step": 49880 + }, + { + "epoch": 0.7525113879755045, + "grad_norm": 0.7025154056253271, + "learning_rate": 2.9286783523462127e-06, + "loss": 0.9658, + "step": 49890 + }, + { + "epoch": 0.7526622220881475, + "grad_norm": 0.6773477981568181, + "learning_rate": 2.925294746075954e-06, + "loss": 0.9743, + "step": 49900 + }, + { + "epoch": 0.7528130562007904, + "grad_norm": 0.6888167081400327, + "learning_rate": 2.9219127606343646e-06, + "loss": 0.9603, + "step": 49910 + }, + { + "epoch": 0.7529638903134332, + "grad_norm": 0.6669207839123339, + "learning_rate": 2.918532396796261e-06, + "loss": 0.948, + "step": 49920 + }, + { + "epoch": 0.7531147244260762, + "grad_norm": 0.6580009833397301, + "learning_rate": 2.915153655336086e-06, + "loss": 0.9416, + "step": 49930 + }, + { + "epoch": 0.7532655585387191, + "grad_norm": 0.6504395119095195, + "learning_rate": 2.911776537027925e-06, + "loss": 0.957, + "step": 49940 + }, + { + "epoch": 0.7534163926513621, + "grad_norm": 0.6680372511029071, + "learning_rate": 2.9084010426454757e-06, + "loss": 0.9643, + "step": 49950 + }, + { + "epoch": 0.7535672267640049, + "grad_norm": 0.6662592060594518, + "learning_rate": 2.9050271729620706e-06, + "loss": 0.9664, + "step": 49960 + }, + { + "epoch": 0.7537180608766478, + "grad_norm": 0.7143156009482812, + "learning_rate": 2.9016549287506746e-06, + "loss": 0.9551, + "step": 49970 + }, + { + "epoch": 0.7538688949892908, + "grad_norm": 0.6803852072871021, + "learning_rate": 2.898284310783871e-06, + "loss": 0.9618, + "step": 49980 + }, + { + "epoch": 0.7540197291019337, + "grad_norm": 0.6290397566462973, + "learning_rate": 2.894915319833874e-06, + "loss": 0.9509, + "step": 49990 + }, + { + "epoch": 0.7541705632145767, + "grad_norm": 0.6452565658058835, + "learning_rate": 2.891547956672531e-06, + "loss": 0.9555, + "step": 50000 + }, + { + "epoch": 0.7543213973272195, + "grad_norm": 0.6885010053062123, + "learning_rate": 2.8881822220713086e-06, + "loss": 0.9688, + "step": 50010 + }, + { + "epoch": 0.7544722314398624, + "grad_norm": 0.655978145826037, + "learning_rate": 2.8848181168013e-06, + "loss": 0.9498, + "step": 50020 + }, + { + "epoch": 0.7546230655525054, + "grad_norm": 0.7150460541591175, + "learning_rate": 2.881455641633234e-06, + "loss": 0.9542, + "step": 50030 + }, + { + "epoch": 0.7547738996651483, + "grad_norm": 0.6328637390116363, + "learning_rate": 2.878094797337455e-06, + "loss": 0.9562, + "step": 50040 + }, + { + "epoch": 0.7549247337777912, + "grad_norm": 0.6640086679422483, + "learning_rate": 2.8747355846839407e-06, + "loss": 0.9585, + "step": 50050 + }, + { + "epoch": 0.7550755678904341, + "grad_norm": 0.6452334474228308, + "learning_rate": 2.871378004442297e-06, + "loss": 0.9456, + "step": 50060 + }, + { + "epoch": 0.755226402003077, + "grad_norm": 0.698365763226649, + "learning_rate": 2.8680220573817465e-06, + "loss": 0.9493, + "step": 50070 + }, + { + "epoch": 0.7553772361157199, + "grad_norm": 0.7791072402966552, + "learning_rate": 2.864667744271148e-06, + "loss": 0.9569, + "step": 50080 + }, + { + "epoch": 0.7555280702283629, + "grad_norm": 0.7300864636561449, + "learning_rate": 2.861315065878978e-06, + "loss": 0.9631, + "step": 50090 + }, + { + "epoch": 0.7556789043410057, + "grad_norm": 0.7688947627681881, + "learning_rate": 2.857964022973341e-06, + "loss": 0.9632, + "step": 50100 + }, + { + "epoch": 0.7558297384536486, + "grad_norm": 0.6441513236305126, + "learning_rate": 2.85461461632197e-06, + "loss": 0.9526, + "step": 50110 + }, + { + "epoch": 0.7559805725662916, + "grad_norm": 0.6841621091819293, + "learning_rate": 2.85126684669222e-06, + "loss": 0.9587, + "step": 50120 + }, + { + "epoch": 0.7561314066789345, + "grad_norm": 0.6542339941621191, + "learning_rate": 2.847920714851068e-06, + "loss": 0.9332, + "step": 50130 + }, + { + "epoch": 0.7562822407915775, + "grad_norm": 0.7550095396196196, + "learning_rate": 2.844576221565125e-06, + "loss": 0.924, + "step": 50140 + }, + { + "epoch": 0.7564330749042203, + "grad_norm": 0.7049902261827954, + "learning_rate": 2.841233367600614e-06, + "loss": 0.97, + "step": 50150 + }, + { + "epoch": 0.7565839090168632, + "grad_norm": 0.6513206181253206, + "learning_rate": 2.8378921537233972e-06, + "loss": 0.9424, + "step": 50160 + }, + { + "epoch": 0.7567347431295062, + "grad_norm": 0.636673822065561, + "learning_rate": 2.8345525806989494e-06, + "loss": 0.9603, + "step": 50170 + }, + { + "epoch": 0.7568855772421491, + "grad_norm": 0.7116746591020094, + "learning_rate": 2.8312146492923697e-06, + "loss": 0.9503, + "step": 50180 + }, + { + "epoch": 0.757036411354792, + "grad_norm": 0.7503592112578326, + "learning_rate": 2.82787836026839e-06, + "loss": 0.952, + "step": 50190 + }, + { + "epoch": 0.7571872454674349, + "grad_norm": 0.6945232362654299, + "learning_rate": 2.824543714391357e-06, + "loss": 0.9677, + "step": 50200 + }, + { + "epoch": 0.7573380795800778, + "grad_norm": 0.697435150001337, + "learning_rate": 2.8212107124252487e-06, + "loss": 0.9461, + "step": 50210 + }, + { + "epoch": 0.7574889136927208, + "grad_norm": 0.6639256310286817, + "learning_rate": 2.8178793551336558e-06, + "loss": 0.9647, + "step": 50220 + }, + { + "epoch": 0.7576397478053637, + "grad_norm": 0.7320529885738508, + "learning_rate": 2.814549643279807e-06, + "loss": 0.9653, + "step": 50230 + }, + { + "epoch": 0.7577905819180065, + "grad_norm": 0.8319898808936791, + "learning_rate": 2.8112215776265372e-06, + "loss": 0.9521, + "step": 50240 + }, + { + "epoch": 0.7579414160306495, + "grad_norm": 0.7129400177009095, + "learning_rate": 2.8078951589363202e-06, + "loss": 0.9542, + "step": 50250 + }, + { + "epoch": 0.7580922501432924, + "grad_norm": 0.6571128000610802, + "learning_rate": 2.8045703879712415e-06, + "loss": 0.937, + "step": 50260 + }, + { + "epoch": 0.7582430842559353, + "grad_norm": 0.707178072807923, + "learning_rate": 2.8012472654930102e-06, + "loss": 0.9296, + "step": 50270 + }, + { + "epoch": 0.7583939183685783, + "grad_norm": 0.6765469460959981, + "learning_rate": 2.7979257922629655e-06, + "loss": 0.9724, + "step": 50280 + }, + { + "epoch": 0.7585447524812211, + "grad_norm": 0.6182274527842677, + "learning_rate": 2.7946059690420567e-06, + "loss": 0.9543, + "step": 50290 + }, + { + "epoch": 0.758695586593864, + "grad_norm": 0.6417944602652365, + "learning_rate": 2.7912877965908682e-06, + "loss": 0.9548, + "step": 50300 + }, + { + "epoch": 0.758846420706507, + "grad_norm": 0.7118796159805829, + "learning_rate": 2.7879712756695977e-06, + "loss": 0.9502, + "step": 50310 + }, + { + "epoch": 0.7589972548191499, + "grad_norm": 0.6690825655248122, + "learning_rate": 2.784656407038062e-06, + "loss": 0.9713, + "step": 50320 + }, + { + "epoch": 0.7591480889317929, + "grad_norm": 0.6216934733216747, + "learning_rate": 2.781343191455711e-06, + "loss": 0.9368, + "step": 50330 + }, + { + "epoch": 0.7592989230444357, + "grad_norm": 0.6600282255972487, + "learning_rate": 2.7780316296816055e-06, + "loss": 0.9494, + "step": 50340 + }, + { + "epoch": 0.7594497571570786, + "grad_norm": 0.6773033058495916, + "learning_rate": 2.7747217224744283e-06, + "loss": 0.9553, + "step": 50350 + }, + { + "epoch": 0.7596005912697216, + "grad_norm": 0.6475110340913675, + "learning_rate": 2.77141347059249e-06, + "loss": 0.9637, + "step": 50360 + }, + { + "epoch": 0.7597514253823645, + "grad_norm": 0.636134345227625, + "learning_rate": 2.7681068747937135e-06, + "loss": 0.957, + "step": 50370 + }, + { + "epoch": 0.7599022594950073, + "grad_norm": 0.6502101937356548, + "learning_rate": 2.7648019358356502e-06, + "loss": 0.9626, + "step": 50380 + }, + { + "epoch": 0.7600530936076503, + "grad_norm": 0.7429523765501097, + "learning_rate": 2.761498654475464e-06, + "loss": 0.9407, + "step": 50390 + }, + { + "epoch": 0.7602039277202932, + "grad_norm": 0.6879566353447821, + "learning_rate": 2.758197031469948e-06, + "loss": 0.9552, + "step": 50400 + }, + { + "epoch": 0.7603547618329362, + "grad_norm": 0.7287771705146173, + "learning_rate": 2.754897067575504e-06, + "loss": 0.9556, + "step": 50410 + }, + { + "epoch": 0.7605055959455791, + "grad_norm": 0.7289013756808029, + "learning_rate": 2.7515987635481666e-06, + "loss": 0.9479, + "step": 50420 + }, + { + "epoch": 0.7606564300582219, + "grad_norm": 0.6699710706477634, + "learning_rate": 2.7483021201435777e-06, + "loss": 0.9707, + "step": 50430 + }, + { + "epoch": 0.7608072641708649, + "grad_norm": 0.6414798525432615, + "learning_rate": 2.745007138117012e-06, + "loss": 0.9458, + "step": 50440 + }, + { + "epoch": 0.7609580982835078, + "grad_norm": 0.6487160295395703, + "learning_rate": 2.7417138182233506e-06, + "loss": 0.9666, + "step": 50450 + }, + { + "epoch": 0.7611089323961507, + "grad_norm": 0.6769406901113326, + "learning_rate": 2.738422161217098e-06, + "loss": 0.9455, + "step": 50460 + }, + { + "epoch": 0.7612597665087937, + "grad_norm": 0.701996548724651, + "learning_rate": 2.7351321678523847e-06, + "loss": 0.9629, + "step": 50470 + }, + { + "epoch": 0.7614106006214365, + "grad_norm": 0.6541879796213996, + "learning_rate": 2.7318438388829525e-06, + "loss": 0.9702, + "step": 50480 + }, + { + "epoch": 0.7615614347340794, + "grad_norm": 1.0966486858651898, + "learning_rate": 2.7285571750621597e-06, + "loss": 0.9692, + "step": 50490 + }, + { + "epoch": 0.7617122688467224, + "grad_norm": 0.7208058580322921, + "learning_rate": 2.7252721771429946e-06, + "loss": 0.971, + "step": 50500 + }, + { + "epoch": 0.7618631029593653, + "grad_norm": 0.6249716649989774, + "learning_rate": 2.721988845878053e-06, + "loss": 0.9479, + "step": 50510 + }, + { + "epoch": 0.7620139370720082, + "grad_norm": 0.6672946711190693, + "learning_rate": 2.7187071820195486e-06, + "loss": 0.9594, + "step": 50520 + }, + { + "epoch": 0.7621647711846511, + "grad_norm": 0.6895647190458647, + "learning_rate": 2.715427186319324e-06, + "loss": 0.971, + "step": 50530 + }, + { + "epoch": 0.762315605297294, + "grad_norm": 0.6493054350543906, + "learning_rate": 2.7121488595288257e-06, + "loss": 0.952, + "step": 50540 + }, + { + "epoch": 0.762466439409937, + "grad_norm": 0.6527739270548824, + "learning_rate": 2.7088722023991297e-06, + "loss": 0.9446, + "step": 50550 + }, + { + "epoch": 0.7626172735225799, + "grad_norm": 0.7094825839162462, + "learning_rate": 2.7055972156809253e-06, + "loss": 0.9524, + "step": 50560 + }, + { + "epoch": 0.7627681076352227, + "grad_norm": 0.6758099891064352, + "learning_rate": 2.702323900124513e-06, + "loss": 0.9453, + "step": 50570 + }, + { + "epoch": 0.7629189417478657, + "grad_norm": 0.7237029908832255, + "learning_rate": 2.6990522564798214e-06, + "loss": 0.9287, + "step": 50580 + }, + { + "epoch": 0.7630697758605086, + "grad_norm": 0.7029167919878375, + "learning_rate": 2.6957822854963877e-06, + "loss": 0.9881, + "step": 50590 + }, + { + "epoch": 0.7632206099731516, + "grad_norm": 0.6521340500084504, + "learning_rate": 2.6925139879233664e-06, + "loss": 0.9393, + "step": 50600 + }, + { + "epoch": 0.7633714440857945, + "grad_norm": 0.6336477468637722, + "learning_rate": 2.689247364509535e-06, + "loss": 0.9585, + "step": 50610 + }, + { + "epoch": 0.7635222781984373, + "grad_norm": 0.6889290330723653, + "learning_rate": 2.685982416003282e-06, + "loss": 0.9538, + "step": 50620 + }, + { + "epoch": 0.7636731123110803, + "grad_norm": 0.6623849898216786, + "learning_rate": 2.68271914315261e-06, + "loss": 0.9624, + "step": 50630 + }, + { + "epoch": 0.7638239464237232, + "grad_norm": 0.6464545460843597, + "learning_rate": 2.6794575467051463e-06, + "loss": 0.9486, + "step": 50640 + }, + { + "epoch": 0.7639747805363661, + "grad_norm": 0.6473744783911408, + "learning_rate": 2.676197627408127e-06, + "loss": 0.952, + "step": 50650 + }, + { + "epoch": 0.764125614649009, + "grad_norm": 0.6711322545815506, + "learning_rate": 2.672939386008402e-06, + "loss": 0.9516, + "step": 50660 + }, + { + "epoch": 0.7642764487616519, + "grad_norm": 0.6502789061900058, + "learning_rate": 2.6696828232524472e-06, + "loss": 0.9562, + "step": 50670 + }, + { + "epoch": 0.7644272828742948, + "grad_norm": 0.6572072528050433, + "learning_rate": 2.666427939886345e-06, + "loss": 0.9531, + "step": 50680 + }, + { + "epoch": 0.7645781169869378, + "grad_norm": 0.6628908230486799, + "learning_rate": 2.6631747366557903e-06, + "loss": 0.9485, + "step": 50690 + }, + { + "epoch": 0.7647289510995807, + "grad_norm": 0.6592350170910112, + "learning_rate": 2.659923214306107e-06, + "loss": 0.9538, + "step": 50700 + }, + { + "epoch": 0.7648797852122236, + "grad_norm": 0.6763335074894217, + "learning_rate": 2.656673373582218e-06, + "loss": 0.9367, + "step": 50710 + }, + { + "epoch": 0.7650306193248665, + "grad_norm": 0.691233585220721, + "learning_rate": 2.653425215228671e-06, + "loss": 0.9548, + "step": 50720 + }, + { + "epoch": 0.7651814534375094, + "grad_norm": 0.6605447423952137, + "learning_rate": 2.6501787399896272e-06, + "loss": 0.9465, + "step": 50730 + }, + { + "epoch": 0.7653322875501524, + "grad_norm": 0.6893429373841709, + "learning_rate": 2.6469339486088574e-06, + "loss": 0.96, + "step": 50740 + }, + { + "epoch": 0.7654831216627953, + "grad_norm": 0.6740041142715971, + "learning_rate": 2.6436908418297525e-06, + "loss": 0.9753, + "step": 50750 + }, + { + "epoch": 0.7656339557754381, + "grad_norm": 0.6585884818677841, + "learning_rate": 2.6404494203953135e-06, + "loss": 0.9376, + "step": 50760 + }, + { + "epoch": 0.7657847898880811, + "grad_norm": 0.6356516046186972, + "learning_rate": 2.637209685048152e-06, + "loss": 0.9483, + "step": 50770 + }, + { + "epoch": 0.765935624000724, + "grad_norm": 0.6899405818398087, + "learning_rate": 2.6339716365305045e-06, + "loss": 0.9756, + "step": 50780 + }, + { + "epoch": 0.766086458113367, + "grad_norm": 0.6901436422911067, + "learning_rate": 2.6307352755842107e-06, + "loss": 0.9481, + "step": 50790 + }, + { + "epoch": 0.7662372922260098, + "grad_norm": 0.6669810868385853, + "learning_rate": 2.6275006029507256e-06, + "loss": 0.9419, + "step": 50800 + }, + { + "epoch": 0.7663881263386527, + "grad_norm": 0.634257360657343, + "learning_rate": 2.6242676193711224e-06, + "loss": 0.9483, + "step": 50810 + }, + { + "epoch": 0.7665389604512957, + "grad_norm": 0.6579783273777113, + "learning_rate": 2.6210363255860807e-06, + "loss": 0.9496, + "step": 50820 + }, + { + "epoch": 0.7666897945639386, + "grad_norm": 0.6980883051532025, + "learning_rate": 2.6178067223358996e-06, + "loss": 0.9521, + "step": 50830 + }, + { + "epoch": 0.7668406286765815, + "grad_norm": 0.6973398730130976, + "learning_rate": 2.6145788103604853e-06, + "loss": 0.9651, + "step": 50840 + }, + { + "epoch": 0.7669914627892244, + "grad_norm": 0.6607593949687702, + "learning_rate": 2.6113525903993564e-06, + "loss": 0.9527, + "step": 50850 + }, + { + "epoch": 0.7671422969018673, + "grad_norm": 0.6568723226706553, + "learning_rate": 2.6081280631916517e-06, + "loss": 0.9546, + "step": 50860 + }, + { + "epoch": 0.7672931310145102, + "grad_norm": 0.6495480321026159, + "learning_rate": 2.6049052294761113e-06, + "loss": 0.9659, + "step": 50870 + }, + { + "epoch": 0.7674439651271532, + "grad_norm": 0.6475657731415086, + "learning_rate": 2.6016840899910965e-06, + "loss": 0.9398, + "step": 50880 + }, + { + "epoch": 0.7675947992397961, + "grad_norm": 0.6422330798659969, + "learning_rate": 2.5984646454745732e-06, + "loss": 0.9481, + "step": 50890 + }, + { + "epoch": 0.767745633352439, + "grad_norm": 0.7256589830266865, + "learning_rate": 2.5952468966641276e-06, + "loss": 0.9499, + "step": 50900 + }, + { + "epoch": 0.7678964674650819, + "grad_norm": 0.6656493672242738, + "learning_rate": 2.5920308442969443e-06, + "loss": 0.9581, + "step": 50910 + }, + { + "epoch": 0.7680473015777248, + "grad_norm": 0.6476221228211491, + "learning_rate": 2.5888164891098357e-06, + "loss": 0.9298, + "step": 50920 + }, + { + "epoch": 0.7681981356903678, + "grad_norm": 0.6357857920520124, + "learning_rate": 2.585603831839213e-06, + "loss": 0.9415, + "step": 50930 + }, + { + "epoch": 0.7683489698030106, + "grad_norm": 1.8857740179582798, + "learning_rate": 2.582392873221098e-06, + "loss": 0.9534, + "step": 50940 + }, + { + "epoch": 0.7684998039156535, + "grad_norm": 0.6846765668015358, + "learning_rate": 2.579183613991134e-06, + "loss": 0.973, + "step": 50950 + }, + { + "epoch": 0.7686506380282965, + "grad_norm": 0.6811508247769695, + "learning_rate": 2.5759760548845647e-06, + "loss": 0.9428, + "step": 50960 + }, + { + "epoch": 0.7688014721409394, + "grad_norm": 0.646212943333934, + "learning_rate": 2.5727701966362505e-06, + "loss": 0.9387, + "step": 50970 + }, + { + "epoch": 0.7689523062535824, + "grad_norm": 0.669762572407794, + "learning_rate": 2.56956603998066e-06, + "loss": 0.9516, + "step": 50980 + }, + { + "epoch": 0.7691031403662252, + "grad_norm": 0.6768878514982702, + "learning_rate": 2.5663635856518664e-06, + "loss": 0.9795, + "step": 50990 + }, + { + "epoch": 0.7692539744788681, + "grad_norm": 0.6578935133401596, + "learning_rate": 2.5631628343835657e-06, + "loss": 0.9598, + "step": 51000 + }, + { + "epoch": 0.7694048085915111, + "grad_norm": 0.7065462895004935, + "learning_rate": 2.559963786909052e-06, + "loss": 0.9766, + "step": 51010 + }, + { + "epoch": 0.769555642704154, + "grad_norm": 0.6808251319037482, + "learning_rate": 2.5567664439612318e-06, + "loss": 0.9456, + "step": 51020 + }, + { + "epoch": 0.7697064768167969, + "grad_norm": 0.7396880608403634, + "learning_rate": 2.553570806272626e-06, + "loss": 0.9269, + "step": 51030 + }, + { + "epoch": 0.7698573109294398, + "grad_norm": 0.6611710272423822, + "learning_rate": 2.550376874575359e-06, + "loss": 0.9497, + "step": 51040 + }, + { + "epoch": 0.7700081450420827, + "grad_norm": 0.640569241904494, + "learning_rate": 2.5471846496011686e-06, + "loss": 0.9766, + "step": 51050 + }, + { + "epoch": 0.7701589791547256, + "grad_norm": 0.6741263849370126, + "learning_rate": 2.5439941320813965e-06, + "loss": 0.9527, + "step": 51060 + }, + { + "epoch": 0.7703098132673686, + "grad_norm": 0.6846268293254351, + "learning_rate": 2.540805322747002e-06, + "loss": 0.9359, + "step": 51070 + }, + { + "epoch": 0.7704606473800114, + "grad_norm": 0.672996596822265, + "learning_rate": 2.5376182223285415e-06, + "loss": 0.9355, + "step": 51080 + }, + { + "epoch": 0.7706114814926543, + "grad_norm": 0.6921754491644024, + "learning_rate": 2.5344328315561915e-06, + "loss": 0.9575, + "step": 51090 + }, + { + "epoch": 0.7707623156052973, + "grad_norm": 0.646769042293536, + "learning_rate": 2.5312491511597247e-06, + "loss": 0.9519, + "step": 51100 + }, + { + "epoch": 0.7709131497179402, + "grad_norm": 0.7202602534357033, + "learning_rate": 2.5280671818685364e-06, + "loss": 0.9777, + "step": 51110 + }, + { + "epoch": 0.7710639838305832, + "grad_norm": 0.7678548454313479, + "learning_rate": 2.524886924411616e-06, + "loss": 0.9489, + "step": 51120 + }, + { + "epoch": 0.771214817943226, + "grad_norm": 0.6512627085570654, + "learning_rate": 2.5217083795175667e-06, + "loss": 0.9527, + "step": 51130 + }, + { + "epoch": 0.7713656520558689, + "grad_norm": 0.6430158133725274, + "learning_rate": 2.5185315479146023e-06, + "loss": 0.9416, + "step": 51140 + }, + { + "epoch": 0.7715164861685119, + "grad_norm": 0.6489217362512951, + "learning_rate": 2.5153564303305413e-06, + "loss": 0.952, + "step": 51150 + }, + { + "epoch": 0.7716673202811548, + "grad_norm": 0.6951051138147727, + "learning_rate": 2.5121830274928027e-06, + "loss": 0.9708, + "step": 51160 + }, + { + "epoch": 0.7718181543937978, + "grad_norm": 0.7002900546994706, + "learning_rate": 2.5090113401284276e-06, + "loss": 0.9605, + "step": 51170 + }, + { + "epoch": 0.7719689885064406, + "grad_norm": 0.6476499543042148, + "learning_rate": 2.5058413689640526e-06, + "loss": 0.9457, + "step": 51180 + }, + { + "epoch": 0.7721198226190835, + "grad_norm": 0.6904353074591559, + "learning_rate": 2.502673114725921e-06, + "loss": 0.955, + "step": 51190 + }, + { + "epoch": 0.7722706567317265, + "grad_norm": 0.6598176300790108, + "learning_rate": 2.499506578139892e-06, + "loss": 0.9499, + "step": 51200 + }, + { + "epoch": 0.7724214908443694, + "grad_norm": 0.6752734746939945, + "learning_rate": 2.4963417599314187e-06, + "loss": 0.9595, + "step": 51210 + }, + { + "epoch": 0.7725723249570122, + "grad_norm": 0.6694376387141958, + "learning_rate": 2.49317866082557e-06, + "loss": 0.9581, + "step": 51220 + }, + { + "epoch": 0.7727231590696552, + "grad_norm": 0.7715139444140932, + "learning_rate": 2.4900172815470225e-06, + "loss": 0.9413, + "step": 51230 + }, + { + "epoch": 0.7728739931822981, + "grad_norm": 0.6724158801746718, + "learning_rate": 2.4868576228200457e-06, + "loss": 0.9585, + "step": 51240 + }, + { + "epoch": 0.773024827294941, + "grad_norm": 0.6563066355835677, + "learning_rate": 2.4836996853685327e-06, + "loss": 0.9461, + "step": 51250 + }, + { + "epoch": 0.773175661407584, + "grad_norm": 0.7223646231709617, + "learning_rate": 2.480543469915968e-06, + "loss": 0.9663, + "step": 51260 + }, + { + "epoch": 0.7733264955202268, + "grad_norm": 0.6731541469206822, + "learning_rate": 2.4773889771854442e-06, + "loss": 0.9397, + "step": 51270 + }, + { + "epoch": 0.7734773296328697, + "grad_norm": 0.7011511652044784, + "learning_rate": 2.4742362078996673e-06, + "loss": 0.9626, + "step": 51280 + }, + { + "epoch": 0.7736281637455127, + "grad_norm": 0.7656404547840503, + "learning_rate": 2.4710851627809395e-06, + "loss": 0.9503, + "step": 51290 + }, + { + "epoch": 0.7737789978581556, + "grad_norm": 0.6579650070611628, + "learning_rate": 2.46793584255117e-06, + "loss": 0.9564, + "step": 51300 + }, + { + "epoch": 0.7739298319707986, + "grad_norm": 0.758092570526209, + "learning_rate": 2.4647882479318797e-06, + "loss": 0.9578, + "step": 51310 + }, + { + "epoch": 0.7740806660834414, + "grad_norm": 0.7228468303889668, + "learning_rate": 2.4616423796441846e-06, + "loss": 0.9632, + "step": 51320 + }, + { + "epoch": 0.7742315001960843, + "grad_norm": 0.6989727658202693, + "learning_rate": 2.4584982384088065e-06, + "loss": 0.9569, + "step": 51330 + }, + { + "epoch": 0.7743823343087273, + "grad_norm": 0.6449750118163896, + "learning_rate": 2.4553558249460796e-06, + "loss": 0.9601, + "step": 51340 + }, + { + "epoch": 0.7745331684213702, + "grad_norm": 0.6379600286432624, + "learning_rate": 2.4522151399759365e-06, + "loss": 0.9566, + "step": 51350 + }, + { + "epoch": 0.774684002534013, + "grad_norm": 0.68747610796553, + "learning_rate": 2.449076184217909e-06, + "loss": 0.941, + "step": 51360 + }, + { + "epoch": 0.774834836646656, + "grad_norm": 0.6737498022256275, + "learning_rate": 2.445938958391141e-06, + "loss": 0.9522, + "step": 51370 + }, + { + "epoch": 0.7749856707592989, + "grad_norm": 0.6785467562168429, + "learning_rate": 2.4428034632143803e-06, + "loss": 0.9501, + "step": 51380 + }, + { + "epoch": 0.7751365048719419, + "grad_norm": 0.7311096314636112, + "learning_rate": 2.439669699405969e-06, + "loss": 0.9612, + "step": 51390 + }, + { + "epoch": 0.7752873389845848, + "grad_norm": 0.724026818340762, + "learning_rate": 2.4365376676838646e-06, + "loss": 0.9648, + "step": 51400 + }, + { + "epoch": 0.7754381730972276, + "grad_norm": 0.6488467564928669, + "learning_rate": 2.4334073687656156e-06, + "loss": 0.9649, + "step": 51410 + }, + { + "epoch": 0.7755890072098706, + "grad_norm": 0.6475798962705788, + "learning_rate": 2.4302788033683853e-06, + "loss": 0.9539, + "step": 51420 + }, + { + "epoch": 0.7757398413225135, + "grad_norm": 0.6618873024908879, + "learning_rate": 2.427151972208932e-06, + "loss": 0.9572, + "step": 51430 + }, + { + "epoch": 0.7758906754351564, + "grad_norm": 0.8686260098587163, + "learning_rate": 2.4240268760036144e-06, + "loss": 0.9461, + "step": 51440 + }, + { + "epoch": 0.7760415095477994, + "grad_norm": 0.7006633617397653, + "learning_rate": 2.4209035154684047e-06, + "loss": 0.9372, + "step": 51450 + }, + { + "epoch": 0.7761923436604422, + "grad_norm": 0.7259111767723098, + "learning_rate": 2.4177818913188677e-06, + "loss": 0.9472, + "step": 51460 + }, + { + "epoch": 0.7763431777730851, + "grad_norm": 0.7294135136159815, + "learning_rate": 2.414662004270172e-06, + "loss": 0.957, + "step": 51470 + }, + { + "epoch": 0.7764940118857281, + "grad_norm": 0.713851195376746, + "learning_rate": 2.411543855037093e-06, + "loss": 0.9563, + "step": 51480 + }, + { + "epoch": 0.776644845998371, + "grad_norm": 0.706276360451949, + "learning_rate": 2.4084274443340005e-06, + "loss": 0.954, + "step": 51490 + }, + { + "epoch": 0.7767956801110139, + "grad_norm": 0.7121484985567546, + "learning_rate": 2.405312772874876e-06, + "loss": 0.9344, + "step": 51500 + }, + { + "epoch": 0.7769465142236568, + "grad_norm": 0.628251569444885, + "learning_rate": 2.4021998413732927e-06, + "loss": 0.9597, + "step": 51510 + }, + { + "epoch": 0.7770973483362997, + "grad_norm": 0.6640066090588406, + "learning_rate": 2.399088650542428e-06, + "loss": 0.9507, + "step": 51520 + }, + { + "epoch": 0.7772481824489427, + "grad_norm": 0.6773550036186599, + "learning_rate": 2.395979201095068e-06, + "loss": 0.9533, + "step": 51530 + }, + { + "epoch": 0.7773990165615856, + "grad_norm": 0.6444345791866106, + "learning_rate": 2.392871493743585e-06, + "loss": 0.9521, + "step": 51540 + }, + { + "epoch": 0.7775498506742284, + "grad_norm": 0.6386283420319163, + "learning_rate": 2.389765529199969e-06, + "loss": 0.9638, + "step": 51550 + }, + { + "epoch": 0.7777006847868714, + "grad_norm": 0.661826746451305, + "learning_rate": 2.386661308175795e-06, + "loss": 0.9411, + "step": 51560 + }, + { + "epoch": 0.7778515188995143, + "grad_norm": 0.6824386542760921, + "learning_rate": 2.3835588313822535e-06, + "loss": 0.9587, + "step": 51570 + }, + { + "epoch": 0.7780023530121573, + "grad_norm": 0.6619044145164914, + "learning_rate": 2.380458099530122e-06, + "loss": 0.9765, + "step": 51580 + }, + { + "epoch": 0.7781531871248002, + "grad_norm": 0.666845082983518, + "learning_rate": 2.377359113329788e-06, + "loss": 0.9428, + "step": 51590 + }, + { + "epoch": 0.778304021237443, + "grad_norm": 0.6607859213108628, + "learning_rate": 2.3742618734912337e-06, + "loss": 0.9437, + "step": 51600 + }, + { + "epoch": 0.778454855350086, + "grad_norm": 0.690720127121636, + "learning_rate": 2.3711663807240402e-06, + "loss": 0.9673, + "step": 51610 + }, + { + "epoch": 0.7786056894627289, + "grad_norm": 0.6692700121434569, + "learning_rate": 2.368072635737396e-06, + "loss": 0.9584, + "step": 51620 + }, + { + "epoch": 0.7787565235753718, + "grad_norm": 0.6437618204331236, + "learning_rate": 2.3649806392400796e-06, + "loss": 0.9582, + "step": 51630 + }, + { + "epoch": 0.7789073576880147, + "grad_norm": 0.6836629590676317, + "learning_rate": 2.361890391940477e-06, + "loss": 0.9607, + "step": 51640 + }, + { + "epoch": 0.7790581918006576, + "grad_norm": 0.6537469548365333, + "learning_rate": 2.3588018945465684e-06, + "loss": 0.9473, + "step": 51650 + }, + { + "epoch": 0.7792090259133005, + "grad_norm": 0.669625780769981, + "learning_rate": 2.3557151477659314e-06, + "loss": 0.9391, + "step": 51660 + }, + { + "epoch": 0.7793598600259435, + "grad_norm": 0.6841787479918764, + "learning_rate": 2.3526301523057516e-06, + "loss": 0.9402, + "step": 51670 + }, + { + "epoch": 0.7795106941385864, + "grad_norm": 0.6641773848225478, + "learning_rate": 2.349546908872803e-06, + "loss": 0.972, + "step": 51680 + }, + { + "epoch": 0.7796615282512293, + "grad_norm": 0.7298643391846465, + "learning_rate": 2.3464654181734626e-06, + "loss": 0.9656, + "step": 51690 + }, + { + "epoch": 0.7798123623638722, + "grad_norm": 0.6991267589057817, + "learning_rate": 2.343385680913709e-06, + "loss": 0.9477, + "step": 51700 + }, + { + "epoch": 0.7799631964765151, + "grad_norm": 0.6975693790603372, + "learning_rate": 2.340307697799112e-06, + "loss": 0.9331, + "step": 51710 + }, + { + "epoch": 0.7801140305891581, + "grad_norm": 0.6664898676191042, + "learning_rate": 2.3372314695348454e-06, + "loss": 0.9642, + "step": 51720 + }, + { + "epoch": 0.780264864701801, + "grad_norm": 0.7364177223577266, + "learning_rate": 2.3341569968256817e-06, + "loss": 0.9908, + "step": 51730 + }, + { + "epoch": 0.7804156988144438, + "grad_norm": 0.693138093000827, + "learning_rate": 2.331084280375987e-06, + "loss": 0.9394, + "step": 51740 + }, + { + "epoch": 0.7805665329270868, + "grad_norm": 0.6565456601493035, + "learning_rate": 2.3280133208897226e-06, + "loss": 0.9447, + "step": 51750 + }, + { + "epoch": 0.7807173670397297, + "grad_norm": 0.6990859503574661, + "learning_rate": 2.324944119070457e-06, + "loss": 0.9473, + "step": 51760 + }, + { + "epoch": 0.7808682011523727, + "grad_norm": 0.7387217377336596, + "learning_rate": 2.321876675621345e-06, + "loss": 0.9671, + "step": 51770 + }, + { + "epoch": 0.7810190352650155, + "grad_norm": 0.6662677560490149, + "learning_rate": 2.3188109912451494e-06, + "loss": 0.9413, + "step": 51780 + }, + { + "epoch": 0.7811698693776584, + "grad_norm": 0.6696325717764414, + "learning_rate": 2.3157470666442215e-06, + "loss": 0.9383, + "step": 51790 + }, + { + "epoch": 0.7813207034903014, + "grad_norm": 0.6485204849385495, + "learning_rate": 2.31268490252051e-06, + "loss": 0.9362, + "step": 51800 + }, + { + "epoch": 0.7814715376029443, + "grad_norm": 0.696475096410113, + "learning_rate": 2.309624499575567e-06, + "loss": 0.9447, + "step": 51810 + }, + { + "epoch": 0.7816223717155872, + "grad_norm": 0.6842807846734219, + "learning_rate": 2.3065658585105357e-06, + "loss": 0.9749, + "step": 51820 + }, + { + "epoch": 0.7817732058282301, + "grad_norm": 0.670076547753936, + "learning_rate": 2.3035089800261513e-06, + "loss": 0.9329, + "step": 51830 + }, + { + "epoch": 0.781924039940873, + "grad_norm": 0.6738044985629178, + "learning_rate": 2.3004538648227593e-06, + "loss": 0.9421, + "step": 51840 + }, + { + "epoch": 0.782074874053516, + "grad_norm": 0.6631938375621778, + "learning_rate": 2.2974005136002875e-06, + "loss": 0.9567, + "step": 51850 + }, + { + "epoch": 0.7822257081661589, + "grad_norm": 0.6622705806936449, + "learning_rate": 2.294348927058263e-06, + "loss": 0.9555, + "step": 51860 + }, + { + "epoch": 0.7823765422788018, + "grad_norm": 0.7175412724603266, + "learning_rate": 2.2912991058958123e-06, + "loss": 0.9209, + "step": 51870 + }, + { + "epoch": 0.7825273763914447, + "grad_norm": 0.6778591312777628, + "learning_rate": 2.2882510508116586e-06, + "loss": 0.9323, + "step": 51880 + }, + { + "epoch": 0.7826782105040876, + "grad_norm": 0.6329791992386539, + "learning_rate": 2.2852047625041107e-06, + "loss": 0.9533, + "step": 51890 + }, + { + "epoch": 0.7828290446167305, + "grad_norm": 0.6927773272328065, + "learning_rate": 2.282160241671085e-06, + "loss": 0.9497, + "step": 51900 + }, + { + "epoch": 0.7829798787293735, + "grad_norm": 0.6689041505417951, + "learning_rate": 2.2791174890100832e-06, + "loss": 0.9587, + "step": 51910 + }, + { + "epoch": 0.7831307128420163, + "grad_norm": 0.6549759383126578, + "learning_rate": 2.27607650521821e-06, + "loss": 0.9454, + "step": 51920 + }, + { + "epoch": 0.7832815469546592, + "grad_norm": 0.6424228264956445, + "learning_rate": 2.2730372909921573e-06, + "loss": 0.9444, + "step": 51930 + }, + { + "epoch": 0.7834323810673022, + "grad_norm": 0.6899870603612328, + "learning_rate": 2.2699998470282124e-06, + "loss": 0.9484, + "step": 51940 + }, + { + "epoch": 0.7835832151799451, + "grad_norm": 0.6824832006338409, + "learning_rate": 2.266964174022267e-06, + "loss": 0.9486, + "step": 51950 + }, + { + "epoch": 0.783734049292588, + "grad_norm": 0.7207984683047143, + "learning_rate": 2.2639302726697944e-06, + "loss": 0.9535, + "step": 51960 + }, + { + "epoch": 0.7838848834052309, + "grad_norm": 0.6760800543991786, + "learning_rate": 2.260898143665866e-06, + "loss": 0.9593, + "step": 51970 + }, + { + "epoch": 0.7840357175178738, + "grad_norm": 0.7182680061888863, + "learning_rate": 2.2578677877051534e-06, + "loss": 0.9629, + "step": 51980 + }, + { + "epoch": 0.7841865516305168, + "grad_norm": 0.6538622913802459, + "learning_rate": 2.2548392054819145e-06, + "loss": 0.9549, + "step": 51990 + }, + { + "epoch": 0.7843373857431597, + "grad_norm": 0.6814616717433606, + "learning_rate": 2.251812397690001e-06, + "loss": 0.9659, + "step": 52000 + }, + { + "epoch": 0.7844882198558026, + "grad_norm": 0.700214573149894, + "learning_rate": 2.248787365022864e-06, + "loss": 0.9658, + "step": 52010 + }, + { + "epoch": 0.7846390539684455, + "grad_norm": 0.6825983538578014, + "learning_rate": 2.2457641081735448e-06, + "loss": 0.9545, + "step": 52020 + }, + { + "epoch": 0.7847898880810884, + "grad_norm": 0.6561497212803121, + "learning_rate": 2.242742627834672e-06, + "loss": 0.9606, + "step": 52030 + }, + { + "epoch": 0.7849407221937313, + "grad_norm": 0.6750734400634575, + "learning_rate": 2.2397229246984766e-06, + "loss": 0.9504, + "step": 52040 + }, + { + "epoch": 0.7850915563063743, + "grad_norm": 0.6535468868291622, + "learning_rate": 2.23670499945678e-06, + "loss": 0.9687, + "step": 52050 + }, + { + "epoch": 0.7852423904190171, + "grad_norm": 0.6676660749167407, + "learning_rate": 2.2336888528009905e-06, + "loss": 0.9709, + "step": 52060 + }, + { + "epoch": 0.78539322453166, + "grad_norm": 0.671786670563632, + "learning_rate": 2.2306744854221186e-06, + "loss": 0.9554, + "step": 52070 + }, + { + "epoch": 0.785544058644303, + "grad_norm": 0.6620657591502737, + "learning_rate": 2.2276618980107547e-06, + "loss": 0.9551, + "step": 52080 + }, + { + "epoch": 0.7856948927569459, + "grad_norm": 0.6622142713905615, + "learning_rate": 2.224651091257095e-06, + "loss": 0.9357, + "step": 52090 + }, + { + "epoch": 0.7858457268695889, + "grad_norm": 0.6642708901865094, + "learning_rate": 2.2216420658509196e-06, + "loss": 0.9541, + "step": 52100 + }, + { + "epoch": 0.7859965609822317, + "grad_norm": 0.6796036300927668, + "learning_rate": 2.218634822481597e-06, + "loss": 0.957, + "step": 52110 + }, + { + "epoch": 0.7861473950948746, + "grad_norm": 0.7326521831744619, + "learning_rate": 2.2156293618381e-06, + "loss": 0.9542, + "step": 52120 + }, + { + "epoch": 0.7862982292075176, + "grad_norm": 0.7335974693238457, + "learning_rate": 2.212625684608981e-06, + "loss": 0.9728, + "step": 52130 + }, + { + "epoch": 0.7864490633201605, + "grad_norm": 0.6511720045476299, + "learning_rate": 2.209623791482386e-06, + "loss": 0.961, + "step": 52140 + }, + { + "epoch": 0.7865998974328035, + "grad_norm": 0.648894546193952, + "learning_rate": 2.206623683146061e-06, + "loss": 0.9542, + "step": 52150 + }, + { + "epoch": 0.7867507315454463, + "grad_norm": 0.650445378791852, + "learning_rate": 2.2036253602873327e-06, + "loss": 0.9591, + "step": 52160 + }, + { + "epoch": 0.7869015656580892, + "grad_norm": 0.6382355948181034, + "learning_rate": 2.2006288235931194e-06, + "loss": 0.9316, + "step": 52170 + }, + { + "epoch": 0.7870523997707322, + "grad_norm": 0.6626915502750235, + "learning_rate": 2.197634073749941e-06, + "loss": 0.9556, + "step": 52180 + }, + { + "epoch": 0.7872032338833751, + "grad_norm": 0.7092840870672612, + "learning_rate": 2.194641111443894e-06, + "loss": 0.9362, + "step": 52190 + }, + { + "epoch": 0.7873540679960179, + "grad_norm": 0.6649100866425568, + "learning_rate": 2.191649937360677e-06, + "loss": 0.9636, + "step": 52200 + }, + { + "epoch": 0.7875049021086609, + "grad_norm": 0.6954745019172358, + "learning_rate": 2.1886605521855688e-06, + "loss": 0.9402, + "step": 52210 + }, + { + "epoch": 0.7876557362213038, + "grad_norm": 0.6784264621442158, + "learning_rate": 2.185672956603445e-06, + "loss": 0.939, + "step": 52220 + }, + { + "epoch": 0.7878065703339467, + "grad_norm": 0.6701713976059803, + "learning_rate": 2.1826871512987735e-06, + "loss": 0.94, + "step": 52230 + }, + { + "epoch": 0.7879574044465897, + "grad_norm": 0.7320507418129626, + "learning_rate": 2.1797031369556052e-06, + "loss": 0.9426, + "step": 52240 + }, + { + "epoch": 0.7881082385592325, + "grad_norm": 0.6926633002908039, + "learning_rate": 2.1767209142575807e-06, + "loss": 0.9492, + "step": 52250 + }, + { + "epoch": 0.7882590726718754, + "grad_norm": 0.6824491231548822, + "learning_rate": 2.1737404838879383e-06, + "loss": 0.9421, + "step": 52260 + }, + { + "epoch": 0.7884099067845184, + "grad_norm": 0.6404035084852084, + "learning_rate": 2.1707618465294976e-06, + "loss": 0.9441, + "step": 52270 + }, + { + "epoch": 0.7885607408971613, + "grad_norm": 0.6939823813494544, + "learning_rate": 2.1677850028646663e-06, + "loss": 0.9809, + "step": 52280 + }, + { + "epoch": 0.7887115750098043, + "grad_norm": 0.6656984842746583, + "learning_rate": 2.164809953575453e-06, + "loss": 0.9571, + "step": 52290 + }, + { + "epoch": 0.7888624091224471, + "grad_norm": 0.6493450500428738, + "learning_rate": 2.16183669934344e-06, + "loss": 0.9534, + "step": 52300 + }, + { + "epoch": 0.78901324323509, + "grad_norm": 0.6811683341019706, + "learning_rate": 2.1588652408498124e-06, + "loss": 0.9705, + "step": 52310 + }, + { + "epoch": 0.789164077347733, + "grad_norm": 0.6735793443350567, + "learning_rate": 2.1558955787753323e-06, + "loss": 0.9489, + "step": 52320 + }, + { + "epoch": 0.7893149114603759, + "grad_norm": 0.6787127752750866, + "learning_rate": 2.1529277138003536e-06, + "loss": 0.9506, + "step": 52330 + }, + { + "epoch": 0.7894657455730187, + "grad_norm": 0.6949375778576519, + "learning_rate": 2.1499616466048257e-06, + "loss": 0.9533, + "step": 52340 + }, + { + "epoch": 0.7896165796856617, + "grad_norm": 0.6504284494378421, + "learning_rate": 2.1469973778682774e-06, + "loss": 0.9429, + "step": 52350 + }, + { + "epoch": 0.7897674137983046, + "grad_norm": 0.6744858902334803, + "learning_rate": 2.1440349082698254e-06, + "loss": 0.9476, + "step": 52360 + }, + { + "epoch": 0.7899182479109476, + "grad_norm": 0.667653171998503, + "learning_rate": 2.141074238488182e-06, + "loss": 0.9457, + "step": 52370 + }, + { + "epoch": 0.7900690820235905, + "grad_norm": 0.6935983287244344, + "learning_rate": 2.1381153692016375e-06, + "loss": 0.9509, + "step": 52380 + }, + { + "epoch": 0.7902199161362333, + "grad_norm": 0.7317422733528648, + "learning_rate": 2.1351583010880785e-06, + "loss": 0.9559, + "step": 52390 + }, + { + "epoch": 0.7903707502488763, + "grad_norm": 0.661254082013803, + "learning_rate": 2.1322030348249765e-06, + "loss": 0.9328, + "step": 52400 + }, + { + "epoch": 0.7905215843615192, + "grad_norm": 0.6724432796739267, + "learning_rate": 2.1292495710893855e-06, + "loss": 0.9451, + "step": 52410 + }, + { + "epoch": 0.7906724184741621, + "grad_norm": 0.6697675826808249, + "learning_rate": 2.1262979105579483e-06, + "loss": 0.9385, + "step": 52420 + }, + { + "epoch": 0.7908232525868051, + "grad_norm": 0.7100259054854055, + "learning_rate": 2.1233480539069006e-06, + "loss": 0.9558, + "step": 52430 + }, + { + "epoch": 0.7909740866994479, + "grad_norm": 0.6790453691472886, + "learning_rate": 2.120400001812055e-06, + "loss": 0.9647, + "step": 52440 + }, + { + "epoch": 0.7911249208120908, + "grad_norm": 0.6649618407508213, + "learning_rate": 2.117453754948823e-06, + "loss": 0.9376, + "step": 52450 + }, + { + "epoch": 0.7912757549247338, + "grad_norm": 0.6352990610312023, + "learning_rate": 2.1145093139921903e-06, + "loss": 0.9658, + "step": 52460 + }, + { + "epoch": 0.7914265890373767, + "grad_norm": 0.6541803933392613, + "learning_rate": 2.1115666796167312e-06, + "loss": 0.9514, + "step": 52470 + }, + { + "epoch": 0.7915774231500196, + "grad_norm": 0.6494099177116237, + "learning_rate": 2.1086258524966165e-06, + "loss": 0.9401, + "step": 52480 + }, + { + "epoch": 0.7917282572626625, + "grad_norm": 0.7427859418437839, + "learning_rate": 2.1056868333055913e-06, + "loss": 0.9353, + "step": 52490 + }, + { + "epoch": 0.7918790913753054, + "grad_norm": 0.6521783158374882, + "learning_rate": 2.1027496227169884e-06, + "loss": 0.958, + "step": 52500 + }, + { + "epoch": 0.7920299254879484, + "grad_norm": 0.6528057323701165, + "learning_rate": 2.0998142214037345e-06, + "loss": 0.964, + "step": 52510 + }, + { + "epoch": 0.7921807596005913, + "grad_norm": 0.6877741001524983, + "learning_rate": 2.0968806300383304e-06, + "loss": 0.9525, + "step": 52520 + }, + { + "epoch": 0.7923315937132341, + "grad_norm": 0.6453164584579001, + "learning_rate": 2.093948849292867e-06, + "loss": 0.9394, + "step": 52530 + }, + { + "epoch": 0.7924824278258771, + "grad_norm": 0.6976682377489307, + "learning_rate": 2.091018879839024e-06, + "loss": 0.9304, + "step": 52540 + }, + { + "epoch": 0.79263326193852, + "grad_norm": 0.6634964440978793, + "learning_rate": 2.0880907223480653e-06, + "loss": 0.9495, + "step": 52550 + }, + { + "epoch": 0.792784096051163, + "grad_norm": 0.6632646665627323, + "learning_rate": 2.0851643774908304e-06, + "loss": 0.9467, + "step": 52560 + }, + { + "epoch": 0.7929349301638059, + "grad_norm": 0.6610455764156462, + "learning_rate": 2.082239845937758e-06, + "loss": 0.9483, + "step": 52570 + }, + { + "epoch": 0.7930857642764487, + "grad_norm": 0.6732218576734338, + "learning_rate": 2.0793171283588574e-06, + "loss": 0.9513, + "step": 52580 + }, + { + "epoch": 0.7932365983890917, + "grad_norm": 0.6539805069118783, + "learning_rate": 2.0763962254237358e-06, + "loss": 0.9732, + "step": 52590 + }, + { + "epoch": 0.7933874325017346, + "grad_norm": 0.7093691939202199, + "learning_rate": 2.073477137801574e-06, + "loss": 0.9656, + "step": 52600 + }, + { + "epoch": 0.7935382666143775, + "grad_norm": 0.679284027112106, + "learning_rate": 2.070559866161137e-06, + "loss": 0.9458, + "step": 52610 + }, + { + "epoch": 0.7936891007270204, + "grad_norm": 0.7157548437473448, + "learning_rate": 2.067644411170784e-06, + "loss": 0.9727, + "step": 52620 + }, + { + "epoch": 0.7938399348396633, + "grad_norm": 0.644289799857466, + "learning_rate": 2.064730773498448e-06, + "loss": 0.9504, + "step": 52630 + }, + { + "epoch": 0.7939907689523062, + "grad_norm": 0.6700978725844521, + "learning_rate": 2.0618189538116464e-06, + "loss": 0.9622, + "step": 52640 + }, + { + "epoch": 0.7941416030649492, + "grad_norm": 0.6573296933711876, + "learning_rate": 2.0589089527774896e-06, + "loss": 0.9402, + "step": 52650 + }, + { + "epoch": 0.7942924371775921, + "grad_norm": 0.6574182102735986, + "learning_rate": 2.0560007710626595e-06, + "loss": 0.9477, + "step": 52660 + }, + { + "epoch": 0.794443271290235, + "grad_norm": 0.6813618257165373, + "learning_rate": 2.053094409333425e-06, + "loss": 0.9441, + "step": 52670 + }, + { + "epoch": 0.7945941054028779, + "grad_norm": 0.638685746043896, + "learning_rate": 2.050189868255643e-06, + "loss": 0.9512, + "step": 52680 + }, + { + "epoch": 0.7947449395155208, + "grad_norm": 0.6600291490878342, + "learning_rate": 2.0472871484947467e-06, + "loss": 0.9373, + "step": 52690 + }, + { + "epoch": 0.7948957736281638, + "grad_norm": 0.6624608465864792, + "learning_rate": 2.0443862507157587e-06, + "loss": 0.9405, + "step": 52700 + }, + { + "epoch": 0.7950466077408067, + "grad_norm": 0.6523667988241353, + "learning_rate": 2.0414871755832733e-06, + "loss": 0.947, + "step": 52710 + }, + { + "epoch": 0.7951974418534495, + "grad_norm": 0.6490752718934177, + "learning_rate": 2.0385899237614824e-06, + "loss": 0.9427, + "step": 52720 + }, + { + "epoch": 0.7953482759660925, + "grad_norm": 0.6299579260402733, + "learning_rate": 2.035694495914146e-06, + "loss": 0.9351, + "step": 52730 + }, + { + "epoch": 0.7954991100787354, + "grad_norm": 0.6549181594719347, + "learning_rate": 2.0328008927046163e-06, + "loss": 0.9392, + "step": 52740 + }, + { + "epoch": 0.7956499441913784, + "grad_norm": 0.6671522264778093, + "learning_rate": 2.0299091147958184e-06, + "loss": 0.9486, + "step": 52750 + }, + { + "epoch": 0.7958007783040212, + "grad_norm": 0.649962451021287, + "learning_rate": 2.02701916285027e-06, + "loss": 0.9623, + "step": 52760 + }, + { + "epoch": 0.7959516124166641, + "grad_norm": 0.6492234639476637, + "learning_rate": 2.0241310375300627e-06, + "loss": 0.9621, + "step": 52770 + }, + { + "epoch": 0.7961024465293071, + "grad_norm": 0.7284324733792517, + "learning_rate": 2.021244739496867e-06, + "loss": 0.9548, + "step": 52780 + }, + { + "epoch": 0.79625328064195, + "grad_norm": 0.6892988076100772, + "learning_rate": 2.0183602694119462e-06, + "loss": 0.9406, + "step": 52790 + }, + { + "epoch": 0.7964041147545929, + "grad_norm": 0.639307524054499, + "learning_rate": 2.0154776279361356e-06, + "loss": 0.9634, + "step": 52800 + }, + { + "epoch": 0.7965549488672358, + "grad_norm": 0.6708626416267229, + "learning_rate": 2.01259681572985e-06, + "loss": 0.9565, + "step": 52810 + }, + { + "epoch": 0.7967057829798787, + "grad_norm": 0.6989593060155104, + "learning_rate": 2.0097178334530963e-06, + "loss": 0.9514, + "step": 52820 + }, + { + "epoch": 0.7968566170925216, + "grad_norm": 0.6647394326663357, + "learning_rate": 2.0068406817654494e-06, + "loss": 0.9499, + "step": 52830 + }, + { + "epoch": 0.7970074512051646, + "grad_norm": 0.6439373405817018, + "learning_rate": 2.0039653613260702e-06, + "loss": 0.9485, + "step": 52840 + }, + { + "epoch": 0.7971582853178075, + "grad_norm": 0.7628531385963639, + "learning_rate": 2.0010918727937045e-06, + "loss": 0.963, + "step": 52850 + }, + { + "epoch": 0.7973091194304504, + "grad_norm": 0.6643748225790836, + "learning_rate": 1.9982202168266684e-06, + "loss": 0.9362, + "step": 52860 + }, + { + "epoch": 0.7974599535430933, + "grad_norm": 0.667133229300319, + "learning_rate": 1.99535039408287e-06, + "loss": 0.9537, + "step": 52870 + }, + { + "epoch": 0.7976107876557362, + "grad_norm": 0.6552103235535524, + "learning_rate": 1.9924824052197856e-06, + "loss": 0.9283, + "step": 52880 + }, + { + "epoch": 0.7977616217683792, + "grad_norm": 0.6897666122136955, + "learning_rate": 1.9896162508944793e-06, + "loss": 0.9674, + "step": 52890 + }, + { + "epoch": 0.797912455881022, + "grad_norm": 0.6465780130470514, + "learning_rate": 1.986751931763595e-06, + "loss": 0.9475, + "step": 52900 + }, + { + "epoch": 0.7980632899936649, + "grad_norm": 0.6866970425370957, + "learning_rate": 1.9838894484833526e-06, + "loss": 0.9422, + "step": 52910 + }, + { + "epoch": 0.7982141241063079, + "grad_norm": 0.6708461026894448, + "learning_rate": 1.981028801709548e-06, + "loss": 0.9441, + "step": 52920 + }, + { + "epoch": 0.7983649582189508, + "grad_norm": 0.724101065385693, + "learning_rate": 1.978169992097567e-06, + "loss": 0.9538, + "step": 52930 + }, + { + "epoch": 0.7985157923315938, + "grad_norm": 0.7188598105927481, + "learning_rate": 1.9753130203023672e-06, + "loss": 0.9382, + "step": 52940 + }, + { + "epoch": 0.7986666264442366, + "grad_norm": 0.6434025743784054, + "learning_rate": 1.9724578869784815e-06, + "loss": 0.9485, + "step": 52950 + }, + { + "epoch": 0.7988174605568795, + "grad_norm": 0.6821382645729003, + "learning_rate": 1.969604592780032e-06, + "loss": 0.9582, + "step": 52960 + }, + { + "epoch": 0.7989682946695225, + "grad_norm": 0.7001568589926935, + "learning_rate": 1.9667531383607107e-06, + "loss": 0.9451, + "step": 52970 + }, + { + "epoch": 0.7991191287821654, + "grad_norm": 0.6705266940476797, + "learning_rate": 1.963903524373795e-06, + "loss": 0.9498, + "step": 52980 + }, + { + "epoch": 0.7992699628948083, + "grad_norm": 0.6631109875640527, + "learning_rate": 1.961055751472134e-06, + "loss": 0.9548, + "step": 52990 + }, + { + "epoch": 0.7994207970074512, + "grad_norm": 0.6839341384829652, + "learning_rate": 1.9582098203081554e-06, + "loss": 0.9476, + "step": 53000 + }, + { + "epoch": 0.7995716311200941, + "grad_norm": 0.6723215082510969, + "learning_rate": 1.955365731533875e-06, + "loss": 0.9492, + "step": 53010 + }, + { + "epoch": 0.799722465232737, + "grad_norm": 0.6644216348818084, + "learning_rate": 1.9525234858008734e-06, + "loss": 0.9525, + "step": 53020 + }, + { + "epoch": 0.79987329934538, + "grad_norm": 0.6591158214603298, + "learning_rate": 1.9496830837603144e-06, + "loss": 0.9525, + "step": 53030 + }, + { + "epoch": 0.8000241334580228, + "grad_norm": 0.6420726659118271, + "learning_rate": 1.9468445260629397e-06, + "loss": 0.9493, + "step": 53040 + }, + { + "epoch": 0.8001749675706658, + "grad_norm": 0.6329197957646137, + "learning_rate": 1.9440078133590736e-06, + "loss": 0.9575, + "step": 53050 + }, + { + "epoch": 0.8003258016833087, + "grad_norm": 0.677359734013385, + "learning_rate": 1.941172946298605e-06, + "loss": 0.9614, + "step": 53060 + }, + { + "epoch": 0.8004766357959516, + "grad_norm": 0.707816828471931, + "learning_rate": 1.9383399255310143e-06, + "loss": 0.9462, + "step": 53070 + }, + { + "epoch": 0.8006274699085946, + "grad_norm": 0.6731304609907145, + "learning_rate": 1.9355087517053493e-06, + "loss": 0.9457, + "step": 53080 + }, + { + "epoch": 0.8007783040212374, + "grad_norm": 0.6548605825466308, + "learning_rate": 1.932679425470233e-06, + "loss": 0.9595, + "step": 53090 + }, + { + "epoch": 0.8009291381338803, + "grad_norm": 0.6937933691955472, + "learning_rate": 1.9298519474738775e-06, + "loss": 0.9395, + "step": 53100 + }, + { + "epoch": 0.8010799722465233, + "grad_norm": 0.6491812441701712, + "learning_rate": 1.927026318364056e-06, + "loss": 0.974, + "step": 53110 + }, + { + "epoch": 0.8012308063591662, + "grad_norm": 0.6372556814728864, + "learning_rate": 1.9242025387881326e-06, + "loss": 0.9743, + "step": 53120 + }, + { + "epoch": 0.8013816404718092, + "grad_norm": 0.7021921316339249, + "learning_rate": 1.9213806093930354e-06, + "loss": 0.96, + "step": 53130 + }, + { + "epoch": 0.801532474584452, + "grad_norm": 0.6421671637946395, + "learning_rate": 1.9185605308252743e-06, + "loss": 0.9689, + "step": 53140 + }, + { + "epoch": 0.8016833086970949, + "grad_norm": 0.7012474220167578, + "learning_rate": 1.9157423037309386e-06, + "loss": 0.9297, + "step": 53150 + }, + { + "epoch": 0.8018341428097379, + "grad_norm": 0.679058555440088, + "learning_rate": 1.9129259287556868e-06, + "loss": 0.9567, + "step": 53160 + }, + { + "epoch": 0.8019849769223808, + "grad_norm": 0.6441468140870441, + "learning_rate": 1.9101114065447534e-06, + "loss": 0.9799, + "step": 53170 + }, + { + "epoch": 0.8021358110350236, + "grad_norm": 0.6637720725995868, + "learning_rate": 1.9072987377429552e-06, + "loss": 0.928, + "step": 53180 + }, + { + "epoch": 0.8022866451476666, + "grad_norm": 0.6779269546581981, + "learning_rate": 1.90448792299468e-06, + "loss": 0.9478, + "step": 53190 + }, + { + "epoch": 0.8024374792603095, + "grad_norm": 0.6402921309901448, + "learning_rate": 1.901678962943886e-06, + "loss": 0.9589, + "step": 53200 + }, + { + "epoch": 0.8025883133729524, + "grad_norm": 0.6469111483736513, + "learning_rate": 1.8988718582341147e-06, + "loss": 0.9398, + "step": 53210 + }, + { + "epoch": 0.8027391474855954, + "grad_norm": 0.64169564568327, + "learning_rate": 1.8960666095084824e-06, + "loss": 0.9566, + "step": 53220 + }, + { + "epoch": 0.8028899815982382, + "grad_norm": 0.6802508552370318, + "learning_rate": 1.8932632174096711e-06, + "loss": 0.9343, + "step": 53230 + }, + { + "epoch": 0.8030408157108812, + "grad_norm": 0.658263853939891, + "learning_rate": 1.8904616825799483e-06, + "loss": 0.9472, + "step": 53240 + }, + { + "epoch": 0.8031916498235241, + "grad_norm": 0.6750647752579954, + "learning_rate": 1.8876620056611461e-06, + "loss": 0.9438, + "step": 53250 + }, + { + "epoch": 0.803342483936167, + "grad_norm": 0.6778941969283852, + "learning_rate": 1.8848641872946817e-06, + "loss": 0.9608, + "step": 53260 + }, + { + "epoch": 0.80349331804881, + "grad_norm": 0.6836586683253463, + "learning_rate": 1.8820682281215375e-06, + "loss": 0.9507, + "step": 53270 + }, + { + "epoch": 0.8036441521614528, + "grad_norm": 0.6863292346987975, + "learning_rate": 1.8792741287822702e-06, + "loss": 0.9694, + "step": 53280 + }, + { + "epoch": 0.8037949862740957, + "grad_norm": 0.6528288522998088, + "learning_rate": 1.87648188991702e-06, + "loss": 0.9579, + "step": 53290 + }, + { + "epoch": 0.8039458203867387, + "grad_norm": 0.6600441448648099, + "learning_rate": 1.8736915121654898e-06, + "loss": 0.9559, + "step": 53300 + }, + { + "epoch": 0.8040966544993816, + "grad_norm": 0.6645340268572139, + "learning_rate": 1.87090299616696e-06, + "loss": 0.9418, + "step": 53310 + }, + { + "epoch": 0.8042474886120244, + "grad_norm": 0.6866450647488552, + "learning_rate": 1.8681163425602877e-06, + "loss": 0.9453, + "step": 53320 + }, + { + "epoch": 0.8043983227246674, + "grad_norm": 0.6659908332228179, + "learning_rate": 1.8653315519839011e-06, + "loss": 0.9522, + "step": 53330 + }, + { + "epoch": 0.8045491568373103, + "grad_norm": 0.6628892617692512, + "learning_rate": 1.862548625075795e-06, + "loss": 0.9425, + "step": 53340 + }, + { + "epoch": 0.8046999909499533, + "grad_norm": 0.6809506221877056, + "learning_rate": 1.8597675624735512e-06, + "loss": 0.9452, + "step": 53350 + }, + { + "epoch": 0.8048508250625962, + "grad_norm": 0.6788402085173285, + "learning_rate": 1.8569883648143105e-06, + "loss": 0.9401, + "step": 53360 + }, + { + "epoch": 0.805001659175239, + "grad_norm": 0.6913452635378959, + "learning_rate": 1.8542110327347985e-06, + "loss": 0.9552, + "step": 53370 + }, + { + "epoch": 0.805152493287882, + "grad_norm": 0.6668458556385057, + "learning_rate": 1.8514355668713003e-06, + "loss": 0.9433, + "step": 53380 + }, + { + "epoch": 0.8053033274005249, + "grad_norm": 0.6791804464873397, + "learning_rate": 1.848661967859684e-06, + "loss": 0.9573, + "step": 53390 + }, + { + "epoch": 0.8054541615131678, + "grad_norm": 0.6686453542501888, + "learning_rate": 1.8458902363353892e-06, + "loss": 0.9356, + "step": 53400 + }, + { + "epoch": 0.8056049956258108, + "grad_norm": 0.693809122443268, + "learning_rate": 1.8431203729334224e-06, + "loss": 0.9309, + "step": 53410 + }, + { + "epoch": 0.8057558297384536, + "grad_norm": 0.6773542197627661, + "learning_rate": 1.8403523782883624e-06, + "loss": 0.9334, + "step": 53420 + }, + { + "epoch": 0.8059066638510965, + "grad_norm": 0.6546799809986776, + "learning_rate": 1.8375862530343669e-06, + "loss": 0.9346, + "step": 53430 + }, + { + "epoch": 0.8060574979637395, + "grad_norm": 0.6564574217944639, + "learning_rate": 1.8348219978051574e-06, + "loss": 0.952, + "step": 53440 + }, + { + "epoch": 0.8062083320763824, + "grad_norm": 0.6727035869380822, + "learning_rate": 1.8320596132340286e-06, + "loss": 0.9539, + "step": 53450 + }, + { + "epoch": 0.8063591661890253, + "grad_norm": 0.6942003443264252, + "learning_rate": 1.829299099953853e-06, + "loss": 0.9646, + "step": 53460 + }, + { + "epoch": 0.8065100003016682, + "grad_norm": 0.6428121443903773, + "learning_rate": 1.826540458597066e-06, + "loss": 0.9429, + "step": 53470 + }, + { + "epoch": 0.8066608344143111, + "grad_norm": 0.650292364400498, + "learning_rate": 1.8237836897956774e-06, + "loss": 0.9479, + "step": 53480 + }, + { + "epoch": 0.8068116685269541, + "grad_norm": 0.7027170479073547, + "learning_rate": 1.821028794181271e-06, + "loss": 0.981, + "step": 53490 + }, + { + "epoch": 0.806962502639597, + "grad_norm": 0.6583102835363303, + "learning_rate": 1.8182757723849964e-06, + "loss": 0.9574, + "step": 53500 + }, + { + "epoch": 0.8071133367522398, + "grad_norm": 0.6554030586701955, + "learning_rate": 1.8155246250375747e-06, + "loss": 0.9566, + "step": 53510 + }, + { + "epoch": 0.8072641708648828, + "grad_norm": 0.6620488643007584, + "learning_rate": 1.8127753527693047e-06, + "loss": 0.9283, + "step": 53520 + }, + { + "epoch": 0.8074150049775257, + "grad_norm": 0.6776868171455191, + "learning_rate": 1.8100279562100432e-06, + "loss": 0.9438, + "step": 53530 + }, + { + "epoch": 0.8075658390901687, + "grad_norm": 0.6652370343259065, + "learning_rate": 1.8072824359892305e-06, + "loss": 0.9535, + "step": 53540 + }, + { + "epoch": 0.8077166732028116, + "grad_norm": 0.6744403396576996, + "learning_rate": 1.804538792735865e-06, + "loss": 0.9506, + "step": 53550 + }, + { + "epoch": 0.8078675073154544, + "grad_norm": 0.6453085015251434, + "learning_rate": 1.8017970270785222e-06, + "loss": 0.9441, + "step": 53560 + }, + { + "epoch": 0.8080183414280974, + "grad_norm": 0.6609007638748755, + "learning_rate": 1.7990571396453505e-06, + "loss": 0.9502, + "step": 53570 + }, + { + "epoch": 0.8081691755407403, + "grad_norm": 0.6812086189350113, + "learning_rate": 1.7963191310640593e-06, + "loss": 0.9337, + "step": 53580 + }, + { + "epoch": 0.8083200096533832, + "grad_norm": 0.6818929082512669, + "learning_rate": 1.7935830019619306e-06, + "loss": 0.9487, + "step": 53590 + }, + { + "epoch": 0.8084708437660261, + "grad_norm": 0.6704421873079418, + "learning_rate": 1.7908487529658203e-06, + "loss": 0.9313, + "step": 53600 + }, + { + "epoch": 0.808621677878669, + "grad_norm": 0.7319498406301566, + "learning_rate": 1.7881163847021478e-06, + "loss": 0.9556, + "step": 53610 + }, + { + "epoch": 0.808772511991312, + "grad_norm": 0.6994224980284309, + "learning_rate": 1.785385897796903e-06, + "loss": 0.949, + "step": 53620 + }, + { + "epoch": 0.8089233461039549, + "grad_norm": 0.696854734675285, + "learning_rate": 1.782657292875649e-06, + "loss": 0.9512, + "step": 53630 + }, + { + "epoch": 0.8090741802165978, + "grad_norm": 0.6846273792864644, + "learning_rate": 1.7799305705635128e-06, + "loss": 0.9541, + "step": 53640 + }, + { + "epoch": 0.8092250143292407, + "grad_norm": 0.6563223996952886, + "learning_rate": 1.7772057314851898e-06, + "loss": 0.9652, + "step": 53650 + }, + { + "epoch": 0.8093758484418836, + "grad_norm": 0.6557963702741668, + "learning_rate": 1.7744827762649497e-06, + "loss": 0.9596, + "step": 53660 + }, + { + "epoch": 0.8095266825545265, + "grad_norm": 0.6514039293640157, + "learning_rate": 1.7717617055266222e-06, + "loss": 0.9576, + "step": 53670 + }, + { + "epoch": 0.8096775166671695, + "grad_norm": 0.6717299602810889, + "learning_rate": 1.7690425198936146e-06, + "loss": 0.9482, + "step": 53680 + }, + { + "epoch": 0.8098283507798124, + "grad_norm": 0.6764445735373907, + "learning_rate": 1.7663252199888958e-06, + "loss": 0.9378, + "step": 53690 + }, + { + "epoch": 0.8099791848924552, + "grad_norm": 0.6492008653214874, + "learning_rate": 1.7636098064350026e-06, + "loss": 0.9695, + "step": 53700 + }, + { + "epoch": 0.8101300190050982, + "grad_norm": 0.6699912495249832, + "learning_rate": 1.7608962798540418e-06, + "loss": 0.9418, + "step": 53710 + }, + { + "epoch": 0.8102808531177411, + "grad_norm": 0.6745368590445222, + "learning_rate": 1.7581846408676929e-06, + "loss": 0.9667, + "step": 53720 + }, + { + "epoch": 0.8104316872303841, + "grad_norm": 0.6290770364511046, + "learning_rate": 1.7554748900971896e-06, + "loss": 0.9553, + "step": 53730 + }, + { + "epoch": 0.8105825213430269, + "grad_norm": 0.6431877748716892, + "learning_rate": 1.752767028163348e-06, + "loss": 0.948, + "step": 53740 + }, + { + "epoch": 0.8107333554556698, + "grad_norm": 0.6991327090999463, + "learning_rate": 1.7500610556865416e-06, + "loss": 0.9472, + "step": 53750 + }, + { + "epoch": 0.8108841895683128, + "grad_norm": 0.6581917648589503, + "learning_rate": 1.7473569732867125e-06, + "loss": 0.9569, + "step": 53760 + }, + { + "epoch": 0.8110350236809557, + "grad_norm": 0.646323097736434, + "learning_rate": 1.7446547815833736e-06, + "loss": 0.9553, + "step": 53770 + }, + { + "epoch": 0.8111858577935986, + "grad_norm": 0.6811409333350108, + "learning_rate": 1.7419544811955991e-06, + "loss": 0.9661, + "step": 53780 + }, + { + "epoch": 0.8113366919062415, + "grad_norm": 0.6820595541062082, + "learning_rate": 1.7392560727420383e-06, + "loss": 0.9551, + "step": 53790 + }, + { + "epoch": 0.8114875260188844, + "grad_norm": 0.6491846152830167, + "learning_rate": 1.7365595568408988e-06, + "loss": 0.9517, + "step": 53800 + }, + { + "epoch": 0.8116383601315273, + "grad_norm": 0.7693836854582122, + "learning_rate": 1.7338649341099545e-06, + "loss": 0.9551, + "step": 53810 + }, + { + "epoch": 0.8117891942441703, + "grad_norm": 0.6501411123138665, + "learning_rate": 1.7311722051665537e-06, + "loss": 0.9477, + "step": 53820 + }, + { + "epoch": 0.8119400283568132, + "grad_norm": 0.7010168005602183, + "learning_rate": 1.7284813706276049e-06, + "loss": 0.9469, + "step": 53830 + }, + { + "epoch": 0.812090862469456, + "grad_norm": 0.7159868876902203, + "learning_rate": 1.7257924311095786e-06, + "loss": 0.9541, + "step": 53840 + }, + { + "epoch": 0.812241696582099, + "grad_norm": 0.6210190038009508, + "learning_rate": 1.7231053872285242e-06, + "loss": 0.9294, + "step": 53850 + }, + { + "epoch": 0.8123925306947419, + "grad_norm": 0.6715734024513441, + "learning_rate": 1.7204202396000402e-06, + "loss": 0.9504, + "step": 53860 + }, + { + "epoch": 0.8125433648073849, + "grad_norm": 0.6956514749707512, + "learning_rate": 1.717736988839306e-06, + "loss": 0.9516, + "step": 53870 + }, + { + "epoch": 0.8126941989200277, + "grad_norm": 0.6380948280074276, + "learning_rate": 1.7150556355610547e-06, + "loss": 0.9291, + "step": 53880 + }, + { + "epoch": 0.8128450330326706, + "grad_norm": 0.6827709285712726, + "learning_rate": 1.7123761803795945e-06, + "loss": 0.9442, + "step": 53890 + }, + { + "epoch": 0.8129958671453136, + "grad_norm": 0.6643929958867294, + "learning_rate": 1.7096986239087877e-06, + "loss": 0.9609, + "step": 53900 + }, + { + "epoch": 0.8131467012579565, + "grad_norm": 0.6760395581955233, + "learning_rate": 1.707022966762073e-06, + "loss": 0.9604, + "step": 53910 + }, + { + "epoch": 0.8132975353705995, + "grad_norm": 0.6816828334500626, + "learning_rate": 1.704349209552445e-06, + "loss": 0.9429, + "step": 53920 + }, + { + "epoch": 0.8134483694832423, + "grad_norm": 0.6403099513519934, + "learning_rate": 1.70167735289247e-06, + "loss": 0.9473, + "step": 53930 + }, + { + "epoch": 0.8135992035958852, + "grad_norm": 0.6263055156480569, + "learning_rate": 1.6990073973942733e-06, + "loss": 0.9667, + "step": 53940 + }, + { + "epoch": 0.8137500377085282, + "grad_norm": 0.6570804381187713, + "learning_rate": 1.6963393436695453e-06, + "loss": 0.9508, + "step": 53950 + }, + { + "epoch": 0.8139008718211711, + "grad_norm": 0.673191842915567, + "learning_rate": 1.6936731923295468e-06, + "loss": 0.9414, + "step": 53960 + }, + { + "epoch": 0.814051705933814, + "grad_norm": 0.6753524744689987, + "learning_rate": 1.6910089439850952e-06, + "loss": 0.9614, + "step": 53970 + }, + { + "epoch": 0.8142025400464569, + "grad_norm": 0.6721591703264398, + "learning_rate": 1.6883465992465731e-06, + "loss": 0.96, + "step": 53980 + }, + { + "epoch": 0.8143533741590998, + "grad_norm": 0.6491550194469354, + "learning_rate": 1.6856861587239336e-06, + "loss": 0.9537, + "step": 53990 + }, + { + "epoch": 0.8145042082717427, + "grad_norm": 0.66338535074407, + "learning_rate": 1.6830276230266862e-06, + "loss": 0.9634, + "step": 54000 + }, + { + "epoch": 0.8146550423843857, + "grad_norm": 0.684511347916283, + "learning_rate": 1.6803709927639046e-06, + "loss": 0.9546, + "step": 54010 + }, + { + "epoch": 0.8148058764970285, + "grad_norm": 0.6769452115978284, + "learning_rate": 1.6777162685442316e-06, + "loss": 0.9506, + "step": 54020 + }, + { + "epoch": 0.8149567106096715, + "grad_norm": 0.674102504411776, + "learning_rate": 1.6750634509758667e-06, + "loss": 0.9491, + "step": 54030 + }, + { + "epoch": 0.8151075447223144, + "grad_norm": 0.636896396444268, + "learning_rate": 1.6724125406665781e-06, + "loss": 0.9629, + "step": 54040 + }, + { + "epoch": 0.8152583788349573, + "grad_norm": 0.6852620935188232, + "learning_rate": 1.669763538223692e-06, + "loss": 0.9435, + "step": 54050 + }, + { + "epoch": 0.8154092129476003, + "grad_norm": 0.6482703352994612, + "learning_rate": 1.6671164442541e-06, + "loss": 0.9514, + "step": 54060 + }, + { + "epoch": 0.8155600470602431, + "grad_norm": 0.6713457638968986, + "learning_rate": 1.664471259364261e-06, + "loss": 0.9513, + "step": 54070 + }, + { + "epoch": 0.815710881172886, + "grad_norm": 0.7107307150117539, + "learning_rate": 1.6618279841601892e-06, + "loss": 0.9555, + "step": 54080 + }, + { + "epoch": 0.815861715285529, + "grad_norm": 0.6865288304548661, + "learning_rate": 1.6591866192474605e-06, + "loss": 0.9536, + "step": 54090 + }, + { + "epoch": 0.8160125493981719, + "grad_norm": 0.6994935213769806, + "learning_rate": 1.6565471652312216e-06, + "loss": 0.9376, + "step": 54100 + }, + { + "epoch": 0.8161633835108149, + "grad_norm": 0.6720604497035269, + "learning_rate": 1.6539096227161756e-06, + "loss": 0.9702, + "step": 54110 + }, + { + "epoch": 0.8163142176234577, + "grad_norm": 0.6654328973103356, + "learning_rate": 1.6512739923065845e-06, + "loss": 0.9375, + "step": 54120 + }, + { + "epoch": 0.8164650517361006, + "grad_norm": 0.6829451696303911, + "learning_rate": 1.6486402746062824e-06, + "loss": 0.9568, + "step": 54130 + }, + { + "epoch": 0.8166158858487436, + "grad_norm": 0.7078679267803377, + "learning_rate": 1.6460084702186563e-06, + "loss": 0.9511, + "step": 54140 + }, + { + "epoch": 0.8167667199613865, + "grad_norm": 0.7014494750092091, + "learning_rate": 1.643378579746655e-06, + "loss": 0.956, + "step": 54150 + }, + { + "epoch": 0.8169175540740293, + "grad_norm": 0.6805437598779749, + "learning_rate": 1.6407506037927967e-06, + "loss": 0.9411, + "step": 54160 + }, + { + "epoch": 0.8170683881866723, + "grad_norm": 0.673784277689434, + "learning_rate": 1.638124542959153e-06, + "loss": 0.9515, + "step": 54170 + }, + { + "epoch": 0.8172192222993152, + "grad_norm": 0.6919328955657116, + "learning_rate": 1.635500397847357e-06, + "loss": 0.9533, + "step": 54180 + }, + { + "epoch": 0.8173700564119581, + "grad_norm": 0.6663048453399923, + "learning_rate": 1.6328781690586115e-06, + "loss": 0.9296, + "step": 54190 + }, + { + "epoch": 0.8175208905246011, + "grad_norm": 0.6468806105256277, + "learning_rate": 1.6302578571936677e-06, + "loss": 0.9419, + "step": 54200 + }, + { + "epoch": 0.8176717246372439, + "grad_norm": 0.6890863497497398, + "learning_rate": 1.6276394628528469e-06, + "loss": 0.9505, + "step": 54210 + }, + { + "epoch": 0.8178225587498869, + "grad_norm": 0.6710788218706547, + "learning_rate": 1.6250229866360322e-06, + "loss": 0.9528, + "step": 54220 + }, + { + "epoch": 0.8179733928625298, + "grad_norm": 0.6501753574486118, + "learning_rate": 1.6224084291426568e-06, + "loss": 0.9364, + "step": 54230 + }, + { + "epoch": 0.8181242269751727, + "grad_norm": 0.7140965097214572, + "learning_rate": 1.6197957909717254e-06, + "loss": 0.9542, + "step": 54240 + }, + { + "epoch": 0.8182750610878157, + "grad_norm": 0.7523753532347495, + "learning_rate": 1.6171850727217985e-06, + "loss": 0.9554, + "step": 54250 + }, + { + "epoch": 0.8184258952004585, + "grad_norm": 0.6776329047448306, + "learning_rate": 1.6145762749909922e-06, + "loss": 0.9572, + "step": 54260 + }, + { + "epoch": 0.8185767293131014, + "grad_norm": 0.7098139680884369, + "learning_rate": 1.6119693983769912e-06, + "loss": 0.9375, + "step": 54270 + }, + { + "epoch": 0.8187275634257444, + "grad_norm": 0.6272918359499328, + "learning_rate": 1.6093644434770361e-06, + "loss": 0.9519, + "step": 54280 + }, + { + "epoch": 0.8188783975383873, + "grad_norm": 0.7179666513626427, + "learning_rate": 1.6067614108879237e-06, + "loss": 0.9306, + "step": 54290 + }, + { + "epoch": 0.8190292316510301, + "grad_norm": 0.6451647536711229, + "learning_rate": 1.604160301206018e-06, + "loss": 0.9398, + "step": 54300 + }, + { + "epoch": 0.8191800657636731, + "grad_norm": 0.7138826151802184, + "learning_rate": 1.6015611150272358e-06, + "loss": 0.9352, + "step": 54310 + }, + { + "epoch": 0.819330899876316, + "grad_norm": 0.6709412445830707, + "learning_rate": 1.5989638529470542e-06, + "loss": 0.9523, + "step": 54320 + }, + { + "epoch": 0.819481733988959, + "grad_norm": 0.6756844772408223, + "learning_rate": 1.5963685155605157e-06, + "loss": 0.9341, + "step": 54330 + }, + { + "epoch": 0.8196325681016019, + "grad_norm": 0.6731222656299557, + "learning_rate": 1.5937751034622107e-06, + "loss": 0.9521, + "step": 54340 + }, + { + "epoch": 0.8197834022142447, + "grad_norm": 0.6447087401930499, + "learning_rate": 1.5911836172463024e-06, + "loss": 0.9417, + "step": 54350 + }, + { + "epoch": 0.8199342363268877, + "grad_norm": 0.6413555840597212, + "learning_rate": 1.5885940575064985e-06, + "loss": 0.9483, + "step": 54360 + }, + { + "epoch": 0.8200850704395306, + "grad_norm": 0.649434077489733, + "learning_rate": 1.5860064248360763e-06, + "loss": 0.973, + "step": 54370 + }, + { + "epoch": 0.8202359045521735, + "grad_norm": 0.6756717253938501, + "learning_rate": 1.583420719827865e-06, + "loss": 0.9572, + "step": 54380 + }, + { + "epoch": 0.8203867386648165, + "grad_norm": 0.6384342024122626, + "learning_rate": 1.5808369430742575e-06, + "loss": 0.9355, + "step": 54390 + }, + { + "epoch": 0.8205375727774593, + "grad_norm": 0.6960018576247395, + "learning_rate": 1.5782550951671993e-06, + "loss": 0.9448, + "step": 54400 + }, + { + "epoch": 0.8206884068901023, + "grad_norm": 0.7160628872144178, + "learning_rate": 1.5756751766981993e-06, + "loss": 0.9534, + "step": 54410 + }, + { + "epoch": 0.8208392410027452, + "grad_norm": 0.674406657831021, + "learning_rate": 1.57309718825832e-06, + "loss": 0.9599, + "step": 54420 + }, + { + "epoch": 0.8209900751153881, + "grad_norm": 0.7527602568207044, + "learning_rate": 1.5705211304381829e-06, + "loss": 0.9625, + "step": 54430 + }, + { + "epoch": 0.821140909228031, + "grad_norm": 0.6770755844906646, + "learning_rate": 1.5679470038279698e-06, + "loss": 0.9621, + "step": 54440 + }, + { + "epoch": 0.8212917433406739, + "grad_norm": 0.6655272401022875, + "learning_rate": 1.565374809017416e-06, + "loss": 0.9343, + "step": 54450 + }, + { + "epoch": 0.8214425774533168, + "grad_norm": 0.6445139998905759, + "learning_rate": 1.5628045465958185e-06, + "loss": 0.9439, + "step": 54460 + }, + { + "epoch": 0.8215934115659598, + "grad_norm": 0.6980870745780323, + "learning_rate": 1.5602362171520292e-06, + "loss": 0.9493, + "step": 54470 + }, + { + "epoch": 0.8217442456786027, + "grad_norm": 0.6597939788188798, + "learning_rate": 1.5576698212744546e-06, + "loss": 0.9539, + "step": 54480 + }, + { + "epoch": 0.8218950797912455, + "grad_norm": 0.6715098482853843, + "learning_rate": 1.5551053595510646e-06, + "loss": 0.9604, + "step": 54490 + }, + { + "epoch": 0.8220459139038885, + "grad_norm": 0.6464916995250302, + "learning_rate": 1.5525428325693803e-06, + "loss": 0.9555, + "step": 54500 + }, + { + "epoch": 0.8221967480165314, + "grad_norm": 0.6778417738866348, + "learning_rate": 1.54998224091648e-06, + "loss": 0.9663, + "step": 54510 + }, + { + "epoch": 0.8223475821291744, + "grad_norm": 0.6686521295532256, + "learning_rate": 1.5474235851790042e-06, + "loss": 0.9399, + "step": 54520 + }, + { + "epoch": 0.8224984162418173, + "grad_norm": 0.6780084829021074, + "learning_rate": 1.544866865943141e-06, + "loss": 0.9417, + "step": 54530 + }, + { + "epoch": 0.8226492503544601, + "grad_norm": 0.669202633674467, + "learning_rate": 1.542312083794646e-06, + "loss": 0.9523, + "step": 54540 + }, + { + "epoch": 0.8228000844671031, + "grad_norm": 0.6875572728419802, + "learning_rate": 1.5397592393188176e-06, + "loss": 0.9616, + "step": 54550 + }, + { + "epoch": 0.822950918579746, + "grad_norm": 0.660609226414831, + "learning_rate": 1.5372083331005239e-06, + "loss": 0.9458, + "step": 54560 + }, + { + "epoch": 0.8231017526923889, + "grad_norm": 0.6808868695750466, + "learning_rate": 1.5346593657241771e-06, + "loss": 0.9572, + "step": 54570 + }, + { + "epoch": 0.8232525868050318, + "grad_norm": 0.6772715747600175, + "learning_rate": 1.5321123377737556e-06, + "loss": 0.9464, + "step": 54580 + }, + { + "epoch": 0.8234034209176747, + "grad_norm": 0.6766437818426271, + "learning_rate": 1.529567249832784e-06, + "loss": 0.9543, + "step": 54590 + }, + { + "epoch": 0.8235542550303177, + "grad_norm": 0.65796758202863, + "learning_rate": 1.52702410248435e-06, + "loss": 0.9414, + "step": 54600 + }, + { + "epoch": 0.8237050891429606, + "grad_norm": 0.6605702390649006, + "learning_rate": 1.5244828963110936e-06, + "loss": 0.9546, + "step": 54610 + }, + { + "epoch": 0.8238559232556035, + "grad_norm": 0.650890593286069, + "learning_rate": 1.5219436318952064e-06, + "loss": 0.9623, + "step": 54620 + }, + { + "epoch": 0.8240067573682464, + "grad_norm": 0.7098569577937364, + "learning_rate": 1.519406309818442e-06, + "loss": 0.9701, + "step": 54630 + }, + { + "epoch": 0.8241575914808893, + "grad_norm": 0.6779186871265488, + "learning_rate": 1.5168709306621066e-06, + "loss": 0.9539, + "step": 54640 + }, + { + "epoch": 0.8243084255935322, + "grad_norm": 0.6574655235456568, + "learning_rate": 1.5143374950070556e-06, + "loss": 0.9415, + "step": 54650 + }, + { + "epoch": 0.8244592597061752, + "grad_norm": 0.6616755366329338, + "learning_rate": 1.51180600343371e-06, + "loss": 0.9545, + "step": 54660 + }, + { + "epoch": 0.8246100938188181, + "grad_norm": 0.6809941759419906, + "learning_rate": 1.5092764565220364e-06, + "loss": 0.9515, + "step": 54670 + }, + { + "epoch": 0.8247609279314609, + "grad_norm": 1.7372521362285038, + "learning_rate": 1.5067488548515575e-06, + "loss": 0.9505, + "step": 54680 + }, + { + "epoch": 0.8249117620441039, + "grad_norm": 0.6725456895426702, + "learning_rate": 1.5042231990013545e-06, + "loss": 0.9595, + "step": 54690 + }, + { + "epoch": 0.8250625961567468, + "grad_norm": 0.7098409355205834, + "learning_rate": 1.501699489550058e-06, + "loss": 0.9428, + "step": 54700 + }, + { + "epoch": 0.8252134302693898, + "grad_norm": 0.6848027427828666, + "learning_rate": 1.4991777270758545e-06, + "loss": 0.9601, + "step": 54710 + }, + { + "epoch": 0.8253642643820326, + "grad_norm": 0.6844274641375256, + "learning_rate": 1.4966579121564883e-06, + "loss": 0.9529, + "step": 54720 + }, + { + "epoch": 0.8255150984946755, + "grad_norm": 0.7344183427124696, + "learning_rate": 1.4941400453692502e-06, + "loss": 0.9553, + "step": 54730 + }, + { + "epoch": 0.8256659326073185, + "grad_norm": 0.6394167093950267, + "learning_rate": 1.4916241272909914e-06, + "loss": 0.9329, + "step": 54740 + }, + { + "epoch": 0.8258167667199614, + "grad_norm": 0.664540158970831, + "learning_rate": 1.4891101584981116e-06, + "loss": 0.9714, + "step": 54750 + }, + { + "epoch": 0.8259676008326043, + "grad_norm": 0.6723728665031151, + "learning_rate": 1.486598139566563e-06, + "loss": 0.9458, + "step": 54760 + }, + { + "epoch": 0.8261184349452472, + "grad_norm": 0.6453429200771961, + "learning_rate": 1.4840880710718607e-06, + "loss": 0.9292, + "step": 54770 + }, + { + "epoch": 0.8262692690578901, + "grad_norm": 0.6658276497676376, + "learning_rate": 1.4815799535890618e-06, + "loss": 0.9398, + "step": 54780 + }, + { + "epoch": 0.826420103170533, + "grad_norm": 0.6459770404991589, + "learning_rate": 1.4790737876927796e-06, + "loss": 0.9405, + "step": 54790 + }, + { + "epoch": 0.826570937283176, + "grad_norm": 0.6699320848735789, + "learning_rate": 1.4765695739571862e-06, + "loss": 0.9747, + "step": 54800 + }, + { + "epoch": 0.8267217713958189, + "grad_norm": 0.6961600433894183, + "learning_rate": 1.474067312955999e-06, + "loss": 0.9633, + "step": 54810 + }, + { + "epoch": 0.8268726055084618, + "grad_norm": 0.6346220747074836, + "learning_rate": 1.4715670052624886e-06, + "loss": 0.9525, + "step": 54820 + }, + { + "epoch": 0.8270234396211047, + "grad_norm": 0.7181449333528869, + "learning_rate": 1.4690686514494856e-06, + "loss": 0.9546, + "step": 54830 + }, + { + "epoch": 0.8271742737337476, + "grad_norm": 0.6881842080442349, + "learning_rate": 1.4665722520893643e-06, + "loss": 0.947, + "step": 54840 + }, + { + "epoch": 0.8273251078463906, + "grad_norm": 0.6394202963033002, + "learning_rate": 1.464077807754053e-06, + "loss": 0.9471, + "step": 54850 + }, + { + "epoch": 0.8274759419590334, + "grad_norm": 0.650250756719387, + "learning_rate": 1.4615853190150386e-06, + "loss": 0.9602, + "step": 54860 + }, + { + "epoch": 0.8276267760716763, + "grad_norm": 0.6672346094633077, + "learning_rate": 1.4590947864433502e-06, + "loss": 0.9361, + "step": 54870 + }, + { + "epoch": 0.8277776101843193, + "grad_norm": 0.6891241373173561, + "learning_rate": 1.4566062106095768e-06, + "loss": 0.9406, + "step": 54880 + }, + { + "epoch": 0.8279284442969622, + "grad_norm": 0.6432410373571201, + "learning_rate": 1.4541195920838557e-06, + "loss": 0.9394, + "step": 54890 + }, + { + "epoch": 0.8280792784096052, + "grad_norm": 0.6765392974667064, + "learning_rate": 1.4516349314358747e-06, + "loss": 0.9646, + "step": 54900 + }, + { + "epoch": 0.828230112522248, + "grad_norm": 0.6456248944855416, + "learning_rate": 1.449152229234876e-06, + "loss": 0.9636, + "step": 54910 + }, + { + "epoch": 0.8283809466348909, + "grad_norm": 0.6800992124532836, + "learning_rate": 1.4466714860496511e-06, + "loss": 0.9526, + "step": 54920 + }, + { + "epoch": 0.8285317807475339, + "grad_norm": 0.6380556366560497, + "learning_rate": 1.4441927024485402e-06, + "loss": 0.9592, + "step": 54930 + }, + { + "epoch": 0.8286826148601768, + "grad_norm": 0.6570978405870564, + "learning_rate": 1.4417158789994412e-06, + "loss": 0.964, + "step": 54940 + }, + { + "epoch": 0.8288334489728197, + "grad_norm": 0.7437199235882346, + "learning_rate": 1.4392410162697989e-06, + "loss": 0.934, + "step": 54950 + }, + { + "epoch": 0.8289842830854626, + "grad_norm": 0.6418584265447681, + "learning_rate": 1.4367681148266043e-06, + "loss": 0.9621, + "step": 54960 + }, + { + "epoch": 0.8291351171981055, + "grad_norm": 0.701237423226791, + "learning_rate": 1.434297175236411e-06, + "loss": 0.9588, + "step": 54970 + }, + { + "epoch": 0.8292859513107484, + "grad_norm": 0.6269547705410646, + "learning_rate": 1.4318281980653115e-06, + "loss": 0.9279, + "step": 54980 + }, + { + "epoch": 0.8294367854233914, + "grad_norm": 0.6701471969028713, + "learning_rate": 1.4293611838789523e-06, + "loss": 0.9498, + "step": 54990 + }, + { + "epoch": 0.8295876195360342, + "grad_norm": 0.6309707218409671, + "learning_rate": 1.4268961332425357e-06, + "loss": 0.9364, + "step": 55000 + }, + { + "epoch": 0.8297384536486772, + "grad_norm": 0.6793827921379287, + "learning_rate": 1.4244330467208046e-06, + "loss": 0.9572, + "step": 55010 + }, + { + "epoch": 0.8298892877613201, + "grad_norm": 0.6840610148340528, + "learning_rate": 1.4219719248780618e-06, + "loss": 0.9567, + "step": 55020 + }, + { + "epoch": 0.830040121873963, + "grad_norm": 0.7475154413272195, + "learning_rate": 1.4195127682781496e-06, + "loss": 0.9397, + "step": 55030 + }, + { + "epoch": 0.830190955986606, + "grad_norm": 0.693512929204307, + "learning_rate": 1.4170555774844718e-06, + "loss": 0.9356, + "step": 55040 + }, + { + "epoch": 0.8303417900992488, + "grad_norm": 0.7029721115439973, + "learning_rate": 1.4146003530599694e-06, + "loss": 0.9929, + "step": 55050 + }, + { + "epoch": 0.8304926242118917, + "grad_norm": 0.7037313094829627, + "learning_rate": 1.4121470955671435e-06, + "loss": 0.948, + "step": 55060 + }, + { + "epoch": 0.8306434583245347, + "grad_norm": 0.6754826930593721, + "learning_rate": 1.4096958055680366e-06, + "loss": 0.9701, + "step": 55070 + }, + { + "epoch": 0.8307942924371776, + "grad_norm": 0.6672530365786609, + "learning_rate": 1.4072464836242483e-06, + "loss": 0.9587, + "step": 55080 + }, + { + "epoch": 0.8309451265498206, + "grad_norm": 0.6659567641957275, + "learning_rate": 1.4047991302969212e-06, + "loss": 0.9655, + "step": 55090 + }, + { + "epoch": 0.8310959606624634, + "grad_norm": 0.6932549313334706, + "learning_rate": 1.4023537461467451e-06, + "loss": 0.9471, + "step": 55100 + }, + { + "epoch": 0.8312467947751063, + "grad_norm": 0.6369507853365073, + "learning_rate": 1.399910331733968e-06, + "loss": 0.9458, + "step": 55110 + }, + { + "epoch": 0.8313976288877493, + "grad_norm": 0.6603938105240488, + "learning_rate": 1.3974688876183784e-06, + "loss": 0.9546, + "step": 55120 + }, + { + "epoch": 0.8315484630003922, + "grad_norm": 0.6724083547211368, + "learning_rate": 1.3950294143593124e-06, + "loss": 0.9513, + "step": 55130 + }, + { + "epoch": 0.831699297113035, + "grad_norm": 0.6637639763608593, + "learning_rate": 1.392591912515664e-06, + "loss": 0.9532, + "step": 55140 + }, + { + "epoch": 0.831850131225678, + "grad_norm": 0.6689306047328271, + "learning_rate": 1.3901563826458641e-06, + "loss": 0.9586, + "step": 55150 + }, + { + "epoch": 0.8320009653383209, + "grad_norm": 0.707300386283317, + "learning_rate": 1.3877228253079034e-06, + "loss": 0.9494, + "step": 55160 + }, + { + "epoch": 0.8321517994509638, + "grad_norm": 0.6814682063907438, + "learning_rate": 1.3852912410593111e-06, + "loss": 0.9595, + "step": 55170 + }, + { + "epoch": 0.8323026335636068, + "grad_norm": 0.6468283465033188, + "learning_rate": 1.3828616304571651e-06, + "loss": 0.9428, + "step": 55180 + }, + { + "epoch": 0.8324534676762496, + "grad_norm": 0.6312994346919797, + "learning_rate": 1.380433994058099e-06, + "loss": 0.9446, + "step": 55190 + }, + { + "epoch": 0.8326043017888926, + "grad_norm": 0.6776412920328884, + "learning_rate": 1.3780083324182847e-06, + "loss": 0.9424, + "step": 55200 + }, + { + "epoch": 0.8327551359015355, + "grad_norm": 0.6388237155871437, + "learning_rate": 1.3755846460934498e-06, + "loss": 0.9376, + "step": 55210 + }, + { + "epoch": 0.8329059700141784, + "grad_norm": 0.6502327047051536, + "learning_rate": 1.373162935638862e-06, + "loss": 0.9407, + "step": 55220 + }, + { + "epoch": 0.8330568041268214, + "grad_norm": 0.6384566768991667, + "learning_rate": 1.3707432016093447e-06, + "loss": 0.9422, + "step": 55230 + }, + { + "epoch": 0.8332076382394642, + "grad_norm": 0.6424676740019967, + "learning_rate": 1.3683254445592576e-06, + "loss": 0.9405, + "step": 55240 + }, + { + "epoch": 0.8333584723521071, + "grad_norm": 0.634292880296123, + "learning_rate": 1.3659096650425184e-06, + "loss": 0.9362, + "step": 55250 + }, + { + "epoch": 0.8335093064647501, + "grad_norm": 0.6970769193208873, + "learning_rate": 1.3634958636125828e-06, + "loss": 0.9383, + "step": 55260 + }, + { + "epoch": 0.833660140577393, + "grad_norm": 0.6537636538253073, + "learning_rate": 1.3610840408224623e-06, + "loss": 0.958, + "step": 55270 + }, + { + "epoch": 0.8338109746900358, + "grad_norm": 0.6874874291113275, + "learning_rate": 1.3586741972247076e-06, + "loss": 0.9385, + "step": 55280 + }, + { + "epoch": 0.8339618088026788, + "grad_norm": 0.6796739445498428, + "learning_rate": 1.3562663333714155e-06, + "loss": 0.9618, + "step": 55290 + }, + { + "epoch": 0.8341126429153217, + "grad_norm": 0.6448016565746739, + "learning_rate": 1.3538604498142372e-06, + "loss": 0.9458, + "step": 55300 + }, + { + "epoch": 0.8342634770279647, + "grad_norm": 0.6527699483027454, + "learning_rate": 1.351456547104364e-06, + "loss": 0.9249, + "step": 55310 + }, + { + "epoch": 0.8344143111406076, + "grad_norm": 0.6751007326710515, + "learning_rate": 1.3490546257925308e-06, + "loss": 0.9511, + "step": 55320 + }, + { + "epoch": 0.8345651452532504, + "grad_norm": 0.6655101589742584, + "learning_rate": 1.3466546864290276e-06, + "loss": 0.9557, + "step": 55330 + }, + { + "epoch": 0.8347159793658934, + "grad_norm": 0.6816913083344144, + "learning_rate": 1.3442567295636833e-06, + "loss": 0.9438, + "step": 55340 + }, + { + "epoch": 0.8348668134785363, + "grad_norm": 0.6464126959751433, + "learning_rate": 1.341860755745873e-06, + "loss": 0.9538, + "step": 55350 + }, + { + "epoch": 0.8350176475911792, + "grad_norm": 0.6763032528089201, + "learning_rate": 1.3394667655245208e-06, + "loss": 0.9575, + "step": 55360 + }, + { + "epoch": 0.8351684817038222, + "grad_norm": 0.6467332507879425, + "learning_rate": 1.3370747594480926e-06, + "loss": 0.9555, + "step": 55370 + }, + { + "epoch": 0.835319315816465, + "grad_norm": 0.6740096585244323, + "learning_rate": 1.3346847380646022e-06, + "loss": 0.9359, + "step": 55380 + }, + { + "epoch": 0.835470149929108, + "grad_norm": 0.6696367454283584, + "learning_rate": 1.3322967019216116e-06, + "loss": 0.9578, + "step": 55390 + }, + { + "epoch": 0.8356209840417509, + "grad_norm": 0.6746749949695837, + "learning_rate": 1.3299106515662197e-06, + "loss": 0.9676, + "step": 55400 + }, + { + "epoch": 0.8357718181543938, + "grad_norm": 0.6946044726438098, + "learning_rate": 1.3275265875450782e-06, + "loss": 0.9576, + "step": 55410 + }, + { + "epoch": 0.8359226522670367, + "grad_norm": 0.6724836272945324, + "learning_rate": 1.3251445104043802e-06, + "loss": 0.946, + "step": 55420 + }, + { + "epoch": 0.8360734863796796, + "grad_norm": 0.7002082246127329, + "learning_rate": 1.3227644206898626e-06, + "loss": 0.9558, + "step": 55430 + }, + { + "epoch": 0.8362243204923225, + "grad_norm": 0.679494280903341, + "learning_rate": 1.3203863189468113e-06, + "loss": 0.9485, + "step": 55440 + }, + { + "epoch": 0.8363751546049655, + "grad_norm": 0.6500145805235283, + "learning_rate": 1.3180102057200527e-06, + "loss": 0.9742, + "step": 55450 + }, + { + "epoch": 0.8365259887176084, + "grad_norm": 0.6447808302202893, + "learning_rate": 1.315636081553956e-06, + "loss": 0.9479, + "step": 55460 + }, + { + "epoch": 0.8366768228302512, + "grad_norm": 0.6588020212544005, + "learning_rate": 1.3132639469924423e-06, + "loss": 0.9426, + "step": 55470 + }, + { + "epoch": 0.8368276569428942, + "grad_norm": 0.6398528828806752, + "learning_rate": 1.3108938025789707e-06, + "loss": 0.9606, + "step": 55480 + }, + { + "epoch": 0.8369784910555371, + "grad_norm": 0.6625268354139745, + "learning_rate": 1.3085256488565433e-06, + "loss": 0.9452, + "step": 55490 + }, + { + "epoch": 0.8371293251681801, + "grad_norm": 0.7321712904396503, + "learning_rate": 1.306159486367713e-06, + "loss": 0.9596, + "step": 55500 + }, + { + "epoch": 0.837280159280823, + "grad_norm": 0.6684463132866956, + "learning_rate": 1.30379531565457e-06, + "loss": 0.9226, + "step": 55510 + }, + { + "epoch": 0.8374309933934658, + "grad_norm": 0.6333029687686451, + "learning_rate": 1.3014331372587486e-06, + "loss": 0.9478, + "step": 55520 + }, + { + "epoch": 0.8375818275061088, + "grad_norm": 0.6917747755591935, + "learning_rate": 1.2990729517214296e-06, + "loss": 0.9608, + "step": 55530 + }, + { + "epoch": 0.8377326616187517, + "grad_norm": 0.6362868659522383, + "learning_rate": 1.2967147595833396e-06, + "loss": 0.9537, + "step": 55540 + }, + { + "epoch": 0.8378834957313946, + "grad_norm": 0.6850538311577408, + "learning_rate": 1.29435856138474e-06, + "loss": 0.9591, + "step": 55550 + }, + { + "epoch": 0.8380343298440375, + "grad_norm": 0.6256270989303505, + "learning_rate": 1.292004357665444e-06, + "loss": 0.9634, + "step": 55560 + }, + { + "epoch": 0.8381851639566804, + "grad_norm": 0.6297348933341378, + "learning_rate": 1.2896521489648006e-06, + "loss": 0.9401, + "step": 55570 + }, + { + "epoch": 0.8383359980693234, + "grad_norm": 0.6720218500254572, + "learning_rate": 1.28730193582171e-06, + "loss": 0.9386, + "step": 55580 + }, + { + "epoch": 0.8384868321819663, + "grad_norm": 0.6713343253003485, + "learning_rate": 1.2849537187746087e-06, + "loss": 0.934, + "step": 55590 + }, + { + "epoch": 0.8386376662946092, + "grad_norm": 0.6991262693881777, + "learning_rate": 1.2826074983614733e-06, + "loss": 0.965, + "step": 55600 + }, + { + "epoch": 0.8387885004072521, + "grad_norm": 0.6581130285198228, + "learning_rate": 1.2802632751198352e-06, + "loss": 0.9447, + "step": 55610 + }, + { + "epoch": 0.838939334519895, + "grad_norm": 0.6989507943086608, + "learning_rate": 1.2779210495867555e-06, + "loss": 0.9614, + "step": 55620 + }, + { + "epoch": 0.8390901686325379, + "grad_norm": 0.6612817911448104, + "learning_rate": 1.275580822298842e-06, + "loss": 0.9513, + "step": 55630 + }, + { + "epoch": 0.8392410027451809, + "grad_norm": 0.6892302288098066, + "learning_rate": 1.2732425937922487e-06, + "loss": 0.9481, + "step": 55640 + }, + { + "epoch": 0.8393918368578238, + "grad_norm": 0.6602242041867863, + "learning_rate": 1.2709063646026665e-06, + "loss": 0.9455, + "step": 55650 + }, + { + "epoch": 0.8395426709704666, + "grad_norm": 0.675840481903306, + "learning_rate": 1.268572135265328e-06, + "loss": 0.9288, + "step": 55660 + }, + { + "epoch": 0.8396935050831096, + "grad_norm": 0.6817344515884072, + "learning_rate": 1.2662399063150143e-06, + "loss": 0.9321, + "step": 55670 + }, + { + "epoch": 0.8398443391957525, + "grad_norm": 0.6740109348443468, + "learning_rate": 1.2639096782860382e-06, + "loss": 0.9609, + "step": 55680 + }, + { + "epoch": 0.8399951733083955, + "grad_norm": 0.63664550601722, + "learning_rate": 1.2615814517122648e-06, + "loss": 0.9408, + "step": 55690 + }, + { + "epoch": 0.8401460074210383, + "grad_norm": 0.6734350614712636, + "learning_rate": 1.2592552271270908e-06, + "loss": 0.9416, + "step": 55700 + }, + { + "epoch": 0.8402968415336812, + "grad_norm": 0.6710909004714481, + "learning_rate": 1.256931005063462e-06, + "loss": 0.9419, + "step": 55710 + }, + { + "epoch": 0.8404476756463242, + "grad_norm": 0.6779399111837295, + "learning_rate": 1.2546087860538591e-06, + "loss": 0.9604, + "step": 55720 + }, + { + "epoch": 0.8405985097589671, + "grad_norm": 0.6938368421079145, + "learning_rate": 1.2522885706303112e-06, + "loss": 0.9743, + "step": 55730 + }, + { + "epoch": 0.84074934387161, + "grad_norm": 0.676847674179809, + "learning_rate": 1.2499703593243796e-06, + "loss": 0.9586, + "step": 55740 + }, + { + "epoch": 0.8409001779842529, + "grad_norm": 0.7055879524691557, + "learning_rate": 1.2476541526671749e-06, + "loss": 0.9282, + "step": 55750 + }, + { + "epoch": 0.8410510120968958, + "grad_norm": 0.6763599460782651, + "learning_rate": 1.2453399511893438e-06, + "loss": 0.9659, + "step": 55760 + }, + { + "epoch": 0.8412018462095388, + "grad_norm": 0.7936249572788758, + "learning_rate": 1.24302775542107e-06, + "loss": 0.9438, + "step": 55770 + }, + { + "epoch": 0.8413526803221817, + "grad_norm": 0.6662634473448711, + "learning_rate": 1.2407175658920878e-06, + "loss": 0.9408, + "step": 55780 + }, + { + "epoch": 0.8415035144348246, + "grad_norm": 0.7321569412630714, + "learning_rate": 1.2384093831316636e-06, + "loss": 0.9498, + "step": 55790 + }, + { + "epoch": 0.8416543485474675, + "grad_norm": 0.6510846722938444, + "learning_rate": 1.2361032076686031e-06, + "loss": 0.9584, + "step": 55800 + }, + { + "epoch": 0.8418051826601104, + "grad_norm": 0.6877809888443321, + "learning_rate": 1.2337990400312628e-06, + "loss": 0.9446, + "step": 55810 + }, + { + "epoch": 0.8419560167727533, + "grad_norm": 0.6841363679897946, + "learning_rate": 1.2314968807475247e-06, + "loss": 0.9351, + "step": 55820 + }, + { + "epoch": 0.8421068508853963, + "grad_norm": 0.6711638477164705, + "learning_rate": 1.2291967303448248e-06, + "loss": 0.947, + "step": 55830 + }, + { + "epoch": 0.8422576849980391, + "grad_norm": 0.6777510244399888, + "learning_rate": 1.2268985893501284e-06, + "loss": 0.9468, + "step": 55840 + }, + { + "epoch": 0.842408519110682, + "grad_norm": 0.7162970435581058, + "learning_rate": 1.2246024582899418e-06, + "loss": 0.9712, + "step": 55850 + }, + { + "epoch": 0.842559353223325, + "grad_norm": 0.6898865677750301, + "learning_rate": 1.2223083376903166e-06, + "loss": 0.9428, + "step": 55860 + }, + { + "epoch": 0.8427101873359679, + "grad_norm": 0.6483351028741119, + "learning_rate": 1.220016228076839e-06, + "loss": 0.9467, + "step": 55870 + }, + { + "epoch": 0.8428610214486109, + "grad_norm": 0.6475311549063247, + "learning_rate": 1.2177261299746347e-06, + "loss": 0.9349, + "step": 55880 + }, + { + "epoch": 0.8430118555612537, + "grad_norm": 0.6679857567869586, + "learning_rate": 1.2154380439083735e-06, + "loss": 0.9569, + "step": 55890 + }, + { + "epoch": 0.8431626896738966, + "grad_norm": 0.6703031332889879, + "learning_rate": 1.2131519704022565e-06, + "loss": 0.9489, + "step": 55900 + }, + { + "epoch": 0.8433135237865396, + "grad_norm": 0.6500803053387736, + "learning_rate": 1.210867909980028e-06, + "loss": 0.938, + "step": 55910 + }, + { + "epoch": 0.8434643578991825, + "grad_norm": 0.6602804490785584, + "learning_rate": 1.2085858631649728e-06, + "loss": 0.9355, + "step": 55920 + }, + { + "epoch": 0.8436151920118254, + "grad_norm": 0.6752344978483641, + "learning_rate": 1.2063058304799091e-06, + "loss": 0.9461, + "step": 55930 + }, + { + "epoch": 0.8437660261244683, + "grad_norm": 0.6820282322123833, + "learning_rate": 1.2040278124472005e-06, + "loss": 0.9321, + "step": 55940 + }, + { + "epoch": 0.8439168602371112, + "grad_norm": 0.6903917930548235, + "learning_rate": 1.2017518095887437e-06, + "loss": 0.9583, + "step": 55950 + }, + { + "epoch": 0.8440676943497541, + "grad_norm": 0.6454282765082134, + "learning_rate": 1.1994778224259728e-06, + "loss": 0.9614, + "step": 55960 + }, + { + "epoch": 0.8442185284623971, + "grad_norm": 0.7418616519656126, + "learning_rate": 1.1972058514798667e-06, + "loss": 0.9538, + "step": 55970 + }, + { + "epoch": 0.8443693625750399, + "grad_norm": 0.6505254075330592, + "learning_rate": 1.194935897270937e-06, + "loss": 0.9374, + "step": 55980 + }, + { + "epoch": 0.8445201966876829, + "grad_norm": 0.73359247233582, + "learning_rate": 1.1926679603192315e-06, + "loss": 0.9615, + "step": 55990 + }, + { + "epoch": 0.8446710308003258, + "grad_norm": 0.657534069367192, + "learning_rate": 1.190402041144344e-06, + "loss": 0.9522, + "step": 56000 + }, + { + "epoch": 0.8448218649129687, + "grad_norm": 0.7003897303233884, + "learning_rate": 1.1881381402653991e-06, + "loss": 0.967, + "step": 56010 + }, + { + "epoch": 0.8449726990256117, + "grad_norm": 0.6728333748717518, + "learning_rate": 1.1858762582010575e-06, + "loss": 0.933, + "step": 56020 + }, + { + "epoch": 0.8451235331382545, + "grad_norm": 0.7075214291190784, + "learning_rate": 1.1836163954695246e-06, + "loss": 0.9499, + "step": 56030 + }, + { + "epoch": 0.8452743672508974, + "grad_norm": 0.6858830540070083, + "learning_rate": 1.1813585525885397e-06, + "loss": 0.9501, + "step": 56040 + }, + { + "epoch": 0.8454252013635404, + "grad_norm": 0.6628121782713592, + "learning_rate": 1.1791027300753754e-06, + "loss": 0.9412, + "step": 56050 + }, + { + "epoch": 0.8455760354761833, + "grad_norm": 0.6727025537549697, + "learning_rate": 1.1768489284468488e-06, + "loss": 0.9301, + "step": 56060 + }, + { + "epoch": 0.8457268695888263, + "grad_norm": 0.6801231427180094, + "learning_rate": 1.1745971482193074e-06, + "loss": 0.9578, + "step": 56070 + }, + { + "epoch": 0.8458777037014691, + "grad_norm": 0.6650244695630815, + "learning_rate": 1.1723473899086413e-06, + "loss": 0.9735, + "step": 56080 + }, + { + "epoch": 0.846028537814112, + "grad_norm": 0.6524971897626658, + "learning_rate": 1.1700996540302734e-06, + "loss": 0.9486, + "step": 56090 + }, + { + "epoch": 0.846179371926755, + "grad_norm": 0.7636367806264602, + "learning_rate": 1.1678539410991606e-06, + "loss": 0.9354, + "step": 56100 + }, + { + "epoch": 0.8463302060393979, + "grad_norm": 0.7169881104144261, + "learning_rate": 1.1656102516298062e-06, + "loss": 0.9469, + "step": 56110 + }, + { + "epoch": 0.8464810401520407, + "grad_norm": 0.673205206906749, + "learning_rate": 1.163368586136241e-06, + "loss": 0.9436, + "step": 56120 + }, + { + "epoch": 0.8466318742646837, + "grad_norm": 0.6586880181590994, + "learning_rate": 1.1611289451320317e-06, + "loss": 0.9496, + "step": 56130 + }, + { + "epoch": 0.8467827083773266, + "grad_norm": 0.6449727231475997, + "learning_rate": 1.1588913291302906e-06, + "loss": 0.9637, + "step": 56140 + }, + { + "epoch": 0.8469335424899695, + "grad_norm": 0.7476040946547907, + "learning_rate": 1.1566557386436561e-06, + "loss": 0.9625, + "step": 56150 + }, + { + "epoch": 0.8470843766026125, + "grad_norm": 0.670377129196441, + "learning_rate": 1.1544221741843054e-06, + "loss": 0.9665, + "step": 56160 + }, + { + "epoch": 0.8472352107152553, + "grad_norm": 0.7146703963515472, + "learning_rate": 1.1521906362639546e-06, + "loss": 0.9668, + "step": 56170 + }, + { + "epoch": 0.8473860448278983, + "grad_norm": 0.6372320755939547, + "learning_rate": 1.1499611253938537e-06, + "loss": 0.9441, + "step": 56180 + }, + { + "epoch": 0.8475368789405412, + "grad_norm": 0.6533896702728333, + "learning_rate": 1.1477336420847851e-06, + "loss": 0.9452, + "step": 56190 + }, + { + "epoch": 0.8476877130531841, + "grad_norm": 0.6723826332478321, + "learning_rate": 1.1455081868470708e-06, + "loss": 0.9572, + "step": 56200 + }, + { + "epoch": 0.8478385471658271, + "grad_norm": 0.6230003244423756, + "learning_rate": 1.1432847601905694e-06, + "loss": 0.9443, + "step": 56210 + }, + { + "epoch": 0.8479893812784699, + "grad_norm": 0.6376089775532163, + "learning_rate": 1.1410633626246693e-06, + "loss": 0.9378, + "step": 56220 + }, + { + "epoch": 0.8481402153911128, + "grad_norm": 0.6672668583672265, + "learning_rate": 1.1388439946582985e-06, + "loss": 0.9577, + "step": 56230 + }, + { + "epoch": 0.8482910495037558, + "grad_norm": 0.6774778653086156, + "learning_rate": 1.1366266567999173e-06, + "loss": 0.9279, + "step": 56240 + }, + { + "epoch": 0.8484418836163987, + "grad_norm": 0.7250530119715412, + "learning_rate": 1.1344113495575248e-06, + "loss": 0.9393, + "step": 56250 + }, + { + "epoch": 0.8485927177290415, + "grad_norm": 0.6577775050995287, + "learning_rate": 1.1321980734386496e-06, + "loss": 0.9588, + "step": 56260 + }, + { + "epoch": 0.8487435518416845, + "grad_norm": 0.6579638235974653, + "learning_rate": 1.1299868289503558e-06, + "loss": 0.9357, + "step": 56270 + }, + { + "epoch": 0.8488943859543274, + "grad_norm": 0.6387700163639783, + "learning_rate": 1.1277776165992482e-06, + "loss": 0.9405, + "step": 56280 + }, + { + "epoch": 0.8490452200669704, + "grad_norm": 0.7200158428955171, + "learning_rate": 1.12557043689146e-06, + "loss": 0.9555, + "step": 56290 + }, + { + "epoch": 0.8491960541796133, + "grad_norm": 0.6824703288516132, + "learning_rate": 1.1233652903326586e-06, + "loss": 0.9537, + "step": 56300 + }, + { + "epoch": 0.8493468882922561, + "grad_norm": 0.6754311561731045, + "learning_rate": 1.121162177428049e-06, + "loss": 0.9379, + "step": 56310 + }, + { + "epoch": 0.8494977224048991, + "grad_norm": 0.6437183417915243, + "learning_rate": 1.1189610986823697e-06, + "loss": 0.9509, + "step": 56320 + }, + { + "epoch": 0.849648556517542, + "grad_norm": 0.6544695062602796, + "learning_rate": 1.116762054599888e-06, + "loss": 0.9461, + "step": 56330 + }, + { + "epoch": 0.849799390630185, + "grad_norm": 0.6416310300137679, + "learning_rate": 1.1145650456844137e-06, + "loss": 0.9754, + "step": 56340 + }, + { + "epoch": 0.8499502247428279, + "grad_norm": 0.6669293776305777, + "learning_rate": 1.1123700724392817e-06, + "loss": 0.9685, + "step": 56350 + }, + { + "epoch": 0.8501010588554707, + "grad_norm": 0.6757173985653121, + "learning_rate": 1.1101771353673696e-06, + "loss": 0.9515, + "step": 56360 + }, + { + "epoch": 0.8502518929681137, + "grad_norm": 0.6736743555434572, + "learning_rate": 1.107986234971078e-06, + "loss": 0.9651, + "step": 56370 + }, + { + "epoch": 0.8504027270807566, + "grad_norm": 0.6477276395256022, + "learning_rate": 1.1057973717523497e-06, + "loss": 0.9412, + "step": 56380 + }, + { + "epoch": 0.8505535611933995, + "grad_norm": 0.6563230022264183, + "learning_rate": 1.1036105462126589e-06, + "loss": 0.9524, + "step": 56390 + }, + { + "epoch": 0.8507043953060424, + "grad_norm": 0.6723214304953372, + "learning_rate": 1.1014257588530086e-06, + "loss": 0.9505, + "step": 56400 + }, + { + "epoch": 0.8508552294186853, + "grad_norm": 0.6630112256850159, + "learning_rate": 1.0992430101739382e-06, + "loss": 0.9452, + "step": 56410 + }, + { + "epoch": 0.8510060635313282, + "grad_norm": 0.6612789697101764, + "learning_rate": 1.0970623006755222e-06, + "loss": 0.9312, + "step": 56420 + }, + { + "epoch": 0.8511568976439712, + "grad_norm": 0.6672012490525748, + "learning_rate": 1.094883630857363e-06, + "loss": 0.9552, + "step": 56430 + }, + { + "epoch": 0.8513077317566141, + "grad_norm": 0.6799649924247619, + "learning_rate": 1.092707001218596e-06, + "loss": 0.9522, + "step": 56440 + }, + { + "epoch": 0.851458565869257, + "grad_norm": 0.6827805607665068, + "learning_rate": 1.0905324122578964e-06, + "loss": 0.9549, + "step": 56450 + }, + { + "epoch": 0.8516093999818999, + "grad_norm": 0.6452306602265688, + "learning_rate": 1.0883598644734638e-06, + "loss": 0.9398, + "step": 56460 + }, + { + "epoch": 0.8517602340945428, + "grad_norm": 0.7202033137424059, + "learning_rate": 1.0861893583630322e-06, + "loss": 0.9472, + "step": 56470 + }, + { + "epoch": 0.8519110682071858, + "grad_norm": 0.6672211745696269, + "learning_rate": 1.0840208944238717e-06, + "loss": 0.949, + "step": 56480 + }, + { + "epoch": 0.8520619023198287, + "grad_norm": 0.6689748776596911, + "learning_rate": 1.081854473152778e-06, + "loss": 0.9685, + "step": 56490 + }, + { + "epoch": 0.8522127364324715, + "grad_norm": 0.6459613401382011, + "learning_rate": 1.0796900950460853e-06, + "loss": 0.9429, + "step": 56500 + }, + { + "epoch": 0.8523635705451145, + "grad_norm": 0.6449625994735206, + "learning_rate": 1.077527760599657e-06, + "loss": 0.9582, + "step": 56510 + }, + { + "epoch": 0.8525144046577574, + "grad_norm": 0.6767458743293502, + "learning_rate": 1.0753674703088846e-06, + "loss": 0.9506, + "step": 56520 + }, + { + "epoch": 0.8526652387704003, + "grad_norm": 0.6922702856090118, + "learning_rate": 1.0732092246686988e-06, + "loss": 0.9444, + "step": 56530 + }, + { + "epoch": 0.8528160728830432, + "grad_norm": 0.7281505478180237, + "learning_rate": 1.071053024173554e-06, + "loss": 0.949, + "step": 56540 + }, + { + "epoch": 0.8529669069956861, + "grad_norm": 0.6716357320083947, + "learning_rate": 1.0688988693174418e-06, + "loss": 0.9483, + "step": 56550 + }, + { + "epoch": 0.853117741108329, + "grad_norm": 0.6716320832201185, + "learning_rate": 1.0667467605938864e-06, + "loss": 0.9455, + "step": 56560 + }, + { + "epoch": 0.853268575220972, + "grad_norm": 0.6390282547740994, + "learning_rate": 1.0645966984959365e-06, + "loss": 0.951, + "step": 56570 + }, + { + "epoch": 0.8534194093336149, + "grad_norm": 0.6715091677053007, + "learning_rate": 1.0624486835161741e-06, + "loss": 0.9485, + "step": 56580 + }, + { + "epoch": 0.8535702434462578, + "grad_norm": 0.6555309460125169, + "learning_rate": 1.0603027161467183e-06, + "loss": 0.9769, + "step": 56590 + }, + { + "epoch": 0.8537210775589007, + "grad_norm": 0.6334128578442488, + "learning_rate": 1.0581587968792116e-06, + "loss": 0.9443, + "step": 56600 + }, + { + "epoch": 0.8538719116715436, + "grad_norm": 0.6947073975249733, + "learning_rate": 1.0560169262048269e-06, + "loss": 0.9607, + "step": 56610 + }, + { + "epoch": 0.8540227457841866, + "grad_norm": 0.635933272008296, + "learning_rate": 1.0538771046142782e-06, + "loss": 0.94, + "step": 56620 + }, + { + "epoch": 0.8541735798968295, + "grad_norm": 0.6909412724445655, + "learning_rate": 1.0517393325977954e-06, + "loss": 0.9482, + "step": 56630 + }, + { + "epoch": 0.8543244140094723, + "grad_norm": 0.6589385112101087, + "learning_rate": 1.0496036106451524e-06, + "loss": 0.9313, + "step": 56640 + }, + { + "epoch": 0.8544752481221153, + "grad_norm": 0.6795948097604163, + "learning_rate": 1.0474699392456434e-06, + "loss": 0.9489, + "step": 56650 + }, + { + "epoch": 0.8546260822347582, + "grad_norm": 0.669546268911843, + "learning_rate": 1.045338318888096e-06, + "loss": 0.9298, + "step": 56660 + }, + { + "epoch": 0.8547769163474012, + "grad_norm": 0.6851315480060969, + "learning_rate": 1.0432087500608722e-06, + "loss": 0.9549, + "step": 56670 + }, + { + "epoch": 0.854927750460044, + "grad_norm": 0.6947581994127169, + "learning_rate": 1.0410812332518583e-06, + "loss": 0.9445, + "step": 56680 + }, + { + "epoch": 0.8550785845726869, + "grad_norm": 0.6954259140441925, + "learning_rate": 1.0389557689484708e-06, + "loss": 0.9766, + "step": 56690 + }, + { + "epoch": 0.8552294186853299, + "grad_norm": 0.6557462619356492, + "learning_rate": 1.0368323576376592e-06, + "loss": 0.941, + "step": 56700 + }, + { + "epoch": 0.8553802527979728, + "grad_norm": 0.6617038637532622, + "learning_rate": 1.0347109998059036e-06, + "loss": 0.9601, + "step": 56710 + }, + { + "epoch": 0.8555310869106157, + "grad_norm": 0.7000261838850104, + "learning_rate": 1.0325916959392068e-06, + "loss": 0.9419, + "step": 56720 + }, + { + "epoch": 0.8556819210232586, + "grad_norm": 0.6627647914407426, + "learning_rate": 1.0304744465231097e-06, + "loss": 0.9405, + "step": 56730 + }, + { + "epoch": 0.8558327551359015, + "grad_norm": 0.6724200639150277, + "learning_rate": 1.0283592520426743e-06, + "loss": 0.944, + "step": 56740 + }, + { + "epoch": 0.8559835892485445, + "grad_norm": 0.713459596924031, + "learning_rate": 1.0262461129824996e-06, + "loss": 0.957, + "step": 56750 + }, + { + "epoch": 0.8561344233611874, + "grad_norm": 0.6654006811469481, + "learning_rate": 1.0241350298267073e-06, + "loss": 0.9576, + "step": 56760 + }, + { + "epoch": 0.8562852574738303, + "grad_norm": 0.6813953569214704, + "learning_rate": 1.0220260030589492e-06, + "loss": 0.9602, + "step": 56770 + }, + { + "epoch": 0.8564360915864732, + "grad_norm": 0.696477703109672, + "learning_rate": 1.019919033162412e-06, + "loss": 0.9233, + "step": 56780 + }, + { + "epoch": 0.8565869256991161, + "grad_norm": 0.6543916617748546, + "learning_rate": 1.0178141206198034e-06, + "loss": 0.9626, + "step": 56790 + }, + { + "epoch": 0.856737759811759, + "grad_norm": 0.6787702733010041, + "learning_rate": 1.0157112659133617e-06, + "loss": 0.9233, + "step": 56800 + }, + { + "epoch": 0.856888593924402, + "grad_norm": 0.6543197309017502, + "learning_rate": 1.0136104695248583e-06, + "loss": 0.9439, + "step": 56810 + }, + { + "epoch": 0.8570394280370448, + "grad_norm": 0.6678085280836296, + "learning_rate": 1.0115117319355882e-06, + "loss": 0.9399, + "step": 56820 + }, + { + "epoch": 0.8571902621496877, + "grad_norm": 0.6481824534304277, + "learning_rate": 1.0094150536263746e-06, + "loss": 0.9583, + "step": 56830 + }, + { + "epoch": 0.8573410962623307, + "grad_norm": 0.6768265763301797, + "learning_rate": 1.0073204350775744e-06, + "loss": 0.9464, + "step": 56840 + }, + { + "epoch": 0.8574919303749736, + "grad_norm": 0.6587983107286024, + "learning_rate": 1.0052278767690649e-06, + "loss": 0.9535, + "step": 56850 + }, + { + "epoch": 0.8576427644876166, + "grad_norm": 0.6848620951857299, + "learning_rate": 1.0031373791802579e-06, + "loss": 0.9682, + "step": 56860 + }, + { + "epoch": 0.8577935986002594, + "grad_norm": 0.7322153894740956, + "learning_rate": 1.0010489427900883e-06, + "loss": 0.9651, + "step": 56870 + }, + { + "epoch": 0.8579444327129023, + "grad_norm": 0.659571599140204, + "learning_rate": 9.989625680770233e-07, + "loss": 0.9349, + "step": 56880 + }, + { + "epoch": 0.8580952668255453, + "grad_norm": 0.688883943297916, + "learning_rate": 9.968782555190526e-07, + "loss": 0.9638, + "step": 56890 + }, + { + "epoch": 0.8582461009381882, + "grad_norm": 0.6598487611854481, + "learning_rate": 9.947960055937e-07, + "loss": 0.9525, + "step": 56900 + }, + { + "epoch": 0.8583969350508311, + "grad_norm": 0.6374509370937773, + "learning_rate": 9.927158187780085e-07, + "loss": 0.9649, + "step": 56910 + }, + { + "epoch": 0.858547769163474, + "grad_norm": 0.6476535008574185, + "learning_rate": 9.906376955485565e-07, + "loss": 0.9435, + "step": 56920 + }, + { + "epoch": 0.8586986032761169, + "grad_norm": 0.644298862668246, + "learning_rate": 9.885616363814455e-07, + "loss": 0.9464, + "step": 56930 + }, + { + "epoch": 0.8588494373887599, + "grad_norm": 0.6358182071868245, + "learning_rate": 9.864876417523018e-07, + "loss": 0.9561, + "step": 56940 + }, + { + "epoch": 0.8590002715014028, + "grad_norm": 0.6823662320720838, + "learning_rate": 9.844157121362862e-07, + "loss": 0.9339, + "step": 56950 + }, + { + "epoch": 0.8591511056140456, + "grad_norm": 0.6525659475773188, + "learning_rate": 9.823458480080783e-07, + "loss": 0.9467, + "step": 56960 + }, + { + "epoch": 0.8593019397266886, + "grad_norm": 0.6605830075383982, + "learning_rate": 9.802780498418885e-07, + "loss": 0.9421, + "step": 56970 + }, + { + "epoch": 0.8594527738393315, + "grad_norm": 0.626434000715552, + "learning_rate": 9.782123181114545e-07, + "loss": 0.9241, + "step": 56980 + }, + { + "epoch": 0.8596036079519744, + "grad_norm": 0.6870448696442781, + "learning_rate": 9.761486532900387e-07, + "loss": 0.9545, + "step": 56990 + }, + { + "epoch": 0.8597544420646174, + "grad_norm": 0.6769977766450325, + "learning_rate": 9.7408705585043e-07, + "loss": 0.9395, + "step": 57000 + }, + { + "epoch": 0.8599052761772602, + "grad_norm": 0.6802149162580658, + "learning_rate": 9.720275262649458e-07, + "loss": 0.9665, + "step": 57010 + }, + { + "epoch": 0.8600561102899031, + "grad_norm": 0.662899265710393, + "learning_rate": 9.699700650054267e-07, + "loss": 0.9369, + "step": 57020 + }, + { + "epoch": 0.8602069444025461, + "grad_norm": 0.7114616811627585, + "learning_rate": 9.679146725432443e-07, + "loss": 0.9589, + "step": 57030 + }, + { + "epoch": 0.860357778515189, + "grad_norm": 0.6878819334669299, + "learning_rate": 9.658613493492874e-07, + "loss": 0.9403, + "step": 57040 + }, + { + "epoch": 0.860508612627832, + "grad_norm": 0.6821309968795307, + "learning_rate": 9.63810095893981e-07, + "loss": 0.9505, + "step": 57050 + }, + { + "epoch": 0.8606594467404748, + "grad_norm": 0.6809125104841982, + "learning_rate": 9.617609126472705e-07, + "loss": 0.9391, + "step": 57060 + }, + { + "epoch": 0.8608102808531177, + "grad_norm": 0.6537908263508847, + "learning_rate": 9.597138000786277e-07, + "loss": 0.9476, + "step": 57070 + }, + { + "epoch": 0.8609611149657607, + "grad_norm": 0.6958125089823479, + "learning_rate": 9.576687586570476e-07, + "loss": 0.9708, + "step": 57080 + }, + { + "epoch": 0.8611119490784036, + "grad_norm": 0.6563402443615849, + "learning_rate": 9.556257888510567e-07, + "loss": 0.9603, + "step": 57090 + }, + { + "epoch": 0.8612627831910464, + "grad_norm": 0.7227240066609935, + "learning_rate": 9.535848911287026e-07, + "loss": 0.9412, + "step": 57100 + }, + { + "epoch": 0.8614136173036894, + "grad_norm": 0.6740229924329764, + "learning_rate": 9.515460659575548e-07, + "loss": 0.9222, + "step": 57110 + }, + { + "epoch": 0.8615644514163323, + "grad_norm": 0.628811817537418, + "learning_rate": 9.495093138047174e-07, + "loss": 0.9405, + "step": 57120 + }, + { + "epoch": 0.8617152855289752, + "grad_norm": 0.6612946486627775, + "learning_rate": 9.474746351368125e-07, + "loss": 0.9394, + "step": 57130 + }, + { + "epoch": 0.8618661196416182, + "grad_norm": 0.6377060582821179, + "learning_rate": 9.454420304199874e-07, + "loss": 0.9529, + "step": 57140 + }, + { + "epoch": 0.862016953754261, + "grad_norm": 0.6570955830357369, + "learning_rate": 9.434115001199174e-07, + "loss": 0.9476, + "step": 57150 + }, + { + "epoch": 0.862167787866904, + "grad_norm": 0.6695290378823279, + "learning_rate": 9.413830447017991e-07, + "loss": 0.9569, + "step": 57160 + }, + { + "epoch": 0.8623186219795469, + "grad_norm": 0.6540798221442012, + "learning_rate": 9.393566646303587e-07, + "loss": 0.9444, + "step": 57170 + }, + { + "epoch": 0.8624694560921898, + "grad_norm": 0.6943989319685778, + "learning_rate": 9.373323603698425e-07, + "loss": 0.9571, + "step": 57180 + }, + { + "epoch": 0.8626202902048328, + "grad_norm": 0.6605222681203649, + "learning_rate": 9.353101323840197e-07, + "loss": 0.9505, + "step": 57190 + }, + { + "epoch": 0.8627711243174756, + "grad_norm": 0.6523746812961613, + "learning_rate": 9.332899811361884e-07, + "loss": 0.9508, + "step": 57200 + }, + { + "epoch": 0.8629219584301185, + "grad_norm": 0.7708685786150006, + "learning_rate": 9.312719070891729e-07, + "loss": 0.9361, + "step": 57210 + }, + { + "epoch": 0.8630727925427615, + "grad_norm": 0.6828380057326501, + "learning_rate": 9.292559107053112e-07, + "loss": 0.9509, + "step": 57220 + }, + { + "epoch": 0.8632236266554044, + "grad_norm": 0.6693892014920663, + "learning_rate": 9.272419924464781e-07, + "loss": 0.935, + "step": 57230 + }, + { + "epoch": 0.8633744607680472, + "grad_norm": 0.6473958446162575, + "learning_rate": 9.252301527740637e-07, + "loss": 0.9412, + "step": 57240 + }, + { + "epoch": 0.8635252948806902, + "grad_norm": 0.7101402465353068, + "learning_rate": 9.232203921489824e-07, + "loss": 0.9554, + "step": 57250 + }, + { + "epoch": 0.8636761289933331, + "grad_norm": 0.6689147375935893, + "learning_rate": 9.212127110316782e-07, + "loss": 0.9528, + "step": 57260 + }, + { + "epoch": 0.8638269631059761, + "grad_norm": 0.669275398463237, + "learning_rate": 9.192071098821121e-07, + "loss": 0.9399, + "step": 57270 + }, + { + "epoch": 0.863977797218619, + "grad_norm": 0.7083928133454476, + "learning_rate": 9.172035891597697e-07, + "loss": 0.952, + "step": 57280 + }, + { + "epoch": 0.8641286313312618, + "grad_norm": 0.6855154167620401, + "learning_rate": 9.152021493236662e-07, + "loss": 0.9409, + "step": 57290 + }, + { + "epoch": 0.8642794654439048, + "grad_norm": 0.6618968865625718, + "learning_rate": 9.132027908323294e-07, + "loss": 0.948, + "step": 57300 + }, + { + "epoch": 0.8644302995565477, + "grad_norm": 0.6889532114624622, + "learning_rate": 9.112055141438225e-07, + "loss": 0.9532, + "step": 57310 + }, + { + "epoch": 0.8645811336691906, + "grad_norm": 0.6473323492572562, + "learning_rate": 9.092103197157221e-07, + "loss": 0.9373, + "step": 57320 + }, + { + "epoch": 0.8647319677818336, + "grad_norm": 0.6495487556274964, + "learning_rate": 9.072172080051289e-07, + "loss": 0.9481, + "step": 57330 + }, + { + "epoch": 0.8648828018944764, + "grad_norm": 0.67382035251976, + "learning_rate": 9.052261794686734e-07, + "loss": 0.9533, + "step": 57340 + }, + { + "epoch": 0.8650336360071194, + "grad_norm": 0.6958512619147729, + "learning_rate": 9.032372345625007e-07, + "loss": 0.9603, + "step": 57350 + }, + { + "epoch": 0.8651844701197623, + "grad_norm": 0.6646064595979808, + "learning_rate": 9.012503737422828e-07, + "loss": 0.9508, + "step": 57360 + }, + { + "epoch": 0.8653353042324052, + "grad_norm": 0.6688915556839401, + "learning_rate": 8.992655974632125e-07, + "loss": 0.9197, + "step": 57370 + }, + { + "epoch": 0.8654861383450481, + "grad_norm": 0.6741757913815639, + "learning_rate": 8.972829061800092e-07, + "loss": 0.9462, + "step": 57380 + }, + { + "epoch": 0.865636972457691, + "grad_norm": 0.675903960667421, + "learning_rate": 8.953023003469074e-07, + "loss": 0.9478, + "step": 57390 + }, + { + "epoch": 0.8657878065703339, + "grad_norm": 0.6710064697313801, + "learning_rate": 8.933237804176708e-07, + "loss": 0.9378, + "step": 57400 + }, + { + "epoch": 0.8659386406829769, + "grad_norm": 0.6556589272506816, + "learning_rate": 8.913473468455791e-07, + "loss": 0.941, + "step": 57410 + }, + { + "epoch": 0.8660894747956198, + "grad_norm": 0.718128545268666, + "learning_rate": 8.893730000834411e-07, + "loss": 0.9751, + "step": 57420 + }, + { + "epoch": 0.8662403089082626, + "grad_norm": 0.6809220253974237, + "learning_rate": 8.874007405835805e-07, + "loss": 0.952, + "step": 57430 + }, + { + "epoch": 0.8663911430209056, + "grad_norm": 0.6721014084116582, + "learning_rate": 8.854305687978448e-07, + "loss": 0.96, + "step": 57440 + }, + { + "epoch": 0.8665419771335485, + "grad_norm": 0.657426689265164, + "learning_rate": 8.834624851776074e-07, + "loss": 0.9521, + "step": 57450 + }, + { + "epoch": 0.8666928112461915, + "grad_norm": 0.6496201283438695, + "learning_rate": 8.814964901737588e-07, + "loss": 0.9541, + "step": 57460 + }, + { + "epoch": 0.8668436453588344, + "grad_norm": 0.6792138120033318, + "learning_rate": 8.795325842367098e-07, + "loss": 0.9405, + "step": 57470 + }, + { + "epoch": 0.8669944794714772, + "grad_norm": 0.6709670620909632, + "learning_rate": 8.77570767816398e-07, + "loss": 0.9475, + "step": 57480 + }, + { + "epoch": 0.8671453135841202, + "grad_norm": 0.664873137707105, + "learning_rate": 8.756110413622799e-07, + "loss": 0.9505, + "step": 57490 + }, + { + "epoch": 0.8672961476967631, + "grad_norm": 0.7041014839630031, + "learning_rate": 8.736534053233281e-07, + "loss": 0.9468, + "step": 57500 + }, + { + "epoch": 0.867446981809406, + "grad_norm": 0.6500893965800828, + "learning_rate": 8.716978601480463e-07, + "loss": 0.9504, + "step": 57510 + }, + { + "epoch": 0.8675978159220489, + "grad_norm": 0.6475005962455483, + "learning_rate": 8.697444062844496e-07, + "loss": 0.9418, + "step": 57520 + }, + { + "epoch": 0.8677486500346918, + "grad_norm": 0.648388694363382, + "learning_rate": 8.677930441800808e-07, + "loss": 0.9436, + "step": 57530 + }, + { + "epoch": 0.8678994841473348, + "grad_norm": 0.646293556239782, + "learning_rate": 8.65843774281998e-07, + "loss": 0.9417, + "step": 57540 + }, + { + "epoch": 0.8680503182599777, + "grad_norm": 0.6684906409755981, + "learning_rate": 8.638965970367852e-07, + "loss": 0.9517, + "step": 57550 + }, + { + "epoch": 0.8682011523726206, + "grad_norm": 0.6860848913949088, + "learning_rate": 8.619515128905442e-07, + "loss": 0.9827, + "step": 57560 + }, + { + "epoch": 0.8683519864852635, + "grad_norm": 0.6533281345805921, + "learning_rate": 8.600085222888977e-07, + "loss": 0.9392, + "step": 57570 + }, + { + "epoch": 0.8685028205979064, + "grad_norm": 0.6594058563573019, + "learning_rate": 8.58067625676986e-07, + "loss": 0.9447, + "step": 57580 + }, + { + "epoch": 0.8686536547105493, + "grad_norm": 0.6590868657172209, + "learning_rate": 8.561288234994758e-07, + "loss": 0.9552, + "step": 57590 + }, + { + "epoch": 0.8688044888231923, + "grad_norm": 0.6746005248716327, + "learning_rate": 8.541921162005496e-07, + "loss": 0.9354, + "step": 57600 + }, + { + "epoch": 0.8689553229358352, + "grad_norm": 0.6615666321144739, + "learning_rate": 8.522575042239068e-07, + "loss": 0.9782, + "step": 57610 + }, + { + "epoch": 0.869106157048478, + "grad_norm": 0.6969733687115037, + "learning_rate": 8.503249880127762e-07, + "loss": 0.9509, + "step": 57620 + }, + { + "epoch": 0.869256991161121, + "grad_norm": 0.6514948180232703, + "learning_rate": 8.483945680098993e-07, + "loss": 0.9428, + "step": 57630 + }, + { + "epoch": 0.8694078252737639, + "grad_norm": 0.6940451372188933, + "learning_rate": 8.464662446575367e-07, + "loss": 0.9615, + "step": 57640 + }, + { + "epoch": 0.8695586593864069, + "grad_norm": 0.6574324526199685, + "learning_rate": 8.445400183974739e-07, + "loss": 0.9362, + "step": 57650 + }, + { + "epoch": 0.8697094934990497, + "grad_norm": 0.6638517688163152, + "learning_rate": 8.426158896710123e-07, + "loss": 0.9454, + "step": 57660 + }, + { + "epoch": 0.8698603276116926, + "grad_norm": 0.7930377587852807, + "learning_rate": 8.406938589189706e-07, + "loss": 0.9467, + "step": 57670 + }, + { + "epoch": 0.8700111617243356, + "grad_norm": 0.6520957519008219, + "learning_rate": 8.387739265816952e-07, + "loss": 0.9314, + "step": 57680 + }, + { + "epoch": 0.8701619958369785, + "grad_norm": 0.6639792043867819, + "learning_rate": 8.368560930990411e-07, + "loss": 0.939, + "step": 57690 + }, + { + "epoch": 0.8703128299496214, + "grad_norm": 0.6704589556775951, + "learning_rate": 8.349403589103922e-07, + "loss": 0.9509, + "step": 57700 + }, + { + "epoch": 0.8704636640622643, + "grad_norm": 0.7231636863499145, + "learning_rate": 8.330267244546419e-07, + "loss": 0.947, + "step": 57710 + }, + { + "epoch": 0.8706144981749072, + "grad_norm": 0.6625084254829263, + "learning_rate": 8.311151901702119e-07, + "loss": 0.9545, + "step": 57720 + }, + { + "epoch": 0.8707653322875502, + "grad_norm": 0.7287939846075313, + "learning_rate": 8.292057564950374e-07, + "loss": 0.9285, + "step": 57730 + }, + { + "epoch": 0.8709161664001931, + "grad_norm": 0.645337071309584, + "learning_rate": 8.272984238665738e-07, + "loss": 0.9348, + "step": 57740 + }, + { + "epoch": 0.871067000512836, + "grad_norm": 0.6416244091777346, + "learning_rate": 8.253931927217918e-07, + "loss": 0.9461, + "step": 57750 + }, + { + "epoch": 0.8712178346254789, + "grad_norm": 0.6453484421060919, + "learning_rate": 8.234900634971865e-07, + "loss": 0.9521, + "step": 57760 + }, + { + "epoch": 0.8713686687381218, + "grad_norm": 0.6426927139876586, + "learning_rate": 8.215890366287682e-07, + "loss": 0.9506, + "step": 57770 + }, + { + "epoch": 0.8715195028507647, + "grad_norm": 0.6975962241356936, + "learning_rate": 8.196901125520629e-07, + "loss": 0.9578, + "step": 57780 + }, + { + "epoch": 0.8716703369634077, + "grad_norm": 0.6865822319192624, + "learning_rate": 8.177932917021214e-07, + "loss": 0.9369, + "step": 57790 + }, + { + "epoch": 0.8718211710760505, + "grad_norm": 0.6423349110454523, + "learning_rate": 8.158985745135084e-07, + "loss": 0.9279, + "step": 57800 + }, + { + "epoch": 0.8719720051886934, + "grad_norm": 0.6755947832027156, + "learning_rate": 8.140059614203044e-07, + "loss": 0.9468, + "step": 57810 + }, + { + "epoch": 0.8721228393013364, + "grad_norm": 0.6558110088036834, + "learning_rate": 8.121154528561137e-07, + "loss": 0.9468, + "step": 57820 + }, + { + "epoch": 0.8722736734139793, + "grad_norm": 0.667982939956077, + "learning_rate": 8.102270492540543e-07, + "loss": 0.9688, + "step": 57830 + }, + { + "epoch": 0.8724245075266223, + "grad_norm": 0.6352546213520056, + "learning_rate": 8.083407510467644e-07, + "loss": 0.9551, + "step": 57840 + }, + { + "epoch": 0.8725753416392651, + "grad_norm": 0.6607140713680381, + "learning_rate": 8.064565586663974e-07, + "loss": 0.9563, + "step": 57850 + }, + { + "epoch": 0.872726175751908, + "grad_norm": 0.6577904461839774, + "learning_rate": 8.045744725446236e-07, + "loss": 0.9458, + "step": 57860 + }, + { + "epoch": 0.872877009864551, + "grad_norm": 0.6518704710198716, + "learning_rate": 8.026944931126357e-07, + "loss": 0.9379, + "step": 57870 + }, + { + "epoch": 0.8730278439771939, + "grad_norm": 0.6416739573960192, + "learning_rate": 8.008166208011403e-07, + "loss": 0.9465, + "step": 57880 + }, + { + "epoch": 0.8731786780898368, + "grad_norm": 0.6615304400594504, + "learning_rate": 7.989408560403589e-07, + "loss": 0.9402, + "step": 57890 + }, + { + "epoch": 0.8733295122024797, + "grad_norm": 0.7051087575371546, + "learning_rate": 7.970671992600366e-07, + "loss": 0.9539, + "step": 57900 + }, + { + "epoch": 0.8734803463151226, + "grad_norm": 0.6549041188483405, + "learning_rate": 7.95195650889431e-07, + "loss": 0.9434, + "step": 57910 + }, + { + "epoch": 0.8736311804277656, + "grad_norm": 0.6456209609414995, + "learning_rate": 7.933262113573136e-07, + "loss": 0.963, + "step": 57920 + }, + { + "epoch": 0.8737820145404085, + "grad_norm": 0.6624219070257971, + "learning_rate": 7.91458881091981e-07, + "loss": 0.9418, + "step": 57930 + }, + { + "epoch": 0.8739328486530513, + "grad_norm": 0.6480146734494069, + "learning_rate": 7.895936605212407e-07, + "loss": 0.9448, + "step": 57940 + }, + { + "epoch": 0.8740836827656943, + "grad_norm": 0.6642375331927722, + "learning_rate": 7.877305500724164e-07, + "loss": 0.9567, + "step": 57950 + }, + { + "epoch": 0.8742345168783372, + "grad_norm": 0.6520767003441319, + "learning_rate": 7.858695501723535e-07, + "loss": 0.9405, + "step": 57960 + }, + { + "epoch": 0.8743853509909801, + "grad_norm": 0.6732036018247144, + "learning_rate": 7.840106612474085e-07, + "loss": 0.9546, + "step": 57970 + }, + { + "epoch": 0.8745361851036231, + "grad_norm": 0.6569531609350513, + "learning_rate": 7.821538837234577e-07, + "loss": 0.9413, + "step": 57980 + }, + { + "epoch": 0.8746870192162659, + "grad_norm": 0.6707849979008428, + "learning_rate": 7.802992180258928e-07, + "loss": 0.9613, + "step": 57990 + }, + { + "epoch": 0.8748378533289088, + "grad_norm": 0.6747468589597809, + "learning_rate": 7.784466645796185e-07, + "loss": 0.9661, + "step": 58000 + }, + { + "epoch": 0.8749886874415518, + "grad_norm": 0.6632889571084168, + "learning_rate": 7.765962238090619e-07, + "loss": 0.929, + "step": 58010 + }, + { + "epoch": 0.8751395215541947, + "grad_norm": 0.7408063598481535, + "learning_rate": 7.747478961381594e-07, + "loss": 0.9433, + "step": 58020 + }, + { + "epoch": 0.8752903556668377, + "grad_norm": 0.6802574405318925, + "learning_rate": 7.729016819903701e-07, + "loss": 0.9525, + "step": 58030 + }, + { + "epoch": 0.8754411897794805, + "grad_norm": 0.7225394083821812, + "learning_rate": 7.710575817886612e-07, + "loss": 0.9358, + "step": 58040 + }, + { + "epoch": 0.8755920238921234, + "grad_norm": 0.6777029054191773, + "learning_rate": 7.692155959555225e-07, + "loss": 0.9503, + "step": 58050 + }, + { + "epoch": 0.8757428580047664, + "grad_norm": 0.6467737636168622, + "learning_rate": 7.673757249129554e-07, + "loss": 0.9566, + "step": 58060 + }, + { + "epoch": 0.8758936921174093, + "grad_norm": 0.6868033934676373, + "learning_rate": 7.655379690824793e-07, + "loss": 0.9459, + "step": 58070 + }, + { + "epoch": 0.8760445262300521, + "grad_norm": 0.6738982285499159, + "learning_rate": 7.637023288851253e-07, + "loss": 0.9507, + "step": 58080 + }, + { + "epoch": 0.8761953603426951, + "grad_norm": 0.7008371153898327, + "learning_rate": 7.618688047414424e-07, + "loss": 0.9415, + "step": 58090 + }, + { + "epoch": 0.876346194455338, + "grad_norm": 0.6923504690858373, + "learning_rate": 7.60037397071498e-07, + "loss": 0.9416, + "step": 58100 + }, + { + "epoch": 0.876497028567981, + "grad_norm": 0.6516409662827974, + "learning_rate": 7.582081062948654e-07, + "loss": 0.939, + "step": 58110 + }, + { + "epoch": 0.8766478626806239, + "grad_norm": 0.7122298025484189, + "learning_rate": 7.563809328306448e-07, + "loss": 0.9444, + "step": 58120 + }, + { + "epoch": 0.8767986967932667, + "grad_norm": 0.662050090119573, + "learning_rate": 7.545558770974415e-07, + "loss": 0.9694, + "step": 58130 + }, + { + "epoch": 0.8769495309059097, + "grad_norm": 0.6877934977662468, + "learning_rate": 7.527329395133775e-07, + "loss": 0.9501, + "step": 58140 + }, + { + "epoch": 0.8771003650185526, + "grad_norm": 0.6662760893168943, + "learning_rate": 7.509121204960967e-07, + "loss": 0.9333, + "step": 58150 + }, + { + "epoch": 0.8772511991311955, + "grad_norm": 0.6784782791988776, + "learning_rate": 7.490934204627487e-07, + "loss": 0.9649, + "step": 58160 + }, + { + "epoch": 0.8774020332438385, + "grad_norm": 0.6730430166642042, + "learning_rate": 7.472768398300001e-07, + "loss": 0.9484, + "step": 58170 + }, + { + "epoch": 0.8775528673564813, + "grad_norm": 0.6765756931391267, + "learning_rate": 7.454623790140369e-07, + "loss": 0.9413, + "step": 58180 + }, + { + "epoch": 0.8777037014691242, + "grad_norm": 0.6467321756631975, + "learning_rate": 7.436500384305522e-07, + "loss": 0.9308, + "step": 58190 + }, + { + "epoch": 0.8778545355817672, + "grad_norm": 0.6606703266518118, + "learning_rate": 7.418398184947584e-07, + "loss": 0.939, + "step": 58200 + }, + { + "epoch": 0.8780053696944101, + "grad_norm": 0.7431557211669368, + "learning_rate": 7.400317196213791e-07, + "loss": 0.9527, + "step": 58210 + }, + { + "epoch": 0.878156203807053, + "grad_norm": 0.6777797970462388, + "learning_rate": 7.382257422246542e-07, + "loss": 0.9388, + "step": 58220 + }, + { + "epoch": 0.8783070379196959, + "grad_norm": 0.6650466155877357, + "learning_rate": 7.364218867183382e-07, + "loss": 0.9653, + "step": 58230 + }, + { + "epoch": 0.8784578720323388, + "grad_norm": 0.6981641703248811, + "learning_rate": 7.346201535156971e-07, + "loss": 0.9028, + "step": 58240 + }, + { + "epoch": 0.8786087061449818, + "grad_norm": 0.6470250478575896, + "learning_rate": 7.328205430295076e-07, + "loss": 0.9239, + "step": 58250 + }, + { + "epoch": 0.8787595402576247, + "grad_norm": 0.7074417257466024, + "learning_rate": 7.310230556720699e-07, + "loss": 0.9541, + "step": 58260 + }, + { + "epoch": 0.8789103743702675, + "grad_norm": 0.6806795853842814, + "learning_rate": 7.292276918551877e-07, + "loss": 0.9357, + "step": 58270 + }, + { + "epoch": 0.8790612084829105, + "grad_norm": 0.7218103688603464, + "learning_rate": 7.274344519901821e-07, + "loss": 0.9567, + "step": 58280 + }, + { + "epoch": 0.8792120425955534, + "grad_norm": 0.6974970797884136, + "learning_rate": 7.256433364878913e-07, + "loss": 0.9547, + "step": 58290 + }, + { + "epoch": 0.8793628767081964, + "grad_norm": 0.6771468138949416, + "learning_rate": 7.238543457586611e-07, + "loss": 0.9294, + "step": 58300 + }, + { + "epoch": 0.8795137108208393, + "grad_norm": 0.6577878507597346, + "learning_rate": 7.220674802123517e-07, + "loss": 0.9547, + "step": 58310 + }, + { + "epoch": 0.8796645449334821, + "grad_norm": 0.6670822151886976, + "learning_rate": 7.202827402583402e-07, + "loss": 0.9573, + "step": 58320 + }, + { + "epoch": 0.8798153790461251, + "grad_norm": 0.6726160378193107, + "learning_rate": 7.185001263055114e-07, + "loss": 0.9563, + "step": 58330 + }, + { + "epoch": 0.879966213158768, + "grad_norm": 0.7267175482816317, + "learning_rate": 7.167196387622666e-07, + "loss": 0.9452, + "step": 58340 + }, + { + "epoch": 0.8801170472714109, + "grad_norm": 0.7164137763871113, + "learning_rate": 7.149412780365195e-07, + "loss": 0.943, + "step": 58350 + }, + { + "epoch": 0.8802678813840538, + "grad_norm": 0.6526751090949473, + "learning_rate": 7.131650445356942e-07, + "loss": 0.9521, + "step": 58360 + }, + { + "epoch": 0.8804187154966967, + "grad_norm": 0.7170001215906873, + "learning_rate": 7.113909386667306e-07, + "loss": 0.9616, + "step": 58370 + }, + { + "epoch": 0.8805695496093396, + "grad_norm": 0.6772059218201935, + "learning_rate": 7.096189608360815e-07, + "loss": 0.9458, + "step": 58380 + }, + { + "epoch": 0.8807203837219826, + "grad_norm": 0.6882192118327551, + "learning_rate": 7.078491114497066e-07, + "loss": 0.955, + "step": 58390 + }, + { + "epoch": 0.8808712178346255, + "grad_norm": 0.6705128049650495, + "learning_rate": 7.060813909130848e-07, + "loss": 0.9551, + "step": 58400 + }, + { + "epoch": 0.8810220519472683, + "grad_norm": 0.6668346502651173, + "learning_rate": 7.043157996312044e-07, + "loss": 0.9487, + "step": 58410 + }, + { + "epoch": 0.8811728860599113, + "grad_norm": 0.6615915914122449, + "learning_rate": 7.02552338008563e-07, + "loss": 0.9652, + "step": 58420 + }, + { + "epoch": 0.8813237201725542, + "grad_norm": 0.7017786920807567, + "learning_rate": 7.007910064491763e-07, + "loss": 0.9377, + "step": 58430 + }, + { + "epoch": 0.8814745542851972, + "grad_norm": 0.6741791155223517, + "learning_rate": 6.990318053565681e-07, + "loss": 0.9645, + "step": 58440 + }, + { + "epoch": 0.8816253883978401, + "grad_norm": 0.7163859617809246, + "learning_rate": 6.972747351337716e-07, + "loss": 0.9353, + "step": 58450 + }, + { + "epoch": 0.8817762225104829, + "grad_norm": 0.677846466972082, + "learning_rate": 6.955197961833393e-07, + "loss": 0.9489, + "step": 58460 + }, + { + "epoch": 0.8819270566231259, + "grad_norm": 0.6593530579153506, + "learning_rate": 6.937669889073284e-07, + "loss": 0.9541, + "step": 58470 + }, + { + "epoch": 0.8820778907357688, + "grad_norm": 0.6568711751984948, + "learning_rate": 6.920163137073099e-07, + "loss": 0.9651, + "step": 58480 + }, + { + "epoch": 0.8822287248484117, + "grad_norm": 0.6649866537406113, + "learning_rate": 6.902677709843708e-07, + "loss": 0.9537, + "step": 58490 + }, + { + "epoch": 0.8823795589610546, + "grad_norm": 0.645922293390908, + "learning_rate": 6.885213611391006e-07, + "loss": 0.9465, + "step": 58500 + }, + { + "epoch": 0.8825303930736975, + "grad_norm": 0.6662735544984836, + "learning_rate": 6.867770845716093e-07, + "loss": 0.9338, + "step": 58510 + }, + { + "epoch": 0.8826812271863405, + "grad_norm": 0.6451104661261721, + "learning_rate": 6.850349416815106e-07, + "loss": 0.9353, + "step": 58520 + }, + { + "epoch": 0.8828320612989834, + "grad_norm": 0.6705632467574832, + "learning_rate": 6.832949328679362e-07, + "loss": 0.942, + "step": 58530 + }, + { + "epoch": 0.8829828954116263, + "grad_norm": 0.6412341000579458, + "learning_rate": 6.815570585295217e-07, + "loss": 0.9621, + "step": 58540 + }, + { + "epoch": 0.8831337295242692, + "grad_norm": 0.6661923666121033, + "learning_rate": 6.79821319064421e-07, + "loss": 0.9461, + "step": 58550 + }, + { + "epoch": 0.8832845636369121, + "grad_norm": 0.6680034108885966, + "learning_rate": 6.780877148702936e-07, + "loss": 0.9437, + "step": 58560 + }, + { + "epoch": 0.883435397749555, + "grad_norm": 0.6625137775051796, + "learning_rate": 6.76356246344313e-07, + "loss": 0.9584, + "step": 58570 + }, + { + "epoch": 0.883586231862198, + "grad_norm": 0.662884863007266, + "learning_rate": 6.746269138831607e-07, + "loss": 0.9486, + "step": 58580 + }, + { + "epoch": 0.8837370659748409, + "grad_norm": 0.6634228472085567, + "learning_rate": 6.728997178830288e-07, + "loss": 0.9463, + "step": 58590 + }, + { + "epoch": 0.8838879000874837, + "grad_norm": 0.681231831740882, + "learning_rate": 6.71174658739624e-07, + "loss": 0.934, + "step": 58600 + }, + { + "epoch": 0.8840387342001267, + "grad_norm": 0.6406550838094918, + "learning_rate": 6.694517368481601e-07, + "loss": 0.928, + "step": 58610 + }, + { + "epoch": 0.8841895683127696, + "grad_norm": 0.7149921235215994, + "learning_rate": 6.677309526033593e-07, + "loss": 0.9434, + "step": 58620 + }, + { + "epoch": 0.8843404024254126, + "grad_norm": 0.6686167294620456, + "learning_rate": 6.660123063994605e-07, + "loss": 0.949, + "step": 58630 + }, + { + "epoch": 0.8844912365380554, + "grad_norm": 0.7008355600734382, + "learning_rate": 6.642957986302057e-07, + "loss": 0.9443, + "step": 58640 + }, + { + "epoch": 0.8846420706506983, + "grad_norm": 0.6640683887038829, + "learning_rate": 6.625814296888522e-07, + "loss": 0.9397, + "step": 58650 + }, + { + "epoch": 0.8847929047633413, + "grad_norm": 0.6930890564610962, + "learning_rate": 6.608691999681638e-07, + "loss": 0.9499, + "step": 58660 + }, + { + "epoch": 0.8849437388759842, + "grad_norm": 0.7052243081614424, + "learning_rate": 6.591591098604155e-07, + "loss": 0.9562, + "step": 58670 + }, + { + "epoch": 0.8850945729886271, + "grad_norm": 0.6598970130272075, + "learning_rate": 6.57451159757394e-07, + "loss": 0.9498, + "step": 58680 + }, + { + "epoch": 0.88524540710127, + "grad_norm": 0.738590202512444, + "learning_rate": 6.557453500503919e-07, + "loss": 0.9574, + "step": 58690 + }, + { + "epoch": 0.8853962412139129, + "grad_norm": 0.6882854626052133, + "learning_rate": 6.540416811302164e-07, + "loss": 0.9339, + "step": 58700 + }, + { + "epoch": 0.8855470753265559, + "grad_norm": 0.7124329010768135, + "learning_rate": 6.523401533871765e-07, + "loss": 0.9316, + "step": 58710 + }, + { + "epoch": 0.8856979094391988, + "grad_norm": 0.6579794647095127, + "learning_rate": 6.506407672111015e-07, + "loss": 0.9476, + "step": 58720 + }, + { + "epoch": 0.8858487435518417, + "grad_norm": 0.6819753223646697, + "learning_rate": 6.489435229913188e-07, + "loss": 0.9511, + "step": 58730 + }, + { + "epoch": 0.8859995776644846, + "grad_norm": 0.6522086585253785, + "learning_rate": 6.47248421116674e-07, + "loss": 0.9432, + "step": 58740 + }, + { + "epoch": 0.8861504117771275, + "grad_norm": 0.7362331839236684, + "learning_rate": 6.455554619755178e-07, + "loss": 0.9418, + "step": 58750 + }, + { + "epoch": 0.8863012458897704, + "grad_norm": 0.7048251073898875, + "learning_rate": 6.438646459557074e-07, + "loss": 0.9571, + "step": 58760 + }, + { + "epoch": 0.8864520800024134, + "grad_norm": 0.736566988502032, + "learning_rate": 6.421759734446154e-07, + "loss": 0.9385, + "step": 58770 + }, + { + "epoch": 0.8866029141150562, + "grad_norm": 0.7108581698794545, + "learning_rate": 6.404894448291166e-07, + "loss": 0.9534, + "step": 58780 + }, + { + "epoch": 0.8867537482276991, + "grad_norm": 0.656227614356859, + "learning_rate": 6.38805060495602e-07, + "loss": 0.9646, + "step": 58790 + }, + { + "epoch": 0.8869045823403421, + "grad_norm": 0.680120979243502, + "learning_rate": 6.371228208299663e-07, + "loss": 0.944, + "step": 58800 + }, + { + "epoch": 0.887055416452985, + "grad_norm": 0.6748236696466097, + "learning_rate": 6.354427262176099e-07, + "loss": 0.9351, + "step": 58810 + }, + { + "epoch": 0.887206250565628, + "grad_norm": 0.6373420828333843, + "learning_rate": 6.337647770434508e-07, + "loss": 0.9233, + "step": 58820 + }, + { + "epoch": 0.8873570846782708, + "grad_norm": 0.6742377536568614, + "learning_rate": 6.320889736919078e-07, + "loss": 0.9459, + "step": 58830 + }, + { + "epoch": 0.8875079187909137, + "grad_norm": 0.6865961011961685, + "learning_rate": 6.304153165469096e-07, + "loss": 0.9458, + "step": 58840 + }, + { + "epoch": 0.8876587529035567, + "grad_norm": 0.6642370407205658, + "learning_rate": 6.287438059918982e-07, + "loss": 0.9406, + "step": 58850 + }, + { + "epoch": 0.8878095870161996, + "grad_norm": 0.6667822010777104, + "learning_rate": 6.270744424098152e-07, + "loss": 0.9534, + "step": 58860 + }, + { + "epoch": 0.8879604211288425, + "grad_norm": 0.6992963419710689, + "learning_rate": 6.254072261831179e-07, + "loss": 0.9529, + "step": 58870 + }, + { + "epoch": 0.8881112552414854, + "grad_norm": 0.7222646941128996, + "learning_rate": 6.237421576937696e-07, + "loss": 0.944, + "step": 58880 + }, + { + "epoch": 0.8882620893541283, + "grad_norm": 0.6853138711719873, + "learning_rate": 6.220792373232376e-07, + "loss": 0.9718, + "step": 58890 + }, + { + "epoch": 0.8884129234667713, + "grad_norm": 0.6834155519156973, + "learning_rate": 6.204184654525036e-07, + "loss": 0.9432, + "step": 58900 + }, + { + "epoch": 0.8885637575794142, + "grad_norm": 0.6551298102320839, + "learning_rate": 6.187598424620523e-07, + "loss": 0.9522, + "step": 58910 + }, + { + "epoch": 0.888714591692057, + "grad_norm": 0.7202662132622306, + "learning_rate": 6.17103368731875e-07, + "loss": 0.9291, + "step": 58920 + }, + { + "epoch": 0.8888654258047, + "grad_norm": 0.6500472946388891, + "learning_rate": 6.154490446414773e-07, + "loss": 0.9618, + "step": 58930 + }, + { + "epoch": 0.8890162599173429, + "grad_norm": 0.6403356351576381, + "learning_rate": 6.13796870569865e-07, + "loss": 0.9615, + "step": 58940 + }, + { + "epoch": 0.8891670940299858, + "grad_norm": 0.7180921553001611, + "learning_rate": 6.121468468955549e-07, + "loss": 0.9465, + "step": 58950 + }, + { + "epoch": 0.8893179281426288, + "grad_norm": 0.648475663961055, + "learning_rate": 6.104989739965716e-07, + "loss": 0.9524, + "step": 58960 + }, + { + "epoch": 0.8894687622552716, + "grad_norm": 0.668158771097024, + "learning_rate": 6.088532522504464e-07, + "loss": 0.9503, + "step": 58970 + }, + { + "epoch": 0.8896195963679145, + "grad_norm": 0.6729386336424563, + "learning_rate": 6.072096820342133e-07, + "loss": 0.9221, + "step": 58980 + }, + { + "epoch": 0.8897704304805575, + "grad_norm": 0.7002915845524245, + "learning_rate": 6.055682637244232e-07, + "loss": 0.943, + "step": 58990 + }, + { + "epoch": 0.8899212645932004, + "grad_norm": 0.6650944738261263, + "learning_rate": 6.039289976971252e-07, + "loss": 0.9536, + "step": 59000 + }, + { + "epoch": 0.8900720987058434, + "grad_norm": 0.6792408244884702, + "learning_rate": 6.022918843278769e-07, + "loss": 0.9575, + "step": 59010 + }, + { + "epoch": 0.8902229328184862, + "grad_norm": 0.6790976319418552, + "learning_rate": 6.00656923991747e-07, + "loss": 0.9469, + "step": 59020 + }, + { + "epoch": 0.8903737669311291, + "grad_norm": 0.6644622764540727, + "learning_rate": 5.99024117063306e-07, + "loss": 0.9496, + "step": 59030 + }, + { + "epoch": 0.8905246010437721, + "grad_norm": 0.7502487389824881, + "learning_rate": 5.973934639166334e-07, + "loss": 0.9282, + "step": 59040 + }, + { + "epoch": 0.890675435156415, + "grad_norm": 0.6859087498823098, + "learning_rate": 5.957649649253173e-07, + "loss": 0.9502, + "step": 59050 + }, + { + "epoch": 0.8908262692690578, + "grad_norm": 0.6721299184872811, + "learning_rate": 5.941386204624478e-07, + "loss": 0.9507, + "step": 59060 + }, + { + "epoch": 0.8909771033817008, + "grad_norm": 0.7033714162444271, + "learning_rate": 5.925144309006236e-07, + "loss": 0.9526, + "step": 59070 + }, + { + "epoch": 0.8911279374943437, + "grad_norm": 0.6651712998990563, + "learning_rate": 5.908923966119517e-07, + "loss": 0.9377, + "step": 59080 + }, + { + "epoch": 0.8912787716069867, + "grad_norm": 0.6594540317449978, + "learning_rate": 5.892725179680392e-07, + "loss": 0.9307, + "step": 59090 + }, + { + "epoch": 0.8914296057196296, + "grad_norm": 0.6506943888193903, + "learning_rate": 5.87654795340008e-07, + "loss": 0.9496, + "step": 59100 + }, + { + "epoch": 0.8915804398322724, + "grad_norm": 0.678422035636427, + "learning_rate": 5.860392290984795e-07, + "loss": 0.9375, + "step": 59110 + }, + { + "epoch": 0.8917312739449154, + "grad_norm": 0.6684091430954888, + "learning_rate": 5.844258196135811e-07, + "loss": 0.9531, + "step": 59120 + }, + { + "epoch": 0.8918821080575583, + "grad_norm": 0.6680447793393461, + "learning_rate": 5.828145672549513e-07, + "loss": 0.9526, + "step": 59130 + }, + { + "epoch": 0.8920329421702012, + "grad_norm": 0.6496822586959897, + "learning_rate": 5.812054723917304e-07, + "loss": 0.9459, + "step": 59140 + }, + { + "epoch": 0.8921837762828442, + "grad_norm": 0.6833206020371667, + "learning_rate": 5.795985353925626e-07, + "loss": 0.9732, + "step": 59150 + }, + { + "epoch": 0.892334610395487, + "grad_norm": 0.649163975322639, + "learning_rate": 5.779937566256033e-07, + "loss": 0.9416, + "step": 59160 + }, + { + "epoch": 0.8924854445081299, + "grad_norm": 0.6459523195433929, + "learning_rate": 5.763911364585084e-07, + "loss": 0.9491, + "step": 59170 + }, + { + "epoch": 0.8926362786207729, + "grad_norm": 0.661371386752771, + "learning_rate": 5.747906752584431e-07, + "loss": 0.9713, + "step": 59180 + }, + { + "epoch": 0.8927871127334158, + "grad_norm": 0.6682431733366905, + "learning_rate": 5.731923733920741e-07, + "loss": 0.9582, + "step": 59190 + }, + { + "epoch": 0.8929379468460586, + "grad_norm": 0.6590121178102183, + "learning_rate": 5.715962312255785e-07, + "loss": 0.9463, + "step": 59200 + }, + { + "epoch": 0.8930887809587016, + "grad_norm": 0.6965332236711066, + "learning_rate": 5.700022491246315e-07, + "loss": 0.9597, + "step": 59210 + }, + { + "epoch": 0.8932396150713445, + "grad_norm": 0.669719547070079, + "learning_rate": 5.684104274544211e-07, + "loss": 0.9656, + "step": 59220 + }, + { + "epoch": 0.8933904491839875, + "grad_norm": 0.6645559221774875, + "learning_rate": 5.668207665796333e-07, + "loss": 0.946, + "step": 59230 + }, + { + "epoch": 0.8935412832966304, + "grad_norm": 0.662846266113567, + "learning_rate": 5.652332668644667e-07, + "loss": 0.9469, + "step": 59240 + }, + { + "epoch": 0.8936921174092732, + "grad_norm": 0.6512891903783331, + "learning_rate": 5.636479286726182e-07, + "loss": 0.9539, + "step": 59250 + }, + { + "epoch": 0.8938429515219162, + "grad_norm": 0.6985751487383829, + "learning_rate": 5.620647523672907e-07, + "loss": 0.917, + "step": 59260 + }, + { + "epoch": 0.8939937856345591, + "grad_norm": 0.698250527034424, + "learning_rate": 5.60483738311195e-07, + "loss": 0.9339, + "step": 59270 + }, + { + "epoch": 0.894144619747202, + "grad_norm": 0.6390601217197178, + "learning_rate": 5.589048868665436e-07, + "loss": 0.9429, + "step": 59280 + }, + { + "epoch": 0.894295453859845, + "grad_norm": 0.6716147574296777, + "learning_rate": 5.57328198395054e-07, + "loss": 0.9401, + "step": 59290 + }, + { + "epoch": 0.8944462879724878, + "grad_norm": 0.6628404239748825, + "learning_rate": 5.557536732579494e-07, + "loss": 0.9451, + "step": 59300 + }, + { + "epoch": 0.8945971220851308, + "grad_norm": 0.662232116973523, + "learning_rate": 5.541813118159556e-07, + "loss": 0.9528, + "step": 59310 + }, + { + "epoch": 0.8947479561977737, + "grad_norm": 0.6626117201177466, + "learning_rate": 5.526111144293056e-07, + "loss": 0.9513, + "step": 59320 + }, + { + "epoch": 0.8948987903104166, + "grad_norm": 0.7457670034633456, + "learning_rate": 5.510430814577328e-07, + "loss": 0.9483, + "step": 59330 + }, + { + "epoch": 0.8950496244230595, + "grad_norm": 0.6789999970029715, + "learning_rate": 5.494772132604754e-07, + "loss": 0.9485, + "step": 59340 + }, + { + "epoch": 0.8952004585357024, + "grad_norm": 0.6602968072089227, + "learning_rate": 5.479135101962807e-07, + "loss": 0.9368, + "step": 59350 + }, + { + "epoch": 0.8953512926483453, + "grad_norm": 0.6893914760144835, + "learning_rate": 5.463519726233923e-07, + "loss": 0.9323, + "step": 59360 + }, + { + "epoch": 0.8955021267609883, + "grad_norm": 0.6721570804274681, + "learning_rate": 5.447926008995652e-07, + "loss": 0.9357, + "step": 59370 + }, + { + "epoch": 0.8956529608736312, + "grad_norm": 0.6644077538148492, + "learning_rate": 5.432353953820502e-07, + "loss": 0.9734, + "step": 59380 + }, + { + "epoch": 0.895803794986274, + "grad_norm": 0.665149680853523, + "learning_rate": 5.416803564276108e-07, + "loss": 0.9425, + "step": 59390 + }, + { + "epoch": 0.895954629098917, + "grad_norm": 0.6614980382762733, + "learning_rate": 5.401274843925041e-07, + "loss": 0.9391, + "step": 59400 + }, + { + "epoch": 0.8961054632115599, + "grad_norm": 0.665639716286186, + "learning_rate": 5.385767796325015e-07, + "loss": 0.9478, + "step": 59410 + }, + { + "epoch": 0.8962562973242029, + "grad_norm": 0.647226692066415, + "learning_rate": 5.370282425028694e-07, + "loss": 0.9456, + "step": 59420 + }, + { + "epoch": 0.8964071314368458, + "grad_norm": 0.6543618559695656, + "learning_rate": 5.354818733583789e-07, + "loss": 0.9441, + "step": 59430 + }, + { + "epoch": 0.8965579655494886, + "grad_norm": 0.6877435095638929, + "learning_rate": 5.339376725533108e-07, + "loss": 0.9585, + "step": 59440 + }, + { + "epoch": 0.8967087996621316, + "grad_norm": 0.6658409853800882, + "learning_rate": 5.32395640441441e-07, + "loss": 0.9466, + "step": 59450 + }, + { + "epoch": 0.8968596337747745, + "grad_norm": 0.674663831012486, + "learning_rate": 5.308557773760537e-07, + "loss": 0.9318, + "step": 59460 + }, + { + "epoch": 0.8970104678874175, + "grad_norm": 0.6457135577328363, + "learning_rate": 5.293180837099343e-07, + "loss": 0.9466, + "step": 59470 + }, + { + "epoch": 0.8971613020000603, + "grad_norm": 0.663278776862415, + "learning_rate": 5.277825597953701e-07, + "loss": 0.9453, + "step": 59480 + }, + { + "epoch": 0.8973121361127032, + "grad_norm": 0.6556009758005604, + "learning_rate": 5.262492059841551e-07, + "loss": 0.9379, + "step": 59490 + }, + { + "epoch": 0.8974629702253462, + "grad_norm": 0.646784641046921, + "learning_rate": 5.247180226275816e-07, + "loss": 0.9586, + "step": 59500 + }, + { + "epoch": 0.8976138043379891, + "grad_norm": 0.6472340669358319, + "learning_rate": 5.231890100764447e-07, + "loss": 0.9582, + "step": 59510 + }, + { + "epoch": 0.897764638450632, + "grad_norm": 0.6623806987049894, + "learning_rate": 5.216621686810496e-07, + "loss": 0.9401, + "step": 59520 + }, + { + "epoch": 0.8979154725632749, + "grad_norm": 0.6812504378348105, + "learning_rate": 5.201374987911933e-07, + "loss": 0.9301, + "step": 59530 + }, + { + "epoch": 0.8980663066759178, + "grad_norm": 0.651102982907012, + "learning_rate": 5.186150007561829e-07, + "loss": 0.9408, + "step": 59540 + }, + { + "epoch": 0.8982171407885607, + "grad_norm": 0.7284169901353955, + "learning_rate": 5.170946749248284e-07, + "loss": 0.9347, + "step": 59550 + }, + { + "epoch": 0.8983679749012037, + "grad_norm": 0.6658127320066456, + "learning_rate": 5.155765216454333e-07, + "loss": 0.9378, + "step": 59560 + }, + { + "epoch": 0.8985188090138466, + "grad_norm": 0.6696395555430031, + "learning_rate": 5.140605412658162e-07, + "loss": 0.9468, + "step": 59570 + }, + { + "epoch": 0.8986696431264894, + "grad_norm": 0.6682730804388433, + "learning_rate": 5.125467341332857e-07, + "loss": 0.9363, + "step": 59580 + }, + { + "epoch": 0.8988204772391324, + "grad_norm": 0.6968376718268906, + "learning_rate": 5.110351005946601e-07, + "loss": 0.9443, + "step": 59590 + }, + { + "epoch": 0.8989713113517753, + "grad_norm": 0.6889072341145208, + "learning_rate": 5.09525640996259e-07, + "loss": 0.9445, + "step": 59600 + }, + { + "epoch": 0.8991221454644183, + "grad_norm": 0.6662865408413904, + "learning_rate": 5.080183556839003e-07, + "loss": 0.9533, + "step": 59610 + }, + { + "epoch": 0.8992729795770611, + "grad_norm": 0.6757625865154233, + "learning_rate": 5.065132450029053e-07, + "loss": 0.9628, + "step": 59620 + }, + { + "epoch": 0.899423813689704, + "grad_norm": 0.6582729158987398, + "learning_rate": 5.050103092981007e-07, + "loss": 0.9466, + "step": 59630 + }, + { + "epoch": 0.899574647802347, + "grad_norm": 0.6862016154441446, + "learning_rate": 5.035095489138098e-07, + "loss": 0.9402, + "step": 59640 + }, + { + "epoch": 0.8997254819149899, + "grad_norm": 0.699272505289389, + "learning_rate": 5.020109641938598e-07, + "loss": 0.9302, + "step": 59650 + }, + { + "epoch": 0.8998763160276328, + "grad_norm": 0.6582701542426185, + "learning_rate": 5.005145554815805e-07, + "loss": 0.9421, + "step": 59660 + }, + { + "epoch": 0.9000271501402757, + "grad_norm": 0.6785296302999839, + "learning_rate": 4.99020323119802e-07, + "loss": 0.94, + "step": 59670 + }, + { + "epoch": 0.9001779842529186, + "grad_norm": 0.6663406719344344, + "learning_rate": 4.97528267450853e-07, + "loss": 0.9513, + "step": 59680 + }, + { + "epoch": 0.9003288183655616, + "grad_norm": 0.6654277409715077, + "learning_rate": 4.960383888165698e-07, + "loss": 0.9347, + "step": 59690 + }, + { + "epoch": 0.9004796524782045, + "grad_norm": 0.6648616572980931, + "learning_rate": 4.945506875582861e-07, + "loss": 0.9674, + "step": 59700 + }, + { + "epoch": 0.9006304865908474, + "grad_norm": 0.6557522920057721, + "learning_rate": 4.930651640168349e-07, + "loss": 0.9426, + "step": 59710 + }, + { + "epoch": 0.9007813207034903, + "grad_norm": 0.6384631659447466, + "learning_rate": 4.915818185325549e-07, + "loss": 0.9452, + "step": 59720 + }, + { + "epoch": 0.9009321548161332, + "grad_norm": 0.6922847342107538, + "learning_rate": 4.901006514452822e-07, + "loss": 0.9533, + "step": 59730 + }, + { + "epoch": 0.9010829889287761, + "grad_norm": 0.6679775285942029, + "learning_rate": 4.886216630943552e-07, + "loss": 0.9506, + "step": 59740 + }, + { + "epoch": 0.9012338230414191, + "grad_norm": 0.6586446174184958, + "learning_rate": 4.871448538186141e-07, + "loss": 0.9605, + "step": 59750 + }, + { + "epoch": 0.9013846571540619, + "grad_norm": 0.7126510007093894, + "learning_rate": 4.85670223956396e-07, + "loss": 0.9394, + "step": 59760 + }, + { + "epoch": 0.9015354912667048, + "grad_norm": 0.6854544312893954, + "learning_rate": 4.841977738455451e-07, + "loss": 0.9453, + "step": 59770 + }, + { + "epoch": 0.9016863253793478, + "grad_norm": 0.6379044851556657, + "learning_rate": 4.827275038234003e-07, + "loss": 0.9339, + "step": 59780 + }, + { + "epoch": 0.9018371594919907, + "grad_norm": 0.6635542730372117, + "learning_rate": 4.812594142268023e-07, + "loss": 0.9536, + "step": 59790 + }, + { + "epoch": 0.9019879936046337, + "grad_norm": 0.6573040236569566, + "learning_rate": 4.797935053920955e-07, + "loss": 0.946, + "step": 59800 + }, + { + "epoch": 0.9021388277172765, + "grad_norm": 0.6692716573235473, + "learning_rate": 4.783297776551221e-07, + "loss": 0.9719, + "step": 59810 + }, + { + "epoch": 0.9022896618299194, + "grad_norm": 0.6617242946481038, + "learning_rate": 4.76868231351223e-07, + "loss": 0.9495, + "step": 59820 + }, + { + "epoch": 0.9024404959425624, + "grad_norm": 0.6577512542664069, + "learning_rate": 4.7540886681524455e-07, + "loss": 0.9394, + "step": 59830 + }, + { + "epoch": 0.9025913300552053, + "grad_norm": 0.6880834892897189, + "learning_rate": 4.739516843815262e-07, + "loss": 0.951, + "step": 59840 + }, + { + "epoch": 0.9027421641678482, + "grad_norm": 0.6417683076461944, + "learning_rate": 4.7249668438391516e-07, + "loss": 0.9441, + "step": 59850 + }, + { + "epoch": 0.9028929982804911, + "grad_norm": 0.6711070215651297, + "learning_rate": 4.7104386715575047e-07, + "loss": 0.9376, + "step": 59860 + }, + { + "epoch": 0.903043832393134, + "grad_norm": 0.6327675562764447, + "learning_rate": 4.6959323302988024e-07, + "loss": 0.9561, + "step": 59870 + }, + { + "epoch": 0.903194666505777, + "grad_norm": 0.6864162273335322, + "learning_rate": 4.6814478233864313e-07, + "loss": 0.9652, + "step": 59880 + }, + { + "epoch": 0.9033455006184199, + "grad_norm": 0.6904115238723454, + "learning_rate": 4.666985154138848e-07, + "loss": 0.9411, + "step": 59890 + }, + { + "epoch": 0.9034963347310627, + "grad_norm": 0.6660890947449499, + "learning_rate": 4.652544325869457e-07, + "loss": 0.9532, + "step": 59900 + }, + { + "epoch": 0.9036471688437057, + "grad_norm": 0.6708698985638235, + "learning_rate": 4.6381253418867124e-07, + "loss": 0.9438, + "step": 59910 + }, + { + "epoch": 0.9037980029563486, + "grad_norm": 0.6857997182626311, + "learning_rate": 4.623728205494005e-07, + "loss": 0.947, + "step": 59920 + }, + { + "epoch": 0.9039488370689915, + "grad_norm": 0.6900743100118615, + "learning_rate": 4.609352919989751e-07, + "loss": 0.9716, + "step": 59930 + }, + { + "epoch": 0.9040996711816345, + "grad_norm": 0.6709180363325397, + "learning_rate": 4.5949994886673623e-07, + "loss": 0.9499, + "step": 59940 + }, + { + "epoch": 0.9042505052942773, + "grad_norm": 0.6544519607462285, + "learning_rate": 4.580667914815229e-07, + "loss": 0.9459, + "step": 59950 + }, + { + "epoch": 0.9044013394069202, + "grad_norm": 0.6913306248314633, + "learning_rate": 4.5663582017167364e-07, + "loss": 0.9441, + "step": 59960 + }, + { + "epoch": 0.9045521735195632, + "grad_norm": 0.6981635284477667, + "learning_rate": 4.552070352650295e-07, + "loss": 0.9484, + "step": 59970 + }, + { + "epoch": 0.9047030076322061, + "grad_norm": 0.71304533741561, + "learning_rate": 4.537804370889243e-07, + "loss": 0.9564, + "step": 59980 + }, + { + "epoch": 0.9048538417448491, + "grad_norm": 0.6823062000656982, + "learning_rate": 4.5235602597019756e-07, + "loss": 0.9655, + "step": 59990 + }, + { + "epoch": 0.9050046758574919, + "grad_norm": 0.6316799013352011, + "learning_rate": 4.509338022351839e-07, + "loss": 0.9498, + "step": 60000 + }, + { + "epoch": 0.9051555099701348, + "grad_norm": 0.6755563646727502, + "learning_rate": 4.495137662097149e-07, + "loss": 0.9368, + "step": 60010 + }, + { + "epoch": 0.9053063440827778, + "grad_norm": 0.6492709948070042, + "learning_rate": 4.48095918219128e-07, + "loss": 0.9411, + "step": 60020 + }, + { + "epoch": 0.9054571781954207, + "grad_norm": 0.6515632429334255, + "learning_rate": 4.4668025858825015e-07, + "loss": 0.9603, + "step": 60030 + }, + { + "epoch": 0.9056080123080635, + "grad_norm": 0.6877378928476958, + "learning_rate": 4.452667876414152e-07, + "loss": 0.9664, + "step": 60040 + }, + { + "epoch": 0.9057588464207065, + "grad_norm": 0.6538372579257608, + "learning_rate": 4.4385550570245184e-07, + "loss": 0.9459, + "step": 60050 + }, + { + "epoch": 0.9059096805333494, + "grad_norm": 0.6824846157695822, + "learning_rate": 4.4244641309468595e-07, + "loss": 0.9518, + "step": 60060 + }, + { + "epoch": 0.9060605146459924, + "grad_norm": 0.6867049174865285, + "learning_rate": 4.410395101409437e-07, + "loss": 0.9523, + "step": 60070 + }, + { + "epoch": 0.9062113487586353, + "grad_norm": 0.6630720733991473, + "learning_rate": 4.3963479716355064e-07, + "loss": 0.9578, + "step": 60080 + }, + { + "epoch": 0.9063621828712781, + "grad_norm": 0.6582538640413695, + "learning_rate": 4.3823227448432816e-07, + "loss": 0.9513, + "step": 60090 + }, + { + "epoch": 0.9065130169839211, + "grad_norm": 0.7857680158143965, + "learning_rate": 4.3683194242459594e-07, + "loss": 0.955, + "step": 60100 + }, + { + "epoch": 0.906663851096564, + "grad_norm": 0.6392797391155058, + "learning_rate": 4.354338013051751e-07, + "loss": 0.9645, + "step": 60110 + }, + { + "epoch": 0.9068146852092069, + "grad_norm": 0.6813829564906126, + "learning_rate": 4.3403785144638057e-07, + "loss": 0.9465, + "step": 60120 + }, + { + "epoch": 0.9069655193218499, + "grad_norm": 0.6570599496820049, + "learning_rate": 4.326440931680276e-07, + "loss": 0.9413, + "step": 60130 + }, + { + "epoch": 0.9071163534344927, + "grad_norm": 0.6441434585727083, + "learning_rate": 4.3125252678943074e-07, + "loss": 0.952, + "step": 60140 + }, + { + "epoch": 0.9072671875471356, + "grad_norm": 0.6604164754055137, + "learning_rate": 4.2986315262939615e-07, + "loss": 0.9572, + "step": 60150 + }, + { + "epoch": 0.9074180216597786, + "grad_norm": 0.6603409327161754, + "learning_rate": 4.2847597100623695e-07, + "loss": 0.9517, + "step": 60160 + }, + { + "epoch": 0.9075688557724215, + "grad_norm": 0.6841181865056705, + "learning_rate": 4.270909822377567e-07, + "loss": 0.9464, + "step": 60170 + }, + { + "epoch": 0.9077196898850644, + "grad_norm": 0.6727803842639952, + "learning_rate": 4.2570818664125826e-07, + "loss": 0.9619, + "step": 60180 + }, + { + "epoch": 0.9078705239977073, + "grad_norm": 0.7041254446580443, + "learning_rate": 4.2432758453354485e-07, + "loss": 0.9442, + "step": 60190 + }, + { + "epoch": 0.9080213581103502, + "grad_norm": 0.6381496311704098, + "learning_rate": 4.2294917623091456e-07, + "loss": 0.9479, + "step": 60200 + }, + { + "epoch": 0.9081721922229932, + "grad_norm": 0.6515065078929849, + "learning_rate": 4.2157296204916156e-07, + "loss": 0.9612, + "step": 60210 + }, + { + "epoch": 0.9083230263356361, + "grad_norm": 0.6140484995809826, + "learning_rate": 4.2019894230358237e-07, + "loss": 0.9453, + "step": 60220 + }, + { + "epoch": 0.9084738604482789, + "grad_norm": 0.6941782621126197, + "learning_rate": 4.188271173089653e-07, + "loss": 0.9456, + "step": 60230 + }, + { + "epoch": 0.9086246945609219, + "grad_norm": 0.6632812709017865, + "learning_rate": 4.174574873795978e-07, + "loss": 0.9414, + "step": 60240 + }, + { + "epoch": 0.9087755286735648, + "grad_norm": 0.6351733201219206, + "learning_rate": 4.1609005282926664e-07, + "loss": 0.9411, + "step": 60250 + }, + { + "epoch": 0.9089263627862078, + "grad_norm": 0.6786611814735412, + "learning_rate": 4.147248139712512e-07, + "loss": 0.9555, + "step": 60260 + }, + { + "epoch": 0.9090771968988507, + "grad_norm": 0.6952439359931558, + "learning_rate": 4.133617711183324e-07, + "loss": 0.9457, + "step": 60270 + }, + { + "epoch": 0.9092280310114935, + "grad_norm": 0.6544149799895491, + "learning_rate": 4.120009245827872e-07, + "loss": 0.9495, + "step": 60280 + }, + { + "epoch": 0.9093788651241365, + "grad_norm": 0.6857938712230822, + "learning_rate": 4.1064227467638383e-07, + "loss": 0.9709, + "step": 60290 + }, + { + "epoch": 0.9095296992367794, + "grad_norm": 0.6332464829264175, + "learning_rate": 4.0928582171039564e-07, + "loss": 0.9571, + "step": 60300 + }, + { + "epoch": 0.9096805333494223, + "grad_norm": 0.6989116309856889, + "learning_rate": 4.0793156599558737e-07, + "loss": 0.951, + "step": 60310 + }, + { + "epoch": 0.9098313674620652, + "grad_norm": 0.6762641391323548, + "learning_rate": 4.0657950784221967e-07, + "loss": 0.953, + "step": 60320 + }, + { + "epoch": 0.9099822015747081, + "grad_norm": 0.641849704109911, + "learning_rate": 4.0522964756005477e-07, + "loss": 0.9437, + "step": 60330 + }, + { + "epoch": 0.910133035687351, + "grad_norm": 0.7005593559790686, + "learning_rate": 4.0388198545834646e-07, + "loss": 0.9358, + "step": 60340 + }, + { + "epoch": 0.910283869799994, + "grad_norm": 0.6699046844929475, + "learning_rate": 4.0253652184584655e-07, + "loss": 0.9266, + "step": 60350 + }, + { + "epoch": 0.9104347039126369, + "grad_norm": 0.6551639793241637, + "learning_rate": 4.011932570308041e-07, + "loss": 0.9512, + "step": 60360 + }, + { + "epoch": 0.9105855380252798, + "grad_norm": 0.6968644520368859, + "learning_rate": 3.998521913209652e-07, + "loss": 0.9539, + "step": 60370 + }, + { + "epoch": 0.9107363721379227, + "grad_norm": 0.6359874085332141, + "learning_rate": 3.9851332502356623e-07, + "loss": 0.9619, + "step": 60380 + }, + { + "epoch": 0.9108872062505656, + "grad_norm": 0.636244747615031, + "learning_rate": 3.971766584453496e-07, + "loss": 0.9439, + "step": 60390 + }, + { + "epoch": 0.9110380403632086, + "grad_norm": 0.6729143924592879, + "learning_rate": 3.958421918925437e-07, + "loss": 0.9541, + "step": 60400 + }, + { + "epoch": 0.9111888744758515, + "grad_norm": 0.6567425287642528, + "learning_rate": 3.945099256708795e-07, + "loss": 0.938, + "step": 60410 + }, + { + "epoch": 0.9113397085884943, + "grad_norm": 0.6628731653476942, + "learning_rate": 3.931798600855818e-07, + "loss": 0.9347, + "step": 60420 + }, + { + "epoch": 0.9114905427011373, + "grad_norm": 0.6716125782133451, + "learning_rate": 3.918519954413702e-07, + "loss": 0.9613, + "step": 60430 + }, + { + "epoch": 0.9116413768137802, + "grad_norm": 0.663775176434463, + "learning_rate": 3.9052633204246237e-07, + "loss": 0.947, + "step": 60440 + }, + { + "epoch": 0.9117922109264232, + "grad_norm": 0.6428712234473903, + "learning_rate": 3.8920287019256987e-07, + "loss": 0.9431, + "step": 60450 + }, + { + "epoch": 0.911943045039066, + "grad_norm": 0.6956436443552032, + "learning_rate": 3.878816101948979e-07, + "loss": 0.9595, + "step": 60460 + }, + { + "epoch": 0.9120938791517089, + "grad_norm": 0.6378152865794612, + "learning_rate": 3.8656255235215435e-07, + "loss": 0.9638, + "step": 60470 + }, + { + "epoch": 0.9122447132643519, + "grad_norm": 0.7224589962976008, + "learning_rate": 3.8524569696653526e-07, + "loss": 0.9536, + "step": 60480 + }, + { + "epoch": 0.9123955473769948, + "grad_norm": 0.67550705836605, + "learning_rate": 3.839310443397326e-07, + "loss": 0.9478, + "step": 60490 + }, + { + "epoch": 0.9125463814896377, + "grad_norm": 0.6589592524722606, + "learning_rate": 3.8261859477294107e-07, + "loss": 0.9482, + "step": 60500 + }, + { + "epoch": 0.9126972156022806, + "grad_norm": 0.6590801694571661, + "learning_rate": 3.8130834856684007e-07, + "loss": 0.9622, + "step": 60510 + }, + { + "epoch": 0.9128480497149235, + "grad_norm": 0.6480930253903755, + "learning_rate": 3.8000030602161396e-07, + "loss": 0.9442, + "step": 60520 + }, + { + "epoch": 0.9129988838275664, + "grad_norm": 0.6344556132708021, + "learning_rate": 3.786944674369342e-07, + "loss": 0.9499, + "step": 60530 + }, + { + "epoch": 0.9131497179402094, + "grad_norm": 0.6987765595174555, + "learning_rate": 3.7739083311197243e-07, + "loss": 0.9411, + "step": 60540 + }, + { + "epoch": 0.9133005520528523, + "grad_norm": 0.6369859927661305, + "learning_rate": 3.7608940334539545e-07, + "loss": 0.9489, + "step": 60550 + }, + { + "epoch": 0.9134513861654951, + "grad_norm": 0.6617675482750287, + "learning_rate": 3.747901784353614e-07, + "loss": 0.9526, + "step": 60560 + }, + { + "epoch": 0.9136022202781381, + "grad_norm": 0.6635323914390274, + "learning_rate": 3.734931586795254e-07, + "loss": 0.9724, + "step": 60570 + }, + { + "epoch": 0.913753054390781, + "grad_norm": 0.6502743910113841, + "learning_rate": 3.7219834437503764e-07, + "loss": 0.9346, + "step": 60580 + }, + { + "epoch": 0.913903888503424, + "grad_norm": 0.6646557921067416, + "learning_rate": 3.7090573581854306e-07, + "loss": 0.9553, + "step": 60590 + }, + { + "epoch": 0.9140547226160668, + "grad_norm": 0.6671616757074863, + "learning_rate": 3.696153333061792e-07, + "loss": 0.9424, + "step": 60600 + }, + { + "epoch": 0.9142055567287097, + "grad_norm": 0.6778305088401017, + "learning_rate": 3.683271371335817e-07, + "loss": 0.9429, + "step": 60610 + }, + { + "epoch": 0.9143563908413527, + "grad_norm": 0.6880820722320776, + "learning_rate": 3.6704114759587684e-07, + "loss": 0.944, + "step": 60620 + }, + { + "epoch": 0.9145072249539956, + "grad_norm": 0.7073840929375073, + "learning_rate": 3.657573649876878e-07, + "loss": 0.9345, + "step": 60630 + }, + { + "epoch": 0.9146580590666386, + "grad_norm": 0.6500975754127666, + "learning_rate": 3.644757896031337e-07, + "loss": 0.9547, + "step": 60640 + }, + { + "epoch": 0.9148088931792814, + "grad_norm": 0.6407288454974052, + "learning_rate": 3.6319642173582304e-07, + "loss": 0.9495, + "step": 60650 + }, + { + "epoch": 0.9149597272919243, + "grad_norm": 0.6989722327274674, + "learning_rate": 3.619192616788636e-07, + "loss": 0.9517, + "step": 60660 + }, + { + "epoch": 0.9151105614045673, + "grad_norm": 0.6819258518440665, + "learning_rate": 3.6064430972485463e-07, + "loss": 0.9462, + "step": 60670 + }, + { + "epoch": 0.9152613955172102, + "grad_norm": 0.6493834817814208, + "learning_rate": 3.593715661658881e-07, + "loss": 0.9512, + "step": 60680 + }, + { + "epoch": 0.9154122296298531, + "grad_norm": 0.670509450471623, + "learning_rate": 3.581010312935551e-07, + "loss": 0.9527, + "step": 60690 + }, + { + "epoch": 0.915563063742496, + "grad_norm": 0.6989080619268744, + "learning_rate": 3.5683270539893623e-07, + "loss": 0.9507, + "step": 60700 + }, + { + "epoch": 0.9157138978551389, + "grad_norm": 0.6764268603332717, + "learning_rate": 3.5556658877260677e-07, + "loss": 0.9343, + "step": 60710 + }, + { + "epoch": 0.9158647319677818, + "grad_norm": 0.6531014353762925, + "learning_rate": 3.54302681704638e-07, + "loss": 0.9389, + "step": 60720 + }, + { + "epoch": 0.9160155660804248, + "grad_norm": 0.678739909974198, + "learning_rate": 3.530409844845928e-07, + "loss": 0.953, + "step": 60730 + }, + { + "epoch": 0.9161664001930676, + "grad_norm": 0.6749018033323992, + "learning_rate": 3.517814974015277e-07, + "loss": 0.9552, + "step": 60740 + }, + { + "epoch": 0.9163172343057105, + "grad_norm": 0.6972309683698968, + "learning_rate": 3.5052422074399515e-07, + "loss": 0.9365, + "step": 60750 + }, + { + "epoch": 0.9164680684183535, + "grad_norm": 0.6982174290264476, + "learning_rate": 3.492691548000393e-07, + "loss": 0.9364, + "step": 60760 + }, + { + "epoch": 0.9166189025309964, + "grad_norm": 0.7152257451392989, + "learning_rate": 3.4801629985719563e-07, + "loss": 0.959, + "step": 60770 + }, + { + "epoch": 0.9167697366436394, + "grad_norm": 0.6660674805400408, + "learning_rate": 3.4676565620250016e-07, + "loss": 0.9546, + "step": 60780 + }, + { + "epoch": 0.9169205707562822, + "grad_norm": 0.6545355339844519, + "learning_rate": 3.4551722412247247e-07, + "loss": 0.941, + "step": 60790 + }, + { + "epoch": 0.9170714048689251, + "grad_norm": 0.6459537120815686, + "learning_rate": 3.4427100390313605e-07, + "loss": 0.9394, + "step": 60800 + }, + { + "epoch": 0.9172222389815681, + "grad_norm": 0.6754736728379382, + "learning_rate": 3.430269958299992e-07, + "loss": 0.9288, + "step": 60810 + }, + { + "epoch": 0.917373073094211, + "grad_norm": 0.6558714408200788, + "learning_rate": 3.4178520018806614e-07, + "loss": 0.9535, + "step": 60820 + }, + { + "epoch": 0.917523907206854, + "grad_norm": 0.6431262799275912, + "learning_rate": 3.405456172618371e-07, + "loss": 0.9317, + "step": 60830 + }, + { + "epoch": 0.9176747413194968, + "grad_norm": 0.6536485978078137, + "learning_rate": 3.393082473353015e-07, + "loss": 0.9279, + "step": 60840 + }, + { + "epoch": 0.9178255754321397, + "grad_norm": 0.6528550062080175, + "learning_rate": 3.380730906919427e-07, + "loss": 0.9529, + "step": 60850 + }, + { + "epoch": 0.9179764095447827, + "grad_norm": 0.6434817497466413, + "learning_rate": 3.368401476147376e-07, + "loss": 0.9347, + "step": 60860 + }, + { + "epoch": 0.9181272436574256, + "grad_norm": 0.6784575208218615, + "learning_rate": 3.356094183861569e-07, + "loss": 0.9418, + "step": 60870 + }, + { + "epoch": 0.9182780777700684, + "grad_norm": 0.6807806959908764, + "learning_rate": 3.343809032881629e-07, + "loss": 0.9604, + "step": 60880 + }, + { + "epoch": 0.9184289118827114, + "grad_norm": 0.6554009742168423, + "learning_rate": 3.331546026022103e-07, + "loss": 0.9608, + "step": 60890 + }, + { + "epoch": 0.9185797459953543, + "grad_norm": 0.6564207448172898, + "learning_rate": 3.319305166092468e-07, + "loss": 0.9635, + "step": 60900 + }, + { + "epoch": 0.9187305801079972, + "grad_norm": 0.6646315728834168, + "learning_rate": 3.3070864558971237e-07, + "loss": 0.9479, + "step": 60910 + }, + { + "epoch": 0.9188814142206402, + "grad_norm": 0.6609440255302601, + "learning_rate": 3.2948898982354203e-07, + "loss": 0.947, + "step": 60920 + }, + { + "epoch": 0.919032248333283, + "grad_norm": 0.6677301577531843, + "learning_rate": 3.2827154959015895e-07, + "loss": 0.942, + "step": 60930 + }, + { + "epoch": 0.919183082445926, + "grad_norm": 0.6525897083525775, + "learning_rate": 3.270563251684833e-07, + "loss": 0.9358, + "step": 60940 + }, + { + "epoch": 0.9193339165585689, + "grad_norm": 0.6773844977896692, + "learning_rate": 3.2584331683692347e-07, + "loss": 0.9499, + "step": 60950 + }, + { + "epoch": 0.9194847506712118, + "grad_norm": 0.6460236207604478, + "learning_rate": 3.246325248733817e-07, + "loss": 0.9316, + "step": 60960 + }, + { + "epoch": 0.9196355847838548, + "grad_norm": 0.6837941999687757, + "learning_rate": 3.234239495552549e-07, + "loss": 0.933, + "step": 60970 + }, + { + "epoch": 0.9197864188964976, + "grad_norm": 0.6389192685706769, + "learning_rate": 3.222175911594294e-07, + "loss": 0.9492, + "step": 60980 + }, + { + "epoch": 0.9199372530091405, + "grad_norm": 0.6506455088236109, + "learning_rate": 3.21013449962283e-07, + "loss": 0.961, + "step": 60990 + }, + { + "epoch": 0.9200880871217835, + "grad_norm": 0.6498897641334784, + "learning_rate": 3.198115262396873e-07, + "loss": 0.9463, + "step": 61000 + }, + { + "epoch": 0.9202389212344264, + "grad_norm": 0.6781598302037608, + "learning_rate": 3.186118202670052e-07, + "loss": 0.9504, + "step": 61010 + }, + { + "epoch": 0.9203897553470692, + "grad_norm": 0.6390136468710433, + "learning_rate": 3.174143323190926e-07, + "loss": 0.9319, + "step": 61020 + }, + { + "epoch": 0.9205405894597122, + "grad_norm": 0.6708309132299066, + "learning_rate": 3.1621906267029544e-07, + "loss": 0.9571, + "step": 61030 + }, + { + "epoch": 0.9206914235723551, + "grad_norm": 0.6401318995114615, + "learning_rate": 3.1502601159445256e-07, + "loss": 0.9414, + "step": 61040 + }, + { + "epoch": 0.9208422576849981, + "grad_norm": 0.6458059833233449, + "learning_rate": 3.1383517936489416e-07, + "loss": 0.9294, + "step": 61050 + }, + { + "epoch": 0.920993091797641, + "grad_norm": 0.6450817327247416, + "learning_rate": 3.126465662544431e-07, + "loss": 0.9267, + "step": 61060 + }, + { + "epoch": 0.9211439259102838, + "grad_norm": 0.6738772656591493, + "learning_rate": 3.114601725354105e-07, + "loss": 0.9533, + "step": 61070 + }, + { + "epoch": 0.9212947600229268, + "grad_norm": 0.6856754984771976, + "learning_rate": 3.1027599847960555e-07, + "loss": 0.9469, + "step": 61080 + }, + { + "epoch": 0.9214455941355697, + "grad_norm": 0.6544044785456492, + "learning_rate": 3.090940443583212e-07, + "loss": 0.9331, + "step": 61090 + }, + { + "epoch": 0.9215964282482126, + "grad_norm": 0.6556854729523568, + "learning_rate": 3.079143104423465e-07, + "loss": 0.9636, + "step": 61100 + }, + { + "epoch": 0.9217472623608556, + "grad_norm": 0.6804254076026197, + "learning_rate": 3.067367970019608e-07, + "loss": 0.9459, + "step": 61110 + }, + { + "epoch": 0.9218980964734984, + "grad_norm": 0.6348096489614284, + "learning_rate": 3.05561504306936e-07, + "loss": 0.9431, + "step": 61120 + }, + { + "epoch": 0.9220489305861413, + "grad_norm": 0.6336723811867445, + "learning_rate": 3.043884326265312e-07, + "loss": 0.9592, + "step": 61130 + }, + { + "epoch": 0.9221997646987843, + "grad_norm": 0.6713458021893481, + "learning_rate": 3.0321758222950136e-07, + "loss": 0.959, + "step": 61140 + }, + { + "epoch": 0.9223505988114272, + "grad_norm": 0.6994293137109311, + "learning_rate": 3.020489533840909e-07, + "loss": 0.9523, + "step": 61150 + }, + { + "epoch": 0.92250143292407, + "grad_norm": 0.6459575754689144, + "learning_rate": 3.0088254635803336e-07, + "loss": 0.9538, + "step": 61160 + }, + { + "epoch": 0.922652267036713, + "grad_norm": 0.6576791365324769, + "learning_rate": 2.9971836141855617e-07, + "loss": 0.944, + "step": 61170 + }, + { + "epoch": 0.9228031011493559, + "grad_norm": 0.6309253820066947, + "learning_rate": 2.9855639883237473e-07, + "loss": 0.9605, + "step": 61180 + }, + { + "epoch": 0.9229539352619989, + "grad_norm": 0.6606799737856721, + "learning_rate": 2.973966588656996e-07, + "loss": 0.9504, + "step": 61190 + }, + { + "epoch": 0.9231047693746418, + "grad_norm": 0.707670485657748, + "learning_rate": 2.9623914178422695e-07, + "loss": 0.9659, + "step": 61200 + }, + { + "epoch": 0.9232556034872846, + "grad_norm": 0.6618434201894139, + "learning_rate": 2.9508384785314593e-07, + "loss": 0.9336, + "step": 61210 + }, + { + "epoch": 0.9234064375999276, + "grad_norm": 0.6743083932982056, + "learning_rate": 2.9393077733714023e-07, + "loss": 0.9337, + "step": 61220 + }, + { + "epoch": 0.9235572717125705, + "grad_norm": 0.6661052836694249, + "learning_rate": 2.927799305003776e-07, + "loss": 0.9631, + "step": 61230 + }, + { + "epoch": 0.9237081058252135, + "grad_norm": 0.6547096231014864, + "learning_rate": 2.9163130760651916e-07, + "loss": 0.9495, + "step": 61240 + }, + { + "epoch": 0.9238589399378564, + "grad_norm": 0.6701536006098292, + "learning_rate": 2.9048490891871783e-07, + "loss": 0.9357, + "step": 61250 + }, + { + "epoch": 0.9240097740504992, + "grad_norm": 0.6593568060567546, + "learning_rate": 2.8934073469961463e-07, + "loss": 0.9327, + "step": 61260 + }, + { + "epoch": 0.9241606081631422, + "grad_norm": 0.642756244545332, + "learning_rate": 2.881987852113433e-07, + "loss": 0.9567, + "step": 61270 + }, + { + "epoch": 0.9243114422757851, + "grad_norm": 0.6554952740246268, + "learning_rate": 2.870590607155255e-07, + "loss": 0.9428, + "step": 61280 + }, + { + "epoch": 0.924462276388428, + "grad_norm": 0.657494533061588, + "learning_rate": 2.8592156147327575e-07, + "loss": 0.9447, + "step": 61290 + }, + { + "epoch": 0.9246131105010709, + "grad_norm": 0.6595557540471296, + "learning_rate": 2.847862877451957e-07, + "loss": 0.9612, + "step": 61300 + }, + { + "epoch": 0.9247639446137138, + "grad_norm": 0.6374188889314699, + "learning_rate": 2.8365323979137936e-07, + "loss": 0.9408, + "step": 61310 + }, + { + "epoch": 0.9249147787263567, + "grad_norm": 0.6486114296134498, + "learning_rate": 2.8252241787141034e-07, + "loss": 0.9634, + "step": 61320 + }, + { + "epoch": 0.9250656128389997, + "grad_norm": 0.688805200091639, + "learning_rate": 2.813938222443624e-07, + "loss": 0.968, + "step": 61330 + }, + { + "epoch": 0.9252164469516426, + "grad_norm": 0.6689997371615799, + "learning_rate": 2.802674531687999e-07, + "loss": 0.9528, + "step": 61340 + }, + { + "epoch": 0.9253672810642855, + "grad_norm": 0.6441179030088475, + "learning_rate": 2.7914331090277305e-07, + "loss": 0.9513, + "step": 61350 + }, + { + "epoch": 0.9255181151769284, + "grad_norm": 0.675923126206416, + "learning_rate": 2.780213957038269e-07, + "loss": 0.9402, + "step": 61360 + }, + { + "epoch": 0.9256689492895713, + "grad_norm": 0.6320827642167668, + "learning_rate": 2.7690170782899596e-07, + "loss": 0.9468, + "step": 61370 + }, + { + "epoch": 0.9258197834022143, + "grad_norm": 0.6646729993085533, + "learning_rate": 2.757842475348005e-07, + "loss": 0.9427, + "step": 61380 + }, + { + "epoch": 0.9259706175148572, + "grad_norm": 0.6867072387776226, + "learning_rate": 2.7466901507725463e-07, + "loss": 0.9548, + "step": 61390 + }, + { + "epoch": 0.9261214516275, + "grad_norm": 0.693670979473121, + "learning_rate": 2.7355601071185957e-07, + "loss": 0.9547, + "step": 61400 + }, + { + "epoch": 0.926272285740143, + "grad_norm": 0.664413763382124, + "learning_rate": 2.7244523469360463e-07, + "loss": 0.9323, + "step": 61410 + }, + { + "epoch": 0.9264231198527859, + "grad_norm": 0.6670852872937034, + "learning_rate": 2.7133668727697516e-07, + "loss": 0.9444, + "step": 61420 + }, + { + "epoch": 0.9265739539654289, + "grad_norm": 0.6297511119302022, + "learning_rate": 2.70230368715938e-07, + "loss": 0.927, + "step": 61430 + }, + { + "epoch": 0.9267247880780717, + "grad_norm": 0.6859029526889849, + "learning_rate": 2.6912627926395265e-07, + "loss": 0.9505, + "step": 61440 + }, + { + "epoch": 0.9268756221907146, + "grad_norm": 0.6675292081344352, + "learning_rate": 2.6802441917397117e-07, + "loss": 0.9441, + "step": 61450 + }, + { + "epoch": 0.9270264563033576, + "grad_norm": 0.6399318818356093, + "learning_rate": 2.669247886984294e-07, + "loss": 0.9399, + "step": 61460 + }, + { + "epoch": 0.9271772904160005, + "grad_norm": 0.6690528982996807, + "learning_rate": 2.658273880892559e-07, + "loss": 0.953, + "step": 61470 + }, + { + "epoch": 0.9273281245286434, + "grad_norm": 0.6584565755709096, + "learning_rate": 2.6473221759786615e-07, + "loss": 0.94, + "step": 61480 + }, + { + "epoch": 0.9274789586412863, + "grad_norm": 0.6624652383116393, + "learning_rate": 2.636392774751662e-07, + "loss": 0.9591, + "step": 61490 + }, + { + "epoch": 0.9276297927539292, + "grad_norm": 0.6640179829114193, + "learning_rate": 2.6254856797155226e-07, + "loss": 0.9514, + "step": 61500 + }, + { + "epoch": 0.9277806268665721, + "grad_norm": 0.7761014993691834, + "learning_rate": 2.614600893369068e-07, + "loss": 0.9551, + "step": 61510 + }, + { + "epoch": 0.9279314609792151, + "grad_norm": 0.6657389015084376, + "learning_rate": 2.6037384182060145e-07, + "loss": 0.9501, + "step": 61520 + }, + { + "epoch": 0.928082295091858, + "grad_norm": 0.6397739440188136, + "learning_rate": 2.5928982567149817e-07, + "loss": 0.9413, + "step": 61530 + }, + { + "epoch": 0.9282331292045009, + "grad_norm": 0.7042531434247118, + "learning_rate": 2.582080411379495e-07, + "loss": 0.9313, + "step": 61540 + }, + { + "epoch": 0.9283839633171438, + "grad_norm": 0.6765015311788625, + "learning_rate": 2.5712848846779046e-07, + "loss": 0.9573, + "step": 61550 + }, + { + "epoch": 0.9285347974297867, + "grad_norm": 0.6295032052713414, + "learning_rate": 2.5605116790835217e-07, + "loss": 0.9509, + "step": 61560 + }, + { + "epoch": 0.9286856315424297, + "grad_norm": 0.6950375644268658, + "learning_rate": 2.5497607970644934e-07, + "loss": 0.9582, + "step": 61570 + }, + { + "epoch": 0.9288364656550725, + "grad_norm": 0.6892329251271763, + "learning_rate": 2.5390322410838497e-07, + "loss": 0.9329, + "step": 61580 + }, + { + "epoch": 0.9289872997677154, + "grad_norm": 0.6774214405345398, + "learning_rate": 2.528326013599558e-07, + "loss": 0.9564, + "step": 61590 + }, + { + "epoch": 0.9291381338803584, + "grad_norm": 0.656974233265128, + "learning_rate": 2.5176421170644115e-07, + "loss": 0.9336, + "step": 61600 + }, + { + "epoch": 0.9292889679930013, + "grad_norm": 0.696388812022405, + "learning_rate": 2.506980553926119e-07, + "loss": 0.944, + "step": 61610 + }, + { + "epoch": 0.9294398021056443, + "grad_norm": 0.6696997609466788, + "learning_rate": 2.4963413266272715e-07, + "loss": 0.9422, + "step": 61620 + }, + { + "epoch": 0.9295906362182871, + "grad_norm": 0.7177795333985887, + "learning_rate": 2.485724437605319e-07, + "loss": 0.9576, + "step": 61630 + }, + { + "epoch": 0.92974147033093, + "grad_norm": 0.637202811553197, + "learning_rate": 2.4751298892926376e-07, + "loss": 0.963, + "step": 61640 + }, + { + "epoch": 0.929892304443573, + "grad_norm": 0.6561420599818174, + "learning_rate": 2.464557684116442e-07, + "loss": 0.9481, + "step": 61650 + }, + { + "epoch": 0.9300431385562159, + "grad_norm": 0.6747626730520659, + "learning_rate": 2.4540078244988275e-07, + "loss": 0.952, + "step": 61660 + }, + { + "epoch": 0.9301939726688588, + "grad_norm": 0.6862936572706112, + "learning_rate": 2.4434803128568276e-07, + "loss": 0.9368, + "step": 61670 + }, + { + "epoch": 0.9303448067815017, + "grad_norm": 0.6782623941501467, + "learning_rate": 2.4329751516022793e-07, + "loss": 0.9529, + "step": 61680 + }, + { + "epoch": 0.9304956408941446, + "grad_norm": 0.6483539444883284, + "learning_rate": 2.4224923431419577e-07, + "loss": 0.9423, + "step": 61690 + }, + { + "epoch": 0.9306464750067875, + "grad_norm": 0.6585537507001891, + "learning_rate": 2.412031889877486e-07, + "loss": 0.9627, + "step": 61700 + }, + { + "epoch": 0.9307973091194305, + "grad_norm": 0.6681046990318394, + "learning_rate": 2.401593794205381e-07, + "loss": 0.9543, + "step": 61710 + }, + { + "epoch": 0.9309481432320733, + "grad_norm": 0.8134315358260181, + "learning_rate": 2.3911780585170075e-07, + "loss": 0.947, + "step": 61720 + }, + { + "epoch": 0.9310989773447162, + "grad_norm": 0.6746873510410418, + "learning_rate": 2.380784685198667e-07, + "loss": 0.9418, + "step": 61730 + }, + { + "epoch": 0.9312498114573592, + "grad_norm": 0.6620129516536007, + "learning_rate": 2.3704136766314778e-07, + "loss": 0.9507, + "step": 61740 + }, + { + "epoch": 0.9314006455700021, + "grad_norm": 0.6587301544804722, + "learning_rate": 2.3600650351914612e-07, + "loss": 0.9343, + "step": 61750 + }, + { + "epoch": 0.9315514796826451, + "grad_norm": 0.6701368719433088, + "learning_rate": 2.3497387632495206e-07, + "loss": 0.9499, + "step": 61760 + }, + { + "epoch": 0.9317023137952879, + "grad_norm": 0.6839386409361111, + "learning_rate": 2.3394348631714082e-07, + "loss": 0.9489, + "step": 61770 + }, + { + "epoch": 0.9318531479079308, + "grad_norm": 0.707705877556042, + "learning_rate": 2.32915333731778e-07, + "loss": 0.95, + "step": 61780 + }, + { + "epoch": 0.9320039820205738, + "grad_norm": 0.6843993229548548, + "learning_rate": 2.3188941880441517e-07, + "loss": 0.9536, + "step": 61790 + }, + { + "epoch": 0.9321548161332167, + "grad_norm": 0.6832494245182958, + "learning_rate": 2.3086574177009103e-07, + "loss": 0.9463, + "step": 61800 + }, + { + "epoch": 0.9323056502458597, + "grad_norm": 0.6681388345570004, + "learning_rate": 2.2984430286333236e-07, + "loss": 0.9448, + "step": 61810 + }, + { + "epoch": 0.9324564843585025, + "grad_norm": 0.6669515289631354, + "learning_rate": 2.2882510231815313e-07, + "loss": 0.964, + "step": 61820 + }, + { + "epoch": 0.9326073184711454, + "grad_norm": 0.700874945452153, + "learning_rate": 2.278081403680521e-07, + "loss": 0.9459, + "step": 61830 + }, + { + "epoch": 0.9327581525837884, + "grad_norm": 0.6522211751953791, + "learning_rate": 2.2679341724602066e-07, + "loss": 0.9481, + "step": 61840 + }, + { + "epoch": 0.9329089866964313, + "grad_norm": 0.652986863177807, + "learning_rate": 2.2578093318453065e-07, + "loss": 0.9564, + "step": 61850 + }, + { + "epoch": 0.9330598208090741, + "grad_norm": 0.6801176920006842, + "learning_rate": 2.247706884155454e-07, + "loss": 0.9581, + "step": 61860 + }, + { + "epoch": 0.9332106549217171, + "grad_norm": 0.674430282954747, + "learning_rate": 2.237626831705142e-07, + "loss": 0.9673, + "step": 61870 + }, + { + "epoch": 0.93336148903436, + "grad_norm": 0.6242849789139028, + "learning_rate": 2.2275691768037234e-07, + "loss": 0.9537, + "step": 61880 + }, + { + "epoch": 0.9335123231470029, + "grad_norm": 0.6500103165161096, + "learning_rate": 2.2175339217554436e-07, + "loss": 0.9329, + "step": 61890 + }, + { + "epoch": 0.9336631572596459, + "grad_norm": 0.6573979609383059, + "learning_rate": 2.2075210688593752e-07, + "loss": 0.9467, + "step": 61900 + }, + { + "epoch": 0.9338139913722887, + "grad_norm": 0.6719312307460923, + "learning_rate": 2.1975306204094937e-07, + "loss": 0.9382, + "step": 61910 + }, + { + "epoch": 0.9339648254849316, + "grad_norm": 0.6703666297763626, + "learning_rate": 2.1875625786946353e-07, + "loss": 0.9354, + "step": 61920 + }, + { + "epoch": 0.9341156595975746, + "grad_norm": 0.6666623707703756, + "learning_rate": 2.1776169459984953e-07, + "loss": 0.9457, + "step": 61930 + }, + { + "epoch": 0.9342664937102175, + "grad_norm": 0.6736984444764524, + "learning_rate": 2.1676937245996176e-07, + "loss": 0.9424, + "step": 61940 + }, + { + "epoch": 0.9344173278228605, + "grad_norm": 0.7054831520811626, + "learning_rate": 2.157792916771473e-07, + "loss": 0.9299, + "step": 61950 + }, + { + "epoch": 0.9345681619355033, + "grad_norm": 0.687299152120542, + "learning_rate": 2.147914524782313e-07, + "loss": 0.942, + "step": 61960 + }, + { + "epoch": 0.9347189960481462, + "grad_norm": 0.6690855448313647, + "learning_rate": 2.1380585508953168e-07, + "loss": 0.9459, + "step": 61970 + }, + { + "epoch": 0.9348698301607892, + "grad_norm": 0.665603424102266, + "learning_rate": 2.1282249973684998e-07, + "loss": 0.9233, + "step": 61980 + }, + { + "epoch": 0.9350206642734321, + "grad_norm": 0.6209761386988568, + "learning_rate": 2.1184138664547605e-07, + "loss": 0.9592, + "step": 61990 + }, + { + "epoch": 0.9351714983860749, + "grad_norm": 0.6314838711457187, + "learning_rate": 2.1086251604018337e-07, + "loss": 0.951, + "step": 62000 + }, + { + "epoch": 0.9353223324987179, + "grad_norm": 0.6653980349010764, + "learning_rate": 2.098858881452348e-07, + "loss": 0.9571, + "step": 62010 + }, + { + "epoch": 0.9354731666113608, + "grad_norm": 0.6625594108490576, + "learning_rate": 2.089115031843747e-07, + "loss": 0.9541, + "step": 62020 + }, + { + "epoch": 0.9356240007240038, + "grad_norm": 0.6626042795729583, + "learning_rate": 2.079393613808389e-07, + "loss": 0.9551, + "step": 62030 + }, + { + "epoch": 0.9357748348366467, + "grad_norm": 0.6461471935536185, + "learning_rate": 2.069694629573482e-07, + "loss": 0.9529, + "step": 62040 + }, + { + "epoch": 0.9359256689492895, + "grad_norm": 0.6480880442096734, + "learning_rate": 2.060018081361048e-07, + "loss": 0.9348, + "step": 62050 + }, + { + "epoch": 0.9360765030619325, + "grad_norm": 0.6664464484769217, + "learning_rate": 2.0503639713880252e-07, + "loss": 0.9405, + "step": 62060 + }, + { + "epoch": 0.9362273371745754, + "grad_norm": 0.6763576927762026, + "learning_rate": 2.0407323018661885e-07, + "loss": 0.952, + "step": 62070 + }, + { + "epoch": 0.9363781712872183, + "grad_norm": 0.6600888158579592, + "learning_rate": 2.0311230750021616e-07, + "loss": 0.97, + "step": 62080 + }, + { + "epoch": 0.9365290053998613, + "grad_norm": 0.6521923616437101, + "learning_rate": 2.0215362929974503e-07, + "loss": 0.9332, + "step": 62090 + }, + { + "epoch": 0.9366798395125041, + "grad_norm": 0.6534643481092505, + "learning_rate": 2.0119719580483977e-07, + "loss": 0.9627, + "step": 62100 + }, + { + "epoch": 0.936830673625147, + "grad_norm": 0.6583382615034236, + "learning_rate": 2.0024300723462064e-07, + "loss": 0.9442, + "step": 62110 + }, + { + "epoch": 0.93698150773779, + "grad_norm": 0.6398189176990806, + "learning_rate": 1.9929106380769393e-07, + "loss": 0.9515, + "step": 62120 + }, + { + "epoch": 0.9371323418504329, + "grad_norm": 0.6529434712997583, + "learning_rate": 1.983413657421529e-07, + "loss": 0.9727, + "step": 62130 + }, + { + "epoch": 0.9372831759630758, + "grad_norm": 0.6933278649024638, + "learning_rate": 1.9739391325557467e-07, + "loss": 0.9402, + "step": 62140 + }, + { + "epoch": 0.9374340100757187, + "grad_norm": 0.6569617195545351, + "learning_rate": 1.9644870656502334e-07, + "loss": 0.9255, + "step": 62150 + }, + { + "epoch": 0.9375848441883616, + "grad_norm": 0.6876948681427768, + "learning_rate": 1.9550574588704573e-07, + "loss": 0.9548, + "step": 62160 + }, + { + "epoch": 0.9377356783010046, + "grad_norm": 0.6798356993946695, + "learning_rate": 1.945650314376768e-07, + "loss": 0.9654, + "step": 62170 + }, + { + "epoch": 0.9378865124136475, + "grad_norm": 0.6364682442899211, + "learning_rate": 1.9362656343243636e-07, + "loss": 0.9637, + "step": 62180 + }, + { + "epoch": 0.9380373465262903, + "grad_norm": 0.6919406149598487, + "learning_rate": 1.9269034208632908e-07, + "loss": 0.9366, + "step": 62190 + }, + { + "epoch": 0.9381881806389333, + "grad_norm": 0.6455923025009743, + "learning_rate": 1.9175636761384453e-07, + "loss": 0.9469, + "step": 62200 + }, + { + "epoch": 0.9383390147515762, + "grad_norm": 0.6779929029942797, + "learning_rate": 1.908246402289593e-07, + "loss": 0.9439, + "step": 62210 + }, + { + "epoch": 0.9384898488642192, + "grad_norm": 0.659245749750215, + "learning_rate": 1.8989516014513266e-07, + "loss": 0.9433, + "step": 62220 + }, + { + "epoch": 0.9386406829768621, + "grad_norm": 0.6401166654939295, + "learning_rate": 1.88967927575312e-07, + "loss": 0.962, + "step": 62230 + }, + { + "epoch": 0.9387915170895049, + "grad_norm": 0.6575566162573403, + "learning_rate": 1.8804294273192637e-07, + "loss": 0.9499, + "step": 62240 + }, + { + "epoch": 0.9389423512021479, + "grad_norm": 0.6918825261228959, + "learning_rate": 1.8712020582689172e-07, + "loss": 0.9519, + "step": 62250 + }, + { + "epoch": 0.9390931853147908, + "grad_norm": 0.6554841862261203, + "learning_rate": 1.8619971707161012e-07, + "loss": 0.9475, + "step": 62260 + }, + { + "epoch": 0.9392440194274337, + "grad_norm": 0.6597837897498777, + "learning_rate": 1.8528147667696505e-07, + "loss": 0.945, + "step": 62270 + }, + { + "epoch": 0.9393948535400766, + "grad_norm": 0.6600718547501894, + "learning_rate": 1.843654848533305e-07, + "loss": 0.9379, + "step": 62280 + }, + { + "epoch": 0.9395456876527195, + "grad_norm": 0.6516852176928735, + "learning_rate": 1.834517418105597e-07, + "loss": 0.9547, + "step": 62290 + }, + { + "epoch": 0.9396965217653624, + "grad_norm": 0.6638728397214024, + "learning_rate": 1.8254024775799185e-07, + "loss": 0.9493, + "step": 62300 + }, + { + "epoch": 0.9398473558780054, + "grad_norm": 0.6633857017348229, + "learning_rate": 1.8163100290445434e-07, + "loss": 0.9414, + "step": 62310 + }, + { + "epoch": 0.9399981899906483, + "grad_norm": 0.6446061931738012, + "learning_rate": 1.8072400745825724e-07, + "loss": 0.9579, + "step": 62320 + }, + { + "epoch": 0.9401490241032912, + "grad_norm": 0.6639767840924565, + "learning_rate": 1.798192616271921e-07, + "loss": 0.9381, + "step": 62330 + }, + { + "epoch": 0.9402998582159341, + "grad_norm": 0.6652654915121722, + "learning_rate": 1.7891676561854087e-07, + "loss": 0.9573, + "step": 62340 + }, + { + "epoch": 0.940450692328577, + "grad_norm": 0.6451139795399903, + "learning_rate": 1.7801651963906487e-07, + "loss": 0.9445, + "step": 62350 + }, + { + "epoch": 0.94060152644122, + "grad_norm": 0.6280421953164853, + "learning_rate": 1.7711852389501461e-07, + "loss": 0.9409, + "step": 62360 + }, + { + "epoch": 0.9407523605538629, + "grad_norm": 0.7070271475216987, + "learning_rate": 1.7622277859212001e-07, + "loss": 0.9299, + "step": 62370 + }, + { + "epoch": 0.9409031946665057, + "grad_norm": 0.6846548741781254, + "learning_rate": 1.7532928393560024e-07, + "loss": 0.9518, + "step": 62380 + }, + { + "epoch": 0.9410540287791487, + "grad_norm": 0.6572336380987978, + "learning_rate": 1.7443804013015487e-07, + "loss": 0.9675, + "step": 62390 + }, + { + "epoch": 0.9412048628917916, + "grad_norm": 0.6472890920525162, + "learning_rate": 1.735490473799706e-07, + "loss": 0.9412, + "step": 62400 + }, + { + "epoch": 0.9413556970044346, + "grad_norm": 0.6537108700289816, + "learning_rate": 1.726623058887178e-07, + "loss": 0.9426, + "step": 62410 + }, + { + "epoch": 0.9415065311170774, + "grad_norm": 0.6642049224107207, + "learning_rate": 1.7177781585954956e-07, + "loss": 0.9328, + "step": 62420 + }, + { + "epoch": 0.9416573652297203, + "grad_norm": 0.6668555352033739, + "learning_rate": 1.70895577495106e-07, + "loss": 0.9513, + "step": 62430 + }, + { + "epoch": 0.9418081993423633, + "grad_norm": 0.6494237718838046, + "learning_rate": 1.7001559099750764e-07, + "loss": 0.9305, + "step": 62440 + }, + { + "epoch": 0.9419590334550062, + "grad_norm": 0.6682116235883135, + "learning_rate": 1.6913785656836212e-07, + "loss": 0.9419, + "step": 62450 + }, + { + "epoch": 0.9421098675676491, + "grad_norm": 0.677804174947524, + "learning_rate": 1.682623744087597e-07, + "loss": 0.9554, + "step": 62460 + }, + { + "epoch": 0.942260701680292, + "grad_norm": 0.663210547979419, + "learning_rate": 1.6738914471927437e-07, + "loss": 0.9517, + "step": 62470 + }, + { + "epoch": 0.9424115357929349, + "grad_norm": 0.6899394465397988, + "learning_rate": 1.665181676999661e-07, + "loss": 0.9496, + "step": 62480 + }, + { + "epoch": 0.9425623699055778, + "grad_norm": 0.735493247210088, + "learning_rate": 1.656494435503775e-07, + "loss": 0.9482, + "step": 62490 + }, + { + "epoch": 0.9427132040182208, + "grad_norm": 0.650867427341161, + "learning_rate": 1.647829724695338e-07, + "loss": 0.9377, + "step": 62500 + }, + { + "epoch": 0.9428640381308637, + "grad_norm": 0.6956340614372027, + "learning_rate": 1.639187546559462e-07, + "loss": 0.9301, + "step": 62510 + }, + { + "epoch": 0.9430148722435066, + "grad_norm": 0.6631173467498513, + "learning_rate": 1.6305679030760746e-07, + "loss": 0.9647, + "step": 62520 + }, + { + "epoch": 0.9431657063561495, + "grad_norm": 0.639038903937292, + "learning_rate": 1.6219707962199628e-07, + "loss": 0.9462, + "step": 62530 + }, + { + "epoch": 0.9433165404687924, + "grad_norm": 0.6555608630356291, + "learning_rate": 1.6133962279607396e-07, + "loss": 0.9377, + "step": 62540 + }, + { + "epoch": 0.9434673745814354, + "grad_norm": 0.644944386934535, + "learning_rate": 1.604844200262845e-07, + "loss": 0.9363, + "step": 62550 + }, + { + "epoch": 0.9436182086940782, + "grad_norm": 0.6739947063764096, + "learning_rate": 1.5963147150855895e-07, + "loss": 0.9406, + "step": 62560 + }, + { + "epoch": 0.9437690428067211, + "grad_norm": 0.6568964325028326, + "learning_rate": 1.5878077743830766e-07, + "loss": 0.9532, + "step": 62570 + }, + { + "epoch": 0.9439198769193641, + "grad_norm": 0.6577075649038171, + "learning_rate": 1.5793233801042583e-07, + "loss": 0.9326, + "step": 62580 + }, + { + "epoch": 0.944070711032007, + "grad_norm": 0.6894429895198552, + "learning_rate": 1.5708615341929357e-07, + "loss": 0.9537, + "step": 62590 + }, + { + "epoch": 0.94422154514465, + "grad_norm": 0.6623782145492568, + "learning_rate": 1.5624222385877353e-07, + "loss": 0.9372, + "step": 62600 + }, + { + "epoch": 0.9443723792572928, + "grad_norm": 0.6383053455318471, + "learning_rate": 1.5540054952220995e-07, + "loss": 0.956, + "step": 62610 + }, + { + "epoch": 0.9445232133699357, + "grad_norm": 0.671883897873581, + "learning_rate": 1.5456113060243416e-07, + "loss": 0.9371, + "step": 62620 + }, + { + "epoch": 0.9446740474825787, + "grad_norm": 0.6774860779738014, + "learning_rate": 1.5372396729175787e-07, + "loss": 0.9587, + "step": 62630 + }, + { + "epoch": 0.9448248815952216, + "grad_norm": 0.6846252438817624, + "learning_rate": 1.5288905978197543e-07, + "loss": 0.9584, + "step": 62640 + }, + { + "epoch": 0.9449757157078645, + "grad_norm": 0.6546834597438437, + "learning_rate": 1.520564082643683e-07, + "loss": 0.9334, + "step": 62650 + }, + { + "epoch": 0.9451265498205074, + "grad_norm": 0.6595289507671276, + "learning_rate": 1.512260129296972e-07, + "loss": 0.9536, + "step": 62660 + }, + { + "epoch": 0.9452773839331503, + "grad_norm": 0.6947506377254417, + "learning_rate": 1.5039787396820772e-07, + "loss": 0.9457, + "step": 62670 + }, + { + "epoch": 0.9454282180457932, + "grad_norm": 0.6414842191387579, + "learning_rate": 1.49571991569627e-07, + "loss": 0.9308, + "step": 62680 + }, + { + "epoch": 0.9455790521584362, + "grad_norm": 0.6729707428761139, + "learning_rate": 1.48748365923167e-07, + "loss": 0.9347, + "step": 62690 + }, + { + "epoch": 0.945729886271079, + "grad_norm": 0.6826557276563902, + "learning_rate": 1.4792699721752236e-07, + "loss": 0.9455, + "step": 62700 + }, + { + "epoch": 0.945880720383722, + "grad_norm": 0.6694129243009822, + "learning_rate": 1.4710788564087032e-07, + "loss": 0.9416, + "step": 62710 + }, + { + "epoch": 0.9460315544963649, + "grad_norm": 0.6514420300193314, + "learning_rate": 1.4629103138086964e-07, + "loss": 0.9475, + "step": 62720 + }, + { + "epoch": 0.9461823886090078, + "grad_norm": 0.6482591633878287, + "learning_rate": 1.454764346246651e-07, + "loss": 0.9444, + "step": 62730 + }, + { + "epoch": 0.9463332227216508, + "grad_norm": 0.6470133122408233, + "learning_rate": 1.4466409555888073e-07, + "loss": 0.9528, + "step": 62740 + }, + { + "epoch": 0.9464840568342936, + "grad_norm": 0.6759714104274446, + "learning_rate": 1.4385401436962542e-07, + "loss": 0.9585, + "step": 62750 + }, + { + "epoch": 0.9466348909469365, + "grad_norm": 0.6719504763040876, + "learning_rate": 1.4304619124249185e-07, + "loss": 0.9493, + "step": 62760 + }, + { + "epoch": 0.9467857250595795, + "grad_norm": 0.71079688073272, + "learning_rate": 1.4224062636255197e-07, + "loss": 0.9533, + "step": 62770 + }, + { + "epoch": 0.9469365591722224, + "grad_norm": 0.6768486814167489, + "learning_rate": 1.414373199143615e-07, + "loss": 0.9443, + "step": 62780 + }, + { + "epoch": 0.9470873932848654, + "grad_norm": 0.6856612425005548, + "learning_rate": 1.4063627208196205e-07, + "loss": 0.9359, + "step": 62790 + }, + { + "epoch": 0.9472382273975082, + "grad_norm": 0.6603221893632147, + "learning_rate": 1.3983748304887358e-07, + "loss": 0.937, + "step": 62800 + }, + { + "epoch": 0.9473890615101511, + "grad_norm": 0.6817798623406987, + "learning_rate": 1.3904095299810182e-07, + "loss": 0.9601, + "step": 62810 + }, + { + "epoch": 0.9475398956227941, + "grad_norm": 0.6634750097875696, + "learning_rate": 1.3824668211213087e-07, + "loss": 0.9526, + "step": 62820 + }, + { + "epoch": 0.947690729735437, + "grad_norm": 0.6994485043574509, + "learning_rate": 1.3745467057293182e-07, + "loss": 0.9461, + "step": 62830 + }, + { + "epoch": 0.9478415638480798, + "grad_norm": 0.6427815422682039, + "learning_rate": 1.3666491856195507e-07, + "loss": 0.9386, + "step": 62840 + }, + { + "epoch": 0.9479923979607228, + "grad_norm": 0.6833651300955265, + "learning_rate": 1.3587742626013367e-07, + "loss": 0.958, + "step": 62850 + }, + { + "epoch": 0.9481432320733657, + "grad_norm": 0.6596263467828097, + "learning_rate": 1.3509219384788553e-07, + "loss": 0.9493, + "step": 62860 + }, + { + "epoch": 0.9482940661860086, + "grad_norm": 0.7344502468829746, + "learning_rate": 1.3430922150510784e-07, + "loss": 0.9452, + "step": 62870 + }, + { + "epoch": 0.9484449002986516, + "grad_norm": 0.6807123035774848, + "learning_rate": 1.3352850941118044e-07, + "loss": 0.9519, + "step": 62880 + }, + { + "epoch": 0.9485957344112944, + "grad_norm": 0.6597104858387332, + "learning_rate": 1.3275005774496698e-07, + "loss": 0.9401, + "step": 62890 + }, + { + "epoch": 0.9487465685239374, + "grad_norm": 0.7111538747874346, + "learning_rate": 1.3197386668481248e-07, + "loss": 0.9525, + "step": 62900 + }, + { + "epoch": 0.9488974026365803, + "grad_norm": 0.6793418646497453, + "learning_rate": 1.3119993640854256e-07, + "loss": 0.9559, + "step": 62910 + }, + { + "epoch": 0.9490482367492232, + "grad_norm": 0.6878327500695061, + "learning_rate": 1.3042826709346757e-07, + "loss": 0.9514, + "step": 62920 + }, + { + "epoch": 0.9491990708618662, + "grad_norm": 0.6672558265033155, + "learning_rate": 1.296588589163772e-07, + "loss": 0.9448, + "step": 62930 + }, + { + "epoch": 0.949349904974509, + "grad_norm": 0.6370772069972706, + "learning_rate": 1.2889171205354379e-07, + "loss": 0.9447, + "step": 62940 + }, + { + "epoch": 0.9495007390871519, + "grad_norm": 0.6665215812620524, + "learning_rate": 1.2812682668072562e-07, + "loss": 0.9372, + "step": 62950 + }, + { + "epoch": 0.9496515731997949, + "grad_norm": 0.6564227548900821, + "learning_rate": 1.2736420297315588e-07, + "loss": 0.9409, + "step": 62960 + }, + { + "epoch": 0.9498024073124378, + "grad_norm": 0.6478208384839054, + "learning_rate": 1.266038411055537e-07, + "loss": 0.9571, + "step": 62970 + }, + { + "epoch": 0.9499532414250806, + "grad_norm": 0.6658463714506576, + "learning_rate": 1.258457412521208e-07, + "loss": 0.9264, + "step": 62980 + }, + { + "epoch": 0.9501040755377236, + "grad_norm": 0.6878321332555769, + "learning_rate": 1.2508990358653827e-07, + "loss": 0.9474, + "step": 62990 + }, + { + "epoch": 0.9502549096503665, + "grad_norm": 0.6788366165247318, + "learning_rate": 1.2433632828196984e-07, + "loss": 0.9522, + "step": 63000 + }, + { + "epoch": 0.9504057437630095, + "grad_norm": 0.6607774123022222, + "learning_rate": 1.2358501551106295e-07, + "loss": 0.948, + "step": 63010 + }, + { + "epoch": 0.9505565778756524, + "grad_norm": 0.6670042305322359, + "learning_rate": 1.2283596544594213e-07, + "loss": 0.9411, + "step": 63020 + }, + { + "epoch": 0.9507074119882952, + "grad_norm": 0.6344299616141231, + "learning_rate": 1.2208917825821676e-07, + "loss": 0.9403, + "step": 63030 + }, + { + "epoch": 0.9508582461009382, + "grad_norm": 0.6608937693600009, + "learning_rate": 1.213446541189789e-07, + "loss": 0.935, + "step": 63040 + }, + { + "epoch": 0.9510090802135811, + "grad_norm": 0.6759824456970797, + "learning_rate": 1.2060239319879986e-07, + "loss": 0.9633, + "step": 63050 + }, + { + "epoch": 0.951159914326224, + "grad_norm": 0.6645489166482758, + "learning_rate": 1.198623956677325e-07, + "loss": 0.9456, + "step": 63060 + }, + { + "epoch": 0.951310748438867, + "grad_norm": 0.6272563485243627, + "learning_rate": 1.1912466169531123e-07, + "loss": 0.9331, + "step": 63070 + }, + { + "epoch": 0.9514615825515098, + "grad_norm": 0.6506394745713746, + "learning_rate": 1.1838919145055306e-07, + "loss": 0.9484, + "step": 63080 + }, + { + "epoch": 0.9516124166641527, + "grad_norm": 0.6760734114959557, + "learning_rate": 1.1765598510195653e-07, + "loss": 0.9538, + "step": 63090 + }, + { + "epoch": 0.9517632507767957, + "grad_norm": 0.6427524796151144, + "learning_rate": 1.169250428174995e-07, + "loss": 0.9658, + "step": 63100 + }, + { + "epoch": 0.9519140848894386, + "grad_norm": 0.6804608532083175, + "learning_rate": 1.1619636476464135e-07, + "loss": 0.9352, + "step": 63110 + }, + { + "epoch": 0.9520649190020815, + "grad_norm": 0.6961365334791761, + "learning_rate": 1.1546995111032522e-07, + "loss": 0.9521, + "step": 63120 + }, + { + "epoch": 0.9522157531147244, + "grad_norm": 0.646969699171338, + "learning_rate": 1.1474580202097351e-07, + "loss": 0.9608, + "step": 63130 + }, + { + "epoch": 0.9523665872273673, + "grad_norm": 0.656404670805589, + "learning_rate": 1.1402391766249022e-07, + "loss": 0.9552, + "step": 63140 + }, + { + "epoch": 0.9525174213400103, + "grad_norm": 0.6746585481307469, + "learning_rate": 1.1330429820025967e-07, + "loss": 0.9518, + "step": 63150 + }, + { + "epoch": 0.9526682554526532, + "grad_norm": 0.6327570062924085, + "learning_rate": 1.1258694379915002e-07, + "loss": 0.9426, + "step": 63160 + }, + { + "epoch": 0.952819089565296, + "grad_norm": 0.6573335350829507, + "learning_rate": 1.1187185462350536e-07, + "loss": 0.9317, + "step": 63170 + }, + { + "epoch": 0.952969923677939, + "grad_norm": 0.6611068643556135, + "learning_rate": 1.1115903083715685e-07, + "loss": 0.9346, + "step": 63180 + }, + { + "epoch": 0.9531207577905819, + "grad_norm": 0.6510292602739328, + "learning_rate": 1.1044847260341273e-07, + "loss": 0.9386, + "step": 63190 + }, + { + "epoch": 0.9532715919032249, + "grad_norm": 0.6795225020984644, + "learning_rate": 1.0974018008506281e-07, + "loss": 0.936, + "step": 63200 + }, + { + "epoch": 0.9534224260158678, + "grad_norm": 0.6582403842855362, + "learning_rate": 1.0903415344437951e-07, + "loss": 0.9465, + "step": 63210 + }, + { + "epoch": 0.9535732601285106, + "grad_norm": 0.6526824621165659, + "learning_rate": 1.0833039284311342e-07, + "loss": 0.9402, + "step": 63220 + }, + { + "epoch": 0.9537240942411536, + "grad_norm": 0.67392622186774, + "learning_rate": 1.0762889844249891e-07, + "loss": 0.9547, + "step": 63230 + }, + { + "epoch": 0.9538749283537965, + "grad_norm": 0.6993969657746878, + "learning_rate": 1.0692967040324854e-07, + "loss": 0.9647, + "step": 63240 + }, + { + "epoch": 0.9540257624664394, + "grad_norm": 0.6781783453616294, + "learning_rate": 1.0623270888555748e-07, + "loss": 0.9567, + "step": 63250 + }, + { + "epoch": 0.9541765965790823, + "grad_norm": 0.6458738004009809, + "learning_rate": 1.0553801404910025e-07, + "loss": 0.9545, + "step": 63260 + }, + { + "epoch": 0.9543274306917252, + "grad_norm": 0.6316069122666327, + "learning_rate": 1.0484558605303508e-07, + "loss": 0.951, + "step": 63270 + }, + { + "epoch": 0.9544782648043681, + "grad_norm": 0.6492688608926827, + "learning_rate": 1.0415542505599507e-07, + "loss": 0.9728, + "step": 63280 + }, + { + "epoch": 0.9546290989170111, + "grad_norm": 0.656546643991609, + "learning_rate": 1.0346753121609931e-07, + "loss": 0.9456, + "step": 63290 + }, + { + "epoch": 0.954779933029654, + "grad_norm": 0.6479722318517763, + "learning_rate": 1.0278190469094617e-07, + "loss": 0.942, + "step": 63300 + }, + { + "epoch": 0.9549307671422969, + "grad_norm": 0.6679056378090134, + "learning_rate": 1.0209854563761224e-07, + "loss": 0.9584, + "step": 63310 + }, + { + "epoch": 0.9550816012549398, + "grad_norm": 0.6501946256921126, + "learning_rate": 1.0141745421265892e-07, + "loss": 0.9567, + "step": 63320 + }, + { + "epoch": 0.9552324353675827, + "grad_norm": 0.6355868253474308, + "learning_rate": 1.0073863057212252e-07, + "loss": 0.9406, + "step": 63330 + }, + { + "epoch": 0.9553832694802257, + "grad_norm": 0.6658805636419823, + "learning_rate": 1.000620748715253e-07, + "loss": 0.9508, + "step": 63340 + }, + { + "epoch": 0.9555341035928686, + "grad_norm": 0.6395876041397319, + "learning_rate": 9.938778726586662e-08, + "loss": 0.9482, + "step": 63350 + }, + { + "epoch": 0.9556849377055114, + "grad_norm": 0.663443471290537, + "learning_rate": 9.871576790962733e-08, + "loss": 0.9575, + "step": 63360 + }, + { + "epoch": 0.9558357718181544, + "grad_norm": 0.6472727278735686, + "learning_rate": 9.804601695676764e-08, + "loss": 0.9371, + "step": 63370 + }, + { + "epoch": 0.9559866059307973, + "grad_norm": 0.640715828264482, + "learning_rate": 9.737853456072921e-08, + "loss": 0.953, + "step": 63380 + }, + { + "epoch": 0.9561374400434403, + "grad_norm": 0.6599009945797685, + "learning_rate": 9.671332087443308e-08, + "loss": 0.9461, + "step": 63390 + }, + { + "epoch": 0.9562882741560831, + "grad_norm": 0.7164469805154728, + "learning_rate": 9.605037605028288e-08, + "loss": 0.95, + "step": 63400 + }, + { + "epoch": 0.956439108268726, + "grad_norm": 0.6729929432347243, + "learning_rate": 9.538970024015825e-08, + "loss": 0.9559, + "step": 63410 + }, + { + "epoch": 0.956589942381369, + "grad_norm": 0.6780474789309074, + "learning_rate": 9.473129359542255e-08, + "loss": 0.9512, + "step": 63420 + }, + { + "epoch": 0.9567407764940119, + "grad_norm": 0.6642145619283168, + "learning_rate": 9.407515626691844e-08, + "loss": 0.96, + "step": 63430 + }, + { + "epoch": 0.9568916106066548, + "grad_norm": 0.6331630588267176, + "learning_rate": 9.342128840496678e-08, + "loss": 0.9507, + "step": 63440 + }, + { + "epoch": 0.9570424447192977, + "grad_norm": 0.6901930712824257, + "learning_rate": 9.276969015936998e-08, + "loss": 0.9603, + "step": 63450 + }, + { + "epoch": 0.9571932788319406, + "grad_norm": 0.7156782695233227, + "learning_rate": 9.212036167941196e-08, + "loss": 0.9598, + "step": 63460 + }, + { + "epoch": 0.9573441129445835, + "grad_norm": 0.6805655346057607, + "learning_rate": 9.147330311385372e-08, + "loss": 0.954, + "step": 63470 + }, + { + "epoch": 0.9574949470572265, + "grad_norm": 0.6640021898282916, + "learning_rate": 9.08285146109389e-08, + "loss": 0.9346, + "step": 63480 + }, + { + "epoch": 0.9576457811698694, + "grad_norm": 0.6930097109895205, + "learning_rate": 9.018599631838932e-08, + "loss": 0.9504, + "step": 63490 + }, + { + "epoch": 0.9577966152825123, + "grad_norm": 0.6560125244150575, + "learning_rate": 8.954574838340724e-08, + "loss": 0.953, + "step": 63500 + }, + { + "epoch": 0.9579474493951552, + "grad_norm": 0.6567114921394925, + "learning_rate": 8.890777095267311e-08, + "loss": 0.956, + "step": 63510 + }, + { + "epoch": 0.9580982835077981, + "grad_norm": 0.6390381021030338, + "learning_rate": 8.827206417235112e-08, + "loss": 0.9367, + "step": 63520 + }, + { + "epoch": 0.9582491176204411, + "grad_norm": 0.6615727280197417, + "learning_rate": 8.763862818808144e-08, + "loss": 0.9363, + "step": 63530 + }, + { + "epoch": 0.9583999517330839, + "grad_norm": 0.6443731812413904, + "learning_rate": 8.700746314498687e-08, + "loss": 0.9639, + "step": 63540 + }, + { + "epoch": 0.9585507858457268, + "grad_norm": 0.6371002236756068, + "learning_rate": 8.637856918766729e-08, + "loss": 0.9462, + "step": 63550 + }, + { + "epoch": 0.9587016199583698, + "grad_norm": 0.7534820955969878, + "learning_rate": 8.575194646020302e-08, + "loss": 0.9631, + "step": 63560 + }, + { + "epoch": 0.9588524540710127, + "grad_norm": 0.7018307234795242, + "learning_rate": 8.512759510615587e-08, + "loss": 0.9555, + "step": 63570 + }, + { + "epoch": 0.9590032881836557, + "grad_norm": 0.6760614444054843, + "learning_rate": 8.450551526856587e-08, + "loss": 0.9513, + "step": 63580 + }, + { + "epoch": 0.9591541222962985, + "grad_norm": 0.6517156575658407, + "learning_rate": 8.388570708995237e-08, + "loss": 0.9429, + "step": 63590 + }, + { + "epoch": 0.9593049564089414, + "grad_norm": 0.6853484276969533, + "learning_rate": 8.326817071231397e-08, + "loss": 0.953, + "step": 63600 + }, + { + "epoch": 0.9594557905215844, + "grad_norm": 0.6800095073807149, + "learning_rate": 8.265290627712975e-08, + "loss": 0.9603, + "step": 63610 + }, + { + "epoch": 0.9596066246342273, + "grad_norm": 0.6375970795530684, + "learning_rate": 8.203991392535914e-08, + "loss": 0.9469, + "step": 63620 + }, + { + "epoch": 0.9597574587468702, + "grad_norm": 0.6897112434311805, + "learning_rate": 8.142919379743985e-08, + "loss": 0.9458, + "step": 63630 + }, + { + "epoch": 0.9599082928595131, + "grad_norm": 0.6967448132322268, + "learning_rate": 8.08207460332866e-08, + "loss": 0.9433, + "step": 63640 + }, + { + "epoch": 0.960059126972156, + "grad_norm": 0.676597393191825, + "learning_rate": 8.021457077229789e-08, + "loss": 0.957, + "step": 63650 + }, + { + "epoch": 0.960209961084799, + "grad_norm": 0.7009270933844028, + "learning_rate": 7.961066815335039e-08, + "loss": 0.9226, + "step": 63660 + }, + { + "epoch": 0.9603607951974419, + "grad_norm": 0.65153335730131, + "learning_rate": 7.900903831479679e-08, + "loss": 0.9513, + "step": 63670 + }, + { + "epoch": 0.9605116293100847, + "grad_norm": 0.6927678173176713, + "learning_rate": 7.840968139447458e-08, + "loss": 0.9439, + "step": 63680 + }, + { + "epoch": 0.9606624634227277, + "grad_norm": 0.6821521974730204, + "learning_rate": 7.781259752969506e-08, + "loss": 0.9426, + "step": 63690 + }, + { + "epoch": 0.9608132975353706, + "grad_norm": 0.6823531441525565, + "learning_rate": 7.721778685725212e-08, + "loss": 0.9528, + "step": 63700 + }, + { + "epoch": 0.9609641316480135, + "grad_norm": 0.671039768260824, + "learning_rate": 7.662524951341899e-08, + "loss": 0.9498, + "step": 63710 + }, + { + "epoch": 0.9611149657606565, + "grad_norm": 0.6516936264228853, + "learning_rate": 7.603498563394707e-08, + "loss": 0.9484, + "step": 63720 + }, + { + "epoch": 0.9612657998732993, + "grad_norm": 0.6778618959417757, + "learning_rate": 7.544699535406485e-08, + "loss": 0.9532, + "step": 63730 + }, + { + "epoch": 0.9614166339859422, + "grad_norm": 0.6783496435292199, + "learning_rate": 7.48612788084846e-08, + "loss": 0.9461, + "step": 63740 + }, + { + "epoch": 0.9615674680985852, + "grad_norm": 0.6559069150594237, + "learning_rate": 7.427783613139339e-08, + "loss": 0.9537, + "step": 63750 + }, + { + "epoch": 0.9617183022112281, + "grad_norm": 0.6654205430360408, + "learning_rate": 7.369666745645987e-08, + "loss": 0.9454, + "step": 63760 + }, + { + "epoch": 0.961869136323871, + "grad_norm": 0.6688406368657228, + "learning_rate": 7.311777291683197e-08, + "loss": 0.949, + "step": 63770 + }, + { + "epoch": 0.9620199704365139, + "grad_norm": 0.6612894926729578, + "learning_rate": 7.25411526451325e-08, + "loss": 0.9588, + "step": 63780 + }, + { + "epoch": 0.9621708045491568, + "grad_norm": 2.060121210810861, + "learning_rate": 7.196680677346912e-08, + "loss": 0.9337, + "step": 63790 + }, + { + "epoch": 0.9623216386617998, + "grad_norm": 0.658411753467605, + "learning_rate": 7.139473543342545e-08, + "loss": 0.9421, + "step": 63800 + }, + { + "epoch": 0.9624724727744427, + "grad_norm": 0.659018303152242, + "learning_rate": 7.08249387560611e-08, + "loss": 0.9397, + "step": 63810 + }, + { + "epoch": 0.9626233068870855, + "grad_norm": 0.6758179431624672, + "learning_rate": 7.025741687192167e-08, + "loss": 0.9472, + "step": 63820 + }, + { + "epoch": 0.9627741409997285, + "grad_norm": 0.6399475042104897, + "learning_rate": 6.969216991102534e-08, + "loss": 0.9337, + "step": 63830 + }, + { + "epoch": 0.9629249751123714, + "grad_norm": 0.6407961056149594, + "learning_rate": 6.912919800287077e-08, + "loss": 0.9499, + "step": 63840 + }, + { + "epoch": 0.9630758092250143, + "grad_norm": 0.6840333200070564, + "learning_rate": 6.856850127643589e-08, + "loss": 0.943, + "step": 63850 + }, + { + "epoch": 0.9632266433376573, + "grad_norm": 0.6579546949310578, + "learning_rate": 6.801007986018015e-08, + "loss": 0.9562, + "step": 63860 + }, + { + "epoch": 0.9633774774503001, + "grad_norm": 0.6673608188745225, + "learning_rate": 6.74539338820357e-08, + "loss": 0.9315, + "step": 63870 + }, + { + "epoch": 0.963528311562943, + "grad_norm": 0.6743647415026397, + "learning_rate": 6.690006346941835e-08, + "loss": 0.9506, + "step": 63880 + }, + { + "epoch": 0.963679145675586, + "grad_norm": 0.6410303189250967, + "learning_rate": 6.634846874922107e-08, + "loss": 0.9312, + "step": 63890 + }, + { + "epoch": 0.9638299797882289, + "grad_norm": 0.6643059513556198, + "learning_rate": 6.579914984781388e-08, + "loss": 0.9519, + "step": 63900 + }, + { + "epoch": 0.9639808139008719, + "grad_norm": 0.6828788162338458, + "learning_rate": 6.525210689104833e-08, + "loss": 0.9624, + "step": 63910 + }, + { + "epoch": 0.9641316480135147, + "grad_norm": 0.66064839584745, + "learning_rate": 6.470734000425193e-08, + "loss": 0.9326, + "step": 63920 + }, + { + "epoch": 0.9642824821261576, + "grad_norm": 0.6996506034766715, + "learning_rate": 6.416484931223266e-08, + "loss": 0.9538, + "step": 63930 + }, + { + "epoch": 0.9644333162388006, + "grad_norm": 0.6609110991503582, + "learning_rate": 6.362463493927662e-08, + "loss": 0.9522, + "step": 63940 + }, + { + "epoch": 0.9645841503514435, + "grad_norm": 0.6467128509616799, + "learning_rate": 6.308669700914593e-08, + "loss": 0.9344, + "step": 63950 + }, + { + "epoch": 0.9647349844640863, + "grad_norm": 0.6470685142285888, + "learning_rate": 6.255103564508425e-08, + "loss": 0.9266, + "step": 63960 + }, + { + "epoch": 0.9648858185767293, + "grad_norm": 0.6522165752878931, + "learning_rate": 6.201765096981338e-08, + "loss": 0.9379, + "step": 63970 + }, + { + "epoch": 0.9650366526893722, + "grad_norm": 0.6765029478972994, + "learning_rate": 6.148654310553114e-08, + "loss": 0.9596, + "step": 63980 + }, + { + "epoch": 0.9651874868020152, + "grad_norm": 0.6594760310952247, + "learning_rate": 6.095771217391799e-08, + "loss": 0.9572, + "step": 63990 + }, + { + "epoch": 0.9653383209146581, + "grad_norm": 0.654536731470653, + "learning_rate": 6.0431158296127e-08, + "loss": 0.9442, + "step": 64000 + }, + { + "epoch": 0.9654891550273009, + "grad_norm": 0.671030987457993, + "learning_rate": 5.990688159279611e-08, + "loss": 0.956, + "step": 64010 + }, + { + "epoch": 0.9656399891399439, + "grad_norm": 0.6466914632859775, + "learning_rate": 5.938488218403482e-08, + "loss": 0.9524, + "step": 64020 + }, + { + "epoch": 0.9657908232525868, + "grad_norm": 0.6574212538546593, + "learning_rate": 5.8865160189436334e-08, + "loss": 0.9454, + "step": 64030 + }, + { + "epoch": 0.9659416573652297, + "grad_norm": 0.6821929467007025, + "learning_rate": 5.8347715728068744e-08, + "loss": 0.9602, + "step": 64040 + }, + { + "epoch": 0.9660924914778727, + "grad_norm": 0.6675907378129664, + "learning_rate": 5.783254891848056e-08, + "loss": 0.941, + "step": 64050 + }, + { + "epoch": 0.9662433255905155, + "grad_norm": 0.6513749092775276, + "learning_rate": 5.7319659878696257e-08, + "loss": 0.9316, + "step": 64060 + }, + { + "epoch": 0.9663941597031585, + "grad_norm": 0.6857636722383365, + "learning_rate": 5.680904872622073e-08, + "loss": 0.9546, + "step": 64070 + }, + { + "epoch": 0.9665449938158014, + "grad_norm": 0.7212494178746214, + "learning_rate": 5.630071557803596e-08, + "loss": 0.9377, + "step": 64080 + }, + { + "epoch": 0.9666958279284443, + "grad_norm": 0.6462201436131234, + "learning_rate": 5.579466055060212e-08, + "loss": 0.9312, + "step": 64090 + }, + { + "epoch": 0.9668466620410872, + "grad_norm": 0.6392646751238953, + "learning_rate": 5.529088375985758e-08, + "loss": 0.9336, + "step": 64100 + }, + { + "epoch": 0.9669974961537301, + "grad_norm": 0.670400811257824, + "learning_rate": 5.4789385321216694e-08, + "loss": 0.9276, + "step": 64110 + }, + { + "epoch": 0.967148330266373, + "grad_norm": 0.7203999586107044, + "learning_rate": 5.429016534957643e-08, + "loss": 0.9385, + "step": 64120 + }, + { + "epoch": 0.967299164379016, + "grad_norm": 0.655465943723205, + "learning_rate": 5.379322395930753e-08, + "loss": 0.9402, + "step": 64130 + }, + { + "epoch": 0.9674499984916589, + "grad_norm": 0.6558968699531235, + "learning_rate": 5.329856126426003e-08, + "loss": 0.9425, + "step": 64140 + }, + { + "epoch": 0.9676008326043017, + "grad_norm": 0.6619188289867262, + "learning_rate": 5.2806177377762166e-08, + "loss": 0.9463, + "step": 64150 + }, + { + "epoch": 0.9677516667169447, + "grad_norm": 0.6529083268227769, + "learning_rate": 5.2316072412621486e-08, + "loss": 0.9618, + "step": 64160 + }, + { + "epoch": 0.9679025008295876, + "grad_norm": 0.6567596579819639, + "learning_rate": 5.182824648112039e-08, + "loss": 0.9537, + "step": 64170 + }, + { + "epoch": 0.9680533349422306, + "grad_norm": 0.6430867482587564, + "learning_rate": 5.1342699695020595e-08, + "loss": 0.933, + "step": 64180 + }, + { + "epoch": 0.9682041690548735, + "grad_norm": 0.6773843106861398, + "learning_rate": 5.08594321655631e-08, + "loss": 0.9337, + "step": 64190 + }, + { + "epoch": 0.9683550031675163, + "grad_norm": 0.6498508315984184, + "learning_rate": 5.037844400346492e-08, + "loss": 0.9498, + "step": 64200 + }, + { + "epoch": 0.9685058372801593, + "grad_norm": 0.6458678834106404, + "learning_rate": 4.989973531892123e-08, + "loss": 0.9508, + "step": 64210 + }, + { + "epoch": 0.9686566713928022, + "grad_norm": 0.6788410382638618, + "learning_rate": 4.94233062216054e-08, + "loss": 0.9525, + "step": 64220 + }, + { + "epoch": 0.9688075055054451, + "grad_norm": 0.6600723869717802, + "learning_rate": 4.894915682066681e-08, + "loss": 0.9398, + "step": 64230 + }, + { + "epoch": 0.968958339618088, + "grad_norm": 0.7221598073248576, + "learning_rate": 4.8477287224736324e-08, + "loss": 0.9455, + "step": 64240 + }, + { + "epoch": 0.9691091737307309, + "grad_norm": 0.6673399485826714, + "learning_rate": 4.80076975419197e-08, + "loss": 0.9462, + "step": 64250 + }, + { + "epoch": 0.9692600078433738, + "grad_norm": 0.6787282206554643, + "learning_rate": 4.7540387879798645e-08, + "loss": 0.9368, + "step": 64260 + }, + { + "epoch": 0.9694108419560168, + "grad_norm": 0.6703716340695636, + "learning_rate": 4.707535834543753e-08, + "loss": 0.9238, + "step": 64270 + }, + { + "epoch": 0.9695616760686597, + "grad_norm": 0.7164651563333432, + "learning_rate": 4.6612609045373344e-08, + "loss": 0.9471, + "step": 64280 + }, + { + "epoch": 0.9697125101813026, + "grad_norm": 0.6701420414949194, + "learning_rate": 4.615214008562463e-08, + "loss": 0.9536, + "step": 64290 + }, + { + "epoch": 0.9698633442939455, + "grad_norm": 0.6803919484523294, + "learning_rate": 4.569395157168477e-08, + "loss": 0.9656, + "step": 64300 + }, + { + "epoch": 0.9700141784065884, + "grad_norm": 0.7074014018293776, + "learning_rate": 4.523804360852424e-08, + "loss": 0.9328, + "step": 64310 + }, + { + "epoch": 0.9701650125192314, + "grad_norm": 0.6618121511821559, + "learning_rate": 4.4784416300595045e-08, + "loss": 0.9374, + "step": 64320 + }, + { + "epoch": 0.9703158466318743, + "grad_norm": 0.6533363833430755, + "learning_rate": 4.4333069751822946e-08, + "loss": 0.9497, + "step": 64330 + }, + { + "epoch": 0.9704666807445171, + "grad_norm": 0.7010789153247744, + "learning_rate": 4.3884004065613e-08, + "loss": 0.9484, + "step": 64340 + }, + { + "epoch": 0.9706175148571601, + "grad_norm": 0.6732305210435701, + "learning_rate": 4.3437219344845126e-08, + "loss": 0.9418, + "step": 64350 + }, + { + "epoch": 0.970768348969803, + "grad_norm": 0.6489370478249075, + "learning_rate": 4.2992715691879684e-08, + "loss": 0.9688, + "step": 64360 + }, + { + "epoch": 0.970919183082446, + "grad_norm": 0.6673094757775103, + "learning_rate": 4.255049320855409e-08, + "loss": 0.9668, + "step": 64370 + }, + { + "epoch": 0.9710700171950888, + "grad_norm": 0.7014315305845936, + "learning_rate": 4.211055199618175e-08, + "loss": 0.9437, + "step": 64380 + }, + { + "epoch": 0.9712208513077317, + "grad_norm": 0.6516242348302425, + "learning_rate": 4.1672892155554256e-08, + "loss": 0.928, + "step": 64390 + }, + { + "epoch": 0.9713716854203747, + "grad_norm": 0.6802115980300758, + "learning_rate": 4.1237513786939184e-08, + "loss": 0.9437, + "step": 64400 + }, + { + "epoch": 0.9715225195330176, + "grad_norm": 0.6504703969753812, + "learning_rate": 4.080441699008453e-08, + "loss": 0.9442, + "step": 64410 + }, + { + "epoch": 0.9716733536456605, + "grad_norm": 0.6580379897270857, + "learning_rate": 4.037360186421202e-08, + "loss": 0.9438, + "step": 64420 + }, + { + "epoch": 0.9718241877583034, + "grad_norm": 0.6632293902007944, + "learning_rate": 3.9945068508022714e-08, + "loss": 0.9507, + "step": 64430 + }, + { + "epoch": 0.9719750218709463, + "grad_norm": 0.7141017281555663, + "learning_rate": 3.9518817019693624e-08, + "loss": 0.9509, + "step": 64440 + }, + { + "epoch": 0.9721258559835892, + "grad_norm": 0.6471314554284328, + "learning_rate": 3.909484749688108e-08, + "loss": 0.9532, + "step": 64450 + }, + { + "epoch": 0.9722766900962322, + "grad_norm": 0.6819911520199322, + "learning_rate": 3.867316003671739e-08, + "loss": 0.9466, + "step": 64460 + }, + { + "epoch": 0.9724275242088751, + "grad_norm": 0.6710624062132429, + "learning_rate": 3.8253754735811943e-08, + "loss": 0.9456, + "step": 64470 + }, + { + "epoch": 0.972578358321518, + "grad_norm": 0.6532011134882436, + "learning_rate": 3.7836631690250095e-08, + "loss": 0.9332, + "step": 64480 + }, + { + "epoch": 0.9727291924341609, + "grad_norm": 0.6792437222796001, + "learning_rate": 3.7421790995596506e-08, + "loss": 0.9415, + "step": 64490 + }, + { + "epoch": 0.9728800265468038, + "grad_norm": 0.6733262825376507, + "learning_rate": 3.700923274689072e-08, + "loss": 0.9588, + "step": 64500 + }, + { + "epoch": 0.9730308606594468, + "grad_norm": 0.6414453489245457, + "learning_rate": 3.659895703865268e-08, + "loss": 0.9419, + "step": 64510 + }, + { + "epoch": 0.9731816947720896, + "grad_norm": 0.6317171799514533, + "learning_rate": 3.6190963964874984e-08, + "loss": 0.9516, + "step": 64520 + }, + { + "epoch": 0.9733325288847325, + "grad_norm": 0.6334037165343561, + "learning_rate": 3.5785253619032844e-08, + "loss": 0.9247, + "step": 64530 + }, + { + "epoch": 0.9734833629973755, + "grad_norm": 0.6666229350992648, + "learning_rate": 3.5381826094071925e-08, + "loss": 0.9562, + "step": 64540 + }, + { + "epoch": 0.9736341971100184, + "grad_norm": 0.6697764504087881, + "learning_rate": 3.498068148242051e-08, + "loss": 0.9588, + "step": 64550 + }, + { + "epoch": 0.9737850312226614, + "grad_norm": 0.6467168735800694, + "learning_rate": 3.458181987598064e-08, + "loss": 0.9345, + "step": 64560 + }, + { + "epoch": 0.9739358653353042, + "grad_norm": 0.6440013941127452, + "learning_rate": 3.418524136613255e-08, + "loss": 0.9565, + "step": 64570 + }, + { + "epoch": 0.9740866994479471, + "grad_norm": 0.6470730518538831, + "learning_rate": 3.3790946043732454e-08, + "loss": 0.9577, + "step": 64580 + }, + { + "epoch": 0.9742375335605901, + "grad_norm": 0.6664379735040195, + "learning_rate": 3.339893399911587e-08, + "loss": 0.9478, + "step": 64590 + }, + { + "epoch": 0.974388367673233, + "grad_norm": 0.6864761844370003, + "learning_rate": 3.300920532209095e-08, + "loss": 0.9369, + "step": 64600 + }, + { + "epoch": 0.9745392017858759, + "grad_norm": 0.6413607129647354, + "learning_rate": 3.262176010194851e-08, + "loss": 0.9516, + "step": 64610 + }, + { + "epoch": 0.9746900358985188, + "grad_norm": 0.6692019239757186, + "learning_rate": 3.223659842744975e-08, + "loss": 0.9621, + "step": 64620 + }, + { + "epoch": 0.9748408700111617, + "grad_norm": 0.6713410387026892, + "learning_rate": 3.1853720386838536e-08, + "loss": 0.9573, + "step": 64630 + }, + { + "epoch": 0.9749917041238046, + "grad_norm": 0.6461514348467166, + "learning_rate": 3.147312606783137e-08, + "loss": 0.9558, + "step": 64640 + }, + { + "epoch": 0.9751425382364476, + "grad_norm": 0.7105355434666908, + "learning_rate": 3.109481555762295e-08, + "loss": 0.9459, + "step": 64650 + }, + { + "epoch": 0.9752933723490904, + "grad_norm": 0.6328375897936844, + "learning_rate": 3.071878894288727e-08, + "loss": 0.9439, + "step": 64660 + }, + { + "epoch": 0.9754442064617334, + "grad_norm": 0.6805380370503661, + "learning_rate": 3.034504630976987e-08, + "loss": 0.942, + "step": 64670 + }, + { + "epoch": 0.9755950405743763, + "grad_norm": 0.6471654281634787, + "learning_rate": 2.9973587743897804e-08, + "loss": 0.9667, + "step": 64680 + }, + { + "epoch": 0.9757458746870192, + "grad_norm": 0.6773533661999586, + "learning_rate": 2.9604413330371894e-08, + "loss": 0.9476, + "step": 64690 + }, + { + "epoch": 0.9758967087996622, + "grad_norm": 0.661597226247244, + "learning_rate": 2.923752315377004e-08, + "loss": 0.9484, + "step": 64700 + }, + { + "epoch": 0.976047542912305, + "grad_norm": 0.6705142162382792, + "learning_rate": 2.887291729815056e-08, + "loss": 0.9375, + "step": 64710 + }, + { + "epoch": 0.9761983770249479, + "grad_norm": 0.64190752655479, + "learning_rate": 2.85105958470433e-08, + "loss": 0.9454, + "step": 64720 + }, + { + "epoch": 0.9763492111375909, + "grad_norm": 0.6805118994836922, + "learning_rate": 2.8150558883455192e-08, + "loss": 0.9511, + "step": 64730 + }, + { + "epoch": 0.9765000452502338, + "grad_norm": 0.6988205483135005, + "learning_rate": 2.7792806489874703e-08, + "loss": 0.9646, + "step": 64740 + }, + { + "epoch": 0.9766508793628768, + "grad_norm": 0.6713986669302908, + "learning_rate": 2.7437338748261823e-08, + "loss": 0.9509, + "step": 64750 + }, + { + "epoch": 0.9768017134755196, + "grad_norm": 0.6832834957827761, + "learning_rate": 2.708415574005474e-08, + "loss": 0.9385, + "step": 64760 + }, + { + "epoch": 0.9769525475881625, + "grad_norm": 0.6426362533598607, + "learning_rate": 2.6733257546168735e-08, + "loss": 0.9541, + "step": 64770 + }, + { + "epoch": 0.9771033817008055, + "grad_norm": 0.6529272613669991, + "learning_rate": 2.638464424699505e-08, + "loss": 0.9152, + "step": 64780 + }, + { + "epoch": 0.9772542158134484, + "grad_norm": 0.675457177588781, + "learning_rate": 2.6038315922402024e-08, + "loss": 0.9498, + "step": 64790 + }, + { + "epoch": 0.9774050499260912, + "grad_norm": 0.647056374986445, + "learning_rate": 2.5694272651733966e-08, + "loss": 0.942, + "step": 64800 + }, + { + "epoch": 0.9775558840387342, + "grad_norm": 0.6752590505826432, + "learning_rate": 2.5352514513812266e-08, + "loss": 0.9445, + "step": 64810 + }, + { + "epoch": 0.9777067181513771, + "grad_norm": 0.6701359116844707, + "learning_rate": 2.5013041586933184e-08, + "loss": 0.9603, + "step": 64820 + }, + { + "epoch": 0.97785755226402, + "grad_norm": 0.6386533166091006, + "learning_rate": 2.4675853948872286e-08, + "loss": 0.9453, + "step": 64830 + }, + { + "epoch": 0.978008386376663, + "grad_norm": 0.6802894609535171, + "learning_rate": 2.4340951676878888e-08, + "loss": 0.9467, + "step": 64840 + }, + { + "epoch": 0.9781592204893058, + "grad_norm": 0.6540995785575531, + "learning_rate": 2.4008334847679392e-08, + "loss": 0.923, + "step": 64850 + }, + { + "epoch": 0.9783100546019488, + "grad_norm": 0.6353970839143286, + "learning_rate": 2.367800353747729e-08, + "loss": 0.952, + "step": 64860 + }, + { + "epoch": 0.9784608887145917, + "grad_norm": 0.6297088726043168, + "learning_rate": 2.3349957821953152e-08, + "loss": 0.9547, + "step": 64870 + }, + { + "epoch": 0.9786117228272346, + "grad_norm": 0.6645978607695469, + "learning_rate": 2.3024197776261305e-08, + "loss": 0.9407, + "step": 64880 + }, + { + "epoch": 0.9787625569398776, + "grad_norm": 0.633760511407513, + "learning_rate": 2.270072347503538e-08, + "loss": 0.945, + "step": 64890 + }, + { + "epoch": 0.9789133910525204, + "grad_norm": 0.6890754542818377, + "learning_rate": 2.2379534992382768e-08, + "loss": 0.9472, + "step": 64900 + }, + { + "epoch": 0.9790642251651633, + "grad_norm": 0.6813421675900333, + "learning_rate": 2.206063240188905e-08, + "loss": 0.9492, + "step": 64910 + }, + { + "epoch": 0.9792150592778063, + "grad_norm": 0.6525278656934138, + "learning_rate": 2.174401577661578e-08, + "loss": 0.9502, + "step": 64920 + }, + { + "epoch": 0.9793658933904492, + "grad_norm": 0.6863799412192059, + "learning_rate": 2.142968518909938e-08, + "loss": 0.9544, + "step": 64930 + }, + { + "epoch": 0.979516727503092, + "grad_norm": 0.6614364873918971, + "learning_rate": 2.111764071135447e-08, + "loss": 0.9424, + "step": 64940 + }, + { + "epoch": 0.979667561615735, + "grad_norm": 0.6499671485008721, + "learning_rate": 2.080788241487053e-08, + "loss": 0.9662, + "step": 64950 + }, + { + "epoch": 0.9798183957283779, + "grad_norm": 0.643261790199386, + "learning_rate": 2.050041037061412e-08, + "loss": 0.9537, + "step": 64960 + }, + { + "epoch": 0.9799692298410209, + "grad_norm": 0.6306417058788, + "learning_rate": 2.0195224649027788e-08, + "loss": 0.9292, + "step": 64970 + }, + { + "epoch": 0.9801200639536638, + "grad_norm": 0.6454107692768932, + "learning_rate": 1.9892325320028938e-08, + "loss": 0.945, + "step": 64980 + }, + { + "epoch": 0.9802708980663066, + "grad_norm": 0.6474044894325267, + "learning_rate": 1.959171245301428e-08, + "loss": 0.9612, + "step": 64990 + }, + { + "epoch": 0.9804217321789496, + "grad_norm": 0.6614204741153364, + "learning_rate": 1.9293386116854273e-08, + "loss": 0.945, + "step": 65000 + }, + { + "epoch": 0.9805725662915925, + "grad_norm": 0.6575897091007507, + "learning_rate": 1.8997346379895365e-08, + "loss": 0.9428, + "step": 65010 + }, + { + "epoch": 0.9807234004042354, + "grad_norm": 0.6593490261348314, + "learning_rate": 1.8703593309961075e-08, + "loss": 0.9534, + "step": 65020 + }, + { + "epoch": 0.9808742345168784, + "grad_norm": 0.640315890385878, + "learning_rate": 1.8412126974350906e-08, + "loss": 0.9336, + "step": 65030 + }, + { + "epoch": 0.9810250686295212, + "grad_norm": 0.6999951730839628, + "learning_rate": 1.8122947439840333e-08, + "loss": 0.9506, + "step": 65040 + }, + { + "epoch": 0.9811759027421642, + "grad_norm": 0.6400547560999406, + "learning_rate": 1.78360547726808e-08, + "loss": 0.9492, + "step": 65050 + }, + { + "epoch": 0.9813267368548071, + "grad_norm": 0.6557809509837803, + "learning_rate": 1.7551449038600844e-08, + "loss": 0.9475, + "step": 65060 + }, + { + "epoch": 0.98147757096745, + "grad_norm": 0.6518961969010737, + "learning_rate": 1.726913030280275e-08, + "loss": 0.942, + "step": 65070 + }, + { + "epoch": 0.9816284050800929, + "grad_norm": 0.6468644461814405, + "learning_rate": 1.6989098629967006e-08, + "loss": 0.9382, + "step": 65080 + }, + { + "epoch": 0.9817792391927358, + "grad_norm": 0.6640815276680815, + "learning_rate": 1.6711354084250063e-08, + "loss": 0.9417, + "step": 65090 + }, + { + "epoch": 0.9819300733053787, + "grad_norm": 0.63555045391381, + "learning_rate": 1.643589672928325e-08, + "loss": 0.9516, + "step": 65100 + }, + { + "epoch": 0.9820809074180217, + "grad_norm": 0.6462629794030997, + "learning_rate": 1.6162726628173864e-08, + "loss": 0.953, + "step": 65110 + }, + { + "epoch": 0.9822317415306646, + "grad_norm": 0.6289269946013178, + "learning_rate": 1.589184384350517e-08, + "loss": 0.9482, + "step": 65120 + }, + { + "epoch": 0.9823825756433074, + "grad_norm": 0.6300364766684137, + "learning_rate": 1.5623248437338645e-08, + "loss": 0.926, + "step": 65130 + }, + { + "epoch": 0.9825334097559504, + "grad_norm": 0.679968693579918, + "learning_rate": 1.53569404712095e-08, + "loss": 0.9593, + "step": 65140 + }, + { + "epoch": 0.9826842438685933, + "grad_norm": 0.688132897763721, + "learning_rate": 1.509292000612894e-08, + "loss": 0.9547, + "step": 65150 + }, + { + "epoch": 0.9828350779812363, + "grad_norm": 0.630944132620297, + "learning_rate": 1.4831187102585243e-08, + "loss": 0.956, + "step": 65160 + }, + { + "epoch": 0.9829859120938792, + "grad_norm": 0.6555354728775269, + "learning_rate": 1.4571741820540442e-08, + "loss": 0.9456, + "step": 65170 + }, + { + "epoch": 0.983136746206522, + "grad_norm": 0.662765050903665, + "learning_rate": 1.4314584219434768e-08, + "loss": 0.939, + "step": 65180 + }, + { + "epoch": 0.983287580319165, + "grad_norm": 0.6441278602475108, + "learning_rate": 1.4059714358184428e-08, + "loss": 0.9355, + "step": 65190 + }, + { + "epoch": 0.9834384144318079, + "grad_norm": 0.683678738510366, + "learning_rate": 1.3807132295180492e-08, + "loss": 0.9552, + "step": 65200 + }, + { + "epoch": 0.9835892485444508, + "grad_norm": 0.6448180654552547, + "learning_rate": 1.3556838088287783e-08, + "loss": 0.9507, + "step": 65210 + }, + { + "epoch": 0.9837400826570937, + "grad_norm": 0.6502830506503882, + "learning_rate": 1.330883179485154e-08, + "loss": 0.9462, + "step": 65220 + }, + { + "epoch": 0.9838909167697366, + "grad_norm": 0.7065360174014409, + "learning_rate": 1.3063113471689648e-08, + "loss": 0.9358, + "step": 65230 + }, + { + "epoch": 0.9840417508823796, + "grad_norm": 0.6594470084062546, + "learning_rate": 1.2819683175097076e-08, + "loss": 0.9459, + "step": 65240 + }, + { + "epoch": 0.9841925849950225, + "grad_norm": 0.6680815292102845, + "learning_rate": 1.2578540960843654e-08, + "loss": 0.9654, + "step": 65250 + }, + { + "epoch": 0.9843434191076654, + "grad_norm": 0.66299275514134, + "learning_rate": 1.2339686884176306e-08, + "loss": 0.9645, + "step": 65260 + }, + { + "epoch": 0.9844942532203083, + "grad_norm": 0.685170915381019, + "learning_rate": 1.2103120999815699e-08, + "loss": 0.9579, + "step": 65270 + }, + { + "epoch": 0.9846450873329512, + "grad_norm": 0.6847625159841947, + "learning_rate": 1.1868843361959592e-08, + "loss": 0.9602, + "step": 65280 + }, + { + "epoch": 0.9847959214455941, + "grad_norm": 0.688540991687206, + "learning_rate": 1.1636854024282828e-08, + "loss": 0.9427, + "step": 65290 + }, + { + "epoch": 0.9849467555582371, + "grad_norm": 0.6829991220633872, + "learning_rate": 1.1407153039934005e-08, + "loss": 0.9342, + "step": 65300 + }, + { + "epoch": 0.98509758967088, + "grad_norm": 0.6646176394887983, + "learning_rate": 1.1179740461537691e-08, + "loss": 0.9364, + "step": 65310 + }, + { + "epoch": 0.9852484237835228, + "grad_norm": 0.6605050660553946, + "learning_rate": 1.0954616341194435e-08, + "loss": 0.9543, + "step": 65320 + }, + { + "epoch": 0.9853992578961658, + "grad_norm": 0.6889831587195533, + "learning_rate": 1.0731780730480756e-08, + "loss": 0.9524, + "step": 65330 + }, + { + "epoch": 0.9855500920088087, + "grad_norm": 0.6622452284251744, + "learning_rate": 1.0511233680449152e-08, + "loss": 0.9814, + "step": 65340 + }, + { + "epoch": 0.9857009261214517, + "grad_norm": 0.6720559070373426, + "learning_rate": 1.029297524162809e-08, + "loss": 0.9422, + "step": 65350 + }, + { + "epoch": 0.9858517602340945, + "grad_norm": 0.665247539031855, + "learning_rate": 1.0077005464018686e-08, + "loss": 0.946, + "step": 65360 + }, + { + "epoch": 0.9860025943467374, + "grad_norm": 0.6627959962234296, + "learning_rate": 9.863324397101359e-09, + "loss": 0.9431, + "step": 65370 + }, + { + "epoch": 0.9861534284593804, + "grad_norm": 0.6890194714016381, + "learning_rate": 9.651932089831395e-09, + "loss": 0.9411, + "step": 65380 + }, + { + "epoch": 0.9863042625720233, + "grad_norm": 0.6547803830801058, + "learning_rate": 9.442828590638941e-09, + "loss": 0.9309, + "step": 65390 + }, + { + "epoch": 0.9864550966846662, + "grad_norm": 0.6331938301638954, + "learning_rate": 9.236013947429013e-09, + "loss": 0.9465, + "step": 65400 + }, + { + "epoch": 0.9866059307973091, + "grad_norm": 0.6869075826463981, + "learning_rate": 9.031488207584816e-09, + "loss": 0.9547, + "step": 65410 + }, + { + "epoch": 0.986756764909952, + "grad_norm": 0.6381651081949177, + "learning_rate": 8.829251417962204e-09, + "loss": 0.9432, + "step": 65420 + }, + { + "epoch": 0.986907599022595, + "grad_norm": 0.6616432004643767, + "learning_rate": 8.629303624895225e-09, + "loss": 0.9387, + "step": 65430 + }, + { + "epoch": 0.9870584331352379, + "grad_norm": 0.6361086125134173, + "learning_rate": 8.43164487419057e-09, + "loss": 0.9536, + "step": 65440 + }, + { + "epoch": 0.9872092672478808, + "grad_norm": 0.6712834130305457, + "learning_rate": 8.236275211134237e-09, + "loss": 0.9588, + "step": 65450 + }, + { + "epoch": 0.9873601013605237, + "grad_norm": 0.6546107141181016, + "learning_rate": 8.043194680483758e-09, + "loss": 0.9506, + "step": 65460 + }, + { + "epoch": 0.9875109354731666, + "grad_norm": 0.6919785950886543, + "learning_rate": 7.852403326475966e-09, + "loss": 0.9394, + "step": 65470 + }, + { + "epoch": 0.9876617695858095, + "grad_norm": 0.6914770436642108, + "learning_rate": 7.663901192821455e-09, + "loss": 0.9407, + "step": 65480 + }, + { + "epoch": 0.9878126036984525, + "grad_norm": 0.6723413593506835, + "learning_rate": 7.477688322705678e-09, + "loss": 0.9725, + "step": 65490 + }, + { + "epoch": 0.9879634378110953, + "grad_norm": 0.6877152126639514, + "learning_rate": 7.293764758790067e-09, + "loss": 0.9522, + "step": 65500 + }, + { + "epoch": 0.9881142719237382, + "grad_norm": 0.6938604232362428, + "learning_rate": 7.112130543212026e-09, + "loss": 0.9365, + "step": 65510 + }, + { + "epoch": 0.9882651060363812, + "grad_norm": 0.6825376257932904, + "learning_rate": 6.932785717586044e-09, + "loss": 0.9505, + "step": 65520 + }, + { + "epoch": 0.9884159401490241, + "grad_norm": 0.6530483531254122, + "learning_rate": 6.755730322998144e-09, + "loss": 0.9597, + "step": 65530 + }, + { + "epoch": 0.9885667742616671, + "grad_norm": 0.648448938788065, + "learning_rate": 6.580964400012546e-09, + "loss": 0.9532, + "step": 65540 + }, + { + "epoch": 0.9887176083743099, + "grad_norm": 0.66366788829493, + "learning_rate": 6.408487988669443e-09, + "loss": 0.9535, + "step": 65550 + }, + { + "epoch": 0.9888684424869528, + "grad_norm": 0.6644390317781836, + "learning_rate": 6.238301128482782e-09, + "loss": 0.9639, + "step": 65560 + }, + { + "epoch": 0.9890192765995958, + "grad_norm": 0.7160655909259771, + "learning_rate": 6.070403858442486e-09, + "loss": 0.9609, + "step": 65570 + }, + { + "epoch": 0.9891701107122387, + "grad_norm": 0.6572721424461027, + "learning_rate": 5.904796217014453e-09, + "loss": 0.953, + "step": 65580 + }, + { + "epoch": 0.9893209448248816, + "grad_norm": 0.65944924249381, + "learning_rate": 5.741478242140553e-09, + "loss": 0.9674, + "step": 65590 + }, + { + "epoch": 0.9894717789375245, + "grad_norm": 0.6400416887968278, + "learning_rate": 5.580449971235302e-09, + "loss": 0.9472, + "step": 65600 + }, + { + "epoch": 0.9896226130501674, + "grad_norm": 0.7303774441852771, + "learning_rate": 5.421711441192523e-09, + "loss": 0.9382, + "step": 65610 + }, + { + "epoch": 0.9897734471628103, + "grad_norm": 0.6533810395567147, + "learning_rate": 5.26526268837868e-09, + "loss": 0.9565, + "step": 65620 + }, + { + "epoch": 0.9899242812754533, + "grad_norm": 0.6755933508316073, + "learning_rate": 5.111103748636214e-09, + "loss": 0.9515, + "step": 65630 + }, + { + "epoch": 0.9900751153880961, + "grad_norm": 0.6365331807138992, + "learning_rate": 4.959234657283541e-09, + "loss": 0.9426, + "step": 65640 + }, + { + "epoch": 0.990225949500739, + "grad_norm": 0.6491299733463617, + "learning_rate": 4.809655449113937e-09, + "loss": 0.9493, + "step": 65650 + }, + { + "epoch": 0.990376783613382, + "grad_norm": 0.6608152123597231, + "learning_rate": 4.662366158396658e-09, + "loss": 0.9484, + "step": 65660 + }, + { + "epoch": 0.9905276177260249, + "grad_norm": 0.6753227517834163, + "learning_rate": 4.5173668188758235e-09, + "loss": 0.9659, + "step": 65670 + }, + { + "epoch": 0.9906784518386679, + "grad_norm": 0.6678744526311582, + "learning_rate": 4.374657463771525e-09, + "loss": 0.954, + "step": 65680 + }, + { + "epoch": 0.9908292859513107, + "grad_norm": 0.6942341379498038, + "learning_rate": 4.234238125777612e-09, + "loss": 0.9512, + "step": 65690 + }, + { + "epoch": 0.9909801200639536, + "grad_norm": 0.6882002471606125, + "learning_rate": 4.096108837066126e-09, + "loss": 0.9657, + "step": 65700 + }, + { + "epoch": 0.9911309541765966, + "grad_norm": 0.728921615977374, + "learning_rate": 3.960269629280644e-09, + "loss": 0.9465, + "step": 65710 + }, + { + "epoch": 0.9912817882892395, + "grad_norm": 0.6404236813052324, + "learning_rate": 3.826720533544049e-09, + "loss": 0.9459, + "step": 65720 + }, + { + "epoch": 0.9914326224018825, + "grad_norm": 0.6549761206479239, + "learning_rate": 3.6954615804507543e-09, + "loss": 0.9501, + "step": 65730 + }, + { + "epoch": 0.9915834565145253, + "grad_norm": 0.6554020964818702, + "learning_rate": 3.5664928000744837e-09, + "loss": 0.9542, + "step": 65740 + }, + { + "epoch": 0.9917342906271682, + "grad_norm": 0.6689292020903925, + "learning_rate": 3.4398142219616015e-09, + "loss": 0.9487, + "step": 65750 + }, + { + "epoch": 0.9918851247398112, + "grad_norm": 0.6236681958823924, + "learning_rate": 3.3154258751333377e-09, + "loss": 0.9421, + "step": 65760 + }, + { + "epoch": 0.9920359588524541, + "grad_norm": 0.6611727003593321, + "learning_rate": 3.1933277880891177e-09, + "loss": 0.9351, + "step": 65770 + }, + { + "epoch": 0.9921867929650969, + "grad_norm": 0.6243295845648899, + "learning_rate": 3.0735199888010105e-09, + "loss": 0.9689, + "step": 65780 + }, + { + "epoch": 0.9923376270777399, + "grad_norm": 0.6770321916232241, + "learning_rate": 2.956002504715949e-09, + "loss": 0.9706, + "step": 65790 + }, + { + "epoch": 0.9924884611903828, + "grad_norm": 0.6548435514649363, + "learning_rate": 2.8407753627590628e-09, + "loss": 0.9538, + "step": 65800 + }, + { + "epoch": 0.9926392953030257, + "grad_norm": 0.6962171375889273, + "learning_rate": 2.7278385893281247e-09, + "loss": 0.9601, + "step": 65810 + }, + { + "epoch": 0.9927901294156687, + "grad_norm": 0.6501802421719302, + "learning_rate": 2.6171922102979917e-09, + "loss": 0.9432, + "step": 65820 + }, + { + "epoch": 0.9929409635283115, + "grad_norm": 0.6367428834918875, + "learning_rate": 2.5088362510183874e-09, + "loss": 0.9597, + "step": 65830 + }, + { + "epoch": 0.9930917976409545, + "grad_norm": 0.7129287692052316, + "learning_rate": 2.402770736311677e-09, + "loss": 0.9412, + "step": 65840 + }, + { + "epoch": 0.9932426317535974, + "grad_norm": 0.6640641679150656, + "learning_rate": 2.2989956904806433e-09, + "loss": 0.9329, + "step": 65850 + }, + { + "epoch": 0.9933934658662403, + "grad_norm": 0.68039233179534, + "learning_rate": 2.1975111372973812e-09, + "loss": 0.9413, + "step": 65860 + }, + { + "epoch": 0.9935442999788833, + "grad_norm": 0.6424972396702754, + "learning_rate": 2.098317100014402e-09, + "loss": 0.9302, + "step": 65870 + }, + { + "epoch": 0.9936951340915261, + "grad_norm": 0.6437815137078426, + "learning_rate": 2.0014136013568607e-09, + "loss": 0.9599, + "step": 65880 + }, + { + "epoch": 0.993845968204169, + "grad_norm": 0.6419821579841865, + "learning_rate": 1.9068006635236667e-09, + "loss": 0.9619, + "step": 65890 + }, + { + "epoch": 0.993996802316812, + "grad_norm": 0.7046595460813169, + "learning_rate": 1.814478308193035e-09, + "loss": 0.9377, + "step": 65900 + }, + { + "epoch": 0.9941476364294549, + "grad_norm": 0.6927591663535815, + "learning_rate": 1.7244465565158242e-09, + "loss": 0.9451, + "step": 65910 + }, + { + "epoch": 0.9942984705420977, + "grad_norm": 0.7152604549054679, + "learning_rate": 1.6367054291177576e-09, + "loss": 0.9659, + "step": 65920 + }, + { + "epoch": 0.9944493046547407, + "grad_norm": 0.6448362082917994, + "learning_rate": 1.5512549461016435e-09, + "loss": 0.9511, + "step": 65930 + }, + { + "epoch": 0.9946001387673836, + "grad_norm": 0.6582840098432079, + "learning_rate": 1.4680951270429345e-09, + "loss": 0.9263, + "step": 65940 + }, + { + "epoch": 0.9947509728800266, + "grad_norm": 0.666574182001213, + "learning_rate": 1.3872259909941676e-09, + "loss": 0.9446, + "step": 65950 + }, + { + "epoch": 0.9949018069926695, + "grad_norm": 0.6841087882149149, + "learning_rate": 1.308647556482745e-09, + "loss": 0.9619, + "step": 65960 + }, + { + "epoch": 0.9950526411053123, + "grad_norm": 0.7253566755427552, + "learning_rate": 1.232359841512043e-09, + "loss": 0.9481, + "step": 65970 + }, + { + "epoch": 0.9952034752179553, + "grad_norm": 0.6861953678538165, + "learning_rate": 1.1583628635580823e-09, + "loss": 0.9407, + "step": 65980 + }, + { + "epoch": 0.9953543093305982, + "grad_norm": 0.6690026388241204, + "learning_rate": 1.0866566395739687e-09, + "loss": 0.9509, + "step": 65990 + }, + { + "epoch": 0.9955051434432411, + "grad_norm": 0.6748903894784021, + "learning_rate": 1.0172411859898923e-09, + "loss": 0.9519, + "step": 66000 + }, + { + "epoch": 0.9956559775558841, + "grad_norm": 0.6590181812607078, + "learning_rate": 9.501165187053573e-10, + "loss": 0.9413, + "step": 66010 + }, + { + "epoch": 0.9958068116685269, + "grad_norm": 0.6651677256444527, + "learning_rate": 8.852826531025038e-10, + "loss": 0.9551, + "step": 66020 + }, + { + "epoch": 0.9959576457811699, + "grad_norm": 0.7348111319123602, + "learning_rate": 8.22739604032785e-10, + "loss": 0.9662, + "step": 66030 + }, + { + "epoch": 0.9961084798938128, + "grad_norm": 0.6255521824798366, + "learning_rate": 7.624873858247395e-10, + "loss": 0.9506, + "step": 66040 + }, + { + "epoch": 0.9962593140064557, + "grad_norm": 0.6415760842253955, + "learning_rate": 7.045260122839903e-10, + "loss": 0.9306, + "step": 66050 + }, + { + "epoch": 0.9964101481190986, + "grad_norm": 0.6689424668492022, + "learning_rate": 6.488554966876948e-10, + "loss": 0.9496, + "step": 66060 + }, + { + "epoch": 0.9965609822317415, + "grad_norm": 0.6635693966188118, + "learning_rate": 5.954758517912051e-10, + "loss": 0.9529, + "step": 66070 + }, + { + "epoch": 0.9967118163443844, + "grad_norm": 0.6550724862993289, + "learning_rate": 5.443870898236281e-10, + "loss": 0.9371, + "step": 66080 + }, + { + "epoch": 0.9968626504570274, + "grad_norm": 0.6536783594389377, + "learning_rate": 4.955892224900449e-10, + "loss": 0.9508, + "step": 66090 + }, + { + "epoch": 0.9970134845696703, + "grad_norm": 0.6627271023410723, + "learning_rate": 4.490822609681811e-10, + "loss": 0.9409, + "step": 66100 + }, + { + "epoch": 0.9971643186823131, + "grad_norm": 0.6450497317362973, + "learning_rate": 4.048662159150674e-10, + "loss": 0.9295, + "step": 66110 + }, + { + "epoch": 0.9973151527949561, + "grad_norm": 0.6655260015735397, + "learning_rate": 3.6294109746037864e-10, + "loss": 0.9374, + "step": 66120 + }, + { + "epoch": 0.997465986907599, + "grad_norm": 0.7294807601551127, + "learning_rate": 3.233069152075441e-10, + "loss": 0.9369, + "step": 66130 + }, + { + "epoch": 0.997616821020242, + "grad_norm": 0.6558426084360428, + "learning_rate": 2.8596367823929826e-10, + "loss": 0.9447, + "step": 66140 + }, + { + "epoch": 0.9977676551328849, + "grad_norm": 0.6280584864846743, + "learning_rate": 2.5091139510879935e-10, + "loss": 0.941, + "step": 66150 + }, + { + "epoch": 0.9979184892455277, + "grad_norm": 0.6442651309488325, + "learning_rate": 2.1815007384740073e-10, + "loss": 0.9186, + "step": 66160 + }, + { + "epoch": 0.9980693233581707, + "grad_norm": 0.6766852421490172, + "learning_rate": 1.8767972196132023e-10, + "loss": 0.9432, + "step": 66170 + }, + { + "epoch": 0.9982201574708136, + "grad_norm": 0.6676653670499447, + "learning_rate": 1.5950034643164026e-10, + "loss": 0.9547, + "step": 66180 + }, + { + "epoch": 0.9983709915834565, + "grad_norm": 0.6538344241200746, + "learning_rate": 1.3361195371319745e-10, + "loss": 0.9409, + "step": 66190 + }, + { + "epoch": 0.9985218256960994, + "grad_norm": 0.6457640108175507, + "learning_rate": 1.1001454973680326e-10, + "loss": 0.9387, + "step": 66200 + }, + { + "epoch": 0.9986726598087423, + "grad_norm": 0.6976029307071817, + "learning_rate": 8.870813991035399e-11, + "loss": 0.9592, + "step": 66210 + }, + { + "epoch": 0.9988234939213853, + "grad_norm": 0.6818676789615996, + "learning_rate": 6.969272911439007e-11, + "loss": 0.9537, + "step": 66220 + }, + { + "epoch": 0.9989743280340282, + "grad_norm": 0.6942605857034341, + "learning_rate": 5.2968321704316425e-11, + "loss": 0.9463, + "step": 66230 + }, + { + "epoch": 0.9991251621466711, + "grad_norm": 0.6578498474337087, + "learning_rate": 3.853492151373317e-11, + "loss": 0.9514, + "step": 66240 + }, + { + "epoch": 0.999275996259314, + "grad_norm": 0.6895491620886198, + "learning_rate": 2.639253184777424e-11, + "loss": 0.9595, + "step": 66250 + }, + { + "epoch": 0.9994268303719569, + "grad_norm": 0.6533232328913292, + "learning_rate": 1.6541155488658533e-11, + "loss": 0.947, + "step": 66260 + }, + { + "epoch": 0.9995776644845998, + "grad_norm": 0.6503891860445346, + "learning_rate": 8.980794693469463e-12, + "loss": 0.9488, + "step": 66270 + }, + { + "epoch": 0.9997284985972428, + "grad_norm": 0.6478810842480915, + "learning_rate": 3.711451194154947e-12, + "loss": 0.9459, + "step": 66280 + }, + { + "epoch": 0.9998793327098857, + "grad_norm": 0.6394448098275978, + "learning_rate": 7.331261986376348e-13, + "loss": 0.9355, + "step": 66290 + } + ], + "logging_steps": 10, + "max_steps": 66298, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 6000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 9077377046937600.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}