{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0380366278638689, "eval_steps": 500, "global_step": 14000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.414547341884777e-05, "grad_norm": 5.018120288848877, "learning_rate": 4e-05, "loss": 4.798, "step": 1 }, { "epoch": 0.00014829094683769555, "grad_norm": 5.04449462890625, "learning_rate": 8e-05, "loss": 4.8039, "step": 2 }, { "epoch": 0.00022243642025654333, "grad_norm": 5.038125038146973, "learning_rate": 0.00012, "loss": 4.8284, "step": 3 }, { "epoch": 0.0002965818936753911, "grad_norm": 4.887388229370117, "learning_rate": 0.00016, "loss": 4.3122, "step": 4 }, { "epoch": 0.0003707273670942389, "grad_norm": 4.149261474609375, "learning_rate": 0.0002, "loss": 3.433, "step": 5 }, { "epoch": 0.00044487284051308667, "grad_norm": 4.49135684967041, "learning_rate": 0.00019998999749937485, "loss": 2.7563, "step": 6 }, { "epoch": 0.0005190183139319345, "grad_norm": 5.35917329788208, "learning_rate": 0.00019997999499874968, "loss": 2.2588, "step": 7 }, { "epoch": 0.0005931637873507822, "grad_norm": 6.407321929931641, "learning_rate": 0.00019996999249812455, "loss": 1.82, "step": 8 }, { "epoch": 0.00066730926076963, "grad_norm": 3.834652900695801, "learning_rate": 0.00019995998999749939, "loss": 1.4105, "step": 9 }, { "epoch": 0.0007414547341884778, "grad_norm": 2.5889906883239746, "learning_rate": 0.00019994998749687425, "loss": 1.3477, "step": 10 }, { "epoch": 0.0008156002076073255, "grad_norm": 2.5410232543945312, "learning_rate": 0.00019993998499624906, "loss": 1.1089, "step": 11 }, { "epoch": 0.0008897456810261733, "grad_norm": 2.7493386268615723, "learning_rate": 0.00019992998249562392, "loss": 1.0602, "step": 12 }, { "epoch": 0.0009638911544450211, "grad_norm": 2.490690231323242, "learning_rate": 0.00019991997999499876, "loss": 0.9321, "step": 13 }, { "epoch": 0.001038036627863869, "grad_norm": 1.0331135988235474, "learning_rate": 0.00019990997749437362, "loss": 0.9789, "step": 14 }, { "epoch": 0.0011121821012827168, "grad_norm": 0.6616149544715881, "learning_rate": 0.00019989997499374843, "loss": 0.9192, "step": 15 }, { "epoch": 0.0011863275747015644, "grad_norm": 1.1646225452423096, "learning_rate": 0.0001998899724931233, "loss": 0.8935, "step": 16 }, { "epoch": 0.0012604730481204122, "grad_norm": 0.6624720692634583, "learning_rate": 0.00019987996999249814, "loss": 0.9094, "step": 17 }, { "epoch": 0.00133461852153926, "grad_norm": 0.6470763087272644, "learning_rate": 0.00019986996749187297, "loss": 0.8233, "step": 18 }, { "epoch": 0.0014087639949581078, "grad_norm": 0.6901748776435852, "learning_rate": 0.0001998599649912478, "loss": 0.8566, "step": 19 }, { "epoch": 0.0014829094683769556, "grad_norm": 0.4936569929122925, "learning_rate": 0.00019984996249062267, "loss": 0.7908, "step": 20 }, { "epoch": 0.0015570549417958034, "grad_norm": 0.4914224147796631, "learning_rate": 0.0001998399599899975, "loss": 0.8322, "step": 21 }, { "epoch": 0.001631200415214651, "grad_norm": 0.4565899968147278, "learning_rate": 0.00019982995748937235, "loss": 0.8399, "step": 22 }, { "epoch": 0.0017053458886334989, "grad_norm": 0.5580044984817505, "learning_rate": 0.00019981995498874719, "loss": 0.8093, "step": 23 }, { "epoch": 0.0017794913620523467, "grad_norm": 0.43089380860328674, "learning_rate": 0.00019980995248812205, "loss": 0.7816, "step": 24 }, { "epoch": 0.0018536368354711945, "grad_norm": 0.505142331123352, "learning_rate": 0.0001997999499874969, "loss": 0.8412, "step": 25 }, { "epoch": 0.0019277823088900423, "grad_norm": 0.42089980840682983, "learning_rate": 0.00019978994748687172, "loss": 0.7517, "step": 26 }, { "epoch": 0.00200192778230889, "grad_norm": 0.44702333211898804, "learning_rate": 0.00019977994498624656, "loss": 0.7407, "step": 27 }, { "epoch": 0.002076073255727738, "grad_norm": 0.5817534923553467, "learning_rate": 0.00019976994248562143, "loss": 0.7585, "step": 28 }, { "epoch": 0.0021502187291465857, "grad_norm": 0.44061407446861267, "learning_rate": 0.00019975993998499626, "loss": 0.7821, "step": 29 }, { "epoch": 0.0022243642025654336, "grad_norm": 0.48626708984375, "learning_rate": 0.0001997499374843711, "loss": 0.7528, "step": 30 }, { "epoch": 0.0022985096759842814, "grad_norm": 0.4685170352458954, "learning_rate": 0.00019973993498374594, "loss": 0.739, "step": 31 }, { "epoch": 0.0023726551494031287, "grad_norm": 0.4305589199066162, "learning_rate": 0.0001997299324831208, "loss": 0.7534, "step": 32 }, { "epoch": 0.0024468006228219766, "grad_norm": 0.44318443536758423, "learning_rate": 0.0001997199299824956, "loss": 0.7674, "step": 33 }, { "epoch": 0.0025209460962408244, "grad_norm": 0.4475301206111908, "learning_rate": 0.00019970992748187047, "loss": 0.7013, "step": 34 }, { "epoch": 0.002595091569659672, "grad_norm": 0.48515433073043823, "learning_rate": 0.0001996999249812453, "loss": 0.7751, "step": 35 }, { "epoch": 0.00266923704307852, "grad_norm": 0.6036186218261719, "learning_rate": 0.00019968992248062018, "loss": 0.7207, "step": 36 }, { "epoch": 0.002743382516497368, "grad_norm": 0.4775208532810211, "learning_rate": 0.000199679919979995, "loss": 0.8073, "step": 37 }, { "epoch": 0.0028175279899162156, "grad_norm": 0.5439049601554871, "learning_rate": 0.00019966991747936985, "loss": 0.722, "step": 38 }, { "epoch": 0.0028916734633350634, "grad_norm": 0.643951952457428, "learning_rate": 0.0001996599149787447, "loss": 0.7587, "step": 39 }, { "epoch": 0.0029658189367539113, "grad_norm": 0.4711515009403229, "learning_rate": 0.00019964991247811955, "loss": 0.6834, "step": 40 }, { "epoch": 0.003039964410172759, "grad_norm": 0.5383298397064209, "learning_rate": 0.0001996399099774944, "loss": 0.7984, "step": 41 }, { "epoch": 0.003114109883591607, "grad_norm": 0.48809218406677246, "learning_rate": 0.00019962990747686923, "loss": 0.737, "step": 42 }, { "epoch": 0.0031882553570104547, "grad_norm": 0.5248563885688782, "learning_rate": 0.0001996199049762441, "loss": 0.7538, "step": 43 }, { "epoch": 0.003262400830429302, "grad_norm": 0.5228831171989441, "learning_rate": 0.0001996099024756189, "loss": 0.733, "step": 44 }, { "epoch": 0.00333654630384815, "grad_norm": 0.4848816692829132, "learning_rate": 0.00019959989997499376, "loss": 0.7056, "step": 45 }, { "epoch": 0.0034106917772669977, "grad_norm": 0.46852174401283264, "learning_rate": 0.0001995898974743686, "loss": 0.7127, "step": 46 }, { "epoch": 0.0034848372506858455, "grad_norm": 0.5433565378189087, "learning_rate": 0.00019957989497374347, "loss": 0.7215, "step": 47 }, { "epoch": 0.0035589827241046933, "grad_norm": 0.501708984375, "learning_rate": 0.00019956989247311828, "loss": 0.7268, "step": 48 }, { "epoch": 0.003633128197523541, "grad_norm": 0.5020154118537903, "learning_rate": 0.00019955988997249314, "loss": 0.6835, "step": 49 }, { "epoch": 0.003707273670942389, "grad_norm": 0.4898451864719391, "learning_rate": 0.00019954988747186798, "loss": 0.6738, "step": 50 }, { "epoch": 0.0037814191443612368, "grad_norm": 0.4498940706253052, "learning_rate": 0.00019953988497124284, "loss": 0.7293, "step": 51 }, { "epoch": 0.0038555646177800846, "grad_norm": 0.4351322650909424, "learning_rate": 0.00019952988247061765, "loss": 0.7362, "step": 52 }, { "epoch": 0.003929710091198932, "grad_norm": 0.4285847544670105, "learning_rate": 0.00019951987996999252, "loss": 0.6934, "step": 53 }, { "epoch": 0.00400385556461778, "grad_norm": 0.4434189796447754, "learning_rate": 0.00019950987746936735, "loss": 0.7297, "step": 54 }, { "epoch": 0.004078001038036628, "grad_norm": 0.4073265790939331, "learning_rate": 0.0001994998749687422, "loss": 0.7412, "step": 55 }, { "epoch": 0.004152146511455476, "grad_norm": 0.43486592173576355, "learning_rate": 0.00019948987246811703, "loss": 0.7259, "step": 56 }, { "epoch": 0.004226291984874324, "grad_norm": 0.4635395407676697, "learning_rate": 0.0001994798699674919, "loss": 0.7065, "step": 57 }, { "epoch": 0.0043004374582931715, "grad_norm": 0.4395889937877655, "learning_rate": 0.00019946986746686673, "loss": 0.7528, "step": 58 }, { "epoch": 0.004374582931712019, "grad_norm": 0.39878204464912415, "learning_rate": 0.00019945986496624156, "loss": 0.6712, "step": 59 }, { "epoch": 0.004448728405130867, "grad_norm": 0.42334067821502686, "learning_rate": 0.0001994498624656164, "loss": 0.6955, "step": 60 }, { "epoch": 0.004522873878549715, "grad_norm": 0.42296838760375977, "learning_rate": 0.00019943985996499127, "loss": 0.7165, "step": 61 }, { "epoch": 0.004597019351968563, "grad_norm": 0.41188889741897583, "learning_rate": 0.0001994298574643661, "loss": 0.7046, "step": 62 }, { "epoch": 0.00467116482538741, "grad_norm": 0.43292665481567383, "learning_rate": 0.00019941985496374094, "loss": 0.6833, "step": 63 }, { "epoch": 0.0047453102988062575, "grad_norm": 0.429616779088974, "learning_rate": 0.00019940985246311578, "loss": 0.6736, "step": 64 }, { "epoch": 0.004819455772225105, "grad_norm": 0.39143863320350647, "learning_rate": 0.00019939984996249064, "loss": 0.6166, "step": 65 }, { "epoch": 0.004893601245643953, "grad_norm": 0.5005157589912415, "learning_rate": 0.00019938984746186548, "loss": 0.7336, "step": 66 }, { "epoch": 0.004967746719062801, "grad_norm": 0.4069496989250183, "learning_rate": 0.00019937984496124032, "loss": 0.6447, "step": 67 }, { "epoch": 0.005041892192481649, "grad_norm": 0.4353240728378296, "learning_rate": 0.00019936984246061515, "loss": 0.7048, "step": 68 }, { "epoch": 0.0051160376659004966, "grad_norm": 0.42617061734199524, "learning_rate": 0.00019935983995999002, "loss": 0.6799, "step": 69 }, { "epoch": 0.005190183139319344, "grad_norm": 0.4510687291622162, "learning_rate": 0.00019934983745936485, "loss": 0.6754, "step": 70 }, { "epoch": 0.005264328612738192, "grad_norm": 0.4561275541782379, "learning_rate": 0.0001993398349587397, "loss": 0.6602, "step": 71 }, { "epoch": 0.00533847408615704, "grad_norm": 0.48108747601509094, "learning_rate": 0.00019932983245811453, "loss": 0.6471, "step": 72 }, { "epoch": 0.005412619559575888, "grad_norm": 0.4552939236164093, "learning_rate": 0.0001993198299574894, "loss": 0.6871, "step": 73 }, { "epoch": 0.005486765032994736, "grad_norm": 0.4524765610694885, "learning_rate": 0.00019930982745686423, "loss": 0.6966, "step": 74 }, { "epoch": 0.0055609105064135834, "grad_norm": 0.4276913106441498, "learning_rate": 0.00019929982495623907, "loss": 0.6549, "step": 75 }, { "epoch": 0.005635055979832431, "grad_norm": 0.437625914812088, "learning_rate": 0.0001992898224556139, "loss": 0.6903, "step": 76 }, { "epoch": 0.005709201453251279, "grad_norm": 0.416545033454895, "learning_rate": 0.00019927981995498877, "loss": 0.6473, "step": 77 }, { "epoch": 0.005783346926670127, "grad_norm": 0.5285107493400574, "learning_rate": 0.0001992698174543636, "loss": 0.6801, "step": 78 }, { "epoch": 0.005857492400088975, "grad_norm": 0.482400506734848, "learning_rate": 0.00019925981495373844, "loss": 0.6782, "step": 79 }, { "epoch": 0.0059316378735078225, "grad_norm": 0.46806052327156067, "learning_rate": 0.0001992498124531133, "loss": 0.675, "step": 80 }, { "epoch": 0.00600578334692667, "grad_norm": 0.5683769583702087, "learning_rate": 0.00019923980995248812, "loss": 0.6812, "step": 81 }, { "epoch": 0.006079928820345518, "grad_norm": 0.4781586825847626, "learning_rate": 0.00019922980745186298, "loss": 0.7084, "step": 82 }, { "epoch": 0.006154074293764366, "grad_norm": 0.5086846351623535, "learning_rate": 0.00019921980495123782, "loss": 0.7071, "step": 83 }, { "epoch": 0.006228219767183214, "grad_norm": 0.4660095274448395, "learning_rate": 0.00019920980245061268, "loss": 0.6614, "step": 84 }, { "epoch": 0.006302365240602062, "grad_norm": 0.4385964870452881, "learning_rate": 0.0001991997999499875, "loss": 0.6727, "step": 85 }, { "epoch": 0.006376510714020909, "grad_norm": 0.4327223002910614, "learning_rate": 0.00019918979744936236, "loss": 0.6684, "step": 86 }, { "epoch": 0.006450656187439757, "grad_norm": 0.4530227780342102, "learning_rate": 0.0001991797949487372, "loss": 0.6534, "step": 87 }, { "epoch": 0.006524801660858604, "grad_norm": 0.4909004867076874, "learning_rate": 0.00019916979244811206, "loss": 0.7145, "step": 88 }, { "epoch": 0.006598947134277452, "grad_norm": 0.47614890336990356, "learning_rate": 0.00019915978994748687, "loss": 0.6474, "step": 89 }, { "epoch": 0.0066730926076963, "grad_norm": 0.4389638900756836, "learning_rate": 0.00019914978744686173, "loss": 0.6591, "step": 90 }, { "epoch": 0.006747238081115148, "grad_norm": 0.46090275049209595, "learning_rate": 0.00019913978494623657, "loss": 0.6307, "step": 91 }, { "epoch": 0.006821383554533995, "grad_norm": 0.41981884837150574, "learning_rate": 0.0001991297824456114, "loss": 0.6713, "step": 92 }, { "epoch": 0.006895529027952843, "grad_norm": 0.4931015372276306, "learning_rate": 0.00019911977994498624, "loss": 0.6943, "step": 93 }, { "epoch": 0.006969674501371691, "grad_norm": 0.4798949658870697, "learning_rate": 0.0001991097774443611, "loss": 0.6548, "step": 94 }, { "epoch": 0.007043819974790539, "grad_norm": 0.47880855202674866, "learning_rate": 0.00019909977494373594, "loss": 0.6717, "step": 95 }, { "epoch": 0.007117965448209387, "grad_norm": 0.42143312096595764, "learning_rate": 0.00019908977244311078, "loss": 0.665, "step": 96 }, { "epoch": 0.0071921109216282345, "grad_norm": 0.46562454104423523, "learning_rate": 0.00019907976994248562, "loss": 0.6807, "step": 97 }, { "epoch": 0.007266256395047082, "grad_norm": 0.4677669405937195, "learning_rate": 0.00019906976744186048, "loss": 0.6371, "step": 98 }, { "epoch": 0.00734040186846593, "grad_norm": 0.46294349431991577, "learning_rate": 0.00019905976494123532, "loss": 0.6225, "step": 99 }, { "epoch": 0.007414547341884778, "grad_norm": 0.4734184443950653, "learning_rate": 0.00019904976244061016, "loss": 0.6886, "step": 100 }, { "epoch": 0.007488692815303626, "grad_norm": 0.5326193571090698, "learning_rate": 0.000199039759939985, "loss": 0.6902, "step": 101 }, { "epoch": 0.0075628382887224736, "grad_norm": 0.4847554564476013, "learning_rate": 0.00019902975743935986, "loss": 0.6505, "step": 102 }, { "epoch": 0.007636983762141321, "grad_norm": 0.4065643846988678, "learning_rate": 0.0001990197549387347, "loss": 0.6372, "step": 103 }, { "epoch": 0.007711129235560169, "grad_norm": 0.5002569556236267, "learning_rate": 0.00019900975243810953, "loss": 0.6932, "step": 104 }, { "epoch": 0.007785274708979017, "grad_norm": 0.4244154393672943, "learning_rate": 0.00019899974993748437, "loss": 0.6868, "step": 105 }, { "epoch": 0.007859420182397865, "grad_norm": 0.48799335956573486, "learning_rate": 0.00019898974743685923, "loss": 0.6471, "step": 106 }, { "epoch": 0.007933565655816712, "grad_norm": 0.4388320744037628, "learning_rate": 0.00019897974493623407, "loss": 0.6737, "step": 107 }, { "epoch": 0.00800771112923556, "grad_norm": 0.4524044394493103, "learning_rate": 0.0001989697424356089, "loss": 0.6532, "step": 108 }, { "epoch": 0.008081856602654407, "grad_norm": 0.5094372034072876, "learning_rate": 0.00019895973993498374, "loss": 0.6243, "step": 109 }, { "epoch": 0.008156002076073256, "grad_norm": 0.4410964548587799, "learning_rate": 0.0001989497374343586, "loss": 0.6614, "step": 110 }, { "epoch": 0.008230147549492103, "grad_norm": 0.5263974070549011, "learning_rate": 0.00019893973493373345, "loss": 0.687, "step": 111 }, { "epoch": 0.008304293022910952, "grad_norm": 0.4105038046836853, "learning_rate": 0.00019892973243310828, "loss": 0.658, "step": 112 }, { "epoch": 0.008378438496329799, "grad_norm": 0.4612182378768921, "learning_rate": 0.00019891972993248315, "loss": 0.6367, "step": 113 }, { "epoch": 0.008452583969748647, "grad_norm": 0.5487355589866638, "learning_rate": 0.00019890972743185798, "loss": 0.6467, "step": 114 }, { "epoch": 0.008526729443167494, "grad_norm": 0.4223743677139282, "learning_rate": 0.00019889972493123282, "loss": 0.6332, "step": 115 }, { "epoch": 0.008600874916586343, "grad_norm": 0.41857191920280457, "learning_rate": 0.00019888972243060766, "loss": 0.7275, "step": 116 }, { "epoch": 0.00867502039000519, "grad_norm": 0.43704351782798767, "learning_rate": 0.00019887971992998252, "loss": 0.6596, "step": 117 }, { "epoch": 0.008749165863424039, "grad_norm": 0.5122873187065125, "learning_rate": 0.00019886971742935733, "loss": 0.6808, "step": 118 }, { "epoch": 0.008823311336842886, "grad_norm": 0.40995481610298157, "learning_rate": 0.0001988597149287322, "loss": 0.6555, "step": 119 }, { "epoch": 0.008897456810261734, "grad_norm": 0.4570493996143341, "learning_rate": 0.00019884971242810703, "loss": 0.6914, "step": 120 }, { "epoch": 0.008971602283680581, "grad_norm": 0.45733383297920227, "learning_rate": 0.0001988397099274819, "loss": 0.6576, "step": 121 }, { "epoch": 0.00904574775709943, "grad_norm": 0.41248658299446106, "learning_rate": 0.0001988297074268567, "loss": 0.6404, "step": 122 }, { "epoch": 0.009119893230518277, "grad_norm": 0.42830878496170044, "learning_rate": 0.00019881970492623157, "loss": 0.6531, "step": 123 }, { "epoch": 0.009194038703937125, "grad_norm": 0.4142945110797882, "learning_rate": 0.0001988097024256064, "loss": 0.6092, "step": 124 }, { "epoch": 0.009268184177355972, "grad_norm": 0.4431367516517639, "learning_rate": 0.00019879969992498127, "loss": 0.6669, "step": 125 }, { "epoch": 0.00934232965077482, "grad_norm": 0.49269065260887146, "learning_rate": 0.00019878969742435608, "loss": 0.6609, "step": 126 }, { "epoch": 0.009416475124193668, "grad_norm": 0.49380356073379517, "learning_rate": 0.00019877969492373095, "loss": 0.6718, "step": 127 }, { "epoch": 0.009490620597612515, "grad_norm": 0.42208343744277954, "learning_rate": 0.00019876969242310578, "loss": 0.6926, "step": 128 }, { "epoch": 0.009564766071031364, "grad_norm": 0.4084603488445282, "learning_rate": 0.00019875968992248062, "loss": 0.6152, "step": 129 }, { "epoch": 0.00963891154445021, "grad_norm": 0.49325522780418396, "learning_rate": 0.00019874968742185546, "loss": 0.6311, "step": 130 }, { "epoch": 0.00971305701786906, "grad_norm": 0.5479703545570374, "learning_rate": 0.00019873968492123032, "loss": 0.6706, "step": 131 }, { "epoch": 0.009787202491287906, "grad_norm": 0.4423280954360962, "learning_rate": 0.00019872968242060516, "loss": 0.6865, "step": 132 }, { "epoch": 0.009861347964706755, "grad_norm": 0.4486439824104309, "learning_rate": 0.00019871967991998, "loss": 0.6092, "step": 133 }, { "epoch": 0.009935493438125602, "grad_norm": 0.5445942878723145, "learning_rate": 0.00019870967741935483, "loss": 0.6694, "step": 134 }, { "epoch": 0.01000963891154445, "grad_norm": 0.46590104699134827, "learning_rate": 0.0001986996749187297, "loss": 0.672, "step": 135 }, { "epoch": 0.010083784384963298, "grad_norm": 0.4492329955101013, "learning_rate": 0.00019868967241810454, "loss": 0.6758, "step": 136 }, { "epoch": 0.010157929858382146, "grad_norm": 0.39360982179641724, "learning_rate": 0.00019867966991747937, "loss": 0.6303, "step": 137 }, { "epoch": 0.010232075331800993, "grad_norm": 0.44553348422050476, "learning_rate": 0.0001986696674168542, "loss": 0.6645, "step": 138 }, { "epoch": 0.010306220805219842, "grad_norm": 0.5305299758911133, "learning_rate": 0.00019865966491622907, "loss": 0.7069, "step": 139 }, { "epoch": 0.010380366278638689, "grad_norm": 0.4813813865184784, "learning_rate": 0.0001986496624156039, "loss": 0.7434, "step": 140 }, { "epoch": 0.010454511752057537, "grad_norm": 0.5300836563110352, "learning_rate": 0.00019863965991497875, "loss": 0.6459, "step": 141 }, { "epoch": 0.010528657225476384, "grad_norm": 0.44599780440330505, "learning_rate": 0.00019862965741435359, "loss": 0.6617, "step": 142 }, { "epoch": 0.010602802698895233, "grad_norm": 0.42879384756088257, "learning_rate": 0.00019861965491372845, "loss": 0.6527, "step": 143 }, { "epoch": 0.01067694817231408, "grad_norm": 0.44864630699157715, "learning_rate": 0.0001986096524131033, "loss": 0.6561, "step": 144 }, { "epoch": 0.010751093645732929, "grad_norm": 0.4115261137485504, "learning_rate": 0.00019859964991247812, "loss": 0.6305, "step": 145 }, { "epoch": 0.010825239119151776, "grad_norm": 0.418890118598938, "learning_rate": 0.000198589647411853, "loss": 0.6175, "step": 146 }, { "epoch": 0.010899384592570624, "grad_norm": 0.4272039234638214, "learning_rate": 0.00019857964491122783, "loss": 0.653, "step": 147 }, { "epoch": 0.010973530065989471, "grad_norm": 0.45172929763793945, "learning_rate": 0.00019856964241060266, "loss": 0.6157, "step": 148 }, { "epoch": 0.01104767553940832, "grad_norm": 0.4390547275543213, "learning_rate": 0.0001985596399099775, "loss": 0.6957, "step": 149 }, { "epoch": 0.011121821012827167, "grad_norm": 0.44054192304611206, "learning_rate": 0.00019854963740935236, "loss": 0.6371, "step": 150 }, { "epoch": 0.011195966486246014, "grad_norm": 0.4239805340766907, "learning_rate": 0.0001985396349087272, "loss": 0.6644, "step": 151 }, { "epoch": 0.011270111959664863, "grad_norm": 0.4296945333480835, "learning_rate": 0.00019852963240810204, "loss": 0.6496, "step": 152 }, { "epoch": 0.01134425743308371, "grad_norm": 0.453365296125412, "learning_rate": 0.00019851962990747687, "loss": 0.6792, "step": 153 }, { "epoch": 0.011418402906502558, "grad_norm": 0.44845858216285706, "learning_rate": 0.00019850962740685174, "loss": 0.6464, "step": 154 }, { "epoch": 0.011492548379921405, "grad_norm": 0.42280662059783936, "learning_rate": 0.00019849962490622655, "loss": 0.6312, "step": 155 }, { "epoch": 0.011566693853340254, "grad_norm": 0.5322517156600952, "learning_rate": 0.0001984896224056014, "loss": 0.649, "step": 156 }, { "epoch": 0.0116408393267591, "grad_norm": 0.40082821249961853, "learning_rate": 0.00019847961990497625, "loss": 0.6309, "step": 157 }, { "epoch": 0.01171498480017795, "grad_norm": 0.4447239637374878, "learning_rate": 0.00019846961740435111, "loss": 0.7046, "step": 158 }, { "epoch": 0.011789130273596796, "grad_norm": 0.4507167637348175, "learning_rate": 0.00019845961490372592, "loss": 0.706, "step": 159 }, { "epoch": 0.011863275747015645, "grad_norm": 0.42788442969322205, "learning_rate": 0.0001984496124031008, "loss": 0.6324, "step": 160 }, { "epoch": 0.011937421220434492, "grad_norm": 0.417186439037323, "learning_rate": 0.00019843960990247563, "loss": 0.6596, "step": 161 }, { "epoch": 0.01201156669385334, "grad_norm": 0.41632384061813354, "learning_rate": 0.0001984296074018505, "loss": 0.6441, "step": 162 }, { "epoch": 0.012085712167272188, "grad_norm": 0.45272254943847656, "learning_rate": 0.0001984196049012253, "loss": 0.6131, "step": 163 }, { "epoch": 0.012159857640691036, "grad_norm": 0.42977985739707947, "learning_rate": 0.00019840960240060016, "loss": 0.6958, "step": 164 }, { "epoch": 0.012234003114109883, "grad_norm": 0.4148579239845276, "learning_rate": 0.000198399599899975, "loss": 0.6692, "step": 165 }, { "epoch": 0.012308148587528732, "grad_norm": 0.4047302305698395, "learning_rate": 0.00019838959739934987, "loss": 0.6211, "step": 166 }, { "epoch": 0.012382294060947579, "grad_norm": 0.4109320342540741, "learning_rate": 0.00019837959489872468, "loss": 0.6787, "step": 167 }, { "epoch": 0.012456439534366428, "grad_norm": 0.4073224365711212, "learning_rate": 0.00019836959239809954, "loss": 0.6388, "step": 168 }, { "epoch": 0.012530585007785274, "grad_norm": 0.39819779992103577, "learning_rate": 0.00019835958989747438, "loss": 0.6551, "step": 169 }, { "epoch": 0.012604730481204123, "grad_norm": 0.45688748359680176, "learning_rate": 0.00019834958739684921, "loss": 0.662, "step": 170 }, { "epoch": 0.01267887595462297, "grad_norm": 0.3923928141593933, "learning_rate": 0.00019833958489622405, "loss": 0.6282, "step": 171 }, { "epoch": 0.012753021428041819, "grad_norm": 0.42128872871398926, "learning_rate": 0.00019832958239559891, "loss": 0.6391, "step": 172 }, { "epoch": 0.012827166901460666, "grad_norm": 0.39416369795799255, "learning_rate": 0.00019831957989497375, "loss": 0.6377, "step": 173 }, { "epoch": 0.012901312374879514, "grad_norm": 0.45660364627838135, "learning_rate": 0.0001983095773943486, "loss": 0.6582, "step": 174 }, { "epoch": 0.012975457848298361, "grad_norm": 0.4255370497703552, "learning_rate": 0.00019829957489372343, "loss": 0.6448, "step": 175 }, { "epoch": 0.013049603321717208, "grad_norm": 0.41216886043548584, "learning_rate": 0.0001982895723930983, "loss": 0.5885, "step": 176 }, { "epoch": 0.013123748795136057, "grad_norm": 0.45943963527679443, "learning_rate": 0.00019827956989247313, "loss": 0.6012, "step": 177 }, { "epoch": 0.013197894268554904, "grad_norm": 0.4331807494163513, "learning_rate": 0.00019826956739184796, "loss": 0.6678, "step": 178 }, { "epoch": 0.013272039741973753, "grad_norm": 0.42463815212249756, "learning_rate": 0.00019825956489122283, "loss": 0.6458, "step": 179 }, { "epoch": 0.0133461852153926, "grad_norm": 0.49266889691352844, "learning_rate": 0.00019824956239059767, "loss": 0.7135, "step": 180 }, { "epoch": 0.013420330688811448, "grad_norm": 0.4284825623035431, "learning_rate": 0.0001982395598899725, "loss": 0.6996, "step": 181 }, { "epoch": 0.013494476162230295, "grad_norm": 0.43228355050086975, "learning_rate": 0.00019822955738934734, "loss": 0.6129, "step": 182 }, { "epoch": 0.013568621635649144, "grad_norm": 0.48813048005104065, "learning_rate": 0.0001982195548887222, "loss": 0.6157, "step": 183 }, { "epoch": 0.01364276710906799, "grad_norm": 0.48207321763038635, "learning_rate": 0.00019820955238809704, "loss": 0.6233, "step": 184 }, { "epoch": 0.01371691258248684, "grad_norm": 0.45921778678894043, "learning_rate": 0.00019819954988747188, "loss": 0.6259, "step": 185 }, { "epoch": 0.013791058055905686, "grad_norm": 0.40628641843795776, "learning_rate": 0.00019818954738684672, "loss": 0.6659, "step": 186 }, { "epoch": 0.013865203529324535, "grad_norm": 0.41001367568969727, "learning_rate": 0.00019817954488622158, "loss": 0.6419, "step": 187 }, { "epoch": 0.013939349002743382, "grad_norm": 0.41239097714424133, "learning_rate": 0.00019816954238559642, "loss": 0.6288, "step": 188 }, { "epoch": 0.01401349447616223, "grad_norm": 0.4627619683742523, "learning_rate": 0.00019815953988497125, "loss": 0.6629, "step": 189 }, { "epoch": 0.014087639949581078, "grad_norm": 0.42691051959991455, "learning_rate": 0.0001981495373843461, "loss": 0.6287, "step": 190 }, { "epoch": 0.014161785422999926, "grad_norm": 0.41519421339035034, "learning_rate": 0.00019813953488372096, "loss": 0.6475, "step": 191 }, { "epoch": 0.014235930896418773, "grad_norm": 0.4508211612701416, "learning_rate": 0.00019812953238309577, "loss": 0.6882, "step": 192 }, { "epoch": 0.014310076369837622, "grad_norm": 0.4942317306995392, "learning_rate": 0.00019811952988247063, "loss": 0.6484, "step": 193 }, { "epoch": 0.014384221843256469, "grad_norm": 0.5561189651489258, "learning_rate": 0.00019810952738184547, "loss": 0.6232, "step": 194 }, { "epoch": 0.014458367316675318, "grad_norm": 0.45120900869369507, "learning_rate": 0.00019809952488122033, "loss": 0.6792, "step": 195 }, { "epoch": 0.014532512790094165, "grad_norm": 0.4168568253517151, "learning_rate": 0.00019808952238059514, "loss": 0.6425, "step": 196 }, { "epoch": 0.014606658263513013, "grad_norm": 0.3944159746170044, "learning_rate": 0.00019807951987997, "loss": 0.5842, "step": 197 }, { "epoch": 0.01468080373693186, "grad_norm": 0.43105873465538025, "learning_rate": 0.00019806951737934484, "loss": 0.5535, "step": 198 }, { "epoch": 0.014754949210350709, "grad_norm": 0.43557417392730713, "learning_rate": 0.0001980595148787197, "loss": 0.6294, "step": 199 }, { "epoch": 0.014829094683769556, "grad_norm": 0.431264728307724, "learning_rate": 0.00019804951237809452, "loss": 0.6218, "step": 200 }, { "epoch": 0.014903240157188403, "grad_norm": 0.4243219494819641, "learning_rate": 0.00019803950987746938, "loss": 0.6223, "step": 201 }, { "epoch": 0.014977385630607251, "grad_norm": 0.4936630129814148, "learning_rate": 0.00019802950737684422, "loss": 0.6281, "step": 202 }, { "epoch": 0.015051531104026098, "grad_norm": 0.46275919675827026, "learning_rate": 0.00019801950487621908, "loss": 0.6068, "step": 203 }, { "epoch": 0.015125676577444947, "grad_norm": 0.4349607825279236, "learning_rate": 0.0001980095023755939, "loss": 0.6531, "step": 204 }, { "epoch": 0.015199822050863794, "grad_norm": 0.4184302091598511, "learning_rate": 0.00019799949987496876, "loss": 0.5988, "step": 205 }, { "epoch": 0.015273967524282643, "grad_norm": 0.4767571985721588, "learning_rate": 0.0001979894973743436, "loss": 0.674, "step": 206 }, { "epoch": 0.01534811299770149, "grad_norm": 0.4165602922439575, "learning_rate": 0.00019797949487371843, "loss": 0.6295, "step": 207 }, { "epoch": 0.015422258471120338, "grad_norm": 0.420572429895401, "learning_rate": 0.00019796949237309327, "loss": 0.6833, "step": 208 }, { "epoch": 0.015496403944539185, "grad_norm": 0.41251426935195923, "learning_rate": 0.00019795948987246813, "loss": 0.661, "step": 209 }, { "epoch": 0.015570549417958034, "grad_norm": 0.41366565227508545, "learning_rate": 0.00019794948737184297, "loss": 0.6572, "step": 210 }, { "epoch": 0.015644694891376883, "grad_norm": 0.4286874830722809, "learning_rate": 0.0001979394848712178, "loss": 0.6549, "step": 211 }, { "epoch": 0.01571884036479573, "grad_norm": 0.4197264313697815, "learning_rate": 0.00019792948237059267, "loss": 0.6727, "step": 212 }, { "epoch": 0.015792985838214577, "grad_norm": 0.3967081606388092, "learning_rate": 0.0001979194798699675, "loss": 0.6847, "step": 213 }, { "epoch": 0.015867131311633424, "grad_norm": 0.4107568860054016, "learning_rate": 0.00019790947736934234, "loss": 0.6103, "step": 214 }, { "epoch": 0.015941276785052274, "grad_norm": 0.42648494243621826, "learning_rate": 0.00019789947486871718, "loss": 0.6499, "step": 215 }, { "epoch": 0.01601542225847112, "grad_norm": 0.42215442657470703, "learning_rate": 0.00019788947236809204, "loss": 0.6971, "step": 216 }, { "epoch": 0.016089567731889968, "grad_norm": 0.4154587984085083, "learning_rate": 0.00019787946986746688, "loss": 0.6512, "step": 217 }, { "epoch": 0.016163713205308815, "grad_norm": 0.41122040152549744, "learning_rate": 0.00019786946736684172, "loss": 0.6429, "step": 218 }, { "epoch": 0.016237858678727665, "grad_norm": 0.4159112274646759, "learning_rate": 0.00019785946486621656, "loss": 0.6109, "step": 219 }, { "epoch": 0.016312004152146512, "grad_norm": 0.4174700677394867, "learning_rate": 0.00019784946236559142, "loss": 0.6018, "step": 220 }, { "epoch": 0.01638614962556536, "grad_norm": 0.4380917251110077, "learning_rate": 0.00019783945986496626, "loss": 0.6394, "step": 221 }, { "epoch": 0.016460295098984206, "grad_norm": 0.42614662647247314, "learning_rate": 0.0001978294573643411, "loss": 0.6873, "step": 222 }, { "epoch": 0.016534440572403056, "grad_norm": 0.38402438163757324, "learning_rate": 0.00019781945486371593, "loss": 0.6248, "step": 223 }, { "epoch": 0.016608586045821903, "grad_norm": 0.4346957802772522, "learning_rate": 0.0001978094523630908, "loss": 0.6404, "step": 224 }, { "epoch": 0.01668273151924075, "grad_norm": 0.42391863465309143, "learning_rate": 0.00019779944986246563, "loss": 0.6603, "step": 225 }, { "epoch": 0.016756876992659597, "grad_norm": 0.435705304145813, "learning_rate": 0.00019778944736184047, "loss": 0.6916, "step": 226 }, { "epoch": 0.016831022466078444, "grad_norm": 0.4293709695339203, "learning_rate": 0.0001977794448612153, "loss": 0.6184, "step": 227 }, { "epoch": 0.016905167939497295, "grad_norm": 0.47087493538856506, "learning_rate": 0.00019776944236059017, "loss": 0.6318, "step": 228 }, { "epoch": 0.01697931341291614, "grad_norm": 0.4167906939983368, "learning_rate": 0.00019775943985996498, "loss": 0.6457, "step": 229 }, { "epoch": 0.01705345888633499, "grad_norm": 0.43258655071258545, "learning_rate": 0.00019774943735933985, "loss": 0.7047, "step": 230 }, { "epoch": 0.017127604359753835, "grad_norm": 0.41788530349731445, "learning_rate": 0.00019773943485871468, "loss": 0.5969, "step": 231 }, { "epoch": 0.017201749833172686, "grad_norm": 0.43856340646743774, "learning_rate": 0.00019772943235808955, "loss": 0.6612, "step": 232 }, { "epoch": 0.017275895306591533, "grad_norm": 0.43399524688720703, "learning_rate": 0.00019771942985746436, "loss": 0.6479, "step": 233 }, { "epoch": 0.01735004078001038, "grad_norm": 0.41547834873199463, "learning_rate": 0.00019770942735683922, "loss": 0.6162, "step": 234 }, { "epoch": 0.017424186253429227, "grad_norm": 0.39383211731910706, "learning_rate": 0.00019769942485621406, "loss": 0.6344, "step": 235 }, { "epoch": 0.017498331726848077, "grad_norm": 0.39800575375556946, "learning_rate": 0.00019768942235558892, "loss": 0.6558, "step": 236 }, { "epoch": 0.017572477200266924, "grad_norm": 0.41717126965522766, "learning_rate": 0.00019767941985496373, "loss": 0.6348, "step": 237 }, { "epoch": 0.01764662267368577, "grad_norm": 0.3903905749320984, "learning_rate": 0.0001976694173543386, "loss": 0.6157, "step": 238 }, { "epoch": 0.017720768147104618, "grad_norm": 0.40841013193130493, "learning_rate": 0.00019765941485371343, "loss": 0.6085, "step": 239 }, { "epoch": 0.01779491362052347, "grad_norm": 0.44844090938568115, "learning_rate": 0.0001976494123530883, "loss": 0.6545, "step": 240 }, { "epoch": 0.017869059093942315, "grad_norm": 0.4745931923389435, "learning_rate": 0.0001976394098524631, "loss": 0.6262, "step": 241 }, { "epoch": 0.017943204567361162, "grad_norm": 0.384184867143631, "learning_rate": 0.00019762940735183797, "loss": 0.6372, "step": 242 }, { "epoch": 0.01801735004078001, "grad_norm": 0.5134305953979492, "learning_rate": 0.0001976194048512128, "loss": 0.6568, "step": 243 }, { "epoch": 0.01809149551419886, "grad_norm": 0.4340967535972595, "learning_rate": 0.00019760940235058765, "loss": 0.6255, "step": 244 }, { "epoch": 0.018165640987617707, "grad_norm": 0.44444260001182556, "learning_rate": 0.00019759939984996248, "loss": 0.6449, "step": 245 }, { "epoch": 0.018239786461036554, "grad_norm": 0.400378555059433, "learning_rate": 0.00019758939734933735, "loss": 0.6082, "step": 246 }, { "epoch": 0.0183139319344554, "grad_norm": 0.3888809084892273, "learning_rate": 0.00019757939484871218, "loss": 0.5629, "step": 247 }, { "epoch": 0.01838807740787425, "grad_norm": 0.4306424856185913, "learning_rate": 0.00019756939234808702, "loss": 0.6396, "step": 248 }, { "epoch": 0.018462222881293098, "grad_norm": 0.4137131869792938, "learning_rate": 0.00019755938984746189, "loss": 0.6066, "step": 249 }, { "epoch": 0.018536368354711945, "grad_norm": 0.43699654936790466, "learning_rate": 0.00019754938734683672, "loss": 0.6501, "step": 250 }, { "epoch": 0.018610513828130792, "grad_norm": 0.43349939584732056, "learning_rate": 0.00019753938484621156, "loss": 0.6142, "step": 251 }, { "epoch": 0.01868465930154964, "grad_norm": 0.40275275707244873, "learning_rate": 0.0001975293823455864, "loss": 0.6388, "step": 252 }, { "epoch": 0.01875880477496849, "grad_norm": 0.4282354712486267, "learning_rate": 0.00019751937984496126, "loss": 0.6442, "step": 253 }, { "epoch": 0.018832950248387336, "grad_norm": 0.43281790614128113, "learning_rate": 0.0001975093773443361, "loss": 0.6087, "step": 254 }, { "epoch": 0.018907095721806183, "grad_norm": 0.42167308926582336, "learning_rate": 0.00019749937484371094, "loss": 0.5887, "step": 255 }, { "epoch": 0.01898124119522503, "grad_norm": 0.4779164493083954, "learning_rate": 0.00019748937234308577, "loss": 0.6314, "step": 256 }, { "epoch": 0.01905538666864388, "grad_norm": 0.3990832567214966, "learning_rate": 0.00019747936984246064, "loss": 0.6573, "step": 257 }, { "epoch": 0.019129532142062727, "grad_norm": 0.38400721549987793, "learning_rate": 0.00019746936734183547, "loss": 0.6088, "step": 258 }, { "epoch": 0.019203677615481574, "grad_norm": 0.3935655355453491, "learning_rate": 0.0001974593648412103, "loss": 0.5828, "step": 259 }, { "epoch": 0.01927782308890042, "grad_norm": 0.4323386251926422, "learning_rate": 0.00019744936234058515, "loss": 0.6212, "step": 260 }, { "epoch": 0.01935196856231927, "grad_norm": 0.4103447198867798, "learning_rate": 0.00019743935983996, "loss": 0.5638, "step": 261 }, { "epoch": 0.01942611403573812, "grad_norm": 0.40592390298843384, "learning_rate": 0.00019742935733933485, "loss": 0.6501, "step": 262 }, { "epoch": 0.019500259509156966, "grad_norm": 0.576086699962616, "learning_rate": 0.00019741935483870969, "loss": 0.6317, "step": 263 }, { "epoch": 0.019574404982575812, "grad_norm": 0.48204684257507324, "learning_rate": 0.00019740935233808452, "loss": 0.6384, "step": 264 }, { "epoch": 0.019648550455994663, "grad_norm": 0.40188315510749817, "learning_rate": 0.0001973993498374594, "loss": 0.6083, "step": 265 }, { "epoch": 0.01972269592941351, "grad_norm": 0.4633297324180603, "learning_rate": 0.0001973893473368342, "loss": 0.6627, "step": 266 }, { "epoch": 0.019796841402832357, "grad_norm": 0.45905184745788574, "learning_rate": 0.00019737934483620906, "loss": 0.6275, "step": 267 }, { "epoch": 0.019870986876251204, "grad_norm": 0.4713208079338074, "learning_rate": 0.0001973693423355839, "loss": 0.6453, "step": 268 }, { "epoch": 0.019945132349670054, "grad_norm": 0.42116665840148926, "learning_rate": 0.00019735933983495876, "loss": 0.6545, "step": 269 }, { "epoch": 0.0200192778230889, "grad_norm": 0.399183988571167, "learning_rate": 0.00019734933733433357, "loss": 0.5651, "step": 270 }, { "epoch": 0.020093423296507748, "grad_norm": 0.35838836431503296, "learning_rate": 0.00019733933483370844, "loss": 0.5664, "step": 271 }, { "epoch": 0.020167568769926595, "grad_norm": 0.4273037016391754, "learning_rate": 0.00019732933233308327, "loss": 0.6424, "step": 272 }, { "epoch": 0.020241714243345445, "grad_norm": 0.4567945897579193, "learning_rate": 0.00019731932983245814, "loss": 0.6276, "step": 273 }, { "epoch": 0.020315859716764292, "grad_norm": 0.4650830328464508, "learning_rate": 0.00019730932733183295, "loss": 0.585, "step": 274 }, { "epoch": 0.02039000519018314, "grad_norm": 0.40823203325271606, "learning_rate": 0.0001972993248312078, "loss": 0.6117, "step": 275 }, { "epoch": 0.020464150663601986, "grad_norm": 0.3931099772453308, "learning_rate": 0.00019728932233058265, "loss": 0.5806, "step": 276 }, { "epoch": 0.020538296137020833, "grad_norm": 0.4471275210380554, "learning_rate": 0.00019727931982995751, "loss": 0.6152, "step": 277 }, { "epoch": 0.020612441610439684, "grad_norm": 0.4287128746509552, "learning_rate": 0.00019726931732933232, "loss": 0.6127, "step": 278 }, { "epoch": 0.02068658708385853, "grad_norm": 0.5298720598220825, "learning_rate": 0.0001972593148287072, "loss": 0.6696, "step": 279 }, { "epoch": 0.020760732557277378, "grad_norm": 0.3971945643424988, "learning_rate": 0.00019724931232808203, "loss": 0.5767, "step": 280 }, { "epoch": 0.020834878030696224, "grad_norm": 0.40942123532295227, "learning_rate": 0.00019723930982745686, "loss": 0.6347, "step": 281 }, { "epoch": 0.020909023504115075, "grad_norm": 0.4549572467803955, "learning_rate": 0.00019722930732683173, "loss": 0.6235, "step": 282 }, { "epoch": 0.020983168977533922, "grad_norm": 0.42177632451057434, "learning_rate": 0.00019721930482620656, "loss": 0.63, "step": 283 }, { "epoch": 0.02105731445095277, "grad_norm": 0.4434545338153839, "learning_rate": 0.0001972093023255814, "loss": 0.5959, "step": 284 }, { "epoch": 0.021131459924371616, "grad_norm": 0.39924728870391846, "learning_rate": 0.00019719929982495624, "loss": 0.59, "step": 285 }, { "epoch": 0.021205605397790466, "grad_norm": 0.40012669563293457, "learning_rate": 0.0001971892973243311, "loss": 0.6282, "step": 286 }, { "epoch": 0.021279750871209313, "grad_norm": 0.41246330738067627, "learning_rate": 0.00019717929482370594, "loss": 0.6285, "step": 287 }, { "epoch": 0.02135389634462816, "grad_norm": 0.4374382793903351, "learning_rate": 0.0001971692923230808, "loss": 0.7038, "step": 288 }, { "epoch": 0.021428041818047007, "grad_norm": 0.3911195397377014, "learning_rate": 0.0001971592898224556, "loss": 0.6027, "step": 289 }, { "epoch": 0.021502187291465857, "grad_norm": 0.4268444776535034, "learning_rate": 0.00019714928732183048, "loss": 0.6409, "step": 290 }, { "epoch": 0.021576332764884704, "grad_norm": 0.47866982221603394, "learning_rate": 0.00019713928482120531, "loss": 0.6151, "step": 291 }, { "epoch": 0.02165047823830355, "grad_norm": 0.41583144664764404, "learning_rate": 0.00019712928232058015, "loss": 0.6411, "step": 292 }, { "epoch": 0.021724623711722398, "grad_norm": 0.40891239047050476, "learning_rate": 0.000197119279819955, "loss": 0.6834, "step": 293 }, { "epoch": 0.02179876918514125, "grad_norm": 0.4311814308166504, "learning_rate": 0.00019710927731932985, "loss": 0.5847, "step": 294 }, { "epoch": 0.021872914658560096, "grad_norm": 0.4371280372142792, "learning_rate": 0.0001970992748187047, "loss": 0.6803, "step": 295 }, { "epoch": 0.021947060131978943, "grad_norm": 0.3768019378185272, "learning_rate": 0.00019708927231807953, "loss": 0.6172, "step": 296 }, { "epoch": 0.02202120560539779, "grad_norm": 0.44617727398872375, "learning_rate": 0.00019707926981745436, "loss": 0.6217, "step": 297 }, { "epoch": 0.02209535107881664, "grad_norm": 0.39800411462783813, "learning_rate": 0.00019706926731682923, "loss": 0.6368, "step": 298 }, { "epoch": 0.022169496552235487, "grad_norm": 0.3798884153366089, "learning_rate": 0.00019705926481620407, "loss": 0.598, "step": 299 }, { "epoch": 0.022243642025654334, "grad_norm": 0.4436784088611603, "learning_rate": 0.0001970492623155789, "loss": 0.6312, "step": 300 }, { "epoch": 0.02231778749907318, "grad_norm": 0.43613025546073914, "learning_rate": 0.00019703925981495374, "loss": 0.648, "step": 301 }, { "epoch": 0.022391932972492028, "grad_norm": 0.40670260787010193, "learning_rate": 0.0001970292573143286, "loss": 0.6163, "step": 302 }, { "epoch": 0.022466078445910878, "grad_norm": 0.3933914601802826, "learning_rate": 0.00019701925481370341, "loss": 0.6402, "step": 303 }, { "epoch": 0.022540223919329725, "grad_norm": 0.40131503343582153, "learning_rate": 0.00019700925231307828, "loss": 0.6098, "step": 304 }, { "epoch": 0.022614369392748572, "grad_norm": 0.40973955392837524, "learning_rate": 0.00019699924981245312, "loss": 0.6144, "step": 305 }, { "epoch": 0.02268851486616742, "grad_norm": 0.4447597563266754, "learning_rate": 0.00019698924731182798, "loss": 0.6468, "step": 306 }, { "epoch": 0.02276266033958627, "grad_norm": 0.40768271684646606, "learning_rate": 0.0001969792448112028, "loss": 0.6112, "step": 307 }, { "epoch": 0.022836805813005116, "grad_norm": 0.44881513714790344, "learning_rate": 0.00019696924231057765, "loss": 0.599, "step": 308 }, { "epoch": 0.022910951286423963, "grad_norm": 0.4982854723930359, "learning_rate": 0.0001969592398099525, "loss": 0.5978, "step": 309 }, { "epoch": 0.02298509675984281, "grad_norm": 0.4602535367012024, "learning_rate": 0.00019694923730932735, "loss": 0.6388, "step": 310 }, { "epoch": 0.02305924223326166, "grad_norm": 0.39899739623069763, "learning_rate": 0.00019693923480870216, "loss": 0.5913, "step": 311 }, { "epoch": 0.023133387706680508, "grad_norm": 0.44356605410575867, "learning_rate": 0.00019692923230807703, "loss": 0.6808, "step": 312 }, { "epoch": 0.023207533180099355, "grad_norm": 0.42584457993507385, "learning_rate": 0.00019691922980745187, "loss": 0.6723, "step": 313 }, { "epoch": 0.0232816786535182, "grad_norm": 0.406721293926239, "learning_rate": 0.00019690922730682673, "loss": 0.6182, "step": 314 }, { "epoch": 0.023355824126937052, "grad_norm": 0.4564056396484375, "learning_rate": 0.00019689922480620157, "loss": 0.5916, "step": 315 }, { "epoch": 0.0234299696003559, "grad_norm": 0.4192248582839966, "learning_rate": 0.0001968892223055764, "loss": 0.6579, "step": 316 }, { "epoch": 0.023504115073774746, "grad_norm": 0.3864688277244568, "learning_rate": 0.00019687921980495124, "loss": 0.6487, "step": 317 }, { "epoch": 0.023578260547193593, "grad_norm": 0.4255799949169159, "learning_rate": 0.00019686921730432608, "loss": 0.6332, "step": 318 }, { "epoch": 0.023652406020612443, "grad_norm": 0.4905986487865448, "learning_rate": 0.00019685921480370094, "loss": 0.6557, "step": 319 }, { "epoch": 0.02372655149403129, "grad_norm": 0.4470495581626892, "learning_rate": 0.00019684921230307578, "loss": 0.5777, "step": 320 }, { "epoch": 0.023800696967450137, "grad_norm": 0.38695743680000305, "learning_rate": 0.00019683920980245062, "loss": 0.6121, "step": 321 }, { "epoch": 0.023874842440868984, "grad_norm": 0.4009791612625122, "learning_rate": 0.00019682920730182545, "loss": 0.5757, "step": 322 }, { "epoch": 0.023948987914287834, "grad_norm": 0.36851105093955994, "learning_rate": 0.00019681920480120032, "loss": 0.5736, "step": 323 }, { "epoch": 0.02402313338770668, "grad_norm": 0.4529784917831421, "learning_rate": 0.00019680920230057516, "loss": 0.6584, "step": 324 }, { "epoch": 0.02409727886112553, "grad_norm": 0.40925344824790955, "learning_rate": 0.00019679919979995002, "loss": 0.6306, "step": 325 }, { "epoch": 0.024171424334544375, "grad_norm": 0.43600764870643616, "learning_rate": 0.00019678919729932483, "loss": 0.5788, "step": 326 }, { "epoch": 0.024245569807963222, "grad_norm": 0.3902277946472168, "learning_rate": 0.0001967791947986997, "loss": 0.602, "step": 327 }, { "epoch": 0.024319715281382073, "grad_norm": 0.42548874020576477, "learning_rate": 0.00019676919229807453, "loss": 0.6218, "step": 328 }, { "epoch": 0.02439386075480092, "grad_norm": 0.40485164523124695, "learning_rate": 0.00019675918979744937, "loss": 0.6053, "step": 329 }, { "epoch": 0.024468006228219766, "grad_norm": 0.4307533800601959, "learning_rate": 0.0001967491872968242, "loss": 0.5972, "step": 330 }, { "epoch": 0.024542151701638613, "grad_norm": 0.3908674120903015, "learning_rate": 0.00019673918479619907, "loss": 0.6217, "step": 331 }, { "epoch": 0.024616297175057464, "grad_norm": 0.4709630012512207, "learning_rate": 0.0001967291822955739, "loss": 0.6287, "step": 332 }, { "epoch": 0.02469044264847631, "grad_norm": 0.41264522075653076, "learning_rate": 0.00019671917979494874, "loss": 0.6317, "step": 333 }, { "epoch": 0.024764588121895158, "grad_norm": 0.39937257766723633, "learning_rate": 0.00019670917729432358, "loss": 0.5926, "step": 334 }, { "epoch": 0.024838733595314005, "grad_norm": 0.3881007134914398, "learning_rate": 0.00019669917479369844, "loss": 0.633, "step": 335 }, { "epoch": 0.024912879068732855, "grad_norm": 0.3762933611869812, "learning_rate": 0.00019668917229307328, "loss": 0.6083, "step": 336 }, { "epoch": 0.024987024542151702, "grad_norm": 0.4045356512069702, "learning_rate": 0.00019667916979244812, "loss": 0.6414, "step": 337 }, { "epoch": 0.02506117001557055, "grad_norm": 0.4312673807144165, "learning_rate": 0.00019666916729182296, "loss": 0.6488, "step": 338 }, { "epoch": 0.025135315488989396, "grad_norm": 0.4002157747745514, "learning_rate": 0.00019665916479119782, "loss": 0.6067, "step": 339 }, { "epoch": 0.025209460962408246, "grad_norm": 0.446450799703598, "learning_rate": 0.00019664916229057263, "loss": 0.6609, "step": 340 }, { "epoch": 0.025283606435827093, "grad_norm": 0.3815259039402008, "learning_rate": 0.0001966391597899475, "loss": 0.6084, "step": 341 }, { "epoch": 0.02535775190924594, "grad_norm": 0.4300139248371124, "learning_rate": 0.00019662915728932233, "loss": 0.6466, "step": 342 }, { "epoch": 0.025431897382664787, "grad_norm": 0.42108282446861267, "learning_rate": 0.0001966191547886972, "loss": 0.6265, "step": 343 }, { "epoch": 0.025506042856083638, "grad_norm": 0.420325368642807, "learning_rate": 0.000196609152288072, "loss": 0.649, "step": 344 }, { "epoch": 0.025580188329502485, "grad_norm": 0.446119487285614, "learning_rate": 0.00019659914978744687, "loss": 0.6014, "step": 345 }, { "epoch": 0.02565433380292133, "grad_norm": 0.3824400007724762, "learning_rate": 0.0001965891472868217, "loss": 0.5907, "step": 346 }, { "epoch": 0.02572847927634018, "grad_norm": 0.3972635269165039, "learning_rate": 0.00019657914478619657, "loss": 0.6148, "step": 347 }, { "epoch": 0.02580262474975903, "grad_norm": 0.4042629599571228, "learning_rate": 0.0001965691422855714, "loss": 0.6209, "step": 348 }, { "epoch": 0.025876770223177876, "grad_norm": 0.3926657438278198, "learning_rate": 0.00019655913978494625, "loss": 0.5967, "step": 349 }, { "epoch": 0.025950915696596723, "grad_norm": 0.3814225494861603, "learning_rate": 0.00019654913728432108, "loss": 0.6061, "step": 350 }, { "epoch": 0.02602506117001557, "grad_norm": 0.4053734838962555, "learning_rate": 0.00019653913478369595, "loss": 0.6215, "step": 351 }, { "epoch": 0.026099206643434417, "grad_norm": 0.375785768032074, "learning_rate": 0.00019652913228307078, "loss": 0.5949, "step": 352 }, { "epoch": 0.026173352116853267, "grad_norm": 0.3969149887561798, "learning_rate": 0.00019651912978244562, "loss": 0.5603, "step": 353 }, { "epoch": 0.026247497590272114, "grad_norm": 0.4352680444717407, "learning_rate": 0.00019650912728182046, "loss": 0.6156, "step": 354 }, { "epoch": 0.02632164306369096, "grad_norm": 0.40746769309043884, "learning_rate": 0.0001964991247811953, "loss": 0.6432, "step": 355 }, { "epoch": 0.026395788537109808, "grad_norm": 0.4134785234928131, "learning_rate": 0.00019648912228057016, "loss": 0.6006, "step": 356 }, { "epoch": 0.02646993401052866, "grad_norm": 0.38163235783576965, "learning_rate": 0.000196479119779945, "loss": 0.5935, "step": 357 }, { "epoch": 0.026544079483947505, "grad_norm": 0.40674689412117004, "learning_rate": 0.00019646911727931986, "loss": 0.5855, "step": 358 }, { "epoch": 0.026618224957366352, "grad_norm": 0.3670750558376312, "learning_rate": 0.00019645911477869467, "loss": 0.5687, "step": 359 }, { "epoch": 0.0266923704307852, "grad_norm": 0.41592901945114136, "learning_rate": 0.00019644911227806953, "loss": 0.6222, "step": 360 }, { "epoch": 0.02676651590420405, "grad_norm": 0.42836451530456543, "learning_rate": 0.00019643910977744437, "loss": 0.5701, "step": 361 }, { "epoch": 0.026840661377622897, "grad_norm": 0.43434837460517883, "learning_rate": 0.00019642910727681924, "loss": 0.6457, "step": 362 }, { "epoch": 0.026914806851041743, "grad_norm": 0.40723899006843567, "learning_rate": 0.00019641910477619405, "loss": 0.6508, "step": 363 }, { "epoch": 0.02698895232446059, "grad_norm": 0.42243698239326477, "learning_rate": 0.0001964091022755689, "loss": 0.6234, "step": 364 }, { "epoch": 0.02706309779787944, "grad_norm": 0.4437359571456909, "learning_rate": 0.00019639909977494375, "loss": 0.6262, "step": 365 }, { "epoch": 0.027137243271298288, "grad_norm": 0.5638275146484375, "learning_rate": 0.00019638909727431858, "loss": 0.6231, "step": 366 }, { "epoch": 0.027211388744717135, "grad_norm": 0.4030593931674957, "learning_rate": 0.00019637909477369342, "loss": 0.6092, "step": 367 }, { "epoch": 0.02728553421813598, "grad_norm": 0.43405407667160034, "learning_rate": 0.00019636909227306829, "loss": 0.6786, "step": 368 }, { "epoch": 0.027359679691554832, "grad_norm": 0.4491245746612549, "learning_rate": 0.00019635908977244312, "loss": 0.5742, "step": 369 }, { "epoch": 0.02743382516497368, "grad_norm": 0.5255609750747681, "learning_rate": 0.00019634908727181796, "loss": 0.6357, "step": 370 }, { "epoch": 0.027507970638392526, "grad_norm": 0.4583694040775299, "learning_rate": 0.0001963390847711928, "loss": 0.6296, "step": 371 }, { "epoch": 0.027582116111811373, "grad_norm": 0.3989011347293854, "learning_rate": 0.00019632908227056766, "loss": 0.6122, "step": 372 }, { "epoch": 0.027656261585230223, "grad_norm": 0.408693790435791, "learning_rate": 0.0001963190797699425, "loss": 0.6183, "step": 373 }, { "epoch": 0.02773040705864907, "grad_norm": 0.4415619373321533, "learning_rate": 0.00019630907726931734, "loss": 0.6261, "step": 374 }, { "epoch": 0.027804552532067917, "grad_norm": 0.4340820908546448, "learning_rate": 0.00019629907476869217, "loss": 0.6799, "step": 375 }, { "epoch": 0.027878698005486764, "grad_norm": 0.39810264110565186, "learning_rate": 0.00019628907226806704, "loss": 0.5894, "step": 376 }, { "epoch": 0.02795284347890561, "grad_norm": 0.440666139125824, "learning_rate": 0.00019627906976744185, "loss": 0.6091, "step": 377 }, { "epoch": 0.02802698895232446, "grad_norm": 0.4710243344306946, "learning_rate": 0.0001962690672668167, "loss": 0.6118, "step": 378 }, { "epoch": 0.02810113442574331, "grad_norm": 0.3904111683368683, "learning_rate": 0.00019625906476619155, "loss": 0.6076, "step": 379 }, { "epoch": 0.028175279899162155, "grad_norm": 0.3999873101711273, "learning_rate": 0.0001962490622655664, "loss": 0.6002, "step": 380 }, { "epoch": 0.028249425372581002, "grad_norm": 0.42525529861450195, "learning_rate": 0.00019623905976494125, "loss": 0.6044, "step": 381 }, { "epoch": 0.028323570845999853, "grad_norm": 0.39856621623039246, "learning_rate": 0.00019622905726431609, "loss": 0.59, "step": 382 }, { "epoch": 0.0283977163194187, "grad_norm": 0.3909347355365753, "learning_rate": 0.00019621905476369092, "loss": 0.6291, "step": 383 }, { "epoch": 0.028471861792837547, "grad_norm": 0.4171190857887268, "learning_rate": 0.0001962090522630658, "loss": 0.5801, "step": 384 }, { "epoch": 0.028546007266256394, "grad_norm": 0.4131286144256592, "learning_rate": 0.00019619904976244062, "loss": 0.6017, "step": 385 }, { "epoch": 0.028620152739675244, "grad_norm": 0.3810795843601227, "learning_rate": 0.00019618904726181546, "loss": 0.5587, "step": 386 }, { "epoch": 0.02869429821309409, "grad_norm": 0.3913341164588928, "learning_rate": 0.0001961790447611903, "loss": 0.6069, "step": 387 }, { "epoch": 0.028768443686512938, "grad_norm": 0.41528549790382385, "learning_rate": 0.00019616904226056516, "loss": 0.6329, "step": 388 }, { "epoch": 0.028842589159931785, "grad_norm": 0.38166847825050354, "learning_rate": 0.00019615903975994, "loss": 0.6211, "step": 389 }, { "epoch": 0.028916734633350635, "grad_norm": 0.36976537108421326, "learning_rate": 0.00019614903725931484, "loss": 0.6503, "step": 390 }, { "epoch": 0.028990880106769482, "grad_norm": 0.4200809597969055, "learning_rate": 0.0001961390347586897, "loss": 0.6459, "step": 391 }, { "epoch": 0.02906502558018833, "grad_norm": 0.4257374107837677, "learning_rate": 0.0001961290322580645, "loss": 0.6641, "step": 392 }, { "epoch": 0.029139171053607176, "grad_norm": 0.37118902802467346, "learning_rate": 0.00019611902975743938, "loss": 0.5911, "step": 393 }, { "epoch": 0.029213316527026027, "grad_norm": 0.41842275857925415, "learning_rate": 0.0001961090272568142, "loss": 0.6308, "step": 394 }, { "epoch": 0.029287462000444874, "grad_norm": 0.4117114543914795, "learning_rate": 0.00019609902475618908, "loss": 0.614, "step": 395 }, { "epoch": 0.02936160747386372, "grad_norm": 0.357730895280838, "learning_rate": 0.0001960890222555639, "loss": 0.5753, "step": 396 }, { "epoch": 0.029435752947282567, "grad_norm": 0.38720566034317017, "learning_rate": 0.00019607901975493875, "loss": 0.6052, "step": 397 }, { "epoch": 0.029509898420701418, "grad_norm": 0.38282403349876404, "learning_rate": 0.0001960690172543136, "loss": 0.581, "step": 398 }, { "epoch": 0.029584043894120265, "grad_norm": 0.393320769071579, "learning_rate": 0.00019605901475368845, "loss": 0.6048, "step": 399 }, { "epoch": 0.02965818936753911, "grad_norm": 0.41295674443244934, "learning_rate": 0.00019604901225306326, "loss": 0.6324, "step": 400 }, { "epoch": 0.02973233484095796, "grad_norm": 0.3870887756347656, "learning_rate": 0.00019603900975243813, "loss": 0.6141, "step": 401 }, { "epoch": 0.029806480314376806, "grad_norm": 0.404788076877594, "learning_rate": 0.00019602900725181296, "loss": 0.6797, "step": 402 }, { "epoch": 0.029880625787795656, "grad_norm": 0.4420028030872345, "learning_rate": 0.0001960190047511878, "loss": 0.6369, "step": 403 }, { "epoch": 0.029954771261214503, "grad_norm": 0.4176959693431854, "learning_rate": 0.00019600900225056264, "loss": 0.5913, "step": 404 }, { "epoch": 0.03002891673463335, "grad_norm": 0.376982718706131, "learning_rate": 0.0001959989997499375, "loss": 0.5998, "step": 405 }, { "epoch": 0.030103062208052197, "grad_norm": 0.39821892976760864, "learning_rate": 0.00019598899724931234, "loss": 0.6257, "step": 406 }, { "epoch": 0.030177207681471047, "grad_norm": 0.4305233359336853, "learning_rate": 0.00019597899474868718, "loss": 0.6197, "step": 407 }, { "epoch": 0.030251353154889894, "grad_norm": 0.40179428458213806, "learning_rate": 0.000195968992248062, "loss": 0.6291, "step": 408 }, { "epoch": 0.03032549862830874, "grad_norm": 0.36676985025405884, "learning_rate": 0.00019595898974743688, "loss": 0.581, "step": 409 }, { "epoch": 0.030399644101727588, "grad_norm": 0.38073694705963135, "learning_rate": 0.00019594898724681171, "loss": 0.6029, "step": 410 }, { "epoch": 0.03047378957514644, "grad_norm": 0.3933388888835907, "learning_rate": 0.00019593898474618655, "loss": 0.6259, "step": 411 }, { "epoch": 0.030547935048565285, "grad_norm": 0.37052032351493835, "learning_rate": 0.0001959289822455614, "loss": 0.6007, "step": 412 }, { "epoch": 0.030622080521984132, "grad_norm": 0.3914024531841278, "learning_rate": 0.00019591897974493625, "loss": 0.5934, "step": 413 }, { "epoch": 0.03069622599540298, "grad_norm": 0.42045170068740845, "learning_rate": 0.0001959089772443111, "loss": 0.6962, "step": 414 }, { "epoch": 0.03077037146882183, "grad_norm": 0.37824520468711853, "learning_rate": 0.00019589897474368593, "loss": 0.5517, "step": 415 }, { "epoch": 0.030844516942240677, "grad_norm": 0.3611850440502167, "learning_rate": 0.00019588897224306076, "loss": 0.5848, "step": 416 }, { "epoch": 0.030918662415659524, "grad_norm": 0.39393696188926697, "learning_rate": 0.00019587896974243563, "loss": 0.6071, "step": 417 }, { "epoch": 0.03099280788907837, "grad_norm": 0.37725183367729187, "learning_rate": 0.00019586896724181047, "loss": 0.5731, "step": 418 }, { "epoch": 0.03106695336249722, "grad_norm": 0.3767041265964508, "learning_rate": 0.0001958589647411853, "loss": 0.5748, "step": 419 }, { "epoch": 0.031141098835916068, "grad_norm": 0.3863943815231323, "learning_rate": 0.00019584896224056014, "loss": 0.6044, "step": 420 }, { "epoch": 0.031215244309334915, "grad_norm": 0.4112786650657654, "learning_rate": 0.000195838959739935, "loss": 0.5904, "step": 421 }, { "epoch": 0.031289389782753765, "grad_norm": 0.3671964406967163, "learning_rate": 0.00019582895723930984, "loss": 0.5787, "step": 422 }, { "epoch": 0.03136353525617261, "grad_norm": 0.42759159207344055, "learning_rate": 0.00019581895473868468, "loss": 0.6453, "step": 423 }, { "epoch": 0.03143768072959146, "grad_norm": 0.41400378942489624, "learning_rate": 0.00019580895223805954, "loss": 0.6286, "step": 424 }, { "epoch": 0.03151182620301031, "grad_norm": 0.39791300892829895, "learning_rate": 0.00019579894973743438, "loss": 0.6349, "step": 425 }, { "epoch": 0.03158597167642915, "grad_norm": 0.4294086694717407, "learning_rate": 0.00019578894723680922, "loss": 0.5845, "step": 426 }, { "epoch": 0.031660117149848004, "grad_norm": 0.3989426791667938, "learning_rate": 0.00019577894473618405, "loss": 0.5941, "step": 427 }, { "epoch": 0.03173426262326685, "grad_norm": 0.3748646378517151, "learning_rate": 0.00019576894223555892, "loss": 0.5985, "step": 428 }, { "epoch": 0.0318084080966857, "grad_norm": 0.40575823187828064, "learning_rate": 0.00019575893973493373, "loss": 0.575, "step": 429 }, { "epoch": 0.03188255357010455, "grad_norm": 0.3827514946460724, "learning_rate": 0.0001957489372343086, "loss": 0.5957, "step": 430 }, { "epoch": 0.03195669904352339, "grad_norm": 0.4274916350841522, "learning_rate": 0.00019573893473368343, "loss": 0.6222, "step": 431 }, { "epoch": 0.03203084451694224, "grad_norm": 0.38988247513771057, "learning_rate": 0.0001957289322330583, "loss": 0.6415, "step": 432 }, { "epoch": 0.032104989990361085, "grad_norm": 0.39597466588020325, "learning_rate": 0.0001957189297324331, "loss": 0.6219, "step": 433 }, { "epoch": 0.032179135463779936, "grad_norm": 0.4024665057659149, "learning_rate": 0.00019570892723180797, "loss": 0.6081, "step": 434 }, { "epoch": 0.032253280937198786, "grad_norm": 0.373253732919693, "learning_rate": 0.0001956989247311828, "loss": 0.5935, "step": 435 }, { "epoch": 0.03232742641061763, "grad_norm": 0.4146507680416107, "learning_rate": 0.00019568892223055767, "loss": 0.6064, "step": 436 }, { "epoch": 0.03240157188403648, "grad_norm": 0.3907158672809601, "learning_rate": 0.00019567891972993248, "loss": 0.589, "step": 437 }, { "epoch": 0.03247571735745533, "grad_norm": 0.38785508275032043, "learning_rate": 0.00019566891722930734, "loss": 0.6148, "step": 438 }, { "epoch": 0.032549862830874174, "grad_norm": 0.3862541615962982, "learning_rate": 0.00019565891472868218, "loss": 0.5946, "step": 439 }, { "epoch": 0.032624008304293024, "grad_norm": 0.3864539563655853, "learning_rate": 0.00019564891222805702, "loss": 0.5778, "step": 440 }, { "epoch": 0.03269815377771187, "grad_norm": 0.4023522436618805, "learning_rate": 0.00019563890972743185, "loss": 0.5728, "step": 441 }, { "epoch": 0.03277229925113072, "grad_norm": 0.393999308347702, "learning_rate": 0.00019562890722680672, "loss": 0.6256, "step": 442 }, { "epoch": 0.03284644472454957, "grad_norm": 0.41281768679618835, "learning_rate": 0.00019561890472618156, "loss": 0.6196, "step": 443 }, { "epoch": 0.03292059019796841, "grad_norm": 0.39950332045555115, "learning_rate": 0.0001956089022255564, "loss": 0.5875, "step": 444 }, { "epoch": 0.03299473567138726, "grad_norm": 0.4663471579551697, "learning_rate": 0.00019559889972493123, "loss": 0.6058, "step": 445 }, { "epoch": 0.03306888114480611, "grad_norm": 0.41644641757011414, "learning_rate": 0.0001955888972243061, "loss": 0.669, "step": 446 }, { "epoch": 0.033143026618224956, "grad_norm": 0.40181198716163635, "learning_rate": 0.00019557889472368093, "loss": 0.6206, "step": 447 }, { "epoch": 0.03321717209164381, "grad_norm": 0.4152888059616089, "learning_rate": 0.00019556889222305577, "loss": 0.655, "step": 448 }, { "epoch": 0.03329131756506265, "grad_norm": 0.3880520462989807, "learning_rate": 0.0001955588897224306, "loss": 0.593, "step": 449 }, { "epoch": 0.0333654630384815, "grad_norm": 0.41677775979042053, "learning_rate": 0.00019554888722180547, "loss": 0.5763, "step": 450 }, { "epoch": 0.03343960851190035, "grad_norm": 0.4256456196308136, "learning_rate": 0.0001955388847211803, "loss": 0.6166, "step": 451 }, { "epoch": 0.033513753985319195, "grad_norm": 0.38774213194847107, "learning_rate": 0.00019552888222055514, "loss": 0.6296, "step": 452 }, { "epoch": 0.033587899458738045, "grad_norm": 0.4019193947315216, "learning_rate": 0.00019551887971992998, "loss": 0.6536, "step": 453 }, { "epoch": 0.03366204493215689, "grad_norm": 0.4069003164768219, "learning_rate": 0.00019550887721930484, "loss": 0.6127, "step": 454 }, { "epoch": 0.03373619040557574, "grad_norm": 0.3881022334098816, "learning_rate": 0.00019549887471867968, "loss": 0.5965, "step": 455 }, { "epoch": 0.03381033587899459, "grad_norm": 0.4628612697124481, "learning_rate": 0.00019548887221805452, "loss": 0.5864, "step": 456 }, { "epoch": 0.03388448135241343, "grad_norm": 0.42349791526794434, "learning_rate": 0.00019547886971742938, "loss": 0.6389, "step": 457 }, { "epoch": 0.03395862682583228, "grad_norm": 0.41096100211143494, "learning_rate": 0.00019546886721680422, "loss": 0.6098, "step": 458 }, { "epoch": 0.034032772299251134, "grad_norm": 0.41759008169174194, "learning_rate": 0.00019545886471617906, "loss": 0.6162, "step": 459 }, { "epoch": 0.03410691777266998, "grad_norm": 0.3847372233867645, "learning_rate": 0.0001954488622155539, "loss": 0.6104, "step": 460 }, { "epoch": 0.03418106324608883, "grad_norm": 0.4216840863227844, "learning_rate": 0.00019543885971492876, "loss": 0.6286, "step": 461 }, { "epoch": 0.03425520871950767, "grad_norm": 0.39498278498649597, "learning_rate": 0.0001954288572143036, "loss": 0.5906, "step": 462 }, { "epoch": 0.03432935419292652, "grad_norm": 0.38263407349586487, "learning_rate": 0.00019541885471367843, "loss": 0.6383, "step": 463 }, { "epoch": 0.03440349966634537, "grad_norm": 0.4239981472492218, "learning_rate": 0.00019540885221305327, "loss": 0.6685, "step": 464 }, { "epoch": 0.034477645139764215, "grad_norm": 0.4269295930862427, "learning_rate": 0.00019539884971242813, "loss": 0.6387, "step": 465 }, { "epoch": 0.034551790613183066, "grad_norm": 0.38752490282058716, "learning_rate": 0.00019538884721180294, "loss": 0.6156, "step": 466 }, { "epoch": 0.034625936086601916, "grad_norm": 0.44098809361457825, "learning_rate": 0.0001953788447111778, "loss": 0.5972, "step": 467 }, { "epoch": 0.03470008156002076, "grad_norm": 0.4114607870578766, "learning_rate": 0.00019536884221055265, "loss": 0.5972, "step": 468 }, { "epoch": 0.03477422703343961, "grad_norm": 0.5069464445114136, "learning_rate": 0.0001953588397099275, "loss": 0.584, "step": 469 }, { "epoch": 0.034848372506858454, "grad_norm": 0.4171728491783142, "learning_rate": 0.00019534883720930232, "loss": 0.5779, "step": 470 }, { "epoch": 0.034922517980277304, "grad_norm": 0.4129730463027954, "learning_rate": 0.00019533883470867718, "loss": 0.5361, "step": 471 }, { "epoch": 0.034996663453696154, "grad_norm": 0.4347470998764038, "learning_rate": 0.00019532883220805202, "loss": 0.6411, "step": 472 }, { "epoch": 0.035070808927115, "grad_norm": 0.39352649450302124, "learning_rate": 0.00019531882970742688, "loss": 0.604, "step": 473 }, { "epoch": 0.03514495440053385, "grad_norm": 0.4227195978164673, "learning_rate": 0.0001953088272068017, "loss": 0.6076, "step": 474 }, { "epoch": 0.0352190998739527, "grad_norm": 0.47753629088401794, "learning_rate": 0.00019529882470617656, "loss": 0.6127, "step": 475 }, { "epoch": 0.03529324534737154, "grad_norm": 0.45499035716056824, "learning_rate": 0.0001952888222055514, "loss": 0.5977, "step": 476 }, { "epoch": 0.03536739082079039, "grad_norm": 0.39455530047416687, "learning_rate": 0.00019527881970492623, "loss": 0.5542, "step": 477 }, { "epoch": 0.035441536294209236, "grad_norm": 0.43430373072624207, "learning_rate": 0.00019526881720430107, "loss": 0.5743, "step": 478 }, { "epoch": 0.035515681767628086, "grad_norm": 0.3955865204334259, "learning_rate": 0.00019525881470367593, "loss": 0.6068, "step": 479 }, { "epoch": 0.03558982724104694, "grad_norm": 0.3622227907180786, "learning_rate": 0.00019524881220305077, "loss": 0.5595, "step": 480 }, { "epoch": 0.03566397271446578, "grad_norm": 0.42686137557029724, "learning_rate": 0.0001952388097024256, "loss": 0.6401, "step": 481 }, { "epoch": 0.03573811818788463, "grad_norm": 0.36828967928886414, "learning_rate": 0.00019522880720180045, "loss": 0.5792, "step": 482 }, { "epoch": 0.035812263661303474, "grad_norm": 0.37651801109313965, "learning_rate": 0.0001952188047011753, "loss": 0.5724, "step": 483 }, { "epoch": 0.035886409134722325, "grad_norm": 0.41241464018821716, "learning_rate": 0.00019520880220055015, "loss": 0.5929, "step": 484 }, { "epoch": 0.035960554608141175, "grad_norm": 0.42999979853630066, "learning_rate": 0.00019519879969992498, "loss": 0.72, "step": 485 }, { "epoch": 0.03603470008156002, "grad_norm": 0.3997892439365387, "learning_rate": 0.00019518879719929982, "loss": 0.6364, "step": 486 }, { "epoch": 0.03610884555497887, "grad_norm": 0.40727850794792175, "learning_rate": 0.00019517879469867469, "loss": 0.5893, "step": 487 }, { "epoch": 0.03618299102839772, "grad_norm": 0.42485281825065613, "learning_rate": 0.00019516879219804952, "loss": 0.6085, "step": 488 }, { "epoch": 0.03625713650181656, "grad_norm": 0.39974620938301086, "learning_rate": 0.00019515878969742436, "loss": 0.612, "step": 489 }, { "epoch": 0.03633128197523541, "grad_norm": 0.4314735233783722, "learning_rate": 0.0001951487871967992, "loss": 0.6573, "step": 490 }, { "epoch": 0.03640542744865426, "grad_norm": 0.37823614478111267, "learning_rate": 0.00019513878469617406, "loss": 0.5529, "step": 491 }, { "epoch": 0.03647957292207311, "grad_norm": 0.36652514338493347, "learning_rate": 0.0001951287821955489, "loss": 0.6302, "step": 492 }, { "epoch": 0.03655371839549196, "grad_norm": 0.4368113875389099, "learning_rate": 0.00019511877969492373, "loss": 0.6524, "step": 493 }, { "epoch": 0.0366278638689108, "grad_norm": 0.39254748821258545, "learning_rate": 0.0001951087771942986, "loss": 0.5964, "step": 494 }, { "epoch": 0.03670200934232965, "grad_norm": 0.40433377027511597, "learning_rate": 0.00019509877469367344, "loss": 0.6721, "step": 495 }, { "epoch": 0.0367761548157485, "grad_norm": 0.3806106150150299, "learning_rate": 0.00019508877219304827, "loss": 0.6427, "step": 496 }, { "epoch": 0.036850300289167345, "grad_norm": 0.37514829635620117, "learning_rate": 0.0001950787696924231, "loss": 0.557, "step": 497 }, { "epoch": 0.036924445762586196, "grad_norm": 0.4145525097846985, "learning_rate": 0.00019506876719179797, "loss": 0.5639, "step": 498 }, { "epoch": 0.03699859123600504, "grad_norm": 0.4338890016078949, "learning_rate": 0.0001950587646911728, "loss": 0.6037, "step": 499 }, { "epoch": 0.03707273670942389, "grad_norm": 0.4589567184448242, "learning_rate": 0.00019504876219054765, "loss": 0.694, "step": 500 }, { "epoch": 0.03714688218284274, "grad_norm": 0.3885138928890228, "learning_rate": 0.00019503875968992249, "loss": 0.6255, "step": 501 }, { "epoch": 0.037221027656261584, "grad_norm": 0.40958598256111145, "learning_rate": 0.00019502875718929735, "loss": 0.5509, "step": 502 }, { "epoch": 0.037295173129680434, "grad_norm": 0.41324520111083984, "learning_rate": 0.00019501875468867216, "loss": 0.5989, "step": 503 }, { "epoch": 0.03736931860309928, "grad_norm": 0.4057694971561432, "learning_rate": 0.00019500875218804702, "loss": 0.6528, "step": 504 }, { "epoch": 0.03744346407651813, "grad_norm": 0.38921141624450684, "learning_rate": 0.00019499874968742186, "loss": 0.5607, "step": 505 }, { "epoch": 0.03751760954993698, "grad_norm": 0.3915947377681732, "learning_rate": 0.00019498874718679673, "loss": 0.6071, "step": 506 }, { "epoch": 0.03759175502335582, "grad_norm": 0.41303664445877075, "learning_rate": 0.00019497874468617154, "loss": 0.6703, "step": 507 }, { "epoch": 0.03766590049677467, "grad_norm": 0.3944874405860901, "learning_rate": 0.0001949687421855464, "loss": 0.6002, "step": 508 }, { "epoch": 0.03774004597019352, "grad_norm": 0.3739301860332489, "learning_rate": 0.00019495873968492124, "loss": 0.5387, "step": 509 }, { "epoch": 0.037814191443612366, "grad_norm": 0.41850948333740234, "learning_rate": 0.0001949487371842961, "loss": 0.645, "step": 510 }, { "epoch": 0.037888336917031216, "grad_norm": 0.4409671723842621, "learning_rate": 0.0001949387346836709, "loss": 0.6765, "step": 511 }, { "epoch": 0.03796248239045006, "grad_norm": 0.37015780806541443, "learning_rate": 0.00019492873218304578, "loss": 0.5589, "step": 512 }, { "epoch": 0.03803662786386891, "grad_norm": 0.40318411588668823, "learning_rate": 0.0001949187296824206, "loss": 0.6178, "step": 513 }, { "epoch": 0.03811077333728776, "grad_norm": 0.4061470329761505, "learning_rate": 0.00019490872718179545, "loss": 0.5926, "step": 514 }, { "epoch": 0.038184918810706604, "grad_norm": 0.3789256513118744, "learning_rate": 0.00019489872468117029, "loss": 0.5777, "step": 515 }, { "epoch": 0.038259064284125455, "grad_norm": 0.41558295488357544, "learning_rate": 0.00019488872218054515, "loss": 0.619, "step": 516 }, { "epoch": 0.038333209757544305, "grad_norm": 0.399703711271286, "learning_rate": 0.00019487871967992, "loss": 0.5949, "step": 517 }, { "epoch": 0.03840735523096315, "grad_norm": 0.3907957971096039, "learning_rate": 0.00019486871717929482, "loss": 0.6037, "step": 518 }, { "epoch": 0.038481500704382, "grad_norm": 0.3878666162490845, "learning_rate": 0.00019485871467866966, "loss": 0.5892, "step": 519 }, { "epoch": 0.03855564617780084, "grad_norm": 0.40430212020874023, "learning_rate": 0.00019484871217804453, "loss": 0.578, "step": 520 }, { "epoch": 0.03862979165121969, "grad_norm": 0.4088311791419983, "learning_rate": 0.00019483870967741936, "loss": 0.6348, "step": 521 }, { "epoch": 0.03870393712463854, "grad_norm": 0.44321829080581665, "learning_rate": 0.0001948287071767942, "loss": 0.5855, "step": 522 }, { "epoch": 0.03877808259805739, "grad_norm": 0.40773215889930725, "learning_rate": 0.00019481870467616904, "loss": 0.6411, "step": 523 }, { "epoch": 0.03885222807147624, "grad_norm": 0.41496387124061584, "learning_rate": 0.0001948087021755439, "loss": 0.6182, "step": 524 }, { "epoch": 0.03892637354489509, "grad_norm": 0.4211590588092804, "learning_rate": 0.00019479869967491874, "loss": 0.5532, "step": 525 }, { "epoch": 0.03900051901831393, "grad_norm": 0.38212573528289795, "learning_rate": 0.00019478869717429358, "loss": 0.5527, "step": 526 }, { "epoch": 0.03907466449173278, "grad_norm": 0.38387641310691833, "learning_rate": 0.00019477869467366844, "loss": 0.6023, "step": 527 }, { "epoch": 0.039148809965151625, "grad_norm": 0.3975747525691986, "learning_rate": 0.00019476869217304328, "loss": 0.6152, "step": 528 }, { "epoch": 0.039222955438570475, "grad_norm": 0.47487062215805054, "learning_rate": 0.00019475868967241811, "loss": 0.6018, "step": 529 }, { "epoch": 0.039297100911989326, "grad_norm": 0.4296361804008484, "learning_rate": 0.00019474868717179295, "loss": 0.5922, "step": 530 }, { "epoch": 0.03937124638540817, "grad_norm": 0.38546764850616455, "learning_rate": 0.00019473868467116782, "loss": 0.5614, "step": 531 }, { "epoch": 0.03944539185882702, "grad_norm": 0.3961602449417114, "learning_rate": 0.00019472868217054265, "loss": 0.567, "step": 532 }, { "epoch": 0.03951953733224586, "grad_norm": 0.49981242418289185, "learning_rate": 0.0001947186796699175, "loss": 0.5939, "step": 533 }, { "epoch": 0.039593682805664714, "grad_norm": 0.4450765550136566, "learning_rate": 0.00019470867716929233, "loss": 0.5976, "step": 534 }, { "epoch": 0.039667828279083564, "grad_norm": 0.4086145758628845, "learning_rate": 0.0001946986746686672, "loss": 0.5892, "step": 535 }, { "epoch": 0.03974197375250241, "grad_norm": 0.38707488775253296, "learning_rate": 0.00019468867216804203, "loss": 0.6188, "step": 536 }, { "epoch": 0.03981611922592126, "grad_norm": 0.3994200825691223, "learning_rate": 0.00019467866966741686, "loss": 0.6144, "step": 537 }, { "epoch": 0.03989026469934011, "grad_norm": 0.4286232888698578, "learning_rate": 0.0001946686671667917, "loss": 0.5903, "step": 538 }, { "epoch": 0.03996441017275895, "grad_norm": 0.38526585698127747, "learning_rate": 0.00019465866466616657, "loss": 0.5694, "step": 539 }, { "epoch": 0.0400385556461778, "grad_norm": 0.41333869099617004, "learning_rate": 0.00019464866216554138, "loss": 0.643, "step": 540 }, { "epoch": 0.040112701119596646, "grad_norm": 0.47428032755851746, "learning_rate": 0.00019463865966491624, "loss": 0.6428, "step": 541 }, { "epoch": 0.040186846593015496, "grad_norm": 0.387861967086792, "learning_rate": 0.00019462865716429108, "loss": 0.6382, "step": 542 }, { "epoch": 0.040260992066434347, "grad_norm": 0.4012882113456726, "learning_rate": 0.00019461865466366594, "loss": 0.6056, "step": 543 }, { "epoch": 0.04033513753985319, "grad_norm": 0.37661853432655334, "learning_rate": 0.00019460865216304075, "loss": 0.5741, "step": 544 }, { "epoch": 0.04040928301327204, "grad_norm": 0.3973684310913086, "learning_rate": 0.00019459864966241562, "loss": 0.6157, "step": 545 }, { "epoch": 0.04048342848669089, "grad_norm": 0.3902902603149414, "learning_rate": 0.00019458864716179045, "loss": 0.644, "step": 546 }, { "epoch": 0.040557573960109734, "grad_norm": 0.3884023427963257, "learning_rate": 0.00019457864466116532, "loss": 0.5894, "step": 547 }, { "epoch": 0.040631719433528585, "grad_norm": 0.38078832626342773, "learning_rate": 0.00019456864216054013, "loss": 0.5847, "step": 548 }, { "epoch": 0.04070586490694743, "grad_norm": 0.396949827671051, "learning_rate": 0.000194558639659915, "loss": 0.6616, "step": 549 }, { "epoch": 0.04078001038036628, "grad_norm": 0.4385409355163574, "learning_rate": 0.00019454863715928983, "loss": 0.6321, "step": 550 }, { "epoch": 0.04085415585378513, "grad_norm": 0.4646832346916199, "learning_rate": 0.00019453863465866467, "loss": 0.6199, "step": 551 }, { "epoch": 0.04092830132720397, "grad_norm": 0.4127965569496155, "learning_rate": 0.0001945286321580395, "loss": 0.5825, "step": 552 }, { "epoch": 0.04100244680062282, "grad_norm": 0.4193863868713379, "learning_rate": 0.00019451862965741437, "loss": 0.5854, "step": 553 }, { "epoch": 0.041076592274041666, "grad_norm": 0.41279053688049316, "learning_rate": 0.0001945086271567892, "loss": 0.605, "step": 554 }, { "epoch": 0.04115073774746052, "grad_norm": 0.38829439878463745, "learning_rate": 0.00019449862465616404, "loss": 0.5562, "step": 555 }, { "epoch": 0.04122488322087937, "grad_norm": 0.4167730510234833, "learning_rate": 0.00019448862215553888, "loss": 0.6553, "step": 556 }, { "epoch": 0.04129902869429821, "grad_norm": 0.3888501524925232, "learning_rate": 0.00019447861965491374, "loss": 0.5587, "step": 557 }, { "epoch": 0.04137317416771706, "grad_norm": 0.4042329788208008, "learning_rate": 0.00019446861715428858, "loss": 0.6554, "step": 558 }, { "epoch": 0.04144731964113591, "grad_norm": 0.3913702070713043, "learning_rate": 0.00019445861465366342, "loss": 0.6013, "step": 559 }, { "epoch": 0.041521465114554755, "grad_norm": 0.38883689045906067, "learning_rate": 0.00019444861215303828, "loss": 0.6019, "step": 560 }, { "epoch": 0.041595610587973605, "grad_norm": 0.4066160023212433, "learning_rate": 0.00019443860965241312, "loss": 0.6219, "step": 561 }, { "epoch": 0.04166975606139245, "grad_norm": 0.43058285117149353, "learning_rate": 0.00019442860715178795, "loss": 0.6381, "step": 562 }, { "epoch": 0.0417439015348113, "grad_norm": 0.38071122765541077, "learning_rate": 0.0001944186046511628, "loss": 0.6047, "step": 563 }, { "epoch": 0.04181804700823015, "grad_norm": 0.43016892671585083, "learning_rate": 0.00019440860215053766, "loss": 0.6161, "step": 564 }, { "epoch": 0.04189219248164899, "grad_norm": 0.3862711191177368, "learning_rate": 0.0001943985996499125, "loss": 0.6245, "step": 565 }, { "epoch": 0.041966337955067844, "grad_norm": 0.38118496537208557, "learning_rate": 0.00019438859714928733, "loss": 0.6196, "step": 566 }, { "epoch": 0.042040483428486694, "grad_norm": 0.38892149925231934, "learning_rate": 0.00019437859464866217, "loss": 0.5817, "step": 567 }, { "epoch": 0.04211462890190554, "grad_norm": 0.3796570301055908, "learning_rate": 0.00019436859214803703, "loss": 0.556, "step": 568 }, { "epoch": 0.04218877437532439, "grad_norm": 0.3880278170108795, "learning_rate": 0.00019435858964741187, "loss": 0.6109, "step": 569 }, { "epoch": 0.04226291984874323, "grad_norm": 0.4276772141456604, "learning_rate": 0.0001943485871467867, "loss": 0.564, "step": 570 }, { "epoch": 0.04233706532216208, "grad_norm": 0.3757319748401642, "learning_rate": 0.00019433858464616154, "loss": 0.587, "step": 571 }, { "epoch": 0.04241121079558093, "grad_norm": 0.4779418706893921, "learning_rate": 0.0001943285821455364, "loss": 0.6139, "step": 572 }, { "epoch": 0.042485356268999776, "grad_norm": 0.49138328433036804, "learning_rate": 0.00019431857964491124, "loss": 0.5847, "step": 573 }, { "epoch": 0.042559501742418626, "grad_norm": 0.43123000860214233, "learning_rate": 0.00019430857714428608, "loss": 0.5999, "step": 574 }, { "epoch": 0.04263364721583748, "grad_norm": 0.43195533752441406, "learning_rate": 0.00019429857464366092, "loss": 0.5771, "step": 575 }, { "epoch": 0.04270779268925632, "grad_norm": 0.4925196170806885, "learning_rate": 0.00019428857214303578, "loss": 0.5829, "step": 576 }, { "epoch": 0.04278193816267517, "grad_norm": 0.4349517822265625, "learning_rate": 0.0001942785696424106, "loss": 0.6039, "step": 577 }, { "epoch": 0.042856083636094014, "grad_norm": 0.42332184314727783, "learning_rate": 0.00019426856714178546, "loss": 0.6311, "step": 578 }, { "epoch": 0.042930229109512864, "grad_norm": 0.38060545921325684, "learning_rate": 0.0001942585646411603, "loss": 0.6035, "step": 579 }, { "epoch": 0.043004374582931715, "grad_norm": 0.3791099786758423, "learning_rate": 0.00019424856214053516, "loss": 0.6295, "step": 580 }, { "epoch": 0.04307852005635056, "grad_norm": 0.4297429919242859, "learning_rate": 0.00019423855963990997, "loss": 0.6329, "step": 581 }, { "epoch": 0.04315266552976941, "grad_norm": 0.4738735258579254, "learning_rate": 0.00019422855713928483, "loss": 0.6347, "step": 582 }, { "epoch": 0.04322681100318825, "grad_norm": 0.4026232063770294, "learning_rate": 0.00019421855463865967, "loss": 0.5785, "step": 583 }, { "epoch": 0.0433009564766071, "grad_norm": 0.44843795895576477, "learning_rate": 0.00019420855213803453, "loss": 0.6288, "step": 584 }, { "epoch": 0.04337510195002595, "grad_norm": 0.4503443241119385, "learning_rate": 0.00019419854963740934, "loss": 0.6275, "step": 585 }, { "epoch": 0.043449247423444796, "grad_norm": 0.42440953850746155, "learning_rate": 0.0001941885471367842, "loss": 0.5954, "step": 586 }, { "epoch": 0.04352339289686365, "grad_norm": 0.3744544982910156, "learning_rate": 0.00019417854463615904, "loss": 0.5738, "step": 587 }, { "epoch": 0.0435975383702825, "grad_norm": 0.4024685025215149, "learning_rate": 0.00019416854213553388, "loss": 0.6166, "step": 588 }, { "epoch": 0.04367168384370134, "grad_norm": 0.3707767724990845, "learning_rate": 0.00019415853963490872, "loss": 0.5808, "step": 589 }, { "epoch": 0.04374582931712019, "grad_norm": 0.4508272111415863, "learning_rate": 0.00019414853713428358, "loss": 0.5744, "step": 590 }, { "epoch": 0.043819974790539035, "grad_norm": 0.42922475934028625, "learning_rate": 0.00019413853463365842, "loss": 0.6096, "step": 591 }, { "epoch": 0.043894120263957885, "grad_norm": 0.39816224575042725, "learning_rate": 0.00019412853213303326, "loss": 0.5788, "step": 592 }, { "epoch": 0.043968265737376735, "grad_norm": 0.4491688907146454, "learning_rate": 0.00019411852963240812, "loss": 0.6127, "step": 593 }, { "epoch": 0.04404241121079558, "grad_norm": 0.4310486912727356, "learning_rate": 0.00019410852713178296, "loss": 0.5699, "step": 594 }, { "epoch": 0.04411655668421443, "grad_norm": 0.4387257397174835, "learning_rate": 0.0001940985246311578, "loss": 0.6622, "step": 595 }, { "epoch": 0.04419070215763328, "grad_norm": 0.4105393588542938, "learning_rate": 0.00019408852213053263, "loss": 0.5779, "step": 596 }, { "epoch": 0.04426484763105212, "grad_norm": 0.4416123628616333, "learning_rate": 0.0001940785196299075, "loss": 0.6011, "step": 597 }, { "epoch": 0.044338993104470974, "grad_norm": 0.3760926425457001, "learning_rate": 0.00019406851712928233, "loss": 0.5858, "step": 598 }, { "epoch": 0.04441313857788982, "grad_norm": 0.43953952193260193, "learning_rate": 0.00019405851462865717, "loss": 0.581, "step": 599 }, { "epoch": 0.04448728405130867, "grad_norm": 0.39641889929771423, "learning_rate": 0.000194048512128032, "loss": 0.6139, "step": 600 }, { "epoch": 0.04456142952472752, "grad_norm": 0.48046064376831055, "learning_rate": 0.00019403850962740687, "loss": 0.5704, "step": 601 }, { "epoch": 0.04463557499814636, "grad_norm": 0.4013110101222992, "learning_rate": 0.0001940285071267817, "loss": 0.614, "step": 602 }, { "epoch": 0.04470972047156521, "grad_norm": 0.4086866080760956, "learning_rate": 0.00019401850462615655, "loss": 0.6091, "step": 603 }, { "epoch": 0.044783865944984055, "grad_norm": 0.41020888090133667, "learning_rate": 0.00019400850212553138, "loss": 0.5991, "step": 604 }, { "epoch": 0.044858011418402906, "grad_norm": 0.38070571422576904, "learning_rate": 0.00019399849962490625, "loss": 0.5966, "step": 605 }, { "epoch": 0.044932156891821756, "grad_norm": 0.39757320284843445, "learning_rate": 0.00019398849712428108, "loss": 0.6263, "step": 606 }, { "epoch": 0.0450063023652406, "grad_norm": 0.36120665073394775, "learning_rate": 0.00019397849462365592, "loss": 0.5732, "step": 607 }, { "epoch": 0.04508044783865945, "grad_norm": 0.39103421568870544, "learning_rate": 0.00019396849212303076, "loss": 0.6009, "step": 608 }, { "epoch": 0.0451545933120783, "grad_norm": 0.3966640830039978, "learning_rate": 0.00019395848962240562, "loss": 0.5929, "step": 609 }, { "epoch": 0.045228738785497144, "grad_norm": 0.37033939361572266, "learning_rate": 0.00019394848712178046, "loss": 0.5781, "step": 610 }, { "epoch": 0.045302884258915994, "grad_norm": 0.34682056307792664, "learning_rate": 0.0001939384846211553, "loss": 0.5909, "step": 611 }, { "epoch": 0.04537702973233484, "grad_norm": 0.37470757961273193, "learning_rate": 0.00019392848212053013, "loss": 0.563, "step": 612 }, { "epoch": 0.04545117520575369, "grad_norm": 0.3714827299118042, "learning_rate": 0.000193918479619905, "loss": 0.5741, "step": 613 }, { "epoch": 0.04552532067917254, "grad_norm": 0.40758442878723145, "learning_rate": 0.0001939084771192798, "loss": 0.5907, "step": 614 }, { "epoch": 0.04559946615259138, "grad_norm": 0.4810771942138672, "learning_rate": 0.00019389847461865467, "loss": 0.603, "step": 615 }, { "epoch": 0.04567361162601023, "grad_norm": 0.40632474422454834, "learning_rate": 0.0001938884721180295, "loss": 0.6132, "step": 616 }, { "epoch": 0.04574775709942908, "grad_norm": 0.4188229441642761, "learning_rate": 0.00019387846961740437, "loss": 0.5947, "step": 617 }, { "epoch": 0.045821902572847927, "grad_norm": 0.37953251600265503, "learning_rate": 0.00019386846711677918, "loss": 0.6436, "step": 618 }, { "epoch": 0.04589604804626678, "grad_norm": 0.3870197534561157, "learning_rate": 0.00019385846461615405, "loss": 0.6265, "step": 619 }, { "epoch": 0.04597019351968562, "grad_norm": 0.3876538872718811, "learning_rate": 0.00019384846211552889, "loss": 0.6399, "step": 620 }, { "epoch": 0.04604433899310447, "grad_norm": 0.3703904449939728, "learning_rate": 0.00019383845961490375, "loss": 0.5775, "step": 621 }, { "epoch": 0.04611848446652332, "grad_norm": 0.3960292935371399, "learning_rate": 0.00019382845711427856, "loss": 0.6058, "step": 622 }, { "epoch": 0.046192629939942165, "grad_norm": 0.3762204349040985, "learning_rate": 0.00019381845461365342, "loss": 0.5556, "step": 623 }, { "epoch": 0.046266775413361015, "grad_norm": 0.3839866518974304, "learning_rate": 0.00019380845211302826, "loss": 0.6499, "step": 624 }, { "epoch": 0.046340920886779866, "grad_norm": 0.38416826725006104, "learning_rate": 0.0001937984496124031, "loss": 0.6091, "step": 625 }, { "epoch": 0.04641506636019871, "grad_norm": 0.3893815279006958, "learning_rate": 0.00019378844711177796, "loss": 0.6321, "step": 626 }, { "epoch": 0.04648921183361756, "grad_norm": 0.386563777923584, "learning_rate": 0.0001937784446111528, "loss": 0.5818, "step": 627 }, { "epoch": 0.0465633573070364, "grad_norm": 0.41775551438331604, "learning_rate": 0.00019376844211052764, "loss": 0.6141, "step": 628 }, { "epoch": 0.04663750278045525, "grad_norm": 0.38854652643203735, "learning_rate": 0.00019375843960990247, "loss": 0.6276, "step": 629 }, { "epoch": 0.046711648253874104, "grad_norm": 0.3778268098831177, "learning_rate": 0.00019374843710927734, "loss": 0.6155, "step": 630 }, { "epoch": 0.04678579372729295, "grad_norm": 0.37630289793014526, "learning_rate": 0.00019373843460865217, "loss": 0.5876, "step": 631 }, { "epoch": 0.0468599392007118, "grad_norm": 0.38356101512908936, "learning_rate": 0.000193728432108027, "loss": 0.5972, "step": 632 }, { "epoch": 0.04693408467413064, "grad_norm": 0.466828316450119, "learning_rate": 0.00019371842960740185, "loss": 0.6447, "step": 633 }, { "epoch": 0.04700823014754949, "grad_norm": 0.40004900097846985, "learning_rate": 0.0001937084271067767, "loss": 0.5641, "step": 634 }, { "epoch": 0.04708237562096834, "grad_norm": 0.4274490177631378, "learning_rate": 0.00019369842460615155, "loss": 0.5923, "step": 635 }, { "epoch": 0.047156521094387185, "grad_norm": 0.3842507600784302, "learning_rate": 0.00019368842210552641, "loss": 0.5877, "step": 636 }, { "epoch": 0.047230666567806036, "grad_norm": 0.3693373203277588, "learning_rate": 0.00019367841960490122, "loss": 0.5486, "step": 637 }, { "epoch": 0.047304812041224886, "grad_norm": 0.39860889315605164, "learning_rate": 0.0001936684171042761, "loss": 0.5434, "step": 638 }, { "epoch": 0.04737895751464373, "grad_norm": 0.4249580204486847, "learning_rate": 0.00019365841460365093, "loss": 0.5902, "step": 639 }, { "epoch": 0.04745310298806258, "grad_norm": 0.39503422379493713, "learning_rate": 0.00019364841210302576, "loss": 0.5596, "step": 640 }, { "epoch": 0.047527248461481424, "grad_norm": 0.3763625919818878, "learning_rate": 0.0001936384096024006, "loss": 0.5937, "step": 641 }, { "epoch": 0.047601393934900274, "grad_norm": 0.3557787537574768, "learning_rate": 0.00019362840710177546, "loss": 0.5398, "step": 642 }, { "epoch": 0.047675539408319124, "grad_norm": 0.4083307683467865, "learning_rate": 0.0001936184046011503, "loss": 0.6226, "step": 643 }, { "epoch": 0.04774968488173797, "grad_norm": 0.38856521248817444, "learning_rate": 0.00019360840210052514, "loss": 0.5617, "step": 644 }, { "epoch": 0.04782383035515682, "grad_norm": 0.3810798227787018, "learning_rate": 0.00019359839959989998, "loss": 0.5472, "step": 645 }, { "epoch": 0.04789797582857567, "grad_norm": 0.39890795946121216, "learning_rate": 0.00019358839709927484, "loss": 0.5733, "step": 646 }, { "epoch": 0.04797212130199451, "grad_norm": 0.448803186416626, "learning_rate": 0.00019357839459864968, "loss": 0.5724, "step": 647 }, { "epoch": 0.04804626677541336, "grad_norm": 0.43062686920166016, "learning_rate": 0.00019356839209802451, "loss": 0.6072, "step": 648 }, { "epoch": 0.048120412248832206, "grad_norm": 0.37272998690605164, "learning_rate": 0.00019355838959739935, "loss": 0.5867, "step": 649 }, { "epoch": 0.04819455772225106, "grad_norm": 0.5291959047317505, "learning_rate": 0.00019354838709677422, "loss": 0.6543, "step": 650 }, { "epoch": 0.04826870319566991, "grad_norm": 0.4443143904209137, "learning_rate": 0.00019353838459614903, "loss": 0.6245, "step": 651 }, { "epoch": 0.04834284866908875, "grad_norm": 0.42065587639808655, "learning_rate": 0.0001935283820955239, "loss": 0.6056, "step": 652 }, { "epoch": 0.0484169941425076, "grad_norm": 0.37995192408561707, "learning_rate": 0.00019351837959489873, "loss": 0.5985, "step": 653 }, { "epoch": 0.048491139615926444, "grad_norm": 0.3986353278160095, "learning_rate": 0.0001935083770942736, "loss": 0.5585, "step": 654 }, { "epoch": 0.048565285089345295, "grad_norm": 0.4694853127002716, "learning_rate": 0.0001934983745936484, "loss": 0.6126, "step": 655 }, { "epoch": 0.048639430562764145, "grad_norm": 0.38145357370376587, "learning_rate": 0.00019348837209302326, "loss": 0.5949, "step": 656 }, { "epoch": 0.04871357603618299, "grad_norm": 0.38639116287231445, "learning_rate": 0.0001934783695923981, "loss": 0.5851, "step": 657 }, { "epoch": 0.04878772150960184, "grad_norm": 0.4051510691642761, "learning_rate": 0.00019346836709177297, "loss": 0.5825, "step": 658 }, { "epoch": 0.04886186698302069, "grad_norm": 0.40911242365837097, "learning_rate": 0.00019345836459114778, "loss": 0.603, "step": 659 }, { "epoch": 0.04893601245643953, "grad_norm": 0.37201303243637085, "learning_rate": 0.00019344836209052264, "loss": 0.5829, "step": 660 }, { "epoch": 0.04901015792985838, "grad_norm": 0.3785178065299988, "learning_rate": 0.00019343835958989748, "loss": 0.5328, "step": 661 }, { "epoch": 0.04908430340327723, "grad_norm": 0.3934203088283539, "learning_rate": 0.00019342835708927231, "loss": 0.5542, "step": 662 }, { "epoch": 0.04915844887669608, "grad_norm": 0.40624696016311646, "learning_rate": 0.00019341835458864718, "loss": 0.5557, "step": 663 }, { "epoch": 0.04923259435011493, "grad_norm": 0.4206804931163788, "learning_rate": 0.00019340835208802202, "loss": 0.652, "step": 664 }, { "epoch": 0.04930673982353377, "grad_norm": 0.42323434352874756, "learning_rate": 0.00019339834958739685, "loss": 0.5857, "step": 665 }, { "epoch": 0.04938088529695262, "grad_norm": 0.39634349942207336, "learning_rate": 0.0001933883470867717, "loss": 0.6233, "step": 666 }, { "epoch": 0.04945503077037147, "grad_norm": 0.37332531809806824, "learning_rate": 0.00019337834458614655, "loss": 0.6022, "step": 667 }, { "epoch": 0.049529176243790315, "grad_norm": 0.3631075918674469, "learning_rate": 0.0001933683420855214, "loss": 0.5756, "step": 668 }, { "epoch": 0.049603321717209166, "grad_norm": 0.3736778497695923, "learning_rate": 0.00019335833958489626, "loss": 0.5793, "step": 669 }, { "epoch": 0.04967746719062801, "grad_norm": 0.37197718024253845, "learning_rate": 0.00019334833708427107, "loss": 0.5747, "step": 670 }, { "epoch": 0.04975161266404686, "grad_norm": 0.37998536229133606, "learning_rate": 0.00019333833458364593, "loss": 0.5863, "step": 671 }, { "epoch": 0.04982575813746571, "grad_norm": 0.3845256567001343, "learning_rate": 0.00019332833208302077, "loss": 0.5959, "step": 672 }, { "epoch": 0.049899903610884554, "grad_norm": 0.3588480055332184, "learning_rate": 0.00019331832958239563, "loss": 0.5609, "step": 673 }, { "epoch": 0.049974049084303404, "grad_norm": 0.3697630763053894, "learning_rate": 0.00019330832708177044, "loss": 0.5647, "step": 674 }, { "epoch": 0.050048194557722255, "grad_norm": 0.382545530796051, "learning_rate": 0.0001932983245811453, "loss": 0.5698, "step": 675 }, { "epoch": 0.0501223400311411, "grad_norm": 0.40050575137138367, "learning_rate": 0.00019328832208052014, "loss": 0.62, "step": 676 }, { "epoch": 0.05019648550455995, "grad_norm": 0.39731109142303467, "learning_rate": 0.00019327831957989498, "loss": 0.5995, "step": 677 }, { "epoch": 0.05027063097797879, "grad_norm": 0.4079475402832031, "learning_rate": 0.00019326831707926982, "loss": 0.6184, "step": 678 }, { "epoch": 0.05034477645139764, "grad_norm": 0.397697776556015, "learning_rate": 0.00019325831457864468, "loss": 0.5689, "step": 679 }, { "epoch": 0.05041892192481649, "grad_norm": 0.3898303806781769, "learning_rate": 0.00019324831207801952, "loss": 0.562, "step": 680 }, { "epoch": 0.050493067398235336, "grad_norm": 0.38109782338142395, "learning_rate": 0.00019323830957739435, "loss": 0.5561, "step": 681 }, { "epoch": 0.05056721287165419, "grad_norm": 0.373543381690979, "learning_rate": 0.0001932283070767692, "loss": 0.5716, "step": 682 }, { "epoch": 0.05064135834507303, "grad_norm": 0.3799091577529907, "learning_rate": 0.00019321830457614406, "loss": 0.5791, "step": 683 }, { "epoch": 0.05071550381849188, "grad_norm": 0.3766174912452698, "learning_rate": 0.0001932083020755189, "loss": 0.5781, "step": 684 }, { "epoch": 0.05078964929191073, "grad_norm": 0.39352723956108093, "learning_rate": 0.00019319829957489373, "loss": 0.5811, "step": 685 }, { "epoch": 0.050863794765329574, "grad_norm": 0.3994705379009247, "learning_rate": 0.00019318829707426857, "loss": 0.5921, "step": 686 }, { "epoch": 0.050937940238748425, "grad_norm": 0.4826222062110901, "learning_rate": 0.00019317829457364343, "loss": 0.5923, "step": 687 }, { "epoch": 0.051012085712167275, "grad_norm": 0.4157077670097351, "learning_rate": 0.00019316829207301824, "loss": 0.5914, "step": 688 }, { "epoch": 0.05108623118558612, "grad_norm": 0.45465269684791565, "learning_rate": 0.0001931582895723931, "loss": 0.5887, "step": 689 }, { "epoch": 0.05116037665900497, "grad_norm": 0.42204901576042175, "learning_rate": 0.00019314828707176794, "loss": 0.573, "step": 690 }, { "epoch": 0.05123452213242381, "grad_norm": 0.396316260099411, "learning_rate": 0.0001931382845711428, "loss": 0.5499, "step": 691 }, { "epoch": 0.05130866760584266, "grad_norm": 0.3835430443286896, "learning_rate": 0.00019312828207051762, "loss": 0.5688, "step": 692 }, { "epoch": 0.05138281307926151, "grad_norm": 0.4217252731323242, "learning_rate": 0.00019311827956989248, "loss": 0.6108, "step": 693 }, { "epoch": 0.05145695855268036, "grad_norm": 0.44021153450012207, "learning_rate": 0.00019310827706926732, "loss": 0.6078, "step": 694 }, { "epoch": 0.05153110402609921, "grad_norm": 0.4107126295566559, "learning_rate": 0.00019309827456864218, "loss": 0.611, "step": 695 }, { "epoch": 0.05160524949951806, "grad_norm": 0.3689759373664856, "learning_rate": 0.00019308827206801702, "loss": 0.6039, "step": 696 }, { "epoch": 0.0516793949729369, "grad_norm": 0.4076346158981323, "learning_rate": 0.00019307826956739186, "loss": 0.6143, "step": 697 }, { "epoch": 0.05175354044635575, "grad_norm": 0.4176998734474182, "learning_rate": 0.0001930682670667667, "loss": 0.5824, "step": 698 }, { "epoch": 0.051827685919774595, "grad_norm": 0.37488606572151184, "learning_rate": 0.00019305826456614156, "loss": 0.5891, "step": 699 }, { "epoch": 0.051901831393193446, "grad_norm": 0.4158381223678589, "learning_rate": 0.0001930482620655164, "loss": 0.5958, "step": 700 }, { "epoch": 0.051975976866612296, "grad_norm": 0.3752988576889038, "learning_rate": 0.00019303825956489123, "loss": 0.555, "step": 701 }, { "epoch": 0.05205012234003114, "grad_norm": 0.4128042459487915, "learning_rate": 0.0001930282570642661, "loss": 0.664, "step": 702 }, { "epoch": 0.05212426781344999, "grad_norm": 0.403687983751297, "learning_rate": 0.0001930182545636409, "loss": 0.5689, "step": 703 }, { "epoch": 0.05219841328686883, "grad_norm": 0.3741587996482849, "learning_rate": 0.00019300825206301577, "loss": 0.5954, "step": 704 }, { "epoch": 0.052272558760287684, "grad_norm": 0.3484935164451599, "learning_rate": 0.0001929982495623906, "loss": 0.5759, "step": 705 }, { "epoch": 0.052346704233706534, "grad_norm": 0.38622093200683594, "learning_rate": 0.00019298824706176547, "loss": 0.6177, "step": 706 }, { "epoch": 0.05242084970712538, "grad_norm": 0.397634357213974, "learning_rate": 0.00019297824456114028, "loss": 0.5728, "step": 707 }, { "epoch": 0.05249499518054423, "grad_norm": 0.39391252398490906, "learning_rate": 0.00019296824206051515, "loss": 0.5717, "step": 708 }, { "epoch": 0.05256914065396308, "grad_norm": 0.39965254068374634, "learning_rate": 0.00019295823955988998, "loss": 0.5952, "step": 709 }, { "epoch": 0.05264328612738192, "grad_norm": 0.4154767394065857, "learning_rate": 0.00019294823705926485, "loss": 0.6009, "step": 710 }, { "epoch": 0.05271743160080077, "grad_norm": 0.409214049577713, "learning_rate": 0.00019293823455863966, "loss": 0.5957, "step": 711 }, { "epoch": 0.052791577074219616, "grad_norm": 0.3859963119029999, "learning_rate": 0.00019292823205801452, "loss": 0.617, "step": 712 }, { "epoch": 0.052865722547638466, "grad_norm": 0.3979794383049011, "learning_rate": 0.00019291822955738936, "loss": 0.6398, "step": 713 }, { "epoch": 0.05293986802105732, "grad_norm": 0.41251200437545776, "learning_rate": 0.0001929082270567642, "loss": 0.5508, "step": 714 }, { "epoch": 0.05301401349447616, "grad_norm": 0.3886280059814453, "learning_rate": 0.00019289822455613903, "loss": 0.5936, "step": 715 }, { "epoch": 0.05308815896789501, "grad_norm": 0.41248464584350586, "learning_rate": 0.0001928882220555139, "loss": 0.6118, "step": 716 }, { "epoch": 0.05316230444131386, "grad_norm": 0.3653118908405304, "learning_rate": 0.00019287821955488873, "loss": 0.5717, "step": 717 }, { "epoch": 0.053236449914732704, "grad_norm": 0.41846227645874023, "learning_rate": 0.00019286821705426357, "loss": 0.6056, "step": 718 }, { "epoch": 0.053310595388151555, "grad_norm": 0.3842783272266388, "learning_rate": 0.0001928582145536384, "loss": 0.5612, "step": 719 }, { "epoch": 0.0533847408615704, "grad_norm": 0.4110718071460724, "learning_rate": 0.00019284821205301327, "loss": 0.6134, "step": 720 }, { "epoch": 0.05345888633498925, "grad_norm": 0.390304297208786, "learning_rate": 0.0001928382095523881, "loss": 0.5885, "step": 721 }, { "epoch": 0.0535330318084081, "grad_norm": 0.42014122009277344, "learning_rate": 0.00019282820705176295, "loss": 0.6488, "step": 722 }, { "epoch": 0.05360717728182694, "grad_norm": 0.39040279388427734, "learning_rate": 0.00019281820455113778, "loss": 0.6349, "step": 723 }, { "epoch": 0.05368132275524579, "grad_norm": 0.38820987939834595, "learning_rate": 0.00019280820205051265, "loss": 0.582, "step": 724 }, { "epoch": 0.053755468228664643, "grad_norm": 0.39792391657829285, "learning_rate": 0.00019279819954988746, "loss": 0.5944, "step": 725 }, { "epoch": 0.05382961370208349, "grad_norm": 0.3907301127910614, "learning_rate": 0.00019278819704926232, "loss": 0.6023, "step": 726 }, { "epoch": 0.05390375917550234, "grad_norm": 0.38883036375045776, "learning_rate": 0.00019277819454863716, "loss": 0.6075, "step": 727 }, { "epoch": 0.05397790464892118, "grad_norm": 0.3439013957977295, "learning_rate": 0.00019276819204801202, "loss": 0.5778, "step": 728 }, { "epoch": 0.05405205012234003, "grad_norm": 0.37905174493789673, "learning_rate": 0.00019275818954738686, "loss": 0.5687, "step": 729 }, { "epoch": 0.05412619559575888, "grad_norm": 0.5157143473625183, "learning_rate": 0.0001927481870467617, "loss": 0.5963, "step": 730 }, { "epoch": 0.054200341069177725, "grad_norm": 0.3831190764904022, "learning_rate": 0.00019273818454613653, "loss": 0.5533, "step": 731 }, { "epoch": 0.054274486542596576, "grad_norm": 0.3544011414051056, "learning_rate": 0.0001927281820455114, "loss": 0.5696, "step": 732 }, { "epoch": 0.05434863201601542, "grad_norm": 0.3868606388568878, "learning_rate": 0.00019271817954488624, "loss": 0.6286, "step": 733 }, { "epoch": 0.05442277748943427, "grad_norm": 0.43853119015693665, "learning_rate": 0.00019270817704426107, "loss": 0.619, "step": 734 }, { "epoch": 0.05449692296285312, "grad_norm": 0.43267589807510376, "learning_rate": 0.0001926981745436359, "loss": 0.5731, "step": 735 }, { "epoch": 0.05457106843627196, "grad_norm": 0.3789524435997009, "learning_rate": 0.00019268817204301077, "loss": 0.5981, "step": 736 }, { "epoch": 0.054645213909690814, "grad_norm": 0.36814093589782715, "learning_rate": 0.0001926781695423856, "loss": 0.566, "step": 737 }, { "epoch": 0.054719359383109664, "grad_norm": 0.38567209243774414, "learning_rate": 0.00019266816704176045, "loss": 0.6201, "step": 738 }, { "epoch": 0.05479350485652851, "grad_norm": 0.4298228919506073, "learning_rate": 0.0001926581645411353, "loss": 0.6449, "step": 739 }, { "epoch": 0.05486765032994736, "grad_norm": 0.37596872448921204, "learning_rate": 0.00019264816204051012, "loss": 0.5823, "step": 740 }, { "epoch": 0.0549417958033662, "grad_norm": 0.37924903631210327, "learning_rate": 0.000192638159539885, "loss": 0.6161, "step": 741 }, { "epoch": 0.05501594127678505, "grad_norm": 0.4088937044143677, "learning_rate": 0.00019262815703925982, "loss": 0.6392, "step": 742 }, { "epoch": 0.0550900867502039, "grad_norm": 0.36918213963508606, "learning_rate": 0.0001926181545386347, "loss": 0.6124, "step": 743 }, { "epoch": 0.055164232223622746, "grad_norm": 0.37800708413124084, "learning_rate": 0.0001926081520380095, "loss": 0.6019, "step": 744 }, { "epoch": 0.055238377697041596, "grad_norm": 0.3837113678455353, "learning_rate": 0.00019259814953738436, "loss": 0.5568, "step": 745 }, { "epoch": 0.05531252317046045, "grad_norm": 0.4137560725212097, "learning_rate": 0.0001925881470367592, "loss": 0.6066, "step": 746 }, { "epoch": 0.05538666864387929, "grad_norm": 0.3916035294532776, "learning_rate": 0.00019257814453613406, "loss": 0.5895, "step": 747 }, { "epoch": 0.05546081411729814, "grad_norm": 0.38012078404426575, "learning_rate": 0.00019256814203550887, "loss": 0.5638, "step": 748 }, { "epoch": 0.055534959590716984, "grad_norm": 0.4043962359428406, "learning_rate": 0.00019255813953488374, "loss": 0.6127, "step": 749 }, { "epoch": 0.055609105064135834, "grad_norm": 0.38461068272590637, "learning_rate": 0.00019254813703425857, "loss": 0.5717, "step": 750 }, { "epoch": 0.055683250537554685, "grad_norm": 0.41524365544319153, "learning_rate": 0.0001925381345336334, "loss": 0.621, "step": 751 }, { "epoch": 0.05575739601097353, "grad_norm": 0.3793230950832367, "learning_rate": 0.00019252813203300825, "loss": 0.6125, "step": 752 }, { "epoch": 0.05583154148439238, "grad_norm": 0.4164716303348541, "learning_rate": 0.0001925181295323831, "loss": 0.6201, "step": 753 }, { "epoch": 0.05590568695781122, "grad_norm": 0.3847607970237732, "learning_rate": 0.00019250812703175795, "loss": 0.5865, "step": 754 }, { "epoch": 0.05597983243123007, "grad_norm": 0.4311927258968353, "learning_rate": 0.0001924981245311328, "loss": 0.6767, "step": 755 }, { "epoch": 0.05605397790464892, "grad_norm": 0.3945215940475464, "learning_rate": 0.00019248812203050762, "loss": 0.6341, "step": 756 }, { "epoch": 0.05612812337806777, "grad_norm": 0.37485525012016296, "learning_rate": 0.0001924781195298825, "loss": 0.5553, "step": 757 }, { "epoch": 0.05620226885148662, "grad_norm": 0.3919658660888672, "learning_rate": 0.00019246811702925733, "loss": 0.5912, "step": 758 }, { "epoch": 0.05627641432490547, "grad_norm": 0.4637816846370697, "learning_rate": 0.00019245811452863216, "loss": 0.5912, "step": 759 }, { "epoch": 0.05635055979832431, "grad_norm": 0.4015760123729706, "learning_rate": 0.000192448112028007, "loss": 0.6296, "step": 760 }, { "epoch": 0.05642470527174316, "grad_norm": 0.38846564292907715, "learning_rate": 0.00019243810952738186, "loss": 0.5924, "step": 761 }, { "epoch": 0.056498850745162005, "grad_norm": 0.3646942973136902, "learning_rate": 0.0001924281070267567, "loss": 0.5427, "step": 762 }, { "epoch": 0.056572996218580855, "grad_norm": 0.3708893954753876, "learning_rate": 0.00019241810452613154, "loss": 0.6129, "step": 763 }, { "epoch": 0.056647141691999706, "grad_norm": 0.3805895447731018, "learning_rate": 0.00019240810202550638, "loss": 0.5928, "step": 764 }, { "epoch": 0.05672128716541855, "grad_norm": 0.3829721212387085, "learning_rate": 0.00019239809952488124, "loss": 0.5913, "step": 765 }, { "epoch": 0.0567954326388374, "grad_norm": 0.36960792541503906, "learning_rate": 0.00019238809702425608, "loss": 0.6064, "step": 766 }, { "epoch": 0.05686957811225625, "grad_norm": 0.3850354552268982, "learning_rate": 0.0001923780945236309, "loss": 0.5974, "step": 767 }, { "epoch": 0.05694372358567509, "grad_norm": 0.4251306354999542, "learning_rate": 0.00019236809202300575, "loss": 0.5941, "step": 768 }, { "epoch": 0.057017869059093944, "grad_norm": 0.41377463936805725, "learning_rate": 0.00019235808952238061, "loss": 0.5935, "step": 769 }, { "epoch": 0.05709201453251279, "grad_norm": 0.4122122824192047, "learning_rate": 0.00019234808702175545, "loss": 0.597, "step": 770 }, { "epoch": 0.05716616000593164, "grad_norm": 0.45548760890960693, "learning_rate": 0.0001923380845211303, "loss": 0.5915, "step": 771 }, { "epoch": 0.05724030547935049, "grad_norm": 0.4150952696800232, "learning_rate": 0.00019232808202050515, "loss": 0.6014, "step": 772 }, { "epoch": 0.05731445095276933, "grad_norm": 0.40556520223617554, "learning_rate": 0.00019231807951988, "loss": 0.5458, "step": 773 }, { "epoch": 0.05738859642618818, "grad_norm": 0.37604522705078125, "learning_rate": 0.00019230807701925483, "loss": 0.5907, "step": 774 }, { "epoch": 0.05746274189960703, "grad_norm": 0.3935409486293793, "learning_rate": 0.00019229807451862966, "loss": 0.5644, "step": 775 }, { "epoch": 0.057536887373025876, "grad_norm": 0.36781561374664307, "learning_rate": 0.00019228807201800453, "loss": 0.6056, "step": 776 }, { "epoch": 0.057611032846444726, "grad_norm": 0.44002193212509155, "learning_rate": 0.00019227806951737934, "loss": 0.5603, "step": 777 }, { "epoch": 0.05768517831986357, "grad_norm": 0.42306622862815857, "learning_rate": 0.0001922680670167542, "loss": 0.645, "step": 778 }, { "epoch": 0.05775932379328242, "grad_norm": 0.3748851418495178, "learning_rate": 0.00019225806451612904, "loss": 0.5924, "step": 779 }, { "epoch": 0.05783346926670127, "grad_norm": 0.38309699296951294, "learning_rate": 0.0001922480620155039, "loss": 0.5794, "step": 780 }, { "epoch": 0.057907614740120114, "grad_norm": 0.4218568205833435, "learning_rate": 0.00019223805951487871, "loss": 0.6198, "step": 781 }, { "epoch": 0.057981760213538965, "grad_norm": 0.3742879629135132, "learning_rate": 0.00019222805701425358, "loss": 0.5553, "step": 782 }, { "epoch": 0.05805590568695781, "grad_norm": 0.3696449100971222, "learning_rate": 0.00019221805451362842, "loss": 0.5563, "step": 783 }, { "epoch": 0.05813005116037666, "grad_norm": 0.38147157430648804, "learning_rate": 0.00019220805201300328, "loss": 0.571, "step": 784 }, { "epoch": 0.05820419663379551, "grad_norm": 0.41841429471969604, "learning_rate": 0.0001921980495123781, "loss": 0.6146, "step": 785 }, { "epoch": 0.05827834210721435, "grad_norm": 0.3799084722995758, "learning_rate": 0.00019218804701175295, "loss": 0.5807, "step": 786 }, { "epoch": 0.0583524875806332, "grad_norm": 0.4172448217868805, "learning_rate": 0.0001921780445111278, "loss": 0.5704, "step": 787 }, { "epoch": 0.05842663305405205, "grad_norm": 0.3853074312210083, "learning_rate": 0.00019216804201050263, "loss": 0.5899, "step": 788 }, { "epoch": 0.0585007785274709, "grad_norm": 0.41434577107429504, "learning_rate": 0.00019215803950987747, "loss": 0.5792, "step": 789 }, { "epoch": 0.05857492400088975, "grad_norm": 0.3998521864414215, "learning_rate": 0.00019214803700925233, "loss": 0.5038, "step": 790 }, { "epoch": 0.05864906947430859, "grad_norm": 0.3800047039985657, "learning_rate": 0.00019213803450862717, "loss": 0.6018, "step": 791 }, { "epoch": 0.05872321494772744, "grad_norm": 0.39157694578170776, "learning_rate": 0.000192128032008002, "loss": 0.5819, "step": 792 }, { "epoch": 0.05879736042114629, "grad_norm": 0.34956473112106323, "learning_rate": 0.00019211802950737684, "loss": 0.5181, "step": 793 }, { "epoch": 0.058871505894565135, "grad_norm": 0.4236034154891968, "learning_rate": 0.0001921080270067517, "loss": 0.6187, "step": 794 }, { "epoch": 0.058945651367983985, "grad_norm": 0.4098091423511505, "learning_rate": 0.00019209802450612654, "loss": 0.5742, "step": 795 }, { "epoch": 0.059019796841402836, "grad_norm": 0.414330393075943, "learning_rate": 0.00019208802200550138, "loss": 0.5889, "step": 796 }, { "epoch": 0.05909394231482168, "grad_norm": 0.39282673597335815, "learning_rate": 0.00019207801950487622, "loss": 0.5917, "step": 797 }, { "epoch": 0.05916808778824053, "grad_norm": 0.370039165019989, "learning_rate": 0.00019206801700425108, "loss": 0.5406, "step": 798 }, { "epoch": 0.05924223326165937, "grad_norm": 0.4214326739311218, "learning_rate": 0.00019205801450362592, "loss": 0.5947, "step": 799 }, { "epoch": 0.05931637873507822, "grad_norm": 0.3956966698169708, "learning_rate": 0.00019204801200300075, "loss": 0.5944, "step": 800 }, { "epoch": 0.059390524208497074, "grad_norm": 0.41635143756866455, "learning_rate": 0.0001920380095023756, "loss": 0.5951, "step": 801 }, { "epoch": 0.05946466968191592, "grad_norm": 0.40340930223464966, "learning_rate": 0.00019202800700175046, "loss": 0.6122, "step": 802 }, { "epoch": 0.05953881515533477, "grad_norm": 0.3999878466129303, "learning_rate": 0.0001920180045011253, "loss": 0.5605, "step": 803 }, { "epoch": 0.05961296062875361, "grad_norm": 0.39828842878341675, "learning_rate": 0.00019200800200050013, "loss": 0.6015, "step": 804 }, { "epoch": 0.05968710610217246, "grad_norm": 0.3803527355194092, "learning_rate": 0.000191997999499875, "loss": 0.5692, "step": 805 }, { "epoch": 0.05976125157559131, "grad_norm": 0.40300261974334717, "learning_rate": 0.00019198799699924983, "loss": 0.6013, "step": 806 }, { "epoch": 0.059835397049010156, "grad_norm": 0.4392409026622772, "learning_rate": 0.00019197799449862467, "loss": 0.6457, "step": 807 }, { "epoch": 0.059909542522429006, "grad_norm": 0.42537879943847656, "learning_rate": 0.0001919679919979995, "loss": 0.5624, "step": 808 }, { "epoch": 0.059983687995847856, "grad_norm": 0.39952895045280457, "learning_rate": 0.00019195798949737437, "loss": 0.6006, "step": 809 }, { "epoch": 0.0600578334692667, "grad_norm": 0.3890600800514221, "learning_rate": 0.0001919479869967492, "loss": 0.5911, "step": 810 }, { "epoch": 0.06013197894268555, "grad_norm": 0.3848801255226135, "learning_rate": 0.00019193798449612404, "loss": 0.6029, "step": 811 }, { "epoch": 0.060206124416104394, "grad_norm": 0.4020692706108093, "learning_rate": 0.00019192798199549888, "loss": 0.5427, "step": 812 }, { "epoch": 0.060280269889523244, "grad_norm": 2.0448451042175293, "learning_rate": 0.00019191797949487374, "loss": 0.5809, "step": 813 }, { "epoch": 0.060354415362942095, "grad_norm": 0.42193761467933655, "learning_rate": 0.00019190797699424855, "loss": 0.5474, "step": 814 }, { "epoch": 0.06042856083636094, "grad_norm": 0.37256479263305664, "learning_rate": 0.00019189797449362342, "loss": 0.5469, "step": 815 }, { "epoch": 0.06050270630977979, "grad_norm": 0.42928430438041687, "learning_rate": 0.00019188797199299826, "loss": 0.611, "step": 816 }, { "epoch": 0.06057685178319864, "grad_norm": 0.46261805295944214, "learning_rate": 0.00019187796949237312, "loss": 0.5535, "step": 817 }, { "epoch": 0.06065099725661748, "grad_norm": 0.37934446334838867, "learning_rate": 0.00019186796699174793, "loss": 0.531, "step": 818 }, { "epoch": 0.06072514273003633, "grad_norm": 0.492903470993042, "learning_rate": 0.0001918579644911228, "loss": 0.5774, "step": 819 }, { "epoch": 0.060799288203455176, "grad_norm": 0.5175749659538269, "learning_rate": 0.00019184796199049763, "loss": 0.5928, "step": 820 }, { "epoch": 0.06087343367687403, "grad_norm": 0.4261670708656311, "learning_rate": 0.0001918379594898725, "loss": 0.6265, "step": 821 }, { "epoch": 0.06094757915029288, "grad_norm": 0.48903170228004456, "learning_rate": 0.0001918279569892473, "loss": 0.592, "step": 822 }, { "epoch": 0.06102172462371172, "grad_norm": 0.41740211844444275, "learning_rate": 0.00019181795448862217, "loss": 0.5826, "step": 823 }, { "epoch": 0.06109587009713057, "grad_norm": 0.4063926339149475, "learning_rate": 0.000191807951987997, "loss": 0.5809, "step": 824 }, { "epoch": 0.06117001557054942, "grad_norm": 0.6722632646560669, "learning_rate": 0.00019179794948737184, "loss": 0.54, "step": 825 }, { "epoch": 0.061244161043968265, "grad_norm": 0.35439062118530273, "learning_rate": 0.00019178794698674668, "loss": 0.5403, "step": 826 }, { "epoch": 0.061318306517387115, "grad_norm": 0.42818355560302734, "learning_rate": 0.00019177794448612155, "loss": 0.5816, "step": 827 }, { "epoch": 0.06139245199080596, "grad_norm": 0.3872561454772949, "learning_rate": 0.00019176794198549638, "loss": 0.5326, "step": 828 }, { "epoch": 0.06146659746422481, "grad_norm": 0.4323882460594177, "learning_rate": 0.00019175793948487122, "loss": 0.5483, "step": 829 }, { "epoch": 0.06154074293764366, "grad_norm": 0.38045835494995117, "learning_rate": 0.00019174793698424606, "loss": 0.5655, "step": 830 }, { "epoch": 0.0616148884110625, "grad_norm": 0.45046383142471313, "learning_rate": 0.00019173793448362092, "loss": 0.6088, "step": 831 }, { "epoch": 0.061689033884481353, "grad_norm": 0.40152910351753235, "learning_rate": 0.00019172793198299576, "loss": 0.6287, "step": 832 }, { "epoch": 0.0617631793579002, "grad_norm": 0.3850104510784149, "learning_rate": 0.0001917179294823706, "loss": 0.54, "step": 833 }, { "epoch": 0.06183732483131905, "grad_norm": 0.3881586492061615, "learning_rate": 0.00019170792698174543, "loss": 0.5491, "step": 834 }, { "epoch": 0.0619114703047379, "grad_norm": 0.5173059701919556, "learning_rate": 0.0001916979244811203, "loss": 0.6006, "step": 835 }, { "epoch": 0.06198561577815674, "grad_norm": 0.4466266334056854, "learning_rate": 0.00019168792198049513, "loss": 0.6123, "step": 836 }, { "epoch": 0.06205976125157559, "grad_norm": 0.3726794123649597, "learning_rate": 0.00019167791947986997, "loss": 0.5462, "step": 837 }, { "epoch": 0.06213390672499444, "grad_norm": 0.4030221998691559, "learning_rate": 0.00019166791697924483, "loss": 0.5624, "step": 838 }, { "epoch": 0.062208052198413286, "grad_norm": 0.43138089776039124, "learning_rate": 0.00019165791447861967, "loss": 0.5663, "step": 839 }, { "epoch": 0.062282197671832136, "grad_norm": 0.41736334562301636, "learning_rate": 0.0001916479119779945, "loss": 0.6293, "step": 840 }, { "epoch": 0.06235634314525098, "grad_norm": 0.49237731099128723, "learning_rate": 0.00019163790947736935, "loss": 0.578, "step": 841 }, { "epoch": 0.06243048861866983, "grad_norm": 0.3872168958187103, "learning_rate": 0.0001916279069767442, "loss": 0.5763, "step": 842 }, { "epoch": 0.06250463409208867, "grad_norm": 0.4326758086681366, "learning_rate": 0.00019161790447611905, "loss": 0.6034, "step": 843 }, { "epoch": 0.06257877956550753, "grad_norm": 0.3777730464935303, "learning_rate": 0.00019160790197549388, "loss": 0.5904, "step": 844 }, { "epoch": 0.06265292503892637, "grad_norm": 0.43551114201545715, "learning_rate": 0.00019159789947486872, "loss": 0.5377, "step": 845 }, { "epoch": 0.06272707051234522, "grad_norm": 0.37813490629196167, "learning_rate": 0.00019158789697424359, "loss": 0.5678, "step": 846 }, { "epoch": 0.06280121598576408, "grad_norm": 0.391948401927948, "learning_rate": 0.00019157789447361842, "loss": 0.5726, "step": 847 }, { "epoch": 0.06287536145918292, "grad_norm": 0.3844928443431854, "learning_rate": 0.00019156789197299326, "loss": 0.6023, "step": 848 }, { "epoch": 0.06294950693260176, "grad_norm": 0.45449379086494446, "learning_rate": 0.0001915578894723681, "loss": 0.5903, "step": 849 }, { "epoch": 0.06302365240602062, "grad_norm": 0.3847752511501312, "learning_rate": 0.00019154788697174296, "loss": 0.603, "step": 850 }, { "epoch": 0.06309779787943946, "grad_norm": 0.3635810911655426, "learning_rate": 0.00019153788447111777, "loss": 0.5225, "step": 851 }, { "epoch": 0.0631719433528583, "grad_norm": 0.3544882833957672, "learning_rate": 0.00019152788197049264, "loss": 0.5268, "step": 852 }, { "epoch": 0.06324608882627715, "grad_norm": 0.4106333255767822, "learning_rate": 0.00019151787946986747, "loss": 0.5727, "step": 853 }, { "epoch": 0.06332023429969601, "grad_norm": 0.4182192385196686, "learning_rate": 0.00019150787696924234, "loss": 0.58, "step": 854 }, { "epoch": 0.06339437977311485, "grad_norm": 0.4378495514392853, "learning_rate": 0.00019149787446861715, "loss": 0.595, "step": 855 }, { "epoch": 0.0634685252465337, "grad_norm": 0.43167588114738464, "learning_rate": 0.000191487871967992, "loss": 0.5912, "step": 856 }, { "epoch": 0.06354267071995255, "grad_norm": 0.41413065791130066, "learning_rate": 0.00019147786946736685, "loss": 0.6633, "step": 857 }, { "epoch": 0.0636168161933714, "grad_norm": 0.36019644141197205, "learning_rate": 0.0001914678669667417, "loss": 0.5487, "step": 858 }, { "epoch": 0.06369096166679024, "grad_norm": 0.38462722301483154, "learning_rate": 0.00019145786446611652, "loss": 0.617, "step": 859 }, { "epoch": 0.0637651071402091, "grad_norm": 0.3647100627422333, "learning_rate": 0.00019144786196549139, "loss": 0.5921, "step": 860 }, { "epoch": 0.06383925261362794, "grad_norm": 0.44536322355270386, "learning_rate": 0.00019143785946486622, "loss": 0.6298, "step": 861 }, { "epoch": 0.06391339808704678, "grad_norm": 0.4321828782558441, "learning_rate": 0.00019142785696424106, "loss": 0.5711, "step": 862 }, { "epoch": 0.06398754356046564, "grad_norm": 0.3695819675922394, "learning_rate": 0.0001914178544636159, "loss": 0.5902, "step": 863 }, { "epoch": 0.06406168903388448, "grad_norm": 0.4226962625980377, "learning_rate": 0.00019140785196299076, "loss": 0.5945, "step": 864 }, { "epoch": 0.06413583450730333, "grad_norm": 0.4675977826118469, "learning_rate": 0.0001913978494623656, "loss": 0.6311, "step": 865 }, { "epoch": 0.06420997998072217, "grad_norm": 0.39813390374183655, "learning_rate": 0.00019138784696174044, "loss": 0.5733, "step": 866 }, { "epoch": 0.06428412545414103, "grad_norm": 0.41872328519821167, "learning_rate": 0.00019137784446111527, "loss": 0.5785, "step": 867 }, { "epoch": 0.06435827092755987, "grad_norm": 0.3601435720920563, "learning_rate": 0.00019136784196049014, "loss": 0.546, "step": 868 }, { "epoch": 0.06443241640097871, "grad_norm": 0.4034818410873413, "learning_rate": 0.00019135783945986497, "loss": 0.6023, "step": 869 }, { "epoch": 0.06450656187439757, "grad_norm": 0.3731749355792999, "learning_rate": 0.0001913478369592398, "loss": 0.5547, "step": 870 }, { "epoch": 0.06458070734781642, "grad_norm": 0.4028605818748474, "learning_rate": 0.00019133783445861468, "loss": 0.5888, "step": 871 }, { "epoch": 0.06465485282123526, "grad_norm": 0.41001105308532715, "learning_rate": 0.0001913278319579895, "loss": 0.6034, "step": 872 }, { "epoch": 0.06472899829465412, "grad_norm": 0.36585769057273865, "learning_rate": 0.00019131782945736435, "loss": 0.5637, "step": 873 }, { "epoch": 0.06480314376807296, "grad_norm": 0.39999380707740784, "learning_rate": 0.0001913078269567392, "loss": 0.5771, "step": 874 }, { "epoch": 0.0648772892414918, "grad_norm": 0.3611980676651001, "learning_rate": 0.00019129782445611405, "loss": 0.5564, "step": 875 }, { "epoch": 0.06495143471491066, "grad_norm": 0.39500054717063904, "learning_rate": 0.0001912878219554889, "loss": 0.6046, "step": 876 }, { "epoch": 0.0650255801883295, "grad_norm": 0.40393248200416565, "learning_rate": 0.00019127781945486373, "loss": 0.5703, "step": 877 }, { "epoch": 0.06509972566174835, "grad_norm": 0.3855907917022705, "learning_rate": 0.00019126781695423856, "loss": 0.5719, "step": 878 }, { "epoch": 0.06517387113516719, "grad_norm": 0.39853930473327637, "learning_rate": 0.00019125781445361343, "loss": 0.6005, "step": 879 }, { "epoch": 0.06524801660858605, "grad_norm": 0.4256182610988617, "learning_rate": 0.00019124781195298826, "loss": 0.6099, "step": 880 }, { "epoch": 0.06532216208200489, "grad_norm": 0.37904152274131775, "learning_rate": 0.0001912378094523631, "loss": 0.5804, "step": 881 }, { "epoch": 0.06539630755542374, "grad_norm": 0.3615025579929352, "learning_rate": 0.00019122780695173794, "loss": 0.5732, "step": 882 }, { "epoch": 0.06547045302884259, "grad_norm": 0.37143880128860474, "learning_rate": 0.0001912178044511128, "loss": 0.5996, "step": 883 }, { "epoch": 0.06554459850226144, "grad_norm": 0.44165489077568054, "learning_rate": 0.00019120780195048764, "loss": 0.6115, "step": 884 }, { "epoch": 0.06561874397568028, "grad_norm": 0.3620339632034302, "learning_rate": 0.00019119779944986248, "loss": 0.5445, "step": 885 }, { "epoch": 0.06569288944909914, "grad_norm": 0.38263413310050964, "learning_rate": 0.0001911877969492373, "loss": 0.5716, "step": 886 }, { "epoch": 0.06576703492251798, "grad_norm": 0.4657733738422394, "learning_rate": 0.00019117779444861218, "loss": 0.5793, "step": 887 }, { "epoch": 0.06584118039593682, "grad_norm": 0.3986786901950836, "learning_rate": 0.000191167791947987, "loss": 0.5682, "step": 888 }, { "epoch": 0.06591532586935568, "grad_norm": 0.3807837665081024, "learning_rate": 0.00019115778944736185, "loss": 0.6009, "step": 889 }, { "epoch": 0.06598947134277452, "grad_norm": 0.3949527144432068, "learning_rate": 0.0001911477869467367, "loss": 0.5904, "step": 890 }, { "epoch": 0.06606361681619337, "grad_norm": 0.3844720721244812, "learning_rate": 0.00019113778444611155, "loss": 0.5459, "step": 891 }, { "epoch": 0.06613776228961223, "grad_norm": 0.37723368406295776, "learning_rate": 0.00019112778194548636, "loss": 0.5683, "step": 892 }, { "epoch": 0.06621190776303107, "grad_norm": 0.4161356985569, "learning_rate": 0.00019111777944486123, "loss": 0.6357, "step": 893 }, { "epoch": 0.06628605323644991, "grad_norm": 0.40984198451042175, "learning_rate": 0.00019110777694423606, "loss": 0.5833, "step": 894 }, { "epoch": 0.06636019870986876, "grad_norm": 0.36166146397590637, "learning_rate": 0.00019109777444361093, "loss": 0.5699, "step": 895 }, { "epoch": 0.06643434418328761, "grad_norm": 0.4445840120315552, "learning_rate": 0.00019108777194298574, "loss": 0.5758, "step": 896 }, { "epoch": 0.06650848965670646, "grad_norm": 0.3972358703613281, "learning_rate": 0.0001910777694423606, "loss": 0.5777, "step": 897 }, { "epoch": 0.0665826351301253, "grad_norm": 0.4148390293121338, "learning_rate": 0.00019106776694173544, "loss": 0.6477, "step": 898 }, { "epoch": 0.06665678060354416, "grad_norm": 0.39832037687301636, "learning_rate": 0.00019105776444111028, "loss": 0.5941, "step": 899 }, { "epoch": 0.066730926076963, "grad_norm": 0.3663426637649536, "learning_rate": 0.00019104776194048511, "loss": 0.5441, "step": 900 }, { "epoch": 0.06680507155038184, "grad_norm": 0.40095674991607666, "learning_rate": 0.00019103775943985998, "loss": 0.5466, "step": 901 }, { "epoch": 0.0668792170238007, "grad_norm": 0.4054796099662781, "learning_rate": 0.00019102775693923482, "loss": 0.5556, "step": 902 }, { "epoch": 0.06695336249721955, "grad_norm": 0.381262868642807, "learning_rate": 0.00019101775443860965, "loss": 0.592, "step": 903 }, { "epoch": 0.06702750797063839, "grad_norm": 0.40069830417633057, "learning_rate": 0.0001910077519379845, "loss": 0.5814, "step": 904 }, { "epoch": 0.06710165344405725, "grad_norm": 0.4046432375907898, "learning_rate": 0.00019099774943735935, "loss": 0.6019, "step": 905 }, { "epoch": 0.06717579891747609, "grad_norm": 0.4137316346168518, "learning_rate": 0.0001909877469367342, "loss": 0.5916, "step": 906 }, { "epoch": 0.06724994439089493, "grad_norm": 0.3676306903362274, "learning_rate": 0.00019097774443610903, "loss": 0.5458, "step": 907 }, { "epoch": 0.06732408986431378, "grad_norm": 0.3517340123653412, "learning_rate": 0.0001909677419354839, "loss": 0.5458, "step": 908 }, { "epoch": 0.06739823533773263, "grad_norm": 0.361502468585968, "learning_rate": 0.00019095773943485873, "loss": 0.5901, "step": 909 }, { "epoch": 0.06747238081115148, "grad_norm": 0.3809692859649658, "learning_rate": 0.00019094773693423357, "loss": 0.5869, "step": 910 }, { "epoch": 0.06754652628457032, "grad_norm": 0.4271315336227417, "learning_rate": 0.0001909377344336084, "loss": 0.5795, "step": 911 }, { "epoch": 0.06762067175798918, "grad_norm": 0.3595374524593353, "learning_rate": 0.00019092773193298327, "loss": 0.5725, "step": 912 }, { "epoch": 0.06769481723140802, "grad_norm": 0.3735651671886444, "learning_rate": 0.0001909177294323581, "loss": 0.5698, "step": 913 }, { "epoch": 0.06776896270482687, "grad_norm": 0.37508994340896606, "learning_rate": 0.00019090772693173294, "loss": 0.6138, "step": 914 }, { "epoch": 0.06784310817824572, "grad_norm": 0.38414978981018066, "learning_rate": 0.00019089772443110778, "loss": 0.5482, "step": 915 }, { "epoch": 0.06791725365166457, "grad_norm": 0.3660641014575958, "learning_rate": 0.00019088772193048264, "loss": 0.5611, "step": 916 }, { "epoch": 0.06799139912508341, "grad_norm": 0.3634829521179199, "learning_rate": 0.00019087771942985748, "loss": 0.5764, "step": 917 }, { "epoch": 0.06806554459850227, "grad_norm": 0.39042338728904724, "learning_rate": 0.00019086771692923232, "loss": 0.5485, "step": 918 }, { "epoch": 0.06813969007192111, "grad_norm": 0.4835030436515808, "learning_rate": 0.00019085771442860715, "loss": 0.6343, "step": 919 }, { "epoch": 0.06821383554533995, "grad_norm": 0.3875594437122345, "learning_rate": 0.00019084771192798202, "loss": 0.5808, "step": 920 }, { "epoch": 0.06828798101875881, "grad_norm": 0.35438865423202515, "learning_rate": 0.00019083770942735686, "loss": 0.5467, "step": 921 }, { "epoch": 0.06836212649217766, "grad_norm": 0.3627762794494629, "learning_rate": 0.0001908277069267317, "loss": 0.5416, "step": 922 }, { "epoch": 0.0684362719655965, "grad_norm": 0.40402892231941223, "learning_rate": 0.00019081770442610653, "loss": 0.5747, "step": 923 }, { "epoch": 0.06851041743901534, "grad_norm": 0.3914015591144562, "learning_rate": 0.0001908077019254814, "loss": 0.5386, "step": 924 }, { "epoch": 0.0685845629124342, "grad_norm": 0.5077201128005981, "learning_rate": 0.0001907976994248562, "loss": 0.555, "step": 925 }, { "epoch": 0.06865870838585304, "grad_norm": 0.4394811689853668, "learning_rate": 0.00019078769692423107, "loss": 0.6341, "step": 926 }, { "epoch": 0.06873285385927189, "grad_norm": 0.38517656922340393, "learning_rate": 0.0001907776944236059, "loss": 0.5672, "step": 927 }, { "epoch": 0.06880699933269074, "grad_norm": 0.3772699534893036, "learning_rate": 0.00019076769192298077, "loss": 0.5495, "step": 928 }, { "epoch": 0.06888114480610959, "grad_norm": 0.40809357166290283, "learning_rate": 0.00019075768942235558, "loss": 0.6174, "step": 929 }, { "epoch": 0.06895529027952843, "grad_norm": 0.4525878131389618, "learning_rate": 0.00019074768692173044, "loss": 0.595, "step": 930 }, { "epoch": 0.06902943575294729, "grad_norm": 0.41514071822166443, "learning_rate": 0.00019073768442110528, "loss": 0.5701, "step": 931 }, { "epoch": 0.06910358122636613, "grad_norm": 0.4157116115093231, "learning_rate": 0.00019072768192048014, "loss": 0.5699, "step": 932 }, { "epoch": 0.06917772669978497, "grad_norm": 0.3949311077594757, "learning_rate": 0.00019071767941985495, "loss": 0.5623, "step": 933 }, { "epoch": 0.06925187217320383, "grad_norm": 0.49557116627693176, "learning_rate": 0.00019070767691922982, "loss": 0.57, "step": 934 }, { "epoch": 0.06932601764662268, "grad_norm": 0.4115126430988312, "learning_rate": 0.00019069767441860466, "loss": 0.5748, "step": 935 }, { "epoch": 0.06940016312004152, "grad_norm": 0.41264867782592773, "learning_rate": 0.0001906876719179795, "loss": 0.5109, "step": 936 }, { "epoch": 0.06947430859346036, "grad_norm": 0.3665938079357147, "learning_rate": 0.00019067766941735433, "loss": 0.5712, "step": 937 }, { "epoch": 0.06954845406687922, "grad_norm": 0.41107264161109924, "learning_rate": 0.0001906676669167292, "loss": 0.5614, "step": 938 }, { "epoch": 0.06962259954029806, "grad_norm": 0.4660252332687378, "learning_rate": 0.00019065766441610403, "loss": 0.6148, "step": 939 }, { "epoch": 0.06969674501371691, "grad_norm": 0.43861186504364014, "learning_rate": 0.00019064766191547887, "loss": 0.6157, "step": 940 }, { "epoch": 0.06977089048713576, "grad_norm": 0.4101986885070801, "learning_rate": 0.00019063765941485373, "loss": 0.546, "step": 941 }, { "epoch": 0.06984503596055461, "grad_norm": 0.40166762471199036, "learning_rate": 0.00019062765691422857, "loss": 0.5891, "step": 942 }, { "epoch": 0.06991918143397345, "grad_norm": 0.45155513286590576, "learning_rate": 0.0001906176544136034, "loss": 0.5637, "step": 943 }, { "epoch": 0.06999332690739231, "grad_norm": 0.4644082486629486, "learning_rate": 0.00019060765191297824, "loss": 0.5961, "step": 944 }, { "epoch": 0.07006747238081115, "grad_norm": 0.37560588121414185, "learning_rate": 0.0001905976494123531, "loss": 0.6041, "step": 945 }, { "epoch": 0.07014161785423, "grad_norm": 0.39714404940605164, "learning_rate": 0.00019058764691172795, "loss": 0.5831, "step": 946 }, { "epoch": 0.07021576332764885, "grad_norm": 0.44821277260780334, "learning_rate": 0.0001905776444111028, "loss": 0.6006, "step": 947 }, { "epoch": 0.0702899088010677, "grad_norm": 0.3865433633327484, "learning_rate": 0.00019056764191047762, "loss": 0.5621, "step": 948 }, { "epoch": 0.07036405427448654, "grad_norm": 0.40650057792663574, "learning_rate": 0.00019055763940985248, "loss": 0.5882, "step": 949 }, { "epoch": 0.0704381997479054, "grad_norm": 0.3808147609233856, "learning_rate": 0.00019054763690922732, "loss": 0.5655, "step": 950 }, { "epoch": 0.07051234522132424, "grad_norm": 0.355453759431839, "learning_rate": 0.00019053763440860216, "loss": 0.5318, "step": 951 }, { "epoch": 0.07058649069474308, "grad_norm": 0.3942928612232208, "learning_rate": 0.000190527631907977, "loss": 0.5966, "step": 952 }, { "epoch": 0.07066063616816193, "grad_norm": 0.37817996740341187, "learning_rate": 0.00019051762940735186, "loss": 0.5526, "step": 953 }, { "epoch": 0.07073478164158079, "grad_norm": 0.4173699915409088, "learning_rate": 0.0001905076269067267, "loss": 0.6034, "step": 954 }, { "epoch": 0.07080892711499963, "grad_norm": 0.44551777839660645, "learning_rate": 0.00019049762440610153, "loss": 0.6029, "step": 955 }, { "epoch": 0.07088307258841847, "grad_norm": 0.37072262167930603, "learning_rate": 0.00019048762190547637, "loss": 0.5418, "step": 956 }, { "epoch": 0.07095721806183733, "grad_norm": 0.4470019042491913, "learning_rate": 0.00019047761940485123, "loss": 0.5891, "step": 957 }, { "epoch": 0.07103136353525617, "grad_norm": 0.526253342628479, "learning_rate": 0.00019046761690422607, "loss": 0.5437, "step": 958 }, { "epoch": 0.07110550900867502, "grad_norm": 0.3777520954608917, "learning_rate": 0.0001904576144036009, "loss": 0.5787, "step": 959 }, { "epoch": 0.07117965448209387, "grad_norm": 0.3762562572956085, "learning_rate": 0.00019044761190297575, "loss": 0.5913, "step": 960 }, { "epoch": 0.07125379995551272, "grad_norm": 0.45989665389060974, "learning_rate": 0.0001904376094023506, "loss": 0.6245, "step": 961 }, { "epoch": 0.07132794542893156, "grad_norm": 0.4293542504310608, "learning_rate": 0.00019042760690172542, "loss": 0.5663, "step": 962 }, { "epoch": 0.07140209090235042, "grad_norm": 0.3906775116920471, "learning_rate": 0.00019041760440110028, "loss": 0.5831, "step": 963 }, { "epoch": 0.07147623637576926, "grad_norm": 0.3945332169532776, "learning_rate": 0.00019040760190047512, "loss": 0.6051, "step": 964 }, { "epoch": 0.0715503818491881, "grad_norm": 0.402756005525589, "learning_rate": 0.00019039759939984999, "loss": 0.6313, "step": 965 }, { "epoch": 0.07162452732260695, "grad_norm": 0.3569755256175995, "learning_rate": 0.0001903875968992248, "loss": 0.5652, "step": 966 }, { "epoch": 0.0716986727960258, "grad_norm": 0.3632189929485321, "learning_rate": 0.00019037759439859966, "loss": 0.5477, "step": 967 }, { "epoch": 0.07177281826944465, "grad_norm": 0.3816775381565094, "learning_rate": 0.0001903675918979745, "loss": 0.5737, "step": 968 }, { "epoch": 0.07184696374286349, "grad_norm": 0.3429560959339142, "learning_rate": 0.00019035758939734936, "loss": 0.5219, "step": 969 }, { "epoch": 0.07192110921628235, "grad_norm": 0.3472171425819397, "learning_rate": 0.00019034758689672417, "loss": 0.5677, "step": 970 }, { "epoch": 0.0719952546897012, "grad_norm": 0.4168139398097992, "learning_rate": 0.00019033758439609904, "loss": 0.5674, "step": 971 }, { "epoch": 0.07206940016312004, "grad_norm": 0.36297479271888733, "learning_rate": 0.00019032758189547387, "loss": 0.5382, "step": 972 }, { "epoch": 0.0721435456365389, "grad_norm": 0.41933533549308777, "learning_rate": 0.0001903175793948487, "loss": 0.6426, "step": 973 }, { "epoch": 0.07221769110995774, "grad_norm": 0.384814977645874, "learning_rate": 0.00019030757689422357, "loss": 0.6093, "step": 974 }, { "epoch": 0.07229183658337658, "grad_norm": 0.41343939304351807, "learning_rate": 0.0001902975743935984, "loss": 0.5662, "step": 975 }, { "epoch": 0.07236598205679544, "grad_norm": 0.38459399342536926, "learning_rate": 0.00019028757189297325, "loss": 0.5604, "step": 976 }, { "epoch": 0.07244012753021428, "grad_norm": 0.4143699109554291, "learning_rate": 0.00019027756939234808, "loss": 0.6003, "step": 977 }, { "epoch": 0.07251427300363313, "grad_norm": 0.45354387164115906, "learning_rate": 0.00019026756689172295, "loss": 0.6009, "step": 978 }, { "epoch": 0.07258841847705197, "grad_norm": 0.44543492794036865, "learning_rate": 0.00019025756439109779, "loss": 0.5514, "step": 979 }, { "epoch": 0.07266256395047083, "grad_norm": 0.38826486468315125, "learning_rate": 0.00019024756189047262, "loss": 0.5782, "step": 980 }, { "epoch": 0.07273670942388967, "grad_norm": 0.38497403264045715, "learning_rate": 0.00019023755938984746, "loss": 0.5787, "step": 981 }, { "epoch": 0.07281085489730851, "grad_norm": 0.4145567715167999, "learning_rate": 0.00019022755688922232, "loss": 0.5723, "step": 982 }, { "epoch": 0.07288500037072737, "grad_norm": 0.418400377035141, "learning_rate": 0.00019021755438859716, "loss": 0.5725, "step": 983 }, { "epoch": 0.07295914584414621, "grad_norm": 0.380609393119812, "learning_rate": 0.00019020755188797203, "loss": 0.6157, "step": 984 }, { "epoch": 0.07303329131756506, "grad_norm": 0.38000836968421936, "learning_rate": 0.00019019754938734684, "loss": 0.5606, "step": 985 }, { "epoch": 0.07310743679098392, "grad_norm": 0.4005124270915985, "learning_rate": 0.0001901875468867217, "loss": 0.5597, "step": 986 }, { "epoch": 0.07318158226440276, "grad_norm": 0.3773104250431061, "learning_rate": 0.00019017754438609654, "loss": 0.53, "step": 987 }, { "epoch": 0.0732557277378216, "grad_norm": 0.43270203471183777, "learning_rate": 0.00019016754188547137, "loss": 0.593, "step": 988 }, { "epoch": 0.07332987321124046, "grad_norm": 0.3683762848377228, "learning_rate": 0.0001901575393848462, "loss": 0.585, "step": 989 }, { "epoch": 0.0734040186846593, "grad_norm": 0.4079657793045044, "learning_rate": 0.00019014753688422108, "loss": 0.5922, "step": 990 }, { "epoch": 0.07347816415807815, "grad_norm": 0.3925129175186157, "learning_rate": 0.0001901375343835959, "loss": 0.5914, "step": 991 }, { "epoch": 0.073552309631497, "grad_norm": 0.4134807586669922, "learning_rate": 0.00019012753188297075, "loss": 0.5597, "step": 992 }, { "epoch": 0.07362645510491585, "grad_norm": 0.39071568846702576, "learning_rate": 0.0001901175293823456, "loss": 0.6199, "step": 993 }, { "epoch": 0.07370060057833469, "grad_norm": 0.4072871804237366, "learning_rate": 0.00019010752688172045, "loss": 0.5711, "step": 994 }, { "epoch": 0.07377474605175353, "grad_norm": 0.3875932991504669, "learning_rate": 0.0001900975243810953, "loss": 0.608, "step": 995 }, { "epoch": 0.07384889152517239, "grad_norm": 0.40085339546203613, "learning_rate": 0.00019008752188047012, "loss": 0.5516, "step": 996 }, { "epoch": 0.07392303699859124, "grad_norm": 0.34750810265541077, "learning_rate": 0.00019007751937984496, "loss": 0.5596, "step": 997 }, { "epoch": 0.07399718247201008, "grad_norm": 0.3941650688648224, "learning_rate": 0.00019006751687921983, "loss": 0.5757, "step": 998 }, { "epoch": 0.07407132794542894, "grad_norm": 0.37261834740638733, "learning_rate": 0.00019005751437859464, "loss": 0.5178, "step": 999 }, { "epoch": 0.07414547341884778, "grad_norm": 0.3751106560230255, "learning_rate": 0.0001900475118779695, "loss": 0.5638, "step": 1000 }, { "epoch": 0.07421961889226662, "grad_norm": 0.37150874733924866, "learning_rate": 0.00019003750937734434, "loss": 0.5302, "step": 1001 }, { "epoch": 0.07429376436568548, "grad_norm": 0.42387425899505615, "learning_rate": 0.0001900275068767192, "loss": 0.6162, "step": 1002 }, { "epoch": 0.07436790983910432, "grad_norm": 0.4314328730106354, "learning_rate": 0.000190017504376094, "loss": 0.5659, "step": 1003 }, { "epoch": 0.07444205531252317, "grad_norm": 0.42653024196624756, "learning_rate": 0.00019000750187546888, "loss": 0.6331, "step": 1004 }, { "epoch": 0.07451620078594202, "grad_norm": 0.3598942458629608, "learning_rate": 0.0001899974993748437, "loss": 0.5823, "step": 1005 }, { "epoch": 0.07459034625936087, "grad_norm": 0.3752782642841339, "learning_rate": 0.00018998749687421858, "loss": 0.5834, "step": 1006 }, { "epoch": 0.07466449173277971, "grad_norm": 0.4069672226905823, "learning_rate": 0.00018997749437359341, "loss": 0.5731, "step": 1007 }, { "epoch": 0.07473863720619855, "grad_norm": 0.3918244242668152, "learning_rate": 0.00018996749187296825, "loss": 0.5849, "step": 1008 }, { "epoch": 0.07481278267961741, "grad_norm": 0.3833501935005188, "learning_rate": 0.0001899574893723431, "loss": 0.5923, "step": 1009 }, { "epoch": 0.07488692815303626, "grad_norm": 0.34118199348449707, "learning_rate": 0.00018994748687171793, "loss": 0.5321, "step": 1010 }, { "epoch": 0.0749610736264551, "grad_norm": 0.3498409688472748, "learning_rate": 0.0001899374843710928, "loss": 0.5375, "step": 1011 }, { "epoch": 0.07503521909987396, "grad_norm": 0.3572872579097748, "learning_rate": 0.00018992748187046763, "loss": 0.5406, "step": 1012 }, { "epoch": 0.0751093645732928, "grad_norm": 0.3752976059913635, "learning_rate": 0.00018991747936984246, "loss": 0.559, "step": 1013 }, { "epoch": 0.07518351004671164, "grad_norm": 0.38547348976135254, "learning_rate": 0.0001899074768692173, "loss": 0.5898, "step": 1014 }, { "epoch": 0.0752576555201305, "grad_norm": 0.4094976782798767, "learning_rate": 0.00018989747436859217, "loss": 0.596, "step": 1015 }, { "epoch": 0.07533180099354934, "grad_norm": 0.39736407995224, "learning_rate": 0.000189887471867967, "loss": 0.6021, "step": 1016 }, { "epoch": 0.07540594646696819, "grad_norm": 0.39651623368263245, "learning_rate": 0.00018987746936734187, "loss": 0.5254, "step": 1017 }, { "epoch": 0.07548009194038705, "grad_norm": 0.3723140060901642, "learning_rate": 0.00018986746686671668, "loss": 0.5812, "step": 1018 }, { "epoch": 0.07555423741380589, "grad_norm": 0.38207507133483887, "learning_rate": 0.00018985746436609154, "loss": 0.591, "step": 1019 }, { "epoch": 0.07562838288722473, "grad_norm": 0.37096917629241943, "learning_rate": 0.00018984746186546638, "loss": 0.5562, "step": 1020 }, { "epoch": 0.07570252836064359, "grad_norm": 0.3853181302547455, "learning_rate": 0.00018983745936484124, "loss": 0.6041, "step": 1021 }, { "epoch": 0.07577667383406243, "grad_norm": 0.41393449902534485, "learning_rate": 0.00018982745686421605, "loss": 0.6487, "step": 1022 }, { "epoch": 0.07585081930748128, "grad_norm": 0.3762393295764923, "learning_rate": 0.00018981745436359092, "loss": 0.5436, "step": 1023 }, { "epoch": 0.07592496478090012, "grad_norm": 0.38360196352005005, "learning_rate": 0.00018980745186296575, "loss": 0.5773, "step": 1024 }, { "epoch": 0.07599911025431898, "grad_norm": 0.36247754096984863, "learning_rate": 0.0001897974493623406, "loss": 0.566, "step": 1025 }, { "epoch": 0.07607325572773782, "grad_norm": 0.3866340219974518, "learning_rate": 0.00018978744686171543, "loss": 0.5683, "step": 1026 }, { "epoch": 0.07614740120115666, "grad_norm": 0.3882400393486023, "learning_rate": 0.0001897774443610903, "loss": 0.558, "step": 1027 }, { "epoch": 0.07622154667457552, "grad_norm": 0.3806648850440979, "learning_rate": 0.00018976744186046513, "loss": 0.5939, "step": 1028 }, { "epoch": 0.07629569214799437, "grad_norm": 0.41669762134552, "learning_rate": 0.00018975743935983997, "loss": 0.6945, "step": 1029 }, { "epoch": 0.07636983762141321, "grad_norm": 0.36687904596328735, "learning_rate": 0.0001897474368592148, "loss": 0.5997, "step": 1030 }, { "epoch": 0.07644398309483207, "grad_norm": 0.3635111153125763, "learning_rate": 0.00018973743435858967, "loss": 0.5314, "step": 1031 }, { "epoch": 0.07651812856825091, "grad_norm": 0.38127195835113525, "learning_rate": 0.0001897274318579645, "loss": 0.5996, "step": 1032 }, { "epoch": 0.07659227404166975, "grad_norm": 0.362022340297699, "learning_rate": 0.00018971742935733934, "loss": 0.5682, "step": 1033 }, { "epoch": 0.07666641951508861, "grad_norm": 0.37065404653549194, "learning_rate": 0.00018970742685671418, "loss": 0.5685, "step": 1034 }, { "epoch": 0.07674056498850745, "grad_norm": 0.3682611286640167, "learning_rate": 0.00018969742435608904, "loss": 0.5551, "step": 1035 }, { "epoch": 0.0768147104619263, "grad_norm": 0.39264747500419617, "learning_rate": 0.00018968742185546385, "loss": 0.5762, "step": 1036 }, { "epoch": 0.07688885593534514, "grad_norm": 0.37076687812805176, "learning_rate": 0.00018967741935483872, "loss": 0.5384, "step": 1037 }, { "epoch": 0.076963001408764, "grad_norm": 0.4082990884780884, "learning_rate": 0.00018966741685421355, "loss": 0.5675, "step": 1038 }, { "epoch": 0.07703714688218284, "grad_norm": 0.37958261370658875, "learning_rate": 0.00018965741435358842, "loss": 0.5912, "step": 1039 }, { "epoch": 0.07711129235560168, "grad_norm": 0.40461263060569763, "learning_rate": 0.00018964741185296326, "loss": 0.5831, "step": 1040 }, { "epoch": 0.07718543782902054, "grad_norm": 0.40641725063323975, "learning_rate": 0.0001896374093523381, "loss": 0.6399, "step": 1041 }, { "epoch": 0.07725958330243939, "grad_norm": 0.39310190081596375, "learning_rate": 0.00018962740685171293, "loss": 0.5929, "step": 1042 }, { "epoch": 0.07733372877585823, "grad_norm": 0.4116590619087219, "learning_rate": 0.0001896174043510878, "loss": 0.6685, "step": 1043 }, { "epoch": 0.07740787424927709, "grad_norm": 0.38834699988365173, "learning_rate": 0.00018960740185046263, "loss": 0.55, "step": 1044 }, { "epoch": 0.07748201972269593, "grad_norm": 0.366383820772171, "learning_rate": 0.00018959739934983747, "loss": 0.549, "step": 1045 }, { "epoch": 0.07755616519611477, "grad_norm": 0.3706623911857605, "learning_rate": 0.0001895873968492123, "loss": 0.5639, "step": 1046 }, { "epoch": 0.07763031066953363, "grad_norm": 0.34453365206718445, "learning_rate": 0.00018957739434858714, "loss": 0.5614, "step": 1047 }, { "epoch": 0.07770445614295247, "grad_norm": 0.3645910918712616, "learning_rate": 0.000189567391847962, "loss": 0.5869, "step": 1048 }, { "epoch": 0.07777860161637132, "grad_norm": 0.399275004863739, "learning_rate": 0.00018955738934733684, "loss": 0.5994, "step": 1049 }, { "epoch": 0.07785274708979018, "grad_norm": 0.375650554895401, "learning_rate": 0.0001895473868467117, "loss": 0.5801, "step": 1050 }, { "epoch": 0.07792689256320902, "grad_norm": 0.3651721179485321, "learning_rate": 0.00018953738434608652, "loss": 0.5572, "step": 1051 }, { "epoch": 0.07800103803662786, "grad_norm": 0.3899763226509094, "learning_rate": 0.00018952738184546138, "loss": 0.5785, "step": 1052 }, { "epoch": 0.0780751835100467, "grad_norm": 0.4016595184803009, "learning_rate": 0.00018951737934483622, "loss": 0.5278, "step": 1053 }, { "epoch": 0.07814932898346556, "grad_norm": 0.3821891248226166, "learning_rate": 0.00018950737684421108, "loss": 0.5922, "step": 1054 }, { "epoch": 0.0782234744568844, "grad_norm": 0.40426382422447205, "learning_rate": 0.0001894973743435859, "loss": 0.6779, "step": 1055 }, { "epoch": 0.07829761993030325, "grad_norm": 0.3872550427913666, "learning_rate": 0.00018948737184296076, "loss": 0.5668, "step": 1056 }, { "epoch": 0.07837176540372211, "grad_norm": 0.42344966530799866, "learning_rate": 0.0001894773693423356, "loss": 0.5938, "step": 1057 }, { "epoch": 0.07844591087714095, "grad_norm": 0.5394008755683899, "learning_rate": 0.00018946736684171046, "loss": 0.5429, "step": 1058 }, { "epoch": 0.0785200563505598, "grad_norm": 0.38449686765670776, "learning_rate": 0.00018945736434108527, "loss": 0.5391, "step": 1059 }, { "epoch": 0.07859420182397865, "grad_norm": 0.39341893792152405, "learning_rate": 0.00018944736184046013, "loss": 0.5463, "step": 1060 }, { "epoch": 0.0786683472973975, "grad_norm": 0.39712321758270264, "learning_rate": 0.00018943735933983497, "loss": 0.6099, "step": 1061 }, { "epoch": 0.07874249277081634, "grad_norm": 0.42164507508277893, "learning_rate": 0.0001894273568392098, "loss": 0.5832, "step": 1062 }, { "epoch": 0.0788166382442352, "grad_norm": 0.388313353061676, "learning_rate": 0.00018941735433858464, "loss": 0.5792, "step": 1063 }, { "epoch": 0.07889078371765404, "grad_norm": 0.3577899634838104, "learning_rate": 0.0001894073518379595, "loss": 0.5405, "step": 1064 }, { "epoch": 0.07896492919107288, "grad_norm": 0.4240328073501587, "learning_rate": 0.00018939734933733434, "loss": 0.6155, "step": 1065 }, { "epoch": 0.07903907466449173, "grad_norm": 0.4006487727165222, "learning_rate": 0.00018938734683670918, "loss": 0.5983, "step": 1066 }, { "epoch": 0.07911322013791058, "grad_norm": 0.3713149428367615, "learning_rate": 0.00018937734433608402, "loss": 0.5839, "step": 1067 }, { "epoch": 0.07918736561132943, "grad_norm": 0.45911404490470886, "learning_rate": 0.00018936734183545888, "loss": 0.5896, "step": 1068 }, { "epoch": 0.07926151108474827, "grad_norm": 0.417202353477478, "learning_rate": 0.00018935733933483372, "loss": 0.611, "step": 1069 }, { "epoch": 0.07933565655816713, "grad_norm": 0.38330790400505066, "learning_rate": 0.00018934733683420856, "loss": 0.6156, "step": 1070 }, { "epoch": 0.07940980203158597, "grad_norm": 0.3702802360057831, "learning_rate": 0.0001893373343335834, "loss": 0.546, "step": 1071 }, { "epoch": 0.07948394750500481, "grad_norm": 0.3397338390350342, "learning_rate": 0.00018932733183295826, "loss": 0.5116, "step": 1072 }, { "epoch": 0.07955809297842367, "grad_norm": 0.40408995747566223, "learning_rate": 0.00018931732933233307, "loss": 0.5826, "step": 1073 }, { "epoch": 0.07963223845184252, "grad_norm": 0.38514554500579834, "learning_rate": 0.00018930732683170793, "loss": 0.5946, "step": 1074 }, { "epoch": 0.07970638392526136, "grad_norm": 0.4068520665168762, "learning_rate": 0.00018929732433108277, "loss": 0.5501, "step": 1075 }, { "epoch": 0.07978052939868022, "grad_norm": 0.37064844369888306, "learning_rate": 0.00018928732183045763, "loss": 0.5642, "step": 1076 }, { "epoch": 0.07985467487209906, "grad_norm": 0.4214935302734375, "learning_rate": 0.00018927731932983247, "loss": 0.6433, "step": 1077 }, { "epoch": 0.0799288203455179, "grad_norm": 0.4059064984321594, "learning_rate": 0.0001892673168292073, "loss": 0.5851, "step": 1078 }, { "epoch": 0.08000296581893675, "grad_norm": 0.39194658398628235, "learning_rate": 0.00018925731432858215, "loss": 0.5546, "step": 1079 }, { "epoch": 0.0800771112923556, "grad_norm": 0.3618759512901306, "learning_rate": 0.000189247311827957, "loss": 0.59, "step": 1080 }, { "epoch": 0.08015125676577445, "grad_norm": 0.3868390619754791, "learning_rate": 0.00018923730932733185, "loss": 0.6255, "step": 1081 }, { "epoch": 0.08022540223919329, "grad_norm": 0.38565927743911743, "learning_rate": 0.00018922730682670668, "loss": 0.5931, "step": 1082 }, { "epoch": 0.08029954771261215, "grad_norm": 0.38890552520751953, "learning_rate": 0.00018921730432608155, "loss": 0.5938, "step": 1083 }, { "epoch": 0.08037369318603099, "grad_norm": 0.3789362609386444, "learning_rate": 0.00018920730182545636, "loss": 0.5309, "step": 1084 }, { "epoch": 0.08044783865944984, "grad_norm": 0.3668486773967743, "learning_rate": 0.00018919729932483122, "loss": 0.5657, "step": 1085 }, { "epoch": 0.08052198413286869, "grad_norm": 0.3914008140563965, "learning_rate": 0.00018918729682420606, "loss": 0.6221, "step": 1086 }, { "epoch": 0.08059612960628754, "grad_norm": 0.37056294083595276, "learning_rate": 0.00018917729432358092, "loss": 0.5528, "step": 1087 }, { "epoch": 0.08067027507970638, "grad_norm": 0.3837694823741913, "learning_rate": 0.00018916729182295573, "loss": 0.6032, "step": 1088 }, { "epoch": 0.08074442055312524, "grad_norm": 0.3586488664150238, "learning_rate": 0.0001891572893223306, "loss": 0.5825, "step": 1089 }, { "epoch": 0.08081856602654408, "grad_norm": 0.38153430819511414, "learning_rate": 0.00018914728682170543, "loss": 0.603, "step": 1090 }, { "epoch": 0.08089271149996292, "grad_norm": 0.40659254789352417, "learning_rate": 0.0001891372843210803, "loss": 0.5616, "step": 1091 }, { "epoch": 0.08096685697338178, "grad_norm": 0.39703789353370667, "learning_rate": 0.0001891272818204551, "loss": 0.6102, "step": 1092 }, { "epoch": 0.08104100244680063, "grad_norm": 0.3414754867553711, "learning_rate": 0.00018911727931982997, "loss": 0.509, "step": 1093 }, { "epoch": 0.08111514792021947, "grad_norm": 0.37710636854171753, "learning_rate": 0.0001891072768192048, "loss": 0.5627, "step": 1094 }, { "epoch": 0.08118929339363831, "grad_norm": 0.4102470576763153, "learning_rate": 0.00018909727431857967, "loss": 0.5938, "step": 1095 }, { "epoch": 0.08126343886705717, "grad_norm": 0.38152655959129333, "learning_rate": 0.00018908727181795448, "loss": 0.5557, "step": 1096 }, { "epoch": 0.08133758434047601, "grad_norm": 0.3700115978717804, "learning_rate": 0.00018907726931732935, "loss": 0.5631, "step": 1097 }, { "epoch": 0.08141172981389486, "grad_norm": 0.3639596104621887, "learning_rate": 0.00018906726681670419, "loss": 0.5217, "step": 1098 }, { "epoch": 0.08148587528731371, "grad_norm": 0.37774497270584106, "learning_rate": 0.00018905726431607902, "loss": 0.5581, "step": 1099 }, { "epoch": 0.08156002076073256, "grad_norm": 0.3977414667606354, "learning_rate": 0.00018904726181545386, "loss": 0.613, "step": 1100 }, { "epoch": 0.0816341662341514, "grad_norm": 0.3688589930534363, "learning_rate": 0.00018903725931482872, "loss": 0.5465, "step": 1101 }, { "epoch": 0.08170831170757026, "grad_norm": 0.39510318636894226, "learning_rate": 0.00018902725681420356, "loss": 0.592, "step": 1102 }, { "epoch": 0.0817824571809891, "grad_norm": 0.37958401441574097, "learning_rate": 0.0001890172543135784, "loss": 0.5556, "step": 1103 }, { "epoch": 0.08185660265440795, "grad_norm": 0.38963478803634644, "learning_rate": 0.00018900725181295324, "loss": 0.5671, "step": 1104 }, { "epoch": 0.0819307481278268, "grad_norm": 0.36553865671157837, "learning_rate": 0.0001889972493123281, "loss": 0.5744, "step": 1105 }, { "epoch": 0.08200489360124565, "grad_norm": 0.42809078097343445, "learning_rate": 0.00018898724681170294, "loss": 0.5983, "step": 1106 }, { "epoch": 0.08207903907466449, "grad_norm": 0.3870912492275238, "learning_rate": 0.00018897724431107777, "loss": 0.5931, "step": 1107 }, { "epoch": 0.08215318454808333, "grad_norm": 0.38330307602882385, "learning_rate": 0.0001889672418104526, "loss": 0.5671, "step": 1108 }, { "epoch": 0.08222733002150219, "grad_norm": 0.37595921754837036, "learning_rate": 0.00018895723930982748, "loss": 0.5742, "step": 1109 }, { "epoch": 0.08230147549492103, "grad_norm": 0.38556089997291565, "learning_rate": 0.0001889472368092023, "loss": 0.5662, "step": 1110 }, { "epoch": 0.08237562096833988, "grad_norm": 0.4157800078392029, "learning_rate": 0.00018893723430857715, "loss": 0.5874, "step": 1111 }, { "epoch": 0.08244976644175873, "grad_norm": 0.4163574278354645, "learning_rate": 0.00018892723180795199, "loss": 0.5428, "step": 1112 }, { "epoch": 0.08252391191517758, "grad_norm": 0.36318114399909973, "learning_rate": 0.00018891722930732685, "loss": 0.5533, "step": 1113 }, { "epoch": 0.08259805738859642, "grad_norm": 0.3573673665523529, "learning_rate": 0.0001889072268067017, "loss": 0.5506, "step": 1114 }, { "epoch": 0.08267220286201528, "grad_norm": 0.42922478914260864, "learning_rate": 0.00018889722430607652, "loss": 0.5704, "step": 1115 }, { "epoch": 0.08274634833543412, "grad_norm": 0.40331193804740906, "learning_rate": 0.0001888872218054514, "loss": 0.6036, "step": 1116 }, { "epoch": 0.08282049380885297, "grad_norm": 0.40913406014442444, "learning_rate": 0.00018887721930482623, "loss": 0.5973, "step": 1117 }, { "epoch": 0.08289463928227182, "grad_norm": 0.4456445872783661, "learning_rate": 0.00018886721680420106, "loss": 0.5851, "step": 1118 }, { "epoch": 0.08296878475569067, "grad_norm": 0.40168479084968567, "learning_rate": 0.0001888572143035759, "loss": 0.5313, "step": 1119 }, { "epoch": 0.08304293022910951, "grad_norm": 0.38708773255348206, "learning_rate": 0.00018884721180295076, "loss": 0.5687, "step": 1120 }, { "epoch": 0.08311707570252837, "grad_norm": 0.37819501757621765, "learning_rate": 0.00018883720930232557, "loss": 0.5524, "step": 1121 }, { "epoch": 0.08319122117594721, "grad_norm": 0.3635140061378479, "learning_rate": 0.00018882720680170044, "loss": 0.541, "step": 1122 }, { "epoch": 0.08326536664936605, "grad_norm": 0.4056885540485382, "learning_rate": 0.00018881720430107528, "loss": 0.5973, "step": 1123 }, { "epoch": 0.0833395121227849, "grad_norm": 0.39061787724494934, "learning_rate": 0.00018880720180045014, "loss": 0.5913, "step": 1124 }, { "epoch": 0.08341365759620376, "grad_norm": 0.3710435628890991, "learning_rate": 0.00018879719929982495, "loss": 0.553, "step": 1125 }, { "epoch": 0.0834878030696226, "grad_norm": 0.33881571888923645, "learning_rate": 0.00018878719679919981, "loss": 0.53, "step": 1126 }, { "epoch": 0.08356194854304144, "grad_norm": 0.38529112935066223, "learning_rate": 0.00018877719429857465, "loss": 0.611, "step": 1127 }, { "epoch": 0.0836360940164603, "grad_norm": 0.45561516284942627, "learning_rate": 0.00018876719179794952, "loss": 0.5308, "step": 1128 }, { "epoch": 0.08371023948987914, "grad_norm": 0.3695347309112549, "learning_rate": 0.00018875718929732433, "loss": 0.5401, "step": 1129 }, { "epoch": 0.08378438496329799, "grad_norm": 0.38775137066841125, "learning_rate": 0.0001887471867966992, "loss": 0.5787, "step": 1130 }, { "epoch": 0.08385853043671684, "grad_norm": 0.3706720471382141, "learning_rate": 0.00018873718429607403, "loss": 0.5693, "step": 1131 }, { "epoch": 0.08393267591013569, "grad_norm": 0.3993111550807953, "learning_rate": 0.0001887271817954489, "loss": 0.6174, "step": 1132 }, { "epoch": 0.08400682138355453, "grad_norm": 0.3878619968891144, "learning_rate": 0.0001887171792948237, "loss": 0.5946, "step": 1133 }, { "epoch": 0.08408096685697339, "grad_norm": 0.3810214400291443, "learning_rate": 0.00018870717679419856, "loss": 0.5601, "step": 1134 }, { "epoch": 0.08415511233039223, "grad_norm": 0.36274245381355286, "learning_rate": 0.0001886971742935734, "loss": 0.5768, "step": 1135 }, { "epoch": 0.08422925780381108, "grad_norm": 0.35710301995277405, "learning_rate": 0.00018868717179294824, "loss": 0.5569, "step": 1136 }, { "epoch": 0.08430340327722992, "grad_norm": 0.40464359521865845, "learning_rate": 0.00018867716929232308, "loss": 0.5785, "step": 1137 }, { "epoch": 0.08437754875064878, "grad_norm": 0.3816587030887604, "learning_rate": 0.00018866716679169794, "loss": 0.5424, "step": 1138 }, { "epoch": 0.08445169422406762, "grad_norm": 0.36191049218177795, "learning_rate": 0.00018865716429107278, "loss": 0.5126, "step": 1139 }, { "epoch": 0.08452583969748646, "grad_norm": 0.36759573221206665, "learning_rate": 0.00018864716179044761, "loss": 0.5771, "step": 1140 }, { "epoch": 0.08459998517090532, "grad_norm": 0.3769162893295288, "learning_rate": 0.00018863715928982245, "loss": 0.5998, "step": 1141 }, { "epoch": 0.08467413064432416, "grad_norm": 0.4057658016681671, "learning_rate": 0.00018862715678919732, "loss": 0.5654, "step": 1142 }, { "epoch": 0.08474827611774301, "grad_norm": 0.4010295867919922, "learning_rate": 0.00018861715428857215, "loss": 0.5912, "step": 1143 }, { "epoch": 0.08482242159116186, "grad_norm": 0.36862537264823914, "learning_rate": 0.000188607151787947, "loss": 0.5696, "step": 1144 }, { "epoch": 0.08489656706458071, "grad_norm": 0.3855178654193878, "learning_rate": 0.00018859714928732183, "loss": 0.5724, "step": 1145 }, { "epoch": 0.08497071253799955, "grad_norm": 0.372541606426239, "learning_rate": 0.0001885871467866967, "loss": 0.5959, "step": 1146 }, { "epoch": 0.08504485801141841, "grad_norm": 0.3672063648700714, "learning_rate": 0.00018857714428607153, "loss": 0.5513, "step": 1147 }, { "epoch": 0.08511900348483725, "grad_norm": 0.34718647599220276, "learning_rate": 0.00018856714178544637, "loss": 0.5201, "step": 1148 }, { "epoch": 0.0851931489582561, "grad_norm": 0.3754417598247528, "learning_rate": 0.0001885571392848212, "loss": 0.5831, "step": 1149 }, { "epoch": 0.08526729443167495, "grad_norm": 0.3526322543621063, "learning_rate": 0.00018854713678419607, "loss": 0.5256, "step": 1150 }, { "epoch": 0.0853414399050938, "grad_norm": 0.40268176794052124, "learning_rate": 0.0001885371342835709, "loss": 0.5799, "step": 1151 }, { "epoch": 0.08541558537851264, "grad_norm": 0.4093998372554779, "learning_rate": 0.00018852713178294574, "loss": 0.6064, "step": 1152 }, { "epoch": 0.08548973085193148, "grad_norm": 0.37530773878097534, "learning_rate": 0.0001885171292823206, "loss": 0.5796, "step": 1153 }, { "epoch": 0.08556387632535034, "grad_norm": 0.39043092727661133, "learning_rate": 0.00018850712678169544, "loss": 0.5321, "step": 1154 }, { "epoch": 0.08563802179876918, "grad_norm": 0.395626962184906, "learning_rate": 0.00018849712428107028, "loss": 0.5616, "step": 1155 }, { "epoch": 0.08571216727218803, "grad_norm": 0.3778228461742401, "learning_rate": 0.00018848712178044512, "loss": 0.5776, "step": 1156 }, { "epoch": 0.08578631274560689, "grad_norm": 0.4435516595840454, "learning_rate": 0.00018847711927981998, "loss": 0.6078, "step": 1157 }, { "epoch": 0.08586045821902573, "grad_norm": 0.410016268491745, "learning_rate": 0.0001884671167791948, "loss": 0.608, "step": 1158 }, { "epoch": 0.08593460369244457, "grad_norm": 0.3641214370727539, "learning_rate": 0.00018845711427856965, "loss": 0.548, "step": 1159 }, { "epoch": 0.08600874916586343, "grad_norm": 0.36015310883522034, "learning_rate": 0.0001884471117779445, "loss": 0.5384, "step": 1160 }, { "epoch": 0.08608289463928227, "grad_norm": 0.39722713828086853, "learning_rate": 0.00018843710927731936, "loss": 0.5729, "step": 1161 }, { "epoch": 0.08615704011270112, "grad_norm": 0.3778017461299896, "learning_rate": 0.00018842710677669417, "loss": 0.5757, "step": 1162 }, { "epoch": 0.08623118558611997, "grad_norm": 0.40557861328125, "learning_rate": 0.00018841710427606903, "loss": 0.5057, "step": 1163 }, { "epoch": 0.08630533105953882, "grad_norm": 0.37460020184516907, "learning_rate": 0.00018840710177544387, "loss": 0.5842, "step": 1164 }, { "epoch": 0.08637947653295766, "grad_norm": 0.3792310953140259, "learning_rate": 0.00018839709927481873, "loss": 0.6019, "step": 1165 }, { "epoch": 0.0864536220063765, "grad_norm": 0.3798016607761383, "learning_rate": 0.00018838709677419354, "loss": 0.587, "step": 1166 }, { "epoch": 0.08652776747979536, "grad_norm": 0.38482287526130676, "learning_rate": 0.0001883770942735684, "loss": 0.6097, "step": 1167 }, { "epoch": 0.0866019129532142, "grad_norm": 0.37641990184783936, "learning_rate": 0.00018836709177294324, "loss": 0.5517, "step": 1168 }, { "epoch": 0.08667605842663305, "grad_norm": 0.3969367742538452, "learning_rate": 0.0001883570892723181, "loss": 0.5247, "step": 1169 }, { "epoch": 0.0867502039000519, "grad_norm": 0.4153239130973816, "learning_rate": 0.00018834708677169292, "loss": 0.6052, "step": 1170 }, { "epoch": 0.08682434937347075, "grad_norm": 0.39450231194496155, "learning_rate": 0.00018833708427106778, "loss": 0.5776, "step": 1171 }, { "epoch": 0.08689849484688959, "grad_norm": 0.3893013894557953, "learning_rate": 0.00018832708177044262, "loss": 0.592, "step": 1172 }, { "epoch": 0.08697264032030845, "grad_norm": 0.37967750430107117, "learning_rate": 0.00018831707926981746, "loss": 0.5317, "step": 1173 }, { "epoch": 0.0870467857937273, "grad_norm": 0.38685131072998047, "learning_rate": 0.0001883070767691923, "loss": 0.5825, "step": 1174 }, { "epoch": 0.08712093126714614, "grad_norm": 0.37996843457221985, "learning_rate": 0.00018829707426856716, "loss": 0.5992, "step": 1175 }, { "epoch": 0.087195076740565, "grad_norm": 0.3414592742919922, "learning_rate": 0.000188287071767942, "loss": 0.5214, "step": 1176 }, { "epoch": 0.08726922221398384, "grad_norm": 0.3945949673652649, "learning_rate": 0.00018827706926731683, "loss": 0.5959, "step": 1177 }, { "epoch": 0.08734336768740268, "grad_norm": 0.36827903985977173, "learning_rate": 0.00018826706676669167, "loss": 0.5975, "step": 1178 }, { "epoch": 0.08741751316082153, "grad_norm": 0.3492521345615387, "learning_rate": 0.00018825706426606653, "loss": 0.57, "step": 1179 }, { "epoch": 0.08749165863424038, "grad_norm": 0.3843616247177124, "learning_rate": 0.00018824706176544137, "loss": 0.6082, "step": 1180 }, { "epoch": 0.08756580410765923, "grad_norm": 0.38644057512283325, "learning_rate": 0.0001882370592648162, "loss": 0.5746, "step": 1181 }, { "epoch": 0.08763994958107807, "grad_norm": 0.37552332878112793, "learning_rate": 0.00018822705676419104, "loss": 0.5359, "step": 1182 }, { "epoch": 0.08771409505449693, "grad_norm": 0.37861916422843933, "learning_rate": 0.0001882170542635659, "loss": 0.5437, "step": 1183 }, { "epoch": 0.08778824052791577, "grad_norm": 0.3685823976993561, "learning_rate": 0.00018820705176294074, "loss": 0.5675, "step": 1184 }, { "epoch": 0.08786238600133461, "grad_norm": 0.3774687647819519, "learning_rate": 0.00018819704926231558, "loss": 0.5452, "step": 1185 }, { "epoch": 0.08793653147475347, "grad_norm": 0.41439273953437805, "learning_rate": 0.00018818704676169045, "loss": 0.5555, "step": 1186 }, { "epoch": 0.08801067694817231, "grad_norm": 0.3831767737865448, "learning_rate": 0.00018817704426106528, "loss": 0.548, "step": 1187 }, { "epoch": 0.08808482242159116, "grad_norm": 0.4118499159812927, "learning_rate": 0.00018816704176044012, "loss": 0.548, "step": 1188 }, { "epoch": 0.08815896789501002, "grad_norm": 0.38040536642074585, "learning_rate": 0.00018815703925981496, "loss": 0.6018, "step": 1189 }, { "epoch": 0.08823311336842886, "grad_norm": 0.3394857943058014, "learning_rate": 0.00018814703675918982, "loss": 0.5237, "step": 1190 }, { "epoch": 0.0883072588418477, "grad_norm": 0.36812594532966614, "learning_rate": 0.00018813703425856466, "loss": 0.5979, "step": 1191 }, { "epoch": 0.08838140431526656, "grad_norm": 0.395608514547348, "learning_rate": 0.0001881270317579395, "loss": 0.587, "step": 1192 }, { "epoch": 0.0884555497886854, "grad_norm": 0.3655269742012024, "learning_rate": 0.00018811702925731433, "loss": 0.5535, "step": 1193 }, { "epoch": 0.08852969526210425, "grad_norm": 0.3640616834163666, "learning_rate": 0.0001881070267566892, "loss": 0.5393, "step": 1194 }, { "epoch": 0.08860384073552309, "grad_norm": 0.4080580472946167, "learning_rate": 0.00018809702425606403, "loss": 0.5971, "step": 1195 }, { "epoch": 0.08867798620894195, "grad_norm": 0.3984844982624054, "learning_rate": 0.00018808702175543887, "loss": 0.5902, "step": 1196 }, { "epoch": 0.08875213168236079, "grad_norm": 0.39033347368240356, "learning_rate": 0.0001880770192548137, "loss": 0.6011, "step": 1197 }, { "epoch": 0.08882627715577963, "grad_norm": 0.37383195757865906, "learning_rate": 0.00018806701675418857, "loss": 0.5577, "step": 1198 }, { "epoch": 0.08890042262919849, "grad_norm": 0.3741360008716583, "learning_rate": 0.00018805701425356338, "loss": 0.5683, "step": 1199 }, { "epoch": 0.08897456810261734, "grad_norm": 0.36032578349113464, "learning_rate": 0.00018804701175293825, "loss": 0.5968, "step": 1200 }, { "epoch": 0.08904871357603618, "grad_norm": 0.3818228542804718, "learning_rate": 0.00018803700925231308, "loss": 0.5479, "step": 1201 }, { "epoch": 0.08912285904945504, "grad_norm": 0.3707422614097595, "learning_rate": 0.00018802700675168795, "loss": 0.5496, "step": 1202 }, { "epoch": 0.08919700452287388, "grad_norm": 0.3618408441543579, "learning_rate": 0.00018801700425106276, "loss": 0.5358, "step": 1203 }, { "epoch": 0.08927114999629272, "grad_norm": 0.39610886573791504, "learning_rate": 0.00018800700175043762, "loss": 0.5999, "step": 1204 }, { "epoch": 0.08934529546971158, "grad_norm": 0.37920501828193665, "learning_rate": 0.00018799699924981246, "loss": 0.6102, "step": 1205 }, { "epoch": 0.08941944094313042, "grad_norm": 0.3782200217247009, "learning_rate": 0.00018798699674918732, "loss": 0.5444, "step": 1206 }, { "epoch": 0.08949358641654927, "grad_norm": 0.39756014943122864, "learning_rate": 0.00018797699424856213, "loss": 0.5876, "step": 1207 }, { "epoch": 0.08956773188996811, "grad_norm": 0.377819299697876, "learning_rate": 0.000187966991747937, "loss": 0.5927, "step": 1208 }, { "epoch": 0.08964187736338697, "grad_norm": 0.4101908802986145, "learning_rate": 0.00018795698924731183, "loss": 0.5307, "step": 1209 }, { "epoch": 0.08971602283680581, "grad_norm": 0.3592652976512909, "learning_rate": 0.00018794698674668667, "loss": 0.5688, "step": 1210 }, { "epoch": 0.08979016831022466, "grad_norm": 0.41505226492881775, "learning_rate": 0.0001879369842460615, "loss": 0.6366, "step": 1211 }, { "epoch": 0.08986431378364351, "grad_norm": 0.40735960006713867, "learning_rate": 0.00018792698174543637, "loss": 0.5652, "step": 1212 }, { "epoch": 0.08993845925706236, "grad_norm": 0.38273608684539795, "learning_rate": 0.0001879169792448112, "loss": 0.5841, "step": 1213 }, { "epoch": 0.0900126047304812, "grad_norm": 0.3968263566493988, "learning_rate": 0.00018790697674418605, "loss": 0.5585, "step": 1214 }, { "epoch": 0.09008675020390006, "grad_norm": 0.38389119505882263, "learning_rate": 0.00018789697424356088, "loss": 0.5696, "step": 1215 }, { "epoch": 0.0901608956773189, "grad_norm": 0.37127387523651123, "learning_rate": 0.00018788697174293575, "loss": 0.5269, "step": 1216 }, { "epoch": 0.09023504115073774, "grad_norm": 0.38106659054756165, "learning_rate": 0.00018787696924231059, "loss": 0.5576, "step": 1217 }, { "epoch": 0.0903091866241566, "grad_norm": 0.3794125020503998, "learning_rate": 0.00018786696674168542, "loss": 0.5577, "step": 1218 }, { "epoch": 0.09038333209757544, "grad_norm": 0.38720080256462097, "learning_rate": 0.0001878569642410603, "loss": 0.5637, "step": 1219 }, { "epoch": 0.09045747757099429, "grad_norm": 0.36571913957595825, "learning_rate": 0.00018784696174043512, "loss": 0.5651, "step": 1220 }, { "epoch": 0.09053162304441315, "grad_norm": 0.381017804145813, "learning_rate": 0.00018783695923980996, "loss": 0.6106, "step": 1221 }, { "epoch": 0.09060576851783199, "grad_norm": 0.42560040950775146, "learning_rate": 0.0001878269567391848, "loss": 0.5948, "step": 1222 }, { "epoch": 0.09067991399125083, "grad_norm": 0.3889475166797638, "learning_rate": 0.00018781695423855966, "loss": 0.5697, "step": 1223 }, { "epoch": 0.09075405946466968, "grad_norm": 0.483655720949173, "learning_rate": 0.0001878069517379345, "loss": 0.6027, "step": 1224 }, { "epoch": 0.09082820493808853, "grad_norm": 0.45895689725875854, "learning_rate": 0.00018779694923730934, "loss": 0.5797, "step": 1225 }, { "epoch": 0.09090235041150738, "grad_norm": 0.3858874440193176, "learning_rate": 0.00018778694673668417, "loss": 0.5634, "step": 1226 }, { "epoch": 0.09097649588492622, "grad_norm": 0.4587661325931549, "learning_rate": 0.00018777694423605904, "loss": 0.5618, "step": 1227 }, { "epoch": 0.09105064135834508, "grad_norm": 0.43541058897972107, "learning_rate": 0.00018776694173543387, "loss": 0.594, "step": 1228 }, { "epoch": 0.09112478683176392, "grad_norm": 0.40857282280921936, "learning_rate": 0.0001877569392348087, "loss": 0.5811, "step": 1229 }, { "epoch": 0.09119893230518276, "grad_norm": 0.42328011989593506, "learning_rate": 0.00018774693673418355, "loss": 0.5977, "step": 1230 }, { "epoch": 0.09127307777860162, "grad_norm": 0.48312780261039734, "learning_rate": 0.0001877369342335584, "loss": 0.6093, "step": 1231 }, { "epoch": 0.09134722325202047, "grad_norm": 0.40111443400382996, "learning_rate": 0.00018772693173293325, "loss": 0.6143, "step": 1232 }, { "epoch": 0.09142136872543931, "grad_norm": 0.40715527534484863, "learning_rate": 0.0001877169292323081, "loss": 0.5927, "step": 1233 }, { "epoch": 0.09149551419885817, "grad_norm": 0.389335036277771, "learning_rate": 0.00018770692673168292, "loss": 0.5681, "step": 1234 }, { "epoch": 0.09156965967227701, "grad_norm": 0.4212806224822998, "learning_rate": 0.0001876969242310578, "loss": 0.5768, "step": 1235 }, { "epoch": 0.09164380514569585, "grad_norm": 0.38850048184394836, "learning_rate": 0.0001876869217304326, "loss": 0.5654, "step": 1236 }, { "epoch": 0.0917179506191147, "grad_norm": 0.38291820883750916, "learning_rate": 0.00018767691922980746, "loss": 0.59, "step": 1237 }, { "epoch": 0.09179209609253355, "grad_norm": 0.4100579023361206, "learning_rate": 0.0001876669167291823, "loss": 0.5502, "step": 1238 }, { "epoch": 0.0918662415659524, "grad_norm": 0.3826446831226349, "learning_rate": 0.00018765691422855716, "loss": 0.5294, "step": 1239 }, { "epoch": 0.09194038703937124, "grad_norm": 0.3589223325252533, "learning_rate": 0.00018764691172793197, "loss": 0.556, "step": 1240 }, { "epoch": 0.0920145325127901, "grad_norm": 0.3760088086128235, "learning_rate": 0.00018763690922730684, "loss": 0.588, "step": 1241 }, { "epoch": 0.09208867798620894, "grad_norm": 0.3411153256893158, "learning_rate": 0.00018762690672668168, "loss": 0.5331, "step": 1242 }, { "epoch": 0.09216282345962779, "grad_norm": 0.37880292534828186, "learning_rate": 0.00018761690422605654, "loss": 0.5797, "step": 1243 }, { "epoch": 0.09223696893304664, "grad_norm": 0.39564648270606995, "learning_rate": 0.00018760690172543135, "loss": 0.575, "step": 1244 }, { "epoch": 0.09231111440646549, "grad_norm": 0.3613705337047577, "learning_rate": 0.00018759689922480621, "loss": 0.5394, "step": 1245 }, { "epoch": 0.09238525987988433, "grad_norm": 0.3784105181694031, "learning_rate": 0.00018758689672418105, "loss": 0.5608, "step": 1246 }, { "epoch": 0.09245940535330319, "grad_norm": 0.3758923411369324, "learning_rate": 0.0001875768942235559, "loss": 0.5639, "step": 1247 }, { "epoch": 0.09253355082672203, "grad_norm": 0.38114985823631287, "learning_rate": 0.00018756689172293072, "loss": 0.5585, "step": 1248 }, { "epoch": 0.09260769630014087, "grad_norm": 0.38076144456863403, "learning_rate": 0.0001875568892223056, "loss": 0.6051, "step": 1249 }, { "epoch": 0.09268184177355973, "grad_norm": 0.37600794434547424, "learning_rate": 0.00018754688672168043, "loss": 0.5742, "step": 1250 }, { "epoch": 0.09275598724697857, "grad_norm": 0.3778863847255707, "learning_rate": 0.00018753688422105526, "loss": 0.5702, "step": 1251 }, { "epoch": 0.09283013272039742, "grad_norm": 0.3589608669281006, "learning_rate": 0.00018752688172043013, "loss": 0.5338, "step": 1252 }, { "epoch": 0.09290427819381626, "grad_norm": 0.37212666869163513, "learning_rate": 0.00018751687921980496, "loss": 0.5713, "step": 1253 }, { "epoch": 0.09297842366723512, "grad_norm": 0.3756145238876343, "learning_rate": 0.0001875068767191798, "loss": 0.5433, "step": 1254 }, { "epoch": 0.09305256914065396, "grad_norm": 0.3660539984703064, "learning_rate": 0.00018749687421855464, "loss": 0.5866, "step": 1255 }, { "epoch": 0.0931267146140728, "grad_norm": 0.3816901445388794, "learning_rate": 0.0001874868717179295, "loss": 0.5681, "step": 1256 }, { "epoch": 0.09320086008749166, "grad_norm": 0.401668518781662, "learning_rate": 0.00018747686921730434, "loss": 0.577, "step": 1257 }, { "epoch": 0.0932750055609105, "grad_norm": 0.36891475319862366, "learning_rate": 0.00018746686671667918, "loss": 0.5579, "step": 1258 }, { "epoch": 0.09334915103432935, "grad_norm": 0.4345835745334625, "learning_rate": 0.00018745686421605401, "loss": 0.6478, "step": 1259 }, { "epoch": 0.09342329650774821, "grad_norm": 0.4012323319911957, "learning_rate": 0.00018744686171542888, "loss": 0.6073, "step": 1260 }, { "epoch": 0.09349744198116705, "grad_norm": 0.375914067029953, "learning_rate": 0.00018743685921480372, "loss": 0.5802, "step": 1261 }, { "epoch": 0.0935715874545859, "grad_norm": 0.3952946364879608, "learning_rate": 0.00018742685671417855, "loss": 0.55, "step": 1262 }, { "epoch": 0.09364573292800475, "grad_norm": 0.40768691897392273, "learning_rate": 0.0001874168542135534, "loss": 0.5647, "step": 1263 }, { "epoch": 0.0937198784014236, "grad_norm": 0.37271612882614136, "learning_rate": 0.00018740685171292825, "loss": 0.5907, "step": 1264 }, { "epoch": 0.09379402387484244, "grad_norm": 0.37623342871665955, "learning_rate": 0.0001873968492123031, "loss": 0.5975, "step": 1265 }, { "epoch": 0.09386816934826128, "grad_norm": 0.34878483414649963, "learning_rate": 0.00018738684671167793, "loss": 0.5552, "step": 1266 }, { "epoch": 0.09394231482168014, "grad_norm": 0.3736770749092102, "learning_rate": 0.00018737684421105277, "loss": 0.5561, "step": 1267 }, { "epoch": 0.09401646029509898, "grad_norm": 0.42355141043663025, "learning_rate": 0.00018736684171042763, "loss": 0.6389, "step": 1268 }, { "epoch": 0.09409060576851783, "grad_norm": 0.35418227314949036, "learning_rate": 0.00018735683920980247, "loss": 0.5237, "step": 1269 }, { "epoch": 0.09416475124193668, "grad_norm": 0.38205280900001526, "learning_rate": 0.0001873468367091773, "loss": 0.5869, "step": 1270 }, { "epoch": 0.09423889671535553, "grad_norm": 0.38755789399147034, "learning_rate": 0.00018733683420855214, "loss": 0.6327, "step": 1271 }, { "epoch": 0.09431304218877437, "grad_norm": 0.3496604263782501, "learning_rate": 0.000187326831707927, "loss": 0.5595, "step": 1272 }, { "epoch": 0.09438718766219323, "grad_norm": 0.33956220746040344, "learning_rate": 0.00018731682920730181, "loss": 0.5429, "step": 1273 }, { "epoch": 0.09446133313561207, "grad_norm": 0.37482205033302307, "learning_rate": 0.00018730682670667668, "loss": 0.5514, "step": 1274 }, { "epoch": 0.09453547860903092, "grad_norm": 0.38844311237335205, "learning_rate": 0.00018729682420605152, "loss": 0.5416, "step": 1275 }, { "epoch": 0.09460962408244977, "grad_norm": 0.4500686824321747, "learning_rate": 0.00018728682170542638, "loss": 0.6283, "step": 1276 }, { "epoch": 0.09468376955586862, "grad_norm": 0.40047264099121094, "learning_rate": 0.0001872768192048012, "loss": 0.591, "step": 1277 }, { "epoch": 0.09475791502928746, "grad_norm": 0.3782615661621094, "learning_rate": 0.00018726681670417605, "loss": 0.6091, "step": 1278 }, { "epoch": 0.0948320605027063, "grad_norm": 0.38177061080932617, "learning_rate": 0.0001872568142035509, "loss": 0.5974, "step": 1279 }, { "epoch": 0.09490620597612516, "grad_norm": 0.3800857961177826, "learning_rate": 0.00018724681170292576, "loss": 0.6072, "step": 1280 }, { "epoch": 0.094980351449544, "grad_norm": 0.4103955328464508, "learning_rate": 0.00018723680920230057, "loss": 0.5761, "step": 1281 }, { "epoch": 0.09505449692296285, "grad_norm": 0.38492846488952637, "learning_rate": 0.00018722680670167543, "loss": 0.5784, "step": 1282 }, { "epoch": 0.0951286423963817, "grad_norm": 0.3844730854034424, "learning_rate": 0.00018721680420105027, "loss": 0.5606, "step": 1283 }, { "epoch": 0.09520278786980055, "grad_norm": 0.36227262020111084, "learning_rate": 0.0001872068017004251, "loss": 0.5759, "step": 1284 }, { "epoch": 0.09527693334321939, "grad_norm": 0.407626748085022, "learning_rate": 0.00018719679919979997, "loss": 0.6218, "step": 1285 }, { "epoch": 0.09535107881663825, "grad_norm": 0.4025302827358246, "learning_rate": 0.0001871867966991748, "loss": 0.6359, "step": 1286 }, { "epoch": 0.09542522429005709, "grad_norm": 0.37482234835624695, "learning_rate": 0.00018717679419854964, "loss": 0.5319, "step": 1287 }, { "epoch": 0.09549936976347594, "grad_norm": 0.3439026474952698, "learning_rate": 0.00018716679169792448, "loss": 0.5703, "step": 1288 }, { "epoch": 0.0955735152368948, "grad_norm": 0.44264668226242065, "learning_rate": 0.00018715678919729934, "loss": 0.5597, "step": 1289 }, { "epoch": 0.09564766071031364, "grad_norm": 0.41000130772590637, "learning_rate": 0.00018714678669667418, "loss": 0.6031, "step": 1290 }, { "epoch": 0.09572180618373248, "grad_norm": 0.40597212314605713, "learning_rate": 0.00018713678419604902, "loss": 0.5806, "step": 1291 }, { "epoch": 0.09579595165715134, "grad_norm": 0.3983173370361328, "learning_rate": 0.00018712678169542386, "loss": 0.5924, "step": 1292 }, { "epoch": 0.09587009713057018, "grad_norm": 0.3857348561286926, "learning_rate": 0.00018711677919479872, "loss": 0.5522, "step": 1293 }, { "epoch": 0.09594424260398902, "grad_norm": 0.4148540794849396, "learning_rate": 0.00018710677669417356, "loss": 0.5621, "step": 1294 }, { "epoch": 0.09601838807740787, "grad_norm": 0.39005836844444275, "learning_rate": 0.0001870967741935484, "loss": 0.5454, "step": 1295 }, { "epoch": 0.09609253355082673, "grad_norm": 0.39049455523490906, "learning_rate": 0.00018708677169292323, "loss": 0.5489, "step": 1296 }, { "epoch": 0.09616667902424557, "grad_norm": 0.40870481729507446, "learning_rate": 0.0001870767691922981, "loss": 0.5551, "step": 1297 }, { "epoch": 0.09624082449766441, "grad_norm": 0.4217332899570465, "learning_rate": 0.00018706676669167293, "loss": 0.6161, "step": 1298 }, { "epoch": 0.09631496997108327, "grad_norm": 0.376453161239624, "learning_rate": 0.00018705676419104777, "loss": 0.5603, "step": 1299 }, { "epoch": 0.09638911544450211, "grad_norm": 0.4023270905017853, "learning_rate": 0.0001870467616904226, "loss": 0.598, "step": 1300 }, { "epoch": 0.09646326091792096, "grad_norm": 0.3985259532928467, "learning_rate": 0.00018703675918979747, "loss": 0.6259, "step": 1301 }, { "epoch": 0.09653740639133981, "grad_norm": 0.37226760387420654, "learning_rate": 0.0001870267566891723, "loss": 0.5604, "step": 1302 }, { "epoch": 0.09661155186475866, "grad_norm": 0.3574097752571106, "learning_rate": 0.00018701675418854714, "loss": 0.5304, "step": 1303 }, { "epoch": 0.0966856973381775, "grad_norm": 0.43141454458236694, "learning_rate": 0.00018700675168792198, "loss": 0.584, "step": 1304 }, { "epoch": 0.09675984281159636, "grad_norm": 0.3903699815273285, "learning_rate": 0.00018699674918729685, "loss": 0.5812, "step": 1305 }, { "epoch": 0.0968339882850152, "grad_norm": 0.4147559702396393, "learning_rate": 0.00018698674668667168, "loss": 0.6074, "step": 1306 }, { "epoch": 0.09690813375843405, "grad_norm": 0.3748653531074524, "learning_rate": 0.00018697674418604652, "loss": 0.5633, "step": 1307 }, { "epoch": 0.09698227923185289, "grad_norm": 0.3900514841079712, "learning_rate": 0.00018696674168542136, "loss": 0.611, "step": 1308 }, { "epoch": 0.09705642470527175, "grad_norm": 0.39061638712882996, "learning_rate": 0.00018695673918479622, "loss": 0.6231, "step": 1309 }, { "epoch": 0.09713057017869059, "grad_norm": 0.4073624014854431, "learning_rate": 0.00018694673668417103, "loss": 0.5838, "step": 1310 }, { "epoch": 0.09720471565210943, "grad_norm": 0.36315682530403137, "learning_rate": 0.0001869367341835459, "loss": 0.5601, "step": 1311 }, { "epoch": 0.09727886112552829, "grad_norm": 0.37829166650772095, "learning_rate": 0.00018692673168292073, "loss": 0.5657, "step": 1312 }, { "epoch": 0.09735300659894713, "grad_norm": 0.3750380277633667, "learning_rate": 0.0001869167291822956, "loss": 0.5389, "step": 1313 }, { "epoch": 0.09742715207236598, "grad_norm": 0.3671341836452484, "learning_rate": 0.0001869067266816704, "loss": 0.5621, "step": 1314 }, { "epoch": 0.09750129754578483, "grad_norm": 0.3733050525188446, "learning_rate": 0.00018689672418104527, "loss": 0.5493, "step": 1315 }, { "epoch": 0.09757544301920368, "grad_norm": 0.3739110231399536, "learning_rate": 0.0001868867216804201, "loss": 0.5983, "step": 1316 }, { "epoch": 0.09764958849262252, "grad_norm": 0.4242646098136902, "learning_rate": 0.00018687671917979497, "loss": 0.5734, "step": 1317 }, { "epoch": 0.09772373396604138, "grad_norm": 0.3703160881996155, "learning_rate": 0.00018686671667916978, "loss": 0.5837, "step": 1318 }, { "epoch": 0.09779787943946022, "grad_norm": 0.3786211311817169, "learning_rate": 0.00018685671417854465, "loss": 0.5708, "step": 1319 }, { "epoch": 0.09787202491287907, "grad_norm": 0.3735358417034149, "learning_rate": 0.00018684671167791948, "loss": 0.5769, "step": 1320 }, { "epoch": 0.09794617038629792, "grad_norm": 0.3789082169532776, "learning_rate": 0.00018683670917729432, "loss": 0.5822, "step": 1321 }, { "epoch": 0.09802031585971677, "grad_norm": 0.35534486174583435, "learning_rate": 0.00018682670667666918, "loss": 0.5542, "step": 1322 }, { "epoch": 0.09809446133313561, "grad_norm": 0.3697262704372406, "learning_rate": 0.00018681670417604402, "loss": 0.555, "step": 1323 }, { "epoch": 0.09816860680655445, "grad_norm": 0.35856565833091736, "learning_rate": 0.00018680670167541886, "loss": 0.5321, "step": 1324 }, { "epoch": 0.09824275227997331, "grad_norm": 0.42962339520454407, "learning_rate": 0.0001867966991747937, "loss": 0.577, "step": 1325 }, { "epoch": 0.09831689775339215, "grad_norm": 0.3772558867931366, "learning_rate": 0.00018678669667416856, "loss": 0.5693, "step": 1326 }, { "epoch": 0.098391043226811, "grad_norm": 0.3988826870918274, "learning_rate": 0.0001867766941735434, "loss": 0.6064, "step": 1327 }, { "epoch": 0.09846518870022986, "grad_norm": 0.38224637508392334, "learning_rate": 0.00018676669167291826, "loss": 0.6217, "step": 1328 }, { "epoch": 0.0985393341736487, "grad_norm": 0.4032258987426758, "learning_rate": 0.00018675668917229307, "loss": 0.5807, "step": 1329 }, { "epoch": 0.09861347964706754, "grad_norm": 0.4619221091270447, "learning_rate": 0.00018674668667166794, "loss": 0.596, "step": 1330 }, { "epoch": 0.0986876251204864, "grad_norm": 0.4338846206665039, "learning_rate": 0.00018673668417104277, "loss": 0.6184, "step": 1331 }, { "epoch": 0.09876177059390524, "grad_norm": 0.3832836151123047, "learning_rate": 0.0001867266816704176, "loss": 0.605, "step": 1332 }, { "epoch": 0.09883591606732409, "grad_norm": 0.3810766041278839, "learning_rate": 0.00018671667916979245, "loss": 0.5286, "step": 1333 }, { "epoch": 0.09891006154074294, "grad_norm": 0.382315456867218, "learning_rate": 0.0001867066766691673, "loss": 0.5714, "step": 1334 }, { "epoch": 0.09898420701416179, "grad_norm": 0.362845242023468, "learning_rate": 0.00018669667416854215, "loss": 0.523, "step": 1335 }, { "epoch": 0.09905835248758063, "grad_norm": 0.4305019676685333, "learning_rate": 0.00018668667166791699, "loss": 0.5582, "step": 1336 }, { "epoch": 0.09913249796099947, "grad_norm": 0.40316706895828247, "learning_rate": 0.00018667666916729182, "loss": 0.6085, "step": 1337 }, { "epoch": 0.09920664343441833, "grad_norm": 0.35564491152763367, "learning_rate": 0.0001866666666666667, "loss": 0.5742, "step": 1338 }, { "epoch": 0.09928078890783718, "grad_norm": 0.3727628290653229, "learning_rate": 0.00018665666416604152, "loss": 0.5252, "step": 1339 }, { "epoch": 0.09935493438125602, "grad_norm": 0.41271474957466125, "learning_rate": 0.00018664666166541636, "loss": 0.5695, "step": 1340 }, { "epoch": 0.09942907985467488, "grad_norm": 0.4185779094696045, "learning_rate": 0.0001866366591647912, "loss": 0.6053, "step": 1341 }, { "epoch": 0.09950322532809372, "grad_norm": 0.3791866898536682, "learning_rate": 0.00018662665666416606, "loss": 0.5782, "step": 1342 }, { "epoch": 0.09957737080151256, "grad_norm": 0.38509345054626465, "learning_rate": 0.0001866166541635409, "loss": 0.6093, "step": 1343 }, { "epoch": 0.09965151627493142, "grad_norm": 0.3689505159854889, "learning_rate": 0.00018660665166291574, "loss": 0.5541, "step": 1344 }, { "epoch": 0.09972566174835026, "grad_norm": 0.4085945188999176, "learning_rate": 0.00018659664916229057, "loss": 0.5418, "step": 1345 }, { "epoch": 0.09979980722176911, "grad_norm": 0.3679603338241577, "learning_rate": 0.00018658664666166544, "loss": 0.5553, "step": 1346 }, { "epoch": 0.09987395269518796, "grad_norm": 0.4125443398952484, "learning_rate": 0.00018657664416104025, "loss": 0.5766, "step": 1347 }, { "epoch": 0.09994809816860681, "grad_norm": 0.34689217805862427, "learning_rate": 0.0001865666416604151, "loss": 0.5146, "step": 1348 }, { "epoch": 0.10002224364202565, "grad_norm": 0.37115633487701416, "learning_rate": 0.00018655663915978995, "loss": 0.5551, "step": 1349 }, { "epoch": 0.10009638911544451, "grad_norm": 0.3487500250339508, "learning_rate": 0.0001865466366591648, "loss": 0.5409, "step": 1350 }, { "epoch": 0.10017053458886335, "grad_norm": 0.3742349147796631, "learning_rate": 0.00018653663415853962, "loss": 0.5383, "step": 1351 }, { "epoch": 0.1002446800622822, "grad_norm": 0.37116575241088867, "learning_rate": 0.0001865266316579145, "loss": 0.564, "step": 1352 }, { "epoch": 0.10031882553570104, "grad_norm": 0.38556909561157227, "learning_rate": 0.00018651662915728932, "loss": 0.5931, "step": 1353 }, { "epoch": 0.1003929710091199, "grad_norm": 0.377008855342865, "learning_rate": 0.0001865066266566642, "loss": 0.5649, "step": 1354 }, { "epoch": 0.10046711648253874, "grad_norm": 0.38170841336250305, "learning_rate": 0.00018649662415603903, "loss": 0.5624, "step": 1355 }, { "epoch": 0.10054126195595758, "grad_norm": 0.3862817883491516, "learning_rate": 0.00018648662165541386, "loss": 0.5604, "step": 1356 }, { "epoch": 0.10061540742937644, "grad_norm": 0.4047718346118927, "learning_rate": 0.0001864766191547887, "loss": 0.5929, "step": 1357 }, { "epoch": 0.10068955290279528, "grad_norm": 0.4095413088798523, "learning_rate": 0.00018646661665416354, "loss": 0.5943, "step": 1358 }, { "epoch": 0.10076369837621413, "grad_norm": 0.41396450996398926, "learning_rate": 0.0001864566141535384, "loss": 0.5912, "step": 1359 }, { "epoch": 0.10083784384963299, "grad_norm": 0.39415550231933594, "learning_rate": 0.00018644661165291324, "loss": 0.5437, "step": 1360 }, { "epoch": 0.10091198932305183, "grad_norm": 0.388150691986084, "learning_rate": 0.0001864366091522881, "loss": 0.5726, "step": 1361 }, { "epoch": 0.10098613479647067, "grad_norm": 0.393710196018219, "learning_rate": 0.0001864266066516629, "loss": 0.6134, "step": 1362 }, { "epoch": 0.10106028026988953, "grad_norm": 0.3757755160331726, "learning_rate": 0.00018641660415103778, "loss": 0.5534, "step": 1363 }, { "epoch": 0.10113442574330837, "grad_norm": 0.361391544342041, "learning_rate": 0.0001864066016504126, "loss": 0.5642, "step": 1364 }, { "epoch": 0.10120857121672722, "grad_norm": 0.3890390694141388, "learning_rate": 0.00018639659914978748, "loss": 0.5823, "step": 1365 }, { "epoch": 0.10128271669014606, "grad_norm": 0.3962453007698059, "learning_rate": 0.0001863865966491623, "loss": 0.6217, "step": 1366 }, { "epoch": 0.10135686216356492, "grad_norm": 0.3766905665397644, "learning_rate": 0.00018637659414853715, "loss": 0.5643, "step": 1367 }, { "epoch": 0.10143100763698376, "grad_norm": 0.4076567590236664, "learning_rate": 0.000186366591647912, "loss": 0.5884, "step": 1368 }, { "epoch": 0.1015051531104026, "grad_norm": 0.3830561637878418, "learning_rate": 0.00018635658914728683, "loss": 0.5755, "step": 1369 }, { "epoch": 0.10157929858382146, "grad_norm": 0.38991013169288635, "learning_rate": 0.00018634658664666166, "loss": 0.5483, "step": 1370 }, { "epoch": 0.1016534440572403, "grad_norm": 0.4020402133464813, "learning_rate": 0.00018633658414603653, "loss": 0.6298, "step": 1371 }, { "epoch": 0.10172758953065915, "grad_norm": 0.3768400549888611, "learning_rate": 0.00018632658164541136, "loss": 0.5918, "step": 1372 }, { "epoch": 0.101801735004078, "grad_norm": 0.3665834665298462, "learning_rate": 0.0001863165791447862, "loss": 0.5726, "step": 1373 }, { "epoch": 0.10187588047749685, "grad_norm": 0.3681175410747528, "learning_rate": 0.00018630657664416104, "loss": 0.541, "step": 1374 }, { "epoch": 0.1019500259509157, "grad_norm": 0.3550202250480652, "learning_rate": 0.0001862965741435359, "loss": 0.5119, "step": 1375 }, { "epoch": 0.10202417142433455, "grad_norm": 0.3764946162700653, "learning_rate": 0.00018628657164291074, "loss": 0.6152, "step": 1376 }, { "epoch": 0.1020983168977534, "grad_norm": 0.35790687799453735, "learning_rate": 0.00018627656914228558, "loss": 0.5457, "step": 1377 }, { "epoch": 0.10217246237117224, "grad_norm": 0.3725239038467407, "learning_rate": 0.00018626656664166041, "loss": 0.5808, "step": 1378 }, { "epoch": 0.10224660784459108, "grad_norm": 0.369703084230423, "learning_rate": 0.00018625656414103528, "loss": 0.5165, "step": 1379 }, { "epoch": 0.10232075331800994, "grad_norm": 0.3745657801628113, "learning_rate": 0.00018624656164041012, "loss": 0.5908, "step": 1380 }, { "epoch": 0.10239489879142878, "grad_norm": 0.3839459717273712, "learning_rate": 0.00018623655913978495, "loss": 0.5341, "step": 1381 }, { "epoch": 0.10246904426484763, "grad_norm": 0.4100271165370941, "learning_rate": 0.0001862265566391598, "loss": 0.5973, "step": 1382 }, { "epoch": 0.10254318973826648, "grad_norm": 0.37953972816467285, "learning_rate": 0.00018621655413853465, "loss": 0.5996, "step": 1383 }, { "epoch": 0.10261733521168533, "grad_norm": 0.38117966055870056, "learning_rate": 0.00018620655163790946, "loss": 0.6145, "step": 1384 }, { "epoch": 0.10269148068510417, "grad_norm": 0.3729732036590576, "learning_rate": 0.00018619654913728433, "loss": 0.5903, "step": 1385 }, { "epoch": 0.10276562615852303, "grad_norm": 0.35651907324790955, "learning_rate": 0.00018618654663665916, "loss": 0.5408, "step": 1386 }, { "epoch": 0.10283977163194187, "grad_norm": 0.40367212891578674, "learning_rate": 0.00018617654413603403, "loss": 0.603, "step": 1387 }, { "epoch": 0.10291391710536071, "grad_norm": 0.3703070878982544, "learning_rate": 0.00018616654163540887, "loss": 0.5918, "step": 1388 }, { "epoch": 0.10298806257877957, "grad_norm": 0.3441908061504364, "learning_rate": 0.0001861565391347837, "loss": 0.5262, "step": 1389 }, { "epoch": 0.10306220805219841, "grad_norm": 0.4165975749492645, "learning_rate": 0.00018614653663415854, "loss": 0.5899, "step": 1390 }, { "epoch": 0.10313635352561726, "grad_norm": 0.4198061525821686, "learning_rate": 0.0001861365341335334, "loss": 0.5777, "step": 1391 }, { "epoch": 0.10321049899903612, "grad_norm": 0.37887078523635864, "learning_rate": 0.00018612653163290824, "loss": 0.5656, "step": 1392 }, { "epoch": 0.10328464447245496, "grad_norm": 0.3591947555541992, "learning_rate": 0.00018611652913228308, "loss": 0.5035, "step": 1393 }, { "epoch": 0.1033587899458738, "grad_norm": 0.36542338132858276, "learning_rate": 0.00018610652663165792, "loss": 0.5471, "step": 1394 }, { "epoch": 0.10343293541929265, "grad_norm": 0.35746902227401733, "learning_rate": 0.00018609652413103275, "loss": 0.5139, "step": 1395 }, { "epoch": 0.1035070808927115, "grad_norm": 0.4088047444820404, "learning_rate": 0.00018608652163040762, "loss": 0.6383, "step": 1396 }, { "epoch": 0.10358122636613035, "grad_norm": 0.3936910033226013, "learning_rate": 0.00018607651912978245, "loss": 0.5428, "step": 1397 }, { "epoch": 0.10365537183954919, "grad_norm": 0.3712942600250244, "learning_rate": 0.00018606651662915732, "loss": 0.5327, "step": 1398 }, { "epoch": 0.10372951731296805, "grad_norm": 0.3952820599079132, "learning_rate": 0.00018605651412853213, "loss": 0.5995, "step": 1399 }, { "epoch": 0.10380366278638689, "grad_norm": 0.4029887914657593, "learning_rate": 0.000186046511627907, "loss": 0.6003, "step": 1400 }, { "epoch": 0.10387780825980573, "grad_norm": 0.3689008355140686, "learning_rate": 0.00018603650912728183, "loss": 0.5385, "step": 1401 }, { "epoch": 0.10395195373322459, "grad_norm": 0.38548725843429565, "learning_rate": 0.0001860265066266567, "loss": 0.5939, "step": 1402 }, { "epoch": 0.10402609920664344, "grad_norm": 0.3547968566417694, "learning_rate": 0.0001860165041260315, "loss": 0.5419, "step": 1403 }, { "epoch": 0.10410024468006228, "grad_norm": 0.362112432718277, "learning_rate": 0.00018600650162540637, "loss": 0.5151, "step": 1404 }, { "epoch": 0.10417439015348114, "grad_norm": 0.37966427206993103, "learning_rate": 0.0001859964991247812, "loss": 0.5751, "step": 1405 }, { "epoch": 0.10424853562689998, "grad_norm": 0.35579368472099304, "learning_rate": 0.00018598649662415604, "loss": 0.5408, "step": 1406 }, { "epoch": 0.10432268110031882, "grad_norm": 0.39456361532211304, "learning_rate": 0.00018597649412353088, "loss": 0.593, "step": 1407 }, { "epoch": 0.10439682657373767, "grad_norm": 0.39815017580986023, "learning_rate": 0.00018596649162290574, "loss": 0.5495, "step": 1408 }, { "epoch": 0.10447097204715652, "grad_norm": 0.365141898393631, "learning_rate": 0.00018595648912228058, "loss": 0.5819, "step": 1409 }, { "epoch": 0.10454511752057537, "grad_norm": 0.34814774990081787, "learning_rate": 0.00018594648662165542, "loss": 0.5462, "step": 1410 }, { "epoch": 0.10461926299399421, "grad_norm": 0.37303444743156433, "learning_rate": 0.00018593648412103025, "loss": 0.5821, "step": 1411 }, { "epoch": 0.10469340846741307, "grad_norm": 0.38536709547042847, "learning_rate": 0.00018592648162040512, "loss": 0.5846, "step": 1412 }, { "epoch": 0.10476755394083191, "grad_norm": 0.33586376905441284, "learning_rate": 0.00018591647911977996, "loss": 0.5576, "step": 1413 }, { "epoch": 0.10484169941425076, "grad_norm": 0.3602062165737152, "learning_rate": 0.0001859064766191548, "loss": 0.5356, "step": 1414 }, { "epoch": 0.10491584488766961, "grad_norm": 0.3798888325691223, "learning_rate": 0.00018589647411852963, "loss": 0.5666, "step": 1415 }, { "epoch": 0.10498999036108846, "grad_norm": 0.38017353415489197, "learning_rate": 0.0001858864716179045, "loss": 0.5763, "step": 1416 }, { "epoch": 0.1050641358345073, "grad_norm": 0.4083872437477112, "learning_rate": 0.00018587646911727933, "loss": 0.625, "step": 1417 }, { "epoch": 0.10513828130792616, "grad_norm": 0.3979604244232178, "learning_rate": 0.00018586646661665417, "loss": 0.611, "step": 1418 }, { "epoch": 0.105212426781345, "grad_norm": 0.3812127113342285, "learning_rate": 0.000185856464116029, "loss": 0.5995, "step": 1419 }, { "epoch": 0.10528657225476384, "grad_norm": 0.3847323954105377, "learning_rate": 0.00018584646161540387, "loss": 0.5527, "step": 1420 }, { "epoch": 0.1053607177281827, "grad_norm": 0.37490540742874146, "learning_rate": 0.0001858364591147787, "loss": 0.5829, "step": 1421 }, { "epoch": 0.10543486320160154, "grad_norm": 0.3822769522666931, "learning_rate": 0.00018582645661415354, "loss": 0.5884, "step": 1422 }, { "epoch": 0.10550900867502039, "grad_norm": 0.40239405632019043, "learning_rate": 0.00018581645411352838, "loss": 0.5796, "step": 1423 }, { "epoch": 0.10558315414843923, "grad_norm": 0.38269317150115967, "learning_rate": 0.00018580645161290325, "loss": 0.578, "step": 1424 }, { "epoch": 0.10565729962185809, "grad_norm": 0.3911905288696289, "learning_rate": 0.00018579644911227808, "loss": 0.5736, "step": 1425 }, { "epoch": 0.10573144509527693, "grad_norm": 0.38150689005851746, "learning_rate": 0.00018578644661165292, "loss": 0.5815, "step": 1426 }, { "epoch": 0.10580559056869578, "grad_norm": 0.38343045115470886, "learning_rate": 0.00018577644411102776, "loss": 0.5485, "step": 1427 }, { "epoch": 0.10587973604211463, "grad_norm": 0.38599830865859985, "learning_rate": 0.00018576644161040262, "loss": 0.546, "step": 1428 }, { "epoch": 0.10595388151553348, "grad_norm": 0.3977763056755066, "learning_rate": 0.00018575643910977746, "loss": 0.5359, "step": 1429 }, { "epoch": 0.10602802698895232, "grad_norm": 0.3899437487125397, "learning_rate": 0.0001857464366091523, "loss": 0.581, "step": 1430 }, { "epoch": 0.10610217246237118, "grad_norm": 0.39690667390823364, "learning_rate": 0.00018573643410852716, "loss": 0.5597, "step": 1431 }, { "epoch": 0.10617631793579002, "grad_norm": 0.3627343475818634, "learning_rate": 0.00018572643160790197, "loss": 0.5283, "step": 1432 }, { "epoch": 0.10625046340920886, "grad_norm": 0.3947216868400574, "learning_rate": 0.00018571642910727683, "loss": 0.5307, "step": 1433 }, { "epoch": 0.10632460888262772, "grad_norm": 0.36889415979385376, "learning_rate": 0.00018570642660665167, "loss": 0.5944, "step": 1434 }, { "epoch": 0.10639875435604657, "grad_norm": 0.3957497179508209, "learning_rate": 0.00018569642410602653, "loss": 0.5665, "step": 1435 }, { "epoch": 0.10647289982946541, "grad_norm": 0.3634052872657776, "learning_rate": 0.00018568642160540134, "loss": 0.5782, "step": 1436 }, { "epoch": 0.10654704530288425, "grad_norm": 0.3730221390724182, "learning_rate": 0.0001856764191047762, "loss": 0.5789, "step": 1437 }, { "epoch": 0.10662119077630311, "grad_norm": 0.3973667323589325, "learning_rate": 0.00018566641660415105, "loss": 0.5557, "step": 1438 }, { "epoch": 0.10669533624972195, "grad_norm": 0.3721894323825836, "learning_rate": 0.0001856564141035259, "loss": 0.5838, "step": 1439 }, { "epoch": 0.1067694817231408, "grad_norm": 0.3958568871021271, "learning_rate": 0.00018564641160290072, "loss": 0.5505, "step": 1440 }, { "epoch": 0.10684362719655965, "grad_norm": 0.3521881401538849, "learning_rate": 0.00018563640910227558, "loss": 0.5504, "step": 1441 }, { "epoch": 0.1069177726699785, "grad_norm": 0.379482626914978, "learning_rate": 0.00018562640660165042, "loss": 0.5925, "step": 1442 }, { "epoch": 0.10699191814339734, "grad_norm": 0.3513132333755493, "learning_rate": 0.00018561640410102526, "loss": 0.5455, "step": 1443 }, { "epoch": 0.1070660636168162, "grad_norm": 0.3796188235282898, "learning_rate": 0.0001856064016004001, "loss": 0.5325, "step": 1444 }, { "epoch": 0.10714020909023504, "grad_norm": 0.37160050868988037, "learning_rate": 0.00018559639909977496, "loss": 0.5693, "step": 1445 }, { "epoch": 0.10721435456365389, "grad_norm": 0.3555106222629547, "learning_rate": 0.0001855863965991498, "loss": 0.5797, "step": 1446 }, { "epoch": 0.10728850003707274, "grad_norm": 0.38509446382522583, "learning_rate": 0.00018557639409852463, "loss": 0.5956, "step": 1447 }, { "epoch": 0.10736264551049159, "grad_norm": 0.3797213137149811, "learning_rate": 0.00018556639159789947, "loss": 0.5121, "step": 1448 }, { "epoch": 0.10743679098391043, "grad_norm": 0.3650873601436615, "learning_rate": 0.00018555638909727434, "loss": 0.5848, "step": 1449 }, { "epoch": 0.10751093645732929, "grad_norm": 0.39790746569633484, "learning_rate": 0.00018554638659664917, "loss": 0.5956, "step": 1450 }, { "epoch": 0.10758508193074813, "grad_norm": 0.3909676671028137, "learning_rate": 0.000185536384096024, "loss": 0.5885, "step": 1451 }, { "epoch": 0.10765922740416697, "grad_norm": 0.3651200532913208, "learning_rate": 0.00018552638159539885, "loss": 0.5389, "step": 1452 }, { "epoch": 0.10773337287758582, "grad_norm": 0.3828370273113251, "learning_rate": 0.0001855163790947737, "loss": 0.5555, "step": 1453 }, { "epoch": 0.10780751835100467, "grad_norm": 0.39776965975761414, "learning_rate": 0.00018550637659414855, "loss": 0.5839, "step": 1454 }, { "epoch": 0.10788166382442352, "grad_norm": 0.39389339089393616, "learning_rate": 0.00018549637409352338, "loss": 0.5658, "step": 1455 }, { "epoch": 0.10795580929784236, "grad_norm": 0.3969157338142395, "learning_rate": 0.00018548637159289822, "loss": 0.5357, "step": 1456 }, { "epoch": 0.10802995477126122, "grad_norm": 0.38389942049980164, "learning_rate": 0.00018547636909227309, "loss": 0.5492, "step": 1457 }, { "epoch": 0.10810410024468006, "grad_norm": 0.39673706889152527, "learning_rate": 0.00018546636659164792, "loss": 0.6339, "step": 1458 }, { "epoch": 0.1081782457180989, "grad_norm": 0.3941386044025421, "learning_rate": 0.00018545636409102276, "loss": 0.5492, "step": 1459 }, { "epoch": 0.10825239119151776, "grad_norm": 0.38537222146987915, "learning_rate": 0.0001854463615903976, "loss": 0.5578, "step": 1460 }, { "epoch": 0.1083265366649366, "grad_norm": 0.37393680214881897, "learning_rate": 0.00018543635908977246, "loss": 0.5584, "step": 1461 }, { "epoch": 0.10840068213835545, "grad_norm": 0.37443676590919495, "learning_rate": 0.0001854263565891473, "loss": 0.5244, "step": 1462 }, { "epoch": 0.10847482761177431, "grad_norm": 0.3746628761291504, "learning_rate": 0.00018541635408852214, "loss": 0.5531, "step": 1463 }, { "epoch": 0.10854897308519315, "grad_norm": 0.3739517629146576, "learning_rate": 0.000185406351587897, "loss": 0.59, "step": 1464 }, { "epoch": 0.108623118558612, "grad_norm": 0.4022943079471588, "learning_rate": 0.00018539634908727184, "loss": 0.5948, "step": 1465 }, { "epoch": 0.10869726403203084, "grad_norm": 0.34484606981277466, "learning_rate": 0.00018538634658664667, "loss": 0.4898, "step": 1466 }, { "epoch": 0.1087714095054497, "grad_norm": 0.3797956705093384, "learning_rate": 0.0001853763440860215, "loss": 0.5862, "step": 1467 }, { "epoch": 0.10884555497886854, "grad_norm": 0.3910621404647827, "learning_rate": 0.00018536634158539638, "loss": 0.5494, "step": 1468 }, { "epoch": 0.10891970045228738, "grad_norm": 0.36396846175193787, "learning_rate": 0.00018535633908477119, "loss": 0.5259, "step": 1469 }, { "epoch": 0.10899384592570624, "grad_norm": 0.39145174622535706, "learning_rate": 0.00018534633658414605, "loss": 0.5667, "step": 1470 }, { "epoch": 0.10906799139912508, "grad_norm": 0.408829927444458, "learning_rate": 0.0001853363340835209, "loss": 0.612, "step": 1471 }, { "epoch": 0.10914213687254393, "grad_norm": 0.3637007176876068, "learning_rate": 0.00018532633158289575, "loss": 0.5863, "step": 1472 }, { "epoch": 0.10921628234596278, "grad_norm": 0.37665072083473206, "learning_rate": 0.00018531632908227056, "loss": 0.5309, "step": 1473 }, { "epoch": 0.10929042781938163, "grad_norm": 0.35054993629455566, "learning_rate": 0.00018530632658164543, "loss": 0.5146, "step": 1474 }, { "epoch": 0.10936457329280047, "grad_norm": 0.3783966898918152, "learning_rate": 0.00018529632408102026, "loss": 0.5834, "step": 1475 }, { "epoch": 0.10943871876621933, "grad_norm": 0.4067683219909668, "learning_rate": 0.00018528632158039513, "loss": 0.5936, "step": 1476 }, { "epoch": 0.10951286423963817, "grad_norm": 0.36456817388534546, "learning_rate": 0.00018527631907976994, "loss": 0.5518, "step": 1477 }, { "epoch": 0.10958700971305702, "grad_norm": 0.3969786465167999, "learning_rate": 0.0001852663165791448, "loss": 0.5435, "step": 1478 }, { "epoch": 0.10966115518647586, "grad_norm": 0.37326329946517944, "learning_rate": 0.00018525631407851964, "loss": 0.5714, "step": 1479 }, { "epoch": 0.10973530065989472, "grad_norm": 0.386311411857605, "learning_rate": 0.0001852463115778945, "loss": 0.5275, "step": 1480 }, { "epoch": 0.10980944613331356, "grad_norm": 0.4548892378807068, "learning_rate": 0.0001852363090772693, "loss": 0.629, "step": 1481 }, { "epoch": 0.1098835916067324, "grad_norm": 0.3870919942855835, "learning_rate": 0.00018522630657664418, "loss": 0.5898, "step": 1482 }, { "epoch": 0.10995773708015126, "grad_norm": 0.3981582820415497, "learning_rate": 0.000185216304076019, "loss": 0.5831, "step": 1483 }, { "epoch": 0.1100318825535701, "grad_norm": 0.41973876953125, "learning_rate": 0.00018520630157539385, "loss": 0.5922, "step": 1484 }, { "epoch": 0.11010602802698895, "grad_norm": 0.39266902208328247, "learning_rate": 0.0001851962990747687, "loss": 0.5897, "step": 1485 }, { "epoch": 0.1101801735004078, "grad_norm": 0.4015812873840332, "learning_rate": 0.00018518629657414355, "loss": 0.5776, "step": 1486 }, { "epoch": 0.11025431897382665, "grad_norm": 0.37408745288848877, "learning_rate": 0.0001851762940735184, "loss": 0.5551, "step": 1487 }, { "epoch": 0.11032846444724549, "grad_norm": 0.40008848905563354, "learning_rate": 0.00018516629157289323, "loss": 0.5744, "step": 1488 }, { "epoch": 0.11040260992066435, "grad_norm": 0.3901994228363037, "learning_rate": 0.00018515628907226806, "loss": 0.5999, "step": 1489 }, { "epoch": 0.11047675539408319, "grad_norm": 0.3378581702709198, "learning_rate": 0.00018514628657164293, "loss": 0.5208, "step": 1490 }, { "epoch": 0.11055090086750204, "grad_norm": 0.3547845482826233, "learning_rate": 0.00018513628407101776, "loss": 0.5117, "step": 1491 }, { "epoch": 0.1106250463409209, "grad_norm": 0.35066142678260803, "learning_rate": 0.0001851262815703926, "loss": 0.5412, "step": 1492 }, { "epoch": 0.11069919181433974, "grad_norm": 0.38293299078941345, "learning_rate": 0.00018511627906976744, "loss": 0.5844, "step": 1493 }, { "epoch": 0.11077333728775858, "grad_norm": 0.3892222046852112, "learning_rate": 0.0001851062765691423, "loss": 0.5577, "step": 1494 }, { "epoch": 0.11084748276117742, "grad_norm": 0.3685709536075592, "learning_rate": 0.00018509627406851714, "loss": 0.5586, "step": 1495 }, { "epoch": 0.11092162823459628, "grad_norm": 0.37634986639022827, "learning_rate": 0.00018508627156789198, "loss": 0.5512, "step": 1496 }, { "epoch": 0.11099577370801512, "grad_norm": 0.37456008791923523, "learning_rate": 0.00018507626906726684, "loss": 0.5618, "step": 1497 }, { "epoch": 0.11106991918143397, "grad_norm": 0.37206727266311646, "learning_rate": 0.00018506626656664168, "loss": 0.5497, "step": 1498 }, { "epoch": 0.11114406465485283, "grad_norm": 0.36362770199775696, "learning_rate": 0.00018505626406601651, "loss": 0.5591, "step": 1499 }, { "epoch": 0.11121821012827167, "grad_norm": 0.38339048624038696, "learning_rate": 0.00018504626156539135, "loss": 0.529, "step": 1500 }, { "epoch": 0.11129235560169051, "grad_norm": 0.3754494786262512, "learning_rate": 0.00018503625906476622, "loss": 0.5082, "step": 1501 }, { "epoch": 0.11136650107510937, "grad_norm": 0.38891807198524475, "learning_rate": 0.00018502625656414105, "loss": 0.5553, "step": 1502 }, { "epoch": 0.11144064654852821, "grad_norm": 0.40827542543411255, "learning_rate": 0.0001850162540635159, "loss": 0.5302, "step": 1503 }, { "epoch": 0.11151479202194706, "grad_norm": 0.39926573634147644, "learning_rate": 0.00018500625156289073, "loss": 0.566, "step": 1504 }, { "epoch": 0.11158893749536591, "grad_norm": 0.38201451301574707, "learning_rate": 0.0001849962490622656, "loss": 0.5579, "step": 1505 }, { "epoch": 0.11166308296878476, "grad_norm": 0.3790867328643799, "learning_rate": 0.0001849862465616404, "loss": 0.5361, "step": 1506 }, { "epoch": 0.1117372284422036, "grad_norm": 0.404154896736145, "learning_rate": 0.00018497624406101527, "loss": 0.5716, "step": 1507 }, { "epoch": 0.11181137391562244, "grad_norm": 0.3595219552516937, "learning_rate": 0.0001849662415603901, "loss": 0.538, "step": 1508 }, { "epoch": 0.1118855193890413, "grad_norm": 0.3691364824771881, "learning_rate": 0.00018495623905976497, "loss": 0.5396, "step": 1509 }, { "epoch": 0.11195966486246015, "grad_norm": 0.34873130917549133, "learning_rate": 0.00018494623655913978, "loss": 0.523, "step": 1510 }, { "epoch": 0.11203381033587899, "grad_norm": 0.3995990753173828, "learning_rate": 0.00018493623405851464, "loss": 0.5592, "step": 1511 }, { "epoch": 0.11210795580929785, "grad_norm": 0.3750894069671631, "learning_rate": 0.00018492623155788948, "loss": 0.5787, "step": 1512 }, { "epoch": 0.11218210128271669, "grad_norm": 0.38577306270599365, "learning_rate": 0.00018491622905726434, "loss": 0.5823, "step": 1513 }, { "epoch": 0.11225624675613553, "grad_norm": 0.408880352973938, "learning_rate": 0.00018490622655663915, "loss": 0.6071, "step": 1514 }, { "epoch": 0.11233039222955439, "grad_norm": 0.39408910274505615, "learning_rate": 0.00018489622405601402, "loss": 0.6096, "step": 1515 }, { "epoch": 0.11240453770297323, "grad_norm": 0.4137237071990967, "learning_rate": 0.00018488622155538885, "loss": 0.6453, "step": 1516 }, { "epoch": 0.11247868317639208, "grad_norm": 0.3643457889556885, "learning_rate": 0.00018487621905476372, "loss": 0.526, "step": 1517 }, { "epoch": 0.11255282864981093, "grad_norm": 0.3844296932220459, "learning_rate": 0.00018486621655413853, "loss": 0.5853, "step": 1518 }, { "epoch": 0.11262697412322978, "grad_norm": 0.4374054968357086, "learning_rate": 0.0001848562140535134, "loss": 0.6056, "step": 1519 }, { "epoch": 0.11270111959664862, "grad_norm": 0.36784759163856506, "learning_rate": 0.00018484621155288823, "loss": 0.5828, "step": 1520 }, { "epoch": 0.11277526507006748, "grad_norm": 0.35659000277519226, "learning_rate": 0.00018483620905226307, "loss": 0.5047, "step": 1521 }, { "epoch": 0.11284941054348632, "grad_norm": 0.3749816417694092, "learning_rate": 0.0001848262065516379, "loss": 0.5641, "step": 1522 }, { "epoch": 0.11292355601690517, "grad_norm": 0.4042738378047943, "learning_rate": 0.00018481620405101277, "loss": 0.5663, "step": 1523 }, { "epoch": 0.11299770149032401, "grad_norm": 0.4236210584640503, "learning_rate": 0.0001848062015503876, "loss": 0.5938, "step": 1524 }, { "epoch": 0.11307184696374287, "grad_norm": 0.4035767614841461, "learning_rate": 0.00018479619904976244, "loss": 0.634, "step": 1525 }, { "epoch": 0.11314599243716171, "grad_norm": 0.36568567156791687, "learning_rate": 0.00018478619654913728, "loss": 0.5256, "step": 1526 }, { "epoch": 0.11322013791058055, "grad_norm": 0.41936516761779785, "learning_rate": 0.00018477619404851214, "loss": 0.6215, "step": 1527 }, { "epoch": 0.11329428338399941, "grad_norm": 0.3777017891407013, "learning_rate": 0.00018476619154788698, "loss": 0.5821, "step": 1528 }, { "epoch": 0.11336842885741825, "grad_norm": 0.3703807592391968, "learning_rate": 0.00018475618904726182, "loss": 0.5407, "step": 1529 }, { "epoch": 0.1134425743308371, "grad_norm": 0.4115603566169739, "learning_rate": 0.00018474618654663668, "loss": 0.6473, "step": 1530 }, { "epoch": 0.11351671980425596, "grad_norm": 0.3756676912307739, "learning_rate": 0.00018473618404601152, "loss": 0.554, "step": 1531 }, { "epoch": 0.1135908652776748, "grad_norm": 0.3681916296482086, "learning_rate": 0.00018472618154538636, "loss": 0.531, "step": 1532 }, { "epoch": 0.11366501075109364, "grad_norm": 0.3987833559513092, "learning_rate": 0.0001847161790447612, "loss": 0.5555, "step": 1533 }, { "epoch": 0.1137391562245125, "grad_norm": 0.36592814326286316, "learning_rate": 0.00018470617654413606, "loss": 0.557, "step": 1534 }, { "epoch": 0.11381330169793134, "grad_norm": 0.38465800881385803, "learning_rate": 0.0001846961740435109, "loss": 0.5693, "step": 1535 }, { "epoch": 0.11388744717135019, "grad_norm": 0.36368006467819214, "learning_rate": 0.00018468617154288573, "loss": 0.5299, "step": 1536 }, { "epoch": 0.11396159264476903, "grad_norm": 0.37407180666923523, "learning_rate": 0.00018467616904226057, "loss": 0.5739, "step": 1537 }, { "epoch": 0.11403573811818789, "grad_norm": 0.40974897146224976, "learning_rate": 0.00018466616654163543, "loss": 0.596, "step": 1538 }, { "epoch": 0.11410988359160673, "grad_norm": 0.3795612156391144, "learning_rate": 0.00018465616404101027, "loss": 0.5506, "step": 1539 }, { "epoch": 0.11418402906502557, "grad_norm": 0.37713730335235596, "learning_rate": 0.0001846461615403851, "loss": 0.5737, "step": 1540 }, { "epoch": 0.11425817453844443, "grad_norm": 0.36196187138557434, "learning_rate": 0.00018463615903975994, "loss": 0.5436, "step": 1541 }, { "epoch": 0.11433232001186328, "grad_norm": 0.3685915768146515, "learning_rate": 0.0001846261565391348, "loss": 0.545, "step": 1542 }, { "epoch": 0.11440646548528212, "grad_norm": 0.3769037425518036, "learning_rate": 0.00018461615403850962, "loss": 0.5793, "step": 1543 }, { "epoch": 0.11448061095870098, "grad_norm": 0.3787190616130829, "learning_rate": 0.00018460615153788448, "loss": 0.5795, "step": 1544 }, { "epoch": 0.11455475643211982, "grad_norm": 0.36132967472076416, "learning_rate": 0.00018459614903725932, "loss": 0.5562, "step": 1545 }, { "epoch": 0.11462890190553866, "grad_norm": 0.3851565420627594, "learning_rate": 0.00018458614653663418, "loss": 0.5239, "step": 1546 }, { "epoch": 0.11470304737895752, "grad_norm": 0.40275731682777405, "learning_rate": 0.000184576144036009, "loss": 0.5759, "step": 1547 }, { "epoch": 0.11477719285237636, "grad_norm": 0.37576824426651, "learning_rate": 0.00018456614153538386, "loss": 0.5541, "step": 1548 }, { "epoch": 0.11485133832579521, "grad_norm": 0.4205724596977234, "learning_rate": 0.0001845561390347587, "loss": 0.5468, "step": 1549 }, { "epoch": 0.11492548379921406, "grad_norm": 0.3861696720123291, "learning_rate": 0.00018454613653413356, "loss": 0.5556, "step": 1550 }, { "epoch": 0.11499962927263291, "grad_norm": 0.37609097361564636, "learning_rate": 0.00018453613403350837, "loss": 0.5903, "step": 1551 }, { "epoch": 0.11507377474605175, "grad_norm": 0.38272303342819214, "learning_rate": 0.00018452613153288323, "loss": 0.5749, "step": 1552 }, { "epoch": 0.1151479202194706, "grad_norm": 0.37557661533355713, "learning_rate": 0.00018451612903225807, "loss": 0.5804, "step": 1553 }, { "epoch": 0.11522206569288945, "grad_norm": 0.3823698163032532, "learning_rate": 0.00018450612653163293, "loss": 0.573, "step": 1554 }, { "epoch": 0.1152962111663083, "grad_norm": 0.38363975286483765, "learning_rate": 0.00018449612403100774, "loss": 0.5626, "step": 1555 }, { "epoch": 0.11537035663972714, "grad_norm": 0.39004671573638916, "learning_rate": 0.0001844861215303826, "loss": 0.6326, "step": 1556 }, { "epoch": 0.115444502113146, "grad_norm": 0.36673760414123535, "learning_rate": 0.00018447611902975745, "loss": 0.5727, "step": 1557 }, { "epoch": 0.11551864758656484, "grad_norm": 0.3535611033439636, "learning_rate": 0.00018446611652913228, "loss": 0.5763, "step": 1558 }, { "epoch": 0.11559279305998368, "grad_norm": 0.3791483938694, "learning_rate": 0.00018445611402850712, "loss": 0.5225, "step": 1559 }, { "epoch": 0.11566693853340254, "grad_norm": 0.3389028012752533, "learning_rate": 0.00018444611152788198, "loss": 0.4985, "step": 1560 }, { "epoch": 0.11574108400682138, "grad_norm": 0.3666473627090454, "learning_rate": 0.00018443610902725682, "loss": 0.5333, "step": 1561 }, { "epoch": 0.11581522948024023, "grad_norm": 0.3435836434364319, "learning_rate": 0.00018442610652663166, "loss": 0.5187, "step": 1562 }, { "epoch": 0.11588937495365909, "grad_norm": 0.38036611676216125, "learning_rate": 0.0001844161040260065, "loss": 0.5971, "step": 1563 }, { "epoch": 0.11596352042707793, "grad_norm": 0.3942089378833771, "learning_rate": 0.00018440610152538136, "loss": 0.597, "step": 1564 }, { "epoch": 0.11603766590049677, "grad_norm": 0.3586452305316925, "learning_rate": 0.0001843960990247562, "loss": 0.5581, "step": 1565 }, { "epoch": 0.11611181137391562, "grad_norm": 0.3796660304069519, "learning_rate": 0.00018438609652413103, "loss": 0.5492, "step": 1566 }, { "epoch": 0.11618595684733447, "grad_norm": 0.3495527505874634, "learning_rate": 0.0001843760940235059, "loss": 0.5502, "step": 1567 }, { "epoch": 0.11626010232075332, "grad_norm": 0.3855728805065155, "learning_rate": 0.00018436609152288073, "loss": 0.5492, "step": 1568 }, { "epoch": 0.11633424779417216, "grad_norm": 0.3777061402797699, "learning_rate": 0.00018435608902225557, "loss": 0.5858, "step": 1569 }, { "epoch": 0.11640839326759102, "grad_norm": 0.36846473813056946, "learning_rate": 0.0001843460865216304, "loss": 0.5496, "step": 1570 }, { "epoch": 0.11648253874100986, "grad_norm": 0.37890946865081787, "learning_rate": 0.00018433608402100527, "loss": 0.5587, "step": 1571 }, { "epoch": 0.1165566842144287, "grad_norm": 0.3724903166294098, "learning_rate": 0.0001843260815203801, "loss": 0.5738, "step": 1572 }, { "epoch": 0.11663082968784756, "grad_norm": 0.33685946464538574, "learning_rate": 0.00018431607901975495, "loss": 0.5388, "step": 1573 }, { "epoch": 0.1167049751612664, "grad_norm": 0.3653276264667511, "learning_rate": 0.00018430607651912978, "loss": 0.543, "step": 1574 }, { "epoch": 0.11677912063468525, "grad_norm": 0.37650924921035767, "learning_rate": 0.00018429607401850465, "loss": 0.5775, "step": 1575 }, { "epoch": 0.1168532661081041, "grad_norm": 0.36204424500465393, "learning_rate": 0.00018428607151787949, "loss": 0.5712, "step": 1576 }, { "epoch": 0.11692741158152295, "grad_norm": 0.34634870290756226, "learning_rate": 0.00018427606901725432, "loss": 0.4975, "step": 1577 }, { "epoch": 0.1170015570549418, "grad_norm": 0.36759600043296814, "learning_rate": 0.00018426606651662916, "loss": 0.5383, "step": 1578 }, { "epoch": 0.11707570252836064, "grad_norm": 0.41109347343444824, "learning_rate": 0.00018425606401600402, "loss": 0.6016, "step": 1579 }, { "epoch": 0.1171498480017795, "grad_norm": 0.3480721414089203, "learning_rate": 0.00018424606151537883, "loss": 0.5445, "step": 1580 }, { "epoch": 0.11722399347519834, "grad_norm": 0.3860350549221039, "learning_rate": 0.0001842360590147537, "loss": 0.5654, "step": 1581 }, { "epoch": 0.11729813894861718, "grad_norm": 0.37105920910835266, "learning_rate": 0.00018422605651412854, "loss": 0.532, "step": 1582 }, { "epoch": 0.11737228442203604, "grad_norm": 0.3496335744857788, "learning_rate": 0.0001842160540135034, "loss": 0.5587, "step": 1583 }, { "epoch": 0.11744642989545488, "grad_norm": 0.3834249973297119, "learning_rate": 0.0001842060515128782, "loss": 0.5726, "step": 1584 }, { "epoch": 0.11752057536887373, "grad_norm": 0.35640764236450195, "learning_rate": 0.00018419604901225307, "loss": 0.5694, "step": 1585 }, { "epoch": 0.11759472084229258, "grad_norm": 0.3988732695579529, "learning_rate": 0.0001841860465116279, "loss": 0.5545, "step": 1586 }, { "epoch": 0.11766886631571143, "grad_norm": 0.3749835193157196, "learning_rate": 0.00018417604401100278, "loss": 0.5621, "step": 1587 }, { "epoch": 0.11774301178913027, "grad_norm": 0.3395446240901947, "learning_rate": 0.00018416604151037759, "loss": 0.5186, "step": 1588 }, { "epoch": 0.11781715726254913, "grad_norm": 0.37658238410949707, "learning_rate": 0.00018415603900975245, "loss": 0.5663, "step": 1589 }, { "epoch": 0.11789130273596797, "grad_norm": 0.38259008526802063, "learning_rate": 0.0001841460365091273, "loss": 0.6096, "step": 1590 }, { "epoch": 0.11796544820938681, "grad_norm": 0.3749852776527405, "learning_rate": 0.00018413603400850215, "loss": 0.5445, "step": 1591 }, { "epoch": 0.11803959368280567, "grad_norm": 0.3912453055381775, "learning_rate": 0.00018412603150787696, "loss": 0.5894, "step": 1592 }, { "epoch": 0.11811373915622451, "grad_norm": 0.3655342161655426, "learning_rate": 0.00018411602900725182, "loss": 0.5243, "step": 1593 }, { "epoch": 0.11818788462964336, "grad_norm": 0.38319289684295654, "learning_rate": 0.00018410602650662666, "loss": 0.576, "step": 1594 }, { "epoch": 0.1182620301030622, "grad_norm": 0.358384370803833, "learning_rate": 0.0001840960240060015, "loss": 0.5491, "step": 1595 }, { "epoch": 0.11833617557648106, "grad_norm": 0.3626919686794281, "learning_rate": 0.00018408602150537634, "loss": 0.5482, "step": 1596 }, { "epoch": 0.1184103210498999, "grad_norm": 0.37597882747650146, "learning_rate": 0.0001840760190047512, "loss": 0.5317, "step": 1597 }, { "epoch": 0.11848446652331875, "grad_norm": 0.4176007807254791, "learning_rate": 0.00018406601650412604, "loss": 0.5273, "step": 1598 }, { "epoch": 0.1185586119967376, "grad_norm": 0.39862340688705444, "learning_rate": 0.00018405601400350087, "loss": 0.6131, "step": 1599 }, { "epoch": 0.11863275747015645, "grad_norm": 0.3743409514427185, "learning_rate": 0.00018404601150287574, "loss": 0.5309, "step": 1600 }, { "epoch": 0.11870690294357529, "grad_norm": 0.40256455540657043, "learning_rate": 0.00018403600900225058, "loss": 0.6025, "step": 1601 }, { "epoch": 0.11878104841699415, "grad_norm": 0.41396719217300415, "learning_rate": 0.0001840260065016254, "loss": 0.6148, "step": 1602 }, { "epoch": 0.11885519389041299, "grad_norm": 0.40761780738830566, "learning_rate": 0.00018401600400100025, "loss": 0.5408, "step": 1603 }, { "epoch": 0.11892933936383183, "grad_norm": 0.3562292456626892, "learning_rate": 0.00018400600150037511, "loss": 0.5282, "step": 1604 }, { "epoch": 0.11900348483725069, "grad_norm": 0.39805248379707336, "learning_rate": 0.00018399599899974995, "loss": 0.5758, "step": 1605 }, { "epoch": 0.11907763031066954, "grad_norm": 0.3734200596809387, "learning_rate": 0.0001839859964991248, "loss": 0.5848, "step": 1606 }, { "epoch": 0.11915177578408838, "grad_norm": 0.39312198758125305, "learning_rate": 0.00018397599399849963, "loss": 0.6281, "step": 1607 }, { "epoch": 0.11922592125750722, "grad_norm": 0.3700329065322876, "learning_rate": 0.0001839659914978745, "loss": 0.5526, "step": 1608 }, { "epoch": 0.11930006673092608, "grad_norm": 0.36237528920173645, "learning_rate": 0.00018395598899724933, "loss": 0.5654, "step": 1609 }, { "epoch": 0.11937421220434492, "grad_norm": 0.42808645963668823, "learning_rate": 0.00018394598649662416, "loss": 0.5394, "step": 1610 }, { "epoch": 0.11944835767776377, "grad_norm": 0.37177932262420654, "learning_rate": 0.000183935983995999, "loss": 0.5767, "step": 1611 }, { "epoch": 0.11952250315118262, "grad_norm": 0.3856918215751648, "learning_rate": 0.00018392598149537387, "loss": 0.5573, "step": 1612 }, { "epoch": 0.11959664862460147, "grad_norm": 0.374811053276062, "learning_rate": 0.0001839159789947487, "loss": 0.556, "step": 1613 }, { "epoch": 0.11967079409802031, "grad_norm": 0.427131712436676, "learning_rate": 0.00018390597649412354, "loss": 0.5786, "step": 1614 }, { "epoch": 0.11974493957143917, "grad_norm": 0.3873565196990967, "learning_rate": 0.00018389597399349838, "loss": 0.5638, "step": 1615 }, { "epoch": 0.11981908504485801, "grad_norm": 0.362878680229187, "learning_rate": 0.00018388597149287324, "loss": 0.562, "step": 1616 }, { "epoch": 0.11989323051827686, "grad_norm": 0.4178910255432129, "learning_rate": 0.00018387596899224805, "loss": 0.6097, "step": 1617 }, { "epoch": 0.11996737599169571, "grad_norm": 0.4004707634449005, "learning_rate": 0.00018386596649162291, "loss": 0.5439, "step": 1618 }, { "epoch": 0.12004152146511456, "grad_norm": 0.3634319007396698, "learning_rate": 0.00018385596399099775, "loss": 0.5398, "step": 1619 }, { "epoch": 0.1201156669385334, "grad_norm": 0.409654825925827, "learning_rate": 0.00018384596149037262, "loss": 0.5885, "step": 1620 }, { "epoch": 0.12018981241195226, "grad_norm": 0.41101983189582825, "learning_rate": 0.00018383595898974743, "loss": 0.5692, "step": 1621 }, { "epoch": 0.1202639578853711, "grad_norm": 0.3789733648300171, "learning_rate": 0.0001838259564891223, "loss": 0.5587, "step": 1622 }, { "epoch": 0.12033810335878994, "grad_norm": 0.3954521715641022, "learning_rate": 0.00018381595398849713, "loss": 0.5816, "step": 1623 }, { "epoch": 0.12041224883220879, "grad_norm": 0.38137438893318176, "learning_rate": 0.000183805951487872, "loss": 0.5823, "step": 1624 }, { "epoch": 0.12048639430562764, "grad_norm": 0.3978256285190582, "learning_rate": 0.0001837959489872468, "loss": 0.5774, "step": 1625 }, { "epoch": 0.12056053977904649, "grad_norm": 0.39179152250289917, "learning_rate": 0.00018378594648662167, "loss": 0.5986, "step": 1626 }, { "epoch": 0.12063468525246533, "grad_norm": 0.385206401348114, "learning_rate": 0.0001837759439859965, "loss": 0.5729, "step": 1627 }, { "epoch": 0.12070883072588419, "grad_norm": 0.38596799969673157, "learning_rate": 0.00018376594148537137, "loss": 0.5861, "step": 1628 }, { "epoch": 0.12078297619930303, "grad_norm": 0.38221311569213867, "learning_rate": 0.00018375593898474618, "loss": 0.5378, "step": 1629 }, { "epoch": 0.12085712167272188, "grad_norm": 0.38120192289352417, "learning_rate": 0.00018374593648412104, "loss": 0.5855, "step": 1630 }, { "epoch": 0.12093126714614073, "grad_norm": 0.37497755885124207, "learning_rate": 0.00018373593398349588, "loss": 0.5541, "step": 1631 }, { "epoch": 0.12100541261955958, "grad_norm": 0.3498243987560272, "learning_rate": 0.00018372593148287072, "loss": 0.5278, "step": 1632 }, { "epoch": 0.12107955809297842, "grad_norm": 0.37957361340522766, "learning_rate": 0.00018371592898224558, "loss": 0.5904, "step": 1633 }, { "epoch": 0.12115370356639728, "grad_norm": 0.36662477254867554, "learning_rate": 0.00018370592648162042, "loss": 0.5482, "step": 1634 }, { "epoch": 0.12122784903981612, "grad_norm": 0.3734324276447296, "learning_rate": 0.00018369592398099525, "loss": 0.5513, "step": 1635 }, { "epoch": 0.12130199451323496, "grad_norm": 0.3640524446964264, "learning_rate": 0.0001836859214803701, "loss": 0.5236, "step": 1636 }, { "epoch": 0.12137613998665381, "grad_norm": 0.4168744385242462, "learning_rate": 0.00018367591897974495, "loss": 0.559, "step": 1637 }, { "epoch": 0.12145028546007267, "grad_norm": 0.375152587890625, "learning_rate": 0.0001836659164791198, "loss": 0.5406, "step": 1638 }, { "epoch": 0.12152443093349151, "grad_norm": 0.3908528983592987, "learning_rate": 0.00018365591397849463, "loss": 0.554, "step": 1639 }, { "epoch": 0.12159857640691035, "grad_norm": 0.3814478814601898, "learning_rate": 0.00018364591147786947, "loss": 0.5518, "step": 1640 }, { "epoch": 0.12167272188032921, "grad_norm": 0.3837980329990387, "learning_rate": 0.00018363590897724433, "loss": 0.6061, "step": 1641 }, { "epoch": 0.12174686735374805, "grad_norm": 0.3818836510181427, "learning_rate": 0.00018362590647661917, "loss": 0.5578, "step": 1642 }, { "epoch": 0.1218210128271669, "grad_norm": 0.4040834307670593, "learning_rate": 0.000183615903975994, "loss": 0.5688, "step": 1643 }, { "epoch": 0.12189515830058575, "grad_norm": 0.3808279037475586, "learning_rate": 0.00018360590147536884, "loss": 0.5516, "step": 1644 }, { "epoch": 0.1219693037740046, "grad_norm": 0.3460055887699127, "learning_rate": 0.0001835958989747437, "loss": 0.5639, "step": 1645 }, { "epoch": 0.12204344924742344, "grad_norm": 0.37035420536994934, "learning_rate": 0.00018358589647411854, "loss": 0.5534, "step": 1646 }, { "epoch": 0.1221175947208423, "grad_norm": 0.3775860369205475, "learning_rate": 0.00018357589397349338, "loss": 0.5488, "step": 1647 }, { "epoch": 0.12219174019426114, "grad_norm": 0.3949218690395355, "learning_rate": 0.00018356589147286822, "loss": 0.6116, "step": 1648 }, { "epoch": 0.12226588566767999, "grad_norm": 0.3738548755645752, "learning_rate": 0.00018355588897224308, "loss": 0.5651, "step": 1649 }, { "epoch": 0.12234003114109884, "grad_norm": 0.3909963071346283, "learning_rate": 0.00018354588647161792, "loss": 0.5822, "step": 1650 }, { "epoch": 0.12241417661451769, "grad_norm": 0.3582206666469574, "learning_rate": 0.00018353588397099276, "loss": 0.563, "step": 1651 }, { "epoch": 0.12248832208793653, "grad_norm": 0.33646246790885925, "learning_rate": 0.0001835258814703676, "loss": 0.4963, "step": 1652 }, { "epoch": 0.12256246756135537, "grad_norm": 0.3600361943244934, "learning_rate": 0.00018351587896974246, "loss": 0.5254, "step": 1653 }, { "epoch": 0.12263661303477423, "grad_norm": 0.3729383647441864, "learning_rate": 0.00018350587646911727, "loss": 0.5413, "step": 1654 }, { "epoch": 0.12271075850819307, "grad_norm": 0.4006785452365875, "learning_rate": 0.00018349587396849213, "loss": 0.5817, "step": 1655 }, { "epoch": 0.12278490398161192, "grad_norm": 0.3590477406978607, "learning_rate": 0.00018348587146786697, "loss": 0.5295, "step": 1656 }, { "epoch": 0.12285904945503077, "grad_norm": 0.373159259557724, "learning_rate": 0.00018347586896724183, "loss": 0.5667, "step": 1657 }, { "epoch": 0.12293319492844962, "grad_norm": 0.4050595164299011, "learning_rate": 0.00018346586646661664, "loss": 0.5983, "step": 1658 }, { "epoch": 0.12300734040186846, "grad_norm": 0.3855028450489044, "learning_rate": 0.0001834558639659915, "loss": 0.5883, "step": 1659 }, { "epoch": 0.12308148587528732, "grad_norm": 0.3845366835594177, "learning_rate": 0.00018344586146536634, "loss": 0.5442, "step": 1660 }, { "epoch": 0.12315563134870616, "grad_norm": 0.3565390408039093, "learning_rate": 0.0001834358589647412, "loss": 0.5078, "step": 1661 }, { "epoch": 0.123229776822125, "grad_norm": 0.38088178634643555, "learning_rate": 0.00018342585646411602, "loss": 0.5674, "step": 1662 }, { "epoch": 0.12330392229554386, "grad_norm": 0.3549913763999939, "learning_rate": 0.00018341585396349088, "loss": 0.5304, "step": 1663 }, { "epoch": 0.12337806776896271, "grad_norm": 0.3728253245353699, "learning_rate": 0.00018340585146286572, "loss": 0.5537, "step": 1664 }, { "epoch": 0.12345221324238155, "grad_norm": 0.41011738777160645, "learning_rate": 0.00018339584896224058, "loss": 0.5431, "step": 1665 }, { "epoch": 0.1235263587158004, "grad_norm": 0.39000770449638367, "learning_rate": 0.00018338584646161542, "loss": 0.5119, "step": 1666 }, { "epoch": 0.12360050418921925, "grad_norm": 0.37467843294143677, "learning_rate": 0.00018337584396099026, "loss": 0.5292, "step": 1667 }, { "epoch": 0.1236746496626381, "grad_norm": 0.4143701493740082, "learning_rate": 0.0001833658414603651, "loss": 0.5527, "step": 1668 }, { "epoch": 0.12374879513605694, "grad_norm": 0.3717619478702545, "learning_rate": 0.00018335583895973993, "loss": 0.5303, "step": 1669 }, { "epoch": 0.1238229406094758, "grad_norm": 0.4372784197330475, "learning_rate": 0.0001833458364591148, "loss": 0.5619, "step": 1670 }, { "epoch": 0.12389708608289464, "grad_norm": 0.37712761759757996, "learning_rate": 0.00018333583395848963, "loss": 0.5737, "step": 1671 }, { "epoch": 0.12397123155631348, "grad_norm": 0.37084683775901794, "learning_rate": 0.00018332583145786447, "loss": 0.5256, "step": 1672 }, { "epoch": 0.12404537702973234, "grad_norm": 0.39609646797180176, "learning_rate": 0.0001833158289572393, "loss": 0.5388, "step": 1673 }, { "epoch": 0.12411952250315118, "grad_norm": 0.3540687561035156, "learning_rate": 0.00018330582645661417, "loss": 0.5407, "step": 1674 }, { "epoch": 0.12419366797657003, "grad_norm": 0.3537897765636444, "learning_rate": 0.000183295823955989, "loss": 0.5463, "step": 1675 }, { "epoch": 0.12426781344998888, "grad_norm": 0.36832985281944275, "learning_rate": 0.00018328582145536387, "loss": 0.5313, "step": 1676 }, { "epoch": 0.12434195892340773, "grad_norm": 0.3841755986213684, "learning_rate": 0.00018327581895473868, "loss": 0.5585, "step": 1677 }, { "epoch": 0.12441610439682657, "grad_norm": 0.39496296644210815, "learning_rate": 0.00018326581645411355, "loss": 0.5412, "step": 1678 }, { "epoch": 0.12449024987024541, "grad_norm": 0.37934204936027527, "learning_rate": 0.00018325581395348838, "loss": 0.5543, "step": 1679 }, { "epoch": 0.12456439534366427, "grad_norm": 0.3934924900531769, "learning_rate": 0.00018324581145286322, "loss": 0.5772, "step": 1680 }, { "epoch": 0.12463854081708312, "grad_norm": 0.35899853706359863, "learning_rate": 0.00018323580895223806, "loss": 0.5536, "step": 1681 }, { "epoch": 0.12471268629050196, "grad_norm": 0.37265121936798096, "learning_rate": 0.00018322580645161292, "loss": 0.5805, "step": 1682 }, { "epoch": 0.12478683176392082, "grad_norm": 0.37083935737609863, "learning_rate": 0.00018321580395098776, "loss": 0.5314, "step": 1683 }, { "epoch": 0.12486097723733966, "grad_norm": 0.36682936549186707, "learning_rate": 0.0001832058014503626, "loss": 0.5701, "step": 1684 }, { "epoch": 0.1249351227107585, "grad_norm": 0.3866897225379944, "learning_rate": 0.00018319579894973743, "loss": 0.5727, "step": 1685 }, { "epoch": 0.12500926818417735, "grad_norm": 0.38112178444862366, "learning_rate": 0.0001831857964491123, "loss": 0.5404, "step": 1686 }, { "epoch": 0.1250834136575962, "grad_norm": 0.3958095610141754, "learning_rate": 0.00018317579394848713, "loss": 0.5666, "step": 1687 }, { "epoch": 0.12515755913101506, "grad_norm": 0.36120569705963135, "learning_rate": 0.00018316579144786197, "loss": 0.5523, "step": 1688 }, { "epoch": 0.1252317046044339, "grad_norm": 0.37140512466430664, "learning_rate": 0.0001831557889472368, "loss": 0.5785, "step": 1689 }, { "epoch": 0.12530585007785275, "grad_norm": 0.37682974338531494, "learning_rate": 0.00018314578644661167, "loss": 0.5694, "step": 1690 }, { "epoch": 0.1253799955512716, "grad_norm": 0.3910876214504242, "learning_rate": 0.00018313578394598648, "loss": 0.5371, "step": 1691 }, { "epoch": 0.12545414102469044, "grad_norm": 0.35089483857154846, "learning_rate": 0.00018312578144536135, "loss": 0.5406, "step": 1692 }, { "epoch": 0.1255282864981093, "grad_norm": 0.37252622842788696, "learning_rate": 0.00018311577894473618, "loss": 0.5483, "step": 1693 }, { "epoch": 0.12560243197152815, "grad_norm": 0.37846219539642334, "learning_rate": 0.00018310577644411105, "loss": 0.5728, "step": 1694 }, { "epoch": 0.12567657744494698, "grad_norm": 0.36474674940109253, "learning_rate": 0.00018309577394348586, "loss": 0.5593, "step": 1695 }, { "epoch": 0.12575072291836584, "grad_norm": 0.3508955240249634, "learning_rate": 0.00018308577144286072, "loss": 0.5698, "step": 1696 }, { "epoch": 0.1258248683917847, "grad_norm": 0.36404865980148315, "learning_rate": 0.00018307576894223556, "loss": 0.5348, "step": 1697 }, { "epoch": 0.12589901386520352, "grad_norm": 0.35045114159584045, "learning_rate": 0.00018306576644161042, "loss": 0.5448, "step": 1698 }, { "epoch": 0.12597315933862238, "grad_norm": 0.38537731766700745, "learning_rate": 0.00018305576394098526, "loss": 0.5652, "step": 1699 }, { "epoch": 0.12604730481204124, "grad_norm": 0.37192502617836, "learning_rate": 0.0001830457614403601, "loss": 0.5255, "step": 1700 }, { "epoch": 0.12612145028546007, "grad_norm": 0.38613569736480713, "learning_rate": 0.00018303575893973494, "loss": 0.5428, "step": 1701 }, { "epoch": 0.12619559575887893, "grad_norm": 0.37625306844711304, "learning_rate": 0.0001830257564391098, "loss": 0.5273, "step": 1702 }, { "epoch": 0.12626974123229776, "grad_norm": 0.39964690804481506, "learning_rate": 0.00018301575393848464, "loss": 0.5957, "step": 1703 }, { "epoch": 0.1263438867057166, "grad_norm": 0.420776903629303, "learning_rate": 0.00018300575143785947, "loss": 0.6219, "step": 1704 }, { "epoch": 0.12641803217913547, "grad_norm": 0.38265788555145264, "learning_rate": 0.0001829957489372343, "loss": 0.5549, "step": 1705 }, { "epoch": 0.1264921776525543, "grad_norm": 0.41560307145118713, "learning_rate": 0.00018298574643660915, "loss": 0.5318, "step": 1706 }, { "epoch": 0.12656632312597316, "grad_norm": 0.3824876546859741, "learning_rate": 0.000182975743935984, "loss": 0.6046, "step": 1707 }, { "epoch": 0.12664046859939201, "grad_norm": 0.3693774938583374, "learning_rate": 0.00018296574143535885, "loss": 0.5552, "step": 1708 }, { "epoch": 0.12671461407281084, "grad_norm": 0.4609512686729431, "learning_rate": 0.0001829557389347337, "loss": 0.624, "step": 1709 }, { "epoch": 0.1267887595462297, "grad_norm": 0.3710711598396301, "learning_rate": 0.00018294573643410852, "loss": 0.5647, "step": 1710 }, { "epoch": 0.12686290501964856, "grad_norm": 0.3742707073688507, "learning_rate": 0.0001829357339334834, "loss": 0.6072, "step": 1711 }, { "epoch": 0.1269370504930674, "grad_norm": 0.3544057011604309, "learning_rate": 0.00018292573143285822, "loss": 0.5473, "step": 1712 }, { "epoch": 0.12701119596648625, "grad_norm": 0.3850916028022766, "learning_rate": 0.0001829157289322331, "loss": 0.6195, "step": 1713 }, { "epoch": 0.1270853414399051, "grad_norm": 0.38421499729156494, "learning_rate": 0.0001829057264316079, "loss": 0.5818, "step": 1714 }, { "epoch": 0.12715948691332393, "grad_norm": 0.3875584006309509, "learning_rate": 0.00018289572393098276, "loss": 0.5612, "step": 1715 }, { "epoch": 0.1272336323867428, "grad_norm": 0.39619237184524536, "learning_rate": 0.0001828857214303576, "loss": 0.5908, "step": 1716 }, { "epoch": 0.12730777786016165, "grad_norm": 0.3615531027317047, "learning_rate": 0.00018287571892973244, "loss": 0.6022, "step": 1717 }, { "epoch": 0.12738192333358048, "grad_norm": 0.38879403471946716, "learning_rate": 0.00018286571642910727, "loss": 0.6059, "step": 1718 }, { "epoch": 0.12745606880699933, "grad_norm": 0.3675578534603119, "learning_rate": 0.00018285571392848214, "loss": 0.5323, "step": 1719 }, { "epoch": 0.1275302142804182, "grad_norm": 0.3975476026535034, "learning_rate": 0.00018284571142785698, "loss": 0.5337, "step": 1720 }, { "epoch": 0.12760435975383702, "grad_norm": 0.40729820728302, "learning_rate": 0.0001828357089272318, "loss": 0.5465, "step": 1721 }, { "epoch": 0.12767850522725588, "grad_norm": 0.3819001615047455, "learning_rate": 0.00018282570642660665, "loss": 0.5754, "step": 1722 }, { "epoch": 0.12775265070067474, "grad_norm": 0.4004845917224884, "learning_rate": 0.00018281570392598151, "loss": 0.5611, "step": 1723 }, { "epoch": 0.12782679617409357, "grad_norm": 0.3724479675292969, "learning_rate": 0.00018280570142535635, "loss": 0.5535, "step": 1724 }, { "epoch": 0.12790094164751242, "grad_norm": 0.4440998136997223, "learning_rate": 0.0001827956989247312, "loss": 0.554, "step": 1725 }, { "epoch": 0.12797508712093128, "grad_norm": 0.36798205971717834, "learning_rate": 0.00018278569642410603, "loss": 0.5396, "step": 1726 }, { "epoch": 0.1280492325943501, "grad_norm": 0.3962744176387787, "learning_rate": 0.0001827756939234809, "loss": 0.585, "step": 1727 }, { "epoch": 0.12812337806776897, "grad_norm": 0.37204864621162415, "learning_rate": 0.00018276569142285573, "loss": 0.5393, "step": 1728 }, { "epoch": 0.1281975235411878, "grad_norm": 0.4047146439552307, "learning_rate": 0.00018275568892223056, "loss": 0.5239, "step": 1729 }, { "epoch": 0.12827166901460665, "grad_norm": 0.3527173399925232, "learning_rate": 0.0001827456864216054, "loss": 0.5838, "step": 1730 }, { "epoch": 0.1283458144880255, "grad_norm": 0.37680646777153015, "learning_rate": 0.00018273568392098026, "loss": 0.5712, "step": 1731 }, { "epoch": 0.12841995996144434, "grad_norm": 0.3912186026573181, "learning_rate": 0.00018272568142035507, "loss": 0.525, "step": 1732 }, { "epoch": 0.1284941054348632, "grad_norm": 0.3950353264808655, "learning_rate": 0.00018271567891972994, "loss": 0.5408, "step": 1733 }, { "epoch": 0.12856825090828206, "grad_norm": 0.37522661685943604, "learning_rate": 0.00018270567641910478, "loss": 0.5469, "step": 1734 }, { "epoch": 0.12864239638170089, "grad_norm": 0.38690319657325745, "learning_rate": 0.00018269567391847964, "loss": 0.5463, "step": 1735 }, { "epoch": 0.12871654185511974, "grad_norm": 0.36457282304763794, "learning_rate": 0.00018268567141785448, "loss": 0.5599, "step": 1736 }, { "epoch": 0.1287906873285386, "grad_norm": 0.38523998856544495, "learning_rate": 0.00018267566891722931, "loss": 0.5739, "step": 1737 }, { "epoch": 0.12886483280195743, "grad_norm": 0.3943268358707428, "learning_rate": 0.00018266566641660415, "loss": 0.5979, "step": 1738 }, { "epoch": 0.1289389782753763, "grad_norm": 0.3609391152858734, "learning_rate": 0.00018265566391597902, "loss": 0.5649, "step": 1739 }, { "epoch": 0.12901312374879514, "grad_norm": 0.38458797335624695, "learning_rate": 0.00018264566141535385, "loss": 0.5964, "step": 1740 }, { "epoch": 0.12908726922221397, "grad_norm": 0.35416314005851746, "learning_rate": 0.0001826356589147287, "loss": 0.5514, "step": 1741 }, { "epoch": 0.12916141469563283, "grad_norm": 0.3722313642501831, "learning_rate": 0.00018262565641410355, "loss": 0.5531, "step": 1742 }, { "epoch": 0.1292355601690517, "grad_norm": 0.3732784688472748, "learning_rate": 0.00018261565391347836, "loss": 0.5685, "step": 1743 }, { "epoch": 0.12930970564247052, "grad_norm": 0.3642188608646393, "learning_rate": 0.00018260565141285323, "loss": 0.5275, "step": 1744 }, { "epoch": 0.12938385111588938, "grad_norm": 0.3834596872329712, "learning_rate": 0.00018259564891222807, "loss": 0.5676, "step": 1745 }, { "epoch": 0.12945799658930823, "grad_norm": 0.38901084661483765, "learning_rate": 0.00018258564641160293, "loss": 0.5634, "step": 1746 }, { "epoch": 0.12953214206272706, "grad_norm": 0.36028608679771423, "learning_rate": 0.00018257564391097774, "loss": 0.5199, "step": 1747 }, { "epoch": 0.12960628753614592, "grad_norm": 0.4198925793170929, "learning_rate": 0.0001825656414103526, "loss": 0.5999, "step": 1748 }, { "epoch": 0.12968043300956478, "grad_norm": 0.36019596457481384, "learning_rate": 0.00018255563890972744, "loss": 0.5514, "step": 1749 }, { "epoch": 0.1297545784829836, "grad_norm": 0.3816182315349579, "learning_rate": 0.0001825456364091023, "loss": 0.5612, "step": 1750 }, { "epoch": 0.12982872395640246, "grad_norm": 0.4234984219074249, "learning_rate": 0.00018253563390847712, "loss": 0.5821, "step": 1751 }, { "epoch": 0.12990286942982132, "grad_norm": 0.376354455947876, "learning_rate": 0.00018252563140785198, "loss": 0.5511, "step": 1752 }, { "epoch": 0.12997701490324015, "grad_norm": 0.37425047159194946, "learning_rate": 0.00018251562890722682, "loss": 0.5373, "step": 1753 }, { "epoch": 0.130051160376659, "grad_norm": 0.3734097182750702, "learning_rate": 0.00018250562640660165, "loss": 0.5446, "step": 1754 }, { "epoch": 0.13012530585007787, "grad_norm": 0.37091243267059326, "learning_rate": 0.0001824956239059765, "loss": 0.5601, "step": 1755 }, { "epoch": 0.1301994513234967, "grad_norm": 0.36450156569480896, "learning_rate": 0.00018248562140535135, "loss": 0.535, "step": 1756 }, { "epoch": 0.13027359679691555, "grad_norm": 0.3838571012020111, "learning_rate": 0.0001824756189047262, "loss": 0.5357, "step": 1757 }, { "epoch": 0.13034774227033438, "grad_norm": 0.3905516266822815, "learning_rate": 0.00018246561640410103, "loss": 0.5705, "step": 1758 }, { "epoch": 0.13042188774375324, "grad_norm": 0.3865044116973877, "learning_rate": 0.00018245561390347587, "loss": 0.5235, "step": 1759 }, { "epoch": 0.1304960332171721, "grad_norm": 0.39038196206092834, "learning_rate": 0.00018244561140285073, "loss": 0.5637, "step": 1760 }, { "epoch": 0.13057017869059093, "grad_norm": 0.3761669397354126, "learning_rate": 0.00018243560890222557, "loss": 0.5405, "step": 1761 }, { "epoch": 0.13064432416400978, "grad_norm": 0.39746522903442383, "learning_rate": 0.0001824256064016004, "loss": 0.5792, "step": 1762 }, { "epoch": 0.13071846963742864, "grad_norm": 0.4239092171192169, "learning_rate": 0.00018241560390097524, "loss": 0.5923, "step": 1763 }, { "epoch": 0.13079261511084747, "grad_norm": 0.3783685266971588, "learning_rate": 0.0001824056014003501, "loss": 0.5437, "step": 1764 }, { "epoch": 0.13086676058426633, "grad_norm": 0.44566288590431213, "learning_rate": 0.00018239559889972494, "loss": 0.5697, "step": 1765 }, { "epoch": 0.13094090605768519, "grad_norm": 0.3911699950695038, "learning_rate": 0.00018238559639909978, "loss": 0.6149, "step": 1766 }, { "epoch": 0.13101505153110402, "grad_norm": 0.3754197359085083, "learning_rate": 0.00018237559389847462, "loss": 0.5758, "step": 1767 }, { "epoch": 0.13108919700452287, "grad_norm": 0.4379519522190094, "learning_rate": 0.00018236559139784948, "loss": 0.6335, "step": 1768 }, { "epoch": 0.13116334247794173, "grad_norm": 0.3721013367176056, "learning_rate": 0.00018235558889722432, "loss": 0.5614, "step": 1769 }, { "epoch": 0.13123748795136056, "grad_norm": 0.38077831268310547, "learning_rate": 0.00018234558639659916, "loss": 0.5889, "step": 1770 }, { "epoch": 0.13131163342477942, "grad_norm": 0.3898366093635559, "learning_rate": 0.000182335583895974, "loss": 0.6023, "step": 1771 }, { "epoch": 0.13138577889819827, "grad_norm": 0.36667126417160034, "learning_rate": 0.00018232558139534886, "loss": 0.5263, "step": 1772 }, { "epoch": 0.1314599243716171, "grad_norm": 0.3643749952316284, "learning_rate": 0.0001823155788947237, "loss": 0.5291, "step": 1773 }, { "epoch": 0.13153406984503596, "grad_norm": 0.4162447154521942, "learning_rate": 0.00018230557639409853, "loss": 0.6647, "step": 1774 }, { "epoch": 0.13160821531845482, "grad_norm": 0.38971400260925293, "learning_rate": 0.0001822955738934734, "loss": 0.5238, "step": 1775 }, { "epoch": 0.13168236079187365, "grad_norm": 0.3545731008052826, "learning_rate": 0.00018228557139284823, "loss": 0.538, "step": 1776 }, { "epoch": 0.1317565062652925, "grad_norm": 0.36139899492263794, "learning_rate": 0.00018227556889222307, "loss": 0.5597, "step": 1777 }, { "epoch": 0.13183065173871136, "grad_norm": 0.4195714592933655, "learning_rate": 0.0001822655663915979, "loss": 0.5515, "step": 1778 }, { "epoch": 0.1319047972121302, "grad_norm": 0.40056341886520386, "learning_rate": 0.00018225556389097277, "loss": 0.5906, "step": 1779 }, { "epoch": 0.13197894268554905, "grad_norm": 0.38163256645202637, "learning_rate": 0.00018224556139034758, "loss": 0.5648, "step": 1780 }, { "epoch": 0.1320530881589679, "grad_norm": 0.37200137972831726, "learning_rate": 0.00018223555888972244, "loss": 0.5151, "step": 1781 }, { "epoch": 0.13212723363238674, "grad_norm": 0.40364786982536316, "learning_rate": 0.00018222555638909728, "loss": 0.5555, "step": 1782 }, { "epoch": 0.1322013791058056, "grad_norm": 0.3883340656757355, "learning_rate": 0.00018221555388847215, "loss": 0.5824, "step": 1783 }, { "epoch": 0.13227552457922445, "grad_norm": 0.36195728182792664, "learning_rate": 0.00018220555138784696, "loss": 0.5246, "step": 1784 }, { "epoch": 0.13234967005264328, "grad_norm": 0.3751981854438782, "learning_rate": 0.00018219554888722182, "loss": 0.5636, "step": 1785 }, { "epoch": 0.13242381552606214, "grad_norm": 0.35236966609954834, "learning_rate": 0.00018218554638659666, "loss": 0.56, "step": 1786 }, { "epoch": 0.13249796099948097, "grad_norm": 0.3874991536140442, "learning_rate": 0.00018217554388597152, "loss": 0.5291, "step": 1787 }, { "epoch": 0.13257210647289983, "grad_norm": 0.37078696489334106, "learning_rate": 0.00018216554138534633, "loss": 0.5744, "step": 1788 }, { "epoch": 0.13264625194631868, "grad_norm": 0.4083297550678253, "learning_rate": 0.0001821555388847212, "loss": 0.5688, "step": 1789 }, { "epoch": 0.1327203974197375, "grad_norm": 0.35806432366371155, "learning_rate": 0.00018214553638409603, "loss": 0.5555, "step": 1790 }, { "epoch": 0.13279454289315637, "grad_norm": 0.3887023627758026, "learning_rate": 0.00018213553388347087, "loss": 0.5832, "step": 1791 }, { "epoch": 0.13286868836657523, "grad_norm": 0.4226858615875244, "learning_rate": 0.0001821255313828457, "loss": 0.5614, "step": 1792 }, { "epoch": 0.13294283383999406, "grad_norm": 0.37741735577583313, "learning_rate": 0.00018211552888222057, "loss": 0.5445, "step": 1793 }, { "epoch": 0.13301697931341291, "grad_norm": 0.368358314037323, "learning_rate": 0.0001821055263815954, "loss": 0.5738, "step": 1794 }, { "epoch": 0.13309112478683177, "grad_norm": 0.3444255590438843, "learning_rate": 0.00018209552388097025, "loss": 0.5285, "step": 1795 }, { "epoch": 0.1331652702602506, "grad_norm": 0.35821008682250977, "learning_rate": 0.00018208552138034508, "loss": 0.5727, "step": 1796 }, { "epoch": 0.13323941573366946, "grad_norm": 0.391743004322052, "learning_rate": 0.00018207551887971995, "loss": 0.5632, "step": 1797 }, { "epoch": 0.13331356120708832, "grad_norm": 0.37725502252578735, "learning_rate": 0.00018206551637909478, "loss": 0.5411, "step": 1798 }, { "epoch": 0.13338770668050715, "grad_norm": 0.3432353138923645, "learning_rate": 0.00018205551387846962, "loss": 0.5266, "step": 1799 }, { "epoch": 0.133461852153926, "grad_norm": 0.37906819581985474, "learning_rate": 0.00018204551137784446, "loss": 0.559, "step": 1800 }, { "epoch": 0.13353599762734486, "grad_norm": 0.4001822769641876, "learning_rate": 0.00018203550887721932, "loss": 0.5938, "step": 1801 }, { "epoch": 0.1336101431007637, "grad_norm": 0.3669690191745758, "learning_rate": 0.00018202550637659416, "loss": 0.521, "step": 1802 }, { "epoch": 0.13368428857418255, "grad_norm": 0.37229159474372864, "learning_rate": 0.000182015503875969, "loss": 0.572, "step": 1803 }, { "epoch": 0.1337584340476014, "grad_norm": 0.3698316514492035, "learning_rate": 0.00018200550137534383, "loss": 0.5154, "step": 1804 }, { "epoch": 0.13383257952102023, "grad_norm": 0.3814597427845001, "learning_rate": 0.0001819954988747187, "loss": 0.5126, "step": 1805 }, { "epoch": 0.1339067249944391, "grad_norm": 0.3880426287651062, "learning_rate": 0.00018198549637409353, "loss": 0.5807, "step": 1806 }, { "epoch": 0.13398087046785795, "grad_norm": 0.38152387738227844, "learning_rate": 0.00018197549387346837, "loss": 0.5938, "step": 1807 }, { "epoch": 0.13405501594127678, "grad_norm": 0.36311984062194824, "learning_rate": 0.0001819654913728432, "loss": 0.5746, "step": 1808 }, { "epoch": 0.13412916141469564, "grad_norm": 0.36933696269989014, "learning_rate": 0.00018195548887221807, "loss": 0.5405, "step": 1809 }, { "epoch": 0.1342033068881145, "grad_norm": 0.39937010407447815, "learning_rate": 0.0001819454863715929, "loss": 0.6033, "step": 1810 }, { "epoch": 0.13427745236153332, "grad_norm": 0.3385579586029053, "learning_rate": 0.00018193548387096775, "loss": 0.5304, "step": 1811 }, { "epoch": 0.13435159783495218, "grad_norm": 0.4368140399456024, "learning_rate": 0.0001819254813703426, "loss": 0.5927, "step": 1812 }, { "epoch": 0.13442574330837104, "grad_norm": 0.4108233153820038, "learning_rate": 0.00018191547886971745, "loss": 0.6287, "step": 1813 }, { "epoch": 0.13449988878178987, "grad_norm": 0.38325464725494385, "learning_rate": 0.00018190547636909229, "loss": 0.5757, "step": 1814 }, { "epoch": 0.13457403425520872, "grad_norm": 0.42363491654396057, "learning_rate": 0.00018189547386846712, "loss": 0.5723, "step": 1815 }, { "epoch": 0.13464817972862755, "grad_norm": 0.3916628658771515, "learning_rate": 0.000181885471367842, "loss": 0.5514, "step": 1816 }, { "epoch": 0.1347223252020464, "grad_norm": 0.37803885340690613, "learning_rate": 0.0001818754688672168, "loss": 0.5565, "step": 1817 }, { "epoch": 0.13479647067546527, "grad_norm": 0.40130335092544556, "learning_rate": 0.00018186546636659166, "loss": 0.5774, "step": 1818 }, { "epoch": 0.1348706161488841, "grad_norm": 0.36704444885253906, "learning_rate": 0.0001818554638659665, "loss": 0.5689, "step": 1819 }, { "epoch": 0.13494476162230296, "grad_norm": 0.3676425814628601, "learning_rate": 0.00018184546136534136, "loss": 0.5402, "step": 1820 }, { "epoch": 0.1350189070957218, "grad_norm": 0.366222083568573, "learning_rate": 0.00018183545886471617, "loss": 0.4974, "step": 1821 }, { "epoch": 0.13509305256914064, "grad_norm": 0.3774815797805786, "learning_rate": 0.00018182545636409104, "loss": 0.5545, "step": 1822 }, { "epoch": 0.1351671980425595, "grad_norm": 0.35941922664642334, "learning_rate": 0.00018181545386346587, "loss": 0.5099, "step": 1823 }, { "epoch": 0.13524134351597836, "grad_norm": 0.37007614970207214, "learning_rate": 0.00018180545136284074, "loss": 0.5315, "step": 1824 }, { "epoch": 0.1353154889893972, "grad_norm": 0.36980271339416504, "learning_rate": 0.00018179544886221555, "loss": 0.5183, "step": 1825 }, { "epoch": 0.13538963446281604, "grad_norm": 0.3618273138999939, "learning_rate": 0.0001817854463615904, "loss": 0.5195, "step": 1826 }, { "epoch": 0.1354637799362349, "grad_norm": 0.426099568605423, "learning_rate": 0.00018177544386096525, "loss": 0.5729, "step": 1827 }, { "epoch": 0.13553792540965373, "grad_norm": 0.3490959405899048, "learning_rate": 0.00018176544136034009, "loss": 0.5413, "step": 1828 }, { "epoch": 0.1356120708830726, "grad_norm": 0.36519160866737366, "learning_rate": 0.00018175543885971492, "loss": 0.5473, "step": 1829 }, { "epoch": 0.13568621635649145, "grad_norm": 0.43901145458221436, "learning_rate": 0.0001817454363590898, "loss": 0.5952, "step": 1830 }, { "epoch": 0.13576036182991028, "grad_norm": 0.38502418994903564, "learning_rate": 0.00018173543385846462, "loss": 0.5919, "step": 1831 }, { "epoch": 0.13583450730332913, "grad_norm": 0.3684796988964081, "learning_rate": 0.00018172543135783946, "loss": 0.6022, "step": 1832 }, { "epoch": 0.135908652776748, "grad_norm": 0.36804354190826416, "learning_rate": 0.0001817154288572143, "loss": 0.5927, "step": 1833 }, { "epoch": 0.13598279825016682, "grad_norm": 0.3677428364753723, "learning_rate": 0.00018170542635658916, "loss": 0.5644, "step": 1834 }, { "epoch": 0.13605694372358568, "grad_norm": 0.37706810235977173, "learning_rate": 0.000181695423855964, "loss": 0.5435, "step": 1835 }, { "epoch": 0.13613108919700453, "grad_norm": 0.4095498323440552, "learning_rate": 0.00018168542135533884, "loss": 0.6045, "step": 1836 }, { "epoch": 0.13620523467042336, "grad_norm": 0.39587077498435974, "learning_rate": 0.00018167541885471367, "loss": 0.5453, "step": 1837 }, { "epoch": 0.13627938014384222, "grad_norm": 0.3494703471660614, "learning_rate": 0.00018166541635408854, "loss": 0.5639, "step": 1838 }, { "epoch": 0.13635352561726108, "grad_norm": 0.3867640197277069, "learning_rate": 0.00018165541385346338, "loss": 0.6018, "step": 1839 }, { "epoch": 0.1364276710906799, "grad_norm": 0.36481973528862, "learning_rate": 0.0001816454113528382, "loss": 0.5847, "step": 1840 }, { "epoch": 0.13650181656409877, "grad_norm": 0.36716899275779724, "learning_rate": 0.00018163540885221305, "loss": 0.5569, "step": 1841 }, { "epoch": 0.13657596203751762, "grad_norm": 0.39225128293037415, "learning_rate": 0.00018162540635158791, "loss": 0.5813, "step": 1842 }, { "epoch": 0.13665010751093645, "grad_norm": 0.3770802915096283, "learning_rate": 0.00018161540385096275, "loss": 0.5812, "step": 1843 }, { "epoch": 0.1367242529843553, "grad_norm": 0.36439964175224304, "learning_rate": 0.0001816054013503376, "loss": 0.5349, "step": 1844 }, { "epoch": 0.13679839845777414, "grad_norm": 0.379311203956604, "learning_rate": 0.00018159539884971245, "loss": 0.5655, "step": 1845 }, { "epoch": 0.136872543931193, "grad_norm": 0.42362120747566223, "learning_rate": 0.0001815853963490873, "loss": 0.5591, "step": 1846 }, { "epoch": 0.13694668940461185, "grad_norm": 0.3605532944202423, "learning_rate": 0.00018157539384846213, "loss": 0.5326, "step": 1847 }, { "epoch": 0.13702083487803068, "grad_norm": 0.388636976480484, "learning_rate": 0.00018156539134783696, "loss": 0.5491, "step": 1848 }, { "epoch": 0.13709498035144954, "grad_norm": 0.3531862497329712, "learning_rate": 0.00018155538884721183, "loss": 0.5436, "step": 1849 }, { "epoch": 0.1371691258248684, "grad_norm": 0.38272202014923096, "learning_rate": 0.00018154538634658666, "loss": 0.6267, "step": 1850 }, { "epoch": 0.13724327129828723, "grad_norm": 0.37087705731391907, "learning_rate": 0.0001815353838459615, "loss": 0.5366, "step": 1851 }, { "epoch": 0.13731741677170609, "grad_norm": 0.3761844336986542, "learning_rate": 0.00018152538134533634, "loss": 0.582, "step": 1852 }, { "epoch": 0.13739156224512494, "grad_norm": 0.36111828684806824, "learning_rate": 0.0001815153788447112, "loss": 0.5422, "step": 1853 }, { "epoch": 0.13746570771854377, "grad_norm": 0.3892499804496765, "learning_rate": 0.000181505376344086, "loss": 0.5464, "step": 1854 }, { "epoch": 0.13753985319196263, "grad_norm": 0.35849180817604065, "learning_rate": 0.00018149537384346088, "loss": 0.5138, "step": 1855 }, { "epoch": 0.1376139986653815, "grad_norm": 0.39428120851516724, "learning_rate": 0.00018148537134283571, "loss": 0.6106, "step": 1856 }, { "epoch": 0.13768814413880032, "grad_norm": 0.4038977026939392, "learning_rate": 0.00018147536884221058, "loss": 0.539, "step": 1857 }, { "epoch": 0.13776228961221917, "grad_norm": 0.3344937264919281, "learning_rate": 0.0001814653663415854, "loss": 0.4929, "step": 1858 }, { "epoch": 0.13783643508563803, "grad_norm": 0.41131895780563354, "learning_rate": 0.00018145536384096025, "loss": 0.5977, "step": 1859 }, { "epoch": 0.13791058055905686, "grad_norm": 0.34788748621940613, "learning_rate": 0.0001814453613403351, "loss": 0.5259, "step": 1860 }, { "epoch": 0.13798472603247572, "grad_norm": 0.3771216571331024, "learning_rate": 0.00018143535883970995, "loss": 0.5332, "step": 1861 }, { "epoch": 0.13805887150589458, "grad_norm": 0.3566390573978424, "learning_rate": 0.00018142535633908476, "loss": 0.5433, "step": 1862 }, { "epoch": 0.1381330169793134, "grad_norm": 0.37929680943489075, "learning_rate": 0.00018141535383845963, "loss": 0.58, "step": 1863 }, { "epoch": 0.13820716245273226, "grad_norm": 0.4094679355621338, "learning_rate": 0.00018140535133783447, "loss": 0.5907, "step": 1864 }, { "epoch": 0.13828130792615112, "grad_norm": 0.37660476565361023, "learning_rate": 0.0001813953488372093, "loss": 0.5536, "step": 1865 }, { "epoch": 0.13835545339956995, "grad_norm": 0.4104023277759552, "learning_rate": 0.00018138534633658414, "loss": 0.5516, "step": 1866 }, { "epoch": 0.1384295988729888, "grad_norm": 0.35966211557388306, "learning_rate": 0.000181375343835959, "loss": 0.5486, "step": 1867 }, { "epoch": 0.13850374434640766, "grad_norm": 0.38000109791755676, "learning_rate": 0.00018136534133533384, "loss": 0.5698, "step": 1868 }, { "epoch": 0.1385778898198265, "grad_norm": 0.3899790644645691, "learning_rate": 0.00018135533883470868, "loss": 0.5615, "step": 1869 }, { "epoch": 0.13865203529324535, "grad_norm": 0.3855242133140564, "learning_rate": 0.00018134533633408351, "loss": 0.5634, "step": 1870 }, { "epoch": 0.1387261807666642, "grad_norm": 0.36351510882377625, "learning_rate": 0.00018133533383345838, "loss": 0.5349, "step": 1871 }, { "epoch": 0.13880032624008304, "grad_norm": 0.38920605182647705, "learning_rate": 0.00018132533133283322, "loss": 0.5672, "step": 1872 }, { "epoch": 0.1388744717135019, "grad_norm": 0.3780278265476227, "learning_rate": 0.00018131532883220805, "loss": 0.5611, "step": 1873 }, { "epoch": 0.13894861718692073, "grad_norm": 0.34601521492004395, "learning_rate": 0.0001813053263315829, "loss": 0.5198, "step": 1874 }, { "epoch": 0.13902276266033958, "grad_norm": 0.3687717020511627, "learning_rate": 0.00018129532383095775, "loss": 0.5352, "step": 1875 }, { "epoch": 0.13909690813375844, "grad_norm": 0.3611721396446228, "learning_rate": 0.0001812853213303326, "loss": 0.5455, "step": 1876 }, { "epoch": 0.13917105360717727, "grad_norm": 0.38835540413856506, "learning_rate": 0.00018127531882970743, "loss": 0.5678, "step": 1877 }, { "epoch": 0.13924519908059613, "grad_norm": 0.37489816546440125, "learning_rate": 0.0001812653163290823, "loss": 0.5797, "step": 1878 }, { "epoch": 0.13931934455401498, "grad_norm": 0.38962072134017944, "learning_rate": 0.00018125531382845713, "loss": 0.5718, "step": 1879 }, { "epoch": 0.13939349002743381, "grad_norm": 0.38869866728782654, "learning_rate": 0.00018124531132783197, "loss": 0.5941, "step": 1880 }, { "epoch": 0.13946763550085267, "grad_norm": 0.3989139795303345, "learning_rate": 0.0001812353088272068, "loss": 0.551, "step": 1881 }, { "epoch": 0.13954178097427153, "grad_norm": 0.35794007778167725, "learning_rate": 0.00018122530632658167, "loss": 0.5447, "step": 1882 }, { "epoch": 0.13961592644769036, "grad_norm": 0.3779074549674988, "learning_rate": 0.0001812153038259565, "loss": 0.5565, "step": 1883 }, { "epoch": 0.13969007192110922, "grad_norm": 0.3649229407310486, "learning_rate": 0.00018120530132533134, "loss": 0.5376, "step": 1884 }, { "epoch": 0.13976421739452807, "grad_norm": 0.352299302816391, "learning_rate": 0.00018119529882470618, "loss": 0.5337, "step": 1885 }, { "epoch": 0.1398383628679469, "grad_norm": 0.37630128860473633, "learning_rate": 0.00018118529632408104, "loss": 0.5298, "step": 1886 }, { "epoch": 0.13991250834136576, "grad_norm": 0.38069722056388855, "learning_rate": 0.00018117529382345588, "loss": 0.5453, "step": 1887 }, { "epoch": 0.13998665381478462, "grad_norm": 0.43126180768013, "learning_rate": 0.00018116529132283072, "loss": 0.5742, "step": 1888 }, { "epoch": 0.14006079928820345, "grad_norm": 0.4194243848323822, "learning_rate": 0.00018115528882220555, "loss": 0.5772, "step": 1889 }, { "epoch": 0.1401349447616223, "grad_norm": 0.40236666798591614, "learning_rate": 0.00018114528632158042, "loss": 0.5468, "step": 1890 }, { "epoch": 0.14020909023504116, "grad_norm": 0.4010073244571686, "learning_rate": 0.00018113528382095523, "loss": 0.5735, "step": 1891 }, { "epoch": 0.14028323570846, "grad_norm": 0.4006150960922241, "learning_rate": 0.0001811252813203301, "loss": 0.5182, "step": 1892 }, { "epoch": 0.14035738118187885, "grad_norm": 0.39853718876838684, "learning_rate": 0.00018111527881970493, "loss": 0.5765, "step": 1893 }, { "epoch": 0.1404315266552977, "grad_norm": 0.36202964186668396, "learning_rate": 0.0001811052763190798, "loss": 0.5733, "step": 1894 }, { "epoch": 0.14050567212871654, "grad_norm": 0.3901797831058502, "learning_rate": 0.0001810952738184546, "loss": 0.5971, "step": 1895 }, { "epoch": 0.1405798176021354, "grad_norm": 0.384635329246521, "learning_rate": 0.00018108527131782947, "loss": 0.5824, "step": 1896 }, { "epoch": 0.14065396307555425, "grad_norm": 0.3440004885196686, "learning_rate": 0.0001810752688172043, "loss": 0.5224, "step": 1897 }, { "epoch": 0.14072810854897308, "grad_norm": 0.37220996618270874, "learning_rate": 0.00018106526631657917, "loss": 0.5683, "step": 1898 }, { "epoch": 0.14080225402239194, "grad_norm": 0.34421542286872864, "learning_rate": 0.00018105526381595398, "loss": 0.548, "step": 1899 }, { "epoch": 0.1408763994958108, "grad_norm": 0.36090707778930664, "learning_rate": 0.00018104526131532884, "loss": 0.5121, "step": 1900 }, { "epoch": 0.14095054496922962, "grad_norm": 0.3953814208507538, "learning_rate": 0.00018103525881470368, "loss": 0.5448, "step": 1901 }, { "epoch": 0.14102469044264848, "grad_norm": 0.4298102557659149, "learning_rate": 0.00018102525631407852, "loss": 0.5421, "step": 1902 }, { "epoch": 0.1410988359160673, "grad_norm": 0.35320955514907837, "learning_rate": 0.00018101525381345336, "loss": 0.5174, "step": 1903 }, { "epoch": 0.14117298138948617, "grad_norm": 0.3475368320941925, "learning_rate": 0.00018100525131282822, "loss": 0.544, "step": 1904 }, { "epoch": 0.14124712686290503, "grad_norm": 0.3598816394805908, "learning_rate": 0.00018099524881220306, "loss": 0.5143, "step": 1905 }, { "epoch": 0.14132127233632386, "grad_norm": 0.3980805277824402, "learning_rate": 0.0001809852463115779, "loss": 0.6244, "step": 1906 }, { "epoch": 0.1413954178097427, "grad_norm": 0.39928773045539856, "learning_rate": 0.00018097524381095273, "loss": 0.5619, "step": 1907 }, { "epoch": 0.14146956328316157, "grad_norm": 0.3820854723453522, "learning_rate": 0.0001809652413103276, "loss": 0.555, "step": 1908 }, { "epoch": 0.1415437087565804, "grad_norm": 0.3676750957965851, "learning_rate": 0.00018095523880970243, "loss": 0.528, "step": 1909 }, { "epoch": 0.14161785422999926, "grad_norm": 0.4038364887237549, "learning_rate": 0.00018094523630907727, "loss": 0.5432, "step": 1910 }, { "epoch": 0.14169199970341811, "grad_norm": 0.38189414143562317, "learning_rate": 0.00018093523380845213, "loss": 0.5775, "step": 1911 }, { "epoch": 0.14176614517683694, "grad_norm": 0.3742334246635437, "learning_rate": 0.00018092523130782697, "loss": 0.5538, "step": 1912 }, { "epoch": 0.1418402906502558, "grad_norm": 0.3724448084831238, "learning_rate": 0.0001809152288072018, "loss": 0.5519, "step": 1913 }, { "epoch": 0.14191443612367466, "grad_norm": 0.38440194725990295, "learning_rate": 0.00018090522630657664, "loss": 0.5796, "step": 1914 }, { "epoch": 0.1419885815970935, "grad_norm": 0.3533400595188141, "learning_rate": 0.0001808952238059515, "loss": 0.573, "step": 1915 }, { "epoch": 0.14206272707051235, "grad_norm": 0.3665550649166107, "learning_rate": 0.00018088522130532635, "loss": 0.5541, "step": 1916 }, { "epoch": 0.1421368725439312, "grad_norm": 0.3527677357196808, "learning_rate": 0.00018087521880470118, "loss": 0.5297, "step": 1917 }, { "epoch": 0.14221101801735003, "grad_norm": 0.3662387728691101, "learning_rate": 0.00018086521630407602, "loss": 0.5516, "step": 1918 }, { "epoch": 0.1422851634907689, "grad_norm": 0.3618030548095703, "learning_rate": 0.00018085521380345088, "loss": 0.5265, "step": 1919 }, { "epoch": 0.14235930896418775, "grad_norm": 0.372715562582016, "learning_rate": 0.00018084521130282572, "loss": 0.554, "step": 1920 }, { "epoch": 0.14243345443760658, "grad_norm": 0.39177632331848145, "learning_rate": 0.00018083520880220056, "loss": 0.5169, "step": 1921 }, { "epoch": 0.14250759991102543, "grad_norm": 0.3632829785346985, "learning_rate": 0.0001808252063015754, "loss": 0.5781, "step": 1922 }, { "epoch": 0.1425817453844443, "grad_norm": 0.3932723104953766, "learning_rate": 0.00018081520380095026, "loss": 0.5797, "step": 1923 }, { "epoch": 0.14265589085786312, "grad_norm": 0.38953831791877747, "learning_rate": 0.0001808052013003251, "loss": 0.5548, "step": 1924 }, { "epoch": 0.14273003633128198, "grad_norm": 0.37579602003097534, "learning_rate": 0.00018079519879969993, "loss": 0.5459, "step": 1925 }, { "epoch": 0.14280418180470084, "grad_norm": 0.34270891547203064, "learning_rate": 0.00018078519629907477, "loss": 0.5145, "step": 1926 }, { "epoch": 0.14287832727811967, "grad_norm": 0.3691083788871765, "learning_rate": 0.00018077519379844964, "loss": 0.5662, "step": 1927 }, { "epoch": 0.14295247275153852, "grad_norm": 0.3592486083507538, "learning_rate": 0.00018076519129782445, "loss": 0.5609, "step": 1928 }, { "epoch": 0.14302661822495735, "grad_norm": 0.3926646113395691, "learning_rate": 0.0001807551887971993, "loss": 0.5873, "step": 1929 }, { "epoch": 0.1431007636983762, "grad_norm": 0.41332167387008667, "learning_rate": 0.00018074518629657415, "loss": 0.6394, "step": 1930 }, { "epoch": 0.14317490917179507, "grad_norm": 0.37225142121315, "learning_rate": 0.000180735183795949, "loss": 0.5859, "step": 1931 }, { "epoch": 0.1432490546452139, "grad_norm": 0.3750663995742798, "learning_rate": 0.00018072518129532382, "loss": 0.5622, "step": 1932 }, { "epoch": 0.14332320011863275, "grad_norm": 0.38627952337265015, "learning_rate": 0.00018071517879469869, "loss": 0.5768, "step": 1933 }, { "epoch": 0.1433973455920516, "grad_norm": 0.35804665088653564, "learning_rate": 0.00018070517629407352, "loss": 0.5415, "step": 1934 }, { "epoch": 0.14347149106547044, "grad_norm": 0.39695072174072266, "learning_rate": 0.00018069517379344839, "loss": 0.5506, "step": 1935 }, { "epoch": 0.1435456365388893, "grad_norm": 0.35636693239212036, "learning_rate": 0.0001806851712928232, "loss": 0.5522, "step": 1936 }, { "epoch": 0.14361978201230816, "grad_norm": 0.35115939378738403, "learning_rate": 0.00018067516879219806, "loss": 0.5609, "step": 1937 }, { "epoch": 0.14369392748572699, "grad_norm": 0.3726571202278137, "learning_rate": 0.0001806651662915729, "loss": 0.5477, "step": 1938 }, { "epoch": 0.14376807295914584, "grad_norm": 0.4001075029373169, "learning_rate": 0.00018065516379094773, "loss": 0.5614, "step": 1939 }, { "epoch": 0.1438422184325647, "grad_norm": 0.37009096145629883, "learning_rate": 0.00018064516129032257, "loss": 0.602, "step": 1940 }, { "epoch": 0.14391636390598353, "grad_norm": 0.342112272977829, "learning_rate": 0.00018063515878969744, "loss": 0.5285, "step": 1941 }, { "epoch": 0.1439905093794024, "grad_norm": 0.37203991413116455, "learning_rate": 0.00018062515628907227, "loss": 0.5622, "step": 1942 }, { "epoch": 0.14406465485282124, "grad_norm": 0.3509122133255005, "learning_rate": 0.0001806151537884471, "loss": 0.5164, "step": 1943 }, { "epoch": 0.14413880032624007, "grad_norm": 0.3788917064666748, "learning_rate": 0.00018060515128782197, "loss": 0.5552, "step": 1944 }, { "epoch": 0.14421294579965893, "grad_norm": 0.36137378215789795, "learning_rate": 0.0001805951487871968, "loss": 0.5418, "step": 1945 }, { "epoch": 0.1442870912730778, "grad_norm": 0.3685062825679779, "learning_rate": 0.00018058514628657165, "loss": 0.5315, "step": 1946 }, { "epoch": 0.14436123674649662, "grad_norm": 0.39516881108283997, "learning_rate": 0.00018057514378594649, "loss": 0.5729, "step": 1947 }, { "epoch": 0.14443538221991548, "grad_norm": 0.3774047791957855, "learning_rate": 0.00018056514128532135, "loss": 0.5295, "step": 1948 }, { "epoch": 0.14450952769333433, "grad_norm": 0.3881818950176239, "learning_rate": 0.0001805551387846962, "loss": 0.5697, "step": 1949 }, { "epoch": 0.14458367316675316, "grad_norm": 0.3653724193572998, "learning_rate": 0.00018054513628407102, "loss": 0.5506, "step": 1950 }, { "epoch": 0.14465781864017202, "grad_norm": 0.3771563172340393, "learning_rate": 0.00018053513378344586, "loss": 0.5703, "step": 1951 }, { "epoch": 0.14473196411359088, "grad_norm": 0.4109928011894226, "learning_rate": 0.00018052513128282073, "loss": 0.5449, "step": 1952 }, { "epoch": 0.1448061095870097, "grad_norm": 0.38120585680007935, "learning_rate": 0.00018051512878219556, "loss": 0.5092, "step": 1953 }, { "epoch": 0.14488025506042856, "grad_norm": 0.4049600660800934, "learning_rate": 0.0001805051262815704, "loss": 0.567, "step": 1954 }, { "epoch": 0.14495440053384742, "grad_norm": 0.3755863606929779, "learning_rate": 0.00018049512378094524, "loss": 0.5492, "step": 1955 }, { "epoch": 0.14502854600726625, "grad_norm": 0.39165449142456055, "learning_rate": 0.0001804851212803201, "loss": 0.5663, "step": 1956 }, { "epoch": 0.1451026914806851, "grad_norm": 0.40745213627815247, "learning_rate": 0.00018047511877969494, "loss": 0.5521, "step": 1957 }, { "epoch": 0.14517683695410394, "grad_norm": 0.37278011441230774, "learning_rate": 0.00018046511627906977, "loss": 0.5555, "step": 1958 }, { "epoch": 0.1452509824275228, "grad_norm": 0.40239056944847107, "learning_rate": 0.0001804551137784446, "loss": 0.5632, "step": 1959 }, { "epoch": 0.14532512790094165, "grad_norm": 0.3688340187072754, "learning_rate": 0.00018044511127781948, "loss": 0.5954, "step": 1960 }, { "epoch": 0.14539927337436048, "grad_norm": 0.3621130585670471, "learning_rate": 0.0001804351087771943, "loss": 0.545, "step": 1961 }, { "epoch": 0.14547341884777934, "grad_norm": 0.36264336109161377, "learning_rate": 0.00018042510627656915, "loss": 0.5448, "step": 1962 }, { "epoch": 0.1455475643211982, "grad_norm": 0.38852375745773315, "learning_rate": 0.000180415103775944, "loss": 0.533, "step": 1963 }, { "epoch": 0.14562170979461703, "grad_norm": 0.410462349653244, "learning_rate": 0.00018040510127531885, "loss": 0.5486, "step": 1964 }, { "epoch": 0.14569585526803588, "grad_norm": 0.3575645685195923, "learning_rate": 0.00018039509877469366, "loss": 0.5887, "step": 1965 }, { "epoch": 0.14577000074145474, "grad_norm": 0.36553460359573364, "learning_rate": 0.00018038509627406853, "loss": 0.5527, "step": 1966 }, { "epoch": 0.14584414621487357, "grad_norm": 0.39923396706581116, "learning_rate": 0.00018037509377344336, "loss": 0.5618, "step": 1967 }, { "epoch": 0.14591829168829243, "grad_norm": 0.39755961298942566, "learning_rate": 0.00018036509127281823, "loss": 0.5753, "step": 1968 }, { "epoch": 0.14599243716171129, "grad_norm": 0.35844525694847107, "learning_rate": 0.00018035508877219304, "loss": 0.5852, "step": 1969 }, { "epoch": 0.14606658263513012, "grad_norm": 0.38421183824539185, "learning_rate": 0.0001803450862715679, "loss": 0.5553, "step": 1970 }, { "epoch": 0.14614072810854897, "grad_norm": 0.368292897939682, "learning_rate": 0.00018033508377094274, "loss": 0.5017, "step": 1971 }, { "epoch": 0.14621487358196783, "grad_norm": 0.4172115921974182, "learning_rate": 0.0001803250812703176, "loss": 0.5211, "step": 1972 }, { "epoch": 0.14628901905538666, "grad_norm": 0.3593875467777252, "learning_rate": 0.0001803150787696924, "loss": 0.4962, "step": 1973 }, { "epoch": 0.14636316452880552, "grad_norm": 0.3520998954772949, "learning_rate": 0.00018030507626906728, "loss": 0.5485, "step": 1974 }, { "epoch": 0.14643731000222437, "grad_norm": 0.3849192261695862, "learning_rate": 0.00018029507376844211, "loss": 0.5154, "step": 1975 }, { "epoch": 0.1465114554756432, "grad_norm": 0.3967880606651306, "learning_rate": 0.00018028507126781695, "loss": 0.5471, "step": 1976 }, { "epoch": 0.14658560094906206, "grad_norm": 0.36546921730041504, "learning_rate": 0.0001802750687671918, "loss": 0.514, "step": 1977 }, { "epoch": 0.14665974642248092, "grad_norm": 0.3822888433933258, "learning_rate": 0.00018026506626656665, "loss": 0.5383, "step": 1978 }, { "epoch": 0.14673389189589975, "grad_norm": 0.39203473925590515, "learning_rate": 0.0001802550637659415, "loss": 0.5653, "step": 1979 }, { "epoch": 0.1468080373693186, "grad_norm": 0.3746464252471924, "learning_rate": 0.00018024506126531633, "loss": 0.5419, "step": 1980 }, { "epoch": 0.14688218284273746, "grad_norm": 0.35456156730651855, "learning_rate": 0.0001802350587646912, "loss": 0.529, "step": 1981 }, { "epoch": 0.1469563283161563, "grad_norm": 0.36709293723106384, "learning_rate": 0.00018022505626406603, "loss": 0.5374, "step": 1982 }, { "epoch": 0.14703047378957515, "grad_norm": 0.387584924697876, "learning_rate": 0.00018021505376344086, "loss": 0.5558, "step": 1983 }, { "epoch": 0.147104619262994, "grad_norm": 0.40649932622909546, "learning_rate": 0.0001802050512628157, "loss": 0.5902, "step": 1984 }, { "epoch": 0.14717876473641284, "grad_norm": 0.36484473943710327, "learning_rate": 0.00018019504876219057, "loss": 0.5849, "step": 1985 }, { "epoch": 0.1472529102098317, "grad_norm": 0.38724827766418457, "learning_rate": 0.0001801850462615654, "loss": 0.5485, "step": 1986 }, { "epoch": 0.14732705568325052, "grad_norm": 0.3844332993030548, "learning_rate": 0.00018017504376094027, "loss": 0.5738, "step": 1987 }, { "epoch": 0.14740120115666938, "grad_norm": 0.3752062916755676, "learning_rate": 0.00018016504126031508, "loss": 0.5835, "step": 1988 }, { "epoch": 0.14747534663008824, "grad_norm": 0.3581080138683319, "learning_rate": 0.00018015503875968994, "loss": 0.5064, "step": 1989 }, { "epoch": 0.14754949210350707, "grad_norm": 0.3786541819572449, "learning_rate": 0.00018014503625906478, "loss": 0.6068, "step": 1990 }, { "epoch": 0.14762363757692593, "grad_norm": 0.4005014896392822, "learning_rate": 0.00018013503375843962, "loss": 0.5832, "step": 1991 }, { "epoch": 0.14769778305034478, "grad_norm": 0.38475191593170166, "learning_rate": 0.00018012503125781445, "loss": 0.5484, "step": 1992 }, { "epoch": 0.1477719285237636, "grad_norm": 0.3633044362068176, "learning_rate": 0.00018011502875718932, "loss": 0.5272, "step": 1993 }, { "epoch": 0.14784607399718247, "grad_norm": 0.3707912564277649, "learning_rate": 0.00018010502625656415, "loss": 0.5255, "step": 1994 }, { "epoch": 0.14792021947060133, "grad_norm": 0.38373711705207825, "learning_rate": 0.000180095023755939, "loss": 0.5675, "step": 1995 }, { "epoch": 0.14799436494402016, "grad_norm": 0.3564542829990387, "learning_rate": 0.00018008502125531383, "loss": 0.5646, "step": 1996 }, { "epoch": 0.14806851041743901, "grad_norm": 0.383382111787796, "learning_rate": 0.0001800750187546887, "loss": 0.5236, "step": 1997 }, { "epoch": 0.14814265589085787, "grad_norm": 0.37467747926712036, "learning_rate": 0.00018006501625406353, "loss": 0.5838, "step": 1998 }, { "epoch": 0.1482168013642767, "grad_norm": 0.38913655281066895, "learning_rate": 0.00018005501375343837, "loss": 0.5351, "step": 1999 }, { "epoch": 0.14829094683769556, "grad_norm": 0.37637799978256226, "learning_rate": 0.0001800450112528132, "loss": 0.5339, "step": 2000 }, { "epoch": 0.14836509231111442, "grad_norm": 0.3739481270313263, "learning_rate": 0.00018003500875218807, "loss": 0.5028, "step": 2001 }, { "epoch": 0.14843923778453325, "grad_norm": 0.38191550970077515, "learning_rate": 0.00018002500625156288, "loss": 0.5404, "step": 2002 }, { "epoch": 0.1485133832579521, "grad_norm": 0.36631542444229126, "learning_rate": 0.00018001500375093774, "loss": 0.5563, "step": 2003 }, { "epoch": 0.14858752873137096, "grad_norm": 0.42167651653289795, "learning_rate": 0.00018000500125031258, "loss": 0.5672, "step": 2004 }, { "epoch": 0.1486616742047898, "grad_norm": 0.38440635800361633, "learning_rate": 0.00017999499874968744, "loss": 0.5543, "step": 2005 }, { "epoch": 0.14873581967820865, "grad_norm": 0.3520413041114807, "learning_rate": 0.00017998499624906225, "loss": 0.533, "step": 2006 }, { "epoch": 0.1488099651516275, "grad_norm": 0.3806062638759613, "learning_rate": 0.00017997499374843712, "loss": 0.5486, "step": 2007 }, { "epoch": 0.14888411062504633, "grad_norm": 0.35067734122276306, "learning_rate": 0.00017996499124781195, "loss": 0.5102, "step": 2008 }, { "epoch": 0.1489582560984652, "grad_norm": 0.37563884258270264, "learning_rate": 0.00017995498874718682, "loss": 0.5709, "step": 2009 }, { "epoch": 0.14903240157188405, "grad_norm": 0.3586774468421936, "learning_rate": 0.00017994498624656163, "loss": 0.5606, "step": 2010 }, { "epoch": 0.14910654704530288, "grad_norm": 0.4085915982723236, "learning_rate": 0.0001799349837459365, "loss": 0.5492, "step": 2011 }, { "epoch": 0.14918069251872174, "grad_norm": 0.3555697500705719, "learning_rate": 0.00017992498124531133, "loss": 0.5343, "step": 2012 }, { "epoch": 0.1492548379921406, "grad_norm": 0.4014168083667755, "learning_rate": 0.0001799149787446862, "loss": 0.5987, "step": 2013 }, { "epoch": 0.14932898346555942, "grad_norm": 0.3703691363334656, "learning_rate": 0.00017990497624406103, "loss": 0.5711, "step": 2014 }, { "epoch": 0.14940312893897828, "grad_norm": 0.36629393696784973, "learning_rate": 0.00017989497374343587, "loss": 0.5879, "step": 2015 }, { "epoch": 0.1494772744123971, "grad_norm": 0.3641664385795593, "learning_rate": 0.0001798849712428107, "loss": 0.5393, "step": 2016 }, { "epoch": 0.14955141988581597, "grad_norm": 0.3931787610054016, "learning_rate": 0.00017987496874218554, "loss": 0.5831, "step": 2017 }, { "epoch": 0.14962556535923482, "grad_norm": 0.4258120357990265, "learning_rate": 0.0001798649662415604, "loss": 0.5477, "step": 2018 }, { "epoch": 0.14969971083265365, "grad_norm": 0.368003249168396, "learning_rate": 0.00017985496374093524, "loss": 0.5407, "step": 2019 }, { "epoch": 0.1497738563060725, "grad_norm": 0.3676373064517975, "learning_rate": 0.0001798449612403101, "loss": 0.5883, "step": 2020 }, { "epoch": 0.14984800177949137, "grad_norm": 0.39342162013053894, "learning_rate": 0.00017983495873968492, "loss": 0.5409, "step": 2021 }, { "epoch": 0.1499221472529102, "grad_norm": 0.42041096091270447, "learning_rate": 0.00017982495623905978, "loss": 0.5533, "step": 2022 }, { "epoch": 0.14999629272632906, "grad_norm": 0.36020681262016296, "learning_rate": 0.00017981495373843462, "loss": 0.5182, "step": 2023 }, { "epoch": 0.1500704381997479, "grad_norm": 0.37568822503089905, "learning_rate": 0.00017980495123780948, "loss": 0.5222, "step": 2024 }, { "epoch": 0.15014458367316674, "grad_norm": 0.3604893088340759, "learning_rate": 0.0001797949487371843, "loss": 0.5305, "step": 2025 }, { "epoch": 0.1502187291465856, "grad_norm": 0.45661666989326477, "learning_rate": 0.00017978494623655916, "loss": 0.5154, "step": 2026 }, { "epoch": 0.15029287462000446, "grad_norm": 0.38480857014656067, "learning_rate": 0.000179774943735934, "loss": 0.5193, "step": 2027 }, { "epoch": 0.1503670200934233, "grad_norm": 0.3757968842983246, "learning_rate": 0.00017976494123530883, "loss": 0.5399, "step": 2028 }, { "epoch": 0.15044116556684214, "grad_norm": 0.3818523585796356, "learning_rate": 0.00017975493873468367, "loss": 0.585, "step": 2029 }, { "epoch": 0.150515311040261, "grad_norm": 0.4258759021759033, "learning_rate": 0.00017974493623405853, "loss": 0.6421, "step": 2030 }, { "epoch": 0.15058945651367983, "grad_norm": 0.3734185993671417, "learning_rate": 0.00017973493373343337, "loss": 0.5396, "step": 2031 }, { "epoch": 0.1506636019870987, "grad_norm": 0.3947763442993164, "learning_rate": 0.0001797249312328082, "loss": 0.6271, "step": 2032 }, { "epoch": 0.15073774746051755, "grad_norm": 0.36548829078674316, "learning_rate": 0.00017971492873218304, "loss": 0.5306, "step": 2033 }, { "epoch": 0.15081189293393638, "grad_norm": 0.3726060688495636, "learning_rate": 0.0001797049262315579, "loss": 0.5283, "step": 2034 }, { "epoch": 0.15088603840735523, "grad_norm": 0.37183481454849243, "learning_rate": 0.00017969492373093275, "loss": 0.5754, "step": 2035 }, { "epoch": 0.1509601838807741, "grad_norm": 0.34594273567199707, "learning_rate": 0.00017968492123030758, "loss": 0.5146, "step": 2036 }, { "epoch": 0.15103432935419292, "grad_norm": 0.3375546932220459, "learning_rate": 0.00017967491872968242, "loss": 0.4846, "step": 2037 }, { "epoch": 0.15110847482761178, "grad_norm": 0.3965754806995392, "learning_rate": 0.00017966491622905728, "loss": 0.5619, "step": 2038 }, { "epoch": 0.15118262030103063, "grad_norm": 0.35951754450798035, "learning_rate": 0.0001796549137284321, "loss": 0.5603, "step": 2039 }, { "epoch": 0.15125676577444946, "grad_norm": 0.43008285760879517, "learning_rate": 0.00017964491122780696, "loss": 0.5685, "step": 2040 }, { "epoch": 0.15133091124786832, "grad_norm": 0.36666738986968994, "learning_rate": 0.0001796349087271818, "loss": 0.5532, "step": 2041 }, { "epoch": 0.15140505672128718, "grad_norm": 0.3643645942211151, "learning_rate": 0.00017962490622655666, "loss": 0.5227, "step": 2042 }, { "epoch": 0.151479202194706, "grad_norm": 0.3751247227191925, "learning_rate": 0.00017961490372593147, "loss": 0.5718, "step": 2043 }, { "epoch": 0.15155334766812487, "grad_norm": 0.37440726161003113, "learning_rate": 0.00017960490122530633, "loss": 0.536, "step": 2044 }, { "epoch": 0.1516274931415437, "grad_norm": 0.3943290710449219, "learning_rate": 0.00017959489872468117, "loss": 0.5394, "step": 2045 }, { "epoch": 0.15170163861496255, "grad_norm": 0.3802017569541931, "learning_rate": 0.00017958489622405604, "loss": 0.5653, "step": 2046 }, { "epoch": 0.1517757840883814, "grad_norm": 0.3594210147857666, "learning_rate": 0.00017957489372343087, "loss": 0.5613, "step": 2047 }, { "epoch": 0.15184992956180024, "grad_norm": 0.4251808226108551, "learning_rate": 0.0001795648912228057, "loss": 0.6423, "step": 2048 }, { "epoch": 0.1519240750352191, "grad_norm": 0.3966468274593353, "learning_rate": 0.00017955488872218055, "loss": 0.525, "step": 2049 }, { "epoch": 0.15199822050863795, "grad_norm": 0.3746250867843628, "learning_rate": 0.0001795448862215554, "loss": 0.5771, "step": 2050 }, { "epoch": 0.15207236598205678, "grad_norm": 0.3785117268562317, "learning_rate": 0.00017953488372093025, "loss": 0.5795, "step": 2051 }, { "epoch": 0.15214651145547564, "grad_norm": 0.4144158959388733, "learning_rate": 0.00017952488122030508, "loss": 0.5862, "step": 2052 }, { "epoch": 0.1522206569288945, "grad_norm": 0.4319784641265869, "learning_rate": 0.00017951487871967992, "loss": 0.5363, "step": 2053 }, { "epoch": 0.15229480240231333, "grad_norm": 0.3674542307853699, "learning_rate": 0.00017950487621905476, "loss": 0.512, "step": 2054 }, { "epoch": 0.15236894787573219, "grad_norm": 0.36805102229118347, "learning_rate": 0.00017949487371842962, "loss": 0.6029, "step": 2055 }, { "epoch": 0.15244309334915104, "grad_norm": 0.35786646604537964, "learning_rate": 0.00017948487121780446, "loss": 0.5527, "step": 2056 }, { "epoch": 0.15251723882256987, "grad_norm": 0.3948870301246643, "learning_rate": 0.00017947486871717932, "loss": 0.5692, "step": 2057 }, { "epoch": 0.15259138429598873, "grad_norm": 0.3516908288002014, "learning_rate": 0.00017946486621655413, "loss": 0.5001, "step": 2058 }, { "epoch": 0.1526655297694076, "grad_norm": 0.34126967191696167, "learning_rate": 0.000179454863715929, "loss": 0.507, "step": 2059 }, { "epoch": 0.15273967524282642, "grad_norm": 0.3821374475955963, "learning_rate": 0.00017944486121530384, "loss": 0.5369, "step": 2060 }, { "epoch": 0.15281382071624527, "grad_norm": 0.38966652750968933, "learning_rate": 0.0001794348587146787, "loss": 0.5659, "step": 2061 }, { "epoch": 0.15288796618966413, "grad_norm": 0.38861411809921265, "learning_rate": 0.0001794248562140535, "loss": 0.5563, "step": 2062 }, { "epoch": 0.15296211166308296, "grad_norm": 0.4932343065738678, "learning_rate": 0.00017941485371342837, "loss": 0.5332, "step": 2063 }, { "epoch": 0.15303625713650182, "grad_norm": 0.3743651807308197, "learning_rate": 0.0001794048512128032, "loss": 0.5673, "step": 2064 }, { "epoch": 0.15311040260992068, "grad_norm": 0.40849968791007996, "learning_rate": 0.00017939484871217805, "loss": 0.5709, "step": 2065 }, { "epoch": 0.1531845480833395, "grad_norm": 0.3368576467037201, "learning_rate": 0.00017938484621155289, "loss": 0.5183, "step": 2066 }, { "epoch": 0.15325869355675836, "grad_norm": 0.43366676568984985, "learning_rate": 0.00017937484371092775, "loss": 0.5635, "step": 2067 }, { "epoch": 0.15333283903017722, "grad_norm": 0.36677709221839905, "learning_rate": 0.0001793648412103026, "loss": 0.5364, "step": 2068 }, { "epoch": 0.15340698450359605, "grad_norm": 0.38458600640296936, "learning_rate": 0.00017935483870967742, "loss": 0.5839, "step": 2069 }, { "epoch": 0.1534811299770149, "grad_norm": 0.3801257312297821, "learning_rate": 0.00017934483620905226, "loss": 0.5339, "step": 2070 }, { "epoch": 0.15355527545043376, "grad_norm": 0.387206494808197, "learning_rate": 0.00017933483370842713, "loss": 0.5722, "step": 2071 }, { "epoch": 0.1536294209238526, "grad_norm": 0.4436449706554413, "learning_rate": 0.00017932483120780196, "loss": 0.557, "step": 2072 }, { "epoch": 0.15370356639727145, "grad_norm": 0.4403812289237976, "learning_rate": 0.0001793148287071768, "loss": 0.6244, "step": 2073 }, { "epoch": 0.15377771187069028, "grad_norm": 0.3526081144809723, "learning_rate": 0.00017930482620655164, "loss": 0.5291, "step": 2074 }, { "epoch": 0.15385185734410914, "grad_norm": 0.3741590976715088, "learning_rate": 0.0001792948237059265, "loss": 0.6034, "step": 2075 }, { "epoch": 0.153926002817528, "grad_norm": 0.3926807641983032, "learning_rate": 0.0001792848212053013, "loss": 0.5839, "step": 2076 }, { "epoch": 0.15400014829094683, "grad_norm": 0.38220059871673584, "learning_rate": 0.00017927481870467617, "loss": 0.5847, "step": 2077 }, { "epoch": 0.15407429376436568, "grad_norm": 0.3744441866874695, "learning_rate": 0.000179264816204051, "loss": 0.6048, "step": 2078 }, { "epoch": 0.15414843923778454, "grad_norm": 0.34949982166290283, "learning_rate": 0.00017925481370342588, "loss": 0.5704, "step": 2079 }, { "epoch": 0.15422258471120337, "grad_norm": 0.3904534876346588, "learning_rate": 0.0001792448112028007, "loss": 0.6014, "step": 2080 }, { "epoch": 0.15429673018462223, "grad_norm": 0.3896668255329132, "learning_rate": 0.00017923480870217555, "loss": 0.5218, "step": 2081 }, { "epoch": 0.15437087565804108, "grad_norm": 0.3854965269565582, "learning_rate": 0.0001792248062015504, "loss": 0.5744, "step": 2082 }, { "epoch": 0.15444502113145991, "grad_norm": 0.36849522590637207, "learning_rate": 0.00017921480370092525, "loss": 0.514, "step": 2083 }, { "epoch": 0.15451916660487877, "grad_norm": 0.34495124220848083, "learning_rate": 0.0001792048012003001, "loss": 0.5512, "step": 2084 }, { "epoch": 0.15459331207829763, "grad_norm": 0.37981414794921875, "learning_rate": 0.00017919479869967493, "loss": 0.5726, "step": 2085 }, { "epoch": 0.15466745755171646, "grad_norm": 0.4247170686721802, "learning_rate": 0.00017918479619904976, "loss": 0.5462, "step": 2086 }, { "epoch": 0.15474160302513532, "grad_norm": 0.42158257961273193, "learning_rate": 0.00017917479369842463, "loss": 0.5419, "step": 2087 }, { "epoch": 0.15481574849855417, "grad_norm": 0.36447834968566895, "learning_rate": 0.00017916479119779946, "loss": 0.5504, "step": 2088 }, { "epoch": 0.154889893971973, "grad_norm": 0.38666078448295593, "learning_rate": 0.0001791547886971743, "loss": 0.5549, "step": 2089 }, { "epoch": 0.15496403944539186, "grad_norm": 0.4113125503063202, "learning_rate": 0.00017914478619654917, "loss": 0.6039, "step": 2090 }, { "epoch": 0.15503818491881072, "grad_norm": 0.389440655708313, "learning_rate": 0.00017913478369592398, "loss": 0.5764, "step": 2091 }, { "epoch": 0.15511233039222955, "grad_norm": 0.35512155294418335, "learning_rate": 0.00017912478119529884, "loss": 0.5215, "step": 2092 }, { "epoch": 0.1551864758656484, "grad_norm": 0.36569535732269287, "learning_rate": 0.00017911477869467368, "loss": 0.5563, "step": 2093 }, { "epoch": 0.15526062133906726, "grad_norm": 0.3672513961791992, "learning_rate": 0.00017910477619404854, "loss": 0.5579, "step": 2094 }, { "epoch": 0.1553347668124861, "grad_norm": 0.3853113353252411, "learning_rate": 0.00017909477369342335, "loss": 0.5517, "step": 2095 }, { "epoch": 0.15540891228590495, "grad_norm": 0.3761195242404938, "learning_rate": 0.00017908477119279821, "loss": 0.5846, "step": 2096 }, { "epoch": 0.1554830577593238, "grad_norm": 0.36029303073883057, "learning_rate": 0.00017907476869217305, "loss": 0.5538, "step": 2097 }, { "epoch": 0.15555720323274264, "grad_norm": 0.3597443103790283, "learning_rate": 0.00017906476619154792, "loss": 0.5624, "step": 2098 }, { "epoch": 0.1556313487061615, "grad_norm": 0.35374513268470764, "learning_rate": 0.00017905476369092273, "loss": 0.5401, "step": 2099 }, { "epoch": 0.15570549417958035, "grad_norm": 0.36768838763237, "learning_rate": 0.0001790447611902976, "loss": 0.5486, "step": 2100 }, { "epoch": 0.15577963965299918, "grad_norm": 0.37549859285354614, "learning_rate": 0.00017903475868967243, "loss": 0.5619, "step": 2101 }, { "epoch": 0.15585378512641804, "grad_norm": 0.38107168674468994, "learning_rate": 0.00017902475618904726, "loss": 0.5447, "step": 2102 }, { "epoch": 0.15592793059983687, "grad_norm": 0.38343945145606995, "learning_rate": 0.0001790147536884221, "loss": 0.5601, "step": 2103 }, { "epoch": 0.15600207607325572, "grad_norm": 0.3750793933868408, "learning_rate": 0.00017900475118779697, "loss": 0.5351, "step": 2104 }, { "epoch": 0.15607622154667458, "grad_norm": 0.36154600977897644, "learning_rate": 0.0001789947486871718, "loss": 0.5216, "step": 2105 }, { "epoch": 0.1561503670200934, "grad_norm": 0.38447028398513794, "learning_rate": 0.00017898474618654664, "loss": 0.5365, "step": 2106 }, { "epoch": 0.15622451249351227, "grad_norm": 0.36704617738723755, "learning_rate": 0.00017897474368592148, "loss": 0.5628, "step": 2107 }, { "epoch": 0.15629865796693113, "grad_norm": 0.4111734628677368, "learning_rate": 0.00017896474118529634, "loss": 0.5758, "step": 2108 }, { "epoch": 0.15637280344034996, "grad_norm": 0.3974115550518036, "learning_rate": 0.00017895473868467118, "loss": 0.5874, "step": 2109 }, { "epoch": 0.1564469489137688, "grad_norm": 0.3845028877258301, "learning_rate": 0.00017894473618404602, "loss": 0.5487, "step": 2110 }, { "epoch": 0.15652109438718767, "grad_norm": 0.3825159966945648, "learning_rate": 0.00017893473368342085, "loss": 0.5235, "step": 2111 }, { "epoch": 0.1565952398606065, "grad_norm": 0.3758125603199005, "learning_rate": 0.00017892473118279572, "loss": 0.5838, "step": 2112 }, { "epoch": 0.15666938533402536, "grad_norm": 0.3607305884361267, "learning_rate": 0.00017891472868217055, "loss": 0.5218, "step": 2113 }, { "epoch": 0.15674353080744421, "grad_norm": 0.3853399157524109, "learning_rate": 0.0001789047261815454, "loss": 0.5511, "step": 2114 }, { "epoch": 0.15681767628086304, "grad_norm": 0.3495887815952301, "learning_rate": 0.00017889472368092023, "loss": 0.5421, "step": 2115 }, { "epoch": 0.1568918217542819, "grad_norm": 0.39484426379203796, "learning_rate": 0.0001788847211802951, "loss": 0.5403, "step": 2116 }, { "epoch": 0.15696596722770076, "grad_norm": 0.3599729537963867, "learning_rate": 0.00017887471867966993, "loss": 0.512, "step": 2117 }, { "epoch": 0.1570401127011196, "grad_norm": 0.3813100755214691, "learning_rate": 0.00017886471617904477, "loss": 0.524, "step": 2118 }, { "epoch": 0.15711425817453845, "grad_norm": 0.34726518392562866, "learning_rate": 0.0001788547136784196, "loss": 0.5306, "step": 2119 }, { "epoch": 0.1571884036479573, "grad_norm": 0.38824865221977234, "learning_rate": 0.00017884471117779447, "loss": 0.5898, "step": 2120 }, { "epoch": 0.15726254912137613, "grad_norm": 0.3512933552265167, "learning_rate": 0.0001788347086771693, "loss": 0.5376, "step": 2121 }, { "epoch": 0.157336694594795, "grad_norm": 0.4060463011264801, "learning_rate": 0.00017882470617654414, "loss": 0.5748, "step": 2122 }, { "epoch": 0.15741084006821385, "grad_norm": 0.35486605763435364, "learning_rate": 0.000178814703675919, "loss": 0.5074, "step": 2123 }, { "epoch": 0.15748498554163268, "grad_norm": 0.38583630323410034, "learning_rate": 0.00017880470117529384, "loss": 0.5765, "step": 2124 }, { "epoch": 0.15755913101505153, "grad_norm": 0.3972720503807068, "learning_rate": 0.00017879469867466868, "loss": 0.6097, "step": 2125 }, { "epoch": 0.1576332764884704, "grad_norm": 0.3674602210521698, "learning_rate": 0.00017878469617404352, "loss": 0.5433, "step": 2126 }, { "epoch": 0.15770742196188922, "grad_norm": 0.38850754499435425, "learning_rate": 0.00017877469367341838, "loss": 0.5852, "step": 2127 }, { "epoch": 0.15778156743530808, "grad_norm": 0.36809659004211426, "learning_rate": 0.0001787646911727932, "loss": 0.555, "step": 2128 }, { "epoch": 0.1578557129087269, "grad_norm": 0.388625830411911, "learning_rate": 0.00017875468867216806, "loss": 0.5879, "step": 2129 }, { "epoch": 0.15792985838214577, "grad_norm": 0.34897103905677795, "learning_rate": 0.0001787446861715429, "loss": 0.5292, "step": 2130 }, { "epoch": 0.15800400385556462, "grad_norm": 0.407378613948822, "learning_rate": 0.00017873468367091776, "loss": 0.5821, "step": 2131 }, { "epoch": 0.15807814932898345, "grad_norm": 0.36599817872047424, "learning_rate": 0.00017872468117029257, "loss": 0.5238, "step": 2132 }, { "epoch": 0.1581522948024023, "grad_norm": 0.3636179268360138, "learning_rate": 0.00017871467866966743, "loss": 0.569, "step": 2133 }, { "epoch": 0.15822644027582117, "grad_norm": 0.38094833493232727, "learning_rate": 0.00017870467616904227, "loss": 0.5739, "step": 2134 }, { "epoch": 0.15830058574924, "grad_norm": 0.3640671372413635, "learning_rate": 0.00017869467366841713, "loss": 0.5124, "step": 2135 }, { "epoch": 0.15837473122265885, "grad_norm": 0.3778103291988373, "learning_rate": 0.00017868467116779194, "loss": 0.5291, "step": 2136 }, { "epoch": 0.1584488766960777, "grad_norm": 0.3725220859050751, "learning_rate": 0.0001786746686671668, "loss": 0.5286, "step": 2137 }, { "epoch": 0.15852302216949654, "grad_norm": 0.3794831931591034, "learning_rate": 0.00017866466616654164, "loss": 0.5658, "step": 2138 }, { "epoch": 0.1585971676429154, "grad_norm": 0.3661184310913086, "learning_rate": 0.00017865466366591648, "loss": 0.5419, "step": 2139 }, { "epoch": 0.15867131311633426, "grad_norm": 0.3771517872810364, "learning_rate": 0.00017864466116529132, "loss": 0.5765, "step": 2140 }, { "epoch": 0.15874545858975309, "grad_norm": 0.3743377923965454, "learning_rate": 0.00017863465866466618, "loss": 0.5331, "step": 2141 }, { "epoch": 0.15881960406317194, "grad_norm": 0.39283668994903564, "learning_rate": 0.00017862465616404102, "loss": 0.5365, "step": 2142 }, { "epoch": 0.1588937495365908, "grad_norm": 0.35106441378593445, "learning_rate": 0.00017861465366341586, "loss": 0.5242, "step": 2143 }, { "epoch": 0.15896789501000963, "grad_norm": 0.37218600511550903, "learning_rate": 0.0001786046511627907, "loss": 0.5132, "step": 2144 }, { "epoch": 0.1590420404834285, "grad_norm": 0.4343532919883728, "learning_rate": 0.00017859464866216556, "loss": 0.5542, "step": 2145 }, { "epoch": 0.15911618595684734, "grad_norm": 0.3621729016304016, "learning_rate": 0.0001785846461615404, "loss": 0.5527, "step": 2146 }, { "epoch": 0.15919033143026617, "grad_norm": 0.37428995966911316, "learning_rate": 0.00017857464366091523, "loss": 0.5548, "step": 2147 }, { "epoch": 0.15926447690368503, "grad_norm": 0.361581027507782, "learning_rate": 0.00017856464116029007, "loss": 0.5356, "step": 2148 }, { "epoch": 0.1593386223771039, "grad_norm": 0.41272857785224915, "learning_rate": 0.00017855463865966493, "loss": 0.6075, "step": 2149 }, { "epoch": 0.15941276785052272, "grad_norm": 0.42636170983314514, "learning_rate": 0.00017854463615903977, "loss": 0.5072, "step": 2150 }, { "epoch": 0.15948691332394158, "grad_norm": 0.36909839510917664, "learning_rate": 0.0001785346336584146, "loss": 0.5333, "step": 2151 }, { "epoch": 0.15956105879736043, "grad_norm": 0.34912821650505066, "learning_rate": 0.00017852463115778944, "loss": 0.5408, "step": 2152 }, { "epoch": 0.15963520427077926, "grad_norm": 0.39193612337112427, "learning_rate": 0.0001785146286571643, "loss": 0.5773, "step": 2153 }, { "epoch": 0.15970934974419812, "grad_norm": 0.3596639633178711, "learning_rate": 0.00017850462615653915, "loss": 0.5376, "step": 2154 }, { "epoch": 0.15978349521761698, "grad_norm": 0.4068359136581421, "learning_rate": 0.00017849462365591398, "loss": 0.5748, "step": 2155 }, { "epoch": 0.1598576406910358, "grad_norm": 0.38559311628341675, "learning_rate": 0.00017848462115528885, "loss": 0.5352, "step": 2156 }, { "epoch": 0.15993178616445466, "grad_norm": 0.37604203820228577, "learning_rate": 0.00017847461865466368, "loss": 0.5363, "step": 2157 }, { "epoch": 0.1600059316378735, "grad_norm": 0.3769836723804474, "learning_rate": 0.00017846461615403852, "loss": 0.5462, "step": 2158 }, { "epoch": 0.16008007711129235, "grad_norm": 0.4150184988975525, "learning_rate": 0.00017845461365341336, "loss": 0.5844, "step": 2159 }, { "epoch": 0.1601542225847112, "grad_norm": 0.3804035186767578, "learning_rate": 0.00017844461115278822, "loss": 0.5136, "step": 2160 }, { "epoch": 0.16022836805813004, "grad_norm": 0.3769925832748413, "learning_rate": 0.00017843460865216306, "loss": 0.5267, "step": 2161 }, { "epoch": 0.1603025135315489, "grad_norm": 0.3811175525188446, "learning_rate": 0.0001784246061515379, "loss": 0.5461, "step": 2162 }, { "epoch": 0.16037665900496775, "grad_norm": 0.36450818181037903, "learning_rate": 0.00017841460365091273, "loss": 0.5486, "step": 2163 }, { "epoch": 0.16045080447838658, "grad_norm": 0.36897921562194824, "learning_rate": 0.0001784046011502876, "loss": 0.5436, "step": 2164 }, { "epoch": 0.16052494995180544, "grad_norm": 0.38133159279823303, "learning_rate": 0.0001783945986496624, "loss": 0.5549, "step": 2165 }, { "epoch": 0.1605990954252243, "grad_norm": 0.3910656273365021, "learning_rate": 0.00017838459614903727, "loss": 0.5117, "step": 2166 }, { "epoch": 0.16067324089864313, "grad_norm": 0.3884727954864502, "learning_rate": 0.0001783745936484121, "loss": 0.5701, "step": 2167 }, { "epoch": 0.16074738637206198, "grad_norm": 0.37470564246177673, "learning_rate": 0.00017836459114778697, "loss": 0.5631, "step": 2168 }, { "epoch": 0.16082153184548084, "grad_norm": 0.3664219081401825, "learning_rate": 0.00017835458864716178, "loss": 0.497, "step": 2169 }, { "epoch": 0.16089567731889967, "grad_norm": 0.38856378197669983, "learning_rate": 0.00017834458614653665, "loss": 0.5857, "step": 2170 }, { "epoch": 0.16096982279231853, "grad_norm": 0.37893787026405334, "learning_rate": 0.00017833458364591148, "loss": 0.5844, "step": 2171 }, { "epoch": 0.16104396826573739, "grad_norm": 0.39005038142204285, "learning_rate": 0.00017832458114528635, "loss": 0.582, "step": 2172 }, { "epoch": 0.16111811373915622, "grad_norm": 0.3681955933570862, "learning_rate": 0.00017831457864466116, "loss": 0.5394, "step": 2173 }, { "epoch": 0.16119225921257507, "grad_norm": 0.38094574213027954, "learning_rate": 0.00017830457614403602, "loss": 0.5701, "step": 2174 }, { "epoch": 0.16126640468599393, "grad_norm": 0.3915345370769501, "learning_rate": 0.00017829457364341086, "loss": 0.5285, "step": 2175 }, { "epoch": 0.16134055015941276, "grad_norm": 0.3590497672557831, "learning_rate": 0.0001782845711427857, "loss": 0.5229, "step": 2176 }, { "epoch": 0.16141469563283162, "grad_norm": 0.3705751895904541, "learning_rate": 0.00017827456864216053, "loss": 0.5379, "step": 2177 }, { "epoch": 0.16148884110625047, "grad_norm": 0.3846525549888611, "learning_rate": 0.0001782645661415354, "loss": 0.5764, "step": 2178 }, { "epoch": 0.1615629865796693, "grad_norm": 0.3705470561981201, "learning_rate": 0.00017825456364091024, "loss": 0.5129, "step": 2179 }, { "epoch": 0.16163713205308816, "grad_norm": 0.3504432141780853, "learning_rate": 0.00017824456114028507, "loss": 0.5206, "step": 2180 }, { "epoch": 0.16171127752650702, "grad_norm": 0.3510917127132416, "learning_rate": 0.0001782345586396599, "loss": 0.5336, "step": 2181 }, { "epoch": 0.16178542299992585, "grad_norm": 0.3589082658290863, "learning_rate": 0.00017822455613903477, "loss": 0.5588, "step": 2182 }, { "epoch": 0.1618595684733447, "grad_norm": 0.3972313106060028, "learning_rate": 0.0001782145536384096, "loss": 0.5835, "step": 2183 }, { "epoch": 0.16193371394676356, "grad_norm": 0.3760554790496826, "learning_rate": 0.00017820455113778445, "loss": 0.5302, "step": 2184 }, { "epoch": 0.1620078594201824, "grad_norm": 0.37440985441207886, "learning_rate": 0.00017819454863715929, "loss": 0.6095, "step": 2185 }, { "epoch": 0.16208200489360125, "grad_norm": 0.3700944185256958, "learning_rate": 0.00017818454613653415, "loss": 0.55, "step": 2186 }, { "epoch": 0.16215615036702008, "grad_norm": 0.3618887960910797, "learning_rate": 0.00017817454363590899, "loss": 0.5756, "step": 2187 }, { "epoch": 0.16223029584043894, "grad_norm": 0.37289372086524963, "learning_rate": 0.00017816454113528382, "loss": 0.5466, "step": 2188 }, { "epoch": 0.1623044413138578, "grad_norm": 0.35442298650741577, "learning_rate": 0.0001781545386346587, "loss": 0.5432, "step": 2189 }, { "epoch": 0.16237858678727662, "grad_norm": 0.36479857563972473, "learning_rate": 0.00017814453613403352, "loss": 0.5644, "step": 2190 }, { "epoch": 0.16245273226069548, "grad_norm": 0.3767174780368805, "learning_rate": 0.00017813453363340836, "loss": 0.5407, "step": 2191 }, { "epoch": 0.16252687773411434, "grad_norm": 0.3550739586353302, "learning_rate": 0.0001781245311327832, "loss": 0.5552, "step": 2192 }, { "epoch": 0.16260102320753317, "grad_norm": 0.37220698595046997, "learning_rate": 0.00017811452863215806, "loss": 0.5705, "step": 2193 }, { "epoch": 0.16267516868095203, "grad_norm": 0.35757943987846375, "learning_rate": 0.0001781045261315329, "loss": 0.5238, "step": 2194 }, { "epoch": 0.16274931415437088, "grad_norm": 0.3738338053226471, "learning_rate": 0.00017809452363090774, "loss": 0.5679, "step": 2195 }, { "epoch": 0.1628234596277897, "grad_norm": 0.35153666138648987, "learning_rate": 0.00017808452113028257, "loss": 0.5415, "step": 2196 }, { "epoch": 0.16289760510120857, "grad_norm": 0.36506906151771545, "learning_rate": 0.00017807451862965744, "loss": 0.5375, "step": 2197 }, { "epoch": 0.16297175057462743, "grad_norm": 0.35860446095466614, "learning_rate": 0.00017806451612903228, "loss": 0.5291, "step": 2198 }, { "epoch": 0.16304589604804626, "grad_norm": 0.3564091622829437, "learning_rate": 0.0001780545136284071, "loss": 0.5539, "step": 2199 }, { "epoch": 0.16312004152146511, "grad_norm": 0.3511275053024292, "learning_rate": 0.00017804451112778195, "loss": 0.5143, "step": 2200 }, { "epoch": 0.16319418699488397, "grad_norm": 0.37694892287254333, "learning_rate": 0.00017803450862715681, "loss": 0.5705, "step": 2201 }, { "epoch": 0.1632683324683028, "grad_norm": 0.3948685824871063, "learning_rate": 0.00017802450612653162, "loss": 0.5657, "step": 2202 }, { "epoch": 0.16334247794172166, "grad_norm": 0.3729794919490814, "learning_rate": 0.0001780145036259065, "loss": 0.566, "step": 2203 }, { "epoch": 0.16341662341514052, "grad_norm": 0.35052821040153503, "learning_rate": 0.00017800450112528133, "loss": 0.5306, "step": 2204 }, { "epoch": 0.16349076888855935, "grad_norm": 0.37212643027305603, "learning_rate": 0.0001779944986246562, "loss": 0.5728, "step": 2205 }, { "epoch": 0.1635649143619782, "grad_norm": 0.35891255736351013, "learning_rate": 0.000177984496124031, "loss": 0.5292, "step": 2206 }, { "epoch": 0.16363905983539706, "grad_norm": 0.3505384922027588, "learning_rate": 0.00017797449362340586, "loss": 0.5488, "step": 2207 }, { "epoch": 0.1637132053088159, "grad_norm": 0.3706895411014557, "learning_rate": 0.0001779644911227807, "loss": 0.5516, "step": 2208 }, { "epoch": 0.16378735078223475, "grad_norm": 0.3483189344406128, "learning_rate": 0.00017795448862215556, "loss": 0.5178, "step": 2209 }, { "epoch": 0.1638614962556536, "grad_norm": 0.38374844193458557, "learning_rate": 0.00017794448612153037, "loss": 0.5543, "step": 2210 }, { "epoch": 0.16393564172907243, "grad_norm": 0.3631893992424011, "learning_rate": 0.00017793448362090524, "loss": 0.5311, "step": 2211 }, { "epoch": 0.1640097872024913, "grad_norm": 0.4161281883716583, "learning_rate": 0.00017792448112028008, "loss": 0.6351, "step": 2212 }, { "epoch": 0.16408393267591015, "grad_norm": 0.36912834644317627, "learning_rate": 0.0001779144786196549, "loss": 0.5512, "step": 2213 }, { "epoch": 0.16415807814932898, "grad_norm": 0.3706563115119934, "learning_rate": 0.00017790447611902975, "loss": 0.5093, "step": 2214 }, { "epoch": 0.16423222362274784, "grad_norm": 0.3773249387741089, "learning_rate": 0.00017789447361840461, "loss": 0.5812, "step": 2215 }, { "epoch": 0.16430636909616667, "grad_norm": 0.3509494364261627, "learning_rate": 0.00017788447111777945, "loss": 0.5413, "step": 2216 }, { "epoch": 0.16438051456958552, "grad_norm": 0.38854819536209106, "learning_rate": 0.0001778744686171543, "loss": 0.6005, "step": 2217 }, { "epoch": 0.16445466004300438, "grad_norm": 0.3735141456127167, "learning_rate": 0.00017786446611652913, "loss": 0.5947, "step": 2218 }, { "epoch": 0.1645288055164232, "grad_norm": 0.3646602928638458, "learning_rate": 0.000177854463615904, "loss": 0.549, "step": 2219 }, { "epoch": 0.16460295098984207, "grad_norm": 0.3753717541694641, "learning_rate": 0.00017784446111527883, "loss": 0.5334, "step": 2220 }, { "epoch": 0.16467709646326092, "grad_norm": 0.38373610377311707, "learning_rate": 0.00017783445861465366, "loss": 0.5905, "step": 2221 }, { "epoch": 0.16475124193667975, "grad_norm": 0.3622858226299286, "learning_rate": 0.0001778244561140285, "loss": 0.5455, "step": 2222 }, { "epoch": 0.1648253874100986, "grad_norm": 0.35507503151893616, "learning_rate": 0.00017781445361340337, "loss": 0.5536, "step": 2223 }, { "epoch": 0.16489953288351747, "grad_norm": 0.3651731610298157, "learning_rate": 0.0001778044511127782, "loss": 0.5617, "step": 2224 }, { "epoch": 0.1649736783569363, "grad_norm": 0.38080769777297974, "learning_rate": 0.00017779444861215304, "loss": 0.6089, "step": 2225 }, { "epoch": 0.16504782383035516, "grad_norm": 0.3418336510658264, "learning_rate": 0.0001777844461115279, "loss": 0.5252, "step": 2226 }, { "epoch": 0.165121969303774, "grad_norm": 0.38903185725212097, "learning_rate": 0.00017777444361090274, "loss": 0.6166, "step": 2227 }, { "epoch": 0.16519611477719284, "grad_norm": 0.3908338248729706, "learning_rate": 0.00017776444111027758, "loss": 0.5698, "step": 2228 }, { "epoch": 0.1652702602506117, "grad_norm": 0.36733460426330566, "learning_rate": 0.00017775443860965242, "loss": 0.5326, "step": 2229 }, { "epoch": 0.16534440572403056, "grad_norm": 0.37417927384376526, "learning_rate": 0.00017774443610902728, "loss": 0.5513, "step": 2230 }, { "epoch": 0.1654185511974494, "grad_norm": 0.3571787476539612, "learning_rate": 0.00017773443360840212, "loss": 0.5079, "step": 2231 }, { "epoch": 0.16549269667086824, "grad_norm": 0.3611381947994232, "learning_rate": 0.00017772443110777695, "loss": 0.5498, "step": 2232 }, { "epoch": 0.1655668421442871, "grad_norm": 0.36660462617874146, "learning_rate": 0.0001777144286071518, "loss": 0.5561, "step": 2233 }, { "epoch": 0.16564098761770593, "grad_norm": 0.4077196419239044, "learning_rate": 0.00017770442610652665, "loss": 0.647, "step": 2234 }, { "epoch": 0.1657151330911248, "grad_norm": 0.35907819867134094, "learning_rate": 0.0001776944236059015, "loss": 0.5115, "step": 2235 }, { "epoch": 0.16578927856454365, "grad_norm": 0.35171619057655334, "learning_rate": 0.00017768442110527633, "loss": 0.5326, "step": 2236 }, { "epoch": 0.16586342403796248, "grad_norm": 0.36202099919319153, "learning_rate": 0.00017767441860465117, "loss": 0.5327, "step": 2237 }, { "epoch": 0.16593756951138133, "grad_norm": 0.3486269414424896, "learning_rate": 0.00017766441610402603, "loss": 0.5172, "step": 2238 }, { "epoch": 0.1660117149848002, "grad_norm": 0.38371890783309937, "learning_rate": 0.00017765441360340084, "loss": 0.5721, "step": 2239 }, { "epoch": 0.16608586045821902, "grad_norm": 0.3500496745109558, "learning_rate": 0.0001776444111027757, "loss": 0.5258, "step": 2240 }, { "epoch": 0.16616000593163788, "grad_norm": 0.3717384338378906, "learning_rate": 0.00017763440860215054, "loss": 0.5925, "step": 2241 }, { "epoch": 0.16623415140505673, "grad_norm": 0.3576542139053345, "learning_rate": 0.0001776244061015254, "loss": 0.554, "step": 2242 }, { "epoch": 0.16630829687847556, "grad_norm": 0.34796130657196045, "learning_rate": 0.00017761440360090022, "loss": 0.5379, "step": 2243 }, { "epoch": 0.16638244235189442, "grad_norm": 0.3634518086910248, "learning_rate": 0.00017760440110027508, "loss": 0.5385, "step": 2244 }, { "epoch": 0.16645658782531325, "grad_norm": 0.36478427052497864, "learning_rate": 0.00017759439859964992, "loss": 0.5484, "step": 2245 }, { "epoch": 0.1665307332987321, "grad_norm": 0.3985823690891266, "learning_rate": 0.00017758439609902478, "loss": 0.5929, "step": 2246 }, { "epoch": 0.16660487877215097, "grad_norm": 0.37763091921806335, "learning_rate": 0.0001775743935983996, "loss": 0.5443, "step": 2247 }, { "epoch": 0.1666790242455698, "grad_norm": 0.375049352645874, "learning_rate": 0.00017756439109777446, "loss": 0.5727, "step": 2248 }, { "epoch": 0.16675316971898865, "grad_norm": 0.3663257360458374, "learning_rate": 0.0001775543885971493, "loss": 0.5405, "step": 2249 }, { "epoch": 0.1668273151924075, "grad_norm": 0.36240723729133606, "learning_rate": 0.00017754438609652413, "loss": 0.5337, "step": 2250 }, { "epoch": 0.16690146066582634, "grad_norm": 0.4025961458683014, "learning_rate": 0.00017753438359589897, "loss": 0.6355, "step": 2251 }, { "epoch": 0.1669756061392452, "grad_norm": 0.35488361120224, "learning_rate": 0.00017752438109527383, "loss": 0.536, "step": 2252 }, { "epoch": 0.16704975161266405, "grad_norm": 0.3804045617580414, "learning_rate": 0.00017751437859464867, "loss": 0.5792, "step": 2253 }, { "epoch": 0.16712389708608288, "grad_norm": 0.37200066447257996, "learning_rate": 0.0001775043760940235, "loss": 0.5237, "step": 2254 }, { "epoch": 0.16719804255950174, "grad_norm": 0.36474359035491943, "learning_rate": 0.00017749437359339834, "loss": 0.5614, "step": 2255 }, { "epoch": 0.1672721880329206, "grad_norm": 0.3653022050857544, "learning_rate": 0.0001774843710927732, "loss": 0.4954, "step": 2256 }, { "epoch": 0.16734633350633943, "grad_norm": 0.38803815841674805, "learning_rate": 0.00017747436859214804, "loss": 0.5605, "step": 2257 }, { "epoch": 0.16742047897975829, "grad_norm": 0.3565492033958435, "learning_rate": 0.00017746436609152288, "loss": 0.5598, "step": 2258 }, { "epoch": 0.16749462445317714, "grad_norm": 0.3660525679588318, "learning_rate": 0.00017745436359089774, "loss": 0.5468, "step": 2259 }, { "epoch": 0.16756876992659597, "grad_norm": 0.39616847038269043, "learning_rate": 0.00017744436109027258, "loss": 0.538, "step": 2260 }, { "epoch": 0.16764291540001483, "grad_norm": 0.4202890992164612, "learning_rate": 0.00017743435858964742, "loss": 0.5784, "step": 2261 }, { "epoch": 0.1677170608734337, "grad_norm": 0.3880101144313812, "learning_rate": 0.00017742435608902226, "loss": 0.5712, "step": 2262 }, { "epoch": 0.16779120634685252, "grad_norm": 0.3554372191429138, "learning_rate": 0.00017741435358839712, "loss": 0.5676, "step": 2263 }, { "epoch": 0.16786535182027137, "grad_norm": 0.39090317487716675, "learning_rate": 0.00017740435108777196, "loss": 0.5724, "step": 2264 }, { "epoch": 0.16793949729369023, "grad_norm": 0.36723214387893677, "learning_rate": 0.0001773943485871468, "loss": 0.5694, "step": 2265 }, { "epoch": 0.16801364276710906, "grad_norm": 0.36193203926086426, "learning_rate": 0.00017738434608652163, "loss": 0.5835, "step": 2266 }, { "epoch": 0.16808778824052792, "grad_norm": 0.3879966139793396, "learning_rate": 0.0001773743435858965, "loss": 0.5768, "step": 2267 }, { "epoch": 0.16816193371394678, "grad_norm": 0.3810979127883911, "learning_rate": 0.00017736434108527133, "loss": 0.5398, "step": 2268 }, { "epoch": 0.1682360791873656, "grad_norm": 0.39123040437698364, "learning_rate": 0.00017735433858464617, "loss": 0.5908, "step": 2269 }, { "epoch": 0.16831022466078446, "grad_norm": 0.3515491783618927, "learning_rate": 0.000177344336084021, "loss": 0.4908, "step": 2270 }, { "epoch": 0.16838437013420332, "grad_norm": 0.36755120754241943, "learning_rate": 0.00017733433358339587, "loss": 0.5457, "step": 2271 }, { "epoch": 0.16845851560762215, "grad_norm": 0.3515872061252594, "learning_rate": 0.0001773243310827707, "loss": 0.559, "step": 2272 }, { "epoch": 0.168532661081041, "grad_norm": 0.38232171535491943, "learning_rate": 0.00017731432858214555, "loss": 0.5379, "step": 2273 }, { "epoch": 0.16860680655445984, "grad_norm": 0.3529175817966461, "learning_rate": 0.00017730432608152038, "loss": 0.5231, "step": 2274 }, { "epoch": 0.1686809520278787, "grad_norm": 0.37197446823120117, "learning_rate": 0.00017729432358089525, "loss": 0.5639, "step": 2275 }, { "epoch": 0.16875509750129755, "grad_norm": 0.3532503843307495, "learning_rate": 0.00017728432108027006, "loss": 0.5137, "step": 2276 }, { "epoch": 0.16882924297471638, "grad_norm": 0.37742525339126587, "learning_rate": 0.00017727431857964492, "loss": 0.5951, "step": 2277 }, { "epoch": 0.16890338844813524, "grad_norm": 0.3690308630466461, "learning_rate": 0.00017726431607901976, "loss": 0.5379, "step": 2278 }, { "epoch": 0.1689775339215541, "grad_norm": 0.36836040019989014, "learning_rate": 0.00017725431357839462, "loss": 0.5381, "step": 2279 }, { "epoch": 0.16905167939497293, "grad_norm": 0.3838198184967041, "learning_rate": 0.00017724431107776943, "loss": 0.5684, "step": 2280 }, { "epoch": 0.16912582486839178, "grad_norm": 0.3704422414302826, "learning_rate": 0.0001772343085771443, "loss": 0.5141, "step": 2281 }, { "epoch": 0.16919997034181064, "grad_norm": 0.3587093651294708, "learning_rate": 0.00017722430607651913, "loss": 0.5098, "step": 2282 }, { "epoch": 0.16927411581522947, "grad_norm": 0.3717123568058014, "learning_rate": 0.000177214303575894, "loss": 0.5195, "step": 2283 }, { "epoch": 0.16934826128864833, "grad_norm": 0.3474084436893463, "learning_rate": 0.0001772043010752688, "loss": 0.489, "step": 2284 }, { "epoch": 0.16942240676206718, "grad_norm": 0.37498077750205994, "learning_rate": 0.00017719429857464367, "loss": 0.5563, "step": 2285 }, { "epoch": 0.16949655223548601, "grad_norm": 0.3821123242378235, "learning_rate": 0.0001771842960740185, "loss": 0.5788, "step": 2286 }, { "epoch": 0.16957069770890487, "grad_norm": 0.3743712902069092, "learning_rate": 0.00017717429357339335, "loss": 0.5622, "step": 2287 }, { "epoch": 0.16964484318232373, "grad_norm": 0.37143605947494507, "learning_rate": 0.00017716429107276818, "loss": 0.5487, "step": 2288 }, { "epoch": 0.16971898865574256, "grad_norm": 0.37787172198295593, "learning_rate": 0.00017715428857214305, "loss": 0.5713, "step": 2289 }, { "epoch": 0.16979313412916142, "grad_norm": 0.36489829421043396, "learning_rate": 0.00017714428607151788, "loss": 0.5589, "step": 2290 }, { "epoch": 0.16986727960258027, "grad_norm": 0.39099565148353577, "learning_rate": 0.00017713428357089272, "loss": 0.6129, "step": 2291 }, { "epoch": 0.1699414250759991, "grad_norm": 0.3697611093521118, "learning_rate": 0.00017712428107026759, "loss": 0.5217, "step": 2292 }, { "epoch": 0.17001557054941796, "grad_norm": 0.3446539044380188, "learning_rate": 0.00017711427856964242, "loss": 0.5269, "step": 2293 }, { "epoch": 0.17008971602283682, "grad_norm": 0.3547232151031494, "learning_rate": 0.00017710427606901726, "loss": 0.553, "step": 2294 }, { "epoch": 0.17016386149625565, "grad_norm": 0.38911154866218567, "learning_rate": 0.0001770942735683921, "loss": 0.5326, "step": 2295 }, { "epoch": 0.1702380069696745, "grad_norm": 0.35906705260276794, "learning_rate": 0.00017708427106776696, "loss": 0.577, "step": 2296 }, { "epoch": 0.17031215244309336, "grad_norm": 0.3969687521457672, "learning_rate": 0.0001770742685671418, "loss": 0.5814, "step": 2297 }, { "epoch": 0.1703862979165122, "grad_norm": 0.33170199394226074, "learning_rate": 0.00017706426606651664, "loss": 0.5257, "step": 2298 }, { "epoch": 0.17046044338993105, "grad_norm": 0.4679187834262848, "learning_rate": 0.00017705426356589147, "loss": 0.5155, "step": 2299 }, { "epoch": 0.1705345888633499, "grad_norm": 0.37174490094184875, "learning_rate": 0.00017704426106526634, "loss": 0.5267, "step": 2300 }, { "epoch": 0.17060873433676874, "grad_norm": 0.3612993061542511, "learning_rate": 0.00017703425856464117, "loss": 0.5306, "step": 2301 }, { "epoch": 0.1706828798101876, "grad_norm": 0.3852008879184723, "learning_rate": 0.000177024256064016, "loss": 0.5514, "step": 2302 }, { "epoch": 0.17075702528360642, "grad_norm": 0.3624255657196045, "learning_rate": 0.00017701425356339085, "loss": 0.5124, "step": 2303 }, { "epoch": 0.17083117075702528, "grad_norm": 0.36410650610923767, "learning_rate": 0.0001770042510627657, "loss": 0.538, "step": 2304 }, { "epoch": 0.17090531623044414, "grad_norm": 0.41192612051963806, "learning_rate": 0.00017699424856214055, "loss": 0.6221, "step": 2305 }, { "epoch": 0.17097946170386297, "grad_norm": 0.4231109321117401, "learning_rate": 0.00017698424606151539, "loss": 0.6061, "step": 2306 }, { "epoch": 0.17105360717728182, "grad_norm": 0.3801780343055725, "learning_rate": 0.00017697424356089022, "loss": 0.5139, "step": 2307 }, { "epoch": 0.17112775265070068, "grad_norm": 0.36073967814445496, "learning_rate": 0.0001769642410602651, "loss": 0.5579, "step": 2308 }, { "epoch": 0.1712018981241195, "grad_norm": 0.4018751084804535, "learning_rate": 0.00017695423855963992, "loss": 0.5771, "step": 2309 }, { "epoch": 0.17127604359753837, "grad_norm": 0.378621369600296, "learning_rate": 0.00017694423605901476, "loss": 0.5376, "step": 2310 }, { "epoch": 0.17135018907095723, "grad_norm": 0.3662964105606079, "learning_rate": 0.0001769342335583896, "loss": 0.5429, "step": 2311 }, { "epoch": 0.17142433454437606, "grad_norm": 0.36284202337265015, "learning_rate": 0.00017692423105776446, "loss": 0.5342, "step": 2312 }, { "epoch": 0.1714984800177949, "grad_norm": 0.4098476469516754, "learning_rate": 0.00017691422855713927, "loss": 0.6022, "step": 2313 }, { "epoch": 0.17157262549121377, "grad_norm": 0.3679230511188507, "learning_rate": 0.00017690422605651414, "loss": 0.5207, "step": 2314 }, { "epoch": 0.1716467709646326, "grad_norm": 0.3592246472835541, "learning_rate": 0.00017689422355588897, "loss": 0.5443, "step": 2315 }, { "epoch": 0.17172091643805146, "grad_norm": 0.38155031204223633, "learning_rate": 0.00017688422105526384, "loss": 0.5582, "step": 2316 }, { "epoch": 0.17179506191147031, "grad_norm": 0.3879794776439667, "learning_rate": 0.00017687421855463865, "loss": 0.5856, "step": 2317 }, { "epoch": 0.17186920738488914, "grad_norm": 0.3868959844112396, "learning_rate": 0.0001768642160540135, "loss": 0.5365, "step": 2318 }, { "epoch": 0.171943352858308, "grad_norm": 0.44299960136413574, "learning_rate": 0.00017685421355338835, "loss": 0.6495, "step": 2319 }, { "epoch": 0.17201749833172686, "grad_norm": 0.35636892914772034, "learning_rate": 0.00017684421105276321, "loss": 0.5452, "step": 2320 }, { "epoch": 0.1720916438051457, "grad_norm": 0.35778987407684326, "learning_rate": 0.00017683420855213802, "loss": 0.4934, "step": 2321 }, { "epoch": 0.17216578927856455, "grad_norm": 0.3676290810108185, "learning_rate": 0.0001768242060515129, "loss": 0.5818, "step": 2322 }, { "epoch": 0.1722399347519834, "grad_norm": 0.3665786683559418, "learning_rate": 0.00017681420355088773, "loss": 0.5088, "step": 2323 }, { "epoch": 0.17231408022540223, "grad_norm": 0.36391356587409973, "learning_rate": 0.00017680420105026256, "loss": 0.5334, "step": 2324 }, { "epoch": 0.1723882256988211, "grad_norm": 0.3625040650367737, "learning_rate": 0.00017679419854963743, "loss": 0.5, "step": 2325 }, { "epoch": 0.17246237117223995, "grad_norm": 0.36564552783966064, "learning_rate": 0.00017678419604901226, "loss": 0.5238, "step": 2326 }, { "epoch": 0.17253651664565878, "grad_norm": 0.37389299273490906, "learning_rate": 0.0001767741935483871, "loss": 0.5563, "step": 2327 }, { "epoch": 0.17261066211907763, "grad_norm": 0.4137362539768219, "learning_rate": 0.00017676419104776194, "loss": 0.5529, "step": 2328 }, { "epoch": 0.17268480759249646, "grad_norm": 0.3875930905342102, "learning_rate": 0.0001767541885471368, "loss": 0.5501, "step": 2329 }, { "epoch": 0.17275895306591532, "grad_norm": 0.3704962730407715, "learning_rate": 0.00017674418604651164, "loss": 0.5253, "step": 2330 }, { "epoch": 0.17283309853933418, "grad_norm": 0.36972835659980774, "learning_rate": 0.00017673418354588648, "loss": 0.5249, "step": 2331 }, { "epoch": 0.172907244012753, "grad_norm": 0.37617939710617065, "learning_rate": 0.0001767241810452613, "loss": 0.5255, "step": 2332 }, { "epoch": 0.17298138948617187, "grad_norm": 0.3754170536994934, "learning_rate": 0.00017671417854463618, "loss": 0.5141, "step": 2333 }, { "epoch": 0.17305553495959072, "grad_norm": 0.3793553411960602, "learning_rate": 0.00017670417604401101, "loss": 0.5749, "step": 2334 }, { "epoch": 0.17312968043300955, "grad_norm": 0.3622954487800598, "learning_rate": 0.00017669417354338588, "loss": 0.5244, "step": 2335 }, { "epoch": 0.1732038259064284, "grad_norm": 0.3744570314884186, "learning_rate": 0.0001766841710427607, "loss": 0.5666, "step": 2336 }, { "epoch": 0.17327797137984727, "grad_norm": 0.39116257429122925, "learning_rate": 0.00017667416854213555, "loss": 0.5405, "step": 2337 }, { "epoch": 0.1733521168532661, "grad_norm": 0.3759772777557373, "learning_rate": 0.0001766641660415104, "loss": 0.5259, "step": 2338 }, { "epoch": 0.17342626232668495, "grad_norm": 0.36597704887390137, "learning_rate": 0.00017665416354088523, "loss": 0.5271, "step": 2339 }, { "epoch": 0.1735004078001038, "grad_norm": 0.39601755142211914, "learning_rate": 0.00017664416104026006, "loss": 0.637, "step": 2340 }, { "epoch": 0.17357455327352264, "grad_norm": 0.36746275424957275, "learning_rate": 0.00017663415853963493, "loss": 0.5603, "step": 2341 }, { "epoch": 0.1736486987469415, "grad_norm": 0.3662208616733551, "learning_rate": 0.00017662415603900977, "loss": 0.5425, "step": 2342 }, { "epoch": 0.17372284422036036, "grad_norm": 0.3729868531227112, "learning_rate": 0.0001766141535383846, "loss": 0.5206, "step": 2343 }, { "epoch": 0.17379698969377919, "grad_norm": 0.33064937591552734, "learning_rate": 0.00017660415103775944, "loss": 0.5375, "step": 2344 }, { "epoch": 0.17387113516719804, "grad_norm": 0.38212403655052185, "learning_rate": 0.0001765941485371343, "loss": 0.5641, "step": 2345 }, { "epoch": 0.1739452806406169, "grad_norm": 0.3687257468700409, "learning_rate": 0.00017658414603650914, "loss": 0.5493, "step": 2346 }, { "epoch": 0.17401942611403573, "grad_norm": 0.38030779361724854, "learning_rate": 0.00017657414353588398, "loss": 0.5763, "step": 2347 }, { "epoch": 0.1740935715874546, "grad_norm": 0.3768347203731537, "learning_rate": 0.00017656414103525881, "loss": 0.5618, "step": 2348 }, { "epoch": 0.17416771706087344, "grad_norm": 0.3805773854255676, "learning_rate": 0.00017655413853463368, "loss": 0.5543, "step": 2349 }, { "epoch": 0.17424186253429227, "grad_norm": 0.38536810874938965, "learning_rate": 0.0001765441360340085, "loss": 0.5759, "step": 2350 }, { "epoch": 0.17431600800771113, "grad_norm": 0.35914361476898193, "learning_rate": 0.00017653413353338335, "loss": 0.5368, "step": 2351 }, { "epoch": 0.17439015348113, "grad_norm": 0.36374226212501526, "learning_rate": 0.0001765241310327582, "loss": 0.5601, "step": 2352 }, { "epoch": 0.17446429895454882, "grad_norm": 0.3936760127544403, "learning_rate": 0.00017651412853213305, "loss": 0.5512, "step": 2353 }, { "epoch": 0.17453844442796768, "grad_norm": 0.392249196767807, "learning_rate": 0.00017650412603150786, "loss": 0.5888, "step": 2354 }, { "epoch": 0.17461258990138653, "grad_norm": 0.4147699177265167, "learning_rate": 0.00017649412353088273, "loss": 0.6167, "step": 2355 }, { "epoch": 0.17468673537480536, "grad_norm": 0.36469247937202454, "learning_rate": 0.00017648412103025757, "loss": 0.554, "step": 2356 }, { "epoch": 0.17476088084822422, "grad_norm": 0.3717665374279022, "learning_rate": 0.00017647411852963243, "loss": 0.5458, "step": 2357 }, { "epoch": 0.17483502632164305, "grad_norm": 0.34605810046195984, "learning_rate": 0.00017646411602900727, "loss": 0.5277, "step": 2358 }, { "epoch": 0.1749091717950619, "grad_norm": 0.3809416592121124, "learning_rate": 0.0001764541135283821, "loss": 0.5557, "step": 2359 }, { "epoch": 0.17498331726848076, "grad_norm": 0.3670560419559479, "learning_rate": 0.00017644411102775694, "loss": 0.5281, "step": 2360 }, { "epoch": 0.1750574627418996, "grad_norm": 0.3514253795146942, "learning_rate": 0.00017643410852713178, "loss": 0.5284, "step": 2361 }, { "epoch": 0.17513160821531845, "grad_norm": 0.35769087076187134, "learning_rate": 0.00017642410602650664, "loss": 0.5078, "step": 2362 }, { "epoch": 0.1752057536887373, "grad_norm": 0.368365615606308, "learning_rate": 0.00017641410352588148, "loss": 0.55, "step": 2363 }, { "epoch": 0.17527989916215614, "grad_norm": 0.3670368492603302, "learning_rate": 0.00017640410102525632, "loss": 0.5418, "step": 2364 }, { "epoch": 0.175354044635575, "grad_norm": 0.38695791363716125, "learning_rate": 0.00017639409852463115, "loss": 0.5887, "step": 2365 }, { "epoch": 0.17542819010899385, "grad_norm": 0.36521410942077637, "learning_rate": 0.00017638409602400602, "loss": 0.5987, "step": 2366 }, { "epoch": 0.17550233558241268, "grad_norm": 0.38885498046875, "learning_rate": 0.00017637409352338086, "loss": 0.5605, "step": 2367 }, { "epoch": 0.17557648105583154, "grad_norm": 0.3787408769130707, "learning_rate": 0.00017636409102275572, "loss": 0.5815, "step": 2368 }, { "epoch": 0.1756506265292504, "grad_norm": 0.3793809115886688, "learning_rate": 0.00017635408852213053, "loss": 0.5488, "step": 2369 }, { "epoch": 0.17572477200266923, "grad_norm": 0.3665654957294464, "learning_rate": 0.0001763440860215054, "loss": 0.5358, "step": 2370 }, { "epoch": 0.17579891747608808, "grad_norm": 0.3878568708896637, "learning_rate": 0.00017633408352088023, "loss": 0.5566, "step": 2371 }, { "epoch": 0.17587306294950694, "grad_norm": 0.3726690113544464, "learning_rate": 0.0001763240810202551, "loss": 0.5856, "step": 2372 }, { "epoch": 0.17594720842292577, "grad_norm": 0.37569329142570496, "learning_rate": 0.0001763140785196299, "loss": 0.5115, "step": 2373 }, { "epoch": 0.17602135389634463, "grad_norm": 0.3922618627548218, "learning_rate": 0.00017630407601900477, "loss": 0.547, "step": 2374 }, { "epoch": 0.1760954993697635, "grad_norm": 0.362604558467865, "learning_rate": 0.0001762940735183796, "loss": 0.5334, "step": 2375 }, { "epoch": 0.17616964484318232, "grad_norm": 0.34633371233940125, "learning_rate": 0.00017628407101775444, "loss": 0.5299, "step": 2376 }, { "epoch": 0.17624379031660117, "grad_norm": 0.4081854522228241, "learning_rate": 0.00017627406851712928, "loss": 0.5563, "step": 2377 }, { "epoch": 0.17631793579002003, "grad_norm": 0.351566344499588, "learning_rate": 0.00017626406601650414, "loss": 0.5256, "step": 2378 }, { "epoch": 0.17639208126343886, "grad_norm": 0.3494775891304016, "learning_rate": 0.00017625406351587898, "loss": 0.5246, "step": 2379 }, { "epoch": 0.17646622673685772, "grad_norm": 0.40291574597358704, "learning_rate": 0.00017624406101525382, "loss": 0.5431, "step": 2380 }, { "epoch": 0.17654037221027657, "grad_norm": 0.39949649572372437, "learning_rate": 0.00017623405851462866, "loss": 0.5513, "step": 2381 }, { "epoch": 0.1766145176836954, "grad_norm": 0.3996434509754181, "learning_rate": 0.00017622405601400352, "loss": 0.5469, "step": 2382 }, { "epoch": 0.17668866315711426, "grad_norm": 0.38866496086120605, "learning_rate": 0.00017621405351337836, "loss": 0.5471, "step": 2383 }, { "epoch": 0.17676280863053312, "grad_norm": 0.4038231670856476, "learning_rate": 0.0001762040510127532, "loss": 0.5616, "step": 2384 }, { "epoch": 0.17683695410395195, "grad_norm": 0.4052535891532898, "learning_rate": 0.00017619404851212803, "loss": 0.5451, "step": 2385 }, { "epoch": 0.1769110995773708, "grad_norm": 0.3960769474506378, "learning_rate": 0.0001761840460115029, "loss": 0.5748, "step": 2386 }, { "epoch": 0.17698524505078964, "grad_norm": 0.3825004994869232, "learning_rate": 0.0001761740435108777, "loss": 0.5462, "step": 2387 }, { "epoch": 0.1770593905242085, "grad_norm": 0.4379635453224182, "learning_rate": 0.00017616404101025257, "loss": 0.5418, "step": 2388 }, { "epoch": 0.17713353599762735, "grad_norm": 0.37676021456718445, "learning_rate": 0.0001761540385096274, "loss": 0.532, "step": 2389 }, { "epoch": 0.17720768147104618, "grad_norm": 0.4074331223964691, "learning_rate": 0.00017614403600900227, "loss": 0.5736, "step": 2390 }, { "epoch": 0.17728182694446504, "grad_norm": 0.37093228101730347, "learning_rate": 0.00017613403350837708, "loss": 0.5518, "step": 2391 }, { "epoch": 0.1773559724178839, "grad_norm": 0.3639700412750244, "learning_rate": 0.00017612403100775195, "loss": 0.5272, "step": 2392 }, { "epoch": 0.17743011789130272, "grad_norm": 0.3633266091346741, "learning_rate": 0.00017611402850712678, "loss": 0.5568, "step": 2393 }, { "epoch": 0.17750426336472158, "grad_norm": 0.41478005051612854, "learning_rate": 0.00017610402600650165, "loss": 0.498, "step": 2394 }, { "epoch": 0.17757840883814044, "grad_norm": 0.38689514994621277, "learning_rate": 0.00017609402350587648, "loss": 0.5299, "step": 2395 }, { "epoch": 0.17765255431155927, "grad_norm": 0.3843262493610382, "learning_rate": 0.00017608402100525132, "loss": 0.5337, "step": 2396 }, { "epoch": 0.17772669978497813, "grad_norm": 0.3919505476951599, "learning_rate": 0.00017607401850462616, "loss": 0.5685, "step": 2397 }, { "epoch": 0.17780084525839698, "grad_norm": 0.3886670172214508, "learning_rate": 0.000176064016004001, "loss": 0.5522, "step": 2398 }, { "epoch": 0.1778749907318158, "grad_norm": 0.41829580068588257, "learning_rate": 0.00017605401350337586, "loss": 0.5312, "step": 2399 }, { "epoch": 0.17794913620523467, "grad_norm": 0.351917564868927, "learning_rate": 0.0001760440110027507, "loss": 0.5331, "step": 2400 }, { "epoch": 0.17802328167865353, "grad_norm": 0.3854627013206482, "learning_rate": 0.00017603400850212556, "loss": 0.5528, "step": 2401 }, { "epoch": 0.17809742715207236, "grad_norm": 0.37646278738975525, "learning_rate": 0.00017602400600150037, "loss": 0.5828, "step": 2402 }, { "epoch": 0.17817157262549121, "grad_norm": 0.3911263942718506, "learning_rate": 0.00017601400350087523, "loss": 0.5991, "step": 2403 }, { "epoch": 0.17824571809891007, "grad_norm": 0.3829743564128876, "learning_rate": 0.00017600400100025007, "loss": 0.5707, "step": 2404 }, { "epoch": 0.1783198635723289, "grad_norm": 0.3757765293121338, "learning_rate": 0.00017599399849962494, "loss": 0.5442, "step": 2405 }, { "epoch": 0.17839400904574776, "grad_norm": 0.3917434811592102, "learning_rate": 0.00017598399599899975, "loss": 0.5688, "step": 2406 }, { "epoch": 0.17846815451916662, "grad_norm": 0.35604503750801086, "learning_rate": 0.0001759739934983746, "loss": 0.5108, "step": 2407 }, { "epoch": 0.17854229999258545, "grad_norm": 0.46259430050849915, "learning_rate": 0.00017596399099774945, "loss": 0.5499, "step": 2408 }, { "epoch": 0.1786164454660043, "grad_norm": 0.3846888244152069, "learning_rate": 0.0001759539884971243, "loss": 0.5875, "step": 2409 }, { "epoch": 0.17869059093942316, "grad_norm": 0.37382277846336365, "learning_rate": 0.00017594398599649912, "loss": 0.5349, "step": 2410 }, { "epoch": 0.178764736412842, "grad_norm": 0.40136295557022095, "learning_rate": 0.00017593398349587399, "loss": 0.6031, "step": 2411 }, { "epoch": 0.17883888188626085, "grad_norm": 0.39307114481925964, "learning_rate": 0.00017592398099524882, "loss": 0.5903, "step": 2412 }, { "epoch": 0.1789130273596797, "grad_norm": 0.38410285115242004, "learning_rate": 0.00017591397849462366, "loss": 0.5493, "step": 2413 }, { "epoch": 0.17898717283309853, "grad_norm": 0.36074861884117126, "learning_rate": 0.0001759039759939985, "loss": 0.5559, "step": 2414 }, { "epoch": 0.1790613183065174, "grad_norm": 0.37233006954193115, "learning_rate": 0.00017589397349337336, "loss": 0.5599, "step": 2415 }, { "epoch": 0.17913546377993622, "grad_norm": 0.3531981408596039, "learning_rate": 0.0001758839709927482, "loss": 0.51, "step": 2416 }, { "epoch": 0.17920960925335508, "grad_norm": 0.38347622752189636, "learning_rate": 0.00017587396849212303, "loss": 0.5653, "step": 2417 }, { "epoch": 0.17928375472677394, "grad_norm": 0.36534643173217773, "learning_rate": 0.00017586396599149787, "loss": 0.5354, "step": 2418 }, { "epoch": 0.17935790020019277, "grad_norm": 0.37188470363616943, "learning_rate": 0.00017585396349087274, "loss": 0.5713, "step": 2419 }, { "epoch": 0.17943204567361162, "grad_norm": 0.3696678578853607, "learning_rate": 0.00017584396099024757, "loss": 0.5454, "step": 2420 }, { "epoch": 0.17950619114703048, "grad_norm": 0.3649783134460449, "learning_rate": 0.0001758339584896224, "loss": 0.573, "step": 2421 }, { "epoch": 0.1795803366204493, "grad_norm": 0.39369213581085205, "learning_rate": 0.00017582395598899725, "loss": 0.5936, "step": 2422 }, { "epoch": 0.17965448209386817, "grad_norm": 0.37069255113601685, "learning_rate": 0.0001758139534883721, "loss": 0.5334, "step": 2423 }, { "epoch": 0.17972862756728702, "grad_norm": 0.38291123509407043, "learning_rate": 0.00017580395098774692, "loss": 0.5519, "step": 2424 }, { "epoch": 0.17980277304070585, "grad_norm": 0.3514748215675354, "learning_rate": 0.00017579394848712179, "loss": 0.52, "step": 2425 }, { "epoch": 0.1798769185141247, "grad_norm": 0.3477431833744049, "learning_rate": 0.00017578394598649662, "loss": 0.5273, "step": 2426 }, { "epoch": 0.17995106398754357, "grad_norm": 0.38243675231933594, "learning_rate": 0.0001757739434858715, "loss": 0.5537, "step": 2427 }, { "epoch": 0.1800252094609624, "grad_norm": 0.3750416934490204, "learning_rate": 0.00017576394098524632, "loss": 0.5291, "step": 2428 }, { "epoch": 0.18009935493438126, "grad_norm": 0.3517555594444275, "learning_rate": 0.00017575393848462116, "loss": 0.5331, "step": 2429 }, { "epoch": 0.1801735004078001, "grad_norm": 0.36706483364105225, "learning_rate": 0.000175743935983996, "loss": 0.5655, "step": 2430 }, { "epoch": 0.18024764588121894, "grad_norm": 0.37413230538368225, "learning_rate": 0.00017573393348337086, "loss": 0.5857, "step": 2431 }, { "epoch": 0.1803217913546378, "grad_norm": 0.3595559298992157, "learning_rate": 0.0001757239309827457, "loss": 0.4928, "step": 2432 }, { "epoch": 0.18039593682805666, "grad_norm": 0.38453492522239685, "learning_rate": 0.00017571392848212054, "loss": 0.5228, "step": 2433 }, { "epoch": 0.1804700823014755, "grad_norm": 0.3713968098163605, "learning_rate": 0.0001757039259814954, "loss": 0.5162, "step": 2434 }, { "epoch": 0.18054422777489434, "grad_norm": 0.3596845269203186, "learning_rate": 0.0001756939234808702, "loss": 0.529, "step": 2435 }, { "epoch": 0.1806183732483132, "grad_norm": 0.40022873878479004, "learning_rate": 0.00017568392098024508, "loss": 0.5665, "step": 2436 }, { "epoch": 0.18069251872173203, "grad_norm": 0.3950504660606384, "learning_rate": 0.0001756739184796199, "loss": 0.5847, "step": 2437 }, { "epoch": 0.1807666641951509, "grad_norm": 0.3850703835487366, "learning_rate": 0.00017566391597899478, "loss": 0.6011, "step": 2438 }, { "epoch": 0.18084080966856975, "grad_norm": 0.375749796628952, "learning_rate": 0.00017565391347836959, "loss": 0.544, "step": 2439 }, { "epoch": 0.18091495514198858, "grad_norm": 0.39863741397857666, "learning_rate": 0.00017564391097774445, "loss": 0.56, "step": 2440 }, { "epoch": 0.18098910061540743, "grad_norm": 0.36195212602615356, "learning_rate": 0.0001756339084771193, "loss": 0.5528, "step": 2441 }, { "epoch": 0.1810632460888263, "grad_norm": 0.3824903070926666, "learning_rate": 0.00017562390597649415, "loss": 0.5464, "step": 2442 }, { "epoch": 0.18113739156224512, "grad_norm": 0.37374863028526306, "learning_rate": 0.00017561390347586896, "loss": 0.5246, "step": 2443 }, { "epoch": 0.18121153703566398, "grad_norm": 0.35747644305229187, "learning_rate": 0.00017560390097524383, "loss": 0.5173, "step": 2444 }, { "epoch": 0.1812856825090828, "grad_norm": 0.3587506115436554, "learning_rate": 0.00017559389847461866, "loss": 0.5491, "step": 2445 }, { "epoch": 0.18135982798250166, "grad_norm": 0.3383811116218567, "learning_rate": 0.00017558389597399353, "loss": 0.5097, "step": 2446 }, { "epoch": 0.18143397345592052, "grad_norm": 0.35980647802352905, "learning_rate": 0.00017557389347336834, "loss": 0.5596, "step": 2447 }, { "epoch": 0.18150811892933935, "grad_norm": 0.37028443813323975, "learning_rate": 0.0001755638909727432, "loss": 0.5257, "step": 2448 }, { "epoch": 0.1815822644027582, "grad_norm": 0.3545834422111511, "learning_rate": 0.00017555388847211804, "loss": 0.5011, "step": 2449 }, { "epoch": 0.18165640987617707, "grad_norm": 0.4149450957775116, "learning_rate": 0.00017554388597149288, "loss": 0.5464, "step": 2450 }, { "epoch": 0.1817305553495959, "grad_norm": 0.37741392850875854, "learning_rate": 0.0001755338834708677, "loss": 0.5685, "step": 2451 }, { "epoch": 0.18180470082301475, "grad_norm": 0.3906761407852173, "learning_rate": 0.00017552388097024258, "loss": 0.6075, "step": 2452 }, { "epoch": 0.1818788462964336, "grad_norm": 0.36389872431755066, "learning_rate": 0.00017551387846961741, "loss": 0.5398, "step": 2453 }, { "epoch": 0.18195299176985244, "grad_norm": 0.37242719531059265, "learning_rate": 0.00017550387596899225, "loss": 0.5462, "step": 2454 }, { "epoch": 0.1820271372432713, "grad_norm": 0.376862108707428, "learning_rate": 0.0001754938734683671, "loss": 0.5335, "step": 2455 }, { "epoch": 0.18210128271669015, "grad_norm": 0.38642531633377075, "learning_rate": 0.00017548387096774195, "loss": 0.5549, "step": 2456 }, { "epoch": 0.18217542819010898, "grad_norm": 0.3815830945968628, "learning_rate": 0.0001754738684671168, "loss": 0.5467, "step": 2457 }, { "epoch": 0.18224957366352784, "grad_norm": 0.36647406220436096, "learning_rate": 0.00017546386596649163, "loss": 0.5714, "step": 2458 }, { "epoch": 0.1823237191369467, "grad_norm": 0.39748474955558777, "learning_rate": 0.00017545386346586646, "loss": 0.5454, "step": 2459 }, { "epoch": 0.18239786461036553, "grad_norm": 0.39860185980796814, "learning_rate": 0.00017544386096524133, "loss": 0.5619, "step": 2460 }, { "epoch": 0.1824720100837844, "grad_norm": 0.3839053809642792, "learning_rate": 0.00017543385846461617, "loss": 0.5577, "step": 2461 }, { "epoch": 0.18254615555720324, "grad_norm": 0.3606187403202057, "learning_rate": 0.000175423855963991, "loss": 0.5374, "step": 2462 }, { "epoch": 0.18262030103062207, "grad_norm": 0.38878002762794495, "learning_rate": 0.00017541385346336584, "loss": 0.54, "step": 2463 }, { "epoch": 0.18269444650404093, "grad_norm": 0.43336421251296997, "learning_rate": 0.0001754038509627407, "loss": 0.556, "step": 2464 }, { "epoch": 0.1827685919774598, "grad_norm": 0.3499844968318939, "learning_rate": 0.00017539384846211554, "loss": 0.5182, "step": 2465 }, { "epoch": 0.18284273745087862, "grad_norm": 0.35643449425697327, "learning_rate": 0.00017538384596149038, "loss": 0.5036, "step": 2466 }, { "epoch": 0.18291688292429747, "grad_norm": 0.40949520468711853, "learning_rate": 0.00017537384346086521, "loss": 0.6131, "step": 2467 }, { "epoch": 0.18299102839771633, "grad_norm": 0.39332887530326843, "learning_rate": 0.00017536384096024008, "loss": 0.5647, "step": 2468 }, { "epoch": 0.18306517387113516, "grad_norm": 0.3809000253677368, "learning_rate": 0.00017535383845961492, "loss": 0.5158, "step": 2469 }, { "epoch": 0.18313931934455402, "grad_norm": 0.37660449743270874, "learning_rate": 0.00017534383595898975, "loss": 0.5723, "step": 2470 }, { "epoch": 0.18321346481797288, "grad_norm": 0.3836134076118469, "learning_rate": 0.00017533383345836462, "loss": 0.5635, "step": 2471 }, { "epoch": 0.1832876102913917, "grad_norm": 0.3743184208869934, "learning_rate": 0.00017532383095773943, "loss": 0.521, "step": 2472 }, { "epoch": 0.18336175576481056, "grad_norm": 0.36124128103256226, "learning_rate": 0.0001753138284571143, "loss": 0.5209, "step": 2473 }, { "epoch": 0.1834359012382294, "grad_norm": 0.3430120646953583, "learning_rate": 0.00017530382595648913, "loss": 0.4912, "step": 2474 }, { "epoch": 0.18351004671164825, "grad_norm": 0.37005171179771423, "learning_rate": 0.000175293823455864, "loss": 0.5468, "step": 2475 }, { "epoch": 0.1835841921850671, "grad_norm": 0.3900744616985321, "learning_rate": 0.0001752838209552388, "loss": 0.5782, "step": 2476 }, { "epoch": 0.18365833765848594, "grad_norm": 0.376926988363266, "learning_rate": 0.00017527381845461367, "loss": 0.5447, "step": 2477 }, { "epoch": 0.1837324831319048, "grad_norm": 0.3417437970638275, "learning_rate": 0.0001752638159539885, "loss": 0.5172, "step": 2478 }, { "epoch": 0.18380662860532365, "grad_norm": 0.34957486391067505, "learning_rate": 0.00017525381345336337, "loss": 0.5177, "step": 2479 }, { "epoch": 0.18388077407874248, "grad_norm": 0.3479164242744446, "learning_rate": 0.00017524381095273818, "loss": 0.541, "step": 2480 }, { "epoch": 0.18395491955216134, "grad_norm": 0.35527828335762024, "learning_rate": 0.00017523380845211304, "loss": 0.51, "step": 2481 }, { "epoch": 0.1840290650255802, "grad_norm": 0.3754366338253021, "learning_rate": 0.00017522380595148788, "loss": 0.5472, "step": 2482 }, { "epoch": 0.18410321049899903, "grad_norm": 0.36146607995033264, "learning_rate": 0.00017521380345086274, "loss": 0.5437, "step": 2483 }, { "epoch": 0.18417735597241788, "grad_norm": 0.4004985988140106, "learning_rate": 0.00017520380095023755, "loss": 0.5644, "step": 2484 }, { "epoch": 0.18425150144583674, "grad_norm": 0.3737723231315613, "learning_rate": 0.00017519379844961242, "loss": 0.5567, "step": 2485 }, { "epoch": 0.18432564691925557, "grad_norm": 0.4453146755695343, "learning_rate": 0.00017518379594898725, "loss": 0.5591, "step": 2486 }, { "epoch": 0.18439979239267443, "grad_norm": 0.3895033299922943, "learning_rate": 0.0001751737934483621, "loss": 0.5617, "step": 2487 }, { "epoch": 0.18447393786609328, "grad_norm": 0.3694755733013153, "learning_rate": 0.00017516379094773693, "loss": 0.542, "step": 2488 }, { "epoch": 0.18454808333951211, "grad_norm": 0.3570566773414612, "learning_rate": 0.0001751537884471118, "loss": 0.515, "step": 2489 }, { "epoch": 0.18462222881293097, "grad_norm": 0.37832707166671753, "learning_rate": 0.00017514378594648663, "loss": 0.5746, "step": 2490 }, { "epoch": 0.18469637428634983, "grad_norm": 0.33644866943359375, "learning_rate": 0.00017513378344586147, "loss": 0.4932, "step": 2491 }, { "epoch": 0.18477051975976866, "grad_norm": 0.3955686092376709, "learning_rate": 0.0001751237809452363, "loss": 0.5623, "step": 2492 }, { "epoch": 0.18484466523318752, "grad_norm": 0.39345040917396545, "learning_rate": 0.00017511377844461117, "loss": 0.5241, "step": 2493 }, { "epoch": 0.18491881070660637, "grad_norm": 0.35470351576805115, "learning_rate": 0.000175103775943986, "loss": 0.4924, "step": 2494 }, { "epoch": 0.1849929561800252, "grad_norm": 0.3678523898124695, "learning_rate": 0.00017509377344336084, "loss": 0.5352, "step": 2495 }, { "epoch": 0.18506710165344406, "grad_norm": 0.3409607708454132, "learning_rate": 0.00017508377094273568, "loss": 0.4904, "step": 2496 }, { "epoch": 0.18514124712686292, "grad_norm": 0.36989110708236694, "learning_rate": 0.00017507376844211054, "loss": 0.5584, "step": 2497 }, { "epoch": 0.18521539260028175, "grad_norm": 0.40968409180641174, "learning_rate": 0.00017506376594148538, "loss": 0.5333, "step": 2498 }, { "epoch": 0.1852895380737006, "grad_norm": 0.3725428283214569, "learning_rate": 0.00017505376344086022, "loss": 0.5512, "step": 2499 }, { "epoch": 0.18536368354711946, "grad_norm": 0.357582688331604, "learning_rate": 0.00017504376094023506, "loss": 0.5325, "step": 2500 }, { "epoch": 0.1854378290205383, "grad_norm": 0.3825441300868988, "learning_rate": 0.00017503375843960992, "loss": 0.5316, "step": 2501 }, { "epoch": 0.18551197449395715, "grad_norm": 0.3956829309463501, "learning_rate": 0.00017502375593898476, "loss": 0.5787, "step": 2502 }, { "epoch": 0.18558611996737598, "grad_norm": 0.3979155421257019, "learning_rate": 0.0001750137534383596, "loss": 0.549, "step": 2503 }, { "epoch": 0.18566026544079484, "grad_norm": 0.36888331174850464, "learning_rate": 0.00017500375093773446, "loss": 0.5098, "step": 2504 }, { "epoch": 0.1857344109142137, "grad_norm": 0.3524383306503296, "learning_rate": 0.0001749937484371093, "loss": 0.5475, "step": 2505 }, { "epoch": 0.18580855638763252, "grad_norm": 0.3592681288719177, "learning_rate": 0.00017498374593648413, "loss": 0.5428, "step": 2506 }, { "epoch": 0.18588270186105138, "grad_norm": 0.42212310433387756, "learning_rate": 0.00017497374343585897, "loss": 0.6031, "step": 2507 }, { "epoch": 0.18595684733447024, "grad_norm": 0.39607128500938416, "learning_rate": 0.00017496374093523383, "loss": 0.563, "step": 2508 }, { "epoch": 0.18603099280788907, "grad_norm": 0.4397938549518585, "learning_rate": 0.00017495373843460867, "loss": 0.5444, "step": 2509 }, { "epoch": 0.18610513828130792, "grad_norm": 0.38557398319244385, "learning_rate": 0.0001749437359339835, "loss": 0.6055, "step": 2510 }, { "epoch": 0.18617928375472678, "grad_norm": 0.34404975175857544, "learning_rate": 0.00017493373343335834, "loss": 0.5332, "step": 2511 }, { "epoch": 0.1862534292281456, "grad_norm": 0.37359511852264404, "learning_rate": 0.0001749237309327332, "loss": 0.5841, "step": 2512 }, { "epoch": 0.18632757470156447, "grad_norm": 0.37149348855018616, "learning_rate": 0.00017491372843210802, "loss": 0.5157, "step": 2513 }, { "epoch": 0.18640172017498333, "grad_norm": 0.3637515604496002, "learning_rate": 0.00017490372593148288, "loss": 0.5454, "step": 2514 }, { "epoch": 0.18647586564840216, "grad_norm": 0.44989651441574097, "learning_rate": 0.00017489372343085772, "loss": 0.5879, "step": 2515 }, { "epoch": 0.186550011121821, "grad_norm": 0.3525008261203766, "learning_rate": 0.00017488372093023258, "loss": 0.5186, "step": 2516 }, { "epoch": 0.18662415659523987, "grad_norm": 0.364148885011673, "learning_rate": 0.0001748737184296074, "loss": 0.5545, "step": 2517 }, { "epoch": 0.1866983020686587, "grad_norm": 0.3384614884853363, "learning_rate": 0.00017486371592898226, "loss": 0.5049, "step": 2518 }, { "epoch": 0.18677244754207756, "grad_norm": 0.38927486538887024, "learning_rate": 0.0001748537134283571, "loss": 0.5477, "step": 2519 }, { "epoch": 0.18684659301549641, "grad_norm": 0.39205577969551086, "learning_rate": 0.00017484371092773196, "loss": 0.4992, "step": 2520 }, { "epoch": 0.18692073848891524, "grad_norm": 0.38360825181007385, "learning_rate": 0.00017483370842710677, "loss": 0.5499, "step": 2521 }, { "epoch": 0.1869948839623341, "grad_norm": 0.3570335805416107, "learning_rate": 0.00017482370592648163, "loss": 0.5329, "step": 2522 }, { "epoch": 0.18706902943575296, "grad_norm": 0.38862287998199463, "learning_rate": 0.00017481370342585647, "loss": 0.5726, "step": 2523 }, { "epoch": 0.1871431749091718, "grad_norm": 0.3690339922904968, "learning_rate": 0.0001748037009252313, "loss": 0.5258, "step": 2524 }, { "epoch": 0.18721732038259065, "grad_norm": 0.38290247321128845, "learning_rate": 0.00017479369842460615, "loss": 0.5773, "step": 2525 }, { "epoch": 0.1872914658560095, "grad_norm": 0.41541218757629395, "learning_rate": 0.000174783695923981, "loss": 0.5995, "step": 2526 }, { "epoch": 0.18736561132942833, "grad_norm": 0.35102301836013794, "learning_rate": 0.00017477369342335585, "loss": 0.5131, "step": 2527 }, { "epoch": 0.1874397568028472, "grad_norm": 0.3777007758617401, "learning_rate": 0.00017476369092273068, "loss": 0.5476, "step": 2528 }, { "epoch": 0.18751390227626602, "grad_norm": 0.38326966762542725, "learning_rate": 0.00017475368842210552, "loss": 0.5711, "step": 2529 }, { "epoch": 0.18758804774968488, "grad_norm": 0.3716869652271271, "learning_rate": 0.00017474368592148038, "loss": 0.5831, "step": 2530 }, { "epoch": 0.18766219322310373, "grad_norm": 0.37463876605033875, "learning_rate": 0.00017473368342085522, "loss": 0.4915, "step": 2531 }, { "epoch": 0.18773633869652256, "grad_norm": 0.37553128600120544, "learning_rate": 0.00017472368092023006, "loss": 0.5947, "step": 2532 }, { "epoch": 0.18781048416994142, "grad_norm": 0.3453991711139679, "learning_rate": 0.0001747136784196049, "loss": 0.5422, "step": 2533 }, { "epoch": 0.18788462964336028, "grad_norm": 0.37144556641578674, "learning_rate": 0.00017470367591897976, "loss": 0.5384, "step": 2534 }, { "epoch": 0.1879587751167791, "grad_norm": 0.3415352702140808, "learning_rate": 0.0001746936734183546, "loss": 0.5142, "step": 2535 }, { "epoch": 0.18803292059019797, "grad_norm": 0.37061625719070435, "learning_rate": 0.00017468367091772943, "loss": 0.5513, "step": 2536 }, { "epoch": 0.18810706606361682, "grad_norm": 0.3591844439506531, "learning_rate": 0.0001746736684171043, "loss": 0.535, "step": 2537 }, { "epoch": 0.18818121153703565, "grad_norm": 0.36355894804000854, "learning_rate": 0.00017466366591647914, "loss": 0.5216, "step": 2538 }, { "epoch": 0.1882553570104545, "grad_norm": 0.3612039387226105, "learning_rate": 0.00017465366341585397, "loss": 0.5142, "step": 2539 }, { "epoch": 0.18832950248387337, "grad_norm": 0.38181477785110474, "learning_rate": 0.0001746436609152288, "loss": 0.5021, "step": 2540 }, { "epoch": 0.1884036479572922, "grad_norm": 0.4053398072719574, "learning_rate": 0.00017463365841460367, "loss": 0.5656, "step": 2541 }, { "epoch": 0.18847779343071105, "grad_norm": 0.36756041646003723, "learning_rate": 0.0001746236559139785, "loss": 0.5355, "step": 2542 }, { "epoch": 0.1885519389041299, "grad_norm": 0.3654715418815613, "learning_rate": 0.00017461365341335335, "loss": 0.5447, "step": 2543 }, { "epoch": 0.18862608437754874, "grad_norm": 0.36894431710243225, "learning_rate": 0.00017460365091272819, "loss": 0.5371, "step": 2544 }, { "epoch": 0.1887002298509676, "grad_norm": 0.38145166635513306, "learning_rate": 0.00017459364841210305, "loss": 0.5508, "step": 2545 }, { "epoch": 0.18877437532438646, "grad_norm": 0.4081367254257202, "learning_rate": 0.0001745836459114779, "loss": 0.5876, "step": 2546 }, { "epoch": 0.18884852079780529, "grad_norm": 0.37020444869995117, "learning_rate": 0.00017457364341085272, "loss": 0.5191, "step": 2547 }, { "epoch": 0.18892266627122414, "grad_norm": 0.36426791548728943, "learning_rate": 0.00017456364091022756, "loss": 0.538, "step": 2548 }, { "epoch": 0.188996811744643, "grad_norm": 0.36916452646255493, "learning_rate": 0.00017455363840960243, "loss": 0.5834, "step": 2549 }, { "epoch": 0.18907095721806183, "grad_norm": 0.36656221747398376, "learning_rate": 0.00017454363590897724, "loss": 0.5424, "step": 2550 }, { "epoch": 0.1891451026914807, "grad_norm": 0.3659138083457947, "learning_rate": 0.0001745336334083521, "loss": 0.5574, "step": 2551 }, { "epoch": 0.18921924816489955, "grad_norm": 0.3765780031681061, "learning_rate": 0.00017452363090772694, "loss": 0.5493, "step": 2552 }, { "epoch": 0.18929339363831837, "grad_norm": 0.39137184619903564, "learning_rate": 0.0001745136284071018, "loss": 0.5161, "step": 2553 }, { "epoch": 0.18936753911173723, "grad_norm": 0.3628242313861847, "learning_rate": 0.0001745036259064766, "loss": 0.5588, "step": 2554 }, { "epoch": 0.1894416845851561, "grad_norm": 0.388408899307251, "learning_rate": 0.00017449362340585147, "loss": 0.528, "step": 2555 }, { "epoch": 0.18951583005857492, "grad_norm": 0.3519296944141388, "learning_rate": 0.0001744836209052263, "loss": 0.5583, "step": 2556 }, { "epoch": 0.18958997553199378, "grad_norm": 0.37396326661109924, "learning_rate": 0.00017447361840460118, "loss": 0.5666, "step": 2557 }, { "epoch": 0.1896641210054126, "grad_norm": 0.3601435422897339, "learning_rate": 0.00017446361590397599, "loss": 0.5543, "step": 2558 }, { "epoch": 0.18973826647883146, "grad_norm": 0.3775237202644348, "learning_rate": 0.00017445361340335085, "loss": 0.5489, "step": 2559 }, { "epoch": 0.18981241195225032, "grad_norm": 0.3842271864414215, "learning_rate": 0.0001744436109027257, "loss": 0.5718, "step": 2560 }, { "epoch": 0.18988655742566915, "grad_norm": 0.4168427884578705, "learning_rate": 0.00017443360840210052, "loss": 0.6091, "step": 2561 }, { "epoch": 0.189960702899088, "grad_norm": 0.36006173491477966, "learning_rate": 0.00017442360590147536, "loss": 0.5394, "step": 2562 }, { "epoch": 0.19003484837250686, "grad_norm": 0.39804136753082275, "learning_rate": 0.00017441360340085023, "loss": 0.5691, "step": 2563 }, { "epoch": 0.1901089938459257, "grad_norm": 0.3599531948566437, "learning_rate": 0.00017440360090022506, "loss": 0.5177, "step": 2564 }, { "epoch": 0.19018313931934455, "grad_norm": 0.4167648255825043, "learning_rate": 0.0001743935983995999, "loss": 0.553, "step": 2565 }, { "epoch": 0.1902572847927634, "grad_norm": 0.3799290359020233, "learning_rate": 0.00017438359589897474, "loss": 0.5635, "step": 2566 }, { "epoch": 0.19033143026618224, "grad_norm": 0.34487831592559814, "learning_rate": 0.0001743735933983496, "loss": 0.4883, "step": 2567 }, { "epoch": 0.1904055757396011, "grad_norm": 0.3687065839767456, "learning_rate": 0.00017436359089772444, "loss": 0.5812, "step": 2568 }, { "epoch": 0.19047972121301995, "grad_norm": 0.3704318404197693, "learning_rate": 0.00017435358839709928, "loss": 0.5212, "step": 2569 }, { "epoch": 0.19055386668643878, "grad_norm": 0.3919447064399719, "learning_rate": 0.00017434358589647414, "loss": 0.5651, "step": 2570 }, { "epoch": 0.19062801215985764, "grad_norm": 0.4047188460826874, "learning_rate": 0.00017433358339584898, "loss": 0.5419, "step": 2571 }, { "epoch": 0.1907021576332765, "grad_norm": 0.3566936254501343, "learning_rate": 0.00017432358089522381, "loss": 0.532, "step": 2572 }, { "epoch": 0.19077630310669533, "grad_norm": 0.35413482785224915, "learning_rate": 0.00017431357839459865, "loss": 0.5192, "step": 2573 }, { "epoch": 0.19085044858011418, "grad_norm": 0.3861897885799408, "learning_rate": 0.00017430357589397352, "loss": 0.5578, "step": 2574 }, { "epoch": 0.19092459405353304, "grad_norm": 0.40834811329841614, "learning_rate": 0.00017429357339334835, "loss": 0.57, "step": 2575 }, { "epoch": 0.19099873952695187, "grad_norm": 0.3581947386264801, "learning_rate": 0.0001742835708927232, "loss": 0.5015, "step": 2576 }, { "epoch": 0.19107288500037073, "grad_norm": 0.3621872663497925, "learning_rate": 0.00017427356839209803, "loss": 0.5299, "step": 2577 }, { "epoch": 0.1911470304737896, "grad_norm": 0.4068659842014313, "learning_rate": 0.0001742635658914729, "loss": 0.563, "step": 2578 }, { "epoch": 0.19122117594720842, "grad_norm": 0.3649819791316986, "learning_rate": 0.00017425356339084773, "loss": 0.5469, "step": 2579 }, { "epoch": 0.19129532142062727, "grad_norm": 0.41155076026916504, "learning_rate": 0.00017424356089022256, "loss": 0.5959, "step": 2580 }, { "epoch": 0.19136946689404613, "grad_norm": 0.3827474117279053, "learning_rate": 0.0001742335583895974, "loss": 0.5675, "step": 2581 }, { "epoch": 0.19144361236746496, "grad_norm": 0.38603049516677856, "learning_rate": 0.00017422355588897227, "loss": 0.5806, "step": 2582 }, { "epoch": 0.19151775784088382, "grad_norm": 0.3544858396053314, "learning_rate": 0.0001742135533883471, "loss": 0.563, "step": 2583 }, { "epoch": 0.19159190331430268, "grad_norm": 0.3644912540912628, "learning_rate": 0.00017420355088772194, "loss": 0.5336, "step": 2584 }, { "epoch": 0.1916660487877215, "grad_norm": 0.39381101727485657, "learning_rate": 0.00017419354838709678, "loss": 0.5663, "step": 2585 }, { "epoch": 0.19174019426114036, "grad_norm": 0.36619094014167786, "learning_rate": 0.00017418354588647164, "loss": 0.5581, "step": 2586 }, { "epoch": 0.1918143397345592, "grad_norm": 0.3487679362297058, "learning_rate": 0.00017417354338584645, "loss": 0.5485, "step": 2587 }, { "epoch": 0.19188848520797805, "grad_norm": 0.3445708453655243, "learning_rate": 0.00017416354088522132, "loss": 0.525, "step": 2588 }, { "epoch": 0.1919626306813969, "grad_norm": 0.3348333239555359, "learning_rate": 0.00017415353838459615, "loss": 0.5023, "step": 2589 }, { "epoch": 0.19203677615481574, "grad_norm": 0.3523937165737152, "learning_rate": 0.00017414353588397102, "loss": 0.4816, "step": 2590 }, { "epoch": 0.1921109216282346, "grad_norm": 0.3827712833881378, "learning_rate": 0.00017413353338334583, "loss": 0.5751, "step": 2591 }, { "epoch": 0.19218506710165345, "grad_norm": 0.4033728241920471, "learning_rate": 0.0001741235308827207, "loss": 0.5781, "step": 2592 }, { "epoch": 0.19225921257507228, "grad_norm": 0.39362362027168274, "learning_rate": 0.00017411352838209553, "loss": 0.5283, "step": 2593 }, { "epoch": 0.19233335804849114, "grad_norm": 0.38419845700263977, "learning_rate": 0.0001741035258814704, "loss": 0.486, "step": 2594 }, { "epoch": 0.19240750352191, "grad_norm": 0.38583725690841675, "learning_rate": 0.0001740935233808452, "loss": 0.5677, "step": 2595 }, { "epoch": 0.19248164899532882, "grad_norm": 0.3525702655315399, "learning_rate": 0.00017408352088022007, "loss": 0.5044, "step": 2596 }, { "epoch": 0.19255579446874768, "grad_norm": 0.3745267689228058, "learning_rate": 0.0001740735183795949, "loss": 0.5102, "step": 2597 }, { "epoch": 0.19262993994216654, "grad_norm": 0.4018622040748596, "learning_rate": 0.00017406351587896974, "loss": 0.5443, "step": 2598 }, { "epoch": 0.19270408541558537, "grad_norm": 0.3483121395111084, "learning_rate": 0.00017405351337834458, "loss": 0.5493, "step": 2599 }, { "epoch": 0.19277823088900423, "grad_norm": 0.38222602009773254, "learning_rate": 0.00017404351087771944, "loss": 0.5382, "step": 2600 }, { "epoch": 0.19285237636242308, "grad_norm": 0.3689139485359192, "learning_rate": 0.00017403350837709428, "loss": 0.5082, "step": 2601 }, { "epoch": 0.1929265218358419, "grad_norm": 0.38886114954948425, "learning_rate": 0.00017402350587646912, "loss": 0.6018, "step": 2602 }, { "epoch": 0.19300066730926077, "grad_norm": 0.3523424565792084, "learning_rate": 0.00017401350337584398, "loss": 0.5458, "step": 2603 }, { "epoch": 0.19307481278267963, "grad_norm": 0.35378319025039673, "learning_rate": 0.00017400350087521882, "loss": 0.5193, "step": 2604 }, { "epoch": 0.19314895825609846, "grad_norm": 0.3727543354034424, "learning_rate": 0.00017399349837459365, "loss": 0.5244, "step": 2605 }, { "epoch": 0.19322310372951731, "grad_norm": 0.3667072355747223, "learning_rate": 0.0001739834958739685, "loss": 0.5246, "step": 2606 }, { "epoch": 0.19329724920293617, "grad_norm": 0.3738144636154175, "learning_rate": 0.00017397349337334336, "loss": 0.5321, "step": 2607 }, { "epoch": 0.193371394676355, "grad_norm": 0.3745671212673187, "learning_rate": 0.0001739634908727182, "loss": 0.5518, "step": 2608 }, { "epoch": 0.19344554014977386, "grad_norm": 0.3498757779598236, "learning_rate": 0.00017395348837209303, "loss": 0.5167, "step": 2609 }, { "epoch": 0.19351968562319272, "grad_norm": 0.3733258843421936, "learning_rate": 0.00017394348587146787, "loss": 0.5573, "step": 2610 }, { "epoch": 0.19359383109661155, "grad_norm": 0.3961907923221588, "learning_rate": 0.00017393348337084273, "loss": 0.5636, "step": 2611 }, { "epoch": 0.1936679765700304, "grad_norm": 0.38348621129989624, "learning_rate": 0.00017392348087021757, "loss": 0.5721, "step": 2612 }, { "epoch": 0.19374212204344926, "grad_norm": 0.37903478741645813, "learning_rate": 0.0001739134783695924, "loss": 0.5171, "step": 2613 }, { "epoch": 0.1938162675168681, "grad_norm": 0.37355223298072815, "learning_rate": 0.00017390347586896724, "loss": 0.5676, "step": 2614 }, { "epoch": 0.19389041299028695, "grad_norm": 0.36899885535240173, "learning_rate": 0.0001738934733683421, "loss": 0.556, "step": 2615 }, { "epoch": 0.19396455846370578, "grad_norm": 0.3694542646408081, "learning_rate": 0.00017388347086771694, "loss": 0.5232, "step": 2616 }, { "epoch": 0.19403870393712463, "grad_norm": 0.44912827014923096, "learning_rate": 0.00017387346836709178, "loss": 0.5613, "step": 2617 }, { "epoch": 0.1941128494105435, "grad_norm": 0.39268842339515686, "learning_rate": 0.00017386346586646662, "loss": 0.5477, "step": 2618 }, { "epoch": 0.19418699488396232, "grad_norm": 0.36969512701034546, "learning_rate": 0.00017385346336584148, "loss": 0.5215, "step": 2619 }, { "epoch": 0.19426114035738118, "grad_norm": 0.3638516068458557, "learning_rate": 0.00017384346086521632, "loss": 0.5621, "step": 2620 }, { "epoch": 0.19433528583080004, "grad_norm": 0.37146878242492676, "learning_rate": 0.00017383345836459116, "loss": 0.5269, "step": 2621 }, { "epoch": 0.19440943130421887, "grad_norm": 0.3562355041503906, "learning_rate": 0.000173823455863966, "loss": 0.5544, "step": 2622 }, { "epoch": 0.19448357677763772, "grad_norm": 0.34572088718414307, "learning_rate": 0.00017381345336334086, "loss": 0.5144, "step": 2623 }, { "epoch": 0.19455772225105658, "grad_norm": 0.34310320019721985, "learning_rate": 0.00017380345086271567, "loss": 0.4951, "step": 2624 }, { "epoch": 0.1946318677244754, "grad_norm": 0.3656855821609497, "learning_rate": 0.00017379344836209053, "loss": 0.5199, "step": 2625 }, { "epoch": 0.19470601319789427, "grad_norm": 0.3551807403564453, "learning_rate": 0.00017378344586146537, "loss": 0.5426, "step": 2626 }, { "epoch": 0.19478015867131312, "grad_norm": 0.38513168692588806, "learning_rate": 0.00017377344336084023, "loss": 0.567, "step": 2627 }, { "epoch": 0.19485430414473195, "grad_norm": 0.3632346987724304, "learning_rate": 0.00017376344086021504, "loss": 0.5644, "step": 2628 }, { "epoch": 0.1949284496181508, "grad_norm": 0.35517245531082153, "learning_rate": 0.0001737534383595899, "loss": 0.5212, "step": 2629 }, { "epoch": 0.19500259509156967, "grad_norm": 0.36551472544670105, "learning_rate": 0.00017374343585896474, "loss": 0.5348, "step": 2630 }, { "epoch": 0.1950767405649885, "grad_norm": 0.34680891036987305, "learning_rate": 0.0001737334333583396, "loss": 0.5087, "step": 2631 }, { "epoch": 0.19515088603840736, "grad_norm": 0.38361164927482605, "learning_rate": 0.00017372343085771442, "loss": 0.5958, "step": 2632 }, { "epoch": 0.1952250315118262, "grad_norm": 0.36767879128456116, "learning_rate": 0.00017371342835708928, "loss": 0.5322, "step": 2633 }, { "epoch": 0.19529917698524504, "grad_norm": 0.37342122197151184, "learning_rate": 0.00017370342585646412, "loss": 0.5387, "step": 2634 }, { "epoch": 0.1953733224586639, "grad_norm": 0.3892972767353058, "learning_rate": 0.00017369342335583896, "loss": 0.5409, "step": 2635 }, { "epoch": 0.19544746793208276, "grad_norm": 0.4026935398578644, "learning_rate": 0.0001736834208552138, "loss": 0.5115, "step": 2636 }, { "epoch": 0.1955216134055016, "grad_norm": 0.36044520139694214, "learning_rate": 0.00017367341835458866, "loss": 0.5205, "step": 2637 }, { "epoch": 0.19559575887892044, "grad_norm": 0.38748154044151306, "learning_rate": 0.0001736634158539635, "loss": 0.568, "step": 2638 }, { "epoch": 0.1956699043523393, "grad_norm": 0.4023933708667755, "learning_rate": 0.00017365341335333833, "loss": 0.581, "step": 2639 }, { "epoch": 0.19574404982575813, "grad_norm": 0.38269853591918945, "learning_rate": 0.0001736434108527132, "loss": 0.5684, "step": 2640 }, { "epoch": 0.195818195299177, "grad_norm": 0.3772481381893158, "learning_rate": 0.00017363340835208803, "loss": 0.5525, "step": 2641 }, { "epoch": 0.19589234077259585, "grad_norm": 0.35072800517082214, "learning_rate": 0.00017362340585146287, "loss": 0.5389, "step": 2642 }, { "epoch": 0.19596648624601468, "grad_norm": 0.35835710167884827, "learning_rate": 0.0001736134033508377, "loss": 0.503, "step": 2643 }, { "epoch": 0.19604063171943353, "grad_norm": 0.35188016295433044, "learning_rate": 0.00017360340085021257, "loss": 0.4913, "step": 2644 }, { "epoch": 0.19611477719285236, "grad_norm": 0.3530520796775818, "learning_rate": 0.0001735933983495874, "loss": 0.5193, "step": 2645 }, { "epoch": 0.19618892266627122, "grad_norm": 0.3612789511680603, "learning_rate": 0.00017358339584896225, "loss": 0.5146, "step": 2646 }, { "epoch": 0.19626306813969008, "grad_norm": 0.39364370703697205, "learning_rate": 0.00017357339334833708, "loss": 0.5809, "step": 2647 }, { "epoch": 0.1963372136131089, "grad_norm": 0.3966844975948334, "learning_rate": 0.00017356339084771195, "loss": 0.5868, "step": 2648 }, { "epoch": 0.19641135908652776, "grad_norm": 0.35285326838493347, "learning_rate": 0.00017355338834708678, "loss": 0.5362, "step": 2649 }, { "epoch": 0.19648550455994662, "grad_norm": 0.37535208463668823, "learning_rate": 0.00017354338584646162, "loss": 0.5331, "step": 2650 }, { "epoch": 0.19655965003336545, "grad_norm": 0.38150766491889954, "learning_rate": 0.00017353338334583646, "loss": 0.5778, "step": 2651 }, { "epoch": 0.1966337955067843, "grad_norm": 0.37646451592445374, "learning_rate": 0.00017352338084521132, "loss": 0.5536, "step": 2652 }, { "epoch": 0.19670794098020317, "grad_norm": 0.3482462465763092, "learning_rate": 0.00017351337834458616, "loss": 0.5028, "step": 2653 }, { "epoch": 0.196782086453622, "grad_norm": 0.36854952573776245, "learning_rate": 0.000173503375843961, "loss": 0.5384, "step": 2654 }, { "epoch": 0.19685623192704085, "grad_norm": 0.3756875991821289, "learning_rate": 0.00017349337334333583, "loss": 0.5643, "step": 2655 }, { "epoch": 0.1969303774004597, "grad_norm": 0.38643836975097656, "learning_rate": 0.0001734833708427107, "loss": 0.5472, "step": 2656 }, { "epoch": 0.19700452287387854, "grad_norm": 0.3998029828071594, "learning_rate": 0.00017347336834208554, "loss": 0.5829, "step": 2657 }, { "epoch": 0.1970786683472974, "grad_norm": 0.3739239275455475, "learning_rate": 0.00017346336584146037, "loss": 0.5625, "step": 2658 }, { "epoch": 0.19715281382071626, "grad_norm": 0.3722023367881775, "learning_rate": 0.0001734533633408352, "loss": 0.5508, "step": 2659 }, { "epoch": 0.19722695929413508, "grad_norm": 0.4042607247829437, "learning_rate": 0.00017344336084021007, "loss": 0.5508, "step": 2660 }, { "epoch": 0.19730110476755394, "grad_norm": 0.35391971468925476, "learning_rate": 0.00017343335833958488, "loss": 0.5216, "step": 2661 }, { "epoch": 0.1973752502409728, "grad_norm": 0.35260042548179626, "learning_rate": 0.00017342335583895975, "loss": 0.502, "step": 2662 }, { "epoch": 0.19744939571439163, "grad_norm": 0.36487722396850586, "learning_rate": 0.00017341335333833459, "loss": 0.5668, "step": 2663 }, { "epoch": 0.1975235411878105, "grad_norm": 0.36253243684768677, "learning_rate": 0.00017340335083770945, "loss": 0.5331, "step": 2664 }, { "epoch": 0.19759768666122934, "grad_norm": 0.3718768358230591, "learning_rate": 0.00017339334833708426, "loss": 0.5899, "step": 2665 }, { "epoch": 0.19767183213464817, "grad_norm": 0.35034120082855225, "learning_rate": 0.00017338334583645912, "loss": 0.5406, "step": 2666 }, { "epoch": 0.19774597760806703, "grad_norm": 0.3796120882034302, "learning_rate": 0.00017337334333583396, "loss": 0.5205, "step": 2667 }, { "epoch": 0.1978201230814859, "grad_norm": 0.3754165768623352, "learning_rate": 0.00017336334083520882, "loss": 0.5736, "step": 2668 }, { "epoch": 0.19789426855490472, "grad_norm": 0.3621882200241089, "learning_rate": 0.00017335333833458363, "loss": 0.5346, "step": 2669 }, { "epoch": 0.19796841402832357, "grad_norm": 0.3670138716697693, "learning_rate": 0.0001733433358339585, "loss": 0.5163, "step": 2670 }, { "epoch": 0.19804255950174243, "grad_norm": 0.3709562122821808, "learning_rate": 0.00017333333333333334, "loss": 0.558, "step": 2671 }, { "epoch": 0.19811670497516126, "grad_norm": 0.3450066149234772, "learning_rate": 0.00017332333083270817, "loss": 0.519, "step": 2672 }, { "epoch": 0.19819085044858012, "grad_norm": 0.37174829840660095, "learning_rate": 0.00017331332833208304, "loss": 0.556, "step": 2673 }, { "epoch": 0.19826499592199895, "grad_norm": 0.3778265416622162, "learning_rate": 0.00017330332583145787, "loss": 0.5861, "step": 2674 }, { "epoch": 0.1983391413954178, "grad_norm": 0.3661586344242096, "learning_rate": 0.0001732933233308327, "loss": 0.5142, "step": 2675 }, { "epoch": 0.19841328686883666, "grad_norm": 0.3633919060230255, "learning_rate": 0.00017328332083020755, "loss": 0.5562, "step": 2676 }, { "epoch": 0.1984874323422555, "grad_norm": 0.3771962523460388, "learning_rate": 0.0001732733183295824, "loss": 0.5225, "step": 2677 }, { "epoch": 0.19856157781567435, "grad_norm": 0.3853077292442322, "learning_rate": 0.00017326331582895725, "loss": 0.5587, "step": 2678 }, { "epoch": 0.1986357232890932, "grad_norm": 0.3698320984840393, "learning_rate": 0.00017325331332833211, "loss": 0.5633, "step": 2679 }, { "epoch": 0.19870986876251204, "grad_norm": 0.3634043335914612, "learning_rate": 0.00017324331082770692, "loss": 0.5188, "step": 2680 }, { "epoch": 0.1987840142359309, "grad_norm": 0.3835042119026184, "learning_rate": 0.0001732333083270818, "loss": 0.5431, "step": 2681 }, { "epoch": 0.19885815970934975, "grad_norm": 0.37465736269950867, "learning_rate": 0.00017322330582645663, "loss": 0.5652, "step": 2682 }, { "epoch": 0.19893230518276858, "grad_norm": 0.3732108175754547, "learning_rate": 0.00017321330332583146, "loss": 0.5433, "step": 2683 }, { "epoch": 0.19900645065618744, "grad_norm": 0.36679279804229736, "learning_rate": 0.0001732033008252063, "loss": 0.5159, "step": 2684 }, { "epoch": 0.1990805961296063, "grad_norm": 0.34602752327919006, "learning_rate": 0.00017319329832458116, "loss": 0.5183, "step": 2685 }, { "epoch": 0.19915474160302513, "grad_norm": 0.36388692259788513, "learning_rate": 0.000173183295823956, "loss": 0.569, "step": 2686 }, { "epoch": 0.19922888707644398, "grad_norm": 0.36550068855285645, "learning_rate": 0.00017317329332333084, "loss": 0.5258, "step": 2687 }, { "epoch": 0.19930303254986284, "grad_norm": 0.3807063400745392, "learning_rate": 0.00017316329082270568, "loss": 0.5181, "step": 2688 }, { "epoch": 0.19937717802328167, "grad_norm": 0.355060338973999, "learning_rate": 0.00017315328832208054, "loss": 0.4927, "step": 2689 }, { "epoch": 0.19945132349670053, "grad_norm": 0.3940209150314331, "learning_rate": 0.00017314328582145538, "loss": 0.5529, "step": 2690 }, { "epoch": 0.19952546897011939, "grad_norm": 0.37279561161994934, "learning_rate": 0.0001731332833208302, "loss": 0.4989, "step": 2691 }, { "epoch": 0.19959961444353821, "grad_norm": 0.40294402837753296, "learning_rate": 0.00017312328082020505, "loss": 0.5788, "step": 2692 }, { "epoch": 0.19967375991695707, "grad_norm": 0.3594437539577484, "learning_rate": 0.00017311327831957991, "loss": 0.517, "step": 2693 }, { "epoch": 0.19974790539037593, "grad_norm": 0.3777635395526886, "learning_rate": 0.00017310327581895475, "loss": 0.5577, "step": 2694 }, { "epoch": 0.19982205086379476, "grad_norm": 0.3496341407299042, "learning_rate": 0.0001730932733183296, "loss": 0.5108, "step": 2695 }, { "epoch": 0.19989619633721362, "grad_norm": 0.37001171708106995, "learning_rate": 0.00017308327081770443, "loss": 0.536, "step": 2696 }, { "epoch": 0.19997034181063247, "grad_norm": 0.37752142548561096, "learning_rate": 0.0001730732683170793, "loss": 0.5571, "step": 2697 }, { "epoch": 0.2000444872840513, "grad_norm": 0.36305421590805054, "learning_rate": 0.0001730632658164541, "loss": 0.5345, "step": 2698 }, { "epoch": 0.20011863275747016, "grad_norm": 0.35269224643707275, "learning_rate": 0.00017305326331582896, "loss": 0.5143, "step": 2699 }, { "epoch": 0.20019277823088902, "grad_norm": 0.40251633524894714, "learning_rate": 0.0001730432608152038, "loss": 0.5628, "step": 2700 }, { "epoch": 0.20026692370430785, "grad_norm": 0.3803676962852478, "learning_rate": 0.00017303325831457867, "loss": 0.572, "step": 2701 }, { "epoch": 0.2003410691777267, "grad_norm": 0.348974347114563, "learning_rate": 0.00017302325581395348, "loss": 0.499, "step": 2702 }, { "epoch": 0.20041521465114553, "grad_norm": 0.41582202911376953, "learning_rate": 0.00017301325331332834, "loss": 0.5881, "step": 2703 }, { "epoch": 0.2004893601245644, "grad_norm": 0.34615999460220337, "learning_rate": 0.00017300325081270318, "loss": 0.5097, "step": 2704 }, { "epoch": 0.20056350559798325, "grad_norm": 0.4050581455230713, "learning_rate": 0.00017299324831207804, "loss": 0.5586, "step": 2705 }, { "epoch": 0.20063765107140208, "grad_norm": 0.380361407995224, "learning_rate": 0.00017298324581145288, "loss": 0.6, "step": 2706 }, { "epoch": 0.20071179654482094, "grad_norm": 0.35937923192977905, "learning_rate": 0.00017297324331082772, "loss": 0.5455, "step": 2707 }, { "epoch": 0.2007859420182398, "grad_norm": 0.42063161730766296, "learning_rate": 0.00017296324081020255, "loss": 0.556, "step": 2708 }, { "epoch": 0.20086008749165862, "grad_norm": 0.3900492489337921, "learning_rate": 0.0001729532383095774, "loss": 0.5645, "step": 2709 }, { "epoch": 0.20093423296507748, "grad_norm": 0.3547527492046356, "learning_rate": 0.00017294323580895225, "loss": 0.5176, "step": 2710 }, { "epoch": 0.20100837843849634, "grad_norm": 0.3523033857345581, "learning_rate": 0.0001729332333083271, "loss": 0.5387, "step": 2711 }, { "epoch": 0.20108252391191517, "grad_norm": 0.3877973258495331, "learning_rate": 0.00017292323080770193, "loss": 0.6101, "step": 2712 }, { "epoch": 0.20115666938533402, "grad_norm": 0.36906272172927856, "learning_rate": 0.00017291322830707677, "loss": 0.5732, "step": 2713 }, { "epoch": 0.20123081485875288, "grad_norm": 0.3833303153514862, "learning_rate": 0.00017290322580645163, "loss": 0.5335, "step": 2714 }, { "epoch": 0.2013049603321717, "grad_norm": 0.366628497838974, "learning_rate": 0.00017289322330582647, "loss": 0.5259, "step": 2715 }, { "epoch": 0.20137910580559057, "grad_norm": 0.38324370980262756, "learning_rate": 0.00017288322080520133, "loss": 0.5621, "step": 2716 }, { "epoch": 0.20145325127900943, "grad_norm": 0.3754621744155884, "learning_rate": 0.00017287321830457614, "loss": 0.5484, "step": 2717 }, { "epoch": 0.20152739675242826, "grad_norm": 0.3822977542877197, "learning_rate": 0.000172863215803951, "loss": 0.5662, "step": 2718 }, { "epoch": 0.2016015422258471, "grad_norm": 0.34953030943870544, "learning_rate": 0.00017285321330332584, "loss": 0.5476, "step": 2719 }, { "epoch": 0.20167568769926597, "grad_norm": 0.34603989124298096, "learning_rate": 0.00017284321080270068, "loss": 0.5188, "step": 2720 }, { "epoch": 0.2017498331726848, "grad_norm": 0.3665860593318939, "learning_rate": 0.00017283320830207552, "loss": 0.554, "step": 2721 }, { "epoch": 0.20182397864610366, "grad_norm": 0.37746015191078186, "learning_rate": 0.00017282320580145038, "loss": 0.5132, "step": 2722 }, { "epoch": 0.20189812411952252, "grad_norm": 0.351207971572876, "learning_rate": 0.00017281320330082522, "loss": 0.514, "step": 2723 }, { "epoch": 0.20197226959294134, "grad_norm": 0.3581894636154175, "learning_rate": 0.00017280320080020005, "loss": 0.5204, "step": 2724 }, { "epoch": 0.2020464150663602, "grad_norm": 0.3655920922756195, "learning_rate": 0.0001727931982995749, "loss": 0.5641, "step": 2725 }, { "epoch": 0.20212056053977906, "grad_norm": 0.373323917388916, "learning_rate": 0.00017278319579894976, "loss": 0.5709, "step": 2726 }, { "epoch": 0.2021947060131979, "grad_norm": 0.3528076708316803, "learning_rate": 0.0001727731932983246, "loss": 0.5363, "step": 2727 }, { "epoch": 0.20226885148661675, "grad_norm": 0.39759352803230286, "learning_rate": 0.00017276319079769943, "loss": 0.5794, "step": 2728 }, { "epoch": 0.20234299696003558, "grad_norm": 0.3701114356517792, "learning_rate": 0.00017275318829707427, "loss": 0.5498, "step": 2729 }, { "epoch": 0.20241714243345443, "grad_norm": 0.37958917021751404, "learning_rate": 0.00017274318579644913, "loss": 0.5814, "step": 2730 }, { "epoch": 0.2024912879068733, "grad_norm": 0.36879462003707886, "learning_rate": 0.00017273318329582397, "loss": 0.5028, "step": 2731 }, { "epoch": 0.20256543338029212, "grad_norm": 0.3640316426753998, "learning_rate": 0.0001727231807951988, "loss": 0.535, "step": 2732 }, { "epoch": 0.20263957885371098, "grad_norm": 0.3676459789276123, "learning_rate": 0.00017271317829457364, "loss": 0.5316, "step": 2733 }, { "epoch": 0.20271372432712984, "grad_norm": 0.36586514115333557, "learning_rate": 0.0001727031757939485, "loss": 0.5225, "step": 2734 }, { "epoch": 0.20278786980054866, "grad_norm": 0.3794933557510376, "learning_rate": 0.00017269317329332332, "loss": 0.545, "step": 2735 }, { "epoch": 0.20286201527396752, "grad_norm": 0.407819002866745, "learning_rate": 0.00017268317079269818, "loss": 0.5681, "step": 2736 }, { "epoch": 0.20293616074738638, "grad_norm": 0.4046270549297333, "learning_rate": 0.00017267316829207302, "loss": 0.5656, "step": 2737 }, { "epoch": 0.2030103062208052, "grad_norm": 0.36339443922042847, "learning_rate": 0.00017266316579144788, "loss": 0.4953, "step": 2738 }, { "epoch": 0.20308445169422407, "grad_norm": 0.36730098724365234, "learning_rate": 0.00017265316329082272, "loss": 0.563, "step": 2739 }, { "epoch": 0.20315859716764292, "grad_norm": 0.3997146189212799, "learning_rate": 0.00017264316079019756, "loss": 0.5517, "step": 2740 }, { "epoch": 0.20323274264106175, "grad_norm": 0.37046414613723755, "learning_rate": 0.0001726331582895724, "loss": 0.5248, "step": 2741 }, { "epoch": 0.2033068881144806, "grad_norm": 0.3620127737522125, "learning_rate": 0.00017262315578894726, "loss": 0.5095, "step": 2742 }, { "epoch": 0.20338103358789947, "grad_norm": 0.3704400658607483, "learning_rate": 0.0001726131532883221, "loss": 0.5664, "step": 2743 }, { "epoch": 0.2034551790613183, "grad_norm": 0.3654153048992157, "learning_rate": 0.00017260315078769693, "loss": 0.546, "step": 2744 }, { "epoch": 0.20352932453473715, "grad_norm": 0.3813161253929138, "learning_rate": 0.00017259314828707177, "loss": 0.5398, "step": 2745 }, { "epoch": 0.203603470008156, "grad_norm": 0.37346315383911133, "learning_rate": 0.0001725831457864466, "loss": 0.5377, "step": 2746 }, { "epoch": 0.20367761548157484, "grad_norm": 0.3449152410030365, "learning_rate": 0.00017257314328582147, "loss": 0.5228, "step": 2747 }, { "epoch": 0.2037517609549937, "grad_norm": 0.35343822836875916, "learning_rate": 0.0001725631407851963, "loss": 0.5286, "step": 2748 }, { "epoch": 0.20382590642841256, "grad_norm": 0.3611797094345093, "learning_rate": 0.00017255313828457117, "loss": 0.5649, "step": 2749 }, { "epoch": 0.2039000519018314, "grad_norm": 0.3601290285587311, "learning_rate": 0.00017254313578394598, "loss": 0.5339, "step": 2750 }, { "epoch": 0.20397419737525024, "grad_norm": 0.36074572801589966, "learning_rate": 0.00017253313328332085, "loss": 0.5457, "step": 2751 }, { "epoch": 0.2040483428486691, "grad_norm": 0.3674439787864685, "learning_rate": 0.00017252313078269568, "loss": 0.5452, "step": 2752 }, { "epoch": 0.20412248832208793, "grad_norm": 0.3426230251789093, "learning_rate": 0.00017251312828207055, "loss": 0.5067, "step": 2753 }, { "epoch": 0.2041966337955068, "grad_norm": 0.37352949380874634, "learning_rate": 0.00017250312578144536, "loss": 0.5644, "step": 2754 }, { "epoch": 0.20427077926892565, "grad_norm": 0.3831371068954468, "learning_rate": 0.00017249312328082022, "loss": 0.5317, "step": 2755 }, { "epoch": 0.20434492474234447, "grad_norm": 0.41011103987693787, "learning_rate": 0.00017248312078019506, "loss": 0.527, "step": 2756 }, { "epoch": 0.20441907021576333, "grad_norm": 0.3528183698654175, "learning_rate": 0.0001724731182795699, "loss": 0.549, "step": 2757 }, { "epoch": 0.20449321568918216, "grad_norm": 0.3734571635723114, "learning_rate": 0.00017246311577894473, "loss": 0.5076, "step": 2758 }, { "epoch": 0.20456736116260102, "grad_norm": 0.35605698823928833, "learning_rate": 0.0001724531132783196, "loss": 0.5005, "step": 2759 }, { "epoch": 0.20464150663601988, "grad_norm": 0.3858832120895386, "learning_rate": 0.00017244311077769443, "loss": 0.5642, "step": 2760 }, { "epoch": 0.2047156521094387, "grad_norm": 0.36860141158103943, "learning_rate": 0.00017243310827706927, "loss": 0.5105, "step": 2761 }, { "epoch": 0.20478979758285756, "grad_norm": 0.37785351276397705, "learning_rate": 0.0001724231057764441, "loss": 0.5287, "step": 2762 }, { "epoch": 0.20486394305627642, "grad_norm": 0.3509896993637085, "learning_rate": 0.00017241310327581897, "loss": 0.5275, "step": 2763 }, { "epoch": 0.20493808852969525, "grad_norm": 0.36568012833595276, "learning_rate": 0.0001724031007751938, "loss": 0.5451, "step": 2764 }, { "epoch": 0.2050122340031141, "grad_norm": 0.3662180006504059, "learning_rate": 0.00017239309827456865, "loss": 0.5486, "step": 2765 }, { "epoch": 0.20508637947653297, "grad_norm": 0.37588024139404297, "learning_rate": 0.00017238309577394348, "loss": 0.5485, "step": 2766 }, { "epoch": 0.2051605249499518, "grad_norm": 0.38909846544265747, "learning_rate": 0.00017237309327331835, "loss": 0.5782, "step": 2767 }, { "epoch": 0.20523467042337065, "grad_norm": 0.3737635314464569, "learning_rate": 0.00017236309077269318, "loss": 0.5623, "step": 2768 }, { "epoch": 0.2053088158967895, "grad_norm": 0.34876856207847595, "learning_rate": 0.00017235308827206802, "loss": 0.5172, "step": 2769 }, { "epoch": 0.20538296137020834, "grad_norm": 0.3520967960357666, "learning_rate": 0.00017234308577144286, "loss": 0.4859, "step": 2770 }, { "epoch": 0.2054571068436272, "grad_norm": 0.3663542568683624, "learning_rate": 0.00017233308327081772, "loss": 0.5408, "step": 2771 }, { "epoch": 0.20553125231704605, "grad_norm": 0.391356498003006, "learning_rate": 0.00017232308077019256, "loss": 0.5871, "step": 2772 }, { "epoch": 0.20560539779046488, "grad_norm": 0.3589385449886322, "learning_rate": 0.0001723130782695674, "loss": 0.5565, "step": 2773 }, { "epoch": 0.20567954326388374, "grad_norm": 0.3675306737422943, "learning_rate": 0.00017230307576894223, "loss": 0.5941, "step": 2774 }, { "epoch": 0.2057536887373026, "grad_norm": 0.3551604747772217, "learning_rate": 0.0001722930732683171, "loss": 0.5162, "step": 2775 }, { "epoch": 0.20582783421072143, "grad_norm": 0.3836532533168793, "learning_rate": 0.00017228307076769194, "loss": 0.5527, "step": 2776 }, { "epoch": 0.20590197968414028, "grad_norm": 0.3691129982471466, "learning_rate": 0.00017227306826706677, "loss": 0.5113, "step": 2777 }, { "epoch": 0.20597612515755914, "grad_norm": 0.3658298850059509, "learning_rate": 0.0001722630657664416, "loss": 0.5305, "step": 2778 }, { "epoch": 0.20605027063097797, "grad_norm": 0.33437222242355347, "learning_rate": 0.00017225306326581647, "loss": 0.5151, "step": 2779 }, { "epoch": 0.20612441610439683, "grad_norm": 0.3569604754447937, "learning_rate": 0.0001722430607651913, "loss": 0.5392, "step": 2780 }, { "epoch": 0.2061985615778157, "grad_norm": 0.3679859936237335, "learning_rate": 0.00017223305826456615, "loss": 0.5635, "step": 2781 }, { "epoch": 0.20627270705123452, "grad_norm": 0.3554103672504425, "learning_rate": 0.000172223055763941, "loss": 0.5322, "step": 2782 }, { "epoch": 0.20634685252465337, "grad_norm": 0.3623935282230377, "learning_rate": 0.00017221305326331582, "loss": 0.5048, "step": 2783 }, { "epoch": 0.20642099799807223, "grad_norm": 0.3700580298900604, "learning_rate": 0.00017220305076269069, "loss": 0.494, "step": 2784 }, { "epoch": 0.20649514347149106, "grad_norm": 0.34708184003829956, "learning_rate": 0.00017219304826206552, "loss": 0.5154, "step": 2785 }, { "epoch": 0.20656928894490992, "grad_norm": 0.3524700105190277, "learning_rate": 0.0001721830457614404, "loss": 0.4965, "step": 2786 }, { "epoch": 0.20664343441832875, "grad_norm": 0.3867945075035095, "learning_rate": 0.0001721730432608152, "loss": 0.5407, "step": 2787 }, { "epoch": 0.2067175798917476, "grad_norm": 0.362530380487442, "learning_rate": 0.00017216304076019006, "loss": 0.5073, "step": 2788 }, { "epoch": 0.20679172536516646, "grad_norm": 0.3685743510723114, "learning_rate": 0.0001721530382595649, "loss": 0.5475, "step": 2789 }, { "epoch": 0.2068658708385853, "grad_norm": 0.3709597885608673, "learning_rate": 0.00017214303575893976, "loss": 0.5844, "step": 2790 }, { "epoch": 0.20694001631200415, "grad_norm": 0.37296950817108154, "learning_rate": 0.00017213303325831457, "loss": 0.5762, "step": 2791 }, { "epoch": 0.207014161785423, "grad_norm": 0.3502786457538605, "learning_rate": 0.00017212303075768944, "loss": 0.5431, "step": 2792 }, { "epoch": 0.20708830725884184, "grad_norm": 0.3700212240219116, "learning_rate": 0.00017211302825706427, "loss": 0.5598, "step": 2793 }, { "epoch": 0.2071624527322607, "grad_norm": 0.37088894844055176, "learning_rate": 0.00017210302575643914, "loss": 0.5718, "step": 2794 }, { "epoch": 0.20723659820567955, "grad_norm": 0.36430624127388, "learning_rate": 0.00017209302325581395, "loss": 0.5203, "step": 2795 }, { "epoch": 0.20731074367909838, "grad_norm": 0.3508058488368988, "learning_rate": 0.0001720830207551888, "loss": 0.5126, "step": 2796 }, { "epoch": 0.20738488915251724, "grad_norm": 0.3544170558452606, "learning_rate": 0.00017207301825456365, "loss": 0.5348, "step": 2797 }, { "epoch": 0.2074590346259361, "grad_norm": 0.3587694466114044, "learning_rate": 0.0001720630157539385, "loss": 0.5346, "step": 2798 }, { "epoch": 0.20753318009935492, "grad_norm": 0.36898109316825867, "learning_rate": 0.00017205301325331332, "loss": 0.5401, "step": 2799 }, { "epoch": 0.20760732557277378, "grad_norm": 0.3788089156150818, "learning_rate": 0.0001720430107526882, "loss": 0.5491, "step": 2800 }, { "epoch": 0.20768147104619264, "grad_norm": 0.408095121383667, "learning_rate": 0.00017203300825206303, "loss": 0.5311, "step": 2801 }, { "epoch": 0.20775561651961147, "grad_norm": 0.382901668548584, "learning_rate": 0.00017202300575143786, "loss": 0.5464, "step": 2802 }, { "epoch": 0.20782976199303033, "grad_norm": 0.38483989238739014, "learning_rate": 0.0001720130032508127, "loss": 0.5572, "step": 2803 }, { "epoch": 0.20790390746644918, "grad_norm": 0.36150240898132324, "learning_rate": 0.00017200300075018756, "loss": 0.5088, "step": 2804 }, { "epoch": 0.207978052939868, "grad_norm": 0.3557947874069214, "learning_rate": 0.0001719929982495624, "loss": 0.502, "step": 2805 }, { "epoch": 0.20805219841328687, "grad_norm": 0.3664115071296692, "learning_rate": 0.00017198299574893724, "loss": 0.5113, "step": 2806 }, { "epoch": 0.20812634388670573, "grad_norm": 0.4165622889995575, "learning_rate": 0.00017197299324831207, "loss": 0.5437, "step": 2807 }, { "epoch": 0.20820048936012456, "grad_norm": 0.4017401933670044, "learning_rate": 0.00017196299074768694, "loss": 0.5871, "step": 2808 }, { "epoch": 0.20827463483354342, "grad_norm": 0.3642117381095886, "learning_rate": 0.00017195298824706178, "loss": 0.5365, "step": 2809 }, { "epoch": 0.20834878030696227, "grad_norm": 0.3892356753349304, "learning_rate": 0.0001719429857464366, "loss": 0.581, "step": 2810 }, { "epoch": 0.2084229257803811, "grad_norm": 0.38817235827445984, "learning_rate": 0.00017193298324581145, "loss": 0.5261, "step": 2811 }, { "epoch": 0.20849707125379996, "grad_norm": 0.3697802722454071, "learning_rate": 0.00017192298074518631, "loss": 0.5147, "step": 2812 }, { "epoch": 0.20857121672721882, "grad_norm": 0.41075462102890015, "learning_rate": 0.00017191297824456115, "loss": 0.5214, "step": 2813 }, { "epoch": 0.20864536220063765, "grad_norm": 0.37026992440223694, "learning_rate": 0.000171902975743936, "loss": 0.5448, "step": 2814 }, { "epoch": 0.2087195076740565, "grad_norm": 0.40034180879592896, "learning_rate": 0.00017189297324331085, "loss": 0.5521, "step": 2815 }, { "epoch": 0.20879365314747533, "grad_norm": 0.39864179491996765, "learning_rate": 0.0001718829707426857, "loss": 0.564, "step": 2816 }, { "epoch": 0.2088677986208942, "grad_norm": 0.35262346267700195, "learning_rate": 0.00017187296824206053, "loss": 0.5033, "step": 2817 }, { "epoch": 0.20894194409431305, "grad_norm": 0.36385276913642883, "learning_rate": 0.00017186296574143536, "loss": 0.5423, "step": 2818 }, { "epoch": 0.20901608956773188, "grad_norm": 0.39901646971702576, "learning_rate": 0.00017185296324081023, "loss": 0.5523, "step": 2819 }, { "epoch": 0.20909023504115073, "grad_norm": 0.39998072385787964, "learning_rate": 0.00017184296074018504, "loss": 0.5427, "step": 2820 }, { "epoch": 0.2091643805145696, "grad_norm": 0.38153183460235596, "learning_rate": 0.0001718329582395599, "loss": 0.5616, "step": 2821 }, { "epoch": 0.20923852598798842, "grad_norm": 0.3824169933795929, "learning_rate": 0.00017182295573893474, "loss": 0.5438, "step": 2822 }, { "epoch": 0.20931267146140728, "grad_norm": 0.35975903272628784, "learning_rate": 0.0001718129532383096, "loss": 0.5296, "step": 2823 }, { "epoch": 0.20938681693482614, "grad_norm": 0.37178659439086914, "learning_rate": 0.00017180295073768441, "loss": 0.5536, "step": 2824 }, { "epoch": 0.20946096240824497, "grad_norm": 0.3436260521411896, "learning_rate": 0.00017179294823705928, "loss": 0.5313, "step": 2825 }, { "epoch": 0.20953510788166382, "grad_norm": 0.3850666582584381, "learning_rate": 0.00017178294573643412, "loss": 0.5162, "step": 2826 }, { "epoch": 0.20960925335508268, "grad_norm": 0.37225449085235596, "learning_rate": 0.00017177294323580898, "loss": 0.5249, "step": 2827 }, { "epoch": 0.2096833988285015, "grad_norm": 0.3774525821208954, "learning_rate": 0.0001717629407351838, "loss": 0.5541, "step": 2828 }, { "epoch": 0.20975754430192037, "grad_norm": 0.3765939772129059, "learning_rate": 0.00017175293823455865, "loss": 0.6055, "step": 2829 }, { "epoch": 0.20983168977533923, "grad_norm": 0.40228286385536194, "learning_rate": 0.0001717429357339335, "loss": 0.516, "step": 2830 }, { "epoch": 0.20990583524875805, "grad_norm": 0.36946168541908264, "learning_rate": 0.00017173293323330835, "loss": 0.5304, "step": 2831 }, { "epoch": 0.2099799807221769, "grad_norm": 0.388671875, "learning_rate": 0.00017172293073268316, "loss": 0.5556, "step": 2832 }, { "epoch": 0.21005412619559577, "grad_norm": 0.36153364181518555, "learning_rate": 0.00017171292823205803, "loss": 0.5489, "step": 2833 }, { "epoch": 0.2101282716690146, "grad_norm": 0.36718422174453735, "learning_rate": 0.00017170292573143287, "loss": 0.5246, "step": 2834 }, { "epoch": 0.21020241714243346, "grad_norm": 0.36496102809906006, "learning_rate": 0.0001716929232308077, "loss": 0.564, "step": 2835 }, { "epoch": 0.2102765626158523, "grad_norm": 0.37000447511672974, "learning_rate": 0.00017168292073018254, "loss": 0.5746, "step": 2836 }, { "epoch": 0.21035070808927114, "grad_norm": 0.45153820514678955, "learning_rate": 0.0001716729182295574, "loss": 0.6081, "step": 2837 }, { "epoch": 0.21042485356269, "grad_norm": 0.34394070506095886, "learning_rate": 0.00017166291572893224, "loss": 0.5317, "step": 2838 }, { "epoch": 0.21049899903610886, "grad_norm": 0.35344403982162476, "learning_rate": 0.00017165291322830708, "loss": 0.5581, "step": 2839 }, { "epoch": 0.2105731445095277, "grad_norm": 0.3692741394042969, "learning_rate": 0.00017164291072768192, "loss": 0.5199, "step": 2840 }, { "epoch": 0.21064728998294655, "grad_norm": 0.35518980026245117, "learning_rate": 0.00017163290822705678, "loss": 0.5395, "step": 2841 }, { "epoch": 0.2107214354563654, "grad_norm": 0.40170371532440186, "learning_rate": 0.00017162290572643162, "loss": 0.5895, "step": 2842 }, { "epoch": 0.21079558092978423, "grad_norm": 0.36165767908096313, "learning_rate": 0.00017161290322580645, "loss": 0.5568, "step": 2843 }, { "epoch": 0.2108697264032031, "grad_norm": 0.3327789902687073, "learning_rate": 0.0001716029007251813, "loss": 0.4947, "step": 2844 }, { "epoch": 0.21094387187662192, "grad_norm": 0.3664519786834717, "learning_rate": 0.00017159289822455616, "loss": 0.5528, "step": 2845 }, { "epoch": 0.21101801735004078, "grad_norm": 0.36445003747940063, "learning_rate": 0.000171582895723931, "loss": 0.5439, "step": 2846 }, { "epoch": 0.21109216282345963, "grad_norm": 0.37415388226509094, "learning_rate": 0.00017157289322330583, "loss": 0.5296, "step": 2847 }, { "epoch": 0.21116630829687846, "grad_norm": 0.36910393834114075, "learning_rate": 0.0001715628907226807, "loss": 0.5428, "step": 2848 }, { "epoch": 0.21124045377029732, "grad_norm": 0.36170637607574463, "learning_rate": 0.00017155288822205553, "loss": 0.5584, "step": 2849 }, { "epoch": 0.21131459924371618, "grad_norm": 0.35255077481269836, "learning_rate": 0.00017154288572143037, "loss": 0.5513, "step": 2850 }, { "epoch": 0.211388744717135, "grad_norm": 0.36895185708999634, "learning_rate": 0.0001715328832208052, "loss": 0.5442, "step": 2851 }, { "epoch": 0.21146289019055386, "grad_norm": 0.3587835133075714, "learning_rate": 0.00017152288072018007, "loss": 0.5141, "step": 2852 }, { "epoch": 0.21153703566397272, "grad_norm": 0.36098551750183105, "learning_rate": 0.0001715128782195549, "loss": 0.5719, "step": 2853 }, { "epoch": 0.21161118113739155, "grad_norm": 0.39052826166152954, "learning_rate": 0.00017150287571892974, "loss": 0.5847, "step": 2854 }, { "epoch": 0.2116853266108104, "grad_norm": 0.3597087860107422, "learning_rate": 0.00017149287321830458, "loss": 0.5549, "step": 2855 }, { "epoch": 0.21175947208422927, "grad_norm": 0.34899598360061646, "learning_rate": 0.00017148287071767944, "loss": 0.5233, "step": 2856 }, { "epoch": 0.2118336175576481, "grad_norm": 0.3763315677642822, "learning_rate": 0.00017147286821705425, "loss": 0.5763, "step": 2857 }, { "epoch": 0.21190776303106695, "grad_norm": 0.3728256821632385, "learning_rate": 0.00017146286571642912, "loss": 0.5539, "step": 2858 }, { "epoch": 0.2119819085044858, "grad_norm": 0.34805765748023987, "learning_rate": 0.00017145286321580396, "loss": 0.5243, "step": 2859 }, { "epoch": 0.21205605397790464, "grad_norm": 0.3706824481487274, "learning_rate": 0.00017144286071517882, "loss": 0.5293, "step": 2860 }, { "epoch": 0.2121301994513235, "grad_norm": 0.37931668758392334, "learning_rate": 0.00017143285821455363, "loss": 0.5731, "step": 2861 }, { "epoch": 0.21220434492474236, "grad_norm": 0.3534395396709442, "learning_rate": 0.0001714228557139285, "loss": 0.5416, "step": 2862 }, { "epoch": 0.21227849039816118, "grad_norm": 0.368834525346756, "learning_rate": 0.00017141285321330333, "loss": 0.5309, "step": 2863 }, { "epoch": 0.21235263587158004, "grad_norm": 0.3758552074432373, "learning_rate": 0.0001714028507126782, "loss": 0.5137, "step": 2864 }, { "epoch": 0.2124267813449989, "grad_norm": 0.3588717579841614, "learning_rate": 0.000171392848212053, "loss": 0.5454, "step": 2865 }, { "epoch": 0.21250092681841773, "grad_norm": 0.4527890384197235, "learning_rate": 0.00017138284571142787, "loss": 0.5922, "step": 2866 }, { "epoch": 0.2125750722918366, "grad_norm": 0.36484867334365845, "learning_rate": 0.0001713728432108027, "loss": 0.5514, "step": 2867 }, { "epoch": 0.21264921776525544, "grad_norm": 0.37331703305244446, "learning_rate": 0.00017136284071017757, "loss": 0.5528, "step": 2868 }, { "epoch": 0.21272336323867427, "grad_norm": 0.40462443232536316, "learning_rate": 0.00017135283820955238, "loss": 0.5396, "step": 2869 }, { "epoch": 0.21279750871209313, "grad_norm": 0.35170918703079224, "learning_rate": 0.00017134283570892725, "loss": 0.5043, "step": 2870 }, { "epoch": 0.212871654185512, "grad_norm": 0.33885514736175537, "learning_rate": 0.00017133283320830208, "loss": 0.5305, "step": 2871 }, { "epoch": 0.21294579965893082, "grad_norm": 0.3695027530193329, "learning_rate": 0.00017132283070767692, "loss": 0.496, "step": 2872 }, { "epoch": 0.21301994513234968, "grad_norm": 0.36338916420936584, "learning_rate": 0.00017131282820705176, "loss": 0.5229, "step": 2873 }, { "epoch": 0.2130940906057685, "grad_norm": 0.36866989731788635, "learning_rate": 0.00017130282570642662, "loss": 0.5331, "step": 2874 }, { "epoch": 0.21316823607918736, "grad_norm": 0.3686973750591278, "learning_rate": 0.00017129282320580146, "loss": 0.5461, "step": 2875 }, { "epoch": 0.21324238155260622, "grad_norm": 0.37565138936042786, "learning_rate": 0.0001712828207051763, "loss": 0.5685, "step": 2876 }, { "epoch": 0.21331652702602505, "grad_norm": 0.3548624813556671, "learning_rate": 0.00017127281820455113, "loss": 0.5372, "step": 2877 }, { "epoch": 0.2133906724994439, "grad_norm": 0.3637142479419708, "learning_rate": 0.000171262815703926, "loss": 0.5176, "step": 2878 }, { "epoch": 0.21346481797286276, "grad_norm": 0.36585065722465515, "learning_rate": 0.00017125281320330083, "loss": 0.5528, "step": 2879 }, { "epoch": 0.2135389634462816, "grad_norm": 0.3560551404953003, "learning_rate": 0.00017124281070267567, "loss": 0.5295, "step": 2880 }, { "epoch": 0.21361310891970045, "grad_norm": 0.37937554717063904, "learning_rate": 0.0001712328082020505, "loss": 0.6197, "step": 2881 }, { "epoch": 0.2136872543931193, "grad_norm": 0.3617582321166992, "learning_rate": 0.00017122280570142537, "loss": 0.515, "step": 2882 }, { "epoch": 0.21376139986653814, "grad_norm": 0.38558530807495117, "learning_rate": 0.0001712128032008002, "loss": 0.5978, "step": 2883 }, { "epoch": 0.213835545339957, "grad_norm": 0.36257246136665344, "learning_rate": 0.00017120280070017505, "loss": 0.5185, "step": 2884 }, { "epoch": 0.21390969081337585, "grad_norm": 0.3557923138141632, "learning_rate": 0.0001711927981995499, "loss": 0.5139, "step": 2885 }, { "epoch": 0.21398383628679468, "grad_norm": 0.372611403465271, "learning_rate": 0.00017118279569892475, "loss": 0.5233, "step": 2886 }, { "epoch": 0.21405798176021354, "grad_norm": 0.3499337434768677, "learning_rate": 0.00017117279319829958, "loss": 0.5169, "step": 2887 }, { "epoch": 0.2141321272336324, "grad_norm": 0.3674405515193939, "learning_rate": 0.00017116279069767442, "loss": 0.5253, "step": 2888 }, { "epoch": 0.21420627270705123, "grad_norm": 0.33784034848213196, "learning_rate": 0.00017115278819704929, "loss": 0.5415, "step": 2889 }, { "epoch": 0.21428041818047008, "grad_norm": 0.3635071814060211, "learning_rate": 0.00017114278569642412, "loss": 0.5581, "step": 2890 }, { "epoch": 0.21435456365388894, "grad_norm": 0.3645602762699127, "learning_rate": 0.00017113278319579896, "loss": 0.492, "step": 2891 }, { "epoch": 0.21442870912730777, "grad_norm": 0.35554152727127075, "learning_rate": 0.0001711227806951738, "loss": 0.4945, "step": 2892 }, { "epoch": 0.21450285460072663, "grad_norm": 0.36659541726112366, "learning_rate": 0.00017111277819454866, "loss": 0.5205, "step": 2893 }, { "epoch": 0.21457700007414549, "grad_norm": 0.35838598012924194, "learning_rate": 0.00017110277569392347, "loss": 0.5236, "step": 2894 }, { "epoch": 0.21465114554756431, "grad_norm": 0.3563297688961029, "learning_rate": 0.00017109277319329834, "loss": 0.5352, "step": 2895 }, { "epoch": 0.21472529102098317, "grad_norm": 0.3597363829612732, "learning_rate": 0.00017108277069267317, "loss": 0.5427, "step": 2896 }, { "epoch": 0.21479943649440203, "grad_norm": 0.37233367562294006, "learning_rate": 0.00017107276819204804, "loss": 0.5648, "step": 2897 }, { "epoch": 0.21487358196782086, "grad_norm": 0.3749842345714569, "learning_rate": 0.00017106276569142285, "loss": 0.5418, "step": 2898 }, { "epoch": 0.21494772744123972, "grad_norm": 0.36467573046684265, "learning_rate": 0.0001710527631907977, "loss": 0.5287, "step": 2899 }, { "epoch": 0.21502187291465857, "grad_norm": 0.3610907793045044, "learning_rate": 0.00017104276069017255, "loss": 0.5541, "step": 2900 }, { "epoch": 0.2150960183880774, "grad_norm": 0.38704636693000793, "learning_rate": 0.0001710327581895474, "loss": 0.606, "step": 2901 }, { "epoch": 0.21517016386149626, "grad_norm": 0.3454762101173401, "learning_rate": 0.00017102275568892222, "loss": 0.5573, "step": 2902 }, { "epoch": 0.2152443093349151, "grad_norm": 0.3836224675178528, "learning_rate": 0.00017101275318829709, "loss": 0.5922, "step": 2903 }, { "epoch": 0.21531845480833395, "grad_norm": 0.35970091819763184, "learning_rate": 0.00017100275068767192, "loss": 0.5113, "step": 2904 }, { "epoch": 0.2153926002817528, "grad_norm": 0.3846072852611542, "learning_rate": 0.0001709927481870468, "loss": 0.5645, "step": 2905 }, { "epoch": 0.21546674575517163, "grad_norm": 0.3430723249912262, "learning_rate": 0.0001709827456864216, "loss": 0.4985, "step": 2906 }, { "epoch": 0.2155408912285905, "grad_norm": 0.35706961154937744, "learning_rate": 0.00017097274318579646, "loss": 0.5264, "step": 2907 }, { "epoch": 0.21561503670200935, "grad_norm": 0.3641817271709442, "learning_rate": 0.0001709627406851713, "loss": 0.5574, "step": 2908 }, { "epoch": 0.21568918217542818, "grad_norm": 0.34732216596603394, "learning_rate": 0.00017095273818454614, "loss": 0.5566, "step": 2909 }, { "epoch": 0.21576332764884704, "grad_norm": 0.35962551832199097, "learning_rate": 0.00017094273568392097, "loss": 0.5636, "step": 2910 }, { "epoch": 0.2158374731222659, "grad_norm": 0.35720834136009216, "learning_rate": 0.00017093273318329584, "loss": 0.5255, "step": 2911 }, { "epoch": 0.21591161859568472, "grad_norm": 0.3921355903148651, "learning_rate": 0.00017092273068267067, "loss": 0.6292, "step": 2912 }, { "epoch": 0.21598576406910358, "grad_norm": 0.36105430126190186, "learning_rate": 0.0001709127281820455, "loss": 0.5292, "step": 2913 }, { "epoch": 0.21605990954252244, "grad_norm": 0.35330888628959656, "learning_rate": 0.00017090272568142035, "loss": 0.5019, "step": 2914 }, { "epoch": 0.21613405501594127, "grad_norm": 0.35514843463897705, "learning_rate": 0.0001708927231807952, "loss": 0.5097, "step": 2915 }, { "epoch": 0.21620820048936013, "grad_norm": 0.38511306047439575, "learning_rate": 0.00017088272068017005, "loss": 0.5383, "step": 2916 }, { "epoch": 0.21628234596277898, "grad_norm": 0.37869879603385925, "learning_rate": 0.0001708727181795449, "loss": 0.4782, "step": 2917 }, { "epoch": 0.2163564914361978, "grad_norm": 0.3528784513473511, "learning_rate": 0.00017086271567891975, "loss": 0.5433, "step": 2918 }, { "epoch": 0.21643063690961667, "grad_norm": 0.3633810579776764, "learning_rate": 0.0001708527131782946, "loss": 0.493, "step": 2919 }, { "epoch": 0.21650478238303553, "grad_norm": 0.3702612817287445, "learning_rate": 0.00017084271067766942, "loss": 0.5138, "step": 2920 }, { "epoch": 0.21657892785645436, "grad_norm": 0.37703487277030945, "learning_rate": 0.00017083270817704426, "loss": 0.5576, "step": 2921 }, { "epoch": 0.2166530733298732, "grad_norm": 0.3777710199356079, "learning_rate": 0.00017082270567641913, "loss": 0.5232, "step": 2922 }, { "epoch": 0.21672721880329207, "grad_norm": 0.3902844190597534, "learning_rate": 0.00017081270317579396, "loss": 0.5566, "step": 2923 }, { "epoch": 0.2168013642767109, "grad_norm": 0.36900386214256287, "learning_rate": 0.0001708027006751688, "loss": 0.5221, "step": 2924 }, { "epoch": 0.21687550975012976, "grad_norm": 0.35253992676734924, "learning_rate": 0.00017079269817454364, "loss": 0.5044, "step": 2925 }, { "epoch": 0.21694965522354862, "grad_norm": 0.38322770595550537, "learning_rate": 0.0001707826956739185, "loss": 0.5343, "step": 2926 }, { "epoch": 0.21702380069696744, "grad_norm": 0.3563730716705322, "learning_rate": 0.00017077269317329334, "loss": 0.5448, "step": 2927 }, { "epoch": 0.2170979461703863, "grad_norm": 0.3768388330936432, "learning_rate": 0.00017076269067266818, "loss": 0.5533, "step": 2928 }, { "epoch": 0.21717209164380513, "grad_norm": 0.3598300814628601, "learning_rate": 0.000170752688172043, "loss": 0.5308, "step": 2929 }, { "epoch": 0.217246237117224, "grad_norm": 0.37006106972694397, "learning_rate": 0.00017074268567141788, "loss": 0.5399, "step": 2930 }, { "epoch": 0.21732038259064285, "grad_norm": 0.3649306893348694, "learning_rate": 0.0001707326831707927, "loss": 0.558, "step": 2931 }, { "epoch": 0.21739452806406168, "grad_norm": 0.3597034513950348, "learning_rate": 0.00017072268067016755, "loss": 0.5638, "step": 2932 }, { "epoch": 0.21746867353748053, "grad_norm": 0.3461408019065857, "learning_rate": 0.0001707126781695424, "loss": 0.5392, "step": 2933 }, { "epoch": 0.2175428190108994, "grad_norm": 0.3772400915622711, "learning_rate": 0.00017070267566891725, "loss": 0.5694, "step": 2934 }, { "epoch": 0.21761696448431822, "grad_norm": 0.3764164447784424, "learning_rate": 0.00017069267316829206, "loss": 0.5514, "step": 2935 }, { "epoch": 0.21769110995773708, "grad_norm": 0.3556857705116272, "learning_rate": 0.00017068267066766693, "loss": 0.5428, "step": 2936 }, { "epoch": 0.21776525543115594, "grad_norm": 0.3693090081214905, "learning_rate": 0.00017067266816704176, "loss": 0.5033, "step": 2937 }, { "epoch": 0.21783940090457476, "grad_norm": 0.3559574484825134, "learning_rate": 0.00017066266566641663, "loss": 0.5175, "step": 2938 }, { "epoch": 0.21791354637799362, "grad_norm": 0.34635651111602783, "learning_rate": 0.00017065266316579144, "loss": 0.51, "step": 2939 }, { "epoch": 0.21798769185141248, "grad_norm": 0.38837718963623047, "learning_rate": 0.0001706426606651663, "loss": 0.6089, "step": 2940 }, { "epoch": 0.2180618373248313, "grad_norm": 0.379401296377182, "learning_rate": 0.00017063265816454114, "loss": 0.5323, "step": 2941 }, { "epoch": 0.21813598279825017, "grad_norm": 0.3513876795768738, "learning_rate": 0.000170622655663916, "loss": 0.519, "step": 2942 }, { "epoch": 0.21821012827166902, "grad_norm": 0.39852407574653625, "learning_rate": 0.00017061265316329081, "loss": 0.5543, "step": 2943 }, { "epoch": 0.21828427374508785, "grad_norm": 0.3672902286052704, "learning_rate": 0.00017060265066266568, "loss": 0.5382, "step": 2944 }, { "epoch": 0.2183584192185067, "grad_norm": 0.3640556335449219, "learning_rate": 0.00017059264816204051, "loss": 0.537, "step": 2945 }, { "epoch": 0.21843256469192557, "grad_norm": 0.35401836037635803, "learning_rate": 0.00017058264566141535, "loss": 0.5168, "step": 2946 }, { "epoch": 0.2185067101653444, "grad_norm": 0.3875291645526886, "learning_rate": 0.0001705726431607902, "loss": 0.5168, "step": 2947 }, { "epoch": 0.21858085563876326, "grad_norm": 0.38534262776374817, "learning_rate": 0.00017056264066016505, "loss": 0.5726, "step": 2948 }, { "epoch": 0.2186550011121821, "grad_norm": 0.3831932246685028, "learning_rate": 0.0001705526381595399, "loss": 0.516, "step": 2949 }, { "epoch": 0.21872914658560094, "grad_norm": 0.3240167796611786, "learning_rate": 0.00017054263565891473, "loss": 0.4773, "step": 2950 }, { "epoch": 0.2188032920590198, "grad_norm": 0.39205506443977356, "learning_rate": 0.0001705326331582896, "loss": 0.5218, "step": 2951 }, { "epoch": 0.21887743753243866, "grad_norm": 0.368210107088089, "learning_rate": 0.00017052263065766443, "loss": 0.5482, "step": 2952 }, { "epoch": 0.2189515830058575, "grad_norm": 0.3666994869709015, "learning_rate": 0.00017051262815703927, "loss": 0.5237, "step": 2953 }, { "epoch": 0.21902572847927634, "grad_norm": 0.35253044962882996, "learning_rate": 0.0001705026256564141, "loss": 0.5194, "step": 2954 }, { "epoch": 0.2190998739526952, "grad_norm": 0.37504440546035767, "learning_rate": 0.00017049262315578897, "loss": 0.5583, "step": 2955 }, { "epoch": 0.21917401942611403, "grad_norm": 0.3685874342918396, "learning_rate": 0.0001704826206551638, "loss": 0.4992, "step": 2956 }, { "epoch": 0.2192481648995329, "grad_norm": 0.3813149929046631, "learning_rate": 0.00017047261815453864, "loss": 0.528, "step": 2957 }, { "epoch": 0.21932231037295172, "grad_norm": 0.3633582890033722, "learning_rate": 0.00017046261565391348, "loss": 0.5238, "step": 2958 }, { "epoch": 0.21939645584637057, "grad_norm": 0.403937429189682, "learning_rate": 0.00017045261315328834, "loss": 0.6199, "step": 2959 }, { "epoch": 0.21947060131978943, "grad_norm": 0.36983487010002136, "learning_rate": 0.00017044261065266318, "loss": 0.5353, "step": 2960 }, { "epoch": 0.21954474679320826, "grad_norm": 0.3431692123413086, "learning_rate": 0.00017043260815203802, "loss": 0.5275, "step": 2961 }, { "epoch": 0.21961889226662712, "grad_norm": 0.3580249547958374, "learning_rate": 0.00017042260565141285, "loss": 0.5219, "step": 2962 }, { "epoch": 0.21969303774004598, "grad_norm": 0.3492765426635742, "learning_rate": 0.00017041260315078772, "loss": 0.5458, "step": 2963 }, { "epoch": 0.2197671832134648, "grad_norm": 0.37430712580680847, "learning_rate": 0.00017040260065016256, "loss": 0.5278, "step": 2964 }, { "epoch": 0.21984132868688366, "grad_norm": 0.3700549304485321, "learning_rate": 0.0001703925981495374, "loss": 0.5173, "step": 2965 }, { "epoch": 0.21991547416030252, "grad_norm": 0.37025076150894165, "learning_rate": 0.00017038259564891223, "loss": 0.541, "step": 2966 }, { "epoch": 0.21998961963372135, "grad_norm": 0.35488584637641907, "learning_rate": 0.0001703725931482871, "loss": 0.5489, "step": 2967 }, { "epoch": 0.2200637651071402, "grad_norm": 0.37913277745246887, "learning_rate": 0.0001703625906476619, "loss": 0.6017, "step": 2968 }, { "epoch": 0.22013791058055907, "grad_norm": 0.35395386815071106, "learning_rate": 0.00017035258814703677, "loss": 0.5303, "step": 2969 }, { "epoch": 0.2202120560539779, "grad_norm": 0.38251644372940063, "learning_rate": 0.0001703425856464116, "loss": 0.5612, "step": 2970 }, { "epoch": 0.22028620152739675, "grad_norm": 0.3783484399318695, "learning_rate": 0.00017033258314578647, "loss": 0.5729, "step": 2971 }, { "epoch": 0.2203603470008156, "grad_norm": 0.36555495858192444, "learning_rate": 0.00017032258064516128, "loss": 0.5557, "step": 2972 }, { "epoch": 0.22043449247423444, "grad_norm": 0.38577961921691895, "learning_rate": 0.00017031257814453614, "loss": 0.5744, "step": 2973 }, { "epoch": 0.2205086379476533, "grad_norm": 0.3724294602870941, "learning_rate": 0.00017030257564391098, "loss": 0.567, "step": 2974 }, { "epoch": 0.22058278342107215, "grad_norm": 0.3651389479637146, "learning_rate": 0.00017029257314328584, "loss": 0.5407, "step": 2975 }, { "epoch": 0.22065692889449098, "grad_norm": 0.3659926950931549, "learning_rate": 0.00017028257064266065, "loss": 0.5751, "step": 2976 }, { "epoch": 0.22073107436790984, "grad_norm": 0.3768198490142822, "learning_rate": 0.00017027256814203552, "loss": 0.5192, "step": 2977 }, { "epoch": 0.2208052198413287, "grad_norm": 0.33975279331207275, "learning_rate": 0.00017026256564141036, "loss": 0.54, "step": 2978 }, { "epoch": 0.22087936531474753, "grad_norm": 0.3554896116256714, "learning_rate": 0.00017025256314078522, "loss": 0.5056, "step": 2979 }, { "epoch": 0.22095351078816639, "grad_norm": 0.3432273864746094, "learning_rate": 0.00017024256064016003, "loss": 0.51, "step": 2980 }, { "epoch": 0.22102765626158524, "grad_norm": 0.3943188786506653, "learning_rate": 0.0001702325581395349, "loss": 0.5102, "step": 2981 }, { "epoch": 0.22110180173500407, "grad_norm": 0.3702898323535919, "learning_rate": 0.00017022255563890973, "loss": 0.4821, "step": 2982 }, { "epoch": 0.22117594720842293, "grad_norm": 0.35469868779182434, "learning_rate": 0.00017021255313828457, "loss": 0.5258, "step": 2983 }, { "epoch": 0.2212500926818418, "grad_norm": 0.37245652079582214, "learning_rate": 0.00017020255063765943, "loss": 0.5404, "step": 2984 }, { "epoch": 0.22132423815526062, "grad_norm": 0.37004199624061584, "learning_rate": 0.00017019254813703427, "loss": 0.5364, "step": 2985 }, { "epoch": 0.22139838362867947, "grad_norm": 0.4054192006587982, "learning_rate": 0.0001701825456364091, "loss": 0.584, "step": 2986 }, { "epoch": 0.2214725291020983, "grad_norm": 0.37991559505462646, "learning_rate": 0.00017017254313578394, "loss": 0.5535, "step": 2987 }, { "epoch": 0.22154667457551716, "grad_norm": 0.39185118675231934, "learning_rate": 0.0001701625406351588, "loss": 0.5451, "step": 2988 }, { "epoch": 0.22162082004893602, "grad_norm": 0.38047245144844055, "learning_rate": 0.00017015253813453364, "loss": 0.5332, "step": 2989 }, { "epoch": 0.22169496552235485, "grad_norm": 0.3568689525127411, "learning_rate": 0.00017014253563390848, "loss": 0.5009, "step": 2990 }, { "epoch": 0.2217691109957737, "grad_norm": 0.3457100987434387, "learning_rate": 0.00017013253313328332, "loss": 0.552, "step": 2991 }, { "epoch": 0.22184325646919256, "grad_norm": 0.37807655334472656, "learning_rate": 0.00017012253063265818, "loss": 0.5338, "step": 2992 }, { "epoch": 0.2219174019426114, "grad_norm": 0.3625606596469879, "learning_rate": 0.00017011252813203302, "loss": 0.5205, "step": 2993 }, { "epoch": 0.22199154741603025, "grad_norm": 0.3553274869918823, "learning_rate": 0.00017010252563140786, "loss": 0.5537, "step": 2994 }, { "epoch": 0.2220656928894491, "grad_norm": 0.405963659286499, "learning_rate": 0.0001700925231307827, "loss": 0.5698, "step": 2995 }, { "epoch": 0.22213983836286794, "grad_norm": 0.3705889582633972, "learning_rate": 0.00017008252063015756, "loss": 0.536, "step": 2996 }, { "epoch": 0.2222139838362868, "grad_norm": 0.3788050413131714, "learning_rate": 0.0001700725181295324, "loss": 0.5409, "step": 2997 }, { "epoch": 0.22228812930970565, "grad_norm": 0.37649381160736084, "learning_rate": 0.00017006251562890723, "loss": 0.5712, "step": 2998 }, { "epoch": 0.22236227478312448, "grad_norm": 0.35810407996177673, "learning_rate": 0.00017005251312828207, "loss": 0.4999, "step": 2999 }, { "epoch": 0.22243642025654334, "grad_norm": 0.3585812449455261, "learning_rate": 0.00017004251062765693, "loss": 0.5159, "step": 3000 }, { "epoch": 0.2225105657299622, "grad_norm": 0.34457409381866455, "learning_rate": 0.00017003250812703177, "loss": 0.5144, "step": 3001 }, { "epoch": 0.22258471120338102, "grad_norm": 0.3432406485080719, "learning_rate": 0.0001700225056264066, "loss": 0.5162, "step": 3002 }, { "epoch": 0.22265885667679988, "grad_norm": 0.3632732033729553, "learning_rate": 0.00017001250312578145, "loss": 0.5417, "step": 3003 }, { "epoch": 0.22273300215021874, "grad_norm": 0.3682090938091278, "learning_rate": 0.0001700025006251563, "loss": 0.5618, "step": 3004 }, { "epoch": 0.22280714762363757, "grad_norm": 0.37338927388191223, "learning_rate": 0.00016999249812453112, "loss": 0.557, "step": 3005 }, { "epoch": 0.22288129309705643, "grad_norm": 0.38216739892959595, "learning_rate": 0.00016998249562390598, "loss": 0.5686, "step": 3006 }, { "epoch": 0.22295543857047528, "grad_norm": 0.3594052195549011, "learning_rate": 0.00016997249312328082, "loss": 0.521, "step": 3007 }, { "epoch": 0.2230295840438941, "grad_norm": 0.37006455659866333, "learning_rate": 0.00016996249062265569, "loss": 0.5553, "step": 3008 }, { "epoch": 0.22310372951731297, "grad_norm": 0.34978586435317993, "learning_rate": 0.0001699524881220305, "loss": 0.5197, "step": 3009 }, { "epoch": 0.22317787499073183, "grad_norm": 0.381541907787323, "learning_rate": 0.00016994248562140536, "loss": 0.5626, "step": 3010 }, { "epoch": 0.22325202046415066, "grad_norm": 0.3960428237915039, "learning_rate": 0.0001699324831207802, "loss": 0.5375, "step": 3011 }, { "epoch": 0.22332616593756952, "grad_norm": 0.3596988022327423, "learning_rate": 0.00016992248062015506, "loss": 0.5396, "step": 3012 }, { "epoch": 0.22340031141098837, "grad_norm": 0.3820812404155731, "learning_rate": 0.00016991247811952987, "loss": 0.5373, "step": 3013 }, { "epoch": 0.2234744568844072, "grad_norm": 0.36483508348464966, "learning_rate": 0.00016990247561890473, "loss": 0.541, "step": 3014 }, { "epoch": 0.22354860235782606, "grad_norm": 0.3934757113456726, "learning_rate": 0.00016989247311827957, "loss": 0.5863, "step": 3015 }, { "epoch": 0.2236227478312449, "grad_norm": 0.35506775975227356, "learning_rate": 0.00016988247061765444, "loss": 0.523, "step": 3016 }, { "epoch": 0.22369689330466375, "grad_norm": 0.38739508390426636, "learning_rate": 0.00016987246811702927, "loss": 0.5341, "step": 3017 }, { "epoch": 0.2237710387780826, "grad_norm": 0.37799814343452454, "learning_rate": 0.0001698624656164041, "loss": 0.5945, "step": 3018 }, { "epoch": 0.22384518425150143, "grad_norm": 0.3774837553501129, "learning_rate": 0.00016985246311577895, "loss": 0.5652, "step": 3019 }, { "epoch": 0.2239193297249203, "grad_norm": 0.3548969328403473, "learning_rate": 0.00016984246061515378, "loss": 0.5293, "step": 3020 }, { "epoch": 0.22399347519833915, "grad_norm": 0.3853811025619507, "learning_rate": 0.00016983245811452865, "loss": 0.5695, "step": 3021 }, { "epoch": 0.22406762067175798, "grad_norm": 0.3694855570793152, "learning_rate": 0.00016982245561390349, "loss": 0.54, "step": 3022 }, { "epoch": 0.22414176614517684, "grad_norm": 0.4225345551967621, "learning_rate": 0.00016981245311327832, "loss": 0.645, "step": 3023 }, { "epoch": 0.2242159116185957, "grad_norm": 0.3643638491630554, "learning_rate": 0.00016980245061265316, "loss": 0.5363, "step": 3024 }, { "epoch": 0.22429005709201452, "grad_norm": 0.35957491397857666, "learning_rate": 0.00016979244811202802, "loss": 0.5123, "step": 3025 }, { "epoch": 0.22436420256543338, "grad_norm": 0.3351064622402191, "learning_rate": 0.00016978244561140286, "loss": 0.4915, "step": 3026 }, { "epoch": 0.22443834803885224, "grad_norm": 0.37965986132621765, "learning_rate": 0.00016977244311077773, "loss": 0.5632, "step": 3027 }, { "epoch": 0.22451249351227107, "grad_norm": 0.35406482219696045, "learning_rate": 0.00016976244061015254, "loss": 0.5255, "step": 3028 }, { "epoch": 0.22458663898568992, "grad_norm": 0.36888566613197327, "learning_rate": 0.0001697524381095274, "loss": 0.5438, "step": 3029 }, { "epoch": 0.22466078445910878, "grad_norm": 0.3474070727825165, "learning_rate": 0.00016974243560890224, "loss": 0.53, "step": 3030 }, { "epoch": 0.2247349299325276, "grad_norm": 0.3701600432395935, "learning_rate": 0.00016973243310827707, "loss": 0.5008, "step": 3031 }, { "epoch": 0.22480907540594647, "grad_norm": 0.3805924952030182, "learning_rate": 0.0001697224306076519, "loss": 0.5696, "step": 3032 }, { "epoch": 0.22488322087936533, "grad_norm": 0.3857348561286926, "learning_rate": 0.00016971242810702678, "loss": 0.5533, "step": 3033 }, { "epoch": 0.22495736635278415, "grad_norm": 0.35774675011634827, "learning_rate": 0.0001697024256064016, "loss": 0.5289, "step": 3034 }, { "epoch": 0.225031511826203, "grad_norm": 0.3494180738925934, "learning_rate": 0.00016969242310577645, "loss": 0.5296, "step": 3035 }, { "epoch": 0.22510565729962187, "grad_norm": 0.38060179352760315, "learning_rate": 0.00016968242060515129, "loss": 0.5572, "step": 3036 }, { "epoch": 0.2251798027730407, "grad_norm": 0.36021992564201355, "learning_rate": 0.00016967241810452615, "loss": 0.5363, "step": 3037 }, { "epoch": 0.22525394824645956, "grad_norm": 0.3909338414669037, "learning_rate": 0.000169662415603901, "loss": 0.5821, "step": 3038 }, { "epoch": 0.22532809371987841, "grad_norm": 0.34130847454071045, "learning_rate": 0.00016965241310327582, "loss": 0.5404, "step": 3039 }, { "epoch": 0.22540223919329724, "grad_norm": 0.36100783944129944, "learning_rate": 0.00016964241060265066, "loss": 0.4999, "step": 3040 }, { "epoch": 0.2254763846667161, "grad_norm": 0.3507991135120392, "learning_rate": 0.00016963240810202553, "loss": 0.5058, "step": 3041 }, { "epoch": 0.22555053014013496, "grad_norm": 0.3713712692260742, "learning_rate": 0.00016962240560140036, "loss": 0.5452, "step": 3042 }, { "epoch": 0.2256246756135538, "grad_norm": 0.3729938864707947, "learning_rate": 0.0001696124031007752, "loss": 0.5306, "step": 3043 }, { "epoch": 0.22569882108697265, "grad_norm": 0.42686548829078674, "learning_rate": 0.00016960240060015004, "loss": 0.568, "step": 3044 }, { "epoch": 0.22577296656039147, "grad_norm": 0.38140082359313965, "learning_rate": 0.0001695923980995249, "loss": 0.5796, "step": 3045 }, { "epoch": 0.22584711203381033, "grad_norm": 0.34894296526908875, "learning_rate": 0.0001695823955988997, "loss": 0.4977, "step": 3046 }, { "epoch": 0.2259212575072292, "grad_norm": 0.41490182280540466, "learning_rate": 0.00016957239309827458, "loss": 0.5402, "step": 3047 }, { "epoch": 0.22599540298064802, "grad_norm": 0.4055007994174957, "learning_rate": 0.0001695623905976494, "loss": 0.5781, "step": 3048 }, { "epoch": 0.22606954845406688, "grad_norm": 0.37112313508987427, "learning_rate": 0.00016955238809702428, "loss": 0.5169, "step": 3049 }, { "epoch": 0.22614369392748573, "grad_norm": 0.3638371527194977, "learning_rate": 0.0001695423855963991, "loss": 0.5587, "step": 3050 }, { "epoch": 0.22621783940090456, "grad_norm": 0.3795914053916931, "learning_rate": 0.00016953238309577395, "loss": 0.5206, "step": 3051 }, { "epoch": 0.22629198487432342, "grad_norm": 0.36272257566452026, "learning_rate": 0.0001695223805951488, "loss": 0.5311, "step": 3052 }, { "epoch": 0.22636613034774228, "grad_norm": 0.38388076424598694, "learning_rate": 0.00016951237809452365, "loss": 0.5327, "step": 3053 }, { "epoch": 0.2264402758211611, "grad_norm": 0.3583090901374817, "learning_rate": 0.0001695023755938985, "loss": 0.5288, "step": 3054 }, { "epoch": 0.22651442129457997, "grad_norm": 0.36943334341049194, "learning_rate": 0.00016949237309327333, "loss": 0.548, "step": 3055 }, { "epoch": 0.22658856676799882, "grad_norm": 0.3534252941608429, "learning_rate": 0.00016948237059264816, "loss": 0.5183, "step": 3056 }, { "epoch": 0.22666271224141765, "grad_norm": 0.3679530620574951, "learning_rate": 0.000169472368092023, "loss": 0.5101, "step": 3057 }, { "epoch": 0.2267368577148365, "grad_norm": 0.3501662313938141, "learning_rate": 0.00016946236559139786, "loss": 0.5103, "step": 3058 }, { "epoch": 0.22681100318825537, "grad_norm": 0.40064874291419983, "learning_rate": 0.0001694523630907727, "loss": 0.5607, "step": 3059 }, { "epoch": 0.2268851486616742, "grad_norm": 0.36866775155067444, "learning_rate": 0.00016944236059014757, "loss": 0.5768, "step": 3060 }, { "epoch": 0.22695929413509305, "grad_norm": 0.37826287746429443, "learning_rate": 0.00016943235808952238, "loss": 0.5971, "step": 3061 }, { "epoch": 0.2270334396085119, "grad_norm": 0.35442182421684265, "learning_rate": 0.00016942235558889724, "loss": 0.5415, "step": 3062 }, { "epoch": 0.22710758508193074, "grad_norm": 0.38005131483078003, "learning_rate": 0.00016941235308827208, "loss": 0.5801, "step": 3063 }, { "epoch": 0.2271817305553496, "grad_norm": 0.3847039043903351, "learning_rate": 0.00016940235058764694, "loss": 0.5698, "step": 3064 }, { "epoch": 0.22725587602876846, "grad_norm": 0.3548467457294464, "learning_rate": 0.00016939234808702175, "loss": 0.5299, "step": 3065 }, { "epoch": 0.22733002150218728, "grad_norm": 0.39147499203681946, "learning_rate": 0.00016938234558639662, "loss": 0.5642, "step": 3066 }, { "epoch": 0.22740416697560614, "grad_norm": 0.35621047019958496, "learning_rate": 0.00016937234308577145, "loss": 0.5112, "step": 3067 }, { "epoch": 0.227478312449025, "grad_norm": 0.3656405806541443, "learning_rate": 0.0001693623405851463, "loss": 0.5145, "step": 3068 }, { "epoch": 0.22755245792244383, "grad_norm": 0.36267948150634766, "learning_rate": 0.00016935233808452113, "loss": 0.535, "step": 3069 }, { "epoch": 0.2276266033958627, "grad_norm": 0.3703133463859558, "learning_rate": 0.000169342335583896, "loss": 0.5664, "step": 3070 }, { "epoch": 0.22770074886928154, "grad_norm": 0.3698337972164154, "learning_rate": 0.00016933233308327083, "loss": 0.5047, "step": 3071 }, { "epoch": 0.22777489434270037, "grad_norm": 0.3689439296722412, "learning_rate": 0.00016932233058264567, "loss": 0.5605, "step": 3072 }, { "epoch": 0.22784903981611923, "grad_norm": 0.35993075370788574, "learning_rate": 0.0001693123280820205, "loss": 0.5275, "step": 3073 }, { "epoch": 0.22792318528953806, "grad_norm": 0.3565292954444885, "learning_rate": 0.00016930232558139537, "loss": 0.5281, "step": 3074 }, { "epoch": 0.22799733076295692, "grad_norm": 0.3648369014263153, "learning_rate": 0.0001692923230807702, "loss": 0.5269, "step": 3075 }, { "epoch": 0.22807147623637578, "grad_norm": 0.353726327419281, "learning_rate": 0.00016928232058014504, "loss": 0.5111, "step": 3076 }, { "epoch": 0.2281456217097946, "grad_norm": 0.35830163955688477, "learning_rate": 0.00016927231807951988, "loss": 0.4897, "step": 3077 }, { "epoch": 0.22821976718321346, "grad_norm": 0.3681171238422394, "learning_rate": 0.00016926231557889474, "loss": 0.5151, "step": 3078 }, { "epoch": 0.22829391265663232, "grad_norm": 0.3782983124256134, "learning_rate": 0.00016925231307826958, "loss": 0.5594, "step": 3079 }, { "epoch": 0.22836805813005115, "grad_norm": 0.3758261501789093, "learning_rate": 0.00016924231057764442, "loss": 0.5249, "step": 3080 }, { "epoch": 0.22844220360347, "grad_norm": 0.3702141344547272, "learning_rate": 0.00016923230807701925, "loss": 0.5219, "step": 3081 }, { "epoch": 0.22851634907688886, "grad_norm": 0.3834594488143921, "learning_rate": 0.00016922230557639412, "loss": 0.5634, "step": 3082 }, { "epoch": 0.2285904945503077, "grad_norm": 0.37699779868125916, "learning_rate": 0.00016921230307576893, "loss": 0.4968, "step": 3083 }, { "epoch": 0.22866464002372655, "grad_norm": 0.3641580641269684, "learning_rate": 0.0001692023005751438, "loss": 0.5543, "step": 3084 }, { "epoch": 0.2287387854971454, "grad_norm": 0.34201446175575256, "learning_rate": 0.00016919229807451863, "loss": 0.4824, "step": 3085 }, { "epoch": 0.22881293097056424, "grad_norm": 0.36024153232574463, "learning_rate": 0.0001691822955738935, "loss": 0.5233, "step": 3086 }, { "epoch": 0.2288870764439831, "grad_norm": 0.3876475691795349, "learning_rate": 0.00016917229307326833, "loss": 0.5238, "step": 3087 }, { "epoch": 0.22896122191740195, "grad_norm": 0.363008588552475, "learning_rate": 0.00016916229057264317, "loss": 0.5228, "step": 3088 }, { "epoch": 0.22903536739082078, "grad_norm": 0.37381982803344727, "learning_rate": 0.000169152288072018, "loss": 0.5299, "step": 3089 }, { "epoch": 0.22910951286423964, "grad_norm": 0.39468955993652344, "learning_rate": 0.00016914228557139287, "loss": 0.5457, "step": 3090 }, { "epoch": 0.2291836583376585, "grad_norm": 0.3471645414829254, "learning_rate": 0.0001691322830707677, "loss": 0.5101, "step": 3091 }, { "epoch": 0.22925780381107733, "grad_norm": 0.38554319739341736, "learning_rate": 0.00016912228057014254, "loss": 0.5717, "step": 3092 }, { "epoch": 0.22933194928449618, "grad_norm": 0.42511579394340515, "learning_rate": 0.0001691122780695174, "loss": 0.4829, "step": 3093 }, { "epoch": 0.22940609475791504, "grad_norm": 0.390516459941864, "learning_rate": 0.00016910227556889222, "loss": 0.5293, "step": 3094 }, { "epoch": 0.22948024023133387, "grad_norm": 0.36082398891448975, "learning_rate": 0.00016909227306826708, "loss": 0.4828, "step": 3095 }, { "epoch": 0.22955438570475273, "grad_norm": 0.3910001814365387, "learning_rate": 0.00016908227056764192, "loss": 0.5258, "step": 3096 }, { "epoch": 0.22962853117817159, "grad_norm": 0.3838588297367096, "learning_rate": 0.00016907226806701678, "loss": 0.593, "step": 3097 }, { "epoch": 0.22970267665159042, "grad_norm": 0.38668006658554077, "learning_rate": 0.0001690622655663916, "loss": 0.5312, "step": 3098 }, { "epoch": 0.22977682212500927, "grad_norm": 0.4177023470401764, "learning_rate": 0.00016905226306576646, "loss": 0.5466, "step": 3099 }, { "epoch": 0.22985096759842813, "grad_norm": 0.3572791814804077, "learning_rate": 0.0001690422605651413, "loss": 0.5262, "step": 3100 }, { "epoch": 0.22992511307184696, "grad_norm": 0.3725454807281494, "learning_rate": 0.00016903225806451616, "loss": 0.5385, "step": 3101 }, { "epoch": 0.22999925854526582, "grad_norm": 0.380288302898407, "learning_rate": 0.00016902225556389097, "loss": 0.5402, "step": 3102 }, { "epoch": 0.23007340401868465, "grad_norm": 0.394797146320343, "learning_rate": 0.00016901225306326583, "loss": 0.5499, "step": 3103 }, { "epoch": 0.2301475494921035, "grad_norm": 0.3709663152694702, "learning_rate": 0.00016900225056264067, "loss": 0.5594, "step": 3104 }, { "epoch": 0.23022169496552236, "grad_norm": 0.3537650406360626, "learning_rate": 0.0001689922480620155, "loss": 0.5283, "step": 3105 }, { "epoch": 0.2302958404389412, "grad_norm": 0.34475287795066833, "learning_rate": 0.00016898224556139034, "loss": 0.5214, "step": 3106 }, { "epoch": 0.23036998591236005, "grad_norm": 0.37331900000572205, "learning_rate": 0.0001689722430607652, "loss": 0.5495, "step": 3107 }, { "epoch": 0.2304441313857789, "grad_norm": 0.33916938304901123, "learning_rate": 0.00016896224056014004, "loss": 0.5121, "step": 3108 }, { "epoch": 0.23051827685919773, "grad_norm": 0.39028292894363403, "learning_rate": 0.00016895223805951488, "loss": 0.5524, "step": 3109 }, { "epoch": 0.2305924223326166, "grad_norm": 0.3692082464694977, "learning_rate": 0.00016894223555888972, "loss": 0.5946, "step": 3110 }, { "epoch": 0.23066656780603545, "grad_norm": 0.34561413526535034, "learning_rate": 0.00016893223305826458, "loss": 0.504, "step": 3111 }, { "epoch": 0.23074071327945428, "grad_norm": 0.376939982175827, "learning_rate": 0.00016892223055763942, "loss": 0.543, "step": 3112 }, { "epoch": 0.23081485875287314, "grad_norm": 0.37572193145751953, "learning_rate": 0.00016891222805701426, "loss": 0.5413, "step": 3113 }, { "epoch": 0.230889004226292, "grad_norm": 0.35735735297203064, "learning_rate": 0.0001689022255563891, "loss": 0.5153, "step": 3114 }, { "epoch": 0.23096314969971082, "grad_norm": 0.3644809126853943, "learning_rate": 0.00016889222305576396, "loss": 0.5497, "step": 3115 }, { "epoch": 0.23103729517312968, "grad_norm": 0.4166473150253296, "learning_rate": 0.0001688822205551388, "loss": 0.5309, "step": 3116 }, { "epoch": 0.23111144064654854, "grad_norm": 0.3807069659233093, "learning_rate": 0.00016887221805451363, "loss": 0.5116, "step": 3117 }, { "epoch": 0.23118558611996737, "grad_norm": 0.3638266324996948, "learning_rate": 0.00016886221555388847, "loss": 0.5297, "step": 3118 }, { "epoch": 0.23125973159338623, "grad_norm": 0.34700801968574524, "learning_rate": 0.00016885221305326333, "loss": 0.4962, "step": 3119 }, { "epoch": 0.23133387706680508, "grad_norm": 0.380198210477829, "learning_rate": 0.00016884221055263817, "loss": 0.5361, "step": 3120 }, { "epoch": 0.2314080225402239, "grad_norm": 0.3677632212638855, "learning_rate": 0.000168832208052013, "loss": 0.5846, "step": 3121 }, { "epoch": 0.23148216801364277, "grad_norm": 0.37532341480255127, "learning_rate": 0.00016882220555138785, "loss": 0.5755, "step": 3122 }, { "epoch": 0.23155631348706163, "grad_norm": 0.3635807931423187, "learning_rate": 0.0001688122030507627, "loss": 0.5632, "step": 3123 }, { "epoch": 0.23163045896048046, "grad_norm": 0.3988536596298218, "learning_rate": 0.00016880220055013755, "loss": 0.5595, "step": 3124 }, { "epoch": 0.2317046044338993, "grad_norm": 0.3725000023841858, "learning_rate": 0.00016879219804951238, "loss": 0.5066, "step": 3125 }, { "epoch": 0.23177874990731817, "grad_norm": 0.37904393672943115, "learning_rate": 0.00016878219554888722, "loss": 0.5637, "step": 3126 }, { "epoch": 0.231852895380737, "grad_norm": 0.36648818850517273, "learning_rate": 0.00016877219304826208, "loss": 0.5226, "step": 3127 }, { "epoch": 0.23192704085415586, "grad_norm": 0.36643823981285095, "learning_rate": 0.00016876219054763692, "loss": 0.539, "step": 3128 }, { "epoch": 0.2320011863275747, "grad_norm": 0.38038715720176697, "learning_rate": 0.00016875218804701176, "loss": 0.5505, "step": 3129 }, { "epoch": 0.23207533180099355, "grad_norm": 0.3625529706478119, "learning_rate": 0.00016874218554638662, "loss": 0.5348, "step": 3130 }, { "epoch": 0.2321494772744124, "grad_norm": 0.3957379162311554, "learning_rate": 0.00016873218304576143, "loss": 0.5674, "step": 3131 }, { "epoch": 0.23222362274783123, "grad_norm": 0.34537288546562195, "learning_rate": 0.0001687221805451363, "loss": 0.5251, "step": 3132 }, { "epoch": 0.2322977682212501, "grad_norm": 0.3905269205570221, "learning_rate": 0.00016871217804451113, "loss": 0.5777, "step": 3133 }, { "epoch": 0.23237191369466895, "grad_norm": 0.3503686189651489, "learning_rate": 0.000168702175543886, "loss": 0.539, "step": 3134 }, { "epoch": 0.23244605916808778, "grad_norm": 0.33836305141448975, "learning_rate": 0.0001686921730432608, "loss": 0.5143, "step": 3135 }, { "epoch": 0.23252020464150663, "grad_norm": 0.3733169734477997, "learning_rate": 0.00016868217054263567, "loss": 0.5133, "step": 3136 }, { "epoch": 0.2325943501149255, "grad_norm": 0.3562396466732025, "learning_rate": 0.0001686721680420105, "loss": 0.5338, "step": 3137 }, { "epoch": 0.23266849558834432, "grad_norm": 0.3470671772956848, "learning_rate": 0.00016866216554138537, "loss": 0.5683, "step": 3138 }, { "epoch": 0.23274264106176318, "grad_norm": 0.35264337062835693, "learning_rate": 0.00016865216304076018, "loss": 0.5342, "step": 3139 }, { "epoch": 0.23281678653518204, "grad_norm": 0.36774125695228577, "learning_rate": 0.00016864216054013505, "loss": 0.5094, "step": 3140 }, { "epoch": 0.23289093200860086, "grad_norm": 0.35757896304130554, "learning_rate": 0.00016863215803950989, "loss": 0.5411, "step": 3141 }, { "epoch": 0.23296507748201972, "grad_norm": 0.3764273524284363, "learning_rate": 0.00016862215553888472, "loss": 0.5495, "step": 3142 }, { "epoch": 0.23303922295543858, "grad_norm": 0.3448147177696228, "learning_rate": 0.00016861215303825956, "loss": 0.5097, "step": 3143 }, { "epoch": 0.2331133684288574, "grad_norm": 0.38526853919029236, "learning_rate": 0.00016860215053763442, "loss": 0.5283, "step": 3144 }, { "epoch": 0.23318751390227627, "grad_norm": 0.3246736526489258, "learning_rate": 0.00016859214803700926, "loss": 0.5153, "step": 3145 }, { "epoch": 0.23326165937569512, "grad_norm": 0.3920454680919647, "learning_rate": 0.0001685821455363841, "loss": 0.528, "step": 3146 }, { "epoch": 0.23333580484911395, "grad_norm": 0.357504278421402, "learning_rate": 0.00016857214303575894, "loss": 0.5067, "step": 3147 }, { "epoch": 0.2334099503225328, "grad_norm": 0.414666086435318, "learning_rate": 0.0001685621405351338, "loss": 0.6121, "step": 3148 }, { "epoch": 0.23348409579595167, "grad_norm": 0.36362531781196594, "learning_rate": 0.00016855213803450864, "loss": 0.5298, "step": 3149 }, { "epoch": 0.2335582412693705, "grad_norm": 0.3486076295375824, "learning_rate": 0.00016854213553388347, "loss": 0.5263, "step": 3150 }, { "epoch": 0.23363238674278936, "grad_norm": 0.3632141649723053, "learning_rate": 0.0001685321330332583, "loss": 0.5378, "step": 3151 }, { "epoch": 0.2337065322162082, "grad_norm": 0.34240150451660156, "learning_rate": 0.00016852213053263317, "loss": 0.5269, "step": 3152 }, { "epoch": 0.23378067768962704, "grad_norm": 0.354167640209198, "learning_rate": 0.000168512128032008, "loss": 0.5199, "step": 3153 }, { "epoch": 0.2338548231630459, "grad_norm": 0.3374011218547821, "learning_rate": 0.00016850212553138285, "loss": 0.4899, "step": 3154 }, { "epoch": 0.23392896863646476, "grad_norm": 0.3594050705432892, "learning_rate": 0.00016849212303075769, "loss": 0.5279, "step": 3155 }, { "epoch": 0.2340031141098836, "grad_norm": 0.3612161874771118, "learning_rate": 0.00016848212053013255, "loss": 0.5401, "step": 3156 }, { "epoch": 0.23407725958330244, "grad_norm": 0.36094874143600464, "learning_rate": 0.0001684721180295074, "loss": 0.5217, "step": 3157 }, { "epoch": 0.23415140505672127, "grad_norm": 0.3590131998062134, "learning_rate": 0.00016846211552888222, "loss": 0.5402, "step": 3158 }, { "epoch": 0.23422555053014013, "grad_norm": 0.37657028436660767, "learning_rate": 0.00016845211302825706, "loss": 0.522, "step": 3159 }, { "epoch": 0.234299696003559, "grad_norm": 0.35386237502098083, "learning_rate": 0.00016844211052763193, "loss": 0.51, "step": 3160 }, { "epoch": 0.23437384147697782, "grad_norm": 0.3551791310310364, "learning_rate": 0.00016843210802700676, "loss": 0.5243, "step": 3161 }, { "epoch": 0.23444798695039668, "grad_norm": 0.34886208176612854, "learning_rate": 0.0001684221055263816, "loss": 0.5171, "step": 3162 }, { "epoch": 0.23452213242381553, "grad_norm": 0.34098976850509644, "learning_rate": 0.00016841210302575646, "loss": 0.4626, "step": 3163 }, { "epoch": 0.23459627789723436, "grad_norm": 0.3911603093147278, "learning_rate": 0.0001684021005251313, "loss": 0.5706, "step": 3164 }, { "epoch": 0.23467042337065322, "grad_norm": 0.38422414660453796, "learning_rate": 0.00016839209802450614, "loss": 0.5308, "step": 3165 }, { "epoch": 0.23474456884407208, "grad_norm": 0.37052538990974426, "learning_rate": 0.00016838209552388098, "loss": 0.5167, "step": 3166 }, { "epoch": 0.2348187143174909, "grad_norm": 0.37653639912605286, "learning_rate": 0.00016837209302325584, "loss": 0.5588, "step": 3167 }, { "epoch": 0.23489285979090976, "grad_norm": 0.38234758377075195, "learning_rate": 0.00016836209052263065, "loss": 0.5677, "step": 3168 }, { "epoch": 0.23496700526432862, "grad_norm": 0.3611529469490051, "learning_rate": 0.00016835208802200551, "loss": 0.5118, "step": 3169 }, { "epoch": 0.23504115073774745, "grad_norm": 0.3457759916782379, "learning_rate": 0.00016834208552138035, "loss": 0.492, "step": 3170 }, { "epoch": 0.2351152962111663, "grad_norm": 0.3444843590259552, "learning_rate": 0.00016833208302075522, "loss": 0.5061, "step": 3171 }, { "epoch": 0.23518944168458517, "grad_norm": 0.36827781796455383, "learning_rate": 0.00016832208052013002, "loss": 0.5536, "step": 3172 }, { "epoch": 0.235263587158004, "grad_norm": 0.35348203778266907, "learning_rate": 0.0001683120780195049, "loss": 0.5402, "step": 3173 }, { "epoch": 0.23533773263142285, "grad_norm": 0.3647434711456299, "learning_rate": 0.00016830207551887973, "loss": 0.544, "step": 3174 }, { "epoch": 0.2354118781048417, "grad_norm": 0.3643631339073181, "learning_rate": 0.0001682920730182546, "loss": 0.5138, "step": 3175 }, { "epoch": 0.23548602357826054, "grad_norm": 0.3814161419868469, "learning_rate": 0.0001682820705176294, "loss": 0.5877, "step": 3176 }, { "epoch": 0.2355601690516794, "grad_norm": 0.3430224359035492, "learning_rate": 0.00016827206801700426, "loss": 0.5361, "step": 3177 }, { "epoch": 0.23563431452509825, "grad_norm": 0.3301253616809845, "learning_rate": 0.0001682620655163791, "loss": 0.5033, "step": 3178 }, { "epoch": 0.23570845999851708, "grad_norm": 0.39269307255744934, "learning_rate": 0.00016825206301575394, "loss": 0.5097, "step": 3179 }, { "epoch": 0.23578260547193594, "grad_norm": 0.37744471430778503, "learning_rate": 0.00016824206051512878, "loss": 0.5361, "step": 3180 }, { "epoch": 0.2358567509453548, "grad_norm": 0.3639797866344452, "learning_rate": 0.00016823205801450364, "loss": 0.526, "step": 3181 }, { "epoch": 0.23593089641877363, "grad_norm": 0.36549317836761475, "learning_rate": 0.00016822205551387848, "loss": 0.4992, "step": 3182 }, { "epoch": 0.23600504189219249, "grad_norm": 0.394991010427475, "learning_rate": 0.00016821205301325331, "loss": 0.5703, "step": 3183 }, { "epoch": 0.23607918736561134, "grad_norm": 0.4093645215034485, "learning_rate": 0.00016820205051262815, "loss": 0.5755, "step": 3184 }, { "epoch": 0.23615333283903017, "grad_norm": 0.37450361251831055, "learning_rate": 0.00016819204801200302, "loss": 0.543, "step": 3185 }, { "epoch": 0.23622747831244903, "grad_norm": 0.3813417851924896, "learning_rate": 0.00016818204551137785, "loss": 0.4987, "step": 3186 }, { "epoch": 0.23630162378586786, "grad_norm": 0.368304044008255, "learning_rate": 0.0001681720430107527, "loss": 0.5111, "step": 3187 }, { "epoch": 0.23637576925928672, "grad_norm": 0.36609986424446106, "learning_rate": 0.00016816204051012753, "loss": 0.5407, "step": 3188 }, { "epoch": 0.23644991473270557, "grad_norm": 0.3607521653175354, "learning_rate": 0.0001681520380095024, "loss": 0.5277, "step": 3189 }, { "epoch": 0.2365240602061244, "grad_norm": 0.3467067778110504, "learning_rate": 0.00016814203550887723, "loss": 0.5054, "step": 3190 }, { "epoch": 0.23659820567954326, "grad_norm": 0.40096089243888855, "learning_rate": 0.00016813203300825207, "loss": 0.5163, "step": 3191 }, { "epoch": 0.23667235115296212, "grad_norm": 0.3699919283390045, "learning_rate": 0.0001681220305076269, "loss": 0.5874, "step": 3192 }, { "epoch": 0.23674649662638095, "grad_norm": 0.3540811240673065, "learning_rate": 0.00016811202800700177, "loss": 0.4916, "step": 3193 }, { "epoch": 0.2368206420997998, "grad_norm": 0.37788060307502747, "learning_rate": 0.0001681020255063766, "loss": 0.5506, "step": 3194 }, { "epoch": 0.23689478757321866, "grad_norm": 0.3559504449367523, "learning_rate": 0.00016809202300575144, "loss": 0.4978, "step": 3195 }, { "epoch": 0.2369689330466375, "grad_norm": 0.3484579622745514, "learning_rate": 0.0001680820205051263, "loss": 0.5326, "step": 3196 }, { "epoch": 0.23704307852005635, "grad_norm": 0.3663431406021118, "learning_rate": 0.00016807201800450114, "loss": 0.5637, "step": 3197 }, { "epoch": 0.2371172239934752, "grad_norm": 0.37066638469696045, "learning_rate": 0.00016806201550387598, "loss": 0.5426, "step": 3198 }, { "epoch": 0.23719136946689404, "grad_norm": 0.3335663378238678, "learning_rate": 0.00016805201300325082, "loss": 0.5068, "step": 3199 }, { "epoch": 0.2372655149403129, "grad_norm": 0.35608214139938354, "learning_rate": 0.00016804201050262568, "loss": 0.5541, "step": 3200 }, { "epoch": 0.23733966041373175, "grad_norm": 0.3695331811904907, "learning_rate": 0.00016803200800200052, "loss": 0.5169, "step": 3201 }, { "epoch": 0.23741380588715058, "grad_norm": 0.3649688959121704, "learning_rate": 0.00016802200550137535, "loss": 0.5533, "step": 3202 }, { "epoch": 0.23748795136056944, "grad_norm": 0.36585232615470886, "learning_rate": 0.0001680120030007502, "loss": 0.5129, "step": 3203 }, { "epoch": 0.2375620968339883, "grad_norm": 0.34507399797439575, "learning_rate": 0.00016800200050012506, "loss": 0.4967, "step": 3204 }, { "epoch": 0.23763624230740713, "grad_norm": 0.3762645125389099, "learning_rate": 0.00016799199799949987, "loss": 0.5363, "step": 3205 }, { "epoch": 0.23771038778082598, "grad_norm": 0.37071657180786133, "learning_rate": 0.00016798199549887473, "loss": 0.5225, "step": 3206 }, { "epoch": 0.23778453325424484, "grad_norm": 0.35472673177719116, "learning_rate": 0.00016797199299824957, "loss": 0.4973, "step": 3207 }, { "epoch": 0.23785867872766367, "grad_norm": 0.35388708114624023, "learning_rate": 0.00016796199049762443, "loss": 0.5138, "step": 3208 }, { "epoch": 0.23793282420108253, "grad_norm": 0.3742550313472748, "learning_rate": 0.00016795198799699924, "loss": 0.5402, "step": 3209 }, { "epoch": 0.23800696967450138, "grad_norm": 0.3714539408683777, "learning_rate": 0.0001679419854963741, "loss": 0.4967, "step": 3210 }, { "epoch": 0.2380811151479202, "grad_norm": 0.3649148643016815, "learning_rate": 0.00016793198299574894, "loss": 0.519, "step": 3211 }, { "epoch": 0.23815526062133907, "grad_norm": 0.38707396388053894, "learning_rate": 0.0001679219804951238, "loss": 0.5537, "step": 3212 }, { "epoch": 0.23822940609475793, "grad_norm": 0.37395477294921875, "learning_rate": 0.00016791197799449862, "loss": 0.5437, "step": 3213 }, { "epoch": 0.23830355156817676, "grad_norm": 0.37692323327064514, "learning_rate": 0.00016790197549387348, "loss": 0.5234, "step": 3214 }, { "epoch": 0.23837769704159562, "grad_norm": 0.3705780804157257, "learning_rate": 0.00016789197299324832, "loss": 0.5252, "step": 3215 }, { "epoch": 0.23845184251501444, "grad_norm": 0.3787778317928314, "learning_rate": 0.00016788197049262316, "loss": 0.5308, "step": 3216 }, { "epoch": 0.2385259879884333, "grad_norm": 0.35072749853134155, "learning_rate": 0.000167871967991998, "loss": 0.4991, "step": 3217 }, { "epoch": 0.23860013346185216, "grad_norm": 0.3894120156764984, "learning_rate": 0.00016786196549137286, "loss": 0.5357, "step": 3218 }, { "epoch": 0.238674278935271, "grad_norm": 0.3730423152446747, "learning_rate": 0.0001678519629907477, "loss": 0.5093, "step": 3219 }, { "epoch": 0.23874842440868985, "grad_norm": 0.40243270993232727, "learning_rate": 0.00016784196049012253, "loss": 0.519, "step": 3220 }, { "epoch": 0.2388225698821087, "grad_norm": 0.37014085054397583, "learning_rate": 0.00016783195798949737, "loss": 0.541, "step": 3221 }, { "epoch": 0.23889671535552753, "grad_norm": 0.3828112483024597, "learning_rate": 0.00016782195548887223, "loss": 0.5356, "step": 3222 }, { "epoch": 0.2389708608289464, "grad_norm": 0.3805207908153534, "learning_rate": 0.00016781195298824707, "loss": 0.5268, "step": 3223 }, { "epoch": 0.23904500630236525, "grad_norm": 0.4237818419933319, "learning_rate": 0.0001678019504876219, "loss": 0.5269, "step": 3224 }, { "epoch": 0.23911915177578408, "grad_norm": 0.46765658259391785, "learning_rate": 0.00016779194798699674, "loss": 0.5398, "step": 3225 }, { "epoch": 0.23919329724920294, "grad_norm": 0.4134621322154999, "learning_rate": 0.0001677819454863716, "loss": 0.5757, "step": 3226 }, { "epoch": 0.2392674427226218, "grad_norm": 0.36894306540489197, "learning_rate": 0.00016777194298574644, "loss": 0.511, "step": 3227 }, { "epoch": 0.23934158819604062, "grad_norm": 0.3896133005619049, "learning_rate": 0.00016776194048512128, "loss": 0.5197, "step": 3228 }, { "epoch": 0.23941573366945948, "grad_norm": 0.4005463719367981, "learning_rate": 0.00016775193798449615, "loss": 0.5662, "step": 3229 }, { "epoch": 0.23948987914287834, "grad_norm": 0.38989242911338806, "learning_rate": 0.00016774193548387098, "loss": 0.5796, "step": 3230 }, { "epoch": 0.23956402461629717, "grad_norm": 0.3702715039253235, "learning_rate": 0.00016773193298324582, "loss": 0.5463, "step": 3231 }, { "epoch": 0.23963817008971602, "grad_norm": 0.36246734857559204, "learning_rate": 0.00016772193048262066, "loss": 0.4924, "step": 3232 }, { "epoch": 0.23971231556313488, "grad_norm": 0.367194265127182, "learning_rate": 0.00016771192798199552, "loss": 0.5316, "step": 3233 }, { "epoch": 0.2397864610365537, "grad_norm": 0.37226665019989014, "learning_rate": 0.00016770192548137036, "loss": 0.549, "step": 3234 }, { "epoch": 0.23986060650997257, "grad_norm": 0.33362898230552673, "learning_rate": 0.0001676919229807452, "loss": 0.507, "step": 3235 }, { "epoch": 0.23993475198339143, "grad_norm": 0.4077602028846741, "learning_rate": 0.00016768192048012003, "loss": 0.5267, "step": 3236 }, { "epoch": 0.24000889745681026, "grad_norm": 0.35338646173477173, "learning_rate": 0.0001676719179794949, "loss": 0.5121, "step": 3237 }, { "epoch": 0.2400830429302291, "grad_norm": 0.36523446440696716, "learning_rate": 0.00016766191547886973, "loss": 0.5674, "step": 3238 }, { "epoch": 0.24015718840364797, "grad_norm": 0.35041749477386475, "learning_rate": 0.00016765191297824457, "loss": 0.5469, "step": 3239 }, { "epoch": 0.2402313338770668, "grad_norm": 0.3926205337047577, "learning_rate": 0.0001676419104776194, "loss": 0.548, "step": 3240 }, { "epoch": 0.24030547935048566, "grad_norm": 0.33840954303741455, "learning_rate": 0.00016763190797699427, "loss": 0.4916, "step": 3241 }, { "epoch": 0.24037962482390451, "grad_norm": 0.34775304794311523, "learning_rate": 0.00016762190547636908, "loss": 0.5413, "step": 3242 }, { "epoch": 0.24045377029732334, "grad_norm": 0.3927387297153473, "learning_rate": 0.00016761190297574395, "loss": 0.5549, "step": 3243 }, { "epoch": 0.2405279157707422, "grad_norm": 0.37727588415145874, "learning_rate": 0.00016760190047511878, "loss": 0.5732, "step": 3244 }, { "epoch": 0.24060206124416103, "grad_norm": 0.3556016683578491, "learning_rate": 0.00016759189797449365, "loss": 0.5373, "step": 3245 }, { "epoch": 0.2406762067175799, "grad_norm": 0.3552757203578949, "learning_rate": 0.00016758189547386846, "loss": 0.5188, "step": 3246 }, { "epoch": 0.24075035219099875, "grad_norm": 0.34709030389785767, "learning_rate": 0.00016757189297324332, "loss": 0.5075, "step": 3247 }, { "epoch": 0.24082449766441757, "grad_norm": 0.3681974709033966, "learning_rate": 0.00016756189047261816, "loss": 0.5719, "step": 3248 }, { "epoch": 0.24089864313783643, "grad_norm": 0.37361568212509155, "learning_rate": 0.00016755188797199302, "loss": 0.5535, "step": 3249 }, { "epoch": 0.2409727886112553, "grad_norm": 0.36473923921585083, "learning_rate": 0.00016754188547136783, "loss": 0.5462, "step": 3250 }, { "epoch": 0.24104693408467412, "grad_norm": 0.4272880554199219, "learning_rate": 0.0001675318829707427, "loss": 0.5513, "step": 3251 }, { "epoch": 0.24112107955809298, "grad_norm": 0.3755418062210083, "learning_rate": 0.00016752188047011753, "loss": 0.5098, "step": 3252 }, { "epoch": 0.24119522503151183, "grad_norm": 0.3654344081878662, "learning_rate": 0.00016751187796949237, "loss": 0.554, "step": 3253 }, { "epoch": 0.24126937050493066, "grad_norm": 0.35913076996803284, "learning_rate": 0.0001675018754688672, "loss": 0.5493, "step": 3254 }, { "epoch": 0.24134351597834952, "grad_norm": 0.3598824441432953, "learning_rate": 0.00016749187296824207, "loss": 0.5242, "step": 3255 }, { "epoch": 0.24141766145176838, "grad_norm": 0.38911163806915283, "learning_rate": 0.0001674818704676169, "loss": 0.5416, "step": 3256 }, { "epoch": 0.2414918069251872, "grad_norm": 0.35995355248451233, "learning_rate": 0.00016747186796699175, "loss": 0.5317, "step": 3257 }, { "epoch": 0.24156595239860607, "grad_norm": 0.37859368324279785, "learning_rate": 0.00016746186546636658, "loss": 0.5656, "step": 3258 }, { "epoch": 0.24164009787202492, "grad_norm": 0.35370561480522156, "learning_rate": 0.00016745186296574145, "loss": 0.5197, "step": 3259 }, { "epoch": 0.24171424334544375, "grad_norm": 0.34636765718460083, "learning_rate": 0.00016744186046511629, "loss": 0.519, "step": 3260 }, { "epoch": 0.2417883888188626, "grad_norm": 0.3512532114982605, "learning_rate": 0.00016743185796449112, "loss": 0.5303, "step": 3261 }, { "epoch": 0.24186253429228147, "grad_norm": 0.4067784249782562, "learning_rate": 0.000167421855463866, "loss": 0.564, "step": 3262 }, { "epoch": 0.2419366797657003, "grad_norm": 0.3577440679073334, "learning_rate": 0.00016741185296324082, "loss": 0.5467, "step": 3263 }, { "epoch": 0.24201082523911915, "grad_norm": 0.36968404054641724, "learning_rate": 0.00016740185046261566, "loss": 0.5353, "step": 3264 }, { "epoch": 0.242084970712538, "grad_norm": 0.36389443278312683, "learning_rate": 0.0001673918479619905, "loss": 0.5525, "step": 3265 }, { "epoch": 0.24215911618595684, "grad_norm": 0.3646070659160614, "learning_rate": 0.00016738184546136536, "loss": 0.5392, "step": 3266 }, { "epoch": 0.2422332616593757, "grad_norm": 0.38601255416870117, "learning_rate": 0.0001673718429607402, "loss": 0.5505, "step": 3267 }, { "epoch": 0.24230740713279456, "grad_norm": 0.382845938205719, "learning_rate": 0.00016736184046011504, "loss": 0.5382, "step": 3268 }, { "epoch": 0.24238155260621339, "grad_norm": 0.3777501583099365, "learning_rate": 0.00016735183795948987, "loss": 0.5936, "step": 3269 }, { "epoch": 0.24245569807963224, "grad_norm": 0.36954912543296814, "learning_rate": 0.00016734183545886474, "loss": 0.544, "step": 3270 }, { "epoch": 0.2425298435530511, "grad_norm": 0.36607053875923157, "learning_rate": 0.00016733183295823957, "loss": 0.542, "step": 3271 }, { "epoch": 0.24260398902646993, "grad_norm": 0.36468085646629333, "learning_rate": 0.0001673218304576144, "loss": 0.526, "step": 3272 }, { "epoch": 0.2426781344998888, "grad_norm": 0.3494719862937927, "learning_rate": 0.00016731182795698925, "loss": 0.504, "step": 3273 }, { "epoch": 0.24275227997330762, "grad_norm": 0.374379426240921, "learning_rate": 0.0001673018254563641, "loss": 0.5987, "step": 3274 }, { "epoch": 0.24282642544672647, "grad_norm": 0.3393305242061615, "learning_rate": 0.00016729182295573895, "loss": 0.4919, "step": 3275 }, { "epoch": 0.24290057092014533, "grad_norm": 0.35261082649230957, "learning_rate": 0.0001672818204551138, "loss": 0.509, "step": 3276 }, { "epoch": 0.24297471639356416, "grad_norm": 0.4264230728149414, "learning_rate": 0.00016727181795448862, "loss": 0.5499, "step": 3277 }, { "epoch": 0.24304886186698302, "grad_norm": 0.3755591809749603, "learning_rate": 0.0001672618154538635, "loss": 0.5349, "step": 3278 }, { "epoch": 0.24312300734040188, "grad_norm": 0.36761850118637085, "learning_rate": 0.0001672518129532383, "loss": 0.5361, "step": 3279 }, { "epoch": 0.2431971528138207, "grad_norm": 0.40554898977279663, "learning_rate": 0.00016724181045261316, "loss": 0.5713, "step": 3280 }, { "epoch": 0.24327129828723956, "grad_norm": 0.3479567766189575, "learning_rate": 0.000167231807951988, "loss": 0.5274, "step": 3281 }, { "epoch": 0.24334544376065842, "grad_norm": 0.35743218660354614, "learning_rate": 0.00016722180545136286, "loss": 0.5355, "step": 3282 }, { "epoch": 0.24341958923407725, "grad_norm": 0.38218212127685547, "learning_rate": 0.00016721180295073767, "loss": 0.5369, "step": 3283 }, { "epoch": 0.2434937347074961, "grad_norm": 0.37711405754089355, "learning_rate": 0.00016720180045011254, "loss": 0.5294, "step": 3284 }, { "epoch": 0.24356788018091496, "grad_norm": 0.3678039014339447, "learning_rate": 0.00016719179794948738, "loss": 0.5126, "step": 3285 }, { "epoch": 0.2436420256543338, "grad_norm": 0.39531219005584717, "learning_rate": 0.00016718179544886224, "loss": 0.5445, "step": 3286 }, { "epoch": 0.24371617112775265, "grad_norm": 0.39402228593826294, "learning_rate": 0.00016717179294823705, "loss": 0.5583, "step": 3287 }, { "epoch": 0.2437903166011715, "grad_norm": 0.3500918447971344, "learning_rate": 0.0001671617904476119, "loss": 0.499, "step": 3288 }, { "epoch": 0.24386446207459034, "grad_norm": 0.39370808005332947, "learning_rate": 0.00016715178794698675, "loss": 0.5564, "step": 3289 }, { "epoch": 0.2439386075480092, "grad_norm": 0.39286309480667114, "learning_rate": 0.0001671417854463616, "loss": 0.5927, "step": 3290 }, { "epoch": 0.24401275302142805, "grad_norm": 0.3722301125526428, "learning_rate": 0.00016713178294573642, "loss": 0.5214, "step": 3291 }, { "epoch": 0.24408689849484688, "grad_norm": 0.3561072051525116, "learning_rate": 0.0001671217804451113, "loss": 0.5394, "step": 3292 }, { "epoch": 0.24416104396826574, "grad_norm": 0.3468705117702484, "learning_rate": 0.00016711177794448613, "loss": 0.5247, "step": 3293 }, { "epoch": 0.2442351894416846, "grad_norm": 0.37846288084983826, "learning_rate": 0.00016710177544386096, "loss": 0.5172, "step": 3294 }, { "epoch": 0.24430933491510343, "grad_norm": 0.4013006091117859, "learning_rate": 0.0001670917729432358, "loss": 0.5296, "step": 3295 }, { "epoch": 0.24438348038852228, "grad_norm": 0.3500313460826874, "learning_rate": 0.00016708177044261066, "loss": 0.5214, "step": 3296 }, { "epoch": 0.24445762586194114, "grad_norm": 0.3613377809524536, "learning_rate": 0.0001670717679419855, "loss": 0.5305, "step": 3297 }, { "epoch": 0.24453177133535997, "grad_norm": 0.3617749810218811, "learning_rate": 0.00016706176544136034, "loss": 0.5451, "step": 3298 }, { "epoch": 0.24460591680877883, "grad_norm": 0.35211071372032166, "learning_rate": 0.0001670517629407352, "loss": 0.5112, "step": 3299 }, { "epoch": 0.24468006228219769, "grad_norm": 0.39198848605155945, "learning_rate": 0.00016704176044011004, "loss": 0.5196, "step": 3300 }, { "epoch": 0.24475420775561652, "grad_norm": 0.3789917826652527, "learning_rate": 0.00016703175793948488, "loss": 0.5449, "step": 3301 }, { "epoch": 0.24482835322903537, "grad_norm": 0.34538498520851135, "learning_rate": 0.00016702175543885971, "loss": 0.4943, "step": 3302 }, { "epoch": 0.2449024987024542, "grad_norm": 0.39300164580345154, "learning_rate": 0.00016701175293823458, "loss": 0.5815, "step": 3303 }, { "epoch": 0.24497664417587306, "grad_norm": 0.34699195623397827, "learning_rate": 0.00016700175043760942, "loss": 0.5249, "step": 3304 }, { "epoch": 0.24505078964929192, "grad_norm": 0.37451696395874023, "learning_rate": 0.00016699174793698425, "loss": 0.5553, "step": 3305 }, { "epoch": 0.24512493512271075, "grad_norm": 0.3739456832408905, "learning_rate": 0.0001669817454363591, "loss": 0.5553, "step": 3306 }, { "epoch": 0.2451990805961296, "grad_norm": 0.36084166169166565, "learning_rate": 0.00016697174293573395, "loss": 0.5369, "step": 3307 }, { "epoch": 0.24527322606954846, "grad_norm": 0.3772769272327423, "learning_rate": 0.0001669617404351088, "loss": 0.5348, "step": 3308 }, { "epoch": 0.2453473715429673, "grad_norm": 0.3716500699520111, "learning_rate": 0.00016695173793448363, "loss": 0.5392, "step": 3309 }, { "epoch": 0.24542151701638615, "grad_norm": 0.3538169860839844, "learning_rate": 0.00016694173543385846, "loss": 0.5435, "step": 3310 }, { "epoch": 0.245495662489805, "grad_norm": 0.38239946961402893, "learning_rate": 0.00016693173293323333, "loss": 0.5595, "step": 3311 }, { "epoch": 0.24556980796322384, "grad_norm": 0.40918490290641785, "learning_rate": 0.00016692173043260817, "loss": 0.5461, "step": 3312 }, { "epoch": 0.2456439534366427, "grad_norm": 0.430387020111084, "learning_rate": 0.000166911727931983, "loss": 0.5247, "step": 3313 }, { "epoch": 0.24571809891006155, "grad_norm": 0.4003908932209015, "learning_rate": 0.00016690172543135784, "loss": 0.5841, "step": 3314 }, { "epoch": 0.24579224438348038, "grad_norm": 0.3492433726787567, "learning_rate": 0.0001668917229307327, "loss": 0.5144, "step": 3315 }, { "epoch": 0.24586638985689924, "grad_norm": 0.3741854429244995, "learning_rate": 0.00016688172043010751, "loss": 0.5382, "step": 3316 }, { "epoch": 0.2459405353303181, "grad_norm": 0.37713325023651123, "learning_rate": 0.00016687171792948238, "loss": 0.5068, "step": 3317 }, { "epoch": 0.24601468080373692, "grad_norm": 0.35915523767471313, "learning_rate": 0.00016686171542885722, "loss": 0.5269, "step": 3318 }, { "epoch": 0.24608882627715578, "grad_norm": 0.4032568633556366, "learning_rate": 0.00016685171292823208, "loss": 0.6077, "step": 3319 }, { "epoch": 0.24616297175057464, "grad_norm": 0.36324208974838257, "learning_rate": 0.0001668417104276069, "loss": 0.5022, "step": 3320 }, { "epoch": 0.24623711722399347, "grad_norm": 0.359764039516449, "learning_rate": 0.00016683170792698175, "loss": 0.52, "step": 3321 }, { "epoch": 0.24631126269741233, "grad_norm": 0.4222489893436432, "learning_rate": 0.0001668217054263566, "loss": 0.6153, "step": 3322 }, { "epoch": 0.24638540817083118, "grad_norm": 0.36787399649620056, "learning_rate": 0.00016681170292573146, "loss": 0.5353, "step": 3323 }, { "epoch": 0.24645955364425, "grad_norm": 0.39032626152038574, "learning_rate": 0.00016680170042510627, "loss": 0.5547, "step": 3324 }, { "epoch": 0.24653369911766887, "grad_norm": 0.36440929770469666, "learning_rate": 0.00016679169792448113, "loss": 0.5341, "step": 3325 }, { "epoch": 0.24660784459108773, "grad_norm": 0.35474568605422974, "learning_rate": 0.00016678169542385597, "loss": 0.567, "step": 3326 }, { "epoch": 0.24668199006450656, "grad_norm": 0.36861103773117065, "learning_rate": 0.00016677169292323083, "loss": 0.5342, "step": 3327 }, { "epoch": 0.24675613553792541, "grad_norm": 0.3693939745426178, "learning_rate": 0.00016676169042260564, "loss": 0.5267, "step": 3328 }, { "epoch": 0.24683028101134424, "grad_norm": 0.3834732472896576, "learning_rate": 0.0001667516879219805, "loss": 0.5621, "step": 3329 }, { "epoch": 0.2469044264847631, "grad_norm": 0.3614034950733185, "learning_rate": 0.00016674168542135534, "loss": 0.5515, "step": 3330 }, { "epoch": 0.24697857195818196, "grad_norm": 0.3790854811668396, "learning_rate": 0.00016673168292073018, "loss": 0.5501, "step": 3331 }, { "epoch": 0.2470527174316008, "grad_norm": 0.3647060990333557, "learning_rate": 0.00016672168042010504, "loss": 0.5599, "step": 3332 }, { "epoch": 0.24712686290501965, "grad_norm": 0.3690299689769745, "learning_rate": 0.00016671167791947988, "loss": 0.5591, "step": 3333 }, { "epoch": 0.2472010083784385, "grad_norm": 0.39238718152046204, "learning_rate": 0.00016670167541885472, "loss": 0.5774, "step": 3334 }, { "epoch": 0.24727515385185733, "grad_norm": 0.35550549626350403, "learning_rate": 0.00016669167291822955, "loss": 0.5105, "step": 3335 }, { "epoch": 0.2473492993252762, "grad_norm": 0.35920530557632446, "learning_rate": 0.00016668167041760442, "loss": 0.5503, "step": 3336 }, { "epoch": 0.24742344479869505, "grad_norm": 0.39490410685539246, "learning_rate": 0.00016667166791697926, "loss": 0.5761, "step": 3337 }, { "epoch": 0.24749759027211388, "grad_norm": 0.3611270785331726, "learning_rate": 0.00016666166541635412, "loss": 0.5675, "step": 3338 }, { "epoch": 0.24757173574553273, "grad_norm": 0.36728212237358093, "learning_rate": 0.00016665166291572893, "loss": 0.5536, "step": 3339 }, { "epoch": 0.2476458812189516, "grad_norm": 0.37420517206192017, "learning_rate": 0.0001666416604151038, "loss": 0.5226, "step": 3340 }, { "epoch": 0.24772002669237042, "grad_norm": 0.3478246331214905, "learning_rate": 0.00016663165791447863, "loss": 0.5118, "step": 3341 }, { "epoch": 0.24779417216578928, "grad_norm": 0.35498911142349243, "learning_rate": 0.00016662165541385347, "loss": 0.5381, "step": 3342 }, { "epoch": 0.24786831763920814, "grad_norm": 0.36117345094680786, "learning_rate": 0.0001666116529132283, "loss": 0.5432, "step": 3343 }, { "epoch": 0.24794246311262697, "grad_norm": 0.37840744853019714, "learning_rate": 0.00016660165041260317, "loss": 0.6028, "step": 3344 }, { "epoch": 0.24801660858604582, "grad_norm": 0.3741895854473114, "learning_rate": 0.000166591647911978, "loss": 0.5033, "step": 3345 }, { "epoch": 0.24809075405946468, "grad_norm": 0.4101541042327881, "learning_rate": 0.00016658164541135284, "loss": 0.5409, "step": 3346 }, { "epoch": 0.2481648995328835, "grad_norm": 0.3451301157474518, "learning_rate": 0.00016657164291072768, "loss": 0.4983, "step": 3347 }, { "epoch": 0.24823904500630237, "grad_norm": 0.3356908857822418, "learning_rate": 0.00016656164041010255, "loss": 0.4899, "step": 3348 }, { "epoch": 0.24831319047972122, "grad_norm": 0.36115771532058716, "learning_rate": 0.00016655163790947738, "loss": 0.513, "step": 3349 }, { "epoch": 0.24838733595314005, "grad_norm": 0.3728066682815552, "learning_rate": 0.00016654163540885222, "loss": 0.56, "step": 3350 }, { "epoch": 0.2484614814265589, "grad_norm": 0.3772042989730835, "learning_rate": 0.00016653163290822706, "loss": 0.5347, "step": 3351 }, { "epoch": 0.24853562689997777, "grad_norm": 0.4079764485359192, "learning_rate": 0.00016652163040760192, "loss": 0.5776, "step": 3352 }, { "epoch": 0.2486097723733966, "grad_norm": 0.3886663019657135, "learning_rate": 0.00016651162790697673, "loss": 0.5649, "step": 3353 }, { "epoch": 0.24868391784681546, "grad_norm": 0.3470710813999176, "learning_rate": 0.0001665016254063516, "loss": 0.5268, "step": 3354 }, { "epoch": 0.2487580633202343, "grad_norm": 0.3889142870903015, "learning_rate": 0.00016649162290572643, "loss": 0.5702, "step": 3355 }, { "epoch": 0.24883220879365314, "grad_norm": 0.3424304127693176, "learning_rate": 0.0001664816204051013, "loss": 0.5152, "step": 3356 }, { "epoch": 0.248906354267072, "grad_norm": 0.3567855656147003, "learning_rate": 0.0001664716179044761, "loss": 0.5441, "step": 3357 }, { "epoch": 0.24898049974049083, "grad_norm": 0.38103601336479187, "learning_rate": 0.00016646161540385097, "loss": 0.5619, "step": 3358 }, { "epoch": 0.2490546452139097, "grad_norm": 0.3546956181526184, "learning_rate": 0.0001664516129032258, "loss": 0.4998, "step": 3359 }, { "epoch": 0.24912879068732854, "grad_norm": 0.3830730617046356, "learning_rate": 0.00016644161040260067, "loss": 0.5265, "step": 3360 }, { "epoch": 0.24920293616074737, "grad_norm": 0.34359580278396606, "learning_rate": 0.00016643160790197548, "loss": 0.512, "step": 3361 }, { "epoch": 0.24927708163416623, "grad_norm": 0.3535948693752289, "learning_rate": 0.00016642160540135035, "loss": 0.5116, "step": 3362 }, { "epoch": 0.2493512271075851, "grad_norm": 0.36408090591430664, "learning_rate": 0.00016641160290072518, "loss": 0.4832, "step": 3363 }, { "epoch": 0.24942537258100392, "grad_norm": 0.38519811630249023, "learning_rate": 0.00016640160040010005, "loss": 0.5237, "step": 3364 }, { "epoch": 0.24949951805442278, "grad_norm": 0.3399234116077423, "learning_rate": 0.00016639159789947488, "loss": 0.476, "step": 3365 }, { "epoch": 0.24957366352784163, "grad_norm": 0.36859890818595886, "learning_rate": 0.00016638159539884972, "loss": 0.5107, "step": 3366 }, { "epoch": 0.24964780900126046, "grad_norm": 0.37905070185661316, "learning_rate": 0.00016637159289822456, "loss": 0.533, "step": 3367 }, { "epoch": 0.24972195447467932, "grad_norm": 0.37422215938568115, "learning_rate": 0.0001663615903975994, "loss": 0.5544, "step": 3368 }, { "epoch": 0.24979609994809818, "grad_norm": 0.3416261672973633, "learning_rate": 0.00016635158789697426, "loss": 0.4916, "step": 3369 }, { "epoch": 0.249870245421517, "grad_norm": 0.3535440266132355, "learning_rate": 0.0001663415853963491, "loss": 0.527, "step": 3370 }, { "epoch": 0.24994439089493586, "grad_norm": 0.36488503217697144, "learning_rate": 0.00016633158289572393, "loss": 0.5572, "step": 3371 }, { "epoch": 0.2500185363683547, "grad_norm": 0.3686935603618622, "learning_rate": 0.00016632158039509877, "loss": 0.542, "step": 3372 }, { "epoch": 0.2500926818417736, "grad_norm": 0.3625973165035248, "learning_rate": 0.00016631157789447364, "loss": 0.5394, "step": 3373 }, { "epoch": 0.2501668273151924, "grad_norm": 0.3458068370819092, "learning_rate": 0.00016630157539384847, "loss": 0.5245, "step": 3374 }, { "epoch": 0.25024097278861124, "grad_norm": 0.37482860684394836, "learning_rate": 0.00016629157289322334, "loss": 0.5595, "step": 3375 }, { "epoch": 0.2503151182620301, "grad_norm": 0.3589373230934143, "learning_rate": 0.00016628157039259815, "loss": 0.5376, "step": 3376 }, { "epoch": 0.25038926373544895, "grad_norm": 0.35633915662765503, "learning_rate": 0.000166271567891973, "loss": 0.4997, "step": 3377 }, { "epoch": 0.2504634092088678, "grad_norm": 0.3857879936695099, "learning_rate": 0.00016626156539134785, "loss": 0.5542, "step": 3378 }, { "epoch": 0.25053755468228667, "grad_norm": 0.3769915699958801, "learning_rate": 0.00016625156289072268, "loss": 0.5487, "step": 3379 }, { "epoch": 0.2506117001557055, "grad_norm": 0.3502064049243927, "learning_rate": 0.00016624156039009752, "loss": 0.4881, "step": 3380 }, { "epoch": 0.2506858456291243, "grad_norm": 0.3929956555366516, "learning_rate": 0.00016623155788947239, "loss": 0.6253, "step": 3381 }, { "epoch": 0.2507599911025432, "grad_norm": 0.3800484836101532, "learning_rate": 0.00016622155538884722, "loss": 0.5237, "step": 3382 }, { "epoch": 0.25083413657596204, "grad_norm": 0.37584468722343445, "learning_rate": 0.00016621155288822206, "loss": 0.5423, "step": 3383 }, { "epoch": 0.25090828204938087, "grad_norm": 0.3630279004573822, "learning_rate": 0.0001662015503875969, "loss": 0.5374, "step": 3384 }, { "epoch": 0.25098242752279976, "grad_norm": 0.3600642681121826, "learning_rate": 0.00016619154788697176, "loss": 0.5154, "step": 3385 }, { "epoch": 0.2510565729962186, "grad_norm": 0.36982211470603943, "learning_rate": 0.0001661815453863466, "loss": 0.5094, "step": 3386 }, { "epoch": 0.2511307184696374, "grad_norm": 0.4287145733833313, "learning_rate": 0.00016617154288572144, "loss": 0.5588, "step": 3387 }, { "epoch": 0.2512048639430563, "grad_norm": 0.39158737659454346, "learning_rate": 0.00016616154038509627, "loss": 0.5114, "step": 3388 }, { "epoch": 0.25127900941647513, "grad_norm": 0.36508360505104065, "learning_rate": 0.00016615153788447114, "loss": 0.5804, "step": 3389 }, { "epoch": 0.25135315488989396, "grad_norm": 0.3532850444316864, "learning_rate": 0.00016614153538384595, "loss": 0.4979, "step": 3390 }, { "epoch": 0.25142730036331284, "grad_norm": 0.3402103781700134, "learning_rate": 0.0001661315328832208, "loss": 0.5146, "step": 3391 }, { "epoch": 0.2515014458367317, "grad_norm": 0.3879495859146118, "learning_rate": 0.00016612153038259565, "loss": 0.555, "step": 3392 }, { "epoch": 0.2515755913101505, "grad_norm": 0.3709789514541626, "learning_rate": 0.0001661115278819705, "loss": 0.5373, "step": 3393 }, { "epoch": 0.2516497367835694, "grad_norm": 0.3992503583431244, "learning_rate": 0.00016610152538134532, "loss": 0.5727, "step": 3394 }, { "epoch": 0.2517238822569882, "grad_norm": 0.3610248565673828, "learning_rate": 0.0001660915228807202, "loss": 0.5366, "step": 3395 }, { "epoch": 0.25179802773040705, "grad_norm": 0.3760262429714203, "learning_rate": 0.00016608152038009502, "loss": 0.5582, "step": 3396 }, { "epoch": 0.25187217320382593, "grad_norm": 0.3598494529724121, "learning_rate": 0.0001660715178794699, "loss": 0.5256, "step": 3397 }, { "epoch": 0.25194631867724476, "grad_norm": 0.3594709038734436, "learning_rate": 0.00016606151537884473, "loss": 0.5157, "step": 3398 }, { "epoch": 0.2520204641506636, "grad_norm": 0.34947875142097473, "learning_rate": 0.00016605151287821956, "loss": 0.485, "step": 3399 }, { "epoch": 0.2520946096240825, "grad_norm": 0.36437126994132996, "learning_rate": 0.0001660415103775944, "loss": 0.5344, "step": 3400 }, { "epoch": 0.2521687550975013, "grad_norm": 0.3770104944705963, "learning_rate": 0.00016603150787696926, "loss": 0.5411, "step": 3401 }, { "epoch": 0.25224290057092014, "grad_norm": 0.37300336360931396, "learning_rate": 0.0001660215053763441, "loss": 0.516, "step": 3402 }, { "epoch": 0.25231704604433897, "grad_norm": 0.36095839738845825, "learning_rate": 0.00016601150287571894, "loss": 0.5501, "step": 3403 }, { "epoch": 0.25239119151775785, "grad_norm": 0.3592807352542877, "learning_rate": 0.00016600150037509377, "loss": 0.52, "step": 3404 }, { "epoch": 0.2524653369911767, "grad_norm": 0.38850921392440796, "learning_rate": 0.0001659914978744686, "loss": 0.5407, "step": 3405 }, { "epoch": 0.2525394824645955, "grad_norm": 0.3501318693161011, "learning_rate": 0.00016598149537384348, "loss": 0.5003, "step": 3406 }, { "epoch": 0.2526136279380144, "grad_norm": 0.38126492500305176, "learning_rate": 0.0001659714928732183, "loss": 0.5972, "step": 3407 }, { "epoch": 0.2526877734114332, "grad_norm": 0.36437365412712097, "learning_rate": 0.00016596149037259318, "loss": 0.5336, "step": 3408 }, { "epoch": 0.25276191888485205, "grad_norm": 0.3672381341457367, "learning_rate": 0.000165951487871968, "loss": 0.5327, "step": 3409 }, { "epoch": 0.25283606435827094, "grad_norm": 0.3542165458202362, "learning_rate": 0.00016594148537134285, "loss": 0.5076, "step": 3410 }, { "epoch": 0.25291020983168977, "grad_norm": 0.366530179977417, "learning_rate": 0.0001659314828707177, "loss": 0.5306, "step": 3411 }, { "epoch": 0.2529843553051086, "grad_norm": 0.3631214201450348, "learning_rate": 0.00016592148037009255, "loss": 0.5408, "step": 3412 }, { "epoch": 0.2530585007785275, "grad_norm": 0.3585284948348999, "learning_rate": 0.00016591147786946736, "loss": 0.481, "step": 3413 }, { "epoch": 0.2531326462519463, "grad_norm": 0.4147912859916687, "learning_rate": 0.00016590147536884223, "loss": 0.581, "step": 3414 }, { "epoch": 0.25320679172536514, "grad_norm": 0.357879638671875, "learning_rate": 0.00016589147286821706, "loss": 0.5218, "step": 3415 }, { "epoch": 0.25328093719878403, "grad_norm": 0.36036407947540283, "learning_rate": 0.0001658814703675919, "loss": 0.5268, "step": 3416 }, { "epoch": 0.25335508267220286, "grad_norm": 0.3544567823410034, "learning_rate": 0.00016587146786696674, "loss": 0.5139, "step": 3417 }, { "epoch": 0.2534292281456217, "grad_norm": 0.3581709861755371, "learning_rate": 0.0001658614653663416, "loss": 0.5605, "step": 3418 }, { "epoch": 0.2535033736190406, "grad_norm": 0.3658219873905182, "learning_rate": 0.00016585146286571644, "loss": 0.5052, "step": 3419 }, { "epoch": 0.2535775190924594, "grad_norm": 0.3350900709629059, "learning_rate": 0.00016584146036509128, "loss": 0.5364, "step": 3420 }, { "epoch": 0.25365166456587823, "grad_norm": 0.35622137784957886, "learning_rate": 0.00016583145786446611, "loss": 0.5334, "step": 3421 }, { "epoch": 0.2537258100392971, "grad_norm": 0.35290130972862244, "learning_rate": 0.00016582145536384098, "loss": 0.5159, "step": 3422 }, { "epoch": 0.25379995551271595, "grad_norm": 0.34683462977409363, "learning_rate": 0.00016581145286321582, "loss": 0.5138, "step": 3423 }, { "epoch": 0.2538741009861348, "grad_norm": 0.3926638960838318, "learning_rate": 0.00016580145036259065, "loss": 0.5497, "step": 3424 }, { "epoch": 0.25394824645955366, "grad_norm": 0.35471686720848083, "learning_rate": 0.0001657914478619655, "loss": 0.494, "step": 3425 }, { "epoch": 0.2540223919329725, "grad_norm": 0.37422633171081543, "learning_rate": 0.00016578144536134035, "loss": 0.5698, "step": 3426 }, { "epoch": 0.2540965374063913, "grad_norm": 0.33627283573150635, "learning_rate": 0.00016577144286071516, "loss": 0.5021, "step": 3427 }, { "epoch": 0.2541706828798102, "grad_norm": 0.414602130651474, "learning_rate": 0.00016576144036009003, "loss": 0.5511, "step": 3428 }, { "epoch": 0.25424482835322904, "grad_norm": 0.3549787700176239, "learning_rate": 0.00016575143785946486, "loss": 0.5381, "step": 3429 }, { "epoch": 0.25431897382664786, "grad_norm": 0.359348863363266, "learning_rate": 0.00016574143535883973, "loss": 0.5217, "step": 3430 }, { "epoch": 0.25439311930006675, "grad_norm": 0.3569994270801544, "learning_rate": 0.00016573143285821457, "loss": 0.5266, "step": 3431 }, { "epoch": 0.2544672647734856, "grad_norm": 0.34716153144836426, "learning_rate": 0.0001657214303575894, "loss": 0.5135, "step": 3432 }, { "epoch": 0.2545414102469044, "grad_norm": 0.33863064646720886, "learning_rate": 0.00016571142785696424, "loss": 0.4995, "step": 3433 }, { "epoch": 0.2546155557203233, "grad_norm": 0.4172699451446533, "learning_rate": 0.0001657014253563391, "loss": 0.5892, "step": 3434 }, { "epoch": 0.2546897011937421, "grad_norm": 0.36969268321990967, "learning_rate": 0.00016569142285571394, "loss": 0.5725, "step": 3435 }, { "epoch": 0.25476384666716095, "grad_norm": 0.35275495052337646, "learning_rate": 0.00016568142035508878, "loss": 0.4815, "step": 3436 }, { "epoch": 0.25483799214057984, "grad_norm": 0.34947672486305237, "learning_rate": 0.00016567141785446362, "loss": 0.5166, "step": 3437 }, { "epoch": 0.25491213761399867, "grad_norm": 0.35023146867752075, "learning_rate": 0.00016566141535383848, "loss": 0.5339, "step": 3438 }, { "epoch": 0.2549862830874175, "grad_norm": 0.384124755859375, "learning_rate": 0.00016565141285321332, "loss": 0.5557, "step": 3439 }, { "epoch": 0.2550604285608364, "grad_norm": 0.35352423787117004, "learning_rate": 0.00016564141035258815, "loss": 0.5438, "step": 3440 }, { "epoch": 0.2551345740342552, "grad_norm": 0.3678523898124695, "learning_rate": 0.00016563140785196302, "loss": 0.5132, "step": 3441 }, { "epoch": 0.25520871950767404, "grad_norm": 0.38982394337654114, "learning_rate": 0.00016562140535133783, "loss": 0.5449, "step": 3442 }, { "epoch": 0.2552828649810929, "grad_norm": 0.36352917551994324, "learning_rate": 0.0001656114028507127, "loss": 0.5454, "step": 3443 }, { "epoch": 0.25535701045451176, "grad_norm": 0.36938655376434326, "learning_rate": 0.00016560140035008753, "loss": 0.5524, "step": 3444 }, { "epoch": 0.2554311559279306, "grad_norm": 0.3775148391723633, "learning_rate": 0.0001655913978494624, "loss": 0.5793, "step": 3445 }, { "epoch": 0.25550530140134947, "grad_norm": 0.3852376639842987, "learning_rate": 0.0001655813953488372, "loss": 0.5396, "step": 3446 }, { "epoch": 0.2555794468747683, "grad_norm": 0.3685280978679657, "learning_rate": 0.00016557139284821207, "loss": 0.545, "step": 3447 }, { "epoch": 0.25565359234818713, "grad_norm": 0.36195099353790283, "learning_rate": 0.0001655613903475869, "loss": 0.5318, "step": 3448 }, { "epoch": 0.255727737821606, "grad_norm": 0.4028662145137787, "learning_rate": 0.00016555138784696177, "loss": 0.5424, "step": 3449 }, { "epoch": 0.25580188329502485, "grad_norm": 0.3903498351573944, "learning_rate": 0.00016554138534633658, "loss": 0.5532, "step": 3450 }, { "epoch": 0.2558760287684437, "grad_norm": 0.3621997535228729, "learning_rate": 0.00016553138284571144, "loss": 0.5318, "step": 3451 }, { "epoch": 0.25595017424186256, "grad_norm": 0.37556248903274536, "learning_rate": 0.00016552138034508628, "loss": 0.5483, "step": 3452 }, { "epoch": 0.2560243197152814, "grad_norm": 0.37047436833381653, "learning_rate": 0.00016551137784446112, "loss": 0.5095, "step": 3453 }, { "epoch": 0.2560984651887002, "grad_norm": 0.3619502782821655, "learning_rate": 0.00016550137534383595, "loss": 0.5491, "step": 3454 }, { "epoch": 0.2561726106621191, "grad_norm": 0.3824634253978729, "learning_rate": 0.00016549137284321082, "loss": 0.5265, "step": 3455 }, { "epoch": 0.25624675613553793, "grad_norm": 0.40486422181129456, "learning_rate": 0.00016548137034258566, "loss": 0.5624, "step": 3456 }, { "epoch": 0.25632090160895676, "grad_norm": 0.35604625940322876, "learning_rate": 0.0001654713678419605, "loss": 0.4957, "step": 3457 }, { "epoch": 0.2563950470823756, "grad_norm": 0.3567882478237152, "learning_rate": 0.00016546136534133533, "loss": 0.5221, "step": 3458 }, { "epoch": 0.2564691925557945, "grad_norm": 0.397843599319458, "learning_rate": 0.0001654513628407102, "loss": 0.5952, "step": 3459 }, { "epoch": 0.2565433380292133, "grad_norm": 0.3981975018978119, "learning_rate": 0.00016544136034008503, "loss": 0.544, "step": 3460 }, { "epoch": 0.25661748350263214, "grad_norm": 0.36160212755203247, "learning_rate": 0.00016543135783945987, "loss": 0.5324, "step": 3461 }, { "epoch": 0.256691628976051, "grad_norm": 0.37319839000701904, "learning_rate": 0.0001654213553388347, "loss": 0.5552, "step": 3462 }, { "epoch": 0.25676577444946985, "grad_norm": 0.3729632496833801, "learning_rate": 0.00016541135283820957, "loss": 0.5108, "step": 3463 }, { "epoch": 0.2568399199228887, "grad_norm": 0.35054540634155273, "learning_rate": 0.00016540135033758438, "loss": 0.494, "step": 3464 }, { "epoch": 0.25691406539630757, "grad_norm": 0.365863561630249, "learning_rate": 0.00016539134783695924, "loss": 0.565, "step": 3465 }, { "epoch": 0.2569882108697264, "grad_norm": 0.3928017020225525, "learning_rate": 0.00016538134533633408, "loss": 0.5203, "step": 3466 }, { "epoch": 0.2570623563431452, "grad_norm": 0.35324597358703613, "learning_rate": 0.00016537134283570895, "loss": 0.492, "step": 3467 }, { "epoch": 0.2571365018165641, "grad_norm": 0.3856508433818817, "learning_rate": 0.00016536134033508378, "loss": 0.5051, "step": 3468 }, { "epoch": 0.25721064728998294, "grad_norm": 0.3714316487312317, "learning_rate": 0.00016535133783445862, "loss": 0.5322, "step": 3469 }, { "epoch": 0.25728479276340177, "grad_norm": 0.3490816652774811, "learning_rate": 0.00016534133533383346, "loss": 0.5663, "step": 3470 }, { "epoch": 0.25735893823682066, "grad_norm": 0.3638123869895935, "learning_rate": 0.00016533133283320832, "loss": 0.5332, "step": 3471 }, { "epoch": 0.2574330837102395, "grad_norm": 0.37782374024391174, "learning_rate": 0.00016532133033258316, "loss": 0.5918, "step": 3472 }, { "epoch": 0.2575072291836583, "grad_norm": 0.3462027311325073, "learning_rate": 0.000165311327831958, "loss": 0.5121, "step": 3473 }, { "epoch": 0.2575813746570772, "grad_norm": 0.3473947048187256, "learning_rate": 0.00016530132533133286, "loss": 0.4879, "step": 3474 }, { "epoch": 0.25765552013049603, "grad_norm": 0.36277008056640625, "learning_rate": 0.0001652913228307077, "loss": 0.4979, "step": 3475 }, { "epoch": 0.25772966560391486, "grad_norm": 0.3669394254684448, "learning_rate": 0.00016528132033008253, "loss": 0.5535, "step": 3476 }, { "epoch": 0.25780381107733374, "grad_norm": 0.39962390065193176, "learning_rate": 0.00016527131782945737, "loss": 0.5827, "step": 3477 }, { "epoch": 0.2578779565507526, "grad_norm": 0.3970809876918793, "learning_rate": 0.00016526131532883223, "loss": 0.5608, "step": 3478 }, { "epoch": 0.2579521020241714, "grad_norm": 0.373417466878891, "learning_rate": 0.00016525131282820704, "loss": 0.5457, "step": 3479 }, { "epoch": 0.2580262474975903, "grad_norm": 0.3557855486869812, "learning_rate": 0.0001652413103275819, "loss": 0.5432, "step": 3480 }, { "epoch": 0.2581003929710091, "grad_norm": 0.3637807369232178, "learning_rate": 0.00016523130782695675, "loss": 0.5014, "step": 3481 }, { "epoch": 0.25817453844442795, "grad_norm": 0.3623547852039337, "learning_rate": 0.0001652213053263316, "loss": 0.525, "step": 3482 }, { "epoch": 0.25824868391784683, "grad_norm": 0.3836563527584076, "learning_rate": 0.00016521130282570642, "loss": 0.5868, "step": 3483 }, { "epoch": 0.25832282939126566, "grad_norm": 0.3626660704612732, "learning_rate": 0.00016520130032508128, "loss": 0.5028, "step": 3484 }, { "epoch": 0.2583969748646845, "grad_norm": 0.3482823371887207, "learning_rate": 0.00016519129782445612, "loss": 0.523, "step": 3485 }, { "epoch": 0.2584711203381034, "grad_norm": 0.350510835647583, "learning_rate": 0.00016518129532383099, "loss": 0.55, "step": 3486 }, { "epoch": 0.2585452658115222, "grad_norm": 0.41747695207595825, "learning_rate": 0.0001651712928232058, "loss": 0.574, "step": 3487 }, { "epoch": 0.25861941128494104, "grad_norm": 0.4050782322883606, "learning_rate": 0.00016516129032258066, "loss": 0.5141, "step": 3488 }, { "epoch": 0.2586935567583599, "grad_norm": 0.3684224486351013, "learning_rate": 0.0001651512878219555, "loss": 0.5469, "step": 3489 }, { "epoch": 0.25876770223177875, "grad_norm": 0.41359224915504456, "learning_rate": 0.00016514128532133033, "loss": 0.5564, "step": 3490 }, { "epoch": 0.2588418477051976, "grad_norm": 0.38042330741882324, "learning_rate": 0.00016513128282070517, "loss": 0.534, "step": 3491 }, { "epoch": 0.25891599317861647, "grad_norm": 0.37723445892333984, "learning_rate": 0.00016512128032008004, "loss": 0.533, "step": 3492 }, { "epoch": 0.2589901386520353, "grad_norm": 0.3962837755680084, "learning_rate": 0.00016511127781945487, "loss": 0.5648, "step": 3493 }, { "epoch": 0.2590642841254541, "grad_norm": 0.3665388226509094, "learning_rate": 0.0001651012753188297, "loss": 0.5586, "step": 3494 }, { "epoch": 0.259138429598873, "grad_norm": 0.341927170753479, "learning_rate": 0.00016509127281820455, "loss": 0.5193, "step": 3495 }, { "epoch": 0.25921257507229184, "grad_norm": 0.36235079169273376, "learning_rate": 0.0001650812703175794, "loss": 0.5738, "step": 3496 }, { "epoch": 0.25928672054571067, "grad_norm": 0.3576965034008026, "learning_rate": 0.00016507126781695425, "loss": 0.5431, "step": 3497 }, { "epoch": 0.25936086601912955, "grad_norm": 0.34627196192741394, "learning_rate": 0.00016506126531632908, "loss": 0.5098, "step": 3498 }, { "epoch": 0.2594350114925484, "grad_norm": 0.3807649314403534, "learning_rate": 0.00016505126281570392, "loss": 0.608, "step": 3499 }, { "epoch": 0.2595091569659672, "grad_norm": 0.35043641924858093, "learning_rate": 0.00016504126031507879, "loss": 0.5647, "step": 3500 }, { "epoch": 0.2595833024393861, "grad_norm": 0.3786196708679199, "learning_rate": 0.00016503125781445362, "loss": 0.5341, "step": 3501 }, { "epoch": 0.25965744791280493, "grad_norm": 0.3508590757846832, "learning_rate": 0.00016502125531382846, "loss": 0.4964, "step": 3502 }, { "epoch": 0.25973159338622376, "grad_norm": 0.3844590187072754, "learning_rate": 0.0001650112528132033, "loss": 0.5717, "step": 3503 }, { "epoch": 0.25980573885964264, "grad_norm": 0.3518267273902893, "learning_rate": 0.00016500125031257816, "loss": 0.5085, "step": 3504 }, { "epoch": 0.2598798843330615, "grad_norm": 0.3602518141269684, "learning_rate": 0.000164991247811953, "loss": 0.5622, "step": 3505 }, { "epoch": 0.2599540298064803, "grad_norm": 0.3580520451068878, "learning_rate": 0.00016498124531132784, "loss": 0.5211, "step": 3506 }, { "epoch": 0.2600281752798992, "grad_norm": 0.36657270789146423, "learning_rate": 0.0001649712428107027, "loss": 0.5304, "step": 3507 }, { "epoch": 0.260102320753318, "grad_norm": 0.3495907187461853, "learning_rate": 0.00016496124031007754, "loss": 0.5432, "step": 3508 }, { "epoch": 0.26017646622673685, "grad_norm": 0.41866564750671387, "learning_rate": 0.00016495123780945237, "loss": 0.5506, "step": 3509 }, { "epoch": 0.26025061170015573, "grad_norm": 0.3942282199859619, "learning_rate": 0.0001649412353088272, "loss": 0.5104, "step": 3510 }, { "epoch": 0.26032475717357456, "grad_norm": 0.3698032796382904, "learning_rate": 0.00016493123280820208, "loss": 0.5041, "step": 3511 }, { "epoch": 0.2603989026469934, "grad_norm": 0.37440547347068787, "learning_rate": 0.0001649212303075769, "loss": 0.5812, "step": 3512 }, { "epoch": 0.2604730481204123, "grad_norm": 0.3441199064254761, "learning_rate": 0.00016491122780695175, "loss": 0.5024, "step": 3513 }, { "epoch": 0.2605471935938311, "grad_norm": 0.38282033801078796, "learning_rate": 0.0001649012253063266, "loss": 0.5516, "step": 3514 }, { "epoch": 0.26062133906724994, "grad_norm": 0.37268969416618347, "learning_rate": 0.00016489122280570145, "loss": 0.5164, "step": 3515 }, { "epoch": 0.26069548454066876, "grad_norm": 0.3579404652118683, "learning_rate": 0.00016488122030507626, "loss": 0.5071, "step": 3516 }, { "epoch": 0.26076963001408765, "grad_norm": 0.3635385036468506, "learning_rate": 0.00016487121780445112, "loss": 0.5434, "step": 3517 }, { "epoch": 0.2608437754875065, "grad_norm": 0.38875558972358704, "learning_rate": 0.00016486121530382596, "loss": 0.5457, "step": 3518 }, { "epoch": 0.2609179209609253, "grad_norm": 0.3657631278038025, "learning_rate": 0.00016485121280320083, "loss": 0.5407, "step": 3519 }, { "epoch": 0.2609920664343442, "grad_norm": 0.3633245527744293, "learning_rate": 0.00016484121030257564, "loss": 0.5145, "step": 3520 }, { "epoch": 0.261066211907763, "grad_norm": 0.35065579414367676, "learning_rate": 0.0001648312078019505, "loss": 0.5064, "step": 3521 }, { "epoch": 0.26114035738118185, "grad_norm": 0.3527795970439911, "learning_rate": 0.00016482120530132534, "loss": 0.5104, "step": 3522 }, { "epoch": 0.26121450285460074, "grad_norm": 0.37473946809768677, "learning_rate": 0.0001648112028007002, "loss": 0.5023, "step": 3523 }, { "epoch": 0.26128864832801957, "grad_norm": 0.3745957911014557, "learning_rate": 0.000164801200300075, "loss": 0.5209, "step": 3524 }, { "epoch": 0.2613627938014384, "grad_norm": 0.37029263377189636, "learning_rate": 0.00016479119779944988, "loss": 0.5277, "step": 3525 }, { "epoch": 0.2614369392748573, "grad_norm": 0.36668527126312256, "learning_rate": 0.0001647811952988247, "loss": 0.5553, "step": 3526 }, { "epoch": 0.2615110847482761, "grad_norm": 0.38211575150489807, "learning_rate": 0.00016477119279819955, "loss": 0.5691, "step": 3527 }, { "epoch": 0.26158523022169494, "grad_norm": 0.36539211869239807, "learning_rate": 0.0001647611902975744, "loss": 0.5071, "step": 3528 }, { "epoch": 0.2616593756951138, "grad_norm": 0.3878132402896881, "learning_rate": 0.00016475118779694925, "loss": 0.5935, "step": 3529 }, { "epoch": 0.26173352116853266, "grad_norm": 0.354051411151886, "learning_rate": 0.0001647411852963241, "loss": 0.5469, "step": 3530 }, { "epoch": 0.2618076666419515, "grad_norm": 0.3969053328037262, "learning_rate": 0.00016473118279569893, "loss": 0.5476, "step": 3531 }, { "epoch": 0.26188181211537037, "grad_norm": 0.3634618818759918, "learning_rate": 0.00016472118029507376, "loss": 0.5405, "step": 3532 }, { "epoch": 0.2619559575887892, "grad_norm": 0.3615754246711731, "learning_rate": 0.00016471117779444863, "loss": 0.5209, "step": 3533 }, { "epoch": 0.26203010306220803, "grad_norm": 0.341440349817276, "learning_rate": 0.00016470117529382346, "loss": 0.5241, "step": 3534 }, { "epoch": 0.2621042485356269, "grad_norm": 0.33795931935310364, "learning_rate": 0.0001646911727931983, "loss": 0.5094, "step": 3535 }, { "epoch": 0.26217839400904575, "grad_norm": 0.3762549161911011, "learning_rate": 0.00016468117029257314, "loss": 0.5776, "step": 3536 }, { "epoch": 0.2622525394824646, "grad_norm": 0.35096511244773865, "learning_rate": 0.000164671167791948, "loss": 0.5103, "step": 3537 }, { "epoch": 0.26232668495588346, "grad_norm": 0.36555686593055725, "learning_rate": 0.00016466116529132284, "loss": 0.5646, "step": 3538 }, { "epoch": 0.2624008304293023, "grad_norm": 0.3413756489753723, "learning_rate": 0.00016465116279069768, "loss": 0.528, "step": 3539 }, { "epoch": 0.2624749759027211, "grad_norm": 0.3834722340106964, "learning_rate": 0.0001646411602900725, "loss": 0.5755, "step": 3540 }, { "epoch": 0.26254912137614, "grad_norm": 0.35666602849960327, "learning_rate": 0.00016463115778944738, "loss": 0.5535, "step": 3541 }, { "epoch": 0.26262326684955883, "grad_norm": 0.34369757771492004, "learning_rate": 0.00016462115528882221, "loss": 0.5193, "step": 3542 }, { "epoch": 0.26269741232297766, "grad_norm": 0.3385297656059265, "learning_rate": 0.00016461115278819705, "loss": 0.5365, "step": 3543 }, { "epoch": 0.26277155779639655, "grad_norm": 0.374775767326355, "learning_rate": 0.00016460115028757192, "loss": 0.5496, "step": 3544 }, { "epoch": 0.2628457032698154, "grad_norm": 0.3242216110229492, "learning_rate": 0.00016459114778694675, "loss": 0.4762, "step": 3545 }, { "epoch": 0.2629198487432342, "grad_norm": 0.39730891585350037, "learning_rate": 0.0001645811452863216, "loss": 0.6058, "step": 3546 }, { "epoch": 0.2629939942166531, "grad_norm": 0.3913711905479431, "learning_rate": 0.00016457114278569643, "loss": 0.5402, "step": 3547 }, { "epoch": 0.2630681396900719, "grad_norm": 0.37072131037712097, "learning_rate": 0.0001645611402850713, "loss": 0.5468, "step": 3548 }, { "epoch": 0.26314228516349075, "grad_norm": 0.3430386781692505, "learning_rate": 0.00016455113778444613, "loss": 0.4867, "step": 3549 }, { "epoch": 0.26321643063690964, "grad_norm": 0.3564560115337372, "learning_rate": 0.00016454113528382097, "loss": 0.5274, "step": 3550 }, { "epoch": 0.26329057611032847, "grad_norm": 0.3628866374492645, "learning_rate": 0.0001645311327831958, "loss": 0.5137, "step": 3551 }, { "epoch": 0.2633647215837473, "grad_norm": 0.37900957465171814, "learning_rate": 0.00016452113028257067, "loss": 0.5599, "step": 3552 }, { "epoch": 0.2634388670571662, "grad_norm": 0.359286904335022, "learning_rate": 0.00016451112778194548, "loss": 0.5382, "step": 3553 }, { "epoch": 0.263513012530585, "grad_norm": 0.3772462010383606, "learning_rate": 0.00016450112528132034, "loss": 0.5301, "step": 3554 }, { "epoch": 0.26358715800400384, "grad_norm": 0.3481152653694153, "learning_rate": 0.00016449112278069518, "loss": 0.514, "step": 3555 }, { "epoch": 0.2636613034774227, "grad_norm": 0.3644728362560272, "learning_rate": 0.00016448112028007004, "loss": 0.5605, "step": 3556 }, { "epoch": 0.26373544895084156, "grad_norm": 0.3430963158607483, "learning_rate": 0.00016447111777944485, "loss": 0.5265, "step": 3557 }, { "epoch": 0.2638095944242604, "grad_norm": 0.3504129946231842, "learning_rate": 0.00016446111527881972, "loss": 0.525, "step": 3558 }, { "epoch": 0.26388373989767927, "grad_norm": 0.38464096188545227, "learning_rate": 0.00016445111277819455, "loss": 0.5633, "step": 3559 }, { "epoch": 0.2639578853710981, "grad_norm": 0.356923907995224, "learning_rate": 0.00016444111027756942, "loss": 0.5193, "step": 3560 }, { "epoch": 0.26403203084451693, "grad_norm": 0.40190643072128296, "learning_rate": 0.00016443110777694423, "loss": 0.6219, "step": 3561 }, { "epoch": 0.2641061763179358, "grad_norm": 0.396893173456192, "learning_rate": 0.0001644211052763191, "loss": 0.5509, "step": 3562 }, { "epoch": 0.26418032179135464, "grad_norm": 0.35536685585975647, "learning_rate": 0.00016441110277569393, "loss": 0.4779, "step": 3563 }, { "epoch": 0.2642544672647735, "grad_norm": 0.35735395550727844, "learning_rate": 0.00016440110027506877, "loss": 0.5035, "step": 3564 }, { "epoch": 0.26432861273819236, "grad_norm": 0.3965809941291809, "learning_rate": 0.0001643910977744436, "loss": 0.5729, "step": 3565 }, { "epoch": 0.2644027582116112, "grad_norm": 0.36776965856552124, "learning_rate": 0.00016438109527381847, "loss": 0.5187, "step": 3566 }, { "epoch": 0.26447690368503, "grad_norm": 0.3806729316711426, "learning_rate": 0.0001643710927731933, "loss": 0.5815, "step": 3567 }, { "epoch": 0.2645510491584489, "grad_norm": 0.36952105164527893, "learning_rate": 0.00016436109027256814, "loss": 0.5067, "step": 3568 }, { "epoch": 0.26462519463186773, "grad_norm": 0.35566475987434387, "learning_rate": 0.00016435108777194298, "loss": 0.5122, "step": 3569 }, { "epoch": 0.26469934010528656, "grad_norm": 0.39680215716362, "learning_rate": 0.00016434108527131784, "loss": 0.552, "step": 3570 }, { "epoch": 0.26477348557870545, "grad_norm": 0.35840755701065063, "learning_rate": 0.00016433108277069268, "loss": 0.4848, "step": 3571 }, { "epoch": 0.2648476310521243, "grad_norm": 0.3689603805541992, "learning_rate": 0.00016432108027006752, "loss": 0.5141, "step": 3572 }, { "epoch": 0.2649217765255431, "grad_norm": 0.37836790084838867, "learning_rate": 0.00016431107776944235, "loss": 0.5569, "step": 3573 }, { "epoch": 0.26499592199896194, "grad_norm": 0.3657991290092468, "learning_rate": 0.00016430107526881722, "loss": 0.4957, "step": 3574 }, { "epoch": 0.2650700674723808, "grad_norm": 0.37228095531463623, "learning_rate": 0.00016429107276819206, "loss": 0.5544, "step": 3575 }, { "epoch": 0.26514421294579965, "grad_norm": 0.36628949642181396, "learning_rate": 0.0001642810702675669, "loss": 0.503, "step": 3576 }, { "epoch": 0.2652183584192185, "grad_norm": 0.3483349680900574, "learning_rate": 0.00016427106776694176, "loss": 0.5214, "step": 3577 }, { "epoch": 0.26529250389263737, "grad_norm": 0.36473533511161804, "learning_rate": 0.0001642610652663166, "loss": 0.5493, "step": 3578 }, { "epoch": 0.2653666493660562, "grad_norm": 0.3769003450870514, "learning_rate": 0.00016425106276569143, "loss": 0.508, "step": 3579 }, { "epoch": 0.265440794839475, "grad_norm": 0.3708319365978241, "learning_rate": 0.00016424106026506627, "loss": 0.5416, "step": 3580 }, { "epoch": 0.2655149403128939, "grad_norm": 0.3496326208114624, "learning_rate": 0.00016423105776444113, "loss": 0.5198, "step": 3581 }, { "epoch": 0.26558908578631274, "grad_norm": 0.36983317136764526, "learning_rate": 0.00016422105526381597, "loss": 0.5335, "step": 3582 }, { "epoch": 0.26566323125973157, "grad_norm": 0.35786885023117065, "learning_rate": 0.0001642110527631908, "loss": 0.4875, "step": 3583 }, { "epoch": 0.26573737673315045, "grad_norm": 0.3549981415271759, "learning_rate": 0.00016420105026256564, "loss": 0.5292, "step": 3584 }, { "epoch": 0.2658115222065693, "grad_norm": 0.38842570781707764, "learning_rate": 0.0001641910477619405, "loss": 0.537, "step": 3585 }, { "epoch": 0.2658856676799881, "grad_norm": 0.3723103702068329, "learning_rate": 0.00016418104526131534, "loss": 0.5216, "step": 3586 }, { "epoch": 0.265959813153407, "grad_norm": 0.3951936960220337, "learning_rate": 0.00016417104276069018, "loss": 0.5139, "step": 3587 }, { "epoch": 0.26603395862682583, "grad_norm": 0.36500057578086853, "learning_rate": 0.00016416104026006502, "loss": 0.542, "step": 3588 }, { "epoch": 0.26610810410024466, "grad_norm": 0.34768542647361755, "learning_rate": 0.00016415103775943988, "loss": 0.5264, "step": 3589 }, { "epoch": 0.26618224957366354, "grad_norm": 0.3568667471408844, "learning_rate": 0.0001641410352588147, "loss": 0.4985, "step": 3590 }, { "epoch": 0.2662563950470824, "grad_norm": 0.35841837525367737, "learning_rate": 0.00016413103275818956, "loss": 0.535, "step": 3591 }, { "epoch": 0.2663305405205012, "grad_norm": 0.3743859529495239, "learning_rate": 0.0001641210302575644, "loss": 0.5115, "step": 3592 }, { "epoch": 0.2664046859939201, "grad_norm": 0.37434473633766174, "learning_rate": 0.00016411102775693926, "loss": 0.5592, "step": 3593 }, { "epoch": 0.2664788314673389, "grad_norm": 0.3591171205043793, "learning_rate": 0.00016410102525631407, "loss": 0.5453, "step": 3594 }, { "epoch": 0.26655297694075775, "grad_norm": 0.36940115690231323, "learning_rate": 0.00016409102275568893, "loss": 0.5421, "step": 3595 }, { "epoch": 0.26662712241417663, "grad_norm": 0.36266106367111206, "learning_rate": 0.00016408102025506377, "loss": 0.5263, "step": 3596 }, { "epoch": 0.26670126788759546, "grad_norm": 0.3775557279586792, "learning_rate": 0.00016407101775443863, "loss": 0.5757, "step": 3597 }, { "epoch": 0.2667754133610143, "grad_norm": 0.3412711024284363, "learning_rate": 0.00016406101525381344, "loss": 0.49, "step": 3598 }, { "epoch": 0.2668495588344332, "grad_norm": 0.36943742632865906, "learning_rate": 0.0001640510127531883, "loss": 0.528, "step": 3599 }, { "epoch": 0.266923704307852, "grad_norm": 0.34438109397888184, "learning_rate": 0.00016404101025256315, "loss": 0.5428, "step": 3600 }, { "epoch": 0.26699784978127084, "grad_norm": 0.3950587809085846, "learning_rate": 0.00016403100775193798, "loss": 0.5222, "step": 3601 }, { "epoch": 0.2670719952546897, "grad_norm": 0.41965243220329285, "learning_rate": 0.00016402100525131282, "loss": 0.5795, "step": 3602 }, { "epoch": 0.26714614072810855, "grad_norm": 0.3655712902545929, "learning_rate": 0.00016401100275068768, "loss": 0.5016, "step": 3603 }, { "epoch": 0.2672202862015274, "grad_norm": 0.37627744674682617, "learning_rate": 0.00016400100025006252, "loss": 0.5548, "step": 3604 }, { "epoch": 0.26729443167494626, "grad_norm": 0.36326864361763, "learning_rate": 0.00016399099774943736, "loss": 0.552, "step": 3605 }, { "epoch": 0.2673685771483651, "grad_norm": 0.3903508484363556, "learning_rate": 0.0001639809952488122, "loss": 0.516, "step": 3606 }, { "epoch": 0.2674427226217839, "grad_norm": 0.36906635761260986, "learning_rate": 0.00016397099274818706, "loss": 0.5263, "step": 3607 }, { "epoch": 0.2675168680952028, "grad_norm": 0.36555737257003784, "learning_rate": 0.0001639609902475619, "loss": 0.5464, "step": 3608 }, { "epoch": 0.26759101356862164, "grad_norm": 0.3474041521549225, "learning_rate": 0.00016395098774693673, "loss": 0.5545, "step": 3609 }, { "epoch": 0.26766515904204047, "grad_norm": 0.3389364778995514, "learning_rate": 0.0001639409852463116, "loss": 0.5083, "step": 3610 }, { "epoch": 0.26773930451545935, "grad_norm": 0.3546210825443268, "learning_rate": 0.00016393098274568643, "loss": 0.5132, "step": 3611 }, { "epoch": 0.2678134499888782, "grad_norm": 0.3519228398799896, "learning_rate": 0.00016392098024506127, "loss": 0.5185, "step": 3612 }, { "epoch": 0.267887595462297, "grad_norm": 0.3377664089202881, "learning_rate": 0.0001639109777444361, "loss": 0.4824, "step": 3613 }, { "epoch": 0.2679617409357159, "grad_norm": 0.3585679829120636, "learning_rate": 0.00016390097524381097, "loss": 0.516, "step": 3614 }, { "epoch": 0.2680358864091347, "grad_norm": 0.37644606828689575, "learning_rate": 0.0001638909727431858, "loss": 0.574, "step": 3615 }, { "epoch": 0.26811003188255356, "grad_norm": 0.3614829480648041, "learning_rate": 0.00016388097024256065, "loss": 0.5126, "step": 3616 }, { "epoch": 0.26818417735597244, "grad_norm": 0.3700408339500427, "learning_rate": 0.00016387096774193548, "loss": 0.5207, "step": 3617 }, { "epoch": 0.26825832282939127, "grad_norm": 0.34921586513519287, "learning_rate": 0.00016386096524131035, "loss": 0.5036, "step": 3618 }, { "epoch": 0.2683324683028101, "grad_norm": 0.3689384460449219, "learning_rate": 0.00016385096274068519, "loss": 0.5535, "step": 3619 }, { "epoch": 0.268406613776229, "grad_norm": 0.3403341472148895, "learning_rate": 0.00016384096024006002, "loss": 0.5067, "step": 3620 }, { "epoch": 0.2684807592496478, "grad_norm": 0.3739043176174164, "learning_rate": 0.00016383095773943486, "loss": 0.5323, "step": 3621 }, { "epoch": 0.26855490472306665, "grad_norm": 0.38915765285491943, "learning_rate": 0.00016382095523880972, "loss": 0.5965, "step": 3622 }, { "epoch": 0.26862905019648553, "grad_norm": 0.3573678731918335, "learning_rate": 0.00016381095273818456, "loss": 0.5211, "step": 3623 }, { "epoch": 0.26870319566990436, "grad_norm": 0.35617923736572266, "learning_rate": 0.0001638009502375594, "loss": 0.5427, "step": 3624 }, { "epoch": 0.2687773411433232, "grad_norm": 0.37080198526382446, "learning_rate": 0.00016379094773693424, "loss": 0.5469, "step": 3625 }, { "epoch": 0.2688514866167421, "grad_norm": 0.3828301429748535, "learning_rate": 0.0001637809452363091, "loss": 0.54, "step": 3626 }, { "epoch": 0.2689256320901609, "grad_norm": 0.3743877410888672, "learning_rate": 0.0001637709427356839, "loss": 0.5603, "step": 3627 }, { "epoch": 0.26899977756357973, "grad_norm": 0.37353917956352234, "learning_rate": 0.00016376094023505877, "loss": 0.5444, "step": 3628 }, { "epoch": 0.26907392303699856, "grad_norm": 0.3492814898490906, "learning_rate": 0.0001637509377344336, "loss": 0.5232, "step": 3629 }, { "epoch": 0.26914806851041745, "grad_norm": 0.36517807841300964, "learning_rate": 0.00016374093523380847, "loss": 0.5328, "step": 3630 }, { "epoch": 0.2692222139838363, "grad_norm": 0.3457784950733185, "learning_rate": 0.00016373093273318328, "loss": 0.5298, "step": 3631 }, { "epoch": 0.2692963594572551, "grad_norm": 0.3796440362930298, "learning_rate": 0.00016372093023255815, "loss": 0.5836, "step": 3632 }, { "epoch": 0.269370504930674, "grad_norm": 0.34497174620628357, "learning_rate": 0.00016371092773193299, "loss": 0.5083, "step": 3633 }, { "epoch": 0.2694446504040928, "grad_norm": 0.349254846572876, "learning_rate": 0.00016370092523130785, "loss": 0.5328, "step": 3634 }, { "epoch": 0.26951879587751165, "grad_norm": 0.41109952330589294, "learning_rate": 0.00016369092273068266, "loss": 0.5713, "step": 3635 }, { "epoch": 0.26959294135093054, "grad_norm": 0.3780217170715332, "learning_rate": 0.00016368092023005752, "loss": 0.5344, "step": 3636 }, { "epoch": 0.26966708682434937, "grad_norm": 0.36721721291542053, "learning_rate": 0.00016367091772943236, "loss": 0.5398, "step": 3637 }, { "epoch": 0.2697412322977682, "grad_norm": 0.36043769121170044, "learning_rate": 0.0001636609152288072, "loss": 0.5267, "step": 3638 }, { "epoch": 0.2698153777711871, "grad_norm": 0.3424500823020935, "learning_rate": 0.00016365091272818204, "loss": 0.5, "step": 3639 }, { "epoch": 0.2698895232446059, "grad_norm": 0.3427444398403168, "learning_rate": 0.0001636409102275569, "loss": 0.5159, "step": 3640 }, { "epoch": 0.26996366871802474, "grad_norm": 0.3728165328502655, "learning_rate": 0.00016363090772693174, "loss": 0.5318, "step": 3641 }, { "epoch": 0.2700378141914436, "grad_norm": 0.3512375056743622, "learning_rate": 0.00016362090522630657, "loss": 0.4887, "step": 3642 }, { "epoch": 0.27011195966486246, "grad_norm": 0.37110868096351624, "learning_rate": 0.00016361090272568144, "loss": 0.5485, "step": 3643 }, { "epoch": 0.2701861051382813, "grad_norm": 0.39294472336769104, "learning_rate": 0.00016360090022505628, "loss": 0.5924, "step": 3644 }, { "epoch": 0.27026025061170017, "grad_norm": 0.34596896171569824, "learning_rate": 0.0001635908977244311, "loss": 0.4963, "step": 3645 }, { "epoch": 0.270334396085119, "grad_norm": 0.362288236618042, "learning_rate": 0.00016358089522380595, "loss": 0.506, "step": 3646 }, { "epoch": 0.27040854155853783, "grad_norm": 0.3799595832824707, "learning_rate": 0.00016357089272318081, "loss": 0.5325, "step": 3647 }, { "epoch": 0.2704826870319567, "grad_norm": 0.38216033577919006, "learning_rate": 0.00016356089022255565, "loss": 0.5272, "step": 3648 }, { "epoch": 0.27055683250537554, "grad_norm": 0.4113522171974182, "learning_rate": 0.0001635508877219305, "loss": 0.5615, "step": 3649 }, { "epoch": 0.2706309779787944, "grad_norm": 0.35181641578674316, "learning_rate": 0.00016354088522130533, "loss": 0.5323, "step": 3650 }, { "epoch": 0.27070512345221326, "grad_norm": 0.38388359546661377, "learning_rate": 0.0001635308827206802, "loss": 0.5137, "step": 3651 }, { "epoch": 0.2707792689256321, "grad_norm": 0.35873889923095703, "learning_rate": 0.00016352088022005503, "loss": 0.5372, "step": 3652 }, { "epoch": 0.2708534143990509, "grad_norm": 0.368283748626709, "learning_rate": 0.00016351087771942986, "loss": 0.5152, "step": 3653 }, { "epoch": 0.2709275598724698, "grad_norm": 0.3628714382648468, "learning_rate": 0.0001635008752188047, "loss": 0.5286, "step": 3654 }, { "epoch": 0.27100170534588863, "grad_norm": 0.3568710684776306, "learning_rate": 0.00016349087271817956, "loss": 0.5316, "step": 3655 }, { "epoch": 0.27107585081930746, "grad_norm": 0.43285897374153137, "learning_rate": 0.0001634808702175544, "loss": 0.5067, "step": 3656 }, { "epoch": 0.27114999629272635, "grad_norm": 0.36361056566238403, "learning_rate": 0.00016347086771692924, "loss": 0.501, "step": 3657 }, { "epoch": 0.2712241417661452, "grad_norm": 0.3838912844657898, "learning_rate": 0.00016346086521630408, "loss": 0.543, "step": 3658 }, { "epoch": 0.271298287239564, "grad_norm": 0.3721596896648407, "learning_rate": 0.00016345086271567894, "loss": 0.5488, "step": 3659 }, { "epoch": 0.2713724327129829, "grad_norm": 0.3951026201248169, "learning_rate": 0.00016344086021505378, "loss": 0.5784, "step": 3660 }, { "epoch": 0.2714465781864017, "grad_norm": 0.4249403476715088, "learning_rate": 0.00016343085771442861, "loss": 0.5929, "step": 3661 }, { "epoch": 0.27152072365982055, "grad_norm": 0.40014684200286865, "learning_rate": 0.00016342085521380345, "loss": 0.5787, "step": 3662 }, { "epoch": 0.27159486913323944, "grad_norm": 0.38888707756996155, "learning_rate": 0.00016341085271317832, "loss": 0.5373, "step": 3663 }, { "epoch": 0.27166901460665827, "grad_norm": 0.3837782144546509, "learning_rate": 0.00016340085021255313, "loss": 0.5485, "step": 3664 }, { "epoch": 0.2717431600800771, "grad_norm": 0.4031786024570465, "learning_rate": 0.000163390847711928, "loss": 0.6063, "step": 3665 }, { "epoch": 0.271817305553496, "grad_norm": 0.36688292026519775, "learning_rate": 0.00016338084521130283, "loss": 0.5507, "step": 3666 }, { "epoch": 0.2718914510269148, "grad_norm": 0.37323805689811707, "learning_rate": 0.0001633708427106777, "loss": 0.5334, "step": 3667 }, { "epoch": 0.27196559650033364, "grad_norm": 0.36620956659317017, "learning_rate": 0.0001633608402100525, "loss": 0.5181, "step": 3668 }, { "epoch": 0.2720397419737525, "grad_norm": 0.343544065952301, "learning_rate": 0.00016335083770942737, "loss": 0.5224, "step": 3669 }, { "epoch": 0.27211388744717135, "grad_norm": 0.3575773537158966, "learning_rate": 0.0001633408352088022, "loss": 0.5242, "step": 3670 }, { "epoch": 0.2721880329205902, "grad_norm": 0.3823361396789551, "learning_rate": 0.00016333083270817707, "loss": 0.5068, "step": 3671 }, { "epoch": 0.27226217839400907, "grad_norm": 0.38721054792404175, "learning_rate": 0.00016332083020755188, "loss": 0.5885, "step": 3672 }, { "epoch": 0.2723363238674279, "grad_norm": 0.35802018642425537, "learning_rate": 0.00016331082770692674, "loss": 0.55, "step": 3673 }, { "epoch": 0.27241046934084673, "grad_norm": 0.35702452063560486, "learning_rate": 0.00016330082520630158, "loss": 0.5213, "step": 3674 }, { "epoch": 0.2724846148142656, "grad_norm": 0.3645957112312317, "learning_rate": 0.00016329082270567642, "loss": 0.5623, "step": 3675 }, { "epoch": 0.27255876028768444, "grad_norm": 0.354327529668808, "learning_rate": 0.00016328082020505128, "loss": 0.5181, "step": 3676 }, { "epoch": 0.2726329057611033, "grad_norm": 0.36768409609794617, "learning_rate": 0.00016327081770442612, "loss": 0.5393, "step": 3677 }, { "epoch": 0.27270705123452216, "grad_norm": 0.3987981379032135, "learning_rate": 0.00016326081520380095, "loss": 0.5729, "step": 3678 }, { "epoch": 0.272781196707941, "grad_norm": 0.40492016077041626, "learning_rate": 0.0001632508127031758, "loss": 0.5997, "step": 3679 }, { "epoch": 0.2728553421813598, "grad_norm": 0.3626430928707123, "learning_rate": 0.00016324081020255065, "loss": 0.5273, "step": 3680 }, { "epoch": 0.2729294876547787, "grad_norm": 0.33311378955841064, "learning_rate": 0.0001632308077019255, "loss": 0.4942, "step": 3681 }, { "epoch": 0.27300363312819753, "grad_norm": 0.37895238399505615, "learning_rate": 0.00016322080520130033, "loss": 0.5718, "step": 3682 }, { "epoch": 0.27307777860161636, "grad_norm": 0.3634902834892273, "learning_rate": 0.00016321080270067517, "loss": 0.56, "step": 3683 }, { "epoch": 0.27315192407503525, "grad_norm": 0.3585858941078186, "learning_rate": 0.00016320080020005003, "loss": 0.5016, "step": 3684 }, { "epoch": 0.2732260695484541, "grad_norm": 0.3766036629676819, "learning_rate": 0.00016319079769942487, "loss": 0.5619, "step": 3685 }, { "epoch": 0.2733002150218729, "grad_norm": 0.35959187150001526, "learning_rate": 0.00016318079519879973, "loss": 0.5284, "step": 3686 }, { "epoch": 0.27337436049529173, "grad_norm": 0.3763591945171356, "learning_rate": 0.00016317079269817454, "loss": 0.524, "step": 3687 }, { "epoch": 0.2734485059687106, "grad_norm": 0.37593474984169006, "learning_rate": 0.0001631607901975494, "loss": 0.5484, "step": 3688 }, { "epoch": 0.27352265144212945, "grad_norm": 0.36697864532470703, "learning_rate": 0.00016315078769692424, "loss": 0.5319, "step": 3689 }, { "epoch": 0.2735967969155483, "grad_norm": 0.40271925926208496, "learning_rate": 0.00016314078519629908, "loss": 0.568, "step": 3690 }, { "epoch": 0.27367094238896716, "grad_norm": 0.3666543960571289, "learning_rate": 0.00016313078269567392, "loss": 0.5151, "step": 3691 }, { "epoch": 0.273745087862386, "grad_norm": 0.36337706446647644, "learning_rate": 0.00016312078019504878, "loss": 0.5308, "step": 3692 }, { "epoch": 0.2738192333358048, "grad_norm": 0.3830936849117279, "learning_rate": 0.00016311077769442362, "loss": 0.5627, "step": 3693 }, { "epoch": 0.2738933788092237, "grad_norm": 0.35115110874176025, "learning_rate": 0.00016310077519379846, "loss": 0.4867, "step": 3694 }, { "epoch": 0.27396752428264254, "grad_norm": 0.35083574056625366, "learning_rate": 0.0001630907726931733, "loss": 0.5045, "step": 3695 }, { "epoch": 0.27404166975606137, "grad_norm": 0.3545580804347992, "learning_rate": 0.00016308077019254816, "loss": 0.5426, "step": 3696 }, { "epoch": 0.27411581522948025, "grad_norm": 0.35112887620925903, "learning_rate": 0.000163070767691923, "loss": 0.5096, "step": 3697 }, { "epoch": 0.2741899607028991, "grad_norm": 0.34932801127433777, "learning_rate": 0.00016306076519129783, "loss": 0.5079, "step": 3698 }, { "epoch": 0.2742641061763179, "grad_norm": 0.3869774639606476, "learning_rate": 0.00016305076269067267, "loss": 0.5817, "step": 3699 }, { "epoch": 0.2743382516497368, "grad_norm": 0.3561572730541229, "learning_rate": 0.00016304076019004753, "loss": 0.515, "step": 3700 }, { "epoch": 0.2744123971231556, "grad_norm": 0.36633047461509705, "learning_rate": 0.00016303075768942234, "loss": 0.5283, "step": 3701 }, { "epoch": 0.27448654259657446, "grad_norm": 0.35955390334129333, "learning_rate": 0.0001630207551887972, "loss": 0.5252, "step": 3702 }, { "epoch": 0.27456068806999334, "grad_norm": 0.34809908270835876, "learning_rate": 0.00016301075268817204, "loss": 0.5176, "step": 3703 }, { "epoch": 0.27463483354341217, "grad_norm": 0.36508285999298096, "learning_rate": 0.0001630007501875469, "loss": 0.5447, "step": 3704 }, { "epoch": 0.274708979016831, "grad_norm": 0.38115552067756653, "learning_rate": 0.00016299074768692172, "loss": 0.5465, "step": 3705 }, { "epoch": 0.2747831244902499, "grad_norm": 0.35319384932518005, "learning_rate": 0.00016298074518629658, "loss": 0.5281, "step": 3706 }, { "epoch": 0.2748572699636687, "grad_norm": 0.3574587404727936, "learning_rate": 0.00016297074268567142, "loss": 0.5297, "step": 3707 }, { "epoch": 0.27493141543708755, "grad_norm": 0.3530995845794678, "learning_rate": 0.00016296074018504628, "loss": 0.4924, "step": 3708 }, { "epoch": 0.27500556091050643, "grad_norm": 0.3799471855163574, "learning_rate": 0.0001629507376844211, "loss": 0.5893, "step": 3709 }, { "epoch": 0.27507970638392526, "grad_norm": 0.3889644742012024, "learning_rate": 0.00016294073518379596, "loss": 0.5402, "step": 3710 }, { "epoch": 0.2751538518573441, "grad_norm": 0.36488959193229675, "learning_rate": 0.0001629307326831708, "loss": 0.5347, "step": 3711 }, { "epoch": 0.275227997330763, "grad_norm": 0.3803863227367401, "learning_rate": 0.00016292073018254563, "loss": 0.5655, "step": 3712 }, { "epoch": 0.2753021428041818, "grad_norm": 0.36917293071746826, "learning_rate": 0.0001629107276819205, "loss": 0.5773, "step": 3713 }, { "epoch": 0.27537628827760063, "grad_norm": 0.3763654828071594, "learning_rate": 0.00016290072518129533, "loss": 0.5412, "step": 3714 }, { "epoch": 0.2754504337510195, "grad_norm": 0.35898256301879883, "learning_rate": 0.00016289072268067017, "loss": 0.5102, "step": 3715 }, { "epoch": 0.27552457922443835, "grad_norm": 0.37125715613365173, "learning_rate": 0.000162880720180045, "loss": 0.5217, "step": 3716 }, { "epoch": 0.2755987246978572, "grad_norm": 0.340906023979187, "learning_rate": 0.00016287071767941987, "loss": 0.4953, "step": 3717 }, { "epoch": 0.27567287017127606, "grad_norm": 0.35383015871047974, "learning_rate": 0.0001628607151787947, "loss": 0.4855, "step": 3718 }, { "epoch": 0.2757470156446949, "grad_norm": 0.36644601821899414, "learning_rate": 0.00016285071267816957, "loss": 0.5375, "step": 3719 }, { "epoch": 0.2758211611181137, "grad_norm": 0.35339146852493286, "learning_rate": 0.00016284071017754438, "loss": 0.5232, "step": 3720 }, { "epoch": 0.2758953065915326, "grad_norm": 0.36312350630760193, "learning_rate": 0.00016283070767691925, "loss": 0.5321, "step": 3721 }, { "epoch": 0.27596945206495144, "grad_norm": 0.34067052602767944, "learning_rate": 0.00016282070517629408, "loss": 0.4967, "step": 3722 }, { "epoch": 0.27604359753837027, "grad_norm": 0.34637197852134705, "learning_rate": 0.00016281070267566895, "loss": 0.4992, "step": 3723 }, { "epoch": 0.27611774301178915, "grad_norm": 0.3529849946498871, "learning_rate": 0.00016280070017504376, "loss": 0.5271, "step": 3724 }, { "epoch": 0.276191888485208, "grad_norm": 0.37737542390823364, "learning_rate": 0.00016279069767441862, "loss": 0.5481, "step": 3725 }, { "epoch": 0.2762660339586268, "grad_norm": 0.36187052726745605, "learning_rate": 0.00016278069517379346, "loss": 0.4892, "step": 3726 }, { "epoch": 0.2763401794320457, "grad_norm": 0.3561766445636749, "learning_rate": 0.0001627706926731683, "loss": 0.5285, "step": 3727 }, { "epoch": 0.2764143249054645, "grad_norm": 0.3636195659637451, "learning_rate": 0.00016276069017254313, "loss": 0.5003, "step": 3728 }, { "epoch": 0.27648847037888336, "grad_norm": 0.3699357211589813, "learning_rate": 0.000162750687671918, "loss": 0.5248, "step": 3729 }, { "epoch": 0.27656261585230224, "grad_norm": 0.4136372208595276, "learning_rate": 0.00016274068517129283, "loss": 0.5475, "step": 3730 }, { "epoch": 0.27663676132572107, "grad_norm": 0.3684573769569397, "learning_rate": 0.00016273068267066767, "loss": 0.5834, "step": 3731 }, { "epoch": 0.2767109067991399, "grad_norm": 0.35961437225341797, "learning_rate": 0.0001627206801700425, "loss": 0.5085, "step": 3732 }, { "epoch": 0.2767850522725588, "grad_norm": 0.3617570102214813, "learning_rate": 0.00016271067766941737, "loss": 0.5159, "step": 3733 }, { "epoch": 0.2768591977459776, "grad_norm": 0.36958447098731995, "learning_rate": 0.0001627006751687922, "loss": 0.517, "step": 3734 }, { "epoch": 0.27693334321939644, "grad_norm": 0.3784533441066742, "learning_rate": 0.00016269067266816705, "loss": 0.5618, "step": 3735 }, { "epoch": 0.27700748869281533, "grad_norm": 0.37317192554473877, "learning_rate": 0.00016268067016754188, "loss": 0.5603, "step": 3736 }, { "epoch": 0.27708163416623416, "grad_norm": 0.3692326247692108, "learning_rate": 0.00016267066766691675, "loss": 0.5632, "step": 3737 }, { "epoch": 0.277155779639653, "grad_norm": 0.3360002040863037, "learning_rate": 0.00016266066516629156, "loss": 0.4786, "step": 3738 }, { "epoch": 0.2772299251130719, "grad_norm": 0.3529733419418335, "learning_rate": 0.00016265066266566642, "loss": 0.5142, "step": 3739 }, { "epoch": 0.2773040705864907, "grad_norm": 0.34998220205307007, "learning_rate": 0.00016264066016504126, "loss": 0.5325, "step": 3740 }, { "epoch": 0.27737821605990953, "grad_norm": 0.3902311623096466, "learning_rate": 0.00016263065766441612, "loss": 0.5339, "step": 3741 }, { "epoch": 0.2774523615333284, "grad_norm": 0.3579995632171631, "learning_rate": 0.00016262065516379093, "loss": 0.4931, "step": 3742 }, { "epoch": 0.27752650700674725, "grad_norm": 0.366786926984787, "learning_rate": 0.0001626106526631658, "loss": 0.5627, "step": 3743 }, { "epoch": 0.2776006524801661, "grad_norm": 0.3508332669734955, "learning_rate": 0.00016260065016254064, "loss": 0.5083, "step": 3744 }, { "epoch": 0.2776747979535849, "grad_norm": 0.4140220284461975, "learning_rate": 0.0001625906476619155, "loss": 0.5693, "step": 3745 }, { "epoch": 0.2777489434270038, "grad_norm": 0.35552218556404114, "learning_rate": 0.00016258064516129034, "loss": 0.5422, "step": 3746 }, { "epoch": 0.2778230889004226, "grad_norm": 0.36338290572166443, "learning_rate": 0.00016257064266066517, "loss": 0.5593, "step": 3747 }, { "epoch": 0.27789723437384145, "grad_norm": 0.37539103627204895, "learning_rate": 0.00016256064016004, "loss": 0.5502, "step": 3748 }, { "epoch": 0.27797137984726034, "grad_norm": 0.34830382466316223, "learning_rate": 0.00016255063765941485, "loss": 0.5503, "step": 3749 }, { "epoch": 0.27804552532067917, "grad_norm": 0.3554478585720062, "learning_rate": 0.0001625406351587897, "loss": 0.5051, "step": 3750 }, { "epoch": 0.278119670794098, "grad_norm": 0.32381314039230347, "learning_rate": 0.00016253063265816455, "loss": 0.483, "step": 3751 }, { "epoch": 0.2781938162675169, "grad_norm": 0.3837431073188782, "learning_rate": 0.0001625206301575394, "loss": 0.5328, "step": 3752 }, { "epoch": 0.2782679617409357, "grad_norm": 0.3529229164123535, "learning_rate": 0.00016251062765691422, "loss": 0.5625, "step": 3753 }, { "epoch": 0.27834210721435454, "grad_norm": 0.3596112132072449, "learning_rate": 0.0001625006251562891, "loss": 0.5469, "step": 3754 }, { "epoch": 0.2784162526877734, "grad_norm": 0.3621310889720917, "learning_rate": 0.00016249062265566392, "loss": 0.5331, "step": 3755 }, { "epoch": 0.27849039816119225, "grad_norm": 0.36877110600471497, "learning_rate": 0.0001624806201550388, "loss": 0.5154, "step": 3756 }, { "epoch": 0.2785645436346111, "grad_norm": 0.38884255290031433, "learning_rate": 0.0001624706176544136, "loss": 0.5707, "step": 3757 }, { "epoch": 0.27863868910802997, "grad_norm": 0.369281142950058, "learning_rate": 0.00016246061515378846, "loss": 0.493, "step": 3758 }, { "epoch": 0.2787128345814488, "grad_norm": 0.36343303322792053, "learning_rate": 0.0001624506126531633, "loss": 0.5428, "step": 3759 }, { "epoch": 0.27878698005486763, "grad_norm": 0.36114639043807983, "learning_rate": 0.00016244061015253816, "loss": 0.5249, "step": 3760 }, { "epoch": 0.2788611255282865, "grad_norm": 0.37436342239379883, "learning_rate": 0.00016243060765191297, "loss": 0.5525, "step": 3761 }, { "epoch": 0.27893527100170534, "grad_norm": 0.3671393394470215, "learning_rate": 0.00016242060515128784, "loss": 0.5484, "step": 3762 }, { "epoch": 0.2790094164751242, "grad_norm": 0.35233697295188904, "learning_rate": 0.00016241060265066268, "loss": 0.501, "step": 3763 }, { "epoch": 0.27908356194854306, "grad_norm": 0.3689388930797577, "learning_rate": 0.0001624006001500375, "loss": 0.5426, "step": 3764 }, { "epoch": 0.2791577074219619, "grad_norm": 0.36655405163764954, "learning_rate": 0.00016239059764941235, "loss": 0.5565, "step": 3765 }, { "epoch": 0.2792318528953807, "grad_norm": 0.36935263872146606, "learning_rate": 0.00016238059514878721, "loss": 0.5179, "step": 3766 }, { "epoch": 0.2793059983687996, "grad_norm": 0.36629682779312134, "learning_rate": 0.00016237059264816205, "loss": 0.5395, "step": 3767 }, { "epoch": 0.27938014384221843, "grad_norm": 0.364418089389801, "learning_rate": 0.0001623605901475369, "loss": 0.5266, "step": 3768 }, { "epoch": 0.27945428931563726, "grad_norm": 0.36823976039886475, "learning_rate": 0.00016235058764691172, "loss": 0.5407, "step": 3769 }, { "epoch": 0.27952843478905615, "grad_norm": 0.35468220710754395, "learning_rate": 0.0001623405851462866, "loss": 0.5294, "step": 3770 }, { "epoch": 0.279602580262475, "grad_norm": 0.3900938630104065, "learning_rate": 0.00016233058264566143, "loss": 0.5313, "step": 3771 }, { "epoch": 0.2796767257358938, "grad_norm": 0.3560287058353424, "learning_rate": 0.00016232058014503626, "loss": 0.504, "step": 3772 }, { "epoch": 0.2797508712093127, "grad_norm": 0.3605218231678009, "learning_rate": 0.0001623105776444111, "loss": 0.5753, "step": 3773 }, { "epoch": 0.2798250166827315, "grad_norm": 0.3479291796684265, "learning_rate": 0.00016230057514378596, "loss": 0.5042, "step": 3774 }, { "epoch": 0.27989916215615035, "grad_norm": 0.3805810511112213, "learning_rate": 0.00016229057264316077, "loss": 0.5553, "step": 3775 }, { "epoch": 0.27997330762956923, "grad_norm": 0.37446367740631104, "learning_rate": 0.00016228057014253564, "loss": 0.5699, "step": 3776 }, { "epoch": 0.28004745310298806, "grad_norm": 0.40709221363067627, "learning_rate": 0.00016227056764191048, "loss": 0.5637, "step": 3777 }, { "epoch": 0.2801215985764069, "grad_norm": 0.33113181591033936, "learning_rate": 0.00016226056514128534, "loss": 0.5207, "step": 3778 }, { "epoch": 0.2801957440498258, "grad_norm": 0.36120566725730896, "learning_rate": 0.00016225056264066018, "loss": 0.5137, "step": 3779 }, { "epoch": 0.2802698895232446, "grad_norm": 0.33180785179138184, "learning_rate": 0.00016224056014003501, "loss": 0.4933, "step": 3780 }, { "epoch": 0.28034403499666344, "grad_norm": 0.36342552304267883, "learning_rate": 0.00016223055763940985, "loss": 0.5458, "step": 3781 }, { "epoch": 0.2804181804700823, "grad_norm": 0.37192878127098083, "learning_rate": 0.00016222055513878472, "loss": 0.5439, "step": 3782 }, { "epoch": 0.28049232594350115, "grad_norm": 0.37548303604125977, "learning_rate": 0.00016221055263815955, "loss": 0.5145, "step": 3783 }, { "epoch": 0.28056647141692, "grad_norm": 0.35773664712905884, "learning_rate": 0.0001622005501375344, "loss": 0.5166, "step": 3784 }, { "epoch": 0.28064061689033887, "grad_norm": 0.3865993320941925, "learning_rate": 0.00016219054763690923, "loss": 0.5384, "step": 3785 }, { "epoch": 0.2807147623637577, "grad_norm": 0.3691571056842804, "learning_rate": 0.00016218054513628406, "loss": 0.5356, "step": 3786 }, { "epoch": 0.2807889078371765, "grad_norm": 0.36779865622520447, "learning_rate": 0.00016217054263565893, "loss": 0.5426, "step": 3787 }, { "epoch": 0.2808630533105954, "grad_norm": 0.3626827895641327, "learning_rate": 0.00016216054013503377, "loss": 0.5417, "step": 3788 }, { "epoch": 0.28093719878401424, "grad_norm": 0.3551347553730011, "learning_rate": 0.00016215053763440863, "loss": 0.513, "step": 3789 }, { "epoch": 0.28101134425743307, "grad_norm": 0.36370259523391724, "learning_rate": 0.00016214053513378344, "loss": 0.5118, "step": 3790 }, { "epoch": 0.28108548973085196, "grad_norm": 0.3670547604560852, "learning_rate": 0.0001621305326331583, "loss": 0.4988, "step": 3791 }, { "epoch": 0.2811596352042708, "grad_norm": 0.3509088456630707, "learning_rate": 0.00016212053013253314, "loss": 0.5463, "step": 3792 }, { "epoch": 0.2812337806776896, "grad_norm": 0.341035395860672, "learning_rate": 0.000162110527631908, "loss": 0.5105, "step": 3793 }, { "epoch": 0.2813079261511085, "grad_norm": 0.3344830274581909, "learning_rate": 0.00016210052513128281, "loss": 0.479, "step": 3794 }, { "epoch": 0.28138207162452733, "grad_norm": 0.3357739746570587, "learning_rate": 0.00016209052263065768, "loss": 0.5125, "step": 3795 }, { "epoch": 0.28145621709794616, "grad_norm": 0.37884387373924255, "learning_rate": 0.00016208052013003252, "loss": 0.5627, "step": 3796 }, { "epoch": 0.28153036257136504, "grad_norm": 0.3599092960357666, "learning_rate": 0.00016207051762940738, "loss": 0.5383, "step": 3797 }, { "epoch": 0.2816045080447839, "grad_norm": 0.3685641884803772, "learning_rate": 0.0001620605151287822, "loss": 0.5517, "step": 3798 }, { "epoch": 0.2816786535182027, "grad_norm": 0.3521081507205963, "learning_rate": 0.00016205051262815705, "loss": 0.5223, "step": 3799 }, { "epoch": 0.2817527989916216, "grad_norm": 0.3627978265285492, "learning_rate": 0.0001620405101275319, "loss": 0.508, "step": 3800 }, { "epoch": 0.2818269444650404, "grad_norm": 0.3817351758480072, "learning_rate": 0.00016203050762690673, "loss": 0.5266, "step": 3801 }, { "epoch": 0.28190108993845925, "grad_norm": 0.361419141292572, "learning_rate": 0.00016202050512628157, "loss": 0.5569, "step": 3802 }, { "epoch": 0.2819752354118781, "grad_norm": 0.3768002390861511, "learning_rate": 0.00016201050262565643, "loss": 0.5218, "step": 3803 }, { "epoch": 0.28204938088529696, "grad_norm": 0.34842878580093384, "learning_rate": 0.00016200050012503127, "loss": 0.5324, "step": 3804 }, { "epoch": 0.2821235263587158, "grad_norm": 0.33227530121803284, "learning_rate": 0.0001619904976244061, "loss": 0.4733, "step": 3805 }, { "epoch": 0.2821976718321346, "grad_norm": 0.3505420982837677, "learning_rate": 0.00016198049512378094, "loss": 0.5083, "step": 3806 }, { "epoch": 0.2822718173055535, "grad_norm": 0.35989347100257874, "learning_rate": 0.0001619704926231558, "loss": 0.5338, "step": 3807 }, { "epoch": 0.28234596277897234, "grad_norm": 0.3584437072277069, "learning_rate": 0.00016196049012253064, "loss": 0.5478, "step": 3808 }, { "epoch": 0.28242010825239117, "grad_norm": 0.3733910024166107, "learning_rate": 0.00016195048762190548, "loss": 0.5667, "step": 3809 }, { "epoch": 0.28249425372581005, "grad_norm": 0.36041703820228577, "learning_rate": 0.00016194048512128032, "loss": 0.558, "step": 3810 }, { "epoch": 0.2825683991992289, "grad_norm": 0.3554653823375702, "learning_rate": 0.00016193048262065518, "loss": 0.4796, "step": 3811 }, { "epoch": 0.2826425446726477, "grad_norm": 0.3675292432308197, "learning_rate": 0.00016192048012003002, "loss": 0.5461, "step": 3812 }, { "epoch": 0.2827166901460666, "grad_norm": 0.35334718227386475, "learning_rate": 0.00016191047761940486, "loss": 0.5112, "step": 3813 }, { "epoch": 0.2827908356194854, "grad_norm": 0.3720510005950928, "learning_rate": 0.0001619004751187797, "loss": 0.5711, "step": 3814 }, { "epoch": 0.28286498109290426, "grad_norm": 0.36645999550819397, "learning_rate": 0.00016189047261815456, "loss": 0.5, "step": 3815 }, { "epoch": 0.28293912656632314, "grad_norm": 0.38374343514442444, "learning_rate": 0.0001618804701175294, "loss": 0.5507, "step": 3816 }, { "epoch": 0.28301327203974197, "grad_norm": 0.38661083579063416, "learning_rate": 0.00016187046761690423, "loss": 0.5878, "step": 3817 }, { "epoch": 0.2830874175131608, "grad_norm": 0.3744741678237915, "learning_rate": 0.00016186046511627907, "loss": 0.5359, "step": 3818 }, { "epoch": 0.2831615629865797, "grad_norm": 0.3729152977466583, "learning_rate": 0.00016185046261565393, "loss": 0.5483, "step": 3819 }, { "epoch": 0.2832357084599985, "grad_norm": 0.3505602180957794, "learning_rate": 0.00016184046011502877, "loss": 0.5159, "step": 3820 }, { "epoch": 0.28330985393341734, "grad_norm": 0.34990739822387695, "learning_rate": 0.0001618304576144036, "loss": 0.5631, "step": 3821 }, { "epoch": 0.28338399940683623, "grad_norm": 0.3747354745864868, "learning_rate": 0.00016182045511377847, "loss": 0.5255, "step": 3822 }, { "epoch": 0.28345814488025506, "grad_norm": 0.40564167499542236, "learning_rate": 0.0001618104526131533, "loss": 0.5637, "step": 3823 }, { "epoch": 0.2835322903536739, "grad_norm": 0.36579710245132446, "learning_rate": 0.00016180045011252814, "loss": 0.5339, "step": 3824 }, { "epoch": 0.2836064358270928, "grad_norm": 0.3634471893310547, "learning_rate": 0.00016179044761190298, "loss": 0.4875, "step": 3825 }, { "epoch": 0.2836805813005116, "grad_norm": 0.3312987685203552, "learning_rate": 0.00016178044511127785, "loss": 0.4885, "step": 3826 }, { "epoch": 0.28375472677393043, "grad_norm": 0.36401841044425964, "learning_rate": 0.00016177044261065266, "loss": 0.5018, "step": 3827 }, { "epoch": 0.2838288722473493, "grad_norm": 0.3558604419231415, "learning_rate": 0.00016176044011002752, "loss": 0.5427, "step": 3828 }, { "epoch": 0.28390301772076815, "grad_norm": 0.3713003098964691, "learning_rate": 0.00016175043760940236, "loss": 0.5258, "step": 3829 }, { "epoch": 0.283977163194187, "grad_norm": 0.3907265067100525, "learning_rate": 0.00016174043510877722, "loss": 0.5709, "step": 3830 }, { "epoch": 0.28405130866760586, "grad_norm": 0.3526594340801239, "learning_rate": 0.00016173043260815203, "loss": 0.4923, "step": 3831 }, { "epoch": 0.2841254541410247, "grad_norm": 0.3732338547706604, "learning_rate": 0.0001617204301075269, "loss": 0.5339, "step": 3832 }, { "epoch": 0.2841995996144435, "grad_norm": 0.3975437879562378, "learning_rate": 0.00016171042760690173, "loss": 0.5147, "step": 3833 }, { "epoch": 0.2842737450878624, "grad_norm": 0.36805614829063416, "learning_rate": 0.0001617004251062766, "loss": 0.5323, "step": 3834 }, { "epoch": 0.28434789056128124, "grad_norm": 0.3712303936481476, "learning_rate": 0.0001616904226056514, "loss": 0.5694, "step": 3835 }, { "epoch": 0.28442203603470007, "grad_norm": 0.34734538197517395, "learning_rate": 0.00016168042010502627, "loss": 0.5644, "step": 3836 }, { "epoch": 0.28449618150811895, "grad_norm": 0.35078415274620056, "learning_rate": 0.0001616704176044011, "loss": 0.5348, "step": 3837 }, { "epoch": 0.2845703269815378, "grad_norm": 0.38095638155937195, "learning_rate": 0.00016166041510377594, "loss": 0.5572, "step": 3838 }, { "epoch": 0.2846444724549566, "grad_norm": 0.3399215340614319, "learning_rate": 0.00016165041260315078, "loss": 0.4901, "step": 3839 }, { "epoch": 0.2847186179283755, "grad_norm": 0.3614933490753174, "learning_rate": 0.00016164041010252565, "loss": 0.5193, "step": 3840 }, { "epoch": 0.2847927634017943, "grad_norm": 0.35867002606391907, "learning_rate": 0.00016163040760190048, "loss": 0.5287, "step": 3841 }, { "epoch": 0.28486690887521315, "grad_norm": 0.35367366671562195, "learning_rate": 0.00016162040510127532, "loss": 0.5151, "step": 3842 }, { "epoch": 0.28494105434863204, "grad_norm": 0.37613922357559204, "learning_rate": 0.00016161040260065016, "loss": 0.5724, "step": 3843 }, { "epoch": 0.28501519982205087, "grad_norm": 0.3691346049308777, "learning_rate": 0.00016160040010002502, "loss": 0.4985, "step": 3844 }, { "epoch": 0.2850893452954697, "grad_norm": 0.3723798990249634, "learning_rate": 0.00016159039759939986, "loss": 0.527, "step": 3845 }, { "epoch": 0.2851634907688886, "grad_norm": 0.41057196259498596, "learning_rate": 0.0001615803950987747, "loss": 0.6018, "step": 3846 }, { "epoch": 0.2852376362423074, "grad_norm": 0.36534765362739563, "learning_rate": 0.00016157039259814953, "loss": 0.5855, "step": 3847 }, { "epoch": 0.28531178171572624, "grad_norm": 0.3863787353038788, "learning_rate": 0.0001615603900975244, "loss": 0.5586, "step": 3848 }, { "epoch": 0.2853859271891451, "grad_norm": 0.34450381994247437, "learning_rate": 0.00016155038759689923, "loss": 0.5215, "step": 3849 }, { "epoch": 0.28546007266256396, "grad_norm": 0.37074312567710876, "learning_rate": 0.00016154038509627407, "loss": 0.5243, "step": 3850 }, { "epoch": 0.2855342181359828, "grad_norm": 0.3465893268585205, "learning_rate": 0.0001615303825956489, "loss": 0.509, "step": 3851 }, { "epoch": 0.28560836360940167, "grad_norm": 0.33603018522262573, "learning_rate": 0.00016152038009502377, "loss": 0.5031, "step": 3852 }, { "epoch": 0.2856825090828205, "grad_norm": 0.3682221472263336, "learning_rate": 0.0001615103775943986, "loss": 0.5351, "step": 3853 }, { "epoch": 0.28575665455623933, "grad_norm": 0.3754462003707886, "learning_rate": 0.00016150037509377345, "loss": 0.5838, "step": 3854 }, { "epoch": 0.2858308000296582, "grad_norm": 0.3667030930519104, "learning_rate": 0.0001614903725931483, "loss": 0.5667, "step": 3855 }, { "epoch": 0.28590494550307705, "grad_norm": 0.3931807577610016, "learning_rate": 0.00016148037009252315, "loss": 0.616, "step": 3856 }, { "epoch": 0.2859790909764959, "grad_norm": 0.3647717535495758, "learning_rate": 0.00016147036759189799, "loss": 0.5624, "step": 3857 }, { "epoch": 0.2860532364499147, "grad_norm": 0.35481855273246765, "learning_rate": 0.00016146036509127282, "loss": 0.5232, "step": 3858 }, { "epoch": 0.2861273819233336, "grad_norm": 0.3493170738220215, "learning_rate": 0.00016145036259064769, "loss": 0.5086, "step": 3859 }, { "epoch": 0.2862015273967524, "grad_norm": 0.3506039083003998, "learning_rate": 0.00016144036009002252, "loss": 0.5302, "step": 3860 }, { "epoch": 0.28627567287017125, "grad_norm": 0.3237190544605255, "learning_rate": 0.00016143035758939736, "loss": 0.4863, "step": 3861 }, { "epoch": 0.28634981834359013, "grad_norm": 0.39840441942214966, "learning_rate": 0.0001614203550887722, "loss": 0.5525, "step": 3862 }, { "epoch": 0.28642396381700896, "grad_norm": 0.40424591302871704, "learning_rate": 0.00016141035258814706, "loss": 0.5503, "step": 3863 }, { "epoch": 0.2864981092904278, "grad_norm": 0.32963827252388, "learning_rate": 0.00016140035008752187, "loss": 0.4835, "step": 3864 }, { "epoch": 0.2865722547638467, "grad_norm": 0.3769855499267578, "learning_rate": 0.00016139034758689674, "loss": 0.5379, "step": 3865 }, { "epoch": 0.2866464002372655, "grad_norm": 0.3819376230239868, "learning_rate": 0.00016138034508627157, "loss": 0.5814, "step": 3866 }, { "epoch": 0.28672054571068434, "grad_norm": 0.3761710226535797, "learning_rate": 0.00016137034258564644, "loss": 0.5809, "step": 3867 }, { "epoch": 0.2867946911841032, "grad_norm": 0.34651103615760803, "learning_rate": 0.00016136034008502125, "loss": 0.5296, "step": 3868 }, { "epoch": 0.28686883665752205, "grad_norm": 0.3688606917858124, "learning_rate": 0.0001613503375843961, "loss": 0.5433, "step": 3869 }, { "epoch": 0.2869429821309409, "grad_norm": 0.3528714179992676, "learning_rate": 0.00016134033508377095, "loss": 0.5135, "step": 3870 }, { "epoch": 0.28701712760435977, "grad_norm": 0.3741759657859802, "learning_rate": 0.0001613303325831458, "loss": 0.5302, "step": 3871 }, { "epoch": 0.2870912730777786, "grad_norm": 0.3778518736362457, "learning_rate": 0.00016132033008252062, "loss": 0.5512, "step": 3872 }, { "epoch": 0.2871654185511974, "grad_norm": 0.3893776535987854, "learning_rate": 0.0001613103275818955, "loss": 0.5227, "step": 3873 }, { "epoch": 0.2872395640246163, "grad_norm": 0.3471131920814514, "learning_rate": 0.00016130032508127032, "loss": 0.5499, "step": 3874 }, { "epoch": 0.28731370949803514, "grad_norm": 0.3509293496608734, "learning_rate": 0.00016129032258064516, "loss": 0.4949, "step": 3875 }, { "epoch": 0.28738785497145397, "grad_norm": 0.3709462881088257, "learning_rate": 0.00016128032008002, "loss": 0.5414, "step": 3876 }, { "epoch": 0.28746200044487286, "grad_norm": 0.38067498803138733, "learning_rate": 0.00016127031757939486, "loss": 0.5287, "step": 3877 }, { "epoch": 0.2875361459182917, "grad_norm": 0.3767003118991852, "learning_rate": 0.0001612603150787697, "loss": 0.5158, "step": 3878 }, { "epoch": 0.2876102913917105, "grad_norm": 0.38141143321990967, "learning_rate": 0.00016125031257814454, "loss": 0.5537, "step": 3879 }, { "epoch": 0.2876844368651294, "grad_norm": 0.34878161549568176, "learning_rate": 0.00016124031007751937, "loss": 0.5286, "step": 3880 }, { "epoch": 0.28775858233854823, "grad_norm": 0.35408616065979004, "learning_rate": 0.00016123030757689424, "loss": 0.5303, "step": 3881 }, { "epoch": 0.28783272781196706, "grad_norm": 0.37463897466659546, "learning_rate": 0.00016122030507626907, "loss": 0.5552, "step": 3882 }, { "epoch": 0.28790687328538594, "grad_norm": 0.35056474804878235, "learning_rate": 0.0001612103025756439, "loss": 0.5091, "step": 3883 }, { "epoch": 0.2879810187588048, "grad_norm": 0.36270108819007874, "learning_rate": 0.00016120030007501875, "loss": 0.5292, "step": 3884 }, { "epoch": 0.2880551642322236, "grad_norm": 0.3548993468284607, "learning_rate": 0.0001611902975743936, "loss": 0.5418, "step": 3885 }, { "epoch": 0.2881293097056425, "grad_norm": 0.3547601103782654, "learning_rate": 0.00016118029507376845, "loss": 0.4888, "step": 3886 }, { "epoch": 0.2882034551790613, "grad_norm": 0.3685084879398346, "learning_rate": 0.0001611702925731433, "loss": 0.4992, "step": 3887 }, { "epoch": 0.28827760065248015, "grad_norm": 0.35465574264526367, "learning_rate": 0.00016116029007251815, "loss": 0.5253, "step": 3888 }, { "epoch": 0.28835174612589903, "grad_norm": 0.3712548017501831, "learning_rate": 0.000161150287571893, "loss": 0.542, "step": 3889 }, { "epoch": 0.28842589159931786, "grad_norm": 0.4007807672023773, "learning_rate": 0.00016114028507126783, "loss": 0.5917, "step": 3890 }, { "epoch": 0.2885000370727367, "grad_norm": 0.35779547691345215, "learning_rate": 0.00016113028257064266, "loss": 0.5235, "step": 3891 }, { "epoch": 0.2885741825461556, "grad_norm": 0.3538782298564911, "learning_rate": 0.00016112028007001753, "loss": 0.4849, "step": 3892 }, { "epoch": 0.2886483280195744, "grad_norm": 0.37442082166671753, "learning_rate": 0.00016111027756939236, "loss": 0.5056, "step": 3893 }, { "epoch": 0.28872247349299324, "grad_norm": 0.3661153316497803, "learning_rate": 0.0001611002750687672, "loss": 0.5353, "step": 3894 }, { "epoch": 0.2887966189664121, "grad_norm": 0.3582547903060913, "learning_rate": 0.00016109027256814204, "loss": 0.5125, "step": 3895 }, { "epoch": 0.28887076443983095, "grad_norm": 0.36761948466300964, "learning_rate": 0.0001610802700675169, "loss": 0.5067, "step": 3896 }, { "epoch": 0.2889449099132498, "grad_norm": 0.3483116626739502, "learning_rate": 0.00016107026756689174, "loss": 0.4899, "step": 3897 }, { "epoch": 0.28901905538666867, "grad_norm": 0.3932817578315735, "learning_rate": 0.00016106026506626658, "loss": 0.5102, "step": 3898 }, { "epoch": 0.2890932008600875, "grad_norm": 0.39007988572120667, "learning_rate": 0.00016105026256564141, "loss": 0.5628, "step": 3899 }, { "epoch": 0.2891673463335063, "grad_norm": 0.3651290237903595, "learning_rate": 0.00016104026006501628, "loss": 0.5153, "step": 3900 }, { "epoch": 0.2892414918069252, "grad_norm": 0.3385315239429474, "learning_rate": 0.0001610302575643911, "loss": 0.4926, "step": 3901 }, { "epoch": 0.28931563728034404, "grad_norm": 0.3626154959201813, "learning_rate": 0.00016102025506376595, "loss": 0.525, "step": 3902 }, { "epoch": 0.28938978275376287, "grad_norm": 0.3743407428264618, "learning_rate": 0.0001610102525631408, "loss": 0.5231, "step": 3903 }, { "epoch": 0.28946392822718175, "grad_norm": 0.3404303193092346, "learning_rate": 0.00016100025006251565, "loss": 0.5054, "step": 3904 }, { "epoch": 0.2895380737006006, "grad_norm": 0.38325825333595276, "learning_rate": 0.00016099024756189046, "loss": 0.5676, "step": 3905 }, { "epoch": 0.2896122191740194, "grad_norm": 0.35478973388671875, "learning_rate": 0.00016098024506126533, "loss": 0.547, "step": 3906 }, { "epoch": 0.2896863646474383, "grad_norm": 0.33849743008613586, "learning_rate": 0.00016097024256064016, "loss": 0.5067, "step": 3907 }, { "epoch": 0.28976051012085713, "grad_norm": 0.3498205542564392, "learning_rate": 0.00016096024006001503, "loss": 0.5526, "step": 3908 }, { "epoch": 0.28983465559427596, "grad_norm": 0.34798410534858704, "learning_rate": 0.00016095023755938984, "loss": 0.5452, "step": 3909 }, { "epoch": 0.28990880106769484, "grad_norm": 0.3800436556339264, "learning_rate": 0.0001609402350587647, "loss": 0.5186, "step": 3910 }, { "epoch": 0.2899829465411137, "grad_norm": 0.37732994556427, "learning_rate": 0.00016093023255813954, "loss": 0.5766, "step": 3911 }, { "epoch": 0.2900570920145325, "grad_norm": 0.362515926361084, "learning_rate": 0.00016092023005751438, "loss": 0.5233, "step": 3912 }, { "epoch": 0.2901312374879514, "grad_norm": 0.3449791669845581, "learning_rate": 0.00016091022755688921, "loss": 0.5064, "step": 3913 }, { "epoch": 0.2902053829613702, "grad_norm": 0.3485266864299774, "learning_rate": 0.00016090022505626408, "loss": 0.5324, "step": 3914 }, { "epoch": 0.29027952843478905, "grad_norm": 0.3609869182109833, "learning_rate": 0.00016089022255563892, "loss": 0.5657, "step": 3915 }, { "epoch": 0.2903536739082079, "grad_norm": 0.34787383675575256, "learning_rate": 0.00016088022005501375, "loss": 0.4719, "step": 3916 }, { "epoch": 0.29042781938162676, "grad_norm": 0.3868739604949951, "learning_rate": 0.0001608702175543886, "loss": 0.5653, "step": 3917 }, { "epoch": 0.2905019648550456, "grad_norm": 0.36880630254745483, "learning_rate": 0.00016086021505376345, "loss": 0.5326, "step": 3918 }, { "epoch": 0.2905761103284644, "grad_norm": 0.3681747615337372, "learning_rate": 0.0001608502125531383, "loss": 0.5411, "step": 3919 }, { "epoch": 0.2906502558018833, "grad_norm": 0.40139040350914, "learning_rate": 0.00016084021005251313, "loss": 0.5379, "step": 3920 }, { "epoch": 0.29072440127530214, "grad_norm": 0.4201037883758545, "learning_rate": 0.000160830207551888, "loss": 0.5448, "step": 3921 }, { "epoch": 0.29079854674872097, "grad_norm": 0.348874568939209, "learning_rate": 0.00016082020505126283, "loss": 0.5049, "step": 3922 }, { "epoch": 0.29087269222213985, "grad_norm": 0.3374391198158264, "learning_rate": 0.00016081020255063767, "loss": 0.4906, "step": 3923 }, { "epoch": 0.2909468376955587, "grad_norm": 0.3644963800907135, "learning_rate": 0.0001608002000500125, "loss": 0.5278, "step": 3924 }, { "epoch": 0.2910209831689775, "grad_norm": 0.37291520833969116, "learning_rate": 0.00016079019754938737, "loss": 0.5072, "step": 3925 }, { "epoch": 0.2910951286423964, "grad_norm": 0.3879813253879547, "learning_rate": 0.0001607801950487622, "loss": 0.5519, "step": 3926 }, { "epoch": 0.2911692741158152, "grad_norm": 0.38568586111068726, "learning_rate": 0.00016077019254813704, "loss": 0.5442, "step": 3927 }, { "epoch": 0.29124341958923405, "grad_norm": 0.38708823919296265, "learning_rate": 0.00016076019004751188, "loss": 0.5503, "step": 3928 }, { "epoch": 0.29131756506265294, "grad_norm": 0.37522974610328674, "learning_rate": 0.00016075018754688674, "loss": 0.5527, "step": 3929 }, { "epoch": 0.29139171053607177, "grad_norm": 0.36341366171836853, "learning_rate": 0.00016074018504626158, "loss": 0.5073, "step": 3930 }, { "epoch": 0.2914658560094906, "grad_norm": 0.3634618818759918, "learning_rate": 0.00016073018254563642, "loss": 0.5134, "step": 3931 }, { "epoch": 0.2915400014829095, "grad_norm": 0.3562803268432617, "learning_rate": 0.00016072018004501125, "loss": 0.5086, "step": 3932 }, { "epoch": 0.2916141469563283, "grad_norm": 0.38353943824768066, "learning_rate": 0.00016071017754438612, "loss": 0.5496, "step": 3933 }, { "epoch": 0.29168829242974714, "grad_norm": 0.37369635701179504, "learning_rate": 0.00016070017504376096, "loss": 0.5235, "step": 3934 }, { "epoch": 0.291762437903166, "grad_norm": 0.3572530746459961, "learning_rate": 0.0001606901725431358, "loss": 0.4955, "step": 3935 }, { "epoch": 0.29183658337658486, "grad_norm": 0.348367840051651, "learning_rate": 0.00016068017004251063, "loss": 0.4731, "step": 3936 }, { "epoch": 0.2919107288500037, "grad_norm": 0.37875011563301086, "learning_rate": 0.0001606701675418855, "loss": 0.5357, "step": 3937 }, { "epoch": 0.29198487432342257, "grad_norm": 0.3814442753791809, "learning_rate": 0.0001606601650412603, "loss": 0.5817, "step": 3938 }, { "epoch": 0.2920590197968414, "grad_norm": 0.3591715693473816, "learning_rate": 0.00016065016254063517, "loss": 0.525, "step": 3939 }, { "epoch": 0.29213316527026023, "grad_norm": 0.3514496088027954, "learning_rate": 0.00016064016004001, "loss": 0.5333, "step": 3940 }, { "epoch": 0.2922073107436791, "grad_norm": 0.37300369143486023, "learning_rate": 0.00016063015753938487, "loss": 0.5342, "step": 3941 }, { "epoch": 0.29228145621709795, "grad_norm": 0.38956573605537415, "learning_rate": 0.00016062015503875968, "loss": 0.5044, "step": 3942 }, { "epoch": 0.2923556016905168, "grad_norm": 0.357345849275589, "learning_rate": 0.00016061015253813454, "loss": 0.5502, "step": 3943 }, { "epoch": 0.29242974716393566, "grad_norm": 0.33527129888534546, "learning_rate": 0.00016060015003750938, "loss": 0.4874, "step": 3944 }, { "epoch": 0.2925038926373545, "grad_norm": 0.3317430019378662, "learning_rate": 0.00016059014753688425, "loss": 0.4922, "step": 3945 }, { "epoch": 0.2925780381107733, "grad_norm": 0.35070154070854187, "learning_rate": 0.00016058014503625906, "loss": 0.5437, "step": 3946 }, { "epoch": 0.2926521835841922, "grad_norm": 0.36259734630584717, "learning_rate": 0.00016057014253563392, "loss": 0.5546, "step": 3947 }, { "epoch": 0.29272632905761103, "grad_norm": 0.3382718563079834, "learning_rate": 0.00016056014003500876, "loss": 0.5039, "step": 3948 }, { "epoch": 0.29280047453102986, "grad_norm": 0.3646923005580902, "learning_rate": 0.0001605501375343836, "loss": 0.5433, "step": 3949 }, { "epoch": 0.29287462000444875, "grad_norm": 0.39176145195961, "learning_rate": 0.00016054013503375843, "loss": 0.5938, "step": 3950 }, { "epoch": 0.2929487654778676, "grad_norm": 0.36378586292266846, "learning_rate": 0.0001605301325331333, "loss": 0.5309, "step": 3951 }, { "epoch": 0.2930229109512864, "grad_norm": 0.3581850528717041, "learning_rate": 0.00016052013003250813, "loss": 0.5429, "step": 3952 }, { "epoch": 0.2930970564247053, "grad_norm": 0.34166058897972107, "learning_rate": 0.00016051012753188297, "loss": 0.5017, "step": 3953 }, { "epoch": 0.2931712018981241, "grad_norm": 0.3411039412021637, "learning_rate": 0.0001605001250312578, "loss": 0.4998, "step": 3954 }, { "epoch": 0.29324534737154295, "grad_norm": 0.3706164062023163, "learning_rate": 0.00016049012253063267, "loss": 0.5229, "step": 3955 }, { "epoch": 0.29331949284496184, "grad_norm": 0.370395690202713, "learning_rate": 0.0001604801200300075, "loss": 0.5473, "step": 3956 }, { "epoch": 0.29339363831838067, "grad_norm": 0.3472321629524231, "learning_rate": 0.00016047011752938234, "loss": 0.5151, "step": 3957 }, { "epoch": 0.2934677837917995, "grad_norm": 0.3575213849544525, "learning_rate": 0.0001604601150287572, "loss": 0.4955, "step": 3958 }, { "epoch": 0.2935419292652184, "grad_norm": 0.3484896719455719, "learning_rate": 0.00016045011252813205, "loss": 0.4876, "step": 3959 }, { "epoch": 0.2936160747386372, "grad_norm": 0.37622225284576416, "learning_rate": 0.00016044011002750688, "loss": 0.4934, "step": 3960 }, { "epoch": 0.29369022021205604, "grad_norm": 0.33847326040267944, "learning_rate": 0.00016043010752688172, "loss": 0.5157, "step": 3961 }, { "epoch": 0.2937643656854749, "grad_norm": 0.36501890420913696, "learning_rate": 0.00016042010502625658, "loss": 0.486, "step": 3962 }, { "epoch": 0.29383851115889376, "grad_norm": 0.3552279770374298, "learning_rate": 0.00016041010252563142, "loss": 0.5236, "step": 3963 }, { "epoch": 0.2939126566323126, "grad_norm": 0.37486982345581055, "learning_rate": 0.00016040010002500626, "loss": 0.526, "step": 3964 }, { "epoch": 0.29398680210573147, "grad_norm": 0.370578795671463, "learning_rate": 0.0001603900975243811, "loss": 0.5652, "step": 3965 }, { "epoch": 0.2940609475791503, "grad_norm": 0.3578402101993561, "learning_rate": 0.00016038009502375596, "loss": 0.4913, "step": 3966 }, { "epoch": 0.29413509305256913, "grad_norm": 0.33607998490333557, "learning_rate": 0.0001603700925231308, "loss": 0.4789, "step": 3967 }, { "epoch": 0.294209238525988, "grad_norm": 0.38722550868988037, "learning_rate": 0.00016036009002250563, "loss": 0.5332, "step": 3968 }, { "epoch": 0.29428338399940684, "grad_norm": 0.37247389554977417, "learning_rate": 0.00016035008752188047, "loss": 0.5474, "step": 3969 }, { "epoch": 0.2943575294728257, "grad_norm": 0.3544765114784241, "learning_rate": 0.00016034008502125534, "loss": 0.5083, "step": 3970 }, { "epoch": 0.29443167494624456, "grad_norm": 0.35752007365226746, "learning_rate": 0.00016033008252063017, "loss": 0.5206, "step": 3971 }, { "epoch": 0.2945058204196634, "grad_norm": 0.36112740635871887, "learning_rate": 0.000160320080020005, "loss": 0.5174, "step": 3972 }, { "epoch": 0.2945799658930822, "grad_norm": 0.3784008026123047, "learning_rate": 0.00016031007751937985, "loss": 0.5688, "step": 3973 }, { "epoch": 0.29465411136650105, "grad_norm": 0.35536134243011475, "learning_rate": 0.0001603000750187547, "loss": 0.4897, "step": 3974 }, { "epoch": 0.29472825683991993, "grad_norm": 0.39411622285842896, "learning_rate": 0.00016029007251812952, "loss": 0.4976, "step": 3975 }, { "epoch": 0.29480240231333876, "grad_norm": 0.4020124077796936, "learning_rate": 0.00016028007001750438, "loss": 0.5904, "step": 3976 }, { "epoch": 0.2948765477867576, "grad_norm": 0.35982000827789307, "learning_rate": 0.00016027006751687922, "loss": 0.5492, "step": 3977 }, { "epoch": 0.2949506932601765, "grad_norm": 0.3702249228954315, "learning_rate": 0.00016026006501625409, "loss": 0.5219, "step": 3978 }, { "epoch": 0.2950248387335953, "grad_norm": 0.33178773522377014, "learning_rate": 0.0001602500625156289, "loss": 0.4994, "step": 3979 }, { "epoch": 0.29509898420701414, "grad_norm": 0.4044390916824341, "learning_rate": 0.00016024006001500376, "loss": 0.5839, "step": 3980 }, { "epoch": 0.295173129680433, "grad_norm": 0.34748324751853943, "learning_rate": 0.0001602300575143786, "loss": 0.5063, "step": 3981 }, { "epoch": 0.29524727515385185, "grad_norm": 0.35179615020751953, "learning_rate": 0.00016022005501375346, "loss": 0.5419, "step": 3982 }, { "epoch": 0.2953214206272707, "grad_norm": 0.36619338393211365, "learning_rate": 0.00016021005251312827, "loss": 0.5452, "step": 3983 }, { "epoch": 0.29539556610068957, "grad_norm": 0.3531129062175751, "learning_rate": 0.00016020005001250314, "loss": 0.5132, "step": 3984 }, { "epoch": 0.2954697115741084, "grad_norm": 0.37103918194770813, "learning_rate": 0.00016019004751187797, "loss": 0.557, "step": 3985 }, { "epoch": 0.2955438570475272, "grad_norm": 0.368247389793396, "learning_rate": 0.0001601800450112528, "loss": 0.5085, "step": 3986 }, { "epoch": 0.2956180025209461, "grad_norm": 0.3717050552368164, "learning_rate": 0.00016017004251062765, "loss": 0.5433, "step": 3987 }, { "epoch": 0.29569214799436494, "grad_norm": 0.39062556624412537, "learning_rate": 0.0001601600400100025, "loss": 0.5332, "step": 3988 }, { "epoch": 0.29576629346778377, "grad_norm": 0.3517850935459137, "learning_rate": 0.00016015003750937735, "loss": 0.5224, "step": 3989 }, { "epoch": 0.29584043894120265, "grad_norm": 0.3601027727127075, "learning_rate": 0.00016014003500875219, "loss": 0.522, "step": 3990 }, { "epoch": 0.2959145844146215, "grad_norm": 0.40231916308403015, "learning_rate": 0.00016013003250812705, "loss": 0.5542, "step": 3991 }, { "epoch": 0.2959887298880403, "grad_norm": 0.38911208510398865, "learning_rate": 0.0001601200300075019, "loss": 0.5242, "step": 3992 }, { "epoch": 0.2960628753614592, "grad_norm": 0.3717588186264038, "learning_rate": 0.00016011002750687672, "loss": 0.53, "step": 3993 }, { "epoch": 0.29613702083487803, "grad_norm": 0.369008332490921, "learning_rate": 0.00016010002500625156, "loss": 0.5396, "step": 3994 }, { "epoch": 0.29621116630829686, "grad_norm": 0.3760620355606079, "learning_rate": 0.00016009002250562643, "loss": 0.4855, "step": 3995 }, { "epoch": 0.29628531178171574, "grad_norm": 0.36975231766700745, "learning_rate": 0.00016008002000500126, "loss": 0.5256, "step": 3996 }, { "epoch": 0.2963594572551346, "grad_norm": 0.36148807406425476, "learning_rate": 0.0001600700175043761, "loss": 0.5145, "step": 3997 }, { "epoch": 0.2964336027285534, "grad_norm": 0.36933550238609314, "learning_rate": 0.00016006001500375094, "loss": 0.5765, "step": 3998 }, { "epoch": 0.2965077482019723, "grad_norm": 0.37279388308525085, "learning_rate": 0.0001600500125031258, "loss": 0.5145, "step": 3999 }, { "epoch": 0.2965818936753911, "grad_norm": 0.3484081029891968, "learning_rate": 0.00016004001000250064, "loss": 0.5418, "step": 4000 }, { "epoch": 0.29665603914880995, "grad_norm": 0.38228434324264526, "learning_rate": 0.00016003000750187547, "loss": 0.5211, "step": 4001 }, { "epoch": 0.29673018462222883, "grad_norm": 0.3695968985557556, "learning_rate": 0.0001600200050012503, "loss": 0.5587, "step": 4002 }, { "epoch": 0.29680433009564766, "grad_norm": 0.35244524478912354, "learning_rate": 0.00016001000250062518, "loss": 0.5117, "step": 4003 }, { "epoch": 0.2968784755690665, "grad_norm": 0.37051695585250854, "learning_rate": 0.00016, "loss": 0.5273, "step": 4004 }, { "epoch": 0.2969526210424854, "grad_norm": 0.3502669930458069, "learning_rate": 0.00015998999749937485, "loss": 0.5115, "step": 4005 }, { "epoch": 0.2970267665159042, "grad_norm": 0.33197668194770813, "learning_rate": 0.0001599799949987497, "loss": 0.5157, "step": 4006 }, { "epoch": 0.29710091198932304, "grad_norm": 0.39661309123039246, "learning_rate": 0.00015996999249812455, "loss": 0.564, "step": 4007 }, { "epoch": 0.2971750574627419, "grad_norm": 0.37122493982315063, "learning_rate": 0.0001599599899974994, "loss": 0.5808, "step": 4008 }, { "epoch": 0.29724920293616075, "grad_norm": 0.35254213213920593, "learning_rate": 0.00015994998749687423, "loss": 0.5448, "step": 4009 }, { "epoch": 0.2973233484095796, "grad_norm": 0.34533223509788513, "learning_rate": 0.00015993998499624906, "loss": 0.5047, "step": 4010 }, { "epoch": 0.29739749388299846, "grad_norm": 0.4069889485836029, "learning_rate": 0.00015992998249562393, "loss": 0.5407, "step": 4011 }, { "epoch": 0.2974716393564173, "grad_norm": 0.3407602608203888, "learning_rate": 0.00015991997999499874, "loss": 0.4871, "step": 4012 }, { "epoch": 0.2975457848298361, "grad_norm": 0.36621248722076416, "learning_rate": 0.0001599099774943736, "loss": 0.5313, "step": 4013 }, { "epoch": 0.297619930303255, "grad_norm": 0.35396698117256165, "learning_rate": 0.00015989997499374844, "loss": 0.5634, "step": 4014 }, { "epoch": 0.29769407577667384, "grad_norm": 0.3550879955291748, "learning_rate": 0.0001598899724931233, "loss": 0.5414, "step": 4015 }, { "epoch": 0.29776822125009267, "grad_norm": 0.3559340834617615, "learning_rate": 0.0001598799699924981, "loss": 0.507, "step": 4016 }, { "epoch": 0.29784236672351155, "grad_norm": 0.34013742208480835, "learning_rate": 0.00015986996749187298, "loss": 0.4969, "step": 4017 }, { "epoch": 0.2979165121969304, "grad_norm": 0.3727584779262543, "learning_rate": 0.00015985996499124781, "loss": 0.525, "step": 4018 }, { "epoch": 0.2979906576703492, "grad_norm": 0.3956488072872162, "learning_rate": 0.00015984996249062268, "loss": 0.5818, "step": 4019 }, { "epoch": 0.2980648031437681, "grad_norm": 0.3867981433868408, "learning_rate": 0.0001598399599899975, "loss": 0.5318, "step": 4020 }, { "epoch": 0.2981389486171869, "grad_norm": 0.3732658624649048, "learning_rate": 0.00015982995748937235, "loss": 0.5477, "step": 4021 }, { "epoch": 0.29821309409060576, "grad_norm": 0.3599327504634857, "learning_rate": 0.0001598199549887472, "loss": 0.5363, "step": 4022 }, { "epoch": 0.29828723956402464, "grad_norm": 0.4112119674682617, "learning_rate": 0.00015980995248812203, "loss": 0.5759, "step": 4023 }, { "epoch": 0.29836138503744347, "grad_norm": 0.39399659633636475, "learning_rate": 0.0001597999499874969, "loss": 0.5792, "step": 4024 }, { "epoch": 0.2984355305108623, "grad_norm": 0.3639194965362549, "learning_rate": 0.00015978994748687173, "loss": 0.5122, "step": 4025 }, { "epoch": 0.2985096759842812, "grad_norm": 0.3862966001033783, "learning_rate": 0.00015977994498624656, "loss": 0.5478, "step": 4026 }, { "epoch": 0.2985838214577, "grad_norm": 0.37826642394065857, "learning_rate": 0.0001597699424856214, "loss": 0.5515, "step": 4027 }, { "epoch": 0.29865796693111885, "grad_norm": 0.353733628988266, "learning_rate": 0.00015975993998499627, "loss": 0.4912, "step": 4028 }, { "epoch": 0.2987321124045377, "grad_norm": 0.38914716243743896, "learning_rate": 0.0001597499374843711, "loss": 0.5553, "step": 4029 }, { "epoch": 0.29880625787795656, "grad_norm": 0.3868093490600586, "learning_rate": 0.00015973993498374594, "loss": 0.5082, "step": 4030 }, { "epoch": 0.2988804033513754, "grad_norm": 0.36759594082832336, "learning_rate": 0.00015972993248312078, "loss": 0.5804, "step": 4031 }, { "epoch": 0.2989545488247942, "grad_norm": 0.36228832602500916, "learning_rate": 0.00015971992998249564, "loss": 0.5037, "step": 4032 }, { "epoch": 0.2990286942982131, "grad_norm": 0.3493717610836029, "learning_rate": 0.00015970992748187048, "loss": 0.5165, "step": 4033 }, { "epoch": 0.29910283977163193, "grad_norm": 0.32856541872024536, "learning_rate": 0.00015969992498124532, "loss": 0.4951, "step": 4034 }, { "epoch": 0.29917698524505076, "grad_norm": 0.35782667994499207, "learning_rate": 0.00015968992248062015, "loss": 0.5322, "step": 4035 }, { "epoch": 0.29925113071846965, "grad_norm": 0.36489975452423096, "learning_rate": 0.00015967991997999502, "loss": 0.506, "step": 4036 }, { "epoch": 0.2993252761918885, "grad_norm": 0.3861667513847351, "learning_rate": 0.00015966991747936985, "loss": 0.517, "step": 4037 }, { "epoch": 0.2993994216653073, "grad_norm": 0.377248615026474, "learning_rate": 0.0001596599149787447, "loss": 0.5137, "step": 4038 }, { "epoch": 0.2994735671387262, "grad_norm": 0.36739224195480347, "learning_rate": 0.00015964991247811953, "loss": 0.5292, "step": 4039 }, { "epoch": 0.299547712612145, "grad_norm": 0.35575205087661743, "learning_rate": 0.0001596399099774944, "loss": 0.5363, "step": 4040 }, { "epoch": 0.29962185808556385, "grad_norm": 0.35003021359443665, "learning_rate": 0.00015962990747686923, "loss": 0.5047, "step": 4041 }, { "epoch": 0.29969600355898274, "grad_norm": 0.3954920768737793, "learning_rate": 0.00015961990497624407, "loss": 0.5301, "step": 4042 }, { "epoch": 0.29977014903240157, "grad_norm": 0.3598799407482147, "learning_rate": 0.0001596099024756189, "loss": 0.5352, "step": 4043 }, { "epoch": 0.2998442945058204, "grad_norm": 0.3522927165031433, "learning_rate": 0.00015959989997499377, "loss": 0.5258, "step": 4044 }, { "epoch": 0.2999184399792393, "grad_norm": 0.33578965067863464, "learning_rate": 0.0001595898974743686, "loss": 0.511, "step": 4045 }, { "epoch": 0.2999925854526581, "grad_norm": 0.35926783084869385, "learning_rate": 0.00015957989497374344, "loss": 0.5326, "step": 4046 }, { "epoch": 0.30006673092607694, "grad_norm": 0.37652483582496643, "learning_rate": 0.00015956989247311828, "loss": 0.5446, "step": 4047 }, { "epoch": 0.3001408763994958, "grad_norm": 0.37628257274627686, "learning_rate": 0.00015955988997249314, "loss": 0.5559, "step": 4048 }, { "epoch": 0.30021502187291466, "grad_norm": 0.3356396555900574, "learning_rate": 0.00015954988747186795, "loss": 0.4988, "step": 4049 }, { "epoch": 0.3002891673463335, "grad_norm": 0.3667522370815277, "learning_rate": 0.00015953988497124282, "loss": 0.5738, "step": 4050 }, { "epoch": 0.30036331281975237, "grad_norm": 0.3712879717350006, "learning_rate": 0.00015952988247061765, "loss": 0.5209, "step": 4051 }, { "epoch": 0.3004374582931712, "grad_norm": 0.3679860830307007, "learning_rate": 0.00015951987996999252, "loss": 0.5556, "step": 4052 }, { "epoch": 0.30051160376659003, "grad_norm": 0.3702663779258728, "learning_rate": 0.00015950987746936733, "loss": 0.5731, "step": 4053 }, { "epoch": 0.3005857492400089, "grad_norm": 0.3740931451320648, "learning_rate": 0.0001594998749687422, "loss": 0.5357, "step": 4054 }, { "epoch": 0.30065989471342774, "grad_norm": 0.36415183544158936, "learning_rate": 0.00015948987246811703, "loss": 0.5414, "step": 4055 }, { "epoch": 0.3007340401868466, "grad_norm": 0.35504385828971863, "learning_rate": 0.0001594798699674919, "loss": 0.5383, "step": 4056 }, { "epoch": 0.30080818566026546, "grad_norm": 0.34725698828697205, "learning_rate": 0.00015946986746686673, "loss": 0.529, "step": 4057 }, { "epoch": 0.3008823311336843, "grad_norm": 0.38940998911857605, "learning_rate": 0.00015945986496624157, "loss": 0.539, "step": 4058 }, { "epoch": 0.3009564766071031, "grad_norm": 0.35077518224716187, "learning_rate": 0.0001594498624656164, "loss": 0.5087, "step": 4059 }, { "epoch": 0.301030622080522, "grad_norm": 0.35892966389656067, "learning_rate": 0.00015943985996499124, "loss": 0.5128, "step": 4060 }, { "epoch": 0.30110476755394083, "grad_norm": 0.32215508818626404, "learning_rate": 0.0001594298574643661, "loss": 0.4824, "step": 4061 }, { "epoch": 0.30117891302735966, "grad_norm": 0.3656335473060608, "learning_rate": 0.00015941985496374094, "loss": 0.5544, "step": 4062 }, { "epoch": 0.30125305850077855, "grad_norm": 0.3468722403049469, "learning_rate": 0.00015940985246311578, "loss": 0.4984, "step": 4063 }, { "epoch": 0.3013272039741974, "grad_norm": 0.35011231899261475, "learning_rate": 0.00015939984996249062, "loss": 0.4916, "step": 4064 }, { "epoch": 0.3014013494476162, "grad_norm": 0.3436737358570099, "learning_rate": 0.00015938984746186548, "loss": 0.5294, "step": 4065 }, { "epoch": 0.3014754949210351, "grad_norm": 0.3615875244140625, "learning_rate": 0.00015937984496124032, "loss": 0.5417, "step": 4066 }, { "epoch": 0.3015496403944539, "grad_norm": 0.36190465092658997, "learning_rate": 0.00015936984246061518, "loss": 0.5604, "step": 4067 }, { "epoch": 0.30162378586787275, "grad_norm": 0.35017842054367065, "learning_rate": 0.00015935983995999, "loss": 0.5134, "step": 4068 }, { "epoch": 0.30169793134129164, "grad_norm": 0.3596512973308563, "learning_rate": 0.00015934983745936486, "loss": 0.5912, "step": 4069 }, { "epoch": 0.30177207681471047, "grad_norm": 0.3592354357242584, "learning_rate": 0.0001593398349587397, "loss": 0.5441, "step": 4070 }, { "epoch": 0.3018462222881293, "grad_norm": 0.3731875717639923, "learning_rate": 0.00015932983245811453, "loss": 0.5369, "step": 4071 }, { "epoch": 0.3019203677615482, "grad_norm": 0.35013845562934875, "learning_rate": 0.00015931982995748937, "loss": 0.5126, "step": 4072 }, { "epoch": 0.301994513234967, "grad_norm": 0.3775531053543091, "learning_rate": 0.00015930982745686423, "loss": 0.5329, "step": 4073 }, { "epoch": 0.30206865870838584, "grad_norm": 0.3568129241466522, "learning_rate": 0.00015929982495623907, "loss": 0.5472, "step": 4074 }, { "epoch": 0.3021428041818047, "grad_norm": 0.3960687816143036, "learning_rate": 0.0001592898224556139, "loss": 0.5891, "step": 4075 }, { "epoch": 0.30221694965522355, "grad_norm": 0.3744184374809265, "learning_rate": 0.00015927981995498874, "loss": 0.5331, "step": 4076 }, { "epoch": 0.3022910951286424, "grad_norm": 0.3542018234729767, "learning_rate": 0.0001592698174543636, "loss": 0.5209, "step": 4077 }, { "epoch": 0.30236524060206127, "grad_norm": 0.3578112721443176, "learning_rate": 0.00015925981495373845, "loss": 0.532, "step": 4078 }, { "epoch": 0.3024393860754801, "grad_norm": 0.36466196179389954, "learning_rate": 0.00015924981245311328, "loss": 0.5304, "step": 4079 }, { "epoch": 0.30251353154889893, "grad_norm": 0.35446664690971375, "learning_rate": 0.00015923980995248812, "loss": 0.5193, "step": 4080 }, { "epoch": 0.3025876770223178, "grad_norm": 0.36852818727493286, "learning_rate": 0.00015922980745186298, "loss": 0.5256, "step": 4081 }, { "epoch": 0.30266182249573664, "grad_norm": 0.3709505796432495, "learning_rate": 0.00015921980495123782, "loss": 0.5455, "step": 4082 }, { "epoch": 0.3027359679691555, "grad_norm": 0.35271862149238586, "learning_rate": 0.00015920980245061266, "loss": 0.5242, "step": 4083 }, { "epoch": 0.30281011344257436, "grad_norm": 0.33031851053237915, "learning_rate": 0.0001591997999499875, "loss": 0.4857, "step": 4084 }, { "epoch": 0.3028842589159932, "grad_norm": 0.38353532552719116, "learning_rate": 0.00015918979744936236, "loss": 0.6002, "step": 4085 }, { "epoch": 0.302958404389412, "grad_norm": 0.34422823786735535, "learning_rate": 0.00015917979494873717, "loss": 0.4851, "step": 4086 }, { "epoch": 0.30303254986283085, "grad_norm": 0.3623892068862915, "learning_rate": 0.00015916979244811203, "loss": 0.5368, "step": 4087 }, { "epoch": 0.30310669533624973, "grad_norm": 0.3662673830986023, "learning_rate": 0.00015915978994748687, "loss": 0.5553, "step": 4088 }, { "epoch": 0.30318084080966856, "grad_norm": 0.3625752329826355, "learning_rate": 0.00015914978744686173, "loss": 0.5435, "step": 4089 }, { "epoch": 0.3032549862830874, "grad_norm": 0.3684788644313812, "learning_rate": 0.00015913978494623657, "loss": 0.526, "step": 4090 }, { "epoch": 0.3033291317565063, "grad_norm": 0.37446197867393494, "learning_rate": 0.0001591297824456114, "loss": 0.57, "step": 4091 }, { "epoch": 0.3034032772299251, "grad_norm": 0.39754658937454224, "learning_rate": 0.00015911977994498625, "loss": 0.5802, "step": 4092 }, { "epoch": 0.30347742270334394, "grad_norm": 0.380154013633728, "learning_rate": 0.0001591097774443611, "loss": 0.5549, "step": 4093 }, { "epoch": 0.3035515681767628, "grad_norm": 0.3581731915473938, "learning_rate": 0.00015909977494373595, "loss": 0.551, "step": 4094 }, { "epoch": 0.30362571365018165, "grad_norm": 0.3650974631309509, "learning_rate": 0.00015908977244311078, "loss": 0.5334, "step": 4095 }, { "epoch": 0.3036998591236005, "grad_norm": 0.34302130341529846, "learning_rate": 0.00015907976994248562, "loss": 0.5239, "step": 4096 }, { "epoch": 0.30377400459701936, "grad_norm": 0.3576764166355133, "learning_rate": 0.00015906976744186046, "loss": 0.5635, "step": 4097 }, { "epoch": 0.3038481500704382, "grad_norm": 0.3519270420074463, "learning_rate": 0.00015905976494123532, "loss": 0.4928, "step": 4098 }, { "epoch": 0.303922295543857, "grad_norm": 0.36679762601852417, "learning_rate": 0.00015904976244061016, "loss": 0.5368, "step": 4099 }, { "epoch": 0.3039964410172759, "grad_norm": 0.3840171694755554, "learning_rate": 0.00015903975993998502, "loss": 0.5917, "step": 4100 }, { "epoch": 0.30407058649069474, "grad_norm": 0.33106502890586853, "learning_rate": 0.00015902975743935983, "loss": 0.5016, "step": 4101 }, { "epoch": 0.30414473196411357, "grad_norm": 0.3404703736305237, "learning_rate": 0.0001590197549387347, "loss": 0.5065, "step": 4102 }, { "epoch": 0.30421887743753245, "grad_norm": 0.3664514124393463, "learning_rate": 0.00015900975243810954, "loss": 0.5363, "step": 4103 }, { "epoch": 0.3042930229109513, "grad_norm": 0.37743622064590454, "learning_rate": 0.0001589997499374844, "loss": 0.5394, "step": 4104 }, { "epoch": 0.3043671683843701, "grad_norm": 0.3764043152332306, "learning_rate": 0.0001589897474368592, "loss": 0.5378, "step": 4105 }, { "epoch": 0.304441313857789, "grad_norm": 0.3931509852409363, "learning_rate": 0.00015897974493623407, "loss": 0.5707, "step": 4106 }, { "epoch": 0.3045154593312078, "grad_norm": 0.35728803277015686, "learning_rate": 0.0001589697424356089, "loss": 0.521, "step": 4107 }, { "epoch": 0.30458960480462666, "grad_norm": 0.3438619375228882, "learning_rate": 0.00015895973993498378, "loss": 0.4833, "step": 4108 }, { "epoch": 0.30466375027804554, "grad_norm": 0.36911559104919434, "learning_rate": 0.00015894973743435859, "loss": 0.5487, "step": 4109 }, { "epoch": 0.30473789575146437, "grad_norm": 0.3570270836353302, "learning_rate": 0.00015893973493373345, "loss": 0.5263, "step": 4110 }, { "epoch": 0.3048120412248832, "grad_norm": 0.38454028964042664, "learning_rate": 0.00015892973243310829, "loss": 0.5621, "step": 4111 }, { "epoch": 0.3048861866983021, "grad_norm": 0.3667009174823761, "learning_rate": 0.00015891972993248312, "loss": 0.5322, "step": 4112 }, { "epoch": 0.3049603321717209, "grad_norm": 0.3356033265590668, "learning_rate": 0.00015890972743185796, "loss": 0.462, "step": 4113 }, { "epoch": 0.30503447764513975, "grad_norm": 0.3573039472103119, "learning_rate": 0.00015889972493123282, "loss": 0.5514, "step": 4114 }, { "epoch": 0.30510862311855863, "grad_norm": 0.4111579954624176, "learning_rate": 0.00015888972243060766, "loss": 0.5749, "step": 4115 }, { "epoch": 0.30518276859197746, "grad_norm": 0.36047500371932983, "learning_rate": 0.0001588797199299825, "loss": 0.5636, "step": 4116 }, { "epoch": 0.3052569140653963, "grad_norm": 0.36978307366371155, "learning_rate": 0.00015886971742935734, "loss": 0.544, "step": 4117 }, { "epoch": 0.3053310595388152, "grad_norm": 0.37328270077705383, "learning_rate": 0.0001588597149287322, "loss": 0.5263, "step": 4118 }, { "epoch": 0.305405205012234, "grad_norm": 0.3558354377746582, "learning_rate": 0.00015884971242810704, "loss": 0.5542, "step": 4119 }, { "epoch": 0.30547935048565283, "grad_norm": 0.4023416340351105, "learning_rate": 0.00015883970992748187, "loss": 0.52, "step": 4120 }, { "epoch": 0.3055534959590717, "grad_norm": 0.39063557982444763, "learning_rate": 0.0001588297074268567, "loss": 0.5629, "step": 4121 }, { "epoch": 0.30562764143249055, "grad_norm": 0.3617022931575775, "learning_rate": 0.00015881970492623158, "loss": 0.536, "step": 4122 }, { "epoch": 0.3057017869059094, "grad_norm": 0.3590830862522125, "learning_rate": 0.00015880970242560639, "loss": 0.5302, "step": 4123 }, { "epoch": 0.30577593237932826, "grad_norm": 0.34919580817222595, "learning_rate": 0.00015879969992498125, "loss": 0.5131, "step": 4124 }, { "epoch": 0.3058500778527471, "grad_norm": 0.3856774866580963, "learning_rate": 0.0001587896974243561, "loss": 0.5361, "step": 4125 }, { "epoch": 0.3059242233261659, "grad_norm": 0.3794576823711395, "learning_rate": 0.00015877969492373095, "loss": 0.5327, "step": 4126 }, { "epoch": 0.3059983687995848, "grad_norm": 0.35037508606910706, "learning_rate": 0.0001587696924231058, "loss": 0.5427, "step": 4127 }, { "epoch": 0.30607251427300364, "grad_norm": 0.34288913011550903, "learning_rate": 0.00015875968992248063, "loss": 0.532, "step": 4128 }, { "epoch": 0.30614665974642247, "grad_norm": 0.366894394159317, "learning_rate": 0.00015874968742185546, "loss": 0.532, "step": 4129 }, { "epoch": 0.30622080521984135, "grad_norm": 0.3284915089607239, "learning_rate": 0.00015873968492123033, "loss": 0.474, "step": 4130 }, { "epoch": 0.3062949506932602, "grad_norm": 0.37263646721839905, "learning_rate": 0.00015872968242060516, "loss": 0.6109, "step": 4131 }, { "epoch": 0.306369096166679, "grad_norm": 0.36157727241516113, "learning_rate": 0.00015871967991998, "loss": 0.5255, "step": 4132 }, { "epoch": 0.3064432416400979, "grad_norm": 0.360089510679245, "learning_rate": 0.00015870967741935487, "loss": 0.5181, "step": 4133 }, { "epoch": 0.3065173871135167, "grad_norm": 0.3682079017162323, "learning_rate": 0.00015869967491872968, "loss": 0.5195, "step": 4134 }, { "epoch": 0.30659153258693556, "grad_norm": 0.35399356484413147, "learning_rate": 0.00015868967241810454, "loss": 0.521, "step": 4135 }, { "epoch": 0.30666567806035444, "grad_norm": 0.3875964879989624, "learning_rate": 0.00015867966991747938, "loss": 0.5366, "step": 4136 }, { "epoch": 0.30673982353377327, "grad_norm": 0.37401750683784485, "learning_rate": 0.00015866966741685424, "loss": 0.5593, "step": 4137 }, { "epoch": 0.3068139690071921, "grad_norm": 0.34614282846450806, "learning_rate": 0.00015865966491622905, "loss": 0.5257, "step": 4138 }, { "epoch": 0.306888114480611, "grad_norm": 0.35527288913726807, "learning_rate": 0.00015864966241560391, "loss": 0.5409, "step": 4139 }, { "epoch": 0.3069622599540298, "grad_norm": 0.3389769494533539, "learning_rate": 0.00015863965991497875, "loss": 0.5355, "step": 4140 }, { "epoch": 0.30703640542744864, "grad_norm": 0.3787468373775482, "learning_rate": 0.00015862965741435362, "loss": 0.5609, "step": 4141 }, { "epoch": 0.30711055090086753, "grad_norm": 0.3453628718852997, "learning_rate": 0.00015861965491372843, "loss": 0.5247, "step": 4142 }, { "epoch": 0.30718469637428636, "grad_norm": 0.37349146604537964, "learning_rate": 0.0001586096524131033, "loss": 0.5177, "step": 4143 }, { "epoch": 0.3072588418477052, "grad_norm": 0.3539080321788788, "learning_rate": 0.00015859964991247813, "loss": 0.489, "step": 4144 }, { "epoch": 0.307332987321124, "grad_norm": 0.3705402612686157, "learning_rate": 0.000158589647411853, "loss": 0.5367, "step": 4145 }, { "epoch": 0.3074071327945429, "grad_norm": 0.3488110303878784, "learning_rate": 0.0001585796449112278, "loss": 0.4858, "step": 4146 }, { "epoch": 0.30748127826796173, "grad_norm": 0.3771372437477112, "learning_rate": 0.00015856964241060267, "loss": 0.5948, "step": 4147 }, { "epoch": 0.30755542374138056, "grad_norm": 0.3651280105113983, "learning_rate": 0.0001585596399099775, "loss": 0.506, "step": 4148 }, { "epoch": 0.30762956921479945, "grad_norm": 0.3644495904445648, "learning_rate": 0.00015854963740935234, "loss": 0.5354, "step": 4149 }, { "epoch": 0.3077037146882183, "grad_norm": 0.35131797194480896, "learning_rate": 0.00015853963490872718, "loss": 0.53, "step": 4150 }, { "epoch": 0.3077778601616371, "grad_norm": 0.369128555059433, "learning_rate": 0.00015852963240810204, "loss": 0.5242, "step": 4151 }, { "epoch": 0.307852005635056, "grad_norm": 0.3996574282646179, "learning_rate": 0.00015851962990747688, "loss": 0.5018, "step": 4152 }, { "epoch": 0.3079261511084748, "grad_norm": 0.37228694558143616, "learning_rate": 0.00015850962740685172, "loss": 0.5348, "step": 4153 }, { "epoch": 0.30800029658189365, "grad_norm": 0.34642016887664795, "learning_rate": 0.00015849962490622655, "loss": 0.48, "step": 4154 }, { "epoch": 0.30807444205531254, "grad_norm": 0.3543301224708557, "learning_rate": 0.00015848962240560142, "loss": 0.468, "step": 4155 }, { "epoch": 0.30814858752873137, "grad_norm": 0.36531779170036316, "learning_rate": 0.00015847961990497625, "loss": 0.4936, "step": 4156 }, { "epoch": 0.3082227330021502, "grad_norm": 0.40166789293289185, "learning_rate": 0.0001584696174043511, "loss": 0.604, "step": 4157 }, { "epoch": 0.3082968784755691, "grad_norm": 0.3527965247631073, "learning_rate": 0.00015845961490372593, "loss": 0.4995, "step": 4158 }, { "epoch": 0.3083710239489879, "grad_norm": 0.3552376329898834, "learning_rate": 0.0001584496124031008, "loss": 0.5279, "step": 4159 }, { "epoch": 0.30844516942240674, "grad_norm": 0.35990700125694275, "learning_rate": 0.00015843960990247563, "loss": 0.5697, "step": 4160 }, { "epoch": 0.3085193148958256, "grad_norm": 0.36470916867256165, "learning_rate": 0.00015842960740185047, "loss": 0.5415, "step": 4161 }, { "epoch": 0.30859346036924445, "grad_norm": 0.3675316274166107, "learning_rate": 0.0001584196049012253, "loss": 0.5323, "step": 4162 }, { "epoch": 0.3086676058426633, "grad_norm": 0.33732908964157104, "learning_rate": 0.00015840960240060017, "loss": 0.4893, "step": 4163 }, { "epoch": 0.30874175131608217, "grad_norm": 0.35558605194091797, "learning_rate": 0.000158399599899975, "loss": 0.4909, "step": 4164 }, { "epoch": 0.308815896789501, "grad_norm": 0.3689940273761749, "learning_rate": 0.00015838959739934984, "loss": 0.4976, "step": 4165 }, { "epoch": 0.30889004226291983, "grad_norm": 0.3794845938682556, "learning_rate": 0.0001583795948987247, "loss": 0.5339, "step": 4166 }, { "epoch": 0.3089641877363387, "grad_norm": 0.4010487496852875, "learning_rate": 0.00015836959239809954, "loss": 0.5396, "step": 4167 }, { "epoch": 0.30903833320975754, "grad_norm": 0.37855517864227295, "learning_rate": 0.00015835958989747438, "loss": 0.4547, "step": 4168 }, { "epoch": 0.3091124786831764, "grad_norm": 0.3654952049255371, "learning_rate": 0.00015834958739684922, "loss": 0.5519, "step": 4169 }, { "epoch": 0.30918662415659526, "grad_norm": 0.3705197870731354, "learning_rate": 0.00015833958489622408, "loss": 0.5251, "step": 4170 }, { "epoch": 0.3092607696300141, "grad_norm": 0.3837865889072418, "learning_rate": 0.0001583295823955989, "loss": 0.5609, "step": 4171 }, { "epoch": 0.3093349151034329, "grad_norm": 0.36137086153030396, "learning_rate": 0.00015831957989497376, "loss": 0.5416, "step": 4172 }, { "epoch": 0.3094090605768518, "grad_norm": 0.33942902088165283, "learning_rate": 0.0001583095773943486, "loss": 0.4751, "step": 4173 }, { "epoch": 0.30948320605027063, "grad_norm": 0.37732943892478943, "learning_rate": 0.00015829957489372346, "loss": 0.5491, "step": 4174 }, { "epoch": 0.30955735152368946, "grad_norm": 0.3830644488334656, "learning_rate": 0.00015828957239309827, "loss": 0.5534, "step": 4175 }, { "epoch": 0.30963149699710835, "grad_norm": 0.3483121991157532, "learning_rate": 0.00015827956989247313, "loss": 0.502, "step": 4176 }, { "epoch": 0.3097056424705272, "grad_norm": 0.37677496671676636, "learning_rate": 0.00015826956739184797, "loss": 0.6086, "step": 4177 }, { "epoch": 0.309779787943946, "grad_norm": 0.35031214356422424, "learning_rate": 0.00015825956489122283, "loss": 0.5088, "step": 4178 }, { "epoch": 0.3098539334173649, "grad_norm": 0.32977360486984253, "learning_rate": 0.00015824956239059764, "loss": 0.4758, "step": 4179 }, { "epoch": 0.3099280788907837, "grad_norm": 0.35538461804389954, "learning_rate": 0.0001582395598899725, "loss": 0.5128, "step": 4180 }, { "epoch": 0.31000222436420255, "grad_norm": 0.41398587822914124, "learning_rate": 0.00015822955738934734, "loss": 0.55, "step": 4181 }, { "epoch": 0.31007636983762144, "grad_norm": 0.3638046085834503, "learning_rate": 0.0001582195548887222, "loss": 0.4902, "step": 4182 }, { "epoch": 0.31015051531104026, "grad_norm": 0.3893817663192749, "learning_rate": 0.00015820955238809702, "loss": 0.5413, "step": 4183 }, { "epoch": 0.3102246607844591, "grad_norm": 0.3765319585800171, "learning_rate": 0.00015819954988747188, "loss": 0.5363, "step": 4184 }, { "epoch": 0.310298806257878, "grad_norm": 0.42014580965042114, "learning_rate": 0.00015818954738684672, "loss": 0.5739, "step": 4185 }, { "epoch": 0.3103729517312968, "grad_norm": 0.3793708384037018, "learning_rate": 0.00015817954488622156, "loss": 0.4984, "step": 4186 }, { "epoch": 0.31044709720471564, "grad_norm": 0.3904971778392792, "learning_rate": 0.0001581695423855964, "loss": 0.5473, "step": 4187 }, { "epoch": 0.3105212426781345, "grad_norm": 0.35482290387153625, "learning_rate": 0.00015815953988497126, "loss": 0.5253, "step": 4188 }, { "epoch": 0.31059538815155335, "grad_norm": 0.3756190240383148, "learning_rate": 0.0001581495373843461, "loss": 0.5265, "step": 4189 }, { "epoch": 0.3106695336249722, "grad_norm": 0.3608039617538452, "learning_rate": 0.00015813953488372093, "loss": 0.5309, "step": 4190 }, { "epoch": 0.31074367909839107, "grad_norm": 0.35763946175575256, "learning_rate": 0.00015812953238309577, "loss": 0.5106, "step": 4191 }, { "epoch": 0.3108178245718099, "grad_norm": 0.3789529800415039, "learning_rate": 0.00015811952988247063, "loss": 0.5191, "step": 4192 }, { "epoch": 0.3108919700452287, "grad_norm": 0.3552412688732147, "learning_rate": 0.00015810952738184547, "loss": 0.4929, "step": 4193 }, { "epoch": 0.3109661155186476, "grad_norm": 0.4032411277294159, "learning_rate": 0.0001580995248812203, "loss": 0.5956, "step": 4194 }, { "epoch": 0.31104026099206644, "grad_norm": 0.3935663104057312, "learning_rate": 0.00015808952238059514, "loss": 0.6194, "step": 4195 }, { "epoch": 0.31111440646548527, "grad_norm": 0.34442251920700073, "learning_rate": 0.00015807951987997, "loss": 0.4795, "step": 4196 }, { "epoch": 0.31118855193890416, "grad_norm": 0.3629276752471924, "learning_rate": 0.00015806951737934485, "loss": 0.5204, "step": 4197 }, { "epoch": 0.311262697412323, "grad_norm": 0.3734612464904785, "learning_rate": 0.00015805951487871968, "loss": 0.5232, "step": 4198 }, { "epoch": 0.3113368428857418, "grad_norm": 0.3765210211277008, "learning_rate": 0.00015804951237809452, "loss": 0.5235, "step": 4199 }, { "epoch": 0.3114109883591607, "grad_norm": 0.3899495601654053, "learning_rate": 0.00015803950987746938, "loss": 0.5011, "step": 4200 }, { "epoch": 0.31148513383257953, "grad_norm": 0.3745100498199463, "learning_rate": 0.00015802950737684422, "loss": 0.5436, "step": 4201 }, { "epoch": 0.31155927930599836, "grad_norm": 0.41309666633605957, "learning_rate": 0.00015801950487621906, "loss": 0.5386, "step": 4202 }, { "epoch": 0.3116334247794172, "grad_norm": 0.3708886206150055, "learning_rate": 0.00015800950237559392, "loss": 0.5479, "step": 4203 }, { "epoch": 0.3117075702528361, "grad_norm": 0.33386364579200745, "learning_rate": 0.00015799949987496876, "loss": 0.4901, "step": 4204 }, { "epoch": 0.3117817157262549, "grad_norm": 0.3691994249820709, "learning_rate": 0.0001579894973743436, "loss": 0.5556, "step": 4205 }, { "epoch": 0.31185586119967373, "grad_norm": 0.3645547926425934, "learning_rate": 0.00015797949487371843, "loss": 0.5499, "step": 4206 }, { "epoch": 0.3119300066730926, "grad_norm": 0.40353092551231384, "learning_rate": 0.0001579694923730933, "loss": 0.5372, "step": 4207 }, { "epoch": 0.31200415214651145, "grad_norm": 0.38109374046325684, "learning_rate": 0.0001579594898724681, "loss": 0.5167, "step": 4208 }, { "epoch": 0.3120782976199303, "grad_norm": 0.3568698465824127, "learning_rate": 0.00015794948737184297, "loss": 0.5544, "step": 4209 }, { "epoch": 0.31215244309334916, "grad_norm": 0.38137903809547424, "learning_rate": 0.0001579394848712178, "loss": 0.5486, "step": 4210 }, { "epoch": 0.312226588566768, "grad_norm": 0.3243560194969177, "learning_rate": 0.00015792948237059267, "loss": 0.4986, "step": 4211 }, { "epoch": 0.3123007340401868, "grad_norm": 0.36378777027130127, "learning_rate": 0.00015791947986996748, "loss": 0.5176, "step": 4212 }, { "epoch": 0.3123748795136057, "grad_norm": 0.3507065773010254, "learning_rate": 0.00015790947736934235, "loss": 0.5255, "step": 4213 }, { "epoch": 0.31244902498702454, "grad_norm": 0.3499090373516083, "learning_rate": 0.00015789947486871718, "loss": 0.4938, "step": 4214 }, { "epoch": 0.31252317046044337, "grad_norm": 0.38815852999687195, "learning_rate": 0.00015788947236809205, "loss": 0.5368, "step": 4215 }, { "epoch": 0.31259731593386225, "grad_norm": 0.36502891778945923, "learning_rate": 0.00015787946986746686, "loss": 0.5199, "step": 4216 }, { "epoch": 0.3126714614072811, "grad_norm": 0.3531673848628998, "learning_rate": 0.00015786946736684172, "loss": 0.5171, "step": 4217 }, { "epoch": 0.3127456068806999, "grad_norm": 0.34533557295799255, "learning_rate": 0.00015785946486621656, "loss": 0.5257, "step": 4218 }, { "epoch": 0.3128197523541188, "grad_norm": 0.38106030225753784, "learning_rate": 0.00015784946236559142, "loss": 0.5403, "step": 4219 }, { "epoch": 0.3128938978275376, "grad_norm": 0.384890615940094, "learning_rate": 0.00015783945986496623, "loss": 0.559, "step": 4220 }, { "epoch": 0.31296804330095646, "grad_norm": 0.4041353464126587, "learning_rate": 0.0001578294573643411, "loss": 0.6286, "step": 4221 }, { "epoch": 0.31304218877437534, "grad_norm": 0.3745250403881073, "learning_rate": 0.00015781945486371594, "loss": 0.5498, "step": 4222 }, { "epoch": 0.31311633424779417, "grad_norm": 0.36428672075271606, "learning_rate": 0.00015780945236309077, "loss": 0.5269, "step": 4223 }, { "epoch": 0.313190479721213, "grad_norm": 0.3576951026916504, "learning_rate": 0.0001577994498624656, "loss": 0.5003, "step": 4224 }, { "epoch": 0.3132646251946319, "grad_norm": 0.33381587266921997, "learning_rate": 0.00015778944736184047, "loss": 0.4851, "step": 4225 }, { "epoch": 0.3133387706680507, "grad_norm": 0.3801150619983673, "learning_rate": 0.0001577794448612153, "loss": 0.5285, "step": 4226 }, { "epoch": 0.31341291614146954, "grad_norm": 0.3548659384250641, "learning_rate": 0.00015776944236059015, "loss": 0.5196, "step": 4227 }, { "epoch": 0.31348706161488843, "grad_norm": 0.3636166751384735, "learning_rate": 0.00015775943985996498, "loss": 0.5166, "step": 4228 }, { "epoch": 0.31356120708830726, "grad_norm": 0.35685640573501587, "learning_rate": 0.00015774943735933985, "loss": 0.5551, "step": 4229 }, { "epoch": 0.3136353525617261, "grad_norm": 0.37799185514450073, "learning_rate": 0.00015773943485871469, "loss": 0.5582, "step": 4230 }, { "epoch": 0.313709498035145, "grad_norm": 0.36660799384117126, "learning_rate": 0.00015772943235808952, "loss": 0.5536, "step": 4231 }, { "epoch": 0.3137836435085638, "grad_norm": 0.37931081652641296, "learning_rate": 0.00015771942985746436, "loss": 0.5483, "step": 4232 }, { "epoch": 0.31385778898198263, "grad_norm": 0.3426106870174408, "learning_rate": 0.00015770942735683922, "loss": 0.4929, "step": 4233 }, { "epoch": 0.3139319344554015, "grad_norm": 0.3514460325241089, "learning_rate": 0.00015769942485621406, "loss": 0.5016, "step": 4234 }, { "epoch": 0.31400607992882035, "grad_norm": 0.3551497161388397, "learning_rate": 0.0001576894223555889, "loss": 0.5164, "step": 4235 }, { "epoch": 0.3140802254022392, "grad_norm": 0.36741548776626587, "learning_rate": 0.00015767941985496376, "loss": 0.5076, "step": 4236 }, { "epoch": 0.31415437087565806, "grad_norm": 0.37250766158103943, "learning_rate": 0.0001576694173543386, "loss": 0.5569, "step": 4237 }, { "epoch": 0.3142285163490769, "grad_norm": 0.3440800607204437, "learning_rate": 0.00015765941485371344, "loss": 0.4942, "step": 4238 }, { "epoch": 0.3143026618224957, "grad_norm": 0.35552072525024414, "learning_rate": 0.00015764941235308827, "loss": 0.5278, "step": 4239 }, { "epoch": 0.3143768072959146, "grad_norm": 0.3399142026901245, "learning_rate": 0.00015763940985246314, "loss": 0.4712, "step": 4240 }, { "epoch": 0.31445095276933344, "grad_norm": 0.38702839612960815, "learning_rate": 0.00015762940735183798, "loss": 0.5439, "step": 4241 }, { "epoch": 0.31452509824275227, "grad_norm": 0.36340251564979553, "learning_rate": 0.0001576194048512128, "loss": 0.5501, "step": 4242 }, { "epoch": 0.31459924371617115, "grad_norm": 0.33273983001708984, "learning_rate": 0.00015760940235058765, "loss": 0.4893, "step": 4243 }, { "epoch": 0.31467338918959, "grad_norm": 0.33978021144866943, "learning_rate": 0.00015759939984996251, "loss": 0.4792, "step": 4244 }, { "epoch": 0.3147475346630088, "grad_norm": 0.3486379384994507, "learning_rate": 0.00015758939734933732, "loss": 0.5114, "step": 4245 }, { "epoch": 0.3148216801364277, "grad_norm": 0.3783853352069855, "learning_rate": 0.0001575793948487122, "loss": 0.5502, "step": 4246 }, { "epoch": 0.3148958256098465, "grad_norm": 0.35964080691337585, "learning_rate": 0.00015756939234808703, "loss": 0.5242, "step": 4247 }, { "epoch": 0.31496997108326535, "grad_norm": 0.3486814796924591, "learning_rate": 0.0001575593898474619, "loss": 0.4978, "step": 4248 }, { "epoch": 0.31504411655668424, "grad_norm": 0.3539011776447296, "learning_rate": 0.0001575493873468367, "loss": 0.5132, "step": 4249 }, { "epoch": 0.31511826203010307, "grad_norm": 0.3521828055381775, "learning_rate": 0.00015753938484621156, "loss": 0.5044, "step": 4250 }, { "epoch": 0.3151924075035219, "grad_norm": 0.36824309825897217, "learning_rate": 0.0001575293823455864, "loss": 0.5546, "step": 4251 }, { "epoch": 0.3152665529769408, "grad_norm": 0.36393022537231445, "learning_rate": 0.00015751937984496126, "loss": 0.5556, "step": 4252 }, { "epoch": 0.3153406984503596, "grad_norm": 0.36243829131126404, "learning_rate": 0.00015750937734433607, "loss": 0.514, "step": 4253 }, { "epoch": 0.31541484392377844, "grad_norm": 0.3719482421875, "learning_rate": 0.00015749937484371094, "loss": 0.526, "step": 4254 }, { "epoch": 0.31548898939719733, "grad_norm": 0.3751578629016876, "learning_rate": 0.00015748937234308578, "loss": 0.5975, "step": 4255 }, { "epoch": 0.31556313487061616, "grad_norm": 0.3411685824394226, "learning_rate": 0.00015747936984246064, "loss": 0.4533, "step": 4256 }, { "epoch": 0.315637280344035, "grad_norm": 0.34510302543640137, "learning_rate": 0.00015746936734183545, "loss": 0.5055, "step": 4257 }, { "epoch": 0.3157114258174538, "grad_norm": 0.3383331894874573, "learning_rate": 0.00015745936484121031, "loss": 0.5158, "step": 4258 }, { "epoch": 0.3157855712908727, "grad_norm": 0.356693834066391, "learning_rate": 0.00015744936234058515, "loss": 0.5529, "step": 4259 }, { "epoch": 0.31585971676429153, "grad_norm": 0.37055331468582153, "learning_rate": 0.00015743935983996, "loss": 0.5262, "step": 4260 }, { "epoch": 0.31593386223771036, "grad_norm": 0.34249207377433777, "learning_rate": 0.00015742935733933483, "loss": 0.5186, "step": 4261 }, { "epoch": 0.31600800771112925, "grad_norm": 0.3503451943397522, "learning_rate": 0.0001574193548387097, "loss": 0.5137, "step": 4262 }, { "epoch": 0.3160821531845481, "grad_norm": 0.3444245755672455, "learning_rate": 0.00015740935233808453, "loss": 0.505, "step": 4263 }, { "epoch": 0.3161562986579669, "grad_norm": 0.36463382840156555, "learning_rate": 0.00015739934983745936, "loss": 0.529, "step": 4264 }, { "epoch": 0.3162304441313858, "grad_norm": 0.3396190106868744, "learning_rate": 0.0001573893473368342, "loss": 0.5013, "step": 4265 }, { "epoch": 0.3163045896048046, "grad_norm": 0.35971054434776306, "learning_rate": 0.00015737934483620907, "loss": 0.5378, "step": 4266 }, { "epoch": 0.31637873507822345, "grad_norm": 0.3563656806945801, "learning_rate": 0.0001573693423355839, "loss": 0.5028, "step": 4267 }, { "epoch": 0.31645288055164233, "grad_norm": 0.358222633600235, "learning_rate": 0.00015735933983495874, "loss": 0.4897, "step": 4268 }, { "epoch": 0.31652702602506116, "grad_norm": 0.34831738471984863, "learning_rate": 0.0001573493373343336, "loss": 0.5006, "step": 4269 }, { "epoch": 0.31660117149848, "grad_norm": 0.3906458020210266, "learning_rate": 0.00015733933483370844, "loss": 0.5927, "step": 4270 }, { "epoch": 0.3166753169718989, "grad_norm": 0.3585027754306793, "learning_rate": 0.00015732933233308328, "loss": 0.5473, "step": 4271 }, { "epoch": 0.3167494624453177, "grad_norm": 0.3294893801212311, "learning_rate": 0.00015731932983245811, "loss": 0.4959, "step": 4272 }, { "epoch": 0.31682360791873654, "grad_norm": 0.3504067063331604, "learning_rate": 0.00015730932733183298, "loss": 0.4888, "step": 4273 }, { "epoch": 0.3168977533921554, "grad_norm": 0.3473062813282013, "learning_rate": 0.00015729932483120782, "loss": 0.5012, "step": 4274 }, { "epoch": 0.31697189886557425, "grad_norm": 0.3767194449901581, "learning_rate": 0.00015728932233058265, "loss": 0.5697, "step": 4275 }, { "epoch": 0.3170460443389931, "grad_norm": 0.37383630871772766, "learning_rate": 0.0001572793198299575, "loss": 0.5405, "step": 4276 }, { "epoch": 0.31712018981241197, "grad_norm": 0.3657255172729492, "learning_rate": 0.00015726931732933235, "loss": 0.5304, "step": 4277 }, { "epoch": 0.3171943352858308, "grad_norm": 0.37538209557533264, "learning_rate": 0.0001572593148287072, "loss": 0.5315, "step": 4278 }, { "epoch": 0.3172684807592496, "grad_norm": 0.35161444544792175, "learning_rate": 0.00015724931232808203, "loss": 0.4982, "step": 4279 }, { "epoch": 0.3173426262326685, "grad_norm": 0.34304141998291016, "learning_rate": 0.00015723930982745687, "loss": 0.4911, "step": 4280 }, { "epoch": 0.31741677170608734, "grad_norm": 0.36278241872787476, "learning_rate": 0.00015722930732683173, "loss": 0.5693, "step": 4281 }, { "epoch": 0.31749091717950617, "grad_norm": 0.3750555217266083, "learning_rate": 0.00015721930482620654, "loss": 0.5357, "step": 4282 }, { "epoch": 0.31756506265292506, "grad_norm": 0.36059626936912537, "learning_rate": 0.0001572093023255814, "loss": 0.5342, "step": 4283 }, { "epoch": 0.3176392081263439, "grad_norm": 0.3667651414871216, "learning_rate": 0.00015719929982495624, "loss": 0.531, "step": 4284 }, { "epoch": 0.3177133535997627, "grad_norm": 0.37519803643226624, "learning_rate": 0.0001571892973243311, "loss": 0.5503, "step": 4285 }, { "epoch": 0.3177874990731816, "grad_norm": 0.3640037477016449, "learning_rate": 0.00015717929482370592, "loss": 0.5284, "step": 4286 }, { "epoch": 0.31786164454660043, "grad_norm": 0.34191781282424927, "learning_rate": 0.00015716929232308078, "loss": 0.4812, "step": 4287 }, { "epoch": 0.31793579002001926, "grad_norm": 0.3675232529640198, "learning_rate": 0.00015715928982245562, "loss": 0.5214, "step": 4288 }, { "epoch": 0.31800993549343815, "grad_norm": 0.36352023482322693, "learning_rate": 0.00015714928732183048, "loss": 0.5042, "step": 4289 }, { "epoch": 0.318084080966857, "grad_norm": 0.3882080912590027, "learning_rate": 0.0001571392848212053, "loss": 0.5184, "step": 4290 }, { "epoch": 0.3181582264402758, "grad_norm": 0.3817613124847412, "learning_rate": 0.00015712928232058016, "loss": 0.519, "step": 4291 }, { "epoch": 0.3182323719136947, "grad_norm": 0.37355661392211914, "learning_rate": 0.000157119279819955, "loss": 0.5435, "step": 4292 }, { "epoch": 0.3183065173871135, "grad_norm": 0.3823162913322449, "learning_rate": 0.00015710927731932986, "loss": 0.5595, "step": 4293 }, { "epoch": 0.31838066286053235, "grad_norm": 0.3642808496952057, "learning_rate": 0.00015709927481870467, "loss": 0.5094, "step": 4294 }, { "epoch": 0.31845480833395123, "grad_norm": 0.38223057985305786, "learning_rate": 0.00015708927231807953, "loss": 0.5369, "step": 4295 }, { "epoch": 0.31852895380737006, "grad_norm": 0.38124558329582214, "learning_rate": 0.00015707926981745437, "loss": 0.5828, "step": 4296 }, { "epoch": 0.3186030992807889, "grad_norm": 0.3956843912601471, "learning_rate": 0.0001570692673168292, "loss": 0.5684, "step": 4297 }, { "epoch": 0.3186772447542078, "grad_norm": 0.3568836748600006, "learning_rate": 0.00015705926481620404, "loss": 0.5102, "step": 4298 }, { "epoch": 0.3187513902276266, "grad_norm": 0.354462206363678, "learning_rate": 0.0001570492623155789, "loss": 0.5197, "step": 4299 }, { "epoch": 0.31882553570104544, "grad_norm": 0.3801683783531189, "learning_rate": 0.00015703925981495374, "loss": 0.5402, "step": 4300 }, { "epoch": 0.3188996811744643, "grad_norm": 0.40182673931121826, "learning_rate": 0.00015702925731432858, "loss": 0.5612, "step": 4301 }, { "epoch": 0.31897382664788315, "grad_norm": 0.35851597785949707, "learning_rate": 0.00015701925481370344, "loss": 0.4866, "step": 4302 }, { "epoch": 0.319047972121302, "grad_norm": 0.3697846531867981, "learning_rate": 0.00015700925231307828, "loss": 0.5097, "step": 4303 }, { "epoch": 0.31912211759472087, "grad_norm": 0.38027095794677734, "learning_rate": 0.00015699924981245312, "loss": 0.5518, "step": 4304 }, { "epoch": 0.3191962630681397, "grad_norm": 0.39000383019447327, "learning_rate": 0.00015698924731182796, "loss": 0.5106, "step": 4305 }, { "epoch": 0.3192704085415585, "grad_norm": 0.33684471249580383, "learning_rate": 0.00015697924481120282, "loss": 0.5154, "step": 4306 }, { "epoch": 0.3193445540149774, "grad_norm": 0.35821789503097534, "learning_rate": 0.00015696924231057766, "loss": 0.5207, "step": 4307 }, { "epoch": 0.31941869948839624, "grad_norm": 0.34894171357154846, "learning_rate": 0.0001569592398099525, "loss": 0.5011, "step": 4308 }, { "epoch": 0.31949284496181507, "grad_norm": 0.3741971254348755, "learning_rate": 0.00015694923730932733, "loss": 0.5871, "step": 4309 }, { "epoch": 0.31956699043523396, "grad_norm": 0.3383735418319702, "learning_rate": 0.0001569392348087022, "loss": 0.508, "step": 4310 }, { "epoch": 0.3196411359086528, "grad_norm": 0.3552512526512146, "learning_rate": 0.00015692923230807703, "loss": 0.5305, "step": 4311 }, { "epoch": 0.3197152813820716, "grad_norm": 0.34144601225852966, "learning_rate": 0.00015691922980745187, "loss": 0.5302, "step": 4312 }, { "epoch": 0.3197894268554905, "grad_norm": 0.35132133960723877, "learning_rate": 0.0001569092273068267, "loss": 0.475, "step": 4313 }, { "epoch": 0.31986357232890933, "grad_norm": 0.35382676124572754, "learning_rate": 0.00015689922480620157, "loss": 0.5306, "step": 4314 }, { "epoch": 0.31993771780232816, "grad_norm": 0.3547792434692383, "learning_rate": 0.0001568892223055764, "loss": 0.5243, "step": 4315 }, { "epoch": 0.320011863275747, "grad_norm": 0.37757566571235657, "learning_rate": 0.00015687921980495125, "loss": 0.5217, "step": 4316 }, { "epoch": 0.3200860087491659, "grad_norm": 0.3760511577129364, "learning_rate": 0.00015686921730432608, "loss": 0.5752, "step": 4317 }, { "epoch": 0.3201601542225847, "grad_norm": 0.34633395075798035, "learning_rate": 0.00015685921480370095, "loss": 0.5264, "step": 4318 }, { "epoch": 0.32023429969600353, "grad_norm": 0.38020968437194824, "learning_rate": 0.00015684921230307576, "loss": 0.5156, "step": 4319 }, { "epoch": 0.3203084451694224, "grad_norm": 0.36158573627471924, "learning_rate": 0.00015683920980245062, "loss": 0.5199, "step": 4320 }, { "epoch": 0.32038259064284125, "grad_norm": 0.3858563303947449, "learning_rate": 0.00015682920730182546, "loss": 0.5171, "step": 4321 }, { "epoch": 0.3204567361162601, "grad_norm": 0.37609410285949707, "learning_rate": 0.00015681920480120032, "loss": 0.5295, "step": 4322 }, { "epoch": 0.32053088158967896, "grad_norm": 0.37179261445999146, "learning_rate": 0.00015680920230057513, "loss": 0.5243, "step": 4323 }, { "epoch": 0.3206050270630978, "grad_norm": 0.3938494920730591, "learning_rate": 0.00015679919979995, "loss": 0.5145, "step": 4324 }, { "epoch": 0.3206791725365166, "grad_norm": 0.3691622316837311, "learning_rate": 0.00015678919729932483, "loss": 0.5681, "step": 4325 }, { "epoch": 0.3207533180099355, "grad_norm": 0.3537808656692505, "learning_rate": 0.0001567791947986997, "loss": 0.524, "step": 4326 }, { "epoch": 0.32082746348335434, "grad_norm": 0.34005483984947205, "learning_rate": 0.0001567691922980745, "loss": 0.4688, "step": 4327 }, { "epoch": 0.32090160895677317, "grad_norm": 0.3705071806907654, "learning_rate": 0.00015675918979744937, "loss": 0.5212, "step": 4328 }, { "epoch": 0.32097575443019205, "grad_norm": 0.3754902184009552, "learning_rate": 0.0001567491872968242, "loss": 0.551, "step": 4329 }, { "epoch": 0.3210498999036109, "grad_norm": 0.3589431941509247, "learning_rate": 0.00015673918479619907, "loss": 0.5149, "step": 4330 }, { "epoch": 0.3211240453770297, "grad_norm": 0.34061819314956665, "learning_rate": 0.00015672918229557388, "loss": 0.5178, "step": 4331 }, { "epoch": 0.3211981908504486, "grad_norm": 0.3678046762943268, "learning_rate": 0.00015671917979494875, "loss": 0.505, "step": 4332 }, { "epoch": 0.3212723363238674, "grad_norm": 0.352742999792099, "learning_rate": 0.00015670917729432358, "loss": 0.4864, "step": 4333 }, { "epoch": 0.32134648179728625, "grad_norm": 0.3675253093242645, "learning_rate": 0.00015669917479369842, "loss": 0.5573, "step": 4334 }, { "epoch": 0.32142062727070514, "grad_norm": 0.3588803708553314, "learning_rate": 0.00015668917229307329, "loss": 0.5512, "step": 4335 }, { "epoch": 0.32149477274412397, "grad_norm": 0.361253947019577, "learning_rate": 0.00015667916979244812, "loss": 0.5382, "step": 4336 }, { "epoch": 0.3215689182175428, "grad_norm": 0.38948681950569153, "learning_rate": 0.00015666916729182296, "loss": 0.5833, "step": 4337 }, { "epoch": 0.3216430636909617, "grad_norm": 0.37081125378608704, "learning_rate": 0.0001566591647911978, "loss": 0.5412, "step": 4338 }, { "epoch": 0.3217172091643805, "grad_norm": 0.3749834895133972, "learning_rate": 0.00015664916229057266, "loss": 0.526, "step": 4339 }, { "epoch": 0.32179135463779934, "grad_norm": 0.3495367467403412, "learning_rate": 0.0001566391597899475, "loss": 0.532, "step": 4340 }, { "epoch": 0.32186550011121823, "grad_norm": 0.3962334394454956, "learning_rate": 0.00015662915728932233, "loss": 0.5562, "step": 4341 }, { "epoch": 0.32193964558463706, "grad_norm": 0.37188589572906494, "learning_rate": 0.00015661915478869717, "loss": 0.5834, "step": 4342 }, { "epoch": 0.3220137910580559, "grad_norm": 0.36521515250205994, "learning_rate": 0.00015660915228807204, "loss": 0.5746, "step": 4343 }, { "epoch": 0.32208793653147477, "grad_norm": 0.3646768033504486, "learning_rate": 0.00015659914978744687, "loss": 0.5359, "step": 4344 }, { "epoch": 0.3221620820048936, "grad_norm": 0.34590911865234375, "learning_rate": 0.0001565891472868217, "loss": 0.5119, "step": 4345 }, { "epoch": 0.32223622747831243, "grad_norm": 0.33893710374832153, "learning_rate": 0.00015657914478619655, "loss": 0.507, "step": 4346 }, { "epoch": 0.3223103729517313, "grad_norm": 0.3721477687358856, "learning_rate": 0.0001565691422855714, "loss": 0.556, "step": 4347 }, { "epoch": 0.32238451842515015, "grad_norm": 0.3672190010547638, "learning_rate": 0.00015655913978494625, "loss": 0.5278, "step": 4348 }, { "epoch": 0.322458663898569, "grad_norm": 0.33895328640937805, "learning_rate": 0.00015654913728432109, "loss": 0.4987, "step": 4349 }, { "epoch": 0.32253280937198786, "grad_norm": 0.3631647527217865, "learning_rate": 0.00015653913478369592, "loss": 0.5078, "step": 4350 }, { "epoch": 0.3226069548454067, "grad_norm": 0.3761783838272095, "learning_rate": 0.0001565291322830708, "loss": 0.4977, "step": 4351 }, { "epoch": 0.3226811003188255, "grad_norm": 0.3770754039287567, "learning_rate": 0.00015651912978244562, "loss": 0.5068, "step": 4352 }, { "epoch": 0.3227552457922444, "grad_norm": 0.369062215089798, "learning_rate": 0.00015650912728182046, "loss": 0.5346, "step": 4353 }, { "epoch": 0.32282939126566323, "grad_norm": 0.37398818135261536, "learning_rate": 0.0001564991247811953, "loss": 0.5376, "step": 4354 }, { "epoch": 0.32290353673908206, "grad_norm": 0.3564271330833435, "learning_rate": 0.00015648912228057016, "loss": 0.5279, "step": 4355 }, { "epoch": 0.32297768221250095, "grad_norm": 0.35639509558677673, "learning_rate": 0.000156479119779945, "loss": 0.4943, "step": 4356 }, { "epoch": 0.3230518276859198, "grad_norm": 0.3827967345714569, "learning_rate": 0.00015646911727931984, "loss": 0.5065, "step": 4357 }, { "epoch": 0.3231259731593386, "grad_norm": 0.394087553024292, "learning_rate": 0.00015645911477869467, "loss": 0.5523, "step": 4358 }, { "epoch": 0.3232001186327575, "grad_norm": 0.3849841356277466, "learning_rate": 0.00015644911227806954, "loss": 0.5135, "step": 4359 }, { "epoch": 0.3232742641061763, "grad_norm": 0.3563753366470337, "learning_rate": 0.00015643910977744435, "loss": 0.5184, "step": 4360 }, { "epoch": 0.32334840957959515, "grad_norm": 0.35956743359565735, "learning_rate": 0.0001564291072768192, "loss": 0.5014, "step": 4361 }, { "epoch": 0.32342255505301404, "grad_norm": 0.3741682767868042, "learning_rate": 0.00015641910477619405, "loss": 0.4894, "step": 4362 }, { "epoch": 0.32349670052643287, "grad_norm": 0.44259342551231384, "learning_rate": 0.0001564091022755689, "loss": 0.5734, "step": 4363 }, { "epoch": 0.3235708459998517, "grad_norm": 0.4130704700946808, "learning_rate": 0.00015639909977494372, "loss": 0.5279, "step": 4364 }, { "epoch": 0.3236449914732706, "grad_norm": 0.3808513283729553, "learning_rate": 0.0001563890972743186, "loss": 0.5489, "step": 4365 }, { "epoch": 0.3237191369466894, "grad_norm": 0.3428858518600464, "learning_rate": 0.00015637909477369342, "loss": 0.5236, "step": 4366 }, { "epoch": 0.32379328242010824, "grad_norm": 0.36626091599464417, "learning_rate": 0.0001563690922730683, "loss": 0.5239, "step": 4367 }, { "epoch": 0.3238674278935271, "grad_norm": 0.3895432651042938, "learning_rate": 0.0001563590897724431, "loss": 0.5397, "step": 4368 }, { "epoch": 0.32394157336694596, "grad_norm": 0.3618556261062622, "learning_rate": 0.00015634908727181796, "loss": 0.5254, "step": 4369 }, { "epoch": 0.3240157188403648, "grad_norm": 0.41363829374313354, "learning_rate": 0.0001563390847711928, "loss": 0.5355, "step": 4370 }, { "epoch": 0.32408986431378367, "grad_norm": 0.375773549079895, "learning_rate": 0.00015632908227056764, "loss": 0.5784, "step": 4371 }, { "epoch": 0.3241640097872025, "grad_norm": 0.3768467605113983, "learning_rate": 0.0001563190797699425, "loss": 0.5355, "step": 4372 }, { "epoch": 0.32423815526062133, "grad_norm": 0.38879886269569397, "learning_rate": 0.00015630907726931734, "loss": 0.5334, "step": 4373 }, { "epoch": 0.32431230073404016, "grad_norm": 0.3778061866760254, "learning_rate": 0.00015629907476869218, "loss": 0.5148, "step": 4374 }, { "epoch": 0.32438644620745904, "grad_norm": 0.3608262240886688, "learning_rate": 0.000156289072268067, "loss": 0.5404, "step": 4375 }, { "epoch": 0.3244605916808779, "grad_norm": 0.36888501048088074, "learning_rate": 0.00015627906976744188, "loss": 0.5133, "step": 4376 }, { "epoch": 0.3245347371542967, "grad_norm": 0.36764708161354065, "learning_rate": 0.00015626906726681671, "loss": 0.51, "step": 4377 }, { "epoch": 0.3246088826277156, "grad_norm": 0.3541494905948639, "learning_rate": 0.00015625906476619158, "loss": 0.5401, "step": 4378 }, { "epoch": 0.3246830281011344, "grad_norm": 0.34617871046066284, "learning_rate": 0.0001562490622655664, "loss": 0.505, "step": 4379 }, { "epoch": 0.32475717357455325, "grad_norm": 0.3741236925125122, "learning_rate": 0.00015623905976494125, "loss": 0.5188, "step": 4380 }, { "epoch": 0.32483131904797213, "grad_norm": 0.35670506954193115, "learning_rate": 0.0001562290572643161, "loss": 0.5327, "step": 4381 }, { "epoch": 0.32490546452139096, "grad_norm": 0.3575766682624817, "learning_rate": 0.00015621905476369093, "loss": 0.4687, "step": 4382 }, { "epoch": 0.3249796099948098, "grad_norm": 0.3759806752204895, "learning_rate": 0.00015620905226306576, "loss": 0.527, "step": 4383 }, { "epoch": 0.3250537554682287, "grad_norm": 0.3452536463737488, "learning_rate": 0.00015619904976244063, "loss": 0.5037, "step": 4384 }, { "epoch": 0.3251279009416475, "grad_norm": 0.3551695644855499, "learning_rate": 0.00015618904726181547, "loss": 0.5444, "step": 4385 }, { "epoch": 0.32520204641506634, "grad_norm": 0.40187767148017883, "learning_rate": 0.0001561790447611903, "loss": 0.5346, "step": 4386 }, { "epoch": 0.3252761918884852, "grad_norm": 0.415995717048645, "learning_rate": 0.00015616904226056514, "loss": 0.5127, "step": 4387 }, { "epoch": 0.32535033736190405, "grad_norm": 0.36558467149734497, "learning_rate": 0.00015615903975994, "loss": 0.5007, "step": 4388 }, { "epoch": 0.3254244828353229, "grad_norm": 0.3810109496116638, "learning_rate": 0.00015614903725931484, "loss": 0.4842, "step": 4389 }, { "epoch": 0.32549862830874177, "grad_norm": 0.3403966426849365, "learning_rate": 0.00015613903475868968, "loss": 0.4799, "step": 4390 }, { "epoch": 0.3255727737821606, "grad_norm": 0.3749012351036072, "learning_rate": 0.00015612903225806451, "loss": 0.5395, "step": 4391 }, { "epoch": 0.3256469192555794, "grad_norm": 0.3557334244251251, "learning_rate": 0.00015611902975743938, "loss": 0.5404, "step": 4392 }, { "epoch": 0.3257210647289983, "grad_norm": 0.3624899387359619, "learning_rate": 0.00015610902725681422, "loss": 0.4977, "step": 4393 }, { "epoch": 0.32579521020241714, "grad_norm": 0.39691442251205444, "learning_rate": 0.00015609902475618905, "loss": 0.592, "step": 4394 }, { "epoch": 0.32586935567583597, "grad_norm": 0.3867834508419037, "learning_rate": 0.0001560890222555639, "loss": 0.5622, "step": 4395 }, { "epoch": 0.32594350114925486, "grad_norm": 0.3408941924571991, "learning_rate": 0.00015607901975493875, "loss": 0.4705, "step": 4396 }, { "epoch": 0.3260176466226737, "grad_norm": 0.3773006796836853, "learning_rate": 0.00015606901725431356, "loss": 0.4981, "step": 4397 }, { "epoch": 0.3260917920960925, "grad_norm": 0.3650248646736145, "learning_rate": 0.00015605901475368843, "loss": 0.5245, "step": 4398 }, { "epoch": 0.3261659375695114, "grad_norm": 0.3649666905403137, "learning_rate": 0.00015604901225306327, "loss": 0.5037, "step": 4399 }, { "epoch": 0.32624008304293023, "grad_norm": 0.3678824305534363, "learning_rate": 0.00015603900975243813, "loss": 0.5317, "step": 4400 }, { "epoch": 0.32631422851634906, "grad_norm": 0.37992003560066223, "learning_rate": 0.00015602900725181294, "loss": 0.5845, "step": 4401 }, { "epoch": 0.32638837398976794, "grad_norm": 0.3635723292827606, "learning_rate": 0.0001560190047511878, "loss": 0.5104, "step": 4402 }, { "epoch": 0.3264625194631868, "grad_norm": 0.3380695581436157, "learning_rate": 0.00015600900225056264, "loss": 0.5225, "step": 4403 }, { "epoch": 0.3265366649366056, "grad_norm": 0.3721259832382202, "learning_rate": 0.0001559989997499375, "loss": 0.5404, "step": 4404 }, { "epoch": 0.3266108104100245, "grad_norm": 0.3655799627304077, "learning_rate": 0.00015598899724931234, "loss": 0.5128, "step": 4405 }, { "epoch": 0.3266849558834433, "grad_norm": 0.37339383363723755, "learning_rate": 0.00015597899474868718, "loss": 0.504, "step": 4406 }, { "epoch": 0.32675910135686215, "grad_norm": 0.3426039218902588, "learning_rate": 0.00015596899224806202, "loss": 0.4763, "step": 4407 }, { "epoch": 0.32683324683028103, "grad_norm": 0.38057583570480347, "learning_rate": 0.00015595898974743685, "loss": 0.5566, "step": 4408 }, { "epoch": 0.32690739230369986, "grad_norm": 0.35614484548568726, "learning_rate": 0.00015594898724681172, "loss": 0.4981, "step": 4409 }, { "epoch": 0.3269815377771187, "grad_norm": 0.3666004240512848, "learning_rate": 0.00015593898474618655, "loss": 0.5658, "step": 4410 }, { "epoch": 0.3270556832505376, "grad_norm": 0.3642128109931946, "learning_rate": 0.00015592898224556142, "loss": 0.529, "step": 4411 }, { "epoch": 0.3271298287239564, "grad_norm": 0.3571443259716034, "learning_rate": 0.00015591897974493623, "loss": 0.5123, "step": 4412 }, { "epoch": 0.32720397419737524, "grad_norm": 0.3874385952949524, "learning_rate": 0.0001559089772443111, "loss": 0.5621, "step": 4413 }, { "epoch": 0.3272781196707941, "grad_norm": 0.37018057703971863, "learning_rate": 0.00015589897474368593, "loss": 0.5228, "step": 4414 }, { "epoch": 0.32735226514421295, "grad_norm": 0.3981660008430481, "learning_rate": 0.0001558889722430608, "loss": 0.5953, "step": 4415 }, { "epoch": 0.3274264106176318, "grad_norm": 0.3520508110523224, "learning_rate": 0.0001558789697424356, "loss": 0.524, "step": 4416 }, { "epoch": 0.32750055609105067, "grad_norm": 0.3617519736289978, "learning_rate": 0.00015586896724181047, "loss": 0.5253, "step": 4417 }, { "epoch": 0.3275747015644695, "grad_norm": 0.3210487365722656, "learning_rate": 0.0001558589647411853, "loss": 0.4835, "step": 4418 }, { "epoch": 0.3276488470378883, "grad_norm": 0.3636001646518707, "learning_rate": 0.00015584896224056014, "loss": 0.5207, "step": 4419 }, { "epoch": 0.3277229925113072, "grad_norm": 0.35400962829589844, "learning_rate": 0.00015583895973993498, "loss": 0.5002, "step": 4420 }, { "epoch": 0.32779713798472604, "grad_norm": 0.357876181602478, "learning_rate": 0.00015582895723930984, "loss": 0.5121, "step": 4421 }, { "epoch": 0.32787128345814487, "grad_norm": 0.3594564199447632, "learning_rate": 0.00015581895473868468, "loss": 0.5205, "step": 4422 }, { "epoch": 0.32794542893156375, "grad_norm": 0.3705635368824005, "learning_rate": 0.00015580895223805952, "loss": 0.5228, "step": 4423 }, { "epoch": 0.3280195744049826, "grad_norm": 0.35483482480049133, "learning_rate": 0.00015579894973743436, "loss": 0.5459, "step": 4424 }, { "epoch": 0.3280937198784014, "grad_norm": 0.37612396478652954, "learning_rate": 0.00015578894723680922, "loss": 0.5513, "step": 4425 }, { "epoch": 0.3281678653518203, "grad_norm": 0.3642234206199646, "learning_rate": 0.00015577894473618406, "loss": 0.5339, "step": 4426 }, { "epoch": 0.32824201082523913, "grad_norm": 0.35568737983703613, "learning_rate": 0.0001557689422355589, "loss": 0.5408, "step": 4427 }, { "epoch": 0.32831615629865796, "grad_norm": 0.370050311088562, "learning_rate": 0.00015575893973493373, "loss": 0.5353, "step": 4428 }, { "epoch": 0.3283903017720768, "grad_norm": 0.38492244482040405, "learning_rate": 0.0001557489372343086, "loss": 0.5368, "step": 4429 }, { "epoch": 0.32846444724549567, "grad_norm": 0.3786000907421112, "learning_rate": 0.00015573893473368343, "loss": 0.5176, "step": 4430 }, { "epoch": 0.3285385927189145, "grad_norm": 0.35831865668296814, "learning_rate": 0.00015572893223305827, "loss": 0.5554, "step": 4431 }, { "epoch": 0.32861273819233333, "grad_norm": 0.35072439908981323, "learning_rate": 0.0001557189297324331, "loss": 0.539, "step": 4432 }, { "epoch": 0.3286868836657522, "grad_norm": 0.35316506028175354, "learning_rate": 0.00015570892723180797, "loss": 0.5391, "step": 4433 }, { "epoch": 0.32876102913917105, "grad_norm": 0.35943883657455444, "learning_rate": 0.00015569892473118278, "loss": 0.5427, "step": 4434 }, { "epoch": 0.3288351746125899, "grad_norm": 0.3661952614784241, "learning_rate": 0.00015568892223055764, "loss": 0.5261, "step": 4435 }, { "epoch": 0.32890932008600876, "grad_norm": 0.3660911023616791, "learning_rate": 0.00015567891972993248, "loss": 0.5141, "step": 4436 }, { "epoch": 0.3289834655594276, "grad_norm": 0.3792217969894409, "learning_rate": 0.00015566891722930735, "loss": 0.5789, "step": 4437 }, { "epoch": 0.3290576110328464, "grad_norm": 0.3598741590976715, "learning_rate": 0.00015565891472868218, "loss": 0.5061, "step": 4438 }, { "epoch": 0.3291317565062653, "grad_norm": 0.35661473870277405, "learning_rate": 0.00015564891222805702, "loss": 0.5188, "step": 4439 }, { "epoch": 0.32920590197968413, "grad_norm": 0.34733012318611145, "learning_rate": 0.00015563890972743186, "loss": 0.5118, "step": 4440 }, { "epoch": 0.32928004745310296, "grad_norm": 0.37982165813446045, "learning_rate": 0.00015562890722680672, "loss": 0.524, "step": 4441 }, { "epoch": 0.32935419292652185, "grad_norm": 0.3580527901649475, "learning_rate": 0.00015561890472618156, "loss": 0.5211, "step": 4442 }, { "epoch": 0.3294283383999407, "grad_norm": 0.35755786299705505, "learning_rate": 0.0001556089022255564, "loss": 0.5378, "step": 4443 }, { "epoch": 0.3295024838733595, "grad_norm": 0.3281191289424896, "learning_rate": 0.00015559889972493123, "loss": 0.5048, "step": 4444 }, { "epoch": 0.3295766293467784, "grad_norm": 0.36294087767601013, "learning_rate": 0.00015558889722430607, "loss": 0.5441, "step": 4445 }, { "epoch": 0.3296507748201972, "grad_norm": 0.35636070370674133, "learning_rate": 0.00015557889472368093, "loss": 0.5071, "step": 4446 }, { "epoch": 0.32972492029361605, "grad_norm": 0.3821764588356018, "learning_rate": 0.00015556889222305577, "loss": 0.5568, "step": 4447 }, { "epoch": 0.32979906576703494, "grad_norm": 0.37103167176246643, "learning_rate": 0.00015555888972243064, "loss": 0.4957, "step": 4448 }, { "epoch": 0.32987321124045377, "grad_norm": 0.3905184268951416, "learning_rate": 0.00015554888722180545, "loss": 0.5597, "step": 4449 }, { "epoch": 0.3299473567138726, "grad_norm": 0.35774582624435425, "learning_rate": 0.0001555388847211803, "loss": 0.4922, "step": 4450 }, { "epoch": 0.3300215021872915, "grad_norm": 0.3418186604976654, "learning_rate": 0.00015552888222055515, "loss": 0.5142, "step": 4451 }, { "epoch": 0.3300956476607103, "grad_norm": 0.3842024505138397, "learning_rate": 0.00015551887971993, "loss": 0.5273, "step": 4452 }, { "epoch": 0.33016979313412914, "grad_norm": 0.36182916164398193, "learning_rate": 0.00015550887721930482, "loss": 0.5028, "step": 4453 }, { "epoch": 0.330243938607548, "grad_norm": 0.3518436551094055, "learning_rate": 0.00015549887471867969, "loss": 0.516, "step": 4454 }, { "epoch": 0.33031808408096686, "grad_norm": 0.37937843799591064, "learning_rate": 0.00015548887221805452, "loss": 0.5466, "step": 4455 }, { "epoch": 0.3303922295543857, "grad_norm": 0.3657107949256897, "learning_rate": 0.00015547886971742936, "loss": 0.5643, "step": 4456 }, { "epoch": 0.33046637502780457, "grad_norm": 0.3608752191066742, "learning_rate": 0.0001554688672168042, "loss": 0.5399, "step": 4457 }, { "epoch": 0.3305405205012234, "grad_norm": 0.37656551599502563, "learning_rate": 0.00015545886471617906, "loss": 0.5406, "step": 4458 }, { "epoch": 0.33061466597464223, "grad_norm": 0.3740786910057068, "learning_rate": 0.0001554488622155539, "loss": 0.534, "step": 4459 }, { "epoch": 0.3306888114480611, "grad_norm": 0.35087329149246216, "learning_rate": 0.00015543885971492873, "loss": 0.5094, "step": 4460 }, { "epoch": 0.33076295692147994, "grad_norm": 0.3459804654121399, "learning_rate": 0.00015542885721430357, "loss": 0.5337, "step": 4461 }, { "epoch": 0.3308371023948988, "grad_norm": 0.3476269841194153, "learning_rate": 0.00015541885471367844, "loss": 0.4887, "step": 4462 }, { "epoch": 0.33091124786831766, "grad_norm": 0.3144828677177429, "learning_rate": 0.00015540885221305327, "loss": 0.4764, "step": 4463 }, { "epoch": 0.3309853933417365, "grad_norm": 0.36310523748397827, "learning_rate": 0.0001553988497124281, "loss": 0.516, "step": 4464 }, { "epoch": 0.3310595388151553, "grad_norm": 0.34762224555015564, "learning_rate": 0.00015538884721180295, "loss": 0.4831, "step": 4465 }, { "epoch": 0.3311336842885742, "grad_norm": 0.3836202919483185, "learning_rate": 0.0001553788447111778, "loss": 0.5721, "step": 4466 }, { "epoch": 0.33120782976199303, "grad_norm": 0.3475218415260315, "learning_rate": 0.00015536884221055265, "loss": 0.5096, "step": 4467 }, { "epoch": 0.33128197523541186, "grad_norm": 0.36092624068260193, "learning_rate": 0.00015535883970992749, "loss": 0.5436, "step": 4468 }, { "epoch": 0.33135612070883075, "grad_norm": 0.37589722871780396, "learning_rate": 0.00015534883720930232, "loss": 0.5284, "step": 4469 }, { "epoch": 0.3314302661822496, "grad_norm": 0.36320558190345764, "learning_rate": 0.0001553388347086772, "loss": 0.5088, "step": 4470 }, { "epoch": 0.3315044116556684, "grad_norm": 0.3691226541996002, "learning_rate": 0.00015532883220805202, "loss": 0.4892, "step": 4471 }, { "epoch": 0.3315785571290873, "grad_norm": 0.3720380365848541, "learning_rate": 0.00015531882970742686, "loss": 0.5494, "step": 4472 }, { "epoch": 0.3316527026025061, "grad_norm": 0.3508848249912262, "learning_rate": 0.0001553088272068017, "loss": 0.5259, "step": 4473 }, { "epoch": 0.33172684807592495, "grad_norm": 0.3749612867832184, "learning_rate": 0.00015529882470617656, "loss": 0.5321, "step": 4474 }, { "epoch": 0.33180099354934384, "grad_norm": 0.3648953437805176, "learning_rate": 0.0001552888222055514, "loss": 0.4884, "step": 4475 }, { "epoch": 0.33187513902276267, "grad_norm": 0.37393343448638916, "learning_rate": 0.00015527881970492624, "loss": 0.5476, "step": 4476 }, { "epoch": 0.3319492844961815, "grad_norm": 0.3722972571849823, "learning_rate": 0.00015526881720430107, "loss": 0.4789, "step": 4477 }, { "epoch": 0.3320234299696004, "grad_norm": 0.37910279631614685, "learning_rate": 0.00015525881470367594, "loss": 0.5248, "step": 4478 }, { "epoch": 0.3320975754430192, "grad_norm": 0.3486582338809967, "learning_rate": 0.00015524881220305077, "loss": 0.5061, "step": 4479 }, { "epoch": 0.33217172091643804, "grad_norm": 0.38280487060546875, "learning_rate": 0.0001552388097024256, "loss": 0.5588, "step": 4480 }, { "epoch": 0.3322458663898569, "grad_norm": 0.3822971284389496, "learning_rate": 0.00015522880720180048, "loss": 0.54, "step": 4481 }, { "epoch": 0.33232001186327575, "grad_norm": 0.363666832447052, "learning_rate": 0.00015521880470117529, "loss": 0.4903, "step": 4482 }, { "epoch": 0.3323941573366946, "grad_norm": 0.36384403705596924, "learning_rate": 0.00015520880220055015, "loss": 0.4936, "step": 4483 }, { "epoch": 0.33246830281011347, "grad_norm": 0.36438214778900146, "learning_rate": 0.000155198799699925, "loss": 0.4905, "step": 4484 }, { "epoch": 0.3325424482835323, "grad_norm": 0.3562285006046295, "learning_rate": 0.00015518879719929985, "loss": 0.5535, "step": 4485 }, { "epoch": 0.33261659375695113, "grad_norm": 0.3826378881931305, "learning_rate": 0.00015517879469867466, "loss": 0.5369, "step": 4486 }, { "epoch": 0.33269073923036996, "grad_norm": 0.3592546880245209, "learning_rate": 0.00015516879219804953, "loss": 0.4921, "step": 4487 }, { "epoch": 0.33276488470378884, "grad_norm": 0.3970310091972351, "learning_rate": 0.00015515878969742436, "loss": 0.5375, "step": 4488 }, { "epoch": 0.3328390301772077, "grad_norm": 0.3740635812282562, "learning_rate": 0.00015514878719679923, "loss": 0.5243, "step": 4489 }, { "epoch": 0.3329131756506265, "grad_norm": 0.3461584448814392, "learning_rate": 0.00015513878469617404, "loss": 0.5075, "step": 4490 }, { "epoch": 0.3329873211240454, "grad_norm": 0.3506239354610443, "learning_rate": 0.0001551287821955489, "loss": 0.5084, "step": 4491 }, { "epoch": 0.3330614665974642, "grad_norm": 0.3538186550140381, "learning_rate": 0.00015511877969492374, "loss": 0.4959, "step": 4492 }, { "epoch": 0.33313561207088305, "grad_norm": 0.366509348154068, "learning_rate": 0.00015510877719429858, "loss": 0.4924, "step": 4493 }, { "epoch": 0.33320975754430193, "grad_norm": 0.38180622458457947, "learning_rate": 0.0001550987746936734, "loss": 0.565, "step": 4494 }, { "epoch": 0.33328390301772076, "grad_norm": 0.3876919746398926, "learning_rate": 0.00015508877219304828, "loss": 0.5175, "step": 4495 }, { "epoch": 0.3333580484911396, "grad_norm": 0.3740277886390686, "learning_rate": 0.00015507876969242311, "loss": 0.546, "step": 4496 }, { "epoch": 0.3334321939645585, "grad_norm": 0.3864303529262543, "learning_rate": 0.00015506876719179795, "loss": 0.5328, "step": 4497 }, { "epoch": 0.3335063394379773, "grad_norm": 0.34468576312065125, "learning_rate": 0.0001550587646911728, "loss": 0.4733, "step": 4498 }, { "epoch": 0.33358048491139614, "grad_norm": 0.36649268865585327, "learning_rate": 0.00015504876219054765, "loss": 0.5264, "step": 4499 }, { "epoch": 0.333654630384815, "grad_norm": 0.3597595989704132, "learning_rate": 0.0001550387596899225, "loss": 0.502, "step": 4500 }, { "epoch": 0.33372877585823385, "grad_norm": 0.3519309461116791, "learning_rate": 0.00015502875718929733, "loss": 0.5408, "step": 4501 }, { "epoch": 0.3338029213316527, "grad_norm": 0.3397335708141327, "learning_rate": 0.00015501875468867216, "loss": 0.5425, "step": 4502 }, { "epoch": 0.33387706680507157, "grad_norm": 0.37509244680404663, "learning_rate": 0.00015500875218804703, "loss": 0.5049, "step": 4503 }, { "epoch": 0.3339512122784904, "grad_norm": 0.34507766366004944, "learning_rate": 0.00015499874968742186, "loss": 0.4684, "step": 4504 }, { "epoch": 0.3340253577519092, "grad_norm": 0.3815193176269531, "learning_rate": 0.0001549887471867967, "loss": 0.5862, "step": 4505 }, { "epoch": 0.3340995032253281, "grad_norm": 0.3713020384311676, "learning_rate": 0.00015497874468617154, "loss": 0.5041, "step": 4506 }, { "epoch": 0.33417364869874694, "grad_norm": 0.3354581892490387, "learning_rate": 0.0001549687421855464, "loss": 0.5015, "step": 4507 }, { "epoch": 0.33424779417216577, "grad_norm": 0.38669219613075256, "learning_rate": 0.00015495873968492124, "loss": 0.5789, "step": 4508 }, { "epoch": 0.33432193964558465, "grad_norm": 0.34404268860816956, "learning_rate": 0.00015494873718429608, "loss": 0.502, "step": 4509 }, { "epoch": 0.3343960851190035, "grad_norm": 0.37682613730430603, "learning_rate": 0.00015493873468367091, "loss": 0.5688, "step": 4510 }, { "epoch": 0.3344702305924223, "grad_norm": 0.3601112365722656, "learning_rate": 0.00015492873218304578, "loss": 0.5444, "step": 4511 }, { "epoch": 0.3345443760658412, "grad_norm": 0.35470741987228394, "learning_rate": 0.00015491872968242062, "loss": 0.5249, "step": 4512 }, { "epoch": 0.33461852153926, "grad_norm": 0.36692532896995544, "learning_rate": 0.00015490872718179545, "loss": 0.5531, "step": 4513 }, { "epoch": 0.33469266701267886, "grad_norm": 0.36448922753334045, "learning_rate": 0.00015489872468117032, "loss": 0.5061, "step": 4514 }, { "epoch": 0.33476681248609774, "grad_norm": 0.3419749140739441, "learning_rate": 0.00015488872218054515, "loss": 0.4683, "step": 4515 }, { "epoch": 0.33484095795951657, "grad_norm": 0.3782820403575897, "learning_rate": 0.00015487871967992, "loss": 0.4976, "step": 4516 }, { "epoch": 0.3349151034329354, "grad_norm": 0.36859628558158875, "learning_rate": 0.00015486871717929483, "loss": 0.5468, "step": 4517 }, { "epoch": 0.3349892489063543, "grad_norm": 0.3555908799171448, "learning_rate": 0.0001548587146786697, "loss": 0.4939, "step": 4518 }, { "epoch": 0.3350633943797731, "grad_norm": 0.3677785396575928, "learning_rate": 0.0001548487121780445, "loss": 0.5198, "step": 4519 }, { "epoch": 0.33513753985319195, "grad_norm": 0.3568856716156006, "learning_rate": 0.00015483870967741937, "loss": 0.5224, "step": 4520 }, { "epoch": 0.33521168532661083, "grad_norm": 0.37683436274528503, "learning_rate": 0.0001548287071767942, "loss": 0.5313, "step": 4521 }, { "epoch": 0.33528583080002966, "grad_norm": 0.37655261158943176, "learning_rate": 0.00015481870467616907, "loss": 0.5614, "step": 4522 }, { "epoch": 0.3353599762734485, "grad_norm": 0.3579092025756836, "learning_rate": 0.00015480870217554388, "loss": 0.4986, "step": 4523 }, { "epoch": 0.3354341217468674, "grad_norm": 0.3686535060405731, "learning_rate": 0.00015479869967491874, "loss": 0.5287, "step": 4524 }, { "epoch": 0.3355082672202862, "grad_norm": 0.3590330183506012, "learning_rate": 0.00015478869717429358, "loss": 0.5377, "step": 4525 }, { "epoch": 0.33558241269370503, "grad_norm": 0.356618732213974, "learning_rate": 0.00015477869467366844, "loss": 0.5246, "step": 4526 }, { "epoch": 0.3356565581671239, "grad_norm": 0.38943883776664734, "learning_rate": 0.00015476869217304325, "loss": 0.5827, "step": 4527 }, { "epoch": 0.33573070364054275, "grad_norm": 0.35759487748146057, "learning_rate": 0.00015475868967241812, "loss": 0.5153, "step": 4528 }, { "epoch": 0.3358048491139616, "grad_norm": 0.3551807105541229, "learning_rate": 0.00015474868717179295, "loss": 0.5267, "step": 4529 }, { "epoch": 0.33587899458738046, "grad_norm": 0.34799137711524963, "learning_rate": 0.0001547386846711678, "loss": 0.4976, "step": 4530 }, { "epoch": 0.3359531400607993, "grad_norm": 0.35562098026275635, "learning_rate": 0.00015472868217054263, "loss": 0.5155, "step": 4531 }, { "epoch": 0.3360272855342181, "grad_norm": 0.4007495641708374, "learning_rate": 0.0001547186796699175, "loss": 0.5733, "step": 4532 }, { "epoch": 0.336101431007637, "grad_norm": 0.3686234652996063, "learning_rate": 0.00015470867716929233, "loss": 0.5647, "step": 4533 }, { "epoch": 0.33617557648105584, "grad_norm": 0.3703155517578125, "learning_rate": 0.00015469867466866717, "loss": 0.5021, "step": 4534 }, { "epoch": 0.33624972195447467, "grad_norm": 0.37798869609832764, "learning_rate": 0.000154688672168042, "loss": 0.5593, "step": 4535 }, { "epoch": 0.33632386742789355, "grad_norm": 0.3721315562725067, "learning_rate": 0.00015467866966741687, "loss": 0.5595, "step": 4536 }, { "epoch": 0.3363980129013124, "grad_norm": 0.3509705364704132, "learning_rate": 0.0001546686671667917, "loss": 0.5501, "step": 4537 }, { "epoch": 0.3364721583747312, "grad_norm": 0.35824891924858093, "learning_rate": 0.00015465866466616654, "loss": 0.522, "step": 4538 }, { "epoch": 0.3365463038481501, "grad_norm": 0.3550051152706146, "learning_rate": 0.00015464866216554138, "loss": 0.5333, "step": 4539 }, { "epoch": 0.3366204493215689, "grad_norm": 0.3663961589336395, "learning_rate": 0.00015463865966491624, "loss": 0.5382, "step": 4540 }, { "epoch": 0.33669459479498776, "grad_norm": 0.36454668641090393, "learning_rate": 0.00015462865716429108, "loss": 0.5193, "step": 4541 }, { "epoch": 0.33676874026840664, "grad_norm": 0.3588313162326813, "learning_rate": 0.00015461865466366592, "loss": 0.4861, "step": 4542 }, { "epoch": 0.33684288574182547, "grad_norm": 0.3449186384677887, "learning_rate": 0.00015460865216304076, "loss": 0.5201, "step": 4543 }, { "epoch": 0.3369170312152443, "grad_norm": 0.3467763662338257, "learning_rate": 0.00015459864966241562, "loss": 0.4891, "step": 4544 }, { "epoch": 0.33699117668866313, "grad_norm": 0.3569897711277008, "learning_rate": 0.00015458864716179046, "loss": 0.5094, "step": 4545 }, { "epoch": 0.337065322162082, "grad_norm": 0.413734495639801, "learning_rate": 0.0001545786446611653, "loss": 0.5769, "step": 4546 }, { "epoch": 0.33713946763550084, "grad_norm": 0.4106450080871582, "learning_rate": 0.00015456864216054016, "loss": 0.5673, "step": 4547 }, { "epoch": 0.3372136131089197, "grad_norm": 0.3443916440010071, "learning_rate": 0.000154558639659915, "loss": 0.5271, "step": 4548 }, { "epoch": 0.33728775858233856, "grad_norm": 0.37324365973472595, "learning_rate": 0.00015454863715928983, "loss": 0.5209, "step": 4549 }, { "epoch": 0.3373619040557574, "grad_norm": 0.3807687759399414, "learning_rate": 0.00015453863465866467, "loss": 0.5282, "step": 4550 }, { "epoch": 0.3374360495291762, "grad_norm": 0.38785111904144287, "learning_rate": 0.00015452863215803953, "loss": 0.5719, "step": 4551 }, { "epoch": 0.3375101950025951, "grad_norm": 0.3432094156742096, "learning_rate": 0.00015451862965741437, "loss": 0.4934, "step": 4552 }, { "epoch": 0.33758434047601393, "grad_norm": 0.39059555530548096, "learning_rate": 0.0001545086271567892, "loss": 0.5259, "step": 4553 }, { "epoch": 0.33765848594943276, "grad_norm": 0.3426321744918823, "learning_rate": 0.00015449862465616404, "loss": 0.5021, "step": 4554 }, { "epoch": 0.33773263142285165, "grad_norm": 0.37958866357803345, "learning_rate": 0.0001544886221555389, "loss": 0.5785, "step": 4555 }, { "epoch": 0.3378067768962705, "grad_norm": 0.3610263168811798, "learning_rate": 0.00015447861965491372, "loss": 0.5639, "step": 4556 }, { "epoch": 0.3378809223696893, "grad_norm": 0.38313525915145874, "learning_rate": 0.00015446861715428858, "loss": 0.5166, "step": 4557 }, { "epoch": 0.3379550678431082, "grad_norm": 0.3925975561141968, "learning_rate": 0.00015445861465366342, "loss": 0.4959, "step": 4558 }, { "epoch": 0.338029213316527, "grad_norm": 0.36447271704673767, "learning_rate": 0.00015444861215303828, "loss": 0.5104, "step": 4559 }, { "epoch": 0.33810335878994585, "grad_norm": 0.34544044733047485, "learning_rate": 0.0001544386096524131, "loss": 0.5128, "step": 4560 }, { "epoch": 0.33817750426336474, "grad_norm": 0.35996416211128235, "learning_rate": 0.00015442860715178796, "loss": 0.5109, "step": 4561 }, { "epoch": 0.33825164973678357, "grad_norm": 0.35649746656417847, "learning_rate": 0.0001544186046511628, "loss": 0.4846, "step": 4562 }, { "epoch": 0.3383257952102024, "grad_norm": 0.36313536763191223, "learning_rate": 0.00015440860215053766, "loss": 0.5467, "step": 4563 }, { "epoch": 0.3383999406836213, "grad_norm": 0.3973277807235718, "learning_rate": 0.00015439859964991247, "loss": 0.5441, "step": 4564 }, { "epoch": 0.3384740861570401, "grad_norm": 0.3535130023956299, "learning_rate": 0.00015438859714928733, "loss": 0.509, "step": 4565 }, { "epoch": 0.33854823163045894, "grad_norm": 0.4094129502773285, "learning_rate": 0.00015437859464866217, "loss": 0.6152, "step": 4566 }, { "epoch": 0.3386223771038778, "grad_norm": 0.36630943417549133, "learning_rate": 0.000154368592148037, "loss": 0.5303, "step": 4567 }, { "epoch": 0.33869652257729665, "grad_norm": 0.37876588106155396, "learning_rate": 0.00015435858964741185, "loss": 0.5207, "step": 4568 }, { "epoch": 0.3387706680507155, "grad_norm": 0.3560371398925781, "learning_rate": 0.0001543485871467867, "loss": 0.5206, "step": 4569 }, { "epoch": 0.33884481352413437, "grad_norm": 0.3688036799430847, "learning_rate": 0.00015433858464616155, "loss": 0.5194, "step": 4570 }, { "epoch": 0.3389189589975532, "grad_norm": 0.3600160777568817, "learning_rate": 0.00015432858214553638, "loss": 0.5561, "step": 4571 }, { "epoch": 0.33899310447097203, "grad_norm": 0.35347220301628113, "learning_rate": 0.00015431857964491122, "loss": 0.5079, "step": 4572 }, { "epoch": 0.3390672499443909, "grad_norm": 0.34002453088760376, "learning_rate": 0.00015430857714428608, "loss": 0.476, "step": 4573 }, { "epoch": 0.33914139541780974, "grad_norm": 0.3753575384616852, "learning_rate": 0.00015429857464366092, "loss": 0.5348, "step": 4574 }, { "epoch": 0.3392155408912286, "grad_norm": 0.4365794062614441, "learning_rate": 0.00015428857214303576, "loss": 0.5237, "step": 4575 }, { "epoch": 0.33928968636464746, "grad_norm": 0.35650834441185, "learning_rate": 0.0001542785696424106, "loss": 0.5167, "step": 4576 }, { "epoch": 0.3393638318380663, "grad_norm": 0.3510310649871826, "learning_rate": 0.00015426856714178546, "loss": 0.5116, "step": 4577 }, { "epoch": 0.3394379773114851, "grad_norm": 0.38032808899879456, "learning_rate": 0.0001542585646411603, "loss": 0.5568, "step": 4578 }, { "epoch": 0.339512122784904, "grad_norm": 0.3801775276660919, "learning_rate": 0.00015424856214053513, "loss": 0.5385, "step": 4579 }, { "epoch": 0.33958626825832283, "grad_norm": 0.37075045704841614, "learning_rate": 0.00015423855963991, "loss": 0.4949, "step": 4580 }, { "epoch": 0.33966041373174166, "grad_norm": 0.36673110723495483, "learning_rate": 0.00015422855713928484, "loss": 0.5198, "step": 4581 }, { "epoch": 0.33973455920516055, "grad_norm": 0.3627071678638458, "learning_rate": 0.00015421855463865967, "loss": 0.5148, "step": 4582 }, { "epoch": 0.3398087046785794, "grad_norm": 0.35931798815727234, "learning_rate": 0.0001542085521380345, "loss": 0.5249, "step": 4583 }, { "epoch": 0.3398828501519982, "grad_norm": 0.3482913076877594, "learning_rate": 0.00015419854963740937, "loss": 0.4855, "step": 4584 }, { "epoch": 0.3399569956254171, "grad_norm": 0.3974505066871643, "learning_rate": 0.0001541885471367842, "loss": 0.5784, "step": 4585 }, { "epoch": 0.3400311410988359, "grad_norm": 0.3570422828197479, "learning_rate": 0.00015417854463615905, "loss": 0.5341, "step": 4586 }, { "epoch": 0.34010528657225475, "grad_norm": 0.34713509678840637, "learning_rate": 0.00015416854213553389, "loss": 0.5082, "step": 4587 }, { "epoch": 0.34017943204567364, "grad_norm": 0.3846456706523895, "learning_rate": 0.00015415853963490875, "loss": 0.5458, "step": 4588 }, { "epoch": 0.34025357751909246, "grad_norm": 0.350970059633255, "learning_rate": 0.0001541485371342836, "loss": 0.4965, "step": 4589 }, { "epoch": 0.3403277229925113, "grad_norm": 0.38542476296424866, "learning_rate": 0.00015413853463365842, "loss": 0.5873, "step": 4590 }, { "epoch": 0.3404018684659302, "grad_norm": 0.3670554757118225, "learning_rate": 0.00015412853213303326, "loss": 0.4966, "step": 4591 }, { "epoch": 0.340476013939349, "grad_norm": 0.352541446685791, "learning_rate": 0.00015411852963240812, "loss": 0.549, "step": 4592 }, { "epoch": 0.34055015941276784, "grad_norm": 0.3608146905899048, "learning_rate": 0.00015410852713178293, "loss": 0.4778, "step": 4593 }, { "epoch": 0.3406243048861867, "grad_norm": 0.36231106519699097, "learning_rate": 0.0001540985246311578, "loss": 0.5938, "step": 4594 }, { "epoch": 0.34069845035960555, "grad_norm": 0.34912827610969543, "learning_rate": 0.00015408852213053264, "loss": 0.5482, "step": 4595 }, { "epoch": 0.3407725958330244, "grad_norm": 0.35971537232398987, "learning_rate": 0.0001540785196299075, "loss": 0.5549, "step": 4596 }, { "epoch": 0.34084674130644327, "grad_norm": 0.3396613597869873, "learning_rate": 0.0001540685171292823, "loss": 0.5019, "step": 4597 }, { "epoch": 0.3409208867798621, "grad_norm": 0.37002331018447876, "learning_rate": 0.00015405851462865717, "loss": 0.5313, "step": 4598 }, { "epoch": 0.3409950322532809, "grad_norm": 0.3349321782588959, "learning_rate": 0.000154048512128032, "loss": 0.4952, "step": 4599 }, { "epoch": 0.3410691777266998, "grad_norm": 0.3532625436782837, "learning_rate": 0.00015403850962740688, "loss": 0.5552, "step": 4600 }, { "epoch": 0.34114332320011864, "grad_norm": 0.35544681549072266, "learning_rate": 0.00015402850712678169, "loss": 0.5577, "step": 4601 }, { "epoch": 0.34121746867353747, "grad_norm": 0.3516555726528168, "learning_rate": 0.00015401850462615655, "loss": 0.4817, "step": 4602 }, { "epoch": 0.3412916141469563, "grad_norm": 0.3869626224040985, "learning_rate": 0.0001540085021255314, "loss": 0.5309, "step": 4603 }, { "epoch": 0.3413657596203752, "grad_norm": 0.3737764358520508, "learning_rate": 0.00015399849962490622, "loss": 0.4722, "step": 4604 }, { "epoch": 0.341439905093794, "grad_norm": 0.34235644340515137, "learning_rate": 0.00015398849712428106, "loss": 0.5094, "step": 4605 }, { "epoch": 0.34151405056721285, "grad_norm": 0.3658558130264282, "learning_rate": 0.00015397849462365593, "loss": 0.5365, "step": 4606 }, { "epoch": 0.34158819604063173, "grad_norm": 0.36116883158683777, "learning_rate": 0.00015396849212303076, "loss": 0.5378, "step": 4607 }, { "epoch": 0.34166234151405056, "grad_norm": 0.3847736120223999, "learning_rate": 0.0001539584896224056, "loss": 0.5433, "step": 4608 }, { "epoch": 0.3417364869874694, "grad_norm": 0.3790445327758789, "learning_rate": 0.00015394848712178044, "loss": 0.5304, "step": 4609 }, { "epoch": 0.3418106324608883, "grad_norm": 0.3860444128513336, "learning_rate": 0.0001539384846211553, "loss": 0.5096, "step": 4610 }, { "epoch": 0.3418847779343071, "grad_norm": 0.3651910126209259, "learning_rate": 0.00015392848212053014, "loss": 0.5267, "step": 4611 }, { "epoch": 0.34195892340772593, "grad_norm": 0.3919074535369873, "learning_rate": 0.00015391847961990498, "loss": 0.5585, "step": 4612 }, { "epoch": 0.3420330688811448, "grad_norm": 0.36783576011657715, "learning_rate": 0.0001539084771192798, "loss": 0.5362, "step": 4613 }, { "epoch": 0.34210721435456365, "grad_norm": 0.3506301939487457, "learning_rate": 0.00015389847461865468, "loss": 0.4824, "step": 4614 }, { "epoch": 0.3421813598279825, "grad_norm": 0.3590008318424225, "learning_rate": 0.00015388847211802951, "loss": 0.532, "step": 4615 }, { "epoch": 0.34225550530140136, "grad_norm": 0.3854101896286011, "learning_rate": 0.00015387846961740435, "loss": 0.5564, "step": 4616 }, { "epoch": 0.3423296507748202, "grad_norm": 0.40358325839042664, "learning_rate": 0.00015386846711677921, "loss": 0.6194, "step": 4617 }, { "epoch": 0.342403796248239, "grad_norm": 0.339695006608963, "learning_rate": 0.00015385846461615405, "loss": 0.4991, "step": 4618 }, { "epoch": 0.3424779417216579, "grad_norm": 0.3679536283016205, "learning_rate": 0.0001538484621155289, "loss": 0.5945, "step": 4619 }, { "epoch": 0.34255208719507674, "grad_norm": 0.3649662137031555, "learning_rate": 0.00015383845961490373, "loss": 0.5308, "step": 4620 }, { "epoch": 0.34262623266849557, "grad_norm": 0.37800031900405884, "learning_rate": 0.0001538284571142786, "loss": 0.5414, "step": 4621 }, { "epoch": 0.34270037814191445, "grad_norm": 0.3760984539985657, "learning_rate": 0.00015381845461365343, "loss": 0.5281, "step": 4622 }, { "epoch": 0.3427745236153333, "grad_norm": 0.3583092987537384, "learning_rate": 0.00015380845211302826, "loss": 0.5044, "step": 4623 }, { "epoch": 0.3428486690887521, "grad_norm": 0.34054648876190186, "learning_rate": 0.0001537984496124031, "loss": 0.5062, "step": 4624 }, { "epoch": 0.342922814562171, "grad_norm": 0.3670580983161926, "learning_rate": 0.00015378844711177797, "loss": 0.5129, "step": 4625 }, { "epoch": 0.3429969600355898, "grad_norm": 0.3573213815689087, "learning_rate": 0.0001537784446111528, "loss": 0.5488, "step": 4626 }, { "epoch": 0.34307110550900866, "grad_norm": 0.36053380370140076, "learning_rate": 0.00015376844211052764, "loss": 0.5386, "step": 4627 }, { "epoch": 0.34314525098242754, "grad_norm": 0.3477456271648407, "learning_rate": 0.00015375843960990248, "loss": 0.4971, "step": 4628 }, { "epoch": 0.34321939645584637, "grad_norm": 0.3799554109573364, "learning_rate": 0.00015374843710927734, "loss": 0.5818, "step": 4629 }, { "epoch": 0.3432935419292652, "grad_norm": 0.3518963158130646, "learning_rate": 0.00015373843460865215, "loss": 0.527, "step": 4630 }, { "epoch": 0.3433676874026841, "grad_norm": 0.35760247707366943, "learning_rate": 0.00015372843210802702, "loss": 0.5273, "step": 4631 }, { "epoch": 0.3434418328761029, "grad_norm": 0.36151018738746643, "learning_rate": 0.00015371842960740185, "loss": 0.5585, "step": 4632 }, { "epoch": 0.34351597834952174, "grad_norm": 0.39289405941963196, "learning_rate": 0.00015370842710677672, "loss": 0.5726, "step": 4633 }, { "epoch": 0.34359012382294063, "grad_norm": 0.34563320875167847, "learning_rate": 0.00015369842460615153, "loss": 0.5231, "step": 4634 }, { "epoch": 0.34366426929635946, "grad_norm": 0.36736825108528137, "learning_rate": 0.0001536884221055264, "loss": 0.5354, "step": 4635 }, { "epoch": 0.3437384147697783, "grad_norm": 0.39592766761779785, "learning_rate": 0.00015367841960490123, "loss": 0.5961, "step": 4636 }, { "epoch": 0.3438125602431972, "grad_norm": 0.34263503551483154, "learning_rate": 0.0001536684171042761, "loss": 0.5095, "step": 4637 }, { "epoch": 0.343886705716616, "grad_norm": 0.3545536994934082, "learning_rate": 0.0001536584146036509, "loss": 0.5119, "step": 4638 }, { "epoch": 0.34396085119003483, "grad_norm": 0.37814947962760925, "learning_rate": 0.00015364841210302577, "loss": 0.5349, "step": 4639 }, { "epoch": 0.3440349966634537, "grad_norm": 0.3663647770881653, "learning_rate": 0.0001536384096024006, "loss": 0.5153, "step": 4640 }, { "epoch": 0.34410914213687255, "grad_norm": 0.35478922724723816, "learning_rate": 0.00015362840710177547, "loss": 0.5411, "step": 4641 }, { "epoch": 0.3441832876102914, "grad_norm": 0.34728190302848816, "learning_rate": 0.00015361840460115028, "loss": 0.4973, "step": 4642 }, { "epoch": 0.34425743308371026, "grad_norm": 0.34873491525650024, "learning_rate": 0.00015360840210052514, "loss": 0.5289, "step": 4643 }, { "epoch": 0.3443315785571291, "grad_norm": 0.3869282007217407, "learning_rate": 0.00015359839959989998, "loss": 0.5657, "step": 4644 }, { "epoch": 0.3444057240305479, "grad_norm": 0.3685241639614105, "learning_rate": 0.00015358839709927482, "loss": 0.5207, "step": 4645 }, { "epoch": 0.3444798695039668, "grad_norm": 0.3608337342739105, "learning_rate": 0.00015357839459864965, "loss": 0.5767, "step": 4646 }, { "epoch": 0.34455401497738564, "grad_norm": 0.3603336215019226, "learning_rate": 0.00015356839209802452, "loss": 0.5365, "step": 4647 }, { "epoch": 0.34462816045080447, "grad_norm": 0.37283140420913696, "learning_rate": 0.00015355838959739935, "loss": 0.5487, "step": 4648 }, { "epoch": 0.34470230592422335, "grad_norm": 0.3778919279575348, "learning_rate": 0.0001535483870967742, "loss": 0.5702, "step": 4649 }, { "epoch": 0.3447764513976422, "grad_norm": 0.339324027299881, "learning_rate": 0.00015353838459614906, "loss": 0.4875, "step": 4650 }, { "epoch": 0.344850596871061, "grad_norm": 0.38679879903793335, "learning_rate": 0.0001535283820955239, "loss": 0.5535, "step": 4651 }, { "epoch": 0.3449247423444799, "grad_norm": 0.3471203148365021, "learning_rate": 0.00015351837959489873, "loss": 0.5109, "step": 4652 }, { "epoch": 0.3449988878178987, "grad_norm": 0.354826420545578, "learning_rate": 0.00015350837709427357, "loss": 0.5119, "step": 4653 }, { "epoch": 0.34507303329131755, "grad_norm": 0.3470647931098938, "learning_rate": 0.00015349837459364843, "loss": 0.5108, "step": 4654 }, { "epoch": 0.34514717876473644, "grad_norm": 0.3657236397266388, "learning_rate": 0.00015348837209302327, "loss": 0.4995, "step": 4655 }, { "epoch": 0.34522132423815527, "grad_norm": 0.366223007440567, "learning_rate": 0.0001534783695923981, "loss": 0.5031, "step": 4656 }, { "epoch": 0.3452954697115741, "grad_norm": 0.3535331189632416, "learning_rate": 0.00015346836709177294, "loss": 0.5168, "step": 4657 }, { "epoch": 0.34536961518499293, "grad_norm": 0.3634316027164459, "learning_rate": 0.0001534583645911478, "loss": 0.546, "step": 4658 }, { "epoch": 0.3454437606584118, "grad_norm": 0.34174659848213196, "learning_rate": 0.00015344836209052264, "loss": 0.5263, "step": 4659 }, { "epoch": 0.34551790613183064, "grad_norm": 0.37191203236579895, "learning_rate": 0.00015343835958989748, "loss": 0.5616, "step": 4660 }, { "epoch": 0.3455920516052495, "grad_norm": 0.35919100046157837, "learning_rate": 0.00015342835708927232, "loss": 0.5195, "step": 4661 }, { "epoch": 0.34566619707866836, "grad_norm": 0.3579886555671692, "learning_rate": 0.00015341835458864718, "loss": 0.4913, "step": 4662 }, { "epoch": 0.3457403425520872, "grad_norm": 0.3521590828895569, "learning_rate": 0.00015340835208802202, "loss": 0.4906, "step": 4663 }, { "epoch": 0.345814488025506, "grad_norm": 0.3723827302455902, "learning_rate": 0.00015339834958739686, "loss": 0.5163, "step": 4664 }, { "epoch": 0.3458886334989249, "grad_norm": 0.374008446931839, "learning_rate": 0.0001533883470867717, "loss": 0.5502, "step": 4665 }, { "epoch": 0.34596277897234373, "grad_norm": 0.3758941888809204, "learning_rate": 0.00015337834458614656, "loss": 0.5272, "step": 4666 }, { "epoch": 0.34603692444576256, "grad_norm": 0.3710227608680725, "learning_rate": 0.00015336834208552137, "loss": 0.4917, "step": 4667 }, { "epoch": 0.34611106991918145, "grad_norm": 0.371066153049469, "learning_rate": 0.00015335833958489623, "loss": 0.5294, "step": 4668 }, { "epoch": 0.3461852153926003, "grad_norm": 0.3455944359302521, "learning_rate": 0.00015334833708427107, "loss": 0.505, "step": 4669 }, { "epoch": 0.3462593608660191, "grad_norm": 0.3636184632778168, "learning_rate": 0.00015333833458364593, "loss": 0.5755, "step": 4670 }, { "epoch": 0.346333506339438, "grad_norm": 0.3973265588283539, "learning_rate": 0.00015332833208302074, "loss": 0.5136, "step": 4671 }, { "epoch": 0.3464076518128568, "grad_norm": 0.34754490852355957, "learning_rate": 0.0001533183295823956, "loss": 0.5194, "step": 4672 }, { "epoch": 0.34648179728627565, "grad_norm": 0.3756029009819031, "learning_rate": 0.00015330832708177044, "loss": 0.5064, "step": 4673 }, { "epoch": 0.34655594275969454, "grad_norm": 0.34457460045814514, "learning_rate": 0.0001532983245811453, "loss": 0.5054, "step": 4674 }, { "epoch": 0.34663008823311336, "grad_norm": 0.355092316865921, "learning_rate": 0.00015328832208052012, "loss": 0.4911, "step": 4675 }, { "epoch": 0.3467042337065322, "grad_norm": 0.36052241921424866, "learning_rate": 0.00015327831957989498, "loss": 0.5133, "step": 4676 }, { "epoch": 0.3467783791799511, "grad_norm": 0.37084370851516724, "learning_rate": 0.00015326831707926982, "loss": 0.559, "step": 4677 }, { "epoch": 0.3468525246533699, "grad_norm": 0.3672504425048828, "learning_rate": 0.00015325831457864468, "loss": 0.5308, "step": 4678 }, { "epoch": 0.34692667012678874, "grad_norm": 0.3467627167701721, "learning_rate": 0.0001532483120780195, "loss": 0.4799, "step": 4679 }, { "epoch": 0.3470008156002076, "grad_norm": 0.38557949662208557, "learning_rate": 0.00015323830957739436, "loss": 0.5468, "step": 4680 }, { "epoch": 0.34707496107362645, "grad_norm": 0.3664573132991791, "learning_rate": 0.0001532283070767692, "loss": 0.5098, "step": 4681 }, { "epoch": 0.3471491065470453, "grad_norm": 0.36256566643714905, "learning_rate": 0.00015321830457614403, "loss": 0.4967, "step": 4682 }, { "epoch": 0.34722325202046417, "grad_norm": 0.4321514666080475, "learning_rate": 0.0001532083020755189, "loss": 0.5288, "step": 4683 }, { "epoch": 0.347297397493883, "grad_norm": 0.34453722834587097, "learning_rate": 0.00015319829957489373, "loss": 0.474, "step": 4684 }, { "epoch": 0.3473715429673018, "grad_norm": 0.3600115180015564, "learning_rate": 0.00015318829707426857, "loss": 0.5013, "step": 4685 }, { "epoch": 0.3474456884407207, "grad_norm": 0.39372479915618896, "learning_rate": 0.0001531782945736434, "loss": 0.5484, "step": 4686 }, { "epoch": 0.34751983391413954, "grad_norm": 0.35624271631240845, "learning_rate": 0.00015316829207301827, "loss": 0.4988, "step": 4687 }, { "epoch": 0.34759397938755837, "grad_norm": 0.37584641575813293, "learning_rate": 0.0001531582895723931, "loss": 0.5241, "step": 4688 }, { "epoch": 0.34766812486097726, "grad_norm": 0.36972156167030334, "learning_rate": 0.00015314828707176795, "loss": 0.5544, "step": 4689 }, { "epoch": 0.3477422703343961, "grad_norm": 0.3611251413822174, "learning_rate": 0.00015313828457114278, "loss": 0.5242, "step": 4690 }, { "epoch": 0.3478164158078149, "grad_norm": 0.3726882040500641, "learning_rate": 0.00015312828207051765, "loss": 0.5397, "step": 4691 }, { "epoch": 0.3478905612812338, "grad_norm": 0.3711725175380707, "learning_rate": 0.00015311827956989248, "loss": 0.5206, "step": 4692 }, { "epoch": 0.34796470675465263, "grad_norm": 0.37246033549308777, "learning_rate": 0.00015310827706926732, "loss": 0.5431, "step": 4693 }, { "epoch": 0.34803885222807146, "grad_norm": 0.36759820580482483, "learning_rate": 0.00015309827456864216, "loss": 0.5559, "step": 4694 }, { "epoch": 0.34811299770149035, "grad_norm": 0.37999552488327026, "learning_rate": 0.00015308827206801702, "loss": 0.5202, "step": 4695 }, { "epoch": 0.3481871431749092, "grad_norm": 0.3398481011390686, "learning_rate": 0.00015307826956739186, "loss": 0.4883, "step": 4696 }, { "epoch": 0.348261288648328, "grad_norm": 0.34143710136413574, "learning_rate": 0.0001530682670667667, "loss": 0.5061, "step": 4697 }, { "epoch": 0.3483354341217469, "grad_norm": 0.39111292362213135, "learning_rate": 0.00015305826456614153, "loss": 0.5175, "step": 4698 }, { "epoch": 0.3484095795951657, "grad_norm": 0.36219653487205505, "learning_rate": 0.0001530482620655164, "loss": 0.5323, "step": 4699 }, { "epoch": 0.34848372506858455, "grad_norm": 0.36281412839889526, "learning_rate": 0.00015303825956489124, "loss": 0.5109, "step": 4700 }, { "epoch": 0.34855787054200343, "grad_norm": 0.3687044680118561, "learning_rate": 0.00015302825706426607, "loss": 0.5452, "step": 4701 }, { "epoch": 0.34863201601542226, "grad_norm": 0.3554045259952545, "learning_rate": 0.0001530182545636409, "loss": 0.523, "step": 4702 }, { "epoch": 0.3487061614888411, "grad_norm": 0.37705451250076294, "learning_rate": 0.00015300825206301577, "loss": 0.5156, "step": 4703 }, { "epoch": 0.34878030696226, "grad_norm": 0.3656252920627594, "learning_rate": 0.00015299824956239058, "loss": 0.5512, "step": 4704 }, { "epoch": 0.3488544524356788, "grad_norm": 0.3570212423801422, "learning_rate": 0.00015298824706176545, "loss": 0.5242, "step": 4705 }, { "epoch": 0.34892859790909764, "grad_norm": 0.3752516210079193, "learning_rate": 0.00015297824456114029, "loss": 0.5094, "step": 4706 }, { "epoch": 0.3490027433825165, "grad_norm": 0.3667277693748474, "learning_rate": 0.00015296824206051515, "loss": 0.5266, "step": 4707 }, { "epoch": 0.34907688885593535, "grad_norm": 0.35929322242736816, "learning_rate": 0.00015295823955988996, "loss": 0.5023, "step": 4708 }, { "epoch": 0.3491510343293542, "grad_norm": 0.3420259654521942, "learning_rate": 0.00015294823705926482, "loss": 0.479, "step": 4709 }, { "epoch": 0.34922517980277307, "grad_norm": 0.37148764729499817, "learning_rate": 0.00015293823455863966, "loss": 0.5562, "step": 4710 }, { "epoch": 0.3492993252761919, "grad_norm": 0.36533141136169434, "learning_rate": 0.00015292823205801452, "loss": 0.5178, "step": 4711 }, { "epoch": 0.3493734707496107, "grad_norm": 0.3558844029903412, "learning_rate": 0.00015291822955738933, "loss": 0.5346, "step": 4712 }, { "epoch": 0.3494476162230296, "grad_norm": 0.3439483046531677, "learning_rate": 0.0001529082270567642, "loss": 0.4918, "step": 4713 }, { "epoch": 0.34952176169644844, "grad_norm": 0.3788340985774994, "learning_rate": 0.00015289822455613904, "loss": 0.5434, "step": 4714 }, { "epoch": 0.34959590716986727, "grad_norm": 0.36775320768356323, "learning_rate": 0.0001528882220555139, "loss": 0.4783, "step": 4715 }, { "epoch": 0.3496700526432861, "grad_norm": 0.3319256007671356, "learning_rate": 0.00015287821955488874, "loss": 0.5126, "step": 4716 }, { "epoch": 0.349744198116705, "grad_norm": 0.3582650125026703, "learning_rate": 0.00015286821705426357, "loss": 0.4904, "step": 4717 }, { "epoch": 0.3498183435901238, "grad_norm": 0.39722225069999695, "learning_rate": 0.0001528582145536384, "loss": 0.6114, "step": 4718 }, { "epoch": 0.34989248906354264, "grad_norm": 0.36013489961624146, "learning_rate": 0.00015284821205301325, "loss": 0.5598, "step": 4719 }, { "epoch": 0.34996663453696153, "grad_norm": 0.35259494185447693, "learning_rate": 0.0001528382095523881, "loss": 0.4989, "step": 4720 }, { "epoch": 0.35004078001038036, "grad_norm": 0.35924258828163147, "learning_rate": 0.00015282820705176295, "loss": 0.5398, "step": 4721 }, { "epoch": 0.3501149254837992, "grad_norm": 0.36051636934280396, "learning_rate": 0.0001528182045511378, "loss": 0.5201, "step": 4722 }, { "epoch": 0.3501890709572181, "grad_norm": 0.35472366213798523, "learning_rate": 0.00015280820205051262, "loss": 0.5043, "step": 4723 }, { "epoch": 0.3502632164306369, "grad_norm": 0.3544977903366089, "learning_rate": 0.0001527981995498875, "loss": 0.5166, "step": 4724 }, { "epoch": 0.35033736190405573, "grad_norm": 0.36575692892074585, "learning_rate": 0.00015278819704926233, "loss": 0.4941, "step": 4725 }, { "epoch": 0.3504115073774746, "grad_norm": 0.3448162376880646, "learning_rate": 0.0001527781945486372, "loss": 0.4928, "step": 4726 }, { "epoch": 0.35048565285089345, "grad_norm": 0.34838011860847473, "learning_rate": 0.000152768192048012, "loss": 0.5193, "step": 4727 }, { "epoch": 0.3505597983243123, "grad_norm": 0.3639306128025055, "learning_rate": 0.00015275818954738686, "loss": 0.5332, "step": 4728 }, { "epoch": 0.35063394379773116, "grad_norm": 0.3450857400894165, "learning_rate": 0.0001527481870467617, "loss": 0.4951, "step": 4729 }, { "epoch": 0.35070808927115, "grad_norm": 0.3929961621761322, "learning_rate": 0.00015273818454613654, "loss": 0.5926, "step": 4730 }, { "epoch": 0.3507822347445688, "grad_norm": 0.35551923513412476, "learning_rate": 0.00015272818204551137, "loss": 0.503, "step": 4731 }, { "epoch": 0.3508563802179877, "grad_norm": 0.341613233089447, "learning_rate": 0.00015271817954488624, "loss": 0.4823, "step": 4732 }, { "epoch": 0.35093052569140654, "grad_norm": 0.35586196184158325, "learning_rate": 0.00015270817704426108, "loss": 0.527, "step": 4733 }, { "epoch": 0.35100467116482537, "grad_norm": 0.36568355560302734, "learning_rate": 0.0001526981745436359, "loss": 0.5164, "step": 4734 }, { "epoch": 0.35107881663824425, "grad_norm": 0.3339957594871521, "learning_rate": 0.00015268817204301075, "loss": 0.4784, "step": 4735 }, { "epoch": 0.3511529621116631, "grad_norm": 0.38814011216163635, "learning_rate": 0.00015267816954238561, "loss": 0.5325, "step": 4736 }, { "epoch": 0.3512271075850819, "grad_norm": 0.3566195070743561, "learning_rate": 0.00015266816704176045, "loss": 0.5326, "step": 4737 }, { "epoch": 0.3513012530585008, "grad_norm": 0.3476565182209015, "learning_rate": 0.0001526581645411353, "loss": 0.5388, "step": 4738 }, { "epoch": 0.3513753985319196, "grad_norm": 0.3691495358943939, "learning_rate": 0.00015264816204051013, "loss": 0.5027, "step": 4739 }, { "epoch": 0.35144954400533845, "grad_norm": 0.3645973801612854, "learning_rate": 0.000152638159539885, "loss": 0.536, "step": 4740 }, { "epoch": 0.35152368947875734, "grad_norm": 0.34945330023765564, "learning_rate": 0.0001526281570392598, "loss": 0.5258, "step": 4741 }, { "epoch": 0.35159783495217617, "grad_norm": 0.3567379415035248, "learning_rate": 0.00015261815453863466, "loss": 0.5042, "step": 4742 }, { "epoch": 0.351671980425595, "grad_norm": 0.36244457960128784, "learning_rate": 0.0001526081520380095, "loss": 0.5334, "step": 4743 }, { "epoch": 0.3517461258990139, "grad_norm": 0.4065931737422943, "learning_rate": 0.00015259814953738437, "loss": 0.5538, "step": 4744 }, { "epoch": 0.3518202713724327, "grad_norm": 0.37495970726013184, "learning_rate": 0.00015258814703675918, "loss": 0.5524, "step": 4745 }, { "epoch": 0.35189441684585154, "grad_norm": 0.35386303067207336, "learning_rate": 0.00015257814453613404, "loss": 0.5166, "step": 4746 }, { "epoch": 0.35196856231927043, "grad_norm": 0.33865275979042053, "learning_rate": 0.00015256814203550888, "loss": 0.4917, "step": 4747 }, { "epoch": 0.35204270779268926, "grad_norm": 0.3658398985862732, "learning_rate": 0.00015255813953488374, "loss": 0.5395, "step": 4748 }, { "epoch": 0.3521168532661081, "grad_norm": 0.35798636078834534, "learning_rate": 0.00015254813703425858, "loss": 0.507, "step": 4749 }, { "epoch": 0.352190998739527, "grad_norm": 0.3658464550971985, "learning_rate": 0.00015253813453363342, "loss": 0.5247, "step": 4750 }, { "epoch": 0.3522651442129458, "grad_norm": 0.37887871265411377, "learning_rate": 0.00015252813203300825, "loss": 0.5102, "step": 4751 }, { "epoch": 0.35233928968636463, "grad_norm": 0.36267000436782837, "learning_rate": 0.00015251812953238312, "loss": 0.5226, "step": 4752 }, { "epoch": 0.3524134351597835, "grad_norm": 0.34702086448669434, "learning_rate": 0.00015250812703175795, "loss": 0.5222, "step": 4753 }, { "epoch": 0.35248758063320235, "grad_norm": 0.35811442136764526, "learning_rate": 0.0001524981245311328, "loss": 0.5089, "step": 4754 }, { "epoch": 0.3525617261066212, "grad_norm": 0.3446536064147949, "learning_rate": 0.00015248812203050763, "loss": 0.4794, "step": 4755 }, { "epoch": 0.35263587158004006, "grad_norm": 0.3498639166355133, "learning_rate": 0.00015247811952988246, "loss": 0.5341, "step": 4756 }, { "epoch": 0.3527100170534589, "grad_norm": 0.35330238938331604, "learning_rate": 0.00015246811702925733, "loss": 0.5002, "step": 4757 }, { "epoch": 0.3527841625268777, "grad_norm": 0.3364550471305847, "learning_rate": 0.00015245811452863217, "loss": 0.4765, "step": 4758 }, { "epoch": 0.3528583080002966, "grad_norm": 0.3447117507457733, "learning_rate": 0.00015244811202800703, "loss": 0.4925, "step": 4759 }, { "epoch": 0.35293245347371544, "grad_norm": 0.36218470335006714, "learning_rate": 0.00015243810952738184, "loss": 0.5286, "step": 4760 }, { "epoch": 0.35300659894713426, "grad_norm": 0.3890037536621094, "learning_rate": 0.0001524281070267567, "loss": 0.5941, "step": 4761 }, { "epoch": 0.35308074442055315, "grad_norm": 0.338519424200058, "learning_rate": 0.00015241810452613154, "loss": 0.4606, "step": 4762 }, { "epoch": 0.353154889893972, "grad_norm": 0.3573763370513916, "learning_rate": 0.0001524081020255064, "loss": 0.5071, "step": 4763 }, { "epoch": 0.3532290353673908, "grad_norm": 0.3709942400455475, "learning_rate": 0.00015239809952488122, "loss": 0.5164, "step": 4764 }, { "epoch": 0.3533031808408097, "grad_norm": 0.3571522533893585, "learning_rate": 0.00015238809702425608, "loss": 0.4821, "step": 4765 }, { "epoch": 0.3533773263142285, "grad_norm": 0.3511929214000702, "learning_rate": 0.00015237809452363092, "loss": 0.5278, "step": 4766 }, { "epoch": 0.35345147178764735, "grad_norm": 0.3621924817562103, "learning_rate": 0.00015236809202300575, "loss": 0.5416, "step": 4767 }, { "epoch": 0.35352561726106624, "grad_norm": 0.35172462463378906, "learning_rate": 0.0001523580895223806, "loss": 0.4942, "step": 4768 }, { "epoch": 0.35359976273448507, "grad_norm": 0.3532716929912567, "learning_rate": 0.00015234808702175546, "loss": 0.4811, "step": 4769 }, { "epoch": 0.3536739082079039, "grad_norm": 0.3504057228565216, "learning_rate": 0.0001523380845211303, "loss": 0.5016, "step": 4770 }, { "epoch": 0.3537480536813228, "grad_norm": 0.34959346055984497, "learning_rate": 0.00015232808202050513, "loss": 0.5168, "step": 4771 }, { "epoch": 0.3538221991547416, "grad_norm": 0.3466894030570984, "learning_rate": 0.00015231807951987997, "loss": 0.4967, "step": 4772 }, { "epoch": 0.35389634462816044, "grad_norm": 0.36713269352912903, "learning_rate": 0.00015230807701925483, "loss": 0.5539, "step": 4773 }, { "epoch": 0.35397049010157927, "grad_norm": 0.35023191571235657, "learning_rate": 0.00015229807451862967, "loss": 0.4993, "step": 4774 }, { "epoch": 0.35404463557499816, "grad_norm": 0.3548642694950104, "learning_rate": 0.0001522880720180045, "loss": 0.5218, "step": 4775 }, { "epoch": 0.354118781048417, "grad_norm": 0.3642808794975281, "learning_rate": 0.00015227806951737934, "loss": 0.5162, "step": 4776 }, { "epoch": 0.3541929265218358, "grad_norm": 0.36894580721855164, "learning_rate": 0.0001522680670167542, "loss": 0.5095, "step": 4777 }, { "epoch": 0.3542670719952547, "grad_norm": 0.3544287383556366, "learning_rate": 0.00015225806451612902, "loss": 0.5182, "step": 4778 }, { "epoch": 0.35434121746867353, "grad_norm": 0.3678164482116699, "learning_rate": 0.00015224806201550388, "loss": 0.5042, "step": 4779 }, { "epoch": 0.35441536294209236, "grad_norm": 0.3583504855632782, "learning_rate": 0.00015223805951487872, "loss": 0.5166, "step": 4780 }, { "epoch": 0.35448950841551125, "grad_norm": 0.3503889739513397, "learning_rate": 0.00015222805701425358, "loss": 0.5074, "step": 4781 }, { "epoch": 0.3545636538889301, "grad_norm": 0.3541962504386902, "learning_rate": 0.0001522180545136284, "loss": 0.5265, "step": 4782 }, { "epoch": 0.3546377993623489, "grad_norm": 0.3894755244255066, "learning_rate": 0.00015220805201300326, "loss": 0.5909, "step": 4783 }, { "epoch": 0.3547119448357678, "grad_norm": 0.3488773703575134, "learning_rate": 0.0001521980495123781, "loss": 0.5364, "step": 4784 }, { "epoch": 0.3547860903091866, "grad_norm": 0.35259199142456055, "learning_rate": 0.00015218804701175296, "loss": 0.5225, "step": 4785 }, { "epoch": 0.35486023578260545, "grad_norm": 0.3711255192756653, "learning_rate": 0.0001521780445111278, "loss": 0.5207, "step": 4786 }, { "epoch": 0.35493438125602433, "grad_norm": 0.3580823838710785, "learning_rate": 0.00015216804201050263, "loss": 0.5277, "step": 4787 }, { "epoch": 0.35500852672944316, "grad_norm": 0.3569403886795044, "learning_rate": 0.00015215803950987747, "loss": 0.5333, "step": 4788 }, { "epoch": 0.355082672202862, "grad_norm": 0.3818404972553253, "learning_rate": 0.00015214803700925233, "loss": 0.531, "step": 4789 }, { "epoch": 0.3551568176762809, "grad_norm": 0.37498676776885986, "learning_rate": 0.00015213803450862717, "loss": 0.5419, "step": 4790 }, { "epoch": 0.3552309631496997, "grad_norm": 0.35872021317481995, "learning_rate": 0.000152128032008002, "loss": 0.4906, "step": 4791 }, { "epoch": 0.35530510862311854, "grad_norm": 0.3744332194328308, "learning_rate": 0.00015211802950737687, "loss": 0.5239, "step": 4792 }, { "epoch": 0.3553792540965374, "grad_norm": 0.38360396027565, "learning_rate": 0.00015210802700675168, "loss": 0.561, "step": 4793 }, { "epoch": 0.35545339956995625, "grad_norm": 0.3485589325428009, "learning_rate": 0.00015209802450612655, "loss": 0.5, "step": 4794 }, { "epoch": 0.3555275450433751, "grad_norm": 0.3480745851993561, "learning_rate": 0.00015208802200550138, "loss": 0.5086, "step": 4795 }, { "epoch": 0.35560169051679397, "grad_norm": 0.3913453221321106, "learning_rate": 0.00015207801950487625, "loss": 0.5413, "step": 4796 }, { "epoch": 0.3556758359902128, "grad_norm": 0.35552820563316345, "learning_rate": 0.00015206801700425106, "loss": 0.5388, "step": 4797 }, { "epoch": 0.3557499814636316, "grad_norm": 0.3670283555984497, "learning_rate": 0.00015205801450362592, "loss": 0.5027, "step": 4798 }, { "epoch": 0.3558241269370505, "grad_norm": 0.33970940113067627, "learning_rate": 0.00015204801200300076, "loss": 0.4969, "step": 4799 }, { "epoch": 0.35589827241046934, "grad_norm": 0.37764862179756165, "learning_rate": 0.00015203800950237562, "loss": 0.5989, "step": 4800 }, { "epoch": 0.35597241788388817, "grad_norm": 0.3731380105018616, "learning_rate": 0.00015202800700175043, "loss": 0.523, "step": 4801 }, { "epoch": 0.35604656335730706, "grad_norm": 0.34038472175598145, "learning_rate": 0.0001520180045011253, "loss": 0.5053, "step": 4802 }, { "epoch": 0.3561207088307259, "grad_norm": 0.35674816370010376, "learning_rate": 0.00015200800200050013, "loss": 0.5126, "step": 4803 }, { "epoch": 0.3561948543041447, "grad_norm": 0.3645617365837097, "learning_rate": 0.00015199799949987497, "loss": 0.57, "step": 4804 }, { "epoch": 0.3562689997775636, "grad_norm": 0.37063080072402954, "learning_rate": 0.0001519879969992498, "loss": 0.5167, "step": 4805 }, { "epoch": 0.35634314525098243, "grad_norm": 0.3437645435333252, "learning_rate": 0.00015197799449862467, "loss": 0.4706, "step": 4806 }, { "epoch": 0.35641729072440126, "grad_norm": 0.35926157236099243, "learning_rate": 0.0001519679919979995, "loss": 0.5066, "step": 4807 }, { "epoch": 0.35649143619782014, "grad_norm": 0.3441578447818756, "learning_rate": 0.00015195798949737435, "loss": 0.4926, "step": 4808 }, { "epoch": 0.356565581671239, "grad_norm": 0.360284686088562, "learning_rate": 0.00015194798699674918, "loss": 0.5113, "step": 4809 }, { "epoch": 0.3566397271446578, "grad_norm": 0.34725111722946167, "learning_rate": 0.00015193798449612405, "loss": 0.516, "step": 4810 }, { "epoch": 0.3567138726180767, "grad_norm": 0.35995402932167053, "learning_rate": 0.00015192798199549888, "loss": 0.506, "step": 4811 }, { "epoch": 0.3567880180914955, "grad_norm": 0.377091646194458, "learning_rate": 0.00015191797949487372, "loss": 0.5744, "step": 4812 }, { "epoch": 0.35686216356491435, "grad_norm": 0.3695342540740967, "learning_rate": 0.00015190797699424856, "loss": 0.5408, "step": 4813 }, { "epoch": 0.35693630903833323, "grad_norm": 0.3624439239501953, "learning_rate": 0.00015189797449362342, "loss": 0.5307, "step": 4814 }, { "epoch": 0.35701045451175206, "grad_norm": 0.40692535042762756, "learning_rate": 0.00015188797199299823, "loss": 0.5456, "step": 4815 }, { "epoch": 0.3570845999851709, "grad_norm": 0.34606286883354187, "learning_rate": 0.0001518779694923731, "loss": 0.5117, "step": 4816 }, { "epoch": 0.3571587454585898, "grad_norm": 0.34419870376586914, "learning_rate": 0.00015186796699174793, "loss": 0.5109, "step": 4817 }, { "epoch": 0.3572328909320086, "grad_norm": 0.36179783940315247, "learning_rate": 0.0001518579644911228, "loss": 0.552, "step": 4818 }, { "epoch": 0.35730703640542744, "grad_norm": 0.3725307583808899, "learning_rate": 0.00015184796199049764, "loss": 0.5373, "step": 4819 }, { "epoch": 0.3573811818788463, "grad_norm": 0.3638765215873718, "learning_rate": 0.00015183795948987247, "loss": 0.4721, "step": 4820 }, { "epoch": 0.35745532735226515, "grad_norm": 0.3635946214199066, "learning_rate": 0.0001518279569892473, "loss": 0.5044, "step": 4821 }, { "epoch": 0.357529472825684, "grad_norm": 0.35486897826194763, "learning_rate": 0.00015181795448862217, "loss": 0.5207, "step": 4822 }, { "epoch": 0.35760361829910287, "grad_norm": 0.3582630753517151, "learning_rate": 0.000151807951987997, "loss": 0.514, "step": 4823 }, { "epoch": 0.3576777637725217, "grad_norm": 0.35046204924583435, "learning_rate": 0.00015179794948737185, "loss": 0.5091, "step": 4824 }, { "epoch": 0.3577519092459405, "grad_norm": 0.3445088863372803, "learning_rate": 0.0001517879469867467, "loss": 0.5149, "step": 4825 }, { "epoch": 0.3578260547193594, "grad_norm": 0.3683907687664032, "learning_rate": 0.00015177794448612155, "loss": 0.5315, "step": 4826 }, { "epoch": 0.35790020019277824, "grad_norm": 0.36937370896339417, "learning_rate": 0.00015176794198549639, "loss": 0.5767, "step": 4827 }, { "epoch": 0.35797434566619707, "grad_norm": 0.33374783396720886, "learning_rate": 0.00015175793948487122, "loss": 0.4944, "step": 4828 }, { "epoch": 0.35804849113961595, "grad_norm": 0.3850536644458771, "learning_rate": 0.0001517479369842461, "loss": 0.576, "step": 4829 }, { "epoch": 0.3581226366130348, "grad_norm": 0.33539333939552307, "learning_rate": 0.0001517379344836209, "loss": 0.5224, "step": 4830 }, { "epoch": 0.3581967820864536, "grad_norm": 0.37622958421707153, "learning_rate": 0.00015172793198299576, "loss": 0.5294, "step": 4831 }, { "epoch": 0.35827092755987244, "grad_norm": 0.3408782482147217, "learning_rate": 0.0001517179294823706, "loss": 0.5145, "step": 4832 }, { "epoch": 0.35834507303329133, "grad_norm": 0.38107597827911377, "learning_rate": 0.00015170792698174546, "loss": 0.5273, "step": 4833 }, { "epoch": 0.35841921850671016, "grad_norm": 0.374325692653656, "learning_rate": 0.00015169792448112027, "loss": 0.5416, "step": 4834 }, { "epoch": 0.358493363980129, "grad_norm": 0.35506343841552734, "learning_rate": 0.00015168792198049514, "loss": 0.5448, "step": 4835 }, { "epoch": 0.3585675094535479, "grad_norm": 0.3622756898403168, "learning_rate": 0.00015167791947986997, "loss": 0.5385, "step": 4836 }, { "epoch": 0.3586416549269667, "grad_norm": 0.39368054270744324, "learning_rate": 0.00015166791697924484, "loss": 0.509, "step": 4837 }, { "epoch": 0.35871580040038553, "grad_norm": 0.36317217350006104, "learning_rate": 0.00015165791447861965, "loss": 0.5139, "step": 4838 }, { "epoch": 0.3587899458738044, "grad_norm": 0.3767569363117218, "learning_rate": 0.0001516479119779945, "loss": 0.545, "step": 4839 }, { "epoch": 0.35886409134722325, "grad_norm": 0.394460529088974, "learning_rate": 0.00015163790947736935, "loss": 0.5368, "step": 4840 }, { "epoch": 0.3589382368206421, "grad_norm": 0.34084075689315796, "learning_rate": 0.0001516279069767442, "loss": 0.5134, "step": 4841 }, { "epoch": 0.35901238229406096, "grad_norm": 0.35881462693214417, "learning_rate": 0.00015161790447611902, "loss": 0.4967, "step": 4842 }, { "epoch": 0.3590865277674798, "grad_norm": 0.3471643030643463, "learning_rate": 0.0001516079019754939, "loss": 0.4815, "step": 4843 }, { "epoch": 0.3591606732408986, "grad_norm": 0.3603894114494324, "learning_rate": 0.00015159789947486873, "loss": 0.5443, "step": 4844 }, { "epoch": 0.3592348187143175, "grad_norm": 0.3945273160934448, "learning_rate": 0.00015158789697424356, "loss": 0.568, "step": 4845 }, { "epoch": 0.35930896418773633, "grad_norm": 0.35929450392723083, "learning_rate": 0.0001515778944736184, "loss": 0.5367, "step": 4846 }, { "epoch": 0.35938310966115516, "grad_norm": 0.36192718148231506, "learning_rate": 0.00015156789197299326, "loss": 0.5104, "step": 4847 }, { "epoch": 0.35945725513457405, "grad_norm": 0.4032362103462219, "learning_rate": 0.0001515578894723681, "loss": 0.5375, "step": 4848 }, { "epoch": 0.3595314006079929, "grad_norm": 0.36845943331718445, "learning_rate": 0.00015154788697174294, "loss": 0.5516, "step": 4849 }, { "epoch": 0.3596055460814117, "grad_norm": 0.3697085380554199, "learning_rate": 0.00015153788447111777, "loss": 0.5455, "step": 4850 }, { "epoch": 0.3596796915548306, "grad_norm": 0.3481850326061249, "learning_rate": 0.00015152788197049264, "loss": 0.4986, "step": 4851 }, { "epoch": 0.3597538370282494, "grad_norm": 0.35787224769592285, "learning_rate": 0.00015151787946986748, "loss": 0.519, "step": 4852 }, { "epoch": 0.35982798250166825, "grad_norm": 0.37595757842063904, "learning_rate": 0.0001515078769692423, "loss": 0.5276, "step": 4853 }, { "epoch": 0.35990212797508714, "grad_norm": 0.38575461506843567, "learning_rate": 0.00015149787446861715, "loss": 0.573, "step": 4854 }, { "epoch": 0.35997627344850597, "grad_norm": 0.37380170822143555, "learning_rate": 0.00015148787196799201, "loss": 0.5545, "step": 4855 }, { "epoch": 0.3600504189219248, "grad_norm": 0.3770095407962799, "learning_rate": 0.00015147786946736685, "loss": 0.5438, "step": 4856 }, { "epoch": 0.3601245643953437, "grad_norm": 0.3638509511947632, "learning_rate": 0.0001514678669667417, "loss": 0.5264, "step": 4857 }, { "epoch": 0.3601987098687625, "grad_norm": 0.3744293451309204, "learning_rate": 0.00015145786446611653, "loss": 0.5589, "step": 4858 }, { "epoch": 0.36027285534218134, "grad_norm": 0.351093590259552, "learning_rate": 0.0001514478619654914, "loss": 0.5137, "step": 4859 }, { "epoch": 0.3603470008156002, "grad_norm": 0.3692874014377594, "learning_rate": 0.00015143785946486623, "loss": 0.5334, "step": 4860 }, { "epoch": 0.36042114628901906, "grad_norm": 0.3577617406845093, "learning_rate": 0.00015142785696424106, "loss": 0.5107, "step": 4861 }, { "epoch": 0.3604952917624379, "grad_norm": 0.3236370384693146, "learning_rate": 0.00015141785446361593, "loss": 0.4826, "step": 4862 }, { "epoch": 0.36056943723585677, "grad_norm": 0.34438854455947876, "learning_rate": 0.00015140785196299077, "loss": 0.4736, "step": 4863 }, { "epoch": 0.3606435827092756, "grad_norm": 0.33198532462120056, "learning_rate": 0.0001513978494623656, "loss": 0.5132, "step": 4864 }, { "epoch": 0.36071772818269443, "grad_norm": 0.36648333072662354, "learning_rate": 0.00015138784696174044, "loss": 0.5032, "step": 4865 }, { "epoch": 0.3607918736561133, "grad_norm": 0.333159863948822, "learning_rate": 0.0001513778444611153, "loss": 0.4764, "step": 4866 }, { "epoch": 0.36086601912953215, "grad_norm": 0.3412780165672302, "learning_rate": 0.00015136784196049011, "loss": 0.4906, "step": 4867 }, { "epoch": 0.360940164602951, "grad_norm": 0.34526556730270386, "learning_rate": 0.00015135783945986498, "loss": 0.5022, "step": 4868 }, { "epoch": 0.36101431007636986, "grad_norm": 0.3632180988788605, "learning_rate": 0.00015134783695923981, "loss": 0.5326, "step": 4869 }, { "epoch": 0.3610884555497887, "grad_norm": 0.3444159924983978, "learning_rate": 0.00015133783445861468, "loss": 0.5031, "step": 4870 }, { "epoch": 0.3611626010232075, "grad_norm": 0.37482479214668274, "learning_rate": 0.0001513278319579895, "loss": 0.5502, "step": 4871 }, { "epoch": 0.3612367464966264, "grad_norm": 0.36366280913352966, "learning_rate": 0.00015131782945736435, "loss": 0.5234, "step": 4872 }, { "epoch": 0.36131089197004523, "grad_norm": 0.3784324526786804, "learning_rate": 0.0001513078269567392, "loss": 0.5101, "step": 4873 }, { "epoch": 0.36138503744346406, "grad_norm": 0.3494550287723541, "learning_rate": 0.00015129782445611405, "loss": 0.5083, "step": 4874 }, { "epoch": 0.36145918291688295, "grad_norm": 0.3609694242477417, "learning_rate": 0.00015128782195548886, "loss": 0.5329, "step": 4875 }, { "epoch": 0.3615333283903018, "grad_norm": 0.3670002222061157, "learning_rate": 0.00015127781945486373, "loss": 0.5415, "step": 4876 }, { "epoch": 0.3616074738637206, "grad_norm": 0.3506358861923218, "learning_rate": 0.00015126781695423857, "loss": 0.4914, "step": 4877 }, { "epoch": 0.3616816193371395, "grad_norm": 0.3653147518634796, "learning_rate": 0.0001512578144536134, "loss": 0.4982, "step": 4878 }, { "epoch": 0.3617557648105583, "grad_norm": 0.3508821427822113, "learning_rate": 0.00015124781195298824, "loss": 0.5274, "step": 4879 }, { "epoch": 0.36182991028397715, "grad_norm": 0.3931906819343567, "learning_rate": 0.0001512378094523631, "loss": 0.5836, "step": 4880 }, { "epoch": 0.36190405575739604, "grad_norm": 0.36515337228775024, "learning_rate": 0.00015122780695173794, "loss": 0.4961, "step": 4881 }, { "epoch": 0.36197820123081487, "grad_norm": 0.34976136684417725, "learning_rate": 0.00015121780445111278, "loss": 0.5318, "step": 4882 }, { "epoch": 0.3620523467042337, "grad_norm": 0.3649763762950897, "learning_rate": 0.00015120780195048762, "loss": 0.5202, "step": 4883 }, { "epoch": 0.3621264921776526, "grad_norm": 0.3824881613254547, "learning_rate": 0.00015119779944986248, "loss": 0.5598, "step": 4884 }, { "epoch": 0.3622006376510714, "grad_norm": 0.40079355239868164, "learning_rate": 0.00015118779694923732, "loss": 0.5094, "step": 4885 }, { "epoch": 0.36227478312449024, "grad_norm": 0.3471870720386505, "learning_rate": 0.00015117779444861215, "loss": 0.5072, "step": 4886 }, { "epoch": 0.36234892859790907, "grad_norm": 0.37662452459335327, "learning_rate": 0.000151167791947987, "loss": 0.5163, "step": 4887 }, { "epoch": 0.36242307407132796, "grad_norm": 0.3655759394168854, "learning_rate": 0.00015115778944736186, "loss": 0.538, "step": 4888 }, { "epoch": 0.3624972195447468, "grad_norm": 0.3509065508842468, "learning_rate": 0.0001511477869467367, "loss": 0.5267, "step": 4889 }, { "epoch": 0.3625713650181656, "grad_norm": 0.37913960218429565, "learning_rate": 0.00015113778444611153, "loss": 0.5677, "step": 4890 }, { "epoch": 0.3626455104915845, "grad_norm": 0.3928094208240509, "learning_rate": 0.00015112778194548637, "loss": 0.5569, "step": 4891 }, { "epoch": 0.36271965596500333, "grad_norm": 0.37429603934288025, "learning_rate": 0.00015111777944486123, "loss": 0.5313, "step": 4892 }, { "epoch": 0.36279380143842216, "grad_norm": 0.33822670578956604, "learning_rate": 0.00015110777694423607, "loss": 0.4782, "step": 4893 }, { "epoch": 0.36286794691184104, "grad_norm": 0.3669990301132202, "learning_rate": 0.0001510977744436109, "loss": 0.5577, "step": 4894 }, { "epoch": 0.3629420923852599, "grad_norm": 0.3684785068035126, "learning_rate": 0.00015108777194298577, "loss": 0.5728, "step": 4895 }, { "epoch": 0.3630162378586787, "grad_norm": 0.34328559041023254, "learning_rate": 0.0001510777694423606, "loss": 0.4516, "step": 4896 }, { "epoch": 0.3630903833320976, "grad_norm": 0.37764424085617065, "learning_rate": 0.00015106776694173544, "loss": 0.5542, "step": 4897 }, { "epoch": 0.3631645288055164, "grad_norm": 0.3708190619945526, "learning_rate": 0.00015105776444111028, "loss": 0.5102, "step": 4898 }, { "epoch": 0.36323867427893525, "grad_norm": 0.3631777763366699, "learning_rate": 0.00015104776194048514, "loss": 0.5415, "step": 4899 }, { "epoch": 0.36331281975235413, "grad_norm": 0.36873412132263184, "learning_rate": 0.00015103775943985998, "loss": 0.5541, "step": 4900 }, { "epoch": 0.36338696522577296, "grad_norm": 0.41787630319595337, "learning_rate": 0.00015102775693923482, "loss": 0.6157, "step": 4901 }, { "epoch": 0.3634611106991918, "grad_norm": 0.3601725399494171, "learning_rate": 0.00015101775443860966, "loss": 0.5074, "step": 4902 }, { "epoch": 0.3635352561726107, "grad_norm": 0.3731086552143097, "learning_rate": 0.00015100775193798452, "loss": 0.5528, "step": 4903 }, { "epoch": 0.3636094016460295, "grad_norm": 0.36351537704467773, "learning_rate": 0.00015099774943735933, "loss": 0.5422, "step": 4904 }, { "epoch": 0.36368354711944834, "grad_norm": 0.36617833375930786, "learning_rate": 0.0001509877469367342, "loss": 0.531, "step": 4905 }, { "epoch": 0.3637576925928672, "grad_norm": 0.3686490058898926, "learning_rate": 0.00015097774443610903, "loss": 0.5309, "step": 4906 }, { "epoch": 0.36383183806628605, "grad_norm": 0.36296340823173523, "learning_rate": 0.0001509677419354839, "loss": 0.5322, "step": 4907 }, { "epoch": 0.3639059835397049, "grad_norm": 0.34782177209854126, "learning_rate": 0.0001509577394348587, "loss": 0.4898, "step": 4908 }, { "epoch": 0.36398012901312377, "grad_norm": 0.3625771105289459, "learning_rate": 0.00015094773693423357, "loss": 0.5158, "step": 4909 }, { "epoch": 0.3640542744865426, "grad_norm": 0.351706862449646, "learning_rate": 0.0001509377344336084, "loss": 0.5005, "step": 4910 }, { "epoch": 0.3641284199599614, "grad_norm": 0.37087687849998474, "learning_rate": 0.00015092773193298327, "loss": 0.5517, "step": 4911 }, { "epoch": 0.3642025654333803, "grad_norm": 0.36376625299453735, "learning_rate": 0.00015091772943235808, "loss": 0.542, "step": 4912 }, { "epoch": 0.36427671090679914, "grad_norm": 0.3925579786300659, "learning_rate": 0.00015090772693173294, "loss": 0.5651, "step": 4913 }, { "epoch": 0.36435085638021797, "grad_norm": 0.3966774642467499, "learning_rate": 0.00015089772443110778, "loss": 0.5397, "step": 4914 }, { "epoch": 0.36442500185363685, "grad_norm": 0.38148632645606995, "learning_rate": 0.00015088772193048262, "loss": 0.5213, "step": 4915 }, { "epoch": 0.3644991473270557, "grad_norm": 0.3608018755912781, "learning_rate": 0.00015087771942985746, "loss": 0.5145, "step": 4916 }, { "epoch": 0.3645732928004745, "grad_norm": 0.38422104716300964, "learning_rate": 0.00015086771692923232, "loss": 0.5263, "step": 4917 }, { "epoch": 0.3646474382738934, "grad_norm": 0.36057162284851074, "learning_rate": 0.00015085771442860716, "loss": 0.5168, "step": 4918 }, { "epoch": 0.36472158374731223, "grad_norm": 0.36715400218963623, "learning_rate": 0.000150847711927982, "loss": 0.4989, "step": 4919 }, { "epoch": 0.36479572922073106, "grad_norm": 0.39085230231285095, "learning_rate": 0.00015083770942735683, "loss": 0.5664, "step": 4920 }, { "epoch": 0.36486987469414994, "grad_norm": 0.36464622616767883, "learning_rate": 0.0001508277069267317, "loss": 0.524, "step": 4921 }, { "epoch": 0.3649440201675688, "grad_norm": 0.3759694993495941, "learning_rate": 0.00015081770442610653, "loss": 0.4867, "step": 4922 }, { "epoch": 0.3650181656409876, "grad_norm": 0.39123591780662537, "learning_rate": 0.00015080770192548137, "loss": 0.5494, "step": 4923 }, { "epoch": 0.3650923111144065, "grad_norm": 0.36241164803504944, "learning_rate": 0.0001507976994248562, "loss": 0.5085, "step": 4924 }, { "epoch": 0.3651664565878253, "grad_norm": 0.3894081115722656, "learning_rate": 0.00015078769692423107, "loss": 0.5872, "step": 4925 }, { "epoch": 0.36524060206124415, "grad_norm": 0.341047465801239, "learning_rate": 0.0001507776944236059, "loss": 0.4763, "step": 4926 }, { "epoch": 0.36531474753466303, "grad_norm": 0.3470946252346039, "learning_rate": 0.00015076769192298075, "loss": 0.5124, "step": 4927 }, { "epoch": 0.36538889300808186, "grad_norm": 0.3723413944244385, "learning_rate": 0.0001507576894223556, "loss": 0.5124, "step": 4928 }, { "epoch": 0.3654630384815007, "grad_norm": 0.371625691652298, "learning_rate": 0.00015074768692173045, "loss": 0.5213, "step": 4929 }, { "epoch": 0.3655371839549196, "grad_norm": 0.3829830586910248, "learning_rate": 0.00015073768442110528, "loss": 0.5499, "step": 4930 }, { "epoch": 0.3656113294283384, "grad_norm": 0.4016857147216797, "learning_rate": 0.00015072768192048012, "loss": 0.6085, "step": 4931 }, { "epoch": 0.36568547490175723, "grad_norm": 0.3577847182750702, "learning_rate": 0.00015071767941985499, "loss": 0.516, "step": 4932 }, { "epoch": 0.3657596203751761, "grad_norm": 0.35106924176216125, "learning_rate": 0.00015070767691922982, "loss": 0.4882, "step": 4933 }, { "epoch": 0.36583376584859495, "grad_norm": 0.37487784028053284, "learning_rate": 0.00015069767441860466, "loss": 0.5726, "step": 4934 }, { "epoch": 0.3659079113220138, "grad_norm": 0.3718107342720032, "learning_rate": 0.0001506876719179795, "loss": 0.5445, "step": 4935 }, { "epoch": 0.36598205679543266, "grad_norm": 0.36295565962791443, "learning_rate": 0.00015067766941735436, "loss": 0.5094, "step": 4936 }, { "epoch": 0.3660562022688515, "grad_norm": 0.36528998613357544, "learning_rate": 0.0001506676669167292, "loss": 0.5477, "step": 4937 }, { "epoch": 0.3661303477422703, "grad_norm": 0.36971545219421387, "learning_rate": 0.00015065766441610403, "loss": 0.5586, "step": 4938 }, { "epoch": 0.3662044932156892, "grad_norm": 0.3581800162792206, "learning_rate": 0.00015064766191547887, "loss": 0.4891, "step": 4939 }, { "epoch": 0.36627863868910804, "grad_norm": 0.3202615976333618, "learning_rate": 0.00015063765941485374, "loss": 0.4552, "step": 4940 }, { "epoch": 0.36635278416252687, "grad_norm": 0.36452174186706543, "learning_rate": 0.00015062765691422855, "loss": 0.5228, "step": 4941 }, { "epoch": 0.36642692963594575, "grad_norm": 0.34901779890060425, "learning_rate": 0.0001506176544136034, "loss": 0.5278, "step": 4942 }, { "epoch": 0.3665010751093646, "grad_norm": 0.32974690198898315, "learning_rate": 0.00015060765191297825, "loss": 0.4787, "step": 4943 }, { "epoch": 0.3665752205827834, "grad_norm": 0.3688529431819916, "learning_rate": 0.0001505976494123531, "loss": 0.5116, "step": 4944 }, { "epoch": 0.36664936605620224, "grad_norm": 0.34201428294181824, "learning_rate": 0.00015058764691172792, "loss": 0.4885, "step": 4945 }, { "epoch": 0.3667235115296211, "grad_norm": 0.35414376854896545, "learning_rate": 0.00015057764441110279, "loss": 0.5219, "step": 4946 }, { "epoch": 0.36679765700303996, "grad_norm": 0.3328327238559723, "learning_rate": 0.00015056764191047762, "loss": 0.4947, "step": 4947 }, { "epoch": 0.3668718024764588, "grad_norm": 0.3410974442958832, "learning_rate": 0.0001505576394098525, "loss": 0.4964, "step": 4948 }, { "epoch": 0.36694594794987767, "grad_norm": 0.3613680899143219, "learning_rate": 0.0001505476369092273, "loss": 0.5472, "step": 4949 }, { "epoch": 0.3670200934232965, "grad_norm": 0.37449994683265686, "learning_rate": 0.00015053763440860216, "loss": 0.5175, "step": 4950 }, { "epoch": 0.36709423889671533, "grad_norm": 0.36181163787841797, "learning_rate": 0.000150527631907977, "loss": 0.4732, "step": 4951 }, { "epoch": 0.3671683843701342, "grad_norm": 0.3667404353618622, "learning_rate": 0.00015051762940735184, "loss": 0.5361, "step": 4952 }, { "epoch": 0.36724252984355304, "grad_norm": 0.3447911739349365, "learning_rate": 0.00015050762690672667, "loss": 0.5089, "step": 4953 }, { "epoch": 0.3673166753169719, "grad_norm": 0.3809639811515808, "learning_rate": 0.00015049762440610154, "loss": 0.59, "step": 4954 }, { "epoch": 0.36739082079039076, "grad_norm": 0.3688342869281769, "learning_rate": 0.00015048762190547637, "loss": 0.5413, "step": 4955 }, { "epoch": 0.3674649662638096, "grad_norm": 0.363498330116272, "learning_rate": 0.0001504776194048512, "loss": 0.5598, "step": 4956 }, { "epoch": 0.3675391117372284, "grad_norm": 0.3523581624031067, "learning_rate": 0.00015046761690422605, "loss": 0.5118, "step": 4957 }, { "epoch": 0.3676132572106473, "grad_norm": 0.35438865423202515, "learning_rate": 0.0001504576144036009, "loss": 0.5716, "step": 4958 }, { "epoch": 0.36768740268406613, "grad_norm": 0.32308876514434814, "learning_rate": 0.00015044761190297575, "loss": 0.4751, "step": 4959 }, { "epoch": 0.36776154815748496, "grad_norm": 0.3535875976085663, "learning_rate": 0.00015043760940235059, "loss": 0.5446, "step": 4960 }, { "epoch": 0.36783569363090385, "grad_norm": 0.36031702160835266, "learning_rate": 0.00015042760690172545, "loss": 0.5361, "step": 4961 }, { "epoch": 0.3679098391043227, "grad_norm": 0.35894742608070374, "learning_rate": 0.0001504176044011003, "loss": 0.5297, "step": 4962 }, { "epoch": 0.3679839845777415, "grad_norm": 0.402041494846344, "learning_rate": 0.00015040760190047512, "loss": 0.5124, "step": 4963 }, { "epoch": 0.3680581300511604, "grad_norm": 0.368353009223938, "learning_rate": 0.00015039759939984996, "loss": 0.5502, "step": 4964 }, { "epoch": 0.3681322755245792, "grad_norm": 0.37518858909606934, "learning_rate": 0.00015038759689922483, "loss": 0.5349, "step": 4965 }, { "epoch": 0.36820642099799805, "grad_norm": 0.3747809827327728, "learning_rate": 0.00015037759439859966, "loss": 0.5353, "step": 4966 }, { "epoch": 0.36828056647141694, "grad_norm": 0.34343579411506653, "learning_rate": 0.0001503675918979745, "loss": 0.4629, "step": 4967 }, { "epoch": 0.36835471194483577, "grad_norm": 0.3645436465740204, "learning_rate": 0.00015035758939734934, "loss": 0.4885, "step": 4968 }, { "epoch": 0.3684288574182546, "grad_norm": 0.36557379364967346, "learning_rate": 0.0001503475868967242, "loss": 0.5332, "step": 4969 }, { "epoch": 0.3685030028916735, "grad_norm": 0.35843950510025024, "learning_rate": 0.00015033758439609904, "loss": 0.5177, "step": 4970 }, { "epoch": 0.3685771483650923, "grad_norm": 0.35746026039123535, "learning_rate": 0.00015032758189547388, "loss": 0.5372, "step": 4971 }, { "epoch": 0.36865129383851114, "grad_norm": 0.34996020793914795, "learning_rate": 0.0001503175793948487, "loss": 0.5177, "step": 4972 }, { "epoch": 0.36872543931193, "grad_norm": 0.36909231543540955, "learning_rate": 0.00015030757689422358, "loss": 0.5203, "step": 4973 }, { "epoch": 0.36879958478534886, "grad_norm": 0.3391627371311188, "learning_rate": 0.00015029757439359841, "loss": 0.4699, "step": 4974 }, { "epoch": 0.3688737302587677, "grad_norm": 0.35805371403694153, "learning_rate": 0.00015028757189297325, "loss": 0.5116, "step": 4975 }, { "epoch": 0.36894787573218657, "grad_norm": 0.37459614872932434, "learning_rate": 0.0001502775693923481, "loss": 0.5417, "step": 4976 }, { "epoch": 0.3690220212056054, "grad_norm": 0.33628278970718384, "learning_rate": 0.00015026756689172295, "loss": 0.4922, "step": 4977 }, { "epoch": 0.36909616667902423, "grad_norm": 0.39773496985435486, "learning_rate": 0.00015025756439109776, "loss": 0.544, "step": 4978 }, { "epoch": 0.3691703121524431, "grad_norm": 0.38173723220825195, "learning_rate": 0.00015024756189047263, "loss": 0.5633, "step": 4979 }, { "epoch": 0.36924445762586194, "grad_norm": 0.36063215136528015, "learning_rate": 0.00015023755938984746, "loss": 0.5066, "step": 4980 }, { "epoch": 0.3693186030992808, "grad_norm": 0.36236771941185, "learning_rate": 0.00015022755688922233, "loss": 0.5074, "step": 4981 }, { "epoch": 0.36939274857269966, "grad_norm": 0.39143937826156616, "learning_rate": 0.00015021755438859714, "loss": 0.5557, "step": 4982 }, { "epoch": 0.3694668940461185, "grad_norm": 0.343015193939209, "learning_rate": 0.000150207551887972, "loss": 0.4957, "step": 4983 }, { "epoch": 0.3695410395195373, "grad_norm": 0.39214497804641724, "learning_rate": 0.00015019754938734684, "loss": 0.5487, "step": 4984 }, { "epoch": 0.3696151849929562, "grad_norm": 0.34968653321266174, "learning_rate": 0.0001501875468867217, "loss": 0.524, "step": 4985 }, { "epoch": 0.36968933046637503, "grad_norm": 0.358284056186676, "learning_rate": 0.0001501775443860965, "loss": 0.523, "step": 4986 }, { "epoch": 0.36976347593979386, "grad_norm": 0.36714550852775574, "learning_rate": 0.00015016754188547138, "loss": 0.4967, "step": 4987 }, { "epoch": 0.36983762141321275, "grad_norm": 0.366117000579834, "learning_rate": 0.00015015753938484621, "loss": 0.5281, "step": 4988 }, { "epoch": 0.3699117668866316, "grad_norm": 0.34884706139564514, "learning_rate": 0.00015014753688422105, "loss": 0.4973, "step": 4989 }, { "epoch": 0.3699859123600504, "grad_norm": 0.3500468134880066, "learning_rate": 0.0001501375343835959, "loss": 0.5206, "step": 4990 }, { "epoch": 0.3700600578334693, "grad_norm": 0.3650459051132202, "learning_rate": 0.00015012753188297075, "loss": 0.5406, "step": 4991 }, { "epoch": 0.3701342033068881, "grad_norm": 0.35764840245246887, "learning_rate": 0.0001501175293823456, "loss": 0.4813, "step": 4992 }, { "epoch": 0.37020834878030695, "grad_norm": 0.3520738184452057, "learning_rate": 0.00015010752688172043, "loss": 0.4991, "step": 4993 }, { "epoch": 0.37028249425372584, "grad_norm": 0.39278581738471985, "learning_rate": 0.0001500975243810953, "loss": 0.5161, "step": 4994 }, { "epoch": 0.37035663972714467, "grad_norm": 0.3667864501476288, "learning_rate": 0.00015008752188047013, "loss": 0.5535, "step": 4995 }, { "epoch": 0.3704307852005635, "grad_norm": 0.36622944474220276, "learning_rate": 0.00015007751937984497, "loss": 0.5253, "step": 4996 }, { "epoch": 0.3705049306739824, "grad_norm": 0.3665153682231903, "learning_rate": 0.0001500675168792198, "loss": 0.4968, "step": 4997 }, { "epoch": 0.3705790761474012, "grad_norm": 0.3710644245147705, "learning_rate": 0.00015005751437859467, "loss": 0.524, "step": 4998 }, { "epoch": 0.37065322162082004, "grad_norm": 0.37539488077163696, "learning_rate": 0.0001500475118779695, "loss": 0.5134, "step": 4999 }, { "epoch": 0.3707273670942389, "grad_norm": 0.34637686610221863, "learning_rate": 0.00015003750937734434, "loss": 0.4889, "step": 5000 }, { "epoch": 0.37080151256765775, "grad_norm": 0.36018991470336914, "learning_rate": 0.00015002750687671918, "loss": 0.476, "step": 5001 }, { "epoch": 0.3708756580410766, "grad_norm": 0.35942795872688293, "learning_rate": 0.00015001750437609404, "loss": 0.5072, "step": 5002 }, { "epoch": 0.3709498035144954, "grad_norm": 0.41221699118614197, "learning_rate": 0.00015000750187546888, "loss": 0.5001, "step": 5003 }, { "epoch": 0.3710239489879143, "grad_norm": 0.3985093832015991, "learning_rate": 0.00014999749937484372, "loss": 0.5746, "step": 5004 }, { "epoch": 0.37109809446133313, "grad_norm": 0.35056009888648987, "learning_rate": 0.00014998749687421855, "loss": 0.503, "step": 5005 }, { "epoch": 0.37117223993475196, "grad_norm": 0.37258896231651306, "learning_rate": 0.00014997749437359342, "loss": 0.5062, "step": 5006 }, { "epoch": 0.37124638540817084, "grad_norm": 0.41158968210220337, "learning_rate": 0.00014996749187296825, "loss": 0.5598, "step": 5007 }, { "epoch": 0.37132053088158967, "grad_norm": 0.42173701524734497, "learning_rate": 0.0001499574893723431, "loss": 0.5291, "step": 5008 }, { "epoch": 0.3713946763550085, "grad_norm": 0.4021681845188141, "learning_rate": 0.00014994748687171793, "loss": 0.6151, "step": 5009 }, { "epoch": 0.3714688218284274, "grad_norm": 0.36782485246658325, "learning_rate": 0.0001499374843710928, "loss": 0.551, "step": 5010 }, { "epoch": 0.3715429673018462, "grad_norm": 0.35015159845352173, "learning_rate": 0.00014992748187046763, "loss": 0.5335, "step": 5011 }, { "epoch": 0.37161711277526505, "grad_norm": 0.3729798197746277, "learning_rate": 0.00014991747936984247, "loss": 0.5516, "step": 5012 }, { "epoch": 0.37169125824868393, "grad_norm": 0.3658592402935028, "learning_rate": 0.0001499074768692173, "loss": 0.54, "step": 5013 }, { "epoch": 0.37176540372210276, "grad_norm": 0.3912920355796814, "learning_rate": 0.00014989747436859217, "loss": 0.5229, "step": 5014 }, { "epoch": 0.3718395491955216, "grad_norm": 0.372543603181839, "learning_rate": 0.00014988747186796698, "loss": 0.5582, "step": 5015 }, { "epoch": 0.3719136946689405, "grad_norm": 0.35236939787864685, "learning_rate": 0.00014987746936734184, "loss": 0.5054, "step": 5016 }, { "epoch": 0.3719878401423593, "grad_norm": 0.350679486989975, "learning_rate": 0.00014986746686671668, "loss": 0.5244, "step": 5017 }, { "epoch": 0.37206198561577813, "grad_norm": 0.4081745445728302, "learning_rate": 0.00014985746436609154, "loss": 0.5549, "step": 5018 }, { "epoch": 0.372136131089197, "grad_norm": 0.3554583787918091, "learning_rate": 0.00014984746186546635, "loss": 0.5358, "step": 5019 }, { "epoch": 0.37221027656261585, "grad_norm": 0.37055304646492004, "learning_rate": 0.00014983745936484122, "loss": 0.5654, "step": 5020 }, { "epoch": 0.3722844220360347, "grad_norm": 0.3666818141937256, "learning_rate": 0.00014982745686421606, "loss": 0.5217, "step": 5021 }, { "epoch": 0.37235856750945356, "grad_norm": 0.3636489510536194, "learning_rate": 0.00014981745436359092, "loss": 0.5148, "step": 5022 }, { "epoch": 0.3724327129828724, "grad_norm": 0.34240707755088806, "learning_rate": 0.00014980745186296573, "loss": 0.5072, "step": 5023 }, { "epoch": 0.3725068584562912, "grad_norm": 0.3433224856853485, "learning_rate": 0.0001497974493623406, "loss": 0.4923, "step": 5024 }, { "epoch": 0.3725810039297101, "grad_norm": 0.36888888478279114, "learning_rate": 0.00014978744686171543, "loss": 0.5222, "step": 5025 }, { "epoch": 0.37265514940312894, "grad_norm": 0.35141247510910034, "learning_rate": 0.00014977744436109027, "loss": 0.4937, "step": 5026 }, { "epoch": 0.37272929487654777, "grad_norm": 0.35111773014068604, "learning_rate": 0.0001497674418604651, "loss": 0.4927, "step": 5027 }, { "epoch": 0.37280344034996665, "grad_norm": 0.3636402189731598, "learning_rate": 0.00014975743935983997, "loss": 0.5288, "step": 5028 }, { "epoch": 0.3728775858233855, "grad_norm": 0.349761039018631, "learning_rate": 0.0001497474368592148, "loss": 0.5192, "step": 5029 }, { "epoch": 0.3729517312968043, "grad_norm": 0.39364108443260193, "learning_rate": 0.00014973743435858964, "loss": 0.5877, "step": 5030 }, { "epoch": 0.3730258767702232, "grad_norm": 0.3486521244049072, "learning_rate": 0.0001497274318579645, "loss": 0.5146, "step": 5031 }, { "epoch": 0.373100022243642, "grad_norm": 0.35109826922416687, "learning_rate": 0.00014971742935733934, "loss": 0.5309, "step": 5032 }, { "epoch": 0.37317416771706086, "grad_norm": 0.3703248202800751, "learning_rate": 0.00014970742685671418, "loss": 0.5035, "step": 5033 }, { "epoch": 0.37324831319047974, "grad_norm": 0.37638059258461, "learning_rate": 0.00014969742435608902, "loss": 0.552, "step": 5034 }, { "epoch": 0.37332245866389857, "grad_norm": 0.3504345417022705, "learning_rate": 0.00014968742185546388, "loss": 0.5258, "step": 5035 }, { "epoch": 0.3733966041373174, "grad_norm": 0.3521352708339691, "learning_rate": 0.00014967741935483872, "loss": 0.5257, "step": 5036 }, { "epoch": 0.3734707496107363, "grad_norm": 0.3487701714038849, "learning_rate": 0.00014966741685421358, "loss": 0.5007, "step": 5037 }, { "epoch": 0.3735448950841551, "grad_norm": 0.336894690990448, "learning_rate": 0.0001496574143535884, "loss": 0.5357, "step": 5038 }, { "epoch": 0.37361904055757394, "grad_norm": 0.37809431552886963, "learning_rate": 0.00014964741185296326, "loss": 0.5203, "step": 5039 }, { "epoch": 0.37369318603099283, "grad_norm": 0.36602529883384705, "learning_rate": 0.0001496374093523381, "loss": 0.5341, "step": 5040 }, { "epoch": 0.37376733150441166, "grad_norm": 0.35790613293647766, "learning_rate": 0.00014962740685171293, "loss": 0.5285, "step": 5041 }, { "epoch": 0.3738414769778305, "grad_norm": 0.3558042049407959, "learning_rate": 0.00014961740435108777, "loss": 0.5293, "step": 5042 }, { "epoch": 0.3739156224512494, "grad_norm": 0.33103761076927185, "learning_rate": 0.00014960740185046263, "loss": 0.4864, "step": 5043 }, { "epoch": 0.3739897679246682, "grad_norm": 0.3480120599269867, "learning_rate": 0.00014959739934983747, "loss": 0.4935, "step": 5044 }, { "epoch": 0.37406391339808703, "grad_norm": 0.3840160667896271, "learning_rate": 0.0001495873968492123, "loss": 0.5545, "step": 5045 }, { "epoch": 0.3741380588715059, "grad_norm": 0.35211890935897827, "learning_rate": 0.00014957739434858715, "loss": 0.5218, "step": 5046 }, { "epoch": 0.37421220434492475, "grad_norm": 0.35534757375717163, "learning_rate": 0.000149567391847962, "loss": 0.5628, "step": 5047 }, { "epoch": 0.3742863498183436, "grad_norm": 0.37627607583999634, "learning_rate": 0.00014955738934733685, "loss": 0.5802, "step": 5048 }, { "epoch": 0.37436049529176246, "grad_norm": 0.3603009581565857, "learning_rate": 0.00014954738684671168, "loss": 0.5259, "step": 5049 }, { "epoch": 0.3744346407651813, "grad_norm": 0.3671620488166809, "learning_rate": 0.00014953738434608652, "loss": 0.5214, "step": 5050 }, { "epoch": 0.3745087862386001, "grad_norm": 0.3346768319606781, "learning_rate": 0.00014952738184546138, "loss": 0.509, "step": 5051 }, { "epoch": 0.374582931712019, "grad_norm": 0.3662242293357849, "learning_rate": 0.0001495173793448362, "loss": 0.5287, "step": 5052 }, { "epoch": 0.37465707718543784, "grad_norm": 0.3717985451221466, "learning_rate": 0.00014950737684421106, "loss": 0.5822, "step": 5053 }, { "epoch": 0.37473122265885667, "grad_norm": 0.3598591387271881, "learning_rate": 0.0001494973743435859, "loss": 0.5146, "step": 5054 }, { "epoch": 0.37480536813227555, "grad_norm": 0.34387338161468506, "learning_rate": 0.00014948737184296076, "loss": 0.5253, "step": 5055 }, { "epoch": 0.3748795136056944, "grad_norm": 0.38666588068008423, "learning_rate": 0.00014947736934233557, "loss": 0.5419, "step": 5056 }, { "epoch": 0.3749536590791132, "grad_norm": 0.3658897280693054, "learning_rate": 0.00014946736684171043, "loss": 0.5466, "step": 5057 }, { "epoch": 0.37502780455253204, "grad_norm": 0.33836042881011963, "learning_rate": 0.00014945736434108527, "loss": 0.4745, "step": 5058 }, { "epoch": 0.3751019500259509, "grad_norm": 0.3648619055747986, "learning_rate": 0.00014944736184046014, "loss": 0.5469, "step": 5059 }, { "epoch": 0.37517609549936975, "grad_norm": 0.3516271114349365, "learning_rate": 0.00014943735933983495, "loss": 0.5112, "step": 5060 }, { "epoch": 0.3752502409727886, "grad_norm": 0.37885063886642456, "learning_rate": 0.0001494273568392098, "loss": 0.5246, "step": 5061 }, { "epoch": 0.37532438644620747, "grad_norm": 0.370570570230484, "learning_rate": 0.00014941735433858465, "loss": 0.5668, "step": 5062 }, { "epoch": 0.3753985319196263, "grad_norm": 0.3504244387149811, "learning_rate": 0.00014940735183795948, "loss": 0.5225, "step": 5063 }, { "epoch": 0.37547267739304513, "grad_norm": 0.3612081706523895, "learning_rate": 0.00014939734933733435, "loss": 0.5086, "step": 5064 }, { "epoch": 0.375546822866464, "grad_norm": 0.3555235266685486, "learning_rate": 0.00014938734683670919, "loss": 0.5284, "step": 5065 }, { "epoch": 0.37562096833988284, "grad_norm": 0.3411254584789276, "learning_rate": 0.00014937734433608402, "loss": 0.4753, "step": 5066 }, { "epoch": 0.3756951138133017, "grad_norm": 0.36504098773002625, "learning_rate": 0.00014936734183545886, "loss": 0.5088, "step": 5067 }, { "epoch": 0.37576925928672056, "grad_norm": 0.4026203155517578, "learning_rate": 0.00014935733933483372, "loss": 0.5193, "step": 5068 }, { "epoch": 0.3758434047601394, "grad_norm": 0.35152557492256165, "learning_rate": 0.00014934733683420856, "loss": 0.4649, "step": 5069 }, { "epoch": 0.3759175502335582, "grad_norm": 0.40059876441955566, "learning_rate": 0.00014933733433358343, "loss": 0.5315, "step": 5070 }, { "epoch": 0.3759916957069771, "grad_norm": 0.3656393587589264, "learning_rate": 0.00014932733183295824, "loss": 0.5238, "step": 5071 }, { "epoch": 0.37606584118039593, "grad_norm": 0.3727003037929535, "learning_rate": 0.0001493173293323331, "loss": 0.5214, "step": 5072 }, { "epoch": 0.37613998665381476, "grad_norm": 0.36124682426452637, "learning_rate": 0.00014930732683170794, "loss": 0.5179, "step": 5073 }, { "epoch": 0.37621413212723365, "grad_norm": 0.3729184567928314, "learning_rate": 0.0001492973243310828, "loss": 0.5413, "step": 5074 }, { "epoch": 0.3762882776006525, "grad_norm": 0.34192827343940735, "learning_rate": 0.0001492873218304576, "loss": 0.498, "step": 5075 }, { "epoch": 0.3763624230740713, "grad_norm": 0.3694377541542053, "learning_rate": 0.00014927731932983247, "loss": 0.5246, "step": 5076 }, { "epoch": 0.3764365685474902, "grad_norm": 0.37752631306648254, "learning_rate": 0.0001492673168292073, "loss": 0.5489, "step": 5077 }, { "epoch": 0.376510714020909, "grad_norm": 0.38468778133392334, "learning_rate": 0.00014925731432858215, "loss": 0.5177, "step": 5078 }, { "epoch": 0.37658485949432785, "grad_norm": 0.3899971842765808, "learning_rate": 0.00014924731182795699, "loss": 0.5368, "step": 5079 }, { "epoch": 0.37665900496774674, "grad_norm": 0.3682261109352112, "learning_rate": 0.00014923730932733185, "loss": 0.5088, "step": 5080 }, { "epoch": 0.37673315044116557, "grad_norm": 0.38243281841278076, "learning_rate": 0.0001492273068267067, "loss": 0.5524, "step": 5081 }, { "epoch": 0.3768072959145844, "grad_norm": 0.35549768805503845, "learning_rate": 0.00014921730432608152, "loss": 0.5061, "step": 5082 }, { "epoch": 0.3768814413880033, "grad_norm": 0.375183641910553, "learning_rate": 0.00014920730182545636, "loss": 0.53, "step": 5083 }, { "epoch": 0.3769555868614221, "grad_norm": 0.3515879511833191, "learning_rate": 0.00014919729932483123, "loss": 0.4862, "step": 5084 }, { "epoch": 0.37702973233484094, "grad_norm": 0.3431909680366516, "learning_rate": 0.00014918729682420606, "loss": 0.5097, "step": 5085 }, { "epoch": 0.3771038778082598, "grad_norm": 0.3605482578277588, "learning_rate": 0.0001491772943235809, "loss": 0.5012, "step": 5086 }, { "epoch": 0.37717802328167865, "grad_norm": 0.36795181035995483, "learning_rate": 0.00014916729182295574, "loss": 0.5163, "step": 5087 }, { "epoch": 0.3772521687550975, "grad_norm": 0.3662823736667633, "learning_rate": 0.0001491572893223306, "loss": 0.5591, "step": 5088 }, { "epoch": 0.37732631422851637, "grad_norm": 0.35511645674705505, "learning_rate": 0.0001491472868217054, "loss": 0.5013, "step": 5089 }, { "epoch": 0.3774004597019352, "grad_norm": 0.3743792176246643, "learning_rate": 0.00014913728432108028, "loss": 0.4998, "step": 5090 }, { "epoch": 0.377474605175354, "grad_norm": 0.3605458736419678, "learning_rate": 0.0001491272818204551, "loss": 0.4938, "step": 5091 }, { "epoch": 0.3775487506487729, "grad_norm": 0.39573973417282104, "learning_rate": 0.00014911727931982998, "loss": 0.5558, "step": 5092 }, { "epoch": 0.37762289612219174, "grad_norm": 0.3409721553325653, "learning_rate": 0.0001491072768192048, "loss": 0.4821, "step": 5093 }, { "epoch": 0.37769704159561057, "grad_norm": 0.374874085187912, "learning_rate": 0.00014909727431857965, "loss": 0.5313, "step": 5094 }, { "epoch": 0.37777118706902946, "grad_norm": 0.33842530846595764, "learning_rate": 0.0001490872718179545, "loss": 0.4952, "step": 5095 }, { "epoch": 0.3778453325424483, "grad_norm": 0.34389734268188477, "learning_rate": 0.00014907726931732935, "loss": 0.4837, "step": 5096 }, { "epoch": 0.3779194780158671, "grad_norm": 0.3631696403026581, "learning_rate": 0.0001490672668167042, "loss": 0.5118, "step": 5097 }, { "epoch": 0.377993623489286, "grad_norm": 0.37672823667526245, "learning_rate": 0.00014905726431607903, "loss": 0.5752, "step": 5098 }, { "epoch": 0.37806776896270483, "grad_norm": 0.35682544112205505, "learning_rate": 0.00014904726181545386, "loss": 0.546, "step": 5099 }, { "epoch": 0.37814191443612366, "grad_norm": 0.3738464415073395, "learning_rate": 0.0001490372593148287, "loss": 0.5169, "step": 5100 }, { "epoch": 0.37821605990954255, "grad_norm": 0.36497846245765686, "learning_rate": 0.00014902725681420356, "loss": 0.542, "step": 5101 }, { "epoch": 0.3782902053829614, "grad_norm": 0.3587421476840973, "learning_rate": 0.0001490172543135784, "loss": 0.4939, "step": 5102 }, { "epoch": 0.3783643508563802, "grad_norm": 0.3795832693576813, "learning_rate": 0.00014900725181295327, "loss": 0.5638, "step": 5103 }, { "epoch": 0.3784384963297991, "grad_norm": 0.3332255482673645, "learning_rate": 0.00014899724931232808, "loss": 0.4989, "step": 5104 }, { "epoch": 0.3785126418032179, "grad_norm": 0.35430067777633667, "learning_rate": 0.00014898724681170294, "loss": 0.5127, "step": 5105 }, { "epoch": 0.37858678727663675, "grad_norm": 0.35829922556877136, "learning_rate": 0.00014897724431107778, "loss": 0.5027, "step": 5106 }, { "epoch": 0.37866093275005563, "grad_norm": 0.3506041467189789, "learning_rate": 0.00014896724181045264, "loss": 0.5525, "step": 5107 }, { "epoch": 0.37873507822347446, "grad_norm": 0.35700955986976624, "learning_rate": 0.00014895723930982745, "loss": 0.5012, "step": 5108 }, { "epoch": 0.3788092236968933, "grad_norm": 0.37431076169013977, "learning_rate": 0.00014894723680920232, "loss": 0.5488, "step": 5109 }, { "epoch": 0.3788833691703122, "grad_norm": 0.36357033252716064, "learning_rate": 0.00014893723430857715, "loss": 0.5152, "step": 5110 }, { "epoch": 0.378957514643731, "grad_norm": 0.3931942582130432, "learning_rate": 0.00014892723180795202, "loss": 0.5466, "step": 5111 }, { "epoch": 0.37903166011714984, "grad_norm": 0.35729095339775085, "learning_rate": 0.00014891722930732683, "loss": 0.5159, "step": 5112 }, { "epoch": 0.3791058055905687, "grad_norm": 0.4199448227882385, "learning_rate": 0.0001489072268067017, "loss": 0.5589, "step": 5113 }, { "epoch": 0.37917995106398755, "grad_norm": 0.3702043890953064, "learning_rate": 0.00014889722430607653, "loss": 0.5387, "step": 5114 }, { "epoch": 0.3792540965374064, "grad_norm": 0.37560421228408813, "learning_rate": 0.00014888722180545137, "loss": 0.5515, "step": 5115 }, { "epoch": 0.3793282420108252, "grad_norm": 0.3619811534881592, "learning_rate": 0.0001488772193048262, "loss": 0.5188, "step": 5116 }, { "epoch": 0.3794023874842441, "grad_norm": 0.36329299211502075, "learning_rate": 0.00014886721680420107, "loss": 0.5373, "step": 5117 }, { "epoch": 0.3794765329576629, "grad_norm": 0.36013272404670715, "learning_rate": 0.0001488572143035759, "loss": 0.5353, "step": 5118 }, { "epoch": 0.37955067843108176, "grad_norm": 0.36001449823379517, "learning_rate": 0.00014884721180295074, "loss": 0.514, "step": 5119 }, { "epoch": 0.37962482390450064, "grad_norm": 0.3770798444747925, "learning_rate": 0.00014883720930232558, "loss": 0.5131, "step": 5120 }, { "epoch": 0.37969896937791947, "grad_norm": 0.36522534489631653, "learning_rate": 0.00014882720680170044, "loss": 0.5565, "step": 5121 }, { "epoch": 0.3797731148513383, "grad_norm": 0.34669092297554016, "learning_rate": 0.00014881720430107528, "loss": 0.4897, "step": 5122 }, { "epoch": 0.3798472603247572, "grad_norm": 0.3571053743362427, "learning_rate": 0.00014880720180045012, "loss": 0.5101, "step": 5123 }, { "epoch": 0.379921405798176, "grad_norm": 0.3627471625804901, "learning_rate": 0.00014879719929982495, "loss": 0.5348, "step": 5124 }, { "epoch": 0.37999555127159484, "grad_norm": 0.34881505370140076, "learning_rate": 0.00014878719679919982, "loss": 0.4988, "step": 5125 }, { "epoch": 0.38006969674501373, "grad_norm": 0.40178585052490234, "learning_rate": 0.00014877719429857463, "loss": 0.5466, "step": 5126 }, { "epoch": 0.38014384221843256, "grad_norm": 0.3610983192920685, "learning_rate": 0.0001487671917979495, "loss": 0.4869, "step": 5127 }, { "epoch": 0.3802179876918514, "grad_norm": 0.3577709496021271, "learning_rate": 0.00014875718929732433, "loss": 0.5229, "step": 5128 }, { "epoch": 0.3802921331652703, "grad_norm": 0.3453003168106079, "learning_rate": 0.0001487471867966992, "loss": 0.505, "step": 5129 }, { "epoch": 0.3803662786386891, "grad_norm": 0.3605257272720337, "learning_rate": 0.00014873718429607403, "loss": 0.5242, "step": 5130 }, { "epoch": 0.38044042411210793, "grad_norm": 0.3731325566768646, "learning_rate": 0.00014872718179544887, "loss": 0.549, "step": 5131 }, { "epoch": 0.3805145695855268, "grad_norm": 0.3483254909515381, "learning_rate": 0.0001487171792948237, "loss": 0.5079, "step": 5132 }, { "epoch": 0.38058871505894565, "grad_norm": 0.3451705873012543, "learning_rate": 0.00014870717679419857, "loss": 0.4933, "step": 5133 }, { "epoch": 0.3806628605323645, "grad_norm": 0.3550415337085724, "learning_rate": 0.0001486971742935734, "loss": 0.5179, "step": 5134 }, { "epoch": 0.38073700600578336, "grad_norm": 0.3533968925476074, "learning_rate": 0.00014868717179294824, "loss": 0.5312, "step": 5135 }, { "epoch": 0.3808111514792022, "grad_norm": 0.3621142506599426, "learning_rate": 0.00014867716929232308, "loss": 0.5507, "step": 5136 }, { "epoch": 0.380885296952621, "grad_norm": 0.3445530831813812, "learning_rate": 0.00014866716679169794, "loss": 0.5099, "step": 5137 }, { "epoch": 0.3809594424260399, "grad_norm": 0.39015400409698486, "learning_rate": 0.00014865716429107278, "loss": 0.5434, "step": 5138 }, { "epoch": 0.38103358789945874, "grad_norm": 0.3651333451271057, "learning_rate": 0.00014864716179044762, "loss": 0.5336, "step": 5139 }, { "epoch": 0.38110773337287757, "grad_norm": 0.36120229959487915, "learning_rate": 0.00014863715928982248, "loss": 0.5386, "step": 5140 }, { "epoch": 0.38118187884629645, "grad_norm": 0.37599751353263855, "learning_rate": 0.0001486271567891973, "loss": 0.5448, "step": 5141 }, { "epoch": 0.3812560243197153, "grad_norm": 0.3531903922557831, "learning_rate": 0.00014861715428857216, "loss": 0.5423, "step": 5142 }, { "epoch": 0.3813301697931341, "grad_norm": 0.3834711015224457, "learning_rate": 0.000148607151787947, "loss": 0.5836, "step": 5143 }, { "epoch": 0.381404315266553, "grad_norm": 0.36947888135910034, "learning_rate": 0.00014859714928732186, "loss": 0.579, "step": 5144 }, { "epoch": 0.3814784607399718, "grad_norm": 0.36424702405929565, "learning_rate": 0.00014858714678669667, "loss": 0.5389, "step": 5145 }, { "epoch": 0.38155260621339065, "grad_norm": 0.33320167660713196, "learning_rate": 0.00014857714428607153, "loss": 0.4629, "step": 5146 }, { "epoch": 0.38162675168680954, "grad_norm": 0.3520750403404236, "learning_rate": 0.00014856714178544637, "loss": 0.5282, "step": 5147 }, { "epoch": 0.38170089716022837, "grad_norm": 0.35435208678245544, "learning_rate": 0.00014855713928482123, "loss": 0.5422, "step": 5148 }, { "epoch": 0.3817750426336472, "grad_norm": 0.36522114276885986, "learning_rate": 0.00014854713678419604, "loss": 0.5724, "step": 5149 }, { "epoch": 0.3818491881070661, "grad_norm": 0.3535992503166199, "learning_rate": 0.0001485371342835709, "loss": 0.5052, "step": 5150 }, { "epoch": 0.3819233335804849, "grad_norm": 0.3692737817764282, "learning_rate": 0.00014852713178294574, "loss": 0.5411, "step": 5151 }, { "epoch": 0.38199747905390374, "grad_norm": 0.36906948685646057, "learning_rate": 0.00014851712928232058, "loss": 0.5558, "step": 5152 }, { "epoch": 0.38207162452732263, "grad_norm": 0.3647000193595886, "learning_rate": 0.00014850712678169542, "loss": 0.5543, "step": 5153 }, { "epoch": 0.38214577000074146, "grad_norm": 0.3830810785293579, "learning_rate": 0.00014849712428107028, "loss": 0.5276, "step": 5154 }, { "epoch": 0.3822199154741603, "grad_norm": 0.3630921244621277, "learning_rate": 0.00014848712178044512, "loss": 0.51, "step": 5155 }, { "epoch": 0.3822940609475792, "grad_norm": 0.35902804136276245, "learning_rate": 0.00014847711927981996, "loss": 0.5265, "step": 5156 }, { "epoch": 0.382368206420998, "grad_norm": 0.3592281937599182, "learning_rate": 0.0001484671167791948, "loss": 0.5269, "step": 5157 }, { "epoch": 0.38244235189441683, "grad_norm": 0.4178517162799835, "learning_rate": 0.00014845711427856966, "loss": 0.5702, "step": 5158 }, { "epoch": 0.3825164973678357, "grad_norm": 0.34550121426582336, "learning_rate": 0.0001484471117779445, "loss": 0.5063, "step": 5159 }, { "epoch": 0.38259064284125455, "grad_norm": 0.38038745522499084, "learning_rate": 0.00014843710927731933, "loss": 0.5479, "step": 5160 }, { "epoch": 0.3826647883146734, "grad_norm": 0.35275861620903015, "learning_rate": 0.00014842710677669417, "loss": 0.4881, "step": 5161 }, { "epoch": 0.38273893378809226, "grad_norm": 0.3817461431026459, "learning_rate": 0.00014841710427606903, "loss": 0.5402, "step": 5162 }, { "epoch": 0.3828130792615111, "grad_norm": 0.3602007329463959, "learning_rate": 0.00014840710177544387, "loss": 0.4708, "step": 5163 }, { "epoch": 0.3828872247349299, "grad_norm": 0.3481135070323944, "learning_rate": 0.0001483970992748187, "loss": 0.5389, "step": 5164 }, { "epoch": 0.3829613702083488, "grad_norm": 0.35611292719841003, "learning_rate": 0.00014838709677419355, "loss": 0.4949, "step": 5165 }, { "epoch": 0.38303551568176764, "grad_norm": 0.3713480532169342, "learning_rate": 0.0001483770942735684, "loss": 0.5038, "step": 5166 }, { "epoch": 0.38310966115518647, "grad_norm": 0.385672003030777, "learning_rate": 0.00014836709177294325, "loss": 0.5549, "step": 5167 }, { "epoch": 0.38318380662860535, "grad_norm": 0.35027793049812317, "learning_rate": 0.00014835708927231808, "loss": 0.5084, "step": 5168 }, { "epoch": 0.3832579521020242, "grad_norm": 0.34214451909065247, "learning_rate": 0.00014834708677169292, "loss": 0.5009, "step": 5169 }, { "epoch": 0.383332097575443, "grad_norm": 0.3749628961086273, "learning_rate": 0.00014833708427106778, "loss": 0.535, "step": 5170 }, { "epoch": 0.3834062430488619, "grad_norm": 0.3860909342765808, "learning_rate": 0.00014832708177044262, "loss": 0.5219, "step": 5171 }, { "epoch": 0.3834803885222807, "grad_norm": 0.3820408284664154, "learning_rate": 0.00014831707926981746, "loss": 0.5414, "step": 5172 }, { "epoch": 0.38355453399569955, "grad_norm": 0.3808507025241852, "learning_rate": 0.00014830707676919232, "loss": 0.5342, "step": 5173 }, { "epoch": 0.3836286794691184, "grad_norm": 0.37445077300071716, "learning_rate": 0.00014829707426856716, "loss": 0.5549, "step": 5174 }, { "epoch": 0.38370282494253727, "grad_norm": 0.3502435088157654, "learning_rate": 0.000148287071767942, "loss": 0.5022, "step": 5175 }, { "epoch": 0.3837769704159561, "grad_norm": 0.3398893475532532, "learning_rate": 0.00014827706926731683, "loss": 0.5036, "step": 5176 }, { "epoch": 0.3838511158893749, "grad_norm": 0.3601578176021576, "learning_rate": 0.0001482670667666917, "loss": 0.5414, "step": 5177 }, { "epoch": 0.3839252613627938, "grad_norm": 0.3593074083328247, "learning_rate": 0.0001482570642660665, "loss": 0.5293, "step": 5178 }, { "epoch": 0.38399940683621264, "grad_norm": 0.3629259765148163, "learning_rate": 0.00014824706176544137, "loss": 0.5225, "step": 5179 }, { "epoch": 0.38407355230963147, "grad_norm": 0.37120822072029114, "learning_rate": 0.0001482370592648162, "loss": 0.5879, "step": 5180 }, { "epoch": 0.38414769778305036, "grad_norm": 0.36089059710502625, "learning_rate": 0.00014822705676419107, "loss": 0.5059, "step": 5181 }, { "epoch": 0.3842218432564692, "grad_norm": 0.3396725356578827, "learning_rate": 0.00014821705426356588, "loss": 0.5126, "step": 5182 }, { "epoch": 0.384295988729888, "grad_norm": 0.3713687062263489, "learning_rate": 0.00014820705176294075, "loss": 0.5463, "step": 5183 }, { "epoch": 0.3843701342033069, "grad_norm": 0.335955947637558, "learning_rate": 0.00014819704926231559, "loss": 0.5043, "step": 5184 }, { "epoch": 0.38444427967672573, "grad_norm": 0.3486110270023346, "learning_rate": 0.00014818704676169045, "loss": 0.5097, "step": 5185 }, { "epoch": 0.38451842515014456, "grad_norm": 0.36401134729385376, "learning_rate": 0.00014817704426106526, "loss": 0.4925, "step": 5186 }, { "epoch": 0.38459257062356345, "grad_norm": 0.35876786708831787, "learning_rate": 0.00014816704176044012, "loss": 0.5118, "step": 5187 }, { "epoch": 0.3846667160969823, "grad_norm": 0.3525061309337616, "learning_rate": 0.00014815703925981496, "loss": 0.5137, "step": 5188 }, { "epoch": 0.3847408615704011, "grad_norm": 0.3952378034591675, "learning_rate": 0.0001481470367591898, "loss": 0.5654, "step": 5189 }, { "epoch": 0.38481500704382, "grad_norm": 0.3819061517715454, "learning_rate": 0.00014813703425856463, "loss": 0.5294, "step": 5190 }, { "epoch": 0.3848891525172388, "grad_norm": 0.3751055896282196, "learning_rate": 0.0001481270317579395, "loss": 0.5148, "step": 5191 }, { "epoch": 0.38496329799065765, "grad_norm": 0.34336167573928833, "learning_rate": 0.00014811702925731434, "loss": 0.5144, "step": 5192 }, { "epoch": 0.38503744346407653, "grad_norm": 0.3792412579059601, "learning_rate": 0.00014810702675668917, "loss": 0.5438, "step": 5193 }, { "epoch": 0.38511158893749536, "grad_norm": 0.3843245804309845, "learning_rate": 0.000148097024256064, "loss": 0.5596, "step": 5194 }, { "epoch": 0.3851857344109142, "grad_norm": 0.3529599905014038, "learning_rate": 0.00014808702175543887, "loss": 0.525, "step": 5195 }, { "epoch": 0.3852598798843331, "grad_norm": 0.34171411395072937, "learning_rate": 0.0001480770192548137, "loss": 0.4975, "step": 5196 }, { "epoch": 0.3853340253577519, "grad_norm": 0.3712897002696991, "learning_rate": 0.00014806701675418855, "loss": 0.5602, "step": 5197 }, { "epoch": 0.38540817083117074, "grad_norm": 0.35347992181777954, "learning_rate": 0.00014805701425356339, "loss": 0.4575, "step": 5198 }, { "epoch": 0.3854823163045896, "grad_norm": 0.3787135183811188, "learning_rate": 0.00014804701175293825, "loss": 0.5417, "step": 5199 }, { "epoch": 0.38555646177800845, "grad_norm": 0.34663155674934387, "learning_rate": 0.0001480370092523131, "loss": 0.5013, "step": 5200 }, { "epoch": 0.3856306072514273, "grad_norm": 0.37285545468330383, "learning_rate": 0.00014802700675168792, "loss": 0.5199, "step": 5201 }, { "epoch": 0.38570475272484617, "grad_norm": 0.37487468123435974, "learning_rate": 0.00014801700425106276, "loss": 0.5538, "step": 5202 }, { "epoch": 0.385778898198265, "grad_norm": 0.36670857667922974, "learning_rate": 0.00014800700175043763, "loss": 0.5255, "step": 5203 }, { "epoch": 0.3858530436716838, "grad_norm": 0.33846068382263184, "learning_rate": 0.00014799699924981246, "loss": 0.4901, "step": 5204 }, { "epoch": 0.3859271891451027, "grad_norm": 0.33181512355804443, "learning_rate": 0.0001479869967491873, "loss": 0.5109, "step": 5205 }, { "epoch": 0.38600133461852154, "grad_norm": 0.37586259841918945, "learning_rate": 0.00014797699424856216, "loss": 0.5081, "step": 5206 }, { "epoch": 0.38607548009194037, "grad_norm": 0.3597969114780426, "learning_rate": 0.000147966991747937, "loss": 0.528, "step": 5207 }, { "epoch": 0.38614962556535926, "grad_norm": 0.3398433029651642, "learning_rate": 0.00014795698924731184, "loss": 0.4975, "step": 5208 }, { "epoch": 0.3862237710387781, "grad_norm": 0.38021618127822876, "learning_rate": 0.00014794698674668668, "loss": 0.5821, "step": 5209 }, { "epoch": 0.3862979165121969, "grad_norm": 0.36177024245262146, "learning_rate": 0.00014793698424606154, "loss": 0.4962, "step": 5210 }, { "epoch": 0.3863720619856158, "grad_norm": 0.3508056104183197, "learning_rate": 0.00014792698174543638, "loss": 0.5042, "step": 5211 }, { "epoch": 0.38644620745903463, "grad_norm": 0.35889896750450134, "learning_rate": 0.0001479169792448112, "loss": 0.5197, "step": 5212 }, { "epoch": 0.38652035293245346, "grad_norm": 0.3517848551273346, "learning_rate": 0.00014790697674418605, "loss": 0.506, "step": 5213 }, { "epoch": 0.38659449840587234, "grad_norm": 0.3633245825767517, "learning_rate": 0.00014789697424356091, "loss": 0.5429, "step": 5214 }, { "epoch": 0.3866686438792912, "grad_norm": 0.3862713873386383, "learning_rate": 0.00014788697174293572, "loss": 0.552, "step": 5215 }, { "epoch": 0.38674278935271, "grad_norm": 0.3622390031814575, "learning_rate": 0.0001478769692423106, "loss": 0.5158, "step": 5216 }, { "epoch": 0.3868169348261289, "grad_norm": 0.3629072606563568, "learning_rate": 0.00014786696674168543, "loss": 0.5177, "step": 5217 }, { "epoch": 0.3868910802995477, "grad_norm": 0.34803545475006104, "learning_rate": 0.0001478569642410603, "loss": 0.4945, "step": 5218 }, { "epoch": 0.38696522577296655, "grad_norm": 0.3819146454334259, "learning_rate": 0.0001478469617404351, "loss": 0.5258, "step": 5219 }, { "epoch": 0.38703937124638543, "grad_norm": 0.3596147894859314, "learning_rate": 0.00014783695923980996, "loss": 0.5047, "step": 5220 }, { "epoch": 0.38711351671980426, "grad_norm": 0.3706953525543213, "learning_rate": 0.0001478269567391848, "loss": 0.5437, "step": 5221 }, { "epoch": 0.3871876621932231, "grad_norm": 0.34358853101730347, "learning_rate": 0.00014781695423855967, "loss": 0.5054, "step": 5222 }, { "epoch": 0.387261807666642, "grad_norm": 0.37736019492149353, "learning_rate": 0.00014780695173793448, "loss": 0.5536, "step": 5223 }, { "epoch": 0.3873359531400608, "grad_norm": 0.35367876291275024, "learning_rate": 0.00014779694923730934, "loss": 0.5118, "step": 5224 }, { "epoch": 0.38741009861347964, "grad_norm": 0.33721745014190674, "learning_rate": 0.00014778694673668418, "loss": 0.4836, "step": 5225 }, { "epoch": 0.3874842440868985, "grad_norm": 0.3534198999404907, "learning_rate": 0.00014777694423605901, "loss": 0.5422, "step": 5226 }, { "epoch": 0.38755838956031735, "grad_norm": 0.3685152530670166, "learning_rate": 0.00014776694173543385, "loss": 0.5255, "step": 5227 }, { "epoch": 0.3876325350337362, "grad_norm": 0.350033700466156, "learning_rate": 0.00014775693923480872, "loss": 0.5278, "step": 5228 }, { "epoch": 0.38770668050715507, "grad_norm": 0.3542184829711914, "learning_rate": 0.00014774693673418355, "loss": 0.5221, "step": 5229 }, { "epoch": 0.3877808259805739, "grad_norm": 0.3425024449825287, "learning_rate": 0.0001477369342335584, "loss": 0.4873, "step": 5230 }, { "epoch": 0.3878549714539927, "grad_norm": 0.3677924871444702, "learning_rate": 0.00014772693173293323, "loss": 0.5371, "step": 5231 }, { "epoch": 0.38792911692741155, "grad_norm": 0.35184192657470703, "learning_rate": 0.0001477169292323081, "loss": 0.5246, "step": 5232 }, { "epoch": 0.38800326240083044, "grad_norm": 0.35894957184791565, "learning_rate": 0.00014770692673168293, "loss": 0.5266, "step": 5233 }, { "epoch": 0.38807740787424927, "grad_norm": 0.34643664956092834, "learning_rate": 0.00014769692423105776, "loss": 0.5056, "step": 5234 }, { "epoch": 0.3881515533476681, "grad_norm": 0.3530252277851105, "learning_rate": 0.0001476869217304326, "loss": 0.5163, "step": 5235 }, { "epoch": 0.388225698821087, "grad_norm": 0.3724387586116791, "learning_rate": 0.00014767691922980747, "loss": 0.5841, "step": 5236 }, { "epoch": 0.3882998442945058, "grad_norm": 0.38234275579452515, "learning_rate": 0.0001476669167291823, "loss": 0.5383, "step": 5237 }, { "epoch": 0.38837398976792464, "grad_norm": 0.36008885502815247, "learning_rate": 0.00014765691422855714, "loss": 0.4815, "step": 5238 }, { "epoch": 0.38844813524134353, "grad_norm": 0.37683355808258057, "learning_rate": 0.000147646911727932, "loss": 0.531, "step": 5239 }, { "epoch": 0.38852228071476236, "grad_norm": 0.3805006742477417, "learning_rate": 0.00014763690922730684, "loss": 0.5354, "step": 5240 }, { "epoch": 0.3885964261881812, "grad_norm": 0.3657790720462799, "learning_rate": 0.00014762690672668168, "loss": 0.531, "step": 5241 }, { "epoch": 0.3886705716616001, "grad_norm": 0.3829960227012634, "learning_rate": 0.00014761690422605652, "loss": 0.5649, "step": 5242 }, { "epoch": 0.3887447171350189, "grad_norm": 0.3673323392868042, "learning_rate": 0.00014760690172543138, "loss": 0.5041, "step": 5243 }, { "epoch": 0.38881886260843773, "grad_norm": 0.3541480600833893, "learning_rate": 0.00014759689922480622, "loss": 0.5269, "step": 5244 }, { "epoch": 0.3888930080818566, "grad_norm": 0.3832269012928009, "learning_rate": 0.00014758689672418105, "loss": 0.4864, "step": 5245 }, { "epoch": 0.38896715355527545, "grad_norm": 0.3849915862083435, "learning_rate": 0.0001475768942235559, "loss": 0.5527, "step": 5246 }, { "epoch": 0.3890412990286943, "grad_norm": 0.3536520004272461, "learning_rate": 0.00014756689172293076, "loss": 0.4773, "step": 5247 }, { "epoch": 0.38911544450211316, "grad_norm": 0.35713696479797363, "learning_rate": 0.0001475568892223056, "loss": 0.545, "step": 5248 }, { "epoch": 0.389189589975532, "grad_norm": 0.38289737701416016, "learning_rate": 0.00014754688672168043, "loss": 0.5489, "step": 5249 }, { "epoch": 0.3892637354489508, "grad_norm": 0.364083856344223, "learning_rate": 0.00014753688422105527, "loss": 0.5331, "step": 5250 }, { "epoch": 0.3893378809223697, "grad_norm": 0.3708508014678955, "learning_rate": 0.00014752688172043013, "loss": 0.5055, "step": 5251 }, { "epoch": 0.38941202639578854, "grad_norm": 0.34895703196525574, "learning_rate": 0.00014751687921980494, "loss": 0.5409, "step": 5252 }, { "epoch": 0.38948617186920736, "grad_norm": 0.34833118319511414, "learning_rate": 0.0001475068767191798, "loss": 0.4894, "step": 5253 }, { "epoch": 0.38956031734262625, "grad_norm": 0.3640887141227722, "learning_rate": 0.00014749687421855464, "loss": 0.5097, "step": 5254 }, { "epoch": 0.3896344628160451, "grad_norm": 0.3491535186767578, "learning_rate": 0.0001474868717179295, "loss": 0.4916, "step": 5255 }, { "epoch": 0.3897086082894639, "grad_norm": 0.35096627473831177, "learning_rate": 0.00014747686921730432, "loss": 0.5029, "step": 5256 }, { "epoch": 0.3897827537628828, "grad_norm": 0.34538331627845764, "learning_rate": 0.00014746686671667918, "loss": 0.5, "step": 5257 }, { "epoch": 0.3898568992363016, "grad_norm": 0.35182738304138184, "learning_rate": 0.00014745686421605402, "loss": 0.5286, "step": 5258 }, { "epoch": 0.38993104470972045, "grad_norm": 0.3722439110279083, "learning_rate": 0.00014744686171542888, "loss": 0.5275, "step": 5259 }, { "epoch": 0.39000519018313934, "grad_norm": 0.3804242014884949, "learning_rate": 0.0001474368592148037, "loss": 0.5295, "step": 5260 }, { "epoch": 0.39007933565655817, "grad_norm": 0.3663203716278076, "learning_rate": 0.00014742685671417856, "loss": 0.5163, "step": 5261 }, { "epoch": 0.390153481129977, "grad_norm": 0.34489941596984863, "learning_rate": 0.0001474168542135534, "loss": 0.4938, "step": 5262 }, { "epoch": 0.3902276266033959, "grad_norm": 0.3466194272041321, "learning_rate": 0.00014740685171292823, "loss": 0.521, "step": 5263 }, { "epoch": 0.3903017720768147, "grad_norm": 0.36402180790901184, "learning_rate": 0.00014739684921230307, "loss": 0.5658, "step": 5264 }, { "epoch": 0.39037591755023354, "grad_norm": 0.38077425956726074, "learning_rate": 0.00014738684671167793, "loss": 0.585, "step": 5265 }, { "epoch": 0.3904500630236524, "grad_norm": 0.37412625551223755, "learning_rate": 0.00014737684421105277, "loss": 0.5328, "step": 5266 }, { "epoch": 0.39052420849707126, "grad_norm": 0.36971187591552734, "learning_rate": 0.0001473668417104276, "loss": 0.5423, "step": 5267 }, { "epoch": 0.3905983539704901, "grad_norm": 0.3386740982532501, "learning_rate": 0.00014735683920980244, "loss": 0.4892, "step": 5268 }, { "epoch": 0.39067249944390897, "grad_norm": 0.3584461510181427, "learning_rate": 0.0001473468367091773, "loss": 0.5407, "step": 5269 }, { "epoch": 0.3907466449173278, "grad_norm": 0.3598567843437195, "learning_rate": 0.00014733683420855214, "loss": 0.5146, "step": 5270 }, { "epoch": 0.39082079039074663, "grad_norm": 0.3513199985027313, "learning_rate": 0.00014732683170792698, "loss": 0.5158, "step": 5271 }, { "epoch": 0.3908949358641655, "grad_norm": 0.33728650212287903, "learning_rate": 0.00014731682920730185, "loss": 0.4832, "step": 5272 }, { "epoch": 0.39096908133758435, "grad_norm": 0.3537749648094177, "learning_rate": 0.00014730682670667668, "loss": 0.5219, "step": 5273 }, { "epoch": 0.3910432268110032, "grad_norm": 0.3762612044811249, "learning_rate": 0.00014729682420605152, "loss": 0.5183, "step": 5274 }, { "epoch": 0.39111737228442206, "grad_norm": 0.3721936345100403, "learning_rate": 0.00014728682170542636, "loss": 0.4924, "step": 5275 }, { "epoch": 0.3911915177578409, "grad_norm": 0.3754572570323944, "learning_rate": 0.00014727681920480122, "loss": 0.5504, "step": 5276 }, { "epoch": 0.3912656632312597, "grad_norm": 0.3743118345737457, "learning_rate": 0.00014726681670417606, "loss": 0.5539, "step": 5277 }, { "epoch": 0.3913398087046786, "grad_norm": 0.3630833327770233, "learning_rate": 0.0001472568142035509, "loss": 0.4882, "step": 5278 }, { "epoch": 0.39141395417809743, "grad_norm": 0.36798882484436035, "learning_rate": 0.00014724681170292573, "loss": 0.5667, "step": 5279 }, { "epoch": 0.39148809965151626, "grad_norm": 0.3809374272823334, "learning_rate": 0.0001472368092023006, "loss": 0.5363, "step": 5280 }, { "epoch": 0.39156224512493515, "grad_norm": 0.39033859968185425, "learning_rate": 0.00014722680670167543, "loss": 0.5556, "step": 5281 }, { "epoch": 0.391636390598354, "grad_norm": 0.38011303544044495, "learning_rate": 0.00014721680420105027, "loss": 0.5116, "step": 5282 }, { "epoch": 0.3917105360717728, "grad_norm": 0.37980425357818604, "learning_rate": 0.0001472068017004251, "loss": 0.5728, "step": 5283 }, { "epoch": 0.3917846815451917, "grad_norm": 0.4096375107765198, "learning_rate": 0.00014719679919979997, "loss": 0.5711, "step": 5284 }, { "epoch": 0.3918588270186105, "grad_norm": 0.3793862760066986, "learning_rate": 0.0001471867966991748, "loss": 0.5389, "step": 5285 }, { "epoch": 0.39193297249202935, "grad_norm": 0.35412871837615967, "learning_rate": 0.00014717679419854965, "loss": 0.5054, "step": 5286 }, { "epoch": 0.3920071179654482, "grad_norm": 0.35359740257263184, "learning_rate": 0.00014716679169792448, "loss": 0.5557, "step": 5287 }, { "epoch": 0.39208126343886707, "grad_norm": 0.35314956307411194, "learning_rate": 0.00014715678919729935, "loss": 0.4877, "step": 5288 }, { "epoch": 0.3921554089122859, "grad_norm": 0.3545042872428894, "learning_rate": 0.00014714678669667416, "loss": 0.5314, "step": 5289 }, { "epoch": 0.3922295543857047, "grad_norm": 0.3561767041683197, "learning_rate": 0.00014713678419604902, "loss": 0.534, "step": 5290 }, { "epoch": 0.3923036998591236, "grad_norm": 0.40643519163131714, "learning_rate": 0.00014712678169542386, "loss": 0.5666, "step": 5291 }, { "epoch": 0.39237784533254244, "grad_norm": 0.3517022132873535, "learning_rate": 0.00014711677919479872, "loss": 0.5925, "step": 5292 }, { "epoch": 0.39245199080596127, "grad_norm": 0.34301745891571045, "learning_rate": 0.00014710677669417353, "loss": 0.4918, "step": 5293 }, { "epoch": 0.39252613627938016, "grad_norm": 0.3436640799045563, "learning_rate": 0.0001470967741935484, "loss": 0.5441, "step": 5294 }, { "epoch": 0.392600281752799, "grad_norm": 0.34329259395599365, "learning_rate": 0.00014708677169292323, "loss": 0.4941, "step": 5295 }, { "epoch": 0.3926744272262178, "grad_norm": 0.3648201823234558, "learning_rate": 0.0001470767691922981, "loss": 0.5182, "step": 5296 }, { "epoch": 0.3927485726996367, "grad_norm": 0.33174389600753784, "learning_rate": 0.0001470667666916729, "loss": 0.5045, "step": 5297 }, { "epoch": 0.39282271817305553, "grad_norm": 0.37906500697135925, "learning_rate": 0.00014705676419104777, "loss": 0.5442, "step": 5298 }, { "epoch": 0.39289686364647436, "grad_norm": 0.384363055229187, "learning_rate": 0.0001470467616904226, "loss": 0.5657, "step": 5299 }, { "epoch": 0.39297100911989324, "grad_norm": 0.34049248695373535, "learning_rate": 0.00014703675918979745, "loss": 0.5007, "step": 5300 }, { "epoch": 0.3930451545933121, "grad_norm": 0.3468066453933716, "learning_rate": 0.00014702675668917228, "loss": 0.5118, "step": 5301 }, { "epoch": 0.3931193000667309, "grad_norm": 0.3667943477630615, "learning_rate": 0.00014701675418854715, "loss": 0.5419, "step": 5302 }, { "epoch": 0.3931934455401498, "grad_norm": 0.3677408993244171, "learning_rate": 0.00014700675168792198, "loss": 0.5094, "step": 5303 }, { "epoch": 0.3932675910135686, "grad_norm": 0.35591134428977966, "learning_rate": 0.00014699674918729682, "loss": 0.5321, "step": 5304 }, { "epoch": 0.39334173648698745, "grad_norm": 0.3848787844181061, "learning_rate": 0.00014698674668667166, "loss": 0.5776, "step": 5305 }, { "epoch": 0.39341588196040633, "grad_norm": 0.35964182019233704, "learning_rate": 0.00014697674418604652, "loss": 0.5314, "step": 5306 }, { "epoch": 0.39349002743382516, "grad_norm": 0.3494051992893219, "learning_rate": 0.00014696674168542136, "loss": 0.5066, "step": 5307 }, { "epoch": 0.393564172907244, "grad_norm": 0.352180540561676, "learning_rate": 0.0001469567391847962, "loss": 0.4937, "step": 5308 }, { "epoch": 0.3936383183806629, "grad_norm": 0.36875370144844055, "learning_rate": 0.00014694673668417106, "loss": 0.5036, "step": 5309 }, { "epoch": 0.3937124638540817, "grad_norm": 0.35347166657447815, "learning_rate": 0.0001469367341835459, "loss": 0.4909, "step": 5310 }, { "epoch": 0.39378660932750054, "grad_norm": 0.38089290261268616, "learning_rate": 0.00014692673168292074, "loss": 0.5227, "step": 5311 }, { "epoch": 0.3938607548009194, "grad_norm": 0.40593501925468445, "learning_rate": 0.00014691672918229557, "loss": 0.5281, "step": 5312 }, { "epoch": 0.39393490027433825, "grad_norm": 0.3706323802471161, "learning_rate": 0.00014690672668167044, "loss": 0.5343, "step": 5313 }, { "epoch": 0.3940090457477571, "grad_norm": 0.36816591024398804, "learning_rate": 0.00014689672418104527, "loss": 0.5163, "step": 5314 }, { "epoch": 0.39408319122117597, "grad_norm": 0.3766843378543854, "learning_rate": 0.0001468867216804201, "loss": 0.5555, "step": 5315 }, { "epoch": 0.3941573366945948, "grad_norm": 0.39287078380584717, "learning_rate": 0.00014687671917979495, "loss": 0.5382, "step": 5316 }, { "epoch": 0.3942314821680136, "grad_norm": 0.37265530228614807, "learning_rate": 0.0001468667166791698, "loss": 0.537, "step": 5317 }, { "epoch": 0.3943056276414325, "grad_norm": 0.35920289158821106, "learning_rate": 0.00014685671417854465, "loss": 0.5363, "step": 5318 }, { "epoch": 0.39437977311485134, "grad_norm": 0.35398897528648376, "learning_rate": 0.0001468467116779195, "loss": 0.4997, "step": 5319 }, { "epoch": 0.39445391858827017, "grad_norm": 0.39602673053741455, "learning_rate": 0.00014683670917729432, "loss": 0.5724, "step": 5320 }, { "epoch": 0.39452806406168905, "grad_norm": 0.3694996237754822, "learning_rate": 0.0001468267066766692, "loss": 0.5088, "step": 5321 }, { "epoch": 0.3946022095351079, "grad_norm": 0.3840658664703369, "learning_rate": 0.00014681670417604403, "loss": 0.5738, "step": 5322 }, { "epoch": 0.3946763550085267, "grad_norm": 0.3551271855831146, "learning_rate": 0.00014680670167541886, "loss": 0.5368, "step": 5323 }, { "epoch": 0.3947505004819456, "grad_norm": 0.36152389645576477, "learning_rate": 0.0001467966991747937, "loss": 0.4851, "step": 5324 }, { "epoch": 0.39482464595536443, "grad_norm": 0.3514433205127716, "learning_rate": 0.00014678669667416856, "loss": 0.5171, "step": 5325 }, { "epoch": 0.39489879142878326, "grad_norm": 0.35648590326309204, "learning_rate": 0.00014677669417354337, "loss": 0.5147, "step": 5326 }, { "epoch": 0.39497293690220214, "grad_norm": 0.3640752136707306, "learning_rate": 0.00014676669167291824, "loss": 0.5556, "step": 5327 }, { "epoch": 0.395047082375621, "grad_norm": 0.38854795694351196, "learning_rate": 0.00014675668917229307, "loss": 0.5184, "step": 5328 }, { "epoch": 0.3951212278490398, "grad_norm": 0.3456234931945801, "learning_rate": 0.00014674668667166794, "loss": 0.514, "step": 5329 }, { "epoch": 0.3951953733224587, "grad_norm": 0.3529954254627228, "learning_rate": 0.00014673668417104275, "loss": 0.5443, "step": 5330 }, { "epoch": 0.3952695187958775, "grad_norm": 0.34273403882980347, "learning_rate": 0.0001467266816704176, "loss": 0.4672, "step": 5331 }, { "epoch": 0.39534366426929635, "grad_norm": 0.3674306571483612, "learning_rate": 0.00014671667916979245, "loss": 0.5363, "step": 5332 }, { "epoch": 0.39541780974271523, "grad_norm": 0.37039563059806824, "learning_rate": 0.00014670667666916731, "loss": 0.4962, "step": 5333 }, { "epoch": 0.39549195521613406, "grad_norm": 0.35368120670318604, "learning_rate": 0.00014669667416854212, "loss": 0.5326, "step": 5334 }, { "epoch": 0.3955661006895529, "grad_norm": 0.3836553394794464, "learning_rate": 0.000146686671667917, "loss": 0.5393, "step": 5335 }, { "epoch": 0.3956402461629718, "grad_norm": 0.3718526065349579, "learning_rate": 0.00014667666916729183, "loss": 0.5458, "step": 5336 }, { "epoch": 0.3957143916363906, "grad_norm": 0.3557626008987427, "learning_rate": 0.00014666666666666666, "loss": 0.5103, "step": 5337 }, { "epoch": 0.39578853710980944, "grad_norm": 0.37324437499046326, "learning_rate": 0.0001466566641660415, "loss": 0.5617, "step": 5338 }, { "epoch": 0.3958626825832283, "grad_norm": 0.34424251317977905, "learning_rate": 0.00014664666166541636, "loss": 0.5112, "step": 5339 }, { "epoch": 0.39593682805664715, "grad_norm": 0.34673377871513367, "learning_rate": 0.0001466366591647912, "loss": 0.5227, "step": 5340 }, { "epoch": 0.396010973530066, "grad_norm": 0.35040029883384705, "learning_rate": 0.00014662665666416604, "loss": 0.5322, "step": 5341 }, { "epoch": 0.39608511900348486, "grad_norm": 0.36944863200187683, "learning_rate": 0.0001466166541635409, "loss": 0.5401, "step": 5342 }, { "epoch": 0.3961592644769037, "grad_norm": 0.3666720390319824, "learning_rate": 0.00014660665166291574, "loss": 0.5593, "step": 5343 }, { "epoch": 0.3962334099503225, "grad_norm": 0.3499193489551544, "learning_rate": 0.00014659664916229058, "loss": 0.5141, "step": 5344 }, { "epoch": 0.39630755542374135, "grad_norm": 0.3630261719226837, "learning_rate": 0.00014658664666166541, "loss": 0.5206, "step": 5345 }, { "epoch": 0.39638170089716024, "grad_norm": 0.3579539656639099, "learning_rate": 0.00014657664416104028, "loss": 0.5259, "step": 5346 }, { "epoch": 0.39645584637057907, "grad_norm": 0.35173261165618896, "learning_rate": 0.00014656664166041512, "loss": 0.4888, "step": 5347 }, { "epoch": 0.3965299918439979, "grad_norm": 0.38085728883743286, "learning_rate": 0.00014655663915978995, "loss": 0.5238, "step": 5348 }, { "epoch": 0.3966041373174168, "grad_norm": 0.35835152864456177, "learning_rate": 0.0001465466366591648, "loss": 0.5126, "step": 5349 }, { "epoch": 0.3966782827908356, "grad_norm": 0.3725239932537079, "learning_rate": 0.00014653663415853965, "loss": 0.4741, "step": 5350 }, { "epoch": 0.39675242826425444, "grad_norm": 0.3528483808040619, "learning_rate": 0.0001465266316579145, "loss": 0.5051, "step": 5351 }, { "epoch": 0.3968265737376733, "grad_norm": 0.34485048055648804, "learning_rate": 0.00014651662915728933, "loss": 0.517, "step": 5352 }, { "epoch": 0.39690071921109216, "grad_norm": 0.3705088496208191, "learning_rate": 0.00014650662665666416, "loss": 0.5181, "step": 5353 }, { "epoch": 0.396974864684511, "grad_norm": 0.39711740612983704, "learning_rate": 0.00014649662415603903, "loss": 0.5319, "step": 5354 }, { "epoch": 0.39704901015792987, "grad_norm": 0.42439714074134827, "learning_rate": 0.00014648662165541387, "loss": 0.551, "step": 5355 }, { "epoch": 0.3971231556313487, "grad_norm": 0.3475205898284912, "learning_rate": 0.0001464766191547887, "loss": 0.4964, "step": 5356 }, { "epoch": 0.39719730110476753, "grad_norm": 0.3675456941127777, "learning_rate": 0.00014646661665416354, "loss": 0.5102, "step": 5357 }, { "epoch": 0.3972714465781864, "grad_norm": 0.33223435282707214, "learning_rate": 0.0001464566141535384, "loss": 0.4935, "step": 5358 }, { "epoch": 0.39734559205160525, "grad_norm": 0.37543556094169617, "learning_rate": 0.00014644661165291324, "loss": 0.5197, "step": 5359 }, { "epoch": 0.3974197375250241, "grad_norm": 0.35666561126708984, "learning_rate": 0.00014643660915228808, "loss": 0.4958, "step": 5360 }, { "epoch": 0.39749388299844296, "grad_norm": 0.376210480928421, "learning_rate": 0.00014642660665166292, "loss": 0.4659, "step": 5361 }, { "epoch": 0.3975680284718618, "grad_norm": 0.3585437536239624, "learning_rate": 0.00014641660415103778, "loss": 0.4976, "step": 5362 }, { "epoch": 0.3976421739452806, "grad_norm": 0.37765100598335266, "learning_rate": 0.0001464066016504126, "loss": 0.5094, "step": 5363 }, { "epoch": 0.3977163194186995, "grad_norm": 0.37166839838027954, "learning_rate": 0.00014639659914978745, "loss": 0.5377, "step": 5364 }, { "epoch": 0.39779046489211833, "grad_norm": 0.35111165046691895, "learning_rate": 0.0001463865966491623, "loss": 0.5285, "step": 5365 }, { "epoch": 0.39786461036553716, "grad_norm": 0.3473764657974243, "learning_rate": 0.00014637659414853716, "loss": 0.475, "step": 5366 }, { "epoch": 0.39793875583895605, "grad_norm": 0.3687153160572052, "learning_rate": 0.00014636659164791197, "loss": 0.5118, "step": 5367 }, { "epoch": 0.3980129013123749, "grad_norm": 0.42163100838661194, "learning_rate": 0.00014635658914728683, "loss": 0.5911, "step": 5368 }, { "epoch": 0.3980870467857937, "grad_norm": 0.33938267827033997, "learning_rate": 0.00014634658664666167, "loss": 0.4959, "step": 5369 }, { "epoch": 0.3981611922592126, "grad_norm": 0.3763183057308197, "learning_rate": 0.00014633658414603653, "loss": 0.5199, "step": 5370 }, { "epoch": 0.3982353377326314, "grad_norm": 0.37072715163230896, "learning_rate": 0.00014632658164541134, "loss": 0.5901, "step": 5371 }, { "epoch": 0.39830948320605025, "grad_norm": 0.35362929105758667, "learning_rate": 0.0001463165791447862, "loss": 0.4801, "step": 5372 }, { "epoch": 0.39838362867946914, "grad_norm": 0.37494149804115295, "learning_rate": 0.00014630657664416104, "loss": 0.4949, "step": 5373 }, { "epoch": 0.39845777415288797, "grad_norm": 0.3639068901538849, "learning_rate": 0.00014629657414353588, "loss": 0.5192, "step": 5374 }, { "epoch": 0.3985319196263068, "grad_norm": 0.36745211482048035, "learning_rate": 0.00014628657164291074, "loss": 0.5174, "step": 5375 }, { "epoch": 0.3986060650997257, "grad_norm": 0.3654904067516327, "learning_rate": 0.00014627656914228558, "loss": 0.5096, "step": 5376 }, { "epoch": 0.3986802105731445, "grad_norm": 0.36296698451042175, "learning_rate": 0.00014626656664166042, "loss": 0.4916, "step": 5377 }, { "epoch": 0.39875435604656334, "grad_norm": 0.3323928713798523, "learning_rate": 0.00014625656414103525, "loss": 0.4923, "step": 5378 }, { "epoch": 0.3988285015199822, "grad_norm": 0.3566431403160095, "learning_rate": 0.00014624656164041012, "loss": 0.5488, "step": 5379 }, { "epoch": 0.39890264699340106, "grad_norm": 0.35622021555900574, "learning_rate": 0.00014623655913978496, "loss": 0.5158, "step": 5380 }, { "epoch": 0.3989767924668199, "grad_norm": 0.37569281458854675, "learning_rate": 0.0001462265566391598, "loss": 0.5486, "step": 5381 }, { "epoch": 0.39905093794023877, "grad_norm": 0.3564712405204773, "learning_rate": 0.00014621655413853463, "loss": 0.5583, "step": 5382 }, { "epoch": 0.3991250834136576, "grad_norm": 0.34775310754776, "learning_rate": 0.0001462065516379095, "loss": 0.4797, "step": 5383 }, { "epoch": 0.39919922888707643, "grad_norm": 0.35356754064559937, "learning_rate": 0.00014619654913728433, "loss": 0.5191, "step": 5384 }, { "epoch": 0.3992733743604953, "grad_norm": 0.3756600618362427, "learning_rate": 0.00014618654663665917, "loss": 0.5528, "step": 5385 }, { "epoch": 0.39934751983391414, "grad_norm": 0.37860265374183655, "learning_rate": 0.000146176544136034, "loss": 0.5728, "step": 5386 }, { "epoch": 0.399421665307333, "grad_norm": 0.3826025724411011, "learning_rate": 0.00014616654163540887, "loss": 0.5435, "step": 5387 }, { "epoch": 0.39949581078075186, "grad_norm": 0.3699050843715668, "learning_rate": 0.0001461565391347837, "loss": 0.5834, "step": 5388 }, { "epoch": 0.3995699562541707, "grad_norm": 0.36537519097328186, "learning_rate": 0.00014614653663415854, "loss": 0.5089, "step": 5389 }, { "epoch": 0.3996441017275895, "grad_norm": 0.34752023220062256, "learning_rate": 0.00014613653413353338, "loss": 0.5156, "step": 5390 }, { "epoch": 0.3997182472010084, "grad_norm": 0.3555268943309784, "learning_rate": 0.00014612653163290825, "loss": 0.5571, "step": 5391 }, { "epoch": 0.39979239267442723, "grad_norm": 0.4051647484302521, "learning_rate": 0.00014611652913228308, "loss": 0.5575, "step": 5392 }, { "epoch": 0.39986653814784606, "grad_norm": 0.3577519655227661, "learning_rate": 0.00014610652663165792, "loss": 0.5185, "step": 5393 }, { "epoch": 0.39994068362126495, "grad_norm": 0.3680310547351837, "learning_rate": 0.00014609652413103276, "loss": 0.5159, "step": 5394 }, { "epoch": 0.4000148290946838, "grad_norm": 0.3732515275478363, "learning_rate": 0.00014608652163040762, "loss": 0.5257, "step": 5395 }, { "epoch": 0.4000889745681026, "grad_norm": 0.3712649643421173, "learning_rate": 0.00014607651912978246, "loss": 0.527, "step": 5396 }, { "epoch": 0.4001631200415215, "grad_norm": 0.3755754232406616, "learning_rate": 0.0001460665166291573, "loss": 0.4776, "step": 5397 }, { "epoch": 0.4002372655149403, "grad_norm": 0.33847230672836304, "learning_rate": 0.00014605651412853213, "loss": 0.5115, "step": 5398 }, { "epoch": 0.40031141098835915, "grad_norm": 0.3596429228782654, "learning_rate": 0.000146046511627907, "loss": 0.5362, "step": 5399 }, { "epoch": 0.40038555646177804, "grad_norm": 0.35598257184028625, "learning_rate": 0.0001460365091272818, "loss": 0.5119, "step": 5400 }, { "epoch": 0.40045970193519687, "grad_norm": 0.34105390310287476, "learning_rate": 0.00014602650662665667, "loss": 0.5044, "step": 5401 }, { "epoch": 0.4005338474086157, "grad_norm": 0.36445239186286926, "learning_rate": 0.0001460165041260315, "loss": 0.515, "step": 5402 }, { "epoch": 0.4006079928820345, "grad_norm": 0.35437873005867004, "learning_rate": 0.00014600650162540637, "loss": 0.5171, "step": 5403 }, { "epoch": 0.4006821383554534, "grad_norm": 0.3696897625923157, "learning_rate": 0.00014599649912478118, "loss": 0.5402, "step": 5404 }, { "epoch": 0.40075628382887224, "grad_norm": 0.32071349024772644, "learning_rate": 0.00014598649662415605, "loss": 0.4857, "step": 5405 }, { "epoch": 0.40083042930229107, "grad_norm": 0.3621322810649872, "learning_rate": 0.00014597649412353088, "loss": 0.5176, "step": 5406 }, { "epoch": 0.40090457477570995, "grad_norm": 0.37033671140670776, "learning_rate": 0.00014596649162290575, "loss": 0.5313, "step": 5407 }, { "epoch": 0.4009787202491288, "grad_norm": 0.3536565601825714, "learning_rate": 0.00014595648912228058, "loss": 0.4964, "step": 5408 }, { "epoch": 0.4010528657225476, "grad_norm": 0.36336347460746765, "learning_rate": 0.00014594648662165542, "loss": 0.5085, "step": 5409 }, { "epoch": 0.4011270111959665, "grad_norm": 0.3669128715991974, "learning_rate": 0.00014593648412103026, "loss": 0.5416, "step": 5410 }, { "epoch": 0.40120115666938533, "grad_norm": 0.3664756417274475, "learning_rate": 0.0001459264816204051, "loss": 0.5222, "step": 5411 }, { "epoch": 0.40127530214280416, "grad_norm": 0.3767852485179901, "learning_rate": 0.00014591647911977996, "loss": 0.5293, "step": 5412 }, { "epoch": 0.40134944761622304, "grad_norm": 0.3782379627227783, "learning_rate": 0.0001459064766191548, "loss": 0.5247, "step": 5413 }, { "epoch": 0.4014235930896419, "grad_norm": 0.3668828010559082, "learning_rate": 0.00014589647411852963, "loss": 0.5151, "step": 5414 }, { "epoch": 0.4014977385630607, "grad_norm": 0.3608115017414093, "learning_rate": 0.00014588647161790447, "loss": 0.5166, "step": 5415 }, { "epoch": 0.4015718840364796, "grad_norm": 0.3534063696861267, "learning_rate": 0.00014587646911727934, "loss": 0.4927, "step": 5416 }, { "epoch": 0.4016460295098984, "grad_norm": 0.36659565567970276, "learning_rate": 0.00014586646661665417, "loss": 0.4637, "step": 5417 }, { "epoch": 0.40172017498331725, "grad_norm": 0.3627648651599884, "learning_rate": 0.00014585646411602904, "loss": 0.5375, "step": 5418 }, { "epoch": 0.40179432045673613, "grad_norm": 0.3534542918205261, "learning_rate": 0.00014584646161540385, "loss": 0.5116, "step": 5419 }, { "epoch": 0.40186846593015496, "grad_norm": 0.3646208643913269, "learning_rate": 0.0001458364591147787, "loss": 0.5142, "step": 5420 }, { "epoch": 0.4019426114035738, "grad_norm": 0.3719312846660614, "learning_rate": 0.00014582645661415355, "loss": 0.5413, "step": 5421 }, { "epoch": 0.4020167568769927, "grad_norm": 0.3570326268672943, "learning_rate": 0.0001458164541135284, "loss": 0.5317, "step": 5422 }, { "epoch": 0.4020909023504115, "grad_norm": 0.35736867785453796, "learning_rate": 0.00014580645161290322, "loss": 0.5347, "step": 5423 }, { "epoch": 0.40216504782383034, "grad_norm": 0.34433838725090027, "learning_rate": 0.00014579644911227809, "loss": 0.5091, "step": 5424 }, { "epoch": 0.4022391932972492, "grad_norm": 0.3504939675331116, "learning_rate": 0.00014578644661165292, "loss": 0.5288, "step": 5425 }, { "epoch": 0.40231333877066805, "grad_norm": 0.3538515865802765, "learning_rate": 0.00014577644411102776, "loss": 0.5313, "step": 5426 }, { "epoch": 0.4023874842440869, "grad_norm": 0.3821633756160736, "learning_rate": 0.0001457664416104026, "loss": 0.5688, "step": 5427 }, { "epoch": 0.40246162971750576, "grad_norm": 0.35759100317955017, "learning_rate": 0.00014575643910977746, "loss": 0.5055, "step": 5428 }, { "epoch": 0.4025357751909246, "grad_norm": 0.3353385627269745, "learning_rate": 0.0001457464366091523, "loss": 0.482, "step": 5429 }, { "epoch": 0.4026099206643434, "grad_norm": 0.3530988097190857, "learning_rate": 0.00014573643410852714, "loss": 0.5291, "step": 5430 }, { "epoch": 0.4026840661377623, "grad_norm": 0.35932520031929016, "learning_rate": 0.00014572643160790197, "loss": 0.5428, "step": 5431 }, { "epoch": 0.40275821161118114, "grad_norm": 0.37009772658348083, "learning_rate": 0.00014571642910727684, "loss": 0.5395, "step": 5432 }, { "epoch": 0.40283235708459997, "grad_norm": 0.35469773411750793, "learning_rate": 0.00014570642660665167, "loss": 0.5551, "step": 5433 }, { "epoch": 0.40290650255801885, "grad_norm": 0.34610888361930847, "learning_rate": 0.0001456964241060265, "loss": 0.519, "step": 5434 }, { "epoch": 0.4029806480314377, "grad_norm": 0.34553489089012146, "learning_rate": 0.00014568642160540135, "loss": 0.5172, "step": 5435 }, { "epoch": 0.4030547935048565, "grad_norm": 0.40318894386291504, "learning_rate": 0.0001456764191047762, "loss": 0.5499, "step": 5436 }, { "epoch": 0.4031289389782754, "grad_norm": 0.3758791983127594, "learning_rate": 0.00014566641660415102, "loss": 0.5099, "step": 5437 }, { "epoch": 0.4032030844516942, "grad_norm": 0.3672524392604828, "learning_rate": 0.0001456564141035259, "loss": 0.5514, "step": 5438 }, { "epoch": 0.40327722992511306, "grad_norm": 0.3850097060203552, "learning_rate": 0.00014564641160290072, "loss": 0.538, "step": 5439 }, { "epoch": 0.40335137539853194, "grad_norm": 0.3611335754394531, "learning_rate": 0.0001456364091022756, "loss": 0.5338, "step": 5440 }, { "epoch": 0.40342552087195077, "grad_norm": 0.39768996834754944, "learning_rate": 0.00014562640660165042, "loss": 0.5807, "step": 5441 }, { "epoch": 0.4034996663453696, "grad_norm": 0.38328734040260315, "learning_rate": 0.00014561640410102526, "loss": 0.4859, "step": 5442 }, { "epoch": 0.4035738118187885, "grad_norm": 0.35615766048431396, "learning_rate": 0.0001456064016004001, "loss": 0.5175, "step": 5443 }, { "epoch": 0.4036479572922073, "grad_norm": 0.3657638132572174, "learning_rate": 0.00014559639909977496, "loss": 0.5149, "step": 5444 }, { "epoch": 0.40372210276562615, "grad_norm": 0.3624582290649414, "learning_rate": 0.0001455863965991498, "loss": 0.5161, "step": 5445 }, { "epoch": 0.40379624823904503, "grad_norm": 0.39679235219955444, "learning_rate": 0.00014557639409852464, "loss": 0.4672, "step": 5446 }, { "epoch": 0.40387039371246386, "grad_norm": 0.40502598881721497, "learning_rate": 0.00014556639159789947, "loss": 0.5446, "step": 5447 }, { "epoch": 0.4039445391858827, "grad_norm": 0.3677510619163513, "learning_rate": 0.0001455563890972743, "loss": 0.5313, "step": 5448 }, { "epoch": 0.4040186846593016, "grad_norm": 0.3307429254055023, "learning_rate": 0.00014554638659664918, "loss": 0.5025, "step": 5449 }, { "epoch": 0.4040928301327204, "grad_norm": 0.36487364768981934, "learning_rate": 0.000145536384096024, "loss": 0.5042, "step": 5450 }, { "epoch": 0.40416697560613923, "grad_norm": 0.3737335205078125, "learning_rate": 0.00014552638159539888, "loss": 0.5166, "step": 5451 }, { "epoch": 0.4042411210795581, "grad_norm": 0.3695918917655945, "learning_rate": 0.0001455163790947737, "loss": 0.5711, "step": 5452 }, { "epoch": 0.40431526655297695, "grad_norm": 0.39588290452957153, "learning_rate": 0.00014550637659414855, "loss": 0.5064, "step": 5453 }, { "epoch": 0.4043894120263958, "grad_norm": 0.3725627064704895, "learning_rate": 0.0001454963740935234, "loss": 0.5129, "step": 5454 }, { "epoch": 0.40446355749981466, "grad_norm": 0.3609890937805176, "learning_rate": 0.00014548637159289825, "loss": 0.5332, "step": 5455 }, { "epoch": 0.4045377029732335, "grad_norm": 0.38651564717292786, "learning_rate": 0.00014547636909227306, "loss": 0.5569, "step": 5456 }, { "epoch": 0.4046118484466523, "grad_norm": 0.3787982165813446, "learning_rate": 0.00014546636659164793, "loss": 0.5529, "step": 5457 }, { "epoch": 0.40468599392007115, "grad_norm": 0.37772664427757263, "learning_rate": 0.00014545636409102276, "loss": 0.5385, "step": 5458 }, { "epoch": 0.40476013939349004, "grad_norm": 0.38511553406715393, "learning_rate": 0.00014544636159039763, "loss": 0.5212, "step": 5459 }, { "epoch": 0.40483428486690887, "grad_norm": 0.351742684841156, "learning_rate": 0.00014543635908977244, "loss": 0.4986, "step": 5460 }, { "epoch": 0.4049084303403277, "grad_norm": 0.3659670948982239, "learning_rate": 0.0001454263565891473, "loss": 0.5519, "step": 5461 }, { "epoch": 0.4049825758137466, "grad_norm": 0.36023521423339844, "learning_rate": 0.00014541635408852214, "loss": 0.5136, "step": 5462 }, { "epoch": 0.4050567212871654, "grad_norm": 0.370124876499176, "learning_rate": 0.00014540635158789698, "loss": 0.498, "step": 5463 }, { "epoch": 0.40513086676058424, "grad_norm": 0.40885689854621887, "learning_rate": 0.0001453963490872718, "loss": 0.5167, "step": 5464 }, { "epoch": 0.4052050122340031, "grad_norm": 0.34979915618896484, "learning_rate": 0.00014538634658664668, "loss": 0.5489, "step": 5465 }, { "epoch": 0.40527915770742196, "grad_norm": 0.3743324279785156, "learning_rate": 0.00014537634408602151, "loss": 0.5441, "step": 5466 }, { "epoch": 0.4053533031808408, "grad_norm": 0.37718474864959717, "learning_rate": 0.00014536634158539635, "loss": 0.5417, "step": 5467 }, { "epoch": 0.40542744865425967, "grad_norm": 0.3584037125110626, "learning_rate": 0.0001453563390847712, "loss": 0.4952, "step": 5468 }, { "epoch": 0.4055015941276785, "grad_norm": 0.37413448095321655, "learning_rate": 0.00014534633658414605, "loss": 0.5452, "step": 5469 }, { "epoch": 0.40557573960109733, "grad_norm": 0.3452147841453552, "learning_rate": 0.0001453363340835209, "loss": 0.4993, "step": 5470 }, { "epoch": 0.4056498850745162, "grad_norm": 0.34278997778892517, "learning_rate": 0.00014532633158289573, "loss": 0.4872, "step": 5471 }, { "epoch": 0.40572403054793504, "grad_norm": 0.36337602138519287, "learning_rate": 0.00014531632908227056, "loss": 0.5111, "step": 5472 }, { "epoch": 0.4057981760213539, "grad_norm": 0.3359016478061676, "learning_rate": 0.00014530632658164543, "loss": 0.4852, "step": 5473 }, { "epoch": 0.40587232149477276, "grad_norm": 0.3784099817276001, "learning_rate": 0.00014529632408102024, "loss": 0.5624, "step": 5474 }, { "epoch": 0.4059464669681916, "grad_norm": 0.36320194602012634, "learning_rate": 0.0001452863215803951, "loss": 0.4993, "step": 5475 }, { "epoch": 0.4060206124416104, "grad_norm": 0.3669346570968628, "learning_rate": 0.00014527631907976994, "loss": 0.5077, "step": 5476 }, { "epoch": 0.4060947579150293, "grad_norm": 0.38530609011650085, "learning_rate": 0.0001452663165791448, "loss": 0.5603, "step": 5477 }, { "epoch": 0.40616890338844813, "grad_norm": 0.3734789490699768, "learning_rate": 0.00014525631407851964, "loss": 0.519, "step": 5478 }, { "epoch": 0.40624304886186696, "grad_norm": 0.34742534160614014, "learning_rate": 0.00014524631157789448, "loss": 0.5009, "step": 5479 }, { "epoch": 0.40631719433528585, "grad_norm": 0.33853286504745483, "learning_rate": 0.00014523630907726932, "loss": 0.4543, "step": 5480 }, { "epoch": 0.4063913398087047, "grad_norm": 0.34888792037963867, "learning_rate": 0.00014522630657664418, "loss": 0.5059, "step": 5481 }, { "epoch": 0.4064654852821235, "grad_norm": 0.3580942451953888, "learning_rate": 0.00014521630407601902, "loss": 0.5041, "step": 5482 }, { "epoch": 0.4065396307555424, "grad_norm": 0.3736546039581299, "learning_rate": 0.00014520630157539385, "loss": 0.5267, "step": 5483 }, { "epoch": 0.4066137762289612, "grad_norm": 0.34785717725753784, "learning_rate": 0.00014519629907476872, "loss": 0.5014, "step": 5484 }, { "epoch": 0.40668792170238005, "grad_norm": 0.3658953011035919, "learning_rate": 0.00014518629657414353, "loss": 0.5228, "step": 5485 }, { "epoch": 0.40676206717579894, "grad_norm": 0.34520423412323, "learning_rate": 0.0001451762940735184, "loss": 0.4689, "step": 5486 }, { "epoch": 0.40683621264921777, "grad_norm": 0.3709779381752014, "learning_rate": 0.00014516629157289323, "loss": 0.5462, "step": 5487 }, { "epoch": 0.4069103581226366, "grad_norm": 0.3840011954307556, "learning_rate": 0.0001451562890722681, "loss": 0.5653, "step": 5488 }, { "epoch": 0.4069845035960555, "grad_norm": 0.3724023997783661, "learning_rate": 0.0001451462865716429, "loss": 0.5381, "step": 5489 }, { "epoch": 0.4070586490694743, "grad_norm": 0.3806876838207245, "learning_rate": 0.00014513628407101777, "loss": 0.5052, "step": 5490 }, { "epoch": 0.40713279454289314, "grad_norm": 0.38200974464416504, "learning_rate": 0.0001451262815703926, "loss": 0.5194, "step": 5491 }, { "epoch": 0.407206940016312, "grad_norm": 0.39152991771698, "learning_rate": 0.00014511627906976747, "loss": 0.5034, "step": 5492 }, { "epoch": 0.40728108548973085, "grad_norm": 0.37738123536109924, "learning_rate": 0.00014510627656914228, "loss": 0.5342, "step": 5493 }, { "epoch": 0.4073552309631497, "grad_norm": 0.3570821285247803, "learning_rate": 0.00014509627406851714, "loss": 0.4911, "step": 5494 }, { "epoch": 0.40742937643656857, "grad_norm": 0.36014777421951294, "learning_rate": 0.00014508627156789198, "loss": 0.5142, "step": 5495 }, { "epoch": 0.4075035219099874, "grad_norm": 0.36257806420326233, "learning_rate": 0.00014507626906726684, "loss": 0.5282, "step": 5496 }, { "epoch": 0.40757766738340623, "grad_norm": 0.3658194839954376, "learning_rate": 0.00014506626656664165, "loss": 0.5469, "step": 5497 }, { "epoch": 0.4076518128568251, "grad_norm": 0.3566634953022003, "learning_rate": 0.00014505626406601652, "loss": 0.557, "step": 5498 }, { "epoch": 0.40772595833024394, "grad_norm": 0.36132973432540894, "learning_rate": 0.00014504626156539136, "loss": 0.5507, "step": 5499 }, { "epoch": 0.4078001038036628, "grad_norm": 0.3395989239215851, "learning_rate": 0.0001450362590647662, "loss": 0.5179, "step": 5500 }, { "epoch": 0.40787424927708166, "grad_norm": 0.3301539123058319, "learning_rate": 0.00014502625656414103, "loss": 0.5, "step": 5501 }, { "epoch": 0.4079483947505005, "grad_norm": 0.3509870767593384, "learning_rate": 0.0001450162540635159, "loss": 0.5303, "step": 5502 }, { "epoch": 0.4080225402239193, "grad_norm": 0.39103713631629944, "learning_rate": 0.00014500625156289073, "loss": 0.5685, "step": 5503 }, { "epoch": 0.4080966856973382, "grad_norm": 0.3574947118759155, "learning_rate": 0.00014499624906226557, "loss": 0.5125, "step": 5504 }, { "epoch": 0.40817083117075703, "grad_norm": 0.3443583548069, "learning_rate": 0.0001449862465616404, "loss": 0.4975, "step": 5505 }, { "epoch": 0.40824497664417586, "grad_norm": 0.358487993478775, "learning_rate": 0.00014497624406101527, "loss": 0.509, "step": 5506 }, { "epoch": 0.40831912211759475, "grad_norm": 0.3484098017215729, "learning_rate": 0.0001449662415603901, "loss": 0.5424, "step": 5507 }, { "epoch": 0.4083932675910136, "grad_norm": 0.3291948139667511, "learning_rate": 0.00014495623905976494, "loss": 0.4969, "step": 5508 }, { "epoch": 0.4084674130644324, "grad_norm": 0.34577202796936035, "learning_rate": 0.00014494623655913978, "loss": 0.5012, "step": 5509 }, { "epoch": 0.4085415585378513, "grad_norm": 0.3547263741493225, "learning_rate": 0.00014493623405851464, "loss": 0.5305, "step": 5510 }, { "epoch": 0.4086157040112701, "grad_norm": 0.3519277572631836, "learning_rate": 0.00014492623155788948, "loss": 0.489, "step": 5511 }, { "epoch": 0.40868984948468895, "grad_norm": 0.3462868928909302, "learning_rate": 0.00014491622905726432, "loss": 0.4911, "step": 5512 }, { "epoch": 0.40876399495810783, "grad_norm": 0.3674899935722351, "learning_rate": 0.00014490622655663916, "loss": 0.5323, "step": 5513 }, { "epoch": 0.40883814043152666, "grad_norm": 0.37918418645858765, "learning_rate": 0.00014489622405601402, "loss": 0.5208, "step": 5514 }, { "epoch": 0.4089122859049455, "grad_norm": 0.36899104714393616, "learning_rate": 0.00014488622155538886, "loss": 0.5485, "step": 5515 }, { "epoch": 0.4089864313783643, "grad_norm": 0.36141887307167053, "learning_rate": 0.0001448762190547637, "loss": 0.4987, "step": 5516 }, { "epoch": 0.4090605768517832, "grad_norm": 0.3506726920604706, "learning_rate": 0.00014486621655413856, "loss": 0.4832, "step": 5517 }, { "epoch": 0.40913472232520204, "grad_norm": 0.3820066750049591, "learning_rate": 0.0001448562140535134, "loss": 0.5482, "step": 5518 }, { "epoch": 0.40920886779862087, "grad_norm": 0.3731716275215149, "learning_rate": 0.00014484621155288823, "loss": 0.538, "step": 5519 }, { "epoch": 0.40928301327203975, "grad_norm": 0.3287188708782196, "learning_rate": 0.00014483620905226307, "loss": 0.4686, "step": 5520 }, { "epoch": 0.4093571587454586, "grad_norm": 0.3353845775127411, "learning_rate": 0.00014482620655163793, "loss": 0.455, "step": 5521 }, { "epoch": 0.4094313042188774, "grad_norm": 0.3631347417831421, "learning_rate": 0.00014481620405101274, "loss": 0.5542, "step": 5522 }, { "epoch": 0.4095054496922963, "grad_norm": 0.36210933327674866, "learning_rate": 0.0001448062015503876, "loss": 0.5328, "step": 5523 }, { "epoch": 0.4095795951657151, "grad_norm": 0.38167616724967957, "learning_rate": 0.00014479619904976245, "loss": 0.5323, "step": 5524 }, { "epoch": 0.40965374063913396, "grad_norm": 0.34557464718818665, "learning_rate": 0.0001447861965491373, "loss": 0.4892, "step": 5525 }, { "epoch": 0.40972788611255284, "grad_norm": 0.34774982929229736, "learning_rate": 0.00014477619404851212, "loss": 0.4985, "step": 5526 }, { "epoch": 0.40980203158597167, "grad_norm": 0.3644322156906128, "learning_rate": 0.00014476619154788698, "loss": 0.5087, "step": 5527 }, { "epoch": 0.4098761770593905, "grad_norm": 0.3789530396461487, "learning_rate": 0.00014475618904726182, "loss": 0.5365, "step": 5528 }, { "epoch": 0.4099503225328094, "grad_norm": 0.36333760619163513, "learning_rate": 0.00014474618654663669, "loss": 0.484, "step": 5529 }, { "epoch": 0.4100244680062282, "grad_norm": 0.3806998133659363, "learning_rate": 0.0001447361840460115, "loss": 0.5463, "step": 5530 }, { "epoch": 0.41009861347964705, "grad_norm": 0.36609217524528503, "learning_rate": 0.00014472618154538636, "loss": 0.5258, "step": 5531 }, { "epoch": 0.41017275895306593, "grad_norm": 0.3861962854862213, "learning_rate": 0.0001447161790447612, "loss": 0.5882, "step": 5532 }, { "epoch": 0.41024690442648476, "grad_norm": 0.3565601408481598, "learning_rate": 0.00014470617654413606, "loss": 0.5245, "step": 5533 }, { "epoch": 0.4103210498999036, "grad_norm": 0.3382302522659302, "learning_rate": 0.00014469617404351087, "loss": 0.4953, "step": 5534 }, { "epoch": 0.4103951953733225, "grad_norm": 0.3668692111968994, "learning_rate": 0.00014468617154288573, "loss": 0.5175, "step": 5535 }, { "epoch": 0.4104693408467413, "grad_norm": 0.34870126843452454, "learning_rate": 0.00014467616904226057, "loss": 0.4906, "step": 5536 }, { "epoch": 0.41054348632016013, "grad_norm": 0.3508685231208801, "learning_rate": 0.0001446661665416354, "loss": 0.5136, "step": 5537 }, { "epoch": 0.410617631793579, "grad_norm": 0.36952826380729675, "learning_rate": 0.00014465616404101025, "loss": 0.5408, "step": 5538 }, { "epoch": 0.41069177726699785, "grad_norm": 0.36409991979599, "learning_rate": 0.0001446461615403851, "loss": 0.524, "step": 5539 }, { "epoch": 0.4107659227404167, "grad_norm": 0.37884849309921265, "learning_rate": 0.00014463615903975995, "loss": 0.5257, "step": 5540 }, { "epoch": 0.41084006821383556, "grad_norm": 0.3667502701282501, "learning_rate": 0.00014462615653913478, "loss": 0.5691, "step": 5541 }, { "epoch": 0.4109142136872544, "grad_norm": 0.34592148661613464, "learning_rate": 0.00014461615403850962, "loss": 0.5041, "step": 5542 }, { "epoch": 0.4109883591606732, "grad_norm": 0.3714090585708618, "learning_rate": 0.00014460615153788449, "loss": 0.5393, "step": 5543 }, { "epoch": 0.4110625046340921, "grad_norm": 0.37901610136032104, "learning_rate": 0.00014459614903725932, "loss": 0.5445, "step": 5544 }, { "epoch": 0.41113665010751094, "grad_norm": 0.363346666097641, "learning_rate": 0.00014458614653663416, "loss": 0.5189, "step": 5545 }, { "epoch": 0.41121079558092977, "grad_norm": 0.3543715178966522, "learning_rate": 0.000144576144036009, "loss": 0.4823, "step": 5546 }, { "epoch": 0.41128494105434865, "grad_norm": 0.33231690526008606, "learning_rate": 0.00014456614153538386, "loss": 0.4983, "step": 5547 }, { "epoch": 0.4113590865277675, "grad_norm": 0.33900660276412964, "learning_rate": 0.0001445561390347587, "loss": 0.4868, "step": 5548 }, { "epoch": 0.4114332320011863, "grad_norm": 0.36064499616622925, "learning_rate": 0.00014454613653413354, "loss": 0.5034, "step": 5549 }, { "epoch": 0.4115073774746052, "grad_norm": 0.34773677587509155, "learning_rate": 0.00014453613403350837, "loss": 0.5021, "step": 5550 }, { "epoch": 0.411581522948024, "grad_norm": 0.3640957176685333, "learning_rate": 0.00014452613153288324, "loss": 0.5444, "step": 5551 }, { "epoch": 0.41165566842144286, "grad_norm": 0.3889352083206177, "learning_rate": 0.00014451612903225807, "loss": 0.5741, "step": 5552 }, { "epoch": 0.41172981389486174, "grad_norm": 0.34894871711730957, "learning_rate": 0.0001445061265316329, "loss": 0.4665, "step": 5553 }, { "epoch": 0.41180395936828057, "grad_norm": 0.35971584916114807, "learning_rate": 0.00014449612403100777, "loss": 0.5739, "step": 5554 }, { "epoch": 0.4118781048416994, "grad_norm": 0.3503250777721405, "learning_rate": 0.0001444861215303826, "loss": 0.5328, "step": 5555 }, { "epoch": 0.4119522503151183, "grad_norm": 0.396456778049469, "learning_rate": 0.00014447611902975745, "loss": 0.5567, "step": 5556 }, { "epoch": 0.4120263957885371, "grad_norm": 0.3706618845462799, "learning_rate": 0.00014446611652913229, "loss": 0.5598, "step": 5557 }, { "epoch": 0.41210054126195594, "grad_norm": 0.34425532817840576, "learning_rate": 0.00014445611402850715, "loss": 0.4952, "step": 5558 }, { "epoch": 0.41217468673537483, "grad_norm": 0.356357604265213, "learning_rate": 0.00014444611152788196, "loss": 0.4856, "step": 5559 }, { "epoch": 0.41224883220879366, "grad_norm": 0.3449479937553406, "learning_rate": 0.00014443610902725682, "loss": 0.5096, "step": 5560 }, { "epoch": 0.4123229776822125, "grad_norm": 0.3417598605155945, "learning_rate": 0.00014442610652663166, "loss": 0.48, "step": 5561 }, { "epoch": 0.4123971231556314, "grad_norm": 0.4137543737888336, "learning_rate": 0.00014441610402600653, "loss": 0.5503, "step": 5562 }, { "epoch": 0.4124712686290502, "grad_norm": 0.3563957214355469, "learning_rate": 0.00014440610152538134, "loss": 0.5173, "step": 5563 }, { "epoch": 0.41254541410246903, "grad_norm": 0.36064282059669495, "learning_rate": 0.0001443960990247562, "loss": 0.5314, "step": 5564 }, { "epoch": 0.4126195595758879, "grad_norm": 0.36805078387260437, "learning_rate": 0.00014438609652413104, "loss": 0.519, "step": 5565 }, { "epoch": 0.41269370504930675, "grad_norm": 0.39121800661087036, "learning_rate": 0.0001443760940235059, "loss": 0.6336, "step": 5566 }, { "epoch": 0.4127678505227256, "grad_norm": 0.362850546836853, "learning_rate": 0.0001443660915228807, "loss": 0.5479, "step": 5567 }, { "epoch": 0.41284199599614446, "grad_norm": 0.3530351519584656, "learning_rate": 0.00014435608902225558, "loss": 0.4914, "step": 5568 }, { "epoch": 0.4129161414695633, "grad_norm": 0.3722062110900879, "learning_rate": 0.0001443460865216304, "loss": 0.5201, "step": 5569 }, { "epoch": 0.4129902869429821, "grad_norm": 0.3701034188270569, "learning_rate": 0.00014433608402100528, "loss": 0.5311, "step": 5570 }, { "epoch": 0.413064432416401, "grad_norm": 0.3538113832473755, "learning_rate": 0.0001443260815203801, "loss": 0.4859, "step": 5571 }, { "epoch": 0.41313857788981984, "grad_norm": 0.35297891497612, "learning_rate": 0.00014431607901975495, "loss": 0.4991, "step": 5572 }, { "epoch": 0.41321272336323867, "grad_norm": 0.3778578042984009, "learning_rate": 0.0001443060765191298, "loss": 0.5454, "step": 5573 }, { "epoch": 0.4132868688366575, "grad_norm": 0.34235894680023193, "learning_rate": 0.00014429607401850463, "loss": 0.505, "step": 5574 }, { "epoch": 0.4133610143100764, "grad_norm": 0.3724377751350403, "learning_rate": 0.00014428607151787946, "loss": 0.5334, "step": 5575 }, { "epoch": 0.4134351597834952, "grad_norm": 0.36378157138824463, "learning_rate": 0.00014427606901725433, "loss": 0.5517, "step": 5576 }, { "epoch": 0.41350930525691404, "grad_norm": 0.3852967321872711, "learning_rate": 0.00014426606651662916, "loss": 0.5554, "step": 5577 }, { "epoch": 0.4135834507303329, "grad_norm": 0.39053624868392944, "learning_rate": 0.000144256064016004, "loss": 0.5445, "step": 5578 }, { "epoch": 0.41365759620375175, "grad_norm": 0.3473355174064636, "learning_rate": 0.00014424606151537884, "loss": 0.5152, "step": 5579 }, { "epoch": 0.4137317416771706, "grad_norm": 0.34326040744781494, "learning_rate": 0.0001442360590147537, "loss": 0.4769, "step": 5580 }, { "epoch": 0.41380588715058947, "grad_norm": 0.3654695451259613, "learning_rate": 0.00014422605651412854, "loss": 0.5231, "step": 5581 }, { "epoch": 0.4138800326240083, "grad_norm": 0.34246906638145447, "learning_rate": 0.00014421605401350338, "loss": 0.4984, "step": 5582 }, { "epoch": 0.41395417809742713, "grad_norm": 0.3413587510585785, "learning_rate": 0.0001442060515128782, "loss": 0.5211, "step": 5583 }, { "epoch": 0.414028323570846, "grad_norm": 0.3515372574329376, "learning_rate": 0.00014419604901225308, "loss": 0.5103, "step": 5584 }, { "epoch": 0.41410246904426484, "grad_norm": 0.34788599610328674, "learning_rate": 0.00014418604651162791, "loss": 0.5035, "step": 5585 }, { "epoch": 0.41417661451768367, "grad_norm": 0.35313767194747925, "learning_rate": 0.00014417604401100275, "loss": 0.4879, "step": 5586 }, { "epoch": 0.41425075999110256, "grad_norm": 0.3476939797401428, "learning_rate": 0.00014416604151037762, "loss": 0.5325, "step": 5587 }, { "epoch": 0.4143249054645214, "grad_norm": 0.3729972541332245, "learning_rate": 0.00014415603900975245, "loss": 0.5276, "step": 5588 }, { "epoch": 0.4143990509379402, "grad_norm": 0.35095909237861633, "learning_rate": 0.0001441460365091273, "loss": 0.4977, "step": 5589 }, { "epoch": 0.4144731964113591, "grad_norm": 0.3973798453807831, "learning_rate": 0.00014413603400850213, "loss": 0.6243, "step": 5590 }, { "epoch": 0.41454734188477793, "grad_norm": 0.34712639451026917, "learning_rate": 0.000144126031507877, "loss": 0.5052, "step": 5591 }, { "epoch": 0.41462148735819676, "grad_norm": 0.34342437982559204, "learning_rate": 0.00014411602900725183, "loss": 0.5051, "step": 5592 }, { "epoch": 0.41469563283161565, "grad_norm": 0.36899837851524353, "learning_rate": 0.00014410602650662667, "loss": 0.5277, "step": 5593 }, { "epoch": 0.4147697783050345, "grad_norm": 0.33861610293388367, "learning_rate": 0.0001440960240060015, "loss": 0.4621, "step": 5594 }, { "epoch": 0.4148439237784533, "grad_norm": 0.34496283531188965, "learning_rate": 0.00014408602150537637, "loss": 0.4883, "step": 5595 }, { "epoch": 0.4149180692518722, "grad_norm": 0.3511240780353546, "learning_rate": 0.00014407601900475118, "loss": 0.513, "step": 5596 }, { "epoch": 0.414992214725291, "grad_norm": 0.36265408992767334, "learning_rate": 0.00014406601650412604, "loss": 0.5012, "step": 5597 }, { "epoch": 0.41506636019870985, "grad_norm": 0.3935423493385315, "learning_rate": 0.00014405601400350088, "loss": 0.5611, "step": 5598 }, { "epoch": 0.41514050567212873, "grad_norm": 0.3505668342113495, "learning_rate": 0.00014404601150287574, "loss": 0.477, "step": 5599 }, { "epoch": 0.41521465114554756, "grad_norm": 0.35982975363731384, "learning_rate": 0.00014403600900225055, "loss": 0.5316, "step": 5600 }, { "epoch": 0.4152887966189664, "grad_norm": 0.3561864495277405, "learning_rate": 0.00014402600650162542, "loss": 0.4831, "step": 5601 }, { "epoch": 0.4153629420923853, "grad_norm": 0.3565751016139984, "learning_rate": 0.00014401600400100025, "loss": 0.497, "step": 5602 }, { "epoch": 0.4154370875658041, "grad_norm": 0.3804733455181122, "learning_rate": 0.00014400600150037512, "loss": 0.548, "step": 5603 }, { "epoch": 0.41551123303922294, "grad_norm": 0.3790908455848694, "learning_rate": 0.00014399599899974993, "loss": 0.5775, "step": 5604 }, { "epoch": 0.4155853785126418, "grad_norm": 0.3660643696784973, "learning_rate": 0.0001439859964991248, "loss": 0.5396, "step": 5605 }, { "epoch": 0.41565952398606065, "grad_norm": 0.36210471391677856, "learning_rate": 0.00014397599399849963, "loss": 0.529, "step": 5606 }, { "epoch": 0.4157336694594795, "grad_norm": 0.38907843828201294, "learning_rate": 0.0001439659914978745, "loss": 0.5709, "step": 5607 }, { "epoch": 0.41580781493289837, "grad_norm": 0.3736410140991211, "learning_rate": 0.0001439559889972493, "loss": 0.5752, "step": 5608 }, { "epoch": 0.4158819604063172, "grad_norm": 0.36805829405784607, "learning_rate": 0.00014394598649662417, "loss": 0.5086, "step": 5609 }, { "epoch": 0.415956105879736, "grad_norm": 0.3484995365142822, "learning_rate": 0.000143935983995999, "loss": 0.5105, "step": 5610 }, { "epoch": 0.4160302513531549, "grad_norm": 0.357585608959198, "learning_rate": 0.00014392598149537384, "loss": 0.5069, "step": 5611 }, { "epoch": 0.41610439682657374, "grad_norm": 0.3759303390979767, "learning_rate": 0.00014391597899474868, "loss": 0.5281, "step": 5612 }, { "epoch": 0.41617854229999257, "grad_norm": 0.35525891184806824, "learning_rate": 0.00014390597649412354, "loss": 0.5098, "step": 5613 }, { "epoch": 0.41625268777341146, "grad_norm": 0.3586999773979187, "learning_rate": 0.00014389597399349838, "loss": 0.5193, "step": 5614 }, { "epoch": 0.4163268332468303, "grad_norm": 0.3706878125667572, "learning_rate": 0.00014388597149287322, "loss": 0.5115, "step": 5615 }, { "epoch": 0.4164009787202491, "grad_norm": 0.36093971133232117, "learning_rate": 0.00014387596899224805, "loss": 0.521, "step": 5616 }, { "epoch": 0.416475124193668, "grad_norm": 0.35055047273635864, "learning_rate": 0.00014386596649162292, "loss": 0.501, "step": 5617 }, { "epoch": 0.41654926966708683, "grad_norm": 0.3619668483734131, "learning_rate": 0.00014385596399099776, "loss": 0.5167, "step": 5618 }, { "epoch": 0.41662341514050566, "grad_norm": 0.37336087226867676, "learning_rate": 0.0001438459614903726, "loss": 0.5337, "step": 5619 }, { "epoch": 0.41669756061392454, "grad_norm": 0.3798108696937561, "learning_rate": 0.00014383595898974746, "loss": 0.5707, "step": 5620 }, { "epoch": 0.4167717060873434, "grad_norm": 0.3614225387573242, "learning_rate": 0.0001438259564891223, "loss": 0.5029, "step": 5621 }, { "epoch": 0.4168458515607622, "grad_norm": 0.35182106494903564, "learning_rate": 0.00014381595398849713, "loss": 0.5036, "step": 5622 }, { "epoch": 0.4169199970341811, "grad_norm": 0.34795475006103516, "learning_rate": 0.00014380595148787197, "loss": 0.4911, "step": 5623 }, { "epoch": 0.4169941425075999, "grad_norm": 0.3477431535720825, "learning_rate": 0.00014379594898724683, "loss": 0.4913, "step": 5624 }, { "epoch": 0.41706828798101875, "grad_norm": 0.40232279896736145, "learning_rate": 0.00014378594648662167, "loss": 0.4716, "step": 5625 }, { "epoch": 0.41714243345443763, "grad_norm": 0.3546924293041229, "learning_rate": 0.0001437759439859965, "loss": 0.5049, "step": 5626 }, { "epoch": 0.41721657892785646, "grad_norm": 0.36314719915390015, "learning_rate": 0.00014376594148537134, "loss": 0.493, "step": 5627 }, { "epoch": 0.4172907244012753, "grad_norm": 0.36353859305381775, "learning_rate": 0.0001437559389847462, "loss": 0.4851, "step": 5628 }, { "epoch": 0.4173648698746942, "grad_norm": 0.3659752309322357, "learning_rate": 0.00014374593648412104, "loss": 0.5551, "step": 5629 }, { "epoch": 0.417439015348113, "grad_norm": 0.3979252278804779, "learning_rate": 0.00014373593398349588, "loss": 0.6007, "step": 5630 }, { "epoch": 0.41751316082153184, "grad_norm": 0.357713520526886, "learning_rate": 0.00014372593148287072, "loss": 0.5245, "step": 5631 }, { "epoch": 0.41758730629495067, "grad_norm": 0.3614163100719452, "learning_rate": 0.00014371592898224558, "loss": 0.5038, "step": 5632 }, { "epoch": 0.41766145176836955, "grad_norm": 0.3611854910850525, "learning_rate": 0.0001437059264816204, "loss": 0.502, "step": 5633 }, { "epoch": 0.4177355972417884, "grad_norm": 0.34597885608673096, "learning_rate": 0.00014369592398099526, "loss": 0.493, "step": 5634 }, { "epoch": 0.4178097427152072, "grad_norm": 0.3840094208717346, "learning_rate": 0.0001436859214803701, "loss": 0.5379, "step": 5635 }, { "epoch": 0.4178838881886261, "grad_norm": 0.3731662631034851, "learning_rate": 0.00014367591897974496, "loss": 0.5225, "step": 5636 }, { "epoch": 0.4179580336620449, "grad_norm": 0.38505205512046814, "learning_rate": 0.00014366591647911977, "loss": 0.529, "step": 5637 }, { "epoch": 0.41803217913546376, "grad_norm": 0.35233011841773987, "learning_rate": 0.00014365591397849463, "loss": 0.4947, "step": 5638 }, { "epoch": 0.41810632460888264, "grad_norm": 0.34152400493621826, "learning_rate": 0.00014364591147786947, "loss": 0.4939, "step": 5639 }, { "epoch": 0.41818047008230147, "grad_norm": 0.37648624181747437, "learning_rate": 0.00014363590897724433, "loss": 0.5337, "step": 5640 }, { "epoch": 0.4182546155557203, "grad_norm": 0.385163277387619, "learning_rate": 0.00014362590647661914, "loss": 0.5779, "step": 5641 }, { "epoch": 0.4183287610291392, "grad_norm": 0.38460540771484375, "learning_rate": 0.000143615903975994, "loss": 0.5541, "step": 5642 }, { "epoch": 0.418402906502558, "grad_norm": 0.3697705864906311, "learning_rate": 0.00014360590147536885, "loss": 0.521, "step": 5643 }, { "epoch": 0.41847705197597684, "grad_norm": 0.37114274501800537, "learning_rate": 0.0001435958989747437, "loss": 0.5462, "step": 5644 }, { "epoch": 0.41855119744939573, "grad_norm": 0.35880914330482483, "learning_rate": 0.00014358589647411852, "loss": 0.5191, "step": 5645 }, { "epoch": 0.41862534292281456, "grad_norm": 0.35937488079071045, "learning_rate": 0.00014357589397349338, "loss": 0.5014, "step": 5646 }, { "epoch": 0.4186994883962334, "grad_norm": 0.34751054644584656, "learning_rate": 0.00014356589147286822, "loss": 0.5185, "step": 5647 }, { "epoch": 0.4187736338696523, "grad_norm": 0.36022648215293884, "learning_rate": 0.00014355588897224306, "loss": 0.5278, "step": 5648 }, { "epoch": 0.4188477793430711, "grad_norm": 0.39098361134529114, "learning_rate": 0.0001435458864716179, "loss": 0.5625, "step": 5649 }, { "epoch": 0.41892192481648993, "grad_norm": 0.3632911741733551, "learning_rate": 0.00014353588397099276, "loss": 0.548, "step": 5650 }, { "epoch": 0.4189960702899088, "grad_norm": 0.34896206855773926, "learning_rate": 0.0001435258814703676, "loss": 0.5238, "step": 5651 }, { "epoch": 0.41907021576332765, "grad_norm": 0.37898799777030945, "learning_rate": 0.00014351587896974243, "loss": 0.5585, "step": 5652 }, { "epoch": 0.4191443612367465, "grad_norm": 0.3712969124317169, "learning_rate": 0.0001435058764691173, "loss": 0.5123, "step": 5653 }, { "epoch": 0.41921850671016536, "grad_norm": 0.3384936451911926, "learning_rate": 0.00014349587396849213, "loss": 0.4867, "step": 5654 }, { "epoch": 0.4192926521835842, "grad_norm": 0.34284672141075134, "learning_rate": 0.00014348587146786697, "loss": 0.4971, "step": 5655 }, { "epoch": 0.419366797657003, "grad_norm": 0.35510480403900146, "learning_rate": 0.0001434758689672418, "loss": 0.5389, "step": 5656 }, { "epoch": 0.4194409431304219, "grad_norm": 0.36078140139579773, "learning_rate": 0.00014346586646661667, "loss": 0.5447, "step": 5657 }, { "epoch": 0.41951508860384074, "grad_norm": 0.39182889461517334, "learning_rate": 0.0001434558639659915, "loss": 0.5343, "step": 5658 }, { "epoch": 0.41958923407725957, "grad_norm": 0.347312331199646, "learning_rate": 0.00014344586146536635, "loss": 0.4887, "step": 5659 }, { "epoch": 0.41966337955067845, "grad_norm": 0.3613635003566742, "learning_rate": 0.00014343585896474118, "loss": 0.5273, "step": 5660 }, { "epoch": 0.4197375250240973, "grad_norm": 0.35983386635780334, "learning_rate": 0.00014342585646411605, "loss": 0.5683, "step": 5661 }, { "epoch": 0.4198116704975161, "grad_norm": 0.37036749720573425, "learning_rate": 0.00014341585396349089, "loss": 0.5312, "step": 5662 }, { "epoch": 0.419885815970935, "grad_norm": 0.34611910581588745, "learning_rate": 0.00014340585146286572, "loss": 0.4976, "step": 5663 }, { "epoch": 0.4199599614443538, "grad_norm": 0.35150137543678284, "learning_rate": 0.00014339584896224056, "loss": 0.5149, "step": 5664 }, { "epoch": 0.42003410691777265, "grad_norm": 0.3615265488624573, "learning_rate": 0.00014338584646161542, "loss": 0.5074, "step": 5665 }, { "epoch": 0.42010825239119154, "grad_norm": 0.3514833450317383, "learning_rate": 0.00014337584396099026, "loss": 0.5216, "step": 5666 }, { "epoch": 0.42018239786461037, "grad_norm": 0.35439684987068176, "learning_rate": 0.0001433658414603651, "loss": 0.4922, "step": 5667 }, { "epoch": 0.4202565433380292, "grad_norm": 0.3432159721851349, "learning_rate": 0.00014335583895973994, "loss": 0.5121, "step": 5668 }, { "epoch": 0.4203306888114481, "grad_norm": 0.34613049030303955, "learning_rate": 0.0001433458364591148, "loss": 0.4914, "step": 5669 }, { "epoch": 0.4204048342848669, "grad_norm": 0.347783625125885, "learning_rate": 0.00014333583395848964, "loss": 0.473, "step": 5670 }, { "epoch": 0.42047897975828574, "grad_norm": 0.361467570066452, "learning_rate": 0.00014332583145786447, "loss": 0.513, "step": 5671 }, { "epoch": 0.4205531252317046, "grad_norm": 0.35521942377090454, "learning_rate": 0.0001433158289572393, "loss": 0.4857, "step": 5672 }, { "epoch": 0.42062727070512346, "grad_norm": 0.39109471440315247, "learning_rate": 0.00014330582645661417, "loss": 0.5593, "step": 5673 }, { "epoch": 0.4207014161785423, "grad_norm": 0.36650872230529785, "learning_rate": 0.00014329582395598898, "loss": 0.5261, "step": 5674 }, { "epoch": 0.42077556165196117, "grad_norm": 0.35812804102897644, "learning_rate": 0.00014328582145536385, "loss": 0.508, "step": 5675 }, { "epoch": 0.42084970712538, "grad_norm": 0.36069270968437195, "learning_rate": 0.00014327581895473869, "loss": 0.522, "step": 5676 }, { "epoch": 0.42092385259879883, "grad_norm": 0.3537238538265228, "learning_rate": 0.00014326581645411355, "loss": 0.5028, "step": 5677 }, { "epoch": 0.4209979980722177, "grad_norm": 0.35818982124328613, "learning_rate": 0.00014325581395348836, "loss": 0.4995, "step": 5678 }, { "epoch": 0.42107214354563655, "grad_norm": 0.3798484206199646, "learning_rate": 0.00014324581145286322, "loss": 0.5478, "step": 5679 }, { "epoch": 0.4211462890190554, "grad_norm": 0.3523408770561218, "learning_rate": 0.00014323580895223806, "loss": 0.5393, "step": 5680 }, { "epoch": 0.42122043449247426, "grad_norm": 0.3507483899593353, "learning_rate": 0.00014322580645161293, "loss": 0.5013, "step": 5681 }, { "epoch": 0.4212945799658931, "grad_norm": 0.36197927594184875, "learning_rate": 0.00014321580395098774, "loss": 0.5216, "step": 5682 }, { "epoch": 0.4213687254393119, "grad_norm": 0.36295944452285767, "learning_rate": 0.0001432058014503626, "loss": 0.5144, "step": 5683 }, { "epoch": 0.4214428709127308, "grad_norm": 0.33328279852867126, "learning_rate": 0.00014319579894973744, "loss": 0.5092, "step": 5684 }, { "epoch": 0.42151701638614963, "grad_norm": 0.338856965303421, "learning_rate": 0.00014318579644911227, "loss": 0.4931, "step": 5685 }, { "epoch": 0.42159116185956846, "grad_norm": 0.37814587354660034, "learning_rate": 0.00014317579394848714, "loss": 0.5529, "step": 5686 }, { "epoch": 0.4216653073329873, "grad_norm": 0.36617061495780945, "learning_rate": 0.00014316579144786198, "loss": 0.555, "step": 5687 }, { "epoch": 0.4217394528064062, "grad_norm": 0.3627009093761444, "learning_rate": 0.0001431557889472368, "loss": 0.4806, "step": 5688 }, { "epoch": 0.421813598279825, "grad_norm": 0.34914299845695496, "learning_rate": 0.00014314578644661165, "loss": 0.4834, "step": 5689 }, { "epoch": 0.42188774375324384, "grad_norm": 0.3614680767059326, "learning_rate": 0.00014313578394598651, "loss": 0.501, "step": 5690 }, { "epoch": 0.4219618892266627, "grad_norm": 0.32936426997184753, "learning_rate": 0.00014312578144536135, "loss": 0.4929, "step": 5691 }, { "epoch": 0.42203603470008155, "grad_norm": 0.35012122988700867, "learning_rate": 0.0001431157789447362, "loss": 0.511, "step": 5692 }, { "epoch": 0.4221101801735004, "grad_norm": 0.36903831362724304, "learning_rate": 0.00014310577644411102, "loss": 0.5438, "step": 5693 }, { "epoch": 0.42218432564691927, "grad_norm": 0.337219774723053, "learning_rate": 0.0001430957739434859, "loss": 0.4641, "step": 5694 }, { "epoch": 0.4222584711203381, "grad_norm": 0.36369144916534424, "learning_rate": 0.00014308577144286073, "loss": 0.5064, "step": 5695 }, { "epoch": 0.4223326165937569, "grad_norm": 0.35534989833831787, "learning_rate": 0.00014307576894223556, "loss": 0.4764, "step": 5696 }, { "epoch": 0.4224067620671758, "grad_norm": 0.35382863879203796, "learning_rate": 0.0001430657664416104, "loss": 0.4789, "step": 5697 }, { "epoch": 0.42248090754059464, "grad_norm": 0.359672874212265, "learning_rate": 0.00014305576394098526, "loss": 0.5049, "step": 5698 }, { "epoch": 0.42255505301401347, "grad_norm": 0.36410287022590637, "learning_rate": 0.0001430457614403601, "loss": 0.4837, "step": 5699 }, { "epoch": 0.42262919848743236, "grad_norm": 0.3738282024860382, "learning_rate": 0.00014303575893973494, "loss": 0.5292, "step": 5700 }, { "epoch": 0.4227033439608512, "grad_norm": 0.3546549081802368, "learning_rate": 0.00014302575643910978, "loss": 0.5071, "step": 5701 }, { "epoch": 0.42277748943427, "grad_norm": 0.33963218331336975, "learning_rate": 0.00014301575393848464, "loss": 0.5057, "step": 5702 }, { "epoch": 0.4228516349076889, "grad_norm": 0.3805730938911438, "learning_rate": 0.00014300575143785948, "loss": 0.5045, "step": 5703 }, { "epoch": 0.42292578038110773, "grad_norm": 0.36794233322143555, "learning_rate": 0.00014299574893723431, "loss": 0.5251, "step": 5704 }, { "epoch": 0.42299992585452656, "grad_norm": 0.33936524391174316, "learning_rate": 0.00014298574643660915, "loss": 0.4834, "step": 5705 }, { "epoch": 0.42307407132794544, "grad_norm": 0.35642972588539124, "learning_rate": 0.00014297574393598402, "loss": 0.5164, "step": 5706 }, { "epoch": 0.4231482168013643, "grad_norm": 0.3533554673194885, "learning_rate": 0.00014296574143535885, "loss": 0.4928, "step": 5707 }, { "epoch": 0.4232223622747831, "grad_norm": 0.38066351413726807, "learning_rate": 0.0001429557389347337, "loss": 0.5459, "step": 5708 }, { "epoch": 0.423296507748202, "grad_norm": 0.35656365752220154, "learning_rate": 0.00014294573643410853, "loss": 0.5307, "step": 5709 }, { "epoch": 0.4233706532216208, "grad_norm": 0.37708231806755066, "learning_rate": 0.0001429357339334834, "loss": 0.5222, "step": 5710 }, { "epoch": 0.42344479869503965, "grad_norm": 0.36121705174446106, "learning_rate": 0.0001429257314328582, "loss": 0.5157, "step": 5711 }, { "epoch": 0.42351894416845853, "grad_norm": 0.3876655697822571, "learning_rate": 0.00014291572893223307, "loss": 0.5233, "step": 5712 }, { "epoch": 0.42359308964187736, "grad_norm": 0.3467617332935333, "learning_rate": 0.0001429057264316079, "loss": 0.489, "step": 5713 }, { "epoch": 0.4236672351152962, "grad_norm": 0.3629533350467682, "learning_rate": 0.00014289572393098277, "loss": 0.5105, "step": 5714 }, { "epoch": 0.4237413805887151, "grad_norm": 0.3653801679611206, "learning_rate": 0.00014288572143035758, "loss": 0.5483, "step": 5715 }, { "epoch": 0.4238155260621339, "grad_norm": 0.3923763632774353, "learning_rate": 0.00014287571892973244, "loss": 0.5784, "step": 5716 }, { "epoch": 0.42388967153555274, "grad_norm": 0.3759954571723938, "learning_rate": 0.00014286571642910728, "loss": 0.496, "step": 5717 }, { "epoch": 0.4239638170089716, "grad_norm": 0.3648175895214081, "learning_rate": 0.00014285571392848214, "loss": 0.5483, "step": 5718 }, { "epoch": 0.42403796248239045, "grad_norm": 0.36168789863586426, "learning_rate": 0.00014284571142785695, "loss": 0.4663, "step": 5719 }, { "epoch": 0.4241121079558093, "grad_norm": 0.37148967385292053, "learning_rate": 0.00014283570892723182, "loss": 0.5584, "step": 5720 }, { "epoch": 0.42418625342922817, "grad_norm": 0.35077592730522156, "learning_rate": 0.00014282570642660665, "loss": 0.5166, "step": 5721 }, { "epoch": 0.424260398902647, "grad_norm": 0.37638354301452637, "learning_rate": 0.0001428157039259815, "loss": 0.5255, "step": 5722 }, { "epoch": 0.4243345443760658, "grad_norm": 0.3658425509929657, "learning_rate": 0.00014280570142535635, "loss": 0.5434, "step": 5723 }, { "epoch": 0.4244086898494847, "grad_norm": 0.3470044732093811, "learning_rate": 0.0001427956989247312, "loss": 0.5269, "step": 5724 }, { "epoch": 0.42448283532290354, "grad_norm": 0.3614761233329773, "learning_rate": 0.00014278569642410603, "loss": 0.5233, "step": 5725 }, { "epoch": 0.42455698079632237, "grad_norm": 0.34189683198928833, "learning_rate": 0.00014277569392348087, "loss": 0.4872, "step": 5726 }, { "epoch": 0.42463112626974125, "grad_norm": 0.3514789044857025, "learning_rate": 0.00014276569142285573, "loss": 0.5006, "step": 5727 }, { "epoch": 0.4247052717431601, "grad_norm": 0.38251596689224243, "learning_rate": 0.00014275568892223057, "loss": 0.5136, "step": 5728 }, { "epoch": 0.4247794172165789, "grad_norm": 0.3824033737182617, "learning_rate": 0.00014274568642160543, "loss": 0.5319, "step": 5729 }, { "epoch": 0.4248535626899978, "grad_norm": 0.34994035959243774, "learning_rate": 0.00014273568392098024, "loss": 0.5125, "step": 5730 }, { "epoch": 0.42492770816341663, "grad_norm": 0.3689294755458832, "learning_rate": 0.0001427256814203551, "loss": 0.5086, "step": 5731 }, { "epoch": 0.42500185363683546, "grad_norm": 0.379430890083313, "learning_rate": 0.00014271567891972994, "loss": 0.5619, "step": 5732 }, { "epoch": 0.42507599911025434, "grad_norm": 0.3974028527736664, "learning_rate": 0.00014270567641910478, "loss": 0.5603, "step": 5733 }, { "epoch": 0.4251501445836732, "grad_norm": 0.35435381531715393, "learning_rate": 0.00014269567391847962, "loss": 0.5096, "step": 5734 }, { "epoch": 0.425224290057092, "grad_norm": 0.42415928840637207, "learning_rate": 0.00014268567141785448, "loss": 0.6196, "step": 5735 }, { "epoch": 0.4252984355305109, "grad_norm": 0.3994363844394684, "learning_rate": 0.00014267566891722932, "loss": 0.5479, "step": 5736 }, { "epoch": 0.4253725810039297, "grad_norm": 0.364532470703125, "learning_rate": 0.00014266566641660416, "loss": 0.5297, "step": 5737 }, { "epoch": 0.42544672647734855, "grad_norm": 0.3566719889640808, "learning_rate": 0.000142655663915979, "loss": 0.5044, "step": 5738 }, { "epoch": 0.42552087195076743, "grad_norm": 0.3485257029533386, "learning_rate": 0.00014264566141535386, "loss": 0.5299, "step": 5739 }, { "epoch": 0.42559501742418626, "grad_norm": 0.3801499307155609, "learning_rate": 0.0001426356589147287, "loss": 0.541, "step": 5740 }, { "epoch": 0.4256691628976051, "grad_norm": 0.34073197841644287, "learning_rate": 0.00014262565641410353, "loss": 0.5304, "step": 5741 }, { "epoch": 0.425743308371024, "grad_norm": 0.35283878445625305, "learning_rate": 0.00014261565391347837, "loss": 0.5055, "step": 5742 }, { "epoch": 0.4258174538444428, "grad_norm": 0.34058645367622375, "learning_rate": 0.00014260565141285323, "loss": 0.472, "step": 5743 }, { "epoch": 0.42589159931786164, "grad_norm": 0.34972232580184937, "learning_rate": 0.00014259564891222807, "loss": 0.5057, "step": 5744 }, { "epoch": 0.42596574479128047, "grad_norm": 0.36295077204704285, "learning_rate": 0.0001425856464116029, "loss": 0.5297, "step": 5745 }, { "epoch": 0.42603989026469935, "grad_norm": 0.36252909898757935, "learning_rate": 0.00014257564391097774, "loss": 0.5227, "step": 5746 }, { "epoch": 0.4261140357381182, "grad_norm": 0.33562690019607544, "learning_rate": 0.0001425656414103526, "loss": 0.4862, "step": 5747 }, { "epoch": 0.426188181211537, "grad_norm": 0.359164834022522, "learning_rate": 0.00014255563890972742, "loss": 0.5415, "step": 5748 }, { "epoch": 0.4262623266849559, "grad_norm": 0.34210383892059326, "learning_rate": 0.00014254563640910228, "loss": 0.4624, "step": 5749 }, { "epoch": 0.4263364721583747, "grad_norm": 0.3534594774246216, "learning_rate": 0.00014253563390847712, "loss": 0.4995, "step": 5750 }, { "epoch": 0.42641061763179355, "grad_norm": 0.3499293029308319, "learning_rate": 0.00014252563140785198, "loss": 0.4879, "step": 5751 }, { "epoch": 0.42648476310521244, "grad_norm": 0.35082125663757324, "learning_rate": 0.0001425156289072268, "loss": 0.4713, "step": 5752 }, { "epoch": 0.42655890857863127, "grad_norm": 0.38667285442352295, "learning_rate": 0.00014250562640660166, "loss": 0.5176, "step": 5753 }, { "epoch": 0.4266330540520501, "grad_norm": 0.37969890236854553, "learning_rate": 0.0001424956239059765, "loss": 0.5056, "step": 5754 }, { "epoch": 0.426707199525469, "grad_norm": 0.36563533544540405, "learning_rate": 0.00014248562140535136, "loss": 0.561, "step": 5755 }, { "epoch": 0.4267813449988878, "grad_norm": 0.36865124106407166, "learning_rate": 0.0001424756189047262, "loss": 0.5068, "step": 5756 }, { "epoch": 0.42685549047230664, "grad_norm": 0.3212534487247467, "learning_rate": 0.00014246561640410103, "loss": 0.4525, "step": 5757 }, { "epoch": 0.4269296359457255, "grad_norm": 0.3558371067047119, "learning_rate": 0.00014245561390347587, "loss": 0.4792, "step": 5758 }, { "epoch": 0.42700378141914436, "grad_norm": 0.36126476526260376, "learning_rate": 0.0001424456114028507, "loss": 0.5346, "step": 5759 }, { "epoch": 0.4270779268925632, "grad_norm": 0.3753640353679657, "learning_rate": 0.00014243560890222557, "loss": 0.4976, "step": 5760 }, { "epoch": 0.42715207236598207, "grad_norm": 0.3748569190502167, "learning_rate": 0.0001424256064016004, "loss": 0.521, "step": 5761 }, { "epoch": 0.4272262178394009, "grad_norm": 0.3471347689628601, "learning_rate": 0.00014241560390097527, "loss": 0.4773, "step": 5762 }, { "epoch": 0.42730036331281973, "grad_norm": 0.34247174859046936, "learning_rate": 0.00014240560140035008, "loss": 0.4683, "step": 5763 }, { "epoch": 0.4273745087862386, "grad_norm": 0.3480434715747833, "learning_rate": 0.00014239559889972495, "loss": 0.4883, "step": 5764 }, { "epoch": 0.42744865425965745, "grad_norm": 0.3626059889793396, "learning_rate": 0.00014238559639909978, "loss": 0.5271, "step": 5765 }, { "epoch": 0.4275227997330763, "grad_norm": 0.35282400250434875, "learning_rate": 0.00014237559389847465, "loss": 0.517, "step": 5766 }, { "epoch": 0.42759694520649516, "grad_norm": 0.36799418926239014, "learning_rate": 0.00014236559139784946, "loss": 0.5081, "step": 5767 }, { "epoch": 0.427671090679914, "grad_norm": 0.3548973798751831, "learning_rate": 0.00014235558889722432, "loss": 0.5122, "step": 5768 }, { "epoch": 0.4277452361533328, "grad_norm": 0.3460337817668915, "learning_rate": 0.00014234558639659916, "loss": 0.5114, "step": 5769 }, { "epoch": 0.4278193816267517, "grad_norm": 0.34213441610336304, "learning_rate": 0.000142335583895974, "loss": 0.4922, "step": 5770 }, { "epoch": 0.42789352710017053, "grad_norm": 0.361488938331604, "learning_rate": 0.00014232558139534883, "loss": 0.504, "step": 5771 }, { "epoch": 0.42796767257358936, "grad_norm": 0.3884842097759247, "learning_rate": 0.0001423155788947237, "loss": 0.5291, "step": 5772 }, { "epoch": 0.42804181804700825, "grad_norm": 0.3398195803165436, "learning_rate": 0.00014230557639409853, "loss": 0.4666, "step": 5773 }, { "epoch": 0.4281159635204271, "grad_norm": 0.3600122630596161, "learning_rate": 0.00014229557389347337, "loss": 0.5097, "step": 5774 }, { "epoch": 0.4281901089938459, "grad_norm": 0.37923645973205566, "learning_rate": 0.0001422855713928482, "loss": 0.5297, "step": 5775 }, { "epoch": 0.4282642544672648, "grad_norm": 0.3336390256881714, "learning_rate": 0.00014227556889222307, "loss": 0.4778, "step": 5776 }, { "epoch": 0.4283383999406836, "grad_norm": 0.3694206476211548, "learning_rate": 0.0001422655663915979, "loss": 0.5204, "step": 5777 }, { "epoch": 0.42841254541410245, "grad_norm": 0.36990904808044434, "learning_rate": 0.00014225556389097275, "loss": 0.5391, "step": 5778 }, { "epoch": 0.42848669088752134, "grad_norm": 0.3324855864048004, "learning_rate": 0.00014224556139034758, "loss": 0.4677, "step": 5779 }, { "epoch": 0.42856083636094017, "grad_norm": 0.37015897035598755, "learning_rate": 0.00014223555888972245, "loss": 0.4986, "step": 5780 }, { "epoch": 0.428634981834359, "grad_norm": 0.34548890590667725, "learning_rate": 0.00014222555638909729, "loss": 0.5165, "step": 5781 }, { "epoch": 0.4287091273077779, "grad_norm": 0.34334027767181396, "learning_rate": 0.00014221555388847212, "loss": 0.5429, "step": 5782 }, { "epoch": 0.4287832727811967, "grad_norm": 0.34156927466392517, "learning_rate": 0.00014220555138784696, "loss": 0.4976, "step": 5783 }, { "epoch": 0.42885741825461554, "grad_norm": 0.3452537953853607, "learning_rate": 0.00014219554888722182, "loss": 0.5185, "step": 5784 }, { "epoch": 0.4289315637280344, "grad_norm": 0.368133008480072, "learning_rate": 0.00014218554638659663, "loss": 0.5479, "step": 5785 }, { "epoch": 0.42900570920145326, "grad_norm": 0.36145925521850586, "learning_rate": 0.0001421755438859715, "loss": 0.533, "step": 5786 }, { "epoch": 0.4290798546748721, "grad_norm": 0.34935829043388367, "learning_rate": 0.00014216554138534633, "loss": 0.5451, "step": 5787 }, { "epoch": 0.42915400014829097, "grad_norm": 0.3650127351284027, "learning_rate": 0.0001421555388847212, "loss": 0.5082, "step": 5788 }, { "epoch": 0.4292281456217098, "grad_norm": 0.36994749307632446, "learning_rate": 0.00014214553638409604, "loss": 0.5571, "step": 5789 }, { "epoch": 0.42930229109512863, "grad_norm": 0.3542785942554474, "learning_rate": 0.00014213553388347087, "loss": 0.5186, "step": 5790 }, { "epoch": 0.4293764365685475, "grad_norm": 0.35819634795188904, "learning_rate": 0.0001421255313828457, "loss": 0.5015, "step": 5791 }, { "epoch": 0.42945058204196634, "grad_norm": 0.3603178560733795, "learning_rate": 0.00014211552888222057, "loss": 0.5206, "step": 5792 }, { "epoch": 0.4295247275153852, "grad_norm": 0.3671955168247223, "learning_rate": 0.0001421055263815954, "loss": 0.5099, "step": 5793 }, { "epoch": 0.42959887298880406, "grad_norm": 0.35918131470680237, "learning_rate": 0.00014209552388097025, "loss": 0.539, "step": 5794 }, { "epoch": 0.4296730184622229, "grad_norm": 0.37000593543052673, "learning_rate": 0.00014208552138034509, "loss": 0.5219, "step": 5795 }, { "epoch": 0.4297471639356417, "grad_norm": 0.34678635001182556, "learning_rate": 0.00014207551887971992, "loss": 0.5179, "step": 5796 }, { "epoch": 0.4298213094090606, "grad_norm": 0.3700203001499176, "learning_rate": 0.0001420655163790948, "loss": 0.5248, "step": 5797 }, { "epoch": 0.42989545488247943, "grad_norm": 0.35506272315979004, "learning_rate": 0.00014205551387846962, "loss": 0.5308, "step": 5798 }, { "epoch": 0.42996960035589826, "grad_norm": 0.3493562936782837, "learning_rate": 0.0001420455113778445, "loss": 0.463, "step": 5799 }, { "epoch": 0.43004374582931715, "grad_norm": 0.37460973858833313, "learning_rate": 0.0001420355088772193, "loss": 0.544, "step": 5800 }, { "epoch": 0.430117891302736, "grad_norm": 0.3347226679325104, "learning_rate": 0.00014202550637659416, "loss": 0.511, "step": 5801 }, { "epoch": 0.4301920367761548, "grad_norm": 0.349195659160614, "learning_rate": 0.000142015503875969, "loss": 0.5092, "step": 5802 }, { "epoch": 0.43026618224957364, "grad_norm": 0.37738218903541565, "learning_rate": 0.00014200550137534386, "loss": 0.5506, "step": 5803 }, { "epoch": 0.4303403277229925, "grad_norm": 0.3852379620075226, "learning_rate": 0.00014199549887471867, "loss": 0.5219, "step": 5804 }, { "epoch": 0.43041447319641135, "grad_norm": 0.34732291102409363, "learning_rate": 0.00014198549637409354, "loss": 0.4896, "step": 5805 }, { "epoch": 0.4304886186698302, "grad_norm": 0.3778276741504669, "learning_rate": 0.00014197549387346838, "loss": 0.5302, "step": 5806 }, { "epoch": 0.43056276414324907, "grad_norm": 0.353595495223999, "learning_rate": 0.0001419654913728432, "loss": 0.4849, "step": 5807 }, { "epoch": 0.4306369096166679, "grad_norm": 0.3606713116168976, "learning_rate": 0.00014195548887221805, "loss": 0.5255, "step": 5808 }, { "epoch": 0.4307110550900867, "grad_norm": 0.3605286180973053, "learning_rate": 0.0001419454863715929, "loss": 0.5208, "step": 5809 }, { "epoch": 0.4307852005635056, "grad_norm": 0.3607872724533081, "learning_rate": 0.00014193548387096775, "loss": 0.5088, "step": 5810 }, { "epoch": 0.43085934603692444, "grad_norm": 0.37052467465400696, "learning_rate": 0.0001419254813703426, "loss": 0.5453, "step": 5811 }, { "epoch": 0.43093349151034327, "grad_norm": 0.3379790782928467, "learning_rate": 0.00014191547886971742, "loss": 0.5012, "step": 5812 }, { "epoch": 0.43100763698376215, "grad_norm": 0.3371259272098541, "learning_rate": 0.0001419054763690923, "loss": 0.4712, "step": 5813 }, { "epoch": 0.431081782457181, "grad_norm": 0.3652292788028717, "learning_rate": 0.00014189547386846713, "loss": 0.5119, "step": 5814 }, { "epoch": 0.4311559279305998, "grad_norm": 0.3591395318508148, "learning_rate": 0.00014188547136784196, "loss": 0.4867, "step": 5815 }, { "epoch": 0.4312300734040187, "grad_norm": 0.33558839559555054, "learning_rate": 0.0001418754688672168, "loss": 0.4837, "step": 5816 }, { "epoch": 0.43130421887743753, "grad_norm": 0.3709341883659363, "learning_rate": 0.00014186546636659166, "loss": 0.4879, "step": 5817 }, { "epoch": 0.43137836435085636, "grad_norm": 0.34645357728004456, "learning_rate": 0.0001418554638659665, "loss": 0.5076, "step": 5818 }, { "epoch": 0.43145250982427524, "grad_norm": 0.3662346303462982, "learning_rate": 0.00014184546136534134, "loss": 0.5246, "step": 5819 }, { "epoch": 0.4315266552976941, "grad_norm": 0.3637968599796295, "learning_rate": 0.00014183545886471618, "loss": 0.4897, "step": 5820 }, { "epoch": 0.4316008007711129, "grad_norm": 0.35029542446136475, "learning_rate": 0.00014182545636409104, "loss": 0.5141, "step": 5821 }, { "epoch": 0.4316749462445318, "grad_norm": 0.36543047428131104, "learning_rate": 0.00014181545386346588, "loss": 0.5073, "step": 5822 }, { "epoch": 0.4317490917179506, "grad_norm": 0.3429579436779022, "learning_rate": 0.00014180545136284071, "loss": 0.5057, "step": 5823 }, { "epoch": 0.43182323719136945, "grad_norm": 0.3716259300708771, "learning_rate": 0.00014179544886221555, "loss": 0.5314, "step": 5824 }, { "epoch": 0.43189738266478833, "grad_norm": 0.3603084683418274, "learning_rate": 0.00014178544636159042, "loss": 0.5358, "step": 5825 }, { "epoch": 0.43197152813820716, "grad_norm": 0.37702786922454834, "learning_rate": 0.00014177544386096525, "loss": 0.5244, "step": 5826 }, { "epoch": 0.432045673611626, "grad_norm": 0.39236971735954285, "learning_rate": 0.0001417654413603401, "loss": 0.5958, "step": 5827 }, { "epoch": 0.4321198190850449, "grad_norm": 0.3434748649597168, "learning_rate": 0.00014175543885971493, "loss": 0.4877, "step": 5828 }, { "epoch": 0.4321939645584637, "grad_norm": 0.38281646370887756, "learning_rate": 0.0001417454363590898, "loss": 0.552, "step": 5829 }, { "epoch": 0.43226811003188254, "grad_norm": 0.3546072244644165, "learning_rate": 0.00014173543385846463, "loss": 0.5311, "step": 5830 }, { "epoch": 0.4323422555053014, "grad_norm": 0.3478342294692993, "learning_rate": 0.00014172543135783946, "loss": 0.5072, "step": 5831 }, { "epoch": 0.43241640097872025, "grad_norm": 0.34025707840919495, "learning_rate": 0.00014171542885721433, "loss": 0.4878, "step": 5832 }, { "epoch": 0.4324905464521391, "grad_norm": 0.3571125268936157, "learning_rate": 0.00014170542635658914, "loss": 0.5236, "step": 5833 }, { "epoch": 0.43256469192555796, "grad_norm": 0.3638688027858734, "learning_rate": 0.000141695423855964, "loss": 0.5221, "step": 5834 }, { "epoch": 0.4326388373989768, "grad_norm": 0.3371364176273346, "learning_rate": 0.00014168542135533884, "loss": 0.4831, "step": 5835 }, { "epoch": 0.4327129828723956, "grad_norm": 0.35197556018829346, "learning_rate": 0.0001416754188547137, "loss": 0.5244, "step": 5836 }, { "epoch": 0.4327871283458145, "grad_norm": 0.362073689699173, "learning_rate": 0.00014166541635408851, "loss": 0.5356, "step": 5837 }, { "epoch": 0.43286127381923334, "grad_norm": 0.34803175926208496, "learning_rate": 0.00014165541385346338, "loss": 0.4932, "step": 5838 }, { "epoch": 0.43293541929265217, "grad_norm": 0.3509584367275238, "learning_rate": 0.00014164541135283822, "loss": 0.4909, "step": 5839 }, { "epoch": 0.43300956476607105, "grad_norm": 0.3437286913394928, "learning_rate": 0.00014163540885221308, "loss": 0.4976, "step": 5840 }, { "epoch": 0.4330837102394899, "grad_norm": 0.36754289269447327, "learning_rate": 0.0001416254063515879, "loss": 0.5221, "step": 5841 }, { "epoch": 0.4331578557129087, "grad_norm": 0.34924760460853577, "learning_rate": 0.00014161540385096275, "loss": 0.5487, "step": 5842 }, { "epoch": 0.4332320011863276, "grad_norm": 0.37455207109451294, "learning_rate": 0.0001416054013503376, "loss": 0.5416, "step": 5843 }, { "epoch": 0.4333061466597464, "grad_norm": 0.36039406061172485, "learning_rate": 0.00014159539884971243, "loss": 0.5359, "step": 5844 }, { "epoch": 0.43338029213316526, "grad_norm": 0.36981916427612305, "learning_rate": 0.00014158539634908727, "loss": 0.5305, "step": 5845 }, { "epoch": 0.43345443760658414, "grad_norm": 0.34406474232673645, "learning_rate": 0.00014157539384846213, "loss": 0.4826, "step": 5846 }, { "epoch": 0.43352858308000297, "grad_norm": 0.34327325224876404, "learning_rate": 0.00014156539134783697, "loss": 0.4934, "step": 5847 }, { "epoch": 0.4336027285534218, "grad_norm": 0.3813394606113434, "learning_rate": 0.0001415553888472118, "loss": 0.5444, "step": 5848 }, { "epoch": 0.4336768740268407, "grad_norm": 0.3437238037586212, "learning_rate": 0.00014154538634658664, "loss": 0.4658, "step": 5849 }, { "epoch": 0.4337510195002595, "grad_norm": 0.33587202429771423, "learning_rate": 0.0001415353838459615, "loss": 0.462, "step": 5850 }, { "epoch": 0.43382516497367835, "grad_norm": 0.3739044666290283, "learning_rate": 0.00014152538134533634, "loss": 0.5543, "step": 5851 }, { "epoch": 0.43389931044709723, "grad_norm": 0.3450310230255127, "learning_rate": 0.00014151537884471118, "loss": 0.5264, "step": 5852 }, { "epoch": 0.43397345592051606, "grad_norm": 0.38650232553482056, "learning_rate": 0.00014150537634408602, "loss": 0.5257, "step": 5853 }, { "epoch": 0.4340476013939349, "grad_norm": 0.36617058515548706, "learning_rate": 0.00014149537384346088, "loss": 0.5985, "step": 5854 }, { "epoch": 0.4341217468673538, "grad_norm": 0.343817800283432, "learning_rate": 0.00014148537134283572, "loss": 0.4862, "step": 5855 }, { "epoch": 0.4341958923407726, "grad_norm": 0.35988762974739075, "learning_rate": 0.00014147536884221055, "loss": 0.4972, "step": 5856 }, { "epoch": 0.43427003781419143, "grad_norm": 0.33391180634498596, "learning_rate": 0.0001414653663415854, "loss": 0.4907, "step": 5857 }, { "epoch": 0.43434418328761026, "grad_norm": 0.36759012937545776, "learning_rate": 0.00014145536384096026, "loss": 0.5307, "step": 5858 }, { "epoch": 0.43441832876102915, "grad_norm": 0.3458505868911743, "learning_rate": 0.0001414453613403351, "loss": 0.4919, "step": 5859 }, { "epoch": 0.434492474234448, "grad_norm": 0.33904746174812317, "learning_rate": 0.00014143535883970993, "loss": 0.4763, "step": 5860 }, { "epoch": 0.4345666197078668, "grad_norm": 0.32697436213493347, "learning_rate": 0.00014142535633908477, "loss": 0.4952, "step": 5861 }, { "epoch": 0.4346407651812857, "grad_norm": 0.3458271324634552, "learning_rate": 0.00014141535383845963, "loss": 0.4908, "step": 5862 }, { "epoch": 0.4347149106547045, "grad_norm": 0.3731786608695984, "learning_rate": 0.00014140535133783447, "loss": 0.5101, "step": 5863 }, { "epoch": 0.43478905612812335, "grad_norm": 0.3668362498283386, "learning_rate": 0.0001413953488372093, "loss": 0.5033, "step": 5864 }, { "epoch": 0.43486320160154224, "grad_norm": 0.3684491217136383, "learning_rate": 0.00014138534633658417, "loss": 0.5341, "step": 5865 }, { "epoch": 0.43493734707496107, "grad_norm": 0.34411245584487915, "learning_rate": 0.000141375343835959, "loss": 0.5192, "step": 5866 }, { "epoch": 0.4350114925483799, "grad_norm": 0.3640264570713043, "learning_rate": 0.00014136534133533384, "loss": 0.4951, "step": 5867 }, { "epoch": 0.4350856380217988, "grad_norm": 0.36958813667297363, "learning_rate": 0.00014135533883470868, "loss": 0.5031, "step": 5868 }, { "epoch": 0.4351597834952176, "grad_norm": 0.36095497012138367, "learning_rate": 0.00014134533633408355, "loss": 0.5194, "step": 5869 }, { "epoch": 0.43523392896863644, "grad_norm": 0.34124964475631714, "learning_rate": 0.00014133533383345836, "loss": 0.4621, "step": 5870 }, { "epoch": 0.4353080744420553, "grad_norm": 0.3756920397281647, "learning_rate": 0.00014132533133283322, "loss": 0.5273, "step": 5871 }, { "epoch": 0.43538221991547416, "grad_norm": 0.3349483907222748, "learning_rate": 0.00014131532883220806, "loss": 0.5182, "step": 5872 }, { "epoch": 0.435456365388893, "grad_norm": 0.37080228328704834, "learning_rate": 0.00014130532633158292, "loss": 0.5507, "step": 5873 }, { "epoch": 0.43553051086231187, "grad_norm": 0.36952000856399536, "learning_rate": 0.00014129532383095773, "loss": 0.5193, "step": 5874 }, { "epoch": 0.4356046563357307, "grad_norm": 0.362452894449234, "learning_rate": 0.0001412853213303326, "loss": 0.5129, "step": 5875 }, { "epoch": 0.43567880180914953, "grad_norm": 0.3832274079322815, "learning_rate": 0.00014127531882970743, "loss": 0.5665, "step": 5876 }, { "epoch": 0.4357529472825684, "grad_norm": 0.3629010021686554, "learning_rate": 0.0001412653163290823, "loss": 0.5368, "step": 5877 }, { "epoch": 0.43582709275598724, "grad_norm": 0.36655211448669434, "learning_rate": 0.0001412553138284571, "loss": 0.5156, "step": 5878 }, { "epoch": 0.4359012382294061, "grad_norm": 0.3689925968647003, "learning_rate": 0.00014124531132783197, "loss": 0.5023, "step": 5879 }, { "epoch": 0.43597538370282496, "grad_norm": 0.37038975954055786, "learning_rate": 0.0001412353088272068, "loss": 0.5586, "step": 5880 }, { "epoch": 0.4360495291762438, "grad_norm": 0.35822123289108276, "learning_rate": 0.00014122530632658164, "loss": 0.4922, "step": 5881 }, { "epoch": 0.4361236746496626, "grad_norm": 0.3829022943973541, "learning_rate": 0.00014121530382595648, "loss": 0.5496, "step": 5882 }, { "epoch": 0.4361978201230815, "grad_norm": 0.3244225084781647, "learning_rate": 0.00014120530132533135, "loss": 0.4581, "step": 5883 }, { "epoch": 0.43627196559650033, "grad_norm": 0.3651547133922577, "learning_rate": 0.00014119529882470618, "loss": 0.5239, "step": 5884 }, { "epoch": 0.43634611106991916, "grad_norm": 0.34379544854164124, "learning_rate": 0.00014118529632408102, "loss": 0.4748, "step": 5885 }, { "epoch": 0.43642025654333805, "grad_norm": 0.32548657059669495, "learning_rate": 0.00014117529382345586, "loss": 0.4519, "step": 5886 }, { "epoch": 0.4364944020167569, "grad_norm": 0.3780216574668884, "learning_rate": 0.00014116529132283072, "loss": 0.5337, "step": 5887 }, { "epoch": 0.4365685474901757, "grad_norm": 0.3672764003276825, "learning_rate": 0.00014115528882220556, "loss": 0.5516, "step": 5888 }, { "epoch": 0.4366426929635946, "grad_norm": 0.3798353970050812, "learning_rate": 0.0001411452863215804, "loss": 0.5344, "step": 5889 }, { "epoch": 0.4367168384370134, "grad_norm": 0.3593282997608185, "learning_rate": 0.00014113528382095523, "loss": 0.5306, "step": 5890 }, { "epoch": 0.43679098391043225, "grad_norm": 0.3519977927207947, "learning_rate": 0.0001411252813203301, "loss": 0.5161, "step": 5891 }, { "epoch": 0.43686512938385114, "grad_norm": 0.3558397889137268, "learning_rate": 0.00014111527881970493, "loss": 0.5013, "step": 5892 }, { "epoch": 0.43693927485726997, "grad_norm": 0.36890068650245667, "learning_rate": 0.00014110527631907977, "loss": 0.4983, "step": 5893 }, { "epoch": 0.4370134203306888, "grad_norm": 0.39057159423828125, "learning_rate": 0.0001410952738184546, "loss": 0.4955, "step": 5894 }, { "epoch": 0.4370875658041077, "grad_norm": 0.37591904401779175, "learning_rate": 0.00014108527131782947, "loss": 0.5251, "step": 5895 }, { "epoch": 0.4371617112775265, "grad_norm": 0.35917380452156067, "learning_rate": 0.0001410752688172043, "loss": 0.5332, "step": 5896 }, { "epoch": 0.43723585675094534, "grad_norm": 0.35792702436447144, "learning_rate": 0.00014106526631657915, "loss": 0.5257, "step": 5897 }, { "epoch": 0.4373100022243642, "grad_norm": 0.36899256706237793, "learning_rate": 0.000141055263815954, "loss": 0.4971, "step": 5898 }, { "epoch": 0.43738414769778305, "grad_norm": 0.33504316210746765, "learning_rate": 0.00014104526131532885, "loss": 0.5032, "step": 5899 }, { "epoch": 0.4374582931712019, "grad_norm": 0.38043534755706787, "learning_rate": 0.00014103525881470368, "loss": 0.5717, "step": 5900 }, { "epoch": 0.43753243864462077, "grad_norm": 0.35315027832984924, "learning_rate": 0.00014102525631407852, "loss": 0.5147, "step": 5901 }, { "epoch": 0.4376065841180396, "grad_norm": 0.35690203309059143, "learning_rate": 0.00014101525381345339, "loss": 0.5077, "step": 5902 }, { "epoch": 0.43768072959145843, "grad_norm": 0.35799840092658997, "learning_rate": 0.00014100525131282822, "loss": 0.5361, "step": 5903 }, { "epoch": 0.4377548750648773, "grad_norm": 0.3642095625400543, "learning_rate": 0.00014099524881220306, "loss": 0.5142, "step": 5904 }, { "epoch": 0.43782902053829614, "grad_norm": 0.3549066483974457, "learning_rate": 0.0001409852463115779, "loss": 0.5033, "step": 5905 }, { "epoch": 0.437903166011715, "grad_norm": 0.36307293176651, "learning_rate": 0.00014097524381095276, "loss": 0.5053, "step": 5906 }, { "epoch": 0.43797731148513386, "grad_norm": 0.3465915620326996, "learning_rate": 0.00014096524131032757, "loss": 0.4936, "step": 5907 }, { "epoch": 0.4380514569585527, "grad_norm": 0.3381500840187073, "learning_rate": 0.00014095523880970244, "loss": 0.4955, "step": 5908 }, { "epoch": 0.4381256024319715, "grad_norm": 0.36438095569610596, "learning_rate": 0.00014094523630907727, "loss": 0.5083, "step": 5909 }, { "epoch": 0.4381997479053904, "grad_norm": 0.3561786711215973, "learning_rate": 0.00014093523380845214, "loss": 0.5093, "step": 5910 }, { "epoch": 0.43827389337880923, "grad_norm": 0.3420898914337158, "learning_rate": 0.00014092523130782695, "loss": 0.4926, "step": 5911 }, { "epoch": 0.43834803885222806, "grad_norm": 0.351216197013855, "learning_rate": 0.0001409152288072018, "loss": 0.5291, "step": 5912 }, { "epoch": 0.43842218432564695, "grad_norm": 0.3804541826248169, "learning_rate": 0.00014090522630657665, "loss": 0.5408, "step": 5913 }, { "epoch": 0.4384963297990658, "grad_norm": 0.34546878933906555, "learning_rate": 0.0001408952238059515, "loss": 0.5204, "step": 5914 }, { "epoch": 0.4385704752724846, "grad_norm": 0.3252241015434265, "learning_rate": 0.00014088522130532632, "loss": 0.469, "step": 5915 }, { "epoch": 0.43864462074590344, "grad_norm": 0.35157614946365356, "learning_rate": 0.0001408752188047012, "loss": 0.4938, "step": 5916 }, { "epoch": 0.4387187662193223, "grad_norm": 0.33298832178115845, "learning_rate": 0.00014086521630407602, "loss": 0.4757, "step": 5917 }, { "epoch": 0.43879291169274115, "grad_norm": 0.3568967580795288, "learning_rate": 0.00014085521380345086, "loss": 0.4889, "step": 5918 }, { "epoch": 0.43886705716616, "grad_norm": 0.36707714200019836, "learning_rate": 0.0001408452113028257, "loss": 0.5326, "step": 5919 }, { "epoch": 0.43894120263957886, "grad_norm": 0.3484991788864136, "learning_rate": 0.00014083520880220056, "loss": 0.4927, "step": 5920 }, { "epoch": 0.4390153481129977, "grad_norm": 0.3365621566772461, "learning_rate": 0.0001408252063015754, "loss": 0.5097, "step": 5921 }, { "epoch": 0.4390894935864165, "grad_norm": 0.3842115104198456, "learning_rate": 0.00014081520380095024, "loss": 0.5375, "step": 5922 }, { "epoch": 0.4391636390598354, "grad_norm": 0.3486058712005615, "learning_rate": 0.00014080520130032507, "loss": 0.4673, "step": 5923 }, { "epoch": 0.43923778453325424, "grad_norm": 0.3516964018344879, "learning_rate": 0.00014079519879969994, "loss": 0.5103, "step": 5924 }, { "epoch": 0.43931193000667307, "grad_norm": 0.35428354144096375, "learning_rate": 0.00014078519629907477, "loss": 0.5089, "step": 5925 }, { "epoch": 0.43938607548009195, "grad_norm": 0.36226484179496765, "learning_rate": 0.0001407751937984496, "loss": 0.5387, "step": 5926 }, { "epoch": 0.4394602209535108, "grad_norm": 0.36973002552986145, "learning_rate": 0.00014076519129782445, "loss": 0.5324, "step": 5927 }, { "epoch": 0.4395343664269296, "grad_norm": 0.38934221863746643, "learning_rate": 0.0001407551887971993, "loss": 0.5497, "step": 5928 }, { "epoch": 0.4396085119003485, "grad_norm": 0.35622870922088623, "learning_rate": 0.00014074518629657415, "loss": 0.4945, "step": 5929 }, { "epoch": 0.4396826573737673, "grad_norm": 0.36292609572410583, "learning_rate": 0.000140735183795949, "loss": 0.5361, "step": 5930 }, { "epoch": 0.43975680284718616, "grad_norm": 0.3629823625087738, "learning_rate": 0.00014072518129532385, "loss": 0.5354, "step": 5931 }, { "epoch": 0.43983094832060504, "grad_norm": 0.39258673787117004, "learning_rate": 0.0001407151787946987, "loss": 0.515, "step": 5932 }, { "epoch": 0.43990509379402387, "grad_norm": 0.36137619614601135, "learning_rate": 0.00014070517629407353, "loss": 0.5366, "step": 5933 }, { "epoch": 0.4399792392674427, "grad_norm": 0.3350689113140106, "learning_rate": 0.00014069517379344836, "loss": 0.4867, "step": 5934 }, { "epoch": 0.4400533847408616, "grad_norm": 0.35050907731056213, "learning_rate": 0.00014068517129282323, "loss": 0.4894, "step": 5935 }, { "epoch": 0.4401275302142804, "grad_norm": 0.35911116003990173, "learning_rate": 0.00014067516879219806, "loss": 0.5096, "step": 5936 }, { "epoch": 0.44020167568769925, "grad_norm": 0.36334970593452454, "learning_rate": 0.0001406651662915729, "loss": 0.5413, "step": 5937 }, { "epoch": 0.44027582116111813, "grad_norm": 0.38821667432785034, "learning_rate": 0.00014065516379094774, "loss": 0.5736, "step": 5938 }, { "epoch": 0.44034996663453696, "grad_norm": 0.3838498890399933, "learning_rate": 0.0001406451612903226, "loss": 0.5751, "step": 5939 }, { "epoch": 0.4404241121079558, "grad_norm": 0.35044533014297485, "learning_rate": 0.00014063515878969744, "loss": 0.5207, "step": 5940 }, { "epoch": 0.4404982575813747, "grad_norm": 0.3734649121761322, "learning_rate": 0.00014062515628907228, "loss": 0.5294, "step": 5941 }, { "epoch": 0.4405724030547935, "grad_norm": 0.3646673262119293, "learning_rate": 0.00014061515378844711, "loss": 0.5456, "step": 5942 }, { "epoch": 0.44064654852821233, "grad_norm": 0.3737414479255676, "learning_rate": 0.00014060515128782198, "loss": 0.546, "step": 5943 }, { "epoch": 0.4407206940016312, "grad_norm": 0.3447316586971283, "learning_rate": 0.0001405951487871968, "loss": 0.4618, "step": 5944 }, { "epoch": 0.44079483947505005, "grad_norm": 0.3626424968242645, "learning_rate": 0.00014058514628657165, "loss": 0.5172, "step": 5945 }, { "epoch": 0.4408689849484689, "grad_norm": 0.37271609902381897, "learning_rate": 0.0001405751437859465, "loss": 0.5162, "step": 5946 }, { "epoch": 0.44094313042188776, "grad_norm": 0.3455592095851898, "learning_rate": 0.00014056514128532135, "loss": 0.4718, "step": 5947 }, { "epoch": 0.4410172758953066, "grad_norm": 0.3887438178062439, "learning_rate": 0.00014055513878469616, "loss": 0.5072, "step": 5948 }, { "epoch": 0.4410914213687254, "grad_norm": 0.3684934675693512, "learning_rate": 0.00014054513628407103, "loss": 0.5595, "step": 5949 }, { "epoch": 0.4411655668421443, "grad_norm": 0.35878652334213257, "learning_rate": 0.00014053513378344586, "loss": 0.4829, "step": 5950 }, { "epoch": 0.44123971231556314, "grad_norm": 0.35023343563079834, "learning_rate": 0.00014052513128282073, "loss": 0.4925, "step": 5951 }, { "epoch": 0.44131385778898197, "grad_norm": 0.37340864539146423, "learning_rate": 0.00014051512878219554, "loss": 0.4884, "step": 5952 }, { "epoch": 0.44138800326240085, "grad_norm": 0.3759305775165558, "learning_rate": 0.0001405051262815704, "loss": 0.5104, "step": 5953 }, { "epoch": 0.4414621487358197, "grad_norm": 0.3489420711994171, "learning_rate": 0.00014049512378094524, "loss": 0.4969, "step": 5954 }, { "epoch": 0.4415362942092385, "grad_norm": 0.3807731866836548, "learning_rate": 0.0001404851212803201, "loss": 0.5116, "step": 5955 }, { "epoch": 0.4416104396826574, "grad_norm": 0.38212233781814575, "learning_rate": 0.00014047511877969491, "loss": 0.5069, "step": 5956 }, { "epoch": 0.4416845851560762, "grad_norm": 0.3566282391548157, "learning_rate": 0.00014046511627906978, "loss": 0.4907, "step": 5957 }, { "epoch": 0.44175873062949506, "grad_norm": 0.39055198431015015, "learning_rate": 0.00014045511377844462, "loss": 0.5342, "step": 5958 }, { "epoch": 0.44183287610291394, "grad_norm": 0.33370789885520935, "learning_rate": 0.00014044511127781945, "loss": 0.4939, "step": 5959 }, { "epoch": 0.44190702157633277, "grad_norm": 0.3732883632183075, "learning_rate": 0.0001404351087771943, "loss": 0.5163, "step": 5960 }, { "epoch": 0.4419811670497516, "grad_norm": 0.36549362540245056, "learning_rate": 0.00014042510627656915, "loss": 0.5184, "step": 5961 }, { "epoch": 0.4420553125231705, "grad_norm": 0.35846373438835144, "learning_rate": 0.000140415103775944, "loss": 0.5382, "step": 5962 }, { "epoch": 0.4421294579965893, "grad_norm": 0.35038211941719055, "learning_rate": 0.00014040510127531883, "loss": 0.5043, "step": 5963 }, { "epoch": 0.44220360347000814, "grad_norm": 0.3517366051673889, "learning_rate": 0.00014039509877469367, "loss": 0.4897, "step": 5964 }, { "epoch": 0.44227774894342703, "grad_norm": 0.3602326512336731, "learning_rate": 0.00014038509627406853, "loss": 0.5166, "step": 5965 }, { "epoch": 0.44235189441684586, "grad_norm": 0.35004180669784546, "learning_rate": 0.00014037509377344337, "loss": 0.5097, "step": 5966 }, { "epoch": 0.4424260398902647, "grad_norm": 0.35688212513923645, "learning_rate": 0.0001403650912728182, "loss": 0.5028, "step": 5967 }, { "epoch": 0.4425001853636836, "grad_norm": 0.32430094480514526, "learning_rate": 0.00014035508877219307, "loss": 0.4774, "step": 5968 }, { "epoch": 0.4425743308371024, "grad_norm": 0.3682028651237488, "learning_rate": 0.0001403450862715679, "loss": 0.5816, "step": 5969 }, { "epoch": 0.44264847631052123, "grad_norm": 0.36073368787765503, "learning_rate": 0.00014033508377094274, "loss": 0.5467, "step": 5970 }, { "epoch": 0.4427226217839401, "grad_norm": 0.3599269986152649, "learning_rate": 0.00014032508127031758, "loss": 0.5398, "step": 5971 }, { "epoch": 0.44279676725735895, "grad_norm": 0.3478601276874542, "learning_rate": 0.00014031507876969244, "loss": 0.4898, "step": 5972 }, { "epoch": 0.4428709127307778, "grad_norm": 0.3733583390712738, "learning_rate": 0.00014030507626906728, "loss": 0.5365, "step": 5973 }, { "epoch": 0.4429450582041966, "grad_norm": 0.39190205931663513, "learning_rate": 0.00014029507376844212, "loss": 0.5345, "step": 5974 }, { "epoch": 0.4430192036776155, "grad_norm": 0.3586801290512085, "learning_rate": 0.00014028507126781695, "loss": 0.505, "step": 5975 }, { "epoch": 0.4430933491510343, "grad_norm": 0.3495999574661255, "learning_rate": 0.00014027506876719182, "loss": 0.5264, "step": 5976 }, { "epoch": 0.44316749462445315, "grad_norm": 0.3424350321292877, "learning_rate": 0.00014026506626656666, "loss": 0.4996, "step": 5977 }, { "epoch": 0.44324164009787204, "grad_norm": 0.3528473377227783, "learning_rate": 0.0001402550637659415, "loss": 0.5456, "step": 5978 }, { "epoch": 0.44331578557129087, "grad_norm": 0.3525545299053192, "learning_rate": 0.00014024506126531633, "loss": 0.5224, "step": 5979 }, { "epoch": 0.4433899310447097, "grad_norm": 0.35573703050613403, "learning_rate": 0.0001402350587646912, "loss": 0.5217, "step": 5980 }, { "epoch": 0.4434640765181286, "grad_norm": 0.36633116006851196, "learning_rate": 0.000140225056264066, "loss": 0.5256, "step": 5981 }, { "epoch": 0.4435382219915474, "grad_norm": 0.3661402463912964, "learning_rate": 0.00014021505376344087, "loss": 0.5452, "step": 5982 }, { "epoch": 0.44361236746496624, "grad_norm": 0.3520137071609497, "learning_rate": 0.0001402050512628157, "loss": 0.4885, "step": 5983 }, { "epoch": 0.4436865129383851, "grad_norm": 0.33125272393226624, "learning_rate": 0.00014019504876219057, "loss": 0.4958, "step": 5984 }, { "epoch": 0.44376065841180395, "grad_norm": 0.34634360671043396, "learning_rate": 0.00014018504626156538, "loss": 0.5069, "step": 5985 }, { "epoch": 0.4438348038852228, "grad_norm": 0.3993016481399536, "learning_rate": 0.00014017504376094024, "loss": 0.5443, "step": 5986 }, { "epoch": 0.44390894935864167, "grad_norm": 0.36959800124168396, "learning_rate": 0.00014016504126031508, "loss": 0.4997, "step": 5987 }, { "epoch": 0.4439830948320605, "grad_norm": 0.3718577027320862, "learning_rate": 0.00014015503875968995, "loss": 0.4951, "step": 5988 }, { "epoch": 0.44405724030547933, "grad_norm": 0.3420746326446533, "learning_rate": 0.00014014503625906476, "loss": 0.5105, "step": 5989 }, { "epoch": 0.4441313857788982, "grad_norm": 0.34687307476997375, "learning_rate": 0.00014013503375843962, "loss": 0.4776, "step": 5990 }, { "epoch": 0.44420553125231704, "grad_norm": 0.3604744076728821, "learning_rate": 0.00014012503125781446, "loss": 0.5246, "step": 5991 }, { "epoch": 0.4442796767257359, "grad_norm": 0.35398539900779724, "learning_rate": 0.00014011502875718932, "loss": 0.4775, "step": 5992 }, { "epoch": 0.44435382219915476, "grad_norm": 0.3545844554901123, "learning_rate": 0.00014010502625656413, "loss": 0.4984, "step": 5993 }, { "epoch": 0.4444279676725736, "grad_norm": 0.3518257439136505, "learning_rate": 0.000140095023755939, "loss": 0.5078, "step": 5994 }, { "epoch": 0.4445021131459924, "grad_norm": 0.3656350374221802, "learning_rate": 0.00014008502125531383, "loss": 0.485, "step": 5995 }, { "epoch": 0.4445762586194113, "grad_norm": 0.34465456008911133, "learning_rate": 0.00014007501875468867, "loss": 0.4953, "step": 5996 }, { "epoch": 0.44465040409283013, "grad_norm": 0.3767096996307373, "learning_rate": 0.0001400650162540635, "loss": 0.4956, "step": 5997 }, { "epoch": 0.44472454956624896, "grad_norm": 0.3863239884376526, "learning_rate": 0.00014005501375343837, "loss": 0.5186, "step": 5998 }, { "epoch": 0.44479869503966785, "grad_norm": 0.34915661811828613, "learning_rate": 0.0001400450112528132, "loss": 0.4811, "step": 5999 }, { "epoch": 0.4448728405130867, "grad_norm": 0.3644023835659027, "learning_rate": 0.00014003500875218804, "loss": 0.53, "step": 6000 }, { "epoch": 0.4449469859865055, "grad_norm": 0.36781415343284607, "learning_rate": 0.0001400250062515629, "loss": 0.5061, "step": 6001 }, { "epoch": 0.4450211314599244, "grad_norm": 0.37486913800239563, "learning_rate": 0.00014001500375093775, "loss": 0.4948, "step": 6002 }, { "epoch": 0.4450952769333432, "grad_norm": 0.3491513431072235, "learning_rate": 0.00014000500125031258, "loss": 0.4853, "step": 6003 }, { "epoch": 0.44516942240676205, "grad_norm": 0.3717685043811798, "learning_rate": 0.00013999499874968742, "loss": 0.5345, "step": 6004 }, { "epoch": 0.44524356788018093, "grad_norm": 0.35514238476753235, "learning_rate": 0.00013998499624906228, "loss": 0.4843, "step": 6005 }, { "epoch": 0.44531771335359976, "grad_norm": 0.3424520790576935, "learning_rate": 0.00013997499374843712, "loss": 0.4749, "step": 6006 }, { "epoch": 0.4453918588270186, "grad_norm": 0.3430659770965576, "learning_rate": 0.00013996499124781196, "loss": 0.5237, "step": 6007 }, { "epoch": 0.4454660043004375, "grad_norm": 0.3601722717285156, "learning_rate": 0.0001399549887471868, "loss": 0.4899, "step": 6008 }, { "epoch": 0.4455401497738563, "grad_norm": 0.34917473793029785, "learning_rate": 0.00013994498624656166, "loss": 0.5079, "step": 6009 }, { "epoch": 0.44561429524727514, "grad_norm": 0.36029258370399475, "learning_rate": 0.0001399349837459365, "loss": 0.5072, "step": 6010 }, { "epoch": 0.445688440720694, "grad_norm": 0.3655228614807129, "learning_rate": 0.00013992498124531133, "loss": 0.5361, "step": 6011 }, { "epoch": 0.44576258619411285, "grad_norm": 0.37276700139045715, "learning_rate": 0.00013991497874468617, "loss": 0.4976, "step": 6012 }, { "epoch": 0.4458367316675317, "grad_norm": 0.36007630825042725, "learning_rate": 0.00013990497624406103, "loss": 0.5108, "step": 6013 }, { "epoch": 0.44591087714095057, "grad_norm": 0.34819498658180237, "learning_rate": 0.00013989497374343587, "loss": 0.5282, "step": 6014 }, { "epoch": 0.4459850226143694, "grad_norm": 0.3518850803375244, "learning_rate": 0.0001398849712428107, "loss": 0.5274, "step": 6015 }, { "epoch": 0.4460591680877882, "grad_norm": 0.3472367823123932, "learning_rate": 0.00013987496874218555, "loss": 0.5217, "step": 6016 }, { "epoch": 0.4461333135612071, "grad_norm": 0.3533244729042053, "learning_rate": 0.0001398649662415604, "loss": 0.5521, "step": 6017 }, { "epoch": 0.44620745903462594, "grad_norm": 0.3577701449394226, "learning_rate": 0.00013985496374093522, "loss": 0.4948, "step": 6018 }, { "epoch": 0.44628160450804477, "grad_norm": 0.3529660701751709, "learning_rate": 0.00013984496124031008, "loss": 0.5541, "step": 6019 }, { "epoch": 0.44635574998146366, "grad_norm": 0.3428351581096649, "learning_rate": 0.00013983495873968492, "loss": 0.52, "step": 6020 }, { "epoch": 0.4464298954548825, "grad_norm": 0.37322455644607544, "learning_rate": 0.00013982495623905979, "loss": 0.5336, "step": 6021 }, { "epoch": 0.4465040409283013, "grad_norm": 0.3422256112098694, "learning_rate": 0.0001398149537384346, "loss": 0.4876, "step": 6022 }, { "epoch": 0.4465781864017202, "grad_norm": 0.37490010261535645, "learning_rate": 0.00013980495123780946, "loss": 0.5258, "step": 6023 }, { "epoch": 0.44665233187513903, "grad_norm": 0.35858505964279175, "learning_rate": 0.0001397949487371843, "loss": 0.5095, "step": 6024 }, { "epoch": 0.44672647734855786, "grad_norm": 0.3800572454929352, "learning_rate": 0.00013978494623655916, "loss": 0.5429, "step": 6025 }, { "epoch": 0.44680062282197675, "grad_norm": 0.3621794283390045, "learning_rate": 0.00013977494373593397, "loss": 0.5299, "step": 6026 }, { "epoch": 0.4468747682953956, "grad_norm": 0.35545557737350464, "learning_rate": 0.00013976494123530884, "loss": 0.5307, "step": 6027 }, { "epoch": 0.4469489137688144, "grad_norm": 0.37408682703971863, "learning_rate": 0.00013975493873468367, "loss": 0.5379, "step": 6028 }, { "epoch": 0.4470230592422333, "grad_norm": 0.38016632199287415, "learning_rate": 0.00013974493623405854, "loss": 0.5407, "step": 6029 }, { "epoch": 0.4470972047156521, "grad_norm": 0.35201555490493774, "learning_rate": 0.00013973493373343335, "loss": 0.5175, "step": 6030 }, { "epoch": 0.44717135018907095, "grad_norm": 0.35836362838745117, "learning_rate": 0.0001397249312328082, "loss": 0.5383, "step": 6031 }, { "epoch": 0.4472454956624898, "grad_norm": 0.3489021956920624, "learning_rate": 0.00013971492873218305, "loss": 0.4872, "step": 6032 }, { "epoch": 0.44731964113590866, "grad_norm": 0.37305670976638794, "learning_rate": 0.00013970492623155789, "loss": 0.5306, "step": 6033 }, { "epoch": 0.4473937866093275, "grad_norm": 0.3539618253707886, "learning_rate": 0.00013969492373093275, "loss": 0.5072, "step": 6034 }, { "epoch": 0.4474679320827463, "grad_norm": 0.3517804741859436, "learning_rate": 0.0001396849212303076, "loss": 0.5112, "step": 6035 }, { "epoch": 0.4475420775561652, "grad_norm": 0.33018118143081665, "learning_rate": 0.00013967491872968242, "loss": 0.4757, "step": 6036 }, { "epoch": 0.44761622302958404, "grad_norm": 0.33588534593582153, "learning_rate": 0.00013966491622905726, "loss": 0.4826, "step": 6037 }, { "epoch": 0.44769036850300287, "grad_norm": 0.3429635167121887, "learning_rate": 0.00013965491372843212, "loss": 0.4906, "step": 6038 }, { "epoch": 0.44776451397642175, "grad_norm": 0.3393740653991699, "learning_rate": 0.00013964491122780696, "loss": 0.4914, "step": 6039 }, { "epoch": 0.4478386594498406, "grad_norm": 0.35223105549812317, "learning_rate": 0.0001396349087271818, "loss": 0.4873, "step": 6040 }, { "epoch": 0.4479128049232594, "grad_norm": 0.3425414264202118, "learning_rate": 0.00013962490622655664, "loss": 0.5094, "step": 6041 }, { "epoch": 0.4479869503966783, "grad_norm": 0.369923859834671, "learning_rate": 0.0001396149037259315, "loss": 0.5244, "step": 6042 }, { "epoch": 0.4480610958700971, "grad_norm": 0.3499971330165863, "learning_rate": 0.00013960490122530634, "loss": 0.5279, "step": 6043 }, { "epoch": 0.44813524134351596, "grad_norm": 0.3499656319618225, "learning_rate": 0.00013959489872468117, "loss": 0.5092, "step": 6044 }, { "epoch": 0.44820938681693484, "grad_norm": 0.37282824516296387, "learning_rate": 0.000139584896224056, "loss": 0.5421, "step": 6045 }, { "epoch": 0.44828353229035367, "grad_norm": 0.37012094259262085, "learning_rate": 0.00013957489372343088, "loss": 0.5379, "step": 6046 }, { "epoch": 0.4483576777637725, "grad_norm": 0.35518714785575867, "learning_rate": 0.0001395648912228057, "loss": 0.536, "step": 6047 }, { "epoch": 0.4484318232371914, "grad_norm": 0.355271577835083, "learning_rate": 0.00013955488872218055, "loss": 0.5138, "step": 6048 }, { "epoch": 0.4485059687106102, "grad_norm": 0.34308403730392456, "learning_rate": 0.0001395448862215554, "loss": 0.4939, "step": 6049 }, { "epoch": 0.44858011418402904, "grad_norm": 0.35449379682540894, "learning_rate": 0.00013953488372093025, "loss": 0.5355, "step": 6050 }, { "epoch": 0.44865425965744793, "grad_norm": 0.36554473638534546, "learning_rate": 0.0001395248812203051, "loss": 0.5371, "step": 6051 }, { "epoch": 0.44872840513086676, "grad_norm": 0.3710390329360962, "learning_rate": 0.00013951487871967993, "loss": 0.5008, "step": 6052 }, { "epoch": 0.4488025506042856, "grad_norm": 0.34797707200050354, "learning_rate": 0.00013950487621905476, "loss": 0.4928, "step": 6053 }, { "epoch": 0.4488766960777045, "grad_norm": 0.3544122278690338, "learning_rate": 0.00013949487371842963, "loss": 0.5058, "step": 6054 }, { "epoch": 0.4489508415511233, "grad_norm": 0.37233108282089233, "learning_rate": 0.00013948487121780444, "loss": 0.5042, "step": 6055 }, { "epoch": 0.44902498702454213, "grad_norm": 0.37828516960144043, "learning_rate": 0.0001394748687171793, "loss": 0.507, "step": 6056 }, { "epoch": 0.449099132497961, "grad_norm": 0.35935208201408386, "learning_rate": 0.00013946486621655414, "loss": 0.48, "step": 6057 }, { "epoch": 0.44917327797137985, "grad_norm": 0.3739432990550995, "learning_rate": 0.000139454863715929, "loss": 0.5413, "step": 6058 }, { "epoch": 0.4492474234447987, "grad_norm": 0.36761677265167236, "learning_rate": 0.0001394448612153038, "loss": 0.563, "step": 6059 }, { "epoch": 0.44932156891821756, "grad_norm": 0.35509002208709717, "learning_rate": 0.00013943485871467868, "loss": 0.5174, "step": 6060 }, { "epoch": 0.4493957143916364, "grad_norm": 0.3639644682407379, "learning_rate": 0.0001394248562140535, "loss": 0.5422, "step": 6061 }, { "epoch": 0.4494698598650552, "grad_norm": 0.32326099276542664, "learning_rate": 0.00013941485371342838, "loss": 0.4886, "step": 6062 }, { "epoch": 0.4495440053384741, "grad_norm": 0.3404315710067749, "learning_rate": 0.0001394048512128032, "loss": 0.4929, "step": 6063 }, { "epoch": 0.44961815081189294, "grad_norm": 0.34649187326431274, "learning_rate": 0.00013939484871217805, "loss": 0.495, "step": 6064 }, { "epoch": 0.44969229628531177, "grad_norm": 0.3294244110584259, "learning_rate": 0.0001393848462115529, "loss": 0.4957, "step": 6065 }, { "epoch": 0.44976644175873065, "grad_norm": 0.3862980008125305, "learning_rate": 0.00013937484371092775, "loss": 0.5227, "step": 6066 }, { "epoch": 0.4498405872321495, "grad_norm": 0.367343008518219, "learning_rate": 0.0001393648412103026, "loss": 0.529, "step": 6067 }, { "epoch": 0.4499147327055683, "grad_norm": 0.3558178246021271, "learning_rate": 0.00013935483870967743, "loss": 0.509, "step": 6068 }, { "epoch": 0.4499888781789872, "grad_norm": 0.3481386601924896, "learning_rate": 0.00013934483620905226, "loss": 0.506, "step": 6069 }, { "epoch": 0.450063023652406, "grad_norm": 0.3568516671657562, "learning_rate": 0.0001393348337084271, "loss": 0.5354, "step": 6070 }, { "epoch": 0.45013716912582485, "grad_norm": 0.36717692017555237, "learning_rate": 0.00013932483120780197, "loss": 0.4705, "step": 6071 }, { "epoch": 0.45021131459924374, "grad_norm": 0.36991745233535767, "learning_rate": 0.0001393148287071768, "loss": 0.5304, "step": 6072 }, { "epoch": 0.45028546007266257, "grad_norm": 0.3574371039867401, "learning_rate": 0.00013930482620655164, "loss": 0.537, "step": 6073 }, { "epoch": 0.4503596055460814, "grad_norm": 0.367008775472641, "learning_rate": 0.00013929482370592648, "loss": 0.488, "step": 6074 }, { "epoch": 0.4504337510195003, "grad_norm": 0.3584689497947693, "learning_rate": 0.00013928482120530134, "loss": 0.4892, "step": 6075 }, { "epoch": 0.4505078964929191, "grad_norm": 0.33398449420928955, "learning_rate": 0.00013927481870467618, "loss": 0.478, "step": 6076 }, { "epoch": 0.45058204196633794, "grad_norm": 0.34777191281318665, "learning_rate": 0.00013926481620405104, "loss": 0.5044, "step": 6077 }, { "epoch": 0.45065618743975683, "grad_norm": 0.37110039591789246, "learning_rate": 0.00013925481370342585, "loss": 0.5074, "step": 6078 }, { "epoch": 0.45073033291317566, "grad_norm": 0.349420964717865, "learning_rate": 0.00013924481120280072, "loss": 0.497, "step": 6079 }, { "epoch": 0.4508044783865945, "grad_norm": 0.3454957604408264, "learning_rate": 0.00013923480870217555, "loss": 0.4829, "step": 6080 }, { "epoch": 0.45087862386001337, "grad_norm": 0.35292160511016846, "learning_rate": 0.0001392248062015504, "loss": 0.4939, "step": 6081 }, { "epoch": 0.4509527693334322, "grad_norm": 0.37174123525619507, "learning_rate": 0.00013921480370092523, "loss": 0.5408, "step": 6082 }, { "epoch": 0.45102691480685103, "grad_norm": 0.37629273533821106, "learning_rate": 0.0001392048012003001, "loss": 0.5027, "step": 6083 }, { "epoch": 0.4511010602802699, "grad_norm": 0.38619759678840637, "learning_rate": 0.00013919479869967493, "loss": 0.5593, "step": 6084 }, { "epoch": 0.45117520575368875, "grad_norm": 0.34324538707733154, "learning_rate": 0.00013918479619904977, "loss": 0.4873, "step": 6085 }, { "epoch": 0.4512493512271076, "grad_norm": 0.3786071240901947, "learning_rate": 0.0001391747936984246, "loss": 0.5252, "step": 6086 }, { "epoch": 0.4513234967005264, "grad_norm": 0.360135018825531, "learning_rate": 0.00013916479119779947, "loss": 0.5068, "step": 6087 }, { "epoch": 0.4513976421739453, "grad_norm": 0.3714231252670288, "learning_rate": 0.0001391547886971743, "loss": 0.5134, "step": 6088 }, { "epoch": 0.4514717876473641, "grad_norm": 0.3467938005924225, "learning_rate": 0.00013914478619654914, "loss": 0.4756, "step": 6089 }, { "epoch": 0.45154593312078295, "grad_norm": 0.39044806361198425, "learning_rate": 0.00013913478369592398, "loss": 0.5244, "step": 6090 }, { "epoch": 0.45162007859420183, "grad_norm": 0.3666701316833496, "learning_rate": 0.00013912478119529884, "loss": 0.57, "step": 6091 }, { "epoch": 0.45169422406762066, "grad_norm": 0.3572736084461212, "learning_rate": 0.00013911477869467365, "loss": 0.511, "step": 6092 }, { "epoch": 0.4517683695410395, "grad_norm": 0.4170839488506317, "learning_rate": 0.00013910477619404852, "loss": 0.5368, "step": 6093 }, { "epoch": 0.4518425150144584, "grad_norm": 0.35615167021751404, "learning_rate": 0.00013909477369342335, "loss": 0.5126, "step": 6094 }, { "epoch": 0.4519166604878772, "grad_norm": 0.3434775769710541, "learning_rate": 0.00013908477119279822, "loss": 0.4868, "step": 6095 }, { "epoch": 0.45199080596129604, "grad_norm": 0.3459241986274719, "learning_rate": 0.00013907476869217303, "loss": 0.4984, "step": 6096 }, { "epoch": 0.4520649514347149, "grad_norm": 0.3578900396823883, "learning_rate": 0.0001390647661915479, "loss": 0.5083, "step": 6097 }, { "epoch": 0.45213909690813375, "grad_norm": 0.35885727405548096, "learning_rate": 0.00013905476369092273, "loss": 0.514, "step": 6098 }, { "epoch": 0.4522132423815526, "grad_norm": 0.35854703187942505, "learning_rate": 0.0001390447611902976, "loss": 0.5202, "step": 6099 }, { "epoch": 0.45228738785497147, "grad_norm": 0.3815547525882721, "learning_rate": 0.00013903475868967243, "loss": 0.5152, "step": 6100 }, { "epoch": 0.4523615333283903, "grad_norm": 0.3541218638420105, "learning_rate": 0.00013902475618904727, "loss": 0.5149, "step": 6101 }, { "epoch": 0.4524356788018091, "grad_norm": 0.36035895347595215, "learning_rate": 0.0001390147536884221, "loss": 0.5241, "step": 6102 }, { "epoch": 0.452509824275228, "grad_norm": 0.37207940220832825, "learning_rate": 0.00013900475118779697, "loss": 0.4864, "step": 6103 }, { "epoch": 0.45258396974864684, "grad_norm": 0.3524438738822937, "learning_rate": 0.0001389947486871718, "loss": 0.5045, "step": 6104 }, { "epoch": 0.45265811522206567, "grad_norm": 0.36215242743492126, "learning_rate": 0.00013898474618654664, "loss": 0.5495, "step": 6105 }, { "epoch": 0.45273226069548456, "grad_norm": 0.3487512469291687, "learning_rate": 0.00013897474368592148, "loss": 0.5196, "step": 6106 }, { "epoch": 0.4528064061689034, "grad_norm": 0.35316577553749084, "learning_rate": 0.00013896474118529632, "loss": 0.4935, "step": 6107 }, { "epoch": 0.4528805516423222, "grad_norm": 0.3506993353366852, "learning_rate": 0.00013895473868467118, "loss": 0.5235, "step": 6108 }, { "epoch": 0.4529546971157411, "grad_norm": 0.3598221242427826, "learning_rate": 0.00013894473618404602, "loss": 0.5205, "step": 6109 }, { "epoch": 0.45302884258915993, "grad_norm": 0.34521618485450745, "learning_rate": 0.00013893473368342088, "loss": 0.5047, "step": 6110 }, { "epoch": 0.45310298806257876, "grad_norm": 0.36984413862228394, "learning_rate": 0.0001389247311827957, "loss": 0.5026, "step": 6111 }, { "epoch": 0.45317713353599764, "grad_norm": 0.3575895428657532, "learning_rate": 0.00013891472868217056, "loss": 0.5267, "step": 6112 }, { "epoch": 0.4532512790094165, "grad_norm": 0.34999987483024597, "learning_rate": 0.0001389047261815454, "loss": 0.4728, "step": 6113 }, { "epoch": 0.4533254244828353, "grad_norm": 0.33895543217658997, "learning_rate": 0.00013889472368092026, "loss": 0.4826, "step": 6114 }, { "epoch": 0.4533995699562542, "grad_norm": 0.3754728436470032, "learning_rate": 0.00013888472118029507, "loss": 0.5575, "step": 6115 }, { "epoch": 0.453473715429673, "grad_norm": 0.3734983801841736, "learning_rate": 0.00013887471867966993, "loss": 0.5181, "step": 6116 }, { "epoch": 0.45354786090309185, "grad_norm": 0.37768229842185974, "learning_rate": 0.00013886471617904477, "loss": 0.5561, "step": 6117 }, { "epoch": 0.45362200637651073, "grad_norm": 0.40374359488487244, "learning_rate": 0.0001388547136784196, "loss": 0.5435, "step": 6118 }, { "epoch": 0.45369615184992956, "grad_norm": 0.36970582604408264, "learning_rate": 0.00013884471117779444, "loss": 0.529, "step": 6119 }, { "epoch": 0.4537702973233484, "grad_norm": 0.3712885081768036, "learning_rate": 0.0001388347086771693, "loss": 0.5433, "step": 6120 }, { "epoch": 0.4538444427967673, "grad_norm": 0.3730667531490326, "learning_rate": 0.00013882470617654415, "loss": 0.5123, "step": 6121 }, { "epoch": 0.4539185882701861, "grad_norm": 0.3635011613368988, "learning_rate": 0.00013881470367591898, "loss": 0.5125, "step": 6122 }, { "epoch": 0.45399273374360494, "grad_norm": 0.39688295125961304, "learning_rate": 0.00013880470117529382, "loss": 0.544, "step": 6123 }, { "epoch": 0.4540668792170238, "grad_norm": 0.3706457018852234, "learning_rate": 0.00013879469867466868, "loss": 0.5246, "step": 6124 }, { "epoch": 0.45414102469044265, "grad_norm": 0.3590751588344574, "learning_rate": 0.00013878469617404352, "loss": 0.4693, "step": 6125 }, { "epoch": 0.4542151701638615, "grad_norm": 0.36044806241989136, "learning_rate": 0.00013877469367341836, "loss": 0.5119, "step": 6126 }, { "epoch": 0.45428931563728037, "grad_norm": 0.36903825402259827, "learning_rate": 0.0001387646911727932, "loss": 0.4945, "step": 6127 }, { "epoch": 0.4543634611106992, "grad_norm": 0.3645397126674652, "learning_rate": 0.00013875468867216806, "loss": 0.5009, "step": 6128 }, { "epoch": 0.454437606584118, "grad_norm": 0.3593982458114624, "learning_rate": 0.00013874468617154287, "loss": 0.5301, "step": 6129 }, { "epoch": 0.4545117520575369, "grad_norm": 0.3638302981853485, "learning_rate": 0.00013873468367091773, "loss": 0.5114, "step": 6130 }, { "epoch": 0.45458589753095574, "grad_norm": 0.3821985125541687, "learning_rate": 0.00013872468117029257, "loss": 0.5253, "step": 6131 }, { "epoch": 0.45466004300437457, "grad_norm": 0.35320809483528137, "learning_rate": 0.00013871467866966743, "loss": 0.4773, "step": 6132 }, { "epoch": 0.45473418847779346, "grad_norm": 0.37482544779777527, "learning_rate": 0.00013870467616904224, "loss": 0.5135, "step": 6133 }, { "epoch": 0.4548083339512123, "grad_norm": 0.3662591278553009, "learning_rate": 0.0001386946736684171, "loss": 0.5323, "step": 6134 }, { "epoch": 0.4548824794246311, "grad_norm": 0.348117858171463, "learning_rate": 0.00013868467116779195, "loss": 0.4918, "step": 6135 }, { "epoch": 0.45495662489805, "grad_norm": 0.3732987344264984, "learning_rate": 0.0001386746686671668, "loss": 0.5373, "step": 6136 }, { "epoch": 0.45503077037146883, "grad_norm": 0.3820822238922119, "learning_rate": 0.00013866466616654165, "loss": 0.5647, "step": 6137 }, { "epoch": 0.45510491584488766, "grad_norm": 0.3749089241027832, "learning_rate": 0.00013865466366591648, "loss": 0.536, "step": 6138 }, { "epoch": 0.45517906131830654, "grad_norm": 0.3596203029155731, "learning_rate": 0.00013864466116529132, "loss": 0.5266, "step": 6139 }, { "epoch": 0.4552532067917254, "grad_norm": 0.3609651029109955, "learning_rate": 0.00013863465866466619, "loss": 0.5346, "step": 6140 }, { "epoch": 0.4553273522651442, "grad_norm": 0.35271698236465454, "learning_rate": 0.00013862465616404102, "loss": 0.4747, "step": 6141 }, { "epoch": 0.4554014977385631, "grad_norm": 0.36107271909713745, "learning_rate": 0.00013861465366341586, "loss": 0.5153, "step": 6142 }, { "epoch": 0.4554756432119819, "grad_norm": 0.33628806471824646, "learning_rate": 0.00013860465116279072, "loss": 0.5191, "step": 6143 }, { "epoch": 0.45554978868540075, "grad_norm": 0.3323644995689392, "learning_rate": 0.00013859464866216553, "loss": 0.4815, "step": 6144 }, { "epoch": 0.4556239341588196, "grad_norm": 0.3735108971595764, "learning_rate": 0.0001385846461615404, "loss": 0.5421, "step": 6145 }, { "epoch": 0.45569807963223846, "grad_norm": 0.3476869463920593, "learning_rate": 0.00013857464366091524, "loss": 0.5422, "step": 6146 }, { "epoch": 0.4557722251056573, "grad_norm": 0.3503420948982239, "learning_rate": 0.0001385646411602901, "loss": 0.5063, "step": 6147 }, { "epoch": 0.4558463705790761, "grad_norm": 0.33795708417892456, "learning_rate": 0.0001385546386596649, "loss": 0.4744, "step": 6148 }, { "epoch": 0.455920516052495, "grad_norm": 0.3289434611797333, "learning_rate": 0.00013854463615903977, "loss": 0.477, "step": 6149 }, { "epoch": 0.45599466152591384, "grad_norm": 0.3647271394729614, "learning_rate": 0.0001385346336584146, "loss": 0.535, "step": 6150 }, { "epoch": 0.45606880699933267, "grad_norm": 0.3689267039299011, "learning_rate": 0.00013852463115778947, "loss": 0.5582, "step": 6151 }, { "epoch": 0.45614295247275155, "grad_norm": 0.3568647503852844, "learning_rate": 0.00013851462865716428, "loss": 0.4911, "step": 6152 }, { "epoch": 0.4562170979461704, "grad_norm": 0.3411670923233032, "learning_rate": 0.00013850462615653915, "loss": 0.4744, "step": 6153 }, { "epoch": 0.4562912434195892, "grad_norm": 0.35540568828582764, "learning_rate": 0.00013849462365591399, "loss": 0.5035, "step": 6154 }, { "epoch": 0.4563653888930081, "grad_norm": 0.34684163331985474, "learning_rate": 0.00013848462115528882, "loss": 0.4892, "step": 6155 }, { "epoch": 0.4564395343664269, "grad_norm": 0.36853843927383423, "learning_rate": 0.00013847461865466366, "loss": 0.4911, "step": 6156 }, { "epoch": 0.45651367983984575, "grad_norm": 0.3607867360115051, "learning_rate": 0.00013846461615403852, "loss": 0.5015, "step": 6157 }, { "epoch": 0.45658782531326464, "grad_norm": 0.39895036816596985, "learning_rate": 0.00013845461365341336, "loss": 0.5229, "step": 6158 }, { "epoch": 0.45666197078668347, "grad_norm": 0.3798406422138214, "learning_rate": 0.0001384446111527882, "loss": 0.5356, "step": 6159 }, { "epoch": 0.4567361162601023, "grad_norm": 0.3772999048233032, "learning_rate": 0.00013843460865216304, "loss": 0.508, "step": 6160 }, { "epoch": 0.4568102617335212, "grad_norm": 0.32874706387519836, "learning_rate": 0.0001384246061515379, "loss": 0.4761, "step": 6161 }, { "epoch": 0.45688440720694, "grad_norm": 0.3502436578273773, "learning_rate": 0.00013841460365091274, "loss": 0.5002, "step": 6162 }, { "epoch": 0.45695855268035884, "grad_norm": 0.3534312844276428, "learning_rate": 0.00013840460115028757, "loss": 0.5106, "step": 6163 }, { "epoch": 0.45703269815377773, "grad_norm": 0.3846338093280792, "learning_rate": 0.0001383945986496624, "loss": 0.5155, "step": 6164 }, { "epoch": 0.45710684362719656, "grad_norm": 0.35170382261276245, "learning_rate": 0.00013838459614903728, "loss": 0.5369, "step": 6165 }, { "epoch": 0.4571809891006154, "grad_norm": 0.3574081361293793, "learning_rate": 0.00013837459364841209, "loss": 0.4963, "step": 6166 }, { "epoch": 0.45725513457403427, "grad_norm": 0.35088402032852173, "learning_rate": 0.00013836459114778695, "loss": 0.5435, "step": 6167 }, { "epoch": 0.4573292800474531, "grad_norm": 0.35212278366088867, "learning_rate": 0.0001383545886471618, "loss": 0.5124, "step": 6168 }, { "epoch": 0.45740342552087193, "grad_norm": 0.38025110960006714, "learning_rate": 0.00013834458614653665, "loss": 0.5452, "step": 6169 }, { "epoch": 0.4574775709942908, "grad_norm": 0.34476104378700256, "learning_rate": 0.0001383345836459115, "loss": 0.4909, "step": 6170 }, { "epoch": 0.45755171646770965, "grad_norm": 0.344465047121048, "learning_rate": 0.00013832458114528633, "loss": 0.5154, "step": 6171 }, { "epoch": 0.4576258619411285, "grad_norm": 0.347419410943985, "learning_rate": 0.00013831457864466116, "loss": 0.5386, "step": 6172 }, { "epoch": 0.45770000741454736, "grad_norm": 0.3510735332965851, "learning_rate": 0.00013830457614403603, "loss": 0.5084, "step": 6173 }, { "epoch": 0.4577741528879662, "grad_norm": 0.3799515962600708, "learning_rate": 0.00013829457364341086, "loss": 0.5426, "step": 6174 }, { "epoch": 0.457848298361385, "grad_norm": 0.35376402735710144, "learning_rate": 0.0001382845711427857, "loss": 0.5512, "step": 6175 }, { "epoch": 0.4579224438348039, "grad_norm": 0.3575175106525421, "learning_rate": 0.00013827456864216056, "loss": 0.5289, "step": 6176 }, { "epoch": 0.45799658930822273, "grad_norm": 0.3467923700809479, "learning_rate": 0.0001382645661415354, "loss": 0.4757, "step": 6177 }, { "epoch": 0.45807073478164156, "grad_norm": 0.35645073652267456, "learning_rate": 0.00013825456364091024, "loss": 0.4865, "step": 6178 }, { "epoch": 0.45814488025506045, "grad_norm": 0.3715612292289734, "learning_rate": 0.00013824456114028508, "loss": 0.4884, "step": 6179 }, { "epoch": 0.4582190257284793, "grad_norm": 0.33473342657089233, "learning_rate": 0.00013823455863965994, "loss": 0.5113, "step": 6180 }, { "epoch": 0.4582931712018981, "grad_norm": 0.345164954662323, "learning_rate": 0.00013822455613903475, "loss": 0.5022, "step": 6181 }, { "epoch": 0.458367316675317, "grad_norm": 0.33755046129226685, "learning_rate": 0.00013821455363840961, "loss": 0.4902, "step": 6182 }, { "epoch": 0.4584414621487358, "grad_norm": 0.3686865568161011, "learning_rate": 0.00013820455113778445, "loss": 0.5021, "step": 6183 }, { "epoch": 0.45851560762215465, "grad_norm": 0.393155574798584, "learning_rate": 0.00013819454863715932, "loss": 0.5491, "step": 6184 }, { "epoch": 0.45858975309557354, "grad_norm": 0.37022310495376587, "learning_rate": 0.00013818454613653413, "loss": 0.5662, "step": 6185 }, { "epoch": 0.45866389856899237, "grad_norm": 0.3722997307777405, "learning_rate": 0.000138174543635909, "loss": 0.5398, "step": 6186 }, { "epoch": 0.4587380440424112, "grad_norm": 0.3467787206172943, "learning_rate": 0.00013816454113528383, "loss": 0.5181, "step": 6187 }, { "epoch": 0.4588121895158301, "grad_norm": 0.3692983388900757, "learning_rate": 0.0001381545386346587, "loss": 0.5516, "step": 6188 }, { "epoch": 0.4588863349892489, "grad_norm": 0.33678561449050903, "learning_rate": 0.0001381445361340335, "loss": 0.4921, "step": 6189 }, { "epoch": 0.45896048046266774, "grad_norm": 0.3575766384601593, "learning_rate": 0.00013813453363340837, "loss": 0.5198, "step": 6190 }, { "epoch": 0.4590346259360866, "grad_norm": 0.3730403482913971, "learning_rate": 0.0001381245311327832, "loss": 0.5421, "step": 6191 }, { "epoch": 0.45910877140950546, "grad_norm": 0.35701438784599304, "learning_rate": 0.00013811452863215804, "loss": 0.5327, "step": 6192 }, { "epoch": 0.4591829168829243, "grad_norm": 0.34233853220939636, "learning_rate": 0.00013810452613153288, "loss": 0.4888, "step": 6193 }, { "epoch": 0.45925706235634317, "grad_norm": 0.3507554829120636, "learning_rate": 0.00013809452363090774, "loss": 0.5102, "step": 6194 }, { "epoch": 0.459331207829762, "grad_norm": 0.36241912841796875, "learning_rate": 0.00013808452113028258, "loss": 0.5208, "step": 6195 }, { "epoch": 0.45940535330318083, "grad_norm": 0.34356558322906494, "learning_rate": 0.00013807451862965742, "loss": 0.4903, "step": 6196 }, { "epoch": 0.4594794987765997, "grad_norm": 0.3924340605735779, "learning_rate": 0.00013806451612903225, "loss": 0.5469, "step": 6197 }, { "epoch": 0.45955364425001854, "grad_norm": 0.3895018398761749, "learning_rate": 0.00013805451362840712, "loss": 0.5639, "step": 6198 }, { "epoch": 0.4596277897234374, "grad_norm": 0.3576069176197052, "learning_rate": 0.00013804451112778195, "loss": 0.4871, "step": 6199 }, { "epoch": 0.45970193519685626, "grad_norm": 0.3820935785770416, "learning_rate": 0.0001380345086271568, "loss": 0.5131, "step": 6200 }, { "epoch": 0.4597760806702751, "grad_norm": 0.3526609539985657, "learning_rate": 0.00013802450612653163, "loss": 0.4767, "step": 6201 }, { "epoch": 0.4598502261436939, "grad_norm": 0.36746060848236084, "learning_rate": 0.0001380145036259065, "loss": 0.5485, "step": 6202 }, { "epoch": 0.45992437161711275, "grad_norm": 0.3638516664505005, "learning_rate": 0.00013800450112528133, "loss": 0.5149, "step": 6203 }, { "epoch": 0.45999851709053163, "grad_norm": 0.361889123916626, "learning_rate": 0.00013799449862465617, "loss": 0.4847, "step": 6204 }, { "epoch": 0.46007266256395046, "grad_norm": 0.3683771789073944, "learning_rate": 0.000137984496124031, "loss": 0.5236, "step": 6205 }, { "epoch": 0.4601468080373693, "grad_norm": 0.36444926261901855, "learning_rate": 0.00013797449362340587, "loss": 0.5526, "step": 6206 }, { "epoch": 0.4602209535107882, "grad_norm": 0.39373311400413513, "learning_rate": 0.0001379644911227807, "loss": 0.5654, "step": 6207 }, { "epoch": 0.460295098984207, "grad_norm": 0.37694111466407776, "learning_rate": 0.00013795448862215554, "loss": 0.521, "step": 6208 }, { "epoch": 0.46036924445762584, "grad_norm": 0.3771551251411438, "learning_rate": 0.00013794448612153038, "loss": 0.526, "step": 6209 }, { "epoch": 0.4604433899310447, "grad_norm": 0.35467472672462463, "learning_rate": 0.00013793448362090524, "loss": 0.4807, "step": 6210 }, { "epoch": 0.46051753540446355, "grad_norm": 0.3889911472797394, "learning_rate": 0.00013792448112028008, "loss": 0.5568, "step": 6211 }, { "epoch": 0.4605916808778824, "grad_norm": 0.3658875823020935, "learning_rate": 0.00013791447861965492, "loss": 0.538, "step": 6212 }, { "epoch": 0.46066582635130127, "grad_norm": 0.3412138521671295, "learning_rate": 0.00013790447611902978, "loss": 0.4968, "step": 6213 }, { "epoch": 0.4607399718247201, "grad_norm": 0.3742477297782898, "learning_rate": 0.00013789447361840462, "loss": 0.5308, "step": 6214 }, { "epoch": 0.4608141172981389, "grad_norm": 0.38273969292640686, "learning_rate": 0.00013788447111777946, "loss": 0.5542, "step": 6215 }, { "epoch": 0.4608882627715578, "grad_norm": 0.34350231289863586, "learning_rate": 0.0001378744686171543, "loss": 0.4926, "step": 6216 }, { "epoch": 0.46096240824497664, "grad_norm": 0.36436375975608826, "learning_rate": 0.00013786446611652916, "loss": 0.4972, "step": 6217 }, { "epoch": 0.46103655371839547, "grad_norm": 0.31975045800209045, "learning_rate": 0.00013785446361590397, "loss": 0.4524, "step": 6218 }, { "epoch": 0.46111069919181435, "grad_norm": 0.3711205720901489, "learning_rate": 0.00013784446111527883, "loss": 0.5348, "step": 6219 }, { "epoch": 0.4611848446652332, "grad_norm": 0.3397088944911957, "learning_rate": 0.00013783445861465367, "loss": 0.483, "step": 6220 }, { "epoch": 0.461258990138652, "grad_norm": 0.3591291606426239, "learning_rate": 0.00013782445611402853, "loss": 0.5286, "step": 6221 }, { "epoch": 0.4613331356120709, "grad_norm": 0.3571327030658722, "learning_rate": 0.00013781445361340334, "loss": 0.5429, "step": 6222 }, { "epoch": 0.46140728108548973, "grad_norm": 0.34812131524086, "learning_rate": 0.0001378044511127782, "loss": 0.5086, "step": 6223 }, { "epoch": 0.46148142655890856, "grad_norm": 0.3646305203437805, "learning_rate": 0.00013779444861215304, "loss": 0.5263, "step": 6224 }, { "epoch": 0.46155557203232744, "grad_norm": 0.3450324237346649, "learning_rate": 0.0001377844461115279, "loss": 0.5206, "step": 6225 }, { "epoch": 0.4616297175057463, "grad_norm": 0.37729814648628235, "learning_rate": 0.00013777444361090272, "loss": 0.5437, "step": 6226 }, { "epoch": 0.4617038629791651, "grad_norm": 0.36934754252433777, "learning_rate": 0.00013776444111027758, "loss": 0.4935, "step": 6227 }, { "epoch": 0.461778008452584, "grad_norm": 0.34318605065345764, "learning_rate": 0.00013775443860965242, "loss": 0.466, "step": 6228 }, { "epoch": 0.4618521539260028, "grad_norm": 0.3556266725063324, "learning_rate": 0.00013774443610902726, "loss": 0.4828, "step": 6229 }, { "epoch": 0.46192629939942165, "grad_norm": 0.3492845594882965, "learning_rate": 0.0001377344336084021, "loss": 0.5015, "step": 6230 }, { "epoch": 0.46200044487284053, "grad_norm": 0.37644827365875244, "learning_rate": 0.00013772443110777696, "loss": 0.5059, "step": 6231 }, { "epoch": 0.46207459034625936, "grad_norm": 0.3848118782043457, "learning_rate": 0.0001377144286071518, "loss": 0.5349, "step": 6232 }, { "epoch": 0.4621487358196782, "grad_norm": 0.39173364639282227, "learning_rate": 0.00013770442610652663, "loss": 0.5468, "step": 6233 }, { "epoch": 0.4622228812930971, "grad_norm": 0.34683141112327576, "learning_rate": 0.00013769442360590147, "loss": 0.4772, "step": 6234 }, { "epoch": 0.4622970267665159, "grad_norm": 0.3578660488128662, "learning_rate": 0.00013768442110527633, "loss": 0.5195, "step": 6235 }, { "epoch": 0.46237117223993474, "grad_norm": 0.3756784498691559, "learning_rate": 0.00013767441860465117, "loss": 0.5257, "step": 6236 }, { "epoch": 0.4624453177133536, "grad_norm": 0.33937492966651917, "learning_rate": 0.000137664416104026, "loss": 0.4814, "step": 6237 }, { "epoch": 0.46251946318677245, "grad_norm": 0.3549532890319824, "learning_rate": 0.00013765441360340084, "loss": 0.4891, "step": 6238 }, { "epoch": 0.4625936086601913, "grad_norm": 0.35102778673171997, "learning_rate": 0.0001376444111027757, "loss": 0.5168, "step": 6239 }, { "epoch": 0.46266775413361017, "grad_norm": 0.34683480858802795, "learning_rate": 0.00013763440860215055, "loss": 0.5154, "step": 6240 }, { "epoch": 0.462741899607029, "grad_norm": 0.35924768447875977, "learning_rate": 0.00013762440610152538, "loss": 0.515, "step": 6241 }, { "epoch": 0.4628160450804478, "grad_norm": 0.3587247133255005, "learning_rate": 0.00013761440360090022, "loss": 0.5002, "step": 6242 }, { "epoch": 0.4628901905538667, "grad_norm": 0.35047322511672974, "learning_rate": 0.00013760440110027508, "loss": 0.5094, "step": 6243 }, { "epoch": 0.46296433602728554, "grad_norm": 0.3717944920063019, "learning_rate": 0.00013759439859964992, "loss": 0.5637, "step": 6244 }, { "epoch": 0.46303848150070437, "grad_norm": 0.3448526859283447, "learning_rate": 0.00013758439609902476, "loss": 0.4949, "step": 6245 }, { "epoch": 0.46311262697412325, "grad_norm": 0.35877498984336853, "learning_rate": 0.00013757439359839962, "loss": 0.5148, "step": 6246 }, { "epoch": 0.4631867724475421, "grad_norm": 0.3493490517139435, "learning_rate": 0.00013756439109777446, "loss": 0.4848, "step": 6247 }, { "epoch": 0.4632609179209609, "grad_norm": 0.3477213978767395, "learning_rate": 0.0001375543885971493, "loss": 0.4946, "step": 6248 }, { "epoch": 0.4633350633943798, "grad_norm": 0.34225115180015564, "learning_rate": 0.00013754438609652413, "loss": 0.51, "step": 6249 }, { "epoch": 0.4634092088677986, "grad_norm": 0.3381706774234772, "learning_rate": 0.000137534383595899, "loss": 0.4673, "step": 6250 }, { "epoch": 0.46348335434121746, "grad_norm": 0.3707940876483917, "learning_rate": 0.00013752438109527383, "loss": 0.5242, "step": 6251 }, { "epoch": 0.46355749981463634, "grad_norm": 0.36371248960494995, "learning_rate": 0.00013751437859464867, "loss": 0.5125, "step": 6252 }, { "epoch": 0.46363164528805517, "grad_norm": 0.3467034697532654, "learning_rate": 0.0001375043760940235, "loss": 0.4684, "step": 6253 }, { "epoch": 0.463705790761474, "grad_norm": 0.41838502883911133, "learning_rate": 0.00013749437359339837, "loss": 0.5807, "step": 6254 }, { "epoch": 0.4637799362348929, "grad_norm": 0.3884989619255066, "learning_rate": 0.00013748437109277318, "loss": 0.5509, "step": 6255 }, { "epoch": 0.4638540817083117, "grad_norm": 0.3670986294746399, "learning_rate": 0.00013747436859214805, "loss": 0.5222, "step": 6256 }, { "epoch": 0.46392822718173055, "grad_norm": 0.346405565738678, "learning_rate": 0.00013746436609152288, "loss": 0.473, "step": 6257 }, { "epoch": 0.4640023726551494, "grad_norm": 0.3566574454307556, "learning_rate": 0.00013745436359089775, "loss": 0.5532, "step": 6258 }, { "epoch": 0.46407651812856826, "grad_norm": 0.37570807337760925, "learning_rate": 0.00013744436109027256, "loss": 0.5494, "step": 6259 }, { "epoch": 0.4641506636019871, "grad_norm": 0.36298868060112, "learning_rate": 0.00013743435858964742, "loss": 0.5015, "step": 6260 }, { "epoch": 0.4642248090754059, "grad_norm": 0.37609902024269104, "learning_rate": 0.00013742435608902226, "loss": 0.5676, "step": 6261 }, { "epoch": 0.4642989545488248, "grad_norm": 0.3629249036312103, "learning_rate": 0.00013741435358839712, "loss": 0.5341, "step": 6262 }, { "epoch": 0.46437310002224363, "grad_norm": 0.3477993905544281, "learning_rate": 0.00013740435108777193, "loss": 0.4981, "step": 6263 }, { "epoch": 0.46444724549566246, "grad_norm": 0.3411383330821991, "learning_rate": 0.0001373943485871468, "loss": 0.4904, "step": 6264 }, { "epoch": 0.46452139096908135, "grad_norm": 0.37031999230384827, "learning_rate": 0.00013738434608652163, "loss": 0.5145, "step": 6265 }, { "epoch": 0.4645955364425002, "grad_norm": 0.3817087709903717, "learning_rate": 0.00013737434358589647, "loss": 0.5021, "step": 6266 }, { "epoch": 0.464669681915919, "grad_norm": 0.37424972653388977, "learning_rate": 0.0001373643410852713, "loss": 0.5227, "step": 6267 }, { "epoch": 0.4647438273893379, "grad_norm": 0.3738095164299011, "learning_rate": 0.00013735433858464617, "loss": 0.5047, "step": 6268 }, { "epoch": 0.4648179728627567, "grad_norm": 0.35270726680755615, "learning_rate": 0.000137344336084021, "loss": 0.5158, "step": 6269 }, { "epoch": 0.46489211833617555, "grad_norm": 0.3344038724899292, "learning_rate": 0.00013733433358339585, "loss": 0.4714, "step": 6270 }, { "epoch": 0.46496626380959444, "grad_norm": 0.34487858414649963, "learning_rate": 0.00013732433108277068, "loss": 0.4873, "step": 6271 }, { "epoch": 0.46504040928301327, "grad_norm": 0.3444303870201111, "learning_rate": 0.00013731432858214555, "loss": 0.5109, "step": 6272 }, { "epoch": 0.4651145547564321, "grad_norm": 0.3692622184753418, "learning_rate": 0.00013730432608152039, "loss": 0.514, "step": 6273 }, { "epoch": 0.465188700229851, "grad_norm": 0.35831981897354126, "learning_rate": 0.00013729432358089522, "loss": 0.5323, "step": 6274 }, { "epoch": 0.4652628457032698, "grad_norm": 0.3894769847393036, "learning_rate": 0.00013728432108027006, "loss": 0.5576, "step": 6275 }, { "epoch": 0.46533699117668864, "grad_norm": 0.37629595398902893, "learning_rate": 0.00013727431857964492, "loss": 0.5252, "step": 6276 }, { "epoch": 0.4654111366501075, "grad_norm": 0.36821573972702026, "learning_rate": 0.00013726431607901976, "loss": 0.5392, "step": 6277 }, { "epoch": 0.46548528212352636, "grad_norm": 0.38834357261657715, "learning_rate": 0.0001372543135783946, "loss": 0.5507, "step": 6278 }, { "epoch": 0.4655594275969452, "grad_norm": 0.35192036628723145, "learning_rate": 0.00013724431107776946, "loss": 0.5047, "step": 6279 }, { "epoch": 0.46563357307036407, "grad_norm": 0.360580712556839, "learning_rate": 0.0001372343085771443, "loss": 0.5263, "step": 6280 }, { "epoch": 0.4657077185437829, "grad_norm": 0.3626927435398102, "learning_rate": 0.00013722430607651914, "loss": 0.5334, "step": 6281 }, { "epoch": 0.46578186401720173, "grad_norm": 0.36876198649406433, "learning_rate": 0.00013721430357589397, "loss": 0.5205, "step": 6282 }, { "epoch": 0.4658560094906206, "grad_norm": 0.36986249685287476, "learning_rate": 0.00013720430107526884, "loss": 0.5611, "step": 6283 }, { "epoch": 0.46593015496403944, "grad_norm": 0.3609503209590912, "learning_rate": 0.00013719429857464368, "loss": 0.5039, "step": 6284 }, { "epoch": 0.4660043004374583, "grad_norm": 0.35100769996643066, "learning_rate": 0.0001371842960740185, "loss": 0.4816, "step": 6285 }, { "epoch": 0.46607844591087716, "grad_norm": 0.3776429295539856, "learning_rate": 0.00013717429357339335, "loss": 0.5786, "step": 6286 }, { "epoch": 0.466152591384296, "grad_norm": 0.37492144107818604, "learning_rate": 0.00013716429107276821, "loss": 0.5054, "step": 6287 }, { "epoch": 0.4662267368577148, "grad_norm": 0.35484471917152405, "learning_rate": 0.00013715428857214305, "loss": 0.5054, "step": 6288 }, { "epoch": 0.4663008823311337, "grad_norm": 0.3413679003715515, "learning_rate": 0.0001371442860715179, "loss": 0.4647, "step": 6289 }, { "epoch": 0.46637502780455253, "grad_norm": 0.3830370008945465, "learning_rate": 0.00013713428357089272, "loss": 0.576, "step": 6290 }, { "epoch": 0.46644917327797136, "grad_norm": 0.34455838799476624, "learning_rate": 0.0001371242810702676, "loss": 0.4759, "step": 6291 }, { "epoch": 0.46652331875139025, "grad_norm": 0.3694137632846832, "learning_rate": 0.0001371142785696424, "loss": 0.5509, "step": 6292 }, { "epoch": 0.4665974642248091, "grad_norm": 0.34259340167045593, "learning_rate": 0.00013710427606901726, "loss": 0.4703, "step": 6293 }, { "epoch": 0.4666716096982279, "grad_norm": 0.3934311270713806, "learning_rate": 0.0001370942735683921, "loss": 0.5174, "step": 6294 }, { "epoch": 0.4667457551716468, "grad_norm": 0.36951160430908203, "learning_rate": 0.00013708427106776696, "loss": 0.4978, "step": 6295 }, { "epoch": 0.4668199006450656, "grad_norm": 0.37434151768684387, "learning_rate": 0.00013707426856714177, "loss": 0.5385, "step": 6296 }, { "epoch": 0.46689404611848445, "grad_norm": 0.3250945508480072, "learning_rate": 0.00013706426606651664, "loss": 0.4897, "step": 6297 }, { "epoch": 0.46696819159190334, "grad_norm": 0.3359949588775635, "learning_rate": 0.00013705426356589148, "loss": 0.4834, "step": 6298 }, { "epoch": 0.46704233706532217, "grad_norm": 0.3669392466545105, "learning_rate": 0.00013704426106526634, "loss": 0.5651, "step": 6299 }, { "epoch": 0.467116482538741, "grad_norm": 0.35803869366645813, "learning_rate": 0.00013703425856464115, "loss": 0.5261, "step": 6300 }, { "epoch": 0.4671906280121599, "grad_norm": 0.38228747248649597, "learning_rate": 0.00013702425606401601, "loss": 0.5554, "step": 6301 }, { "epoch": 0.4672647734855787, "grad_norm": 0.3613738715648651, "learning_rate": 0.00013701425356339085, "loss": 0.519, "step": 6302 }, { "epoch": 0.46733891895899754, "grad_norm": 0.32821232080459595, "learning_rate": 0.0001370042510627657, "loss": 0.4628, "step": 6303 }, { "epoch": 0.4674130644324164, "grad_norm": 0.33912748098373413, "learning_rate": 0.00013699424856214053, "loss": 0.5103, "step": 6304 }, { "epoch": 0.46748720990583525, "grad_norm": 0.37166833877563477, "learning_rate": 0.0001369842460615154, "loss": 0.5264, "step": 6305 }, { "epoch": 0.4675613553792541, "grad_norm": 0.3519686460494995, "learning_rate": 0.00013697424356089023, "loss": 0.5222, "step": 6306 }, { "epoch": 0.46763550085267297, "grad_norm": 0.3735285997390747, "learning_rate": 0.00013696424106026506, "loss": 0.5109, "step": 6307 }, { "epoch": 0.4677096463260918, "grad_norm": 0.34181955456733704, "learning_rate": 0.0001369542385596399, "loss": 0.4993, "step": 6308 }, { "epoch": 0.46778379179951063, "grad_norm": 0.3517739474773407, "learning_rate": 0.00013694423605901477, "loss": 0.516, "step": 6309 }, { "epoch": 0.4678579372729295, "grad_norm": 0.34258052706718445, "learning_rate": 0.0001369342335583896, "loss": 0.4728, "step": 6310 }, { "epoch": 0.46793208274634834, "grad_norm": 0.36619555950164795, "learning_rate": 0.00013692423105776444, "loss": 0.5087, "step": 6311 }, { "epoch": 0.4680062282197672, "grad_norm": 0.38567811250686646, "learning_rate": 0.0001369142285571393, "loss": 0.5099, "step": 6312 }, { "epoch": 0.46808037369318606, "grad_norm": 0.36304280161857605, "learning_rate": 0.00013690422605651414, "loss": 0.5191, "step": 6313 }, { "epoch": 0.4681545191666049, "grad_norm": 0.33736270666122437, "learning_rate": 0.00013689422355588898, "loss": 0.4733, "step": 6314 }, { "epoch": 0.4682286646400237, "grad_norm": 0.36306869983673096, "learning_rate": 0.00013688422105526381, "loss": 0.4955, "step": 6315 }, { "epoch": 0.46830281011344255, "grad_norm": 0.37080904841423035, "learning_rate": 0.00013687421855463868, "loss": 0.5496, "step": 6316 }, { "epoch": 0.46837695558686143, "grad_norm": 0.3929561972618103, "learning_rate": 0.00013686421605401352, "loss": 0.5813, "step": 6317 }, { "epoch": 0.46845110106028026, "grad_norm": 0.3776000440120697, "learning_rate": 0.00013685421355338835, "loss": 0.5089, "step": 6318 }, { "epoch": 0.4685252465336991, "grad_norm": 0.36220625042915344, "learning_rate": 0.0001368442110527632, "loss": 0.5432, "step": 6319 }, { "epoch": 0.468599392007118, "grad_norm": 0.3439844250679016, "learning_rate": 0.00013683420855213805, "loss": 0.4778, "step": 6320 }, { "epoch": 0.4686735374805368, "grad_norm": 0.3450429439544678, "learning_rate": 0.0001368242060515129, "loss": 0.4628, "step": 6321 }, { "epoch": 0.46874768295395564, "grad_norm": 0.3411663770675659, "learning_rate": 0.00013681420355088773, "loss": 0.4738, "step": 6322 }, { "epoch": 0.4688218284273745, "grad_norm": 0.35537776350975037, "learning_rate": 0.00013680420105026257, "loss": 0.5172, "step": 6323 }, { "epoch": 0.46889597390079335, "grad_norm": 0.37039995193481445, "learning_rate": 0.00013679419854963743, "loss": 0.5469, "step": 6324 }, { "epoch": 0.4689701193742122, "grad_norm": 0.37043264508247375, "learning_rate": 0.00013678419604901227, "loss": 0.5278, "step": 6325 }, { "epoch": 0.46904426484763106, "grad_norm": 0.35248997807502747, "learning_rate": 0.0001367741935483871, "loss": 0.5238, "step": 6326 }, { "epoch": 0.4691184103210499, "grad_norm": 0.34577691555023193, "learning_rate": 0.00013676419104776194, "loss": 0.5061, "step": 6327 }, { "epoch": 0.4691925557944687, "grad_norm": 0.35610833764076233, "learning_rate": 0.0001367541885471368, "loss": 0.5043, "step": 6328 }, { "epoch": 0.4692667012678876, "grad_norm": 0.35583552718162537, "learning_rate": 0.00013674418604651162, "loss": 0.5211, "step": 6329 }, { "epoch": 0.46934084674130644, "grad_norm": 0.3539409637451172, "learning_rate": 0.00013673418354588648, "loss": 0.4949, "step": 6330 }, { "epoch": 0.46941499221472527, "grad_norm": 0.3734513223171234, "learning_rate": 0.00013672418104526132, "loss": 0.5704, "step": 6331 }, { "epoch": 0.46948913768814415, "grad_norm": 0.3347901403903961, "learning_rate": 0.00013671417854463618, "loss": 0.5298, "step": 6332 }, { "epoch": 0.469563283161563, "grad_norm": 0.3387455642223358, "learning_rate": 0.000136704176044011, "loss": 0.4951, "step": 6333 }, { "epoch": 0.4696374286349818, "grad_norm": 0.35797402262687683, "learning_rate": 0.00013669417354338585, "loss": 0.5362, "step": 6334 }, { "epoch": 0.4697115741084007, "grad_norm": 0.3550552427768707, "learning_rate": 0.0001366841710427607, "loss": 0.5294, "step": 6335 }, { "epoch": 0.4697857195818195, "grad_norm": 0.383463591337204, "learning_rate": 0.00013667416854213556, "loss": 0.5563, "step": 6336 }, { "epoch": 0.46985986505523836, "grad_norm": 0.36674991250038147, "learning_rate": 0.00013666416604151037, "loss": 0.5535, "step": 6337 }, { "epoch": 0.46993401052865724, "grad_norm": 0.34751883149147034, "learning_rate": 0.00013665416354088523, "loss": 0.4924, "step": 6338 }, { "epoch": 0.47000815600207607, "grad_norm": 0.37212666869163513, "learning_rate": 0.00013664416104026007, "loss": 0.5244, "step": 6339 }, { "epoch": 0.4700823014754949, "grad_norm": 0.36438947916030884, "learning_rate": 0.0001366341585396349, "loss": 0.5337, "step": 6340 }, { "epoch": 0.4701564469489138, "grad_norm": 0.3301752507686615, "learning_rate": 0.00013662415603900974, "loss": 0.5028, "step": 6341 }, { "epoch": 0.4702305924223326, "grad_norm": 0.3641398847103119, "learning_rate": 0.0001366141535383846, "loss": 0.5036, "step": 6342 }, { "epoch": 0.47030473789575145, "grad_norm": 0.39051195979118347, "learning_rate": 0.00013660415103775944, "loss": 0.5695, "step": 6343 }, { "epoch": 0.47037888336917033, "grad_norm": 0.3596656620502472, "learning_rate": 0.00013659414853713428, "loss": 0.5356, "step": 6344 }, { "epoch": 0.47045302884258916, "grad_norm": 0.3671811819076538, "learning_rate": 0.00013658414603650914, "loss": 0.5203, "step": 6345 }, { "epoch": 0.470527174316008, "grad_norm": 0.36444997787475586, "learning_rate": 0.00013657414353588398, "loss": 0.4679, "step": 6346 }, { "epoch": 0.4706013197894269, "grad_norm": 0.3814525902271271, "learning_rate": 0.00013656414103525882, "loss": 0.4884, "step": 6347 }, { "epoch": 0.4706754652628457, "grad_norm": 0.3774849474430084, "learning_rate": 0.00013655413853463366, "loss": 0.4941, "step": 6348 }, { "epoch": 0.47074961073626453, "grad_norm": 0.3582368493080139, "learning_rate": 0.00013654413603400852, "loss": 0.4865, "step": 6349 }, { "epoch": 0.4708237562096834, "grad_norm": 0.3655477464199066, "learning_rate": 0.00013653413353338336, "loss": 0.5016, "step": 6350 }, { "epoch": 0.47089790168310225, "grad_norm": 0.3788613975048065, "learning_rate": 0.0001365241310327582, "loss": 0.4975, "step": 6351 }, { "epoch": 0.4709720471565211, "grad_norm": 0.35120221972465515, "learning_rate": 0.00013651412853213303, "loss": 0.5069, "step": 6352 }, { "epoch": 0.47104619262993996, "grad_norm": 0.3555515706539154, "learning_rate": 0.0001365041260315079, "loss": 0.4937, "step": 6353 }, { "epoch": 0.4711203381033588, "grad_norm": 0.38892611861228943, "learning_rate": 0.00013649412353088273, "loss": 0.5903, "step": 6354 }, { "epoch": 0.4711944835767776, "grad_norm": 0.3784848153591156, "learning_rate": 0.00013648412103025757, "loss": 0.561, "step": 6355 }, { "epoch": 0.4712686290501965, "grad_norm": 0.3598879873752594, "learning_rate": 0.0001364741185296324, "loss": 0.5221, "step": 6356 }, { "epoch": 0.47134277452361534, "grad_norm": 0.3649037480354309, "learning_rate": 0.00013646411602900727, "loss": 0.5298, "step": 6357 }, { "epoch": 0.47141691999703417, "grad_norm": 0.34637314081192017, "learning_rate": 0.0001364541135283821, "loss": 0.5304, "step": 6358 }, { "epoch": 0.47149106547045305, "grad_norm": 0.3629116415977478, "learning_rate": 0.00013644411102775694, "loss": 0.5286, "step": 6359 }, { "epoch": 0.4715652109438719, "grad_norm": 0.3487672209739685, "learning_rate": 0.00013643410852713178, "loss": 0.4979, "step": 6360 }, { "epoch": 0.4716393564172907, "grad_norm": 0.3543787896633148, "learning_rate": 0.00013642410602650665, "loss": 0.4977, "step": 6361 }, { "epoch": 0.4717135018907096, "grad_norm": 0.373727947473526, "learning_rate": 0.00013641410352588148, "loss": 0.4938, "step": 6362 }, { "epoch": 0.4717876473641284, "grad_norm": 0.34062859416007996, "learning_rate": 0.00013640410102525632, "loss": 0.5051, "step": 6363 }, { "epoch": 0.47186179283754726, "grad_norm": 0.3394447863101959, "learning_rate": 0.00013639409852463116, "loss": 0.4811, "step": 6364 }, { "epoch": 0.47193593831096614, "grad_norm": 0.35689327120780945, "learning_rate": 0.00013638409602400602, "loss": 0.5194, "step": 6365 }, { "epoch": 0.47201008378438497, "grad_norm": 0.38078662753105164, "learning_rate": 0.00013637409352338083, "loss": 0.5679, "step": 6366 }, { "epoch": 0.4720842292578038, "grad_norm": 0.35956084728240967, "learning_rate": 0.0001363640910227557, "loss": 0.5245, "step": 6367 }, { "epoch": 0.4721583747312227, "grad_norm": 0.3462930917739868, "learning_rate": 0.00013635408852213053, "loss": 0.4841, "step": 6368 }, { "epoch": 0.4722325202046415, "grad_norm": 0.3836093842983246, "learning_rate": 0.0001363440860215054, "loss": 0.5495, "step": 6369 }, { "epoch": 0.47230666567806034, "grad_norm": 0.37694358825683594, "learning_rate": 0.0001363340835208802, "loss": 0.5581, "step": 6370 }, { "epoch": 0.47238081115147923, "grad_norm": 0.3587513267993927, "learning_rate": 0.00013632408102025507, "loss": 0.5168, "step": 6371 }, { "epoch": 0.47245495662489806, "grad_norm": 0.3433588147163391, "learning_rate": 0.0001363140785196299, "loss": 0.512, "step": 6372 }, { "epoch": 0.4725291020983169, "grad_norm": 0.3568352162837982, "learning_rate": 0.00013630407601900477, "loss": 0.5165, "step": 6373 }, { "epoch": 0.4726032475717357, "grad_norm": 0.3783012628555298, "learning_rate": 0.00013629407351837958, "loss": 0.5386, "step": 6374 }, { "epoch": 0.4726773930451546, "grad_norm": 0.37141168117523193, "learning_rate": 0.00013628407101775445, "loss": 0.5176, "step": 6375 }, { "epoch": 0.47275153851857343, "grad_norm": 0.3633905351161957, "learning_rate": 0.00013627406851712928, "loss": 0.4741, "step": 6376 }, { "epoch": 0.47282568399199226, "grad_norm": 0.3694569766521454, "learning_rate": 0.00013626406601650412, "loss": 0.5003, "step": 6377 }, { "epoch": 0.47289982946541115, "grad_norm": 0.3509931266307831, "learning_rate": 0.00013625406351587896, "loss": 0.478, "step": 6378 }, { "epoch": 0.47297397493883, "grad_norm": 0.3695332407951355, "learning_rate": 0.00013624406101525382, "loss": 0.5802, "step": 6379 }, { "epoch": 0.4730481204122488, "grad_norm": 0.3543345630168915, "learning_rate": 0.00013623405851462866, "loss": 0.4851, "step": 6380 }, { "epoch": 0.4731222658856677, "grad_norm": 0.37618565559387207, "learning_rate": 0.0001362240560140035, "loss": 0.5385, "step": 6381 }, { "epoch": 0.4731964113590865, "grad_norm": 0.38070112466812134, "learning_rate": 0.00013621405351337836, "loss": 0.5524, "step": 6382 }, { "epoch": 0.47327055683250535, "grad_norm": 0.37582865357398987, "learning_rate": 0.0001362040510127532, "loss": 0.5318, "step": 6383 }, { "epoch": 0.47334470230592424, "grad_norm": 0.35681334137916565, "learning_rate": 0.00013619404851212803, "loss": 0.4896, "step": 6384 }, { "epoch": 0.47341884777934307, "grad_norm": 0.34897860884666443, "learning_rate": 0.00013618404601150287, "loss": 0.5013, "step": 6385 }, { "epoch": 0.4734929932527619, "grad_norm": 0.35371747612953186, "learning_rate": 0.00013617404351087774, "loss": 0.5063, "step": 6386 }, { "epoch": 0.4735671387261808, "grad_norm": 0.3678499758243561, "learning_rate": 0.00013616404101025257, "loss": 0.5446, "step": 6387 }, { "epoch": 0.4736412841995996, "grad_norm": 0.3564913868904114, "learning_rate": 0.00013615403850962744, "loss": 0.4989, "step": 6388 }, { "epoch": 0.47371542967301844, "grad_norm": 0.35710954666137695, "learning_rate": 0.00013614403600900225, "loss": 0.4931, "step": 6389 }, { "epoch": 0.4737895751464373, "grad_norm": 0.37807366251945496, "learning_rate": 0.0001361340335083771, "loss": 0.5067, "step": 6390 }, { "epoch": 0.47386372061985615, "grad_norm": 0.3567354679107666, "learning_rate": 0.00013612403100775195, "loss": 0.506, "step": 6391 }, { "epoch": 0.473937866093275, "grad_norm": 0.3663228452205658, "learning_rate": 0.00013611402850712679, "loss": 0.4818, "step": 6392 }, { "epoch": 0.47401201156669387, "grad_norm": 0.3672091066837311, "learning_rate": 0.00013610402600650162, "loss": 0.4804, "step": 6393 }, { "epoch": 0.4740861570401127, "grad_norm": 0.3699306547641754, "learning_rate": 0.0001360940235058765, "loss": 0.5154, "step": 6394 }, { "epoch": 0.47416030251353153, "grad_norm": 0.39380472898483276, "learning_rate": 0.00013608402100525132, "loss": 0.6098, "step": 6395 }, { "epoch": 0.4742344479869504, "grad_norm": 0.3888048529624939, "learning_rate": 0.00013607401850462616, "loss": 0.5414, "step": 6396 }, { "epoch": 0.47430859346036924, "grad_norm": 0.3645224869251251, "learning_rate": 0.000136064016004001, "loss": 0.518, "step": 6397 }, { "epoch": 0.4743827389337881, "grad_norm": 0.3607601821422577, "learning_rate": 0.00013605401350337586, "loss": 0.5152, "step": 6398 }, { "epoch": 0.47445688440720696, "grad_norm": 0.36183109879493713, "learning_rate": 0.0001360440110027507, "loss": 0.496, "step": 6399 }, { "epoch": 0.4745310298806258, "grad_norm": 0.34411996603012085, "learning_rate": 0.00013603400850212554, "loss": 0.4853, "step": 6400 }, { "epoch": 0.4746051753540446, "grad_norm": 0.3643604516983032, "learning_rate": 0.00013602400600150037, "loss": 0.5226, "step": 6401 }, { "epoch": 0.4746793208274635, "grad_norm": 0.3826771378517151, "learning_rate": 0.00013601400350087524, "loss": 0.5777, "step": 6402 }, { "epoch": 0.47475346630088233, "grad_norm": 0.3810330927371979, "learning_rate": 0.00013600400100025005, "loss": 0.5575, "step": 6403 }, { "epoch": 0.47482761177430116, "grad_norm": 0.3463783860206604, "learning_rate": 0.0001359939984996249, "loss": 0.5132, "step": 6404 }, { "epoch": 0.47490175724772005, "grad_norm": 0.35958898067474365, "learning_rate": 0.00013598399599899975, "loss": 0.5341, "step": 6405 }, { "epoch": 0.4749759027211389, "grad_norm": 0.33255112171173096, "learning_rate": 0.0001359739934983746, "loss": 0.4833, "step": 6406 }, { "epoch": 0.4750500481945577, "grad_norm": 0.3546402156352997, "learning_rate": 0.00013596399099774942, "loss": 0.5378, "step": 6407 }, { "epoch": 0.4751241936679766, "grad_norm": 0.3552972078323364, "learning_rate": 0.0001359539884971243, "loss": 0.4937, "step": 6408 }, { "epoch": 0.4751983391413954, "grad_norm": 0.34302598237991333, "learning_rate": 0.00013594398599649912, "loss": 0.4832, "step": 6409 }, { "epoch": 0.47527248461481425, "grad_norm": 0.33902955055236816, "learning_rate": 0.000135933983495874, "loss": 0.4998, "step": 6410 }, { "epoch": 0.47534663008823314, "grad_norm": 0.3739015758037567, "learning_rate": 0.0001359239809952488, "loss": 0.5588, "step": 6411 }, { "epoch": 0.47542077556165196, "grad_norm": 0.39548489451408386, "learning_rate": 0.00013591397849462366, "loss": 0.5239, "step": 6412 }, { "epoch": 0.4754949210350708, "grad_norm": 0.3614535629749298, "learning_rate": 0.0001359039759939985, "loss": 0.523, "step": 6413 }, { "epoch": 0.4755690665084897, "grad_norm": 0.33391740918159485, "learning_rate": 0.00013589397349337334, "loss": 0.4922, "step": 6414 }, { "epoch": 0.4756432119819085, "grad_norm": 0.3773519694805145, "learning_rate": 0.0001358839709927482, "loss": 0.5264, "step": 6415 }, { "epoch": 0.47571735745532734, "grad_norm": 0.36343032121658325, "learning_rate": 0.00013587396849212304, "loss": 0.5054, "step": 6416 }, { "epoch": 0.4757915029287462, "grad_norm": 0.35171443223953247, "learning_rate": 0.00013586396599149788, "loss": 0.4998, "step": 6417 }, { "epoch": 0.47586564840216505, "grad_norm": 0.3606116473674774, "learning_rate": 0.0001358539634908727, "loss": 0.4917, "step": 6418 }, { "epoch": 0.4759397938755839, "grad_norm": 0.3555144667625427, "learning_rate": 0.00013584396099024758, "loss": 0.463, "step": 6419 }, { "epoch": 0.47601393934900277, "grad_norm": 0.3653929531574249, "learning_rate": 0.00013583395848962241, "loss": 0.5445, "step": 6420 }, { "epoch": 0.4760880848224216, "grad_norm": 0.374356746673584, "learning_rate": 0.00013582395598899728, "loss": 0.4945, "step": 6421 }, { "epoch": 0.4761622302958404, "grad_norm": 0.3796643614768982, "learning_rate": 0.0001358139534883721, "loss": 0.5412, "step": 6422 }, { "epoch": 0.4762363757692593, "grad_norm": 0.3666728734970093, "learning_rate": 0.00013580395098774695, "loss": 0.5195, "step": 6423 }, { "epoch": 0.47631052124267814, "grad_norm": 0.3497881293296814, "learning_rate": 0.0001357939484871218, "loss": 0.4905, "step": 6424 }, { "epoch": 0.47638466671609697, "grad_norm": 0.3668268620967865, "learning_rate": 0.00013578394598649665, "loss": 0.4936, "step": 6425 }, { "epoch": 0.47645881218951586, "grad_norm": 0.37537047266960144, "learning_rate": 0.00013577394348587146, "loss": 0.5102, "step": 6426 }, { "epoch": 0.4765329576629347, "grad_norm": 0.3675481677055359, "learning_rate": 0.00013576394098524633, "loss": 0.4901, "step": 6427 }, { "epoch": 0.4766071031363535, "grad_norm": 0.3652592897415161, "learning_rate": 0.00013575393848462116, "loss": 0.5038, "step": 6428 }, { "epoch": 0.4766812486097724, "grad_norm": 0.3539675772190094, "learning_rate": 0.000135743935983996, "loss": 0.4984, "step": 6429 }, { "epoch": 0.47675539408319123, "grad_norm": 0.36154139041900635, "learning_rate": 0.00013573393348337084, "loss": 0.5053, "step": 6430 }, { "epoch": 0.47682953955661006, "grad_norm": 0.3595755398273468, "learning_rate": 0.0001357239309827457, "loss": 0.5382, "step": 6431 }, { "epoch": 0.4769036850300289, "grad_norm": 0.3400070369243622, "learning_rate": 0.00013571392848212054, "loss": 0.4687, "step": 6432 }, { "epoch": 0.4769778305034478, "grad_norm": 0.3684256076812744, "learning_rate": 0.00013570392598149538, "loss": 0.5023, "step": 6433 }, { "epoch": 0.4770519759768666, "grad_norm": 0.34819790720939636, "learning_rate": 0.00013569392348087021, "loss": 0.4943, "step": 6434 }, { "epoch": 0.47712612145028543, "grad_norm": 0.3759095072746277, "learning_rate": 0.00013568392098024508, "loss": 0.5424, "step": 6435 }, { "epoch": 0.4772002669237043, "grad_norm": 0.377421498298645, "learning_rate": 0.00013567391847961992, "loss": 0.5036, "step": 6436 }, { "epoch": 0.47727441239712315, "grad_norm": 0.3677801191806793, "learning_rate": 0.00013566391597899475, "loss": 0.5134, "step": 6437 }, { "epoch": 0.477348557870542, "grad_norm": 0.3706549406051636, "learning_rate": 0.0001356539134783696, "loss": 0.4852, "step": 6438 }, { "epoch": 0.47742270334396086, "grad_norm": 0.3670257031917572, "learning_rate": 0.00013564391097774445, "loss": 0.5339, "step": 6439 }, { "epoch": 0.4774968488173797, "grad_norm": 0.38083240389823914, "learning_rate": 0.00013563390847711926, "loss": 0.5703, "step": 6440 }, { "epoch": 0.4775709942907985, "grad_norm": 0.3707564175128937, "learning_rate": 0.00013562390597649413, "loss": 0.5491, "step": 6441 }, { "epoch": 0.4776451397642174, "grad_norm": 0.32315975427627563, "learning_rate": 0.00013561390347586897, "loss": 0.4541, "step": 6442 }, { "epoch": 0.47771928523763624, "grad_norm": 0.34919577836990356, "learning_rate": 0.00013560390097524383, "loss": 0.4936, "step": 6443 }, { "epoch": 0.47779343071105507, "grad_norm": 0.3503824472427368, "learning_rate": 0.00013559389847461864, "loss": 0.524, "step": 6444 }, { "epoch": 0.47786757618447395, "grad_norm": 0.37429144978523254, "learning_rate": 0.0001355838959739935, "loss": 0.5466, "step": 6445 }, { "epoch": 0.4779417216578928, "grad_norm": 0.3730836510658264, "learning_rate": 0.00013557389347336834, "loss": 0.5629, "step": 6446 }, { "epoch": 0.4780158671313116, "grad_norm": 0.3576561510562897, "learning_rate": 0.0001355638909727432, "loss": 0.5218, "step": 6447 }, { "epoch": 0.4780900126047305, "grad_norm": 0.3521636426448822, "learning_rate": 0.00013555388847211804, "loss": 0.5183, "step": 6448 }, { "epoch": 0.4781641580781493, "grad_norm": 0.3480604887008667, "learning_rate": 0.00013554388597149288, "loss": 0.5062, "step": 6449 }, { "epoch": 0.47823830355156816, "grad_norm": 0.358530193567276, "learning_rate": 0.00013553388347086772, "loss": 0.4948, "step": 6450 }, { "epoch": 0.47831244902498704, "grad_norm": 0.343112051486969, "learning_rate": 0.00013552388097024258, "loss": 0.4724, "step": 6451 }, { "epoch": 0.47838659449840587, "grad_norm": 0.3657340705394745, "learning_rate": 0.00013551387846961742, "loss": 0.516, "step": 6452 }, { "epoch": 0.4784607399718247, "grad_norm": 0.34351226687431335, "learning_rate": 0.00013550387596899225, "loss": 0.4533, "step": 6453 }, { "epoch": 0.4785348854452436, "grad_norm": 0.3729808032512665, "learning_rate": 0.0001354938734683671, "loss": 0.5023, "step": 6454 }, { "epoch": 0.4786090309186624, "grad_norm": 0.3648975193500519, "learning_rate": 0.00013548387096774193, "loss": 0.5084, "step": 6455 }, { "epoch": 0.47868317639208124, "grad_norm": 0.35998302698135376, "learning_rate": 0.0001354738684671168, "loss": 0.5002, "step": 6456 }, { "epoch": 0.47875732186550013, "grad_norm": 0.35784024000167847, "learning_rate": 0.00013546386596649163, "loss": 0.5, "step": 6457 }, { "epoch": 0.47883146733891896, "grad_norm": 0.3795572817325592, "learning_rate": 0.0001354538634658665, "loss": 0.5267, "step": 6458 }, { "epoch": 0.4789056128123378, "grad_norm": 0.38436028361320496, "learning_rate": 0.0001354438609652413, "loss": 0.5548, "step": 6459 }, { "epoch": 0.4789797582857567, "grad_norm": 0.37131422758102417, "learning_rate": 0.00013543385846461617, "loss": 0.4976, "step": 6460 }, { "epoch": 0.4790539037591755, "grad_norm": 0.35902154445648193, "learning_rate": 0.000135423855963991, "loss": 0.4916, "step": 6461 }, { "epoch": 0.47912804923259433, "grad_norm": 0.33962568640708923, "learning_rate": 0.00013541385346336587, "loss": 0.4922, "step": 6462 }, { "epoch": 0.4792021947060132, "grad_norm": 0.35927847027778625, "learning_rate": 0.00013540385096274068, "loss": 0.522, "step": 6463 }, { "epoch": 0.47927634017943205, "grad_norm": 0.3503618538379669, "learning_rate": 0.00013539384846211554, "loss": 0.5259, "step": 6464 }, { "epoch": 0.4793504856528509, "grad_norm": 0.3248516619205475, "learning_rate": 0.00013538384596149038, "loss": 0.4849, "step": 6465 }, { "epoch": 0.47942463112626976, "grad_norm": 0.34954825043678284, "learning_rate": 0.00013537384346086522, "loss": 0.478, "step": 6466 }, { "epoch": 0.4794987765996886, "grad_norm": 0.34720292687416077, "learning_rate": 0.00013536384096024006, "loss": 0.5538, "step": 6467 }, { "epoch": 0.4795729220731074, "grad_norm": 0.33857470750808716, "learning_rate": 0.00013535383845961492, "loss": 0.4954, "step": 6468 }, { "epoch": 0.4796470675465263, "grad_norm": 0.3368900716304779, "learning_rate": 0.00013534383595898976, "loss": 0.5025, "step": 6469 }, { "epoch": 0.47972121301994514, "grad_norm": 0.33175504207611084, "learning_rate": 0.0001353338334583646, "loss": 0.4852, "step": 6470 }, { "epoch": 0.47979535849336397, "grad_norm": 0.3793545365333557, "learning_rate": 0.00013532383095773943, "loss": 0.5329, "step": 6471 }, { "epoch": 0.47986950396678285, "grad_norm": 0.37273624539375305, "learning_rate": 0.0001353138284571143, "loss": 0.5305, "step": 6472 }, { "epoch": 0.4799436494402017, "grad_norm": 0.3513183295726776, "learning_rate": 0.00013530382595648913, "loss": 0.5107, "step": 6473 }, { "epoch": 0.4800177949136205, "grad_norm": 0.3515956997871399, "learning_rate": 0.00013529382345586397, "loss": 0.5235, "step": 6474 }, { "epoch": 0.4800919403870394, "grad_norm": 0.37603268027305603, "learning_rate": 0.0001352838209552388, "loss": 0.5107, "step": 6475 }, { "epoch": 0.4801660858604582, "grad_norm": 0.3402623236179352, "learning_rate": 0.00013527381845461367, "loss": 0.4993, "step": 6476 }, { "epoch": 0.48024023133387705, "grad_norm": 0.37328627705574036, "learning_rate": 0.00013526381595398848, "loss": 0.5526, "step": 6477 }, { "epoch": 0.48031437680729594, "grad_norm": 0.36960718035697937, "learning_rate": 0.00013525381345336334, "loss": 0.5077, "step": 6478 }, { "epoch": 0.48038852228071477, "grad_norm": 0.3819752037525177, "learning_rate": 0.00013524381095273818, "loss": 0.564, "step": 6479 }, { "epoch": 0.4804626677541336, "grad_norm": 0.36355236172676086, "learning_rate": 0.00013523380845211305, "loss": 0.4988, "step": 6480 }, { "epoch": 0.4805368132275525, "grad_norm": 0.3896143138408661, "learning_rate": 0.00013522380595148788, "loss": 0.578, "step": 6481 }, { "epoch": 0.4806109587009713, "grad_norm": 0.3757263422012329, "learning_rate": 0.00013521380345086272, "loss": 0.5065, "step": 6482 }, { "epoch": 0.48068510417439014, "grad_norm": 0.35957837104797363, "learning_rate": 0.00013520380095023756, "loss": 0.5167, "step": 6483 }, { "epoch": 0.48075924964780903, "grad_norm": 0.3725661635398865, "learning_rate": 0.00013519379844961242, "loss": 0.5105, "step": 6484 }, { "epoch": 0.48083339512122786, "grad_norm": 0.35596194863319397, "learning_rate": 0.00013518379594898726, "loss": 0.5165, "step": 6485 }, { "epoch": 0.4809075405946467, "grad_norm": 0.37178611755371094, "learning_rate": 0.0001351737934483621, "loss": 0.5276, "step": 6486 }, { "epoch": 0.4809816860680655, "grad_norm": 0.38727429509162903, "learning_rate": 0.00013516379094773693, "loss": 0.5655, "step": 6487 }, { "epoch": 0.4810558315414844, "grad_norm": 0.3405962884426117, "learning_rate": 0.0001351537884471118, "loss": 0.5045, "step": 6488 }, { "epoch": 0.48112997701490323, "grad_norm": 0.33310946822166443, "learning_rate": 0.00013514378594648663, "loss": 0.4661, "step": 6489 }, { "epoch": 0.48120412248832206, "grad_norm": 0.3518099784851074, "learning_rate": 0.00013513378344586147, "loss": 0.4797, "step": 6490 }, { "epoch": 0.48127826796174095, "grad_norm": 0.38327234983444214, "learning_rate": 0.00013512378094523634, "loss": 0.5367, "step": 6491 }, { "epoch": 0.4813524134351598, "grad_norm": 0.355377197265625, "learning_rate": 0.00013511377844461115, "loss": 0.5199, "step": 6492 }, { "epoch": 0.4814265589085786, "grad_norm": 0.3616490960121155, "learning_rate": 0.000135103775943986, "loss": 0.4968, "step": 6493 }, { "epoch": 0.4815007043819975, "grad_norm": 0.37958839535713196, "learning_rate": 0.00013509377344336085, "loss": 0.505, "step": 6494 }, { "epoch": 0.4815748498554163, "grad_norm": 0.3348217010498047, "learning_rate": 0.0001350837709427357, "loss": 0.5022, "step": 6495 }, { "epoch": 0.48164899532883515, "grad_norm": 0.36545026302337646, "learning_rate": 0.00013507376844211052, "loss": 0.4934, "step": 6496 }, { "epoch": 0.48172314080225404, "grad_norm": 0.34712904691696167, "learning_rate": 0.00013506376594148538, "loss": 0.4945, "step": 6497 }, { "epoch": 0.48179728627567286, "grad_norm": 0.37082239985466003, "learning_rate": 0.00013505376344086022, "loss": 0.5144, "step": 6498 }, { "epoch": 0.4818714317490917, "grad_norm": 0.3943297266960144, "learning_rate": 0.00013504376094023509, "loss": 0.5248, "step": 6499 }, { "epoch": 0.4819455772225106, "grad_norm": 0.40031352639198303, "learning_rate": 0.0001350337584396099, "loss": 0.5746, "step": 6500 }, { "epoch": 0.4820197226959294, "grad_norm": 0.3612340986728668, "learning_rate": 0.00013502375593898476, "loss": 0.5239, "step": 6501 }, { "epoch": 0.48209386816934824, "grad_norm": 0.4004204273223877, "learning_rate": 0.0001350137534383596, "loss": 0.5131, "step": 6502 }, { "epoch": 0.4821680136427671, "grad_norm": 0.37803715467453003, "learning_rate": 0.00013500375093773443, "loss": 0.5254, "step": 6503 }, { "epoch": 0.48224215911618595, "grad_norm": 0.36531275510787964, "learning_rate": 0.00013499374843710927, "loss": 0.5272, "step": 6504 }, { "epoch": 0.4823163045896048, "grad_norm": 0.3662126064300537, "learning_rate": 0.00013498374593648414, "loss": 0.518, "step": 6505 }, { "epoch": 0.48239045006302367, "grad_norm": 0.3664836287498474, "learning_rate": 0.00013497374343585897, "loss": 0.5769, "step": 6506 }, { "epoch": 0.4824645955364425, "grad_norm": 0.3798316717147827, "learning_rate": 0.0001349637409352338, "loss": 0.5598, "step": 6507 }, { "epoch": 0.4825387410098613, "grad_norm": 0.34824272990226746, "learning_rate": 0.00013495373843460865, "loss": 0.4931, "step": 6508 }, { "epoch": 0.4826128864832802, "grad_norm": 0.35383957624435425, "learning_rate": 0.0001349437359339835, "loss": 0.542, "step": 6509 }, { "epoch": 0.48268703195669904, "grad_norm": 0.37162303924560547, "learning_rate": 0.00013493373343335835, "loss": 0.5161, "step": 6510 }, { "epoch": 0.48276117743011787, "grad_norm": 0.3730892241001129, "learning_rate": 0.00013492373093273319, "loss": 0.5446, "step": 6511 }, { "epoch": 0.48283532290353676, "grad_norm": 0.3475194573402405, "learning_rate": 0.00013491372843210802, "loss": 0.4817, "step": 6512 }, { "epoch": 0.4829094683769556, "grad_norm": 0.3397110402584076, "learning_rate": 0.0001349037259314829, "loss": 0.4836, "step": 6513 }, { "epoch": 0.4829836138503744, "grad_norm": 0.3593451678752899, "learning_rate": 0.00013489372343085772, "loss": 0.5275, "step": 6514 }, { "epoch": 0.4830577593237933, "grad_norm": 0.375315397977829, "learning_rate": 0.00013488372093023256, "loss": 0.5466, "step": 6515 }, { "epoch": 0.48313190479721213, "grad_norm": 0.3346780836582184, "learning_rate": 0.0001348737184296074, "loss": 0.4928, "step": 6516 }, { "epoch": 0.48320605027063096, "grad_norm": 0.3741994798183441, "learning_rate": 0.00013486371592898226, "loss": 0.5474, "step": 6517 }, { "epoch": 0.48328019574404985, "grad_norm": 0.369707316160202, "learning_rate": 0.0001348537134283571, "loss": 0.5041, "step": 6518 }, { "epoch": 0.4833543412174687, "grad_norm": 0.3560985326766968, "learning_rate": 0.00013484371092773194, "loss": 0.5172, "step": 6519 }, { "epoch": 0.4834284866908875, "grad_norm": 0.38391950726509094, "learning_rate": 0.00013483370842710677, "loss": 0.5032, "step": 6520 }, { "epoch": 0.4835026321643064, "grad_norm": 0.3797623813152313, "learning_rate": 0.00013482370592648164, "loss": 0.526, "step": 6521 }, { "epoch": 0.4835767776377252, "grad_norm": 0.36588403582572937, "learning_rate": 0.00013481370342585647, "loss": 0.5542, "step": 6522 }, { "epoch": 0.48365092311114405, "grad_norm": 0.3415191173553467, "learning_rate": 0.0001348037009252313, "loss": 0.5146, "step": 6523 }, { "epoch": 0.48372506858456293, "grad_norm": 0.3494170606136322, "learning_rate": 0.00013479369842460618, "loss": 0.508, "step": 6524 }, { "epoch": 0.48379921405798176, "grad_norm": 0.32965776324272156, "learning_rate": 0.000134783695923981, "loss": 0.5096, "step": 6525 }, { "epoch": 0.4838733595314006, "grad_norm": 0.3466821014881134, "learning_rate": 0.00013477369342335585, "loss": 0.521, "step": 6526 }, { "epoch": 0.4839475050048195, "grad_norm": 0.3680642247200012, "learning_rate": 0.0001347636909227307, "loss": 0.5418, "step": 6527 }, { "epoch": 0.4840216504782383, "grad_norm": 0.38264816999435425, "learning_rate": 0.00013475368842210555, "loss": 0.5578, "step": 6528 }, { "epoch": 0.48409579595165714, "grad_norm": 0.36928626894950867, "learning_rate": 0.00013474368592148036, "loss": 0.5271, "step": 6529 }, { "epoch": 0.484169941425076, "grad_norm": 0.34704992175102234, "learning_rate": 0.00013473368342085523, "loss": 0.4843, "step": 6530 }, { "epoch": 0.48424408689849485, "grad_norm": 0.3698643743991852, "learning_rate": 0.00013472368092023006, "loss": 0.4934, "step": 6531 }, { "epoch": 0.4843182323719137, "grad_norm": 0.36810603737831116, "learning_rate": 0.00013471367841960493, "loss": 0.5672, "step": 6532 }, { "epoch": 0.48439237784533257, "grad_norm": 0.35485783219337463, "learning_rate": 0.00013470367591897974, "loss": 0.5156, "step": 6533 }, { "epoch": 0.4844665233187514, "grad_norm": 0.35255953669548035, "learning_rate": 0.0001346936734183546, "loss": 0.4885, "step": 6534 }, { "epoch": 0.4845406687921702, "grad_norm": 0.35776445269584656, "learning_rate": 0.00013468367091772944, "loss": 0.4798, "step": 6535 }, { "epoch": 0.4846148142655891, "grad_norm": 0.3546157479286194, "learning_rate": 0.0001346736684171043, "loss": 0.5191, "step": 6536 }, { "epoch": 0.48468895973900794, "grad_norm": 0.3543609380722046, "learning_rate": 0.0001346636659164791, "loss": 0.481, "step": 6537 }, { "epoch": 0.48476310521242677, "grad_norm": 0.3649398684501648, "learning_rate": 0.00013465366341585398, "loss": 0.4847, "step": 6538 }, { "epoch": 0.48483725068584566, "grad_norm": 0.3808706998825073, "learning_rate": 0.00013464366091522881, "loss": 0.5263, "step": 6539 }, { "epoch": 0.4849113961592645, "grad_norm": 0.35155487060546875, "learning_rate": 0.00013463365841460365, "loss": 0.4969, "step": 6540 }, { "epoch": 0.4849855416326833, "grad_norm": 0.35908928513526917, "learning_rate": 0.0001346236559139785, "loss": 0.5014, "step": 6541 }, { "epoch": 0.4850596871061022, "grad_norm": 0.3477247655391693, "learning_rate": 0.00013461365341335335, "loss": 0.4786, "step": 6542 }, { "epoch": 0.48513383257952103, "grad_norm": 0.3426162004470825, "learning_rate": 0.0001346036509127282, "loss": 0.4968, "step": 6543 }, { "epoch": 0.48520797805293986, "grad_norm": 0.3569897711277008, "learning_rate": 0.00013459364841210303, "loss": 0.5388, "step": 6544 }, { "epoch": 0.4852821235263587, "grad_norm": 0.34849151968955994, "learning_rate": 0.00013458364591147786, "loss": 0.4778, "step": 6545 }, { "epoch": 0.4853562689997776, "grad_norm": 0.34553462266921997, "learning_rate": 0.00013457364341085273, "loss": 0.5047, "step": 6546 }, { "epoch": 0.4854304144731964, "grad_norm": 0.3679822087287903, "learning_rate": 0.00013456364091022756, "loss": 0.5209, "step": 6547 }, { "epoch": 0.48550455994661523, "grad_norm": 0.3676431179046631, "learning_rate": 0.0001345536384096024, "loss": 0.5204, "step": 6548 }, { "epoch": 0.4855787054200341, "grad_norm": 0.33493325114250183, "learning_rate": 0.00013454363590897724, "loss": 0.4904, "step": 6549 }, { "epoch": 0.48565285089345295, "grad_norm": 0.3537653982639313, "learning_rate": 0.0001345336334083521, "loss": 0.4833, "step": 6550 }, { "epoch": 0.4857269963668718, "grad_norm": 0.3476586639881134, "learning_rate": 0.00013452363090772694, "loss": 0.4983, "step": 6551 }, { "epoch": 0.48580114184029066, "grad_norm": 0.36251217126846313, "learning_rate": 0.00013451362840710178, "loss": 0.5398, "step": 6552 }, { "epoch": 0.4858752873137095, "grad_norm": 0.36621448397636414, "learning_rate": 0.00013450362590647661, "loss": 0.5258, "step": 6553 }, { "epoch": 0.4859494327871283, "grad_norm": 0.34835416078567505, "learning_rate": 0.00013449362340585148, "loss": 0.5076, "step": 6554 }, { "epoch": 0.4860235782605472, "grad_norm": 0.3929285407066345, "learning_rate": 0.00013448362090522632, "loss": 0.5226, "step": 6555 }, { "epoch": 0.48609772373396604, "grad_norm": 0.3591032326221466, "learning_rate": 0.00013447361840460115, "loss": 0.5405, "step": 6556 }, { "epoch": 0.48617186920738487, "grad_norm": 0.3706973195075989, "learning_rate": 0.00013446361590397602, "loss": 0.5309, "step": 6557 }, { "epoch": 0.48624601468080375, "grad_norm": 0.33870649337768555, "learning_rate": 0.00013445361340335085, "loss": 0.494, "step": 6558 }, { "epoch": 0.4863201601542226, "grad_norm": 0.3525405526161194, "learning_rate": 0.0001344436109027257, "loss": 0.504, "step": 6559 }, { "epoch": 0.4863943056276414, "grad_norm": 0.3874291479587555, "learning_rate": 0.00013443360840210053, "loss": 0.4742, "step": 6560 }, { "epoch": 0.4864684511010603, "grad_norm": 0.3579494059085846, "learning_rate": 0.0001344236059014754, "loss": 0.4936, "step": 6561 }, { "epoch": 0.4865425965744791, "grad_norm": 0.31913939118385315, "learning_rate": 0.00013441360340085023, "loss": 0.4822, "step": 6562 }, { "epoch": 0.48661674204789795, "grad_norm": 0.3636752963066101, "learning_rate": 0.00013440360090022507, "loss": 0.5095, "step": 6563 }, { "epoch": 0.48669088752131684, "grad_norm": 0.36975008249282837, "learning_rate": 0.0001343935983995999, "loss": 0.5288, "step": 6564 }, { "epoch": 0.48676503299473567, "grad_norm": 0.3251638114452362, "learning_rate": 0.00013438359589897477, "loss": 0.4922, "step": 6565 }, { "epoch": 0.4868391784681545, "grad_norm": 0.34569621086120605, "learning_rate": 0.00013437359339834958, "loss": 0.4938, "step": 6566 }, { "epoch": 0.4869133239415734, "grad_norm": 0.3799072504043579, "learning_rate": 0.00013436359089772444, "loss": 0.5326, "step": 6567 }, { "epoch": 0.4869874694149922, "grad_norm": 0.3583623170852661, "learning_rate": 0.00013435358839709928, "loss": 0.5356, "step": 6568 }, { "epoch": 0.48706161488841104, "grad_norm": 0.3509497046470642, "learning_rate": 0.00013434358589647414, "loss": 0.491, "step": 6569 }, { "epoch": 0.48713576036182993, "grad_norm": 0.33864468336105347, "learning_rate": 0.00013433358339584895, "loss": 0.5253, "step": 6570 }, { "epoch": 0.48720990583524876, "grad_norm": 0.35015615820884705, "learning_rate": 0.00013432358089522382, "loss": 0.532, "step": 6571 }, { "epoch": 0.4872840513086676, "grad_norm": 0.3497140407562256, "learning_rate": 0.00013431357839459865, "loss": 0.5139, "step": 6572 }, { "epoch": 0.4873581967820865, "grad_norm": 0.3904334306716919, "learning_rate": 0.00013430357589397352, "loss": 0.5416, "step": 6573 }, { "epoch": 0.4874323422555053, "grad_norm": 0.37551453709602356, "learning_rate": 0.00013429357339334833, "loss": 0.5361, "step": 6574 }, { "epoch": 0.48750648772892413, "grad_norm": 0.3627299964427948, "learning_rate": 0.0001342835708927232, "loss": 0.4982, "step": 6575 }, { "epoch": 0.487580633202343, "grad_norm": 0.3391686975955963, "learning_rate": 0.00013427356839209803, "loss": 0.4955, "step": 6576 }, { "epoch": 0.48765477867576185, "grad_norm": 0.3678604066371918, "learning_rate": 0.00013426356589147287, "loss": 0.5312, "step": 6577 }, { "epoch": 0.4877289241491807, "grad_norm": 0.33793947100639343, "learning_rate": 0.0001342535633908477, "loss": 0.5171, "step": 6578 }, { "epoch": 0.48780306962259956, "grad_norm": 0.3494738042354584, "learning_rate": 0.00013424356089022257, "loss": 0.5451, "step": 6579 }, { "epoch": 0.4878772150960184, "grad_norm": 0.35876718163490295, "learning_rate": 0.0001342335583895974, "loss": 0.5397, "step": 6580 }, { "epoch": 0.4879513605694372, "grad_norm": 0.36914491653442383, "learning_rate": 0.00013422355588897224, "loss": 0.5578, "step": 6581 }, { "epoch": 0.4880255060428561, "grad_norm": 0.3629806339740753, "learning_rate": 0.00013421355338834708, "loss": 0.5239, "step": 6582 }, { "epoch": 0.48809965151627493, "grad_norm": 0.3532676696777344, "learning_rate": 0.00013420355088772194, "loss": 0.495, "step": 6583 }, { "epoch": 0.48817379698969376, "grad_norm": 0.35469114780426025, "learning_rate": 0.00013419354838709678, "loss": 0.5346, "step": 6584 }, { "epoch": 0.48824794246311265, "grad_norm": 0.35434314608573914, "learning_rate": 0.00013418354588647162, "loss": 0.5182, "step": 6585 }, { "epoch": 0.4883220879365315, "grad_norm": 0.3592025935649872, "learning_rate": 0.00013417354338584645, "loss": 0.5459, "step": 6586 }, { "epoch": 0.4883962334099503, "grad_norm": 0.35740193724632263, "learning_rate": 0.00013416354088522132, "loss": 0.4953, "step": 6587 }, { "epoch": 0.4884703788833692, "grad_norm": 0.35949021577835083, "learning_rate": 0.00013415353838459616, "loss": 0.4826, "step": 6588 }, { "epoch": 0.488544524356788, "grad_norm": 0.341937780380249, "learning_rate": 0.000134143535883971, "loss": 0.4852, "step": 6589 }, { "epoch": 0.48861866983020685, "grad_norm": 0.3472062051296234, "learning_rate": 0.00013413353338334586, "loss": 0.4834, "step": 6590 }, { "epoch": 0.48869281530362574, "grad_norm": 0.3775700330734253, "learning_rate": 0.0001341235308827207, "loss": 0.5193, "step": 6591 }, { "epoch": 0.48876696077704457, "grad_norm": 0.36333951354026794, "learning_rate": 0.00013411352838209553, "loss": 0.5016, "step": 6592 }, { "epoch": 0.4888411062504634, "grad_norm": 0.38447055220603943, "learning_rate": 0.00013410352588147037, "loss": 0.5562, "step": 6593 }, { "epoch": 0.4889152517238823, "grad_norm": 0.3270704746246338, "learning_rate": 0.00013409352338084523, "loss": 0.4541, "step": 6594 }, { "epoch": 0.4889893971973011, "grad_norm": 0.36160483956336975, "learning_rate": 0.00013408352088022007, "loss": 0.5183, "step": 6595 }, { "epoch": 0.48906354267071994, "grad_norm": 0.3803979158401489, "learning_rate": 0.0001340735183795949, "loss": 0.5516, "step": 6596 }, { "epoch": 0.4891376881441388, "grad_norm": 0.352016806602478, "learning_rate": 0.00013406351587896974, "loss": 0.5265, "step": 6597 }, { "epoch": 0.48921183361755766, "grad_norm": 0.3550780415534973, "learning_rate": 0.0001340535133783446, "loss": 0.5041, "step": 6598 }, { "epoch": 0.4892859790909765, "grad_norm": 0.3756774365901947, "learning_rate": 0.00013404351087771945, "loss": 0.538, "step": 6599 }, { "epoch": 0.48936012456439537, "grad_norm": 0.36558935046195984, "learning_rate": 0.00013403350837709428, "loss": 0.526, "step": 6600 }, { "epoch": 0.4894342700378142, "grad_norm": 0.34883174300193787, "learning_rate": 0.00013402350587646912, "loss": 0.4982, "step": 6601 }, { "epoch": 0.48950841551123303, "grad_norm": 0.3611617386341095, "learning_rate": 0.00013401350337584398, "loss": 0.5015, "step": 6602 }, { "epoch": 0.48958256098465186, "grad_norm": 0.3553368151187897, "learning_rate": 0.0001340035008752188, "loss": 0.4741, "step": 6603 }, { "epoch": 0.48965670645807075, "grad_norm": 0.3555423617362976, "learning_rate": 0.00013399349837459366, "loss": 0.4974, "step": 6604 }, { "epoch": 0.4897308519314896, "grad_norm": 0.3606511056423187, "learning_rate": 0.0001339834958739685, "loss": 0.5039, "step": 6605 }, { "epoch": 0.4898049974049084, "grad_norm": 0.35381051898002625, "learning_rate": 0.00013397349337334336, "loss": 0.5151, "step": 6606 }, { "epoch": 0.4898791428783273, "grad_norm": 0.3602343499660492, "learning_rate": 0.00013396349087271817, "loss": 0.4841, "step": 6607 }, { "epoch": 0.4899532883517461, "grad_norm": 0.35090547800064087, "learning_rate": 0.00013395348837209303, "loss": 0.4979, "step": 6608 }, { "epoch": 0.49002743382516495, "grad_norm": 0.3581750690937042, "learning_rate": 0.00013394348587146787, "loss": 0.5031, "step": 6609 }, { "epoch": 0.49010157929858383, "grad_norm": 0.3821916878223419, "learning_rate": 0.00013393348337084273, "loss": 0.4863, "step": 6610 }, { "epoch": 0.49017572477200266, "grad_norm": 0.3762820363044739, "learning_rate": 0.00013392348087021754, "loss": 0.5573, "step": 6611 }, { "epoch": 0.4902498702454215, "grad_norm": 0.3652706444263458, "learning_rate": 0.0001339134783695924, "loss": 0.5233, "step": 6612 }, { "epoch": 0.4903240157188404, "grad_norm": 0.39078354835510254, "learning_rate": 0.00013390347586896725, "loss": 0.5286, "step": 6613 }, { "epoch": 0.4903981611922592, "grad_norm": 0.3558860719203949, "learning_rate": 0.00013389347336834208, "loss": 0.5047, "step": 6614 }, { "epoch": 0.49047230666567804, "grad_norm": 0.3493507504463196, "learning_rate": 0.00013388347086771692, "loss": 0.4909, "step": 6615 }, { "epoch": 0.4905464521390969, "grad_norm": 0.34185224771499634, "learning_rate": 0.00013387346836709178, "loss": 0.4826, "step": 6616 }, { "epoch": 0.49062059761251575, "grad_norm": 0.3868551254272461, "learning_rate": 0.00013386346586646662, "loss": 0.5278, "step": 6617 }, { "epoch": 0.4906947430859346, "grad_norm": 0.35697540640830994, "learning_rate": 0.00013385346336584146, "loss": 0.4807, "step": 6618 }, { "epoch": 0.49076888855935347, "grad_norm": 0.38900020718574524, "learning_rate": 0.0001338434608652163, "loss": 0.5543, "step": 6619 }, { "epoch": 0.4908430340327723, "grad_norm": 0.3465578854084015, "learning_rate": 0.00013383345836459116, "loss": 0.5025, "step": 6620 }, { "epoch": 0.4909171795061911, "grad_norm": 0.3483496308326721, "learning_rate": 0.000133823455863966, "loss": 0.4834, "step": 6621 }, { "epoch": 0.49099132497961, "grad_norm": 0.35824841260910034, "learning_rate": 0.00013381345336334083, "loss": 0.486, "step": 6622 }, { "epoch": 0.49106547045302884, "grad_norm": 0.3213825225830078, "learning_rate": 0.00013380345086271567, "loss": 0.4692, "step": 6623 }, { "epoch": 0.49113961592644767, "grad_norm": 0.35249170660972595, "learning_rate": 0.00013379344836209054, "loss": 0.4858, "step": 6624 }, { "epoch": 0.49121376139986656, "grad_norm": 0.3498983085155487, "learning_rate": 0.00013378344586146537, "loss": 0.5181, "step": 6625 }, { "epoch": 0.4912879068732854, "grad_norm": 0.38347429037094116, "learning_rate": 0.0001337734433608402, "loss": 0.5269, "step": 6626 }, { "epoch": 0.4913620523467042, "grad_norm": 0.3635953664779663, "learning_rate": 0.00013376344086021507, "loss": 0.5379, "step": 6627 }, { "epoch": 0.4914361978201231, "grad_norm": 0.34759604930877686, "learning_rate": 0.0001337534383595899, "loss": 0.4583, "step": 6628 }, { "epoch": 0.49151034329354193, "grad_norm": 0.36018893122673035, "learning_rate": 0.00013374343585896475, "loss": 0.509, "step": 6629 }, { "epoch": 0.49158448876696076, "grad_norm": 0.35684823989868164, "learning_rate": 0.00013373343335833959, "loss": 0.5229, "step": 6630 }, { "epoch": 0.49165863424037964, "grad_norm": 0.3561597466468811, "learning_rate": 0.00013372343085771445, "loss": 0.5241, "step": 6631 }, { "epoch": 0.4917327797137985, "grad_norm": 0.3633691072463989, "learning_rate": 0.00013371342835708929, "loss": 0.5259, "step": 6632 }, { "epoch": 0.4918069251872173, "grad_norm": 0.3885546326637268, "learning_rate": 0.00013370342585646412, "loss": 0.5041, "step": 6633 }, { "epoch": 0.4918810706606362, "grad_norm": 0.36273476481437683, "learning_rate": 0.00013369342335583896, "loss": 0.533, "step": 6634 }, { "epoch": 0.491955216134055, "grad_norm": 0.3641825318336487, "learning_rate": 0.00013368342085521382, "loss": 0.4896, "step": 6635 }, { "epoch": 0.49202936160747385, "grad_norm": 0.3367290794849396, "learning_rate": 0.00013367341835458866, "loss": 0.4903, "step": 6636 }, { "epoch": 0.49210350708089273, "grad_norm": 0.3343888223171234, "learning_rate": 0.0001336634158539635, "loss": 0.486, "step": 6637 }, { "epoch": 0.49217765255431156, "grad_norm": 0.3857872188091278, "learning_rate": 0.00013365341335333834, "loss": 0.5638, "step": 6638 }, { "epoch": 0.4922517980277304, "grad_norm": 0.37165558338165283, "learning_rate": 0.0001336434108527132, "loss": 0.5526, "step": 6639 }, { "epoch": 0.4923259435011493, "grad_norm": 0.3750844895839691, "learning_rate": 0.000133633408352088, "loss": 0.5323, "step": 6640 }, { "epoch": 0.4924000889745681, "grad_norm": 0.3702077269554138, "learning_rate": 0.00013362340585146287, "loss": 0.5622, "step": 6641 }, { "epoch": 0.49247423444798694, "grad_norm": 0.3546149432659149, "learning_rate": 0.0001336134033508377, "loss": 0.5015, "step": 6642 }, { "epoch": 0.4925483799214058, "grad_norm": 0.3688663840293884, "learning_rate": 0.00013360340085021258, "loss": 0.5688, "step": 6643 }, { "epoch": 0.49262252539482465, "grad_norm": 0.34995976090431213, "learning_rate": 0.00013359339834958739, "loss": 0.5147, "step": 6644 }, { "epoch": 0.4926966708682435, "grad_norm": 0.35416027903556824, "learning_rate": 0.00013358339584896225, "loss": 0.5286, "step": 6645 }, { "epoch": 0.49277081634166237, "grad_norm": 0.35788026452064514, "learning_rate": 0.0001335733933483371, "loss": 0.5193, "step": 6646 }, { "epoch": 0.4928449618150812, "grad_norm": 0.37363937497138977, "learning_rate": 0.00013356339084771195, "loss": 0.4851, "step": 6647 }, { "epoch": 0.4929191072885, "grad_norm": 0.3734034597873688, "learning_rate": 0.00013355338834708676, "loss": 0.5169, "step": 6648 }, { "epoch": 0.4929932527619189, "grad_norm": 0.3541716933250427, "learning_rate": 0.00013354338584646163, "loss": 0.4734, "step": 6649 }, { "epoch": 0.49306739823533774, "grad_norm": 0.32764920592308044, "learning_rate": 0.00013353338334583646, "loss": 0.4845, "step": 6650 }, { "epoch": 0.49314154370875657, "grad_norm": 0.3480457663536072, "learning_rate": 0.0001335233808452113, "loss": 0.5089, "step": 6651 }, { "epoch": 0.49321568918217545, "grad_norm": 0.3574621379375458, "learning_rate": 0.00013351337834458614, "loss": 0.496, "step": 6652 }, { "epoch": 0.4932898346555943, "grad_norm": 0.3678317070007324, "learning_rate": 0.000133503375843961, "loss": 0.5487, "step": 6653 }, { "epoch": 0.4933639801290131, "grad_norm": 0.3875795006752014, "learning_rate": 0.00013349337334333584, "loss": 0.5275, "step": 6654 }, { "epoch": 0.493438125602432, "grad_norm": 0.36034998297691345, "learning_rate": 0.00013348337084271067, "loss": 0.5127, "step": 6655 }, { "epoch": 0.49351227107585083, "grad_norm": 0.39600077271461487, "learning_rate": 0.0001334733683420855, "loss": 0.4868, "step": 6656 }, { "epoch": 0.49358641654926966, "grad_norm": 0.35355257987976074, "learning_rate": 0.00013346336584146038, "loss": 0.4901, "step": 6657 }, { "epoch": 0.4936605620226885, "grad_norm": 0.38552969694137573, "learning_rate": 0.0001334533633408352, "loss": 0.5313, "step": 6658 }, { "epoch": 0.4937347074961074, "grad_norm": 0.3620304763317108, "learning_rate": 0.00013344336084021005, "loss": 0.4874, "step": 6659 }, { "epoch": 0.4938088529695262, "grad_norm": 0.36373478174209595, "learning_rate": 0.00013343335833958491, "loss": 0.4841, "step": 6660 }, { "epoch": 0.49388299844294503, "grad_norm": 0.3740766942501068, "learning_rate": 0.00013342335583895975, "loss": 0.4999, "step": 6661 }, { "epoch": 0.4939571439163639, "grad_norm": 0.3844864070415497, "learning_rate": 0.0001334133533383346, "loss": 0.5349, "step": 6662 }, { "epoch": 0.49403128938978275, "grad_norm": 0.36902710795402527, "learning_rate": 0.00013340335083770943, "loss": 0.5176, "step": 6663 }, { "epoch": 0.4941054348632016, "grad_norm": 0.35531488060951233, "learning_rate": 0.0001333933483370843, "loss": 0.5088, "step": 6664 }, { "epoch": 0.49417958033662046, "grad_norm": 0.36711299419403076, "learning_rate": 0.00013338334583645913, "loss": 0.5191, "step": 6665 }, { "epoch": 0.4942537258100393, "grad_norm": 0.3697594702243805, "learning_rate": 0.00013337334333583396, "loss": 0.5378, "step": 6666 }, { "epoch": 0.4943278712834581, "grad_norm": 0.3483433425426483, "learning_rate": 0.0001333633408352088, "loss": 0.4997, "step": 6667 }, { "epoch": 0.494402016756877, "grad_norm": 0.40442273020744324, "learning_rate": 0.00013335333833458367, "loss": 0.5857, "step": 6668 }, { "epoch": 0.49447616223029583, "grad_norm": 0.36732184886932373, "learning_rate": 0.0001333433358339585, "loss": 0.4944, "step": 6669 }, { "epoch": 0.49455030770371466, "grad_norm": 0.3561113476753235, "learning_rate": 0.00013333333333333334, "loss": 0.4965, "step": 6670 }, { "epoch": 0.49462445317713355, "grad_norm": 0.37172698974609375, "learning_rate": 0.00013332333083270818, "loss": 0.5559, "step": 6671 }, { "epoch": 0.4946985986505524, "grad_norm": 0.3618784546852112, "learning_rate": 0.00013331332833208304, "loss": 0.4958, "step": 6672 }, { "epoch": 0.4947727441239712, "grad_norm": 0.3780965507030487, "learning_rate": 0.00013330332583145788, "loss": 0.5097, "step": 6673 }, { "epoch": 0.4948468895973901, "grad_norm": 0.36212751269340515, "learning_rate": 0.00013329332333083272, "loss": 0.5519, "step": 6674 }, { "epoch": 0.4949210350708089, "grad_norm": 0.37577250599861145, "learning_rate": 0.00013328332083020755, "loss": 0.5278, "step": 6675 }, { "epoch": 0.49499518054422775, "grad_norm": 0.3794649541378021, "learning_rate": 0.00013327331832958242, "loss": 0.5714, "step": 6676 }, { "epoch": 0.49506932601764664, "grad_norm": 0.33787909150123596, "learning_rate": 0.00013326331582895723, "loss": 0.5295, "step": 6677 }, { "epoch": 0.49514347149106547, "grad_norm": 0.34029993414878845, "learning_rate": 0.0001332533133283321, "loss": 0.4649, "step": 6678 }, { "epoch": 0.4952176169644843, "grad_norm": 0.36064180731773376, "learning_rate": 0.00013324331082770693, "loss": 0.5481, "step": 6679 }, { "epoch": 0.4952917624379032, "grad_norm": 0.36594823002815247, "learning_rate": 0.0001332333083270818, "loss": 0.5113, "step": 6680 }, { "epoch": 0.495365907911322, "grad_norm": 0.3661801815032959, "learning_rate": 0.0001332233058264566, "loss": 0.5674, "step": 6681 }, { "epoch": 0.49544005338474084, "grad_norm": 0.3591672480106354, "learning_rate": 0.00013321330332583147, "loss": 0.4997, "step": 6682 }, { "epoch": 0.4955141988581597, "grad_norm": 0.35681989789009094, "learning_rate": 0.0001332033008252063, "loss": 0.5285, "step": 6683 }, { "epoch": 0.49558834433157856, "grad_norm": 0.3506720960140228, "learning_rate": 0.00013319329832458117, "loss": 0.4939, "step": 6684 }, { "epoch": 0.4956624898049974, "grad_norm": 0.35322239995002747, "learning_rate": 0.00013318329582395598, "loss": 0.4969, "step": 6685 }, { "epoch": 0.49573663527841627, "grad_norm": 0.32887670397758484, "learning_rate": 0.00013317329332333084, "loss": 0.4614, "step": 6686 }, { "epoch": 0.4958107807518351, "grad_norm": 0.35784825682640076, "learning_rate": 0.00013316329082270568, "loss": 0.4898, "step": 6687 }, { "epoch": 0.49588492622525393, "grad_norm": 0.3477022051811218, "learning_rate": 0.00013315328832208052, "loss": 0.5068, "step": 6688 }, { "epoch": 0.4959590716986728, "grad_norm": 0.3396294414997101, "learning_rate": 0.00013314328582145535, "loss": 0.4628, "step": 6689 }, { "epoch": 0.49603321717209164, "grad_norm": 0.3629264235496521, "learning_rate": 0.00013313328332083022, "loss": 0.5197, "step": 6690 }, { "epoch": 0.4961073626455105, "grad_norm": 0.3707168400287628, "learning_rate": 0.00013312328082020505, "loss": 0.5219, "step": 6691 }, { "epoch": 0.49618150811892936, "grad_norm": 0.39749863743782043, "learning_rate": 0.0001331132783195799, "loss": 0.5285, "step": 6692 }, { "epoch": 0.4962556535923482, "grad_norm": 0.3816176950931549, "learning_rate": 0.00013310327581895476, "loss": 0.5403, "step": 6693 }, { "epoch": 0.496329799065767, "grad_norm": 0.35964909195899963, "learning_rate": 0.0001330932733183296, "loss": 0.5342, "step": 6694 }, { "epoch": 0.4964039445391859, "grad_norm": 0.36839839816093445, "learning_rate": 0.00013308327081770443, "loss": 0.5171, "step": 6695 }, { "epoch": 0.49647809001260473, "grad_norm": 0.35360971093177795, "learning_rate": 0.00013307326831707927, "loss": 0.524, "step": 6696 }, { "epoch": 0.49655223548602356, "grad_norm": 0.37522029876708984, "learning_rate": 0.00013306326581645413, "loss": 0.5044, "step": 6697 }, { "epoch": 0.49662638095944245, "grad_norm": 0.3833596706390381, "learning_rate": 0.00013305326331582897, "loss": 0.5333, "step": 6698 }, { "epoch": 0.4967005264328613, "grad_norm": 0.3627595007419586, "learning_rate": 0.0001330432608152038, "loss": 0.5108, "step": 6699 }, { "epoch": 0.4967746719062801, "grad_norm": 0.3726494610309601, "learning_rate": 0.00013303325831457864, "loss": 0.5327, "step": 6700 }, { "epoch": 0.496848817379699, "grad_norm": 0.3736337125301361, "learning_rate": 0.0001330232558139535, "loss": 0.5155, "step": 6701 }, { "epoch": 0.4969229628531178, "grad_norm": 0.3657910227775574, "learning_rate": 0.00013301325331332834, "loss": 0.5046, "step": 6702 }, { "epoch": 0.49699710832653665, "grad_norm": 0.3604627549648285, "learning_rate": 0.00013300325081270318, "loss": 0.533, "step": 6703 }, { "epoch": 0.49707125379995554, "grad_norm": 0.3710744082927704, "learning_rate": 0.00013299324831207802, "loss": 0.5258, "step": 6704 }, { "epoch": 0.49714539927337437, "grad_norm": 0.3785456418991089, "learning_rate": 0.00013298324581145288, "loss": 0.5247, "step": 6705 }, { "epoch": 0.4972195447467932, "grad_norm": 0.35101109743118286, "learning_rate": 0.00013297324331082772, "loss": 0.4944, "step": 6706 }, { "epoch": 0.4972936902202121, "grad_norm": 0.3392941653728485, "learning_rate": 0.00013296324081020256, "loss": 0.4952, "step": 6707 }, { "epoch": 0.4973678356936309, "grad_norm": 0.3842884600162506, "learning_rate": 0.0001329532383095774, "loss": 0.5808, "step": 6708 }, { "epoch": 0.49744198116704974, "grad_norm": 0.34643349051475525, "learning_rate": 0.00013294323580895226, "loss": 0.501, "step": 6709 }, { "epoch": 0.4975161266404686, "grad_norm": 0.37433162331581116, "learning_rate": 0.0001329332333083271, "loss": 0.5046, "step": 6710 }, { "epoch": 0.49759027211388746, "grad_norm": 0.3319459855556488, "learning_rate": 0.00013292323080770193, "loss": 0.4741, "step": 6711 }, { "epoch": 0.4976644175873063, "grad_norm": 0.37827441096305847, "learning_rate": 0.00013291322830707677, "loss": 0.5575, "step": 6712 }, { "epoch": 0.49773856306072517, "grad_norm": 0.3720438480377197, "learning_rate": 0.00013290322580645163, "loss": 0.5279, "step": 6713 }, { "epoch": 0.497812708534144, "grad_norm": 0.38210222125053406, "learning_rate": 0.00013289322330582644, "loss": 0.501, "step": 6714 }, { "epoch": 0.49788685400756283, "grad_norm": 0.34760215878486633, "learning_rate": 0.0001328832208052013, "loss": 0.4935, "step": 6715 }, { "epoch": 0.49796099948098166, "grad_norm": 0.34404873847961426, "learning_rate": 0.00013287321830457614, "loss": 0.4893, "step": 6716 }, { "epoch": 0.49803514495440054, "grad_norm": 0.338972806930542, "learning_rate": 0.000132863215803951, "loss": 0.4632, "step": 6717 }, { "epoch": 0.4981092904278194, "grad_norm": 0.3460709750652313, "learning_rate": 0.00013285321330332582, "loss": 0.4915, "step": 6718 }, { "epoch": 0.4981834359012382, "grad_norm": 0.33115673065185547, "learning_rate": 0.00013284321080270068, "loss": 0.5044, "step": 6719 }, { "epoch": 0.4982575813746571, "grad_norm": 0.3564261496067047, "learning_rate": 0.00013283320830207552, "loss": 0.5332, "step": 6720 }, { "epoch": 0.4983317268480759, "grad_norm": 0.35458633303642273, "learning_rate": 0.00013282320580145038, "loss": 0.4945, "step": 6721 }, { "epoch": 0.49840587232149475, "grad_norm": 0.3894752264022827, "learning_rate": 0.0001328132033008252, "loss": 0.5534, "step": 6722 }, { "epoch": 0.49848001779491363, "grad_norm": 0.361113041639328, "learning_rate": 0.00013280320080020006, "loss": 0.5195, "step": 6723 }, { "epoch": 0.49855416326833246, "grad_norm": 0.3715810775756836, "learning_rate": 0.0001327931982995749, "loss": 0.5305, "step": 6724 }, { "epoch": 0.4986283087417513, "grad_norm": 0.3596873879432678, "learning_rate": 0.00013278319579894973, "loss": 0.4815, "step": 6725 }, { "epoch": 0.4987024542151702, "grad_norm": 0.33243614435195923, "learning_rate": 0.0001327731932983246, "loss": 0.4625, "step": 6726 }, { "epoch": 0.498776599688589, "grad_norm": 0.3582807183265686, "learning_rate": 0.00013276319079769943, "loss": 0.5406, "step": 6727 }, { "epoch": 0.49885074516200784, "grad_norm": 0.349693238735199, "learning_rate": 0.00013275318829707427, "loss": 0.4833, "step": 6728 }, { "epoch": 0.4989248906354267, "grad_norm": 0.36013269424438477, "learning_rate": 0.0001327431857964491, "loss": 0.5233, "step": 6729 }, { "epoch": 0.49899903610884555, "grad_norm": 0.43895378708839417, "learning_rate": 0.00013273318329582397, "loss": 0.5589, "step": 6730 }, { "epoch": 0.4990731815822644, "grad_norm": 0.3607907295227051, "learning_rate": 0.0001327231807951988, "loss": 0.5265, "step": 6731 }, { "epoch": 0.49914732705568327, "grad_norm": 0.3545684218406677, "learning_rate": 0.00013271317829457365, "loss": 0.5015, "step": 6732 }, { "epoch": 0.4992214725291021, "grad_norm": 0.35451483726501465, "learning_rate": 0.00013270317579394848, "loss": 0.5344, "step": 6733 }, { "epoch": 0.4992956180025209, "grad_norm": 0.3563019335269928, "learning_rate": 0.00013269317329332335, "loss": 0.5102, "step": 6734 }, { "epoch": 0.4993697634759398, "grad_norm": 0.37882477045059204, "learning_rate": 0.00013268317079269818, "loss": 0.5251, "step": 6735 }, { "epoch": 0.49944390894935864, "grad_norm": 0.3642534613609314, "learning_rate": 0.00013267316829207305, "loss": 0.4973, "step": 6736 }, { "epoch": 0.49951805442277747, "grad_norm": 0.36589929461479187, "learning_rate": 0.00013266316579144786, "loss": 0.5215, "step": 6737 }, { "epoch": 0.49959219989619635, "grad_norm": 0.3663289248943329, "learning_rate": 0.00013265316329082272, "loss": 0.5141, "step": 6738 }, { "epoch": 0.4996663453696152, "grad_norm": 0.34566614031791687, "learning_rate": 0.00013264316079019756, "loss": 0.5022, "step": 6739 }, { "epoch": 0.499740490843034, "grad_norm": 0.38690701127052307, "learning_rate": 0.0001326331582895724, "loss": 0.5235, "step": 6740 }, { "epoch": 0.4998146363164529, "grad_norm": 0.3716033101081848, "learning_rate": 0.00013262315578894723, "loss": 0.5309, "step": 6741 }, { "epoch": 0.49988878178987173, "grad_norm": 0.3655371069908142, "learning_rate": 0.0001326131532883221, "loss": 0.5141, "step": 6742 }, { "epoch": 0.49996292726329056, "grad_norm": 0.37975725531578064, "learning_rate": 0.00013260315078769694, "loss": 0.538, "step": 6743 }, { "epoch": 0.5000370727367094, "grad_norm": 0.37531033158302307, "learning_rate": 0.00013259314828707177, "loss": 0.5647, "step": 6744 }, { "epoch": 0.5001112182101283, "grad_norm": 0.37478742003440857, "learning_rate": 0.0001325831457864466, "loss": 0.5551, "step": 6745 }, { "epoch": 0.5001853636835472, "grad_norm": 0.36375245451927185, "learning_rate": 0.00013257314328582147, "loss": 0.5169, "step": 6746 }, { "epoch": 0.5002595091569659, "grad_norm": 0.3611370623111725, "learning_rate": 0.0001325631407851963, "loss": 0.5022, "step": 6747 }, { "epoch": 0.5003336546303848, "grad_norm": 0.3741641938686371, "learning_rate": 0.00013255313828457115, "loss": 0.459, "step": 6748 }, { "epoch": 0.5004078001038037, "grad_norm": 0.3588627278804779, "learning_rate": 0.00013254313578394598, "loss": 0.5266, "step": 6749 }, { "epoch": 0.5004819455772225, "grad_norm": 0.3794173002243042, "learning_rate": 0.00013253313328332085, "loss": 0.5375, "step": 6750 }, { "epoch": 0.5005560910506414, "grad_norm": 0.3434228301048279, "learning_rate": 0.00013252313078269566, "loss": 0.5252, "step": 6751 }, { "epoch": 0.5006302365240602, "grad_norm": 0.3537376821041107, "learning_rate": 0.00013251312828207052, "loss": 0.5082, "step": 6752 }, { "epoch": 0.500704381997479, "grad_norm": 0.34972333908081055, "learning_rate": 0.00013250312578144536, "loss": 0.4656, "step": 6753 }, { "epoch": 0.5007785274708979, "grad_norm": 0.3414440453052521, "learning_rate": 0.00013249312328082022, "loss": 0.5173, "step": 6754 }, { "epoch": 0.5008526729443168, "grad_norm": 0.35314613580703735, "learning_rate": 0.00013248312078019503, "loss": 0.5221, "step": 6755 }, { "epoch": 0.5009268184177356, "grad_norm": 0.3764379620552063, "learning_rate": 0.0001324731182795699, "loss": 0.5229, "step": 6756 }, { "epoch": 0.5010009638911544, "grad_norm": 0.35994696617126465, "learning_rate": 0.00013246311577894474, "loss": 0.5373, "step": 6757 }, { "epoch": 0.5010751093645733, "grad_norm": 0.3433174788951874, "learning_rate": 0.0001324531132783196, "loss": 0.5095, "step": 6758 }, { "epoch": 0.5011492548379921, "grad_norm": 0.3927929997444153, "learning_rate": 0.00013244311077769444, "loss": 0.5993, "step": 6759 }, { "epoch": 0.501223400311411, "grad_norm": 0.3585349917411804, "learning_rate": 0.00013243310827706927, "loss": 0.518, "step": 6760 }, { "epoch": 0.5012975457848299, "grad_norm": 0.37163642048835754, "learning_rate": 0.0001324231057764441, "loss": 0.5377, "step": 6761 }, { "epoch": 0.5013716912582487, "grad_norm": 0.365878701210022, "learning_rate": 0.00013241310327581895, "loss": 0.5471, "step": 6762 }, { "epoch": 0.5014458367316675, "grad_norm": 0.34371253848075867, "learning_rate": 0.0001324031007751938, "loss": 0.5375, "step": 6763 }, { "epoch": 0.5015199822050864, "grad_norm": 0.36039531230926514, "learning_rate": 0.00013239309827456865, "loss": 0.528, "step": 6764 }, { "epoch": 0.5015941276785052, "grad_norm": 0.3570438325405121, "learning_rate": 0.0001323830957739435, "loss": 0.4729, "step": 6765 }, { "epoch": 0.5016682731519241, "grad_norm": 0.359424889087677, "learning_rate": 0.00013237309327331832, "loss": 0.5228, "step": 6766 }, { "epoch": 0.501742418625343, "grad_norm": 0.3415399491786957, "learning_rate": 0.0001323630907726932, "loss": 0.5211, "step": 6767 }, { "epoch": 0.5018165640987617, "grad_norm": 0.3674754798412323, "learning_rate": 0.00013235308827206803, "loss": 0.5444, "step": 6768 }, { "epoch": 0.5018907095721806, "grad_norm": 0.35361725091934204, "learning_rate": 0.0001323430857714429, "loss": 0.5196, "step": 6769 }, { "epoch": 0.5019648550455995, "grad_norm": 0.35148146748542786, "learning_rate": 0.0001323330832708177, "loss": 0.4781, "step": 6770 }, { "epoch": 0.5020390005190183, "grad_norm": 0.37113383412361145, "learning_rate": 0.00013232308077019256, "loss": 0.5461, "step": 6771 }, { "epoch": 0.5021131459924372, "grad_norm": 0.3563367426395416, "learning_rate": 0.0001323130782695674, "loss": 0.5063, "step": 6772 }, { "epoch": 0.5021872914658561, "grad_norm": 0.33977779746055603, "learning_rate": 0.00013230307576894226, "loss": 0.4715, "step": 6773 }, { "epoch": 0.5022614369392748, "grad_norm": 0.3440571427345276, "learning_rate": 0.00013229307326831707, "loss": 0.4668, "step": 6774 }, { "epoch": 0.5023355824126937, "grad_norm": 0.3574184477329254, "learning_rate": 0.00013228307076769194, "loss": 0.5089, "step": 6775 }, { "epoch": 0.5024097278861126, "grad_norm": 0.3660424053668976, "learning_rate": 0.00013227306826706678, "loss": 0.5587, "step": 6776 }, { "epoch": 0.5024838733595314, "grad_norm": 0.38025981187820435, "learning_rate": 0.0001322630657664416, "loss": 0.5421, "step": 6777 }, { "epoch": 0.5025580188329503, "grad_norm": 0.3550576865673065, "learning_rate": 0.00013225306326581645, "loss": 0.4791, "step": 6778 }, { "epoch": 0.5026321643063691, "grad_norm": 0.35454609990119934, "learning_rate": 0.00013224306076519131, "loss": 0.4784, "step": 6779 }, { "epoch": 0.5027063097797879, "grad_norm": 0.3927958607673645, "learning_rate": 0.00013223305826456615, "loss": 0.5511, "step": 6780 }, { "epoch": 0.5027804552532068, "grad_norm": 0.3653404712677002, "learning_rate": 0.000132223055763941, "loss": 0.5347, "step": 6781 }, { "epoch": 0.5028546007266257, "grad_norm": 0.3934316635131836, "learning_rate": 0.00013221305326331583, "loss": 0.5282, "step": 6782 }, { "epoch": 0.5029287462000445, "grad_norm": 0.35973668098449707, "learning_rate": 0.0001322030507626907, "loss": 0.5081, "step": 6783 }, { "epoch": 0.5030028916734633, "grad_norm": 0.3801019489765167, "learning_rate": 0.00013219304826206553, "loss": 0.5366, "step": 6784 }, { "epoch": 0.5030770371468822, "grad_norm": 0.37192898988723755, "learning_rate": 0.00013218304576144036, "loss": 0.5246, "step": 6785 }, { "epoch": 0.503151182620301, "grad_norm": 0.3585677146911621, "learning_rate": 0.0001321730432608152, "loss": 0.5247, "step": 6786 }, { "epoch": 0.5032253280937199, "grad_norm": 0.35056376457214355, "learning_rate": 0.00013216304076019007, "loss": 0.5067, "step": 6787 }, { "epoch": 0.5032994735671388, "grad_norm": 0.3456207513809204, "learning_rate": 0.00013215303825956488, "loss": 0.4972, "step": 6788 }, { "epoch": 0.5033736190405576, "grad_norm": 0.3801870346069336, "learning_rate": 0.00013214303575893974, "loss": 0.5602, "step": 6789 }, { "epoch": 0.5034477645139764, "grad_norm": 0.34586724638938904, "learning_rate": 0.00013213303325831458, "loss": 0.4862, "step": 6790 }, { "epoch": 0.5035219099873953, "grad_norm": 0.36527934670448303, "learning_rate": 0.00013212303075768944, "loss": 0.5456, "step": 6791 }, { "epoch": 0.5035960554608141, "grad_norm": 0.3491443693637848, "learning_rate": 0.00013211302825706425, "loss": 0.5075, "step": 6792 }, { "epoch": 0.503670200934233, "grad_norm": 0.33784759044647217, "learning_rate": 0.00013210302575643911, "loss": 0.4864, "step": 6793 }, { "epoch": 0.5037443464076519, "grad_norm": 0.37285155057907104, "learning_rate": 0.00013209302325581395, "loss": 0.525, "step": 6794 }, { "epoch": 0.5038184918810706, "grad_norm": 0.3615849018096924, "learning_rate": 0.00013208302075518882, "loss": 0.5223, "step": 6795 }, { "epoch": 0.5038926373544895, "grad_norm": 0.3582593500614166, "learning_rate": 0.00013207301825456365, "loss": 0.5101, "step": 6796 }, { "epoch": 0.5039667828279084, "grad_norm": 0.3586139678955078, "learning_rate": 0.0001320630157539385, "loss": 0.4992, "step": 6797 }, { "epoch": 0.5040409283013272, "grad_norm": 0.3525404930114746, "learning_rate": 0.00013205301325331333, "loss": 0.5125, "step": 6798 }, { "epoch": 0.5041150737747461, "grad_norm": 0.37337803840637207, "learning_rate": 0.00013204301075268816, "loss": 0.5472, "step": 6799 }, { "epoch": 0.504189219248165, "grad_norm": 0.3589365482330322, "learning_rate": 0.00013203300825206303, "loss": 0.5137, "step": 6800 }, { "epoch": 0.5042633647215837, "grad_norm": 0.37013036012649536, "learning_rate": 0.00013202300575143787, "loss": 0.4963, "step": 6801 }, { "epoch": 0.5043375101950026, "grad_norm": 0.3669063150882721, "learning_rate": 0.00013201300325081273, "loss": 0.4785, "step": 6802 }, { "epoch": 0.5044116556684214, "grad_norm": 0.3325061500072479, "learning_rate": 0.00013200300075018754, "loss": 0.4681, "step": 6803 }, { "epoch": 0.5044858011418403, "grad_norm": 0.36329764127731323, "learning_rate": 0.0001319929982495624, "loss": 0.5062, "step": 6804 }, { "epoch": 0.5045599466152592, "grad_norm": 0.3614291548728943, "learning_rate": 0.00013198299574893724, "loss": 0.5148, "step": 6805 }, { "epoch": 0.5046340920886779, "grad_norm": 0.3657486140727997, "learning_rate": 0.0001319729932483121, "loss": 0.5196, "step": 6806 }, { "epoch": 0.5047082375620968, "grad_norm": 0.3834474980831146, "learning_rate": 0.00013196299074768692, "loss": 0.5309, "step": 6807 }, { "epoch": 0.5047823830355157, "grad_norm": 0.35452210903167725, "learning_rate": 0.00013195298824706178, "loss": 0.5034, "step": 6808 }, { "epoch": 0.5048565285089345, "grad_norm": 0.3680874705314636, "learning_rate": 0.00013194298574643662, "loss": 0.5242, "step": 6809 }, { "epoch": 0.5049306739823534, "grad_norm": 0.34737589955329895, "learning_rate": 0.00013193298324581148, "loss": 0.497, "step": 6810 }, { "epoch": 0.5050048194557722, "grad_norm": 0.33966127038002014, "learning_rate": 0.0001319229807451863, "loss": 0.4842, "step": 6811 }, { "epoch": 0.505078964929191, "grad_norm": 0.3537489175796509, "learning_rate": 0.00013191297824456116, "loss": 0.487, "step": 6812 }, { "epoch": 0.5051531104026099, "grad_norm": 0.3942893147468567, "learning_rate": 0.000131902975743936, "loss": 0.5301, "step": 6813 }, { "epoch": 0.5052272558760288, "grad_norm": 0.37748461961746216, "learning_rate": 0.00013189297324331083, "loss": 0.5421, "step": 6814 }, { "epoch": 0.5053014013494476, "grad_norm": 0.3432323932647705, "learning_rate": 0.00013188297074268567, "loss": 0.455, "step": 6815 }, { "epoch": 0.5053755468228665, "grad_norm": 0.3874692916870117, "learning_rate": 0.00013187296824206053, "loss": 0.5424, "step": 6816 }, { "epoch": 0.5054496922962853, "grad_norm": 0.37855756282806396, "learning_rate": 0.00013186296574143537, "loss": 0.5478, "step": 6817 }, { "epoch": 0.5055238377697041, "grad_norm": 0.3588804006576538, "learning_rate": 0.0001318529632408102, "loss": 0.5112, "step": 6818 }, { "epoch": 0.505597983243123, "grad_norm": 0.36674764752388, "learning_rate": 0.00013184296074018504, "loss": 0.5197, "step": 6819 }, { "epoch": 0.5056721287165419, "grad_norm": 0.36238738894462585, "learning_rate": 0.0001318329582395599, "loss": 0.5059, "step": 6820 }, { "epoch": 0.5057462741899607, "grad_norm": 0.3557130694389343, "learning_rate": 0.00013182295573893474, "loss": 0.5047, "step": 6821 }, { "epoch": 0.5058204196633795, "grad_norm": 0.3622131943702698, "learning_rate": 0.00013181295323830958, "loss": 0.5343, "step": 6822 }, { "epoch": 0.5058945651367984, "grad_norm": 0.3489252030849457, "learning_rate": 0.00013180295073768442, "loss": 0.4751, "step": 6823 }, { "epoch": 0.5059687106102172, "grad_norm": 0.3508610427379608, "learning_rate": 0.00013179294823705928, "loss": 0.5134, "step": 6824 }, { "epoch": 0.5060428560836361, "grad_norm": 0.3710440397262573, "learning_rate": 0.0001317829457364341, "loss": 0.5146, "step": 6825 }, { "epoch": 0.506117001557055, "grad_norm": 0.3501710295677185, "learning_rate": 0.00013177294323580896, "loss": 0.5066, "step": 6826 }, { "epoch": 0.5061911470304737, "grad_norm": 0.3586060106754303, "learning_rate": 0.0001317629407351838, "loss": 0.4983, "step": 6827 }, { "epoch": 0.5062652925038926, "grad_norm": 0.3773355185985565, "learning_rate": 0.00013175293823455866, "loss": 0.5184, "step": 6828 }, { "epoch": 0.5063394379773115, "grad_norm": 0.3763066530227661, "learning_rate": 0.0001317429357339335, "loss": 0.5318, "step": 6829 }, { "epoch": 0.5064135834507303, "grad_norm": 0.3662862181663513, "learning_rate": 0.00013173293323330833, "loss": 0.5383, "step": 6830 }, { "epoch": 0.5064877289241492, "grad_norm": 0.3311742842197418, "learning_rate": 0.00013172293073268317, "loss": 0.4563, "step": 6831 }, { "epoch": 0.5065618743975681, "grad_norm": 0.3612975776195526, "learning_rate": 0.00013171292823205803, "loss": 0.5184, "step": 6832 }, { "epoch": 0.5066360198709868, "grad_norm": 0.37694600224494934, "learning_rate": 0.00013170292573143287, "loss": 0.5289, "step": 6833 }, { "epoch": 0.5067101653444057, "grad_norm": 0.34961140155792236, "learning_rate": 0.0001316929232308077, "loss": 0.5118, "step": 6834 }, { "epoch": 0.5067843108178246, "grad_norm": 0.41253548860549927, "learning_rate": 0.00013168292073018257, "loss": 0.4865, "step": 6835 }, { "epoch": 0.5068584562912434, "grad_norm": 0.36856070160865784, "learning_rate": 0.00013167291822955738, "loss": 0.509, "step": 6836 }, { "epoch": 0.5069326017646623, "grad_norm": 0.3686131238937378, "learning_rate": 0.00013166291572893225, "loss": 0.5287, "step": 6837 }, { "epoch": 0.5070067472380811, "grad_norm": 0.37398019433021545, "learning_rate": 0.00013165291322830708, "loss": 0.5521, "step": 6838 }, { "epoch": 0.5070808927114999, "grad_norm": 0.36879438161849976, "learning_rate": 0.00013164291072768195, "loss": 0.4966, "step": 6839 }, { "epoch": 0.5071550381849188, "grad_norm": 0.38311272859573364, "learning_rate": 0.00013163290822705676, "loss": 0.5294, "step": 6840 }, { "epoch": 0.5072291836583377, "grad_norm": 0.37118056416511536, "learning_rate": 0.00013162290572643162, "loss": 0.5295, "step": 6841 }, { "epoch": 0.5073033291317565, "grad_norm": 0.3945316672325134, "learning_rate": 0.00013161290322580646, "loss": 0.5406, "step": 6842 }, { "epoch": 0.5073774746051753, "grad_norm": 0.3807591497898102, "learning_rate": 0.00013160290072518132, "loss": 0.5376, "step": 6843 }, { "epoch": 0.5074516200785942, "grad_norm": 0.3610311448574066, "learning_rate": 0.00013159289822455613, "loss": 0.5303, "step": 6844 }, { "epoch": 0.507525765552013, "grad_norm": 0.36536216735839844, "learning_rate": 0.000131582895723931, "loss": 0.5116, "step": 6845 }, { "epoch": 0.5075999110254319, "grad_norm": 0.377631813287735, "learning_rate": 0.00013157289322330583, "loss": 0.5247, "step": 6846 }, { "epoch": 0.5076740564988508, "grad_norm": 0.3482397794723511, "learning_rate": 0.0001315628907226807, "loss": 0.4826, "step": 6847 }, { "epoch": 0.5077482019722696, "grad_norm": 0.37516769766807556, "learning_rate": 0.0001315528882220555, "loss": 0.5353, "step": 6848 }, { "epoch": 0.5078223474456884, "grad_norm": 0.3371986448764801, "learning_rate": 0.00013154288572143037, "loss": 0.4831, "step": 6849 }, { "epoch": 0.5078964929191073, "grad_norm": 0.3718486726284027, "learning_rate": 0.0001315328832208052, "loss": 0.5506, "step": 6850 }, { "epoch": 0.5079706383925261, "grad_norm": 0.3664332926273346, "learning_rate": 0.00013152288072018005, "loss": 0.5065, "step": 6851 }, { "epoch": 0.508044783865945, "grad_norm": 0.3604752719402313, "learning_rate": 0.00013151287821955488, "loss": 0.4777, "step": 6852 }, { "epoch": 0.5081189293393639, "grad_norm": 0.3601020276546478, "learning_rate": 0.00013150287571892975, "loss": 0.5328, "step": 6853 }, { "epoch": 0.5081930748127826, "grad_norm": 0.35130786895751953, "learning_rate": 0.00013149287321830458, "loss": 0.4964, "step": 6854 }, { "epoch": 0.5082672202862015, "grad_norm": 0.3460885286331177, "learning_rate": 0.00013148287071767942, "loss": 0.522, "step": 6855 }, { "epoch": 0.5083413657596204, "grad_norm": 0.35945579409599304, "learning_rate": 0.00013147286821705426, "loss": 0.5097, "step": 6856 }, { "epoch": 0.5084155112330392, "grad_norm": 0.34439703822135925, "learning_rate": 0.00013146286571642912, "loss": 0.521, "step": 6857 }, { "epoch": 0.5084896567064581, "grad_norm": 0.3813939690589905, "learning_rate": 0.00013145286321580396, "loss": 0.5412, "step": 6858 }, { "epoch": 0.508563802179877, "grad_norm": 0.32340437173843384, "learning_rate": 0.0001314428607151788, "loss": 0.4397, "step": 6859 }, { "epoch": 0.5086379476532957, "grad_norm": 0.35332807898521423, "learning_rate": 0.00013143285821455363, "loss": 0.4841, "step": 6860 }, { "epoch": 0.5087120931267146, "grad_norm": 0.3429652154445648, "learning_rate": 0.0001314228557139285, "loss": 0.4888, "step": 6861 }, { "epoch": 0.5087862386001335, "grad_norm": 0.3570738732814789, "learning_rate": 0.00013141285321330333, "loss": 0.4954, "step": 6862 }, { "epoch": 0.5088603840735523, "grad_norm": 0.3714521527290344, "learning_rate": 0.00013140285071267817, "loss": 0.5284, "step": 6863 }, { "epoch": 0.5089345295469712, "grad_norm": 0.3557024896144867, "learning_rate": 0.000131392848212053, "loss": 0.5316, "step": 6864 }, { "epoch": 0.50900867502039, "grad_norm": 0.3534093201160431, "learning_rate": 0.00013138284571142787, "loss": 0.4922, "step": 6865 }, { "epoch": 0.5090828204938088, "grad_norm": 0.35406702756881714, "learning_rate": 0.0001313728432108027, "loss": 0.5073, "step": 6866 }, { "epoch": 0.5091569659672277, "grad_norm": 0.36252158880233765, "learning_rate": 0.00013136284071017755, "loss": 0.5077, "step": 6867 }, { "epoch": 0.5092311114406466, "grad_norm": 0.36410799622535706, "learning_rate": 0.00013135283820955238, "loss": 0.517, "step": 6868 }, { "epoch": 0.5093052569140654, "grad_norm": 0.3580762445926666, "learning_rate": 0.00013134283570892725, "loss": 0.4745, "step": 6869 }, { "epoch": 0.5093794023874842, "grad_norm": 0.3523726463317871, "learning_rate": 0.00013133283320830209, "loss": 0.473, "step": 6870 }, { "epoch": 0.5094535478609031, "grad_norm": 0.35746240615844727, "learning_rate": 0.00013132283070767692, "loss": 0.4877, "step": 6871 }, { "epoch": 0.5095276933343219, "grad_norm": 0.35679662227630615, "learning_rate": 0.0001313128282070518, "loss": 0.4942, "step": 6872 }, { "epoch": 0.5096018388077408, "grad_norm": 0.36675578355789185, "learning_rate": 0.0001313028257064266, "loss": 0.5066, "step": 6873 }, { "epoch": 0.5096759842811597, "grad_norm": 0.34693393111228943, "learning_rate": 0.00013129282320580146, "loss": 0.4863, "step": 6874 }, { "epoch": 0.5097501297545785, "grad_norm": 0.3876148462295532, "learning_rate": 0.0001312828207051763, "loss": 0.5043, "step": 6875 }, { "epoch": 0.5098242752279973, "grad_norm": 0.3446981906890869, "learning_rate": 0.00013127281820455116, "loss": 0.4718, "step": 6876 }, { "epoch": 0.5098984207014162, "grad_norm": 0.3632439374923706, "learning_rate": 0.00013126281570392597, "loss": 0.5017, "step": 6877 }, { "epoch": 0.509972566174835, "grad_norm": 0.36907994747161865, "learning_rate": 0.00013125281320330084, "loss": 0.5273, "step": 6878 }, { "epoch": 0.5100467116482539, "grad_norm": 0.36254391074180603, "learning_rate": 0.00013124281070267567, "loss": 0.4958, "step": 6879 }, { "epoch": 0.5101208571216728, "grad_norm": 0.3590930700302124, "learning_rate": 0.00013123280820205054, "loss": 0.5142, "step": 6880 }, { "epoch": 0.5101950025950915, "grad_norm": 0.3484073281288147, "learning_rate": 0.00013122280570142535, "loss": 0.503, "step": 6881 }, { "epoch": 0.5102691480685104, "grad_norm": 0.39135581254959106, "learning_rate": 0.0001312128032008002, "loss": 0.5456, "step": 6882 }, { "epoch": 0.5103432935419293, "grad_norm": 0.36569762229919434, "learning_rate": 0.00013120280070017505, "loss": 0.4914, "step": 6883 }, { "epoch": 0.5104174390153481, "grad_norm": 0.3614839017391205, "learning_rate": 0.0001311927981995499, "loss": 0.504, "step": 6884 }, { "epoch": 0.510491584488767, "grad_norm": 0.3484618365764618, "learning_rate": 0.00013118279569892472, "loss": 0.4916, "step": 6885 }, { "epoch": 0.5105657299621859, "grad_norm": 0.3694186508655548, "learning_rate": 0.0001311727931982996, "loss": 0.5215, "step": 6886 }, { "epoch": 0.5106398754356046, "grad_norm": 0.3578733205795288, "learning_rate": 0.00013116279069767442, "loss": 0.4968, "step": 6887 }, { "epoch": 0.5107140209090235, "grad_norm": 0.3791671395301819, "learning_rate": 0.00013115278819704926, "loss": 0.5179, "step": 6888 }, { "epoch": 0.5107881663824424, "grad_norm": 0.36890465021133423, "learning_rate": 0.0001311427856964241, "loss": 0.5632, "step": 6889 }, { "epoch": 0.5108623118558612, "grad_norm": 0.367500901222229, "learning_rate": 0.00013113278319579896, "loss": 0.5494, "step": 6890 }, { "epoch": 0.5109364573292801, "grad_norm": 0.37081828713417053, "learning_rate": 0.0001311227806951738, "loss": 0.5355, "step": 6891 }, { "epoch": 0.5110106028026989, "grad_norm": 0.37378984689712524, "learning_rate": 0.00013111277819454864, "loss": 0.5176, "step": 6892 }, { "epoch": 0.5110847482761177, "grad_norm": 0.3460536599159241, "learning_rate": 0.00013110277569392347, "loss": 0.4644, "step": 6893 }, { "epoch": 0.5111588937495366, "grad_norm": 0.35411104559898376, "learning_rate": 0.00013109277319329834, "loss": 0.4917, "step": 6894 }, { "epoch": 0.5112330392229555, "grad_norm": 0.34239694476127625, "learning_rate": 0.00013108277069267318, "loss": 0.4763, "step": 6895 }, { "epoch": 0.5113071846963743, "grad_norm": 0.3627853989601135, "learning_rate": 0.000131072768192048, "loss": 0.5552, "step": 6896 }, { "epoch": 0.5113813301697931, "grad_norm": 0.33860135078430176, "learning_rate": 0.00013106276569142285, "loss": 0.4706, "step": 6897 }, { "epoch": 0.511455475643212, "grad_norm": 0.3598496615886688, "learning_rate": 0.00013105276319079771, "loss": 0.5237, "step": 6898 }, { "epoch": 0.5115296211166308, "grad_norm": 0.3589763045310974, "learning_rate": 0.00013104276069017255, "loss": 0.5483, "step": 6899 }, { "epoch": 0.5116037665900497, "grad_norm": 0.35084760189056396, "learning_rate": 0.0001310327581895474, "loss": 0.4982, "step": 6900 }, { "epoch": 0.5116779120634686, "grad_norm": 0.3481113910675049, "learning_rate": 0.00013102275568892223, "loss": 0.4968, "step": 6901 }, { "epoch": 0.5117520575368874, "grad_norm": 0.3491312563419342, "learning_rate": 0.0001310127531882971, "loss": 0.5219, "step": 6902 }, { "epoch": 0.5118262030103062, "grad_norm": 0.38050171732902527, "learning_rate": 0.00013100275068767193, "loss": 0.5773, "step": 6903 }, { "epoch": 0.5119003484837251, "grad_norm": 0.3463365435600281, "learning_rate": 0.00013099274818704676, "loss": 0.4932, "step": 6904 }, { "epoch": 0.5119744939571439, "grad_norm": 0.348880797624588, "learning_rate": 0.00013098274568642163, "loss": 0.5025, "step": 6905 }, { "epoch": 0.5120486394305628, "grad_norm": 0.35654476284980774, "learning_rate": 0.00013097274318579646, "loss": 0.5191, "step": 6906 }, { "epoch": 0.5121227849039817, "grad_norm": 0.365560382604599, "learning_rate": 0.0001309627406851713, "loss": 0.5314, "step": 6907 }, { "epoch": 0.5121969303774004, "grad_norm": 0.3551885783672333, "learning_rate": 0.00013095273818454614, "loss": 0.5289, "step": 6908 }, { "epoch": 0.5122710758508193, "grad_norm": 0.3569539785385132, "learning_rate": 0.000130942735683921, "loss": 0.4927, "step": 6909 }, { "epoch": 0.5123452213242382, "grad_norm": 0.3458092212677002, "learning_rate": 0.0001309327331832958, "loss": 0.4725, "step": 6910 }, { "epoch": 0.512419366797657, "grad_norm": 0.35840991139411926, "learning_rate": 0.00013092273068267068, "loss": 0.516, "step": 6911 }, { "epoch": 0.5124935122710759, "grad_norm": 0.36437633633613586, "learning_rate": 0.00013091272818204551, "loss": 0.4839, "step": 6912 }, { "epoch": 0.5125676577444948, "grad_norm": 0.37506920099258423, "learning_rate": 0.00013090272568142038, "loss": 0.5106, "step": 6913 }, { "epoch": 0.5126418032179135, "grad_norm": 0.3269767463207245, "learning_rate": 0.0001308927231807952, "loss": 0.4742, "step": 6914 }, { "epoch": 0.5127159486913324, "grad_norm": 0.36520442366600037, "learning_rate": 0.00013088272068017005, "loss": 0.5316, "step": 6915 }, { "epoch": 0.5127900941647512, "grad_norm": 0.3661205470561981, "learning_rate": 0.0001308727181795449, "loss": 0.5439, "step": 6916 }, { "epoch": 0.5128642396381701, "grad_norm": 0.3601547181606293, "learning_rate": 0.00013086271567891975, "loss": 0.477, "step": 6917 }, { "epoch": 0.512938385111589, "grad_norm": 0.3751097321510315, "learning_rate": 0.00013085271317829456, "loss": 0.4941, "step": 6918 }, { "epoch": 0.5130125305850077, "grad_norm": 0.3652706742286682, "learning_rate": 0.00013084271067766943, "loss": 0.5087, "step": 6919 }, { "epoch": 0.5130866760584266, "grad_norm": 0.3697284162044525, "learning_rate": 0.00013083270817704427, "loss": 0.5195, "step": 6920 }, { "epoch": 0.5131608215318455, "grad_norm": 0.37991204857826233, "learning_rate": 0.00013082270567641913, "loss": 0.5407, "step": 6921 }, { "epoch": 0.5132349670052643, "grad_norm": 0.364128977060318, "learning_rate": 0.00013081270317579394, "loss": 0.5271, "step": 6922 }, { "epoch": 0.5133091124786832, "grad_norm": 0.3608447313308716, "learning_rate": 0.0001308027006751688, "loss": 0.5176, "step": 6923 }, { "epoch": 0.513383257952102, "grad_norm": 0.3353155851364136, "learning_rate": 0.00013079269817454364, "loss": 0.4638, "step": 6924 }, { "epoch": 0.5134574034255208, "grad_norm": 0.3690376877784729, "learning_rate": 0.00013078269567391848, "loss": 0.5261, "step": 6925 }, { "epoch": 0.5135315488989397, "grad_norm": 0.38302507996559143, "learning_rate": 0.00013077269317329332, "loss": 0.5317, "step": 6926 }, { "epoch": 0.5136056943723586, "grad_norm": 0.3734620213508606, "learning_rate": 0.00013076269067266818, "loss": 0.5219, "step": 6927 }, { "epoch": 0.5136798398457774, "grad_norm": 0.3702090382575989, "learning_rate": 0.00013075268817204302, "loss": 0.477, "step": 6928 }, { "epoch": 0.5137539853191962, "grad_norm": 0.3757145404815674, "learning_rate": 0.00013074268567141785, "loss": 0.5481, "step": 6929 }, { "epoch": 0.5138281307926151, "grad_norm": 0.3345782458782196, "learning_rate": 0.0001307326831707927, "loss": 0.4806, "step": 6930 }, { "epoch": 0.5139022762660339, "grad_norm": 0.34092357754707336, "learning_rate": 0.00013072268067016755, "loss": 0.5176, "step": 6931 }, { "epoch": 0.5139764217394528, "grad_norm": 0.33551880717277527, "learning_rate": 0.0001307126781695424, "loss": 0.4829, "step": 6932 }, { "epoch": 0.5140505672128717, "grad_norm": 0.3519268035888672, "learning_rate": 0.00013070267566891723, "loss": 0.4932, "step": 6933 }, { "epoch": 0.5141247126862905, "grad_norm": 0.3432702422142029, "learning_rate": 0.00013069267316829207, "loss": 0.5053, "step": 6934 }, { "epoch": 0.5141988581597093, "grad_norm": 0.3358326256275177, "learning_rate": 0.00013068267066766693, "loss": 0.514, "step": 6935 }, { "epoch": 0.5142730036331282, "grad_norm": 0.37846481800079346, "learning_rate": 0.00013067266816704177, "loss": 0.5056, "step": 6936 }, { "epoch": 0.514347149106547, "grad_norm": 0.3731759190559387, "learning_rate": 0.0001306626656664166, "loss": 0.5442, "step": 6937 }, { "epoch": 0.5144212945799659, "grad_norm": 0.35809099674224854, "learning_rate": 0.00013065266316579147, "loss": 0.5137, "step": 6938 }, { "epoch": 0.5144954400533848, "grad_norm": 0.3561422526836395, "learning_rate": 0.0001306426606651663, "loss": 0.5016, "step": 6939 }, { "epoch": 0.5145695855268035, "grad_norm": 0.3611351549625397, "learning_rate": 0.00013063265816454114, "loss": 0.5337, "step": 6940 }, { "epoch": 0.5146437310002224, "grad_norm": 0.3386591970920563, "learning_rate": 0.00013062265566391598, "loss": 0.467, "step": 6941 }, { "epoch": 0.5147178764736413, "grad_norm": 0.34227123856544495, "learning_rate": 0.00013061265316329084, "loss": 0.4914, "step": 6942 }, { "epoch": 0.5147920219470601, "grad_norm": 0.37116315960884094, "learning_rate": 0.00013060265066266568, "loss": 0.4995, "step": 6943 }, { "epoch": 0.514866167420479, "grad_norm": 0.35347476601600647, "learning_rate": 0.00013059264816204052, "loss": 0.5079, "step": 6944 }, { "epoch": 0.5149403128938979, "grad_norm": 0.3581200838088989, "learning_rate": 0.00013058264566141536, "loss": 0.5405, "step": 6945 }, { "epoch": 0.5150144583673166, "grad_norm": 0.34386348724365234, "learning_rate": 0.00013057264316079022, "loss": 0.4706, "step": 6946 }, { "epoch": 0.5150886038407355, "grad_norm": 0.3520018458366394, "learning_rate": 0.00013056264066016503, "loss": 0.5268, "step": 6947 }, { "epoch": 0.5151627493141544, "grad_norm": 0.3825925588607788, "learning_rate": 0.0001305526381595399, "loss": 0.5741, "step": 6948 }, { "epoch": 0.5152368947875732, "grad_norm": 0.3452507257461548, "learning_rate": 0.00013054263565891473, "loss": 0.4635, "step": 6949 }, { "epoch": 0.5153110402609921, "grad_norm": 0.35415390133857727, "learning_rate": 0.0001305326331582896, "loss": 0.5516, "step": 6950 }, { "epoch": 0.5153851857344109, "grad_norm": 0.38453012704849243, "learning_rate": 0.0001305226306576644, "loss": 0.4625, "step": 6951 }, { "epoch": 0.5154593312078297, "grad_norm": 0.3626198172569275, "learning_rate": 0.00013051262815703927, "loss": 0.5101, "step": 6952 }, { "epoch": 0.5155334766812486, "grad_norm": 0.3575897514820099, "learning_rate": 0.0001305026256564141, "loss": 0.5117, "step": 6953 }, { "epoch": 0.5156076221546675, "grad_norm": 0.3760622441768646, "learning_rate": 0.00013049262315578897, "loss": 0.5352, "step": 6954 }, { "epoch": 0.5156817676280863, "grad_norm": 0.401737779378891, "learning_rate": 0.00013048262065516378, "loss": 0.5232, "step": 6955 }, { "epoch": 0.5157559131015051, "grad_norm": 0.37866973876953125, "learning_rate": 0.00013047261815453864, "loss": 0.5541, "step": 6956 }, { "epoch": 0.515830058574924, "grad_norm": 0.3725205957889557, "learning_rate": 0.00013046261565391348, "loss": 0.5239, "step": 6957 }, { "epoch": 0.5159042040483428, "grad_norm": 0.35674434900283813, "learning_rate": 0.00013045261315328835, "loss": 0.4712, "step": 6958 }, { "epoch": 0.5159783495217617, "grad_norm": 0.386917382478714, "learning_rate": 0.00013044261065266316, "loss": 0.5051, "step": 6959 }, { "epoch": 0.5160524949951806, "grad_norm": 0.3600074052810669, "learning_rate": 0.00013043260815203802, "loss": 0.4678, "step": 6960 }, { "epoch": 0.5161266404685994, "grad_norm": 0.3777140974998474, "learning_rate": 0.00013042260565141286, "loss": 0.5072, "step": 6961 }, { "epoch": 0.5162007859420182, "grad_norm": 0.3841266930103302, "learning_rate": 0.0001304126031507877, "loss": 0.5538, "step": 6962 }, { "epoch": 0.5162749314154371, "grad_norm": 0.3625231087207794, "learning_rate": 0.00013040260065016253, "loss": 0.5371, "step": 6963 }, { "epoch": 0.5163490768888559, "grad_norm": 0.3639528453350067, "learning_rate": 0.0001303925981495374, "loss": 0.5497, "step": 6964 }, { "epoch": 0.5164232223622748, "grad_norm": 0.35617923736572266, "learning_rate": 0.00013038259564891223, "loss": 0.4812, "step": 6965 }, { "epoch": 0.5164973678356937, "grad_norm": 0.3635520040988922, "learning_rate": 0.00013037259314828707, "loss": 0.5192, "step": 6966 }, { "epoch": 0.5165715133091124, "grad_norm": 0.35610610246658325, "learning_rate": 0.0001303625906476619, "loss": 0.4913, "step": 6967 }, { "epoch": 0.5166456587825313, "grad_norm": 0.37239938974380493, "learning_rate": 0.00013035258814703677, "loss": 0.541, "step": 6968 }, { "epoch": 0.5167198042559502, "grad_norm": 0.3605855107307434, "learning_rate": 0.0001303425856464116, "loss": 0.5248, "step": 6969 }, { "epoch": 0.516793949729369, "grad_norm": 0.33904600143432617, "learning_rate": 0.00013033258314578645, "loss": 0.4934, "step": 6970 }, { "epoch": 0.5168680952027879, "grad_norm": 0.36012232303619385, "learning_rate": 0.0001303225806451613, "loss": 0.5184, "step": 6971 }, { "epoch": 0.5169422406762068, "grad_norm": 0.35657238960266113, "learning_rate": 0.00013031257814453615, "loss": 0.494, "step": 6972 }, { "epoch": 0.5170163861496255, "grad_norm": 0.35617518424987793, "learning_rate": 0.00013030257564391098, "loss": 0.511, "step": 6973 }, { "epoch": 0.5170905316230444, "grad_norm": 0.3563475012779236, "learning_rate": 0.00013029257314328582, "loss": 0.5367, "step": 6974 }, { "epoch": 0.5171646770964633, "grad_norm": 0.35169780254364014, "learning_rate": 0.00013028257064266068, "loss": 0.5287, "step": 6975 }, { "epoch": 0.5172388225698821, "grad_norm": 0.3491409718990326, "learning_rate": 0.00013027256814203552, "loss": 0.5175, "step": 6976 }, { "epoch": 0.517312968043301, "grad_norm": 0.3757411539554596, "learning_rate": 0.00013026256564141036, "loss": 0.5608, "step": 6977 }, { "epoch": 0.5173871135167198, "grad_norm": 0.34871670603752136, "learning_rate": 0.0001302525631407852, "loss": 0.4936, "step": 6978 }, { "epoch": 0.5174612589901386, "grad_norm": 0.34538713097572327, "learning_rate": 0.00013024256064016006, "loss": 0.4732, "step": 6979 }, { "epoch": 0.5175354044635575, "grad_norm": 0.3917732238769531, "learning_rate": 0.0001302325581395349, "loss": 0.5281, "step": 6980 }, { "epoch": 0.5176095499369764, "grad_norm": 0.3493468165397644, "learning_rate": 0.00013022255563890973, "loss": 0.4813, "step": 6981 }, { "epoch": 0.5176836954103952, "grad_norm": 0.35632339119911194, "learning_rate": 0.00013021255313828457, "loss": 0.5118, "step": 6982 }, { "epoch": 0.517757840883814, "grad_norm": 0.36001330614089966, "learning_rate": 0.00013020255063765944, "loss": 0.4945, "step": 6983 }, { "epoch": 0.5178319863572329, "grad_norm": 0.3585435152053833, "learning_rate": 0.00013019254813703427, "loss": 0.5065, "step": 6984 }, { "epoch": 0.5179061318306517, "grad_norm": 0.3702959418296814, "learning_rate": 0.0001301825456364091, "loss": 0.4983, "step": 6985 }, { "epoch": 0.5179802773040706, "grad_norm": 0.3880032002925873, "learning_rate": 0.00013017254313578395, "loss": 0.5383, "step": 6986 }, { "epoch": 0.5180544227774895, "grad_norm": 0.33895474672317505, "learning_rate": 0.0001301625406351588, "loss": 0.5064, "step": 6987 }, { "epoch": 0.5181285682509083, "grad_norm": 0.3654939830303192, "learning_rate": 0.00013015253813453362, "loss": 0.4943, "step": 6988 }, { "epoch": 0.5182027137243271, "grad_norm": 0.3610716462135315, "learning_rate": 0.00013014253563390849, "loss": 0.507, "step": 6989 }, { "epoch": 0.518276859197746, "grad_norm": 0.3650744557380676, "learning_rate": 0.00013013253313328332, "loss": 0.5152, "step": 6990 }, { "epoch": 0.5183510046711648, "grad_norm": 0.33765435218811035, "learning_rate": 0.0001301225306326582, "loss": 0.4737, "step": 6991 }, { "epoch": 0.5184251501445837, "grad_norm": 0.3741423189640045, "learning_rate": 0.000130112528132033, "loss": 0.5529, "step": 6992 }, { "epoch": 0.5184992956180026, "grad_norm": 0.373630553483963, "learning_rate": 0.00013010252563140786, "loss": 0.5069, "step": 6993 }, { "epoch": 0.5185734410914213, "grad_norm": 0.3467598557472229, "learning_rate": 0.0001300925231307827, "loss": 0.4676, "step": 6994 }, { "epoch": 0.5186475865648402, "grad_norm": 0.3563489317893982, "learning_rate": 0.00013008252063015756, "loss": 0.4793, "step": 6995 }, { "epoch": 0.5187217320382591, "grad_norm": 0.3504464328289032, "learning_rate": 0.00013007251812953237, "loss": 0.4989, "step": 6996 }, { "epoch": 0.5187958775116779, "grad_norm": 0.39537638425827026, "learning_rate": 0.00013006251562890724, "loss": 0.5324, "step": 6997 }, { "epoch": 0.5188700229850968, "grad_norm": 0.3733987510204315, "learning_rate": 0.00013005251312828207, "loss": 0.5241, "step": 6998 }, { "epoch": 0.5189441684585157, "grad_norm": 0.37126287817955017, "learning_rate": 0.0001300425106276569, "loss": 0.5095, "step": 6999 }, { "epoch": 0.5190183139319344, "grad_norm": 0.3762523829936981, "learning_rate": 0.00013003250812703175, "loss": 0.5217, "step": 7000 }, { "epoch": 0.5190924594053533, "grad_norm": 0.3506222665309906, "learning_rate": 0.0001300225056264066, "loss": 0.5021, "step": 7001 }, { "epoch": 0.5191666048787722, "grad_norm": 0.3973727524280548, "learning_rate": 0.00013001250312578145, "loss": 0.542, "step": 7002 }, { "epoch": 0.519240750352191, "grad_norm": 0.3813343644142151, "learning_rate": 0.00013000250062515629, "loss": 0.548, "step": 7003 }, { "epoch": 0.5193148958256099, "grad_norm": 0.3537099361419678, "learning_rate": 0.00012999249812453115, "loss": 0.5185, "step": 7004 }, { "epoch": 0.5193890412990287, "grad_norm": 0.3481787443161011, "learning_rate": 0.000129982495623906, "loss": 0.4918, "step": 7005 }, { "epoch": 0.5194631867724475, "grad_norm": 0.36574873328208923, "learning_rate": 0.00012997249312328082, "loss": 0.5217, "step": 7006 }, { "epoch": 0.5195373322458664, "grad_norm": 0.3371863067150116, "learning_rate": 0.00012996249062265566, "loss": 0.4839, "step": 7007 }, { "epoch": 0.5196114777192853, "grad_norm": 0.38124772906303406, "learning_rate": 0.00012995248812203053, "loss": 0.5127, "step": 7008 }, { "epoch": 0.5196856231927041, "grad_norm": 0.3632916808128357, "learning_rate": 0.00012994248562140536, "loss": 0.5278, "step": 7009 }, { "epoch": 0.519759768666123, "grad_norm": 0.3387964963912964, "learning_rate": 0.0001299324831207802, "loss": 0.4711, "step": 7010 }, { "epoch": 0.5198339141395418, "grad_norm": 0.4307781457901001, "learning_rate": 0.00012992248062015504, "loss": 0.5562, "step": 7011 }, { "epoch": 0.5199080596129606, "grad_norm": 0.3899767994880676, "learning_rate": 0.0001299124781195299, "loss": 0.5242, "step": 7012 }, { "epoch": 0.5199822050863795, "grad_norm": 0.33968886733055115, "learning_rate": 0.00012990247561890474, "loss": 0.477, "step": 7013 }, { "epoch": 0.5200563505597984, "grad_norm": 0.377901554107666, "learning_rate": 0.00012989247311827958, "loss": 0.5349, "step": 7014 }, { "epoch": 0.5201304960332171, "grad_norm": 0.3631397783756256, "learning_rate": 0.0001298824706176544, "loss": 0.5263, "step": 7015 }, { "epoch": 0.520204641506636, "grad_norm": 0.3659835457801819, "learning_rate": 0.00012987246811702928, "loss": 0.5248, "step": 7016 }, { "epoch": 0.5202787869800549, "grad_norm": 0.37084853649139404, "learning_rate": 0.00012986246561640411, "loss": 0.5129, "step": 7017 }, { "epoch": 0.5203529324534737, "grad_norm": 0.39943578839302063, "learning_rate": 0.00012985246311577895, "loss": 0.5381, "step": 7018 }, { "epoch": 0.5204270779268926, "grad_norm": 0.34284690022468567, "learning_rate": 0.0001298424606151538, "loss": 0.5062, "step": 7019 }, { "epoch": 0.5205012234003115, "grad_norm": 0.36056748032569885, "learning_rate": 0.00012983245811452865, "loss": 0.524, "step": 7020 }, { "epoch": 0.5205753688737302, "grad_norm": 0.3598483204841614, "learning_rate": 0.0001298224556139035, "loss": 0.4997, "step": 7021 }, { "epoch": 0.5206495143471491, "grad_norm": 0.36309072375297546, "learning_rate": 0.00012981245311327833, "loss": 0.5198, "step": 7022 }, { "epoch": 0.520723659820568, "grad_norm": 0.37714287638664246, "learning_rate": 0.00012980245061265316, "loss": 0.5588, "step": 7023 }, { "epoch": 0.5207978052939868, "grad_norm": 0.3649931252002716, "learning_rate": 0.00012979244811202803, "loss": 0.5386, "step": 7024 }, { "epoch": 0.5208719507674057, "grad_norm": 0.3649112284183502, "learning_rate": 0.00012978244561140284, "loss": 0.4993, "step": 7025 }, { "epoch": 0.5209460962408246, "grad_norm": 0.36072662472724915, "learning_rate": 0.0001297724431107777, "loss": 0.5333, "step": 7026 }, { "epoch": 0.5210202417142433, "grad_norm": 0.351866751909256, "learning_rate": 0.00012976244061015254, "loss": 0.5218, "step": 7027 }, { "epoch": 0.5210943871876622, "grad_norm": 0.32096797227859497, "learning_rate": 0.0001297524381095274, "loss": 0.4503, "step": 7028 }, { "epoch": 0.5211685326610811, "grad_norm": 0.3537501394748688, "learning_rate": 0.0001297424356089022, "loss": 0.4979, "step": 7029 }, { "epoch": 0.5212426781344999, "grad_norm": 0.35253793001174927, "learning_rate": 0.00012973243310827708, "loss": 0.5182, "step": 7030 }, { "epoch": 0.5213168236079188, "grad_norm": 0.35651645064353943, "learning_rate": 0.00012972243060765191, "loss": 0.5111, "step": 7031 }, { "epoch": 0.5213909690813375, "grad_norm": 0.3716343641281128, "learning_rate": 0.00012971242810702678, "loss": 0.5328, "step": 7032 }, { "epoch": 0.5214651145547564, "grad_norm": 0.3656297028064728, "learning_rate": 0.0001297024256064016, "loss": 0.5161, "step": 7033 }, { "epoch": 0.5215392600281753, "grad_norm": 0.35865217447280884, "learning_rate": 0.00012969242310577645, "loss": 0.5206, "step": 7034 }, { "epoch": 0.5216134055015941, "grad_norm": 0.353450745344162, "learning_rate": 0.0001296824206051513, "loss": 0.5142, "step": 7035 }, { "epoch": 0.521687550975013, "grad_norm": 0.361284464597702, "learning_rate": 0.00012967241810452613, "loss": 0.5165, "step": 7036 }, { "epoch": 0.5217616964484318, "grad_norm": 0.3604165315628052, "learning_rate": 0.00012966241560390096, "loss": 0.5346, "step": 7037 }, { "epoch": 0.5218358419218506, "grad_norm": 0.35702601075172424, "learning_rate": 0.00012965241310327583, "loss": 0.5249, "step": 7038 }, { "epoch": 0.5219099873952695, "grad_norm": 0.35868194699287415, "learning_rate": 0.00012964241060265067, "loss": 0.5165, "step": 7039 }, { "epoch": 0.5219841328686884, "grad_norm": 0.34080779552459717, "learning_rate": 0.0001296324081020255, "loss": 0.4792, "step": 7040 }, { "epoch": 0.5220582783421072, "grad_norm": 0.35477399826049805, "learning_rate": 0.00012962240560140037, "loss": 0.5029, "step": 7041 }, { "epoch": 0.522132423815526, "grad_norm": 0.3318859040737152, "learning_rate": 0.0001296124031007752, "loss": 0.4872, "step": 7042 }, { "epoch": 0.5222065692889449, "grad_norm": 0.349115788936615, "learning_rate": 0.00012960240060015004, "loss": 0.4813, "step": 7043 }, { "epoch": 0.5222807147623637, "grad_norm": 0.3541381061077118, "learning_rate": 0.00012959239809952488, "loss": 0.5354, "step": 7044 }, { "epoch": 0.5223548602357826, "grad_norm": 0.34181636571884155, "learning_rate": 0.00012958239559889974, "loss": 0.4549, "step": 7045 }, { "epoch": 0.5224290057092015, "grad_norm": 0.3466079831123352, "learning_rate": 0.00012957239309827458, "loss": 0.4934, "step": 7046 }, { "epoch": 0.5225031511826203, "grad_norm": 0.3965844511985779, "learning_rate": 0.00012956239059764942, "loss": 0.5496, "step": 7047 }, { "epoch": 0.5225772966560391, "grad_norm": 0.34920766949653625, "learning_rate": 0.00012955238809702425, "loss": 0.5345, "step": 7048 }, { "epoch": 0.522651442129458, "grad_norm": 0.34289246797561646, "learning_rate": 0.00012954238559639912, "loss": 0.4956, "step": 7049 }, { "epoch": 0.5227255876028768, "grad_norm": 0.3503973186016083, "learning_rate": 0.00012953238309577395, "loss": 0.5283, "step": 7050 }, { "epoch": 0.5227997330762957, "grad_norm": 0.3833175301551819, "learning_rate": 0.0001295223805951488, "loss": 0.5274, "step": 7051 }, { "epoch": 0.5228738785497146, "grad_norm": 0.35615038871765137, "learning_rate": 0.00012951237809452363, "loss": 0.4846, "step": 7052 }, { "epoch": 0.5229480240231333, "grad_norm": 0.3183786869049072, "learning_rate": 0.0001295023755938985, "loss": 0.4749, "step": 7053 }, { "epoch": 0.5230221694965522, "grad_norm": 0.3326863944530487, "learning_rate": 0.00012949237309327333, "loss": 0.4982, "step": 7054 }, { "epoch": 0.5230963149699711, "grad_norm": 0.37955886125564575, "learning_rate": 0.00012948237059264817, "loss": 0.5194, "step": 7055 }, { "epoch": 0.5231704604433899, "grad_norm": 0.3425392210483551, "learning_rate": 0.000129472368092023, "loss": 0.4784, "step": 7056 }, { "epoch": 0.5232446059168088, "grad_norm": 0.3746482729911804, "learning_rate": 0.00012946236559139787, "loss": 0.6083, "step": 7057 }, { "epoch": 0.5233187513902277, "grad_norm": 0.3540767729282379, "learning_rate": 0.0001294523630907727, "loss": 0.4887, "step": 7058 }, { "epoch": 0.5233928968636464, "grad_norm": 0.3484721779823303, "learning_rate": 0.00012944236059014754, "loss": 0.4917, "step": 7059 }, { "epoch": 0.5234670423370653, "grad_norm": 0.35219472646713257, "learning_rate": 0.00012943235808952238, "loss": 0.4971, "step": 7060 }, { "epoch": 0.5235411878104842, "grad_norm": 0.35378319025039673, "learning_rate": 0.00012942235558889724, "loss": 0.5119, "step": 7061 }, { "epoch": 0.523615333283903, "grad_norm": 0.35093262791633606, "learning_rate": 0.00012941235308827205, "loss": 0.4814, "step": 7062 }, { "epoch": 0.5236894787573219, "grad_norm": 0.33696284890174866, "learning_rate": 0.00012940235058764692, "loss": 0.4823, "step": 7063 }, { "epoch": 0.5237636242307407, "grad_norm": 0.33978796005249023, "learning_rate": 0.00012939234808702176, "loss": 0.497, "step": 7064 }, { "epoch": 0.5238377697041595, "grad_norm": 0.3590414226055145, "learning_rate": 0.00012938234558639662, "loss": 0.534, "step": 7065 }, { "epoch": 0.5239119151775784, "grad_norm": 0.33754995465278625, "learning_rate": 0.00012937234308577143, "loss": 0.4913, "step": 7066 }, { "epoch": 0.5239860606509973, "grad_norm": 0.3557897210121155, "learning_rate": 0.0001293623405851463, "loss": 0.5021, "step": 7067 }, { "epoch": 0.5240602061244161, "grad_norm": 0.37489622831344604, "learning_rate": 0.00012935233808452113, "loss": 0.5254, "step": 7068 }, { "epoch": 0.524134351597835, "grad_norm": 0.34130731225013733, "learning_rate": 0.000129342335583896, "loss": 0.5118, "step": 7069 }, { "epoch": 0.5242084970712538, "grad_norm": 0.34324920177459717, "learning_rate": 0.0001293323330832708, "loss": 0.4871, "step": 7070 }, { "epoch": 0.5242826425446726, "grad_norm": 0.3287922143936157, "learning_rate": 0.00012932233058264567, "loss": 0.4615, "step": 7071 }, { "epoch": 0.5243567880180915, "grad_norm": 0.3451409935951233, "learning_rate": 0.0001293123280820205, "loss": 0.4936, "step": 7072 }, { "epoch": 0.5244309334915104, "grad_norm": 0.33988428115844727, "learning_rate": 0.00012930232558139534, "loss": 0.4948, "step": 7073 }, { "epoch": 0.5245050789649292, "grad_norm": 0.35085806250572205, "learning_rate": 0.0001292923230807702, "loss": 0.4848, "step": 7074 }, { "epoch": 0.524579224438348, "grad_norm": 0.3545953333377838, "learning_rate": 0.00012928232058014504, "loss": 0.5301, "step": 7075 }, { "epoch": 0.5246533699117669, "grad_norm": 0.33975285291671753, "learning_rate": 0.00012927231807951988, "loss": 0.4613, "step": 7076 }, { "epoch": 0.5247275153851857, "grad_norm": 0.3691222667694092, "learning_rate": 0.00012926231557889472, "loss": 0.5328, "step": 7077 }, { "epoch": 0.5248016608586046, "grad_norm": 0.3729194104671478, "learning_rate": 0.00012925231307826958, "loss": 0.5413, "step": 7078 }, { "epoch": 0.5248758063320235, "grad_norm": 0.3336848020553589, "learning_rate": 0.00012924231057764442, "loss": 0.4653, "step": 7079 }, { "epoch": 0.5249499518054422, "grad_norm": 0.37313684821128845, "learning_rate": 0.00012923230807701928, "loss": 0.5186, "step": 7080 }, { "epoch": 0.5250240972788611, "grad_norm": 0.38438403606414795, "learning_rate": 0.0001292223055763941, "loss": 0.5241, "step": 7081 }, { "epoch": 0.52509824275228, "grad_norm": 0.3809378147125244, "learning_rate": 0.00012921230307576896, "loss": 0.5205, "step": 7082 }, { "epoch": 0.5251723882256988, "grad_norm": 0.3573048710823059, "learning_rate": 0.0001292023005751438, "loss": 0.5211, "step": 7083 }, { "epoch": 0.5252465336991177, "grad_norm": 0.36776772141456604, "learning_rate": 0.00012919229807451863, "loss": 0.5522, "step": 7084 }, { "epoch": 0.5253206791725366, "grad_norm": 0.34150609374046326, "learning_rate": 0.00012918229557389347, "loss": 0.4995, "step": 7085 }, { "epoch": 0.5253948246459553, "grad_norm": 0.3675459921360016, "learning_rate": 0.00012917229307326833, "loss": 0.5266, "step": 7086 }, { "epoch": 0.5254689701193742, "grad_norm": 0.37373530864715576, "learning_rate": 0.00012916229057264317, "loss": 0.5282, "step": 7087 }, { "epoch": 0.5255431155927931, "grad_norm": 0.38684016466140747, "learning_rate": 0.000129152288072018, "loss": 0.5106, "step": 7088 }, { "epoch": 0.5256172610662119, "grad_norm": 0.3797837793827057, "learning_rate": 0.00012914228557139285, "loss": 0.5255, "step": 7089 }, { "epoch": 0.5256914065396308, "grad_norm": 0.3691765069961548, "learning_rate": 0.0001291322830707677, "loss": 0.5079, "step": 7090 }, { "epoch": 0.5257655520130496, "grad_norm": 0.3524433672428131, "learning_rate": 0.00012912228057014255, "loss": 0.541, "step": 7091 }, { "epoch": 0.5258396974864684, "grad_norm": 0.3703574538230896, "learning_rate": 0.00012911227806951738, "loss": 0.5115, "step": 7092 }, { "epoch": 0.5259138429598873, "grad_norm": 0.34604009985923767, "learning_rate": 0.00012910227556889222, "loss": 0.4856, "step": 7093 }, { "epoch": 0.5259879884333062, "grad_norm": 0.37421518564224243, "learning_rate": 0.00012909227306826708, "loss": 0.5318, "step": 7094 }, { "epoch": 0.526062133906725, "grad_norm": 0.3558404743671417, "learning_rate": 0.00012908227056764192, "loss": 0.4983, "step": 7095 }, { "epoch": 0.5261362793801438, "grad_norm": 0.3894381523132324, "learning_rate": 0.00012907226806701676, "loss": 0.4891, "step": 7096 }, { "epoch": 0.5262104248535627, "grad_norm": 0.33489325642585754, "learning_rate": 0.0001290622655663916, "loss": 0.4971, "step": 7097 }, { "epoch": 0.5262845703269815, "grad_norm": 0.32531821727752686, "learning_rate": 0.00012905226306576646, "loss": 0.4619, "step": 7098 }, { "epoch": 0.5263587158004004, "grad_norm": 0.3558308780193329, "learning_rate": 0.00012904226056514127, "loss": 0.5261, "step": 7099 }, { "epoch": 0.5264328612738193, "grad_norm": 0.38013097643852234, "learning_rate": 0.00012903225806451613, "loss": 0.5075, "step": 7100 }, { "epoch": 0.526507006747238, "grad_norm": 0.39235931634902954, "learning_rate": 0.00012902225556389097, "loss": 0.5527, "step": 7101 }, { "epoch": 0.5265811522206569, "grad_norm": 0.33644533157348633, "learning_rate": 0.00012901225306326584, "loss": 0.4884, "step": 7102 }, { "epoch": 0.5266552976940758, "grad_norm": 0.3619502782821655, "learning_rate": 0.00012900225056264065, "loss": 0.5576, "step": 7103 }, { "epoch": 0.5267294431674946, "grad_norm": 0.35482892394065857, "learning_rate": 0.0001289922480620155, "loss": 0.509, "step": 7104 }, { "epoch": 0.5268035886409135, "grad_norm": 0.34937784075737, "learning_rate": 0.00012898224556139035, "loss": 0.4817, "step": 7105 }, { "epoch": 0.5268777341143324, "grad_norm": 0.3611081540584564, "learning_rate": 0.0001289722430607652, "loss": 0.4851, "step": 7106 }, { "epoch": 0.5269518795877511, "grad_norm": 0.38258281350135803, "learning_rate": 0.00012896224056014005, "loss": 0.5204, "step": 7107 }, { "epoch": 0.52702602506117, "grad_norm": 0.38714540004730225, "learning_rate": 0.00012895223805951489, "loss": 0.5093, "step": 7108 }, { "epoch": 0.5271001705345889, "grad_norm": 0.36326709389686584, "learning_rate": 0.00012894223555888972, "loss": 0.5196, "step": 7109 }, { "epoch": 0.5271743160080077, "grad_norm": 0.37972292304039, "learning_rate": 0.00012893223305826456, "loss": 0.4906, "step": 7110 }, { "epoch": 0.5272484614814266, "grad_norm": 0.36628660559654236, "learning_rate": 0.00012892223055763942, "loss": 0.5197, "step": 7111 }, { "epoch": 0.5273226069548455, "grad_norm": 0.3713582456111908, "learning_rate": 0.00012891222805701426, "loss": 0.5045, "step": 7112 }, { "epoch": 0.5273967524282642, "grad_norm": 0.36937734484672546, "learning_rate": 0.0001289022255563891, "loss": 0.5218, "step": 7113 }, { "epoch": 0.5274708979016831, "grad_norm": 0.3423128128051758, "learning_rate": 0.00012889222305576393, "loss": 0.5045, "step": 7114 }, { "epoch": 0.527545043375102, "grad_norm": 0.34700319170951843, "learning_rate": 0.0001288822205551388, "loss": 0.499, "step": 7115 }, { "epoch": 0.5276191888485208, "grad_norm": 0.3645997643470764, "learning_rate": 0.00012887221805451364, "loss": 0.4868, "step": 7116 }, { "epoch": 0.5276933343219397, "grad_norm": 0.3671487867832184, "learning_rate": 0.0001288622155538885, "loss": 0.4963, "step": 7117 }, { "epoch": 0.5277674797953585, "grad_norm": 0.3503818213939667, "learning_rate": 0.0001288522130532633, "loss": 0.4892, "step": 7118 }, { "epoch": 0.5278416252687773, "grad_norm": 0.37045595049858093, "learning_rate": 0.00012884221055263817, "loss": 0.5363, "step": 7119 }, { "epoch": 0.5279157707421962, "grad_norm": 0.3756081163883209, "learning_rate": 0.000128832208052013, "loss": 0.4897, "step": 7120 }, { "epoch": 0.5279899162156151, "grad_norm": 0.369304895401001, "learning_rate": 0.00012882220555138785, "loss": 0.5689, "step": 7121 }, { "epoch": 0.5280640616890339, "grad_norm": 0.3775099515914917, "learning_rate": 0.00012881220305076269, "loss": 0.5233, "step": 7122 }, { "epoch": 0.5281382071624527, "grad_norm": 0.3871869444847107, "learning_rate": 0.00012880220055013755, "loss": 0.5914, "step": 7123 }, { "epoch": 0.5282123526358716, "grad_norm": 0.37433910369873047, "learning_rate": 0.0001287921980495124, "loss": 0.5253, "step": 7124 }, { "epoch": 0.5282864981092904, "grad_norm": 0.3333829641342163, "learning_rate": 0.00012878219554888722, "loss": 0.4676, "step": 7125 }, { "epoch": 0.5283606435827093, "grad_norm": 0.3528692424297333, "learning_rate": 0.00012877219304826206, "loss": 0.504, "step": 7126 }, { "epoch": 0.5284347890561282, "grad_norm": 0.35802632570266724, "learning_rate": 0.00012876219054763693, "loss": 0.5031, "step": 7127 }, { "epoch": 0.528508934529547, "grad_norm": 0.37207162380218506, "learning_rate": 0.00012875218804701176, "loss": 0.5172, "step": 7128 }, { "epoch": 0.5285830800029658, "grad_norm": 0.3435436189174652, "learning_rate": 0.0001287421855463866, "loss": 0.487, "step": 7129 }, { "epoch": 0.5286572254763847, "grad_norm": 0.3653901517391205, "learning_rate": 0.00012873218304576144, "loss": 0.51, "step": 7130 }, { "epoch": 0.5287313709498035, "grad_norm": 0.35012000799179077, "learning_rate": 0.0001287221805451363, "loss": 0.4816, "step": 7131 }, { "epoch": 0.5288055164232224, "grad_norm": 0.3739607334136963, "learning_rate": 0.00012871217804451114, "loss": 0.5048, "step": 7132 }, { "epoch": 0.5288796618966413, "grad_norm": 0.36863651871681213, "learning_rate": 0.00012870217554388598, "loss": 0.5424, "step": 7133 }, { "epoch": 0.52895380737006, "grad_norm": 0.3701850473880768, "learning_rate": 0.0001286921730432608, "loss": 0.5126, "step": 7134 }, { "epoch": 0.5290279528434789, "grad_norm": 0.3411649465560913, "learning_rate": 0.00012868217054263568, "loss": 0.5135, "step": 7135 }, { "epoch": 0.5291020983168978, "grad_norm": 0.36885935068130493, "learning_rate": 0.00012867216804201049, "loss": 0.5516, "step": 7136 }, { "epoch": 0.5291762437903166, "grad_norm": 0.35773059725761414, "learning_rate": 0.00012866216554138535, "loss": 0.5183, "step": 7137 }, { "epoch": 0.5292503892637355, "grad_norm": 0.348699688911438, "learning_rate": 0.0001286521630407602, "loss": 0.4893, "step": 7138 }, { "epoch": 0.5293245347371544, "grad_norm": 0.3574714958667755, "learning_rate": 0.00012864216054013505, "loss": 0.5122, "step": 7139 }, { "epoch": 0.5293986802105731, "grad_norm": 0.3738923668861389, "learning_rate": 0.0001286321580395099, "loss": 0.5472, "step": 7140 }, { "epoch": 0.529472825683992, "grad_norm": 0.36129021644592285, "learning_rate": 0.00012862215553888473, "loss": 0.5052, "step": 7141 }, { "epoch": 0.5295469711574109, "grad_norm": 0.39714181423187256, "learning_rate": 0.00012861215303825956, "loss": 0.5054, "step": 7142 }, { "epoch": 0.5296211166308297, "grad_norm": 0.3487113118171692, "learning_rate": 0.00012860215053763443, "loss": 0.4814, "step": 7143 }, { "epoch": 0.5296952621042486, "grad_norm": 0.3347206115722656, "learning_rate": 0.00012859214803700926, "loss": 0.4835, "step": 7144 }, { "epoch": 0.5297694075776673, "grad_norm": 0.3528164327144623, "learning_rate": 0.0001285821455363841, "loss": 0.4976, "step": 7145 }, { "epoch": 0.5298435530510862, "grad_norm": 0.3585350513458252, "learning_rate": 0.00012857214303575894, "loss": 0.4928, "step": 7146 }, { "epoch": 0.5299176985245051, "grad_norm": 0.3421332836151123, "learning_rate": 0.00012856214053513378, "loss": 0.4961, "step": 7147 }, { "epoch": 0.5299918439979239, "grad_norm": 0.3771037459373474, "learning_rate": 0.00012855213803450864, "loss": 0.5344, "step": 7148 }, { "epoch": 0.5300659894713428, "grad_norm": 0.3727899491786957, "learning_rate": 0.00012854213553388348, "loss": 0.5175, "step": 7149 }, { "epoch": 0.5301401349447616, "grad_norm": 0.3530861437320709, "learning_rate": 0.00012853213303325834, "loss": 0.5097, "step": 7150 }, { "epoch": 0.5302142804181804, "grad_norm": 0.35603901743888855, "learning_rate": 0.00012852213053263315, "loss": 0.5003, "step": 7151 }, { "epoch": 0.5302884258915993, "grad_norm": 0.3983398377895355, "learning_rate": 0.00012851212803200802, "loss": 0.5046, "step": 7152 }, { "epoch": 0.5303625713650182, "grad_norm": 0.3670448660850525, "learning_rate": 0.00012850212553138285, "loss": 0.5458, "step": 7153 }, { "epoch": 0.530436716838437, "grad_norm": 0.37243014574050903, "learning_rate": 0.00012849212303075772, "loss": 0.5388, "step": 7154 }, { "epoch": 0.5305108623118558, "grad_norm": 0.36501649022102356, "learning_rate": 0.00012848212053013253, "loss": 0.5052, "step": 7155 }, { "epoch": 0.5305850077852747, "grad_norm": 0.34230539202690125, "learning_rate": 0.0001284721180295074, "loss": 0.5163, "step": 7156 }, { "epoch": 0.5306591532586935, "grad_norm": 0.37127378582954407, "learning_rate": 0.00012846211552888223, "loss": 0.526, "step": 7157 }, { "epoch": 0.5307332987321124, "grad_norm": 0.34352099895477295, "learning_rate": 0.00012845211302825707, "loss": 0.4802, "step": 7158 }, { "epoch": 0.5308074442055313, "grad_norm": 0.38001641631126404, "learning_rate": 0.0001284421105276319, "loss": 0.5967, "step": 7159 }, { "epoch": 0.53088158967895, "grad_norm": 0.33506008982658386, "learning_rate": 0.00012843210802700677, "loss": 0.4902, "step": 7160 }, { "epoch": 0.5309557351523689, "grad_norm": 0.35523277521133423, "learning_rate": 0.0001284221055263816, "loss": 0.4741, "step": 7161 }, { "epoch": 0.5310298806257878, "grad_norm": 0.3603801429271698, "learning_rate": 0.00012841210302575644, "loss": 0.5238, "step": 7162 }, { "epoch": 0.5311040260992066, "grad_norm": 0.35622915625572205, "learning_rate": 0.00012840210052513128, "loss": 0.4934, "step": 7163 }, { "epoch": 0.5311781715726255, "grad_norm": 0.35882577300071716, "learning_rate": 0.00012839209802450614, "loss": 0.5132, "step": 7164 }, { "epoch": 0.5312523170460444, "grad_norm": 0.35801467299461365, "learning_rate": 0.00012838209552388098, "loss": 0.4992, "step": 7165 }, { "epoch": 0.5313264625194631, "grad_norm": 0.34660524129867554, "learning_rate": 0.00012837209302325582, "loss": 0.4973, "step": 7166 }, { "epoch": 0.531400607992882, "grad_norm": 0.3511310815811157, "learning_rate": 0.00012836209052263065, "loss": 0.4896, "step": 7167 }, { "epoch": 0.5314747534663009, "grad_norm": 0.330795019865036, "learning_rate": 0.00012835208802200552, "loss": 0.4905, "step": 7168 }, { "epoch": 0.5315488989397197, "grad_norm": 0.34290212392807007, "learning_rate": 0.00012834208552138035, "loss": 0.4853, "step": 7169 }, { "epoch": 0.5316230444131386, "grad_norm": 0.341728538274765, "learning_rate": 0.0001283320830207552, "loss": 0.4921, "step": 7170 }, { "epoch": 0.5316971898865575, "grad_norm": 0.3700513243675232, "learning_rate": 0.00012832208052013003, "loss": 0.5115, "step": 7171 }, { "epoch": 0.5317713353599762, "grad_norm": 0.38327643275260925, "learning_rate": 0.0001283120780195049, "loss": 0.5142, "step": 7172 }, { "epoch": 0.5318454808333951, "grad_norm": 0.35625410079956055, "learning_rate": 0.00012830207551887973, "loss": 0.48, "step": 7173 }, { "epoch": 0.531919626306814, "grad_norm": 0.3665059506893158, "learning_rate": 0.00012829207301825457, "loss": 0.5024, "step": 7174 }, { "epoch": 0.5319937717802328, "grad_norm": 0.3425460755825043, "learning_rate": 0.0001282820705176294, "loss": 0.471, "step": 7175 }, { "epoch": 0.5320679172536517, "grad_norm": 0.3492903411388397, "learning_rate": 0.00012827206801700427, "loss": 0.4986, "step": 7176 }, { "epoch": 0.5321420627270705, "grad_norm": 0.3552698791027069, "learning_rate": 0.0001282620655163791, "loss": 0.5179, "step": 7177 }, { "epoch": 0.5322162082004893, "grad_norm": 0.35227957367897034, "learning_rate": 0.00012825206301575394, "loss": 0.4836, "step": 7178 }, { "epoch": 0.5322903536739082, "grad_norm": 0.3725096881389618, "learning_rate": 0.00012824206051512878, "loss": 0.4967, "step": 7179 }, { "epoch": 0.5323644991473271, "grad_norm": 0.34252917766571045, "learning_rate": 0.00012823205801450364, "loss": 0.4934, "step": 7180 }, { "epoch": 0.5324386446207459, "grad_norm": 0.39423269033432007, "learning_rate": 0.00012822205551387848, "loss": 0.5189, "step": 7181 }, { "epoch": 0.5325127900941647, "grad_norm": 0.3582017421722412, "learning_rate": 0.00012821205301325332, "loss": 0.5183, "step": 7182 }, { "epoch": 0.5325869355675836, "grad_norm": 0.35744303464889526, "learning_rate": 0.00012820205051262818, "loss": 0.4977, "step": 7183 }, { "epoch": 0.5326610810410024, "grad_norm": 0.3736807703971863, "learning_rate": 0.000128192048012003, "loss": 0.503, "step": 7184 }, { "epoch": 0.5327352265144213, "grad_norm": 0.3309790790081024, "learning_rate": 0.00012818204551137786, "loss": 0.4714, "step": 7185 }, { "epoch": 0.5328093719878402, "grad_norm": 0.35306692123413086, "learning_rate": 0.0001281720430107527, "loss": 0.5406, "step": 7186 }, { "epoch": 0.532883517461259, "grad_norm": 0.362564355134964, "learning_rate": 0.00012816204051012756, "loss": 0.5045, "step": 7187 }, { "epoch": 0.5329576629346778, "grad_norm": 0.37336674332618713, "learning_rate": 0.00012815203800950237, "loss": 0.5182, "step": 7188 }, { "epoch": 0.5330318084080967, "grad_norm": 0.3914889395236969, "learning_rate": 0.00012814203550887723, "loss": 0.488, "step": 7189 }, { "epoch": 0.5331059538815155, "grad_norm": 0.37716445326805115, "learning_rate": 0.00012813203300825207, "loss": 0.5384, "step": 7190 }, { "epoch": 0.5331800993549344, "grad_norm": 0.33736032247543335, "learning_rate": 0.00012812203050762693, "loss": 0.4792, "step": 7191 }, { "epoch": 0.5332542448283533, "grad_norm": 0.34308579564094543, "learning_rate": 0.00012811202800700174, "loss": 0.4897, "step": 7192 }, { "epoch": 0.533328390301772, "grad_norm": 0.34559306502342224, "learning_rate": 0.0001281020255063766, "loss": 0.4709, "step": 7193 }, { "epoch": 0.5334025357751909, "grad_norm": 0.3430536389350891, "learning_rate": 0.00012809202300575144, "loss": 0.5051, "step": 7194 }, { "epoch": 0.5334766812486098, "grad_norm": 0.34899142384529114, "learning_rate": 0.00012808202050512628, "loss": 0.4828, "step": 7195 }, { "epoch": 0.5335508267220286, "grad_norm": 0.3721078634262085, "learning_rate": 0.00012807201800450112, "loss": 0.531, "step": 7196 }, { "epoch": 0.5336249721954475, "grad_norm": 0.3667210340499878, "learning_rate": 0.00012806201550387598, "loss": 0.5525, "step": 7197 }, { "epoch": 0.5336991176688664, "grad_norm": 0.3426697552204132, "learning_rate": 0.00012805201300325082, "loss": 0.4843, "step": 7198 }, { "epoch": 0.5337732631422851, "grad_norm": 0.3593873679637909, "learning_rate": 0.00012804201050262566, "loss": 0.5299, "step": 7199 }, { "epoch": 0.533847408615704, "grad_norm": 0.37630075216293335, "learning_rate": 0.0001280320080020005, "loss": 0.5318, "step": 7200 }, { "epoch": 0.5339215540891229, "grad_norm": 0.34980374574661255, "learning_rate": 0.00012802200550137536, "loss": 0.5013, "step": 7201 }, { "epoch": 0.5339956995625417, "grad_norm": 0.3777419924736023, "learning_rate": 0.0001280120030007502, "loss": 0.5406, "step": 7202 }, { "epoch": 0.5340698450359606, "grad_norm": 0.3727801442146301, "learning_rate": 0.00012800200050012503, "loss": 0.5274, "step": 7203 }, { "epoch": 0.5341439905093794, "grad_norm": 0.34346094727516174, "learning_rate": 0.00012799199799949987, "loss": 0.4822, "step": 7204 }, { "epoch": 0.5342181359827982, "grad_norm": 0.3538914918899536, "learning_rate": 0.00012798199549887473, "loss": 0.528, "step": 7205 }, { "epoch": 0.5342922814562171, "grad_norm": 0.3540041148662567, "learning_rate": 0.00012797199299824957, "loss": 0.4797, "step": 7206 }, { "epoch": 0.534366426929636, "grad_norm": 0.3653797507286072, "learning_rate": 0.0001279619904976244, "loss": 0.532, "step": 7207 }, { "epoch": 0.5344405724030548, "grad_norm": 0.38805490732192993, "learning_rate": 0.00012795198799699924, "loss": 0.5292, "step": 7208 }, { "epoch": 0.5345147178764736, "grad_norm": 0.3476819694042206, "learning_rate": 0.0001279419854963741, "loss": 0.487, "step": 7209 }, { "epoch": 0.5345888633498925, "grad_norm": 0.37044382095336914, "learning_rate": 0.00012793198299574895, "loss": 0.5129, "step": 7210 }, { "epoch": 0.5346630088233113, "grad_norm": 0.3557039201259613, "learning_rate": 0.00012792198049512378, "loss": 0.4993, "step": 7211 }, { "epoch": 0.5347371542967302, "grad_norm": 0.358075350522995, "learning_rate": 0.00012791197799449862, "loss": 0.5182, "step": 7212 }, { "epoch": 0.5348112997701491, "grad_norm": 0.3582004904747009, "learning_rate": 0.00012790197549387348, "loss": 0.5005, "step": 7213 }, { "epoch": 0.5348854452435678, "grad_norm": 0.3893207609653473, "learning_rate": 0.00012789197299324832, "loss": 0.5281, "step": 7214 }, { "epoch": 0.5349595907169867, "grad_norm": 0.3844684660434723, "learning_rate": 0.00012788197049262316, "loss": 0.546, "step": 7215 }, { "epoch": 0.5350337361904056, "grad_norm": 0.38723593950271606, "learning_rate": 0.00012787196799199802, "loss": 0.5369, "step": 7216 }, { "epoch": 0.5351078816638244, "grad_norm": 0.3845733404159546, "learning_rate": 0.00012786196549137286, "loss": 0.5591, "step": 7217 }, { "epoch": 0.5351820271372433, "grad_norm": 0.3900396525859833, "learning_rate": 0.0001278519629907477, "loss": 0.5528, "step": 7218 }, { "epoch": 0.5352561726106622, "grad_norm": 0.37690871953964233, "learning_rate": 0.00012784196049012253, "loss": 0.5342, "step": 7219 }, { "epoch": 0.5353303180840809, "grad_norm": 0.3552815020084381, "learning_rate": 0.0001278319579894974, "loss": 0.4633, "step": 7220 }, { "epoch": 0.5354044635574998, "grad_norm": 0.3648386299610138, "learning_rate": 0.0001278219554888722, "loss": 0.502, "step": 7221 }, { "epoch": 0.5354786090309187, "grad_norm": 0.3603462278842926, "learning_rate": 0.00012781195298824707, "loss": 0.5139, "step": 7222 }, { "epoch": 0.5355527545043375, "grad_norm": 0.37191954255104065, "learning_rate": 0.0001278019504876219, "loss": 0.5241, "step": 7223 }, { "epoch": 0.5356268999777564, "grad_norm": 0.3450738787651062, "learning_rate": 0.00012779194798699677, "loss": 0.4821, "step": 7224 }, { "epoch": 0.5357010454511753, "grad_norm": 0.3540944457054138, "learning_rate": 0.00012778194548637158, "loss": 0.518, "step": 7225 }, { "epoch": 0.535775190924594, "grad_norm": 0.3382912278175354, "learning_rate": 0.00012777194298574645, "loss": 0.4764, "step": 7226 }, { "epoch": 0.5358493363980129, "grad_norm": 0.36129021644592285, "learning_rate": 0.00012776194048512128, "loss": 0.5204, "step": 7227 }, { "epoch": 0.5359234818714318, "grad_norm": 0.3701836168766022, "learning_rate": 0.00012775193798449615, "loss": 0.5351, "step": 7228 }, { "epoch": 0.5359976273448506, "grad_norm": 0.35095715522766113, "learning_rate": 0.00012774193548387096, "loss": 0.5153, "step": 7229 }, { "epoch": 0.5360717728182695, "grad_norm": 0.36026012897491455, "learning_rate": 0.00012773193298324582, "loss": 0.4939, "step": 7230 }, { "epoch": 0.5361459182916883, "grad_norm": 0.33977752923965454, "learning_rate": 0.00012772193048262066, "loss": 0.4902, "step": 7231 }, { "epoch": 0.5362200637651071, "grad_norm": 0.353054940700531, "learning_rate": 0.0001277119279819955, "loss": 0.4832, "step": 7232 }, { "epoch": 0.536294209238526, "grad_norm": 0.34340280294418335, "learning_rate": 0.00012770192548137033, "loss": 0.4966, "step": 7233 }, { "epoch": 0.5363683547119449, "grad_norm": 0.39646297693252563, "learning_rate": 0.0001276919229807452, "loss": 0.5764, "step": 7234 }, { "epoch": 0.5364425001853637, "grad_norm": 0.3468452990055084, "learning_rate": 0.00012768192048012004, "loss": 0.5009, "step": 7235 }, { "epoch": 0.5365166456587825, "grad_norm": 0.3670138418674469, "learning_rate": 0.00012767191797949487, "loss": 0.4806, "step": 7236 }, { "epoch": 0.5365907911322014, "grad_norm": 0.32592394948005676, "learning_rate": 0.0001276619154788697, "loss": 0.4504, "step": 7237 }, { "epoch": 0.5366649366056202, "grad_norm": 0.3663318157196045, "learning_rate": 0.00012765191297824457, "loss": 0.5639, "step": 7238 }, { "epoch": 0.5367390820790391, "grad_norm": 0.34986215829849243, "learning_rate": 0.0001276419104776194, "loss": 0.5147, "step": 7239 }, { "epoch": 0.536813227552458, "grad_norm": 0.3532473146915436, "learning_rate": 0.00012763190797699425, "loss": 0.4899, "step": 7240 }, { "epoch": 0.5368873730258767, "grad_norm": 0.3608250319957733, "learning_rate": 0.00012762190547636909, "loss": 0.5184, "step": 7241 }, { "epoch": 0.5369615184992956, "grad_norm": 0.3369043469429016, "learning_rate": 0.00012761190297574395, "loss": 0.4912, "step": 7242 }, { "epoch": 0.5370356639727145, "grad_norm": 0.3529641628265381, "learning_rate": 0.0001276019004751188, "loss": 0.5251, "step": 7243 }, { "epoch": 0.5371098094461333, "grad_norm": 0.3256716728210449, "learning_rate": 0.00012759189797449362, "loss": 0.4819, "step": 7244 }, { "epoch": 0.5371839549195522, "grad_norm": 0.3471624553203583, "learning_rate": 0.00012758189547386846, "loss": 0.469, "step": 7245 }, { "epoch": 0.5372581003929711, "grad_norm": 0.37430843710899353, "learning_rate": 0.00012757189297324333, "loss": 0.5113, "step": 7246 }, { "epoch": 0.5373322458663898, "grad_norm": 0.3666324317455292, "learning_rate": 0.00012756189047261816, "loss": 0.493, "step": 7247 }, { "epoch": 0.5374063913398087, "grad_norm": 0.338316947221756, "learning_rate": 0.000127551887971993, "loss": 0.5184, "step": 7248 }, { "epoch": 0.5374805368132276, "grad_norm": 0.3828795552253723, "learning_rate": 0.00012754188547136786, "loss": 0.5271, "step": 7249 }, { "epoch": 0.5375546822866464, "grad_norm": 0.34885263442993164, "learning_rate": 0.0001275318829707427, "loss": 0.5027, "step": 7250 }, { "epoch": 0.5376288277600653, "grad_norm": 0.35997238755226135, "learning_rate": 0.00012752188047011754, "loss": 0.4725, "step": 7251 }, { "epoch": 0.5377029732334841, "grad_norm": 0.36563071608543396, "learning_rate": 0.00012751187796949237, "loss": 0.486, "step": 7252 }, { "epoch": 0.5377771187069029, "grad_norm": 0.37146490812301636, "learning_rate": 0.00012750187546886724, "loss": 0.4707, "step": 7253 }, { "epoch": 0.5378512641803218, "grad_norm": 0.3544524908065796, "learning_rate": 0.00012749187296824208, "loss": 0.475, "step": 7254 }, { "epoch": 0.5379254096537407, "grad_norm": 0.3703920543193817, "learning_rate": 0.0001274818704676169, "loss": 0.488, "step": 7255 }, { "epoch": 0.5379995551271595, "grad_norm": 0.3527323007583618, "learning_rate": 0.00012747186796699175, "loss": 0.5057, "step": 7256 }, { "epoch": 0.5380737006005784, "grad_norm": 0.35742634534835815, "learning_rate": 0.00012746186546636661, "loss": 0.527, "step": 7257 }, { "epoch": 0.5381478460739971, "grad_norm": 0.3738943636417389, "learning_rate": 0.00012745186296574142, "loss": 0.505, "step": 7258 }, { "epoch": 0.538221991547416, "grad_norm": 0.3543088138103485, "learning_rate": 0.0001274418604651163, "loss": 0.551, "step": 7259 }, { "epoch": 0.5382961370208349, "grad_norm": 0.3938145041465759, "learning_rate": 0.00012743185796449113, "loss": 0.5817, "step": 7260 }, { "epoch": 0.5383702824942537, "grad_norm": 0.33910033106803894, "learning_rate": 0.000127421855463866, "loss": 0.4612, "step": 7261 }, { "epoch": 0.5384444279676726, "grad_norm": 0.36324650049209595, "learning_rate": 0.0001274118529632408, "loss": 0.5084, "step": 7262 }, { "epoch": 0.5385185734410914, "grad_norm": 0.33549556136131287, "learning_rate": 0.00012740185046261566, "loss": 0.485, "step": 7263 }, { "epoch": 0.5385927189145102, "grad_norm": 0.35822802782058716, "learning_rate": 0.0001273918479619905, "loss": 0.5046, "step": 7264 }, { "epoch": 0.5386668643879291, "grad_norm": 0.37135839462280273, "learning_rate": 0.00012738184546136537, "loss": 0.5191, "step": 7265 }, { "epoch": 0.538741009861348, "grad_norm": 0.3536699414253235, "learning_rate": 0.00012737184296074018, "loss": 0.4988, "step": 7266 }, { "epoch": 0.5388151553347668, "grad_norm": 0.35941001772880554, "learning_rate": 0.00012736184046011504, "loss": 0.4917, "step": 7267 }, { "epoch": 0.5388893008081856, "grad_norm": 0.3510887920856476, "learning_rate": 0.00012735183795948988, "loss": 0.4607, "step": 7268 }, { "epoch": 0.5389634462816045, "grad_norm": 0.34057915210723877, "learning_rate": 0.00012734183545886474, "loss": 0.4888, "step": 7269 }, { "epoch": 0.5390375917550233, "grad_norm": 0.3429250419139862, "learning_rate": 0.00012733183295823955, "loss": 0.4747, "step": 7270 }, { "epoch": 0.5391117372284422, "grad_norm": 0.409634530544281, "learning_rate": 0.00012732183045761442, "loss": 0.4987, "step": 7271 }, { "epoch": 0.5391858827018611, "grad_norm": 0.3484416604042053, "learning_rate": 0.00012731182795698925, "loss": 0.4971, "step": 7272 }, { "epoch": 0.5392600281752798, "grad_norm": 0.3663753271102905, "learning_rate": 0.0001273018254563641, "loss": 0.5148, "step": 7273 }, { "epoch": 0.5393341736486987, "grad_norm": 0.34822896122932434, "learning_rate": 0.00012729182295573893, "loss": 0.4763, "step": 7274 }, { "epoch": 0.5394083191221176, "grad_norm": 0.3697715401649475, "learning_rate": 0.0001272818204551138, "loss": 0.4912, "step": 7275 }, { "epoch": 0.5394824645955364, "grad_norm": 0.36158838868141174, "learning_rate": 0.00012727181795448863, "loss": 0.4849, "step": 7276 }, { "epoch": 0.5395566100689553, "grad_norm": 0.3560589849948883, "learning_rate": 0.00012726181545386346, "loss": 0.4953, "step": 7277 }, { "epoch": 0.5396307555423742, "grad_norm": 0.3566135764122009, "learning_rate": 0.0001272518129532383, "loss": 0.5012, "step": 7278 }, { "epoch": 0.5397049010157929, "grad_norm": 0.37185800075531006, "learning_rate": 0.00012724181045261317, "loss": 0.5239, "step": 7279 }, { "epoch": 0.5397790464892118, "grad_norm": 0.38787075877189636, "learning_rate": 0.000127231807951988, "loss": 0.5421, "step": 7280 }, { "epoch": 0.5398531919626307, "grad_norm": 0.34566211700439453, "learning_rate": 0.00012722180545136284, "loss": 0.4675, "step": 7281 }, { "epoch": 0.5399273374360495, "grad_norm": 0.3798096179962158, "learning_rate": 0.00012721180295073768, "loss": 0.5429, "step": 7282 }, { "epoch": 0.5400014829094684, "grad_norm": 0.3508783280849457, "learning_rate": 0.00012720180045011254, "loss": 0.5117, "step": 7283 }, { "epoch": 0.5400756283828873, "grad_norm": 0.35825830698013306, "learning_rate": 0.00012719179794948738, "loss": 0.5031, "step": 7284 }, { "epoch": 0.540149773856306, "grad_norm": 0.3543848991394043, "learning_rate": 0.00012718179544886222, "loss": 0.5057, "step": 7285 }, { "epoch": 0.5402239193297249, "grad_norm": 0.34239694476127625, "learning_rate": 0.00012717179294823708, "loss": 0.4665, "step": 7286 }, { "epoch": 0.5402980648031438, "grad_norm": 0.3661828339099884, "learning_rate": 0.00012716179044761192, "loss": 0.5215, "step": 7287 }, { "epoch": 0.5403722102765626, "grad_norm": 0.3567352294921875, "learning_rate": 0.00012715178794698675, "loss": 0.5309, "step": 7288 }, { "epoch": 0.5404463557499815, "grad_norm": 0.3656647205352783, "learning_rate": 0.0001271417854463616, "loss": 0.4894, "step": 7289 }, { "epoch": 0.5405205012234003, "grad_norm": 0.3635871410369873, "learning_rate": 0.00012713178294573646, "loss": 0.5278, "step": 7290 }, { "epoch": 0.5405946466968191, "grad_norm": 0.37324225902557373, "learning_rate": 0.0001271217804451113, "loss": 0.5097, "step": 7291 }, { "epoch": 0.540668792170238, "grad_norm": 0.4058102071285248, "learning_rate": 0.00012711177794448613, "loss": 0.5995, "step": 7292 }, { "epoch": 0.5407429376436569, "grad_norm": 0.3446764051914215, "learning_rate": 0.00012710177544386097, "loss": 0.5008, "step": 7293 }, { "epoch": 0.5408170831170757, "grad_norm": 0.39203765988349915, "learning_rate": 0.00012709177294323583, "loss": 0.5292, "step": 7294 }, { "epoch": 0.5408912285904945, "grad_norm": 0.36507394909858704, "learning_rate": 0.00012708177044261064, "loss": 0.5136, "step": 7295 }, { "epoch": 0.5409653740639134, "grad_norm": 0.3413647711277008, "learning_rate": 0.0001270717679419855, "loss": 0.4797, "step": 7296 }, { "epoch": 0.5410395195373322, "grad_norm": 0.34027937054634094, "learning_rate": 0.00012706176544136034, "loss": 0.4997, "step": 7297 }, { "epoch": 0.5411136650107511, "grad_norm": 0.3456675410270691, "learning_rate": 0.0001270517629407352, "loss": 0.4825, "step": 7298 }, { "epoch": 0.54118781048417, "grad_norm": 0.3513215482234955, "learning_rate": 0.00012704176044011002, "loss": 0.4995, "step": 7299 }, { "epoch": 0.5412619559575887, "grad_norm": 0.34693217277526855, "learning_rate": 0.00012703175793948488, "loss": 0.5172, "step": 7300 }, { "epoch": 0.5413361014310076, "grad_norm": 0.3354248106479645, "learning_rate": 0.00012702175543885972, "loss": 0.4538, "step": 7301 }, { "epoch": 0.5414102469044265, "grad_norm": 0.35871973633766174, "learning_rate": 0.00012701175293823458, "loss": 0.5226, "step": 7302 }, { "epoch": 0.5414843923778453, "grad_norm": 0.3566943109035492, "learning_rate": 0.0001270017504376094, "loss": 0.4843, "step": 7303 }, { "epoch": 0.5415585378512642, "grad_norm": 0.3528972864151001, "learning_rate": 0.00012699174793698426, "loss": 0.4893, "step": 7304 }, { "epoch": 0.5416326833246831, "grad_norm": 0.37977954745292664, "learning_rate": 0.0001269817454363591, "loss": 0.5183, "step": 7305 }, { "epoch": 0.5417068287981018, "grad_norm": 0.35259607434272766, "learning_rate": 0.00012697174293573396, "loss": 0.5022, "step": 7306 }, { "epoch": 0.5417809742715207, "grad_norm": 0.35832467675209045, "learning_rate": 0.00012696174043510877, "loss": 0.5036, "step": 7307 }, { "epoch": 0.5418551197449396, "grad_norm": 0.3565920889377594, "learning_rate": 0.00012695173793448363, "loss": 0.4922, "step": 7308 }, { "epoch": 0.5419292652183584, "grad_norm": 0.34110355377197266, "learning_rate": 0.00012694173543385847, "loss": 0.4977, "step": 7309 }, { "epoch": 0.5420034106917773, "grad_norm": 0.3649918735027313, "learning_rate": 0.0001269317329332333, "loss": 0.5057, "step": 7310 }, { "epoch": 0.5420775561651962, "grad_norm": 0.36903080344200134, "learning_rate": 0.00012692173043260814, "loss": 0.5033, "step": 7311 }, { "epoch": 0.5421517016386149, "grad_norm": 0.33133363723754883, "learning_rate": 0.000126911727931983, "loss": 0.4665, "step": 7312 }, { "epoch": 0.5422258471120338, "grad_norm": 0.3480895459651947, "learning_rate": 0.00012690172543135784, "loss": 0.5146, "step": 7313 }, { "epoch": 0.5422999925854527, "grad_norm": 0.3495211899280548, "learning_rate": 0.00012689172293073268, "loss": 0.5399, "step": 7314 }, { "epoch": 0.5423741380588715, "grad_norm": 0.3971417248249054, "learning_rate": 0.00012688172043010752, "loss": 0.5387, "step": 7315 }, { "epoch": 0.5424482835322904, "grad_norm": 0.37001246213912964, "learning_rate": 0.00012687171792948238, "loss": 0.5462, "step": 7316 }, { "epoch": 0.5425224290057092, "grad_norm": 0.36557018756866455, "learning_rate": 0.00012686171542885722, "loss": 0.5067, "step": 7317 }, { "epoch": 0.542596574479128, "grad_norm": 0.36713290214538574, "learning_rate": 0.00012685171292823206, "loss": 0.551, "step": 7318 }, { "epoch": 0.5426707199525469, "grad_norm": 0.3483871817588806, "learning_rate": 0.00012684171042760692, "loss": 0.4915, "step": 7319 }, { "epoch": 0.5427448654259658, "grad_norm": 0.3386613428592682, "learning_rate": 0.00012683170792698176, "loss": 0.4815, "step": 7320 }, { "epoch": 0.5428190108993846, "grad_norm": 0.329706609249115, "learning_rate": 0.0001268217054263566, "loss": 0.513, "step": 7321 }, { "epoch": 0.5428931563728034, "grad_norm": 0.3607254922389984, "learning_rate": 0.00012681170292573143, "loss": 0.5186, "step": 7322 }, { "epoch": 0.5429673018462223, "grad_norm": 0.37056097388267517, "learning_rate": 0.0001268017004251063, "loss": 0.5102, "step": 7323 }, { "epoch": 0.5430414473196411, "grad_norm": 0.3329195976257324, "learning_rate": 0.00012679169792448113, "loss": 0.4742, "step": 7324 }, { "epoch": 0.54311559279306, "grad_norm": 0.3664306104183197, "learning_rate": 0.00012678169542385597, "loss": 0.5001, "step": 7325 }, { "epoch": 0.5431897382664789, "grad_norm": 0.3582397997379303, "learning_rate": 0.0001267716929232308, "loss": 0.5194, "step": 7326 }, { "epoch": 0.5432638837398976, "grad_norm": 0.3524998128414154, "learning_rate": 0.00012676169042260567, "loss": 0.4866, "step": 7327 }, { "epoch": 0.5433380292133165, "grad_norm": 0.36648985743522644, "learning_rate": 0.0001267516879219805, "loss": 0.5026, "step": 7328 }, { "epoch": 0.5434121746867354, "grad_norm": 0.3311615288257599, "learning_rate": 0.00012674168542135535, "loss": 0.5124, "step": 7329 }, { "epoch": 0.5434863201601542, "grad_norm": 0.3627655804157257, "learning_rate": 0.00012673168292073018, "loss": 0.4857, "step": 7330 }, { "epoch": 0.5435604656335731, "grad_norm": 0.3966333866119385, "learning_rate": 0.00012672168042010505, "loss": 0.5473, "step": 7331 }, { "epoch": 0.543634611106992, "grad_norm": 0.37662753462791443, "learning_rate": 0.00012671167791947986, "loss": 0.5059, "step": 7332 }, { "epoch": 0.5437087565804107, "grad_norm": 0.3851191997528076, "learning_rate": 0.00012670167541885472, "loss": 0.5532, "step": 7333 }, { "epoch": 0.5437829020538296, "grad_norm": 0.3453838527202606, "learning_rate": 0.00012669167291822956, "loss": 0.4797, "step": 7334 }, { "epoch": 0.5438570475272485, "grad_norm": 0.3511013686656952, "learning_rate": 0.00012668167041760442, "loss": 0.5064, "step": 7335 }, { "epoch": 0.5439311930006673, "grad_norm": 0.3540079891681671, "learning_rate": 0.00012667166791697923, "loss": 0.5138, "step": 7336 }, { "epoch": 0.5440053384740862, "grad_norm": 0.35417380928993225, "learning_rate": 0.0001266616654163541, "loss": 0.4982, "step": 7337 }, { "epoch": 0.544079483947505, "grad_norm": 0.36740389466285706, "learning_rate": 0.00012665166291572893, "loss": 0.5305, "step": 7338 }, { "epoch": 0.5441536294209238, "grad_norm": 0.3772587478160858, "learning_rate": 0.0001266416604151038, "loss": 0.5259, "step": 7339 }, { "epoch": 0.5442277748943427, "grad_norm": 0.3828781545162201, "learning_rate": 0.0001266316579144786, "loss": 0.5288, "step": 7340 }, { "epoch": 0.5443019203677616, "grad_norm": 0.3433588445186615, "learning_rate": 0.00012662165541385347, "loss": 0.4949, "step": 7341 }, { "epoch": 0.5443760658411804, "grad_norm": 0.3406387269496918, "learning_rate": 0.0001266116529132283, "loss": 0.4819, "step": 7342 }, { "epoch": 0.5444502113145993, "grad_norm": 0.36822032928466797, "learning_rate": 0.00012660165041260317, "loss": 0.5161, "step": 7343 }, { "epoch": 0.5445243567880181, "grad_norm": 0.37331435084342957, "learning_rate": 0.00012659164791197798, "loss": 0.5419, "step": 7344 }, { "epoch": 0.5445985022614369, "grad_norm": 0.37781858444213867, "learning_rate": 0.00012658164541135285, "loss": 0.5743, "step": 7345 }, { "epoch": 0.5446726477348558, "grad_norm": 0.365910142660141, "learning_rate": 0.00012657164291072768, "loss": 0.4937, "step": 7346 }, { "epoch": 0.5447467932082747, "grad_norm": 0.3622342348098755, "learning_rate": 0.00012656164041010252, "loss": 0.488, "step": 7347 }, { "epoch": 0.5448209386816935, "grad_norm": 0.36615025997161865, "learning_rate": 0.00012655163790947736, "loss": 0.5646, "step": 7348 }, { "epoch": 0.5448950841551123, "grad_norm": 0.374947190284729, "learning_rate": 0.00012654163540885222, "loss": 0.5243, "step": 7349 }, { "epoch": 0.5449692296285312, "grad_norm": 0.36409348249435425, "learning_rate": 0.00012653163290822706, "loss": 0.5081, "step": 7350 }, { "epoch": 0.54504337510195, "grad_norm": 0.36079350113868713, "learning_rate": 0.0001265216304076019, "loss": 0.5203, "step": 7351 }, { "epoch": 0.5451175205753689, "grad_norm": 0.3483407497406006, "learning_rate": 0.00012651162790697676, "loss": 0.4783, "step": 7352 }, { "epoch": 0.5451916660487878, "grad_norm": 0.34424835443496704, "learning_rate": 0.0001265016254063516, "loss": 0.4673, "step": 7353 }, { "epoch": 0.5452658115222065, "grad_norm": 0.34261736273765564, "learning_rate": 0.00012649162290572644, "loss": 0.5227, "step": 7354 }, { "epoch": 0.5453399569956254, "grad_norm": 0.34175583720207214, "learning_rate": 0.00012648162040510127, "loss": 0.4822, "step": 7355 }, { "epoch": 0.5454141024690443, "grad_norm": 0.38525792956352234, "learning_rate": 0.00012647161790447614, "loss": 0.5358, "step": 7356 }, { "epoch": 0.5454882479424631, "grad_norm": 0.38309356570243835, "learning_rate": 0.00012646161540385097, "loss": 0.5623, "step": 7357 }, { "epoch": 0.545562393415882, "grad_norm": 0.36202070116996765, "learning_rate": 0.0001264516129032258, "loss": 0.5554, "step": 7358 }, { "epoch": 0.5456365388893009, "grad_norm": 0.38454434275627136, "learning_rate": 0.00012644161040260065, "loss": 0.577, "step": 7359 }, { "epoch": 0.5457106843627196, "grad_norm": 0.34795793890953064, "learning_rate": 0.0001264316079019755, "loss": 0.5115, "step": 7360 }, { "epoch": 0.5457848298361385, "grad_norm": 0.3544546365737915, "learning_rate": 0.00012642160540135035, "loss": 0.4994, "step": 7361 }, { "epoch": 0.5458589753095574, "grad_norm": 0.3382195234298706, "learning_rate": 0.0001264116029007252, "loss": 0.4802, "step": 7362 }, { "epoch": 0.5459331207829762, "grad_norm": 0.3444426357746124, "learning_rate": 0.00012640160040010002, "loss": 0.5032, "step": 7363 }, { "epoch": 0.5460072662563951, "grad_norm": 0.355205237865448, "learning_rate": 0.0001263915978994749, "loss": 0.5271, "step": 7364 }, { "epoch": 0.546081411729814, "grad_norm": 0.3937973976135254, "learning_rate": 0.00012638159539884972, "loss": 0.539, "step": 7365 }, { "epoch": 0.5461555572032327, "grad_norm": 0.3691761791706085, "learning_rate": 0.00012637159289822456, "loss": 0.5445, "step": 7366 }, { "epoch": 0.5462297026766516, "grad_norm": 0.3451838195323944, "learning_rate": 0.0001263615903975994, "loss": 0.4984, "step": 7367 }, { "epoch": 0.5463038481500705, "grad_norm": 0.35018396377563477, "learning_rate": 0.00012635158789697426, "loss": 0.485, "step": 7368 }, { "epoch": 0.5463779936234893, "grad_norm": 0.3795400559902191, "learning_rate": 0.00012634158539634907, "loss": 0.5459, "step": 7369 }, { "epoch": 0.5464521390969082, "grad_norm": 0.36265891790390015, "learning_rate": 0.00012633158289572394, "loss": 0.5532, "step": 7370 }, { "epoch": 0.546526284570327, "grad_norm": 0.3775970935821533, "learning_rate": 0.00012632158039509877, "loss": 0.522, "step": 7371 }, { "epoch": 0.5466004300437458, "grad_norm": 0.32783010601997375, "learning_rate": 0.00012631157789447364, "loss": 0.5136, "step": 7372 }, { "epoch": 0.5466745755171647, "grad_norm": 0.3590959310531616, "learning_rate": 0.00012630157539384845, "loss": 0.5418, "step": 7373 }, { "epoch": 0.5467487209905835, "grad_norm": 0.3488559126853943, "learning_rate": 0.0001262915728932233, "loss": 0.4876, "step": 7374 }, { "epoch": 0.5468228664640024, "grad_norm": 0.3741139769554138, "learning_rate": 0.00012628157039259815, "loss": 0.5267, "step": 7375 }, { "epoch": 0.5468970119374212, "grad_norm": 0.36026784777641296, "learning_rate": 0.00012627156789197301, "loss": 0.509, "step": 7376 }, { "epoch": 0.54697115741084, "grad_norm": 0.34988337755203247, "learning_rate": 0.00012626156539134782, "loss": 0.4807, "step": 7377 }, { "epoch": 0.5470453028842589, "grad_norm": 0.3746396005153656, "learning_rate": 0.0001262515628907227, "loss": 0.5427, "step": 7378 }, { "epoch": 0.5471194483576778, "grad_norm": 0.36529064178466797, "learning_rate": 0.00012624156039009753, "loss": 0.5419, "step": 7379 }, { "epoch": 0.5471935938310966, "grad_norm": 0.36004742980003357, "learning_rate": 0.0001262315578894724, "loss": 0.4775, "step": 7380 }, { "epoch": 0.5472677393045154, "grad_norm": 0.3467215597629547, "learning_rate": 0.0001262215553888472, "loss": 0.5071, "step": 7381 }, { "epoch": 0.5473418847779343, "grad_norm": 0.3530271053314209, "learning_rate": 0.00012621155288822206, "loss": 0.4802, "step": 7382 }, { "epoch": 0.5474160302513531, "grad_norm": 0.36317959427833557, "learning_rate": 0.0001262015503875969, "loss": 0.5253, "step": 7383 }, { "epoch": 0.547490175724772, "grad_norm": 0.4652720093727112, "learning_rate": 0.00012619154788697174, "loss": 0.5282, "step": 7384 }, { "epoch": 0.5475643211981909, "grad_norm": 0.36971035599708557, "learning_rate": 0.0001261815453863466, "loss": 0.5163, "step": 7385 }, { "epoch": 0.5476384666716096, "grad_norm": 0.3694557547569275, "learning_rate": 0.00012617154288572144, "loss": 0.5089, "step": 7386 }, { "epoch": 0.5477126121450285, "grad_norm": 0.3781857192516327, "learning_rate": 0.00012616154038509628, "loss": 0.5505, "step": 7387 }, { "epoch": 0.5477867576184474, "grad_norm": 0.39046698808670044, "learning_rate": 0.0001261515378844711, "loss": 0.4959, "step": 7388 }, { "epoch": 0.5478609030918662, "grad_norm": 0.37166836857795715, "learning_rate": 0.00012614153538384598, "loss": 0.5253, "step": 7389 }, { "epoch": 0.5479350485652851, "grad_norm": 0.3539445400238037, "learning_rate": 0.00012613153288322081, "loss": 0.5234, "step": 7390 }, { "epoch": 0.548009194038704, "grad_norm": 0.3359561860561371, "learning_rate": 0.00012612153038259565, "loss": 0.4833, "step": 7391 }, { "epoch": 0.5480833395121227, "grad_norm": 0.35231858491897583, "learning_rate": 0.0001261115278819705, "loss": 0.5001, "step": 7392 }, { "epoch": 0.5481574849855416, "grad_norm": 0.35674458742141724, "learning_rate": 0.00012610152538134535, "loss": 0.5629, "step": 7393 }, { "epoch": 0.5482316304589605, "grad_norm": 0.3831821084022522, "learning_rate": 0.0001260915228807202, "loss": 0.5255, "step": 7394 }, { "epoch": 0.5483057759323793, "grad_norm": 0.3240084946155548, "learning_rate": 0.00012608152038009503, "loss": 0.4668, "step": 7395 }, { "epoch": 0.5483799214057982, "grad_norm": 0.3607373833656311, "learning_rate": 0.00012607151787946986, "loss": 0.4819, "step": 7396 }, { "epoch": 0.548454066879217, "grad_norm": 0.3717989921569824, "learning_rate": 0.00012606151537884473, "loss": 0.5496, "step": 7397 }, { "epoch": 0.5485282123526358, "grad_norm": 0.3661680221557617, "learning_rate": 0.00012605151287821957, "loss": 0.5136, "step": 7398 }, { "epoch": 0.5486023578260547, "grad_norm": 0.3349338471889496, "learning_rate": 0.0001260415103775944, "loss": 0.4811, "step": 7399 }, { "epoch": 0.5486765032994736, "grad_norm": 0.34698012471199036, "learning_rate": 0.00012603150787696924, "loss": 0.4973, "step": 7400 }, { "epoch": 0.5487506487728924, "grad_norm": 0.3715498149394989, "learning_rate": 0.0001260215053763441, "loss": 0.5054, "step": 7401 }, { "epoch": 0.5488247942463113, "grad_norm": 0.3455786406993866, "learning_rate": 0.00012601150287571894, "loss": 0.5213, "step": 7402 }, { "epoch": 0.5488989397197301, "grad_norm": 0.37933358550071716, "learning_rate": 0.00012600150037509378, "loss": 0.4723, "step": 7403 }, { "epoch": 0.5489730851931489, "grad_norm": 0.35422372817993164, "learning_rate": 0.00012599149787446862, "loss": 0.5341, "step": 7404 }, { "epoch": 0.5490472306665678, "grad_norm": 0.36903661489486694, "learning_rate": 0.00012598149537384348, "loss": 0.5274, "step": 7405 }, { "epoch": 0.5491213761399867, "grad_norm": 0.34699806571006775, "learning_rate": 0.0001259714928732183, "loss": 0.4796, "step": 7406 }, { "epoch": 0.5491955216134055, "grad_norm": 0.3663420081138611, "learning_rate": 0.00012596149037259315, "loss": 0.5109, "step": 7407 }, { "epoch": 0.5492696670868243, "grad_norm": 0.37623330950737, "learning_rate": 0.000125951487871968, "loss": 0.5557, "step": 7408 }, { "epoch": 0.5493438125602432, "grad_norm": 0.3462178707122803, "learning_rate": 0.00012594148537134286, "loss": 0.4867, "step": 7409 }, { "epoch": 0.549417958033662, "grad_norm": 0.3691945970058441, "learning_rate": 0.00012593148287071767, "loss": 0.5271, "step": 7410 }, { "epoch": 0.5494921035070809, "grad_norm": 0.3448996841907501, "learning_rate": 0.00012592148037009253, "loss": 0.4843, "step": 7411 }, { "epoch": 0.5495662489804998, "grad_norm": 0.34893473982810974, "learning_rate": 0.00012591147786946737, "loss": 0.475, "step": 7412 }, { "epoch": 0.5496403944539185, "grad_norm": 0.32235947251319885, "learning_rate": 0.00012590147536884223, "loss": 0.4616, "step": 7413 }, { "epoch": 0.5497145399273374, "grad_norm": 0.33891016244888306, "learning_rate": 0.00012589147286821704, "loss": 0.4945, "step": 7414 }, { "epoch": 0.5497886854007563, "grad_norm": 0.3609851002693176, "learning_rate": 0.0001258814703675919, "loss": 0.4937, "step": 7415 }, { "epoch": 0.5498628308741751, "grad_norm": 0.33442068099975586, "learning_rate": 0.00012587146786696674, "loss": 0.4687, "step": 7416 }, { "epoch": 0.549936976347594, "grad_norm": 0.363252729177475, "learning_rate": 0.0001258614653663416, "loss": 0.5122, "step": 7417 }, { "epoch": 0.5500111218210129, "grad_norm": 0.3829270601272583, "learning_rate": 0.00012585146286571644, "loss": 0.5438, "step": 7418 }, { "epoch": 0.5500852672944316, "grad_norm": 0.386149138212204, "learning_rate": 0.00012584146036509128, "loss": 0.4943, "step": 7419 }, { "epoch": 0.5501594127678505, "grad_norm": 0.36051446199417114, "learning_rate": 0.00012583145786446612, "loss": 0.5331, "step": 7420 }, { "epoch": 0.5502335582412694, "grad_norm": 0.3422638773918152, "learning_rate": 0.00012582145536384095, "loss": 0.4746, "step": 7421 }, { "epoch": 0.5503077037146882, "grad_norm": 0.38133543729782104, "learning_rate": 0.00012581145286321582, "loss": 0.5375, "step": 7422 }, { "epoch": 0.5503818491881071, "grad_norm": 0.3784352242946625, "learning_rate": 0.00012580145036259066, "loss": 0.4814, "step": 7423 }, { "epoch": 0.550455994661526, "grad_norm": 0.35509443283081055, "learning_rate": 0.0001257914478619655, "loss": 0.5104, "step": 7424 }, { "epoch": 0.5505301401349447, "grad_norm": 0.335232138633728, "learning_rate": 0.00012578144536134033, "loss": 0.4681, "step": 7425 }, { "epoch": 0.5506042856083636, "grad_norm": 0.33565083146095276, "learning_rate": 0.0001257714428607152, "loss": 0.4836, "step": 7426 }, { "epoch": 0.5506784310817825, "grad_norm": 0.36034759879112244, "learning_rate": 0.00012576144036009003, "loss": 0.5131, "step": 7427 }, { "epoch": 0.5507525765552013, "grad_norm": 0.3719807267189026, "learning_rate": 0.0001257514378594649, "loss": 0.5004, "step": 7428 }, { "epoch": 0.5508267220286202, "grad_norm": 0.37268438935279846, "learning_rate": 0.0001257414353588397, "loss": 0.4967, "step": 7429 }, { "epoch": 0.550900867502039, "grad_norm": 0.3619964122772217, "learning_rate": 0.00012573143285821457, "loss": 0.5429, "step": 7430 }, { "epoch": 0.5509750129754578, "grad_norm": 0.3503613770008087, "learning_rate": 0.0001257214303575894, "loss": 0.5052, "step": 7431 }, { "epoch": 0.5510491584488767, "grad_norm": 0.33863508701324463, "learning_rate": 0.00012571142785696424, "loss": 0.4816, "step": 7432 }, { "epoch": 0.5511233039222956, "grad_norm": 0.33807075023651123, "learning_rate": 0.00012570142535633908, "loss": 0.4829, "step": 7433 }, { "epoch": 0.5511974493957144, "grad_norm": 0.3545317053794861, "learning_rate": 0.00012569142285571394, "loss": 0.4902, "step": 7434 }, { "epoch": 0.5512715948691332, "grad_norm": 0.34421974420547485, "learning_rate": 0.00012568142035508878, "loss": 0.5006, "step": 7435 }, { "epoch": 0.5513457403425521, "grad_norm": 0.33056604862213135, "learning_rate": 0.00012567141785446362, "loss": 0.4545, "step": 7436 }, { "epoch": 0.5514198858159709, "grad_norm": 0.3467172086238861, "learning_rate": 0.00012566141535383846, "loss": 0.4769, "step": 7437 }, { "epoch": 0.5514940312893898, "grad_norm": 0.36026251316070557, "learning_rate": 0.00012565141285321332, "loss": 0.4774, "step": 7438 }, { "epoch": 0.5515681767628087, "grad_norm": 0.36362409591674805, "learning_rate": 0.00012564141035258816, "loss": 0.5276, "step": 7439 }, { "epoch": 0.5516423222362274, "grad_norm": 0.337065726518631, "learning_rate": 0.000125631407851963, "loss": 0.4589, "step": 7440 }, { "epoch": 0.5517164677096463, "grad_norm": 0.3749960660934448, "learning_rate": 0.00012562140535133783, "loss": 0.5345, "step": 7441 }, { "epoch": 0.5517906131830652, "grad_norm": 0.4297749102115631, "learning_rate": 0.0001256114028507127, "loss": 0.5078, "step": 7442 }, { "epoch": 0.551864758656484, "grad_norm": 0.3528011739253998, "learning_rate": 0.0001256014003500875, "loss": 0.502, "step": 7443 }, { "epoch": 0.5519389041299029, "grad_norm": 0.34255433082580566, "learning_rate": 0.00012559139784946237, "loss": 0.474, "step": 7444 }, { "epoch": 0.5520130496033218, "grad_norm": 0.387048602104187, "learning_rate": 0.0001255813953488372, "loss": 0.5494, "step": 7445 }, { "epoch": 0.5520871950767405, "grad_norm": 0.38122978806495667, "learning_rate": 0.00012557139284821207, "loss": 0.4965, "step": 7446 }, { "epoch": 0.5521613405501594, "grad_norm": 0.3784339129924774, "learning_rate": 0.00012556139034758688, "loss": 0.482, "step": 7447 }, { "epoch": 0.5522354860235783, "grad_norm": 0.3891516327857971, "learning_rate": 0.00012555138784696175, "loss": 0.5783, "step": 7448 }, { "epoch": 0.5523096314969971, "grad_norm": 0.344220370054245, "learning_rate": 0.00012554138534633658, "loss": 0.4926, "step": 7449 }, { "epoch": 0.552383776970416, "grad_norm": 0.35565653443336487, "learning_rate": 0.00012553138284571145, "loss": 0.4791, "step": 7450 }, { "epoch": 0.5524579224438348, "grad_norm": 0.36533933877944946, "learning_rate": 0.00012552138034508626, "loss": 0.5162, "step": 7451 }, { "epoch": 0.5525320679172536, "grad_norm": 0.36282992362976074, "learning_rate": 0.00012551137784446112, "loss": 0.5136, "step": 7452 }, { "epoch": 0.5526062133906725, "grad_norm": 0.34104517102241516, "learning_rate": 0.00012550137534383596, "loss": 0.4878, "step": 7453 }, { "epoch": 0.5526803588640914, "grad_norm": 0.3672712445259094, "learning_rate": 0.00012549137284321082, "loss": 0.5436, "step": 7454 }, { "epoch": 0.5527545043375102, "grad_norm": 0.3626929521560669, "learning_rate": 0.00012548137034258566, "loss": 0.509, "step": 7455 }, { "epoch": 0.552828649810929, "grad_norm": 0.3562909960746765, "learning_rate": 0.0001254713678419605, "loss": 0.52, "step": 7456 }, { "epoch": 0.5529027952843479, "grad_norm": 0.34964558482170105, "learning_rate": 0.00012546136534133533, "loss": 0.5081, "step": 7457 }, { "epoch": 0.5529769407577667, "grad_norm": 0.35107991099357605, "learning_rate": 0.00012545136284071017, "loss": 0.523, "step": 7458 }, { "epoch": 0.5530510862311856, "grad_norm": 0.34053707122802734, "learning_rate": 0.00012544136034008503, "loss": 0.4854, "step": 7459 }, { "epoch": 0.5531252317046045, "grad_norm": 0.37775900959968567, "learning_rate": 0.00012543135783945987, "loss": 0.5219, "step": 7460 }, { "epoch": 0.5531993771780233, "grad_norm": 0.36906859278678894, "learning_rate": 0.00012542135533883474, "loss": 0.4872, "step": 7461 }, { "epoch": 0.5532735226514421, "grad_norm": 0.34206610918045044, "learning_rate": 0.00012541135283820955, "loss": 0.4761, "step": 7462 }, { "epoch": 0.553347668124861, "grad_norm": 0.34501612186431885, "learning_rate": 0.0001254013503375844, "loss": 0.5046, "step": 7463 }, { "epoch": 0.5534218135982798, "grad_norm": 0.3779245913028717, "learning_rate": 0.00012539134783695925, "loss": 0.5395, "step": 7464 }, { "epoch": 0.5534959590716987, "grad_norm": 0.38417044281959534, "learning_rate": 0.0001253813453363341, "loss": 0.561, "step": 7465 }, { "epoch": 0.5535701045451176, "grad_norm": 0.36131954193115234, "learning_rate": 0.00012537134283570892, "loss": 0.5351, "step": 7466 }, { "epoch": 0.5536442500185363, "grad_norm": 0.35813137888908386, "learning_rate": 0.00012536134033508379, "loss": 0.5538, "step": 7467 }, { "epoch": 0.5537183954919552, "grad_norm": 0.38172686100006104, "learning_rate": 0.00012535133783445862, "loss": 0.4897, "step": 7468 }, { "epoch": 0.5537925409653741, "grad_norm": 0.3647800087928772, "learning_rate": 0.00012534133533383346, "loss": 0.5254, "step": 7469 }, { "epoch": 0.5538666864387929, "grad_norm": 0.349967360496521, "learning_rate": 0.0001253313328332083, "loss": 0.5086, "step": 7470 }, { "epoch": 0.5539408319122118, "grad_norm": 0.34746164083480835, "learning_rate": 0.00012532133033258316, "loss": 0.4855, "step": 7471 }, { "epoch": 0.5540149773856307, "grad_norm": 0.3692401945590973, "learning_rate": 0.000125311327831958, "loss": 0.5171, "step": 7472 }, { "epoch": 0.5540891228590494, "grad_norm": 0.3637937009334564, "learning_rate": 0.00012530132533133284, "loss": 0.519, "step": 7473 }, { "epoch": 0.5541632683324683, "grad_norm": 0.3664627969264984, "learning_rate": 0.00012529132283070767, "loss": 0.5249, "step": 7474 }, { "epoch": 0.5542374138058872, "grad_norm": 0.3795571029186249, "learning_rate": 0.00012528132033008254, "loss": 0.5082, "step": 7475 }, { "epoch": 0.554311559279306, "grad_norm": 0.347962349653244, "learning_rate": 0.00012527131782945737, "loss": 0.4806, "step": 7476 }, { "epoch": 0.5543857047527249, "grad_norm": 0.3597589135169983, "learning_rate": 0.0001252613153288322, "loss": 0.5074, "step": 7477 }, { "epoch": 0.5544598502261437, "grad_norm": 0.35189878940582275, "learning_rate": 0.00012525131282820705, "loss": 0.4796, "step": 7478 }, { "epoch": 0.5545339956995625, "grad_norm": 0.37427186965942383, "learning_rate": 0.0001252413103275819, "loss": 0.5434, "step": 7479 }, { "epoch": 0.5546081411729814, "grad_norm": 0.36806946992874146, "learning_rate": 0.00012523130782695672, "loss": 0.5022, "step": 7480 }, { "epoch": 0.5546822866464003, "grad_norm": 0.35655826330184937, "learning_rate": 0.00012522130532633159, "loss": 0.5028, "step": 7481 }, { "epoch": 0.5547564321198191, "grad_norm": 0.37098780274391174, "learning_rate": 0.00012521130282570642, "loss": 0.5394, "step": 7482 }, { "epoch": 0.554830577593238, "grad_norm": 0.3598725497722626, "learning_rate": 0.0001252013003250813, "loss": 0.5026, "step": 7483 }, { "epoch": 0.5549047230666568, "grad_norm": 0.3543207347393036, "learning_rate": 0.0001251912978244561, "loss": 0.5155, "step": 7484 }, { "epoch": 0.5549788685400756, "grad_norm": 0.40135514736175537, "learning_rate": 0.00012518129532383096, "loss": 0.5305, "step": 7485 }, { "epoch": 0.5550530140134945, "grad_norm": 0.3486589193344116, "learning_rate": 0.0001251712928232058, "loss": 0.4904, "step": 7486 }, { "epoch": 0.5551271594869133, "grad_norm": 0.35551318526268005, "learning_rate": 0.00012516129032258066, "loss": 0.5253, "step": 7487 }, { "epoch": 0.5552013049603322, "grad_norm": 0.34912732243537903, "learning_rate": 0.0001251512878219555, "loss": 0.5149, "step": 7488 }, { "epoch": 0.555275450433751, "grad_norm": 0.3467455506324768, "learning_rate": 0.00012514128532133034, "loss": 0.4888, "step": 7489 }, { "epoch": 0.5553495959071698, "grad_norm": 0.3505609333515167, "learning_rate": 0.00012513128282070517, "loss": 0.4991, "step": 7490 }, { "epoch": 0.5554237413805887, "grad_norm": 0.37377581000328064, "learning_rate": 0.00012512128032008004, "loss": 0.5428, "step": 7491 }, { "epoch": 0.5554978868540076, "grad_norm": 0.3784129023551941, "learning_rate": 0.00012511127781945488, "loss": 0.541, "step": 7492 }, { "epoch": 0.5555720323274264, "grad_norm": 0.3406027853488922, "learning_rate": 0.0001251012753188297, "loss": 0.4906, "step": 7493 }, { "epoch": 0.5556461778008452, "grad_norm": 0.3649967610836029, "learning_rate": 0.00012509127281820458, "loss": 0.5394, "step": 7494 }, { "epoch": 0.5557203232742641, "grad_norm": 0.35536837577819824, "learning_rate": 0.0001250812703175794, "loss": 0.5007, "step": 7495 }, { "epoch": 0.5557944687476829, "grad_norm": 0.3599308431148529, "learning_rate": 0.00012507126781695425, "loss": 0.5197, "step": 7496 }, { "epoch": 0.5558686142211018, "grad_norm": 0.36174505949020386, "learning_rate": 0.0001250612653163291, "loss": 0.5276, "step": 7497 }, { "epoch": 0.5559427596945207, "grad_norm": 0.37582096457481384, "learning_rate": 0.00012505126281570395, "loss": 0.5377, "step": 7498 }, { "epoch": 0.5560169051679394, "grad_norm": 0.34011751413345337, "learning_rate": 0.00012504126031507876, "loss": 0.4497, "step": 7499 }, { "epoch": 0.5560910506413583, "grad_norm": 0.3713642358779907, "learning_rate": 0.00012503125781445363, "loss": 0.5301, "step": 7500 }, { "epoch": 0.5561651961147772, "grad_norm": 0.34211060404777527, "learning_rate": 0.00012502125531382846, "loss": 0.4835, "step": 7501 }, { "epoch": 0.556239341588196, "grad_norm": 0.34828177094459534, "learning_rate": 0.00012501125281320333, "loss": 0.4574, "step": 7502 }, { "epoch": 0.5563134870616149, "grad_norm": 0.35632097721099854, "learning_rate": 0.00012500125031257814, "loss": 0.5019, "step": 7503 }, { "epoch": 0.5563876325350338, "grad_norm": 0.36929330229759216, "learning_rate": 0.000124991247811953, "loss": 0.5198, "step": 7504 }, { "epoch": 0.5564617780084525, "grad_norm": 0.393321692943573, "learning_rate": 0.00012498124531132784, "loss": 0.6157, "step": 7505 }, { "epoch": 0.5565359234818714, "grad_norm": 0.3829360604286194, "learning_rate": 0.00012497124281070268, "loss": 0.5223, "step": 7506 }, { "epoch": 0.5566100689552903, "grad_norm": 0.3507555425167084, "learning_rate": 0.0001249612403100775, "loss": 0.5302, "step": 7507 }, { "epoch": 0.5566842144287091, "grad_norm": 0.34553390741348267, "learning_rate": 0.00012495123780945238, "loss": 0.4907, "step": 7508 }, { "epoch": 0.556758359902128, "grad_norm": 0.3610307574272156, "learning_rate": 0.00012494123530882721, "loss": 0.4576, "step": 7509 }, { "epoch": 0.5568325053755468, "grad_norm": 0.35027697682380676, "learning_rate": 0.00012493123280820205, "loss": 0.4721, "step": 7510 }, { "epoch": 0.5569066508489656, "grad_norm": 0.36570146679878235, "learning_rate": 0.0001249212303075769, "loss": 0.5026, "step": 7511 }, { "epoch": 0.5569807963223845, "grad_norm": 0.3655916452407837, "learning_rate": 0.00012491122780695175, "loss": 0.5122, "step": 7512 }, { "epoch": 0.5570549417958034, "grad_norm": 0.3644005060195923, "learning_rate": 0.0001249012253063266, "loss": 0.4976, "step": 7513 }, { "epoch": 0.5571290872692222, "grad_norm": 0.35651224851608276, "learning_rate": 0.00012489122280570143, "loss": 0.492, "step": 7514 }, { "epoch": 0.557203232742641, "grad_norm": 0.38731715083122253, "learning_rate": 0.00012488122030507626, "loss": 0.5594, "step": 7515 }, { "epoch": 0.5572773782160599, "grad_norm": 0.3649967312812805, "learning_rate": 0.00012487121780445113, "loss": 0.4962, "step": 7516 }, { "epoch": 0.5573515236894787, "grad_norm": 0.380106121301651, "learning_rate": 0.00012486121530382597, "loss": 0.5335, "step": 7517 }, { "epoch": 0.5574256691628976, "grad_norm": 0.37753617763519287, "learning_rate": 0.0001248512128032008, "loss": 0.5441, "step": 7518 }, { "epoch": 0.5574998146363165, "grad_norm": 0.35148942470550537, "learning_rate": 0.00012484121030257564, "loss": 0.4865, "step": 7519 }, { "epoch": 0.5575739601097353, "grad_norm": 0.36560285091400146, "learning_rate": 0.0001248312078019505, "loss": 0.5283, "step": 7520 }, { "epoch": 0.5576481055831541, "grad_norm": 0.35140469670295715, "learning_rate": 0.00012482120530132534, "loss": 0.5391, "step": 7521 }, { "epoch": 0.557722251056573, "grad_norm": 0.3538288474082947, "learning_rate": 0.00012481120280070018, "loss": 0.4754, "step": 7522 }, { "epoch": 0.5577963965299918, "grad_norm": 0.38983482122421265, "learning_rate": 0.00012480120030007502, "loss": 0.5861, "step": 7523 }, { "epoch": 0.5578705420034107, "grad_norm": 0.34873712062835693, "learning_rate": 0.00012479119779944988, "loss": 0.4781, "step": 7524 }, { "epoch": 0.5579446874768296, "grad_norm": 0.3695698082447052, "learning_rate": 0.00012478119529882472, "loss": 0.532, "step": 7525 }, { "epoch": 0.5580188329502483, "grad_norm": 0.3608647584915161, "learning_rate": 0.00012477119279819955, "loss": 0.468, "step": 7526 }, { "epoch": 0.5580929784236672, "grad_norm": 0.35822629928588867, "learning_rate": 0.0001247611902975744, "loss": 0.5027, "step": 7527 }, { "epoch": 0.5581671238970861, "grad_norm": 0.3776105046272278, "learning_rate": 0.00012475118779694925, "loss": 0.5208, "step": 7528 }, { "epoch": 0.5582412693705049, "grad_norm": 0.3575015664100647, "learning_rate": 0.0001247411852963241, "loss": 0.5144, "step": 7529 }, { "epoch": 0.5583154148439238, "grad_norm": 0.3405572474002838, "learning_rate": 0.00012473118279569893, "loss": 0.4742, "step": 7530 }, { "epoch": 0.5583895603173427, "grad_norm": 0.34504663944244385, "learning_rate": 0.0001247211802950738, "loss": 0.4925, "step": 7531 }, { "epoch": 0.5584637057907614, "grad_norm": 0.35814282298088074, "learning_rate": 0.0001247111777944486, "loss": 0.527, "step": 7532 }, { "epoch": 0.5585378512641803, "grad_norm": 0.3343733847141266, "learning_rate": 0.00012470117529382347, "loss": 0.4989, "step": 7533 }, { "epoch": 0.5586119967375992, "grad_norm": 0.35173848271369934, "learning_rate": 0.0001246911727931983, "loss": 0.4964, "step": 7534 }, { "epoch": 0.558686142211018, "grad_norm": 0.39780429005622864, "learning_rate": 0.00012468117029257317, "loss": 0.5479, "step": 7535 }, { "epoch": 0.5587602876844369, "grad_norm": 0.3765057623386383, "learning_rate": 0.00012467116779194798, "loss": 0.5125, "step": 7536 }, { "epoch": 0.5588344331578557, "grad_norm": 0.3740805387496948, "learning_rate": 0.00012466116529132284, "loss": 0.5129, "step": 7537 }, { "epoch": 0.5589085786312745, "grad_norm": 0.34879860281944275, "learning_rate": 0.00012465116279069768, "loss": 0.4619, "step": 7538 }, { "epoch": 0.5589827241046934, "grad_norm": 0.39114561676979065, "learning_rate": 0.00012464116029007254, "loss": 0.5382, "step": 7539 }, { "epoch": 0.5590568695781123, "grad_norm": 0.35413163900375366, "learning_rate": 0.00012463115778944735, "loss": 0.5161, "step": 7540 }, { "epoch": 0.5591310150515311, "grad_norm": 0.3364805281162262, "learning_rate": 0.00012462115528882222, "loss": 0.4584, "step": 7541 }, { "epoch": 0.55920516052495, "grad_norm": 0.35112422704696655, "learning_rate": 0.00012461115278819706, "loss": 0.4974, "step": 7542 }, { "epoch": 0.5592793059983688, "grad_norm": 0.38075655698776245, "learning_rate": 0.0001246011502875719, "loss": 0.559, "step": 7543 }, { "epoch": 0.5593534514717876, "grad_norm": 0.3802224397659302, "learning_rate": 0.00012459114778694673, "loss": 0.4939, "step": 7544 }, { "epoch": 0.5594275969452065, "grad_norm": 0.37565693259239197, "learning_rate": 0.0001245811452863216, "loss": 0.5458, "step": 7545 }, { "epoch": 0.5595017424186254, "grad_norm": 0.3569283187389374, "learning_rate": 0.00012457114278569643, "loss": 0.5533, "step": 7546 }, { "epoch": 0.5595758878920442, "grad_norm": 0.3535924255847931, "learning_rate": 0.00012456114028507127, "loss": 0.5192, "step": 7547 }, { "epoch": 0.559650033365463, "grad_norm": 0.39625871181488037, "learning_rate": 0.0001245511377844461, "loss": 0.5825, "step": 7548 }, { "epoch": 0.5597241788388819, "grad_norm": 0.36905068159103394, "learning_rate": 0.00012454113528382097, "loss": 0.5155, "step": 7549 }, { "epoch": 0.5597983243123007, "grad_norm": 0.36245080828666687, "learning_rate": 0.0001245311327831958, "loss": 0.501, "step": 7550 }, { "epoch": 0.5598724697857196, "grad_norm": 0.3487902581691742, "learning_rate": 0.00012452113028257064, "loss": 0.4905, "step": 7551 }, { "epoch": 0.5599466152591385, "grad_norm": 0.34072551131248474, "learning_rate": 0.00012451112778194548, "loss": 0.4664, "step": 7552 }, { "epoch": 0.5600207607325572, "grad_norm": 0.354932576417923, "learning_rate": 0.00012450112528132034, "loss": 0.5061, "step": 7553 }, { "epoch": 0.5600949062059761, "grad_norm": 0.3300913870334625, "learning_rate": 0.00012449112278069518, "loss": 0.4677, "step": 7554 }, { "epoch": 0.560169051679395, "grad_norm": 0.34671807289123535, "learning_rate": 0.00012448112028007002, "loss": 0.4965, "step": 7555 }, { "epoch": 0.5602431971528138, "grad_norm": 0.3497573137283325, "learning_rate": 0.00012447111777944486, "loss": 0.4946, "step": 7556 }, { "epoch": 0.5603173426262327, "grad_norm": 0.350582480430603, "learning_rate": 0.00012446111527881972, "loss": 0.4982, "step": 7557 }, { "epoch": 0.5603914880996516, "grad_norm": 0.37209659814834595, "learning_rate": 0.00012445111277819456, "loss": 0.5355, "step": 7558 }, { "epoch": 0.5604656335730703, "grad_norm": 0.35700592398643494, "learning_rate": 0.0001244411102775694, "loss": 0.4971, "step": 7559 }, { "epoch": 0.5605397790464892, "grad_norm": 0.3698734641075134, "learning_rate": 0.00012443110777694423, "loss": 0.5345, "step": 7560 }, { "epoch": 0.5606139245199081, "grad_norm": 0.364479124546051, "learning_rate": 0.0001244211052763191, "loss": 0.528, "step": 7561 }, { "epoch": 0.5606880699933269, "grad_norm": 0.35073113441467285, "learning_rate": 0.00012441110277569393, "loss": 0.5253, "step": 7562 }, { "epoch": 0.5607622154667458, "grad_norm": 0.3600864112377167, "learning_rate": 0.00012440110027506877, "loss": 0.5335, "step": 7563 }, { "epoch": 0.5608363609401646, "grad_norm": 0.3404958248138428, "learning_rate": 0.00012439109777444363, "loss": 0.4677, "step": 7564 }, { "epoch": 0.5609105064135834, "grad_norm": 0.3708242177963257, "learning_rate": 0.00012438109527381847, "loss": 0.5675, "step": 7565 }, { "epoch": 0.5609846518870023, "grad_norm": 0.33849284052848816, "learning_rate": 0.0001243710927731933, "loss": 0.4841, "step": 7566 }, { "epoch": 0.5610587973604212, "grad_norm": 0.3477077782154083, "learning_rate": 0.00012436109027256815, "loss": 0.4634, "step": 7567 }, { "epoch": 0.56113294283384, "grad_norm": 0.35521194338798523, "learning_rate": 0.000124351087771943, "loss": 0.5155, "step": 7568 }, { "epoch": 0.5612070883072589, "grad_norm": 0.39580288529396057, "learning_rate": 0.00012434108527131782, "loss": 0.5746, "step": 7569 }, { "epoch": 0.5612812337806777, "grad_norm": 0.3490569591522217, "learning_rate": 0.00012433108277069268, "loss": 0.4935, "step": 7570 }, { "epoch": 0.5613553792540965, "grad_norm": 0.35219070315361023, "learning_rate": 0.00012432108027006752, "loss": 0.476, "step": 7571 }, { "epoch": 0.5614295247275154, "grad_norm": 0.34670940041542053, "learning_rate": 0.00012431107776944238, "loss": 0.4909, "step": 7572 }, { "epoch": 0.5615036702009343, "grad_norm": 0.34337908029556274, "learning_rate": 0.0001243010752688172, "loss": 0.4767, "step": 7573 }, { "epoch": 0.561577815674353, "grad_norm": 0.3335748016834259, "learning_rate": 0.00012429107276819206, "loss": 0.47, "step": 7574 }, { "epoch": 0.5616519611477719, "grad_norm": 0.3333697021007538, "learning_rate": 0.0001242810702675669, "loss": 0.4642, "step": 7575 }, { "epoch": 0.5617261066211908, "grad_norm": 0.3725191652774811, "learning_rate": 0.00012427106776694176, "loss": 0.573, "step": 7576 }, { "epoch": 0.5618002520946096, "grad_norm": 0.3807142376899719, "learning_rate": 0.00012426106526631657, "loss": 0.5594, "step": 7577 }, { "epoch": 0.5618743975680285, "grad_norm": 0.3717150390148163, "learning_rate": 0.00012425106276569143, "loss": 0.5485, "step": 7578 }, { "epoch": 0.5619485430414474, "grad_norm": 0.3850148022174835, "learning_rate": 0.00012424106026506627, "loss": 0.515, "step": 7579 }, { "epoch": 0.5620226885148661, "grad_norm": 0.3553541600704193, "learning_rate": 0.0001242310577644411, "loss": 0.4929, "step": 7580 }, { "epoch": 0.562096833988285, "grad_norm": 0.37159600853919983, "learning_rate": 0.00012422105526381595, "loss": 0.4918, "step": 7581 }, { "epoch": 0.5621709794617039, "grad_norm": 0.3616524636745453, "learning_rate": 0.0001242110527631908, "loss": 0.4938, "step": 7582 }, { "epoch": 0.5622451249351227, "grad_norm": 0.3497854173183441, "learning_rate": 0.00012420105026256565, "loss": 0.5488, "step": 7583 }, { "epoch": 0.5623192704085416, "grad_norm": 0.34121596813201904, "learning_rate": 0.00012419104776194048, "loss": 0.5204, "step": 7584 }, { "epoch": 0.5623934158819605, "grad_norm": 0.32557564973831177, "learning_rate": 0.00012418104526131532, "loss": 0.4383, "step": 7585 }, { "epoch": 0.5624675613553792, "grad_norm": 0.38068118691444397, "learning_rate": 0.00012417104276069019, "loss": 0.563, "step": 7586 }, { "epoch": 0.5625417068287981, "grad_norm": 0.3558681011199951, "learning_rate": 0.00012416104026006502, "loss": 0.5258, "step": 7587 }, { "epoch": 0.562615852302217, "grad_norm": 0.3434196710586548, "learning_rate": 0.00012415103775943986, "loss": 0.4869, "step": 7588 }, { "epoch": 0.5626899977756358, "grad_norm": 0.37046366930007935, "learning_rate": 0.0001241410352588147, "loss": 0.4947, "step": 7589 }, { "epoch": 0.5627641432490547, "grad_norm": 0.33954060077667236, "learning_rate": 0.00012413103275818956, "loss": 0.4671, "step": 7590 }, { "epoch": 0.5628382887224735, "grad_norm": 0.35419461131095886, "learning_rate": 0.0001241210302575644, "loss": 0.5101, "step": 7591 }, { "epoch": 0.5629124341958923, "grad_norm": 0.3611812889575958, "learning_rate": 0.00012411102775693924, "loss": 0.4797, "step": 7592 }, { "epoch": 0.5629865796693112, "grad_norm": 0.373042494058609, "learning_rate": 0.00012410102525631407, "loss": 0.5216, "step": 7593 }, { "epoch": 0.5630607251427301, "grad_norm": 0.32805439829826355, "learning_rate": 0.00012409102275568894, "loss": 0.4394, "step": 7594 }, { "epoch": 0.5631348706161489, "grad_norm": 0.3404342234134674, "learning_rate": 0.00012408102025506377, "loss": 0.4841, "step": 7595 }, { "epoch": 0.5632090160895677, "grad_norm": 0.36031782627105713, "learning_rate": 0.0001240710177544386, "loss": 0.4978, "step": 7596 }, { "epoch": 0.5632831615629866, "grad_norm": 0.3539774417877197, "learning_rate": 0.00012406101525381347, "loss": 0.5137, "step": 7597 }, { "epoch": 0.5633573070364054, "grad_norm": 0.36944910883903503, "learning_rate": 0.0001240510127531883, "loss": 0.5469, "step": 7598 }, { "epoch": 0.5634314525098243, "grad_norm": 0.3713299036026001, "learning_rate": 0.00012404101025256315, "loss": 0.5134, "step": 7599 }, { "epoch": 0.5635055979832432, "grad_norm": 0.36284342408180237, "learning_rate": 0.00012403100775193799, "loss": 0.5327, "step": 7600 }, { "epoch": 0.563579743456662, "grad_norm": 0.3407673239707947, "learning_rate": 0.00012402100525131285, "loss": 0.4864, "step": 7601 }, { "epoch": 0.5636538889300808, "grad_norm": 0.3486020267009735, "learning_rate": 0.0001240110027506877, "loss": 0.461, "step": 7602 }, { "epoch": 0.5637280344034996, "grad_norm": 0.3647395074367523, "learning_rate": 0.00012400100025006252, "loss": 0.5112, "step": 7603 }, { "epoch": 0.5638021798769185, "grad_norm": 0.3777535557746887, "learning_rate": 0.00012399099774943736, "loss": 0.5165, "step": 7604 }, { "epoch": 0.5638763253503374, "grad_norm": 0.378292053937912, "learning_rate": 0.00012398099524881223, "loss": 0.5066, "step": 7605 }, { "epoch": 0.5639504708237562, "grad_norm": 0.35282474756240845, "learning_rate": 0.00012397099274818704, "loss": 0.4858, "step": 7606 }, { "epoch": 0.564024616297175, "grad_norm": 0.3658555746078491, "learning_rate": 0.0001239609902475619, "loss": 0.5157, "step": 7607 }, { "epoch": 0.5640987617705939, "grad_norm": 0.3518604040145874, "learning_rate": 0.00012395098774693674, "loss": 0.5503, "step": 7608 }, { "epoch": 0.5641729072440127, "grad_norm": 0.38639509677886963, "learning_rate": 0.0001239409852463116, "loss": 0.5866, "step": 7609 }, { "epoch": 0.5642470527174316, "grad_norm": 0.32256126403808594, "learning_rate": 0.0001239309827456864, "loss": 0.4682, "step": 7610 }, { "epoch": 0.5643211981908505, "grad_norm": 0.37711092829704285, "learning_rate": 0.00012392098024506128, "loss": 0.4932, "step": 7611 }, { "epoch": 0.5643953436642692, "grad_norm": 0.35912126302719116, "learning_rate": 0.0001239109777444361, "loss": 0.5334, "step": 7612 }, { "epoch": 0.5644694891376881, "grad_norm": 0.36276277899742126, "learning_rate": 0.00012390097524381098, "loss": 0.5163, "step": 7613 }, { "epoch": 0.564543634611107, "grad_norm": 0.34374862909317017, "learning_rate": 0.0001238909727431858, "loss": 0.5112, "step": 7614 }, { "epoch": 0.5646177800845258, "grad_norm": 0.3478861451148987, "learning_rate": 0.00012388097024256065, "loss": 0.5015, "step": 7615 }, { "epoch": 0.5646919255579447, "grad_norm": 0.3447573482990265, "learning_rate": 0.0001238709677419355, "loss": 0.4812, "step": 7616 }, { "epoch": 0.5647660710313636, "grad_norm": 0.36448514461517334, "learning_rate": 0.00012386096524131032, "loss": 0.5294, "step": 7617 }, { "epoch": 0.5648402165047823, "grad_norm": 0.3367800712585449, "learning_rate": 0.00012385096274068516, "loss": 0.4871, "step": 7618 }, { "epoch": 0.5649143619782012, "grad_norm": 0.3454263210296631, "learning_rate": 0.00012384096024006003, "loss": 0.4817, "step": 7619 }, { "epoch": 0.5649885074516201, "grad_norm": 0.3479072153568268, "learning_rate": 0.00012383095773943486, "loss": 0.4409, "step": 7620 }, { "epoch": 0.5650626529250389, "grad_norm": 0.35792359709739685, "learning_rate": 0.0001238209552388097, "loss": 0.4978, "step": 7621 }, { "epoch": 0.5651367983984578, "grad_norm": 0.3538142144680023, "learning_rate": 0.00012381095273818454, "loss": 0.5368, "step": 7622 }, { "epoch": 0.5652109438718766, "grad_norm": 0.35439255833625793, "learning_rate": 0.0001238009502375594, "loss": 0.5157, "step": 7623 }, { "epoch": 0.5652850893452954, "grad_norm": 0.3644980192184448, "learning_rate": 0.00012379094773693424, "loss": 0.5071, "step": 7624 }, { "epoch": 0.5653592348187143, "grad_norm": 0.35755372047424316, "learning_rate": 0.00012378094523630908, "loss": 0.5062, "step": 7625 }, { "epoch": 0.5654333802921332, "grad_norm": 0.38452550768852234, "learning_rate": 0.0001237709427356839, "loss": 0.5171, "step": 7626 }, { "epoch": 0.565507525765552, "grad_norm": 0.35230615735054016, "learning_rate": 0.00012376094023505878, "loss": 0.5112, "step": 7627 }, { "epoch": 0.5655816712389709, "grad_norm": 0.3495989143848419, "learning_rate": 0.00012375093773443361, "loss": 0.4755, "step": 7628 }, { "epoch": 0.5656558167123897, "grad_norm": 0.34283241629600525, "learning_rate": 0.00012374093523380845, "loss": 0.4818, "step": 7629 }, { "epoch": 0.5657299621858085, "grad_norm": 0.36277252435684204, "learning_rate": 0.00012373093273318332, "loss": 0.4982, "step": 7630 }, { "epoch": 0.5658041076592274, "grad_norm": 0.3578779697418213, "learning_rate": 0.00012372093023255815, "loss": 0.5207, "step": 7631 }, { "epoch": 0.5658782531326463, "grad_norm": 0.3654206097126007, "learning_rate": 0.000123710927731933, "loss": 0.5033, "step": 7632 }, { "epoch": 0.565952398606065, "grad_norm": 0.364501953125, "learning_rate": 0.00012370092523130783, "loss": 0.5119, "step": 7633 }, { "epoch": 0.5660265440794839, "grad_norm": 0.3548794090747833, "learning_rate": 0.0001236909227306827, "loss": 0.5443, "step": 7634 }, { "epoch": 0.5661006895529028, "grad_norm": 0.3580988347530365, "learning_rate": 0.00012368092023005753, "loss": 0.5112, "step": 7635 }, { "epoch": 0.5661748350263216, "grad_norm": 0.3668052554130554, "learning_rate": 0.00012367091772943237, "loss": 0.5267, "step": 7636 }, { "epoch": 0.5662489804997405, "grad_norm": 0.3641362190246582, "learning_rate": 0.0001236609152288072, "loss": 0.5035, "step": 7637 }, { "epoch": 0.5663231259731594, "grad_norm": 0.34865519404411316, "learning_rate": 0.00012365091272818207, "loss": 0.4845, "step": 7638 }, { "epoch": 0.5663972714465781, "grad_norm": 0.33815068006515503, "learning_rate": 0.0001236409102275569, "loss": 0.4759, "step": 7639 }, { "epoch": 0.566471416919997, "grad_norm": 0.39118874073028564, "learning_rate": 0.00012363090772693174, "loss": 0.5482, "step": 7640 }, { "epoch": 0.5665455623934159, "grad_norm": 0.32563573122024536, "learning_rate": 0.00012362090522630658, "loss": 0.4638, "step": 7641 }, { "epoch": 0.5666197078668347, "grad_norm": 0.3408149778842926, "learning_rate": 0.00012361090272568144, "loss": 0.4754, "step": 7642 }, { "epoch": 0.5666938533402536, "grad_norm": 0.34237781167030334, "learning_rate": 0.00012360090022505625, "loss": 0.5102, "step": 7643 }, { "epoch": 0.5667679988136725, "grad_norm": 0.3427239656448364, "learning_rate": 0.00012359089772443112, "loss": 0.4964, "step": 7644 }, { "epoch": 0.5668421442870912, "grad_norm": 0.37136197090148926, "learning_rate": 0.00012358089522380595, "loss": 0.4807, "step": 7645 }, { "epoch": 0.5669162897605101, "grad_norm": 0.36723968386650085, "learning_rate": 0.00012357089272318082, "loss": 0.5203, "step": 7646 }, { "epoch": 0.566990435233929, "grad_norm": 0.34775441884994507, "learning_rate": 0.00012356089022255563, "loss": 0.4794, "step": 7647 }, { "epoch": 0.5670645807073478, "grad_norm": 0.34356552362442017, "learning_rate": 0.0001235508877219305, "loss": 0.5016, "step": 7648 }, { "epoch": 0.5671387261807667, "grad_norm": 0.35071370005607605, "learning_rate": 0.00012354088522130533, "loss": 0.5111, "step": 7649 }, { "epoch": 0.5672128716541855, "grad_norm": 0.34785836935043335, "learning_rate": 0.0001235308827206802, "loss": 0.4928, "step": 7650 }, { "epoch": 0.5672870171276043, "grad_norm": 0.33987587690353394, "learning_rate": 0.000123520880220055, "loss": 0.4691, "step": 7651 }, { "epoch": 0.5673611626010232, "grad_norm": 0.37761998176574707, "learning_rate": 0.00012351087771942987, "loss": 0.5713, "step": 7652 }, { "epoch": 0.5674353080744421, "grad_norm": 0.3632824420928955, "learning_rate": 0.0001235008752188047, "loss": 0.5402, "step": 7653 }, { "epoch": 0.5675094535478609, "grad_norm": 0.3456856310367584, "learning_rate": 0.00012349087271817954, "loss": 0.5126, "step": 7654 }, { "epoch": 0.5675835990212797, "grad_norm": 0.37800315022468567, "learning_rate": 0.00012348087021755438, "loss": 0.5338, "step": 7655 }, { "epoch": 0.5676577444946986, "grad_norm": 0.3486292064189911, "learning_rate": 0.00012347086771692924, "loss": 0.4709, "step": 7656 }, { "epoch": 0.5677318899681174, "grad_norm": 0.395722359418869, "learning_rate": 0.00012346086521630408, "loss": 0.5097, "step": 7657 }, { "epoch": 0.5678060354415363, "grad_norm": 0.3423427939414978, "learning_rate": 0.00012345086271567892, "loss": 0.5118, "step": 7658 }, { "epoch": 0.5678801809149552, "grad_norm": 0.3423959016799927, "learning_rate": 0.00012344086021505375, "loss": 0.4867, "step": 7659 }, { "epoch": 0.567954326388374, "grad_norm": 0.352405309677124, "learning_rate": 0.00012343085771442862, "loss": 0.5168, "step": 7660 }, { "epoch": 0.5680284718617928, "grad_norm": 0.3600670397281647, "learning_rate": 0.00012342085521380346, "loss": 0.5408, "step": 7661 }, { "epoch": 0.5681026173352117, "grad_norm": 0.338670015335083, "learning_rate": 0.0001234108527131783, "loss": 0.4584, "step": 7662 }, { "epoch": 0.5681767628086305, "grad_norm": 0.3553425967693329, "learning_rate": 0.00012340085021255316, "loss": 0.4976, "step": 7663 }, { "epoch": 0.5682509082820494, "grad_norm": 0.3526972234249115, "learning_rate": 0.000123390847711928, "loss": 0.5132, "step": 7664 }, { "epoch": 0.5683250537554683, "grad_norm": 0.3500680923461914, "learning_rate": 0.00012338084521130283, "loss": 0.4974, "step": 7665 }, { "epoch": 0.568399199228887, "grad_norm": 0.3458208441734314, "learning_rate": 0.00012337084271067767, "loss": 0.5074, "step": 7666 }, { "epoch": 0.5684733447023059, "grad_norm": 0.36567142605781555, "learning_rate": 0.00012336084021005253, "loss": 0.5093, "step": 7667 }, { "epoch": 0.5685474901757248, "grad_norm": 0.35339391231536865, "learning_rate": 0.00012335083770942737, "loss": 0.5025, "step": 7668 }, { "epoch": 0.5686216356491436, "grad_norm": 0.3522091507911682, "learning_rate": 0.0001233408352088022, "loss": 0.4812, "step": 7669 }, { "epoch": 0.5686957811225625, "grad_norm": 0.36061781644821167, "learning_rate": 0.00012333083270817704, "loss": 0.5114, "step": 7670 }, { "epoch": 0.5687699265959814, "grad_norm": 0.365420937538147, "learning_rate": 0.0001233208302075519, "loss": 0.5135, "step": 7671 }, { "epoch": 0.5688440720694001, "grad_norm": 0.371665358543396, "learning_rate": 0.00012331082770692674, "loss": 0.558, "step": 7672 }, { "epoch": 0.568918217542819, "grad_norm": 0.35471031069755554, "learning_rate": 0.00012330082520630158, "loss": 0.491, "step": 7673 }, { "epoch": 0.5689923630162379, "grad_norm": 0.3649576008319855, "learning_rate": 0.00012329082270567642, "loss": 0.5192, "step": 7674 }, { "epoch": 0.5690665084896567, "grad_norm": 0.3610188961029053, "learning_rate": 0.00012328082020505128, "loss": 0.4993, "step": 7675 }, { "epoch": 0.5691406539630756, "grad_norm": 0.36077240109443665, "learning_rate": 0.00012327081770442612, "loss": 0.4855, "step": 7676 }, { "epoch": 0.5692147994364944, "grad_norm": 0.37327975034713745, "learning_rate": 0.00012326081520380096, "loss": 0.5223, "step": 7677 }, { "epoch": 0.5692889449099132, "grad_norm": 0.3741134703159332, "learning_rate": 0.0001232508127031758, "loss": 0.5065, "step": 7678 }, { "epoch": 0.5693630903833321, "grad_norm": 0.3698692321777344, "learning_rate": 0.00012324081020255066, "loss": 0.5051, "step": 7679 }, { "epoch": 0.569437235856751, "grad_norm": 0.36261945962905884, "learning_rate": 0.00012323080770192547, "loss": 0.5241, "step": 7680 }, { "epoch": 0.5695113813301698, "grad_norm": 0.3574073612689972, "learning_rate": 0.00012322080520130033, "loss": 0.4929, "step": 7681 }, { "epoch": 0.5695855268035886, "grad_norm": 0.3440300226211548, "learning_rate": 0.00012321080270067517, "loss": 0.4695, "step": 7682 }, { "epoch": 0.5696596722770075, "grad_norm": 0.3837168216705322, "learning_rate": 0.00012320080020005003, "loss": 0.5345, "step": 7683 }, { "epoch": 0.5697338177504263, "grad_norm": 0.3613276779651642, "learning_rate": 0.00012319079769942484, "loss": 0.5106, "step": 7684 }, { "epoch": 0.5698079632238452, "grad_norm": 0.3832623362541199, "learning_rate": 0.0001231807951987997, "loss": 0.5688, "step": 7685 }, { "epoch": 0.5698821086972641, "grad_norm": 0.358029305934906, "learning_rate": 0.00012317079269817454, "loss": 0.4923, "step": 7686 }, { "epoch": 0.5699562541706829, "grad_norm": 0.35361775755882263, "learning_rate": 0.0001231607901975494, "loss": 0.5098, "step": 7687 }, { "epoch": 0.5700303996441017, "grad_norm": 0.3532618284225464, "learning_rate": 0.00012315078769692422, "loss": 0.4852, "step": 7688 }, { "epoch": 0.5701045451175206, "grad_norm": 0.3521096706390381, "learning_rate": 0.00012314078519629908, "loss": 0.5024, "step": 7689 }, { "epoch": 0.5701786905909394, "grad_norm": 0.32622623443603516, "learning_rate": 0.00012313078269567392, "loss": 0.4529, "step": 7690 }, { "epoch": 0.5702528360643583, "grad_norm": 0.347358375787735, "learning_rate": 0.00012312078019504876, "loss": 0.4878, "step": 7691 }, { "epoch": 0.5703269815377772, "grad_norm": 0.3390815854072571, "learning_rate": 0.0001231107776944236, "loss": 0.4568, "step": 7692 }, { "epoch": 0.5704011270111959, "grad_norm": 0.3664950430393219, "learning_rate": 0.00012310077519379846, "loss": 0.5192, "step": 7693 }, { "epoch": 0.5704752724846148, "grad_norm": 0.3542894423007965, "learning_rate": 0.0001230907726931733, "loss": 0.4848, "step": 7694 }, { "epoch": 0.5705494179580337, "grad_norm": 0.35684850811958313, "learning_rate": 0.00012308077019254813, "loss": 0.5078, "step": 7695 }, { "epoch": 0.5706235634314525, "grad_norm": 0.377427339553833, "learning_rate": 0.00012307076769192297, "loss": 0.5397, "step": 7696 }, { "epoch": 0.5706977089048714, "grad_norm": 0.37577664852142334, "learning_rate": 0.00012306076519129783, "loss": 0.5184, "step": 7697 }, { "epoch": 0.5707718543782903, "grad_norm": 0.35153672099113464, "learning_rate": 0.00012305076269067267, "loss": 0.4831, "step": 7698 }, { "epoch": 0.570845999851709, "grad_norm": 0.348748117685318, "learning_rate": 0.0001230407601900475, "loss": 0.4953, "step": 7699 }, { "epoch": 0.5709201453251279, "grad_norm": 0.38248366117477417, "learning_rate": 0.00012303075768942237, "loss": 0.5655, "step": 7700 }, { "epoch": 0.5709942907985468, "grad_norm": 0.35536065697669983, "learning_rate": 0.0001230207551887972, "loss": 0.4717, "step": 7701 }, { "epoch": 0.5710684362719656, "grad_norm": 0.35648173093795776, "learning_rate": 0.00012301075268817205, "loss": 0.4936, "step": 7702 }, { "epoch": 0.5711425817453845, "grad_norm": 0.3662683963775635, "learning_rate": 0.00012300075018754688, "loss": 0.5102, "step": 7703 }, { "epoch": 0.5712167272188033, "grad_norm": 0.3631732761859894, "learning_rate": 0.00012299074768692175, "loss": 0.468, "step": 7704 }, { "epoch": 0.5712908726922221, "grad_norm": 0.35809922218322754, "learning_rate": 0.00012298074518629659, "loss": 0.4986, "step": 7705 }, { "epoch": 0.571365018165641, "grad_norm": 0.3632048964500427, "learning_rate": 0.00012297074268567142, "loss": 0.4791, "step": 7706 }, { "epoch": 0.5714391636390599, "grad_norm": 0.3574922978878021, "learning_rate": 0.00012296074018504626, "loss": 0.511, "step": 7707 }, { "epoch": 0.5715133091124787, "grad_norm": 0.35865411162376404, "learning_rate": 0.00012295073768442112, "loss": 0.4733, "step": 7708 }, { "epoch": 0.5715874545858975, "grad_norm": 0.3764439523220062, "learning_rate": 0.00012294073518379596, "loss": 0.5036, "step": 7709 }, { "epoch": 0.5716616000593164, "grad_norm": 0.36388346552848816, "learning_rate": 0.0001229307326831708, "loss": 0.5078, "step": 7710 }, { "epoch": 0.5717357455327352, "grad_norm": 0.36452576518058777, "learning_rate": 0.00012292073018254563, "loss": 0.5034, "step": 7711 }, { "epoch": 0.5718098910061541, "grad_norm": 0.36103254556655884, "learning_rate": 0.0001229107276819205, "loss": 0.487, "step": 7712 }, { "epoch": 0.571884036479573, "grad_norm": 0.34063050150871277, "learning_rate": 0.00012290072518129534, "loss": 0.4806, "step": 7713 }, { "epoch": 0.5719581819529918, "grad_norm": 0.34428122639656067, "learning_rate": 0.00012289072268067017, "loss": 0.474, "step": 7714 }, { "epoch": 0.5720323274264106, "grad_norm": 0.36309337615966797, "learning_rate": 0.000122880720180045, "loss": 0.4691, "step": 7715 }, { "epoch": 0.5721064728998294, "grad_norm": 0.412217378616333, "learning_rate": 0.00012287071767941987, "loss": 0.5208, "step": 7716 }, { "epoch": 0.5721806183732483, "grad_norm": 0.37788671255111694, "learning_rate": 0.00012286071517879468, "loss": 0.5088, "step": 7717 }, { "epoch": 0.5722547638466672, "grad_norm": 0.4094054102897644, "learning_rate": 0.00012285071267816955, "loss": 0.5459, "step": 7718 }, { "epoch": 0.572328909320086, "grad_norm": 0.3557129204273224, "learning_rate": 0.00012284071017754439, "loss": 0.523, "step": 7719 }, { "epoch": 0.5724030547935048, "grad_norm": 0.3522024154663086, "learning_rate": 0.00012283070767691925, "loss": 0.5265, "step": 7720 }, { "epoch": 0.5724772002669237, "grad_norm": 0.3562451899051666, "learning_rate": 0.00012282070517629406, "loss": 0.5402, "step": 7721 }, { "epoch": 0.5725513457403425, "grad_norm": 0.3727414011955261, "learning_rate": 0.00012281070267566892, "loss": 0.4901, "step": 7722 }, { "epoch": 0.5726254912137614, "grad_norm": 0.35931888222694397, "learning_rate": 0.00012280070017504376, "loss": 0.4893, "step": 7723 }, { "epoch": 0.5726996366871803, "grad_norm": 0.3695593476295471, "learning_rate": 0.00012279069767441863, "loss": 0.5085, "step": 7724 }, { "epoch": 0.572773782160599, "grad_norm": 0.36010828614234924, "learning_rate": 0.00012278069517379344, "loss": 0.5156, "step": 7725 }, { "epoch": 0.5728479276340179, "grad_norm": 0.3527994751930237, "learning_rate": 0.0001227706926731683, "loss": 0.4951, "step": 7726 }, { "epoch": 0.5729220731074368, "grad_norm": 0.34606629610061646, "learning_rate": 0.00012276069017254314, "loss": 0.4765, "step": 7727 }, { "epoch": 0.5729962185808556, "grad_norm": 0.3409670889377594, "learning_rate": 0.00012275068767191797, "loss": 0.4936, "step": 7728 }, { "epoch": 0.5730703640542745, "grad_norm": 0.3712744414806366, "learning_rate": 0.0001227406851712928, "loss": 0.4793, "step": 7729 }, { "epoch": 0.5731445095276934, "grad_norm": 0.35082128643989563, "learning_rate": 0.00012273068267066768, "loss": 0.4766, "step": 7730 }, { "epoch": 0.5732186550011121, "grad_norm": 0.33954790234565735, "learning_rate": 0.0001227206801700425, "loss": 0.4584, "step": 7731 }, { "epoch": 0.573292800474531, "grad_norm": 0.3735632300376892, "learning_rate": 0.00012271067766941735, "loss": 0.5188, "step": 7732 }, { "epoch": 0.5733669459479499, "grad_norm": 0.36034679412841797, "learning_rate": 0.0001227006751687922, "loss": 0.5093, "step": 7733 }, { "epoch": 0.5734410914213687, "grad_norm": 0.3493288457393646, "learning_rate": 0.00012269067266816705, "loss": 0.4958, "step": 7734 }, { "epoch": 0.5735152368947876, "grad_norm": 0.36077332496643066, "learning_rate": 0.0001226806701675419, "loss": 0.4961, "step": 7735 }, { "epoch": 0.5735893823682064, "grad_norm": 0.389703631401062, "learning_rate": 0.00012267066766691672, "loss": 0.5417, "step": 7736 }, { "epoch": 0.5736635278416252, "grad_norm": 0.3575299084186554, "learning_rate": 0.0001226606651662916, "loss": 0.5279, "step": 7737 }, { "epoch": 0.5737376733150441, "grad_norm": 0.3574373722076416, "learning_rate": 0.00012265066266566643, "loss": 0.52, "step": 7738 }, { "epoch": 0.573811818788463, "grad_norm": 0.3894183337688446, "learning_rate": 0.0001226406601650413, "loss": 0.5365, "step": 7739 }, { "epoch": 0.5738859642618818, "grad_norm": 0.34103307127952576, "learning_rate": 0.0001226306576644161, "loss": 0.4893, "step": 7740 }, { "epoch": 0.5739601097353006, "grad_norm": 0.406210333108902, "learning_rate": 0.00012262065516379096, "loss": 0.5785, "step": 7741 }, { "epoch": 0.5740342552087195, "grad_norm": 0.36429011821746826, "learning_rate": 0.0001226106526631658, "loss": 0.5274, "step": 7742 }, { "epoch": 0.5741084006821383, "grad_norm": 0.3477676808834076, "learning_rate": 0.00012260065016254064, "loss": 0.5134, "step": 7743 }, { "epoch": 0.5741825461555572, "grad_norm": 0.34313449263572693, "learning_rate": 0.00012259064766191548, "loss": 0.5142, "step": 7744 }, { "epoch": 0.5742566916289761, "grad_norm": 0.331079363822937, "learning_rate": 0.00012258064516129034, "loss": 0.491, "step": 7745 }, { "epoch": 0.5743308371023949, "grad_norm": 0.3272823095321655, "learning_rate": 0.00012257064266066518, "loss": 0.4801, "step": 7746 }, { "epoch": 0.5744049825758137, "grad_norm": 0.3480225205421448, "learning_rate": 0.00012256064016004001, "loss": 0.5075, "step": 7747 }, { "epoch": 0.5744791280492326, "grad_norm": 0.3645436763763428, "learning_rate": 0.00012255063765941485, "loss": 0.5253, "step": 7748 }, { "epoch": 0.5745532735226514, "grad_norm": 0.3726150393486023, "learning_rate": 0.00012254063515878972, "loss": 0.544, "step": 7749 }, { "epoch": 0.5746274189960703, "grad_norm": 0.34113210439682007, "learning_rate": 0.00012253063265816455, "loss": 0.4856, "step": 7750 }, { "epoch": 0.5747015644694892, "grad_norm": 0.38043439388275146, "learning_rate": 0.0001225206301575394, "loss": 0.5466, "step": 7751 }, { "epoch": 0.5747757099429079, "grad_norm": 0.3788822293281555, "learning_rate": 0.00012251062765691423, "loss": 0.5363, "step": 7752 }, { "epoch": 0.5748498554163268, "grad_norm": 0.3498013913631439, "learning_rate": 0.0001225006251562891, "loss": 0.5026, "step": 7753 }, { "epoch": 0.5749240008897457, "grad_norm": 0.3772180378437042, "learning_rate": 0.0001224906226556639, "loss": 0.5253, "step": 7754 }, { "epoch": 0.5749981463631645, "grad_norm": 0.39217597246170044, "learning_rate": 0.00012248062015503876, "loss": 0.5396, "step": 7755 }, { "epoch": 0.5750722918365834, "grad_norm": 0.36250874400138855, "learning_rate": 0.0001224706176544136, "loss": 0.51, "step": 7756 }, { "epoch": 0.5751464373100023, "grad_norm": 0.3487406373023987, "learning_rate": 0.00012246061515378847, "loss": 0.5107, "step": 7757 }, { "epoch": 0.575220582783421, "grad_norm": 0.3662140965461731, "learning_rate": 0.00012245061265316328, "loss": 0.5064, "step": 7758 }, { "epoch": 0.5752947282568399, "grad_norm": 0.37755438685417175, "learning_rate": 0.00012244061015253814, "loss": 0.5873, "step": 7759 }, { "epoch": 0.5753688737302588, "grad_norm": 0.37359070777893066, "learning_rate": 0.00012243060765191298, "loss": 0.5356, "step": 7760 }, { "epoch": 0.5754430192036776, "grad_norm": 0.364936500787735, "learning_rate": 0.00012242060515128784, "loss": 0.525, "step": 7761 }, { "epoch": 0.5755171646770965, "grad_norm": 0.35465624928474426, "learning_rate": 0.00012241060265066265, "loss": 0.5091, "step": 7762 }, { "epoch": 0.5755913101505153, "grad_norm": 0.34174445271492004, "learning_rate": 0.00012240060015003752, "loss": 0.4706, "step": 7763 }, { "epoch": 0.5756654556239341, "grad_norm": 0.36394113302230835, "learning_rate": 0.00012239059764941235, "loss": 0.5163, "step": 7764 }, { "epoch": 0.575739601097353, "grad_norm": 0.3710611164569855, "learning_rate": 0.0001223805951487872, "loss": 0.5435, "step": 7765 }, { "epoch": 0.5758137465707719, "grad_norm": 0.3596462905406952, "learning_rate": 0.00012237059264816205, "loss": 0.51, "step": 7766 }, { "epoch": 0.5758878920441907, "grad_norm": 0.36166515946388245, "learning_rate": 0.0001223605901475369, "loss": 0.509, "step": 7767 }, { "epoch": 0.5759620375176095, "grad_norm": 0.35484302043914795, "learning_rate": 0.00012235058764691173, "loss": 0.4903, "step": 7768 }, { "epoch": 0.5760361829910284, "grad_norm": 0.3667812943458557, "learning_rate": 0.00012234058514628657, "loss": 0.5301, "step": 7769 }, { "epoch": 0.5761103284644472, "grad_norm": 0.388901948928833, "learning_rate": 0.00012233058264566143, "loss": 0.5309, "step": 7770 }, { "epoch": 0.5761844739378661, "grad_norm": 0.35321372747421265, "learning_rate": 0.00012232058014503627, "loss": 0.4783, "step": 7771 }, { "epoch": 0.576258619411285, "grad_norm": 0.35718008875846863, "learning_rate": 0.0001223105776444111, "loss": 0.5043, "step": 7772 }, { "epoch": 0.5763327648847038, "grad_norm": 0.3577355146408081, "learning_rate": 0.00012230057514378594, "loss": 0.4822, "step": 7773 }, { "epoch": 0.5764069103581226, "grad_norm": 0.3468373417854309, "learning_rate": 0.0001222905726431608, "loss": 0.4742, "step": 7774 }, { "epoch": 0.5764810558315415, "grad_norm": 0.3725098967552185, "learning_rate": 0.00012228057014253564, "loss": 0.5098, "step": 7775 }, { "epoch": 0.5765552013049603, "grad_norm": 0.3535717725753784, "learning_rate": 0.0001222705676419105, "loss": 0.4992, "step": 7776 }, { "epoch": 0.5766293467783792, "grad_norm": 0.3636244237422943, "learning_rate": 0.00012226056514128532, "loss": 0.5063, "step": 7777 }, { "epoch": 0.5767034922517981, "grad_norm": 0.3622763752937317, "learning_rate": 0.00012225056264066018, "loss": 0.4929, "step": 7778 }, { "epoch": 0.5767776377252168, "grad_norm": 0.3528883457183838, "learning_rate": 0.00012224056014003502, "loss": 0.4996, "step": 7779 }, { "epoch": 0.5768517831986357, "grad_norm": 0.3485385775566101, "learning_rate": 0.00012223055763940985, "loss": 0.496, "step": 7780 }, { "epoch": 0.5769259286720546, "grad_norm": 0.3588191270828247, "learning_rate": 0.0001222205551387847, "loss": 0.5213, "step": 7781 }, { "epoch": 0.5770000741454734, "grad_norm": 0.37957602739334106, "learning_rate": 0.00012221055263815956, "loss": 0.5451, "step": 7782 }, { "epoch": 0.5770742196188923, "grad_norm": 0.34289634227752686, "learning_rate": 0.0001222005501375344, "loss": 0.4844, "step": 7783 }, { "epoch": 0.5771483650923112, "grad_norm": 0.37520936131477356, "learning_rate": 0.00012219054763690923, "loss": 0.5656, "step": 7784 }, { "epoch": 0.5772225105657299, "grad_norm": 0.3581433892250061, "learning_rate": 0.00012218054513628407, "loss": 0.528, "step": 7785 }, { "epoch": 0.5772966560391488, "grad_norm": 0.3534667491912842, "learning_rate": 0.00012217054263565893, "loss": 0.4976, "step": 7786 }, { "epoch": 0.5773708015125677, "grad_norm": 0.33753034472465515, "learning_rate": 0.00012216054013503377, "loss": 0.4603, "step": 7787 }, { "epoch": 0.5774449469859865, "grad_norm": 0.3530212640762329, "learning_rate": 0.0001221505376344086, "loss": 0.4843, "step": 7788 }, { "epoch": 0.5775190924594054, "grad_norm": 0.3404211401939392, "learning_rate": 0.00012214053513378344, "loss": 0.4716, "step": 7789 }, { "epoch": 0.5775932379328242, "grad_norm": 0.35090723633766174, "learning_rate": 0.0001221305326331583, "loss": 0.512, "step": 7790 }, { "epoch": 0.577667383406243, "grad_norm": 0.34959524869918823, "learning_rate": 0.00012212053013253312, "loss": 0.474, "step": 7791 }, { "epoch": 0.5777415288796619, "grad_norm": 0.33993878960609436, "learning_rate": 0.00012211052763190798, "loss": 0.4926, "step": 7792 }, { "epoch": 0.5778156743530808, "grad_norm": 0.3229988217353821, "learning_rate": 0.00012210052513128282, "loss": 0.4901, "step": 7793 }, { "epoch": 0.5778898198264996, "grad_norm": 0.39070120453834534, "learning_rate": 0.00012209052263065768, "loss": 0.5556, "step": 7794 }, { "epoch": 0.5779639652999184, "grad_norm": 0.32902881503105164, "learning_rate": 0.0001220805201300325, "loss": 0.4779, "step": 7795 }, { "epoch": 0.5780381107733373, "grad_norm": 0.37863555550575256, "learning_rate": 0.00012207051762940736, "loss": 0.5005, "step": 7796 }, { "epoch": 0.5781122562467561, "grad_norm": 0.338563472032547, "learning_rate": 0.00012206051512878221, "loss": 0.4986, "step": 7797 }, { "epoch": 0.578186401720175, "grad_norm": 0.34965986013412476, "learning_rate": 0.00012205051262815706, "loss": 0.4927, "step": 7798 }, { "epoch": 0.5782605471935939, "grad_norm": 0.3656279742717743, "learning_rate": 0.00012204051012753188, "loss": 0.4933, "step": 7799 }, { "epoch": 0.5783346926670127, "grad_norm": 0.35265207290649414, "learning_rate": 0.00012203050762690673, "loss": 0.5078, "step": 7800 }, { "epoch": 0.5784088381404315, "grad_norm": 0.3585958778858185, "learning_rate": 0.00012202050512628158, "loss": 0.5148, "step": 7801 }, { "epoch": 0.5784829836138504, "grad_norm": 0.4019569754600525, "learning_rate": 0.00012201050262565643, "loss": 0.5575, "step": 7802 }, { "epoch": 0.5785571290872692, "grad_norm": 0.38878753781318665, "learning_rate": 0.00012200050012503126, "loss": 0.535, "step": 7803 }, { "epoch": 0.5786312745606881, "grad_norm": 0.35828185081481934, "learning_rate": 0.00012199049762440611, "loss": 0.472, "step": 7804 }, { "epoch": 0.578705420034107, "grad_norm": 0.3919070363044739, "learning_rate": 0.00012198049512378096, "loss": 0.5679, "step": 7805 }, { "epoch": 0.5787795655075257, "grad_norm": 0.35584887862205505, "learning_rate": 0.00012197049262315578, "loss": 0.515, "step": 7806 }, { "epoch": 0.5788537109809446, "grad_norm": 0.34186574816703796, "learning_rate": 0.00012196049012253063, "loss": 0.4784, "step": 7807 }, { "epoch": 0.5789278564543635, "grad_norm": 0.3884412348270416, "learning_rate": 0.00012195048762190548, "loss": 0.5451, "step": 7808 }, { "epoch": 0.5790020019277823, "grad_norm": 0.3755818009376526, "learning_rate": 0.00012194048512128033, "loss": 0.4876, "step": 7809 }, { "epoch": 0.5790761474012012, "grad_norm": 0.3632664382457733, "learning_rate": 0.00012193048262065516, "loss": 0.5383, "step": 7810 }, { "epoch": 0.57915029287462, "grad_norm": 0.3795631527900696, "learning_rate": 0.00012192048012003001, "loss": 0.5402, "step": 7811 }, { "epoch": 0.5792244383480388, "grad_norm": 0.3476898670196533, "learning_rate": 0.00012191047761940486, "loss": 0.5057, "step": 7812 }, { "epoch": 0.5792985838214577, "grad_norm": 0.3699498772621155, "learning_rate": 0.00012190047511877971, "loss": 0.5332, "step": 7813 }, { "epoch": 0.5793727292948766, "grad_norm": 0.350373238325119, "learning_rate": 0.00012189047261815453, "loss": 0.4901, "step": 7814 }, { "epoch": 0.5794468747682954, "grad_norm": 0.3445344567298889, "learning_rate": 0.00012188047011752938, "loss": 0.4845, "step": 7815 }, { "epoch": 0.5795210202417143, "grad_norm": 0.36789193749427795, "learning_rate": 0.00012187046761690423, "loss": 0.5251, "step": 7816 }, { "epoch": 0.5795951657151331, "grad_norm": 0.36304327845573425, "learning_rate": 0.00012186046511627907, "loss": 0.475, "step": 7817 }, { "epoch": 0.5796693111885519, "grad_norm": 0.37736353278160095, "learning_rate": 0.00012185046261565392, "loss": 0.5178, "step": 7818 }, { "epoch": 0.5797434566619708, "grad_norm": 0.3474605977535248, "learning_rate": 0.00012184046011502876, "loss": 0.4832, "step": 7819 }, { "epoch": 0.5798176021353897, "grad_norm": 0.34797757863998413, "learning_rate": 0.00012183045761440361, "loss": 0.5124, "step": 7820 }, { "epoch": 0.5798917476088085, "grad_norm": 0.3651677370071411, "learning_rate": 0.00012182045511377845, "loss": 0.5227, "step": 7821 }, { "epoch": 0.5799658930822273, "grad_norm": 0.36082422733306885, "learning_rate": 0.0001218104526131533, "loss": 0.4849, "step": 7822 }, { "epoch": 0.5800400385556462, "grad_norm": 0.3476872742176056, "learning_rate": 0.00012180045011252815, "loss": 0.4861, "step": 7823 }, { "epoch": 0.580114184029065, "grad_norm": 0.37306731939315796, "learning_rate": 0.00012179044761190298, "loss": 0.5499, "step": 7824 }, { "epoch": 0.5801883295024839, "grad_norm": 0.37395307421684265, "learning_rate": 0.00012178044511127782, "loss": 0.5277, "step": 7825 }, { "epoch": 0.5802624749759028, "grad_norm": 0.36360082030296326, "learning_rate": 0.00012177044261065267, "loss": 0.48, "step": 7826 }, { "epoch": 0.5803366204493215, "grad_norm": 0.3483443260192871, "learning_rate": 0.00012176044011002752, "loss": 0.4765, "step": 7827 }, { "epoch": 0.5804107659227404, "grad_norm": 0.363925576210022, "learning_rate": 0.00012175043760940235, "loss": 0.524, "step": 7828 }, { "epoch": 0.5804849113961593, "grad_norm": 0.37659743428230286, "learning_rate": 0.0001217404351087772, "loss": 0.5718, "step": 7829 }, { "epoch": 0.5805590568695781, "grad_norm": 0.3361043632030487, "learning_rate": 0.00012173043260815205, "loss": 0.4688, "step": 7830 }, { "epoch": 0.580633202342997, "grad_norm": 0.36729809641838074, "learning_rate": 0.0001217204301075269, "loss": 0.4998, "step": 7831 }, { "epoch": 0.5807073478164158, "grad_norm": 0.3344394862651825, "learning_rate": 0.00012171042760690172, "loss": 0.4552, "step": 7832 }, { "epoch": 0.5807814932898346, "grad_norm": 0.3863058388233185, "learning_rate": 0.00012170042510627657, "loss": 0.5414, "step": 7833 }, { "epoch": 0.5808556387632535, "grad_norm": 0.33845075964927673, "learning_rate": 0.00012169042260565142, "loss": 0.4789, "step": 7834 }, { "epoch": 0.5809297842366723, "grad_norm": 0.3514708876609802, "learning_rate": 0.00012168042010502627, "loss": 0.4911, "step": 7835 }, { "epoch": 0.5810039297100912, "grad_norm": 0.3664785325527191, "learning_rate": 0.0001216704176044011, "loss": 0.5415, "step": 7836 }, { "epoch": 0.5810780751835101, "grad_norm": 0.3536122739315033, "learning_rate": 0.00012166041510377595, "loss": 0.4912, "step": 7837 }, { "epoch": 0.5811522206569288, "grad_norm": 0.35295745730400085, "learning_rate": 0.0001216504126031508, "loss": 0.5083, "step": 7838 }, { "epoch": 0.5812263661303477, "grad_norm": 0.3775959610939026, "learning_rate": 0.00012164041010252565, "loss": 0.5351, "step": 7839 }, { "epoch": 0.5813005116037666, "grad_norm": 0.34972286224365234, "learning_rate": 0.00012163040760190047, "loss": 0.5028, "step": 7840 }, { "epoch": 0.5813746570771854, "grad_norm": 0.3761788308620453, "learning_rate": 0.00012162040510127532, "loss": 0.5263, "step": 7841 }, { "epoch": 0.5814488025506043, "grad_norm": 0.33751219511032104, "learning_rate": 0.00012161040260065017, "loss": 0.4779, "step": 7842 }, { "epoch": 0.5815229480240232, "grad_norm": 0.33802300691604614, "learning_rate": 0.000121600400100025, "loss": 0.4703, "step": 7843 }, { "epoch": 0.5815970934974419, "grad_norm": 0.34736308455467224, "learning_rate": 0.00012159039759939985, "loss": 0.5146, "step": 7844 }, { "epoch": 0.5816712389708608, "grad_norm": 0.35401493310928345, "learning_rate": 0.0001215803950987747, "loss": 0.5, "step": 7845 }, { "epoch": 0.5817453844442797, "grad_norm": 0.35525187849998474, "learning_rate": 0.00012157039259814955, "loss": 0.5114, "step": 7846 }, { "epoch": 0.5818195299176985, "grad_norm": 0.37281739711761475, "learning_rate": 0.00012156039009752437, "loss": 0.5045, "step": 7847 }, { "epoch": 0.5818936753911174, "grad_norm": 0.3702263832092285, "learning_rate": 0.00012155038759689922, "loss": 0.5082, "step": 7848 }, { "epoch": 0.5819678208645362, "grad_norm": 0.37288814783096313, "learning_rate": 0.00012154038509627407, "loss": 0.4905, "step": 7849 }, { "epoch": 0.582041966337955, "grad_norm": 0.35198262333869934, "learning_rate": 0.00012153038259564893, "loss": 0.4976, "step": 7850 }, { "epoch": 0.5821161118113739, "grad_norm": 0.34362828731536865, "learning_rate": 0.00012152038009502376, "loss": 0.4882, "step": 7851 }, { "epoch": 0.5821902572847928, "grad_norm": 0.37092891335487366, "learning_rate": 0.0001215103775943986, "loss": 0.5085, "step": 7852 }, { "epoch": 0.5822644027582116, "grad_norm": 0.3753529489040375, "learning_rate": 0.00012150037509377345, "loss": 0.4973, "step": 7853 }, { "epoch": 0.5823385482316304, "grad_norm": 0.3707869052886963, "learning_rate": 0.00012149037259314829, "loss": 0.5102, "step": 7854 }, { "epoch": 0.5824126937050493, "grad_norm": 0.36640432476997375, "learning_rate": 0.00012148037009252314, "loss": 0.501, "step": 7855 }, { "epoch": 0.5824868391784681, "grad_norm": 0.35416439175605774, "learning_rate": 0.00012147036759189799, "loss": 0.4719, "step": 7856 }, { "epoch": 0.582560984651887, "grad_norm": 0.37231290340423584, "learning_rate": 0.00012146036509127283, "loss": 0.5443, "step": 7857 }, { "epoch": 0.5826351301253059, "grad_norm": 0.3544313311576843, "learning_rate": 0.00012145036259064766, "loss": 0.489, "step": 7858 }, { "epoch": 0.5827092755987247, "grad_norm": 0.3734924793243408, "learning_rate": 0.00012144036009002251, "loss": 0.4928, "step": 7859 }, { "epoch": 0.5827834210721435, "grad_norm": 0.3758822977542877, "learning_rate": 0.00012143035758939736, "loss": 0.4982, "step": 7860 }, { "epoch": 0.5828575665455624, "grad_norm": 0.38821685314178467, "learning_rate": 0.00012142035508877221, "loss": 0.494, "step": 7861 }, { "epoch": 0.5829317120189812, "grad_norm": 0.3307192623615265, "learning_rate": 0.00012141035258814704, "loss": 0.4759, "step": 7862 }, { "epoch": 0.5830058574924001, "grad_norm": 0.3448368012905121, "learning_rate": 0.00012140035008752189, "loss": 0.4922, "step": 7863 }, { "epoch": 0.583080002965819, "grad_norm": 0.36591196060180664, "learning_rate": 0.00012139034758689674, "loss": 0.4851, "step": 7864 }, { "epoch": 0.5831541484392377, "grad_norm": 0.34308651089668274, "learning_rate": 0.00012138034508627156, "loss": 0.4901, "step": 7865 }, { "epoch": 0.5832282939126566, "grad_norm": 0.36750441789627075, "learning_rate": 0.00012137034258564641, "loss": 0.5271, "step": 7866 }, { "epoch": 0.5833024393860755, "grad_norm": 0.358414888381958, "learning_rate": 0.00012136034008502126, "loss": 0.4925, "step": 7867 }, { "epoch": 0.5833765848594943, "grad_norm": 0.367404967546463, "learning_rate": 0.00012135033758439612, "loss": 0.5474, "step": 7868 }, { "epoch": 0.5834507303329132, "grad_norm": 0.36963555216789246, "learning_rate": 0.00012134033508377094, "loss": 0.5553, "step": 7869 }, { "epoch": 0.583524875806332, "grad_norm": 0.3904498815536499, "learning_rate": 0.00012133033258314579, "loss": 0.5814, "step": 7870 }, { "epoch": 0.5835990212797508, "grad_norm": 0.34815558791160583, "learning_rate": 0.00012132033008252064, "loss": 0.5189, "step": 7871 }, { "epoch": 0.5836731667531697, "grad_norm": 0.3372185230255127, "learning_rate": 0.00012131032758189549, "loss": 0.4838, "step": 7872 }, { "epoch": 0.5837473122265886, "grad_norm": 0.34821611642837524, "learning_rate": 0.00012130032508127031, "loss": 0.4771, "step": 7873 }, { "epoch": 0.5838214577000074, "grad_norm": 0.3475644290447235, "learning_rate": 0.00012129032258064516, "loss": 0.512, "step": 7874 }, { "epoch": 0.5838956031734263, "grad_norm": 0.35606932640075684, "learning_rate": 0.00012128032008002002, "loss": 0.5076, "step": 7875 }, { "epoch": 0.5839697486468451, "grad_norm": 0.35187503695487976, "learning_rate": 0.00012127031757939487, "loss": 0.512, "step": 7876 }, { "epoch": 0.5840438941202639, "grad_norm": 0.3315590023994446, "learning_rate": 0.00012126031507876969, "loss": 0.4799, "step": 7877 }, { "epoch": 0.5841180395936828, "grad_norm": 0.3451839089393616, "learning_rate": 0.00012125031257814454, "loss": 0.5108, "step": 7878 }, { "epoch": 0.5841921850671017, "grad_norm": 0.3680016100406647, "learning_rate": 0.00012124031007751939, "loss": 0.5037, "step": 7879 }, { "epoch": 0.5842663305405205, "grad_norm": 0.36765897274017334, "learning_rate": 0.00012123030757689421, "loss": 0.5103, "step": 7880 }, { "epoch": 0.5843404760139393, "grad_norm": 0.32860755920410156, "learning_rate": 0.00012122030507626906, "loss": 0.4644, "step": 7881 }, { "epoch": 0.5844146214873582, "grad_norm": 0.3797222375869751, "learning_rate": 0.00012121030257564392, "loss": 0.544, "step": 7882 }, { "epoch": 0.584488766960777, "grad_norm": 0.37947359681129456, "learning_rate": 0.00012120030007501877, "loss": 0.5427, "step": 7883 }, { "epoch": 0.5845629124341959, "grad_norm": 0.3913768231868744, "learning_rate": 0.00012119029757439359, "loss": 0.5329, "step": 7884 }, { "epoch": 0.5846370579076148, "grad_norm": 0.3744969666004181, "learning_rate": 0.00012118029507376844, "loss": 0.511, "step": 7885 }, { "epoch": 0.5847112033810336, "grad_norm": 0.3731608986854553, "learning_rate": 0.00012117029257314329, "loss": 0.5228, "step": 7886 }, { "epoch": 0.5847853488544524, "grad_norm": 0.37786391377449036, "learning_rate": 0.00012116029007251814, "loss": 0.4779, "step": 7887 }, { "epoch": 0.5848594943278713, "grad_norm": 0.39776355028152466, "learning_rate": 0.00012115028757189298, "loss": 0.5402, "step": 7888 }, { "epoch": 0.5849336398012901, "grad_norm": 0.36069056391716003, "learning_rate": 0.00012114028507126783, "loss": 0.5067, "step": 7889 }, { "epoch": 0.585007785274709, "grad_norm": 0.3826787769794464, "learning_rate": 0.00012113028257064267, "loss": 0.5534, "step": 7890 }, { "epoch": 0.5850819307481279, "grad_norm": 0.35608652234077454, "learning_rate": 0.0001211202800700175, "loss": 0.5064, "step": 7891 }, { "epoch": 0.5851560762215466, "grad_norm": 0.3677041530609131, "learning_rate": 0.00012111027756939235, "loss": 0.5347, "step": 7892 }, { "epoch": 0.5852302216949655, "grad_norm": 0.36772385239601135, "learning_rate": 0.0001211002750687672, "loss": 0.5206, "step": 7893 }, { "epoch": 0.5853043671683844, "grad_norm": 0.34551721811294556, "learning_rate": 0.00012109027256814206, "loss": 0.4582, "step": 7894 }, { "epoch": 0.5853785126418032, "grad_norm": 0.3499281704425812, "learning_rate": 0.00012108027006751688, "loss": 0.5107, "step": 7895 }, { "epoch": 0.5854526581152221, "grad_norm": 0.34583526849746704, "learning_rate": 0.00012107026756689173, "loss": 0.4719, "step": 7896 }, { "epoch": 0.585526803588641, "grad_norm": 0.363215833902359, "learning_rate": 0.00012106026506626658, "loss": 0.505, "step": 7897 }, { "epoch": 0.5856009490620597, "grad_norm": 0.35540345311164856, "learning_rate": 0.00012105026256564143, "loss": 0.4988, "step": 7898 }, { "epoch": 0.5856750945354786, "grad_norm": 0.3705730438232422, "learning_rate": 0.00012104026006501625, "loss": 0.536, "step": 7899 }, { "epoch": 0.5857492400088975, "grad_norm": 0.36613672971725464, "learning_rate": 0.0001210302575643911, "loss": 0.5099, "step": 7900 }, { "epoch": 0.5858233854823163, "grad_norm": 0.3540559709072113, "learning_rate": 0.00012102025506376596, "loss": 0.4731, "step": 7901 }, { "epoch": 0.5858975309557352, "grad_norm": 0.3524826467037201, "learning_rate": 0.00012101025256314078, "loss": 0.4972, "step": 7902 }, { "epoch": 0.585971676429154, "grad_norm": 0.3413426876068115, "learning_rate": 0.00012100025006251563, "loss": 0.479, "step": 7903 }, { "epoch": 0.5860458219025728, "grad_norm": 0.3568592667579651, "learning_rate": 0.00012099024756189048, "loss": 0.5177, "step": 7904 }, { "epoch": 0.5861199673759917, "grad_norm": 0.3501918613910675, "learning_rate": 0.00012098024506126533, "loss": 0.4989, "step": 7905 }, { "epoch": 0.5861941128494106, "grad_norm": 0.3429771661758423, "learning_rate": 0.00012097024256064015, "loss": 0.4794, "step": 7906 }, { "epoch": 0.5862682583228294, "grad_norm": 0.3393910229206085, "learning_rate": 0.000120960240060015, "loss": 0.507, "step": 7907 }, { "epoch": 0.5863424037962482, "grad_norm": 0.3473048508167267, "learning_rate": 0.00012095023755938986, "loss": 0.5043, "step": 7908 }, { "epoch": 0.5864165492696671, "grad_norm": 0.3497467637062073, "learning_rate": 0.0001209402350587647, "loss": 0.482, "step": 7909 }, { "epoch": 0.5864906947430859, "grad_norm": 0.3504798710346222, "learning_rate": 0.00012093023255813953, "loss": 0.5159, "step": 7910 }, { "epoch": 0.5865648402165048, "grad_norm": 0.355509877204895, "learning_rate": 0.00012092023005751438, "loss": 0.5069, "step": 7911 }, { "epoch": 0.5866389856899237, "grad_norm": 0.36244291067123413, "learning_rate": 0.00012091022755688923, "loss": 0.4863, "step": 7912 }, { "epoch": 0.5867131311633424, "grad_norm": 0.3195638656616211, "learning_rate": 0.00012090022505626408, "loss": 0.4802, "step": 7913 }, { "epoch": 0.5867872766367613, "grad_norm": 0.3680269122123718, "learning_rate": 0.0001208902225556389, "loss": 0.518, "step": 7914 }, { "epoch": 0.5868614221101802, "grad_norm": 0.36911600828170776, "learning_rate": 0.00012088022005501376, "loss": 0.5181, "step": 7915 }, { "epoch": 0.586935567583599, "grad_norm": 0.35816690325737, "learning_rate": 0.00012087021755438861, "loss": 0.5289, "step": 7916 }, { "epoch": 0.5870097130570179, "grad_norm": 0.35004353523254395, "learning_rate": 0.00012086021505376343, "loss": 0.4975, "step": 7917 }, { "epoch": 0.5870838585304368, "grad_norm": 0.38076382875442505, "learning_rate": 0.00012085021255313828, "loss": 0.5177, "step": 7918 }, { "epoch": 0.5871580040038555, "grad_norm": 0.3448251485824585, "learning_rate": 0.00012084021005251313, "loss": 0.468, "step": 7919 }, { "epoch": 0.5872321494772744, "grad_norm": 0.36162269115448, "learning_rate": 0.00012083020755188798, "loss": 0.5132, "step": 7920 }, { "epoch": 0.5873062949506933, "grad_norm": 0.3833799660205841, "learning_rate": 0.00012082020505126282, "loss": 0.5076, "step": 7921 }, { "epoch": 0.5873804404241121, "grad_norm": 0.37769702076911926, "learning_rate": 0.00012081020255063766, "loss": 0.5548, "step": 7922 }, { "epoch": 0.587454585897531, "grad_norm": 0.3473769724369049, "learning_rate": 0.00012080020005001251, "loss": 0.4737, "step": 7923 }, { "epoch": 0.5875287313709499, "grad_norm": 0.369422048330307, "learning_rate": 0.00012079019754938736, "loss": 0.5316, "step": 7924 }, { "epoch": 0.5876028768443686, "grad_norm": 0.3517076075077057, "learning_rate": 0.0001207801950487622, "loss": 0.53, "step": 7925 }, { "epoch": 0.5876770223177875, "grad_norm": 0.34127533435821533, "learning_rate": 0.00012077019254813705, "loss": 0.4712, "step": 7926 }, { "epoch": 0.5877511677912064, "grad_norm": 0.3951975107192993, "learning_rate": 0.0001207601900475119, "loss": 0.547, "step": 7927 }, { "epoch": 0.5878253132646252, "grad_norm": 0.35036832094192505, "learning_rate": 0.00012075018754688672, "loss": 0.4644, "step": 7928 }, { "epoch": 0.5878994587380441, "grad_norm": 0.3328867256641388, "learning_rate": 0.00012074018504626157, "loss": 0.4833, "step": 7929 }, { "epoch": 0.5879736042114629, "grad_norm": 0.3609682619571686, "learning_rate": 0.00012073018254563642, "loss": 0.5242, "step": 7930 }, { "epoch": 0.5880477496848817, "grad_norm": 0.3689658045768738, "learning_rate": 0.00012072018004501127, "loss": 0.5023, "step": 7931 }, { "epoch": 0.5881218951583006, "grad_norm": 0.32889214158058167, "learning_rate": 0.0001207101775443861, "loss": 0.4852, "step": 7932 }, { "epoch": 0.5881960406317195, "grad_norm": 0.3635363280773163, "learning_rate": 0.00012070017504376095, "loss": 0.4805, "step": 7933 }, { "epoch": 0.5882701861051383, "grad_norm": 0.36678212881088257, "learning_rate": 0.0001206901725431358, "loss": 0.5336, "step": 7934 }, { "epoch": 0.5883443315785571, "grad_norm": 0.3493174910545349, "learning_rate": 0.00012068017004251065, "loss": 0.4453, "step": 7935 }, { "epoch": 0.588418477051976, "grad_norm": 0.33199527859687805, "learning_rate": 0.00012067016754188547, "loss": 0.4662, "step": 7936 }, { "epoch": 0.5884926225253948, "grad_norm": 0.3524979054927826, "learning_rate": 0.00012066016504126032, "loss": 0.4648, "step": 7937 }, { "epoch": 0.5885667679988137, "grad_norm": 0.38825762271881104, "learning_rate": 0.00012065016254063517, "loss": 0.5358, "step": 7938 }, { "epoch": 0.5886409134722326, "grad_norm": 0.37297549843788147, "learning_rate": 0.00012064016004001, "loss": 0.5296, "step": 7939 }, { "epoch": 0.5887150589456513, "grad_norm": 0.3524392545223236, "learning_rate": 0.00012063015753938485, "loss": 0.4607, "step": 7940 }, { "epoch": 0.5887892044190702, "grad_norm": 0.35368970036506653, "learning_rate": 0.0001206201550387597, "loss": 0.5189, "step": 7941 }, { "epoch": 0.5888633498924891, "grad_norm": 0.3468043804168701, "learning_rate": 0.00012061015253813455, "loss": 0.4634, "step": 7942 }, { "epoch": 0.5889374953659079, "grad_norm": 0.3440872132778168, "learning_rate": 0.00012060015003750937, "loss": 0.5097, "step": 7943 }, { "epoch": 0.5890116408393268, "grad_norm": 0.35519328713417053, "learning_rate": 0.00012059014753688422, "loss": 0.5215, "step": 7944 }, { "epoch": 0.5890857863127456, "grad_norm": 0.37622326612472534, "learning_rate": 0.00012058014503625907, "loss": 0.5246, "step": 7945 }, { "epoch": 0.5891599317861644, "grad_norm": 0.3628593683242798, "learning_rate": 0.00012057014253563392, "loss": 0.5312, "step": 7946 }, { "epoch": 0.5892340772595833, "grad_norm": 0.34160009026527405, "learning_rate": 0.00012056014003500875, "loss": 0.4976, "step": 7947 }, { "epoch": 0.5893082227330021, "grad_norm": 0.36911144852638245, "learning_rate": 0.0001205501375343836, "loss": 0.4954, "step": 7948 }, { "epoch": 0.589382368206421, "grad_norm": 0.35664358735084534, "learning_rate": 0.00012054013503375845, "loss": 0.5026, "step": 7949 }, { "epoch": 0.5894565136798399, "grad_norm": 0.35369059443473816, "learning_rate": 0.0001205301325331333, "loss": 0.484, "step": 7950 }, { "epoch": 0.5895306591532586, "grad_norm": 0.34931454062461853, "learning_rate": 0.00012052013003250812, "loss": 0.5128, "step": 7951 }, { "epoch": 0.5896048046266775, "grad_norm": 0.3724246323108673, "learning_rate": 0.00012051012753188297, "loss": 0.5183, "step": 7952 }, { "epoch": 0.5896789501000964, "grad_norm": 0.3767242133617401, "learning_rate": 0.00012050012503125782, "loss": 0.5176, "step": 7953 }, { "epoch": 0.5897530955735152, "grad_norm": 0.3259052634239197, "learning_rate": 0.00012049012253063266, "loss": 0.4827, "step": 7954 }, { "epoch": 0.5898272410469341, "grad_norm": 0.365865021944046, "learning_rate": 0.0001204801200300075, "loss": 0.4942, "step": 7955 }, { "epoch": 0.589901386520353, "grad_norm": 0.3927336037158966, "learning_rate": 0.00012047011752938235, "loss": 0.5262, "step": 7956 }, { "epoch": 0.5899755319937717, "grad_norm": 0.35591500997543335, "learning_rate": 0.0001204601150287572, "loss": 0.4871, "step": 7957 }, { "epoch": 0.5900496774671906, "grad_norm": 0.35333874821662903, "learning_rate": 0.00012045011252813204, "loss": 0.4846, "step": 7958 }, { "epoch": 0.5901238229406095, "grad_norm": 0.3394932150840759, "learning_rate": 0.00012044011002750689, "loss": 0.4767, "step": 7959 }, { "epoch": 0.5901979684140283, "grad_norm": 0.372745543718338, "learning_rate": 0.00012043010752688172, "loss": 0.5213, "step": 7960 }, { "epoch": 0.5902721138874472, "grad_norm": 0.3688942790031433, "learning_rate": 0.00012042010502625657, "loss": 0.5062, "step": 7961 }, { "epoch": 0.590346259360866, "grad_norm": 0.3646604120731354, "learning_rate": 0.00012041010252563141, "loss": 0.512, "step": 7962 }, { "epoch": 0.5904204048342848, "grad_norm": 0.3777763843536377, "learning_rate": 0.00012040010002500626, "loss": 0.4967, "step": 7963 }, { "epoch": 0.5904945503077037, "grad_norm": 0.33357954025268555, "learning_rate": 0.00012039009752438111, "loss": 0.458, "step": 7964 }, { "epoch": 0.5905686957811226, "grad_norm": 0.35587525367736816, "learning_rate": 0.00012038009502375594, "loss": 0.522, "step": 7965 }, { "epoch": 0.5906428412545414, "grad_norm": 0.341431200504303, "learning_rate": 0.00012037009252313079, "loss": 0.5001, "step": 7966 }, { "epoch": 0.5907169867279602, "grad_norm": 0.36117154359817505, "learning_rate": 0.00012036009002250564, "loss": 0.4952, "step": 7967 }, { "epoch": 0.5907911322013791, "grad_norm": 0.3357948660850525, "learning_rate": 0.00012035008752188049, "loss": 0.4685, "step": 7968 }, { "epoch": 0.5908652776747979, "grad_norm": 0.3942931294441223, "learning_rate": 0.00012034008502125531, "loss": 0.5542, "step": 7969 }, { "epoch": 0.5909394231482168, "grad_norm": 0.35958218574523926, "learning_rate": 0.00012033008252063016, "loss": 0.4842, "step": 7970 }, { "epoch": 0.5910135686216357, "grad_norm": 0.3579372465610504, "learning_rate": 0.00012032008002000501, "loss": 0.5235, "step": 7971 }, { "epoch": 0.5910877140950545, "grad_norm": 0.337642639875412, "learning_rate": 0.00012031007751937986, "loss": 0.5028, "step": 7972 }, { "epoch": 0.5911618595684733, "grad_norm": 0.3572986125946045, "learning_rate": 0.00012030007501875469, "loss": 0.4864, "step": 7973 }, { "epoch": 0.5912360050418922, "grad_norm": 0.3591485619544983, "learning_rate": 0.00012029007251812954, "loss": 0.5129, "step": 7974 }, { "epoch": 0.591310150515311, "grad_norm": 0.3464386761188507, "learning_rate": 0.00012028007001750439, "loss": 0.4898, "step": 7975 }, { "epoch": 0.5913842959887299, "grad_norm": 0.3415737450122833, "learning_rate": 0.00012027006751687921, "loss": 0.4638, "step": 7976 }, { "epoch": 0.5914584414621488, "grad_norm": 0.37729611992836, "learning_rate": 0.00012026006501625406, "loss": 0.5282, "step": 7977 }, { "epoch": 0.5915325869355675, "grad_norm": 0.3394661843776703, "learning_rate": 0.00012025006251562891, "loss": 0.4584, "step": 7978 }, { "epoch": 0.5916067324089864, "grad_norm": 0.37868982553482056, "learning_rate": 0.00012024006001500376, "loss": 0.5069, "step": 7979 }, { "epoch": 0.5916808778824053, "grad_norm": 0.36118000745773315, "learning_rate": 0.00012023005751437859, "loss": 0.4979, "step": 7980 }, { "epoch": 0.5917550233558241, "grad_norm": 0.38600805401802063, "learning_rate": 0.00012022005501375344, "loss": 0.5104, "step": 7981 }, { "epoch": 0.591829168829243, "grad_norm": 0.3760703504085541, "learning_rate": 0.00012021005251312829, "loss": 0.5085, "step": 7982 }, { "epoch": 0.5919033143026619, "grad_norm": 0.3671892583370209, "learning_rate": 0.00012020005001250314, "loss": 0.5024, "step": 7983 }, { "epoch": 0.5919774597760806, "grad_norm": 0.3685070872306824, "learning_rate": 0.00012019004751187796, "loss": 0.4858, "step": 7984 }, { "epoch": 0.5920516052494995, "grad_norm": 0.3518582582473755, "learning_rate": 0.00012018004501125281, "loss": 0.4816, "step": 7985 }, { "epoch": 0.5921257507229184, "grad_norm": 0.37671706080436707, "learning_rate": 0.00012017004251062766, "loss": 0.5456, "step": 7986 }, { "epoch": 0.5921998961963372, "grad_norm": 0.3691364526748657, "learning_rate": 0.00012016004001000251, "loss": 0.5088, "step": 7987 }, { "epoch": 0.5922740416697561, "grad_norm": 0.38579049706459045, "learning_rate": 0.00012015003750937734, "loss": 0.5637, "step": 7988 }, { "epoch": 0.5923481871431749, "grad_norm": 0.3603156805038452, "learning_rate": 0.00012014003500875219, "loss": 0.4811, "step": 7989 }, { "epoch": 0.5924223326165937, "grad_norm": 0.3352448344230652, "learning_rate": 0.00012013003250812704, "loss": 0.4746, "step": 7990 }, { "epoch": 0.5924964780900126, "grad_norm": 0.36525166034698486, "learning_rate": 0.00012012003000750188, "loss": 0.5743, "step": 7991 }, { "epoch": 0.5925706235634315, "grad_norm": 0.3473221957683563, "learning_rate": 0.00012011002750687673, "loss": 0.5139, "step": 7992 }, { "epoch": 0.5926447690368503, "grad_norm": 0.35766488313674927, "learning_rate": 0.00012010002500625156, "loss": 0.5036, "step": 7993 }, { "epoch": 0.5927189145102691, "grad_norm": 0.35705801844596863, "learning_rate": 0.00012009002250562642, "loss": 0.4905, "step": 7994 }, { "epoch": 0.592793059983688, "grad_norm": 0.3549797832965851, "learning_rate": 0.00012008002000500125, "loss": 0.495, "step": 7995 }, { "epoch": 0.5928672054571068, "grad_norm": 0.3232368528842926, "learning_rate": 0.0001200700175043761, "loss": 0.4475, "step": 7996 }, { "epoch": 0.5929413509305257, "grad_norm": 0.34953072667121887, "learning_rate": 0.00012006001500375095, "loss": 0.4622, "step": 7997 }, { "epoch": 0.5930154964039446, "grad_norm": 0.37249764800071716, "learning_rate": 0.00012005001250312579, "loss": 0.5067, "step": 7998 }, { "epoch": 0.5930896418773633, "grad_norm": 0.36296892166137695, "learning_rate": 0.00012004001000250063, "loss": 0.5393, "step": 7999 }, { "epoch": 0.5931637873507822, "grad_norm": 0.3623892664909363, "learning_rate": 0.00012003000750187548, "loss": 0.5194, "step": 8000 }, { "epoch": 0.5932379328242011, "grad_norm": 0.34642329812049866, "learning_rate": 0.00012002000500125033, "loss": 0.4952, "step": 8001 }, { "epoch": 0.5933120782976199, "grad_norm": 0.3595186173915863, "learning_rate": 0.00012001000250062515, "loss": 0.4936, "step": 8002 }, { "epoch": 0.5933862237710388, "grad_norm": 0.36930838227272034, "learning_rate": 0.00012, "loss": 0.4886, "step": 8003 }, { "epoch": 0.5934603692444577, "grad_norm": 0.38262298703193665, "learning_rate": 0.00011998999749937485, "loss": 0.5608, "step": 8004 }, { "epoch": 0.5935345147178764, "grad_norm": 0.34361523389816284, "learning_rate": 0.0001199799949987497, "loss": 0.5076, "step": 8005 }, { "epoch": 0.5936086601912953, "grad_norm": 0.3365902900695801, "learning_rate": 0.00011996999249812453, "loss": 0.4708, "step": 8006 }, { "epoch": 0.5936828056647142, "grad_norm": 0.36163344979286194, "learning_rate": 0.00011995998999749938, "loss": 0.523, "step": 8007 }, { "epoch": 0.593756951138133, "grad_norm": 0.36227062344551086, "learning_rate": 0.00011994998749687423, "loss": 0.531, "step": 8008 }, { "epoch": 0.5938310966115519, "grad_norm": 0.3581906855106354, "learning_rate": 0.00011993998499624908, "loss": 0.4928, "step": 8009 }, { "epoch": 0.5939052420849708, "grad_norm": 0.3693586587905884, "learning_rate": 0.0001199299824956239, "loss": 0.5675, "step": 8010 }, { "epoch": 0.5939793875583895, "grad_norm": 0.3531649708747864, "learning_rate": 0.00011991997999499875, "loss": 0.5043, "step": 8011 }, { "epoch": 0.5940535330318084, "grad_norm": 0.35239177942276, "learning_rate": 0.0001199099774943736, "loss": 0.4952, "step": 8012 }, { "epoch": 0.5941276785052273, "grad_norm": 0.36198368668556213, "learning_rate": 0.00011989997499374843, "loss": 0.5056, "step": 8013 }, { "epoch": 0.5942018239786461, "grad_norm": 0.36810776591300964, "learning_rate": 0.00011988997249312328, "loss": 0.5391, "step": 8014 }, { "epoch": 0.594275969452065, "grad_norm": 0.3447846472263336, "learning_rate": 0.00011987996999249813, "loss": 0.492, "step": 8015 }, { "epoch": 0.5943501149254838, "grad_norm": 0.3570903539657593, "learning_rate": 0.00011986996749187298, "loss": 0.5175, "step": 8016 }, { "epoch": 0.5944242603989026, "grad_norm": 0.361265629529953, "learning_rate": 0.0001198599649912478, "loss": 0.5025, "step": 8017 }, { "epoch": 0.5944984058723215, "grad_norm": 0.36753252148628235, "learning_rate": 0.00011984996249062265, "loss": 0.5524, "step": 8018 }, { "epoch": 0.5945725513457404, "grad_norm": 0.34752121567726135, "learning_rate": 0.0001198399599899975, "loss": 0.4863, "step": 8019 }, { "epoch": 0.5946466968191592, "grad_norm": 0.3595372438430786, "learning_rate": 0.00011982995748937236, "loss": 0.4996, "step": 8020 }, { "epoch": 0.594720842292578, "grad_norm": 0.35799548029899597, "learning_rate": 0.00011981995498874718, "loss": 0.4912, "step": 8021 }, { "epoch": 0.5947949877659969, "grad_norm": 0.3452135920524597, "learning_rate": 0.00011980995248812203, "loss": 0.499, "step": 8022 }, { "epoch": 0.5948691332394157, "grad_norm": 0.35470160841941833, "learning_rate": 0.00011979994998749688, "loss": 0.4963, "step": 8023 }, { "epoch": 0.5949432787128346, "grad_norm": 0.3625800311565399, "learning_rate": 0.00011978994748687173, "loss": 0.5233, "step": 8024 }, { "epoch": 0.5950174241862535, "grad_norm": 0.3606196641921997, "learning_rate": 0.00011977994498624657, "loss": 0.5032, "step": 8025 }, { "epoch": 0.5950915696596722, "grad_norm": 0.35989415645599365, "learning_rate": 0.0001197699424856214, "loss": 0.5106, "step": 8026 }, { "epoch": 0.5951657151330911, "grad_norm": 0.3433079123497009, "learning_rate": 0.00011975993998499626, "loss": 0.4696, "step": 8027 }, { "epoch": 0.59523986060651, "grad_norm": 0.37674474716186523, "learning_rate": 0.00011974993748437109, "loss": 0.4949, "step": 8028 }, { "epoch": 0.5953140060799288, "grad_norm": 0.3447806239128113, "learning_rate": 0.00011973993498374594, "loss": 0.476, "step": 8029 }, { "epoch": 0.5953881515533477, "grad_norm": 0.3651629090309143, "learning_rate": 0.0001197299324831208, "loss": 0.5152, "step": 8030 }, { "epoch": 0.5954622970267666, "grad_norm": 0.36113062500953674, "learning_rate": 0.00011971992998249563, "loss": 0.4925, "step": 8031 }, { "epoch": 0.5955364425001853, "grad_norm": 0.3716897666454315, "learning_rate": 0.00011970992748187047, "loss": 0.5339, "step": 8032 }, { "epoch": 0.5956105879736042, "grad_norm": 0.3739136755466461, "learning_rate": 0.00011969992498124532, "loss": 0.5318, "step": 8033 }, { "epoch": 0.5956847334470231, "grad_norm": 0.3592604100704193, "learning_rate": 0.00011968992248062017, "loss": 0.4853, "step": 8034 }, { "epoch": 0.5957588789204419, "grad_norm": 0.37699705362319946, "learning_rate": 0.00011967991997999502, "loss": 0.5444, "step": 8035 }, { "epoch": 0.5958330243938608, "grad_norm": 0.4021334648132324, "learning_rate": 0.00011966991747936984, "loss": 0.5416, "step": 8036 }, { "epoch": 0.5959071698672797, "grad_norm": 0.35365548729896545, "learning_rate": 0.0001196599149787447, "loss": 0.4957, "step": 8037 }, { "epoch": 0.5959813153406984, "grad_norm": 0.36434170603752136, "learning_rate": 0.00011964991247811955, "loss": 0.5297, "step": 8038 }, { "epoch": 0.5960554608141173, "grad_norm": 0.34361860156059265, "learning_rate": 0.00011963990997749437, "loss": 0.4586, "step": 8039 }, { "epoch": 0.5961296062875362, "grad_norm": 0.35763558745384216, "learning_rate": 0.00011962990747686922, "loss": 0.5658, "step": 8040 }, { "epoch": 0.596203751760955, "grad_norm": 0.3554210364818573, "learning_rate": 0.00011961990497624407, "loss": 0.4915, "step": 8041 }, { "epoch": 0.5962778972343739, "grad_norm": 0.35500243306159973, "learning_rate": 0.00011960990247561892, "loss": 0.4991, "step": 8042 }, { "epoch": 0.5963520427077927, "grad_norm": 0.3513069748878479, "learning_rate": 0.00011959989997499374, "loss": 0.492, "step": 8043 }, { "epoch": 0.5964261881812115, "grad_norm": 0.35911086201667786, "learning_rate": 0.0001195898974743686, "loss": 0.5306, "step": 8044 }, { "epoch": 0.5965003336546304, "grad_norm": 0.3562283515930176, "learning_rate": 0.00011957989497374345, "loss": 0.5107, "step": 8045 }, { "epoch": 0.5965744791280493, "grad_norm": 0.3720504939556122, "learning_rate": 0.0001195698924731183, "loss": 0.4986, "step": 8046 }, { "epoch": 0.5966486246014681, "grad_norm": 0.3381791412830353, "learning_rate": 0.00011955988997249312, "loss": 0.4635, "step": 8047 }, { "epoch": 0.5967227700748869, "grad_norm": 0.3423105478286743, "learning_rate": 0.00011954988747186797, "loss": 0.4811, "step": 8048 }, { "epoch": 0.5967969155483058, "grad_norm": 0.4183781147003174, "learning_rate": 0.00011953988497124282, "loss": 0.5544, "step": 8049 }, { "epoch": 0.5968710610217246, "grad_norm": 0.3454340994358063, "learning_rate": 0.00011952988247061767, "loss": 0.4837, "step": 8050 }, { "epoch": 0.5969452064951435, "grad_norm": 0.40388718247413635, "learning_rate": 0.0001195198799699925, "loss": 0.567, "step": 8051 }, { "epoch": 0.5970193519685624, "grad_norm": 0.36146989464759827, "learning_rate": 0.00011950987746936735, "loss": 0.4947, "step": 8052 }, { "epoch": 0.5970934974419811, "grad_norm": 0.37601950764656067, "learning_rate": 0.0001194998749687422, "loss": 0.5312, "step": 8053 }, { "epoch": 0.5971676429154, "grad_norm": 0.3505353331565857, "learning_rate": 0.00011948987246811702, "loss": 0.5185, "step": 8054 }, { "epoch": 0.5972417883888189, "grad_norm": 0.3817189335823059, "learning_rate": 0.00011947986996749187, "loss": 0.5436, "step": 8055 }, { "epoch": 0.5973159338622377, "grad_norm": 0.3473699390888214, "learning_rate": 0.00011946986746686672, "loss": 0.5041, "step": 8056 }, { "epoch": 0.5973900793356566, "grad_norm": 0.37944477796554565, "learning_rate": 0.00011945986496624157, "loss": 0.533, "step": 8057 }, { "epoch": 0.5974642248090754, "grad_norm": 0.3568613827228546, "learning_rate": 0.00011944986246561641, "loss": 0.5143, "step": 8058 }, { "epoch": 0.5975383702824942, "grad_norm": 0.3599793314933777, "learning_rate": 0.00011943985996499125, "loss": 0.5152, "step": 8059 }, { "epoch": 0.5976125157559131, "grad_norm": 0.334088534116745, "learning_rate": 0.0001194298574643661, "loss": 0.4854, "step": 8060 }, { "epoch": 0.5976866612293319, "grad_norm": 0.3549222946166992, "learning_rate": 0.00011941985496374095, "loss": 0.5288, "step": 8061 }, { "epoch": 0.5977608067027508, "grad_norm": 0.35481196641921997, "learning_rate": 0.00011940985246311578, "loss": 0.5019, "step": 8062 }, { "epoch": 0.5978349521761697, "grad_norm": 0.3817932903766632, "learning_rate": 0.00011939984996249064, "loss": 0.4886, "step": 8063 }, { "epoch": 0.5979090976495884, "grad_norm": 0.36350035667419434, "learning_rate": 0.00011938984746186547, "loss": 0.516, "step": 8064 }, { "epoch": 0.5979832431230073, "grad_norm": 0.3625657260417938, "learning_rate": 0.00011937984496124031, "loss": 0.518, "step": 8065 }, { "epoch": 0.5980573885964262, "grad_norm": 0.36546751856803894, "learning_rate": 0.00011936984246061516, "loss": 0.4964, "step": 8066 }, { "epoch": 0.598131534069845, "grad_norm": 0.35285720229148865, "learning_rate": 0.00011935983995999001, "loss": 0.5181, "step": 8067 }, { "epoch": 0.5982056795432639, "grad_norm": 0.3787290155887604, "learning_rate": 0.00011934983745936486, "loss": 0.4981, "step": 8068 }, { "epoch": 0.5982798250166828, "grad_norm": 0.3412288725376129, "learning_rate": 0.00011933983495873968, "loss": 0.4814, "step": 8069 }, { "epoch": 0.5983539704901015, "grad_norm": 0.3694833219051361, "learning_rate": 0.00011932983245811454, "loss": 0.5262, "step": 8070 }, { "epoch": 0.5984281159635204, "grad_norm": 0.34914037585258484, "learning_rate": 0.00011931982995748939, "loss": 0.4914, "step": 8071 }, { "epoch": 0.5985022614369393, "grad_norm": 0.3357715606689453, "learning_rate": 0.00011930982745686424, "loss": 0.4986, "step": 8072 }, { "epoch": 0.5985764069103581, "grad_norm": 0.3578357994556427, "learning_rate": 0.00011929982495623906, "loss": 0.5103, "step": 8073 }, { "epoch": 0.598650552383777, "grad_norm": 0.367714524269104, "learning_rate": 0.00011928982245561391, "loss": 0.5453, "step": 8074 }, { "epoch": 0.5987246978571958, "grad_norm": 0.37492918968200684, "learning_rate": 0.00011927981995498876, "loss": 0.5138, "step": 8075 }, { "epoch": 0.5987988433306146, "grad_norm": 0.36778080463409424, "learning_rate": 0.00011926981745436358, "loss": 0.4928, "step": 8076 }, { "epoch": 0.5988729888040335, "grad_norm": 0.359701007604599, "learning_rate": 0.00011925981495373844, "loss": 0.4887, "step": 8077 }, { "epoch": 0.5989471342774524, "grad_norm": 0.36238324642181396, "learning_rate": 0.00011924981245311329, "loss": 0.4902, "step": 8078 }, { "epoch": 0.5990212797508712, "grad_norm": 0.3910089433193207, "learning_rate": 0.00011923980995248814, "loss": 0.5398, "step": 8079 }, { "epoch": 0.59909542522429, "grad_norm": 0.37784460186958313, "learning_rate": 0.00011922980745186296, "loss": 0.5036, "step": 8080 }, { "epoch": 0.5991695706977089, "grad_norm": 0.35421156883239746, "learning_rate": 0.00011921980495123781, "loss": 0.4871, "step": 8081 }, { "epoch": 0.5992437161711277, "grad_norm": 0.3594769537448883, "learning_rate": 0.00011920980245061266, "loss": 0.5181, "step": 8082 }, { "epoch": 0.5993178616445466, "grad_norm": 0.3745902478694916, "learning_rate": 0.00011919979994998751, "loss": 0.4878, "step": 8083 }, { "epoch": 0.5993920071179655, "grad_norm": 0.36195096373558044, "learning_rate": 0.00011918979744936234, "loss": 0.5087, "step": 8084 }, { "epoch": 0.5994661525913842, "grad_norm": 0.37690964341163635, "learning_rate": 0.00011917979494873719, "loss": 0.5274, "step": 8085 }, { "epoch": 0.5995402980648031, "grad_norm": 0.3566948175430298, "learning_rate": 0.00011916979244811204, "loss": 0.5001, "step": 8086 }, { "epoch": 0.599614443538222, "grad_norm": 0.3735761046409607, "learning_rate": 0.00011915978994748689, "loss": 0.5052, "step": 8087 }, { "epoch": 0.5996885890116408, "grad_norm": 0.4483816921710968, "learning_rate": 0.00011914978744686171, "loss": 0.5561, "step": 8088 }, { "epoch": 0.5997627344850597, "grad_norm": 0.3978064954280853, "learning_rate": 0.00011913978494623656, "loss": 0.5623, "step": 8089 }, { "epoch": 0.5998368799584786, "grad_norm": 0.35813865065574646, "learning_rate": 0.00011912978244561141, "loss": 0.4928, "step": 8090 }, { "epoch": 0.5999110254318973, "grad_norm": 0.3521752655506134, "learning_rate": 0.00011911977994498624, "loss": 0.5059, "step": 8091 }, { "epoch": 0.5999851709053162, "grad_norm": 0.33832022547721863, "learning_rate": 0.00011910977744436109, "loss": 0.4959, "step": 8092 }, { "epoch": 0.6000593163787351, "grad_norm": 0.3855595886707306, "learning_rate": 0.00011909977494373594, "loss": 0.5544, "step": 8093 }, { "epoch": 0.6001334618521539, "grad_norm": 0.3301265835762024, "learning_rate": 0.00011908977244311079, "loss": 0.4869, "step": 8094 }, { "epoch": 0.6002076073255728, "grad_norm": 0.3465556800365448, "learning_rate": 0.00011907976994248563, "loss": 0.5008, "step": 8095 }, { "epoch": 0.6002817527989917, "grad_norm": 0.3880307972431183, "learning_rate": 0.00011906976744186048, "loss": 0.5215, "step": 8096 }, { "epoch": 0.6003558982724104, "grad_norm": 0.3706596791744232, "learning_rate": 0.00011905976494123531, "loss": 0.515, "step": 8097 }, { "epoch": 0.6004300437458293, "grad_norm": 0.3529438078403473, "learning_rate": 0.00011904976244061016, "loss": 0.5234, "step": 8098 }, { "epoch": 0.6005041892192482, "grad_norm": 0.3563961982727051, "learning_rate": 0.000119039759939985, "loss": 0.5293, "step": 8099 }, { "epoch": 0.600578334692667, "grad_norm": 0.3326714336872101, "learning_rate": 0.00011902975743935985, "loss": 0.4961, "step": 8100 }, { "epoch": 0.6006524801660859, "grad_norm": 0.3664962649345398, "learning_rate": 0.0001190197549387347, "loss": 0.5235, "step": 8101 }, { "epoch": 0.6007266256395047, "grad_norm": 0.3704240024089813, "learning_rate": 0.00011900975243810953, "loss": 0.5178, "step": 8102 }, { "epoch": 0.6008007711129235, "grad_norm": 0.36501672863960266, "learning_rate": 0.00011899974993748438, "loss": 0.5091, "step": 8103 }, { "epoch": 0.6008749165863424, "grad_norm": 0.3627963662147522, "learning_rate": 0.00011898974743685923, "loss": 0.5096, "step": 8104 }, { "epoch": 0.6009490620597613, "grad_norm": 0.34414732456207275, "learning_rate": 0.00011897974493623408, "loss": 0.5033, "step": 8105 }, { "epoch": 0.6010232075331801, "grad_norm": 0.34538254141807556, "learning_rate": 0.0001189697424356089, "loss": 0.475, "step": 8106 }, { "epoch": 0.601097353006599, "grad_norm": 0.3497619032859802, "learning_rate": 0.00011895973993498375, "loss": 0.4788, "step": 8107 }, { "epoch": 0.6011714984800178, "grad_norm": 0.334204763174057, "learning_rate": 0.0001189497374343586, "loss": 0.499, "step": 8108 }, { "epoch": 0.6012456439534366, "grad_norm": 0.33783602714538574, "learning_rate": 0.00011893973493373345, "loss": 0.4836, "step": 8109 }, { "epoch": 0.6013197894268555, "grad_norm": 0.33768001198768616, "learning_rate": 0.00011892973243310828, "loss": 0.4661, "step": 8110 }, { "epoch": 0.6013939349002744, "grad_norm": 0.3713936507701874, "learning_rate": 0.00011891972993248313, "loss": 0.509, "step": 8111 }, { "epoch": 0.6014680803736931, "grad_norm": 0.3623426854610443, "learning_rate": 0.00011890972743185798, "loss": 0.5174, "step": 8112 }, { "epoch": 0.601542225847112, "grad_norm": 0.3494303226470947, "learning_rate": 0.0001188997249312328, "loss": 0.5163, "step": 8113 }, { "epoch": 0.6016163713205309, "grad_norm": 0.36982524394989014, "learning_rate": 0.00011888972243060765, "loss": 0.5399, "step": 8114 }, { "epoch": 0.6016905167939497, "grad_norm": 0.36302071809768677, "learning_rate": 0.0001188797199299825, "loss": 0.5221, "step": 8115 }, { "epoch": 0.6017646622673686, "grad_norm": 0.33900895714759827, "learning_rate": 0.00011886971742935735, "loss": 0.4977, "step": 8116 }, { "epoch": 0.6018388077407875, "grad_norm": 0.35637617111206055, "learning_rate": 0.00011885971492873218, "loss": 0.5092, "step": 8117 }, { "epoch": 0.6019129532142062, "grad_norm": 0.36151450872421265, "learning_rate": 0.00011884971242810703, "loss": 0.5177, "step": 8118 }, { "epoch": 0.6019870986876251, "grad_norm": 0.37678757309913635, "learning_rate": 0.00011883970992748188, "loss": 0.5434, "step": 8119 }, { "epoch": 0.602061244161044, "grad_norm": 0.38292160630226135, "learning_rate": 0.00011882970742685673, "loss": 0.5039, "step": 8120 }, { "epoch": 0.6021353896344628, "grad_norm": 0.39045611023902893, "learning_rate": 0.00011881970492623155, "loss": 0.5598, "step": 8121 }, { "epoch": 0.6022095351078817, "grad_norm": 0.34152328968048096, "learning_rate": 0.0001188097024256064, "loss": 0.4985, "step": 8122 }, { "epoch": 0.6022836805813006, "grad_norm": 0.35986223816871643, "learning_rate": 0.00011879969992498125, "loss": 0.5182, "step": 8123 }, { "epoch": 0.6023578260547193, "grad_norm": 0.35024750232696533, "learning_rate": 0.0001187896974243561, "loss": 0.4983, "step": 8124 }, { "epoch": 0.6024319715281382, "grad_norm": 0.3548937737941742, "learning_rate": 0.00011877969492373093, "loss": 0.4781, "step": 8125 }, { "epoch": 0.6025061170015571, "grad_norm": 0.3693281412124634, "learning_rate": 0.00011876969242310578, "loss": 0.5385, "step": 8126 }, { "epoch": 0.6025802624749759, "grad_norm": 0.3662000000476837, "learning_rate": 0.00011875968992248063, "loss": 0.5012, "step": 8127 }, { "epoch": 0.6026544079483948, "grad_norm": 0.3573523759841919, "learning_rate": 0.00011874968742185547, "loss": 0.5034, "step": 8128 }, { "epoch": 0.6027285534218136, "grad_norm": 0.3792550265789032, "learning_rate": 0.0001187396849212303, "loss": 0.5259, "step": 8129 }, { "epoch": 0.6028026988952324, "grad_norm": 0.34082040190696716, "learning_rate": 0.00011872968242060515, "loss": 0.4785, "step": 8130 }, { "epoch": 0.6028768443686513, "grad_norm": 0.3487507402896881, "learning_rate": 0.00011871967991998, "loss": 0.5043, "step": 8131 }, { "epoch": 0.6029509898420702, "grad_norm": 0.33966341614723206, "learning_rate": 0.00011870967741935484, "loss": 0.4668, "step": 8132 }, { "epoch": 0.603025135315489, "grad_norm": 0.3593069016933441, "learning_rate": 0.00011869967491872969, "loss": 0.5318, "step": 8133 }, { "epoch": 0.6030992807889078, "grad_norm": 0.3511950969696045, "learning_rate": 0.00011868967241810454, "loss": 0.4656, "step": 8134 }, { "epoch": 0.6031734262623267, "grad_norm": 0.3924407660961151, "learning_rate": 0.00011867966991747938, "loss": 0.5543, "step": 8135 }, { "epoch": 0.6032475717357455, "grad_norm": 0.37189170718193054, "learning_rate": 0.00011866966741685422, "loss": 0.5274, "step": 8136 }, { "epoch": 0.6033217172091644, "grad_norm": 0.39844807982444763, "learning_rate": 0.00011865966491622907, "loss": 0.54, "step": 8137 }, { "epoch": 0.6033958626825833, "grad_norm": 0.3725387454032898, "learning_rate": 0.00011864966241560392, "loss": 0.5362, "step": 8138 }, { "epoch": 0.603470008156002, "grad_norm": 0.35972774028778076, "learning_rate": 0.00011863965991497874, "loss": 0.4843, "step": 8139 }, { "epoch": 0.6035441536294209, "grad_norm": 0.3536033034324646, "learning_rate": 0.00011862965741435359, "loss": 0.4835, "step": 8140 }, { "epoch": 0.6036182991028398, "grad_norm": 0.3358043432235718, "learning_rate": 0.00011861965491372844, "loss": 0.4518, "step": 8141 }, { "epoch": 0.6036924445762586, "grad_norm": 0.3778880834579468, "learning_rate": 0.0001186096524131033, "loss": 0.5292, "step": 8142 }, { "epoch": 0.6037665900496775, "grad_norm": 0.35461193323135376, "learning_rate": 0.00011859964991247812, "loss": 0.5009, "step": 8143 }, { "epoch": 0.6038407355230964, "grad_norm": 0.3764636218547821, "learning_rate": 0.00011858964741185297, "loss": 0.4889, "step": 8144 }, { "epoch": 0.6039148809965151, "grad_norm": 0.36212411522865295, "learning_rate": 0.00011857964491122782, "loss": 0.5191, "step": 8145 }, { "epoch": 0.603989026469934, "grad_norm": 0.37009817361831665, "learning_rate": 0.00011856964241060267, "loss": 0.5562, "step": 8146 }, { "epoch": 0.6040631719433529, "grad_norm": 0.35568559169769287, "learning_rate": 0.00011855963990997749, "loss": 0.4969, "step": 8147 }, { "epoch": 0.6041373174167717, "grad_norm": 0.33984071016311646, "learning_rate": 0.00011854963740935234, "loss": 0.4982, "step": 8148 }, { "epoch": 0.6042114628901906, "grad_norm": 0.32792341709136963, "learning_rate": 0.0001185396349087272, "loss": 0.4606, "step": 8149 }, { "epoch": 0.6042856083636094, "grad_norm": 0.38550683856010437, "learning_rate": 0.00011852963240810202, "loss": 0.5566, "step": 8150 }, { "epoch": 0.6043597538370282, "grad_norm": 0.3426325023174286, "learning_rate": 0.00011851962990747687, "loss": 0.4696, "step": 8151 }, { "epoch": 0.6044338993104471, "grad_norm": 0.33806300163269043, "learning_rate": 0.00011850962740685172, "loss": 0.4658, "step": 8152 }, { "epoch": 0.604508044783866, "grad_norm": 0.3466017246246338, "learning_rate": 0.00011849962490622657, "loss": 0.4992, "step": 8153 }, { "epoch": 0.6045821902572848, "grad_norm": 0.36748555302619934, "learning_rate": 0.00011848962240560139, "loss": 0.5367, "step": 8154 }, { "epoch": 0.6046563357307037, "grad_norm": 0.3523646593093872, "learning_rate": 0.00011847961990497624, "loss": 0.5115, "step": 8155 }, { "epoch": 0.6047304812041225, "grad_norm": 0.3208259344100952, "learning_rate": 0.0001184696174043511, "loss": 0.4616, "step": 8156 }, { "epoch": 0.6048046266775413, "grad_norm": 0.35457855463027954, "learning_rate": 0.00011845961490372594, "loss": 0.4932, "step": 8157 }, { "epoch": 0.6048787721509602, "grad_norm": 0.35958966612815857, "learning_rate": 0.00011844961240310077, "loss": 0.508, "step": 8158 }, { "epoch": 0.6049529176243791, "grad_norm": 0.35753458738327026, "learning_rate": 0.00011843960990247562, "loss": 0.532, "step": 8159 }, { "epoch": 0.6050270630977979, "grad_norm": 0.38513121008872986, "learning_rate": 0.00011842960740185047, "loss": 0.4916, "step": 8160 }, { "epoch": 0.6051012085712167, "grad_norm": 0.33476901054382324, "learning_rate": 0.00011841960490122532, "loss": 0.4989, "step": 8161 }, { "epoch": 0.6051753540446356, "grad_norm": 0.35851582884788513, "learning_rate": 0.00011840960240060014, "loss": 0.5178, "step": 8162 }, { "epoch": 0.6052494995180544, "grad_norm": 0.3832091689109802, "learning_rate": 0.000118399599899975, "loss": 0.5331, "step": 8163 }, { "epoch": 0.6053236449914733, "grad_norm": 0.39050593972206116, "learning_rate": 0.00011838959739934985, "loss": 0.5284, "step": 8164 }, { "epoch": 0.6053977904648922, "grad_norm": 0.35745376348495483, "learning_rate": 0.00011837959489872468, "loss": 0.4853, "step": 8165 }, { "epoch": 0.605471935938311, "grad_norm": 0.3229226768016815, "learning_rate": 0.00011836959239809953, "loss": 0.4764, "step": 8166 }, { "epoch": 0.6055460814117298, "grad_norm": 0.34061577916145325, "learning_rate": 0.00011835958989747437, "loss": 0.475, "step": 8167 }, { "epoch": 0.6056202268851487, "grad_norm": 0.32902592420578003, "learning_rate": 0.00011834958739684922, "loss": 0.4627, "step": 8168 }, { "epoch": 0.6056943723585675, "grad_norm": 0.35108682513237, "learning_rate": 0.00011833958489622406, "loss": 0.4936, "step": 8169 }, { "epoch": 0.6057685178319864, "grad_norm": 0.3372374475002289, "learning_rate": 0.00011832958239559891, "loss": 0.4717, "step": 8170 }, { "epoch": 0.6058426633054053, "grad_norm": 0.38885697722435, "learning_rate": 0.00011831957989497376, "loss": 0.5453, "step": 8171 }, { "epoch": 0.605916808778824, "grad_norm": 0.3551870584487915, "learning_rate": 0.00011830957739434861, "loss": 0.5009, "step": 8172 }, { "epoch": 0.6059909542522429, "grad_norm": 0.34885770082473755, "learning_rate": 0.00011829957489372343, "loss": 0.4949, "step": 8173 }, { "epoch": 0.6060650997256617, "grad_norm": 0.3812972903251648, "learning_rate": 0.00011828957239309828, "loss": 0.6324, "step": 8174 }, { "epoch": 0.6061392451990806, "grad_norm": 0.33947473764419556, "learning_rate": 0.00011827956989247313, "loss": 0.4592, "step": 8175 }, { "epoch": 0.6062133906724995, "grad_norm": 0.3500603437423706, "learning_rate": 0.00011826956739184796, "loss": 0.4898, "step": 8176 }, { "epoch": 0.6062875361459182, "grad_norm": 0.3334426283836365, "learning_rate": 0.00011825956489122281, "loss": 0.4746, "step": 8177 }, { "epoch": 0.6063616816193371, "grad_norm": 0.349992036819458, "learning_rate": 0.00011824956239059766, "loss": 0.5068, "step": 8178 }, { "epoch": 0.606435827092756, "grad_norm": 0.340493381023407, "learning_rate": 0.00011823955988997251, "loss": 0.4766, "step": 8179 }, { "epoch": 0.6065099725661748, "grad_norm": 0.3839522898197174, "learning_rate": 0.00011822955738934733, "loss": 0.5005, "step": 8180 }, { "epoch": 0.6065841180395937, "grad_norm": 0.38043323159217834, "learning_rate": 0.00011821955488872218, "loss": 0.5102, "step": 8181 }, { "epoch": 0.6066582635130126, "grad_norm": 0.38393235206604004, "learning_rate": 0.00011820955238809703, "loss": 0.5397, "step": 8182 }, { "epoch": 0.6067324089864313, "grad_norm": 0.37899020314216614, "learning_rate": 0.00011819954988747189, "loss": 0.5277, "step": 8183 }, { "epoch": 0.6068065544598502, "grad_norm": 0.3585484027862549, "learning_rate": 0.00011818954738684671, "loss": 0.5051, "step": 8184 }, { "epoch": 0.6068806999332691, "grad_norm": 0.3318922221660614, "learning_rate": 0.00011817954488622156, "loss": 0.4586, "step": 8185 }, { "epoch": 0.6069548454066879, "grad_norm": 0.3599957227706909, "learning_rate": 0.00011816954238559641, "loss": 0.5239, "step": 8186 }, { "epoch": 0.6070289908801068, "grad_norm": 0.35230278968811035, "learning_rate": 0.00011815953988497123, "loss": 0.4846, "step": 8187 }, { "epoch": 0.6071031363535256, "grad_norm": 0.37262389063835144, "learning_rate": 0.00011814953738434608, "loss": 0.4847, "step": 8188 }, { "epoch": 0.6071772818269444, "grad_norm": 0.35095515847206116, "learning_rate": 0.00011813953488372094, "loss": 0.5173, "step": 8189 }, { "epoch": 0.6072514273003633, "grad_norm": 0.34112975001335144, "learning_rate": 0.00011812953238309579, "loss": 0.4659, "step": 8190 }, { "epoch": 0.6073255727737822, "grad_norm": 0.3516896367073059, "learning_rate": 0.00011811952988247061, "loss": 0.5043, "step": 8191 }, { "epoch": 0.607399718247201, "grad_norm": 0.3685738444328308, "learning_rate": 0.00011810952738184546, "loss": 0.5147, "step": 8192 }, { "epoch": 0.6074738637206198, "grad_norm": 0.35338184237480164, "learning_rate": 0.00011809952488122031, "loss": 0.4783, "step": 8193 }, { "epoch": 0.6075480091940387, "grad_norm": 0.34418928623199463, "learning_rate": 0.00011808952238059516, "loss": 0.4704, "step": 8194 }, { "epoch": 0.6076221546674575, "grad_norm": 0.3533618152141571, "learning_rate": 0.00011807951987996998, "loss": 0.49, "step": 8195 }, { "epoch": 0.6076963001408764, "grad_norm": 0.3591638207435608, "learning_rate": 0.00011806951737934484, "loss": 0.4965, "step": 8196 }, { "epoch": 0.6077704456142953, "grad_norm": 0.3455269932746887, "learning_rate": 0.00011805951487871969, "loss": 0.508, "step": 8197 }, { "epoch": 0.607844591087714, "grad_norm": 0.357838898897171, "learning_rate": 0.00011804951237809454, "loss": 0.4897, "step": 8198 }, { "epoch": 0.6079187365611329, "grad_norm": 0.34469470381736755, "learning_rate": 0.00011803950987746937, "loss": 0.4981, "step": 8199 }, { "epoch": 0.6079928820345518, "grad_norm": 0.38874179124832153, "learning_rate": 0.00011802950737684421, "loss": 0.5211, "step": 8200 }, { "epoch": 0.6080670275079706, "grad_norm": 0.39064186811447144, "learning_rate": 0.00011801950487621906, "loss": 0.5044, "step": 8201 }, { "epoch": 0.6081411729813895, "grad_norm": 0.36243581771850586, "learning_rate": 0.0001180095023755939, "loss": 0.5342, "step": 8202 }, { "epoch": 0.6082153184548084, "grad_norm": 0.3550574779510498, "learning_rate": 0.00011799949987496875, "loss": 0.4923, "step": 8203 }, { "epoch": 0.6082894639282271, "grad_norm": 0.34964892268180847, "learning_rate": 0.0001179894973743436, "loss": 0.5328, "step": 8204 }, { "epoch": 0.608363609401646, "grad_norm": 0.361968457698822, "learning_rate": 0.00011797949487371844, "loss": 0.4651, "step": 8205 }, { "epoch": 0.6084377548750649, "grad_norm": 0.3472563624382019, "learning_rate": 0.00011796949237309327, "loss": 0.4856, "step": 8206 }, { "epoch": 0.6085119003484837, "grad_norm": 0.3576217591762543, "learning_rate": 0.00011795948987246812, "loss": 0.4882, "step": 8207 }, { "epoch": 0.6085860458219026, "grad_norm": 0.36033809185028076, "learning_rate": 0.00011794948737184298, "loss": 0.5046, "step": 8208 }, { "epoch": 0.6086601912953215, "grad_norm": 0.319063276052475, "learning_rate": 0.00011793948487121783, "loss": 0.4583, "step": 8209 }, { "epoch": 0.6087343367687402, "grad_norm": 0.37761178612709045, "learning_rate": 0.00011792948237059265, "loss": 0.5036, "step": 8210 }, { "epoch": 0.6088084822421591, "grad_norm": 0.38468456268310547, "learning_rate": 0.0001179194798699675, "loss": 0.4973, "step": 8211 }, { "epoch": 0.608882627715578, "grad_norm": 0.3717069923877716, "learning_rate": 0.00011790947736934235, "loss": 0.5217, "step": 8212 }, { "epoch": 0.6089567731889968, "grad_norm": 0.3673045337200165, "learning_rate": 0.00011789947486871717, "loss": 0.5392, "step": 8213 }, { "epoch": 0.6090309186624157, "grad_norm": 0.3850369453430176, "learning_rate": 0.00011788947236809202, "loss": 0.5217, "step": 8214 }, { "epoch": 0.6091050641358345, "grad_norm": 0.33561620116233826, "learning_rate": 0.00011787946986746688, "loss": 0.503, "step": 8215 }, { "epoch": 0.6091792096092533, "grad_norm": 0.3583024740219116, "learning_rate": 0.00011786946736684173, "loss": 0.5146, "step": 8216 }, { "epoch": 0.6092533550826722, "grad_norm": 0.36735638976097107, "learning_rate": 0.00011785946486621655, "loss": 0.491, "step": 8217 }, { "epoch": 0.6093275005560911, "grad_norm": 0.34098899364471436, "learning_rate": 0.0001178494623655914, "loss": 0.473, "step": 8218 }, { "epoch": 0.6094016460295099, "grad_norm": 0.3567383885383606, "learning_rate": 0.00011783945986496625, "loss": 0.4886, "step": 8219 }, { "epoch": 0.6094757915029287, "grad_norm": 0.36457696557044983, "learning_rate": 0.0001178294573643411, "loss": 0.5089, "step": 8220 }, { "epoch": 0.6095499369763476, "grad_norm": 0.367279052734375, "learning_rate": 0.00011781945486371593, "loss": 0.5075, "step": 8221 }, { "epoch": 0.6096240824497664, "grad_norm": 0.36676305532455444, "learning_rate": 0.00011780945236309078, "loss": 0.4832, "step": 8222 }, { "epoch": 0.6096982279231853, "grad_norm": 0.3519074022769928, "learning_rate": 0.00011779944986246563, "loss": 0.499, "step": 8223 }, { "epoch": 0.6097723733966042, "grad_norm": 0.3674156069755554, "learning_rate": 0.00011778944736184045, "loss": 0.5074, "step": 8224 }, { "epoch": 0.609846518870023, "grad_norm": 0.36774691939353943, "learning_rate": 0.0001177794448612153, "loss": 0.4788, "step": 8225 }, { "epoch": 0.6099206643434418, "grad_norm": 0.3695976734161377, "learning_rate": 0.00011776944236059015, "loss": 0.5442, "step": 8226 }, { "epoch": 0.6099948098168607, "grad_norm": 0.3585420250892639, "learning_rate": 0.000117759439859965, "loss": 0.4694, "step": 8227 }, { "epoch": 0.6100689552902795, "grad_norm": 0.35638219118118286, "learning_rate": 0.00011774943735933983, "loss": 0.526, "step": 8228 }, { "epoch": 0.6101431007636984, "grad_norm": 0.3568047285079956, "learning_rate": 0.00011773943485871468, "loss": 0.5168, "step": 8229 }, { "epoch": 0.6102172462371173, "grad_norm": 0.32863375544548035, "learning_rate": 0.00011772943235808953, "loss": 0.4762, "step": 8230 }, { "epoch": 0.610291391710536, "grad_norm": 0.37218940258026123, "learning_rate": 0.00011771942985746438, "loss": 0.5004, "step": 8231 }, { "epoch": 0.6103655371839549, "grad_norm": 0.3945079743862152, "learning_rate": 0.00011770942735683921, "loss": 0.5161, "step": 8232 }, { "epoch": 0.6104396826573738, "grad_norm": 0.37575533986091614, "learning_rate": 0.00011769942485621405, "loss": 0.536, "step": 8233 }, { "epoch": 0.6105138281307926, "grad_norm": 0.3801383674144745, "learning_rate": 0.0001176894223555889, "loss": 0.5147, "step": 8234 }, { "epoch": 0.6105879736042115, "grad_norm": 0.3718646466732025, "learning_rate": 0.00011767941985496375, "loss": 0.5478, "step": 8235 }, { "epoch": 0.6106621190776303, "grad_norm": 0.3755801320075989, "learning_rate": 0.00011766941735433859, "loss": 0.5639, "step": 8236 }, { "epoch": 0.6107362645510491, "grad_norm": 0.37792232632637024, "learning_rate": 0.00011765941485371344, "loss": 0.5219, "step": 8237 }, { "epoch": 0.610810410024468, "grad_norm": 0.36045634746551514, "learning_rate": 0.00011764941235308828, "loss": 0.5248, "step": 8238 }, { "epoch": 0.6108845554978869, "grad_norm": 0.35408082604408264, "learning_rate": 0.00011763940985246311, "loss": 0.5089, "step": 8239 }, { "epoch": 0.6109587009713057, "grad_norm": 0.3510207533836365, "learning_rate": 0.00011762940735183797, "loss": 0.4875, "step": 8240 }, { "epoch": 0.6110328464447246, "grad_norm": 0.3424507677555084, "learning_rate": 0.00011761940485121282, "loss": 0.4868, "step": 8241 }, { "epoch": 0.6111069919181434, "grad_norm": 0.3569038510322571, "learning_rate": 0.00011760940235058767, "loss": 0.478, "step": 8242 }, { "epoch": 0.6111811373915622, "grad_norm": 0.34247830510139465, "learning_rate": 0.00011759939984996249, "loss": 0.4979, "step": 8243 }, { "epoch": 0.6112552828649811, "grad_norm": 0.3640533983707428, "learning_rate": 0.00011758939734933734, "loss": 0.4885, "step": 8244 }, { "epoch": 0.6113294283384, "grad_norm": 0.34785863757133484, "learning_rate": 0.00011757939484871219, "loss": 0.4736, "step": 8245 }, { "epoch": 0.6114035738118188, "grad_norm": 0.34521907567977905, "learning_rate": 0.00011756939234808704, "loss": 0.4703, "step": 8246 }, { "epoch": 0.6114777192852376, "grad_norm": 0.345096230506897, "learning_rate": 0.00011755938984746187, "loss": 0.4792, "step": 8247 }, { "epoch": 0.6115518647586565, "grad_norm": 0.3658061623573303, "learning_rate": 0.00011754938734683672, "loss": 0.5256, "step": 8248 }, { "epoch": 0.6116260102320753, "grad_norm": 0.3558577299118042, "learning_rate": 0.00011753938484621157, "loss": 0.486, "step": 8249 }, { "epoch": 0.6117001557054942, "grad_norm": 0.3741971254348755, "learning_rate": 0.00011752938234558639, "loss": 0.5043, "step": 8250 }, { "epoch": 0.6117743011789131, "grad_norm": 0.3885989487171173, "learning_rate": 0.00011751937984496124, "loss": 0.5154, "step": 8251 }, { "epoch": 0.6118484466523318, "grad_norm": 0.3576740026473999, "learning_rate": 0.00011750937734433609, "loss": 0.5101, "step": 8252 }, { "epoch": 0.6119225921257507, "grad_norm": 0.363230437040329, "learning_rate": 0.00011749937484371094, "loss": 0.4979, "step": 8253 }, { "epoch": 0.6119967375991696, "grad_norm": 0.3483096659183502, "learning_rate": 0.00011748937234308577, "loss": 0.5123, "step": 8254 }, { "epoch": 0.6120708830725884, "grad_norm": 0.3884086310863495, "learning_rate": 0.00011747936984246062, "loss": 0.5387, "step": 8255 }, { "epoch": 0.6121450285460073, "grad_norm": 0.37183451652526855, "learning_rate": 0.00011746936734183547, "loss": 0.5151, "step": 8256 }, { "epoch": 0.6122191740194262, "grad_norm": 0.3548058569431305, "learning_rate": 0.00011745936484121032, "loss": 0.5209, "step": 8257 }, { "epoch": 0.6122933194928449, "grad_norm": 0.37571847438812256, "learning_rate": 0.00011744936234058514, "loss": 0.5106, "step": 8258 }, { "epoch": 0.6123674649662638, "grad_norm": 0.3719201683998108, "learning_rate": 0.00011743935983995999, "loss": 0.5006, "step": 8259 }, { "epoch": 0.6124416104396827, "grad_norm": 0.3440748453140259, "learning_rate": 0.00011742935733933484, "loss": 0.486, "step": 8260 }, { "epoch": 0.6125157559131015, "grad_norm": 0.3677162528038025, "learning_rate": 0.00011741935483870967, "loss": 0.5074, "step": 8261 }, { "epoch": 0.6125899013865204, "grad_norm": 0.33774030208587646, "learning_rate": 0.00011740935233808452, "loss": 0.4527, "step": 8262 }, { "epoch": 0.6126640468599392, "grad_norm": 0.36563727259635925, "learning_rate": 0.00011739934983745937, "loss": 0.5235, "step": 8263 }, { "epoch": 0.612738192333358, "grad_norm": 0.380344033241272, "learning_rate": 0.00011738934733683422, "loss": 0.5334, "step": 8264 }, { "epoch": 0.6128123378067769, "grad_norm": 0.3771666884422302, "learning_rate": 0.00011737934483620906, "loss": 0.5301, "step": 8265 }, { "epoch": 0.6128864832801958, "grad_norm": 0.33436134457588196, "learning_rate": 0.00011736934233558389, "loss": 0.4737, "step": 8266 }, { "epoch": 0.6129606287536146, "grad_norm": 0.33163177967071533, "learning_rate": 0.00011735933983495874, "loss": 0.4729, "step": 8267 }, { "epoch": 0.6130347742270335, "grad_norm": 0.33720511198043823, "learning_rate": 0.0001173493373343336, "loss": 0.4872, "step": 8268 }, { "epoch": 0.6131089197004523, "grad_norm": 0.3593771457672119, "learning_rate": 0.00011733933483370843, "loss": 0.4755, "step": 8269 }, { "epoch": 0.6131830651738711, "grad_norm": 0.35649222135543823, "learning_rate": 0.00011732933233308328, "loss": 0.4993, "step": 8270 }, { "epoch": 0.61325721064729, "grad_norm": 0.35612136125564575, "learning_rate": 0.00011731932983245812, "loss": 0.4927, "step": 8271 }, { "epoch": 0.6133313561207089, "grad_norm": 0.33985358476638794, "learning_rate": 0.00011730932733183297, "loss": 0.4684, "step": 8272 }, { "epoch": 0.6134055015941277, "grad_norm": 0.36951738595962524, "learning_rate": 0.0001172993248312078, "loss": 0.5257, "step": 8273 }, { "epoch": 0.6134796470675465, "grad_norm": 0.3593910336494446, "learning_rate": 0.00011728932233058266, "loss": 0.5501, "step": 8274 }, { "epoch": 0.6135537925409654, "grad_norm": 0.37484708428382874, "learning_rate": 0.00011727931982995751, "loss": 0.5128, "step": 8275 }, { "epoch": 0.6136279380143842, "grad_norm": 0.37252455949783325, "learning_rate": 0.00011726931732933233, "loss": 0.5314, "step": 8276 }, { "epoch": 0.6137020834878031, "grad_norm": 0.3738322854042053, "learning_rate": 0.00011725931482870718, "loss": 0.5095, "step": 8277 }, { "epoch": 0.613776228961222, "grad_norm": 0.33926069736480713, "learning_rate": 0.00011724931232808203, "loss": 0.4828, "step": 8278 }, { "epoch": 0.6138503744346407, "grad_norm": 0.3757955729961395, "learning_rate": 0.00011723930982745688, "loss": 0.529, "step": 8279 }, { "epoch": 0.6139245199080596, "grad_norm": 0.3620207607746124, "learning_rate": 0.0001172293073268317, "loss": 0.5306, "step": 8280 }, { "epoch": 0.6139986653814785, "grad_norm": 0.3474735915660858, "learning_rate": 0.00011721930482620656, "loss": 0.4656, "step": 8281 }, { "epoch": 0.6140728108548973, "grad_norm": 0.35653412342071533, "learning_rate": 0.00011720930232558141, "loss": 0.4905, "step": 8282 }, { "epoch": 0.6141469563283162, "grad_norm": 0.38148176670074463, "learning_rate": 0.00011719929982495626, "loss": 0.5223, "step": 8283 }, { "epoch": 0.6142211018017351, "grad_norm": 0.37453293800354004, "learning_rate": 0.00011718929732433108, "loss": 0.5163, "step": 8284 }, { "epoch": 0.6142952472751538, "grad_norm": 0.3753186762332916, "learning_rate": 0.00011717929482370593, "loss": 0.5239, "step": 8285 }, { "epoch": 0.6143693927485727, "grad_norm": 0.35398995876312256, "learning_rate": 0.00011716929232308078, "loss": 0.4861, "step": 8286 }, { "epoch": 0.6144435382219915, "grad_norm": 0.34371083974838257, "learning_rate": 0.0001171592898224556, "loss": 0.4893, "step": 8287 }, { "epoch": 0.6145176836954104, "grad_norm": 0.34863194823265076, "learning_rate": 0.00011714928732183046, "loss": 0.5183, "step": 8288 }, { "epoch": 0.6145918291688293, "grad_norm": 0.35045865178108215, "learning_rate": 0.00011713928482120531, "loss": 0.4524, "step": 8289 }, { "epoch": 0.614665974642248, "grad_norm": 0.36006322503089905, "learning_rate": 0.00011712928232058016, "loss": 0.5182, "step": 8290 }, { "epoch": 0.6147401201156669, "grad_norm": 0.36271077394485474, "learning_rate": 0.00011711927981995498, "loss": 0.4666, "step": 8291 }, { "epoch": 0.6148142655890858, "grad_norm": 0.34928378462791443, "learning_rate": 0.00011710927731932983, "loss": 0.4857, "step": 8292 }, { "epoch": 0.6148884110625046, "grad_norm": 0.3503003716468811, "learning_rate": 0.00011709927481870468, "loss": 0.4904, "step": 8293 }, { "epoch": 0.6149625565359235, "grad_norm": 0.35839617252349854, "learning_rate": 0.00011708927231807953, "loss": 0.4982, "step": 8294 }, { "epoch": 0.6150367020093424, "grad_norm": 0.3701941668987274, "learning_rate": 0.00011707926981745436, "loss": 0.5328, "step": 8295 }, { "epoch": 0.6151108474827611, "grad_norm": 0.3761591613292694, "learning_rate": 0.00011706926731682921, "loss": 0.5039, "step": 8296 }, { "epoch": 0.61518499295618, "grad_norm": 0.351502001285553, "learning_rate": 0.00011705926481620406, "loss": 0.4974, "step": 8297 }, { "epoch": 0.6152591384295989, "grad_norm": 0.35760724544525146, "learning_rate": 0.00011704926231557891, "loss": 0.5145, "step": 8298 }, { "epoch": 0.6153332839030177, "grad_norm": 0.3518727719783783, "learning_rate": 0.00011703925981495373, "loss": 0.4978, "step": 8299 }, { "epoch": 0.6154074293764366, "grad_norm": 0.34871187806129456, "learning_rate": 0.00011702925731432858, "loss": 0.5107, "step": 8300 }, { "epoch": 0.6154815748498554, "grad_norm": 0.3699646592140198, "learning_rate": 0.00011701925481370343, "loss": 0.5593, "step": 8301 }, { "epoch": 0.6155557203232742, "grad_norm": 0.3801913261413574, "learning_rate": 0.00011700925231307827, "loss": 0.5288, "step": 8302 }, { "epoch": 0.6156298657966931, "grad_norm": 0.35110190510749817, "learning_rate": 0.00011699924981245312, "loss": 0.4884, "step": 8303 }, { "epoch": 0.615704011270112, "grad_norm": 0.35841798782348633, "learning_rate": 0.00011698924731182796, "loss": 0.5177, "step": 8304 }, { "epoch": 0.6157781567435308, "grad_norm": 0.3637603223323822, "learning_rate": 0.00011697924481120281, "loss": 0.5061, "step": 8305 }, { "epoch": 0.6158523022169496, "grad_norm": 0.3655757009983063, "learning_rate": 0.00011696924231057765, "loss": 0.5151, "step": 8306 }, { "epoch": 0.6159264476903685, "grad_norm": 0.3475775420665741, "learning_rate": 0.0001169592398099525, "loss": 0.475, "step": 8307 }, { "epoch": 0.6160005931637873, "grad_norm": 0.3446075916290283, "learning_rate": 0.00011694923730932735, "loss": 0.4616, "step": 8308 }, { "epoch": 0.6160747386372062, "grad_norm": 0.3583718538284302, "learning_rate": 0.00011693923480870219, "loss": 0.5076, "step": 8309 }, { "epoch": 0.6161488841106251, "grad_norm": 0.3522875905036926, "learning_rate": 0.00011692923230807702, "loss": 0.5327, "step": 8310 }, { "epoch": 0.6162230295840438, "grad_norm": 0.35220232605934143, "learning_rate": 0.00011691922980745187, "loss": 0.5141, "step": 8311 }, { "epoch": 0.6162971750574627, "grad_norm": 0.3587549328804016, "learning_rate": 0.00011690922730682672, "loss": 0.4931, "step": 8312 }, { "epoch": 0.6163713205308816, "grad_norm": 0.35740992426872253, "learning_rate": 0.00011689922480620155, "loss": 0.495, "step": 8313 }, { "epoch": 0.6164454660043004, "grad_norm": 0.3485647141933441, "learning_rate": 0.0001168892223055764, "loss": 0.4659, "step": 8314 }, { "epoch": 0.6165196114777193, "grad_norm": 0.368151992559433, "learning_rate": 0.00011687921980495125, "loss": 0.5021, "step": 8315 }, { "epoch": 0.6165937569511382, "grad_norm": 0.3655143082141876, "learning_rate": 0.0001168692173043261, "loss": 0.5008, "step": 8316 }, { "epoch": 0.6166679024245569, "grad_norm": 0.3705134689807892, "learning_rate": 0.00011685921480370092, "loss": 0.5082, "step": 8317 }, { "epoch": 0.6167420478979758, "grad_norm": 0.36215347051620483, "learning_rate": 0.00011684921230307577, "loss": 0.4985, "step": 8318 }, { "epoch": 0.6168161933713947, "grad_norm": 0.3688381612300873, "learning_rate": 0.00011683920980245062, "loss": 0.4977, "step": 8319 }, { "epoch": 0.6168903388448135, "grad_norm": 0.3566496670246124, "learning_rate": 0.00011682920730182547, "loss": 0.5216, "step": 8320 }, { "epoch": 0.6169644843182324, "grad_norm": 0.3340362310409546, "learning_rate": 0.0001168192048012003, "loss": 0.4735, "step": 8321 }, { "epoch": 0.6170386297916512, "grad_norm": 0.35981181263923645, "learning_rate": 0.00011680920230057515, "loss": 0.4446, "step": 8322 }, { "epoch": 0.61711277526507, "grad_norm": 0.40151727199554443, "learning_rate": 0.00011679919979995, "loss": 0.4902, "step": 8323 }, { "epoch": 0.6171869207384889, "grad_norm": 0.36098089814186096, "learning_rate": 0.00011678919729932482, "loss": 0.4751, "step": 8324 }, { "epoch": 0.6172610662119078, "grad_norm": 0.34175148606300354, "learning_rate": 0.00011677919479869967, "loss": 0.4661, "step": 8325 }, { "epoch": 0.6173352116853266, "grad_norm": 0.3434131145477295, "learning_rate": 0.00011676919229807452, "loss": 0.4898, "step": 8326 }, { "epoch": 0.6174093571587455, "grad_norm": 0.37028905749320984, "learning_rate": 0.00011675918979744937, "loss": 0.5249, "step": 8327 }, { "epoch": 0.6174835026321643, "grad_norm": 0.35237857699394226, "learning_rate": 0.0001167491872968242, "loss": 0.4943, "step": 8328 }, { "epoch": 0.6175576481055831, "grad_norm": 0.3522878885269165, "learning_rate": 0.00011673918479619905, "loss": 0.4853, "step": 8329 }, { "epoch": 0.617631793579002, "grad_norm": 0.38018786907196045, "learning_rate": 0.0001167291822955739, "loss": 0.5597, "step": 8330 }, { "epoch": 0.6177059390524209, "grad_norm": 0.3449271619319916, "learning_rate": 0.00011671917979494875, "loss": 0.4871, "step": 8331 }, { "epoch": 0.6177800845258397, "grad_norm": 0.36148300766944885, "learning_rate": 0.00011670917729432357, "loss": 0.507, "step": 8332 }, { "epoch": 0.6178542299992585, "grad_norm": 0.3610636293888092, "learning_rate": 0.00011669917479369842, "loss": 0.4811, "step": 8333 }, { "epoch": 0.6179283754726774, "grad_norm": 0.37070348858833313, "learning_rate": 0.00011668917229307328, "loss": 0.507, "step": 8334 }, { "epoch": 0.6180025209460962, "grad_norm": 0.3594962954521179, "learning_rate": 0.00011667916979244813, "loss": 0.4935, "step": 8335 }, { "epoch": 0.6180766664195151, "grad_norm": 0.38496068120002747, "learning_rate": 0.00011666916729182295, "loss": 0.5367, "step": 8336 }, { "epoch": 0.618150811892934, "grad_norm": 0.376351535320282, "learning_rate": 0.0001166591647911978, "loss": 0.5688, "step": 8337 }, { "epoch": 0.6182249573663527, "grad_norm": 0.36313262581825256, "learning_rate": 0.00011664916229057265, "loss": 0.4873, "step": 8338 }, { "epoch": 0.6182991028397716, "grad_norm": 0.3754861652851105, "learning_rate": 0.00011663915978994749, "loss": 0.519, "step": 8339 }, { "epoch": 0.6183732483131905, "grad_norm": 0.3705216944217682, "learning_rate": 0.00011662915728932234, "loss": 0.529, "step": 8340 }, { "epoch": 0.6184473937866093, "grad_norm": 0.3626951277256012, "learning_rate": 0.00011661915478869719, "loss": 0.493, "step": 8341 }, { "epoch": 0.6185215392600282, "grad_norm": 0.3489729166030884, "learning_rate": 0.00011660915228807203, "loss": 0.5215, "step": 8342 }, { "epoch": 0.6185956847334471, "grad_norm": 0.3469228446483612, "learning_rate": 0.00011659914978744686, "loss": 0.4794, "step": 8343 }, { "epoch": 0.6186698302068658, "grad_norm": 0.3534775674343109, "learning_rate": 0.00011658914728682171, "loss": 0.4983, "step": 8344 }, { "epoch": 0.6187439756802847, "grad_norm": 0.3525943458080292, "learning_rate": 0.00011657914478619656, "loss": 0.4781, "step": 8345 }, { "epoch": 0.6188181211537036, "grad_norm": 0.3996127247810364, "learning_rate": 0.00011656914228557142, "loss": 0.5646, "step": 8346 }, { "epoch": 0.6188922666271224, "grad_norm": 0.3375546336174011, "learning_rate": 0.00011655913978494624, "loss": 0.4844, "step": 8347 }, { "epoch": 0.6189664121005413, "grad_norm": 0.3461850881576538, "learning_rate": 0.00011654913728432109, "loss": 0.4803, "step": 8348 }, { "epoch": 0.6190405575739601, "grad_norm": 0.35010427236557007, "learning_rate": 0.00011653913478369594, "loss": 0.5211, "step": 8349 }, { "epoch": 0.6191147030473789, "grad_norm": 0.33639058470726013, "learning_rate": 0.00011652913228307076, "loss": 0.4917, "step": 8350 }, { "epoch": 0.6191888485207978, "grad_norm": 0.3705667555332184, "learning_rate": 0.00011651912978244561, "loss": 0.5167, "step": 8351 }, { "epoch": 0.6192629939942167, "grad_norm": 0.38036414980888367, "learning_rate": 0.00011650912728182046, "loss": 0.4733, "step": 8352 }, { "epoch": 0.6193371394676355, "grad_norm": 0.3547497093677521, "learning_rate": 0.00011649912478119532, "loss": 0.5138, "step": 8353 }, { "epoch": 0.6194112849410544, "grad_norm": 0.40701785683631897, "learning_rate": 0.00011648912228057014, "loss": 0.5243, "step": 8354 }, { "epoch": 0.6194854304144732, "grad_norm": 0.34735170006752014, "learning_rate": 0.00011647911977994499, "loss": 0.4678, "step": 8355 }, { "epoch": 0.619559575887892, "grad_norm": 0.3685932755470276, "learning_rate": 0.00011646911727931984, "loss": 0.518, "step": 8356 }, { "epoch": 0.6196337213613109, "grad_norm": 0.36401546001434326, "learning_rate": 0.00011645911477869469, "loss": 0.5094, "step": 8357 }, { "epoch": 0.6197078668347298, "grad_norm": 0.3860323131084442, "learning_rate": 0.00011644911227806951, "loss": 0.5213, "step": 8358 }, { "epoch": 0.6197820123081486, "grad_norm": 0.3562842607498169, "learning_rate": 0.00011643910977744437, "loss": 0.5035, "step": 8359 }, { "epoch": 0.6198561577815674, "grad_norm": 0.35403451323509216, "learning_rate": 0.00011642910727681922, "loss": 0.4956, "step": 8360 }, { "epoch": 0.6199303032549863, "grad_norm": 0.3562604486942291, "learning_rate": 0.00011641910477619404, "loss": 0.4761, "step": 8361 }, { "epoch": 0.6200044487284051, "grad_norm": 0.366136372089386, "learning_rate": 0.00011640910227556889, "loss": 0.4749, "step": 8362 }, { "epoch": 0.620078594201824, "grad_norm": 0.38078773021698, "learning_rate": 0.00011639909977494374, "loss": 0.5183, "step": 8363 }, { "epoch": 0.6201527396752429, "grad_norm": 0.36788907647132874, "learning_rate": 0.00011638909727431859, "loss": 0.4989, "step": 8364 }, { "epoch": 0.6202268851486616, "grad_norm": 0.35608598589897156, "learning_rate": 0.00011637909477369341, "loss": 0.4966, "step": 8365 }, { "epoch": 0.6203010306220805, "grad_norm": 0.3533947169780731, "learning_rate": 0.00011636909227306827, "loss": 0.4997, "step": 8366 }, { "epoch": 0.6203751760954994, "grad_norm": 0.36512961983680725, "learning_rate": 0.00011635908977244312, "loss": 0.5134, "step": 8367 }, { "epoch": 0.6204493215689182, "grad_norm": 0.36475250124931335, "learning_rate": 0.00011634908727181797, "loss": 0.4844, "step": 8368 }, { "epoch": 0.6205234670423371, "grad_norm": 0.3581561744213104, "learning_rate": 0.00011633908477119279, "loss": 0.5008, "step": 8369 }, { "epoch": 0.620597612515756, "grad_norm": 0.34931695461273193, "learning_rate": 0.00011632908227056764, "loss": 0.5186, "step": 8370 }, { "epoch": 0.6206717579891747, "grad_norm": 0.3723771572113037, "learning_rate": 0.00011631907976994249, "loss": 0.5334, "step": 8371 }, { "epoch": 0.6207459034625936, "grad_norm": 0.3665502965450287, "learning_rate": 0.00011630907726931734, "loss": 0.519, "step": 8372 }, { "epoch": 0.6208200489360125, "grad_norm": 0.33171895146369934, "learning_rate": 0.00011629907476869218, "loss": 0.4709, "step": 8373 }, { "epoch": 0.6208941944094313, "grad_norm": 0.3524007201194763, "learning_rate": 0.00011628907226806702, "loss": 0.5312, "step": 8374 }, { "epoch": 0.6209683398828502, "grad_norm": 0.3876904249191284, "learning_rate": 0.00011627906976744187, "loss": 0.5185, "step": 8375 }, { "epoch": 0.621042485356269, "grad_norm": 0.3468281924724579, "learning_rate": 0.0001162690672668167, "loss": 0.4735, "step": 8376 }, { "epoch": 0.6211166308296878, "grad_norm": 0.36500295996665955, "learning_rate": 0.00011625906476619155, "loss": 0.5408, "step": 8377 }, { "epoch": 0.6211907763031067, "grad_norm": 0.3572501242160797, "learning_rate": 0.0001162490622655664, "loss": 0.4977, "step": 8378 }, { "epoch": 0.6212649217765256, "grad_norm": 0.3440505564212799, "learning_rate": 0.00011623905976494126, "loss": 0.5113, "step": 8379 }, { "epoch": 0.6213390672499444, "grad_norm": 0.3743734359741211, "learning_rate": 0.00011622905726431608, "loss": 0.5376, "step": 8380 }, { "epoch": 0.6214132127233633, "grad_norm": 0.3510096073150635, "learning_rate": 0.00011621905476369093, "loss": 0.4921, "step": 8381 }, { "epoch": 0.6214873581967821, "grad_norm": 0.35507357120513916, "learning_rate": 0.00011620905226306578, "loss": 0.4809, "step": 8382 }, { "epoch": 0.6215615036702009, "grad_norm": 0.39122915267944336, "learning_rate": 0.00011619904976244063, "loss": 0.5686, "step": 8383 }, { "epoch": 0.6216356491436198, "grad_norm": 0.3695257306098938, "learning_rate": 0.00011618904726181546, "loss": 0.5213, "step": 8384 }, { "epoch": 0.6217097946170387, "grad_norm": 0.37680378556251526, "learning_rate": 0.0001161790447611903, "loss": 0.5039, "step": 8385 }, { "epoch": 0.6217839400904575, "grad_norm": 0.3439285159111023, "learning_rate": 0.00011616904226056516, "loss": 0.5055, "step": 8386 }, { "epoch": 0.6218580855638763, "grad_norm": 0.3609078824520111, "learning_rate": 0.00011615903975993998, "loss": 0.5266, "step": 8387 }, { "epoch": 0.6219322310372952, "grad_norm": 0.3545624315738678, "learning_rate": 0.00011614903725931483, "loss": 0.4972, "step": 8388 }, { "epoch": 0.622006376510714, "grad_norm": 0.34497684240341187, "learning_rate": 0.00011613903475868968, "loss": 0.5052, "step": 8389 }, { "epoch": 0.6220805219841329, "grad_norm": 0.3663181662559509, "learning_rate": 0.00011612903225806453, "loss": 0.5235, "step": 8390 }, { "epoch": 0.6221546674575518, "grad_norm": 0.35956504940986633, "learning_rate": 0.00011611902975743936, "loss": 0.5163, "step": 8391 }, { "epoch": 0.6222288129309705, "grad_norm": 0.3818526864051819, "learning_rate": 0.0001161090272568142, "loss": 0.4874, "step": 8392 }, { "epoch": 0.6223029584043894, "grad_norm": 0.35081490874290466, "learning_rate": 0.00011609902475618906, "loss": 0.5072, "step": 8393 }, { "epoch": 0.6223771038778083, "grad_norm": 0.3671087324619293, "learning_rate": 0.00011608902225556391, "loss": 0.4982, "step": 8394 }, { "epoch": 0.6224512493512271, "grad_norm": 0.34460604190826416, "learning_rate": 0.00011607901975493873, "loss": 0.4983, "step": 8395 }, { "epoch": 0.622525394824646, "grad_norm": 0.3716515600681305, "learning_rate": 0.00011606901725431358, "loss": 0.5033, "step": 8396 }, { "epoch": 0.6225995402980649, "grad_norm": 0.3912642300128937, "learning_rate": 0.00011605901475368843, "loss": 0.5115, "step": 8397 }, { "epoch": 0.6226736857714836, "grad_norm": 0.3324280083179474, "learning_rate": 0.00011604901225306326, "loss": 0.5009, "step": 8398 }, { "epoch": 0.6227478312449025, "grad_norm": 0.3685082495212555, "learning_rate": 0.0001160390097524381, "loss": 0.4827, "step": 8399 }, { "epoch": 0.6228219767183214, "grad_norm": 0.36343857645988464, "learning_rate": 0.00011602900725181296, "loss": 0.4847, "step": 8400 }, { "epoch": 0.6228961221917402, "grad_norm": 0.3755951523780823, "learning_rate": 0.00011601900475118781, "loss": 0.5405, "step": 8401 }, { "epoch": 0.6229702676651591, "grad_norm": 0.3425261080265045, "learning_rate": 0.00011600900225056263, "loss": 0.4667, "step": 8402 }, { "epoch": 0.6230444131385778, "grad_norm": 0.3598259389400482, "learning_rate": 0.00011599899974993748, "loss": 0.4923, "step": 8403 }, { "epoch": 0.6231185586119967, "grad_norm": 0.37001854181289673, "learning_rate": 0.00011598899724931233, "loss": 0.5426, "step": 8404 }, { "epoch": 0.6231927040854156, "grad_norm": 0.3416057527065277, "learning_rate": 0.00011597899474868718, "loss": 0.4605, "step": 8405 }, { "epoch": 0.6232668495588344, "grad_norm": 0.3645964562892914, "learning_rate": 0.00011596899224806202, "loss": 0.5028, "step": 8406 }, { "epoch": 0.6233409950322533, "grad_norm": 0.3696443736553192, "learning_rate": 0.00011595898974743686, "loss": 0.4948, "step": 8407 }, { "epoch": 0.6234151405056721, "grad_norm": 0.344199538230896, "learning_rate": 0.00011594898724681171, "loss": 0.4927, "step": 8408 }, { "epoch": 0.6234892859790909, "grad_norm": 0.37317216396331787, "learning_rate": 0.00011593898474618656, "loss": 0.5115, "step": 8409 }, { "epoch": 0.6235634314525098, "grad_norm": 0.38250547647476196, "learning_rate": 0.0001159289822455614, "loss": 0.5327, "step": 8410 }, { "epoch": 0.6236375769259287, "grad_norm": 0.38370078802108765, "learning_rate": 0.00011591897974493625, "loss": 0.5297, "step": 8411 }, { "epoch": 0.6237117223993475, "grad_norm": 0.35897183418273926, "learning_rate": 0.00011590897724431108, "loss": 0.5014, "step": 8412 }, { "epoch": 0.6237858678727664, "grad_norm": 0.33549046516418457, "learning_rate": 0.00011589897474368592, "loss": 0.4498, "step": 8413 }, { "epoch": 0.6238600133461852, "grad_norm": 0.36842966079711914, "learning_rate": 0.00011588897224306077, "loss": 0.5037, "step": 8414 }, { "epoch": 0.623934158819604, "grad_norm": 0.3595353364944458, "learning_rate": 0.00011587896974243562, "loss": 0.4912, "step": 8415 }, { "epoch": 0.6240083042930229, "grad_norm": 0.366303026676178, "learning_rate": 0.00011586896724181047, "loss": 0.5428, "step": 8416 }, { "epoch": 0.6240824497664418, "grad_norm": 0.3655880391597748, "learning_rate": 0.0001158589647411853, "loss": 0.5024, "step": 8417 }, { "epoch": 0.6241565952398606, "grad_norm": 0.36703839898109436, "learning_rate": 0.00011584896224056015, "loss": 0.5244, "step": 8418 }, { "epoch": 0.6242307407132794, "grad_norm": 0.37158626317977905, "learning_rate": 0.000115838959739935, "loss": 0.4986, "step": 8419 }, { "epoch": 0.6243048861866983, "grad_norm": 0.3945683240890503, "learning_rate": 0.00011582895723930985, "loss": 0.5165, "step": 8420 }, { "epoch": 0.6243790316601171, "grad_norm": 0.36980798840522766, "learning_rate": 0.00011581895473868467, "loss": 0.5083, "step": 8421 }, { "epoch": 0.624453177133536, "grad_norm": 0.3377360999584198, "learning_rate": 0.00011580895223805952, "loss": 0.4755, "step": 8422 }, { "epoch": 0.6245273226069549, "grad_norm": 0.34534239768981934, "learning_rate": 0.00011579894973743437, "loss": 0.5004, "step": 8423 }, { "epoch": 0.6246014680803736, "grad_norm": 0.3708827495574951, "learning_rate": 0.0001157889472368092, "loss": 0.5363, "step": 8424 }, { "epoch": 0.6246756135537925, "grad_norm": 0.35758236050605774, "learning_rate": 0.00011577894473618405, "loss": 0.5104, "step": 8425 }, { "epoch": 0.6247497590272114, "grad_norm": 0.3528095483779907, "learning_rate": 0.0001157689422355589, "loss": 0.5108, "step": 8426 }, { "epoch": 0.6248239045006302, "grad_norm": 0.36076200008392334, "learning_rate": 0.00011575893973493375, "loss": 0.4901, "step": 8427 }, { "epoch": 0.6248980499740491, "grad_norm": 0.33917292952537537, "learning_rate": 0.00011574893723430857, "loss": 0.4829, "step": 8428 }, { "epoch": 0.624972195447468, "grad_norm": 0.3403700292110443, "learning_rate": 0.00011573893473368342, "loss": 0.4967, "step": 8429 }, { "epoch": 0.6250463409208867, "grad_norm": 0.36879006028175354, "learning_rate": 0.00011572893223305827, "loss": 0.5253, "step": 8430 }, { "epoch": 0.6251204863943056, "grad_norm": 0.3511125147342682, "learning_rate": 0.00011571892973243312, "loss": 0.5284, "step": 8431 }, { "epoch": 0.6251946318677245, "grad_norm": 0.3475728929042816, "learning_rate": 0.00011570892723180795, "loss": 0.5131, "step": 8432 }, { "epoch": 0.6252687773411433, "grad_norm": 0.3601863980293274, "learning_rate": 0.0001156989247311828, "loss": 0.5106, "step": 8433 }, { "epoch": 0.6253429228145622, "grad_norm": 0.3675861954689026, "learning_rate": 0.00011568892223055765, "loss": 0.5207, "step": 8434 }, { "epoch": 0.625417068287981, "grad_norm": 0.35293033719062805, "learning_rate": 0.00011567891972993247, "loss": 0.52, "step": 8435 }, { "epoch": 0.6254912137613998, "grad_norm": 0.33178770542144775, "learning_rate": 0.00011566891722930732, "loss": 0.4305, "step": 8436 }, { "epoch": 0.6255653592348187, "grad_norm": 0.3700694441795349, "learning_rate": 0.00011565891472868217, "loss": 0.5034, "step": 8437 }, { "epoch": 0.6256395047082376, "grad_norm": 0.36240753531455994, "learning_rate": 0.00011564891222805702, "loss": 0.5231, "step": 8438 }, { "epoch": 0.6257136501816564, "grad_norm": 0.374180406332016, "learning_rate": 0.00011563890972743186, "loss": 0.4927, "step": 8439 }, { "epoch": 0.6257877956550753, "grad_norm": 0.3577299118041992, "learning_rate": 0.0001156289072268067, "loss": 0.4927, "step": 8440 }, { "epoch": 0.6258619411284941, "grad_norm": 0.3600868284702301, "learning_rate": 0.00011561890472618155, "loss": 0.5355, "step": 8441 }, { "epoch": 0.6259360866019129, "grad_norm": 0.3620564043521881, "learning_rate": 0.0001156089022255564, "loss": 0.4929, "step": 8442 }, { "epoch": 0.6260102320753318, "grad_norm": 0.35818830132484436, "learning_rate": 0.00011559889972493124, "loss": 0.4559, "step": 8443 }, { "epoch": 0.6260843775487507, "grad_norm": 0.3409063518047333, "learning_rate": 0.00011558889722430609, "loss": 0.4869, "step": 8444 }, { "epoch": 0.6261585230221695, "grad_norm": 0.35178399085998535, "learning_rate": 0.00011557889472368092, "loss": 0.4946, "step": 8445 }, { "epoch": 0.6262326684955883, "grad_norm": 0.3611736297607422, "learning_rate": 0.00011556889222305577, "loss": 0.4946, "step": 8446 }, { "epoch": 0.6263068139690072, "grad_norm": 0.35573840141296387, "learning_rate": 0.00011555888972243061, "loss": 0.5075, "step": 8447 }, { "epoch": 0.626380959442426, "grad_norm": 0.35107457637786865, "learning_rate": 0.00011554888722180546, "loss": 0.4974, "step": 8448 }, { "epoch": 0.6264551049158449, "grad_norm": 0.3467138111591339, "learning_rate": 0.00011553888472118031, "loss": 0.5023, "step": 8449 }, { "epoch": 0.6265292503892638, "grad_norm": 0.372183620929718, "learning_rate": 0.00011552888222055514, "loss": 0.5646, "step": 8450 }, { "epoch": 0.6266033958626825, "grad_norm": 0.3482043743133545, "learning_rate": 0.00011551887971992999, "loss": 0.4938, "step": 8451 }, { "epoch": 0.6266775413361014, "grad_norm": 0.37942245602607727, "learning_rate": 0.00011550887721930484, "loss": 0.5101, "step": 8452 }, { "epoch": 0.6267516868095203, "grad_norm": 0.345076322555542, "learning_rate": 0.00011549887471867969, "loss": 0.4977, "step": 8453 }, { "epoch": 0.6268258322829391, "grad_norm": 0.3656318187713623, "learning_rate": 0.00011548887221805451, "loss": 0.5251, "step": 8454 }, { "epoch": 0.626899977756358, "grad_norm": 0.36058375239372253, "learning_rate": 0.00011547886971742936, "loss": 0.5155, "step": 8455 }, { "epoch": 0.6269741232297769, "grad_norm": 0.3386649489402771, "learning_rate": 0.00011546886721680421, "loss": 0.4796, "step": 8456 }, { "epoch": 0.6270482687031956, "grad_norm": 0.3324510455131531, "learning_rate": 0.00011545886471617906, "loss": 0.4853, "step": 8457 }, { "epoch": 0.6271224141766145, "grad_norm": 0.36031001806259155, "learning_rate": 0.00011544886221555389, "loss": 0.4755, "step": 8458 }, { "epoch": 0.6271965596500334, "grad_norm": 0.34359845519065857, "learning_rate": 0.00011543885971492874, "loss": 0.4514, "step": 8459 }, { "epoch": 0.6272707051234522, "grad_norm": 0.3704281747341156, "learning_rate": 0.00011542885721430359, "loss": 0.5173, "step": 8460 }, { "epoch": 0.6273448505968711, "grad_norm": 0.36735621094703674, "learning_rate": 0.00011541885471367841, "loss": 0.5032, "step": 8461 }, { "epoch": 0.62741899607029, "grad_norm": 0.372186541557312, "learning_rate": 0.00011540885221305326, "loss": 0.5148, "step": 8462 }, { "epoch": 0.6274931415437087, "grad_norm": 0.3612196743488312, "learning_rate": 0.00011539884971242811, "loss": 0.5468, "step": 8463 }, { "epoch": 0.6275672870171276, "grad_norm": 0.3692563474178314, "learning_rate": 0.00011538884721180296, "loss": 0.5115, "step": 8464 }, { "epoch": 0.6276414324905465, "grad_norm": 0.33941885828971863, "learning_rate": 0.00011537884471117779, "loss": 0.4558, "step": 8465 }, { "epoch": 0.6277155779639653, "grad_norm": 0.3359280824661255, "learning_rate": 0.00011536884221055264, "loss": 0.492, "step": 8466 }, { "epoch": 0.6277897234373842, "grad_norm": 0.3495597243309021, "learning_rate": 0.00011535883970992749, "loss": 0.4689, "step": 8467 }, { "epoch": 0.627863868910803, "grad_norm": 0.37907660007476807, "learning_rate": 0.00011534883720930234, "loss": 0.592, "step": 8468 }, { "epoch": 0.6279380143842218, "grad_norm": 0.3452785909175873, "learning_rate": 0.00011533883470867716, "loss": 0.4765, "step": 8469 }, { "epoch": 0.6280121598576407, "grad_norm": 0.3542364239692688, "learning_rate": 0.00011532883220805201, "loss": 0.4544, "step": 8470 }, { "epoch": 0.6280863053310596, "grad_norm": 0.37085986137390137, "learning_rate": 0.00011531882970742686, "loss": 0.5013, "step": 8471 }, { "epoch": 0.6281604508044784, "grad_norm": 0.3511097729206085, "learning_rate": 0.0001153088272068017, "loss": 0.489, "step": 8472 }, { "epoch": 0.6282345962778972, "grad_norm": 0.36409991979599, "learning_rate": 0.00011529882470617654, "loss": 0.5146, "step": 8473 }, { "epoch": 0.6283087417513161, "grad_norm": 0.3547581136226654, "learning_rate": 0.00011528882220555139, "loss": 0.4726, "step": 8474 }, { "epoch": 0.6283828872247349, "grad_norm": 0.3486550450325012, "learning_rate": 0.00011527881970492624, "loss": 0.4877, "step": 8475 }, { "epoch": 0.6284570326981538, "grad_norm": 0.3728642761707306, "learning_rate": 0.00011526881720430108, "loss": 0.5514, "step": 8476 }, { "epoch": 0.6285311781715727, "grad_norm": 0.39242023229599, "learning_rate": 0.00011525881470367593, "loss": 0.5104, "step": 8477 }, { "epoch": 0.6286053236449914, "grad_norm": 0.39461472630500793, "learning_rate": 0.00011524881220305076, "loss": 0.5516, "step": 8478 }, { "epoch": 0.6286794691184103, "grad_norm": 0.36887574195861816, "learning_rate": 0.00011523880970242562, "loss": 0.5206, "step": 8479 }, { "epoch": 0.6287536145918292, "grad_norm": 0.3300490975379944, "learning_rate": 0.00011522880720180045, "loss": 0.4581, "step": 8480 }, { "epoch": 0.628827760065248, "grad_norm": 0.3809490501880646, "learning_rate": 0.0001152188047011753, "loss": 0.5124, "step": 8481 }, { "epoch": 0.6289019055386669, "grad_norm": 0.35068461298942566, "learning_rate": 0.00011520880220055015, "loss": 0.4983, "step": 8482 }, { "epoch": 0.6289760510120858, "grad_norm": 0.37140464782714844, "learning_rate": 0.00011519879969992499, "loss": 0.5023, "step": 8483 }, { "epoch": 0.6290501964855045, "grad_norm": 0.3713722825050354, "learning_rate": 0.00011518879719929983, "loss": 0.5145, "step": 8484 }, { "epoch": 0.6291243419589234, "grad_norm": 0.33465051651000977, "learning_rate": 0.00011517879469867468, "loss": 0.4686, "step": 8485 }, { "epoch": 0.6291984874323423, "grad_norm": 0.3501039743423462, "learning_rate": 0.00011516879219804953, "loss": 0.4781, "step": 8486 }, { "epoch": 0.6292726329057611, "grad_norm": 0.3639433681964874, "learning_rate": 0.00011515878969742435, "loss": 0.5121, "step": 8487 }, { "epoch": 0.62934677837918, "grad_norm": 0.37166091799736023, "learning_rate": 0.0001151487871967992, "loss": 0.5355, "step": 8488 }, { "epoch": 0.6294209238525988, "grad_norm": 0.36376944184303284, "learning_rate": 0.00011513878469617405, "loss": 0.4898, "step": 8489 }, { "epoch": 0.6294950693260176, "grad_norm": 0.322615385055542, "learning_rate": 0.0001151287821955489, "loss": 0.4708, "step": 8490 }, { "epoch": 0.6295692147994365, "grad_norm": 0.37492918968200684, "learning_rate": 0.00011511877969492373, "loss": 0.5541, "step": 8491 }, { "epoch": 0.6296433602728554, "grad_norm": 0.35733336210250854, "learning_rate": 0.00011510877719429858, "loss": 0.525, "step": 8492 }, { "epoch": 0.6297175057462742, "grad_norm": 0.3394257128238678, "learning_rate": 0.00011509877469367343, "loss": 0.4742, "step": 8493 }, { "epoch": 0.629791651219693, "grad_norm": 0.3702225089073181, "learning_rate": 0.00011508877219304828, "loss": 0.551, "step": 8494 }, { "epoch": 0.6298657966931119, "grad_norm": 0.3448406159877777, "learning_rate": 0.0001150787696924231, "loss": 0.5058, "step": 8495 }, { "epoch": 0.6299399421665307, "grad_norm": 0.3678189814090729, "learning_rate": 0.00011506876719179795, "loss": 0.5125, "step": 8496 }, { "epoch": 0.6300140876399496, "grad_norm": 0.3312377631664276, "learning_rate": 0.0001150587646911728, "loss": 0.4467, "step": 8497 }, { "epoch": 0.6300882331133685, "grad_norm": 0.3414095640182495, "learning_rate": 0.00011504876219054763, "loss": 0.4913, "step": 8498 }, { "epoch": 0.6301623785867873, "grad_norm": 0.37469542026519775, "learning_rate": 0.00011503875968992248, "loss": 0.4884, "step": 8499 }, { "epoch": 0.6302365240602061, "grad_norm": 0.34893855452537537, "learning_rate": 0.00011502875718929733, "loss": 0.4708, "step": 8500 }, { "epoch": 0.630310669533625, "grad_norm": 0.3620334267616272, "learning_rate": 0.00011501875468867218, "loss": 0.5154, "step": 8501 }, { "epoch": 0.6303848150070438, "grad_norm": 0.35932666063308716, "learning_rate": 0.000115008752188047, "loss": 0.5021, "step": 8502 }, { "epoch": 0.6304589604804627, "grad_norm": 0.396045982837677, "learning_rate": 0.00011499874968742185, "loss": 0.5581, "step": 8503 }, { "epoch": 0.6305331059538816, "grad_norm": 0.353211909532547, "learning_rate": 0.0001149887471867967, "loss": 0.4959, "step": 8504 }, { "epoch": 0.6306072514273003, "grad_norm": 0.348749041557312, "learning_rate": 0.00011497874468617156, "loss": 0.4962, "step": 8505 }, { "epoch": 0.6306813969007192, "grad_norm": 0.37598416209220886, "learning_rate": 0.00011496874218554638, "loss": 0.5164, "step": 8506 }, { "epoch": 0.6307555423741381, "grad_norm": 0.35771462321281433, "learning_rate": 0.00011495873968492123, "loss": 0.4979, "step": 8507 }, { "epoch": 0.6308296878475569, "grad_norm": 0.34528833627700806, "learning_rate": 0.00011494873718429608, "loss": 0.4952, "step": 8508 }, { "epoch": 0.6309038333209758, "grad_norm": 0.3530285656452179, "learning_rate": 0.00011493873468367092, "loss": 0.4743, "step": 8509 }, { "epoch": 0.6309779787943947, "grad_norm": 0.3538447916507721, "learning_rate": 0.00011492873218304577, "loss": 0.5224, "step": 8510 }, { "epoch": 0.6310521242678134, "grad_norm": 0.342552125453949, "learning_rate": 0.0001149187296824206, "loss": 0.49, "step": 8511 }, { "epoch": 0.6311262697412323, "grad_norm": 0.3652328550815582, "learning_rate": 0.00011490872718179546, "loss": 0.5607, "step": 8512 }, { "epoch": 0.6312004152146512, "grad_norm": 0.36539170145988464, "learning_rate": 0.0001148987246811703, "loss": 0.4963, "step": 8513 }, { "epoch": 0.63127456068807, "grad_norm": 0.3689567744731903, "learning_rate": 0.00011488872218054514, "loss": 0.4877, "step": 8514 }, { "epoch": 0.6313487061614889, "grad_norm": 0.3536539375782013, "learning_rate": 0.00011487871967992, "loss": 0.4648, "step": 8515 }, { "epoch": 0.6314228516349076, "grad_norm": 0.38889768719673157, "learning_rate": 0.00011486871717929483, "loss": 0.5891, "step": 8516 }, { "epoch": 0.6314969971083265, "grad_norm": 0.3571968674659729, "learning_rate": 0.00011485871467866967, "loss": 0.4965, "step": 8517 }, { "epoch": 0.6315711425817454, "grad_norm": 0.3685200810432434, "learning_rate": 0.00011484871217804452, "loss": 0.5466, "step": 8518 }, { "epoch": 0.6316452880551642, "grad_norm": 0.35270363092422485, "learning_rate": 0.00011483870967741937, "loss": 0.5126, "step": 8519 }, { "epoch": 0.6317194335285831, "grad_norm": 0.33763790130615234, "learning_rate": 0.00011482870717679422, "loss": 0.4922, "step": 8520 }, { "epoch": 0.631793579002002, "grad_norm": 0.36652758717536926, "learning_rate": 0.00011481870467616904, "loss": 0.5431, "step": 8521 }, { "epoch": 0.6318677244754207, "grad_norm": 0.32708975672721863, "learning_rate": 0.0001148087021755439, "loss": 0.4508, "step": 8522 }, { "epoch": 0.6319418699488396, "grad_norm": 0.3511047661304474, "learning_rate": 0.00011479869967491875, "loss": 0.5026, "step": 8523 }, { "epoch": 0.6320160154222585, "grad_norm": 0.3299635946750641, "learning_rate": 0.00011478869717429357, "loss": 0.5051, "step": 8524 }, { "epoch": 0.6320901608956773, "grad_norm": 0.34506168961524963, "learning_rate": 0.00011477869467366842, "loss": 0.4842, "step": 8525 }, { "epoch": 0.6321643063690962, "grad_norm": 0.35359811782836914, "learning_rate": 0.00011476869217304327, "loss": 0.5051, "step": 8526 }, { "epoch": 0.632238451842515, "grad_norm": 0.3781575858592987, "learning_rate": 0.00011475868967241812, "loss": 0.5391, "step": 8527 }, { "epoch": 0.6323125973159338, "grad_norm": 0.3451736271381378, "learning_rate": 0.00011474868717179294, "loss": 0.4999, "step": 8528 }, { "epoch": 0.6323867427893527, "grad_norm": 0.35996440052986145, "learning_rate": 0.0001147386846711678, "loss": 0.4966, "step": 8529 }, { "epoch": 0.6324608882627716, "grad_norm": 0.3638383746147156, "learning_rate": 0.00011472868217054265, "loss": 0.4955, "step": 8530 }, { "epoch": 0.6325350337361904, "grad_norm": 0.3668394982814789, "learning_rate": 0.0001147186796699175, "loss": 0.4935, "step": 8531 }, { "epoch": 0.6326091792096092, "grad_norm": 0.3456343114376068, "learning_rate": 0.00011470867716929232, "loss": 0.4992, "step": 8532 }, { "epoch": 0.6326833246830281, "grad_norm": 0.3446000814437866, "learning_rate": 0.00011469867466866717, "loss": 0.5061, "step": 8533 }, { "epoch": 0.6327574701564469, "grad_norm": 0.3786812424659729, "learning_rate": 0.00011468867216804202, "loss": 0.5654, "step": 8534 }, { "epoch": 0.6328316156298658, "grad_norm": 0.34761491417884827, "learning_rate": 0.00011467866966741684, "loss": 0.498, "step": 8535 }, { "epoch": 0.6329057611032847, "grad_norm": 0.3429306447505951, "learning_rate": 0.0001146686671667917, "loss": 0.5074, "step": 8536 }, { "epoch": 0.6329799065767034, "grad_norm": 0.36908158659935, "learning_rate": 0.00011465866466616655, "loss": 0.4953, "step": 8537 }, { "epoch": 0.6330540520501223, "grad_norm": 0.3597201108932495, "learning_rate": 0.0001146486621655414, "loss": 0.5273, "step": 8538 }, { "epoch": 0.6331281975235412, "grad_norm": 0.3524170219898224, "learning_rate": 0.00011463865966491622, "loss": 0.4787, "step": 8539 }, { "epoch": 0.63320234299696, "grad_norm": 0.3613862693309784, "learning_rate": 0.00011462865716429107, "loss": 0.5052, "step": 8540 }, { "epoch": 0.6332764884703789, "grad_norm": 0.3597727417945862, "learning_rate": 0.00011461865466366592, "loss": 0.4806, "step": 8541 }, { "epoch": 0.6333506339437978, "grad_norm": 0.3648364245891571, "learning_rate": 0.00011460865216304077, "loss": 0.5162, "step": 8542 }, { "epoch": 0.6334247794172165, "grad_norm": 0.34728604555130005, "learning_rate": 0.0001145986496624156, "loss": 0.5044, "step": 8543 }, { "epoch": 0.6334989248906354, "grad_norm": 0.37794405221939087, "learning_rate": 0.00011458864716179045, "loss": 0.5177, "step": 8544 }, { "epoch": 0.6335730703640543, "grad_norm": 0.39053115248680115, "learning_rate": 0.0001145786446611653, "loss": 0.5503, "step": 8545 }, { "epoch": 0.6336472158374731, "grad_norm": 0.3920425772666931, "learning_rate": 0.00011456864216054013, "loss": 0.5206, "step": 8546 }, { "epoch": 0.633721361310892, "grad_norm": 0.3842904567718506, "learning_rate": 0.00011455863965991498, "loss": 0.5503, "step": 8547 }, { "epoch": 0.6337955067843108, "grad_norm": 0.3734493851661682, "learning_rate": 0.00011454863715928984, "loss": 0.5345, "step": 8548 }, { "epoch": 0.6338696522577296, "grad_norm": 0.37330538034439087, "learning_rate": 0.00011453863465866467, "loss": 0.5431, "step": 8549 }, { "epoch": 0.6339437977311485, "grad_norm": 0.3204697370529175, "learning_rate": 0.00011452863215803951, "loss": 0.4456, "step": 8550 }, { "epoch": 0.6340179432045674, "grad_norm": 0.36485978960990906, "learning_rate": 0.00011451862965741436, "loss": 0.5326, "step": 8551 }, { "epoch": 0.6340920886779862, "grad_norm": 0.3575007915496826, "learning_rate": 0.00011450862715678921, "loss": 0.5287, "step": 8552 }, { "epoch": 0.634166234151405, "grad_norm": 0.3675742745399475, "learning_rate": 0.00011449862465616406, "loss": 0.5144, "step": 8553 }, { "epoch": 0.6342403796248239, "grad_norm": 0.3484244644641876, "learning_rate": 0.00011448862215553889, "loss": 0.51, "step": 8554 }, { "epoch": 0.6343145250982427, "grad_norm": 0.36545848846435547, "learning_rate": 0.00011447861965491374, "loss": 0.5112, "step": 8555 }, { "epoch": 0.6343886705716616, "grad_norm": 0.33882129192352295, "learning_rate": 0.00011446861715428859, "loss": 0.4977, "step": 8556 }, { "epoch": 0.6344628160450805, "grad_norm": 0.3541110157966614, "learning_rate": 0.00011445861465366344, "loss": 0.4918, "step": 8557 }, { "epoch": 0.6345369615184993, "grad_norm": 0.3639625906944275, "learning_rate": 0.00011444861215303826, "loss": 0.5, "step": 8558 }, { "epoch": 0.6346111069919181, "grad_norm": 0.3785148561000824, "learning_rate": 0.00011443860965241311, "loss": 0.5472, "step": 8559 }, { "epoch": 0.634685252465337, "grad_norm": 0.35763421654701233, "learning_rate": 0.00011442860715178796, "loss": 0.4737, "step": 8560 }, { "epoch": 0.6347593979387558, "grad_norm": 0.35302719473838806, "learning_rate": 0.00011441860465116279, "loss": 0.4962, "step": 8561 }, { "epoch": 0.6348335434121747, "grad_norm": 0.3719359338283539, "learning_rate": 0.00011440860215053764, "loss": 0.522, "step": 8562 }, { "epoch": 0.6349076888855936, "grad_norm": 0.36600959300994873, "learning_rate": 0.00011439859964991249, "loss": 0.4934, "step": 8563 }, { "epoch": 0.6349818343590123, "grad_norm": 0.3767015337944031, "learning_rate": 0.00011438859714928734, "loss": 0.5184, "step": 8564 }, { "epoch": 0.6350559798324312, "grad_norm": 0.373859167098999, "learning_rate": 0.00011437859464866216, "loss": 0.5111, "step": 8565 }, { "epoch": 0.6351301253058501, "grad_norm": 0.37160295248031616, "learning_rate": 0.00011436859214803701, "loss": 0.5258, "step": 8566 }, { "epoch": 0.6352042707792689, "grad_norm": 0.3478938043117523, "learning_rate": 0.00011435858964741186, "loss": 0.5159, "step": 8567 }, { "epoch": 0.6352784162526878, "grad_norm": 0.37537044286727905, "learning_rate": 0.00011434858714678671, "loss": 0.506, "step": 8568 }, { "epoch": 0.6353525617261067, "grad_norm": 0.3704380691051483, "learning_rate": 0.00011433858464616154, "loss": 0.5036, "step": 8569 }, { "epoch": 0.6354267071995254, "grad_norm": 0.35414639115333557, "learning_rate": 0.00011432858214553639, "loss": 0.5068, "step": 8570 }, { "epoch": 0.6355008526729443, "grad_norm": 0.3767618238925934, "learning_rate": 0.00011431857964491124, "loss": 0.5063, "step": 8571 }, { "epoch": 0.6355749981463632, "grad_norm": 0.3575684726238251, "learning_rate": 0.00011430857714428606, "loss": 0.479, "step": 8572 }, { "epoch": 0.635649143619782, "grad_norm": 0.3513491451740265, "learning_rate": 0.00011429857464366091, "loss": 0.467, "step": 8573 }, { "epoch": 0.6357232890932009, "grad_norm": 0.38135746121406555, "learning_rate": 0.00011428857214303576, "loss": 0.5441, "step": 8574 }, { "epoch": 0.6357974345666197, "grad_norm": 0.36668938398361206, "learning_rate": 0.00011427856964241061, "loss": 0.542, "step": 8575 }, { "epoch": 0.6358715800400385, "grad_norm": 0.352763295173645, "learning_rate": 0.00011426856714178544, "loss": 0.4891, "step": 8576 }, { "epoch": 0.6359457255134574, "grad_norm": 0.3983723819255829, "learning_rate": 0.00011425856464116029, "loss": 0.5314, "step": 8577 }, { "epoch": 0.6360198709868763, "grad_norm": 0.3463214635848999, "learning_rate": 0.00011424856214053514, "loss": 0.4847, "step": 8578 }, { "epoch": 0.6360940164602951, "grad_norm": 0.3537573516368866, "learning_rate": 0.00011423855963990999, "loss": 0.5027, "step": 8579 }, { "epoch": 0.636168161933714, "grad_norm": 0.3421398401260376, "learning_rate": 0.00011422855713928483, "loss": 0.4764, "step": 8580 }, { "epoch": 0.6362423074071328, "grad_norm": 0.3619804084300995, "learning_rate": 0.00011421855463865966, "loss": 0.5102, "step": 8581 }, { "epoch": 0.6363164528805516, "grad_norm": 0.34036973118782043, "learning_rate": 0.00011420855213803451, "loss": 0.4751, "step": 8582 }, { "epoch": 0.6363905983539705, "grad_norm": 0.34651997685432434, "learning_rate": 0.00011419854963740936, "loss": 0.513, "step": 8583 }, { "epoch": 0.6364647438273894, "grad_norm": 0.3386041820049286, "learning_rate": 0.0001141885471367842, "loss": 0.4842, "step": 8584 }, { "epoch": 0.6365388893008082, "grad_norm": 0.34565863013267517, "learning_rate": 0.00011417854463615905, "loss": 0.4947, "step": 8585 }, { "epoch": 0.636613034774227, "grad_norm": 0.3441994786262512, "learning_rate": 0.0001141685421355339, "loss": 0.4741, "step": 8586 }, { "epoch": 0.6366871802476459, "grad_norm": 0.3428966999053955, "learning_rate": 0.00011415853963490873, "loss": 0.4878, "step": 8587 }, { "epoch": 0.6367613257210647, "grad_norm": 0.34575551748275757, "learning_rate": 0.00011414853713428358, "loss": 0.4895, "step": 8588 }, { "epoch": 0.6368354711944836, "grad_norm": 0.36901870369911194, "learning_rate": 0.00011413853463365843, "loss": 0.5017, "step": 8589 }, { "epoch": 0.6369096166679025, "grad_norm": 0.34502509236335754, "learning_rate": 0.00011412853213303328, "loss": 0.4739, "step": 8590 }, { "epoch": 0.6369837621413212, "grad_norm": 0.3599104881286621, "learning_rate": 0.0001141185296324081, "loss": 0.517, "step": 8591 }, { "epoch": 0.6370579076147401, "grad_norm": 0.358948677778244, "learning_rate": 0.00011410852713178295, "loss": 0.4887, "step": 8592 }, { "epoch": 0.637132053088159, "grad_norm": 0.3449130952358246, "learning_rate": 0.0001140985246311578, "loss": 0.4735, "step": 8593 }, { "epoch": 0.6372061985615778, "grad_norm": 0.3587159216403961, "learning_rate": 0.00011408852213053265, "loss": 0.4733, "step": 8594 }, { "epoch": 0.6372803440349967, "grad_norm": 0.3623099625110626, "learning_rate": 0.00011407851962990748, "loss": 0.4988, "step": 8595 }, { "epoch": 0.6373544895084156, "grad_norm": 0.35852596163749695, "learning_rate": 0.00011406851712928233, "loss": 0.5056, "step": 8596 }, { "epoch": 0.6374286349818343, "grad_norm": 0.3697446584701538, "learning_rate": 0.00011405851462865718, "loss": 0.5397, "step": 8597 }, { "epoch": 0.6375027804552532, "grad_norm": 0.3551730215549469, "learning_rate": 0.000114048512128032, "loss": 0.5137, "step": 8598 }, { "epoch": 0.6375769259286721, "grad_norm": 0.35720905661582947, "learning_rate": 0.00011403850962740685, "loss": 0.5096, "step": 8599 }, { "epoch": 0.6376510714020909, "grad_norm": 0.3588992953300476, "learning_rate": 0.0001140285071267817, "loss": 0.4788, "step": 8600 }, { "epoch": 0.6377252168755098, "grad_norm": 0.35231345891952515, "learning_rate": 0.00011401850462615655, "loss": 0.488, "step": 8601 }, { "epoch": 0.6377993623489286, "grad_norm": 0.36798912286758423, "learning_rate": 0.00011400850212553138, "loss": 0.5099, "step": 8602 }, { "epoch": 0.6378735078223474, "grad_norm": 0.35780057311058044, "learning_rate": 0.00011399849962490623, "loss": 0.4774, "step": 8603 }, { "epoch": 0.6379476532957663, "grad_norm": 0.37037691473960876, "learning_rate": 0.00011398849712428108, "loss": 0.5303, "step": 8604 }, { "epoch": 0.6380217987691852, "grad_norm": 0.36044377088546753, "learning_rate": 0.00011397849462365593, "loss": 0.4866, "step": 8605 }, { "epoch": 0.638095944242604, "grad_norm": 0.3504844903945923, "learning_rate": 0.00011396849212303075, "loss": 0.5015, "step": 8606 }, { "epoch": 0.6381700897160228, "grad_norm": 0.34355267882347107, "learning_rate": 0.0001139584896224056, "loss": 0.4737, "step": 8607 }, { "epoch": 0.6382442351894417, "grad_norm": 0.37917834520339966, "learning_rate": 0.00011394848712178045, "loss": 0.5306, "step": 8608 }, { "epoch": 0.6383183806628605, "grad_norm": 0.3731840252876282, "learning_rate": 0.00011393848462115528, "loss": 0.5475, "step": 8609 }, { "epoch": 0.6383925261362794, "grad_norm": 0.3754374086856842, "learning_rate": 0.00011392848212053013, "loss": 0.5065, "step": 8610 }, { "epoch": 0.6384666716096983, "grad_norm": 0.3585868775844574, "learning_rate": 0.00011391847961990498, "loss": 0.4718, "step": 8611 }, { "epoch": 0.638540817083117, "grad_norm": 0.3930988907814026, "learning_rate": 0.00011390847711927983, "loss": 0.5359, "step": 8612 }, { "epoch": 0.6386149625565359, "grad_norm": 0.3655991852283478, "learning_rate": 0.00011389847461865467, "loss": 0.4959, "step": 8613 }, { "epoch": 0.6386891080299548, "grad_norm": 0.3770415782928467, "learning_rate": 0.0001138884721180295, "loss": 0.492, "step": 8614 }, { "epoch": 0.6387632535033736, "grad_norm": 0.37225469946861267, "learning_rate": 0.00011387846961740435, "loss": 0.5617, "step": 8615 }, { "epoch": 0.6388373989767925, "grad_norm": 0.3727419972419739, "learning_rate": 0.0001138684671167792, "loss": 0.5251, "step": 8616 }, { "epoch": 0.6389115444502114, "grad_norm": 0.35759884119033813, "learning_rate": 0.00011385846461615404, "loss": 0.4687, "step": 8617 }, { "epoch": 0.6389856899236301, "grad_norm": 0.3806547224521637, "learning_rate": 0.00011384846211552889, "loss": 0.4945, "step": 8618 }, { "epoch": 0.639059835397049, "grad_norm": 0.3492751121520996, "learning_rate": 0.00011383845961490373, "loss": 0.4523, "step": 8619 }, { "epoch": 0.6391339808704679, "grad_norm": 0.3319496810436249, "learning_rate": 0.00011382845711427858, "loss": 0.4732, "step": 8620 }, { "epoch": 0.6392081263438867, "grad_norm": 0.3559638261795044, "learning_rate": 0.00011381845461365342, "loss": 0.4663, "step": 8621 }, { "epoch": 0.6392822718173056, "grad_norm": 0.3483467996120453, "learning_rate": 0.00011380845211302827, "loss": 0.4741, "step": 8622 }, { "epoch": 0.6393564172907245, "grad_norm": 0.3801717162132263, "learning_rate": 0.00011379844961240312, "loss": 0.5117, "step": 8623 }, { "epoch": 0.6394305627641432, "grad_norm": 0.36324581503868103, "learning_rate": 0.00011378844711177794, "loss": 0.4936, "step": 8624 }, { "epoch": 0.6395047082375621, "grad_norm": 0.3499624729156494, "learning_rate": 0.00011377844461115279, "loss": 0.5147, "step": 8625 }, { "epoch": 0.639578853710981, "grad_norm": 0.37371328473091125, "learning_rate": 0.00011376844211052764, "loss": 0.5139, "step": 8626 }, { "epoch": 0.6396529991843998, "grad_norm": 0.36508145928382874, "learning_rate": 0.0001137584396099025, "loss": 0.5065, "step": 8627 }, { "epoch": 0.6397271446578187, "grad_norm": 0.388552725315094, "learning_rate": 0.00011374843710927732, "loss": 0.5528, "step": 8628 }, { "epoch": 0.6398012901312375, "grad_norm": 0.3512903153896332, "learning_rate": 0.00011373843460865217, "loss": 0.5154, "step": 8629 }, { "epoch": 0.6398754356046563, "grad_norm": 0.36566755175590515, "learning_rate": 0.00011372843210802702, "loss": 0.5202, "step": 8630 }, { "epoch": 0.6399495810780752, "grad_norm": 0.36935216188430786, "learning_rate": 0.00011371842960740187, "loss": 0.5348, "step": 8631 }, { "epoch": 0.640023726551494, "grad_norm": 0.38689088821411133, "learning_rate": 0.00011370842710677669, "loss": 0.5265, "step": 8632 }, { "epoch": 0.6400978720249129, "grad_norm": 0.33967265486717224, "learning_rate": 0.00011369842460615154, "loss": 0.4824, "step": 8633 }, { "epoch": 0.6401720174983317, "grad_norm": 0.3543209731578827, "learning_rate": 0.0001136884221055264, "loss": 0.5094, "step": 8634 }, { "epoch": 0.6402461629717505, "grad_norm": 0.34833288192749023, "learning_rate": 0.00011367841960490122, "loss": 0.5093, "step": 8635 }, { "epoch": 0.6403203084451694, "grad_norm": 0.3363930881023407, "learning_rate": 0.00011366841710427607, "loss": 0.4899, "step": 8636 }, { "epoch": 0.6403944539185883, "grad_norm": 0.3578992784023285, "learning_rate": 0.00011365841460365092, "loss": 0.5423, "step": 8637 }, { "epoch": 0.6404685993920071, "grad_norm": 0.33964598178863525, "learning_rate": 0.00011364841210302577, "loss": 0.4725, "step": 8638 }, { "epoch": 0.640542744865426, "grad_norm": 0.37360990047454834, "learning_rate": 0.0001136384096024006, "loss": 0.5226, "step": 8639 }, { "epoch": 0.6406168903388448, "grad_norm": 0.3370915353298187, "learning_rate": 0.00011362840710177544, "loss": 0.4989, "step": 8640 }, { "epoch": 0.6406910358122636, "grad_norm": 0.3602871596813202, "learning_rate": 0.0001136184046011503, "loss": 0.5082, "step": 8641 }, { "epoch": 0.6407651812856825, "grad_norm": 0.3530580401420593, "learning_rate": 0.00011360840210052515, "loss": 0.5235, "step": 8642 }, { "epoch": 0.6408393267591014, "grad_norm": 0.39710310101509094, "learning_rate": 0.00011359839959989997, "loss": 0.5646, "step": 8643 }, { "epoch": 0.6409134722325202, "grad_norm": 0.3627442419528961, "learning_rate": 0.00011358839709927482, "loss": 0.5426, "step": 8644 }, { "epoch": 0.640987617705939, "grad_norm": 0.35406482219696045, "learning_rate": 0.00011357839459864967, "loss": 0.4961, "step": 8645 }, { "epoch": 0.6410617631793579, "grad_norm": 0.3556644320487976, "learning_rate": 0.00011356839209802451, "loss": 0.5281, "step": 8646 }, { "epoch": 0.6411359086527767, "grad_norm": 0.330949604511261, "learning_rate": 0.00011355838959739934, "loss": 0.4503, "step": 8647 }, { "epoch": 0.6412100541261956, "grad_norm": 0.38053739070892334, "learning_rate": 0.0001135483870967742, "loss": 0.5731, "step": 8648 }, { "epoch": 0.6412841995996145, "grad_norm": 0.35202649235725403, "learning_rate": 0.00011353838459614905, "loss": 0.5077, "step": 8649 }, { "epoch": 0.6413583450730332, "grad_norm": 0.38397520780563354, "learning_rate": 0.00011352838209552388, "loss": 0.5512, "step": 8650 }, { "epoch": 0.6414324905464521, "grad_norm": 0.39772507548332214, "learning_rate": 0.00011351837959489873, "loss": 0.5425, "step": 8651 }, { "epoch": 0.641506636019871, "grad_norm": 0.35063982009887695, "learning_rate": 0.00011350837709427357, "loss": 0.503, "step": 8652 }, { "epoch": 0.6415807814932898, "grad_norm": 0.347277969121933, "learning_rate": 0.00011349837459364842, "loss": 0.4711, "step": 8653 }, { "epoch": 0.6416549269667087, "grad_norm": 0.3529090881347656, "learning_rate": 0.00011348837209302326, "loss": 0.4947, "step": 8654 }, { "epoch": 0.6417290724401276, "grad_norm": 0.36123350262641907, "learning_rate": 0.00011347836959239811, "loss": 0.5206, "step": 8655 }, { "epoch": 0.6418032179135463, "grad_norm": 0.3522661626338959, "learning_rate": 0.00011346836709177296, "loss": 0.4964, "step": 8656 }, { "epoch": 0.6418773633869652, "grad_norm": 0.3585779666900635, "learning_rate": 0.0001134583645911478, "loss": 0.5409, "step": 8657 }, { "epoch": 0.6419515088603841, "grad_norm": 0.3925395607948303, "learning_rate": 0.00011344836209052263, "loss": 0.5337, "step": 8658 }, { "epoch": 0.6420256543338029, "grad_norm": 0.3492332398891449, "learning_rate": 0.00011343835958989748, "loss": 0.4877, "step": 8659 }, { "epoch": 0.6420997998072218, "grad_norm": 0.36880505084991455, "learning_rate": 0.00011342835708927233, "loss": 0.5081, "step": 8660 }, { "epoch": 0.6421739452806406, "grad_norm": 0.3773365318775177, "learning_rate": 0.00011341835458864716, "loss": 0.4838, "step": 8661 }, { "epoch": 0.6422480907540594, "grad_norm": 0.3646014928817749, "learning_rate": 0.00011340835208802201, "loss": 0.5205, "step": 8662 }, { "epoch": 0.6423222362274783, "grad_norm": 0.354187548160553, "learning_rate": 0.00011339834958739686, "loss": 0.4768, "step": 8663 }, { "epoch": 0.6423963817008972, "grad_norm": 0.35873666405677795, "learning_rate": 0.00011338834708677171, "loss": 0.4962, "step": 8664 }, { "epoch": 0.642470527174316, "grad_norm": 0.349918395280838, "learning_rate": 0.00011337834458614653, "loss": 0.4926, "step": 8665 }, { "epoch": 0.6425446726477348, "grad_norm": 0.3511751890182495, "learning_rate": 0.00011336834208552138, "loss": 0.5099, "step": 8666 }, { "epoch": 0.6426188181211537, "grad_norm": 0.3473072052001953, "learning_rate": 0.00011335833958489624, "loss": 0.4869, "step": 8667 }, { "epoch": 0.6426929635945725, "grad_norm": 0.3656640648841858, "learning_rate": 0.00011334833708427109, "loss": 0.5094, "step": 8668 }, { "epoch": 0.6427671090679914, "grad_norm": 0.3612516522407532, "learning_rate": 0.00011333833458364591, "loss": 0.5315, "step": 8669 }, { "epoch": 0.6428412545414103, "grad_norm": 0.3342747092247009, "learning_rate": 0.00011332833208302076, "loss": 0.468, "step": 8670 }, { "epoch": 0.642915400014829, "grad_norm": 0.3631604015827179, "learning_rate": 0.00011331832958239561, "loss": 0.4953, "step": 8671 }, { "epoch": 0.6429895454882479, "grad_norm": 0.3484276533126831, "learning_rate": 0.00011330832708177043, "loss": 0.4925, "step": 8672 }, { "epoch": 0.6430636909616668, "grad_norm": 0.37336021661758423, "learning_rate": 0.00011329832458114528, "loss": 0.5384, "step": 8673 }, { "epoch": 0.6431378364350856, "grad_norm": 0.4031154215335846, "learning_rate": 0.00011328832208052014, "loss": 0.5262, "step": 8674 }, { "epoch": 0.6432119819085045, "grad_norm": 0.3668609857559204, "learning_rate": 0.00011327831957989499, "loss": 0.5078, "step": 8675 }, { "epoch": 0.6432861273819234, "grad_norm": 0.36139580607414246, "learning_rate": 0.00011326831707926981, "loss": 0.4845, "step": 8676 }, { "epoch": 0.6433602728553421, "grad_norm": 0.3333219587802887, "learning_rate": 0.00011325831457864466, "loss": 0.4613, "step": 8677 }, { "epoch": 0.643434418328761, "grad_norm": 0.3680759370326996, "learning_rate": 0.00011324831207801951, "loss": 0.5329, "step": 8678 }, { "epoch": 0.6435085638021799, "grad_norm": 0.3511950373649597, "learning_rate": 0.00011323830957739436, "loss": 0.4879, "step": 8679 }, { "epoch": 0.6435827092755987, "grad_norm": 0.3624907433986664, "learning_rate": 0.00011322830707676919, "loss": 0.5055, "step": 8680 }, { "epoch": 0.6436568547490176, "grad_norm": 0.33855676651000977, "learning_rate": 0.00011321830457614404, "loss": 0.4552, "step": 8681 }, { "epoch": 0.6437310002224365, "grad_norm": 0.3695961833000183, "learning_rate": 0.00011320830207551889, "loss": 0.4672, "step": 8682 }, { "epoch": 0.6438051456958552, "grad_norm": 0.35491207242012024, "learning_rate": 0.00011319829957489372, "loss": 0.5347, "step": 8683 }, { "epoch": 0.6438792911692741, "grad_norm": 0.37201201915740967, "learning_rate": 0.00011318829707426857, "loss": 0.5048, "step": 8684 }, { "epoch": 0.643953436642693, "grad_norm": 0.36937007308006287, "learning_rate": 0.00011317829457364341, "loss": 0.529, "step": 8685 }, { "epoch": 0.6440275821161118, "grad_norm": 0.3449699282646179, "learning_rate": 0.00011316829207301826, "loss": 0.4508, "step": 8686 }, { "epoch": 0.6441017275895307, "grad_norm": 0.37193652987480164, "learning_rate": 0.0001131582895723931, "loss": 0.4926, "step": 8687 }, { "epoch": 0.6441758730629495, "grad_norm": 0.37393519282341003, "learning_rate": 0.00011314828707176795, "loss": 0.5088, "step": 8688 }, { "epoch": 0.6442500185363683, "grad_norm": 0.3671988248825073, "learning_rate": 0.0001131382845711428, "loss": 0.5171, "step": 8689 }, { "epoch": 0.6443241640097872, "grad_norm": 0.3611290156841278, "learning_rate": 0.00011312828207051764, "loss": 0.4705, "step": 8690 }, { "epoch": 0.6443983094832061, "grad_norm": 0.3431844711303711, "learning_rate": 0.00011311827956989247, "loss": 0.4706, "step": 8691 }, { "epoch": 0.6444724549566249, "grad_norm": 0.3487251102924347, "learning_rate": 0.00011310827706926733, "loss": 0.4762, "step": 8692 }, { "epoch": 0.6445466004300437, "grad_norm": 0.3512471914291382, "learning_rate": 0.00011309827456864218, "loss": 0.4992, "step": 8693 }, { "epoch": 0.6446207459034626, "grad_norm": 0.36085161566734314, "learning_rate": 0.00011308827206801703, "loss": 0.4924, "step": 8694 }, { "epoch": 0.6446948913768814, "grad_norm": 0.3613933324813843, "learning_rate": 0.00011307826956739185, "loss": 0.4952, "step": 8695 }, { "epoch": 0.6447690368503003, "grad_norm": 0.34374257922172546, "learning_rate": 0.0001130682670667667, "loss": 0.4893, "step": 8696 }, { "epoch": 0.6448431823237192, "grad_norm": 0.3729201853275299, "learning_rate": 0.00011305826456614155, "loss": 0.5532, "step": 8697 }, { "epoch": 0.644917327797138, "grad_norm": 0.3521791994571686, "learning_rate": 0.00011304826206551637, "loss": 0.4736, "step": 8698 }, { "epoch": 0.6449914732705568, "grad_norm": 0.37622156739234924, "learning_rate": 0.00011303825956489123, "loss": 0.5292, "step": 8699 }, { "epoch": 0.6450656187439757, "grad_norm": 0.3837914764881134, "learning_rate": 0.00011302825706426608, "loss": 0.5528, "step": 8700 }, { "epoch": 0.6451397642173945, "grad_norm": 0.37295418977737427, "learning_rate": 0.00011301825456364093, "loss": 0.5408, "step": 8701 }, { "epoch": 0.6452139096908134, "grad_norm": 0.35978832840919495, "learning_rate": 0.00011300825206301575, "loss": 0.4756, "step": 8702 }, { "epoch": 0.6452880551642323, "grad_norm": 0.3555504381656647, "learning_rate": 0.0001129982495623906, "loss": 0.4933, "step": 8703 }, { "epoch": 0.645362200637651, "grad_norm": 0.35773980617523193, "learning_rate": 0.00011298824706176545, "loss": 0.5049, "step": 8704 }, { "epoch": 0.6454363461110699, "grad_norm": 0.3713650107383728, "learning_rate": 0.0001129782445611403, "loss": 0.5349, "step": 8705 }, { "epoch": 0.6455104915844888, "grad_norm": 0.35432180762290955, "learning_rate": 0.00011296824206051513, "loss": 0.5124, "step": 8706 }, { "epoch": 0.6455846370579076, "grad_norm": 0.36907848715782166, "learning_rate": 0.00011295823955988998, "loss": 0.5416, "step": 8707 }, { "epoch": 0.6456587825313265, "grad_norm": 0.3692152500152588, "learning_rate": 0.00011294823705926483, "loss": 0.5303, "step": 8708 }, { "epoch": 0.6457329280047454, "grad_norm": 0.3674774169921875, "learning_rate": 0.00011293823455863965, "loss": 0.5464, "step": 8709 }, { "epoch": 0.6458070734781641, "grad_norm": 0.3547026813030243, "learning_rate": 0.0001129282320580145, "loss": 0.5022, "step": 8710 }, { "epoch": 0.645881218951583, "grad_norm": 0.34941989183425903, "learning_rate": 0.00011291822955738935, "loss": 0.4699, "step": 8711 }, { "epoch": 0.6459553644250019, "grad_norm": 0.35410889983177185, "learning_rate": 0.0001129082270567642, "loss": 0.5192, "step": 8712 }, { "epoch": 0.6460295098984207, "grad_norm": 0.3634650409221649, "learning_rate": 0.00011289822455613903, "loss": 0.5298, "step": 8713 }, { "epoch": 0.6461036553718396, "grad_norm": 0.35354307293891907, "learning_rate": 0.00011288822205551388, "loss": 0.5065, "step": 8714 }, { "epoch": 0.6461778008452584, "grad_norm": 0.358790785074234, "learning_rate": 0.00011287821955488873, "loss": 0.5199, "step": 8715 }, { "epoch": 0.6462519463186772, "grad_norm": 0.36818942427635193, "learning_rate": 0.00011286821705426358, "loss": 0.502, "step": 8716 }, { "epoch": 0.6463260917920961, "grad_norm": 0.3336133360862732, "learning_rate": 0.00011285821455363841, "loss": 0.4436, "step": 8717 }, { "epoch": 0.646400237265515, "grad_norm": 0.3563005030155182, "learning_rate": 0.00011284821205301325, "loss": 0.5228, "step": 8718 }, { "epoch": 0.6464743827389338, "grad_norm": 0.3669237792491913, "learning_rate": 0.0001128382095523881, "loss": 0.5277, "step": 8719 }, { "epoch": 0.6465485282123526, "grad_norm": 0.3769887685775757, "learning_rate": 0.00011282820705176294, "loss": 0.5429, "step": 8720 }, { "epoch": 0.6466226736857715, "grad_norm": 0.3811766505241394, "learning_rate": 0.00011281820455113779, "loss": 0.5408, "step": 8721 }, { "epoch": 0.6466968191591903, "grad_norm": 0.3906998038291931, "learning_rate": 0.00011280820205051264, "loss": 0.5395, "step": 8722 }, { "epoch": 0.6467709646326092, "grad_norm": 0.34815046191215515, "learning_rate": 0.00011279819954988748, "loss": 0.4716, "step": 8723 }, { "epoch": 0.6468451101060281, "grad_norm": 0.37771451473236084, "learning_rate": 0.00011278819704926232, "loss": 0.5698, "step": 8724 }, { "epoch": 0.6469192555794469, "grad_norm": 0.36946359276771545, "learning_rate": 0.00011277819454863717, "loss": 0.5254, "step": 8725 }, { "epoch": 0.6469934010528657, "grad_norm": 0.3656352460384369, "learning_rate": 0.00011276819204801202, "loss": 0.5127, "step": 8726 }, { "epoch": 0.6470675465262846, "grad_norm": 0.37738558650016785, "learning_rate": 0.00011275818954738687, "loss": 0.5113, "step": 8727 }, { "epoch": 0.6471416919997034, "grad_norm": 0.34141790866851807, "learning_rate": 0.00011274818704676169, "loss": 0.4993, "step": 8728 }, { "epoch": 0.6472158374731223, "grad_norm": 0.34677934646606445, "learning_rate": 0.00011273818454613654, "loss": 0.4897, "step": 8729 }, { "epoch": 0.6472899829465412, "grad_norm": 0.3971535861492157, "learning_rate": 0.00011272818204551139, "loss": 0.5115, "step": 8730 }, { "epoch": 0.6473641284199599, "grad_norm": 0.35514310002326965, "learning_rate": 0.00011271817954488624, "loss": 0.5332, "step": 8731 }, { "epoch": 0.6474382738933788, "grad_norm": 0.37002032995224, "learning_rate": 0.00011270817704426107, "loss": 0.5285, "step": 8732 }, { "epoch": 0.6475124193667977, "grad_norm": 0.3608430027961731, "learning_rate": 0.00011269817454363592, "loss": 0.5235, "step": 8733 }, { "epoch": 0.6475865648402165, "grad_norm": 0.3518328368663788, "learning_rate": 0.00011268817204301077, "loss": 0.5123, "step": 8734 }, { "epoch": 0.6476607103136354, "grad_norm": 0.3678167164325714, "learning_rate": 0.00011267816954238559, "loss": 0.5114, "step": 8735 }, { "epoch": 0.6477348557870543, "grad_norm": 0.34989285469055176, "learning_rate": 0.00011266816704176044, "loss": 0.5104, "step": 8736 }, { "epoch": 0.647809001260473, "grad_norm": 0.3464055359363556, "learning_rate": 0.00011265816454113529, "loss": 0.5333, "step": 8737 }, { "epoch": 0.6478831467338919, "grad_norm": 0.3293551206588745, "learning_rate": 0.00011264816204051014, "loss": 0.4702, "step": 8738 }, { "epoch": 0.6479572922073108, "grad_norm": 0.3630637228488922, "learning_rate": 0.00011263815953988497, "loss": 0.5298, "step": 8739 }, { "epoch": 0.6480314376807296, "grad_norm": 0.3448040187358856, "learning_rate": 0.00011262815703925982, "loss": 0.4637, "step": 8740 }, { "epoch": 0.6481055831541485, "grad_norm": 0.3616679608821869, "learning_rate": 0.00011261815453863467, "loss": 0.4806, "step": 8741 }, { "epoch": 0.6481797286275673, "grad_norm": 0.3450203239917755, "learning_rate": 0.00011260815203800952, "loss": 0.4912, "step": 8742 }, { "epoch": 0.6482538741009861, "grad_norm": 0.3515965938568115, "learning_rate": 0.00011259814953738434, "loss": 0.5233, "step": 8743 }, { "epoch": 0.648328019574405, "grad_norm": 0.3954874575138092, "learning_rate": 0.00011258814703675919, "loss": 0.55, "step": 8744 }, { "epoch": 0.6484021650478238, "grad_norm": 0.3658466637134552, "learning_rate": 0.00011257814453613404, "loss": 0.4722, "step": 8745 }, { "epoch": 0.6484763105212427, "grad_norm": 0.35498085618019104, "learning_rate": 0.00011256814203550887, "loss": 0.4893, "step": 8746 }, { "epoch": 0.6485504559946615, "grad_norm": 0.33666056394577026, "learning_rate": 0.00011255813953488372, "loss": 0.4569, "step": 8747 }, { "epoch": 0.6486246014680803, "grad_norm": 0.3698987364768982, "learning_rate": 0.00011254813703425857, "loss": 0.5117, "step": 8748 }, { "epoch": 0.6486987469414992, "grad_norm": 0.3650628626346588, "learning_rate": 0.00011253813453363342, "loss": 0.5323, "step": 8749 }, { "epoch": 0.6487728924149181, "grad_norm": 0.3516225218772888, "learning_rate": 0.00011252813203300824, "loss": 0.4833, "step": 8750 }, { "epoch": 0.6488470378883369, "grad_norm": 0.37139463424682617, "learning_rate": 0.00011251812953238309, "loss": 0.5472, "step": 8751 }, { "epoch": 0.6489211833617557, "grad_norm": 0.3756784200668335, "learning_rate": 0.00011250812703175794, "loss": 0.5224, "step": 8752 }, { "epoch": 0.6489953288351746, "grad_norm": 0.3711493909358978, "learning_rate": 0.0001124981245311328, "loss": 0.518, "step": 8753 }, { "epoch": 0.6490694743085934, "grad_norm": 0.3484797477722168, "learning_rate": 0.00011248812203050763, "loss": 0.4699, "step": 8754 }, { "epoch": 0.6491436197820123, "grad_norm": 0.3970729112625122, "learning_rate": 0.00011247811952988248, "loss": 0.4983, "step": 8755 }, { "epoch": 0.6492177652554312, "grad_norm": 0.3411169648170471, "learning_rate": 0.00011246811702925732, "loss": 0.45, "step": 8756 }, { "epoch": 0.64929191072885, "grad_norm": 0.37162071466445923, "learning_rate": 0.00011245811452863216, "loss": 0.5673, "step": 8757 }, { "epoch": 0.6493660562022688, "grad_norm": 0.3651652932167053, "learning_rate": 0.000112448112028007, "loss": 0.5214, "step": 8758 }, { "epoch": 0.6494402016756877, "grad_norm": 0.3629673719406128, "learning_rate": 0.00011243810952738186, "loss": 0.4726, "step": 8759 }, { "epoch": 0.6495143471491065, "grad_norm": 0.34002062678337097, "learning_rate": 0.00011242810702675671, "loss": 0.4666, "step": 8760 }, { "epoch": 0.6495884926225254, "grad_norm": 0.37398332357406616, "learning_rate": 0.00011241810452613153, "loss": 0.4972, "step": 8761 }, { "epoch": 0.6496626380959443, "grad_norm": 0.3588859736919403, "learning_rate": 0.00011240810202550638, "loss": 0.4871, "step": 8762 }, { "epoch": 0.649736783569363, "grad_norm": 0.36923637986183167, "learning_rate": 0.00011239809952488123, "loss": 0.5052, "step": 8763 }, { "epoch": 0.6498109290427819, "grad_norm": 0.37160149216651917, "learning_rate": 0.00011238809702425608, "loss": 0.4907, "step": 8764 }, { "epoch": 0.6498850745162008, "grad_norm": 0.35067200660705566, "learning_rate": 0.00011237809452363091, "loss": 0.4793, "step": 8765 }, { "epoch": 0.6499592199896196, "grad_norm": 0.37577104568481445, "learning_rate": 0.00011236809202300576, "loss": 0.5009, "step": 8766 }, { "epoch": 0.6500333654630385, "grad_norm": 0.4190217852592468, "learning_rate": 0.00011235808952238061, "loss": 0.5648, "step": 8767 }, { "epoch": 0.6501075109364574, "grad_norm": 0.3503623604774475, "learning_rate": 0.00011234808702175546, "loss": 0.4839, "step": 8768 }, { "epoch": 0.6501816564098761, "grad_norm": 0.3815145790576935, "learning_rate": 0.00011233808452113028, "loss": 0.5214, "step": 8769 }, { "epoch": 0.650255801883295, "grad_norm": 0.35986751317977905, "learning_rate": 0.00011232808202050513, "loss": 0.483, "step": 8770 }, { "epoch": 0.6503299473567139, "grad_norm": 0.3779960870742798, "learning_rate": 0.00011231807951987998, "loss": 0.5027, "step": 8771 }, { "epoch": 0.6504040928301327, "grad_norm": 0.37749621272087097, "learning_rate": 0.00011230807701925481, "loss": 0.5143, "step": 8772 }, { "epoch": 0.6504782383035516, "grad_norm": 0.3725900650024414, "learning_rate": 0.00011229807451862966, "loss": 0.5254, "step": 8773 }, { "epoch": 0.6505523837769704, "grad_norm": 0.35010913014411926, "learning_rate": 0.00011228807201800451, "loss": 0.467, "step": 8774 }, { "epoch": 0.6506265292503892, "grad_norm": 0.36542168259620667, "learning_rate": 0.00011227806951737936, "loss": 0.5047, "step": 8775 }, { "epoch": 0.6507006747238081, "grad_norm": 0.3610917925834656, "learning_rate": 0.00011226806701675418, "loss": 0.4837, "step": 8776 }, { "epoch": 0.650774820197227, "grad_norm": 0.3784610331058502, "learning_rate": 0.00011225806451612903, "loss": 0.4746, "step": 8777 }, { "epoch": 0.6508489656706458, "grad_norm": 0.3740015923976898, "learning_rate": 0.00011224806201550388, "loss": 0.5419, "step": 8778 }, { "epoch": 0.6509231111440646, "grad_norm": 0.33691391348838806, "learning_rate": 0.00011223805951487873, "loss": 0.4835, "step": 8779 }, { "epoch": 0.6509972566174835, "grad_norm": 0.3607368767261505, "learning_rate": 0.00011222805701425356, "loss": 0.463, "step": 8780 }, { "epoch": 0.6510714020909023, "grad_norm": 0.3452506363391876, "learning_rate": 0.00011221805451362841, "loss": 0.4852, "step": 8781 }, { "epoch": 0.6511455475643212, "grad_norm": 0.36794009804725647, "learning_rate": 0.00011220805201300326, "loss": 0.4665, "step": 8782 }, { "epoch": 0.6512196930377401, "grad_norm": 0.3399165868759155, "learning_rate": 0.00011219804951237808, "loss": 0.4966, "step": 8783 }, { "epoch": 0.6512938385111589, "grad_norm": 0.3407506048679352, "learning_rate": 0.00011218804701175293, "loss": 0.4707, "step": 8784 }, { "epoch": 0.6513679839845777, "grad_norm": 0.3934791684150696, "learning_rate": 0.00011217804451112778, "loss": 0.5068, "step": 8785 }, { "epoch": 0.6514421294579966, "grad_norm": 0.37941911816596985, "learning_rate": 0.00011216804201050263, "loss": 0.5394, "step": 8786 }, { "epoch": 0.6515162749314154, "grad_norm": 0.4052014648914337, "learning_rate": 0.00011215803950987747, "loss": 0.5553, "step": 8787 }, { "epoch": 0.6515904204048343, "grad_norm": 0.3900722861289978, "learning_rate": 0.00011214803700925231, "loss": 0.5403, "step": 8788 }, { "epoch": 0.6516645658782532, "grad_norm": 0.3448120355606079, "learning_rate": 0.00011213803450862716, "loss": 0.4953, "step": 8789 }, { "epoch": 0.6517387113516719, "grad_norm": 0.3834049105644226, "learning_rate": 0.00011212803200800201, "loss": 0.5597, "step": 8790 }, { "epoch": 0.6518128568250908, "grad_norm": 0.35630419850349426, "learning_rate": 0.00011211802950737685, "loss": 0.4849, "step": 8791 }, { "epoch": 0.6518870022985097, "grad_norm": 0.35686635971069336, "learning_rate": 0.0001121080270067517, "loss": 0.4904, "step": 8792 }, { "epoch": 0.6519611477719285, "grad_norm": 0.3651827871799469, "learning_rate": 0.00011209802450612655, "loss": 0.4901, "step": 8793 }, { "epoch": 0.6520352932453474, "grad_norm": 0.36229223012924194, "learning_rate": 0.00011208802200550137, "loss": 0.5021, "step": 8794 }, { "epoch": 0.6521094387187663, "grad_norm": 0.3551543951034546, "learning_rate": 0.00011207801950487622, "loss": 0.5058, "step": 8795 }, { "epoch": 0.652183584192185, "grad_norm": 0.34893879294395447, "learning_rate": 0.00011206801700425107, "loss": 0.4861, "step": 8796 }, { "epoch": 0.6522577296656039, "grad_norm": 0.35652342438697815, "learning_rate": 0.00011205801450362592, "loss": 0.4943, "step": 8797 }, { "epoch": 0.6523318751390228, "grad_norm": 0.4122098982334137, "learning_rate": 0.00011204801200300075, "loss": 0.5736, "step": 8798 }, { "epoch": 0.6524060206124416, "grad_norm": 0.3589245080947876, "learning_rate": 0.0001120380095023756, "loss": 0.499, "step": 8799 }, { "epoch": 0.6524801660858605, "grad_norm": 0.35003408789634705, "learning_rate": 0.00011202800700175045, "loss": 0.5077, "step": 8800 }, { "epoch": 0.6525543115592793, "grad_norm": 0.3802425265312195, "learning_rate": 0.0001120180045011253, "loss": 0.5218, "step": 8801 }, { "epoch": 0.6526284570326981, "grad_norm": 0.37719908356666565, "learning_rate": 0.00011200800200050012, "loss": 0.5196, "step": 8802 }, { "epoch": 0.652702602506117, "grad_norm": 0.3826882541179657, "learning_rate": 0.00011199799949987497, "loss": 0.5382, "step": 8803 }, { "epoch": 0.6527767479795359, "grad_norm": 0.36548125743865967, "learning_rate": 0.00011198799699924982, "loss": 0.4908, "step": 8804 }, { "epoch": 0.6528508934529547, "grad_norm": 0.361351877450943, "learning_rate": 0.00011197799449862468, "loss": 0.535, "step": 8805 }, { "epoch": 0.6529250389263735, "grad_norm": 0.34502196311950684, "learning_rate": 0.0001119679919979995, "loss": 0.4704, "step": 8806 }, { "epoch": 0.6529991843997924, "grad_norm": 0.3773176372051239, "learning_rate": 0.00011195798949737435, "loss": 0.5444, "step": 8807 }, { "epoch": 0.6530733298732112, "grad_norm": 0.34615442156791687, "learning_rate": 0.0001119479869967492, "loss": 0.5021, "step": 8808 }, { "epoch": 0.6531474753466301, "grad_norm": 0.36163225769996643, "learning_rate": 0.00011193798449612402, "loss": 0.5001, "step": 8809 }, { "epoch": 0.653221620820049, "grad_norm": 0.359266459941864, "learning_rate": 0.00011192798199549887, "loss": 0.5413, "step": 8810 }, { "epoch": 0.6532957662934678, "grad_norm": 0.3503558337688446, "learning_rate": 0.00011191797949487372, "loss": 0.4885, "step": 8811 }, { "epoch": 0.6533699117668866, "grad_norm": 0.35349828004837036, "learning_rate": 0.00011190797699424858, "loss": 0.4942, "step": 8812 }, { "epoch": 0.6534440572403055, "grad_norm": 0.34654149413108826, "learning_rate": 0.0001118979744936234, "loss": 0.4644, "step": 8813 }, { "epoch": 0.6535182027137243, "grad_norm": 0.35663461685180664, "learning_rate": 0.00011188797199299825, "loss": 0.5174, "step": 8814 }, { "epoch": 0.6535923481871432, "grad_norm": 0.35626184940338135, "learning_rate": 0.0001118779694923731, "loss": 0.4957, "step": 8815 }, { "epoch": 0.6536664936605621, "grad_norm": 0.3407404124736786, "learning_rate": 0.00011186796699174795, "loss": 0.4829, "step": 8816 }, { "epoch": 0.6537406391339808, "grad_norm": 0.3679596185684204, "learning_rate": 0.00011185796449112277, "loss": 0.4678, "step": 8817 }, { "epoch": 0.6538147846073997, "grad_norm": 0.3394465446472168, "learning_rate": 0.00011184796199049763, "loss": 0.4639, "step": 8818 }, { "epoch": 0.6538889300808186, "grad_norm": 0.35940682888031006, "learning_rate": 0.00011183795948987248, "loss": 0.5081, "step": 8819 }, { "epoch": 0.6539630755542374, "grad_norm": 0.3714331090450287, "learning_rate": 0.00011182795698924731, "loss": 0.5108, "step": 8820 }, { "epoch": 0.6540372210276563, "grad_norm": 0.3586554527282715, "learning_rate": 0.00011181795448862215, "loss": 0.5294, "step": 8821 }, { "epoch": 0.6541113665010752, "grad_norm": 0.36842918395996094, "learning_rate": 0.000111807951987997, "loss": 0.5198, "step": 8822 }, { "epoch": 0.6541855119744939, "grad_norm": 0.36762699484825134, "learning_rate": 0.00011179794948737185, "loss": 0.4891, "step": 8823 }, { "epoch": 0.6542596574479128, "grad_norm": 0.37013375759124756, "learning_rate": 0.00011178794698674669, "loss": 0.5117, "step": 8824 }, { "epoch": 0.6543338029213317, "grad_norm": 0.3580760657787323, "learning_rate": 0.00011177794448612154, "loss": 0.4927, "step": 8825 }, { "epoch": 0.6544079483947505, "grad_norm": 0.3462485074996948, "learning_rate": 0.00011176794198549638, "loss": 0.4798, "step": 8826 }, { "epoch": 0.6544820938681694, "grad_norm": 0.34846681356430054, "learning_rate": 0.00011175793948487123, "loss": 0.5127, "step": 8827 }, { "epoch": 0.6545562393415882, "grad_norm": 0.40487539768218994, "learning_rate": 0.00011174793698424606, "loss": 0.5682, "step": 8828 }, { "epoch": 0.654630384815007, "grad_norm": 0.37789034843444824, "learning_rate": 0.00011173793448362091, "loss": 0.5258, "step": 8829 }, { "epoch": 0.6547045302884259, "grad_norm": 0.3491688072681427, "learning_rate": 0.00011172793198299577, "loss": 0.5045, "step": 8830 }, { "epoch": 0.6547786757618448, "grad_norm": 0.3529582619667053, "learning_rate": 0.00011171792948237062, "loss": 0.4739, "step": 8831 }, { "epoch": 0.6548528212352636, "grad_norm": 0.37320762872695923, "learning_rate": 0.00011170792698174544, "loss": 0.5267, "step": 8832 }, { "epoch": 0.6549269667086824, "grad_norm": 0.3600004315376282, "learning_rate": 0.00011169792448112029, "loss": 0.5081, "step": 8833 }, { "epoch": 0.6550011121821013, "grad_norm": 0.3721366226673126, "learning_rate": 0.00011168792198049514, "loss": 0.5151, "step": 8834 }, { "epoch": 0.6550752576555201, "grad_norm": 0.35195451974868774, "learning_rate": 0.00011167791947986996, "loss": 0.4692, "step": 8835 }, { "epoch": 0.655149403128939, "grad_norm": 0.3513534963130951, "learning_rate": 0.00011166791697924481, "loss": 0.5012, "step": 8836 }, { "epoch": 0.6552235486023579, "grad_norm": 0.33282706141471863, "learning_rate": 0.00011165791447861967, "loss": 0.4517, "step": 8837 }, { "epoch": 0.6552976940757766, "grad_norm": 0.3362390100955963, "learning_rate": 0.00011164791197799452, "loss": 0.4871, "step": 8838 }, { "epoch": 0.6553718395491955, "grad_norm": 0.3526909053325653, "learning_rate": 0.00011163790947736934, "loss": 0.5083, "step": 8839 }, { "epoch": 0.6554459850226144, "grad_norm": 0.3435160219669342, "learning_rate": 0.00011162790697674419, "loss": 0.5207, "step": 8840 }, { "epoch": 0.6555201304960332, "grad_norm": 0.3516910970211029, "learning_rate": 0.00011161790447611904, "loss": 0.4897, "step": 8841 }, { "epoch": 0.6555942759694521, "grad_norm": 0.33880719542503357, "learning_rate": 0.00011160790197549389, "loss": 0.4649, "step": 8842 }, { "epoch": 0.655668421442871, "grad_norm": 0.37497150897979736, "learning_rate": 0.00011159789947486871, "loss": 0.5301, "step": 8843 }, { "epoch": 0.6557425669162897, "grad_norm": 0.34401974081993103, "learning_rate": 0.00011158789697424357, "loss": 0.478, "step": 8844 }, { "epoch": 0.6558167123897086, "grad_norm": 0.3666244149208069, "learning_rate": 0.00011157789447361842, "loss": 0.5013, "step": 8845 }, { "epoch": 0.6558908578631275, "grad_norm": 0.35361531376838684, "learning_rate": 0.00011156789197299324, "loss": 0.5195, "step": 8846 }, { "epoch": 0.6559650033365463, "grad_norm": 0.35288700461387634, "learning_rate": 0.00011155788947236809, "loss": 0.4929, "step": 8847 }, { "epoch": 0.6560391488099652, "grad_norm": 0.3524412214756012, "learning_rate": 0.00011154788697174294, "loss": 0.4951, "step": 8848 }, { "epoch": 0.656113294283384, "grad_norm": 0.36852696537971497, "learning_rate": 0.00011153788447111779, "loss": 0.5626, "step": 8849 }, { "epoch": 0.6561874397568028, "grad_norm": 0.3558533787727356, "learning_rate": 0.00011152788197049262, "loss": 0.4975, "step": 8850 }, { "epoch": 0.6562615852302217, "grad_norm": 0.32742249965667725, "learning_rate": 0.00011151787946986747, "loss": 0.4437, "step": 8851 }, { "epoch": 0.6563357307036406, "grad_norm": 0.3519536256790161, "learning_rate": 0.00011150787696924232, "loss": 0.4947, "step": 8852 }, { "epoch": 0.6564098761770594, "grad_norm": 0.38615652918815613, "learning_rate": 0.00011149787446861717, "loss": 0.5468, "step": 8853 }, { "epoch": 0.6564840216504783, "grad_norm": 0.3419593870639801, "learning_rate": 0.00011148787196799199, "loss": 0.4768, "step": 8854 }, { "epoch": 0.6565581671238971, "grad_norm": 0.355864554643631, "learning_rate": 0.00011147786946736684, "loss": 0.5201, "step": 8855 }, { "epoch": 0.6566323125973159, "grad_norm": 0.3362215459346771, "learning_rate": 0.00011146786696674169, "loss": 0.448, "step": 8856 }, { "epoch": 0.6567064580707348, "grad_norm": 0.3760545551776886, "learning_rate": 0.00011145786446611653, "loss": 0.5353, "step": 8857 }, { "epoch": 0.6567806035441536, "grad_norm": 0.36659833788871765, "learning_rate": 0.00011144786196549138, "loss": 0.5008, "step": 8858 }, { "epoch": 0.6568547490175725, "grad_norm": 0.35590505599975586, "learning_rate": 0.00011143785946486622, "loss": 0.4683, "step": 8859 }, { "epoch": 0.6569288944909913, "grad_norm": 0.36580368876457214, "learning_rate": 0.00011142785696424107, "loss": 0.4978, "step": 8860 }, { "epoch": 0.6570030399644101, "grad_norm": 0.3552360534667969, "learning_rate": 0.0001114178544636159, "loss": 0.4988, "step": 8861 }, { "epoch": 0.657077185437829, "grad_norm": 0.3455599248409271, "learning_rate": 0.00011140785196299076, "loss": 0.4823, "step": 8862 }, { "epoch": 0.6571513309112479, "grad_norm": 0.3997443914413452, "learning_rate": 0.0001113978494623656, "loss": 0.4941, "step": 8863 }, { "epoch": 0.6572254763846667, "grad_norm": 0.3828672766685486, "learning_rate": 0.00011138784696174044, "loss": 0.533, "step": 8864 }, { "epoch": 0.6572996218580855, "grad_norm": 0.36641645431518555, "learning_rate": 0.00011137784446111528, "loss": 0.4765, "step": 8865 }, { "epoch": 0.6573737673315044, "grad_norm": 0.38653838634490967, "learning_rate": 0.00011136784196049013, "loss": 0.5134, "step": 8866 }, { "epoch": 0.6574479128049232, "grad_norm": 0.3663020730018616, "learning_rate": 0.00011135783945986498, "loss": 0.4698, "step": 8867 }, { "epoch": 0.6575220582783421, "grad_norm": 0.3904915153980255, "learning_rate": 0.00011134783695923983, "loss": 0.5212, "step": 8868 }, { "epoch": 0.657596203751761, "grad_norm": 0.3460754156112671, "learning_rate": 0.00011133783445861466, "loss": 0.4905, "step": 8869 }, { "epoch": 0.6576703492251798, "grad_norm": 0.35526400804519653, "learning_rate": 0.0001113278319579895, "loss": 0.5087, "step": 8870 }, { "epoch": 0.6577444946985986, "grad_norm": 0.35558396577835083, "learning_rate": 0.00011131782945736436, "loss": 0.4653, "step": 8871 }, { "epoch": 0.6578186401720175, "grad_norm": 0.3626982271671295, "learning_rate": 0.00011130782695673918, "loss": 0.5051, "step": 8872 }, { "epoch": 0.6578927856454363, "grad_norm": 0.3453567326068878, "learning_rate": 0.00011129782445611403, "loss": 0.4616, "step": 8873 }, { "epoch": 0.6579669311188552, "grad_norm": 0.366135835647583, "learning_rate": 0.00011128782195548888, "loss": 0.4855, "step": 8874 }, { "epoch": 0.6580410765922741, "grad_norm": 0.3572728633880615, "learning_rate": 0.00011127781945486373, "loss": 0.4716, "step": 8875 }, { "epoch": 0.6581152220656928, "grad_norm": 0.36778777837753296, "learning_rate": 0.00011126781695423856, "loss": 0.5004, "step": 8876 }, { "epoch": 0.6581893675391117, "grad_norm": 0.3663083016872406, "learning_rate": 0.0001112578144536134, "loss": 0.5007, "step": 8877 }, { "epoch": 0.6582635130125306, "grad_norm": 0.3883764147758484, "learning_rate": 0.00011124781195298826, "loss": 0.4993, "step": 8878 }, { "epoch": 0.6583376584859494, "grad_norm": 0.3983544409275055, "learning_rate": 0.00011123780945236311, "loss": 0.5469, "step": 8879 }, { "epoch": 0.6584118039593683, "grad_norm": 0.35671427845954895, "learning_rate": 0.00011122780695173793, "loss": 0.5112, "step": 8880 }, { "epoch": 0.6584859494327872, "grad_norm": 0.3555515706539154, "learning_rate": 0.00011121780445111278, "loss": 0.4741, "step": 8881 }, { "epoch": 0.6585600949062059, "grad_norm": 0.35557568073272705, "learning_rate": 0.00011120780195048763, "loss": 0.5084, "step": 8882 }, { "epoch": 0.6586342403796248, "grad_norm": 0.3435540795326233, "learning_rate": 0.00011119779944986246, "loss": 0.4854, "step": 8883 }, { "epoch": 0.6587083858530437, "grad_norm": 0.39828017354011536, "learning_rate": 0.0001111877969492373, "loss": 0.5728, "step": 8884 }, { "epoch": 0.6587825313264625, "grad_norm": 0.3777962923049927, "learning_rate": 0.00011117779444861216, "loss": 0.4834, "step": 8885 }, { "epoch": 0.6588566767998814, "grad_norm": 0.37447622418403625, "learning_rate": 0.00011116779194798701, "loss": 0.5137, "step": 8886 }, { "epoch": 0.6589308222733002, "grad_norm": 0.37079888582229614, "learning_rate": 0.00011115778944736183, "loss": 0.4959, "step": 8887 }, { "epoch": 0.659004967746719, "grad_norm": 0.40261098742485046, "learning_rate": 0.00011114778694673668, "loss": 0.5492, "step": 8888 }, { "epoch": 0.6590791132201379, "grad_norm": 0.35229063034057617, "learning_rate": 0.00011113778444611153, "loss": 0.5053, "step": 8889 }, { "epoch": 0.6591532586935568, "grad_norm": 0.3672223687171936, "learning_rate": 0.00011112778194548638, "loss": 0.5359, "step": 8890 }, { "epoch": 0.6592274041669756, "grad_norm": 0.3578079342842102, "learning_rate": 0.00011111777944486122, "loss": 0.4862, "step": 8891 }, { "epoch": 0.6593015496403944, "grad_norm": 0.37840020656585693, "learning_rate": 0.00011110777694423606, "loss": 0.5163, "step": 8892 }, { "epoch": 0.6593756951138133, "grad_norm": 0.36277762055397034, "learning_rate": 0.00011109777444361091, "loss": 0.4794, "step": 8893 }, { "epoch": 0.6594498405872321, "grad_norm": 0.38859662413597107, "learning_rate": 0.00011108777194298575, "loss": 0.5348, "step": 8894 }, { "epoch": 0.659523986060651, "grad_norm": 0.34040892124176025, "learning_rate": 0.0001110777694423606, "loss": 0.465, "step": 8895 }, { "epoch": 0.6595981315340699, "grad_norm": 0.34127077460289, "learning_rate": 0.00011106776694173545, "loss": 0.489, "step": 8896 }, { "epoch": 0.6596722770074886, "grad_norm": 0.3575139045715332, "learning_rate": 0.00011105776444111028, "loss": 0.4882, "step": 8897 }, { "epoch": 0.6597464224809075, "grad_norm": 0.3559010922908783, "learning_rate": 0.00011104776194048512, "loss": 0.4898, "step": 8898 }, { "epoch": 0.6598205679543264, "grad_norm": 0.3631537854671478, "learning_rate": 0.00011103775943985997, "loss": 0.4964, "step": 8899 }, { "epoch": 0.6598947134277452, "grad_norm": 0.3522205054759979, "learning_rate": 0.00011102775693923482, "loss": 0.5094, "step": 8900 }, { "epoch": 0.6599688589011641, "grad_norm": 0.37249016761779785, "learning_rate": 0.00011101775443860967, "loss": 0.5181, "step": 8901 }, { "epoch": 0.660043004374583, "grad_norm": 0.3650285005569458, "learning_rate": 0.0001110077519379845, "loss": 0.5154, "step": 8902 }, { "epoch": 0.6601171498480017, "grad_norm": 0.36086976528167725, "learning_rate": 0.00011099774943735935, "loss": 0.508, "step": 8903 }, { "epoch": 0.6601912953214206, "grad_norm": 0.3659749925136566, "learning_rate": 0.0001109877469367342, "loss": 0.4941, "step": 8904 }, { "epoch": 0.6602654407948395, "grad_norm": 0.37385502457618713, "learning_rate": 0.00011097774443610905, "loss": 0.5429, "step": 8905 }, { "epoch": 0.6603395862682583, "grad_norm": 0.3858794867992401, "learning_rate": 0.00011096774193548387, "loss": 0.5335, "step": 8906 }, { "epoch": 0.6604137317416772, "grad_norm": 0.3814708888530731, "learning_rate": 0.00011095773943485872, "loss": 0.515, "step": 8907 }, { "epoch": 0.660487877215096, "grad_norm": 0.39121800661087036, "learning_rate": 0.00011094773693423357, "loss": 0.5011, "step": 8908 }, { "epoch": 0.6605620226885148, "grad_norm": 0.3613215982913971, "learning_rate": 0.0001109377344336084, "loss": 0.5186, "step": 8909 }, { "epoch": 0.6606361681619337, "grad_norm": 0.36003032326698303, "learning_rate": 0.00011092773193298325, "loss": 0.5015, "step": 8910 }, { "epoch": 0.6607103136353526, "grad_norm": 0.34536436200141907, "learning_rate": 0.0001109177294323581, "loss": 0.5261, "step": 8911 }, { "epoch": 0.6607844591087714, "grad_norm": 0.37275803089141846, "learning_rate": 0.00011090772693173295, "loss": 0.49, "step": 8912 }, { "epoch": 0.6608586045821903, "grad_norm": 0.35219910740852356, "learning_rate": 0.00011089772443110777, "loss": 0.4912, "step": 8913 }, { "epoch": 0.6609327500556091, "grad_norm": 0.35941216349601746, "learning_rate": 0.00011088772193048262, "loss": 0.5289, "step": 8914 }, { "epoch": 0.6610068955290279, "grad_norm": 0.36623042821884155, "learning_rate": 0.00011087771942985747, "loss": 0.5239, "step": 8915 }, { "epoch": 0.6610810410024468, "grad_norm": 0.3335724174976349, "learning_rate": 0.00011086771692923232, "loss": 0.4922, "step": 8916 }, { "epoch": 0.6611551864758657, "grad_norm": 0.3604157865047455, "learning_rate": 0.00011085771442860715, "loss": 0.5082, "step": 8917 }, { "epoch": 0.6612293319492845, "grad_norm": 0.3638764023780823, "learning_rate": 0.000110847711927982, "loss": 0.5234, "step": 8918 }, { "epoch": 0.6613034774227033, "grad_norm": 0.37706249952316284, "learning_rate": 0.00011083770942735685, "loss": 0.5134, "step": 8919 }, { "epoch": 0.6613776228961222, "grad_norm": 0.37201449275016785, "learning_rate": 0.00011082770692673167, "loss": 0.5317, "step": 8920 }, { "epoch": 0.661451768369541, "grad_norm": 0.3522190451622009, "learning_rate": 0.00011081770442610652, "loss": 0.4673, "step": 8921 }, { "epoch": 0.6615259138429599, "grad_norm": 0.3952111601829529, "learning_rate": 0.00011080770192548137, "loss": 0.5308, "step": 8922 }, { "epoch": 0.6616000593163788, "grad_norm": 0.351765513420105, "learning_rate": 0.00011079769942485622, "loss": 0.4665, "step": 8923 }, { "epoch": 0.6616742047897975, "grad_norm": 0.37047553062438965, "learning_rate": 0.00011078769692423106, "loss": 0.4715, "step": 8924 }, { "epoch": 0.6617483502632164, "grad_norm": 0.3801669180393219, "learning_rate": 0.0001107776944236059, "loss": 0.5061, "step": 8925 }, { "epoch": 0.6618224957366353, "grad_norm": 0.35665762424468994, "learning_rate": 0.00011076769192298075, "loss": 0.4991, "step": 8926 }, { "epoch": 0.6618966412100541, "grad_norm": 0.37205836176872253, "learning_rate": 0.0001107576894223556, "loss": 0.5149, "step": 8927 }, { "epoch": 0.661970786683473, "grad_norm": 0.36163797974586487, "learning_rate": 0.00011074768692173044, "loss": 0.5024, "step": 8928 }, { "epoch": 0.6620449321568919, "grad_norm": 0.3607529401779175, "learning_rate": 0.00011073768442110529, "loss": 0.504, "step": 8929 }, { "epoch": 0.6621190776303106, "grad_norm": 0.3722712993621826, "learning_rate": 0.00011072768192048012, "loss": 0.4931, "step": 8930 }, { "epoch": 0.6621932231037295, "grad_norm": 0.35263586044311523, "learning_rate": 0.00011071767941985496, "loss": 0.4967, "step": 8931 }, { "epoch": 0.6622673685771484, "grad_norm": 0.35151219367980957, "learning_rate": 0.00011070767691922981, "loss": 0.5318, "step": 8932 }, { "epoch": 0.6623415140505672, "grad_norm": 0.38965821266174316, "learning_rate": 0.00011069767441860466, "loss": 0.5533, "step": 8933 }, { "epoch": 0.6624156595239861, "grad_norm": 0.3435323238372803, "learning_rate": 0.00011068767191797951, "loss": 0.5034, "step": 8934 }, { "epoch": 0.662489804997405, "grad_norm": 0.3579493463039398, "learning_rate": 0.00011067766941735434, "loss": 0.5593, "step": 8935 }, { "epoch": 0.6625639504708237, "grad_norm": 0.3582857549190521, "learning_rate": 0.00011066766691672919, "loss": 0.5144, "step": 8936 }, { "epoch": 0.6626380959442426, "grad_norm": 0.3437146544456482, "learning_rate": 0.00011065766441610404, "loss": 0.483, "step": 8937 }, { "epoch": 0.6627122414176615, "grad_norm": 0.35923558473587036, "learning_rate": 0.00011064766191547889, "loss": 0.4996, "step": 8938 }, { "epoch": 0.6627863868910803, "grad_norm": 0.3535863757133484, "learning_rate": 0.00011063765941485371, "loss": 0.4956, "step": 8939 }, { "epoch": 0.6628605323644992, "grad_norm": 0.3742672801017761, "learning_rate": 0.00011062765691422856, "loss": 0.4998, "step": 8940 }, { "epoch": 0.662934677837918, "grad_norm": 0.3597225844860077, "learning_rate": 0.00011061765441360341, "loss": 0.4978, "step": 8941 }, { "epoch": 0.6630088233113368, "grad_norm": 0.33182623982429504, "learning_rate": 0.00011060765191297826, "loss": 0.4644, "step": 8942 }, { "epoch": 0.6630829687847557, "grad_norm": 0.380733847618103, "learning_rate": 0.00011059764941235309, "loss": 0.5282, "step": 8943 }, { "epoch": 0.6631571142581746, "grad_norm": 0.3654848039150238, "learning_rate": 0.00011058764691172794, "loss": 0.5109, "step": 8944 }, { "epoch": 0.6632312597315934, "grad_norm": 0.3448181748390198, "learning_rate": 0.00011057764441110279, "loss": 0.481, "step": 8945 }, { "epoch": 0.6633054052050122, "grad_norm": 0.36775392293930054, "learning_rate": 0.00011056764191047761, "loss": 0.4818, "step": 8946 }, { "epoch": 0.6633795506784311, "grad_norm": 0.3537725806236267, "learning_rate": 0.00011055763940985246, "loss": 0.4738, "step": 8947 }, { "epoch": 0.6634536961518499, "grad_norm": 0.35059288144111633, "learning_rate": 0.00011054763690922731, "loss": 0.5089, "step": 8948 }, { "epoch": 0.6635278416252688, "grad_norm": 0.3711710274219513, "learning_rate": 0.00011053763440860216, "loss": 0.4878, "step": 8949 }, { "epoch": 0.6636019870986877, "grad_norm": 0.35050931572914124, "learning_rate": 0.00011052763190797699, "loss": 0.4594, "step": 8950 }, { "epoch": 0.6636761325721064, "grad_norm": 0.37525784969329834, "learning_rate": 0.00011051762940735184, "loss": 0.5332, "step": 8951 }, { "epoch": 0.6637502780455253, "grad_norm": 0.3339020311832428, "learning_rate": 0.00011050762690672669, "loss": 0.4169, "step": 8952 }, { "epoch": 0.6638244235189442, "grad_norm": 0.3860345184803009, "learning_rate": 0.00011049762440610154, "loss": 0.5157, "step": 8953 }, { "epoch": 0.663898568992363, "grad_norm": 0.34594249725341797, "learning_rate": 0.00011048762190547636, "loss": 0.4842, "step": 8954 }, { "epoch": 0.6639727144657819, "grad_norm": 0.38767218589782715, "learning_rate": 0.00011047761940485121, "loss": 0.5458, "step": 8955 }, { "epoch": 0.6640468599392008, "grad_norm": 0.35804039239883423, "learning_rate": 0.00011046761690422607, "loss": 0.5032, "step": 8956 }, { "epoch": 0.6641210054126195, "grad_norm": 0.3721153140068054, "learning_rate": 0.00011045761440360089, "loss": 0.4947, "step": 8957 }, { "epoch": 0.6641951508860384, "grad_norm": 0.3814346194267273, "learning_rate": 0.00011044761190297574, "loss": 0.5362, "step": 8958 }, { "epoch": 0.6642692963594573, "grad_norm": 0.3748835325241089, "learning_rate": 0.00011043760940235059, "loss": 0.5193, "step": 8959 }, { "epoch": 0.6643434418328761, "grad_norm": 0.3453173339366913, "learning_rate": 0.00011042760690172544, "loss": 0.486, "step": 8960 }, { "epoch": 0.664417587306295, "grad_norm": 0.3681824207305908, "learning_rate": 0.00011041760440110028, "loss": 0.5391, "step": 8961 }, { "epoch": 0.6644917327797139, "grad_norm": 0.3658437430858612, "learning_rate": 0.00011040760190047513, "loss": 0.4853, "step": 8962 }, { "epoch": 0.6645658782531326, "grad_norm": 0.33901235461235046, "learning_rate": 0.00011039759939984997, "loss": 0.485, "step": 8963 }, { "epoch": 0.6646400237265515, "grad_norm": 0.34112533926963806, "learning_rate": 0.00011038759689922482, "loss": 0.4791, "step": 8964 }, { "epoch": 0.6647141691999704, "grad_norm": 0.35395172238349915, "learning_rate": 0.00011037759439859965, "loss": 0.4812, "step": 8965 }, { "epoch": 0.6647883146733892, "grad_norm": 0.34447720646858215, "learning_rate": 0.0001103675918979745, "loss": 0.4869, "step": 8966 }, { "epoch": 0.664862460146808, "grad_norm": 0.36947888135910034, "learning_rate": 0.00011035758939734935, "loss": 0.521, "step": 8967 }, { "epoch": 0.6649366056202269, "grad_norm": 0.3760451376438141, "learning_rate": 0.00011034758689672418, "loss": 0.5126, "step": 8968 }, { "epoch": 0.6650107510936457, "grad_norm": 0.3684535622596741, "learning_rate": 0.00011033758439609903, "loss": 0.4972, "step": 8969 }, { "epoch": 0.6650848965670646, "grad_norm": 0.37175101041793823, "learning_rate": 0.00011032758189547388, "loss": 0.5183, "step": 8970 }, { "epoch": 0.6651590420404835, "grad_norm": 0.3473237156867981, "learning_rate": 0.00011031757939484873, "loss": 0.4904, "step": 8971 }, { "epoch": 0.6652331875139023, "grad_norm": 0.36588624119758606, "learning_rate": 0.00011030757689422355, "loss": 0.4748, "step": 8972 }, { "epoch": 0.6653073329873211, "grad_norm": 0.35418450832366943, "learning_rate": 0.0001102975743935984, "loss": 0.4808, "step": 8973 }, { "epoch": 0.6653814784607399, "grad_norm": 0.3365418314933777, "learning_rate": 0.00011028757189297325, "loss": 0.4754, "step": 8974 }, { "epoch": 0.6654556239341588, "grad_norm": 0.3567802309989929, "learning_rate": 0.0001102775693923481, "loss": 0.4989, "step": 8975 }, { "epoch": 0.6655297694075777, "grad_norm": 0.3550167381763458, "learning_rate": 0.00011026756689172293, "loss": 0.5006, "step": 8976 }, { "epoch": 0.6656039148809965, "grad_norm": 0.36969876289367676, "learning_rate": 0.00011025756439109778, "loss": 0.4905, "step": 8977 }, { "epoch": 0.6656780603544153, "grad_norm": 0.37726569175720215, "learning_rate": 0.00011024756189047263, "loss": 0.5174, "step": 8978 }, { "epoch": 0.6657522058278342, "grad_norm": 0.40012773871421814, "learning_rate": 0.00011023755938984748, "loss": 0.5129, "step": 8979 }, { "epoch": 0.665826351301253, "grad_norm": 0.3503589928150177, "learning_rate": 0.0001102275568892223, "loss": 0.5181, "step": 8980 }, { "epoch": 0.6659004967746719, "grad_norm": 0.37069809436798096, "learning_rate": 0.00011021755438859715, "loss": 0.5376, "step": 8981 }, { "epoch": 0.6659746422480908, "grad_norm": 0.35494744777679443, "learning_rate": 0.000110207551887972, "loss": 0.47, "step": 8982 }, { "epoch": 0.6660487877215095, "grad_norm": 0.3504970669746399, "learning_rate": 0.00011019754938734683, "loss": 0.4696, "step": 8983 }, { "epoch": 0.6661229331949284, "grad_norm": 0.3771095275878906, "learning_rate": 0.00011018754688672168, "loss": 0.4932, "step": 8984 }, { "epoch": 0.6661970786683473, "grad_norm": 0.348714143037796, "learning_rate": 0.00011017754438609653, "loss": 0.4728, "step": 8985 }, { "epoch": 0.6662712241417661, "grad_norm": 0.35884889960289, "learning_rate": 0.00011016754188547138, "loss": 0.4852, "step": 8986 }, { "epoch": 0.666345369615185, "grad_norm": 0.3987523019313812, "learning_rate": 0.0001101575393848462, "loss": 0.5374, "step": 8987 }, { "epoch": 0.6664195150886039, "grad_norm": 0.3827769160270691, "learning_rate": 0.00011014753688422106, "loss": 0.5299, "step": 8988 }, { "epoch": 0.6664936605620226, "grad_norm": 0.36596202850341797, "learning_rate": 0.0001101375343835959, "loss": 0.5058, "step": 8989 }, { "epoch": 0.6665678060354415, "grad_norm": 0.35518887639045715, "learning_rate": 0.00011012753188297076, "loss": 0.4994, "step": 8990 }, { "epoch": 0.6666419515088604, "grad_norm": 0.3390601873397827, "learning_rate": 0.00011011752938234558, "loss": 0.4629, "step": 8991 }, { "epoch": 0.6667160969822792, "grad_norm": 0.3557116687297821, "learning_rate": 0.00011010752688172043, "loss": 0.5044, "step": 8992 }, { "epoch": 0.6667902424556981, "grad_norm": 0.3715432286262512, "learning_rate": 0.00011009752438109528, "loss": 0.5384, "step": 8993 }, { "epoch": 0.666864387929117, "grad_norm": 0.3670808672904968, "learning_rate": 0.00011008752188047012, "loss": 0.4973, "step": 8994 }, { "epoch": 0.6669385334025357, "grad_norm": 0.3662213683128357, "learning_rate": 0.00011007751937984496, "loss": 0.5022, "step": 8995 }, { "epoch": 0.6670126788759546, "grad_norm": 0.3425581455230713, "learning_rate": 0.0001100675168792198, "loss": 0.4607, "step": 8996 }, { "epoch": 0.6670868243493735, "grad_norm": 0.36740586161613464, "learning_rate": 0.00011005751437859466, "loss": 0.4983, "step": 8997 }, { "epoch": 0.6671609698227923, "grad_norm": 0.34895390272140503, "learning_rate": 0.0001100475118779695, "loss": 0.5092, "step": 8998 }, { "epoch": 0.6672351152962112, "grad_norm": 0.35497385263442993, "learning_rate": 0.00011003750937734434, "loss": 0.4878, "step": 8999 }, { "epoch": 0.66730926076963, "grad_norm": 0.3395164906978607, "learning_rate": 0.0001100275068767192, "loss": 0.5044, "step": 9000 }, { "epoch": 0.6673834062430488, "grad_norm": 0.3402450382709503, "learning_rate": 0.00011001750437609403, "loss": 0.4677, "step": 9001 }, { "epoch": 0.6674575517164677, "grad_norm": 0.37875768542289734, "learning_rate": 0.00011000750187546887, "loss": 0.5095, "step": 9002 }, { "epoch": 0.6675316971898866, "grad_norm": 0.3501322567462921, "learning_rate": 0.00010999749937484372, "loss": 0.494, "step": 9003 }, { "epoch": 0.6676058426633054, "grad_norm": 0.34909331798553467, "learning_rate": 0.00010998749687421857, "loss": 0.4863, "step": 9004 }, { "epoch": 0.6676799881367242, "grad_norm": 0.3462585210800171, "learning_rate": 0.0001099774943735934, "loss": 0.4768, "step": 9005 }, { "epoch": 0.6677541336101431, "grad_norm": 0.35788246989250183, "learning_rate": 0.00010996749187296824, "loss": 0.5287, "step": 9006 }, { "epoch": 0.6678282790835619, "grad_norm": 0.3493776321411133, "learning_rate": 0.0001099574893723431, "loss": 0.4787, "step": 9007 }, { "epoch": 0.6679024245569808, "grad_norm": 0.3735778331756592, "learning_rate": 0.00010994748687171795, "loss": 0.5171, "step": 9008 }, { "epoch": 0.6679765700303997, "grad_norm": 0.360078364610672, "learning_rate": 0.00010993748437109277, "loss": 0.5054, "step": 9009 }, { "epoch": 0.6680507155038184, "grad_norm": 0.3261552155017853, "learning_rate": 0.00010992748187046762, "loss": 0.4575, "step": 9010 }, { "epoch": 0.6681248609772373, "grad_norm": 0.3439791202545166, "learning_rate": 0.00010991747936984247, "loss": 0.4803, "step": 9011 }, { "epoch": 0.6681990064506562, "grad_norm": 0.342547208070755, "learning_rate": 0.00010990747686921732, "loss": 0.471, "step": 9012 }, { "epoch": 0.668273151924075, "grad_norm": 0.3568231761455536, "learning_rate": 0.00010989747436859215, "loss": 0.4992, "step": 9013 }, { "epoch": 0.6683472973974939, "grad_norm": 0.3348945081233978, "learning_rate": 0.000109887471867967, "loss": 0.4671, "step": 9014 }, { "epoch": 0.6684214428709128, "grad_norm": 0.3605952560901642, "learning_rate": 0.00010987746936734185, "loss": 0.5123, "step": 9015 }, { "epoch": 0.6684955883443315, "grad_norm": 0.3366895020008087, "learning_rate": 0.0001098674668667167, "loss": 0.5097, "step": 9016 }, { "epoch": 0.6685697338177504, "grad_norm": 0.3441711664199829, "learning_rate": 0.00010985746436609152, "loss": 0.4384, "step": 9017 }, { "epoch": 0.6686438792911693, "grad_norm": 0.37817710638046265, "learning_rate": 0.00010984746186546637, "loss": 0.5212, "step": 9018 }, { "epoch": 0.6687180247645881, "grad_norm": 0.3544389009475708, "learning_rate": 0.00010983745936484122, "loss": 0.4784, "step": 9019 }, { "epoch": 0.668792170238007, "grad_norm": 0.3478761911392212, "learning_rate": 0.00010982745686421605, "loss": 0.4869, "step": 9020 }, { "epoch": 0.6688663157114259, "grad_norm": 0.3651321530342102, "learning_rate": 0.0001098174543635909, "loss": 0.502, "step": 9021 }, { "epoch": 0.6689404611848446, "grad_norm": 0.3405432403087616, "learning_rate": 0.00010980745186296575, "loss": 0.4701, "step": 9022 }, { "epoch": 0.6690146066582635, "grad_norm": 0.34456852078437805, "learning_rate": 0.0001097974493623406, "loss": 0.4895, "step": 9023 }, { "epoch": 0.6690887521316824, "grad_norm": 0.32028284668922424, "learning_rate": 0.00010978744686171542, "loss": 0.4885, "step": 9024 }, { "epoch": 0.6691628976051012, "grad_norm": 0.36883544921875, "learning_rate": 0.00010977744436109027, "loss": 0.5167, "step": 9025 }, { "epoch": 0.66923704307852, "grad_norm": 0.35314273834228516, "learning_rate": 0.00010976744186046512, "loss": 0.5212, "step": 9026 }, { "epoch": 0.6693111885519389, "grad_norm": 0.37837380170822144, "learning_rate": 0.00010975743935983997, "loss": 0.4968, "step": 9027 }, { "epoch": 0.6693853340253577, "grad_norm": 0.35164323449134827, "learning_rate": 0.0001097474368592148, "loss": 0.4923, "step": 9028 }, { "epoch": 0.6694594794987766, "grad_norm": 0.3555159568786621, "learning_rate": 0.00010973743435858965, "loss": 0.4644, "step": 9029 }, { "epoch": 0.6695336249721955, "grad_norm": 0.3707311451435089, "learning_rate": 0.0001097274318579645, "loss": 0.4955, "step": 9030 }, { "epoch": 0.6696077704456143, "grad_norm": 0.3531847596168518, "learning_rate": 0.00010971742935733933, "loss": 0.5076, "step": 9031 }, { "epoch": 0.6696819159190331, "grad_norm": 0.3789023458957672, "learning_rate": 0.00010970742685671419, "loss": 0.5444, "step": 9032 }, { "epoch": 0.669756061392452, "grad_norm": 0.33515650033950806, "learning_rate": 0.00010969742435608902, "loss": 0.4699, "step": 9033 }, { "epoch": 0.6698302068658708, "grad_norm": 0.3635168969631195, "learning_rate": 0.00010968742185546387, "loss": 0.5153, "step": 9034 }, { "epoch": 0.6699043523392897, "grad_norm": 0.3390504717826843, "learning_rate": 0.00010967741935483871, "loss": 0.4534, "step": 9035 }, { "epoch": 0.6699784978127086, "grad_norm": 0.3496343195438385, "learning_rate": 0.00010966741685421356, "loss": 0.48, "step": 9036 }, { "epoch": 0.6700526432861273, "grad_norm": 0.37331950664520264, "learning_rate": 0.00010965741435358841, "loss": 0.4946, "step": 9037 }, { "epoch": 0.6701267887595462, "grad_norm": 0.35134685039520264, "learning_rate": 0.00010964741185296326, "loss": 0.5182, "step": 9038 }, { "epoch": 0.6702009342329651, "grad_norm": 0.36373236775398254, "learning_rate": 0.00010963740935233809, "loss": 0.5047, "step": 9039 }, { "epoch": 0.6702750797063839, "grad_norm": 0.37409090995788574, "learning_rate": 0.00010962740685171294, "loss": 0.5312, "step": 9040 }, { "epoch": 0.6703492251798028, "grad_norm": 0.3474617004394531, "learning_rate": 0.00010961740435108779, "loss": 0.4863, "step": 9041 }, { "epoch": 0.6704233706532217, "grad_norm": 0.3578753173351288, "learning_rate": 0.00010960740185046261, "loss": 0.4783, "step": 9042 }, { "epoch": 0.6704975161266404, "grad_norm": 0.3759060204029083, "learning_rate": 0.00010959739934983746, "loss": 0.5157, "step": 9043 }, { "epoch": 0.6705716616000593, "grad_norm": 0.3919251263141632, "learning_rate": 0.00010958739684921231, "loss": 0.5, "step": 9044 }, { "epoch": 0.6706458070734782, "grad_norm": 0.3611155152320862, "learning_rate": 0.00010957739434858716, "loss": 0.489, "step": 9045 }, { "epoch": 0.670719952546897, "grad_norm": 0.3526819348335266, "learning_rate": 0.00010956739184796199, "loss": 0.4759, "step": 9046 }, { "epoch": 0.6707940980203159, "grad_norm": 0.3603340983390808, "learning_rate": 0.00010955738934733684, "loss": 0.4988, "step": 9047 }, { "epoch": 0.6708682434937348, "grad_norm": 0.36088642477989197, "learning_rate": 0.00010954738684671169, "loss": 0.5318, "step": 9048 }, { "epoch": 0.6709423889671535, "grad_norm": 0.3516823947429657, "learning_rate": 0.00010953738434608654, "loss": 0.4916, "step": 9049 }, { "epoch": 0.6710165344405724, "grad_norm": 0.3545812666416168, "learning_rate": 0.00010952738184546136, "loss": 0.4885, "step": 9050 }, { "epoch": 0.6710906799139913, "grad_norm": 0.3852003216743469, "learning_rate": 0.00010951737934483621, "loss": 0.5279, "step": 9051 }, { "epoch": 0.6711648253874101, "grad_norm": 0.3574109673500061, "learning_rate": 0.00010950737684421106, "loss": 0.4882, "step": 9052 }, { "epoch": 0.671238970860829, "grad_norm": 0.35218900442123413, "learning_rate": 0.00010949737434358591, "loss": 0.4519, "step": 9053 }, { "epoch": 0.6713131163342478, "grad_norm": 0.3786713182926178, "learning_rate": 0.00010948737184296074, "loss": 0.5116, "step": 9054 }, { "epoch": 0.6713872618076666, "grad_norm": 0.35931316018104553, "learning_rate": 0.00010947736934233559, "loss": 0.5203, "step": 9055 }, { "epoch": 0.6714614072810855, "grad_norm": 0.395988404750824, "learning_rate": 0.00010946736684171044, "loss": 0.5004, "step": 9056 }, { "epoch": 0.6715355527545044, "grad_norm": 0.3601589500904083, "learning_rate": 0.00010945736434108526, "loss": 0.5154, "step": 9057 }, { "epoch": 0.6716096982279232, "grad_norm": 0.37691113352775574, "learning_rate": 0.00010944736184046011, "loss": 0.5281, "step": 9058 }, { "epoch": 0.671683843701342, "grad_norm": 0.3778701424598694, "learning_rate": 0.00010943735933983496, "loss": 0.565, "step": 9059 }, { "epoch": 0.6717579891747609, "grad_norm": 0.3617425262928009, "learning_rate": 0.00010942735683920981, "loss": 0.4903, "step": 9060 }, { "epoch": 0.6718321346481797, "grad_norm": 0.3674405813217163, "learning_rate": 0.00010941735433858464, "loss": 0.4904, "step": 9061 }, { "epoch": 0.6719062801215986, "grad_norm": 0.3811866044998169, "learning_rate": 0.00010940735183795949, "loss": 0.5412, "step": 9062 }, { "epoch": 0.6719804255950175, "grad_norm": 0.3539695739746094, "learning_rate": 0.00010939734933733434, "loss": 0.5044, "step": 9063 }, { "epoch": 0.6720545710684362, "grad_norm": 0.35474592447280884, "learning_rate": 0.00010938734683670919, "loss": 0.5219, "step": 9064 }, { "epoch": 0.6721287165418551, "grad_norm": 0.36193716526031494, "learning_rate": 0.00010937734433608403, "loss": 0.497, "step": 9065 }, { "epoch": 0.672202862015274, "grad_norm": 0.3838407099246979, "learning_rate": 0.00010936734183545886, "loss": 0.5521, "step": 9066 }, { "epoch": 0.6722770074886928, "grad_norm": 0.35952121019363403, "learning_rate": 0.00010935733933483371, "loss": 0.5271, "step": 9067 }, { "epoch": 0.6723511529621117, "grad_norm": 0.3334338963031769, "learning_rate": 0.00010934733683420855, "loss": 0.4745, "step": 9068 }, { "epoch": 0.6724252984355306, "grad_norm": 0.37885037064552307, "learning_rate": 0.0001093373343335834, "loss": 0.5219, "step": 9069 }, { "epoch": 0.6724994439089493, "grad_norm": 0.3704588711261749, "learning_rate": 0.00010932733183295825, "loss": 0.5049, "step": 9070 }, { "epoch": 0.6725735893823682, "grad_norm": 0.3780757188796997, "learning_rate": 0.00010931732933233309, "loss": 0.5307, "step": 9071 }, { "epoch": 0.6726477348557871, "grad_norm": 0.3540503978729248, "learning_rate": 0.00010930732683170793, "loss": 0.5047, "step": 9072 }, { "epoch": 0.6727218803292059, "grad_norm": 0.36949414014816284, "learning_rate": 0.00010929732433108278, "loss": 0.5565, "step": 9073 }, { "epoch": 0.6727960258026248, "grad_norm": 0.36437147855758667, "learning_rate": 0.00010928732183045763, "loss": 0.5287, "step": 9074 }, { "epoch": 0.6728701712760436, "grad_norm": 0.3582088053226471, "learning_rate": 0.00010927731932983248, "loss": 0.4938, "step": 9075 }, { "epoch": 0.6729443167494624, "grad_norm": 0.3595307469367981, "learning_rate": 0.0001092673168292073, "loss": 0.5241, "step": 9076 }, { "epoch": 0.6730184622228813, "grad_norm": 0.33576127886772156, "learning_rate": 0.00010925731432858215, "loss": 0.4728, "step": 9077 }, { "epoch": 0.6730926076963002, "grad_norm": 0.35949742794036865, "learning_rate": 0.000109247311827957, "loss": 0.5192, "step": 9078 }, { "epoch": 0.673166753169719, "grad_norm": 0.3843957781791687, "learning_rate": 0.00010923730932733183, "loss": 0.5485, "step": 9079 }, { "epoch": 0.6732408986431379, "grad_norm": 0.3670947551727295, "learning_rate": 0.00010922730682670668, "loss": 0.4841, "step": 9080 }, { "epoch": 0.6733150441165567, "grad_norm": 0.34332191944122314, "learning_rate": 0.00010921730432608153, "loss": 0.4838, "step": 9081 }, { "epoch": 0.6733891895899755, "grad_norm": 0.3777061104774475, "learning_rate": 0.00010920730182545638, "loss": 0.5693, "step": 9082 }, { "epoch": 0.6734633350633944, "grad_norm": 0.3625407814979553, "learning_rate": 0.0001091972993248312, "loss": 0.4828, "step": 9083 }, { "epoch": 0.6735374805368133, "grad_norm": 0.35985100269317627, "learning_rate": 0.00010918729682420605, "loss": 0.5195, "step": 9084 }, { "epoch": 0.6736116260102321, "grad_norm": 0.3428165316581726, "learning_rate": 0.0001091772943235809, "loss": 0.4861, "step": 9085 }, { "epoch": 0.6736857714836509, "grad_norm": 0.3837181329727173, "learning_rate": 0.00010916729182295575, "loss": 0.4937, "step": 9086 }, { "epoch": 0.6737599169570697, "grad_norm": 0.3438301980495453, "learning_rate": 0.00010915728932233058, "loss": 0.4963, "step": 9087 }, { "epoch": 0.6738340624304886, "grad_norm": 0.36734479665756226, "learning_rate": 0.00010914728682170543, "loss": 0.4932, "step": 9088 }, { "epoch": 0.6739082079039075, "grad_norm": 0.3346405327320099, "learning_rate": 0.00010913728432108028, "loss": 0.4897, "step": 9089 }, { "epoch": 0.6739823533773263, "grad_norm": 0.3694862425327301, "learning_rate": 0.00010912728182045513, "loss": 0.4998, "step": 9090 }, { "epoch": 0.6740564988507451, "grad_norm": 0.3490615487098694, "learning_rate": 0.00010911727931982995, "loss": 0.4761, "step": 9091 }, { "epoch": 0.674130644324164, "grad_norm": 0.3465363085269928, "learning_rate": 0.0001091072768192048, "loss": 0.4743, "step": 9092 }, { "epoch": 0.6742047897975828, "grad_norm": 0.3904900848865509, "learning_rate": 0.00010909727431857965, "loss": 0.4841, "step": 9093 }, { "epoch": 0.6742789352710017, "grad_norm": 0.3434426486492157, "learning_rate": 0.00010908727181795448, "loss": 0.4473, "step": 9094 }, { "epoch": 0.6743530807444206, "grad_norm": 0.37036216259002686, "learning_rate": 0.00010907726931732933, "loss": 0.5574, "step": 9095 }, { "epoch": 0.6744272262178393, "grad_norm": 0.3799957036972046, "learning_rate": 0.00010906726681670418, "loss": 0.5231, "step": 9096 }, { "epoch": 0.6745013716912582, "grad_norm": 0.36762452125549316, "learning_rate": 0.00010905726431607903, "loss": 0.4705, "step": 9097 }, { "epoch": 0.6745755171646771, "grad_norm": 0.3614429235458374, "learning_rate": 0.00010904726181545387, "loss": 0.5236, "step": 9098 }, { "epoch": 0.6746496626380959, "grad_norm": 0.3763953745365143, "learning_rate": 0.0001090372593148287, "loss": 0.5058, "step": 9099 }, { "epoch": 0.6747238081115148, "grad_norm": 0.3518953323364258, "learning_rate": 0.00010902725681420355, "loss": 0.4954, "step": 9100 }, { "epoch": 0.6747979535849337, "grad_norm": 0.36868900060653687, "learning_rate": 0.0001090172543135784, "loss": 0.5179, "step": 9101 }, { "epoch": 0.6748720990583524, "grad_norm": 0.3273063004016876, "learning_rate": 0.00010900725181295324, "loss": 0.4762, "step": 9102 }, { "epoch": 0.6749462445317713, "grad_norm": 0.36057981848716736, "learning_rate": 0.00010899724931232809, "loss": 0.5013, "step": 9103 }, { "epoch": 0.6750203900051902, "grad_norm": 0.3662770986557007, "learning_rate": 0.00010898724681170293, "loss": 0.5084, "step": 9104 }, { "epoch": 0.675094535478609, "grad_norm": 0.32823845744132996, "learning_rate": 0.00010897724431107777, "loss": 0.4721, "step": 9105 }, { "epoch": 0.6751686809520279, "grad_norm": 0.38041019439697266, "learning_rate": 0.00010896724181045262, "loss": 0.576, "step": 9106 }, { "epoch": 0.6752428264254468, "grad_norm": 0.34608393907546997, "learning_rate": 0.00010895723930982747, "loss": 0.4667, "step": 9107 }, { "epoch": 0.6753169718988655, "grad_norm": 0.3853490948677063, "learning_rate": 0.00010894723680920232, "loss": 0.5523, "step": 9108 }, { "epoch": 0.6753911173722844, "grad_norm": 0.36391618847846985, "learning_rate": 0.00010893723430857714, "loss": 0.5293, "step": 9109 }, { "epoch": 0.6754652628457033, "grad_norm": 0.33756938576698303, "learning_rate": 0.000108927231807952, "loss": 0.5148, "step": 9110 }, { "epoch": 0.6755394083191221, "grad_norm": 0.36934077739715576, "learning_rate": 0.00010891722930732684, "loss": 0.5172, "step": 9111 }, { "epoch": 0.675613553792541, "grad_norm": 0.36434653401374817, "learning_rate": 0.0001089072268067017, "loss": 0.5134, "step": 9112 }, { "epoch": 0.6756876992659598, "grad_norm": 0.3695566952228546, "learning_rate": 0.00010889722430607652, "loss": 0.5158, "step": 9113 }, { "epoch": 0.6757618447393786, "grad_norm": 0.3838656544685364, "learning_rate": 0.00010888722180545137, "loss": 0.5392, "step": 9114 }, { "epoch": 0.6758359902127975, "grad_norm": 0.37071773409843445, "learning_rate": 0.00010887721930482622, "loss": 0.5643, "step": 9115 }, { "epoch": 0.6759101356862164, "grad_norm": 0.35793060064315796, "learning_rate": 0.00010886721680420107, "loss": 0.479, "step": 9116 }, { "epoch": 0.6759842811596352, "grad_norm": 0.35565972328186035, "learning_rate": 0.0001088572143035759, "loss": 0.5011, "step": 9117 }, { "epoch": 0.676058426633054, "grad_norm": 0.3472578823566437, "learning_rate": 0.00010884721180295074, "loss": 0.5109, "step": 9118 }, { "epoch": 0.6761325721064729, "grad_norm": 0.36369121074676514, "learning_rate": 0.0001088372093023256, "loss": 0.4976, "step": 9119 }, { "epoch": 0.6762067175798917, "grad_norm": 0.3520163893699646, "learning_rate": 0.00010882720680170042, "loss": 0.5086, "step": 9120 }, { "epoch": 0.6762808630533106, "grad_norm": 0.37867215275764465, "learning_rate": 0.00010881720430107527, "loss": 0.5029, "step": 9121 }, { "epoch": 0.6763550085267295, "grad_norm": 0.3206562399864197, "learning_rate": 0.00010880720180045012, "loss": 0.4552, "step": 9122 }, { "epoch": 0.6764291540001482, "grad_norm": 0.3354053199291229, "learning_rate": 0.00010879719929982497, "loss": 0.4658, "step": 9123 }, { "epoch": 0.6765032994735671, "grad_norm": 0.3813732862472534, "learning_rate": 0.0001087871967991998, "loss": 0.5262, "step": 9124 }, { "epoch": 0.676577444946986, "grad_norm": 0.3909372389316559, "learning_rate": 0.00010877719429857464, "loss": 0.5342, "step": 9125 }, { "epoch": 0.6766515904204048, "grad_norm": 0.34839653968811035, "learning_rate": 0.0001087671917979495, "loss": 0.4887, "step": 9126 }, { "epoch": 0.6767257358938237, "grad_norm": 0.3753599524497986, "learning_rate": 0.00010875718929732435, "loss": 0.5619, "step": 9127 }, { "epoch": 0.6767998813672426, "grad_norm": 0.3747877776622772, "learning_rate": 0.00010874718679669917, "loss": 0.527, "step": 9128 }, { "epoch": 0.6768740268406613, "grad_norm": 0.3941866457462311, "learning_rate": 0.00010873718429607402, "loss": 0.5316, "step": 9129 }, { "epoch": 0.6769481723140802, "grad_norm": 0.3516162037849426, "learning_rate": 0.00010872718179544887, "loss": 0.48, "step": 9130 }, { "epoch": 0.6770223177874991, "grad_norm": 0.38839828968048096, "learning_rate": 0.00010871717929482371, "loss": 0.4905, "step": 9131 }, { "epoch": 0.6770964632609179, "grad_norm": 0.3577558100223541, "learning_rate": 0.00010870717679419854, "loss": 0.5076, "step": 9132 }, { "epoch": 0.6771706087343368, "grad_norm": 0.39611706137657166, "learning_rate": 0.0001086971742935734, "loss": 0.5263, "step": 9133 }, { "epoch": 0.6772447542077557, "grad_norm": 0.3424984812736511, "learning_rate": 0.00010868717179294825, "loss": 0.4728, "step": 9134 }, { "epoch": 0.6773188996811744, "grad_norm": 0.3705410063266754, "learning_rate": 0.00010867716929232308, "loss": 0.4762, "step": 9135 }, { "epoch": 0.6773930451545933, "grad_norm": 0.34356847405433655, "learning_rate": 0.00010866716679169793, "loss": 0.4772, "step": 9136 }, { "epoch": 0.6774671906280122, "grad_norm": 0.37194934487342834, "learning_rate": 0.00010865716429107277, "loss": 0.5011, "step": 9137 }, { "epoch": 0.677541336101431, "grad_norm": 0.34568697214126587, "learning_rate": 0.00010864716179044762, "loss": 0.473, "step": 9138 }, { "epoch": 0.6776154815748499, "grad_norm": 0.32972800731658936, "learning_rate": 0.00010863715928982246, "loss": 0.4678, "step": 9139 }, { "epoch": 0.6776896270482687, "grad_norm": 0.34931477904319763, "learning_rate": 0.00010862715678919731, "loss": 0.4832, "step": 9140 }, { "epoch": 0.6777637725216875, "grad_norm": 0.38496679067611694, "learning_rate": 0.00010861715428857216, "loss": 0.4816, "step": 9141 }, { "epoch": 0.6778379179951064, "grad_norm": 0.34870466589927673, "learning_rate": 0.00010860715178794698, "loss": 0.5077, "step": 9142 }, { "epoch": 0.6779120634685253, "grad_norm": 0.37389469146728516, "learning_rate": 0.00010859714928732183, "loss": 0.5038, "step": 9143 }, { "epoch": 0.6779862089419441, "grad_norm": 0.3742384612560272, "learning_rate": 0.00010858714678669668, "loss": 0.5271, "step": 9144 }, { "epoch": 0.6780603544153629, "grad_norm": 0.3449383080005646, "learning_rate": 0.00010857714428607154, "loss": 0.4798, "step": 9145 }, { "epoch": 0.6781344998887818, "grad_norm": 0.35365089774131775, "learning_rate": 0.00010856714178544636, "loss": 0.525, "step": 9146 }, { "epoch": 0.6782086453622006, "grad_norm": 0.34613168239593506, "learning_rate": 0.00010855713928482121, "loss": 0.493, "step": 9147 }, { "epoch": 0.6782827908356195, "grad_norm": 0.4116126298904419, "learning_rate": 0.00010854713678419606, "loss": 0.5319, "step": 9148 }, { "epoch": 0.6783569363090384, "grad_norm": 0.3782390058040619, "learning_rate": 0.00010853713428357091, "loss": 0.5229, "step": 9149 }, { "epoch": 0.6784310817824571, "grad_norm": 0.3557966947555542, "learning_rate": 0.00010852713178294573, "loss": 0.4937, "step": 9150 }, { "epoch": 0.678505227255876, "grad_norm": 0.37858518958091736, "learning_rate": 0.00010851712928232059, "loss": 0.4963, "step": 9151 }, { "epoch": 0.6785793727292949, "grad_norm": 0.3532433807849884, "learning_rate": 0.00010850712678169544, "loss": 0.4854, "step": 9152 }, { "epoch": 0.6786535182027137, "grad_norm": 0.36038702726364136, "learning_rate": 0.00010849712428107029, "loss": 0.5401, "step": 9153 }, { "epoch": 0.6787276636761326, "grad_norm": 0.3543373644351959, "learning_rate": 0.00010848712178044511, "loss": 0.4926, "step": 9154 }, { "epoch": 0.6788018091495515, "grad_norm": 0.33606937527656555, "learning_rate": 0.00010847711927981996, "loss": 0.4718, "step": 9155 }, { "epoch": 0.6788759546229702, "grad_norm": 0.3532671332359314, "learning_rate": 0.00010846711677919481, "loss": 0.4936, "step": 9156 }, { "epoch": 0.6789501000963891, "grad_norm": 0.35184428095817566, "learning_rate": 0.00010845711427856963, "loss": 0.4982, "step": 9157 }, { "epoch": 0.679024245569808, "grad_norm": 0.3402559459209442, "learning_rate": 0.00010844711177794449, "loss": 0.4628, "step": 9158 }, { "epoch": 0.6790983910432268, "grad_norm": 0.3749161958694458, "learning_rate": 0.00010843710927731934, "loss": 0.5475, "step": 9159 }, { "epoch": 0.6791725365166457, "grad_norm": 0.36433860659599304, "learning_rate": 0.00010842710677669419, "loss": 0.5181, "step": 9160 }, { "epoch": 0.6792466819900645, "grad_norm": 0.3791179358959198, "learning_rate": 0.00010841710427606901, "loss": 0.5475, "step": 9161 }, { "epoch": 0.6793208274634833, "grad_norm": 0.339382141828537, "learning_rate": 0.00010840710177544386, "loss": 0.4573, "step": 9162 }, { "epoch": 0.6793949729369022, "grad_norm": 0.35442808270454407, "learning_rate": 0.00010839709927481871, "loss": 0.4933, "step": 9163 }, { "epoch": 0.6794691184103211, "grad_norm": 0.34716686606407166, "learning_rate": 0.00010838709677419356, "loss": 0.4668, "step": 9164 }, { "epoch": 0.6795432638837399, "grad_norm": 0.3641692101955414, "learning_rate": 0.00010837709427356839, "loss": 0.5033, "step": 9165 }, { "epoch": 0.6796174093571588, "grad_norm": 0.3606448769569397, "learning_rate": 0.00010836709177294324, "loss": 0.4903, "step": 9166 }, { "epoch": 0.6796915548305776, "grad_norm": 0.33754268288612366, "learning_rate": 0.00010835708927231809, "loss": 0.4783, "step": 9167 }, { "epoch": 0.6797657003039964, "grad_norm": 0.3538111448287964, "learning_rate": 0.00010834708677169292, "loss": 0.49, "step": 9168 }, { "epoch": 0.6798398457774153, "grad_norm": 0.3765849471092224, "learning_rate": 0.00010833708427106777, "loss": 0.5233, "step": 9169 }, { "epoch": 0.6799139912508342, "grad_norm": 0.335933119058609, "learning_rate": 0.00010832708177044261, "loss": 0.4792, "step": 9170 }, { "epoch": 0.679988136724253, "grad_norm": 0.3555384576320648, "learning_rate": 0.00010831707926981746, "loss": 0.496, "step": 9171 }, { "epoch": 0.6800622821976718, "grad_norm": 0.37154683470726013, "learning_rate": 0.0001083070767691923, "loss": 0.524, "step": 9172 }, { "epoch": 0.6801364276710907, "grad_norm": 0.3632809519767761, "learning_rate": 0.00010829707426856715, "loss": 0.5127, "step": 9173 }, { "epoch": 0.6802105731445095, "grad_norm": 0.36321181058883667, "learning_rate": 0.000108287071767942, "loss": 0.482, "step": 9174 }, { "epoch": 0.6802847186179284, "grad_norm": 0.3633822500705719, "learning_rate": 0.00010827706926731684, "loss": 0.4836, "step": 9175 }, { "epoch": 0.6803588640913473, "grad_norm": 0.35230663418769836, "learning_rate": 0.00010826706676669167, "loss": 0.4976, "step": 9176 }, { "epoch": 0.680433009564766, "grad_norm": 0.3777303695678711, "learning_rate": 0.00010825706426606653, "loss": 0.5018, "step": 9177 }, { "epoch": 0.6805071550381849, "grad_norm": 0.34802699089050293, "learning_rate": 0.00010824706176544138, "loss": 0.4579, "step": 9178 }, { "epoch": 0.6805813005116038, "grad_norm": 0.3538070321083069, "learning_rate": 0.0001082370592648162, "loss": 0.5007, "step": 9179 }, { "epoch": 0.6806554459850226, "grad_norm": 0.40097370743751526, "learning_rate": 0.00010822705676419105, "loss": 0.5592, "step": 9180 }, { "epoch": 0.6807295914584415, "grad_norm": 0.3623118996620178, "learning_rate": 0.0001082170542635659, "loss": 0.5319, "step": 9181 }, { "epoch": 0.6808037369318604, "grad_norm": 0.3637049198150635, "learning_rate": 0.00010820705176294075, "loss": 0.4894, "step": 9182 }, { "epoch": 0.6808778824052791, "grad_norm": 0.3793320059776306, "learning_rate": 0.00010819704926231558, "loss": 0.534, "step": 9183 }, { "epoch": 0.680952027878698, "grad_norm": 0.35941144824028015, "learning_rate": 0.00010818704676169043, "loss": 0.5074, "step": 9184 }, { "epoch": 0.6810261733521169, "grad_norm": 0.3670473098754883, "learning_rate": 0.00010817704426106528, "loss": 0.5488, "step": 9185 }, { "epoch": 0.6811003188255357, "grad_norm": 0.3522094190120697, "learning_rate": 0.00010816704176044013, "loss": 0.4917, "step": 9186 }, { "epoch": 0.6811744642989546, "grad_norm": 0.36100292205810547, "learning_rate": 0.00010815703925981495, "loss": 0.5309, "step": 9187 }, { "epoch": 0.6812486097723734, "grad_norm": 0.38115543127059937, "learning_rate": 0.0001081470367591898, "loss": 0.5122, "step": 9188 }, { "epoch": 0.6813227552457922, "grad_norm": 0.36364415287971497, "learning_rate": 0.00010813703425856465, "loss": 0.4837, "step": 9189 }, { "epoch": 0.6813969007192111, "grad_norm": 0.34107062220573425, "learning_rate": 0.0001081270317579395, "loss": 0.4747, "step": 9190 }, { "epoch": 0.68147104619263, "grad_norm": 0.35684090852737427, "learning_rate": 0.00010811702925731433, "loss": 0.483, "step": 9191 }, { "epoch": 0.6815451916660488, "grad_norm": 0.36546823382377625, "learning_rate": 0.00010810702675668918, "loss": 0.4833, "step": 9192 }, { "epoch": 0.6816193371394677, "grad_norm": 0.34610074758529663, "learning_rate": 0.00010809702425606403, "loss": 0.4873, "step": 9193 }, { "epoch": 0.6816934826128865, "grad_norm": 0.35289332270622253, "learning_rate": 0.00010808702175543885, "loss": 0.4903, "step": 9194 }, { "epoch": 0.6817676280863053, "grad_norm": 0.3712863326072693, "learning_rate": 0.0001080770192548137, "loss": 0.524, "step": 9195 }, { "epoch": 0.6818417735597242, "grad_norm": 0.37310224771499634, "learning_rate": 0.00010806701675418855, "loss": 0.5043, "step": 9196 }, { "epoch": 0.6819159190331431, "grad_norm": 0.38445162773132324, "learning_rate": 0.0001080570142535634, "loss": 0.5183, "step": 9197 }, { "epoch": 0.6819900645065619, "grad_norm": 0.36475467681884766, "learning_rate": 0.00010804701175293823, "loss": 0.4981, "step": 9198 }, { "epoch": 0.6820642099799807, "grad_norm": 0.3706536591053009, "learning_rate": 0.00010803700925231308, "loss": 0.5207, "step": 9199 }, { "epoch": 0.6821383554533996, "grad_norm": 0.35903701186180115, "learning_rate": 0.00010802700675168793, "loss": 0.5097, "step": 9200 }, { "epoch": 0.6822125009268184, "grad_norm": 0.37340009212493896, "learning_rate": 0.00010801700425106278, "loss": 0.4971, "step": 9201 }, { "epoch": 0.6822866464002373, "grad_norm": 0.35680827498435974, "learning_rate": 0.0001080070017504376, "loss": 0.5024, "step": 9202 }, { "epoch": 0.6823607918736561, "grad_norm": 0.3542453944683075, "learning_rate": 0.00010799699924981245, "loss": 0.5157, "step": 9203 }, { "epoch": 0.6824349373470749, "grad_norm": 0.3901200592517853, "learning_rate": 0.0001079869967491873, "loss": 0.5209, "step": 9204 }, { "epoch": 0.6825090828204938, "grad_norm": 0.382449746131897, "learning_rate": 0.00010797699424856214, "loss": 0.491, "step": 9205 }, { "epoch": 0.6825832282939126, "grad_norm": 0.38026881217956543, "learning_rate": 0.00010796699174793699, "loss": 0.5331, "step": 9206 }, { "epoch": 0.6826573737673315, "grad_norm": 0.3441128134727478, "learning_rate": 0.00010795698924731184, "loss": 0.4968, "step": 9207 }, { "epoch": 0.6827315192407504, "grad_norm": 0.3414430022239685, "learning_rate": 0.00010794698674668668, "loss": 0.5123, "step": 9208 }, { "epoch": 0.6828056647141691, "grad_norm": 0.35674548149108887, "learning_rate": 0.00010793698424606152, "loss": 0.518, "step": 9209 }, { "epoch": 0.682879810187588, "grad_norm": 0.352111279964447, "learning_rate": 0.00010792698174543637, "loss": 0.5064, "step": 9210 }, { "epoch": 0.6829539556610069, "grad_norm": 0.366971880197525, "learning_rate": 0.00010791697924481122, "loss": 0.5373, "step": 9211 }, { "epoch": 0.6830281011344257, "grad_norm": 0.37707963585853577, "learning_rate": 0.00010790697674418607, "loss": 0.5217, "step": 9212 }, { "epoch": 0.6831022466078446, "grad_norm": 0.3649729788303375, "learning_rate": 0.00010789697424356089, "loss": 0.5147, "step": 9213 }, { "epoch": 0.6831763920812635, "grad_norm": 0.345339298248291, "learning_rate": 0.00010788697174293574, "loss": 0.4974, "step": 9214 }, { "epoch": 0.6832505375546822, "grad_norm": 0.3786914646625519, "learning_rate": 0.00010787696924231059, "loss": 0.5122, "step": 9215 }, { "epoch": 0.6833246830281011, "grad_norm": 0.3458319306373596, "learning_rate": 0.00010786696674168542, "loss": 0.5065, "step": 9216 }, { "epoch": 0.68339882850152, "grad_norm": 0.3482508659362793, "learning_rate": 0.00010785696424106027, "loss": 0.4801, "step": 9217 }, { "epoch": 0.6834729739749388, "grad_norm": 0.3632117509841919, "learning_rate": 0.00010784696174043512, "loss": 0.5284, "step": 9218 }, { "epoch": 0.6835471194483577, "grad_norm": 0.3562195301055908, "learning_rate": 0.00010783695923980997, "loss": 0.4927, "step": 9219 }, { "epoch": 0.6836212649217766, "grad_norm": 0.33013251423835754, "learning_rate": 0.00010782695673918479, "loss": 0.4476, "step": 9220 }, { "epoch": 0.6836954103951953, "grad_norm": 0.33893105387687683, "learning_rate": 0.00010781695423855964, "loss": 0.4798, "step": 9221 }, { "epoch": 0.6837695558686142, "grad_norm": 0.35933586955070496, "learning_rate": 0.00010780695173793449, "loss": 0.4752, "step": 9222 }, { "epoch": 0.6838437013420331, "grad_norm": 0.36498960852622986, "learning_rate": 0.00010779694923730934, "loss": 0.4987, "step": 9223 }, { "epoch": 0.6839178468154519, "grad_norm": 0.3674136698246002, "learning_rate": 0.00010778694673668417, "loss": 0.5206, "step": 9224 }, { "epoch": 0.6839919922888708, "grad_norm": 0.3984517455101013, "learning_rate": 0.00010777694423605902, "loss": 0.5288, "step": 9225 }, { "epoch": 0.6840661377622896, "grad_norm": 0.3780442774295807, "learning_rate": 0.00010776694173543387, "loss": 0.4998, "step": 9226 }, { "epoch": 0.6841402832357084, "grad_norm": 0.3819374740123749, "learning_rate": 0.00010775693923480872, "loss": 0.5319, "step": 9227 }, { "epoch": 0.6842144287091273, "grad_norm": 0.38054460287094116, "learning_rate": 0.00010774693673418354, "loss": 0.5301, "step": 9228 }, { "epoch": 0.6842885741825462, "grad_norm": 0.3589634299278259, "learning_rate": 0.00010773693423355839, "loss": 0.5121, "step": 9229 }, { "epoch": 0.684362719655965, "grad_norm": 0.34740978479385376, "learning_rate": 0.00010772693173293324, "loss": 0.4783, "step": 9230 }, { "epoch": 0.6844368651293838, "grad_norm": 0.38168010115623474, "learning_rate": 0.00010771692923230807, "loss": 0.5177, "step": 9231 }, { "epoch": 0.6845110106028027, "grad_norm": 0.36121031641960144, "learning_rate": 0.00010770692673168292, "loss": 0.4732, "step": 9232 }, { "epoch": 0.6845851560762215, "grad_norm": 0.3690917491912842, "learning_rate": 0.00010769692423105777, "loss": 0.495, "step": 9233 }, { "epoch": 0.6846593015496404, "grad_norm": 0.37076491117477417, "learning_rate": 0.00010768692173043262, "loss": 0.5254, "step": 9234 }, { "epoch": 0.6847334470230593, "grad_norm": 0.3518432378768921, "learning_rate": 0.00010767691922980744, "loss": 0.4912, "step": 9235 }, { "epoch": 0.684807592496478, "grad_norm": 0.3483612835407257, "learning_rate": 0.0001076669167291823, "loss": 0.5091, "step": 9236 }, { "epoch": 0.6848817379698969, "grad_norm": 0.34950873255729675, "learning_rate": 0.00010765691422855714, "loss": 0.4755, "step": 9237 }, { "epoch": 0.6849558834433158, "grad_norm": 0.3497231602668762, "learning_rate": 0.000107646911727932, "loss": 0.5014, "step": 9238 }, { "epoch": 0.6850300289167346, "grad_norm": 0.3797464370727539, "learning_rate": 0.00010763690922730683, "loss": 0.5293, "step": 9239 }, { "epoch": 0.6851041743901535, "grad_norm": 0.36789801716804504, "learning_rate": 0.00010762690672668167, "loss": 0.51, "step": 9240 }, { "epoch": 0.6851783198635724, "grad_norm": 0.33882808685302734, "learning_rate": 0.00010761690422605652, "loss": 0.4658, "step": 9241 }, { "epoch": 0.6852524653369911, "grad_norm": 0.33374476432800293, "learning_rate": 0.00010760690172543136, "loss": 0.4661, "step": 9242 }, { "epoch": 0.68532661081041, "grad_norm": 0.3598877489566803, "learning_rate": 0.00010759689922480621, "loss": 0.5179, "step": 9243 }, { "epoch": 0.6854007562838289, "grad_norm": 0.3575194180011749, "learning_rate": 0.00010758689672418106, "loss": 0.5489, "step": 9244 }, { "epoch": 0.6854749017572477, "grad_norm": 0.34032219648361206, "learning_rate": 0.00010757689422355591, "loss": 0.4923, "step": 9245 }, { "epoch": 0.6855490472306666, "grad_norm": 0.3329140841960907, "learning_rate": 0.00010756689172293073, "loss": 0.4561, "step": 9246 }, { "epoch": 0.6856231927040854, "grad_norm": 0.3608163595199585, "learning_rate": 0.00010755688922230558, "loss": 0.5099, "step": 9247 }, { "epoch": 0.6856973381775042, "grad_norm": 0.34667232632637024, "learning_rate": 0.00010754688672168043, "loss": 0.4772, "step": 9248 }, { "epoch": 0.6857714836509231, "grad_norm": 0.3725939393043518, "learning_rate": 0.00010753688422105528, "loss": 0.5208, "step": 9249 }, { "epoch": 0.685845629124342, "grad_norm": 0.35860759019851685, "learning_rate": 0.00010752688172043011, "loss": 0.5118, "step": 9250 }, { "epoch": 0.6859197745977608, "grad_norm": 0.3757672905921936, "learning_rate": 0.00010751687921980496, "loss": 0.5002, "step": 9251 }, { "epoch": 0.6859939200711797, "grad_norm": 0.3840903043746948, "learning_rate": 0.00010750687671917981, "loss": 0.5101, "step": 9252 }, { "epoch": 0.6860680655445985, "grad_norm": 0.3734789490699768, "learning_rate": 0.00010749687421855463, "loss": 0.5071, "step": 9253 }, { "epoch": 0.6861422110180173, "grad_norm": 0.35235726833343506, "learning_rate": 0.00010748687171792948, "loss": 0.4883, "step": 9254 }, { "epoch": 0.6862163564914362, "grad_norm": 0.38421404361724854, "learning_rate": 0.00010747686921730433, "loss": 0.519, "step": 9255 }, { "epoch": 0.6862905019648551, "grad_norm": 0.3685387670993805, "learning_rate": 0.00010746686671667918, "loss": 0.5226, "step": 9256 }, { "epoch": 0.6863646474382739, "grad_norm": 0.3392428457736969, "learning_rate": 0.00010745686421605401, "loss": 0.473, "step": 9257 }, { "epoch": 0.6864387929116927, "grad_norm": 0.3315616548061371, "learning_rate": 0.00010744686171542886, "loss": 0.4639, "step": 9258 }, { "epoch": 0.6865129383851116, "grad_norm": 0.367724746465683, "learning_rate": 0.00010743685921480371, "loss": 0.5111, "step": 9259 }, { "epoch": 0.6865870838585304, "grad_norm": 0.3720313012599945, "learning_rate": 0.00010742685671417856, "loss": 0.538, "step": 9260 }, { "epoch": 0.6866612293319493, "grad_norm": 0.3603943884372711, "learning_rate": 0.00010741685421355338, "loss": 0.4942, "step": 9261 }, { "epoch": 0.6867353748053682, "grad_norm": 0.3605147898197174, "learning_rate": 0.00010740685171292823, "loss": 0.5364, "step": 9262 }, { "epoch": 0.686809520278787, "grad_norm": 0.3380812704563141, "learning_rate": 0.00010739684921230308, "loss": 0.4711, "step": 9263 }, { "epoch": 0.6868836657522058, "grad_norm": 0.3438034951686859, "learning_rate": 0.00010738684671167794, "loss": 0.4665, "step": 9264 }, { "epoch": 0.6869578112256247, "grad_norm": 0.34968602657318115, "learning_rate": 0.00010737684421105276, "loss": 0.4807, "step": 9265 }, { "epoch": 0.6870319566990435, "grad_norm": 0.35059401392936707, "learning_rate": 0.00010736684171042761, "loss": 0.4998, "step": 9266 }, { "epoch": 0.6871061021724624, "grad_norm": 0.3617478907108307, "learning_rate": 0.00010735683920980246, "loss": 0.5456, "step": 9267 }, { "epoch": 0.6871802476458813, "grad_norm": 0.3609112501144409, "learning_rate": 0.00010734683670917728, "loss": 0.505, "step": 9268 }, { "epoch": 0.6872543931193, "grad_norm": 0.3573351800441742, "learning_rate": 0.00010733683420855213, "loss": 0.4785, "step": 9269 }, { "epoch": 0.6873285385927189, "grad_norm": 0.3306906819343567, "learning_rate": 0.00010732683170792698, "loss": 0.4499, "step": 9270 }, { "epoch": 0.6874026840661378, "grad_norm": 0.3866124153137207, "learning_rate": 0.00010731682920730184, "loss": 0.5439, "step": 9271 }, { "epoch": 0.6874768295395566, "grad_norm": 0.37422508001327515, "learning_rate": 0.00010730682670667667, "loss": 0.5112, "step": 9272 }, { "epoch": 0.6875509750129755, "grad_norm": 0.323772132396698, "learning_rate": 0.00010729682420605151, "loss": 0.462, "step": 9273 }, { "epoch": 0.6876251204863943, "grad_norm": 0.3704039454460144, "learning_rate": 0.00010728682170542636, "loss": 0.5223, "step": 9274 }, { "epoch": 0.6876992659598131, "grad_norm": 0.3517409563064575, "learning_rate": 0.00010727681920480121, "loss": 0.4923, "step": 9275 }, { "epoch": 0.687773411433232, "grad_norm": 0.3520941436290741, "learning_rate": 0.00010726681670417605, "loss": 0.5151, "step": 9276 }, { "epoch": 0.6878475569066509, "grad_norm": 0.3656076490879059, "learning_rate": 0.0001072568142035509, "loss": 0.519, "step": 9277 }, { "epoch": 0.6879217023800697, "grad_norm": 0.3662492334842682, "learning_rate": 0.00010724681170292574, "loss": 0.5358, "step": 9278 }, { "epoch": 0.6879958478534886, "grad_norm": 0.3615677058696747, "learning_rate": 0.00010723680920230057, "loss": 0.522, "step": 9279 }, { "epoch": 0.6880699933269074, "grad_norm": 0.3319123387336731, "learning_rate": 0.00010722680670167542, "loss": 0.4554, "step": 9280 }, { "epoch": 0.6881441388003262, "grad_norm": 0.3619000017642975, "learning_rate": 0.00010721680420105027, "loss": 0.5105, "step": 9281 }, { "epoch": 0.6882182842737451, "grad_norm": 0.3368958830833435, "learning_rate": 0.00010720680170042512, "loss": 0.471, "step": 9282 }, { "epoch": 0.688292429747164, "grad_norm": 0.3339928090572357, "learning_rate": 0.00010719679919979995, "loss": 0.4733, "step": 9283 }, { "epoch": 0.6883665752205828, "grad_norm": 0.3526519536972046, "learning_rate": 0.0001071867966991748, "loss": 0.5155, "step": 9284 }, { "epoch": 0.6884407206940016, "grad_norm": 0.36571836471557617, "learning_rate": 0.00010717679419854965, "loss": 0.512, "step": 9285 }, { "epoch": 0.6885148661674205, "grad_norm": 0.3720945417881012, "learning_rate": 0.0001071667916979245, "loss": 0.5202, "step": 9286 }, { "epoch": 0.6885890116408393, "grad_norm": 0.3574332892894745, "learning_rate": 0.00010715678919729932, "loss": 0.5065, "step": 9287 }, { "epoch": 0.6886631571142582, "grad_norm": 0.36857372522354126, "learning_rate": 0.00010714678669667417, "loss": 0.5058, "step": 9288 }, { "epoch": 0.6887373025876771, "grad_norm": 0.3467526435852051, "learning_rate": 0.00010713678419604902, "loss": 0.489, "step": 9289 }, { "epoch": 0.6888114480610958, "grad_norm": 0.33275219798088074, "learning_rate": 0.00010712678169542385, "loss": 0.4693, "step": 9290 }, { "epoch": 0.6888855935345147, "grad_norm": 0.34469977021217346, "learning_rate": 0.0001071167791947987, "loss": 0.4709, "step": 9291 }, { "epoch": 0.6889597390079336, "grad_norm": 0.3711187243461609, "learning_rate": 0.00010710677669417355, "loss": 0.4863, "step": 9292 }, { "epoch": 0.6890338844813524, "grad_norm": 0.36615189909935, "learning_rate": 0.0001070967741935484, "loss": 0.5111, "step": 9293 }, { "epoch": 0.6891080299547713, "grad_norm": 0.3676312267780304, "learning_rate": 0.00010708677169292322, "loss": 0.5219, "step": 9294 }, { "epoch": 0.6891821754281902, "grad_norm": 0.37067490816116333, "learning_rate": 0.00010707676919229807, "loss": 0.5112, "step": 9295 }, { "epoch": 0.6892563209016089, "grad_norm": 0.36140504479408264, "learning_rate": 0.00010706676669167293, "loss": 0.4801, "step": 9296 }, { "epoch": 0.6893304663750278, "grad_norm": 0.3691214919090271, "learning_rate": 0.00010705676419104778, "loss": 0.5201, "step": 9297 }, { "epoch": 0.6894046118484467, "grad_norm": 0.3533732295036316, "learning_rate": 0.0001070467616904226, "loss": 0.5095, "step": 9298 }, { "epoch": 0.6894787573218655, "grad_norm": 0.351249098777771, "learning_rate": 0.00010703675918979745, "loss": 0.5088, "step": 9299 }, { "epoch": 0.6895529027952844, "grad_norm": 0.360445499420166, "learning_rate": 0.0001070267566891723, "loss": 0.5033, "step": 9300 }, { "epoch": 0.6896270482687032, "grad_norm": 0.34714582562446594, "learning_rate": 0.00010701675418854715, "loss": 0.4857, "step": 9301 }, { "epoch": 0.689701193742122, "grad_norm": 0.3677225410938263, "learning_rate": 0.00010700675168792197, "loss": 0.4854, "step": 9302 }, { "epoch": 0.6897753392155409, "grad_norm": 0.3741621673107147, "learning_rate": 0.00010699674918729683, "loss": 0.4634, "step": 9303 }, { "epoch": 0.6898494846889598, "grad_norm": 0.34378817677497864, "learning_rate": 0.00010698674668667168, "loss": 0.4895, "step": 9304 }, { "epoch": 0.6899236301623786, "grad_norm": 0.37091344594955444, "learning_rate": 0.00010697674418604651, "loss": 0.497, "step": 9305 }, { "epoch": 0.6899977756357975, "grad_norm": 0.36653581261634827, "learning_rate": 0.00010696674168542135, "loss": 0.5104, "step": 9306 }, { "epoch": 0.6900719211092163, "grad_norm": 0.33817213773727417, "learning_rate": 0.0001069567391847962, "loss": 0.4534, "step": 9307 }, { "epoch": 0.6901460665826351, "grad_norm": 0.366605281829834, "learning_rate": 0.00010694673668417105, "loss": 0.4911, "step": 9308 }, { "epoch": 0.690220212056054, "grad_norm": 0.36557960510253906, "learning_rate": 0.00010693673418354589, "loss": 0.4904, "step": 9309 }, { "epoch": 0.6902943575294729, "grad_norm": 0.35840100049972534, "learning_rate": 0.00010692673168292074, "loss": 0.4955, "step": 9310 }, { "epoch": 0.6903685030028917, "grad_norm": 0.3704025149345398, "learning_rate": 0.00010691672918229558, "loss": 0.5076, "step": 9311 }, { "epoch": 0.6904426484763105, "grad_norm": 0.36758020520210266, "learning_rate": 0.00010690672668167043, "loss": 0.5254, "step": 9312 }, { "epoch": 0.6905167939497294, "grad_norm": 0.3445869982242584, "learning_rate": 0.00010689672418104526, "loss": 0.517, "step": 9313 }, { "epoch": 0.6905909394231482, "grad_norm": 0.3474668264389038, "learning_rate": 0.00010688672168042011, "loss": 0.4988, "step": 9314 }, { "epoch": 0.6906650848965671, "grad_norm": 0.37121161818504333, "learning_rate": 0.00010687671917979497, "loss": 0.4869, "step": 9315 }, { "epoch": 0.6907392303699859, "grad_norm": 0.3310870826244354, "learning_rate": 0.00010686671667916979, "loss": 0.4293, "step": 9316 }, { "epoch": 0.6908133758434047, "grad_norm": 0.32344377040863037, "learning_rate": 0.00010685671417854464, "loss": 0.4186, "step": 9317 }, { "epoch": 0.6908875213168236, "grad_norm": 0.3452662527561188, "learning_rate": 0.00010684671167791949, "loss": 0.4581, "step": 9318 }, { "epoch": 0.6909616667902424, "grad_norm": 0.35780996084213257, "learning_rate": 0.00010683670917729434, "loss": 0.4747, "step": 9319 }, { "epoch": 0.6910358122636613, "grad_norm": 0.35826829075813293, "learning_rate": 0.00010682670667666916, "loss": 0.4636, "step": 9320 }, { "epoch": 0.6911099577370802, "grad_norm": 0.35708728432655334, "learning_rate": 0.00010681670417604402, "loss": 0.4982, "step": 9321 }, { "epoch": 0.691184103210499, "grad_norm": 0.3604728877544403, "learning_rate": 0.00010680670167541887, "loss": 0.4866, "step": 9322 }, { "epoch": 0.6912582486839178, "grad_norm": 0.36095258593559265, "learning_rate": 0.00010679669917479372, "loss": 0.4908, "step": 9323 }, { "epoch": 0.6913323941573367, "grad_norm": 0.37223997712135315, "learning_rate": 0.00010678669667416854, "loss": 0.4981, "step": 9324 }, { "epoch": 0.6914065396307555, "grad_norm": 0.3479596674442291, "learning_rate": 0.00010677669417354339, "loss": 0.477, "step": 9325 }, { "epoch": 0.6914806851041744, "grad_norm": 0.35991963744163513, "learning_rate": 0.00010676669167291824, "loss": 0.4948, "step": 9326 }, { "epoch": 0.6915548305775933, "grad_norm": 0.36958155035972595, "learning_rate": 0.00010675668917229306, "loss": 0.5078, "step": 9327 }, { "epoch": 0.691628976051012, "grad_norm": 0.3629721403121948, "learning_rate": 0.00010674668667166792, "loss": 0.4829, "step": 9328 }, { "epoch": 0.6917031215244309, "grad_norm": 0.3812195360660553, "learning_rate": 0.00010673668417104277, "loss": 0.5463, "step": 9329 }, { "epoch": 0.6917772669978498, "grad_norm": 0.3444578945636749, "learning_rate": 0.00010672668167041762, "loss": 0.4963, "step": 9330 }, { "epoch": 0.6918514124712686, "grad_norm": 0.37056684494018555, "learning_rate": 0.00010671667916979244, "loss": 0.493, "step": 9331 }, { "epoch": 0.6919255579446875, "grad_norm": 0.3784717619419098, "learning_rate": 0.00010670667666916729, "loss": 0.5003, "step": 9332 }, { "epoch": 0.6919997034181063, "grad_norm": 0.36054301261901855, "learning_rate": 0.00010669667416854214, "loss": 0.5115, "step": 9333 }, { "epoch": 0.6920738488915251, "grad_norm": 0.3712099492549896, "learning_rate": 0.00010668667166791699, "loss": 0.5139, "step": 9334 }, { "epoch": 0.692147994364944, "grad_norm": 0.3389476239681244, "learning_rate": 0.00010667666916729182, "loss": 0.477, "step": 9335 }, { "epoch": 0.6922221398383629, "grad_norm": 0.37984538078308105, "learning_rate": 0.00010666666666666667, "loss": 0.4899, "step": 9336 }, { "epoch": 0.6922962853117817, "grad_norm": 0.3693445026874542, "learning_rate": 0.00010665666416604152, "loss": 0.4949, "step": 9337 }, { "epoch": 0.6923704307852006, "grad_norm": 0.3659782111644745, "learning_rate": 0.00010664666166541637, "loss": 0.4896, "step": 9338 }, { "epoch": 0.6924445762586194, "grad_norm": 0.3628120720386505, "learning_rate": 0.00010663665916479119, "loss": 0.5108, "step": 9339 }, { "epoch": 0.6925187217320382, "grad_norm": 0.37714821100234985, "learning_rate": 0.00010662665666416604, "loss": 0.5531, "step": 9340 }, { "epoch": 0.6925928672054571, "grad_norm": 0.35284191370010376, "learning_rate": 0.00010661665416354089, "loss": 0.498, "step": 9341 }, { "epoch": 0.692667012678876, "grad_norm": 0.35044705867767334, "learning_rate": 0.00010660665166291573, "loss": 0.4822, "step": 9342 }, { "epoch": 0.6927411581522948, "grad_norm": 0.3524501621723175, "learning_rate": 0.00010659664916229058, "loss": 0.5086, "step": 9343 }, { "epoch": 0.6928153036257136, "grad_norm": 0.33459222316741943, "learning_rate": 0.00010658664666166542, "loss": 0.4892, "step": 9344 }, { "epoch": 0.6928894490991325, "grad_norm": 0.3679102957248688, "learning_rate": 0.00010657664416104027, "loss": 0.4763, "step": 9345 }, { "epoch": 0.6929635945725513, "grad_norm": 0.3475196361541748, "learning_rate": 0.0001065666416604151, "loss": 0.47, "step": 9346 }, { "epoch": 0.6930377400459702, "grad_norm": 0.35895493626594543, "learning_rate": 0.00010655663915978996, "loss": 0.5062, "step": 9347 }, { "epoch": 0.6931118855193891, "grad_norm": 0.3510485589504242, "learning_rate": 0.0001065466366591648, "loss": 0.4667, "step": 9348 }, { "epoch": 0.6931860309928078, "grad_norm": 0.3462931513786316, "learning_rate": 0.00010653663415853964, "loss": 0.4859, "step": 9349 }, { "epoch": 0.6932601764662267, "grad_norm": 0.3468218445777893, "learning_rate": 0.00010652663165791448, "loss": 0.478, "step": 9350 }, { "epoch": 0.6933343219396456, "grad_norm": 0.3605404496192932, "learning_rate": 0.00010651662915728933, "loss": 0.4935, "step": 9351 }, { "epoch": 0.6934084674130644, "grad_norm": 0.3440820574760437, "learning_rate": 0.00010650662665666418, "loss": 0.5049, "step": 9352 }, { "epoch": 0.6934826128864833, "grad_norm": 0.369995653629303, "learning_rate": 0.000106496624156039, "loss": 0.508, "step": 9353 }, { "epoch": 0.6935567583599022, "grad_norm": 0.36028510332107544, "learning_rate": 0.00010648662165541386, "loss": 0.4854, "step": 9354 }, { "epoch": 0.6936309038333209, "grad_norm": 0.37012407183647156, "learning_rate": 0.0001064766191547887, "loss": 0.5079, "step": 9355 }, { "epoch": 0.6937050493067398, "grad_norm": 0.38182270526885986, "learning_rate": 0.00010646661665416356, "loss": 0.5382, "step": 9356 }, { "epoch": 0.6937791947801587, "grad_norm": 0.37190186977386475, "learning_rate": 0.00010645661415353838, "loss": 0.5279, "step": 9357 }, { "epoch": 0.6938533402535775, "grad_norm": 0.4000013768672943, "learning_rate": 0.00010644661165291323, "loss": 0.5414, "step": 9358 }, { "epoch": 0.6939274857269964, "grad_norm": 0.35855892300605774, "learning_rate": 0.00010643660915228808, "loss": 0.4734, "step": 9359 }, { "epoch": 0.6940016312004152, "grad_norm": 0.35305073857307434, "learning_rate": 0.00010642660665166293, "loss": 0.4962, "step": 9360 }, { "epoch": 0.694075776673834, "grad_norm": 0.37260106205940247, "learning_rate": 0.00010641660415103776, "loss": 0.5305, "step": 9361 }, { "epoch": 0.6941499221472529, "grad_norm": 0.33141836524009705, "learning_rate": 0.00010640660165041261, "loss": 0.4544, "step": 9362 }, { "epoch": 0.6942240676206718, "grad_norm": 0.35745999217033386, "learning_rate": 0.00010639659914978746, "loss": 0.4986, "step": 9363 }, { "epoch": 0.6942982130940906, "grad_norm": 0.3440180718898773, "learning_rate": 0.00010638659664916231, "loss": 0.4707, "step": 9364 }, { "epoch": 0.6943723585675095, "grad_norm": 0.3762041926383972, "learning_rate": 0.00010637659414853713, "loss": 0.5293, "step": 9365 }, { "epoch": 0.6944465040409283, "grad_norm": 0.3658784031867981, "learning_rate": 0.00010636659164791198, "loss": 0.4997, "step": 9366 }, { "epoch": 0.6945206495143471, "grad_norm": 0.3709651529788971, "learning_rate": 0.00010635658914728683, "loss": 0.5325, "step": 9367 }, { "epoch": 0.694594794987766, "grad_norm": 0.35699087381362915, "learning_rate": 0.00010634658664666166, "loss": 0.5169, "step": 9368 }, { "epoch": 0.6946689404611849, "grad_norm": 0.34975332021713257, "learning_rate": 0.00010633658414603651, "loss": 0.4654, "step": 9369 }, { "epoch": 0.6947430859346037, "grad_norm": 0.3629875183105469, "learning_rate": 0.00010632658164541136, "loss": 0.5007, "step": 9370 }, { "epoch": 0.6948172314080225, "grad_norm": 0.3618191182613373, "learning_rate": 0.00010631657914478621, "loss": 0.5299, "step": 9371 }, { "epoch": 0.6948913768814414, "grad_norm": 0.3618724048137665, "learning_rate": 0.00010630657664416103, "loss": 0.4977, "step": 9372 }, { "epoch": 0.6949655223548602, "grad_norm": 0.37242770195007324, "learning_rate": 0.00010629657414353588, "loss": 0.5387, "step": 9373 }, { "epoch": 0.6950396678282791, "grad_norm": 0.3344036638736725, "learning_rate": 0.00010628657164291073, "loss": 0.4747, "step": 9374 }, { "epoch": 0.695113813301698, "grad_norm": 0.40613943338394165, "learning_rate": 0.00010627656914228558, "loss": 0.5343, "step": 9375 }, { "epoch": 0.6951879587751167, "grad_norm": 0.3688651919364929, "learning_rate": 0.00010626656664166042, "loss": 0.5199, "step": 9376 }, { "epoch": 0.6952621042485356, "grad_norm": 0.3714115023612976, "learning_rate": 0.00010625656414103526, "loss": 0.5316, "step": 9377 }, { "epoch": 0.6953362497219545, "grad_norm": 0.36308324337005615, "learning_rate": 0.00010624656164041011, "loss": 0.4774, "step": 9378 }, { "epoch": 0.6954103951953733, "grad_norm": 0.3583947718143463, "learning_rate": 0.00010623655913978495, "loss": 0.4692, "step": 9379 }, { "epoch": 0.6954845406687922, "grad_norm": 0.3998671770095825, "learning_rate": 0.0001062265566391598, "loss": 0.4738, "step": 9380 }, { "epoch": 0.6955586861422111, "grad_norm": 0.35064035654067993, "learning_rate": 0.00010621655413853465, "loss": 0.4803, "step": 9381 }, { "epoch": 0.6956328316156298, "grad_norm": 0.35875964164733887, "learning_rate": 0.00010620655163790948, "loss": 0.5064, "step": 9382 }, { "epoch": 0.6957069770890487, "grad_norm": 0.337406724691391, "learning_rate": 0.00010619654913728432, "loss": 0.4772, "step": 9383 }, { "epoch": 0.6957811225624676, "grad_norm": 0.3625706434249878, "learning_rate": 0.00010618654663665917, "loss": 0.5, "step": 9384 }, { "epoch": 0.6958552680358864, "grad_norm": 0.36380577087402344, "learning_rate": 0.00010617654413603402, "loss": 0.5181, "step": 9385 }, { "epoch": 0.6959294135093053, "grad_norm": 0.38619109988212585, "learning_rate": 0.00010616654163540887, "loss": 0.5634, "step": 9386 }, { "epoch": 0.6960035589827241, "grad_norm": 0.3565179705619812, "learning_rate": 0.0001061565391347837, "loss": 0.5069, "step": 9387 }, { "epoch": 0.6960777044561429, "grad_norm": 0.3574581444263458, "learning_rate": 0.00010614653663415855, "loss": 0.4813, "step": 9388 }, { "epoch": 0.6961518499295618, "grad_norm": 0.3764524459838867, "learning_rate": 0.0001061365341335334, "loss": 0.4977, "step": 9389 }, { "epoch": 0.6962259954029807, "grad_norm": 0.3683662712574005, "learning_rate": 0.00010612653163290822, "loss": 0.5048, "step": 9390 }, { "epoch": 0.6963001408763995, "grad_norm": 0.36142000555992126, "learning_rate": 0.00010611652913228307, "loss": 0.5322, "step": 9391 }, { "epoch": 0.6963742863498183, "grad_norm": 0.3507023751735687, "learning_rate": 0.00010610652663165792, "loss": 0.4957, "step": 9392 }, { "epoch": 0.6964484318232372, "grad_norm": 0.3567802608013153, "learning_rate": 0.00010609652413103277, "loss": 0.5057, "step": 9393 }, { "epoch": 0.696522577296656, "grad_norm": 0.38055220246315, "learning_rate": 0.0001060865216304076, "loss": 0.5396, "step": 9394 }, { "epoch": 0.6965967227700749, "grad_norm": 0.36683207750320435, "learning_rate": 0.00010607651912978245, "loss": 0.5194, "step": 9395 }, { "epoch": 0.6966708682434938, "grad_norm": 0.3772121071815491, "learning_rate": 0.0001060665166291573, "loss": 0.5487, "step": 9396 }, { "epoch": 0.6967450137169126, "grad_norm": 0.36499282717704773, "learning_rate": 0.00010605651412853215, "loss": 0.5311, "step": 9397 }, { "epoch": 0.6968191591903314, "grad_norm": 0.36509257555007935, "learning_rate": 0.00010604651162790697, "loss": 0.5152, "step": 9398 }, { "epoch": 0.6968933046637503, "grad_norm": 0.3853662610054016, "learning_rate": 0.00010603650912728182, "loss": 0.5028, "step": 9399 }, { "epoch": 0.6969674501371691, "grad_norm": 0.33600887656211853, "learning_rate": 0.00010602650662665667, "loss": 0.4456, "step": 9400 }, { "epoch": 0.697041595610588, "grad_norm": 0.3574877679347992, "learning_rate": 0.00010601650412603152, "loss": 0.4793, "step": 9401 }, { "epoch": 0.6971157410840069, "grad_norm": 0.3448794484138489, "learning_rate": 0.00010600650162540635, "loss": 0.4642, "step": 9402 }, { "epoch": 0.6971898865574256, "grad_norm": 0.39930328726768494, "learning_rate": 0.0001059964991247812, "loss": 0.5738, "step": 9403 }, { "epoch": 0.6972640320308445, "grad_norm": 0.3388984799385071, "learning_rate": 0.00010598649662415605, "loss": 0.4766, "step": 9404 }, { "epoch": 0.6973381775042634, "grad_norm": 0.3702084720134735, "learning_rate": 0.00010597649412353087, "loss": 0.5366, "step": 9405 }, { "epoch": 0.6974123229776822, "grad_norm": 0.3510676920413971, "learning_rate": 0.00010596649162290572, "loss": 0.4891, "step": 9406 }, { "epoch": 0.6974864684511011, "grad_norm": 0.33014360070228577, "learning_rate": 0.00010595648912228057, "loss": 0.4684, "step": 9407 }, { "epoch": 0.69756061392452, "grad_norm": 0.366705060005188, "learning_rate": 0.00010594648662165542, "loss": 0.5152, "step": 9408 }, { "epoch": 0.6976347593979387, "grad_norm": 0.33002758026123047, "learning_rate": 0.00010593648412103025, "loss": 0.4797, "step": 9409 }, { "epoch": 0.6977089048713576, "grad_norm": 0.3414531350135803, "learning_rate": 0.0001059264816204051, "loss": 0.4658, "step": 9410 }, { "epoch": 0.6977830503447765, "grad_norm": 0.38479575514793396, "learning_rate": 0.00010591647911977995, "loss": 0.5414, "step": 9411 }, { "epoch": 0.6978571958181953, "grad_norm": 0.360015332698822, "learning_rate": 0.0001059064766191548, "loss": 0.4949, "step": 9412 }, { "epoch": 0.6979313412916142, "grad_norm": 0.3674261271953583, "learning_rate": 0.00010589647411852964, "loss": 0.5216, "step": 9413 }, { "epoch": 0.698005486765033, "grad_norm": 0.38279417157173157, "learning_rate": 0.00010588647161790449, "loss": 0.5092, "step": 9414 }, { "epoch": 0.6980796322384518, "grad_norm": 0.35624897480010986, "learning_rate": 0.00010587646911727932, "loss": 0.4946, "step": 9415 }, { "epoch": 0.6981537777118707, "grad_norm": 0.3522964119911194, "learning_rate": 0.00010586646661665416, "loss": 0.5067, "step": 9416 }, { "epoch": 0.6982279231852896, "grad_norm": 0.3572034239768982, "learning_rate": 0.00010585646411602901, "loss": 0.5052, "step": 9417 }, { "epoch": 0.6983020686587084, "grad_norm": 0.3584262728691101, "learning_rate": 0.00010584646161540386, "loss": 0.4808, "step": 9418 }, { "epoch": 0.6983762141321272, "grad_norm": 0.3559655547142029, "learning_rate": 0.00010583645911477871, "loss": 0.5255, "step": 9419 }, { "epoch": 0.6984503596055461, "grad_norm": 0.34367135167121887, "learning_rate": 0.00010582645661415354, "loss": 0.4916, "step": 9420 }, { "epoch": 0.6985245050789649, "grad_norm": 0.3490225076675415, "learning_rate": 0.00010581645411352839, "loss": 0.4781, "step": 9421 }, { "epoch": 0.6985986505523838, "grad_norm": 0.36218592524528503, "learning_rate": 0.00010580645161290324, "loss": 0.4877, "step": 9422 }, { "epoch": 0.6986727960258027, "grad_norm": 0.3635188639163971, "learning_rate": 0.00010579644911227809, "loss": 0.5217, "step": 9423 }, { "epoch": 0.6987469414992215, "grad_norm": 0.3442160189151764, "learning_rate": 0.00010578644661165291, "loss": 0.4935, "step": 9424 }, { "epoch": 0.6988210869726403, "grad_norm": 0.3845890760421753, "learning_rate": 0.00010577644411102776, "loss": 0.4864, "step": 9425 }, { "epoch": 0.6988952324460592, "grad_norm": 0.3685743510723114, "learning_rate": 0.00010576644161040261, "loss": 0.5363, "step": 9426 }, { "epoch": 0.698969377919478, "grad_norm": 0.35901346802711487, "learning_rate": 0.00010575643910977744, "loss": 0.4824, "step": 9427 }, { "epoch": 0.6990435233928969, "grad_norm": 0.3702475130558014, "learning_rate": 0.00010574643660915229, "loss": 0.5251, "step": 9428 }, { "epoch": 0.6991176688663158, "grad_norm": 0.3544864356517792, "learning_rate": 0.00010573643410852714, "loss": 0.4931, "step": 9429 }, { "epoch": 0.6991918143397345, "grad_norm": 0.368115097284317, "learning_rate": 0.00010572643160790199, "loss": 0.5182, "step": 9430 }, { "epoch": 0.6992659598131534, "grad_norm": 0.3755926489830017, "learning_rate": 0.00010571642910727681, "loss": 0.5323, "step": 9431 }, { "epoch": 0.6993401052865722, "grad_norm": 0.34725940227508545, "learning_rate": 0.00010570642660665166, "loss": 0.4857, "step": 9432 }, { "epoch": 0.6994142507599911, "grad_norm": 0.3741472065448761, "learning_rate": 0.00010569642410602651, "loss": 0.4984, "step": 9433 }, { "epoch": 0.69948839623341, "grad_norm": 0.3734101951122284, "learning_rate": 0.00010568642160540137, "loss": 0.4919, "step": 9434 }, { "epoch": 0.6995625417068287, "grad_norm": 0.3472282886505127, "learning_rate": 0.00010567641910477619, "loss": 0.4824, "step": 9435 }, { "epoch": 0.6996366871802476, "grad_norm": 0.3830071687698364, "learning_rate": 0.00010566641660415104, "loss": 0.5251, "step": 9436 }, { "epoch": 0.6997108326536665, "grad_norm": 0.35192054510116577, "learning_rate": 0.00010565641410352589, "loss": 0.474, "step": 9437 }, { "epoch": 0.6997849781270853, "grad_norm": 0.3450991213321686, "learning_rate": 0.00010564641160290074, "loss": 0.4994, "step": 9438 }, { "epoch": 0.6998591236005042, "grad_norm": 0.357756644487381, "learning_rate": 0.00010563640910227556, "loss": 0.4849, "step": 9439 }, { "epoch": 0.6999332690739231, "grad_norm": 0.34770768880844116, "learning_rate": 0.00010562640660165041, "loss": 0.462, "step": 9440 }, { "epoch": 0.7000074145473418, "grad_norm": 0.36909908056259155, "learning_rate": 0.00010561640410102527, "loss": 0.5211, "step": 9441 }, { "epoch": 0.7000815600207607, "grad_norm": 0.3304181694984436, "learning_rate": 0.00010560640160040009, "loss": 0.4815, "step": 9442 }, { "epoch": 0.7001557054941796, "grad_norm": 0.36324307322502136, "learning_rate": 0.00010559639909977494, "loss": 0.5135, "step": 9443 }, { "epoch": 0.7002298509675984, "grad_norm": 0.35367313027381897, "learning_rate": 0.00010558639659914979, "loss": 0.5022, "step": 9444 }, { "epoch": 0.7003039964410173, "grad_norm": 0.3826049268245697, "learning_rate": 0.00010557639409852464, "loss": 0.5203, "step": 9445 }, { "epoch": 0.7003781419144361, "grad_norm": 0.37801697850227356, "learning_rate": 0.00010556639159789948, "loss": 0.5255, "step": 9446 }, { "epoch": 0.7004522873878549, "grad_norm": 0.33380475640296936, "learning_rate": 0.00010555638909727432, "loss": 0.4883, "step": 9447 }, { "epoch": 0.7005264328612738, "grad_norm": 0.37719082832336426, "learning_rate": 0.00010554638659664917, "loss": 0.5347, "step": 9448 }, { "epoch": 0.7006005783346927, "grad_norm": 0.353135347366333, "learning_rate": 0.00010553638409602402, "loss": 0.4714, "step": 9449 }, { "epoch": 0.7006747238081115, "grad_norm": 0.386740505695343, "learning_rate": 0.00010552638159539885, "loss": 0.5533, "step": 9450 }, { "epoch": 0.7007488692815304, "grad_norm": 0.3481692671775818, "learning_rate": 0.0001055163790947737, "loss": 0.4755, "step": 9451 }, { "epoch": 0.7008230147549492, "grad_norm": 0.3735068440437317, "learning_rate": 0.00010550637659414855, "loss": 0.5026, "step": 9452 }, { "epoch": 0.700897160228368, "grad_norm": 0.3509000241756439, "learning_rate": 0.00010549637409352338, "loss": 0.5004, "step": 9453 }, { "epoch": 0.7009713057017869, "grad_norm": 0.3616466522216797, "learning_rate": 0.00010548637159289823, "loss": 0.5177, "step": 9454 }, { "epoch": 0.7010454511752058, "grad_norm": 0.3589468002319336, "learning_rate": 0.00010547636909227308, "loss": 0.4946, "step": 9455 }, { "epoch": 0.7011195966486246, "grad_norm": 0.398625910282135, "learning_rate": 0.00010546636659164793, "loss": 0.5586, "step": 9456 }, { "epoch": 0.7011937421220434, "grad_norm": 0.32767751812934875, "learning_rate": 0.00010545636409102275, "loss": 0.4755, "step": 9457 }, { "epoch": 0.7012678875954623, "grad_norm": 0.3728162944316864, "learning_rate": 0.0001054463615903976, "loss": 0.5233, "step": 9458 }, { "epoch": 0.7013420330688811, "grad_norm": 0.37181609869003296, "learning_rate": 0.00010543635908977246, "loss": 0.5174, "step": 9459 }, { "epoch": 0.7014161785423, "grad_norm": 0.3371308743953705, "learning_rate": 0.0001054263565891473, "loss": 0.467, "step": 9460 }, { "epoch": 0.7014903240157189, "grad_norm": 0.3597448468208313, "learning_rate": 0.00010541635408852213, "loss": 0.4974, "step": 9461 }, { "epoch": 0.7015644694891376, "grad_norm": 0.3432777523994446, "learning_rate": 0.00010540635158789698, "loss": 0.4772, "step": 9462 }, { "epoch": 0.7016386149625565, "grad_norm": 0.3455277383327484, "learning_rate": 0.00010539634908727183, "loss": 0.4802, "step": 9463 }, { "epoch": 0.7017127604359754, "grad_norm": 0.3376588523387909, "learning_rate": 0.00010538634658664665, "loss": 0.4738, "step": 9464 }, { "epoch": 0.7017869059093942, "grad_norm": 0.35473886132240295, "learning_rate": 0.0001053763440860215, "loss": 0.5204, "step": 9465 }, { "epoch": 0.7018610513828131, "grad_norm": 0.3582288324832916, "learning_rate": 0.00010536634158539636, "loss": 0.4901, "step": 9466 }, { "epoch": 0.701935196856232, "grad_norm": 0.34654539823532104, "learning_rate": 0.0001053563390847712, "loss": 0.4859, "step": 9467 }, { "epoch": 0.7020093423296507, "grad_norm": 0.36037325859069824, "learning_rate": 0.00010534633658414603, "loss": 0.4935, "step": 9468 }, { "epoch": 0.7020834878030696, "grad_norm": 0.3552662134170532, "learning_rate": 0.00010533633408352088, "loss": 0.4865, "step": 9469 }, { "epoch": 0.7021576332764885, "grad_norm": 0.36461493372917175, "learning_rate": 0.00010532633158289573, "loss": 0.4925, "step": 9470 }, { "epoch": 0.7022317787499073, "grad_norm": 0.35555747151374817, "learning_rate": 0.00010531632908227058, "loss": 0.5114, "step": 9471 }, { "epoch": 0.7023059242233262, "grad_norm": 0.3531716763973236, "learning_rate": 0.0001053063265816454, "loss": 0.5026, "step": 9472 }, { "epoch": 0.702380069696745, "grad_norm": 0.35571807622909546, "learning_rate": 0.00010529632408102026, "loss": 0.4653, "step": 9473 }, { "epoch": 0.7024542151701638, "grad_norm": 0.3752630949020386, "learning_rate": 0.0001052863215803951, "loss": 0.5173, "step": 9474 }, { "epoch": 0.7025283606435827, "grad_norm": 0.34921032190322876, "learning_rate": 0.00010527631907976996, "loss": 0.4965, "step": 9475 }, { "epoch": 0.7026025061170016, "grad_norm": 0.3619142472743988, "learning_rate": 0.00010526631657914478, "loss": 0.504, "step": 9476 }, { "epoch": 0.7026766515904204, "grad_norm": 0.36621785163879395, "learning_rate": 0.00010525631407851963, "loss": 0.5044, "step": 9477 }, { "epoch": 0.7027507970638392, "grad_norm": 0.3801593780517578, "learning_rate": 0.00010524631157789448, "loss": 0.4919, "step": 9478 }, { "epoch": 0.7028249425372581, "grad_norm": 0.35248634219169617, "learning_rate": 0.00010523630907726932, "loss": 0.4771, "step": 9479 }, { "epoch": 0.7028990880106769, "grad_norm": 0.38880059123039246, "learning_rate": 0.00010522630657664416, "loss": 0.5303, "step": 9480 }, { "epoch": 0.7029732334840958, "grad_norm": 0.36234569549560547, "learning_rate": 0.000105216304076019, "loss": 0.515, "step": 9481 }, { "epoch": 0.7030473789575147, "grad_norm": 0.3523939847946167, "learning_rate": 0.00010520630157539386, "loss": 0.5002, "step": 9482 }, { "epoch": 0.7031215244309335, "grad_norm": 0.3548489809036255, "learning_rate": 0.0001051962990747687, "loss": 0.5107, "step": 9483 }, { "epoch": 0.7031956699043523, "grad_norm": 0.38299208879470825, "learning_rate": 0.00010518629657414354, "loss": 0.5328, "step": 9484 }, { "epoch": 0.7032698153777712, "grad_norm": 0.3357279896736145, "learning_rate": 0.00010517629407351838, "loss": 0.4955, "step": 9485 }, { "epoch": 0.70334396085119, "grad_norm": 0.3586626946926117, "learning_rate": 0.00010516629157289323, "loss": 0.5043, "step": 9486 }, { "epoch": 0.7034181063246089, "grad_norm": 0.3451513648033142, "learning_rate": 0.00010515628907226807, "loss": 0.4862, "step": 9487 }, { "epoch": 0.7034922517980278, "grad_norm": 0.3744458854198456, "learning_rate": 0.00010514628657164292, "loss": 0.5044, "step": 9488 }, { "epoch": 0.7035663972714465, "grad_norm": 0.36453792452812195, "learning_rate": 0.00010513628407101777, "loss": 0.4946, "step": 9489 }, { "epoch": 0.7036405427448654, "grad_norm": 0.3529966473579407, "learning_rate": 0.0001051262815703926, "loss": 0.5367, "step": 9490 }, { "epoch": 0.7037146882182843, "grad_norm": 0.34609466791152954, "learning_rate": 0.00010511627906976745, "loss": 0.464, "step": 9491 }, { "epoch": 0.7037888336917031, "grad_norm": 0.3537514805793762, "learning_rate": 0.0001051062765691423, "loss": 0.4925, "step": 9492 }, { "epoch": 0.703862979165122, "grad_norm": 0.3500954508781433, "learning_rate": 0.00010509627406851715, "loss": 0.5051, "step": 9493 }, { "epoch": 0.7039371246385409, "grad_norm": 0.36406001448631287, "learning_rate": 0.00010508627156789197, "loss": 0.5142, "step": 9494 }, { "epoch": 0.7040112701119596, "grad_norm": 0.3691297769546509, "learning_rate": 0.00010507626906726682, "loss": 0.5379, "step": 9495 }, { "epoch": 0.7040854155853785, "grad_norm": 0.3764229118824005, "learning_rate": 0.00010506626656664167, "loss": 0.5027, "step": 9496 }, { "epoch": 0.7041595610587974, "grad_norm": 0.3394198715686798, "learning_rate": 0.00010505626406601652, "loss": 0.4656, "step": 9497 }, { "epoch": 0.7042337065322162, "grad_norm": 0.33920103311538696, "learning_rate": 0.00010504626156539135, "loss": 0.4581, "step": 9498 }, { "epoch": 0.7043078520056351, "grad_norm": 0.3622130751609802, "learning_rate": 0.0001050362590647662, "loss": 0.4986, "step": 9499 }, { "epoch": 0.704381997479054, "grad_norm": 0.36156168580055237, "learning_rate": 0.00010502625656414105, "loss": 0.4976, "step": 9500 }, { "epoch": 0.7044561429524727, "grad_norm": 0.3538612127304077, "learning_rate": 0.00010501625406351587, "loss": 0.4745, "step": 9501 }, { "epoch": 0.7045302884258916, "grad_norm": 0.3848985433578491, "learning_rate": 0.00010500625156289072, "loss": 0.5628, "step": 9502 }, { "epoch": 0.7046044338993105, "grad_norm": 0.3676832616329193, "learning_rate": 0.00010499624906226557, "loss": 0.5224, "step": 9503 }, { "epoch": 0.7046785793727293, "grad_norm": 0.35596346855163574, "learning_rate": 0.00010498624656164042, "loss": 0.5088, "step": 9504 }, { "epoch": 0.7047527248461481, "grad_norm": 0.3597610592842102, "learning_rate": 0.00010497624406101525, "loss": 0.5176, "step": 9505 }, { "epoch": 0.704826870319567, "grad_norm": 0.378897488117218, "learning_rate": 0.0001049662415603901, "loss": 0.4864, "step": 9506 }, { "epoch": 0.7049010157929858, "grad_norm": 0.3351847529411316, "learning_rate": 0.00010495623905976495, "loss": 0.4693, "step": 9507 }, { "epoch": 0.7049751612664047, "grad_norm": 0.3659401535987854, "learning_rate": 0.0001049462365591398, "loss": 0.497, "step": 9508 }, { "epoch": 0.7050493067398236, "grad_norm": 0.36956021189689636, "learning_rate": 0.00010493623405851462, "loss": 0.5042, "step": 9509 }, { "epoch": 0.7051234522132424, "grad_norm": 0.35233184695243835, "learning_rate": 0.00010492623155788947, "loss": 0.4884, "step": 9510 }, { "epoch": 0.7051975976866612, "grad_norm": 0.3498879373073578, "learning_rate": 0.00010491622905726432, "loss": 0.4765, "step": 9511 }, { "epoch": 0.7052717431600801, "grad_norm": 0.3636883795261383, "learning_rate": 0.00010490622655663917, "loss": 0.4857, "step": 9512 }, { "epoch": 0.7053458886334989, "grad_norm": 0.3413707911968231, "learning_rate": 0.000104896224056014, "loss": 0.4795, "step": 9513 }, { "epoch": 0.7054200341069178, "grad_norm": 0.3398986756801605, "learning_rate": 0.00010488622155538885, "loss": 0.4555, "step": 9514 }, { "epoch": 0.7054941795803367, "grad_norm": 0.33862388134002686, "learning_rate": 0.0001048762190547637, "loss": 0.487, "step": 9515 }, { "epoch": 0.7055683250537554, "grad_norm": 0.3644692599773407, "learning_rate": 0.00010486621655413854, "loss": 0.4968, "step": 9516 }, { "epoch": 0.7056424705271743, "grad_norm": 0.3572000563144684, "learning_rate": 0.00010485621405351339, "loss": 0.5238, "step": 9517 }, { "epoch": 0.7057166160005932, "grad_norm": 0.3392467200756073, "learning_rate": 0.00010484621155288822, "loss": 0.4529, "step": 9518 }, { "epoch": 0.705790761474012, "grad_norm": 0.3716793656349182, "learning_rate": 0.00010483620905226307, "loss": 0.5487, "step": 9519 }, { "epoch": 0.7058649069474309, "grad_norm": 0.38817521929740906, "learning_rate": 0.00010482620655163791, "loss": 0.5214, "step": 9520 }, { "epoch": 0.7059390524208498, "grad_norm": 0.3832046687602997, "learning_rate": 0.00010481620405101276, "loss": 0.5089, "step": 9521 }, { "epoch": 0.7060131978942685, "grad_norm": 0.3759783208370209, "learning_rate": 0.00010480620155038761, "loss": 0.4987, "step": 9522 }, { "epoch": 0.7060873433676874, "grad_norm": 0.387521892786026, "learning_rate": 0.00010479619904976245, "loss": 0.5063, "step": 9523 }, { "epoch": 0.7061614888411063, "grad_norm": 0.3633085787296295, "learning_rate": 0.00010478619654913729, "loss": 0.5039, "step": 9524 }, { "epoch": 0.7062356343145251, "grad_norm": 0.35268789529800415, "learning_rate": 0.00010477619404851214, "loss": 0.492, "step": 9525 }, { "epoch": 0.706309779787944, "grad_norm": 0.38124823570251465, "learning_rate": 0.00010476619154788699, "loss": 0.5147, "step": 9526 }, { "epoch": 0.7063839252613628, "grad_norm": 0.3554544746875763, "learning_rate": 0.00010475618904726181, "loss": 0.5007, "step": 9527 }, { "epoch": 0.7064580707347816, "grad_norm": 0.3703176975250244, "learning_rate": 0.00010474618654663666, "loss": 0.507, "step": 9528 }, { "epoch": 0.7065322162082005, "grad_norm": 0.3705887496471405, "learning_rate": 0.00010473618404601151, "loss": 0.497, "step": 9529 }, { "epoch": 0.7066063616816194, "grad_norm": 0.37605831027030945, "learning_rate": 0.00010472618154538636, "loss": 0.5587, "step": 9530 }, { "epoch": 0.7066805071550382, "grad_norm": 0.3610551357269287, "learning_rate": 0.00010471617904476119, "loss": 0.4785, "step": 9531 }, { "epoch": 0.706754652628457, "grad_norm": 0.34655383229255676, "learning_rate": 0.00010470617654413604, "loss": 0.4917, "step": 9532 }, { "epoch": 0.7068287981018759, "grad_norm": 0.34209415316581726, "learning_rate": 0.00010469617404351089, "loss": 0.4711, "step": 9533 }, { "epoch": 0.7069029435752947, "grad_norm": 0.3703288435935974, "learning_rate": 0.00010468617154288574, "loss": 0.496, "step": 9534 }, { "epoch": 0.7069770890487136, "grad_norm": 0.34101036190986633, "learning_rate": 0.00010467616904226056, "loss": 0.4766, "step": 9535 }, { "epoch": 0.7070512345221325, "grad_norm": 0.36834651231765747, "learning_rate": 0.00010466616654163541, "loss": 0.5255, "step": 9536 }, { "epoch": 0.7071253799955513, "grad_norm": 0.3866344392299652, "learning_rate": 0.00010465616404101026, "loss": 0.5226, "step": 9537 }, { "epoch": 0.7071995254689701, "grad_norm": 0.37273192405700684, "learning_rate": 0.00010464616154038509, "loss": 0.5126, "step": 9538 }, { "epoch": 0.707273670942389, "grad_norm": 0.35748520493507385, "learning_rate": 0.00010463615903975994, "loss": 0.5103, "step": 9539 }, { "epoch": 0.7073478164158078, "grad_norm": 0.37956154346466064, "learning_rate": 0.00010462615653913479, "loss": 0.4841, "step": 9540 }, { "epoch": 0.7074219618892267, "grad_norm": 0.3530421555042267, "learning_rate": 0.00010461615403850964, "loss": 0.4957, "step": 9541 }, { "epoch": 0.7074961073626456, "grad_norm": 0.3633171319961548, "learning_rate": 0.00010460615153788446, "loss": 0.4865, "step": 9542 }, { "epoch": 0.7075702528360643, "grad_norm": 0.3731708526611328, "learning_rate": 0.00010459614903725931, "loss": 0.537, "step": 9543 }, { "epoch": 0.7076443983094832, "grad_norm": 0.3636835515499115, "learning_rate": 0.00010458614653663416, "loss": 0.476, "step": 9544 }, { "epoch": 0.707718543782902, "grad_norm": 0.3761727809906006, "learning_rate": 0.00010457614403600901, "loss": 0.5078, "step": 9545 }, { "epoch": 0.7077926892563209, "grad_norm": 0.3640875220298767, "learning_rate": 0.00010456614153538384, "loss": 0.4875, "step": 9546 }, { "epoch": 0.7078668347297398, "grad_norm": 0.35236215591430664, "learning_rate": 0.00010455613903475869, "loss": 0.5123, "step": 9547 }, { "epoch": 0.7079409802031585, "grad_norm": 0.35401320457458496, "learning_rate": 0.00010454613653413354, "loss": 0.4827, "step": 9548 }, { "epoch": 0.7080151256765774, "grad_norm": 0.365820050239563, "learning_rate": 0.00010453613403350839, "loss": 0.499, "step": 9549 }, { "epoch": 0.7080892711499963, "grad_norm": 0.3773272633552551, "learning_rate": 0.00010452613153288323, "loss": 0.501, "step": 9550 }, { "epoch": 0.7081634166234151, "grad_norm": 0.3385913074016571, "learning_rate": 0.00010451612903225806, "loss": 0.4807, "step": 9551 }, { "epoch": 0.708237562096834, "grad_norm": 0.35215967893600464, "learning_rate": 0.00010450612653163291, "loss": 0.4671, "step": 9552 }, { "epoch": 0.7083117075702529, "grad_norm": 0.36291372776031494, "learning_rate": 0.00010449612403100775, "loss": 0.4743, "step": 9553 }, { "epoch": 0.7083858530436716, "grad_norm": 0.37077248096466064, "learning_rate": 0.0001044861215303826, "loss": 0.5102, "step": 9554 }, { "epoch": 0.7084599985170905, "grad_norm": 0.3390711843967438, "learning_rate": 0.00010447611902975745, "loss": 0.4771, "step": 9555 }, { "epoch": 0.7085341439905094, "grad_norm": 0.36713218688964844, "learning_rate": 0.00010446611652913229, "loss": 0.5061, "step": 9556 }, { "epoch": 0.7086082894639282, "grad_norm": 0.3562251329421997, "learning_rate": 0.00010445611402850713, "loss": 0.4827, "step": 9557 }, { "epoch": 0.7086824349373471, "grad_norm": 0.3670627176761627, "learning_rate": 0.00010444611152788198, "loss": 0.5316, "step": 9558 }, { "epoch": 0.708756580410766, "grad_norm": 0.35854288935661316, "learning_rate": 0.00010443610902725683, "loss": 0.4539, "step": 9559 }, { "epoch": 0.7088307258841847, "grad_norm": 0.37181639671325684, "learning_rate": 0.00010442610652663168, "loss": 0.5092, "step": 9560 }, { "epoch": 0.7089048713576036, "grad_norm": 0.36624595522880554, "learning_rate": 0.0001044161040260065, "loss": 0.5493, "step": 9561 }, { "epoch": 0.7089790168310225, "grad_norm": 0.3470240533351898, "learning_rate": 0.00010440610152538135, "loss": 0.4588, "step": 9562 }, { "epoch": 0.7090531623044413, "grad_norm": 0.3600395917892456, "learning_rate": 0.0001043960990247562, "loss": 0.4936, "step": 9563 }, { "epoch": 0.7091273077778601, "grad_norm": 0.33206337690353394, "learning_rate": 0.00010438609652413103, "loss": 0.4642, "step": 9564 }, { "epoch": 0.709201453251279, "grad_norm": 0.3603493571281433, "learning_rate": 0.00010437609402350588, "loss": 0.4865, "step": 9565 }, { "epoch": 0.7092755987246978, "grad_norm": 0.3621978461742401, "learning_rate": 0.00010436609152288073, "loss": 0.539, "step": 9566 }, { "epoch": 0.7093497441981167, "grad_norm": 0.34289202094078064, "learning_rate": 0.00010435608902225558, "loss": 0.4432, "step": 9567 }, { "epoch": 0.7094238896715356, "grad_norm": 0.3698923885822296, "learning_rate": 0.0001043460865216304, "loss": 0.5549, "step": 9568 }, { "epoch": 0.7094980351449544, "grad_norm": 0.37672653794288635, "learning_rate": 0.00010433608402100525, "loss": 0.5424, "step": 9569 }, { "epoch": 0.7095721806183732, "grad_norm": 0.3313029706478119, "learning_rate": 0.0001043260815203801, "loss": 0.4766, "step": 9570 }, { "epoch": 0.7096463260917921, "grad_norm": 0.3572425842285156, "learning_rate": 0.00010431607901975495, "loss": 0.5025, "step": 9571 }, { "epoch": 0.7097204715652109, "grad_norm": 0.3868156969547272, "learning_rate": 0.00010430607651912978, "loss": 0.541, "step": 9572 }, { "epoch": 0.7097946170386298, "grad_norm": 0.35863763093948364, "learning_rate": 0.00010429607401850463, "loss": 0.5083, "step": 9573 }, { "epoch": 0.7098687625120487, "grad_norm": 0.35964521765708923, "learning_rate": 0.00010428607151787948, "loss": 0.5217, "step": 9574 }, { "epoch": 0.7099429079854674, "grad_norm": 0.3631681501865387, "learning_rate": 0.0001042760690172543, "loss": 0.5405, "step": 9575 }, { "epoch": 0.7100170534588863, "grad_norm": 0.34756195545196533, "learning_rate": 0.00010426606651662915, "loss": 0.4681, "step": 9576 }, { "epoch": 0.7100911989323052, "grad_norm": 0.308993399143219, "learning_rate": 0.000104256064016004, "loss": 0.4456, "step": 9577 }, { "epoch": 0.710165344405724, "grad_norm": 0.3660984933376312, "learning_rate": 0.00010424606151537885, "loss": 0.5092, "step": 9578 }, { "epoch": 0.7102394898791429, "grad_norm": 0.34496834874153137, "learning_rate": 0.00010423605901475368, "loss": 0.4814, "step": 9579 }, { "epoch": 0.7103136353525618, "grad_norm": 0.35976994037628174, "learning_rate": 0.00010422605651412853, "loss": 0.522, "step": 9580 }, { "epoch": 0.7103877808259805, "grad_norm": 0.34185874462127686, "learning_rate": 0.00010421605401350338, "loss": 0.4903, "step": 9581 }, { "epoch": 0.7104619262993994, "grad_norm": 0.3620394468307495, "learning_rate": 0.00010420605151287823, "loss": 0.4956, "step": 9582 }, { "epoch": 0.7105360717728183, "grad_norm": 0.38979989290237427, "learning_rate": 0.00010419604901225307, "loss": 0.5678, "step": 9583 }, { "epoch": 0.7106102172462371, "grad_norm": 0.35134854912757874, "learning_rate": 0.0001041860465116279, "loss": 0.4916, "step": 9584 }, { "epoch": 0.710684362719656, "grad_norm": 0.3617027997970581, "learning_rate": 0.00010417604401100276, "loss": 0.4894, "step": 9585 }, { "epoch": 0.7107585081930748, "grad_norm": 0.3567997217178345, "learning_rate": 0.0001041660415103776, "loss": 0.4977, "step": 9586 }, { "epoch": 0.7108326536664936, "grad_norm": 0.3645617365837097, "learning_rate": 0.00010415603900975244, "loss": 0.5092, "step": 9587 }, { "epoch": 0.7109067991399125, "grad_norm": 0.327719122171402, "learning_rate": 0.0001041460365091273, "loss": 0.4885, "step": 9588 }, { "epoch": 0.7109809446133314, "grad_norm": 0.34321489930152893, "learning_rate": 0.00010413603400850213, "loss": 0.4843, "step": 9589 }, { "epoch": 0.7110550900867502, "grad_norm": 0.3504442870616913, "learning_rate": 0.00010412603150787697, "loss": 0.493, "step": 9590 }, { "epoch": 0.711129235560169, "grad_norm": 0.3571521043777466, "learning_rate": 0.00010411602900725182, "loss": 0.5456, "step": 9591 }, { "epoch": 0.7112033810335879, "grad_norm": 0.3647155463695526, "learning_rate": 0.00010410602650662667, "loss": 0.5352, "step": 9592 }, { "epoch": 0.7112775265070067, "grad_norm": 0.3585415482521057, "learning_rate": 0.00010409602400600152, "loss": 0.5435, "step": 9593 }, { "epoch": 0.7113516719804256, "grad_norm": 0.35883474349975586, "learning_rate": 0.00010408602150537634, "loss": 0.5074, "step": 9594 }, { "epoch": 0.7114258174538445, "grad_norm": 0.34288889169692993, "learning_rate": 0.0001040760190047512, "loss": 0.475, "step": 9595 }, { "epoch": 0.7114999629272633, "grad_norm": 0.3596421480178833, "learning_rate": 0.00010406601650412604, "loss": 0.5404, "step": 9596 }, { "epoch": 0.7115741084006821, "grad_norm": 0.3657815754413605, "learning_rate": 0.0001040560140035009, "loss": 0.516, "step": 9597 }, { "epoch": 0.711648253874101, "grad_norm": 0.3625108003616333, "learning_rate": 0.00010404601150287572, "loss": 0.52, "step": 9598 }, { "epoch": 0.7117223993475198, "grad_norm": 0.36516937613487244, "learning_rate": 0.00010403600900225057, "loss": 0.4744, "step": 9599 }, { "epoch": 0.7117965448209387, "grad_norm": 0.37508875131607056, "learning_rate": 0.00010402600650162542, "loss": 0.511, "step": 9600 }, { "epoch": 0.7118706902943576, "grad_norm": 0.38991203904151917, "learning_rate": 0.00010401600400100024, "loss": 0.588, "step": 9601 }, { "epoch": 0.7119448357677763, "grad_norm": 0.3963712751865387, "learning_rate": 0.0001040060015003751, "loss": 0.5474, "step": 9602 }, { "epoch": 0.7120189812411952, "grad_norm": 0.32601797580718994, "learning_rate": 0.00010399599899974994, "loss": 0.4698, "step": 9603 }, { "epoch": 0.7120931267146141, "grad_norm": 0.37227410078048706, "learning_rate": 0.0001039859964991248, "loss": 0.5439, "step": 9604 }, { "epoch": 0.7121672721880329, "grad_norm": 0.3479924201965332, "learning_rate": 0.00010397599399849962, "loss": 0.461, "step": 9605 }, { "epoch": 0.7122414176614518, "grad_norm": 0.36539986729621887, "learning_rate": 0.00010396599149787447, "loss": 0.4957, "step": 9606 }, { "epoch": 0.7123155631348707, "grad_norm": 0.3833867609500885, "learning_rate": 0.00010395598899724932, "loss": 0.5416, "step": 9607 }, { "epoch": 0.7123897086082894, "grad_norm": 0.38427847623825073, "learning_rate": 0.00010394598649662417, "loss": 0.5521, "step": 9608 }, { "epoch": 0.7124638540817083, "grad_norm": 0.3426275849342346, "learning_rate": 0.000103935983995999, "loss": 0.4809, "step": 9609 }, { "epoch": 0.7125379995551272, "grad_norm": 0.3383016586303711, "learning_rate": 0.00010392598149537384, "loss": 0.4631, "step": 9610 }, { "epoch": 0.712612145028546, "grad_norm": 0.347278892993927, "learning_rate": 0.0001039159789947487, "loss": 0.4762, "step": 9611 }, { "epoch": 0.7126862905019649, "grad_norm": 0.36901089549064636, "learning_rate": 0.00010390597649412355, "loss": 0.4662, "step": 9612 }, { "epoch": 0.7127604359753837, "grad_norm": 0.3347110152244568, "learning_rate": 0.00010389597399349837, "loss": 0.4965, "step": 9613 }, { "epoch": 0.7128345814488025, "grad_norm": 0.36193594336509705, "learning_rate": 0.00010388597149287322, "loss": 0.5222, "step": 9614 }, { "epoch": 0.7129087269222214, "grad_norm": 0.3751789629459381, "learning_rate": 0.00010387596899224807, "loss": 0.5304, "step": 9615 }, { "epoch": 0.7129828723956403, "grad_norm": 0.35513070225715637, "learning_rate": 0.0001038659664916229, "loss": 0.5158, "step": 9616 }, { "epoch": 0.7130570178690591, "grad_norm": 0.3745092451572418, "learning_rate": 0.00010385596399099775, "loss": 0.5226, "step": 9617 }, { "epoch": 0.713131163342478, "grad_norm": 0.36447837948799133, "learning_rate": 0.0001038459614903726, "loss": 0.5239, "step": 9618 }, { "epoch": 0.7132053088158968, "grad_norm": 0.3631798326969147, "learning_rate": 0.00010383595898974745, "loss": 0.5225, "step": 9619 }, { "epoch": 0.7132794542893156, "grad_norm": 0.36233752965927124, "learning_rate": 0.00010382595648912228, "loss": 0.5224, "step": 9620 }, { "epoch": 0.7133535997627345, "grad_norm": 0.3432908356189728, "learning_rate": 0.00010381595398849713, "loss": 0.4977, "step": 9621 }, { "epoch": 0.7134277452361534, "grad_norm": 0.38221192359924316, "learning_rate": 0.00010380595148787197, "loss": 0.543, "step": 9622 }, { "epoch": 0.7135018907095722, "grad_norm": 0.3696666955947876, "learning_rate": 0.00010379594898724682, "loss": 0.4956, "step": 9623 }, { "epoch": 0.713576036182991, "grad_norm": 0.34568020701408386, "learning_rate": 0.00010378594648662166, "loss": 0.4813, "step": 9624 }, { "epoch": 0.7136501816564099, "grad_norm": 0.34729182720184326, "learning_rate": 0.00010377594398599651, "loss": 0.4822, "step": 9625 }, { "epoch": 0.7137243271298287, "grad_norm": 0.35582712292671204, "learning_rate": 0.00010376594148537136, "loss": 0.5156, "step": 9626 }, { "epoch": 0.7137984726032476, "grad_norm": 0.3808380663394928, "learning_rate": 0.00010375593898474618, "loss": 0.5296, "step": 9627 }, { "epoch": 0.7138726180766665, "grad_norm": 0.3623589277267456, "learning_rate": 0.00010374593648412103, "loss": 0.4806, "step": 9628 }, { "epoch": 0.7139467635500852, "grad_norm": 0.36742711067199707, "learning_rate": 0.00010373593398349589, "loss": 0.5019, "step": 9629 }, { "epoch": 0.7140209090235041, "grad_norm": 0.371166467666626, "learning_rate": 0.00010372593148287074, "loss": 0.5423, "step": 9630 }, { "epoch": 0.714095054496923, "grad_norm": 0.37295472621917725, "learning_rate": 0.00010371592898224556, "loss": 0.5165, "step": 9631 }, { "epoch": 0.7141691999703418, "grad_norm": 0.3600933849811554, "learning_rate": 0.00010370592648162041, "loss": 0.5002, "step": 9632 }, { "epoch": 0.7142433454437607, "grad_norm": 0.34575310349464417, "learning_rate": 0.00010369592398099526, "loss": 0.4922, "step": 9633 }, { "epoch": 0.7143174909171796, "grad_norm": 0.3617161810398102, "learning_rate": 0.00010368592148037011, "loss": 0.4931, "step": 9634 }, { "epoch": 0.7143916363905983, "grad_norm": 0.34830841422080994, "learning_rate": 0.00010367591897974493, "loss": 0.5123, "step": 9635 }, { "epoch": 0.7144657818640172, "grad_norm": 0.35378217697143555, "learning_rate": 0.00010366591647911979, "loss": 0.5285, "step": 9636 }, { "epoch": 0.7145399273374361, "grad_norm": 0.3444756269454956, "learning_rate": 0.00010365591397849464, "loss": 0.4788, "step": 9637 }, { "epoch": 0.7146140728108549, "grad_norm": 0.36407092213630676, "learning_rate": 0.00010364591147786946, "loss": 0.5224, "step": 9638 }, { "epoch": 0.7146882182842738, "grad_norm": 0.36927804350852966, "learning_rate": 0.00010363590897724431, "loss": 0.5637, "step": 9639 }, { "epoch": 0.7147623637576926, "grad_norm": 0.3443388342857361, "learning_rate": 0.00010362590647661916, "loss": 0.4982, "step": 9640 }, { "epoch": 0.7148365092311114, "grad_norm": 0.380292147397995, "learning_rate": 0.00010361590397599401, "loss": 0.5531, "step": 9641 }, { "epoch": 0.7149106547045303, "grad_norm": 0.36183592677116394, "learning_rate": 0.00010360590147536884, "loss": 0.5035, "step": 9642 }, { "epoch": 0.7149848001779492, "grad_norm": 0.3697739243507385, "learning_rate": 0.00010359589897474369, "loss": 0.5108, "step": 9643 }, { "epoch": 0.715058945651368, "grad_norm": 0.3593501150608063, "learning_rate": 0.00010358589647411854, "loss": 0.4881, "step": 9644 }, { "epoch": 0.7151330911247868, "grad_norm": 0.3596026301383972, "learning_rate": 0.00010357589397349339, "loss": 0.4864, "step": 9645 }, { "epoch": 0.7152072365982057, "grad_norm": 0.36497822403907776, "learning_rate": 0.00010356589147286821, "loss": 0.4916, "step": 9646 }, { "epoch": 0.7152813820716245, "grad_norm": 0.3719693422317505, "learning_rate": 0.00010355588897224306, "loss": 0.5482, "step": 9647 }, { "epoch": 0.7153555275450434, "grad_norm": 0.33686670660972595, "learning_rate": 0.00010354588647161791, "loss": 0.4705, "step": 9648 }, { "epoch": 0.7154296730184623, "grad_norm": 0.36593636870384216, "learning_rate": 0.00010353588397099276, "loss": 0.5183, "step": 9649 }, { "epoch": 0.715503818491881, "grad_norm": 0.34755802154541016, "learning_rate": 0.00010352588147036759, "loss": 0.4881, "step": 9650 }, { "epoch": 0.7155779639652999, "grad_norm": 0.356306791305542, "learning_rate": 0.00010351587896974244, "loss": 0.4873, "step": 9651 }, { "epoch": 0.7156521094387188, "grad_norm": 0.3427097797393799, "learning_rate": 0.00010350587646911729, "loss": 0.5042, "step": 9652 }, { "epoch": 0.7157262549121376, "grad_norm": 0.3369997441768646, "learning_rate": 0.00010349587396849212, "loss": 0.4808, "step": 9653 }, { "epoch": 0.7158004003855565, "grad_norm": 0.3482336103916168, "learning_rate": 0.00010348587146786696, "loss": 0.4896, "step": 9654 }, { "epoch": 0.7158745458589754, "grad_norm": 0.3327934741973877, "learning_rate": 0.00010347586896724181, "loss": 0.4592, "step": 9655 }, { "epoch": 0.7159486913323941, "grad_norm": 0.35156065225601196, "learning_rate": 0.00010346586646661666, "loss": 0.4591, "step": 9656 }, { "epoch": 0.716022836805813, "grad_norm": 0.3456558585166931, "learning_rate": 0.0001034558639659915, "loss": 0.4819, "step": 9657 }, { "epoch": 0.7160969822792319, "grad_norm": 0.36162349581718445, "learning_rate": 0.00010344586146536635, "loss": 0.4865, "step": 9658 }, { "epoch": 0.7161711277526507, "grad_norm": 0.3558482229709625, "learning_rate": 0.0001034358589647412, "loss": 0.4903, "step": 9659 }, { "epoch": 0.7162452732260696, "grad_norm": 0.34398096799850464, "learning_rate": 0.00010342585646411604, "loss": 0.4804, "step": 9660 }, { "epoch": 0.7163194186994883, "grad_norm": 0.3527904152870178, "learning_rate": 0.00010341585396349088, "loss": 0.4685, "step": 9661 }, { "epoch": 0.7163935641729072, "grad_norm": 0.3508869707584381, "learning_rate": 0.00010340585146286573, "loss": 0.4874, "step": 9662 }, { "epoch": 0.7164677096463261, "grad_norm": 0.38276880979537964, "learning_rate": 0.00010339584896224058, "loss": 0.5247, "step": 9663 }, { "epoch": 0.7165418551197449, "grad_norm": 0.3673514127731323, "learning_rate": 0.0001033858464616154, "loss": 0.5047, "step": 9664 }, { "epoch": 0.7166160005931638, "grad_norm": 0.3816140592098236, "learning_rate": 0.00010337584396099025, "loss": 0.522, "step": 9665 }, { "epoch": 0.7166901460665827, "grad_norm": 0.37269923090934753, "learning_rate": 0.0001033658414603651, "loss": 0.514, "step": 9666 }, { "epoch": 0.7167642915400014, "grad_norm": 0.35025402903556824, "learning_rate": 0.00010335583895973995, "loss": 0.497, "step": 9667 }, { "epoch": 0.7168384370134203, "grad_norm": 0.37447285652160645, "learning_rate": 0.00010334583645911478, "loss": 0.473, "step": 9668 }, { "epoch": 0.7169125824868392, "grad_norm": 0.36443886160850525, "learning_rate": 0.00010333583395848963, "loss": 0.497, "step": 9669 }, { "epoch": 0.716986727960258, "grad_norm": 0.3463485836982727, "learning_rate": 0.00010332583145786448, "loss": 0.4714, "step": 9670 }, { "epoch": 0.7170608734336769, "grad_norm": 0.38711193203926086, "learning_rate": 0.00010331582895723933, "loss": 0.5682, "step": 9671 }, { "epoch": 0.7171350189070957, "grad_norm": 0.3466128408908844, "learning_rate": 0.00010330582645661415, "loss": 0.4916, "step": 9672 }, { "epoch": 0.7172091643805145, "grad_norm": 0.3399793207645416, "learning_rate": 0.000103295823955989, "loss": 0.443, "step": 9673 }, { "epoch": 0.7172833098539334, "grad_norm": 0.3775970935821533, "learning_rate": 0.00010328582145536385, "loss": 0.5477, "step": 9674 }, { "epoch": 0.7173574553273523, "grad_norm": 0.3439714312553406, "learning_rate": 0.00010327581895473868, "loss": 0.4662, "step": 9675 }, { "epoch": 0.7174316008007711, "grad_norm": 0.37168174982070923, "learning_rate": 0.00010326581645411353, "loss": 0.4666, "step": 9676 }, { "epoch": 0.71750574627419, "grad_norm": 0.38713008165359497, "learning_rate": 0.00010325581395348838, "loss": 0.534, "step": 9677 }, { "epoch": 0.7175798917476088, "grad_norm": 0.3663727939128876, "learning_rate": 0.00010324581145286323, "loss": 0.5155, "step": 9678 }, { "epoch": 0.7176540372210276, "grad_norm": 0.3695509433746338, "learning_rate": 0.00010323580895223805, "loss": 0.4971, "step": 9679 }, { "epoch": 0.7177281826944465, "grad_norm": 0.36585721373558044, "learning_rate": 0.0001032258064516129, "loss": 0.5003, "step": 9680 }, { "epoch": 0.7178023281678654, "grad_norm": 0.33671316504478455, "learning_rate": 0.00010321580395098775, "loss": 0.483, "step": 9681 }, { "epoch": 0.7178764736412842, "grad_norm": 0.34142202138900757, "learning_rate": 0.0001032058014503626, "loss": 0.4517, "step": 9682 }, { "epoch": 0.717950619114703, "grad_norm": 0.3452979028224945, "learning_rate": 0.00010319579894973743, "loss": 0.4754, "step": 9683 }, { "epoch": 0.7180247645881219, "grad_norm": 0.3332175016403198, "learning_rate": 0.00010318579644911228, "loss": 0.4685, "step": 9684 }, { "epoch": 0.7180989100615407, "grad_norm": 0.32325077056884766, "learning_rate": 0.00010317579394848713, "loss": 0.4449, "step": 9685 }, { "epoch": 0.7181730555349596, "grad_norm": 0.3666386604309082, "learning_rate": 0.00010316579144786198, "loss": 0.4926, "step": 9686 }, { "epoch": 0.7182472010083785, "grad_norm": 0.36045005917549133, "learning_rate": 0.0001031557889472368, "loss": 0.5311, "step": 9687 }, { "epoch": 0.7183213464817972, "grad_norm": 0.3759250044822693, "learning_rate": 0.00010314578644661165, "loss": 0.5698, "step": 9688 }, { "epoch": 0.7183954919552161, "grad_norm": 0.35966646671295166, "learning_rate": 0.0001031357839459865, "loss": 0.5201, "step": 9689 }, { "epoch": 0.718469637428635, "grad_norm": 0.36206576228141785, "learning_rate": 0.00010312578144536134, "loss": 0.4753, "step": 9690 }, { "epoch": 0.7185437829020538, "grad_norm": 0.34979739785194397, "learning_rate": 0.00010311577894473619, "loss": 0.4915, "step": 9691 }, { "epoch": 0.7186179283754727, "grad_norm": 0.3631936311721802, "learning_rate": 0.00010310577644411103, "loss": 0.5022, "step": 9692 }, { "epoch": 0.7186920738488916, "grad_norm": 0.3591117262840271, "learning_rate": 0.00010309577394348588, "loss": 0.5261, "step": 9693 }, { "epoch": 0.7187662193223103, "grad_norm": 0.34290823340415955, "learning_rate": 0.00010308577144286072, "loss": 0.4977, "step": 9694 }, { "epoch": 0.7188403647957292, "grad_norm": 0.3393253982067108, "learning_rate": 0.00010307576894223557, "loss": 0.4693, "step": 9695 }, { "epoch": 0.7189145102691481, "grad_norm": 0.3640480637550354, "learning_rate": 0.00010306576644161042, "loss": 0.5217, "step": 9696 }, { "epoch": 0.7189886557425669, "grad_norm": 0.37789517641067505, "learning_rate": 0.00010305576394098527, "loss": 0.511, "step": 9697 }, { "epoch": 0.7190628012159858, "grad_norm": 0.35852012038230896, "learning_rate": 0.00010304576144036009, "loss": 0.4945, "step": 9698 }, { "epoch": 0.7191369466894046, "grad_norm": 0.3423953950405121, "learning_rate": 0.00010303575893973494, "loss": 0.469, "step": 9699 }, { "epoch": 0.7192110921628234, "grad_norm": 0.3515600264072418, "learning_rate": 0.00010302575643910979, "loss": 0.4807, "step": 9700 }, { "epoch": 0.7192852376362423, "grad_norm": 0.3576926589012146, "learning_rate": 0.00010301575393848462, "loss": 0.5178, "step": 9701 }, { "epoch": 0.7193593831096612, "grad_norm": 0.362484872341156, "learning_rate": 0.00010300575143785947, "loss": 0.4988, "step": 9702 }, { "epoch": 0.71943352858308, "grad_norm": 0.34923291206359863, "learning_rate": 0.00010299574893723432, "loss": 0.4799, "step": 9703 }, { "epoch": 0.7195076740564988, "grad_norm": 0.4015672206878662, "learning_rate": 0.00010298574643660917, "loss": 0.5555, "step": 9704 }, { "epoch": 0.7195818195299177, "grad_norm": 0.3897232115268707, "learning_rate": 0.00010297574393598399, "loss": 0.5045, "step": 9705 }, { "epoch": 0.7196559650033365, "grad_norm": 0.35629600286483765, "learning_rate": 0.00010296574143535884, "loss": 0.5052, "step": 9706 }, { "epoch": 0.7197301104767554, "grad_norm": 0.37771931290626526, "learning_rate": 0.00010295573893473369, "loss": 0.5067, "step": 9707 }, { "epoch": 0.7198042559501743, "grad_norm": 0.36245766282081604, "learning_rate": 0.00010294573643410854, "loss": 0.4953, "step": 9708 }, { "epoch": 0.719878401423593, "grad_norm": 0.36222320795059204, "learning_rate": 0.00010293573393348337, "loss": 0.4789, "step": 9709 }, { "epoch": 0.7199525468970119, "grad_norm": 0.33499446511268616, "learning_rate": 0.00010292573143285822, "loss": 0.4619, "step": 9710 }, { "epoch": 0.7200266923704308, "grad_norm": 0.34255075454711914, "learning_rate": 0.00010291572893223307, "loss": 0.4764, "step": 9711 }, { "epoch": 0.7201008378438496, "grad_norm": 0.3383685052394867, "learning_rate": 0.00010290572643160789, "loss": 0.4957, "step": 9712 }, { "epoch": 0.7201749833172685, "grad_norm": 0.3368259072303772, "learning_rate": 0.00010289572393098274, "loss": 0.4853, "step": 9713 }, { "epoch": 0.7202491287906874, "grad_norm": 0.3752235770225525, "learning_rate": 0.0001028857214303576, "loss": 0.53, "step": 9714 }, { "epoch": 0.7203232742641061, "grad_norm": 0.3435654938220978, "learning_rate": 0.00010287571892973244, "loss": 0.477, "step": 9715 }, { "epoch": 0.720397419737525, "grad_norm": 0.37171196937561035, "learning_rate": 0.00010286571642910727, "loss": 0.4756, "step": 9716 }, { "epoch": 0.7204715652109439, "grad_norm": 0.35298293828964233, "learning_rate": 0.00010285571392848212, "loss": 0.4893, "step": 9717 }, { "epoch": 0.7205457106843627, "grad_norm": 0.35486507415771484, "learning_rate": 0.00010284571142785697, "loss": 0.5097, "step": 9718 }, { "epoch": 0.7206198561577816, "grad_norm": 0.3841607868671417, "learning_rate": 0.00010283570892723182, "loss": 0.5127, "step": 9719 }, { "epoch": 0.7206940016312005, "grad_norm": 0.35250288248062134, "learning_rate": 0.00010282570642660664, "loss": 0.5069, "step": 9720 }, { "epoch": 0.7207681471046192, "grad_norm": 0.3848665654659271, "learning_rate": 0.0001028157039259815, "loss": 0.5731, "step": 9721 }, { "epoch": 0.7208422925780381, "grad_norm": 0.33708128333091736, "learning_rate": 0.00010280570142535634, "loss": 0.4474, "step": 9722 }, { "epoch": 0.720916438051457, "grad_norm": 0.3600212037563324, "learning_rate": 0.0001027956989247312, "loss": 0.4742, "step": 9723 }, { "epoch": 0.7209905835248758, "grad_norm": 0.3786916136741638, "learning_rate": 0.00010278569642410603, "loss": 0.5412, "step": 9724 }, { "epoch": 0.7210647289982947, "grad_norm": 0.3466437757015228, "learning_rate": 0.00010277569392348087, "loss": 0.4992, "step": 9725 }, { "epoch": 0.7211388744717135, "grad_norm": 0.3392660617828369, "learning_rate": 0.00010276569142285572, "loss": 0.4675, "step": 9726 }, { "epoch": 0.7212130199451323, "grad_norm": 0.3709917366504669, "learning_rate": 0.00010275568892223056, "loss": 0.5329, "step": 9727 }, { "epoch": 0.7212871654185512, "grad_norm": 0.3510358929634094, "learning_rate": 0.00010274568642160541, "loss": 0.4801, "step": 9728 }, { "epoch": 0.7213613108919701, "grad_norm": 0.33418962359428406, "learning_rate": 0.00010273568392098026, "loss": 0.4563, "step": 9729 }, { "epoch": 0.7214354563653889, "grad_norm": 0.3419716954231262, "learning_rate": 0.0001027256814203551, "loss": 0.5112, "step": 9730 }, { "epoch": 0.7215096018388077, "grad_norm": 0.35731834173202515, "learning_rate": 0.00010271567891972993, "loss": 0.4995, "step": 9731 }, { "epoch": 0.7215837473122266, "grad_norm": 0.37698984146118164, "learning_rate": 0.00010270567641910478, "loss": 0.5149, "step": 9732 }, { "epoch": 0.7216578927856454, "grad_norm": 0.3656899034976959, "learning_rate": 0.00010269567391847963, "loss": 0.5152, "step": 9733 }, { "epoch": 0.7217320382590643, "grad_norm": 0.35909393429756165, "learning_rate": 0.00010268567141785448, "loss": 0.5151, "step": 9734 }, { "epoch": 0.7218061837324832, "grad_norm": 0.3755998909473419, "learning_rate": 0.00010267566891722931, "loss": 0.4873, "step": 9735 }, { "epoch": 0.721880329205902, "grad_norm": 0.3326190114021301, "learning_rate": 0.00010266566641660416, "loss": 0.4658, "step": 9736 }, { "epoch": 0.7219544746793208, "grad_norm": 0.3479914963245392, "learning_rate": 0.00010265566391597901, "loss": 0.4525, "step": 9737 }, { "epoch": 0.7220286201527397, "grad_norm": 0.34581801295280457, "learning_rate": 0.00010264566141535383, "loss": 0.4823, "step": 9738 }, { "epoch": 0.7221027656261585, "grad_norm": 0.3518017530441284, "learning_rate": 0.00010263565891472868, "loss": 0.466, "step": 9739 }, { "epoch": 0.7221769110995774, "grad_norm": 0.3483041226863861, "learning_rate": 0.00010262565641410353, "loss": 0.4861, "step": 9740 }, { "epoch": 0.7222510565729963, "grad_norm": 0.34209495782852173, "learning_rate": 0.00010261565391347838, "loss": 0.4814, "step": 9741 }, { "epoch": 0.722325202046415, "grad_norm": 0.34975168108940125, "learning_rate": 0.00010260565141285321, "loss": 0.4912, "step": 9742 }, { "epoch": 0.7223993475198339, "grad_norm": 0.35078340768814087, "learning_rate": 0.00010259564891222806, "loss": 0.525, "step": 9743 }, { "epoch": 0.7224734929932528, "grad_norm": 0.35062864422798157, "learning_rate": 0.00010258564641160291, "loss": 0.5046, "step": 9744 }, { "epoch": 0.7225476384666716, "grad_norm": 0.3441038727760315, "learning_rate": 0.00010257564391097776, "loss": 0.5067, "step": 9745 }, { "epoch": 0.7226217839400905, "grad_norm": 0.351455956697464, "learning_rate": 0.00010256564141035258, "loss": 0.4839, "step": 9746 }, { "epoch": 0.7226959294135094, "grad_norm": 0.3585837781429291, "learning_rate": 0.00010255563890972743, "loss": 0.4958, "step": 9747 }, { "epoch": 0.7227700748869281, "grad_norm": 0.33063367009162903, "learning_rate": 0.00010254563640910228, "loss": 0.4714, "step": 9748 }, { "epoch": 0.722844220360347, "grad_norm": 0.36661675572395325, "learning_rate": 0.00010253563390847711, "loss": 0.5159, "step": 9749 }, { "epoch": 0.7229183658337659, "grad_norm": 0.35828354954719543, "learning_rate": 0.00010252563140785196, "loss": 0.4768, "step": 9750 }, { "epoch": 0.7229925113071847, "grad_norm": 0.3666268289089203, "learning_rate": 0.00010251562890722681, "loss": 0.5131, "step": 9751 }, { "epoch": 0.7230666567806036, "grad_norm": 0.3679625689983368, "learning_rate": 0.00010250562640660166, "loss": 0.5243, "step": 9752 }, { "epoch": 0.7231408022540224, "grad_norm": 0.35886847972869873, "learning_rate": 0.00010249562390597648, "loss": 0.4878, "step": 9753 }, { "epoch": 0.7232149477274412, "grad_norm": 0.3519250452518463, "learning_rate": 0.00010248562140535133, "loss": 0.4996, "step": 9754 }, { "epoch": 0.7232890932008601, "grad_norm": 0.36040163040161133, "learning_rate": 0.00010247561890472619, "loss": 0.485, "step": 9755 }, { "epoch": 0.723363238674279, "grad_norm": 0.3558456301689148, "learning_rate": 0.00010246561640410104, "loss": 0.4935, "step": 9756 }, { "epoch": 0.7234373841476978, "grad_norm": 0.36817291378974915, "learning_rate": 0.00010245561390347587, "loss": 0.5564, "step": 9757 }, { "epoch": 0.7235115296211166, "grad_norm": 0.36339157819747925, "learning_rate": 0.00010244561140285071, "loss": 0.4683, "step": 9758 }, { "epoch": 0.7235856750945355, "grad_norm": 0.3454005718231201, "learning_rate": 0.00010243560890222556, "loss": 0.4738, "step": 9759 }, { "epoch": 0.7236598205679543, "grad_norm": 0.3500618636608124, "learning_rate": 0.00010242560640160041, "loss": 0.4736, "step": 9760 }, { "epoch": 0.7237339660413732, "grad_norm": 0.3485095798969269, "learning_rate": 0.00010241560390097525, "loss": 0.5082, "step": 9761 }, { "epoch": 0.7238081115147921, "grad_norm": 0.3483562469482422, "learning_rate": 0.0001024056014003501, "loss": 0.4982, "step": 9762 }, { "epoch": 0.7238822569882108, "grad_norm": 0.3801118731498718, "learning_rate": 0.00010239559889972494, "loss": 0.5648, "step": 9763 }, { "epoch": 0.7239564024616297, "grad_norm": 0.3795664310455322, "learning_rate": 0.00010238559639909977, "loss": 0.5322, "step": 9764 }, { "epoch": 0.7240305479350486, "grad_norm": 0.32788804173469543, "learning_rate": 0.00010237559389847462, "loss": 0.4625, "step": 9765 }, { "epoch": 0.7241046934084674, "grad_norm": 0.34051942825317383, "learning_rate": 0.00010236559139784947, "loss": 0.505, "step": 9766 }, { "epoch": 0.7241788388818863, "grad_norm": 0.34326937794685364, "learning_rate": 0.00010235558889722433, "loss": 0.4733, "step": 9767 }, { "epoch": 0.7242529843553052, "grad_norm": 0.327134370803833, "learning_rate": 0.00010234558639659915, "loss": 0.4612, "step": 9768 }, { "epoch": 0.7243271298287239, "grad_norm": 0.34926077723503113, "learning_rate": 0.000102335583895974, "loss": 0.4579, "step": 9769 }, { "epoch": 0.7244012753021428, "grad_norm": 0.3483288884162903, "learning_rate": 0.00010232558139534885, "loss": 0.5067, "step": 9770 }, { "epoch": 0.7244754207755617, "grad_norm": 0.3790113627910614, "learning_rate": 0.0001023155788947237, "loss": 0.5236, "step": 9771 }, { "epoch": 0.7245495662489805, "grad_norm": 0.3245033919811249, "learning_rate": 0.00010230557639409852, "loss": 0.4635, "step": 9772 }, { "epoch": 0.7246237117223994, "grad_norm": 0.3470630347728729, "learning_rate": 0.00010229557389347337, "loss": 0.4772, "step": 9773 }, { "epoch": 0.7246978571958181, "grad_norm": 0.35842961072921753, "learning_rate": 0.00010228557139284823, "loss": 0.4752, "step": 9774 }, { "epoch": 0.724772002669237, "grad_norm": 0.3505284786224365, "learning_rate": 0.00010227556889222305, "loss": 0.4874, "step": 9775 }, { "epoch": 0.7248461481426559, "grad_norm": 0.3431873917579651, "learning_rate": 0.0001022655663915979, "loss": 0.4268, "step": 9776 }, { "epoch": 0.7249202936160747, "grad_norm": 0.3773212134838104, "learning_rate": 0.00010225556389097275, "loss": 0.55, "step": 9777 }, { "epoch": 0.7249944390894936, "grad_norm": 0.36995819211006165, "learning_rate": 0.0001022455613903476, "loss": 0.5026, "step": 9778 }, { "epoch": 0.7250685845629125, "grad_norm": 0.34571272134780884, "learning_rate": 0.00010223555888972242, "loss": 0.4555, "step": 9779 }, { "epoch": 0.7251427300363312, "grad_norm": 0.36047229170799255, "learning_rate": 0.00010222555638909728, "loss": 0.4976, "step": 9780 }, { "epoch": 0.7252168755097501, "grad_norm": 0.34880995750427246, "learning_rate": 0.00010221555388847213, "loss": 0.4501, "step": 9781 }, { "epoch": 0.725291020983169, "grad_norm": 0.35202935338020325, "learning_rate": 0.00010220555138784698, "loss": 0.4975, "step": 9782 }, { "epoch": 0.7253651664565878, "grad_norm": 0.37747761607170105, "learning_rate": 0.0001021955488872218, "loss": 0.5088, "step": 9783 }, { "epoch": 0.7254393119300067, "grad_norm": 0.3734073042869568, "learning_rate": 0.00010218554638659665, "loss": 0.4974, "step": 9784 }, { "epoch": 0.7255134574034255, "grad_norm": 0.337614506483078, "learning_rate": 0.0001021755438859715, "loss": 0.4592, "step": 9785 }, { "epoch": 0.7255876028768443, "grad_norm": 0.3522647023200989, "learning_rate": 0.00010216554138534632, "loss": 0.4811, "step": 9786 }, { "epoch": 0.7256617483502632, "grad_norm": 0.35012757778167725, "learning_rate": 0.00010215553888472118, "loss": 0.4912, "step": 9787 }, { "epoch": 0.7257358938236821, "grad_norm": 0.3521864712238312, "learning_rate": 0.00010214553638409603, "loss": 0.4553, "step": 9788 }, { "epoch": 0.7258100392971009, "grad_norm": 0.3916214108467102, "learning_rate": 0.00010213553388347088, "loss": 0.5077, "step": 9789 }, { "epoch": 0.7258841847705197, "grad_norm": 0.34654730558395386, "learning_rate": 0.00010212553138284571, "loss": 0.4747, "step": 9790 }, { "epoch": 0.7259583302439386, "grad_norm": 0.3452068567276001, "learning_rate": 0.00010211552888222055, "loss": 0.4668, "step": 9791 }, { "epoch": 0.7260324757173574, "grad_norm": 0.3403995633125305, "learning_rate": 0.0001021055263815954, "loss": 0.4471, "step": 9792 }, { "epoch": 0.7261066211907763, "grad_norm": 0.3598477840423584, "learning_rate": 0.00010209552388097025, "loss": 0.4766, "step": 9793 }, { "epoch": 0.7261807666641952, "grad_norm": 0.4077015817165375, "learning_rate": 0.00010208552138034509, "loss": 0.541, "step": 9794 }, { "epoch": 0.726254912137614, "grad_norm": 0.3655163645744324, "learning_rate": 0.00010207551887971994, "loss": 0.4933, "step": 9795 }, { "epoch": 0.7263290576110328, "grad_norm": 0.397950142621994, "learning_rate": 0.00010206551637909478, "loss": 0.5283, "step": 9796 }, { "epoch": 0.7264032030844517, "grad_norm": 0.3870863616466522, "learning_rate": 0.00010205551387846963, "loss": 0.5337, "step": 9797 }, { "epoch": 0.7264773485578705, "grad_norm": 0.3620512783527374, "learning_rate": 0.00010204551137784446, "loss": 0.4719, "step": 9798 }, { "epoch": 0.7265514940312894, "grad_norm": 0.34323006868362427, "learning_rate": 0.00010203550887721932, "loss": 0.4964, "step": 9799 }, { "epoch": 0.7266256395047083, "grad_norm": 0.3455500900745392, "learning_rate": 0.00010202550637659417, "loss": 0.4764, "step": 9800 }, { "epoch": 0.726699784978127, "grad_norm": 0.36972761154174805, "learning_rate": 0.00010201550387596899, "loss": 0.4727, "step": 9801 }, { "epoch": 0.7267739304515459, "grad_norm": 0.3585589826107025, "learning_rate": 0.00010200550137534384, "loss": 0.5016, "step": 9802 }, { "epoch": 0.7268480759249648, "grad_norm": 0.36726638674736023, "learning_rate": 0.00010199549887471869, "loss": 0.4826, "step": 9803 }, { "epoch": 0.7269222213983836, "grad_norm": 0.353174090385437, "learning_rate": 0.00010198549637409354, "loss": 0.4782, "step": 9804 }, { "epoch": 0.7269963668718025, "grad_norm": 0.3389478325843811, "learning_rate": 0.00010197549387346836, "loss": 0.4709, "step": 9805 }, { "epoch": 0.7270705123452214, "grad_norm": 0.37331315875053406, "learning_rate": 0.00010196549137284322, "loss": 0.49, "step": 9806 }, { "epoch": 0.7271446578186401, "grad_norm": 0.3632277846336365, "learning_rate": 0.00010195548887221807, "loss": 0.505, "step": 9807 }, { "epoch": 0.727218803292059, "grad_norm": 0.36647558212280273, "learning_rate": 0.00010194548637159292, "loss": 0.4812, "step": 9808 }, { "epoch": 0.7272929487654779, "grad_norm": 0.35871875286102295, "learning_rate": 0.00010193548387096774, "loss": 0.4725, "step": 9809 }, { "epoch": 0.7273670942388967, "grad_norm": 0.339747816324234, "learning_rate": 0.00010192548137034259, "loss": 0.4907, "step": 9810 }, { "epoch": 0.7274412397123156, "grad_norm": 0.34360161423683167, "learning_rate": 0.00010191547886971744, "loss": 0.4741, "step": 9811 }, { "epoch": 0.7275153851857344, "grad_norm": 0.34721219539642334, "learning_rate": 0.00010190547636909227, "loss": 0.509, "step": 9812 }, { "epoch": 0.7275895306591532, "grad_norm": 0.33799558877944946, "learning_rate": 0.00010189547386846712, "loss": 0.4666, "step": 9813 }, { "epoch": 0.7276636761325721, "grad_norm": 0.35619431734085083, "learning_rate": 0.00010188547136784197, "loss": 0.5204, "step": 9814 }, { "epoch": 0.727737821605991, "grad_norm": 0.3779049217700958, "learning_rate": 0.00010187546886721682, "loss": 0.5349, "step": 9815 }, { "epoch": 0.7278119670794098, "grad_norm": 0.3376104235649109, "learning_rate": 0.00010186546636659164, "loss": 0.4842, "step": 9816 }, { "epoch": 0.7278861125528286, "grad_norm": 0.35594654083251953, "learning_rate": 0.00010185546386596649, "loss": 0.4904, "step": 9817 }, { "epoch": 0.7279602580262475, "grad_norm": 0.3495792746543884, "learning_rate": 0.00010184546136534134, "loss": 0.4928, "step": 9818 }, { "epoch": 0.7280344034996663, "grad_norm": 0.347889244556427, "learning_rate": 0.00010183545886471619, "loss": 0.4851, "step": 9819 }, { "epoch": 0.7281085489730852, "grad_norm": 0.34139391779899597, "learning_rate": 0.00010182545636409102, "loss": 0.4885, "step": 9820 }, { "epoch": 0.7281826944465041, "grad_norm": 0.33914074301719666, "learning_rate": 0.00010181545386346587, "loss": 0.47, "step": 9821 }, { "epoch": 0.7282568399199228, "grad_norm": 0.36449065804481506, "learning_rate": 0.00010180545136284072, "loss": 0.5069, "step": 9822 }, { "epoch": 0.7283309853933417, "grad_norm": 0.3296051621437073, "learning_rate": 0.00010179544886221554, "loss": 0.44, "step": 9823 }, { "epoch": 0.7284051308667606, "grad_norm": 0.35576388239860535, "learning_rate": 0.00010178544636159039, "loss": 0.47, "step": 9824 }, { "epoch": 0.7284792763401794, "grad_norm": 0.3581241965293884, "learning_rate": 0.00010177544386096524, "loss": 0.4987, "step": 9825 }, { "epoch": 0.7285534218135983, "grad_norm": 0.34486275911331177, "learning_rate": 0.00010176544136034009, "loss": 0.4702, "step": 9826 }, { "epoch": 0.7286275672870172, "grad_norm": 0.38434791564941406, "learning_rate": 0.00010175543885971493, "loss": 0.5137, "step": 9827 }, { "epoch": 0.7287017127604359, "grad_norm": 0.3534594774246216, "learning_rate": 0.00010174543635908978, "loss": 0.4951, "step": 9828 }, { "epoch": 0.7287758582338548, "grad_norm": 0.34415745735168457, "learning_rate": 0.00010173543385846462, "loss": 0.4695, "step": 9829 }, { "epoch": 0.7288500037072737, "grad_norm": 0.3563942313194275, "learning_rate": 0.00010172543135783947, "loss": 0.5047, "step": 9830 }, { "epoch": 0.7289241491806925, "grad_norm": 0.3536151647567749, "learning_rate": 0.0001017154288572143, "loss": 0.4801, "step": 9831 }, { "epoch": 0.7289982946541114, "grad_norm": 0.36200082302093506, "learning_rate": 0.00010170542635658916, "loss": 0.5225, "step": 9832 }, { "epoch": 0.7290724401275303, "grad_norm": 0.36445024609565735, "learning_rate": 0.000101695423855964, "loss": 0.4914, "step": 9833 }, { "epoch": 0.729146585600949, "grad_norm": 0.3659290671348572, "learning_rate": 0.00010168542135533884, "loss": 0.4807, "step": 9834 }, { "epoch": 0.7292207310743679, "grad_norm": 0.38458824157714844, "learning_rate": 0.00010167541885471368, "loss": 0.5329, "step": 9835 }, { "epoch": 0.7292948765477868, "grad_norm": 0.33955320715904236, "learning_rate": 0.00010166541635408853, "loss": 0.492, "step": 9836 }, { "epoch": 0.7293690220212056, "grad_norm": 0.3759421706199646, "learning_rate": 0.00010165541385346338, "loss": 0.5202, "step": 9837 }, { "epoch": 0.7294431674946245, "grad_norm": 0.34863248467445374, "learning_rate": 0.0001016454113528382, "loss": 0.4895, "step": 9838 }, { "epoch": 0.7295173129680433, "grad_norm": 0.37666556239128113, "learning_rate": 0.00010163540885221306, "loss": 0.5323, "step": 9839 }, { "epoch": 0.7295914584414621, "grad_norm": 0.36212441325187683, "learning_rate": 0.00010162540635158791, "loss": 0.5154, "step": 9840 }, { "epoch": 0.729665603914881, "grad_norm": 0.3788467347621918, "learning_rate": 0.00010161540385096276, "loss": 0.5238, "step": 9841 }, { "epoch": 0.7297397493882999, "grad_norm": 0.3188433349132538, "learning_rate": 0.00010160540135033758, "loss": 0.4274, "step": 9842 }, { "epoch": 0.7298138948617187, "grad_norm": 0.3635058104991913, "learning_rate": 0.00010159539884971243, "loss": 0.4704, "step": 9843 }, { "epoch": 0.7298880403351375, "grad_norm": 0.3586661219596863, "learning_rate": 0.00010158539634908728, "loss": 0.4937, "step": 9844 }, { "epoch": 0.7299621858085564, "grad_norm": 0.3573656380176544, "learning_rate": 0.00010157539384846213, "loss": 0.4959, "step": 9845 }, { "epoch": 0.7300363312819752, "grad_norm": 0.34812483191490173, "learning_rate": 0.00010156539134783696, "loss": 0.5225, "step": 9846 }, { "epoch": 0.7301104767553941, "grad_norm": 0.34577715396881104, "learning_rate": 0.00010155538884721181, "loss": 0.4767, "step": 9847 }, { "epoch": 0.730184622228813, "grad_norm": 0.3351801931858063, "learning_rate": 0.00010154538634658666, "loss": 0.4583, "step": 9848 }, { "epoch": 0.7302587677022317, "grad_norm": 0.39908096194267273, "learning_rate": 0.00010153538384596148, "loss": 0.4761, "step": 9849 }, { "epoch": 0.7303329131756506, "grad_norm": 0.3640032708644867, "learning_rate": 0.00010152538134533633, "loss": 0.5211, "step": 9850 }, { "epoch": 0.7304070586490695, "grad_norm": 0.3623122274875641, "learning_rate": 0.00010151537884471118, "loss": 0.4964, "step": 9851 }, { "epoch": 0.7304812041224883, "grad_norm": 0.3650669753551483, "learning_rate": 0.00010150537634408603, "loss": 0.5112, "step": 9852 }, { "epoch": 0.7305553495959072, "grad_norm": 0.370225191116333, "learning_rate": 0.00010149537384346086, "loss": 0.4973, "step": 9853 }, { "epoch": 0.7306294950693261, "grad_norm": 0.35546332597732544, "learning_rate": 0.00010148537134283571, "loss": 0.468, "step": 9854 }, { "epoch": 0.7307036405427448, "grad_norm": 0.3528200685977936, "learning_rate": 0.00010147536884221056, "loss": 0.4958, "step": 9855 }, { "epoch": 0.7307777860161637, "grad_norm": 0.36985161900520325, "learning_rate": 0.00010146536634158541, "loss": 0.4754, "step": 9856 }, { "epoch": 0.7308519314895826, "grad_norm": 0.3414093255996704, "learning_rate": 0.00010145536384096023, "loss": 0.4591, "step": 9857 }, { "epoch": 0.7309260769630014, "grad_norm": 0.37636345624923706, "learning_rate": 0.00010144536134033508, "loss": 0.5019, "step": 9858 }, { "epoch": 0.7310002224364203, "grad_norm": 0.4063322842121124, "learning_rate": 0.00010143535883970993, "loss": 0.5651, "step": 9859 }, { "epoch": 0.7310743679098392, "grad_norm": 0.3472941219806671, "learning_rate": 0.00010142535633908477, "loss": 0.459, "step": 9860 }, { "epoch": 0.7311485133832579, "grad_norm": 0.3273974359035492, "learning_rate": 0.00010141535383845961, "loss": 0.4478, "step": 9861 }, { "epoch": 0.7312226588566768, "grad_norm": 0.35957959294319153, "learning_rate": 0.00010140535133783446, "loss": 0.5028, "step": 9862 }, { "epoch": 0.7312968043300957, "grad_norm": 0.35831794142723083, "learning_rate": 0.00010139534883720931, "loss": 0.5111, "step": 9863 }, { "epoch": 0.7313709498035145, "grad_norm": 0.37785089015960693, "learning_rate": 0.00010138534633658415, "loss": 0.5244, "step": 9864 }, { "epoch": 0.7314450952769334, "grad_norm": 0.34978410601615906, "learning_rate": 0.000101375343835959, "loss": 0.4513, "step": 9865 }, { "epoch": 0.7315192407503522, "grad_norm": 0.34950098395347595, "learning_rate": 0.00010136534133533385, "loss": 0.4851, "step": 9866 }, { "epoch": 0.731593386223771, "grad_norm": 0.35019955039024353, "learning_rate": 0.00010135533883470868, "loss": 0.4647, "step": 9867 }, { "epoch": 0.7316675316971899, "grad_norm": 0.3605979382991791, "learning_rate": 0.00010134533633408352, "loss": 0.4916, "step": 9868 }, { "epoch": 0.7317416771706088, "grad_norm": 0.3589261770248413, "learning_rate": 0.00010133533383345837, "loss": 0.4925, "step": 9869 }, { "epoch": 0.7318158226440276, "grad_norm": 0.36756637692451477, "learning_rate": 0.00010132533133283322, "loss": 0.5561, "step": 9870 }, { "epoch": 0.7318899681174464, "grad_norm": 0.3453710079193115, "learning_rate": 0.00010131532883220807, "loss": 0.4891, "step": 9871 }, { "epoch": 0.7319641135908653, "grad_norm": 0.38089719414711, "learning_rate": 0.0001013053263315829, "loss": 0.5042, "step": 9872 }, { "epoch": 0.7320382590642841, "grad_norm": 0.3517780303955078, "learning_rate": 0.00010129532383095775, "loss": 0.4836, "step": 9873 }, { "epoch": 0.732112404537703, "grad_norm": 0.33563295006752014, "learning_rate": 0.0001012853213303326, "loss": 0.4766, "step": 9874 }, { "epoch": 0.7321865500111219, "grad_norm": 0.3557016849517822, "learning_rate": 0.00010127531882970742, "loss": 0.4861, "step": 9875 }, { "epoch": 0.7322606954845406, "grad_norm": 0.36855873465538025, "learning_rate": 0.00010126531632908227, "loss": 0.4976, "step": 9876 }, { "epoch": 0.7323348409579595, "grad_norm": 0.35029536485671997, "learning_rate": 0.00010125531382845712, "loss": 0.4904, "step": 9877 }, { "epoch": 0.7324089864313784, "grad_norm": 0.37710386514663696, "learning_rate": 0.00010124531132783197, "loss": 0.5052, "step": 9878 }, { "epoch": 0.7324831319047972, "grad_norm": 0.35244494676589966, "learning_rate": 0.0001012353088272068, "loss": 0.4792, "step": 9879 }, { "epoch": 0.7325572773782161, "grad_norm": 0.37599295377731323, "learning_rate": 0.00010122530632658165, "loss": 0.5639, "step": 9880 }, { "epoch": 0.732631422851635, "grad_norm": 0.3433098793029785, "learning_rate": 0.0001012153038259565, "loss": 0.4699, "step": 9881 }, { "epoch": 0.7327055683250537, "grad_norm": 0.3513484299182892, "learning_rate": 0.00010120530132533135, "loss": 0.4772, "step": 9882 }, { "epoch": 0.7327797137984726, "grad_norm": 0.38829469680786133, "learning_rate": 0.00010119529882470617, "loss": 0.5363, "step": 9883 }, { "epoch": 0.7328538592718915, "grad_norm": 0.36456921696662903, "learning_rate": 0.00010118529632408102, "loss": 0.4958, "step": 9884 }, { "epoch": 0.7329280047453103, "grad_norm": 0.36434808373451233, "learning_rate": 0.00010117529382345587, "loss": 0.4735, "step": 9885 }, { "epoch": 0.7330021502187292, "grad_norm": 0.37890395522117615, "learning_rate": 0.0001011652913228307, "loss": 0.4885, "step": 9886 }, { "epoch": 0.7330762956921479, "grad_norm": 0.3437708914279938, "learning_rate": 0.00010115528882220555, "loss": 0.474, "step": 9887 }, { "epoch": 0.7331504411655668, "grad_norm": 0.32673001289367676, "learning_rate": 0.0001011452863215804, "loss": 0.4534, "step": 9888 }, { "epoch": 0.7332245866389857, "grad_norm": 0.3576214015483856, "learning_rate": 0.00010113528382095525, "loss": 0.5092, "step": 9889 }, { "epoch": 0.7332987321124045, "grad_norm": 0.3626607358455658, "learning_rate": 0.00010112528132033007, "loss": 0.4563, "step": 9890 }, { "epoch": 0.7333728775858234, "grad_norm": 0.34929805994033813, "learning_rate": 0.00010111527881970492, "loss": 0.4979, "step": 9891 }, { "epoch": 0.7334470230592423, "grad_norm": 0.34647783637046814, "learning_rate": 0.00010110527631907977, "loss": 0.4593, "step": 9892 }, { "epoch": 0.733521168532661, "grad_norm": 0.3462729752063751, "learning_rate": 0.00010109527381845463, "loss": 0.4949, "step": 9893 }, { "epoch": 0.7335953140060799, "grad_norm": 0.3438795208930969, "learning_rate": 0.00010108527131782945, "loss": 0.4525, "step": 9894 }, { "epoch": 0.7336694594794988, "grad_norm": 0.36878177523612976, "learning_rate": 0.0001010752688172043, "loss": 0.481, "step": 9895 }, { "epoch": 0.7337436049529176, "grad_norm": 0.37325945496559143, "learning_rate": 0.00010106526631657915, "loss": 0.5021, "step": 9896 }, { "epoch": 0.7338177504263365, "grad_norm": 0.34222573041915894, "learning_rate": 0.000101055263815954, "loss": 0.4587, "step": 9897 }, { "epoch": 0.7338918958997553, "grad_norm": 0.35452502965927124, "learning_rate": 0.00010104526131532884, "loss": 0.4662, "step": 9898 }, { "epoch": 0.7339660413731741, "grad_norm": 0.36822497844696045, "learning_rate": 0.00010103525881470367, "loss": 0.5096, "step": 9899 }, { "epoch": 0.734040186846593, "grad_norm": 0.3402572274208069, "learning_rate": 0.00010102525631407853, "loss": 0.4637, "step": 9900 }, { "epoch": 0.7341143323200119, "grad_norm": 0.3756723701953888, "learning_rate": 0.00010101525381345336, "loss": 0.509, "step": 9901 }, { "epoch": 0.7341884777934307, "grad_norm": 0.3556862771511078, "learning_rate": 0.00010100525131282821, "loss": 0.5192, "step": 9902 }, { "epoch": 0.7342626232668495, "grad_norm": 0.3487955331802368, "learning_rate": 0.00010099524881220306, "loss": 0.4734, "step": 9903 }, { "epoch": 0.7343367687402684, "grad_norm": 0.3681011497974396, "learning_rate": 0.00010098524631157791, "loss": 0.4909, "step": 9904 }, { "epoch": 0.7344109142136872, "grad_norm": 0.36742639541625977, "learning_rate": 0.00010097524381095274, "loss": 0.5099, "step": 9905 }, { "epoch": 0.7344850596871061, "grad_norm": 0.369303435087204, "learning_rate": 0.00010096524131032759, "loss": 0.4819, "step": 9906 }, { "epoch": 0.734559205160525, "grad_norm": 0.346034437417984, "learning_rate": 0.00010095523880970244, "loss": 0.4708, "step": 9907 }, { "epoch": 0.7346333506339437, "grad_norm": 0.36435243487358093, "learning_rate": 0.00010094523630907729, "loss": 0.479, "step": 9908 }, { "epoch": 0.7347074961073626, "grad_norm": 0.37752893567085266, "learning_rate": 0.00010093523380845211, "loss": 0.491, "step": 9909 }, { "epoch": 0.7347816415807815, "grad_norm": 0.3850514590740204, "learning_rate": 0.00010092523130782696, "loss": 0.512, "step": 9910 }, { "epoch": 0.7348557870542003, "grad_norm": 0.36606359481811523, "learning_rate": 0.00010091522880720181, "loss": 0.5107, "step": 9911 }, { "epoch": 0.7349299325276192, "grad_norm": 0.34571927785873413, "learning_rate": 0.00010090522630657664, "loss": 0.4896, "step": 9912 }, { "epoch": 0.7350040780010381, "grad_norm": 0.3516993820667267, "learning_rate": 0.00010089522380595149, "loss": 0.4763, "step": 9913 }, { "epoch": 0.7350782234744568, "grad_norm": 0.3796703517436981, "learning_rate": 0.00010088522130532634, "loss": 0.5209, "step": 9914 }, { "epoch": 0.7351523689478757, "grad_norm": 0.33915433287620544, "learning_rate": 0.00010087521880470119, "loss": 0.4592, "step": 9915 }, { "epoch": 0.7352265144212946, "grad_norm": 0.39271557331085205, "learning_rate": 0.00010086521630407601, "loss": 0.5459, "step": 9916 }, { "epoch": 0.7353006598947134, "grad_norm": 0.36355680227279663, "learning_rate": 0.00010085521380345086, "loss": 0.4925, "step": 9917 }, { "epoch": 0.7353748053681323, "grad_norm": 0.36867278814315796, "learning_rate": 0.00010084521130282572, "loss": 0.5368, "step": 9918 }, { "epoch": 0.7354489508415512, "grad_norm": 0.3854970932006836, "learning_rate": 0.00010083520880220057, "loss": 0.5175, "step": 9919 }, { "epoch": 0.7355230963149699, "grad_norm": 0.3492545783519745, "learning_rate": 0.00010082520630157539, "loss": 0.4816, "step": 9920 }, { "epoch": 0.7355972417883888, "grad_norm": 0.35512682795524597, "learning_rate": 0.00010081520380095024, "loss": 0.5101, "step": 9921 }, { "epoch": 0.7356713872618077, "grad_norm": 0.36367732286453247, "learning_rate": 0.00010080520130032509, "loss": 0.5198, "step": 9922 }, { "epoch": 0.7357455327352265, "grad_norm": 0.3593961298465729, "learning_rate": 0.00010079519879969991, "loss": 0.5356, "step": 9923 }, { "epoch": 0.7358196782086454, "grad_norm": 0.3616650700569153, "learning_rate": 0.00010078519629907476, "loss": 0.4814, "step": 9924 }, { "epoch": 0.7358938236820642, "grad_norm": 0.36612656712532043, "learning_rate": 0.00010077519379844962, "loss": 0.5209, "step": 9925 }, { "epoch": 0.735967969155483, "grad_norm": 0.33815568685531616, "learning_rate": 0.00010076519129782447, "loss": 0.4767, "step": 9926 }, { "epoch": 0.7360421146289019, "grad_norm": 0.3621980547904968, "learning_rate": 0.00010075518879719929, "loss": 0.5338, "step": 9927 }, { "epoch": 0.7361162601023208, "grad_norm": 0.36989668011665344, "learning_rate": 0.00010074518629657414, "loss": 0.5207, "step": 9928 }, { "epoch": 0.7361904055757396, "grad_norm": 0.36601024866104126, "learning_rate": 0.00010073518379594899, "loss": 0.5464, "step": 9929 }, { "epoch": 0.7362645510491584, "grad_norm": 0.3784588873386383, "learning_rate": 0.00010072518129532384, "loss": 0.4911, "step": 9930 }, { "epoch": 0.7363386965225773, "grad_norm": 0.37799498438835144, "learning_rate": 0.00010071517879469868, "loss": 0.5181, "step": 9931 }, { "epoch": 0.7364128419959961, "grad_norm": 0.36219528317451477, "learning_rate": 0.00010070517629407352, "loss": 0.5122, "step": 9932 }, { "epoch": 0.736486987469415, "grad_norm": 0.3737243115901947, "learning_rate": 0.00010069517379344837, "loss": 0.4842, "step": 9933 }, { "epoch": 0.7365611329428339, "grad_norm": 0.37111696600914, "learning_rate": 0.00010068517129282322, "loss": 0.5255, "step": 9934 }, { "epoch": 0.7366352784162526, "grad_norm": 0.3554646074771881, "learning_rate": 0.00010067516879219805, "loss": 0.4714, "step": 9935 }, { "epoch": 0.7367094238896715, "grad_norm": 0.358582079410553, "learning_rate": 0.0001006651662915729, "loss": 0.4401, "step": 9936 }, { "epoch": 0.7367835693630904, "grad_norm": 0.3494209349155426, "learning_rate": 0.00010065516379094774, "loss": 0.4801, "step": 9937 }, { "epoch": 0.7368577148365092, "grad_norm": 0.3317667841911316, "learning_rate": 0.00010064516129032258, "loss": 0.4757, "step": 9938 }, { "epoch": 0.7369318603099281, "grad_norm": 0.34697413444519043, "learning_rate": 0.00010063515878969743, "loss": 0.4831, "step": 9939 }, { "epoch": 0.737006005783347, "grad_norm": 0.342973917722702, "learning_rate": 0.00010062515628907228, "loss": 0.4826, "step": 9940 }, { "epoch": 0.7370801512567657, "grad_norm": 0.3708855211734772, "learning_rate": 0.00010061515378844713, "loss": 0.5344, "step": 9941 }, { "epoch": 0.7371542967301846, "grad_norm": 0.3934559226036072, "learning_rate": 0.00010060515128782195, "loss": 0.5463, "step": 9942 }, { "epoch": 0.7372284422036035, "grad_norm": 0.35956552624702454, "learning_rate": 0.0001005951487871968, "loss": 0.487, "step": 9943 }, { "epoch": 0.7373025876770223, "grad_norm": 0.3550291657447815, "learning_rate": 0.00010058514628657166, "loss": 0.4765, "step": 9944 }, { "epoch": 0.7373767331504412, "grad_norm": 0.3885299265384674, "learning_rate": 0.0001005751437859465, "loss": 0.5272, "step": 9945 }, { "epoch": 0.73745087862386, "grad_norm": 0.38529834151268005, "learning_rate": 0.00010056514128532133, "loss": 0.5083, "step": 9946 }, { "epoch": 0.7375250240972788, "grad_norm": 0.3800079822540283, "learning_rate": 0.00010055513878469618, "loss": 0.559, "step": 9947 }, { "epoch": 0.7375991695706977, "grad_norm": 0.38295671343803406, "learning_rate": 0.00010054513628407103, "loss": 0.4836, "step": 9948 }, { "epoch": 0.7376733150441166, "grad_norm": 0.3587893545627594, "learning_rate": 0.00010053513378344585, "loss": 0.4942, "step": 9949 }, { "epoch": 0.7377474605175354, "grad_norm": 0.340629518032074, "learning_rate": 0.0001005251312828207, "loss": 0.4732, "step": 9950 }, { "epoch": 0.7378216059909543, "grad_norm": 0.37853142619132996, "learning_rate": 0.00010051512878219556, "loss": 0.5176, "step": 9951 }, { "epoch": 0.7378957514643731, "grad_norm": 0.32168200612068176, "learning_rate": 0.0001005051262815704, "loss": 0.4636, "step": 9952 }, { "epoch": 0.7379698969377919, "grad_norm": 0.35180750489234924, "learning_rate": 0.00010049512378094523, "loss": 0.4982, "step": 9953 }, { "epoch": 0.7380440424112108, "grad_norm": 0.3407387137413025, "learning_rate": 0.00010048512128032008, "loss": 0.4501, "step": 9954 }, { "epoch": 0.7381181878846297, "grad_norm": 0.3381001055240631, "learning_rate": 0.00010047511877969493, "loss": 0.5002, "step": 9955 }, { "epoch": 0.7381923333580485, "grad_norm": 0.337488055229187, "learning_rate": 0.00010046511627906978, "loss": 0.4688, "step": 9956 }, { "epoch": 0.7382664788314673, "grad_norm": 0.3488987386226654, "learning_rate": 0.0001004551137784446, "loss": 0.4787, "step": 9957 }, { "epoch": 0.7383406243048862, "grad_norm": 0.36139893531799316, "learning_rate": 0.00010044511127781946, "loss": 0.5068, "step": 9958 }, { "epoch": 0.738414769778305, "grad_norm": 0.3354896903038025, "learning_rate": 0.00010043510877719431, "loss": 0.485, "step": 9959 }, { "epoch": 0.7384889152517239, "grad_norm": 0.349455863237381, "learning_rate": 0.00010042510627656913, "loss": 0.4754, "step": 9960 }, { "epoch": 0.7385630607251428, "grad_norm": 0.34735870361328125, "learning_rate": 0.00010041510377594398, "loss": 0.4769, "step": 9961 }, { "epoch": 0.7386372061985615, "grad_norm": 0.36705610156059265, "learning_rate": 0.00010040510127531883, "loss": 0.5215, "step": 9962 }, { "epoch": 0.7387113516719804, "grad_norm": 0.36679771542549133, "learning_rate": 0.00010039509877469368, "loss": 0.457, "step": 9963 }, { "epoch": 0.7387854971453993, "grad_norm": 0.3612273931503296, "learning_rate": 0.00010038509627406852, "loss": 0.5383, "step": 9964 }, { "epoch": 0.7388596426188181, "grad_norm": 0.3538142144680023, "learning_rate": 0.00010037509377344336, "loss": 0.493, "step": 9965 }, { "epoch": 0.738933788092237, "grad_norm": 0.35586515069007874, "learning_rate": 0.00010036509127281821, "loss": 0.5185, "step": 9966 }, { "epoch": 0.7390079335656559, "grad_norm": 0.35676053166389465, "learning_rate": 0.00010035508877219306, "loss": 0.4952, "step": 9967 }, { "epoch": 0.7390820790390746, "grad_norm": 0.38988637924194336, "learning_rate": 0.0001003450862715679, "loss": 0.5547, "step": 9968 }, { "epoch": 0.7391562245124935, "grad_norm": 0.37093785405158997, "learning_rate": 0.00010033508377094275, "loss": 0.5139, "step": 9969 }, { "epoch": 0.7392303699859124, "grad_norm": 0.3844412863254547, "learning_rate": 0.00010032508127031758, "loss": 0.5042, "step": 9970 }, { "epoch": 0.7393045154593312, "grad_norm": 0.38572028279304504, "learning_rate": 0.00010031507876969243, "loss": 0.4917, "step": 9971 }, { "epoch": 0.7393786609327501, "grad_norm": 0.3581622540950775, "learning_rate": 0.00010030507626906727, "loss": 0.4962, "step": 9972 }, { "epoch": 0.739452806406169, "grad_norm": 0.3797801733016968, "learning_rate": 0.00010029507376844212, "loss": 0.5508, "step": 9973 }, { "epoch": 0.7395269518795877, "grad_norm": 0.38890984654426575, "learning_rate": 0.00010028507126781697, "loss": 0.552, "step": 9974 }, { "epoch": 0.7396010973530066, "grad_norm": 0.37026140093803406, "learning_rate": 0.0001002750687671918, "loss": 0.5361, "step": 9975 }, { "epoch": 0.7396752428264255, "grad_norm": 0.33901485800743103, "learning_rate": 0.00010026506626656665, "loss": 0.4588, "step": 9976 }, { "epoch": 0.7397493882998443, "grad_norm": 0.36426645517349243, "learning_rate": 0.0001002550637659415, "loss": 0.5241, "step": 9977 }, { "epoch": 0.7398235337732632, "grad_norm": 0.343216210603714, "learning_rate": 0.00010024506126531635, "loss": 0.4974, "step": 9978 }, { "epoch": 0.739897679246682, "grad_norm": 0.3556203544139862, "learning_rate": 0.00010023505876469117, "loss": 0.4953, "step": 9979 }, { "epoch": 0.7399718247201008, "grad_norm": 0.3418378233909607, "learning_rate": 0.00010022505626406602, "loss": 0.4765, "step": 9980 }, { "epoch": 0.7400459701935197, "grad_norm": 0.3400087356567383, "learning_rate": 0.00010021505376344087, "loss": 0.4743, "step": 9981 }, { "epoch": 0.7401201156669386, "grad_norm": 0.37837696075439453, "learning_rate": 0.00010020505126281572, "loss": 0.5294, "step": 9982 }, { "epoch": 0.7401942611403574, "grad_norm": 0.3504321873188019, "learning_rate": 0.00010019504876219055, "loss": 0.4871, "step": 9983 }, { "epoch": 0.7402684066137762, "grad_norm": 0.37083637714385986, "learning_rate": 0.0001001850462615654, "loss": 0.5297, "step": 9984 }, { "epoch": 0.7403425520871951, "grad_norm": 0.37755003571510315, "learning_rate": 0.00010017504376094025, "loss": 0.5839, "step": 9985 }, { "epoch": 0.7404166975606139, "grad_norm": 0.3374440670013428, "learning_rate": 0.00010016504126031507, "loss": 0.4839, "step": 9986 }, { "epoch": 0.7404908430340328, "grad_norm": 0.36657053232192993, "learning_rate": 0.00010015503875968992, "loss": 0.4824, "step": 9987 }, { "epoch": 0.7405649885074517, "grad_norm": 0.34876760840415955, "learning_rate": 0.00010014503625906477, "loss": 0.4839, "step": 9988 }, { "epoch": 0.7406391339808704, "grad_norm": 0.37910279631614685, "learning_rate": 0.00010013503375843962, "loss": 0.527, "step": 9989 }, { "epoch": 0.7407132794542893, "grad_norm": 0.353914350271225, "learning_rate": 0.00010012503125781445, "loss": 0.4885, "step": 9990 }, { "epoch": 0.7407874249277082, "grad_norm": 0.36001089215278625, "learning_rate": 0.0001001150287571893, "loss": 0.4945, "step": 9991 }, { "epoch": 0.740861570401127, "grad_norm": 0.3749687671661377, "learning_rate": 0.00010010502625656415, "loss": 0.4849, "step": 9992 }, { "epoch": 0.7409357158745459, "grad_norm": 0.36260610818862915, "learning_rate": 0.000100095023755939, "loss": 0.5129, "step": 9993 }, { "epoch": 0.7410098613479648, "grad_norm": 0.36755815148353577, "learning_rate": 0.00010008502125531382, "loss": 0.537, "step": 9994 }, { "epoch": 0.7410840068213835, "grad_norm": 0.3541785776615143, "learning_rate": 0.00010007501875468867, "loss": 0.4709, "step": 9995 }, { "epoch": 0.7411581522948024, "grad_norm": 0.3725077211856842, "learning_rate": 0.00010006501625406352, "loss": 0.5048, "step": 9996 }, { "epoch": 0.7412322977682213, "grad_norm": 0.33019518852233887, "learning_rate": 0.00010005501375343836, "loss": 0.4808, "step": 9997 }, { "epoch": 0.7413064432416401, "grad_norm": 0.3182913661003113, "learning_rate": 0.0001000450112528132, "loss": 0.4418, "step": 9998 }, { "epoch": 0.741380588715059, "grad_norm": 0.37601786851882935, "learning_rate": 0.00010003500875218805, "loss": 0.509, "step": 9999 }, { "epoch": 0.7414547341884778, "grad_norm": 0.3705173134803772, "learning_rate": 0.0001000250062515629, "loss": 0.5093, "step": 10000 }, { "epoch": 0.7415288796618966, "grad_norm": 0.3765875995159149, "learning_rate": 0.00010001500375093774, "loss": 0.5369, "step": 10001 }, { "epoch": 0.7416030251353155, "grad_norm": 0.34051576256752014, "learning_rate": 0.00010000500125031259, "loss": 0.4663, "step": 10002 }, { "epoch": 0.7416771706087343, "grad_norm": 0.3562418222427368, "learning_rate": 9.999499874968742e-05, "loss": 0.5018, "step": 10003 }, { "epoch": 0.7417513160821532, "grad_norm": 0.35506486892700195, "learning_rate": 9.998499624906227e-05, "loss": 0.4898, "step": 10004 }, { "epoch": 0.741825461555572, "grad_norm": 0.3585818409919739, "learning_rate": 9.997499374843712e-05, "loss": 0.4753, "step": 10005 }, { "epoch": 0.7418996070289908, "grad_norm": 0.362395703792572, "learning_rate": 9.996499124781196e-05, "loss": 0.505, "step": 10006 }, { "epoch": 0.7419737525024097, "grad_norm": 0.3345792889595032, "learning_rate": 9.995498874718681e-05, "loss": 0.4675, "step": 10007 }, { "epoch": 0.7420478979758286, "grad_norm": 0.36615949869155884, "learning_rate": 9.994498624656165e-05, "loss": 0.488, "step": 10008 }, { "epoch": 0.7421220434492474, "grad_norm": 0.35948261618614197, "learning_rate": 9.993498374593649e-05, "loss": 0.5104, "step": 10009 }, { "epoch": 0.7421961889226663, "grad_norm": 0.3507312834262848, "learning_rate": 9.992498124531134e-05, "loss": 0.4782, "step": 10010 }, { "epoch": 0.7422703343960851, "grad_norm": 0.3668261766433716, "learning_rate": 9.991497874468617e-05, "loss": 0.5039, "step": 10011 }, { "epoch": 0.7423444798695039, "grad_norm": 0.3489595353603363, "learning_rate": 9.990497624406102e-05, "loss": 0.4926, "step": 10012 }, { "epoch": 0.7424186253429228, "grad_norm": 0.3593759536743164, "learning_rate": 9.989497374343586e-05, "loss": 0.4932, "step": 10013 }, { "epoch": 0.7424927708163417, "grad_norm": 0.3374972343444824, "learning_rate": 9.988497124281071e-05, "loss": 0.4827, "step": 10014 }, { "epoch": 0.7425669162897605, "grad_norm": 0.3538496494293213, "learning_rate": 9.987496874218555e-05, "loss": 0.4623, "step": 10015 }, { "epoch": 0.7426410617631793, "grad_norm": 0.3343598246574402, "learning_rate": 9.98649662415604e-05, "loss": 0.4752, "step": 10016 }, { "epoch": 0.7427152072365982, "grad_norm": 0.34127476811408997, "learning_rate": 9.985496374093524e-05, "loss": 0.4597, "step": 10017 }, { "epoch": 0.742789352710017, "grad_norm": 0.3619319498538971, "learning_rate": 9.984496124031009e-05, "loss": 0.5227, "step": 10018 }, { "epoch": 0.7428634981834359, "grad_norm": 0.3977532684803009, "learning_rate": 9.983495873968493e-05, "loss": 0.5323, "step": 10019 }, { "epoch": 0.7429376436568548, "grad_norm": 0.35275956988334656, "learning_rate": 9.982495623905978e-05, "loss": 0.4661, "step": 10020 }, { "epoch": 0.7430117891302735, "grad_norm": 0.3339717984199524, "learning_rate": 9.981495373843461e-05, "loss": 0.4774, "step": 10021 }, { "epoch": 0.7430859346036924, "grad_norm": 0.3510655164718628, "learning_rate": 9.980495123780945e-05, "loss": 0.4948, "step": 10022 }, { "epoch": 0.7431600800771113, "grad_norm": 0.3720792531967163, "learning_rate": 9.97949487371843e-05, "loss": 0.5452, "step": 10023 }, { "epoch": 0.7432342255505301, "grad_norm": 0.3856961727142334, "learning_rate": 9.978494623655914e-05, "loss": 0.5454, "step": 10024 }, { "epoch": 0.743308371023949, "grad_norm": 0.34659355878829956, "learning_rate": 9.977494373593399e-05, "loss": 0.5544, "step": 10025 }, { "epoch": 0.7433825164973679, "grad_norm": 0.3866281807422638, "learning_rate": 9.976494123530883e-05, "loss": 0.5665, "step": 10026 }, { "epoch": 0.7434566619707866, "grad_norm": 0.36172378063201904, "learning_rate": 9.975493873468368e-05, "loss": 0.5311, "step": 10027 }, { "epoch": 0.7435308074442055, "grad_norm": 0.36152184009552, "learning_rate": 9.974493623405851e-05, "loss": 0.5295, "step": 10028 }, { "epoch": 0.7436049529176244, "grad_norm": 0.30902987718582153, "learning_rate": 9.973493373343336e-05, "loss": 0.4372, "step": 10029 }, { "epoch": 0.7436790983910432, "grad_norm": 0.34876495599746704, "learning_rate": 9.97249312328082e-05, "loss": 0.4636, "step": 10030 }, { "epoch": 0.7437532438644621, "grad_norm": 0.3610982298851013, "learning_rate": 9.971492873218305e-05, "loss": 0.5033, "step": 10031 }, { "epoch": 0.743827389337881, "grad_norm": 0.35730624198913574, "learning_rate": 9.970492623155789e-05, "loss": 0.4917, "step": 10032 }, { "epoch": 0.7439015348112997, "grad_norm": 0.3894965350627899, "learning_rate": 9.969492373093274e-05, "loss": 0.5475, "step": 10033 }, { "epoch": 0.7439756802847186, "grad_norm": 0.33972278237342834, "learning_rate": 9.968492123030758e-05, "loss": 0.4513, "step": 10034 }, { "epoch": 0.7440498257581375, "grad_norm": 0.3653634786605835, "learning_rate": 9.967491872968243e-05, "loss": 0.4828, "step": 10035 }, { "epoch": 0.7441239712315563, "grad_norm": 0.3466009497642517, "learning_rate": 9.966491622905726e-05, "loss": 0.4891, "step": 10036 }, { "epoch": 0.7441981167049752, "grad_norm": 0.3874719440937042, "learning_rate": 9.965491372843211e-05, "loss": 0.5099, "step": 10037 }, { "epoch": 0.744272262178394, "grad_norm": 0.37360772490501404, "learning_rate": 9.964491122780695e-05, "loss": 0.4691, "step": 10038 }, { "epoch": 0.7443464076518128, "grad_norm": 0.3484889566898346, "learning_rate": 9.96349087271818e-05, "loss": 0.496, "step": 10039 }, { "epoch": 0.7444205531252317, "grad_norm": 0.35672643780708313, "learning_rate": 9.962490622655665e-05, "loss": 0.5128, "step": 10040 }, { "epoch": 0.7444946985986506, "grad_norm": 0.3372753858566284, "learning_rate": 9.961490372593149e-05, "loss": 0.4694, "step": 10041 }, { "epoch": 0.7445688440720694, "grad_norm": 0.37839120626449585, "learning_rate": 9.960490122530634e-05, "loss": 0.5224, "step": 10042 }, { "epoch": 0.7446429895454882, "grad_norm": 0.35097557306289673, "learning_rate": 9.959489872468118e-05, "loss": 0.4943, "step": 10043 }, { "epoch": 0.7447171350189071, "grad_norm": 0.350800484418869, "learning_rate": 9.958489622405603e-05, "loss": 0.5052, "step": 10044 }, { "epoch": 0.7447912804923259, "grad_norm": 0.3692048490047455, "learning_rate": 9.957489372343087e-05, "loss": 0.4801, "step": 10045 }, { "epoch": 0.7448654259657448, "grad_norm": 0.3324204981327057, "learning_rate": 9.95648912228057e-05, "loss": 0.4606, "step": 10046 }, { "epoch": 0.7449395714391637, "grad_norm": 0.3564186990261078, "learning_rate": 9.955488872218055e-05, "loss": 0.4916, "step": 10047 }, { "epoch": 0.7450137169125824, "grad_norm": 0.36025145649909973, "learning_rate": 9.954488622155539e-05, "loss": 0.5076, "step": 10048 }, { "epoch": 0.7450878623860013, "grad_norm": 0.34502798318862915, "learning_rate": 9.953488372093024e-05, "loss": 0.4757, "step": 10049 }, { "epoch": 0.7451620078594202, "grad_norm": 0.34934765100479126, "learning_rate": 9.952488122030508e-05, "loss": 0.4636, "step": 10050 }, { "epoch": 0.745236153332839, "grad_norm": 0.35491472482681274, "learning_rate": 9.951487871967993e-05, "loss": 0.4877, "step": 10051 }, { "epoch": 0.7453102988062579, "grad_norm": 0.361537903547287, "learning_rate": 9.950487621905477e-05, "loss": 0.4956, "step": 10052 }, { "epoch": 0.7453844442796768, "grad_norm": 0.3667871654033661, "learning_rate": 9.949487371842962e-05, "loss": 0.5034, "step": 10053 }, { "epoch": 0.7454585897530955, "grad_norm": 0.34593942761421204, "learning_rate": 9.948487121780445e-05, "loss": 0.4847, "step": 10054 }, { "epoch": 0.7455327352265144, "grad_norm": 0.3638220727443695, "learning_rate": 9.94748687171793e-05, "loss": 0.4902, "step": 10055 }, { "epoch": 0.7456068806999333, "grad_norm": 0.3524557650089264, "learning_rate": 9.946486621655414e-05, "loss": 0.4938, "step": 10056 }, { "epoch": 0.7456810261733521, "grad_norm": 0.3577626645565033, "learning_rate": 9.945486371592899e-05, "loss": 0.4952, "step": 10057 }, { "epoch": 0.745755171646771, "grad_norm": 0.3575383424758911, "learning_rate": 9.944486121530383e-05, "loss": 0.5078, "step": 10058 }, { "epoch": 0.7458293171201898, "grad_norm": 0.34866198897361755, "learning_rate": 9.943485871467867e-05, "loss": 0.484, "step": 10059 }, { "epoch": 0.7459034625936086, "grad_norm": 0.36349526047706604, "learning_rate": 9.942485621405352e-05, "loss": 0.4878, "step": 10060 }, { "epoch": 0.7459776080670275, "grad_norm": 0.39746037125587463, "learning_rate": 9.941485371342835e-05, "loss": 0.5525, "step": 10061 }, { "epoch": 0.7460517535404464, "grad_norm": 0.34555041790008545, "learning_rate": 9.94048512128032e-05, "loss": 0.5014, "step": 10062 }, { "epoch": 0.7461258990138652, "grad_norm": 0.3859083652496338, "learning_rate": 9.939484871217804e-05, "loss": 0.5307, "step": 10063 }, { "epoch": 0.746200044487284, "grad_norm": 0.37606924772262573, "learning_rate": 9.938484621155289e-05, "loss": 0.5278, "step": 10064 }, { "epoch": 0.7462741899607029, "grad_norm": 0.35637322068214417, "learning_rate": 9.937484371092773e-05, "loss": 0.4879, "step": 10065 }, { "epoch": 0.7463483354341217, "grad_norm": 0.3666801452636719, "learning_rate": 9.936484121030258e-05, "loss": 0.5241, "step": 10066 }, { "epoch": 0.7464224809075406, "grad_norm": 0.35753461718559265, "learning_rate": 9.935483870967742e-05, "loss": 0.5198, "step": 10067 }, { "epoch": 0.7464966263809595, "grad_norm": 0.3303223252296448, "learning_rate": 9.934483620905227e-05, "loss": 0.4712, "step": 10068 }, { "epoch": 0.7465707718543783, "grad_norm": 0.3401878774166107, "learning_rate": 9.93348337084271e-05, "loss": 0.4799, "step": 10069 }, { "epoch": 0.7466449173277971, "grad_norm": 0.3624083995819092, "learning_rate": 9.932483120780196e-05, "loss": 0.51, "step": 10070 }, { "epoch": 0.746719062801216, "grad_norm": 0.33909958600997925, "learning_rate": 9.931482870717679e-05, "loss": 0.4674, "step": 10071 }, { "epoch": 0.7467932082746348, "grad_norm": 0.33451783657073975, "learning_rate": 9.930482620655164e-05, "loss": 0.4665, "step": 10072 }, { "epoch": 0.7468673537480537, "grad_norm": 0.3752326965332031, "learning_rate": 9.92948237059265e-05, "loss": 0.5255, "step": 10073 }, { "epoch": 0.7469414992214726, "grad_norm": 0.36246436834335327, "learning_rate": 9.928482120530133e-05, "loss": 0.489, "step": 10074 }, { "epoch": 0.7470156446948913, "grad_norm": 0.38097044825553894, "learning_rate": 9.927481870467618e-05, "loss": 0.5075, "step": 10075 }, { "epoch": 0.7470897901683102, "grad_norm": 0.3692586123943329, "learning_rate": 9.926481620405102e-05, "loss": 0.5065, "step": 10076 }, { "epoch": 0.7471639356417291, "grad_norm": 0.3665402829647064, "learning_rate": 9.925481370342587e-05, "loss": 0.5073, "step": 10077 }, { "epoch": 0.7472380811151479, "grad_norm": 0.38045910000801086, "learning_rate": 9.92448112028007e-05, "loss": 0.5193, "step": 10078 }, { "epoch": 0.7473122265885668, "grad_norm": 0.3793416917324066, "learning_rate": 9.923480870217556e-05, "loss": 0.5017, "step": 10079 }, { "epoch": 0.7473863720619857, "grad_norm": 0.3413918614387512, "learning_rate": 9.92248062015504e-05, "loss": 0.4879, "step": 10080 }, { "epoch": 0.7474605175354044, "grad_norm": 0.3595883846282959, "learning_rate": 9.921480370092524e-05, "loss": 0.4934, "step": 10081 }, { "epoch": 0.7475346630088233, "grad_norm": 0.3688569962978363, "learning_rate": 9.920480120030008e-05, "loss": 0.5018, "step": 10082 }, { "epoch": 0.7476088084822422, "grad_norm": 0.3887184262275696, "learning_rate": 9.919479869967493e-05, "loss": 0.5021, "step": 10083 }, { "epoch": 0.747682953955661, "grad_norm": 0.37193435430526733, "learning_rate": 9.918479619904977e-05, "loss": 0.5318, "step": 10084 }, { "epoch": 0.7477570994290799, "grad_norm": 0.3910921812057495, "learning_rate": 9.917479369842461e-05, "loss": 0.4992, "step": 10085 }, { "epoch": 0.7478312449024987, "grad_norm": 0.3388250172138214, "learning_rate": 9.916479119779946e-05, "loss": 0.4696, "step": 10086 }, { "epoch": 0.7479053903759175, "grad_norm": 0.3408256471157074, "learning_rate": 9.91547886971743e-05, "loss": 0.4869, "step": 10087 }, { "epoch": 0.7479795358493364, "grad_norm": 0.3407551944255829, "learning_rate": 9.914478619654915e-05, "loss": 0.4759, "step": 10088 }, { "epoch": 0.7480536813227553, "grad_norm": 0.36582690477371216, "learning_rate": 9.913478369592398e-05, "loss": 0.5224, "step": 10089 }, { "epoch": 0.7481278267961741, "grad_norm": 0.3560807406902313, "learning_rate": 9.912478119529883e-05, "loss": 0.4865, "step": 10090 }, { "epoch": 0.748201972269593, "grad_norm": 0.37307608127593994, "learning_rate": 9.911477869467367e-05, "loss": 0.5017, "step": 10091 }, { "epoch": 0.7482761177430118, "grad_norm": 0.3619062900543213, "learning_rate": 9.910477619404852e-05, "loss": 0.4846, "step": 10092 }, { "epoch": 0.7483502632164306, "grad_norm": 0.37277811765670776, "learning_rate": 9.909477369342336e-05, "loss": 0.5367, "step": 10093 }, { "epoch": 0.7484244086898495, "grad_norm": 0.3589397966861725, "learning_rate": 9.908477119279821e-05, "loss": 0.4835, "step": 10094 }, { "epoch": 0.7484985541632684, "grad_norm": 0.3363340497016907, "learning_rate": 9.907476869217305e-05, "loss": 0.4776, "step": 10095 }, { "epoch": 0.7485726996366872, "grad_norm": 0.37918218970298767, "learning_rate": 9.906476619154788e-05, "loss": 0.535, "step": 10096 }, { "epoch": 0.748646845110106, "grad_norm": 0.36337295174598694, "learning_rate": 9.905476369092273e-05, "loss": 0.4872, "step": 10097 }, { "epoch": 0.7487209905835249, "grad_norm": 0.34862616658210754, "learning_rate": 9.904476119029757e-05, "loss": 0.4918, "step": 10098 }, { "epoch": 0.7487951360569437, "grad_norm": 0.3486023545265198, "learning_rate": 9.903475868967242e-05, "loss": 0.4838, "step": 10099 }, { "epoch": 0.7488692815303626, "grad_norm": 0.35577014088630676, "learning_rate": 9.902475618904726e-05, "loss": 0.5205, "step": 10100 }, { "epoch": 0.7489434270037815, "grad_norm": 0.38612326979637146, "learning_rate": 9.901475368842211e-05, "loss": 0.5318, "step": 10101 }, { "epoch": 0.7490175724772002, "grad_norm": 0.35478150844573975, "learning_rate": 9.900475118779695e-05, "loss": 0.4629, "step": 10102 }, { "epoch": 0.7490917179506191, "grad_norm": 0.35493457317352295, "learning_rate": 9.89947486871718e-05, "loss": 0.4626, "step": 10103 }, { "epoch": 0.749165863424038, "grad_norm": 0.34813785552978516, "learning_rate": 9.898474618654663e-05, "loss": 0.4846, "step": 10104 }, { "epoch": 0.7492400088974568, "grad_norm": 0.39040428400039673, "learning_rate": 9.897474368592148e-05, "loss": 0.5051, "step": 10105 }, { "epoch": 0.7493141543708757, "grad_norm": 0.38617274165153503, "learning_rate": 9.896474118529633e-05, "loss": 0.5478, "step": 10106 }, { "epoch": 0.7493882998442946, "grad_norm": 0.3503841459751129, "learning_rate": 9.895473868467117e-05, "loss": 0.473, "step": 10107 }, { "epoch": 0.7494624453177133, "grad_norm": 0.3536120057106018, "learning_rate": 9.894473618404602e-05, "loss": 0.4793, "step": 10108 }, { "epoch": 0.7495365907911322, "grad_norm": 0.35925793647766113, "learning_rate": 9.893473368342086e-05, "loss": 0.51, "step": 10109 }, { "epoch": 0.7496107362645511, "grad_norm": 0.3663129508495331, "learning_rate": 9.892473118279571e-05, "loss": 0.5013, "step": 10110 }, { "epoch": 0.7496848817379699, "grad_norm": 0.35977330803871155, "learning_rate": 9.891472868217055e-05, "loss": 0.4946, "step": 10111 }, { "epoch": 0.7497590272113888, "grad_norm": 0.3729119300842285, "learning_rate": 9.89047261815454e-05, "loss": 0.4905, "step": 10112 }, { "epoch": 0.7498331726848076, "grad_norm": 0.389600545167923, "learning_rate": 9.889472368092024e-05, "loss": 0.5655, "step": 10113 }, { "epoch": 0.7499073181582264, "grad_norm": 0.35729002952575684, "learning_rate": 9.888472118029509e-05, "loss": 0.4994, "step": 10114 }, { "epoch": 0.7499814636316453, "grad_norm": 0.3779301345348358, "learning_rate": 9.887471867966992e-05, "loss": 0.5125, "step": 10115 }, { "epoch": 0.7500556091050641, "grad_norm": 0.3652007281780243, "learning_rate": 9.886471617904477e-05, "loss": 0.491, "step": 10116 }, { "epoch": 0.750129754578483, "grad_norm": 0.36905717849731445, "learning_rate": 9.885471367841961e-05, "loss": 0.4981, "step": 10117 }, { "epoch": 0.7502039000519019, "grad_norm": 0.35571059584617615, "learning_rate": 9.884471117779446e-05, "loss": 0.4893, "step": 10118 }, { "epoch": 0.7502780455253206, "grad_norm": 0.3625701367855072, "learning_rate": 9.88347086771693e-05, "loss": 0.5084, "step": 10119 }, { "epoch": 0.7503521909987395, "grad_norm": 0.35859715938568115, "learning_rate": 9.882470617654415e-05, "loss": 0.503, "step": 10120 }, { "epoch": 0.7504263364721584, "grad_norm": 0.3504720628261566, "learning_rate": 9.881470367591899e-05, "loss": 0.4433, "step": 10121 }, { "epoch": 0.7505004819455772, "grad_norm": 0.3698630928993225, "learning_rate": 9.880470117529382e-05, "loss": 0.4872, "step": 10122 }, { "epoch": 0.750574627418996, "grad_norm": 0.3578074872493744, "learning_rate": 9.879469867466867e-05, "loss": 0.4844, "step": 10123 }, { "epoch": 0.7506487728924149, "grad_norm": 0.3704688251018524, "learning_rate": 9.878469617404351e-05, "loss": 0.5132, "step": 10124 }, { "epoch": 0.7507229183658337, "grad_norm": 0.3933497667312622, "learning_rate": 9.877469367341836e-05, "loss": 0.5157, "step": 10125 }, { "epoch": 0.7507970638392526, "grad_norm": 0.32969415187835693, "learning_rate": 9.87646911727932e-05, "loss": 0.4653, "step": 10126 }, { "epoch": 0.7508712093126715, "grad_norm": 0.3550504446029663, "learning_rate": 9.875468867216805e-05, "loss": 0.5009, "step": 10127 }, { "epoch": 0.7509453547860903, "grad_norm": 0.33887404203414917, "learning_rate": 9.874468617154289e-05, "loss": 0.4684, "step": 10128 }, { "epoch": 0.7510195002595091, "grad_norm": 0.3710575997829437, "learning_rate": 9.873468367091774e-05, "loss": 0.491, "step": 10129 }, { "epoch": 0.751093645732928, "grad_norm": 0.34810394048690796, "learning_rate": 9.872468117029257e-05, "loss": 0.475, "step": 10130 }, { "epoch": 0.7511677912063468, "grad_norm": 0.37523677945137024, "learning_rate": 9.871467866966742e-05, "loss": 0.5571, "step": 10131 }, { "epoch": 0.7512419366797657, "grad_norm": 0.36266660690307617, "learning_rate": 9.870467616904226e-05, "loss": 0.5181, "step": 10132 }, { "epoch": 0.7513160821531846, "grad_norm": 0.37436482310295105, "learning_rate": 9.86946736684171e-05, "loss": 0.5331, "step": 10133 }, { "epoch": 0.7513902276266033, "grad_norm": 0.35838401317596436, "learning_rate": 9.868467116779195e-05, "loss": 0.49, "step": 10134 }, { "epoch": 0.7514643731000222, "grad_norm": 0.34067589044570923, "learning_rate": 9.867466866716679e-05, "loss": 0.4597, "step": 10135 }, { "epoch": 0.7515385185734411, "grad_norm": 0.35530298948287964, "learning_rate": 9.866466616654164e-05, "loss": 0.5195, "step": 10136 }, { "epoch": 0.7516126640468599, "grad_norm": 0.3839002549648285, "learning_rate": 9.865466366591647e-05, "loss": 0.5345, "step": 10137 }, { "epoch": 0.7516868095202788, "grad_norm": 0.37202975153923035, "learning_rate": 9.864466116529132e-05, "loss": 0.5245, "step": 10138 }, { "epoch": 0.7517609549936977, "grad_norm": 0.36819732189178467, "learning_rate": 9.863465866466616e-05, "loss": 0.5136, "step": 10139 }, { "epoch": 0.7518351004671164, "grad_norm": 0.36116641759872437, "learning_rate": 9.862465616404101e-05, "loss": 0.4818, "step": 10140 }, { "epoch": 0.7519092459405353, "grad_norm": 0.35631975531578064, "learning_rate": 9.861465366341586e-05, "loss": 0.4911, "step": 10141 }, { "epoch": 0.7519833914139542, "grad_norm": 0.3480009436607361, "learning_rate": 9.86046511627907e-05, "loss": 0.503, "step": 10142 }, { "epoch": 0.752057536887373, "grad_norm": 0.3740939497947693, "learning_rate": 9.859464866216555e-05, "loss": 0.5036, "step": 10143 }, { "epoch": 0.7521316823607919, "grad_norm": 0.3361457884311676, "learning_rate": 9.85846461615404e-05, "loss": 0.4728, "step": 10144 }, { "epoch": 0.7522058278342107, "grad_norm": 0.36703476309776306, "learning_rate": 9.857464366091524e-05, "loss": 0.4865, "step": 10145 }, { "epoch": 0.7522799733076295, "grad_norm": 0.35689908266067505, "learning_rate": 9.856464116029008e-05, "loss": 0.4814, "step": 10146 }, { "epoch": 0.7523541187810484, "grad_norm": 0.34616944193840027, "learning_rate": 9.855463865966493e-05, "loss": 0.4819, "step": 10147 }, { "epoch": 0.7524282642544673, "grad_norm": 0.3680407404899597, "learning_rate": 9.854463615903976e-05, "loss": 0.521, "step": 10148 }, { "epoch": 0.7525024097278861, "grad_norm": 0.3601420223712921, "learning_rate": 9.853463365841461e-05, "loss": 0.5107, "step": 10149 }, { "epoch": 0.752576555201305, "grad_norm": 0.36870235204696655, "learning_rate": 9.852463115778945e-05, "loss": 0.4798, "step": 10150 }, { "epoch": 0.7526507006747238, "grad_norm": 0.370989054441452, "learning_rate": 9.85146286571643e-05, "loss": 0.5161, "step": 10151 }, { "epoch": 0.7527248461481426, "grad_norm": 0.34527885913848877, "learning_rate": 9.850462615653914e-05, "loss": 0.4674, "step": 10152 }, { "epoch": 0.7527989916215615, "grad_norm": 0.34671109914779663, "learning_rate": 9.849462365591399e-05, "loss": 0.467, "step": 10153 }, { "epoch": 0.7528731370949804, "grad_norm": 0.36863815784454346, "learning_rate": 9.848462115528883e-05, "loss": 0.5216, "step": 10154 }, { "epoch": 0.7529472825683992, "grad_norm": 0.3619883358478546, "learning_rate": 9.847461865466368e-05, "loss": 0.5035, "step": 10155 }, { "epoch": 0.753021428041818, "grad_norm": 0.37487655878067017, "learning_rate": 9.846461615403851e-05, "loss": 0.5216, "step": 10156 }, { "epoch": 0.7530955735152369, "grad_norm": 0.35260432958602905, "learning_rate": 9.845461365341337e-05, "loss": 0.4817, "step": 10157 }, { "epoch": 0.7531697189886557, "grad_norm": 0.35136252641677856, "learning_rate": 9.84446111527882e-05, "loss": 0.4907, "step": 10158 }, { "epoch": 0.7532438644620746, "grad_norm": 0.408519446849823, "learning_rate": 9.843460865216304e-05, "loss": 0.5499, "step": 10159 }, { "epoch": 0.7533180099354935, "grad_norm": 0.36346638202667236, "learning_rate": 9.842460615153789e-05, "loss": 0.4755, "step": 10160 }, { "epoch": 0.7533921554089122, "grad_norm": 0.33849671483039856, "learning_rate": 9.841460365091273e-05, "loss": 0.4795, "step": 10161 }, { "epoch": 0.7534663008823311, "grad_norm": 0.35567355155944824, "learning_rate": 9.840460115028758e-05, "loss": 0.483, "step": 10162 }, { "epoch": 0.75354044635575, "grad_norm": 0.3676149845123291, "learning_rate": 9.839459864966241e-05, "loss": 0.4567, "step": 10163 }, { "epoch": 0.7536145918291688, "grad_norm": 0.3359205722808838, "learning_rate": 9.838459614903727e-05, "loss": 0.4806, "step": 10164 }, { "epoch": 0.7536887373025877, "grad_norm": 0.3491876423358917, "learning_rate": 9.83745936484121e-05, "loss": 0.4806, "step": 10165 }, { "epoch": 0.7537628827760066, "grad_norm": 0.37394559383392334, "learning_rate": 9.836459114778695e-05, "loss": 0.4928, "step": 10166 }, { "epoch": 0.7538370282494253, "grad_norm": 0.35573408007621765, "learning_rate": 9.835458864716179e-05, "loss": 0.4828, "step": 10167 }, { "epoch": 0.7539111737228442, "grad_norm": 0.3471428453922272, "learning_rate": 9.834458614653664e-05, "loss": 0.44, "step": 10168 }, { "epoch": 0.7539853191962631, "grad_norm": 0.34554266929626465, "learning_rate": 9.833458364591148e-05, "loss": 0.4602, "step": 10169 }, { "epoch": 0.7540594646696819, "grad_norm": 0.37214967608451843, "learning_rate": 9.832458114528632e-05, "loss": 0.5035, "step": 10170 }, { "epoch": 0.7541336101431008, "grad_norm": 0.3515111207962036, "learning_rate": 9.831457864466117e-05, "loss": 0.4922, "step": 10171 }, { "epoch": 0.7542077556165196, "grad_norm": 0.37508347630500793, "learning_rate": 9.8304576144036e-05, "loss": 0.5506, "step": 10172 }, { "epoch": 0.7542819010899384, "grad_norm": 0.34961599111557007, "learning_rate": 9.829457364341085e-05, "loss": 0.4858, "step": 10173 }, { "epoch": 0.7543560465633573, "grad_norm": 0.3616822361946106, "learning_rate": 9.82845711427857e-05, "loss": 0.4662, "step": 10174 }, { "epoch": 0.7544301920367762, "grad_norm": 0.36320585012435913, "learning_rate": 9.827456864216054e-05, "loss": 0.5282, "step": 10175 }, { "epoch": 0.754504337510195, "grad_norm": 0.3444063365459442, "learning_rate": 9.826456614153539e-05, "loss": 0.4698, "step": 10176 }, { "epoch": 0.7545784829836139, "grad_norm": 0.36501672863960266, "learning_rate": 9.825456364091023e-05, "loss": 0.5192, "step": 10177 }, { "epoch": 0.7546526284570327, "grad_norm": 0.3906938135623932, "learning_rate": 9.824456114028508e-05, "loss": 0.5387, "step": 10178 }, { "epoch": 0.7547267739304515, "grad_norm": 0.34770768880844116, "learning_rate": 9.823455863965993e-05, "loss": 0.479, "step": 10179 }, { "epoch": 0.7548009194038704, "grad_norm": 0.36690467596054077, "learning_rate": 9.822455613903477e-05, "loss": 0.5231, "step": 10180 }, { "epoch": 0.7548750648772893, "grad_norm": 0.35358554124832153, "learning_rate": 9.821455363840962e-05, "loss": 0.5149, "step": 10181 }, { "epoch": 0.754949210350708, "grad_norm": 0.37338027358055115, "learning_rate": 9.820455113778446e-05, "loss": 0.4821, "step": 10182 }, { "epoch": 0.7550233558241269, "grad_norm": 0.35361990332603455, "learning_rate": 9.819454863715929e-05, "loss": 0.4575, "step": 10183 }, { "epoch": 0.7550975012975458, "grad_norm": 0.33902275562286377, "learning_rate": 9.818454613653414e-05, "loss": 0.4768, "step": 10184 }, { "epoch": 0.7551716467709646, "grad_norm": 0.3523785471916199, "learning_rate": 9.817454363590898e-05, "loss": 0.4874, "step": 10185 }, { "epoch": 0.7552457922443835, "grad_norm": 0.3541758060455322, "learning_rate": 9.816454113528383e-05, "loss": 0.4832, "step": 10186 }, { "epoch": 0.7553199377178024, "grad_norm": 0.3488328158855438, "learning_rate": 9.815453863465867e-05, "loss": 0.5217, "step": 10187 }, { "epoch": 0.7553940831912211, "grad_norm": 0.3629981279373169, "learning_rate": 9.814453613403352e-05, "loss": 0.5053, "step": 10188 }, { "epoch": 0.75546822866464, "grad_norm": 0.3580688536167145, "learning_rate": 9.813453363340836e-05, "loss": 0.5161, "step": 10189 }, { "epoch": 0.7555423741380589, "grad_norm": 0.32830265164375305, "learning_rate": 9.81245311327832e-05, "loss": 0.4546, "step": 10190 }, { "epoch": 0.7556165196114777, "grad_norm": 0.35847562551498413, "learning_rate": 9.811452863215804e-05, "loss": 0.4818, "step": 10191 }, { "epoch": 0.7556906650848966, "grad_norm": 0.3640361726284027, "learning_rate": 9.81045261315329e-05, "loss": 0.506, "step": 10192 }, { "epoch": 0.7557648105583155, "grad_norm": 0.3449519872665405, "learning_rate": 9.809452363090773e-05, "loss": 0.4872, "step": 10193 }, { "epoch": 0.7558389560317342, "grad_norm": 0.353484570980072, "learning_rate": 9.808452113028258e-05, "loss": 0.4669, "step": 10194 }, { "epoch": 0.7559131015051531, "grad_norm": 0.3826415240764618, "learning_rate": 9.807451862965742e-05, "loss": 0.5052, "step": 10195 }, { "epoch": 0.755987246978572, "grad_norm": 0.3568328022956848, "learning_rate": 9.806451612903226e-05, "loss": 0.4982, "step": 10196 }, { "epoch": 0.7560613924519908, "grad_norm": 0.3711591958999634, "learning_rate": 9.80545136284071e-05, "loss": 0.5357, "step": 10197 }, { "epoch": 0.7561355379254097, "grad_norm": 0.3454180955886841, "learning_rate": 9.804451112778194e-05, "loss": 0.499, "step": 10198 }, { "epoch": 0.7562096833988285, "grad_norm": 0.3469143807888031, "learning_rate": 9.80345086271568e-05, "loss": 0.4991, "step": 10199 }, { "epoch": 0.7562838288722473, "grad_norm": 0.3718167841434479, "learning_rate": 9.802450612653163e-05, "loss": 0.5139, "step": 10200 }, { "epoch": 0.7563579743456662, "grad_norm": 0.3477468192577362, "learning_rate": 9.801450362590648e-05, "loss": 0.5159, "step": 10201 }, { "epoch": 0.7564321198190851, "grad_norm": 0.35643815994262695, "learning_rate": 9.800450112528132e-05, "loss": 0.5009, "step": 10202 }, { "epoch": 0.7565062652925039, "grad_norm": 0.35126492381095886, "learning_rate": 9.799449862465617e-05, "loss": 0.4624, "step": 10203 }, { "epoch": 0.7565804107659228, "grad_norm": 0.3497622609138489, "learning_rate": 9.7984496124031e-05, "loss": 0.4662, "step": 10204 }, { "epoch": 0.7566545562393416, "grad_norm": 0.35378140211105347, "learning_rate": 9.797449362340586e-05, "loss": 0.5095, "step": 10205 }, { "epoch": 0.7567287017127604, "grad_norm": 0.35924437642097473, "learning_rate": 9.79644911227807e-05, "loss": 0.53, "step": 10206 }, { "epoch": 0.7568028471861793, "grad_norm": 0.3590027987957001, "learning_rate": 9.795448862215554e-05, "loss": 0.5208, "step": 10207 }, { "epoch": 0.7568769926595982, "grad_norm": 0.3574943542480469, "learning_rate": 9.794448612153038e-05, "loss": 0.5067, "step": 10208 }, { "epoch": 0.756951138133017, "grad_norm": 0.35265040397644043, "learning_rate": 9.793448362090523e-05, "loss": 0.5024, "step": 10209 }, { "epoch": 0.7570252836064358, "grad_norm": 0.3745036721229553, "learning_rate": 9.792448112028007e-05, "loss": 0.4963, "step": 10210 }, { "epoch": 0.7570994290798547, "grad_norm": 0.3801138699054718, "learning_rate": 9.791447861965492e-05, "loss": 0.5217, "step": 10211 }, { "epoch": 0.7571735745532735, "grad_norm": 0.3453464210033417, "learning_rate": 9.790447611902977e-05, "loss": 0.4932, "step": 10212 }, { "epoch": 0.7572477200266924, "grad_norm": 0.36126643419265747, "learning_rate": 9.789447361840461e-05, "loss": 0.5075, "step": 10213 }, { "epoch": 0.7573218655001113, "grad_norm": 0.34239858388900757, "learning_rate": 9.788447111777946e-05, "loss": 0.4789, "step": 10214 }, { "epoch": 0.75739601097353, "grad_norm": 0.36452993750572205, "learning_rate": 9.78744686171543e-05, "loss": 0.4897, "step": 10215 }, { "epoch": 0.7574701564469489, "grad_norm": 0.3470347225666046, "learning_rate": 9.786446611652915e-05, "loss": 0.4979, "step": 10216 }, { "epoch": 0.7575443019203678, "grad_norm": 0.37642180919647217, "learning_rate": 9.785446361590398e-05, "loss": 0.5085, "step": 10217 }, { "epoch": 0.7576184473937866, "grad_norm": 0.35929298400878906, "learning_rate": 9.784446111527883e-05, "loss": 0.5168, "step": 10218 }, { "epoch": 0.7576925928672055, "grad_norm": 0.34773826599121094, "learning_rate": 9.783445861465367e-05, "loss": 0.4998, "step": 10219 }, { "epoch": 0.7577667383406244, "grad_norm": 0.3312397599220276, "learning_rate": 9.782445611402851e-05, "loss": 0.4605, "step": 10220 }, { "epoch": 0.7578408838140431, "grad_norm": 0.3581726849079132, "learning_rate": 9.781445361340336e-05, "loss": 0.5083, "step": 10221 }, { "epoch": 0.757915029287462, "grad_norm": 0.3522912561893463, "learning_rate": 9.78044511127782e-05, "loss": 0.4941, "step": 10222 }, { "epoch": 0.7579891747608809, "grad_norm": 0.3972053825855255, "learning_rate": 9.779444861215305e-05, "loss": 0.5421, "step": 10223 }, { "epoch": 0.7580633202342997, "grad_norm": 0.38321229815483093, "learning_rate": 9.778444611152788e-05, "loss": 0.5389, "step": 10224 }, { "epoch": 0.7581374657077186, "grad_norm": 0.34827783703804016, "learning_rate": 9.777444361090273e-05, "loss": 0.4736, "step": 10225 }, { "epoch": 0.7582116111811374, "grad_norm": 0.3826291263103485, "learning_rate": 9.776444111027757e-05, "loss": 0.5271, "step": 10226 }, { "epoch": 0.7582857566545562, "grad_norm": 0.3630203902721405, "learning_rate": 9.775443860965242e-05, "loss": 0.506, "step": 10227 }, { "epoch": 0.7583599021279751, "grad_norm": 0.3652452826499939, "learning_rate": 9.774443610902726e-05, "loss": 0.4914, "step": 10228 }, { "epoch": 0.758434047601394, "grad_norm": 0.36299434304237366, "learning_rate": 9.773443360840211e-05, "loss": 0.4924, "step": 10229 }, { "epoch": 0.7585081930748128, "grad_norm": 0.3600659668445587, "learning_rate": 9.772443110777695e-05, "loss": 0.4711, "step": 10230 }, { "epoch": 0.7585823385482316, "grad_norm": 0.36564818024635315, "learning_rate": 9.77144286071518e-05, "loss": 0.5185, "step": 10231 }, { "epoch": 0.7586564840216504, "grad_norm": 0.3599850535392761, "learning_rate": 9.770442610652663e-05, "loss": 0.5005, "step": 10232 }, { "epoch": 0.7587306294950693, "grad_norm": 0.36565905809402466, "learning_rate": 9.769442360590147e-05, "loss": 0.5018, "step": 10233 }, { "epoch": 0.7588047749684882, "grad_norm": 0.36820024251937866, "learning_rate": 9.768442110527632e-05, "loss": 0.5011, "step": 10234 }, { "epoch": 0.758878920441907, "grad_norm": 0.3545131981372833, "learning_rate": 9.767441860465116e-05, "loss": 0.4662, "step": 10235 }, { "epoch": 0.7589530659153259, "grad_norm": 0.3742107152938843, "learning_rate": 9.766441610402601e-05, "loss": 0.5535, "step": 10236 }, { "epoch": 0.7590272113887447, "grad_norm": 0.37167444825172424, "learning_rate": 9.765441360340085e-05, "loss": 0.5231, "step": 10237 }, { "epoch": 0.7591013568621635, "grad_norm": 0.3604283034801483, "learning_rate": 9.76444111027757e-05, "loss": 0.5091, "step": 10238 }, { "epoch": 0.7591755023355824, "grad_norm": 0.3471769094467163, "learning_rate": 9.763440860215054e-05, "loss": 0.4579, "step": 10239 }, { "epoch": 0.7592496478090013, "grad_norm": 0.34585756063461304, "learning_rate": 9.762440610152539e-05, "loss": 0.4675, "step": 10240 }, { "epoch": 0.7593237932824201, "grad_norm": 0.3359997868537903, "learning_rate": 9.761440360090022e-05, "loss": 0.4486, "step": 10241 }, { "epoch": 0.7593979387558389, "grad_norm": 0.33402955532073975, "learning_rate": 9.760440110027507e-05, "loss": 0.4712, "step": 10242 }, { "epoch": 0.7594720842292578, "grad_norm": 0.3731062710285187, "learning_rate": 9.759439859964991e-05, "loss": 0.5345, "step": 10243 }, { "epoch": 0.7595462297026766, "grad_norm": 0.3572670519351959, "learning_rate": 9.758439609902476e-05, "loss": 0.4767, "step": 10244 }, { "epoch": 0.7596203751760955, "grad_norm": 0.3476724326610565, "learning_rate": 9.75743935983996e-05, "loss": 0.4611, "step": 10245 }, { "epoch": 0.7596945206495144, "grad_norm": 0.36171597242355347, "learning_rate": 9.756439109777445e-05, "loss": 0.4865, "step": 10246 }, { "epoch": 0.7597686661229331, "grad_norm": 0.36382436752319336, "learning_rate": 9.75543885971493e-05, "loss": 0.4807, "step": 10247 }, { "epoch": 0.759842811596352, "grad_norm": 0.35201945900917053, "learning_rate": 9.754438609652414e-05, "loss": 0.4774, "step": 10248 }, { "epoch": 0.7599169570697709, "grad_norm": 0.35111063718795776, "learning_rate": 9.753438359589899e-05, "loss": 0.4798, "step": 10249 }, { "epoch": 0.7599911025431897, "grad_norm": 0.3592272698879242, "learning_rate": 9.752438109527382e-05, "loss": 0.4835, "step": 10250 }, { "epoch": 0.7600652480166086, "grad_norm": 0.3894747495651245, "learning_rate": 9.751437859464868e-05, "loss": 0.5495, "step": 10251 }, { "epoch": 0.7601393934900275, "grad_norm": 0.37414780259132385, "learning_rate": 9.750437609402351e-05, "loss": 0.5345, "step": 10252 }, { "epoch": 0.7602135389634462, "grad_norm": 0.3650225102901459, "learning_rate": 9.749437359339836e-05, "loss": 0.5245, "step": 10253 }, { "epoch": 0.7602876844368651, "grad_norm": 0.3652661144733429, "learning_rate": 9.74843710927732e-05, "loss": 0.5039, "step": 10254 }, { "epoch": 0.760361829910284, "grad_norm": 0.4015263020992279, "learning_rate": 9.747436859214805e-05, "loss": 0.535, "step": 10255 }, { "epoch": 0.7604359753837028, "grad_norm": 0.36217138171195984, "learning_rate": 9.746436609152289e-05, "loss": 0.5017, "step": 10256 }, { "epoch": 0.7605101208571217, "grad_norm": 0.34330880641937256, "learning_rate": 9.745436359089772e-05, "loss": 0.474, "step": 10257 }, { "epoch": 0.7605842663305405, "grad_norm": 0.37244415283203125, "learning_rate": 9.744436109027258e-05, "loss": 0.522, "step": 10258 }, { "epoch": 0.7606584118039593, "grad_norm": 0.3654603362083435, "learning_rate": 9.743435858964741e-05, "loss": 0.4924, "step": 10259 }, { "epoch": 0.7607325572773782, "grad_norm": 0.36880025267601013, "learning_rate": 9.742435608902226e-05, "loss": 0.5196, "step": 10260 }, { "epoch": 0.7608067027507971, "grad_norm": 0.39867666363716125, "learning_rate": 9.74143535883971e-05, "loss": 0.5637, "step": 10261 }, { "epoch": 0.7608808482242159, "grad_norm": 0.36199668049812317, "learning_rate": 9.740435108777195e-05, "loss": 0.4956, "step": 10262 }, { "epoch": 0.7609549936976348, "grad_norm": 0.3617141842842102, "learning_rate": 9.739434858714679e-05, "loss": 0.4828, "step": 10263 }, { "epoch": 0.7610291391710536, "grad_norm": 0.34820038080215454, "learning_rate": 9.738434608652164e-05, "loss": 0.4689, "step": 10264 }, { "epoch": 0.7611032846444724, "grad_norm": 0.37433695793151855, "learning_rate": 9.737434358589648e-05, "loss": 0.5162, "step": 10265 }, { "epoch": 0.7611774301178913, "grad_norm": 0.3500790297985077, "learning_rate": 9.736434108527133e-05, "loss": 0.4817, "step": 10266 }, { "epoch": 0.7612515755913102, "grad_norm": 0.37766632437705994, "learning_rate": 9.735433858464616e-05, "loss": 0.5107, "step": 10267 }, { "epoch": 0.761325721064729, "grad_norm": 0.35735470056533813, "learning_rate": 9.734433608402101e-05, "loss": 0.5167, "step": 10268 }, { "epoch": 0.7613998665381478, "grad_norm": 0.3554949462413788, "learning_rate": 9.733433358339585e-05, "loss": 0.4988, "step": 10269 }, { "epoch": 0.7614740120115667, "grad_norm": 0.38481253385543823, "learning_rate": 9.732433108277069e-05, "loss": 0.525, "step": 10270 }, { "epoch": 0.7615481574849855, "grad_norm": 0.3596526086330414, "learning_rate": 9.731432858214554e-05, "loss": 0.4899, "step": 10271 }, { "epoch": 0.7616223029584044, "grad_norm": 0.36414775252342224, "learning_rate": 9.730432608152038e-05, "loss": 0.5199, "step": 10272 }, { "epoch": 0.7616964484318233, "grad_norm": 0.34239766001701355, "learning_rate": 9.729432358089523e-05, "loss": 0.4819, "step": 10273 }, { "epoch": 0.761770593905242, "grad_norm": 0.35797908902168274, "learning_rate": 9.728432108027006e-05, "loss": 0.4755, "step": 10274 }, { "epoch": 0.7618447393786609, "grad_norm": 0.3523350954055786, "learning_rate": 9.727431857964491e-05, "loss": 0.4782, "step": 10275 }, { "epoch": 0.7619188848520798, "grad_norm": 0.3480892479419708, "learning_rate": 9.726431607901975e-05, "loss": 0.4727, "step": 10276 }, { "epoch": 0.7619930303254986, "grad_norm": 0.353802889585495, "learning_rate": 9.72543135783946e-05, "loss": 0.4585, "step": 10277 }, { "epoch": 0.7620671757989175, "grad_norm": 0.374586284160614, "learning_rate": 9.724431107776944e-05, "loss": 0.5099, "step": 10278 }, { "epoch": 0.7621413212723364, "grad_norm": 0.36402183771133423, "learning_rate": 9.723430857714429e-05, "loss": 0.4903, "step": 10279 }, { "epoch": 0.7622154667457551, "grad_norm": 0.3780229389667511, "learning_rate": 9.722430607651914e-05, "loss": 0.5463, "step": 10280 }, { "epoch": 0.762289612219174, "grad_norm": 0.37410998344421387, "learning_rate": 9.721430357589398e-05, "loss": 0.5368, "step": 10281 }, { "epoch": 0.7623637576925929, "grad_norm": 0.3518487513065338, "learning_rate": 9.720430107526883e-05, "loss": 0.4894, "step": 10282 }, { "epoch": 0.7624379031660117, "grad_norm": 0.35528072714805603, "learning_rate": 9.719429857464367e-05, "loss": 0.5227, "step": 10283 }, { "epoch": 0.7625120486394306, "grad_norm": 0.39096081256866455, "learning_rate": 9.718429607401852e-05, "loss": 0.537, "step": 10284 }, { "epoch": 0.7625861941128494, "grad_norm": 0.3478642702102661, "learning_rate": 9.717429357339335e-05, "loss": 0.4907, "step": 10285 }, { "epoch": 0.7626603395862682, "grad_norm": 0.3343842923641205, "learning_rate": 9.71642910727682e-05, "loss": 0.4707, "step": 10286 }, { "epoch": 0.7627344850596871, "grad_norm": 0.3525501787662506, "learning_rate": 9.715428857214304e-05, "loss": 0.489, "step": 10287 }, { "epoch": 0.762808630533106, "grad_norm": 0.3707776665687561, "learning_rate": 9.714428607151789e-05, "loss": 0.5147, "step": 10288 }, { "epoch": 0.7628827760065248, "grad_norm": 0.36365607380867004, "learning_rate": 9.713428357089273e-05, "loss": 0.4756, "step": 10289 }, { "epoch": 0.7629569214799437, "grad_norm": 0.3643920421600342, "learning_rate": 9.712428107026758e-05, "loss": 0.504, "step": 10290 }, { "epoch": 0.7630310669533625, "grad_norm": 0.3822956383228302, "learning_rate": 9.711427856964242e-05, "loss": 0.503, "step": 10291 }, { "epoch": 0.7631052124267813, "grad_norm": 0.3660220801830292, "learning_rate": 9.710427606901727e-05, "loss": 0.5073, "step": 10292 }, { "epoch": 0.7631793579002002, "grad_norm": 0.3801155090332031, "learning_rate": 9.70942735683921e-05, "loss": 0.5152, "step": 10293 }, { "epoch": 0.7632535033736191, "grad_norm": 0.35712307691574097, "learning_rate": 9.708427106776694e-05, "loss": 0.5092, "step": 10294 }, { "epoch": 0.7633276488470379, "grad_norm": 0.34173712134361267, "learning_rate": 9.707426856714179e-05, "loss": 0.478, "step": 10295 }, { "epoch": 0.7634017943204567, "grad_norm": 0.35811924934387207, "learning_rate": 9.706426606651663e-05, "loss": 0.4887, "step": 10296 }, { "epoch": 0.7634759397938756, "grad_norm": 0.3459070920944214, "learning_rate": 9.705426356589148e-05, "loss": 0.4987, "step": 10297 }, { "epoch": 0.7635500852672944, "grad_norm": 0.37274640798568726, "learning_rate": 9.704426106526632e-05, "loss": 0.5108, "step": 10298 }, { "epoch": 0.7636242307407133, "grad_norm": 0.3694532513618469, "learning_rate": 9.703425856464117e-05, "loss": 0.5095, "step": 10299 }, { "epoch": 0.7636983762141322, "grad_norm": 0.35787802934646606, "learning_rate": 9.7024256064016e-05, "loss": 0.5171, "step": 10300 }, { "epoch": 0.7637725216875509, "grad_norm": 0.37388473749160767, "learning_rate": 9.701425356339085e-05, "loss": 0.5218, "step": 10301 }, { "epoch": 0.7638466671609698, "grad_norm": 0.3711100220680237, "learning_rate": 9.700425106276569e-05, "loss": 0.4913, "step": 10302 }, { "epoch": 0.7639208126343887, "grad_norm": 0.3477531373500824, "learning_rate": 9.699424856214054e-05, "loss": 0.464, "step": 10303 }, { "epoch": 0.7639949581078075, "grad_norm": 0.3440611958503723, "learning_rate": 9.698424606151538e-05, "loss": 0.4994, "step": 10304 }, { "epoch": 0.7640691035812264, "grad_norm": 0.34559205174446106, "learning_rate": 9.697424356089023e-05, "loss": 0.4861, "step": 10305 }, { "epoch": 0.7641432490546453, "grad_norm": 0.3554020822048187, "learning_rate": 9.696424106026507e-05, "loss": 0.4885, "step": 10306 }, { "epoch": 0.764217394528064, "grad_norm": 0.35157063603401184, "learning_rate": 9.69542385596399e-05, "loss": 0.4996, "step": 10307 }, { "epoch": 0.7642915400014829, "grad_norm": 0.36847037076950073, "learning_rate": 9.694423605901476e-05, "loss": 0.4978, "step": 10308 }, { "epoch": 0.7643656854749018, "grad_norm": 0.3676539361476898, "learning_rate": 9.693423355838959e-05, "loss": 0.5071, "step": 10309 }, { "epoch": 0.7644398309483206, "grad_norm": 0.37275341153144836, "learning_rate": 9.692423105776444e-05, "loss": 0.5265, "step": 10310 }, { "epoch": 0.7645139764217395, "grad_norm": 0.3649749755859375, "learning_rate": 9.691422855713928e-05, "loss": 0.509, "step": 10311 }, { "epoch": 0.7645881218951583, "grad_norm": 0.3817954659461975, "learning_rate": 9.690422605651413e-05, "loss": 0.4987, "step": 10312 }, { "epoch": 0.7646622673685771, "grad_norm": 0.33145463466644287, "learning_rate": 9.689422355588898e-05, "loss": 0.4801, "step": 10313 }, { "epoch": 0.764736412841996, "grad_norm": 0.37813377380371094, "learning_rate": 9.688422105526382e-05, "loss": 0.519, "step": 10314 }, { "epoch": 0.7648105583154149, "grad_norm": 0.35292232036590576, "learning_rate": 9.687421855463867e-05, "loss": 0.4714, "step": 10315 }, { "epoch": 0.7648847037888337, "grad_norm": 0.4022344648838043, "learning_rate": 9.68642160540135e-05, "loss": 0.4548, "step": 10316 }, { "epoch": 0.7649588492622525, "grad_norm": 0.3912814259529114, "learning_rate": 9.685421355338836e-05, "loss": 0.5597, "step": 10317 }, { "epoch": 0.7650329947356714, "grad_norm": 0.3502289056777954, "learning_rate": 9.684421105276321e-05, "loss": 0.4947, "step": 10318 }, { "epoch": 0.7651071402090902, "grad_norm": 0.34885114431381226, "learning_rate": 9.683420855213804e-05, "loss": 0.4742, "step": 10319 }, { "epoch": 0.7651812856825091, "grad_norm": 0.38036152720451355, "learning_rate": 9.682420605151288e-05, "loss": 0.5533, "step": 10320 }, { "epoch": 0.765255431155928, "grad_norm": 0.36543065309524536, "learning_rate": 9.681420355088773e-05, "loss": 0.5173, "step": 10321 }, { "epoch": 0.7653295766293468, "grad_norm": 0.36900562047958374, "learning_rate": 9.680420105026257e-05, "loss": 0.5118, "step": 10322 }, { "epoch": 0.7654037221027656, "grad_norm": 0.3724912703037262, "learning_rate": 9.679419854963742e-05, "loss": 0.4745, "step": 10323 }, { "epoch": 0.7654778675761845, "grad_norm": 0.357514888048172, "learning_rate": 9.678419604901226e-05, "loss": 0.4789, "step": 10324 }, { "epoch": 0.7655520130496033, "grad_norm": 0.3651023805141449, "learning_rate": 9.677419354838711e-05, "loss": 0.4993, "step": 10325 }, { "epoch": 0.7656261585230222, "grad_norm": 0.3590785562992096, "learning_rate": 9.676419104776194e-05, "loss": 0.4792, "step": 10326 }, { "epoch": 0.7657003039964411, "grad_norm": 0.35702797770500183, "learning_rate": 9.67541885471368e-05, "loss": 0.478, "step": 10327 }, { "epoch": 0.7657744494698598, "grad_norm": 0.36410707235336304, "learning_rate": 9.674418604651163e-05, "loss": 0.5027, "step": 10328 }, { "epoch": 0.7658485949432787, "grad_norm": 0.3862502872943878, "learning_rate": 9.673418354588648e-05, "loss": 0.5349, "step": 10329 }, { "epoch": 0.7659227404166976, "grad_norm": 0.3682675063610077, "learning_rate": 9.672418104526132e-05, "loss": 0.5021, "step": 10330 }, { "epoch": 0.7659968858901164, "grad_norm": 0.3509948253631592, "learning_rate": 9.671417854463616e-05, "loss": 0.4706, "step": 10331 }, { "epoch": 0.7660710313635353, "grad_norm": 0.35898005962371826, "learning_rate": 9.670417604401101e-05, "loss": 0.4868, "step": 10332 }, { "epoch": 0.7661451768369542, "grad_norm": 0.3460615873336792, "learning_rate": 9.669417354338584e-05, "loss": 0.4819, "step": 10333 }, { "epoch": 0.7662193223103729, "grad_norm": 0.3827294111251831, "learning_rate": 9.66841710427607e-05, "loss": 0.5235, "step": 10334 }, { "epoch": 0.7662934677837918, "grad_norm": 0.3405618369579315, "learning_rate": 9.667416854213553e-05, "loss": 0.4684, "step": 10335 }, { "epoch": 0.7663676132572107, "grad_norm": 0.36022722721099854, "learning_rate": 9.666416604151038e-05, "loss": 0.4999, "step": 10336 }, { "epoch": 0.7664417587306295, "grad_norm": 0.3641928732395172, "learning_rate": 9.665416354088522e-05, "loss": 0.4848, "step": 10337 }, { "epoch": 0.7665159042040484, "grad_norm": 0.35011881589889526, "learning_rate": 9.664416104026007e-05, "loss": 0.4709, "step": 10338 }, { "epoch": 0.7665900496774672, "grad_norm": 0.3393026292324066, "learning_rate": 9.663415853963491e-05, "loss": 0.4588, "step": 10339 }, { "epoch": 0.766664195150886, "grad_norm": 0.3756476640701294, "learning_rate": 9.662415603900976e-05, "loss": 0.5195, "step": 10340 }, { "epoch": 0.7667383406243049, "grad_norm": 0.34323716163635254, "learning_rate": 9.66141535383846e-05, "loss": 0.483, "step": 10341 }, { "epoch": 0.7668124860977238, "grad_norm": 0.37051597237586975, "learning_rate": 9.660415103775945e-05, "loss": 0.517, "step": 10342 }, { "epoch": 0.7668866315711426, "grad_norm": 0.3612458407878876, "learning_rate": 9.659414853713428e-05, "loss": 0.5003, "step": 10343 }, { "epoch": 0.7669607770445614, "grad_norm": 0.374797523021698, "learning_rate": 9.658414603650912e-05, "loss": 0.4969, "step": 10344 }, { "epoch": 0.7670349225179802, "grad_norm": 0.36771348118782043, "learning_rate": 9.657414353588397e-05, "loss": 0.4955, "step": 10345 }, { "epoch": 0.7671090679913991, "grad_norm": 0.3757427930831909, "learning_rate": 9.656414103525881e-05, "loss": 0.5018, "step": 10346 }, { "epoch": 0.767183213464818, "grad_norm": 0.36118990182876587, "learning_rate": 9.655413853463366e-05, "loss": 0.5052, "step": 10347 }, { "epoch": 0.7672573589382368, "grad_norm": 0.3733411431312561, "learning_rate": 9.654413603400851e-05, "loss": 0.5106, "step": 10348 }, { "epoch": 0.7673315044116557, "grad_norm": 0.3918684422969818, "learning_rate": 9.653413353338335e-05, "loss": 0.5164, "step": 10349 }, { "epoch": 0.7674056498850745, "grad_norm": 0.36340802907943726, "learning_rate": 9.65241310327582e-05, "loss": 0.5277, "step": 10350 }, { "epoch": 0.7674797953584933, "grad_norm": 0.3866863548755646, "learning_rate": 9.651412853213305e-05, "loss": 0.5436, "step": 10351 }, { "epoch": 0.7675539408319122, "grad_norm": 0.35838592052459717, "learning_rate": 9.650412603150789e-05, "loss": 0.4987, "step": 10352 }, { "epoch": 0.7676280863053311, "grad_norm": 0.37290480732917786, "learning_rate": 9.649412353088274e-05, "loss": 0.5015, "step": 10353 }, { "epoch": 0.7677022317787499, "grad_norm": 0.3521709442138672, "learning_rate": 9.648412103025757e-05, "loss": 0.503, "step": 10354 }, { "epoch": 0.7677763772521687, "grad_norm": 0.3437837064266205, "learning_rate": 9.647411852963242e-05, "loss": 0.5068, "step": 10355 }, { "epoch": 0.7678505227255876, "grad_norm": 0.35506463050842285, "learning_rate": 9.646411602900726e-05, "loss": 0.4931, "step": 10356 }, { "epoch": 0.7679246681990064, "grad_norm": 0.3610176742076874, "learning_rate": 9.64541135283821e-05, "loss": 0.5055, "step": 10357 }, { "epoch": 0.7679988136724253, "grad_norm": 0.37168562412261963, "learning_rate": 9.644411102775695e-05, "loss": 0.4923, "step": 10358 }, { "epoch": 0.7680729591458442, "grad_norm": 0.35967111587524414, "learning_rate": 9.643410852713179e-05, "loss": 0.4833, "step": 10359 }, { "epoch": 0.7681471046192629, "grad_norm": 0.3462454378604889, "learning_rate": 9.642410602650664e-05, "loss": 0.4845, "step": 10360 }, { "epoch": 0.7682212500926818, "grad_norm": 0.34421011805534363, "learning_rate": 9.641410352588147e-05, "loss": 0.4665, "step": 10361 }, { "epoch": 0.7682953955661007, "grad_norm": 0.36342108249664307, "learning_rate": 9.640410102525632e-05, "loss": 0.4956, "step": 10362 }, { "epoch": 0.7683695410395195, "grad_norm": 0.35796287655830383, "learning_rate": 9.639409852463116e-05, "loss": 0.4663, "step": 10363 }, { "epoch": 0.7684436865129384, "grad_norm": 0.35824406147003174, "learning_rate": 9.638409602400601e-05, "loss": 0.5125, "step": 10364 }, { "epoch": 0.7685178319863573, "grad_norm": 0.3663225471973419, "learning_rate": 9.637409352338085e-05, "loss": 0.5024, "step": 10365 }, { "epoch": 0.768591977459776, "grad_norm": 0.38451433181762695, "learning_rate": 9.63640910227557e-05, "loss": 0.5083, "step": 10366 }, { "epoch": 0.7686661229331949, "grad_norm": 0.36836501955986023, "learning_rate": 9.635408852213054e-05, "loss": 0.4991, "step": 10367 }, { "epoch": 0.7687402684066138, "grad_norm": 0.3601319491863251, "learning_rate": 9.634408602150539e-05, "loss": 0.4868, "step": 10368 }, { "epoch": 0.7688144138800326, "grad_norm": 0.37879446148872375, "learning_rate": 9.633408352088022e-05, "loss": 0.4999, "step": 10369 }, { "epoch": 0.7688885593534515, "grad_norm": 0.35809099674224854, "learning_rate": 9.632408102025506e-05, "loss": 0.4927, "step": 10370 }, { "epoch": 0.7689627048268703, "grad_norm": 0.37712225317955017, "learning_rate": 9.631407851962991e-05, "loss": 0.4943, "step": 10371 }, { "epoch": 0.7690368503002891, "grad_norm": 0.36169636249542236, "learning_rate": 9.630407601900475e-05, "loss": 0.5234, "step": 10372 }, { "epoch": 0.769110995773708, "grad_norm": 0.36429306864738464, "learning_rate": 9.62940735183796e-05, "loss": 0.512, "step": 10373 }, { "epoch": 0.7691851412471269, "grad_norm": 0.35878297686576843, "learning_rate": 9.628407101775444e-05, "loss": 0.4918, "step": 10374 }, { "epoch": 0.7692592867205457, "grad_norm": 0.3649570643901825, "learning_rate": 9.627406851712929e-05, "loss": 0.5016, "step": 10375 }, { "epoch": 0.7693334321939646, "grad_norm": 0.36596181988716125, "learning_rate": 9.626406601650412e-05, "loss": 0.5184, "step": 10376 }, { "epoch": 0.7694075776673834, "grad_norm": 0.37099528312683105, "learning_rate": 9.625406351587898e-05, "loss": 0.5206, "step": 10377 }, { "epoch": 0.7694817231408022, "grad_norm": 0.37940457463264465, "learning_rate": 9.624406101525381e-05, "loss": 0.4943, "step": 10378 }, { "epoch": 0.7695558686142211, "grad_norm": 0.348511666059494, "learning_rate": 9.623405851462866e-05, "loss": 0.4802, "step": 10379 }, { "epoch": 0.76963001408764, "grad_norm": 0.3491617441177368, "learning_rate": 9.62240560140035e-05, "loss": 0.5192, "step": 10380 }, { "epoch": 0.7697041595610588, "grad_norm": 0.3736232817173004, "learning_rate": 9.621405351337835e-05, "loss": 0.5222, "step": 10381 }, { "epoch": 0.7697783050344776, "grad_norm": 0.3452153205871582, "learning_rate": 9.620405101275319e-05, "loss": 0.4916, "step": 10382 }, { "epoch": 0.7698524505078965, "grad_norm": 0.3605182468891144, "learning_rate": 9.619404851212804e-05, "loss": 0.5009, "step": 10383 }, { "epoch": 0.7699265959813153, "grad_norm": 0.35718196630477905, "learning_rate": 9.618404601150288e-05, "loss": 0.4944, "step": 10384 }, { "epoch": 0.7700007414547342, "grad_norm": 0.34545719623565674, "learning_rate": 9.617404351087773e-05, "loss": 0.4801, "step": 10385 }, { "epoch": 0.7700748869281531, "grad_norm": 0.3567116856575012, "learning_rate": 9.616404101025258e-05, "loss": 0.4976, "step": 10386 }, { "epoch": 0.7701490324015718, "grad_norm": 0.3473701477050781, "learning_rate": 9.615403850962741e-05, "loss": 0.501, "step": 10387 }, { "epoch": 0.7702231778749907, "grad_norm": 0.3932405412197113, "learning_rate": 9.614403600900226e-05, "loss": 0.53, "step": 10388 }, { "epoch": 0.7702973233484096, "grad_norm": 0.333158403635025, "learning_rate": 9.61340335083771e-05, "loss": 0.4715, "step": 10389 }, { "epoch": 0.7703714688218284, "grad_norm": 0.36194613575935364, "learning_rate": 9.612403100775195e-05, "loss": 0.5092, "step": 10390 }, { "epoch": 0.7704456142952473, "grad_norm": 0.3717033863067627, "learning_rate": 9.611402850712679e-05, "loss": 0.5117, "step": 10391 }, { "epoch": 0.7705197597686662, "grad_norm": 0.3462866544723511, "learning_rate": 9.610402600650164e-05, "loss": 0.4612, "step": 10392 }, { "epoch": 0.7705939052420849, "grad_norm": 0.3683437705039978, "learning_rate": 9.609402350587648e-05, "loss": 0.4969, "step": 10393 }, { "epoch": 0.7706680507155038, "grad_norm": 0.35788631439208984, "learning_rate": 9.608402100525131e-05, "loss": 0.4967, "step": 10394 }, { "epoch": 0.7707421961889227, "grad_norm": 0.35277947783470154, "learning_rate": 9.607401850462616e-05, "loss": 0.4922, "step": 10395 }, { "epoch": 0.7708163416623415, "grad_norm": 0.3542066514492035, "learning_rate": 9.6064016004001e-05, "loss": 0.521, "step": 10396 }, { "epoch": 0.7708904871357604, "grad_norm": 0.3648354411125183, "learning_rate": 9.605401350337585e-05, "loss": 0.5328, "step": 10397 }, { "epoch": 0.7709646326091792, "grad_norm": 0.3530896008014679, "learning_rate": 9.604401100275069e-05, "loss": 0.4871, "step": 10398 }, { "epoch": 0.771038778082598, "grad_norm": 0.35186469554901123, "learning_rate": 9.603400850212554e-05, "loss": 0.4761, "step": 10399 }, { "epoch": 0.7711129235560169, "grad_norm": 0.35775327682495117, "learning_rate": 9.602400600150038e-05, "loss": 0.4919, "step": 10400 }, { "epoch": 0.7711870690294358, "grad_norm": 0.3705780506134033, "learning_rate": 9.601400350087523e-05, "loss": 0.5266, "step": 10401 }, { "epoch": 0.7712612145028546, "grad_norm": 0.35807934403419495, "learning_rate": 9.600400100025006e-05, "loss": 0.5097, "step": 10402 }, { "epoch": 0.7713353599762734, "grad_norm": 0.3781988024711609, "learning_rate": 9.599399849962492e-05, "loss": 0.4849, "step": 10403 }, { "epoch": 0.7714095054496923, "grad_norm": 0.3677757978439331, "learning_rate": 9.598399599899975e-05, "loss": 0.5143, "step": 10404 }, { "epoch": 0.7714836509231111, "grad_norm": 0.37869155406951904, "learning_rate": 9.59739934983746e-05, "loss": 0.5228, "step": 10405 }, { "epoch": 0.77155779639653, "grad_norm": 0.3523259162902832, "learning_rate": 9.596399099774944e-05, "loss": 0.4607, "step": 10406 }, { "epoch": 0.7716319418699489, "grad_norm": 0.35645759105682373, "learning_rate": 9.595398849712428e-05, "loss": 0.4984, "step": 10407 }, { "epoch": 0.7717060873433677, "grad_norm": 0.36043116450309753, "learning_rate": 9.594398599649913e-05, "loss": 0.4935, "step": 10408 }, { "epoch": 0.7717802328167865, "grad_norm": 0.3746243417263031, "learning_rate": 9.593398349587397e-05, "loss": 0.5233, "step": 10409 }, { "epoch": 0.7718543782902054, "grad_norm": 0.3550746738910675, "learning_rate": 9.592398099524882e-05, "loss": 0.4836, "step": 10410 }, { "epoch": 0.7719285237636242, "grad_norm": 0.3731708526611328, "learning_rate": 9.591397849462365e-05, "loss": 0.471, "step": 10411 }, { "epoch": 0.7720026692370431, "grad_norm": 0.37088823318481445, "learning_rate": 9.59039759939985e-05, "loss": 0.519, "step": 10412 }, { "epoch": 0.772076814710462, "grad_norm": 0.37379515171051025, "learning_rate": 9.589397349337334e-05, "loss": 0.521, "step": 10413 }, { "epoch": 0.7721509601838807, "grad_norm": 0.3451468348503113, "learning_rate": 9.588397099274819e-05, "loss": 0.4869, "step": 10414 }, { "epoch": 0.7722251056572996, "grad_norm": 0.35909202694892883, "learning_rate": 9.587396849212303e-05, "loss": 0.5206, "step": 10415 }, { "epoch": 0.7722992511307185, "grad_norm": 0.36281347274780273, "learning_rate": 9.586396599149788e-05, "loss": 0.4897, "step": 10416 }, { "epoch": 0.7723733966041373, "grad_norm": 0.34736868739128113, "learning_rate": 9.585396349087272e-05, "loss": 0.4996, "step": 10417 }, { "epoch": 0.7724475420775562, "grad_norm": 0.3674878776073456, "learning_rate": 9.584396099024757e-05, "loss": 0.4911, "step": 10418 }, { "epoch": 0.7725216875509751, "grad_norm": 0.34705978631973267, "learning_rate": 9.583395848962242e-05, "loss": 0.4923, "step": 10419 }, { "epoch": 0.7725958330243938, "grad_norm": 0.3446896970272064, "learning_rate": 9.582395598899725e-05, "loss": 0.4969, "step": 10420 }, { "epoch": 0.7726699784978127, "grad_norm": 0.3695549964904785, "learning_rate": 9.58139534883721e-05, "loss": 0.5146, "step": 10421 }, { "epoch": 0.7727441239712316, "grad_norm": 0.35250434279441833, "learning_rate": 9.580395098774694e-05, "loss": 0.4974, "step": 10422 }, { "epoch": 0.7728182694446504, "grad_norm": 0.36500394344329834, "learning_rate": 9.579394848712179e-05, "loss": 0.4912, "step": 10423 }, { "epoch": 0.7728924149180693, "grad_norm": 0.3620055019855499, "learning_rate": 9.578394598649663e-05, "loss": 0.4801, "step": 10424 }, { "epoch": 0.7729665603914881, "grad_norm": 0.3655063807964325, "learning_rate": 9.577394348587148e-05, "loss": 0.533, "step": 10425 }, { "epoch": 0.7730407058649069, "grad_norm": 0.3501662015914917, "learning_rate": 9.576394098524632e-05, "loss": 0.4891, "step": 10426 }, { "epoch": 0.7731148513383258, "grad_norm": 0.354973703622818, "learning_rate": 9.575393848462117e-05, "loss": 0.4685, "step": 10427 }, { "epoch": 0.7731889968117447, "grad_norm": 0.3703301250934601, "learning_rate": 9.5743935983996e-05, "loss": 0.5413, "step": 10428 }, { "epoch": 0.7732631422851635, "grad_norm": 0.3526768088340759, "learning_rate": 9.573393348337086e-05, "loss": 0.4911, "step": 10429 }, { "epoch": 0.7733372877585823, "grad_norm": 0.3509540259838104, "learning_rate": 9.572393098274569e-05, "loss": 0.5033, "step": 10430 }, { "epoch": 0.7734114332320012, "grad_norm": 0.3507682979106903, "learning_rate": 9.571392848212053e-05, "loss": 0.4884, "step": 10431 }, { "epoch": 0.77348557870542, "grad_norm": 0.3577234745025635, "learning_rate": 9.570392598149538e-05, "loss": 0.521, "step": 10432 }, { "epoch": 0.7735597241788389, "grad_norm": 0.3337925970554352, "learning_rate": 9.569392348087022e-05, "loss": 0.4726, "step": 10433 }, { "epoch": 0.7736338696522578, "grad_norm": 0.3712296783924103, "learning_rate": 9.568392098024507e-05, "loss": 0.5087, "step": 10434 }, { "epoch": 0.7737080151256766, "grad_norm": 0.37947016954421997, "learning_rate": 9.56739184796199e-05, "loss": 0.5834, "step": 10435 }, { "epoch": 0.7737821605990954, "grad_norm": 0.37229928374290466, "learning_rate": 9.566391597899476e-05, "loss": 0.5252, "step": 10436 }, { "epoch": 0.7738563060725143, "grad_norm": 0.342955082654953, "learning_rate": 9.56539134783696e-05, "loss": 0.4799, "step": 10437 }, { "epoch": 0.7739304515459331, "grad_norm": 0.35229820013046265, "learning_rate": 9.564391097774444e-05, "loss": 0.5202, "step": 10438 }, { "epoch": 0.774004597019352, "grad_norm": 0.3486478626728058, "learning_rate": 9.563390847711928e-05, "loss": 0.5069, "step": 10439 }, { "epoch": 0.7740787424927709, "grad_norm": 0.36142656207084656, "learning_rate": 9.562390597649413e-05, "loss": 0.4724, "step": 10440 }, { "epoch": 0.7741528879661896, "grad_norm": 0.3472938537597656, "learning_rate": 9.561390347586897e-05, "loss": 0.4997, "step": 10441 }, { "epoch": 0.7742270334396085, "grad_norm": 0.3384167551994324, "learning_rate": 9.560390097524382e-05, "loss": 0.473, "step": 10442 }, { "epoch": 0.7743011789130274, "grad_norm": 0.33538034558296204, "learning_rate": 9.559389847461866e-05, "loss": 0.4878, "step": 10443 }, { "epoch": 0.7743753243864462, "grad_norm": 0.3510264456272125, "learning_rate": 9.55838959739935e-05, "loss": 0.4951, "step": 10444 }, { "epoch": 0.7744494698598651, "grad_norm": 0.3509939908981323, "learning_rate": 9.557389347336834e-05, "loss": 0.4883, "step": 10445 }, { "epoch": 0.774523615333284, "grad_norm": 0.33825355768203735, "learning_rate": 9.556389097274318e-05, "loss": 0.4471, "step": 10446 }, { "epoch": 0.7745977608067027, "grad_norm": 0.33542993664741516, "learning_rate": 9.555388847211803e-05, "loss": 0.457, "step": 10447 }, { "epoch": 0.7746719062801216, "grad_norm": 0.35257434844970703, "learning_rate": 9.554388597149287e-05, "loss": 0.4774, "step": 10448 }, { "epoch": 0.7747460517535405, "grad_norm": 0.3648506700992584, "learning_rate": 9.553388347086772e-05, "loss": 0.542, "step": 10449 }, { "epoch": 0.7748201972269593, "grad_norm": 0.3761986792087555, "learning_rate": 9.552388097024256e-05, "loss": 0.5219, "step": 10450 }, { "epoch": 0.7748943427003782, "grad_norm": 0.34928110241889954, "learning_rate": 9.551387846961741e-05, "loss": 0.5017, "step": 10451 }, { "epoch": 0.774968488173797, "grad_norm": 0.3591623902320862, "learning_rate": 9.550387596899224e-05, "loss": 0.5037, "step": 10452 }, { "epoch": 0.7750426336472158, "grad_norm": 0.37628498673439026, "learning_rate": 9.54938734683671e-05, "loss": 0.514, "step": 10453 }, { "epoch": 0.7751167791206347, "grad_norm": 0.3263327479362488, "learning_rate": 9.548387096774195e-05, "loss": 0.4407, "step": 10454 }, { "epoch": 0.7751909245940536, "grad_norm": 0.36253365874290466, "learning_rate": 9.547386846711678e-05, "loss": 0.4791, "step": 10455 }, { "epoch": 0.7752650700674724, "grad_norm": 0.3716861307621002, "learning_rate": 9.546386596649163e-05, "loss": 0.5179, "step": 10456 }, { "epoch": 0.7753392155408912, "grad_norm": 0.3629286587238312, "learning_rate": 9.545386346586647e-05, "loss": 0.5358, "step": 10457 }, { "epoch": 0.7754133610143101, "grad_norm": 0.35134801268577576, "learning_rate": 9.544386096524132e-05, "loss": 0.4952, "step": 10458 }, { "epoch": 0.7754875064877289, "grad_norm": 0.3444322943687439, "learning_rate": 9.543385846461616e-05, "loss": 0.4831, "step": 10459 }, { "epoch": 0.7755616519611478, "grad_norm": 0.3355075716972351, "learning_rate": 9.542385596399101e-05, "loss": 0.465, "step": 10460 }, { "epoch": 0.7756357974345666, "grad_norm": 0.35429587960243225, "learning_rate": 9.541385346336585e-05, "loss": 0.5014, "step": 10461 }, { "epoch": 0.7757099429079855, "grad_norm": 0.32878562808036804, "learning_rate": 9.54038509627407e-05, "loss": 0.4401, "step": 10462 }, { "epoch": 0.7757840883814043, "grad_norm": 0.35795721411705017, "learning_rate": 9.539384846211553e-05, "loss": 0.4918, "step": 10463 }, { "epoch": 0.7758582338548231, "grad_norm": 0.34742993116378784, "learning_rate": 9.538384596149038e-05, "loss": 0.4764, "step": 10464 }, { "epoch": 0.775932379328242, "grad_norm": 0.36046546697616577, "learning_rate": 9.537384346086522e-05, "loss": 0.5097, "step": 10465 }, { "epoch": 0.7760065248016609, "grad_norm": 0.3642370402812958, "learning_rate": 9.536384096024007e-05, "loss": 0.4639, "step": 10466 }, { "epoch": 0.7760806702750797, "grad_norm": 0.3801479637622833, "learning_rate": 9.535383845961491e-05, "loss": 0.5205, "step": 10467 }, { "epoch": 0.7761548157484985, "grad_norm": 0.36473897099494934, "learning_rate": 9.534383595898975e-05, "loss": 0.5271, "step": 10468 }, { "epoch": 0.7762289612219174, "grad_norm": 0.3374035060405731, "learning_rate": 9.53338334583646e-05, "loss": 0.4648, "step": 10469 }, { "epoch": 0.7763031066953362, "grad_norm": 0.359539270401001, "learning_rate": 9.532383095773943e-05, "loss": 0.5025, "step": 10470 }, { "epoch": 0.7763772521687551, "grad_norm": 0.35763102769851685, "learning_rate": 9.531382845711428e-05, "loss": 0.5028, "step": 10471 }, { "epoch": 0.776451397642174, "grad_norm": 0.3523227870464325, "learning_rate": 9.530382595648912e-05, "loss": 0.4959, "step": 10472 }, { "epoch": 0.7765255431155927, "grad_norm": 0.3472549021244049, "learning_rate": 9.529382345586397e-05, "loss": 0.501, "step": 10473 }, { "epoch": 0.7765996885890116, "grad_norm": 0.35530030727386475, "learning_rate": 9.528382095523881e-05, "loss": 0.5164, "step": 10474 }, { "epoch": 0.7766738340624305, "grad_norm": 0.3462807536125183, "learning_rate": 9.527381845461366e-05, "loss": 0.4702, "step": 10475 }, { "epoch": 0.7767479795358493, "grad_norm": 0.35968542098999023, "learning_rate": 9.52638159539885e-05, "loss": 0.5011, "step": 10476 }, { "epoch": 0.7768221250092682, "grad_norm": 0.3718216121196747, "learning_rate": 9.525381345336335e-05, "loss": 0.5217, "step": 10477 }, { "epoch": 0.7768962704826871, "grad_norm": 0.3631480038166046, "learning_rate": 9.524381095273819e-05, "loss": 0.5163, "step": 10478 }, { "epoch": 0.7769704159561058, "grad_norm": 0.35434380173683167, "learning_rate": 9.523380845211304e-05, "loss": 0.4593, "step": 10479 }, { "epoch": 0.7770445614295247, "grad_norm": 0.3556475043296814, "learning_rate": 9.522380595148787e-05, "loss": 0.4995, "step": 10480 }, { "epoch": 0.7771187069029436, "grad_norm": 0.3556798994541168, "learning_rate": 9.521380345086271e-05, "loss": 0.4744, "step": 10481 }, { "epoch": 0.7771928523763624, "grad_norm": 0.34275564551353455, "learning_rate": 9.520380095023756e-05, "loss": 0.4981, "step": 10482 }, { "epoch": 0.7772669978497813, "grad_norm": 0.36247462034225464, "learning_rate": 9.51937984496124e-05, "loss": 0.4958, "step": 10483 }, { "epoch": 0.7773411433232001, "grad_norm": 0.3421976566314697, "learning_rate": 9.518379594898725e-05, "loss": 0.4895, "step": 10484 }, { "epoch": 0.7774152887966189, "grad_norm": 0.3602966070175171, "learning_rate": 9.517379344836209e-05, "loss": 0.4984, "step": 10485 }, { "epoch": 0.7774894342700378, "grad_norm": 0.3840923607349396, "learning_rate": 9.516379094773694e-05, "loss": 0.5174, "step": 10486 }, { "epoch": 0.7775635797434567, "grad_norm": 0.32931768894195557, "learning_rate": 9.515378844711179e-05, "loss": 0.4506, "step": 10487 }, { "epoch": 0.7776377252168755, "grad_norm": 0.35988593101501465, "learning_rate": 9.514378594648662e-05, "loss": 0.4825, "step": 10488 }, { "epoch": 0.7777118706902943, "grad_norm": 0.37660112977027893, "learning_rate": 9.513378344586147e-05, "loss": 0.517, "step": 10489 }, { "epoch": 0.7777860161637132, "grad_norm": 0.3609338104724884, "learning_rate": 9.512378094523631e-05, "loss": 0.5123, "step": 10490 }, { "epoch": 0.777860161637132, "grad_norm": 0.3468986749649048, "learning_rate": 9.511377844461116e-05, "loss": 0.4778, "step": 10491 }, { "epoch": 0.7779343071105509, "grad_norm": 0.38998910784721375, "learning_rate": 9.510377594398601e-05, "loss": 0.5276, "step": 10492 }, { "epoch": 0.7780084525839698, "grad_norm": 0.3681139647960663, "learning_rate": 9.509377344336085e-05, "loss": 0.4832, "step": 10493 }, { "epoch": 0.7780825980573886, "grad_norm": 0.3743395507335663, "learning_rate": 9.508377094273569e-05, "loss": 0.5425, "step": 10494 }, { "epoch": 0.7781567435308074, "grad_norm": 0.3678527772426605, "learning_rate": 9.507376844211054e-05, "loss": 0.4888, "step": 10495 }, { "epoch": 0.7782308890042263, "grad_norm": 0.35270920395851135, "learning_rate": 9.506376594148537e-05, "loss": 0.4671, "step": 10496 }, { "epoch": 0.7783050344776451, "grad_norm": 0.3801622986793518, "learning_rate": 9.505376344086023e-05, "loss": 0.5629, "step": 10497 }, { "epoch": 0.778379179951064, "grad_norm": 0.36560043692588806, "learning_rate": 9.504376094023506e-05, "loss": 0.5399, "step": 10498 }, { "epoch": 0.7784533254244829, "grad_norm": 0.38590872287750244, "learning_rate": 9.503375843960991e-05, "loss": 0.5338, "step": 10499 }, { "epoch": 0.7785274708979016, "grad_norm": 0.34018802642822266, "learning_rate": 9.502375593898475e-05, "loss": 0.4657, "step": 10500 }, { "epoch": 0.7786016163713205, "grad_norm": 0.3679725229740143, "learning_rate": 9.50137534383596e-05, "loss": 0.5331, "step": 10501 }, { "epoch": 0.7786757618447394, "grad_norm": 0.3463255763053894, "learning_rate": 9.500375093773444e-05, "loss": 0.472, "step": 10502 }, { "epoch": 0.7787499073181582, "grad_norm": 0.340957909822464, "learning_rate": 9.499374843710929e-05, "loss": 0.4704, "step": 10503 }, { "epoch": 0.7788240527915771, "grad_norm": 0.36794453859329224, "learning_rate": 9.498374593648413e-05, "loss": 0.4678, "step": 10504 }, { "epoch": 0.778898198264996, "grad_norm": 0.38012611865997314, "learning_rate": 9.497374343585896e-05, "loss": 0.5138, "step": 10505 }, { "epoch": 0.7789723437384147, "grad_norm": 0.34744659066200256, "learning_rate": 9.496374093523381e-05, "loss": 0.4761, "step": 10506 }, { "epoch": 0.7790464892118336, "grad_norm": 0.3463093638420105, "learning_rate": 9.495373843460865e-05, "loss": 0.497, "step": 10507 }, { "epoch": 0.7791206346852525, "grad_norm": 0.34323474764823914, "learning_rate": 9.49437359339835e-05, "loss": 0.4712, "step": 10508 }, { "epoch": 0.7791947801586713, "grad_norm": 0.3546808660030365, "learning_rate": 9.493373343335834e-05, "loss": 0.4732, "step": 10509 }, { "epoch": 0.7792689256320902, "grad_norm": 0.3448430895805359, "learning_rate": 9.492373093273319e-05, "loss": 0.4986, "step": 10510 }, { "epoch": 0.779343071105509, "grad_norm": 0.37819764018058777, "learning_rate": 9.491372843210803e-05, "loss": 0.521, "step": 10511 }, { "epoch": 0.7794172165789278, "grad_norm": 0.3874823749065399, "learning_rate": 9.490372593148288e-05, "loss": 0.5287, "step": 10512 }, { "epoch": 0.7794913620523467, "grad_norm": 0.38166648149490356, "learning_rate": 9.489372343085771e-05, "loss": 0.5224, "step": 10513 }, { "epoch": 0.7795655075257656, "grad_norm": 0.34690532088279724, "learning_rate": 9.488372093023256e-05, "loss": 0.4667, "step": 10514 }, { "epoch": 0.7796396529991844, "grad_norm": 0.34247785806655884, "learning_rate": 9.48737184296074e-05, "loss": 0.485, "step": 10515 }, { "epoch": 0.7797137984726032, "grad_norm": 0.35802680253982544, "learning_rate": 9.486371592898225e-05, "loss": 0.4862, "step": 10516 }, { "epoch": 0.7797879439460221, "grad_norm": 0.34656989574432373, "learning_rate": 9.485371342835709e-05, "loss": 0.4929, "step": 10517 }, { "epoch": 0.7798620894194409, "grad_norm": 0.3471470773220062, "learning_rate": 9.484371092773193e-05, "loss": 0.4538, "step": 10518 }, { "epoch": 0.7799362348928598, "grad_norm": 0.3593457341194153, "learning_rate": 9.483370842710678e-05, "loss": 0.4609, "step": 10519 }, { "epoch": 0.7800103803662787, "grad_norm": 0.3616950511932373, "learning_rate": 9.482370592648163e-05, "loss": 0.5059, "step": 10520 }, { "epoch": 0.7800845258396975, "grad_norm": 0.3593864142894745, "learning_rate": 9.481370342585646e-05, "loss": 0.4955, "step": 10521 }, { "epoch": 0.7801586713131163, "grad_norm": 0.35672762989997864, "learning_rate": 9.480370092523132e-05, "loss": 0.4991, "step": 10522 }, { "epoch": 0.7802328167865352, "grad_norm": 0.35521647334098816, "learning_rate": 9.479369842460615e-05, "loss": 0.4955, "step": 10523 }, { "epoch": 0.780306962259954, "grad_norm": 0.3769858777523041, "learning_rate": 9.4783695923981e-05, "loss": 0.5212, "step": 10524 }, { "epoch": 0.7803811077333729, "grad_norm": 0.3501023054122925, "learning_rate": 9.477369342335585e-05, "loss": 0.4949, "step": 10525 }, { "epoch": 0.7804552532067918, "grad_norm": 0.35952362418174744, "learning_rate": 9.476369092273069e-05, "loss": 0.5055, "step": 10526 }, { "epoch": 0.7805293986802105, "grad_norm": 0.3665998578071594, "learning_rate": 9.475368842210554e-05, "loss": 0.4763, "step": 10527 }, { "epoch": 0.7806035441536294, "grad_norm": 0.35703811049461365, "learning_rate": 9.474368592148038e-05, "loss": 0.4962, "step": 10528 }, { "epoch": 0.7806776896270483, "grad_norm": 0.3515901565551758, "learning_rate": 9.473368342085523e-05, "loss": 0.4897, "step": 10529 }, { "epoch": 0.7807518351004671, "grad_norm": 0.3517795503139496, "learning_rate": 9.472368092023007e-05, "loss": 0.4679, "step": 10530 }, { "epoch": 0.780825980573886, "grad_norm": 0.359671413898468, "learning_rate": 9.47136784196049e-05, "loss": 0.4895, "step": 10531 }, { "epoch": 0.7809001260473049, "grad_norm": 0.3810347020626068, "learning_rate": 9.470367591897975e-05, "loss": 0.5406, "step": 10532 }, { "epoch": 0.7809742715207236, "grad_norm": 0.3706575036048889, "learning_rate": 9.469367341835459e-05, "loss": 0.5024, "step": 10533 }, { "epoch": 0.7810484169941425, "grad_norm": 0.372577041387558, "learning_rate": 9.468367091772944e-05, "loss": 0.5099, "step": 10534 }, { "epoch": 0.7811225624675614, "grad_norm": 0.36204248666763306, "learning_rate": 9.467366841710428e-05, "loss": 0.4919, "step": 10535 }, { "epoch": 0.7811967079409802, "grad_norm": 0.35456523299217224, "learning_rate": 9.466366591647913e-05, "loss": 0.4824, "step": 10536 }, { "epoch": 0.7812708534143991, "grad_norm": 0.36014196276664734, "learning_rate": 9.465366341585397e-05, "loss": 0.5011, "step": 10537 }, { "epoch": 0.7813449988878179, "grad_norm": 0.3921435475349426, "learning_rate": 9.464366091522882e-05, "loss": 0.5385, "step": 10538 }, { "epoch": 0.7814191443612367, "grad_norm": 0.3612404465675354, "learning_rate": 9.463365841460365e-05, "loss": 0.5039, "step": 10539 }, { "epoch": 0.7814932898346556, "grad_norm": 0.36008667945861816, "learning_rate": 9.46236559139785e-05, "loss": 0.4829, "step": 10540 }, { "epoch": 0.7815674353080745, "grad_norm": 0.35930633544921875, "learning_rate": 9.461365341335334e-05, "loss": 0.4818, "step": 10541 }, { "epoch": 0.7816415807814933, "grad_norm": 0.34703949093818665, "learning_rate": 9.460365091272818e-05, "loss": 0.4828, "step": 10542 }, { "epoch": 0.7817157262549121, "grad_norm": 0.3544808626174927, "learning_rate": 9.459364841210303e-05, "loss": 0.5083, "step": 10543 }, { "epoch": 0.781789871728331, "grad_norm": 0.3555953800678253, "learning_rate": 9.458364591147787e-05, "loss": 0.5021, "step": 10544 }, { "epoch": 0.7818640172017498, "grad_norm": 0.3703163266181946, "learning_rate": 9.457364341085272e-05, "loss": 0.4716, "step": 10545 }, { "epoch": 0.7819381626751687, "grad_norm": 0.3736041486263275, "learning_rate": 9.456364091022755e-05, "loss": 0.4961, "step": 10546 }, { "epoch": 0.7820123081485876, "grad_norm": 0.3716461956501007, "learning_rate": 9.45536384096024e-05, "loss": 0.4979, "step": 10547 }, { "epoch": 0.7820864536220064, "grad_norm": 0.3612592816352844, "learning_rate": 9.454363590897724e-05, "loss": 0.4962, "step": 10548 }, { "epoch": 0.7821605990954252, "grad_norm": 0.37287721037864685, "learning_rate": 9.453363340835209e-05, "loss": 0.5121, "step": 10549 }, { "epoch": 0.7822347445688441, "grad_norm": 0.3670576810836792, "learning_rate": 9.452363090772693e-05, "loss": 0.5087, "step": 10550 }, { "epoch": 0.7823088900422629, "grad_norm": 0.3383767902851105, "learning_rate": 9.451362840710178e-05, "loss": 0.4546, "step": 10551 }, { "epoch": 0.7823830355156818, "grad_norm": 0.3621656894683838, "learning_rate": 9.450362590647662e-05, "loss": 0.4983, "step": 10552 }, { "epoch": 0.7824571809891007, "grad_norm": 0.36681750416755676, "learning_rate": 9.449362340585147e-05, "loss": 0.5277, "step": 10553 }, { "epoch": 0.7825313264625194, "grad_norm": 0.3703027367591858, "learning_rate": 9.44836209052263e-05, "loss": 0.4726, "step": 10554 }, { "epoch": 0.7826054719359383, "grad_norm": 0.3608035743236542, "learning_rate": 9.447361840460116e-05, "loss": 0.5249, "step": 10555 }, { "epoch": 0.7826796174093572, "grad_norm": 0.35969439148902893, "learning_rate": 9.446361590397599e-05, "loss": 0.5054, "step": 10556 }, { "epoch": 0.782753762882776, "grad_norm": 0.3484356999397278, "learning_rate": 9.445361340335084e-05, "loss": 0.4712, "step": 10557 }, { "epoch": 0.7828279083561949, "grad_norm": 0.3667081594467163, "learning_rate": 9.44436109027257e-05, "loss": 0.5291, "step": 10558 }, { "epoch": 0.7829020538296138, "grad_norm": 0.3478836119174957, "learning_rate": 9.443360840210053e-05, "loss": 0.4799, "step": 10559 }, { "epoch": 0.7829761993030325, "grad_norm": 0.35571402311325073, "learning_rate": 9.442360590147538e-05, "loss": 0.5128, "step": 10560 }, { "epoch": 0.7830503447764514, "grad_norm": 0.3741092383861542, "learning_rate": 9.441360340085022e-05, "loss": 0.5419, "step": 10561 }, { "epoch": 0.7831244902498703, "grad_norm": 0.35566043853759766, "learning_rate": 9.440360090022507e-05, "loss": 0.5043, "step": 10562 }, { "epoch": 0.7831986357232891, "grad_norm": 0.34608593583106995, "learning_rate": 9.439359839959991e-05, "loss": 0.5205, "step": 10563 }, { "epoch": 0.783272781196708, "grad_norm": 0.35136228799819946, "learning_rate": 9.438359589897476e-05, "loss": 0.4843, "step": 10564 }, { "epoch": 0.7833469266701268, "grad_norm": 0.37318670749664307, "learning_rate": 9.43735933983496e-05, "loss": 0.5171, "step": 10565 }, { "epoch": 0.7834210721435456, "grad_norm": 0.3388651907444, "learning_rate": 9.436359089772445e-05, "loss": 0.4249, "step": 10566 }, { "epoch": 0.7834952176169645, "grad_norm": 0.3579186499118805, "learning_rate": 9.435358839709928e-05, "loss": 0.496, "step": 10567 }, { "epoch": 0.7835693630903834, "grad_norm": 0.34778934717178345, "learning_rate": 9.434358589647412e-05, "loss": 0.4664, "step": 10568 }, { "epoch": 0.7836435085638022, "grad_norm": 0.35789796710014343, "learning_rate": 9.433358339584897e-05, "loss": 0.5118, "step": 10569 }, { "epoch": 0.783717654037221, "grad_norm": 0.37993714213371277, "learning_rate": 9.432358089522381e-05, "loss": 0.5546, "step": 10570 }, { "epoch": 0.7837917995106399, "grad_norm": 0.36980196833610535, "learning_rate": 9.431357839459866e-05, "loss": 0.5109, "step": 10571 }, { "epoch": 0.7838659449840587, "grad_norm": 0.4165746569633484, "learning_rate": 9.43035758939735e-05, "loss": 0.5379, "step": 10572 }, { "epoch": 0.7839400904574776, "grad_norm": 0.3441542387008667, "learning_rate": 9.429357339334835e-05, "loss": 0.4627, "step": 10573 }, { "epoch": 0.7840142359308964, "grad_norm": 0.34945419430732727, "learning_rate": 9.428357089272318e-05, "loss": 0.4667, "step": 10574 }, { "epoch": 0.7840883814043152, "grad_norm": 0.3472583591938019, "learning_rate": 9.427356839209803e-05, "loss": 0.4697, "step": 10575 }, { "epoch": 0.7841625268777341, "grad_norm": 0.36634594202041626, "learning_rate": 9.426356589147287e-05, "loss": 0.4872, "step": 10576 }, { "epoch": 0.7842366723511529, "grad_norm": 0.36000797152519226, "learning_rate": 9.425356339084772e-05, "loss": 0.4899, "step": 10577 }, { "epoch": 0.7843108178245718, "grad_norm": 0.3724544048309326, "learning_rate": 9.424356089022256e-05, "loss": 0.5029, "step": 10578 }, { "epoch": 0.7843849632979907, "grad_norm": 0.3584758937358856, "learning_rate": 9.42335583895974e-05, "loss": 0.4858, "step": 10579 }, { "epoch": 0.7844591087714095, "grad_norm": 0.37405911087989807, "learning_rate": 9.422355588897225e-05, "loss": 0.5048, "step": 10580 }, { "epoch": 0.7845332542448283, "grad_norm": 0.4056214392185211, "learning_rate": 9.421355338834708e-05, "loss": 0.5276, "step": 10581 }, { "epoch": 0.7846073997182472, "grad_norm": 0.36647459864616394, "learning_rate": 9.420355088772193e-05, "loss": 0.4772, "step": 10582 }, { "epoch": 0.784681545191666, "grad_norm": 0.37061455845832825, "learning_rate": 9.419354838709677e-05, "loss": 0.4975, "step": 10583 }, { "epoch": 0.7847556906650849, "grad_norm": 0.36860767006874084, "learning_rate": 9.418354588647162e-05, "loss": 0.5218, "step": 10584 }, { "epoch": 0.7848298361385038, "grad_norm": 0.37089717388153076, "learning_rate": 9.417354338584646e-05, "loss": 0.553, "step": 10585 }, { "epoch": 0.7849039816119225, "grad_norm": 0.3560965657234192, "learning_rate": 9.416354088522131e-05, "loss": 0.4561, "step": 10586 }, { "epoch": 0.7849781270853414, "grad_norm": 0.341459184885025, "learning_rate": 9.415353838459615e-05, "loss": 0.4762, "step": 10587 }, { "epoch": 0.7850522725587603, "grad_norm": 0.36447104811668396, "learning_rate": 9.4143535883971e-05, "loss": 0.5274, "step": 10588 }, { "epoch": 0.7851264180321791, "grad_norm": 0.36935845017433167, "learning_rate": 9.413353338334583e-05, "loss": 0.516, "step": 10589 }, { "epoch": 0.785200563505598, "grad_norm": 0.33687615394592285, "learning_rate": 9.412353088272068e-05, "loss": 0.4904, "step": 10590 }, { "epoch": 0.7852747089790169, "grad_norm": 0.36523565649986267, "learning_rate": 9.411352838209552e-05, "loss": 0.5183, "step": 10591 }, { "epoch": 0.7853488544524356, "grad_norm": 0.37743255496025085, "learning_rate": 9.410352588147037e-05, "loss": 0.5463, "step": 10592 }, { "epoch": 0.7854229999258545, "grad_norm": 0.35147103667259216, "learning_rate": 9.409352338084522e-05, "loss": 0.4827, "step": 10593 }, { "epoch": 0.7854971453992734, "grad_norm": 0.3648141920566559, "learning_rate": 9.408352088022006e-05, "loss": 0.519, "step": 10594 }, { "epoch": 0.7855712908726922, "grad_norm": 0.3710338771343231, "learning_rate": 9.407351837959491e-05, "loss": 0.541, "step": 10595 }, { "epoch": 0.7856454363461111, "grad_norm": 0.37891823053359985, "learning_rate": 9.406351587896975e-05, "loss": 0.5521, "step": 10596 }, { "epoch": 0.7857195818195299, "grad_norm": 0.3518955409526825, "learning_rate": 9.40535133783446e-05, "loss": 0.5081, "step": 10597 }, { "epoch": 0.7857937272929487, "grad_norm": 0.34685441851615906, "learning_rate": 9.404351087771944e-05, "loss": 0.4856, "step": 10598 }, { "epoch": 0.7858678727663676, "grad_norm": 0.3577578365802765, "learning_rate": 9.403350837709429e-05, "loss": 0.4982, "step": 10599 }, { "epoch": 0.7859420182397865, "grad_norm": 0.36138132214546204, "learning_rate": 9.402350587646912e-05, "loss": 0.4766, "step": 10600 }, { "epoch": 0.7860161637132053, "grad_norm": 0.3807581961154938, "learning_rate": 9.401350337584397e-05, "loss": 0.5285, "step": 10601 }, { "epoch": 0.7860903091866241, "grad_norm": 0.34315237402915955, "learning_rate": 9.400350087521881e-05, "loss": 0.4613, "step": 10602 }, { "epoch": 0.786164454660043, "grad_norm": 0.3529280126094818, "learning_rate": 9.399349837459366e-05, "loss": 0.4957, "step": 10603 }, { "epoch": 0.7862386001334618, "grad_norm": 0.35455521941185, "learning_rate": 9.39834958739685e-05, "loss": 0.4732, "step": 10604 }, { "epoch": 0.7863127456068807, "grad_norm": 0.3520265519618988, "learning_rate": 9.397349337334334e-05, "loss": 0.4961, "step": 10605 }, { "epoch": 0.7863868910802996, "grad_norm": 0.35875195264816284, "learning_rate": 9.396349087271819e-05, "loss": 0.4895, "step": 10606 }, { "epoch": 0.7864610365537184, "grad_norm": 0.35028383135795593, "learning_rate": 9.395348837209302e-05, "loss": 0.4839, "step": 10607 }, { "epoch": 0.7865351820271372, "grad_norm": 0.37428098917007446, "learning_rate": 9.394348587146787e-05, "loss": 0.4591, "step": 10608 }, { "epoch": 0.7866093275005561, "grad_norm": 0.3784806728363037, "learning_rate": 9.393348337084271e-05, "loss": 0.5278, "step": 10609 }, { "epoch": 0.7866834729739749, "grad_norm": 0.3740738034248352, "learning_rate": 9.392348087021756e-05, "loss": 0.4968, "step": 10610 }, { "epoch": 0.7867576184473938, "grad_norm": 0.3776615858078003, "learning_rate": 9.39134783695924e-05, "loss": 0.5257, "step": 10611 }, { "epoch": 0.7868317639208127, "grad_norm": 0.3616619110107422, "learning_rate": 9.390347586896725e-05, "loss": 0.5159, "step": 10612 }, { "epoch": 0.7869059093942314, "grad_norm": 0.3514534533023834, "learning_rate": 9.389347336834209e-05, "loss": 0.4908, "step": 10613 }, { "epoch": 0.7869800548676503, "grad_norm": 0.38150864839553833, "learning_rate": 9.388347086771694e-05, "loss": 0.5409, "step": 10614 }, { "epoch": 0.7870542003410692, "grad_norm": 0.35773175954818726, "learning_rate": 9.387346836709177e-05, "loss": 0.5221, "step": 10615 }, { "epoch": 0.787128345814488, "grad_norm": 0.3391464948654175, "learning_rate": 9.386346586646663e-05, "loss": 0.4595, "step": 10616 }, { "epoch": 0.7872024912879069, "grad_norm": 0.35723716020584106, "learning_rate": 9.385346336584146e-05, "loss": 0.4851, "step": 10617 }, { "epoch": 0.7872766367613258, "grad_norm": 0.37366771697998047, "learning_rate": 9.38434608652163e-05, "loss": 0.4544, "step": 10618 }, { "epoch": 0.7873507822347445, "grad_norm": 0.38605308532714844, "learning_rate": 9.383345836459115e-05, "loss": 0.5365, "step": 10619 }, { "epoch": 0.7874249277081634, "grad_norm": 0.3554704189300537, "learning_rate": 9.382345586396599e-05, "loss": 0.4818, "step": 10620 }, { "epoch": 0.7874990731815823, "grad_norm": 0.3550224006175995, "learning_rate": 9.381345336334084e-05, "loss": 0.5068, "step": 10621 }, { "epoch": 0.7875732186550011, "grad_norm": 0.3445671498775482, "learning_rate": 9.380345086271567e-05, "loss": 0.4989, "step": 10622 }, { "epoch": 0.78764736412842, "grad_norm": 0.3840441107749939, "learning_rate": 9.379344836209053e-05, "loss": 0.4642, "step": 10623 }, { "epoch": 0.7877215096018388, "grad_norm": 0.33169087767601013, "learning_rate": 9.378344586146536e-05, "loss": 0.4819, "step": 10624 }, { "epoch": 0.7877956550752576, "grad_norm": 0.3587847054004669, "learning_rate": 9.377344336084021e-05, "loss": 0.4679, "step": 10625 }, { "epoch": 0.7878698005486765, "grad_norm": 0.34913742542266846, "learning_rate": 9.376344086021506e-05, "loss": 0.4868, "step": 10626 }, { "epoch": 0.7879439460220954, "grad_norm": 0.3311695158481598, "learning_rate": 9.37534383595899e-05, "loss": 0.4708, "step": 10627 }, { "epoch": 0.7880180914955142, "grad_norm": 0.3881404399871826, "learning_rate": 9.374343585896475e-05, "loss": 0.4922, "step": 10628 }, { "epoch": 0.788092236968933, "grad_norm": 0.3521953821182251, "learning_rate": 9.373343335833959e-05, "loss": 0.5054, "step": 10629 }, { "epoch": 0.7881663824423519, "grad_norm": 0.32783058285713196, "learning_rate": 9.372343085771444e-05, "loss": 0.4846, "step": 10630 }, { "epoch": 0.7882405279157707, "grad_norm": 0.3673240840435028, "learning_rate": 9.371342835708928e-05, "loss": 0.5208, "step": 10631 }, { "epoch": 0.7883146733891896, "grad_norm": 0.38212794065475464, "learning_rate": 9.370342585646413e-05, "loss": 0.5158, "step": 10632 }, { "epoch": 0.7883888188626085, "grad_norm": 0.36537468433380127, "learning_rate": 9.369342335583896e-05, "loss": 0.5455, "step": 10633 }, { "epoch": 0.7884629643360272, "grad_norm": 0.350455641746521, "learning_rate": 9.368342085521381e-05, "loss": 0.5135, "step": 10634 }, { "epoch": 0.7885371098094461, "grad_norm": 0.37372368574142456, "learning_rate": 9.367341835458865e-05, "loss": 0.5332, "step": 10635 }, { "epoch": 0.788611255282865, "grad_norm": 0.37927284836769104, "learning_rate": 9.36634158539635e-05, "loss": 0.5327, "step": 10636 }, { "epoch": 0.7886854007562838, "grad_norm": 0.36181437969207764, "learning_rate": 9.365341335333834e-05, "loss": 0.5019, "step": 10637 }, { "epoch": 0.7887595462297027, "grad_norm": 0.38530054688453674, "learning_rate": 9.364341085271319e-05, "loss": 0.4812, "step": 10638 }, { "epoch": 0.7888336917031216, "grad_norm": 0.3495978116989136, "learning_rate": 9.363340835208803e-05, "loss": 0.4699, "step": 10639 }, { "epoch": 0.7889078371765403, "grad_norm": 0.34301844239234924, "learning_rate": 9.362340585146288e-05, "loss": 0.4414, "step": 10640 }, { "epoch": 0.7889819826499592, "grad_norm": 0.36853712797164917, "learning_rate": 9.361340335083771e-05, "loss": 0.5207, "step": 10641 }, { "epoch": 0.7890561281233781, "grad_norm": 0.35084235668182373, "learning_rate": 9.360340085021255e-05, "loss": 0.4716, "step": 10642 }, { "epoch": 0.7891302735967969, "grad_norm": 0.3769429326057434, "learning_rate": 9.35933983495874e-05, "loss": 0.5177, "step": 10643 }, { "epoch": 0.7892044190702158, "grad_norm": 0.3565126955509186, "learning_rate": 9.358339584896224e-05, "loss": 0.4882, "step": 10644 }, { "epoch": 0.7892785645436347, "grad_norm": 0.3490021824836731, "learning_rate": 9.357339334833709e-05, "loss": 0.4854, "step": 10645 }, { "epoch": 0.7893527100170534, "grad_norm": 0.3652941882610321, "learning_rate": 9.356339084771193e-05, "loss": 0.5075, "step": 10646 }, { "epoch": 0.7894268554904723, "grad_norm": 0.39561349153518677, "learning_rate": 9.355338834708678e-05, "loss": 0.5292, "step": 10647 }, { "epoch": 0.7895010009638912, "grad_norm": 0.34971803426742554, "learning_rate": 9.354338584646162e-05, "loss": 0.4765, "step": 10648 }, { "epoch": 0.78957514643731, "grad_norm": 0.33757296204566956, "learning_rate": 9.353338334583647e-05, "loss": 0.4642, "step": 10649 }, { "epoch": 0.7896492919107289, "grad_norm": 0.36840760707855225, "learning_rate": 9.35233808452113e-05, "loss": 0.5268, "step": 10650 }, { "epoch": 0.7897234373841477, "grad_norm": 0.36981886625289917, "learning_rate": 9.351337834458615e-05, "loss": 0.4938, "step": 10651 }, { "epoch": 0.7897975828575665, "grad_norm": 0.3788394033908844, "learning_rate": 9.350337584396099e-05, "loss": 0.5576, "step": 10652 }, { "epoch": 0.7898717283309854, "grad_norm": 0.36111894249916077, "learning_rate": 9.349337334333584e-05, "loss": 0.4635, "step": 10653 }, { "epoch": 0.7899458738044043, "grad_norm": 0.3434140086174011, "learning_rate": 9.348337084271068e-05, "loss": 0.4728, "step": 10654 }, { "epoch": 0.7900200192778231, "grad_norm": 0.3593287765979767, "learning_rate": 9.347336834208552e-05, "loss": 0.5053, "step": 10655 }, { "epoch": 0.790094164751242, "grad_norm": 0.3677487373352051, "learning_rate": 9.346336584146037e-05, "loss": 0.4825, "step": 10656 }, { "epoch": 0.7901683102246608, "grad_norm": 0.365540474653244, "learning_rate": 9.34533633408352e-05, "loss": 0.472, "step": 10657 }, { "epoch": 0.7902424556980796, "grad_norm": 0.3415781557559967, "learning_rate": 9.344336084021005e-05, "loss": 0.4598, "step": 10658 }, { "epoch": 0.7903166011714985, "grad_norm": 0.34772545099258423, "learning_rate": 9.343335833958489e-05, "loss": 0.5127, "step": 10659 }, { "epoch": 0.7903907466449174, "grad_norm": 0.3529919385910034, "learning_rate": 9.342335583895974e-05, "loss": 0.4908, "step": 10660 }, { "epoch": 0.7904648921183361, "grad_norm": 0.3542899489402771, "learning_rate": 9.341335333833459e-05, "loss": 0.4435, "step": 10661 }, { "epoch": 0.790539037591755, "grad_norm": 0.33181488513946533, "learning_rate": 9.340335083770943e-05, "loss": 0.4522, "step": 10662 }, { "epoch": 0.7906131830651739, "grad_norm": 0.34063979983329773, "learning_rate": 9.339334833708428e-05, "loss": 0.4637, "step": 10663 }, { "epoch": 0.7906873285385927, "grad_norm": 0.37220948934555054, "learning_rate": 9.338334583645913e-05, "loss": 0.4808, "step": 10664 }, { "epoch": 0.7907614740120116, "grad_norm": 0.38459476828575134, "learning_rate": 9.337334333583397e-05, "loss": 0.5043, "step": 10665 }, { "epoch": 0.7908356194854305, "grad_norm": 0.3898926079273224, "learning_rate": 9.33633408352088e-05, "loss": 0.5267, "step": 10666 }, { "epoch": 0.7909097649588492, "grad_norm": 0.3780859410762787, "learning_rate": 9.335333833458366e-05, "loss": 0.5291, "step": 10667 }, { "epoch": 0.7909839104322681, "grad_norm": 0.33933025598526, "learning_rate": 9.334333583395849e-05, "loss": 0.4712, "step": 10668 }, { "epoch": 0.791058055905687, "grad_norm": 0.3671548366546631, "learning_rate": 9.333333333333334e-05, "loss": 0.5146, "step": 10669 }, { "epoch": 0.7911322013791058, "grad_norm": 0.34904149174690247, "learning_rate": 9.332333083270818e-05, "loss": 0.4919, "step": 10670 }, { "epoch": 0.7912063468525247, "grad_norm": 0.37680187821388245, "learning_rate": 9.331332833208303e-05, "loss": 0.5408, "step": 10671 }, { "epoch": 0.7912804923259436, "grad_norm": 0.3581548035144806, "learning_rate": 9.330332583145787e-05, "loss": 0.4868, "step": 10672 }, { "epoch": 0.7913546377993623, "grad_norm": 0.3614915907382965, "learning_rate": 9.329332333083272e-05, "loss": 0.5152, "step": 10673 }, { "epoch": 0.7914287832727812, "grad_norm": 0.36391836404800415, "learning_rate": 9.328332083020756e-05, "loss": 0.4912, "step": 10674 }, { "epoch": 0.7915029287462001, "grad_norm": 0.3658095598220825, "learning_rate": 9.32733183295824e-05, "loss": 0.493, "step": 10675 }, { "epoch": 0.7915770742196189, "grad_norm": 0.3458023965358734, "learning_rate": 9.326331582895724e-05, "loss": 0.4719, "step": 10676 }, { "epoch": 0.7916512196930378, "grad_norm": 0.340280145406723, "learning_rate": 9.32533133283321e-05, "loss": 0.4621, "step": 10677 }, { "epoch": 0.7917253651664566, "grad_norm": 0.34546858072280884, "learning_rate": 9.324331082770693e-05, "loss": 0.4771, "step": 10678 }, { "epoch": 0.7917995106398754, "grad_norm": 0.3467850983142853, "learning_rate": 9.323330832708177e-05, "loss": 0.488, "step": 10679 }, { "epoch": 0.7918736561132943, "grad_norm": 0.3469753563404083, "learning_rate": 9.322330582645662e-05, "loss": 0.4835, "step": 10680 }, { "epoch": 0.7919478015867132, "grad_norm": 0.34571897983551025, "learning_rate": 9.321330332583146e-05, "loss": 0.48, "step": 10681 }, { "epoch": 0.792021947060132, "grad_norm": 0.3533506691455841, "learning_rate": 9.32033008252063e-05, "loss": 0.5119, "step": 10682 }, { "epoch": 0.7920960925335508, "grad_norm": 0.3577142059803009, "learning_rate": 9.319329832458114e-05, "loss": 0.4891, "step": 10683 }, { "epoch": 0.7921702380069697, "grad_norm": 0.35070720314979553, "learning_rate": 9.3183295823956e-05, "loss": 0.4997, "step": 10684 }, { "epoch": 0.7922443834803885, "grad_norm": 0.35648906230926514, "learning_rate": 9.317329332333083e-05, "loss": 0.4698, "step": 10685 }, { "epoch": 0.7923185289538074, "grad_norm": 0.3809748589992523, "learning_rate": 9.316329082270568e-05, "loss": 0.5131, "step": 10686 }, { "epoch": 0.7923926744272262, "grad_norm": 0.3393338620662689, "learning_rate": 9.315328832208052e-05, "loss": 0.4789, "step": 10687 }, { "epoch": 0.792466819900645, "grad_norm": 0.3686874806880951, "learning_rate": 9.314328582145537e-05, "loss": 0.4851, "step": 10688 }, { "epoch": 0.7925409653740639, "grad_norm": 0.3358619511127472, "learning_rate": 9.313328332083021e-05, "loss": 0.4561, "step": 10689 }, { "epoch": 0.7926151108474827, "grad_norm": 0.3783683478832245, "learning_rate": 9.312328082020506e-05, "loss": 0.5094, "step": 10690 }, { "epoch": 0.7926892563209016, "grad_norm": 0.35972297191619873, "learning_rate": 9.31132783195799e-05, "loss": 0.4782, "step": 10691 }, { "epoch": 0.7927634017943205, "grad_norm": 0.34003108739852905, "learning_rate": 9.310327581895473e-05, "loss": 0.449, "step": 10692 }, { "epoch": 0.7928375472677393, "grad_norm": 0.35901251435279846, "learning_rate": 9.309327331832958e-05, "loss": 0.4966, "step": 10693 }, { "epoch": 0.7929116927411581, "grad_norm": 0.3437103033065796, "learning_rate": 9.308327081770443e-05, "loss": 0.4725, "step": 10694 }, { "epoch": 0.792985838214577, "grad_norm": 0.35264644026756287, "learning_rate": 9.307326831707927e-05, "loss": 0.4795, "step": 10695 }, { "epoch": 0.7930599836879958, "grad_norm": 0.3773893415927887, "learning_rate": 9.306326581645412e-05, "loss": 0.472, "step": 10696 }, { "epoch": 0.7931341291614147, "grad_norm": 0.38212281465530396, "learning_rate": 9.305326331582896e-05, "loss": 0.5444, "step": 10697 }, { "epoch": 0.7932082746348336, "grad_norm": 0.3663574755191803, "learning_rate": 9.304326081520381e-05, "loss": 0.5182, "step": 10698 }, { "epoch": 0.7932824201082523, "grad_norm": 0.36701762676239014, "learning_rate": 9.303325831457866e-05, "loss": 0.5152, "step": 10699 }, { "epoch": 0.7933565655816712, "grad_norm": 0.3827488422393799, "learning_rate": 9.30232558139535e-05, "loss": 0.5214, "step": 10700 }, { "epoch": 0.7934307110550901, "grad_norm": 0.3656034469604492, "learning_rate": 9.301325331332835e-05, "loss": 0.5122, "step": 10701 }, { "epoch": 0.7935048565285089, "grad_norm": 0.3569278419017792, "learning_rate": 9.300325081270318e-05, "loss": 0.4911, "step": 10702 }, { "epoch": 0.7935790020019278, "grad_norm": 0.34459441900253296, "learning_rate": 9.299324831207802e-05, "loss": 0.4898, "step": 10703 }, { "epoch": 0.7936531474753467, "grad_norm": 0.38288822770118713, "learning_rate": 9.298324581145287e-05, "loss": 0.5338, "step": 10704 }, { "epoch": 0.7937272929487654, "grad_norm": 0.3755917251110077, "learning_rate": 9.297324331082771e-05, "loss": 0.5317, "step": 10705 }, { "epoch": 0.7938014384221843, "grad_norm": 0.3539672791957855, "learning_rate": 9.296324081020256e-05, "loss": 0.4878, "step": 10706 }, { "epoch": 0.7938755838956032, "grad_norm": 0.31966572999954224, "learning_rate": 9.29532383095774e-05, "loss": 0.4409, "step": 10707 }, { "epoch": 0.793949729369022, "grad_norm": 0.3495025932788849, "learning_rate": 9.294323580895225e-05, "loss": 0.4966, "step": 10708 }, { "epoch": 0.7940238748424409, "grad_norm": 0.36787673830986023, "learning_rate": 9.293323330832708e-05, "loss": 0.5027, "step": 10709 }, { "epoch": 0.7940980203158597, "grad_norm": 0.32879894971847534, "learning_rate": 9.292323080770193e-05, "loss": 0.4723, "step": 10710 }, { "epoch": 0.7941721657892785, "grad_norm": 0.3789636492729187, "learning_rate": 9.291322830707677e-05, "loss": 0.5337, "step": 10711 }, { "epoch": 0.7942463112626974, "grad_norm": 0.3505846858024597, "learning_rate": 9.290322580645162e-05, "loss": 0.4718, "step": 10712 }, { "epoch": 0.7943204567361163, "grad_norm": 0.34630104899406433, "learning_rate": 9.289322330582646e-05, "loss": 0.4842, "step": 10713 }, { "epoch": 0.7943946022095351, "grad_norm": 0.36779725551605225, "learning_rate": 9.288322080520131e-05, "loss": 0.4878, "step": 10714 }, { "epoch": 0.794468747682954, "grad_norm": 0.34861359000205994, "learning_rate": 9.287321830457615e-05, "loss": 0.4707, "step": 10715 }, { "epoch": 0.7945428931563728, "grad_norm": 0.34249764680862427, "learning_rate": 9.286321580395098e-05, "loss": 0.4625, "step": 10716 }, { "epoch": 0.7946170386297916, "grad_norm": 0.34145811200141907, "learning_rate": 9.285321330332584e-05, "loss": 0.4783, "step": 10717 }, { "epoch": 0.7946911841032105, "grad_norm": 0.4063206613063812, "learning_rate": 9.284321080270067e-05, "loss": 0.5268, "step": 10718 }, { "epoch": 0.7947653295766294, "grad_norm": 0.36946770548820496, "learning_rate": 9.283320830207552e-05, "loss": 0.4939, "step": 10719 }, { "epoch": 0.7948394750500481, "grad_norm": 0.37233999371528625, "learning_rate": 9.282320580145036e-05, "loss": 0.5337, "step": 10720 }, { "epoch": 0.794913620523467, "grad_norm": 0.3496728539466858, "learning_rate": 9.281320330082521e-05, "loss": 0.502, "step": 10721 }, { "epoch": 0.7949877659968859, "grad_norm": 0.3756648004055023, "learning_rate": 9.280320080020005e-05, "loss": 0.5004, "step": 10722 }, { "epoch": 0.7950619114703047, "grad_norm": 0.3594758212566376, "learning_rate": 9.27931982995749e-05, "loss": 0.5116, "step": 10723 }, { "epoch": 0.7951360569437236, "grad_norm": 0.36738184094429016, "learning_rate": 9.278319579894974e-05, "loss": 0.5333, "step": 10724 }, { "epoch": 0.7952102024171425, "grad_norm": 0.3337337076663971, "learning_rate": 9.277319329832459e-05, "loss": 0.4646, "step": 10725 }, { "epoch": 0.7952843478905612, "grad_norm": 0.36507630348205566, "learning_rate": 9.276319079769942e-05, "loss": 0.4972, "step": 10726 }, { "epoch": 0.7953584933639801, "grad_norm": 0.35289981961250305, "learning_rate": 9.275318829707427e-05, "loss": 0.5076, "step": 10727 }, { "epoch": 0.795432638837399, "grad_norm": 0.3540422022342682, "learning_rate": 9.274318579644911e-05, "loss": 0.5074, "step": 10728 }, { "epoch": 0.7955067843108178, "grad_norm": 0.3745579421520233, "learning_rate": 9.273318329582396e-05, "loss": 0.5057, "step": 10729 }, { "epoch": 0.7955809297842367, "grad_norm": 0.39622434973716736, "learning_rate": 9.27231807951988e-05, "loss": 0.5246, "step": 10730 }, { "epoch": 0.7956550752576556, "grad_norm": 0.3466435372829437, "learning_rate": 9.271317829457365e-05, "loss": 0.4737, "step": 10731 }, { "epoch": 0.7957292207310743, "grad_norm": 0.3697254955768585, "learning_rate": 9.27031757939485e-05, "loss": 0.4898, "step": 10732 }, { "epoch": 0.7958033662044932, "grad_norm": 0.35742974281311035, "learning_rate": 9.269317329332334e-05, "loss": 0.5053, "step": 10733 }, { "epoch": 0.7958775116779121, "grad_norm": 0.3678221106529236, "learning_rate": 9.268317079269819e-05, "loss": 0.4804, "step": 10734 }, { "epoch": 0.7959516571513309, "grad_norm": 0.34074661135673523, "learning_rate": 9.267316829207302e-05, "loss": 0.4748, "step": 10735 }, { "epoch": 0.7960258026247498, "grad_norm": 0.3485271632671356, "learning_rate": 9.266316579144788e-05, "loss": 0.488, "step": 10736 }, { "epoch": 0.7960999480981686, "grad_norm": 0.3583712577819824, "learning_rate": 9.265316329082271e-05, "loss": 0.444, "step": 10737 }, { "epoch": 0.7961740935715874, "grad_norm": 0.3784906566143036, "learning_rate": 9.264316079019756e-05, "loss": 0.4652, "step": 10738 }, { "epoch": 0.7962482390450063, "grad_norm": 0.3971347212791443, "learning_rate": 9.26331582895724e-05, "loss": 0.5285, "step": 10739 }, { "epoch": 0.7963223845184252, "grad_norm": 0.373648464679718, "learning_rate": 9.262315578894725e-05, "loss": 0.4788, "step": 10740 }, { "epoch": 0.796396529991844, "grad_norm": 0.35574057698249817, "learning_rate": 9.261315328832209e-05, "loss": 0.5098, "step": 10741 }, { "epoch": 0.7964706754652628, "grad_norm": 0.359948992729187, "learning_rate": 9.260315078769693e-05, "loss": 0.4811, "step": 10742 }, { "epoch": 0.7965448209386817, "grad_norm": 0.3588821589946747, "learning_rate": 9.259314828707178e-05, "loss": 0.5074, "step": 10743 }, { "epoch": 0.7966189664121005, "grad_norm": 0.3770267367362976, "learning_rate": 9.258314578644661e-05, "loss": 0.5, "step": 10744 }, { "epoch": 0.7966931118855194, "grad_norm": 0.36829590797424316, "learning_rate": 9.257314328582146e-05, "loss": 0.4907, "step": 10745 }, { "epoch": 0.7967672573589383, "grad_norm": 0.37989872694015503, "learning_rate": 9.25631407851963e-05, "loss": 0.5239, "step": 10746 }, { "epoch": 0.796841402832357, "grad_norm": 0.3725427985191345, "learning_rate": 9.255313828457115e-05, "loss": 0.5336, "step": 10747 }, { "epoch": 0.7969155483057759, "grad_norm": 0.35637539625167847, "learning_rate": 9.254313578394599e-05, "loss": 0.502, "step": 10748 }, { "epoch": 0.7969896937791948, "grad_norm": 0.3613760769367218, "learning_rate": 9.253313328332084e-05, "loss": 0.497, "step": 10749 }, { "epoch": 0.7970638392526136, "grad_norm": 0.3623945116996765, "learning_rate": 9.252313078269568e-05, "loss": 0.5025, "step": 10750 }, { "epoch": 0.7971379847260325, "grad_norm": 0.3672873377799988, "learning_rate": 9.251312828207053e-05, "loss": 0.5315, "step": 10751 }, { "epoch": 0.7972121301994514, "grad_norm": 0.36168375611305237, "learning_rate": 9.250312578144536e-05, "loss": 0.4848, "step": 10752 }, { "epoch": 0.7972862756728701, "grad_norm": 0.37404730916023254, "learning_rate": 9.24931232808202e-05, "loss": 0.5368, "step": 10753 }, { "epoch": 0.797360421146289, "grad_norm": 0.3620457351207733, "learning_rate": 9.248312078019505e-05, "loss": 0.519, "step": 10754 }, { "epoch": 0.7974345666197079, "grad_norm": 0.3618786633014679, "learning_rate": 9.247311827956989e-05, "loss": 0.5124, "step": 10755 }, { "epoch": 0.7975087120931267, "grad_norm": 0.3344404399394989, "learning_rate": 9.246311577894474e-05, "loss": 0.452, "step": 10756 }, { "epoch": 0.7975828575665456, "grad_norm": 0.3552578389644623, "learning_rate": 9.245311327831958e-05, "loss": 0.472, "step": 10757 }, { "epoch": 0.7976570030399645, "grad_norm": 0.3486561179161072, "learning_rate": 9.244311077769443e-05, "loss": 0.5004, "step": 10758 }, { "epoch": 0.7977311485133832, "grad_norm": 0.37225672602653503, "learning_rate": 9.243310827706926e-05, "loss": 0.55, "step": 10759 }, { "epoch": 0.7978052939868021, "grad_norm": 0.3789905905723572, "learning_rate": 9.242310577644411e-05, "loss": 0.5057, "step": 10760 }, { "epoch": 0.797879439460221, "grad_norm": 0.349774032831192, "learning_rate": 9.241310327581895e-05, "loss": 0.4788, "step": 10761 }, { "epoch": 0.7979535849336398, "grad_norm": 0.3585333228111267, "learning_rate": 9.24031007751938e-05, "loss": 0.5204, "step": 10762 }, { "epoch": 0.7980277304070587, "grad_norm": 0.34559738636016846, "learning_rate": 9.239309827456864e-05, "loss": 0.4863, "step": 10763 }, { "epoch": 0.7981018758804775, "grad_norm": 0.3663913607597351, "learning_rate": 9.238309577394349e-05, "loss": 0.5479, "step": 10764 }, { "epoch": 0.7981760213538963, "grad_norm": 0.3540685474872589, "learning_rate": 9.237309327331834e-05, "loss": 0.498, "step": 10765 }, { "epoch": 0.7982501668273152, "grad_norm": 0.3937431275844574, "learning_rate": 9.236309077269318e-05, "loss": 0.5178, "step": 10766 }, { "epoch": 0.7983243123007341, "grad_norm": 0.3663994371891022, "learning_rate": 9.235308827206803e-05, "loss": 0.5025, "step": 10767 }, { "epoch": 0.7983984577741529, "grad_norm": 0.3533487319946289, "learning_rate": 9.234308577144287e-05, "loss": 0.4828, "step": 10768 }, { "epoch": 0.7984726032475717, "grad_norm": 0.3832164406776428, "learning_rate": 9.233308327081772e-05, "loss": 0.5055, "step": 10769 }, { "epoch": 0.7985467487209906, "grad_norm": 0.3450629413127899, "learning_rate": 9.232308077019255e-05, "loss": 0.4644, "step": 10770 }, { "epoch": 0.7986208941944094, "grad_norm": 0.3741133511066437, "learning_rate": 9.23130782695674e-05, "loss": 0.4835, "step": 10771 }, { "epoch": 0.7986950396678283, "grad_norm": 0.36059343814849854, "learning_rate": 9.230307576894224e-05, "loss": 0.4917, "step": 10772 }, { "epoch": 0.7987691851412472, "grad_norm": 0.3642174005508423, "learning_rate": 9.229307326831709e-05, "loss": 0.4708, "step": 10773 }, { "epoch": 0.798843330614666, "grad_norm": 0.4094926416873932, "learning_rate": 9.228307076769193e-05, "loss": 0.516, "step": 10774 }, { "epoch": 0.7989174760880848, "grad_norm": 0.3618448078632355, "learning_rate": 9.227306826706678e-05, "loss": 0.51, "step": 10775 }, { "epoch": 0.7989916215615037, "grad_norm": 0.3634682893753052, "learning_rate": 9.226306576644162e-05, "loss": 0.4944, "step": 10776 }, { "epoch": 0.7990657670349225, "grad_norm": 0.3471217453479767, "learning_rate": 9.225306326581647e-05, "loss": 0.4784, "step": 10777 }, { "epoch": 0.7991399125083414, "grad_norm": 0.3844742178916931, "learning_rate": 9.22430607651913e-05, "loss": 0.485, "step": 10778 }, { "epoch": 0.7992140579817603, "grad_norm": 0.3549949526786804, "learning_rate": 9.223305826456614e-05, "loss": 0.4948, "step": 10779 }, { "epoch": 0.799288203455179, "grad_norm": 0.3497442901134491, "learning_rate": 9.222305576394099e-05, "loss": 0.4808, "step": 10780 }, { "epoch": 0.7993623489285979, "grad_norm": 0.35462895035743713, "learning_rate": 9.221305326331583e-05, "loss": 0.4917, "step": 10781 }, { "epoch": 0.7994364944020168, "grad_norm": 0.35920971632003784, "learning_rate": 9.220305076269068e-05, "loss": 0.4844, "step": 10782 }, { "epoch": 0.7995106398754356, "grad_norm": 0.3484926223754883, "learning_rate": 9.219304826206552e-05, "loss": 0.472, "step": 10783 }, { "epoch": 0.7995847853488545, "grad_norm": 0.3604165315628052, "learning_rate": 9.218304576144037e-05, "loss": 0.4778, "step": 10784 }, { "epoch": 0.7996589308222734, "grad_norm": 0.3720673620700836, "learning_rate": 9.21730432608152e-05, "loss": 0.5576, "step": 10785 }, { "epoch": 0.7997330762956921, "grad_norm": 0.36372077465057373, "learning_rate": 9.216304076019006e-05, "loss": 0.5, "step": 10786 }, { "epoch": 0.799807221769111, "grad_norm": 0.3242473602294922, "learning_rate": 9.215303825956489e-05, "loss": 0.4966, "step": 10787 }, { "epoch": 0.7998813672425299, "grad_norm": 0.3379283845424652, "learning_rate": 9.214303575893974e-05, "loss": 0.4889, "step": 10788 }, { "epoch": 0.7999555127159487, "grad_norm": 0.3584557771682739, "learning_rate": 9.213303325831458e-05, "loss": 0.5081, "step": 10789 }, { "epoch": 0.8000296581893676, "grad_norm": 0.35701784491539, "learning_rate": 9.212303075768942e-05, "loss": 0.4896, "step": 10790 }, { "epoch": 0.8001038036627864, "grad_norm": 0.34285804629325867, "learning_rate": 9.211302825706427e-05, "loss": 0.4914, "step": 10791 }, { "epoch": 0.8001779491362052, "grad_norm": 0.38003242015838623, "learning_rate": 9.21030257564391e-05, "loss": 0.4991, "step": 10792 }, { "epoch": 0.8002520946096241, "grad_norm": 0.35892683267593384, "learning_rate": 9.209302325581396e-05, "loss": 0.4995, "step": 10793 }, { "epoch": 0.800326240083043, "grad_norm": 0.3841765224933624, "learning_rate": 9.208302075518879e-05, "loss": 0.5555, "step": 10794 }, { "epoch": 0.8004003855564618, "grad_norm": 0.34980329871177673, "learning_rate": 9.207301825456364e-05, "loss": 0.4903, "step": 10795 }, { "epoch": 0.8004745310298806, "grad_norm": 0.36810246109962463, "learning_rate": 9.206301575393848e-05, "loss": 0.4909, "step": 10796 }, { "epoch": 0.8005486765032995, "grad_norm": 0.35674017667770386, "learning_rate": 9.205301325331333e-05, "loss": 0.4947, "step": 10797 }, { "epoch": 0.8006228219767183, "grad_norm": 0.3662070035934448, "learning_rate": 9.204301075268817e-05, "loss": 0.5075, "step": 10798 }, { "epoch": 0.8006969674501372, "grad_norm": 0.34290871024131775, "learning_rate": 9.203300825206302e-05, "loss": 0.4569, "step": 10799 }, { "epoch": 0.8007711129235561, "grad_norm": 0.34211164712905884, "learning_rate": 9.202300575143787e-05, "loss": 0.488, "step": 10800 }, { "epoch": 0.8008452583969748, "grad_norm": 0.37831005454063416, "learning_rate": 9.20130032508127e-05, "loss": 0.5297, "step": 10801 }, { "epoch": 0.8009194038703937, "grad_norm": 0.36378729343414307, "learning_rate": 9.200300075018756e-05, "loss": 0.4723, "step": 10802 }, { "epoch": 0.8009935493438125, "grad_norm": 0.35848450660705566, "learning_rate": 9.19929982495624e-05, "loss": 0.4765, "step": 10803 }, { "epoch": 0.8010676948172314, "grad_norm": 0.3432336151599884, "learning_rate": 9.198299574893724e-05, "loss": 0.4699, "step": 10804 }, { "epoch": 0.8011418402906503, "grad_norm": 0.36226192116737366, "learning_rate": 9.197299324831208e-05, "loss": 0.4966, "step": 10805 }, { "epoch": 0.801215985764069, "grad_norm": 0.38218939304351807, "learning_rate": 9.196299074768693e-05, "loss": 0.4958, "step": 10806 }, { "epoch": 0.8012901312374879, "grad_norm": 0.37353411316871643, "learning_rate": 9.195298824706177e-05, "loss": 0.5266, "step": 10807 }, { "epoch": 0.8013642767109068, "grad_norm": 0.3789544105529785, "learning_rate": 9.194298574643662e-05, "loss": 0.5351, "step": 10808 }, { "epoch": 0.8014384221843256, "grad_norm": 0.3481104373931885, "learning_rate": 9.193298324581146e-05, "loss": 0.5098, "step": 10809 }, { "epoch": 0.8015125676577445, "grad_norm": 0.3337341248989105, "learning_rate": 9.192298074518631e-05, "loss": 0.4455, "step": 10810 }, { "epoch": 0.8015867131311634, "grad_norm": 0.36411088705062866, "learning_rate": 9.191297824456115e-05, "loss": 0.5023, "step": 10811 }, { "epoch": 0.8016608586045821, "grad_norm": 0.34675440192222595, "learning_rate": 9.1902975743936e-05, "loss": 0.4877, "step": 10812 }, { "epoch": 0.801735004078001, "grad_norm": 0.37522047758102417, "learning_rate": 9.189297324331083e-05, "loss": 0.5095, "step": 10813 }, { "epoch": 0.8018091495514199, "grad_norm": 0.38710835576057434, "learning_rate": 9.188297074268568e-05, "loss": 0.5201, "step": 10814 }, { "epoch": 0.8018832950248387, "grad_norm": 0.4018363654613495, "learning_rate": 9.187296824206052e-05, "loss": 0.5296, "step": 10815 }, { "epoch": 0.8019574404982576, "grad_norm": 0.42017287015914917, "learning_rate": 9.186296574143536e-05, "loss": 0.5318, "step": 10816 }, { "epoch": 0.8020315859716765, "grad_norm": 0.36120495200157166, "learning_rate": 9.185296324081021e-05, "loss": 0.486, "step": 10817 }, { "epoch": 0.8021057314450952, "grad_norm": 0.3697347044944763, "learning_rate": 9.184296074018505e-05, "loss": 0.4871, "step": 10818 }, { "epoch": 0.8021798769185141, "grad_norm": 0.35714229941368103, "learning_rate": 9.18329582395599e-05, "loss": 0.4983, "step": 10819 }, { "epoch": 0.802254022391933, "grad_norm": 0.3750304877758026, "learning_rate": 9.182295573893473e-05, "loss": 0.5085, "step": 10820 }, { "epoch": 0.8023281678653518, "grad_norm": 0.36742210388183594, "learning_rate": 9.181295323830958e-05, "loss": 0.4962, "step": 10821 }, { "epoch": 0.8024023133387707, "grad_norm": 0.37872153520584106, "learning_rate": 9.180295073768442e-05, "loss": 0.5111, "step": 10822 }, { "epoch": 0.8024764588121895, "grad_norm": 0.3563788831233978, "learning_rate": 9.179294823705927e-05, "loss": 0.4676, "step": 10823 }, { "epoch": 0.8025506042856083, "grad_norm": 0.3725840449333191, "learning_rate": 9.178294573643411e-05, "loss": 0.5274, "step": 10824 }, { "epoch": 0.8026247497590272, "grad_norm": 0.3670809864997864, "learning_rate": 9.177294323580896e-05, "loss": 0.4885, "step": 10825 }, { "epoch": 0.8026988952324461, "grad_norm": 0.36512500047683716, "learning_rate": 9.17629407351838e-05, "loss": 0.5116, "step": 10826 }, { "epoch": 0.8027730407058649, "grad_norm": 0.3789636194705963, "learning_rate": 9.175293823455863e-05, "loss": 0.5233, "step": 10827 }, { "epoch": 0.8028471861792837, "grad_norm": 0.3528367877006531, "learning_rate": 9.174293573393348e-05, "loss": 0.4297, "step": 10828 }, { "epoch": 0.8029213316527026, "grad_norm": 0.35497233271598816, "learning_rate": 9.173293323330832e-05, "loss": 0.4623, "step": 10829 }, { "epoch": 0.8029954771261214, "grad_norm": 0.3821674585342407, "learning_rate": 9.172293073268317e-05, "loss": 0.5272, "step": 10830 }, { "epoch": 0.8030696225995403, "grad_norm": 0.3535051643848419, "learning_rate": 9.171292823205801e-05, "loss": 0.5042, "step": 10831 }, { "epoch": 0.8031437680729592, "grad_norm": 0.37905895709991455, "learning_rate": 9.170292573143286e-05, "loss": 0.5235, "step": 10832 }, { "epoch": 0.803217913546378, "grad_norm": 0.3474695682525635, "learning_rate": 9.169292323080771e-05, "loss": 0.4501, "step": 10833 }, { "epoch": 0.8032920590197968, "grad_norm": 0.3528017997741699, "learning_rate": 9.168292073018255e-05, "loss": 0.5001, "step": 10834 }, { "epoch": 0.8033662044932157, "grad_norm": 0.36001598834991455, "learning_rate": 9.16729182295574e-05, "loss": 0.4797, "step": 10835 }, { "epoch": 0.8034403499666345, "grad_norm": 0.39338693022727966, "learning_rate": 9.166291572893223e-05, "loss": 0.5325, "step": 10836 }, { "epoch": 0.8035144954400534, "grad_norm": 0.347903311252594, "learning_rate": 9.165291322830709e-05, "loss": 0.4934, "step": 10837 }, { "epoch": 0.8035886409134723, "grad_norm": 0.3372175395488739, "learning_rate": 9.164291072768194e-05, "loss": 0.4509, "step": 10838 }, { "epoch": 0.803662786386891, "grad_norm": 0.3705730140209198, "learning_rate": 9.163290822705677e-05, "loss": 0.5607, "step": 10839 }, { "epoch": 0.8037369318603099, "grad_norm": 0.36409756541252136, "learning_rate": 9.162290572643161e-05, "loss": 0.5033, "step": 10840 }, { "epoch": 0.8038110773337288, "grad_norm": 0.37101292610168457, "learning_rate": 9.161290322580646e-05, "loss": 0.482, "step": 10841 }, { "epoch": 0.8038852228071476, "grad_norm": 0.3667733669281006, "learning_rate": 9.16029007251813e-05, "loss": 0.4588, "step": 10842 }, { "epoch": 0.8039593682805665, "grad_norm": 0.37679681181907654, "learning_rate": 9.159289822455615e-05, "loss": 0.49, "step": 10843 }, { "epoch": 0.8040335137539854, "grad_norm": 0.3527785837650299, "learning_rate": 9.158289572393099e-05, "loss": 0.4668, "step": 10844 }, { "epoch": 0.8041076592274041, "grad_norm": 0.3910191059112549, "learning_rate": 9.157289322330584e-05, "loss": 0.5247, "step": 10845 }, { "epoch": 0.804181804700823, "grad_norm": 0.34444230794906616, "learning_rate": 9.156289072268067e-05, "loss": 0.4712, "step": 10846 }, { "epoch": 0.8042559501742419, "grad_norm": 0.3390134572982788, "learning_rate": 9.155288822205552e-05, "loss": 0.48, "step": 10847 }, { "epoch": 0.8043300956476607, "grad_norm": 0.37447071075439453, "learning_rate": 9.154288572143036e-05, "loss": 0.4933, "step": 10848 }, { "epoch": 0.8044042411210796, "grad_norm": 0.3962658941745758, "learning_rate": 9.153288322080521e-05, "loss": 0.5332, "step": 10849 }, { "epoch": 0.8044783865944984, "grad_norm": 0.3584563434123993, "learning_rate": 9.152288072018005e-05, "loss": 0.4637, "step": 10850 }, { "epoch": 0.8045525320679172, "grad_norm": 0.35255545377731323, "learning_rate": 9.15128782195549e-05, "loss": 0.4799, "step": 10851 }, { "epoch": 0.8046266775413361, "grad_norm": 0.35864993929862976, "learning_rate": 9.150287571892974e-05, "loss": 0.4867, "step": 10852 }, { "epoch": 0.804700823014755, "grad_norm": 0.364067405462265, "learning_rate": 9.149287321830457e-05, "loss": 0.4712, "step": 10853 }, { "epoch": 0.8047749684881738, "grad_norm": 0.35110723972320557, "learning_rate": 9.148287071767942e-05, "loss": 0.4575, "step": 10854 }, { "epoch": 0.8048491139615926, "grad_norm": 0.389247328042984, "learning_rate": 9.147286821705426e-05, "loss": 0.532, "step": 10855 }, { "epoch": 0.8049232594350115, "grad_norm": 0.3665666878223419, "learning_rate": 9.146286571642911e-05, "loss": 0.4699, "step": 10856 }, { "epoch": 0.8049974049084303, "grad_norm": 0.3609781563282013, "learning_rate": 9.145286321580395e-05, "loss": 0.5184, "step": 10857 }, { "epoch": 0.8050715503818492, "grad_norm": 0.3619830906391144, "learning_rate": 9.14428607151788e-05, "loss": 0.4964, "step": 10858 }, { "epoch": 0.8051456958552681, "grad_norm": 0.35170719027519226, "learning_rate": 9.143285821455364e-05, "loss": 0.4935, "step": 10859 }, { "epoch": 0.8052198413286868, "grad_norm": 0.3742431700229645, "learning_rate": 9.142285571392849e-05, "loss": 0.5017, "step": 10860 }, { "epoch": 0.8052939868021057, "grad_norm": 0.364337682723999, "learning_rate": 9.141285321330332e-05, "loss": 0.4938, "step": 10861 }, { "epoch": 0.8053681322755246, "grad_norm": 0.3698093891143799, "learning_rate": 9.140285071267818e-05, "loss": 0.4985, "step": 10862 }, { "epoch": 0.8054422777489434, "grad_norm": 0.32802867889404297, "learning_rate": 9.139284821205301e-05, "loss": 0.4798, "step": 10863 }, { "epoch": 0.8055164232223623, "grad_norm": 0.36929425597190857, "learning_rate": 9.138284571142786e-05, "loss": 0.5347, "step": 10864 }, { "epoch": 0.8055905686957812, "grad_norm": 0.3863399028778076, "learning_rate": 9.13728432108027e-05, "loss": 0.5446, "step": 10865 }, { "epoch": 0.8056647141691999, "grad_norm": 0.413947194814682, "learning_rate": 9.136284071017754e-05, "loss": 0.5513, "step": 10866 }, { "epoch": 0.8057388596426188, "grad_norm": 0.3748295307159424, "learning_rate": 9.135283820955239e-05, "loss": 0.4971, "step": 10867 }, { "epoch": 0.8058130051160377, "grad_norm": 0.36861929297447205, "learning_rate": 9.134283570892724e-05, "loss": 0.4822, "step": 10868 }, { "epoch": 0.8058871505894565, "grad_norm": 0.3535784184932709, "learning_rate": 9.133283320830208e-05, "loss": 0.4997, "step": 10869 }, { "epoch": 0.8059612960628754, "grad_norm": 0.35416311025619507, "learning_rate": 9.132283070767693e-05, "loss": 0.491, "step": 10870 }, { "epoch": 0.8060354415362943, "grad_norm": 0.36167633533477783, "learning_rate": 9.131282820705178e-05, "loss": 0.5016, "step": 10871 }, { "epoch": 0.806109587009713, "grad_norm": 0.35938847064971924, "learning_rate": 9.130282570642661e-05, "loss": 0.5084, "step": 10872 }, { "epoch": 0.8061837324831319, "grad_norm": 0.3544241786003113, "learning_rate": 9.129282320580146e-05, "loss": 0.4829, "step": 10873 }, { "epoch": 0.8062578779565508, "grad_norm": 0.35235127806663513, "learning_rate": 9.12828207051763e-05, "loss": 0.4913, "step": 10874 }, { "epoch": 0.8063320234299696, "grad_norm": 0.38658857345581055, "learning_rate": 9.127281820455115e-05, "loss": 0.5374, "step": 10875 }, { "epoch": 0.8064061689033885, "grad_norm": 0.36547279357910156, "learning_rate": 9.126281570392599e-05, "loss": 0.4988, "step": 10876 }, { "epoch": 0.8064803143768073, "grad_norm": 0.3663710057735443, "learning_rate": 9.125281320330083e-05, "loss": 0.5221, "step": 10877 }, { "epoch": 0.8065544598502261, "grad_norm": 0.36771509051322937, "learning_rate": 9.124281070267568e-05, "loss": 0.5238, "step": 10878 }, { "epoch": 0.806628605323645, "grad_norm": 0.3851507306098938, "learning_rate": 9.123280820205051e-05, "loss": 0.4857, "step": 10879 }, { "epoch": 0.8067027507970639, "grad_norm": 0.37486231327056885, "learning_rate": 9.122280570142537e-05, "loss": 0.4965, "step": 10880 }, { "epoch": 0.8067768962704827, "grad_norm": 0.36325350403785706, "learning_rate": 9.12128032008002e-05, "loss": 0.4668, "step": 10881 }, { "epoch": 0.8068510417439015, "grad_norm": 0.3609519302845001, "learning_rate": 9.120280070017505e-05, "loss": 0.4961, "step": 10882 }, { "epoch": 0.8069251872173204, "grad_norm": 0.38005396723747253, "learning_rate": 9.119279819954989e-05, "loss": 0.5499, "step": 10883 }, { "epoch": 0.8069993326907392, "grad_norm": 0.3255661129951477, "learning_rate": 9.118279569892474e-05, "loss": 0.4873, "step": 10884 }, { "epoch": 0.8070734781641581, "grad_norm": 0.3687625527381897, "learning_rate": 9.117279319829958e-05, "loss": 0.5419, "step": 10885 }, { "epoch": 0.807147623637577, "grad_norm": 0.3651282489299774, "learning_rate": 9.116279069767443e-05, "loss": 0.4916, "step": 10886 }, { "epoch": 0.8072217691109957, "grad_norm": 0.38880208134651184, "learning_rate": 9.115278819704927e-05, "loss": 0.5447, "step": 10887 }, { "epoch": 0.8072959145844146, "grad_norm": 0.3462653160095215, "learning_rate": 9.114278569642412e-05, "loss": 0.4776, "step": 10888 }, { "epoch": 0.8073700600578335, "grad_norm": 0.33544567227363586, "learning_rate": 9.113278319579895e-05, "loss": 0.4605, "step": 10889 }, { "epoch": 0.8074442055312523, "grad_norm": 0.38452935218811035, "learning_rate": 9.112278069517379e-05, "loss": 0.5458, "step": 10890 }, { "epoch": 0.8075183510046712, "grad_norm": 0.3500419855117798, "learning_rate": 9.111277819454864e-05, "loss": 0.4775, "step": 10891 }, { "epoch": 0.8075924964780901, "grad_norm": 0.3510412871837616, "learning_rate": 9.110277569392348e-05, "loss": 0.5078, "step": 10892 }, { "epoch": 0.8076666419515088, "grad_norm": 0.3695540428161621, "learning_rate": 9.109277319329833e-05, "loss": 0.5061, "step": 10893 }, { "epoch": 0.8077407874249277, "grad_norm": 0.3538913130760193, "learning_rate": 9.108277069267317e-05, "loss": 0.491, "step": 10894 }, { "epoch": 0.8078149328983466, "grad_norm": 0.35493192076683044, "learning_rate": 9.107276819204802e-05, "loss": 0.4656, "step": 10895 }, { "epoch": 0.8078890783717654, "grad_norm": 0.34581777453422546, "learning_rate": 9.106276569142285e-05, "loss": 0.4852, "step": 10896 }, { "epoch": 0.8079632238451843, "grad_norm": 0.36050575971603394, "learning_rate": 9.10527631907977e-05, "loss": 0.4743, "step": 10897 }, { "epoch": 0.8080373693186031, "grad_norm": 0.36283421516418457, "learning_rate": 9.104276069017254e-05, "loss": 0.4676, "step": 10898 }, { "epoch": 0.8081115147920219, "grad_norm": 0.37098199129104614, "learning_rate": 9.103275818954739e-05, "loss": 0.4951, "step": 10899 }, { "epoch": 0.8081856602654408, "grad_norm": 0.3605058193206787, "learning_rate": 9.102275568892223e-05, "loss": 0.4989, "step": 10900 }, { "epoch": 0.8082598057388597, "grad_norm": 0.37616294622421265, "learning_rate": 9.101275318829708e-05, "loss": 0.5397, "step": 10901 }, { "epoch": 0.8083339512122785, "grad_norm": 0.3478463888168335, "learning_rate": 9.100275068767192e-05, "loss": 0.4739, "step": 10902 }, { "epoch": 0.8084080966856974, "grad_norm": 0.3259580135345459, "learning_rate": 9.099274818704677e-05, "loss": 0.4262, "step": 10903 }, { "epoch": 0.8084822421591162, "grad_norm": 0.383256196975708, "learning_rate": 9.09827456864216e-05, "loss": 0.5326, "step": 10904 }, { "epoch": 0.808556387632535, "grad_norm": 0.38485392928123474, "learning_rate": 9.097274318579645e-05, "loss": 0.5749, "step": 10905 }, { "epoch": 0.8086305331059539, "grad_norm": 0.33821362257003784, "learning_rate": 9.09627406851713e-05, "loss": 0.4725, "step": 10906 }, { "epoch": 0.8087046785793728, "grad_norm": 0.37332263588905334, "learning_rate": 9.095273818454614e-05, "loss": 0.5167, "step": 10907 }, { "epoch": 0.8087788240527916, "grad_norm": 0.3309481143951416, "learning_rate": 9.0942735683921e-05, "loss": 0.476, "step": 10908 }, { "epoch": 0.8088529695262104, "grad_norm": 0.3690737187862396, "learning_rate": 9.093273318329583e-05, "loss": 0.4918, "step": 10909 }, { "epoch": 0.8089271149996293, "grad_norm": 0.3574572205543518, "learning_rate": 9.092273068267068e-05, "loss": 0.5028, "step": 10910 }, { "epoch": 0.8090012604730481, "grad_norm": 0.3829030394554138, "learning_rate": 9.091272818204552e-05, "loss": 0.498, "step": 10911 }, { "epoch": 0.809075405946467, "grad_norm": 0.36198729276657104, "learning_rate": 9.090272568142037e-05, "loss": 0.4788, "step": 10912 }, { "epoch": 0.8091495514198859, "grad_norm": 0.35868167877197266, "learning_rate": 9.08927231807952e-05, "loss": 0.4854, "step": 10913 }, { "epoch": 0.8092236968933046, "grad_norm": 0.37290170788764954, "learning_rate": 9.088272068017004e-05, "loss": 0.495, "step": 10914 }, { "epoch": 0.8092978423667235, "grad_norm": 0.3436721861362457, "learning_rate": 9.08727181795449e-05, "loss": 0.4485, "step": 10915 }, { "epoch": 0.8093719878401423, "grad_norm": 0.3411274254322052, "learning_rate": 9.086271567891973e-05, "loss": 0.4799, "step": 10916 }, { "epoch": 0.8094461333135612, "grad_norm": 0.35555610060691833, "learning_rate": 9.085271317829458e-05, "loss": 0.4699, "step": 10917 }, { "epoch": 0.8095202787869801, "grad_norm": 0.3534837067127228, "learning_rate": 9.084271067766942e-05, "loss": 0.482, "step": 10918 }, { "epoch": 0.8095944242603988, "grad_norm": 0.3599141538143158, "learning_rate": 9.083270817704427e-05, "loss": 0.4868, "step": 10919 }, { "epoch": 0.8096685697338177, "grad_norm": 0.3511689007282257, "learning_rate": 9.08227056764191e-05, "loss": 0.5059, "step": 10920 }, { "epoch": 0.8097427152072366, "grad_norm": 0.38241881132125854, "learning_rate": 9.081270317579396e-05, "loss": 0.5196, "step": 10921 }, { "epoch": 0.8098168606806554, "grad_norm": 0.36912479996681213, "learning_rate": 9.08027006751688e-05, "loss": 0.5038, "step": 10922 }, { "epoch": 0.8098910061540743, "grad_norm": 0.3777669370174408, "learning_rate": 9.079269817454364e-05, "loss": 0.5276, "step": 10923 }, { "epoch": 0.8099651516274932, "grad_norm": 0.3578609526157379, "learning_rate": 9.078269567391848e-05, "loss": 0.4689, "step": 10924 }, { "epoch": 0.8100392971009119, "grad_norm": 0.35369282960891724, "learning_rate": 9.077269317329333e-05, "loss": 0.4648, "step": 10925 }, { "epoch": 0.8101134425743308, "grad_norm": 0.35031837224960327, "learning_rate": 9.076269067266817e-05, "loss": 0.4682, "step": 10926 }, { "epoch": 0.8101875880477497, "grad_norm": 0.3315430283546448, "learning_rate": 9.0752688172043e-05, "loss": 0.4643, "step": 10927 }, { "epoch": 0.8102617335211685, "grad_norm": 0.3583666682243347, "learning_rate": 9.074268567141786e-05, "loss": 0.4742, "step": 10928 }, { "epoch": 0.8103358789945874, "grad_norm": 0.35927873849868774, "learning_rate": 9.07326831707927e-05, "loss": 0.4974, "step": 10929 }, { "epoch": 0.8104100244680063, "grad_norm": 0.35412442684173584, "learning_rate": 9.072268067016754e-05, "loss": 0.5376, "step": 10930 }, { "epoch": 0.810484169941425, "grad_norm": 0.3536222279071808, "learning_rate": 9.071267816954238e-05, "loss": 0.4872, "step": 10931 }, { "epoch": 0.8105583154148439, "grad_norm": 0.35189810395240784, "learning_rate": 9.070267566891723e-05, "loss": 0.4917, "step": 10932 }, { "epoch": 0.8106324608882628, "grad_norm": 0.35796597599983215, "learning_rate": 9.069267316829207e-05, "loss": 0.4978, "step": 10933 }, { "epoch": 0.8107066063616816, "grad_norm": 0.35043901205062866, "learning_rate": 9.068267066766692e-05, "loss": 0.4658, "step": 10934 }, { "epoch": 0.8107807518351005, "grad_norm": 0.3543623089790344, "learning_rate": 9.067266816704176e-05, "loss": 0.4849, "step": 10935 }, { "epoch": 0.8108548973085193, "grad_norm": 0.3270396888256073, "learning_rate": 9.066266566641661e-05, "loss": 0.4878, "step": 10936 }, { "epoch": 0.8109290427819381, "grad_norm": 0.34352219104766846, "learning_rate": 9.065266316579145e-05, "loss": 0.4768, "step": 10937 }, { "epoch": 0.811003188255357, "grad_norm": 0.3628884255886078, "learning_rate": 9.06426606651663e-05, "loss": 0.4894, "step": 10938 }, { "epoch": 0.8110773337287759, "grad_norm": 0.35685110092163086, "learning_rate": 9.063265816454115e-05, "loss": 0.4714, "step": 10939 }, { "epoch": 0.8111514792021947, "grad_norm": 0.3577975928783417, "learning_rate": 9.062265566391598e-05, "loss": 0.4822, "step": 10940 }, { "epoch": 0.8112256246756135, "grad_norm": 0.36293256282806396, "learning_rate": 9.061265316329083e-05, "loss": 0.5049, "step": 10941 }, { "epoch": 0.8112997701490324, "grad_norm": 0.34260696172714233, "learning_rate": 9.060265066266567e-05, "loss": 0.4548, "step": 10942 }, { "epoch": 0.8113739156224512, "grad_norm": 0.3618978261947632, "learning_rate": 9.059264816204052e-05, "loss": 0.4836, "step": 10943 }, { "epoch": 0.8114480610958701, "grad_norm": 0.3629687428474426, "learning_rate": 9.058264566141536e-05, "loss": 0.4994, "step": 10944 }, { "epoch": 0.811522206569289, "grad_norm": 0.35324135422706604, "learning_rate": 9.057264316079021e-05, "loss": 0.4869, "step": 10945 }, { "epoch": 0.8115963520427077, "grad_norm": 0.3709394931793213, "learning_rate": 9.056264066016505e-05, "loss": 0.499, "step": 10946 }, { "epoch": 0.8116704975161266, "grad_norm": 0.3694465160369873, "learning_rate": 9.05526381595399e-05, "loss": 0.5406, "step": 10947 }, { "epoch": 0.8117446429895455, "grad_norm": 0.3448885679244995, "learning_rate": 9.054263565891473e-05, "loss": 0.4657, "step": 10948 }, { "epoch": 0.8118187884629643, "grad_norm": 0.3931717574596405, "learning_rate": 9.053263315828959e-05, "loss": 0.5363, "step": 10949 }, { "epoch": 0.8118929339363832, "grad_norm": 0.346084326505661, "learning_rate": 9.052263065766442e-05, "loss": 0.4664, "step": 10950 }, { "epoch": 0.8119670794098021, "grad_norm": 0.3570600152015686, "learning_rate": 9.051262815703926e-05, "loss": 0.4909, "step": 10951 }, { "epoch": 0.8120412248832208, "grad_norm": 0.35587653517723083, "learning_rate": 9.050262565641411e-05, "loss": 0.4849, "step": 10952 }, { "epoch": 0.8121153703566397, "grad_norm": 0.36566105484962463, "learning_rate": 9.049262315578895e-05, "loss": 0.5532, "step": 10953 }, { "epoch": 0.8121895158300586, "grad_norm": 0.35761743783950806, "learning_rate": 9.04826206551638e-05, "loss": 0.5143, "step": 10954 }, { "epoch": 0.8122636613034774, "grad_norm": 0.35274210572242737, "learning_rate": 9.047261815453863e-05, "loss": 0.4874, "step": 10955 }, { "epoch": 0.8123378067768963, "grad_norm": 0.3555752635002136, "learning_rate": 9.046261565391349e-05, "loss": 0.4916, "step": 10956 }, { "epoch": 0.8124119522503152, "grad_norm": 0.36727848649024963, "learning_rate": 9.045261315328832e-05, "loss": 0.4924, "step": 10957 }, { "epoch": 0.8124860977237339, "grad_norm": 0.36246949434280396, "learning_rate": 9.044261065266317e-05, "loss": 0.4945, "step": 10958 }, { "epoch": 0.8125602431971528, "grad_norm": 0.35506290197372437, "learning_rate": 9.043260815203801e-05, "loss": 0.4938, "step": 10959 }, { "epoch": 0.8126343886705717, "grad_norm": 0.3602018654346466, "learning_rate": 9.042260565141286e-05, "loss": 0.4862, "step": 10960 }, { "epoch": 0.8127085341439905, "grad_norm": 0.36039307713508606, "learning_rate": 9.04126031507877e-05, "loss": 0.5036, "step": 10961 }, { "epoch": 0.8127826796174094, "grad_norm": 0.37325042486190796, "learning_rate": 9.040260065016255e-05, "loss": 0.5474, "step": 10962 }, { "epoch": 0.8128568250908282, "grad_norm": 0.3636985421180725, "learning_rate": 9.039259814953739e-05, "loss": 0.4883, "step": 10963 }, { "epoch": 0.812930970564247, "grad_norm": 0.32907891273498535, "learning_rate": 9.038259564891222e-05, "loss": 0.441, "step": 10964 }, { "epoch": 0.8130051160376659, "grad_norm": 0.33256229758262634, "learning_rate": 9.037259314828707e-05, "loss": 0.4708, "step": 10965 }, { "epoch": 0.8130792615110848, "grad_norm": 0.3806195855140686, "learning_rate": 9.036259064766191e-05, "loss": 0.5403, "step": 10966 }, { "epoch": 0.8131534069845036, "grad_norm": 0.35167402029037476, "learning_rate": 9.035258814703676e-05, "loss": 0.4801, "step": 10967 }, { "epoch": 0.8132275524579224, "grad_norm": 0.37340056896209717, "learning_rate": 9.03425856464116e-05, "loss": 0.5463, "step": 10968 }, { "epoch": 0.8133016979313413, "grad_norm": 0.3504984974861145, "learning_rate": 9.033258314578645e-05, "loss": 0.503, "step": 10969 }, { "epoch": 0.8133758434047601, "grad_norm": 0.3491746485233307, "learning_rate": 9.032258064516129e-05, "loss": 0.4988, "step": 10970 }, { "epoch": 0.813449988878179, "grad_norm": 0.36839917302131653, "learning_rate": 9.031257814453614e-05, "loss": 0.5076, "step": 10971 }, { "epoch": 0.8135241343515979, "grad_norm": 0.37765204906463623, "learning_rate": 9.030257564391099e-05, "loss": 0.5044, "step": 10972 }, { "epoch": 0.8135982798250166, "grad_norm": 0.3804789185523987, "learning_rate": 9.029257314328582e-05, "loss": 0.5227, "step": 10973 }, { "epoch": 0.8136724252984355, "grad_norm": 0.34362196922302246, "learning_rate": 9.028257064266067e-05, "loss": 0.4876, "step": 10974 }, { "epoch": 0.8137465707718544, "grad_norm": 0.3465958535671234, "learning_rate": 9.027256814203551e-05, "loss": 0.4927, "step": 10975 }, { "epoch": 0.8138207162452732, "grad_norm": 0.3360372483730316, "learning_rate": 9.026256564141036e-05, "loss": 0.4677, "step": 10976 }, { "epoch": 0.8138948617186921, "grad_norm": 0.373473584651947, "learning_rate": 9.02525631407852e-05, "loss": 0.4953, "step": 10977 }, { "epoch": 0.813969007192111, "grad_norm": 0.34313592314720154, "learning_rate": 9.024256064016005e-05, "loss": 0.4563, "step": 10978 }, { "epoch": 0.8140431526655297, "grad_norm": 0.36074113845825195, "learning_rate": 9.023255813953489e-05, "loss": 0.5134, "step": 10979 }, { "epoch": 0.8141172981389486, "grad_norm": 0.3482445776462555, "learning_rate": 9.022255563890974e-05, "loss": 0.4788, "step": 10980 }, { "epoch": 0.8141914436123675, "grad_norm": 0.3589150309562683, "learning_rate": 9.021255313828458e-05, "loss": 0.5371, "step": 10981 }, { "epoch": 0.8142655890857863, "grad_norm": 0.3698323667049408, "learning_rate": 9.020255063765943e-05, "loss": 0.5299, "step": 10982 }, { "epoch": 0.8143397345592052, "grad_norm": 0.3583366870880127, "learning_rate": 9.019254813703426e-05, "loss": 0.5292, "step": 10983 }, { "epoch": 0.814413880032624, "grad_norm": 0.3521531820297241, "learning_rate": 9.018254563640911e-05, "loss": 0.4706, "step": 10984 }, { "epoch": 0.8144880255060428, "grad_norm": 0.35443010926246643, "learning_rate": 9.017254313578395e-05, "loss": 0.53, "step": 10985 }, { "epoch": 0.8145621709794617, "grad_norm": 0.34179195761680603, "learning_rate": 9.01625406351588e-05, "loss": 0.4948, "step": 10986 }, { "epoch": 0.8146363164528806, "grad_norm": 0.362565815448761, "learning_rate": 9.015253813453364e-05, "loss": 0.4515, "step": 10987 }, { "epoch": 0.8147104619262994, "grad_norm": 0.33210280537605286, "learning_rate": 9.014253563390848e-05, "loss": 0.4604, "step": 10988 }, { "epoch": 0.8147846073997183, "grad_norm": 0.3848990201950073, "learning_rate": 9.013253313328333e-05, "loss": 0.5011, "step": 10989 }, { "epoch": 0.8148587528731371, "grad_norm": 0.36849939823150635, "learning_rate": 9.012253063265816e-05, "loss": 0.5211, "step": 10990 }, { "epoch": 0.8149328983465559, "grad_norm": 0.35602179169654846, "learning_rate": 9.011252813203301e-05, "loss": 0.4761, "step": 10991 }, { "epoch": 0.8150070438199748, "grad_norm": 0.3878301680088043, "learning_rate": 9.010252563140785e-05, "loss": 0.5345, "step": 10992 }, { "epoch": 0.8150811892933937, "grad_norm": 0.3529108166694641, "learning_rate": 9.00925231307827e-05, "loss": 0.4766, "step": 10993 }, { "epoch": 0.8151553347668125, "grad_norm": 0.3599037826061249, "learning_rate": 9.008252063015754e-05, "loss": 0.5341, "step": 10994 }, { "epoch": 0.8152294802402313, "grad_norm": 0.3502562642097473, "learning_rate": 9.007251812953239e-05, "loss": 0.4567, "step": 10995 }, { "epoch": 0.8153036257136502, "grad_norm": 0.3511078655719757, "learning_rate": 9.006251562890723e-05, "loss": 0.4881, "step": 10996 }, { "epoch": 0.815377771187069, "grad_norm": 0.3607335686683655, "learning_rate": 9.005251312828208e-05, "loss": 0.5015, "step": 10997 }, { "epoch": 0.8154519166604879, "grad_norm": 0.37251144647598267, "learning_rate": 9.004251062765691e-05, "loss": 0.5008, "step": 10998 }, { "epoch": 0.8155260621339068, "grad_norm": 0.35037457942962646, "learning_rate": 9.003250812703176e-05, "loss": 0.5004, "step": 10999 }, { "epoch": 0.8156002076073255, "grad_norm": 0.3719036281108856, "learning_rate": 9.00225056264066e-05, "loss": 0.5147, "step": 11000 }, { "epoch": 0.8156743530807444, "grad_norm": 0.3650340437889099, "learning_rate": 9.001250312578144e-05, "loss": 0.5241, "step": 11001 }, { "epoch": 0.8157484985541633, "grad_norm": 0.35595017671585083, "learning_rate": 9.000250062515629e-05, "loss": 0.4642, "step": 11002 }, { "epoch": 0.8158226440275821, "grad_norm": 0.3795633316040039, "learning_rate": 8.999249812453113e-05, "loss": 0.4933, "step": 11003 }, { "epoch": 0.815896789501001, "grad_norm": 0.3360164165496826, "learning_rate": 8.998249562390598e-05, "loss": 0.4702, "step": 11004 }, { "epoch": 0.8159709349744199, "grad_norm": 0.3836157023906708, "learning_rate": 8.997249312328081e-05, "loss": 0.5271, "step": 11005 }, { "epoch": 0.8160450804478386, "grad_norm": 0.3738442063331604, "learning_rate": 8.996249062265567e-05, "loss": 0.5041, "step": 11006 }, { "epoch": 0.8161192259212575, "grad_norm": 0.35283246636390686, "learning_rate": 8.995248812203052e-05, "loss": 0.488, "step": 11007 }, { "epoch": 0.8161933713946764, "grad_norm": 0.3518413007259369, "learning_rate": 8.994248562140535e-05, "loss": 0.4783, "step": 11008 }, { "epoch": 0.8162675168680952, "grad_norm": 0.3596951365470886, "learning_rate": 8.99324831207802e-05, "loss": 0.4645, "step": 11009 }, { "epoch": 0.8163416623415141, "grad_norm": 0.3476240932941437, "learning_rate": 8.992248062015505e-05, "loss": 0.5198, "step": 11010 }, { "epoch": 0.816415807814933, "grad_norm": 0.3454711139202118, "learning_rate": 8.991247811952989e-05, "loss": 0.4851, "step": 11011 }, { "epoch": 0.8164899532883517, "grad_norm": 0.3632952868938446, "learning_rate": 8.990247561890474e-05, "loss": 0.4786, "step": 11012 }, { "epoch": 0.8165640987617706, "grad_norm": 0.36777424812316895, "learning_rate": 8.989247311827958e-05, "loss": 0.5376, "step": 11013 }, { "epoch": 0.8166382442351895, "grad_norm": 0.3804871439933777, "learning_rate": 8.988247061765442e-05, "loss": 0.525, "step": 11014 }, { "epoch": 0.8167123897086083, "grad_norm": 0.36839261651039124, "learning_rate": 8.987246811702927e-05, "loss": 0.478, "step": 11015 }, { "epoch": 0.8167865351820272, "grad_norm": 0.36022257804870605, "learning_rate": 8.98624656164041e-05, "loss": 0.4998, "step": 11016 }, { "epoch": 0.816860680655446, "grad_norm": 0.3601973354816437, "learning_rate": 8.985246311577895e-05, "loss": 0.5197, "step": 11017 }, { "epoch": 0.8169348261288648, "grad_norm": 0.3527315557003021, "learning_rate": 8.984246061515379e-05, "loss": 0.4584, "step": 11018 }, { "epoch": 0.8170089716022837, "grad_norm": 0.3830864429473877, "learning_rate": 8.983245811452864e-05, "loss": 0.5384, "step": 11019 }, { "epoch": 0.8170831170757026, "grad_norm": 0.3552989065647125, "learning_rate": 8.982245561390348e-05, "loss": 0.4816, "step": 11020 }, { "epoch": 0.8171572625491214, "grad_norm": 0.3571743965148926, "learning_rate": 8.981245311327833e-05, "loss": 0.5081, "step": 11021 }, { "epoch": 0.8172314080225402, "grad_norm": 0.35528379678726196, "learning_rate": 8.980245061265317e-05, "loss": 0.4895, "step": 11022 }, { "epoch": 0.8173055534959591, "grad_norm": 0.33101004362106323, "learning_rate": 8.979244811202802e-05, "loss": 0.4472, "step": 11023 }, { "epoch": 0.8173796989693779, "grad_norm": 0.35229432582855225, "learning_rate": 8.978244561140285e-05, "loss": 0.5073, "step": 11024 }, { "epoch": 0.8174538444427968, "grad_norm": 0.36946582794189453, "learning_rate": 8.97724431107777e-05, "loss": 0.5257, "step": 11025 }, { "epoch": 0.8175279899162157, "grad_norm": 0.36930981278419495, "learning_rate": 8.976244061015254e-05, "loss": 0.5133, "step": 11026 }, { "epoch": 0.8176021353896344, "grad_norm": 0.3592629134654999, "learning_rate": 8.975243810952738e-05, "loss": 0.4949, "step": 11027 }, { "epoch": 0.8176762808630533, "grad_norm": 0.3935692310333252, "learning_rate": 8.974243560890223e-05, "loss": 0.5139, "step": 11028 }, { "epoch": 0.8177504263364722, "grad_norm": 0.3612435758113861, "learning_rate": 8.973243310827707e-05, "loss": 0.4947, "step": 11029 }, { "epoch": 0.817824571809891, "grad_norm": 0.34095683693885803, "learning_rate": 8.972243060765192e-05, "loss": 0.4674, "step": 11030 }, { "epoch": 0.8178987172833099, "grad_norm": 0.36413049697875977, "learning_rate": 8.971242810702675e-05, "loss": 0.5257, "step": 11031 }, { "epoch": 0.8179728627567286, "grad_norm": 0.3554422855377197, "learning_rate": 8.97024256064016e-05, "loss": 0.5131, "step": 11032 }, { "epoch": 0.8180470082301475, "grad_norm": 0.34016022086143494, "learning_rate": 8.969242310577644e-05, "loss": 0.4452, "step": 11033 }, { "epoch": 0.8181211537035664, "grad_norm": 0.3652435541152954, "learning_rate": 8.96824206051513e-05, "loss": 0.4861, "step": 11034 }, { "epoch": 0.8181952991769852, "grad_norm": 0.37418898940086365, "learning_rate": 8.967241810452613e-05, "loss": 0.5192, "step": 11035 }, { "epoch": 0.8182694446504041, "grad_norm": 0.37145930528640747, "learning_rate": 8.966241560390098e-05, "loss": 0.5145, "step": 11036 }, { "epoch": 0.818343590123823, "grad_norm": 0.3473862409591675, "learning_rate": 8.965241310327582e-05, "loss": 0.4861, "step": 11037 }, { "epoch": 0.8184177355972417, "grad_norm": 0.3464545011520386, "learning_rate": 8.964241060265066e-05, "loss": 0.4653, "step": 11038 }, { "epoch": 0.8184918810706606, "grad_norm": 0.3634180724620819, "learning_rate": 8.96324081020255e-05, "loss": 0.5161, "step": 11039 }, { "epoch": 0.8185660265440795, "grad_norm": 0.37199926376342773, "learning_rate": 8.962240560140036e-05, "loss": 0.5317, "step": 11040 }, { "epoch": 0.8186401720174983, "grad_norm": 0.3871135413646698, "learning_rate": 8.96124031007752e-05, "loss": 0.5059, "step": 11041 }, { "epoch": 0.8187143174909172, "grad_norm": 0.33924174308776855, "learning_rate": 8.960240060015004e-05, "loss": 0.481, "step": 11042 }, { "epoch": 0.818788462964336, "grad_norm": 0.34578830003738403, "learning_rate": 8.959239809952488e-05, "loss": 0.4635, "step": 11043 }, { "epoch": 0.8188626084377548, "grad_norm": 0.36511361598968506, "learning_rate": 8.958239559889973e-05, "loss": 0.4808, "step": 11044 }, { "epoch": 0.8189367539111737, "grad_norm": 0.3531060218811035, "learning_rate": 8.957239309827458e-05, "loss": 0.4657, "step": 11045 }, { "epoch": 0.8190108993845926, "grad_norm": 0.3566177189350128, "learning_rate": 8.956239059764942e-05, "loss": 0.497, "step": 11046 }, { "epoch": 0.8190850448580114, "grad_norm": 0.355114609003067, "learning_rate": 8.955238809702427e-05, "loss": 0.5094, "step": 11047 }, { "epoch": 0.8191591903314303, "grad_norm": 0.3829163908958435, "learning_rate": 8.954238559639911e-05, "loss": 0.5532, "step": 11048 }, { "epoch": 0.8192333358048491, "grad_norm": 0.37396717071533203, "learning_rate": 8.953238309577396e-05, "loss": 0.5211, "step": 11049 }, { "epoch": 0.8193074812782679, "grad_norm": 0.37553492188453674, "learning_rate": 8.95223805951488e-05, "loss": 0.5175, "step": 11050 }, { "epoch": 0.8193816267516868, "grad_norm": 0.33505871891975403, "learning_rate": 8.951237809452363e-05, "loss": 0.4664, "step": 11051 }, { "epoch": 0.8194557722251057, "grad_norm": 0.3507862091064453, "learning_rate": 8.950237559389848e-05, "loss": 0.4941, "step": 11052 }, { "epoch": 0.8195299176985245, "grad_norm": 0.33842554688453674, "learning_rate": 8.949237309327332e-05, "loss": 0.451, "step": 11053 }, { "epoch": 0.8196040631719433, "grad_norm": 0.38720598816871643, "learning_rate": 8.948237059264817e-05, "loss": 0.5356, "step": 11054 }, { "epoch": 0.8196782086453622, "grad_norm": 0.3651324510574341, "learning_rate": 8.947236809202301e-05, "loss": 0.4769, "step": 11055 }, { "epoch": 0.819752354118781, "grad_norm": 0.3792618215084076, "learning_rate": 8.946236559139786e-05, "loss": 0.5146, "step": 11056 }, { "epoch": 0.8198264995921999, "grad_norm": 0.3681706190109253, "learning_rate": 8.94523630907727e-05, "loss": 0.5084, "step": 11057 }, { "epoch": 0.8199006450656188, "grad_norm": 0.3632797598838806, "learning_rate": 8.944236059014755e-05, "loss": 0.5103, "step": 11058 }, { "epoch": 0.8199747905390375, "grad_norm": 0.36028486490249634, "learning_rate": 8.943235808952238e-05, "loss": 0.5002, "step": 11059 }, { "epoch": 0.8200489360124564, "grad_norm": 0.35967394709587097, "learning_rate": 8.942235558889723e-05, "loss": 0.4911, "step": 11060 }, { "epoch": 0.8201230814858753, "grad_norm": 0.3551005423069, "learning_rate": 8.941235308827207e-05, "loss": 0.4932, "step": 11061 }, { "epoch": 0.8201972269592941, "grad_norm": 0.3592704236507416, "learning_rate": 8.940235058764692e-05, "loss": 0.489, "step": 11062 }, { "epoch": 0.820271372432713, "grad_norm": 0.3519846200942993, "learning_rate": 8.939234808702176e-05, "loss": 0.4623, "step": 11063 }, { "epoch": 0.8203455179061319, "grad_norm": 0.35809212923049927, "learning_rate": 8.93823455863966e-05, "loss": 0.487, "step": 11064 }, { "epoch": 0.8204196633795506, "grad_norm": 0.3741070628166199, "learning_rate": 8.937234308577145e-05, "loss": 0.5141, "step": 11065 }, { "epoch": 0.8204938088529695, "grad_norm": 0.36201950907707214, "learning_rate": 8.936234058514628e-05, "loss": 0.4946, "step": 11066 }, { "epoch": 0.8205679543263884, "grad_norm": 0.36369001865386963, "learning_rate": 8.935233808452113e-05, "loss": 0.5038, "step": 11067 }, { "epoch": 0.8206420997998072, "grad_norm": 0.3646072447299957, "learning_rate": 8.934233558389597e-05, "loss": 0.5205, "step": 11068 }, { "epoch": 0.8207162452732261, "grad_norm": 0.37541288137435913, "learning_rate": 8.933233308327082e-05, "loss": 0.5334, "step": 11069 }, { "epoch": 0.820790390746645, "grad_norm": 0.3894508183002472, "learning_rate": 8.932233058264566e-05, "loss": 0.5491, "step": 11070 }, { "epoch": 0.8208645362200637, "grad_norm": 0.3700951337814331, "learning_rate": 8.931232808202051e-05, "loss": 0.5187, "step": 11071 }, { "epoch": 0.8209386816934826, "grad_norm": 0.34121808409690857, "learning_rate": 8.930232558139535e-05, "loss": 0.4329, "step": 11072 }, { "epoch": 0.8210128271669015, "grad_norm": 0.38097083568573, "learning_rate": 8.92923230807702e-05, "loss": 0.4881, "step": 11073 }, { "epoch": 0.8210869726403203, "grad_norm": 0.3634990453720093, "learning_rate": 8.928232058014503e-05, "loss": 0.5139, "step": 11074 }, { "epoch": 0.8211611181137392, "grad_norm": 0.33184128999710083, "learning_rate": 8.927231807951989e-05, "loss": 0.4486, "step": 11075 }, { "epoch": 0.821235263587158, "grad_norm": 0.3489062786102295, "learning_rate": 8.926231557889472e-05, "loss": 0.4619, "step": 11076 }, { "epoch": 0.8213094090605768, "grad_norm": 0.3428463041782379, "learning_rate": 8.925231307826957e-05, "loss": 0.4673, "step": 11077 }, { "epoch": 0.8213835545339957, "grad_norm": 0.3610955476760864, "learning_rate": 8.924231057764442e-05, "loss": 0.5013, "step": 11078 }, { "epoch": 0.8214577000074146, "grad_norm": 0.37489306926727295, "learning_rate": 8.923230807701926e-05, "loss": 0.5076, "step": 11079 }, { "epoch": 0.8215318454808334, "grad_norm": 0.3664226531982422, "learning_rate": 8.922230557639411e-05, "loss": 0.4649, "step": 11080 }, { "epoch": 0.8216059909542522, "grad_norm": 0.33490318059921265, "learning_rate": 8.921230307576895e-05, "loss": 0.4714, "step": 11081 }, { "epoch": 0.8216801364276711, "grad_norm": 0.3703753352165222, "learning_rate": 8.92023005751438e-05, "loss": 0.5391, "step": 11082 }, { "epoch": 0.8217542819010899, "grad_norm": 0.35744616389274597, "learning_rate": 8.919229807451864e-05, "loss": 0.5239, "step": 11083 }, { "epoch": 0.8218284273745088, "grad_norm": 0.38508161902427673, "learning_rate": 8.918229557389349e-05, "loss": 0.4847, "step": 11084 }, { "epoch": 0.8219025728479277, "grad_norm": 0.3471069037914276, "learning_rate": 8.917229307326832e-05, "loss": 0.4683, "step": 11085 }, { "epoch": 0.8219767183213464, "grad_norm": 0.3677787184715271, "learning_rate": 8.916229057264317e-05, "loss": 0.4935, "step": 11086 }, { "epoch": 0.8220508637947653, "grad_norm": 0.34300699830055237, "learning_rate": 8.915228807201801e-05, "loss": 0.4972, "step": 11087 }, { "epoch": 0.8221250092681842, "grad_norm": 0.3703146278858185, "learning_rate": 8.914228557139285e-05, "loss": 0.5056, "step": 11088 }, { "epoch": 0.822199154741603, "grad_norm": 0.3671397864818573, "learning_rate": 8.91322830707677e-05, "loss": 0.4894, "step": 11089 }, { "epoch": 0.8222733002150219, "grad_norm": 0.3728916049003601, "learning_rate": 8.912228057014254e-05, "loss": 0.5523, "step": 11090 }, { "epoch": 0.8223474456884408, "grad_norm": 0.358662486076355, "learning_rate": 8.911227806951739e-05, "loss": 0.5088, "step": 11091 }, { "epoch": 0.8224215911618595, "grad_norm": 0.36662107706069946, "learning_rate": 8.910227556889222e-05, "loss": 0.5298, "step": 11092 }, { "epoch": 0.8224957366352784, "grad_norm": 0.3377799391746521, "learning_rate": 8.909227306826707e-05, "loss": 0.4577, "step": 11093 }, { "epoch": 0.8225698821086973, "grad_norm": 0.39906662702560425, "learning_rate": 8.908227056764191e-05, "loss": 0.5148, "step": 11094 }, { "epoch": 0.8226440275821161, "grad_norm": 0.3584776222705841, "learning_rate": 8.907226806701676e-05, "loss": 0.5039, "step": 11095 }, { "epoch": 0.822718173055535, "grad_norm": 0.33703184127807617, "learning_rate": 8.90622655663916e-05, "loss": 0.4542, "step": 11096 }, { "epoch": 0.8227923185289538, "grad_norm": 0.33966344594955444, "learning_rate": 8.905226306576645e-05, "loss": 0.4519, "step": 11097 }, { "epoch": 0.8228664640023726, "grad_norm": 0.3582284152507782, "learning_rate": 8.904226056514129e-05, "loss": 0.5067, "step": 11098 }, { "epoch": 0.8229406094757915, "grad_norm": 0.3905051350593567, "learning_rate": 8.903225806451614e-05, "loss": 0.5237, "step": 11099 }, { "epoch": 0.8230147549492104, "grad_norm": 0.36678963899612427, "learning_rate": 8.902225556389097e-05, "loss": 0.48, "step": 11100 }, { "epoch": 0.8230889004226292, "grad_norm": 0.3870273530483246, "learning_rate": 8.901225306326581e-05, "loss": 0.5014, "step": 11101 }, { "epoch": 0.823163045896048, "grad_norm": 0.3548330068588257, "learning_rate": 8.900225056264066e-05, "loss": 0.5147, "step": 11102 }, { "epoch": 0.8232371913694669, "grad_norm": 0.3534836769104004, "learning_rate": 8.89922480620155e-05, "loss": 0.4887, "step": 11103 }, { "epoch": 0.8233113368428857, "grad_norm": 0.38357779383659363, "learning_rate": 8.898224556139035e-05, "loss": 0.5388, "step": 11104 }, { "epoch": 0.8233854823163046, "grad_norm": 0.3597860336303711, "learning_rate": 8.897224306076519e-05, "loss": 0.4815, "step": 11105 }, { "epoch": 0.8234596277897235, "grad_norm": 0.3417215943336487, "learning_rate": 8.896224056014004e-05, "loss": 0.4905, "step": 11106 }, { "epoch": 0.8235337732631423, "grad_norm": 0.37987905740737915, "learning_rate": 8.895223805951488e-05, "loss": 0.5317, "step": 11107 }, { "epoch": 0.8236079187365611, "grad_norm": 0.34785112738609314, "learning_rate": 8.894223555888973e-05, "loss": 0.4745, "step": 11108 }, { "epoch": 0.82368206420998, "grad_norm": 0.35152193903923035, "learning_rate": 8.893223305826456e-05, "loss": 0.4634, "step": 11109 }, { "epoch": 0.8237562096833988, "grad_norm": 0.35819557309150696, "learning_rate": 8.892223055763941e-05, "loss": 0.4932, "step": 11110 }, { "epoch": 0.8238303551568177, "grad_norm": 0.3875444531440735, "learning_rate": 8.891222805701425e-05, "loss": 0.5309, "step": 11111 }, { "epoch": 0.8239045006302366, "grad_norm": 0.3756196200847626, "learning_rate": 8.89022255563891e-05, "loss": 0.5532, "step": 11112 }, { "epoch": 0.8239786461036553, "grad_norm": 0.3676479458808899, "learning_rate": 8.889222305576395e-05, "loss": 0.4887, "step": 11113 }, { "epoch": 0.8240527915770742, "grad_norm": 0.3847915530204773, "learning_rate": 8.888222055513879e-05, "loss": 0.5387, "step": 11114 }, { "epoch": 0.8241269370504931, "grad_norm": 0.37046194076538086, "learning_rate": 8.887221805451364e-05, "loss": 0.4925, "step": 11115 }, { "epoch": 0.8242010825239119, "grad_norm": 0.347919762134552, "learning_rate": 8.886221555388848e-05, "loss": 0.4876, "step": 11116 }, { "epoch": 0.8242752279973308, "grad_norm": 0.34964749217033386, "learning_rate": 8.885221305326333e-05, "loss": 0.527, "step": 11117 }, { "epoch": 0.8243493734707497, "grad_norm": 0.3607200086116791, "learning_rate": 8.884221055263816e-05, "loss": 0.5051, "step": 11118 }, { "epoch": 0.8244235189441684, "grad_norm": 0.3587793707847595, "learning_rate": 8.883220805201302e-05, "loss": 0.5256, "step": 11119 }, { "epoch": 0.8244976644175873, "grad_norm": 0.37408339977264404, "learning_rate": 8.882220555138785e-05, "loss": 0.5048, "step": 11120 }, { "epoch": 0.8245718098910062, "grad_norm": 0.37271884083747864, "learning_rate": 8.88122030507627e-05, "loss": 0.4972, "step": 11121 }, { "epoch": 0.824645955364425, "grad_norm": 0.343822181224823, "learning_rate": 8.880220055013754e-05, "loss": 0.4981, "step": 11122 }, { "epoch": 0.8247201008378439, "grad_norm": 0.3593316674232483, "learning_rate": 8.879219804951239e-05, "loss": 0.5011, "step": 11123 }, { "epoch": 0.8247942463112627, "grad_norm": 0.35988447070121765, "learning_rate": 8.878219554888723e-05, "loss": 0.5221, "step": 11124 }, { "epoch": 0.8248683917846815, "grad_norm": 0.35350626707077026, "learning_rate": 8.877219304826206e-05, "loss": 0.5029, "step": 11125 }, { "epoch": 0.8249425372581004, "grad_norm": 0.3326171934604645, "learning_rate": 8.876219054763692e-05, "loss": 0.4573, "step": 11126 }, { "epoch": 0.8250166827315193, "grad_norm": 0.3717656433582306, "learning_rate": 8.875218804701175e-05, "loss": 0.5601, "step": 11127 }, { "epoch": 0.8250908282049381, "grad_norm": 0.3443772494792938, "learning_rate": 8.87421855463866e-05, "loss": 0.4835, "step": 11128 }, { "epoch": 0.825164973678357, "grad_norm": 0.3587319850921631, "learning_rate": 8.873218304576144e-05, "loss": 0.4862, "step": 11129 }, { "epoch": 0.8252391191517758, "grad_norm": 0.3624648451805115, "learning_rate": 8.872218054513629e-05, "loss": 0.5083, "step": 11130 }, { "epoch": 0.8253132646251946, "grad_norm": 0.3776462972164154, "learning_rate": 8.871217804451113e-05, "loss": 0.5297, "step": 11131 }, { "epoch": 0.8253874100986135, "grad_norm": 0.38195720314979553, "learning_rate": 8.870217554388598e-05, "loss": 0.5518, "step": 11132 }, { "epoch": 0.8254615555720324, "grad_norm": 0.3627121150493622, "learning_rate": 8.869217304326082e-05, "loss": 0.4901, "step": 11133 }, { "epoch": 0.8255357010454512, "grad_norm": 0.34242942929267883, "learning_rate": 8.868217054263567e-05, "loss": 0.4951, "step": 11134 }, { "epoch": 0.82560984651887, "grad_norm": 0.366993248462677, "learning_rate": 8.86721680420105e-05, "loss": 0.4914, "step": 11135 }, { "epoch": 0.8256839919922889, "grad_norm": 0.3695486783981323, "learning_rate": 8.866216554138535e-05, "loss": 0.4666, "step": 11136 }, { "epoch": 0.8257581374657077, "grad_norm": 0.3599245250225067, "learning_rate": 8.865216304076019e-05, "loss": 0.5241, "step": 11137 }, { "epoch": 0.8258322829391266, "grad_norm": 0.37447845935821533, "learning_rate": 8.864216054013503e-05, "loss": 0.5271, "step": 11138 }, { "epoch": 0.8259064284125455, "grad_norm": 0.3524257242679596, "learning_rate": 8.863215803950988e-05, "loss": 0.4957, "step": 11139 }, { "epoch": 0.8259805738859642, "grad_norm": 0.35985565185546875, "learning_rate": 8.862215553888472e-05, "loss": 0.5263, "step": 11140 }, { "epoch": 0.8260547193593831, "grad_norm": 0.3455909192562103, "learning_rate": 8.861215303825957e-05, "loss": 0.4724, "step": 11141 }, { "epoch": 0.826128864832802, "grad_norm": 0.3646751940250397, "learning_rate": 8.86021505376344e-05, "loss": 0.5004, "step": 11142 }, { "epoch": 0.8262030103062208, "grad_norm": 0.3564991056919098, "learning_rate": 8.859214803700925e-05, "loss": 0.4946, "step": 11143 }, { "epoch": 0.8262771557796397, "grad_norm": 0.36784377694129944, "learning_rate": 8.858214553638409e-05, "loss": 0.5338, "step": 11144 }, { "epoch": 0.8263513012530584, "grad_norm": 0.3581027090549469, "learning_rate": 8.857214303575894e-05, "loss": 0.4951, "step": 11145 }, { "epoch": 0.8264254467264773, "grad_norm": 0.37536945939064026, "learning_rate": 8.856214053513379e-05, "loss": 0.4782, "step": 11146 }, { "epoch": 0.8264995921998962, "grad_norm": 0.35677891969680786, "learning_rate": 8.855213803450863e-05, "loss": 0.4838, "step": 11147 }, { "epoch": 0.826573737673315, "grad_norm": 0.36610057950019836, "learning_rate": 8.854213553388348e-05, "loss": 0.5026, "step": 11148 }, { "epoch": 0.8266478831467339, "grad_norm": 0.38459181785583496, "learning_rate": 8.853213303325832e-05, "loss": 0.5292, "step": 11149 }, { "epoch": 0.8267220286201528, "grad_norm": 0.3676813840866089, "learning_rate": 8.852213053263317e-05, "loss": 0.517, "step": 11150 }, { "epoch": 0.8267961740935715, "grad_norm": 0.340283066034317, "learning_rate": 8.8512128032008e-05, "loss": 0.4542, "step": 11151 }, { "epoch": 0.8268703195669904, "grad_norm": 0.3647754192352295, "learning_rate": 8.850212553138286e-05, "loss": 0.4884, "step": 11152 }, { "epoch": 0.8269444650404093, "grad_norm": 0.3686801791191101, "learning_rate": 8.849212303075769e-05, "loss": 0.489, "step": 11153 }, { "epoch": 0.8270186105138281, "grad_norm": 0.34986820816993713, "learning_rate": 8.848212053013254e-05, "loss": 0.4667, "step": 11154 }, { "epoch": 0.827092755987247, "grad_norm": 0.34824177622795105, "learning_rate": 8.847211802950738e-05, "loss": 0.4915, "step": 11155 }, { "epoch": 0.8271669014606658, "grad_norm": 0.3638366758823395, "learning_rate": 8.846211552888223e-05, "loss": 0.5039, "step": 11156 }, { "epoch": 0.8272410469340846, "grad_norm": 0.35544082522392273, "learning_rate": 8.845211302825707e-05, "loss": 0.4421, "step": 11157 }, { "epoch": 0.8273151924075035, "grad_norm": 0.3901078402996063, "learning_rate": 8.844211052763192e-05, "loss": 0.5207, "step": 11158 }, { "epoch": 0.8273893378809224, "grad_norm": 0.35916668176651, "learning_rate": 8.843210802700676e-05, "loss": 0.5208, "step": 11159 }, { "epoch": 0.8274634833543412, "grad_norm": 0.38134610652923584, "learning_rate": 8.842210552638161e-05, "loss": 0.523, "step": 11160 }, { "epoch": 0.82753762882776, "grad_norm": 0.3266731798648834, "learning_rate": 8.841210302575644e-05, "loss": 0.4695, "step": 11161 }, { "epoch": 0.8276117743011789, "grad_norm": 0.3747055232524872, "learning_rate": 8.840210052513128e-05, "loss": 0.5089, "step": 11162 }, { "epoch": 0.8276859197745977, "grad_norm": 0.37103012204170227, "learning_rate": 8.839209802450613e-05, "loss": 0.4896, "step": 11163 }, { "epoch": 0.8277600652480166, "grad_norm": 0.3570898175239563, "learning_rate": 8.838209552388097e-05, "loss": 0.4968, "step": 11164 }, { "epoch": 0.8278342107214355, "grad_norm": 0.34902554750442505, "learning_rate": 8.837209302325582e-05, "loss": 0.4759, "step": 11165 }, { "epoch": 0.8279083561948543, "grad_norm": 0.3642508089542389, "learning_rate": 8.836209052263066e-05, "loss": 0.4891, "step": 11166 }, { "epoch": 0.8279825016682731, "grad_norm": 0.37135952711105347, "learning_rate": 8.835208802200551e-05, "loss": 0.4915, "step": 11167 }, { "epoch": 0.828056647141692, "grad_norm": 0.3995223045349121, "learning_rate": 8.834208552138034e-05, "loss": 0.4749, "step": 11168 }, { "epoch": 0.8281307926151108, "grad_norm": 0.38534054160118103, "learning_rate": 8.83320830207552e-05, "loss": 0.549, "step": 11169 }, { "epoch": 0.8282049380885297, "grad_norm": 0.36791178584098816, "learning_rate": 8.832208052013003e-05, "loss": 0.5283, "step": 11170 }, { "epoch": 0.8282790835619486, "grad_norm": 0.3471795320510864, "learning_rate": 8.831207801950488e-05, "loss": 0.4571, "step": 11171 }, { "epoch": 0.8283532290353673, "grad_norm": 0.34981098771095276, "learning_rate": 8.830207551887972e-05, "loss": 0.4649, "step": 11172 }, { "epoch": 0.8284273745087862, "grad_norm": 0.353819876909256, "learning_rate": 8.829207301825457e-05, "loss": 0.477, "step": 11173 }, { "epoch": 0.8285015199822051, "grad_norm": 0.36787697672843933, "learning_rate": 8.828207051762941e-05, "loss": 0.5055, "step": 11174 }, { "epoch": 0.8285756654556239, "grad_norm": 0.39683422446250916, "learning_rate": 8.827206801700424e-05, "loss": 0.5351, "step": 11175 }, { "epoch": 0.8286498109290428, "grad_norm": 0.3648909032344818, "learning_rate": 8.82620655163791e-05, "loss": 0.4553, "step": 11176 }, { "epoch": 0.8287239564024617, "grad_norm": 0.34111177921295166, "learning_rate": 8.825206301575393e-05, "loss": 0.4779, "step": 11177 }, { "epoch": 0.8287981018758804, "grad_norm": 0.4086942970752716, "learning_rate": 8.824206051512878e-05, "loss": 0.5698, "step": 11178 }, { "epoch": 0.8288722473492993, "grad_norm": 0.3789013624191284, "learning_rate": 8.823205801450363e-05, "loss": 0.5352, "step": 11179 }, { "epoch": 0.8289463928227182, "grad_norm": 0.3390820324420929, "learning_rate": 8.822205551387847e-05, "loss": 0.4671, "step": 11180 }, { "epoch": 0.829020538296137, "grad_norm": 0.3646802008152008, "learning_rate": 8.821205301325332e-05, "loss": 0.5128, "step": 11181 }, { "epoch": 0.8290946837695559, "grad_norm": 0.3563327491283417, "learning_rate": 8.820205051262816e-05, "loss": 0.4997, "step": 11182 }, { "epoch": 0.8291688292429747, "grad_norm": 0.3422268033027649, "learning_rate": 8.819204801200301e-05, "loss": 0.5239, "step": 11183 }, { "epoch": 0.8292429747163935, "grad_norm": 0.34811529517173767, "learning_rate": 8.818204551137786e-05, "loss": 0.4619, "step": 11184 }, { "epoch": 0.8293171201898124, "grad_norm": 0.3370758295059204, "learning_rate": 8.81720430107527e-05, "loss": 0.4643, "step": 11185 }, { "epoch": 0.8293912656632313, "grad_norm": 0.38059696555137634, "learning_rate": 8.816204051012755e-05, "loss": 0.4927, "step": 11186 }, { "epoch": 0.8294654111366501, "grad_norm": 0.36454614996910095, "learning_rate": 8.815203800950238e-05, "loss": 0.495, "step": 11187 }, { "epoch": 0.829539556610069, "grad_norm": 0.38574716448783875, "learning_rate": 8.814203550887722e-05, "loss": 0.5308, "step": 11188 }, { "epoch": 0.8296137020834878, "grad_norm": 0.3723447322845459, "learning_rate": 8.813203300825207e-05, "loss": 0.486, "step": 11189 }, { "epoch": 0.8296878475569066, "grad_norm": 0.3577049672603607, "learning_rate": 8.812203050762691e-05, "loss": 0.4827, "step": 11190 }, { "epoch": 0.8297619930303255, "grad_norm": 0.35980576276779175, "learning_rate": 8.811202800700176e-05, "loss": 0.498, "step": 11191 }, { "epoch": 0.8298361385037444, "grad_norm": 0.3585248589515686, "learning_rate": 8.81020255063766e-05, "loss": 0.4935, "step": 11192 }, { "epoch": 0.8299102839771632, "grad_norm": 0.36380428075790405, "learning_rate": 8.809202300575145e-05, "loss": 0.5242, "step": 11193 }, { "epoch": 0.829984429450582, "grad_norm": 0.3384777307510376, "learning_rate": 8.808202050512628e-05, "loss": 0.4835, "step": 11194 }, { "epoch": 0.8300585749240009, "grad_norm": 0.36171045899391174, "learning_rate": 8.807201800450114e-05, "loss": 0.5091, "step": 11195 }, { "epoch": 0.8301327203974197, "grad_norm": 0.38072699308395386, "learning_rate": 8.806201550387597e-05, "loss": 0.4989, "step": 11196 }, { "epoch": 0.8302068658708386, "grad_norm": 0.33805787563323975, "learning_rate": 8.805201300325082e-05, "loss": 0.4638, "step": 11197 }, { "epoch": 0.8302810113442575, "grad_norm": 0.3638848662376404, "learning_rate": 8.804201050262566e-05, "loss": 0.5038, "step": 11198 }, { "epoch": 0.8303551568176762, "grad_norm": 0.3517533540725708, "learning_rate": 8.80320080020005e-05, "loss": 0.4898, "step": 11199 }, { "epoch": 0.8304293022910951, "grad_norm": 0.4051373600959778, "learning_rate": 8.802200550137535e-05, "loss": 0.5318, "step": 11200 }, { "epoch": 0.830503447764514, "grad_norm": 0.36041024327278137, "learning_rate": 8.801200300075019e-05, "loss": 0.5094, "step": 11201 }, { "epoch": 0.8305775932379328, "grad_norm": 0.4032149910926819, "learning_rate": 8.800200050012504e-05, "loss": 0.5316, "step": 11202 }, { "epoch": 0.8306517387113517, "grad_norm": 0.34306004643440247, "learning_rate": 8.799199799949987e-05, "loss": 0.4672, "step": 11203 }, { "epoch": 0.8307258841847706, "grad_norm": 0.3597985506057739, "learning_rate": 8.798199549887472e-05, "loss": 0.4906, "step": 11204 }, { "epoch": 0.8308000296581893, "grad_norm": 0.3438200354576111, "learning_rate": 8.797199299824956e-05, "loss": 0.4912, "step": 11205 }, { "epoch": 0.8308741751316082, "grad_norm": 0.3437723219394684, "learning_rate": 8.796199049762441e-05, "loss": 0.4611, "step": 11206 }, { "epoch": 0.8309483206050271, "grad_norm": 0.3407055139541626, "learning_rate": 8.795198799699925e-05, "loss": 0.4731, "step": 11207 }, { "epoch": 0.8310224660784459, "grad_norm": 0.34453827142715454, "learning_rate": 8.79419854963741e-05, "loss": 0.4901, "step": 11208 }, { "epoch": 0.8310966115518648, "grad_norm": 0.336221307516098, "learning_rate": 8.793198299574894e-05, "loss": 0.449, "step": 11209 }, { "epoch": 0.8311707570252836, "grad_norm": 0.40183165669441223, "learning_rate": 8.792198049512379e-05, "loss": 0.5126, "step": 11210 }, { "epoch": 0.8312449024987024, "grad_norm": 0.4031599462032318, "learning_rate": 8.791197799449862e-05, "loss": 0.5339, "step": 11211 }, { "epoch": 0.8313190479721213, "grad_norm": 0.3346342444419861, "learning_rate": 8.790197549387346e-05, "loss": 0.4696, "step": 11212 }, { "epoch": 0.8313931934455402, "grad_norm": 0.35738542675971985, "learning_rate": 8.789197299324831e-05, "loss": 0.4911, "step": 11213 }, { "epoch": 0.831467338918959, "grad_norm": 0.3585471212863922, "learning_rate": 8.788197049262316e-05, "loss": 0.4877, "step": 11214 }, { "epoch": 0.8315414843923778, "grad_norm": 0.33172494173049927, "learning_rate": 8.7871967991998e-05, "loss": 0.4448, "step": 11215 }, { "epoch": 0.8316156298657967, "grad_norm": 0.3592740297317505, "learning_rate": 8.786196549137285e-05, "loss": 0.4878, "step": 11216 }, { "epoch": 0.8316897753392155, "grad_norm": 0.3698748052120209, "learning_rate": 8.78519629907477e-05, "loss": 0.4869, "step": 11217 }, { "epoch": 0.8317639208126344, "grad_norm": 0.3596709668636322, "learning_rate": 8.784196049012254e-05, "loss": 0.5008, "step": 11218 }, { "epoch": 0.8318380662860533, "grad_norm": 0.34755218029022217, "learning_rate": 8.783195798949739e-05, "loss": 0.4876, "step": 11219 }, { "epoch": 0.831912211759472, "grad_norm": 0.36600396037101746, "learning_rate": 8.782195548887223e-05, "loss": 0.4875, "step": 11220 }, { "epoch": 0.8319863572328909, "grad_norm": 0.367254376411438, "learning_rate": 8.781195298824708e-05, "loss": 0.5009, "step": 11221 }, { "epoch": 0.8320605027063098, "grad_norm": 0.3576389253139496, "learning_rate": 8.780195048762191e-05, "loss": 0.4908, "step": 11222 }, { "epoch": 0.8321346481797286, "grad_norm": 0.37434232234954834, "learning_rate": 8.779194798699676e-05, "loss": 0.5314, "step": 11223 }, { "epoch": 0.8322087936531475, "grad_norm": 0.3750442862510681, "learning_rate": 8.77819454863716e-05, "loss": 0.5342, "step": 11224 }, { "epoch": 0.8322829391265664, "grad_norm": 0.36306658387184143, "learning_rate": 8.777194298574644e-05, "loss": 0.4972, "step": 11225 }, { "epoch": 0.8323570845999851, "grad_norm": 0.34549281001091003, "learning_rate": 8.776194048512129e-05, "loss": 0.4879, "step": 11226 }, { "epoch": 0.832431230073404, "grad_norm": 0.3703691065311432, "learning_rate": 8.775193798449613e-05, "loss": 0.5024, "step": 11227 }, { "epoch": 0.8325053755468229, "grad_norm": 0.37786173820495605, "learning_rate": 8.774193548387098e-05, "loss": 0.5176, "step": 11228 }, { "epoch": 0.8325795210202417, "grad_norm": 0.34852731227874756, "learning_rate": 8.773193298324581e-05, "loss": 0.4931, "step": 11229 }, { "epoch": 0.8326536664936606, "grad_norm": 0.3591529130935669, "learning_rate": 8.772193048262066e-05, "loss": 0.4664, "step": 11230 }, { "epoch": 0.8327278119670795, "grad_norm": 0.3463129997253418, "learning_rate": 8.77119279819955e-05, "loss": 0.4898, "step": 11231 }, { "epoch": 0.8328019574404982, "grad_norm": 0.34953051805496216, "learning_rate": 8.770192548137035e-05, "loss": 0.496, "step": 11232 }, { "epoch": 0.8328761029139171, "grad_norm": 0.34486016631126404, "learning_rate": 8.769192298074519e-05, "loss": 0.5193, "step": 11233 }, { "epoch": 0.832950248387336, "grad_norm": 0.3866726756095886, "learning_rate": 8.768192048012004e-05, "loss": 0.5147, "step": 11234 }, { "epoch": 0.8330243938607548, "grad_norm": 0.3639717102050781, "learning_rate": 8.767191797949488e-05, "loss": 0.4948, "step": 11235 }, { "epoch": 0.8330985393341737, "grad_norm": 0.35237953066825867, "learning_rate": 8.766191547886971e-05, "loss": 0.4815, "step": 11236 }, { "epoch": 0.8331726848075925, "grad_norm": 0.35864412784576416, "learning_rate": 8.765191297824456e-05, "loss": 0.4923, "step": 11237 }, { "epoch": 0.8332468302810113, "grad_norm": 0.35739222168922424, "learning_rate": 8.76419104776194e-05, "loss": 0.479, "step": 11238 }, { "epoch": 0.8333209757544302, "grad_norm": 0.34677034616470337, "learning_rate": 8.763190797699425e-05, "loss": 0.4557, "step": 11239 }, { "epoch": 0.8333951212278491, "grad_norm": 0.348702996969223, "learning_rate": 8.762190547636909e-05, "loss": 0.4622, "step": 11240 }, { "epoch": 0.8334692667012679, "grad_norm": 0.3652549684047699, "learning_rate": 8.761190297574394e-05, "loss": 0.4892, "step": 11241 }, { "epoch": 0.8335434121746867, "grad_norm": 0.327724426984787, "learning_rate": 8.760190047511878e-05, "loss": 0.4772, "step": 11242 }, { "epoch": 0.8336175576481056, "grad_norm": 0.3765978217124939, "learning_rate": 8.759189797449363e-05, "loss": 0.5253, "step": 11243 }, { "epoch": 0.8336917031215244, "grad_norm": 0.35873347520828247, "learning_rate": 8.758189547386846e-05, "loss": 0.4967, "step": 11244 }, { "epoch": 0.8337658485949433, "grad_norm": 0.373883455991745, "learning_rate": 8.757189297324332e-05, "loss": 0.4815, "step": 11245 }, { "epoch": 0.8338399940683622, "grad_norm": 0.36793017387390137, "learning_rate": 8.756189047261815e-05, "loss": 0.5065, "step": 11246 }, { "epoch": 0.833914139541781, "grad_norm": 0.37441280484199524, "learning_rate": 8.7551887971993e-05, "loss": 0.5268, "step": 11247 }, { "epoch": 0.8339882850151998, "grad_norm": 0.36028945446014404, "learning_rate": 8.754188547136784e-05, "loss": 0.4979, "step": 11248 }, { "epoch": 0.8340624304886187, "grad_norm": 0.4164731800556183, "learning_rate": 8.753188297074269e-05, "loss": 0.5201, "step": 11249 }, { "epoch": 0.8341365759620375, "grad_norm": 0.36454880237579346, "learning_rate": 8.752188047011753e-05, "loss": 0.5087, "step": 11250 }, { "epoch": 0.8342107214354564, "grad_norm": 0.36855265498161316, "learning_rate": 8.751187796949238e-05, "loss": 0.499, "step": 11251 }, { "epoch": 0.8342848669088753, "grad_norm": 0.3606919050216675, "learning_rate": 8.750187546886723e-05, "loss": 0.484, "step": 11252 }, { "epoch": 0.834359012382294, "grad_norm": 0.34483662247657776, "learning_rate": 8.749187296824207e-05, "loss": 0.4504, "step": 11253 }, { "epoch": 0.8344331578557129, "grad_norm": 0.34419235587120056, "learning_rate": 8.748187046761692e-05, "loss": 0.4662, "step": 11254 }, { "epoch": 0.8345073033291318, "grad_norm": 0.3799090087413788, "learning_rate": 8.747186796699175e-05, "loss": 0.4923, "step": 11255 }, { "epoch": 0.8345814488025506, "grad_norm": 0.37074917554855347, "learning_rate": 8.74618654663666e-05, "loss": 0.4719, "step": 11256 }, { "epoch": 0.8346555942759695, "grad_norm": 0.3881815969944, "learning_rate": 8.745186296574144e-05, "loss": 0.554, "step": 11257 }, { "epoch": 0.8347297397493884, "grad_norm": 0.35443976521492004, "learning_rate": 8.744186046511629e-05, "loss": 0.4517, "step": 11258 }, { "epoch": 0.8348038852228071, "grad_norm": 0.3605443239212036, "learning_rate": 8.743185796449113e-05, "loss": 0.5249, "step": 11259 }, { "epoch": 0.834878030696226, "grad_norm": 0.3524761199951172, "learning_rate": 8.742185546386598e-05, "loss": 0.4567, "step": 11260 }, { "epoch": 0.8349521761696448, "grad_norm": 0.3745114207267761, "learning_rate": 8.741185296324082e-05, "loss": 0.5099, "step": 11261 }, { "epoch": 0.8350263216430637, "grad_norm": 0.36862751841545105, "learning_rate": 8.740185046261565e-05, "loss": 0.5239, "step": 11262 }, { "epoch": 0.8351004671164826, "grad_norm": 0.35141220688819885, "learning_rate": 8.73918479619905e-05, "loss": 0.467, "step": 11263 }, { "epoch": 0.8351746125899013, "grad_norm": 0.3460978865623474, "learning_rate": 8.738184546136534e-05, "loss": 0.4596, "step": 11264 }, { "epoch": 0.8352487580633202, "grad_norm": 0.3618852496147156, "learning_rate": 8.737184296074019e-05, "loss": 0.4947, "step": 11265 }, { "epoch": 0.8353229035367391, "grad_norm": 0.35639122128486633, "learning_rate": 8.736184046011503e-05, "loss": 0.4735, "step": 11266 }, { "epoch": 0.8353970490101579, "grad_norm": 0.36630287766456604, "learning_rate": 8.735183795948988e-05, "loss": 0.4741, "step": 11267 }, { "epoch": 0.8354711944835768, "grad_norm": 0.3625391125679016, "learning_rate": 8.734183545886472e-05, "loss": 0.455, "step": 11268 }, { "epoch": 0.8355453399569956, "grad_norm": 0.371473491191864, "learning_rate": 8.733183295823957e-05, "loss": 0.4749, "step": 11269 }, { "epoch": 0.8356194854304144, "grad_norm": 0.3575145900249481, "learning_rate": 8.73218304576144e-05, "loss": 0.4862, "step": 11270 }, { "epoch": 0.8356936309038333, "grad_norm": 0.368645042181015, "learning_rate": 8.731182795698926e-05, "loss": 0.5046, "step": 11271 }, { "epoch": 0.8357677763772522, "grad_norm": 0.3717888593673706, "learning_rate": 8.730182545636409e-05, "loss": 0.5083, "step": 11272 }, { "epoch": 0.835841921850671, "grad_norm": 0.3493616282939911, "learning_rate": 8.729182295573894e-05, "loss": 0.466, "step": 11273 }, { "epoch": 0.8359160673240899, "grad_norm": 0.3458845317363739, "learning_rate": 8.728182045511378e-05, "loss": 0.4644, "step": 11274 }, { "epoch": 0.8359902127975087, "grad_norm": 0.3616481423377991, "learning_rate": 8.727181795448862e-05, "loss": 0.4657, "step": 11275 }, { "epoch": 0.8360643582709275, "grad_norm": 0.38107186555862427, "learning_rate": 8.726181545386347e-05, "loss": 0.5305, "step": 11276 }, { "epoch": 0.8361385037443464, "grad_norm": 0.3603219985961914, "learning_rate": 8.72518129532383e-05, "loss": 0.4904, "step": 11277 }, { "epoch": 0.8362126492177653, "grad_norm": 0.38206762075424194, "learning_rate": 8.724181045261316e-05, "loss": 0.5095, "step": 11278 }, { "epoch": 0.836286794691184, "grad_norm": 0.35186901688575745, "learning_rate": 8.723180795198799e-05, "loss": 0.4801, "step": 11279 }, { "epoch": 0.8363609401646029, "grad_norm": 0.387713760137558, "learning_rate": 8.722180545136284e-05, "loss": 0.5628, "step": 11280 }, { "epoch": 0.8364350856380218, "grad_norm": 0.3397371172904968, "learning_rate": 8.721180295073768e-05, "loss": 0.4581, "step": 11281 }, { "epoch": 0.8365092311114406, "grad_norm": 0.3702295422554016, "learning_rate": 8.720180045011253e-05, "loss": 0.5114, "step": 11282 }, { "epoch": 0.8365833765848595, "grad_norm": 0.35252973437309265, "learning_rate": 8.719179794948737e-05, "loss": 0.499, "step": 11283 }, { "epoch": 0.8366575220582784, "grad_norm": 0.3693440854549408, "learning_rate": 8.718179544886222e-05, "loss": 0.5139, "step": 11284 }, { "epoch": 0.8367316675316971, "grad_norm": 0.35695716738700867, "learning_rate": 8.717179294823707e-05, "loss": 0.485, "step": 11285 }, { "epoch": 0.836805813005116, "grad_norm": 0.34802883863449097, "learning_rate": 8.716179044761191e-05, "loss": 0.4719, "step": 11286 }, { "epoch": 0.8368799584785349, "grad_norm": 0.35535821318626404, "learning_rate": 8.715178794698676e-05, "loss": 0.4889, "step": 11287 }, { "epoch": 0.8369541039519537, "grad_norm": 0.35101646184921265, "learning_rate": 8.71417854463616e-05, "loss": 0.4827, "step": 11288 }, { "epoch": 0.8370282494253726, "grad_norm": 0.3469407260417938, "learning_rate": 8.713178294573645e-05, "loss": 0.4887, "step": 11289 }, { "epoch": 0.8371023948987915, "grad_norm": 0.3688293695449829, "learning_rate": 8.712178044511128e-05, "loss": 0.4832, "step": 11290 }, { "epoch": 0.8371765403722102, "grad_norm": 0.35622021555900574, "learning_rate": 8.711177794448613e-05, "loss": 0.5052, "step": 11291 }, { "epoch": 0.8372506858456291, "grad_norm": 0.33553269505500793, "learning_rate": 8.710177544386097e-05, "loss": 0.4617, "step": 11292 }, { "epoch": 0.837324831319048, "grad_norm": 0.36935389041900635, "learning_rate": 8.709177294323582e-05, "loss": 0.5148, "step": 11293 }, { "epoch": 0.8373989767924668, "grad_norm": 0.3578699827194214, "learning_rate": 8.708177044261066e-05, "loss": 0.5249, "step": 11294 }, { "epoch": 0.8374731222658857, "grad_norm": 0.3621857166290283, "learning_rate": 8.707176794198551e-05, "loss": 0.4976, "step": 11295 }, { "epoch": 0.8375472677393045, "grad_norm": 0.34995701909065247, "learning_rate": 8.706176544136035e-05, "loss": 0.4724, "step": 11296 }, { "epoch": 0.8376214132127233, "grad_norm": 0.3733617961406708, "learning_rate": 8.70517629407352e-05, "loss": 0.531, "step": 11297 }, { "epoch": 0.8376955586861422, "grad_norm": 0.38057300448417664, "learning_rate": 8.704176044011003e-05, "loss": 0.5467, "step": 11298 }, { "epoch": 0.8377697041595611, "grad_norm": 0.3649918735027313, "learning_rate": 8.703175793948487e-05, "loss": 0.4937, "step": 11299 }, { "epoch": 0.8378438496329799, "grad_norm": 0.34344717860221863, "learning_rate": 8.702175543885972e-05, "loss": 0.4598, "step": 11300 }, { "epoch": 0.8379179951063987, "grad_norm": 0.35100916028022766, "learning_rate": 8.701175293823456e-05, "loss": 0.4729, "step": 11301 }, { "epoch": 0.8379921405798176, "grad_norm": 0.3574994206428528, "learning_rate": 8.700175043760941e-05, "loss": 0.5096, "step": 11302 }, { "epoch": 0.8380662860532364, "grad_norm": 0.3509078621864319, "learning_rate": 8.699174793698425e-05, "loss": 0.4999, "step": 11303 }, { "epoch": 0.8381404315266553, "grad_norm": 0.35314327478408813, "learning_rate": 8.69817454363591e-05, "loss": 0.5182, "step": 11304 }, { "epoch": 0.8382145770000742, "grad_norm": 0.35819146037101746, "learning_rate": 8.697174293573393e-05, "loss": 0.4961, "step": 11305 }, { "epoch": 0.838288722473493, "grad_norm": 0.3741656243801117, "learning_rate": 8.696174043510878e-05, "loss": 0.5117, "step": 11306 }, { "epoch": 0.8383628679469118, "grad_norm": 0.36063745617866516, "learning_rate": 8.695173793448362e-05, "loss": 0.5268, "step": 11307 }, { "epoch": 0.8384370134203307, "grad_norm": 0.34292104840278625, "learning_rate": 8.694173543385847e-05, "loss": 0.4769, "step": 11308 }, { "epoch": 0.8385111588937495, "grad_norm": 0.35003355145454407, "learning_rate": 8.693173293323331e-05, "loss": 0.5044, "step": 11309 }, { "epoch": 0.8385853043671684, "grad_norm": 0.3568572998046875, "learning_rate": 8.692173043260816e-05, "loss": 0.4915, "step": 11310 }, { "epoch": 0.8386594498405873, "grad_norm": 0.34006303548812866, "learning_rate": 8.6911727931983e-05, "loss": 0.4678, "step": 11311 }, { "epoch": 0.838733595314006, "grad_norm": 0.3793726861476898, "learning_rate": 8.690172543135783e-05, "loss": 0.5296, "step": 11312 }, { "epoch": 0.8388077407874249, "grad_norm": 0.36007559299468994, "learning_rate": 8.689172293073268e-05, "loss": 0.5283, "step": 11313 }, { "epoch": 0.8388818862608438, "grad_norm": 0.3709653615951538, "learning_rate": 8.688172043010752e-05, "loss": 0.4901, "step": 11314 }, { "epoch": 0.8389560317342626, "grad_norm": 0.373165488243103, "learning_rate": 8.687171792948237e-05, "loss": 0.4806, "step": 11315 }, { "epoch": 0.8390301772076815, "grad_norm": 0.36383214592933655, "learning_rate": 8.686171542885721e-05, "loss": 0.4736, "step": 11316 }, { "epoch": 0.8391043226811004, "grad_norm": 0.36302539706230164, "learning_rate": 8.685171292823206e-05, "loss": 0.4792, "step": 11317 }, { "epoch": 0.8391784681545191, "grad_norm": 0.3473854959011078, "learning_rate": 8.68417104276069e-05, "loss": 0.4875, "step": 11318 }, { "epoch": 0.839252613627938, "grad_norm": 0.37054628133773804, "learning_rate": 8.683170792698175e-05, "loss": 0.5017, "step": 11319 }, { "epoch": 0.8393267591013569, "grad_norm": 0.35878241062164307, "learning_rate": 8.68217054263566e-05, "loss": 0.4734, "step": 11320 }, { "epoch": 0.8394009045747757, "grad_norm": 0.35994765162467957, "learning_rate": 8.681170292573144e-05, "loss": 0.5205, "step": 11321 }, { "epoch": 0.8394750500481946, "grad_norm": 0.3745739459991455, "learning_rate": 8.680170042510629e-05, "loss": 0.4964, "step": 11322 }, { "epoch": 0.8395491955216134, "grad_norm": 0.34177154302597046, "learning_rate": 8.679169792448112e-05, "loss": 0.4757, "step": 11323 }, { "epoch": 0.8396233409950322, "grad_norm": 0.3343190848827362, "learning_rate": 8.678169542385597e-05, "loss": 0.463, "step": 11324 }, { "epoch": 0.8396974864684511, "grad_norm": 0.3509640395641327, "learning_rate": 8.677169292323081e-05, "loss": 0.5018, "step": 11325 }, { "epoch": 0.83977163194187, "grad_norm": 0.33369457721710205, "learning_rate": 8.676169042260566e-05, "loss": 0.4539, "step": 11326 }, { "epoch": 0.8398457774152888, "grad_norm": 0.3431704044342041, "learning_rate": 8.67516879219805e-05, "loss": 0.4836, "step": 11327 }, { "epoch": 0.8399199228887076, "grad_norm": 0.3653879165649414, "learning_rate": 8.674168542135535e-05, "loss": 0.4845, "step": 11328 }, { "epoch": 0.8399940683621265, "grad_norm": 0.36392125487327576, "learning_rate": 8.673168292073019e-05, "loss": 0.4741, "step": 11329 }, { "epoch": 0.8400682138355453, "grad_norm": 0.3408660292625427, "learning_rate": 8.672168042010504e-05, "loss": 0.4698, "step": 11330 }, { "epoch": 0.8401423593089642, "grad_norm": 0.3857508897781372, "learning_rate": 8.671167791947987e-05, "loss": 0.5204, "step": 11331 }, { "epoch": 0.8402165047823831, "grad_norm": 0.36466097831726074, "learning_rate": 8.670167541885472e-05, "loss": 0.4679, "step": 11332 }, { "epoch": 0.8402906502558019, "grad_norm": 0.35235628485679626, "learning_rate": 8.669167291822956e-05, "loss": 0.4957, "step": 11333 }, { "epoch": 0.8403647957292207, "grad_norm": 0.35070037841796875, "learning_rate": 8.668167041760441e-05, "loss": 0.4793, "step": 11334 }, { "epoch": 0.8404389412026396, "grad_norm": 0.3668156564235687, "learning_rate": 8.667166791697925e-05, "loss": 0.5234, "step": 11335 }, { "epoch": 0.8405130866760584, "grad_norm": 0.3519876003265381, "learning_rate": 8.666166541635409e-05, "loss": 0.4643, "step": 11336 }, { "epoch": 0.8405872321494773, "grad_norm": 0.3531254231929779, "learning_rate": 8.665166291572894e-05, "loss": 0.4849, "step": 11337 }, { "epoch": 0.8406613776228962, "grad_norm": 0.36850741505622864, "learning_rate": 8.664166041510377e-05, "loss": 0.4989, "step": 11338 }, { "epoch": 0.8407355230963149, "grad_norm": 0.3536955416202545, "learning_rate": 8.663165791447863e-05, "loss": 0.4871, "step": 11339 }, { "epoch": 0.8408096685697338, "grad_norm": 0.3522993326187134, "learning_rate": 8.662165541385346e-05, "loss": 0.4914, "step": 11340 }, { "epoch": 0.8408838140431527, "grad_norm": 0.3427308201789856, "learning_rate": 8.661165291322831e-05, "loss": 0.4749, "step": 11341 }, { "epoch": 0.8409579595165715, "grad_norm": 0.36194175481796265, "learning_rate": 8.660165041260315e-05, "loss": 0.5086, "step": 11342 }, { "epoch": 0.8410321049899904, "grad_norm": 0.374455064535141, "learning_rate": 8.6591647911978e-05, "loss": 0.5306, "step": 11343 }, { "epoch": 0.8411062504634093, "grad_norm": 0.3673086166381836, "learning_rate": 8.658164541135284e-05, "loss": 0.4535, "step": 11344 }, { "epoch": 0.841180395936828, "grad_norm": 0.3542517423629761, "learning_rate": 8.657164291072769e-05, "loss": 0.4746, "step": 11345 }, { "epoch": 0.8412545414102469, "grad_norm": 0.34832483530044556, "learning_rate": 8.656164041010253e-05, "loss": 0.4754, "step": 11346 }, { "epoch": 0.8413286868836658, "grad_norm": 0.35019877552986145, "learning_rate": 8.655163790947738e-05, "loss": 0.4867, "step": 11347 }, { "epoch": 0.8414028323570846, "grad_norm": 0.3759027123451233, "learning_rate": 8.654163540885221e-05, "loss": 0.5064, "step": 11348 }, { "epoch": 0.8414769778305035, "grad_norm": 0.35254964232444763, "learning_rate": 8.653163290822705e-05, "loss": 0.4768, "step": 11349 }, { "epoch": 0.8415511233039223, "grad_norm": 0.3494580388069153, "learning_rate": 8.65216304076019e-05, "loss": 0.5286, "step": 11350 }, { "epoch": 0.8416252687773411, "grad_norm": 0.36404427886009216, "learning_rate": 8.651162790697674e-05, "loss": 0.5267, "step": 11351 }, { "epoch": 0.84169941425076, "grad_norm": 0.34572869539260864, "learning_rate": 8.650162540635159e-05, "loss": 0.4769, "step": 11352 }, { "epoch": 0.8417735597241789, "grad_norm": 0.3350917100906372, "learning_rate": 8.649162290572644e-05, "loss": 0.4613, "step": 11353 }, { "epoch": 0.8418477051975977, "grad_norm": 0.3400014638900757, "learning_rate": 8.648162040510128e-05, "loss": 0.4694, "step": 11354 }, { "epoch": 0.8419218506710165, "grad_norm": 0.36616453528404236, "learning_rate": 8.647161790447613e-05, "loss": 0.4854, "step": 11355 }, { "epoch": 0.8419959961444354, "grad_norm": 0.38297826051712036, "learning_rate": 8.646161540385096e-05, "loss": 0.5331, "step": 11356 }, { "epoch": 0.8420701416178542, "grad_norm": 0.34018415212631226, "learning_rate": 8.645161290322581e-05, "loss": 0.4577, "step": 11357 }, { "epoch": 0.8421442870912731, "grad_norm": 0.36482688784599304, "learning_rate": 8.644161040260067e-05, "loss": 0.505, "step": 11358 }, { "epoch": 0.842218432564692, "grad_norm": 0.3821985125541687, "learning_rate": 8.64316079019755e-05, "loss": 0.5518, "step": 11359 }, { "epoch": 0.8422925780381108, "grad_norm": 0.3547609746456146, "learning_rate": 8.642160540135034e-05, "loss": 0.5207, "step": 11360 }, { "epoch": 0.8423667235115296, "grad_norm": 0.3715182840824127, "learning_rate": 8.641160290072519e-05, "loss": 0.4901, "step": 11361 }, { "epoch": 0.8424408689849485, "grad_norm": 0.3423064053058624, "learning_rate": 8.640160040010003e-05, "loss": 0.5014, "step": 11362 }, { "epoch": 0.8425150144583673, "grad_norm": 0.34653735160827637, "learning_rate": 8.639159789947488e-05, "loss": 0.4907, "step": 11363 }, { "epoch": 0.8425891599317862, "grad_norm": 0.36882373690605164, "learning_rate": 8.638159539884971e-05, "loss": 0.4986, "step": 11364 }, { "epoch": 0.8426633054052051, "grad_norm": 0.350894570350647, "learning_rate": 8.637159289822457e-05, "loss": 0.4949, "step": 11365 }, { "epoch": 0.8427374508786238, "grad_norm": 0.3502345681190491, "learning_rate": 8.63615903975994e-05, "loss": 0.4879, "step": 11366 }, { "epoch": 0.8428115963520427, "grad_norm": 0.3574056327342987, "learning_rate": 8.635158789697425e-05, "loss": 0.4666, "step": 11367 }, { "epoch": 0.8428857418254616, "grad_norm": 0.3521539270877838, "learning_rate": 8.634158539634909e-05, "loss": 0.4688, "step": 11368 }, { "epoch": 0.8429598872988804, "grad_norm": 0.3660106360912323, "learning_rate": 8.633158289572394e-05, "loss": 0.4979, "step": 11369 }, { "epoch": 0.8430340327722993, "grad_norm": 0.32893019914627075, "learning_rate": 8.632158039509878e-05, "loss": 0.4779, "step": 11370 }, { "epoch": 0.8431081782457182, "grad_norm": 0.3612692654132843, "learning_rate": 8.631157789447363e-05, "loss": 0.495, "step": 11371 }, { "epoch": 0.8431823237191369, "grad_norm": 0.3395293653011322, "learning_rate": 8.630157539384847e-05, "loss": 0.496, "step": 11372 }, { "epoch": 0.8432564691925558, "grad_norm": 0.3828408122062683, "learning_rate": 8.62915728932233e-05, "loss": 0.51, "step": 11373 }, { "epoch": 0.8433306146659746, "grad_norm": 0.36013343930244446, "learning_rate": 8.628157039259815e-05, "loss": 0.4824, "step": 11374 }, { "epoch": 0.8434047601393935, "grad_norm": 0.3823208808898926, "learning_rate": 8.627156789197299e-05, "loss": 0.5012, "step": 11375 }, { "epoch": 0.8434789056128124, "grad_norm": 0.34787604212760925, "learning_rate": 8.626156539134784e-05, "loss": 0.4982, "step": 11376 }, { "epoch": 0.8435530510862311, "grad_norm": 0.37900510430336, "learning_rate": 8.625156289072268e-05, "loss": 0.5384, "step": 11377 }, { "epoch": 0.84362719655965, "grad_norm": 0.34025806188583374, "learning_rate": 8.624156039009753e-05, "loss": 0.4769, "step": 11378 }, { "epoch": 0.8437013420330689, "grad_norm": 0.38643166422843933, "learning_rate": 8.623155788947237e-05, "loss": 0.5118, "step": 11379 }, { "epoch": 0.8437754875064877, "grad_norm": 0.36718201637268066, "learning_rate": 8.622155538884722e-05, "loss": 0.5139, "step": 11380 }, { "epoch": 0.8438496329799066, "grad_norm": 0.35343480110168457, "learning_rate": 8.621155288822205e-05, "loss": 0.448, "step": 11381 }, { "epoch": 0.8439237784533254, "grad_norm": 0.39561334252357483, "learning_rate": 8.62015503875969e-05, "loss": 0.5351, "step": 11382 }, { "epoch": 0.8439979239267442, "grad_norm": 0.35729503631591797, "learning_rate": 8.619154788697174e-05, "loss": 0.4989, "step": 11383 }, { "epoch": 0.8440720694001631, "grad_norm": 0.362976998090744, "learning_rate": 8.618154538634659e-05, "loss": 0.4989, "step": 11384 }, { "epoch": 0.844146214873582, "grad_norm": 0.3492981493473053, "learning_rate": 8.617154288572143e-05, "loss": 0.4752, "step": 11385 }, { "epoch": 0.8442203603470008, "grad_norm": 0.3768841028213501, "learning_rate": 8.616154038509628e-05, "loss": 0.4883, "step": 11386 }, { "epoch": 0.8442945058204196, "grad_norm": 0.34992191195487976, "learning_rate": 8.615153788447112e-05, "loss": 0.5005, "step": 11387 }, { "epoch": 0.8443686512938385, "grad_norm": 0.34742340445518494, "learning_rate": 8.614153538384597e-05, "loss": 0.4798, "step": 11388 }, { "epoch": 0.8444427967672573, "grad_norm": 0.352926105260849, "learning_rate": 8.61315328832208e-05, "loss": 0.5178, "step": 11389 }, { "epoch": 0.8445169422406762, "grad_norm": 0.3569906949996948, "learning_rate": 8.612153038259566e-05, "loss": 0.5013, "step": 11390 }, { "epoch": 0.8445910877140951, "grad_norm": 0.35429060459136963, "learning_rate": 8.61115278819705e-05, "loss": 0.4895, "step": 11391 }, { "epoch": 0.8446652331875139, "grad_norm": 0.3720686435699463, "learning_rate": 8.610152538134534e-05, "loss": 0.4809, "step": 11392 }, { "epoch": 0.8447393786609327, "grad_norm": 0.3430246412754059, "learning_rate": 8.60915228807202e-05, "loss": 0.4871, "step": 11393 }, { "epoch": 0.8448135241343516, "grad_norm": 0.33899715542793274, "learning_rate": 8.608152038009503e-05, "loss": 0.4621, "step": 11394 }, { "epoch": 0.8448876696077704, "grad_norm": 0.3471563458442688, "learning_rate": 8.607151787946988e-05, "loss": 0.4873, "step": 11395 }, { "epoch": 0.8449618150811893, "grad_norm": 0.3676677644252777, "learning_rate": 8.606151537884472e-05, "loss": 0.5062, "step": 11396 }, { "epoch": 0.8450359605546082, "grad_norm": 0.34970295429229736, "learning_rate": 8.605151287821957e-05, "loss": 0.5047, "step": 11397 }, { "epoch": 0.8451101060280269, "grad_norm": 0.371002733707428, "learning_rate": 8.60415103775944e-05, "loss": 0.5043, "step": 11398 }, { "epoch": 0.8451842515014458, "grad_norm": 0.367148220539093, "learning_rate": 8.603150787696924e-05, "loss": 0.5186, "step": 11399 }, { "epoch": 0.8452583969748647, "grad_norm": 0.36196261644363403, "learning_rate": 8.60215053763441e-05, "loss": 0.5009, "step": 11400 }, { "epoch": 0.8453325424482835, "grad_norm": 0.3825235366821289, "learning_rate": 8.601150287571893e-05, "loss": 0.5003, "step": 11401 }, { "epoch": 0.8454066879217024, "grad_norm": 0.35130536556243896, "learning_rate": 8.600150037509378e-05, "loss": 0.4841, "step": 11402 }, { "epoch": 0.8454808333951213, "grad_norm": 0.38727885484695435, "learning_rate": 8.599149787446862e-05, "loss": 0.5219, "step": 11403 }, { "epoch": 0.84555497886854, "grad_norm": 0.32962191104888916, "learning_rate": 8.598149537384347e-05, "loss": 0.44, "step": 11404 }, { "epoch": 0.8456291243419589, "grad_norm": 0.38338378071784973, "learning_rate": 8.59714928732183e-05, "loss": 0.5352, "step": 11405 }, { "epoch": 0.8457032698153778, "grad_norm": 0.3478950560092926, "learning_rate": 8.596149037259316e-05, "loss": 0.4767, "step": 11406 }, { "epoch": 0.8457774152887966, "grad_norm": 0.3628107011318207, "learning_rate": 8.5951487871968e-05, "loss": 0.4921, "step": 11407 }, { "epoch": 0.8458515607622155, "grad_norm": 0.3749772012233734, "learning_rate": 8.594148537134285e-05, "loss": 0.5007, "step": 11408 }, { "epoch": 0.8459257062356343, "grad_norm": 0.35387900471687317, "learning_rate": 8.593148287071768e-05, "loss": 0.4946, "step": 11409 }, { "epoch": 0.8459998517090531, "grad_norm": 0.3606278598308563, "learning_rate": 8.592148037009252e-05, "loss": 0.4859, "step": 11410 }, { "epoch": 0.846073997182472, "grad_norm": 0.3774593472480774, "learning_rate": 8.591147786946737e-05, "loss": 0.5524, "step": 11411 }, { "epoch": 0.8461481426558909, "grad_norm": 0.3704562187194824, "learning_rate": 8.590147536884221e-05, "loss": 0.5041, "step": 11412 }, { "epoch": 0.8462222881293097, "grad_norm": 0.35490211844444275, "learning_rate": 8.589147286821706e-05, "loss": 0.4718, "step": 11413 }, { "epoch": 0.8462964336027285, "grad_norm": 0.3569101095199585, "learning_rate": 8.58814703675919e-05, "loss": 0.5099, "step": 11414 }, { "epoch": 0.8463705790761474, "grad_norm": 0.3602672517299652, "learning_rate": 8.587146786696675e-05, "loss": 0.5183, "step": 11415 }, { "epoch": 0.8464447245495662, "grad_norm": 0.3657006025314331, "learning_rate": 8.586146536634158e-05, "loss": 0.523, "step": 11416 }, { "epoch": 0.8465188700229851, "grad_norm": 0.383444607257843, "learning_rate": 8.585146286571643e-05, "loss": 0.508, "step": 11417 }, { "epoch": 0.846593015496404, "grad_norm": 0.4008674919605255, "learning_rate": 8.584146036509127e-05, "loss": 0.537, "step": 11418 }, { "epoch": 0.8466671609698228, "grad_norm": 0.3563884496688843, "learning_rate": 8.583145786446612e-05, "loss": 0.4763, "step": 11419 }, { "epoch": 0.8467413064432416, "grad_norm": 0.3195132315158844, "learning_rate": 8.582145536384096e-05, "loss": 0.442, "step": 11420 }, { "epoch": 0.8468154519166605, "grad_norm": 0.3664233684539795, "learning_rate": 8.581145286321581e-05, "loss": 0.4754, "step": 11421 }, { "epoch": 0.8468895973900793, "grad_norm": 0.36377301812171936, "learning_rate": 8.580145036259065e-05, "loss": 0.5005, "step": 11422 }, { "epoch": 0.8469637428634982, "grad_norm": 0.34815365076065063, "learning_rate": 8.57914478619655e-05, "loss": 0.4893, "step": 11423 }, { "epoch": 0.8470378883369171, "grad_norm": 0.3450101613998413, "learning_rate": 8.578144536134035e-05, "loss": 0.4918, "step": 11424 }, { "epoch": 0.8471120338103358, "grad_norm": 0.3577689230442047, "learning_rate": 8.577144286071518e-05, "loss": 0.501, "step": 11425 }, { "epoch": 0.8471861792837547, "grad_norm": 0.35682299733161926, "learning_rate": 8.576144036009003e-05, "loss": 0.5151, "step": 11426 }, { "epoch": 0.8472603247571736, "grad_norm": 0.3709268569946289, "learning_rate": 8.575143785946487e-05, "loss": 0.517, "step": 11427 }, { "epoch": 0.8473344702305924, "grad_norm": 0.3685608506202698, "learning_rate": 8.574143535883972e-05, "loss": 0.4842, "step": 11428 }, { "epoch": 0.8474086157040113, "grad_norm": 0.3743578791618347, "learning_rate": 8.573143285821456e-05, "loss": 0.5504, "step": 11429 }, { "epoch": 0.8474827611774302, "grad_norm": 0.3527820110321045, "learning_rate": 8.572143035758941e-05, "loss": 0.4696, "step": 11430 }, { "epoch": 0.8475569066508489, "grad_norm": 0.33982446789741516, "learning_rate": 8.571142785696425e-05, "loss": 0.4423, "step": 11431 }, { "epoch": 0.8476310521242678, "grad_norm": 0.3394658863544464, "learning_rate": 8.57014253563391e-05, "loss": 0.4697, "step": 11432 }, { "epoch": 0.8477051975976867, "grad_norm": 0.35574787855148315, "learning_rate": 8.569142285571393e-05, "loss": 0.4986, "step": 11433 }, { "epoch": 0.8477793430711055, "grad_norm": 0.34157928824424744, "learning_rate": 8.568142035508879e-05, "loss": 0.4675, "step": 11434 }, { "epoch": 0.8478534885445244, "grad_norm": 0.3406233489513397, "learning_rate": 8.567141785446362e-05, "loss": 0.4746, "step": 11435 }, { "epoch": 0.8479276340179432, "grad_norm": 0.3815142810344696, "learning_rate": 8.566141535383846e-05, "loss": 0.5351, "step": 11436 }, { "epoch": 0.848001779491362, "grad_norm": 0.36320436000823975, "learning_rate": 8.565141285321331e-05, "loss": 0.473, "step": 11437 }, { "epoch": 0.8480759249647809, "grad_norm": 0.3742474913597107, "learning_rate": 8.564141035258815e-05, "loss": 0.507, "step": 11438 }, { "epoch": 0.8481500704381998, "grad_norm": 0.3697865307331085, "learning_rate": 8.5631407851963e-05, "loss": 0.5254, "step": 11439 }, { "epoch": 0.8482242159116186, "grad_norm": 0.3356456160545349, "learning_rate": 8.562140535133784e-05, "loss": 0.4713, "step": 11440 }, { "epoch": 0.8482983613850374, "grad_norm": 0.35729408264160156, "learning_rate": 8.561140285071269e-05, "loss": 0.4775, "step": 11441 }, { "epoch": 0.8483725068584563, "grad_norm": 0.34676215052604675, "learning_rate": 8.560140035008752e-05, "loss": 0.482, "step": 11442 }, { "epoch": 0.8484466523318751, "grad_norm": 0.33999428153038025, "learning_rate": 8.559139784946237e-05, "loss": 0.4733, "step": 11443 }, { "epoch": 0.848520797805294, "grad_norm": 0.35598599910736084, "learning_rate": 8.558139534883721e-05, "loss": 0.5238, "step": 11444 }, { "epoch": 0.8485949432787129, "grad_norm": 0.3451860845088959, "learning_rate": 8.557139284821206e-05, "loss": 0.4761, "step": 11445 }, { "epoch": 0.8486690887521317, "grad_norm": 0.3437279760837555, "learning_rate": 8.55613903475869e-05, "loss": 0.4889, "step": 11446 }, { "epoch": 0.8487432342255505, "grad_norm": 0.35773080587387085, "learning_rate": 8.555138784696174e-05, "loss": 0.4679, "step": 11447 }, { "epoch": 0.8488173796989694, "grad_norm": 0.35553064942359924, "learning_rate": 8.554138534633659e-05, "loss": 0.4743, "step": 11448 }, { "epoch": 0.8488915251723882, "grad_norm": 0.3649074137210846, "learning_rate": 8.553138284571142e-05, "loss": 0.4888, "step": 11449 }, { "epoch": 0.8489656706458071, "grad_norm": 0.36579638719558716, "learning_rate": 8.552138034508627e-05, "loss": 0.4693, "step": 11450 }, { "epoch": 0.849039816119226, "grad_norm": 0.3586617410182953, "learning_rate": 8.551137784446111e-05, "loss": 0.4768, "step": 11451 }, { "epoch": 0.8491139615926447, "grad_norm": 0.3736242651939392, "learning_rate": 8.550137534383596e-05, "loss": 0.4765, "step": 11452 }, { "epoch": 0.8491881070660636, "grad_norm": 0.3613903224468231, "learning_rate": 8.54913728432108e-05, "loss": 0.4946, "step": 11453 }, { "epoch": 0.8492622525394825, "grad_norm": 0.33438190817832947, "learning_rate": 8.548137034258565e-05, "loss": 0.4602, "step": 11454 }, { "epoch": 0.8493363980129013, "grad_norm": 0.3707336187362671, "learning_rate": 8.547136784196049e-05, "loss": 0.5015, "step": 11455 }, { "epoch": 0.8494105434863202, "grad_norm": 0.36431685090065, "learning_rate": 8.546136534133534e-05, "loss": 0.5067, "step": 11456 }, { "epoch": 0.849484688959739, "grad_norm": 0.37427839636802673, "learning_rate": 8.545136284071017e-05, "loss": 0.5028, "step": 11457 }, { "epoch": 0.8495588344331578, "grad_norm": 0.39613404870033264, "learning_rate": 8.544136034008502e-05, "loss": 0.5392, "step": 11458 }, { "epoch": 0.8496329799065767, "grad_norm": 0.3642808794975281, "learning_rate": 8.543135783945988e-05, "loss": 0.4676, "step": 11459 }, { "epoch": 0.8497071253799956, "grad_norm": 0.36476558446884155, "learning_rate": 8.542135533883471e-05, "loss": 0.5124, "step": 11460 }, { "epoch": 0.8497812708534144, "grad_norm": 0.34341147541999817, "learning_rate": 8.541135283820956e-05, "loss": 0.4727, "step": 11461 }, { "epoch": 0.8498554163268333, "grad_norm": 0.3499769866466522, "learning_rate": 8.54013503375844e-05, "loss": 0.4855, "step": 11462 }, { "epoch": 0.8499295618002521, "grad_norm": 0.3535006046295166, "learning_rate": 8.539134783695925e-05, "loss": 0.4612, "step": 11463 }, { "epoch": 0.8500037072736709, "grad_norm": 0.3501948416233063, "learning_rate": 8.538134533633409e-05, "loss": 0.4555, "step": 11464 }, { "epoch": 0.8500778527470898, "grad_norm": 0.36446502804756165, "learning_rate": 8.537134283570894e-05, "loss": 0.5033, "step": 11465 }, { "epoch": 0.8501519982205087, "grad_norm": 0.3517643213272095, "learning_rate": 8.536134033508378e-05, "loss": 0.4801, "step": 11466 }, { "epoch": 0.8502261436939275, "grad_norm": 0.36338183283805847, "learning_rate": 8.535133783445863e-05, "loss": 0.4714, "step": 11467 }, { "epoch": 0.8503002891673463, "grad_norm": 0.3584369719028473, "learning_rate": 8.534133533383346e-05, "loss": 0.4846, "step": 11468 }, { "epoch": 0.8503744346407652, "grad_norm": 0.36492371559143066, "learning_rate": 8.533133283320831e-05, "loss": 0.4815, "step": 11469 }, { "epoch": 0.850448580114184, "grad_norm": 0.35179954767227173, "learning_rate": 8.532133033258315e-05, "loss": 0.4745, "step": 11470 }, { "epoch": 0.8505227255876029, "grad_norm": 0.38146212697029114, "learning_rate": 8.5311327831958e-05, "loss": 0.506, "step": 11471 }, { "epoch": 0.8505968710610218, "grad_norm": 0.39248552918434143, "learning_rate": 8.530132533133284e-05, "loss": 0.5708, "step": 11472 }, { "epoch": 0.8506710165344405, "grad_norm": 0.34693264961242676, "learning_rate": 8.529132283070768e-05, "loss": 0.4562, "step": 11473 }, { "epoch": 0.8507451620078594, "grad_norm": 0.3558144271373749, "learning_rate": 8.528132033008253e-05, "loss": 0.5152, "step": 11474 }, { "epoch": 0.8508193074812783, "grad_norm": 0.3378300666809082, "learning_rate": 8.527131782945736e-05, "loss": 0.4621, "step": 11475 }, { "epoch": 0.8508934529546971, "grad_norm": 0.3660542368888855, "learning_rate": 8.526131532883221e-05, "loss": 0.5094, "step": 11476 }, { "epoch": 0.850967598428116, "grad_norm": 0.3259750008583069, "learning_rate": 8.525131282820705e-05, "loss": 0.4428, "step": 11477 }, { "epoch": 0.8510417439015349, "grad_norm": 0.3612135946750641, "learning_rate": 8.52413103275819e-05, "loss": 0.4726, "step": 11478 }, { "epoch": 0.8511158893749536, "grad_norm": 0.3607768714427948, "learning_rate": 8.523130782695674e-05, "loss": 0.4724, "step": 11479 }, { "epoch": 0.8511900348483725, "grad_norm": 0.3769817352294922, "learning_rate": 8.522130532633159e-05, "loss": 0.5391, "step": 11480 }, { "epoch": 0.8512641803217914, "grad_norm": 0.3411159813404083, "learning_rate": 8.521130282570643e-05, "loss": 0.4903, "step": 11481 }, { "epoch": 0.8513383257952102, "grad_norm": 0.340617835521698, "learning_rate": 8.520130032508128e-05, "loss": 0.4857, "step": 11482 }, { "epoch": 0.8514124712686291, "grad_norm": 0.35087960958480835, "learning_rate": 8.519129782445611e-05, "loss": 0.5073, "step": 11483 }, { "epoch": 0.851486616742048, "grad_norm": 0.3528817892074585, "learning_rate": 8.518129532383095e-05, "loss": 0.5061, "step": 11484 }, { "epoch": 0.8515607622154667, "grad_norm": 0.36051514744758606, "learning_rate": 8.51712928232058e-05, "loss": 0.476, "step": 11485 }, { "epoch": 0.8516349076888856, "grad_norm": 0.36470863223075867, "learning_rate": 8.516129032258064e-05, "loss": 0.5115, "step": 11486 }, { "epoch": 0.8517090531623044, "grad_norm": 0.3528868556022644, "learning_rate": 8.515128782195549e-05, "loss": 0.48, "step": 11487 }, { "epoch": 0.8517831986357233, "grad_norm": 0.35212212800979614, "learning_rate": 8.514128532133033e-05, "loss": 0.4919, "step": 11488 }, { "epoch": 0.8518573441091422, "grad_norm": 0.40140998363494873, "learning_rate": 8.513128282070518e-05, "loss": 0.5337, "step": 11489 }, { "epoch": 0.8519314895825609, "grad_norm": 0.34078624844551086, "learning_rate": 8.512128032008001e-05, "loss": 0.4681, "step": 11490 }, { "epoch": 0.8520056350559798, "grad_norm": 0.374172180891037, "learning_rate": 8.511127781945487e-05, "loss": 0.5121, "step": 11491 }, { "epoch": 0.8520797805293987, "grad_norm": 0.3605145215988159, "learning_rate": 8.510127531882972e-05, "loss": 0.475, "step": 11492 }, { "epoch": 0.8521539260028175, "grad_norm": 0.3713059425354004, "learning_rate": 8.509127281820455e-05, "loss": 0.5116, "step": 11493 }, { "epoch": 0.8522280714762364, "grad_norm": 0.3607991933822632, "learning_rate": 8.50812703175794e-05, "loss": 0.4867, "step": 11494 }, { "epoch": 0.8523022169496552, "grad_norm": 0.36093661189079285, "learning_rate": 8.507126781695424e-05, "loss": 0.4885, "step": 11495 }, { "epoch": 0.852376362423074, "grad_norm": 0.33860963582992554, "learning_rate": 8.506126531632909e-05, "loss": 0.4485, "step": 11496 }, { "epoch": 0.8524505078964929, "grad_norm": 0.38486841320991516, "learning_rate": 8.505126281570393e-05, "loss": 0.5075, "step": 11497 }, { "epoch": 0.8525246533699118, "grad_norm": 0.3724074065685272, "learning_rate": 8.504126031507878e-05, "loss": 0.4999, "step": 11498 }, { "epoch": 0.8525987988433306, "grad_norm": 0.3585270643234253, "learning_rate": 8.503125781445362e-05, "loss": 0.5138, "step": 11499 }, { "epoch": 0.8526729443167494, "grad_norm": 0.3721861243247986, "learning_rate": 8.502125531382847e-05, "loss": 0.487, "step": 11500 }, { "epoch": 0.8527470897901683, "grad_norm": 0.3622666597366333, "learning_rate": 8.50112528132033e-05, "loss": 0.4914, "step": 11501 }, { "epoch": 0.8528212352635871, "grad_norm": 0.35995447635650635, "learning_rate": 8.500125031257815e-05, "loss": 0.5103, "step": 11502 }, { "epoch": 0.852895380737006, "grad_norm": 0.3636545240879059, "learning_rate": 8.499124781195299e-05, "loss": 0.5411, "step": 11503 }, { "epoch": 0.8529695262104249, "grad_norm": 0.35355913639068604, "learning_rate": 8.498124531132784e-05, "loss": 0.4773, "step": 11504 }, { "epoch": 0.8530436716838437, "grad_norm": 0.352150559425354, "learning_rate": 8.497124281070268e-05, "loss": 0.5143, "step": 11505 }, { "epoch": 0.8531178171572625, "grad_norm": 0.37384605407714844, "learning_rate": 8.496124031007753e-05, "loss": 0.4858, "step": 11506 }, { "epoch": 0.8531919626306814, "grad_norm": 0.3756520748138428, "learning_rate": 8.495123780945237e-05, "loss": 0.5068, "step": 11507 }, { "epoch": 0.8532661081041002, "grad_norm": 0.3496893644332886, "learning_rate": 8.494123530882722e-05, "loss": 0.468, "step": 11508 }, { "epoch": 0.8533402535775191, "grad_norm": 0.3806248903274536, "learning_rate": 8.493123280820206e-05, "loss": 0.5085, "step": 11509 }, { "epoch": 0.853414399050938, "grad_norm": 0.3672305941581726, "learning_rate": 8.492123030757689e-05, "loss": 0.5204, "step": 11510 }, { "epoch": 0.8534885445243567, "grad_norm": 0.35756272077560425, "learning_rate": 8.491122780695174e-05, "loss": 0.4683, "step": 11511 }, { "epoch": 0.8535626899977756, "grad_norm": 0.3744675815105438, "learning_rate": 8.490122530632658e-05, "loss": 0.473, "step": 11512 }, { "epoch": 0.8536368354711945, "grad_norm": 0.36174651980400085, "learning_rate": 8.489122280570143e-05, "loss": 0.5118, "step": 11513 }, { "epoch": 0.8537109809446133, "grad_norm": 0.36523380875587463, "learning_rate": 8.488122030507627e-05, "loss": 0.5003, "step": 11514 }, { "epoch": 0.8537851264180322, "grad_norm": 0.36998552083969116, "learning_rate": 8.487121780445112e-05, "loss": 0.5048, "step": 11515 }, { "epoch": 0.853859271891451, "grad_norm": 0.3821640610694885, "learning_rate": 8.486121530382596e-05, "loss": 0.5055, "step": 11516 }, { "epoch": 0.8539334173648698, "grad_norm": 0.38429689407348633, "learning_rate": 8.48512128032008e-05, "loss": 0.4923, "step": 11517 }, { "epoch": 0.8540075628382887, "grad_norm": 0.3817364275455475, "learning_rate": 8.484121030257564e-05, "loss": 0.5271, "step": 11518 }, { "epoch": 0.8540817083117076, "grad_norm": 0.38598087430000305, "learning_rate": 8.48312078019505e-05, "loss": 0.5258, "step": 11519 }, { "epoch": 0.8541558537851264, "grad_norm": 0.35038667917251587, "learning_rate": 8.482120530132533e-05, "loss": 0.4654, "step": 11520 }, { "epoch": 0.8542299992585453, "grad_norm": 0.36158043146133423, "learning_rate": 8.481120280070018e-05, "loss": 0.4969, "step": 11521 }, { "epoch": 0.8543041447319641, "grad_norm": 0.365377277135849, "learning_rate": 8.480120030007502e-05, "loss": 0.5069, "step": 11522 }, { "epoch": 0.8543782902053829, "grad_norm": 0.36449381709098816, "learning_rate": 8.479119779944986e-05, "loss": 0.5116, "step": 11523 }, { "epoch": 0.8544524356788018, "grad_norm": 0.3469057083129883, "learning_rate": 8.47811952988247e-05, "loss": 0.4823, "step": 11524 }, { "epoch": 0.8545265811522207, "grad_norm": 0.3442673683166504, "learning_rate": 8.477119279819954e-05, "loss": 0.4537, "step": 11525 }, { "epoch": 0.8546007266256395, "grad_norm": 0.3681716024875641, "learning_rate": 8.47611902975744e-05, "loss": 0.515, "step": 11526 }, { "epoch": 0.8546748720990583, "grad_norm": 0.32448333501815796, "learning_rate": 8.475118779694924e-05, "loss": 0.4388, "step": 11527 }, { "epoch": 0.8547490175724772, "grad_norm": 0.3466922342777252, "learning_rate": 8.474118529632408e-05, "loss": 0.5011, "step": 11528 }, { "epoch": 0.854823163045896, "grad_norm": 0.35349196195602417, "learning_rate": 8.473118279569893e-05, "loss": 0.4606, "step": 11529 }, { "epoch": 0.8548973085193149, "grad_norm": 0.35385045409202576, "learning_rate": 8.472118029507378e-05, "loss": 0.4611, "step": 11530 }, { "epoch": 0.8549714539927338, "grad_norm": 0.3427353501319885, "learning_rate": 8.471117779444862e-05, "loss": 0.479, "step": 11531 }, { "epoch": 0.8550455994661526, "grad_norm": 0.3778385818004608, "learning_rate": 8.470117529382347e-05, "loss": 0.4905, "step": 11532 }, { "epoch": 0.8551197449395714, "grad_norm": 0.35498756170272827, "learning_rate": 8.469117279319831e-05, "loss": 0.4917, "step": 11533 }, { "epoch": 0.8551938904129903, "grad_norm": 0.3395951986312866, "learning_rate": 8.468117029257315e-05, "loss": 0.4628, "step": 11534 }, { "epoch": 0.8552680358864091, "grad_norm": 0.39590877294540405, "learning_rate": 8.4671167791948e-05, "loss": 0.5479, "step": 11535 }, { "epoch": 0.855342181359828, "grad_norm": 0.3570532500743866, "learning_rate": 8.466116529132283e-05, "loss": 0.5135, "step": 11536 }, { "epoch": 0.8554163268332469, "grad_norm": 0.3251437544822693, "learning_rate": 8.465116279069768e-05, "loss": 0.4273, "step": 11537 }, { "epoch": 0.8554904723066656, "grad_norm": 0.35535725951194763, "learning_rate": 8.464116029007252e-05, "loss": 0.4721, "step": 11538 }, { "epoch": 0.8555646177800845, "grad_norm": 0.36588239669799805, "learning_rate": 8.463115778944737e-05, "loss": 0.4941, "step": 11539 }, { "epoch": 0.8556387632535034, "grad_norm": 0.3793106973171234, "learning_rate": 8.462115528882221e-05, "loss": 0.5026, "step": 11540 }, { "epoch": 0.8557129087269222, "grad_norm": 0.3604636788368225, "learning_rate": 8.461115278819706e-05, "loss": 0.4986, "step": 11541 }, { "epoch": 0.8557870542003411, "grad_norm": 0.3755802512168884, "learning_rate": 8.46011502875719e-05, "loss": 0.4629, "step": 11542 }, { "epoch": 0.85586119967376, "grad_norm": 0.3550081253051758, "learning_rate": 8.459114778694675e-05, "loss": 0.4724, "step": 11543 }, { "epoch": 0.8559353451471787, "grad_norm": 0.36638617515563965, "learning_rate": 8.458114528632158e-05, "loss": 0.4965, "step": 11544 }, { "epoch": 0.8560094906205976, "grad_norm": 0.3523412048816681, "learning_rate": 8.457114278569643e-05, "loss": 0.5075, "step": 11545 }, { "epoch": 0.8560836360940165, "grad_norm": 0.34201133251190186, "learning_rate": 8.456114028507127e-05, "loss": 0.4765, "step": 11546 }, { "epoch": 0.8561577815674353, "grad_norm": 0.3557768166065216, "learning_rate": 8.455113778444611e-05, "loss": 0.4945, "step": 11547 }, { "epoch": 0.8562319270408542, "grad_norm": 0.3582128882408142, "learning_rate": 8.454113528382096e-05, "loss": 0.4812, "step": 11548 }, { "epoch": 0.856306072514273, "grad_norm": 0.36747637391090393, "learning_rate": 8.45311327831958e-05, "loss": 0.4968, "step": 11549 }, { "epoch": 0.8563802179876918, "grad_norm": 0.3814467787742615, "learning_rate": 8.452113028257065e-05, "loss": 0.5548, "step": 11550 }, { "epoch": 0.8564543634611107, "grad_norm": 0.3572767674922943, "learning_rate": 8.451112778194548e-05, "loss": 0.5077, "step": 11551 }, { "epoch": 0.8565285089345296, "grad_norm": 0.36409008502960205, "learning_rate": 8.450112528132033e-05, "loss": 0.4874, "step": 11552 }, { "epoch": 0.8566026544079484, "grad_norm": 0.3797178268432617, "learning_rate": 8.449112278069517e-05, "loss": 0.5187, "step": 11553 }, { "epoch": 0.8566767998813672, "grad_norm": 0.3719564378261566, "learning_rate": 8.448112028007002e-05, "loss": 0.4954, "step": 11554 }, { "epoch": 0.8567509453547861, "grad_norm": 0.36063921451568604, "learning_rate": 8.447111777944486e-05, "loss": 0.5063, "step": 11555 }, { "epoch": 0.8568250908282049, "grad_norm": 0.3666316866874695, "learning_rate": 8.446111527881971e-05, "loss": 0.5072, "step": 11556 }, { "epoch": 0.8568992363016238, "grad_norm": 0.3449663817882538, "learning_rate": 8.445111277819455e-05, "loss": 0.4763, "step": 11557 }, { "epoch": 0.8569733817750427, "grad_norm": 0.331215500831604, "learning_rate": 8.44411102775694e-05, "loss": 0.4363, "step": 11558 }, { "epoch": 0.8570475272484614, "grad_norm": 0.4039321839809418, "learning_rate": 8.443110777694423e-05, "loss": 0.5278, "step": 11559 }, { "epoch": 0.8571216727218803, "grad_norm": 0.37405523657798767, "learning_rate": 8.442110527631909e-05, "loss": 0.5121, "step": 11560 }, { "epoch": 0.8571958181952992, "grad_norm": 0.3662812411785126, "learning_rate": 8.441110277569392e-05, "loss": 0.5082, "step": 11561 }, { "epoch": 0.857269963668718, "grad_norm": 0.3688361942768097, "learning_rate": 8.440110027506877e-05, "loss": 0.4993, "step": 11562 }, { "epoch": 0.8573441091421369, "grad_norm": 0.3496631979942322, "learning_rate": 8.439109777444361e-05, "loss": 0.5205, "step": 11563 }, { "epoch": 0.8574182546155558, "grad_norm": 0.3432881534099579, "learning_rate": 8.438109527381846e-05, "loss": 0.4663, "step": 11564 }, { "epoch": 0.8574924000889745, "grad_norm": 0.3684123754501343, "learning_rate": 8.437109277319331e-05, "loss": 0.5087, "step": 11565 }, { "epoch": 0.8575665455623934, "grad_norm": 0.3888777494430542, "learning_rate": 8.436109027256815e-05, "loss": 0.5494, "step": 11566 }, { "epoch": 0.8576406910358123, "grad_norm": 0.3523465394973755, "learning_rate": 8.4351087771943e-05, "loss": 0.5039, "step": 11567 }, { "epoch": 0.8577148365092311, "grad_norm": 0.3437611162662506, "learning_rate": 8.434108527131784e-05, "loss": 0.4664, "step": 11568 }, { "epoch": 0.85778898198265, "grad_norm": 0.3600233197212219, "learning_rate": 8.433108277069269e-05, "loss": 0.4794, "step": 11569 }, { "epoch": 0.8578631274560689, "grad_norm": 0.34135329723358154, "learning_rate": 8.432108027006752e-05, "loss": 0.4425, "step": 11570 }, { "epoch": 0.8579372729294876, "grad_norm": 0.33355820178985596, "learning_rate": 8.431107776944236e-05, "loss": 0.4391, "step": 11571 }, { "epoch": 0.8580114184029065, "grad_norm": 0.3481719195842743, "learning_rate": 8.430107526881721e-05, "loss": 0.4756, "step": 11572 }, { "epoch": 0.8580855638763254, "grad_norm": 0.34989115595817566, "learning_rate": 8.429107276819205e-05, "loss": 0.4623, "step": 11573 }, { "epoch": 0.8581597093497442, "grad_norm": 0.3585019111633301, "learning_rate": 8.42810702675669e-05, "loss": 0.5013, "step": 11574 }, { "epoch": 0.8582338548231631, "grad_norm": 0.3874867260456085, "learning_rate": 8.427106776694174e-05, "loss": 0.5, "step": 11575 }, { "epoch": 0.8583080002965819, "grad_norm": 0.37288689613342285, "learning_rate": 8.426106526631659e-05, "loss": 0.4871, "step": 11576 }, { "epoch": 0.8583821457700007, "grad_norm": 0.36482998728752136, "learning_rate": 8.425106276569142e-05, "loss": 0.4803, "step": 11577 }, { "epoch": 0.8584562912434196, "grad_norm": 0.3789561986923218, "learning_rate": 8.424106026506628e-05, "loss": 0.4732, "step": 11578 }, { "epoch": 0.8585304367168385, "grad_norm": 0.3814055323600769, "learning_rate": 8.423105776444111e-05, "loss": 0.5179, "step": 11579 }, { "epoch": 0.8586045821902573, "grad_norm": 0.35168901085853577, "learning_rate": 8.422105526381596e-05, "loss": 0.485, "step": 11580 }, { "epoch": 0.8586787276636761, "grad_norm": 0.35506218671798706, "learning_rate": 8.42110527631908e-05, "loss": 0.4813, "step": 11581 }, { "epoch": 0.858752873137095, "grad_norm": 0.3687075674533844, "learning_rate": 8.420105026256565e-05, "loss": 0.5339, "step": 11582 }, { "epoch": 0.8588270186105138, "grad_norm": 0.3768359422683716, "learning_rate": 8.419104776194049e-05, "loss": 0.5084, "step": 11583 }, { "epoch": 0.8589011640839327, "grad_norm": 0.3658454716205597, "learning_rate": 8.418104526131532e-05, "loss": 0.4797, "step": 11584 }, { "epoch": 0.8589753095573516, "grad_norm": 0.3726375699043274, "learning_rate": 8.417104276069018e-05, "loss": 0.4863, "step": 11585 }, { "epoch": 0.8590494550307703, "grad_norm": 0.3336321711540222, "learning_rate": 8.416104026006501e-05, "loss": 0.4589, "step": 11586 }, { "epoch": 0.8591236005041892, "grad_norm": 0.3768633008003235, "learning_rate": 8.415103775943986e-05, "loss": 0.5493, "step": 11587 }, { "epoch": 0.8591977459776081, "grad_norm": 0.364768385887146, "learning_rate": 8.41410352588147e-05, "loss": 0.5004, "step": 11588 }, { "epoch": 0.8592718914510269, "grad_norm": 0.4004618525505066, "learning_rate": 8.413103275818955e-05, "loss": 0.544, "step": 11589 }, { "epoch": 0.8593460369244458, "grad_norm": 0.34042418003082275, "learning_rate": 8.412103025756439e-05, "loss": 0.4542, "step": 11590 }, { "epoch": 0.8594201823978647, "grad_norm": 0.35738956928253174, "learning_rate": 8.411102775693924e-05, "loss": 0.5206, "step": 11591 }, { "epoch": 0.8594943278712834, "grad_norm": 0.3456222414970398, "learning_rate": 8.410102525631408e-05, "loss": 0.4858, "step": 11592 }, { "epoch": 0.8595684733447023, "grad_norm": 0.36990058422088623, "learning_rate": 8.409102275568893e-05, "loss": 0.5251, "step": 11593 }, { "epoch": 0.8596426188181212, "grad_norm": 0.34635618329048157, "learning_rate": 8.408102025506376e-05, "loss": 0.4727, "step": 11594 }, { "epoch": 0.85971676429154, "grad_norm": 0.3594783842563629, "learning_rate": 8.407101775443861e-05, "loss": 0.5361, "step": 11595 }, { "epoch": 0.8597909097649589, "grad_norm": 0.3497706949710846, "learning_rate": 8.406101525381345e-05, "loss": 0.4449, "step": 11596 }, { "epoch": 0.8598650552383778, "grad_norm": 0.35986700654029846, "learning_rate": 8.40510127531883e-05, "loss": 0.504, "step": 11597 }, { "epoch": 0.8599392007117965, "grad_norm": 0.33929866552352905, "learning_rate": 8.404101025256315e-05, "loss": 0.4639, "step": 11598 }, { "epoch": 0.8600133461852154, "grad_norm": 0.37893447279930115, "learning_rate": 8.403100775193799e-05, "loss": 0.5075, "step": 11599 }, { "epoch": 0.8600874916586343, "grad_norm": 0.3573082983493805, "learning_rate": 8.402100525131284e-05, "loss": 0.5046, "step": 11600 }, { "epoch": 0.8601616371320531, "grad_norm": 0.3752482235431671, "learning_rate": 8.401100275068768e-05, "loss": 0.5067, "step": 11601 }, { "epoch": 0.860235782605472, "grad_norm": 0.3331862688064575, "learning_rate": 8.400100025006253e-05, "loss": 0.4715, "step": 11602 }, { "epoch": 0.8603099280788907, "grad_norm": 0.38205698132514954, "learning_rate": 8.399099774943736e-05, "loss": 0.5497, "step": 11603 }, { "epoch": 0.8603840735523096, "grad_norm": 0.3859010338783264, "learning_rate": 8.398099524881222e-05, "loss": 0.538, "step": 11604 }, { "epoch": 0.8604582190257285, "grad_norm": 0.36159640550613403, "learning_rate": 8.397099274818705e-05, "loss": 0.4833, "step": 11605 }, { "epoch": 0.8605323644991473, "grad_norm": 0.36351293325424194, "learning_rate": 8.39609902475619e-05, "loss": 0.4701, "step": 11606 }, { "epoch": 0.8606065099725662, "grad_norm": 0.3441663980484009, "learning_rate": 8.395098774693674e-05, "loss": 0.4762, "step": 11607 }, { "epoch": 0.860680655445985, "grad_norm": 0.3546861708164215, "learning_rate": 8.394098524631158e-05, "loss": 0.4783, "step": 11608 }, { "epoch": 0.8607548009194038, "grad_norm": 0.34919658303260803, "learning_rate": 8.393098274568643e-05, "loss": 0.4774, "step": 11609 }, { "epoch": 0.8608289463928227, "grad_norm": 0.358325719833374, "learning_rate": 8.392098024506127e-05, "loss": 0.5097, "step": 11610 }, { "epoch": 0.8609030918662416, "grad_norm": 0.364531010389328, "learning_rate": 8.391097774443612e-05, "loss": 0.5264, "step": 11611 }, { "epoch": 0.8609772373396604, "grad_norm": 0.34816688299179077, "learning_rate": 8.390097524381095e-05, "loss": 0.462, "step": 11612 }, { "epoch": 0.8610513828130792, "grad_norm": 0.33695438504219055, "learning_rate": 8.38909727431858e-05, "loss": 0.4713, "step": 11613 }, { "epoch": 0.8611255282864981, "grad_norm": 0.3649353086948395, "learning_rate": 8.388097024256064e-05, "loss": 0.4984, "step": 11614 }, { "epoch": 0.8611996737599169, "grad_norm": 0.3499941825866699, "learning_rate": 8.387096774193549e-05, "loss": 0.4685, "step": 11615 }, { "epoch": 0.8612738192333358, "grad_norm": 0.36056673526763916, "learning_rate": 8.386096524131033e-05, "loss": 0.4881, "step": 11616 }, { "epoch": 0.8613479647067547, "grad_norm": 0.3742772340774536, "learning_rate": 8.385096274068518e-05, "loss": 0.5088, "step": 11617 }, { "epoch": 0.8614221101801735, "grad_norm": 0.34842804074287415, "learning_rate": 8.384096024006002e-05, "loss": 0.4799, "step": 11618 }, { "epoch": 0.8614962556535923, "grad_norm": 0.3467600345611572, "learning_rate": 8.383095773943487e-05, "loss": 0.4627, "step": 11619 }, { "epoch": 0.8615704011270112, "grad_norm": 0.38309070467948914, "learning_rate": 8.38209552388097e-05, "loss": 0.5255, "step": 11620 }, { "epoch": 0.86164454660043, "grad_norm": 0.35037124156951904, "learning_rate": 8.381095273818454e-05, "loss": 0.4672, "step": 11621 }, { "epoch": 0.8617186920738489, "grad_norm": 0.37307530641555786, "learning_rate": 8.380095023755939e-05, "loss": 0.5201, "step": 11622 }, { "epoch": 0.8617928375472678, "grad_norm": 0.36021772027015686, "learning_rate": 8.379094773693423e-05, "loss": 0.4672, "step": 11623 }, { "epoch": 0.8618669830206865, "grad_norm": 0.3898204565048218, "learning_rate": 8.378094523630908e-05, "loss": 0.5148, "step": 11624 }, { "epoch": 0.8619411284941054, "grad_norm": 0.38779833912849426, "learning_rate": 8.377094273568392e-05, "loss": 0.4957, "step": 11625 }, { "epoch": 0.8620152739675243, "grad_norm": 0.3667895793914795, "learning_rate": 8.376094023505877e-05, "loss": 0.4955, "step": 11626 }, { "epoch": 0.8620894194409431, "grad_norm": 0.3372872769832611, "learning_rate": 8.37509377344336e-05, "loss": 0.4515, "step": 11627 }, { "epoch": 0.862163564914362, "grad_norm": 0.3707887828350067, "learning_rate": 8.374093523380845e-05, "loss": 0.5518, "step": 11628 }, { "epoch": 0.8622377103877809, "grad_norm": 0.3495267331600189, "learning_rate": 8.373093273318329e-05, "loss": 0.4668, "step": 11629 }, { "epoch": 0.8623118558611996, "grad_norm": 0.3924015462398529, "learning_rate": 8.372093023255814e-05, "loss": 0.5046, "step": 11630 }, { "epoch": 0.8623860013346185, "grad_norm": 0.3624289631843567, "learning_rate": 8.3710927731933e-05, "loss": 0.4889, "step": 11631 }, { "epoch": 0.8624601468080374, "grad_norm": 0.3556775748729706, "learning_rate": 8.370092523130783e-05, "loss": 0.5132, "step": 11632 }, { "epoch": 0.8625342922814562, "grad_norm": 0.34811756014823914, "learning_rate": 8.369092273068268e-05, "loss": 0.46, "step": 11633 }, { "epoch": 0.8626084377548751, "grad_norm": 0.3740328848361969, "learning_rate": 8.368092023005752e-05, "loss": 0.5073, "step": 11634 }, { "epoch": 0.8626825832282939, "grad_norm": 0.33829671144485474, "learning_rate": 8.367091772943237e-05, "loss": 0.4758, "step": 11635 }, { "epoch": 0.8627567287017127, "grad_norm": 0.36607682704925537, "learning_rate": 8.36609152288072e-05, "loss": 0.4746, "step": 11636 }, { "epoch": 0.8628308741751316, "grad_norm": 0.3402419686317444, "learning_rate": 8.365091272818206e-05, "loss": 0.4745, "step": 11637 }, { "epoch": 0.8629050196485505, "grad_norm": 0.39267295598983765, "learning_rate": 8.36409102275569e-05, "loss": 0.5525, "step": 11638 }, { "epoch": 0.8629791651219693, "grad_norm": 0.33241650462150574, "learning_rate": 8.363090772693174e-05, "loss": 0.4472, "step": 11639 }, { "epoch": 0.8630533105953881, "grad_norm": 0.3284902274608612, "learning_rate": 8.362090522630658e-05, "loss": 0.4545, "step": 11640 }, { "epoch": 0.863127456068807, "grad_norm": 0.3578989505767822, "learning_rate": 8.361090272568143e-05, "loss": 0.4778, "step": 11641 }, { "epoch": 0.8632016015422258, "grad_norm": 0.3649851381778717, "learning_rate": 8.360090022505627e-05, "loss": 0.5095, "step": 11642 }, { "epoch": 0.8632757470156447, "grad_norm": 0.36048802733421326, "learning_rate": 8.359089772443112e-05, "loss": 0.4742, "step": 11643 }, { "epoch": 0.8633498924890636, "grad_norm": 0.35149329900741577, "learning_rate": 8.358089522380596e-05, "loss": 0.4646, "step": 11644 }, { "epoch": 0.8634240379624823, "grad_norm": 0.37341704964637756, "learning_rate": 8.35708927231808e-05, "loss": 0.4932, "step": 11645 }, { "epoch": 0.8634981834359012, "grad_norm": 0.35678988695144653, "learning_rate": 8.356089022255564e-05, "loss": 0.483, "step": 11646 }, { "epoch": 0.8635723289093201, "grad_norm": 0.39862462878227234, "learning_rate": 8.355088772193048e-05, "loss": 0.517, "step": 11647 }, { "epoch": 0.8636464743827389, "grad_norm": 0.37612220644950867, "learning_rate": 8.354088522130533e-05, "loss": 0.4977, "step": 11648 }, { "epoch": 0.8637206198561578, "grad_norm": 0.3499213755130768, "learning_rate": 8.353088272068017e-05, "loss": 0.4868, "step": 11649 }, { "epoch": 0.8637947653295767, "grad_norm": 0.3636825978755951, "learning_rate": 8.352088022005502e-05, "loss": 0.5105, "step": 11650 }, { "epoch": 0.8638689108029954, "grad_norm": 0.34156534075737, "learning_rate": 8.351087771942986e-05, "loss": 0.4702, "step": 11651 }, { "epoch": 0.8639430562764143, "grad_norm": 0.3392332196235657, "learning_rate": 8.350087521880471e-05, "loss": 0.4863, "step": 11652 }, { "epoch": 0.8640172017498332, "grad_norm": 0.33392685651779175, "learning_rate": 8.349087271817954e-05, "loss": 0.4627, "step": 11653 }, { "epoch": 0.864091347223252, "grad_norm": 0.3524988293647766, "learning_rate": 8.34808702175544e-05, "loss": 0.4909, "step": 11654 }, { "epoch": 0.8641654926966709, "grad_norm": 0.3393431007862091, "learning_rate": 8.347086771692923e-05, "loss": 0.46, "step": 11655 }, { "epoch": 0.8642396381700898, "grad_norm": 0.3406030535697937, "learning_rate": 8.346086521630408e-05, "loss": 0.465, "step": 11656 }, { "epoch": 0.8643137836435085, "grad_norm": 0.3619324862957001, "learning_rate": 8.345086271567892e-05, "loss": 0.5186, "step": 11657 }, { "epoch": 0.8643879291169274, "grad_norm": 0.3698757588863373, "learning_rate": 8.344086021505376e-05, "loss": 0.5226, "step": 11658 }, { "epoch": 0.8644620745903463, "grad_norm": 0.37396326661109924, "learning_rate": 8.343085771442861e-05, "loss": 0.5169, "step": 11659 }, { "epoch": 0.8645362200637651, "grad_norm": 0.3528938889503479, "learning_rate": 8.342085521380345e-05, "loss": 0.4613, "step": 11660 }, { "epoch": 0.864610365537184, "grad_norm": 0.33711859583854675, "learning_rate": 8.34108527131783e-05, "loss": 0.4662, "step": 11661 }, { "epoch": 0.8646845110106028, "grad_norm": 0.3642832040786743, "learning_rate": 8.340085021255313e-05, "loss": 0.5057, "step": 11662 }, { "epoch": 0.8647586564840216, "grad_norm": 0.3717685043811798, "learning_rate": 8.339084771192798e-05, "loss": 0.4966, "step": 11663 }, { "epoch": 0.8648328019574405, "grad_norm": 0.35983237624168396, "learning_rate": 8.338084521130282e-05, "loss": 0.5088, "step": 11664 }, { "epoch": 0.8649069474308594, "grad_norm": 0.36445870995521545, "learning_rate": 8.337084271067767e-05, "loss": 0.5089, "step": 11665 }, { "epoch": 0.8649810929042782, "grad_norm": 0.35679662227630615, "learning_rate": 8.336084021005252e-05, "loss": 0.5065, "step": 11666 }, { "epoch": 0.865055238377697, "grad_norm": 0.3845294713973999, "learning_rate": 8.335083770942736e-05, "loss": 0.5473, "step": 11667 }, { "epoch": 0.8651293838511159, "grad_norm": 0.36450836062431335, "learning_rate": 8.334083520880221e-05, "loss": 0.5079, "step": 11668 }, { "epoch": 0.8652035293245347, "grad_norm": 0.34916967153549194, "learning_rate": 8.333083270817706e-05, "loss": 0.4585, "step": 11669 }, { "epoch": 0.8652776747979536, "grad_norm": 0.34001830220222473, "learning_rate": 8.33208302075519e-05, "loss": 0.4653, "step": 11670 }, { "epoch": 0.8653518202713725, "grad_norm": 0.35549333691596985, "learning_rate": 8.331082770692673e-05, "loss": 0.5086, "step": 11671 }, { "epoch": 0.8654259657447912, "grad_norm": 0.34928178787231445, "learning_rate": 8.330082520630158e-05, "loss": 0.5024, "step": 11672 }, { "epoch": 0.8655001112182101, "grad_norm": 0.3390156030654907, "learning_rate": 8.329082270567642e-05, "loss": 0.4564, "step": 11673 }, { "epoch": 0.865574256691629, "grad_norm": 0.34502270817756653, "learning_rate": 8.328082020505127e-05, "loss": 0.4715, "step": 11674 }, { "epoch": 0.8656484021650478, "grad_norm": 0.36214378476142883, "learning_rate": 8.327081770442611e-05, "loss": 0.4772, "step": 11675 }, { "epoch": 0.8657225476384667, "grad_norm": 0.3254498243331909, "learning_rate": 8.326081520380096e-05, "loss": 0.4484, "step": 11676 }, { "epoch": 0.8657966931118856, "grad_norm": 0.34127652645111084, "learning_rate": 8.32508127031758e-05, "loss": 0.4954, "step": 11677 }, { "epoch": 0.8658708385853043, "grad_norm": 0.35422399640083313, "learning_rate": 8.324081020255065e-05, "loss": 0.4813, "step": 11678 }, { "epoch": 0.8659449840587232, "grad_norm": 0.3449486196041107, "learning_rate": 8.323080770192549e-05, "loss": 0.4594, "step": 11679 }, { "epoch": 0.8660191295321421, "grad_norm": 0.3430573642253876, "learning_rate": 8.322080520130034e-05, "loss": 0.4683, "step": 11680 }, { "epoch": 0.8660932750055609, "grad_norm": 0.3754158914089203, "learning_rate": 8.321080270067517e-05, "loss": 0.51, "step": 11681 }, { "epoch": 0.8661674204789798, "grad_norm": 0.36466169357299805, "learning_rate": 8.320080020005002e-05, "loss": 0.4803, "step": 11682 }, { "epoch": 0.8662415659523987, "grad_norm": 0.3651632070541382, "learning_rate": 8.319079769942486e-05, "loss": 0.4898, "step": 11683 }, { "epoch": 0.8663157114258174, "grad_norm": 0.35610660910606384, "learning_rate": 8.31807951987997e-05, "loss": 0.4567, "step": 11684 }, { "epoch": 0.8663898568992363, "grad_norm": 0.3577408790588379, "learning_rate": 8.317079269817455e-05, "loss": 0.4974, "step": 11685 }, { "epoch": 0.8664640023726552, "grad_norm": 0.3910984694957733, "learning_rate": 8.316079019754939e-05, "loss": 0.53, "step": 11686 }, { "epoch": 0.866538147846074, "grad_norm": 0.3613820970058441, "learning_rate": 8.315078769692424e-05, "loss": 0.5033, "step": 11687 }, { "epoch": 0.8666122933194929, "grad_norm": 0.38311079144477844, "learning_rate": 8.314078519629907e-05, "loss": 0.5343, "step": 11688 }, { "epoch": 0.8666864387929117, "grad_norm": 0.39249274134635925, "learning_rate": 8.313078269567392e-05, "loss": 0.5083, "step": 11689 }, { "epoch": 0.8667605842663305, "grad_norm": 0.36275044083595276, "learning_rate": 8.312078019504876e-05, "loss": 0.4862, "step": 11690 }, { "epoch": 0.8668347297397494, "grad_norm": 0.3354801833629608, "learning_rate": 8.311077769442361e-05, "loss": 0.4679, "step": 11691 }, { "epoch": 0.8669088752131683, "grad_norm": 0.356682151556015, "learning_rate": 8.310077519379845e-05, "loss": 0.465, "step": 11692 }, { "epoch": 0.8669830206865871, "grad_norm": 0.3698229193687439, "learning_rate": 8.30907726931733e-05, "loss": 0.5086, "step": 11693 }, { "epoch": 0.8670571661600059, "grad_norm": 0.373922735452652, "learning_rate": 8.308077019254814e-05, "loss": 0.4951, "step": 11694 }, { "epoch": 0.8671313116334248, "grad_norm": 0.34801554679870605, "learning_rate": 8.307076769192297e-05, "loss": 0.4628, "step": 11695 }, { "epoch": 0.8672054571068436, "grad_norm": 0.3753833770751953, "learning_rate": 8.306076519129782e-05, "loss": 0.5337, "step": 11696 }, { "epoch": 0.8672796025802625, "grad_norm": 0.37053704261779785, "learning_rate": 8.305076269067266e-05, "loss": 0.5189, "step": 11697 }, { "epoch": 0.8673537480536814, "grad_norm": 0.3897501528263092, "learning_rate": 8.304076019004751e-05, "loss": 0.4766, "step": 11698 }, { "epoch": 0.8674278935271001, "grad_norm": 0.35529184341430664, "learning_rate": 8.303075768942236e-05, "loss": 0.4867, "step": 11699 }, { "epoch": 0.867502039000519, "grad_norm": 0.3812382221221924, "learning_rate": 8.30207551887972e-05, "loss": 0.5446, "step": 11700 }, { "epoch": 0.8675761844739379, "grad_norm": 0.3595046103000641, "learning_rate": 8.301075268817205e-05, "loss": 0.4979, "step": 11701 }, { "epoch": 0.8676503299473567, "grad_norm": 0.36008694767951965, "learning_rate": 8.300075018754689e-05, "loss": 0.4937, "step": 11702 }, { "epoch": 0.8677244754207756, "grad_norm": 0.37955087423324585, "learning_rate": 8.299074768692174e-05, "loss": 0.5308, "step": 11703 }, { "epoch": 0.8677986208941945, "grad_norm": 0.3536772131919861, "learning_rate": 8.298074518629659e-05, "loss": 0.4914, "step": 11704 }, { "epoch": 0.8678727663676132, "grad_norm": 0.33861756324768066, "learning_rate": 8.297074268567143e-05, "loss": 0.4735, "step": 11705 }, { "epoch": 0.8679469118410321, "grad_norm": 0.3697017431259155, "learning_rate": 8.296074018504628e-05, "loss": 0.5089, "step": 11706 }, { "epoch": 0.868021057314451, "grad_norm": 0.3704412579536438, "learning_rate": 8.295073768442111e-05, "loss": 0.4943, "step": 11707 }, { "epoch": 0.8680952027878698, "grad_norm": 0.35577458143234253, "learning_rate": 8.294073518379595e-05, "loss": 0.4993, "step": 11708 }, { "epoch": 0.8681693482612887, "grad_norm": 0.35644757747650146, "learning_rate": 8.29307326831708e-05, "loss": 0.4858, "step": 11709 }, { "epoch": 0.8682434937347075, "grad_norm": 0.3777138292789459, "learning_rate": 8.292073018254564e-05, "loss": 0.5094, "step": 11710 }, { "epoch": 0.8683176392081263, "grad_norm": 0.35644885897636414, "learning_rate": 8.291072768192049e-05, "loss": 0.5032, "step": 11711 }, { "epoch": 0.8683917846815452, "grad_norm": 0.36739787459373474, "learning_rate": 8.290072518129533e-05, "loss": 0.5206, "step": 11712 }, { "epoch": 0.8684659301549641, "grad_norm": 0.3688899278640747, "learning_rate": 8.289072268067018e-05, "loss": 0.5414, "step": 11713 }, { "epoch": 0.8685400756283829, "grad_norm": 0.3635037839412689, "learning_rate": 8.288072018004501e-05, "loss": 0.4659, "step": 11714 }, { "epoch": 0.8686142211018018, "grad_norm": 0.39022883772850037, "learning_rate": 8.287071767941986e-05, "loss": 0.4946, "step": 11715 }, { "epoch": 0.8686883665752205, "grad_norm": 0.3837730288505554, "learning_rate": 8.28607151787947e-05, "loss": 0.5268, "step": 11716 }, { "epoch": 0.8687625120486394, "grad_norm": 0.3577916920185089, "learning_rate": 8.285071267816955e-05, "loss": 0.4873, "step": 11717 }, { "epoch": 0.8688366575220583, "grad_norm": 0.3763296604156494, "learning_rate": 8.284071017754439e-05, "loss": 0.5006, "step": 11718 }, { "epoch": 0.8689108029954771, "grad_norm": 0.3637072741985321, "learning_rate": 8.283070767691924e-05, "loss": 0.5045, "step": 11719 }, { "epoch": 0.868984948468896, "grad_norm": 0.36154109239578247, "learning_rate": 8.282070517629408e-05, "loss": 0.4887, "step": 11720 }, { "epoch": 0.8690590939423148, "grad_norm": 0.3804943263530731, "learning_rate": 8.281070267566891e-05, "loss": 0.5265, "step": 11721 }, { "epoch": 0.8691332394157336, "grad_norm": 0.3569841980934143, "learning_rate": 8.280070017504376e-05, "loss": 0.4664, "step": 11722 }, { "epoch": 0.8692073848891525, "grad_norm": 0.39890429377555847, "learning_rate": 8.27906976744186e-05, "loss": 0.5066, "step": 11723 }, { "epoch": 0.8692815303625714, "grad_norm": 0.33598756790161133, "learning_rate": 8.278069517379345e-05, "loss": 0.4671, "step": 11724 }, { "epoch": 0.8693556758359902, "grad_norm": 0.35603711009025574, "learning_rate": 8.277069267316829e-05, "loss": 0.5077, "step": 11725 }, { "epoch": 0.869429821309409, "grad_norm": 0.39612090587615967, "learning_rate": 8.276069017254314e-05, "loss": 0.4999, "step": 11726 }, { "epoch": 0.8695039667828279, "grad_norm": 0.36741483211517334, "learning_rate": 8.275068767191798e-05, "loss": 0.4855, "step": 11727 }, { "epoch": 0.8695781122562467, "grad_norm": 0.3698205351829529, "learning_rate": 8.274068517129283e-05, "loss": 0.4989, "step": 11728 }, { "epoch": 0.8696522577296656, "grad_norm": 0.3608520030975342, "learning_rate": 8.273068267066767e-05, "loss": 0.4701, "step": 11729 }, { "epoch": 0.8697264032030845, "grad_norm": 0.3769518733024597, "learning_rate": 8.272068017004252e-05, "loss": 0.5159, "step": 11730 }, { "epoch": 0.8698005486765032, "grad_norm": 0.371474027633667, "learning_rate": 8.271067766941735e-05, "loss": 0.4718, "step": 11731 }, { "epoch": 0.8698746941499221, "grad_norm": 0.4079935848712921, "learning_rate": 8.270067516879219e-05, "loss": 0.5025, "step": 11732 }, { "epoch": 0.869948839623341, "grad_norm": 0.34275364875793457, "learning_rate": 8.269067266816704e-05, "loss": 0.461, "step": 11733 }, { "epoch": 0.8700229850967598, "grad_norm": 0.35926029086112976, "learning_rate": 8.268067016754189e-05, "loss": 0.4976, "step": 11734 }, { "epoch": 0.8700971305701787, "grad_norm": 0.3874722421169281, "learning_rate": 8.267066766691673e-05, "loss": 0.499, "step": 11735 }, { "epoch": 0.8701712760435976, "grad_norm": 0.36505475640296936, "learning_rate": 8.266066516629158e-05, "loss": 0.4886, "step": 11736 }, { "epoch": 0.8702454215170163, "grad_norm": 0.3458728492259979, "learning_rate": 8.265066266566643e-05, "loss": 0.4655, "step": 11737 }, { "epoch": 0.8703195669904352, "grad_norm": 0.37973231077194214, "learning_rate": 8.264066016504127e-05, "loss": 0.5115, "step": 11738 }, { "epoch": 0.8703937124638541, "grad_norm": 0.34738510847091675, "learning_rate": 8.263065766441612e-05, "loss": 0.4788, "step": 11739 }, { "epoch": 0.8704678579372729, "grad_norm": 0.354621559381485, "learning_rate": 8.262065516379095e-05, "loss": 0.4885, "step": 11740 }, { "epoch": 0.8705420034106918, "grad_norm": 0.3375888168811798, "learning_rate": 8.26106526631658e-05, "loss": 0.4611, "step": 11741 }, { "epoch": 0.8706161488841107, "grad_norm": 0.35983720421791077, "learning_rate": 8.260065016254064e-05, "loss": 0.485, "step": 11742 }, { "epoch": 0.8706902943575294, "grad_norm": 0.35377392172813416, "learning_rate": 8.259064766191549e-05, "loss": 0.4777, "step": 11743 }, { "epoch": 0.8707644398309483, "grad_norm": 0.3548296391963959, "learning_rate": 8.258064516129033e-05, "loss": 0.4638, "step": 11744 }, { "epoch": 0.8708385853043672, "grad_norm": 0.37261030077934265, "learning_rate": 8.257064266066517e-05, "loss": 0.5198, "step": 11745 }, { "epoch": 0.870912730777786, "grad_norm": 0.35586243867874146, "learning_rate": 8.256064016004002e-05, "loss": 0.5195, "step": 11746 }, { "epoch": 0.8709868762512049, "grad_norm": 0.3684420883655548, "learning_rate": 8.255063765941485e-05, "loss": 0.4896, "step": 11747 }, { "epoch": 0.8710610217246237, "grad_norm": 0.36432114243507385, "learning_rate": 8.25406351587897e-05, "loss": 0.5212, "step": 11748 }, { "epoch": 0.8711351671980425, "grad_norm": 0.34806501865386963, "learning_rate": 8.253063265816454e-05, "loss": 0.4705, "step": 11749 }, { "epoch": 0.8712093126714614, "grad_norm": 0.3534393310546875, "learning_rate": 8.252063015753939e-05, "loss": 0.4583, "step": 11750 }, { "epoch": 0.8712834581448803, "grad_norm": 0.3391083776950836, "learning_rate": 8.251062765691423e-05, "loss": 0.477, "step": 11751 }, { "epoch": 0.8713576036182991, "grad_norm": 0.3517700433731079, "learning_rate": 8.250062515628908e-05, "loss": 0.4543, "step": 11752 }, { "epoch": 0.8714317490917179, "grad_norm": 0.3446834981441498, "learning_rate": 8.249062265566392e-05, "loss": 0.4895, "step": 11753 }, { "epoch": 0.8715058945651368, "grad_norm": 0.35160884261131287, "learning_rate": 8.248062015503877e-05, "loss": 0.4764, "step": 11754 }, { "epoch": 0.8715800400385556, "grad_norm": 0.3844711482524872, "learning_rate": 8.24706176544136e-05, "loss": 0.496, "step": 11755 }, { "epoch": 0.8716541855119745, "grad_norm": 0.3824302554130554, "learning_rate": 8.246061515378846e-05, "loss": 0.4944, "step": 11756 }, { "epoch": 0.8717283309853934, "grad_norm": 0.36176952719688416, "learning_rate": 8.24506126531633e-05, "loss": 0.4801, "step": 11757 }, { "epoch": 0.8718024764588121, "grad_norm": 0.3781255781650543, "learning_rate": 8.244061015253813e-05, "loss": 0.5149, "step": 11758 }, { "epoch": 0.871876621932231, "grad_norm": 0.3608815371990204, "learning_rate": 8.243060765191298e-05, "loss": 0.4683, "step": 11759 }, { "epoch": 0.8719507674056499, "grad_norm": 0.37689685821533203, "learning_rate": 8.242060515128782e-05, "loss": 0.4963, "step": 11760 }, { "epoch": 0.8720249128790687, "grad_norm": 0.35716092586517334, "learning_rate": 8.241060265066267e-05, "loss": 0.4693, "step": 11761 }, { "epoch": 0.8720990583524876, "grad_norm": 0.38627830147743225, "learning_rate": 8.24006001500375e-05, "loss": 0.5197, "step": 11762 }, { "epoch": 0.8721732038259065, "grad_norm": 0.3532002866268158, "learning_rate": 8.239059764941236e-05, "loss": 0.4764, "step": 11763 }, { "epoch": 0.8722473492993252, "grad_norm": 0.368111252784729, "learning_rate": 8.23805951487872e-05, "loss": 0.4951, "step": 11764 }, { "epoch": 0.8723214947727441, "grad_norm": 0.3745194971561432, "learning_rate": 8.237059264816204e-05, "loss": 0.4781, "step": 11765 }, { "epoch": 0.872395640246163, "grad_norm": 0.39134594798088074, "learning_rate": 8.236059014753688e-05, "loss": 0.5024, "step": 11766 }, { "epoch": 0.8724697857195818, "grad_norm": 0.37226995825767517, "learning_rate": 8.235058764691173e-05, "loss": 0.5053, "step": 11767 }, { "epoch": 0.8725439311930007, "grad_norm": 0.36705976724624634, "learning_rate": 8.234058514628657e-05, "loss": 0.4862, "step": 11768 }, { "epoch": 0.8726180766664196, "grad_norm": 0.37739235162734985, "learning_rate": 8.233058264566142e-05, "loss": 0.5444, "step": 11769 }, { "epoch": 0.8726922221398383, "grad_norm": 0.3633512258529663, "learning_rate": 8.232058014503626e-05, "loss": 0.5167, "step": 11770 }, { "epoch": 0.8727663676132572, "grad_norm": 0.36646103858947754, "learning_rate": 8.231057764441111e-05, "loss": 0.4891, "step": 11771 }, { "epoch": 0.8728405130866761, "grad_norm": 0.34912988543510437, "learning_rate": 8.230057514378596e-05, "loss": 0.4829, "step": 11772 }, { "epoch": 0.8729146585600949, "grad_norm": 0.36436668038368225, "learning_rate": 8.22905726431608e-05, "loss": 0.5012, "step": 11773 }, { "epoch": 0.8729888040335138, "grad_norm": 0.3700326979160309, "learning_rate": 8.228057014253565e-05, "loss": 0.5076, "step": 11774 }, { "epoch": 0.8730629495069326, "grad_norm": 0.37405505776405334, "learning_rate": 8.227056764191048e-05, "loss": 0.541, "step": 11775 }, { "epoch": 0.8731370949803514, "grad_norm": 0.34765201807022095, "learning_rate": 8.226056514128533e-05, "loss": 0.4821, "step": 11776 }, { "epoch": 0.8732112404537703, "grad_norm": 0.3481632173061371, "learning_rate": 8.225056264066017e-05, "loss": 0.4768, "step": 11777 }, { "epoch": 0.8732853859271892, "grad_norm": 0.3510053753852844, "learning_rate": 8.224056014003502e-05, "loss": 0.4884, "step": 11778 }, { "epoch": 0.873359531400608, "grad_norm": 0.36411476135253906, "learning_rate": 8.223055763940986e-05, "loss": 0.4823, "step": 11779 }, { "epoch": 0.8734336768740268, "grad_norm": 0.33079659938812256, "learning_rate": 8.222055513878471e-05, "loss": 0.4727, "step": 11780 }, { "epoch": 0.8735078223474457, "grad_norm": 0.3927633464336395, "learning_rate": 8.221055263815955e-05, "loss": 0.5318, "step": 11781 }, { "epoch": 0.8735819678208645, "grad_norm": 0.37990570068359375, "learning_rate": 8.220055013753438e-05, "loss": 0.5001, "step": 11782 }, { "epoch": 0.8736561132942834, "grad_norm": 0.36052271723747253, "learning_rate": 8.219054763690923e-05, "loss": 0.461, "step": 11783 }, { "epoch": 0.8737302587677023, "grad_norm": 0.37225544452667236, "learning_rate": 8.218054513628407e-05, "loss": 0.4982, "step": 11784 }, { "epoch": 0.873804404241121, "grad_norm": 0.36279842257499695, "learning_rate": 8.217054263565892e-05, "loss": 0.4943, "step": 11785 }, { "epoch": 0.8738785497145399, "grad_norm": 0.35148584842681885, "learning_rate": 8.216054013503376e-05, "loss": 0.505, "step": 11786 }, { "epoch": 0.8739526951879588, "grad_norm": 0.36948710680007935, "learning_rate": 8.215053763440861e-05, "loss": 0.4989, "step": 11787 }, { "epoch": 0.8740268406613776, "grad_norm": 0.3462343215942383, "learning_rate": 8.214053513378345e-05, "loss": 0.4744, "step": 11788 }, { "epoch": 0.8741009861347965, "grad_norm": 0.3554348647594452, "learning_rate": 8.21305326331583e-05, "loss": 0.4938, "step": 11789 }, { "epoch": 0.8741751316082154, "grad_norm": 0.41878366470336914, "learning_rate": 8.212053013253313e-05, "loss": 0.5398, "step": 11790 }, { "epoch": 0.8742492770816341, "grad_norm": 0.3453024923801422, "learning_rate": 8.211052763190798e-05, "loss": 0.4789, "step": 11791 }, { "epoch": 0.874323422555053, "grad_norm": 0.3582899570465088, "learning_rate": 8.210052513128282e-05, "loss": 0.5039, "step": 11792 }, { "epoch": 0.8743975680284719, "grad_norm": 0.3557051420211792, "learning_rate": 8.209052263065767e-05, "loss": 0.4806, "step": 11793 }, { "epoch": 0.8744717135018907, "grad_norm": 0.3456798493862152, "learning_rate": 8.208052013003251e-05, "loss": 0.4759, "step": 11794 }, { "epoch": 0.8745458589753096, "grad_norm": 0.36019790172576904, "learning_rate": 8.207051762940735e-05, "loss": 0.482, "step": 11795 }, { "epoch": 0.8746200044487284, "grad_norm": 0.36495572328567505, "learning_rate": 8.20605151287822e-05, "loss": 0.5095, "step": 11796 }, { "epoch": 0.8746941499221472, "grad_norm": 0.3726238012313843, "learning_rate": 8.205051262815703e-05, "loss": 0.4992, "step": 11797 }, { "epoch": 0.8747682953955661, "grad_norm": 0.35442671179771423, "learning_rate": 8.204051012753188e-05, "loss": 0.475, "step": 11798 }, { "epoch": 0.874842440868985, "grad_norm": 0.36886656284332275, "learning_rate": 8.203050762690672e-05, "loss": 0.513, "step": 11799 }, { "epoch": 0.8749165863424038, "grad_norm": 0.3482615053653717, "learning_rate": 8.202050512628157e-05, "loss": 0.4638, "step": 11800 }, { "epoch": 0.8749907318158227, "grad_norm": 0.37307003140449524, "learning_rate": 8.201050262565641e-05, "loss": 0.4849, "step": 11801 }, { "epoch": 0.8750648772892415, "grad_norm": 0.36895012855529785, "learning_rate": 8.200050012503126e-05, "loss": 0.4903, "step": 11802 }, { "epoch": 0.8751390227626603, "grad_norm": 0.3543134331703186, "learning_rate": 8.19904976244061e-05, "loss": 0.488, "step": 11803 }, { "epoch": 0.8752131682360792, "grad_norm": 0.351529061794281, "learning_rate": 8.198049512378095e-05, "loss": 0.4682, "step": 11804 }, { "epoch": 0.8752873137094981, "grad_norm": 0.3562440872192383, "learning_rate": 8.19704926231558e-05, "loss": 0.4663, "step": 11805 }, { "epoch": 0.8753614591829169, "grad_norm": 0.34652361273765564, "learning_rate": 8.196049012253064e-05, "loss": 0.4844, "step": 11806 }, { "epoch": 0.8754356046563357, "grad_norm": 0.3401212692260742, "learning_rate": 8.195048762190549e-05, "loss": 0.4607, "step": 11807 }, { "epoch": 0.8755097501297546, "grad_norm": 0.35283952951431274, "learning_rate": 8.194048512128032e-05, "loss": 0.5038, "step": 11808 }, { "epoch": 0.8755838956031734, "grad_norm": 0.3314790427684784, "learning_rate": 8.193048262065517e-05, "loss": 0.4545, "step": 11809 }, { "epoch": 0.8756580410765923, "grad_norm": 0.34218645095825195, "learning_rate": 8.192048012003001e-05, "loss": 0.495, "step": 11810 }, { "epoch": 0.8757321865500112, "grad_norm": 0.3524606227874756, "learning_rate": 8.191047761940486e-05, "loss": 0.4814, "step": 11811 }, { "epoch": 0.87580633202343, "grad_norm": 0.3542008101940155, "learning_rate": 8.19004751187797e-05, "loss": 0.5005, "step": 11812 }, { "epoch": 0.8758804774968488, "grad_norm": 0.36023351550102234, "learning_rate": 8.189047261815455e-05, "loss": 0.5021, "step": 11813 }, { "epoch": 0.8759546229702677, "grad_norm": 0.36129215359687805, "learning_rate": 8.188047011752939e-05, "loss": 0.4643, "step": 11814 }, { "epoch": 0.8760287684436865, "grad_norm": 0.39615902304649353, "learning_rate": 8.187046761690424e-05, "loss": 0.563, "step": 11815 }, { "epoch": 0.8761029139171054, "grad_norm": 0.3475479781627655, "learning_rate": 8.186046511627907e-05, "loss": 0.5085, "step": 11816 }, { "epoch": 0.8761770593905243, "grad_norm": 0.3609868586063385, "learning_rate": 8.185046261565393e-05, "loss": 0.4996, "step": 11817 }, { "epoch": 0.876251204863943, "grad_norm": 0.35663339495658875, "learning_rate": 8.184046011502876e-05, "loss": 0.4943, "step": 11818 }, { "epoch": 0.8763253503373619, "grad_norm": 0.34060871601104736, "learning_rate": 8.18304576144036e-05, "loss": 0.4627, "step": 11819 }, { "epoch": 0.8763994958107808, "grad_norm": 0.3721545338630676, "learning_rate": 8.182045511377845e-05, "loss": 0.5214, "step": 11820 }, { "epoch": 0.8764736412841996, "grad_norm": 0.34665343165397644, "learning_rate": 8.181045261315329e-05, "loss": 0.4903, "step": 11821 }, { "epoch": 0.8765477867576185, "grad_norm": 0.34381017088890076, "learning_rate": 8.180045011252814e-05, "loss": 0.4819, "step": 11822 }, { "epoch": 0.8766219322310373, "grad_norm": 0.35737770795822144, "learning_rate": 8.179044761190297e-05, "loss": 0.4897, "step": 11823 }, { "epoch": 0.8766960777044561, "grad_norm": 0.3472457230091095, "learning_rate": 8.178044511127783e-05, "loss": 0.464, "step": 11824 }, { "epoch": 0.876770223177875, "grad_norm": 0.343909353017807, "learning_rate": 8.177044261065266e-05, "loss": 0.4814, "step": 11825 }, { "epoch": 0.8768443686512939, "grad_norm": 0.34717413783073425, "learning_rate": 8.176044011002751e-05, "loss": 0.4507, "step": 11826 }, { "epoch": 0.8769185141247127, "grad_norm": 0.3474622368812561, "learning_rate": 8.175043760940235e-05, "loss": 0.4715, "step": 11827 }, { "epoch": 0.8769926595981316, "grad_norm": 0.3496578633785248, "learning_rate": 8.17404351087772e-05, "loss": 0.4522, "step": 11828 }, { "epoch": 0.8770668050715504, "grad_norm": 0.37351280450820923, "learning_rate": 8.173043260815204e-05, "loss": 0.4698, "step": 11829 }, { "epoch": 0.8771409505449692, "grad_norm": 0.36267781257629395, "learning_rate": 8.172043010752689e-05, "loss": 0.5009, "step": 11830 }, { "epoch": 0.8772150960183881, "grad_norm": 0.35285523533821106, "learning_rate": 8.171042760690173e-05, "loss": 0.4729, "step": 11831 }, { "epoch": 0.8772892414918069, "grad_norm": 0.36157482862472534, "learning_rate": 8.170042510627656e-05, "loss": 0.5007, "step": 11832 }, { "epoch": 0.8773633869652258, "grad_norm": 0.37940463423728943, "learning_rate": 8.169042260565141e-05, "loss": 0.516, "step": 11833 }, { "epoch": 0.8774375324386446, "grad_norm": 0.3479544222354889, "learning_rate": 8.168042010502625e-05, "loss": 0.48, "step": 11834 }, { "epoch": 0.8775116779120634, "grad_norm": 0.3608179986476898, "learning_rate": 8.16704176044011e-05, "loss": 0.4736, "step": 11835 }, { "epoch": 0.8775858233854823, "grad_norm": 0.3793008625507355, "learning_rate": 8.166041510377594e-05, "loss": 0.5129, "step": 11836 }, { "epoch": 0.8776599688589012, "grad_norm": 0.38155755400657654, "learning_rate": 8.165041260315079e-05, "loss": 0.4836, "step": 11837 }, { "epoch": 0.87773411433232, "grad_norm": 0.36121928691864014, "learning_rate": 8.164041010252564e-05, "loss": 0.5015, "step": 11838 }, { "epoch": 0.8778082598057388, "grad_norm": 0.3561004102230072, "learning_rate": 8.163040760190048e-05, "loss": 0.4711, "step": 11839 }, { "epoch": 0.8778824052791577, "grad_norm": 0.3751841187477112, "learning_rate": 8.162040510127533e-05, "loss": 0.5156, "step": 11840 }, { "epoch": 0.8779565507525765, "grad_norm": 0.39267638325691223, "learning_rate": 8.161040260065016e-05, "loss": 0.5114, "step": 11841 }, { "epoch": 0.8780306962259954, "grad_norm": 0.37628597021102905, "learning_rate": 8.160040010002502e-05, "loss": 0.4873, "step": 11842 }, { "epoch": 0.8781048416994143, "grad_norm": 0.3775082230567932, "learning_rate": 8.159039759939987e-05, "loss": 0.4859, "step": 11843 }, { "epoch": 0.878178987172833, "grad_norm": 0.35037657618522644, "learning_rate": 8.15803950987747e-05, "loss": 0.5015, "step": 11844 }, { "epoch": 0.8782531326462519, "grad_norm": 0.3733867406845093, "learning_rate": 8.157039259814954e-05, "loss": 0.5017, "step": 11845 }, { "epoch": 0.8783272781196708, "grad_norm": 0.4021492600440979, "learning_rate": 8.156039009752439e-05, "loss": 0.5525, "step": 11846 }, { "epoch": 0.8784014235930896, "grad_norm": 0.3435080349445343, "learning_rate": 8.155038759689923e-05, "loss": 0.4628, "step": 11847 }, { "epoch": 0.8784755690665085, "grad_norm": 0.3562690019607544, "learning_rate": 8.154038509627408e-05, "loss": 0.4746, "step": 11848 }, { "epoch": 0.8785497145399274, "grad_norm": 0.357272207736969, "learning_rate": 8.153038259564892e-05, "loss": 0.4823, "step": 11849 }, { "epoch": 0.8786238600133461, "grad_norm": 0.34860995411872864, "learning_rate": 8.152038009502377e-05, "loss": 0.4669, "step": 11850 }, { "epoch": 0.878698005486765, "grad_norm": 0.3930502235889435, "learning_rate": 8.15103775943986e-05, "loss": 0.5097, "step": 11851 }, { "epoch": 0.8787721509601839, "grad_norm": 0.34864023327827454, "learning_rate": 8.150037509377345e-05, "loss": 0.476, "step": 11852 }, { "epoch": 0.8788462964336027, "grad_norm": 0.3597799241542816, "learning_rate": 8.149037259314829e-05, "loss": 0.4874, "step": 11853 }, { "epoch": 0.8789204419070216, "grad_norm": 0.3777303993701935, "learning_rate": 8.148037009252314e-05, "loss": 0.5312, "step": 11854 }, { "epoch": 0.8789945873804405, "grad_norm": 0.36807724833488464, "learning_rate": 8.147036759189798e-05, "loss": 0.4611, "step": 11855 }, { "epoch": 0.8790687328538592, "grad_norm": 0.3712652325630188, "learning_rate": 8.146036509127282e-05, "loss": 0.4899, "step": 11856 }, { "epoch": 0.8791428783272781, "grad_norm": 0.3569472134113312, "learning_rate": 8.145036259064767e-05, "loss": 0.512, "step": 11857 }, { "epoch": 0.879217023800697, "grad_norm": 0.35730817914009094, "learning_rate": 8.14403600900225e-05, "loss": 0.4765, "step": 11858 }, { "epoch": 0.8792911692741158, "grad_norm": 0.3718297481536865, "learning_rate": 8.143035758939735e-05, "loss": 0.5, "step": 11859 }, { "epoch": 0.8793653147475347, "grad_norm": 0.37118563055992126, "learning_rate": 8.142035508877219e-05, "loss": 0.5051, "step": 11860 }, { "epoch": 0.8794394602209535, "grad_norm": 0.3759261667728424, "learning_rate": 8.141035258814704e-05, "loss": 0.5387, "step": 11861 }, { "epoch": 0.8795136056943723, "grad_norm": 0.3429732322692871, "learning_rate": 8.140035008752188e-05, "loss": 0.456, "step": 11862 }, { "epoch": 0.8795877511677912, "grad_norm": 0.3453664481639862, "learning_rate": 8.139034758689673e-05, "loss": 0.4435, "step": 11863 }, { "epoch": 0.8796618966412101, "grad_norm": 0.37154558300971985, "learning_rate": 8.138034508627157e-05, "loss": 0.532, "step": 11864 }, { "epoch": 0.8797360421146289, "grad_norm": 0.3832162320613861, "learning_rate": 8.137034258564642e-05, "loss": 0.5062, "step": 11865 }, { "epoch": 0.8798101875880477, "grad_norm": 0.37078917026519775, "learning_rate": 8.136034008502125e-05, "loss": 0.4899, "step": 11866 }, { "epoch": 0.8798843330614666, "grad_norm": 0.37907472252845764, "learning_rate": 8.13503375843961e-05, "loss": 0.5047, "step": 11867 }, { "epoch": 0.8799584785348854, "grad_norm": 0.3553737998008728, "learning_rate": 8.134033508377094e-05, "loss": 0.494, "step": 11868 }, { "epoch": 0.8800326240083043, "grad_norm": 0.37707534432411194, "learning_rate": 8.133033258314578e-05, "loss": 0.5556, "step": 11869 }, { "epoch": 0.8801067694817232, "grad_norm": 0.38112059235572815, "learning_rate": 8.132033008252063e-05, "loss": 0.5067, "step": 11870 }, { "epoch": 0.880180914955142, "grad_norm": 0.35457760095596313, "learning_rate": 8.131032758189547e-05, "loss": 0.479, "step": 11871 }, { "epoch": 0.8802550604285608, "grad_norm": 0.36074209213256836, "learning_rate": 8.130032508127032e-05, "loss": 0.5415, "step": 11872 }, { "epoch": 0.8803292059019797, "grad_norm": 0.3559904396533966, "learning_rate": 8.129032258064517e-05, "loss": 0.4771, "step": 11873 }, { "epoch": 0.8804033513753985, "grad_norm": 0.36444729566574097, "learning_rate": 8.128032008002e-05, "loss": 0.4965, "step": 11874 }, { "epoch": 0.8804774968488174, "grad_norm": 0.3396749198436737, "learning_rate": 8.127031757939486e-05, "loss": 0.4331, "step": 11875 }, { "epoch": 0.8805516423222363, "grad_norm": 0.3502194583415985, "learning_rate": 8.12603150787697e-05, "loss": 0.4885, "step": 11876 }, { "epoch": 0.880625787795655, "grad_norm": 0.3621748983860016, "learning_rate": 8.125031257814454e-05, "loss": 0.4938, "step": 11877 }, { "epoch": 0.8806999332690739, "grad_norm": 0.3515734076499939, "learning_rate": 8.12403100775194e-05, "loss": 0.4738, "step": 11878 }, { "epoch": 0.8807740787424928, "grad_norm": 0.35549503564834595, "learning_rate": 8.123030757689423e-05, "loss": 0.4837, "step": 11879 }, { "epoch": 0.8808482242159116, "grad_norm": 0.3330535292625427, "learning_rate": 8.122030507626908e-05, "loss": 0.4759, "step": 11880 }, { "epoch": 0.8809223696893305, "grad_norm": 0.3974197804927826, "learning_rate": 8.121030257564392e-05, "loss": 0.5205, "step": 11881 }, { "epoch": 0.8809965151627493, "grad_norm": 0.34464049339294434, "learning_rate": 8.120030007501876e-05, "loss": 0.4847, "step": 11882 }, { "epoch": 0.8810706606361681, "grad_norm": 0.3320695161819458, "learning_rate": 8.119029757439361e-05, "loss": 0.4485, "step": 11883 }, { "epoch": 0.881144806109587, "grad_norm": 0.3616564869880676, "learning_rate": 8.118029507376844e-05, "loss": 0.5101, "step": 11884 }, { "epoch": 0.8812189515830059, "grad_norm": 0.3998618423938751, "learning_rate": 8.11702925731433e-05, "loss": 0.5208, "step": 11885 }, { "epoch": 0.8812930970564247, "grad_norm": 0.33804851770401, "learning_rate": 8.116029007251813e-05, "loss": 0.4676, "step": 11886 }, { "epoch": 0.8813672425298436, "grad_norm": 0.3475343585014343, "learning_rate": 8.115028757189298e-05, "loss": 0.4819, "step": 11887 }, { "epoch": 0.8814413880032624, "grad_norm": 0.3661973476409912, "learning_rate": 8.114028507126782e-05, "loss": 0.5254, "step": 11888 }, { "epoch": 0.8815155334766812, "grad_norm": 0.34863850474357605, "learning_rate": 8.113028257064267e-05, "loss": 0.5061, "step": 11889 }, { "epoch": 0.8815896789501001, "grad_norm": 0.35819682478904724, "learning_rate": 8.112028007001751e-05, "loss": 0.481, "step": 11890 }, { "epoch": 0.881663824423519, "grad_norm": 0.3446599543094635, "learning_rate": 8.111027756939236e-05, "loss": 0.456, "step": 11891 }, { "epoch": 0.8817379698969378, "grad_norm": 0.3951475918292999, "learning_rate": 8.11002750687672e-05, "loss": 0.5389, "step": 11892 }, { "epoch": 0.8818121153703566, "grad_norm": 0.3684679865837097, "learning_rate": 8.109027256814203e-05, "loss": 0.5222, "step": 11893 }, { "epoch": 0.8818862608437755, "grad_norm": 0.36895447969436646, "learning_rate": 8.108027006751688e-05, "loss": 0.491, "step": 11894 }, { "epoch": 0.8819604063171943, "grad_norm": 0.36355850100517273, "learning_rate": 8.107026756689172e-05, "loss": 0.4876, "step": 11895 }, { "epoch": 0.8820345517906132, "grad_norm": 0.3442832827568054, "learning_rate": 8.106026506626657e-05, "loss": 0.4981, "step": 11896 }, { "epoch": 0.8821086972640321, "grad_norm": 0.3612908720970154, "learning_rate": 8.105026256564141e-05, "loss": 0.5015, "step": 11897 }, { "epoch": 0.8821828427374508, "grad_norm": 0.36678990721702576, "learning_rate": 8.104026006501626e-05, "loss": 0.5143, "step": 11898 }, { "epoch": 0.8822569882108697, "grad_norm": 0.3751452565193176, "learning_rate": 8.10302575643911e-05, "loss": 0.4951, "step": 11899 }, { "epoch": 0.8823311336842886, "grad_norm": 0.3550160527229309, "learning_rate": 8.102025506376595e-05, "loss": 0.5062, "step": 11900 }, { "epoch": 0.8824052791577074, "grad_norm": 0.3912220597267151, "learning_rate": 8.101025256314078e-05, "loss": 0.4775, "step": 11901 }, { "epoch": 0.8824794246311263, "grad_norm": 0.38802242279052734, "learning_rate": 8.100025006251563e-05, "loss": 0.5595, "step": 11902 }, { "epoch": 0.8825535701045452, "grad_norm": 0.34308791160583496, "learning_rate": 8.099024756189047e-05, "loss": 0.4623, "step": 11903 }, { "epoch": 0.8826277155779639, "grad_norm": 0.3508852422237396, "learning_rate": 8.098024506126532e-05, "loss": 0.4527, "step": 11904 }, { "epoch": 0.8827018610513828, "grad_norm": 0.36303579807281494, "learning_rate": 8.097024256064016e-05, "loss": 0.5028, "step": 11905 }, { "epoch": 0.8827760065248017, "grad_norm": 0.3733154237270355, "learning_rate": 8.096024006001501e-05, "loss": 0.4734, "step": 11906 }, { "epoch": 0.8828501519982205, "grad_norm": 0.38788363337516785, "learning_rate": 8.095023755938985e-05, "loss": 0.5577, "step": 11907 }, { "epoch": 0.8829242974716394, "grad_norm": 0.35250332951545715, "learning_rate": 8.09402350587647e-05, "loss": 0.4802, "step": 11908 }, { "epoch": 0.8829984429450582, "grad_norm": 0.37188056111335754, "learning_rate": 8.093023255813953e-05, "loss": 0.5066, "step": 11909 }, { "epoch": 0.883072588418477, "grad_norm": 0.3714388310909271, "learning_rate": 8.092023005751438e-05, "loss": 0.4951, "step": 11910 }, { "epoch": 0.8831467338918959, "grad_norm": 0.40681594610214233, "learning_rate": 8.091022755688924e-05, "loss": 0.5479, "step": 11911 }, { "epoch": 0.8832208793653148, "grad_norm": 0.3620094358921051, "learning_rate": 8.090022505626407e-05, "loss": 0.4794, "step": 11912 }, { "epoch": 0.8832950248387336, "grad_norm": 0.3663245737552643, "learning_rate": 8.089022255563892e-05, "loss": 0.5099, "step": 11913 }, { "epoch": 0.8833691703121525, "grad_norm": 0.38782915472984314, "learning_rate": 8.088022005501376e-05, "loss": 0.5207, "step": 11914 }, { "epoch": 0.8834433157855713, "grad_norm": 0.35163235664367676, "learning_rate": 8.087021755438861e-05, "loss": 0.5003, "step": 11915 }, { "epoch": 0.8835174612589901, "grad_norm": 0.3642041087150574, "learning_rate": 8.086021505376345e-05, "loss": 0.5494, "step": 11916 }, { "epoch": 0.883591606732409, "grad_norm": 0.3471689522266388, "learning_rate": 8.08502125531383e-05, "loss": 0.4647, "step": 11917 }, { "epoch": 0.8836657522058279, "grad_norm": 0.36203518509864807, "learning_rate": 8.084021005251314e-05, "loss": 0.4754, "step": 11918 }, { "epoch": 0.8837398976792467, "grad_norm": 0.3915911614894867, "learning_rate": 8.083020755188797e-05, "loss": 0.4972, "step": 11919 }, { "epoch": 0.8838140431526655, "grad_norm": 0.3879670798778534, "learning_rate": 8.082020505126282e-05, "loss": 0.5659, "step": 11920 }, { "epoch": 0.8838881886260844, "grad_norm": 0.3590998351573944, "learning_rate": 8.081020255063766e-05, "loss": 0.4777, "step": 11921 }, { "epoch": 0.8839623340995032, "grad_norm": 0.3482339084148407, "learning_rate": 8.080020005001251e-05, "loss": 0.4578, "step": 11922 }, { "epoch": 0.8840364795729221, "grad_norm": 0.36059775948524475, "learning_rate": 8.079019754938735e-05, "loss": 0.4309, "step": 11923 }, { "epoch": 0.884110625046341, "grad_norm": 0.36405688524246216, "learning_rate": 8.07801950487622e-05, "loss": 0.5221, "step": 11924 }, { "epoch": 0.8841847705197597, "grad_norm": 0.3493637442588806, "learning_rate": 8.077019254813704e-05, "loss": 0.4917, "step": 11925 }, { "epoch": 0.8842589159931786, "grad_norm": 0.3731789290904999, "learning_rate": 8.076019004751189e-05, "loss": 0.5349, "step": 11926 }, { "epoch": 0.8843330614665975, "grad_norm": 0.35202720761299133, "learning_rate": 8.075018754688672e-05, "loss": 0.4739, "step": 11927 }, { "epoch": 0.8844072069400163, "grad_norm": 0.35504254698753357, "learning_rate": 8.074018504626157e-05, "loss": 0.4782, "step": 11928 }, { "epoch": 0.8844813524134352, "grad_norm": 0.36829978227615356, "learning_rate": 8.073018254563641e-05, "loss": 0.5085, "step": 11929 }, { "epoch": 0.8845554978868541, "grad_norm": 0.3511871099472046, "learning_rate": 8.072018004501126e-05, "loss": 0.4951, "step": 11930 }, { "epoch": 0.8846296433602728, "grad_norm": 0.3376803696155548, "learning_rate": 8.07101775443861e-05, "loss": 0.4457, "step": 11931 }, { "epoch": 0.8847037888336917, "grad_norm": 0.32430300116539, "learning_rate": 8.070017504376094e-05, "loss": 0.463, "step": 11932 }, { "epoch": 0.8847779343071106, "grad_norm": 0.3593340218067169, "learning_rate": 8.069017254313579e-05, "loss": 0.481, "step": 11933 }, { "epoch": 0.8848520797805294, "grad_norm": 0.3635845482349396, "learning_rate": 8.068017004251062e-05, "loss": 0.4955, "step": 11934 }, { "epoch": 0.8849262252539483, "grad_norm": 0.35189342498779297, "learning_rate": 8.067016754188547e-05, "loss": 0.486, "step": 11935 }, { "epoch": 0.8850003707273671, "grad_norm": 0.39595067501068115, "learning_rate": 8.066016504126031e-05, "loss": 0.5494, "step": 11936 }, { "epoch": 0.8850745162007859, "grad_norm": 0.3605855107307434, "learning_rate": 8.065016254063516e-05, "loss": 0.4713, "step": 11937 }, { "epoch": 0.8851486616742048, "grad_norm": 0.34426555037498474, "learning_rate": 8.064016004001e-05, "loss": 0.455, "step": 11938 }, { "epoch": 0.8852228071476237, "grad_norm": 0.3860090672969818, "learning_rate": 8.063015753938485e-05, "loss": 0.5372, "step": 11939 }, { "epoch": 0.8852969526210425, "grad_norm": 0.3262347877025604, "learning_rate": 8.062015503875969e-05, "loss": 0.4258, "step": 11940 }, { "epoch": 0.8853710980944614, "grad_norm": 0.3689984977245331, "learning_rate": 8.061015253813454e-05, "loss": 0.4391, "step": 11941 }, { "epoch": 0.8854452435678802, "grad_norm": 0.37236031889915466, "learning_rate": 8.060015003750937e-05, "loss": 0.5065, "step": 11942 }, { "epoch": 0.885519389041299, "grad_norm": 0.3647071421146393, "learning_rate": 8.059014753688423e-05, "loss": 0.4507, "step": 11943 }, { "epoch": 0.8855935345147179, "grad_norm": 0.3476943373680115, "learning_rate": 8.058014503625908e-05, "loss": 0.4812, "step": 11944 }, { "epoch": 0.8856676799881367, "grad_norm": 0.3470821976661682, "learning_rate": 8.057014253563391e-05, "loss": 0.483, "step": 11945 }, { "epoch": 0.8857418254615556, "grad_norm": 0.3353639543056488, "learning_rate": 8.056014003500876e-05, "loss": 0.445, "step": 11946 }, { "epoch": 0.8858159709349744, "grad_norm": 0.34729310870170593, "learning_rate": 8.05501375343836e-05, "loss": 0.4735, "step": 11947 }, { "epoch": 0.8858901164083932, "grad_norm": 0.348086953163147, "learning_rate": 8.054013503375845e-05, "loss": 0.4707, "step": 11948 }, { "epoch": 0.8859642618818121, "grad_norm": 0.370673805475235, "learning_rate": 8.053013253313329e-05, "loss": 0.5172, "step": 11949 }, { "epoch": 0.886038407355231, "grad_norm": 0.35706621408462524, "learning_rate": 8.052013003250814e-05, "loss": 0.4882, "step": 11950 }, { "epoch": 0.8861125528286498, "grad_norm": 0.3449532985687256, "learning_rate": 8.051012753188298e-05, "loss": 0.5245, "step": 11951 }, { "epoch": 0.8861866983020686, "grad_norm": 0.35540837049484253, "learning_rate": 8.050012503125783e-05, "loss": 0.5186, "step": 11952 }, { "epoch": 0.8862608437754875, "grad_norm": 0.3691602945327759, "learning_rate": 8.049012253063266e-05, "loss": 0.4823, "step": 11953 }, { "epoch": 0.8863349892489063, "grad_norm": 0.3481806516647339, "learning_rate": 8.048012003000751e-05, "loss": 0.4489, "step": 11954 }, { "epoch": 0.8864091347223252, "grad_norm": 0.3498668968677521, "learning_rate": 8.047011752938235e-05, "loss": 0.4802, "step": 11955 }, { "epoch": 0.8864832801957441, "grad_norm": 0.3824887275695801, "learning_rate": 8.046011502875719e-05, "loss": 0.5193, "step": 11956 }, { "epoch": 0.8865574256691628, "grad_norm": 0.3351771831512451, "learning_rate": 8.045011252813204e-05, "loss": 0.4789, "step": 11957 }, { "epoch": 0.8866315711425817, "grad_norm": 0.3720507025718689, "learning_rate": 8.044011002750688e-05, "loss": 0.506, "step": 11958 }, { "epoch": 0.8867057166160006, "grad_norm": 0.38348639011383057, "learning_rate": 8.043010752688173e-05, "loss": 0.5466, "step": 11959 }, { "epoch": 0.8867798620894194, "grad_norm": 0.3686926066875458, "learning_rate": 8.042010502625656e-05, "loss": 0.5115, "step": 11960 }, { "epoch": 0.8868540075628383, "grad_norm": 0.37082403898239136, "learning_rate": 8.041010252563141e-05, "loss": 0.5256, "step": 11961 }, { "epoch": 0.8869281530362572, "grad_norm": 0.3310479521751404, "learning_rate": 8.040010002500625e-05, "loss": 0.4586, "step": 11962 }, { "epoch": 0.8870022985096759, "grad_norm": 0.3441735804080963, "learning_rate": 8.03900975243811e-05, "loss": 0.4728, "step": 11963 }, { "epoch": 0.8870764439830948, "grad_norm": 0.36083725094795227, "learning_rate": 8.038009502375594e-05, "loss": 0.4867, "step": 11964 }, { "epoch": 0.8871505894565137, "grad_norm": 0.3707008957862854, "learning_rate": 8.037009252313079e-05, "loss": 0.5128, "step": 11965 }, { "epoch": 0.8872247349299325, "grad_norm": 0.3702291250228882, "learning_rate": 8.036009002250563e-05, "loss": 0.5038, "step": 11966 }, { "epoch": 0.8872988804033514, "grad_norm": 0.3524163067340851, "learning_rate": 8.035008752188048e-05, "loss": 0.4435, "step": 11967 }, { "epoch": 0.8873730258767702, "grad_norm": 0.35368362069129944, "learning_rate": 8.034008502125532e-05, "loss": 0.5037, "step": 11968 }, { "epoch": 0.887447171350189, "grad_norm": 0.3444759249687195, "learning_rate": 8.033008252063015e-05, "loss": 0.4772, "step": 11969 }, { "epoch": 0.8875213168236079, "grad_norm": 0.34025439620018005, "learning_rate": 8.0320080020005e-05, "loss": 0.4458, "step": 11970 }, { "epoch": 0.8875954622970268, "grad_norm": 0.36749210953712463, "learning_rate": 8.031007751937984e-05, "loss": 0.5133, "step": 11971 }, { "epoch": 0.8876696077704456, "grad_norm": 0.3648946285247803, "learning_rate": 8.030007501875469e-05, "loss": 0.533, "step": 11972 }, { "epoch": 0.8877437532438645, "grad_norm": 0.363474577665329, "learning_rate": 8.029007251812953e-05, "loss": 0.4609, "step": 11973 }, { "epoch": 0.8878178987172833, "grad_norm": 0.3547273874282837, "learning_rate": 8.028007001750438e-05, "loss": 0.4781, "step": 11974 }, { "epoch": 0.8878920441907021, "grad_norm": 0.3781885504722595, "learning_rate": 8.027006751687922e-05, "loss": 0.5128, "step": 11975 }, { "epoch": 0.887966189664121, "grad_norm": 0.35456499457359314, "learning_rate": 8.026006501625407e-05, "loss": 0.4603, "step": 11976 }, { "epoch": 0.8880403351375399, "grad_norm": 0.34991133213043213, "learning_rate": 8.02500625156289e-05, "loss": 0.4783, "step": 11977 }, { "epoch": 0.8881144806109587, "grad_norm": 0.34334224462509155, "learning_rate": 8.024006001500375e-05, "loss": 0.4746, "step": 11978 }, { "epoch": 0.8881886260843775, "grad_norm": 0.3724707067012787, "learning_rate": 8.02300575143786e-05, "loss": 0.4969, "step": 11979 }, { "epoch": 0.8882627715577964, "grad_norm": 0.3617823123931885, "learning_rate": 8.022005501375344e-05, "loss": 0.4882, "step": 11980 }, { "epoch": 0.8883369170312152, "grad_norm": 0.3685113787651062, "learning_rate": 8.021005251312829e-05, "loss": 0.5011, "step": 11981 }, { "epoch": 0.8884110625046341, "grad_norm": 0.38458365201950073, "learning_rate": 8.020005001250313e-05, "loss": 0.5039, "step": 11982 }, { "epoch": 0.888485207978053, "grad_norm": 0.36196020245552063, "learning_rate": 8.019004751187798e-05, "loss": 0.5114, "step": 11983 }, { "epoch": 0.8885593534514717, "grad_norm": 0.36942145228385925, "learning_rate": 8.018004501125282e-05, "loss": 0.4806, "step": 11984 }, { "epoch": 0.8886334989248906, "grad_norm": 0.34363728761672974, "learning_rate": 8.017004251062767e-05, "loss": 0.4711, "step": 11985 }, { "epoch": 0.8887076443983095, "grad_norm": 0.36494800448417664, "learning_rate": 8.01600400100025e-05, "loss": 0.4777, "step": 11986 }, { "epoch": 0.8887817898717283, "grad_norm": 0.36174166202545166, "learning_rate": 8.015003750937736e-05, "loss": 0.4971, "step": 11987 }, { "epoch": 0.8888559353451472, "grad_norm": 0.3632010221481323, "learning_rate": 8.014003500875219e-05, "loss": 0.5072, "step": 11988 }, { "epoch": 0.8889300808185661, "grad_norm": 0.3380146324634552, "learning_rate": 8.013003250812704e-05, "loss": 0.4872, "step": 11989 }, { "epoch": 0.8890042262919848, "grad_norm": 0.3492533564567566, "learning_rate": 8.012003000750188e-05, "loss": 0.4806, "step": 11990 }, { "epoch": 0.8890783717654037, "grad_norm": 0.32812032103538513, "learning_rate": 8.011002750687673e-05, "loss": 0.4443, "step": 11991 }, { "epoch": 0.8891525172388226, "grad_norm": 0.3699386715888977, "learning_rate": 8.010002500625157e-05, "loss": 0.5371, "step": 11992 }, { "epoch": 0.8892266627122414, "grad_norm": 0.3491969108581543, "learning_rate": 8.00900225056264e-05, "loss": 0.4588, "step": 11993 }, { "epoch": 0.8893008081856603, "grad_norm": 0.40856489539146423, "learning_rate": 8.008002000500126e-05, "loss": 0.54, "step": 11994 }, { "epoch": 0.8893749536590791, "grad_norm": 0.3335014581680298, "learning_rate": 8.007001750437609e-05, "loss": 0.5054, "step": 11995 }, { "epoch": 0.8894490991324979, "grad_norm": 0.3471134603023529, "learning_rate": 8.006001500375094e-05, "loss": 0.4778, "step": 11996 }, { "epoch": 0.8895232446059168, "grad_norm": 0.36110982298851013, "learning_rate": 8.005001250312578e-05, "loss": 0.4739, "step": 11997 }, { "epoch": 0.8895973900793357, "grad_norm": 0.34261229634284973, "learning_rate": 8.004001000250063e-05, "loss": 0.4995, "step": 11998 }, { "epoch": 0.8896715355527545, "grad_norm": 0.35027310252189636, "learning_rate": 8.003000750187547e-05, "loss": 0.4693, "step": 11999 }, { "epoch": 0.8897456810261734, "grad_norm": 0.3758077323436737, "learning_rate": 8.002000500125032e-05, "loss": 0.5493, "step": 12000 }, { "epoch": 0.8898198264995922, "grad_norm": 0.3867603838443756, "learning_rate": 8.001000250062516e-05, "loss": 0.5709, "step": 12001 }, { "epoch": 0.889893971973011, "grad_norm": 0.3635488450527191, "learning_rate": 8e-05, "loss": 0.4899, "step": 12002 }, { "epoch": 0.8899681174464299, "grad_norm": 0.3341580629348755, "learning_rate": 7.998999749937484e-05, "loss": 0.4339, "step": 12003 }, { "epoch": 0.8900422629198488, "grad_norm": 0.3528949022293091, "learning_rate": 7.99799949987497e-05, "loss": 0.483, "step": 12004 }, { "epoch": 0.8901164083932676, "grad_norm": 0.3435216248035431, "learning_rate": 7.996999249812453e-05, "loss": 0.4563, "step": 12005 }, { "epoch": 0.8901905538666864, "grad_norm": 0.35579922795295715, "learning_rate": 7.995998999749937e-05, "loss": 0.4778, "step": 12006 }, { "epoch": 0.8902646993401053, "grad_norm": 0.3285210132598877, "learning_rate": 7.994998749687422e-05, "loss": 0.4515, "step": 12007 }, { "epoch": 0.8903388448135241, "grad_norm": 0.33363813161849976, "learning_rate": 7.993998499624906e-05, "loss": 0.4364, "step": 12008 }, { "epoch": 0.890412990286943, "grad_norm": 0.3499665856361389, "learning_rate": 7.992998249562391e-05, "loss": 0.5134, "step": 12009 }, { "epoch": 0.8904871357603619, "grad_norm": 0.36432817578315735, "learning_rate": 7.991997999499874e-05, "loss": 0.5079, "step": 12010 }, { "epoch": 0.8905612812337806, "grad_norm": 0.3799093961715698, "learning_rate": 7.99099774943736e-05, "loss": 0.5093, "step": 12011 }, { "epoch": 0.8906354267071995, "grad_norm": 0.3626672923564911, "learning_rate": 7.989997499374845e-05, "loss": 0.5328, "step": 12012 }, { "epoch": 0.8907095721806184, "grad_norm": 0.35331735014915466, "learning_rate": 7.988997249312328e-05, "loss": 0.4575, "step": 12013 }, { "epoch": 0.8907837176540372, "grad_norm": 0.3739887475967407, "learning_rate": 7.987996999249813e-05, "loss": 0.5407, "step": 12014 }, { "epoch": 0.8908578631274561, "grad_norm": 0.37994077801704407, "learning_rate": 7.986996749187297e-05, "loss": 0.5057, "step": 12015 }, { "epoch": 0.890932008600875, "grad_norm": 0.3562171161174774, "learning_rate": 7.985996499124782e-05, "loss": 0.4918, "step": 12016 }, { "epoch": 0.8910061540742937, "grad_norm": 0.3677302598953247, "learning_rate": 7.984996249062266e-05, "loss": 0.466, "step": 12017 }, { "epoch": 0.8910802995477126, "grad_norm": 0.3516072928905487, "learning_rate": 7.983995998999751e-05, "loss": 0.4791, "step": 12018 }, { "epoch": 0.8911544450211315, "grad_norm": 0.3565463125705719, "learning_rate": 7.982995748937235e-05, "loss": 0.5095, "step": 12019 }, { "epoch": 0.8912285904945503, "grad_norm": 0.3848736584186554, "learning_rate": 7.98199549887472e-05, "loss": 0.545, "step": 12020 }, { "epoch": 0.8913027359679692, "grad_norm": 0.3368920683860779, "learning_rate": 7.980995248812203e-05, "loss": 0.4681, "step": 12021 }, { "epoch": 0.891376881441388, "grad_norm": 0.34319862723350525, "learning_rate": 7.979994998749688e-05, "loss": 0.4841, "step": 12022 }, { "epoch": 0.8914510269148068, "grad_norm": 0.353942334651947, "learning_rate": 7.978994748687172e-05, "loss": 0.46, "step": 12023 }, { "epoch": 0.8915251723882257, "grad_norm": 0.3540211319923401, "learning_rate": 7.977994498624657e-05, "loss": 0.4673, "step": 12024 }, { "epoch": 0.8915993178616446, "grad_norm": 0.3488525450229645, "learning_rate": 7.976994248562141e-05, "loss": 0.4822, "step": 12025 }, { "epoch": 0.8916734633350634, "grad_norm": 0.3532106578350067, "learning_rate": 7.975993998499626e-05, "loss": 0.465, "step": 12026 }, { "epoch": 0.8917476088084823, "grad_norm": 0.37726524472236633, "learning_rate": 7.97499374843711e-05, "loss": 0.5408, "step": 12027 }, { "epoch": 0.8918217542819011, "grad_norm": 0.34543943405151367, "learning_rate": 7.973993498374595e-05, "loss": 0.5146, "step": 12028 }, { "epoch": 0.8918958997553199, "grad_norm": 0.38792696595191956, "learning_rate": 7.972993248312078e-05, "loss": 0.4589, "step": 12029 }, { "epoch": 0.8919700452287388, "grad_norm": 0.37046098709106445, "learning_rate": 7.971992998249562e-05, "loss": 0.5763, "step": 12030 }, { "epoch": 0.8920441907021577, "grad_norm": 0.3527106046676636, "learning_rate": 7.970992748187047e-05, "loss": 0.489, "step": 12031 }, { "epoch": 0.8921183361755765, "grad_norm": 0.36776939034461975, "learning_rate": 7.969992498124531e-05, "loss": 0.53, "step": 12032 }, { "epoch": 0.8921924816489953, "grad_norm": 0.35472381114959717, "learning_rate": 7.968992248062016e-05, "loss": 0.5023, "step": 12033 }, { "epoch": 0.8922666271224142, "grad_norm": 0.32134950160980225, "learning_rate": 7.9679919979995e-05, "loss": 0.4721, "step": 12034 }, { "epoch": 0.892340772595833, "grad_norm": 0.37451204657554626, "learning_rate": 7.966991747936985e-05, "loss": 0.4866, "step": 12035 }, { "epoch": 0.8924149180692519, "grad_norm": 0.3504354655742645, "learning_rate": 7.965991497874468e-05, "loss": 0.4645, "step": 12036 }, { "epoch": 0.8924890635426708, "grad_norm": 0.3684691786766052, "learning_rate": 7.964991247811954e-05, "loss": 0.4685, "step": 12037 }, { "epoch": 0.8925632090160895, "grad_norm": 0.36534354090690613, "learning_rate": 7.963990997749437e-05, "loss": 0.4905, "step": 12038 }, { "epoch": 0.8926373544895084, "grad_norm": 0.3514598608016968, "learning_rate": 7.962990747686922e-05, "loss": 0.4731, "step": 12039 }, { "epoch": 0.8927114999629273, "grad_norm": 0.35792839527130127, "learning_rate": 7.961990497624406e-05, "loss": 0.4961, "step": 12040 }, { "epoch": 0.8927856454363461, "grad_norm": 0.35426366329193115, "learning_rate": 7.960990247561891e-05, "loss": 0.4868, "step": 12041 }, { "epoch": 0.892859790909765, "grad_norm": 0.35921233892440796, "learning_rate": 7.959989997499375e-05, "loss": 0.4789, "step": 12042 }, { "epoch": 0.8929339363831839, "grad_norm": 0.3542916476726532, "learning_rate": 7.958989747436858e-05, "loss": 0.4959, "step": 12043 }, { "epoch": 0.8930080818566026, "grad_norm": 0.394242525100708, "learning_rate": 7.957989497374344e-05, "loss": 0.5148, "step": 12044 }, { "epoch": 0.8930822273300215, "grad_norm": 0.374450147151947, "learning_rate": 7.956989247311829e-05, "loss": 0.5499, "step": 12045 }, { "epoch": 0.8931563728034404, "grad_norm": 0.3380838930606842, "learning_rate": 7.955988997249312e-05, "loss": 0.4694, "step": 12046 }, { "epoch": 0.8932305182768592, "grad_norm": 0.3678417205810547, "learning_rate": 7.954988747186797e-05, "loss": 0.5051, "step": 12047 }, { "epoch": 0.8933046637502781, "grad_norm": 0.3791748285293579, "learning_rate": 7.953988497124281e-05, "loss": 0.5391, "step": 12048 }, { "epoch": 0.893378809223697, "grad_norm": 0.3430442810058594, "learning_rate": 7.952988247061766e-05, "loss": 0.4821, "step": 12049 }, { "epoch": 0.8934529546971157, "grad_norm": 0.3390556871891022, "learning_rate": 7.951987996999251e-05, "loss": 0.4732, "step": 12050 }, { "epoch": 0.8935271001705346, "grad_norm": 0.34127897024154663, "learning_rate": 7.950987746936735e-05, "loss": 0.4415, "step": 12051 }, { "epoch": 0.8936012456439535, "grad_norm": 0.3520984351634979, "learning_rate": 7.94998749687422e-05, "loss": 0.4794, "step": 12052 }, { "epoch": 0.8936753911173723, "grad_norm": 0.35790976881980896, "learning_rate": 7.948987246811704e-05, "loss": 0.4595, "step": 12053 }, { "epoch": 0.8937495365907911, "grad_norm": 0.3624449074268341, "learning_rate": 7.947986996749189e-05, "loss": 0.5125, "step": 12054 }, { "epoch": 0.89382368206421, "grad_norm": 0.3849494159221649, "learning_rate": 7.946986746686672e-05, "loss": 0.5375, "step": 12055 }, { "epoch": 0.8938978275376288, "grad_norm": 0.35693359375, "learning_rate": 7.945986496624156e-05, "loss": 0.4927, "step": 12056 }, { "epoch": 0.8939719730110477, "grad_norm": 0.3514622747898102, "learning_rate": 7.944986246561641e-05, "loss": 0.4753, "step": 12057 }, { "epoch": 0.8940461184844666, "grad_norm": 0.35528457164764404, "learning_rate": 7.943985996499125e-05, "loss": 0.4859, "step": 12058 }, { "epoch": 0.8941202639578854, "grad_norm": 0.38883712887763977, "learning_rate": 7.94298574643661e-05, "loss": 0.5221, "step": 12059 }, { "epoch": 0.8941944094313042, "grad_norm": 0.3633393943309784, "learning_rate": 7.941985496374094e-05, "loss": 0.4799, "step": 12060 }, { "epoch": 0.894268554904723, "grad_norm": 0.36181002855300903, "learning_rate": 7.940985246311579e-05, "loss": 0.4754, "step": 12061 }, { "epoch": 0.8943427003781419, "grad_norm": 0.36658722162246704, "learning_rate": 7.939984996249062e-05, "loss": 0.5074, "step": 12062 }, { "epoch": 0.8944168458515608, "grad_norm": 0.3436172604560852, "learning_rate": 7.938984746186548e-05, "loss": 0.4748, "step": 12063 }, { "epoch": 0.8944909913249796, "grad_norm": 0.3657340109348297, "learning_rate": 7.937984496124031e-05, "loss": 0.5175, "step": 12064 }, { "epoch": 0.8945651367983984, "grad_norm": 0.3637843728065491, "learning_rate": 7.936984246061516e-05, "loss": 0.4795, "step": 12065 }, { "epoch": 0.8946392822718173, "grad_norm": 0.3645486533641815, "learning_rate": 7.935983995999e-05, "loss": 0.4759, "step": 12066 }, { "epoch": 0.8947134277452361, "grad_norm": 0.36158910393714905, "learning_rate": 7.934983745936484e-05, "loss": 0.4935, "step": 12067 }, { "epoch": 0.894787573218655, "grad_norm": 0.3914186358451843, "learning_rate": 7.933983495873969e-05, "loss": 0.5059, "step": 12068 }, { "epoch": 0.8948617186920739, "grad_norm": 0.3617817759513855, "learning_rate": 7.932983245811453e-05, "loss": 0.4843, "step": 12069 }, { "epoch": 0.8949358641654926, "grad_norm": 0.3489018380641937, "learning_rate": 7.931982995748938e-05, "loss": 0.4715, "step": 12070 }, { "epoch": 0.8950100096389115, "grad_norm": 0.3624245524406433, "learning_rate": 7.930982745686421e-05, "loss": 0.469, "step": 12071 }, { "epoch": 0.8950841551123304, "grad_norm": 0.38255876302719116, "learning_rate": 7.929982495623906e-05, "loss": 0.472, "step": 12072 }, { "epoch": 0.8951583005857492, "grad_norm": 0.35393255949020386, "learning_rate": 7.92898224556139e-05, "loss": 0.4972, "step": 12073 }, { "epoch": 0.8952324460591681, "grad_norm": 0.3398275375366211, "learning_rate": 7.927981995498875e-05, "loss": 0.4608, "step": 12074 }, { "epoch": 0.895306591532587, "grad_norm": 0.36527758836746216, "learning_rate": 7.926981745436359e-05, "loss": 0.5097, "step": 12075 }, { "epoch": 0.8953807370060057, "grad_norm": 0.38722026348114014, "learning_rate": 7.925981495373844e-05, "loss": 0.5335, "step": 12076 }, { "epoch": 0.8954548824794246, "grad_norm": 0.35345035791397095, "learning_rate": 7.924981245311328e-05, "loss": 0.4834, "step": 12077 }, { "epoch": 0.8955290279528435, "grad_norm": 0.3792721927165985, "learning_rate": 7.923980995248813e-05, "loss": 0.5137, "step": 12078 }, { "epoch": 0.8956031734262623, "grad_norm": 0.3584330976009369, "learning_rate": 7.922980745186296e-05, "loss": 0.4977, "step": 12079 }, { "epoch": 0.8956773188996812, "grad_norm": 0.3782522976398468, "learning_rate": 7.921980495123781e-05, "loss": 0.5281, "step": 12080 }, { "epoch": 0.8957514643731, "grad_norm": 0.3733925223350525, "learning_rate": 7.920980245061265e-05, "loss": 0.4936, "step": 12081 }, { "epoch": 0.8958256098465188, "grad_norm": 0.37671852111816406, "learning_rate": 7.91997999499875e-05, "loss": 0.5194, "step": 12082 }, { "epoch": 0.8958997553199377, "grad_norm": 0.35063982009887695, "learning_rate": 7.918979744936235e-05, "loss": 0.4624, "step": 12083 }, { "epoch": 0.8959739007933566, "grad_norm": 0.33607950806617737, "learning_rate": 7.917979494873719e-05, "loss": 0.4559, "step": 12084 }, { "epoch": 0.8960480462667754, "grad_norm": 0.3696252405643463, "learning_rate": 7.916979244811204e-05, "loss": 0.4848, "step": 12085 }, { "epoch": 0.8961221917401943, "grad_norm": 0.3792378008365631, "learning_rate": 7.915978994748688e-05, "loss": 0.5047, "step": 12086 }, { "epoch": 0.8961963372136131, "grad_norm": 0.371565580368042, "learning_rate": 7.914978744686173e-05, "loss": 0.5077, "step": 12087 }, { "epoch": 0.8962704826870319, "grad_norm": 0.3654515743255615, "learning_rate": 7.913978494623657e-05, "loss": 0.515, "step": 12088 }, { "epoch": 0.8963446281604508, "grad_norm": 0.3626851439476013, "learning_rate": 7.912978244561142e-05, "loss": 0.4755, "step": 12089 }, { "epoch": 0.8964187736338697, "grad_norm": 0.34275591373443604, "learning_rate": 7.911977994498625e-05, "loss": 0.4814, "step": 12090 }, { "epoch": 0.8964929191072885, "grad_norm": 0.34008997678756714, "learning_rate": 7.91097774443611e-05, "loss": 0.4701, "step": 12091 }, { "epoch": 0.8965670645807073, "grad_norm": 0.3654283285140991, "learning_rate": 7.909977494373594e-05, "loss": 0.5289, "step": 12092 }, { "epoch": 0.8966412100541262, "grad_norm": 0.3579677641391754, "learning_rate": 7.908977244311078e-05, "loss": 0.4679, "step": 12093 }, { "epoch": 0.896715355527545, "grad_norm": 0.3937441110610962, "learning_rate": 7.907976994248563e-05, "loss": 0.54, "step": 12094 }, { "epoch": 0.8967895010009639, "grad_norm": 0.36110299825668335, "learning_rate": 7.906976744186047e-05, "loss": 0.512, "step": 12095 }, { "epoch": 0.8968636464743828, "grad_norm": 0.3489172160625458, "learning_rate": 7.905976494123532e-05, "loss": 0.4897, "step": 12096 }, { "epoch": 0.8969377919478015, "grad_norm": 0.3861660659313202, "learning_rate": 7.904976244061015e-05, "loss": 0.5361, "step": 12097 }, { "epoch": 0.8970119374212204, "grad_norm": 0.32787227630615234, "learning_rate": 7.9039759939985e-05, "loss": 0.4626, "step": 12098 }, { "epoch": 0.8970860828946393, "grad_norm": 0.3721844255924225, "learning_rate": 7.902975743935984e-05, "loss": 0.5042, "step": 12099 }, { "epoch": 0.8971602283680581, "grad_norm": 0.3413139283657074, "learning_rate": 7.901975493873469e-05, "loss": 0.4549, "step": 12100 }, { "epoch": 0.897234373841477, "grad_norm": 0.32557904720306396, "learning_rate": 7.900975243810953e-05, "loss": 0.4329, "step": 12101 }, { "epoch": 0.8973085193148959, "grad_norm": 0.38123276829719543, "learning_rate": 7.899974993748438e-05, "loss": 0.5101, "step": 12102 }, { "epoch": 0.8973826647883146, "grad_norm": 0.3479158580303192, "learning_rate": 7.898974743685922e-05, "loss": 0.4711, "step": 12103 }, { "epoch": 0.8974568102617335, "grad_norm": 0.33232131600379944, "learning_rate": 7.897974493623405e-05, "loss": 0.4516, "step": 12104 }, { "epoch": 0.8975309557351524, "grad_norm": 0.366533488035202, "learning_rate": 7.89697424356089e-05, "loss": 0.503, "step": 12105 }, { "epoch": 0.8976051012085712, "grad_norm": 0.3705022633075714, "learning_rate": 7.895973993498374e-05, "loss": 0.4907, "step": 12106 }, { "epoch": 0.8976792466819901, "grad_norm": 0.3588717579841614, "learning_rate": 7.894973743435859e-05, "loss": 0.4967, "step": 12107 }, { "epoch": 0.897753392155409, "grad_norm": 0.3519098460674286, "learning_rate": 7.893973493373343e-05, "loss": 0.4617, "step": 12108 }, { "epoch": 0.8978275376288277, "grad_norm": 0.36569881439208984, "learning_rate": 7.892973243310828e-05, "loss": 0.5263, "step": 12109 }, { "epoch": 0.8979016831022466, "grad_norm": 0.36940547823905945, "learning_rate": 7.891972993248312e-05, "loss": 0.4884, "step": 12110 }, { "epoch": 0.8979758285756655, "grad_norm": 0.3662746548652649, "learning_rate": 7.890972743185797e-05, "loss": 0.5102, "step": 12111 }, { "epoch": 0.8980499740490843, "grad_norm": 0.3335985541343689, "learning_rate": 7.88997249312328e-05, "loss": 0.4552, "step": 12112 }, { "epoch": 0.8981241195225032, "grad_norm": 0.3784137964248657, "learning_rate": 7.888972243060766e-05, "loss": 0.5232, "step": 12113 }, { "epoch": 0.898198264995922, "grad_norm": 0.36521631479263306, "learning_rate": 7.887971992998249e-05, "loss": 0.5112, "step": 12114 }, { "epoch": 0.8982724104693408, "grad_norm": 0.32125377655029297, "learning_rate": 7.886971742935734e-05, "loss": 0.4184, "step": 12115 }, { "epoch": 0.8983465559427597, "grad_norm": 0.39539098739624023, "learning_rate": 7.885971492873218e-05, "loss": 0.5449, "step": 12116 }, { "epoch": 0.8984207014161786, "grad_norm": 0.35354533791542053, "learning_rate": 7.884971242810703e-05, "loss": 0.494, "step": 12117 }, { "epoch": 0.8984948468895974, "grad_norm": 0.3517606258392334, "learning_rate": 7.883970992748188e-05, "loss": 0.4814, "step": 12118 }, { "epoch": 0.8985689923630162, "grad_norm": 0.35346901416778564, "learning_rate": 7.882970742685672e-05, "loss": 0.4739, "step": 12119 }, { "epoch": 0.8986431378364351, "grad_norm": 0.37064802646636963, "learning_rate": 7.881970492623157e-05, "loss": 0.4832, "step": 12120 }, { "epoch": 0.8987172833098539, "grad_norm": 0.3563239872455597, "learning_rate": 7.88097024256064e-05, "loss": 0.4722, "step": 12121 }, { "epoch": 0.8987914287832728, "grad_norm": 0.34085318446159363, "learning_rate": 7.879969992498126e-05, "loss": 0.4813, "step": 12122 }, { "epoch": 0.8988655742566917, "grad_norm": 0.36308103799819946, "learning_rate": 7.87896974243561e-05, "loss": 0.5031, "step": 12123 }, { "epoch": 0.8989397197301104, "grad_norm": 0.3724968433380127, "learning_rate": 7.877969492373094e-05, "loss": 0.5273, "step": 12124 }, { "epoch": 0.8990138652035293, "grad_norm": 0.3622940480709076, "learning_rate": 7.876969242310578e-05, "loss": 0.5029, "step": 12125 }, { "epoch": 0.8990880106769482, "grad_norm": 0.38173702359199524, "learning_rate": 7.875968992248063e-05, "loss": 0.5244, "step": 12126 }, { "epoch": 0.899162156150367, "grad_norm": 0.3490830957889557, "learning_rate": 7.874968742185547e-05, "loss": 0.5023, "step": 12127 }, { "epoch": 0.8992363016237859, "grad_norm": 0.3498741388320923, "learning_rate": 7.873968492123032e-05, "loss": 0.4657, "step": 12128 }, { "epoch": 0.8993104470972048, "grad_norm": 0.3443988263607025, "learning_rate": 7.872968242060516e-05, "loss": 0.4842, "step": 12129 }, { "epoch": 0.8993845925706235, "grad_norm": 0.3625577986240387, "learning_rate": 7.871967991998e-05, "loss": 0.4863, "step": 12130 }, { "epoch": 0.8994587380440424, "grad_norm": 0.34971535205841064, "learning_rate": 7.870967741935484e-05, "loss": 0.5013, "step": 12131 }, { "epoch": 0.8995328835174613, "grad_norm": 0.3602607846260071, "learning_rate": 7.869967491872968e-05, "loss": 0.5028, "step": 12132 }, { "epoch": 0.8996070289908801, "grad_norm": 0.3440014123916626, "learning_rate": 7.868967241810453e-05, "loss": 0.4579, "step": 12133 }, { "epoch": 0.899681174464299, "grad_norm": 0.35592153668403625, "learning_rate": 7.867966991747937e-05, "loss": 0.4779, "step": 12134 }, { "epoch": 0.8997553199377178, "grad_norm": 0.348172128200531, "learning_rate": 7.866966741685422e-05, "loss": 0.4925, "step": 12135 }, { "epoch": 0.8998294654111366, "grad_norm": 0.35587891936302185, "learning_rate": 7.865966491622906e-05, "loss": 0.5192, "step": 12136 }, { "epoch": 0.8999036108845555, "grad_norm": 0.33947208523750305, "learning_rate": 7.864966241560391e-05, "loss": 0.4595, "step": 12137 }, { "epoch": 0.8999777563579744, "grad_norm": 0.34820783138275146, "learning_rate": 7.863965991497875e-05, "loss": 0.4621, "step": 12138 }, { "epoch": 0.9000519018313932, "grad_norm": 0.3462175130844116, "learning_rate": 7.86296574143536e-05, "loss": 0.4851, "step": 12139 }, { "epoch": 0.900126047304812, "grad_norm": 0.34662026166915894, "learning_rate": 7.861965491372843e-05, "loss": 0.507, "step": 12140 }, { "epoch": 0.9002001927782309, "grad_norm": 0.36034467816352844, "learning_rate": 7.860965241310327e-05, "loss": 0.484, "step": 12141 }, { "epoch": 0.9002743382516497, "grad_norm": 0.35226285457611084, "learning_rate": 7.859964991247812e-05, "loss": 0.479, "step": 12142 }, { "epoch": 0.9003484837250686, "grad_norm": 0.35954323410987854, "learning_rate": 7.858964741185296e-05, "loss": 0.5247, "step": 12143 }, { "epoch": 0.9004226291984875, "grad_norm": 0.3489994406700134, "learning_rate": 7.857964491122781e-05, "loss": 0.4402, "step": 12144 }, { "epoch": 0.9004967746719063, "grad_norm": 0.3407439589500427, "learning_rate": 7.856964241060265e-05, "loss": 0.4548, "step": 12145 }, { "epoch": 0.9005709201453251, "grad_norm": 0.3405288755893707, "learning_rate": 7.85596399099775e-05, "loss": 0.4863, "step": 12146 }, { "epoch": 0.900645065618744, "grad_norm": 0.33662810921669006, "learning_rate": 7.854963740935233e-05, "loss": 0.4656, "step": 12147 }, { "epoch": 0.9007192110921628, "grad_norm": 0.3710915744304657, "learning_rate": 7.853963490872718e-05, "loss": 0.5039, "step": 12148 }, { "epoch": 0.9007933565655817, "grad_norm": 0.3532405197620392, "learning_rate": 7.852963240810202e-05, "loss": 0.5054, "step": 12149 }, { "epoch": 0.9008675020390006, "grad_norm": 0.3623901903629303, "learning_rate": 7.851962990747687e-05, "loss": 0.4789, "step": 12150 }, { "epoch": 0.9009416475124193, "grad_norm": 0.3544818162918091, "learning_rate": 7.850962740685172e-05, "loss": 0.491, "step": 12151 }, { "epoch": 0.9010157929858382, "grad_norm": 0.36368367075920105, "learning_rate": 7.849962490622656e-05, "loss": 0.492, "step": 12152 }, { "epoch": 0.9010899384592571, "grad_norm": 0.3746209144592285, "learning_rate": 7.848962240560141e-05, "loss": 0.4691, "step": 12153 }, { "epoch": 0.9011640839326759, "grad_norm": 0.37304383516311646, "learning_rate": 7.847961990497625e-05, "loss": 0.5023, "step": 12154 }, { "epoch": 0.9012382294060948, "grad_norm": 0.35835301876068115, "learning_rate": 7.84696174043511e-05, "loss": 0.4795, "step": 12155 }, { "epoch": 0.9013123748795137, "grad_norm": 0.36355218291282654, "learning_rate": 7.845961490372593e-05, "loss": 0.4564, "step": 12156 }, { "epoch": 0.9013865203529324, "grad_norm": 0.352119117975235, "learning_rate": 7.844961240310079e-05, "loss": 0.4832, "step": 12157 }, { "epoch": 0.9014606658263513, "grad_norm": 0.35760247707366943, "learning_rate": 7.843960990247562e-05, "loss": 0.4736, "step": 12158 }, { "epoch": 0.9015348112997702, "grad_norm": 0.3426532745361328, "learning_rate": 7.842960740185047e-05, "loss": 0.4345, "step": 12159 }, { "epoch": 0.901608956773189, "grad_norm": 0.3504444658756256, "learning_rate": 7.841960490122531e-05, "loss": 0.4833, "step": 12160 }, { "epoch": 0.9016831022466079, "grad_norm": 0.36891934275627136, "learning_rate": 7.840960240060016e-05, "loss": 0.4683, "step": 12161 }, { "epoch": 0.9017572477200267, "grad_norm": 0.350500226020813, "learning_rate": 7.8399599899975e-05, "loss": 0.4598, "step": 12162 }, { "epoch": 0.9018313931934455, "grad_norm": 0.3525405824184418, "learning_rate": 7.838959739934985e-05, "loss": 0.494, "step": 12163 }, { "epoch": 0.9019055386668644, "grad_norm": 0.3750620484352112, "learning_rate": 7.837959489872469e-05, "loss": 0.5269, "step": 12164 }, { "epoch": 0.9019796841402833, "grad_norm": 0.352832555770874, "learning_rate": 7.836959239809954e-05, "loss": 0.466, "step": 12165 }, { "epoch": 0.9020538296137021, "grad_norm": 0.36906152963638306, "learning_rate": 7.835958989747437e-05, "loss": 0.505, "step": 12166 }, { "epoch": 0.902127975087121, "grad_norm": 0.35973915457725525, "learning_rate": 7.834958739684921e-05, "loss": 0.4669, "step": 12167 }, { "epoch": 0.9022021205605398, "grad_norm": 0.3666229248046875, "learning_rate": 7.833958489622406e-05, "loss": 0.5253, "step": 12168 }, { "epoch": 0.9022762660339586, "grad_norm": 0.3629952669143677, "learning_rate": 7.83295823955989e-05, "loss": 0.5116, "step": 12169 }, { "epoch": 0.9023504115073775, "grad_norm": 0.33755287528038025, "learning_rate": 7.831957989497375e-05, "loss": 0.4743, "step": 12170 }, { "epoch": 0.9024245569807964, "grad_norm": 0.36207687854766846, "learning_rate": 7.830957739434859e-05, "loss": 0.5014, "step": 12171 }, { "epoch": 0.9024987024542152, "grad_norm": 0.36554065346717834, "learning_rate": 7.829957489372344e-05, "loss": 0.5183, "step": 12172 }, { "epoch": 0.902572847927634, "grad_norm": 0.3348500430583954, "learning_rate": 7.828957239309827e-05, "loss": 0.4605, "step": 12173 }, { "epoch": 0.9026469934010528, "grad_norm": 0.35971012711524963, "learning_rate": 7.827956989247312e-05, "loss": 0.5039, "step": 12174 }, { "epoch": 0.9027211388744717, "grad_norm": 0.3417973816394806, "learning_rate": 7.826956739184796e-05, "loss": 0.4844, "step": 12175 }, { "epoch": 0.9027952843478906, "grad_norm": 0.34238049387931824, "learning_rate": 7.825956489122281e-05, "loss": 0.4678, "step": 12176 }, { "epoch": 0.9028694298213094, "grad_norm": 0.3655411899089813, "learning_rate": 7.824956239059765e-05, "loss": 0.5242, "step": 12177 }, { "epoch": 0.9029435752947282, "grad_norm": 0.3819589614868164, "learning_rate": 7.82395598899725e-05, "loss": 0.5049, "step": 12178 }, { "epoch": 0.9030177207681471, "grad_norm": 0.33990174531936646, "learning_rate": 7.822955738934734e-05, "loss": 0.457, "step": 12179 }, { "epoch": 0.9030918662415659, "grad_norm": 0.3238125443458557, "learning_rate": 7.821955488872217e-05, "loss": 0.4478, "step": 12180 }, { "epoch": 0.9031660117149848, "grad_norm": 0.3673919439315796, "learning_rate": 7.820955238809702e-05, "loss": 0.5052, "step": 12181 }, { "epoch": 0.9032401571884037, "grad_norm": 0.36980393528938293, "learning_rate": 7.819954988747186e-05, "loss": 0.5197, "step": 12182 }, { "epoch": 0.9033143026618224, "grad_norm": 0.36502811312675476, "learning_rate": 7.818954738684671e-05, "loss": 0.5361, "step": 12183 }, { "epoch": 0.9033884481352413, "grad_norm": 0.3726959228515625, "learning_rate": 7.817954488622155e-05, "loss": 0.5066, "step": 12184 }, { "epoch": 0.9034625936086602, "grad_norm": 0.35372889041900635, "learning_rate": 7.81695423855964e-05, "loss": 0.4804, "step": 12185 }, { "epoch": 0.903536739082079, "grad_norm": 0.3733518719673157, "learning_rate": 7.815953988497125e-05, "loss": 0.4817, "step": 12186 }, { "epoch": 0.9036108845554979, "grad_norm": 0.32922104001045227, "learning_rate": 7.814953738434609e-05, "loss": 0.4511, "step": 12187 }, { "epoch": 0.9036850300289168, "grad_norm": 0.3625817894935608, "learning_rate": 7.813953488372094e-05, "loss": 0.4836, "step": 12188 }, { "epoch": 0.9037591755023355, "grad_norm": 0.3671766221523285, "learning_rate": 7.812953238309579e-05, "loss": 0.5177, "step": 12189 }, { "epoch": 0.9038333209757544, "grad_norm": 0.37186869978904724, "learning_rate": 7.811952988247063e-05, "loss": 0.5149, "step": 12190 }, { "epoch": 0.9039074664491733, "grad_norm": 0.3531602621078491, "learning_rate": 7.810952738184546e-05, "loss": 0.4876, "step": 12191 }, { "epoch": 0.9039816119225921, "grad_norm": 0.36100271344184875, "learning_rate": 7.809952488122031e-05, "loss": 0.4813, "step": 12192 }, { "epoch": 0.904055757396011, "grad_norm": 0.3808072507381439, "learning_rate": 7.808952238059515e-05, "loss": 0.5193, "step": 12193 }, { "epoch": 0.9041299028694298, "grad_norm": 0.3881760537624359, "learning_rate": 7.807951987997e-05, "loss": 0.4873, "step": 12194 }, { "epoch": 0.9042040483428486, "grad_norm": 0.3469327688217163, "learning_rate": 7.806951737934484e-05, "loss": 0.4587, "step": 12195 }, { "epoch": 0.9042781938162675, "grad_norm": 0.351518452167511, "learning_rate": 7.805951487871969e-05, "loss": 0.4792, "step": 12196 }, { "epoch": 0.9043523392896864, "grad_norm": 0.3497157692909241, "learning_rate": 7.804951237809453e-05, "loss": 0.5052, "step": 12197 }, { "epoch": 0.9044264847631052, "grad_norm": 0.35975393652915955, "learning_rate": 7.803950987746938e-05, "loss": 0.4863, "step": 12198 }, { "epoch": 0.904500630236524, "grad_norm": 0.36563870310783386, "learning_rate": 7.802950737684421e-05, "loss": 0.4992, "step": 12199 }, { "epoch": 0.9045747757099429, "grad_norm": 0.38105955719947815, "learning_rate": 7.801950487621906e-05, "loss": 0.5125, "step": 12200 }, { "epoch": 0.9046489211833617, "grad_norm": 0.3685294985771179, "learning_rate": 7.80095023755939e-05, "loss": 0.5068, "step": 12201 }, { "epoch": 0.9047230666567806, "grad_norm": 0.34946054220199585, "learning_rate": 7.799949987496875e-05, "loss": 0.457, "step": 12202 }, { "epoch": 0.9047972121301995, "grad_norm": 0.3630867302417755, "learning_rate": 7.798949737434359e-05, "loss": 0.5235, "step": 12203 }, { "epoch": 0.9048713576036183, "grad_norm": 0.34101784229278564, "learning_rate": 7.797949487371843e-05, "loss": 0.4997, "step": 12204 }, { "epoch": 0.9049455030770371, "grad_norm": 0.36565423011779785, "learning_rate": 7.796949237309328e-05, "loss": 0.5293, "step": 12205 }, { "epoch": 0.905019648550456, "grad_norm": 0.3575752079486847, "learning_rate": 7.795948987246811e-05, "loss": 0.4945, "step": 12206 }, { "epoch": 0.9050937940238748, "grad_norm": 0.36154383420944214, "learning_rate": 7.794948737184297e-05, "loss": 0.5217, "step": 12207 }, { "epoch": 0.9051679394972937, "grad_norm": 0.35615870356559753, "learning_rate": 7.79394848712178e-05, "loss": 0.4987, "step": 12208 }, { "epoch": 0.9052420849707126, "grad_norm": 0.37743595242500305, "learning_rate": 7.792948237059265e-05, "loss": 0.5046, "step": 12209 }, { "epoch": 0.9053162304441313, "grad_norm": 0.34130603075027466, "learning_rate": 7.791947986996749e-05, "loss": 0.4703, "step": 12210 }, { "epoch": 0.9053903759175502, "grad_norm": 0.3362508714199066, "learning_rate": 7.790947736934234e-05, "loss": 0.4564, "step": 12211 }, { "epoch": 0.9054645213909691, "grad_norm": 0.364364892244339, "learning_rate": 7.789947486871718e-05, "loss": 0.483, "step": 12212 }, { "epoch": 0.9055386668643879, "grad_norm": 0.36374813318252563, "learning_rate": 7.788947236809203e-05, "loss": 0.4936, "step": 12213 }, { "epoch": 0.9056128123378068, "grad_norm": 0.3630427420139313, "learning_rate": 7.787946986746687e-05, "loss": 0.5282, "step": 12214 }, { "epoch": 0.9056869578112257, "grad_norm": 0.3448548913002014, "learning_rate": 7.786946736684172e-05, "loss": 0.4673, "step": 12215 }, { "epoch": 0.9057611032846444, "grad_norm": 0.3577180504798889, "learning_rate": 7.785946486621655e-05, "loss": 0.4637, "step": 12216 }, { "epoch": 0.9058352487580633, "grad_norm": 0.36460205912590027, "learning_rate": 7.784946236559139e-05, "loss": 0.4753, "step": 12217 }, { "epoch": 0.9059093942314822, "grad_norm": 0.37613338232040405, "learning_rate": 7.783945986496624e-05, "loss": 0.5142, "step": 12218 }, { "epoch": 0.905983539704901, "grad_norm": 0.3873370885848999, "learning_rate": 7.782945736434109e-05, "loss": 0.4751, "step": 12219 }, { "epoch": 0.9060576851783199, "grad_norm": 0.36442068219184875, "learning_rate": 7.781945486371593e-05, "loss": 0.4892, "step": 12220 }, { "epoch": 0.9061318306517387, "grad_norm": 0.37267789244651794, "learning_rate": 7.780945236309078e-05, "loss": 0.5123, "step": 12221 }, { "epoch": 0.9062059761251575, "grad_norm": 0.3730350136756897, "learning_rate": 7.779944986246562e-05, "loss": 0.5104, "step": 12222 }, { "epoch": 0.9062801215985764, "grad_norm": 0.3603893220424652, "learning_rate": 7.778944736184047e-05, "loss": 0.4898, "step": 12223 }, { "epoch": 0.9063542670719953, "grad_norm": 0.3394359052181244, "learning_rate": 7.777944486121532e-05, "loss": 0.4641, "step": 12224 }, { "epoch": 0.9064284125454141, "grad_norm": 0.3800501823425293, "learning_rate": 7.776944236059015e-05, "loss": 0.5234, "step": 12225 }, { "epoch": 0.906502558018833, "grad_norm": 0.36132436990737915, "learning_rate": 7.7759439859965e-05, "loss": 0.4691, "step": 12226 }, { "epoch": 0.9065767034922518, "grad_norm": 0.32849252223968506, "learning_rate": 7.774943735933984e-05, "loss": 0.433, "step": 12227 }, { "epoch": 0.9066508489656706, "grad_norm": 0.3501740097999573, "learning_rate": 7.773943485871468e-05, "loss": 0.4522, "step": 12228 }, { "epoch": 0.9067249944390895, "grad_norm": 0.3878188729286194, "learning_rate": 7.772943235808953e-05, "loss": 0.5454, "step": 12229 }, { "epoch": 0.9067991399125084, "grad_norm": 0.34906038641929626, "learning_rate": 7.771942985746437e-05, "loss": 0.4765, "step": 12230 }, { "epoch": 0.9068732853859272, "grad_norm": 0.3808542788028717, "learning_rate": 7.770942735683922e-05, "loss": 0.4938, "step": 12231 }, { "epoch": 0.906947430859346, "grad_norm": 0.3673005998134613, "learning_rate": 7.769942485621406e-05, "loss": 0.4682, "step": 12232 }, { "epoch": 0.9070215763327649, "grad_norm": 0.3379857838153839, "learning_rate": 7.76894223555889e-05, "loss": 0.4626, "step": 12233 }, { "epoch": 0.9070957218061837, "grad_norm": 0.3512555658817291, "learning_rate": 7.767941985496374e-05, "loss": 0.4724, "step": 12234 }, { "epoch": 0.9071698672796026, "grad_norm": 0.3519890010356903, "learning_rate": 7.76694173543386e-05, "loss": 0.5032, "step": 12235 }, { "epoch": 0.9072440127530215, "grad_norm": 0.36485880613327026, "learning_rate": 7.765941485371343e-05, "loss": 0.474, "step": 12236 }, { "epoch": 0.9073181582264402, "grad_norm": 0.3761054575443268, "learning_rate": 7.764941235308828e-05, "loss": 0.4906, "step": 12237 }, { "epoch": 0.9073923036998591, "grad_norm": 0.3648870885372162, "learning_rate": 7.763940985246312e-05, "loss": 0.5311, "step": 12238 }, { "epoch": 0.907466449173278, "grad_norm": 0.37978124618530273, "learning_rate": 7.762940735183797e-05, "loss": 0.4962, "step": 12239 }, { "epoch": 0.9075405946466968, "grad_norm": 0.36188727617263794, "learning_rate": 7.76194048512128e-05, "loss": 0.4817, "step": 12240 }, { "epoch": 0.9076147401201157, "grad_norm": 0.35821282863616943, "learning_rate": 7.760940235058764e-05, "loss": 0.4628, "step": 12241 }, { "epoch": 0.9076888855935346, "grad_norm": 0.34960705041885376, "learning_rate": 7.75993998499625e-05, "loss": 0.4537, "step": 12242 }, { "epoch": 0.9077630310669533, "grad_norm": 0.37517228722572327, "learning_rate": 7.758939734933733e-05, "loss": 0.5513, "step": 12243 }, { "epoch": 0.9078371765403722, "grad_norm": 0.34248119592666626, "learning_rate": 7.757939484871218e-05, "loss": 0.4776, "step": 12244 }, { "epoch": 0.9079113220137911, "grad_norm": 0.3665655553340912, "learning_rate": 7.756939234808702e-05, "loss": 0.5117, "step": 12245 }, { "epoch": 0.9079854674872099, "grad_norm": 0.3915019929409027, "learning_rate": 7.755938984746187e-05, "loss": 0.5247, "step": 12246 }, { "epoch": 0.9080596129606288, "grad_norm": 0.34508028626441956, "learning_rate": 7.75493873468367e-05, "loss": 0.4808, "step": 12247 }, { "epoch": 0.9081337584340476, "grad_norm": 0.33891522884368896, "learning_rate": 7.753938484621156e-05, "loss": 0.4627, "step": 12248 }, { "epoch": 0.9082079039074664, "grad_norm": 0.3655557334423065, "learning_rate": 7.75293823455864e-05, "loss": 0.5176, "step": 12249 }, { "epoch": 0.9082820493808853, "grad_norm": 0.36177071928977966, "learning_rate": 7.751937984496124e-05, "loss": 0.4841, "step": 12250 }, { "epoch": 0.9083561948543042, "grad_norm": 0.3542259633541107, "learning_rate": 7.750937734433608e-05, "loss": 0.5103, "step": 12251 }, { "epoch": 0.908430340327723, "grad_norm": 0.3643378019332886, "learning_rate": 7.749937484371093e-05, "loss": 0.5099, "step": 12252 }, { "epoch": 0.9085044858011418, "grad_norm": 0.3869318664073944, "learning_rate": 7.748937234308577e-05, "loss": 0.5621, "step": 12253 }, { "epoch": 0.9085786312745607, "grad_norm": 0.3524673879146576, "learning_rate": 7.747936984246062e-05, "loss": 0.463, "step": 12254 }, { "epoch": 0.9086527767479795, "grad_norm": 0.36208420991897583, "learning_rate": 7.746936734183546e-05, "loss": 0.4801, "step": 12255 }, { "epoch": 0.9087269222213984, "grad_norm": 0.3470224440097809, "learning_rate": 7.745936484121031e-05, "loss": 0.4647, "step": 12256 }, { "epoch": 0.9088010676948173, "grad_norm": 0.35648271441459656, "learning_rate": 7.744936234058516e-05, "loss": 0.4969, "step": 12257 }, { "epoch": 0.908875213168236, "grad_norm": 0.3486587107181549, "learning_rate": 7.743935983996e-05, "loss": 0.489, "step": 12258 }, { "epoch": 0.9089493586416549, "grad_norm": 0.350781112909317, "learning_rate": 7.742935733933485e-05, "loss": 0.4561, "step": 12259 }, { "epoch": 0.9090235041150738, "grad_norm": 0.36324408650398254, "learning_rate": 7.741935483870968e-05, "loss": 0.5087, "step": 12260 }, { "epoch": 0.9090976495884926, "grad_norm": 0.36282840371131897, "learning_rate": 7.740935233808453e-05, "loss": 0.5015, "step": 12261 }, { "epoch": 0.9091717950619115, "grad_norm": 0.3779114782810211, "learning_rate": 7.739934983745937e-05, "loss": 0.5015, "step": 12262 }, { "epoch": 0.9092459405353304, "grad_norm": 0.36912134289741516, "learning_rate": 7.738934733683422e-05, "loss": 0.497, "step": 12263 }, { "epoch": 0.9093200860087491, "grad_norm": 0.35842424631118774, "learning_rate": 7.737934483620906e-05, "loss": 0.4977, "step": 12264 }, { "epoch": 0.909394231482168, "grad_norm": 0.36107268929481506, "learning_rate": 7.73693423355839e-05, "loss": 0.4924, "step": 12265 }, { "epoch": 0.9094683769555869, "grad_norm": 0.3549515902996063, "learning_rate": 7.735933983495875e-05, "loss": 0.4935, "step": 12266 }, { "epoch": 0.9095425224290057, "grad_norm": 0.3712345361709595, "learning_rate": 7.734933733433358e-05, "loss": 0.5051, "step": 12267 }, { "epoch": 0.9096166679024246, "grad_norm": 0.3467949330806732, "learning_rate": 7.733933483370843e-05, "loss": 0.4716, "step": 12268 }, { "epoch": 0.9096908133758435, "grad_norm": 0.3778625428676605, "learning_rate": 7.732933233308327e-05, "loss": 0.4789, "step": 12269 }, { "epoch": 0.9097649588492622, "grad_norm": 0.37642407417297363, "learning_rate": 7.731932983245812e-05, "loss": 0.4723, "step": 12270 }, { "epoch": 0.9098391043226811, "grad_norm": 0.3366379141807556, "learning_rate": 7.730932733183296e-05, "loss": 0.4422, "step": 12271 }, { "epoch": 0.9099132497961, "grad_norm": 0.3770594000816345, "learning_rate": 7.729932483120781e-05, "loss": 0.4958, "step": 12272 }, { "epoch": 0.9099873952695188, "grad_norm": 0.35192543268203735, "learning_rate": 7.728932233058265e-05, "loss": 0.465, "step": 12273 }, { "epoch": 0.9100615407429377, "grad_norm": 0.3657963275909424, "learning_rate": 7.72793198299575e-05, "loss": 0.4854, "step": 12274 }, { "epoch": 0.9101356862163565, "grad_norm": 0.3487366735935211, "learning_rate": 7.726931732933233e-05, "loss": 0.416, "step": 12275 }, { "epoch": 0.9102098316897753, "grad_norm": 0.37534037232398987, "learning_rate": 7.725931482870719e-05, "loss": 0.5182, "step": 12276 }, { "epoch": 0.9102839771631942, "grad_norm": 0.387076735496521, "learning_rate": 7.724931232808202e-05, "loss": 0.5321, "step": 12277 }, { "epoch": 0.9103581226366131, "grad_norm": 0.3833845853805542, "learning_rate": 7.723930982745686e-05, "loss": 0.5115, "step": 12278 }, { "epoch": 0.9104322681100319, "grad_norm": 0.37540921568870544, "learning_rate": 7.722930732683171e-05, "loss": 0.5148, "step": 12279 }, { "epoch": 0.9105064135834507, "grad_norm": 0.368088036775589, "learning_rate": 7.721930482620655e-05, "loss": 0.5122, "step": 12280 }, { "epoch": 0.9105805590568696, "grad_norm": 0.38346394896507263, "learning_rate": 7.72093023255814e-05, "loss": 0.51, "step": 12281 }, { "epoch": 0.9106547045302884, "grad_norm": 0.348906010389328, "learning_rate": 7.719929982495623e-05, "loss": 0.4904, "step": 12282 }, { "epoch": 0.9107288500037073, "grad_norm": 0.3682903051376343, "learning_rate": 7.718929732433109e-05, "loss": 0.4658, "step": 12283 }, { "epoch": 0.9108029954771262, "grad_norm": 0.39014461636543274, "learning_rate": 7.717929482370592e-05, "loss": 0.5641, "step": 12284 }, { "epoch": 0.910877140950545, "grad_norm": 0.3606891930103302, "learning_rate": 7.716929232308077e-05, "loss": 0.4925, "step": 12285 }, { "epoch": 0.9109512864239638, "grad_norm": 0.35773563385009766, "learning_rate": 7.715928982245561e-05, "loss": 0.5051, "step": 12286 }, { "epoch": 0.9110254318973826, "grad_norm": 0.352607399225235, "learning_rate": 7.714928732183046e-05, "loss": 0.4703, "step": 12287 }, { "epoch": 0.9110995773708015, "grad_norm": 0.37602677941322327, "learning_rate": 7.71392848212053e-05, "loss": 0.4828, "step": 12288 }, { "epoch": 0.9111737228442204, "grad_norm": 0.3760469853878021, "learning_rate": 7.712928232058015e-05, "loss": 0.5286, "step": 12289 }, { "epoch": 0.9112478683176392, "grad_norm": 0.3501727879047394, "learning_rate": 7.7119279819955e-05, "loss": 0.4728, "step": 12290 }, { "epoch": 0.911322013791058, "grad_norm": 0.3921213746070862, "learning_rate": 7.710927731932984e-05, "loss": 0.5326, "step": 12291 }, { "epoch": 0.9113961592644769, "grad_norm": 0.37251338362693787, "learning_rate": 7.709927481870469e-05, "loss": 0.5163, "step": 12292 }, { "epoch": 0.9114703047378957, "grad_norm": 0.35159456729888916, "learning_rate": 7.708927231807952e-05, "loss": 0.4776, "step": 12293 }, { "epoch": 0.9115444502113146, "grad_norm": 0.36998629570007324, "learning_rate": 7.707926981745437e-05, "loss": 0.4832, "step": 12294 }, { "epoch": 0.9116185956847335, "grad_norm": 0.3737533688545227, "learning_rate": 7.706926731682921e-05, "loss": 0.4918, "step": 12295 }, { "epoch": 0.9116927411581522, "grad_norm": 0.38122543692588806, "learning_rate": 7.705926481620406e-05, "loss": 0.4979, "step": 12296 }, { "epoch": 0.9117668866315711, "grad_norm": 0.36308354139328003, "learning_rate": 7.70492623155789e-05, "loss": 0.4823, "step": 12297 }, { "epoch": 0.91184103210499, "grad_norm": 0.3721024990081787, "learning_rate": 7.703925981495375e-05, "loss": 0.4914, "step": 12298 }, { "epoch": 0.9119151775784088, "grad_norm": 0.35598233342170715, "learning_rate": 7.702925731432859e-05, "loss": 0.4839, "step": 12299 }, { "epoch": 0.9119893230518277, "grad_norm": 0.34776607155799866, "learning_rate": 7.701925481370344e-05, "loss": 0.4698, "step": 12300 }, { "epoch": 0.9120634685252466, "grad_norm": 0.3778003752231598, "learning_rate": 7.700925231307828e-05, "loss": 0.4965, "step": 12301 }, { "epoch": 0.9121376139986653, "grad_norm": 0.371147483587265, "learning_rate": 7.699924981245311e-05, "loss": 0.5084, "step": 12302 }, { "epoch": 0.9122117594720842, "grad_norm": 0.34697890281677246, "learning_rate": 7.698924731182796e-05, "loss": 0.4633, "step": 12303 }, { "epoch": 0.9122859049455031, "grad_norm": 0.34632351994514465, "learning_rate": 7.69792448112028e-05, "loss": 0.4487, "step": 12304 }, { "epoch": 0.9123600504189219, "grad_norm": 0.36665040254592896, "learning_rate": 7.696924231057765e-05, "loss": 0.5181, "step": 12305 }, { "epoch": 0.9124341958923408, "grad_norm": 0.33998891711235046, "learning_rate": 7.695923980995249e-05, "loss": 0.4672, "step": 12306 }, { "epoch": 0.9125083413657596, "grad_norm": 0.3707277178764343, "learning_rate": 7.694923730932734e-05, "loss": 0.518, "step": 12307 }, { "epoch": 0.9125824868391784, "grad_norm": 0.39012736082077026, "learning_rate": 7.693923480870218e-05, "loss": 0.5728, "step": 12308 }, { "epoch": 0.9126566323125973, "grad_norm": 0.3671033978462219, "learning_rate": 7.692923230807703e-05, "loss": 0.4788, "step": 12309 }, { "epoch": 0.9127307777860162, "grad_norm": 0.37107205390930176, "learning_rate": 7.691922980745186e-05, "loss": 0.5021, "step": 12310 }, { "epoch": 0.912804923259435, "grad_norm": 0.35684171319007874, "learning_rate": 7.690922730682671e-05, "loss": 0.4858, "step": 12311 }, { "epoch": 0.9128790687328538, "grad_norm": 0.34751415252685547, "learning_rate": 7.689922480620155e-05, "loss": 0.487, "step": 12312 }, { "epoch": 0.9129532142062727, "grad_norm": 0.3712983727455139, "learning_rate": 7.68892223055764e-05, "loss": 0.4904, "step": 12313 }, { "epoch": 0.9130273596796915, "grad_norm": 0.37322795391082764, "learning_rate": 7.687921980495124e-05, "loss": 0.4932, "step": 12314 }, { "epoch": 0.9131015051531104, "grad_norm": 0.3593966066837311, "learning_rate": 7.686921730432608e-05, "loss": 0.491, "step": 12315 }, { "epoch": 0.9131756506265293, "grad_norm": 0.3627384305000305, "learning_rate": 7.685921480370093e-05, "loss": 0.5033, "step": 12316 }, { "epoch": 0.913249796099948, "grad_norm": 0.36533981561660767, "learning_rate": 7.684921230307576e-05, "loss": 0.4773, "step": 12317 }, { "epoch": 0.9133239415733669, "grad_norm": 0.3392719626426697, "learning_rate": 7.683920980245061e-05, "loss": 0.463, "step": 12318 }, { "epoch": 0.9133980870467858, "grad_norm": 0.35438093543052673, "learning_rate": 7.682920730182545e-05, "loss": 0.4864, "step": 12319 }, { "epoch": 0.9134722325202046, "grad_norm": 0.37328651547431946, "learning_rate": 7.68192048012003e-05, "loss": 0.5041, "step": 12320 }, { "epoch": 0.9135463779936235, "grad_norm": 0.350928395986557, "learning_rate": 7.680920230057514e-05, "loss": 0.4532, "step": 12321 }, { "epoch": 0.9136205234670424, "grad_norm": 0.37532368302345276, "learning_rate": 7.679919979994999e-05, "loss": 0.5146, "step": 12322 }, { "epoch": 0.9136946689404611, "grad_norm": 0.36526745557785034, "learning_rate": 7.678919729932483e-05, "loss": 0.5071, "step": 12323 }, { "epoch": 0.91376881441388, "grad_norm": 0.36086180806159973, "learning_rate": 7.677919479869968e-05, "loss": 0.4742, "step": 12324 }, { "epoch": 0.9138429598872989, "grad_norm": 0.35461482405662537, "learning_rate": 7.676919229807453e-05, "loss": 0.485, "step": 12325 }, { "epoch": 0.9139171053607177, "grad_norm": 0.3481079339981079, "learning_rate": 7.675918979744936e-05, "loss": 0.4722, "step": 12326 }, { "epoch": 0.9139912508341366, "grad_norm": 0.37207815051078796, "learning_rate": 7.674918729682422e-05, "loss": 0.5275, "step": 12327 }, { "epoch": 0.9140653963075555, "grad_norm": 0.34493929147720337, "learning_rate": 7.673918479619905e-05, "loss": 0.4621, "step": 12328 }, { "epoch": 0.9141395417809742, "grad_norm": 0.3446365296840668, "learning_rate": 7.67291822955739e-05, "loss": 0.4393, "step": 12329 }, { "epoch": 0.9142136872543931, "grad_norm": 0.3831775486469269, "learning_rate": 7.671917979494874e-05, "loss": 0.5142, "step": 12330 }, { "epoch": 0.914287832727812, "grad_norm": 0.3588979244232178, "learning_rate": 7.670917729432359e-05, "loss": 0.5029, "step": 12331 }, { "epoch": 0.9143619782012308, "grad_norm": 0.3550351560115814, "learning_rate": 7.669917479369843e-05, "loss": 0.4657, "step": 12332 }, { "epoch": 0.9144361236746497, "grad_norm": 0.3579588532447815, "learning_rate": 7.668917229307328e-05, "loss": 0.5145, "step": 12333 }, { "epoch": 0.9145102691480685, "grad_norm": 0.37534669041633606, "learning_rate": 7.667916979244812e-05, "loss": 0.5173, "step": 12334 }, { "epoch": 0.9145844146214873, "grad_norm": 0.3603922724723816, "learning_rate": 7.666916729182297e-05, "loss": 0.5008, "step": 12335 }, { "epoch": 0.9146585600949062, "grad_norm": 0.37768033146858215, "learning_rate": 7.66591647911978e-05, "loss": 0.5164, "step": 12336 }, { "epoch": 0.9147327055683251, "grad_norm": 0.3579665720462799, "learning_rate": 7.664916229057265e-05, "loss": 0.5254, "step": 12337 }, { "epoch": 0.9148068510417439, "grad_norm": 0.340798944234848, "learning_rate": 7.663915978994749e-05, "loss": 0.4505, "step": 12338 }, { "epoch": 0.9148809965151627, "grad_norm": 0.37946441769599915, "learning_rate": 7.662915728932234e-05, "loss": 0.5123, "step": 12339 }, { "epoch": 0.9149551419885816, "grad_norm": 0.38519513607025146, "learning_rate": 7.661915478869718e-05, "loss": 0.5054, "step": 12340 }, { "epoch": 0.9150292874620004, "grad_norm": 0.3508912920951843, "learning_rate": 7.660915228807202e-05, "loss": 0.4831, "step": 12341 }, { "epoch": 0.9151034329354193, "grad_norm": 0.3668072521686554, "learning_rate": 7.659914978744687e-05, "loss": 0.485, "step": 12342 }, { "epoch": 0.9151775784088382, "grad_norm": 0.3582935333251953, "learning_rate": 7.65891472868217e-05, "loss": 0.4997, "step": 12343 }, { "epoch": 0.915251723882257, "grad_norm": 0.32857346534729004, "learning_rate": 7.657914478619655e-05, "loss": 0.4697, "step": 12344 }, { "epoch": 0.9153258693556758, "grad_norm": 0.34653440117836, "learning_rate": 7.656914228557139e-05, "loss": 0.4476, "step": 12345 }, { "epoch": 0.9154000148290947, "grad_norm": 0.3496309518814087, "learning_rate": 7.655913978494624e-05, "loss": 0.4557, "step": 12346 }, { "epoch": 0.9154741603025135, "grad_norm": 0.3771413266658783, "learning_rate": 7.654913728432108e-05, "loss": 0.4961, "step": 12347 }, { "epoch": 0.9155483057759324, "grad_norm": 0.358271062374115, "learning_rate": 7.653913478369593e-05, "loss": 0.4832, "step": 12348 }, { "epoch": 0.9156224512493513, "grad_norm": 0.3934309184551239, "learning_rate": 7.652913228307077e-05, "loss": 0.5498, "step": 12349 }, { "epoch": 0.91569659672277, "grad_norm": 0.35414913296699524, "learning_rate": 7.651912978244562e-05, "loss": 0.5166, "step": 12350 }, { "epoch": 0.9157707421961889, "grad_norm": 0.36952129006385803, "learning_rate": 7.650912728182045e-05, "loss": 0.4915, "step": 12351 }, { "epoch": 0.9158448876696078, "grad_norm": 0.3545053005218506, "learning_rate": 7.649912478119529e-05, "loss": 0.5234, "step": 12352 }, { "epoch": 0.9159190331430266, "grad_norm": 0.371724396944046, "learning_rate": 7.648912228057014e-05, "loss": 0.4849, "step": 12353 }, { "epoch": 0.9159931786164455, "grad_norm": 0.32736942172050476, "learning_rate": 7.647911977994498e-05, "loss": 0.4249, "step": 12354 }, { "epoch": 0.9160673240898644, "grad_norm": 0.3688769042491913, "learning_rate": 7.646911727931983e-05, "loss": 0.4937, "step": 12355 }, { "epoch": 0.9161414695632831, "grad_norm": 0.3746674060821533, "learning_rate": 7.645911477869467e-05, "loss": 0.4698, "step": 12356 }, { "epoch": 0.916215615036702, "grad_norm": 0.3373420238494873, "learning_rate": 7.644911227806952e-05, "loss": 0.4662, "step": 12357 }, { "epoch": 0.9162897605101209, "grad_norm": 0.3492874503135681, "learning_rate": 7.643910977744437e-05, "loss": 0.4814, "step": 12358 }, { "epoch": 0.9163639059835397, "grad_norm": 0.36517855525016785, "learning_rate": 7.64291072768192e-05, "loss": 0.4946, "step": 12359 }, { "epoch": 0.9164380514569586, "grad_norm": 0.35666999220848083, "learning_rate": 7.641910477619406e-05, "loss": 0.4353, "step": 12360 }, { "epoch": 0.9165121969303774, "grad_norm": 0.3596087694168091, "learning_rate": 7.64091022755689e-05, "loss": 0.4907, "step": 12361 }, { "epoch": 0.9165863424037962, "grad_norm": 0.3727652132511139, "learning_rate": 7.639909977494374e-05, "loss": 0.5172, "step": 12362 }, { "epoch": 0.9166604878772151, "grad_norm": 0.348560631275177, "learning_rate": 7.63890972743186e-05, "loss": 0.4877, "step": 12363 }, { "epoch": 0.916734633350634, "grad_norm": 0.3518667221069336, "learning_rate": 7.637909477369343e-05, "loss": 0.5004, "step": 12364 }, { "epoch": 0.9168087788240528, "grad_norm": 0.3694843649864197, "learning_rate": 7.636909227306827e-05, "loss": 0.4636, "step": 12365 }, { "epoch": 0.9168829242974716, "grad_norm": 0.35784730315208435, "learning_rate": 7.635908977244312e-05, "loss": 0.483, "step": 12366 }, { "epoch": 0.9169570697708905, "grad_norm": 0.35617169737815857, "learning_rate": 7.634908727181796e-05, "loss": 0.4721, "step": 12367 }, { "epoch": 0.9170312152443093, "grad_norm": 0.3371557295322418, "learning_rate": 7.633908477119281e-05, "loss": 0.4653, "step": 12368 }, { "epoch": 0.9171053607177282, "grad_norm": 0.34220215678215027, "learning_rate": 7.632908227056764e-05, "loss": 0.4408, "step": 12369 }, { "epoch": 0.9171795061911471, "grad_norm": 0.3637031614780426, "learning_rate": 7.63190797699425e-05, "loss": 0.4709, "step": 12370 }, { "epoch": 0.9172536516645658, "grad_norm": 0.3651461601257324, "learning_rate": 7.630907726931733e-05, "loss": 0.4825, "step": 12371 }, { "epoch": 0.9173277971379847, "grad_norm": 0.3551505506038666, "learning_rate": 7.629907476869218e-05, "loss": 0.4919, "step": 12372 }, { "epoch": 0.9174019426114036, "grad_norm": 0.33308231830596924, "learning_rate": 7.628907226806702e-05, "loss": 0.445, "step": 12373 }, { "epoch": 0.9174760880848224, "grad_norm": 0.36347106099128723, "learning_rate": 7.627906976744187e-05, "loss": 0.503, "step": 12374 }, { "epoch": 0.9175502335582413, "grad_norm": 0.36745724081993103, "learning_rate": 7.626906726681671e-05, "loss": 0.5054, "step": 12375 }, { "epoch": 0.9176243790316602, "grad_norm": 0.38225582242012024, "learning_rate": 7.625906476619156e-05, "loss": 0.5482, "step": 12376 }, { "epoch": 0.9176985245050789, "grad_norm": 0.35248032212257385, "learning_rate": 7.62490622655664e-05, "loss": 0.4593, "step": 12377 }, { "epoch": 0.9177726699784978, "grad_norm": 0.35594868659973145, "learning_rate": 7.623905976494123e-05, "loss": 0.4888, "step": 12378 }, { "epoch": 0.9178468154519167, "grad_norm": 0.3589613735675812, "learning_rate": 7.622905726431608e-05, "loss": 0.5037, "step": 12379 }, { "epoch": 0.9179209609253355, "grad_norm": 0.36188051104545593, "learning_rate": 7.621905476369092e-05, "loss": 0.5168, "step": 12380 }, { "epoch": 0.9179951063987544, "grad_norm": 0.3557167947292328, "learning_rate": 7.620905226306577e-05, "loss": 0.4973, "step": 12381 }, { "epoch": 0.9180692518721733, "grad_norm": 0.349258154630661, "learning_rate": 7.619904976244061e-05, "loss": 0.4765, "step": 12382 }, { "epoch": 0.918143397345592, "grad_norm": 0.3836480677127838, "learning_rate": 7.618904726181546e-05, "loss": 0.4888, "step": 12383 }, { "epoch": 0.9182175428190109, "grad_norm": 0.36747241020202637, "learning_rate": 7.61790447611903e-05, "loss": 0.4522, "step": 12384 }, { "epoch": 0.9182916882924298, "grad_norm": 0.37865445017814636, "learning_rate": 7.616904226056515e-05, "loss": 0.4833, "step": 12385 }, { "epoch": 0.9183658337658486, "grad_norm": 0.37186741828918457, "learning_rate": 7.615903975993998e-05, "loss": 0.5304, "step": 12386 }, { "epoch": 0.9184399792392675, "grad_norm": 0.3870313763618469, "learning_rate": 7.614903725931483e-05, "loss": 0.5431, "step": 12387 }, { "epoch": 0.9185141247126863, "grad_norm": 0.38110628724098206, "learning_rate": 7.613903475868967e-05, "loss": 0.5186, "step": 12388 }, { "epoch": 0.9185882701861051, "grad_norm": 0.363450288772583, "learning_rate": 7.612903225806451e-05, "loss": 0.4859, "step": 12389 }, { "epoch": 0.918662415659524, "grad_norm": 0.3938356637954712, "learning_rate": 7.611902975743936e-05, "loss": 0.5366, "step": 12390 }, { "epoch": 0.9187365611329429, "grad_norm": 0.3734666705131531, "learning_rate": 7.61090272568142e-05, "loss": 0.5098, "step": 12391 }, { "epoch": 0.9188107066063617, "grad_norm": 0.3699693977832794, "learning_rate": 7.609902475618905e-05, "loss": 0.4811, "step": 12392 }, { "epoch": 0.9188848520797805, "grad_norm": 0.3615417778491974, "learning_rate": 7.60890222555639e-05, "loss": 0.5518, "step": 12393 }, { "epoch": 0.9189589975531994, "grad_norm": 0.3706604540348053, "learning_rate": 7.607901975493873e-05, "loss": 0.4737, "step": 12394 }, { "epoch": 0.9190331430266182, "grad_norm": 0.343799352645874, "learning_rate": 7.606901725431358e-05, "loss": 0.4686, "step": 12395 }, { "epoch": 0.9191072885000371, "grad_norm": 0.3672512173652649, "learning_rate": 7.605901475368844e-05, "loss": 0.4983, "step": 12396 }, { "epoch": 0.919181433973456, "grad_norm": 0.3989816904067993, "learning_rate": 7.604901225306327e-05, "loss": 0.5813, "step": 12397 }, { "epoch": 0.9192555794468747, "grad_norm": 0.39718011021614075, "learning_rate": 7.603900975243812e-05, "loss": 0.5147, "step": 12398 }, { "epoch": 0.9193297249202936, "grad_norm": 0.3329892158508301, "learning_rate": 7.602900725181296e-05, "loss": 0.4809, "step": 12399 }, { "epoch": 0.9194038703937125, "grad_norm": 0.35658156871795654, "learning_rate": 7.601900475118781e-05, "loss": 0.5, "step": 12400 }, { "epoch": 0.9194780158671313, "grad_norm": 0.3854674696922302, "learning_rate": 7.600900225056265e-05, "loss": 0.4918, "step": 12401 }, { "epoch": 0.9195521613405502, "grad_norm": 0.34232714772224426, "learning_rate": 7.599899974993749e-05, "loss": 0.4779, "step": 12402 }, { "epoch": 0.919626306813969, "grad_norm": 0.36688533425331116, "learning_rate": 7.598899724931234e-05, "loss": 0.4985, "step": 12403 }, { "epoch": 0.9197004522873878, "grad_norm": 0.3630974292755127, "learning_rate": 7.597899474868717e-05, "loss": 0.4736, "step": 12404 }, { "epoch": 0.9197745977608067, "grad_norm": 0.376235693693161, "learning_rate": 7.596899224806202e-05, "loss": 0.4741, "step": 12405 }, { "epoch": 0.9198487432342255, "grad_norm": 0.3795393407344818, "learning_rate": 7.595898974743686e-05, "loss": 0.495, "step": 12406 }, { "epoch": 0.9199228887076444, "grad_norm": 0.3827970623970032, "learning_rate": 7.594898724681171e-05, "loss": 0.525, "step": 12407 }, { "epoch": 0.9199970341810633, "grad_norm": 0.34787818789482117, "learning_rate": 7.593898474618655e-05, "loss": 0.4552, "step": 12408 }, { "epoch": 0.920071179654482, "grad_norm": 0.38262608647346497, "learning_rate": 7.59289822455614e-05, "loss": 0.4737, "step": 12409 }, { "epoch": 0.9201453251279009, "grad_norm": 0.3921487033367157, "learning_rate": 7.591897974493624e-05, "loss": 0.5322, "step": 12410 }, { "epoch": 0.9202194706013198, "grad_norm": 0.3426423966884613, "learning_rate": 7.590897724431109e-05, "loss": 0.4597, "step": 12411 }, { "epoch": 0.9202936160747386, "grad_norm": 0.35566651821136475, "learning_rate": 7.589897474368592e-05, "loss": 0.4811, "step": 12412 }, { "epoch": 0.9203677615481575, "grad_norm": 0.35717833042144775, "learning_rate": 7.588897224306077e-05, "loss": 0.4988, "step": 12413 }, { "epoch": 0.9204419070215764, "grad_norm": 0.3569641411304474, "learning_rate": 7.587896974243561e-05, "loss": 0.4634, "step": 12414 }, { "epoch": 0.9205160524949951, "grad_norm": 0.419711172580719, "learning_rate": 7.586896724181045e-05, "loss": 0.6015, "step": 12415 }, { "epoch": 0.920590197968414, "grad_norm": 0.37297865748405457, "learning_rate": 7.58589647411853e-05, "loss": 0.4675, "step": 12416 }, { "epoch": 0.9206643434418329, "grad_norm": 0.34791257977485657, "learning_rate": 7.584896224056014e-05, "loss": 0.4721, "step": 12417 }, { "epoch": 0.9207384889152517, "grad_norm": 0.37515518069267273, "learning_rate": 7.583895973993499e-05, "loss": 0.5026, "step": 12418 }, { "epoch": 0.9208126343886706, "grad_norm": 0.3555123507976532, "learning_rate": 7.582895723930982e-05, "loss": 0.4562, "step": 12419 }, { "epoch": 0.9208867798620894, "grad_norm": 0.3784284293651581, "learning_rate": 7.581895473868467e-05, "loss": 0.4872, "step": 12420 }, { "epoch": 0.9209609253355082, "grad_norm": 0.354417085647583, "learning_rate": 7.580895223805951e-05, "loss": 0.4984, "step": 12421 }, { "epoch": 0.9210350708089271, "grad_norm": 0.3574739098548889, "learning_rate": 7.579894973743436e-05, "loss": 0.4606, "step": 12422 }, { "epoch": 0.921109216282346, "grad_norm": 0.3408328890800476, "learning_rate": 7.57889472368092e-05, "loss": 0.4556, "step": 12423 }, { "epoch": 0.9211833617557648, "grad_norm": 0.3782733380794525, "learning_rate": 7.577894473618405e-05, "loss": 0.4938, "step": 12424 }, { "epoch": 0.9212575072291836, "grad_norm": 0.3781554698944092, "learning_rate": 7.576894223555889e-05, "loss": 0.4944, "step": 12425 }, { "epoch": 0.9213316527026025, "grad_norm": 0.3553416132926941, "learning_rate": 7.575893973493374e-05, "loss": 0.4905, "step": 12426 }, { "epoch": 0.9214057981760213, "grad_norm": 0.34633904695510864, "learning_rate": 7.574893723430858e-05, "loss": 0.4573, "step": 12427 }, { "epoch": 0.9214799436494402, "grad_norm": 0.37783822417259216, "learning_rate": 7.573893473368343e-05, "loss": 0.5265, "step": 12428 }, { "epoch": 0.9215540891228591, "grad_norm": 0.3615224361419678, "learning_rate": 7.572893223305826e-05, "loss": 0.4638, "step": 12429 }, { "epoch": 0.9216282345962779, "grad_norm": 0.3774965703487396, "learning_rate": 7.571892973243311e-05, "loss": 0.5332, "step": 12430 }, { "epoch": 0.9217023800696967, "grad_norm": 0.38820740580558777, "learning_rate": 7.570892723180796e-05, "loss": 0.5032, "step": 12431 }, { "epoch": 0.9217765255431156, "grad_norm": 0.3792641758918762, "learning_rate": 7.56989247311828e-05, "loss": 0.4684, "step": 12432 }, { "epoch": 0.9218506710165344, "grad_norm": 0.38120150566101074, "learning_rate": 7.568892223055765e-05, "loss": 0.5566, "step": 12433 }, { "epoch": 0.9219248164899533, "grad_norm": 0.3376290500164032, "learning_rate": 7.567891972993249e-05, "loss": 0.4582, "step": 12434 }, { "epoch": 0.9219989619633722, "grad_norm": 0.34599441289901733, "learning_rate": 7.566891722930734e-05, "loss": 0.477, "step": 12435 }, { "epoch": 0.9220731074367909, "grad_norm": 0.35098183155059814, "learning_rate": 7.565891472868218e-05, "loss": 0.4827, "step": 12436 }, { "epoch": 0.9221472529102098, "grad_norm": 0.36875665187835693, "learning_rate": 7.564891222805703e-05, "loss": 0.5135, "step": 12437 }, { "epoch": 0.9222213983836287, "grad_norm": 0.3376328945159912, "learning_rate": 7.563890972743186e-05, "loss": 0.4689, "step": 12438 }, { "epoch": 0.9222955438570475, "grad_norm": 0.35341042280197144, "learning_rate": 7.56289072268067e-05, "loss": 0.4848, "step": 12439 }, { "epoch": 0.9223696893304664, "grad_norm": 0.3777451515197754, "learning_rate": 7.561890472618155e-05, "loss": 0.5265, "step": 12440 }, { "epoch": 0.9224438348038853, "grad_norm": 0.35465192794799805, "learning_rate": 7.560890222555639e-05, "loss": 0.4833, "step": 12441 }, { "epoch": 0.922517980277304, "grad_norm": 0.3486011326313019, "learning_rate": 7.559889972493124e-05, "loss": 0.4778, "step": 12442 }, { "epoch": 0.9225921257507229, "grad_norm": 0.3915579617023468, "learning_rate": 7.558889722430608e-05, "loss": 0.5101, "step": 12443 }, { "epoch": 0.9226662712241418, "grad_norm": 0.3594628870487213, "learning_rate": 7.557889472368093e-05, "loss": 0.4653, "step": 12444 }, { "epoch": 0.9227404166975606, "grad_norm": 0.3408556282520294, "learning_rate": 7.556889222305576e-05, "loss": 0.5186, "step": 12445 }, { "epoch": 0.9228145621709795, "grad_norm": 0.33907371759414673, "learning_rate": 7.555888972243062e-05, "loss": 0.4466, "step": 12446 }, { "epoch": 0.9228887076443983, "grad_norm": 0.3774392604827881, "learning_rate": 7.554888722180545e-05, "loss": 0.5105, "step": 12447 }, { "epoch": 0.9229628531178171, "grad_norm": 0.35582035779953003, "learning_rate": 7.55388847211803e-05, "loss": 0.467, "step": 12448 }, { "epoch": 0.923036998591236, "grad_norm": 0.36798587441444397, "learning_rate": 7.552888222055514e-05, "loss": 0.5035, "step": 12449 }, { "epoch": 0.9231111440646549, "grad_norm": 0.3445532023906708, "learning_rate": 7.551887971992999e-05, "loss": 0.471, "step": 12450 }, { "epoch": 0.9231852895380737, "grad_norm": 0.35535702109336853, "learning_rate": 7.550887721930483e-05, "loss": 0.4988, "step": 12451 }, { "epoch": 0.9232594350114925, "grad_norm": 0.36792510747909546, "learning_rate": 7.549887471867966e-05, "loss": 0.5171, "step": 12452 }, { "epoch": 0.9233335804849114, "grad_norm": 0.34439346194267273, "learning_rate": 7.548887221805452e-05, "loss": 0.4782, "step": 12453 }, { "epoch": 0.9234077259583302, "grad_norm": 0.368578165769577, "learning_rate": 7.547886971742935e-05, "loss": 0.4986, "step": 12454 }, { "epoch": 0.9234818714317491, "grad_norm": 0.32263073325157166, "learning_rate": 7.54688672168042e-05, "loss": 0.4131, "step": 12455 }, { "epoch": 0.923556016905168, "grad_norm": 0.3527402877807617, "learning_rate": 7.545886471617904e-05, "loss": 0.5207, "step": 12456 }, { "epoch": 0.9236301623785867, "grad_norm": 0.3589516580104828, "learning_rate": 7.544886221555389e-05, "loss": 0.4647, "step": 12457 }, { "epoch": 0.9237043078520056, "grad_norm": 0.35595306754112244, "learning_rate": 7.543885971492873e-05, "loss": 0.5003, "step": 12458 }, { "epoch": 0.9237784533254245, "grad_norm": 0.3510929346084595, "learning_rate": 7.542885721430358e-05, "loss": 0.4705, "step": 12459 }, { "epoch": 0.9238525987988433, "grad_norm": 0.3734757602214813, "learning_rate": 7.541885471367842e-05, "loss": 0.4699, "step": 12460 }, { "epoch": 0.9239267442722622, "grad_norm": 0.3811098337173462, "learning_rate": 7.540885221305327e-05, "loss": 0.5547, "step": 12461 }, { "epoch": 0.9240008897456811, "grad_norm": 0.3614274561405182, "learning_rate": 7.53988497124281e-05, "loss": 0.4704, "step": 12462 }, { "epoch": 0.9240750352190998, "grad_norm": 0.35262709856033325, "learning_rate": 7.538884721180295e-05, "loss": 0.4793, "step": 12463 }, { "epoch": 0.9241491806925187, "grad_norm": 0.3706681728363037, "learning_rate": 7.53788447111778e-05, "loss": 0.5234, "step": 12464 }, { "epoch": 0.9242233261659376, "grad_norm": 0.3381134271621704, "learning_rate": 7.536884221055264e-05, "loss": 0.4536, "step": 12465 }, { "epoch": 0.9242974716393564, "grad_norm": 0.37991800904273987, "learning_rate": 7.535883970992749e-05, "loss": 0.5113, "step": 12466 }, { "epoch": 0.9243716171127753, "grad_norm": 0.3623043894767761, "learning_rate": 7.534883720930233e-05, "loss": 0.472, "step": 12467 }, { "epoch": 0.9244457625861942, "grad_norm": 0.35597118735313416, "learning_rate": 7.533883470867718e-05, "loss": 0.4637, "step": 12468 }, { "epoch": 0.9245199080596129, "grad_norm": 0.38407501578330994, "learning_rate": 7.532883220805202e-05, "loss": 0.5485, "step": 12469 }, { "epoch": 0.9245940535330318, "grad_norm": 0.3642515540122986, "learning_rate": 7.531882970742687e-05, "loss": 0.5013, "step": 12470 }, { "epoch": 0.9246681990064507, "grad_norm": 0.38831108808517456, "learning_rate": 7.53088272068017e-05, "loss": 0.492, "step": 12471 }, { "epoch": 0.9247423444798695, "grad_norm": 0.39370930194854736, "learning_rate": 7.529882470617656e-05, "loss": 0.4586, "step": 12472 }, { "epoch": 0.9248164899532884, "grad_norm": 0.37721455097198486, "learning_rate": 7.528882220555139e-05, "loss": 0.5112, "step": 12473 }, { "epoch": 0.9248906354267072, "grad_norm": 0.3645356297492981, "learning_rate": 7.527881970492624e-05, "loss": 0.4978, "step": 12474 }, { "epoch": 0.924964780900126, "grad_norm": 0.3582991659641266, "learning_rate": 7.526881720430108e-05, "loss": 0.4961, "step": 12475 }, { "epoch": 0.9250389263735449, "grad_norm": 0.3454737961292267, "learning_rate": 7.525881470367592e-05, "loss": 0.4533, "step": 12476 }, { "epoch": 0.9251130718469638, "grad_norm": 0.35651978850364685, "learning_rate": 7.524881220305077e-05, "loss": 0.4887, "step": 12477 }, { "epoch": 0.9251872173203826, "grad_norm": 0.36642611026763916, "learning_rate": 7.52388097024256e-05, "loss": 0.5105, "step": 12478 }, { "epoch": 0.9252613627938014, "grad_norm": 0.3784686028957367, "learning_rate": 7.522880720180046e-05, "loss": 0.4944, "step": 12479 }, { "epoch": 0.9253355082672203, "grad_norm": 0.37267276644706726, "learning_rate": 7.521880470117529e-05, "loss": 0.4755, "step": 12480 }, { "epoch": 0.9254096537406391, "grad_norm": 0.3757612407207489, "learning_rate": 7.520880220055014e-05, "loss": 0.514, "step": 12481 }, { "epoch": 0.925483799214058, "grad_norm": 0.34459152817726135, "learning_rate": 7.519879969992498e-05, "loss": 0.4748, "step": 12482 }, { "epoch": 0.9255579446874769, "grad_norm": 0.36428627371788025, "learning_rate": 7.518879719929983e-05, "loss": 0.4763, "step": 12483 }, { "epoch": 0.9256320901608956, "grad_norm": 0.36532580852508545, "learning_rate": 7.517879469867467e-05, "loss": 0.5215, "step": 12484 }, { "epoch": 0.9257062356343145, "grad_norm": 0.35031288862228394, "learning_rate": 7.516879219804952e-05, "loss": 0.4776, "step": 12485 }, { "epoch": 0.9257803811077334, "grad_norm": 0.3481278121471405, "learning_rate": 7.515878969742436e-05, "loss": 0.4797, "step": 12486 }, { "epoch": 0.9258545265811522, "grad_norm": 0.3420957624912262, "learning_rate": 7.514878719679921e-05, "loss": 0.48, "step": 12487 }, { "epoch": 0.9259286720545711, "grad_norm": 0.3732546865940094, "learning_rate": 7.513878469617404e-05, "loss": 0.5135, "step": 12488 }, { "epoch": 0.92600281752799, "grad_norm": 0.3491486608982086, "learning_rate": 7.512878219554888e-05, "loss": 0.5073, "step": 12489 }, { "epoch": 0.9260769630014087, "grad_norm": 0.3398941457271576, "learning_rate": 7.511877969492373e-05, "loss": 0.4793, "step": 12490 }, { "epoch": 0.9261511084748276, "grad_norm": 0.36711540818214417, "learning_rate": 7.510877719429857e-05, "loss": 0.4938, "step": 12491 }, { "epoch": 0.9262252539482465, "grad_norm": 0.37224751710891724, "learning_rate": 7.509877469367342e-05, "loss": 0.4668, "step": 12492 }, { "epoch": 0.9262993994216653, "grad_norm": 0.3374097943305969, "learning_rate": 7.508877219304826e-05, "loss": 0.4588, "step": 12493 }, { "epoch": 0.9263735448950842, "grad_norm": 0.36722588539123535, "learning_rate": 7.507876969242311e-05, "loss": 0.4791, "step": 12494 }, { "epoch": 0.926447690368503, "grad_norm": 0.34919968247413635, "learning_rate": 7.506876719179794e-05, "loss": 0.4819, "step": 12495 }, { "epoch": 0.9265218358419218, "grad_norm": 0.3738672435283661, "learning_rate": 7.50587646911728e-05, "loss": 0.4952, "step": 12496 }, { "epoch": 0.9265959813153407, "grad_norm": 0.3816346228122711, "learning_rate": 7.504876219054765e-05, "loss": 0.5188, "step": 12497 }, { "epoch": 0.9266701267887596, "grad_norm": 0.3708685040473938, "learning_rate": 7.503875968992248e-05, "loss": 0.5015, "step": 12498 }, { "epoch": 0.9267442722621784, "grad_norm": 0.36223676800727844, "learning_rate": 7.502875718929733e-05, "loss": 0.528, "step": 12499 }, { "epoch": 0.9268184177355973, "grad_norm": 0.38683003187179565, "learning_rate": 7.501875468867217e-05, "loss": 0.5601, "step": 12500 }, { "epoch": 0.9268925632090161, "grad_norm": 0.3801520764827728, "learning_rate": 7.500875218804702e-05, "loss": 0.5139, "step": 12501 }, { "epoch": 0.9269667086824349, "grad_norm": 0.34861159324645996, "learning_rate": 7.499874968742186e-05, "loss": 0.4749, "step": 12502 }, { "epoch": 0.9270408541558538, "grad_norm": 0.38505125045776367, "learning_rate": 7.498874718679671e-05, "loss": 0.5116, "step": 12503 }, { "epoch": 0.9271149996292727, "grad_norm": 0.3596327602863312, "learning_rate": 7.497874468617155e-05, "loss": 0.4841, "step": 12504 }, { "epoch": 0.9271891451026915, "grad_norm": 0.3722608685493469, "learning_rate": 7.49687421855464e-05, "loss": 0.4966, "step": 12505 }, { "epoch": 0.9272632905761103, "grad_norm": 0.361613392829895, "learning_rate": 7.495873968492123e-05, "loss": 0.4516, "step": 12506 }, { "epoch": 0.9273374360495292, "grad_norm": 0.3696054518222809, "learning_rate": 7.494873718429608e-05, "loss": 0.5177, "step": 12507 }, { "epoch": 0.927411581522948, "grad_norm": 0.37342071533203125, "learning_rate": 7.493873468367092e-05, "loss": 0.5303, "step": 12508 }, { "epoch": 0.9274857269963669, "grad_norm": 0.38945472240448, "learning_rate": 7.492873218304577e-05, "loss": 0.5126, "step": 12509 }, { "epoch": 0.9275598724697858, "grad_norm": 0.3712955117225647, "learning_rate": 7.491872968242061e-05, "loss": 0.4818, "step": 12510 }, { "epoch": 0.9276340179432045, "grad_norm": 0.3642694056034088, "learning_rate": 7.490872718179546e-05, "loss": 0.5034, "step": 12511 }, { "epoch": 0.9277081634166234, "grad_norm": 0.3780011534690857, "learning_rate": 7.48987246811703e-05, "loss": 0.5099, "step": 12512 }, { "epoch": 0.9277823088900423, "grad_norm": 0.34464335441589355, "learning_rate": 7.488872218054513e-05, "loss": 0.5054, "step": 12513 }, { "epoch": 0.9278564543634611, "grad_norm": 0.3766547739505768, "learning_rate": 7.487871967991998e-05, "loss": 0.4681, "step": 12514 }, { "epoch": 0.92793059983688, "grad_norm": 0.3771117329597473, "learning_rate": 7.486871717929482e-05, "loss": 0.4979, "step": 12515 }, { "epoch": 0.9280047453102988, "grad_norm": 0.3543343245983124, "learning_rate": 7.485871467866967e-05, "loss": 0.4547, "step": 12516 }, { "epoch": 0.9280788907837176, "grad_norm": 0.38869306445121765, "learning_rate": 7.484871217804451e-05, "loss": 0.4926, "step": 12517 }, { "epoch": 0.9281530362571365, "grad_norm": 0.3307114243507385, "learning_rate": 7.483870967741936e-05, "loss": 0.4336, "step": 12518 }, { "epoch": 0.9282271817305553, "grad_norm": 0.3692863881587982, "learning_rate": 7.48287071767942e-05, "loss": 0.4844, "step": 12519 }, { "epoch": 0.9283013272039742, "grad_norm": 0.3503321409225464, "learning_rate": 7.481870467616905e-05, "loss": 0.496, "step": 12520 }, { "epoch": 0.9283754726773931, "grad_norm": 0.37934598326683044, "learning_rate": 7.480870217554388e-05, "loss": 0.4545, "step": 12521 }, { "epoch": 0.9284496181508118, "grad_norm": 0.3655300438404083, "learning_rate": 7.479869967491874e-05, "loss": 0.514, "step": 12522 }, { "epoch": 0.9285237636242307, "grad_norm": 0.36558103561401367, "learning_rate": 7.478869717429357e-05, "loss": 0.4993, "step": 12523 }, { "epoch": 0.9285979090976496, "grad_norm": 0.35874512791633606, "learning_rate": 7.477869467366842e-05, "loss": 0.4822, "step": 12524 }, { "epoch": 0.9286720545710684, "grad_norm": 0.35686156153678894, "learning_rate": 7.476869217304326e-05, "loss": 0.469, "step": 12525 }, { "epoch": 0.9287462000444873, "grad_norm": 0.3587223291397095, "learning_rate": 7.47586896724181e-05, "loss": 0.4731, "step": 12526 }, { "epoch": 0.9288203455179062, "grad_norm": 0.3636866807937622, "learning_rate": 7.474868717179295e-05, "loss": 0.4588, "step": 12527 }, { "epoch": 0.9288944909913249, "grad_norm": 0.37691494822502136, "learning_rate": 7.473868467116779e-05, "loss": 0.5342, "step": 12528 }, { "epoch": 0.9289686364647438, "grad_norm": 0.3700583875179291, "learning_rate": 7.472868217054264e-05, "loss": 0.4809, "step": 12529 }, { "epoch": 0.9290427819381627, "grad_norm": 0.36473625898361206, "learning_rate": 7.471867966991747e-05, "loss": 0.5143, "step": 12530 }, { "epoch": 0.9291169274115815, "grad_norm": 0.3725985884666443, "learning_rate": 7.470867716929232e-05, "loss": 0.4961, "step": 12531 }, { "epoch": 0.9291910728850004, "grad_norm": 0.3858032524585724, "learning_rate": 7.469867466866717e-05, "loss": 0.4647, "step": 12532 }, { "epoch": 0.9292652183584192, "grad_norm": 0.35019242763519287, "learning_rate": 7.468867216804201e-05, "loss": 0.4621, "step": 12533 }, { "epoch": 0.929339363831838, "grad_norm": 0.37216392159461975, "learning_rate": 7.467866966741686e-05, "loss": 0.486, "step": 12534 }, { "epoch": 0.9294135093052569, "grad_norm": 0.3496500849723816, "learning_rate": 7.466866716679171e-05, "loss": 0.4839, "step": 12535 }, { "epoch": 0.9294876547786758, "grad_norm": 0.3544599413871765, "learning_rate": 7.465866466616655e-05, "loss": 0.4957, "step": 12536 }, { "epoch": 0.9295618002520946, "grad_norm": 0.35691404342651367, "learning_rate": 7.46486621655414e-05, "loss": 0.4872, "step": 12537 }, { "epoch": 0.9296359457255134, "grad_norm": 0.3550892472267151, "learning_rate": 7.463865966491624e-05, "loss": 0.477, "step": 12538 }, { "epoch": 0.9297100911989323, "grad_norm": 0.3692510426044464, "learning_rate": 7.462865716429107e-05, "loss": 0.4808, "step": 12539 }, { "epoch": 0.9297842366723511, "grad_norm": 0.3772641122341156, "learning_rate": 7.461865466366593e-05, "loss": 0.4915, "step": 12540 }, { "epoch": 0.92985838214577, "grad_norm": 0.3771009147167206, "learning_rate": 7.460865216304076e-05, "loss": 0.5287, "step": 12541 }, { "epoch": 0.9299325276191889, "grad_norm": 0.33997493982315063, "learning_rate": 7.459864966241561e-05, "loss": 0.4538, "step": 12542 }, { "epoch": 0.9300066730926076, "grad_norm": 0.3796267509460449, "learning_rate": 7.458864716179045e-05, "loss": 0.5074, "step": 12543 }, { "epoch": 0.9300808185660265, "grad_norm": 0.35007837414741516, "learning_rate": 7.45786446611653e-05, "loss": 0.4583, "step": 12544 }, { "epoch": 0.9301549640394454, "grad_norm": 0.37312015891075134, "learning_rate": 7.456864216054014e-05, "loss": 0.5046, "step": 12545 }, { "epoch": 0.9302291095128642, "grad_norm": 0.3623892068862915, "learning_rate": 7.455863965991499e-05, "loss": 0.4757, "step": 12546 }, { "epoch": 0.9303032549862831, "grad_norm": 0.3662863075733185, "learning_rate": 7.454863715928983e-05, "loss": 0.4811, "step": 12547 }, { "epoch": 0.930377400459702, "grad_norm": 0.3533133268356323, "learning_rate": 7.453863465866468e-05, "loss": 0.4782, "step": 12548 }, { "epoch": 0.9304515459331207, "grad_norm": 0.3353319764137268, "learning_rate": 7.452863215803951e-05, "loss": 0.4733, "step": 12549 }, { "epoch": 0.9305256914065396, "grad_norm": 0.34523048996925354, "learning_rate": 7.451862965741435e-05, "loss": 0.4469, "step": 12550 }, { "epoch": 0.9305998368799585, "grad_norm": 0.35739776492118835, "learning_rate": 7.45086271567892e-05, "loss": 0.4727, "step": 12551 }, { "epoch": 0.9306739823533773, "grad_norm": 0.38050660490989685, "learning_rate": 7.449862465616404e-05, "loss": 0.5656, "step": 12552 }, { "epoch": 0.9307481278267962, "grad_norm": 0.336891770362854, "learning_rate": 7.448862215553889e-05, "loss": 0.4547, "step": 12553 }, { "epoch": 0.930822273300215, "grad_norm": 0.3856878876686096, "learning_rate": 7.447861965491373e-05, "loss": 0.5084, "step": 12554 }, { "epoch": 0.9308964187736338, "grad_norm": 0.3381787836551666, "learning_rate": 7.446861715428858e-05, "loss": 0.4406, "step": 12555 }, { "epoch": 0.9309705642470527, "grad_norm": 0.36366432905197144, "learning_rate": 7.445861465366341e-05, "loss": 0.4745, "step": 12556 }, { "epoch": 0.9310447097204716, "grad_norm": 0.3652551770210266, "learning_rate": 7.444861215303826e-05, "loss": 0.4684, "step": 12557 }, { "epoch": 0.9311188551938904, "grad_norm": 0.36810001730918884, "learning_rate": 7.44386096524131e-05, "loss": 0.5129, "step": 12558 }, { "epoch": 0.9311930006673093, "grad_norm": 0.357940137386322, "learning_rate": 7.442860715178795e-05, "loss": 0.4814, "step": 12559 }, { "epoch": 0.9312671461407281, "grad_norm": 0.35263848304748535, "learning_rate": 7.441860465116279e-05, "loss": 0.4515, "step": 12560 }, { "epoch": 0.9313412916141469, "grad_norm": 0.43264240026474, "learning_rate": 7.440860215053764e-05, "loss": 0.5324, "step": 12561 }, { "epoch": 0.9314154370875658, "grad_norm": 0.4012320339679718, "learning_rate": 7.439859964991248e-05, "loss": 0.5407, "step": 12562 }, { "epoch": 0.9314895825609847, "grad_norm": 0.3788892924785614, "learning_rate": 7.438859714928731e-05, "loss": 0.4967, "step": 12563 }, { "epoch": 0.9315637280344035, "grad_norm": 0.3622603118419647, "learning_rate": 7.437859464866216e-05, "loss": 0.5219, "step": 12564 }, { "epoch": 0.9316378735078223, "grad_norm": 0.36548519134521484, "learning_rate": 7.436859214803702e-05, "loss": 0.4926, "step": 12565 }, { "epoch": 0.9317120189812412, "grad_norm": 0.361782044172287, "learning_rate": 7.435858964741185e-05, "loss": 0.4506, "step": 12566 }, { "epoch": 0.93178616445466, "grad_norm": 0.37184062600135803, "learning_rate": 7.43485871467867e-05, "loss": 0.5054, "step": 12567 }, { "epoch": 0.9318603099280789, "grad_norm": 0.36832717061042786, "learning_rate": 7.433858464616154e-05, "loss": 0.5118, "step": 12568 }, { "epoch": 0.9319344554014978, "grad_norm": 0.35926544666290283, "learning_rate": 7.432858214553639e-05, "loss": 0.471, "step": 12569 }, { "epoch": 0.9320086008749165, "grad_norm": 0.3602876365184784, "learning_rate": 7.431857964491124e-05, "loss": 0.4862, "step": 12570 }, { "epoch": 0.9320827463483354, "grad_norm": 0.3668113648891449, "learning_rate": 7.430857714428608e-05, "loss": 0.4667, "step": 12571 }, { "epoch": 0.9321568918217543, "grad_norm": 0.3675438463687897, "learning_rate": 7.429857464366093e-05, "loss": 0.5376, "step": 12572 }, { "epoch": 0.9322310372951731, "grad_norm": 0.3783434331417084, "learning_rate": 7.428857214303577e-05, "loss": 0.5101, "step": 12573 }, { "epoch": 0.932305182768592, "grad_norm": 0.37862223386764526, "learning_rate": 7.427856964241062e-05, "loss": 0.4866, "step": 12574 }, { "epoch": 0.9323793282420109, "grad_norm": 0.34475430846214294, "learning_rate": 7.426856714178545e-05, "loss": 0.4963, "step": 12575 }, { "epoch": 0.9324534737154296, "grad_norm": 0.3569222390651703, "learning_rate": 7.425856464116029e-05, "loss": 0.4629, "step": 12576 }, { "epoch": 0.9325276191888485, "grad_norm": 0.3474470376968384, "learning_rate": 7.424856214053514e-05, "loss": 0.4731, "step": 12577 }, { "epoch": 0.9326017646622674, "grad_norm": 0.39677125215530396, "learning_rate": 7.423855963990998e-05, "loss": 0.5358, "step": 12578 }, { "epoch": 0.9326759101356862, "grad_norm": 0.3520713746547699, "learning_rate": 7.422855713928483e-05, "loss": 0.455, "step": 12579 }, { "epoch": 0.9327500556091051, "grad_norm": 0.3594093918800354, "learning_rate": 7.421855463865967e-05, "loss": 0.4845, "step": 12580 }, { "epoch": 0.932824201082524, "grad_norm": 0.36820778250694275, "learning_rate": 7.420855213803452e-05, "loss": 0.5074, "step": 12581 }, { "epoch": 0.9328983465559427, "grad_norm": 0.36617761850357056, "learning_rate": 7.419854963740935e-05, "loss": 0.4961, "step": 12582 }, { "epoch": 0.9329724920293616, "grad_norm": 0.36656683683395386, "learning_rate": 7.41885471367842e-05, "loss": 0.5136, "step": 12583 }, { "epoch": 0.9330466375027805, "grad_norm": 0.33604350686073303, "learning_rate": 7.417854463615904e-05, "loss": 0.4767, "step": 12584 }, { "epoch": 0.9331207829761993, "grad_norm": 0.3722134232521057, "learning_rate": 7.416854213553389e-05, "loss": 0.4894, "step": 12585 }, { "epoch": 0.9331949284496182, "grad_norm": 0.37430062890052795, "learning_rate": 7.415853963490873e-05, "loss": 0.5079, "step": 12586 }, { "epoch": 0.933269073923037, "grad_norm": 0.3783397972583771, "learning_rate": 7.414853713428358e-05, "loss": 0.5302, "step": 12587 }, { "epoch": 0.9333432193964558, "grad_norm": 0.35407841205596924, "learning_rate": 7.413853463365842e-05, "loss": 0.4814, "step": 12588 }, { "epoch": 0.9334173648698747, "grad_norm": 0.34376683831214905, "learning_rate": 7.412853213303325e-05, "loss": 0.4611, "step": 12589 }, { "epoch": 0.9334915103432936, "grad_norm": 0.34799134731292725, "learning_rate": 7.41185296324081e-05, "loss": 0.4829, "step": 12590 }, { "epoch": 0.9335656558167124, "grad_norm": 0.38180607557296753, "learning_rate": 7.410852713178294e-05, "loss": 0.5025, "step": 12591 }, { "epoch": 0.9336398012901312, "grad_norm": 0.3360190987586975, "learning_rate": 7.409852463115779e-05, "loss": 0.4436, "step": 12592 }, { "epoch": 0.9337139467635501, "grad_norm": 0.38372713327407837, "learning_rate": 7.408852213053263e-05, "loss": 0.5293, "step": 12593 }, { "epoch": 0.9337880922369689, "grad_norm": 0.3626744747161865, "learning_rate": 7.407851962990748e-05, "loss": 0.4976, "step": 12594 }, { "epoch": 0.9338622377103878, "grad_norm": 0.32625123858451843, "learning_rate": 7.406851712928232e-05, "loss": 0.4508, "step": 12595 }, { "epoch": 0.9339363831838067, "grad_norm": 0.3588066101074219, "learning_rate": 7.405851462865717e-05, "loss": 0.47, "step": 12596 }, { "epoch": 0.9340105286572254, "grad_norm": 0.36808934807777405, "learning_rate": 7.4048512128032e-05, "loss": 0.4833, "step": 12597 }, { "epoch": 0.9340846741306443, "grad_norm": 0.36009833216667175, "learning_rate": 7.403850962740686e-05, "loss": 0.4817, "step": 12598 }, { "epoch": 0.9341588196040632, "grad_norm": 0.342312216758728, "learning_rate": 7.402850712678169e-05, "loss": 0.4773, "step": 12599 }, { "epoch": 0.934232965077482, "grad_norm": 0.334254652261734, "learning_rate": 7.401850462615654e-05, "loss": 0.4512, "step": 12600 }, { "epoch": 0.9343071105509009, "grad_norm": 0.3532531261444092, "learning_rate": 7.400850212553138e-05, "loss": 0.4694, "step": 12601 }, { "epoch": 0.9343812560243198, "grad_norm": 0.3325973451137543, "learning_rate": 7.399849962490623e-05, "loss": 0.4577, "step": 12602 }, { "epoch": 0.9344554014977385, "grad_norm": 0.33915671706199646, "learning_rate": 7.398849712428108e-05, "loss": 0.4622, "step": 12603 }, { "epoch": 0.9345295469711574, "grad_norm": 0.37766265869140625, "learning_rate": 7.397849462365592e-05, "loss": 0.4764, "step": 12604 }, { "epoch": 0.9346036924445763, "grad_norm": 0.35939159989356995, "learning_rate": 7.396849212303077e-05, "loss": 0.5027, "step": 12605 }, { "epoch": 0.9346778379179951, "grad_norm": 0.33251267671585083, "learning_rate": 7.39584896224056e-05, "loss": 0.4793, "step": 12606 }, { "epoch": 0.934751983391414, "grad_norm": 0.3732410669326782, "learning_rate": 7.394848712178046e-05, "loss": 0.481, "step": 12607 }, { "epoch": 0.9348261288648329, "grad_norm": 0.36018818616867065, "learning_rate": 7.39384846211553e-05, "loss": 0.473, "step": 12608 }, { "epoch": 0.9349002743382516, "grad_norm": 0.359063059091568, "learning_rate": 7.392848212053015e-05, "loss": 0.4879, "step": 12609 }, { "epoch": 0.9349744198116705, "grad_norm": 0.38851284980773926, "learning_rate": 7.391847961990498e-05, "loss": 0.4848, "step": 12610 }, { "epoch": 0.9350485652850894, "grad_norm": 0.3680099844932556, "learning_rate": 7.390847711927983e-05, "loss": 0.4542, "step": 12611 }, { "epoch": 0.9351227107585082, "grad_norm": 0.36065801978111267, "learning_rate": 7.389847461865467e-05, "loss": 0.4898, "step": 12612 }, { "epoch": 0.935196856231927, "grad_norm": 0.3508821427822113, "learning_rate": 7.388847211802951e-05, "loss": 0.475, "step": 12613 }, { "epoch": 0.9352710017053459, "grad_norm": 0.3461889624595642, "learning_rate": 7.387846961740436e-05, "loss": 0.4541, "step": 12614 }, { "epoch": 0.9353451471787647, "grad_norm": 0.3598422408103943, "learning_rate": 7.38684671167792e-05, "loss": 0.4953, "step": 12615 }, { "epoch": 0.9354192926521836, "grad_norm": 0.38608333468437195, "learning_rate": 7.385846461615405e-05, "loss": 0.5235, "step": 12616 }, { "epoch": 0.9354934381256025, "grad_norm": 0.3356086015701294, "learning_rate": 7.384846211552888e-05, "loss": 0.4544, "step": 12617 }, { "epoch": 0.9355675835990213, "grad_norm": 0.3694072365760803, "learning_rate": 7.383845961490373e-05, "loss": 0.4905, "step": 12618 }, { "epoch": 0.9356417290724401, "grad_norm": 0.36217087507247925, "learning_rate": 7.382845711427857e-05, "loss": 0.4899, "step": 12619 }, { "epoch": 0.935715874545859, "grad_norm": 0.356863796710968, "learning_rate": 7.381845461365342e-05, "loss": 0.5073, "step": 12620 }, { "epoch": 0.9357900200192778, "grad_norm": 0.3506380617618561, "learning_rate": 7.380845211302826e-05, "loss": 0.4731, "step": 12621 }, { "epoch": 0.9358641654926967, "grad_norm": 0.3551088869571686, "learning_rate": 7.379844961240311e-05, "loss": 0.5073, "step": 12622 }, { "epoch": 0.9359383109661156, "grad_norm": 0.364650696516037, "learning_rate": 7.378844711177795e-05, "loss": 0.4746, "step": 12623 }, { "epoch": 0.9360124564395343, "grad_norm": 0.34201690554618835, "learning_rate": 7.37784446111528e-05, "loss": 0.4676, "step": 12624 }, { "epoch": 0.9360866019129532, "grad_norm": 0.3706342279911041, "learning_rate": 7.376844211052763e-05, "loss": 0.4997, "step": 12625 }, { "epoch": 0.9361607473863721, "grad_norm": 0.3608691692352295, "learning_rate": 7.375843960990247e-05, "loss": 0.4959, "step": 12626 }, { "epoch": 0.9362348928597909, "grad_norm": 0.3759165108203888, "learning_rate": 7.374843710927732e-05, "loss": 0.5258, "step": 12627 }, { "epoch": 0.9363090383332098, "grad_norm": 0.37009215354919434, "learning_rate": 7.373843460865216e-05, "loss": 0.4577, "step": 12628 }, { "epoch": 0.9363831838066287, "grad_norm": 0.38913390040397644, "learning_rate": 7.372843210802701e-05, "loss": 0.5573, "step": 12629 }, { "epoch": 0.9364573292800474, "grad_norm": 0.34923186898231506, "learning_rate": 7.371842960740185e-05, "loss": 0.437, "step": 12630 }, { "epoch": 0.9365314747534663, "grad_norm": 0.3951009511947632, "learning_rate": 7.37084271067767e-05, "loss": 0.5287, "step": 12631 }, { "epoch": 0.9366056202268851, "grad_norm": 0.3493008315563202, "learning_rate": 7.369842460615153e-05, "loss": 0.4485, "step": 12632 }, { "epoch": 0.936679765700304, "grad_norm": 0.334536612033844, "learning_rate": 7.368842210552638e-05, "loss": 0.4432, "step": 12633 }, { "epoch": 0.9367539111737229, "grad_norm": 0.367182195186615, "learning_rate": 7.367841960490122e-05, "loss": 0.4933, "step": 12634 }, { "epoch": 0.9368280566471416, "grad_norm": 0.3867112696170807, "learning_rate": 7.366841710427607e-05, "loss": 0.5304, "step": 12635 }, { "epoch": 0.9369022021205605, "grad_norm": 0.3443032205104828, "learning_rate": 7.365841460365092e-05, "loss": 0.4872, "step": 12636 }, { "epoch": 0.9369763475939794, "grad_norm": 0.3687652349472046, "learning_rate": 7.364841210302576e-05, "loss": 0.5265, "step": 12637 }, { "epoch": 0.9370504930673982, "grad_norm": 0.35267356038093567, "learning_rate": 7.363840960240061e-05, "loss": 0.4882, "step": 12638 }, { "epoch": 0.9371246385408171, "grad_norm": 0.3565536141395569, "learning_rate": 7.362840710177545e-05, "loss": 0.457, "step": 12639 }, { "epoch": 0.937198784014236, "grad_norm": 0.3672448694705963, "learning_rate": 7.36184046011503e-05, "loss": 0.5275, "step": 12640 }, { "epoch": 0.9372729294876547, "grad_norm": 0.3521357476711273, "learning_rate": 7.360840210052514e-05, "loss": 0.4687, "step": 12641 }, { "epoch": 0.9373470749610736, "grad_norm": 0.3255048394203186, "learning_rate": 7.359839959989999e-05, "loss": 0.4565, "step": 12642 }, { "epoch": 0.9374212204344925, "grad_norm": 0.36710765957832336, "learning_rate": 7.358839709927482e-05, "loss": 0.5093, "step": 12643 }, { "epoch": 0.9374953659079113, "grad_norm": 0.3514358699321747, "learning_rate": 7.357839459864967e-05, "loss": 0.4636, "step": 12644 }, { "epoch": 0.9375695113813302, "grad_norm": 0.375060111284256, "learning_rate": 7.356839209802451e-05, "loss": 0.4665, "step": 12645 }, { "epoch": 0.937643656854749, "grad_norm": 0.37243205308914185, "learning_rate": 7.355838959739936e-05, "loss": 0.4897, "step": 12646 }, { "epoch": 0.9377178023281678, "grad_norm": 0.3549444377422333, "learning_rate": 7.35483870967742e-05, "loss": 0.4729, "step": 12647 }, { "epoch": 0.9377919478015867, "grad_norm": 0.36249011754989624, "learning_rate": 7.353838459614905e-05, "loss": 0.4702, "step": 12648 }, { "epoch": 0.9378660932750056, "grad_norm": 0.34390994906425476, "learning_rate": 7.352838209552389e-05, "loss": 0.4514, "step": 12649 }, { "epoch": 0.9379402387484244, "grad_norm": 0.36310508847236633, "learning_rate": 7.351837959489872e-05, "loss": 0.4692, "step": 12650 }, { "epoch": 0.9380143842218432, "grad_norm": 0.36365559697151184, "learning_rate": 7.350837709427357e-05, "loss": 0.4988, "step": 12651 }, { "epoch": 0.9380885296952621, "grad_norm": 0.35015714168548584, "learning_rate": 7.349837459364841e-05, "loss": 0.488, "step": 12652 }, { "epoch": 0.9381626751686809, "grad_norm": 0.3612143397331238, "learning_rate": 7.348837209302326e-05, "loss": 0.5032, "step": 12653 }, { "epoch": 0.9382368206420998, "grad_norm": 0.3440679907798767, "learning_rate": 7.34783695923981e-05, "loss": 0.4453, "step": 12654 }, { "epoch": 0.9383109661155187, "grad_norm": 0.3463965952396393, "learning_rate": 7.346836709177295e-05, "loss": 0.4967, "step": 12655 }, { "epoch": 0.9383851115889374, "grad_norm": 0.38567230105400085, "learning_rate": 7.345836459114779e-05, "loss": 0.4923, "step": 12656 }, { "epoch": 0.9384592570623563, "grad_norm": 0.36155086755752563, "learning_rate": 7.344836209052264e-05, "loss": 0.4743, "step": 12657 }, { "epoch": 0.9385334025357752, "grad_norm": 0.3766467273235321, "learning_rate": 7.343835958989747e-05, "loss": 0.5067, "step": 12658 }, { "epoch": 0.938607548009194, "grad_norm": 0.3617345094680786, "learning_rate": 7.342835708927232e-05, "loss": 0.5189, "step": 12659 }, { "epoch": 0.9386816934826129, "grad_norm": 0.3420042097568512, "learning_rate": 7.341835458864716e-05, "loss": 0.4649, "step": 12660 }, { "epoch": 0.9387558389560318, "grad_norm": 0.35277339816093445, "learning_rate": 7.340835208802201e-05, "loss": 0.4703, "step": 12661 }, { "epoch": 0.9388299844294505, "grad_norm": 0.3785082995891571, "learning_rate": 7.339834958739685e-05, "loss": 0.5003, "step": 12662 }, { "epoch": 0.9389041299028694, "grad_norm": 0.379930317401886, "learning_rate": 7.338834708677169e-05, "loss": 0.5012, "step": 12663 }, { "epoch": 0.9389782753762883, "grad_norm": 0.36139118671417236, "learning_rate": 7.337834458614654e-05, "loss": 0.4903, "step": 12664 }, { "epoch": 0.9390524208497071, "grad_norm": 0.35227882862091064, "learning_rate": 7.336834208552137e-05, "loss": 0.4651, "step": 12665 }, { "epoch": 0.939126566323126, "grad_norm": 0.36496275663375854, "learning_rate": 7.335833958489623e-05, "loss": 0.4876, "step": 12666 }, { "epoch": 0.9392007117965449, "grad_norm": 0.3452761471271515, "learning_rate": 7.334833708427106e-05, "loss": 0.476, "step": 12667 }, { "epoch": 0.9392748572699636, "grad_norm": 0.3572264015674591, "learning_rate": 7.333833458364591e-05, "loss": 0.47, "step": 12668 }, { "epoch": 0.9393490027433825, "grad_norm": 0.39452967047691345, "learning_rate": 7.332833208302075e-05, "loss": 0.5317, "step": 12669 }, { "epoch": 0.9394231482168014, "grad_norm": 0.37420588731765747, "learning_rate": 7.33183295823956e-05, "loss": 0.4896, "step": 12670 }, { "epoch": 0.9394972936902202, "grad_norm": 0.37548142671585083, "learning_rate": 7.330832708177045e-05, "loss": 0.5252, "step": 12671 }, { "epoch": 0.939571439163639, "grad_norm": 0.35959455370903015, "learning_rate": 7.329832458114529e-05, "loss": 0.476, "step": 12672 }, { "epoch": 0.9396455846370579, "grad_norm": 0.36445775628089905, "learning_rate": 7.328832208052014e-05, "loss": 0.5004, "step": 12673 }, { "epoch": 0.9397197301104767, "grad_norm": 0.35961636900901794, "learning_rate": 7.327831957989498e-05, "loss": 0.5089, "step": 12674 }, { "epoch": 0.9397938755838956, "grad_norm": 0.3858284056186676, "learning_rate": 7.326831707926983e-05, "loss": 0.5039, "step": 12675 }, { "epoch": 0.9398680210573145, "grad_norm": 0.3509649336338043, "learning_rate": 7.325831457864466e-05, "loss": 0.4924, "step": 12676 }, { "epoch": 0.9399421665307333, "grad_norm": 0.364663690328598, "learning_rate": 7.324831207801951e-05, "loss": 0.4818, "step": 12677 }, { "epoch": 0.9400163120041521, "grad_norm": 0.3597727417945862, "learning_rate": 7.323830957739435e-05, "loss": 0.4642, "step": 12678 }, { "epoch": 0.940090457477571, "grad_norm": 0.36796796321868896, "learning_rate": 7.32283070767692e-05, "loss": 0.5088, "step": 12679 }, { "epoch": 0.9401646029509898, "grad_norm": 0.35449984669685364, "learning_rate": 7.321830457614404e-05, "loss": 0.4819, "step": 12680 }, { "epoch": 0.9402387484244087, "grad_norm": 0.3639141917228699, "learning_rate": 7.320830207551889e-05, "loss": 0.5249, "step": 12681 }, { "epoch": 0.9403128938978276, "grad_norm": 0.34562310576438904, "learning_rate": 7.319829957489373e-05, "loss": 0.4508, "step": 12682 }, { "epoch": 0.9403870393712463, "grad_norm": 0.34428155422210693, "learning_rate": 7.318829707426858e-05, "loss": 0.4308, "step": 12683 }, { "epoch": 0.9404611848446652, "grad_norm": 0.3527359068393707, "learning_rate": 7.317829457364341e-05, "loss": 0.4854, "step": 12684 }, { "epoch": 0.9405353303180841, "grad_norm": 0.37206050753593445, "learning_rate": 7.316829207301827e-05, "loss": 0.5052, "step": 12685 }, { "epoch": 0.9406094757915029, "grad_norm": 0.3387371599674225, "learning_rate": 7.31582895723931e-05, "loss": 0.4865, "step": 12686 }, { "epoch": 0.9406836212649218, "grad_norm": 0.358368843793869, "learning_rate": 7.314828707176794e-05, "loss": 0.4997, "step": 12687 }, { "epoch": 0.9407577667383407, "grad_norm": 0.35566943883895874, "learning_rate": 7.313828457114279e-05, "loss": 0.4824, "step": 12688 }, { "epoch": 0.9408319122117594, "grad_norm": 0.35596126317977905, "learning_rate": 7.312828207051763e-05, "loss": 0.4847, "step": 12689 }, { "epoch": 0.9409060576851783, "grad_norm": 0.35554808378219604, "learning_rate": 7.311827956989248e-05, "loss": 0.4647, "step": 12690 }, { "epoch": 0.9409802031585972, "grad_norm": 0.37755218148231506, "learning_rate": 7.310827706926732e-05, "loss": 0.5094, "step": 12691 }, { "epoch": 0.941054348632016, "grad_norm": 0.37661755084991455, "learning_rate": 7.309827456864217e-05, "loss": 0.4899, "step": 12692 }, { "epoch": 0.9411284941054349, "grad_norm": 0.39503660798072815, "learning_rate": 7.3088272068017e-05, "loss": 0.54, "step": 12693 }, { "epoch": 0.9412026395788538, "grad_norm": 0.37441980838775635, "learning_rate": 7.307826956739185e-05, "loss": 0.5109, "step": 12694 }, { "epoch": 0.9412767850522725, "grad_norm": 0.3815944790840149, "learning_rate": 7.306826706676669e-05, "loss": 0.5422, "step": 12695 }, { "epoch": 0.9413509305256914, "grad_norm": 0.3629320561885834, "learning_rate": 7.305826456614154e-05, "loss": 0.4752, "step": 12696 }, { "epoch": 0.9414250759991103, "grad_norm": 0.3711802065372467, "learning_rate": 7.304826206551638e-05, "loss": 0.5117, "step": 12697 }, { "epoch": 0.9414992214725291, "grad_norm": 0.36574992537498474, "learning_rate": 7.303825956489123e-05, "loss": 0.4967, "step": 12698 }, { "epoch": 0.941573366945948, "grad_norm": 0.353750079870224, "learning_rate": 7.302825706426607e-05, "loss": 0.482, "step": 12699 }, { "epoch": 0.9416475124193668, "grad_norm": 0.3407554626464844, "learning_rate": 7.30182545636409e-05, "loss": 0.4634, "step": 12700 }, { "epoch": 0.9417216578927856, "grad_norm": 0.3930665850639343, "learning_rate": 7.300825206301575e-05, "loss": 0.5656, "step": 12701 }, { "epoch": 0.9417958033662045, "grad_norm": 0.36046555638313293, "learning_rate": 7.299824956239059e-05, "loss": 0.4743, "step": 12702 }, { "epoch": 0.9418699488396234, "grad_norm": 0.34632426500320435, "learning_rate": 7.298824706176544e-05, "loss": 0.4683, "step": 12703 }, { "epoch": 0.9419440943130422, "grad_norm": 0.3824433386325836, "learning_rate": 7.297824456114029e-05, "loss": 0.5021, "step": 12704 }, { "epoch": 0.942018239786461, "grad_norm": 0.34146296977996826, "learning_rate": 7.296824206051513e-05, "loss": 0.459, "step": 12705 }, { "epoch": 0.9420923852598799, "grad_norm": 0.3670293092727661, "learning_rate": 7.295823955988998e-05, "loss": 0.4825, "step": 12706 }, { "epoch": 0.9421665307332987, "grad_norm": 0.383053719997406, "learning_rate": 7.294823705926482e-05, "loss": 0.5334, "step": 12707 }, { "epoch": 0.9422406762067176, "grad_norm": 0.36488452553749084, "learning_rate": 7.293823455863967e-05, "loss": 0.5037, "step": 12708 }, { "epoch": 0.9423148216801365, "grad_norm": 0.37379708886146545, "learning_rate": 7.292823205801452e-05, "loss": 0.5217, "step": 12709 }, { "epoch": 0.9423889671535552, "grad_norm": 0.36500152945518494, "learning_rate": 7.291822955738936e-05, "loss": 0.4867, "step": 12710 }, { "epoch": 0.9424631126269741, "grad_norm": 0.3563423752784729, "learning_rate": 7.29082270567642e-05, "loss": 0.4683, "step": 12711 }, { "epoch": 0.942537258100393, "grad_norm": 0.3528416156768799, "learning_rate": 7.289822455613904e-05, "loss": 0.4764, "step": 12712 }, { "epoch": 0.9426114035738118, "grad_norm": 0.3495711386203766, "learning_rate": 7.288822205551388e-05, "loss": 0.4496, "step": 12713 }, { "epoch": 0.9426855490472307, "grad_norm": 0.36039668321609497, "learning_rate": 7.287821955488873e-05, "loss": 0.4599, "step": 12714 }, { "epoch": 0.9427596945206496, "grad_norm": 0.37054330110549927, "learning_rate": 7.286821705426357e-05, "loss": 0.5211, "step": 12715 }, { "epoch": 0.9428338399940683, "grad_norm": 0.3554651737213135, "learning_rate": 7.285821455363842e-05, "loss": 0.4611, "step": 12716 }, { "epoch": 0.9429079854674872, "grad_norm": 0.34829363226890564, "learning_rate": 7.284821205301326e-05, "loss": 0.4902, "step": 12717 }, { "epoch": 0.9429821309409061, "grad_norm": 0.38922443985939026, "learning_rate": 7.28382095523881e-05, "loss": 0.5246, "step": 12718 }, { "epoch": 0.9430562764143249, "grad_norm": 0.3545495569705963, "learning_rate": 7.282820705176294e-05, "loss": 0.51, "step": 12719 }, { "epoch": 0.9431304218877438, "grad_norm": 0.3709557056427002, "learning_rate": 7.28182045511378e-05, "loss": 0.4988, "step": 12720 }, { "epoch": 0.9432045673611626, "grad_norm": 0.37327542901039124, "learning_rate": 7.280820205051263e-05, "loss": 0.5082, "step": 12721 }, { "epoch": 0.9432787128345814, "grad_norm": 0.3747099041938782, "learning_rate": 7.279819954988748e-05, "loss": 0.4766, "step": 12722 }, { "epoch": 0.9433528583080003, "grad_norm": 0.35443779826164246, "learning_rate": 7.278819704926232e-05, "loss": 0.4542, "step": 12723 }, { "epoch": 0.9434270037814192, "grad_norm": 0.354324609041214, "learning_rate": 7.277819454863716e-05, "loss": 0.4591, "step": 12724 }, { "epoch": 0.943501149254838, "grad_norm": 0.3866470754146576, "learning_rate": 7.2768192048012e-05, "loss": 0.5206, "step": 12725 }, { "epoch": 0.9435752947282569, "grad_norm": 0.3442578911781311, "learning_rate": 7.275818954738684e-05, "loss": 0.4874, "step": 12726 }, { "epoch": 0.9436494402016757, "grad_norm": 0.34116214513778687, "learning_rate": 7.27481870467617e-05, "loss": 0.4335, "step": 12727 }, { "epoch": 0.9437235856750945, "grad_norm": 0.3745110034942627, "learning_rate": 7.273818454613653e-05, "loss": 0.5004, "step": 12728 }, { "epoch": 0.9437977311485134, "grad_norm": 0.36600902676582336, "learning_rate": 7.272818204551138e-05, "loss": 0.5174, "step": 12729 }, { "epoch": 0.9438718766219323, "grad_norm": 0.3470575511455536, "learning_rate": 7.271817954488622e-05, "loss": 0.4763, "step": 12730 }, { "epoch": 0.9439460220953511, "grad_norm": 0.3621063232421875, "learning_rate": 7.270817704426107e-05, "loss": 0.4969, "step": 12731 }, { "epoch": 0.9440201675687699, "grad_norm": 0.3514854907989502, "learning_rate": 7.26981745436359e-05, "loss": 0.5012, "step": 12732 }, { "epoch": 0.9440943130421888, "grad_norm": 0.35898256301879883, "learning_rate": 7.268817204301076e-05, "loss": 0.4977, "step": 12733 }, { "epoch": 0.9441684585156076, "grad_norm": 0.372446745634079, "learning_rate": 7.26781695423856e-05, "loss": 0.4827, "step": 12734 }, { "epoch": 0.9442426039890265, "grad_norm": 0.36470794677734375, "learning_rate": 7.266816704176045e-05, "loss": 0.4893, "step": 12735 }, { "epoch": 0.9443167494624454, "grad_norm": 0.3697460889816284, "learning_rate": 7.265816454113528e-05, "loss": 0.5056, "step": 12736 }, { "epoch": 0.9443908949358641, "grad_norm": 0.38600954413414, "learning_rate": 7.264816204051012e-05, "loss": 0.5312, "step": 12737 }, { "epoch": 0.944465040409283, "grad_norm": 0.3704920709133148, "learning_rate": 7.263815953988497e-05, "loss": 0.4927, "step": 12738 }, { "epoch": 0.9445391858827019, "grad_norm": 0.37557452917099, "learning_rate": 7.262815703925982e-05, "loss": 0.4972, "step": 12739 }, { "epoch": 0.9446133313561207, "grad_norm": 0.3582669198513031, "learning_rate": 7.261815453863466e-05, "loss": 0.4914, "step": 12740 }, { "epoch": 0.9446874768295396, "grad_norm": 0.3646637499332428, "learning_rate": 7.260815203800951e-05, "loss": 0.4782, "step": 12741 }, { "epoch": 0.9447616223029585, "grad_norm": 0.3618316352367401, "learning_rate": 7.259814953738436e-05, "loss": 0.4666, "step": 12742 }, { "epoch": 0.9448357677763772, "grad_norm": 0.36272725462913513, "learning_rate": 7.25881470367592e-05, "loss": 0.5041, "step": 12743 }, { "epoch": 0.9449099132497961, "grad_norm": 0.3785581886768341, "learning_rate": 7.257814453613405e-05, "loss": 0.5096, "step": 12744 }, { "epoch": 0.9449840587232149, "grad_norm": 0.35329127311706543, "learning_rate": 7.256814203550888e-05, "loss": 0.4898, "step": 12745 }, { "epoch": 0.9450582041966338, "grad_norm": 0.3850589096546173, "learning_rate": 7.255813953488373e-05, "loss": 0.541, "step": 12746 }, { "epoch": 0.9451323496700527, "grad_norm": 0.3566162884235382, "learning_rate": 7.254813703425857e-05, "loss": 0.5018, "step": 12747 }, { "epoch": 0.9452064951434714, "grad_norm": 0.3868442177772522, "learning_rate": 7.253813453363342e-05, "loss": 0.4961, "step": 12748 }, { "epoch": 0.9452806406168903, "grad_norm": 0.3848858177661896, "learning_rate": 7.252813203300826e-05, "loss": 0.5267, "step": 12749 }, { "epoch": 0.9453547860903092, "grad_norm": 0.35237643122673035, "learning_rate": 7.25181295323831e-05, "loss": 0.5212, "step": 12750 }, { "epoch": 0.945428931563728, "grad_norm": 0.3786137104034424, "learning_rate": 7.250812703175795e-05, "loss": 0.5292, "step": 12751 }, { "epoch": 0.9455030770371469, "grad_norm": 0.3358807861804962, "learning_rate": 7.249812453113278e-05, "loss": 0.4878, "step": 12752 }, { "epoch": 0.9455772225105658, "grad_norm": 0.36725082993507385, "learning_rate": 7.248812203050763e-05, "loss": 0.4647, "step": 12753 }, { "epoch": 0.9456513679839845, "grad_norm": 0.3803938925266266, "learning_rate": 7.247811952988247e-05, "loss": 0.5327, "step": 12754 }, { "epoch": 0.9457255134574034, "grad_norm": 0.344963014125824, "learning_rate": 7.246811702925732e-05, "loss": 0.4699, "step": 12755 }, { "epoch": 0.9457996589308223, "grad_norm": 0.3456801474094391, "learning_rate": 7.245811452863216e-05, "loss": 0.4644, "step": 12756 }, { "epoch": 0.9458738044042411, "grad_norm": 0.3357424736022949, "learning_rate": 7.244811202800701e-05, "loss": 0.4545, "step": 12757 }, { "epoch": 0.94594794987766, "grad_norm": 0.3574605882167816, "learning_rate": 7.243810952738185e-05, "loss": 0.4981, "step": 12758 }, { "epoch": 0.9460220953510788, "grad_norm": 0.34735244512557983, "learning_rate": 7.24281070267567e-05, "loss": 0.49, "step": 12759 }, { "epoch": 0.9460962408244976, "grad_norm": 0.3546578586101532, "learning_rate": 7.241810452613154e-05, "loss": 0.4967, "step": 12760 }, { "epoch": 0.9461703862979165, "grad_norm": 0.3520466089248657, "learning_rate": 7.240810202550637e-05, "loss": 0.4655, "step": 12761 }, { "epoch": 0.9462445317713354, "grad_norm": 0.36938634514808655, "learning_rate": 7.239809952488122e-05, "loss": 0.4949, "step": 12762 }, { "epoch": 0.9463186772447542, "grad_norm": 0.35735782980918884, "learning_rate": 7.238809702425606e-05, "loss": 0.48, "step": 12763 }, { "epoch": 0.946392822718173, "grad_norm": 0.3740631639957428, "learning_rate": 7.237809452363091e-05, "loss": 0.5325, "step": 12764 }, { "epoch": 0.9464669681915919, "grad_norm": 0.36064833402633667, "learning_rate": 7.236809202300575e-05, "loss": 0.4461, "step": 12765 }, { "epoch": 0.9465411136650107, "grad_norm": 0.3764350116252899, "learning_rate": 7.23580895223806e-05, "loss": 0.5125, "step": 12766 }, { "epoch": 0.9466152591384296, "grad_norm": 0.36901602149009705, "learning_rate": 7.234808702175544e-05, "loss": 0.4789, "step": 12767 }, { "epoch": 0.9466894046118485, "grad_norm": 0.3760896921157837, "learning_rate": 7.233808452113029e-05, "loss": 0.5016, "step": 12768 }, { "epoch": 0.9467635500852672, "grad_norm": 0.350852906703949, "learning_rate": 7.232808202050512e-05, "loss": 0.4758, "step": 12769 }, { "epoch": 0.9468376955586861, "grad_norm": 0.3957947790622711, "learning_rate": 7.231807951987997e-05, "loss": 0.5138, "step": 12770 }, { "epoch": 0.946911841032105, "grad_norm": 0.36417803168296814, "learning_rate": 7.230807701925481e-05, "loss": 0.4914, "step": 12771 }, { "epoch": 0.9469859865055238, "grad_norm": 0.3447990417480469, "learning_rate": 7.229807451862966e-05, "loss": 0.477, "step": 12772 }, { "epoch": 0.9470601319789427, "grad_norm": 0.3680471181869507, "learning_rate": 7.22880720180045e-05, "loss": 0.4746, "step": 12773 }, { "epoch": 0.9471342774523616, "grad_norm": 0.34974169731140137, "learning_rate": 7.227806951737935e-05, "loss": 0.4498, "step": 12774 }, { "epoch": 0.9472084229257803, "grad_norm": 0.3458268642425537, "learning_rate": 7.226806701675419e-05, "loss": 0.4357, "step": 12775 }, { "epoch": 0.9472825683991992, "grad_norm": 0.3569639325141907, "learning_rate": 7.225806451612904e-05, "loss": 0.4799, "step": 12776 }, { "epoch": 0.9473567138726181, "grad_norm": 0.3567394018173218, "learning_rate": 7.224806201550389e-05, "loss": 0.4837, "step": 12777 }, { "epoch": 0.9474308593460369, "grad_norm": 0.3527597188949585, "learning_rate": 7.223805951487872e-05, "loss": 0.4844, "step": 12778 }, { "epoch": 0.9475050048194558, "grad_norm": 0.3525080680847168, "learning_rate": 7.222805701425358e-05, "loss": 0.4735, "step": 12779 }, { "epoch": 0.9475791502928747, "grad_norm": 0.4013825058937073, "learning_rate": 7.221805451362841e-05, "loss": 0.4785, "step": 12780 }, { "epoch": 0.9476532957662934, "grad_norm": 0.36306875944137573, "learning_rate": 7.220805201300326e-05, "loss": 0.5167, "step": 12781 }, { "epoch": 0.9477274412397123, "grad_norm": 0.3389529585838318, "learning_rate": 7.21980495123781e-05, "loss": 0.4653, "step": 12782 }, { "epoch": 0.9478015867131312, "grad_norm": 0.36614537239074707, "learning_rate": 7.218804701175295e-05, "loss": 0.4931, "step": 12783 }, { "epoch": 0.94787573218655, "grad_norm": 0.37027281522750854, "learning_rate": 7.217804451112779e-05, "loss": 0.5325, "step": 12784 }, { "epoch": 0.9479498776599689, "grad_norm": 0.3593825101852417, "learning_rate": 7.216804201050264e-05, "loss": 0.4966, "step": 12785 }, { "epoch": 0.9480240231333877, "grad_norm": 0.3468841314315796, "learning_rate": 7.215803950987748e-05, "loss": 0.4566, "step": 12786 }, { "epoch": 0.9480981686068065, "grad_norm": 0.3906525671482086, "learning_rate": 7.214803700925231e-05, "loss": 0.502, "step": 12787 }, { "epoch": 0.9481723140802254, "grad_norm": 0.363772451877594, "learning_rate": 7.213803450862716e-05, "loss": 0.512, "step": 12788 }, { "epoch": 0.9482464595536443, "grad_norm": 0.35075318813323975, "learning_rate": 7.2128032008002e-05, "loss": 0.4769, "step": 12789 }, { "epoch": 0.9483206050270631, "grad_norm": 0.3646467328071594, "learning_rate": 7.211802950737685e-05, "loss": 0.4682, "step": 12790 }, { "epoch": 0.9483947505004819, "grad_norm": 0.35007598996162415, "learning_rate": 7.210802700675169e-05, "loss": 0.4585, "step": 12791 }, { "epoch": 0.9484688959739008, "grad_norm": 0.3542552888393402, "learning_rate": 7.209802450612654e-05, "loss": 0.4701, "step": 12792 }, { "epoch": 0.9485430414473196, "grad_norm": 0.356784850358963, "learning_rate": 7.208802200550138e-05, "loss": 0.5048, "step": 12793 }, { "epoch": 0.9486171869207385, "grad_norm": 0.36564838886260986, "learning_rate": 7.207801950487623e-05, "loss": 0.4806, "step": 12794 }, { "epoch": 0.9486913323941574, "grad_norm": 0.3650089502334595, "learning_rate": 7.206801700425106e-05, "loss": 0.5141, "step": 12795 }, { "epoch": 0.9487654778675761, "grad_norm": 0.34967514872550964, "learning_rate": 7.205801450362591e-05, "loss": 0.4661, "step": 12796 }, { "epoch": 0.948839623340995, "grad_norm": 0.37193363904953003, "learning_rate": 7.204801200300075e-05, "loss": 0.52, "step": 12797 }, { "epoch": 0.9489137688144139, "grad_norm": 0.34721827507019043, "learning_rate": 7.203800950237559e-05, "loss": 0.4533, "step": 12798 }, { "epoch": 0.9489879142878327, "grad_norm": 0.3685016930103302, "learning_rate": 7.202800700175044e-05, "loss": 0.4949, "step": 12799 }, { "epoch": 0.9490620597612516, "grad_norm": 0.37195876240730286, "learning_rate": 7.201800450112528e-05, "loss": 0.501, "step": 12800 }, { "epoch": 0.9491362052346705, "grad_norm": 0.3447183668613434, "learning_rate": 7.200800200050013e-05, "loss": 0.4688, "step": 12801 }, { "epoch": 0.9492103507080892, "grad_norm": 0.37533581256866455, "learning_rate": 7.199799949987496e-05, "loss": 0.5242, "step": 12802 }, { "epoch": 0.9492844961815081, "grad_norm": 0.3816244602203369, "learning_rate": 7.198799699924981e-05, "loss": 0.505, "step": 12803 }, { "epoch": 0.949358641654927, "grad_norm": 0.3457428216934204, "learning_rate": 7.197799449862465e-05, "loss": 0.4588, "step": 12804 }, { "epoch": 0.9494327871283458, "grad_norm": 0.33914950489997864, "learning_rate": 7.19679919979995e-05, "loss": 0.4724, "step": 12805 }, { "epoch": 0.9495069326017647, "grad_norm": 0.3477778434753418, "learning_rate": 7.195798949737434e-05, "loss": 0.4658, "step": 12806 }, { "epoch": 0.9495810780751835, "grad_norm": 0.3711676597595215, "learning_rate": 7.194798699674919e-05, "loss": 0.5145, "step": 12807 }, { "epoch": 0.9496552235486023, "grad_norm": 0.35872089862823486, "learning_rate": 7.193798449612403e-05, "loss": 0.479, "step": 12808 }, { "epoch": 0.9497293690220212, "grad_norm": 0.3616136312484741, "learning_rate": 7.192798199549888e-05, "loss": 0.4896, "step": 12809 }, { "epoch": 0.9498035144954401, "grad_norm": 0.35647082328796387, "learning_rate": 7.191797949487373e-05, "loss": 0.4872, "step": 12810 }, { "epoch": 0.9498776599688589, "grad_norm": 0.38944801688194275, "learning_rate": 7.190797699424857e-05, "loss": 0.5302, "step": 12811 }, { "epoch": 0.9499518054422778, "grad_norm": 0.35339853167533875, "learning_rate": 7.189797449362342e-05, "loss": 0.4538, "step": 12812 }, { "epoch": 0.9500259509156966, "grad_norm": 0.39561644196510315, "learning_rate": 7.188797199299825e-05, "loss": 0.5148, "step": 12813 }, { "epoch": 0.9501000963891154, "grad_norm": 0.37241819500923157, "learning_rate": 7.18779694923731e-05, "loss": 0.516, "step": 12814 }, { "epoch": 0.9501742418625343, "grad_norm": 0.375940203666687, "learning_rate": 7.186796699174794e-05, "loss": 0.5143, "step": 12815 }, { "epoch": 0.9502483873359532, "grad_norm": 0.3805354833602905, "learning_rate": 7.185796449112279e-05, "loss": 0.5026, "step": 12816 }, { "epoch": 0.950322532809372, "grad_norm": 0.3552609384059906, "learning_rate": 7.184796199049763e-05, "loss": 0.4778, "step": 12817 }, { "epoch": 0.9503966782827908, "grad_norm": 0.35775652527809143, "learning_rate": 7.183795948987248e-05, "loss": 0.4626, "step": 12818 }, { "epoch": 0.9504708237562097, "grad_norm": 0.3582926392555237, "learning_rate": 7.182795698924732e-05, "loss": 0.4833, "step": 12819 }, { "epoch": 0.9505449692296285, "grad_norm": 0.35239124298095703, "learning_rate": 7.181795448862217e-05, "loss": 0.4643, "step": 12820 }, { "epoch": 0.9506191147030474, "grad_norm": 0.34930217266082764, "learning_rate": 7.1807951987997e-05, "loss": 0.4772, "step": 12821 }, { "epoch": 0.9506932601764663, "grad_norm": 0.3424629271030426, "learning_rate": 7.179794948737185e-05, "loss": 0.4731, "step": 12822 }, { "epoch": 0.950767405649885, "grad_norm": 0.32781943678855896, "learning_rate": 7.178794698674669e-05, "loss": 0.4935, "step": 12823 }, { "epoch": 0.9508415511233039, "grad_norm": 0.3696540594100952, "learning_rate": 7.177794448612153e-05, "loss": 0.5041, "step": 12824 }, { "epoch": 0.9509156965967228, "grad_norm": 0.3508152663707733, "learning_rate": 7.176794198549638e-05, "loss": 0.4662, "step": 12825 }, { "epoch": 0.9509898420701416, "grad_norm": 0.3506000339984894, "learning_rate": 7.175793948487122e-05, "loss": 0.441, "step": 12826 }, { "epoch": 0.9510639875435605, "grad_norm": 0.358999103307724, "learning_rate": 7.174793698424607e-05, "loss": 0.498, "step": 12827 }, { "epoch": 0.9511381330169794, "grad_norm": 0.3719440996646881, "learning_rate": 7.17379344836209e-05, "loss": 0.5085, "step": 12828 }, { "epoch": 0.9512122784903981, "grad_norm": 0.36629003286361694, "learning_rate": 7.172793198299575e-05, "loss": 0.5415, "step": 12829 }, { "epoch": 0.951286423963817, "grad_norm": 0.33817267417907715, "learning_rate": 7.171792948237059e-05, "loss": 0.4717, "step": 12830 }, { "epoch": 0.9513605694372359, "grad_norm": 0.3732171654701233, "learning_rate": 7.170792698174544e-05, "loss": 0.5182, "step": 12831 }, { "epoch": 0.9514347149106547, "grad_norm": 0.35649824142456055, "learning_rate": 7.169792448112028e-05, "loss": 0.4742, "step": 12832 }, { "epoch": 0.9515088603840736, "grad_norm": 0.3975258767604828, "learning_rate": 7.168792198049513e-05, "loss": 0.5228, "step": 12833 }, { "epoch": 0.9515830058574924, "grad_norm": 0.3576347231864929, "learning_rate": 7.167791947986997e-05, "loss": 0.4947, "step": 12834 }, { "epoch": 0.9516571513309112, "grad_norm": 0.3612188398838043, "learning_rate": 7.166791697924482e-05, "loss": 0.4922, "step": 12835 }, { "epoch": 0.9517312968043301, "grad_norm": 0.3637819290161133, "learning_rate": 7.165791447861966e-05, "loss": 0.4802, "step": 12836 }, { "epoch": 0.951805442277749, "grad_norm": 0.3669983744621277, "learning_rate": 7.164791197799449e-05, "loss": 0.4948, "step": 12837 }, { "epoch": 0.9518795877511678, "grad_norm": 0.38674336671829224, "learning_rate": 7.163790947736934e-05, "loss": 0.5616, "step": 12838 }, { "epoch": 0.9519537332245867, "grad_norm": 0.36216720938682556, "learning_rate": 7.162790697674418e-05, "loss": 0.4682, "step": 12839 }, { "epoch": 0.9520278786980055, "grad_norm": 0.3638579547405243, "learning_rate": 7.161790447611903e-05, "loss": 0.5008, "step": 12840 }, { "epoch": 0.9521020241714243, "grad_norm": 0.34385958313941956, "learning_rate": 7.160790197549387e-05, "loss": 0.4636, "step": 12841 }, { "epoch": 0.9521761696448432, "grad_norm": 0.37310606241226196, "learning_rate": 7.159789947486872e-05, "loss": 0.4926, "step": 12842 }, { "epoch": 0.9522503151182621, "grad_norm": 0.33849719166755676, "learning_rate": 7.158789697424357e-05, "loss": 0.4741, "step": 12843 }, { "epoch": 0.9523244605916809, "grad_norm": 0.37361082434654236, "learning_rate": 7.15778944736184e-05, "loss": 0.4797, "step": 12844 }, { "epoch": 0.9523986060650997, "grad_norm": 0.3507545590400696, "learning_rate": 7.156789197299326e-05, "loss": 0.4927, "step": 12845 }, { "epoch": 0.9524727515385186, "grad_norm": 0.3801778256893158, "learning_rate": 7.15578894723681e-05, "loss": 0.5555, "step": 12846 }, { "epoch": 0.9525468970119374, "grad_norm": 0.33181875944137573, "learning_rate": 7.154788697174294e-05, "loss": 0.4706, "step": 12847 }, { "epoch": 0.9526210424853563, "grad_norm": 0.3654823899269104, "learning_rate": 7.153788447111778e-05, "loss": 0.5088, "step": 12848 }, { "epoch": 0.9526951879587752, "grad_norm": 0.3683156371116638, "learning_rate": 7.152788197049263e-05, "loss": 0.5286, "step": 12849 }, { "epoch": 0.9527693334321939, "grad_norm": 0.3421316146850586, "learning_rate": 7.151787946986747e-05, "loss": 0.4588, "step": 12850 }, { "epoch": 0.9528434789056128, "grad_norm": 0.37590137124061584, "learning_rate": 7.150787696924232e-05, "loss": 0.515, "step": 12851 }, { "epoch": 0.9529176243790317, "grad_norm": 0.35468998551368713, "learning_rate": 7.149787446861716e-05, "loss": 0.4728, "step": 12852 }, { "epoch": 0.9529917698524505, "grad_norm": 0.36017468571662903, "learning_rate": 7.148787196799201e-05, "loss": 0.4564, "step": 12853 }, { "epoch": 0.9530659153258694, "grad_norm": 0.34767982363700867, "learning_rate": 7.147786946736684e-05, "loss": 0.4597, "step": 12854 }, { "epoch": 0.9531400607992883, "grad_norm": 0.3599044680595398, "learning_rate": 7.14678669667417e-05, "loss": 0.4498, "step": 12855 }, { "epoch": 0.953214206272707, "grad_norm": 0.3660403788089752, "learning_rate": 7.145786446611653e-05, "loss": 0.5027, "step": 12856 }, { "epoch": 0.9532883517461259, "grad_norm": 0.37193623185157776, "learning_rate": 7.144786196549138e-05, "loss": 0.5212, "step": 12857 }, { "epoch": 0.9533624972195448, "grad_norm": 0.3681927025318146, "learning_rate": 7.143785946486622e-05, "loss": 0.4827, "step": 12858 }, { "epoch": 0.9534366426929636, "grad_norm": 0.34152519702911377, "learning_rate": 7.142785696424107e-05, "loss": 0.4587, "step": 12859 }, { "epoch": 0.9535107881663825, "grad_norm": 0.36935165524482727, "learning_rate": 7.141785446361591e-05, "loss": 0.5019, "step": 12860 }, { "epoch": 0.9535849336398012, "grad_norm": 0.3337896168231964, "learning_rate": 7.140785196299075e-05, "loss": 0.4488, "step": 12861 }, { "epoch": 0.9536590791132201, "grad_norm": 0.3957294821739197, "learning_rate": 7.13978494623656e-05, "loss": 0.55, "step": 12862 }, { "epoch": 0.953733224586639, "grad_norm": 0.36221054196357727, "learning_rate": 7.138784696174043e-05, "loss": 0.4807, "step": 12863 }, { "epoch": 0.9538073700600578, "grad_norm": 0.36818811297416687, "learning_rate": 7.137784446111528e-05, "loss": 0.5067, "step": 12864 }, { "epoch": 0.9538815155334767, "grad_norm": 0.35645195841789246, "learning_rate": 7.136784196049012e-05, "loss": 0.4679, "step": 12865 }, { "epoch": 0.9539556610068956, "grad_norm": 0.35128816962242126, "learning_rate": 7.135783945986497e-05, "loss": 0.475, "step": 12866 }, { "epoch": 0.9540298064803143, "grad_norm": 0.35061925649642944, "learning_rate": 7.134783695923981e-05, "loss": 0.4965, "step": 12867 }, { "epoch": 0.9541039519537332, "grad_norm": 0.37218454480171204, "learning_rate": 7.133783445861466e-05, "loss": 0.476, "step": 12868 }, { "epoch": 0.9541780974271521, "grad_norm": 0.354247510433197, "learning_rate": 7.13278319579895e-05, "loss": 0.4565, "step": 12869 }, { "epoch": 0.9542522429005709, "grad_norm": 0.37689492106437683, "learning_rate": 7.131782945736435e-05, "loss": 0.5118, "step": 12870 }, { "epoch": 0.9543263883739898, "grad_norm": 0.3702598512172699, "learning_rate": 7.130782695673918e-05, "loss": 0.5022, "step": 12871 }, { "epoch": 0.9544005338474086, "grad_norm": 0.36752763390541077, "learning_rate": 7.129782445611403e-05, "loss": 0.492, "step": 12872 }, { "epoch": 0.9544746793208274, "grad_norm": 0.3967041075229645, "learning_rate": 7.128782195548887e-05, "loss": 0.5755, "step": 12873 }, { "epoch": 0.9545488247942463, "grad_norm": 0.3692801892757416, "learning_rate": 7.127781945486371e-05, "loss": 0.4959, "step": 12874 }, { "epoch": 0.9546229702676652, "grad_norm": 0.3483528196811676, "learning_rate": 7.126781695423856e-05, "loss": 0.4586, "step": 12875 }, { "epoch": 0.954697115741084, "grad_norm": 0.3551625907421112, "learning_rate": 7.12578144536134e-05, "loss": 0.4806, "step": 12876 }, { "epoch": 0.9547712612145028, "grad_norm": 0.34033915400505066, "learning_rate": 7.124781195298825e-05, "loss": 0.4861, "step": 12877 }, { "epoch": 0.9548454066879217, "grad_norm": 0.3679744005203247, "learning_rate": 7.12378094523631e-05, "loss": 0.5127, "step": 12878 }, { "epoch": 0.9549195521613405, "grad_norm": 0.34776756167411804, "learning_rate": 7.122780695173793e-05, "loss": 0.4694, "step": 12879 }, { "epoch": 0.9549936976347594, "grad_norm": 0.36195042729377747, "learning_rate": 7.121780445111279e-05, "loss": 0.4709, "step": 12880 }, { "epoch": 0.9550678431081783, "grad_norm": 0.35951513051986694, "learning_rate": 7.120780195048764e-05, "loss": 0.4656, "step": 12881 }, { "epoch": 0.955141988581597, "grad_norm": 0.3659261465072632, "learning_rate": 7.119779944986247e-05, "loss": 0.5003, "step": 12882 }, { "epoch": 0.9552161340550159, "grad_norm": 0.3722638487815857, "learning_rate": 7.118779694923732e-05, "loss": 0.4988, "step": 12883 }, { "epoch": 0.9552902795284348, "grad_norm": 0.37564361095428467, "learning_rate": 7.117779444861216e-05, "loss": 0.5236, "step": 12884 }, { "epoch": 0.9553644250018536, "grad_norm": 0.3557382822036743, "learning_rate": 7.1167791947987e-05, "loss": 0.4981, "step": 12885 }, { "epoch": 0.9554385704752725, "grad_norm": 0.36178624629974365, "learning_rate": 7.115778944736185e-05, "loss": 0.5141, "step": 12886 }, { "epoch": 0.9555127159486914, "grad_norm": 0.3456205725669861, "learning_rate": 7.114778694673669e-05, "loss": 0.4613, "step": 12887 }, { "epoch": 0.9555868614221101, "grad_norm": 0.3595566153526306, "learning_rate": 7.113778444611154e-05, "loss": 0.5061, "step": 12888 }, { "epoch": 0.955661006895529, "grad_norm": 0.3958456218242645, "learning_rate": 7.112778194548637e-05, "loss": 0.5325, "step": 12889 }, { "epoch": 0.9557351523689479, "grad_norm": 0.34858909249305725, "learning_rate": 7.111777944486122e-05, "loss": 0.4597, "step": 12890 }, { "epoch": 0.9558092978423667, "grad_norm": 0.35411641001701355, "learning_rate": 7.110777694423606e-05, "loss": 0.4671, "step": 12891 }, { "epoch": 0.9558834433157856, "grad_norm": 0.3709735572338104, "learning_rate": 7.109777444361091e-05, "loss": 0.5011, "step": 12892 }, { "epoch": 0.9559575887892044, "grad_norm": 0.3342306613922119, "learning_rate": 7.108777194298575e-05, "loss": 0.4514, "step": 12893 }, { "epoch": 0.9560317342626232, "grad_norm": 0.36481404304504395, "learning_rate": 7.10777694423606e-05, "loss": 0.4818, "step": 12894 }, { "epoch": 0.9561058797360421, "grad_norm": 0.33871451020240784, "learning_rate": 7.106776694173544e-05, "loss": 0.4549, "step": 12895 }, { "epoch": 0.956180025209461, "grad_norm": 0.35684823989868164, "learning_rate": 7.105776444111029e-05, "loss": 0.4835, "step": 12896 }, { "epoch": 0.9562541706828798, "grad_norm": 0.36216190457344055, "learning_rate": 7.104776194048512e-05, "loss": 0.5051, "step": 12897 }, { "epoch": 0.9563283161562987, "grad_norm": 0.36332225799560547, "learning_rate": 7.103775943985996e-05, "loss": 0.4654, "step": 12898 }, { "epoch": 0.9564024616297175, "grad_norm": 0.3616460859775543, "learning_rate": 7.102775693923481e-05, "loss": 0.4898, "step": 12899 }, { "epoch": 0.9564766071031363, "grad_norm": 0.3530033528804779, "learning_rate": 7.101775443860965e-05, "loss": 0.4701, "step": 12900 }, { "epoch": 0.9565507525765552, "grad_norm": 0.3688696622848511, "learning_rate": 7.10077519379845e-05, "loss": 0.4726, "step": 12901 }, { "epoch": 0.9566248980499741, "grad_norm": 0.36507076025009155, "learning_rate": 7.099774943735934e-05, "loss": 0.4756, "step": 12902 }, { "epoch": 0.9566990435233929, "grad_norm": 0.37419039011001587, "learning_rate": 7.098774693673419e-05, "loss": 0.5316, "step": 12903 }, { "epoch": 0.9567731889968117, "grad_norm": 0.388791024684906, "learning_rate": 7.097774443610902e-05, "loss": 0.5214, "step": 12904 }, { "epoch": 0.9568473344702306, "grad_norm": 0.3637554943561554, "learning_rate": 7.096774193548388e-05, "loss": 0.4794, "step": 12905 }, { "epoch": 0.9569214799436494, "grad_norm": 0.37075838446617126, "learning_rate": 7.095773943485871e-05, "loss": 0.4876, "step": 12906 }, { "epoch": 0.9569956254170683, "grad_norm": 0.35330262780189514, "learning_rate": 7.094773693423356e-05, "loss": 0.4749, "step": 12907 }, { "epoch": 0.9570697708904872, "grad_norm": 0.35340893268585205, "learning_rate": 7.09377344336084e-05, "loss": 0.4822, "step": 12908 }, { "epoch": 0.9571439163639059, "grad_norm": 0.34755003452301025, "learning_rate": 7.092773193298325e-05, "loss": 0.4994, "step": 12909 }, { "epoch": 0.9572180618373248, "grad_norm": 0.34424155950546265, "learning_rate": 7.091772943235809e-05, "loss": 0.4751, "step": 12910 }, { "epoch": 0.9572922073107437, "grad_norm": 0.3810393214225769, "learning_rate": 7.090772693173294e-05, "loss": 0.5109, "step": 12911 }, { "epoch": 0.9573663527841625, "grad_norm": 0.32854291796684265, "learning_rate": 7.089772443110778e-05, "loss": 0.4444, "step": 12912 }, { "epoch": 0.9574404982575814, "grad_norm": 0.37376776337623596, "learning_rate": 7.088772193048263e-05, "loss": 0.5113, "step": 12913 }, { "epoch": 0.9575146437310003, "grad_norm": 0.3545248806476593, "learning_rate": 7.087771942985746e-05, "loss": 0.4891, "step": 12914 }, { "epoch": 0.957588789204419, "grad_norm": 0.38758179545402527, "learning_rate": 7.086771692923231e-05, "loss": 0.5097, "step": 12915 }, { "epoch": 0.9576629346778379, "grad_norm": 0.3681524097919464, "learning_rate": 7.085771442860716e-05, "loss": 0.5031, "step": 12916 }, { "epoch": 0.9577370801512568, "grad_norm": 0.3566811680793762, "learning_rate": 7.0847711927982e-05, "loss": 0.4357, "step": 12917 }, { "epoch": 0.9578112256246756, "grad_norm": 0.3600139915943146, "learning_rate": 7.083770942735685e-05, "loss": 0.4728, "step": 12918 }, { "epoch": 0.9578853710980945, "grad_norm": 0.354747474193573, "learning_rate": 7.082770692673169e-05, "loss": 0.4743, "step": 12919 }, { "epoch": 0.9579595165715133, "grad_norm": 0.36370787024497986, "learning_rate": 7.081770442610654e-05, "loss": 0.4762, "step": 12920 }, { "epoch": 0.9580336620449321, "grad_norm": 0.37237367033958435, "learning_rate": 7.080770192548138e-05, "loss": 0.4682, "step": 12921 }, { "epoch": 0.958107807518351, "grad_norm": 0.39008229970932007, "learning_rate": 7.079769942485621e-05, "loss": 0.4858, "step": 12922 }, { "epoch": 0.9581819529917699, "grad_norm": 0.3546036183834076, "learning_rate": 7.078769692423106e-05, "loss": 0.4751, "step": 12923 }, { "epoch": 0.9582560984651887, "grad_norm": 0.38891157507896423, "learning_rate": 7.07776944236059e-05, "loss": 0.5387, "step": 12924 }, { "epoch": 0.9583302439386076, "grad_norm": 0.3560616672039032, "learning_rate": 7.076769192298075e-05, "loss": 0.4616, "step": 12925 }, { "epoch": 0.9584043894120264, "grad_norm": 0.3717428147792816, "learning_rate": 7.075768942235559e-05, "loss": 0.5148, "step": 12926 }, { "epoch": 0.9584785348854452, "grad_norm": 0.3368633985519409, "learning_rate": 7.074768692173044e-05, "loss": 0.4498, "step": 12927 }, { "epoch": 0.9585526803588641, "grad_norm": 0.4148867428302765, "learning_rate": 7.073768442110528e-05, "loss": 0.545, "step": 12928 }, { "epoch": 0.958626825832283, "grad_norm": 0.3655499517917633, "learning_rate": 7.072768192048013e-05, "loss": 0.4984, "step": 12929 }, { "epoch": 0.9587009713057018, "grad_norm": 0.3731435537338257, "learning_rate": 7.071767941985497e-05, "loss": 0.4928, "step": 12930 }, { "epoch": 0.9587751167791206, "grad_norm": 0.376192569732666, "learning_rate": 7.070767691922982e-05, "loss": 0.514, "step": 12931 }, { "epoch": 0.9588492622525395, "grad_norm": 0.3932946026325226, "learning_rate": 7.069767441860465e-05, "loss": 0.5213, "step": 12932 }, { "epoch": 0.9589234077259583, "grad_norm": 0.37732717394828796, "learning_rate": 7.06876719179795e-05, "loss": 0.5162, "step": 12933 }, { "epoch": 0.9589975531993772, "grad_norm": 0.35717251896858215, "learning_rate": 7.067766941735434e-05, "loss": 0.4533, "step": 12934 }, { "epoch": 0.9590716986727961, "grad_norm": 0.34615960717201233, "learning_rate": 7.066766691672918e-05, "loss": 0.4426, "step": 12935 }, { "epoch": 0.9591458441462148, "grad_norm": 0.37867388129234314, "learning_rate": 7.065766441610403e-05, "loss": 0.4729, "step": 12936 }, { "epoch": 0.9592199896196337, "grad_norm": 0.3386756479740143, "learning_rate": 7.064766191547887e-05, "loss": 0.4638, "step": 12937 }, { "epoch": 0.9592941350930526, "grad_norm": 0.3735784888267517, "learning_rate": 7.063765941485372e-05, "loss": 0.5156, "step": 12938 }, { "epoch": 0.9593682805664714, "grad_norm": 0.3552737236022949, "learning_rate": 7.062765691422855e-05, "loss": 0.4928, "step": 12939 }, { "epoch": 0.9594424260398903, "grad_norm": 0.35114356875419617, "learning_rate": 7.06176544136034e-05, "loss": 0.4841, "step": 12940 }, { "epoch": 0.9595165715133092, "grad_norm": 0.33625543117523193, "learning_rate": 7.060765191297824e-05, "loss": 0.4364, "step": 12941 }, { "epoch": 0.9595907169867279, "grad_norm": 0.35457539558410645, "learning_rate": 7.059764941235309e-05, "loss": 0.4706, "step": 12942 }, { "epoch": 0.9596648624601468, "grad_norm": 0.36503514647483826, "learning_rate": 7.058764691172793e-05, "loss": 0.5183, "step": 12943 }, { "epoch": 0.9597390079335657, "grad_norm": 0.3651967942714691, "learning_rate": 7.057764441110278e-05, "loss": 0.4888, "step": 12944 }, { "epoch": 0.9598131534069845, "grad_norm": 0.3751230537891388, "learning_rate": 7.056764191047762e-05, "loss": 0.5219, "step": 12945 }, { "epoch": 0.9598872988804034, "grad_norm": 0.36118441820144653, "learning_rate": 7.055763940985247e-05, "loss": 0.4946, "step": 12946 }, { "epoch": 0.9599614443538222, "grad_norm": 0.36791789531707764, "learning_rate": 7.05476369092273e-05, "loss": 0.4925, "step": 12947 }, { "epoch": 0.960035589827241, "grad_norm": 0.34464550018310547, "learning_rate": 7.053763440860215e-05, "loss": 0.4628, "step": 12948 }, { "epoch": 0.9601097353006599, "grad_norm": 0.35838833451271057, "learning_rate": 7.0527631907977e-05, "loss": 0.4939, "step": 12949 }, { "epoch": 0.9601838807740788, "grad_norm": 0.38716980814933777, "learning_rate": 7.051762940735184e-05, "loss": 0.5371, "step": 12950 }, { "epoch": 0.9602580262474976, "grad_norm": 0.3924266993999481, "learning_rate": 7.050762690672669e-05, "loss": 0.5561, "step": 12951 }, { "epoch": 0.9603321717209164, "grad_norm": 0.3895171880722046, "learning_rate": 7.049762440610153e-05, "loss": 0.5778, "step": 12952 }, { "epoch": 0.9604063171943353, "grad_norm": 0.3560029864311218, "learning_rate": 7.048762190547638e-05, "loss": 0.501, "step": 12953 }, { "epoch": 0.9604804626677541, "grad_norm": 0.3403944969177246, "learning_rate": 7.047761940485122e-05, "loss": 0.4692, "step": 12954 }, { "epoch": 0.960554608141173, "grad_norm": 0.3335234522819519, "learning_rate": 7.046761690422607e-05, "loss": 0.4565, "step": 12955 }, { "epoch": 0.9606287536145919, "grad_norm": 0.3671684265136719, "learning_rate": 7.04576144036009e-05, "loss": 0.5009, "step": 12956 }, { "epoch": 0.9607028990880107, "grad_norm": 0.3710257112979889, "learning_rate": 7.044761190297576e-05, "loss": 0.5046, "step": 12957 }, { "epoch": 0.9607770445614295, "grad_norm": 0.3327952027320862, "learning_rate": 7.04376094023506e-05, "loss": 0.4359, "step": 12958 }, { "epoch": 0.9608511900348484, "grad_norm": 0.3903459906578064, "learning_rate": 7.042760690172543e-05, "loss": 0.5518, "step": 12959 }, { "epoch": 0.9609253355082672, "grad_norm": 0.3654841184616089, "learning_rate": 7.041760440110028e-05, "loss": 0.5135, "step": 12960 }, { "epoch": 0.9609994809816861, "grad_norm": 0.3853150010108948, "learning_rate": 7.040760190047512e-05, "loss": 0.4745, "step": 12961 }, { "epoch": 0.961073626455105, "grad_norm": 0.3590145409107208, "learning_rate": 7.039759939984997e-05, "loss": 0.4922, "step": 12962 }, { "epoch": 0.9611477719285237, "grad_norm": 0.3707042336463928, "learning_rate": 7.03875968992248e-05, "loss": 0.4775, "step": 12963 }, { "epoch": 0.9612219174019426, "grad_norm": 0.35983848571777344, "learning_rate": 7.037759439859966e-05, "loss": 0.4901, "step": 12964 }, { "epoch": 0.9612960628753615, "grad_norm": 0.3510342240333557, "learning_rate": 7.03675918979745e-05, "loss": 0.4592, "step": 12965 }, { "epoch": 0.9613702083487803, "grad_norm": 0.3626967668533325, "learning_rate": 7.035758939734934e-05, "loss": 0.4771, "step": 12966 }, { "epoch": 0.9614443538221992, "grad_norm": 0.36060646176338196, "learning_rate": 7.034758689672418e-05, "loss": 0.4958, "step": 12967 }, { "epoch": 0.9615184992956181, "grad_norm": 0.35635748505592346, "learning_rate": 7.033758439609903e-05, "loss": 0.478, "step": 12968 }, { "epoch": 0.9615926447690368, "grad_norm": 0.3409948945045471, "learning_rate": 7.032758189547387e-05, "loss": 0.464, "step": 12969 }, { "epoch": 0.9616667902424557, "grad_norm": 0.37925612926483154, "learning_rate": 7.031757939484872e-05, "loss": 0.4958, "step": 12970 }, { "epoch": 0.9617409357158746, "grad_norm": 0.37349367141723633, "learning_rate": 7.030757689422356e-05, "loss": 0.4834, "step": 12971 }, { "epoch": 0.9618150811892934, "grad_norm": 0.365367591381073, "learning_rate": 7.02975743935984e-05, "loss": 0.4834, "step": 12972 }, { "epoch": 0.9618892266627123, "grad_norm": 0.35932523012161255, "learning_rate": 7.028757189297324e-05, "loss": 0.4905, "step": 12973 }, { "epoch": 0.961963372136131, "grad_norm": 0.36842644214630127, "learning_rate": 7.027756939234808e-05, "loss": 0.4977, "step": 12974 }, { "epoch": 0.9620375176095499, "grad_norm": 0.34322771430015564, "learning_rate": 7.026756689172293e-05, "loss": 0.4352, "step": 12975 }, { "epoch": 0.9621116630829688, "grad_norm": 0.3686724305152893, "learning_rate": 7.025756439109777e-05, "loss": 0.5099, "step": 12976 }, { "epoch": 0.9621858085563876, "grad_norm": 0.37538719177246094, "learning_rate": 7.024756189047262e-05, "loss": 0.5267, "step": 12977 }, { "epoch": 0.9622599540298065, "grad_norm": 0.3622084856033325, "learning_rate": 7.023755938984746e-05, "loss": 0.4909, "step": 12978 }, { "epoch": 0.9623340995032253, "grad_norm": 0.3600929379463196, "learning_rate": 7.022755688922231e-05, "loss": 0.4771, "step": 12979 }, { "epoch": 0.9624082449766441, "grad_norm": 0.3808809816837311, "learning_rate": 7.021755438859714e-05, "loss": 0.5073, "step": 12980 }, { "epoch": 0.962482390450063, "grad_norm": 0.3437039852142334, "learning_rate": 7.0207551887972e-05, "loss": 0.4534, "step": 12981 }, { "epoch": 0.9625565359234819, "grad_norm": 0.3588520884513855, "learning_rate": 7.019754938734683e-05, "loss": 0.5153, "step": 12982 }, { "epoch": 0.9626306813969007, "grad_norm": 0.36302679777145386, "learning_rate": 7.018754688672168e-05, "loss": 0.4618, "step": 12983 }, { "epoch": 0.9627048268703196, "grad_norm": 0.3587527871131897, "learning_rate": 7.017754438609653e-05, "loss": 0.4678, "step": 12984 }, { "epoch": 0.9627789723437384, "grad_norm": 0.3531219959259033, "learning_rate": 7.016754188547137e-05, "loss": 0.4902, "step": 12985 }, { "epoch": 0.9628531178171572, "grad_norm": 0.37788477540016174, "learning_rate": 7.015753938484622e-05, "loss": 0.5043, "step": 12986 }, { "epoch": 0.9629272632905761, "grad_norm": 0.3588030934333801, "learning_rate": 7.014753688422106e-05, "loss": 0.4984, "step": 12987 }, { "epoch": 0.963001408763995, "grad_norm": 0.38430309295654297, "learning_rate": 7.013753438359591e-05, "loss": 0.5373, "step": 12988 }, { "epoch": 0.9630755542374138, "grad_norm": 0.3467023968696594, "learning_rate": 7.012753188297075e-05, "loss": 0.4891, "step": 12989 }, { "epoch": 0.9631496997108326, "grad_norm": 0.3661195635795593, "learning_rate": 7.01175293823456e-05, "loss": 0.504, "step": 12990 }, { "epoch": 0.9632238451842515, "grad_norm": 0.3532239496707916, "learning_rate": 7.010752688172043e-05, "loss": 0.4627, "step": 12991 }, { "epoch": 0.9632979906576703, "grad_norm": 0.35693010687828064, "learning_rate": 7.009752438109528e-05, "loss": 0.4552, "step": 12992 }, { "epoch": 0.9633721361310892, "grad_norm": 0.35975706577301025, "learning_rate": 7.008752188047012e-05, "loss": 0.4778, "step": 12993 }, { "epoch": 0.9634462816045081, "grad_norm": 0.346139520406723, "learning_rate": 7.007751937984497e-05, "loss": 0.4804, "step": 12994 }, { "epoch": 0.9635204270779268, "grad_norm": 0.37546229362487793, "learning_rate": 7.006751687921981e-05, "loss": 0.4994, "step": 12995 }, { "epoch": 0.9635945725513457, "grad_norm": 0.3368432819843292, "learning_rate": 7.005751437859466e-05, "loss": 0.468, "step": 12996 }, { "epoch": 0.9636687180247646, "grad_norm": 0.36519908905029297, "learning_rate": 7.00475118779695e-05, "loss": 0.4534, "step": 12997 }, { "epoch": 0.9637428634981834, "grad_norm": 0.40139076113700867, "learning_rate": 7.003750937734433e-05, "loss": 0.4995, "step": 12998 }, { "epoch": 0.9638170089716023, "grad_norm": 0.37065932154655457, "learning_rate": 7.002750687671919e-05, "loss": 0.5113, "step": 12999 }, { "epoch": 0.9638911544450212, "grad_norm": 0.3758029341697693, "learning_rate": 7.001750437609402e-05, "loss": 0.5006, "step": 13000 }, { "epoch": 0.9639652999184399, "grad_norm": 0.35921016335487366, "learning_rate": 7.000750187546887e-05, "loss": 0.4647, "step": 13001 }, { "epoch": 0.9640394453918588, "grad_norm": 0.36115971207618713, "learning_rate": 6.999749937484371e-05, "loss": 0.491, "step": 13002 }, { "epoch": 0.9641135908652777, "grad_norm": 0.3503580093383789, "learning_rate": 6.998749687421856e-05, "loss": 0.496, "step": 13003 }, { "epoch": 0.9641877363386965, "grad_norm": 0.3750561773777008, "learning_rate": 6.99774943735934e-05, "loss": 0.4908, "step": 13004 }, { "epoch": 0.9642618818121154, "grad_norm": 0.3506925404071808, "learning_rate": 6.996749187296825e-05, "loss": 0.4537, "step": 13005 }, { "epoch": 0.9643360272855342, "grad_norm": 0.34548419713974, "learning_rate": 6.995748937234309e-05, "loss": 0.4857, "step": 13006 }, { "epoch": 0.964410172758953, "grad_norm": 0.36306941509246826, "learning_rate": 6.994748687171794e-05, "loss": 0.4635, "step": 13007 }, { "epoch": 0.9644843182323719, "grad_norm": 0.3415880501270294, "learning_rate": 6.993748437109277e-05, "loss": 0.4674, "step": 13008 }, { "epoch": 0.9645584637057908, "grad_norm": 0.38572683930397034, "learning_rate": 6.992748187046761e-05, "loss": 0.5159, "step": 13009 }, { "epoch": 0.9646326091792096, "grad_norm": 0.3484489619731903, "learning_rate": 6.991747936984246e-05, "loss": 0.4781, "step": 13010 }, { "epoch": 0.9647067546526285, "grad_norm": 0.3600475788116455, "learning_rate": 6.99074768692173e-05, "loss": 0.4937, "step": 13011 }, { "epoch": 0.9647809001260473, "grad_norm": 0.35201847553253174, "learning_rate": 6.989747436859215e-05, "loss": 0.464, "step": 13012 }, { "epoch": 0.9648550455994661, "grad_norm": 0.3743647634983063, "learning_rate": 6.988747186796699e-05, "loss": 0.5121, "step": 13013 }, { "epoch": 0.964929191072885, "grad_norm": 0.35326769948005676, "learning_rate": 6.987746936734184e-05, "loss": 0.4669, "step": 13014 }, { "epoch": 0.9650033365463039, "grad_norm": 0.3690583109855652, "learning_rate": 6.986746686671667e-05, "loss": 0.5192, "step": 13015 }, { "epoch": 0.9650774820197227, "grad_norm": 0.3712625801563263, "learning_rate": 6.985746436609152e-05, "loss": 0.5161, "step": 13016 }, { "epoch": 0.9651516274931415, "grad_norm": 0.346301794052124, "learning_rate": 6.984746186546637e-05, "loss": 0.478, "step": 13017 }, { "epoch": 0.9652257729665604, "grad_norm": 0.3768378794193268, "learning_rate": 6.983745936484121e-05, "loss": 0.5276, "step": 13018 }, { "epoch": 0.9652999184399792, "grad_norm": 0.34704503417015076, "learning_rate": 6.982745686421606e-05, "loss": 0.446, "step": 13019 }, { "epoch": 0.9653740639133981, "grad_norm": 0.37938204407691956, "learning_rate": 6.98174543635909e-05, "loss": 0.4916, "step": 13020 }, { "epoch": 0.965448209386817, "grad_norm": 0.35975179076194763, "learning_rate": 6.980745186296575e-05, "loss": 0.4961, "step": 13021 }, { "epoch": 0.9655223548602357, "grad_norm": 0.35876014828681946, "learning_rate": 6.979744936234059e-05, "loss": 0.4834, "step": 13022 }, { "epoch": 0.9655965003336546, "grad_norm": 0.35784706473350525, "learning_rate": 6.978744686171544e-05, "loss": 0.4886, "step": 13023 }, { "epoch": 0.9656706458070735, "grad_norm": 0.36613816022872925, "learning_rate": 6.977744436109027e-05, "loss": 0.4916, "step": 13024 }, { "epoch": 0.9657447912804923, "grad_norm": 0.35230353474617004, "learning_rate": 6.976744186046513e-05, "loss": 0.4801, "step": 13025 }, { "epoch": 0.9658189367539112, "grad_norm": 0.3912203907966614, "learning_rate": 6.975743935983996e-05, "loss": 0.5482, "step": 13026 }, { "epoch": 0.9658930822273301, "grad_norm": 0.34883344173431396, "learning_rate": 6.974743685921481e-05, "loss": 0.466, "step": 13027 }, { "epoch": 0.9659672277007488, "grad_norm": 0.3286963999271393, "learning_rate": 6.973743435858965e-05, "loss": 0.4536, "step": 13028 }, { "epoch": 0.9660413731741677, "grad_norm": 0.3653842508792877, "learning_rate": 6.97274318579645e-05, "loss": 0.5098, "step": 13029 }, { "epoch": 0.9661155186475866, "grad_norm": 0.3514271676540375, "learning_rate": 6.971742935733934e-05, "loss": 0.5088, "step": 13030 }, { "epoch": 0.9661896641210054, "grad_norm": 0.372572124004364, "learning_rate": 6.970742685671419e-05, "loss": 0.5527, "step": 13031 }, { "epoch": 0.9662638095944243, "grad_norm": 0.3508608937263489, "learning_rate": 6.969742435608903e-05, "loss": 0.4767, "step": 13032 }, { "epoch": 0.9663379550678431, "grad_norm": 0.3587830662727356, "learning_rate": 6.968742185546388e-05, "loss": 0.5046, "step": 13033 }, { "epoch": 0.9664121005412619, "grad_norm": 0.3708875775337219, "learning_rate": 6.967741935483871e-05, "loss": 0.5108, "step": 13034 }, { "epoch": 0.9664862460146808, "grad_norm": 0.3432284891605377, "learning_rate": 6.966741685421355e-05, "loss": 0.456, "step": 13035 }, { "epoch": 0.9665603914880997, "grad_norm": 0.3643324673175812, "learning_rate": 6.96574143535884e-05, "loss": 0.4823, "step": 13036 }, { "epoch": 0.9666345369615185, "grad_norm": 0.34403160214424133, "learning_rate": 6.964741185296324e-05, "loss": 0.4257, "step": 13037 }, { "epoch": 0.9667086824349373, "grad_norm": 0.3489411175251007, "learning_rate": 6.963740935233809e-05, "loss": 0.4944, "step": 13038 }, { "epoch": 0.9667828279083562, "grad_norm": 0.37669283151626587, "learning_rate": 6.962740685171293e-05, "loss": 0.5021, "step": 13039 }, { "epoch": 0.966856973381775, "grad_norm": 0.36490461230278015, "learning_rate": 6.961740435108778e-05, "loss": 0.4784, "step": 13040 }, { "epoch": 0.9669311188551939, "grad_norm": 0.35455524921417236, "learning_rate": 6.960740185046261e-05, "loss": 0.492, "step": 13041 }, { "epoch": 0.9670052643286128, "grad_norm": 0.35987648367881775, "learning_rate": 6.959739934983746e-05, "loss": 0.4988, "step": 13042 }, { "epoch": 0.9670794098020316, "grad_norm": 0.3401961326599121, "learning_rate": 6.95873968492123e-05, "loss": 0.4417, "step": 13043 }, { "epoch": 0.9671535552754504, "grad_norm": 0.3714922368526459, "learning_rate": 6.957739434858715e-05, "loss": 0.4932, "step": 13044 }, { "epoch": 0.9672277007488693, "grad_norm": 0.34990522265434265, "learning_rate": 6.956739184796199e-05, "loss": 0.4875, "step": 13045 }, { "epoch": 0.9673018462222881, "grad_norm": 0.35929256677627563, "learning_rate": 6.955738934733683e-05, "loss": 0.5098, "step": 13046 }, { "epoch": 0.967375991695707, "grad_norm": 0.3732442855834961, "learning_rate": 6.954738684671168e-05, "loss": 0.461, "step": 13047 }, { "epoch": 0.9674501371691259, "grad_norm": 0.3485364019870758, "learning_rate": 6.953738434608651e-05, "loss": 0.4529, "step": 13048 }, { "epoch": 0.9675242826425446, "grad_norm": 0.3505331873893738, "learning_rate": 6.952738184546136e-05, "loss": 0.4614, "step": 13049 }, { "epoch": 0.9675984281159635, "grad_norm": 0.35910212993621826, "learning_rate": 6.951737934483622e-05, "loss": 0.4858, "step": 13050 }, { "epoch": 0.9676725735893824, "grad_norm": 0.3604978919029236, "learning_rate": 6.950737684421105e-05, "loss": 0.4816, "step": 13051 }, { "epoch": 0.9677467190628012, "grad_norm": 0.36538729071617126, "learning_rate": 6.94973743435859e-05, "loss": 0.4941, "step": 13052 }, { "epoch": 0.9678208645362201, "grad_norm": 0.37794795632362366, "learning_rate": 6.948737184296074e-05, "loss": 0.4855, "step": 13053 }, { "epoch": 0.967895010009639, "grad_norm": 0.3572554290294647, "learning_rate": 6.947736934233559e-05, "loss": 0.4609, "step": 13054 }, { "epoch": 0.9679691554830577, "grad_norm": 0.357610821723938, "learning_rate": 6.946736684171044e-05, "loss": 0.4851, "step": 13055 }, { "epoch": 0.9680433009564766, "grad_norm": 0.35199370980262756, "learning_rate": 6.945736434108528e-05, "loss": 0.4778, "step": 13056 }, { "epoch": 0.9681174464298955, "grad_norm": 0.3629341423511505, "learning_rate": 6.944736184046013e-05, "loss": 0.5082, "step": 13057 }, { "epoch": 0.9681915919033143, "grad_norm": 0.354131817817688, "learning_rate": 6.943735933983497e-05, "loss": 0.447, "step": 13058 }, { "epoch": 0.9682657373767332, "grad_norm": 0.3597468137741089, "learning_rate": 6.94273568392098e-05, "loss": 0.4723, "step": 13059 }, { "epoch": 0.968339882850152, "grad_norm": 0.3587569296360016, "learning_rate": 6.941735433858465e-05, "loss": 0.4696, "step": 13060 }, { "epoch": 0.9684140283235708, "grad_norm": 0.3552185595035553, "learning_rate": 6.940735183795949e-05, "loss": 0.4968, "step": 13061 }, { "epoch": 0.9684881737969897, "grad_norm": 0.37086620926856995, "learning_rate": 6.939734933733434e-05, "loss": 0.5129, "step": 13062 }, { "epoch": 0.9685623192704086, "grad_norm": 0.3496204912662506, "learning_rate": 6.938734683670918e-05, "loss": 0.4469, "step": 13063 }, { "epoch": 0.9686364647438274, "grad_norm": 0.35573825240135193, "learning_rate": 6.937734433608403e-05, "loss": 0.4751, "step": 13064 }, { "epoch": 0.9687106102172462, "grad_norm": 0.3526800572872162, "learning_rate": 6.936734183545887e-05, "loss": 0.4477, "step": 13065 }, { "epoch": 0.9687847556906651, "grad_norm": 0.37247616052627563, "learning_rate": 6.935733933483372e-05, "loss": 0.4847, "step": 13066 }, { "epoch": 0.9688589011640839, "grad_norm": 0.3490072190761566, "learning_rate": 6.934733683420855e-05, "loss": 0.4884, "step": 13067 }, { "epoch": 0.9689330466375028, "grad_norm": 0.3853467106819153, "learning_rate": 6.93373343335834e-05, "loss": 0.533, "step": 13068 }, { "epoch": 0.9690071921109217, "grad_norm": 0.3690889775753021, "learning_rate": 6.932733183295824e-05, "loss": 0.4951, "step": 13069 }, { "epoch": 0.9690813375843405, "grad_norm": 0.37549906969070435, "learning_rate": 6.931732933233309e-05, "loss": 0.5063, "step": 13070 }, { "epoch": 0.9691554830577593, "grad_norm": 0.362779825925827, "learning_rate": 6.930732683170793e-05, "loss": 0.4694, "step": 13071 }, { "epoch": 0.9692296285311782, "grad_norm": 0.3429103493690491, "learning_rate": 6.929732433108277e-05, "loss": 0.4528, "step": 13072 }, { "epoch": 0.969303774004597, "grad_norm": 0.3681514263153076, "learning_rate": 6.928732183045762e-05, "loss": 0.5259, "step": 13073 }, { "epoch": 0.9693779194780159, "grad_norm": 0.3673373758792877, "learning_rate": 6.927731932983245e-05, "loss": 0.4691, "step": 13074 }, { "epoch": 0.9694520649514348, "grad_norm": 0.38499969244003296, "learning_rate": 6.92673168292073e-05, "loss": 0.5308, "step": 13075 }, { "epoch": 0.9695262104248535, "grad_norm": 0.351468026638031, "learning_rate": 6.925731432858214e-05, "loss": 0.489, "step": 13076 }, { "epoch": 0.9696003558982724, "grad_norm": 0.35188543796539307, "learning_rate": 6.924731182795699e-05, "loss": 0.4785, "step": 13077 }, { "epoch": 0.9696745013716913, "grad_norm": 0.3403048515319824, "learning_rate": 6.923730932733183e-05, "loss": 0.4501, "step": 13078 }, { "epoch": 0.9697486468451101, "grad_norm": 0.36271801590919495, "learning_rate": 6.922730682670668e-05, "loss": 0.4977, "step": 13079 }, { "epoch": 0.969822792318529, "grad_norm": 0.35998091101646423, "learning_rate": 6.921730432608152e-05, "loss": 0.5141, "step": 13080 }, { "epoch": 0.9698969377919479, "grad_norm": 0.3631279468536377, "learning_rate": 6.920730182545637e-05, "loss": 0.4911, "step": 13081 }, { "epoch": 0.9699710832653666, "grad_norm": 0.3477436900138855, "learning_rate": 6.91972993248312e-05, "loss": 0.474, "step": 13082 }, { "epoch": 0.9700452287387855, "grad_norm": 0.3559390604496002, "learning_rate": 6.918729682420604e-05, "loss": 0.49, "step": 13083 }, { "epoch": 0.9701193742122044, "grad_norm": 0.34750959277153015, "learning_rate": 6.91772943235809e-05, "loss": 0.4733, "step": 13084 }, { "epoch": 0.9701935196856232, "grad_norm": 0.37061917781829834, "learning_rate": 6.916729182295574e-05, "loss": 0.5076, "step": 13085 }, { "epoch": 0.9702676651590421, "grad_norm": 0.37588638067245483, "learning_rate": 6.915728932233058e-05, "loss": 0.4806, "step": 13086 }, { "epoch": 0.9703418106324608, "grad_norm": 0.35351645946502686, "learning_rate": 6.914728682170543e-05, "loss": 0.4876, "step": 13087 }, { "epoch": 0.9704159561058797, "grad_norm": 0.33626899123191833, "learning_rate": 6.913728432108028e-05, "loss": 0.4681, "step": 13088 }, { "epoch": 0.9704901015792986, "grad_norm": 0.3879266679286957, "learning_rate": 6.912728182045512e-05, "loss": 0.5369, "step": 13089 }, { "epoch": 0.9705642470527174, "grad_norm": 0.35897889733314514, "learning_rate": 6.911727931982997e-05, "loss": 0.5044, "step": 13090 }, { "epoch": 0.9706383925261363, "grad_norm": 0.35357099771499634, "learning_rate": 6.910727681920481e-05, "loss": 0.4785, "step": 13091 }, { "epoch": 0.9707125379995551, "grad_norm": 0.3665847182273865, "learning_rate": 6.909727431857966e-05, "loss": 0.5033, "step": 13092 }, { "epoch": 0.9707866834729739, "grad_norm": 0.3660220801830292, "learning_rate": 6.90872718179545e-05, "loss": 0.4778, "step": 13093 }, { "epoch": 0.9708608289463928, "grad_norm": 0.3729289770126343, "learning_rate": 6.907726931732935e-05, "loss": 0.5211, "step": 13094 }, { "epoch": 0.9709349744198117, "grad_norm": 0.34344157576560974, "learning_rate": 6.906726681670418e-05, "loss": 0.4736, "step": 13095 }, { "epoch": 0.9710091198932305, "grad_norm": 0.37594175338745117, "learning_rate": 6.905726431607902e-05, "loss": 0.5126, "step": 13096 }, { "epoch": 0.9710832653666494, "grad_norm": 0.34659337997436523, "learning_rate": 6.904726181545387e-05, "loss": 0.4911, "step": 13097 }, { "epoch": 0.9711574108400682, "grad_norm": 0.35343948006629944, "learning_rate": 6.903725931482871e-05, "loss": 0.4946, "step": 13098 }, { "epoch": 0.971231556313487, "grad_norm": 0.3441373109817505, "learning_rate": 6.902725681420356e-05, "loss": 0.4637, "step": 13099 }, { "epoch": 0.9713057017869059, "grad_norm": 0.3708246052265167, "learning_rate": 6.90172543135784e-05, "loss": 0.5228, "step": 13100 }, { "epoch": 0.9713798472603248, "grad_norm": 0.3625262677669525, "learning_rate": 6.900725181295325e-05, "loss": 0.5052, "step": 13101 }, { "epoch": 0.9714539927337436, "grad_norm": 0.3675873279571533, "learning_rate": 6.899724931232808e-05, "loss": 0.4958, "step": 13102 }, { "epoch": 0.9715281382071624, "grad_norm": 0.3788892924785614, "learning_rate": 6.898724681170293e-05, "loss": 0.4948, "step": 13103 }, { "epoch": 0.9716022836805813, "grad_norm": 0.356667697429657, "learning_rate": 6.897724431107777e-05, "loss": 0.4566, "step": 13104 }, { "epoch": 0.9716764291540001, "grad_norm": 0.36410072445869446, "learning_rate": 6.896724181045262e-05, "loss": 0.4799, "step": 13105 }, { "epoch": 0.971750574627419, "grad_norm": 0.3658409118652344, "learning_rate": 6.895723930982746e-05, "loss": 0.5201, "step": 13106 }, { "epoch": 0.9718247201008379, "grad_norm": 0.35712894797325134, "learning_rate": 6.894723680920231e-05, "loss": 0.4938, "step": 13107 }, { "epoch": 0.9718988655742566, "grad_norm": 0.37310099601745605, "learning_rate": 6.893723430857715e-05, "loss": 0.4946, "step": 13108 }, { "epoch": 0.9719730110476755, "grad_norm": 0.3498448431491852, "learning_rate": 6.892723180795198e-05, "loss": 0.471, "step": 13109 }, { "epoch": 0.9720471565210944, "grad_norm": 0.36503368616104126, "learning_rate": 6.891722930732683e-05, "loss": 0.4679, "step": 13110 }, { "epoch": 0.9721213019945132, "grad_norm": 0.34544119238853455, "learning_rate": 6.890722680670167e-05, "loss": 0.475, "step": 13111 }, { "epoch": 0.9721954474679321, "grad_norm": 0.36456939578056335, "learning_rate": 6.889722430607652e-05, "loss": 0.5059, "step": 13112 }, { "epoch": 0.972269592941351, "grad_norm": 0.3726378381252289, "learning_rate": 6.888722180545136e-05, "loss": 0.5074, "step": 13113 }, { "epoch": 0.9723437384147697, "grad_norm": 0.3491937816143036, "learning_rate": 6.887721930482621e-05, "loss": 0.4583, "step": 13114 }, { "epoch": 0.9724178838881886, "grad_norm": 0.37050125002861023, "learning_rate": 6.886721680420105e-05, "loss": 0.4752, "step": 13115 }, { "epoch": 0.9724920293616075, "grad_norm": 0.3788219690322876, "learning_rate": 6.88572143035759e-05, "loss": 0.4774, "step": 13116 }, { "epoch": 0.9725661748350263, "grad_norm": 0.3507362902164459, "learning_rate": 6.884721180295073e-05, "loss": 0.4814, "step": 13117 }, { "epoch": 0.9726403203084452, "grad_norm": 0.35195574164390564, "learning_rate": 6.883720930232558e-05, "loss": 0.5046, "step": 13118 }, { "epoch": 0.972714465781864, "grad_norm": 0.35988280177116394, "learning_rate": 6.882720680170042e-05, "loss": 0.5041, "step": 13119 }, { "epoch": 0.9727886112552828, "grad_norm": 0.3532305359840393, "learning_rate": 6.881720430107527e-05, "loss": 0.4675, "step": 13120 }, { "epoch": 0.9728627567287017, "grad_norm": 0.35882502794265747, "learning_rate": 6.880720180045011e-05, "loss": 0.4707, "step": 13121 }, { "epoch": 0.9729369022021206, "grad_norm": 0.34210240840911865, "learning_rate": 6.879719929982496e-05, "loss": 0.4429, "step": 13122 }, { "epoch": 0.9730110476755394, "grad_norm": 0.36483052372932434, "learning_rate": 6.878719679919981e-05, "loss": 0.4743, "step": 13123 }, { "epoch": 0.9730851931489582, "grad_norm": 0.35866302251815796, "learning_rate": 6.877719429857465e-05, "loss": 0.4871, "step": 13124 }, { "epoch": 0.9731593386223771, "grad_norm": 0.32990819215774536, "learning_rate": 6.87671917979495e-05, "loss": 0.4627, "step": 13125 }, { "epoch": 0.9732334840957959, "grad_norm": 0.35228076577186584, "learning_rate": 6.875718929732434e-05, "loss": 0.4814, "step": 13126 }, { "epoch": 0.9733076295692148, "grad_norm": 0.36899134516716003, "learning_rate": 6.874718679669919e-05, "loss": 0.4975, "step": 13127 }, { "epoch": 0.9733817750426337, "grad_norm": 0.36644771695137024, "learning_rate": 6.873718429607402e-05, "loss": 0.4885, "step": 13128 }, { "epoch": 0.9734559205160525, "grad_norm": 0.3697381019592285, "learning_rate": 6.872718179544887e-05, "loss": 0.4987, "step": 13129 }, { "epoch": 0.9735300659894713, "grad_norm": 0.35740768909454346, "learning_rate": 6.871717929482371e-05, "loss": 0.4809, "step": 13130 }, { "epoch": 0.9736042114628902, "grad_norm": 0.3862540125846863, "learning_rate": 6.870717679419856e-05, "loss": 0.4771, "step": 13131 }, { "epoch": 0.973678356936309, "grad_norm": 0.3740079402923584, "learning_rate": 6.86971742935734e-05, "loss": 0.5092, "step": 13132 }, { "epoch": 0.9737525024097279, "grad_norm": 0.3596722483634949, "learning_rate": 6.868717179294824e-05, "loss": 0.4742, "step": 13133 }, { "epoch": 0.9738266478831468, "grad_norm": 0.36401641368865967, "learning_rate": 6.867716929232309e-05, "loss": 0.5057, "step": 13134 }, { "epoch": 0.9739007933565655, "grad_norm": 0.3688211143016815, "learning_rate": 6.866716679169792e-05, "loss": 0.5357, "step": 13135 }, { "epoch": 0.9739749388299844, "grad_norm": 0.39002326130867004, "learning_rate": 6.865716429107277e-05, "loss": 0.5387, "step": 13136 }, { "epoch": 0.9740490843034033, "grad_norm": 0.3673442006111145, "learning_rate": 6.864716179044761e-05, "loss": 0.5019, "step": 13137 }, { "epoch": 0.9741232297768221, "grad_norm": 0.3420558571815491, "learning_rate": 6.863715928982246e-05, "loss": 0.4508, "step": 13138 }, { "epoch": 0.974197375250241, "grad_norm": 0.3464168608188629, "learning_rate": 6.86271567891973e-05, "loss": 0.4572, "step": 13139 }, { "epoch": 0.9742715207236599, "grad_norm": 0.3686908185482025, "learning_rate": 6.861715428857215e-05, "loss": 0.4962, "step": 13140 }, { "epoch": 0.9743456661970786, "grad_norm": 0.3613579273223877, "learning_rate": 6.860715178794699e-05, "loss": 0.4954, "step": 13141 }, { "epoch": 0.9744198116704975, "grad_norm": 0.36751362681388855, "learning_rate": 6.859714928732184e-05, "loss": 0.4793, "step": 13142 }, { "epoch": 0.9744939571439164, "grad_norm": 0.3769993185997009, "learning_rate": 6.858714678669667e-05, "loss": 0.508, "step": 13143 }, { "epoch": 0.9745681026173352, "grad_norm": 0.365833580493927, "learning_rate": 6.857714428607153e-05, "loss": 0.4782, "step": 13144 }, { "epoch": 0.9746422480907541, "grad_norm": 0.3957188129425049, "learning_rate": 6.856714178544636e-05, "loss": 0.5509, "step": 13145 }, { "epoch": 0.974716393564173, "grad_norm": 0.3577587902545929, "learning_rate": 6.85571392848212e-05, "loss": 0.4733, "step": 13146 }, { "epoch": 0.9747905390375917, "grad_norm": 0.3486081659793854, "learning_rate": 6.854713678419605e-05, "loss": 0.4366, "step": 13147 }, { "epoch": 0.9748646845110106, "grad_norm": 0.3586556017398834, "learning_rate": 6.853713428357089e-05, "loss": 0.4956, "step": 13148 }, { "epoch": 0.9749388299844295, "grad_norm": 0.3760491907596588, "learning_rate": 6.852713178294574e-05, "loss": 0.478, "step": 13149 }, { "epoch": 0.9750129754578483, "grad_norm": 0.35139766335487366, "learning_rate": 6.851712928232057e-05, "loss": 0.4842, "step": 13150 }, { "epoch": 0.9750871209312671, "grad_norm": 0.3488849401473999, "learning_rate": 6.850712678169543e-05, "loss": 0.4941, "step": 13151 }, { "epoch": 0.975161266404686, "grad_norm": 0.3533267378807068, "learning_rate": 6.849712428107026e-05, "loss": 0.4536, "step": 13152 }, { "epoch": 0.9752354118781048, "grad_norm": 0.35658642649650574, "learning_rate": 6.848712178044511e-05, "loss": 0.4522, "step": 13153 }, { "epoch": 0.9753095573515237, "grad_norm": 0.38604873418807983, "learning_rate": 6.847711927981995e-05, "loss": 0.4927, "step": 13154 }, { "epoch": 0.9753837028249426, "grad_norm": 0.34531325101852417, "learning_rate": 6.84671167791948e-05, "loss": 0.467, "step": 13155 }, { "epoch": 0.9754578482983614, "grad_norm": 0.3703347146511078, "learning_rate": 6.845711427856965e-05, "loss": 0.4713, "step": 13156 }, { "epoch": 0.9755319937717802, "grad_norm": 0.33440276980400085, "learning_rate": 6.844711177794449e-05, "loss": 0.4641, "step": 13157 }, { "epoch": 0.9756061392451991, "grad_norm": 0.3787010908126831, "learning_rate": 6.843710927731934e-05, "loss": 0.5382, "step": 13158 }, { "epoch": 0.9756802847186179, "grad_norm": 0.3824026584625244, "learning_rate": 6.842710677669418e-05, "loss": 0.5117, "step": 13159 }, { "epoch": 0.9757544301920368, "grad_norm": 0.35215234756469727, "learning_rate": 6.841710427606903e-05, "loss": 0.4793, "step": 13160 }, { "epoch": 0.9758285756654557, "grad_norm": 0.3692036271095276, "learning_rate": 6.840710177544386e-05, "loss": 0.491, "step": 13161 }, { "epoch": 0.9759027211388744, "grad_norm": 0.3636375367641449, "learning_rate": 6.839709927481871e-05, "loss": 0.4764, "step": 13162 }, { "epoch": 0.9759768666122933, "grad_norm": 0.3686641752719879, "learning_rate": 6.838709677419355e-05, "loss": 0.53, "step": 13163 }, { "epoch": 0.9760510120857122, "grad_norm": 0.34319886565208435, "learning_rate": 6.83770942735684e-05, "loss": 0.4723, "step": 13164 }, { "epoch": 0.976125157559131, "grad_norm": 0.34678569436073303, "learning_rate": 6.836709177294324e-05, "loss": 0.5027, "step": 13165 }, { "epoch": 0.9761993030325499, "grad_norm": 0.3586236834526062, "learning_rate": 6.835708927231809e-05, "loss": 0.4687, "step": 13166 }, { "epoch": 0.9762734485059688, "grad_norm": 0.3521687984466553, "learning_rate": 6.834708677169293e-05, "loss": 0.5162, "step": 13167 }, { "epoch": 0.9763475939793875, "grad_norm": 0.35445383191108704, "learning_rate": 6.833708427106778e-05, "loss": 0.5074, "step": 13168 }, { "epoch": 0.9764217394528064, "grad_norm": 0.366232693195343, "learning_rate": 6.832708177044262e-05, "loss": 0.4876, "step": 13169 }, { "epoch": 0.9764958849262253, "grad_norm": 0.3763573169708252, "learning_rate": 6.831707926981745e-05, "loss": 0.5352, "step": 13170 }, { "epoch": 0.9765700303996441, "grad_norm": 0.37052395939826965, "learning_rate": 6.83070767691923e-05, "loss": 0.5111, "step": 13171 }, { "epoch": 0.976644175873063, "grad_norm": 0.36632347106933594, "learning_rate": 6.829707426856714e-05, "loss": 0.4901, "step": 13172 }, { "epoch": 0.9767183213464818, "grad_norm": 0.3420270085334778, "learning_rate": 6.828707176794199e-05, "loss": 0.4971, "step": 13173 }, { "epoch": 0.9767924668199006, "grad_norm": 0.3541351556777954, "learning_rate": 6.827706926731683e-05, "loss": 0.4852, "step": 13174 }, { "epoch": 0.9768666122933195, "grad_norm": 0.3775588274002075, "learning_rate": 6.826706676669168e-05, "loss": 0.516, "step": 13175 }, { "epoch": 0.9769407577667384, "grad_norm": 0.3567996323108673, "learning_rate": 6.825706426606652e-05, "loss": 0.494, "step": 13176 }, { "epoch": 0.9770149032401572, "grad_norm": 0.33789893984794617, "learning_rate": 6.824706176544137e-05, "loss": 0.4488, "step": 13177 }, { "epoch": 0.977089048713576, "grad_norm": 0.3341384530067444, "learning_rate": 6.82370592648162e-05, "loss": 0.4097, "step": 13178 }, { "epoch": 0.9771631941869949, "grad_norm": 0.37579333782196045, "learning_rate": 6.822705676419105e-05, "loss": 0.4503, "step": 13179 }, { "epoch": 0.9772373396604137, "grad_norm": 0.34583809971809387, "learning_rate": 6.821705426356589e-05, "loss": 0.4395, "step": 13180 }, { "epoch": 0.9773114851338326, "grad_norm": 0.35022929310798645, "learning_rate": 6.820705176294074e-05, "loss": 0.4752, "step": 13181 }, { "epoch": 0.9773856306072515, "grad_norm": 0.37953606247901917, "learning_rate": 6.819704926231558e-05, "loss": 0.4808, "step": 13182 }, { "epoch": 0.9774597760806703, "grad_norm": 0.3760455846786499, "learning_rate": 6.818704676169042e-05, "loss": 0.4699, "step": 13183 }, { "epoch": 0.9775339215540891, "grad_norm": 0.36804237961769104, "learning_rate": 6.817704426106527e-05, "loss": 0.4901, "step": 13184 }, { "epoch": 0.977608067027508, "grad_norm": 0.3747917115688324, "learning_rate": 6.81670417604401e-05, "loss": 0.4537, "step": 13185 }, { "epoch": 0.9776822125009268, "grad_norm": 0.37716203927993774, "learning_rate": 6.815703925981495e-05, "loss": 0.5, "step": 13186 }, { "epoch": 0.9777563579743457, "grad_norm": 0.361538827419281, "learning_rate": 6.814703675918979e-05, "loss": 0.502, "step": 13187 }, { "epoch": 0.9778305034477646, "grad_norm": 0.3913570046424866, "learning_rate": 6.813703425856464e-05, "loss": 0.5047, "step": 13188 }, { "epoch": 0.9779046489211833, "grad_norm": 0.39642202854156494, "learning_rate": 6.812703175793948e-05, "loss": 0.4874, "step": 13189 }, { "epoch": 0.9779787943946022, "grad_norm": 0.35438236594200134, "learning_rate": 6.811702925731433e-05, "loss": 0.4722, "step": 13190 }, { "epoch": 0.9780529398680211, "grad_norm": 0.37582916021347046, "learning_rate": 6.810702675668918e-05, "loss": 0.5296, "step": 13191 }, { "epoch": 0.9781270853414399, "grad_norm": 0.35850808024406433, "learning_rate": 6.809702425606402e-05, "loss": 0.478, "step": 13192 }, { "epoch": 0.9782012308148588, "grad_norm": 0.34749332070350647, "learning_rate": 6.808702175543887e-05, "loss": 0.4345, "step": 13193 }, { "epoch": 0.9782753762882777, "grad_norm": 0.3464149236679077, "learning_rate": 6.807701925481372e-05, "loss": 0.4512, "step": 13194 }, { "epoch": 0.9783495217616964, "grad_norm": 0.345604807138443, "learning_rate": 6.806701675418856e-05, "loss": 0.4565, "step": 13195 }, { "epoch": 0.9784236672351153, "grad_norm": 0.3699512481689453, "learning_rate": 6.805701425356339e-05, "loss": 0.5153, "step": 13196 }, { "epoch": 0.9784978127085342, "grad_norm": 0.357766330242157, "learning_rate": 6.804701175293824e-05, "loss": 0.4457, "step": 13197 }, { "epoch": 0.978571958181953, "grad_norm": 0.36376917362213135, "learning_rate": 6.803700925231308e-05, "loss": 0.5087, "step": 13198 }, { "epoch": 0.9786461036553719, "grad_norm": 0.37072500586509705, "learning_rate": 6.802700675168793e-05, "loss": 0.5011, "step": 13199 }, { "epoch": 0.9787202491287907, "grad_norm": 0.35641101002693176, "learning_rate": 6.801700425106277e-05, "loss": 0.4714, "step": 13200 }, { "epoch": 0.9787943946022095, "grad_norm": 0.3430979549884796, "learning_rate": 6.800700175043762e-05, "loss": 0.4614, "step": 13201 }, { "epoch": 0.9788685400756284, "grad_norm": 0.3613409996032715, "learning_rate": 6.799699924981246e-05, "loss": 0.4729, "step": 13202 }, { "epoch": 0.9789426855490472, "grad_norm": 0.3521021604537964, "learning_rate": 6.79869967491873e-05, "loss": 0.4605, "step": 13203 }, { "epoch": 0.9790168310224661, "grad_norm": 0.35272809863090515, "learning_rate": 6.797699424856214e-05, "loss": 0.4706, "step": 13204 }, { "epoch": 0.979090976495885, "grad_norm": 0.38491109013557434, "learning_rate": 6.7966991747937e-05, "loss": 0.5369, "step": 13205 }, { "epoch": 0.9791651219693037, "grad_norm": 0.34896692633628845, "learning_rate": 6.795698924731183e-05, "loss": 0.4603, "step": 13206 }, { "epoch": 0.9792392674427226, "grad_norm": 0.36494266986846924, "learning_rate": 6.794698674668667e-05, "loss": 0.4883, "step": 13207 }, { "epoch": 0.9793134129161415, "grad_norm": 0.3370858132839203, "learning_rate": 6.793698424606152e-05, "loss": 0.4702, "step": 13208 }, { "epoch": 0.9793875583895603, "grad_norm": 0.37658241391181946, "learning_rate": 6.792698174543636e-05, "loss": 0.472, "step": 13209 }, { "epoch": 0.9794617038629791, "grad_norm": 0.35318759083747864, "learning_rate": 6.791697924481121e-05, "loss": 0.4868, "step": 13210 }, { "epoch": 0.979535849336398, "grad_norm": 0.3664093017578125, "learning_rate": 6.790697674418604e-05, "loss": 0.5006, "step": 13211 }, { "epoch": 0.9796099948098168, "grad_norm": 0.34839117527008057, "learning_rate": 6.78969742435609e-05, "loss": 0.4816, "step": 13212 }, { "epoch": 0.9796841402832357, "grad_norm": 0.351914644241333, "learning_rate": 6.788697174293573e-05, "loss": 0.4713, "step": 13213 }, { "epoch": 0.9797582857566546, "grad_norm": 0.34846606850624084, "learning_rate": 6.787696924231058e-05, "loss": 0.4703, "step": 13214 }, { "epoch": 0.9798324312300734, "grad_norm": 0.3732849359512329, "learning_rate": 6.786696674168542e-05, "loss": 0.493, "step": 13215 }, { "epoch": 0.9799065767034922, "grad_norm": 0.3529457151889801, "learning_rate": 6.785696424106027e-05, "loss": 0.485, "step": 13216 }, { "epoch": 0.9799807221769111, "grad_norm": 0.3576987385749817, "learning_rate": 6.784696174043511e-05, "loss": 0.4769, "step": 13217 }, { "epoch": 0.9800548676503299, "grad_norm": 0.3613622188568115, "learning_rate": 6.783695923980996e-05, "loss": 0.4894, "step": 13218 }, { "epoch": 0.9801290131237488, "grad_norm": 0.36525681614875793, "learning_rate": 6.78269567391848e-05, "loss": 0.5012, "step": 13219 }, { "epoch": 0.9802031585971677, "grad_norm": 0.37233543395996094, "learning_rate": 6.781695423855963e-05, "loss": 0.503, "step": 13220 }, { "epoch": 0.9802773040705864, "grad_norm": 0.3570554852485657, "learning_rate": 6.780695173793448e-05, "loss": 0.4871, "step": 13221 }, { "epoch": 0.9803514495440053, "grad_norm": 0.3825734555721283, "learning_rate": 6.779694923730932e-05, "loss": 0.5031, "step": 13222 }, { "epoch": 0.9804255950174242, "grad_norm": 0.3474442958831787, "learning_rate": 6.778694673668417e-05, "loss": 0.4222, "step": 13223 }, { "epoch": 0.980499740490843, "grad_norm": 0.35855618119239807, "learning_rate": 6.777694423605902e-05, "loss": 0.463, "step": 13224 }, { "epoch": 0.9805738859642619, "grad_norm": 0.36579370498657227, "learning_rate": 6.776694173543386e-05, "loss": 0.497, "step": 13225 }, { "epoch": 0.9806480314376808, "grad_norm": 0.37663668394088745, "learning_rate": 6.775693923480871e-05, "loss": 0.4944, "step": 13226 }, { "epoch": 0.9807221769110995, "grad_norm": 0.35434314608573914, "learning_rate": 6.774693673418355e-05, "loss": 0.4882, "step": 13227 }, { "epoch": 0.9807963223845184, "grad_norm": 0.34119269251823425, "learning_rate": 6.77369342335584e-05, "loss": 0.4448, "step": 13228 }, { "epoch": 0.9808704678579373, "grad_norm": 0.3788169026374817, "learning_rate": 6.772693173293325e-05, "loss": 0.5167, "step": 13229 }, { "epoch": 0.9809446133313561, "grad_norm": 0.3548777401447296, "learning_rate": 6.771692923230808e-05, "loss": 0.4644, "step": 13230 }, { "epoch": 0.981018758804775, "grad_norm": 0.3813820779323578, "learning_rate": 6.770692673168293e-05, "loss": 0.4887, "step": 13231 }, { "epoch": 0.9810929042781938, "grad_norm": 0.3580969572067261, "learning_rate": 6.769692423105777e-05, "loss": 0.4861, "step": 13232 }, { "epoch": 0.9811670497516126, "grad_norm": 0.3719416558742523, "learning_rate": 6.768692173043261e-05, "loss": 0.4684, "step": 13233 }, { "epoch": 0.9812411952250315, "grad_norm": 0.37855228781700134, "learning_rate": 6.767691922980746e-05, "loss": 0.5384, "step": 13234 }, { "epoch": 0.9813153406984504, "grad_norm": 0.38560977578163147, "learning_rate": 6.76669167291823e-05, "loss": 0.5278, "step": 13235 }, { "epoch": 0.9813894861718692, "grad_norm": 0.3749857246875763, "learning_rate": 6.765691422855715e-05, "loss": 0.5046, "step": 13236 }, { "epoch": 0.981463631645288, "grad_norm": 0.3390498757362366, "learning_rate": 6.764691172793198e-05, "loss": 0.4476, "step": 13237 }, { "epoch": 0.9815377771187069, "grad_norm": 0.3712673783302307, "learning_rate": 6.763690922730684e-05, "loss": 0.5054, "step": 13238 }, { "epoch": 0.9816119225921257, "grad_norm": 0.3493378460407257, "learning_rate": 6.762690672668167e-05, "loss": 0.5145, "step": 13239 }, { "epoch": 0.9816860680655446, "grad_norm": 0.3756914436817169, "learning_rate": 6.761690422605652e-05, "loss": 0.5175, "step": 13240 }, { "epoch": 0.9817602135389635, "grad_norm": 0.36746731400489807, "learning_rate": 6.760690172543136e-05, "loss": 0.4994, "step": 13241 }, { "epoch": 0.9818343590123823, "grad_norm": 0.3688088059425354, "learning_rate": 6.759689922480621e-05, "loss": 0.5035, "step": 13242 }, { "epoch": 0.9819085044858011, "grad_norm": 0.33976444602012634, "learning_rate": 6.758689672418105e-05, "loss": 0.4608, "step": 13243 }, { "epoch": 0.98198264995922, "grad_norm": 0.3579324781894684, "learning_rate": 6.75768942235559e-05, "loss": 0.4784, "step": 13244 }, { "epoch": 0.9820567954326388, "grad_norm": 0.3447133004665375, "learning_rate": 6.756689172293074e-05, "loss": 0.4632, "step": 13245 }, { "epoch": 0.9821309409060577, "grad_norm": 0.3431934416294098, "learning_rate": 6.755688922230557e-05, "loss": 0.4653, "step": 13246 }, { "epoch": 0.9822050863794766, "grad_norm": 0.3478931784629822, "learning_rate": 6.754688672168042e-05, "loss": 0.4441, "step": 13247 }, { "epoch": 0.9822792318528953, "grad_norm": 0.3450727164745331, "learning_rate": 6.753688422105526e-05, "loss": 0.4722, "step": 13248 }, { "epoch": 0.9823533773263142, "grad_norm": 0.35098373889923096, "learning_rate": 6.752688172043011e-05, "loss": 0.4829, "step": 13249 }, { "epoch": 0.9824275227997331, "grad_norm": 0.3858092129230499, "learning_rate": 6.751687921980495e-05, "loss": 0.5188, "step": 13250 }, { "epoch": 0.9825016682731519, "grad_norm": 0.3663034439086914, "learning_rate": 6.75068767191798e-05, "loss": 0.5042, "step": 13251 }, { "epoch": 0.9825758137465708, "grad_norm": 0.37364670634269714, "learning_rate": 6.749687421855464e-05, "loss": 0.5115, "step": 13252 }, { "epoch": 0.9826499592199897, "grad_norm": 0.3534863293170929, "learning_rate": 6.748687171792949e-05, "loss": 0.4836, "step": 13253 }, { "epoch": 0.9827241046934084, "grad_norm": 0.3701000213623047, "learning_rate": 6.747686921730432e-05, "loss": 0.4772, "step": 13254 }, { "epoch": 0.9827982501668273, "grad_norm": 0.34728437662124634, "learning_rate": 6.746686671667917e-05, "loss": 0.4461, "step": 13255 }, { "epoch": 0.9828723956402462, "grad_norm": 0.40968698263168335, "learning_rate": 6.745686421605401e-05, "loss": 0.5737, "step": 13256 }, { "epoch": 0.982946541113665, "grad_norm": 0.3753627836704254, "learning_rate": 6.744686171542886e-05, "loss": 0.49, "step": 13257 }, { "epoch": 0.9830206865870839, "grad_norm": 0.35092321038246155, "learning_rate": 6.74368592148037e-05, "loss": 0.4684, "step": 13258 }, { "epoch": 0.9830948320605027, "grad_norm": 0.37224769592285156, "learning_rate": 6.742685671417855e-05, "loss": 0.5426, "step": 13259 }, { "epoch": 0.9831689775339215, "grad_norm": 0.3494206964969635, "learning_rate": 6.741685421355339e-05, "loss": 0.4575, "step": 13260 }, { "epoch": 0.9832431230073404, "grad_norm": 0.35763832926750183, "learning_rate": 6.740685171292824e-05, "loss": 0.4911, "step": 13261 }, { "epoch": 0.9833172684807593, "grad_norm": 0.37709975242614746, "learning_rate": 6.739684921230309e-05, "loss": 0.5167, "step": 13262 }, { "epoch": 0.9833914139541781, "grad_norm": 0.3629358410835266, "learning_rate": 6.738684671167793e-05, "loss": 0.5025, "step": 13263 }, { "epoch": 0.983465559427597, "grad_norm": 0.36514583230018616, "learning_rate": 6.737684421105278e-05, "loss": 0.5002, "step": 13264 }, { "epoch": 0.9835397049010158, "grad_norm": 0.36770716309547424, "learning_rate": 6.736684171042761e-05, "loss": 0.4742, "step": 13265 }, { "epoch": 0.9836138503744346, "grad_norm": 0.3586125671863556, "learning_rate": 6.735683920980246e-05, "loss": 0.4993, "step": 13266 }, { "epoch": 0.9836879958478535, "grad_norm": 0.3462476432323456, "learning_rate": 6.73468367091773e-05, "loss": 0.4749, "step": 13267 }, { "epoch": 0.9837621413212724, "grad_norm": 0.3475719392299652, "learning_rate": 6.733683420855215e-05, "loss": 0.452, "step": 13268 }, { "epoch": 0.9838362867946912, "grad_norm": 0.3766119182109833, "learning_rate": 6.732683170792699e-05, "loss": 0.5192, "step": 13269 }, { "epoch": 0.98391043226811, "grad_norm": 0.34389904141426086, "learning_rate": 6.731682920730183e-05, "loss": 0.4883, "step": 13270 }, { "epoch": 0.9839845777415289, "grad_norm": 0.357448935508728, "learning_rate": 6.730682670667668e-05, "loss": 0.506, "step": 13271 }, { "epoch": 0.9840587232149477, "grad_norm": 0.3539287745952606, "learning_rate": 6.729682420605151e-05, "loss": 0.4726, "step": 13272 }, { "epoch": 0.9841328686883666, "grad_norm": 0.35982567071914673, "learning_rate": 6.728682170542636e-05, "loss": 0.5074, "step": 13273 }, { "epoch": 0.9842070141617855, "grad_norm": 0.3408370316028595, "learning_rate": 6.72768192048012e-05, "loss": 0.4509, "step": 13274 }, { "epoch": 0.9842811596352042, "grad_norm": 0.34689807891845703, "learning_rate": 6.726681670417605e-05, "loss": 0.5008, "step": 13275 }, { "epoch": 0.9843553051086231, "grad_norm": 0.36045193672180176, "learning_rate": 6.725681420355089e-05, "loss": 0.4841, "step": 13276 }, { "epoch": 0.984429450582042, "grad_norm": 0.3443487286567688, "learning_rate": 6.724681170292574e-05, "loss": 0.4815, "step": 13277 }, { "epoch": 0.9845035960554608, "grad_norm": 0.35374966263771057, "learning_rate": 6.723680920230058e-05, "loss": 0.4696, "step": 13278 }, { "epoch": 0.9845777415288797, "grad_norm": 0.36156222224235535, "learning_rate": 6.722680670167543e-05, "loss": 0.4774, "step": 13279 }, { "epoch": 0.9846518870022986, "grad_norm": 0.3833055794239044, "learning_rate": 6.721680420105026e-05, "loss": 0.5071, "step": 13280 }, { "epoch": 0.9847260324757173, "grad_norm": 0.39015161991119385, "learning_rate": 6.720680170042511e-05, "loss": 0.5414, "step": 13281 }, { "epoch": 0.9848001779491362, "grad_norm": 0.37784916162490845, "learning_rate": 6.719679919979995e-05, "loss": 0.5094, "step": 13282 }, { "epoch": 0.9848743234225551, "grad_norm": 0.38250187039375305, "learning_rate": 6.718679669917479e-05, "loss": 0.4688, "step": 13283 }, { "epoch": 0.9849484688959739, "grad_norm": 0.36609578132629395, "learning_rate": 6.717679419854964e-05, "loss": 0.4833, "step": 13284 }, { "epoch": 0.9850226143693928, "grad_norm": 0.34966596961021423, "learning_rate": 6.716679169792448e-05, "loss": 0.4765, "step": 13285 }, { "epoch": 0.9850967598428116, "grad_norm": 0.3606345057487488, "learning_rate": 6.715678919729933e-05, "loss": 0.4616, "step": 13286 }, { "epoch": 0.9851709053162304, "grad_norm": 0.37208494544029236, "learning_rate": 6.714678669667416e-05, "loss": 0.4983, "step": 13287 }, { "epoch": 0.9852450507896493, "grad_norm": 0.39054611325263977, "learning_rate": 6.713678419604901e-05, "loss": 0.5176, "step": 13288 }, { "epoch": 0.9853191962630682, "grad_norm": 0.36066189408302307, "learning_rate": 6.712678169542385e-05, "loss": 0.4961, "step": 13289 }, { "epoch": 0.985393341736487, "grad_norm": 0.33084920048713684, "learning_rate": 6.71167791947987e-05, "loss": 0.4573, "step": 13290 }, { "epoch": 0.9854674872099058, "grad_norm": 0.37306907773017883, "learning_rate": 6.710677669417354e-05, "loss": 0.4914, "step": 13291 }, { "epoch": 0.9855416326833247, "grad_norm": 0.3777422606945038, "learning_rate": 6.709677419354839e-05, "loss": 0.5389, "step": 13292 }, { "epoch": 0.9856157781567435, "grad_norm": 0.3810291886329651, "learning_rate": 6.708677169292323e-05, "loss": 0.5386, "step": 13293 }, { "epoch": 0.9856899236301624, "grad_norm": 0.3688313663005829, "learning_rate": 6.707676919229808e-05, "loss": 0.4797, "step": 13294 }, { "epoch": 0.9857640691035813, "grad_norm": 0.3462752401828766, "learning_rate": 6.706676669167293e-05, "loss": 0.4709, "step": 13295 }, { "epoch": 0.985838214577, "grad_norm": 0.3710181415081024, "learning_rate": 6.705676419104777e-05, "loss": 0.4962, "step": 13296 }, { "epoch": 0.9859123600504189, "grad_norm": 0.3576034605503082, "learning_rate": 6.704676169042262e-05, "loss": 0.4816, "step": 13297 }, { "epoch": 0.9859865055238378, "grad_norm": 0.37360429763793945, "learning_rate": 6.703675918979745e-05, "loss": 0.4484, "step": 13298 }, { "epoch": 0.9860606509972566, "grad_norm": 0.3686513304710388, "learning_rate": 6.70267566891723e-05, "loss": 0.461, "step": 13299 }, { "epoch": 0.9861347964706755, "grad_norm": 0.37071359157562256, "learning_rate": 6.701675418854714e-05, "loss": 0.4998, "step": 13300 }, { "epoch": 0.9862089419440944, "grad_norm": 0.3937339782714844, "learning_rate": 6.700675168792199e-05, "loss": 0.5345, "step": 13301 }, { "epoch": 0.9862830874175131, "grad_norm": 0.3610111474990845, "learning_rate": 6.699674918729683e-05, "loss": 0.4886, "step": 13302 }, { "epoch": 0.986357232890932, "grad_norm": 0.3804261386394501, "learning_rate": 6.698674668667168e-05, "loss": 0.5019, "step": 13303 }, { "epoch": 0.9864313783643509, "grad_norm": 0.3710918724536896, "learning_rate": 6.697674418604652e-05, "loss": 0.521, "step": 13304 }, { "epoch": 0.9865055238377697, "grad_norm": 0.3618128001689911, "learning_rate": 6.696674168542137e-05, "loss": 0.4842, "step": 13305 }, { "epoch": 0.9865796693111886, "grad_norm": 0.37516286969184875, "learning_rate": 6.69567391847962e-05, "loss": 0.4772, "step": 13306 }, { "epoch": 0.9866538147846075, "grad_norm": 0.3752754032611847, "learning_rate": 6.694673668417104e-05, "loss": 0.5065, "step": 13307 }, { "epoch": 0.9867279602580262, "grad_norm": 0.3936833143234253, "learning_rate": 6.693673418354589e-05, "loss": 0.5246, "step": 13308 }, { "epoch": 0.9868021057314451, "grad_norm": 0.3388447165489197, "learning_rate": 6.692673168292073e-05, "loss": 0.4486, "step": 13309 }, { "epoch": 0.986876251204864, "grad_norm": 0.3425229787826538, "learning_rate": 6.691672918229558e-05, "loss": 0.4572, "step": 13310 }, { "epoch": 0.9869503966782828, "grad_norm": 0.33696508407592773, "learning_rate": 6.690672668167042e-05, "loss": 0.4691, "step": 13311 }, { "epoch": 0.9870245421517017, "grad_norm": 0.3776588439941406, "learning_rate": 6.689672418104527e-05, "loss": 0.4885, "step": 13312 }, { "epoch": 0.9870986876251205, "grad_norm": 0.3374192714691162, "learning_rate": 6.68867216804201e-05, "loss": 0.4688, "step": 13313 }, { "epoch": 0.9871728330985393, "grad_norm": 0.3640762269496918, "learning_rate": 6.687671917979496e-05, "loss": 0.4725, "step": 13314 }, { "epoch": 0.9872469785719582, "grad_norm": 0.37623047828674316, "learning_rate": 6.686671667916979e-05, "loss": 0.4999, "step": 13315 }, { "epoch": 0.987321124045377, "grad_norm": 0.3499128222465515, "learning_rate": 6.685671417854464e-05, "loss": 0.4829, "step": 13316 }, { "epoch": 0.9873952695187959, "grad_norm": 0.36814048886299133, "learning_rate": 6.684671167791948e-05, "loss": 0.4912, "step": 13317 }, { "epoch": 0.9874694149922147, "grad_norm": 0.3733349144458771, "learning_rate": 6.683670917729433e-05, "loss": 0.4745, "step": 13318 }, { "epoch": 0.9875435604656335, "grad_norm": 0.38269367814064026, "learning_rate": 6.682670667666917e-05, "loss": 0.5509, "step": 13319 }, { "epoch": 0.9876177059390524, "grad_norm": 0.34412863850593567, "learning_rate": 6.6816704176044e-05, "loss": 0.4907, "step": 13320 }, { "epoch": 0.9876918514124713, "grad_norm": 0.36324021220207214, "learning_rate": 6.680670167541886e-05, "loss": 0.5109, "step": 13321 }, { "epoch": 0.9877659968858901, "grad_norm": 0.34629008173942566, "learning_rate": 6.679669917479369e-05, "loss": 0.4703, "step": 13322 }, { "epoch": 0.987840142359309, "grad_norm": 0.32824045419692993, "learning_rate": 6.678669667416854e-05, "loss": 0.4368, "step": 13323 }, { "epoch": 0.9879142878327278, "grad_norm": 0.37292781472206116, "learning_rate": 6.677669417354338e-05, "loss": 0.5014, "step": 13324 }, { "epoch": 0.9879884333061466, "grad_norm": 0.37177523970603943, "learning_rate": 6.676669167291823e-05, "loss": 0.5201, "step": 13325 }, { "epoch": 0.9880625787795655, "grad_norm": 0.34920835494995117, "learning_rate": 6.675668917229307e-05, "loss": 0.5069, "step": 13326 }, { "epoch": 0.9881367242529844, "grad_norm": 0.3546678125858307, "learning_rate": 6.674668667166792e-05, "loss": 0.4852, "step": 13327 }, { "epoch": 0.9882108697264032, "grad_norm": 0.3650451600551605, "learning_rate": 6.673668417104276e-05, "loss": 0.4752, "step": 13328 }, { "epoch": 0.988285015199822, "grad_norm": 0.38921821117401123, "learning_rate": 6.67266816704176e-05, "loss": 0.574, "step": 13329 }, { "epoch": 0.9883591606732409, "grad_norm": 0.3574563264846802, "learning_rate": 6.671667916979246e-05, "loss": 0.4828, "step": 13330 }, { "epoch": 0.9884333061466597, "grad_norm": 0.3719465732574463, "learning_rate": 6.67066766691673e-05, "loss": 0.4986, "step": 13331 }, { "epoch": 0.9885074516200786, "grad_norm": 0.33625563979148865, "learning_rate": 6.669667416854215e-05, "loss": 0.4441, "step": 13332 }, { "epoch": 0.9885815970934975, "grad_norm": 0.35104402899742126, "learning_rate": 6.668667166791698e-05, "loss": 0.5024, "step": 13333 }, { "epoch": 0.9886557425669162, "grad_norm": 0.3682994544506073, "learning_rate": 6.667666916729183e-05, "loss": 0.4487, "step": 13334 }, { "epoch": 0.9887298880403351, "grad_norm": 0.34915435314178467, "learning_rate": 6.666666666666667e-05, "loss": 0.4689, "step": 13335 }, { "epoch": 0.988804033513754, "grad_norm": 0.35677164793014526, "learning_rate": 6.665666416604152e-05, "loss": 0.4955, "step": 13336 }, { "epoch": 0.9888781789871728, "grad_norm": 0.35879647731781006, "learning_rate": 6.664666166541636e-05, "loss": 0.485, "step": 13337 }, { "epoch": 0.9889523244605917, "grad_norm": 0.3817184567451477, "learning_rate": 6.663665916479121e-05, "loss": 0.4868, "step": 13338 }, { "epoch": 0.9890264699340106, "grad_norm": 0.3532137870788574, "learning_rate": 6.662665666416605e-05, "loss": 0.4682, "step": 13339 }, { "epoch": 0.9891006154074293, "grad_norm": 0.3523367941379547, "learning_rate": 6.66166541635409e-05, "loss": 0.4856, "step": 13340 }, { "epoch": 0.9891747608808482, "grad_norm": 0.3707314133644104, "learning_rate": 6.660665166291573e-05, "loss": 0.4456, "step": 13341 }, { "epoch": 0.9892489063542671, "grad_norm": 0.38197094202041626, "learning_rate": 6.659664916229058e-05, "loss": 0.5111, "step": 13342 }, { "epoch": 0.9893230518276859, "grad_norm": 0.3602227568626404, "learning_rate": 6.658664666166542e-05, "loss": 0.4752, "step": 13343 }, { "epoch": 0.9893971973011048, "grad_norm": 0.3641626238822937, "learning_rate": 6.657664416104026e-05, "loss": 0.514, "step": 13344 }, { "epoch": 0.9894713427745236, "grad_norm": 0.3895972967147827, "learning_rate": 6.656664166041511e-05, "loss": 0.5279, "step": 13345 }, { "epoch": 0.9895454882479424, "grad_norm": 0.346504271030426, "learning_rate": 6.655663915978995e-05, "loss": 0.4513, "step": 13346 }, { "epoch": 0.9896196337213613, "grad_norm": 0.37569743394851685, "learning_rate": 6.65466366591648e-05, "loss": 0.537, "step": 13347 }, { "epoch": 0.9896937791947802, "grad_norm": 0.35773536562919617, "learning_rate": 6.653663415853963e-05, "loss": 0.4647, "step": 13348 }, { "epoch": 0.989767924668199, "grad_norm": 0.34471479058265686, "learning_rate": 6.652663165791448e-05, "loss": 0.4712, "step": 13349 }, { "epoch": 0.9898420701416178, "grad_norm": 0.37067222595214844, "learning_rate": 6.651662915728932e-05, "loss": 0.5024, "step": 13350 }, { "epoch": 0.9899162156150367, "grad_norm": 0.3715617060661316, "learning_rate": 6.650662665666417e-05, "loss": 0.4928, "step": 13351 }, { "epoch": 0.9899903610884555, "grad_norm": 0.4058486223220825, "learning_rate": 6.649662415603901e-05, "loss": 0.5418, "step": 13352 }, { "epoch": 0.9900645065618744, "grad_norm": 0.35476765036582947, "learning_rate": 6.648662165541386e-05, "loss": 0.4718, "step": 13353 }, { "epoch": 0.9901386520352933, "grad_norm": 0.3422802686691284, "learning_rate": 6.64766191547887e-05, "loss": 0.4666, "step": 13354 }, { "epoch": 0.990212797508712, "grad_norm": 0.357997864484787, "learning_rate": 6.646661665416355e-05, "loss": 0.4864, "step": 13355 }, { "epoch": 0.9902869429821309, "grad_norm": 0.38179028034210205, "learning_rate": 6.645661415353838e-05, "loss": 0.5186, "step": 13356 }, { "epoch": 0.9903610884555498, "grad_norm": 0.3959274888038635, "learning_rate": 6.644661165291322e-05, "loss": 0.5134, "step": 13357 }, { "epoch": 0.9904352339289686, "grad_norm": 0.35204726457595825, "learning_rate": 6.643660915228807e-05, "loss": 0.4629, "step": 13358 }, { "epoch": 0.9905093794023875, "grad_norm": 0.3433099091053009, "learning_rate": 6.642660665166291e-05, "loss": 0.4829, "step": 13359 }, { "epoch": 0.9905835248758064, "grad_norm": 0.3460026979446411, "learning_rate": 6.641660415103776e-05, "loss": 0.4441, "step": 13360 }, { "epoch": 0.9906576703492251, "grad_norm": 0.3244757652282715, "learning_rate": 6.64066016504126e-05, "loss": 0.4511, "step": 13361 }, { "epoch": 0.990731815822644, "grad_norm": 0.3950486183166504, "learning_rate": 6.639659914978745e-05, "loss": 0.5777, "step": 13362 }, { "epoch": 0.9908059612960629, "grad_norm": 0.3671734929084778, "learning_rate": 6.63865966491623e-05, "loss": 0.5181, "step": 13363 }, { "epoch": 0.9908801067694817, "grad_norm": 0.3620392680168152, "learning_rate": 6.637659414853714e-05, "loss": 0.527, "step": 13364 }, { "epoch": 0.9909542522429006, "grad_norm": 0.3556441068649292, "learning_rate": 6.636659164791199e-05, "loss": 0.4646, "step": 13365 }, { "epoch": 0.9910283977163195, "grad_norm": 0.3488537669181824, "learning_rate": 6.635658914728682e-05, "loss": 0.4561, "step": 13366 }, { "epoch": 0.9911025431897382, "grad_norm": 0.35685378313064575, "learning_rate": 6.634658664666167e-05, "loss": 0.4601, "step": 13367 }, { "epoch": 0.9911766886631571, "grad_norm": 0.3692108690738678, "learning_rate": 6.633658414603652e-05, "loss": 0.5042, "step": 13368 }, { "epoch": 0.991250834136576, "grad_norm": 0.35115760564804077, "learning_rate": 6.632658164541136e-05, "loss": 0.4819, "step": 13369 }, { "epoch": 0.9913249796099948, "grad_norm": 0.3897306025028229, "learning_rate": 6.63165791447862e-05, "loss": 0.5202, "step": 13370 }, { "epoch": 0.9913991250834137, "grad_norm": 0.3568287193775177, "learning_rate": 6.630657664416105e-05, "loss": 0.4392, "step": 13371 }, { "epoch": 0.9914732705568325, "grad_norm": 0.34615349769592285, "learning_rate": 6.629657414353589e-05, "loss": 0.4511, "step": 13372 }, { "epoch": 0.9915474160302513, "grad_norm": 0.37936145067214966, "learning_rate": 6.628657164291074e-05, "loss": 0.5224, "step": 13373 }, { "epoch": 0.9916215615036702, "grad_norm": 0.37822842597961426, "learning_rate": 6.627656914228557e-05, "loss": 0.5098, "step": 13374 }, { "epoch": 0.9916957069770891, "grad_norm": 0.34743472933769226, "learning_rate": 6.626656664166042e-05, "loss": 0.4736, "step": 13375 }, { "epoch": 0.9917698524505079, "grad_norm": 0.37111538648605347, "learning_rate": 6.625656414103526e-05, "loss": 0.4986, "step": 13376 }, { "epoch": 0.9918439979239267, "grad_norm": 0.3866577744483948, "learning_rate": 6.624656164041011e-05, "loss": 0.5081, "step": 13377 }, { "epoch": 0.9919181433973456, "grad_norm": 0.38228780031204224, "learning_rate": 6.623655913978495e-05, "loss": 0.5115, "step": 13378 }, { "epoch": 0.9919922888707644, "grad_norm": 0.3402661085128784, "learning_rate": 6.62265566391598e-05, "loss": 0.4692, "step": 13379 }, { "epoch": 0.9920664343441833, "grad_norm": 0.3541297912597656, "learning_rate": 6.621655413853464e-05, "loss": 0.4692, "step": 13380 }, { "epoch": 0.9921405798176022, "grad_norm": 0.3694072663784027, "learning_rate": 6.620655163790947e-05, "loss": 0.5109, "step": 13381 }, { "epoch": 0.992214725291021, "grad_norm": 0.3650653660297394, "learning_rate": 6.619654913728432e-05, "loss": 0.475, "step": 13382 }, { "epoch": 0.9922888707644398, "grad_norm": 0.36752772331237793, "learning_rate": 6.618654663665916e-05, "loss": 0.4986, "step": 13383 }, { "epoch": 0.9923630162378587, "grad_norm": 0.3691200315952301, "learning_rate": 6.617654413603401e-05, "loss": 0.5246, "step": 13384 }, { "epoch": 0.9924371617112775, "grad_norm": 0.3962826430797577, "learning_rate": 6.616654163540885e-05, "loss": 0.534, "step": 13385 }, { "epoch": 0.9925113071846964, "grad_norm": 0.36148518323898315, "learning_rate": 6.61565391347837e-05, "loss": 0.4886, "step": 13386 }, { "epoch": 0.9925854526581153, "grad_norm": 0.3610318899154663, "learning_rate": 6.614653663415854e-05, "loss": 0.4834, "step": 13387 }, { "epoch": 0.992659598131534, "grad_norm": 0.3597036898136139, "learning_rate": 6.613653413353339e-05, "loss": 0.4697, "step": 13388 }, { "epoch": 0.9927337436049529, "grad_norm": 0.3732532560825348, "learning_rate": 6.612653163290823e-05, "loss": 0.5043, "step": 13389 }, { "epoch": 0.9928078890783718, "grad_norm": 0.3769145905971527, "learning_rate": 6.611652913228308e-05, "loss": 0.4859, "step": 13390 }, { "epoch": 0.9928820345517906, "grad_norm": 0.3590293824672699, "learning_rate": 6.610652663165791e-05, "loss": 0.4806, "step": 13391 }, { "epoch": 0.9929561800252095, "grad_norm": 0.3689097762107849, "learning_rate": 6.609652413103276e-05, "loss": 0.5142, "step": 13392 }, { "epoch": 0.9930303254986284, "grad_norm": 0.33285483717918396, "learning_rate": 6.60865216304076e-05, "loss": 0.4495, "step": 13393 }, { "epoch": 0.9931044709720471, "grad_norm": 0.38150855898857117, "learning_rate": 6.607651912978244e-05, "loss": 0.5035, "step": 13394 }, { "epoch": 0.993178616445466, "grad_norm": 0.3458050787448883, "learning_rate": 6.606651662915729e-05, "loss": 0.4844, "step": 13395 }, { "epoch": 0.9932527619188849, "grad_norm": 0.39404940605163574, "learning_rate": 6.605651412853213e-05, "loss": 0.5179, "step": 13396 }, { "epoch": 0.9933269073923037, "grad_norm": 0.3682429790496826, "learning_rate": 6.604651162790698e-05, "loss": 0.5029, "step": 13397 }, { "epoch": 0.9934010528657226, "grad_norm": 0.3510531485080719, "learning_rate": 6.603650912728183e-05, "loss": 0.4771, "step": 13398 }, { "epoch": 0.9934751983391414, "grad_norm": 0.37355518341064453, "learning_rate": 6.602650662665666e-05, "loss": 0.5336, "step": 13399 }, { "epoch": 0.9935493438125602, "grad_norm": 0.33697688579559326, "learning_rate": 6.601650412603151e-05, "loss": 0.4566, "step": 13400 }, { "epoch": 0.9936234892859791, "grad_norm": 0.3447900116443634, "learning_rate": 6.600650162540637e-05, "loss": 0.4404, "step": 13401 }, { "epoch": 0.993697634759398, "grad_norm": 0.3480142652988434, "learning_rate": 6.59964991247812e-05, "loss": 0.4742, "step": 13402 }, { "epoch": 0.9937717802328168, "grad_norm": 0.33421558141708374, "learning_rate": 6.598649662415605e-05, "loss": 0.4429, "step": 13403 }, { "epoch": 0.9938459257062356, "grad_norm": 0.3950321674346924, "learning_rate": 6.597649412353089e-05, "loss": 0.5133, "step": 13404 }, { "epoch": 0.9939200711796545, "grad_norm": 0.33959534764289856, "learning_rate": 6.596649162290574e-05, "loss": 0.4706, "step": 13405 }, { "epoch": 0.9939942166530733, "grad_norm": 0.37648579478263855, "learning_rate": 6.595648912228058e-05, "loss": 0.4816, "step": 13406 }, { "epoch": 0.9940683621264922, "grad_norm": 0.34431734681129456, "learning_rate": 6.594648662165541e-05, "loss": 0.4706, "step": 13407 }, { "epoch": 0.9941425075999111, "grad_norm": 0.3547893464565277, "learning_rate": 6.593648412103027e-05, "loss": 0.474, "step": 13408 }, { "epoch": 0.9942166530733298, "grad_norm": 0.3744117021560669, "learning_rate": 6.59264816204051e-05, "loss": 0.5359, "step": 13409 }, { "epoch": 0.9942907985467487, "grad_norm": 0.3868696987628937, "learning_rate": 6.591647911977995e-05, "loss": 0.5107, "step": 13410 }, { "epoch": 0.9943649440201676, "grad_norm": 0.33039581775665283, "learning_rate": 6.590647661915479e-05, "loss": 0.4272, "step": 13411 }, { "epoch": 0.9944390894935864, "grad_norm": 0.35105910897254944, "learning_rate": 6.589647411852964e-05, "loss": 0.4555, "step": 13412 }, { "epoch": 0.9945132349670053, "grad_norm": 0.3617892563343048, "learning_rate": 6.588647161790448e-05, "loss": 0.4717, "step": 13413 }, { "epoch": 0.9945873804404242, "grad_norm": 0.34675416350364685, "learning_rate": 6.587646911727933e-05, "loss": 0.4683, "step": 13414 }, { "epoch": 0.9946615259138429, "grad_norm": 0.3652799129486084, "learning_rate": 6.586646661665417e-05, "loss": 0.4846, "step": 13415 }, { "epoch": 0.9947356713872618, "grad_norm": 0.3582814335823059, "learning_rate": 6.585646411602902e-05, "loss": 0.4635, "step": 13416 }, { "epoch": 0.9948098168606807, "grad_norm": 0.3736548125743866, "learning_rate": 6.584646161540385e-05, "loss": 0.5037, "step": 13417 }, { "epoch": 0.9948839623340995, "grad_norm": 0.39326637983322144, "learning_rate": 6.583645911477869e-05, "loss": 0.5318, "step": 13418 }, { "epoch": 0.9949581078075184, "grad_norm": 0.33008214831352234, "learning_rate": 6.582645661415354e-05, "loss": 0.4518, "step": 13419 }, { "epoch": 0.9950322532809373, "grad_norm": 0.36750754714012146, "learning_rate": 6.581645411352838e-05, "loss": 0.5075, "step": 13420 }, { "epoch": 0.995106398754356, "grad_norm": 0.34826475381851196, "learning_rate": 6.580645161290323e-05, "loss": 0.5122, "step": 13421 }, { "epoch": 0.9951805442277749, "grad_norm": 0.39329656958580017, "learning_rate": 6.579644911227807e-05, "loss": 0.5042, "step": 13422 }, { "epoch": 0.9952546897011938, "grad_norm": 0.34444957971572876, "learning_rate": 6.578644661165292e-05, "loss": 0.4321, "step": 13423 }, { "epoch": 0.9953288351746126, "grad_norm": 0.36519843339920044, "learning_rate": 6.577644411102775e-05, "loss": 0.5154, "step": 13424 }, { "epoch": 0.9954029806480315, "grad_norm": 0.3635964095592499, "learning_rate": 6.57664416104026e-05, "loss": 0.4862, "step": 13425 }, { "epoch": 0.9954771261214503, "grad_norm": 0.3791328966617584, "learning_rate": 6.575643910977744e-05, "loss": 0.4722, "step": 13426 }, { "epoch": 0.9955512715948691, "grad_norm": 0.3332778215408325, "learning_rate": 6.574643660915229e-05, "loss": 0.4714, "step": 13427 }, { "epoch": 0.995625417068288, "grad_norm": 0.3531191945075989, "learning_rate": 6.573643410852713e-05, "loss": 0.4847, "step": 13428 }, { "epoch": 0.9956995625417069, "grad_norm": 0.3327026069164276, "learning_rate": 6.572643160790198e-05, "loss": 0.4436, "step": 13429 }, { "epoch": 0.9957737080151257, "grad_norm": 0.34301772713661194, "learning_rate": 6.571642910727682e-05, "loss": 0.4505, "step": 13430 }, { "epoch": 0.9958478534885445, "grad_norm": 0.3580775558948517, "learning_rate": 6.570642660665167e-05, "loss": 0.4546, "step": 13431 }, { "epoch": 0.9959219989619633, "grad_norm": 0.3394424319267273, "learning_rate": 6.56964241060265e-05, "loss": 0.4674, "step": 13432 }, { "epoch": 0.9959961444353822, "grad_norm": 0.36407947540283203, "learning_rate": 6.568642160540136e-05, "loss": 0.4901, "step": 13433 }, { "epoch": 0.9960702899088011, "grad_norm": 0.3444095551967621, "learning_rate": 6.567641910477619e-05, "loss": 0.4561, "step": 13434 }, { "epoch": 0.9961444353822199, "grad_norm": 0.40377774834632874, "learning_rate": 6.566641660415104e-05, "loss": 0.5264, "step": 13435 }, { "epoch": 0.9962185808556387, "grad_norm": 0.3565199673175812, "learning_rate": 6.56564141035259e-05, "loss": 0.4786, "step": 13436 }, { "epoch": 0.9962927263290576, "grad_norm": 0.40221109986305237, "learning_rate": 6.564641160290073e-05, "loss": 0.5413, "step": 13437 }, { "epoch": 0.9963668718024764, "grad_norm": 0.34231093525886536, "learning_rate": 6.563640910227558e-05, "loss": 0.4631, "step": 13438 }, { "epoch": 0.9964410172758953, "grad_norm": 0.38707658648490906, "learning_rate": 6.562640660165042e-05, "loss": 0.5039, "step": 13439 }, { "epoch": 0.9965151627493142, "grad_norm": 0.3607996702194214, "learning_rate": 6.561640410102527e-05, "loss": 0.5058, "step": 13440 }, { "epoch": 0.996589308222733, "grad_norm": 0.34667518734931946, "learning_rate": 6.56064016004001e-05, "loss": 0.4532, "step": 13441 }, { "epoch": 0.9966634536961518, "grad_norm": 0.3494761884212494, "learning_rate": 6.559639909977496e-05, "loss": 0.4609, "step": 13442 }, { "epoch": 0.9967375991695707, "grad_norm": 0.3785703778266907, "learning_rate": 6.55863965991498e-05, "loss": 0.515, "step": 13443 }, { "epoch": 0.9968117446429895, "grad_norm": 0.3609180152416229, "learning_rate": 6.557639409852463e-05, "loss": 0.4841, "step": 13444 }, { "epoch": 0.9968858901164084, "grad_norm": 0.35714489221572876, "learning_rate": 6.556639159789948e-05, "loss": 0.4898, "step": 13445 }, { "epoch": 0.9969600355898273, "grad_norm": 0.35183265805244446, "learning_rate": 6.555638909727432e-05, "loss": 0.466, "step": 13446 }, { "epoch": 0.997034181063246, "grad_norm": 0.3469674289226532, "learning_rate": 6.554638659664917e-05, "loss": 0.5005, "step": 13447 }, { "epoch": 0.9971083265366649, "grad_norm": 0.34997764229774475, "learning_rate": 6.5536384096024e-05, "loss": 0.4739, "step": 13448 }, { "epoch": 0.9971824720100838, "grad_norm": 0.36499303579330444, "learning_rate": 6.552638159539886e-05, "loss": 0.4883, "step": 13449 }, { "epoch": 0.9972566174835026, "grad_norm": 0.3427310585975647, "learning_rate": 6.55163790947737e-05, "loss": 0.4706, "step": 13450 }, { "epoch": 0.9973307629569215, "grad_norm": 0.3979019522666931, "learning_rate": 6.550637659414854e-05, "loss": 0.5297, "step": 13451 }, { "epoch": 0.9974049084303404, "grad_norm": 0.370298832654953, "learning_rate": 6.549637409352338e-05, "loss": 0.4802, "step": 13452 }, { "epoch": 0.9974790539037591, "grad_norm": 0.3410465121269226, "learning_rate": 6.548637159289823e-05, "loss": 0.4695, "step": 13453 }, { "epoch": 0.997553199377178, "grad_norm": 0.36313340067863464, "learning_rate": 6.547636909227307e-05, "loss": 0.486, "step": 13454 }, { "epoch": 0.9976273448505969, "grad_norm": 0.3834725022315979, "learning_rate": 6.54663665916479e-05, "loss": 0.5089, "step": 13455 }, { "epoch": 0.9977014903240157, "grad_norm": 0.3414956331253052, "learning_rate": 6.545636409102276e-05, "loss": 0.4577, "step": 13456 }, { "epoch": 0.9977756357974346, "grad_norm": 0.35464394092559814, "learning_rate": 6.54463615903976e-05, "loss": 0.4773, "step": 13457 }, { "epoch": 0.9978497812708534, "grad_norm": 0.3629676401615143, "learning_rate": 6.543635908977245e-05, "loss": 0.4679, "step": 13458 }, { "epoch": 0.9979239267442722, "grad_norm": 0.34362471103668213, "learning_rate": 6.542635658914728e-05, "loss": 0.4714, "step": 13459 }, { "epoch": 0.9979980722176911, "grad_norm": 0.3895672857761383, "learning_rate": 6.541635408852213e-05, "loss": 0.5098, "step": 13460 }, { "epoch": 0.99807221769111, "grad_norm": 0.3568822145462036, "learning_rate": 6.540635158789697e-05, "loss": 0.4711, "step": 13461 }, { "epoch": 0.9981463631645288, "grad_norm": 0.376465767621994, "learning_rate": 6.539634908727182e-05, "loss": 0.4924, "step": 13462 }, { "epoch": 0.9982205086379476, "grad_norm": 0.37607720494270325, "learning_rate": 6.538634658664666e-05, "loss": 0.4902, "step": 13463 }, { "epoch": 0.9982946541113665, "grad_norm": 0.34647566080093384, "learning_rate": 6.537634408602151e-05, "loss": 0.4767, "step": 13464 }, { "epoch": 0.9983687995847853, "grad_norm": 0.37529563903808594, "learning_rate": 6.536634158539635e-05, "loss": 0.5073, "step": 13465 }, { "epoch": 0.9984429450582042, "grad_norm": 0.3607476055622101, "learning_rate": 6.53563390847712e-05, "loss": 0.4596, "step": 13466 }, { "epoch": 0.9985170905316231, "grad_norm": 0.3624010682106018, "learning_rate": 6.534633658414603e-05, "loss": 0.4771, "step": 13467 }, { "epoch": 0.9985912360050418, "grad_norm": 0.3591022491455078, "learning_rate": 6.533633408352088e-05, "loss": 0.4726, "step": 13468 }, { "epoch": 0.9986653814784607, "grad_norm": 0.3864991366863251, "learning_rate": 6.532633158289573e-05, "loss": 0.4983, "step": 13469 }, { "epoch": 0.9987395269518796, "grad_norm": 0.3834711015224457, "learning_rate": 6.531632908227057e-05, "loss": 0.4735, "step": 13470 }, { "epoch": 0.9988136724252984, "grad_norm": 0.3419872224330902, "learning_rate": 6.530632658164542e-05, "loss": 0.4671, "step": 13471 }, { "epoch": 0.9988878178987173, "grad_norm": 0.37881729006767273, "learning_rate": 6.529632408102026e-05, "loss": 0.4935, "step": 13472 }, { "epoch": 0.9989619633721362, "grad_norm": 0.3822660446166992, "learning_rate": 6.528632158039511e-05, "loss": 0.5103, "step": 13473 }, { "epoch": 0.9990361088455549, "grad_norm": 0.37027862668037415, "learning_rate": 6.527631907976995e-05, "loss": 0.4907, "step": 13474 }, { "epoch": 0.9991102543189738, "grad_norm": 0.3413994014263153, "learning_rate": 6.52663165791448e-05, "loss": 0.4374, "step": 13475 }, { "epoch": 0.9991843997923927, "grad_norm": 0.3614973723888397, "learning_rate": 6.525631407851963e-05, "loss": 0.4981, "step": 13476 }, { "epoch": 0.9992585452658115, "grad_norm": 0.3491631746292114, "learning_rate": 6.524631157789449e-05, "loss": 0.4856, "step": 13477 }, { "epoch": 0.9993326907392304, "grad_norm": 0.4087764024734497, "learning_rate": 6.523630907726932e-05, "loss": 0.5492, "step": 13478 }, { "epoch": 0.9994068362126493, "grad_norm": 0.3633492588996887, "learning_rate": 6.522630657664417e-05, "loss": 0.5145, "step": 13479 }, { "epoch": 0.999480981686068, "grad_norm": 0.36459317803382874, "learning_rate": 6.521630407601901e-05, "loss": 0.5188, "step": 13480 }, { "epoch": 0.9995551271594869, "grad_norm": 0.349263459444046, "learning_rate": 6.520630157539385e-05, "loss": 0.4694, "step": 13481 }, { "epoch": 0.9996292726329058, "grad_norm": 0.35959890484809875, "learning_rate": 6.51962990747687e-05, "loss": 0.504, "step": 13482 }, { "epoch": 0.9997034181063246, "grad_norm": 0.3503957688808441, "learning_rate": 6.518629657414353e-05, "loss": 0.4316, "step": 13483 }, { "epoch": 0.9997775635797435, "grad_norm": 0.3546396493911743, "learning_rate": 6.517629407351839e-05, "loss": 0.4214, "step": 13484 }, { "epoch": 0.9998517090531623, "grad_norm": 0.34611621499061584, "learning_rate": 6.516629157289322e-05, "loss": 0.4579, "step": 13485 }, { "epoch": 0.9999258545265811, "grad_norm": 0.3619985282421112, "learning_rate": 6.515628907226807e-05, "loss": 0.5014, "step": 13486 }, { "epoch": 1.0, "grad_norm": 0.5177510380744934, "learning_rate": 6.514628657164291e-05, "loss": 0.5273, "step": 13487 }, { "epoch": 1.0000741454734188, "grad_norm": 0.3356313705444336, "learning_rate": 6.513628407101776e-05, "loss": 0.4491, "step": 13488 }, { "epoch": 1.0001482909468378, "grad_norm": 0.33757224678993225, "learning_rate": 6.51262815703926e-05, "loss": 0.4371, "step": 13489 }, { "epoch": 1.0002224364202565, "grad_norm": 0.38275960087776184, "learning_rate": 6.511627906976745e-05, "loss": 0.4625, "step": 13490 }, { "epoch": 1.0002965818936753, "grad_norm": 0.35669100284576416, "learning_rate": 6.510627656914229e-05, "loss": 0.4863, "step": 13491 }, { "epoch": 1.0003707273670943, "grad_norm": 0.34047701954841614, "learning_rate": 6.509627406851714e-05, "loss": 0.4369, "step": 13492 }, { "epoch": 1.000444872840513, "grad_norm": 0.34640252590179443, "learning_rate": 6.508627156789197e-05, "loss": 0.4571, "step": 13493 }, { "epoch": 1.0005190183139319, "grad_norm": 0.3708333373069763, "learning_rate": 6.507626906726681e-05, "loss": 0.4716, "step": 13494 }, { "epoch": 1.0005931637873509, "grad_norm": 0.34541258215904236, "learning_rate": 6.506626656664166e-05, "loss": 0.4412, "step": 13495 }, { "epoch": 1.0006673092607696, "grad_norm": 0.3713463842868805, "learning_rate": 6.50562640660165e-05, "loss": 0.4627, "step": 13496 }, { "epoch": 1.0007414547341884, "grad_norm": 0.3730805814266205, "learning_rate": 6.504626156539135e-05, "loss": 0.4727, "step": 13497 }, { "epoch": 1.0008156002076074, "grad_norm": 0.34651806950569153, "learning_rate": 6.503625906476619e-05, "loss": 0.4237, "step": 13498 }, { "epoch": 1.0008897456810262, "grad_norm": 0.36746618151664734, "learning_rate": 6.502625656414104e-05, "loss": 0.4765, "step": 13499 }, { "epoch": 1.000963891154445, "grad_norm": 0.34332048892974854, "learning_rate": 6.501625406351587e-05, "loss": 0.4456, "step": 13500 }, { "epoch": 1.001038036627864, "grad_norm": 0.3673732578754425, "learning_rate": 6.500625156289072e-05, "loss": 0.4586, "step": 13501 }, { "epoch": 1.0011121821012827, "grad_norm": 0.3640478253364563, "learning_rate": 6.499624906226558e-05, "loss": 0.4667, "step": 13502 }, { "epoch": 1.0011863275747015, "grad_norm": 0.3599587678909302, "learning_rate": 6.498624656164041e-05, "loss": 0.4386, "step": 13503 }, { "epoch": 1.0012604730481205, "grad_norm": 0.34722861647605896, "learning_rate": 6.497624406101526e-05, "loss": 0.4125, "step": 13504 }, { "epoch": 1.0013346185215393, "grad_norm": 0.36957505345344543, "learning_rate": 6.49662415603901e-05, "loss": 0.4542, "step": 13505 }, { "epoch": 1.001408763994958, "grad_norm": 0.3651711344718933, "learning_rate": 6.495623905976495e-05, "loss": 0.4512, "step": 13506 }, { "epoch": 1.001482909468377, "grad_norm": 0.38618773221969604, "learning_rate": 6.494623655913979e-05, "loss": 0.4902, "step": 13507 }, { "epoch": 1.0015570549417958, "grad_norm": 0.3763055205345154, "learning_rate": 6.493623405851464e-05, "loss": 0.4883, "step": 13508 }, { "epoch": 1.0016312004152146, "grad_norm": 0.3857514262199402, "learning_rate": 6.492623155788948e-05, "loss": 0.4811, "step": 13509 }, { "epoch": 1.0017053458886336, "grad_norm": 0.37486183643341064, "learning_rate": 6.491622905726433e-05, "loss": 0.4628, "step": 13510 }, { "epoch": 1.0017794913620524, "grad_norm": 0.38033509254455566, "learning_rate": 6.490622655663916e-05, "loss": 0.4792, "step": 13511 }, { "epoch": 1.0018536368354711, "grad_norm": 0.37555426359176636, "learning_rate": 6.489622405601401e-05, "loss": 0.5002, "step": 13512 }, { "epoch": 1.0019277823088901, "grad_norm": 0.35884469747543335, "learning_rate": 6.488622155538885e-05, "loss": 0.464, "step": 13513 }, { "epoch": 1.002001927782309, "grad_norm": 0.3755136728286743, "learning_rate": 6.48762190547637e-05, "loss": 0.4553, "step": 13514 }, { "epoch": 1.0020760732557277, "grad_norm": 0.3747953474521637, "learning_rate": 6.486621655413854e-05, "loss": 0.4591, "step": 13515 }, { "epoch": 1.0021502187291467, "grad_norm": 0.3587745428085327, "learning_rate": 6.485621405351339e-05, "loss": 0.4381, "step": 13516 }, { "epoch": 1.0022243642025654, "grad_norm": 0.35978469252586365, "learning_rate": 6.484621155288823e-05, "loss": 0.4547, "step": 13517 }, { "epoch": 1.0022985096759842, "grad_norm": 0.3721400797367096, "learning_rate": 6.483620905226306e-05, "loss": 0.4391, "step": 13518 }, { "epoch": 1.0023726551494032, "grad_norm": 0.3800019919872284, "learning_rate": 6.482620655163791e-05, "loss": 0.462, "step": 13519 }, { "epoch": 1.002446800622822, "grad_norm": 0.3682410717010498, "learning_rate": 6.481620405101275e-05, "loss": 0.468, "step": 13520 }, { "epoch": 1.0025209460962408, "grad_norm": 0.3698469400405884, "learning_rate": 6.48062015503876e-05, "loss": 0.4564, "step": 13521 }, { "epoch": 1.0025950915696598, "grad_norm": 0.3593471646308899, "learning_rate": 6.479619904976244e-05, "loss": 0.4473, "step": 13522 }, { "epoch": 1.0026692370430785, "grad_norm": 0.3585173785686493, "learning_rate": 6.478619654913729e-05, "loss": 0.4448, "step": 13523 }, { "epoch": 1.0027433825164973, "grad_norm": 0.3891347348690033, "learning_rate": 6.477619404851213e-05, "loss": 0.5119, "step": 13524 }, { "epoch": 1.0028175279899163, "grad_norm": 0.3764028251171112, "learning_rate": 6.476619154788698e-05, "loss": 0.4485, "step": 13525 }, { "epoch": 1.002891673463335, "grad_norm": 0.36511921882629395, "learning_rate": 6.475618904726181e-05, "loss": 0.4955, "step": 13526 }, { "epoch": 1.0029658189367539, "grad_norm": 0.3559761345386505, "learning_rate": 6.474618654663667e-05, "loss": 0.4501, "step": 13527 }, { "epoch": 1.0030399644101728, "grad_norm": 0.4042354226112366, "learning_rate": 6.47361840460115e-05, "loss": 0.4647, "step": 13528 }, { "epoch": 1.0031141098835916, "grad_norm": 0.3374786674976349, "learning_rate": 6.472618154538635e-05, "loss": 0.4387, "step": 13529 }, { "epoch": 1.0031882553570104, "grad_norm": 0.3467094898223877, "learning_rate": 6.471617904476119e-05, "loss": 0.4516, "step": 13530 }, { "epoch": 1.0032624008304294, "grad_norm": 0.3601115942001343, "learning_rate": 6.470617654413603e-05, "loss": 0.4685, "step": 13531 }, { "epoch": 1.0033365463038482, "grad_norm": 0.3511591851711273, "learning_rate": 6.469617404351088e-05, "loss": 0.4489, "step": 13532 }, { "epoch": 1.003410691777267, "grad_norm": 0.3602231740951538, "learning_rate": 6.468617154288571e-05, "loss": 0.4294, "step": 13533 }, { "epoch": 1.003484837250686, "grad_norm": 0.3564523458480835, "learning_rate": 6.467616904226057e-05, "loss": 0.4579, "step": 13534 }, { "epoch": 1.0035589827241047, "grad_norm": 0.3415050208568573, "learning_rate": 6.46661665416354e-05, "loss": 0.444, "step": 13535 }, { "epoch": 1.0036331281975235, "grad_norm": 0.34956902265548706, "learning_rate": 6.465616404101025e-05, "loss": 0.4407, "step": 13536 }, { "epoch": 1.0037072736709425, "grad_norm": 0.3589249849319458, "learning_rate": 6.46461615403851e-05, "loss": 0.4724, "step": 13537 }, { "epoch": 1.0037814191443613, "grad_norm": 0.3684578835964203, "learning_rate": 6.463615903975994e-05, "loss": 0.4623, "step": 13538 }, { "epoch": 1.00385556461778, "grad_norm": 0.3569583594799042, "learning_rate": 6.462615653913479e-05, "loss": 0.4422, "step": 13539 }, { "epoch": 1.003929710091199, "grad_norm": 0.3525838553905487, "learning_rate": 6.461615403850964e-05, "loss": 0.4444, "step": 13540 }, { "epoch": 1.0040038555646178, "grad_norm": 0.33385542035102844, "learning_rate": 6.460615153788448e-05, "loss": 0.4116, "step": 13541 }, { "epoch": 1.0040780010380366, "grad_norm": 0.3737771809101105, "learning_rate": 6.459614903725932e-05, "loss": 0.462, "step": 13542 }, { "epoch": 1.0041521465114556, "grad_norm": 0.37476664781570435, "learning_rate": 6.458614653663417e-05, "loss": 0.4711, "step": 13543 }, { "epoch": 1.0042262919848743, "grad_norm": 0.34636014699935913, "learning_rate": 6.4576144036009e-05, "loss": 0.4394, "step": 13544 }, { "epoch": 1.0043004374582931, "grad_norm": 0.34677693247795105, "learning_rate": 6.456614153538385e-05, "loss": 0.4326, "step": 13545 }, { "epoch": 1.0043745829317121, "grad_norm": 0.3637552261352539, "learning_rate": 6.455613903475869e-05, "loss": 0.4519, "step": 13546 }, { "epoch": 1.0044487284051309, "grad_norm": 0.37126827239990234, "learning_rate": 6.454613653413354e-05, "loss": 0.5034, "step": 13547 }, { "epoch": 1.0045228738785497, "grad_norm": 0.3722313940525055, "learning_rate": 6.453613403350838e-05, "loss": 0.4406, "step": 13548 }, { "epoch": 1.0045970193519687, "grad_norm": 0.3543456792831421, "learning_rate": 6.452613153288323e-05, "loss": 0.4167, "step": 13549 }, { "epoch": 1.0046711648253874, "grad_norm": 0.3477095067501068, "learning_rate": 6.451612903225807e-05, "loss": 0.4315, "step": 13550 }, { "epoch": 1.0047453102988062, "grad_norm": 0.33789345622062683, "learning_rate": 6.450612653163292e-05, "loss": 0.4305, "step": 13551 }, { "epoch": 1.0048194557722252, "grad_norm": 0.36382853984832764, "learning_rate": 6.449612403100775e-05, "loss": 0.4659, "step": 13552 }, { "epoch": 1.004893601245644, "grad_norm": 0.36278221011161804, "learning_rate": 6.44861215303826e-05, "loss": 0.4677, "step": 13553 }, { "epoch": 1.0049677467190627, "grad_norm": 0.3711426556110382, "learning_rate": 6.447611902975744e-05, "loss": 0.4904, "step": 13554 }, { "epoch": 1.0050418921924817, "grad_norm": 0.3648534417152405, "learning_rate": 6.446611652913228e-05, "loss": 0.4692, "step": 13555 }, { "epoch": 1.0051160376659005, "grad_norm": 0.36198556423187256, "learning_rate": 6.445611402850713e-05, "loss": 0.4208, "step": 13556 }, { "epoch": 1.0051901831393193, "grad_norm": 0.39191320538520813, "learning_rate": 6.444611152788197e-05, "loss": 0.4895, "step": 13557 }, { "epoch": 1.0052643286127383, "grad_norm": 0.3729378879070282, "learning_rate": 6.443610902725682e-05, "loss": 0.4555, "step": 13558 }, { "epoch": 1.005338474086157, "grad_norm": 0.39096972346305847, "learning_rate": 6.442610652663166e-05, "loss": 0.4719, "step": 13559 }, { "epoch": 1.0054126195595758, "grad_norm": 0.3793165981769562, "learning_rate": 6.44161040260065e-05, "loss": 0.4603, "step": 13560 }, { "epoch": 1.0054867650329948, "grad_norm": 0.3808535933494568, "learning_rate": 6.440610152538134e-05, "loss": 0.4782, "step": 13561 }, { "epoch": 1.0055609105064136, "grad_norm": 0.35827282071113586, "learning_rate": 6.43960990247562e-05, "loss": 0.4564, "step": 13562 }, { "epoch": 1.0056350559798324, "grad_norm": 0.38781002163887024, "learning_rate": 6.438609652413103e-05, "loss": 0.5093, "step": 13563 }, { "epoch": 1.0057092014532514, "grad_norm": 0.38553890585899353, "learning_rate": 6.437609402350588e-05, "loss": 0.4499, "step": 13564 }, { "epoch": 1.0057833469266702, "grad_norm": 0.36673179268836975, "learning_rate": 6.436609152288072e-05, "loss": 0.4628, "step": 13565 }, { "epoch": 1.005857492400089, "grad_norm": 0.36428308486938477, "learning_rate": 6.435608902225557e-05, "loss": 0.4319, "step": 13566 }, { "epoch": 1.005931637873508, "grad_norm": 0.34594041109085083, "learning_rate": 6.43460865216304e-05, "loss": 0.4367, "step": 13567 }, { "epoch": 1.0060057833469267, "grad_norm": 0.37107452750205994, "learning_rate": 6.433608402100524e-05, "loss": 0.4831, "step": 13568 }, { "epoch": 1.0060799288203455, "grad_norm": 0.39089953899383545, "learning_rate": 6.43260815203801e-05, "loss": 0.5286, "step": 13569 }, { "epoch": 1.0061540742937645, "grad_norm": 0.3998761773109436, "learning_rate": 6.431607901975494e-05, "loss": 0.5007, "step": 13570 }, { "epoch": 1.0062282197671832, "grad_norm": 0.3771378993988037, "learning_rate": 6.430607651912978e-05, "loss": 0.4772, "step": 13571 }, { "epoch": 1.006302365240602, "grad_norm": 0.3880189061164856, "learning_rate": 6.429607401850463e-05, "loss": 0.505, "step": 13572 }, { "epoch": 1.006376510714021, "grad_norm": 0.34396877884864807, "learning_rate": 6.428607151787947e-05, "loss": 0.4071, "step": 13573 }, { "epoch": 1.0064506561874398, "grad_norm": 0.3530159890651703, "learning_rate": 6.427606901725432e-05, "loss": 0.4262, "step": 13574 }, { "epoch": 1.0065248016608586, "grad_norm": 0.36411726474761963, "learning_rate": 6.426606651662917e-05, "loss": 0.425, "step": 13575 }, { "epoch": 1.0065989471342776, "grad_norm": 0.377349853515625, "learning_rate": 6.425606401600401e-05, "loss": 0.472, "step": 13576 }, { "epoch": 1.0066730926076963, "grad_norm": 0.3865800201892853, "learning_rate": 6.424606151537886e-05, "loss": 0.482, "step": 13577 }, { "epoch": 1.006747238081115, "grad_norm": 0.36254534125328064, "learning_rate": 6.42360590147537e-05, "loss": 0.4399, "step": 13578 }, { "epoch": 1.006821383554534, "grad_norm": 0.3802310526371002, "learning_rate": 6.422605651412853e-05, "loss": 0.4797, "step": 13579 }, { "epoch": 1.0068955290279529, "grad_norm": 0.3471031188964844, "learning_rate": 6.421605401350338e-05, "loss": 0.4263, "step": 13580 }, { "epoch": 1.0069696745013716, "grad_norm": 0.39927938580513, "learning_rate": 6.420605151287822e-05, "loss": 0.4902, "step": 13581 }, { "epoch": 1.0070438199747906, "grad_norm": 0.37001854181289673, "learning_rate": 6.419604901225307e-05, "loss": 0.4617, "step": 13582 }, { "epoch": 1.0071179654482094, "grad_norm": 0.3802953064441681, "learning_rate": 6.418604651162791e-05, "loss": 0.4657, "step": 13583 }, { "epoch": 1.0071921109216282, "grad_norm": 0.36678048968315125, "learning_rate": 6.417604401100276e-05, "loss": 0.4863, "step": 13584 }, { "epoch": 1.0072662563950472, "grad_norm": 0.40228790044784546, "learning_rate": 6.41660415103776e-05, "loss": 0.462, "step": 13585 }, { "epoch": 1.007340401868466, "grad_norm": 0.35971906781196594, "learning_rate": 6.415603900975245e-05, "loss": 0.4481, "step": 13586 }, { "epoch": 1.0074145473418847, "grad_norm": 0.3627149164676666, "learning_rate": 6.414603650912728e-05, "loss": 0.4207, "step": 13587 }, { "epoch": 1.0074886928153037, "grad_norm": 0.3667859137058258, "learning_rate": 6.413603400850213e-05, "loss": 0.4605, "step": 13588 }, { "epoch": 1.0075628382887225, "grad_norm": 0.3955632150173187, "learning_rate": 6.412603150787697e-05, "loss": 0.4731, "step": 13589 }, { "epoch": 1.0076369837621413, "grad_norm": 0.39457643032073975, "learning_rate": 6.411602900725182e-05, "loss": 0.4927, "step": 13590 }, { "epoch": 1.0077111292355603, "grad_norm": 0.39648959040641785, "learning_rate": 6.410602650662666e-05, "loss": 0.4982, "step": 13591 }, { "epoch": 1.007785274708979, "grad_norm": 0.37107527256011963, "learning_rate": 6.40960240060015e-05, "loss": 0.4532, "step": 13592 }, { "epoch": 1.0078594201823978, "grad_norm": 0.3620554208755493, "learning_rate": 6.408602150537635e-05, "loss": 0.4295, "step": 13593 }, { "epoch": 1.0079335656558168, "grad_norm": 0.36949625611305237, "learning_rate": 6.407601900475118e-05, "loss": 0.4569, "step": 13594 }, { "epoch": 1.0080077111292356, "grad_norm": 0.36603838205337524, "learning_rate": 6.406601650412603e-05, "loss": 0.4648, "step": 13595 }, { "epoch": 1.0080818566026544, "grad_norm": 0.3788157105445862, "learning_rate": 6.405601400350087e-05, "loss": 0.4908, "step": 13596 }, { "epoch": 1.0081560020760734, "grad_norm": 0.42225417494773865, "learning_rate": 6.404601150287572e-05, "loss": 0.4836, "step": 13597 }, { "epoch": 1.0082301475494921, "grad_norm": 0.3820217549800873, "learning_rate": 6.403600900225056e-05, "loss": 0.4951, "step": 13598 }, { "epoch": 1.008304293022911, "grad_norm": 0.3748120665550232, "learning_rate": 6.402600650162541e-05, "loss": 0.49, "step": 13599 }, { "epoch": 1.00837843849633, "grad_norm": 0.3656558394432068, "learning_rate": 6.401600400100025e-05, "loss": 0.4506, "step": 13600 }, { "epoch": 1.0084525839697487, "grad_norm": 0.34536442160606384, "learning_rate": 6.40060015003751e-05, "loss": 0.4297, "step": 13601 }, { "epoch": 1.0085267294431675, "grad_norm": 0.3781962990760803, "learning_rate": 6.399599899974993e-05, "loss": 0.4895, "step": 13602 }, { "epoch": 1.0086008749165862, "grad_norm": 0.35506096482276917, "learning_rate": 6.398599649912479e-05, "loss": 0.4419, "step": 13603 }, { "epoch": 1.0086750203900052, "grad_norm": 0.37135085463523865, "learning_rate": 6.397599399849962e-05, "loss": 0.4445, "step": 13604 }, { "epoch": 1.008749165863424, "grad_norm": 0.37814927101135254, "learning_rate": 6.396599149787447e-05, "loss": 0.4667, "step": 13605 }, { "epoch": 1.0088233113368428, "grad_norm": 0.35279279947280884, "learning_rate": 6.395598899724931e-05, "loss": 0.4213, "step": 13606 }, { "epoch": 1.0088974568102618, "grad_norm": 0.3585694134235382, "learning_rate": 6.394598649662416e-05, "loss": 0.4634, "step": 13607 }, { "epoch": 1.0089716022836805, "grad_norm": 0.37978506088256836, "learning_rate": 6.393598399599901e-05, "loss": 0.4854, "step": 13608 }, { "epoch": 1.0090457477570993, "grad_norm": 0.36171385645866394, "learning_rate": 6.392598149537385e-05, "loss": 0.4249, "step": 13609 }, { "epoch": 1.0091198932305183, "grad_norm": 0.381754994392395, "learning_rate": 6.39159789947487e-05, "loss": 0.4795, "step": 13610 }, { "epoch": 1.009194038703937, "grad_norm": 0.37552109360694885, "learning_rate": 6.390597649412354e-05, "loss": 0.4824, "step": 13611 }, { "epoch": 1.0092681841773559, "grad_norm": 0.37661677598953247, "learning_rate": 6.389597399349839e-05, "loss": 0.4622, "step": 13612 }, { "epoch": 1.0093423296507749, "grad_norm": 0.37310683727264404, "learning_rate": 6.388597149287322e-05, "loss": 0.4534, "step": 13613 }, { "epoch": 1.0094164751241936, "grad_norm": 0.4041929543018341, "learning_rate": 6.387596899224807e-05, "loss": 0.5215, "step": 13614 }, { "epoch": 1.0094906205976124, "grad_norm": 0.35426509380340576, "learning_rate": 6.386596649162291e-05, "loss": 0.4572, "step": 13615 }, { "epoch": 1.0095647660710314, "grad_norm": 0.3803214728832245, "learning_rate": 6.385596399099775e-05, "loss": 0.4644, "step": 13616 }, { "epoch": 1.0096389115444502, "grad_norm": 0.3530060052871704, "learning_rate": 6.38459614903726e-05, "loss": 0.442, "step": 13617 }, { "epoch": 1.009713057017869, "grad_norm": 0.3840217888355255, "learning_rate": 6.383595898974744e-05, "loss": 0.4716, "step": 13618 }, { "epoch": 1.009787202491288, "grad_norm": 0.3717573881149292, "learning_rate": 6.382595648912229e-05, "loss": 0.4741, "step": 13619 }, { "epoch": 1.0098613479647067, "grad_norm": 0.3940744996070862, "learning_rate": 6.381595398849712e-05, "loss": 0.4807, "step": 13620 }, { "epoch": 1.0099354934381255, "grad_norm": 0.37957990169525146, "learning_rate": 6.380595148787197e-05, "loss": 0.4622, "step": 13621 }, { "epoch": 1.0100096389115445, "grad_norm": 0.3674151599407196, "learning_rate": 6.379594898724681e-05, "loss": 0.4807, "step": 13622 }, { "epoch": 1.0100837843849633, "grad_norm": 0.39042556285858154, "learning_rate": 6.378594648662166e-05, "loss": 0.5329, "step": 13623 }, { "epoch": 1.010157929858382, "grad_norm": 0.34596627950668335, "learning_rate": 6.37759439859965e-05, "loss": 0.4347, "step": 13624 }, { "epoch": 1.010232075331801, "grad_norm": 0.35879215598106384, "learning_rate": 6.376594148537135e-05, "loss": 0.4558, "step": 13625 }, { "epoch": 1.0103062208052198, "grad_norm": 0.3814578056335449, "learning_rate": 6.375593898474619e-05, "loss": 0.4957, "step": 13626 }, { "epoch": 1.0103803662786386, "grad_norm": 0.3667789101600647, "learning_rate": 6.374593648412104e-05, "loss": 0.4632, "step": 13627 }, { "epoch": 1.0104545117520576, "grad_norm": 0.352777898311615, "learning_rate": 6.373593398349588e-05, "loss": 0.4417, "step": 13628 }, { "epoch": 1.0105286572254764, "grad_norm": 0.3344837725162506, "learning_rate": 6.372593148287071e-05, "loss": 0.4451, "step": 13629 }, { "epoch": 1.0106028026988951, "grad_norm": 0.3755112290382385, "learning_rate": 6.371592898224556e-05, "loss": 0.4552, "step": 13630 }, { "epoch": 1.0106769481723141, "grad_norm": 0.3940955400466919, "learning_rate": 6.37059264816204e-05, "loss": 0.4948, "step": 13631 }, { "epoch": 1.010751093645733, "grad_norm": 0.36511722207069397, "learning_rate": 6.369592398099525e-05, "loss": 0.4502, "step": 13632 }, { "epoch": 1.0108252391191517, "grad_norm": 0.3665062189102173, "learning_rate": 6.368592148037009e-05, "loss": 0.454, "step": 13633 }, { "epoch": 1.0108993845925707, "grad_norm": 0.39233243465423584, "learning_rate": 6.367591897974494e-05, "loss": 0.4999, "step": 13634 }, { "epoch": 1.0109735300659894, "grad_norm": 0.3768373727798462, "learning_rate": 6.366591647911978e-05, "loss": 0.4563, "step": 13635 }, { "epoch": 1.0110476755394082, "grad_norm": 0.3744276165962219, "learning_rate": 6.365591397849463e-05, "loss": 0.41, "step": 13636 }, { "epoch": 1.0111218210128272, "grad_norm": 0.3610749840736389, "learning_rate": 6.364591147786946e-05, "loss": 0.437, "step": 13637 }, { "epoch": 1.011195966486246, "grad_norm": 0.39171692728996277, "learning_rate": 6.363590897724431e-05, "loss": 0.4849, "step": 13638 }, { "epoch": 1.0112701119596648, "grad_norm": 0.36745786666870117, "learning_rate": 6.362590647661915e-05, "loss": 0.4724, "step": 13639 }, { "epoch": 1.0113442574330838, "grad_norm": 0.37751099467277527, "learning_rate": 6.3615903975994e-05, "loss": 0.4433, "step": 13640 }, { "epoch": 1.0114184029065025, "grad_norm": 0.36490926146507263, "learning_rate": 6.360590147536884e-05, "loss": 0.4531, "step": 13641 }, { "epoch": 1.0114925483799213, "grad_norm": 0.3599764406681061, "learning_rate": 6.359589897474369e-05, "loss": 0.4317, "step": 13642 }, { "epoch": 1.0115666938533403, "grad_norm": 0.40068385004997253, "learning_rate": 6.358589647411854e-05, "loss": 0.5103, "step": 13643 }, { "epoch": 1.011640839326759, "grad_norm": 0.35099688172340393, "learning_rate": 6.357589397349338e-05, "loss": 0.4571, "step": 13644 }, { "epoch": 1.0117149848001779, "grad_norm": 0.35608819127082825, "learning_rate": 6.356589147286823e-05, "loss": 0.4416, "step": 13645 }, { "epoch": 1.0117891302735968, "grad_norm": 0.36787471175193787, "learning_rate": 6.355588897224306e-05, "loss": 0.4584, "step": 13646 }, { "epoch": 1.0118632757470156, "grad_norm": 0.35336923599243164, "learning_rate": 6.354588647161792e-05, "loss": 0.4448, "step": 13647 }, { "epoch": 1.0119374212204344, "grad_norm": 0.3598252236843109, "learning_rate": 6.353588397099275e-05, "loss": 0.4544, "step": 13648 }, { "epoch": 1.0120115666938534, "grad_norm": 0.36212438344955444, "learning_rate": 6.35258814703676e-05, "loss": 0.4512, "step": 13649 }, { "epoch": 1.0120857121672722, "grad_norm": 0.3568609356880188, "learning_rate": 6.351587896974244e-05, "loss": 0.4407, "step": 13650 }, { "epoch": 1.012159857640691, "grad_norm": 0.37657734751701355, "learning_rate": 6.350587646911729e-05, "loss": 0.5253, "step": 13651 }, { "epoch": 1.01223400311411, "grad_norm": 0.3465058505535126, "learning_rate": 6.349587396849213e-05, "loss": 0.4356, "step": 13652 }, { "epoch": 1.0123081485875287, "grad_norm": 0.35692861676216125, "learning_rate": 6.348587146786698e-05, "loss": 0.4576, "step": 13653 }, { "epoch": 1.0123822940609475, "grad_norm": 0.36896854639053345, "learning_rate": 6.347586896724182e-05, "loss": 0.4775, "step": 13654 }, { "epoch": 1.0124564395343665, "grad_norm": 0.36190706491470337, "learning_rate": 6.346586646661665e-05, "loss": 0.4598, "step": 13655 }, { "epoch": 1.0125305850077853, "grad_norm": 0.3556760549545288, "learning_rate": 6.34558639659915e-05, "loss": 0.4705, "step": 13656 }, { "epoch": 1.012604730481204, "grad_norm": 0.3966600000858307, "learning_rate": 6.344586146536634e-05, "loss": 0.457, "step": 13657 }, { "epoch": 1.012678875954623, "grad_norm": 0.3591488301753998, "learning_rate": 6.343585896474119e-05, "loss": 0.454, "step": 13658 }, { "epoch": 1.0127530214280418, "grad_norm": 0.3541303873062134, "learning_rate": 6.342585646411603e-05, "loss": 0.4383, "step": 13659 }, { "epoch": 1.0128271669014606, "grad_norm": 0.36725398898124695, "learning_rate": 6.341585396349088e-05, "loss": 0.4594, "step": 13660 }, { "epoch": 1.0129013123748796, "grad_norm": 0.3664472699165344, "learning_rate": 6.340585146286572e-05, "loss": 0.4724, "step": 13661 }, { "epoch": 1.0129754578482983, "grad_norm": 0.38284751772880554, "learning_rate": 6.339584896224057e-05, "loss": 0.4554, "step": 13662 }, { "epoch": 1.0130496033217171, "grad_norm": 0.37299972772598267, "learning_rate": 6.33858464616154e-05, "loss": 0.4491, "step": 13663 }, { "epoch": 1.0131237487951361, "grad_norm": 0.3872368633747101, "learning_rate": 6.337584396099025e-05, "loss": 0.484, "step": 13664 }, { "epoch": 1.0131978942685549, "grad_norm": 0.39174437522888184, "learning_rate": 6.336584146036509e-05, "loss": 0.4707, "step": 13665 }, { "epoch": 1.0132720397419737, "grad_norm": 0.39805668592453003, "learning_rate": 6.335583895973993e-05, "loss": 0.5145, "step": 13666 }, { "epoch": 1.0133461852153927, "grad_norm": 0.36081400513648987, "learning_rate": 6.334583645911478e-05, "loss": 0.4628, "step": 13667 }, { "epoch": 1.0134203306888114, "grad_norm": 0.3831169307231903, "learning_rate": 6.333583395848962e-05, "loss": 0.4979, "step": 13668 }, { "epoch": 1.0134944761622302, "grad_norm": 0.354872465133667, "learning_rate": 6.332583145786447e-05, "loss": 0.422, "step": 13669 }, { "epoch": 1.0135686216356492, "grad_norm": 0.35966959595680237, "learning_rate": 6.33158289572393e-05, "loss": 0.4323, "step": 13670 }, { "epoch": 1.013642767109068, "grad_norm": 0.35487741231918335, "learning_rate": 6.330582645661415e-05, "loss": 0.4261, "step": 13671 }, { "epoch": 1.0137169125824868, "grad_norm": 0.3876495063304901, "learning_rate": 6.329582395598899e-05, "loss": 0.4675, "step": 13672 }, { "epoch": 1.0137910580559057, "grad_norm": 0.357302188873291, "learning_rate": 6.328582145536384e-05, "loss": 0.4475, "step": 13673 }, { "epoch": 1.0138652035293245, "grad_norm": 0.37036529183387756, "learning_rate": 6.327581895473868e-05, "loss": 0.4546, "step": 13674 }, { "epoch": 1.0139393490027433, "grad_norm": 0.3512178957462311, "learning_rate": 6.326581645411353e-05, "loss": 0.4225, "step": 13675 }, { "epoch": 1.0140134944761623, "grad_norm": 0.3706960380077362, "learning_rate": 6.325581395348838e-05, "loss": 0.4652, "step": 13676 }, { "epoch": 1.014087639949581, "grad_norm": 0.37353354692459106, "learning_rate": 6.324581145286322e-05, "loss": 0.4768, "step": 13677 }, { "epoch": 1.0141617854229998, "grad_norm": 0.3673558831214905, "learning_rate": 6.323580895223807e-05, "loss": 0.4574, "step": 13678 }, { "epoch": 1.0142359308964188, "grad_norm": 0.3770337700843811, "learning_rate": 6.32258064516129e-05, "loss": 0.4476, "step": 13679 }, { "epoch": 1.0143100763698376, "grad_norm": 0.37900683283805847, "learning_rate": 6.321580395098776e-05, "loss": 0.4783, "step": 13680 }, { "epoch": 1.0143842218432564, "grad_norm": 0.37343305349349976, "learning_rate": 6.32058014503626e-05, "loss": 0.4808, "step": 13681 }, { "epoch": 1.0144583673166754, "grad_norm": 0.3564067780971527, "learning_rate": 6.319579894973744e-05, "loss": 0.4452, "step": 13682 }, { "epoch": 1.0145325127900942, "grad_norm": 0.38794395327568054, "learning_rate": 6.318579644911228e-05, "loss": 0.4729, "step": 13683 }, { "epoch": 1.014606658263513, "grad_norm": 0.3559887409210205, "learning_rate": 6.317579394848713e-05, "loss": 0.4259, "step": 13684 }, { "epoch": 1.014680803736932, "grad_norm": 0.36216259002685547, "learning_rate": 6.316579144786197e-05, "loss": 0.4459, "step": 13685 }, { "epoch": 1.0147549492103507, "grad_norm": 0.3832170069217682, "learning_rate": 6.315578894723682e-05, "loss": 0.4452, "step": 13686 }, { "epoch": 1.0148290946837695, "grad_norm": 0.36982831358909607, "learning_rate": 6.314578644661166e-05, "loss": 0.4523, "step": 13687 }, { "epoch": 1.0149032401571885, "grad_norm": 0.3852800726890564, "learning_rate": 6.313578394598651e-05, "loss": 0.4758, "step": 13688 }, { "epoch": 1.0149773856306072, "grad_norm": 0.3825790286064148, "learning_rate": 6.312578144536134e-05, "loss": 0.4561, "step": 13689 }, { "epoch": 1.015051531104026, "grad_norm": 0.3758372366428375, "learning_rate": 6.31157789447362e-05, "loss": 0.4557, "step": 13690 }, { "epoch": 1.015125676577445, "grad_norm": 0.372881680727005, "learning_rate": 6.310577644411103e-05, "loss": 0.4561, "step": 13691 }, { "epoch": 1.0151998220508638, "grad_norm": 0.35953405499458313, "learning_rate": 6.309577394348587e-05, "loss": 0.4423, "step": 13692 }, { "epoch": 1.0152739675242826, "grad_norm": 0.373182088136673, "learning_rate": 6.308577144286072e-05, "loss": 0.4461, "step": 13693 }, { "epoch": 1.0153481129977016, "grad_norm": 0.39113563299179077, "learning_rate": 6.307576894223556e-05, "loss": 0.4817, "step": 13694 }, { "epoch": 1.0154222584711203, "grad_norm": 0.38221412897109985, "learning_rate": 6.306576644161041e-05, "loss": 0.4584, "step": 13695 }, { "epoch": 1.015496403944539, "grad_norm": 0.3600408434867859, "learning_rate": 6.305576394098524e-05, "loss": 0.4703, "step": 13696 }, { "epoch": 1.015570549417958, "grad_norm": 0.35827651619911194, "learning_rate": 6.30457614403601e-05, "loss": 0.435, "step": 13697 }, { "epoch": 1.0156446948913769, "grad_norm": 0.36607664823532104, "learning_rate": 6.303575893973493e-05, "loss": 0.4328, "step": 13698 }, { "epoch": 1.0157188403647956, "grad_norm": 0.3961176872253418, "learning_rate": 6.302575643910978e-05, "loss": 0.5211, "step": 13699 }, { "epoch": 1.0157929858382146, "grad_norm": 0.38185206055641174, "learning_rate": 6.301575393848462e-05, "loss": 0.4573, "step": 13700 }, { "epoch": 1.0158671313116334, "grad_norm": 0.36590972542762756, "learning_rate": 6.300575143785947e-05, "loss": 0.4584, "step": 13701 }, { "epoch": 1.0159412767850522, "grad_norm": 0.36135730147361755, "learning_rate": 6.299574893723431e-05, "loss": 0.4243, "step": 13702 }, { "epoch": 1.0160154222584712, "grad_norm": 0.3853888213634491, "learning_rate": 6.298574643660914e-05, "loss": 0.4826, "step": 13703 }, { "epoch": 1.01608956773189, "grad_norm": 0.35857829451560974, "learning_rate": 6.2975743935984e-05, "loss": 0.4495, "step": 13704 }, { "epoch": 1.0161637132053087, "grad_norm": 0.38788533210754395, "learning_rate": 6.296574143535883e-05, "loss": 0.4549, "step": 13705 }, { "epoch": 1.0162378586787277, "grad_norm": 0.3703979253768921, "learning_rate": 6.295573893473368e-05, "loss": 0.4562, "step": 13706 }, { "epoch": 1.0163120041521465, "grad_norm": 0.36672958731651306, "learning_rate": 6.294573643410852e-05, "loss": 0.453, "step": 13707 }, { "epoch": 1.0163861496255653, "grad_norm": 0.3627055883407593, "learning_rate": 6.293573393348337e-05, "loss": 0.4592, "step": 13708 }, { "epoch": 1.0164602950989843, "grad_norm": 0.3702697157859802, "learning_rate": 6.292573143285822e-05, "loss": 0.4619, "step": 13709 }, { "epoch": 1.016534440572403, "grad_norm": 0.35915854573249817, "learning_rate": 6.291572893223306e-05, "loss": 0.4666, "step": 13710 }, { "epoch": 1.0166085860458218, "grad_norm": 0.3836973309516907, "learning_rate": 6.290572643160791e-05, "loss": 0.4661, "step": 13711 }, { "epoch": 1.0166827315192408, "grad_norm": 0.36834147572517395, "learning_rate": 6.289572393098275e-05, "loss": 0.4858, "step": 13712 }, { "epoch": 1.0167568769926596, "grad_norm": 0.37379178404808044, "learning_rate": 6.28857214303576e-05, "loss": 0.4677, "step": 13713 }, { "epoch": 1.0168310224660784, "grad_norm": 0.36248841881752014, "learning_rate": 6.287571892973245e-05, "loss": 0.4419, "step": 13714 }, { "epoch": 1.0169051679394974, "grad_norm": 0.4015713036060333, "learning_rate": 6.286571642910728e-05, "loss": 0.4525, "step": 13715 }, { "epoch": 1.0169793134129161, "grad_norm": 0.3372393250465393, "learning_rate": 6.285571392848212e-05, "loss": 0.4284, "step": 13716 }, { "epoch": 1.017053458886335, "grad_norm": 0.3975926339626312, "learning_rate": 6.284571142785697e-05, "loss": 0.4642, "step": 13717 }, { "epoch": 1.017127604359754, "grad_norm": 0.3467410206794739, "learning_rate": 6.283570892723181e-05, "loss": 0.4344, "step": 13718 }, { "epoch": 1.0172017498331727, "grad_norm": 0.37135177850723267, "learning_rate": 6.282570642660666e-05, "loss": 0.4583, "step": 13719 }, { "epoch": 1.0172758953065915, "grad_norm": 0.3629962205886841, "learning_rate": 6.28157039259815e-05, "loss": 0.458, "step": 13720 }, { "epoch": 1.0173500407800105, "grad_norm": 0.3723220229148865, "learning_rate": 6.280570142535635e-05, "loss": 0.4559, "step": 13721 }, { "epoch": 1.0174241862534292, "grad_norm": 0.36470162868499756, "learning_rate": 6.279569892473119e-05, "loss": 0.4585, "step": 13722 }, { "epoch": 1.017498331726848, "grad_norm": 0.3876856863498688, "learning_rate": 6.278569642410604e-05, "loss": 0.5436, "step": 13723 }, { "epoch": 1.017572477200267, "grad_norm": 0.39168164134025574, "learning_rate": 6.277569392348087e-05, "loss": 0.4962, "step": 13724 }, { "epoch": 1.0176466226736858, "grad_norm": 0.35807275772094727, "learning_rate": 6.276569142285572e-05, "loss": 0.4561, "step": 13725 }, { "epoch": 1.0177207681471045, "grad_norm": 0.36952587962150574, "learning_rate": 6.275568892223056e-05, "loss": 0.4645, "step": 13726 }, { "epoch": 1.0177949136205235, "grad_norm": 0.3746474087238312, "learning_rate": 6.274568642160541e-05, "loss": 0.4381, "step": 13727 }, { "epoch": 1.0178690590939423, "grad_norm": 0.3716086447238922, "learning_rate": 6.273568392098025e-05, "loss": 0.4775, "step": 13728 }, { "epoch": 1.017943204567361, "grad_norm": 0.34905770421028137, "learning_rate": 6.272568142035509e-05, "loss": 0.4261, "step": 13729 }, { "epoch": 1.01801735004078, "grad_norm": 0.3841310739517212, "learning_rate": 6.271567891972994e-05, "loss": 0.4729, "step": 13730 }, { "epoch": 1.0180914955141989, "grad_norm": 0.3856276571750641, "learning_rate": 6.270567641910477e-05, "loss": 0.4795, "step": 13731 }, { "epoch": 1.0181656409876176, "grad_norm": 0.37838056683540344, "learning_rate": 6.269567391847962e-05, "loss": 0.4654, "step": 13732 }, { "epoch": 1.0182397864610366, "grad_norm": 0.3814026415348053, "learning_rate": 6.268567141785446e-05, "loss": 0.4616, "step": 13733 }, { "epoch": 1.0183139319344554, "grad_norm": 0.35073354840278625, "learning_rate": 6.267566891722931e-05, "loss": 0.4295, "step": 13734 }, { "epoch": 1.0183880774078742, "grad_norm": 0.3830854296684265, "learning_rate": 6.266566641660415e-05, "loss": 0.4977, "step": 13735 }, { "epoch": 1.0184622228812932, "grad_norm": 0.37689444422721863, "learning_rate": 6.2655663915979e-05, "loss": 0.4537, "step": 13736 }, { "epoch": 1.018536368354712, "grad_norm": 0.3576487600803375, "learning_rate": 6.264566141535384e-05, "loss": 0.4446, "step": 13737 }, { "epoch": 1.0186105138281307, "grad_norm": 0.36587485671043396, "learning_rate": 6.263565891472869e-05, "loss": 0.4559, "step": 13738 }, { "epoch": 1.0186846593015497, "grad_norm": 0.35503295063972473, "learning_rate": 6.262565641410352e-05, "loss": 0.4456, "step": 13739 }, { "epoch": 1.0187588047749685, "grad_norm": 0.3949270248413086, "learning_rate": 6.261565391347836e-05, "loss": 0.5157, "step": 13740 }, { "epoch": 1.0188329502483873, "grad_norm": 0.37206384539604187, "learning_rate": 6.260565141285321e-05, "loss": 0.4588, "step": 13741 }, { "epoch": 1.0189070957218063, "grad_norm": 0.3660654127597809, "learning_rate": 6.259564891222805e-05, "loss": 0.4197, "step": 13742 }, { "epoch": 1.018981241195225, "grad_norm": 0.35650208592414856, "learning_rate": 6.25856464116029e-05, "loss": 0.4507, "step": 13743 }, { "epoch": 1.0190553866686438, "grad_norm": 0.38423779606819153, "learning_rate": 6.257564391097775e-05, "loss": 0.4855, "step": 13744 }, { "epoch": 1.0191295321420628, "grad_norm": 0.38211771845817566, "learning_rate": 6.256564141035259e-05, "loss": 0.4786, "step": 13745 }, { "epoch": 1.0192036776154816, "grad_norm": 0.3634051978588104, "learning_rate": 6.255563890972744e-05, "loss": 0.4625, "step": 13746 }, { "epoch": 1.0192778230889004, "grad_norm": 0.34382250905036926, "learning_rate": 6.254563640910229e-05, "loss": 0.4249, "step": 13747 }, { "epoch": 1.0193519685623194, "grad_norm": 0.37288010120391846, "learning_rate": 6.253563390847713e-05, "loss": 0.4634, "step": 13748 }, { "epoch": 1.0194261140357381, "grad_norm": 0.3701457381248474, "learning_rate": 6.252563140785198e-05, "loss": 0.4693, "step": 13749 }, { "epoch": 1.019500259509157, "grad_norm": 0.3455333709716797, "learning_rate": 6.251562890722681e-05, "loss": 0.4267, "step": 13750 }, { "epoch": 1.019574404982576, "grad_norm": 0.3688807189464569, "learning_rate": 6.250562640660166e-05, "loss": 0.4479, "step": 13751 }, { "epoch": 1.0196485504559947, "grad_norm": 0.4032703638076782, "learning_rate": 6.24956239059765e-05, "loss": 0.5101, "step": 13752 }, { "epoch": 1.0197226959294134, "grad_norm": 0.368794322013855, "learning_rate": 6.248562140535134e-05, "loss": 0.4448, "step": 13753 }, { "epoch": 1.0197968414028324, "grad_norm": 0.38493964076042175, "learning_rate": 6.247561890472619e-05, "loss": 0.4736, "step": 13754 }, { "epoch": 1.0198709868762512, "grad_norm": 0.37129679322242737, "learning_rate": 6.246561640410103e-05, "loss": 0.4594, "step": 13755 }, { "epoch": 1.01994513234967, "grad_norm": 0.3785812258720398, "learning_rate": 6.245561390347588e-05, "loss": 0.449, "step": 13756 }, { "epoch": 1.020019277823089, "grad_norm": 0.35817664861679077, "learning_rate": 6.244561140285071e-05, "loss": 0.4273, "step": 13757 }, { "epoch": 1.0200934232965078, "grad_norm": 0.3623519539833069, "learning_rate": 6.243560890222556e-05, "loss": 0.44, "step": 13758 }, { "epoch": 1.0201675687699265, "grad_norm": 0.355914831161499, "learning_rate": 6.24256064016004e-05, "loss": 0.4459, "step": 13759 }, { "epoch": 1.0202417142433455, "grad_norm": 0.3792632222175598, "learning_rate": 6.241560390097525e-05, "loss": 0.459, "step": 13760 }, { "epoch": 1.0203158597167643, "grad_norm": 0.35951268672943115, "learning_rate": 6.240560140035009e-05, "loss": 0.4612, "step": 13761 }, { "epoch": 1.020390005190183, "grad_norm": 0.3851078748703003, "learning_rate": 6.239559889972494e-05, "loss": 0.4635, "step": 13762 }, { "epoch": 1.020464150663602, "grad_norm": 0.35471653938293457, "learning_rate": 6.238559639909978e-05, "loss": 0.431, "step": 13763 }, { "epoch": 1.0205382961370209, "grad_norm": 0.38060063123703003, "learning_rate": 6.237559389847463e-05, "loss": 0.4471, "step": 13764 }, { "epoch": 1.0206124416104396, "grad_norm": 0.3576975464820862, "learning_rate": 6.236559139784946e-05, "loss": 0.4372, "step": 13765 }, { "epoch": 1.0206865870838586, "grad_norm": 0.37889960408210754, "learning_rate": 6.23555888972243e-05, "loss": 0.4659, "step": 13766 }, { "epoch": 1.0207607325572774, "grad_norm": 0.373278945684433, "learning_rate": 6.234558639659915e-05, "loss": 0.4684, "step": 13767 }, { "epoch": 1.0208348780306962, "grad_norm": 0.385886549949646, "learning_rate": 6.233558389597399e-05, "loss": 0.4681, "step": 13768 }, { "epoch": 1.0209090235041152, "grad_norm": 0.39985597133636475, "learning_rate": 6.232558139534884e-05, "loss": 0.4597, "step": 13769 }, { "epoch": 1.020983168977534, "grad_norm": 0.36862534284591675, "learning_rate": 6.231557889472368e-05, "loss": 0.448, "step": 13770 }, { "epoch": 1.0210573144509527, "grad_norm": 0.3742021322250366, "learning_rate": 6.230557639409853e-05, "loss": 0.4764, "step": 13771 }, { "epoch": 1.0211314599243717, "grad_norm": 0.37339261174201965, "learning_rate": 6.229557389347336e-05, "loss": 0.4559, "step": 13772 }, { "epoch": 1.0212056053977905, "grad_norm": 0.37141311168670654, "learning_rate": 6.228557139284822e-05, "loss": 0.4522, "step": 13773 }, { "epoch": 1.0212797508712093, "grad_norm": 0.3716897666454315, "learning_rate": 6.227556889222305e-05, "loss": 0.4497, "step": 13774 }, { "epoch": 1.0213538963446283, "grad_norm": 0.3734998106956482, "learning_rate": 6.22655663915979e-05, "loss": 0.4656, "step": 13775 }, { "epoch": 1.021428041818047, "grad_norm": 0.3709234893321991, "learning_rate": 6.225556389097274e-05, "loss": 0.4501, "step": 13776 }, { "epoch": 1.0215021872914658, "grad_norm": 0.3731309175491333, "learning_rate": 6.224556139034759e-05, "loss": 0.4437, "step": 13777 }, { "epoch": 1.0215763327648848, "grad_norm": 0.3793312609195709, "learning_rate": 6.223555888972243e-05, "loss": 0.4815, "step": 13778 }, { "epoch": 1.0216504782383036, "grad_norm": 0.36347395181655884, "learning_rate": 6.222555638909728e-05, "loss": 0.4573, "step": 13779 }, { "epoch": 1.0217246237117223, "grad_norm": 0.3530541956424713, "learning_rate": 6.221555388847212e-05, "loss": 0.4166, "step": 13780 }, { "epoch": 1.0217987691851413, "grad_norm": 0.3669015169143677, "learning_rate": 6.220555138784697e-05, "loss": 0.4439, "step": 13781 }, { "epoch": 1.0218729146585601, "grad_norm": 0.3872112035751343, "learning_rate": 6.219554888722182e-05, "loss": 0.4742, "step": 13782 }, { "epoch": 1.021947060131979, "grad_norm": 0.38339781761169434, "learning_rate": 6.218554638659665e-05, "loss": 0.4634, "step": 13783 }, { "epoch": 1.0220212056053979, "grad_norm": 0.3468380272388458, "learning_rate": 6.21755438859715e-05, "loss": 0.4372, "step": 13784 }, { "epoch": 1.0220953510788167, "grad_norm": 0.35902225971221924, "learning_rate": 6.216554138534634e-05, "loss": 0.4259, "step": 13785 }, { "epoch": 1.0221694965522354, "grad_norm": 0.3842103183269501, "learning_rate": 6.215553888472119e-05, "loss": 0.4611, "step": 13786 }, { "epoch": 1.0222436420256544, "grad_norm": 0.3684981167316437, "learning_rate": 6.214553638409603e-05, "loss": 0.4666, "step": 13787 }, { "epoch": 1.0223177874990732, "grad_norm": 0.36065801978111267, "learning_rate": 6.213553388347088e-05, "loss": 0.4329, "step": 13788 }, { "epoch": 1.022391932972492, "grad_norm": 0.37285685539245605, "learning_rate": 6.212553138284572e-05, "loss": 0.4614, "step": 13789 }, { "epoch": 1.022466078445911, "grad_norm": 0.3434697985649109, "learning_rate": 6.211552888222055e-05, "loss": 0.4235, "step": 13790 }, { "epoch": 1.0225402239193297, "grad_norm": 0.37101802229881287, "learning_rate": 6.21055263815954e-05, "loss": 0.4462, "step": 13791 }, { "epoch": 1.0226143693927485, "grad_norm": 0.37823161482810974, "learning_rate": 6.209552388097024e-05, "loss": 0.4735, "step": 13792 }, { "epoch": 1.0226885148661675, "grad_norm": 0.3860846757888794, "learning_rate": 6.208552138034509e-05, "loss": 0.47, "step": 13793 }, { "epoch": 1.0227626603395863, "grad_norm": 0.3613361418247223, "learning_rate": 6.207551887971993e-05, "loss": 0.4481, "step": 13794 }, { "epoch": 1.022836805813005, "grad_norm": 0.34609755873680115, "learning_rate": 6.206551637909478e-05, "loss": 0.4133, "step": 13795 }, { "epoch": 1.022910951286424, "grad_norm": 0.37198275327682495, "learning_rate": 6.205551387846962e-05, "loss": 0.475, "step": 13796 }, { "epoch": 1.0229850967598428, "grad_norm": 0.39051684737205505, "learning_rate": 6.204551137784447e-05, "loss": 0.4825, "step": 13797 }, { "epoch": 1.0230592422332616, "grad_norm": 0.3957137167453766, "learning_rate": 6.20355088772193e-05, "loss": 0.4538, "step": 13798 }, { "epoch": 1.0231333877066806, "grad_norm": 0.37033727765083313, "learning_rate": 6.202550637659416e-05, "loss": 0.4706, "step": 13799 }, { "epoch": 1.0232075331800994, "grad_norm": 0.3551233112812042, "learning_rate": 6.201550387596899e-05, "loss": 0.444, "step": 13800 }, { "epoch": 1.0232816786535182, "grad_norm": 0.3339748978614807, "learning_rate": 6.200550137534384e-05, "loss": 0.4113, "step": 13801 }, { "epoch": 1.0233558241269372, "grad_norm": 0.3599558174610138, "learning_rate": 6.199549887471868e-05, "loss": 0.3932, "step": 13802 }, { "epoch": 1.023429969600356, "grad_norm": 0.38553720712661743, "learning_rate": 6.198549637409352e-05, "loss": 0.4615, "step": 13803 }, { "epoch": 1.0235041150737747, "grad_norm": 0.3670625388622284, "learning_rate": 6.197549387346837e-05, "loss": 0.4458, "step": 13804 }, { "epoch": 1.0235782605471937, "grad_norm": 0.3677724599838257, "learning_rate": 6.19654913728432e-05, "loss": 0.4361, "step": 13805 }, { "epoch": 1.0236524060206125, "grad_norm": 0.37834739685058594, "learning_rate": 6.195548887221806e-05, "loss": 0.4352, "step": 13806 }, { "epoch": 1.0237265514940312, "grad_norm": 0.35491472482681274, "learning_rate": 6.19454863715929e-05, "loss": 0.474, "step": 13807 }, { "epoch": 1.0238006969674502, "grad_norm": 0.36852821707725525, "learning_rate": 6.193548387096774e-05, "loss": 0.4272, "step": 13808 }, { "epoch": 1.023874842440869, "grad_norm": 0.36506739258766174, "learning_rate": 6.192548137034258e-05, "loss": 0.442, "step": 13809 }, { "epoch": 1.0239489879142878, "grad_norm": 0.39339694380760193, "learning_rate": 6.191547886971743e-05, "loss": 0.4879, "step": 13810 }, { "epoch": 1.0240231333877068, "grad_norm": 0.37353259325027466, "learning_rate": 6.190547636909227e-05, "loss": 0.4613, "step": 13811 }, { "epoch": 1.0240972788611256, "grad_norm": 0.418751984834671, "learning_rate": 6.189547386846712e-05, "loss": 0.5361, "step": 13812 }, { "epoch": 1.0241714243345443, "grad_norm": 0.39337003231048584, "learning_rate": 6.188547136784196e-05, "loss": 0.4836, "step": 13813 }, { "epoch": 1.0242455698079633, "grad_norm": 0.373865008354187, "learning_rate": 6.187546886721681e-05, "loss": 0.4444, "step": 13814 }, { "epoch": 1.024319715281382, "grad_norm": 0.3663986921310425, "learning_rate": 6.186546636659166e-05, "loss": 0.4344, "step": 13815 }, { "epoch": 1.0243938607548009, "grad_norm": 0.42183995246887207, "learning_rate": 6.18554638659665e-05, "loss": 0.4731, "step": 13816 }, { "epoch": 1.0244680062282199, "grad_norm": 0.3707672357559204, "learning_rate": 6.184546136534135e-05, "loss": 0.451, "step": 13817 }, { "epoch": 1.0245421517016386, "grad_norm": 0.3504708707332611, "learning_rate": 6.183545886471618e-05, "loss": 0.4236, "step": 13818 }, { "epoch": 1.0246162971750574, "grad_norm": 0.37125033140182495, "learning_rate": 6.182545636409103e-05, "loss": 0.4552, "step": 13819 }, { "epoch": 1.0246904426484764, "grad_norm": 0.38009658455848694, "learning_rate": 6.181545386346587e-05, "loss": 0.4536, "step": 13820 }, { "epoch": 1.0247645881218952, "grad_norm": 0.3940674960613251, "learning_rate": 6.180545136284072e-05, "loss": 0.477, "step": 13821 }, { "epoch": 1.024838733595314, "grad_norm": 0.3546959161758423, "learning_rate": 6.179544886221556e-05, "loss": 0.4424, "step": 13822 }, { "epoch": 1.024912879068733, "grad_norm": 0.3689368963241577, "learning_rate": 6.178544636159041e-05, "loss": 0.4655, "step": 13823 }, { "epoch": 1.0249870245421517, "grad_norm": 0.3660738170146942, "learning_rate": 6.177544386096525e-05, "loss": 0.4362, "step": 13824 }, { "epoch": 1.0250611700155705, "grad_norm": 0.37221863865852356, "learning_rate": 6.17654413603401e-05, "loss": 0.4683, "step": 13825 }, { "epoch": 1.0251353154889893, "grad_norm": 0.37959426641464233, "learning_rate": 6.175543885971493e-05, "loss": 0.4654, "step": 13826 }, { "epoch": 1.0252094609624083, "grad_norm": 0.3645092248916626, "learning_rate": 6.174543635908977e-05, "loss": 0.4564, "step": 13827 }, { "epoch": 1.025283606435827, "grad_norm": 0.3834740221500397, "learning_rate": 6.173543385846462e-05, "loss": 0.4706, "step": 13828 }, { "epoch": 1.025357751909246, "grad_norm": 0.3770941495895386, "learning_rate": 6.172543135783946e-05, "loss": 0.4702, "step": 13829 }, { "epoch": 1.0254318973826648, "grad_norm": 0.37362754344940186, "learning_rate": 6.171542885721431e-05, "loss": 0.4557, "step": 13830 }, { "epoch": 1.0255060428560836, "grad_norm": 0.37135669589042664, "learning_rate": 6.170542635658915e-05, "loss": 0.4852, "step": 13831 }, { "epoch": 1.0255801883295024, "grad_norm": 0.3708464801311493, "learning_rate": 6.1695423855964e-05, "loss": 0.458, "step": 13832 }, { "epoch": 1.0256543338029214, "grad_norm": 0.3420923352241516, "learning_rate": 6.168542135533883e-05, "loss": 0.4309, "step": 13833 }, { "epoch": 1.0257284792763401, "grad_norm": 0.3863985538482666, "learning_rate": 6.167541885471368e-05, "loss": 0.4934, "step": 13834 }, { "epoch": 1.0258026247497591, "grad_norm": 0.36793190240859985, "learning_rate": 6.166541635408852e-05, "loss": 0.4398, "step": 13835 }, { "epoch": 1.025876770223178, "grad_norm": 0.367535263299942, "learning_rate": 6.165541385346337e-05, "loss": 0.4631, "step": 13836 }, { "epoch": 1.0259509156965967, "grad_norm": 0.3721461594104767, "learning_rate": 6.164541135283821e-05, "loss": 0.42, "step": 13837 }, { "epoch": 1.0260250611700155, "grad_norm": 0.3637293875217438, "learning_rate": 6.163540885221306e-05, "loss": 0.4188, "step": 13838 }, { "epoch": 1.0260992066434345, "grad_norm": 0.39368903636932373, "learning_rate": 6.16254063515879e-05, "loss": 0.4439, "step": 13839 }, { "epoch": 1.0261733521168532, "grad_norm": 0.3728233873844147, "learning_rate": 6.161540385096273e-05, "loss": 0.4906, "step": 13840 }, { "epoch": 1.026247497590272, "grad_norm": 0.36659398674964905, "learning_rate": 6.160540135033758e-05, "loss": 0.4211, "step": 13841 }, { "epoch": 1.026321643063691, "grad_norm": 0.37387752532958984, "learning_rate": 6.159539884971242e-05, "loss": 0.4838, "step": 13842 }, { "epoch": 1.0263957885371098, "grad_norm": 0.3511739671230316, "learning_rate": 6.158539634908727e-05, "loss": 0.4427, "step": 13843 }, { "epoch": 1.0264699340105286, "grad_norm": 0.3801327049732208, "learning_rate": 6.157539384846211e-05, "loss": 0.4887, "step": 13844 }, { "epoch": 1.0265440794839475, "grad_norm": 0.36945831775665283, "learning_rate": 6.156539134783696e-05, "loss": 0.4273, "step": 13845 }, { "epoch": 1.0266182249573663, "grad_norm": 0.38125720620155334, "learning_rate": 6.15553888472118e-05, "loss": 0.457, "step": 13846 }, { "epoch": 1.026692370430785, "grad_norm": 0.3476647138595581, "learning_rate": 6.154538634658665e-05, "loss": 0.4321, "step": 13847 }, { "epoch": 1.026766515904204, "grad_norm": 0.369093656539917, "learning_rate": 6.153538384596149e-05, "loss": 0.4707, "step": 13848 }, { "epoch": 1.0268406613776229, "grad_norm": 0.3507879376411438, "learning_rate": 6.152538134533634e-05, "loss": 0.4526, "step": 13849 }, { "epoch": 1.0269148068510416, "grad_norm": 0.38398295640945435, "learning_rate": 6.151537884471119e-05, "loss": 0.5017, "step": 13850 }, { "epoch": 1.0269889523244606, "grad_norm": 0.3784703016281128, "learning_rate": 6.150537634408602e-05, "loss": 0.4726, "step": 13851 }, { "epoch": 1.0270630977978794, "grad_norm": 0.3798927366733551, "learning_rate": 6.149537384346087e-05, "loss": 0.4654, "step": 13852 }, { "epoch": 1.0271372432712982, "grad_norm": 0.35202446579933167, "learning_rate": 6.148537134283571e-05, "loss": 0.4289, "step": 13853 }, { "epoch": 1.0272113887447172, "grad_norm": 0.3773544132709503, "learning_rate": 6.147536884221056e-05, "loss": 0.4798, "step": 13854 }, { "epoch": 1.027285534218136, "grad_norm": 0.37187257409095764, "learning_rate": 6.14653663415854e-05, "loss": 0.4415, "step": 13855 }, { "epoch": 1.0273596796915547, "grad_norm": 0.37895116209983826, "learning_rate": 6.145536384096025e-05, "loss": 0.4784, "step": 13856 }, { "epoch": 1.0274338251649737, "grad_norm": 0.39490634202957153, "learning_rate": 6.144536134033509e-05, "loss": 0.4624, "step": 13857 }, { "epoch": 1.0275079706383925, "grad_norm": 0.3851332366466522, "learning_rate": 6.143535883970994e-05, "loss": 0.4783, "step": 13858 }, { "epoch": 1.0275821161118113, "grad_norm": 0.361954927444458, "learning_rate": 6.142535633908477e-05, "loss": 0.4766, "step": 13859 }, { "epoch": 1.0276562615852303, "grad_norm": 0.3616301119327545, "learning_rate": 6.141535383845962e-05, "loss": 0.486, "step": 13860 }, { "epoch": 1.027730407058649, "grad_norm": 0.3700524866580963, "learning_rate": 6.140535133783446e-05, "loss": 0.4581, "step": 13861 }, { "epoch": 1.0278045525320678, "grad_norm": 0.3511163294315338, "learning_rate": 6.139534883720931e-05, "loss": 0.4359, "step": 13862 }, { "epoch": 1.0278786980054868, "grad_norm": 0.3798116445541382, "learning_rate": 6.138534633658415e-05, "loss": 0.4751, "step": 13863 }, { "epoch": 1.0279528434789056, "grad_norm": 0.3570370376110077, "learning_rate": 6.137534383595899e-05, "loss": 0.4487, "step": 13864 }, { "epoch": 1.0280269889523244, "grad_norm": 0.35898974537849426, "learning_rate": 6.136534133533384e-05, "loss": 0.426, "step": 13865 }, { "epoch": 1.0281011344257434, "grad_norm": 0.4258306920528412, "learning_rate": 6.135533883470867e-05, "loss": 0.5149, "step": 13866 }, { "epoch": 1.0281752798991621, "grad_norm": 0.38227567076683044, "learning_rate": 6.134533633408353e-05, "loss": 0.4813, "step": 13867 }, { "epoch": 1.028249425372581, "grad_norm": 0.3641892373561859, "learning_rate": 6.133533383345836e-05, "loss": 0.4456, "step": 13868 }, { "epoch": 1.028323570846, "grad_norm": 0.3873429000377655, "learning_rate": 6.132533133283321e-05, "loss": 0.4564, "step": 13869 }, { "epoch": 1.0283977163194187, "grad_norm": 0.3979024589061737, "learning_rate": 6.131532883220805e-05, "loss": 0.4935, "step": 13870 }, { "epoch": 1.0284718617928374, "grad_norm": 0.3847406804561615, "learning_rate": 6.13053263315829e-05, "loss": 0.4479, "step": 13871 }, { "epoch": 1.0285460072662564, "grad_norm": 0.3610366880893707, "learning_rate": 6.129532383095774e-05, "loss": 0.4546, "step": 13872 }, { "epoch": 1.0286201527396752, "grad_norm": 0.3627724349498749, "learning_rate": 6.128532133033259e-05, "loss": 0.4374, "step": 13873 }, { "epoch": 1.028694298213094, "grad_norm": 0.37467703223228455, "learning_rate": 6.127531882970743e-05, "loss": 0.453, "step": 13874 }, { "epoch": 1.028768443686513, "grad_norm": 0.36704227328300476, "learning_rate": 6.126531632908228e-05, "loss": 0.4612, "step": 13875 }, { "epoch": 1.0288425891599318, "grad_norm": 0.3685576319694519, "learning_rate": 6.125531382845711e-05, "loss": 0.4323, "step": 13876 }, { "epoch": 1.0289167346333505, "grad_norm": 0.37557730078697205, "learning_rate": 6.124531132783195e-05, "loss": 0.4639, "step": 13877 }, { "epoch": 1.0289908801067695, "grad_norm": 0.36068564653396606, "learning_rate": 6.12353088272068e-05, "loss": 0.4709, "step": 13878 }, { "epoch": 1.0290650255801883, "grad_norm": 0.3627378046512604, "learning_rate": 6.122530632658164e-05, "loss": 0.4499, "step": 13879 }, { "epoch": 1.029139171053607, "grad_norm": 0.3865765333175659, "learning_rate": 6.121530382595649e-05, "loss": 0.4645, "step": 13880 }, { "epoch": 1.029213316527026, "grad_norm": 0.37827256321907043, "learning_rate": 6.120530132533133e-05, "loss": 0.4567, "step": 13881 }, { "epoch": 1.0292874620004449, "grad_norm": 0.36821091175079346, "learning_rate": 6.119529882470618e-05, "loss": 0.479, "step": 13882 }, { "epoch": 1.0293616074738636, "grad_norm": 0.3813537657260895, "learning_rate": 6.118529632408103e-05, "loss": 0.4803, "step": 13883 }, { "epoch": 1.0294357529472826, "grad_norm": 0.3673918843269348, "learning_rate": 6.117529382345586e-05, "loss": 0.4379, "step": 13884 }, { "epoch": 1.0295098984207014, "grad_norm": 0.34440088272094727, "learning_rate": 6.116529132283071e-05, "loss": 0.4272, "step": 13885 }, { "epoch": 1.0295840438941202, "grad_norm": 0.35921710729599, "learning_rate": 6.115528882220555e-05, "loss": 0.4273, "step": 13886 }, { "epoch": 1.0296581893675392, "grad_norm": 0.37133240699768066, "learning_rate": 6.11452863215804e-05, "loss": 0.4501, "step": 13887 }, { "epoch": 1.029732334840958, "grad_norm": 0.35901591181755066, "learning_rate": 6.113528382095525e-05, "loss": 0.4493, "step": 13888 }, { "epoch": 1.0298064803143767, "grad_norm": 0.37150290608406067, "learning_rate": 6.112528132033009e-05, "loss": 0.476, "step": 13889 }, { "epoch": 1.0298806257877957, "grad_norm": 0.3980831801891327, "learning_rate": 6.111527881970493e-05, "loss": 0.4975, "step": 13890 }, { "epoch": 1.0299547712612145, "grad_norm": 0.3842487633228302, "learning_rate": 6.110527631907978e-05, "loss": 0.4586, "step": 13891 }, { "epoch": 1.0300289167346333, "grad_norm": 0.36464986205101013, "learning_rate": 6.109527381845462e-05, "loss": 0.4424, "step": 13892 }, { "epoch": 1.0301030622080523, "grad_norm": 0.3749871850013733, "learning_rate": 6.108527131782947e-05, "loss": 0.4595, "step": 13893 }, { "epoch": 1.030177207681471, "grad_norm": 0.3840009868144989, "learning_rate": 6.10752688172043e-05, "loss": 0.4519, "step": 13894 }, { "epoch": 1.0302513531548898, "grad_norm": 0.3786211907863617, "learning_rate": 6.106526631657915e-05, "loss": 0.4683, "step": 13895 }, { "epoch": 1.0303254986283088, "grad_norm": 0.3749697208404541, "learning_rate": 6.105526381595399e-05, "loss": 0.4605, "step": 13896 }, { "epoch": 1.0303996441017276, "grad_norm": 0.38753101229667664, "learning_rate": 6.104526131532884e-05, "loss": 0.4833, "step": 13897 }, { "epoch": 1.0304737895751463, "grad_norm": 0.3872445821762085, "learning_rate": 6.103525881470368e-05, "loss": 0.4574, "step": 13898 }, { "epoch": 1.0305479350485653, "grad_norm": 0.3723839521408081, "learning_rate": 6.102525631407853e-05, "loss": 0.4605, "step": 13899 }, { "epoch": 1.0306220805219841, "grad_norm": 0.35924211144447327, "learning_rate": 6.1015253813453366e-05, "loss": 0.4377, "step": 13900 }, { "epoch": 1.030696225995403, "grad_norm": 0.3758063316345215, "learning_rate": 6.100525131282822e-05, "loss": 0.4421, "step": 13901 }, { "epoch": 1.0307703714688219, "grad_norm": 0.3964223563671112, "learning_rate": 6.0995248812203054e-05, "loss": 0.4532, "step": 13902 }, { "epoch": 1.0308445169422407, "grad_norm": 0.3647625744342804, "learning_rate": 6.098524631157789e-05, "loss": 0.4676, "step": 13903 }, { "epoch": 1.0309186624156594, "grad_norm": 0.33569973707199097, "learning_rate": 6.097524381095274e-05, "loss": 0.4225, "step": 13904 }, { "epoch": 1.0309928078890784, "grad_norm": 0.3739515244960785, "learning_rate": 6.096524131032758e-05, "loss": 0.4556, "step": 13905 }, { "epoch": 1.0310669533624972, "grad_norm": 0.36444365978240967, "learning_rate": 6.095523880970243e-05, "loss": 0.4512, "step": 13906 }, { "epoch": 1.031141098835916, "grad_norm": 0.36580637097358704, "learning_rate": 6.0945236309077266e-05, "loss": 0.4429, "step": 13907 }, { "epoch": 1.031215244309335, "grad_norm": 0.3695870339870453, "learning_rate": 6.093523380845212e-05, "loss": 0.4617, "step": 13908 }, { "epoch": 1.0312893897827538, "grad_norm": 0.38062626123428345, "learning_rate": 6.092523130782696e-05, "loss": 0.511, "step": 13909 }, { "epoch": 1.0313635352561725, "grad_norm": 0.3794918358325958, "learning_rate": 6.0915228807201805e-05, "loss": 0.4885, "step": 13910 }, { "epoch": 1.0314376807295915, "grad_norm": 0.36560818552970886, "learning_rate": 6.090522630657665e-05, "loss": 0.4412, "step": 13911 }, { "epoch": 1.0315118262030103, "grad_norm": 0.3869355618953705, "learning_rate": 6.089522380595149e-05, "loss": 0.4568, "step": 13912 }, { "epoch": 1.031585971676429, "grad_norm": 0.3886266052722931, "learning_rate": 6.0885221305326336e-05, "loss": 0.5052, "step": 13913 }, { "epoch": 1.031660117149848, "grad_norm": 0.3592434823513031, "learning_rate": 6.0875218804701173e-05, "loss": 0.4367, "step": 13914 }, { "epoch": 1.0317342626232668, "grad_norm": 0.3918633759021759, "learning_rate": 6.0865216304076024e-05, "loss": 0.477, "step": 13915 }, { "epoch": 1.0318084080966856, "grad_norm": 0.34716683626174927, "learning_rate": 6.085521380345086e-05, "loss": 0.4236, "step": 13916 }, { "epoch": 1.0318825535701046, "grad_norm": 0.36923885345458984, "learning_rate": 6.084521130282571e-05, "loss": 0.4711, "step": 13917 }, { "epoch": 1.0319566990435234, "grad_norm": 0.35724401473999023, "learning_rate": 6.083520880220055e-05, "loss": 0.4153, "step": 13918 }, { "epoch": 1.0320308445169422, "grad_norm": 0.3774741590023041, "learning_rate": 6.08252063015754e-05, "loss": 0.4602, "step": 13919 }, { "epoch": 1.0321049899903612, "grad_norm": 0.3759528696537018, "learning_rate": 6.0815203800950237e-05, "loss": 0.4763, "step": 13920 }, { "epoch": 1.03217913546378, "grad_norm": 0.38544774055480957, "learning_rate": 6.080520130032509e-05, "loss": 0.4753, "step": 13921 }, { "epoch": 1.0322532809371987, "grad_norm": 0.3831244111061096, "learning_rate": 6.0795198799699924e-05, "loss": 0.4464, "step": 13922 }, { "epoch": 1.0323274264106177, "grad_norm": 0.34792250394821167, "learning_rate": 6.0785196299074775e-05, "loss": 0.4496, "step": 13923 }, { "epoch": 1.0324015718840365, "grad_norm": 0.38788607716560364, "learning_rate": 6.077519379844961e-05, "loss": 0.4797, "step": 13924 }, { "epoch": 1.0324757173574552, "grad_norm": 0.36017531156539917, "learning_rate": 6.076519129782446e-05, "loss": 0.4446, "step": 13925 }, { "epoch": 1.0325498628308742, "grad_norm": 0.36503690481185913, "learning_rate": 6.07551887971993e-05, "loss": 0.4074, "step": 13926 }, { "epoch": 1.032624008304293, "grad_norm": 0.3657728433609009, "learning_rate": 6.0745186296574144e-05, "loss": 0.4487, "step": 13927 }, { "epoch": 1.0326981537777118, "grad_norm": 0.3860059380531311, "learning_rate": 6.0735183795948994e-05, "loss": 0.4889, "step": 13928 }, { "epoch": 1.0327722992511308, "grad_norm": 0.34998422861099243, "learning_rate": 6.072518129532383e-05, "loss": 0.42, "step": 13929 }, { "epoch": 1.0328464447245496, "grad_norm": 0.37678149342536926, "learning_rate": 6.071517879469868e-05, "loss": 0.4802, "step": 13930 }, { "epoch": 1.0329205901979683, "grad_norm": 0.377903550863266, "learning_rate": 6.070517629407352e-05, "loss": 0.4506, "step": 13931 }, { "epoch": 1.0329947356713873, "grad_norm": 0.38115882873535156, "learning_rate": 6.069517379344837e-05, "loss": 0.4586, "step": 13932 }, { "epoch": 1.033068881144806, "grad_norm": 0.3486802279949188, "learning_rate": 6.068517129282321e-05, "loss": 0.4219, "step": 13933 }, { "epoch": 1.0331430266182249, "grad_norm": 0.39249157905578613, "learning_rate": 6.067516879219806e-05, "loss": 0.4794, "step": 13934 }, { "epoch": 1.0332171720916439, "grad_norm": 0.3704233169555664, "learning_rate": 6.0665166291572895e-05, "loss": 0.465, "step": 13935 }, { "epoch": 1.0332913175650627, "grad_norm": 0.3579782247543335, "learning_rate": 6.0655163790947745e-05, "loss": 0.4253, "step": 13936 }, { "epoch": 1.0333654630384814, "grad_norm": 0.37588611245155334, "learning_rate": 6.064516129032258e-05, "loss": 0.4366, "step": 13937 }, { "epoch": 1.0334396085119004, "grad_norm": 0.3824479281902313, "learning_rate": 6.063515878969743e-05, "loss": 0.4837, "step": 13938 }, { "epoch": 1.0335137539853192, "grad_norm": 0.37958794832229614, "learning_rate": 6.062515628907227e-05, "loss": 0.4552, "step": 13939 }, { "epoch": 1.033587899458738, "grad_norm": 0.38280799984931946, "learning_rate": 6.061515378844711e-05, "loss": 0.4618, "step": 13940 }, { "epoch": 1.033662044932157, "grad_norm": 0.38571614027023315, "learning_rate": 6.060515128782196e-05, "loss": 0.4842, "step": 13941 }, { "epoch": 1.0337361904055757, "grad_norm": 0.3754442036151886, "learning_rate": 6.0595148787196795e-05, "loss": 0.5149, "step": 13942 }, { "epoch": 1.0338103358789945, "grad_norm": 0.3871697783470154, "learning_rate": 6.0585146286571646e-05, "loss": 0.4549, "step": 13943 }, { "epoch": 1.0338844813524135, "grad_norm": 0.3610668480396271, "learning_rate": 6.057514378594649e-05, "loss": 0.4594, "step": 13944 }, { "epoch": 1.0339586268258323, "grad_norm": 0.36982011795043945, "learning_rate": 6.056514128532133e-05, "loss": 0.4502, "step": 13945 }, { "epoch": 1.034032772299251, "grad_norm": 0.374748170375824, "learning_rate": 6.055513878469618e-05, "loss": 0.4478, "step": 13946 }, { "epoch": 1.03410691777267, "grad_norm": 0.3966861367225647, "learning_rate": 6.054513628407103e-05, "loss": 0.5092, "step": 13947 }, { "epoch": 1.0341810632460888, "grad_norm": 0.3829025626182556, "learning_rate": 6.0535133783445865e-05, "loss": 0.4437, "step": 13948 }, { "epoch": 1.0342552087195076, "grad_norm": 0.36754339933395386, "learning_rate": 6.0525131282820715e-05, "loss": 0.4386, "step": 13949 }, { "epoch": 1.0343293541929266, "grad_norm": 0.37394240498542786, "learning_rate": 6.051512878219555e-05, "loss": 0.4362, "step": 13950 }, { "epoch": 1.0344034996663454, "grad_norm": 0.3677847981452942, "learning_rate": 6.050512628157039e-05, "loss": 0.451, "step": 13951 }, { "epoch": 1.0344776451397641, "grad_norm": 0.40811407566070557, "learning_rate": 6.049512378094524e-05, "loss": 0.4918, "step": 13952 }, { "epoch": 1.0345517906131831, "grad_norm": 0.37685495615005493, "learning_rate": 6.048512128032008e-05, "loss": 0.4463, "step": 13953 }, { "epoch": 1.034625936086602, "grad_norm": 0.36242228746414185, "learning_rate": 6.047511877969493e-05, "loss": 0.4361, "step": 13954 }, { "epoch": 1.0347000815600207, "grad_norm": 0.36561936140060425, "learning_rate": 6.0465116279069765e-05, "loss": 0.4525, "step": 13955 }, { "epoch": 1.0347742270334397, "grad_norm": 0.3624245524406433, "learning_rate": 6.0455113778444616e-05, "loss": 0.426, "step": 13956 }, { "epoch": 1.0348483725068585, "grad_norm": 0.37497979402542114, "learning_rate": 6.044511127781945e-05, "loss": 0.4388, "step": 13957 }, { "epoch": 1.0349225179802772, "grad_norm": 0.3603576719760895, "learning_rate": 6.0435108777194304e-05, "loss": 0.4441, "step": 13958 }, { "epoch": 1.0349966634536962, "grad_norm": 0.3427235782146454, "learning_rate": 6.042510627656914e-05, "loss": 0.426, "step": 13959 }, { "epoch": 1.035070808927115, "grad_norm": 0.3860428035259247, "learning_rate": 6.041510377594399e-05, "loss": 0.4726, "step": 13960 }, { "epoch": 1.0351449544005338, "grad_norm": 0.35441234707832336, "learning_rate": 6.040510127531883e-05, "loss": 0.4156, "step": 13961 }, { "epoch": 1.0352190998739528, "grad_norm": 0.3681468665599823, "learning_rate": 6.039509877469368e-05, "loss": 0.4402, "step": 13962 }, { "epoch": 1.0352932453473715, "grad_norm": 0.3632572591304779, "learning_rate": 6.038509627406852e-05, "loss": 0.469, "step": 13963 }, { "epoch": 1.0353673908207903, "grad_norm": 0.3778252601623535, "learning_rate": 6.037509377344336e-05, "loss": 0.434, "step": 13964 }, { "epoch": 1.0354415362942093, "grad_norm": 0.3983745276927948, "learning_rate": 6.036509127281821e-05, "loss": 0.4877, "step": 13965 }, { "epoch": 1.035515681767628, "grad_norm": 0.3892870843410492, "learning_rate": 6.035508877219305e-05, "loss": 0.4713, "step": 13966 }, { "epoch": 1.0355898272410469, "grad_norm": 0.4065605103969574, "learning_rate": 6.03450862715679e-05, "loss": 0.5028, "step": 13967 }, { "epoch": 1.0356639727144659, "grad_norm": 0.35369205474853516, "learning_rate": 6.0335083770942735e-05, "loss": 0.4303, "step": 13968 }, { "epoch": 1.0357381181878846, "grad_norm": 0.38642358779907227, "learning_rate": 6.0325081270317586e-05, "loss": 0.4766, "step": 13969 }, { "epoch": 1.0358122636613034, "grad_norm": 0.4184059500694275, "learning_rate": 6.031507876969242e-05, "loss": 0.5264, "step": 13970 }, { "epoch": 1.0358864091347224, "grad_norm": 0.37901946902275085, "learning_rate": 6.0305076269067274e-05, "loss": 0.4725, "step": 13971 }, { "epoch": 1.0359605546081412, "grad_norm": 0.40548181533813477, "learning_rate": 6.029507376844211e-05, "loss": 0.4759, "step": 13972 }, { "epoch": 1.03603470008156, "grad_norm": 0.36896762251853943, "learning_rate": 6.028507126781696e-05, "loss": 0.4566, "step": 13973 }, { "epoch": 1.036108845554979, "grad_norm": 0.3935464024543762, "learning_rate": 6.02750687671918e-05, "loss": 0.5103, "step": 13974 }, { "epoch": 1.0361829910283977, "grad_norm": 0.4080335795879364, "learning_rate": 6.026506626656665e-05, "loss": 0.5118, "step": 13975 }, { "epoch": 1.0362571365018165, "grad_norm": 0.3765278160572052, "learning_rate": 6.0255063765941486e-05, "loss": 0.4318, "step": 13976 }, { "epoch": 1.0363312819752355, "grad_norm": 0.3647381365299225, "learning_rate": 6.024506126531633e-05, "loss": 0.4712, "step": 13977 }, { "epoch": 1.0364054274486543, "grad_norm": 0.37618568539619446, "learning_rate": 6.0235058764691174e-05, "loss": 0.4436, "step": 13978 }, { "epoch": 1.036479572922073, "grad_norm": 0.3789175748825073, "learning_rate": 6.022505626406602e-05, "loss": 0.4447, "step": 13979 }, { "epoch": 1.036553718395492, "grad_norm": 0.3912706971168518, "learning_rate": 6.021505376344086e-05, "loss": 0.4719, "step": 13980 }, { "epoch": 1.0366278638689108, "grad_norm": 0.39656373858451843, "learning_rate": 6.0205051262815706e-05, "loss": 0.4532, "step": 13981 }, { "epoch": 1.0367020093423296, "grad_norm": 0.3965631425380707, "learning_rate": 6.0195048762190556e-05, "loss": 0.4736, "step": 13982 }, { "epoch": 1.0367761548157486, "grad_norm": 0.3491508960723877, "learning_rate": 6.018504626156539e-05, "loss": 0.4236, "step": 13983 }, { "epoch": 1.0368503002891674, "grad_norm": 0.3922244608402252, "learning_rate": 6.0175043760940244e-05, "loss": 0.4789, "step": 13984 }, { "epoch": 1.0369244457625861, "grad_norm": 0.4000846743583679, "learning_rate": 6.016504126031508e-05, "loss": 0.4816, "step": 13985 }, { "epoch": 1.0369985912360051, "grad_norm": 0.3662322461605072, "learning_rate": 6.015503875968993e-05, "loss": 0.4739, "step": 13986 }, { "epoch": 1.037072736709424, "grad_norm": 0.38804933428764343, "learning_rate": 6.014503625906477e-05, "loss": 0.4936, "step": 13987 }, { "epoch": 1.0371468821828427, "grad_norm": 0.4163666367530823, "learning_rate": 6.0135033758439606e-05, "loss": 0.5067, "step": 13988 }, { "epoch": 1.0372210276562617, "grad_norm": 0.3688827157020569, "learning_rate": 6.0125031257814457e-05, "loss": 0.4373, "step": 13989 }, { "epoch": 1.0372951731296804, "grad_norm": 0.3642045557498932, "learning_rate": 6.0115028757189294e-05, "loss": 0.4474, "step": 13990 }, { "epoch": 1.0373693186030992, "grad_norm": 0.36583152413368225, "learning_rate": 6.0105026256564144e-05, "loss": 0.4365, "step": 13991 }, { "epoch": 1.0374434640765182, "grad_norm": 0.36769625544548035, "learning_rate": 6.009502375593898e-05, "loss": 0.4682, "step": 13992 }, { "epoch": 1.037517609549937, "grad_norm": 0.37264132499694824, "learning_rate": 6.008502125531383e-05, "loss": 0.4614, "step": 13993 }, { "epoch": 1.0375917550233558, "grad_norm": 0.36812761425971985, "learning_rate": 6.007501875468867e-05, "loss": 0.4583, "step": 13994 }, { "epoch": 1.0376659004967748, "grad_norm": 0.3900917172431946, "learning_rate": 6.006501625406352e-05, "loss": 0.47, "step": 13995 }, { "epoch": 1.0377400459701935, "grad_norm": 0.3731202483177185, "learning_rate": 6.0055013753438364e-05, "loss": 0.4291, "step": 13996 }, { "epoch": 1.0378141914436123, "grad_norm": 0.39911022782325745, "learning_rate": 6.004501125281321e-05, "loss": 0.4841, "step": 13997 }, { "epoch": 1.0378883369170313, "grad_norm": 0.3904803693294525, "learning_rate": 6.003500875218805e-05, "loss": 0.4894, "step": 13998 }, { "epoch": 1.03796248239045, "grad_norm": 0.3793337941169739, "learning_rate": 6.0025006251562895e-05, "loss": 0.4371, "step": 13999 }, { "epoch": 1.0380366278638689, "grad_norm": 0.37912386655807495, "learning_rate": 6.001500375093774e-05, "loss": 0.4322, "step": 14000 } ], "logging_steps": 1, "max_steps": 20000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 1.5918840344143626e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }